-rw-r--r--contrib/llvm/FREEBSD-Xlist392
-rw-r--r--contrib/llvm/LICENSE.TXT2
-rw-r--r--contrib/llvm/include/llvm-c/Core.h371
-rw-r--r--contrib/llvm/include/llvm-c/DebugInfo.h88
-rw-r--r--contrib/llvm/include/llvm-c/Error.h69
-rw-r--r--contrib/llvm/include/llvm-c/ExecutionEngine.h2
-rw-r--r--contrib/llvm/include/llvm-c/OptRemarks.h204
-rw-r--r--contrib/llvm/include/llvm-c/OrcBindings.h73
-rw-r--r--contrib/llvm/include/llvm-c/TargetMachine.h6
-rw-r--r--contrib/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h43
-rw-r--r--contrib/llvm/include/llvm-c/Transforms/Coroutines.h55
-rw-r--r--contrib/llvm/include/llvm-c/Transforms/Scalar.h9
-rw-r--r--contrib/llvm/include/llvm-c/Types.h14
-rw-r--r--contrib/llvm/include/llvm-c/lto.h12
-rw-r--r--contrib/llvm/include/llvm/ADT/APFloat.h36
-rw-r--r--contrib/llvm/include/llvm/ADT/APInt.h64
-rw-r--r--contrib/llvm/include/llvm/ADT/Any.h10
-rw-r--r--contrib/llvm/include/llvm/ADT/BitVector.h17
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseMap.h81
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseSet.h35
-rw-r--r--contrib/llvm/include/llvm/ADT/GraphTraits.h7
-rw-r--r--contrib/llvm/include/llvm/ADT/Hashing.h15
-rw-r--r--contrib/llvm/include/llvm/ADT/ImmutableList.h36
-rw-r--r--contrib/llvm/include/llvm/ADT/IntervalMap.h24
-rw-r--r--contrib/llvm/include/llvm/ADT/Optional.h22
-rw-r--r--contrib/llvm/include/llvm/ADT/PointerIntPair.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/PointerSumType.h128
-rw-r--r--contrib/llvm/include/llvm/ADT/PostOrderIterator.h3
-rw-r--r--contrib/llvm/include/llvm/ADT/STLExtras.h358
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallBitVector.h59
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallVector.h10
-rw-r--r--contrib/llvm/include/llvm/ADT/SparseBitVector.h75
-rw-r--r--contrib/llvm/include/llvm/ADT/StringExtras.h11
-rw-r--r--contrib/llvm/include/llvm/ADT/Triple.h32
-rw-r--r--contrib/llvm/include/llvm/ADT/bit.h59
-rw-r--r--contrib/llvm/include/llvm/ADT/iterator.h38
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasAnalysis.h143
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasSetTracker.h57
-rw-r--r--contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFG.h3
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFGPrinter.h5
-rw-r--r--contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h39
-rw-r--r--contrib/llvm/include/llvm/Analysis/CaptureTracking.h23
-rw-r--r--contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h17
-rw-r--r--contrib/llvm/include/llvm/Analysis/DemandedBits.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h11
-rw-r--r--contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h198
-rw-r--r--contrib/llvm/include/llvm/Analysis/GlobalsModRef.h6
-rw-r--r--contrib/llvm/include/llvm/Analysis/GuardUtils.h26
-rw-r--r--contrib/llvm/include/llvm/Analysis/IVDescriptors.h357
-rw-r--r--contrib/llvm/include/llvm/Analysis/IndirectCallSiteVisitor.h35
-rw-r--r--contrib/llvm/include/llvm/Analysis/IndirectCallVisitor.h39
-rw-r--r--contrib/llvm/include/llvm/Analysis/InlineCost.h36
-rw-r--r--contrib/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h150
-rw-r--r--contrib/llvm/include/llvm/Analysis/InstructionSimplify.h47
-rw-r--r--contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h28
-rw-r--r--contrib/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h69
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h56
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopInfo.h26
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h9
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h11
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryLocation.h167
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemorySSA.h83
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h96
-rw-r--r--contrib/llvm/include/llvm/Analysis/MustExecute.h140
-rw-r--r--contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h38
-rw-r--r--contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h3
-rw-r--r--contrib/llvm/include/llvm/Analysis/OrderedInstructions.h (renamed from contrib/llvm/include/llvm/Transforms/Utils/OrderedInstructions.h)6
-rw-r--r--contrib/llvm/include/llvm/Analysis/Passes.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/PhiValues.h16
-rw-r--r--contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h15
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolution.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h6
-rw-r--r--contrib/llvm/include/llvm/Analysis/SparsePropagation.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/StackSafetyAnalysis.h120
-rw-r--r--contrib/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h86
-rw-r--r--contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h15
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def27
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h55
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h18
-rw-r--r--contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h8
-rw-r--r--contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h7
-rw-r--r--contrib/llvm/include/llvm/Analysis/ValueTracking.h68
-rw-r--r--contrib/llvm/include/llvm/Analysis/VectorUtils.h424
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h70
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/Dwarf.def111
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/Dwarf.h14
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/ELF.h56
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/ELFRelocs/MSP430.def16
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MachO.h5
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPack.def108
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPack.h93
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPackReader.h148
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPackTypes.h372
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/MsgPackWriter.h131
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/Wasm.h130
-rw-r--r--contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def2
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitcodeReader.h1
-rw-r--r--contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h11
-rw-r--r--contrib/llvm/include/llvm/CodeGen/AsmPrinter.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/AsmPrinterHandler.h (renamed from contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h)6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h253
-rw-r--r--contrib/llvm/include/llvm/CodeGen/BuiltinGCs.h (renamed from contrib/llvm/include/llvm/CodeGen/GCs.h)23
-rw-r--r--contrib/llvm/include/llvm/CodeGen/CommandFlags.inc28
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h (renamed from contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h)46
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DebugHandlerBase.h (renamed from contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h)17
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h47
-rw-r--r--contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCMetadata.h9
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GCStrategy.h50
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h237
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h110
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h10
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h19
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h104
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h111
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h35
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h145
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h25
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h471
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h11
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h100
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveIntervals.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h17
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h7
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h42
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFunction.h119
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstr.h281
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h17
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h8
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h22
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineOutliner.h78
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h80
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachinePipeliner.h608
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h19
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineScheduler.h25
-rw-r--r--contrib/llvm/include/llvm/CodeGen/Passes.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h10
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h23
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h13
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAG.h119
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h11
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h141
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SlotIndexes.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/StackMaps.h44
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h7
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h67
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetLowering.h304
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h34
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h14
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h38
-rw-r--r--contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h19
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h35
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def577
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h12
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h23
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h8
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h31
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h62
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h42
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h28
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DIContext.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h20
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h147
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h26
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h31
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h20
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h13
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h26
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h15
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h15
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h206
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h59
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h30
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h36
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h34
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h39
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h27
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h42
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h15
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h36
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h47
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h8
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h13
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h10
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h43
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h60
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h12
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h38
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h30
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h51
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h50
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h (renamed from contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h)21
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h75
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h74
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h61
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h42
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h74
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h46
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h34
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h148
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h48
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h13
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h12
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h48
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h46
-rw-r--r--contrib/llvm/include/llvm/Demangle/Compiler.h (renamed from contrib/llvm/lib/Demangle/Compiler.h)0
-rw-r--r--contrib/llvm/include/llvm/Demangle/Demangle.h10
-rw-r--r--contrib/llvm/include/llvm/Demangle/ItaniumDemangle.h5184
-rw-r--r--contrib/llvm/include/llvm/Demangle/MicrosoftDemangle.h276
-rw-r--r--contrib/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h605
-rw-r--r--contrib/llvm/include/llvm/Demangle/StringView.h (renamed from contrib/llvm/lib/Demangle/StringView.h)0
-rw-r--r--contrib/llvm/include/llvm/Demangle/Utility.h (renamed from contrib/llvm/lib/Demangle/Utility.h)31
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h35
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h93
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h173
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h10
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h777
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h94
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h17
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h26
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h257
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h130
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h124
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h76
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h195
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h82
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h4
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h14
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h75
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h83
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h3
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h76
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h98
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h27
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h163
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h25
-rw-r--r--contrib/llvm/include/llvm/IR/Attributes.h117
-rw-r--r--contrib/llvm/include/llvm/IR/Attributes.td9
-rw-r--r--contrib/llvm/include/llvm/IR/BasicBlock.h27
-rw-r--r--contrib/llvm/include/llvm/IR/CFG.h169
-rw-r--r--contrib/llvm/include/llvm/IR/CFGDiff.h285
-rw-r--r--contrib/llvm/include/llvm/IR/CallSite.h5
-rw-r--r--contrib/llvm/include/llvm/IR/CallingConv.h3
-rw-r--r--contrib/llvm/include/llvm/IR/Constant.h3
-rw-r--r--contrib/llvm/include/llvm/IR/Constants.h13
-rw-r--r--contrib/llvm/include/llvm/IR/DIBuilder.h70
-rw-r--r--contrib/llvm/include/llvm/IR/DataLayout.h8
-rw-r--r--contrib/llvm/include/llvm/IR/DebugInfoFlags.def41
-rw-r--r--contrib/llvm/include/llvm/IR/DebugInfoMetadata.h448
-rw-r--r--contrib/llvm/include/llvm/IR/DebugLoc.h7
-rw-r--r--contrib/llvm/include/llvm/IR/DiagnosticInfo.h24
-rw-r--r--contrib/llvm/include/llvm/IR/DomTreeUpdater.h8
-rw-r--r--contrib/llvm/include/llvm/IR/Dominators.h100
-rw-r--r--contrib/llvm/include/llvm/IR/Function.h20
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalValue.h1
-rw-r--r--contrib/llvm/include/llvm/IR/IRBuilder.h266
-rw-r--r--contrib/llvm/include/llvm/IR/IRPrintingPasses.h16
-rw-r--r--contrib/llvm/include/llvm/IR/InstVisitor.h72
-rw-r--r--contrib/llvm/include/llvm/IR/InstrTypes.h1162
-rw-r--r--contrib/llvm/include/llvm/IR/Instruction.def153
-rw-r--r--contrib/llvm/include/llvm/IR/Instruction.h50
-rw-r--r--contrib/llvm/include/llvm/IR/Instructions.h1279
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicInst.h42
-rw-r--r--contrib/llvm/include/llvm/IR/Intrinsics.td152
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td17
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td188
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td14893
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td6
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsRISCV.td44
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td72
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsX86.td801
-rw-r--r--contrib/llvm/include/llvm/IR/LLVMContext.h1
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassManager.h3
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassManagers.h17
-rw-r--r--contrib/llvm/include/llvm/IR/Metadata.h16
-rw-r--r--contrib/llvm/include/llvm/IR/Module.h43
-rw-r--r--contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h187
-rw-r--r--contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h15
-rw-r--r--contrib/llvm/include/llvm/IR/Operator.h31
-rw-r--r--contrib/llvm/include/llvm/IR/PassInstrumentation.h207
-rw-r--r--contrib/llvm/include/llvm/IR/PassManager.h133
-rw-r--r--contrib/llvm/include/llvm/IR/PassManagerInternal.h11
-rw-r--r--contrib/llvm/include/llvm/IR/PassTimingInfo.h108
-rw-r--r--contrib/llvm/include/llvm/IR/PatternMatch.h252
-rw-r--r--contrib/llvm/include/llvm/IR/RuntimeLibcalls.def8
-rw-r--r--contrib/llvm/include/llvm/IR/TypeBuilder.h407
-rw-r--r--contrib/llvm/include/llvm/IR/Value.h3
-rw-r--r--contrib/llvm/include/llvm/InitializePasses.h19
-rw-r--r--contrib/llvm/include/llvm/LTO/Config.h7
-rw-r--r--contrib/llvm/include/llvm/LTO/LTO.h24
-rw-r--r--contrib/llvm/include/llvm/LTO/SummaryBasedOptimizations.h17
-rw-r--r--contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h5
-rw-r--r--contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h15
-rw-r--r--contrib/llvm/include/llvm/LinkAllPasses.h8
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmMacro.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCAssembler.h11
-rw-r--r--contrib/llvm/include/llvm/MC/MCCodeView.h72
-rw-r--r--contrib/llvm/include/llvm/MC/MCContext.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCDwarf.h7
-rw-r--r--contrib/llvm/include/llvm/MC/MCELFObjectWriter.h5
-rw-r--r--contrib/llvm/include/llvm/MC/MCExpr.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCInst.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrAnalysis.h70
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrDesc.h9
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectFileInfo.h33
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectStreamer.h13
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h16
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h9
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h6
-rw-r--r--contrib/llvm/include/llvm/MC/MCRegisterInfo.h6
-rw-r--r--contrib/llvm/include/llvm/MC/MCSchedule.h21
-rw-r--r--contrib/llvm/include/llvm/MC/MCSection.h7
-rw-r--r--contrib/llvm/include/llvm/MC/MCStreamer.h44
-rw-r--r--contrib/llvm/include/llvm/MC/MCSymbolWasm.h48
-rw-r--r--contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCWin64EH.h8
-rw-r--r--contrib/llvm/include/llvm/MC/MCWinEH.h9
-rw-r--r--contrib/llvm/include/llvm/MCA/Context.h (renamed from contrib/llvm/tools/llvm-mca/Context.h)29
-rw-r--r--contrib/llvm/include/llvm/MCA/HWEventListener.h (renamed from contrib/llvm/tools/llvm-mca/HWEventListener.h)45
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/HardwareUnit.h (renamed from contrib/llvm/tools/llvm-mca/HardwareUnit.h)8
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h207
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h (renamed from contrib/llvm/tools/llvm-mca/RegisterFile.h)145
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h410
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h (renamed from contrib/llvm/tools/llvm-mca/RetireControlUnit.h)20
-rw-r--r--contrib/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h214
-rw-r--r--contrib/llvm/include/llvm/MCA/InstrBuilder.h77
-rw-r--r--contrib/llvm/include/llvm/MCA/Instruction.h (renamed from contrib/llvm/tools/llvm-mca/Instruction.h)285
-rw-r--r--contrib/llvm/include/llvm/MCA/Pipeline.h (renamed from contrib/llvm/tools/llvm-mca/Pipeline.h)30
-rw-r--r--contrib/llvm/include/llvm/MCA/SourceMgr.h57
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/DispatchStage.h (renamed from contrib/llvm/tools/llvm-mca/DispatchStage.h)75
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/EntryStage.h52
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/ExecuteStage.h80
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/InstructionTables.h (renamed from contrib/llvm/tools/llvm-mca/InstructionTables.h)33
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/RetireStage.h (renamed from contrib/llvm/tools/llvm-mca/RetireStage.h)34
-rw-r--r--contrib/llvm/include/llvm/MCA/Stages/Stage.h (renamed from contrib/llvm/tools/llvm-mca/Stage.h)62
-rw-r--r--contrib/llvm/include/llvm/MCA/Support.h119
-rw-r--r--contrib/llvm/include/llvm/Object/COFF.h12
-rw-r--r--contrib/llvm/include/llvm/Object/ELF.h8
-rw-r--r--contrib/llvm/include/llvm/Object/ELFObjectFile.h29
-rw-r--r--contrib/llvm/include/llvm/Object/ELFTypes.h25
-rw-r--r--contrib/llvm/include/llvm/Object/Error.h1
-rw-r--r--contrib/llvm/include/llvm/Object/MachO.h5
-rw-r--r--contrib/llvm/include/llvm/Object/ObjectFile.h22
-rw-r--r--contrib/llvm/include/llvm/Object/RelocVisitor.h3
-rw-r--r--contrib/llvm/include/llvm/Object/Wasm.h89
-rw-r--r--contrib/llvm/include/llvm/Object/WasmTraits.h14
-rw-r--r--contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h6
-rw-r--r--contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h2
-rw-r--r--contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h37
-rw-r--r--contrib/llvm/include/llvm/Option/OptTable.h8
-rw-r--r--contrib/llvm/include/llvm/Pass.h11
-rw-r--r--contrib/llvm/include/llvm/Passes/PassBuilder.h84
-rw-r--r--contrib/llvm/include/llvm/Passes/StandardInstrumentations.h70
-rw-r--r--contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h13
-rw-r--r--contrib/llvm/include/llvm/ProfileData/GCOV.h20
-rw-r--r--contrib/llvm/include/llvm/ProfileData/InstrProf.h6
-rw-r--r--contrib/llvm/include/llvm/ProfileData/InstrProfReader.h30
-rw-r--r--contrib/llvm/include/llvm/ProfileData/SampleProf.h91
-rw-r--r--contrib/llvm/include/llvm/ProfileData/SampleProfReader.h80
-rw-r--r--contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h51
-rw-r--r--contrib/llvm/include/llvm/Support/AArch64TargetParser.def18
-rw-r--r--contrib/llvm/include/llvm/Support/AArch64TargetParser.h124
-rw-r--r--contrib/llvm/include/llvm/Support/AMDGPUMetadata.h15
-rw-r--r--contrib/llvm/include/llvm/Support/ARMTargetParser.def15
-rw-r--r--contrib/llvm/include/llvm/Support/ARMTargetParser.h264
-rw-r--r--contrib/llvm/include/llvm/Support/ARMWinEH.h88
-rw-r--r--contrib/llvm/include/llvm/Support/Allocator.h55
-rw-r--r--contrib/llvm/include/llvm/Support/BinaryStreamArray.h31
-rw-r--r--contrib/llvm/include/llvm/Support/BinaryStreamReader.h5
-rw-r--r--contrib/llvm/include/llvm/Support/BuryPointer.h30
-rw-r--r--contrib/llvm/include/llvm/Support/CFGUpdate.h118
-rw-r--r--contrib/llvm/include/llvm/Support/Chrono.h8
-rw-r--r--contrib/llvm/include/llvm/Support/CodeGen.h7
-rw-r--r--contrib/llvm/include/llvm/Support/CommandLine.h19
-rw-r--r--contrib/llvm/include/llvm/Support/Compiler.h19
-rw-r--r--contrib/llvm/include/llvm/Support/Compression.h13
-rw-r--r--contrib/llvm/include/llvm/Support/Debug.h4
-rw-r--r--contrib/llvm/include/llvm/Support/DebugCounter.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Error.h105
-rw-r--r--contrib/llvm/include/llvm/Support/ErrorHandling.h4
-rw-r--r--contrib/llvm/include/llvm/Support/FileCheck.h282
-rw-r--r--contrib/llvm/include/llvm/Support/FileOutputBuffer.h4
-rw-r--r--contrib/llvm/include/llvm/Support/FileSystem.h143
-rw-r--r--contrib/llvm/include/llvm/Support/FormatVariadicDetails.h2
-rw-r--r--contrib/llvm/include/llvm/Support/GenericDomTree.h58
-rw-r--r--contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h106
-rw-r--r--contrib/llvm/include/llvm/Support/GraphWriter.h23
-rw-r--r--contrib/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h93
-rw-r--r--contrib/llvm/include/llvm/Support/JSON.h9
-rw-r--r--contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h9
-rw-r--r--contrib/llvm/include/llvm/Support/MSVCErrorWorkarounds.h84
-rw-r--r--contrib/llvm/include/llvm/Support/Path.h16
-rw-r--r--contrib/llvm/include/llvm/Support/ScopedPrinter.h2
-rw-r--r--contrib/llvm/include/llvm/Support/SymbolRemappingReader.h133
-rw-r--r--contrib/llvm/include/llvm/Support/TargetOpcodes.def54
-rw-r--r--contrib/llvm/include/llvm/Support/TargetParser.h293
-rw-r--r--contrib/llvm/include/llvm/Support/Threading.h3
-rw-r--r--contrib/llvm/include/llvm/Support/Timer.h14
-rw-r--r--contrib/llvm/include/llvm/Support/VirtualFileSystem.h764
-rw-r--r--contrib/llvm/include/llvm/Support/Win64EH.h19
-rw-r--r--contrib/llvm/include/llvm/Support/WithColor.h63
-rw-r--r--contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h2
-rw-r--r--contrib/llvm/include/llvm/Support/X86TargetParser.def46
-rw-r--r--contrib/llvm/include/llvm/Support/YAMLTraits.h326
-rw-r--r--contrib/llvm/include/llvm/Support/raw_ostream.h12
-rw-r--r--contrib/llvm/include/llvm/Support/type_traits.h5
-rw-r--r--contrib/llvm/include/llvm/TableGen/StringMatcher.h7
-rw-r--r--contrib/llvm/include/llvm/Target/CodeGenCWrappers.h4
-rw-r--r--contrib/llvm/include/llvm/Target/GenericOpcodes.td119
-rw-r--r--contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td7
-rw-r--r--contrib/llvm/include/llvm/Target/Target.td13
-rw-r--r--contrib/llvm/include/llvm/Target/TargetInstrPredicate.td242
-rw-r--r--contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h22
-rw-r--r--contrib/llvm/include/llvm/Target/TargetMachine.h61
-rw-r--r--contrib/llvm/include/llvm/Target/TargetOptions.h12
-rw-r--r--contrib/llvm/include/llvm/Target/TargetPfmCounters.td50
-rw-r--r--contrib/llvm/include/llvm/Target/TargetSchedule.td66
-rw-r--r--contrib/llvm/include/llvm/Target/TargetSelectionDAG.td60
-rw-r--r--contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h9
-rw-r--r--contrib/llvm/include/llvm/TextAPI/ELF/ELFStub.h69
-rw-r--r--contrib/llvm/include/llvm/TextAPI/ELF/TBEHandler.h45
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO.h5
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h64
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h31
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h7
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation.h33
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h31
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h48
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h4
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h33
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar.h20
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h67
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/GVN.h17
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h38
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h27
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h74
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h47
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h14
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/Scalarizer.h35
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h38
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils.h7
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h67
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h13
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h32
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Cloning.h66
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h23
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/GuardUtils.h30
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Local.h74
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h5
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h450
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h18
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h25
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h23
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize.h4
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h27
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h21
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h11
-rw-r--r--contrib/llvm/include/llvm/XRay/BlockIndexer.h69
-rw-r--r--contrib/llvm/include/llvm/XRay/BlockPrinter.h62
-rw-r--r--contrib/llvm/include/llvm/XRay/BlockVerifier.h72
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRLogBuilder.h41
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRRecordConsumer.h55
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRRecordProducer.h51
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRRecords.h450
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRTraceExpander.h63
-rw-r--r--contrib/llvm/include/llvm/XRay/FDRTraceWriter.h56
-rw-r--r--contrib/llvm/include/llvm/XRay/FileHeaderReader.h33
-rw-r--r--contrib/llvm/include/llvm/XRay/Profile.h150
-rw-r--r--contrib/llvm/include/llvm/XRay/RecordPrinter.h50
-rw-r--r--contrib/llvm/include/llvm/XRay/Trace.h22
-rw-r--r--contrib/llvm/include/llvm/XRay/XRayRecord.h23
-rw-r--r--contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h9
-rw-r--r--contrib/llvm/include/llvm/module.extern.modulemap5
-rw-r--r--contrib/llvm/include/llvm/module.install.modulemap27
-rw-r--r--contrib/llvm/include/llvm/module.modulemap82
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp179
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp86
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp204
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp327
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp16
-rw-r--r--contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp10
-rw-r--r--contrib/llvm/lib/Analysis/CFG.cpp5
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp14
-rw-r--r--contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp20
-rw-r--r--contrib/llvm/lib/Analysis/CFLGraph.h2
-rw-r--r--contrib/llvm/lib/Analysis/CGSCCPassManager.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/CallGraph.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp50
-rw-r--r--contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp28
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp317
-rw-r--r--contrib/llvm/lib/Analysis/DemandedBits.cpp195
-rw-r--r--contrib/llvm/lib/Analysis/DependenceAnalysis.cpp11
-rw-r--r--contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp579
-rw-r--r--contrib/llvm/lib/Analysis/EHPersonalities.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/GlobalsModRef.cpp40
-rw-r--r--contrib/llvm/lib/Analysis/GuardUtils.cpp21
-rw-r--r--contrib/llvm/lib/Analysis/IVDescriptors.cpp1089
-rw-r--r--contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/InlineCost.cpp133
-rw-r--r--contrib/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp157
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp482
-rw-r--r--contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp21
-rw-r--r--contrib/llvm/lib/Analysis/LazyCallGraph.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp75
-rw-r--r--contrib/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp391
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp60
-rw-r--r--contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp135
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp32
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp5
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp100
-rw-r--r--contrib/llvm/lib/Analysis/MemoryLocation.cpp96
-rw-r--r--contrib/llvm/lib/Analysis/MemorySSA.cpp427
-rw-r--r--contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp544
-rw-r--r--contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp118
-rw-r--r--contrib/llvm/lib/Analysis/MustExecute.cpp255
-rw-r--r--contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp8
-rw-r--r--contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp152
-rw-r--r--contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/OrderedInstructions.cpp (renamed from contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp)2
-rw-r--r--contrib/llvm/lib/Analysis/PhiValues.cpp17
-rw-r--r--contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp45
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp1
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp480
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp14
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp33
-rw-r--r--contrib/llvm/lib/Analysis/StackSafetyAnalysis.cpp673
-rw-r--r--contrib/llvm/lib/Analysis/SyncDependenceAnalysis.cpp380
-rw-r--r--contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp34
-rw-r--r--contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp34
-rw-r--r--contrib/llvm/lib/Analysis/TargetTransformInfo.cpp119
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp31
-rw-r--r--contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp42
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp649
-rw-r--r--contrib/llvm/lib/Analysis/VectorUtils.cpp530
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp18
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp465
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.h17
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h7
-rw-r--r--contrib/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp324
-rw-r--r--contrib/llvm/lib/BinaryFormat/Dwarf.cpp40
-rw-r--r--contrib/llvm/lib/BinaryFormat/Magic.cpp2
-rw-r--r--contrib/llvm/lib/BinaryFormat/MsgPackReader.cpp255
-rw-r--r--contrib/llvm/lib/BinaryFormat/MsgPackTypes.cpp303
-rw-r--r--contrib/llvm/lib/BinaryFormat/MsgPackWriter.cpp209
-rw-r--r--contrib/llvm/lib/BinaryFormat/Wasm.cpp6
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp237
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp119
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h4
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/ValueList.cpp2
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp182
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp29
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h6
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp128
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp115
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp850
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h104
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp (renamed from contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp)90
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp29
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp202
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h50
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp806
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h180
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp33
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h5
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp30
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h69
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp49
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h10
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp194
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h35
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp39
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h13
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp97
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.h42
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp28
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h7
-rw-r--r--contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp158
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/BuiltinGCs.cpp29
-rw-r--r--contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp41
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp484
-rw-r--r--contrib/llvm/lib/CodeGen/DFAPacketizer.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp233
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadata.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/GCRootLowering.cpp133
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp370
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp231
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp306
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp411
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp113
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp479
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp797
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp36
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp53
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalMerge.cpp25
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp24
-rw-r--r--contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp20
-rw-r--r--contrib/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp1359
-rw-r--r--contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp56
-rw-r--r--contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugValues.cpp79
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp227
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.h7
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp22
-rw-r--r--contrib/llvm/lib/CodeGen/LivePhysRegs.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp57
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MILexer.h9
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp227
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MIRPrinter.cpp53
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp78
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCSE.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCombiner.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp296
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp208
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp36
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp350
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp27
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/MachineOperand.cpp19
-rw-r--r--contrib/llvm/lib/CodeGen/MachineOutliner.cpp1101
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp55
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePipeliner.cpp818
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp65
-rw-r--r--contrib/llvm/lib/CodeGen/MachineScheduler.cpp268
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp92
-rw-r--r--contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp164
-rw-r--r--contrib/llvm/lib/CodeGen/MacroFusion.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/OptimizePHIs.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.cpp34
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp124
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp30
-rw-r--r--contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp767
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp69
-rw-r--r--contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp47
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp246
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterPressure.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SafeStack.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/SafeStackColoring.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp279
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAG.cpp100
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp3150
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp48
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp42
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp425
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp412
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h32
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp221
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp696
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h13
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp50
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp1046
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp592
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h3
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp167
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp1926
-rw-r--r--contrib/llvm/lib/CodeGen/SlotIndexes.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.h17
-rw-r--r--contrib/llvm/lib/CodeGen/StackColoring.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/StackMaps.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/StackProtector.cpp41
-rw-r--r--contrib/llvm/lib/CodeGen/StackSlotColoring.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp51
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp22
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp269
-rw-r--r--contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp31
-rw-r--r--contrib/llvm/lib/CodeGen/TargetPassConfig.cpp147
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp64
-rw-r--r--contrib/llvm/lib/CodeGen/WinEHPrepare.cpp9
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp35
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp32
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp8
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp36
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp94
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp74
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp3
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp3
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp53
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp115
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp54
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp406
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp13
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp34
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp59
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp105
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp12
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp21
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp113
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp167
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp1
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp82
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp23
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp34
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp350
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp29
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp342
-rw-r--r--contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp30
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp22
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp42
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp10
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp12
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp31
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp53
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp464
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp25
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp14
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp17
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp55
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp32
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp45
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp53
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp68
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp11
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp20
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp47
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp19
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp55
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp23
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbol.cpp108
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp62
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp60
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp96
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp140
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp123
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp67
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp47
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp382
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp200
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp194
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp27
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp221
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp35
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp34
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp30
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp299
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp43
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp85
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDB.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp43
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp3
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp77
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp10
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp7
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp11
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp4
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp3
-rw-r--r--contrib/llvm/lib/Demangle/ItaniumDemangle.cpp5052
-rw-r--r--contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp2751
-rw-r--r--contrib/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp635
-rw-r--r--contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp32
-rw-r--r--contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp24
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp8
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp25
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp24
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp488
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp1566
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp104
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp18
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp14
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp156
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp55
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp166
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp154
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp208
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp58
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp12
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp443
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp121
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h286
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h27
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp235
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp65
-rw-r--r--contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp85
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp251
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp29
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h3
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h11
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h15
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h6
-rw-r--r--contrib/llvm/lib/FuzzMutate/IRMutator.cpp1
-rw-r--r--contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp2
-rw-r--r--contrib/llvm/lib/IR/AsmWriter.cpp228
-rw-r--r--contrib/llvm/lib/IR/Attributes.cpp36
-rw-r--r--contrib/llvm/lib/IR/AutoUpgrade.cpp575
-rw-r--r--contrib/llvm/lib/IR/BasicBlock.cpp29
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.cpp20
-rw-r--r--contrib/llvm/lib/IR/Constants.cpp88
-rw-r--r--contrib/llvm/lib/IR/ConstantsContext.h4
-rw-r--r--contrib/llvm/lib/IR/Core.cpp442
-rw-r--r--contrib/llvm/lib/IR/DIBuilder.cpp86
-rw-r--r--contrib/llvm/lib/IR/DataLayout.cpp24
-rw-r--r--contrib/llvm/lib/IR/DebugInfo.cpp113
-rw-r--r--contrib/llvm/lib/IR/DebugInfoMetadata.cpp225
-rw-r--r--contrib/llvm/lib/IR/DebugLoc.cpp17
-rw-r--r--contrib/llvm/lib/IR/DiagnosticInfo.cpp50
-rw-r--r--contrib/llvm/lib/IR/DomTreeUpdater.cpp29
-rw-r--r--contrib/llvm/lib/IR/Dominators.cpp199
-rw-r--r--contrib/llvm/lib/IR/Function.cpp41
-rw-r--r--contrib/llvm/lib/IR/Globals.cpp7
-rw-r--r--contrib/llvm/lib/IR/IRBuilder.cpp26
-rw-r--r--contrib/llvm/lib/IR/IRPrintingPasses.cpp3
-rw-r--r--contrib/llvm/lib/IR/Instruction.cpp56
-rw-r--r--contrib/llvm/lib/IR/Instructions.cpp722
-rw-r--r--contrib/llvm/lib/IR/IntrinsicInst.cpp16
-rw-r--r--contrib/llvm/lib/IR/LLVMContext.cpp1
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.h76
-rw-r--r--contrib/llvm/lib/IR/LegacyPassManager.cpp352
-rw-r--r--contrib/llvm/lib/IR/MDBuilder.cpp7
-rw-r--r--contrib/llvm/lib/IR/Metadata.cpp6
-rw-r--r--contrib/llvm/lib/IR/Module.cpp89
-rw-r--r--contrib/llvm/lib/IR/ModuleSummaryIndex.cpp165
-rw-r--r--contrib/llvm/lib/IR/PassInstrumentation.cpp22
-rw-r--r--contrib/llvm/lib/IR/PassTimingInfo.cpp268
-rw-r--r--contrib/llvm/lib/IR/SafepointIRVerifier.cpp6
-rw-r--r--contrib/llvm/lib/IR/Type.cpp41
-rw-r--r--contrib/llvm/lib/IR/Value.cpp47
-rw-r--r--contrib/llvm/lib/IR/Verifier.cpp777
-rw-r--r--contrib/llvm/lib/LTO/LTO.cpp122
-rw-r--r--contrib/llvm/lib/LTO/LTOBackend.cpp27
-rw-r--r--contrib/llvm/lib/LTO/LTOCodeGenerator.cpp1
-rw-r--r--contrib/llvm/lib/LTO/LTOModule.cpp2
-rw-r--r--contrib/llvm/lib/LTO/SummaryBasedOptimizations.cpp86
-rw-r--r--contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp198
-rw-r--r--contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp16
-rw-r--r--contrib/llvm/lib/Linker/IRMover.cpp32
-rw-r--r--contrib/llvm/lib/MC/ConstantPools.cpp6
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.cpp19
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoWasm.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp86
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp1
-rw-r--r--contrib/llvm/lib/MC/MCCodeView.cpp71
-rw-r--r--contrib/llvm/lib/MC/MCContext.cpp7
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp43
-rw-r--r--contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp6
-rw-r--r--contrib/llvm/lib/MC/MCFragment.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCInst.cpp12
-rw-r--r--contrib/llvm/lib/MC/MCInstrAnalysis.cpp5
-rw-r--r--contrib/llvm/lib/MC/MCInstrDesc.cpp14
-rw-r--r--contrib/llvm/lib/MC/MCMachOStreamer.cpp20
-rw-r--r--contrib/llvm/lib/MC/MCNullStreamer.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCObjectFileInfo.cpp226
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp98
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmLexer.cpp43
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp164
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp112
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCParser/WasmAsmParser.cpp145
-rw-r--r--contrib/llvm/lib/MC/MCRegisterInfo.cpp5
-rw-r--r--contrib/llvm/lib/MC/MCSection.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCSectionELF.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp92
-rw-r--r--contrib/llvm/lib/MC/MCWasmStreamer.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCWin64EH.cpp352
-rw-r--r--contrib/llvm/lib/MC/MachObjectWriter.cpp27
-rw-r--r--contrib/llvm/lib/MC/WasmObjectWriter.cpp292
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp101
-rw-r--r--contrib/llvm/lib/MCA/Context.cpp65
-rw-r--r--contrib/llvm/lib/MCA/HWEventListener.cpp (renamed from contrib/llvm/tools/llvm-mca/HWEventListener.cpp)4
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp (renamed from contrib/llvm/tools/llvm-mca/HardwareUnit.cpp)4
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/LSUnit.cpp190
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp (renamed from contrib/llvm/tools/llvm-mca/RegisterFile.cpp)260
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp331
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp (renamed from contrib/llvm/tools/llvm-mca/RetireControlUnit.cpp)19
-rw-r--r--contrib/llvm/lib/MCA/HardwareUnits/Scheduler.cpp247
-rw-r--r--contrib/llvm/lib/MCA/InstrBuilder.cpp (renamed from contrib/llvm/tools/llvm-mca/InstrBuilder.cpp)447
-rw-r--r--contrib/llvm/lib/MCA/Instruction.cpp (renamed from contrib/llvm/tools/llvm-mca/Instruction.cpp)72
-rw-r--r--contrib/llvm/lib/MCA/Pipeline.cpp97
-rw-r--r--contrib/llvm/lib/MCA/Stages/DispatchStage.cpp (renamed from contrib/llvm/tools/llvm-mca/DispatchStage.cpp)126
-rw-r--r--contrib/llvm/lib/MCA/Stages/EntryStage.cpp76
-rw-r--r--contrib/llvm/lib/MCA/Stages/ExecuteStage.cpp225
-rw-r--r--contrib/llvm/lib/MCA/Stages/InstructionTables.cpp (renamed from contrib/llvm/tools/llvm-mca/InstructionTables.cpp)21
-rw-r--r--contrib/llvm/lib/MCA/Stages/RetireStage.cpp (renamed from contrib/llvm/tools/llvm-mca/RetireStage.cpp)31
-rw-r--r--contrib/llvm/lib/MCA/Stages/Stage.cpp (renamed from contrib/llvm/tools/llvm-mca/Stage.cpp)6
-rw-r--r--contrib/llvm/lib/MCA/Support.cpp (renamed from contrib/llvm/tools/llvm-mca/Support.cpp)25
-rw-r--r--contrib/llvm/lib/Object/ArchiveWriter.cpp188
-rw-r--r--contrib/llvm/lib/Object/Binary.cpp3
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp72
-rw-r--r--contrib/llvm/lib/Object/ELF.cpp26
-rw-r--r--contrib/llvm/lib/Object/ELFObjectFile.cpp67
-rw-r--r--contrib/llvm/lib/Object/Error.cpp1
-rw-r--r--contrib/llvm/lib/Object/MachOObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Object/ModuleSymbolTable.cpp1
-rw-r--r--contrib/llvm/lib/Object/Object.cpp8
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Object/WasmObjectFile.cpp310
-rw-r--r--contrib/llvm/lib/Object/WindowsResource.cpp7
-rw-r--r--contrib/llvm/lib/ObjectYAML/COFFYAML.cpp3
-rw-r--r--contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp2
-rw-r--r--contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp2
-rw-r--r--contrib/llvm/lib/ObjectYAML/ELFYAML.cpp12
-rw-r--r--contrib/llvm/lib/ObjectYAML/WasmYAML.cpp66
-rw-r--r--contrib/llvm/lib/OptRemarks/OptRemarksParser.cpp368
-rw-r--r--contrib/llvm/lib/Option/OptTable.cpp12
-rw-r--r--contrib/llvm/lib/Passes/PassBuilder.cpp512
-rw-r--r--contrib/llvm/lib/Passes/PassRegistry.def27
-rw-r--r--contrib/llvm/lib/Passes/StandardInstrumentations.cpp243
-rw-r--r--contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp31
-rw-r--r--contrib/llvm/lib/ProfileData/GCOV.cpp154
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProf.cpp11
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProfReader.cpp164
-rw-r--r--contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp2
-rw-r--r--contrib/llvm/lib/ProfileData/SampleProf.cpp10
-rw-r--r--contrib/llvm/lib/ProfileData/SampleProfReader.cpp167
-rw-r--r--contrib/llvm/lib/ProfileData/SampleProfWriter.cpp57
-rw-r--r--contrib/llvm/lib/Support/AArch64TargetParser.cpp206
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp336
-rw-r--r--contrib/llvm/lib/Support/ARMTargetParser.cpp577
-rw-r--r--contrib/llvm/lib/Support/BinaryStreamError.cpp2
-rw-r--r--contrib/llvm/lib/Support/BuryPointer.cpp31
-rw-r--r--contrib/llvm/lib/Support/COM.cpp2
-rw-r--r--contrib/llvm/lib/Support/CachePruning.cpp47
-rw-r--r--contrib/llvm/lib/Support/CodeGenCoverage.cpp4
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp50
-rw-r--r--contrib/llvm/lib/Support/Compression.cpp28
-rw-r--r--contrib/llvm/lib/Support/DebugCounter.cpp37
-rw-r--r--contrib/llvm/lib/Support/Error.cpp47
-rw-r--r--contrib/llvm/lib/Support/FileCheck.cpp1446
-rw-r--r--contrib/llvm/lib/Support/FileOutputBuffer.cpp6
-rw-r--r--contrib/llvm/lib/Support/FoldingSet.cpp2
-rw-r--r--contrib/llvm/lib/Support/FormatVariadic.cpp2
-rw-r--r--contrib/llvm/lib/Support/Hashing.cpp4
-rw-r--r--contrib/llvm/lib/Support/Host.cpp167
-rw-r--r--contrib/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp322
-rw-r--r--contrib/llvm/lib/Support/JSON.cpp2
-rw-r--r--contrib/llvm/lib/Support/Locale.cpp13
-rw-r--r--contrib/llvm/lib/Support/LockFileManager.cpp10
-rw-r--r--contrib/llvm/lib/Support/Path.cpp139
-rw-r--r--contrib/llvm/lib/Support/Process.cpp10
-rw-r--r--contrib/llvm/lib/Support/RandomNumberGenerator.cpp2
-rw-r--r--contrib/llvm/lib/Support/Signals.cpp14
-rw-r--r--contrib/llvm/lib/Support/SourceMgr.cpp124
-rw-r--r--contrib/llvm/lib/Support/StringSaver.cpp3
-rw-r--r--contrib/llvm/lib/Support/SymbolRemappingReader.cpp81
-rw-r--r--contrib/llvm/lib/Support/TargetParser.cpp1022
-rw-r--r--contrib/llvm/lib/Support/TargetRegistry.cpp2
-rw-r--r--contrib/llvm/lib/Support/Timer.cpp17
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp50
-rw-r--r--contrib/llvm/lib/Support/Unix/Path.inc187
-rw-r--r--contrib/llvm/lib/Support/Unix/Signals.inc5
-rw-r--r--contrib/llvm/lib/Support/VirtualFileSystem.cpp2070
-rw-r--r--contrib/llvm/lib/Support/Windows/Path.inc128
-rw-r--r--contrib/llvm/lib/Support/Windows/Process.inc133
-rw-r--r--contrib/llvm/lib/Support/Windows/Program.inc21
-rw-r--r--contrib/llvm/lib/Support/Windows/Threading.inc2
-rw-r--r--contrib/llvm/lib/Support/Windows/WindowsSupport.h51
-rw-r--r--contrib/llvm/lib/Support/WithColor.cpp63
-rw-r--r--contrib/llvm/lib/Support/YAMLTraits.cpp144
-rw-r--r--contrib/llvm/lib/Support/raw_ostream.cpp94
-rw-r--r--contrib/llvm/lib/TableGen/Main.cpp38
-rw-r--r--contrib/llvm/lib/TableGen/Record.cpp11
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.cpp557
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.h243
-rw-r--r--contrib/llvm/lib/TableGen/TGParser.h5
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.h8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.td242
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp29
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp282
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp130
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp61
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td18
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp162
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp69
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp43
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp904
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h11
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp48
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp714
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h25
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td398
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp1147
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h51
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td505
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp222
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp64
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp72
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h23
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp375
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td19
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp108
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp118
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h18
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td120
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td200
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td1004
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td157
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td423
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Schedule.td11
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp641
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp81
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h113
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td182
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp81
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp3
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp324
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp53
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp40
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h56
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp110
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp31
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp28
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h1
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp15
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h69
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp157
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h14
-rw-r--r--contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td2
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h27
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.h47
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.td134
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp71
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h31
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp30
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp106
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp458
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp63
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def78
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp601
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h109
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp360
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp323
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp23
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td101
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp135
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp16
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp55
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp247
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp34
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h196
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp95
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp58
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp182
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td356
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/DSInstructions.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td66
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp446
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp31
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp522
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h37
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp70
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Instructions.td8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp181
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp101
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDefines.h5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp385
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp231
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp164
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp19
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp2272
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h43
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp99
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp1504
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1112
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h116
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td161
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstructions.td159
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td32
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp1062
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp29
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp830
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp25
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp406
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp133
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp97
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp80
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp361
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SMInstructions.td177
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td96
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp352
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h204
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp75
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h24
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h1
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td47
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td279
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td173
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td124
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td117
-rw-r--r--contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td93
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp48
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp116
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp84
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp1022
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp10
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp32
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp66
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp672
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td31
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td172
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td145
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td20
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td89
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td66
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp94
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp72
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp37
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMacroFusion.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp414
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h24
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h6
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp136
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp15
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h22
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp18
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp20
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp36
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp65
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp8
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp13
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRInstrInfo.td59
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td20
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp10
-rw-r--r--contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp8
-rw-r--r--contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPF.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp14
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp96
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp14
-rw-r--r--contrib/llvm/lib/Target/BPF/BTF.def33
-rw-r--r--contrib/llvm/lib/Target/BPF/BTF.h209
-rw-r--r--contrib/llvm/lib/Target/BPF/BTFDebug.cpp759
-rw-r--r--contrib/llvm/lib/Target/BPF/BTFDebug.h285
-rw-r--r--contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp18
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp74
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.h27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.td42
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h5
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h79
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td2974
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td4736
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td907
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td5240
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td3337
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h168
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp93
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp104
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp14
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp146
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp350
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp73
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td (renamed from contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td)34
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp238
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h24
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td1539
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td305
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td308
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp37
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td688
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td64
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td11
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td (renamed from contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td)22
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td41
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp28
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp86
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h85
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp88
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h44
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp6
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp4
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp36
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h12
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp3
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp9
-rw-r--r--contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp580
-rw-r--r--contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp387
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp36
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h7
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp178
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp59
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp81
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h53
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp211
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp27
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h27
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.td18
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp37
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp127
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h8
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td422
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp45
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h16
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td1483
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp3
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td29
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp208
-rw-r--r--contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp29
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp17
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp15
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h1
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp15
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td15
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td5
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td11
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td51
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp115
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp14
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp23
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h11
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td12
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td7
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp34
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp254
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCCState.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp335
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallLowering.h44
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCondMov.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp99
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td32
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp115
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp46
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td29
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp92
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td28
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp20
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp18
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp37
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h7
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.td5
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp59
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp35
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp78
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td66
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp19
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp122
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp21
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h13
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp10
-rw-r--r--contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp66
-rw-r--r--contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h49
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp130
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h81
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h38
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp43
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h41
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp76
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h60
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp102
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h52
-rw-r--r--contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp22
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2.h35
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2.td59
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp153
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td34
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp27
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h39
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp76
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp188
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h63
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td235
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp54
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h49
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td109
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp117
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp14
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h62
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp55
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h52
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td60
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Schedule.td39
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp56
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h97
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp119
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h45
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp18
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h26
-rw-r--r--contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h30
-rw-r--r--contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp165
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp211
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp37
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp194
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h109
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h60
-rw-r--r--contrib/llvm/lib/Target/PowerPC/P9InstrResources.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td18
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp42
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp139
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp491
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp1041
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h39
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td174
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td21
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp559
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h82
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td380
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td50
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td316
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td (renamed from contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td)11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp95
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h19
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td12
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td12
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp688
-rw-r--r--contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp38
-rw-r--r--contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp33
-rw-r--r--contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp130
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h113
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp4
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp83
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h10
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp16
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h8
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCV.h6
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCV.td6
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp556
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp3
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp126
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp328
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h24
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td9
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td13
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td271
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td189
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td39
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td35
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td37
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td31
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp2
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td352
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp18
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h (renamed from contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h)56
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp79
-rw-r--r--contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h36
-rw-r--r--contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp55
-rw-r--r--contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp4
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h2
-rwxr-xr-xcontrib/llvm/lib/Target/Sparc/LeonFeatures.td4
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp20
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h (renamed from contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h)5
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td6
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp378
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h20
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td5
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td79
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp23
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp66
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp37
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp30
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp203
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td23
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td26
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperators.td86
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td12
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td80
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td85
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td66
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td64
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp15
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp489
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h16
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp3
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp16
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp9
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp778
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp104
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp243
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h8
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp78
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp31
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h95
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp143
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h64
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp21
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/README.txt18
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.h6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.td12
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp87
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp112
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h9
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp238
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp693
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp46
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h38
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp87
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp116
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp218
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp161
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp442
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp94
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h29
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp611
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h25
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td448
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td71
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td104
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td116
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td54
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td131
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td121
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td46
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td67
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td794
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp79
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp123
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp287
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp84
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp164
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h26
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp (renamed from contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp)71
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp10
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp10
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp19
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp157
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp23
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h17
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp29
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h20
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp119
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp408
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp53
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp12
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h13
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp443
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h1
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/ShadowCallStack.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp45
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h13
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h28
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td208
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp33
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallLowering.cpp41
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallLowering.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86CmovConversion.cpp6
-rw-r--r--contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp585
-rw-r--r--contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp156
-rw-r--r--contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp9
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp40
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp113
-rw-r--r--contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp47
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp1314
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp8589
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h117
-rw-r--r--contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp253
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr3DNow.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td2047
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td214
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td642
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td14
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td20
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td38
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td3
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td163
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp535
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h29
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td222
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td10
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td1304
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td58
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td228
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td156
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp337
-rw-r--r--contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h477
-rw-r--r--contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp40
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp192
-rw-r--r--contrib/llvm/lib/Target/X86/X86MacroFusion.cpp24
-rw-r--r--contrib/llvm/lib/Target/X86/X86MacroFusion.h5
-rw-r--r--contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/X86PfmCounters.td275
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp27
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td23
-rw-r--r--contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp27
-rwxr-xr-xcontrib/llvm/lib/Target/X86/X86SchedBroadwell.td433
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedHaswell.td457
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedPredicates.td24
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td324
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td468
-rwxr-xr-xcontrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td498
-rw-r--r--contrib/llvm/lib/Target/X86/X86Schedule.td421
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleAtom.td85
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td1282
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td246
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleSLM.td36
-rw-r--r--contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td144
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp58
-rw-r--r--contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h13
-rw-r--r--contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp481
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp30
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h51
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp52
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp876
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h12
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp6
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp12
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp5
-rw-r--r--contrib/llvm/lib/Testing/Support/SupportHelpers.cpp53
-rw-r--r--contrib/llvm/lib/TextAPI/ELF/ELFStub.cpp29
-rw-r--r--contrib/llvm/lib/TextAPI/ELF/TBEHandler.cpp161
-rw-r--r--contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp3
-rw-r--r--contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp109
-rw-r--r--contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h5
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp134
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp38
-rw-r--r--contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp108
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp30
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp146
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp261
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp643
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp212
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp130
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp41
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp84
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SCCP.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp131
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp321
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp599
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp532
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp126
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp661
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h64
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp27
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp323
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp255
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp538
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp262
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp2074
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp404
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp366
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp50
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp834
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp58
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp81
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp97
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h74
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h2
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp33
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BDCE.cpp52
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp246
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp69
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp73
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp68
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp201
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp241
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp364
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp186
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp255
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp981
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp50
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp185
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp44
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp424
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp28
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp565
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp68
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp196
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp116
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp103
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp16
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp57
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp120
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp100
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp102
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp90
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp54
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp26
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp263
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp111
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp136
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp653
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp38
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp86
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp149
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp34
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp200
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp68
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp47
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp105
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp32
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp357
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp16
-rw-r--r--contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Evaluator.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp64
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp423
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp77
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp29
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp92
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp143
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp1487
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp43
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp36
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp312
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp11
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp607
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SplitModule.cpp14
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp108
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp186
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp1680
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h3
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp171
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlan.h216
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp12
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp468
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h20
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp2
-rw-r--r--contrib/llvm/lib/XRay/BlockIndexer.cpp98
-rw-r--r--contrib/llvm/lib/XRay/BlockPrinter.cpp114
-rw-r--r--contrib/llvm/lib/XRay/BlockVerifier.cpp205
-rw-r--r--contrib/llvm/lib/XRay/FDRRecordProducer.cpp198
-rw-r--r--contrib/llvm/lib/XRay/FDRRecords.cpp67
-rw-r--r--contrib/llvm/lib/XRay/FDRTraceExpander.cpp132
-rw-r--r--contrib/llvm/lib/XRay/FDRTraceWriter.cpp154
-rw-r--r--contrib/llvm/lib/XRay/FileHeaderReader.cpp70
-rw-r--r--contrib/llvm/lib/XRay/InstrumentationMap.cpp53
-rw-r--r--contrib/llvm/lib/XRay/LogBuilderConsumer.cpp38
-rw-r--r--contrib/llvm/lib/XRay/Profile.cpp403
-rw-r--r--contrib/llvm/lib/XRay/RecordInitializer.cpp418
-rw-r--r--contrib/llvm/lib/XRay/RecordPrinter.cpp109
-rw-r--r--contrib/llvm/lib/XRay/Trace.cpp736
-rw-r--r--contrib/llvm/tools/bugpoint/CrashDebugger.cpp100
-rw-r--r--contrib/llvm/tools/bugpoint/ExecutionDriver.cpp39
-rw-r--r--contrib/llvm/tools/bugpoint/OptimizerDriver.cpp4
-rw-r--r--contrib/llvm/tools/bugpoint/ToolRunner.cpp92
-rw-r--r--contrib/llvm/tools/bugpoint/ToolRunner.h14
-rw-r--r--contrib/llvm/tools/lli/lli.cpp270
-rw-r--r--contrib/llvm/tools/llvm-ar/llvm-ar.cpp263
-rw-r--r--contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp7
-rw-r--r--contrib/llvm/tools/llvm-cov/CodeCoverage.cpp60
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporter.h6
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp458
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterJson.h84
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterLcov.cpp125
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterLcov.h36
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageViewOptions.h3
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp10
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp2
-rw-r--r--contrib/llvm/tools/llvm-cov/TestingSupport.cpp2
-rw-r--r--contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp18
-rw-r--r--contrib/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp155
-rw-r--r--contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp32
-rw-r--r--contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp117
-rw-r--r--contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp75
-rw-r--r--contrib/llvm/tools/llvm-lto/llvm-lto.cpp18
-rw-r--r--contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp2
-rw-r--r--contrib/llvm/tools/llvm-mc/llvm-mc.cpp6
-rw-r--r--contrib/llvm/tools/llvm-mca/CodeRegion.cpp21
-rw-r--r--contrib/llvm/tools/llvm-mca/CodeRegion.h27
-rw-r--r--contrib/llvm/tools/llvm-mca/CodeRegionGenerator.cpp137
-rw-r--r--contrib/llvm/tools/llvm-mca/CodeRegionGenerator.h70
-rw-r--r--contrib/llvm/tools/llvm-mca/Context.cpp63
-rw-r--r--contrib/llvm/tools/llvm-mca/DispatchStatistics.cpp71
-rw-r--r--contrib/llvm/tools/llvm-mca/ExecuteStage.cpp210
-rw-r--r--contrib/llvm/tools/llvm-mca/ExecuteStage.h67
-rw-r--r--contrib/llvm/tools/llvm-mca/FetchStage.cpp46
-rw-r--r--contrib/llvm/tools/llvm-mca/FetchStage.h45
-rw-r--r--contrib/llvm/tools/llvm-mca/InstrBuilder.h85
-rw-r--r--contrib/llvm/tools/llvm-mca/LSUnit.cpp148
-rw-r--r--contrib/llvm/tools/llvm-mca/LSUnit.h147
-rw-r--r--contrib/llvm/tools/llvm-mca/Pipeline.cpp99
-rw-r--r--contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp6
-rw-r--r--contrib/llvm/tools/llvm-mca/PipelinePrinter.h6
-rw-r--r--contrib/llvm/tools/llvm-mca/RegisterFileStatistics.cpp107
-rw-r--r--contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.cpp49
-rw-r--r--contrib/llvm/tools/llvm-mca/Scheduler.cpp403
-rw-r--r--contrib/llvm/tools/llvm-mca/Scheduler.h515
-rw-r--r--contrib/llvm/tools/llvm-mca/SchedulerStatistics.cpp94
-rw-r--r--contrib/llvm/tools/llvm-mca/SourceMgr.h63
-rw-r--r--contrib/llvm/tools/llvm-mca/Support.h58
-rw-r--r--contrib/llvm/tools/llvm-mca/TimelineView.cpp240
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp86
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.h (renamed from contrib/llvm/tools/llvm-mca/DispatchStatistics.h)6
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp (renamed from contrib/llvm/tools/llvm-mca/InstructionInfoView.cpp)10
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.h (renamed from contrib/llvm/tools/llvm-mca/InstructionInfoView.h)13
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp168
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h (renamed from contrib/llvm/tools/llvm-mca/RegisterFileStatistics.h)28
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp (renamed from contrib/llvm/tools/llvm-mca/ResourcePressureView.cpp)38
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.h (renamed from contrib/llvm/tools/llvm-mca/ResourcePressureView.h)33
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp91
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h (renamed from contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.h)35
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp183
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.h (renamed from contrib/llvm/tools/llvm-mca/SchedulerStatistics.h)61
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/SummaryView.cpp (renamed from contrib/llvm/tools/llvm-mca/SummaryView.cpp)40
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/SummaryView.h (renamed from contrib/llvm/tools/llvm-mca/SummaryView.h)11
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/TimelineView.cpp294
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/TimelineView.h (renamed from contrib/llvm/tools/llvm-mca/TimelineView.h)34
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/View.cpp (renamed from contrib/llvm/tools/llvm-mca/View.cpp)4
-rw-r--r--contrib/llvm/tools/llvm-mca/Views/View.h (renamed from contrib/llvm/tools/llvm-mca/View.h)4
-rw-r--r--contrib/llvm/tools/llvm-mca/llvm-mca.cpp252
-rw-r--r--contrib/llvm/tools/llvm-nm/llvm-nm.cpp103
-rw-r--r--contrib/llvm/tools/llvm-objcopy/Buffer.cpp51
-rw-r--r--contrib/llvm/tools/llvm-objcopy/Buffer.h66
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp98
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h31
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Object.cpp70
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Object.h148
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Reader.cpp171
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Reader.h43
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Writer.cpp337
-rw-r--r--contrib/llvm/tools/llvm-objcopy/COFF/Writer.h61
-rw-r--r--contrib/llvm/tools/llvm-objcopy/CopyConfig.cpp474
-rw-r--r--contrib/llvm/tools/llvm-objcopy/CopyConfig.h119
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp584
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h34
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ELF/Object.cpp (renamed from contrib/llvm/tools/llvm-objcopy/Object.cpp)514
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ELF/Object.h (renamed from contrib/llvm/tools/llvm-objcopy/Object.h)231
-rw-r--r--contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td243
-rw-r--r--contrib/llvm/tools/llvm-objcopy/StripOpts.td86
-rw-r--r--contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp682
-rw-r--r--contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h2
-rw-r--r--contrib/llvm/tools/llvm-objdump/COFFDump.cpp29
-rw-r--r--contrib/llvm/tools/llvm-objdump/ELFDump.cpp30
-rw-r--r--contrib/llvm/tools/llvm-objdump/MachODump.cpp458
-rw-r--r--contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp786
-rw-r--r--contrib/llvm/tools/llvm-objdump/llvm-objdump.h58
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/Analyze.cpp148
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/Analyze.h30
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp339
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/InputFile.cpp17
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/InputFile.h2
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp30
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h7
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp38
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h9
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PdbYaml.h6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp7
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp11
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h1
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp12
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp12
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp178
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp6
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp45
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h1
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp191
-rw-r--r--contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h8
-rw-r--r--contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp182
-rw-r--r--contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp445
-rw-r--r--contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h51
-rw-r--r--contrib/llvm/tools/llvm-readobj/COFFDumper.cpp95
-rw-r--r--contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h13
-rw-r--r--contrib/llvm/tools/llvm-readobj/ELFDumper.cpp667
-rw-r--r--contrib/llvm/tools/llvm-readobj/MachODumper.cpp10
-rw-r--r--contrib/llvm/tools/llvm-readobj/ObjDumper.h2
-rw-r--r--contrib/llvm/tools/llvm-readobj/WasmDumper.cpp41
-rw-r--r--contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp212
-rw-r--r--contrib/llvm/tools/llvm-readobj/llvm-readobj.h11
-rw-r--r--contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp25
-rw-r--r--contrib/llvm/tools/llvm-stress/llvm-stress.cpp4
-rw-r--r--contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp110
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-account.cpp138
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-converter.cpp130
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-fdr-dump.cpp119
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-graph.cpp4
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-stacks.cpp5
-rw-r--r--contrib/llvm/tools/opt/Debugify.cpp11
-rw-r--r--contrib/llvm/tools/opt/NewPMDriver.cpp125
-rw-r--r--contrib/llvm/tools/opt/opt.cpp8
-rw-r--r--contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp7
-rw-r--r--contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp23
-rw-r--r--contrib/llvm/utils/TableGen/CTagsEmitter.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp240
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h111
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenInstruction.cpp99
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenInstruction.h13
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenIntrinsics.h3
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenRegisters.cpp78
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenRegisters.h2
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenSchedule.cpp327
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenSchedule.h175
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenTarget.cpp14
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcher.cpp14
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcher.h6
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp90
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp91
-rw-r--r--contrib/llvm/utils/TableGen/ExegesisEmitter.cpp216
-rw-r--r--contrib/llvm/utils/TableGen/FastISelEmitter.cpp21
-rw-r--r--contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp146
-rw-r--r--contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp85
-rw-r--r--contrib/llvm/utils/TableGen/InfoByHwMode.cpp4
-rw-r--r--contrib/llvm/utils/TableGen/InfoByHwMode.h4
-rw-r--r--contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp101
-rw-r--r--contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp11
-rw-r--r--contrib/llvm/utils/TableGen/PredicateExpander.cpp368
-rw-r--r--contrib/llvm/utils/TableGen/PredicateExpander.h100
-rw-r--r--contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp68
-rw-r--r--contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp29
-rw-r--r--contrib/llvm/utils/TableGen/SubtargetEmitter.cpp416
-rw-r--r--contrib/llvm/utils/TableGen/TableGen.cpp8
-rw-r--r--contrib/llvm/utils/TableGen/TableGenBackends.h1
-rw-r--r--contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp69
-rw-r--r--contrib/llvm/utils/TableGen/X86ModRMFilters.h6
-rw-r--r--contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp4
2298 files changed, 190907 insertions, 94214 deletions
diff --git a/contrib/llvm/FREEBSD-Xlist b/contrib/llvm/FREEBSD-Xlist
index 9fbda5f30544..f694223a183f 100644
--- a/contrib/llvm/FREEBSD-Xlist
+++ b/contrib/llvm/FREEBSD-Xlist
@@ -2,17 +2,15 @@
.arcconfig
.clang-format
.clang-tidy
+.gitattributes
.gitignore
CMakeLists.txt
CODE_OWNERS.TXT
CREDITS.TXT
LLVMBuild.txt
-Makefile
-Makefile.common
-Makefile.config.in
-Makefile.rules
README.txt
-autoconf/
+RELEASE_TESTERS.TXT
+benchmarks/
bindings/
cmake/
configure
@@ -21,473 +19,485 @@ examples/
include/llvm/CMakeLists.txt
include/llvm/Config/
include/llvm/IR/CMakeLists.txt
-include/llvm/Support/DataTypes.h.cmake
+include/llvm/Support/CMakeLists.txt
include/llvm/Support/LICENSE.TXT
lib/Analysis/CMakeLists.txt
-lib/Analysis/IPA/CMakeLists.txt
-lib/Analysis/IPA/LLVMBuild.txt
-lib/Analysis/IPA/Makefile
lib/Analysis/LLVMBuild.txt
-lib/Analysis/Makefile
lib/Analysis/README.txt
lib/AsmParser/CMakeLists.txt
lib/AsmParser/LLVMBuild.txt
-lib/AsmParser/Makefile
+lib/BinaryFormat/CMakeLists.txt
+lib/BinaryFormat/LLVMBuild.txt
lib/Bitcode/CMakeLists.txt
lib/Bitcode/LLVMBuild.txt
-lib/Bitcode/Makefile
lib/Bitcode/Reader/CMakeLists.txt
lib/Bitcode/Reader/LLVMBuild.txt
-lib/Bitcode/Reader/Makefile
lib/Bitcode/Writer/CMakeLists.txt
lib/Bitcode/Writer/LLVMBuild.txt
-lib/Bitcode/Writer/Makefile
lib/CMakeLists.txt
lib/CodeGen/AsmPrinter/CMakeLists.txt
lib/CodeGen/AsmPrinter/LLVMBuild.txt
-lib/CodeGen/AsmPrinter/Makefile
lib/CodeGen/CMakeLists.txt
+lib/CodeGen/GlobalISel/CMakeLists.txt
+lib/CodeGen/GlobalISel/LLVMBuild.txt
lib/CodeGen/LLVMBuild.txt
-lib/CodeGen/Makefile
+lib/CodeGen/MIRParser/CMakeLists.txt
+lib/CodeGen/MIRParser/LLVMBuild.txt
lib/CodeGen/README.txt
lib/CodeGen/SelectionDAG/CMakeLists.txt
lib/CodeGen/SelectionDAG/LLVMBuild.txt
-lib/CodeGen/SelectionDAG/Makefile
lib/DebugInfo/CMakeLists.txt
+lib/DebugInfo/CodeView/CMakeLists.txt
+lib/DebugInfo/CodeView/LLVMBuild.txt
lib/DebugInfo/DWARF/CMakeLists.txt
lib/DebugInfo/DWARF/LLVMBuild.txt
-lib/DebugInfo/DWARF/Makefile
lib/DebugInfo/LLVMBuild.txt
-lib/DebugInfo/Makefile
+lib/DebugInfo/MSF/CMakeLists.txt
+lib/DebugInfo/MSF/LLVMBuild.txt
lib/DebugInfo/PDB/CMakeLists.txt
lib/DebugInfo/PDB/LLVMBuild.txt
-lib/DebugInfo/PDB/Makefile
+lib/DebugInfo/Symbolize/CMakeLists.txt
+lib/DebugInfo/Symbolize/LLVMBuild.txt
+lib/Demangle/CMakeLists.txt
+lib/Demangle/LLVMBuild.txt
lib/ExecutionEngine/CMakeLists.txt
lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
-lib/ExecutionEngine/IntelJITEvents/Makefile
lib/ExecutionEngine/Interpreter/CMakeLists.txt
lib/ExecutionEngine/Interpreter/LLVMBuild.txt
-lib/ExecutionEngine/Interpreter/Makefile
lib/ExecutionEngine/LLVMBuild.txt
lib/ExecutionEngine/MCJIT/CMakeLists.txt
lib/ExecutionEngine/MCJIT/LLVMBuild.txt
-lib/ExecutionEngine/MCJIT/Makefile
-lib/ExecutionEngine/Makefile
lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
-lib/ExecutionEngine/OProfileJIT/Makefile
lib/ExecutionEngine/Orc/CMakeLists.txt
lib/ExecutionEngine/Orc/LLVMBuild.txt
-lib/ExecutionEngine/Orc/Makefile
+lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
+lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
-lib/ExecutionEngine/RuntimeDyld/Makefile
+lib/FuzzMutate/CMakeLists.txt
+lib/FuzzMutate/LLVMBuild.txt
lib/Fuzzer/
lib/IR/CMakeLists.txt
lib/IR/LLVMBuild.txt
-lib/IR/Makefile
lib/IRReader/CMakeLists.txt
lib/IRReader/LLVMBuild.txt
-lib/IRReader/Makefile
lib/LLVMBuild.txt
lib/LTO/CMakeLists.txt
lib/LTO/LLVMBuild.txt
-lib/LTO/Makefile
lib/LineEditor/CMakeLists.txt
lib/LineEditor/LLVMBuild.txt
-lib/LineEditor/Makefile
lib/Linker/CMakeLists.txt
lib/Linker/LLVMBuild.txt
-lib/Linker/Makefile
lib/MC/CMakeLists.txt
lib/MC/LLVMBuild.txt
lib/MC/MCDisassembler/CMakeLists.txt
lib/MC/MCDisassembler/LLVMBuild.txt
-lib/MC/MCDisassembler/Makefile
lib/MC/MCParser/CMakeLists.txt
lib/MC/MCParser/LLVMBuild.txt
-lib/MC/MCParser/Makefile
-lib/MC/Makefile
-lib/Makefile
+lib/MCA/CMakeLists.txt
+lib/MCA/LLVMBuild.txt
lib/Object/CMakeLists.txt
lib/Object/LLVMBuild.txt
-lib/Object/Makefile
+lib/ObjectYAML/CMakeLists.txt
+lib/ObjectYAML/LLVMBuild.txt
+lib/OptRemarks/CMakeLists.txt
+lib/OptRemarks/LLVMBuild.txt
lib/Option/CMakeLists.txt
lib/Option/LLVMBuild.txt
-lib/Option/Makefile
lib/Passes/CMakeLists.txt
lib/Passes/LLVMBuild.txt
-lib/Passes/Makefile
lib/ProfileData/CMakeLists.txt
+lib/ProfileData/Coverage/CMakeLists.txt
+lib/ProfileData/Coverage/LLVMBuild.txt
lib/ProfileData/LLVMBuild.txt
-lib/ProfileData/Makefile
lib/Support/CMakeLists.txt
lib/Support/LLVMBuild.txt
-lib/Support/Makefile
lib/Support/README.txt.system
lib/TableGen/CMakeLists.txt
lib/TableGen/LLVMBuild.txt
-lib/TableGen/Makefile
lib/Target/AArch64/AsmParser/CMakeLists.txt
lib/Target/AArch64/AsmParser/LLVMBuild.txt
-lib/Target/AArch64/AsmParser/Makefile
lib/Target/AArch64/CMakeLists.txt
lib/Target/AArch64/Disassembler/CMakeLists.txt
lib/Target/AArch64/Disassembler/LLVMBuild.txt
-lib/Target/AArch64/Disassembler/Makefile
lib/Target/AArch64/InstPrinter/CMakeLists.txt
lib/Target/AArch64/InstPrinter/LLVMBuild.txt
-lib/Target/AArch64/InstPrinter/Makefile
lib/Target/AArch64/LLVMBuild.txt
lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
-lib/Target/AArch64/MCTargetDesc/Makefile
-lib/Target/AArch64/Makefile
lib/Target/AArch64/TargetInfo/CMakeLists.txt
lib/Target/AArch64/TargetInfo/LLVMBuild.txt
-lib/Target/AArch64/TargetInfo/Makefile
lib/Target/AArch64/Utils/CMakeLists.txt
lib/Target/AArch64/Utils/LLVMBuild.txt
-lib/Target/AArch64/Utils/Makefile
+lib/Target/AMDGPU/AsmParser/CMakeLists.txt
+lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
+lib/Target/AMDGPU/CMakeLists.txt
+lib/Target/AMDGPU/Disassembler/CMakeLists.txt
+lib/Target/AMDGPU/Disassembler/LLVMBuild.txt
+lib/Target/AMDGPU/InstPrinter/CMakeLists.txt
+lib/Target/AMDGPU/InstPrinter/LLVMBuild.txt
+lib/Target/AMDGPU/LLVMBuild.txt
+lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
+lib/Target/AMDGPU/TargetInfo/CMakeLists.txt
+lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt
+lib/Target/AMDGPU/Utils/CMakeLists.txt
+lib/Target/AMDGPU/Utils/LLVMBuild.txt
+lib/Target/ARC/CMakeLists.txt
+lib/Target/ARC/Disassembler/CMakeLists.txt
+lib/Target/ARC/Disassembler/LLVMBuild.txt
+lib/Target/ARC/InstPrinter/CMakeLists.txt
+lib/Target/ARC/InstPrinter/LLVMBuild.txt
+lib/Target/ARC/LLVMBuild.txt
+lib/Target/ARC/MCTargetDesc/CMakeLists.txt
+lib/Target/ARC/MCTargetDesc/LLVMBuild.txt
+lib/Target/ARC/TargetInfo/CMakeLists.txt
+lib/Target/ARC/TargetInfo/LLVMBuild.txt
lib/Target/ARM/AsmParser/CMakeLists.txt
lib/Target/ARM/AsmParser/LLVMBuild.txt
-lib/Target/ARM/AsmParser/Makefile
lib/Target/ARM/CMakeLists.txt
lib/Target/ARM/Disassembler/CMakeLists.txt
lib/Target/ARM/Disassembler/LLVMBuild.txt
-lib/Target/ARM/Disassembler/Makefile
lib/Target/ARM/InstPrinter/CMakeLists.txt
lib/Target/ARM/InstPrinter/LLVMBuild.txt
-lib/Target/ARM/InstPrinter/Makefile
lib/Target/ARM/LLVMBuild.txt
lib/Target/ARM/MCTargetDesc/CMakeLists.txt
lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
-lib/Target/ARM/MCTargetDesc/Makefile
-lib/Target/ARM/Makefile
lib/Target/ARM/README-Thumb.txt
lib/Target/ARM/README-Thumb2.txt
lib/Target/ARM/README.txt
lib/Target/ARM/TargetInfo/CMakeLists.txt
lib/Target/ARM/TargetInfo/LLVMBuild.txt
-lib/Target/ARM/TargetInfo/Makefile
+lib/Target/ARM/Utils/CMakeLists.txt
+lib/Target/ARM/Utils/LLVMBuild.txt
+lib/Target/AVR/AsmParser/CMakeLists.txt
+lib/Target/AVR/AsmParser/LLVMBuild.txt
+lib/Target/AVR/CMakeLists.txt
+lib/Target/AVR/Disassembler/CMakeLists.txt
+lib/Target/AVR/Disassembler/LLVMBuild.txt
+lib/Target/AVR/InstPrinter/CMakeLists.txt
+lib/Target/AVR/InstPrinter/LLVMBuild.txt
+lib/Target/AVR/LLVMBuild.txt
+lib/Target/AVR/MCTargetDesc/CMakeLists.txt
+lib/Target/AVR/MCTargetDesc/LLVMBuild.txt
+lib/Target/AVR/TargetInfo/CMakeLists.txt
+lib/Target/AVR/TargetInfo/LLVMBuild.txt
+lib/Target/BPF/AsmParser/CMakeLists.txt
+lib/Target/BPF/AsmParser/LLVMBuild.txt
lib/Target/BPF/CMakeLists.txt
+lib/Target/BPF/Disassembler/CMakeLists.txt
+lib/Target/BPF/Disassembler/LLVMBuild.txt
lib/Target/BPF/InstPrinter/CMakeLists.txt
lib/Target/BPF/InstPrinter/LLVMBuild.txt
-lib/Target/BPF/InstPrinter/Makefile
lib/Target/BPF/LLVMBuild.txt
lib/Target/BPF/MCTargetDesc/CMakeLists.txt
lib/Target/BPF/MCTargetDesc/LLVMBuild.txt
-lib/Target/BPF/MCTargetDesc/Makefile
-lib/Target/BPF/Makefile
lib/Target/BPF/TargetInfo/CMakeLists.txt
lib/Target/BPF/TargetInfo/LLVMBuild.txt
-lib/Target/BPF/TargetInfo/Makefile
lib/Target/CMakeLists.txt
-lib/Target/CppBackend/CMakeLists.txt
-lib/Target/CppBackend/LLVMBuild.txt
-lib/Target/CppBackend/Makefile
-lib/Target/CppBackend/TargetInfo/CMakeLists.txt
-lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
-lib/Target/CppBackend/TargetInfo/Makefile
+lib/Target/Hexagon/AsmParser/CMakeLists.txt
+lib/Target/Hexagon/AsmParser/LLVMBuild.txt
lib/Target/Hexagon/CMakeLists.txt
lib/Target/Hexagon/Disassembler/CMakeLists.txt
lib/Target/Hexagon/Disassembler/LLVMBuild.txt
-lib/Target/Hexagon/Disassembler/Makefile
lib/Target/Hexagon/LLVMBuild.txt
lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
-lib/Target/Hexagon/MCTargetDesc/Makefile
-lib/Target/Hexagon/Makefile
lib/Target/Hexagon/TargetInfo/CMakeLists.txt
lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
-lib/Target/Hexagon/TargetInfo/Makefile
lib/Target/LLVMBuild.txt
+lib/Target/Lanai/AsmParser/CMakeLists.txt
+lib/Target/Lanai/AsmParser/LLVMBuild.txt
+lib/Target/Lanai/CMakeLists.txt
+lib/Target/Lanai/Disassembler/CMakeLists.txt
+lib/Target/Lanai/Disassembler/LLVMBuild.txt
+lib/Target/Lanai/InstPrinter/CMakeLists.txt
+lib/Target/Lanai/InstPrinter/LLVMBuild.txt
+lib/Target/Lanai/LLVMBuild.txt
+lib/Target/Lanai/MCTargetDesc/CMakeLists.txt
+lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt
+lib/Target/Lanai/TargetInfo/CMakeLists.txt
+lib/Target/Lanai/TargetInfo/LLVMBuild.txt
+lib/Target/MSP430/AsmParser/CMakeLists.txt
+lib/Target/MSP430/AsmParser/LLVMBuild.txt
lib/Target/MSP430/CMakeLists.txt
+lib/Target/MSP430/Disassembler/CMakeLists.txt
+lib/Target/MSP430/Disassembler/LLVMBuild.txt
lib/Target/MSP430/InstPrinter/CMakeLists.txt
lib/Target/MSP430/InstPrinter/LLVMBuild.txt
-lib/Target/MSP430/InstPrinter/Makefile
lib/Target/MSP430/LLVMBuild.txt
lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
-lib/Target/MSP430/MCTargetDesc/Makefile
-lib/Target/MSP430/Makefile
lib/Target/MSP430/README.txt
lib/Target/MSP430/TargetInfo/CMakeLists.txt
lib/Target/MSP430/TargetInfo/LLVMBuild.txt
-lib/Target/MSP430/TargetInfo/Makefile
-lib/Target/Makefile
lib/Target/Mips/AsmParser/CMakeLists.txt
lib/Target/Mips/AsmParser/LLVMBuild.txt
-lib/Target/Mips/AsmParser/Makefile
lib/Target/Mips/CMakeLists.txt
lib/Target/Mips/Disassembler/CMakeLists.txt
lib/Target/Mips/Disassembler/LLVMBuild.txt
-lib/Target/Mips/Disassembler/Makefile
lib/Target/Mips/InstPrinter/CMakeLists.txt
lib/Target/Mips/InstPrinter/LLVMBuild.txt
-lib/Target/Mips/InstPrinter/Makefile
lib/Target/Mips/LLVMBuild.txt
lib/Target/Mips/MCTargetDesc/CMakeLists.txt
lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
-lib/Target/Mips/MCTargetDesc/Makefile
-lib/Target/Mips/Makefile
lib/Target/Mips/TargetInfo/CMakeLists.txt
lib/Target/Mips/TargetInfo/LLVMBuild.txt
-lib/Target/Mips/TargetInfo/Makefile
lib/Target/NVPTX/CMakeLists.txt
lib/Target/NVPTX/InstPrinter/CMakeLists.txt
lib/Target/NVPTX/InstPrinter/LLVMBuild.txt
-lib/Target/NVPTX/InstPrinter/Makefile
lib/Target/NVPTX/LLVMBuild.txt
lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt
-lib/Target/NVPTX/MCTargetDesc/Makefile
-lib/Target/NVPTX/Makefile
lib/Target/NVPTX/TargetInfo/CMakeLists.txt
lib/Target/NVPTX/TargetInfo/LLVMBuild.txt
-lib/Target/NVPTX/TargetInfo/Makefile
lib/Target/PowerPC/AsmParser/CMakeLists.txt
lib/Target/PowerPC/AsmParser/LLVMBuild.txt
-lib/Target/PowerPC/AsmParser/Makefile
lib/Target/PowerPC/CMakeLists.txt
lib/Target/PowerPC/Disassembler/CMakeLists.txt
lib/Target/PowerPC/Disassembler/LLVMBuild.txt
-lib/Target/PowerPC/Disassembler/Makefile
lib/Target/PowerPC/InstPrinter/CMakeLists.txt
lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
-lib/Target/PowerPC/InstPrinter/Makefile
lib/Target/PowerPC/LLVMBuild.txt
lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
-lib/Target/PowerPC/MCTargetDesc/Makefile
-lib/Target/PowerPC/Makefile
lib/Target/PowerPC/README.txt
lib/Target/PowerPC/README_ALTIVEC.txt
lib/Target/PowerPC/TargetInfo/CMakeLists.txt
lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
-lib/Target/PowerPC/TargetInfo/Makefile
-lib/Target/R600/AsmParser/CMakeLists.txt
-lib/Target/R600/AsmParser/LLVMBuild.txt
-lib/Target/R600/AsmParser/Makefile
-lib/Target/R600/CMakeLists.txt
-lib/Target/R600/InstPrinter/CMakeLists.txt
-lib/Target/R600/InstPrinter/LLVMBuild.txt
-lib/Target/R600/InstPrinter/Makefile
-lib/Target/R600/LLVMBuild.txt
-lib/Target/R600/MCTargetDesc/CMakeLists.txt
-lib/Target/R600/MCTargetDesc/LLVMBuild.txt
-lib/Target/R600/MCTargetDesc/Makefile
-lib/Target/R600/Makefile
-lib/Target/R600/TargetInfo/CMakeLists.txt
-lib/Target/R600/TargetInfo/LLVMBuild.txt
-lib/Target/R600/TargetInfo/Makefile
lib/Target/README.txt
+lib/Target/RISCV/AsmParser/CMakeLists.txt
+lib/Target/RISCV/AsmParser/LLVMBuild.txt
+lib/Target/RISCV/CMakeLists.txt
+lib/Target/RISCV/Disassembler/CMakeLists.txt
+lib/Target/RISCV/Disassembler/LLVMBuild.txt
+lib/Target/RISCV/InstPrinter/CMakeLists.txt
+lib/Target/RISCV/InstPrinter/LLVMBuild.txt
+lib/Target/RISCV/LLVMBuild.txt
+lib/Target/RISCV/MCTargetDesc/CMakeLists.txt
+lib/Target/RISCV/MCTargetDesc/LLVMBuild.txt
+lib/Target/RISCV/TargetInfo/CMakeLists.txt
+lib/Target/RISCV/TargetInfo/LLVMBuild.txt
+lib/Target/RISCV/Utils/CMakeLists.txt
+lib/Target/RISCV/Utils/LLVMBuild.txt
lib/Target/Sparc/AsmParser/CMakeLists.txt
lib/Target/Sparc/AsmParser/LLVMBuild.txt
-lib/Target/Sparc/AsmParser/Makefile
lib/Target/Sparc/CMakeLists.txt
lib/Target/Sparc/Disassembler/CMakeLists.txt
lib/Target/Sparc/Disassembler/LLVMBuild.txt
-lib/Target/Sparc/Disassembler/Makefile
lib/Target/Sparc/InstPrinter/CMakeLists.txt
lib/Target/Sparc/InstPrinter/LLVMBuild.txt
-lib/Target/Sparc/InstPrinter/Makefile
lib/Target/Sparc/LLVMBuild.txt
lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
-lib/Target/Sparc/MCTargetDesc/Makefile
-lib/Target/Sparc/Makefile
lib/Target/Sparc/README.txt
lib/Target/Sparc/TargetInfo/CMakeLists.txt
lib/Target/Sparc/TargetInfo/LLVMBuild.txt
-lib/Target/Sparc/TargetInfo/Makefile
lib/Target/SystemZ/AsmParser/CMakeLists.txt
lib/Target/SystemZ/AsmParser/LLVMBuild.txt
-lib/Target/SystemZ/AsmParser/Makefile
lib/Target/SystemZ/CMakeLists.txt
lib/Target/SystemZ/Disassembler/CMakeLists.txt
lib/Target/SystemZ/Disassembler/LLVMBuild.txt
-lib/Target/SystemZ/Disassembler/Makefile
lib/Target/SystemZ/InstPrinter/CMakeLists.txt
lib/Target/SystemZ/InstPrinter/LLVMBuild.txt
-lib/Target/SystemZ/InstPrinter/Makefile
lib/Target/SystemZ/LLVMBuild.txt
lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt
-lib/Target/SystemZ/MCTargetDesc/Makefile
-lib/Target/SystemZ/Makefile
lib/Target/SystemZ/TargetInfo/CMakeLists.txt
lib/Target/SystemZ/TargetInfo/LLVMBuild.txt
-lib/Target/SystemZ/TargetInfo/Makefile
+lib/Target/WebAssembly/AsmParser/CMakeLists.txt
+lib/Target/WebAssembly/AsmParser/LLVMBuild.txt
+lib/Target/WebAssembly/CMakeLists.txt
+lib/Target/WebAssembly/Disassembler/CMakeLists.txt
+lib/Target/WebAssembly/Disassembler/LLVMBuild.txt
+lib/Target/WebAssembly/InstPrinter/CMakeLists.txt
+lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt
+lib/Target/WebAssembly/LLVMBuild.txt
+lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
+lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt
+lib/Target/WebAssembly/TargetInfo/CMakeLists.txt
+lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt
lib/Target/X86/AsmParser/CMakeLists.txt
lib/Target/X86/AsmParser/LLVMBuild.txt
-lib/Target/X86/AsmParser/Makefile
lib/Target/X86/CMakeLists.txt
lib/Target/X86/Disassembler/CMakeLists.txt
lib/Target/X86/Disassembler/LLVMBuild.txt
-lib/Target/X86/Disassembler/Makefile
lib/Target/X86/InstPrinter/CMakeLists.txt
lib/Target/X86/InstPrinter/LLVMBuild.txt
-lib/Target/X86/InstPrinter/Makefile
lib/Target/X86/LLVMBuild.txt
lib/Target/X86/MCTargetDesc/CMakeLists.txt
lib/Target/X86/MCTargetDesc/LLVMBuild.txt
-lib/Target/X86/MCTargetDesc/Makefile
-lib/Target/X86/Makefile
lib/Target/X86/README-FPStack.txt
-lib/Target/X86/README-MMX.txt
lib/Target/X86/README-SSE.txt
-lib/Target/X86/README-UNIMPLEMENTED.txt
lib/Target/X86/README-X86-64.txt
lib/Target/X86/README.txt
lib/Target/X86/TargetInfo/CMakeLists.txt
lib/Target/X86/TargetInfo/LLVMBuild.txt
-lib/Target/X86/TargetInfo/Makefile
lib/Target/X86/Utils/CMakeLists.txt
lib/Target/X86/Utils/LLVMBuild.txt
-lib/Target/X86/Utils/Makefile
-lib/Target/X86/X86CompilationCallback_Win64.asm
lib/Target/XCore/CMakeLists.txt
lib/Target/XCore/Disassembler/CMakeLists.txt
lib/Target/XCore/Disassembler/LLVMBuild.txt
-lib/Target/XCore/Disassembler/Makefile
lib/Target/XCore/InstPrinter/CMakeLists.txt
lib/Target/XCore/InstPrinter/LLVMBuild.txt
-lib/Target/XCore/InstPrinter/Makefile
lib/Target/XCore/LLVMBuild.txt
lib/Target/XCore/MCTargetDesc/CMakeLists.txt
lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
-lib/Target/XCore/MCTargetDesc/Makefile
-lib/Target/XCore/Makefile
lib/Target/XCore/README.txt
lib/Target/XCore/TargetInfo/CMakeLists.txt
lib/Target/XCore/TargetInfo/LLVMBuild.txt
-lib/Target/XCore/TargetInfo/Makefile
+lib/Testing/CMakeLists.txt
+lib/Testing/LLVMBuild.txt
+lib/Testing/Support/CMakeLists.txt
+lib/Testing/Support/LLVMBuild.txt
+lib/TextAPI/CMakeLists.txt
+lib/TextAPI/LLVMBuild.txt
+lib/ToolDrivers/CMakeLists.txt
+lib/ToolDrivers/LLVMBuild.txt
+lib/ToolDrivers/llvm-dlltool/CMakeLists.txt
+lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt
+lib/ToolDrivers/llvm-lib/CMakeLists.txt
+lib/ToolDrivers/llvm-lib/LLVMBuild.txt
+lib/Transforms/AggressiveInstCombine/CMakeLists.txt
+lib/Transforms/AggressiveInstCombine/LLVMBuild.txt
lib/Transforms/CMakeLists.txt
+lib/Transforms/Coroutines/CMakeLists.txt
+lib/Transforms/Coroutines/LLVMBuild.txt
lib/Transforms/Hello/
lib/Transforms/IPO/CMakeLists.txt
lib/Transforms/IPO/LLVMBuild.txt
-lib/Transforms/IPO/Makefile
lib/Transforms/InstCombine/CMakeLists.txt
lib/Transforms/InstCombine/LLVMBuild.txt
-lib/Transforms/InstCombine/Makefile
lib/Transforms/Instrumentation/CMakeLists.txt
lib/Transforms/Instrumentation/LLVMBuild.txt
-lib/Transforms/Instrumentation/Makefile
lib/Transforms/LLVMBuild.txt
-lib/Transforms/Makefile
lib/Transforms/ObjCARC/CMakeLists.txt
lib/Transforms/ObjCARC/LLVMBuild.txt
-lib/Transforms/ObjCARC/Makefile
lib/Transforms/Scalar/CMakeLists.txt
lib/Transforms/Scalar/LLVMBuild.txt
-lib/Transforms/Scalar/Makefile
lib/Transforms/Utils/CMakeLists.txt
lib/Transforms/Utils/LLVMBuild.txt
-lib/Transforms/Utils/Makefile
lib/Transforms/Vectorize/CMakeLists.txt
lib/Transforms/Vectorize/LLVMBuild.txt
-lib/Transforms/Vectorize/Makefile
+lib/WindowsManifest/CMakeLists.txt
+lib/WindowsManifest/LLVMBuild.txt
+lib/XRay/CMakeLists.txt
+lib/XRay/LLVMBuild.txt
llvm.spec.in
-projects/
+projects/CMakeLists.txt
+projects/LLVMBuild.txt
+resources/
+runtimes/
test/
tools/CMakeLists.txt
tools/LLVMBuild.txt
-tools/Makefile
+tools/bugpoint-passes/
tools/bugpoint/CMakeLists.txt
tools/bugpoint/LLVMBuild.txt
-tools/bugpoint/Makefile
-tools/bugpoint-passes/
tools/dsymutil/
tools/gold/
tools/llc/CMakeLists.txt
tools/llc/LLVMBuild.txt
-tools/llc/Makefile
tools/lli/CMakeLists.txt
tools/lli/ChildTarget/CMakeLists.txt
tools/lli/ChildTarget/LLVMBuild.txt
-tools/lli/ChildTarget/Makefile
tools/lli/LLVMBuild.txt
-tools/lli/Makefile
tools/llvm-ar/CMakeLists.txt
tools/llvm-ar/LLVMBuild.txt
-tools/llvm-ar/Makefile
-tools/llvm-ar/install_symlink.cmake
+tools/llvm-as-fuzzer/
+tools/llvm-as-parasitic-coverage-repro/
tools/llvm-as/CMakeLists.txt
tools/llvm-as/LLVMBuild.txt
-tools/llvm-as/Makefile
tools/llvm-bcanalyzer/CMakeLists.txt
tools/llvm-bcanalyzer/LLVMBuild.txt
-tools/llvm-bcanalyzer/Makefile
tools/llvm-c-test/
+tools/llvm-cat/
+tools/llvm-cfi-verify/
tools/llvm-config/
tools/llvm-cov/CMakeLists.txt
tools/llvm-cov/LLVMBuild.txt
-tools/llvm-cov/Makefile
+tools/llvm-cvtres/
tools/llvm-cxxdump/CMakeLists.txt
tools/llvm-cxxdump/LLVMBuild.txt
-tools/llvm-cxxdump/Makefile
+tools/llvm-cxxfilt/CMakeLists.txt
+tools/llvm-cxxmap/CMakeLists.txt
+tools/llvm-cxxmap/LLVMBuild.txt
tools/llvm-diff/CMakeLists.txt
tools/llvm-diff/LLVMBuild.txt
-tools/llvm-diff/Makefile
tools/llvm-dis/CMakeLists.txt
tools/llvm-dis/LLVMBuild.txt
-tools/llvm-dis/Makefile
tools/llvm-dwarfdump/CMakeLists.txt
tools/llvm-dwarfdump/LLVMBuild.txt
-tools/llvm-dwarfdump/Makefile
+tools/llvm-dwarfdump/fuzzer/
+tools/llvm-dwp/
+tools/llvm-elfabi/
+tools/llvm-exegesis/
tools/llvm-extract/CMakeLists.txt
tools/llvm-extract/LLVMBuild.txt
-tools/llvm-extract/Makefile
tools/llvm-go/
+tools/llvm-isel-fuzzer/
+tools/llvm-itanium-demangle-fuzzer/
tools/llvm-jitlistener/
tools/llvm-link/CMakeLists.txt
tools/llvm-link/LLVMBuild.txt
-tools/llvm-link/Makefile
tools/llvm-lto/CMakeLists.txt
tools/llvm-lto/LLVMBuild.txt
-tools/llvm-lto/Makefile
+tools/llvm-lto2/CMakeLists.txt
+tools/llvm-lto2/LLVMBuild.txt
+tools/llvm-mc-assemble-fuzzer/
+tools/llvm-mc-disassemble-fuzzer/
tools/llvm-mc/CMakeLists.txt
tools/llvm-mc/LLVMBuild.txt
-tools/llvm-mc/Makefile
+tools/llvm-mca/CMakeLists.txt
+tools/llvm-mca/LLVMBuild.txt
tools/llvm-mcmarkup/
+tools/llvm-microsoft-demangle-fuzzer/
+tools/llvm-modextract/CMakeLists.txt
+tools/llvm-modextract/LLVMBuild.txt
+tools/llvm-mt/
tools/llvm-nm/CMakeLists.txt
tools/llvm-nm/LLVMBuild.txt
-tools/llvm-nm/Makefile
+tools/llvm-objcopy/CMakeLists.txt
+tools/llvm-objcopy/LLVMBuild.txt
tools/llvm-objdump/CMakeLists.txt
tools/llvm-objdump/LLVMBuild.txt
-tools/llvm-objdump/Makefile
-tools/llvm-pdbdump/CMakeLists.txt
-tools/llvm-pdbdump/LLVMBuild.txt
-tools/llvm-pdbdump/Makefile
+tools/llvm-opt-fuzzer/
+tools/llvm-opt-report/
+tools/llvm-pdbutil/CMakeLists.txt
+tools/llvm-pdbutil/LLVMBuild.txt
+tools/llvm-pdbutil/fuzzer/
tools/llvm-profdata/CMakeLists.txt
tools/llvm-profdata/LLVMBuild.txt
-tools/llvm-profdata/Makefile
+tools/llvm-rc/
tools/llvm-readobj/CMakeLists.txt
tools/llvm-readobj/LLVMBuild.txt
-tools/llvm-readobj/Makefile
tools/llvm-rtdyld/CMakeLists.txt
tools/llvm-rtdyld/LLVMBuild.txt
-tools/llvm-rtdyld/Makefile
tools/llvm-shlib/
tools/llvm-size/
+tools/llvm-special-case-list-fuzzer/
+tools/llvm-split/
tools/llvm-stress/CMakeLists.txt
tools/llvm-stress/LLVMBuild.txt
-tools/llvm-stress/Makefile
+tools/llvm-strings/
tools/llvm-symbolizer/CMakeLists.txt
-tools/llvm-symbolizer/Makefile
+tools/llvm-undname/
tools/llvm-vtabledump/
+tools/llvm-xray/CMakeLists.txt
+tools/llvm-yaml-numeric-parser-fuzzer/
tools/lto/
-tools/macho-dump/CMakeLists.txt
-tools/macho-dump/LLVMBuild.txt
-tools/macho-dump/Makefile
tools/msbuild/
tools/obj2yaml/
+tools/opt-remarks/
+tools/opt-viewer/
tools/opt/CMakeLists.txt
tools/opt/LLVMBuild.txt
-tools/opt/Makefile
+tools/sancov/
+tools/sanstats/
tools/verify-uselistorder/
+tools/xcode-toolchain/
tools/yaml2obj/
unittests/
utils/DSAclean.py
@@ -498,33 +508,47 @@ utils/GetRepositoryPath
utils/GetSourceVersion
utils/KillTheDoctor/
utils/LLVMBuild.txt
-utils/Makefile
+utils/LLVMVisualizers/
utils/Misc/
utils/PerfectShuffle/
+utils/Reviewing/
utils/TableGen/CMakeLists.txt
utils/TableGen/LLVMBuild.txt
-utils/TableGen/Makefile
utils/TableGen/tdtags
utils/Target/
utils/UpdateCMakeLists.pl
+utils/UpdateTestChecks/
+utils/abtest/
+utils/abtest.py
+utils/benchmark/
utils/bisect
+utils/bisect-skip-count
utils/bugpoint/
+utils/bugpoint_gisel_reducer.py
utils/buildit/
utils/check-each-file
utils/clang-parse-diagnostics-file
utils/codegen-diff
+utils/collect_and_build_with_pgo.py
utils/count/
utils/countloc.sh
utils/create_ladder_graph.py
utils/crosstool/
+utils/demangle_tree.py
+utils/docker/
utils/emacs/
+utils/extract_symbols.py
+utils/extract_vplan.py
utils/findmisopt
utils/findoptdiff
utils/findsym.pl
utils/fpcmp/
+utils/gdb-scripts/
utils/getsrcs.sh
utils/git/
utils/git-svn/
+utils/gn/
+utils/indirect_calls.py
utils/jedit/
utils/kate/
utils/lint/
@@ -532,23 +556,33 @@ utils/lit/
utils/lldbDataFormatters.py
utils/llvm-build/
utils/llvm-compilers-check
+utils/llvm-gisel-cov.py
utils/llvm-lit/
utils/llvm-native-gxx
utils/llvm.grm
-utils/llvm.natvis
utils/llvmdo
utils/llvmgrep
-utils/makellvm
utils/not/
+utils/opt-viewer/
+utils/prepare-code-coverage-artifact.py
utils/release/
+utils/sanitizers/
+utils/schedcover.py
utils/shuffle_fuzz.py
+utils/shuffle_select_fuzz_tester.py
utils/sort_includes.py
-utils/test_debuginfo.pl
utils/testgen/
utils/textmate/
+utils/unicode-case-fold.py
utils/unittest/
+utils/update_analyze_test_checks.py
+utils/update_cc_test_checks.py
utils/update_llc_test_checks.py
+utils/update_mca_test_checks.py
+utils/update_mir_test_checks.py
+utils/update_test_checks.py
utils/valgrind/
utils/vim/
+utils/vscode/
utils/wciia.py
utils/yaml-bench/
diff --git a/contrib/llvm/LICENSE.TXT b/contrib/llvm/LICENSE.TXT
index 461398bab7a7..e4d67d16fea1 100644
--- a/contrib/llvm/LICENSE.TXT
+++ b/contrib/llvm/LICENSE.TXT
@@ -4,7 +4,7 @@ LLVM Release License
University of Illinois/NCSA
Open Source License
-Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
diff --git a/contrib/llvm/include/llvm-c/Core.h b/contrib/llvm/include/llvm-c/Core.h
index 6792219f8730..06de058bdc58 100644
--- a/contrib/llvm/include/llvm-c/Core.h
+++ b/contrib/llvm/include/llvm-c/Core.h
@@ -54,6 +54,8 @@ extern "C" {
* @{
*/
+/// External users depend on the following values being stable. It is not safe
+/// to reorder them.
typedef enum {
/* Terminator Instructions */
LLVMRet = 1,
@@ -64,6 +66,9 @@ typedef enum {
/* removed 6 due to API changes */
LLVMUnreachable = 7,
+ /* Standard Unary Operators */
+ LLVMFNeg = 66,
+
/* Standard Binary Operators */
LLVMAdd = 8,
LLVMFAdd = 9,
@@ -516,6 +521,23 @@ void LLVMContextSetYieldCallback(LLVMContextRef C, LLVMYieldCallback Callback,
void *OpaqueHandle);
/**
+ * Retrieve whether the given context is set to discard all value names.
+ *
+ * @see LLVMContext::shouldDiscardValueNames()
+ */
+LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C);
+
+/**
+ * Set whether the given context discards all value names.
+ *
+ * If true, only the names of GlobalValue objects will be available in the IR.
+ * This can be used to save memory and runtime, especially in release mode.
+ *
+ * @see LLVMContext::setDiscardValueNames()
+ */
+void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard);
+
+/**
* Destroy a context instance.
*
* This should be called for every call to LLVMContextCreate() or memory
@@ -843,6 +865,63 @@ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M);
LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
/**
+ * Obtain an iterator to the first NamedMDNode in a Module.
+ *
+ * @see llvm::Module::named_metadata_begin()
+ */
+LLVMNamedMDNodeRef LLVMGetFirstNamedMetadata(LLVMModuleRef M);
+
+/**
+ * Obtain an iterator to the last NamedMDNode in a Module.
+ *
+ * @see llvm::Module::named_metadata_end()
+ */
+LLVMNamedMDNodeRef LLVMGetLastNamedMetadata(LLVMModuleRef M);
+
+/**
+ * Advance a NamedMDNode iterator to the next NamedMDNode.
+ *
+ * Returns NULL if the iterator was already at the end and there are no more
+ * named metadata nodes.
+ */
+LLVMNamedMDNodeRef LLVMGetNextNamedMetadata(LLVMNamedMDNodeRef NamedMDNode);
+
+/**
+ * Decrement a NamedMDNode iterator to the previous NamedMDNode.
+ *
+ * Returns NULL if the iterator was already at the beginning and there are
+ * no previous named metadata nodes.
+ */
+LLVMNamedMDNodeRef LLVMGetPreviousNamedMetadata(LLVMNamedMDNodeRef NamedMDNode);
+
+/**
+ * Retrieve a NamedMDNode with the given name, returning NULL if no such
+ * node exists.
+ *
+ * @see llvm::Module::getNamedMetadata()
+ */
+LLVMNamedMDNodeRef LLVMGetNamedMetadata(LLVMModuleRef M,
+ const char *Name, size_t NameLen);
+
+/**
+ * Retrieve a NamedMDNode with the given name, creating a new node if no such
+ * node exists.
+ *
+ * @see llvm::Module::getOrInsertNamedMetadata()
+ */
+LLVMNamedMDNodeRef LLVMGetOrInsertNamedMetadata(LLVMModuleRef M,
+ const char *Name,
+ size_t NameLen);
+
+/**
+ * Retrieve the name of a NamedMDNode.
+ *
+ * @see llvm::NamedMDNode::getName()
+ */
+const char *LLVMGetNamedMetadataName(LLVMNamedMDNodeRef NamedMD,
+ size_t *NameLen);
+
+/**
* Obtain the number of operands for named metadata in a module.
*
* @see llvm::Module::getNamedMetadata()
@@ -873,6 +952,44 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name,
LLVMValueRef Val);
/**
+ * Return the directory of the debug location for this value, which must be
+ * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ * @see llvm::GlobalVariable::getDebugInfo()
+ * @see llvm::Function::getSubprogram()
+ */
+const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length);
+
+/**
+ * Return the filename of the debug location for this value, which must be
+ * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ * @see llvm::GlobalVariable::getDebugInfo()
+ * @see llvm::Function::getSubprogram()
+ */
+const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length);
+
+/**
+ * Return the line number of the debug location for this value, which must be
+ * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ * @see llvm::GlobalVariable::getDebugInfo()
+ * @see llvm::Function::getSubprogram()
+ */
+unsigned LLVMGetDebugLocLine(LLVMValueRef Val);
+
+/**
+ * Return the column number of the debug location for this value, which must be
+ * an llvm::Instruction.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ */
+unsigned LLVMGetDebugLocColumn(LLVMValueRef Val);
+
+/**
* Add a function to a module under a specified name.
*
* @see llvm::Function::Create()
@@ -1222,6 +1339,13 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy);
LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy);
/**
+ * Determine whether a structure is literal.
+ *
+ * @see llvm::StructType::isLiteral()
+ */
+LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy);
+
+/**
* @}
*/
@@ -1408,6 +1532,7 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(ConstantVector) \
macro(GlobalValue) \
macro(GlobalAlias) \
+ macro(GlobalIFunc) \
macro(GlobalObject) \
macro(Function) \
macro(GlobalVariable) \
@@ -1417,7 +1542,9 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(CallInst) \
macro(IntrinsicInst) \
macro(DbgInfoIntrinsic) \
- macro(DbgDeclareInst) \
+ macro(DbgVariableIntrinsic) \
+ macro(DbgDeclareInst) \
+ macro(DbgLabelInst) \
macro(MemIntrinsic) \
macro(MemCpyInst) \
macro(MemMoveInst) \
@@ -1434,16 +1561,15 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(SelectInst) \
macro(ShuffleVectorInst) \
macro(StoreInst) \
- macro(TerminatorInst) \
- macro(BranchInst) \
- macro(IndirectBrInst) \
- macro(InvokeInst) \
- macro(ReturnInst) \
- macro(SwitchInst) \
- macro(UnreachableInst) \
- macro(ResumeInst) \
- macro(CleanupReturnInst) \
- macro(CatchReturnInst) \
+ macro(BranchInst) \
+ macro(IndirectBrInst) \
+ macro(InvokeInst) \
+ macro(ReturnInst) \
+ macro(SwitchInst) \
+ macro(UnreachableInst) \
+ macro(ResumeInst) \
+ macro(CleanupReturnInst) \
+ macro(CatchReturnInst) \
macro(FuncletPadInst) \
macro(CatchPadInst) \
macro(CleanupPadInst) \
@@ -1959,9 +2085,14 @@ LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices);
+LLVMValueRef LLVMConstGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices, unsigned NumIndices);
LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices,
unsigned NumIndices);
+LLVMValueRef LLVMConstInBoundsGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices);
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
@@ -2037,6 +2168,14 @@ void LLVMSetDLLStorageClass(LLVMValueRef Global, LLVMDLLStorageClass Class);
LLVMUnnamedAddr LLVMGetUnnamedAddress(LLVMValueRef Global);
void LLVMSetUnnamedAddress(LLVMValueRef Global, LLVMUnnamedAddr UnnamedAddr);
+/**
+ * Returns the "value type" of a global value. This differs from the formal
+ * type of a global value which is always a pointer type.
+ *
+ * @see llvm::GlobalValue::getValueType()
+ */
+LLVMTypeRef LLVMGlobalGetValueType(LLVMValueRef Global);
+
/** Deprecated: Use LLVMGetUnnamedAddress instead. */
LLVMBool LLVMHasUnnamedAddr(LLVMValueRef Global);
/** Deprecated: Use LLVMSetUnnamedAddress instead. */
@@ -2068,6 +2207,58 @@ unsigned LLVMGetAlignment(LLVMValueRef V);
void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes);
/**
+ * Sets a metadata attachment, erasing the existing metadata attachment if
+ * it already exists for the given kind.
+ *
+ * @see llvm::GlobalObject::setMetadata()
+ */
+void LLVMGlobalSetMetadata(LLVMValueRef Global, unsigned Kind,
+ LLVMMetadataRef MD);
+
+/**
+ * Erases a metadata attachment of the given kind if it exists.
+ *
+ * @see llvm::GlobalObject::eraseMetadata()
+ */
+void LLVMGlobalEraseMetadata(LLVMValueRef Global, unsigned Kind);
+
+/**
+ * Removes all metadata attachments from this value.
+ *
+ * @see llvm::GlobalObject::clearMetadata()
+ */
+void LLVMGlobalClearMetadata(LLVMValueRef Global);
+
+/**
+ * Retrieves an array of metadata entries representing the metadata attached to
+ * this value. The caller is responsible for freeing this array by calling
+ * \c LLVMDisposeValueMetadataEntries.
+ *
+ * @see llvm::GlobalObject::getAllMetadata()
+ */
+LLVMValueMetadataEntry *LLVMGlobalCopyAllMetadata(LLVMValueRef Value,
+ size_t *NumEntries);
+
+/**
+ * Destroys value metadata entries.
+ */
+void LLVMDisposeValueMetadataEntries(LLVMValueMetadataEntry *Entries);
+
+/**
+ * Returns the kind of a value metadata entry at a specific index.
+ */
+unsigned LLVMValueMetadataEntriesGetKind(LLVMValueMetadataEntry *Entries,
+ unsigned Index);
+
+/**
+ * Returns the underlying metadata node of a value metadata entry at a
+ * specific index.
+ */
+LLVMMetadataRef
+LLVMValueMetadataEntriesGetMetadata(LLVMValueMetadataEntry *Entries,
+ unsigned Index);
+
+/**
* @}
*/
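A minimal usage sketch for the new metadata-entry accessors above (assuming `G` is an existing LLVMValueRef for a global object); the copied array must be freed by the caller:

    size_t NumEntries;
    LLVMValueMetadataEntry *Entries = LLVMGlobalCopyAllMetadata(G, &NumEntries);
    for (size_t I = 0; I < NumEntries; ++I) {
      /* Each entry pairs a metadata kind ID with the attached node. */
      unsigned Kind = LLVMValueMetadataEntriesGetKind(Entries, (unsigned)I);
      LLVMMetadataRef MD = LLVMValueMetadataEntriesGetMetadata(Entries, (unsigned)I);
      (void)Kind; (void)MD; /* inspect as needed */
    }
    LLVMDisposeValueMetadataEntries(Entries);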
@@ -2218,6 +2409,54 @@ void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn);
unsigned LLVMGetIntrinsicID(LLVMValueRef Fn);
/**
+ * Create or insert the declaration of an intrinsic. For overloaded intrinsics,
+ * parameter types must be provided to uniquely identify an overload.
+ *
+ * @see llvm::Intrinsic::getDeclaration()
+ */
+LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod,
+ unsigned ID,
+ LLVMTypeRef *ParamTypes,
+ size_t ParamCount);
+
+/**
+ * Retrieves the type of an intrinsic. For overloaded intrinsics, parameter
+ * types must be provided to uniquely identify an overload.
+ *
+ * @see llvm::Intrinsic::getType()
+ */
+LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID,
+ LLVMTypeRef *ParamTypes, size_t ParamCount);
+
+/**
+ * Retrieves the name of an intrinsic.
+ *
+ * @see llvm::Intrinsic::getName()
+ */
+const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength);
+
+/**
+ * Copies the name of an overloaded intrinsic identified by a given list of
+ * parameter types.
+ *
+ * Unlike LLVMIntrinsicGetName, the caller is responsible for freeing the
+ * returned string.
+ *
+ * @see llvm::Intrinsic::getName()
+ */
+const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
+ LLVMTypeRef *ParamTypes,
+ size_t ParamCount,
+ size_t *NameLength);
+
+/**
+ * Determine whether the intrinsic identified by the given ID is overloaded.
+ *
+ * @see llvm::Intrinsic::isOverloaded()
+ */
+LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID);
+
+/**
* Obtain the calling function of a function.
*
* The returned value corresponds to the LLVMCallConv enumeration.
@@ -2514,7 +2753,7 @@ LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB);
* If the basic block does not have a terminator (it is not well-formed
* if it doesn't), then NULL is returned.
*
- * The returned LLVMValueRef corresponds to a llvm::TerminatorInst.
+ * The returned LLVMValueRef corresponds to an llvm::Instruction.
*
* @see llvm::BasicBlock::getTerminator()
*/
@@ -2573,6 +2812,14 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB);
LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn);
/**
+ * Create a new basic block without inserting it into a function.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
+LLVMBasicBlockRef LLVMCreateBasicBlockInContext(LLVMContextRef C,
+ const char *Name);
+
+/**
* Append a basic block to the end of a function.
*
* @see llvm::BasicBlock::Create()
@@ -2695,6 +2942,16 @@ LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID);
void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node);
/**
+ * Returns the metadata associated with an instruction value, but filters out
+ * all the debug locations.
+ *
+ * @see llvm::Instruction::getAllMetadataOtherThanDebugLoc()
+ */
+LLVMValueMetadataEntry *
+LLVMInstructionGetAllMetadataOtherThanDebugLoc(LLVMValueRef Instr,
+ size_t *NumEntries);
+
+/**
* Obtain the basic block to which an instruction belongs.
*
* @see llvm::Instruction::getParent()
@@ -2777,6 +3034,15 @@ LLVMRealPredicate LLVMGetFCmpPredicate(LLVMValueRef Inst);
LLVMValueRef LLVMInstructionClone(LLVMValueRef Inst);
/**
+ * Determine whether an instruction is a terminator. This routine is named to
+ * be compatible with historical functions that did this by querying the
+ * underlying C++ type.
+ *
+ * @see llvm::Instruction::isTerminator()
+ */
+LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst);
+
+/**
* @defgroup LLVMCCoreValueInstructionCall Call Sites and Invocations
*
* Functions in this group apply to instructions that refer to call
@@ -2839,6 +3105,13 @@ void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
const char *K, unsigned KLen);
/**
+ * Obtain the function type called by this instruction.
+ *
+ * @see llvm::CallBase::getFunctionType()
+ */
+LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef C);
+
+/**
* Obtain the pointer to the function invoked by this instruction.
*
* This expects an LLVMValueRef that corresponds to a llvm::CallInst or
@@ -2916,8 +3189,8 @@ void LLVMSetUnwindDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B);
/**
* @defgroup LLVMCCoreValueInstructionTerminator Terminators
*
- * Functions in this group only apply to instructions that map to
- * llvm::TerminatorInst instances.
+ * Functions in this group only apply to instructions for which
+ * LLVMIsATerminatorInst returns true.
*
* @{
*/
@@ -2925,21 +3198,21 @@ void LLVMSetUnwindDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B);
/**
* Return the number of successors that this terminator has.
*
- * @see llvm::TerminatorInst::getNumSuccessors
+ * @see llvm::Instruction::getNumSuccessors
*/
unsigned LLVMGetNumSuccessors(LLVMValueRef Term);
/**
* Return the specified successor.
*
- * @see llvm::TerminatorInst::getSuccessor
+ * @see llvm::Instruction::getSuccessor
*/
LLVMBasicBlockRef LLVMGetSuccessor(LLVMValueRef Term, unsigned i);
/**
* Update the specified successor to point at the provided block.
*
- * @see llvm::TerminatorInst::setSuccessor
+ * @see llvm::Instruction::setSuccessor
*/
void LLVMSetSuccessor(LLVMValueRef Term, unsigned i, LLVMBasicBlockRef block);
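A short sketch of the successor helpers under the new terminator model (assuming `BB` is an existing LLVMBasicBlockRef); LLVMGetBasicBlockTerminator may return NULL for an unfinished block, and LLVMIsATerminatorInst is the new type check:

    LLVMValueRef Term = LLVMGetBasicBlockTerminator(BB);
    if (Term != NULL && LLVMIsATerminatorInst(Term)) {
      unsigned N = LLVMGetNumSuccessors(Term);
      for (unsigned I = 0; I < N; ++I) {
        LLVMBasicBlockRef Succ = LLVMGetSuccessor(Term, I);
        (void)Succ; /* visit the successor block */
      }
    }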
@@ -3130,10 +3403,16 @@ LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef, LLVMValueRef V,
LLVMBasicBlockRef Else, unsigned NumCases);
LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
unsigned NumDests);
+// LLVMBuildInvoke is deprecated in favor of LLVMBuildInvoke2, in preparation
+// for opaque pointer types.
LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
const char *Name);
+LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name);
LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef);
/* Exception Handling */
@@ -3290,13 +3569,48 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef, LLVMValueRef V, const char *Name);
LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name);
LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef, LLVMTypeRef Ty,
LLVMValueRef Val, const char *Name);
+
+/**
+ * Creates and inserts a memset storing the specified value at the
+ * specified pointer.
+ *
+ * @see llvm::IRBuilder::CreateMemSet()
+ */
+LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr,
+ LLVMValueRef Val, LLVMValueRef Len,
+ unsigned Align);
+/**
+ * Creates and inserts a memcpy between the specified pointers.
+ *
+ * @see llvm::IRBuilder::CreateMemCpy()
+ */
+LLVMValueRef LLVMBuildMemCpy(LLVMBuilderRef B,
+ LLVMValueRef Dst, unsigned DstAlign,
+ LLVMValueRef Src, unsigned SrcAlign,
+ LLVMValueRef Size);
+/**
+ * Creates and inserts a memmove between the specified pointers.
+ *
+ * @see llvm::IRBuilder::CreateMemMove()
+ */
+LLVMValueRef LLVMBuildMemMove(LLVMBuilderRef B,
+ LLVMValueRef Dst, unsigned DstAlign,
+ LLVMValueRef Src, unsigned SrcAlign,
+ LLVMValueRef Size);
+
LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name);
LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef, LLVMTypeRef Ty,
LLVMValueRef Val, const char *Name);
LLVMValueRef LLVMBuildFree(LLVMBuilderRef, LLVMValueRef PointerVal);
+// LLVMBuildLoad is deprecated in favor of LLVMBuildLoad2, in preparation for
+// opaque pointer types.
LLVMValueRef LLVMBuildLoad(LLVMBuilderRef, LLVMValueRef PointerVal,
const char *Name);
+LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef, LLVMTypeRef Ty,
+ LLVMValueRef PointerVal, const char *Name);
LLVMValueRef LLVMBuildStore(LLVMBuilderRef, LLVMValueRef Val, LLVMValueRef Ptr);
+// LLVMBuildGEP, LLVMBuildInBoundsGEP, and LLVMBuildStructGEP are deprecated in
+// favor of LLVMBuild*GEP2, in preparation for opaque pointer types.
LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name);
@@ -3305,6 +3619,15 @@ LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
const char *Name);
LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
unsigned Idx, const char *Name);
+LLVMValueRef LLVMBuildGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, LLVMValueRef *Indices,
+ unsigned NumIndices, const char *Name);
+LLVMValueRef LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, LLVMValueRef *Indices,
+ unsigned NumIndices, const char *Name);
+LLVMValueRef LLVMBuildStructGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, unsigned Idx,
+ const char *Name);
LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
const char *Name);
LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
@@ -3351,11 +3674,16 @@ LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name);
LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name);
-LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/
- LLVMTypeRef DestTy, const char *Name);
+LLVMValueRef LLVMBuildIntCast2(LLVMBuilderRef, LLVMValueRef Val,
+ LLVMTypeRef DestTy, LLVMBool IsSigned,
+ const char *Name);
LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name);
+/** Deprecated: This cast is always signed. Use LLVMBuildIntCast2 instead. */
+LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/
+ LLVMTypeRef DestTy, const char *Name);
+
/* Comparisons */
LLVMValueRef LLVMBuildICmp(LLVMBuilderRef, LLVMIntPredicate Op,
LLVMValueRef LHS, LLVMValueRef RHS,
@@ -3366,9 +3694,14 @@ LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef, LLVMRealPredicate Op,
/* Miscellaneous instructions */
LLVMValueRef LLVMBuildPhi(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name);
+// LLVMBuildCall is deprecated in favor of LLVMBuildCall2, in preparation for
+// opaque pointer types.
LLVMValueRef LLVMBuildCall(LLVMBuilderRef, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
const char *Name);
+LLVMValueRef LLVMBuildCall2(LLVMBuilderRef, LLVMTypeRef, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name);
LLVMValueRef LLVMBuildSelect(LLVMBuilderRef, LLVMValueRef If,
LLVMValueRef Then, LLVMValueRef Else,
const char *Name);
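A hedged sketch of the explicitly-typed builder variants added above (assuming `B` is an LLVMBuilderRef, `P` points at an i32, and `F` is a function of type `FnTy`, all created elsewhere); the older untyped forms remain but are deprecated ahead of opaque pointer types:

    LLVMTypeRef I32 = LLVMInt32Type();
    LLVMValueRef V = LLVMBuildLoad2(B, I32, P, "v");           /* load with explicit pointee type */
    LLVMValueRef Args[] = { V };
    LLVMValueRef R = LLVMBuildCall2(B, FnTy, F, Args, 1, "r"); /* call with explicit function type */
    (void)R;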
diff --git a/contrib/llvm/include/llvm-c/DebugInfo.h b/contrib/llvm/include/llvm-c/DebugInfo.h
index cee6755f1874..87a72034b0e8 100644
--- a/contrib/llvm/include/llvm-c/DebugInfo.h
+++ b/contrib/llvm/include/llvm-c/DebugInfo.h
@@ -54,9 +54,12 @@ typedef enum {
LLVMDIFlagMainSubprogram = 1 << 21,
LLVMDIFlagTypePassByValue = 1 << 22,
LLVMDIFlagTypePassByReference = 1 << 23,
- LLVMDIFlagFixedEnum = 1 << 24,
+ LLVMDIFlagEnumClass = 1 << 24,
+ LLVMDIFlagFixedEnum = LLVMDIFlagEnumClass, // Deprecated.
LLVMDIFlagThunk = 1 << 25,
LLVMDIFlagTrivial = 1 << 26,
+ LLVMDIFlagBigEndian = 1 << 27,
+ LLVMDIFlagLittleEndian = 1 << 28,
LLVMDIFlagIndirectVirtualBase = (1 << 2) | (1 << 5),
LLVMDIFlagAccessibility = LLVMDIFlagPrivate | LLVMDIFlagProtected |
LLVMDIFlagPublic,
@@ -125,6 +128,44 @@ typedef enum {
} LLVMDWARFEmissionKind;
/**
+ * The kind of metadata nodes.
+ */
+enum {
+ LLVMMDStringMetadataKind,
+ LLVMConstantAsMetadataMetadataKind,
+ LLVMLocalAsMetadataMetadataKind,
+ LLVMDistinctMDOperandPlaceholderMetadataKind,
+ LLVMMDTupleMetadataKind,
+ LLVMDILocationMetadataKind,
+ LLVMDIExpressionMetadataKind,
+ LLVMDIGlobalVariableExpressionMetadataKind,
+ LLVMGenericDINodeMetadataKind,
+ LLVMDISubrangeMetadataKind,
+ LLVMDIEnumeratorMetadataKind,
+ LLVMDIBasicTypeMetadataKind,
+ LLVMDIDerivedTypeMetadataKind,
+ LLVMDICompositeTypeMetadataKind,
+ LLVMDISubroutineTypeMetadataKind,
+ LLVMDIFileMetadataKind,
+ LLVMDICompileUnitMetadataKind,
+ LLVMDISubprogramMetadataKind,
+ LLVMDILexicalBlockMetadataKind,
+ LLVMDILexicalBlockFileMetadataKind,
+ LLVMDINamespaceMetadataKind,
+ LLVMDIModuleMetadataKind,
+ LLVMDITemplateTypeParameterMetadataKind,
+ LLVMDITemplateValueParameterMetadataKind,
+ LLVMDIGlobalVariableMetadataKind,
+ LLVMDILocalVariableMetadataKind,
+ LLVMDILabelMetadataKind,
+ LLVMDIObjCPropertyMetadataKind,
+ LLVMDIImportedEntityMetadataKind,
+ LLVMDIMacroMetadataKind,
+ LLVMDIMacroFileMetadataKind
+};
+typedef unsigned LLVMMetadataKind;
+
+/**
* An LLVM DWARF type encoding.
*/
typedef unsigned LLVMDWARFTypeEncoding;
@@ -531,11 +572,13 @@ LLVMDIBuilderCreateUnspecifiedType(LLVMDIBuilderRef Builder, const char *Name,
* \param NameLen Length of type name.
* \param SizeInBits Size of the type.
* \param Encoding DWARF encoding code, e.g. \c LLVMDWARFTypeEncoding_float.
+ * \param Flags Flags to encode optional attributes like endianness.
*/
LLVMMetadataRef
LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef Builder, const char *Name,
size_t NameLen, uint64_t SizeInBits,
- LLVMDWARFTypeEncoding Encoding);
+ LLVMDWARFTypeEncoding Encoding,
+ LLVMDIFlags Flags);
/**
* Create debugging information entry for a pointer.
@@ -965,21 +1008,15 @@ LLVMDIBuilderCreateConstantValueExpression(LLVMDIBuilderRef Builder,
* \param Expr The location of the global relative to the attached
* GlobalVariable.
* \param Decl Reference to the corresponding declaration.
* \param AlignInBits Variable alignment(or 0 if no alignment attr was
* specified)
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateGlobalVariableExpression(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- const char *Name, size_t NameLen,
- const char *Linkage, size_t LinkLen,
- LLVMMetadataRef File,
- unsigned LineNo,
- LLVMMetadataRef Ty,
- LLVMBool LocalToUnit,
- LLVMMetadataRef Expr,
- LLVMMetadataRef Decl,
- uint32_t AlignInBits);
+LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
+ size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File,
+ unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
+ LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits);
/**
* Create a new temporary \c MDNode. Suitable for use in constructing cyclic
* \c MDNode structures. A temporary \c MDNode is not uniqued, may be RAUW'd,
@@ -1025,17 +1062,11 @@ void LLVMMetadataReplaceAllUsesWith(LLVMMetadataRef TempTargetMetadata,
* \param AlignInBits Variable alignment(or 0 if no alignment attr was
* specified)
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateTempGlobalVariableFwdDecl(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- const char *Name, size_t NameLen,
- const char *Linkage, size_t LnkLen,
- LLVMMetadataRef File,
- unsigned LineNo,
- LLVMMetadataRef Ty,
- LLVMBool LocalToUnit,
- LLVMMetadataRef Decl,
- uint32_t AlignInBits);
+LLVMMetadataRef LLVMDIBuilderCreateTempGlobalVariableFwdDecl(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
+ size_t NameLen, const char *Linkage, size_t LnkLen, LLVMMetadataRef File,
+ unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
+ LLVMMetadataRef Decl, uint32_t AlignInBits);
/**
* Insert a new llvm.dbg.declare intrinsic call before the given instruction.
@@ -1149,6 +1180,13 @@ LLVMMetadataRef LLVMGetSubprogram(LLVMValueRef Func);
*/
void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP);
+/**
+ * Obtain the enumerated type of a Metadata instance.
+ *
+ * @see llvm::Metadata::getMetadataID()
+ */
+LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata);
+
#ifdef __cplusplus
} /* end extern "C" */
#endif
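A brief sketch of dispatching on the new metadata kinds (assuming `MD` is an LLVMMetadataRef obtained elsewhere):

    LLVMMetadataKind Kind = LLVMGetMetadataKind(MD);
    if (Kind == LLVMDILocationMetadataKind) {
      /* MD is a debug location */
    } else if (Kind == LLVMDISubprogramMetadataKind) {
      /* MD describes a function's debug info */
    }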
diff --git a/contrib/llvm/include/llvm-c/Error.h b/contrib/llvm/include/llvm-c/Error.h
new file mode 100644
index 000000000000..71e84661222b
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/Error.h
@@ -0,0 +1,69 @@
+/*===------- llvm-c/Error.h - llvm::Error class C Interface -------*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file defines the C interface to LLVM's Error class. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_ERROR_H
+#define LLVM_C_ERROR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LLVMErrorSuccess 0
+
+/**
+ * Opaque reference to an error instance. Null serves as the 'success' value.
+ */
+typedef struct LLVMOpaqueError *LLVMErrorRef;
+
+/**
+ * Error type identifier.
+ */
+typedef const void *LLVMErrorTypeId;
+
+/**
+ * Returns the type id for the given error instance, which must be a failure
+ * value (i.e. non-null).
+ */
+LLVMErrorTypeId LLVMGetErrorTypeId(LLVMErrorRef Err);
+
+/**
+ * Dispose of the given error without handling it. This operation consumes the
+ * error, and the given LLVMErrorRef value is not usable once this call returns.
+ * Note: This method *only* needs to be called if the error is not being passed
+ * to some other consuming operation, e.g. LLVMGetErrorMessage.
+ */
+void LLVMConsumeError(LLVMErrorRef Err);
+
+/**
+ * Returns a string describing the given error. This operation consumes the error,
+ * and the given LLVMErrorRef value is not usable once this call returns.
+ * The caller is responsible for disposing of the string by calling
+ * LLVMDisposeErrorMessage.
+ */
+char *LLVMGetErrorMessage(LLVMErrorRef Err);
+
+/**
+ * Dispose of the given error message.
+ */
+void LLVMDisposeErrorMessage(char *ErrMsg);
+
+/**
+ * Returns the type id for llvm StringError.
+ */
+LLVMErrorTypeId LLVMGetStringErrorTypeId(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
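A minimal sketch of the intended consumption pattern (the helper name `report` and the producer of the error are illustrative, not part of the API): a NULL LLVMErrorRef is success, and every failure value must be consumed exactly once, either by LLVMGetErrorMessage or LLVMConsumeError.

    #include <stdio.h>
    #include "llvm-c/Error.h"

    static void report(LLVMErrorRef Err) {
      if (!Err)
        return;                                 /* NULL is the success value */
      if (LLVMGetErrorTypeId(Err) == LLVMGetStringErrorTypeId()) {
        char *Msg = LLVMGetErrorMessage(Err);   /* consumes Err */
        fprintf(stderr, "error: %s\n", Msg);
        LLVMDisposeErrorMessage(Msg);
      } else {
        LLVMConsumeError(Err);                  /* consumes Err without handling it */
      }
    }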
diff --git a/contrib/llvm/include/llvm-c/ExecutionEngine.h b/contrib/llvm/include/llvm-c/ExecutionEngine.h
index 49ae6fee45f0..e8ebef9ab15d 100644
--- a/contrib/llvm/include/llvm-c/ExecutionEngine.h
+++ b/contrib/llvm/include/llvm-c/ExecutionEngine.h
@@ -186,7 +186,7 @@ void LLVMDisposeMCJITMemoryManager(LLVMMCJITMemoryManagerRef MM);
LLVMJITEventListenerRef LLVMCreateGDBRegistrationListener(void);
LLVMJITEventListenerRef LLVMCreateIntelJITEventListener(void);
-LLVMJITEventListenerRef LLVMCreateOprofileJITEventListener(void);
+LLVMJITEventListenerRef LLVMCreateOProfileJITEventListener(void);
LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void);
/**
diff --git a/contrib/llvm/include/llvm-c/OptRemarks.h b/contrib/llvm/include/llvm-c/OptRemarks.h
new file mode 100644
index 000000000000..6a90394e711c
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/OptRemarks.h
@@ -0,0 +1,204 @@
+/*===-- llvm-c/OptRemarks.h - OptRemarks Public C Interface -------*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header provides a public interface to an opt-remark library. *|
+|* LLVM provides an implementation of this interface. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_OPT_REMARKS_H
+#define LLVM_C_OPT_REMARKS_H
+
+#include "llvm-c/Core.h"
+#include "llvm-c/Types.h"
+#ifdef __cplusplus
+#include <cstddef>
+extern "C" {
+#else
+#include <stddef.h>
+#endif /* !defined(__cplusplus) */
+
+/**
+ * @defgroup LLVMCOPTREMARKS OptRemarks
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
+#define OPT_REMARKS_API_VERSION 0
+
+/**
+ * String containing a buffer and a length. The buffer is not guaranteed to be
+ * zero-terminated.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+typedef struct {
+ const char *Str;
+ uint32_t Len;
+} LLVMOptRemarkStringRef;
+
+/**
+ * DebugLoc containing File, Line and Column.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+typedef struct {
+ // File:
+ LLVMOptRemarkStringRef SourceFile;
+ // Line:
+ uint32_t SourceLineNumber;
+ // Column:
+ uint32_t SourceColumnNumber;
+} LLVMOptRemarkDebugLoc;
+
+/**
+ * Element of the "Args" list. The key might give more information about the
+ * semantics of the value, e.g. "Callee" will tell you that the value is a
+ * symbol that names a function.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+typedef struct {
+ // e.g. "Callee"
+ LLVMOptRemarkStringRef Key;
+ // e.g. "malloc"
+ LLVMOptRemarkStringRef Value;
+
+ // "DebugLoc": Optional
+ LLVMOptRemarkDebugLoc DebugLoc;
+} LLVMOptRemarkArg;
+
+/**
+ * One remark entry.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+typedef struct {
+ // e.g. !Missed, !Passed
+ LLVMOptRemarkStringRef RemarkType;
+ // "Pass": Required
+ LLVMOptRemarkStringRef PassName;
+ // "Name": Required
+ LLVMOptRemarkStringRef RemarkName;
+ // "Function": Required
+ LLVMOptRemarkStringRef FunctionName;
+
+ // "DebugLoc": Optional
+ LLVMOptRemarkDebugLoc DebugLoc;
+ // "Hotness": Optional
+ uint32_t Hotness;
+ // "Args": Optional. It is an array of `NumArgs` elements.
+ uint32_t NumArgs;
+ LLVMOptRemarkArg *Args;
+} LLVMOptRemarkEntry;
+
+typedef struct LLVMOptRemarkOpaqueParser *LLVMOptRemarkParserRef;
+
+/**
+ * Creates a remark parser that can be used to read and parse the buffer located
+ * in \p Buf of size \p Size.
+ *
+ * \p Buf cannot be NULL.
+ *
+ * This function should be paired with LLVMOptRemarkParserDispose() to avoid
+ * leaking resources.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf,
+ uint64_t Size);
+
+/**
+ * Returns the next remark in the file.
+ *
+ * The value pointed to by the return value is invalidated by the next call to
+ * LLVMOptRemarkParserGetNext().
+ *
+ * If the parser reaches the end of the buffer, the return value will be NULL.
+ *
+ * In the case of an error, the return value will be NULL, and:
+ *
+ * 1) LLVMOptRemarkParserHasError() will return `1`.
+ *
+ * 2) LLVMOptRemarkParserGetErrorMessage() will return a descriptive error
+ * message.
+ *
+ * An error may occur if:
+ *
+ * 1) An argument is invalid.
+ *
+ * 2) There is a YAML parsing error. This type of error aborts parsing
+ * immediately and returns `1`. It can occur on malformed YAML.
+ *
+ * 3) Remark parsing error. If this type of error occurs, the parser won't call
+ * the handler and will continue to the next one. It can occur on malformed
+ * remarks, like missing or extra fields in the file.
+ *
+ * Here is a quick example of the usage:
+ *
+ * ```
+ * LLVMOptRemarkParserRef Parser = LLVMOptRemarkParserCreate(Buf, Size);
+ * LLVMOptRemarkEntry *Remark = NULL;
+ * while ((Remark = LLVMOptRemarkParserGetNext(Parser))) {
+ * // use Remark
+ * }
+ * bool HasError = LLVMOptRemarkParserHasError(Parser);
+ * LLVMOptRemarkParserDispose(Parser);
+ * ```
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern LLVMOptRemarkEntry *
+LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser);
+
+/**
+ * Returns `1` if the parser encountered an error while parsing the buffer.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser);
+
+/**
+ * Returns a null-terminated string containing an error message.
+ *
+ * In case of no error, the result is `NULL`.
+ *
+ * The memory of the string is bound to the lifetime of \p Parser. If
+ * LLVMOptRemarkParserDispose() is called, the memory of the string will be
+ * released.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern const char *
+LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser);
+
+/**
+ * Releases all the resources used by \p Parser.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser);
+
+/**
+ * Returns the version of the opt-remarks dylib.
+ *
+ * \since OPT_REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMOptRemarkVersion(void);
+
+/**
+ * @} // endgroup LLVMCOPTREMARKS
+ */
+
+#ifdef __cplusplus
+}
+#endif /* !defined(__cplusplus) */
+
+#endif /* LLVM_C_OPT_REMARKS_H */
diff --git a/contrib/llvm/include/llvm-c/OrcBindings.h b/contrib/llvm/include/llvm-c/OrcBindings.h
index 9497f0d40776..570db87fee94 100644
--- a/contrib/llvm/include/llvm-c/OrcBindings.h
+++ b/contrib/llvm/include/llvm-c/OrcBindings.h
@@ -22,6 +22,7 @@
#ifndef LLVM_C_ORCBINDINGS_H
#define LLVM_C_ORCBINDINGS_H
+#include "llvm-c/Error.h"
#include "llvm-c/Object.h"
#include "llvm-c/TargetMachine.h"
@@ -36,8 +37,6 @@ typedef uint64_t (*LLVMOrcSymbolResolverFn)(const char *Name, void *LookupCtx);
typedef uint64_t (*LLVMOrcLazyCompileCallbackFn)(LLVMOrcJITStackRef JITStack,
void *CallbackCtx);
-typedef enum { LLVMOrcErrSuccess = 0, LLVMOrcErrGeneric } LLVMOrcErrorCode;
-
/**
* Create an ORC JIT stack.
*
@@ -72,43 +71,41 @@ void LLVMOrcDisposeMangledSymbol(char *MangledSymbol);
/**
* Create a lazy compile callback.
*/
-LLVMOrcErrorCode
-LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcLazyCompileCallbackFn Callback,
- void *CallbackCtx);
+LLVMErrorRef LLVMOrcCreateLazyCompileCallback(
+ LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *RetAddr,
+ LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx);
/**
* Create a named indirect call stub.
*/
-LLVMOrcErrorCode LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress InitAddr);
+LLVMErrorRef LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress InitAddr);
/**
* Set the pointer for the given indirect stub.
*/
-LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress NewAddr);
+LLVMErrorRef LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress NewAddr);
/**
* Add module to be eagerly compiled.
*/
-LLVMOrcErrorCode
-LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle, LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
+LLVMErrorRef LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
/**
* Add module to be lazily compiled one function at a time.
*/
-LLVMOrcErrorCode
-LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle, LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
+LLVMErrorRef LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
/**
* Add an object file.
@@ -118,11 +115,11 @@ LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
* Clients should *not* dispose of the 'Obj' argument: the JIT will manage it
* from this call onwards.
*/
-LLVMOrcErrorCode LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMMemoryBufferRef Obj,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx);
+LLVMErrorRef LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMMemoryBufferRef Obj,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx);
/**
* Remove a module set from the JIT.
@@ -130,29 +127,29 @@ LLVMOrcErrorCode LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
* This works for all modules that can be added via OrcAdd*, including object
* files.
*/
-LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle H);
+LLVMErrorRef LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle H);
/**
* Get symbol address from JIT instance.
*/
-LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- const char *SymbolName);
+LLVMErrorRef LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ const char *SymbolName);
/**
* Get symbol address from JIT instance, searching only the specified
* handle.
*/
-LLVMOrcErrorCode LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcModuleHandle H,
- const char *SymbolName);
+LLVMErrorRef LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ LLVMOrcModuleHandle H,
+ const char *SymbolName);
/**
* Dispose of an ORC JIT stack.
*/
-LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack);
+LLVMErrorRef LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack);
/**
* Register a JIT Event Listener.
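With the return types switched from LLVMOrcErrorCode to LLVMErrorRef, callers now check for NULL instead of LLVMOrcErrSuccess; a rough sketch (assuming `JIT` is an existing LLVMOrcJITStackRef and "main" is the symbol of interest):

    LLVMOrcTargetAddress Addr = 0;
    LLVMErrorRef Err = LLVMOrcGetSymbolAddress(JIT, &Addr, "main");
    if (Err) {
      char *Msg = LLVMGetErrorMessage(Err);   /* consumes Err */
      /* report Msg, then free it */
      LLVMDisposeErrorMessage(Msg);
    }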
diff --git a/contrib/llvm/include/llvm-c/TargetMachine.h b/contrib/llvm/include/llvm-c/TargetMachine.h
index 7f672b5d10d6..c06e9edc9aaf 100644
--- a/contrib/llvm/include/llvm-c/TargetMachine.h
+++ b/contrib/llvm/include/llvm-c/TargetMachine.h
@@ -39,12 +39,16 @@ typedef enum {
LLVMRelocDefault,
LLVMRelocStatic,
LLVMRelocPIC,
- LLVMRelocDynamicNoPic
+ LLVMRelocDynamicNoPic,
+ LLVMRelocROPI,
+ LLVMRelocRWPI,
+ LLVMRelocROPI_RWPI
} LLVMRelocMode;
typedef enum {
LLVMCodeModelDefault,
LLVMCodeModelJITDefault,
+ LLVMCodeModelTiny,
LLVMCodeModelSmall,
LLVMCodeModelKernel,
LLVMCodeModelMedium,
diff --git a/contrib/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h b/contrib/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h
new file mode 100644
index 000000000000..8756a22e917a
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/Transforms/AggressiveInstCombine.h
@@ -0,0 +1,43 @@
+/*===-- AggressiveInstCombine.h ---------------------------------*- C++ -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMAggressiveInstCombine.a, *|
+|* which combines instructions to form fewer, simple IR instructions. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TRANSFORMS_AGGRESSIVEINSTCOMBINE_H
+#define LLVM_C_TRANSFORMS_AGGRESSIVEINSTCOMBINE_H
+
+#include "llvm-c/Types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup LLVMCTransformsAggressiveInstCombine Aggressive Instruction Combining transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
+/** See llvm::createAggressiveInstCombinerPass function. */
+void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM);
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* defined(__cplusplus) */
+
+#endif
+
diff --git a/contrib/llvm/include/llvm-c/Transforms/Coroutines.h b/contrib/llvm/include/llvm-c/Transforms/Coroutines.h
new file mode 100644
index 000000000000..827e30fb2d7c
--- /dev/null
+++ b/contrib/llvm/include/llvm-c/Transforms/Coroutines.h
@@ -0,0 +1,55 @@
+/*===-- Coroutines.h - Coroutines Library C Interface -----------*- C++ -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMCoroutines.a, which *|
+|* implements the coroutine lowering transformations of the LLVM IR. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TRANSFORMS_COROUTINES_H
+#define LLVM_C_TRANSFORMS_COROUTINES_H
+
+#include "llvm-c/Types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup LLVMCTransformsCoroutines Coroutine transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
+/** See llvm::createCoroEarlyPass function. */
+void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM);
+
+/** See llvm::createCoroSplitPass function. */
+void LLVMAddCoroSplitPass(LLVMPassManagerRef PM);
+
+/** See llvm::createCoroElidePass function. */
+void LLVMAddCoroElidePass(LLVMPassManagerRef PM);
+
+/** See llvm::createCoroCleanupPass function. */
+void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM);
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* defined(__cplusplus) */
+
+#endif
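A rough sketch of wiring the coroutine passes into a legacy pass manager (assuming `PM` is an existing LLVMPassManagerRef); the early/split/elide/cleanup ordering mirrors the usual C++ pipeline but is an assumption here, not something this header mandates:

    LLVMAddCoroEarlyPass(PM);    /* lowerings that must run early */
    LLVMAddCoroSplitPass(PM);    /* split coroutines into ramp/resume/destroy pieces */
    LLVMAddCoroElidePass(PM);    /* elide heap allocation when the frame can live on the stack */
    LLVMAddCoroCleanupPass(PM);  /* lower any remaining coroutine intrinsics */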
diff --git a/contrib/llvm/include/llvm-c/Transforms/Scalar.h b/contrib/llvm/include/llvm-c/Transforms/Scalar.h
index f55cdce86be9..3c3bb4eb9b82 100644
--- a/contrib/llvm/include/llvm-c/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm-c/Transforms/Scalar.h
@@ -35,9 +35,6 @@ extern "C" {
/** See llvm::createAggressiveDCEPass function. */
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
-/** See llvm::createAggressiveInstCombinerPass function. */
-void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM);
-
/** See llvm::createBitTrackingDCEPass function. */
void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM);
@@ -95,6 +92,9 @@ void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM);
/** See llvm::createLoopUnswitchPass function. */
void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM);
+/** See llvm::createLowerAtomicPass function. */
+void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM);
+
/** See llvm::createMemCpyOptPass function. */
void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM);
@@ -153,6 +153,9 @@ void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM);
/** See llvm::createBasicAliasAnalysisPass function */
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM);
+/** See llvm::createUnifyFunctionExitNodesPass function */
+void LLVMAddUnifyFunctionExitNodesPass(LLVMPassManagerRef PM);
+
/**
* @}
*/
diff --git a/contrib/llvm/include/llvm-c/Types.h b/contrib/llvm/include/llvm-c/Types.h
index 4a33542e86cc..ce1acf3e0421 100644
--- a/contrib/llvm/include/llvm-c/Types.h
+++ b/contrib/llvm/include/llvm-c/Types.h
@@ -90,6 +90,20 @@ typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
typedef struct LLVMOpaqueMetadata *LLVMMetadataRef;
/**
+ * Represents an LLVM Named Metadata Node.
+ *
+ * This models llvm::NamedMDNode.
+ */
+typedef struct LLVMOpaqueNamedMDNode *LLVMNamedMDNodeRef;
+
+/**
+ * Represents an entry in a Global Object's metadata attachments.
+ *
+ * This models std::pair<unsigned, MDNode *>
+ */
+typedef struct LLVMOpaqueValueMetadataEntry LLVMValueMetadataEntry;
+
+/**
* Represents an LLVM basic block builder.
*
* This models llvm::IRBuilder.
diff --git a/contrib/llvm/include/llvm-c/lto.h b/contrib/llvm/include/llvm-c/lto.h
index 1acd610f70ac..090cd34af4e9 100644
--- a/contrib/llvm/include/llvm-c/lto.h
+++ b/contrib/llvm/include/llvm-c/lto.h
@@ -44,7 +44,7 @@ typedef bool lto_bool_t;
* @{
*/
-#define LTO_API_VERSION 22
+#define LTO_API_VERSION 23
/**
* \since prior to LTO_API_VERSION=3
@@ -828,6 +828,16 @@ extern void thinlto_codegen_set_cache_size_bytes(thinlto_code_gen_t cg,
unsigned max_size_bytes);
/**
+ * Same as thinlto_codegen_set_cache_size_bytes, except the maximum size is in
+ * megabytes (2^20 bytes).
+ *
+ * \since LTO_API_VERSION=23
+ */
+extern void
+thinlto_codegen_set_cache_size_megabytes(thinlto_code_gen_t cg,
+ unsigned max_size_megabytes);
+
+/**
* Sets the maximum number of files in the cache directory. An unspecified
* default value will be applied. A value of 0 will be ignored.
*
diff --git a/contrib/llvm/include/llvm/ADT/APFloat.h b/contrib/llvm/include/llvm/ADT/APFloat.h
index 5c59af4c04ba..c6fa5ad674f6 100644
--- a/contrib/llvm/include/llvm/ADT/APFloat.h
+++ b/contrib/llvm/include/llvm/ADT/APFloat.h
@@ -870,13 +870,13 @@ public:
/// Factory for NaN values.
///
/// \param Negative - True iff the NaN generated should be negative.
- /// \param type - The unspecified fill bits for creating the NaN, 0 by
+ /// \param payload - The unspecified fill bits for creating the NaN, 0 by
/// default. The value is truncated as necessary.
static APFloat getNaN(const fltSemantics &Sem, bool Negative = false,
- unsigned type = 0) {
- if (type) {
- APInt fill(64, type);
- return getQNaN(Sem, Negative, &fill);
+ uint64_t payload = 0) {
+ if (payload) {
+ APInt intPayload(64, payload);
+ return getQNaN(Sem, Negative, &intPayload);
} else {
return getQNaN(Sem, Negative, nullptr);
}
@@ -1243,6 +1243,32 @@ inline APFloat maxnum(const APFloat &A, const APFloat &B) {
return (A.compare(B) == APFloat::cmpLessThan) ? B : A;
}
+/// Implements IEEE 754-2018 minimum semantics. Returns the smaller of 2
+/// arguments, propagating NaNs and treating -0 as less than +0.
+LLVM_READONLY
+inline APFloat minimum(const APFloat &A, const APFloat &B) {
+ if (A.isNaN())
+ return A;
+ if (B.isNaN())
+ return B;
+ if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+ return A.isNegative() ? A : B;
+ return (B.compare(A) == APFloat::cmpLessThan) ? B : A;
+}
+
+/// Implements IEEE 754-2018 maximum semantics. Returns the larger of 2
+/// arguments, propagating NaNs and treating -0 as less than +0.
+LLVM_READONLY
+inline APFloat maximum(const APFloat &A, const APFloat &B) {
+ if (A.isNaN())
+ return A;
+ if (B.isNaN())
+ return B;
+ if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+ return A.isNegative() ? B : A;
+ return (A.compare(B) == APFloat::cmpLessThan) ? B : A;
+}
+
} // namespace llvm
#undef APFLOAT_DISPATCH_ON_SEMANTICS
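A few worked values may help contrast these with minnum/maxnum: under the 2018 semantics shown above, minimum(-0.0, +0.0) is -0.0, maximum(-0.0, +0.0) is +0.0, and minimum(NaN, 1.0) is NaN, i.e. the NaN propagates rather than being discarded.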
diff --git a/contrib/llvm/include/llvm/ADT/APInt.h b/contrib/llvm/include/llvm/ADT/APInt.h
index 6bf6b22fb010..6e106ff8bf5d 100644
--- a/contrib/llvm/include/llvm/ADT/APInt.h
+++ b/contrib/llvm/include/llvm/ADT/APInt.h
@@ -31,6 +31,7 @@ class raw_ostream;
template <typename T> class SmallVectorImpl;
template <typename T> class ArrayRef;
+template <typename T> class Optional;
class APInt;
@@ -84,7 +85,7 @@ public:
UP,
};
- static const WordType WORD_MAX = ~WordType(0);
+ static const WordType WORDTYPE_MAX = ~WordType(0);
private:
/// This union is used to store the integer value. When the
@@ -149,7 +150,7 @@ private:
unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
// Mask out the high bits.
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - WordBits);
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
if (isSingleWord())
U.VAL &= mask;
else
@@ -394,7 +395,7 @@ public:
/// This checks to see if the value has all bits of the APInt are set or not.
bool isAllOnesValue() const {
if (isSingleWord())
- return U.VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth);
+ return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
return countTrailingOnesSlowCase() == BitWidth;
}
@@ -495,7 +496,7 @@ public:
assert(numBits != 0 && "numBits must be non-zero");
assert(numBits <= BitWidth && "numBits out of range");
if (isSingleWord())
- return U.VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits));
+ return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
unsigned Ones = countTrailingOnesSlowCase();
return (numBits == Ones) &&
((Ones + countLeadingZerosSlowCase()) == BitWidth);
@@ -559,7 +560,7 @@ public:
///
/// \returns the all-ones value for an APInt of the specified bit-width.
static APInt getAllOnesValue(unsigned numBits) {
- return APInt(numBits, WORD_MAX, true);
+ return APInt(numBits, WORDTYPE_MAX, true);
}
/// Get the '0' value.
@@ -1104,6 +1105,12 @@ public:
APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
+ // Operations that saturate
+ APInt sadd_sat(const APInt &RHS) const;
+ APInt uadd_sat(const APInt &RHS) const;
+ APInt ssub_sat(const APInt &RHS) const;
+ APInt usub_sat(const APInt &RHS) const;
+
/// Array-indexing support.
///
/// \returns the bit value at bitPosition
@@ -1382,7 +1389,7 @@ public:
/// Set every bit to 1.
void setAllBits() {
if (isSingleWord())
- U.VAL = WORD_MAX;
+ U.VAL = WORDTYPE_MAX;
else
// Set all the bits in all the words.
memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
@@ -1394,7 +1401,7 @@ public:
///
/// Set the given bit to 1 whose position is given as "bitPosition".
void setBit(unsigned BitPosition) {
- assert(BitPosition <= BitWidth && "BitPosition out of range");
+ assert(BitPosition < BitWidth && "BitPosition out of range");
WordType Mask = maskBit(BitPosition);
if (isSingleWord())
U.VAL |= Mask;
@@ -1415,7 +1422,7 @@ public:
if (loBit == hiBit)
return;
if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
mask <<= loBit;
if (isSingleWord())
U.VAL |= mask;
@@ -1453,7 +1460,7 @@ public:
///
/// Set the given bit to 0 whose position is given as "bitPosition".
void clearBit(unsigned BitPosition) {
- assert(BitPosition <= BitWidth && "BitPosition out of range");
+ assert(BitPosition < BitWidth && "BitPosition out of range");
WordType Mask = ~maskBit(BitPosition);
if (isSingleWord())
U.VAL &= Mask;
@@ -1469,7 +1476,7 @@ public:
/// Toggle every bit to its opposite value.
void flipAllBits() {
if (isSingleWord()) {
- U.VAL ^= WORD_MAX;
+ U.VAL ^= WORDTYPE_MAX;
clearUnusedBits();
} else {
flipAllBitsSlowCase();
@@ -1758,7 +1765,7 @@ public:
/// referencing 2 in a space where 2 does not exist.
unsigned nearestLogBase2() const {
// Special case when we have a bitwidth of 1. If VAL is 1, then we
- // get 0. If VAL is 0, we get WORD_MAX which gets truncated to
+ // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
// UINT32_MAX.
if (BitWidth == 1)
return U.VAL - 1;
@@ -2166,6 +2173,41 @@ APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
/// Return A sign-divided by B, rounded by the given rounding mode.
APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
+/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
+/// (e.g. 32 for i32).
+/// This function finds the smallest number n, such that
+/// (a) n >= 0 and q(n) = 0, or
+/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
+/// integers, belong to two different intervals [Rk, Rk+R),
+/// where R = 2^BW, and k is an integer.
+/// The idea here is to find when q(n) "overflows" 2^BW, while at the
+/// same time "allowing" subtraction. In unsigned modulo arithmetic a
+/// subtraction (treated as addition of negated numbers) would always
+/// count as an overflow, but here we want to allow values to decrease
+/// and increase as long as they are within the same interval.
+/// Specifically, adding of two negative numbers should not cause an
+/// overflow (as long as the magnitude does not exceed the bit width).
+/// On the other hand, given a positive number, adding a negative
+/// number to it can give a negative result, which would cause the
+/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
+/// treated as a special case of an overflow.
+///
+/// This function returns None if after finding k that minimizes the
+/// positive solution to q(n) = kR, both solutions are contained between
+/// two consecutive integers.
+///
+/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
+/// in arithmetic modulo 2^BW, and treating the values as signed) by the
+/// virtue of *signed* overflow. This function will *not* find such an n,
+/// however it may find a value of n satisfying the inequalities due to
+/// an *unsigned* overflow (if the values are treated as unsigned).
+/// To find a solution for a signed overflow, treat it as a problem of
+/// finding an unsigned overflow with a range width of BW-1.
+///
+/// The returned value may have a different bit width from the input
+/// coefficients.
+Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
+ unsigned RangeWidth);
} // End of APIntOps namespace
// See friend declaration above. This additional declaration is required in
diff --git a/contrib/llvm/include/llvm/ADT/Any.h b/contrib/llvm/include/llvm/ADT/Any.h
index c64c39987542..7faa4c963d3d 100644
--- a/contrib/llvm/include/llvm/ADT/Any.h
+++ b/contrib/llvm/include/llvm/ADT/Any.h
@@ -65,6 +65,16 @@ public:
typename std::enable_if<
llvm::conjunction<
llvm::negation<std::is_same<typename std::decay<T>::type, Any>>,
+ // We also disable this overload when an `Any` object can be
+ // converted to the parameter type because in that case, this
+ // constructor may combine with that conversion during overload
+ // resolution for determining copy constructibility, and then
+ // when we try to determine copy constructibility below we may
+ // infinitely recurse. This is being evaluated by the standards
+ // committee as a potential DR in `std::any` as well, but we're
+ // going ahead and adopting it to work around usage of `Any` with
+ // types that need to be implicitly convertible from an `Any`.
+ llvm::negation<std::is_convertible<Any, typename std::decay<T>::type>>,
std::is_copy_constructible<typename std::decay<T>::type>>::value,
int>::type = 0>
Any(T &&Value) {
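The added negation keeps this converting constructor out of overload resolution for types that are themselves constructible from an Any, which otherwise recurses while the copy-constructibility check below is evaluated. Ordinary use of llvm::Any is unchanged; a small illustrative sketch:

    #include "llvm/ADT/Any.h"
    #include <string>

    void any_demo() {
      llvm::Any A = 42;                       // holds an int
      if (llvm::any_isa<int>(A)) {
        int V = llvm::any_cast<int>(A);       // V == 42
        (void)V;
      }
      A = std::string("hello");               // now holds a std::string
      bool StillInt = llvm::any_isa<int>(A);  // false
      (void)StillInt;
    }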
diff --git a/contrib/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm/include/llvm/ADT/BitVector.h
index 438c7d84c581..9ab1da7c6913 100644
--- a/contrib/llvm/include/llvm/ADT/BitVector.h
+++ b/contrib/llvm/include/llvm/ADT/BitVector.h
@@ -503,6 +503,23 @@ public:
return (*this)[Idx];
}
+ // Push single bit to end of vector.
+ void push_back(bool Val) {
+ unsigned OldSize = Size;
+ unsigned NewSize = Size + 1;
+
+ // Resize, which will insert zeros.
+ // If we already fit then the unused bits will be already zero.
+ if (NewSize > getBitCapacity())
+ resize(NewSize, false);
+ else
+ Size = NewSize;
+
+ // If true, set single bit.
+ if (Val)
+ set(OldSize);
+ }
+
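push_back grows the vector by a single bit, resizing only when the current capacity is exhausted; when the new bit already fits, the unused high bits are known to be zero, so only the size needs bumping. A small sketch:

    #include "llvm/ADT/BitVector.h"

    llvm::BitVector buildMask() {
      llvm::BitVector BV;       // empty
      BV.push_back(true);       // size 1, bit 0 set
      BV.push_back(false);      // size 2
      BV.push_back(true);       // size 3, bits {0, 2} set
      return BV;
    }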
/// Test if any common bits are set.
bool anyCommon(const BitVector &RHS) const {
unsigned ThisWords = NumBitWords(size());
diff --git a/contrib/llvm/include/llvm/ADT/DenseMap.h b/contrib/llvm/include/llvm/ADT/DenseMap.h
index ba60b7972a8f..1f50502fff92 100644
--- a/contrib/llvm/include/llvm/ADT/DenseMap.h
+++ b/contrib/llvm/include/llvm/ADT/DenseMap.h
@@ -25,6 +25,7 @@
#include <cassert>
#include <cstddef>
#include <cstring>
+#include <initializer_list>
#include <iterator>
#include <new>
#include <type_traits>
@@ -38,6 +39,34 @@ namespace detail {
// implementation without requiring two members.
template <typename KeyT, typename ValueT>
struct DenseMapPair : public std::pair<KeyT, ValueT> {
+
+ // FIXME: Switch to inheriting constructors when we drop support for older
+ // clang versions.
+ // NOTE: This default constructor is declared with '{}' rather than
+ // '= default' to work around a separate bug in clang-3.8. This can
+ // also go when we switch to inheriting constructors.
+ DenseMapPair() {}
+
+ DenseMapPair(const KeyT &Key, const ValueT &Value)
+ : std::pair<KeyT, ValueT>(Key, Value) {}
+
+ DenseMapPair(KeyT &&Key, ValueT &&Value)
+ : std::pair<KeyT, ValueT>(std::move(Key), std::move(Value)) {}
+
+ template <typename AltKeyT, typename AltValueT>
+ DenseMapPair(AltKeyT &&AltKey, AltValueT &&AltValue,
+ typename std::enable_if<
+ std::is_convertible<AltKeyT, KeyT>::value &&
+ std::is_convertible<AltValueT, ValueT>::value>::type * = 0)
+ : std::pair<KeyT, ValueT>(std::forward<AltKeyT>(AltKey),
+ std::forward<AltValueT>(AltValue)) {}
+
+ template <typename AltPairT>
+ DenseMapPair(AltPairT &&AltPair,
+ typename std::enable_if<std::is_convertible<
+ AltPairT, std::pair<KeyT, ValueT>>::value>::type * = 0)
+ : std::pair<KeyT, ValueT>(std::forward<AltPairT>(AltPair)) {}
+
KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; }
const KeyT &getFirst() const { return std::pair<KeyT, ValueT>::first; }
ValueT &getSecond() { return std::pair<KeyT, ValueT>::second; }
@@ -46,9 +75,10 @@ struct DenseMapPair : public std::pair<KeyT, ValueT> {
} // end namespace detail
-template <
- typename KeyT, typename ValueT, typename KeyInfoT = DenseMapInfo<KeyT>,
- typename Bucket = detail::DenseMapPair<KeyT, ValueT>, bool IsConst = false>
+template <typename KeyT, typename ValueT,
+ typename KeyInfoT = DenseMapInfo<KeyT>,
+ typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>,
+ bool IsConst = false>
class DenseMapIterator;
template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
@@ -393,7 +423,7 @@ protected:
setNumTombstones(other.getNumTombstones());
if (isPodLike<KeyT>::value && isPodLike<ValueT>::value)
- memcpy(getBuckets(), other.getBuckets(),
+ memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(),
getNumBuckets() * sizeof(BucketT));
else
for (size_t i = 0; i < getNumBuckets(); ++i) {
@@ -639,9 +669,43 @@ public:
}
};
+/// Equality comparison for DenseMap.
+///
+/// Iterates over elements of LHS confirming that each (key, value) pair in LHS
+/// is also in RHS, and that no additional pairs are in RHS.
+/// Equivalent to N calls to RHS.find and N value comparisons. Amortized
+/// complexity is linear, worst case is O(N^2) (if every hash collides).
+template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
+ typename BucketT>
+bool operator==(
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS,
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+
+ for (auto &KV : LHS) {
+ auto I = RHS.find(KV.first);
+ if (I == RHS.end() || I->second != KV.second)
+ return false;
+ }
+
+ return true;
+}
+
+/// Inequality comparison for DenseMap.
+///
+/// Equivalent to !(LHS == RHS). See operator== for performance notes.
+template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
+ typename BucketT>
+bool operator!=(
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS,
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) {
+ return !(LHS == RHS);
+}
+
template <typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
- typename BucketT = detail::DenseMapPair<KeyT, ValueT>>
+ typename BucketT = llvm::detail::DenseMapPair<KeyT, ValueT>>
class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
KeyT, ValueT, KeyInfoT, BucketT> {
friend class DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT, BucketT>;
@@ -676,6 +740,11 @@ public:
this->insert(I, E);
}
+ DenseMap(std::initializer_list<typename BaseT::value_type> Vals) {
+ init(Vals.size());
+ this->insert(Vals.begin(), Vals.end());
+ }
+
~DenseMap() {
this->destroyAll();
operator delete(Buckets);
@@ -798,7 +867,7 @@ private:
template <typename KeyT, typename ValueT, unsigned InlineBuckets = 4,
typename KeyInfoT = DenseMapInfo<KeyT>,
- typename BucketT = detail::DenseMapPair<KeyT, ValueT>>
+ typename BucketT = llvm::detail::DenseMapPair<KeyT, ValueT>>
class SmallDenseMap
: public DenseMapBase<
SmallDenseMap<KeyT, ValueT, InlineBuckets, KeyInfoT, BucketT>, KeyT,
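Together, the initializer_list constructor and the element-wise operator== make it straightforward to build a small map inline and compare it against another one, independent of insertion order. An illustrative sketch:

    #include "llvm/ADT/DenseMap.h"
    #include <cassert>

    void densemap_demo() {
      llvm::DenseMap<int, int> A = {{1, 10}, {2, 20}};
      llvm::DenseMap<int, int> B;
      B[2] = 20;
      B[1] = 10;
      assert(A == B);   // compared per (key, value), not by layout
      B[3] = 30;
      assert(A != B);
    }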
diff --git a/contrib/llvm/include/llvm/ADT/DenseSet.h b/contrib/llvm/include/llvm/ADT/DenseSet.h
index b495e25dd5e5..e85a38587e41 100644
--- a/contrib/llvm/include/llvm/ADT/DenseSet.h
+++ b/contrib/llvm/include/llvm/ADT/DenseSet.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
#include <cstddef>
@@ -67,7 +68,7 @@ public:
explicit DenseSetImpl(unsigned InitialReserve = 0) : TheMap(InitialReserve) {}
DenseSetImpl(std::initializer_list<ValueT> Elems)
- : DenseSetImpl(Elems.size()) {
+ : DenseSetImpl(PowerOf2Ceil(Elems.size())) {
insert(Elems.begin(), Elems.end());
}
@@ -136,8 +137,8 @@ public:
public:
using difference_type = typename MapTy::const_iterator::difference_type;
using value_type = ValueT;
- using pointer = value_type *;
- using reference = value_type &;
+ using pointer = const value_type *;
+ using reference = const value_type &;
using iterator_category = std::forward_iterator_tag;
ConstIterator() = default;
@@ -214,6 +215,34 @@ public:
}
};
+/// Equality comparison for DenseSet.
+///
+/// Iterates over elements of LHS confirming that each element is also a member
+/// of RHS, and that RHS contains no additional values.
+/// Equivalent to N calls to RHS.count. Amortized complexity is linear, worst
+/// case is O(N^2) (if every hash collides).
+template <typename ValueT, typename MapTy, typename ValueInfoT>
+bool operator==(const DenseSetImpl<ValueT, MapTy, ValueInfoT> &LHS,
+ const DenseSetImpl<ValueT, MapTy, ValueInfoT> &RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+
+ for (auto &E : LHS)
+ if (!RHS.count(E))
+ return false;
+
+ return true;
+}
+
+/// Inequality comparison for DenseSet.
+///
+/// Equivalent to !(LHS == RHS). See operator== for performance notes.
+template <typename ValueT, typename MapTy, typename ValueInfoT>
+bool operator!=(const DenseSetImpl<ValueT, MapTy, ValueInfoT> &LHS,
+ const DenseSetImpl<ValueT, MapTy, ValueInfoT> &RHS) {
+ return !(LHS == RHS);
+}
+
} // end namespace detail
/// Implements a dense probed hash-table based set.
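DenseSet gains the same element-wise comparison; ordering is irrelevant and only membership is checked. A short sketch:

    #include "llvm/ADT/DenseSet.h"
    #include <cassert>

    void denseset_demo() {
      llvm::DenseSet<unsigned> A = {1, 2, 3};
      llvm::DenseSet<unsigned> B = {3, 2, 1};
      assert(A == B);
      B.insert(4);
      assert(A != B);
    }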
diff --git a/contrib/llvm/include/llvm/ADT/GraphTraits.h b/contrib/llvm/include/llvm/ADT/GraphTraits.h
index 27c647f4bbbd..d39b50fdc488 100644
--- a/contrib/llvm/include/llvm/ADT/GraphTraits.h
+++ b/contrib/llvm/include/llvm/ADT/GraphTraits.h
@@ -25,6 +25,13 @@ namespace llvm {
// GraphTraits - This class should be specialized by different graph types...
// which is why the default version is empty.
//
+// This template evolved from supporting `BasicBlock` to also later supporting
+// more complex types (e.g. CFG and DomTree).
+//
+// GraphTraits can be used to create a view over a graph interpreting it
+// differently without requiring a copy of the original graph. This could
+// be achieved by carrying more data in NodeRef. See LoopBodyTraits for one
+// example.
template<class GraphType>
struct GraphTraits {
// Elements to provide:
diff --git a/contrib/llvm/include/llvm/ADT/Hashing.h b/contrib/llvm/include/llvm/ADT/Hashing.h
index 9f830baa4243..9175c545b7c9 100644
--- a/contrib/llvm/include/llvm/ADT/Hashing.h
+++ b/contrib/llvm/include/llvm/ADT/Hashing.h
@@ -133,7 +133,7 @@ hash_code hash_value(const std::basic_string<T> &arg);
/// undone. This makes it thread-hostile and very hard to use outside of
/// immediately on start of a simple program designed for reproducible
/// behavior.
-void set_fixed_execution_hash_seed(size_t fixed_value);
+void set_fixed_execution_hash_seed(uint64_t fixed_value);
// All of the implementation details of actually computing the various hash
@@ -316,9 +316,9 @@ struct hash_state {
/// This variable can be set using the \see llvm::set_fixed_execution_seed
/// function. See that function for details. Do not, under any circumstances,
/// set or read this variable.
-extern size_t fixed_seed_override;
+extern uint64_t fixed_seed_override;
-inline size_t get_execution_seed() {
+inline uint64_t get_execution_seed() {
// FIXME: This needs to be a per-execution seed. This is just a placeholder
// implementation. Switching to a per-execution seed is likely to flush out
// instability bugs and so will happen as its own commit.
@@ -326,8 +326,7 @@ inline size_t get_execution_seed() {
// However, if there is a fixed seed override set the first time this is
// called, return that instead of the per-execution seed.
const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
- static size_t seed = fixed_seed_override ? fixed_seed_override
- : (size_t)seed_prime;
+ static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime;
return seed;
}
@@ -402,7 +401,7 @@ bool store_and_advance(char *&buffer_ptr, char *buffer_end, const T& value,
/// combining them, this (as an optimization) directly combines the integers.
template <typename InputIteratorT>
hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
- const size_t seed = get_execution_seed();
+ const uint64_t seed = get_execution_seed();
char buffer[64], *buffer_ptr = buffer;
char *const buffer_end = std::end(buffer);
while (first != last && store_and_advance(buffer_ptr, buffer_end,
@@ -446,7 +445,7 @@ hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
template <typename ValueT>
typename std::enable_if<is_hashable_data<ValueT>::value, hash_code>::type
hash_combine_range_impl(ValueT *first, ValueT *last) {
- const size_t seed = get_execution_seed();
+ const uint64_t seed = get_execution_seed();
const char *s_begin = reinterpret_cast<const char *>(first);
const char *s_end = reinterpret_cast<const char *>(last);
const size_t length = std::distance(s_begin, s_end);
@@ -496,7 +495,7 @@ namespace detail {
struct hash_combine_recursive_helper {
char buffer[64];
hash_state state;
- const size_t seed;
+ const uint64_t seed;
public:
/// Construct a recursive hash combining helper.
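Widening the seed plumbing from size_t to uint64_t only changes what 32-bit hosts feed into the hash; callers keep using hash_value/hash_combine as before. A small illustrative sketch:

    #include "llvm/ADT/Hashing.h"
    #include <string>

    llvm::hash_code hashPair(int A, const std::string &S) {
      // Both arguments are folded together under the 64-bit execution seed.
      return llvm::hash_combine(A, S);
    }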
diff --git a/contrib/llvm/include/llvm/ADT/ImmutableList.h b/contrib/llvm/include/llvm/ADT/ImmutableList.h
index 1f5e9813798d..0541dc2566ed 100644
--- a/contrib/llvm/include/llvm/ADT/ImmutableList.h
+++ b/contrib/llvm/include/llvm/ADT/ImmutableList.h
@@ -31,8 +31,9 @@ class ImmutableListImpl : public FoldingSetNode {
T Head;
const ImmutableListImpl* Tail;
- ImmutableListImpl(const T& head, const ImmutableListImpl* tail = nullptr)
- : Head(head), Tail(tail) {}
+ template <typename ElemT>
+ ImmutableListImpl(ElemT &&head, const ImmutableListImpl *tail = nullptr)
+ : Head(std::forward<ElemT>(head)), Tail(tail) {}
public:
ImmutableListImpl(const ImmutableListImpl &) = delete;
@@ -66,6 +67,9 @@ public:
using value_type = T;
using Factory = ImmutableListFactory<T>;
+ static_assert(std::is_trivially_destructible<T>::value,
+ "T must be trivially destructible!");
+
private:
const ImmutableListImpl<T>* X;
@@ -90,6 +94,9 @@ public:
bool operator==(const iterator& I) const { return L == I.L; }
bool operator!=(const iterator& I) const { return L != I.L; }
const value_type& operator*() const { return L->getHead(); }
+ const typename std::remove_reference<value_type>::type* operator->() const {
+ return &L->getHead();
+ }
ImmutableList getList() const { return L; }
};
@@ -123,14 +130,14 @@ public:
bool operator==(const ImmutableList& L) const { return isEqual(L); }
/// getHead - Returns the head of the list.
- const T& getHead() {
+ const T& getHead() const {
assert(!isEmpty() && "Cannot get the head of an empty list.");
return X->getHead();
}
/// getTail - Returns the tail of the list, which is another (possibly empty)
/// ImmutableList.
- ImmutableList getTail() {
+ ImmutableList getTail() const {
return X ? X->getTail() : nullptr;
}
@@ -166,7 +173,8 @@ public:
if (ownsAllocator()) delete &getAllocator();
}
- LLVM_NODISCARD ImmutableList<T> concat(const T &Head, ImmutableList<T> Tail) {
+ template <typename ElemT>
+ LLVM_NODISCARD ImmutableList<T> concat(ElemT &&Head, ImmutableList<T> Tail) {
// Profile the new list to see if it already exists in our cache.
FoldingSetNodeID ID;
void* InsertPos;
@@ -179,7 +187,7 @@ public:
// The list does not exist in our cache. Create it.
BumpPtrAllocator& A = getAllocator();
L = (ListTy*) A.Allocate<ListTy>();
- new (L) ListTy(Head, TailImpl);
+ new (L) ListTy(std::forward<ElemT>(Head), TailImpl);
// Insert the new list into the cache.
Cache.InsertNode(L, InsertPos);
@@ -188,16 +196,24 @@ public:
return L;
}
- LLVM_NODISCARD ImmutableList<T> add(const T& D, ImmutableList<T> L) {
- return concat(D, L);
+ template <typename ElemT>
+ LLVM_NODISCARD ImmutableList<T> add(ElemT &&Data, ImmutableList<T> L) {
+ return concat(std::forward<ElemT>(Data), L);
+ }
+
+ template <typename ...CtorArgs>
+ LLVM_NODISCARD ImmutableList<T> emplace(ImmutableList<T> Tail,
+ CtorArgs &&...Args) {
+ return concat(T(std::forward<CtorArgs>(Args)...), Tail);
}
ImmutableList<T> getEmptyList() const {
return ImmutableList<T>(nullptr);
}
- ImmutableList<T> create(const T& X) {
- return Concat(X, getEmptyList());
+ template <typename ElemT>
+ ImmutableList<T> create(ElemT &&Data) {
+ return concat(std::forward<ElemT>(Data), getEmptyList());
}
};
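With the factory methods now forwarding their element (and emplace constructing it in place), typical use still goes through a Factory, and lists share structure through their tails. A sketch; note the new static_assert requires T to be trivially destructible:

    #include "llvm/ADT/ImmutableList.h"

    void immutablelist_demo() {
      llvm::ImmutableList<int>::Factory F;
      llvm::ImmutableList<int> Empty = F.getEmptyList();
      llvm::ImmutableList<int> L1 = F.add(3, Empty);   // [3]
      llvm::ImmutableList<int> L2 = F.add(2, L1);      // [2, 3], shares [3]
      llvm::ImmutableList<int> L3 = F.emplace(L2, 1);  // [1, 2, 3]
      (void)L3;
    }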
diff --git a/contrib/llvm/include/llvm/ADT/IntervalMap.h b/contrib/llvm/include/llvm/ADT/IntervalMap.h
index f71366811218..2af61049e5af 100644
--- a/contrib/llvm/include/llvm/ADT/IntervalMap.h
+++ b/contrib/llvm/include/llvm/ADT/IntervalMap.h
@@ -101,6 +101,7 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/RecyclingAllocator.h"
@@ -963,6 +964,7 @@ public:
private:
// The root data is either a RootLeaf or a RootBranchData instance.
+ LLVM_ALIGNAS(RootLeaf) LLVM_ALIGNAS(RootBranchData)
AlignedCharArrayUnion<RootLeaf, RootBranchData> data;
// Tree height.
@@ -977,15 +979,10 @@ private:
// Allocator used for creating external nodes.
Allocator &allocator;
- /// dataAs - Represent data as a node type without breaking aliasing rules.
+ /// Represent data as a node type without breaking aliasing rules.
template <typename T>
T &dataAs() const {
- union {
- const char *d;
- T *t;
- } u;
- u.d = data.buffer;
- return *u.t;
+ return *bit_cast<T *>(const_cast<char *>(data.buffer));
}
const RootLeaf &rootLeaf() const {
@@ -1137,6 +1134,19 @@ public:
I.find(x);
return I;
}
+
+ /// overlaps(a, b) - Return true if the intervals in this map overlap with the
+ /// interval [a;b].
+ bool overlaps(KeyT a, KeyT b) {
+ assert(Traits::nonEmpty(a, b));
+ const_iterator I = find(a);
+ if (!I.valid())
+ return false;
+ // [a;b] and [x;y] overlap iff x<=b and a<=y. The find() call guarantees the
+ // second part (y = find(a).stop()), so it is sufficient to check the first
+ // one.
+ return !Traits::stopLess(b, I.start());
+ }
};
/// treeSafeLookup - Return the mapped value at x or NotFound, assuming a
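overlaps(a, b) gives a yes/no answer without the caller spelling out the iterator logic: find(a) already guarantees a <= stop of the returned interval, so only its start has to be checked against b. A small sketch of use:

    #include "llvm/ADT/IntervalMap.h"

    void intervalmap_demo() {
      using Map = llvm::IntervalMap<unsigned, unsigned>;
      Map::Allocator Alloc;
      Map M(Alloc);
      M.insert(10, 20, 1);           // the closed interval [10;20] maps to 1
      bool Yes = M.overlaps(15, 25); // true: [15;25] intersects [10;20]
      bool No = M.overlaps(30, 40);  // false
      (void)Yes; (void)No;
    }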
diff --git a/contrib/llvm/include/llvm/ADT/Optional.h b/contrib/llvm/include/llvm/ADT/Optional.h
index 353e5d0ec9df..76937d632ae1 100644
--- a/contrib/llvm/include/llvm/ADT/Optional.h
+++ b/contrib/llvm/include/llvm/ADT/Optional.h
@@ -29,7 +29,7 @@ namespace llvm {
namespace optional_detail {
/// Storage for any type.
-template <typename T, bool IsPodLike> struct OptionalStorage {
+template <typename T, bool = isPodLike<T>::value> struct OptionalStorage {
AlignedCharArrayUnion<T> storage;
bool hasVal = false;
@@ -108,28 +108,10 @@ template <typename T, bool IsPodLike> struct OptionalStorage {
}
};
-#if !defined(__GNUC__) || defined(__clang__) // GCC up to GCC7 miscompiles this.
-/// Storage for trivially copyable types only.
-template <typename T> struct OptionalStorage<T, true> {
- AlignedCharArrayUnion<T> storage;
- bool hasVal = false;
-
- OptionalStorage() = default;
-
- OptionalStorage(const T &y) : hasVal(true) { new (storage.buffer) T(y); }
- OptionalStorage &operator=(const T &y) {
- *reinterpret_cast<T *>(storage.buffer) = y;
- hasVal = true;
- return *this;
- }
-
- void reset() { hasVal = false; }
-};
-#endif
} // namespace optional_detail
template <typename T> class Optional {
- optional_detail::OptionalStorage<T, isPodLike<T>::value> Storage;
+ optional_detail::OptionalStorage<T> Storage;
public:
using value_type = T;
diff --git a/contrib/llvm/include/llvm/ADT/PointerIntPair.h b/contrib/llvm/include/llvm/ADT/PointerIntPair.h
index 884d05155bff..6d1b53a90ad2 100644
--- a/contrib/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/contrib/llvm/include/llvm/ADT/PointerIntPair.h
@@ -42,6 +42,8 @@ template <typename PointerTy, unsigned IntBits, typename IntType = unsigned,
typename PtrTraits = PointerLikeTypeTraits<PointerTy>,
typename Info = PointerIntPairInfo<PointerTy, IntBits, PtrTraits>>
class PointerIntPair {
+ // Used by MSVC visualizer and generally helpful for debugging/visualizing.
+ using InfoTy = Info;
intptr_t Value = 0;
public:
diff --git a/contrib/llvm/include/llvm/ADT/PointerSumType.h b/contrib/llvm/include/llvm/ADT/PointerSumType.h
index e37957160d98..a19e45a46218 100644
--- a/contrib/llvm/include/llvm/ADT/PointerSumType.h
+++ b/contrib/llvm/include/llvm/ADT/PointerSumType.h
@@ -10,6 +10,7 @@
#ifndef LLVM_ADT_POINTERSUMTYPE_H
#define LLVM_ADT_POINTERSUMTYPE_H
+#include "llvm/ADT/bit.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include <cassert>
@@ -58,56 +59,142 @@ template <typename TagT, typename... MemberTs> struct PointerSumTypeHelper;
/// and may be desirable to set to a state that is particularly desirable to
/// default construct.
///
+/// Having a supported zero-valued tag also enables getting the address of a
+/// pointer stored with that tag provided it is stored in its natural bit
+/// representation. This works because in the case of a zero-valued tag, the
+/// pointer's value is directly stored into this object and we can expose the
+/// address of that internal storage. This is especially useful when building an
+/// `ArrayRef` of a single pointer stored in a sum type.
+///
/// There is no support for constructing or accessing with a dynamic tag as
/// that would fundamentally violate the type safety provided by the sum type.
template <typename TagT, typename... MemberTs> class PointerSumType {
- uintptr_t Value = 0;
-
using HelperT = detail::PointerSumTypeHelper<TagT, MemberTs...>;
+ // We keep both the raw value and the min tag value's pointer in a union. When
+ // the minimum tag value is zero, this allows code below to cleanly expose the
+ // address of the zero-tag pointer instead of just the zero-tag pointer
+ // itself. This is especially useful when building `ArrayRef`s out of a single
+ // pointer. However, we have to carefully access the union due to the active
+ // member potentially changing. When we *store* a new value, we directly
+ // access the union to allow us to store using the obvious types. However,
+ // when we *read* a value, we copy the underlying storage out to avoid relying
+ // on one member or the other being active.
+ union StorageT {
+ // Ensure we get a null default constructed value. We don't use a member
+ // initializer because some compilers seem to not implement those correctly
+ // for a union.
+ StorageT() : Value(0) {}
+
+ uintptr_t Value;
+
+ typename HelperT::template Lookup<HelperT::MinTag>::PointerT MinTagPointer;
+ };
+
+ StorageT Storage;
+
public:
constexpr PointerSumType() = default;
+ /// A typed setter to a given tagged member of the sum type.
+ template <TagT N>
+ void set(typename HelperT::template Lookup<N>::PointerT Pointer) {
+ void *V = HelperT::template Lookup<N>::TraitsT::getAsVoidPointer(Pointer);
+ assert((reinterpret_cast<uintptr_t>(V) & HelperT::TagMask) == 0 &&
+ "Pointer is insufficiently aligned to store the discriminant!");
+ Storage.Value = reinterpret_cast<uintptr_t>(V) | N;
+ }
+
/// A typed constructor for a specific tagged member of the sum type.
template <TagT N>
static PointerSumType
create(typename HelperT::template Lookup<N>::PointerT Pointer) {
PointerSumType Result;
- void *V = HelperT::template Lookup<N>::TraitsT::getAsVoidPointer(Pointer);
- assert((reinterpret_cast<uintptr_t>(V) & HelperT::TagMask) == 0 &&
- "Pointer is insufficiently aligned to store the discriminant!");
- Result.Value = reinterpret_cast<uintptr_t>(V) | N;
+ Result.set<N>(Pointer);
return Result;
}
- TagT getTag() const { return static_cast<TagT>(Value & HelperT::TagMask); }
+ /// Clear the value to null with the min tag type.
+ void clear() { set<HelperT::MinTag>(nullptr); }
+
+ TagT getTag() const {
+ return static_cast<TagT>(getOpaqueValue() & HelperT::TagMask);
+ }
template <TagT N> bool is() const { return N == getTag(); }
template <TagT N> typename HelperT::template Lookup<N>::PointerT get() const {
- void *P = is<N>() ? getImpl() : nullptr;
+ void *P = is<N>() ? getVoidPtr() : nullptr;
return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(P);
}
template <TagT N>
typename HelperT::template Lookup<N>::PointerT cast() const {
assert(is<N>() && "This instance has a different active member.");
- return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(getImpl());
+ return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(
+ getVoidPtr());
+ }
+
+ /// If the tag is zero and the pointer's value isn't changed when being
+ /// stored, get the address of the stored value type-punned to the zero-tag's
+ /// pointer type.
+ typename HelperT::template Lookup<HelperT::MinTag>::PointerT const *
+ getAddrOfZeroTagPointer() const {
+ return const_cast<PointerSumType *>(this)->getAddrOfZeroTagPointer();
}
- explicit operator bool() const { return Value & HelperT::PointerMask; }
- bool operator==(const PointerSumType &R) const { return Value == R.Value; }
- bool operator!=(const PointerSumType &R) const { return Value != R.Value; }
- bool operator<(const PointerSumType &R) const { return Value < R.Value; }
- bool operator>(const PointerSumType &R) const { return Value > R.Value; }
- bool operator<=(const PointerSumType &R) const { return Value <= R.Value; }
- bool operator>=(const PointerSumType &R) const { return Value >= R.Value; }
+ /// If the tag is zero and the pointer's value isn't changed when being
+ /// stored, get the address of the stored value type-punned to the zero-tag's
+ /// pointer type.
+ typename HelperT::template Lookup<HelperT::MinTag>::PointerT *
+ getAddrOfZeroTagPointer() {
+ static_assert(HelperT::MinTag == 0, "Non-zero minimum tag value!");
+ assert(is<HelperT::MinTag>() && "The active tag is not zero!");
+ // Store the initial value of the pointer when read out of our storage.
+ auto InitialPtr = get<HelperT::MinTag>();
+ // Now update the active member of the union to be the actual pointer-typed
+ // member so that accessing it indirectly through the returned address is
+ // valid.
+ Storage.MinTagPointer = InitialPtr;
+ // Finally, validate that this was a no-op as expected by reading it back
+ // out using the same underlying-storage read as above.
+ assert(InitialPtr == get<HelperT::MinTag>() &&
+ "Switching to typed storage changed the pointer returned!");
+ // Now we can correctly return an address to typed storage.
+ return &Storage.MinTagPointer;
+ }
+
+ explicit operator bool() const {
+ return getOpaqueValue() & HelperT::PointerMask;
+ }
+ bool operator==(const PointerSumType &R) const {
+ return getOpaqueValue() == R.getOpaqueValue();
+ }
+ bool operator!=(const PointerSumType &R) const {
+ return getOpaqueValue() != R.getOpaqueValue();
+ }
+ bool operator<(const PointerSumType &R) const {
+ return getOpaqueValue() < R.getOpaqueValue();
+ }
+ bool operator>(const PointerSumType &R) const {
+ return getOpaqueValue() > R.getOpaqueValue();
+ }
+ bool operator<=(const PointerSumType &R) const {
+ return getOpaqueValue() <= R.getOpaqueValue();
+ }
+ bool operator>=(const PointerSumType &R) const {
+ return getOpaqueValue() >= R.getOpaqueValue();
+ }
- uintptr_t getOpaqueValue() const { return Value; }
+ uintptr_t getOpaqueValue() const {
+ // Read the underlying storage of the union, regardless of the active
+ // member.
+ return bit_cast<uintptr_t>(Storage);
+ }
protected:
- void *getImpl() const {
- return reinterpret_cast<void *>(Value & HelperT::PointerMask);
+ void *getVoidPtr() const {
+ return reinterpret_cast<void *>(getOpaqueValue() & HelperT::PointerMask);
}
};
@@ -151,8 +238,9 @@ struct PointerSumTypeHelper : MemberTs... {
enum { NumTagBits = Min<MemberTs::TraitsT::NumLowBitsAvailable...>::value };
// Also compute the smallest discriminant and various masks for convenience.
+ constexpr static TagT MinTag =
+ static_cast<TagT>(Min<MemberTs::Tag...>::value);
enum : uint64_t {
- MinTag = Min<MemberTs::Tag...>::value,
PointerMask = static_cast<uint64_t>(-1) << NumTagBits,
TagMask = ~PointerMask
};
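Keeping the raw word and the min-tag pointer in a union is what makes getAddrOfZeroTagPointer workable: when the active tag is the zero-valued one, the stored word is exactly the pointer, so its address can be handed out (for instance to build an ArrayRef of one pointer). A hedged sketch of the API; the enum and type names below are invented for illustration:

    #include "llvm/ADT/PointerSumType.h"

    enum Kind { K_Int, K_Float };   // K_Int == 0 is the minimum (zero) tag

    using IntOrFloatPtr = llvm::PointerSumType<
        Kind, llvm::PointerSumTypeMember<K_Int, int *>,
        llvm::PointerSumTypeMember<K_Float, float *>>;

    void pointersum_demo(int *IP, float *FP) {
      IntOrFloatPtr S = IntOrFloatPtr::create<K_Int>(IP);
      if (S.is<K_Int>()) {
        int **Addr = S.getAddrOfZeroTagPointer();  // valid only for tag 0
        (void)Addr;
      }
      S.set<K_Float>(FP);  // new: overwrite in place instead of re-creating
      S.clear();           // back to a null pointer with the min tag
    }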
diff --git a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
index dc8a9b6e78b2..d77b12228cb1 100644
--- a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -296,12 +296,15 @@ class ReversePostOrderTraversal {
public:
using rpo_iterator = typename std::vector<NodeRef>::reverse_iterator;
+ using const_rpo_iterator = typename std::vector<NodeRef>::const_reverse_iterator;
ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); }
// Because we want a reverse post order, use reverse iterators from the vector
rpo_iterator begin() { return Blocks.rbegin(); }
+ const_rpo_iterator begin() const { return Blocks.crbegin(); }
rpo_iterator end() { return Blocks.rend(); }
+ const_rpo_iterator end() const { return Blocks.crend(); }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/ADT/STLExtras.h b/contrib/llvm/include/llvm/ADT/STLExtras.h
index 94365dd9ced1..f66ca7c08a73 100644
--- a/contrib/llvm/include/llvm/ADT/STLExtras.h
+++ b/contrib/llvm/include/llvm/ADT/STLExtras.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Config/abi-breaking.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
@@ -70,6 +71,16 @@ template <typename B1, typename... Bn>
struct conjunction<B1, Bn...>
: std::conditional<bool(B1::value), conjunction<Bn...>, B1>::type {};
+template <typename T> struct make_const_ptr {
+ using type =
+ typename std::add_pointer<typename std::add_const<T>::type>::type;
+};
+
+template <typename T> struct make_const_ref {
+ using type = typename std::add_lvalue_reference<
+ typename std::add_const<T>::type>::type;
+};
+
//===----------------------------------------------------------------------===//
// Extra additions to <functional>
//===----------------------------------------------------------------------===//
@@ -194,6 +205,12 @@ void adl_swap(T &&lhs, T &&rhs) noexcept(
adl_detail::adl_swap(std::forward<T>(lhs), std::forward<T>(rhs));
}
+/// Test whether \p RangeOrContainer is empty. Similar to C++17 std::empty.
+template <typename T>
+constexpr bool empty(const T &RangeOrContainer) {
+ return adl_begin(RangeOrContainer) == adl_end(RangeOrContainer);
+}
+
// mapped_iterator - This is a simple iterator adapter that causes a function to
// be applied whenever operator* is invoked on the iterator.
@@ -418,9 +435,94 @@ make_filter_range(RangeT &&Range, PredicateT Pred) {
std::end(std::forward<RangeT>(Range)), Pred));
}
-// forward declarations required by zip_shortest/zip_first
+/// A pseudo-iterator adaptor that is designed to implement "early increment"
+/// style loops.
+///
+/// This is *not a normal iterator* and should almost never be used directly. It
+/// is intended primarily to be used with range based for loops and some range
+/// algorithms.
+///
+/// The iterator isn't quite an `OutputIterator` or an `InputIterator` but
+/// somewhere between them. The constraints of these iterators are:
+///
+/// - On construction or after being incremented, it is comparable and
+/// dereferenceable. It is *not* incrementable.
+/// - After being dereferenced, it is neither comparable nor dereferenceable; it
+/// is only incrementable.
+///
+/// This means you can only dereference the iterator once, and you can only
+/// increment it once between dereferences.
+template <typename WrappedIteratorT>
+class early_inc_iterator_impl
+ : public iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
+ WrappedIteratorT, std::input_iterator_tag> {
+ using BaseT =
+ iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
+ WrappedIteratorT, std::input_iterator_tag>;
+
+ using PointerT = typename std::iterator_traits<WrappedIteratorT>::pointer;
+
+protected:
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ bool IsEarlyIncremented = false;
+#endif
+
+public:
+ early_inc_iterator_impl(WrappedIteratorT I) : BaseT(I) {}
+
+ using BaseT::operator*;
+ typename BaseT::reference operator*() {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ assert(!IsEarlyIncremented && "Cannot dereference twice!");
+ IsEarlyIncremented = true;
+#endif
+ return *(this->I)++;
+ }
+
+ using BaseT::operator++;
+ early_inc_iterator_impl &operator++() {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ assert(IsEarlyIncremented && "Cannot increment before dereferencing!");
+ IsEarlyIncremented = false;
+#endif
+ return *this;
+ }
+
+ using BaseT::operator==;
+ bool operator==(const early_inc_iterator_impl &RHS) const {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ assert(!IsEarlyIncremented && "Cannot compare after dereferencing!");
+#endif
+ return BaseT::operator==(RHS);
+ }
+};
+
+/// Make a range that does early increment to allow mutation of the underlying
+/// range without disrupting iteration.
+///
+/// The underlying iterator will be incremented immediately after it is
+/// dereferenced, allowing deletion of the current node or insertion of nodes to
+/// not disrupt iteration provided they do not invalidate the *next* iterator --
+/// the current iterator can be invalidated.
+///
+/// This requires a very exact pattern of use that is only really suitable to
+/// range based for loops and other range algorithms that explicitly guarantee
+/// to dereference exactly once each element, and to increment exactly once each
+/// element.
+template <typename RangeT>
+iterator_range<early_inc_iterator_impl<detail::IterOfRange<RangeT>>>
+make_early_inc_range(RangeT &&Range) {
+ using EarlyIncIteratorT =
+ early_inc_iterator_impl<detail::IterOfRange<RangeT>>;
+ return make_range(EarlyIncIteratorT(std::begin(std::forward<RangeT>(Range))),
+ EarlyIncIteratorT(std::end(std::forward<RangeT>(Range))));
+}
+
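make_early_inc_range is the intended entry point; the iterator itself should rarely be spelled out. Because the wrapped iterator is advanced as part of the dereference, the loop body may erase the element it is currently looking at. A small sketch:

    #include "llvm/ADT/STLExtras.h"
    #include <map>
    #include <string>

    void dropEmptyValues(std::map<int, std::string> &M) {
      for (auto &KV : llvm::make_early_inc_range(M))
        if (KV.second.empty())
          M.erase(KV.first);  // safe: only the just-visited node is invalidated
    }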
+// forward declarations required by zip_shortest/zip_first/zip_longest
template <typename R, typename UnaryPredicate>
bool all_of(R &&range, UnaryPredicate P);
+template <typename R, typename UnaryPredicate>
+bool any_of(R &&range, UnaryPredicate P);
template <size_t... I> struct index_sequence;
@@ -571,6 +673,132 @@ detail::zippy<detail::zip_first, T, U, Args...> zip_first(T &&t, U &&u,
std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
}
+namespace detail {
+template <typename Iter>
+static Iter next_or_end(const Iter &I, const Iter &End) {
+ if (I == End)
+ return End;
+ return std::next(I);
+}
+
+template <typename Iter>
+static auto deref_or_none(const Iter &I, const Iter &End)
+ -> llvm::Optional<typename std::remove_const<
+ typename std::remove_reference<decltype(*I)>::type>::type> {
+ if (I == End)
+ return None;
+ return *I;
+}
+
+template <typename Iter> struct ZipLongestItemType {
+ using type =
+ llvm::Optional<typename std::remove_const<typename std::remove_reference<
+ decltype(*std::declval<Iter>())>::type>::type>;
+};
+
+template <typename... Iters> struct ZipLongestTupleType {
+ using type = std::tuple<typename ZipLongestItemType<Iters>::type...>;
+};
+
+template <typename... Iters>
+class zip_longest_iterator
+ : public iterator_facade_base<
+ zip_longest_iterator<Iters...>,
+ typename std::common_type<
+ std::forward_iterator_tag,
+ typename std::iterator_traits<Iters>::iterator_category...>::type,
+ typename ZipLongestTupleType<Iters...>::type,
+ typename std::iterator_traits<typename std::tuple_element<
+ 0, std::tuple<Iters...>>::type>::difference_type,
+ typename ZipLongestTupleType<Iters...>::type *,
+ typename ZipLongestTupleType<Iters...>::type> {
+public:
+ using value_type = typename ZipLongestTupleType<Iters...>::type;
+
+private:
+ std::tuple<Iters...> iterators;
+ std::tuple<Iters...> end_iterators;
+
+ template <size_t... Ns>
+ bool test(const zip_longest_iterator<Iters...> &other,
+ index_sequence<Ns...>) const {
+ return llvm::any_of(
+ std::initializer_list<bool>{std::get<Ns>(this->iterators) !=
+ std::get<Ns>(other.iterators)...},
+ identity<bool>{});
+ }
+
+ template <size_t... Ns> value_type deref(index_sequence<Ns...>) const {
+ return value_type(
+ deref_or_none(std::get<Ns>(iterators), std::get<Ns>(end_iterators))...);
+ }
+
+ template <size_t... Ns>
+ decltype(iterators) tup_inc(index_sequence<Ns...>) const {
+ return std::tuple<Iters...>(
+ next_or_end(std::get<Ns>(iterators), std::get<Ns>(end_iterators))...);
+ }
+
+public:
+ zip_longest_iterator(std::pair<Iters &&, Iters &&>... ts)
+ : iterators(std::forward<Iters>(ts.first)...),
+ end_iterators(std::forward<Iters>(ts.second)...) {}
+
+ value_type operator*() { return deref(index_sequence_for<Iters...>{}); }
+
+ value_type operator*() const { return deref(index_sequence_for<Iters...>{}); }
+
+ zip_longest_iterator<Iters...> &operator++() {
+ iterators = tup_inc(index_sequence_for<Iters...>{});
+ return *this;
+ }
+
+ bool operator==(const zip_longest_iterator<Iters...> &other) const {
+ return !test(other, index_sequence_for<Iters...>{});
+ }
+};
+
+template <typename... Args> class zip_longest_range {
+public:
+ using iterator =
+ zip_longest_iterator<decltype(adl_begin(std::declval<Args>()))...>;
+ using iterator_category = typename iterator::iterator_category;
+ using value_type = typename iterator::value_type;
+ using difference_type = typename iterator::difference_type;
+ using pointer = typename iterator::pointer;
+ using reference = typename iterator::reference;
+
+private:
+ std::tuple<Args...> ts;
+
+ template <size_t... Ns> iterator begin_impl(index_sequence<Ns...>) const {
+ return iterator(std::make_pair(adl_begin(std::get<Ns>(ts)),
+ adl_end(std::get<Ns>(ts)))...);
+ }
+
+ template <size_t... Ns> iterator end_impl(index_sequence<Ns...>) const {
+ return iterator(std::make_pair(adl_end(std::get<Ns>(ts)),
+ adl_end(std::get<Ns>(ts)))...);
+ }
+
+public:
+ zip_longest_range(Args &&... ts_) : ts(std::forward<Args>(ts_)...) {}
+
+ iterator begin() const { return begin_impl(index_sequence_for<Args...>{}); }
+ iterator end() const { return end_impl(index_sequence_for<Args...>{}); }
+};
+} // namespace detail
+
+/// Iterate over two or more iterators at the same time. Iteration continues
+/// until all iterators reach the end. The llvm::Optional only contains a value
+/// if the iterator has not reached the end.
+template <typename T, typename U, typename... Args>
+detail::zip_longest_range<T, U, Args...> zip_longest(T &&t, U &&u,
+ Args &&... args) {
+ return detail::zip_longest_range<T, U, Args...>(
+ std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
+}
+
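Unlike zip/zip_first, zip_longest keeps going until every range is exhausted and pads the shorter ones with None, so each tuple element is an Optional. An illustrative sketch:

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include <tuple>

    void zip_longest_demo() {
      llvm::SmallVector<int, 4> A = {1, 2, 3};
      llvm::SmallVector<char, 4> B = {'x', 'y'};
      for (auto T : llvm::zip_longest(A, B)) {
        llvm::Optional<int> L = std::get<0>(T);
        llvm::Optional<char> R = std::get<1>(T);
        // On the third iteration L is 3 and R is llvm::None.
        (void)L; (void)R;
      }
    }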
/// Iterator wrapper that concatenates sequences together.
///
/// This can concatenate different iterators, even with different types, into
@@ -593,18 +821,20 @@ class concat_iterator
/// Note that something like iterator_range seems nice at first here, but the
/// range properties are of little benefit and end up getting in the way
/// because we need to do mutation on the current iterators.
- std::tuple<std::pair<IterTs, IterTs>...> IterPairs;
+ std::tuple<IterTs...> Begins;
+ std::tuple<IterTs...> Ends;
/// Attempts to increment a specific iterator.
///
/// Returns true if it was able to increment the iterator. Returns false if
/// the iterator is already at the end iterator.
template <size_t Index> bool incrementHelper() {
- auto &IterPair = std::get<Index>(IterPairs);
- if (IterPair.first == IterPair.second)
+ auto &Begin = std::get<Index>(Begins);
+ auto &End = std::get<Index>(Ends);
+ if (Begin == End)
return false;
- ++IterPair.first;
+ ++Begin;
return true;
}
@@ -628,11 +858,12 @@ class concat_iterator
/// dereferences the iterator and returns the address of the resulting
/// reference.
template <size_t Index> ValueT *getHelper() const {
- auto &IterPair = std::get<Index>(IterPairs);
- if (IterPair.first == IterPair.second)
+ auto &Begin = std::get<Index>(Begins);
+ auto &End = std::get<Index>(Ends);
+ if (Begin == End)
return nullptr;
- return &*IterPair.first;
+ return &*Begin;
}
/// Finds the first non-end iterator, dereferences, and returns the resulting
@@ -659,7 +890,7 @@ public:
/// iterators.
template <typename... RangeTs>
explicit concat_iterator(RangeTs &&... Ranges)
- : IterPairs({std::begin(Ranges), std::end(Ranges)}...) {}
+ : Begins(std::begin(Ranges)...), Ends(std::end(Ranges)...) {}
using BaseT::operator++;
@@ -671,7 +902,7 @@ public:
ValueT &operator*() const { return get(index_sequence_for<IterTs...>()); }
bool operator==(const concat_iterator &RHS) const {
- return IterPairs == RHS.IterPairs;
+ return Begins == RHS.Begins && Ends == RHS.Ends;
}
};
@@ -740,6 +971,19 @@ struct less_second {
}
};
+/// \brief Function object to apply a binary function to the first component of
+/// a std::pair.
+template<typename FuncTy>
+struct on_first {
+ FuncTy func;
+
+ template <typename T>
+ auto operator()(const T &lhs, const T &rhs) const
+ -> decltype(func(lhs.first, rhs.first)) {
+ return func(lhs.first, rhs.first);
+ }
+};
+
// A subset of N3658. More stuff can be added as-needed.
/// Represents a compile-time sequence of integers.
@@ -877,6 +1121,10 @@ inline void sort(IteratorTy Start, IteratorTy End) {
std::sort(Start, End);
}
+template <typename Container> inline void sort(Container &&C) {
+ llvm::sort(adl_begin(C), adl_end(C));
+}
+
template <typename IteratorTy, typename Compare>
inline void sort(IteratorTy Start, IteratorTy End, Compare Comp) {
#ifdef EXPENSIVE_CHECKS
@@ -886,6 +1134,11 @@ inline void sort(IteratorTy Start, IteratorTy End, Compare Comp) {
std::sort(Start, End, Comp);
}
+template <typename Container, typename Compare>
+inline void sort(Container &&C, Compare Comp) {
+ llvm::sort(adl_begin(C), adl_end(C), Comp);
+}
+
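The container overloads simply forward to the existing iterator form of llvm::sort, so a whole range can be sorted without spelling out begin/end. A small sketch:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    void sort_demo(llvm::SmallVector<int, 8> &V) {
      llvm::sort(V);                                       // ascending
      llvm::sort(V, [](int L, int R) { return L > R; });   // descending
    }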
//===----------------------------------------------------------------------===//
// Extra additions to <algorithm>
//===----------------------------------------------------------------------===//
@@ -908,6 +1161,18 @@ void DeleteContainerSeconds(Container &C) {
C.clear();
}
+/// Get the size of a range. This is a wrapper function around std::distance
+/// which is only enabled when the operation is O(1).
+template <typename R>
+auto size(R &&Range, typename std::enable_if<
+ std::is_same<typename std::iterator_traits<decltype(
+ Range.begin())>::iterator_category,
+ std::random_access_iterator_tag>::value,
+ void>::type * = nullptr)
+ -> decltype(std::distance(Range.begin(), Range.end())) {
+ return std::distance(Range.begin(), Range.end());
+}
+
/// Provide wrappers to std::for_each which take ranges instead of having to
/// pass begin/end explicitly.
template <typename R, typename UnaryPredicate>
@@ -1018,6 +1283,33 @@ auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range)) {
return std::lower_bound(adl_begin(Range), adl_end(Range), I);
}
+template <typename R, typename ForwardIt, typename Compare>
+auto lower_bound(R &&Range, ForwardIt I, Compare C)
+ -> decltype(adl_begin(Range)) {
+ return std::lower_bound(adl_begin(Range), adl_end(Range), I, C);
+}
+
+/// Provide wrappers to std::upper_bound which take ranges instead of having to
+/// pass begin/end explicitly.
+template <typename R, typename ForwardIt>
+auto upper_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range)) {
+ return std::upper_bound(adl_begin(Range), adl_end(Range), I);
+}
+
+template <typename R, typename ForwardIt, typename Compare>
+auto upper_bound(R &&Range, ForwardIt I, Compare C)
+ -> decltype(adl_begin(Range)) {
+ return std::upper_bound(adl_begin(Range), adl_end(Range), I, C);
+}
+/// Wrapper function around std::equal to detect if all elements
+/// in a container are the same.
+template <typename R>
+bool is_splat(R &&Range) {
+ size_t range_size = size(Range);
+ return range_size != 0 && (range_size == 1 ||
+ std::equal(adl_begin(Range) + 1, adl_end(Range), adl_begin(Range)));
+}
+
/// Given a range of type R, iterate the entire range and return a
/// SmallVector with elements of the vector. This is useful, for example,
/// when you want to iterate a range and then sort the results.
@@ -1039,18 +1331,6 @@ void erase_if(Container &C, UnaryPredicate P) {
C.erase(remove_if(C, P), C.end());
}
-/// Get the size of a range. This is a wrapper function around std::distance
-/// which is only enabled when the operation is O(1).
-template <typename R>
-auto size(R &&Range, typename std::enable_if<
- std::is_same<typename std::iterator_traits<decltype(
- Range.begin())>::iterator_category,
- std::random_access_iterator_tag>::value,
- void>::type * = nullptr)
- -> decltype(std::distance(Range.begin(), Range.end())) {
- return std::distance(Range.begin(), Range.end());
-}
-
//===----------------------------------------------------------------------===//
// Extra additions to <memory>
//===----------------------------------------------------------------------===//
@@ -1263,6 +1543,40 @@ auto apply_tuple(F &&f, Tuple &&t) -> decltype(detail::apply_tuple_impl(
Indices{});
}
+/// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N)
+/// time. Not meant for use with random-access iterators.
+template <typename IterTy>
+bool hasNItems(
+ IterTy &&Begin, IterTy &&End, unsigned N,
+ typename std::enable_if<
+ !std::is_same<
+ typename std::iterator_traits<typename std::remove_reference<
+ decltype(Begin)>::type>::iterator_category,
+ std::random_access_iterator_tag>::value,
+ void>::type * = nullptr) {
+ for (; N; --N, ++Begin)
+ if (Begin == End)
+ return false; // Too few.
+ return Begin == End;
+}
+
+/// Return true if the sequence [Begin, End) has N or more items. Runs in O(N)
+/// time. Not meant for use with random-access iterators.
+template <typename IterTy>
+bool hasNItemsOrMore(
+ IterTy &&Begin, IterTy &&End, unsigned N,
+ typename std::enable_if<
+ !std::is_same<
+ typename std::iterator_traits<typename std::remove_reference<
+ decltype(Begin)>::type>::iterator_category,
+ std::random_access_iterator_tag>::value,
+ void>::type * = nullptr) {
+ for (; N; --N, ++Begin)
+ if (Begin == End)
+ return false; // Too few.
+ return true;
+}
+
} // end namespace llvm
#endif // LLVM_ADT_STLEXTRAS_H
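hasNItems and hasNItemsOrMore answer size questions about forward ranges while visiting at most N+1 elements, and is_splat reports whether a range is non-empty with all elements equal. A short sketch:

    #include "llvm/ADT/STLExtras.h"
    #include <list>
    #include <vector>

    void counting_demo() {
      std::list<int> L = {1, 2, 3};  // bidirectional, not random access
      bool ExactlyThree = llvm::hasNItems(L.begin(), L.end(), 3);      // true
      bool AtLeastTwo = llvm::hasNItemsOrMore(L.begin(), L.end(), 2);  // true

      std::vector<int> V = {7, 7, 7};
      bool Uniform = llvm::is_splat(V);  // true: non-empty and all equal
      (void)ExactlyThree; (void)AtLeastTwo; (void)Uniform;
    }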
diff --git a/contrib/llvm/include/llvm/ADT/SmallBitVector.h b/contrib/llvm/include/llvm/ADT/SmallBitVector.h
index b6391746639b..0a73dbd60671 100644
--- a/contrib/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallBitVector.h
@@ -92,10 +92,6 @@ public:
};
private:
- bool isSmall() const {
- return X & uintptr_t(1);
- }
-
BitVector *getPointer() const {
assert(!isSmall());
return reinterpret_cast<BitVector *>(X);
@@ -186,6 +182,8 @@ public:
return make_range(set_bits_begin(), set_bits_end());
}
+ bool isSmall() const { return X & uintptr_t(1); }
+
/// Tests whether there are no bits in this bitvector.
bool empty() const {
return isSmall() ? getSmallSize() == 0 : getPointer()->empty();
@@ -242,7 +240,7 @@ public:
uintptr_t Bits = getSmallBits();
if (Bits == 0)
return -1;
- return NumBaseBits - countLeadingZeros(Bits);
+ return NumBaseBits - countLeadingZeros(Bits) - 1;
}
return getPointer()->find_last();
}
@@ -265,7 +263,9 @@ public:
return -1;
uintptr_t Bits = getSmallBits();
- return NumBaseBits - countLeadingOnes(Bits);
+ // Set unused bits.
+ Bits |= ~uintptr_t(0) << getSmallSize();
+ return NumBaseBits - countLeadingOnes(Bits) - 1;
}
return getPointer()->find_last_unset();
}
@@ -465,6 +465,11 @@ public:
return (*this)[Idx];
}
+ // Push single bit to end of vector.
+ void push_back(bool Val) {
+ resize(size() + 1, Val);
+ }
+
/// Test if any common bits are set.
bool anyCommon(const SmallBitVector &RHS) const {
if (isSmall() && RHS.isSmall())
@@ -482,10 +487,17 @@ public:
bool operator==(const SmallBitVector &RHS) const {
if (size() != RHS.size())
return false;
- if (isSmall())
+ if (isSmall() && RHS.isSmall())
return getSmallBits() == RHS.getSmallBits();
- else
+ else if (!isSmall() && !RHS.isSmall())
return *getPointer() == *RHS.getPointer();
+ else {
+ for (size_t i = 0, e = size(); i != e; ++i) {
+ if ((*this)[i] != RHS[i])
+ return false;
+ }
+ return true;
+ }
}
bool operator!=(const SmallBitVector &RHS) const {
@@ -493,16 +505,19 @@ public:
}
// Intersection, union, disjoint union.
+ // FIXME BitVector::operator&= does not resize the LHS but this does
SmallBitVector &operator&=(const SmallBitVector &RHS) {
resize(std::max(size(), RHS.size()));
- if (isSmall())
+ if (isSmall() && RHS.isSmall())
setSmallBits(getSmallBits() & RHS.getSmallBits());
- else if (!RHS.isSmall())
+ else if (!isSmall() && !RHS.isSmall())
getPointer()->operator&=(*RHS.getPointer());
else {
- SmallBitVector Copy = RHS;
- Copy.resize(size());
- getPointer()->operator&=(*Copy.getPointer());
+ size_t i, e;
+ for (i = 0, e = std::min(size(), RHS.size()); i != e; ++i)
+ (*this)[i] = test(i) && RHS.test(i);
+ for (e = size(); i != e; ++i)
+ reset(i);
}
return *this;
}
@@ -542,28 +557,26 @@ public:
SmallBitVector &operator|=(const SmallBitVector &RHS) {
resize(std::max(size(), RHS.size()));
- if (isSmall())
+ if (isSmall() && RHS.isSmall())
setSmallBits(getSmallBits() | RHS.getSmallBits());
- else if (!RHS.isSmall())
+ else if (!isSmall() && !RHS.isSmall())
getPointer()->operator|=(*RHS.getPointer());
else {
- SmallBitVector Copy = RHS;
- Copy.resize(size());
- getPointer()->operator|=(*Copy.getPointer());
+ for (size_t i = 0, e = RHS.size(); i != e; ++i)
+ (*this)[i] = test(i) || RHS.test(i);
}
return *this;
}
SmallBitVector &operator^=(const SmallBitVector &RHS) {
resize(std::max(size(), RHS.size()));
- if (isSmall())
+ if (isSmall() && RHS.isSmall())
setSmallBits(getSmallBits() ^ RHS.getSmallBits());
- else if (!RHS.isSmall())
+ else if (!isSmall() && !RHS.isSmall())
getPointer()->operator^=(*RHS.getPointer());
else {
- SmallBitVector Copy = RHS;
- Copy.resize(size());
- getPointer()->operator^=(*Copy.getPointer());
+ for (size_t i = 0, e = RHS.size(); i != e; ++i)
+ (*this)[i] = test(i) != RHS.test(i);
}
return *this;
}
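In small mode the highest set bit at index i of the word has NumBaseBits - i - 1 leading zeros, so the extra "- 1" above is what makes find_last (and find_last_unset) return the bit's index rather than one past it. A quick sketch:

    #include "llvm/ADT/SmallBitVector.h"
    #include <cassert>

    void smallbitvector_demo() {
      llvm::SmallBitVector BV(8);   // 8 bits, stored inline (small mode)
      BV.set(2);
      BV.set(5);
      assert(BV.find_first() == 2);
      assert(BV.find_last() == 5);  // highest set bit, not one past it
      BV.push_back(true);           // new: grow by a single bit
      assert(BV.find_last() == 8);
    }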
diff --git a/contrib/llvm/include/llvm/ADT/SmallVector.h b/contrib/llvm/include/llvm/ADT/SmallVector.h
index acb4426b4f45..0636abbb1fbf 100644
--- a/contrib/llvm/include/llvm/ADT/SmallVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallVector.h
@@ -182,7 +182,7 @@ public:
/// SmallVectorTemplateBase<isPodLike = false> - This is where we put method
/// implementations that are designed to work with non-POD-like T's.
-template <typename T, bool isPodLike>
+template <typename T, bool = isPodLike<T>::value>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
protected:
SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
@@ -299,7 +299,7 @@ protected:
// use memcpy here. Note that I and E are iterators and thus might be
// invalid for memcpy if they are equal.
if (I != E)
- memcpy(Dest, I, (E - I) * sizeof(T));
+ memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
}
/// Double the size of the allocated memory, guaranteeing space for at
@@ -310,7 +310,7 @@ public:
void push_back(const T &Elt) {
if (LLVM_UNLIKELY(this->size() >= this->capacity()))
this->grow();
- memcpy(this->end(), &Elt, sizeof(T));
+ memcpy(reinterpret_cast<void *>(this->end()), &Elt, sizeof(T));
this->set_size(this->size() + 1);
}
@@ -320,8 +320,8 @@ public:
/// This class consists of common code factored out of the SmallVector class to
/// reduce code duplication based on the SmallVector 'N' template parameter.
template <typename T>
-class SmallVectorImpl : public SmallVectorTemplateBase<T, isPodLike<T>::value> {
- using SuperClass = SmallVectorTemplateBase<T, isPodLike<T>::value>;
+class SmallVectorImpl : public SmallVectorTemplateBase<T> {
+ using SuperClass = SmallVectorTemplateBase<T>;
public:
using iterator = typename SuperClass::iterator;
diff --git a/contrib/llvm/include/llvm/ADT/SparseBitVector.h b/contrib/llvm/include/llvm/ADT/SparseBitVector.h
index 4cbf40c76805..84e73bcbace8 100644
--- a/contrib/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/contrib/llvm/include/llvm/ADT/SparseBitVector.h
@@ -261,21 +261,33 @@ class SparseBitVector {
BITWORD_SIZE = SparseBitVectorElement<ElementSize>::BITWORD_SIZE
};
- // Pointer to our current Element.
- ElementListIter CurrElementIter;
ElementList Elements;
+ // Pointer to our current Element. This has no visible effect on the external
+ // state of a SparseBitVector; it's just used to improve performance in the
+ // common case of testing/modifying bits with similar indices.
+ mutable ElementListIter CurrElementIter;
// This is like std::lower_bound, except we do linear searching from the
// current position.
- ElementListIter FindLowerBound(unsigned ElementIndex) {
+ ElementListIter FindLowerBoundImpl(unsigned ElementIndex) const {
+
+ // We cache a non-const iterator so we're forced to resort to const_cast to
+ // get the begin/end in the case where 'this' is const. To avoid duplication
+ // of code with the only difference being whether the const cast is present,
+ // 'this' is always const in this particular function and we sort out the
+ // difference in FindLowerBound and FindLowerBoundConst.
+ ElementListIter Begin =
+ const_cast<SparseBitVector<ElementSize> *>(this)->Elements.begin();
+ ElementListIter End =
+ const_cast<SparseBitVector<ElementSize> *>(this)->Elements.end();
if (Elements.empty()) {
- CurrElementIter = Elements.begin();
- return Elements.begin();
+ CurrElementIter = Begin;
+ return CurrElementIter;
}
// Make sure our current iterator is valid.
- if (CurrElementIter == Elements.end())
+ if (CurrElementIter == End)
--CurrElementIter;
// Search from our current iterator, either backwards or forwards,
@@ -284,17 +296,23 @@ class SparseBitVector {
if (CurrElementIter->index() == ElementIndex) {
return ElementIter;
} else if (CurrElementIter->index() > ElementIndex) {
- while (ElementIter != Elements.begin()
+ while (ElementIter != Begin
&& ElementIter->index() > ElementIndex)
--ElementIter;
} else {
- while (ElementIter != Elements.end() &&
+ while (ElementIter != End &&
ElementIter->index() < ElementIndex)
++ElementIter;
}
CurrElementIter = ElementIter;
return ElementIter;
}
+ ElementListConstIter FindLowerBoundConst(unsigned ElementIndex) const {
+ return FindLowerBoundImpl(ElementIndex);
+ }
+ ElementListIter FindLowerBound(unsigned ElementIndex) {
+ return FindLowerBoundImpl(ElementIndex);
+ }
// Iterator to walk set bits in the bitmap. This iterator is a lot uglier
// than it would be, in order to be efficient.
@@ -423,22 +441,12 @@ class SparseBitVector {
public:
using iterator = SparseBitVectorIterator;
- SparseBitVector() {
- CurrElementIter = Elements.begin();
- }
+ SparseBitVector() : Elements(), CurrElementIter(Elements.begin()) {}
- // SparseBitVector copy ctor.
- SparseBitVector(const SparseBitVector &RHS) {
- ElementListConstIter ElementIter = RHS.Elements.begin();
- while (ElementIter != RHS.Elements.end()) {
- Elements.push_back(SparseBitVectorElement<ElementSize>(*ElementIter));
- ++ElementIter;
- }
-
- CurrElementIter = Elements.begin ();
- }
-
- ~SparseBitVector() = default;
+ SparseBitVector(const SparseBitVector &RHS)
+ : Elements(RHS.Elements), CurrElementIter(Elements.begin()) {}
+ SparseBitVector(SparseBitVector &&RHS)
+ : Elements(std::move(RHS.Elements)), CurrElementIter(Elements.begin()) {}
// Clear.
void clear() {
@@ -450,26 +458,23 @@ public:
if (this == &RHS)
return *this;
- Elements.clear();
-
- ElementListConstIter ElementIter = RHS.Elements.begin();
- while (ElementIter != RHS.Elements.end()) {
- Elements.push_back(SparseBitVectorElement<ElementSize>(*ElementIter));
- ++ElementIter;
- }
-
- CurrElementIter = Elements.begin ();
-
+ Elements = RHS.Elements;
+ CurrElementIter = Elements.begin();
+ return *this;
+ }
+ SparseBitVector &operator=(SparseBitVector &&RHS) {
+ Elements = std::move(RHS.Elements);
+ CurrElementIter = Elements.begin();
return *this;
}
// Test, Reset, and Set a bit in the bitmap.
- bool test(unsigned Idx) {
+ bool test(unsigned Idx) const {
if (Elements.empty())
return false;
unsigned ElementIndex = Idx / ElementSize;
- ElementListIter ElementIter = FindLowerBound(ElementIndex);
+ ElementListConstIter ElementIter = FindLowerBoundConst(ElementIndex);
// If we can't find an element that is supposed to contain this bit, there
// is nothing more to do.
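Not part of the patch: a minimal sketch of what the mutable iterator cache enables, namely calling test() through a const reference.

#include "llvm/ADT/SparseBitVector.h"

bool hasBit(const llvm::SparseBitVector<> &BV, unsigned Idx) {
  // CurrElementIter is mutable, so the lookup cache can still be updated here.
  return BV.test(Idx); // previously required a non-const SparseBitVector
}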
diff --git a/contrib/llvm/include/llvm/ADT/StringExtras.h b/contrib/llvm/include/llvm/ADT/StringExtras.h
index 71b0e7527cb7..60a03633a8a6 100644
--- a/contrib/llvm/include/llvm/ADT/StringExtras.h
+++ b/contrib/llvm/include/llvm/ADT/StringExtras.h
@@ -139,22 +139,23 @@ inline std::string utohexstr(uint64_t X, bool LowerCase = false) {
/// Convert buffer \p Input to its hexadecimal representation.
/// The returned string is double the size of \p Input.
-inline std::string toHex(StringRef Input) {
+inline std::string toHex(StringRef Input, bool LowerCase = false) {
static const char *const LUT = "0123456789ABCDEF";
+ const uint8_t Offset = LowerCase ? 32 : 0;
size_t Length = Input.size();
std::string Output;
Output.reserve(2 * Length);
for (size_t i = 0; i < Length; ++i) {
const unsigned char c = Input[i];
- Output.push_back(LUT[c >> 4]);
- Output.push_back(LUT[c & 15]);
+ Output.push_back(LUT[c >> 4] | Offset);
+ Output.push_back(LUT[c & 15] | Offset);
}
return Output;
}
-inline std::string toHex(ArrayRef<uint8_t> Input) {
- return toHex(toStringRef(Input));
+inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
+ return toHex(toStringRef(Input), LowerCase);
}
inline uint8_t hexFromNibbles(char MSB, char LSB) {
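Not part of the patch: an illustrative use of the new LowerCase flag. The '| Offset' trick works because the digits '0'-'9' already have bit 0x20 set in ASCII, so only 'A'-'F' are shifted to lowercase.

#include "llvm/ADT/StringExtras.h"
#include <string>

void hexDemo() {
  std::string Upper = llvm::toHex("\xde\xad");       // "DEAD"
  std::string Lower = llvm::toHex("\xde\xad", true); // "dead"
}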
diff --git a/contrib/llvm/include/llvm/ADT/Triple.h b/contrib/llvm/include/llvm/ADT/Triple.h
index c95b16dd4e8c..e06a68e27317 100644
--- a/contrib/llvm/include/llvm/ADT/Triple.h
+++ b/contrib/llvm/include/llvm/ADT/Triple.h
@@ -55,12 +55,11 @@ public:
bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian)
hexagon, // Hexagon: hexagon
- mips, // MIPS: mips, mipsallegrex
- mipsel, // MIPSEL: mipsel, mipsallegrexel
- mips64, // MIPS64: mips64
- mips64el, // MIPS64EL: mips64el
+ mips, // MIPS: mips, mipsallegrex, mipsr6
+ mipsel, // MIPSEL: mipsel, mipsallegrexel, mipsr6el
+ mips64, // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6
+ mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el
msp430, // MSP430: msp430
- nios2, // NIOSII: nios2
ppc, // PPC: powerpc
ppc64, // PPC64: powerpc64, ppu
ppc64le, // PPC64LE: powerpc64le
@@ -101,6 +100,7 @@ public:
enum SubArchType {
NoSubArch,
+ ARMSubArch_v8_5a,
ARMSubArch_v8_4a,
ARMSubArch_v8_3a,
ARMSubArch_v8_2a,
@@ -125,7 +125,9 @@ public:
KalimbaSubArch_v3,
KalimbaSubArch_v4,
- KalimbaSubArch_v5
+ KalimbaSubArch_v5,
+
+ MipsSubArch_r6
};
enum VendorType {
UnknownVendor,
@@ -182,7 +184,10 @@ public:
Mesa3D,
Contiki,
AMDPAL, // AMD PAL Runtime
- LastOSType = AMDPAL
+ HermitCore, // HermitCore Unikernel/Multikernel
+ Hurd, // GNU/Hurd
+ WASI, // Experimental WebAssembly OS
+ LastOSType = WASI
};
enum EnvironmentType {
UnknownEnvironment,
@@ -578,9 +583,20 @@ public:
return getOS() == Triple::KFreeBSD;
}
+ /// Tests whether the OS is Hurd.
+ bool isOSHurd() const {
+ return getOS() == Triple::Hurd;
+ }
+
+ /// Tests whether the OS is WASI.
+ bool isOSWASI() const {
+ return getOS() == Triple::WASI;
+ }
+
/// Tests whether the OS uses glibc.
bool isOSGlibc() const {
- return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD) &&
+ return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD ||
+ getOS() == Triple::Hurd) &&
!isAndroid();
}
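Not part of the patch: a sketch of the new OS queries, assuming the usual triple spellings map onto the new enumerators.

#include "llvm/ADT/Triple.h"
using llvm::Triple;

void tripleDemo() {
  Triple Wasi("wasm32-unknown-wasi");
  Triple Hurd("i686-unknown-hurd-gnu");
  bool A = Wasi.isOSWASI();  // expected true with the new WASI enumerator
  bool B = Hurd.isOSHurd();  // expected true with the new Hurd enumerator
  bool C = Hurd.isOSGlibc(); // Hurd now counts as a glibc-based OS
  (void)A; (void)B; (void)C;
}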
diff --git a/contrib/llvm/include/llvm/ADT/bit.h b/contrib/llvm/include/llvm/ADT/bit.h
new file mode 100644
index 000000000000..a4aba7b6a9ee
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/bit.h
@@ -0,0 +1,59 @@
+//===-- llvm/ADT/bit.h - C++20 <bit> ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C++20 <bit> header.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_BIT_H
+#define LLVM_ADT_BIT_H
+
+#include "llvm/Support/Compiler.h"
+#include <cstring>
+#include <type_traits>
+
+namespace llvm {
+
+// This implementation of bit_cast is different from the C++17 one in two ways:
+// - It isn't constexpr because that requires compiler support.
+// - It requires trivially-constructible To, to avoid UB in the implementation.
+template <typename To, typename From
+ , typename = typename std::enable_if<sizeof(To) == sizeof(From)>::type
+#if (__has_feature(is_trivially_constructible) && defined(_LIBCPP_VERSION)) || \
+ (defined(__GNUC__) && __GNUC__ >= 5)
+ , typename = typename std::is_trivially_constructible<To>::type
+#elif __has_feature(is_trivially_constructible)
+ , typename = typename std::enable_if<__is_trivially_constructible(To)>::type
+#else
+ // See comment below.
+#endif
+#if (__has_feature(is_trivially_copyable) && defined(_LIBCPP_VERSION)) || \
+ (defined(__GNUC__) && __GNUC__ >= 5)
+ , typename = typename std::enable_if<std::is_trivially_copyable<To>::value>::type
+ , typename = typename std::enable_if<std::is_trivially_copyable<From>::value>::type
+#elif __has_feature(is_trivially_copyable)
+ , typename = typename std::enable_if<__is_trivially_copyable(To)>::type
+ , typename = typename std::enable_if<__is_trivially_copyable(From)>::type
+#else
+ // This case is GCC 4.x. clang with libc++ or libstdc++ never gets here. Unlike
+ // llvm/Support/type_traits.h's isPodLike we don't want to provide a
+ // good-enough answer here: developers in that configuration will hit
+ // compilation failures on the bots instead of locally. That's acceptable
+ // because it's very few developers, and only until we move past C++11.
+#endif
+>
+inline To bit_cast(const From &from) noexcept {
+ To to;
+ std::memcpy(&to, &from, sizeof(To));
+ return to;
+}
+
+} // namespace llvm
+
+#endif
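Not part of the patch: a small example of the new llvm::bit_cast, which copies the object representation between same-sized, trivially copyable types.

#include "llvm/ADT/bit.h"
#include <cstdint>

uint32_t floatBits(float F) {
  // sizeof(uint32_t) == sizeof(float); both are trivially copyable.
  return llvm::bit_cast<uint32_t>(F);
}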
diff --git a/contrib/llvm/include/llvm/ADT/iterator.h b/contrib/llvm/include/llvm/ADT/iterator.h
index 549c5221173d..40e490cf7864 100644
--- a/contrib/llvm/include/llvm/ADT/iterator.h
+++ b/contrib/llvm/include/llvm/ADT/iterator.h
@@ -202,9 +202,7 @@ template <
typename ReferenceT = typename std::conditional<
std::is_same<T, typename std::iterator_traits<
WrappedIteratorT>::value_type>::value,
- typename std::iterator_traits<WrappedIteratorT>::reference, T &>::type,
- // Don't provide these, they are mostly to act as aliases below.
- typename WrappedTraitsT = std::iterator_traits<WrappedIteratorT>>
+ typename std::iterator_traits<WrappedIteratorT>::reference, T &>::type>
class iterator_adaptor_base
: public iterator_facade_base<DerivedT, IteratorCategoryT, T,
DifferenceTypeT, PointerT, ReferenceT> {
@@ -311,8 +309,10 @@ make_pointee_range(RangeT &&Range) {
template <typename WrappedIteratorT,
typename T = decltype(&*std::declval<WrappedIteratorT>())>
class pointer_iterator
- : public iterator_adaptor_base<pointer_iterator<WrappedIteratorT, T>,
- WrappedIteratorT, T> {
+ : public iterator_adaptor_base<
+ pointer_iterator<WrappedIteratorT, T>, WrappedIteratorT,
+ typename std::iterator_traits<WrappedIteratorT>::iterator_category,
+ T> {
mutable T Ptr;
public:
@@ -334,6 +334,34 @@ make_pointer_range(RangeT &&Range) {
PointerIteratorT(std::end(std::forward<RangeT>(Range))));
}
+// Wrapper iterator over iterator ItType, adding DataRef to the type of ItType,
+// to create NodeRef = std::pair<DataRef, InnerTypeOfItType>.
+template <typename ItType, typename NodeRef, typename DataRef>
+class WrappedPairNodeDataIterator
+ : public iterator_adaptor_base<
+ WrappedPairNodeDataIterator<ItType, NodeRef, DataRef>, ItType,
+ typename std::iterator_traits<ItType>::iterator_category, NodeRef,
+ std::ptrdiff_t, NodeRef *, NodeRef &> {
+ using BaseT = iterator_adaptor_base<
+ WrappedPairNodeDataIterator, ItType,
+ typename std::iterator_traits<ItType>::iterator_category, NodeRef,
+ std::ptrdiff_t, NodeRef *, NodeRef &>;
+
+ const DataRef DR;
+ mutable NodeRef NR;
+
+public:
+ WrappedPairNodeDataIterator(ItType Begin, const DataRef DR)
+ : BaseT(Begin), DR(DR) {
+ NR.first = DR;
+ }
+
+ NodeRef &operator*() const {
+ NR.second = *this->I;
+ return NR;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_ADT_ITERATOR_H
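Not part of the patch: a rough sketch of the new wrapper iterator. The constructor pins the pair's first member to the given DataRef and operator* fills the second member from the wrapped iterator.

#include "llvm/ADT/iterator.h"
#include <utility>
#include <vector>

using VecIt = std::vector<int>::const_iterator;
using Node = std::pair<const char *, int>;
using PairIt = llvm::WrappedPairNodeDataIterator<VecIt, Node, const char *>;

int sumTagged(const std::vector<int> &V) {
  int Sum = 0;
  for (PairIt I(V.begin(), "tag"), E(V.end(), "tag"); I != E; ++I)
    Sum += (*I).second; // (*I).first is always "tag"
  return Sum;
}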
diff --git a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
index be3496bbd955..e2a2ac0622e8 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -43,7 +43,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -335,8 +334,7 @@ public:
/// A convenience wrapper around the primary \c alias interface.
AliasResult alias(const Value *V1, const Value *V2) {
- return alias(V1, MemoryLocation::UnknownSize, V2,
- MemoryLocation::UnknownSize);
+ return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown());
}
/// A trivial helper function to check to see if the specified pointers are
@@ -364,7 +362,8 @@ public:
/// A convenience wrapper around the \c isMustAlias helper interface.
bool isMustAlias(const Value *V1, const Value *V2) {
- return alias(V1, 1, V2, 1) == MustAlias;
+ return alias(V1, LocationSize::precise(1), V2, LocationSize::precise(1)) ==
+ MustAlias;
}
/// Checks whether the given location points to constant memory, or if
@@ -382,15 +381,15 @@ public:
/// \name Simple mod/ref information
/// @{
- /// Get the ModRef info associated with a pointer argument of a callsite. The
+ /// Get the ModRef info associated with a pointer argument of a call. The
/// result's bits are set to indicate the allowed aliasing ModRef kinds. Note
/// that these bits do not necessarily account for the overall behavior of
/// the function, but rather only provide additional per-argument
/// information. This never sets ModRefInfo::Must.
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
/// Return the behavior of the given call site.
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
/// Return the behavior when calling the given function.
FunctionModRefBehavior getModRefBehavior(const Function *F);
@@ -406,8 +405,8 @@ public:
/// property (e.g. calls to 'sin' and 'cos').
///
/// This property corresponds to the GCC 'const' attribute.
- bool doesNotAccessMemory(ImmutableCallSite CS) {
- return getModRefBehavior(CS) == FMRB_DoesNotAccessMemory;
+ bool doesNotAccessMemory(const CallBase *Call) {
+ return getModRefBehavior(Call) == FMRB_DoesNotAccessMemory;
}
/// Checks if the specified function is known to never read or write memory.
@@ -434,8 +433,8 @@ public:
/// absence of interfering store instructions, such as CSE of strlen calls.
///
/// This property corresponds to the GCC 'pure' attribute.
- bool onlyReadsMemory(ImmutableCallSite CS) {
- return onlyReadsMemory(getModRefBehavior(CS));
+ bool onlyReadsMemory(const CallBase *Call) {
+ return onlyReadsMemory(getModRefBehavior(Call));
}
/// Checks if the specified function is known to only read from non-volatile
@@ -500,36 +499,12 @@ public:
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
/// getModRefInfo (for call sites) - A convenience wrapper.
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const Value *P,
+ ModRefInfo getModRefInfo(const CallBase *Call, const Value *P,
LocationSize Size) {
- return getModRefInfo(CS, MemoryLocation(P, Size));
- }
-
- /// getModRefInfo (for calls) - Return information about whether
- /// a particular call modifies or reads the specified memory location.
- ModRefInfo getModRefInfo(const CallInst *C, const MemoryLocation &Loc) {
- return getModRefInfo(ImmutableCallSite(C), Loc);
- }
-
- /// getModRefInfo (for calls) - A convenience wrapper.
- ModRefInfo getModRefInfo(const CallInst *C, const Value *P,
- LocationSize Size) {
- return getModRefInfo(C, MemoryLocation(P, Size));
- }
-
- /// getModRefInfo (for invokes) - Return information about whether
- /// a particular invoke modifies or reads the specified memory location.
- ModRefInfo getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) {
- return getModRefInfo(ImmutableCallSite(I), Loc);
- }
-
- /// getModRefInfo (for invokes) - A convenience wrapper.
- ModRefInfo getModRefInfo(const InvokeInst *I, const Value *P,
- LocationSize Size) {
- return getModRefInfo(I, MemoryLocation(P, Size));
+ return getModRefInfo(Call, MemoryLocation(P, Size));
}
/// getModRefInfo (for loads) - Return information about whether
@@ -569,7 +544,7 @@ public:
/// getModRefInfo (for cmpxchges) - A convenience wrapper.
ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P,
- unsigned Size) {
+ LocationSize Size) {
return getModRefInfo(CX, MemoryLocation(P, Size));
}
@@ -579,7 +554,7 @@ public:
/// getModRefInfo (for atomicrmws) - A convenience wrapper.
ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P,
- unsigned Size) {
+ LocationSize Size) {
return getModRefInfo(RMW, MemoryLocation(P, Size));
}
@@ -626,8 +601,8 @@ public:
ModRefInfo getModRefInfo(const Instruction *I,
const Optional<MemoryLocation> &OptLoc) {
if (OptLoc == None) {
- if (auto CS = ImmutableCallSite(I)) {
- return createModRefInfo(getModRefBehavior(CS));
+ if (const auto *Call = dyn_cast<CallBase>(I)) {
+ return createModRefInfo(getModRefBehavior(Call));
}
}
@@ -661,12 +636,12 @@ public:
/// Return information about whether a call and an instruction may refer to
/// the same memory locations.
- ModRefInfo getModRefInfo(Instruction *I, ImmutableCallSite Call);
+ ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call);
/// Return information about whether two call sites may refer to the same set
/// of memory locations. See the AA documentation for details:
/// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
/// Return information about whether a particular call site modifies
/// or reads the specified memory location \p MemLoc before instruction \p I
@@ -777,25 +752,25 @@ public:
/// that these bits do not necessarily account for the overall behavior of
/// the function, but rather only provide additional per-argument
/// information.
- virtual ModRefInfo getArgModRefInfo(ImmutableCallSite CS,
+ virtual ModRefInfo getArgModRefInfo(const CallBase *Call,
unsigned ArgIdx) = 0;
/// Return the behavior of the given call site.
- virtual FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) = 0;
+ virtual FunctionModRefBehavior getModRefBehavior(const CallBase *Call) = 0;
/// Return the behavior when calling the given function.
virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0;
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
- virtual ModRefInfo getModRefInfo(ImmutableCallSite CS,
+ virtual ModRefInfo getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) = 0;
/// Return information about whether two call sites may refer to the same set
/// of memory locations. See the AA documentation for details:
/// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
- virtual ModRefInfo getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) = 0;
+ virtual ModRefInfo getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) = 0;
/// @}
};
@@ -827,26 +802,26 @@ public:
return Result.pointsToConstantMemory(Loc, OrLocal);
}
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) override {
- return Result.getArgModRefInfo(CS, ArgIdx);
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override {
+ return Result.getArgModRefInfo(Call, ArgIdx);
}
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
- return Result.getModRefBehavior(CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call) override {
+ return Result.getModRefBehavior(Call);
}
FunctionModRefBehavior getModRefBehavior(const Function *F) override {
return Result.getModRefBehavior(F);
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS,
+ ModRefInfo getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) override {
- return Result.getModRefInfo(CS, Loc);
+ return Result.getModRefInfo(Call, Loc);
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return Result.getModRefInfo(CS1, CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) override {
+ return Result.getModRefInfo(Call1, Call2);
}
};
@@ -901,25 +876,28 @@ protected:
: CurrentResult.pointsToConstantMemory(Loc, OrLocal);
}
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
- return AAR ? AAR->getArgModRefInfo(CS, ArgIdx) : CurrentResult.getArgModRefInfo(CS, ArgIdx);
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
+ return AAR ? AAR->getArgModRefInfo(Call, ArgIdx)
+ : CurrentResult.getArgModRefInfo(Call, ArgIdx);
}
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
- return AAR ? AAR->getModRefBehavior(CS) : CurrentResult.getModRefBehavior(CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
+ return AAR ? AAR->getModRefBehavior(Call)
+ : CurrentResult.getModRefBehavior(Call);
}
FunctionModRefBehavior getModRefBehavior(const Function *F) {
return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F);
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
- return AAR ? AAR->getModRefInfo(CS, Loc)
- : CurrentResult.getModRefInfo(CS, Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
+ return AAR ? AAR->getModRefInfo(Call, Loc)
+ : CurrentResult.getModRefInfo(Call, Loc);
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
- return AAR ? AAR->getModRefInfo(CS1, CS2) : CurrentResult.getModRefInfo(CS1, CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
+ return AAR ? AAR->getModRefInfo(Call1, Call2)
+ : CurrentResult.getModRefInfo(Call1, Call2);
}
};
@@ -951,11 +929,11 @@ public:
return false;
}
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
return ModRefInfo::ModRef;
}
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
return FMRB_UnknownModRefBehavior;
}
@@ -963,11 +941,11 @@ public:
return FMRB_UnknownModRefBehavior;
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
return ModRefInfo::ModRef;
}
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
return ModRefInfo::ModRef;
}
};
@@ -1075,6 +1053,29 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+/// A wrapper pass for external alias analyses. This just squirrels away the
+/// callback used to run any analyses and register their results.
+struct ExternalAAWrapperPass : ImmutablePass {
+ using CallbackT = std::function<void(Pass &, Function &, AAResults &)>;
+
+ CallbackT CB;
+
+ static char ID;
+
+ ExternalAAWrapperPass() : ImmutablePass(ID) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit ExternalAAWrapperPass(CallbackT CB)
+ : ImmutablePass(ID), CB(std::move(CB)) {
+ initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
FunctionPass *createAAResultsWrapperPass();
/// A wrapper pass around a callback which can be used to populate the
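Not part of the patch: a sketch of how a client migrates from ImmutableCallSite to CallBase with the updated AAResults interface.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

ModRefInfo modRefForInst(AAResults &AA, const Instruction &I,
                         const MemoryLocation &Loc) {
  if (const auto *Call = dyn_cast<CallBase>(&I))
    return AA.getModRefInfo(Call, Loc); // covers both calls and invokes
  return ModRefInfo::ModRef;            // conservative answer otherwise
}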
diff --git a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
index c9680ff40d1e..7ed5cd5c4734 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -52,9 +52,13 @@ class AliasSet : public ilist_node<AliasSet> {
PointerRec **PrevInList = nullptr;
PointerRec *NextInList = nullptr;
AliasSet *AS = nullptr;
- LocationSize Size = 0;
+ LocationSize Size = LocationSize::mapEmpty();
AAMDNodes AAInfo;
+ // Whether the size for this record has been set at all. This makes no
+ // guarantees about the size being known.
+ bool isSizeSet() const { return Size != LocationSize::mapEmpty(); }
+
public:
PointerRec(Value *V)
: Val(V), AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {}
@@ -71,9 +75,10 @@ class AliasSet : public ilist_node<AliasSet> {
bool updateSizeAndAAInfo(LocationSize NewSize, const AAMDNodes &NewAAInfo) {
bool SizeChanged = false;
- if (NewSize > Size) {
- Size = NewSize;
- SizeChanged = true;
+ if (NewSize != Size) {
+ LocationSize OldSize = Size;
+ Size = isSizeSet() ? Size.unionWith(NewSize) : NewSize;
+ SizeChanged = OldSize != Size;
}
if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey())
@@ -91,7 +96,10 @@ class AliasSet : public ilist_node<AliasSet> {
return SizeChanged;
}
- LocationSize getSize() const { return Size; }
+ LocationSize getSize() const {
+ assert(isSizeSet() && "Getting an unset size!");
+ return Size;
+ }
/// Return the AAInfo, or null if there is no information or conflicting
/// information.
@@ -175,9 +183,6 @@ class AliasSet : public ilist_node<AliasSet> {
};
unsigned Alias : 1;
- /// True if this alias set contains volatile loads or stores.
- unsigned Volatile : 1;
-
unsigned SetSize = 0;
void addRef() { ++RefCount; }
@@ -203,9 +208,6 @@ public:
bool isMustAlias() const { return Alias == SetMustAlias; }
bool isMayAlias() const { return Alias == SetMayAlias; }
- /// Return true if this alias set contains volatile loads or stores.
- bool isVolatile() const { return Volatile; }
-
/// Return true if this alias set should be ignored as part of the
/// AliasSetTracker object.
bool isForwardingAliasSet() const { return Forward; }
@@ -224,6 +226,10 @@ public:
// track of the list's exact size.
unsigned size() { return SetSize; }
+ /// If this alias set is known to contain a single instruction and *only* a
+ /// single unique instruction, return it. Otherwise, return nullptr.
+ Instruction* getUniqueInstruction();
+
void print(raw_ostream &OS) const;
void dump() const;
@@ -264,7 +270,7 @@ private:
// Can only be created by AliasSetTracker.
AliasSet()
: PtrListEnd(&PtrList), RefCount(0), AliasAny(false), Access(NoAccess),
- Alias(SetMustAlias), Volatile(false) {}
+ Alias(SetMustAlias) {}
PointerRec *getSomePointer() const {
return PtrList;
@@ -303,8 +309,6 @@ private:
dropRef(AST);
}
- void setVolatile() { Volatile = true; }
-
public:
/// Return true if the specified pointer "may" (or must) alias one of the
/// members in the set.
@@ -379,23 +383,11 @@ public:
/// Return the alias sets that are active.
const ilist<AliasSet> &getAliasSets() const { return AliasSets; }
- /// Return the alias set that the specified pointer lives in. If the New
- /// argument is non-null, this method sets the value to true if a new alias
- /// set is created to contain the pointer (because the pointer didn't alias
- /// anything).
- AliasSet &getAliasSetForPointer(Value *P, LocationSize Size,
- const AAMDNodes &AAInfo);
-
- /// Return the alias set containing the location specified if one exists,
- /// otherwise return null.
- AliasSet *getAliasSetForPointerIfExists(const Value *P, LocationSize Size,
- const AAMDNodes &AAInfo) {
- return mergeAliasSetsForPointer(P, Size, AAInfo);
- }
-
- /// Return true if the specified instruction "may" (or must) alias one of the
- /// members in any of the sets.
- bool containsUnknown(const Instruction *I) const;
+ /// Return the alias set which contains the specified memory location. If
+ /// the memory location aliases two or more existing alias sets, this call
+ /// will have the effect of merging those alias sets before the single
+ /// resulting alias set is returned.
+ AliasSet &getAliasSetFor(const MemoryLocation &MemLoc);
/// Return the underlying alias analysis object used by this tracker.
AliasAnalysis &getAliasAnalysis() const { return AA; }
@@ -445,8 +437,7 @@ private:
return *Entry;
}
- AliasSet &addPointer(Value *P, LocationSize Size, const AAMDNodes &AAInfo,
- AliasSet::AccessLattice E);
+ AliasSet &addPointer(MemoryLocation Loc, AliasSet::AccessLattice E);
AliasSet *mergeAliasSetsForPointer(const Value *Ptr, LocationSize Size,
const AAMDNodes &AAInfo);
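Not part of the patch: the pointer/size/AAInfo triple is replaced by a single MemoryLocation; a minimal sketch of a caller.

#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

AliasSet &aliasSetForLoad(AliasSetTracker &AST, LoadInst &LI) {
  // Merges any overlapping sets and returns the single resulting set.
  return AST.getAliasSetFor(MemoryLocation::get(&LI));
}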
diff --git a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index 6344e84b58eb..820d7ac0935a 100644
--- a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -21,7 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <algorithm>
@@ -84,18 +84,18 @@ public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
/// Chases pointers until we find a (constant global) or not.
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
/// Get the location associated with a pointer argument of a callsite.
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
/// Returns the behavior when calling the given call site.
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
/// Returns the behavior when calling the given function. For use when the
/// call site is not known.
@@ -115,7 +115,7 @@ private:
unsigned ZExtBits;
unsigned SExtBits;
- int64_t Scale;
+ APInt Scale;
bool operator==(const VariableGEPIndex &Other) const {
return V == Other.V && ZExtBits == Other.ZExtBits &&
@@ -133,10 +133,10 @@ private:
// Base pointer of the GEP
const Value *Base;
// Total constant offset w.r.t the base from indexing into structs
- int64_t StructOffset;
+ APInt StructOffset;
// Total constant offset w.r.t the base from indexing through
// pointers/arrays/vectors
- int64_t OtherOffset;
+ APInt OtherOffset;
// Scaled variable (non-constant) indices.
SmallVector<VariableGEPIndex, 4> VarIndices;
};
@@ -189,7 +189,7 @@ private:
bool
constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices,
LocationSize V1Size, LocationSize V2Size,
- int64_t BaseOffset, AssumptionCache *AC,
+ APInt BaseOffset, AssumptionCache *AC,
DominatorTree *DT);
bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
index ca12db6208b8..0b2618735697 100644
--- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -56,7 +56,7 @@ public:
const Function *getFunction() const;
const BranchProbabilityInfo *getBPI() const;
- void view() const;
+ void view(StringRef = "BlockFrequencyDAGs") const;
/// getblockFreq - Return block frequency. Return 0 if we don't have the
/// information. Please note that initial frequency is equal to ENTRY_FREQ. It
diff --git a/contrib/llvm/include/llvm/Analysis/CFG.h b/contrib/llvm/include/llvm/Analysis/CFG.h
index cccdd1637411..caae0b6e2a8f 100644
--- a/contrib/llvm/include/llvm/Analysis/CFG.h
+++ b/contrib/llvm/include/llvm/Analysis/CFG.h
@@ -25,7 +25,6 @@ class DominatorTree;
class Function;
class Instruction;
class LoopInfo;
-class TerminatorInst;
/// Analyze the specified function to find all of the loop backedges in the
/// function and return them. This is a relatively cheap (compared to
@@ -46,7 +45,7 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
/// edges from a block with multiple successors to a block with multiple
/// predecessors.
///
-bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges = false);
/// Determine whether instruction 'To' is reachable from 'From',
diff --git a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
index 5786769cc500..5996dd90bcfd 100644
--- a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
+++ b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -150,7 +150,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
/// Display the raw branch weights from PGO.
std::string getEdgeAttributes(const BasicBlock *Node, succ_const_iterator I,
const Function *F) {
- const TerminatorInst *TI = Node->getTerminator();
+ const Instruction *TI = Node->getTerminator();
if (TI->getNumSuccessors() == 1)
return "";
@@ -172,8 +172,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
// Prepend a 'W' to indicate that this is a weight rather than the actual
// profile count (due to scaling).
- Twine Attrs = "label=\"W:" + Twine(Weight->getZExtValue()) + "\"";
- return Attrs.str();
+ return ("label=\"W:" + Twine(Weight->getZExtValue()) + "\"").str();
}
};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 5e83ea2a6e2b..61b99f6c3e6b 100644
--- a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -364,6 +364,10 @@ public:
InvalidSCCSet, nullptr, nullptr,
InlinedInternalEdges};
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
PreservedAnalyses PA = PreservedAnalyses::all();
CG.buildRefSCCs();
for (auto RCI = CG.postorder_ref_scc_begin(),
@@ -428,8 +432,20 @@ public:
UR.UpdatedRC = nullptr;
UR.UpdatedC = nullptr;
+
+ // Check the PassInstrumentation's BeforePass callbacks before
+ // running the pass, skip its execution completely if asked to
+ // (callback returns false).
+ if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
+ continue;
+
PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
+
// Update the SCC and RefSCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
@@ -615,12 +631,20 @@ public:
if (CG.lookupSCC(*N) != CurrentC)
continue;
- PreservedAnalyses PassPA = Pass.run(N->getFunction(), FAM);
+ Function &F = N->getFunction();
+
+ PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
+ if (!PI.runBeforePass<Function>(Pass, F))
+ continue;
+
+ PreservedAnalyses PassPA = Pass.run(F, FAM);
+
+ PI.runAfterPass<Function>(Pass, F);
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
- FAM.invalidate(N->getFunction(), PassPA);
+ FAM.invalidate(F, PassPA);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
@@ -690,6 +714,8 @@ public:
PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR) {
PreservedAnalyses PA = PreservedAnalyses::all();
+ PassInstrumentation PI =
+ AM.getResult<PassInstrumentationAnalysis>(InitialC, CG);
// The SCC may be refined while we are running passes over it, so set up
// a pointer that we can update.
@@ -733,8 +759,17 @@ public:
auto CallCounts = ScanSCC(*C, CallHandles);
for (int Iteration = 0;; ++Iteration) {
+
+ if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
+ continue;
+
PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR);
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
+
// If the SCC structure has changed, bail immediately and let the outer
// CGSCC layer handle any iteration to reflect the refined structure.
if (UR.UpdatedC && UR.UpdatedC != C) {
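Not part of the patch: the BeforePass/AfterPass hooks consulted above are the ones registered through PassInstrumentationCallbacks; a rough sketch (the pass name below is made up).

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassInstrumentation.h"
using namespace llvm;

void installSkip(PassInstrumentationCallbacks &PIC) {
  PIC.registerBeforePassCallback([](StringRef PassID, Any) {
    return PassID != "some-cgscc-pass"; // returning false skips that pass
  });
}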
diff --git a/contrib/llvm/include/llvm/Analysis/CaptureTracking.h b/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
index 7a869a51233a..aaaaff9ae252 100644
--- a/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
+++ b/contrib/llvm/include/llvm/Analysis/CaptureTracking.h
@@ -22,6 +22,14 @@ namespace llvm {
class DominatorTree;
class OrderedBasicBlock;
+ /// The default value for MaxUsesToExplore argument. It's relatively small to
+ /// keep the cost of analysis reasonable for clients like BasicAliasAnalysis,
+ /// where the results can't be cached.
+ /// TODO: we should probably introduce a caching CaptureTracking analysis and
+ /// use it where possible. The caching version can use a much higher limit or
+ /// not have this cap at all.
+ unsigned constexpr DefaultMaxUsesToExplore = 20;
+
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
/// be expensive, so consider caching the results. The boolean ReturnCaptures
@@ -29,9 +37,12 @@ namespace llvm {
/// counts as capturing it or not. The boolean StoreCaptures specified
/// whether storing the value (or part of it) into memory anywhere
/// automatically counts as capturing it or not.
+ /// MaxUsesToExplore specifies how many uses the analysis should explore for
+ /// one value before giving up due to "too many uses".
bool PointerMayBeCaptured(const Value *V,
bool ReturnCaptures,
- bool StoreCaptures);
+ bool StoreCaptures,
+ unsigned MaxUsesToExplore = DefaultMaxUsesToExplore);
/// PointerMayBeCapturedBefore - Return true if this pointer value may be
/// captured by the enclosing function (which is required to exist). If a
@@ -44,10 +55,13 @@ namespace llvm {
/// or not. Captures by the provided instruction are considered if the
/// final parameter is true. An ordered basic block in \p OBB could be used
/// to speed up capture-tracker queries.
+ /// MaxUsesToExplore specifies how many uses the analysis should explore for
+ /// one value before giving up due to "too many uses".
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI = false,
- OrderedBasicBlock *OBB = nullptr);
+ OrderedBasicBlock *OBB = nullptr,
+ unsigned MaxUsesToExplore = DefaultMaxUsesToExplore);
/// This callback is used in conjunction with PointerMayBeCaptured. In
/// addition to the interface here, you'll need to provide your own getters
@@ -75,7 +89,10 @@ namespace llvm {
/// PointerMayBeCaptured - Visit the value and the values derived from it and
/// find values which appear to be capturing the pointer value. This feeds
/// results into and is controlled by the CaptureTracker object.
- void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker);
+ /// MaxUsesToExplore specifies how many uses the analysis should explore for
+ /// one value before giving up due to "too many uses".
+ void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
+ unsigned MaxUsesToExplore = DefaultMaxUsesToExplore);
} // end namespace llvm
#endif
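Not part of the patch: a sketch of a caller raising the new use-exploration cap beyond DefaultMaxUsesToExplore.

#include "llvm/Analysis/CaptureTracking.h"
using namespace llvm;

bool mayBeCaptured(const Value *V) {
  return PointerMayBeCaptured(V, /*ReturnCaptures=*/true,
                              /*StoreCaptures=*/true,
                              /*MaxUsesToExplore=*/100); // default is 20
}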
diff --git a/contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h
index 3cc69d9fea29..0e9c6a96b0f4 100644
--- a/contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/CmpInstAnalysis.h
@@ -46,19 +46,18 @@ namespace llvm {
///
unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false);
- /// This is the complement of getICmpCode, which turns an opcode and two
- /// operands into either a constant true or false, or the predicate for a new
- /// ICmp instruction. The sign is passed in to determine which kind of
- /// predicate to use in the new icmp instruction.
+ /// This is the complement of getICmpCode. It turns a predicate code into
+ /// either a constant true or false or the predicate for a new ICmp.
+ /// The sign is passed in to determine which kind of predicate to use in the
+ /// new ICmp instruction.
/// Non-NULL return value will be a true or false constant.
- /// NULL return means a new ICmp is needed. The predicate for which is output
- /// in NewICmpPred.
- Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- CmpInst::Predicate &NewICmpPred);
+ /// NULL return means a new ICmp is needed. The predicate is output in Pred.
+ Constant *getPredForICmpCode(unsigned Code, bool Sign, Type *OpTy,
+ CmpInst::Predicate &Pred);
/// Return true if both predicates match sign or if at least one of them is an
/// equality comparison (which is signless).
- bool PredicatesFoldable(CmpInst::Predicate p1, CmpInst::Predicate p2);
+ bool predicatesFoldable(CmpInst::Predicate P1, CmpInst::Predicate P2);
/// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The
/// returned predicate is either == or !=. Returns false if decomposition
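Not part of the patch: a sketch of how the renamed helper is typically consumed; the wrapper function below is hypothetical.

#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *materializeICmpCode(unsigned Code, bool Sign, Type *OpTy, Value *LHS,
                           Value *RHS, IRBuilder<> &B) {
  CmpInst::Predicate Pred;
  if (Constant *C = getPredForICmpCode(Code, Sign, OpTy, Pred))
    return C;                          // folded to constant true/false
  return B.CreateICmp(Pred, LHS, RHS); // otherwise build the new compare
}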
diff --git a/contrib/llvm/include/llvm/Analysis/DemandedBits.h b/contrib/llvm/include/llvm/Analysis/DemandedBits.h
index d4384609762d..4c4e3f6c99e7 100644
--- a/contrib/llvm/include/llvm/Analysis/DemandedBits.h
+++ b/contrib/llvm/include/llvm/Analysis/DemandedBits.h
@@ -44,19 +44,30 @@ public:
F(F), AC(AC), DT(DT) {}
/// Return the bits demanded from instruction I.
+ ///
+ /// For vector instructions individual vector elements are not distinguished:
+ /// A bit is demanded if it is demanded for any of the vector elements. The
+ /// size of the return value corresponds to the type size in bits of the
+ /// scalar type.
+ ///
+ /// Instructions that do not have integer or vector of integer type are
+ /// accepted, but will always produce a mask with all bits set.
APInt getDemandedBits(Instruction *I);
/// Return true if, during analysis, I could not be reached.
bool isInstructionDead(Instruction *I);
+ /// Return whether this use is dead by means of not having any demanded bits.
+ bool isUseDead(Use *U);
+
void print(raw_ostream &OS);
private:
void performAnalysis();
void determineLiveOperandBits(const Instruction *UserI,
- const Instruction *I, unsigned OperandNo,
+ const Value *Val, unsigned OperandNo,
const APInt &AOut, APInt &AB,
- KnownBits &Known, KnownBits &Known2);
+ KnownBits &Known, KnownBits &Known2, bool &KnownBitsComputed);
Function &F;
AssumptionCache &AC;
@@ -67,6 +78,9 @@ private:
// The set of visited instructions (non-integer-typed only).
SmallPtrSet<Instruction*, 32> Visited;
DenseMap<Instruction *, APInt> AliveBits;
+ // Uses with no demanded bits. If the user also has no demanded bits, the use
+ // might not be stored explicitly in this map, to save memory during analysis.
+ SmallPtrSet<Use *, 16> DeadUses;
};
class DemandedBitsWrapperPass : public FunctionPass {
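Not part of the patch: a sketch combining the existing per-instruction query with the new per-use query.

#include "llvm/Analysis/DemandedBits.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

bool operandHasNoDemandedBits(DemandedBits &DB, Instruction &I, unsigned OpNo) {
  // Either the whole instruction is dead, or this particular use demands no bits.
  return DB.isInstructionDead(&I) || DB.isUseDead(&I.getOperandUse(OpNo));
}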
diff --git a/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
index c8ec737a2cb9..69d0e2c1513e 100644
--- a/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -936,6 +936,17 @@ template <typename T> class ArrayRef;
friend struct AnalysisInfoMixin<DependenceAnalysis>;
}; // class DependenceAnalysis
+ /// Printer pass to dump DA results.
+ struct DependenceAnalysisPrinterPass
+ : public PassInfoMixin<DependenceAnalysisPrinterPass> {
+ DependenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+ private:
+ raw_ostream &OS;
+ }; // class DependenceAnalysisPrinterPass
+
/// Legacy pass manager pass to access dependence information
class DependenceAnalysisWrapperPass : public FunctionPass {
public:
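Not part of the patch: the new printer pass slots into a new-pass-manager function pipeline; a minimal sketch.

#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void addDAPrinter(FunctionPassManager &FPM) {
  FPM.addPass(DependenceAnalysisPrinterPass(errs()));
}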
diff --git a/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h
index 328c8645d3c0..d834862db095 100644
--- a/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -7,55 +7,199 @@
//
//===----------------------------------------------------------------------===//
//
-// The divergence analysis is an LLVM pass which can be used to find out
-// if a branch instruction in a GPU program is divergent or not. It can help
-// branch optimizations such as jump threading and loop unswitching to make
-// better decisions.
+// \file
+// The divergence analysis determines which instructions and branches are
+// divergent given a set of divergent source instructions.
//
//===----------------------------------------------------------------------===//
+
#ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
#define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/SyncDependenceAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
+#include <vector>
namespace llvm {
+class Module;
class Value;
-class DivergenceAnalysis : public FunctionPass {
+class Instruction;
+class Loop;
+class raw_ostream;
+class TargetTransformInfo;
+
+/// \brief Generic divergence analysis for reducible CFGs.
+///
+/// This analysis propagates divergence in a data-parallel context from sources
+/// of divergence to all users. It requires reducible CFGs. All assignments
+/// should be in SSA form.
+class DivergenceAnalysis {
public:
- static char ID;
+ /// \brief This instance will analyze the whole function \p F or the loop \p
+ /// RegionLoop.
+ ///
+ /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop.
+ /// Otherwise the whole function is analyzed.
+ /// \param IsLCSSAForm whether the analysis may assume that the IR in the
+ /// region is in LCSSA form.
+ DivergenceAnalysis(const Function &F, const Loop *RegionLoop,
+ const DominatorTree &DT, const LoopInfo &LI,
+ SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
- DivergenceAnalysis() : FunctionPass(ID) {
- initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
- }
+ /// \brief The loop that defines the analyzed region (if any).
+ const Loop *getRegionLoop() const { return RegionLoop; }
+ const Function &getFunction() const { return F; }
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ /// \brief Whether \p BB is part of the region.
+ bool inRegion(const BasicBlock &BB) const;
+ /// \brief Whether \p I is part of the region.
+ bool inRegion(const Instruction &I) const;
- bool runOnFunction(Function &F) override;
+ /// \brief Mark \p UniVal as a value that is always uniform.
+ void addUniformOverride(const Value &UniVal);
- // Print all divergent branches in the function.
- void print(raw_ostream &OS, const Module *) const override;
+ /// \brief Mark \p DivVal as a value that is always divergent.
+ void markDivergent(const Value &DivVal);
- // Returns true if V is divergent at its definition.
- //
- // Even if this function returns false, V may still be divergent when used
- // in a different basic block.
- bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
+ /// \brief Propagate divergence to all instructions in the region.
+ /// Divergence is seeded by calls to \p markDivergent.
+ void compute();
+
+ /// \brief Whether any value was marked or analyzed to be divergent.
+ bool hasDetectedDivergence() const { return !DivergentValues.empty(); }
+
+ /// \brief Whether \p Val will always return a uniform value regardless of its
+ /// operands
+ bool isAlwaysUniform(const Value &Val) const;
+
+ /// \brief Whether \p Val is a divergent value
+ bool isDivergent(const Value &Val) const;
+
+ void print(raw_ostream &OS, const Module *) const;
+
+private:
+ bool updateTerminator(const Instruction &Term) const;
+ bool updatePHINode(const PHINode &Phi) const;
+
+ /// \brief Computes whether \p Inst is divergent based on the
+ /// divergence of its operands.
+ ///
+ /// \returns Whether \p Inst is divergent.
+ ///
+ /// This should only be called for non-phi, non-terminator instructions.
+ bool updateNormalInstruction(const Instruction &Inst) const;
+
+ /// \brief Mark users of live-out values as divergent.
+ ///
+ /// \param LoopHeader the header of the divergent loop.
+ ///
+ /// Marks all users of live-out values of the loop headed by \p LoopHeader
+ /// as divergent and puts them on the worklist.
+ void taintLoopLiveOuts(const BasicBlock &LoopHeader);
+
+ /// \brief Push all users of \p Val (in the region) to the worklist
+ void pushUsers(const Value &I);
+
+ /// \brief Push all phi nodes in \p Block to the worklist
+ void pushPHINodes(const BasicBlock &Block);
+
+ /// \brief Mark \p Block as join divergent
+ ///
+ /// A block is join divergent if two threads may reach it from different
+ /// incoming blocks at the same time.
+ void markBlockJoinDivergent(const BasicBlock &Block) {
+ DivergentJoinBlocks.insert(&Block);
+ }
+
+ /// \brief Whether \p Val is divergent when read in \p ObservingBlock.
+ bool isTemporalDivergent(const BasicBlock &ObservingBlock,
+ const Value &Val) const;
+
+ /// \brief Whether \p Block is join divergent
+ ///
+ /// (see markBlockJoinDivergent).
+ bool isJoinDivergent(const BasicBlock &Block) const {
+ return DivergentJoinBlocks.find(&Block) != DivergentJoinBlocks.end();
+ }
- // Returns true if V is uniform/non-divergent.
+ /// \brief Propagate control-induced divergence to users (phi nodes and
+ /// instructions).
//
- // Even if this function returns true, V may still be divergent when used
- // in a different basic block.
- bool isUniform(const Value *V) const { return !isDivergent(V); }
+ // \param JoinBlock is a divergent loop exit or join point of two disjoint
+ // paths.
+ // \returns Whether \p JoinBlock is a divergent loop exit of \p TermLoop.
+ bool propagateJoinDivergence(const BasicBlock &JoinBlock,
+ const Loop *TermLoop);
- // Keep the analysis results uptodate by removing an erased value.
- void removeValue(const Value *V) { DivergentValues.erase(V); }
+ /// \brief Propagate induced value divergence due to control divergence in \p
+ /// Term.
+ void propagateBranchDivergence(const Instruction &Term);
+
+ /// \brief Propagate divergence caused by a divergent loop exit.
+ ///
+ /// \param ExitingLoop is a divergent loop.
+ void propagateLoopDivergence(const Loop &ExitingLoop);
private:
- // Stores all divergent values.
+ const Function &F;
+ // If regionLoop != nullptr, analysis is only performed within \p RegionLoop.
+ // Otherwise, analyze the whole function.
+ const Loop *RegionLoop;
+
+ const DominatorTree &DT;
+ const LoopInfo &LI;
+
+ // Recognized divergent loops
+ DenseSet<const Loop *> DivergentLoops;
+
+ // The SDA links divergent branches to divergent control-flow joins.
+ SyncDependenceAnalysis &SDA;
+
+ // Use simplified code path for LCSSA form.
+ bool IsLCSSAForm;
+
+ // Set of known-uniform values.
+ DenseSet<const Value *> UniformOverrides;
+
+ // Blocks with joining divergent control from different predecessors.
+ DenseSet<const BasicBlock *> DivergentJoinBlocks;
+
+ // Detected/marked divergent values.
DenseSet<const Value *> DivergentValues;
+
+ // Internal worklist for divergence propagation.
+ std::vector<const Instruction *> Worklist;
};
-} // End llvm namespace
-#endif //LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
\ No newline at end of file
+/// \brief Divergence analysis frontend for GPU kernels.
+class GPUDivergenceAnalysis {
+ SyncDependenceAnalysis SDA;
+ DivergenceAnalysis DA;
+
+public:
+ /// Runs the divergence analysis on @F, a GPU kernel
+ GPUDivergenceAnalysis(Function &F, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI,
+ const TargetTransformInfo &TTI);
+
+ /// Whether any divergence was detected.
+ bool hasDivergence() const { return DA.hasDetectedDivergence(); }
+
+ /// The GPU kernel this analysis result is for
+ const Function &getFunction() const { return DA.getFunction(); }
+
+ /// Whether \p V is divergent.
+ bool isDivergent(const Value &V) const;
+
+ /// Whether \p V is uniform/non-divergent
+ bool isUniform(const Value &V) const { return !isDivergent(V); }
+
+ /// Print all divergent values in the kernel.
+ void print(raw_ostream &OS, const Module *) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
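Not part of the patch: a sketch of driving the new GPU frontend, assuming the required analyses have already been computed for the kernel.

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

void reportDivergence(Function &F, const DominatorTree &DT,
                      const PostDominatorTree &PDT, const LoopInfo &LI,
                      const TargetTransformInfo &TTI) {
  GPUDivergenceAnalysis GDA(F, DT, PDT, LI, TTI);
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB)
      if (GDA.isDivergent(I))
        ; // treat I as divergent, e.g. keep it out of uniform code paths
}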
diff --git a/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h b/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
index 09cef68ce70f..3a664ca6ef50 100644
--- a/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
+++ b/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
@@ -88,7 +88,7 @@ public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
using AAResultBase::getModRefInfo;
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
@@ -98,7 +98,7 @@ public:
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
/// case the most generic behavior of this function should be returned.
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
private:
FunctionInfo *getFunctionInfo(const Function *F);
@@ -113,7 +113,7 @@ private:
void CollectSCCMembership(CallGraph &CG);
bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
- ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS,
+ ModRefInfo getModRefInfoForArgument(const CallBase *Call,
const GlobalValue *GV);
};
diff --git a/contrib/llvm/include/llvm/Analysis/GuardUtils.h b/contrib/llvm/include/llvm/Analysis/GuardUtils.h
new file mode 100644
index 000000000000..3b151eeafc81
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/GuardUtils.h
@@ -0,0 +1,26 @@
+//===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform analyses related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_GUARDUTILS_H
+#define LLVM_ANALYSIS_GUARDUTILS_H
+
+namespace llvm {
+
+class User;
+
+/// Returns true iff \p U has semantics of a guard.
+bool isGuard(const User *U);
+
+} // llvm
+
+#endif // LLVM_ANALYSIS_GUARDUTILS_H
+
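Not part of the patch: isGuard() is a small predicate; a sketch of a typical use when scanning a block.

#include "llvm/Analysis/GuardUtils.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

bool blockHasGuard(const BasicBlock &BB) {
  for (const Instruction &I : BB)
    if (isGuard(&I)) // true for calls to llvm.experimental.guard
      return true;
  return false;
}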
diff --git a/contrib/llvm/include/llvm/Analysis/IVDescriptors.h b/contrib/llvm/include/llvm/Analysis/IVDescriptors.h
new file mode 100644
index 000000000000..64b4ae23cc59
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -0,0 +1,357 @@
+//===- llvm/Analysis/IVDescriptors.h - IndVar Descriptors -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file "describes" induction and recurrence variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_IVDESCRIPTORS_H
+#define LLVM_ANALYSIS_IVDESCRIPTORS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+
+class AliasSet;
+class AliasSetTracker;
+class BasicBlock;
+class DataLayout;
+class Loop;
+class LoopInfo;
+class OptimizationRemarkEmitter;
+class PredicatedScalarEvolution;
+class PredIteratorCache;
+class ScalarEvolution;
+class SCEV;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+
+/// The RecurrenceDescriptor is used to identify recurrence variables in a
+/// loop. Reduction is a special case of recurrence that has uses of the
+/// recurrence variable outside the loop. The method isReductionPHI identifies
+/// reductions that are basic recurrences.
+///
+/// Basic recurrences are defined as the summation, product, OR, AND, XOR, min,
+/// or max of a set of terms. For example: for(i=0; i<n; i++) { total +=
+/// array[i]; } is a summation of array elements. Basic recurrences are a
+/// special case of chains of recurrences (CR). See ScalarEvolution for CR
+/// references.
+
+/// This struct holds information about recurrence variables.
+class RecurrenceDescriptor {
+public:
+ /// This enum represents the kinds of recurrences that we support.
+ enum RecurrenceKind {
+ RK_NoRecurrence, ///< Not a recurrence.
+ RK_IntegerAdd, ///< Sum of integers.
+ RK_IntegerMult, ///< Product of integers.
+ RK_IntegerOr, ///< Bitwise or logical OR of numbers.
+ RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
+ RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
+ RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
+ RK_FloatAdd, ///< Sum of floats.
+ RK_FloatMult, ///< Product of floats.
+ RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
+ };
+
+ // This enum represents the kind of minmax recurrence.
+ enum MinMaxRecurrenceKind {
+ MRK_Invalid,
+ MRK_UIntMin,
+ MRK_UIntMax,
+ MRK_SIntMin,
+ MRK_SIntMax,
+ MRK_FloatMin,
+ MRK_FloatMax
+ };
+
+ RecurrenceDescriptor() = default;
+
+ RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
+ MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
+ bool Signed, SmallPtrSetImpl<Instruction *> &CI)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
+ UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
+ CastInsts.insert(CI.begin(), CI.end());
+ }
+
+ /// This POD struct holds information about a potential recurrence operation.
+ class InstDesc {
+ public:
+ InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
+ : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
+ UnsafeAlgebraInst(UAI) {}
+
+ InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr)
+ : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K),
+ UnsafeAlgebraInst(UAI) {}
+
+ bool isRecurrence() { return IsRecurrence; }
+
+ bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+
+ Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+
+ MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; }
+
+ Instruction *getPatternInst() { return PatternLastInst; }
+
+ private:
+ // Is this instruction a recurrence candidate.
+ bool IsRecurrence;
+ // The last instruction in a min/max pattern (select of the select(icmp())
+ // pattern), or the current recurrence instruction otherwise.
+ Instruction *PatternLastInst;
+ // If this is a min/max pattern the comparison predicate.
+ MinMaxRecurrenceKind MinMaxKind;
+ // Recurrence has unsafe algebra.
+ Instruction *UnsafeAlgebraInst;
+ };
+
+ /// Returns a struct describing if the instruction 'I' can be a recurrence
+ /// variable of type 'Kind'. If the recurrence is a min/max pattern of
+ /// select(icmp()) this function advances the instruction pointer 'I' from the
+ /// compare instruction to the select instruction and stores this pointer in
+ /// the 'PatternLastInst' member of the returned struct.
+ static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ InstDesc &Prev, bool HasFunNoNaNAttr);
+
+ /// Returns true if instruction I has multiple uses in Insts
+ static bool hasMultipleUsesOf(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Insts,
+ unsigned MaxNumUses);
+
+ /// Returns true if all uses of the instruction I are within the Set.
+ static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set);
+
+ /// Returns a struct describing whether the instruction is a
+ /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
+ /// or max(X, Y).
+ static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev);
+
+ /// Returns a struct describing if the instruction is a
+ /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
+ static InstDesc isConditionalRdxPattern(RecurrenceKind Kind, Instruction *I);
+
+ /// Returns the identity value corresponding to the RecurrenceKind.
+ static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp);
+
+ /// Returns the opcode of the binary operation corresponding to the
+ /// RecurrenceKind.
+ static unsigned getRecurrenceBinOp(RecurrenceKind Kind);
+
+ /// Returns true if Phi is a reduction of type Kind and adds it to the
+ /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
+ /// non-null, the minimal bit width needed to compute the reduction will be
+ /// computed.
+ static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
+ bool HasFunNoNaNAttr,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB = nullptr,
+ AssumptionCache *AC = nullptr,
+ DominatorTree *DT = nullptr);
+
+ /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
+ /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
+ /// non-null, the minimal bit width needed to compute the reduction will be
+ /// computed.
+ static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB = nullptr,
+ AssumptionCache *AC = nullptr,
+ DominatorTree *DT = nullptr);
+
+ /// Returns true if Phi is a first-order recurrence. A first-order recurrence
+ /// is a non-reduction recurrence relation in which the value of the
+ /// recurrence in the current loop iteration equals a value defined in the
+ /// previous iteration. \p SinkAfter includes pairs of instructions where the
+ /// first will be rescheduled to appear after the second if/when the loop is
+ /// vectorized. It may be augmented with additional pairs if needed in order
+ /// to handle Phi as a first-order recurrence.
+ static bool
+ isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter,
+ DominatorTree *DT);
+
+ RecurrenceKind getRecurrenceKind() { return Kind; }
+
+ MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
+
+ TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
+
+ Instruction *getLoopExitInstr() { return LoopExitInstr; }
+
+ /// Returns true if the recurrence has unsafe algebra which requires a relaxed
+ /// floating-point model.
+ bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+
+ /// Returns first unsafe algebra instruction in the PHI node's use-chain.
+ Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+
+ /// Returns true if the recurrence kind is an integer kind.
+ static bool isIntegerRecurrenceKind(RecurrenceKind Kind);
+
+ /// Returns true if the recurrence kind is a floating point kind.
+ static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind);
+
+ /// Returns true if the recurrence kind is an arithmetic kind.
+ static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
+
+ /// Returns the type of the recurrence. This type can be narrower than the
+ /// actual type of the Phi if the recurrence has been type-promoted.
+ Type *getRecurrenceType() { return RecurrenceType; }
+
+ /// Returns a reference to the instructions used for type-promoting the
+ /// recurrence.
+ SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; }
+
+ /// Returns true if all source operands of the recurrence are SExtInsts.
+ bool isSigned() { return IsSigned; }
+
+private:
+ // The starting value of the recurrence.
+ // It does not have to be zero!
+ TrackingVH<Value> StartValue;
+ // The instruction whose value is used outside the loop.
+ Instruction *LoopExitInstr = nullptr;
+ // The kind of the recurrence.
+ RecurrenceKind Kind = RK_NoRecurrence;
+ // If this is a min/max recurrence, the kind of min/max recurrence.
+ MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
+ // First occurrence of unsafe algebra in the PHI's use-chain.
+ Instruction *UnsafeAlgebraInst = nullptr;
+ // The type of the recurrence.
+ Type *RecurrenceType = nullptr;
+ // True if all source operands of the recurrence are SExtInsts.
+ bool IsSigned = false;
+ // Instructions used for type-promoting the recurrence.
+ SmallPtrSet<Instruction *, 8> CastInsts;
+};
+
+/// A struct for saving information about induction variables.
+class InductionDescriptor {
+public:
+ /// This enum represents the kinds of inductions that we support.
+ enum InductionKind {
+ IK_NoInduction, ///< Not an induction variable.
+ IK_IntInduction, ///< Integer induction variable. Step = C.
+ IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
+ IK_FpInduction ///< Floating point induction variable.
+ };
+
+public:
+ /// Default constructor - creates an invalid induction.
+ InductionDescriptor() = default;
+
+ /// Get the consecutive direction. Returns:
+ /// 0 - unknown or non-consecutive.
+ /// 1 - consecutive and increasing.
+ /// -1 - consecutive and decreasing.
+ int getConsecutiveDirection() const;
+
+ Value *getStartValue() const { return StartValue; }
+ InductionKind getKind() const { return IK; }
+ const SCEV *getStep() const { return Step; }
+ BinaryOperator *getInductionBinOp() const { return InductionBinOp; }
+ ConstantInt *getConstIntStepValue() const;
+
+ /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
+ /// induction, the induction descriptor \p D will contain the data describing
+ /// this induction. If by some other means the caller has a better SCEV
+ /// expression for \p Phi than the one returned by the ScalarEvolution
+ /// analysis, it can be passed through \p Expr. If the def-use chain
+ /// associated with the phi includes casts (that we know we can ignore
+ /// under proper runtime checks), they are passed through \p CastsToIgnore.
+ static bool
+ isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE,
+ InductionDescriptor &D, const SCEV *Expr = nullptr,
+ SmallVectorImpl<Instruction *> *CastsToIgnore = nullptr);
+
+ /// Returns true if \p Phi is a floating point induction in the loop \p L.
+ /// If \p Phi is an induction, the induction descriptor \p D will contain
+ /// the data describing this induction.
+ static bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE,
+ InductionDescriptor &D);
+
+ /// Returns true if \p Phi is a loop \p L induction, in the context associated
+ /// with the run-time predicate of PSE. If \p Assume is true, this can add
+ /// further SCEV predicates to \p PSE in order to prove that \p Phi is an
+ /// induction.
+ /// If \p Phi is an induction, \p D will contain the data describing this
+ /// induction.
+ static bool isInductionPHI(PHINode *Phi, const Loop *L,
+ PredicatedScalarEvolution &PSE,
+ InductionDescriptor &D, bool Assume = false);
+
+ /// Returns true if the induction type is FP and the binary operator does
+ /// not have the "fast-math" property. Such an operation requires a relaxed FP
+ /// mode.
+ bool hasUnsafeAlgebra() {
+ return InductionBinOp && !cast<FPMathOperator>(InductionBinOp)->isFast();
+ }
+
+ /// Returns induction operator that does not have "fast-math" property
+ /// and requires FP unsafe mode.
+ Instruction *getUnsafeAlgebraInst() {
+ if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast())
+ return nullptr;
+ return InductionBinOp;
+ }
+
+ /// Returns binary opcode of the induction operator.
+ Instruction::BinaryOps getInductionOpcode() const {
+ return InductionBinOp ? InductionBinOp->getOpcode()
+ : Instruction::BinaryOpsEnd;
+ }
+
+ /// Returns a reference to the type cast instructions in the induction
+ /// update chain, that are redundant when guarded with a runtime
+ /// SCEV overflow check.
+ const SmallVectorImpl<Instruction *> &getCastInsts() const {
+ return RedundantCasts;
+ }
+
+private:
+ /// Private constructor - used by \c isInductionPHI.
+ InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
+ BinaryOperator *InductionBinOp = nullptr,
+ SmallVectorImpl<Instruction *> *Casts = nullptr);
+
+ /// Start value.
+ TrackingVH<Value> StartValue;
+ /// Induction kind.
+ InductionKind IK = IK_NoInduction;
+ /// Step value.
+ const SCEV *Step = nullptr;
+ // Instruction that advances induction variable.
+ BinaryOperator *InductionBinOp = nullptr;
+ // Instructions used for type-casts of the induction variable,
+ // that are redundant when guarded with a runtime SCEV overflow check.
+ SmallVector<Instruction *, 2> RedundantCasts;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_IVDESCRIPTORS_H
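A minimal sketch of how a loop transform might query the interfaces added above, using only the
signatures declared in this header (the surrounding pass plumbing and analysis retrieval are
assumed; the helper name is hypothetical):

  #include "llvm/Analysis/IVDescriptors.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Classify the header PHIs of a loop as reductions or inductions using the
  // descriptors above.
  static void classifyHeaderPHIs(Loop *L, ScalarEvolution *SE) {
    for (PHINode &Phi : L->getHeader()->phis()) {
      RecurrenceDescriptor RedDes;
      InductionDescriptor IndDes;
      if (RecurrenceDescriptor::isReductionPHI(&Phi, L, RedDes)) {
        // A reduction such as 'total += array[i]'; the loop-exit instruction
        // produces the value used outside the loop.
        Instruction *Exit = RedDes.getLoopExitInstr();
        (void)Exit;
      } else if (InductionDescriptor::isInductionPHI(&Phi, L, SE, IndDes)) {
        // An induction variable; the step is a SCEV, e.g. a constant stride.
        const SCEV *Step = IndDes.getStep();
        (void)Step;
      }
    }
  }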
diff --git a/contrib/llvm/include/llvm/Analysis/IndirectCallSiteVisitor.h b/contrib/llvm/include/llvm/Analysis/IndirectCallSiteVisitor.h
deleted file mode 100644
index dde56a143c51..000000000000
--- a/contrib/llvm/include/llvm/Analysis/IndirectCallSiteVisitor.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===-- IndirectCallSiteVisitor.h - indirect call-sites visitor -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements defines a visitor class and a helper function that find
-// all indirect call-sites in a function.
-
-#include "llvm/IR/InstVisitor.h"
-#include <vector>
-
-namespace llvm {
-// Visitor class that finds all indirect call sites.
-struct PGOIndirectCallSiteVisitor
- : public InstVisitor<PGOIndirectCallSiteVisitor> {
- std::vector<Instruction *> IndirectCallInsts;
- PGOIndirectCallSiteVisitor() {}
-
- void visitCallSite(CallSite CS) {
- if (CS.isIndirectCall())
- IndirectCallInsts.push_back(CS.getInstruction());
- }
-};
-
-// Helper function that finds all indirect call sites.
-inline std::vector<Instruction *> findIndirectCallSites(Function &F) {
- PGOIndirectCallSiteVisitor ICV;
- ICV.visit(F);
- return ICV.IndirectCallInsts;
-}
-}
diff --git a/contrib/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/contrib/llvm/include/llvm/Analysis/IndirectCallVisitor.h
new file mode 100644
index 000000000000..d00cf63368f1
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -0,0 +1,39 @@
+//===-- IndirectCallVisitor.h - indirect call visitor ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a visitor class and a helper function that find all
+// indirect call sites in a function.
+
+#ifndef LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
+#define LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
+
+#include "llvm/IR/InstVisitor.h"
+#include <vector>
+
+namespace llvm {
+// Visitor class that finds all indirect calls.
+struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
+ std::vector<Instruction *> IndirectCalls;
+ PGOIndirectCallVisitor() {}
+
+ void visitCallBase(CallBase &Call) {
+ if (Call.isIndirectCall())
+ IndirectCalls.push_back(&Call);
+ }
+};
+
+// Helper function that finds all indirect call sites.
+inline std::vector<Instruction *> findIndirectCalls(Function &F) {
+ PGOIndirectCallVisitor ICV;
+ ICV.visit(F);
+ return ICV.IndirectCalls;
+}
+} // namespace llvm
+
+#endif
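For illustration, a short sketch of using the new helper (the calling context is assumed; the
counting function is hypothetical):

  #include "llvm/Analysis/IndirectCallVisitor.h"
  using namespace llvm;

  // Count the indirect call sites in a function, e.g. as a cheap profiling
  // heuristic.
  static unsigned countIndirectCalls(Function &F) {
    std::vector<Instruction *> Calls = findIndirectCalls(F);
    return static_cast<unsigned>(Calls.size());
  }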
diff --git a/contrib/llvm/include/llvm/Analysis/InlineCost.h b/contrib/llvm/include/llvm/Analysis/InlineCost.h
index 8c412057fb81..4c270354b0c4 100644
--- a/contrib/llvm/include/llvm/Analysis/InlineCost.h
+++ b/contrib/llvm/include/llvm/Analysis/InlineCost.h
@@ -46,7 +46,6 @@ const int IndirectCallThreshold = 100;
const int CallPenalty = 25;
const int LastCallToStaticBonus = 15000;
const int ColdccPenalty = 2000;
-const int NoreturnPenalty = 10000;
/// Do not inline functions which allocate this many bytes on the stack
/// when the caller is recursive.
const unsigned TotalAllocaSizeRecursiveCaller = 1024;
@@ -74,8 +73,15 @@ class InlineCost {
/// The adjusted threshold against which this cost was computed.
const int Threshold;
+ /// Must be set for Always and Never instances.
+ const char *Reason = nullptr;
+
// Trivial constructor, interesting logic in the factory functions below.
- InlineCost(int Cost, int Threshold) : Cost(Cost), Threshold(Threshold) {}
+ InlineCost(int Cost, int Threshold, const char *Reason = nullptr)
+ : Cost(Cost), Threshold(Threshold), Reason(Reason) {
+ assert((isVariable() || Reason) &&
+ "Reason must be provided for Never or Always");
+ }
public:
static InlineCost get(int Cost, int Threshold) {
@@ -83,11 +89,11 @@ public:
assert(Cost < NeverInlineCost && "Cost crosses sentinel value");
return InlineCost(Cost, Threshold);
}
- static InlineCost getAlways() {
- return InlineCost(AlwaysInlineCost, 0);
+ static InlineCost getAlways(const char *Reason) {
+ return InlineCost(AlwaysInlineCost, 0, Reason);
}
- static InlineCost getNever() {
- return InlineCost(NeverInlineCost, 0);
+ static InlineCost getNever(const char *Reason) {
+ return InlineCost(NeverInlineCost, 0, Reason);
}
/// Test whether the inline cost is low enough for inlining.
@@ -112,12 +118,30 @@ public:
return Threshold;
}
+ /// Get the reason of Always or Never.
+ const char *getReason() const {
+ assert((Reason || isVariable()) &&
+ "InlineCost reason must be set for Always or Never");
+ return Reason;
+ }
+
/// Get the cost delta from the threshold for inlining.
/// Only valid if the cost is of the variable kind. Returns a negative
/// value if the cost is too high to inline.
int getCostDelta() const { return Threshold - getCost(); }
};
+/// InlineResult is basically true or false. For false results, the message
+/// describes the reason why it was decided not to inline.
+struct InlineResult {
+ const char *message = nullptr;
+ InlineResult(bool result, const char *message = nullptr)
+ : message(result ? nullptr : (message ? message : "cost > threshold")) {}
+ InlineResult(const char *message = nullptr) : message(message) {}
+ operator bool() const { return !message; }
+ operator const char *() const { return message; }
+};
+
/// Thresholds to tune inline cost analysis. The inline cost analysis decides
/// the condition to apply a threshold and applies it. Otherwise,
/// DefaultThreshold is used. If a threshold is Optional, it is applied only
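A hedged sketch of the intended InlineResult usage pattern, based only on the struct shown in
this hunk (the helper names and the reason string are illustrative, not part of the header):

  #include "llvm/Analysis/InlineCost.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static InlineResult analyzeCall(bool TooBig) {
    if (TooBig)
      return InlineResult("size grew beyond the caller's budget"); // carries a reason
    return InlineResult(true);                                     // inlinable, no message
  }

  static void reportDecision(raw_ostream &OS, bool TooBig) {
    InlineResult IR = analyzeCall(TooBig);
    if (!IR) // converts to false when a failure message is present
      OS << "not inlining: " << IR.message << "\n";
  }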
diff --git a/contrib/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h b/contrib/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h
new file mode 100644
index 000000000000..073e6ec3b7f6
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/InstructionPrecedenceTracking.h
@@ -0,0 +1,150 @@
+//===-- InstructionPrecedenceTracking.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implements a class that is able to define some instructions as "special"
+// (e.g. as having implicit control flow, or writing memory, or having another
+// interesting property) and then efficiently answers queries of the following types:
+// 1. Are there any special instructions in the block of interest?
+// 2. Return the first of the special instructions in the given block;
+// 3. Check if the given instruction is preceded by the first special
+// instruction in the same block.
+// The class provides caching that allows these queries to be answered quickly.
+// The user must make sure that the cached data is invalidated properly
+// whenever the contents of a tracked block change.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H
+#define LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H
+
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/OrderedInstructions.h"
+
+namespace llvm {
+
+class InstructionPrecedenceTracking {
+ // Maps a block to the topmost special instruction in it. If the value is
+ // nullptr, it means that it is known that this block does not contain any
+ // special instructions.
+ DenseMap<const BasicBlock *, const Instruction *> FirstSpecialInsts;
+ // Answers queries about the precedence of instructions within one block.
+ OrderedInstructions OI;
+
+ // Fills information about the given block's special instructions.
+ void fill(const BasicBlock *BB);
+
+#ifndef NDEBUG
+ /// Asserts that the cached info for \p BB is up-to-date. This helps to catch
+ /// the usage error of accessing a block without properly invalidating after a
+ /// previous transform.
+ void validate(const BasicBlock *BB) const;
+
+ /// Asserts that the contents of this tracking are up-to-date. This
+ /// helps to catch the usage error of accessing a block without properly
+ /// invalidating after a previous transform.
+ void validateAll() const;
+#endif
+
+protected:
+ InstructionPrecedenceTracking(DominatorTree *DT)
+ : OI(OrderedInstructions(DT)) {}
+
+ /// Returns the topmost special instruction from the block \p BB. Returns
+ /// nullptr if there are no special instructions in the block.
+ const Instruction *getFirstSpecialInstruction(const BasicBlock *BB);
+
+ /// Returns true iff at least one instruction from the basic block \p BB is
+ /// special.
+ bool hasSpecialInstructions(const BasicBlock *BB);
+
+ /// Returns true iff the first special instruction of \p Insn's block exists
+ /// and dominates \p Insn.
+ bool isPreceededBySpecialInstruction(const Instruction *Insn);
+
+ /// A predicate that defines whether or not the instruction \p Insn is
+ /// considered special and needs to be tracked. Implementing this method in
+ /// child classes allows tracking of implicit control flow,
+ /// memory-writing instructions, or any other kind of instruction we might
+ /// be interested in.
+ virtual bool isSpecialInstruction(const Instruction *Insn) const = 0;
+
+ virtual ~InstructionPrecedenceTracking() = default;
+
+public:
+ /// Notifies this tracking that we are going to insert a new instruction \p
+ /// Inst into the basic block \p BB. It makes all necessary updates to internal
+ /// caches to keep them consistent.
+ void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
+
+ /// Notifies this tracking that we are going to remove the instruction \p Inst.
+ /// It makes all necessary updates to internal caches to keep them consistent.
+ void removeInstruction(const Instruction *Inst);
+
+ /// Invalidates all information from this tracking.
+ void clear();
+};
+
+/// This class keeps track of instructions with implicit control flow.
+/// These are instructions that may not pass execution to their successors. For
+/// example, throwing calls and guards do not always do this. If we need to know
+/// for sure that some instruction is guaranteed to execute if the given block
+/// is reached, then we need to make sure that there is no implicit control flow
+/// instruction (ICFI) preceding it. For example, this check is required if we
+/// perform PRE and move a non-speculatable instruction to another place.
+class ImplicitControlFlowTracking : public InstructionPrecedenceTracking {
+public:
+ ImplicitControlFlowTracking(DominatorTree *DT)
+ : InstructionPrecedenceTracking(DT) {}
+
+ /// Returns the topmost instruction with implicit control flow from the given
+ /// basic block. Returns nullptr if there is no such instruction in the block.
+ const Instruction *getFirstICFI(const BasicBlock *BB) {
+ return getFirstSpecialInstruction(BB);
+ }
+
+ /// Returns true if at least one instruction from the given basic block has
+ /// implicit control flow.
+ bool hasICF(const BasicBlock *BB) {
+ return hasSpecialInstructions(BB);
+ }
+
+ /// Returns true if the first ICFI of Insn's block exists and dominates Insn.
+ bool isDominatedByICFIFromSameBlock(const Instruction *Insn) {
+ return isPreceededBySpecialInstruction(Insn);
+ }
+
+ virtual bool isSpecialInstruction(const Instruction *Insn) const;
+};
+
+class MemoryWriteTracking : public InstructionPrecedenceTracking {
+public:
+ MemoryWriteTracking(DominatorTree *DT) : InstructionPrecedenceTracking(DT) {}
+
+ /// Returns the topmost instruction that may write memory from the given
+ /// basic block. Returns nullptr if there is no such instruction in the block.
+ const Instruction *getFirstMemoryWrite(const BasicBlock *BB) {
+ return getFirstSpecialInstruction(BB);
+ }
+
+ /// Returns true if at least one instruction from the given basic block may
+ /// write memory.
+ bool mayWriteToMemory(const BasicBlock *BB) {
+ return hasSpecialInstructions(BB);
+ }
+
+ /// Returns true if the first memory writing instruction of Insn's block
+ /// exists and dominates Insn.
+ bool isDominatedByMemoryWriteFromSameBlock(const Instruction *Insn) {
+ return isPreceededBySpecialInstruction(Insn);
+ }
+
+ virtual bool isSpecialInstruction(const Instruction *Insn) const;
+};
+
+} // llvm
+
+#endif // LLVM_ANALYSIS_INSTRUCTIONPRECEDENCETRACKING_H
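A small sketch of how a transform such as PRE might consult the new tracking before hoisting an
instruction within its block (pass plumbing is assumed; the helper name is illustrative):

  #include "llvm/Analysis/InstructionPrecedenceTracking.h"
  using namespace llvm;

  static bool safeToHoistWithinBlock(ImplicitControlFlowTracking &ICF,
                                     Instruction *I) {
    // If a throwing call or guard may precede I in its block, hoisting I to
    // the top of the block could execute it on a path where it never ran.
    return !ICF.isDominatedByICFIFromSameBlock(I);
  }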
diff --git a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
index 4f896bddff87..6662e91037e1 100644
--- a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -32,6 +32,8 @@
#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
namespace llvm {
@@ -40,7 +42,6 @@ template <typename T, typename... TArgs> class AnalysisManager;
template <class T> class ArrayRef;
class AssumptionCache;
class DominatorTree;
-class Instruction;
class ImmutableCallSite;
class DataLayout;
class FastMathFlags;
@@ -50,6 +51,41 @@ class Pass;
class TargetLibraryInfo;
class Type;
class Value;
+class MDNode;
+class BinaryOperator;
+
+/// InstrInfoQuery provides an interface to query additional information about
+/// instructions, such as metadata and flags like nsw. It returns conservative
+/// results unless the user has specified that this information is safe to use.
+struct InstrInfoQuery {
+ InstrInfoQuery(bool UMD) : UseInstrInfo(UMD) {}
+ InstrInfoQuery() : UseInstrInfo(true) {}
+ bool UseInstrInfo = true;
+
+ MDNode *getMetadata(const Instruction *I, unsigned KindID) const {
+ if (UseInstrInfo)
+ return I->getMetadata(KindID);
+ return nullptr;
+ }
+
+ template <class InstT> bool hasNoUnsignedWrap(const InstT *Op) const {
+ if (UseInstrInfo)
+ return Op->hasNoUnsignedWrap();
+ return false;
+ }
+
+ template <class InstT> bool hasNoSignedWrap(const InstT *Op) const {
+ if (UseInstrInfo)
+ return Op->hasNoSignedWrap();
+ return false;
+ }
+
+ bool isExact(const BinaryOperator *Op) const {
+ if (UseInstrInfo && isa<PossiblyExactOperator>(Op))
+ return cast<PossiblyExactOperator>(Op)->isExact();
+ return false;
+ }
+};
struct SimplifyQuery {
const DataLayout &DL;
@@ -58,14 +94,19 @@ struct SimplifyQuery {
AssumptionCache *AC = nullptr;
const Instruction *CxtI = nullptr;
+ // Wrapper to query additional information about instructions, such as
+ // metadata or flags like nsw; provides conservative results when that
+ // information cannot be safely used.
+ const InstrInfoQuery IIQ;
+
SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr)
: DL(DL), CxtI(CXTI) {}
SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT = nullptr,
AssumptionCache *AC = nullptr,
- const Instruction *CXTI = nullptr)
- : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI) {}
+ const Instruction *CXTI = nullptr, bool UseInstrInfo = true)
+ : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI), IIQ(UseInstrInfo) {}
SimplifyQuery getWithInstruction(Instruction *I) const {
SimplifyQuery Copy(*this);
Copy.CxtI = I;
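A minimal sketch of building a SimplifyQuery that tells the simplifier not to trust
instruction-level flags, assuming the usual analysis objects are already in scope (the helper
name is hypothetical):

  #include "llvm/Analysis/InstructionSimplify.h"
  using namespace llvm;

  static SimplifyQuery makeConservativeQuery(const DataLayout &DL,
                                             const TargetLibraryInfo *TLI,
                                             const DominatorTree *DT,
                                             AssumptionCache *AC,
                                             const Instruction *CxtI) {
    // With UseInstrInfo = false, InstrInfoQuery answers getMetadata with null
    // and hasNoUnsignedWrap/hasNoSignedWrap/isExact with false.
    return SimplifyQuery(DL, TLI, DT, AC, CxtI, /*UseInstrInfo=*/false);
  }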
diff --git a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
index 6b1950733246..3083db75b81c 100644
--- a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -6,7 +6,7 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
+/// \file
/// Compute iterated dominance frontiers using a linear time algorithm.
///
/// The algorithm used here is based on:
@@ -28,6 +28,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFGDiff.h"
#include "llvm/IR/Dominators.h"
namespace llvm {
@@ -45,17 +46,21 @@ namespace llvm {
template <class NodeTy, bool IsPostDom>
class IDFCalculator {
public:
- IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
- : DT(DT), useLiveIn(false) {}
+ IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
+ : DT(DT), GD(nullptr), useLiveIn(false) {}
- /// Give the IDF calculator the set of blocks in which the value is
- /// defined. This is equivalent to the set of starting blocks it should be
- /// calculating the IDF for (though later gets pruned based on liveness).
- ///
- /// Note: This set *must* live for the entire lifetime of the IDF calculator.
- void setDefiningBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
- DefBlocks = &Blocks;
- }
+ IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT,
+ const GraphDiff<BasicBlock *, IsPostDom> *GD)
+ : DT(DT), GD(GD), useLiveIn(false) {}
+
+ /// Give the IDF calculator the set of blocks in which the value is
+ /// defined. This is equivalent to the set of starting blocks it should be
+ /// calculating the IDF for (though later gets pruned based on liveness).
+ ///
+ /// Note: This set *must* live for the entire lifetime of the IDF calculator.
+ void setDefiningBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
+ DefBlocks = &Blocks;
+ }
/// Give the IDF calculator the set of blocks in which the value is
/// live on entry to the block. This is used to prune the IDF calculation to
@@ -85,6 +90,7 @@ class IDFCalculator {
private:
DominatorTreeBase<BasicBlock, IsPostDom> &DT;
+ const GraphDiff<BasicBlock *, IsPostDom> *GD;
bool useLiveIn;
const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
diff --git a/contrib/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
new file mode 100644
index 000000000000..fc426ad7fb64
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -0,0 +1,69 @@
+//===- llvm/Analysis/LegacyDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The kernel divergence analysis is an LLVM pass which can be used to find out
+// if a branch instruction in a GPU program (kernel) is divergent or not. It can help
+// branch optimizations such as jump threading and loop unswitching to make
+// better decisions.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
+#define LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+
+namespace llvm {
+class Value;
+class GPUDivergenceAnalysis;
+class LegacyDivergenceAnalysis : public FunctionPass {
+public:
+ static char ID;
+
+ LegacyDivergenceAnalysis() : FunctionPass(ID) {
+ initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnFunction(Function &F) override;
+
+ // Print all divergent branches in the function.
+ void print(raw_ostream &OS, const Module *) const override;
+
+ // Returns true if V is divergent at its definition.
+ //
+ // Even if this function returns false, V may still be divergent when used
+ // in a different basic block.
+ bool isDivergent(const Value *V) const;
+
+ // Returns true if V is uniform/non-divergent.
+ //
+ // Even if this function returns true, V may still be divergent when used
+ // in a different basic block.
+ bool isUniform(const Value *V) const { return !isDivergent(V); }
+
+ // Keep the analysis results up to date by removing an erased value.
+ void removeValue(const Value *V) { DivergentValues.erase(V); }
+
+private:
+ // Whether analysis should be performed by GPUDivergenceAnalysis.
+ bool shouldUseGPUDivergenceAnalysis(const Function &F) const;
+
+ // (optional) handle to new DivergenceAnalysis
+ std::unique_ptr<GPUDivergenceAnalysis> gpuDA;
+
+ // Stores all divergent values.
+ DenseSet<const Value *> DivergentValues;
+};
+} // End llvm namespace
+
+#endif //LLVM_ANALYSIS_LEGACY_DIVERGENCE_ANALYSIS_H
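A hedged sketch of how a client pass might consume this analysis once it has been run (the
helper name is illustrative):

  #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static bool branchNeedsPredication(const LegacyDivergenceAnalysis &DA,
                                     const BranchInst *BI) {
    // A conditional branch whose condition is divergent may send different
    // threads down different paths and typically needs structurization.
    return BI->isConditional() && DA.isDivergent(BI->getCondition());
  }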
diff --git a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index d27b3e42bbeb..4ed00e207753 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -97,6 +97,19 @@ public:
/// Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
+ /// Type to keep track of the status of the dependence check. The order of
+ /// the elements is important and has to be from most permissive to least
+ /// permissive.
+ enum class VectorizationSafetyStatus {
+ // Can vectorize safely without RT checks. All dependences are known to be
+ // safe.
+ Safe,
+ // Can possibly vectorize with RT checks to overcome unknown dependencies.
+ PossiblySafeWithRtChecks,
+ // Cannot vectorize due to known unsafe dependencies.
+ Unsafe,
+ };
+
 /// Dependence between memory access instructions.
struct Dependence {
/// The type of the dependence.
@@ -146,7 +159,7 @@ public:
Instruction *getDestination(const LoopAccessInfo &LAI) const;
/// Dependence types that don't prevent vectorization.
- static bool isSafeForVectorization(DepType Type);
+ static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
/// Lexically forward dependence.
bool isForward() const;
@@ -164,8 +177,8 @@ public:
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
: PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U),
- ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
- RecordDependences(true) {}
+ FoundNonConstantDistanceDependence(false),
+ Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}
/// Register the location (instructions are given increasing numbers)
/// of a write access.
@@ -193,7 +206,9 @@ public:
/// No memory dependence was encountered that would inhibit
/// vectorization.
- bool isSafeForVectorization() const { return SafeForVectorization; }
+ bool isSafeForVectorization() const {
+ return Status == VectorizationSafetyStatus::Safe;
+ }
/// The maximum number of bytes of a vector register we can vectorize
/// the accesses safely with.
@@ -205,7 +220,10 @@ public:
 /// In some cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
- bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+ bool shouldRetryWithRuntimeCheck() const {
+ return FoundNonConstantDistanceDependence &&
+ Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
+ }
/// Returns the memory dependences. If null is returned we exceeded
/// the MaxDependences threshold and this information is not
@@ -267,11 +285,12 @@ private:
/// If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
- bool ShouldRetryWithRuntimeCheck;
+ bool FoundNonConstantDistanceDependence;
- /// No memory dependence was encountered that would inhibit
- /// vectorization.
- bool SafeForVectorization;
+ /// Result of the dependence checks, indicating whether the checked
+ /// dependences are safe for vectorization, require RT checks or are known to
+ /// be unsafe.
+ VectorizationSafetyStatus Status;
//// True if Dependences reflects the dependences in the
//// loop. If false we exceeded MaxDependences and
@@ -304,6 +323,11 @@ private:
/// \return false if we shouldn't vectorize at all or avoid larger
/// vectorization factors by limiting MaxSafeDepDistBytes.
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
+
+ /// Updates the current safety status with \p S. We can go from Safe to
+ /// either PossiblySafeWithRtChecks or Unsafe and from
+ /// PossiblySafeWithRtChecks to Unsafe.
+ void mergeInStatus(VectorizationSafetyStatus S);
};
/// Holds information about the memory runtime legality checks to verify
@@ -564,11 +588,10 @@ public:
/// Print the information about the memory accesses in the loop.
void print(raw_ostream &OS, unsigned Depth = 0) const;
- /// Checks existence of store to invariant address inside loop.
- /// If the loop has any store to invariant address, then it returns true,
- /// else returns false.
- bool hasStoreToLoopInvariantAddress() const {
- return StoreToLoopInvariantAddress;
+ /// If the loop has a memory dependence involving an invariant address, i.e. two
+ /// stores or a store and a load, then return true, else return false.
+ bool hasDependenceInvolvingLoopInvariantAddress() const {
+ return HasDependenceInvolvingLoopInvariantAddress;
}
/// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
@@ -621,9 +644,8 @@ private:
/// Cache the result of analyzeLoop.
bool CanVecMem;
- /// Indicator for storing to uniform addresses.
- /// If a loop has write to a loop invariant address then it should be true.
- bool StoreToLoopInvariantAddress;
+ /// Indicator that there are non-vectorizable stores to a uniform address.
+ bool HasDependenceInvolvingLoopInvariantAddress;
/// The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
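For illustration, the documented transition rule for VectorizationSafetyStatus can be expressed
directly in terms of the enumerator order (a sketch of the rule, not necessarily the header's
implementation of mergeInStatus):

  #include "llvm/Analysis/LoopAccessAnalysis.h"
  using namespace llvm;

  using Status = MemoryDepChecker::VectorizationSafetyStatus;

  // Keep the least permissive of the two statuses; the only valid transitions
  // are Safe -> PossiblySafeWithRtChecks -> Unsafe.
  static Status mergeStatus(Status Current, Status New) {
    return Current < New ? New : Current;
  }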
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
index 30b29d66a1d1..72873546a068 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
@@ -408,6 +408,12 @@ public:
/// Verify loop structure of this loop and all nested loops.
void verifyLoopNest(DenseSet<const LoopT *> *Loops) const;
+ /// Returns true if the loop is annotated parallel.
+ ///
+ /// Derived classes can override this method using static template
+ /// polymorphism.
+ bool isAnnotatedParallel() const { return false; }
+
/// Print loop with all the BBs inside it.
void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const;
@@ -989,6 +995,26 @@ public:
/// Function to print a loop's contents as LLVM's text IR assembly.
void printLoop(Loop &L, raw_ostream &OS, const std::string &Banner = "");
+/// Find and return the loop attribute node for the attribute @p Name in
+/// @p LoopID. Return nullptr if there is no such attribute.
+MDNode *findOptionMDForLoopID(MDNode *LoopID, StringRef Name);
+
+/// Find string metadata for a loop.
+///
+/// Returns the MDNode where the first operand is the metadata's name. The
+/// following operands are the metadata's values. If no metadata with @p Name is
+/// found, return nullptr.
+MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name);
+
+/// Return whether an MDNode might represent an access group.
+///
+/// Access group metadata nodes have to be distinct and empty. Being
+/// always-empty ensures that it never needs to be changed (which -- because
+/// MDNodes are designed immutable -- would require creating a new MDNode). Note
+/// that this is not a sufficient condition: not every distinct and empty MDNode
+/// represents an access group.
+bool isValidAsAccessGroup(MDNode *AccGroup);
+
} // End llvm namespace
#endif
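A brief sketch of the new loop-metadata helper (the query function is illustrative; the metadata
name shown is just an example):

  #include "llvm/Analysis/LoopInfo.h"
  using namespace llvm;

  // Returns true if the loop carries an attribute MDNode whose first operand
  // is 'Name', e.g. loopHasNamedOption(L, "llvm.loop.unroll.disable").
  static bool loopHasNamedOption(const Loop *L, StringRef Name) {
    return findOptionMDForLoop(L, Name) != nullptr;
  }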
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 941389858868..2b807919fedf 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -392,7 +392,10 @@ void LoopBase<BlockT, LoopT>::verifyLoopNest(
template <class BlockT, class LoopT>
void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth,
bool Verbose) const {
- OS.indent(Depth * 2) << "Loop at depth " << getLoopDepth() << " containing: ";
+ OS.indent(Depth * 2);
+ if (static_cast<const LoopT *>(this)->isAnnotatedParallel())
+ OS << "Parallel ";
+ OS << "Loop at depth " << getLoopDepth() << " containing: ";
BlockT *H = getHeader();
for (unsigned i = 0; i < getBlocks().size(); ++i) {
@@ -640,8 +643,8 @@ void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const {
template <typename T>
bool compareVectors(std::vector<T> &BB1, std::vector<T> &BB2) {
- llvm::sort(BB1.begin(), BB1.end());
- llvm::sort(BB2.begin(), BB2.end());
+ llvm::sort(BB1);
+ llvm::sort(BB2);
return BB1 == BB2;
}
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 1c40cffc7f67..958d4fe4b832 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -37,7 +37,6 @@
namespace llvm {
class AssumptionCache;
-class CallSite;
class DominatorTree;
class Function;
class Instruction;
@@ -304,7 +303,7 @@ private:
/// The maximum size of the dereferences of the pointer.
///
/// May be UnknownSize if the sizes are unknown.
- LocationSize Size = MemoryLocation::UnknownSize;
+ LocationSize Size = LocationSize::unknown();
/// The AA tags associated with dereferences of the pointer.
///
/// The members may be null if there are no tags or conflicting tags.
@@ -398,7 +397,7 @@ public:
/// invalidated on the next non-local query or when an instruction is
/// removed. Clients must copy this data if they want it around longer than
/// that.
- const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS);
+ const NonLocalDepInfo &getNonLocalCallDependency(CallBase *QueryCall);
/// Perform a full dependency query for an access to the QueryInst's
/// specified memory location, returning the set of instructions that either
@@ -482,9 +481,9 @@ public:
void releaseMemory();
private:
- MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
- BasicBlock::iterator ScanIt,
- BasicBlock *BB);
+ MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall,
+ BasicBlock::iterator ScanIt,
+ BasicBlock *BB);
bool getNonLocalPointerDepFromBB(Instruction *QueryInst,
const PHITransAddr &Pointer,
const MemoryLocation &Loc, bool isLoad,
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryLocation.h b/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
index 6b680000312c..fca18c1b5999 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -16,9 +16,9 @@
#ifndef LLVM_ANALYSIS_MEMORYLOCATION_H
#define LLVM_ANALYSIS_MEMORYLOCATION_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
namespace llvm {
@@ -34,8 +34,134 @@ class AnyMemIntrinsic;
class TargetLibraryInfo;
// Represents the size of a MemoryLocation. Logically, it's an
-// Optional<uint64_t>, with a special UnknownSize value from `MemoryLocation`.
-using LocationSize = uint64_t;
+// Optional<uint64_t> that also carries a bit to represent whether the integer
+// it contains, N, is 'precise'. Precise, in this context, means that we know
+// that the area of storage referenced by the given MemoryLocation must be
+// precisely N bytes. An imprecise value is formed as the union of two or more
+// precise values, and can conservatively represent all of the values unioned
+// into it. Importantly, imprecise values are an *upper-bound* on the size of a
+// MemoryLocation.
+//
+// Concretely, a precise MemoryLocation is (%p, 4) in
+// store i32 0, i32* %p
+//
+// Since we know that %p must be at least 4 bytes large at this point.
+// Otherwise, we have UB. An example of an imprecise MemoryLocation is (%p, 4)
+// at the memcpy in
+//
+// %n = select i1 %foo, i64 1, i64 4
+// call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %baz, i64 %n, i32 1,
+// i1 false)
+//
+// ...Since we'll copy *up to* 4 bytes into %p, but we can't guarantee that
+// we'll ever actually do so.
+//
+// If asked to represent a pathologically large value, this will degrade to
+// None.
+class LocationSize {
+ enum : uint64_t {
+ Unknown = ~uint64_t(0),
+ ImpreciseBit = uint64_t(1) << 63,
+ MapEmpty = Unknown - 1,
+ MapTombstone = Unknown - 2,
+
+ // The maximum value we can represent without falling back to 'unknown'.
+ MaxValue = (MapTombstone - 1) & ~ImpreciseBit,
+ };
+
+ uint64_t Value;
+
+ // Hack to support implicit construction. This should disappear when the
+ // public LocationSize ctor goes away.
+ enum DirectConstruction { Direct };
+
+ constexpr LocationSize(uint64_t Raw, DirectConstruction): Value(Raw) {}
+
+ static_assert(Unknown & ImpreciseBit, "Unknown is imprecise by definition.");
+public:
+ // FIXME: Migrate all users to construct via either `precise` or `upperBound`,
+ // to make it more obvious at the call site what kind of size they're
+ // providing.
+ //
+ // Since the overwhelming majority of users of this provide precise values,
+ // this assumes the provided value is precise.
+ constexpr LocationSize(uint64_t Raw)
+ : Value(Raw > MaxValue ? Unknown : Raw) {}
+
+ static LocationSize precise(uint64_t Value) { return LocationSize(Value); }
+
+ static LocationSize upperBound(uint64_t Value) {
+ // You can't go lower than 0, so give a precise result.
+ if (LLVM_UNLIKELY(Value == 0))
+ return precise(0);
+ if (LLVM_UNLIKELY(Value > MaxValue))
+ return unknown();
+ return LocationSize(Value | ImpreciseBit, Direct);
+ }
+
+ constexpr static LocationSize unknown() {
+ return LocationSize(Unknown, Direct);
+ }
+
+ // Sentinel values, generally used for maps.
+ constexpr static LocationSize mapTombstone() {
+ return LocationSize(MapTombstone, Direct);
+ }
+ constexpr static LocationSize mapEmpty() {
+ return LocationSize(MapEmpty, Direct);
+ }
+
+ // Returns a LocationSize that can correctly represent either `*this` or
+ // `Other`.
+ LocationSize unionWith(LocationSize Other) const {
+ if (Other == *this)
+ return *this;
+
+ if (!hasValue() || !Other.hasValue())
+ return unknown();
+
+ return upperBound(std::max(getValue(), Other.getValue()));
+ }
+
+ bool hasValue() const { return Value != Unknown; }
+ uint64_t getValue() const {
+ assert(hasValue() && "Getting value from an unknown LocationSize!");
+ return Value & ~ImpreciseBit;
+ }
+
+ // Returns whether or not this value is precise. Note that if a value is
+ // precise, it's guaranteed to not be `unknown()`.
+ bool isPrecise() const {
+ return (Value & ImpreciseBit) == 0;
+ }
+
+ // Convenience method to check if this LocationSize's value is 0.
+ bool isZero() const { return hasValue() && getValue() == 0; }
+
+ bool operator==(const LocationSize &Other) const {
+ return Value == Other.Value;
+ }
+
+ bool operator!=(const LocationSize &Other) const {
+ return !(*this == Other);
+ }
+
+ // Ordering operators are not provided, since it's unclear if there's only one
+ // reasonable way to compare:
+ // - values that don't exist against values that do, and
+ // - precise values to imprecise values
+
+ void print(raw_ostream &OS) const;
+
+ // Returns an opaque value that represents this LocationSize. Cannot be
+ // reliably converted back into a LocationSize.
+ uint64_t toRaw() const { return Value; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, LocationSize Size) {
+ Size.print(OS);
+ return OS;
+}
/// Representation for a specific memory location.
///
@@ -108,11 +234,15 @@ public:
static MemoryLocation getForDest(const AnyMemIntrinsic *MI);
/// Return a location representing a particular argument of a call.
- static MemoryLocation getForArgument(ImmutableCallSite CS, unsigned ArgIdx,
- const TargetLibraryInfo &TLI);
+ static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx,
+ const TargetLibraryInfo *TLI);
+ static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx,
+ const TargetLibraryInfo &TLI) {
+ return getForArgument(Call, ArgIdx, &TLI);
+ }
explicit MemoryLocation(const Value *Ptr = nullptr,
- LocationSize Size = UnknownSize,
+ LocationSize Size = LocationSize::unknown(),
const AAMDNodes &AATags = AAMDNodes())
: Ptr(Ptr), Size(Size), AATags(AATags) {}
@@ -139,13 +269,30 @@ public:
}
};
-// Specialize DenseMapInfo for MemoryLocation.
+// Specialize DenseMapInfo.
+template <> struct DenseMapInfo<LocationSize> {
+ static inline LocationSize getEmptyKey() {
+ return LocationSize::mapEmpty();
+ }
+ static inline LocationSize getTombstoneKey() {
+ return LocationSize::mapTombstone();
+ }
+ static unsigned getHashValue(const LocationSize &Val) {
+ return DenseMapInfo<uint64_t>::getHashValue(Val.toRaw());
+ }
+ static bool isEqual(const LocationSize &LHS, const LocationSize &RHS) {
+ return LHS == RHS;
+ }
+};
+
template <> struct DenseMapInfo<MemoryLocation> {
static inline MemoryLocation getEmptyKey() {
- return MemoryLocation(DenseMapInfo<const Value *>::getEmptyKey(), 0);
+ return MemoryLocation(DenseMapInfo<const Value *>::getEmptyKey(),
+ DenseMapInfo<LocationSize>::getEmptyKey());
}
static inline MemoryLocation getTombstoneKey() {
- return MemoryLocation(DenseMapInfo<const Value *>::getTombstoneKey(), 0);
+ return MemoryLocation(DenseMapInfo<const Value *>::getTombstoneKey(),
+ DenseMapInfo<LocationSize>::getTombstoneKey());
}
static unsigned getHashValue(const MemoryLocation &Val) {
return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^
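A short sketch of the new LocationSize semantics, using only the methods defined above:

  #include "llvm/Analysis/MemoryLocation.h"
  #include <cassert>
  using namespace llvm;

  static LocationSize combineAccessSizes() {
    LocationSize A = LocationSize::precise(4);     // e.g. a 4-byte store
    LocationSize B = LocationSize::upperBound(16); // "at most 16 bytes"
    // The union must conservatively cover both accesses, so it becomes an
    // imprecise upper bound of 16 bytes.
    LocationSize U = A.unionWith(B);
    assert(!U.isPrecise() && U.getValue() == 16);
    return U;
  }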
diff --git a/contrib/llvm/include/llvm/Analysis/MemorySSA.h b/contrib/llvm/include/llvm/Analysis/MemorySSA.h
index d445e4430e5c..17e2d0c73977 100644
--- a/contrib/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/contrib/llvm/include/llvm/Analysis/MemorySSA.h
@@ -280,9 +280,10 @@ protected:
friend class MemorySSAUpdater;
MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty,
- DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB)
- : MemoryAccess(C, Vty, DeleteValue, BB, 1), MemoryInstruction(MI),
- OptimizedAccessAlias(MayAlias) {
+ DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB,
+ unsigned NumOperands)
+ : MemoryAccess(C, Vty, DeleteValue, BB, NumOperands),
+ MemoryInstruction(MI), OptimizedAccessAlias(MayAlias) {
setDefiningAccess(DMA);
}
@@ -308,11 +309,6 @@ private:
Optional<AliasResult> OptimizedAccessAlias;
};
-template <>
-struct OperandTraits<MemoryUseOrDef>
- : public FixedNumOperandTraits<MemoryUseOrDef, 1> {};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)
-
/// Represents read-only accesses to memory
///
/// In particular, the set of Instructions that will be represented by
@@ -323,7 +319,8 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB)
- : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB) {}
+ : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB,
+ /*NumOperands=*/1) {}
// allocate space for exactly one operand
void *operator new(size_t s) { return User::operator new(s, 1); }
@@ -381,31 +378,33 @@ public:
MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB,
unsigned Ver)
- : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB), ID(Ver) {}
+ : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB,
+ /*NumOperands=*/2),
+ ID(Ver) {}
- // allocate space for exactly one operand
- void *operator new(size_t s) { return User::operator new(s, 1); }
+ // allocate space for exactly two operands
+ void *operator new(size_t s) { return User::operator new(s, 2); }
static bool classof(const Value *MA) {
return MA->getValueID() == MemoryDefVal;
}
void setOptimized(MemoryAccess *MA) {
- Optimized = MA;
- OptimizedID = getDefiningAccess()->getID();
+ setOperand(1, MA);
+ OptimizedID = MA->getID();
}
MemoryAccess *getOptimized() const {
- return cast_or_null<MemoryAccess>(Optimized);
+ return cast_or_null<MemoryAccess>(getOperand(1));
}
bool isOptimized() const {
- return getOptimized() && getDefiningAccess() &&
- OptimizedID == getDefiningAccess()->getID();
+ return getOptimized() && OptimizedID == getOptimized()->getID();
}
void resetOptimized() {
OptimizedID = INVALID_MEMORYACCESS_ID;
+ setOperand(1, nullptr);
}
void print(raw_ostream &OS) const;
@@ -417,13 +416,34 @@ private:
const unsigned ID;
unsigned OptimizedID = INVALID_MEMORYACCESS_ID;
- WeakVH Optimized;
};
template <>
-struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 1> {};
+struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 2> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess)
+template <>
+struct OperandTraits<MemoryUseOrDef> {
+ static Use *op_begin(MemoryUseOrDef *MUD) {
+ if (auto *MU = dyn_cast<MemoryUse>(MUD))
+ return OperandTraits<MemoryUse>::op_begin(MU);
+ return OperandTraits<MemoryDef>::op_begin(cast<MemoryDef>(MUD));
+ }
+
+ static Use *op_end(MemoryUseOrDef *MUD) {
+ if (auto *MU = dyn_cast<MemoryUse>(MUD))
+ return OperandTraits<MemoryUse>::op_end(MU);
+ return OperandTraits<MemoryDef>::op_end(cast<MemoryDef>(MUD));
+ }
+
+ static unsigned operands(const MemoryUseOrDef *MUD) {
+ if (const auto *MU = dyn_cast<MemoryUse>(MUD))
+ return OperandTraits<MemoryUse>::operands(MU);
+ return OperandTraits<MemoryDef>::operands(cast<MemoryDef>(MUD));
+ }
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)
+
/// Represents phi nodes for memory accesses.
///
/// These have the same semantic as regular phi nodes, with the exception that
@@ -684,13 +704,19 @@ public:
~MemorySSA();
MemorySSAWalker *getWalker();
+ MemorySSAWalker *getSkipSelfWalker();
/// Given a memory Mod/Ref'ing instruction, get the MemorySSA
/// access associated with it. If passed a basic block gets the memory phi
/// node that exists for that block, if there is one. Otherwise, this will get
/// a MemoryUseOrDef.
- MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
- MemoryPhi *getMemoryAccess(const BasicBlock *BB) const;
+ MemoryUseOrDef *getMemoryAccess(const Instruction *I) const {
+ return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
+ }
+
+ MemoryPhi *getMemoryAccess(const BasicBlock *BB) const {
+ return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
+ }
void dump() const;
void print(raw_ostream &) const;
@@ -750,6 +776,9 @@ public:
/// all uses, uses appear in the right places). This is used by unit tests.
void verifyMemorySSA() const;
+ /// Check clobber sanity for an access.
+ void checkClobberSanityAccess(const MemoryAccess *MA) const;
+
/// Used in various insertion functions to specify whether we are talking
/// about the beginning or end of a block.
enum InsertionPlace { Beginning, End };
@@ -764,6 +793,7 @@ protected:
void verifyDomination(Function &F) const;
void verifyOrdering(Function &F) const;
void verifyDominationNumbers(const Function &F) const;
+ void verifyClobberSanity(const Function &F) const;
// This is used by the use optimizer and updater.
AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
@@ -796,16 +826,20 @@ protected:
InsertionPlace);
void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
AccessList::iterator);
- MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
+ MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
+ const MemoryUseOrDef *Template = nullptr);
private:
+ class ClobberWalkerBase;
class CachingWalker;
+ class SkipSelfWalker;
class OptimizeUses;
CachingWalker *getWalkerImpl();
void buildMemorySSA();
void optimizeUses();
+ void prepareForMoveTo(MemoryAccess *, BasicBlock *);
void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
@@ -816,7 +850,8 @@ private:
void markUnreachableAsLiveOnEntry(BasicBlock *BB);
bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
MemoryPhi *createMemoryPhi(BasicBlock *BB);
- MemoryUseOrDef *createNewAccess(Instruction *);
+ MemoryUseOrDef *createNewAccess(Instruction *,
+ const MemoryUseOrDef *Template = nullptr);
MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
@@ -851,7 +886,9 @@ private:
mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
// Memory SSA building info
+ std::unique_ptr<ClobberWalkerBase> WalkerBase;
std::unique_ptr<CachingWalker> Walker;
+ std::unique_ptr<SkipSelfWalker> SkipWalker;
unsigned NextID;
};
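A hedged sketch of the inline accessors introduced above, assuming an up-to-date MemorySSA
analysis for the function containing the instruction:

  #include "llvm/Analysis/MemorySSA.h"
  using namespace llvm;

  static void inspect(MemorySSA &MSSA, Instruction &I) {
    if (MemoryUseOrDef *MA = MSSA.getMemoryAccess(&I)) {
      // Every memory Mod/Ref'ing instruction has a MemoryUse or MemoryDef;
      // its defining access is the nearest dominating def (or phi).
      MemoryAccess *Def = MA->getDefiningAccess();
      (void)Def;
    }
  }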
diff --git a/contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index 38f08c1eebdc..169d5bd9fa8b 100644
--- a/contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/contrib/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -35,8 +35,11 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFGDiff.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
@@ -45,6 +48,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@@ -57,6 +61,12 @@ class MemoryAccess;
class LLVMContext;
class raw_ostream;
+using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
+using PhiToDefMap = SmallDenseMap<MemoryPhi *, MemoryAccess *>;
+using CFGUpdate = cfg::Update<BasicBlock *>;
+using GraphDiffInvBBPair =
+ std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>;
+
class MemorySSAUpdater {
private:
MemorySSA *MSSA;
@@ -70,6 +80,7 @@ private:
public:
MemorySSAUpdater(MemorySSA *MSSA) : MSSA(MSSA) {}
+
/// Insert a definition into the MemorySSA IR. RenameUses will rename any use
/// below the new def block (and any inserted phis). RenameUses should be set
/// to true if the definition may cause new aliases for loads below it. This
@@ -89,15 +100,48 @@ public:
/// Where a mayalias b, *does* require RenameUses be set to true.
void insertDef(MemoryDef *Def, bool RenameUses = false);
void insertUse(MemoryUse *Use);
+ /// Update the MemoryPhi in `To` following an edge deletion between `From` and
+ /// `To`. If `To` becomes unreachable, a call to removeBlocks should be made.
+ void removeEdge(BasicBlock *From, BasicBlock *To);
+ /// Update the MemoryPhi in `To` to have a single incoming edge from `From`,
+ /// following a CFG change that replaced multiple edges (switch) with a direct
+ /// branch.
+ void removeDuplicatePhiEdgesBetween(BasicBlock *From, BasicBlock *To);
+ /// Update MemorySSA after a loop was cloned, given the blocks in RPO order,
+ /// the exit blocks and a 1:1 mapping of all blocks and instructions
+ /// cloned. This involves duplicating all defs and uses in the cloned blocks.
+ /// Updating phi nodes in exit block successors is done separately.
+ void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+ ArrayRef<BasicBlock *> ExitBlocks,
+ const ValueToValueMapTy &VM,
+ bool IgnoreIncomingWithNoClones = false);
+ // Block BB was fully or partially cloned into its predecessor P1. Map
+ // contains the 1:1 mapping of instructions cloned and VM[BB]=P1.
+ void updateForClonedBlockIntoPred(BasicBlock *BB, BasicBlock *P1,
+ const ValueToValueMapTy &VM);
+ /// Update phi nodes in exit block successors following cloning. Exit blocks
+ /// that were not cloned don't have additional predecessors added.
+ void updateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
+ const ValueToValueMapTy &VMap,
+ DominatorTree &DT);
+ void updateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT);
+
+ /// Apply CFG updates, analogous with the DT edge updates.
+ void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+ /// Apply CFG insert updates, analogous with the DT edge updates.
+ void applyInsertUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+
void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where);
void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where);
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
MemorySSA::InsertionPlace Where);
- /// `From` block was spliced into `From` and `To`.
- /// Move all accesses from `From` to `To` starting at instruction `Start`.
- /// `To` is newly created BB, so empty of MemorySSA::MemoryAccesses.
- /// Edges are already updated, so successors of `To` with MPhi nodes need to
- /// update incoming block.
+ /// `From` block was spliced into `From` and `To`. There is a CFG edge from
+ /// `From` to `To`. Move all accesses from `From` to `To` starting at
+ /// instruction `Start`. `To` is a newly created BB, so it is empty of
+ /// MemorySSA::MemoryAccesses. Edges are already updated, so successors of
+ /// `To` with MPhi nodes need their incoming block updated.
/// |------| |------|
/// | From | | From |
/// | | |------|
@@ -108,12 +152,12 @@ public:
/// |------| |------|
void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To,
Instruction *Start);
- /// `From` block was merged into `To`. All instructions were moved and
- /// `From` is an empty block with successor edges; `From` is about to be
- /// deleted. Move all accesses from `From` to `To` starting at instruction
- /// `Start`. `To` may have multiple successors, `From` has a single
- /// predecessor. `From` may have successors with MPhi nodes, replace their
- /// incoming block with `To`.
+ /// `From` block was merged into `To`. There is a CFG edge from `To` to
+ /// `From`. `To` still branches to `From`, but all instructions were moved and
+ /// `From` is now an empty block; `From` is about to be deleted. Move all
+ /// accesses from `From` to `To` starting at instruction `Start`. `To` may
+ /// have multiple successors, while `From` has a single predecessor. `From` may
+ /// have successors with MPhi nodes; replace their incoming block with `To`.
/// |------| |------|
/// | To | | To |
/// |------| | |
@@ -124,15 +168,14 @@ public:
/// |------| |------|
void moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To,
Instruction *Start);
- /// BasicBlock Old had New, an empty BasicBlock, added directly before it,
- /// and the predecessors in Preds that used to point to Old, now point to
- /// New. If New is the only predecessor, move Old's Phi, if present, to New.
+ /// A new empty BasicBlock (New) now branches directly to Old. Some of
+ /// Old's predecessors (Preds) are now branching to New instead of Old.
+ /// If New is the only predecessor, move Old's Phi, if present, to New.
/// Otherwise, add a new Phi in New with appropriate incoming values, and
/// update the incoming values in Old's Phi node too, if present.
- void
- wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New,
- ArrayRef<BasicBlock *> Preds);
-
+ void wireOldPredecessorsToNewImmediatePredecessor(
+ BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds,
+ bool IdenticalEdgesWereMerged = true);
// The below are utility functions. Other than creation of accesses to pass
// to insertDef, and removeAccess to remove accesses, you should generally
// not attempt to update memoryssa yourself. It is very non-trivial to get
@@ -220,6 +263,23 @@ private:
template <class RangeType>
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
void fixupDefs(const SmallVectorImpl<WeakVH> &);
+ // Clone all uses and defs from BB to NewBB given a 1:1 map of all
+ // instructions and blocks cloned, and a map of MemoryPhi : Definition
+ // (MemoryAccess Phi or Def). VMap maps old instructions to cloned
+ // instructions and old blocks to cloned blocks. MPhiMap is created in the
+ // caller of this private method, and maps existing MemoryPhis to new
+ // definitions that new MemoryAccesses must point to. These definitions may
+ // not necessarily be MemoryPhis themselves; they may be MemoryDefs. As such,
+ // the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses
+ // may be MemoryPhis or MemoryDefs and not MemoryUses.
+ void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+ const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap);
+ template <typename Iter>
+ void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
+ Iter ValuesBegin, Iter ValuesEnd,
+ DominatorTree &DT);
+ void applyInsertUpdates(ArrayRef<CFGUpdate>, DominatorTree &DT,
+ const GraphDiff<BasicBlock *> *GD);
};
} // end namespace llvm
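As a hedged illustration of the updater API introduced above: a transform that already records its CFG edge changes for the dominator tree can hand the same update list to MemorySSA. The helper below is a sketch only; the surrounding pass and the way the updates are collected are assumed, not part of this change.

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// Sketch: keep the dominator tree and MemorySSA in sync with one update list.
static void applyCFGChanges(MemorySSA &MSSA, DominatorTree &DT,
                            ArrayRef<cfg::Update<BasicBlock *>> Updates) {
  MemorySSAUpdater MSSAU(&MSSA);
  DT.applyUpdates(Updates);        // update the dominator tree first
  MSSAU.applyUpdates(Updates, DT); // then apply the analogous MemorySSA updates
}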
diff --git a/contrib/llvm/include/llvm/Analysis/MustExecute.h b/contrib/llvm/include/llvm/Analysis/MustExecute.h
index 97ad76d451ca..ad3222c17e62 100644
--- a/contrib/llvm/include/llvm/Analysis/MustExecute.h
+++ b/contrib/llvm/include/llvm/Analysis/MustExecute.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
@@ -31,33 +32,138 @@ class DominatorTree;
class Loop;
/// Captures loop safety information.
-/// It keep information for loop & its header may throw exception or otherwise
+/// It keeps information on whether loop blocks may throw an exception or
/// otherwise exit abnormally on any iteration of the loop which might actually
/// execute at runtime. The primary way to consume this information is via
/// isGuaranteedToExecute below, but some callers bail out or fall back to
/// alternate reasoning if a loop contains any implicit control flow.
-struct LoopSafetyInfo {
- bool MayThrow = false; // The current loop contains an instruction which
- // may throw.
- bool HeaderMayThrow = false; // Same as previous, but specific to loop header
+/// NOTE: LoopSafetyInfo contains cached information regarding loops and their
+/// particular blocks. This information is only dropped on invocation of
+/// computeLoopSafetyInfo. If the loop or any of its blocks is deleted, or if
+/// any throwing instructions have been added or removed from them, or if the
+/// control flow has changed, or in case of other meaningful modifications, the
+/// LoopSafetyInfo needs to be recomputed. If meaningful modifications to the
+/// loop were made and the info wasn't recomputed properly, the behavior of all
+/// methods except for computeLoopSafetyInfo is undefined.
+class LoopSafetyInfo {
// Used to update funclet bundle operands.
DenseMap<BasicBlock *, ColorVector> BlockColors;
+protected:
+ /// Computes block colors.
+ void computeBlockColors(const Loop *CurLoop);
+
+public:
+ /// Returns block colors map that is used to update funclet operand bundles.
+ const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const;
+
+ /// Copy colors of block \p Old into the block \p New.
+ void copyColors(BasicBlock *New, BasicBlock *Old);
+
+ /// Returns true iff the block \p BB potentially may throw an exception. It
+ /// can be a false positive in cases where we want to avoid complex analysis.
+ virtual bool blockMayThrow(const BasicBlock *BB) const = 0;
+
+ /// Returns true iff any block of the loop for which this info is computed
+ /// contains an instruction that may throw or otherwise exit abnormally.
+ virtual bool anyBlockMayThrow() const = 0;
+
+ /// Return true if we must reach the block \p BB under assumption that the
+ /// loop \p CurLoop is entered.
+ bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB,
+ const DominatorTree *DT) const;
+
+ /// Computes safety information for the loop \p CurLoop, checking its body and
+ /// header for instructions that may throw. Updates the safety information
+ /// stored in this object.
+ /// Note: This is defined to clear and reinitialize an already initialized
+ /// LoopSafetyInfo. Some callers rely on this fact.
+ virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0;
+
+ /// Returns true if the instruction in a loop is guaranteed to execute at
+ /// least once (under the assumption that the loop is entered).
+ virtual bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const = 0;
+
LoopSafetyInfo() = default;
+
+ virtual ~LoopSafetyInfo() = default;
};
-/// Computes safety information for a loop checks loop body & header for
-/// the possibility of may throw exception, it takes LoopSafetyInfo and loop as
-/// argument. Updates safety information in LoopSafetyInfo argument.
-/// Note: This is defined to clear and reinitialize an already initialized
-/// LoopSafetyInfo. Some callers rely on this fact.
-void computeLoopSafetyInfo(LoopSafetyInfo *, Loop *);
-
-/// Returns true if the instruction in a loop is guaranteed to execute at least
-/// once (under the assumption that the loop is entered).
-bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT,
- const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo);
+
+/// Simple and conservative implementation of LoopSafetyInfo that can give
+/// false-positive answers to its queries in order to avoid complicated
+/// analysis.
+class SimpleLoopSafetyInfo: public LoopSafetyInfo {
+ bool MayThrow = false; // The current loop contains an instruction which
+ // may throw.
+ bool HeaderMayThrow = false; // Same as previous, but specific to loop header
+
+public:
+ virtual bool blockMayThrow(const BasicBlock *BB) const;
+
+ virtual bool anyBlockMayThrow() const;
+
+ virtual void computeLoopSafetyInfo(const Loop *CurLoop);
+
+ virtual bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const;
+
+ SimpleLoopSafetyInfo() : LoopSafetyInfo() {};
+
+ virtual ~SimpleLoopSafetyInfo() {};
+};
+
+/// This implementation of LoopSafetyInfo uses ImplicitControlFlowTracking to
+/// give precise answers to "may throw" queries. It uses a cache that must be
+/// invalidated by calling the methods insertInstructionTo and removeInstruction
+/// whenever we modify a basic block's contents by adding or removing
+/// instructions.
+class ICFLoopSafetyInfo: public LoopSafetyInfo {
+ bool MayThrow = false; // The current loop contains an instruction which
+ // may throw.
+ // Contains information about implicit control flow in this loop's blocks.
+ mutable ImplicitControlFlowTracking ICF;
+ // Contains information about instructions that may write to memory.
+ mutable MemoryWriteTracking MW;
+
+public:
+ virtual bool blockMayThrow(const BasicBlock *BB) const;
+
+ virtual bool anyBlockMayThrow() const;
+
+ virtual void computeLoopSafetyInfo(const Loop *CurLoop);
+
+ virtual bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const;
+
+ /// Returns true if we could not execute a memory-modifying instruction before
+ /// we enter \p BB under the assumption that \p CurLoop is entered.
+ bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop)
+ const;
+
+ /// Returns true if we could not execute a memory-modifying instruction before
+ /// we execute \p I under the assumption that \p CurLoop is entered.
+ bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop)
+ const;
+
+ /// Inform the safety info that we are planning to insert a new instruction
+ /// \p Inst into the basic block \p BB. It will make all cache updates to keep
+ /// it correct after this insertion.
+ void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
+
+ /// Inform safety info that we are planning to remove the instruction \p Inst
+ /// from its block. It will make all cache updates to keep it correct after
+ /// this removal.
+ void removeInstruction(const Instruction *Inst);
+
+ ICFLoopSafetyInfo(DominatorTree *DT) : LoopSafetyInfo(), ICF(DT), MW(DT) {};
+
+ virtual ~ICFLoopSafetyInfo() {};
+};
}
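A minimal sketch of how a LICM-style client might use the class hierarchy above; the loop, dominator tree and candidate instruction are assumed to come from the caller, and real hoisting legality involves more checks than shown here.

#include "llvm/Analysis/MustExecute.h"

using namespace llvm;

// Sketch: use ICFLoopSafetyInfo to ask whether an instruction is guaranteed
// to execute once the loop is entered (a prerequisite for hoisting it).
static bool guaranteedToExecuteInLoop(Instruction &I, Loop *L,
                                      DominatorTree *DT) {
  ICFLoopSafetyInfo SafetyInfo(DT);
  SafetyInfo.computeLoopSafetyInfo(L); // must be recomputed after loop changes
  return SafetyInfo.isGuaranteedToExecute(I, DT, L);
}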
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
index 559c77c30811..58a67042ea2d 100644
--- a/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
@@ -60,7 +60,7 @@ public:
FunctionModRefBehavior getModRefBehavior(const Function *F);
using AAResultBase::getModRefInfo;
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index 07beb0bb60a3..1f497fab35da 100644
--- a/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -51,25 +51,25 @@ extern bool EnableARCOpts;
/// on.
inline bool ModuleHasARC(const Module &M) {
return
- M.getNamedValue("objc_retain") ||
- M.getNamedValue("objc_release") ||
- M.getNamedValue("objc_autorelease") ||
- M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
- M.getNamedValue("objc_unsafeClaimAutoreleasedReturnValue") ||
- M.getNamedValue("objc_retainBlock") ||
- M.getNamedValue("objc_autoreleaseReturnValue") ||
- M.getNamedValue("objc_autoreleasePoolPush") ||
- M.getNamedValue("objc_loadWeakRetained") ||
- M.getNamedValue("objc_loadWeak") ||
- M.getNamedValue("objc_destroyWeak") ||
- M.getNamedValue("objc_storeWeak") ||
- M.getNamedValue("objc_initWeak") ||
- M.getNamedValue("objc_moveWeak") ||
- M.getNamedValue("objc_copyWeak") ||
- M.getNamedValue("objc_retainedObject") ||
- M.getNamedValue("objc_unretainedObject") ||
- M.getNamedValue("objc_unretainedPointer") ||
- M.getNamedValue("clang.arc.use");
+ M.getNamedValue("llvm.objc.retain") ||
+ M.getNamedValue("llvm.objc.release") ||
+ M.getNamedValue("llvm.objc.autorelease") ||
+ M.getNamedValue("llvm.objc.retainAutoreleasedReturnValue") ||
+ M.getNamedValue("llvm.objc.unsafeClaimAutoreleasedReturnValue") ||
+ M.getNamedValue("llvm.objc.retainBlock") ||
+ M.getNamedValue("llvm.objc.autoreleaseReturnValue") ||
+ M.getNamedValue("llvm.objc.autoreleasePoolPush") ||
+ M.getNamedValue("llvm.objc.loadWeakRetained") ||
+ M.getNamedValue("llvm.objc.loadWeak") ||
+ M.getNamedValue("llvm.objc.destroyWeak") ||
+ M.getNamedValue("llvm.objc.storeWeak") ||
+ M.getNamedValue("llvm.objc.initWeak") ||
+ M.getNamedValue("llvm.objc.moveWeak") ||
+ M.getNamedValue("llvm.objc.copyWeak") ||
+ M.getNamedValue("llvm.objc.retainedObject") ||
+ M.getNamedValue("llvm.objc.unretainedObject") ||
+ M.getNamedValue("llvm.objc.unretainedPointer") ||
+ M.getNamedValue("llvm.objc.clang.arc.use");
}
/// This is a wrapper around getUnderlyingObject which also knows how to
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h b/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h
index 0b92d8b48356..018ea1f851be 100644
--- a/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCInstKind.h
@@ -11,6 +11,7 @@
#define LLVM_ANALYSIS_OBJCARCINSTKIND_H
#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Instructions.h"
namespace llvm {
@@ -48,7 +49,7 @@ enum class ARCInstKind {
CopyWeak, ///< objc_copyWeak (derived)
DestroyWeak, ///< objc_destroyWeak (derived)
StoreStrong, ///< objc_storeStrong (derived)
- IntrinsicUser, ///< clang.arc.use
+ IntrinsicUser, ///< llvm.objc.clang.arc.use
CallOrUser, ///< could call objc_release and/or "use" pointers
Call, ///< could call objc_release
User, ///< could "use" a pointer
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/OrderedInstructions.h b/contrib/llvm/include/llvm/Analysis/OrderedInstructions.h
index 7f57fde638b8..7e3850b87c57 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/OrderedInstructions.h
+++ b/contrib/llvm/include/llvm/Analysis/OrderedInstructions.h
@@ -17,8 +17,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
-#define LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
+#ifndef LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
+#define LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
@@ -62,4 +62,4 @@ public:
} // end namespace llvm
-#endif // LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
+#endif // LLVM_ANALYSIS_ORDEREDINSTRUCTIONS_H
diff --git a/contrib/llvm/include/llvm/Analysis/Passes.h b/contrib/llvm/include/llvm/Analysis/Passes.h
index 09b28a0b0884..081dd5000835 100644
--- a/contrib/llvm/include/llvm/Analysis/Passes.h
+++ b/contrib/llvm/include/llvm/Analysis/Passes.h
@@ -61,10 +61,10 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
- // createDivergenceAnalysisPass - This pass determines which branches in a GPU
+ // createLegacyDivergenceAnalysisPass - This pass determines which branches in a GPU
// program are divergent.
//
- FunctionPass *createDivergenceAnalysisPass();
+ FunctionPass *createLegacyDivergenceAnalysisPass();
//===--------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/include/llvm/Analysis/PhiValues.h b/contrib/llvm/include/llvm/Analysis/PhiValues.h
index 6607b329c04f..76204ac1bc6c 100644
--- a/contrib/llvm/include/llvm/Analysis/PhiValues.h
+++ b/contrib/llvm/include/llvm/Analysis/PhiValues.h
@@ -88,6 +88,22 @@ private:
/// All values reachable from each component.
DenseMap<unsigned int, ConstValueSet> ReachableMap;
+ /// A CallbackVH to notify PhiValues when a value is deleted or replaced, so
+ /// that the cached information for that value can be cleared to avoid
+ /// dangling pointers to invalid values.
+ class PhiValuesCallbackVH final : public CallbackVH {
+ PhiValues *PV;
+ void deleted() override;
+ void allUsesReplacedWith(Value *New) override;
+
+ public:
+ PhiValuesCallbackVH(Value *V, PhiValues *PV = nullptr)
+ : CallbackVH(V), PV(PV) {}
+ };
+
+ /// A set of callbacks to the values that processPhi has seen.
+ DenseSet<PhiValuesCallbackVH, DenseMapInfo<Value *>> TrackedValues;
+
/// The function that the PhiValues is for.
const Function &F;
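The PhiValuesCallbackVH above follows the usual CallbackVH invalidation pattern. A generic, hypothetical sketch of that pattern (not PhiValues code) for a cache keyed by Value pointers:

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/ValueHandle.h"
#include <memory>
#include <vector>

using namespace llvm;

// Hypothetical analysis cache: when a tracked Value is deleted or RAUW'd,
// the stale entry is purged so the map never holds a dangling key.
class MyValueCache {
  DenseMap<const Value *, int> Cache;

  class CacheVH final : public CallbackVH {
    MyValueCache *Owner;

    void deleted() override {
      Owner->Cache.erase(getValPtr()); // value is going away
      setValPtr(nullptr);
    }
    void allUsesReplacedWith(Value *) override {
      Owner->Cache.erase(getValPtr()); // result may not hold for the new value
    }

  public:
    CacheVH(Value *V, MyValueCache *Owner) : CallbackVH(V), Owner(Owner) {}
  };

  std::vector<std::unique_ptr<CacheVH>> Handles;

public:
  void remember(Value *V, int Result) {
    Cache[V] = Result;
    Handles.push_back(std::make_unique<CacheVH>(V, this));
  }
};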
diff --git a/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index 58b67e74ba51..3aef4be72d71 100644
--- a/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -98,14 +98,14 @@ public:
bool isFunctionEntryCold(const Function *F);
/// Returns true if \p F contains only cold code.
bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
- /// Returns true if \p F is a hot function.
+ /// Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C);
/// Returns true if count \p C is considered cold.
bool isColdCount(uint64_t C);
- /// Returns true if BasicBlock \p B is considered hot.
- bool isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
- /// Returns true if BasicBlock \p B is considered cold.
- bool isColdBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
+ /// Returns true if BasicBlock \p BB is considered hot.
+ bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
+ /// Returns true if BasicBlock \p BB is considered cold.
+ bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
/// Returns true if CallSite \p CS is considered hot.
bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
/// Returns true if Callsite \p CS is considered cold.
@@ -134,9 +134,8 @@ public:
static char ID;
ProfileSummaryInfoWrapperPass();
- ProfileSummaryInfo *getPSI() {
- return &*PSI;
- }
+ ProfileSummaryInfo &getPSI() { return *PSI; }
+ const ProfileSummaryInfo &getPSI() const { return *PSI; }
bool doInitialization(Module &M) override;
bool doFinalization(Module &M) override;
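A hedged sketch of a caller adjusting to the renamed block queries and to getPSI() now returning a reference; the counting helper and the surrounding legacy pass are hypothetical.

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// In a legacy pass the result would typically be obtained as:
//   ProfileSummaryInfo &PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
static unsigned countHotBlocks(Function &F, ProfileSummaryInfo &PSI,
                               BlockFrequencyInfo &BFI) {
  unsigned Hot = 0;
  for (BasicBlock &BB : F)
    if (PSI.isHotBlock(&BB, &BFI)) // renamed from isHotBB in this change
      ++Hot;
  return Hot;
}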
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
index 89918e3c205b..8f4200b07e5c 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1833,6 +1833,10 @@ private:
const SCEV *getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags);
+ // Get addrec expr already created or create a new one.
+ const SCEV *getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L, SCEV::NoWrapFlags Flags);
+
/// Return x if \p Val is f(x) where f is a 1-1 function.
const SCEV *stripInjectiveFunctions(const SCEV *Val) const;
diff --git a/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
index 508968e16e5d..1356c6e9198a 100644
--- a/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -16,7 +16,7 @@
#define LLVM_ANALYSIS_SCOPEDNOALIASAA_H
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <memory>
@@ -41,8 +41,8 @@ public:
}
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
private:
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
diff --git a/contrib/llvm/include/llvm/Analysis/SparsePropagation.h b/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
index defcf96afb25..02a2e64268b7 100644
--- a/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/contrib/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -189,12 +189,12 @@ private:
/// getFeasibleSuccessors - Return a vector of booleans to indicate which
/// successors are reachable from a given terminator instruction.
- void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl<bool> &Succs,
+ void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs,
bool AggressiveUndef);
void visitInst(Instruction &I);
void visitPHINode(PHINode &I);
- void visitTerminatorInst(TerminatorInst &TI);
+ void visitTerminator(Instruction &TI);
};
//===----------------------------------------------------------------------===//
@@ -286,7 +286,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::markEdgeExecutable(
template <class LatticeKey, class LatticeVal, class KeyInfo>
void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors(
- TerminatorInst &TI, SmallVectorImpl<bool> &Succs, bool AggressiveUndef) {
+ Instruction &TI, SmallVectorImpl<bool> &Succs, bool AggressiveUndef) {
Succs.resize(TI.getNumSuccessors());
if (TI.getNumSuccessors() == 0)
return;
@@ -330,7 +330,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors(
return;
}
- if (TI.isExceptional()) {
+ if (TI.isExceptionalTerminator()) {
Succs.assign(Succs.size(), true);
return;
}
@@ -374,7 +374,7 @@ template <class LatticeKey, class LatticeVal, class KeyInfo>
bool SparseSolver<LatticeKey, LatticeVal, KeyInfo>::isEdgeFeasible(
BasicBlock *From, BasicBlock *To, bool AggressiveUndef) {
SmallVector<bool, 16> SuccFeasible;
- TerminatorInst *TI = From->getTerminator();
+ Instruction *TI = From->getTerminator();
getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
@@ -385,8 +385,8 @@ bool SparseSolver<LatticeKey, LatticeVal, KeyInfo>::isEdgeFeasible(
}
template <class LatticeKey, class LatticeVal, class KeyInfo>
-void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitTerminatorInst(
- TerminatorInst &TI) {
+void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitTerminator(
+ Instruction &TI) {
SmallVector<bool, 16> SuccFeasible;
getFeasibleSuccessors(TI, SuccFeasible, true);
@@ -465,8 +465,8 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitInst(Instruction &I) {
if (ChangedValue.second != LatticeFunc->getUntrackedVal())
UpdateState(ChangedValue.first, ChangedValue.second);
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
- visitTerminatorInst(*TI);
+ if (I.isTerminator())
+ visitTerminator(I);
}
template <class LatticeKey, class LatticeVal, class KeyInfo>
diff --git a/contrib/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/contrib/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
new file mode 100644
index 000000000000..8a151650a34c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
@@ -0,0 +1,120 @@
+//===- StackSafetyAnalysis.h - Stack memory safety analysis -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Stack Safety Analysis detects allocas and arguments with safe access.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_STACKSAFETYANALYSIS_H
+#define LLVM_ANALYSIS_STACKSAFETYANALYSIS_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// Interface to access stack safety analysis results for a single function.
+class StackSafetyInfo {
+public:
+ struct FunctionInfo;
+
+private:
+ std::unique_ptr<FunctionInfo> Info;
+
+public:
+ StackSafetyInfo();
+ StackSafetyInfo(FunctionInfo &&Info);
+ StackSafetyInfo(StackSafetyInfo &&);
+ StackSafetyInfo &operator=(StackSafetyInfo &&);
+ ~StackSafetyInfo();
+
+ // TODO: Add methods useful for clients.
+ void print(raw_ostream &O) const;
+};
+
+/// StackSafetyInfo wrapper for the new pass manager.
+class StackSafetyAnalysis : public AnalysisInfoMixin<StackSafetyAnalysis> {
+ friend AnalysisInfoMixin<StackSafetyAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = StackSafetyInfo;
+ StackSafetyInfo run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Printer pass for the \c StackSafetyAnalysis results.
+class StackSafetyPrinterPass : public PassInfoMixin<StackSafetyPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit StackSafetyPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// StackSafetyInfo wrapper for the legacy pass manager
+class StackSafetyInfoWrapperPass : public FunctionPass {
+ StackSafetyInfo SSI;
+
+public:
+ static char ID;
+ StackSafetyInfoWrapperPass();
+
+ const StackSafetyInfo &getResult() const { return SSI; }
+
+ void print(raw_ostream &O, const Module *M) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnFunction(Function &F) override;
+};
+
+using StackSafetyGlobalInfo = std::map<const GlobalValue *, StackSafetyInfo>;
+
+/// This pass performs the global (interprocedural) stack safety analysis (new
+/// pass manager).
+class StackSafetyGlobalAnalysis
+ : public AnalysisInfoMixin<StackSafetyGlobalAnalysis> {
+ friend AnalysisInfoMixin<StackSafetyGlobalAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = StackSafetyGlobalInfo;
+ Result run(Module &M, ModuleAnalysisManager &AM);
+};
+
+/// Printer pass for the \c StackSafetyGlobalAnalysis results.
+class StackSafetyGlobalPrinterPass
+ : public PassInfoMixin<StackSafetyGlobalPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit StackSafetyGlobalPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+/// This pass performs the global (interprocedural) stack safety analysis
+/// (legacy pass manager).
+class StackSafetyGlobalInfoWrapperPass : public ModulePass {
+ StackSafetyGlobalInfo SSI;
+
+public:
+ static char ID;
+
+ StackSafetyGlobalInfoWrapperPass();
+
+ const StackSafetyGlobalInfo &getResult() const { return SSI; }
+
+ void print(raw_ostream &O, const Module *M) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnModule(Module &M) override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_STACKSAFETYANALYSIS_H
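A sketch of querying the new function-level result, under the assumption that the analysis has been registered with the manager (for example through PassBuilder); the driver function itself is hypothetical.

#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Fetch the per-function stack safety result and print it, much like
// StackSafetyPrinterPass does.
static void printStackSafety(Function &F, FunctionAnalysisManager &FAM) {
  const StackSafetyInfo &SSI = FAM.getResult<StackSafetyAnalysis>(F);
  SSI.print(errs());
}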
diff --git a/contrib/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
new file mode 100644
index 000000000000..df693d9d8e8c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -0,0 +1,86 @@
+//===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file defines the SyncDependenceAnalysis class, which computes for
+// every divergent branch the set of phi nodes that the branch will make
+// divergent.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+#define LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include <memory>
+
+namespace llvm {
+
+class BasicBlock;
+class DominatorTree;
+class Loop;
+class PostDominatorTree;
+
+using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>;
+
+/// \brief Relates points of divergent control to join points in
+/// reducible CFGs.
+///
+/// This analysis relates points of divergent control to points of converging
+/// divergent control. The analysis requires all loops to be reducible.
+class SyncDependenceAnalysis {
+ void visitSuccessor(const BasicBlock &succBlock, const Loop *termLoop,
+ const BasicBlock *defBlock);
+
+public:
+ bool inRegion(const BasicBlock &BB) const;
+
+ ~SyncDependenceAnalysis();
+ SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT,
+ const LoopInfo &LI);
+
+ /// \brief Computes divergent join points and loop exits caused by branch
+ /// divergence in \p Term.
+ ///
+ /// The set of blocks which are reachable by disjoint paths from \p Term.
+ /// The set also contains loop exits if there are two disjoint paths:
+ /// one from \p Term to the loop exit and another from \p Term to the loop
+ /// header. Those exit blocks are added to the returned set.
+ /// If L is the parent loop of \p Term and an exit of L is in the returned
+ /// set then L is a divergent loop.
+ const ConstBlockSet &join_blocks(const Instruction &Term);
+
+ /// \brief Computes divergent join points and loop exits (in the surrounding
+ /// loop) caused by the divergent loop exits of \p Loop.
+ ///
+ /// The set of blocks which are reachable by disjoint paths from the
+ /// loop exits of \p Loop.
+ /// This treats the loop as a single node in \p Loop's parent loop.
+ /// The returned set has the same properties as for join_blocks(TermInst&).
+ const ConstBlockSet &join_blocks(const Loop &Loop);
+
+private:
+ static ConstBlockSet EmptyBlockSet;
+
+ ReversePostOrderTraversal<const Function *> FuncRPOT;
+ const DominatorTree &DT;
+ const PostDominatorTree &PDT;
+ const LoopInfo &LI;
+
+ std::map<const Loop *, std::unique_ptr<ConstBlockSet>> CachedLoopExitJoins;
+ std::map<const Instruction *, std::unique_ptr<ConstBlockSet>>
+ CachedBranchJoins;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
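A hedged sketch of the intended query pattern: once a terminator is known to be divergent, ask which join blocks its divergence reaches. The caller that maintains the divergence worklist is assumed.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/SyncDependenceAnalysis.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Collect the blocks whose phi nodes become divergent because of a divergent
// terminator, as computed by join_blocks().
static void collectDivergentJoins(const Instruction &DivergentTerm,
                                  SyncDependenceAnalysis &SDA,
                                  SmallPtrSetImpl<const BasicBlock *> &Out) {
  for (const BasicBlock *JoinBB : SDA.join_blocks(DivergentTerm))
    Out.insert(JoinBB);
}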
diff --git a/contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h b/contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
index 87f4a0100b38..db80bef001e2 100644
--- a/contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -36,16 +36,17 @@ public:
using EdgeRef = typename CGT::EdgeRef;
using SccTy = std::vector<NodeRef>;
- using GetRelBBFreqTy = function_ref<Optional<Scaled64>(EdgeRef)>;
- using GetCountTy = function_ref<uint64_t(NodeRef)>;
- using AddCountTy = function_ref<void(NodeRef, uint64_t)>;
+ // Not all EdgeRefs have information about the source of the edge. Hence the
+ // NodeRef corresponding to the source of the EdgeRef is explicitly passed.
+ using GetProfCountTy = function_ref<Optional<Scaled64>(NodeRef, EdgeRef)>;
+ using AddCountTy = function_ref<void(NodeRef, Scaled64)>;
- static void propagate(const CallGraphType &CG, GetRelBBFreqTy GetRelBBFreq,
- GetCountTy GetCount, AddCountTy AddCount);
+ static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount,
+ AddCountTy AddCount);
private:
- static void propagateFromSCC(const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq,
- GetCountTy GetCount, AddCountTy AddCount);
+ static void propagateFromSCC(const SccTy &SCC, GetProfCountTy GetProfCount,
+ AddCountTy AddCount);
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index f94debba9c52..518a85ee1a01 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -565,6 +565,30 @@ TLI_DEFINE_STRING_INTERNAL("cosl")
/// char *ctermid(char *s);
TLI_DEFINE_ENUM_INTERNAL(ctermid)
TLI_DEFINE_STRING_INTERNAL("ctermid")
+/// int execl(const char *path, const char *arg, ...);
+TLI_DEFINE_ENUM_INTERNAL(execl)
+TLI_DEFINE_STRING_INTERNAL("execl")
+/// int execle(const char *file, const char *arg, ..., char * const envp[]);
+TLI_DEFINE_ENUM_INTERNAL(execle)
+TLI_DEFINE_STRING_INTERNAL("execle")
+/// int execlp(const char *file, const char *arg, ...);
+TLI_DEFINE_ENUM_INTERNAL(execlp)
+TLI_DEFINE_STRING_INTERNAL("execlp")
+/// int execv(const char *path, char *const argv[]);
+TLI_DEFINE_ENUM_INTERNAL(execv)
+TLI_DEFINE_STRING_INTERNAL("execv")
+/// int execvP(const char *file, const char *search_path, char *const argv[]);
+TLI_DEFINE_ENUM_INTERNAL(execvP)
+TLI_DEFINE_STRING_INTERNAL("execvP")
+/// int execve(const char *filename, char *const argv[], char *const envp[]);
+TLI_DEFINE_ENUM_INTERNAL(execve)
+TLI_DEFINE_STRING_INTERNAL("execve")
+/// int execvp(const char *file, char *const argv[]);
+TLI_DEFINE_ENUM_INTERNAL(execvp)
+TLI_DEFINE_STRING_INTERNAL("execvp")
+/// int execvpe(const char *file, char *const argv[], char *const envp[]);
+TLI_DEFINE_ENUM_INTERNAL(execvpe)
+TLI_DEFINE_STRING_INTERNAL("execvpe")
/// double exp(double x);
TLI_DEFINE_ENUM_INTERNAL(exp)
TLI_DEFINE_STRING_INTERNAL("exp")
@@ -709,6 +733,9 @@ TLI_DEFINE_STRING_INTERNAL("fopen")
/// FILE *fopen64(const char *filename, const char *opentype)
TLI_DEFINE_ENUM_INTERNAL(fopen64)
TLI_DEFINE_STRING_INTERNAL("fopen64")
+/// int fork();
+TLI_DEFINE_ENUM_INTERNAL(fork)
+TLI_DEFINE_STRING_INTERNAL("fork")
/// int fprintf(FILE *stream, const char *format, ...);
TLI_DEFINE_ENUM_INTERNAL(fprintf)
TLI_DEFINE_STRING_INTERNAL("fprintf")
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 59657cca40f5..223175d17c2d 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -289,7 +289,7 @@ public:
/// Returns whether V is a source of divergence.
///
/// This function provides the target-dependent information for
- /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
+ /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
/// builds the dependency graph, and then runs the reachability algorithm
/// starting with the sources of divergence.
bool isSourceOfDivergence(const Value *V) const;
@@ -581,12 +581,21 @@ public:
struct MemCmpExpansionOptions {
// The list of available load sizes (in bytes), sorted in decreasing order.
SmallVector<unsigned, 8> LoadSizes;
+ // Set to true to allow overlapping loads. For example, 7-byte compares can
+ // be done with two 4-byte compares instead of 4+2+1-byte compares. This
+ // requires that all loads in LoadSizes can be done unaligned.
+ bool AllowOverlappingLoads = false;
};
const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
+ /// Enable matching of interleaved access groups that contain predicated
+ /// accesses or gaps and are therefore vectorized using masked
+ /// vector loads/stores.
+ bool enableMaskedInterleavedAccessVectorization() const;
+
/// Indicate that it is potentially unsafe to automatically vectorize
/// floating-point operations because the semantics of vector and scalar
/// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
@@ -739,6 +748,10 @@ public:
/// and the number of execution units in the CPU.
unsigned getMaxInterleaveFactor(unsigned VF) const;
+ /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
+ static OperandValueKind getOperandInfo(Value *V,
+ OperandValueProperties &OpProps);
+
/// This is an approximation of reciprocal throughput of a math/logic op.
/// A higher cost indicates less expected throughput.
/// From Agner Fog's guides, reciprocal throughput is "the average number of
@@ -762,7 +775,9 @@ public:
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The index and subtype parameters are used by the subvector insertion and
- /// extraction shuffle kinds.
+ /// extraction shuffle kinds to show the insert/extract point and the type of
+ /// the subvector being inserted/extracted.
+ /// NOTE: For subvector extractions Tp represents the source type.
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
Type *SubTp = nullptr) const;
@@ -817,9 +832,13 @@ public:
/// load allows gaps)
/// \p Alignment is the alignment of the memory operation
/// \p AddressSpace is address space of the pointer.
+ /// \p UseMaskForCond indicates if the memory access is predicated.
+ /// \p UseMaskForGaps indicates if gaps should be masked.
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace) const;
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false) const;
/// Calculate the cost of performing a vector reduction.
///
@@ -915,6 +934,14 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
+ /// \returns True if the caller and callee agree on how \p Args will be passed
+ /// to the callee.
+ /// \param[out] Args The list of compatible arguments. The implementation may
+ /// filter out any incompatible args from this list.
+ bool areFunctionArgsABICompatible(const Function *Caller,
+ const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const;
+
/// The type of load/store indexing.
enum MemIndexedMode {
MIM_Unindexed, ///< No indexing.
@@ -1068,6 +1095,7 @@ public:
virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
+ virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
@@ -1128,7 +1156,9 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) = 0;
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false) = 0;
virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
@@ -1157,6 +1187,9 @@ public:
unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
+ virtual bool
+ areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const = 0;
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
@@ -1342,6 +1375,9 @@ public:
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
}
+ bool enableMaskedInterleavedAccessVectorization() override {
+ return Impl.enableMaskedInterleavedAccessVectorization();
+ }
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
@@ -1467,9 +1503,11 @@ public:
}
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace) override {
+ unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) override {
@@ -1530,6 +1568,11 @@ public:
const Function *Callee) const override {
return Impl.areInlineCompatible(Caller, Callee);
}
+ bool areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const override {
+ return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
+ }
bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
}
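The AllowOverlappingLoads option added above is easiest to see on a concrete width: a 7-byte equality memcmp can be expanded as two unaligned 4-byte loads covering bytes [0,4) and [3,7), instead of a 4+2+1 chain. The C++ below is a sketch of the idea only; the real expansion is performed on IR by the memcmp expansion code, not in source like this.

#include <cstdint>
#include <cstring>

// Equality of 7 bytes using two overlapping 4-byte loads (memcpy models an
// unaligned load): the ranges [0,4) and [3,7) together cover all 7 bytes.
static bool equal7Bytes(const void *P, const void *Q) {
  uint32_t A0, B0, A1, B1;
  std::memcpy(&A0, P, 4);
  std::memcpy(&B0, Q, 4);
  std::memcpy(&A1, static_cast<const char *>(P) + 3, 4);
  std::memcpy(&B1, static_cast<const char *>(Q) + 3, 4);
  return A0 == B0 && A1 == B1;
}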
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d80ae1d6845d..c9a234deeb7d 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -158,6 +158,9 @@ public:
case Intrinsic::dbg_label:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ case Intrinsic::is_constant:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::objectsize:
@@ -311,6 +314,8 @@ public:
bool enableInterleavedAccessVectorization() { return false; }
+ bool enableMaskedInterleavedAccessVectorization() { return false; }
+
bool isFPVectorizationPotentiallyUnsafe() { return false; }
bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
@@ -448,8 +453,9 @@ public:
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false) {
return 1;
}
@@ -520,6 +526,14 @@ public:
Callee->getFnAttribute("target-features"));
}
+ bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+ return (Caller->getFnAttribute("target-cpu") ==
+ Callee->getFnAttribute("target-cpu")) &&
+ (Caller->getFnAttribute("target-features") ==
+ Callee->getFnAttribute("target-features"));
+ }
+
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
const DataLayout &DL) const {
return false;
diff --git a/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
index 7fcfdb3a817c..d2e6df22425e 100644
--- a/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
@@ -17,7 +17,7 @@
#define LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <memory>
@@ -43,10 +43,10 @@ public:
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
FunctionModRefBehavior getModRefBehavior(const Function *F);
- ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
diff --git a/contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h
index 6764563f6830..3bf9c5d20741 100644
--- a/contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/TypeMetadataUtils.h
@@ -20,6 +20,8 @@
namespace llvm {
+class DominatorTree;
+
/// The type of CFI jumptable needed for a function.
enum CfiFunctionLinkage {
CFL_Definition = 0,
@@ -39,7 +41,8 @@ struct DevirtCallSite {
/// call sites based on the call and return them in DevirtCalls.
void findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI);
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI,
+ DominatorTree &DT);
/// Given a call to the intrinsic \@llvm.type.checked.load, find all
/// devirtualizable call sites based on the call and return them in DevirtCalls.
@@ -47,7 +50,7 @@ void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
- const CallInst *CI);
+ const CallInst *CI, DominatorTree &DT);
}
#endif
diff --git a/contrib/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm/include/llvm/Analysis/ValueTracking.h
index c1a91a8e5981..f46fdfcb608e 100644
--- a/contrib/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/contrib/llvm/include/llvm/Analysis/ValueTracking.h
@@ -55,14 +55,16 @@ class Value;
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr);
+ OptimizationRemarkEmitter *ORE = nullptr,
+ bool UseInstrInfo = true);
/// Returns the known bits rather than passing by reference.
KnownBits computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr);
+ OptimizationRemarkEmitter *ORE = nullptr,
+ bool UseInstrInfo = true);
/// Compute known bits from the range metadata.
/// \p KnownZero the set of bits that are known to be zero
@@ -75,7 +77,8 @@ class Value;
const DataLayout &DL,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return true if the given value is known to have exactly one bit set when
/// defined. For vectors return true if every element is known to be a power
@@ -86,7 +89,8 @@ class Value;
bool OrZero = false, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
@@ -99,7 +103,8 @@ class Value;
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return true if the two given values are negations of each other.
/// Currently can recognize Value pair:
@@ -112,28 +117,32 @@ class Value;
unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Returns true if the given value is known to be positive (i.e. non-negative
/// and non-zero).
bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Returns true if the given value is known to be negative (i.e. non-positive
/// and non-zero).
bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return true if the given values are known to be non-equal when defined.
/// Supports scalar integer types only.
bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL,
- AssumptionCache *AC = nullptr,
- const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return true if 'V & Mask' is known to be zero. We use this predicate to
/// simplify operations downstream. Mask is known to be zero for bits that V
@@ -148,7 +157,8 @@ class Value;
const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign
@@ -160,7 +170,8 @@ class Value;
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
/// This function computes the integer multiple of Base that equals V. If
/// successful, it returns true and returns the multiple in Multiple. If
@@ -194,7 +205,8 @@ class Value;
/// Return true if the floating-point scalar value is not a NaN or if the
/// floating-point vector value has no NaN elements. Return false if a value
/// could ever be NaN.
- bool isKnownNeverNaN(const Value *V);
+ bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
+ unsigned Depth = 0);
/// Return true if we can prove that the specified FP value's sign bit is 0.
///
@@ -209,7 +221,8 @@ class Value;
/// return the i8 value that it is represented with. This is true for all i8
/// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double
/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
- /// i16 0x1234), return null.
+ /// i16 0x1234), return null. If the value is entirely undef and padding,
+ /// return undef.
Value *isBytewiseValue(Value *V);
/// Given an aggregate and a sequence of indices, see if the scalar value
@@ -284,10 +297,10 @@ class Value;
/// This function returns the call pointer argument that is considered the same by
/// aliasing rules. You CAN'T use it to replace one value with another.
- const Value *getArgumentAliasingToReturnedPointer(ImmutableCallSite CS);
- inline Value *getArgumentAliasingToReturnedPointer(CallSite CS) {
- return const_cast<Value *>(
- getArgumentAliasingToReturnedPointer(ImmutableCallSite(CS)));
+ const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call);
+ inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) {
+ return const_cast<Value *>(getArgumentAliasingToReturnedPointer(
+ const_cast<const CallBase *>(Call)));
}
// {launder,strip}.invariant.group returns a pointer that aliases its argument,
@@ -296,7 +309,7 @@ class Value;
// considered as capture. The arguments are not marked as returned either,
// because it would make it useless.
bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- ImmutableCallSite CS);
+ const CallBase *Call);
/// This method strips off any GEP address adjustments and pointer casts from
/// the specified value, returning the original object being addressed. Note
@@ -405,18 +418,21 @@ class Value;
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT);
+ const DominatorTree *DT,
+ bool UseInstrInfo = true);
OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT);
+ const DominatorTree *DT,
+ bool UseInstrInfo = true);
OverflowResult computeOverflowForUnsignedAdd(const Value *LHS,
const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT);
+ const DominatorTree *DT,
+ bool UseInstrInfo = true);
OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC = nullptr,
@@ -594,6 +610,12 @@ class Value;
Optional<bool> isImpliedCondition(const Value *LHS, const Value *RHS,
const DataLayout &DL, bool LHSIsTrue = true,
unsigned Depth = 0);
+
+ /// Return the boolean condition value in the context of the given instruction
+ /// if it is known based on dominating conditions.
+ Optional<bool> isImpliedByDomCondition(const Value *Cond,
+ const Instruction *ContextI,
+ const DataLayout &DL);
} // end namespace llvm
#endif // LLVM_ANALYSIS_VALUETRACKING_H
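A small hedged sketch of the extended ValueTracking interface above: the trailing UseInstrInfo flag (default true) controls whether per-instruction flags and metadata may be used, and the result is a KnownBits bit pattern. The helper below is illustrative only.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Ask whether V is known to be even at the given context instruction.
static bool isKnownEvenAt(const Value *V, const Instruction *CtxI,
                          const DataLayout &DL) {
  KnownBits Known = computeKnownBits(V, DL, /*Depth=*/0, /*AC=*/nullptr, CtxI,
                                     /*DT=*/nullptr, /*ORE=*/nullptr,
                                     /*UseInstrInfo=*/true);
  return Known.Zero[0]; // bit 0 known zero => the value is even
}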
diff --git a/contrib/llvm/include/llvm/Analysis/VectorUtils.h b/contrib/llvm/include/llvm/Analysis/VectorUtils.h
index 9fde36d61091..be4d4f17b9ad 100644
--- a/contrib/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/contrib/llvm/include/llvm/Analysis/VectorUtils.h
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_VECTORUTILS_H
#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
@@ -23,6 +24,7 @@ namespace llvm {
template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
+template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
@@ -115,8 +117,24 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
DemandedBits &DB,
const TargetTransformInfo *TTI=nullptr);
+/// Compute the union of two access-group lists.
+///
+/// If the list contains just one access group, it is returned directly. If the
+/// list is empty, returns nullptr.
+MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
+
+/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
+/// are both in. If either instruction does not access memory at all, it is
+/// considered to be in every list.
+///
+/// If the list contains just one access group, it is returned directly. If the
+/// list is empty, returns nullptr.
+MDNode *intersectAccessGroups(const Instruction *Inst1,
+ const Instruction *Inst2);
+
/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
-/// MD_nontemporal]. For K in Kinds, we get the MDNode for K from each of the
+/// MD_nontemporal, MD_access_group].
+/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for M equal to the intersection value.
@@ -124,6 +142,35 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
+/// Create a mask that filters the members of an interleave group where there
+/// are gaps.
+///
+/// For example, the mask for a \p Group with interleave factor 3
+/// and \p VF 4 that has only its first member present is:
+///
+/// <1,0,0,1,0,0,1,0,0,1,0,0>
+///
+/// Note: The result is a mask of 0's and 1's, as opposed to the other
+/// create[*]Mask() utilities which create a shuffle mask (mask that
+/// consists of indices).
+Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+ const InterleaveGroup<Instruction> &Group);
+
+/// Create a mask with replicated elements.
+///
+/// This function creates a shuffle mask for replicating each of the \p VF
+/// elements in a vector \p ReplicationFactor times. It can be used to
+/// transform a mask of \p VF elements into a mask of
+/// \p VF * \p ReplicationFactor elements used by a predicated
+/// interleaved-group of loads/stores whose Interleaved-factor ==
+/// \p ReplicationFactor.
+///
+/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
+///
+/// <0,0,0,1,1,1,2,2,2,3,3,3>
+Constant *createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor,
+ unsigned VF);
+
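A hedged usage sketch for createReplicatedMask, mirroring the masked interleaved-group case it documents; Builder (an IRBuilder<>) and BlockInMask (a VF-wide i1 vector Value*) are assumptions of this example.

  // Widen a VF-wide predicate to VF * ReplicationFactor lanes.
  Constant *RepMask =
      createReplicatedMask(Builder, /*ReplicationFactor=*/3, /*VF=*/4);
  Value *WideMask = Builder.CreateShuffleVector(
      BlockInMask, UndefValue::get(BlockInMask->getType()), RepMask,
      "interleaved.mask");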
/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
@@ -176,6 +223,381 @@ Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
+/// The group of interleaved loads/stores sharing the same stride and
+/// close to each other.
+///
+/// Each member in this group has an index starting from 0, and the largest
+/// index should be less than interleaved factor, which is equal to the absolute
+/// value of the access's stride.
+///
+/// E.g. An interleaved load group of factor 4:
+/// for (unsigned i = 0; i < 1024; i+=4) {
+/// a = A[i]; // Member of index 0
+/// b = A[i+1]; // Member of index 1
+/// d = A[i+3]; // Member of index 3
+/// ...
+/// }
+///
+/// An interleaved store group of factor 4:
+/// for (unsigned i = 0; i < 1024; i+=4) {
+/// ...
+/// A[i] = a; // Member of index 0
+/// A[i+1] = b; // Member of index 1
+/// A[i+2] = c; // Member of index 2
+/// A[i+3] = d; // Member of index 3
+/// }
+///
+/// Note: the interleaved load group could have gaps (missing members), but
+/// the interleaved store group doesn't allow gaps.
+template <typename InstTy> class InterleaveGroup {
+public:
+ InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align)
+ : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
+
+ InterleaveGroup(InstTy *Instr, int Stride, unsigned Align)
+ : Align(Align), InsertPos(Instr) {
+ assert(Align && "The alignment should be non-zero");
+
+ Factor = std::abs(Stride);
+ assert(Factor > 1 && "Invalid interleave factor");
+
+ Reverse = Stride < 0;
+ Members[0] = Instr;
+ }
+
+ bool isReverse() const { return Reverse; }
+ unsigned getFactor() const { return Factor; }
+ unsigned getAlignment() const { return Align; }
+ unsigned getNumMembers() const { return Members.size(); }
+
+ /// Try to insert a new member \p Instr with index \p Index and
+ /// alignment \p NewAlign. The index is relative to the leader and may be
+ /// negative if the new member becomes the new leader.
+ ///
+ /// \returns false if the instruction doesn't belong to the group.
+ bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) {
+ assert(NewAlign && "The new member's alignment should be non-zero");
+
+ int Key = Index + SmallestKey;
+
+ // Skip if there is already a member with the same index.
+ if (Members.find(Key) != Members.end())
+ return false;
+
+ if (Key > LargestKey) {
+ // The largest index is always less than the interleave factor.
+ if (Index >= static_cast<int>(Factor))
+ return false;
+
+ LargestKey = Key;
+ } else if (Key < SmallestKey) {
+ // The spread between the largest and smallest keys must stay below
+ // the interleave factor.
+ if (LargestKey - Key >= static_cast<int>(Factor))
+ return false;
+
+ SmallestKey = Key;
+ }
+
+ // It's always safe to select the minimum alignment.
+ Align = std::min(Align, NewAlign);
+ Members[Key] = Instr;
+ return true;
+ }
+
+ /// Get the member with the given index \p Index
+ ///
+ /// \returns nullptr if the group contains no such member.
+ InstTy *getMember(unsigned Index) const {
+ int Key = SmallestKey + Index;
+ auto Member = Members.find(Key);
+ if (Member == Members.end())
+ return nullptr;
+
+ return Member->second;
+ }
+
+ /// Get the index for the given member. Unlike the key in the member
+ /// map, the index starts from 0.
+ unsigned getIndex(const InstTy *Instr) const {
+ for (auto I : Members) {
+ if (I.second == Instr)
+ return I.first - SmallestKey;
+ }
+
+ llvm_unreachable("InterleaveGroup contains no such member");
+ }
+
+ InstTy *getInsertPos() const { return InsertPos; }
+ void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
+
+ /// Add metadata (e.g. alias info) from the instructions in this group to \p
+ /// NewInst.
+ ///
+ /// FIXME: this function currently does not add noalias metadata a la
+ /// addNewMetadata. To do that we need to compute the intersection of the
+ /// noalias info from all members.
+ void addMetadata(InstTy *NewInst) const;
+
+ /// Returns true if this Group requires a scalar iteration to handle gaps.
+ bool requiresScalarEpilogue() const {
+ // If the last member of the Group exists, then a scalar epilog is not
+ // needed for this group.
+ if (getMember(getFactor() - 1))
+ return false;
+
+ // We have a group with gaps. It therefore cannot be a group of stores,
+ // and it can't be a reversed access, because such groups get invalidated.
+ assert(!getMember(0)->mayWriteToMemory() &&
+ "Group should have been invalidated");
+ assert(!isReverse() && "Group should have been invalidated");
+
+ // This is a group of loads, with gaps, and without a last member.
+ return true;
+ }
+
+private:
+ unsigned Factor; // Interleave Factor.
+ bool Reverse;
+ unsigned Align;
+ DenseMap<int, InstTy *> Members;
+ int SmallestKey = 0;
+ int LargestKey = 0;
+
+ // To avoid breaking dependences, vectorized instructions of an interleave
+ // group should be inserted at either the first load or the last store in
+ // program order.
+ //
+ // E.g. %even = load i32 // Insert Position
+ // %add = add i32 %even // Use of %even
+ // %odd = load i32
+ //
+ // store i32 %even
+ // %odd = add i32 // Def of %odd
+ // store i32 %odd // Insert Position
+ InstTy *InsertPos;
+};
+
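A sketch of how the class above could be populated for the factor-4 load example in its comment; LoadA, LoadB and LoadD stand for the accesses to A[i], A[i+1] and A[i+3] and are assumptions of this example.

  InterleaveGroup<Instruction> Group(LoadA, /*Stride=*/4, /*Align=*/4);
  Group.insertMember(LoadB, /*Index=*/1, /*NewAlign=*/4);
  Group.insertMember(LoadD, /*Index=*/3, /*NewAlign=*/4);
  assert(Group.getFactor() == 4 && !Group.isReverse());
  assert(Group.getMember(2) == nullptr);  // the gap at A[i+2]
  // The last member (index 3) is present, so the group's accesses stay in
  // bounds and no scalar epilogue is required despite the gap.
  assert(!Group.requiresScalarEpilogue());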
+/// Drive the analysis of interleaved memory accesses in the loop.
+///
+/// Use this class to analyze interleaved accesses only when we can vectorize
+/// a loop. Otherwise it's meaningless to do the analysis, as vectorization
+/// of interleaved accesses is unsafe.
+///
+/// The analysis collects interleave groups and records the relationships
+/// between the member and the group in a map.
+class InterleavedAccessInfo {
+public:
+ InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
+ DominatorTree *DT, LoopInfo *LI,
+ const LoopAccessInfo *LAI)
+ : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
+
+ ~InterleavedAccessInfo() { reset(); }
+
+ /// Analyze the interleaved accesses and collect them in interleave
+ /// groups. Substitute symbolic strides using \p Strides.
+ /// Consider also predicated loads/stores in the analysis if
+ /// \p EnableMaskedInterleavedGroup is true.
+ void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
+
+ /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
+ /// contrary to the original assumption. Although we currently prevent group
+ /// formation for predicated accesses, we may be able to relax this limitation
+ /// in the future once we handle more complicated blocks.
+ void reset() {
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
+ // Avoid releasing a pointer twice.
+ for (auto &I : InterleaveGroupMap)
+ DelSet.insert(I.second);
+ for (auto *Ptr : DelSet)
+ delete Ptr;
+ InterleaveGroupMap.clear();
+ RequiresScalarEpilogue = false;
+ }
+
+ /// Check if \p Instr belongs to any interleave group.
+ bool isInterleaved(Instruction *Instr) const {
+ return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
+ }
+
+ /// Get the interleave group that \p Instr belongs to.
+ ///
+ /// \returns nullptr if \p Instr does not belong to any group.
+ InterleaveGroup<Instruction> *
+ getInterleaveGroup(const Instruction *Instr) const {
+ if (InterleaveGroupMap.count(Instr))
+ return InterleaveGroupMap.find(Instr)->second;
+ return nullptr;
+ }
+
+ iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
+ getInterleaveGroups() {
+ return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
+ }
+
+ /// Returns true if an interleaved group that may access memory
+ /// out-of-bounds requires a scalar epilogue iteration for correctness.
+ bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
+
+ /// Invalidate groups that require a scalar epilogue (due to gaps). This can
+ /// happen when optimizing for size forbids a scalar epilogue, and the gap
+ /// cannot be filtered by masking the load/store.
+ void invalidateGroupsRequiringScalarEpilogue();
+
+private:
+ /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
+ /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
+ /// The interleaved access analysis can also add new predicates (for example
+ /// by versioning strides of pointers).
+ PredicatedScalarEvolution &PSE;
+
+ Loop *TheLoop;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ const LoopAccessInfo *LAI;
+
+ /// True if the loop may contain non-reversed interleaved groups with
+ /// out-of-bounds accesses. We ensure we don't speculatively access memory
+ /// out-of-bounds by executing at least one scalar epilogue iteration.
+ bool RequiresScalarEpilogue = false;
+
+ /// Holds the relationships between the members and the interleave group.
+ DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
+
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
+
+ /// Holds dependences among the memory accesses in the loop. It maps a source
+ /// access to a set of dependent sink accesses.
+ DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
+
+ /// The descriptor for a strided memory access.
+ struct StrideDescriptor {
+ StrideDescriptor() = default;
+ StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
+ unsigned Align)
+ : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
+
+ // The access's stride. It is negative for a reverse access.
+ int64_t Stride = 0;
+
+ // The scalar expression of this access.
+ const SCEV *Scev = nullptr;
+
+ // The size of the memory object.
+ uint64_t Size = 0;
+
+ // The alignment of this access.
+ unsigned Align = 0;
+ };
+
+ /// A type for holding instructions and their stride descriptors.
+ using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
+
+ /// Create a new interleave group with the given instruction \p Instr,
+ /// stride \p Stride and alignment \p Align.
+ ///
+ /// \returns the newly created interleave group.
+ InterleaveGroup<Instruction> *
+ createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
+ assert(!InterleaveGroupMap.count(Instr) &&
+ "Already in an interleaved access group");
+ InterleaveGroupMap[Instr] =
+ new InterleaveGroup<Instruction>(Instr, Stride, Align);
+ InterleaveGroups.insert(InterleaveGroupMap[Instr]);
+ return InterleaveGroupMap[Instr];
+ }
+
+ /// Release the group and remove all the relationships.
+ void releaseGroup(InterleaveGroup<Instruction> *Group) {
+ for (unsigned i = 0; i < Group->getFactor(); i++)
+ if (Instruction *Member = Group->getMember(i))
+ InterleaveGroupMap.erase(Member);
+
+ InterleaveGroups.erase(Group);
+ delete Group;
+ }
+
+ /// Collect all the accesses with a constant stride in program order.
+ void collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides);
+
+ /// Returns true if \p Stride is allowed in an interleaved group.
+ static bool isStrided(int Stride);
+
+ /// Returns true if \p BB is a predicated block.
+ bool isPredicated(BasicBlock *BB) const {
+ return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
+ }
+
+ /// Returns true if LoopAccessInfo can be used for dependence queries.
+ bool areDependencesValid() const {
+ return LAI && LAI->getDepChecker().getDependences();
+ }
+
+ /// Returns true if memory accesses \p A and \p B can be reordered, if
+ /// necessary, when constructing interleaved groups.
+ ///
+ /// \p A must precede \p B in program order. We return false only when
+ /// reordering is prevented because \p A and \p B may be dependent.
+ bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
+ StrideEntry *B) const {
+ // Code motion for interleaved accesses can potentially hoist strided loads
+ // and sink strided stores. The code below checks the legality of the
+ // following two conditions:
+ //
+ // 1. Potentially moving a strided load (B) before any store (A) that
+ // precedes B, or
+ //
+ // 2. Potentially moving a strided store (A) after any load or store (B)
+ // that A precedes.
+ //
+ // It's legal to reorder A and B if we know there isn't a dependence from A
+ // to B. Note that this determination is conservative since some
+ // dependences could potentially be reordered safely.
+
+ // A is potentially the source of a dependence.
+ auto *Src = A->first;
+ auto SrcDes = A->second;
+
+ // B is potentially the sink of a dependence.
+ auto *Sink = B->first;
+ auto SinkDes = B->second;
+
+ // Code motion for interleaved accesses can't violate WAR dependences.
+ // Thus, reordering is legal if the source isn't a write.
+ if (!Src->mayWriteToMemory())
+ return true;
+
+ // At least one of the accesses must be strided.
+ if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
+ return true;
+
+ // If dependence information is not available from LoopAccessInfo,
+ // conservatively assume the instructions can't be reordered.
+ if (!areDependencesValid())
+ return false;
+
+ // If we know there is a dependence from source to sink, assume the
+ // instructions can't be reordered. Otherwise, reordering is legal.
+ return Dependences.find(Src) == Dependences.end() ||
+ !Dependences.lookup(Src).count(Sink);
+ }
+
+ /// Collect the dependences from LoopAccessInfo.
+ ///
+ /// We process the dependences once during the interleaved access analysis to
+ /// enable constant-time dependence queries.
+ void collectDependences() {
+ if (!areDependencesValid())
+ return;
+ auto *Deps = LAI->getDepChecker().getDependences();
+ for (auto Dep : *Deps)
+ Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
+ }
+};
+
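A rough driver-side sketch of how this analysis is meant to be consumed; PSE, TheLoop, DT, LI, LAI and MemInstr are assumed to be available from the surrounding vectorizer.

  InterleavedAccessInfo IAI(PSE, TheLoop, DT, LI, LAI);
  IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
  if (InterleaveGroup<Instruction> *Group = IAI.getInterleaveGroup(MemInstr)) {
    // Emit one wide access for the whole group instead of Factor narrow ones.
    unsigned Factor = Group->getFactor();
    (void)Factor;
  }
  if (IAI.requiresScalarEpilogue()) {
    // Keep at least one scalar iteration to cover groups with gaps.
  }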
} // llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h b/contrib/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
new file mode 100644
index 000000000000..de44f41720ed
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
@@ -0,0 +1,70 @@
+//===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This is a verifier for AMDGPU HSA metadata, which can verify both
+/// well-typed metadata and untyped metadata. When verifying in the non-strict
+/// mode, untyped metadata is coerced into the correct type if possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
+#define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
+
+#include "llvm/BinaryFormat/MsgPackTypes.h"
+
+namespace llvm {
+namespace AMDGPU {
+namespace HSAMD {
+namespace V3 {
+
+/// Verifier for AMDGPU HSA metadata.
+///
+/// Operates in two modes:
+///
+/// In strict mode, metadata must already be well-typed.
+///
+/// In non-strict mode, metadata is coerced into expected types when possible.
+class MetadataVerifier {
+ bool Strict;
+
+ bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
+ bool verifyInteger(msgpack::Node &Node);
+ bool verifyArray(msgpack::Node &Node,
+ function_ref<bool(msgpack::Node &)> verifyNode,
+ Optional<size_t> Size = None);
+ bool verifyEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ function_ref<bool(msgpack::Node &)> verifyNode);
+ bool
+ verifyScalarEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
+ bool verifyIntegerEntry(msgpack::MapNode &MapNode, StringRef Key,
+ bool Required);
+ bool verifyKernelArgs(msgpack::Node &Node);
+ bool verifyKernel(msgpack::Node &Node);
+
+public:
+ /// Construct a MetadataVerifier, specifying whether it will operate in \p
+ /// Strict mode.
+ MetadataVerifier(bool Strict) : Strict(Strict) {}
+
+ /// Verify given HSA metadata.
+ ///
+ /// \returns True when successful, false when metadata is invalid.
+ bool verify(msgpack::Node &HSAMetadataRoot);
+};
+
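A hedged usage sketch; Root is assumed to be a parsed msgpack::Node holding the code object's HSA metadata (see MsgPackTypes.h below).

  AMDGPU::HSAMD::V3::MetadataVerifier Verifier(/*Strict=*/false);
  if (!Verifier.verify(*Root))
    report_fatal_error("invalid AMDGPU HSA code object metadata");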
+} // end namespace V3
+} // end namespace HSAMD
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
diff --git a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def
index 944c5dd1c157..6ad3cb57f62f 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -18,9 +18,11 @@
defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \
defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \
defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \
- defined HANDLE_DW_RLE || defined HANDLE_DW_CFA || \
+ defined HANDLE_DW_RLE || \
+ (defined HANDLE_DW_CFA && defined HANDLE_DW_CFA_PRED) || \
defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \
- defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX)
+ defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX || \
+ defined HANDLE_DW_END)
#error "Missing macro definition of HANDLE_DW*"
#endif
@@ -41,7 +43,7 @@
#endif
#ifndef HANDLE_DW_LANG
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)
#endif
#ifndef HANDLE_DW_ATE
@@ -84,6 +86,10 @@
#define HANDLE_DW_CFA(ID, NAME)
#endif
+#ifndef HANDLE_DW_CFA_PRED
+#define HANDLE_DW_CFA_PRED(ID, NAME, PRED)
+#endif
+
#ifndef HANDLE_DW_APPLE_PROPERTY
#define HANDLE_DW_APPLE_PROPERTY(ID, NAME)
#endif
@@ -100,6 +106,10 @@
#define HANDLE_DW_IDX(ID, NAME)
#endif
+#ifndef HANDLE_DW_END
+#define HANDLE_DW_END(ID, NAME)
+#endif
+
HANDLE_DW_TAG(0x0000, null, 2, DWARF)
HANDLE_DW_TAG(0x0001, array_type, 2, DWARF)
HANDLE_DW_TAG(0x0002, class_type, 2, DWARF)
@@ -622,50 +632,50 @@ HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU)
HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU)
// DWARF languages.
-HANDLE_DW_LANG(0x0001, C89, 2, DWARF)
-HANDLE_DW_LANG(0x0002, C, 2, DWARF)
-HANDLE_DW_LANG(0x0003, Ada83, 2, DWARF)
-HANDLE_DW_LANG(0x0004, C_plus_plus, 2, DWARF)
-HANDLE_DW_LANG(0x0005, Cobol74, 2, DWARF)
-HANDLE_DW_LANG(0x0006, Cobol85, 2, DWARF)
-HANDLE_DW_LANG(0x0007, Fortran77, 2, DWARF)
-HANDLE_DW_LANG(0x0008, Fortran90, 2, DWARF)
-HANDLE_DW_LANG(0x0009, Pascal83, 2, DWARF)
-HANDLE_DW_LANG(0x000a, Modula2, 2, DWARF)
+HANDLE_DW_LANG(0x0001, C89, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0002, C, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0003, Ada83, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0004, C_plus_plus, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0005, Cobol74, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0006, Cobol85, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0007, Fortran77, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0008, Fortran90, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0009, Pascal83, 1, 2, DWARF)
+HANDLE_DW_LANG(0x000a, Modula2, 1, 2, DWARF)
// New in DWARF v3:
-HANDLE_DW_LANG(0x000b, Java, 3, DWARF)
-HANDLE_DW_LANG(0x000c, C99, 3, DWARF)
-HANDLE_DW_LANG(0x000d, Ada95, 3, DWARF)
-HANDLE_DW_LANG(0x000e, Fortran95, 3, DWARF)
-HANDLE_DW_LANG(0x000f, PLI, 3, DWARF)
-HANDLE_DW_LANG(0x0010, ObjC, 3, DWARF)
-HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 3, DWARF)
-HANDLE_DW_LANG(0x0012, UPC, 3, DWARF)
-HANDLE_DW_LANG(0x0013, D, 3, DWARF)
+HANDLE_DW_LANG(0x000b, Java, 0, 3, DWARF)
+HANDLE_DW_LANG(0x000c, C99, 0, 3, DWARF)
+HANDLE_DW_LANG(0x000d, Ada95, 1, 3, DWARF)
+HANDLE_DW_LANG(0x000e, Fortran95, 1, 3, DWARF)
+HANDLE_DW_LANG(0x000f, PLI, 1, 3, DWARF)
+HANDLE_DW_LANG(0x0010, ObjC, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0012, UPC, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0013, D, 0, 3, DWARF)
// New in DWARF v4:
-HANDLE_DW_LANG(0x0014, Python, 4, DWARF)
+HANDLE_DW_LANG(0x0014, Python, 0, 4, DWARF)
// New in DWARF v5:
-HANDLE_DW_LANG(0x0015, OpenCL, 5, DWARF)
-HANDLE_DW_LANG(0x0016, Go, 5, DWARF)
-HANDLE_DW_LANG(0x0017, Modula3, 5, DWARF)
-HANDLE_DW_LANG(0x0018, Haskell, 5, DWARF)
-HANDLE_DW_LANG(0x0019, C_plus_plus_03, 5, DWARF)
-HANDLE_DW_LANG(0x001a, C_plus_plus_11, 5, DWARF)
-HANDLE_DW_LANG(0x001b, OCaml, 5, DWARF)
-HANDLE_DW_LANG(0x001c, Rust, 5, DWARF)
-HANDLE_DW_LANG(0x001d, C11, 5, DWARF)
-HANDLE_DW_LANG(0x001e, Swift, 5, DWARF)
-HANDLE_DW_LANG(0x001f, Julia, 5, DWARF)
-HANDLE_DW_LANG(0x0020, Dylan, 5, DWARF)
-HANDLE_DW_LANG(0x0021, C_plus_plus_14, 5, DWARF)
-HANDLE_DW_LANG(0x0022, Fortran03, 5, DWARF)
-HANDLE_DW_LANG(0x0023, Fortran08, 5, DWARF)
-HANDLE_DW_LANG(0x0024, RenderScript, 5, DWARF)
-HANDLE_DW_LANG(0x0025, BLISS, 5, DWARF)
+HANDLE_DW_LANG(0x0015, OpenCL, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0016, Go, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0017, Modula3, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0018, Haskell, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0019, C_plus_plus_03, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001a, C_plus_plus_11, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001b, OCaml, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001c, Rust, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001d, C11, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001e, Swift, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001f, Julia, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0020, Dylan, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0021, C_plus_plus_14, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0022, Fortran03, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0023, Fortran08, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0024, RenderScript, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0025, BLISS, 0, 5, DWARF)
// Vendor extensions:
-HANDLE_DW_LANG(0x8001, Mips_Assembler, 0, MIPS)
-HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, GOOGLE)
-HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, BORLAND)
+HANDLE_DW_LANG(0x8001, Mips_Assembler, None, 0, MIPS)
+HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, 0, GOOGLE)
+HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, 0, BORLAND)
// DWARF attribute type encodings.
HANDLE_DW_ATE(0x01, address, 2, DWARF)
@@ -690,6 +700,11 @@ HANDLE_DW_ATE(0x10, UTF, 4, DWARF)
HANDLE_DW_ATE(0x11, UCS, 5, DWARF)
HANDLE_DW_ATE(0x12, ASCII, 5, DWARF)
+// DWARF attribute endianity
+HANDLE_DW_END(0x00, default)
+HANDLE_DW_END(0x01, big)
+HANDLE_DW_END(0x02, little)
+
// DWARF virtuality codes.
HANDLE_DW_VIRTUALITY(0x00, none)
HANDLE_DW_VIRTUALITY(0x01, virtual)
@@ -821,9 +836,10 @@ HANDLE_DW_CFA(0x14, val_offset)
HANDLE_DW_CFA(0x15, val_offset_sf)
HANDLE_DW_CFA(0x16, val_expression)
// Vendor extensions:
-HANDLE_DW_CFA(0x1d, MIPS_advance_loc8)
-HANDLE_DW_CFA(0x2d, GNU_window_save)
-HANDLE_DW_CFA(0x2e, GNU_args_size)
+HANDLE_DW_CFA_PRED(0x1d, MIPS_advance_loc8, SELECT_MIPS64)
+HANDLE_DW_CFA_PRED(0x2d, GNU_window_save, SELECT_SPARC)
+HANDLE_DW_CFA_PRED(0x2d, AARCH64_negate_ra_state, SELECT_AARCH64)
+HANDLE_DW_CFA_PRED(0x2e, GNU_args_size, SELECT_X86)
// Apple Objective-C Property Attributes.
// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind!
@@ -863,6 +879,7 @@ HANDLE_DWARF_SECTION(DebugTypes, ".debug_types", "debug-types")
HANDLE_DWARF_SECTION(DebugLine, ".debug_line", "debug-line")
HANDLE_DWARF_SECTION(DebugLineStr, ".debug_line_str", "debug-line-str")
HANDLE_DWARF_SECTION(DebugLoc, ".debug_loc", "debug-loc")
+HANDLE_DWARF_SECTION(DebugLoclists, ".debug_loclists", "debug-loclists")
HANDLE_DWARF_SECTION(DebugFrame, ".debug_frame", "debug-frame")
HANDLE_DWARF_SECTION(DebugMacro, ".debug_macro", "debug-macro")
HANDLE_DWARF_SECTION(DebugNames, ".debug_names", "debug-names")
@@ -905,7 +922,9 @@ HANDLE_DW_IDX(0x05, type_hash)
#undef HANDLE_DW_MACRO
#undef HANDLE_DW_RLE
#undef HANDLE_DW_CFA
+#undef HANDLE_DW_CFA_PRED
#undef HANDLE_DW_APPLE_PROPERTY
#undef HANDLE_DW_UT
#undef HANDLE_DWARF_SECTION
#undef HANDLE_DW_IDX
+#undef HANDLE_DW_END
diff --git a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.h b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.h
index 9036f405eaea..525a04d5e6cf 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -26,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadicDetails.h"
+#include "llvm/ADT/Triple.h"
namespace llvm {
class StringRef;
@@ -150,9 +151,8 @@ enum DecimalSignEncoding {
enum EndianityEncoding {
// Endianity attribute values
- DW_END_default = 0x00,
- DW_END_big = 0x01,
- DW_END_little = 0x02,
+#define HANDLE_DW_END(ID, NAME) DW_END_##NAME = ID,
+#include "llvm/BinaryFormat/Dwarf.def"
DW_END_lo_user = 0x40,
DW_END_hi_user = 0xff
};
@@ -184,7 +184,8 @@ enum DefaultedMemberAttribute {
};
enum SourceLanguage {
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID,
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
+ DW_LANG_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
DW_LANG_lo_user = 0x8000,
DW_LANG_hi_user = 0xffff
@@ -273,6 +274,7 @@ enum RangeListEntries {
/// Call frame instruction encodings.
enum CallFrameInfo {
#define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID,
+#define HANDLE_DW_CFA_PRED(ID, NAME, ARCH) DW_CFA_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
DW_CFA_extended = 0x00,
@@ -431,7 +433,7 @@ StringRef LNStandardString(unsigned Standard);
StringRef LNExtendedString(unsigned Encoding);
StringRef MacinfoString(unsigned Encoding);
StringRef RangeListEncodingString(unsigned Encoding);
-StringRef CallFrameString(unsigned Encoding);
+StringRef CallFrameString(unsigned Encoding, Triple::ArchType Arch);
StringRef ApplePropertyString(unsigned);
StringRef UnitTypeString(unsigned);
StringRef AtomTypeString(unsigned Atom);
@@ -489,6 +491,8 @@ unsigned AttributeEncodingVendor(TypeKind E);
unsigned LanguageVendor(SourceLanguage L);
/// @}
+Optional<unsigned> LanguageLowerBound(SourceLanguage L);
+
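One plausible way the new LOWER_BOUND column in Dwarf.def feeds this query, written inside namespace llvm::dwarf; a sketch under that assumption, not necessarily the checked-in implementation.

Optional<unsigned> LanguageLowerBound(SourceLanguage Lang) {
  switch (Lang) {
  default:
    return None;
#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
  case DW_LANG_##NAME:                                                         \
    return LOWER_BOUND;
#include "llvm/BinaryFormat/Dwarf.def"
  }
}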
/// A helper struct providing information about the byte size of DW_FORM
/// values that vary in size depending on the DWARF version, address byte
/// size, or DWARF32/DWARF64.
diff --git a/contrib/llvm/include/llvm/BinaryFormat/ELF.h b/contrib/llvm/include/llvm/BinaryFormat/ELF.h
index 2e778779117b..ce35d127d433 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/ELF.h
@@ -582,6 +582,7 @@ enum {
EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60
EF_HEXAGON_MACH_V62 = 0x00000062, // Hexagon V62
EF_HEXAGON_MACH_V65 = 0x00000065, // Hexagon V65
+ EF_HEXAGON_MACH_V66 = 0x00000066, // Hexagon V66
// Highest ISA version flags
EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0]
@@ -594,6 +595,7 @@ enum {
EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA
EF_HEXAGON_ISA_V62 = 0x00000062, // Hexagon V62 ISA
EF_HEXAGON_ISA_V65 = 0x00000065, // Hexagon V65 ISA
+ EF_HEXAGON_ISA_V66 = 0x00000066, // Hexagon V66 ISA
};
// Hexagon-specific section indexes for common small data
@@ -701,6 +703,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+ EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
// Reserved for AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
@@ -708,11 +711,14 @@ enum : unsigned {
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX906,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
- // Indicates if the xnack target feature is enabled for all code contained in
- // the object.
+ // Indicates if the "xnack" target feature is enabled for all code contained
+ // in the object.
EF_AMDGPU_XNACK = 0x100,
+ // Indicates if the "sram-ecc" target feature is enabled for all code
+ // contained in the object.
+ EF_AMDGPU_SRAM_ECC = 0x200,
};
// ELF Relocation types for AMDGPU
@@ -725,6 +731,38 @@ enum {
#include "ELFRelocs/BPF.def"
};
+// MSP430 specific e_flags
+enum : unsigned {
+ EF_MSP430_MACH_MSP430x11 = 11,
+ EF_MSP430_MACH_MSP430x11x1 = 110,
+ EF_MSP430_MACH_MSP430x12 = 12,
+ EF_MSP430_MACH_MSP430x13 = 13,
+ EF_MSP430_MACH_MSP430x14 = 14,
+ EF_MSP430_MACH_MSP430x15 = 15,
+ EF_MSP430_MACH_MSP430x16 = 16,
+ EF_MSP430_MACH_MSP430x20 = 20,
+ EF_MSP430_MACH_MSP430x22 = 22,
+ EF_MSP430_MACH_MSP430x23 = 23,
+ EF_MSP430_MACH_MSP430x24 = 24,
+ EF_MSP430_MACH_MSP430x26 = 26,
+ EF_MSP430_MACH_MSP430x31 = 31,
+ EF_MSP430_MACH_MSP430x32 = 32,
+ EF_MSP430_MACH_MSP430x33 = 33,
+ EF_MSP430_MACH_MSP430x41 = 41,
+ EF_MSP430_MACH_MSP430x42 = 42,
+ EF_MSP430_MACH_MSP430x43 = 43,
+ EF_MSP430_MACH_MSP430x44 = 44,
+ EF_MSP430_MACH_MSP430X = 45,
+ EF_MSP430_MACH_MSP430x46 = 46,
+ EF_MSP430_MACH_MSP430x47 = 47,
+ EF_MSP430_MACH_MSP430x54 = 54,
+};
+
+// ELF Relocation types for MSP430
+enum {
+#include "ELFRelocs/MSP430.def"
+};
+
#undef ELF_RELOC
// Section header.
@@ -829,6 +867,8 @@ enum : unsigned {
SHT_MIPS_DWARF = 0x7000001e, // DWARF debugging section.
SHT_MIPS_ABIFLAGS = 0x7000002a, // ABI information.
+ SHT_MSP430_ATTRIBUTES = 0x70000003U,
+
SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
@@ -1321,7 +1361,7 @@ enum {
GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1
};
-// AMDGPU specific notes.
+// AMD specific notes. (Code Object V2)
enum {
// Note types with values between 0 and 9 (inclusive) are reserved.
NT_AMD_AMDGPU_HSA_METADATA = 10,
@@ -1329,6 +1369,12 @@ enum {
NT_AMD_AMDGPU_PAL_METADATA = 12
};
+// AMDGPU specific notes. (Code Object V3)
+enum {
+ // Note types with values between 0 and 31 (inclusive) are reserved.
+ NT_AMDGPU_METADATA = 32
+};
+
enum {
GNU_ABI_TAG_LINUX = 0,
GNU_ABI_TAG_HURD = 1,
@@ -1339,6 +1385,8 @@ enum {
GNU_ABI_TAG_NACL = 6,
};
+constexpr const char *ELF_NOTE_GNU = "GNU";
+
// Android packed relocation group flags.
enum {
RELOCATION_GROUPED_BY_INFO_FLAG = 1,
diff --git a/contrib/llvm/include/llvm/BinaryFormat/ELFRelocs/MSP430.def b/contrib/llvm/include/llvm/BinaryFormat/ELFRelocs/MSP430.def
new file mode 100644
index 000000000000..96990abf2db4
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/ELFRelocs/MSP430.def
@@ -0,0 +1,16 @@
+
+#ifndef ELF_RELOC
+#error "ELF_RELOC must be defined"
+#endif
+
+ELF_RELOC(R_MSP430_NONE, 0)
+ELF_RELOC(R_MSP430_32, 1)
+ELF_RELOC(R_MSP430_10_PCREL, 2)
+ELF_RELOC(R_MSP430_16, 3)
+ELF_RELOC(R_MSP430_16_PCREL, 4)
+ELF_RELOC(R_MSP430_16_BYTE, 5)
+ELF_RELOC(R_MSP430_16_PCREL_BYTE, 6)
+ELF_RELOC(R_MSP430_2X_PCREL, 7)
+ELF_RELOC(R_MSP430_RL_PCREL, 8)
+ELF_RELOC(R_MSP430_8, 9)
+ELF_RELOC(R_MSP430_SYM_DIFF, 10)
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MachO.h b/contrib/llvm/include/llvm/BinaryFormat/MachO.h
index c5294c76ebf7..b3d60984249f 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/MachO.h
@@ -486,7 +486,10 @@ enum PlatformType {
PLATFORM_IOS = 2,
PLATFORM_TVOS = 3,
PLATFORM_WATCHOS = 4,
- PLATFORM_BRIDGEOS = 5
+ PLATFORM_BRIDGEOS = 5,
+ PLATFORM_IOSSIMULATOR = 7,
+ PLATFORM_TVOSSIMULATOR = 8,
+ PLATFORM_WATCHOSSIMULATOR = 9
};
// Values for tools enum in build_tool_version.
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPack.def b/contrib/llvm/include/llvm/BinaryFormat/MsgPack.def
new file mode 100644
index 000000000000..781b49f46aeb
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPack.def
@@ -0,0 +1,108 @@
+//===- MsgPack.def - MessagePack definitions --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Macros for running through MessagePack enumerators.
+///
+//===----------------------------------------------------------------------===//
+
+#if !( \
+ defined HANDLE_MP_FIRST_BYTE || defined HANDLE_MP_FIX_BITS || \
+ defined HANDLE_MP_FIX_BITS_MASK || defined HANDLE_MP_FIX_MAX || \
+ defined HANDLE_MP_FIX_LEN || defined HANDLE_MP_FIX_MIN)
+#error "Missing macro definition of HANDLE_MP*"
+#endif
+
+#ifndef HANDLE_MP_FIRST_BYTE
+#define HANDLE_MP_FIRST_BYTE(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_BITS
+#define HANDLE_MP_FIX_BITS(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_BITS_MASK
+#define HANDLE_MP_FIX_BITS_MASK(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_MAX
+#define HANDLE_MP_FIX_MAX(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_LEN
+#define HANDLE_MP_FIX_LEN(ID, NAME)
+#endif
+
+#ifndef HANDLE_MP_FIX_MIN
+#define HANDLE_MP_FIX_MIN(ID, NAME)
+#endif
+
+HANDLE_MP_FIRST_BYTE(0xc0, Nil)
+HANDLE_MP_FIRST_BYTE(0xc2, False)
+HANDLE_MP_FIRST_BYTE(0xc3, True)
+HANDLE_MP_FIRST_BYTE(0xc4, Bin8)
+HANDLE_MP_FIRST_BYTE(0xc5, Bin16)
+HANDLE_MP_FIRST_BYTE(0xc6, Bin32)
+HANDLE_MP_FIRST_BYTE(0xc7, Ext8)
+HANDLE_MP_FIRST_BYTE(0xc8, Ext16)
+HANDLE_MP_FIRST_BYTE(0xc9, Ext32)
+HANDLE_MP_FIRST_BYTE(0xca, Float32)
+HANDLE_MP_FIRST_BYTE(0xcb, Float64)
+HANDLE_MP_FIRST_BYTE(0xcc, UInt8)
+HANDLE_MP_FIRST_BYTE(0xcd, UInt16)
+HANDLE_MP_FIRST_BYTE(0xce, UInt32)
+HANDLE_MP_FIRST_BYTE(0xcf, UInt64)
+HANDLE_MP_FIRST_BYTE(0xd0, Int8)
+HANDLE_MP_FIRST_BYTE(0xd1, Int16)
+HANDLE_MP_FIRST_BYTE(0xd2, Int32)
+HANDLE_MP_FIRST_BYTE(0xd3, Int64)
+HANDLE_MP_FIRST_BYTE(0xd4, FixExt1)
+HANDLE_MP_FIRST_BYTE(0xd5, FixExt2)
+HANDLE_MP_FIRST_BYTE(0xd6, FixExt4)
+HANDLE_MP_FIRST_BYTE(0xd7, FixExt8)
+HANDLE_MP_FIRST_BYTE(0xd8, FixExt16)
+HANDLE_MP_FIRST_BYTE(0xd9, Str8)
+HANDLE_MP_FIRST_BYTE(0xda, Str16)
+HANDLE_MP_FIRST_BYTE(0xdb, Str32)
+HANDLE_MP_FIRST_BYTE(0xdc, Array16)
+HANDLE_MP_FIRST_BYTE(0xdd, Array32)
+HANDLE_MP_FIRST_BYTE(0xde, Map16)
+HANDLE_MP_FIRST_BYTE(0xdf, Map32)
+
+HANDLE_MP_FIX_BITS(0x00, PositiveInt)
+HANDLE_MP_FIX_BITS(0x80, Map)
+HANDLE_MP_FIX_BITS(0x90, Array)
+HANDLE_MP_FIX_BITS(0xa0, String)
+HANDLE_MP_FIX_BITS(0xe0, NegativeInt)
+
+HANDLE_MP_FIX_BITS_MASK(0x80, PositiveInt)
+HANDLE_MP_FIX_BITS_MASK(0xf0, Map)
+HANDLE_MP_FIX_BITS_MASK(0xf0, Array)
+HANDLE_MP_FIX_BITS_MASK(0xe0, String)
+HANDLE_MP_FIX_BITS_MASK(0xe0, NegativeInt)
+
+HANDLE_MP_FIX_MAX(0x7f, PositiveInt)
+HANDLE_MP_FIX_MAX(0x0f, Map)
+HANDLE_MP_FIX_MAX(0x0f, Array)
+HANDLE_MP_FIX_MAX(0x1f, String)
+
+HANDLE_MP_FIX_LEN(0x01, Ext1)
+HANDLE_MP_FIX_LEN(0x02, Ext2)
+HANDLE_MP_FIX_LEN(0x04, Ext4)
+HANDLE_MP_FIX_LEN(0x08, Ext8)
+HANDLE_MP_FIX_LEN(0x10, Ext16)
+
+HANDLE_MP_FIX_MIN(-0x20, NegativeInt)
+
+#undef HANDLE_MP_FIRST_BYTE
+#undef HANDLE_MP_FIX_BITS
+#undef HANDLE_MP_FIX_BITS_MASK
+#undef HANDLE_MP_FIX_MAX
+#undef HANDLE_MP_FIX_LEN
+#undef HANDLE_MP_FIX_MIN
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPack.h b/contrib/llvm/include/llvm/BinaryFormat/MsgPack.h
new file mode 100644
index 000000000000..d431912a53e5
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPack.h
@@ -0,0 +1,93 @@
+//===-- MsgPack.h - MessagePack Constants -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains constants used for implementing MessagePack support.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MSGPACK_H
+#define LLVM_BINARYFORMAT_MSGPACK_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace msgpack {
+
+/// The endianness of all multi-byte encoded values in MessagePack.
+constexpr support::endianness Endianness = support::big;
+
+/// The first byte identifiers of MessagePack object formats.
+namespace FirstByte {
+#define HANDLE_MP_FIRST_BYTE(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+/// Most significant bits used to identify "Fix" variants in MessagePack.
+///
+/// For example, FixStr objects encode their size in the five least significant
+/// bits of their first byte, which is identified by the bit pattern "101" in
+/// the three most significant bits. So FixBits::String contains 0b10100000.
+///
+/// A corresponding mask of the bit pattern is found in \c FixBitsMask.
+namespace FixBits {
+#define HANDLE_MP_FIX_BITS(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+/// Mask of bits used to identify "Fix" variants in MessagePack.
+///
+/// For example, FixStr objects encode their size in the five least significant
+/// bits of their first byte, which is identified by the bit pattern "101" in
+/// the three most significant bits. So FixBitsMask::String contains
+/// 0b11100000.
+///
+/// The corresponding bit pattern to mask for is found in FixBits.
+namespace FixBitsMask {
+#define HANDLE_MP_FIX_BITS_MASK(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+/// The maximum value or size encodable in "Fix" variants of formats.
+///
+/// For example, FixStr objects encode their size in the five least significant
+/// bits of their first byte, so the largest encodable size is 0b00011111.
+namespace FixMax {
+#define HANDLE_MP_FIX_MAX(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
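A small sketch of how the constant families above combine when classifying a first byte; 0xa5 is a FixStr of length 5 in the MessagePack spec.

  uint8_t First = 0xa5;
  bool IsFixStr =
      (First & msgpack::FixBitsMask::String) == msgpack::FixBits::String;
  size_t StrLen = First & msgpack::FixMax::String; // 5 for this byte
  (void)IsFixStr;
  (void)StrLen;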
+/// The exact size encodable in "Fix" variants of formats.
+///
+/// The only objects for which an exact size makes sense are of Extension type.
+///
+/// For example, FixExt4 stores an extension type containing exactly four bytes.
+namespace FixLen {
+#define HANDLE_MP_FIX_LEN(ID, NAME) constexpr uint8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+/// The minimum value or size encodable in "Fix" variants of formats.
+///
+/// The only object for which a minimum makes sense is a negative FixNum.
+///
+/// Negative FixNum objects encode their signed integer value in one byte, but
+/// they must have the pattern "111" as their three most significant bits. This
+/// means all values are negative, and the smallest representable value is
+/// 0b11100000.
+namespace FixMin {
+#define HANDLE_MP_FIX_MIN(ID, NAME) constexpr int8_t NAME = ID;
+#include "llvm/BinaryFormat/MsgPack.def"
+}
+
+} // end namespace msgpack
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MSGPACK_H
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPackReader.h b/contrib/llvm/include/llvm/BinaryFormat/MsgPackReader.h
new file mode 100644
index 000000000000..511c31407455
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPackReader.h
@@ -0,0 +1,148 @@
+//===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is a MessagePack reader.
+///
+/// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
+/// standard.
+///
+/// Typical usage:
+/// \code
+/// StringRef input = GetInput();
+/// msgpack::Reader MPReader(input);
+/// msgpack::Object Obj;
+///
+/// while (MPReader.read(Obj)) {
+/// switch (Obj.Kind) {
+/// case msgpack::Type::Int:
+/// // Use Obj.Int
+/// break;
+/// // ...
+/// }
+/// }
+/// \endcode
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MSGPACKREADER_H
+#define LLVM_SUPPORT_MSGPACKREADER_H
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+namespace llvm {
+namespace msgpack {
+
+/// MessagePack types as defined in the standard, with the exception of Integer
+/// being divided into a signed Int and unsigned UInt variant in order to map
+/// directly to C++ types.
+///
+/// The types map onto corresponding union members of the \c Object struct.
+enum class Type : uint8_t {
+ Int,
+ UInt,
+ Nil,
+ Boolean,
+ Float,
+ String,
+ Binary,
+ Array,
+ Map,
+ Extension,
+};
+
+/// Extension types are composed of a user-defined type ID and an uninterpreted
+/// sequence of bytes.
+struct ExtensionType {
+ /// User-defined extension type.
+ int8_t Type;
+ /// Raw bytes of the extension object.
+ StringRef Bytes;
+};
+
+/// MessagePack object, represented as a tagged union of C++ types.
+///
+/// All types except \c Type::Nil (which has only one value, and so is
+/// completely represented by the \c Kind itself) map to a exactly one union
+/// member.
+struct Object {
+ Type Kind;
+ union {
+ /// Value for \c Type::Int.
+ int64_t Int;
+ /// Value for \c Type::UInt.
+ uint64_t UInt;
+ /// Value for \c Type::Boolean.
+ bool Bool;
+ /// Value for \c Type::Float.
+ double Float;
+ /// Value for \c Type::String and \c Type::Binary.
+ StringRef Raw;
+ /// Value for \c Type::Array and \c Type::Map.
+ size_t Length;
+ /// Value for \c Type::Extension.
+ ExtensionType Extension;
+ };
+
+ Object() : Kind(Type::Int), Int(0) {}
+};
+
+/// Reads MessagePack objects from memory, one at a time.
+class Reader {
+public:
+ /// Construct a reader, keeping a reference to the \p InputBuffer.
+ Reader(MemoryBufferRef InputBuffer);
+ /// Construct a reader, keeping a reference to the \p Input.
+ Reader(StringRef Input);
+
+ Reader(const Reader &) = delete;
+ Reader &operator=(const Reader &) = delete;
+
+ /// Read one object from the input buffer, advancing past it.
+ ///
+ /// The \p Obj is updated with the kind of the object read, and the
+ /// corresponding union member is updated.
+ ///
+ /// For the collection objects (Array and Map), only the length is read, and
+ /// the caller must make an additional \c N calls (in the case of Array) or
+ /// \c N*2 calls (in the case of Map) to \c Read to retrieve the collection
+ /// elements.
+ ///
+ /// \param [out] Obj filled with next object on success.
+ ///
+ /// \returns true when an object was successfully read, false when at the end
+ /// of the input (in which case \p Obj is not updated), otherwise an error.
+ Expected<bool> read(Object &Obj);
+
+private:
+ MemoryBufferRef InputBuffer;
+ StringRef::iterator Current;
+ StringRef::iterator End;
+
+ size_t remainingSpace() {
+ // The rest of the code maintains the invariant that End >= Current, so
+ // that this cast is always defined behavior.
+ return static_cast<size_t>(End - Current);
+ }
+
+ template <class T> Expected<bool> readRaw(Object &Obj);
+ template <class T> Expected<bool> readInt(Object &Obj);
+ template <class T> Expected<bool> readUInt(Object &Obj);
+ template <class T> Expected<bool> readLength(Object &Obj);
+ template <class T> Expected<bool> readExt(Object &Obj);
+ Expected<bool> createRaw(Object &Obj, uint32_t Size);
+ Expected<bool> createExt(Object &Obj, uint32_t Size);
+};
+
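A hedged sketch of the collection protocol described for read(): after a Map header, the next 2*Length reads alternate between keys and values. Input is an assumed StringRef; cantFail() is used only for brevity.

  msgpack::Reader MPReader(Input);
  msgpack::Object Obj;
  if (cantFail(MPReader.read(Obj)) && Obj.Kind == msgpack::Type::Map) {
    for (size_t I = 0, E = Obj.Length; I != E; ++I) {
      msgpack::Object Key, Value;
      cantFail(MPReader.read(Key));   // even read: a map key
      cantFail(MPReader.read(Value)); // odd read: that key's value
    }
  }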
+} // end namespace msgpack
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_MSGPACKREADER_H
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPackTypes.h b/contrib/llvm/include/llvm/BinaryFormat/MsgPackTypes.h
new file mode 100644
index 000000000000..f96cd4c338fd
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPackTypes.h
@@ -0,0 +1,372 @@
+//===- MsgPackTypes.h - MsgPack Types ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This is a data structure for representing MessagePack "documents", with
+/// methods to go to and from MessagePack. The types also specialize YAMLIO
+/// traits in order to go to and from YAML.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/BinaryFormat/MsgPackReader.h"
+#include "llvm/BinaryFormat/MsgPackWriter.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+
+#ifndef LLVM_BINARYFORMAT_MSGPACKTYPES_H
+#define LLVM_BINARYFORMAT_MSGPACKTYPES_H
+
+namespace llvm {
+namespace msgpack {
+
+class Node;
+
+/// Short-hand for a Node pointer.
+using NodePtr = std::shared_ptr<Node>;
+
+/// Short-hand for an Optional Node pointer.
+using OptNodePtr = Optional<NodePtr>;
+
+/// Abstract base-class which can be any MessagePack type.
+class Node {
+public:
+ enum NodeKind {
+ NK_Scalar,
+ NK_Array,
+ NK_Map,
+ };
+
+private:
+ virtual void anchor() = 0;
+ const NodeKind Kind;
+
+ static Expected<OptNodePtr> readArray(Reader &MPReader, size_t Length);
+ static Expected<OptNodePtr> readMap(Reader &MPReader, size_t Length);
+
+public:
+ NodeKind getKind() const { return Kind; }
+
+ /// Construct a Node. Used by derived classes to track kind information.
+ Node(NodeKind Kind) : Kind(Kind) {}
+
+ virtual ~Node() = default;
+
+ /// Read from a MessagePack reader \p MPReader, returning an error if one is
+ /// encountered, or None if \p MPReader is at the end of stream, or some Node
+ /// pointer if some type is read.
+ static Expected<OptNodePtr> read(Reader &MPReader);
+
+ /// Write to a MessagePack writer \p MPWriter.
+ virtual void write(Writer &MPWriter) = 0;
+};
+
+/// A MessagePack scalar.
+class ScalarNode : public Node {
+public:
+ enum ScalarKind {
+ SK_Int,
+ SK_UInt,
+ SK_Nil,
+ SK_Boolean,
+ SK_Float,
+ SK_String,
+ SK_Binary,
+ };
+
+private:
+ void anchor() override;
+
+ void destroy();
+
+ ScalarKind SKind;
+
+ union {
+ int64_t IntValue;
+ uint64_t UIntValue;
+ bool BoolValue;
+ double FloatValue;
+ std::string StringValue;
+ };
+
+public:
+ /// Construct an Int ScalarNode.
+ ScalarNode(int64_t IntValue);
+ /// Construct an Int ScalarNode.
+ ScalarNode(int32_t IntValue);
+ /// Construct an UInt ScalarNode.
+ ScalarNode(uint64_t UIntValue);
+ /// Construct an UInt ScalarNode.
+ ScalarNode(uint32_t UIntValue);
+ /// Construct a Nil ScalarNode.
+ ScalarNode();
+ /// Construct a Boolean ScalarNode.
+ ScalarNode(bool BoolValue);
+ /// Construct a Float ScalarNode.
+ ScalarNode(double FloatValue);
+ /// Construct a String ScalarNode.
+ ScalarNode(StringRef StringValue);
+ /// Construct a String ScalarNode.
+ ScalarNode(const char *StringValue);
+ /// Construct a String ScalarNode.
+ ScalarNode(std::string &&StringValue);
+ /// Construct a Binary ScalarNode.
+ ScalarNode(MemoryBufferRef BinaryValue);
+
+ ~ScalarNode();
+
+ ScalarNode &operator=(const ScalarNode &RHS) = delete;
+ /// A ScalarNode can only be move assigned.
+ ScalarNode &operator=(ScalarNode &&RHS);
+
+ /// Change the kind of this ScalarNode, zero initializing it to the new type.
+ void setScalarKind(ScalarKind SKind) {
+ switch (SKind) {
+ case SK_Int:
+ *this = int64_t(0);
+ break;
+ case SK_UInt:
+ *this = uint64_t(0);
+ break;
+ case SK_Boolean:
+ *this = false;
+ break;
+ case SK_Float:
+ *this = 0.0;
+ break;
+ case SK_String:
+ *this = StringRef();
+ break;
+ case SK_Binary:
+ *this = MemoryBufferRef("", "");
+ break;
+ case SK_Nil:
+ *this = ScalarNode();
+ break;
+ }
+ }
+
+ /// Get the current kind of ScalarNode.
+ ScalarKind getScalarKind() { return SKind; }
+
+ /// Get the value of an Int scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_Int
+ int64_t getInt() {
+ assert(SKind == SK_Int);
+ return IntValue;
+ }
+
+ /// Get the value of a UInt scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_UInt
+ uint64_t getUInt() {
+ assert(SKind == SK_UInt);
+ return UIntValue;
+ }
+
+ /// Get the value of a Boolean scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_Boolean
+ bool getBool() {
+ assert(SKind == SK_Boolean);
+ return BoolValue;
+ }
+
+ /// Get the value of a Float scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_Float
+ double getFloat() {
+ assert(SKind == SK_Float);
+ return FloatValue;
+ }
+
+ /// Get the value of a String scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_String
+ StringRef getString() {
+ assert(SKind == SK_String);
+ return StringValue;
+ }
+
+ /// Get the value of a Binary scalar.
+ ///
+ /// \warning Assumes getScalarKind() == SK_Binary
+ StringRef getBinary() {
+ assert(SKind == SK_Binary);
+ return StringValue;
+ }
+
+ static bool classof(const Node *N) { return N->getKind() == NK_Scalar; }
+
+ void write(Writer &MPWriter) override;
+
+ /// Parse a YAML scalar of the current ScalarKind from \p ScalarStr.
+ ///
+ /// \returns An empty string on success, otherwise an error message.
+ StringRef inputYAML(StringRef ScalarStr);
+
+ /// Output a YAML scalar of the current ScalarKind into \p OS.
+ void outputYAML(raw_ostream &OS) const;
+
+ /// Determine which YAML quoting type the current value would need when
+ /// output.
+ yaml::QuotingType mustQuoteYAML(StringRef ScalarStr) const;
+
+ /// Get the YAML tag for the current ScalarKind.
+ StringRef getYAMLTag() const;
+
+ /// Flag which affects how the type handles YAML tags when reading and
+ /// writing.
+ ///
+ /// When false, tags are used when reading and writing. When reading, the tag
+ /// is used to decide the ScalarKind before parsing. When writing, the tag is
+ /// output along with the value.
+ ///
+ /// When true, tags are ignored when reading and writing. When reading, the
+ /// ScalarKind is always assumed to be String. When writing, the tag is not
+ /// output.
+ bool IgnoreTag = false;
+
+ static const char *IntTag;
+ static const char *NilTag;
+ static const char *BooleanTag;
+ static const char *FloatTag;
+ static const char *StringTag;
+ static const char *BinaryTag;
+};
+
+class ArrayNode : public Node, public std::vector<NodePtr> {
+ void anchor() override;
+
+public:
+ ArrayNode() : Node(NK_Array) {}
+ static bool classof(const Node *N) { return N->getKind() == NK_Array; }
+
+ void write(Writer &MPWriter) override {
+ MPWriter.writeArraySize(this->size());
+ for (auto &N : *this)
+ N->write(MPWriter);
+ }
+};
+
+class MapNode : public Node, public StringMap<NodePtr> {
+ void anchor() override;
+
+public:
+ MapNode() : Node(NK_Map) {}
+ static bool classof(const Node *N) { return N->getKind() == NK_Map; }
+
+ void write(Writer &MPWriter) override {
+ MPWriter.writeMapSize(this->size());
+ for (auto &N : *this) {
+ MPWriter.write(N.first());
+ N.second->write(MPWriter);
+ }
+ }
+};
+
+} // end namespace msgpack
+
+namespace yaml {
+
+template <> struct PolymorphicTraits<msgpack::NodePtr> {
+ static NodeKind getKind(const msgpack::NodePtr &N) {
+ if (isa<msgpack::ScalarNode>(*N))
+ return NodeKind::Scalar;
+ if (isa<msgpack::MapNode>(*N))
+ return NodeKind::Map;
+ if (isa<msgpack::ArrayNode>(*N))
+ return NodeKind::Sequence;
+ llvm_unreachable("NodeKind not supported");
+ }
+ static msgpack::ScalarNode &getAsScalar(msgpack::NodePtr &N) {
+ if (!N || !isa<msgpack::ScalarNode>(*N))
+ N.reset(new msgpack::ScalarNode());
+ return *cast<msgpack::ScalarNode>(N.get());
+ }
+ static msgpack::MapNode &getAsMap(msgpack::NodePtr &N) {
+ if (!N || !isa<msgpack::MapNode>(*N))
+ N.reset(new msgpack::MapNode());
+ return *cast<msgpack::MapNode>(N.get());
+ }
+ static msgpack::ArrayNode &getAsSequence(msgpack::NodePtr &N) {
+ if (!N || !isa<msgpack::ArrayNode>(*N))
+ N.reset(new msgpack::ArrayNode());
+ return *cast<msgpack::ArrayNode>(N.get());
+ }
+};
+
+template <> struct TaggedScalarTraits<msgpack::ScalarNode> {
+ static void output(const msgpack::ScalarNode &S, void *Ctxt,
+ raw_ostream &ScalarOS, raw_ostream &TagOS) {
+ if (!S.IgnoreTag)
+ TagOS << S.getYAMLTag();
+ S.outputYAML(ScalarOS);
+ }
+
+ static StringRef input(StringRef ScalarStr, StringRef Tag, void *Ctxt,
+ msgpack::ScalarNode &S) {
+ if (Tag == msgpack::ScalarNode::IntTag) {
+ S.setScalarKind(msgpack::ScalarNode::SK_UInt);
+ if (S.inputYAML(ScalarStr) == StringRef())
+ return StringRef();
+ S.setScalarKind(msgpack::ScalarNode::SK_Int);
+ return S.inputYAML(ScalarStr);
+ }
+
+ if (S.IgnoreTag || Tag == msgpack::ScalarNode::StringTag ||
+ Tag == "tag:yaml.org,2002:str")
+ S.setScalarKind(msgpack::ScalarNode::SK_String);
+ else if (Tag == msgpack::ScalarNode::NilTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_Nil);
+ else if (Tag == msgpack::ScalarNode::BooleanTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_Boolean);
+ else if (Tag == msgpack::ScalarNode::FloatTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_Float);
+ else if (Tag == msgpack::ScalarNode::StringTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_String);
+ else if (Tag == msgpack::ScalarNode::BinaryTag)
+ S.setScalarKind(msgpack::ScalarNode::SK_Binary);
+ else
+ return "Unsupported messagepack tag";
+
+ return S.inputYAML(ScalarStr);
+ }
+
+ static QuotingType mustQuote(const msgpack::ScalarNode &S, StringRef Str) {
+ return S.mustQuoteYAML(Str);
+ }
+};
+
+template <> struct CustomMappingTraits<msgpack::MapNode> {
+ static void inputOne(IO &IO, StringRef Key, msgpack::MapNode &M) {
+ IO.mapRequired(Key.str().c_str(), M[Key]);
+ }
+ static void output(IO &IO, msgpack::MapNode &M) {
+ for (auto &N : M)
+ IO.mapRequired(N.getKey().str().c_str(), N.getValue());
+ }
+};
+
+template <> struct SequenceTraits<msgpack::ArrayNode> {
+ static size_t size(IO &IO, msgpack::ArrayNode &A) { return A.size(); }
+ static msgpack::NodePtr &element(IO &IO, msgpack::ArrayNode &A,
+ size_t Index) {
+ if (Index >= A.size())
+ A.resize(Index + 1);
+ return A[Index];
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MSGPACKTYPES_H
diff --git a/contrib/llvm/include/llvm/BinaryFormat/MsgPackWriter.h b/contrib/llvm/include/llvm/BinaryFormat/MsgPackWriter.h
new file mode 100644
index 000000000000..98af422c9f19
--- /dev/null
+++ b/contrib/llvm/include/llvm/BinaryFormat/MsgPackWriter.h
@@ -0,0 +1,131 @@
+//===- MsgPackWriter.h - Simple MsgPack writer ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a MessagePack writer.
+///
+/// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
+/// specification.
+///
+/// Typical usage:
+/// \code
+/// raw_ostream output = GetOutputStream();
+/// msgpack::Writer MPWriter(output);
+/// MPWriter.writeNil();
+/// MPWriter.write(false);
+/// MPWriter.write("string");
+/// // ...
+/// \endcode
+///
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MSGPACKPARSER_H
+#define LLVM_SUPPORT_MSGPACKPARSER_H
+
+#include "llvm/BinaryFormat/MsgPack.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace msgpack {
+
+/// Writes MessagePack objects to an output stream, one at a time.
+class Writer {
+public:
+ /// Construct a writer, optionally enabling "Compatibility Mode" as defined
+ /// in the MessagePack specification.
+ ///
+ /// When in \p Compatible mode, the writer will write \c Str16 formats
+ /// instead of \c Str8 formats, and will refuse to write any \c Bin formats.
+ ///
+ /// \param OS stream to output MessagePack objects to.
+ /// \param Compatible when set, write in "Compatibility Mode".
+ Writer(raw_ostream &OS, bool Compatible = false);
+
+ Writer(const Writer &) = delete;
+ Writer &operator=(const Writer &) = delete;
+
+ /// Write a \em Nil to the output stream.
+ ///
+ /// The output will be the \em nil format.
+ void writeNil();
+
+ /// Write a \em Boolean to the output stream.
+ ///
+ /// The output will be a \em bool format.
+ void write(bool b);
+
+ /// Write a signed integer to the output stream.
+ ///
+ /// The output will be in the smallest possible \em int format.
+ ///
+ /// The format chosen may be for an unsigned integer.
+ void write(int64_t i);
+
+ /// Write an unsigned integer to the output stream.
+ ///
+ /// The output will be in the smallest possible \em int format.
+ void write(uint64_t u);
+
+ /// Write a floating point number to the output stream.
+ ///
+ /// The output will be in the smallest possible \em float format.
+ void write(double d);
+
+ /// Write a string to the output stream.
+ ///
+ /// The output will be in the smallest possible \em str format.
+ void write(StringRef s);
+
+ /// Write a memory buffer to the output stream.
+ ///
+ /// The output will be in the smallest possible \em bin format.
+ ///
+ /// \warning Do not use this overload if in \c Compatible mode.
+ void write(MemoryBufferRef Buffer);
+
+ /// Write the header for an \em Array of the given size.
+ ///
+ /// The output will be in the smallest possible \em array format.
+ ///
+ /// The header contains an identifier for the \em array format used, as well
+ /// as an encoding of the size of the array.
+ ///
+ /// N.B. The caller must subsequently call \c Write an additional \p Size
+ /// times to complete the array.
+ void writeArraySize(uint32_t Size);
+
+ /// Write the header for a \em Map of the given size.
+ ///
+ /// The output will be in the smallest possible \em map format.
+ ///
+ /// The header contains an identifier for the \em map format used, as well
+ /// as an encoding of the size of the map.
+ ///
+ /// N.B. The caller must subsequently call \c Write an additional \c Size*2
+ /// times to complete the map. Each even numbered call to \c Write defines a
+ /// new key, and each odd numbered call defines the previous key's value.
+ void writeMapSize(uint32_t Size);
+
+ /// Write a typed memory buffer (an extension type) to the output stream.
+ ///
+ /// The output will be in the smallest possible \em ext format.
+ void writeExt(int8_t Type, MemoryBufferRef Buffer);
+
+private:
+ support::endian::Writer EW;
+ bool Compatible;
+};
+
+} // end namespace msgpack
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_MSGPACKPARSER_H
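The Writer above is purely stream-oriented: a compound object is produced by writing its size header and then making exactly the promised number of element writes. A minimal sketch of a two-entry map using only the methods declared in this header (the helper name and values are made up; StringRef is spelled out because a bare string literal would otherwise prefer the bool overload):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/BinaryFormat/MsgPackWriter.h"
  #include "llvm/Support/raw_ostream.h"
  #include <cstdint>

  void writeSampleDoc(llvm::raw_ostream &OS) {
    llvm::msgpack::Writer MPWriter(OS);
    // Map header first, then Size*2 writes alternating key and value.
    MPWriter.writeMapSize(2);
    MPWriter.write(llvm::StringRef("count"));
    MPWriter.write(uint64_t(3));
    MPWriter.write(llvm::StringRef("ratios"));
    MPWriter.writeArraySize(2);  // the value of "ratios" is a 2-element array
    MPWriter.write(0.5);
    MPWriter.write(0.25);
  }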
diff --git a/contrib/llvm/include/llvm/BinaryFormat/Wasm.h b/contrib/llvm/include/llvm/BinaryFormat/Wasm.h
index fa5448dacec4..d9f0f94b298d 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -16,6 +16,7 @@
#define LLVM_BINARYFORMAT_WASM_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
namespace llvm {
namespace wasm {
@@ -25,7 +26,7 @@ const char WasmMagic[] = {'\0', 'a', 's', 'm'};
// Wasm binary format version
const uint32_t WasmVersion = 0x1;
// Wasm linking metadata version
-const uint32_t WasmMetadataVersion = 0x1;
+const uint32_t WasmMetadataVersion = 0x2;
// Wasm uses a 64k page size
const uint32_t WasmPageSize = 65536;
@@ -34,9 +35,12 @@ struct WasmObjectHeader {
uint32_t Version;
};
-struct WasmSignature {
- std::vector<uint8_t> ParamTypes;
- uint8_t ReturnType;
+struct WasmDylinkInfo {
+ uint32_t MemorySize; // Memory size in bytes
+ uint32_t MemoryAlignment; // P2 alignment of memory
+ uint32_t TableSize; // Table size in elements
+ uint32_t TableAlignment; // P2 alignment of table
+ std::vector<StringRef> Needed; // Shared library dependencies
};
struct WasmExport {
@@ -79,6 +83,18 @@ struct WasmGlobal {
StringRef SymbolName; // from the "linking" section
};
+struct WasmEventType {
+ // Kind of event. Currently only WASM_EVENT_ATTRIBUTE_EXCEPTION is possible.
+ uint32_t Attribute;
+ uint32_t SigIndex;
+};
+
+struct WasmEvent {
+ uint32_t Index;
+ WasmEventType Type;
+ StringRef SymbolName; // from the "linking" section
+};
+
struct WasmImport {
StringRef Module;
StringRef Field;
@@ -88,6 +104,7 @@ struct WasmImport {
WasmGlobalType Global;
WasmTable Table;
WasmLimits Memory;
+ WasmEventType Event;
};
};
@@ -104,8 +121,8 @@ struct WasmFunction {
uint32_t Size;
uint32_t CodeOffset; // start of Locals and Body
StringRef SymbolName; // from the "linking" section
- StringRef DebugName; // from the "name" section
- uint32_t Comdat; // from the "comdat info" section
+ StringRef DebugName; // from the "name" section
+ uint32_t Comdat; // from the "comdat info" section
};
struct WasmDataSegment {
@@ -171,18 +188,20 @@ struct WasmLinkingData {
};
enum : unsigned {
- WASM_SEC_CUSTOM = 0, // Custom / User-defined section
- WASM_SEC_TYPE = 1, // Function signature declarations
- WASM_SEC_IMPORT = 2, // Import declarations
- WASM_SEC_FUNCTION = 3, // Function declarations
- WASM_SEC_TABLE = 4, // Indirect function table and other tables
- WASM_SEC_MEMORY = 5, // Memory attributes
- WASM_SEC_GLOBAL = 6, // Global declarations
- WASM_SEC_EXPORT = 7, // Exports
- WASM_SEC_START = 8, // Start function declaration
- WASM_SEC_ELEM = 9, // Elements section
- WASM_SEC_CODE = 10, // Function bodies (code)
- WASM_SEC_DATA = 11 // Data segments
+ WASM_SEC_CUSTOM = 0, // Custom / User-defined section
+ WASM_SEC_TYPE = 1, // Function signature declarations
+ WASM_SEC_IMPORT = 2, // Import declarations
+ WASM_SEC_FUNCTION = 3, // Function declarations
+ WASM_SEC_TABLE = 4, // Indirect function table and other tables
+ WASM_SEC_MEMORY = 5, // Memory attributes
+ WASM_SEC_GLOBAL = 6, // Global declarations
+ WASM_SEC_EXPORT = 7, // Exports
+ WASM_SEC_START = 8, // Start function declaration
+ WASM_SEC_ELEM = 9, // Elements section
+ WASM_SEC_CODE = 10, // Function bodies (code)
+ WASM_SEC_DATA = 11, // Data segments
+ WASM_SEC_DATACOUNT = 12, // Data segment count
+ WASM_SEC_EVENT = 13 // Event declarations
};
// Type immediate encodings used in various contexts.
@@ -191,7 +210,8 @@ enum : unsigned {
WASM_TYPE_I64 = 0x7E,
WASM_TYPE_F32 = 0x7D,
WASM_TYPE_F64 = 0x7C,
- WASM_TYPE_ANYFUNC = 0x70,
+ WASM_TYPE_V128 = 0x7B,
+ WASM_TYPE_FUNCREF = 0x70,
WASM_TYPE_EXCEPT_REF = 0x68,
WASM_TYPE_FUNC = 0x60,
WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
@@ -203,12 +223,13 @@ enum : unsigned {
WASM_EXTERNAL_TABLE = 0x1,
WASM_EXTERNAL_MEMORY = 0x2,
WASM_EXTERNAL_GLOBAL = 0x3,
+ WASM_EXTERNAL_EVENT = 0x4,
};
// Opcodes used in initializer expressions.
enum : unsigned {
WASM_OPCODE_END = 0x0b,
- WASM_OPCODE_GET_GLOBAL = 0x23,
+ WASM_OPCODE_GLOBAL_GET = 0x23,
WASM_OPCODE_I32_CONST = 0x41,
WASM_OPCODE_I64_CONST = 0x42,
WASM_OPCODE_F32_CONST = 0x43,
@@ -217,35 +238,27 @@ enum : unsigned {
enum : unsigned {
WASM_LIMITS_FLAG_HAS_MAX = 0x1,
-};
-
-// Subset of types that a value can have
-enum class ValType {
- I32 = WASM_TYPE_I32,
- I64 = WASM_TYPE_I64,
- F32 = WASM_TYPE_F32,
- F64 = WASM_TYPE_F64,
- EXCEPT_REF = WASM_TYPE_EXCEPT_REF,
+ WASM_LIMITS_FLAG_IS_SHARED = 0x2,
};
// Kind codes used in the custom "name" section
enum : unsigned {
WASM_NAMES_FUNCTION = 0x1,
- WASM_NAMES_LOCAL = 0x2,
+ WASM_NAMES_LOCAL = 0x2,
};
// Kind codes used in the custom "linking" section
enum : unsigned {
- WASM_SEGMENT_INFO = 0x5,
- WASM_INIT_FUNCS = 0x6,
- WASM_COMDAT_INFO = 0x7,
- WASM_SYMBOL_TABLE = 0x8,
+ WASM_SEGMENT_INFO = 0x5,
+ WASM_INIT_FUNCS = 0x6,
+ WASM_COMDAT_INFO = 0x7,
+ WASM_SYMBOL_TABLE = 0x8,
};
// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO
enum : unsigned {
- WASM_COMDAT_DATA = 0x0,
- WASM_COMDAT_FUNCTION = 0x1,
+ WASM_COMDAT_DATA = 0x0,
+ WASM_COMDAT_FUNCTION = 0x1,
};
// Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE
@@ -254,17 +267,23 @@ enum WasmSymbolType : unsigned {
WASM_SYMBOL_TYPE_DATA = 0x1,
WASM_SYMBOL_TYPE_GLOBAL = 0x2,
WASM_SYMBOL_TYPE_SECTION = 0x3,
+ WASM_SYMBOL_TYPE_EVENT = 0x4,
+};
+
+// Kinds of event attributes.
+enum WasmEventAttribute : unsigned {
+ WASM_EVENT_ATTRIBUTE_EXCEPTION = 0x0,
};
-const unsigned WASM_SYMBOL_BINDING_MASK = 0x3;
-const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc;
+const unsigned WASM_SYMBOL_BINDING_MASK = 0x3;
+const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc;
-const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0;
-const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1;
-const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
+const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0;
+const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1;
+const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
-const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
-const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
+const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
+const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
#define WASM_RELOC(name, value) name = value,
@@ -274,9 +293,32 @@ enum : unsigned {
#undef WASM_RELOC
+// Subset of types that a value can have
+enum class ValType {
+ I32 = WASM_TYPE_I32,
+ I64 = WASM_TYPE_I64,
+ F32 = WASM_TYPE_F32,
+ F64 = WASM_TYPE_F64,
+ V128 = WASM_TYPE_V128,
+ EXCEPT_REF = WASM_TYPE_EXCEPT_REF,
+};
+
+struct WasmSignature {
+ SmallVector<wasm::ValType, 1> Returns;
+ SmallVector<wasm::ValType, 4> Params;
+ // Support empty and tombstone instances, needed by DenseMap.
+ enum { Plain, Empty, Tombstone } State = Plain;
+
+ WasmSignature(SmallVector<wasm::ValType, 1> &&InReturns,
+ SmallVector<wasm::ValType, 4> &&InParams)
+ : Returns(InReturns), Params(InParams) {}
+ WasmSignature() = default;
+};
+
// Useful comparison operators
inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) {
- return LHS.ReturnType == RHS.ReturnType && LHS.ParamTypes == RHS.ParamTypes;
+ return LHS.State == RHS.State && LHS.Returns == RHS.Returns &&
+ LHS.Params == RHS.Params;
}
inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) {
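WasmSignature now models multi-value signatures directly: a vector of result types and a vector of parameter types (both using the relocated ValType enum), plus a State field so DenseMap can represent empty and tombstone keys. A small sketch of the new layout, with arbitrary type choices (not taken from the patch):

  #include "llvm/BinaryFormat/Wasm.h"
  #include <cassert>

  void signatureExample() {
    using llvm::wasm::ValType;
    llvm::wasm::WasmSignature A({ValType::I32}, {ValType::I64, ValType::F64});
    llvm::wasm::WasmSignature B({ValType::I32}, {ValType::I64, ValType::F64});
    assert(A == B);                            // same state, returns and params
    assert(A != llvm::wasm::WasmSignature());  // default-constructed: no params
  }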
diff --git a/contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def
index 8ffd51e483f3..b3a08e70c1d5 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def
+++ b/contrib/llvm/include/llvm/BinaryFormat/WasmRelocs.def
@@ -1,4 +1,3 @@
-
#ifndef WASM_RELOC
#error "WASM_RELOC must be defined"
#endif
@@ -13,3 +12,4 @@ WASM_RELOC(R_WEBASSEMBLY_TYPE_INDEX_LEB, 6)
WASM_RELOC(R_WEBASSEMBLY_GLOBAL_INDEX_LEB, 7)
WASM_RELOC(R_WEBASSEMBLY_FUNCTION_OFFSET_I32, 8)
WASM_RELOC(R_WEBASSEMBLY_SECTION_OFFSET_I32, 9)
+WASM_RELOC(R_WEBASSEMBLY_EVENT_INDEX_LEB, 10)
diff --git a/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h b/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h
index ce8bdd9cf0b4..0d7cc141f2ce 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -51,6 +51,7 @@ class Module;
struct BitcodeLTOInfo {
bool IsThinLTO;
bool HasSummary;
+ bool EnableSplitLTOUnit;
};
/// Represents a module in a bitcode file.
diff --git a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 6723cf42dd2c..f0d11e9c1689 100644
--- a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -342,6 +342,7 @@ enum ConstantsCodes {
CST_CODE_INLINEASM = 23, // INLINEASM: [sideeffect|alignstack|
// asmdialect,asmstr,conststr]
CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24, // [opty, flags, n x operands]
+ CST_CODE_CE_UNOP = 25, // CE_UNOP: [opcode, opval]
};
/// CastOpcodes - These are values used in the bitcode files to encode which
@@ -364,6 +365,14 @@ enum CastOpcodes {
CAST_ADDRSPACECAST = 12
};
+/// UnaryOpcodes - These are values used in the bitcode files to encode which
+/// unop a CST_CODE_CE_UNOP or a XXX refers to. The values of these enums
+/// have no fixed relation to the LLVM IR enum values. Changing these will
+/// break compatibility with old files.
+enum UnaryOpcodes {
+ UNOP_NEG = 0
+};
+
/// BinaryOpcodes - These are values used in the bitcode files to encode which
/// binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums
/// have no fixed relation to the LLVM IR enum values. Changing these will
@@ -524,6 +533,7 @@ enum FunctionCodes {
// 53 is unused.
// 54 is unused.
FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
+ FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval]
};
enum UseListCodes {
@@ -591,6 +601,7 @@ enum AttributeKindCodes {
ATTR_KIND_NOCF_CHECK = 56,
ATTR_KIND_OPT_FOR_FUZZING = 57,
ATTR_KIND_SHADOWCALLSTACK = 58,
+ ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59,
};
enum ComdatSelectionKindCodes {
diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
index b6056380916c..413901d218f9 100644
--- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -71,6 +71,7 @@ class MCTargetOptions;
class MDNode;
class Module;
class raw_ostream;
+class StackMaps;
class TargetLoweringObjectFile;
class TargetMachine;
@@ -137,6 +138,9 @@ private:
static char ID;
+protected:
+ /// Protected struct HandlerInfo and Handlers permit a target-extended
+ /// AsmPrinter to add its own handlers.
struct HandlerInfo {
AsmPrinterHandler *Handler;
const char *TimerName;
@@ -365,6 +369,9 @@ public:
/// emit the proxies we previously omitted in EmitGlobalVariable.
void emitGlobalGOTEquivs();
+ /// Emit the stack maps.
+ void emitStackMaps(StackMaps &SM);
+
//===------------------------------------------------------------------===//
// Overridable Hooks
//===------------------------------------------------------------------===//
@@ -542,7 +549,7 @@ public:
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
- virtual void EmitDebugThreadLocal(const MCExpr *Value, unsigned Size) const;
+ virtual void EmitDebugValue(const MCExpr *Value, unsigned Size) const;
//===------------------------------------------------------------------===//
// Dwarf Lowering Routines
@@ -631,6 +638,11 @@ private:
/// inline asm.
void EmitInlineAsm(const MachineInstr *MI) const;
+ /// Add inline assembly info to the diagnostics machinery, so we can
+ /// emit file and position info. Returns SrcMgr memory buffer position.
+ unsigned addInlineAsmDiagBuffer(StringRef AsmStr,
+ const MDNode *LocMDNode) const;
+
//===------------------------------------------------------------------===//
// Internal Implementation Details
//===------------------------------------------------------------------===//
@@ -647,6 +659,8 @@ private:
void EmitLLVMUsedList(const ConstantArray *InitList);
/// Emit llvm.ident metadata in an '.ident' directive.
void EmitModuleIdents(Module &M);
+ /// Emit bytes for llvm.commandline metadata.
+ void EmitModuleCommandLines(Module &M);
void EmitXXStructorList(const DataLayout &DL, const Constant *List,
bool isCtor);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinterHandler.h
index f5ac95a20b10..a8b13200dd4e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinterHandler.h
@@ -1,4 +1,4 @@
-//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===//
+//===-- llvm/CodeGen/AsmPrinterHandler.h -----------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
+#ifndef LLVM_CODEGEN_ASMPRINTERHANDLER_H
+#define LLVM_CODEGEN_ASMPRINTERHANDLER_H
#include "llvm/Support/DataTypes.h"
diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f76a2426377a..f105d887c397 100644
--- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -80,6 +80,23 @@ private:
using BaseT = TargetTransformInfoImplCRTPBase<T>;
using TTI = TargetTransformInfo;
+ /// Estimate a cost of Broadcast as an extract and sequence of insert
+ /// operations.
+ unsigned getBroadcastShuffleOverhead(Type *Ty) {
+ assert(Ty->isVectorTy() && "Can only shuffle vectors");
+ unsigned Cost = 0;
+ // Broadcast cost is equal to the cost of extracting the zero'th element
+ // plus the cost of inserting it into every element of the result vector.
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, Ty, 0);
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, Ty, i);
+ }
+ return Cost;
+ }
+
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
unsigned getPermuteShuffleOverhead(Type *Ty) {
@@ -101,6 +118,50 @@ private:
return Cost;
}
+ /// Estimate a cost of subvector extraction as a sequence of extract and
+ /// insert operations.
+ unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
+ assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+ "Can only extract subvectors from vectors");
+ int NumSubElts = SubTy->getVectorNumElements();
+ assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
+ "SK_ExtractSubvector index out of range");
+
+ unsigned Cost = 0;
+ // Subvector extraction cost is equal to the cost of extracting the elements
+ // from the source vector type plus the cost of inserting them into the
+ // result subvector type.
+ for (int i = 0; i != NumSubElts; ++i) {
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, Ty, i + Index);
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, SubTy, i);
+ }
+ return Cost;
+ }
+
+ /// Estimate a cost of subvector insertion as a sequence of extract and
+ /// insert operations.
+ unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
+ assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+ "Can only insert subvectors into vectors");
+ int NumSubElts = SubTy->getVectorNumElements();
+ assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
+ "SK_InsertSubvector index out of range");
+
+ unsigned Cost = 0;
+ // Subvector insertion cost is equal to the cost of extracting the elements
+ // from the subvector type plus the cost of inserting them into the result
+ // vector type.
+ for (int i = 0; i != NumSubElts; ++i) {
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, SubTy, i);
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, Ty, i + Index);
+ }
+ return Cost;
+ }
+
/// Local query method delegates up to T which *must* implement this!
const TargetSubtargetInfo *getST() const {
return static_cast<const T *>(this)->getST();
@@ -554,14 +615,20 @@ public:
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
switch (Kind) {
+ case TTI::SK_Broadcast:
+ return getBroadcastShuffleOverhead(Tp);
case TTI::SK_Select:
+ case TTI::SK_Reverse:
case TTI::SK_Transpose:
case TTI::SK_PermuteSingleSrc:
case TTI::SK_PermuteTwoSrc:
return getPermuteShuffleOverhead(Tp);
- default:
- return 1;
+ case TTI::SK_ExtractSubvector:
+ return getExtractSubvectorOverhead(Tp, Index, SubTp);
+ case TTI::SK_InsertSubvector:
+ return getInsertSubvectorOverhead(Tp, Index, SubTp);
}
+ llvm_unreachable("Unknown TTI::ShuffleKind");
}
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -783,8 +850,9 @@ public:
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false) {
VectorType *VT = dyn_cast<VectorType>(VecTy);
assert(VT && "Expect a vector type for interleaved memory op");
@@ -795,8 +863,13 @@ public:
VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
// Firstly, the cost of load/store operation.
- unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
- Opcode, VecTy, Alignment, AddressSpace);
+ unsigned Cost;
+ if (UseMaskForCond || UseMaskForGaps)
+ Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
+ Opcode, VecTy, Alignment, AddressSpace);
+ else
+ Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
+ AddressSpace);
// Legalize the vector type, and get the legalized and unlegalized type
// sizes.
@@ -892,6 +965,40 @@ public:
->getVectorInstrCost(Instruction::InsertElement, VT, i);
}
+ if (!UseMaskForCond)
+ return Cost;
+
+ Type *I8Type = Type::getInt8Ty(VT->getContext());
+ VectorType *MaskVT = VectorType::get(I8Type, NumElts);
+ SubVT = VectorType::get(I8Type, NumSubElts);
+
+ // The Mask shuffling cost is the cost of extracting all the elements of the
+ // Mask and inserting each of them Factor times into the wide vector:
+ //
+ // E.g. an interleaved group with factor 3:
+ // %mask = icmp ult <8 x i32> %vec1, %vec2
+ // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
+ // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
+ // The cost is estimated as extracting all mask elements from the <8xi1> mask
+ // vector and inserting them Factor times into the <24xi1> shuffled mask
+ // vector.
+ for (unsigned i = 0; i < NumSubElts; i++)
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::ExtractElement, SubVT, i);
+
+ for (unsigned i = 0; i < NumElts; i++)
+ Cost += static_cast<T *>(this)->getVectorInstrCost(
+ Instruction::InsertElement, MaskVT, i);
+
+ // The Gaps mask is invariant and created outside the loop, therefore the
+ // cost of creating it is not accounted for here. However if we have both
+ // a MaskForGaps and some other mask that guards the execution of the
+ // memory access, we need to account for the cost of And-ing the two masks
+ // inside the loop.
+ if (UseMaskForGaps)
+ Cost += static_cast<T *>(this)->getArithmeticInstrCost(
+ BinaryOperator::And, MaskVT);
+
return Cost;
}
@@ -901,6 +1008,7 @@ public:
unsigned VF = 1) {
unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
+ auto *ConcreteTTI = static_cast<T *>(this);
switch (IID) {
default: {
@@ -926,29 +1034,24 @@ public:
ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
}
- return static_cast<T *>(this)->
- getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost);
+ return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
+ ScalarizationCost);
}
case Intrinsic::masked_scatter: {
assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[3];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
- return
- static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Store,
- Args[0]->getType(),
- Args[1], VarMask,
- Alignment);
+ return ConcreteTTI->getGatherScatterOpCost(
+ Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
}
case Intrinsic::masked_gather: {
assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
- return
- static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Load,
- RetTy, Args[0], VarMask,
- Alignment);
+ return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
+ Args[0], VarMask, Alignment);
}
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
@@ -964,6 +1067,45 @@ public:
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ Value *X = Args[0];
+ Value *Y = Args[1];
+ Value *Z = Args[2];
+ TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
+ TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
+ TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
+ TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
+ TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
+ OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
+ : TTI::OP_None;
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
+ OpKindX, OpKindZ, OpPropsX);
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
+ OpKindY, OpKindZ, OpPropsY);
+ // Non-constant shift amounts require a modulo.
+ if (OpKindZ != TTI::OK_UniformConstantValue &&
+ OpKindZ != TTI::OK_NonUniformConstantValue)
+ Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
+ OpKindZ, OpKindBW, OpPropsZ,
+ OpPropsBW);
+ // For non-rotates (X != Y) we must add shift-by-zero handling costs.
+ if (X != Y) {
+ Type *CondTy = Type::getInt1Ty(RetTy->getContext());
+ if (RetVF > 1)
+ CondTy = VectorType::get(CondTy, RetVF);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
+ CondTy, nullptr);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+ CondTy, nullptr);
+ }
+ return Cost;
+ }
}
}
@@ -1036,15 +1178,18 @@ public:
case Intrinsic::fabs:
ISDs.push_back(ISD::FABS);
break;
+ case Intrinsic::canonicalize:
+ ISDs.push_back(ISD::FCANONICALIZE);
+ break;
case Intrinsic::minnum:
ISDs.push_back(ISD::FMINNUM);
if (FMF.noNaNs())
- ISDs.push_back(ISD::FMINNAN);
+ ISDs.push_back(ISD::FMINIMUM);
break;
case Intrinsic::maxnum:
ISDs.push_back(ISD::FMAXNUM);
if (FMF.noNaNs())
- ISDs.push_back(ISD::FMAXNAN);
+ ISDs.push_back(ISD::FMAXIMUM);
break;
case Intrinsic::copysign:
ISDs.push_back(ISD::FCOPYSIGN);
@@ -1136,7 +1281,8 @@ public:
SmallVector<unsigned, 2> CustomCost;
for (unsigned ISD : ISDs) {
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) {
+ if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
+ TLI->isFAbsFree(LT.second)) {
return 0;
}
@@ -1280,24 +1426,36 @@ public:
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
+ Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
// Assume the pairwise shuffles add a cost.
ShuffleCost += (IsPairwise + 1) *
ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, Ty);
- ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
- Ty = VectorType::get(ScalarTy, NumVecElts);
+ NumVecElts, SubTy);
+ ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
+ Ty = SubTy;
++LongVectorCount;
}
+
+ NumReduxLevels -= LongVectorCount;
+
// The minimal length of the vector is limited by the real length of vector
// operations performed on the current platform. That's why several final
// reduction operations are performed on the vectors with the same
// architecture-dependent length.
- ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
- ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, Ty);
- ArithCost += (NumReduxLevels - LongVectorCount) *
+
+ // Non-pairwise reductions need one shuffle per reduction level. Pairwise
+ // reductions need two shuffles on every level except the last one. On that
+ // level one of the shuffles is <0, u, u, ...>, which is identity.
+ unsigned NumShuffles = NumReduxLevels;
+ if (IsPairwise && NumReduxLevels >= 1)
+ NumShuffles += NumReduxLevels - 1;
+ ShuffleCost += NumShuffles *
+ ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
+ 0, Ty);
+ ArithCost += NumReduxLevels *
ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
- return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
+ return ShuffleCost + ArithCost +
+ ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
/// Try to calculate op costs for min/max reduction operations.
@@ -1327,37 +1485,46 @@ public:
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
+ Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
+ CondTy = VectorType::get(ScalarCondTy, NumVecElts);
+
// Assume the pairwise shuffles add a cost.
ShuffleCost += (IsPairwise + 1) *
ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, Ty);
+ NumVecElts, SubTy);
MinMaxCost +=
- ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
- ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
+ ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
+ ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
nullptr);
- Ty = VectorType::get(ScalarTy, NumVecElts);
- CondTy = VectorType::get(ScalarCondTy, NumVecElts);
+ Ty = SubTy;
++LongVectorCount;
}
+
+ NumReduxLevels -= LongVectorCount;
+
// The minimal length of the vector is limited by the real length of vector
// operations performed on the current platform. That's why several final
 // reduction operations are performed on the vectors with the same
// architecture-dependent length.
- ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
- ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
- NumVecElts, Ty);
+
+ // Non-pairwise reductions need one shuffle per reduction level. Pairwise
+ // reductions need two shuffles on every level except the last one. On that
+ // level one of the shuffles is <0, u, u, ...>, which is identity.
+ unsigned NumShuffles = NumReduxLevels;
+ if (IsPairwise && NumReduxLevels >= 1)
+ NumShuffles += NumReduxLevels - 1;
+ ShuffleCost += NumShuffles *
+ ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
+ 0, Ty);
MinMaxCost +=
- (NumReduxLevels - LongVectorCount) *
+ NumReduxLevels *
(ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
nullptr));
- // Need 3 extractelement instructions for scalarization + an additional
- // scalar select instruction.
+ // The last min/max should be in vector registers and we counted it above.
+ // So just need a single extractelement.
return ShuffleCost + MinMaxCost +
- 3 * getScalarizationOverhead(Ty, /*Insert=*/false,
- /*Extract=*/true) +
- ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
- ScalarCondTy, nullptr);
+ ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
unsigned getVectorSplitCost() { return 1; }
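The funnel-shift costing added above prices the generic expansion fshl(X, Y, Z) = (X << (Z % BW)) | (Y >> (BW - (Z % BW))) as Or + Sub + Shl + LShr, adds a URem only when the shift amount is not a constant, and adds an icmp/select pair when X != Y to guard the shift-by-zero case. A scalar sketch of that expansion, written out for 32 bits (illustrative only; the explicit guard avoids the undefined shift by BW in C++):

  #include <cstdint>

  uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
    const uint32_t BW = 32;
    uint32_t Amt = Z % BW;                  // URem: dropped when Z is constant
    if (Amt == 0)                           // icmp + select: needed when X != Y
      return X;
    return (X << Amt) | (Y >> (BW - Amt));  // Shl, LShr, Sub, Or
  }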
diff --git a/contrib/llvm/include/llvm/CodeGen/GCs.h b/contrib/llvm/include/llvm/CodeGen/BuiltinGCs.h
index 5207f801c84e..1767922fb5ac 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCs.h
+++ b/contrib/llvm/include/llvm/CodeGen/BuiltinGCs.h
@@ -1,4 +1,4 @@
-//===-- GCs.h - Garbage collector linkage hacks ---------------------------===//
+//===-- BuiltinGCs.h - Garbage collector linkage hacks --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains hack functions to force linking in the GC components.
+// This file contains hack functions to force linking in the builtin GC
+// components.
//
//===----------------------------------------------------------------------===//
@@ -15,32 +16,18 @@
#define LLVM_CODEGEN_GCS_H
namespace llvm {
-class GCStrategy;
-class GCMetadataPrinter;
/// FIXME: Collector instances are not useful on their own. These no longer
/// serve any purpose except to link in the plugins.
-/// Creates a CoreCLR-compatible garbage collector.
-void linkCoreCLRGC();
-
-/// Creates an ocaml-compatible garbage collector.
-void linkOcamlGC();
+/// Ensure the definitions of the builtin GCs get linked in.
+void linkAllBuiltinGCs();
/// Creates an ocaml-compatible metadata printer.
void linkOcamlGCPrinter();
-/// Creates an erlang-compatible garbage collector.
-void linkErlangGC();
-
/// Creates an erlang-compatible metadata printer.
void linkErlangGCPrinter();
-
-/// Creates a shadow stack garbage collector. This collector requires no code
-/// generator support.
-void linkShadowStackGC();
-
-void linkStatepointExampleGC();
}
#endif
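The per-collector link hooks collapse into a single linkAllBuiltinGCs(), while the ocaml and erlang metadata printers keep their own hooks. A sketch of how a tool might keep the builtin strategies from being dead-stripped (the wrapper function is hypothetical; per the removed declarations, the builtin set covers the coreclr, ocaml, erlang, shadow-stack and statepoint-example collectors):

  #include "llvm/CodeGen/BuiltinGCs.h"

  static void forceLinkGCComponents() {
    llvm::linkAllBuiltinGCs();   // all builtin GC strategies
    llvm::linkOcamlGCPrinter();  // metadata printers are still linked separately
    llvm::linkErlangGCPrinter();
  }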
diff --git a/contrib/llvm/include/llvm/CodeGen/CommandFlags.inc b/contrib/llvm/include/llvm/CodeGen/CommandFlags.inc
index 7d2d167289e0..568d329a5e8c 100644
--- a/contrib/llvm/include/llvm/CodeGen/CommandFlags.inc
+++ b/contrib/llvm/include/llvm/CodeGen/CommandFlags.inc
@@ -74,7 +74,8 @@ static cl::opt<ThreadModel::Model> TMModel(
static cl::opt<llvm::CodeModel::Model> CMModel(
"code-model", cl::desc("Choose code model"),
- cl::values(clEnumValN(CodeModel::Small, "small", "Small code model"),
+ cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"),
+ clEnumValN(CodeModel::Small, "small", "Small code model"),
clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"),
clEnumValN(CodeModel::Medium, "medium", "Medium code model"),
clEnumValN(CodeModel::Large, "large", "Large code model")));
@@ -113,10 +114,16 @@ static cl::opt<TargetMachine::CodeGenFileType> FileType(
clEnumValN(TargetMachine::CGFT_Null, "null",
"Emit nothing, for performance testing")));
-static cl::opt<bool>
- DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::init(false));
+static cl::opt<llvm::FramePointer::FP> FramePointerUsage(
+ "frame-pointer", cl::desc("Specify frame pointer elimination optimization"),
+ cl::init(llvm::FramePointer::None),
+ cl::values(
+ clEnumValN(llvm::FramePointer::All, "all",
+ "Disable frame pointer elimination"),
+ clEnumValN(llvm::FramePointer::NonLeaf, "non-leaf",
+ "Disable frame pointer elimination for non-leaf frame"),
+ clEnumValN(llvm::FramePointer::None, "none",
+ "Enable frame pointer elimination")));
static cl::opt<bool> EnableUnsafeFPMath(
"enable-unsafe-fp-math",
@@ -367,9 +374,14 @@ setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) {
NewAttrs.addAttribute("target-cpu", CPU);
if (!Features.empty())
NewAttrs.addAttribute("target-features", Features);
- if (DisableFPElim.getNumOccurrences() > 0)
- NewAttrs.addAttribute("no-frame-pointer-elim",
- DisableFPElim ? "true" : "false");
+ if (FramePointerUsage.getNumOccurrences() > 0) {
+ if (FramePointerUsage == llvm::FramePointer::All)
+ NewAttrs.addAttribute("frame-pointer", "all");
+ else if (FramePointerUsage == llvm::FramePointer::NonLeaf)
+ NewAttrs.addAttribute("frame-pointer", "non-leaf");
+ else if (FramePointerUsage == llvm::FramePointer::None)
+ NewAttrs.addAttribute("frame-pointer", "none");
+ }
if (DisableTailCalls.getNumOccurrences() > 0)
NewAttrs.addAttribute("disable-tail-calls",
toStringRef(DisableTailCalls));
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
index a262cb38b175..befc28f084e7 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/contrib/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ------*- C++ -*-===//
+//===- llvm/CodeGen/DbgEntityHistoryCalculator.h ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
+#ifndef LLVM_CODEGEN_DBGVALUEHISTORYCALCULATOR_H
+#define LLVM_CODEGEN_DBGVALUEHISTORYCALCULATOR_H
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,20 +33,19 @@ class DbgValueHistoryMap {
public:
using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>;
using InstrRanges = SmallVector<InstrRange, 4>;
- using InlinedVariable =
- std::pair<const DILocalVariable *, const DILocation *>;
- using InstrRangesMap = MapVector<InlinedVariable, InstrRanges>;
+ using InlinedEntity = std::pair<const DINode *, const DILocation *>;
+ using InstrRangesMap = MapVector<InlinedEntity, InstrRanges>;
private:
InstrRangesMap VarInstrRanges;
public:
- void startInstrRange(InlinedVariable Var, const MachineInstr &MI);
- void endInstrRange(InlinedVariable Var, const MachineInstr &MI);
+ void startInstrRange(InlinedEntity Var, const MachineInstr &MI);
+ void endInstrRange(InlinedEntity Var, const MachineInstr &MI);
// Returns register currently describing @Var. If @Var is currently
 // inaccessible or is not described by a register, returns 0.
- unsigned getRegisterForVar(InlinedVariable Var) const;
+ unsigned getRegisterForVar(InlinedEntity Var) const;
bool empty() const { return VarInstrRanges.empty(); }
void clear() { VarInstrRanges.clear(); }
@@ -58,10 +57,31 @@ public:
#endif
};
-void calculateDbgValueHistory(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &Result);
+/// For each inlined instance of a source-level label, keep the corresponding
+/// DBG_LABEL instruction. The DBG_LABEL instruction could be used to generate
+/// a temporary (assembler) label before it.
+class DbgLabelInstrMap {
+public:
+ using InlinedEntity = std::pair<const DINode *, const DILocation *>;
+ using InstrMap = MapVector<InlinedEntity, const MachineInstr *>;
+
+private:
+ InstrMap LabelInstr;
+
+public:
+ void addInstr(InlinedEntity Label, const MachineInstr &MI);
+
+ bool empty() const { return LabelInstr.empty(); }
+ void clear() { LabelInstr.clear(); }
+ InstrMap::const_iterator begin() const { return LabelInstr.begin(); }
+ InstrMap::const_iterator end() const { return LabelInstr.end(); }
+};
+
+void calculateDbgEntityHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &DbgValues,
+ DbgLabelInstrMap &DbgLabels);
} // end namespace llvm
-#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
+#endif // LLVM_CODEGEN_DBGVALUEHISTORYCALCULATOR_H
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/include/llvm/CodeGen/DebugHandlerBase.h
index 1ccefe32be75..4f0d14d317f2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/contrib/llvm/include/llvm/CodeGen/DebugHandlerBase.h
@@ -1,4 +1,4 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h --------*- C++ -*--===//
+//===-- llvm/CodeGen/DebugHandlerBase.h -----------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
+#ifndef LLVM_CODEGEN_DEBUGHANDLERBASE_H
+#define LLVM_CODEGEN_DEBUGHANDLERBASE_H
-#include "AsmPrinterHandler.h"
-#include "DbgValueHistoryCalculator.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -82,6 +82,9 @@ protected:
/// variable. Variables are listed in order of appearance.
DbgValueHistoryMap DbgValues;
+ /// Mapping of inlined labels to DBG_LABEL machine instructions.
+ DbgLabelInstrMap DbgLabels;
+
/// Maps instruction with label emitted before instruction.
/// FIXME: Make this private from DwarfDebug, we have the necessary accessors
/// for it.
@@ -122,6 +125,10 @@ public:
/// Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+ /// Return the function-local offset of an instruction. A label for the
+ /// instruction \p MI should exist (\ref getLabelAfterInsn).
+ const MCExpr *getFunctionLocalOffsetAfterInsn(const MachineInstr *MI);
+
/// If this type is derived from a base type then return base type size.
static uint64_t getBaseTypeSize(const DITypeRef TyRef);
};
diff --git a/contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h b/contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
index e6c0483cfc35..8b1a7af17bbf 100644
--- a/contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/contrib/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
#define LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/StringMap.h"
namespace llvm {
@@ -18,34 +19,52 @@ class MCSymbol;
/// Data for a string pool entry.
struct DwarfStringPoolEntry {
+ static constexpr unsigned NotIndexed = -1;
+
MCSymbol *Symbol;
unsigned Offset;
unsigned Index;
+
+ bool isIndexed() const { return Index != NotIndexed; }
};
/// String pool entry reference.
-struct DwarfStringPoolEntryRef {
- const StringMapEntry<DwarfStringPoolEntry> *I = nullptr;
+class DwarfStringPoolEntryRef {
+ PointerIntPair<const StringMapEntry<DwarfStringPoolEntry> *, 1, bool>
+ MapEntryAndIndexed;
+
+ const StringMapEntry<DwarfStringPoolEntry> *getMapEntry() const {
+ return MapEntryAndIndexed.getPointer();
+ }
public:
DwarfStringPoolEntryRef() = default;
- explicit DwarfStringPoolEntryRef(
- const StringMapEntry<DwarfStringPoolEntry> &I)
- : I(&I) {}
+ DwarfStringPoolEntryRef(const StringMapEntry<DwarfStringPoolEntry> &Entry,
+ bool Indexed)
+ : MapEntryAndIndexed(&Entry, Indexed) {}
- explicit operator bool() const { return I; }
+ explicit operator bool() const { return getMapEntry(); }
MCSymbol *getSymbol() const {
- assert(I->second.Symbol && "No symbol available!");
- return I->second.Symbol;
+ assert(getMapEntry()->second.Symbol && "No symbol available!");
+ return getMapEntry()->second.Symbol;
}
- unsigned getOffset() const { return I->second.Offset; }
- unsigned getIndex() const { return I->second.Index; }
- StringRef getString() const { return I->first(); }
+ unsigned getOffset() const { return getMapEntry()->second.Offset; }
+ bool isIndexed() const { return MapEntryAndIndexed.getInt(); }
+ unsigned getIndex() const {
+ assert(isIndexed());
+ assert(getMapEntry()->getValue().isIndexed());
+ return getMapEntry()->second.Index;
+ }
+ StringRef getString() const { return getMapEntry()->first(); }
/// Return the entire string pool entry for convenience.
- DwarfStringPoolEntry getEntry() const { return I->getValue(); }
+ DwarfStringPoolEntry getEntry() const { return getMapEntry()->getValue(); }
- bool operator==(const DwarfStringPoolEntryRef &X) const { return I == X.I; }
- bool operator!=(const DwarfStringPoolEntryRef &X) const { return I != X.I; }
+ bool operator==(const DwarfStringPoolEntryRef &X) const {
+ return getMapEntry() == X.getMapEntry();
+ }
+ bool operator!=(const DwarfStringPoolEntryRef &X) const {
+ return getMapEntry() != X.getMapEntry();
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 2da00b7d61ab..7c658515de09 100644
--- a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -1,4 +1,4 @@
-//===- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen ---===//
+//===- FunctionLoweringInfo.h - Lower functions from LLVM IR ---*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -246,6 +246,7 @@ public:
return 0;
unsigned &R = ValueMap[V];
assert(R == 0 && "Already initialized this value register!");
+ assert(VirtReg2Value.empty());
return R = CreateRegs(V->getType());
}
diff --git a/contrib/llvm/include/llvm/CodeGen/GCMetadata.h b/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
index ad2599fc120e..7fb27202c122 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCMetadata.h
@@ -55,12 +55,11 @@ class MCSymbol;
/// GCPoint - Metadata for a collector-safe point in machine code.
///
struct GCPoint {
- GC::PointKind Kind; ///< The kind of the safe point.
MCSymbol *Label; ///< A label.
DebugLoc Loc;
- GCPoint(GC::PointKind K, MCSymbol *L, DebugLoc DL)
- : Kind(K), Label(L), Loc(std::move(DL)) {}
+ GCPoint(MCSymbol *L, DebugLoc DL)
+ : Label(L), Loc(std::move(DL)) {}
};
/// GCRoot - Metadata for a pointer to an object managed by the garbage
@@ -124,8 +123,8 @@ public:
/// addSafePoint - Notes the existence of a safe point. Num is the ID of the
/// label just prior to the safe point (if the code generator is using
/// MachineModuleInfo).
- void addSafePoint(GC::PointKind Kind, MCSymbol *Label, const DebugLoc &DL) {
- SafePoints.emplace_back(Kind, Label, DL);
+ void addSafePoint(MCSymbol *Label, const DebugLoc &DL) {
+ SafePoints.emplace_back(Label, DL);
}
/// getFrameSize/setFrameSize - Records the function's frame size.
diff --git a/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h b/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
index 1cc69a7b71af..5f1efb2ce02c 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -29,6 +29,7 @@ class GCMetadataPrinter;
class GCModuleInfo;
class GCStrategy;
class Module;
+class StackMaps;
/// GCMetadataPrinterRegistry - The GC assembly printer registry uses all the
/// defaults from Registry.
@@ -60,6 +61,11 @@ public:
/// Called after the assembly for the module is generated by
/// the AsmPrinter (but before target specific hooks)
virtual void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {}
+
+ /// Called when the stack maps are generated. Return true if
+ /// stack maps with a custom format are generated. Otherwise
+ /// returns false and the default format will be used.
+ virtual bool emitStackMaps(StackMaps &SM, AsmPrinter &AP) { return false; }
};
} // end namespace llvm
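The new emitStackMaps hook lets a GC metadata printer take over stack-map emission; returning false keeps the default format. A hedged sketch of a printer opting in (the class name is hypothetical and the emission body is elided):

  #include "llvm/CodeGen/GCMetadataPrinter.h"
  #include "llvm/CodeGen/StackMaps.h"

  class CustomFormatGCPrinter : public llvm::GCMetadataPrinter {
  public:
    bool emitStackMaps(llvm::StackMaps &SM, llvm::AsmPrinter &AP) override {
      // Serialize SM in a GC-specific encoding through AP here...
      return true;  // true suppresses the default stack map format
    }
  };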
diff --git a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
index f835bacfb548..5a60cd7cb823 100644
--- a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
+++ b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h
@@ -59,19 +59,6 @@ namespace llvm {
class Type;
-namespace GC {
-
-/// PointKind - Used to indicate whether the address of the call instruction
-/// or the address after the call instruction is listed in the stackmap. For
-/// most runtimes, PostCall safepoints are appropriate.
-///
-enum PointKind {
- PreCall, ///< Instr is a call instruction.
- PostCall ///< Instr is the return address of a call.
-};
-
-} // end namespace GC
-
/// GCStrategy describes a garbage collector algorithm's code generation
/// requirements, and provides overridable hooks for those needs which cannot
/// be abstractly described. GCStrategy objects must be looked up through
@@ -88,11 +75,7 @@ protected:
/// if set, none of the other options can be
/// anything but their default values.
- unsigned NeededSafePoints = 0; ///< Bitmask of required safe points.
- bool CustomReadBarriers = false; ///< Default is to insert loads.
- bool CustomWriteBarriers = false; ///< Default is to insert stores.
- bool CustomRoots = false; ///< Default is to pass through to backend.
- bool InitRoots= true; ///< If set, roots are nulled during lowering.
+ bool NeededSafePoints = false; ///< If set, calls are inferred to be safepoints.
bool UsesMetadata = false; ///< If set, backend must emit metadata tables.
public:
@@ -103,16 +86,6 @@ public:
/// name string specified on functions which use this strategy.
const std::string &getName() const { return Name; }
- /// By default, write barriers are replaced with simple store
- /// instructions. If true, you must provide a custom pass to lower
- /// calls to \@llvm.gcwrite.
- bool customWriteBarrier() const { return CustomWriteBarriers; }
-
- /// By default, read barriers are replaced with simple load
- /// instructions. If true, you must provide a custom pass to lower
- /// calls to \@llvm.gcread.
- bool customReadBarrier() const { return CustomReadBarriers; }
-
/// Returns true if this strategy is expecting the use of gc.statepoints,
/// and false otherwise.
bool useStatepoints() const { return UseStatepoints; }
@@ -135,25 +108,8 @@ public:
*/
///@{
- /// True if safe points of any kind are required. By default, none are
- /// recorded.
- bool needsSafePoints() const { return NeededSafePoints != 0; }
-
- /// True if the given kind of safe point is required. By default, none are
- /// recorded.
- bool needsSafePoint(GC::PointKind Kind) const {
- return (NeededSafePoints & 1 << Kind) != 0;
- }
-
- /// By default, roots are left for the code generator so it can generate a
- /// stack map. If true, you must provide a custom pass to lower
- /// calls to \@llvm.gcroot.
- bool customRoots() const { return CustomRoots; }
-
- /// If set, gcroot intrinsics should initialize their allocas to null
- /// before the first use. This is necessary for most GCs and is enabled by
- /// default.
- bool initializeRoots() const { return InitRoots; }
+ /// True if safe points need to be inferred on call sites
+ bool needsSafePoints() const { return NeededSafePoints; }
/// If set, appropriate metadata tables must be emitted by the back-end
/// (assembler, JIT, or otherwise). For statepoint, this method is
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
new file mode 100644
index 000000000000..ce2d285a99e5
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
@@ -0,0 +1,237 @@
+//===- llvm/CodeGen/GlobalISel/CSEInfo.h ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// Provides analysis for continuously CSEing during GISel passes.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_CSEINFO_H
+#define LLVM_CODEGEN_GLOBALISEL_CSEINFO_H
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+/// A class that wraps MachineInstrs and derives from FoldingSetNode in order to
+/// be uniqued in a CSEMap. The tradeoff here is extra memory allocations for
+/// UniqueMachineInstr vs making MachineInstr bigger.
+class UniqueMachineInstr : public FoldingSetNode {
+ friend class GISelCSEInfo;
+ const MachineInstr *MI;
+ explicit UniqueMachineInstr(const MachineInstr *MI) : MI(MI) {}
+
+public:
+ void Profile(FoldingSetNodeID &ID);
+};
+
+// Class representing some configuration that can be done during CSE analysis.
+// Currently it only supports the shouldCSEOpc hook that each pass can override.
+class CSEConfig {
+public:
+ virtual ~CSEConfig() = default;
+ // Hook for defining which Generic instructions should be CSEd.
+ // GISelCSEInfo currently only calls this hook when dealing with generic
+ // opcodes.
+ virtual bool shouldCSEOpc(unsigned Opc);
+};
+
+// TODO: Find a better place for this.
+// Commonly used for O0 config.
+class CSEConfigConstantOnly : public CSEConfig {
+public:
+ virtual ~CSEConfigConstantOnly() = default;
+ virtual bool shouldCSEOpc(unsigned Opc) override;
+};
+
+/// The CSE Analysis object.
+/// This installs itself as a delegate to the MachineFunction to track
+/// new instructions as well as deletions. It however will not be able to
+/// track instruction mutations. In such cases, recordNewInstruction should be
+/// called (e.g. inside MachineIRBuilder::recordInsertion).
+/// Also, because an instruction can be inserted before any operands have been
+/// added to it, instructions are uniqued and inserted lazily.
+/// CSEInfo should assert when trying to enter an incomplete instruction into
+/// the CSEMap. There is Opcode level granularity on which instructions can be
+/// CSE'd and for now, only Generic instructions are CSEable.
+class GISelCSEInfo : public GISelChangeObserver {
+ // Make it accessible only to CSEMIRBuilder.
+ friend class CSEMIRBuilder;
+
+ BumpPtrAllocator UniqueInstrAllocator;
+ FoldingSet<UniqueMachineInstr> CSEMap;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineFunction *MF = nullptr;
+ std::unique_ptr<CSEConfig> CSEOpt;
+ /// Keep a cache of UniqueInstrs for each MachineInstr. In GISel,
+ /// instructions are often mutated (and their ID completely changes).
+ /// Whenever a mutation happens, invalidate the UniqueMachineInstr for the
+ /// MachineInstr.
+ DenseMap<const MachineInstr *, UniqueMachineInstr *> InstrMapping;
+
+ /// Store instructions that are not fully formed in TemporaryInsts.
+ /// Also because CSE insertion happens lazily, we can remove insts from this
+ /// list and avoid inserting and then removing from the CSEMap.
+ GISelWorkList<8> TemporaryInsts;
+
+ // Only used in asserts.
+ DenseMap<unsigned, unsigned> OpcodeHitTable;
+
+ bool isUniqueMachineInstValid(const UniqueMachineInstr &UMI) const;
+
+ void invalidateUniqueMachineInstr(UniqueMachineInstr *UMI);
+
+ UniqueMachineInstr *getNodeIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB, void *&InsertPos);
+
+ /// Allocate and construct a new UniqueMachineInstr for MI and return.
+ UniqueMachineInstr *getUniqueInstrForMI(const MachineInstr *MI);
+
+ void insertNode(UniqueMachineInstr *UMI, void *InsertPos = nullptr);
+
+ /// Get the MachineInstr(Unique) if it exists already in the CSEMap and the
+ /// same MachineBasicBlock.
+ MachineInstr *getMachineInstrIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos);
+
+ /// Use this method to allocate a new UniqueMachineInstr for MI and insert it
+ /// into the CSEMap. shouldCSE(MI->getOpcode()) should return true for MI.
+ void insertInstr(MachineInstr *MI, void *InsertPos = nullptr);
+
+public:
+ GISelCSEInfo() = default;
+
+ virtual ~GISelCSEInfo();
+
+ void setMF(MachineFunction &MF);
+
+ /// Records a newly created inst in a list and lazily inserts it into the
+ /// CSEMap. Sometimes this method might be called with a partially constructed
+ /// MachineInstr (right after BuildMI, without any operands added yet); in
+ /// such cases the hashing of the instruction is deferred to a later stage.
+ void recordNewInstruction(MachineInstr *MI);
+
+ /// Use this callback to inform CSE about a newly fully created instruction.
+ void handleRecordedInst(MachineInstr *MI);
+
+ /// Use this callback to insert all the recorded instructions. At this point,
+ /// all of these insts need to be fully constructed and should not be missing
+ /// any operands.
+ void handleRecordedInsts();
+
+ /// Remove this inst from the CSE map. If this inst has not been inserted yet,
+ /// it will be removed from the TemporaryInsts list if it exists.
+ void handleRemoveInst(MachineInstr *MI);
+
+ void releaseMemory();
+
+ void setCSEConfig(std::unique_ptr<CSEConfig> Opt) { CSEOpt = std::move(Opt); }
+
+ bool shouldCSE(unsigned Opc) const;
+
+ void analyze(MachineFunction &MF);
+
+ void countOpcodeHit(unsigned Opc);
+
+ void print();
+
+ // Observer API
+ void erasingInstr(MachineInstr &MI) override;
+ void createdInstr(MachineInstr &MI) override;
+ void changingInstr(MachineInstr &MI) override;
+ void changedInstr(MachineInstr &MI) override;
+};
+
+class TargetRegisterClass;
+class RegisterBank;
+
+// Simple builder class to easily profile properties about MIs.
+class GISelInstProfileBuilder {
+ FoldingSetNodeID &ID;
+ const MachineRegisterInfo &MRI;
+
+public:
+ GISelInstProfileBuilder(FoldingSetNodeID &ID, const MachineRegisterInfo &MRI)
+ : ID(ID), MRI(MRI) {}
+ // Profiling methods.
+ const GISelInstProfileBuilder &addNodeIDOpcode(unsigned Opc) const;
+ const GISelInstProfileBuilder &addNodeIDRegType(const LLT &Ty) const;
+ const GISelInstProfileBuilder &addNodeIDRegType(const unsigned) const;
+
+ const GISelInstProfileBuilder &
+ addNodeIDRegType(const TargetRegisterClass *RC) const;
+ const GISelInstProfileBuilder &addNodeIDRegType(const RegisterBank *RB) const;
+
+ const GISelInstProfileBuilder &addNodeIDRegNum(unsigned Reg) const;
+
+ const GISelInstProfileBuilder &addNodeIDImmediate(int64_t Imm) const;
+ const GISelInstProfileBuilder &
+ addNodeIDMBB(const MachineBasicBlock *MBB) const;
+
+ const GISelInstProfileBuilder &
+ addNodeIDMachineOperand(const MachineOperand &MO) const;
+
+ const GISelInstProfileBuilder &addNodeIDFlag(unsigned Flag) const;
+ const GISelInstProfileBuilder &addNodeID(const MachineInstr *MI) const;
+};
+
+/// Simple wrapper that does the following:
+/// 1) Lazily evaluates the MachineFunction to compute CSEable instructions.
+/// 2) Allows configuration of which instructions are CSEd through a CSEConfig
+///    object. Provides a method called get which takes a CSEConfig object.
+class GISelCSEAnalysisWrapper {
+ GISelCSEInfo Info;
+ MachineFunction *MF = nullptr;
+ bool AlreadyComputed = false;
+
+public:
+ /// Takes a CSEConfig object that defines what opcodes get CSEd.
+ /// If CSEConfig is already set, and the CSE Analysis has been preserved,
+ /// it will not use the new CSEOpt (use ReCompute to force using the new
+ /// CSEOpt).
+ GISelCSEInfo &get(std::unique_ptr<CSEConfig> CSEOpt, bool ReCompute = false);
+ void setMF(MachineFunction &MFunc) { MF = &MFunc; }
+ void setComputed(bool Computed) { AlreadyComputed = Computed; }
+ void releaseMemory() { Info.releaseMemory(); }
+};
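+
+// For illustration only: a client holding a wrapper `W` for MachineFunction
+// `MF` might do the following (`MyCSEConfig` is a hypothetical CSEConfig
+// subclass, not something this header provides):
+//
+//   W.setMF(MF);
+//   GISelCSEInfo &Info = W.get(llvm::make_unique<MyCSEConfig>());
+//   // Pass ReCompute = true to force re-analysis with a new config.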
+
+/// The actual analysis pass wrapper.
+class GISelCSEAnalysisWrapperPass : public MachineFunctionPass {
+ GISelCSEAnalysisWrapper Wrapper;
+
+public:
+ static char ID;
+ GISelCSEAnalysisWrapperPass() : MachineFunctionPass(ID) {
+ initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const GISelCSEAnalysisWrapper &getCSEWrapper() const { return Wrapper; }
+ GISelCSEAnalysisWrapper &getCSEWrapper() { return Wrapper; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void releaseMemory() override {
+ Wrapper.releaseMemory();
+ Wrapper.setComputed(false);
+ }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
new file mode 100644
index 000000000000..a8fb736ebbb5
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
@@ -0,0 +1,110 @@
+//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.h --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a version of MachineIRBuilder which CSEs insts within
+/// a MachineBasicBlock.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_CSEMIRBUILDER_H
+#define LLVM_CODEGEN_GLOBALISEL_CSEMIRBUILDER_H
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+
+namespace llvm {
+
+/// Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
+/// Eg usage:
+///
+///   GISelCSEInfo *Info =
+///       &getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEInfo();
+///   CSEMIRBuilder CB(Builder.getState());
+///   CB.setCSEInfo(Info);
+///   auto A = CB.buildConstant(s32, 42);
+///   auto B = CB.buildConstant(s32, 42);
+///   assert(A == B);
+///   unsigned CReg = MRI.createGenericVirtualRegister(s32);
+///   auto C = CB.buildConstant(CReg, 42);
+///   assert(C->getOpcode() == TargetOpcode::COPY);
+///
+/// Explicitly passing in a register would materialize a copy if possible.
+/// CSEMIRBuilder also does trivial constant folding for binary ops.
+class CSEMIRBuilder : public MachineIRBuilder {
+
+ /// Returns true if A dominates B (within the same basic block).
+ /// Both iterators must be in the same basic block.
+ //
+ // TODO: Another approach for checking dominance is having two iterators and
+ // making them go towards each other until they meet or reach begin/end. Which
+ // approach is better? Should this even change dynamically? For G_CONSTANTS
+ // most of which will be at the top of the BB, the top down approach would be
+ // a better choice. Does IRTranslator placing constants at the beginning still
+ // make sense? Should this change based on Opcode?
+ bool dominates(MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) const;
+
+  /// For a given ID, find a MachineInstr in the CSE Map. If found, check if it
+  /// dominates the current insertion point and, if not, move it just before the
+  /// current insertion point and return it. If not found, return a null
+  /// MachineInstrBuilder.
+ MachineInstrBuilder getDominatingInstrForID(FoldingSetNodeID &ID,
+ void *&NodeInsertPos);
+ /// Simple check if we can CSE (we have the CSEInfo) or if this Opcode is
+ /// safe to CSE.
+ bool canPerformCSEForOpc(unsigned Opc) const;
+
+ void profileDstOp(const DstOp &Op, GISelInstProfileBuilder &B) const;
+
+ void profileDstOps(ArrayRef<DstOp> Ops, GISelInstProfileBuilder &B) const {
+ for (const DstOp &Op : Ops)
+ profileDstOp(Op, B);
+ }
+
+ void profileSrcOp(const SrcOp &Op, GISelInstProfileBuilder &B) const;
+
+ void profileSrcOps(ArrayRef<SrcOp> Ops, GISelInstProfileBuilder &B) const {
+ for (const SrcOp &Op : Ops)
+ profileSrcOp(Op, B);
+ }
+
+ void profileMBBOpcode(GISelInstProfileBuilder &B, unsigned Opc) const;
+
+ void profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps, Optional<unsigned> Flags,
+ GISelInstProfileBuilder &B) const;
+
+ // Takes a MachineInstrBuilder and inserts it into the CSEMap using the
+ // NodeInsertPos.
+ MachineInstrBuilder memoizeMI(MachineInstrBuilder MIB, void *NodeInsertPos);
+
+  // If we can CSE an instruction, but still need to materialize it to a VReg,
+  // we emit a copy from the CSE'd inst to the VReg.
+ MachineInstrBuilder generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
+ MachineInstrBuilder &MIB);
+
+  // If we can CSE an instruction, but still need to materialize it to a VReg,
+  // check if we can generate the copies. It's not possible to return a single
+  // MIB while emitting copies to multiple vregs.
+ bool checkCopyToDefsPossible(ArrayRef<DstOp> DstOps);
+
+public:
+ // Pull in base class constructors.
+ using MachineIRBuilder::MachineIRBuilder;
+ // Unhide buildInstr
+ MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flag = None) override;
+ // Bring in the other overload from the base class.
+ using MachineIRBuilder::buildConstant;
+
+ MachineInstrBuilder buildConstant(const DstOp &Res,
+ const ConstantInt &Val) override;
+
+ // Bring in the other overload from the base class.
+ using MachineIRBuilder::buildFConstant;
+ MachineInstrBuilder buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) override;
+};
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 58eb412d8c24..ab498e8f070b 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -40,6 +40,7 @@ class Value;
class CallLowering {
const TargetLowering *TLI;
+ virtual void anchor();
public:
struct ArgInfo {
unsigned Reg;
@@ -108,6 +109,9 @@ public:
MachineIRBuilder &MIRBuilder;
MachineRegisterInfo &MRI;
CCAssignFn *AssignFn;
+
+ private:
+ virtual void anchor();
};
protected:
@@ -138,12 +142,12 @@ public:
virtual ~CallLowering() = default;
/// This hook must be implemented to lower outgoing return values, described
- /// by \p Val, into the specified virtual register \p VReg.
+ /// by \p Val, into the specified virtual registers \p VRegs.
/// This hook is used by GlobalISel.
///
/// \return True if the lowering succeeds, false otherwise.
- virtual bool lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
return false;
}
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
index 36a33deb4a64..b097c7817762 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
@@ -21,6 +21,7 @@
namespace llvm {
class MachineRegisterInfo;
class CombinerInfo;
+class GISelCSEInfo;
class TargetPassConfig;
class MachineFunction;
@@ -28,14 +29,17 @@ class Combiner {
public:
Combiner(CombinerInfo &CombinerInfo, const TargetPassConfig *TPC);
- bool combineMachineInstrs(MachineFunction &MF);
+  /// If CSEInfo is not null, then the Combiner will set up an observer for
+  /// CSEInfo and instantiate a CSEMIRBuilder. Pass nullptr if CSE is not
+  /// needed.
+ bool combineMachineInstrs(MachineFunction &MF, GISelCSEInfo *CSEInfo);
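+
+  // Purely illustrative call (CInfo, TPC, MF and CSEInfo are the caller's own
+  // objects, not provided by this header):
+  //   Combiner C(CInfo, TPC);
+  //   C.combineMachineInstrs(MF, CSEInfo); // or nullptr to skip CSE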
protected:
CombinerInfo &CInfo;
MachineRegisterInfo *MRI = nullptr;
const TargetPassConfig *TPC;
- MachineIRBuilder Builder;
+ std::unique_ptr<MachineIRBuilder> Builder;
};
} // End namespace llvm.
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5d5b8398452c..6e9ac01c1ee2 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -1,4 +1,4 @@
-//== llvm/CodeGen/GlobalISel/CombinerHelper.h -------------- -*- C++ -*-==//
+//===-- llvm/CodeGen/GlobalISel/CombinerHelper.h --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,21 +20,36 @@
namespace llvm {
+class GISelChangeObserver;
class MachineIRBuilder;
class MachineRegisterInfo;
class MachineInstr;
+class MachineOperand;
class CombinerHelper {
MachineIRBuilder &Builder;
MachineRegisterInfo &MRI;
+ GISelChangeObserver &Observer;
public:
- CombinerHelper(MachineIRBuilder &B);
+ CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B);
+
+  /// Call MachineRegisterInfo::replaceRegWith() and inform the observer of the
+  /// changes.
+ void replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, unsigned ToReg) const;
+
+ /// Replace a single register operand with a new register and inform the
+ /// observer of the changes.
+ void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp,
+ unsigned ToReg) const;
/// If \p MI is COPY, try to combine it.
/// Returns true if MI changed.
bool tryCombineCopy(MachineInstr &MI);
+  /// If \p MI is an extend that consumes the result of a load, try to combine it.
+ /// Returns true if MI changed.
+ bool tryCombineExtendingLoads(MachineInstr &MI);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 1d248547adbf..d21aa3f725d9 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -17,10 +17,12 @@
#include <cassert>
namespace llvm {
+class GISelChangeObserver;
class LegalizerInfo;
class MachineInstr;
class MachineIRBuilder;
class MachineRegisterInfo;
+
// Contains information relevant to enabling/disabling various combines for a
// pass.
class CombinerInfo {
@@ -41,7 +43,19 @@ public:
/// illegal ops that are created.
bool LegalizeIllegalOps; // TODO: Make use of this.
const LegalizerInfo *LInfo;
- virtual bool combine(MachineInstr &MI, MachineIRBuilder &B) const = 0;
+
+ /// Attempt to combine instructions using MI as the root.
+ ///
+ /// Use Observer to report the creation, modification, and erasure of
+ /// instructions. GISelChangeObserver will automatically report certain
+ /// kinds of operations. These operations are:
+ /// * Instructions that are newly inserted into the MachineFunction
+ /// * Instructions that are erased from the MachineFunction.
+ ///
+  /// Instruction modification, however, must be reported explicitly; it is not
+  /// automatic.
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const = 0;
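+
+  // A sketch of the reporting contract above, as it might look inside an
+  // override (illustrative; `NewReg` and the particular mutation are made up):
+  //
+  //   Observer.changingInstr(MI);      // about to mutate MI in place
+  //   MI.getOperand(1).setReg(NewReg); // some in-place modification
+  //   Observer.changedInstr(MI);       // finished mutating MI
+  //   // Newly created or erased instructions are reported automatically.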
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
index 8d61f9a68279..220a571b21db 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
@@ -15,91 +15,20 @@
namespace llvm {
-static Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
- const unsigned Op2,
- const MachineRegisterInfo &MRI) {
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
- auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
- if (MaybeOp1Cst && MaybeOp2Cst) {
- LLT Ty = MRI.getType(Op1);
- APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
- APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
- switch (Opcode) {
- default:
- break;
- case TargetOpcode::G_ADD:
- return C1 + C2;
- case TargetOpcode::G_AND:
- return C1 & C2;
- case TargetOpcode::G_ASHR:
- return C1.ashr(C2);
- case TargetOpcode::G_LSHR:
- return C1.lshr(C2);
- case TargetOpcode::G_MUL:
- return C1 * C2;
- case TargetOpcode::G_OR:
- return C1 | C2;
- case TargetOpcode::G_SHL:
- return C1 << C2;
- case TargetOpcode::G_SUB:
- return C1 - C2;
- case TargetOpcode::G_XOR:
- return C1 ^ C2;
- case TargetOpcode::G_UDIV:
- if (!C2.getBoolValue())
- break;
- return C1.udiv(C2);
- case TargetOpcode::G_SDIV:
- if (!C2.getBoolValue())
- break;
- return C1.sdiv(C2);
- case TargetOpcode::G_UREM:
- if (!C2.getBoolValue())
- break;
- return C1.urem(C2);
- case TargetOpcode::G_SREM:
- if (!C2.getBoolValue())
- break;
- return C1.srem(C2);
- }
- }
- return None;
-}
-
/// An MIRBuilder which does trivial constant folding of binary ops.
/// Calls to buildInstr will also try to constant fold binary ops.
-class ConstantFoldingMIRBuilder
- : public FoldableInstructionsBuilder<ConstantFoldingMIRBuilder> {
+class ConstantFoldingMIRBuilder : public MachineIRBuilder {
public:
// Pull in base class constructors.
- using FoldableInstructionsBuilder<
- ConstantFoldingMIRBuilder>::FoldableInstructionsBuilder;
- // Unhide buildInstr
- using FoldableInstructionsBuilder<ConstantFoldingMIRBuilder>::buildInstr;
+ using MachineIRBuilder::MachineIRBuilder;
- // Implement buildBinaryOp required by FoldableInstructionsBuilder which
- // tries to constant fold.
- MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Dst,
- unsigned Src0, unsigned Src1) {
- validateBinaryOp(Dst, Src0, Src1);
- auto MaybeCst = ConstantFoldBinOp(Opcode, Src0, Src1, getMF().getRegInfo());
- if (MaybeCst)
- return buildConstant(Dst, MaybeCst->getSExtValue());
- return buildInstr(Opcode).addDef(Dst).addUse(Src0).addUse(Src1);
- }
-
- template <typename DstTy, typename UseArg1Ty, typename UseArg2Ty>
- MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, UseArg1Ty &&Arg1,
- UseArg2Ty &&Arg2) {
- unsigned Dst = getDestFromArg(Ty);
- return buildInstr(Opc, Dst, getRegFromArg(std::forward<UseArg1Ty>(Arg1)),
- getRegFromArg(std::forward<UseArg2Ty>(Arg2)));
- }
+ virtual ~ConstantFoldingMIRBuilder() = default;
// Try to provide an overload for buildInstr for binary ops in order to
// constant fold.
- MachineInstrBuilder buildInstr(unsigned Opc, unsigned Dst, unsigned Src0,
- unsigned Src1) {
+ MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags = None) override {
switch (Opc) {
default:
break;
@@ -116,19 +45,18 @@ public:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UREM:
case TargetOpcode::G_SREM: {
- return buildBinaryOp(Opc, Dst, Src0, Src1);
+ assert(DstOps.size() == 1 && "Invalid dst ops");
+ assert(SrcOps.size() == 2 && "Invalid src ops");
+ const DstOp &Dst = DstOps[0];
+ const SrcOp &Src0 = SrcOps[0];
+ const SrcOp &Src1 = SrcOps[1];
+ if (auto MaybeCst =
+ ConstantFoldBinOp(Opc, Src0.getReg(), Src1.getReg(), *getMRI()))
+ return buildConstant(Dst, MaybeCst->getSExtValue());
+ break;
}
}
- return buildInstr(Opc).addDef(Dst).addUse(Src0).addUse(Src1);
- }
-
- // Fallback implementation of buildInstr.
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty,
- UseArgsTy &&... Args) {
- auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty));
- addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...);
- return MIB;
+ return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps);
}
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
new file mode 100644
index 000000000000..c8e8a7a5a7cb
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
@@ -0,0 +1,111 @@
+//===----- llvm/CodeGen/GlobalISel/GISelChangeObserver.h ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// This contains common code for notifying observers about changes to
+/// machine instructions.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_GISELCHANGEOBSERVER_H
+#define LLVM_CODEGEN_GLOBALISEL_GISELCHANGEOBSERVER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+class MachineInstr;
+class MachineRegisterInfo;
+
+/// Abstract class that contains various methods for clients to notify about
+/// changes. This should be the preferred way for APIs to notify about changes.
+/// Typically, calling erasingInstr/createdInstr multiple times for the same
+/// instruction should not affect the result. The observer would likely need to
+/// check whether it was already notified earlier (consider using
+/// GISelWorkList).
+class GISelChangeObserver {
+ SmallPtrSet<MachineInstr *, 4> ChangingAllUsesOfReg;
+
+public:
+ virtual ~GISelChangeObserver() {}
+
+ /// An instruction is about to be erased.
+ virtual void erasingInstr(MachineInstr &MI) = 0;
+ /// An instruction was created and inserted into the function.
+ virtual void createdInstr(MachineInstr &MI) = 0;
+ /// This instruction is about to be mutated in some way.
+ virtual void changingInstr(MachineInstr &MI) = 0;
+ /// This instruction was mutated in some way.
+ virtual void changedInstr(MachineInstr &MI) = 0;
+
+ /// All the instructions using the given register are being changed.
+ /// For convenience, finishedChangingAllUsesOfReg() will report the completion
+ /// of the changes. The use list may change between this call and
+ /// finishedChangingAllUsesOfReg().
+ void changingAllUsesOfReg(const MachineRegisterInfo &MRI, unsigned Reg);
+ /// All instructions reported as changing by changingAllUsesOfReg() have
+ /// finished being changed.
+ void finishedChangingAllUsesOfReg();
+
+};
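+
+// A minimal observer is just a subclass that overrides the four callbacks.
+// For example (an illustrative sketch, not an interface this header provides):
+//
+//   struct InstCollector : public GISelChangeObserver {
+//     SmallPtrSet<MachineInstr *, 8> Created;
+//     void createdInstr(MachineInstr &MI) override { Created.insert(&MI); }
+//     void erasingInstr(MachineInstr &MI) override { Created.erase(&MI); }
+//     void changingInstr(MachineInstr &MI) override {}
+//     void changedInstr(MachineInstr &MI) override {}
+//   };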
+
+/// Simple wrapper observer that takes several observers and calls
+/// each one for each event. If there are multiple observers (say CSE,
+/// Legalizer, Combiner), it's sufficient to register this wrapper with the
+/// machine function as the delegate.
+class GISelObserverWrapper : public MachineFunction::Delegate,
+ public GISelChangeObserver {
+ SmallVector<GISelChangeObserver *, 4> Observers;
+
+public:
+ GISelObserverWrapper() = default;
+ GISelObserverWrapper(ArrayRef<GISelChangeObserver *> Obs)
+ : Observers(Obs.begin(), Obs.end()) {}
+ // Adds an observer.
+ void addObserver(GISelChangeObserver *O) { Observers.push_back(O); }
+  // Removes an observer from the list; does nothing if the observer is not
+  // present.
+ void removeObserver(GISelChangeObserver *O) {
+ auto It = std::find(Observers.begin(), Observers.end(), O);
+ if (It != Observers.end())
+ Observers.erase(It);
+ }
+ // API for Observer.
+ void erasingInstr(MachineInstr &MI) override {
+ for (auto &O : Observers)
+ O->erasingInstr(MI);
+ }
+ void createdInstr(MachineInstr &MI) override {
+ for (auto &O : Observers)
+ O->createdInstr(MI);
+ }
+ void changingInstr(MachineInstr &MI) override {
+ for (auto &O : Observers)
+ O->changingInstr(MI);
+ }
+ void changedInstr(MachineInstr &MI) override {
+ for (auto &O : Observers)
+ O->changedInstr(MI);
+ }
+ // API for MachineFunction::Delegate
+ void MF_HandleInsertion(MachineInstr &MI) override { createdInstr(MI); }
+ void MF_HandleRemoval(MachineInstr &MI) override { erasingInstr(MI); }
+};
+
+/// A simple RAII based Delegate installer.
+/// Use this in a scope to install a delegate to the MachineFunction and reset
+/// it at the end of the scope.
+class RAIIDelegateInstaller {
+ MachineFunction &MF;
+ MachineFunction::Delegate *Delegate;
+
+public:
+ RAIIDelegateInstaller(MachineFunction &MF, MachineFunction::Delegate *Del);
+ ~RAIIDelegateInstaller();
+};
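+
+// Putting the pieces above together (an illustrative sketch; `CSEInfo` and
+// `Collector` stand for any GISelChangeObserver implementations, and `MF` is
+// the caller's MachineFunction):
+//
+//   GISelObserverWrapper AllObservers({&CSEInfo, &Collector});
+//   RAIIDelegateInstaller Install(MF, &AllObservers);
+//   // ... transform MF; every insertion/removal reaches both observers ...
+//   // The installer's destructor resets the delegate at end of scope.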
+
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
index 167905dc9aa1..1571841a208d 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
@@ -12,38 +12,42 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
namespace llvm {
class MachineInstr;
+class MachineFunction;
-// Worklist which mostly works similar to InstCombineWorkList, but on MachineInstrs.
-// The main difference with something like a SetVector is that erasing an element doesn't
-// move all elements over one place - instead just nulls out the element of the vector.
-// FIXME: Does it make sense to factor out common code with the instcombinerWorkList?
+// Worklist which mostly works similar to InstCombineWorkList, but on
+// MachineInstrs. The main difference with something like a SetVector is that
+// erasing an element doesn't move all elements over one place - instead just
+// nulls out the element of the vector.
+//
+// FIXME: Does it make sense to factor out common code with the
+// InstCombineWorkList?
template<unsigned N>
class GISelWorkList {
- SmallVector<MachineInstr*, N> Worklist;
- DenseMap<MachineInstr*, unsigned> WorklistMap;
+ SmallVector<MachineInstr *, N> Worklist;
+ DenseMap<MachineInstr *, unsigned> WorklistMap;
public:
- GISelWorkList() = default;
+ GISelWorkList() {}
bool empty() const { return WorklistMap.empty(); }
unsigned size() const { return WorklistMap.size(); }
- /// Add - Add the specified instruction to the worklist if it isn't already
- /// in it.
+ /// Add the specified instruction to the worklist if it isn't already in it.
void insert(MachineInstr *I) {
- if (WorklistMap.try_emplace(I, Worklist.size()).second) {
+ if (WorklistMap.try_emplace(I, Worklist.size()).second)
Worklist.push_back(I);
- }
}
- /// Remove - remove I from the worklist if it exists.
- void remove(MachineInstr *I) {
+ /// Remove I from the worklist if it exists.
+ void remove(const MachineInstr *I) {
auto It = WorklistMap.find(I);
if (It == WorklistMap.end()) return; // Not in worklist.
@@ -53,6 +57,11 @@ public:
WorklistMap.erase(It);
}
+ void clear() {
+ Worklist.clear();
+ WorklistMap.clear();
+ }
+
MachineInstr *pop_back_val() {
MachineInstr *I;
do {
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 2498ee933210..d1770bf6e4ce 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -21,11 +21,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Types.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/Allocator.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Allocator.h"
#include <memory>
#include <utility>
@@ -300,6 +300,8 @@ private:
bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateFNeg(const User &U, MachineIRBuilder &MIRBuilder);
+
bool translateAdd(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_ADD, U, MIRBuilder);
}
@@ -442,11 +444,13 @@ private:
// I.e., compared to regular MIBuilder, this one also inserts the instruction
  // in the current block, it can create blocks, etc., basically a kind of
// IRBuilder, but for Machine IR.
- MachineIRBuilder CurBuilder;
+ // CSEMIRBuilder CurBuilder;
+ std::unique_ptr<MachineIRBuilder> CurBuilder;
// Builder set to the entry block (just after ABI lowering instructions). Used
// as a convenient location for Constants.
- MachineIRBuilder EntryBuilder;
+ // CSEMIRBuilder EntryBuilder;
+ std::unique_ptr<MachineIRBuilder> EntryBuilder;
// The MachineFunction currently being translated.
MachineFunction *MF;
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 873587651efd..20bec7650179 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --===========//
+//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
@@ -14,12 +14,14 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "legalizer"
+using namespace llvm::MIPatternMatch;
namespace llvm {
class LegalizationArtifactCombiner {
@@ -36,15 +38,29 @@ public:
SmallVectorImpl<MachineInstr *> &DeadInsts) {
if (MI.getOpcode() != TargetOpcode::G_ANYEXT)
return false;
- if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC,
- MI.getOperand(1).getReg(), MRI)) {
+
+ Builder.setInstr(MI);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+
+    // aext(trunc x) -> aext/copy/trunc x
+ unsigned TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = DefMI->getOperand(1).getReg();
- Builder.setInstr(MI);
- // We get a copy/trunc/extend depending on the sizes
- Builder.buildAnyExtOrTrunc(DstReg, SrcReg);
- markInstAndDefDead(MI, *DefMI, DeadInsts);
+ Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+ return true;
+ }
+
+ // aext([asz]ext x) -> [asz]ext x
+ unsigned ExtSrc;
+ MachineInstr *ExtMI;
+ if (mi_match(SrcReg, MRI,
+ m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
+ m_GSExt(m_Reg(ExtSrc)),
+ m_GZExt(m_Reg(ExtSrc)))))) {
+ Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
+ markInstAndDefDead(MI, *ExtMI, DeadInsts);
return true;
}
return tryFoldImplicitDef(MI, DeadInsts);
@@ -55,24 +71,25 @@ public:
if (MI.getOpcode() != TargetOpcode::G_ZEXT)
return false;
- if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC,
- MI.getOperand(1).getReg(), MRI)) {
- unsigned DstReg = MI.getOperand(0).getReg();
+
+ Builder.setInstr(MI);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+
+    // zext(trunc x) -> and (aext/copy/trunc x), mask
+ unsigned TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLT DstTy = MRI.getType(DstReg);
if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- Builder.setInstr(MI);
- unsigned ZExtSrc = MI.getOperand(1).getReg();
- LLT ZExtSrcTy = MRI.getType(ZExtSrc);
- APInt Mask = APInt::getAllOnesValue(ZExtSrcTy.getSizeInBits());
- auto MaskCstMIB = Builder.buildConstant(DstTy, Mask.getZExtValue());
- unsigned TruncSrc = DefMI->getOperand(1).getReg();
- // We get a copy/trunc/extend depending on the sizes
- auto SrcCopyOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc);
- Builder.buildAnd(DstReg, SrcCopyOrTrunc, MaskCstMIB);
- markInstAndDefDead(MI, *DefMI, DeadInsts);
+ LLT SrcTy = MRI.getType(SrcReg);
+ APInt Mask = APInt::getAllOnesValue(SrcTy.getSizeInBits());
+ auto MIBMask = Builder.buildConstant(DstTy, Mask.getZExtValue());
+ Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
+ MIBMask);
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
return tryFoldImplicitDef(MI, DeadInsts);
@@ -83,33 +100,34 @@ public:
if (MI.getOpcode() != TargetOpcode::G_SEXT)
return false;
- if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC,
- MI.getOperand(1).getReg(), MRI)) {
- unsigned DstReg = MI.getOperand(0).getReg();
+
+ Builder.setInstr(MI);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+
+    // sext(trunc x) -> ashr (shl (aext/copy/trunc x), c), c
+ unsigned TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLT DstTy = MRI.getType(DstReg);
if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy}}) ||
isInstUnsupported({TargetOpcode::G_ASHR, {DstTy}}) ||
isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- Builder.setInstr(MI);
- unsigned SExtSrc = MI.getOperand(1).getReg();
- LLT SExtSrcTy = MRI.getType(SExtSrc);
- unsigned SizeDiff = DstTy.getSizeInBits() - SExtSrcTy.getSizeInBits();
- auto SizeDiffMIB = Builder.buildConstant(DstTy, SizeDiff);
- unsigned TruncSrcReg = DefMI->getOperand(1).getReg();
- // We get a copy/trunc/extend depending on the sizes
- auto SrcCopyExtOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrcReg);
- auto ShlMIB = Builder.buildInstr(TargetOpcode::G_SHL, DstTy,
- SrcCopyExtOrTrunc, SizeDiffMIB);
- Builder.buildInstr(TargetOpcode::G_ASHR, DstReg, ShlMIB, SizeDiffMIB);
- markInstAndDefDead(MI, *DefMI, DeadInsts);
+ LLT SrcTy = MRI.getType(SrcReg);
+ unsigned ShAmt = DstTy.getSizeInBits() - SrcTy.getSizeInBits();
+ auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt);
+ auto MIBShl = Builder.buildInstr(
+ TargetOpcode::G_SHL, {DstTy},
+ {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBShAmt});
+ Builder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {MIBShl, MIBShAmt});
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
return tryFoldImplicitDef(MI, DeadInsts);
}
- /// Try to fold sb = EXTEND (G_IMPLICIT_DEF sa) -> sb = G_IMPLICIT_DEF
+ /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
bool tryFoldImplicitDef(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
unsigned Opcode = MI.getOpcode();
@@ -119,13 +137,25 @@ public:
if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
MI.getOperand(1).getReg(), MRI)) {
+ Builder.setInstr(MI);
unsigned DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
- return false;
- LLVM_DEBUG(dbgs() << ".. Combine EXT(IMPLICIT_DEF) " << MI;);
- Builder.setInstr(MI);
- Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, DstReg);
+
+ if (Opcode == TargetOpcode::G_ANYEXT) {
+ // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
+ if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
+ return false;
+ LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
+ Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
+ } else {
+ // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
+ // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
+ if (isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
+ return false;
+ LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
+ Builder.buildConstant(DstReg, 0);
+ }
+
markInstAndDefDead(MI, *DefMI, DeadInsts);
return true;
}
@@ -139,8 +169,20 @@ public:
return false;
unsigned NumDefs = MI.getNumOperands() - 1;
- MachineInstr *MergeI = getOpcodeDef(TargetOpcode::G_MERGE_VALUES,
- MI.getOperand(NumDefs).getReg(), MRI);
+
+ unsigned MergingOpcode;
+ LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg());
+ LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
+ if (OpTy.isVector() && DestTy.isVector())
+ MergingOpcode = TargetOpcode::G_CONCAT_VECTORS;
+ else if (OpTy.isVector() && !DestTy.isVector())
+ MergingOpcode = TargetOpcode::G_BUILD_VECTOR;
+ else
+ MergingOpcode = TargetOpcode::G_MERGE_VALUES;
+
+ MachineInstr *MergeI =
+ getOpcodeDef(MergingOpcode, MI.getOperand(NumDefs).getReg(), MRI);
+
if (!MergeI)
return false;
@@ -277,6 +319,19 @@ private:
auto Step = LI.getAction(Query);
return Step.Action == Unsupported || Step.Action == NotFound;
}
+
+ /// Looks through copy instructions and returns the actual
+ /// source register.
+ unsigned lookThroughCopyInstrs(unsigned Reg) {
+ unsigned TmpReg;
+ while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
+ if (MRI.getType(TmpReg).isValid())
+ Reg = TmpReg;
+ else
+ break;
+ }
+ return Reg;
+ }
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index d122e67b87b8..9b4ecf9284e3 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -32,6 +32,7 @@ namespace llvm {
class LegalizerInfo;
class Legalizer;
class MachineRegisterInfo;
+class GISelChangeObserver;
class LegalizerHelper {
public:
@@ -48,7 +49,10 @@ public:
UnableToLegalize,
};
- LegalizerHelper(MachineFunction &MF);
+ LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer,
+ MachineIRBuilder &B);
+ LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
+ GISelChangeObserver &Observer, MachineIRBuilder &B);
/// Replace \p MI by a sequence of legal instructions that can implement the
/// same operation. Note that this means \p MI may be deleted, so any iterator
@@ -87,7 +91,7 @@ public:
/// Expose MIRBuilder so clients can set their own RecordInsertInstruction
/// functions
- MachineIRBuilder MIRBuilder;
+ MachineIRBuilder &MIRBuilder;
/// Expose LegalizerInfo so the clients can re-use.
const LegalizerInfo &getLegalizerInfo() const { return LI; }
@@ -112,8 +116,12 @@ private:
void extractParts(unsigned Reg, LLT Ty, int NumParts,
SmallVectorImpl<unsigned> &VRegs);
+ LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+
MachineRegisterInfo &MRI;
const LegalizerInfo &LI;
+ /// To keep track of changes made by the LegalizerHelper.
+ GISelChangeObserver &Observer;
};
/// Helper function that creates the given libcall.
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index a8c26082f221..13776dd3e87d 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -39,6 +39,7 @@ class MachineInstr;
class MachineIRBuilder;
class MachineRegisterInfo;
class MCInstrInfo;
+class GISelChangeObserver;
namespace LegalizeActions {
enum LegalizeAction : std::uint8_t {
@@ -121,7 +122,7 @@ struct LegalityQuery {
ArrayRef<LLT> Types;
struct MemDesc {
- uint64_t Size;
+ uint64_t SizeInBits;
AtomicOrdering Ordering;
};
@@ -651,6 +652,20 @@ public:
return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy);
}
+ /// Widen the scalar to match the size of another.
+ LegalizeRuleSet &minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx) {
+ typeIdx(TypeIdx);
+ return widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[LargeTypeIdx].getScalarSizeInBits() >
+ Query.Types[TypeIdx].getSizeInBits();
+ },
+ [=](const LegalityQuery &Query) {
+ return std::make_pair(TypeIdx,
+ Query.Types[LargeTypeIdx].getElementType());
+ });
+ }
+
/// Add more elements to the vector to reach the next power of two.
/// No effect if the type is not a vector or the element count is a power of
/// two.
@@ -693,6 +708,8 @@ public:
},
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
+ if (MaxElements == 1)
+ return std::make_pair(TypeIdx, VecTy.getElementType());
return std::make_pair(
TypeIdx, LLT::vector(MaxElements, VecTy.getScalarSizeInBits()));
});
@@ -947,9 +964,9 @@ public:
bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
- virtual bool legalizeCustom(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
+ virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const;
private:
/// Determine what action should be taken to legalize the given generic
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index ac1673de5f3f..37de8f030410 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H
#define LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Types.h"
#include "llvm/CodeGen/LowLevelType.h"
@@ -30,6 +31,7 @@ namespace llvm {
class MachineFunction;
class MachineInstr;
class TargetInstrInfo;
+class GISelChangeObserver;
/// Class which stores all the state required in a MachineIRBuilder.
/// Since MachineIRBuilders will only store state in this object, it allows
@@ -50,62 +52,177 @@ struct MachineIRBuilderState {
MachineBasicBlock::iterator II;
/// @}
- std::function<void(MachineInstr *)> InsertedInstr;
+ GISelChangeObserver *Observer;
+
+ GISelCSEInfo *CSEInfo;
};
-/// Helper class to build MachineInstr.
-/// It keeps internally the insertion point and debug location for all
-/// the new instructions we want to create.
-/// This information can be modify via the related setters.
-class MachineIRBuilderBase {
+class DstOp {
+ union {
+ LLT LLTTy;
+ unsigned Reg;
+ const TargetRegisterClass *RC;
+ };
- MachineIRBuilderState State;
- const TargetInstrInfo &getTII() {
- assert(State.TII && "TargetInstrInfo is not set");
- return *State.TII;
+public:
+ enum class DstType { Ty_LLT, Ty_Reg, Ty_RC };
+ DstOp(unsigned R) : Reg(R), Ty(DstType::Ty_Reg) {}
+ DstOp(const LLT &T) : LLTTy(T), Ty(DstType::Ty_LLT) {}
+ DstOp(const TargetRegisterClass *TRC) : RC(TRC), Ty(DstType::Ty_RC) {}
+
+ void addDefToMIB(MachineRegisterInfo &MRI, MachineInstrBuilder &MIB) const {
+ switch (Ty) {
+ case DstType::Ty_Reg:
+ MIB.addDef(Reg);
+ break;
+ case DstType::Ty_LLT:
+ MIB.addDef(MRI.createGenericVirtualRegister(LLTTy));
+ break;
+ case DstType::Ty_RC:
+ MIB.addDef(MRI.createVirtualRegister(RC));
+ break;
+ }
}
- void validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend);
+ LLT getLLTTy(const MachineRegisterInfo &MRI) const {
+ switch (Ty) {
+ case DstType::Ty_RC:
+ return LLT{};
+ case DstType::Ty_LLT:
+ return LLTTy;
+ case DstType::Ty_Reg:
+ return MRI.getType(Reg);
+ }
+ llvm_unreachable("Unrecognised DstOp::DstType enum");
+ }
-protected:
- unsigned getDestFromArg(unsigned Reg) { return Reg; }
- unsigned getDestFromArg(LLT Ty) {
- return getMF().getRegInfo().createGenericVirtualRegister(Ty);
+ unsigned getReg() const {
+ assert(Ty == DstType::Ty_Reg && "Not a register");
+ return Reg;
}
- unsigned getDestFromArg(const TargetRegisterClass *RC) {
- return getMF().getRegInfo().createVirtualRegister(RC);
+
+ const TargetRegisterClass *getRegClass() const {
+ switch (Ty) {
+ case DstType::Ty_RC:
+ return RC;
+ default:
+ llvm_unreachable("Not a RC Operand");
+ }
}
- void addUseFromArg(MachineInstrBuilder &MIB, unsigned Reg) {
- MIB.addUse(Reg);
+ DstType getDstOpKind() const { return Ty; }
+
+private:
+ DstType Ty;
+};
+
+class SrcOp {
+ union {
+ MachineInstrBuilder SrcMIB;
+ unsigned Reg;
+ CmpInst::Predicate Pred;
+ };
+
+public:
+ enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate };
+ SrcOp(unsigned R) : Reg(R), Ty(SrcType::Ty_Reg) {}
+ SrcOp(const MachineInstrBuilder &MIB) : SrcMIB(MIB), Ty(SrcType::Ty_MIB) {}
+ SrcOp(const CmpInst::Predicate P) : Pred(P), Ty(SrcType::Ty_Predicate) {}
+
+ void addSrcToMIB(MachineInstrBuilder &MIB) const {
+ switch (Ty) {
+ case SrcType::Ty_Predicate:
+ MIB.addPredicate(Pred);
+ break;
+ case SrcType::Ty_Reg:
+ MIB.addUse(Reg);
+ break;
+ case SrcType::Ty_MIB:
+ MIB.addUse(SrcMIB->getOperand(0).getReg());
+ break;
+ }
}
- void addUseFromArg(MachineInstrBuilder &MIB, const MachineInstrBuilder &UseMIB) {
- MIB.addUse(UseMIB->getOperand(0).getReg());
+ LLT getLLTTy(const MachineRegisterInfo &MRI) const {
+ switch (Ty) {
+ case SrcType::Ty_Predicate:
+ llvm_unreachable("Not a register operand");
+ case SrcType::Ty_Reg:
+ return MRI.getType(Reg);
+ case SrcType::Ty_MIB:
+ return MRI.getType(SrcMIB->getOperand(0).getReg());
+ }
+ llvm_unreachable("Unrecognised SrcOp::SrcType enum");
}
- void addUsesFromArgs(MachineInstrBuilder &MIB) { }
- template<typename UseArgTy, typename ... UseArgsTy>
- void addUsesFromArgs(MachineInstrBuilder &MIB, UseArgTy &&Arg1, UseArgsTy &&... Args) {
- addUseFromArg(MIB, Arg1);
- addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...);
+ unsigned getReg() const {
+ switch (Ty) {
+ case SrcType::Ty_Predicate:
+ llvm_unreachable("Not a register operand");
+ case SrcType::Ty_Reg:
+ return Reg;
+ case SrcType::Ty_MIB:
+ return SrcMIB->getOperand(0).getReg();
+ }
+ llvm_unreachable("Unrecognised SrcOp::SrcType enum");
}
- unsigned getRegFromArg(unsigned Reg) { return Reg; }
- unsigned getRegFromArg(const MachineInstrBuilder &MIB) {
- return MIB->getOperand(0).getReg();
+
+ CmpInst::Predicate getPredicate() const {
+ switch (Ty) {
+ case SrcType::Ty_Predicate:
+ return Pred;
+ default:
+ llvm_unreachable("Not a register operand");
+ }
}
- void validateBinaryOp(unsigned Res, unsigned Op0, unsigned Op1);
+ SrcType getSrcOpKind() const { return Ty; }
+
+private:
+ SrcType Ty;
+};
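+
+// Taken together, DstOp and SrcOp let the build methods further below accept
+// an LLT, a plain register, or another MachineInstrBuilder interchangeably.
+// A sketch, assuming a MachineIRBuilder `B`, an LLT `s32` and registers
+// `R0`/`R1` in the caller's scope:
+//
+//   auto Sum = B.buildAdd(s32, R0, R1);           // DstOp from an LLT
+//   auto Ext = B.buildZExt(LLT::scalar(64), Sum); // SrcOp from a MIB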
+
+class FlagsOp {
+ Optional<unsigned> Flags;
+
+public:
+ explicit FlagsOp(unsigned F) : Flags(F) {}
+ FlagsOp() : Flags(None) {}
+ Optional<unsigned> getFlags() const { return Flags; }
+};
+/// Helper class to build MachineInstr.
+/// It keeps internally the insertion point and debug location for all
+/// the new instructions we want to create.
+/// This information can be modified via the related setters.
+class MachineIRBuilder {
+
+ MachineIRBuilderState State;
+
+protected:
+ void validateTruncExt(const LLT &Dst, const LLT &Src, bool IsExtend);
+
+ void validateBinaryOp(const LLT &Res, const LLT &Op0, const LLT &Op1);
+
+ void validateSelectOp(const LLT &ResTy, const LLT &TstTy, const LLT &Op0Ty,
+ const LLT &Op1Ty);
+ void recordInsertion(MachineInstr *MI) const;
public:
/// Some constructors for easy use.
- MachineIRBuilderBase() = default;
- MachineIRBuilderBase(MachineFunction &MF) { setMF(MF); }
- MachineIRBuilderBase(MachineInstr &MI) : MachineIRBuilderBase(*MI.getMF()) {
+ MachineIRBuilder() = default;
+ MachineIRBuilder(MachineFunction &MF) { setMF(MF); }
+ MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) {
setInstr(MI);
}
- MachineIRBuilderBase(const MachineIRBuilderState &BState) : State(BState) {}
+ virtual ~MachineIRBuilder() = default;
+
+ MachineIRBuilder(const MachineIRBuilderState &BState) : State(BState) {}
+
+ const TargetInstrInfo &getTII() {
+ assert(State.TII && "TargetInstrInfo is not set");
+ return *State.TII;
+ }
/// Getter for the function we currently build.
MachineFunction &getMF() {
@@ -118,16 +235,25 @@ public:
/// Getter for MRI
MachineRegisterInfo *getMRI() { return State.MRI; }
+ const MachineRegisterInfo *getMRI() const { return State.MRI; }
/// Getter for the State
MachineIRBuilderState &getState() { return State; }
/// Getter for the basic block we currently build.
- MachineBasicBlock &getMBB() {
+ const MachineBasicBlock &getMBB() const {
assert(State.MBB && "MachineBasicBlock is not set");
return *State.MBB;
}
+ MachineBasicBlock &getMBB() {
+ return const_cast<MachineBasicBlock &>(
+ const_cast<const MachineIRBuilder *>(this)->getMBB());
+ }
+
+ GISelCSEInfo *getCSEInfo() { return State.CSEInfo; }
+ const GISelCSEInfo *getCSEInfo() const { return State.CSEInfo; }
+
/// Current insertion point for new instructions.
MachineBasicBlock::iterator getInsertPt() { return State.II; }
@@ -137,10 +263,12 @@ public:
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II);
/// @}
+ void setCSEInfo(GISelCSEInfo *Info);
+
/// \name Setters for the insertion point.
/// @{
/// Set the MachineFunction where to build instructions.
- void setMF(MachineFunction &);
+ void setMF(MachineFunction &MF);
/// Set the insertion point to the end of \p MBB.
/// \pre \p MBB must be contained by getMF().
@@ -151,12 +279,8 @@ public:
void setInstr(MachineInstr &MI);
/// @}
- /// \name Control where instructions we create are recorded (typically for
- /// visiting again later during legalization).
- /// @{
- void recordInsertion(MachineInstr *InsertedInstr) const;
- void recordInsertions(std::function<void(MachineInstr *)> InsertedInstr);
- void stopRecordingInsertions();
+ void setChangeObserver(GISelChangeObserver &Observer);
+ void stopObservingChanges();
/// @}
/// Set the debug location to \p DL for all the next build instructions.
@@ -208,6 +332,10 @@ public:
const MDNode *Variable,
const MDNode *Expr);
+  /// Build and insert a DBG_LABEL instruction specifying that \p Label is
+ /// given. Convert "llvm.dbg.label Label" to "DBG_LABEL Label".
+ MachineInstrBuilder buildDbgLabel(const MDNode *Label);
+
/// Build and insert \p Res = G_FRAME_INDEX \p Idx
///
/// G_FRAME_INDEX materializes the address of an alloca value or other
@@ -296,9 +424,9 @@ public:
/// registers with the same scalar type (typically s1)
///
/// \return The newly created instruction.
- MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0,
- unsigned Op1, unsigned CarryIn);
-
+ MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1,
+ const SrcOp &CarryIn);
/// Build and insert \p Res = G_ANYEXT \p Op0
///
@@ -314,11 +442,7 @@ public:
///
/// \return The newly created instruction.
- MachineInstrBuilder buildAnyExt(unsigned Res, unsigned Op);
- template <typename DstType, typename ArgType>
- MachineInstrBuilder buildAnyExt(DstType &&Res, ArgType &&Arg) {
- return buildAnyExt(getDestFromArg(Res), getRegFromArg(Arg));
- }
+ MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_SEXT \p Op
///
@@ -332,11 +456,7 @@ public:
/// \pre \p Op must be smaller than \p Res
///
/// \return The newly created instruction.
- template <typename DstType, typename ArgType>
- MachineInstrBuilder buildSExt(DstType &&Res, ArgType &&Arg) {
- return buildSExt(getDestFromArg(Res), getRegFromArg(Arg));
- }
- MachineInstrBuilder buildSExt(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_ZEXT \p Op
///
@@ -350,11 +470,7 @@ public:
/// \pre \p Op must be smaller than \p Res
///
/// \return The newly created instruction.
- template <typename DstType, typename ArgType>
- MachineInstrBuilder buildZExt(DstType &&Res, ArgType &&Arg) {
- return buildZExt(getDestFromArg(Res), getRegFromArg(Arg));
- }
- MachineInstrBuilder buildZExt(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or
/// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
@@ -364,11 +480,7 @@ public:
/// \pre \p Op must be a generic virtual register with scalar or vector type.
///
/// \return The newly created instruction.
- template <typename DstTy, typename UseArgTy>
- MachineInstrBuilder buildSExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) {
- return buildSExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use));
- }
- MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or
/// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
@@ -378,11 +490,7 @@ public:
/// \pre \p Op must be a generic virtual register with scalar or vector type.
///
/// \return The newly created instruction.
- template <typename DstTy, typename UseArgTy>
- MachineInstrBuilder buildZExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) {
- return buildZExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use));
- }
- MachineInstrBuilder buildZExtOrTrunc(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op);
// Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or
/// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
@@ -392,11 +500,7 @@ public:
/// \pre \p Op must be a generic virtual register with scalar or vector type.
///
/// \return The newly created instruction.
- template <typename DstTy, typename UseArgTy>
- MachineInstrBuilder buildAnyExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) {
- return buildAnyExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use));
- }
- MachineInstrBuilder buildAnyExtOrTrunc(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = \p ExtOpc, \p Res = G_TRUNC \p
/// Op, or \p Res = COPY \p Op depending on the differing sizes of \p Res and
@@ -407,15 +511,11 @@ public:
/// \pre \p Op must be a generic virtual register with scalar or vector type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, unsigned Res,
- unsigned Op);
+ MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res,
+ const SrcOp &Op);
/// Build and insert an appropriate cast between two registers of equal size.
- template <typename DstType, typename ArgType>
- MachineInstrBuilder buildCast(DstType &&Res, ArgType &&Arg) {
- return buildCast(getDestFromArg(Res), getRegFromArg(Arg));
- }
- MachineInstrBuilder buildCast(unsigned Dst, unsigned Src);
+ MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src);
/// Build and insert G_BR \p Dest
///
@@ -460,7 +560,8 @@ public:
/// type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildConstant(unsigned Res, const ConstantInt &Val);
+ virtual MachineInstrBuilder buildConstant(const DstOp &Res,
+ const ConstantInt &Val);
/// Build and insert \p Res = G_CONSTANT \p Val
///
@@ -470,12 +571,8 @@ public:
/// \pre \p Res must be a generic virtual register with scalar type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildConstant(unsigned Res, int64_t Val);
+ MachineInstrBuilder buildConstant(const DstOp &Res, int64_t Val);
- template <typename DstType>
- MachineInstrBuilder buildConstant(DstType &&Res, int64_t Val) {
- return buildConstant(getDestFromArg(Res), Val);
- }
/// Build and insert \p Res = G_FCONSTANT \p Val
///
/// G_FCONSTANT is a floating-point constant with the specified size and
@@ -485,17 +582,10 @@ public:
/// \pre \p Res must be a generic virtual register with scalar type.
///
/// \return The newly created instruction.
- template <typename DstType>
- MachineInstrBuilder buildFConstant(DstType &&Res, const ConstantFP &Val) {
- return buildFConstant(getDestFromArg(Res), Val);
- }
- MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val);
+ virtual MachineInstrBuilder buildFConstant(const DstOp &Res,
+ const ConstantFP &Val);
- template <typename DstType>
- MachineInstrBuilder buildFConstant(DstType &&Res, double Val) {
- return buildFConstant(getDestFromArg(Res), Val);
- }
- MachineInstrBuilder buildFConstant(unsigned Res, double Val);
+ MachineInstrBuilder buildFConstant(const DstOp &Res, double Val);
/// Build and insert \p Res = COPY Op
///
@@ -504,11 +594,7 @@ public:
/// \pre setBasicBlock or setMI must have been called.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildCopy(unsigned Res, unsigned Op);
- template <typename DstType, typename SrcType>
- MachineInstrBuilder buildCopy(DstType &&Res, SrcType &&Src) {
- return buildCopy(getDestFromArg(Res), getRegFromArg(Src));
- }
+ MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op);
/// Build and insert `Res = G_LOAD Addr, MMO`.
///
@@ -555,10 +641,7 @@ public:
MachineInstrBuilder buildExtract(unsigned Res, unsigned Src, uint64_t Index);
/// Build and insert \p Res = IMPLICIT_DEF.
- template <typename DstType> MachineInstrBuilder buildUndef(DstType &&Res) {
- return buildUndef(getDestFromArg(Res));
- }
- MachineInstrBuilder buildUndef(unsigned Res);
+ MachineInstrBuilder buildUndef(const DstOp &Res);
  /// Build and insert instructions to put \p Ops together at the specified
  /// \p Indices to form a larger register.
@@ -587,7 +670,7 @@ public:
/// \pre The type of all \p Ops registers must be identical.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildMerge(unsigned Res, ArrayRef<unsigned> Ops);
+ MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef<unsigned> Ops);
/// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op
///
@@ -599,7 +682,50 @@ public:
/// \pre The type of all \p Res registers must be identical.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildUnmerge(ArrayRef<unsigned> Res, unsigned Op);
+ MachineInstrBuilder buildUnmerge(ArrayRef<LLT> Res, const SrcOp &Op);
+ MachineInstrBuilder buildUnmerge(ArrayRef<unsigned> Res, const SrcOp &Op);
+
+ /// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ...
+ ///
+ /// G_BUILD_VECTOR creates a vector value from multiple scalar registers.
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre The entire register \p Res (and no more) must be covered by the
+ /// input scalar registers.
+ /// \pre The type of all \p Ops registers must be identical.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildBuildVector(const DstOp &Res,
+ ArrayRef<unsigned> Ops);
+
+ /// Build and insert \p Res = G_BUILD_VECTOR_TRUNC \p Op0, ...
+ ///
+ /// G_BUILD_VECTOR_TRUNC creates a vector value from multiple scalar registers
+ /// which have types larger than the destination vector element type, and
+ /// truncates the values to fit.
+ ///
+ /// If the operands given are already the same size as the vector elt type,
+ /// then this method will instead create a G_BUILD_VECTOR instruction.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre The type of all \p Ops registers must be identical.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
+ ArrayRef<unsigned> Ops);
+
+ /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
+ ///
+ /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more
+ /// vectors.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre The entire register \p Res (and no more) must be covered by the input
+ /// registers.
+ /// \pre The type of all source operands must be identical.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildConcatVectors(const DstOp &Res,
+ ArrayRef<unsigned> Ops);
MachineInstrBuilder buildInsert(unsigned Res, unsigned Src,
unsigned Op, unsigned Index);
@@ -627,11 +753,7 @@ public:
/// \pre \p Res must be smaller than \p Op
///
/// \return The newly created instruction.
- template <typename DstType, typename SrcType>
- MachineInstrBuilder buildFPTrunc(DstType &&Res, SrcType &&Src) {
- return buildFPTrunc(getDestFromArg(Res), getRegFromArg(Src));
- }
- MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op);
+ MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op);
/// Build and insert \p Res = G_TRUNC \p Op
///
@@ -644,11 +766,7 @@ public:
/// \pre \p Res must be smaller than \p Op
///
/// \return The newly created instruction.
- MachineInstrBuilder buildTrunc(unsigned Res, unsigned Op);
- template <typename DstType, typename SrcType>
- MachineInstrBuilder buildTrunc(DstType &&Res, SrcType &&Src) {
- return buildTrunc(getDestFromArg(Res), getRegFromArg(Src));
- }
+ MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op);
/// Build and insert a \p Res = G_ICMP \p Pred, \p Op0, \p Op1
///
@@ -662,8 +780,8 @@ public:
/// \pre \p Pred must be an integer predicate.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildICmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0, unsigned Op1);
+ MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res,
+ const SrcOp &Op0, const SrcOp &Op1);
/// Build and insert a \p Res = G_FCMP \p Pred\p Op0, \p Op1
///
@@ -677,8 +795,8 @@ public:
/// \pre \p Pred must be a floating-point predicate.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0, unsigned Op1);
+ MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res,
+ const SrcOp &Op0, const SrcOp &Op1);
/// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1
///
@@ -690,8 +808,8 @@ public:
/// elements as the other parameters.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildSelect(unsigned Res, unsigned Tst,
- unsigned Op0, unsigned Op1);
+ MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst,
+ const SrcOp &Op0, const SrcOp &Op1);
/// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val,
/// \p Elt, \p Idx
@@ -703,8 +821,10 @@ public:
/// with scalar type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildInsertVectorElement(unsigned Res, unsigned Val,
- unsigned Elt, unsigned Idx);
+ MachineInstrBuilder buildInsertVectorElement(const DstOp &Res,
+ const SrcOp &Val,
+ const SrcOp &Elt,
+ const SrcOp &Idx);
/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
///
@@ -714,8 +834,9 @@ public:
/// \pre \p Idx must be a generic virtual register with scalar type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildExtractVectorElement(unsigned Res, unsigned Val,
- unsigned Idx);
+ MachineInstrBuilder buildExtractVectorElement(const DstOp &Res,
+ const SrcOp &Val,
+ const SrcOp &Idx);
/// Build and insert `OldValRes<def>, SuccessRes<def> =
/// G_ATOMIC_CMPXCHG_WITH_SUCCESS Addr, CmpVal, NewVal, MMO`.
@@ -952,19 +1073,7 @@ public:
///
/// \return The newly created instruction.
MachineInstrBuilder buildBlockAddress(unsigned Res, const BlockAddress *BA);
-};
-
-/// A CRTP class that contains methods for building instructions that can
-/// be constant folded. MachineIRBuilders that want to inherit from this will
-/// need to implement buildBinaryOp (for constant folding binary ops).
-/// Alternatively, they can implement buildInstr(Opc, Dst, Uses...) to perform
-/// additional folding for Opc.
-template <typename Base>
-class FoldableInstructionsBuilder : public MachineIRBuilderBase {
- Base &base() { return static_cast<Base &>(*this); }
-public:
- using MachineIRBuilderBase::MachineIRBuilderBase;
/// Build and insert \p Res = G_ADD \p Op0, \p Op1
///
/// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1,
@@ -976,13 +1085,10 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAdd(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_ADD, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildAdd(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildAdd(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_ADD, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_SUB \p Op0, \p Op1
@@ -996,13 +1102,10 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildSub(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_SUB, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildSub(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildSub(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_SUB, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_MUL \p Op0, \p Op1
@@ -1015,13 +1118,10 @@ public:
/// with the same (scalar or vector) type).
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildMul(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_MUL, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildMul(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildMul(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_MUL, {Dst}, {Src0, Src1}, Flags);
}
/// Build and insert \p Res = G_AND \p Op0, \p Op1
@@ -1035,13 +1135,9 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAnd(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_AND, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildAnd(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildAnd(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_AND, {Dst}, {Src0, Src1});
}
/// Build and insert \p Res = G_OR \p Op0, \p Op1
@@ -1054,39 +1150,14 @@ public:
/// with the same (scalar or vector) type).
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildOr(unsigned Dst, unsigned Src0, unsigned Src1) {
- return base().buildBinaryOp(TargetOpcode::G_OR, Dst, Src0, Src1);
- }
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildOr(DstTy &&Ty, UseArgsTy &&... UseArgs) {
- unsigned Res = base().getDestFromArg(Ty);
- return base().buildOr(Res, (base().getRegFromArg(UseArgs))...);
+ MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_OR, {Dst}, {Src0, Src1});
}
-};
-class MachineIRBuilder : public FoldableInstructionsBuilder<MachineIRBuilder> {
-public:
- using FoldableInstructionsBuilder<
- MachineIRBuilder>::FoldableInstructionsBuilder;
- MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Dst,
- unsigned Src0, unsigned Src1) {
- validateBinaryOp(Dst, Src0, Src1);
- return buildInstr(Opcode).addDef(Dst).addUse(Src0).addUse(Src1);
- }
- using FoldableInstructionsBuilder<MachineIRBuilder>::buildInstr;
- /// DAG like Generic method for building arbitrary instructions as above.
- /// \Opc opcode for the instruction.
- /// \Ty Either LLT/TargetRegisterClass/unsigned types for Dst
- /// \Args Variadic list of uses of types(unsigned/MachineInstrBuilder)
- /// Uses of type MachineInstrBuilder will perform
- /// getOperand(0).getReg() to convert to register.
- template <typename DstTy, typename... UseArgsTy>
- MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty,
- UseArgsTy &&... Args) {
- auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty));
- addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...);
- return MIB;
- }
+ virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags = None);
};
} // End namespace llvm.
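As a minimal sketch of how the refactored DstOp/SrcOp overloads above compose (assuming a MachineIRBuilder MIB with its insertion point already set, two hypothetical s32 virtual registers Src0Reg and Src1Reg, and the usual GlobalISel includes):

    // Destinations may be given as an LLT (a fresh vreg is created) and
    // sources as existing vregs or as the builder of a prior instruction.
    LLT S32 = LLT::scalar(32);
    auto Sum = MIB.buildAdd(S32, Src0Reg, Src1Reg);                // G_ADD
    auto Cmp = MIB.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Sum, Src1Reg);
    auto Sel = MIB.buildSelect(S32, Cmp, Sum, Src0Reg);            // G_SELECT
    MIB.buildInstr(TargetOpcode::G_MUL, {S32}, {Sel, Src1Reg});    // generic form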
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index 82fd7eddb68a..c33b32b2db40 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -103,8 +103,8 @@ public:
/// Currently the TableGen-like file would look like:
/// \code
/// PartialMapping[] = {
- /// /*32-bit add*/ {0, 32, GPR},
- /// /*2x32-bit add*/ {0, 32, GPR}, {0, 32, GPR}, // <-- Same entry 3x
+ /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first vec elt.
+ /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR},
/// /*<2x32-bit> vadd {0, 64, VPR}
/// }; // PartialMapping duplicated.
///
@@ -118,14 +118,15 @@ public:
/// With the array of pointer, we would have:
/// \code
/// PartialMapping[] = {
- /// /*32-bit add*/ {0, 32, GPR},
+ /// /*32-bit add lower */ {0, 32, GPR},
+ /// /*32-bit add upper */ {32, 32, GPR},
/// /*<2x32-bit> vadd {0, 64, VPR}
/// }; // No more duplication.
///
/// BreakDowns[] = {
/// /*AddBreakDown*/ &PartialMapping[0],
- /// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[0],
- /// /*VAddBreakDown*/ &PartialMapping[1]
+ /// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[1],
+ /// /*VAddBreakDown*/ &PartialMapping[2]
/// }; // Addresses of PartialMapping duplicated (smaller).
///
/// ValueMapping[] {
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 51e3a2732972..82b791d35b2b 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -108,5 +108,8 @@ APFloat getAPFloatFromSize(double Val, unsigned Size);
/// fallback.
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU);
+Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
+ const unsigned Op2,
+ const MachineRegisterInfo &MRI);
} // End namespace llvm.
#endif
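A possible use of the new ConstantFoldBinOp helper inside a combine, illustrative only (MI, MRI and Builder are assumed to be provided by the surrounding pass, and the helper is presumed to return None unless both operands fold to constants):

    // Try to fold a generic binary op whose inputs are both constants.
    unsigned Op1 = MI.getOperand(1).getReg();
    unsigned Op2 = MI.getOperand(2).getReg();
    if (Optional<APInt> Val =
            ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI)) {
      Builder.setInstr(MI);  // insert the replacement where MI currently is
      Builder.buildConstant(MI.getOperand(0).getReg(), Val->getSExtValue());
      MI.eraseFromParent();
    }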
diff --git a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 80bd796d5374..9c918ae1104f 100644
--- a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -70,7 +70,7 @@ namespace ISD {
/// of the frame or return address to return. An index of zero corresponds
/// to the current function's frame or return address, an index of one to
/// the parent's frame or return address, and so on.
- FRAMEADDR, RETURNADDR, ADDROFRETURNADDR,
+ FRAMEADDR, RETURNADDR, ADDROFRETURNADDR, SPONENTRY,
/// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
/// Materializes the offset from the local object pointer of another
@@ -256,6 +256,29 @@ namespace ISD {
/// Same for multiplication.
SMULO, UMULO,
+ /// RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2
+ /// integers with the same bit width (W). If the true value of LHS + RHS
+ /// exceeds the largest value that can be represented by W bits, the
+ /// resulting value is this maximum value. Otherwise, if this value is less
+ /// than the smallest value that can be represented by W bits, the
+ /// resulting value is this minimum value.
+ SADDSAT, UADDSAT,
+
+ /// RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2
+ /// integers with the same bit width (W). If the true value of LHS - RHS
+ /// exceeds the largest value that can be represented by W bits, the
+ /// resulting value is this maximum value. Otherwise, if this value is less
+ /// than the smallest value that can be represented by W bits, the
+ /// resulting value is this minimum value.
+ SSUBSAT, USUBSAT,
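A reference sketch of the saturating semantics described above, written for 8-bit values purely as an illustration:

    #include <algorithm>
    #include <cstdint>

    uint8_t uaddsat8(uint8_t A, uint8_t B) {
      unsigned Sum = unsigned(A) + unsigned(B);
      return uint8_t(std::min(Sum, 255u));                 // clamp at UINT8_MAX
    }
    int8_t ssubsat8(int8_t A, int8_t B) {
      int Diff = int(A) - int(B);
      return int8_t(std::min(std::max(Diff, -128), 127));  // clamp to [INT8_MIN, INT8_MAX]
    }
    // uaddsat8(200, 100) == 255  (a wrapping 8-bit add would give 44)
    // ssubsat8(-100, 50) == -128 (a wrapping 8-bit sub would give 106)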
+
+ /// RESULT = SMULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
+ /// 2 integers with the same width and scale. SCALE represents the scale of
+ /// both operands as fixed point numbers. This SCALE parameter must be a
+ /// constant integer. A scale of zero is effectively performing
+ /// multiplication on 2 integers.
+ SMULFIX,
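One reading of the description above is: multiply the operands as integers and shift the double-width product right by SCALE. A sketch for 32-bit signed operands, under that assumption:

    #include <cstdint>

    int32_t smulfix32(int32_t A, int32_t B, unsigned Scale) {
      return int32_t((int64_t(A) * int64_t(B)) >> Scale);
    }
    // Q4 example: smulfix32(0x18 /*1.5*/, 0x20 /*2.0*/, 4) == 0x30 /*3.0*/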
+
/// Simple binary floating point operators.
FADD, FSUB, FMUL, FDIV, FREM,
@@ -272,7 +295,8 @@ namespace ISD {
/// They are used to limit optimizations while the DAG is being optimized.
STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS,
STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
- STRICT_FRINT, STRICT_FNEARBYINT,
+ STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
+ STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
@@ -377,9 +401,13 @@ namespace ISD {
/// When the 1st operand is a vector, the shift amount must be in the same
/// type. (TLI.getShiftAmountTy() will return the same type when the input
/// type is a vector.)
- /// For rotates, the shift amount is treated as an unsigned amount modulo
- /// the element size of the first operand.
- SHL, SRA, SRL, ROTL, ROTR,
+ /// For rotates and funnel shifts, the shift amount is treated as an unsigned
+ /// amount modulo the element size of the first operand.
+ ///
+ /// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
+ /// fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ /// fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SHL, SRA, SRL, ROTL, ROTR, FSHL, FSHR,
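A reference sketch of the funnel-shift formulas above for 8-bit values (illustration only; when Z % BW == 0 the second term would require a full-width shift, and the result is simply X for fshl and Y for fshr):

    #include <cstdint>

    constexpr uint8_t fshl8(uint8_t X, uint8_t Y, unsigned Z) {
      return (Z % 8) ? uint8_t((X << (Z % 8)) | (Y >> (8 - Z % 8))) : X;
    }
    constexpr uint8_t fshr8(uint8_t X, uint8_t Y, unsigned Z) {
      return (Z % 8) ? uint8_t((X << (8 - Z % 8)) | (Y >> (Z % 8))) : Y;
    }
    static_assert(fshl8(0xAB, 0xCD, 3) == 0x5E, "low bits drawn from Y");
    static_assert(fshr8(0xAB, 0xCD, 3) == 0x79, "high bits drawn from X");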
/// Byte Swap and Counting operators.
BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE,
@@ -461,31 +489,33 @@ namespace ISD {
/// in-register any-extension of the low lanes of an integer vector. The
/// result type must have fewer elements than the operand type, and those
/// elements must be larger integer types such that the total size of the
- /// operand type and the result type match. Each of the low operand
- /// elements is any-extended into the corresponding, wider result
- /// elements with the high bits becoming undef.
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is any-extended into the corresponding,
+ /// wider result elements with the high bits becoming undef.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
ANY_EXTEND_VECTOR_INREG,
/// SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an
/// in-register sign-extension of the low lanes of an integer vector. The
/// result type must have fewer elements than the operand type, and those
/// elements must be larger integer types such that the total size of the
- /// operand type and the result type match. Each of the low operand
- /// elements is sign-extended into the corresponding, wider result
- /// elements.
- // FIXME: The SIGN_EXTEND_INREG node isn't specifically limited to
- // scalars, but it also doesn't handle vectors well. Either it should be
- // restricted to scalars or this node (and its handling) should be merged
- // into it.
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is sign-extended into the corresponding,
+ /// wider result elements.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
SIGN_EXTEND_VECTOR_INREG,
/// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an
/// in-register zero-extension of the low lanes of an integer vector. The
/// result type must have fewer elements than the operand type, and those
/// elements must be larger integer types such that the total size of the
- /// operand type and the result type match. Each of the low operand
- /// elements is zero-extended into the corresponding, wider result
- /// elements.
+ /// operand type is less than or equal to the size of the result type. Each
+ /// of the low operand elements is zero-extended into the corresponding,
+ /// wider result elements.
+ /// NOTE: The type legalizer prefers to make the operand and result size
+ /// the same to allow expansion to shuffle vector during op legalization.
ZERO_EXTEND_VECTOR_INREG,
/// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
@@ -550,22 +580,29 @@ namespace ISD {
/// is often a storage-only type but has native conversions.
FP16_TO_FP, FP_TO_FP16,
- /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
- /// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
- /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary
- /// floating point operations. These are inspired by libm.
- FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
+ /// Perform various unary floating-point operations inspired by libm.
+ FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
FLOG, FLOG2, FLOG10, FEXP, FEXP2,
FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
/// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
/// values.
- /// In the case where a single input is NaN, the non-NaN input is returned.
+ ///
+ /// In the case where a single input is a NaN (either signaling or quiet),
+ /// the non-NaN input is returned.
///
/// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
FMINNUM, FMAXNUM,
- /// FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that
- /// when a single input is NaN, NaN is returned.
- FMINNAN, FMAXNAN,
+
+ /// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
+ /// two values, following the IEEE-754 2008 definition. This differs from
+ /// FMINNUM/FMAXNUM in the handling of signaling NaNs. If one input is a
+ /// signaling NaN, returns a quiet NaN.
+ FMINNUM_IEEE, FMAXNUM_IEEE,
+
+ /// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0
+ /// as less than 0.0. While FMINNUM_IEEE/FMAXNUM_IEEE follow IEEE 754-2008
+ /// semantics, FMINIMUM/FMAXIMUM follow IEEE 754-2018 draft semantics.
+ FMINIMUM, FMAXIMUM,
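Taken together with FMINNUM above, the three families differ only in NaN and signed-zero handling: FMINNUM(NaN, 1.0) returns 1.0, FMINNUM_IEEE returns a quiet NaN when either input is a signaling NaN, FMINIMUM(NaN, 1.0) returns NaN, and only FMINIMUM/FMAXIMUM order -0.0 below +0.0, so FMINIMUM(-0.0, +0.0) is -0.0.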
/// FSINCOS - Compute both fsin and fcos as a single operation.
FSINCOS,
@@ -786,11 +823,20 @@ namespace ISD {
// Masked load and store - consecutive vector load and store operations
// with additional mask operand that prevents memory accesses to the
// masked-off lanes.
+ //
+ // Val, OutChain = MLOAD(BasePtr, Mask, PassThru)
+ // OutChain = MSTORE(Value, BasePtr, Mask)
MLOAD, MSTORE,
// Masked gather and scatter - load and store operations for a vector of
// random addresses with additional mask operand that prevents memory
// accesses to the masked-off lanes.
+ //
+ // Val, OutChain = GATHER(InChain, PassThru, Mask, BasePtr, Index, Scale)
+ // OutChain = SCATTER(InChain, Value, Mask, BasePtr, Index, Scale)
+ //
+ // The Index operand can have more vector elements than the other operands
+ // due to type legalization. The extra elements are ignored.
MGATHER, MSCATTER,
/// This corresponds to the llvm.lifetime.* intrinsics. The first operand
diff --git a/contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index c3046da90b8d..38fcb37b1e69 100644
--- a/contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/contrib/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -15,7 +15,7 @@
#ifndef LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
#define LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
-#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include <cstdlib>
namespace {
diff --git a/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
index fee131e4a3c6..18c13ca8f598 100644
--- a/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -15,7 +15,7 @@
#ifndef LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
#define LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
-#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -36,11 +36,7 @@ namespace {
(void) llvm::createGreedyRegisterAllocator();
(void) llvm::createDefaultPBQPRegisterAllocator();
- llvm::linkCoreCLRGC();
- llvm::linkOcamlGC();
- llvm::linkErlangGC();
- llvm::linkShadowStackGC();
- llvm::linkStatepointExampleGC();
+ llvm::linkAllBuiltinGCs();
(void) llvm::createBURRListDAGScheduler(nullptr,
llvm::CodeGenOpt::Default);
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveIntervals.h b/contrib/llvm/include/llvm/CodeGen/LiveIntervals.h
index 291a07a712cb..16ab1dc475c4 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -198,10 +198,10 @@ class VirtRegMap;
void pruneValue(LiveRange &LR, SlotIndex Kill,
SmallVectorImpl<SlotIndex> *EndPoints);
- /// This function should not be used. Its intend is to tell you that
- /// you are doing something wrong if you call pruveValue directly on a
+ /// This function should not be used. Its intent is to tell you that you are
+ /// doing something wrong if you call pruneValue directly on a
/// LiveInterval. Indeed, you are supposed to call pruneValue on the main
- /// LiveRange and all the LiveRange of the subranges if any.
+ /// LiveRange and all the LiveRanges of the subranges if any.
LLVM_ATTRIBUTE_UNUSED void pruneValue(LiveInterval &, SlotIndex,
SmallVectorImpl<SlotIndex> *) {
llvm_unreachable(
diff --git a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 301a45066b4c..7312902e21b7 100644
--- a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -48,7 +48,8 @@ class raw_ostream;
/// when walking backward/forward through a basic block.
class LivePhysRegs {
const TargetRegisterInfo *TRI = nullptr;
- SparseSet<unsigned> LiveRegs;
+ using RegisterSet = SparseSet<MCPhysReg, identity<MCPhysReg>>;
+ RegisterSet LiveRegs;
public:
/// Constructs an uninitialized set. init() needs to be called to initialize it.
@@ -76,7 +77,7 @@ public:
bool empty() const { return LiveRegs.empty(); }
/// Adds a physical register and all its sub-registers to the set.
- void addReg(unsigned Reg) {
+ void addReg(MCPhysReg Reg) {
assert(TRI && "LivePhysRegs is not initialized.");
assert(Reg <= TRI->getNumRegs() && "Expected a physical register.");
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -86,7 +87,7 @@ public:
/// Removes a physical register, all its sub-registers, and all its
/// super-registers from the set.
- void removeReg(unsigned Reg) {
+ void removeReg(MCPhysReg Reg) {
assert(TRI && "LivePhysRegs is not initialized.");
assert(Reg <= TRI->getNumRegs() && "Expected a physical register.");
for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R)
@@ -95,7 +96,7 @@ public:
/// Removes physical registers clobbered by the regmask operand \p MO.
void removeRegsInMask(const MachineOperand &MO,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers =
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> *Clobbers =
nullptr);
/// Returns true if register \p Reg is contained in the set. This also
@@ -103,10 +104,10 @@ public:
/// addReg() always adds all sub-registers to the set as well.
/// Note: Returns false if just some sub registers are live, use available()
/// when searching a free register.
- bool contains(unsigned Reg) const { return LiveRegs.count(Reg); }
+ bool contains(MCPhysReg Reg) const { return LiveRegs.count(Reg); }
/// Returns true if register \p Reg and no aliasing register is in the set.
- bool available(const MachineRegisterInfo &MRI, unsigned Reg) const;
+ bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const;
/// Remove defined registers and regmask kills from the set.
void removeDefs(const MachineInstr &MI);
@@ -126,7 +127,7 @@ public:
/// defined or clobbered by a regmask. The operand will identify whether this
/// is a regmask or register operand.
void stepForward(const MachineInstr &MI,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers);
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers);
/// Adds all live-in registers of basic block \p MBB.
/// Live-in registers are the registers in the block's live-in list and the
@@ -143,7 +144,7 @@ public:
/// registers.
void addLiveOutsNoPristines(const MachineBasicBlock &MBB);
- using const_iterator = SparseSet<unsigned>::const_iterator;
+ using const_iterator = RegisterSet::const_iterator;
const_iterator begin() const { return LiveRegs.begin(); }
const_iterator end() const { return LiveRegs.end(); }
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h b/contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h
index 249545906e01..5e9dd8b3cdf6 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveRegUnits.h
@@ -85,14 +85,14 @@ public:
bool empty() const { return Units.none(); }
/// Adds register units covered by physical register \p Reg.
- void addReg(unsigned Reg) {
+ void addReg(MCPhysReg Reg) {
for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit)
Units.set(*Unit);
}
/// Adds register units covered by physical register \p Reg that are
/// part of the lanemask \p Mask.
- void addRegMasked(unsigned Reg, LaneBitmask Mask) {
+ void addRegMasked(MCPhysReg Reg, LaneBitmask Mask) {
for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
LaneBitmask UnitMask = (*Unit).second;
if (UnitMask.none() || (UnitMask & Mask).any())
@@ -101,7 +101,7 @@ public:
}
/// Removes all register units covered by physical register \p Reg.
- void removeReg(unsigned Reg) {
+ void removeReg(MCPhysReg Reg) {
for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit)
Units.reset(*Unit);
}
@@ -115,7 +115,7 @@ public:
void addRegsInMask(const uint32_t *RegMask);
/// Returns true if no part of physical register \p Reg is live.
- bool available(unsigned Reg) const {
+ bool available(MCPhysReg Reg) const {
for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
if (Units.test(*Unit))
return false;
diff --git a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 7f46406c4789..98ac81915dc0 100644
--- a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -425,6 +425,7 @@ struct MachineFrameInfo {
StringValue StackProtector;
// TODO: Serialize FunctionContextIdx
unsigned MaxCallFrameSize = ~0u; ///< ~0u means: not computed yet.
+ unsigned CVBytesOfCalleeSavedRegisters = 0;
bool HasOpaqueSPAdjustment = false;
bool HasVAStart = false;
bool HasMustTailInVarArgFunc = false;
@@ -443,6 +444,8 @@ struct MachineFrameInfo {
AdjustsStack == Other.AdjustsStack && HasCalls == Other.HasCalls &&
StackProtector == Other.StackProtector &&
MaxCallFrameSize == Other.MaxCallFrameSize &&
+ CVBytesOfCalleeSavedRegisters ==
+ Other.CVBytesOfCalleeSavedRegisters &&
HasOpaqueSPAdjustment == Other.HasOpaqueSPAdjustment &&
HasVAStart == Other.HasVAStart &&
HasMustTailInVarArgFunc == Other.HasMustTailInVarArgFunc &&
@@ -465,6 +468,8 @@ template <> struct MappingTraits<MachineFrameInfo> {
YamlIO.mapOptional("stackProtector", MFI.StackProtector,
StringValue()); // Don't print it out when it's empty.
YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, (unsigned)~0);
+ YamlIO.mapOptional("cvBytesOfCalleeSavedRegisters",
+ MFI.CVBytesOfCalleeSavedRegisters, 0U);
YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment,
false);
YamlIO.mapOptional("hasVAStart", MFI.HasVAStart, false);
@@ -489,6 +494,7 @@ struct MachineFunction {
bool FailedISel = false;
// Register information
bool TracksRegLiveness = false;
+ bool HasWinCFI = false;
std::vector<VirtualRegisterDefinition> VirtualRegisters;
std::vector<MachineFunctionLiveIn> LiveIns;
Optional<std::vector<FlowStringValue>> CalleeSavedRegisters;
@@ -512,6 +518,7 @@ template <> struct MappingTraits<MachineFunction> {
YamlIO.mapOptional("selected", MF.Selected, false);
YamlIO.mapOptional("failedISel", MF.FailedISel, false);
YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false);
+ YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false);
YamlIO.mapOptional("registers", MF.VirtualRegisters,
std::vector<VirtualRegisterDefinition>());
YamlIO.mapOptional("liveins", MF.LiveIns,
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index ace33efd8713..ec2f270fcb3f 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -569,6 +569,12 @@ public:
return !empty() && back().isReturn();
}
+ /// Convenience function that returns true if the block ends in an EH scope
+ /// return instruction.
+ bool isEHScopeReturnBlock() const {
+ return !empty() && back().isEHScopeReturn();
+ }
+
/// Split the critical edge from this block to the given successor block, and
/// return the newly created block, or null if splitting is not possible.
///
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 2d6081f3577d..c2706a21a177 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -28,9 +28,14 @@ class AllocaInst;
/// The CalleeSavedInfo class tracks the information need to locate where a
/// callee saved register is in the current frame.
+/// A callee-saved register can also be saved to a different register rather
+/// than to the stack, by setting DstReg instead of FrameIdx.
class CalleeSavedInfo {
unsigned Reg;
- int FrameIdx;
+ union {
+ int FrameIdx;
+ unsigned DstReg;
+ };
/// Flag indicating whether the register is actually restored in the epilog.
/// In most cases, if a register is saved, it is also restored. There are
/// some situations, though, when this is not the case. For example, the
@@ -44,17 +49,29 @@ class CalleeSavedInfo {
/// by implicit uses on the return instructions, however, the required
/// changes in the ARM backend would be quite extensive.
bool Restored;
+ /// Flag indicating whether the register is spilled to stack or another
+ /// register.
+ bool SpilledToReg;
public:
explicit CalleeSavedInfo(unsigned R, int FI = 0)
- : Reg(R), FrameIdx(FI), Restored(true) {}
+ : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {}
// Accessors.
unsigned getReg() const { return Reg; }
int getFrameIdx() const { return FrameIdx; }
- void setFrameIdx(int FI) { FrameIdx = FI; }
+ unsigned getDstReg() const { return DstReg; }
+ void setFrameIdx(int FI) {
+ FrameIdx = FI;
+ SpilledToReg = false;
+ }
+ void setDstReg(unsigned SpillReg) {
+ DstReg = SpillReg;
+ SpilledToReg = true;
+ }
bool isRestored() const { return Restored; }
void setRestored(bool R) { Restored = R; }
+ bool isSpilledToReg() const { return SpilledToReg; }
};
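A brief sketch of how a target might use the new register-spill alternative (every name below is hypothetical):

    CalleeSavedInfo CS(SavedReg);
    if (ScratchRegAvailable)
      CS.setDstReg(ScratchReg);   // isSpilledToReg() becomes true
    else
      CS.setFrameIdx(FrameIdx);   // conventional stack slot
    assert(CS.isSpilledToReg() == ScratchRegAvailable);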
/// The MachineFrameInfo class represents an abstract stack frame until
@@ -266,10 +283,14 @@ private:
/// It is only valid during and after prolog/epilog code insertion.
unsigned MaxCallFrameSize = ~0u;
+ /// The number of bytes of callee saved registers that the target wants to
+ /// report for the current function in the CodeView S_FRAMEPROC record.
+ unsigned CVBytesOfCalleeSavedRegisters = 0;
+
/// The prolog/epilog code inserter fills in this vector with each
- /// callee saved register saved in the frame. Beyond its use by the prolog/
- /// epilog code inserter, this data used for debug info and exception
- /// handling.
+ /// callee saved register saved in either the frame or a different
+ /// register. Beyond its use by the prolog/epilog code inserter,
+ /// this data is used for debug info and exception handling.
std::vector<CalleeSavedInfo> CSInfo;
/// Has CSInfo been set yet?
@@ -603,6 +624,15 @@ public:
}
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+ /// Returns how many bytes of callee-saved registers the target pushed in the
+ /// prologue. Only used for debug info.
+ unsigned getCVBytesOfCalleeSavedRegisters() const {
+ return CVBytesOfCalleeSavedRegisters;
+ }
+ void setCVBytesOfCalleeSavedRegisters(unsigned S) {
+ CVBytesOfCalleeSavedRegisters = S;
+ }
+
/// Create a new object at a fixed location on the stack.
/// All fixed objects should be created before other objects are created for
/// efficiency. By default, fixed objects are not pointed to by LLVM IR
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
index e8a4d529faac..25edf5bcce51 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -58,6 +58,7 @@ class DILocalVariable;
class DILocation;
class Function;
class GlobalValue;
+class LLVMTargetMachine;
class MachineConstantPool;
class MachineFrameInfo;
class MachineFunction;
@@ -70,7 +71,6 @@ class Pass;
class PseudoSourceValueManager;
class raw_ostream;
class SlotIndexes;
-class TargetMachine;
class TargetRegisterClass;
class TargetSubtargetInfo;
struct WasmEHFuncInfo;
@@ -225,7 +225,7 @@ struct LandingPadInfo {
class MachineFunction {
const Function &F;
- const TargetMachine &Target;
+ const LLVMTargetMachine &Target;
const TargetSubtargetInfo *STI;
MCContext &Ctx;
MachineModuleInfo &MMI;
@@ -294,7 +294,7 @@ class MachineFunction {
bool HasInlineAsm = false;
/// True if any WinCFI instructions have been emitted in this function.
- Optional<bool> HasWinCFI;
+ bool HasWinCFI = false;
/// Current high-level properties of the IR of the function (e.g. is in SSA
/// form or whether registers have been allocated)
@@ -316,6 +316,9 @@ class MachineFunction {
/// Map a landing pad's EH symbol to the call site indexes.
DenseMap<MCSymbol*, SmallVector<unsigned, 4>> LPadToCallSiteMap;
+ /// Map a landing pad to its index.
+ DenseMap<const MachineBasicBlock *, unsigned> WasmLPadToIndexMap;
+
/// Map of invoke call site index values to associated begin EH_LABEL.
DenseMap<MCSymbol*, unsigned> CallSiteMap;
@@ -363,10 +366,31 @@ public:
int Slot, const DILocation *Loc)
: Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
};
+
+ class Delegate {
+ virtual void anchor();
+
+ public:
+ virtual ~Delegate() = default;
+ /// Callback after an insertion. This should not modify the MI directly.
+ virtual void MF_HandleInsertion(MachineInstr &MI) = 0;
+ /// Callback before a removal. This should not modify the MI directly.
+ virtual void MF_HandleRemoval(MachineInstr &MI) = 0;
+ };
+
+private:
+ Delegate *TheDelegate = nullptr;
+
+ // Callbacks for insertion and removal.
+ void handleInsertion(MachineInstr &MI);
+ void handleRemoval(MachineInstr &MI);
+ friend struct ilist_traits<MachineInstr>;
+
+public:
using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>;
VariableDbgInfoMapTy VariableDbgInfos;
- MachineFunction(const Function &F, const TargetMachine &Target,
+ MachineFunction(const Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
MachineFunction(const MachineFunction &) = delete;
@@ -379,6 +403,23 @@ public:
init();
}
+ /// Reset the currently registered delegate - otherwise assert.
+ void resetDelegate(Delegate *delegate) {
+ assert(TheDelegate == delegate &&
+ "Only the current delegate can perform reset!");
+ TheDelegate = nullptr;
+ }
+
+ /// Set the delegate. resetDelegate must be called before attempting
+ /// to set.
+ void setDelegate(Delegate *delegate) {
+ assert(delegate && !TheDelegate &&
+ "Attempted to set delegate to null, or to change it without "
+ "first resetting it!");
+
+ TheDelegate = delegate;
+ }
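A sketch of a hypothetical observer built on the new Delegate hooks (the class name and counters are illustrative only):

    struct InstrCountDelegate final : MachineFunction::Delegate {
      unsigned NumInserted = 0, NumRemoved = 0;
      void MF_HandleInsertion(MachineInstr &) override { ++NumInserted; }
      void MF_HandleRemoval(MachineInstr &) override { ++NumRemoved; }
    };

    InstrCountDelegate D;
    MF.setDelegate(&D);
    // ... run a transformation that creates or erases MachineInstrs ...
    MF.resetDelegate(&D);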
+
MachineModuleInfo &getMMI() const { return MMI; }
MCContext &getContext() const { return Ctx; }
@@ -397,7 +438,7 @@ public:
unsigned getFunctionNumber() const { return FunctionNumber; }
/// getTarget - Return the target machine this machine code is compiled with
- const TargetMachine &getTarget() const { return Target; }
+ const LLVMTargetMachine &getTarget() const { return Target; }
/// getSubtarget - Return the subtarget for which this machine code is being
/// compiled.
@@ -484,8 +525,7 @@ public:
}
bool hasWinCFI() const {
- assert(HasWinCFI.hasValue() && "HasWinCFI not set yet!");
- return *HasWinCFI;
+ return HasWinCFI;
}
void setHasWinCFI(bool v) { HasWinCFI = v; }
@@ -619,6 +659,14 @@ public:
BasicBlocks.sort(comp);
}
+ /// Return the number of \p MachineInstrs in this \p MachineFunction.
+ unsigned getInstructionCount() const {
+ unsigned InstrCount = 0;
+ for (const MachineBasicBlock &MBB : BasicBlocks)
+ InstrCount += MBB.size();
+ return InstrCount;
+ }
+
//===--------------------------------------------------------------------===//
// Internal functions used to automatically number MachineBasicBlocks
@@ -711,23 +759,14 @@ public:
/// Allocate and initialize a register mask with @p NumRegister bits.
uint32_t *allocateRegMask();
- /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand
- /// pointers. This array is owned by the MachineFunction.
- MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num);
-
- /// extractLoadMemRefs - Allocate an array and populate it with just the
- /// load information from the given MachineMemOperand sequence.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator>
- extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End);
-
- /// extractStoreMemRefs - Allocate an array and populate it with just the
- /// store information from the given MachineMemOperand sequence.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator>
- extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End);
+ /// Allocate and construct an extra info structure for a `MachineInstr`.
+ ///
+ /// This is allocated on the function's allocator and so lives the life of
+ /// the function.
+ MachineInstr::ExtraInfo *
+ createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol = nullptr,
+ MCSymbol *PostInstrSymbol = nullptr);
/// Allocate a string and populate it with the given external symbol name.
const char *createExternalSymbolName(StringRef Name);
@@ -776,7 +815,8 @@ public:
LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
/// Remap landing pad labels and remove any deleted landing pads.
- void tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap = nullptr);
+ void tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap = nullptr,
+ bool TidyIfNoBeginLabels = true);
/// Return a reference to the landing pad info for the current function.
const std::vector<LandingPadInfo> &getLandingPads() const {
@@ -788,7 +828,9 @@ public:
void addInvoke(MachineBasicBlock *LandingPad,
MCSymbol *BeginLabel, MCSymbol *EndLabel);
- /// Add a new panding pad. Returns the label ID for the landing pad entry.
+ /// Add a new landing pad, and extract the exception handling information from
+ /// the landingpad instruction. Returns the label ID for the landing pad
+ /// entry.
MCSymbol *addLandingPad(MachineBasicBlock *LandingPad);
/// Provide the catch typeinfo for a landing pad.
@@ -817,6 +859,22 @@ public:
/// Map the landing pad's EH symbol to the call site indexes.
void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites);
+ /// Map the landing pad to its index. Used for Wasm exception handling.
+ void setWasmLandingPadIndex(const MachineBasicBlock *LPad, unsigned Index) {
+ WasmLPadToIndexMap[LPad] = Index;
+ }
+
+ /// Returns true if the landing pad has an associated index in wasm EH.
+ bool hasWasmLandingPadIndex(const MachineBasicBlock *LPad) const {
+ return WasmLPadToIndexMap.count(LPad);
+ }
+
+ /// Get the index in wasm EH for a given landing pad.
+ unsigned getWasmLandingPadIndex(const MachineBasicBlock *LPad) const {
+ assert(hasWasmLandingPadIndex(LPad));
+ return WasmLPadToIndexMap.lookup(LPad);
+ }
+
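A sketch of how a Wasm EH preparation step might number landing pads with this API (illustrative only; MF is an assumed MachineFunction):

    unsigned Index = 0;
    for (MachineBasicBlock &MBB : MF)
      if (MBB.isEHPad())
        MF.setWasmLandingPadIndex(&MBB, Index++);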
/// Get the call site indexes for a landing pad EH symbol.
SmallVectorImpl<unsigned> &getCallSiteLandingPad(MCSymbol *Sym) {
assert(hasCallSiteLandingPad(Sym) &&
@@ -880,15 +938,6 @@ public:
}
};
-/// \name Exception Handling
-/// \{
-
-/// Extract the exception handling information from the landingpad instruction
-/// and add them to the specified machine module info.
-void addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB);
-
-/// \}
-
//===--------------------------------------------------------------------===//
// GraphTraits specializations for function basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
index 88e13cdf4138..ea1a2a536fc7 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -17,16 +17,20 @@
#define LLVM_CODEGEN_MACHINEINSTR_H
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/PointerSumType.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ArrayRecycler.h"
+#include "llvm/Support/TrailingObjects.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -61,7 +65,7 @@ class MachineInstr
: public ilist_node_with_parent<MachineInstr, MachineBasicBlock,
ilist_sentinel_tracking<true>> {
public:
- using mmo_iterator = MachineMemOperand **;
+ using mmo_iterator = ArrayRef<MachineMemOperand *>::iterator;
/// Flags to specify different kinds of comments to output in
/// assembly code. These flags carry semantic information not
@@ -93,8 +97,14 @@ public:
// contraction operations like fma.
FmAfn = 1 << 9, // Instruction may map to Fast math
// intrinsic approximation.
- FmReassoc = 1 << 10 // Instruction supports Fast math
+ FmReassoc = 1 << 10, // Instruction supports Fast math
// reassociation of operand order.
+ NoUWrap = 1 << 11, // Instruction supports binary operator
+ // no unsigned wrap.
+ NoSWrap = 1 << 12, // Instruction supports binary operator
+ // no signed wrap.
+ IsExact = 1 << 13 // Instruction's division is
+ // known to be exact.
};
private:
@@ -118,14 +128,102 @@ private:
// anything other than to convey comment
// information to AsmPrinter.
- uint8_t NumMemRefs = 0; // Information on memory references.
- // Note that MemRefs == nullptr, means 'don't know', not 'no memory access'.
- // Calling code must treat missing information conservatively. If the number
- // of memory operands required to be precise exceeds the maximum value of
- // NumMemRefs - currently 256 - we remove the operands entirely. Note also
- // that this is a non-owning reference to a shared copy on write buffer owned
- // by the MachineFunction and created via MF.allocateMemRefsArray.
- mmo_iterator MemRefs = nullptr;
+ /// Internal implementation detail class that provides out-of-line storage for
+ /// extra info used by the machine instruction when this info cannot be stored
+ /// in-line within the instruction itself.
+ ///
+ /// This has to be defined eagerly due to the implementation constraints of
+ /// `PointerSumType` where it is used.
+ class ExtraInfo final
+ : TrailingObjects<ExtraInfo, MachineMemOperand *, MCSymbol *> {
+ public:
+ static ExtraInfo *create(BumpPtrAllocator &Allocator,
+ ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol = nullptr,
+ MCSymbol *PostInstrSymbol = nullptr) {
+ bool HasPreInstrSymbol = PreInstrSymbol != nullptr;
+ bool HasPostInstrSymbol = PostInstrSymbol != nullptr;
+ auto *Result = new (Allocator.Allocate(
+ totalSizeToAlloc<MachineMemOperand *, MCSymbol *>(
+ MMOs.size(), HasPreInstrSymbol + HasPostInstrSymbol),
+ alignof(ExtraInfo)))
+ ExtraInfo(MMOs.size(), HasPreInstrSymbol, HasPostInstrSymbol);
+
+ // Copy the actual data into the trailing objects.
+ std::copy(MMOs.begin(), MMOs.end(),
+ Result->getTrailingObjects<MachineMemOperand *>());
+
+ if (HasPreInstrSymbol)
+ Result->getTrailingObjects<MCSymbol *>()[0] = PreInstrSymbol;
+ if (HasPostInstrSymbol)
+ Result->getTrailingObjects<MCSymbol *>()[HasPreInstrSymbol] =
+ PostInstrSymbol;
+
+ return Result;
+ }
+
+ ArrayRef<MachineMemOperand *> getMMOs() const {
+ return makeArrayRef(getTrailingObjects<MachineMemOperand *>(), NumMMOs);
+ }
+
+ MCSymbol *getPreInstrSymbol() const {
+ return HasPreInstrSymbol ? getTrailingObjects<MCSymbol *>()[0] : nullptr;
+ }
+
+ MCSymbol *getPostInstrSymbol() const {
+ return HasPostInstrSymbol
+ ? getTrailingObjects<MCSymbol *>()[HasPreInstrSymbol]
+ : nullptr;
+ }
+
+ private:
+ friend TrailingObjects;
+
+ // Description of the extra info, used to interpret the actual optional
+ // data appended.
+ //
+ // Note that this is not terribly space optimized. This leaves a great deal
+ // of flexibility to fit more in here later.
+ const int NumMMOs;
+ const bool HasPreInstrSymbol;
+ const bool HasPostInstrSymbol;
+
+ // Implement the `TrailingObjects` internal API.
+ size_t numTrailingObjects(OverloadToken<MachineMemOperand *>) const {
+ return NumMMOs;
+ }
+ size_t numTrailingObjects(OverloadToken<MCSymbol *>) const {
+ return HasPreInstrSymbol + HasPostInstrSymbol;
+ }
+
+ // Just a boring constructor to allow us to initialize the sizes. Always use
+ // the `create` routine above.
+ ExtraInfo(int NumMMOs, bool HasPreInstrSymbol, bool HasPostInstrSymbol)
+ : NumMMOs(NumMMOs), HasPreInstrSymbol(HasPreInstrSymbol),
+ HasPostInstrSymbol(HasPostInstrSymbol) {}
+ };
+
+ /// Enumeration of the kinds of inline extra info available. It is important
+ /// that the `MachineMemOperand` inline kind has a tag value of zero to make
+ /// it accessible as an `ArrayRef`.
+ enum ExtraInfoInlineKinds {
+ EIIK_MMO = 0,
+ EIIK_PreInstrSymbol,
+ EIIK_PostInstrSymbol,
+ EIIK_OutOfLine
+ };
+
+ // We store extra information about the instruction here. The common case is
+ // expected to be nothing or a single pointer (typically a MMO or a symbol).
+ // We work to optimize this common case by storing it inline here rather than
+ // requiring a separate allocation, but we fall back to an allocation when
+ // multiple pointers are needed.
+ PointerSumType<ExtraInfoInlineKinds,
+ PointerSumTypeMember<EIIK_MMO, MachineMemOperand *>,
+ PointerSumTypeMember<EIIK_PreInstrSymbol, MCSymbol *>,
+ PointerSumTypeMember<EIIK_PostInstrSymbol, MCSymbol *>,
+ PointerSumTypeMember<EIIK_OutOfLine, ExtraInfo *>>
+ Info;
DebugLoc debugLoc; // Source line information.
@@ -310,7 +408,7 @@ public:
/// Returns the opcode of this MachineInstr.
unsigned getOpcode() const { return MCID->Opcode; }
- /// Access to explicit operands of the instruction.
+ /// Returns the total number of operands.
unsigned getNumOperands() const { return NumOperands; }
const MachineOperand& getOperand(unsigned i) const {
@@ -412,28 +510,70 @@ public:
return I - operands_begin();
}
- /// Access to memory operands of the instruction
- mmo_iterator memoperands_begin() const { return MemRefs; }
- mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
+ /// Access to memory operands of the instruction. If there are none, that does
+ /// not imply anything about whether the function accesses memory. Instead,
+ /// the caller must behave conservatively.
+ ArrayRef<MachineMemOperand *> memoperands() const {
+ if (!Info)
+ return {};
+
+ if (Info.is<EIIK_MMO>())
+ return makeArrayRef(Info.getAddrOfZeroTagPointer(), 1);
+
+ if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>())
+ return EI->getMMOs();
+
+ return {};
+ }
+
+ /// Access to memory operands of the instruction.
+ ///
+ /// If `memoperands_begin() == memoperands_end()`, that does not imply
+ /// anything about whether the function accesses memory. Instead, the caller
+ /// must behave conservatively.
+ mmo_iterator memoperands_begin() const { return memoperands().begin(); }
+
+ /// Access to memory operands of the instruction.
+ ///
+ /// If `memoperands_begin() == memoperands_end()`, that does not imply
+ /// anything about whether the function accesses memory. Instead, the caller
+ /// must behave conservatively.
+ mmo_iterator memoperands_end() const { return memoperands().end(); }
+
/// Return true if we don't have any memory operands which describe the
/// memory access done by this instruction. If this is true, calling code
/// must be conservative.
- bool memoperands_empty() const { return NumMemRefs == 0; }
-
- iterator_range<mmo_iterator> memoperands() {
- return make_range(memoperands_begin(), memoperands_end());
- }
- iterator_range<mmo_iterator> memoperands() const {
- return make_range(memoperands_begin(), memoperands_end());
- }
+ bool memoperands_empty() const { return memoperands().empty(); }
/// Return true if this instruction has exactly one MachineMemOperand.
- bool hasOneMemOperand() const {
- return NumMemRefs == 1;
- }
+ bool hasOneMemOperand() const { return memoperands().size() == 1; }
/// Return the number of memory operands.
- unsigned getNumMemOperands() const { return NumMemRefs; }
+ unsigned getNumMemOperands() const { return memoperands().size(); }
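With memoperands() now returning an ArrayRef, a plain range-for replaces the old iterator-pair loop, for example (MI is an assumed MachineInstr):

    bool SeenVolatile = false;
    for (const MachineMemOperand *MMO : MI.memoperands())
      SeenVolatile |= MMO->isVolatile();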
+
+ /// Helper to extract a pre-instruction symbol if one has been added.
+ MCSymbol *getPreInstrSymbol() const {
+ if (!Info)
+ return nullptr;
+ if (MCSymbol *S = Info.get<EIIK_PreInstrSymbol>())
+ return S;
+ if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>())
+ return EI->getPreInstrSymbol();
+
+ return nullptr;
+ }
+
+ /// Helper to extract a post-instruction symbol if one has been added.
+ MCSymbol *getPostInstrSymbol() const {
+ if (!Info)
+ return nullptr;
+ if (MCSymbol *S = Info.get<EIIK_PostInstrSymbol>())
+ return S;
+ if (ExtraInfo *EI = Info.get<EIIK_OutOfLine>())
+ return EI->getPostInstrSymbol();
+
+ return nullptr;
+ }
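A small usage sketch for these accessors together with the setters declared further down (MF and MI are assumed; the temp symbols are illustrative):

    MCSymbol *Begin = MF.getContext().createTempSymbol();
    MCSymbol *End = MF.getContext().createTempSymbol();
    MI.setPreInstrSymbol(MF, Begin);
    MI.setPostInstrSymbol(MF, End);
    assert(MI.getPreInstrSymbol() == Begin && MI.getPostInstrSymbol() == End);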
/// API for querying MachineInstr properties. They are the same as MCInstrDesc
/// queries but they are bundle aware.
@@ -450,6 +590,8 @@ public:
/// The second argument indicates whether the query should look inside
/// instruction bundles.
bool hasProperty(unsigned MCFlag, QueryType Type = AnyInBundle) const {
+ assert(MCFlag < 64 &&
+ "MCFlag out of range for bit mask in getFlags/hasPropertyInBundle.");
// Inline the fast path for unbundled or bundle-internal instructions.
if (Type == IgnoreBundle || !isBundled() || isBundledWithPred())
return getDesc().getFlags() & (1ULL << MCFlag);
@@ -482,6 +624,12 @@ public:
return hasProperty(MCID::Return, Type);
}
+ /// Return true if this is an instruction that marks the end of an EH scope,
+ /// i.e., a catchpad or a cleanuppad instruction.
+ bool isEHScopeReturn(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::EHScopeReturn, Type);
+ }
+
bool isCall(QueryType Type = AnyInBundle) const {
return hasProperty(MCID::Call, Type);
}
@@ -1323,47 +1471,63 @@ public:
/// fewer operand than it started with.
void RemoveOperand(unsigned OpNo);
+ /// Clear this MachineInstr's memory reference descriptor list. This resets
+ /// the memrefs to their most conservative state. This should be used only
+ /// as a last resort since it greatly pessimizes our knowledge of the memory
+ /// access performed by the instruction.
+ void dropMemRefs(MachineFunction &MF);
+
+ /// Assign this MachineInstr's memory reference descriptor list.
+ ///
+ /// Unlike other methods, this *will* allocate them into a new array
+ /// associated with the provided `MachineFunction`.
+ void setMemRefs(MachineFunction &MF, ArrayRef<MachineMemOperand *> MemRefs);
+
/// Add a MachineMemOperand to the machine instruction.
/// This function should be used only occasionally. The setMemRefs function
/// is the primary method for setting up a MachineInstr's MemRefs list.
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO);
- /// Assign this MachineInstr's memory reference descriptor list.
- /// This does not transfer ownership.
- void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
- setMemRefs(std::make_pair(NewMemRefs, NewMemRefsEnd-NewMemRefs));
- }
+ /// Clone another MachineInstr's memory reference descriptor list and replace
+ /// ours with it.
+ ///
+ /// Note that `*this` may be the incoming MI!
+ ///
+ /// Prefer this API whenever possible as it can avoid allocations in common
+ /// cases.
+ void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI);
- /// Assign this MachineInstr's memory reference descriptor list. First
- /// element in the pair is the begin iterator/pointer to the array; the
- /// second is the number of MemoryOperands. This does not transfer ownership
- /// of the underlying memory.
- void setMemRefs(std::pair<mmo_iterator, unsigned> NewMemRefs) {
- MemRefs = NewMemRefs.first;
- NumMemRefs = uint8_t(NewMemRefs.second);
- assert(NumMemRefs == NewMemRefs.second &&
- "Too many memrefs - must drop memory operands");
- }
+ /// Clone the merge of multiple MachineInstrs' memory reference descriptors
+ /// list and replace ours with it.
+ ///
+ /// Note that `*this` may be one of the incoming MIs!
+ ///
+ /// Prefer this API whenever possible as it can avoid allocations in common
+ /// cases.
+ void cloneMergedMemRefs(MachineFunction &MF,
+ ArrayRef<const MachineInstr *> MIs);
- /// Return a set of memrefs (begin iterator, size) which conservatively
- /// describe the memory behavior of both MachineInstrs. This is appropriate
- /// for use when merging two MachineInstrs into one. This routine does not
- /// modify the memrefs of the this MachineInstr.
- std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other);
+ /// Set a symbol that will be emitted just prior to the instruction itself.
+ ///
+ /// Setting this to a null pointer will remove any such symbol.
+ ///
+ /// FIXME: This is not fully implemented yet.
+ void setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol);
+
+ /// Set a symbol that will be emitted just after the instruction itself.
+ ///
+ /// Setting this to a null pointer will remove any such symbol.
+ ///
+ /// FIXME: This is not fully implemented yet.
+ void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol);
/// Return the MIFlags which represent both MachineInstrs. This
/// should be used when merging two MachineInstrs into one. This routine does
/// not modify the MIFlags of this MachineInstr.
uint16_t mergeFlagsWith(const MachineInstr& Other) const;
- /// Clear this MachineInstr's memory reference descriptor list. This resets
- /// the memrefs to their most conservative state. This should be used only
- /// as a last resort since it greatly pessimizes our knowledge of the memory
- /// access performed by the instruction.
- void dropMemRefs() {
- MemRefs = nullptr;
- NumMemRefs = 0;
- }
+ /// Copy all IR flags from \p I to this MachineInstr's MIFlags.
+ void copyIRFlags(const Instruction &I);
/// Break any tie involving OpIdx.
void untieRegOperand(unsigned OpIdx) {
@@ -1377,6 +1541,13 @@ public:
/// Add all implicit def and use operands to this instruction.
void addImplicitDefUseOperands(MachineFunction &MF);
+ /// Scan instructions following MI and collect any matching DBG_VALUEs.
+ void collectDebugValues(SmallVectorImpl<MachineInstr *> &DbgValues);
+
+ /// Find all DBG_VALUEs immediately following this instruction that point
+ /// to a register def in this instruction and point them to \p Reg instead.
+ void changeDebugValuesDefReg(unsigned Reg);
+
private:
/// If this instruction is embedded into a MachineFunction, return the
/// MachineRegisterInfo object for the current function, otherwise
@@ -1394,7 +1565,7 @@ private:
void AddRegOperandsToUseLists(MachineRegisterInfo&);
/// Slow path for hasProperty when we're dealing with a bundle.
- bool hasPropertyInBundle(unsigned Mask, QueryType Type) const;
+ bool hasPropertyInBundle(uint64_t Mask, QueryType Type) const;
/// Implements the logic of getRegClassConstraintEffectForVReg for the
/// this MI and the given operand index \p OpIdx.
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index 665608755741..b5e523f655e7 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -191,15 +191,20 @@ public:
return *this;
}
- const MachineInstrBuilder &setMemRefs(MachineInstr::mmo_iterator b,
- MachineInstr::mmo_iterator e) const {
- MI->setMemRefs(b, e);
+ const MachineInstrBuilder &
+ setMemRefs(ArrayRef<MachineMemOperand *> MMOs) const {
+ MI->setMemRefs(*MF, MMOs);
return *this;
}
- const MachineInstrBuilder &setMemRefs(std::pair<MachineInstr::mmo_iterator,
- unsigned> MemOperandsRef) const {
- MI->setMemRefs(MemOperandsRef);
+ const MachineInstrBuilder &cloneMemRefs(const MachineInstr &OtherMI) const {
+ MI->cloneMemRefs(*MF, OtherMI);
+ return *this;
+ }
+
+ const MachineInstrBuilder &
+ cloneMergedMemRefs(ArrayRef<const MachineInstr *> OtherMIs) const {
+ MI->cloneMergedMemRefs(*MF, OtherMIs);
return *this;
}
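A sketch of the builder-side helper when two loads are combined into a single instruction (every name below is hypothetical):

    MachineInstr *Paired =
        BuildMI(MBB, InsertPt, DL, TII.get(PairedOpc), DstReg)
            .addReg(BaseReg)
            .addImm(Offset)
            .cloneMergedMemRefs({&LoadA, &LoadB});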
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index 554e89019b76..4371420bc7a2 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -46,10 +46,10 @@ namespace llvm {
class BasicBlock;
class CallInst;
class Function;
-class MachineFunction;
+class LLVMTargetMachine;
class MMIAddrLabelMap;
+class MachineFunction;
class Module;
-class TargetMachine;
//===----------------------------------------------------------------------===//
/// This class can be derived from and used by targets to hold private
@@ -76,7 +76,7 @@ protected:
/// for specific use.
///
class MachineModuleInfo : public ImmutablePass {
- const TargetMachine &TM;
+ const LLVMTargetMachine &TM;
/// This is the MCContext used for the entire code generator.
MCContext Context;
@@ -145,7 +145,7 @@ class MachineModuleInfo : public ImmutablePass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit MachineModuleInfo(const TargetMachine *TM = nullptr);
+ explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr);
~MachineModuleInfo() override;
// Initialization and Finalization
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
index 6a87fa2fbf00..17df1fa792b7 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -80,6 +80,28 @@ public:
SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); }
};
+/// MachineModuleInfoCOFF - This is a MachineModuleInfoImpl implementation
+/// for COFF targets.
+class MachineModuleInfoCOFF : public MachineModuleInfoImpl {
+ /// GVStubs - These stubs are used to materialize global addresses in PIC
+ /// mode.
+ DenseMap<MCSymbol *, StubValueTy> GVStubs;
+
+ virtual void anchor(); // Out of line virtual method.
+
+public:
+ MachineModuleInfoCOFF(const MachineModuleInfo &) {}
+
+ StubValueTy &getGVStubEntry(MCSymbol *Sym) {
+ assert(Sym && "Key cannot be null");
+ return GVStubs[Sym];
+ }
+
+ /// Accessor methods to return the set of stubs in sorted order.
+
+ SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); }
+};
+
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
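The new COFF impl mirrors the existing stub maps; a target AsmPrinter would typically reach it through MachineModuleInfo::getObjFileInfo<>, which is part of the surrounding API and not shown in this hunk. A sketch under that assumption; the helper name and the flag value are illustrative.

    #include "llvm/CodeGen/MachineModuleInfo.h"
    #include "llvm/CodeGen/MachineModuleInfoImpls.h"

    // Sketch only: record a PIC stub for StubSym that resolves to Target.
    // The meaning of the StubValueTy flag is target-defined.
    static void recordCOFFGVStub(llvm::MachineModuleInfo &MMI,
                                 llvm::MCSymbol *StubSym,
                                 llvm::MCSymbol *Target) {
      auto &MMICOFF = MMI.getObjFileInfo<llvm::MachineModuleInfoCOFF>();
      MMICOFF.getGVStubEntry(StubSym) =
          llvm::MachineModuleInfoImpl::StubValueTy(Target, /*Flag=*/true);
    }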
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h b/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h
index 95bfc24b57ff..bfd1e994053a 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -61,9 +61,6 @@ public:
/// \p OutlinedFunctions.
unsigned FunctionIdx;
- /// Set to false if the candidate overlapped with another candidate.
- bool InCandidateList = true;
-
/// Identifier denoting the instructions to emit to call an outlined function
/// from this point. Defined by the target.
unsigned CallConstructionID;
@@ -82,6 +79,12 @@ public:
/// been used across the sequence.
LiveRegUnits UsedInSequence;
+ /// Target-specific flags for this Candidate's MBB.
+ unsigned Flags = 0x0;
+
+ /// True if initLRU has been called on this Candidate.
+ bool LRUWasSet = false;
+
/// Return the number of instructions in this Candidate.
unsigned getLength() const { return Len; }
@@ -99,9 +102,7 @@ public:
}
/// Returns the call overhead of this candidate.
- unsigned getCallOverhead() const {
- return InCandidateList ? CallOverhead : 0;
- }
+ unsigned getCallOverhead() const { return CallOverhead; }
MachineBasicBlock::iterator &front() { return FirstInst; }
MachineBasicBlock::iterator &back() { return LastInst; }
@@ -120,9 +121,9 @@ public:
Candidate(unsigned StartIdx, unsigned Len,
MachineBasicBlock::iterator &FirstInst,
MachineBasicBlock::iterator &LastInst, MachineBasicBlock *MBB,
- unsigned FunctionIdx)
+ unsigned FunctionIdx, unsigned Flags)
: StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst),
- MBB(MBB), FunctionIdx(FunctionIdx) {}
+ MBB(MBB), FunctionIdx(FunctionIdx), Flags(Flags) {}
Candidate() {}
/// Used to ensure that \p Candidates are outlined in an order that
@@ -138,6 +139,10 @@ public:
void initLRU(const TargetRegisterInfo &TRI) {
assert(MBB->getParent()->getRegInfo().tracksLiveness() &&
"Candidate's Machine Function must track liveness");
+ // Only initialize once.
+ if (LRUWasSet)
+ return;
+ LRUWasSet = true;
LRU.init(TRI);
LRU.addLiveOuts(*MBB);
@@ -158,24 +163,13 @@ public:
/// class of candidate.
struct OutlinedFunction {
-private:
- /// The number of candidates for this \p OutlinedFunction.
- unsigned OccurrenceCount = 0;
-
public:
- std::vector<std::shared_ptr<Candidate>> Candidates;
+ std::vector<Candidate> Candidates;
/// The actual outlined function created.
/// This is initialized after we go through and create the actual function.
MachineFunction *MF = nullptr;
- /// A number assigned to this function which appears at the end of its name.
- unsigned Name;
-
- /// The sequence of integers corresponding to the instructions in this
- /// function.
- std::vector<unsigned> Sequence;
-
/// Represents the size of a sequence in bytes. (Some instructions vary
/// widely in size, so just counting the instructions isn't very useful.)
unsigned SequenceSize;
@@ -187,49 +181,41 @@ public:
unsigned FrameConstructionID;
/// Return the number of candidates for this \p OutlinedFunction.
- unsigned getOccurrenceCount() { return OccurrenceCount; }
-
- /// Decrement the occurrence count of this OutlinedFunction and return the
- /// new count.
- unsigned decrement() {
- assert(OccurrenceCount > 0 && "Can't decrement an empty function!");
- OccurrenceCount--;
- return getOccurrenceCount();
- }
+ unsigned getOccurrenceCount() const { return Candidates.size(); }
/// Return the number of bytes it would take to outline this
/// function.
- unsigned getOutliningCost() {
+ unsigned getOutliningCost() const {
unsigned CallOverhead = 0;
- for (std::shared_ptr<Candidate> &C : Candidates)
- CallOverhead += C->getCallOverhead();
+ for (const Candidate &C : Candidates)
+ CallOverhead += C.getCallOverhead();
return CallOverhead + SequenceSize + FrameOverhead;
}
/// Return the size in bytes of the unoutlined sequences.
- unsigned getNotOutlinedCost() { return OccurrenceCount * SequenceSize; }
+ unsigned getNotOutlinedCost() const {
+ return getOccurrenceCount() * SequenceSize;
+ }
/// Return the number of instructions that would be saved by outlining
/// this function.
- unsigned getBenefit() {
+ unsigned getBenefit() const {
unsigned NotOutlinedCost = getNotOutlinedCost();
unsigned OutlinedCost = getOutliningCost();
return (NotOutlinedCost < OutlinedCost) ? 0
: NotOutlinedCost - OutlinedCost;
}
- OutlinedFunction(std::vector<Candidate> &Cands,
- unsigned SequenceSize, unsigned FrameOverhead,
- unsigned FrameConstructionID)
- : SequenceSize(SequenceSize), FrameOverhead(FrameOverhead),
- FrameConstructionID(FrameConstructionID) {
- OccurrenceCount = Cands.size();
- for (Candidate &C : Cands)
- Candidates.push_back(std::make_shared<outliner::Candidate>(C));
-
- unsigned B = getBenefit();
- for (std::shared_ptr<Candidate> &C : Candidates)
- C->Benefit = B;
+ /// Return the number of instructions in this sequence.
+ unsigned getNumInstrs() const { return Candidates[0].getLength(); }
+
+ OutlinedFunction(std::vector<Candidate> &Candidates, unsigned SequenceSize,
+ unsigned FrameOverhead, unsigned FrameConstructionID)
+ : Candidates(Candidates), SequenceSize(SequenceSize),
+ FrameOverhead(FrameOverhead), FrameConstructionID(FrameConstructionID) {
+ const unsigned B = getBenefit();
+ for (Candidate &C : Candidates)
+ C.Benefit = B;
}
OutlinedFunction() {}
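Since the occurrence count is now just Candidates.size(), the cost model reads directly off the stored candidates. A sketch with hypothetical numbers; candidate discovery and the per-call overhead are assumed to have been done by the target.

    #include "llvm/CodeGen/MachineOutliner.h"

    // Sketch only: Cands was filled in by target-specific candidate discovery.
    // With three candidates, SequenceSize = 12, per-call overhead = 4 and
    // FrameOverhead = 8:
    //   getNotOutlinedCost() = 3 * 12       = 36
    //   getOutliningCost()   = 3*4 + 12 + 8 = 32
    //   getBenefit()         = 36 - 32      = 4 bytes saved
    static llvm::outliner::OutlinedFunction
    makeOutlinedFunction(std::vector<llvm::outliner::Candidate> &Cands) {
      llvm::outliner::OutlinedFunction OF(Cands, /*SequenceSize=*/12,
                                          /*FrameOverhead=*/8,
                                          /*FrameConstructionID=*/0);
      return OF; // callers keep it only if OF.getBenefit() > 0
    }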
diff --git a/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h b/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
index 3aba0bba7d1a..a031c92d914f 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
@@ -24,22 +24,20 @@
namespace llvm {
-using MachinePassCtor = void *(*)();
-
//===----------------------------------------------------------------------===//
///
/// MachinePassRegistryListener - Listener for additions and removals of nodes
/// in the registration list.
///
//===----------------------------------------------------------------------===//
-class MachinePassRegistryListener {
- virtual void anchor();
+template <class PassCtorTy> class MachinePassRegistryListener {
+ virtual void anchor() {}
public:
MachinePassRegistryListener() = default;
virtual ~MachinePassRegistryListener() = default;
- virtual void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) = 0;
+ virtual void NotifyAdd(StringRef N, PassCtorTy C, StringRef D) = 0;
virtual void NotifyRemove(StringRef N) = 0;
};
@@ -48,15 +46,15 @@ public:
/// MachinePassRegistryNode - Machine pass node stored in registration list.
///
//===----------------------------------------------------------------------===//
-class MachinePassRegistryNode {
+template <typename PassCtorTy> class MachinePassRegistryNode {
private:
MachinePassRegistryNode *Next = nullptr; // Next function pass in list.
StringRef Name; // Name of function pass.
StringRef Description; // Description string.
- MachinePassCtor Ctor; // Function pass creator.
+ PassCtorTy Ctor; // Pass creator.
public:
- MachinePassRegistryNode(const char *N, const char *D, MachinePassCtor C)
+ MachinePassRegistryNode(const char *N, const char *D, PassCtorTy C)
: Name(N), Description(D), Ctor(C) {}
// Accessors
@@ -64,7 +62,7 @@ public:
MachinePassRegistryNode **getNextAddress() { return &Next; }
StringRef getName() const { return Name; }
StringRef getDescription() const { return Description; }
- MachinePassCtor getCtor() const { return Ctor; }
+ PassCtorTy getCtor() const { return Ctor; }
void setNext(MachinePassRegistryNode *N) { Next = N; }
};
@@ -73,11 +71,12 @@ public:
/// MachinePassRegistry - Track the registration of machine passes.
///
//===----------------------------------------------------------------------===//
-class MachinePassRegistry {
+template <typename PassCtorTy> class MachinePassRegistry {
private:
- MachinePassRegistryNode *List; // List of registry nodes.
- MachinePassCtor Default; // Default function pass creator.
- MachinePassRegistryListener *Listener; // Listener for list adds are removes.
+ MachinePassRegistryNode<PassCtorTy> *List; // List of registry nodes.
+ PassCtorTy Default; // Default function pass creator.
+ MachinePassRegistryListener<PassCtorTy>
+ *Listener; // Listener for list adds and removes.
public:
// NO CONSTRUCTOR - we don't want static constructor ordering to mess
@@ -85,19 +84,47 @@ public:
// Accessors.
//
- MachinePassRegistryNode *getList() { return List; }
- MachinePassCtor getDefault() { return Default; }
- void setDefault(MachinePassCtor C) { Default = C; }
- void setDefault(StringRef Name);
- void setListener(MachinePassRegistryListener *L) { Listener = L; }
+ MachinePassRegistryNode<PassCtorTy> *getList() { return List; }
+ PassCtorTy getDefault() { return Default; }
+ void setDefault(PassCtorTy C) { Default = C; }
+ /// setDefault - Set the default constructor by name.
+ void setDefault(StringRef Name) {
+ PassCtorTy Ctor = nullptr;
+ for (MachinePassRegistryNode<PassCtorTy> *R = getList(); R;
+ R = R->getNext()) {
+ if (R->getName() == Name) {
+ Ctor = R->getCtor();
+ break;
+ }
+ }
+ assert(Ctor && "Unregistered pass name");
+ setDefault(Ctor);
+ }
+ void setListener(MachinePassRegistryListener<PassCtorTy> *L) { Listener = L; }
/// Add - Adds a function pass to the registration list.
///
- void Add(MachinePassRegistryNode *Node);
+ void Add(MachinePassRegistryNode<PassCtorTy> *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener)
+ Listener->NotifyAdd(Node->getName(), Node->getCtor(),
+ Node->getDescription());
+ }
/// Remove - Removes a function pass from the registration list.
///
- void Remove(MachinePassRegistryNode *Node);
+ void Remove(MachinePassRegistryNode<PassCtorTy> *Node) {
+ for (MachinePassRegistryNode<PassCtorTy> **I = &List; *I;
+ I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener)
+ Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+ }
};
//===----------------------------------------------------------------------===//
@@ -105,9 +132,11 @@ public:
/// RegisterPassParser class - Handle the addition of new machine passes.
///
//===----------------------------------------------------------------------===//
-template<class RegistryClass>
-class RegisterPassParser : public MachinePassRegistryListener,
- public cl::parser<typename RegistryClass::FunctionPassCtor> {
+template <class RegistryClass>
+class RegisterPassParser
+ : public MachinePassRegistryListener<
+ typename RegistryClass::FunctionPassCtor>,
+ public cl::parser<typename RegistryClass::FunctionPassCtor> {
public:
RegisterPassParser(cl::Option &O)
: cl::parser<typename RegistryClass::FunctionPassCtor>(O) {}
@@ -129,8 +158,9 @@ public:
}
// Implement the MachinePassRegistryListener callbacks.
- void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) override {
- this->addLiteralOption(N, (typename RegistryClass::FunctionPassCtor)C, D);
+ void NotifyAdd(StringRef N, typename RegistryClass::FunctionPassCtor C,
+ StringRef D) override {
+ this->addLiteralOption(N, C, D);
}
void NotifyRemove(StringRef N) override {
this->removeLiteralOption(N);
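With the registry templated over the constructor type, RegisterPassParser plugs straight into a cl::opt without the old MachinePassCtor cast. A sketch using the machine-scheduler registry updated later in this patch; the option name is illustrative.

    #include "llvm/CodeGen/MachineScheduler.h"
    #include "llvm/Support/CommandLine.h"

    // Sketch only: exposes every scheduler registered with MachineSchedRegistry
    // as a value of this hypothetical command-line option.
    static llvm::cl::opt<llvm::MachineSchedRegistry::ScheduleDAGCtor, false,
                         llvm::RegisterPassParser<llvm::MachineSchedRegistry>>
        ExampleSchedOpt("example-misched", llvm::cl::init(nullptr),
                        llvm::cl::desc("Pick a registered MI scheduler (sketch)"));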
diff --git a/contrib/llvm/include/llvm/CodeGen/MachinePipeliner.h b/contrib/llvm/include/llvm/CodeGen/MachinePipeliner.h
new file mode 100644
index 000000000000..38cb33e90e63
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -0,0 +1,608 @@
+//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
+//
+// Software pipelining (SWP) is an instruction scheduling technique for loops
+// that overlaps loop iterations and exploits ILP via a compiler transformation.
+//
+// Swing Modulo Scheduling is an implementation of software pipelining
+// that generates schedules that are near optimal in terms of initiation
+// interval, register requirements, and stage count. See the papers:
+//
+// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
+// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
+// Conference on Parallel Architectures and Compilation Techniques.
+//
+// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
+// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
+// Transactions on Computers, Vol. 50, No. 3, 2001.
+//
+// "An Implementation of Swing Modulo Scheduling With Extensions for
+// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
+// Urbana-Champaign, 2005.
+//
+//
+// The SMS algorithm consists of three main steps after computing the minimal
+// initiation interval (MII).
+// 1) Analyze the dependence graph and compute information about each
+// instruction in the graph.
+// 2) Order the nodes (instructions) by priority based upon the heuristics
+// described in the algorithm.
+// 3) Attempt to schedule the nodes in the specified order using the MII.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
+#define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+namespace llvm {
+
+class NodeSet;
+class SMSchedule;
+
+extern cl::opt<bool> SwpEnableCopyToPhi;
+
+/// The main class in the implementation of the target independent
+/// software pipeliner pass.
+class MachinePipeliner : public MachineFunctionPass {
+public:
+ MachineFunction *MF = nullptr;
+ const MachineLoopInfo *MLI = nullptr;
+ const MachineDominatorTree *MDT = nullptr;
+ const InstrItineraryData *InstrItins;
+ const TargetInstrInfo *TII = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+#ifndef NDEBUG
+ static int NumTries;
+#endif
+
+ /// Cache the target analysis information about the loop.
+ struct LoopInfo {
+ MachineBasicBlock *TBB = nullptr;
+ MachineBasicBlock *FBB = nullptr;
+ SmallVector<MachineOperand, 4> BrCond;
+ MachineInstr *LoopInductionVar = nullptr;
+ MachineInstr *LoopCompare = nullptr;
+ };
+ LoopInfo LI;
+
+ static char ID;
+
+ MachinePipeliner() : MachineFunctionPass(ID) {
+ initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ void preprocessPhiNodes(MachineBasicBlock &B);
+ bool canPipelineLoop(MachineLoop &L);
+ bool scheduleLoop(MachineLoop &L);
+ bool swingModuloScheduler(MachineLoop &L);
+};
+
+/// This class builds the dependence graph for the instructions in a loop,
+/// and attempts to schedule the instructions using the SMS algorithm.
+class SwingSchedulerDAG : public ScheduleDAGInstrs {
+ MachinePipeliner &Pass;
+ /// The minimum initiation interval between iterations for this schedule.
+ unsigned MII = 0;
+ /// Set to true if a valid pipelined schedule is found for the loop.
+ bool Scheduled = false;
+ MachineLoop &Loop;
+ LiveIntervals &LIS;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// A topological ordering of the SUnits, which is needed for changing
+ /// dependences and iterating over the SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ struct NodeInfo {
+ int ASAP = 0;
+ int ALAP = 0;
+ int ZeroLatencyDepth = 0;
+ int ZeroLatencyHeight = 0;
+
+ NodeInfo() = default;
+ };
+ /// Computed properties for each node in the graph.
+ std::vector<NodeInfo> ScheduleInfo;
+
+ enum OrderKind { BottomUp = 0, TopDown = 1 };
+ /// Computed node ordering for scheduling.
+ SetVector<SUnit *> NodeOrder;
+
+ using NodeSetType = SmallVector<NodeSet, 8>;
+ using ValueMapTy = DenseMap<unsigned, unsigned>;
+ using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
+ using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
+
+ /// Instructions to change when emitting the final schedule.
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
+
+ /// We may create a new instruction, so remember it because it
+ /// must be deleted when the pass is finished.
+ SmallPtrSet<MachineInstr *, 4> NewMIs;
+
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
+ /// Helper class to implement Johnson's circuit finding algorithm.
+ class Circuits {
+ std::vector<SUnit> &SUnits;
+ SetVector<SUnit *> Stack;
+ BitVector Blocked;
+ SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
+ SmallVector<SmallVector<int, 4>, 16> AdjK;
+ // Node to Index from ScheduleDAGTopologicalSort
+ std::vector<int> *Node2Idx;
+ unsigned NumPaths;
+ static unsigned MaxPaths;
+
+ public:
+ Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
+ : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
+ Node2Idx = new std::vector<int>(SUs.size());
+ unsigned Idx = 0;
+ for (const auto &NodeNum : Topo)
+ Node2Idx->at(NodeNum) = Idx++;
+ }
+
+ ~Circuits() { delete Node2Idx; }
+
+ /// Reset the data structures used in the circuit algorithm.
+ void reset() {
+ Stack.clear();
+ Blocked.reset();
+ B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
+ NumPaths = 0;
+ }
+
+ void createAdjacencyStructure(SwingSchedulerDAG *DAG);
+ bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
+ void unblock(int U);
+ };
+
+ struct CopyToPhiMutation : public ScheduleDAGMutation {
+ void apply(ScheduleDAGInstrs *DAG) override;
+ };
+
+public:
+ SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
+ const RegisterClassInfo &rci)
+ : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
+ RegClassInfo(rci), Topo(SUnits, &ExitSU) {
+ P.MF->getSubtarget().getSMSMutations(Mutations);
+ if (SwpEnableCopyToPhi)
+ Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
+ }
+
+ void schedule() override;
+ void finishBlock() override;
+
+ /// Return true if the loop kernel has been scheduled.
+ bool hasNewSchedule() { return Scheduled; }
+
+ /// Return the earliest time an instruction may be scheduled.
+ int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
+
+ /// Return the latest time an instruction may be scheduled.
+ int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
+
+ /// The mobility function, which is the number of slots in which
+ /// an instruction may be scheduled.
+ int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
+
+ /// The depth, in the dependence graph, for a node.
+ unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
+
+ /// The maximum unweighted length of a path from an arbitrary node to the
+ /// given node in which each edge has latency 0
+ int getZeroLatencyDepth(SUnit *Node) {
+ return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
+ }
+
+ /// The height, in the dependence graph, for a node.
+ unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
+
+ /// The maximum unweighted length of a path from the given node to an
+ /// arbitrary node in which each edge has latency 0
+ int getZeroLatencyHeight(SUnit *Node) {
+ return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
+ }
+
+ /// Return true if the dependence is a back-edge in the data dependence graph.
+ /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
+ /// using an anti dependence from a Phi to an instruction.
+ bool isBackedge(SUnit *Source, const SDep &Dep) {
+ if (Dep.getKind() != SDep::Anti)
+ return false;
+ return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
+ }
+
+ bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
+
+ /// The distance function, which indicates that operation V of iteration I
+ /// depends on operation U of iteration I-distance.
+ unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
+ // Instructions that feed a Phi have a distance of 1. Computing larger
+ // values for arrays requires data dependence information.
+ if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
+ return 1;
+ return 0;
+ }
+
+ /// Set the Minimum Initiation Interval for this schedule attempt.
+ void setMII(unsigned mii) { MII = mii; }
+
+ void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
+
+ void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
+
+ /// Return the new base register that was stored away for the changed
+ /// instruction.
+ unsigned getInstrBaseReg(SUnit *SU) {
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end())
+ return It->second.first;
+ return 0;
+ }
+
+ void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ Mutations.push_back(std::move(Mutation));
+ }
+
+ static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
+
+private:
+ void addLoopCarriedDependences(AliasAnalysis *AA);
+ void updatePhiDependences();
+ void changeDependences();
+ unsigned calculateResMII();
+ unsigned calculateRecMII(NodeSetType &RecNodeSets);
+ void findCircuits(NodeSetType &NodeSets);
+ void fuseRecs(NodeSetType &NodeSets);
+ void removeDuplicateNodes(NodeSetType &NodeSets);
+ void computeNodeFunctions(NodeSetType &NodeSets);
+ void registerPressureFilter(NodeSetType &NodeSets);
+ void colocateNodeSets(NodeSetType &NodeSets);
+ void checkNodeSets(NodeSetType &NodeSets);
+ void groupRemainingNodes(NodeSetType &NodeSets);
+ void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
+ SetVector<SUnit *> &NodesAdded);
+ void computeNodeOrder(NodeSetType &NodeSets);
+ void checkValidNodeOrder(const NodeSetType &Circuits) const;
+ bool schedulePipeline(SMSchedule &Schedule);
+ void generatePipelinedLoop(SMSchedule &Schedule);
+ void generateProlog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs);
+ void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
+ void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum,
+ unsigned CurStageNum, bool IsLast);
+ void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum,
+ unsigned CurStageNum, bool IsLast);
+ void removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs);
+ void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule);
+ void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
+ ValueMapTy *VRMap);
+ bool computeDelta(MachineInstr &MI, unsigned &Delta);
+ void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
+ unsigned Num);
+ MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum,
+ SMSchedule &Schedule);
+ void updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum, unsigned InstrStageNum,
+ SMSchedule &Schedule, ValueMapTy *VRMap);
+ MachineInstr *findDefInLoop(unsigned Reg);
+ unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
+ unsigned LoopStage, ValueMapTy *VRMap,
+ MachineBasicBlock *BB);
+ void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap);
+ void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
+ InstrMapTy &InstrMap, unsigned CurStageNum,
+ unsigned PhiNum, MachineInstr *Phi,
+ unsigned OldReg, unsigned NewReg,
+ unsigned PrevReg = 0);
+ bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
+ unsigned &OffsetPos, unsigned &NewBase,
+ int64_t &NewOffset);
+ void postprocessDAG();
+};
+
+/// A NodeSet contains a set of SUnit DAG nodes with additional information
+/// that assigns a priority to the set.
+class NodeSet {
+ SetVector<SUnit *> Nodes;
+ bool HasRecurrence = false;
+ unsigned RecMII = 0;
+ int MaxMOV = 0;
+ unsigned MaxDepth = 0;
+ unsigned Colocate = 0;
+ SUnit *ExceedPressure = nullptr;
+ unsigned Latency = 0;
+
+public:
+ using iterator = SetVector<SUnit *>::const_iterator;
+
+ NodeSet() = default;
+ NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
+ Latency = 0;
+ for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
+ for (const SDep &Succ : Nodes[i]->Succs)
+ if (Nodes.count(Succ.getSUnit()))
+ Latency += Succ.getLatency();
+ }
+
+ bool insert(SUnit *SU) { return Nodes.insert(SU); }
+
+ void insert(iterator S, iterator E) { Nodes.insert(S, E); }
+
+ template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
+ return Nodes.remove_if(P);
+ }
+
+ unsigned count(SUnit *SU) const { return Nodes.count(SU); }
+
+ bool hasRecurrence() { return HasRecurrence; };
+
+ unsigned size() const { return Nodes.size(); }
+
+ bool empty() const { return Nodes.empty(); }
+
+ SUnit *getNode(unsigned i) const { return Nodes[i]; };
+
+ void setRecMII(unsigned mii) { RecMII = mii; };
+
+ void setColocate(unsigned c) { Colocate = c; };
+
+ void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
+
+ bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
+
+ int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
+
+ int getRecMII() { return RecMII; }
+
+ /// Summarize node functions for the entire node set.
+ void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
+ for (SUnit *SU : *this) {
+ MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
+ MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
+ }
+ }
+
+ unsigned getLatency() { return Latency; }
+
+ unsigned getMaxDepth() { return MaxDepth; }
+
+ void clear() {
+ Nodes.clear();
+ RecMII = 0;
+ HasRecurrence = false;
+ MaxMOV = 0;
+ MaxDepth = 0;
+ Colocate = 0;
+ ExceedPressure = nullptr;
+ }
+
+ operator SetVector<SUnit *> &() { return Nodes; }
+
+ /// Sort the node sets by importance. First, rank them by recurrence MII,
+ /// then by mobility (least mobile done first), and finally by depth.
+ /// Each node set may contain a colocate value which is used as the first
+ /// tie breaker, if it's set.
+ bool operator>(const NodeSet &RHS) const {
+ if (RecMII == RHS.RecMII) {
+ if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
+ return Colocate < RHS.Colocate;
+ if (MaxMOV == RHS.MaxMOV)
+ return MaxDepth > RHS.MaxDepth;
+ return MaxMOV < RHS.MaxMOV;
+ }
+ return RecMII > RHS.RecMII;
+ }
+
+ bool operator==(const NodeSet &RHS) const {
+ return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
+ MaxDepth == RHS.MaxDepth;
+ }
+
+ bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
+
+ iterator begin() { return Nodes.begin(); }
+ iterator end() { return Nodes.end(); }
+ void print(raw_ostream &os) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+#endif
+};
+
+/// This class represents the scheduled code. The main data structure is a
+/// map from scheduled cycle to instructions. During scheduling, the
+/// data structure explicitly represents all stages/iterations. When
+/// the algorithm finishes, the schedule is collapsed into a single stage,
+/// which represents instructions from different loop iterations.
+///
+/// The SMS algorithm allows negative values for cycles, so the first cycle
+/// in the schedule is the smallest cycle value.
+class SMSchedule {
+private:
+ /// Map from execution cycle to instructions.
+ DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
+
+ /// Map from instruction to execution cycle.
+ std::map<SUnit *, int> InstrToCycle;
+
+ /// Map from each register to the max difference between its uses and def.
+ /// The first element in the pair is the max difference in stages. The
+ /// second is true if the register defines a Phi value and the loop value is
+ /// scheduled before the Phi.
+ std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
+
+ /// Keep track of the first cycle value in the schedule. It starts
+ /// as zero, but the algorithm allows negative values.
+ int FirstCycle = 0;
+
+ /// Keep track of the last cycle value in the schedule.
+ int LastCycle = 0;
+
+ /// The initiation interval (II) for the schedule.
+ int InitiationInterval = 0;
+
+ /// Target machine information.
+ const TargetSubtargetInfo &ST;
+
+ /// Virtual register information.
+ MachineRegisterInfo &MRI;
+
+ std::unique_ptr<DFAPacketizer> Resources;
+
+public:
+ SMSchedule(MachineFunction *mf)
+ : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
+ Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
+
+ void reset() {
+ ScheduledInstrs.clear();
+ InstrToCycle.clear();
+ RegToStageDiff.clear();
+ FirstCycle = 0;
+ LastCycle = 0;
+ InitiationInterval = 0;
+ }
+
+ /// Set the initiation interval for this schedule.
+ void setInitiationInterval(int ii) { InitiationInterval = ii; }
+
+ /// Return the first cycle in the completed schedule. This
+ /// can be a negative value.
+ int getFirstCycle() const { return FirstCycle; }
+
+ /// Return the last cycle in the finalized schedule.
+ int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
+
+ /// Return the cycle of the earliest scheduled instruction in the dependence
+ /// chain.
+ int earliestCycleInChain(const SDep &Dep);
+
+ /// Return the cycle of the latest scheduled instruction in the dependence
+ /// chain.
+ int latestCycleInChain(const SDep &Dep);
+
+ void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
+ int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
+ bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
+
+ /// Iterators for the cycle to instruction map.
+ using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
+ using const_sched_iterator =
+ DenseMap<int, std::deque<SUnit *>>::const_iterator;
+
+ /// Return true if the instruction is scheduled at the specified stage.
+ bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
+ return (stageScheduled(SU) == (int)StageNum);
+ }
+
+ /// Return the stage for a scheduled instruction. Return -1 if
+ /// the instruction has not been scheduled.
+ int stageScheduled(SUnit *SU) const {
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
+ if (it == InstrToCycle.end())
+ return -1;
+ return (it->second - FirstCycle) / InitiationInterval;
+ }
+
+ /// Return the cycle for a scheduled instruction. This function normalizes
+ /// the first cycle to be 0.
+ unsigned cycleScheduled(SUnit *SU) const {
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
+ assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
+ return (it->second - FirstCycle) % InitiationInterval;
+ }
+
+ /// Return the maximum stage count needed for this schedule.
+ unsigned getMaxStageCount() {
+ return (LastCycle - FirstCycle) / InitiationInterval;
+ }
+
+ /// Return the max. number of stages/iterations that can occur between a
+ /// register definition and its uses.
+ unsigned getStagesForReg(int Reg, unsigned CurStage) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
+ return 1;
+ return Stages.first;
+ }
+
+ /// The number of stages for a Phi is a little different than other
+ /// instructions. The minimum value computed in RegToStageDiff is 1
+ /// because we assume the Phi is needed for at least 1 iteration.
+ /// This is not the case if the loop value is scheduled prior to the
+ /// Phi in the same stage. This function returns the number of stages
+ /// or iterations needed between the Phi definition and any uses.
+ unsigned getStagesForPhi(int Reg) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (Stages.second)
+ return Stages.first;
+ return Stages.first - 1;
+ }
+
+ /// Return the instructions that are scheduled at the specified cycle.
+ std::deque<SUnit *> &getInstructions(int cycle) {
+ return ScheduledInstrs[cycle];
+ }
+
+ bool isValidSchedule(SwingSchedulerDAG *SSD);
+ void finalizeSchedule(SwingSchedulerDAG *SSD);
+ void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+ std::deque<SUnit *> &Insts);
+ bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
+ bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
+ MachineOperand &MO);
+ void print(raw_ostream &os) const;
+ void dump() const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MACHINEPIPELINER_H
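The stage/cycle bookkeeping in SMSchedule is plain arithmetic over FirstCycle and the initiation interval; a tiny standalone check with hypothetical schedule values makes the mapping concrete.

    #include <cassert>

    int main() {
      const int FirstCycle = -2, II = 3; // hypothetical schedule; negative cycles are allowed
      const int Cycle = 4;               // cycle an instruction was placed at
      int Stage = (Cycle - FirstCycle) / II; // mirrors SMSchedule::stageScheduled
      int Slot  = (Cycle - FirstCycle) % II; // mirrors SMSchedule::cycleScheduled
      assert(Stage == 2 && Slot == 0);   // third overlapped stage, first kernel slot
      return 0;
    }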
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 5bf4a49c8b3b..fef010a23ef9 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -689,15 +689,14 @@ public:
unsigned MinNumRegs = 0);
/// Constrain the register class or the register bank of the virtual register
- /// \p Reg to be a common subclass and a common bank of both registers
- /// provided respectively. Do nothing if any of the attributes (classes,
- /// banks, or low-level types) of the registers are deemed incompatible, or if
- /// the resulting register will have a class smaller than before and of size
- /// less than \p MinNumRegs. Return true if such register attributes exist,
- /// false otherwise.
+ /// \p Reg (and low-level type) to be a common subclass or a common bank of
+ /// both registers provided respectively (and a common low-level type). Do
+ /// nothing if any of the attributes (classes, banks, or low-level types) of
+ /// the registers are deemed incompatible, or if the resulting register will
+ /// have a class smaller than before and of size less than \p MinNumRegs.
+ /// Return true if such register attributes exist, false otherwise.
///
- /// \note Assumes that each register has either a low-level type or a class
- /// assigned, but not both. Use this method instead of constrainRegClass and
+ /// \note Use this method instead of constrainRegClass and
/// RegisterBankInfo::constrainGenericRegister everywhere but SelectionDAG
/// ISel / FastISel and GlobalISel's InstructionSelect pass respectively.
bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg,
@@ -717,6 +716,10 @@ public:
unsigned createVirtualRegister(const TargetRegisterClass *RegClass,
StringRef Name = "");
+ /// Create and return a new virtual register in the function with the same
+ /// attributes as the given register.
+ unsigned cloneVirtualRegister(unsigned VReg, StringRef Name = "");
+
/// Get the low-level type of \p Reg or LLT{} if Reg is not a generic
/// (target independent) virtual register.
LLT getType(unsigned Reg) const {
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
index 85ffa4eda2b8..4bc31ae7c61a 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -132,17 +132,19 @@ struct MachineSchedContext {
/// MachineSchedRegistry provides a selection of available machine instruction
/// schedulers.
-class MachineSchedRegistry : public MachinePassRegistryNode {
+class MachineSchedRegistry
+ : public MachinePassRegistryNode<
+ ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
public:
using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);
// RegisterPassParser requires a (misnamed) FunctionPassCtor type.
using FunctionPassCtor = ScheduleDAGCtor;
- static MachinePassRegistry Registry;
+ static MachinePassRegistry<ScheduleDAGCtor> Registry;
MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
- : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
+ : MachinePassRegistryNode(N, D, C) {
Registry.Add(this);
}
@@ -158,7 +160,7 @@ public:
return (MachineSchedRegistry *)Registry.getList();
}
- static void setListener(MachinePassRegistryListener *L) {
+ static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
Registry.setListener(L);
}
};
@@ -466,6 +468,9 @@ public:
PressureDiff &getPressureDiff(const SUnit *SU) {
return SUPressureDiffs[SU->NodeNum];
}
+ const PressureDiff &getPressureDiff(const SUnit *SU) const {
+ return SUPressureDiffs[SU->NodeNum];
+ }
/// Compute a DFSResult after DAG building is complete, and before any
/// queue comparisons.
@@ -491,6 +496,8 @@ public:
/// Compute the cyclic critical path through the DAG.
unsigned computeCyclicCriticalPath();
+ void dump() const override;
+
protected:
// Top-Level entry points for the schedule() driver...
@@ -787,7 +794,7 @@ public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason : uint8_t {
- NoCand, Only1, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak,
+ NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak,
RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
@@ -895,6 +902,10 @@ protected:
#ifndef NDEBUG
void traceCandidate(const SchedCandidate &Cand);
#endif
+
+private:
+ bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
+ bool ComputeRemLatency, unsigned &RemLatency) const;
};
// Utility functions used by heuristics in tryCandidate().
@@ -917,7 +928,7 @@ bool tryPressure(const PressureChange &TryP,
const TargetRegisterInfo *TRI,
const MachineFunction &MF);
unsigned getWeakLeft(const SUnit *SU, bool isTop);
-int biasPhysRegCopy(const SUnit *SU, bool isTop);
+int biasPhysReg(const SUnit *SU, bool isTop);
/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
@@ -995,7 +1006,7 @@ protected:
const RegPressureTracker &RPTracker,
SchedCandidate &Candidate);
- void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+ void reschedulePhysReg(SUnit *SU, bool isTop);
};
/// PostGenericScheduler - Interface to the scheduling algorithm used by
diff --git a/contrib/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm/include/llvm/CodeGen/Passes.h
index cb12b14f4435..acf1ebb5bc83 100644
--- a/contrib/llvm/include/llvm/CodeGen/Passes.h
+++ b/contrib/llvm/include/llvm/CodeGen/Passes.h
@@ -379,14 +379,20 @@ namespace llvm {
///
FunctionPass *createInterleavedAccessPass();
+ /// InterleavedLoadCombines Pass - This pass identifies interleaved loads and
+ /// combines them into wide loads detectable by InterleavedAccessPass
+ ///
+ FunctionPass *createInterleavedLoadCombinePass();
+
/// LowerEmuTLS - This pass generates __emutls_[vt].xyz variables for all
/// TLS variables for the emulated TLS model.
///
ModulePass *createLowerEmuTLSPass();
- /// This pass lowers the \@llvm.load.relative intrinsic to instructions.
- /// This is unsafe to do earlier because a pass may combine the constant
- /// initializer into the load, which may result in an overflowing evaluation.
+ /// This pass lowers the \@llvm.load.relative and \@llvm.objc.* intrinsics to
+ /// instructions. This is unsafe to do earlier because a pass may combine the
+ /// constant initializer into the load, which may result in an overflowing
+ /// evaluation.
ModulePass *createPreISelIntrinsicLoweringPass();
/// GlobalMerge - This pass merges internal (by default) globals into structs
diff --git a/contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h b/contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
index 7a007eb8bcea..b7f83e515b7e 100644
--- a/contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass implements IR lowering for the llvm.load.relative intrinsic.
+// This pass implements IR lowering for the llvm.load.relative and llvm.objc.*
+// intrinsics.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_PREISELINTRINSICLOWERING_H
diff --git a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
index bdf0bb731540..f66191bc9fb4 100644
--- a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
@@ -36,7 +36,7 @@ raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
/// below the stack frame (e.g., argument space), or constant pool.
class PseudoSourceValue {
public:
- enum PSVKind {
+ enum PSVKind : unsigned {
Stack,
GOT,
JumpTable,
@@ -48,7 +48,7 @@ public:
};
private:
- PSVKind Kind;
+ unsigned Kind;
unsigned AddressSpace;
friend raw_ostream &llvm::operator<<(raw_ostream &OS,
const PseudoSourceValue* PSV);
@@ -60,11 +60,11 @@ private:
virtual void printCustom(raw_ostream &O) const;
public:
- explicit PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII);
+ explicit PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII);
virtual ~PseudoSourceValue();
- PSVKind kind() const { return Kind; }
+ unsigned kind() const { return Kind; }
bool isStack() const { return Kind == Stack; }
bool isGOT() const { return Kind == GOT; }
@@ -116,7 +116,7 @@ public:
class CallEntryPseudoSourceValue : public PseudoSourceValue {
protected:
- CallEntryPseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII);
+ CallEntryPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII);
public:
bool isConstant(const MachineFrameInfo *) const override;
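Storing Kind as a plain unsigned lets targets reserve values past the enum. The sketch below assumes the enum ends in a TargetCustom entry, as in upstream LLVM; that enumerator is not visible in this hunk, and the kind value is illustrative.

    #include "llvm/CodeGen/PseudoSourceValue.h"

    // Sketch only: a target-private kind one past TargetCustom (assumed to be
    // the trailing enumerator). kind() now returns unsigned, so no cast needed.
    static bool isMyTargetBufferPSV(const llvm::PseudoSourceValue &PSV) {
      const unsigned MyBufferKind = llvm::PseudoSourceValue::TargetCustom + 1;
      return PSV.kind() == MyBufferKind;
    }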
diff --git a/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h b/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
index 481747dc163e..b518fbb9c9da 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegAllocRegistry.h
@@ -26,14 +26,14 @@ class FunctionPass;
/// RegisterRegAlloc class - Track the registration of register allocators.
///
//===----------------------------------------------------------------------===//
-class RegisterRegAlloc : public MachinePassRegistryNode {
+class RegisterRegAlloc : public MachinePassRegistryNode<FunctionPass *(*)()> {
public:
using FunctionPassCtor = FunctionPass *(*)();
- static MachinePassRegistry Registry;
+ static MachinePassRegistry<FunctionPassCtor> Registry;
RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
- : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
+ : MachinePassRegistryNode(N, D, C) {
Registry.Add(this);
}
@@ -48,15 +48,11 @@ public:
return (RegisterRegAlloc *)Registry.getList();
}
- static FunctionPassCtor getDefault() {
- return (FunctionPassCtor)Registry.getDefault();
- }
+ static FunctionPassCtor getDefault() { return Registry.getDefault(); }
- static void setDefault(FunctionPassCtor C) {
- Registry.setDefault((MachinePassCtor)C);
- }
+ static void setDefault(FunctionPassCtor C) { Registry.setDefault(C); }
- static void setListener(MachinePassRegistryListener *L) {
+ static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
Registry.setListener(L);
}
};
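Registration itself looks the same as before, minus the cast through MachinePassCtor; a sketch with a hypothetical allocator factory.

    #include "llvm/CodeGen/RegAllocRegistry.h"
    #include "llvm/Pass.h"

    // Sketch only: a real target would return its allocator pass here.
    static llvm::FunctionPass *createToyRegAlloc() { return nullptr; }

    // The node registers itself; the ctor type is now checked by the template
    // rather than funneled through a void *(*)() cast.
    static llvm::RegisterRegAlloc
        ToyRegAlloc("toy", "toy register allocator (sketch)", createToyRegAlloc);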
diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
index efd175eeed30..efecc61d9c30 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -29,7 +29,7 @@
namespace llvm {
class Function;
-class TargetMachine;
+class LLVMTargetMachine;
class PhysicalRegisterUsageInfo : public ImmutablePass {
public:
@@ -41,7 +41,7 @@ public:
}
/// Set TargetMachine which is used to print analysis.
- void setTargetMachine(const TargetMachine &TM);
+ void setTargetMachine(const LLVMTargetMachine &TM);
bool doInitialization(Module &M) override;
@@ -63,7 +63,7 @@ private:
/// and 1 means content of register will be preserved around function call.
DenseMap<const Function *, std::vector<uint32_t>> RegMasks;
- const TargetMachine *TM;
+ const LLVMTargetMachine *TM;
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 56adc2e2fbfa..0870d67db390 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -33,15 +33,15 @@
namespace llvm {
template<class Graph> class GraphWriter;
+class LLVMTargetMachine;
class MachineFunction;
class MachineRegisterInfo;
class MCInstrDesc;
struct MCSchedClassDesc;
-class ScheduleDAG;
class SDNode;
class SUnit;
+class ScheduleDAG;
class TargetInstrInfo;
-class TargetMachine;
class TargetRegisterClass;
class TargetRegisterInfo;
@@ -236,8 +236,7 @@ class TargetRegisterInfo;
Contents.Reg = Reg;
}
- raw_ostream &print(raw_ostream &O,
- const TargetRegisterInfo *TRI = nullptr) const;
+ void dump(const TargetRegisterInfo *TRI = nullptr) const;
};
template <>
@@ -459,12 +458,7 @@ class TargetRegisterInfo;
/// edge occurs first.
void biasCriticalPath();
- void dump(const ScheduleDAG *G) const;
- void dumpAll(const ScheduleDAG *G) const;
- raw_ostream &print(raw_ostream &O,
- const SUnit *Entry = nullptr,
- const SUnit *Exit = nullptr) const;
- raw_ostream &print(raw_ostream &O, const ScheduleDAG *G) const;
+ void dumpAttributes() const;
private:
void ComputeDepth();
@@ -564,7 +558,7 @@ class TargetRegisterInfo;
class ScheduleDAG {
public:
- const TargetMachine &TM; ///< Target processor
+ const LLVMTargetMachine &TM; ///< Target processor
const TargetInstrInfo *TII; ///< Target instruction information
const TargetRegisterInfo *TRI; ///< Target processor register info
MachineFunction &MF; ///< Machine function
@@ -597,7 +591,9 @@ class TargetRegisterInfo;
virtual void viewGraph(const Twine &Name, const Twine &Title);
virtual void viewGraph();
- virtual void dumpNode(const SUnit *SU) const = 0;
+ virtual void dumpNode(const SUnit &SU) const = 0;
+ virtual void dump() const = 0;
+ void dumpNodeName(const SUnit &SU) const;
/// Returns a label for an SUnit node in a visualization of the ScheduleDAG.
virtual std::string getGraphNodeLabel(const SUnit *SU) const = 0;
@@ -614,6 +610,9 @@ class TargetRegisterInfo;
unsigned VerifyScheduledDAG(bool isBottomUp);
#endif
+ protected:
+ void dumpNodeAll(const SUnit &SU) const;
+
private:
/// Returns the MCInstrDesc of this SDNode or NULL.
const MCInstrDesc *getNodeDesc(const SDNode *Node) const;
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 520a23846f6e..daad18125db9 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -327,7 +327,8 @@ namespace llvm {
/// whole MachineFunction. By default does nothing.
virtual void finalizeSchedule() {}
- void dumpNode(const SUnit *SU) const override;
+ void dumpNode(const SUnit &SU) const override;
+ void dump() const override;
/// Returns a label for a DAG node that points to an instruction.
std::string getGraphNodeLabel(const SUnit *SU) const override;
diff --git a/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h b/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
index badf927d0e95..fbe559f25556 100644
--- a/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h
@@ -29,16 +29,19 @@ namespace llvm {
class ScheduleDAGSDNodes;
class SelectionDAGISel;
-class RegisterScheduler : public MachinePassRegistryNode {
+class RegisterScheduler
+ : public MachinePassRegistryNode<
+ ScheduleDAGSDNodes *(*)(SelectionDAGISel *, CodeGenOpt::Level)> {
public:
using FunctionPassCtor = ScheduleDAGSDNodes *(*)(SelectionDAGISel*,
CodeGenOpt::Level);
- static MachinePassRegistry Registry;
+ static MachinePassRegistry<FunctionPassCtor> Registry;
RegisterScheduler(const char *N, const char *D, FunctionPassCtor C)
- : MachinePassRegistryNode(N, D, (MachinePassCtor)C)
- { Registry.Add(this); }
+ : MachinePassRegistryNode(N, D, C) {
+ Registry.Add(this);
+ }
~RegisterScheduler() { Registry.Remove(this); }
@@ -51,7 +54,7 @@ public:
return (RegisterScheduler *)Registry.getList();
}
- static void setListener(MachinePassRegistryListener *L) {
+ static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
Registry.setListener(L);
}
};
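The SelectionDAG scheduler registry follows the same shape, with the (SelectionDAGISel *, CodeGenOpt::Level) factory signature carried by the template; a sketch with a hypothetical factory.

    #include "llvm/CodeGen/SchedulerRegistry.h"

    // Sketch only: a real backend would construct its ScheduleDAGSDNodes here.
    static llvm::ScheduleDAGSDNodes *createToySched(llvm::SelectionDAGISel *,
                                                    llvm::CodeGenOpt::Level) {
      return nullptr;
    }

    static llvm::RegisterScheduler
        ToySched("toy-sched", "toy SelectionDAG scheduler (sketch)", createToySched);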
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
index 888f9425ff90..67fe87fc96af 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -28,7 +28,7 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -188,8 +188,8 @@ public:
return DbgValues.empty() && ByvalParmDbgValues.empty() && DbgLabels.empty();
}
- ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) {
- DbgValMapType::iterator I = DbgValMap.find(Node);
+ ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) const {
+ auto I = DbgValMap.find(Node);
if (I != DbgValMap.end())
return I->second;
return ArrayRef<SDDbgValue*>();
@@ -229,7 +229,7 @@ class SelectionDAG {
LLVMContext *Context;
CodeGenOpt::Level OptLevel;
- DivergenceAnalysis * DA = nullptr;
+ LegacyDivergenceAnalysis * DA = nullptr;
FunctionLoweringInfo * FLI = nullptr;
/// The function-level optimization remark emitter. Used to emit remarks
@@ -308,6 +308,9 @@ public:
: DAGUpdateListener(DAG), Callback(std::move(Callback)) {}
void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); }
+
+ private:
+ virtual void anchor();
};
/// When true, additional steps are taken to
@@ -382,7 +385,7 @@ public:
/// Prepare this SelectionDAG to process code in the given MachineFunction.
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- DivergenceAnalysis * Divergence);
+ LegacyDivergenceAnalysis * Divergence);
void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) {
FLI = FuncInfo;
@@ -471,7 +474,9 @@ public:
return Root;
}
+#ifndef NDEBUG
void VerifyDAGDiverence();
+#endif
/// This iterates over the nodes in the SelectionDAG, folding
/// certain types of nodes together, or eliminating superfluous nodes. The
@@ -784,24 +789,6 @@ public:
/// value assuming it was the smaller SrcTy value.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
- /// Return an operation which will any-extend the low lanes of the operand
- /// into the specified vector type. For example,
- /// this can convert a v16i8 into a v4i32 by any-extending the low four
- /// lanes of the operand from i8 to i32.
- SDValue getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT);
-
- /// Return an operation which will sign extend the low lanes of the operand
- /// into the specified vector type. For example,
- /// this can convert a v16i8 into a v4i32 by sign extending the low four
- /// lanes of the operand from i8 to i32.
- SDValue getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT);
-
- /// Return an operation which will zero extend the low lanes of the operand
- /// into the specified vector type. For example,
- /// this can convert a v16i8 into a v4i32 by zero extending the low four
- /// lanes of the operand from i8 to i32.
- SDValue getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT);
-
/// Convert Op, which must be of integer type, to the integer type VT,
/// by using an extension appropriate for the target's
/// BooleanContent for type OpVT or truncating it.
@@ -945,41 +932,45 @@ public:
Type *SizeTy, unsigned ElemSz, bool isTailCall,
MachinePointerInfo DstPtrInfo);
- /// Helper function to make it easier to build SetCC's if you just
- /// have an ISD::CondCode instead of an SDValue.
- ///
+ /// Helper function to make it easier to build SetCC's if you just have an
+ /// ISD::CondCode instead of an SDValue.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS,
ISD::CondCode Cond) {
assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() &&
- "Cannot compare scalars to vectors");
+ "Cannot compare scalars to vectors");
assert(LHS.getValueType().isVector() == VT.isVector() &&
- "Cannot compare scalars to vectors");
+ "Cannot compare scalars to vectors");
assert(Cond != ISD::SETCC_INVALID &&
- "Cannot create a setCC of an invalid node.");
+ "Cannot create a setCC of an invalid node.");
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
- /// Helper function to make it easier to build Select's if you just
- /// have operands and don't want to check for vector.
+ /// Helper function to make it easier to build Select's if you just have
+ /// operands and don't want to check for vector.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS,
SDValue RHS) {
assert(LHS.getValueType() == RHS.getValueType() &&
"Cannot use select on differing types");
assert(VT.isVector() == LHS.getValueType().isVector() &&
"Cannot mix vectors and scalars");
- return getNode(Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
- Cond, LHS, RHS);
+ auto Opcode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
+ return getNode(Opcode, DL, VT, Cond, LHS, RHS);
}
- /// Helper function to make it easier to build SelectCC's if you
- /// just have an ISD::CondCode instead of an SDValue.
- ///
+ /// Helper function to make it easier to build SelectCC's if you just have an
+ /// ISD::CondCode instead of an SDValue.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True,
SDValue False, ISD::CondCode Cond) {
- return getNode(ISD::SELECT_CC, DL, True.getValueType(),
- LHS, RHS, True, False, getCondCode(Cond));
+ return getNode(ISD::SELECT_CC, DL, True.getValueType(), LHS, RHS, True,
+ False, getCondCode(Cond));
}
+ /// Try to simplify a select/vselect into 1 of its operands or a constant.
+ SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal);
+
+ /// Try to simplify a shift into 1 of its operands or a constant.
+ SDValue simplifyShift(SDValue X, SDValue Y);
+
/// VAArg produces a result and token chain, and takes a pointer
/// and a source value as input.
SDValue getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
@@ -1140,6 +1131,13 @@ public:
/// Expand the specified \c ISD::VACOPY node as the Legalize pass would.
SDValue expandVACopy(SDNode *Node);
+ /// Returns a GlobalAddress of the function from the current module whose
+ /// name matches the given ExternalSymbol. Additionally can provide the
+ /// matched function.
+ /// Panics if the function doesn't exist.
+ SDValue getSymbolFunctionGlobalAddress(SDValue Op,
+ Function **TargetFunction = nullptr);
+
/// *Mutate* the specified node in-place to have the
/// specified operands. If the resultant node already exists in the DAG,
/// this does not modify the specified node, instead it returns the node that
@@ -1156,6 +1154,11 @@ public:
SDValue Op3, SDValue Op4, SDValue Op5);
SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops);
+ /// *Mutate* the specified machine node's memory references to the provided
+ /// list.
+ void setNodeMemRefs(MachineSDNode *N,
+ ArrayRef<MachineMemOperand *> NewMemRefs);
+
// Propagates the change in divergence to users
void updateDivergence(SDNode * N);
@@ -1346,7 +1349,7 @@ public:
void AddDbgLabel(SDDbgLabel *DB);
/// Get the debug values which reference the given SDNode.
- ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) {
+ ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) const {
return DbgInfo->getSDDbgValues(SD);
}
@@ -1429,15 +1432,15 @@ public:
/// every vector element.
/// Targets can implement the computeKnownBitsForTargetNode method in the
/// TargetLowering class to allow target nodes to be understood.
- void computeKnownBits(SDValue Op, KnownBits &Known, unsigned Depth = 0) const;
+ KnownBits computeKnownBits(SDValue Op, unsigned Depth = 0) const;
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. The DemandedElts argument allows us to only collect the
/// known bits that are shared by the requested vector elements.
/// Targets can implement the computeKnownBitsForTargetNode method in the
/// TargetLowering class to allow target nodes to be understood.
- void computeKnownBits(SDValue Op, KnownBits &Known, const APInt &DemandedElts,
- unsigned Depth = 0) const;
+ KnownBits computeKnownBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const;
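A caller-side sketch of the signature change (illustrative only, not part of the patch; `DAG`, `Op` and `DemandedElts` are assumed to be in scope, e.g. inside a DAG combine) — the known bits are now returned by value instead of written through an out-parameter:

    KnownBits Known = DAG.computeKnownBits(Op);            // all lanes demanded
    bool LowBitClear = Known.Zero[0];                       // query a known-zero bit
    KnownBits PerElt = DAG.computeKnownBits(Op, DemandedElts);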
/// Used to represent the possible overflow behavior of an operation.
/// Never: the operation cannot overflow.
@@ -1484,8 +1487,15 @@ public:
/// X|Cst == X+Cst iff X&Cst = 0.
bool isBaseWithConstantOffset(SDValue Op) const;
- /// Test whether the given SDValue is known to never be NaN.
- bool isKnownNeverNaN(SDValue Op) const;
+ /// Test whether the given SDValue is known to never be NaN. If \p SNaN is
+ /// true, returns whether \p Op is known to never be a signaling NaN (it may
+ /// still be a qNaN).
+ bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;
+
+ /// \returns true if \p Op is known to never be a signaling NaN.
+ bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
+ return isKnownNeverNaN(Op, true, Depth);
+ }
/// Test whether the given floating point SDValue is known to never be
/// positive or negative zero.
@@ -1503,6 +1513,27 @@ public:
/// allow an 'add' to be transformed into an 'or'.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const;
+ /// Test whether \p V has a splatted value for all the demanded elements.
+ ///
+ /// On success \p UndefElts will indicate the elements that have UNDEF
+ /// values instead of the splat value; this is only guaranteed to be correct
+ /// for \p DemandedElts.
+ ///
+ /// NOTE: The function will return true for a demanded splat of UNDEF values.
+ bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts);
+
+ /// Test whether \p V has a splatted value.
+ bool isSplatValue(SDValue V, bool AllowUndefs = false);
+
+ /// Match a binop + shuffle pyramid that represents a horizontal reduction
+ /// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node \p
+ /// Extract. The reduction must use one of the opcodes listed in \p
+ /// CandidateBinOps and on success \p BinOp will contain the matching opcode.
+ /// Returns the vector that is being reduced on, or SDValue() if a reduction
+ /// was not matched.
+ SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
+ ArrayRef<ISD::NodeType> CandidateBinOps);
+
/// Utility function used by legalize and lowering to
/// "unroll" a vector operation by splitting out the scalars and operating
/// on each element individually. If the ResNE is 0, fully unroll the vector
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index 580606441a9d..2b2c48d57bc0 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -45,18 +45,21 @@ public:
IsIndexSignExt(IsIndexSignExt) {}
SDValue getBase() { return Base; }
+ SDValue getBase() const { return Base; }
SDValue getIndex() { return Index; }
+ SDValue getIndex() const { return Index; }
- bool equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG) {
+ bool equalBaseIndex(const BaseIndexOffset &Other,
+ const SelectionDAG &DAG) const {
int64_t Off;
return equalBaseIndex(Other, DAG, Off);
}
- bool equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG,
- int64_t &Off);
+ bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG,
+ int64_t &Off) const;
/// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG);
+ static BaseIndexOffset match(const LSBaseSDNode *N, const SelectionDAG &DAG);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 86df0af7303f..6758c55c696a 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -132,6 +132,7 @@ public:
OPC_CheckChild2Same, OPC_CheckChild3Same,
OPC_CheckPatternPredicate,
OPC_CheckPredicate,
+ OPC_CheckPredicateWithOperands,
OPC_CheckOpcode,
OPC_SwitchOpcode,
OPC_CheckType,
@@ -267,6 +268,17 @@ public:
llvm_unreachable("Tblgen should generate the implementation of this!");
}
+ /// CheckNodePredicateWithOperands - This function is generated by tblgen in
+ /// the target.
+ /// It runs node predicate number PredNo and returns true if it succeeds or
+ /// false if it fails. The number is a private implementation detail to the
+ /// code tblgen produces.
+ virtual bool CheckNodePredicateWithOperands(
+ SDNode *N, unsigned PredNo,
+ const SmallVectorImpl<SDValue> &Operands) const {
+ llvm_unreachable("Tblgen should generate the implementation of this!");
+ }
+
virtual bool CheckComplexPattern(SDNode *Root, SDNode *Parent, SDValue N,
unsigned PatternNo,
SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) {
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 1af22185d366..10f284179084 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -672,6 +672,12 @@ public:
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
return true;
}
}
@@ -1589,15 +1595,38 @@ bool isAllOnesConstant(SDValue V);
/// Returns true if \p V is a constant integer one.
bool isOneConstant(SDValue V);
+/// Return the non-bitcasted source operand of \p V if it exists.
+/// If \p V is not a bitcasted value, it is returned as-is.
+SDValue peekThroughBitcasts(SDValue V);
+
+/// Return the non-bitcasted and one-use source operand of \p V if it exists.
+/// If \p V is not a bitcasted one-use value, it is returned as-is.
+SDValue peekThroughOneUseBitcasts(SDValue V);
+
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V);
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
/// Returns the SDNode if it is a constant splat BuildVector or constant float.
-ConstantFPSDNode *isConstOrConstSplatFP(SDValue N);
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
+
+/// Return true if the value is a constant 0 integer or a splatted vector of
+/// a constant 0 integer (with no undefs).
+/// Build vector implicit truncation is not an issue for null values.
+bool isNullOrNullSplat(SDValue V);
+
+/// Return true if the value is a constant 1 integer or a splatted vector of a
+/// constant 1 integer (with no undefs).
+/// Does not permit build vector implicit truncation.
+bool isOneOrOneSplat(SDValue V);
+
+/// Return true if the value is a constant -1 integer or a splatted vector of a
+/// constant -1 integer (with no undefs).
+/// Does not permit build vector implicit truncation.
+bool isAllOnesOrAllOnesSplat(SDValue V);
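A hypothetical combine fragment showing how these helpers compose (`N`, `DAG`, `DL` and `VT` are assumed context, not part of the patch):

    SDValue Src = peekThroughBitcasts(N->getOperand(0));   // strip any bitcasts
    if (isNullOrNullSplat(Src))                             // e.g. x & 0, x * 0
      return DAG.getConstant(0, DL, VT);                    // fold to zero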
class GlobalAddressSDNode : public SDNode {
friend class SelectionDAG;
@@ -2113,12 +2142,15 @@ public:
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
- // In the both nodes address is Op1, mask is Op2:
- // MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value
- // MaskedStoreSDNode (Chain, ptr, mask, data)
+ // MaskedLoadSDNode (Chain, ptr, mask, passthru)
+ // MaskedStoreSDNode (Chain, data, ptr, mask)
// Mask is a vector of i1 elements
- const SDValue &getBasePtr() const { return getOperand(1); }
- const SDValue &getMask() const { return getOperand(2); }
+ const SDValue &getBasePtr() const {
+ return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2);
+ }
+ const SDValue &getMask() const {
+ return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
+ }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD ||
@@ -2143,7 +2175,10 @@ public:
return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
}
- const SDValue &getSrc0() const { return getOperand(3); }
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getMask() const { return getOperand(2); }
+ const SDValue &getPassThru() const { return getOperand(3); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
}
@@ -2175,7 +2210,9 @@ public:
/// memory at base_addr.
bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
- const SDValue &getValue() const { return getOperand(3); }
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;
@@ -2201,7 +2238,6 @@ public:
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getValue() const { return getOperand(1); }
const SDValue &getScale() const { return getOperand(5); }
static bool classof(const SDNode *N) {
@@ -2220,6 +2256,8 @@ public:
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {}
+ const SDValue &getPassThru() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MGATHER;
}
@@ -2235,6 +2273,8 @@ public:
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {}
+ const SDValue &getValue() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSCATTER;
}
@@ -2243,32 +2283,60 @@ public:
/// An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the
/// instruction selection proper phase.
+///
+/// Note that the only supported way to set the `memoperands` is by calling the
+/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
+/// inside the DAG rather than in the node.
class MachineSDNode : public SDNode {
-public:
- using mmo_iterator = MachineMemOperand **;
-
private:
friend class SelectionDAG;
MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
: SDNode(Opc, Order, DL, VTs) {}
- /// Memory reference descriptions for this instruction.
- mmo_iterator MemRefs = nullptr;
- mmo_iterator MemRefsEnd = nullptr;
+ // We use a pointer union between a single `MachineMemOperand` pointer and
+ // a pointer to an array of `MachineMemOperand` pointers. This is null when
+ // the number of these is zero, the single pointer variant is used when the
+ // number is one, and the array is used for larger numbers.
+ //
+ // The array is allocated via the `SelectionDAG`'s allocator and so will
+ // always live until the DAG is cleaned up and doesn't require ownership here.
+ //
+ // We can't use something simpler like `TinyPtrVector` here because `SDNode`
+ // subclasses aren't managed in a conforming C++ manner. See the comments on
+ // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
+ // constraint here is that these don't manage memory with their constructor or
+ // destructor and can be initialized to a good state even if they start off
+ // uninitialized.
+ PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
+
+ // Note that this could be folded into the above `MemRefs` member if doing so
+ // is advantageous at some point. We don't need to store this in most cases.
+ // However, at the moment this doesn't appear to make the allocation any
+ // smaller and makes the code somewhat simpler to read.
+ int NumMemRefs = 0;
public:
- mmo_iterator memoperands_begin() const { return MemRefs; }
- mmo_iterator memoperands_end() const { return MemRefsEnd; }
- bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
+ using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
+
+ ArrayRef<MachineMemOperand *> memoperands() const {
+ // Special case the common cases.
+ if (NumMemRefs == 0)
+ return {};
+ if (NumMemRefs == 1)
+ return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
+
+ // Otherwise we have an actual array.
+ return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
+ }
+ mmo_iterator memoperands_begin() const { return memoperands().begin(); }
+ mmo_iterator memoperands_end() const { return memoperands().end(); }
+ bool memoperands_empty() const { return memoperands().empty(); }
- /// Assign this MachineSDNodes's memory reference descriptor
- /// list. This does not transfer ownership.
- void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
- for (mmo_iterator MMI = NewMemRefs, MME = NewMemRefsEnd; MMI != MME; ++MMI)
- assert(*MMI && "Null mem ref detected!");
- MemRefs = NewMemRefs;
- MemRefsEnd = NewMemRefsEnd;
+ /// Clear out the memory reference descriptor list.
+ void clearMemRefs() {
+ MemRefs = nullptr;
+ NumMemRefs = 0;
}
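A sketch of the intended usage per the note above (hypothetical instruction-selection snippet; `CurDAG`, `NewNode` and `MemOp` are assumed names):

    MachineSDNode *MN = cast<MachineSDNode>(NewNode);
    CurDAG->setNodeMemRefs(MN, {MemOp});          // DAG allocator owns the array
    for (MachineMemOperand *MMO : MN->memoperands())
      (void)MMO;                                  // iterate the attached memrefs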
static bool classof(const SDNode *N) {
@@ -2405,17 +2473,32 @@ namespace ISD {
cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
+ /// Return true if the node is a math/logic binary operator. This corresponds
+ /// to the IR function of the same name.
+ inline bool isBinaryOp(const SDNode *N) {
+ auto Op = N->getOpcode();
+ return (Op == ISD::ADD || Op == ISD::SUB || Op == ISD::MUL ||
+ Op == ISD::AND || Op == ISD::OR || Op == ISD::XOR ||
+ Op == ISD::SHL || Op == ISD::SRL || Op == ISD::SRA ||
+ Op == ISD::SDIV || Op == ISD::UDIV || Op == ISD::SREM ||
+ Op == ISD::UREM || Op == ISD::FADD || Op == ISD::FSUB ||
+ Op == ISD::FMUL || Op == ISD::FDIV || Op == ISD::FREM);
+ }
+
/// Attempt to match a unary predicate against a scalar/splat constant or
/// every element of a constant BUILD_VECTOR.
+ /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
bool matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match);
+ std::function<bool(ConstantSDNode *)> Match,
+ bool AllowUndefs = false);
/// Attempt to match a binary predicate against a pair of scalar/splat
/// constants or every element of a pair of constant BUILD_VECTORs.
+ /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
bool matchBinaryPredicate(
SDValue LHS, SDValue RHS,
- std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match);
-
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
+ bool AllowUndefs = false);
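Illustrative caller (a sketch, not from the patch; `Op` is assumed): with AllowUndefs set, the predicate must tolerate a nullptr for UNDEF elements.

    bool AllPowerOf2 = ISD::matchUnaryPredicate(
        Op,
        [](ConstantSDNode *C) {
          return !C || C->getAPIntValue().isPowerOf2();   // nullptr == UNDEF lane
        },
        /*AllowUndefs=*/true);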
} // end namespace ISD
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
index 334267d9828b..8c8a7be459fd 100644
--- a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -413,8 +413,14 @@ class raw_ostream;
/// Returns the base index for the given instruction.
SlotIndex getInstructionIndex(const MachineInstr &MI) const {
// Instructions inside a bundle have the same number as the bundle itself.
- const MachineInstr &BundleStart = *getBundleStart(MI.getIterator());
- Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleStart);
+ auto BundleStart = getBundleStart(MI.getIterator());
+ auto BundleEnd = getBundleEnd(MI.getIterator());
+ // Use the first non-debug instruction in the bundle to get SlotIndex.
+ const MachineInstr &BundleNonDebug =
+ *skipDebugInstructionsForward(BundleStart, BundleEnd);
+ assert(!BundleNonDebug.isDebugInstr() &&
+ "Could not use a debug instruction to query mi2iMap.");
+ Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleNonDebug);
assert(itr != mi2iMap.end() && "Instruction not found in maps.");
return itr->second;
}
@@ -442,7 +448,7 @@ class raw_ostream;
/// MI is not required to have an index.
SlotIndex getIndexBefore(const MachineInstr &MI) const {
const MachineBasicBlock *MBB = MI.getParent();
- assert(MBB && "MI must be inserted inna basic block");
+ assert(MBB && "MI must be inserted in a basic block");
MachineBasicBlock::const_iterator I = MI, B = MBB->begin();
while (true) {
if (I == B)
@@ -459,7 +465,7 @@ class raw_ostream;
/// MI is not required to have an index.
SlotIndex getIndexAfter(const MachineInstr &MI) const {
const MachineBasicBlock *MBB = MI.getParent();
- assert(MBB && "MI must be inserted inna basic block");
+ assert(MBB && "MI must be inserted in a basic block");
MachineBasicBlock::const_iterator I = MI, E = MBB->end();
while (true) {
++I;
@@ -674,7 +680,7 @@ class raw_ostream;
idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb));
renumberIndexes(newItr);
- llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+ llvm::sort(idx2MBBMap, Idx2MBBCompare());
}
/// Free the resources that were required to maintain a SlotIndex.
diff --git a/contrib/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
index e584a4136e4f..8be9ae378557 100644
--- a/contrib/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
@@ -236,25 +236,6 @@ public:
FnInfos.clear();
}
- /// Generate a stackmap record for a stackmap instruction.
- ///
- /// MI must be a raw STACKMAP, not a PATCHPOINT.
- void recordStackMap(const MachineInstr &MI);
-
- /// Generate a stackmap record for a patchpoint instruction.
- void recordPatchPoint(const MachineInstr &MI);
-
- /// Generate a stackmap record for a statepoint instruction.
- void recordStatepoint(const MachineInstr &MI);
-
- /// If there is any stack map data, create a stack map section and serialize
- /// the map info into it. This clears the stack map data structures
- /// afterwards.
- void serializeToStackMapSection();
-
-private:
- static const char *WSMP;
-
using LocationVec = SmallVector<Location, 8>;
using LiveOutVec = SmallVector<LiveOutReg, 8>;
using ConstantPool = MapVector<uint64_t, uint64_t>;
@@ -283,6 +264,31 @@ private:
using FnInfoMap = MapVector<const MCSymbol *, FunctionInfo>;
using CallsiteInfoList = std::vector<CallsiteInfo>;
+ /// Generate a stackmap record for a stackmap instruction.
+ ///
+ /// MI must be a raw STACKMAP, not a PATCHPOINT.
+ void recordStackMap(const MachineInstr &MI);
+
+ /// Generate a stackmap record for a patchpoint instruction.
+ void recordPatchPoint(const MachineInstr &MI);
+
+ /// Generate a stackmap record for a statepoint instruction.
+ void recordStatepoint(const MachineInstr &MI);
+
+ /// If there is any stack map data, create a stack map section and serialize
+ /// the map info into it. This clears the stack map data structures
+ /// afterwards.
+ void serializeToStackMapSection();
+
+ /// Get call site info.
+ CallsiteInfoList &getCSInfos() { return CSInfos; }
+
+ /// Get function info.
+ FnInfoMap &getFnInfos() { return FnInfos; }
+
+private:
+ static const char *WSMP;
+
AsmPrinter &AP;
CallsiteInfoList CSInfos;
ConstantPool ConstPool;
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index f8effee998e3..b4d1da941433 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -207,8 +207,11 @@ public:
return false;
}
- /// Return true if the target needs to disable frame pointer elimination.
- virtual bool noFramePointerElim(const MachineFunction &MF) const;
+ /// Return true if the target wants to keep the frame pointer regardless of
+ /// the function attribute "frame-pointer".
+ virtual bool keepFramePointer(const MachineFunction &MF) const {
+ return false;
+ }
/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register. For most targets this is true only if the function
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index b5bc561d834c..961b90e9bc12 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -246,14 +246,14 @@ public:
}
/// If the specified machine instruction has a load from a stack slot,
- /// return true along with the FrameIndex of the loaded stack slot and the
- /// machine mem operand containing the reference.
+ /// return true along with the FrameIndices of the loaded stack slot and the
+ /// machine mem operands containing the reference.
/// If not, return false. Unlike isLoadFromStackSlot, this returns true for
/// any instruction that loads from the stack. This is just a hint, as some
/// cases may be missed.
- virtual bool hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
+ virtual bool hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const;
/// If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
@@ -284,14 +284,14 @@ public:
}
/// If the specified machine instruction has a store to a stack slot,
- /// return true along with the FrameIndex of the loaded stack slot and the
- /// machine mem operand containing the reference.
+ /// return true along with the FrameIndices of the loaded stack slot and the
+ /// machine mem operands containing the reference.
/// If not, return false. Unlike isStoreToStackSlot,
/// this returns true for any instruction that stores to the
/// stack. This is just a hint, as some cases may be missed.
- virtual bool hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
+ virtual bool hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const;
/// Return true if the specified machine instruction
/// is a copy of one stack slot to another and has no other effect.
@@ -846,15 +846,33 @@ public:
llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!");
}
+protected:
+ /// Target-dependent implementation for isCopyInstr.
/// If the specific machine instruction is an instruction that moves/copies
/// a value from one register to another register, return true along with
/// @Source machine operand and @Destination machine operand.
- virtual bool isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&SourceOpNum,
- const MachineOperand *&Destination) const {
+ virtual bool isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Source,
+ const MachineOperand *&Destination) const {
return false;
}
+public:
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another register, return true along with
+ /// @Source machine operand and @Destination machine operand.
+ /// For a COPY instruction the method naturally returns true; for all other
+ /// instructions it calls the target-dependent implementation.
+ bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const {
+ if (MI.isCopy()) {
+ Destination = &MI.getOperand(0);
+ Source = &MI.getOperand(1);
+ return true;
+ }
+ return isCopyInstrImpl(MI, Source, Destination);
+ }
+
/// Store the specified register of the given register class to the specified
/// stack frame index. The store instruction is to be added to the given
/// machine basic block before the specified machine instruction. If isKill
@@ -1063,7 +1081,7 @@ public:
/// getAddressSpaceForPseudoSourceKind - Given the kind of memory
/// (e.g. stack) the target returns the corresponding address space.
virtual unsigned
- getAddressSpaceForPseudoSourceKind(PseudoSourceValue::PSVKind Kind) const {
+ getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
return 0;
}
@@ -1118,11 +1136,11 @@ public:
return false;
}
- /// Get the base register and byte offset of an instruction that reads/writes
+ /// Get the base operand and byte offset of an instruction that reads/writes
/// memory.
- virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+ virtual bool getMemOperandWithOffset(MachineInstr &MI,
+ MachineOperand *&BaseOp, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
return false;
}
@@ -1146,8 +1164,8 @@ public:
/// or
/// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
/// to TargetPassConfig::createMachineScheduler() to have an effect.
- virtual bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
- MachineInstr &SecondLdSt, unsigned BaseReg2,
+ virtual bool shouldClusterMemOps(MachineOperand &BaseOp1,
+ MachineOperand &BaseOp2,
unsigned NumLoads) const {
llvm_unreachable("target did not implement shouldClusterMemOps()");
}
@@ -1617,10 +1635,11 @@ public:
"Target didn't implement TargetInstrInfo::getOutliningType!");
}
- /// Returns target-defined flags defining properties of the MBB for
- /// the outliner.
- virtual unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
- return 0x0;
+ /// Optional target hook that returns true if \p MBB is safe to outline from,
+ /// and returns any target-specific information in \p Flags.
+ virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
+ return true;
}
/// Insert a custom frame for outlined functions.
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h
index 40540bd6e1ff..23dbaac03ebe 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -29,7 +29,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -163,6 +163,7 @@ public:
LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
+ MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
};
/// Enum that specifies when a multiplication should be expanded.
@@ -268,6 +269,14 @@ public:
return true;
}
+ /// Return true if it is profitable to convert a select of FP constants into
+ /// a constant pool load whose address depends on the select condition. The
+ /// parameter may be used to differentiate a select with FP compare from
+ /// integer compare.
+ virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ return true;
+ }
+
/// Return true if multiple condition registers are available.
bool hasMultipleConditionRegisters() const {
return HasMultipleConditionRegisters;
@@ -278,7 +287,7 @@ public:
/// Return the preferred vector type legalization action.
virtual TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const {
+ getPreferredVectorAction(MVT VT) const {
// The default action for one element vectors is to scalarize
if (VT.getVectorNumElements() == 1)
return TypeScalarizeVector;
@@ -545,6 +554,12 @@ public:
return false;
}
+ /// Return true if inserting a scalar into a variable element of an undef
+ /// vector is more efficiently handled by splatting the scalar instead.
+ virtual bool shouldSplatInsEltVarIndex(EVT) const {
+ return false;
+ }
+
/// Return true if target supports floating point exceptions.
bool hasFloatingPointExceptions() const {
return HasFloatingPointExceptions;
@@ -790,6 +805,38 @@ public:
return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
}
+ /// Custom method defined by each target to indicate if an operation which
+ /// may require a scale is supported natively by the target.
+ /// If not, the operation is illegal.
+ virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
+ unsigned Scale) const {
+ return false;
+ }
+
+ /// Some fixed point operations may be natively supported by the target but
+ /// only for specific scales. This method allows for checking
+ /// if the width is supported by the target for a given operation that may
+ /// depend on scale.
+ LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
+ unsigned Scale) const {
+ auto Action = getOperationAction(Op, VT);
+ if (Action != Legal)
+ return Action;
+
+ // This operation is supported in this type but may only work on specific
+ // scales.
+ bool Supported;
+ switch (Op) {
+ default:
+ llvm_unreachable("Unexpected fixed point operation.");
+ case ISD::SMULFIX:
+ Supported = isSupportedFixedPointOperation(Op, VT, Scale);
+ break;
+ }
+
+ return Supported ? Action : Expand;
+ }
+
LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
unsigned EqOpc;
switch (Op) {
@@ -798,6 +845,7 @@ public:
case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
+ case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
@@ -811,6 +859,12 @@ public:
case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
+ case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
+ case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
+ case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
+ case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
}
auto Action = getOperationAction(EqOpc, VT);
@@ -1199,13 +1253,15 @@ public:
/// reduce runtime.
virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
- // Return true if it is profitable to reduce the given load node to a smaller
- // type.
- //
- // e.g. (i16 (trunc (i32 (load x))) -> i16 load x should be performed
- virtual bool shouldReduceLoadWidth(SDNode *Load,
- ISD::LoadExtType ExtTy,
+ /// Return true if it is profitable to reduce a load to a smaller type.
+ /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
+ virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ // By default, assume that it is cheaper to extract a subvector from a wide
+ // vector load rather than creating multiple narrow vector loads.
+ if (NewVT.isVector() && !Load->hasOneUse())
+ return false;
+
return true;
}
@@ -1428,6 +1484,12 @@ public:
return PrefLoopAlignment;
}
+ /// Should loops be aligned even when the function is marked OptSize (but not
+ /// MinSize).
+ virtual bool alignLoopsWithOptSize() const {
+ return false;
+ }
+
/// If the target has a standard location for the stack protector guard,
/// returns the address of that location. Otherwise, returns nullptr.
/// DEPRECATED: please override useLoadStackGuardNode and customize
@@ -1549,6 +1611,26 @@ public:
llvm_unreachable("Store conditional unimplemented on this target");
}
+ /// Perform a masked atomicrmw using a target-specific intrinsic. This
+ /// represents the core LL/SC loop which will be lowered at a late stage by
+ /// the backend.
+ virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
+ AtomicRMWInst *AI,
+ Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt,
+ AtomicOrdering Ord) const {
+ llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
+ }
+
+ /// Perform a masked cmpxchg using a target-specific intrinsic. This
+ /// represents the core LL/SC loop which will be lowered at a late stage by
+ /// the backend.
+ virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
+ IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+ Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+ llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
+ }
+
/// Inserts in the IR a target-specific intrinsic specifying a fence.
/// It is called by AtomicExpandPass before expanding an
/// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
@@ -1625,11 +1707,11 @@ public:
return AtomicExpansionKind::None;
}
- /// Returns true if the given atomic cmpxchg should be expanded by the
- /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence
- /// (through emitLoadLinked() and emitStoreConditional()).
- virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
- return false;
+ /// Returns how the given atomic cmpxchg should be expanded by the IR-level
+ /// AtomicExpand pass.
+ virtual AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
+ return AtomicExpansionKind::None;
}
/// Returns how the IR-level AtomicExpand pass should expand the given
@@ -1687,6 +1769,25 @@ public:
return false;
}
+ /// Return true if it is profitable to transform an integer
+ /// multiplication-by-constant into simpler operations like shifts and adds.
+ /// This may be true if the target does not directly support the
+ /// multiplication operation for the specified type or the sequence of simpler
+ /// ops is faster than the multiply.
+ virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+ return false;
+ }
+
+ /// Return true if it is more correct/profitable to use strict FP_TO_INT
+ /// conversion operations - canonicalizing the FP source value instead of
+ /// converting all cases and then selecting based on value.
+ /// This may be true if the target throws exceptions for out of bounds
+ /// conversions or has fast FP CMOV.
+ virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+ bool IsSigned) const {
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
// the derived class constructor to configure this object for the target.
@@ -2015,6 +2116,14 @@ public:
return true;
}
+ /// Return true if the specified immediate is legal for the value input of a
+ /// store instruction.
+ virtual bool isLegalStoreImmediate(int64_t Value) const {
+ // Default implementation assumes that at least 0 works since it is likely
+ // that a zero register exists or a zero immediate is allowed.
+ return Value == 0;
+ }
+
/// Return true if it's significantly cheaper to shift a vector by a uniform
/// scalar than by an amount which will vary across each lane. On x86, for
/// example, there is a "psllw" instruction for the former case, but no simple
@@ -2046,10 +2155,12 @@ public:
case ISD::UADDO:
case ISD::ADDC:
case ISD::ADDE:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
return true;
default: return false;
}
@@ -2153,6 +2264,12 @@ public:
return false;
}
+ /// Return true if sign-extension from FromTy to ToTy is cheaper than
+ /// zero-extension.
+ virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
+ return false;
+ }
+
/// Return true if the target supplies and combines to a paired load
/// two loaded values of type LoadedType next to each other in memory.
/// RequiredAlignment gives the minimal alignment constraints that must be met
@@ -2292,6 +2409,12 @@ public:
return false;
}
+ /// Try to convert an extract element of a vector binary operation into an
+ /// extract element followed by a scalar operation.
+ virtual bool shouldScalarizeBinop(SDValue VecOp) const {
+ return false;
+ }
+
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
// even if the vector itself has multiple uses.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
@@ -2648,7 +2771,7 @@ public:
virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
FunctionLoweringInfo *FLI,
- DivergenceAnalysis *DA) const {
+ LegacyDivergenceAnalysis *DA) const {
return false;
}
@@ -2774,36 +2897,33 @@ public:
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
TargetLoweringOpt &TLO) const;
- /// Helper for SimplifyDemandedBits that can simplify an operation with
- /// multiple uses. This function simplifies operand \p OpIdx of \p User and
- /// then updates \p User with the simplified version. No other uses of
- /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this
- /// function behaves exactly like function SimplifyDemandedBits declared
- /// below except that it also updates the DAG by calling
- /// DCI.CommitTargetLoweringOpt.
- bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded,
- DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const;
-
- /// Look at Op. At this point, we know that only the DemandedMask bits of the
+ /// Look at Op. At this point, we know that only the DemandedBits bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning
/// the original and new nodes in Old and New. Otherwise, analyze the
/// expression and return a mask of KnownOne and KnownZero bits for the
/// expression (used to simplify the caller). The KnownZero/One bits may only
- /// be accurate for those bits in the DemandedMask.
+ /// be accurate for those bits in the Demanded masks.
/// \p AssumeSingleUse When this parameter is true, this function will
/// attempt to simplify \p Op even if there are multiple uses.
/// Callers are responsible for correctly updating the DAG based on the
/// results of this function, because simply replacing TLO.Old
/// with TLO.New will be incorrect when this parameter is true and TLO.Old
/// has multiple uses.
- bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
- KnownBits &Known,
- TargetLoweringOpt &TLO,
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts, KnownBits &Known,
+ TargetLoweringOpt &TLO, unsigned Depth = 0,
+ bool AssumeSingleUse = false) const;
+
+ /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
+ /// Adds Op back to the worklist upon success.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth = 0,
bool AssumeSingleUse = false) const;
- /// Helper wrapper around SimplifyDemandedBits
+ /// Helper wrapper around SimplifyDemandedBits.
+ /// Adds Op back to the worklist upon success.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
DAGCombinerInfo &DCI) const;
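A hedged sketch of how a target combine might call the new demanded-elements overload (all names here -- `TLI`, `DCI`, `DAG`, `Op`, `N`, `NumElts` -- are assumed context, not defined by the patch):

    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                          !DCI.isBeforeLegalizeOps());
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    APInt DemandedBits = APInt::getLowBitsSet(BitWidth, BitWidth / 2);
    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    KnownBits Known;
    if (TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO)) {
      DCI.CommitTargetLoweringOpt(TLO);   // commit the simplified node
      return SDValue(N, 0);
    }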
@@ -2826,7 +2946,8 @@ public:
TargetLoweringOpt &TLO, unsigned Depth = 0,
bool AssumeSingleUse = false) const;
- /// Helper wrapper around SimplifyDemandedVectorElts
+ /// Helper wrapper around SimplifyDemandedVectorElts.
+ /// Adds Op back to the worklist upon success.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
APInt &KnownUndef, APInt &KnownZero,
DAGCombinerInfo &DCI) const;
@@ -2863,11 +2984,30 @@ public:
/// elements, returning true on success. Otherwise, analyze the expression and
/// return a mask of KnownUndef and KnownZero elements for the expression
/// (used to simplify the caller). The KnownUndef/Zero elements may only be
- /// accurate for those bits in the DemandedMask
+ /// accurate for those bits in the DemandedMask.
virtual bool SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
+ /// Attempt to simplify any target nodes based on the demanded bits/elts,
+ /// returning true on success. Otherwise, analyze the
+ /// expression and return a mask of KnownOne and KnownZero bits for the
+ /// expression (used to simplify the caller). The KnownZero/One bits may only
+ /// be accurate for those bits in the Demanded masks.
+ virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth = 0) const;
+
+ /// If \p SNaN is false, \returns true if \p Op is known to never be any
+ /// NaN. If \p SNaN is true, returns whether \p Op is known to never be a
+ /// signaling NaN.
+ virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const;
struct DAGCombinerInfo {
void *DC; // The DAG Combiner object.
CombineLevel Level;
@@ -2935,12 +3075,25 @@ public:
///
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- /// Return true if it is profitable to move a following shift through this
- // node, adjusting any immediate operands as necessary to preserve semantics.
- // This transformation may not be desirable if it disrupts a particularly
- // auspicious target-specific tree (e.g. bitfield extraction in AArch64).
- // By default, it returns true.
- virtual bool isDesirableToCommuteWithShift(const SDNode *N) const {
+ /// Return true if it is profitable to move this shift by a constant amount
+ /// through its operand, adjusting any immediate operands as necessary to
+ /// preserve semantics. This transformation may not be desirable if it
+ /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
+ /// extraction in AArch64). By default, it returns true.
+ ///
+ /// @param N the shift node
+ /// @param Level the current DAGCombine legalization level.
+ virtual bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const {
+ return true;
+ }
+
+ /// Return true if it is profitable to fold a pair of shifts into a mask.
+ /// This is usually true on most targets. But some targets, like Thumb1,
+ /// have immediate shift instructions, but no immediate "and" instruction;
+ /// this makes the fold unprofitable.
+ virtual bool shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const {
return true;
}
@@ -3488,11 +3641,9 @@ public:
//===--------------------------------------------------------------------===//
// Div utility functions
//
- SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- bool IsAfterLegalization,
+ SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const;
- SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
- bool IsAfterLegalization,
+ SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const;
/// Targets may override this function to provide custom SDIV lowering for
@@ -3584,12 +3735,68 @@ public:
SDValue LL = SDValue(), SDValue LH = SDValue(),
SDValue RL = SDValue(), SDValue RH = SDValue()) const;
+ /// Expand funnel shift.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
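For reference, a scalar C++ rendering of the funnel-shift-left semantics being expanded here (a standalone sketch, not part of the patch):

    #include <cstdint>
    uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
      Amt &= 31;                                           // shift amount modulo width
      return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;  // concat(Hi, Lo) << Amt, high half
    }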
+
+ /// Expand rotations.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
/// Expand float(f32) to SINT(i64) conversion
/// \param N Node to expand
/// \param Result output after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ /// Expand float to UINT conversion
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand UINT(i64) to double(f64) conversion
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
+ SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
+
+ /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes;
+ /// vector nodes can only succeed if all operations are legal/custom.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes;
+ /// vector nodes can only succeed if all operations are legal/custom.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes;
+ /// vector nodes can only succeed if all operations are legal/custom.
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
+ /// Expand ABS nodes. Expands vector/scalar ABS nodes;
+ /// vector nodes can only succeed if all operations are legal/custom.
+ /// (ABS x) -> (XOR (ADD x, (SRA x, type_size - 1)), (SRA x, type_size - 1))
+ /// \param N Node to expand
+ /// \param Result output after conversion
+ /// \returns True, if the expansion was successful, false otherwise
+ bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
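For reference, a scalar C++ rendering of the ABS expansion documented above (a sketch; 31 is the sign-bit position for i32, and the arithmetic is done in unsigned to mirror the wrapping DAG semantics):

    #include <cstdint>
    int32_t absExpansion(int32_t X) {
      uint32_t Sra = (uint32_t)(X >> 31);           // SRA x, 31: 0 or all-ones sign mask
      return (int32_t)(((uint32_t)X + Sra) ^ Sra);  // (XOR (ADD x, sra), sra)
    }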
+
/// Turn load of vector type into a load of the individual elements.
/// \param LD load to expand
/// \returns MERGE_VALUEs of the scalar loads with their chains.
@@ -3627,6 +3834,15 @@ public:
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
SDValue Index) const;
+ /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
+ /// method accepts integers as its arguments.
+ SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
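An illustrative scalar equivalent of the unsigned saturating-add case (UADDSAT) on i8, just to make the semantics concrete (a sketch, not the DAG expansion itself):

    #include <cstdint>
    uint8_t uaddsat8(uint8_t A, uint8_t B) {
      uint8_t Sum = (uint8_t)(A + B);   // wraps on overflow
      return Sum < A ? 0xFF : Sum;      // clamp to the maximum when it wrapped
    }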
+
+ /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts
+ /// integers as its arguments.
+ SDValue getExpandedFixedPointMultiplication(SDNode *Node,
+ SelectionDAG &DAG) const;
+
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index f5c7fc824ab4..052d1f8bc686 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -90,6 +90,8 @@ public:
const MCExpr *lowerRelativeReference(const GlobalValue *LHS,
const GlobalValue *RHS,
const TargetMachine &TM) const override;
+
+ MCSection *getSectionForCommandLines() const override;
};
class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h b/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 8f5c9cb8c3fa..3288711a335d 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -90,6 +90,19 @@ private:
AnalysisID StartAfter = nullptr;
AnalysisID StopBefore = nullptr;
AnalysisID StopAfter = nullptr;
+
+ unsigned StartBeforeInstanceNum = 0;
+ unsigned StartBeforeCount = 0;
+
+ unsigned StartAfterInstanceNum = 0;
+ unsigned StartAfterCount = 0;
+
+ unsigned StopBeforeInstanceNum = 0;
+ unsigned StopBeforeCount = 0;
+
+ unsigned StopAfterInstanceNum = 0;
+ unsigned StopAfterCount = 0;
+
bool Started = true;
bool Stopped = false;
bool AddingMachinePasses = false;
@@ -145,13 +158,13 @@ public:
CodeGenOpt::Level getOptLevel() const;
- /// Describe the status of the codegen
- /// pipeline set by this target pass config.
- /// Having a limited codegen pipeline means that options
- /// have been used to restrict what codegen is doing.
- /// In particular, that means that codegen won't emit
- /// assembly code.
- bool hasLimitedCodeGenPipeline() const;
+ /// Returns true if one of the `-start-after`, `-start-before`, `-stop-after`
+ /// or `-stop-before` options is set.
+ static bool hasLimitedCodeGenPipeline();
+
+ /// Returns true if none of the `-stop-before` and `-stop-after` options is
+ /// set.
+ static bool willCompleteCodeGenPipeline();
/// If hasLimitedCodeGenPipeline is true, this method
/// returns a string with the name of the options, separated
@@ -159,13 +172,6 @@ public:
std::string
getLimitedCodeGenPipelineReason(const char *Separator = "/") const;
- /// Check if the codegen pipeline is limited in such a way that it
- /// won't be complete. When the codegen pipeline is not complete,
- /// this means it may not be possible to generate assembly from it.
- bool willCompleteCodeGenPipeline() const {
- return !hasLimitedCodeGenPipeline() || (!StopAfter && !StopBefore);
- }
-
void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); }
bool getEnableTailMerge() const { return EnableTailMerge; }
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 55a8ba630a59..0fbff3137653 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -510,6 +510,13 @@ public:
/// markSuperRegs() and checkAllSuperRegsMarked() in this case.
virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
+ /// Returns false if we can't guarantee that Physreg, specified as an IR asm
+ /// clobber constraint, will be preserved across the statement.
+ virtual bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return true;
+ }
+
/// Returns true if PhysReg is unallocatable and constant throughout the
/// function. Used by MachineRegisterInfo::isConstantPhysReg().
virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
@@ -817,13 +824,6 @@ public:
// Do nothing.
}
- /// The creation of multiple copy hints have been implemented in
- /// weightCalcHelper(), but since this affects so many tests for many
- /// targets, this is temporarily disabled per default. THIS SHOULD BE
- /// "GENERAL GOODNESS" and hopefully all targets will update their tests
- /// and enable this soon. This hook should then be removed.
- virtual bool enableMultipleCopyHints() const { return false; }
-
/// Allow the target to reverse allocation order of local live ranges. This
/// will generally allocate shorter local live ranges first. For targets with
/// many registers, this could reduce regalloc compile time by a large
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 227e591f5a7d..968e4c4b8102 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_TARGETSUBTARGETINFO_H
#define LLVM_CODEGEN_TARGETSUBTARGETINFO_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -144,6 +145,43 @@ public:
return 0;
}
+ /// Returns true if MI is a dependency breaking zero-idiom instruction for the
+ /// subtarget.
+ ///
+ /// This function also sets bits in Mask related to input operands that
+ /// are not in a data dependency relationship. There is one bit for each
+ /// machine operand; implicit operands follow explicit operands in the bit
+ /// representation used for Mask. An empty mask (i.e. a mask with all bits
+ /// cleared) means that data dependencies are "broken" for all the explicit
+ /// input machine operands of MI.
+ virtual bool isZeroIdiom(const MachineInstr *MI, APInt &Mask) const {
+ return false;
+ }
+
+ /// Returns true if MI is a dependency breaking instruction for the subtarget.
+ ///
+ /// Similar in behavior to `isZeroIdiom`. However, it knows how to identify
+ /// all dependency breaking instructions (i.e. not just zero-idioms).
+ ///
+ /// As for `isZeroIdiom`, this method returns a mask of "broken" dependencies.
+ /// (See method `isZeroIdiom` for a detailed description of Mask).
+ virtual bool isDependencyBreaking(const MachineInstr *MI, APInt &Mask) const {
+ return isZeroIdiom(MI, Mask);
+ }
+
+ /// Returns true if MI is a candidate for move elimination.
+ ///
+ /// A candidate for move elimination may be optimized out at register renaming
+ /// stage. Subtargets can specify the set of optimizable moves by
+ /// instantiating tablegen class `IsOptimizableRegisterMove` (see
+ /// llvm/Target/TargetInstrPredicate.td).
+ ///
+ /// SubtargetEmitter is responsible for processing all the definitions of class
+ /// IsOptimizableRegisterMove, and for auto-generating an override for this
+ /// method.
+ virtual bool isOptimizableRegisterMove(const MachineInstr *MI) const {
+ return false;
+ }
+
/// True if the subtarget should run MachineScheduler after aggressive
/// coalescing.
///
diff --git a/contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
index 3ad6760d8813..219fff988f6e 100644
--- a/contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -14,13 +14,15 @@
#ifndef LLVM_CODEGEN_WASMEHFUNCINFO_H
#define LLVM_CODEGEN_WASMEHFUNCINFO_H
-#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/BasicBlock.h"
namespace llvm {
+enum EventTag { CPP_EXCEPTION = 0, C_LONGJMP = 1 };
+
using BBOrMBB = PointerUnion<const BasicBlock *, MachineBasicBlock *>;
struct WasmEHFuncInfo {
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
index 9dbeb438f4ae..11ca9ff108de 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -45,13 +45,8 @@ public:
return RecordData.drop_front(sizeof(RecordPrefix));
}
- Optional<uint32_t> hash() const { return Hash; }
-
- void setHash(uint32_t Value) { Hash = Value; }
-
Kind Type;
ArrayRef<uint8_t> RecordData;
- Optional<uint32_t> Hash;
};
template <typename Kind> struct RemappedRecord {
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
index 4ce9f68cffd9..8e0d9f608e93 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -231,6 +231,8 @@ enum class FrameProcedureOptions : uint32_t {
Inlined = 0x00000800,
StrictSecurityChecks = 0x00001000,
SafeBuffers = 0x00002000,
+ EncodedLocalBasePointerMask = 0x0000C000,
+ EncodedParamBasePointerMask = 0x00030000,
ProfileGuidedOptimization = 0x00040000,
ValidProfileCounts = 0x00080000,
OptimizedForSpeed = 0x00100000,
@@ -356,7 +358,9 @@ enum class PointerOptions : uint32_t {
Const = 0x00000400,
Unaligned = 0x00000800,
Restrict = 0x00001000,
- WinRTSmartPointer = 0x00080000
+ WinRTSmartPointer = 0x00080000,
+ LValueRefThisPointer = 0x00100000,
+ RValueRefThisPointer = 0x00200000
};
CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(PointerOptions)
@@ -510,6 +514,19 @@ enum class RegisterId : uint16_t {
#undef CV_REGISTER
};
+/// Two-bit value indicating which register is the designated frame pointer
+/// register. Appears in the S_FRAMEPROC record flags.
+enum class EncodedFramePtrReg : uint8_t {
+ None = 0,
+ StackPtr = 1,
+ FramePtr = 2,
+ BasePtr = 3,
+};
+
+RegisterId decodeFramePtrReg(EncodedFramePtrReg EncodedReg, CPUType CPU);
+
+EncodedFramePtrReg encodeFramePtrReg(RegisterId Reg, CPUType CPU);
+
/// These values correspond to the THUNK_ORDINAL enumeration.
enum class ThunkOrdinal : uint8_t {
Standard,
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
index 586a720ce6e4..d4615d02220d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
@@ -24,23 +24,32 @@ enum class cv_error_code {
no_records,
unknown_member_record,
};
+} // namespace codeview
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::codeview::cv_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace codeview {
+const std::error_category &CVErrorCategory();
+
+inline std::error_code make_error_code(cv_error_code E) {
+ return std::error_code(static_cast<int>(E), CVErrorCategory());
+}
/// Base class for errors originating when parsing raw PDB files
-class CodeViewError : public ErrorInfo<CodeViewError> {
+class CodeViewError : public ErrorInfo<CodeViewError, StringError> {
public:
+ using ErrorInfo<CodeViewError,
+ StringError>::ErrorInfo; // inherit constructors
+ CodeViewError(const Twine &S) : ErrorInfo(S, cv_error_code::unspecified) {}
static char ID;
- CodeViewError(cv_error_code C);
- CodeViewError(const std::string &Context);
- CodeViewError(cv_error_code C, const std::string &Context);
+};
- void log(raw_ostream &OS) const override;
- const std::string &getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
+} // namespace codeview
+} // namespace llvm
-private:
- std::string ErrMsg;
- cv_error_code Code;
-};
-}
-}
#endif
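
The rewritten CodeViewError above hooks cv_error_code into the standard error-code machinery by specializing std::is_error_code_enum and providing make_error_code next to a custom error category. The same pattern can be seen in isolation with toy names (demo_error_code and DemoErrorCategory are illustrative, not LLVM identifiers):

#include <iostream>
#include <string>
#include <system_error>

enum class demo_error_code { unspecified = 1, corrupt_record };

namespace std {
template <> struct is_error_code_enum<demo_error_code> : std::true_type {};
} // namespace std

namespace {
class DemoErrorCategory : public std::error_category {
  const char *name() const noexcept override { return "demo.codeview"; }
  std::string message(int C) const override {
    switch (static_cast<demo_error_code>(C)) {
    case demo_error_code::corrupt_record:
      return "corrupt record";
    default:
      return "unspecified error";
    }
  }
};
} // namespace

std::error_code make_error_code(demo_error_code E) {
  static DemoErrorCategory Cat;          // single category instance
  return std::error_code(static_cast<int>(E), Cat);
}

int main() {
  // The enum converts implicitly because is_error_code_enum is specialized.
  std::error_code EC = demo_error_code::corrupt_record;
  std::cout << EC.category().name() << ": " << EC.message() << "\n";
  return 0;
}
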
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index 6da8893bd61a..fdfcf4d53a23 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -18,251 +18,342 @@
// This currently only contains the "register subset shared by all processor
// types" (ERR etc.) and the x86 registers.
-CV_REGISTER(CVRegERR, 30000)
-CV_REGISTER(CVRegTEB, 30001)
-CV_REGISTER(CVRegTIMER, 30002)
-CV_REGISTER(CVRegEFAD1, 30003)
-CV_REGISTER(CVRegEFAD2, 30004)
-CV_REGISTER(CVRegEFAD3, 30005)
-CV_REGISTER(CVRegVFRAME, 30006)
-CV_REGISTER(CVRegHANDLE, 30007)
-CV_REGISTER(CVRegPARAMS, 30008)
-CV_REGISTER(CVRegLOCALS, 30009)
-CV_REGISTER(CVRegTID, 30010)
-CV_REGISTER(CVRegENV, 30011)
-CV_REGISTER(CVRegCMDLN, 30012)
-
-CV_REGISTER(CVRegNONE, 0)
-CV_REGISTER(CVRegAL, 1)
-CV_REGISTER(CVRegCL, 2)
-CV_REGISTER(CVRegDL, 3)
-CV_REGISTER(CVRegBL, 4)
-CV_REGISTER(CVRegAH, 5)
-CV_REGISTER(CVRegCH, 6)
-CV_REGISTER(CVRegDH, 7)
-CV_REGISTER(CVRegBH, 8)
-CV_REGISTER(CVRegAX, 9)
-CV_REGISTER(CVRegCX, 10)
-CV_REGISTER(CVRegDX, 11)
-CV_REGISTER(CVRegBX, 12)
-CV_REGISTER(CVRegSP, 13)
-CV_REGISTER(CVRegBP, 14)
-CV_REGISTER(CVRegSI, 15)
-CV_REGISTER(CVRegDI, 16)
-CV_REGISTER(CVRegEAX, 17)
-CV_REGISTER(CVRegECX, 18)
-CV_REGISTER(CVRegEDX, 19)
-CV_REGISTER(CVRegEBX, 20)
-CV_REGISTER(CVRegESP, 21)
-CV_REGISTER(CVRegEBP, 22)
-CV_REGISTER(CVRegESI, 23)
-CV_REGISTER(CVRegEDI, 24)
-CV_REGISTER(CVRegES, 25)
-CV_REGISTER(CVRegCS, 26)
-CV_REGISTER(CVRegSS, 27)
-CV_REGISTER(CVRegDS, 28)
-CV_REGISTER(CVRegFS, 29)
-CV_REGISTER(CVRegGS, 30)
-CV_REGISTER(CVRegIP, 31)
-CV_REGISTER(CVRegFLAGS, 32)
-CV_REGISTER(CVRegEIP, 33)
-CV_REGISTER(CVRegEFLAGS, 34)
-CV_REGISTER(CVRegTEMP, 40)
-CV_REGISTER(CVRegTEMPH, 41)
-CV_REGISTER(CVRegQUOTE, 42)
-CV_REGISTER(CVRegPCDR3, 43)
-CV_REGISTER(CVRegPCDR4, 44)
-CV_REGISTER(CVRegPCDR5, 45)
-CV_REGISTER(CVRegPCDR6, 46)
-CV_REGISTER(CVRegPCDR7, 47)
-CV_REGISTER(CVRegCR0, 80)
-CV_REGISTER(CVRegCR1, 81)
-CV_REGISTER(CVRegCR2, 82)
-CV_REGISTER(CVRegCR3, 83)
-CV_REGISTER(CVRegCR4, 84)
-CV_REGISTER(CVRegDR0, 90)
-CV_REGISTER(CVRegDR1, 91)
-CV_REGISTER(CVRegDR2, 92)
-CV_REGISTER(CVRegDR3, 93)
-CV_REGISTER(CVRegDR4, 94)
-CV_REGISTER(CVRegDR5, 95)
-CV_REGISTER(CVRegDR6, 96)
-CV_REGISTER(CVRegDR7, 97)
-CV_REGISTER(CVRegGDTR, 110)
-CV_REGISTER(CVRegGDTL, 111)
-CV_REGISTER(CVRegIDTR, 112)
-CV_REGISTER(CVRegIDTL, 113)
-CV_REGISTER(CVRegLDTR, 114)
-CV_REGISTER(CVRegTR, 115)
-
-CV_REGISTER(CVRegPSEUDO1, 116)
-CV_REGISTER(CVRegPSEUDO2, 117)
-CV_REGISTER(CVRegPSEUDO3, 118)
-CV_REGISTER(CVRegPSEUDO4, 119)
-CV_REGISTER(CVRegPSEUDO5, 120)
-CV_REGISTER(CVRegPSEUDO6, 121)
-CV_REGISTER(CVRegPSEUDO7, 122)
-CV_REGISTER(CVRegPSEUDO8, 123)
-CV_REGISTER(CVRegPSEUDO9, 124)
-
-CV_REGISTER(CVRegST0, 128)
-CV_REGISTER(CVRegST1, 129)
-CV_REGISTER(CVRegST2, 130)
-CV_REGISTER(CVRegST3, 131)
-CV_REGISTER(CVRegST4, 132)
-CV_REGISTER(CVRegST5, 133)
-CV_REGISTER(CVRegST6, 134)
-CV_REGISTER(CVRegST7, 135)
-CV_REGISTER(CVRegCTRL, 136)
-CV_REGISTER(CVRegSTAT, 137)
-CV_REGISTER(CVRegTAG, 138)
-CV_REGISTER(CVRegFPIP, 139)
-CV_REGISTER(CVRegFPCS, 140)
-CV_REGISTER(CVRegFPDO, 141)
-CV_REGISTER(CVRegFPDS, 142)
-CV_REGISTER(CVRegISEM, 143)
-CV_REGISTER(CVRegFPEIP, 144)
-CV_REGISTER(CVRegFPEDO, 145)
-
-CV_REGISTER(CVRegMM0, 146)
-CV_REGISTER(CVRegMM1, 147)
-CV_REGISTER(CVRegMM2, 148)
-CV_REGISTER(CVRegMM3, 149)
-CV_REGISTER(CVRegMM4, 150)
-CV_REGISTER(CVRegMM5, 151)
-CV_REGISTER(CVRegMM6, 152)
-CV_REGISTER(CVRegMM7, 153)
-
-CV_REGISTER(CVRegXMM0, 154)
-CV_REGISTER(CVRegXMM1, 155)
-CV_REGISTER(CVRegXMM2, 156)
-CV_REGISTER(CVRegXMM3, 157)
-CV_REGISTER(CVRegXMM4, 158)
-CV_REGISTER(CVRegXMM5, 159)
-CV_REGISTER(CVRegXMM6, 160)
-CV_REGISTER(CVRegXMM7, 161)
-
-CV_REGISTER(CVRegMXCSR, 211)
-
-CV_REGISTER(CVRegEDXEAX, 212)
-
-CV_REGISTER(CVRegEMM0L, 220)
-CV_REGISTER(CVRegEMM1L, 221)
-CV_REGISTER(CVRegEMM2L, 222)
-CV_REGISTER(CVRegEMM3L, 223)
-CV_REGISTER(CVRegEMM4L, 224)
-CV_REGISTER(CVRegEMM5L, 225)
-CV_REGISTER(CVRegEMM6L, 226)
-CV_REGISTER(CVRegEMM7L, 227)
-
-CV_REGISTER(CVRegEMM0H, 228)
-CV_REGISTER(CVRegEMM1H, 229)
-CV_REGISTER(CVRegEMM2H, 230)
-CV_REGISTER(CVRegEMM3H, 231)
-CV_REGISTER(CVRegEMM4H, 232)
-CV_REGISTER(CVRegEMM5H, 233)
-CV_REGISTER(CVRegEMM6H, 234)
-CV_REGISTER(CVRegEMM7H, 235)
-
-CV_REGISTER(CVRegMM00, 236)
-CV_REGISTER(CVRegMM01, 237)
-CV_REGISTER(CVRegMM10, 238)
-CV_REGISTER(CVRegMM11, 239)
-CV_REGISTER(CVRegMM20, 240)
-CV_REGISTER(CVRegMM21, 241)
-CV_REGISTER(CVRegMM30, 242)
-CV_REGISTER(CVRegMM31, 243)
-CV_REGISTER(CVRegMM40, 244)
-CV_REGISTER(CVRegMM41, 245)
-CV_REGISTER(CVRegMM50, 246)
-CV_REGISTER(CVRegMM51, 247)
-CV_REGISTER(CVRegMM60, 248)
-CV_REGISTER(CVRegMM61, 249)
-CV_REGISTER(CVRegMM70, 250)
-CV_REGISTER(CVRegMM71, 251)
-
-CV_REGISTER(CVRegBND0, 396)
-CV_REGISTER(CVRegBND1, 397)
-CV_REGISTER(CVRegBND2, 398)
-
-
-CV_REGISTER(CVRegXMM8, 252)
-CV_REGISTER(CVRegXMM9, 253)
-CV_REGISTER(CVRegXMM10, 254)
-CV_REGISTER(CVRegXMM11, 255)
-CV_REGISTER(CVRegXMM12, 256)
-CV_REGISTER(CVRegXMM13, 257)
-CV_REGISTER(CVRegXMM14, 258)
-CV_REGISTER(CVRegXMM15, 259)
-
-
-CV_REGISTER(CVRegSIL, 324)
-CV_REGISTER(CVRegDIL, 325)
-CV_REGISTER(CVRegBPL, 326)
-CV_REGISTER(CVRegSPL, 327)
-
-CV_REGISTER(CVRegRAX, 328)
-CV_REGISTER(CVRegRBX, 329)
-CV_REGISTER(CVRegRCX, 330)
-CV_REGISTER(CVRegRDX, 331)
-CV_REGISTER(CVRegRSI, 332)
-CV_REGISTER(CVRegRDI, 333)
-CV_REGISTER(CVRegRBP, 334)
-CV_REGISTER(CVRegRSP, 335)
-
-CV_REGISTER(CVRegR8, 336)
-CV_REGISTER(CVRegR9, 337)
-CV_REGISTER(CVRegR10, 338)
-CV_REGISTER(CVRegR11, 339)
-CV_REGISTER(CVRegR12, 340)
-CV_REGISTER(CVRegR13, 341)
-CV_REGISTER(CVRegR14, 342)
-CV_REGISTER(CVRegR15, 343)
-
-CV_REGISTER(CVRegR8B, 344)
-CV_REGISTER(CVRegR9B, 345)
-CV_REGISTER(CVRegR10B, 346)
-CV_REGISTER(CVRegR11B, 347)
-CV_REGISTER(CVRegR12B, 348)
-CV_REGISTER(CVRegR13B, 349)
-CV_REGISTER(CVRegR14B, 350)
-CV_REGISTER(CVRegR15B, 351)
-
-CV_REGISTER(CVRegR8W, 352)
-CV_REGISTER(CVRegR9W, 353)
-CV_REGISTER(CVRegR10W, 354)
-CV_REGISTER(CVRegR11W, 355)
-CV_REGISTER(CVRegR12W, 356)
-CV_REGISTER(CVRegR13W, 357)
-CV_REGISTER(CVRegR14W, 358)
-CV_REGISTER(CVRegR15W, 359)
-
-CV_REGISTER(CVRegR8D, 360)
-CV_REGISTER(CVRegR9D, 361)
-CV_REGISTER(CVRegR10D, 362)
-CV_REGISTER(CVRegR11D, 363)
-CV_REGISTER(CVRegR12D, 364)
-CV_REGISTER(CVRegR13D, 365)
-CV_REGISTER(CVRegR14D, 366)
-CV_REGISTER(CVRegR15D, 367)
+// Some system headers define macros that conflict with our enums. Every
+// compiler supported by LLVM has the push_macro and pop_macro pragmas, so use
+// them to avoid the conflict.
+#pragma push_macro("CR0")
+#pragma push_macro("CR1")
+#pragma push_macro("CR2")
+#pragma push_macro("CR3")
+#pragma push_macro("CR4")
+
+CV_REGISTER(ERR, 30000)
+CV_REGISTER(TEB, 30001)
+CV_REGISTER(TIMER, 30002)
+CV_REGISTER(EFAD1, 30003)
+CV_REGISTER(EFAD2, 30004)
+CV_REGISTER(EFAD3, 30005)
+CV_REGISTER(VFRAME, 30006)
+CV_REGISTER(HANDLE, 30007)
+CV_REGISTER(PARAMS, 30008)
+CV_REGISTER(LOCALS, 30009)
+CV_REGISTER(TID, 30010)
+CV_REGISTER(ENV, 30011)
+CV_REGISTER(CMDLN, 30012)
+
+CV_REGISTER(NONE, 0)
+CV_REGISTER(AL, 1)
+CV_REGISTER(CL, 2)
+CV_REGISTER(DL, 3)
+CV_REGISTER(BL, 4)
+CV_REGISTER(AH, 5)
+CV_REGISTER(CH, 6)
+CV_REGISTER(DH, 7)
+CV_REGISTER(BH, 8)
+CV_REGISTER(AX, 9)
+CV_REGISTER(CX, 10)
+CV_REGISTER(DX, 11)
+CV_REGISTER(BX, 12)
+CV_REGISTER(SP, 13)
+CV_REGISTER(BP, 14)
+CV_REGISTER(SI, 15)
+CV_REGISTER(DI, 16)
+CV_REGISTER(EAX, 17)
+CV_REGISTER(ECX, 18)
+CV_REGISTER(EDX, 19)
+CV_REGISTER(EBX, 20)
+CV_REGISTER(ESP, 21)
+CV_REGISTER(EBP, 22)
+CV_REGISTER(ESI, 23)
+CV_REGISTER(EDI, 24)
+CV_REGISTER(ES, 25)
+CV_REGISTER(CS, 26)
+CV_REGISTER(SS, 27)
+CV_REGISTER(DS, 28)
+CV_REGISTER(FS, 29)
+CV_REGISTER(GS, 30)
+CV_REGISTER(IP, 31)
+CV_REGISTER(FLAGS, 32)
+CV_REGISTER(EIP, 33)
+CV_REGISTER(EFLAGS, 34)
+CV_REGISTER(TEMP, 40)
+CV_REGISTER(TEMPH, 41)
+CV_REGISTER(QUOTE, 42)
+CV_REGISTER(PCDR3, 43)
+CV_REGISTER(PCDR4, 44)
+CV_REGISTER(PCDR5, 45)
+CV_REGISTER(PCDR6, 46)
+CV_REGISTER(PCDR7, 47)
+CV_REGISTER(CR0, 80)
+CV_REGISTER(CR1, 81)
+CV_REGISTER(CR2, 82)
+CV_REGISTER(CR3, 83)
+CV_REGISTER(CR4, 84)
+CV_REGISTER(DR0, 90)
+CV_REGISTER(DR1, 91)
+CV_REGISTER(DR2, 92)
+CV_REGISTER(DR3, 93)
+CV_REGISTER(DR4, 94)
+CV_REGISTER(DR5, 95)
+CV_REGISTER(DR6, 96)
+CV_REGISTER(DR7, 97)
+CV_REGISTER(GDTR, 110)
+CV_REGISTER(GDTL, 111)
+CV_REGISTER(IDTR, 112)
+CV_REGISTER(IDTL, 113)
+CV_REGISTER(LDTR, 114)
+CV_REGISTER(TR, 115)
+
+CV_REGISTER(PSEUDO1, 116)
+CV_REGISTER(PSEUDO2, 117)
+CV_REGISTER(PSEUDO3, 118)
+CV_REGISTER(PSEUDO4, 119)
+CV_REGISTER(PSEUDO5, 120)
+CV_REGISTER(PSEUDO6, 121)
+CV_REGISTER(PSEUDO7, 122)
+CV_REGISTER(PSEUDO8, 123)
+CV_REGISTER(PSEUDO9, 124)
+
+CV_REGISTER(ST0, 128)
+CV_REGISTER(ST1, 129)
+CV_REGISTER(ST2, 130)
+CV_REGISTER(ST3, 131)
+CV_REGISTER(ST4, 132)
+CV_REGISTER(ST5, 133)
+CV_REGISTER(ST6, 134)
+CV_REGISTER(ST7, 135)
+CV_REGISTER(CTRL, 136)
+CV_REGISTER(STAT, 137)
+CV_REGISTER(TAG, 138)
+CV_REGISTER(FPIP, 139)
+CV_REGISTER(FPCS, 140)
+CV_REGISTER(FPDO, 141)
+CV_REGISTER(FPDS, 142)
+CV_REGISTER(ISEM, 143)
+CV_REGISTER(FPEIP, 144)
+CV_REGISTER(FPEDO, 145)
+
+CV_REGISTER(MM0, 146)
+CV_REGISTER(MM1, 147)
+CV_REGISTER(MM2, 148)
+CV_REGISTER(MM3, 149)
+CV_REGISTER(MM4, 150)
+CV_REGISTER(MM5, 151)
+CV_REGISTER(MM6, 152)
+CV_REGISTER(MM7, 153)
+
+CV_REGISTER(XMM0, 154)
+CV_REGISTER(XMM1, 155)
+CV_REGISTER(XMM2, 156)
+CV_REGISTER(XMM3, 157)
+CV_REGISTER(XMM4, 158)
+CV_REGISTER(XMM5, 159)
+CV_REGISTER(XMM6, 160)
+CV_REGISTER(XMM7, 161)
+
+CV_REGISTER(MXCSR, 211)
+
+CV_REGISTER(EDXEAX, 212)
+
+CV_REGISTER(EMM0L, 220)
+CV_REGISTER(EMM1L, 221)
+CV_REGISTER(EMM2L, 222)
+CV_REGISTER(EMM3L, 223)
+CV_REGISTER(EMM4L, 224)
+CV_REGISTER(EMM5L, 225)
+CV_REGISTER(EMM6L, 226)
+CV_REGISTER(EMM7L, 227)
+
+CV_REGISTER(EMM0H, 228)
+CV_REGISTER(EMM1H, 229)
+CV_REGISTER(EMM2H, 230)
+CV_REGISTER(EMM3H, 231)
+CV_REGISTER(EMM4H, 232)
+CV_REGISTER(EMM5H, 233)
+CV_REGISTER(EMM6H, 234)
+CV_REGISTER(EMM7H, 235)
+
+CV_REGISTER(MM00, 236)
+CV_REGISTER(MM01, 237)
+CV_REGISTER(MM10, 238)
+CV_REGISTER(MM11, 239)
+CV_REGISTER(MM20, 240)
+CV_REGISTER(MM21, 241)
+CV_REGISTER(MM30, 242)
+CV_REGISTER(MM31, 243)
+CV_REGISTER(MM40, 244)
+CV_REGISTER(MM41, 245)
+CV_REGISTER(MM50, 246)
+CV_REGISTER(MM51, 247)
+CV_REGISTER(MM60, 248)
+CV_REGISTER(MM61, 249)
+CV_REGISTER(MM70, 250)
+CV_REGISTER(MM71, 251)
+
+CV_REGISTER(BND0, 396)
+CV_REGISTER(BND1, 397)
+CV_REGISTER(BND2, 398)
+
+
+CV_REGISTER(XMM8, 252)
+CV_REGISTER(XMM9, 253)
+CV_REGISTER(XMM10, 254)
+CV_REGISTER(XMM11, 255)
+CV_REGISTER(XMM12, 256)
+CV_REGISTER(XMM13, 257)
+CV_REGISTER(XMM14, 258)
+CV_REGISTER(XMM15, 259)
+
+
+CV_REGISTER(SIL, 324)
+CV_REGISTER(DIL, 325)
+CV_REGISTER(BPL, 326)
+CV_REGISTER(SPL, 327)
+
+CV_REGISTER(RAX, 328)
+CV_REGISTER(RBX, 329)
+CV_REGISTER(RCX, 330)
+CV_REGISTER(RDX, 331)
+CV_REGISTER(RSI, 332)
+CV_REGISTER(RDI, 333)
+CV_REGISTER(RBP, 334)
+CV_REGISTER(RSP, 335)
+
+CV_REGISTER(R8, 336)
+CV_REGISTER(R9, 337)
+CV_REGISTER(R10, 338)
+CV_REGISTER(R11, 339)
+CV_REGISTER(R12, 340)
+CV_REGISTER(R13, 341)
+CV_REGISTER(R14, 342)
+CV_REGISTER(R15, 343)
+
+CV_REGISTER(R8B, 344)
+CV_REGISTER(R9B, 345)
+CV_REGISTER(R10B, 346)
+CV_REGISTER(R11B, 347)
+CV_REGISTER(R12B, 348)
+CV_REGISTER(R13B, 349)
+CV_REGISTER(R14B, 350)
+CV_REGISTER(R15B, 351)
+
+CV_REGISTER(R8W, 352)
+CV_REGISTER(R9W, 353)
+CV_REGISTER(R10W, 354)
+CV_REGISTER(R11W, 355)
+CV_REGISTER(R12W, 356)
+CV_REGISTER(R13W, 357)
+CV_REGISTER(R14W, 358)
+CV_REGISTER(R15W, 359)
+
+CV_REGISTER(R8D, 360)
+CV_REGISTER(R9D, 361)
+CV_REGISTER(R10D, 362)
+CV_REGISTER(R11D, 363)
+CV_REGISTER(R12D, 364)
+CV_REGISTER(R13D, 365)
+CV_REGISTER(R14D, 366)
+CV_REGISTER(R15D, 367)
// cvconst.h defines both CV_REG_YMM0 (252) and CV_AMD64_YMM0 (368). Keep the
// original prefix to distinguish them.
-CV_REGISTER(CVRegAMD64_YMM0, 368)
-CV_REGISTER(CVRegAMD64_YMM1, 369)
-CV_REGISTER(CVRegAMD64_YMM2, 370)
-CV_REGISTER(CVRegAMD64_YMM3, 371)
-CV_REGISTER(CVRegAMD64_YMM4, 372)
-CV_REGISTER(CVRegAMD64_YMM5, 373)
-CV_REGISTER(CVRegAMD64_YMM6, 374)
-CV_REGISTER(CVRegAMD64_YMM7, 375)
-CV_REGISTER(CVRegAMD64_YMM8, 376)
-CV_REGISTER(CVRegAMD64_YMM9, 377)
-CV_REGISTER(CVRegAMD64_YMM10, 378)
-CV_REGISTER(CVRegAMD64_YMM11, 379)
-CV_REGISTER(CVRegAMD64_YMM12, 380)
-CV_REGISTER(CVRegAMD64_YMM13, 381)
-CV_REGISTER(CVRegAMD64_YMM14, 382)
-CV_REGISTER(CVRegAMD64_YMM15, 383)
+CV_REGISTER(AMD64_YMM0, 368)
+CV_REGISTER(AMD64_YMM1, 369)
+CV_REGISTER(AMD64_YMM2, 370)
+CV_REGISTER(AMD64_YMM3, 371)
+CV_REGISTER(AMD64_YMM4, 372)
+CV_REGISTER(AMD64_YMM5, 373)
+CV_REGISTER(AMD64_YMM6, 374)
+CV_REGISTER(AMD64_YMM7, 375)
+CV_REGISTER(AMD64_YMM8, 376)
+CV_REGISTER(AMD64_YMM9, 377)
+CV_REGISTER(AMD64_YMM10, 378)
+CV_REGISTER(AMD64_YMM11, 379)
+CV_REGISTER(AMD64_YMM12, 380)
+CV_REGISTER(AMD64_YMM13, 381)
+CV_REGISTER(AMD64_YMM14, 382)
+CV_REGISTER(AMD64_YMM15, 383)
+
+CV_REGISTER(AMD64_XMM16, 694)
+CV_REGISTER(AMD64_XMM17, 695)
+CV_REGISTER(AMD64_XMM18, 696)
+CV_REGISTER(AMD64_XMM19, 697)
+CV_REGISTER(AMD64_XMM20, 698)
+CV_REGISTER(AMD64_XMM21, 699)
+CV_REGISTER(AMD64_XMM22, 700)
+CV_REGISTER(AMD64_XMM23, 701)
+CV_REGISTER(AMD64_XMM24, 702)
+CV_REGISTER(AMD64_XMM25, 703)
+CV_REGISTER(AMD64_XMM26, 704)
+CV_REGISTER(AMD64_XMM27, 705)
+CV_REGISTER(AMD64_XMM28, 706)
+CV_REGISTER(AMD64_XMM29, 707)
+CV_REGISTER(AMD64_XMM30, 708)
+CV_REGISTER(AMD64_XMM31, 709)
+
+CV_REGISTER(AMD64_YMM16, 710)
+CV_REGISTER(AMD64_YMM17, 711)
+CV_REGISTER(AMD64_YMM18, 712)
+CV_REGISTER(AMD64_YMM19, 713)
+CV_REGISTER(AMD64_YMM20, 714)
+CV_REGISTER(AMD64_YMM21, 715)
+CV_REGISTER(AMD64_YMM22, 716)
+CV_REGISTER(AMD64_YMM23, 717)
+CV_REGISTER(AMD64_YMM24, 718)
+CV_REGISTER(AMD64_YMM25, 719)
+CV_REGISTER(AMD64_YMM26, 720)
+CV_REGISTER(AMD64_YMM27, 721)
+CV_REGISTER(AMD64_YMM28, 722)
+CV_REGISTER(AMD64_YMM29, 723)
+CV_REGISTER(AMD64_YMM30, 724)
+CV_REGISTER(AMD64_YMM31, 725)
+
+CV_REGISTER(AMD64_ZMM0, 726)
+CV_REGISTER(AMD64_ZMM1, 727)
+CV_REGISTER(AMD64_ZMM2, 728)
+CV_REGISTER(AMD64_ZMM3, 729)
+CV_REGISTER(AMD64_ZMM4, 730)
+CV_REGISTER(AMD64_ZMM5, 731)
+CV_REGISTER(AMD64_ZMM6, 732)
+CV_REGISTER(AMD64_ZMM7, 733)
+CV_REGISTER(AMD64_ZMM8, 734)
+CV_REGISTER(AMD64_ZMM9, 735)
+CV_REGISTER(AMD64_ZMM10, 736)
+CV_REGISTER(AMD64_ZMM11, 737)
+CV_REGISTER(AMD64_ZMM12, 738)
+CV_REGISTER(AMD64_ZMM13, 739)
+CV_REGISTER(AMD64_ZMM14, 740)
+CV_REGISTER(AMD64_ZMM15, 741)
+CV_REGISTER(AMD64_ZMM16, 742)
+CV_REGISTER(AMD64_ZMM17, 743)
+CV_REGISTER(AMD64_ZMM18, 744)
+CV_REGISTER(AMD64_ZMM19, 745)
+CV_REGISTER(AMD64_ZMM20, 746)
+CV_REGISTER(AMD64_ZMM21, 747)
+CV_REGISTER(AMD64_ZMM22, 748)
+CV_REGISTER(AMD64_ZMM23, 749)
+CV_REGISTER(AMD64_ZMM24, 750)
+CV_REGISTER(AMD64_ZMM25, 751)
+CV_REGISTER(AMD64_ZMM26, 752)
+CV_REGISTER(AMD64_ZMM27, 753)
+CV_REGISTER(AMD64_ZMM28, 754)
+CV_REGISTER(AMD64_ZMM29, 755)
+CV_REGISTER(AMD64_ZMM30, 756)
+CV_REGISTER(AMD64_ZMM31, 757)
+
+CV_REGISTER(AMD64_K0, 758)
+CV_REGISTER(AMD64_K1, 759)
+CV_REGISTER(AMD64_K2, 760)
+CV_REGISTER(AMD64_K3, 761)
+CV_REGISTER(AMD64_K4, 762)
+CV_REGISTER(AMD64_K5, 763)
+CV_REGISTER(AMD64_K6, 764)
+CV_REGISTER(AMD64_K7, 765)
+
+#pragma pop_macro("CR0")
+#pragma pop_macro("CR1")
+#pragma pop_macro("CR2")
+#pragma pop_macro("CR3")
+#pragma pop_macro("CR4")
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
index 1e329c7c3f14..847d93f0e985 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
namespace llvm {
@@ -26,21 +27,23 @@ public:
}
Error initialize(BinaryStreamReader Reader);
+ Error initialize(BinaryStreamRef Stream);
FixedStreamArray<FrameData>::Iterator begin() const { return Frames.begin(); }
FixedStreamArray<FrameData>::Iterator end() const { return Frames.end(); }
- const void *getRelocPtr() const { return RelocPtr; }
+ const support::ulittle32_t *getRelocPtr() const { return RelocPtr; }
private:
- const uint32_t *RelocPtr = nullptr;
+ const support::ulittle32_t *RelocPtr = nullptr;
FixedStreamArray<FrameData> Frames;
};
class DebugFrameDataSubsection final : public DebugSubsection {
public:
- DebugFrameDataSubsection()
- : DebugSubsection(DebugSubsectionKind::FrameData) {}
+ DebugFrameDataSubsection(bool IncludeRelocPtr)
+ : DebugSubsection(DebugSubsectionKind::FrameData),
+ IncludeRelocPtr(IncludeRelocPtr) {}
static bool classof(const DebugSubsection *S) {
return S->kind() == DebugSubsectionKind::FrameData;
}
@@ -52,6 +55,7 @@ public:
void setFrames(ArrayRef<FrameData> Frames);
private:
+ bool IncludeRelocPtr = false;
std::vector<FrameData> Frames;
};
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
index 58449c2c7565..36237e1a4d9e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
@@ -180,26 +180,6 @@ template <typename T> serialize_numeric_impl<T> serialize_numeric(T &Item) {
return serialize_numeric_impl<T>(Item);
}
-// This field is only present in the byte record if the condition is true. The
-// condition is evaluated lazily, so it can depend on items that were
-// deserialized
-// earlier.
-#define CV_CONDITIONAL_FIELD(I, C) \
- serialize_conditional(I, [&]() { return !!(C); })
-
-// This is an array of N items, where N is evaluated lazily, so it can refer
-// to a field deserialized earlier.
-#define CV_ARRAY_FIELD_N(I, N) serialize_array(I, [&]() { return N; })
-
-// This is an array that exhausts the remainder of the input buffer.
-#define CV_ARRAY_FIELD_TAIL(I) serialize_array_tail(I)
-
-// This is an array that consumes null terminated strings until a double null
-// is encountered.
-#define CV_STRING_ARRAY_NULL_TERM(I) serialize_null_term_string_array(I)
-
-#define CV_NUMERIC_FIELD(I) serialize_numeric(I)
-
template <typename T, typename U>
Error consume(BinaryStreamReader &Reader,
const serialize_conditional_impl<T, U> &Item) {
@@ -242,9 +222,6 @@ Error consume(BinaryStreamReader &Reader, T &&X, U &&Y, Args &&... Rest) {
return consume(Reader, Y, std::forward<Args>(Rest)...);
}
-#define CV_DESERIALIZE(...) \
- if (auto EC = consume(__VA_ARGS__)) \
- return std::move(EC);
}
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
index b5479db97a15..6b5dd2d20d17 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
@@ -47,7 +47,7 @@ public:
return Error::success();
}
template <typename T> static Expected<T> deserializeAs(CVSymbol Symbol) {
- T Record(Symbol.kind());
+ T Record(static_cast<SymbolRecordKind>(Symbol.kind()));
if (auto EC = deserializeAs<T>(Symbol, Record))
return std::move(EC);
return Record;
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
index 293daa851bdd..215da2e2b522 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
@@ -27,10 +27,10 @@ class CVSymbolDumper {
public:
CVSymbolDumper(ScopedPrinter &W, TypeCollection &Types,
CodeViewContainer Container,
- std::unique_ptr<SymbolDumpDelegate> ObjDelegate,
+ std::unique_ptr<SymbolDumpDelegate> ObjDelegate, CPUType CPU,
bool PrintRecordBytes)
: W(W), Types(Types), Container(Container),
- ObjDelegate(std::move(ObjDelegate)),
+ ObjDelegate(std::move(ObjDelegate)), CompilationCPUType(CPU),
PrintRecordBytes(PrintRecordBytes) {}
/// Dumps one type record. Returns false if there was a type parsing error,
@@ -43,12 +43,14 @@ public:
/// parse error, and true otherwise.
Error dump(const CVSymbolArray &Symbols);
+ CPUType getCompilationCPUType() const { return CompilationCPUType; }
+
private:
ScopedPrinter &W;
TypeCollection &Types;
CodeViewContainer Container;
std::unique_ptr<SymbolDumpDelegate> ObjDelegate;
-
+ CPUType CompilationCPUType;
bool PrintRecordBytes;
};
} // end namespace codeview
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 93306824012e..b58825c4a788 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -358,6 +358,7 @@ public:
// S_PUB32
class PublicSym32 : public SymbolRecord {
public:
+ PublicSym32() : SymbolRecord(SymbolRecordKind::PublicSym32) {}
explicit PublicSym32(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
explicit PublicSym32(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::PublicSym32),
@@ -399,6 +400,7 @@ public:
uint16_t Module;
StringRef Name;
+ uint16_t modi() const { return Module - 1; }
uint32_t RecordOffset;
};
@@ -636,6 +638,7 @@ public:
// S_OBJNAME
class ObjNameSym : public SymbolRecord {
public:
+ explicit ObjNameSym() : SymbolRecord(SymbolRecordKind::ObjNameSym) {}
explicit ObjNameSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
ObjNameSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset) {
@@ -718,6 +721,7 @@ public:
// S_COMPILE3
class Compile3Sym : public SymbolRecord {
public:
+ Compile3Sym() : SymbolRecord(SymbolRecordKind::Compile3Sym) {}
explicit Compile3Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
Compile3Sym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::Compile3Sym),
@@ -739,8 +743,17 @@ public:
Flags = CompileSym3Flags((uint32_t(Flags) & 0xFFFFFF00) | uint32_t(Lang));
}
- uint8_t getLanguage() const { return static_cast<uint32_t>(Flags) & 0xFF; }
- uint32_t getFlags() const { return static_cast<uint32_t>(Flags) & ~0xFF; }
+ SourceLanguage getLanguage() const {
+ return static_cast<SourceLanguage>(static_cast<uint32_t>(Flags) & 0xFF);
+ }
+ CompileSym3Flags getFlags() const {
+ return static_cast<CompileSym3Flags>(static_cast<uint32_t>(Flags) & ~0xFF);
+ }
+
+ bool hasOptimizations() const {
+ return CompileSym3Flags::None !=
+ (getFlags() & (CompileSym3Flags::PGO | CompileSym3Flags::LTCG));
+ }
uint32_t RecordOffset;
};
@@ -761,7 +774,21 @@ public:
uint16_t SectionIdOfExceptionHandler;
FrameProcedureOptions Flags;
+ /// Extract the register this frame uses to refer to local variables.
+ RegisterId getLocalFramePtrReg(CPUType CPU) const {
+ return decodeFramePtrReg(
+ EncodedFramePtrReg((uint32_t(Flags) >> 14U) & 0x3U), CPU);
+ }
+
+ /// Extract the register this frame uses to refer to parameters.
+ RegisterId getParamFramePtrReg(CPUType CPU) const {
+ return decodeFramePtrReg(
+ EncodedFramePtrReg((uint32_t(Flags) >> 16U) & 0x3U), CPU);
+ }
+
uint32_t RecordOffset;
+
+private:
};
// S_CALLSITEINFO
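
FrameProcSym above extracts the two-bit selectors that the CodeView.h hunk adds to FrameProcedureOptions (EncodedLocalBasePointerMask at bits 14-15, EncodedParamBasePointerMask at bits 16-17) and passes them to decodeFramePtrReg. A minimal standalone sketch of that bit extraction, using plain integers and an illustrative enum rather than the LLVM types:

#include <cstdint>
#include <cstdio>

// Mirrors EncodedFramePtrReg: a two-bit selector stored in the S_FRAMEPROC flags.
enum class DemoEncodedFramePtrReg : uint8_t { None = 0, StackPtr, FramePtr, BasePtr };

// Bits 14-15 select the register used for locals, bits 16-17 the one used for
// parameters, matching getLocalFramePtrReg and getParamFramePtrReg above.
static DemoEncodedFramePtrReg localFramePtr(uint32_t Flags) {
  return DemoEncodedFramePtrReg((Flags >> 14) & 0x3u);
}
static DemoEncodedFramePtrReg paramFramePtr(uint32_t Flags) {
  return DemoEncodedFramePtrReg((Flags >> 16) & 0x3u);
}

int main() {
  // Locals addressed through the frame pointer, parameters through the stack pointer.
  uint32_t Flags = (2u << 14) | (1u << 16);
  std::printf("locals=%u params=%u\n",
              unsigned(localFramePtr(Flags)), unsigned(paramFramePtr(Flags)));
  return 0;
}
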
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
new file mode 100644
index 000000000000..3713fe118eaa
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
@@ -0,0 +1,62 @@
+//===- SymbolRecordHelpers.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H
+#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+
+namespace llvm {
+namespace codeview {
+/// Return true if this symbol opens a scope. This implies that the symbol has
+/// "parent" and "end" fields, which contain the offset of the S_END or
+/// S_INLINESITE_END record.
+inline bool symbolOpensScope(SymbolKind Kind) {
+ switch (Kind) {
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32:
+ case SymbolKind::S_LPROC32_ID:
+ case SymbolKind::S_GPROC32_ID:
+ case SymbolKind::S_BLOCK32:
+ case SymbolKind::S_SEPCODE:
+ case SymbolKind::S_THUNK32:
+ case SymbolKind::S_INLINESITE:
+ case SymbolKind::S_INLINESITE2:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// Return true if this symbol ends a scope.
+inline bool symbolEndsScope(SymbolKind Kind) {
+ switch (Kind) {
+ case SymbolKind::S_END:
+ case SymbolKind::S_PROC_ID_END:
+ case SymbolKind::S_INLINESITE_END:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// Given a symbol P for which symbolOpensScope(P) == true, return the
+/// corresponding end offset.
+uint32_t getScopeEndOffset(const CVSymbol &Symbol);
+uint32_t getScopeParentOffset(const CVSymbol &Symbol);
+
+CVSymbolArray limitSymbolArrayToScope(const CVSymbolArray &Symbols,
+ uint32_t ScopeBegin);
+
+} // namespace codeview
+} // namespace llvm
+
+#endif
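
symbolOpensScope and symbolEndsScope above let a reader of a flat symbol stream track nesting with nothing more than a depth counter, which is the kind of bookkeeping a scope-aware dumper needs. A standalone sketch over a toy vector of kinds rather than a real CVSymbolArray (DemoKind and the stream contents are illustrative only):

#include <cstdio>
#include <vector>

// Stand-in symbol kinds; the real predicates take codeview::SymbolKind.
enum class DemoKind { ProcStart, Block, Data, End };

static bool opensScope(DemoKind K) {
  return K == DemoKind::ProcStart || K == DemoKind::Block;
}
static bool endsScope(DemoKind K) { return K == DemoKind::End; }

int main() {
  // Roughly: S_GPROC32 { S_BLOCK32 { S_LDATA32 } S_END } S_END.
  std::vector<DemoKind> Stream = {DemoKind::ProcStart, DemoKind::Block,
                                  DemoKind::Data, DemoKind::End, DemoKind::End};
  unsigned Depth = 0;
  for (DemoKind K : Stream) {
    if (endsScope(K))
      --Depth;                        // closing record belongs to the outer depth
    std::printf("%*srecord at depth %u\n", int(Depth * 2), "", Depth);
    if (opensScope(K))
      ++Depth;                        // everything that follows is nested deeper
  }
  return 0;
}
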
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
index c71281de7145..58463a6b13df 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -134,6 +134,8 @@ public:
return static_cast<SimpleTypeMode>(Index & SimpleModeMask);
}
+ TypeIndex makeDirect() const { return TypeIndex{getSimpleKind()}; }
+
static TypeIndex None() { return TypeIndex(SimpleTypeKind::None); }
static TypeIndex Void() { return TypeIndex(SimpleTypeKind::Void); }
static TypeIndex VoidPointer32() {
@@ -143,6 +145,13 @@ public:
return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer64);
}
+ static TypeIndex NullptrT() {
+ // std::nullptr_t uses the pointer mode that doesn't indicate bit-width,
+ // presumably because std::nullptr_t is intended to be compatible with any
+ // pointer type.
+ return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer);
+ }
+
static TypeIndex SignedCharacter() {
return TypeIndex(SimpleTypeKind::SignedCharacter);
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
index 61ebdf878ce7..7b4a30ee622d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -95,6 +95,11 @@ struct MemberAttributes {
return MP == MethodKind::IntroducingVirtual ||
MP == MethodKind::PureIntroducingVirtual;
}
+
+ /// Is this method static.
+ bool isStatic() const {
+ return getMethodKind() == MethodKind::Static;
+ }
};
// Does not correspond to any tag, this is the tail of an LF_POINTER record
@@ -264,14 +269,18 @@ public:
// LF_POINTER
class PointerRecord : public TypeRecord {
public:
+ // ---------------------------XXXXX
static const uint32_t PointerKindShift = 0;
static const uint32_t PointerKindMask = 0x1F;
+ // ------------------------XXX-----
static const uint32_t PointerModeShift = 5;
static const uint32_t PointerModeMask = 0x07;
- static const uint32_t PointerOptionMask = 0xFF;
+ // ----------XXX------XXXXX--------
+ static const uint32_t PointerOptionMask = 0x381f00;
+ // -----------XXXXXXXX-------------
static const uint32_t PointerSizeShift = 13;
static const uint32_t PointerSizeMask = 0xFF;
@@ -305,7 +314,7 @@ public:
}
PointerOptions getOptions() const {
- return static_cast<PointerOptions>(Attrs);
+ return static_cast<PointerOptions>(Attrs & PointerOptionMask);
}
uint8_t getSize() const {
@@ -334,6 +343,14 @@ public:
return !!(Attrs & uint32_t(PointerOptions::Restrict));
}
+ bool isLValueReferenceThisPtr() const {
+ return !!(Attrs & uint32_t(PointerOptions::LValueRefThisPointer));
+ }
+
+ bool isRValueReferenceThisPtr() const {
+ return !!(Attrs & uint32_t(PointerOptions::RValueRefThisPointer));
+ }
+
TypeIndex ReferentType;
uint32_t Attrs;
Optional<MemberPointerInfo> MemberInfo;
@@ -429,6 +446,14 @@ public:
return (Options & ClassOptions::ForwardReference) != ClassOptions::None;
}
+ bool containsNestedClass() const {
+ return (Options & ClassOptions::ContainsNestedClass) != ClassOptions::None;
+ }
+
+ bool isScoped() const {
+ return (Options & ClassOptions::Scoped) != ClassOptions::None;
+ }
+
uint16_t getMemberCount() const { return MemberCount; }
ClassOptions getOptions() const { return Options; }
TypeIndex getFieldList() const { return FieldList; }
@@ -655,7 +680,17 @@ public:
ArrayRef<TypeIndex> getArgs() const { return ArgIndices; }
- SmallVector<TypeIndex, 4> ArgIndices;
+ /// Indices of known build info arguments.
+ enum BuildInfoArg {
+ CurrentDirectory, ///< Absolute CWD path
+ BuildTool, ///< Absolute compiler path
+ SourceFile, ///< Path to main source file, relative or absolute
+ TypeServerPDB, ///< Absolute path of type server PDB (/Fd)
+ CommandLine, ///< Full canonical command line (maybe -cc1)
+ MaxArgs
+ };
+
+ SmallVector<TypeIndex, MaxArgs> ArgIndices;
};
// LF_VFTABLE
@@ -923,6 +958,7 @@ public:
uint32_t Signature;
};
+
} // end namespace codeview
} // end namespace llvm
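
PointerRecord above packs kind, mode, options and size into a single 32-bit Attrs word, and getOptions now masks with PointerOptionMask instead of handing back the raw word. A standalone sketch of the same field extraction, reusing the shifts and masks from the hunk (the sample attribute value is illustrative):

#include <cstdint>
#include <cstdio>

// Field layout of the LF_POINTER attribute word, as documented in PointerRecord.
constexpr uint32_t KindShift = 0;
constexpr uint32_t KindMask = 0x1F;
constexpr uint32_t ModeShift = 5;
constexpr uint32_t ModeMask = 0x07;
constexpr uint32_t OptionMask = 0x381f00;  // option bits are tested in place
constexpr uint32_t SizeShift = 13;
constexpr uint32_t SizeMask = 0xFF;

static unsigned ptrKind(uint32_t Attrs) { return (Attrs >> KindShift) & KindMask; }
static unsigned ptrMode(uint32_t Attrs) { return (Attrs >> ModeShift) & ModeMask; }
static unsigned ptrOptions(uint32_t Attrs) { return Attrs & OptionMask; }
static unsigned ptrSize(uint32_t Attrs) { return (Attrs >> SizeShift) & SizeMask; }

int main() {
  // A 64-bit near pointer (kind 0x0C), 8 bytes wide, with the Const option
  // bit (0x400, see PointerOptions above) set.
  uint32_t Attrs = 0x0C | (8u << SizeShift) | 0x400;
  std::printf("kind=0x%x mode=%u size=%u options=0x%x\n",
              ptrKind(Attrs), ptrMode(Attrs), ptrSize(Attrs), ptrOptions(Attrs));
  return 0;
}
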
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
new file mode 100644
index 000000000000..389472ed1aea
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
@@ -0,0 +1,28 @@
+//===- TypeRecordHelpers.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H
+
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+
+namespace llvm {
+ namespace codeview {
+ /// Given an arbitrary codeview type, determine if it is an LF_STRUCTURE,
+ /// LF_CLASS, LF_INTERFACE, LF_UNION, or LF_ENUM with the forward ref class
+ /// option.
+ bool isUdtForwardRef(CVType CVT);
+
+ /// Given a CVType which is assumed to be an LF_MODIFIER, return the
+ /// TypeIndex of the type that the LF_MODIFIER modifies.
+ TypeIndex getModifiedType(const CVType &CVT);
+ }
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
index 583740d2eb4b..0b9f54ec60bf 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
@@ -83,18 +83,21 @@ Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef<TypeIndex> Types,
Error mergeTypeAndIdRecords(MergingTypeTableBuilder &DestIds,
MergingTypeTableBuilder &DestTypes,
SmallVectorImpl<TypeIndex> &SourceToDest,
- const CVTypeArray &IdsAndTypes);
+ const CVTypeArray &IdsAndTypes,
+ Optional<uint32_t> &PCHSignature);
Error mergeTypeAndIdRecords(GlobalTypeTableBuilder &DestIds,
GlobalTypeTableBuilder &DestTypes,
SmallVectorImpl<TypeIndex> &SourceToDest,
const CVTypeArray &IdsAndTypes,
- ArrayRef<GloballyHashedType> Hashes);
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &PCHSignature);
Error mergeTypeRecords(GlobalTypeTableBuilder &Dest,
SmallVectorImpl<TypeIndex> &SourceToDest,
const CVTypeArray &Types,
- ArrayRef<GloballyHashedType> Hashes);
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &PCHSignature);
Error mergeIdRecords(GlobalTypeTableBuilder &Dest, ArrayRef<TypeIndex> Types,
SmallVectorImpl<TypeIndex> &SourceToDest,
diff --git a/contrib/llvm/include/llvm/DebugInfo/DIContext.h b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
index bbdd5e0d9c3f..85e96402a246 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
@@ -81,7 +81,7 @@ class DIInliningInfo {
public:
DIInliningInfo() = default;
- DILineInfo getFrame(unsigned Index) const {
+ const DILineInfo &getFrame(unsigned Index) const {
assert(Index < Frames.size());
return Frames[Index];
}
@@ -98,6 +98,11 @@ public:
void addFrame(const DILineInfo &Frame) {
Frames.push_back(Frame);
}
+
+ void resize(unsigned i) {
+ Frames.resize(i);
+ }
+
};
/// Container for description of a global variable.
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index c219ca75e640..33797419a7b8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -18,20 +18,20 @@ namespace llvm {
class DWARFCompileUnit : public DWARFUnit {
public:
DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section,
- const DWARFUnitHeader &Header,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
StringRef SS, const DWARFSection &SOS,
const DWARFSection *AOS, const DWARFSection &LS, bool LE,
- bool IsDWO, const DWARFUnitSectionBase &UnitSection)
- : DWARFUnit(Context, Section, Header, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
- UnitSection) {}
+ bool IsDWO, const DWARFUnitVector &UnitVector)
+ : DWARFUnit(Context, Section, Header, DA, RS, LocSection, SS, SOS, AOS,
+ LS, LE, IsDWO, UnitVector) {}
- // VTable anchor.
+ /// VTable anchor.
~DWARFCompileUnit() override;
-
- void dump(raw_ostream &OS, DIDumpOptions DumpOpts);
-
- static const DWARFSectionKind Section = DW_SECT_INFO;
+ /// Dump this compile unit to \p OS.
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts) override;
+ /// Enable LLVM-style RTTI.
+ static bool classof(const DWARFUnit *U) { return !U->isTypeUnit(); }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index f5419fe02421..dbb6be04544b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -57,8 +57,7 @@ enum class ErrorPolicy { Halt, Continue };
/// This data structure is the top level entity that deals with dwarf debug
/// information parsing. The actual data is supplied through DWARFObj.
class DWARFContext : public DIContext {
- DWARFUnitSection<DWARFCompileUnit> CUs;
- std::deque<DWARFUnitSection<DWARFTypeUnit>> TUs;
+ DWARFUnitVector NormalUnits;
std::unique_ptr<DWARFUnitIndex> CUIndex;
std::unique_ptr<DWARFGdbIndex> GdbIndex;
std::unique_ptr<DWARFUnitIndex> TUIndex;
@@ -75,10 +74,9 @@ class DWARFContext : public DIContext {
std::unique_ptr<AppleAcceleratorTable> AppleNamespaces;
std::unique_ptr<AppleAcceleratorTable> AppleObjC;
- DWARFUnitSection<DWARFCompileUnit> DWOCUs;
- std::deque<DWARFUnitSection<DWARFTypeUnit>> DWOTUs;
+ DWARFUnitVector DWOUnits;
std::unique_ptr<DWARFDebugAbbrev> AbbrevDWO;
- std::unique_ptr<DWARFDebugLocDWO> LocDWO;
+ std::unique_ptr<DWARFDebugLoclists> LocDWO;
/// The maximum DWARF version of all units.
unsigned MaxVersion = 0;
@@ -95,22 +93,17 @@ class DWARFContext : public DIContext {
std::unique_ptr<MCRegisterInfo> RegInfo;
/// Read compile units from the debug_info section (if necessary)
- /// and store them in CUs.
- void parseCompileUnits();
-
- /// Read type units from the debug_types sections (if necessary)
- /// and store them in TUs.
- void parseTypeUnits();
+ /// and type units from the debug_types sections (if necessary)
+ /// and store them in NormalUnits.
+ void parseNormalUnits();
/// Read compile units from the debug_info.dwo section (if necessary)
- /// and store them in DWOCUs.
- void parseDWOCompileUnits();
-
- /// Read type units from the debug_types.dwo section (if necessary)
- /// and store them in DWOTUs.
- void parseDWOTypeUnits();
+ /// and type units from the debug_types.dwo section (if necessary)
+ /// and store them in DWOUnits.
+ /// If \p Lazy is true, set up to parse but don't actually parse them.
+ enum { EagerParse = false, LazyParse = true };
+ void parseDWOUnits(bool Lazy = false);
-protected:
std::unique_ptr<const DWARFObject> DObj;
public:
@@ -139,68 +132,95 @@ public:
bool verify(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override;
- using cu_iterator_range = DWARFUnitSection<DWARFCompileUnit>::iterator_range;
- using tu_iterator_range = DWARFUnitSection<DWARFTypeUnit>::iterator_range;
- using tu_section_iterator_range = iterator_range<decltype(TUs)::iterator>;
+ using unit_iterator_range = DWARFUnitVector::iterator_range;
- /// Get compile units in this context.
- cu_iterator_range compile_units() {
- parseCompileUnits();
- return cu_iterator_range(CUs.begin(), CUs.end());
+ /// Get units from .debug_info in this context.
+ unit_iterator_range info_section_units() {
+ parseNormalUnits();
+ return unit_iterator_range(NormalUnits.begin(),
+ NormalUnits.begin() +
+ NormalUnits.getNumInfoUnits());
}
+ /// Get units from .debug_types in this context.
+ unit_iterator_range types_section_units() {
+ parseNormalUnits();
+ return unit_iterator_range(
+ NormalUnits.begin() + NormalUnits.getNumInfoUnits(), NormalUnits.end());
+ }
+
+ /// Get compile units in this context.
+ unit_iterator_range compile_units() { return info_section_units(); }
+
/// Get type units in this context.
- tu_section_iterator_range type_unit_sections() {
- parseTypeUnits();
- return tu_section_iterator_range(TUs.begin(), TUs.end());
+ unit_iterator_range type_units() { return types_section_units(); }
+
+ /// Get all normal compile/type units in this context.
+ unit_iterator_range normal_units() {
+ parseNormalUnits();
+ return unit_iterator_range(NormalUnits.begin(), NormalUnits.end());
}
- /// Get compile units in the DWO context.
- cu_iterator_range dwo_compile_units() {
- parseDWOCompileUnits();
- return cu_iterator_range(DWOCUs.begin(), DWOCUs.end());
+ /// Get units from .debug_info.dwo in the DWO context.
+ unit_iterator_range dwo_info_section_units() {
+ parseDWOUnits();
+ return unit_iterator_range(DWOUnits.begin(),
+ DWOUnits.begin() + DWOUnits.getNumInfoUnits());
+ }
+
+ /// Get units from .debug_types.dwo in the DWO context.
+ unit_iterator_range dwo_types_section_units() {
+ parseDWOUnits();
+ return unit_iterator_range(DWOUnits.begin() + DWOUnits.getNumInfoUnits(),
+ DWOUnits.end());
}
+ /// Get compile units in the DWO context.
+ unit_iterator_range dwo_compile_units() { return dwo_info_section_units(); }
+
/// Get type units in the DWO context.
- tu_section_iterator_range dwo_type_unit_sections() {
- parseDWOTypeUnits();
- return tu_section_iterator_range(DWOTUs.begin(), DWOTUs.end());
+ unit_iterator_range dwo_type_units() { return dwo_types_section_units(); }
+
+ /// Get all units in the DWO context.
+ unit_iterator_range dwo_units() {
+ parseDWOUnits();
+ return unit_iterator_range(DWOUnits.begin(), DWOUnits.end());
}
/// Get the number of compile units in this context.
unsigned getNumCompileUnits() {
- parseCompileUnits();
- return CUs.size();
+ parseNormalUnits();
+ return NormalUnits.getNumInfoUnits();
}
- /// Get the number of compile units in this context.
+ /// Get the number of type units in this context.
unsigned getNumTypeUnits() {
- parseTypeUnits();
- return TUs.size();
+ parseNormalUnits();
+ return NormalUnits.getNumTypesUnits();
}
/// Get the number of compile units in the DWO context.
unsigned getNumDWOCompileUnits() {
- parseDWOCompileUnits();
- return DWOCUs.size();
+ parseDWOUnits();
+ return DWOUnits.getNumInfoUnits();
}
- /// Get the number of compile units in the DWO context.
+ /// Get the number of type units in the DWO context.
unsigned getNumDWOTypeUnits() {
- parseDWOTypeUnits();
- return DWOTUs.size();
+ parseDWOUnits();
+ return DWOUnits.getNumTypesUnits();
}
- /// Get the compile unit at the specified index for this compile unit.
- DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) {
- parseCompileUnits();
- return CUs[index].get();
+ /// Get the unit at the specified index.
+ DWARFUnit *getUnitAtIndex(unsigned index) {
+ parseNormalUnits();
+ return NormalUnits[index].get();
}
- /// Get the compile unit at the specified index for the DWO compile units.
- DWARFCompileUnit *getDWOCompileUnitAtIndex(unsigned index) {
- parseDWOCompileUnits();
- return DWOCUs[index].get();
+ /// Get the unit at the specified index for the DWO units.
+ DWARFUnit *getDWOUnitAtIndex(unsigned index) {
+ parseDWOUnits();
+ return DWOUnits[index].get();
}
DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash);
@@ -211,7 +231,17 @@ public:
/// Get a DIE given an exact offset.
DWARFDie getDIEForOffset(uint32_t Offset);
- unsigned getMaxVersion() const { return MaxVersion; }
+ unsigned getMaxVersion() {
+ // Ensure info units have been parsed to discover MaxVersion
+ info_section_units();
+ return MaxVersion;
+ }
+
+ unsigned getMaxDWOVersion() {
+ // Ensure DWO info units have been parsed to discover MaxVersion
+ dwo_info_section_units();
+ return MaxVersion;
+ }
void setMaxVersionIfGreater(unsigned Version) {
if (Version > MaxVersion)
@@ -232,7 +262,7 @@ public:
const DWARFDebugAbbrev *getDebugAbbrevDWO();
/// Get a pointer to the parsed DebugLoc object.
- const DWARFDebugLocDWO *getDebugLocDWO();
+ const DWARFDebugLoclists *getDebugLocDWO();
/// Get a pointer to the parsed DebugAranges object.
const DWARFDebugAranges *getDebugAranges();
@@ -327,6 +357,13 @@ public:
/// TODO: refactor compile_units() to make this const.
uint8_t getCUAddrSize();
+ /// Dump Error as warning message to stderr.
+ static void dumpWarning(Error Warning);
+
+ Triple::ArchType getArch() const {
+ return getDWARFObj().getFile()->getArch();
+ }
+
private:
/// Return the compile unit which contains instruction with provided
/// address.
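
The DWARFContext hunk above folds the separate compile-unit and type-unit containers into a single DWARFUnitVector behind range accessors, so existing callers keep using compile_units(), which now simply forwards to info_section_units(). A short usage sketch, assuming an LLVM tree that provides these headers and a DWARFContext that has already been constructed (for example with DWARFContext::create on an object file):

// Sketch only: requires LLVM's DebugInfo/DWARF headers and libraries.
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/raw_ostream.h"

static void listUnits(llvm::DWARFContext &Ctx) {
  // Units are parsed lazily the first time any of these accessors is used.
  for (const auto &CU : Ctx.compile_units())
    llvm::outs() << "compile unit at offset " << CU->getOffset() << "\n";
  llvm::outs() << Ctx.getNumCompileUnits() << " compile units, "
               << Ctx.getNumTypeUnits() << " type units\n";
}
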
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index ff1c7fb38389..7dc07d774aba 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Support/Error.h"
@@ -59,9 +60,11 @@ public:
unsigned size() const { return (unsigned)Instructions.size(); }
bool empty() const { return Instructions.empty(); }
- CFIProgram(uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor)
+ CFIProgram(uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor,
+ Triple::ArchType Arch)
: CodeAlignmentFactor(CodeAlignmentFactor),
- DataAlignmentFactor(DataAlignmentFactor) {}
+ DataAlignmentFactor(DataAlignmentFactor),
+ Arch(Arch) {}
/// Parse and store a sequence of CFI instructions from Data,
/// starting at *Offset and ending at EndOffset. *Offset is updated
@@ -76,6 +79,7 @@ private:
std::vector<Instruction> Instructions;
const uint64_t CodeAlignmentFactor;
const int64_t DataAlignmentFactor;
+ Triple::ArchType Arch;
/// Convenience method to add a new instruction with the given opcode.
void addInstruction(uint8_t Opcode) {
@@ -130,8 +134,9 @@ public:
enum FrameKind { FK_CIE, FK_FDE };
FrameEntry(FrameKind K, uint64_t Offset, uint64_t Length, uint64_t CodeAlign,
- int64_t DataAlign)
- : Kind(K), Offset(Offset), Length(Length), CFIs(CodeAlign, DataAlign) {}
+ int64_t DataAlign, Triple::ArchType Arch)
+ : Kind(K), Offset(Offset), Length(Length),
+ CFIs(CodeAlign, DataAlign, Arch) {}
virtual ~FrameEntry() {}
@@ -168,9 +173,9 @@ public:
int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister,
SmallString<8> AugmentationData, uint32_t FDEPointerEncoding,
uint32_t LSDAPointerEncoding, Optional<uint64_t> Personality,
- Optional<uint32_t> PersonalityEnc)
+ Optional<uint32_t> PersonalityEnc, Triple::ArchType Arch)
: FrameEntry(FK_CIE, Offset, Length, CodeAlignmentFactor,
- DataAlignmentFactor),
+ DataAlignmentFactor, Arch),
Version(Version), Augmentation(std::move(Augmentation)),
AddressSize(AddressSize), SegmentDescriptorSize(SegmentDescriptorSize),
CodeAlignmentFactor(CodeAlignmentFactor),
@@ -224,10 +229,11 @@ public:
// is obtained lazily once it's actually required.
FDE(uint64_t Offset, uint64_t Length, int64_t LinkedCIEOffset,
uint64_t InitialLocation, uint64_t AddressRange, CIE *Cie,
- Optional<uint64_t> LSDAAddress)
+ Optional<uint64_t> LSDAAddress, Triple::ArchType Arch)
: FrameEntry(FK_FDE, Offset, Length,
Cie ? Cie->getCodeAlignmentFactor() : 0,
- Cie ? Cie->getDataAlignmentFactor() : 0),
+ Cie ? Cie->getDataAlignmentFactor() : 0,
+ Arch),
LinkedCIEOffset(LinkedCIEOffset), InitialLocation(InitialLocation),
AddressRange(AddressRange), LinkedCIE(Cie), LSDAAddress(LSDAAddress) {}
@@ -256,6 +262,7 @@ private:
/// A parsed .debug_frame or .eh_frame section
class DWARFDebugFrame {
+ const Triple::ArchType Arch;
// True if this is parsing an eh_frame section.
const bool IsEH;
// Not zero for sane pointer values coming out of eh_frame
@@ -272,7 +279,8 @@ public:
// it is a .debug_frame section. EHFrameAddress should be different
// than zero for correct parsing of .eh_frame addresses when they
// use a PC-relative encoding.
- DWARFDebugFrame(bool IsEH = false, uint64_t EHFrameAddress = 0);
+ DWARFDebugFrame(Triple::ArchType Arch,
+ bool IsEH = false, uint64_t EHFrameAddress = 0);
~DWARFDebugFrame();
/// Dump the section data into the given stream.
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 5b2af34bbcf5..d50af5a057f1 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -247,10 +247,11 @@ public:
void clear();
/// Parse prologue and all rows.
- Error parse(DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
- const DWARFContext &Ctx, const DWARFUnit *U,
- std::function<void(Error)> RecoverableErrorCallback = warn,
- raw_ostream *OS = nullptr);
+ Error parse(
+ DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
+ const DWARFContext &Ctx, const DWARFUnit *U,
+ std::function<void(Error)> RecoverableErrorCallback,
+ raw_ostream *OS = nullptr);
using RowVector = std::vector<Row>;
using RowIter = RowVector::const_iterator;
@@ -273,14 +274,13 @@ public:
Expected<const LineTable *> getOrParseLineTable(
DWARFDataExtractor &DebugLineData, uint32_t Offset,
const DWARFContext &Ctx, const DWARFUnit *U,
- std::function<void(Error)> RecoverableErrorCallback = warn);
+ std::function<void(Error)> RecoverableErrorCallback);
/// Helper to allow for parsing of an entire .debug_line section in sequence.
class SectionParser {
public:
- using cu_range = DWARFUnitSection<DWARFCompileUnit>::iterator_range;
- using tu_range =
- iterator_range<std::deque<DWARFUnitSection<DWARFTypeUnit>>::iterator>;
+ using cu_range = DWARFUnitVector::iterator_range;
+ using tu_range = DWARFUnitVector::iterator_range;
using LineToUnitMap = std::map<uint64_t, DWARFUnit *>;
SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, cu_range CUs,
@@ -296,16 +296,17 @@ public:
/// \param OS - if not null, the parser will print information about the
/// table as it parses it.
LineTable
- parseNext(function_ref<void(Error)> RecoverableErrorCallback = warn,
- function_ref<void(Error)> UnrecoverableErrorCallback = warn,
- raw_ostream *OS = nullptr);
+ parseNext(
+ function_ref<void(Error)> RecoverableErrorCallback,
+ function_ref<void(Error)> UnrecoverableErrorCallback,
+ raw_ostream *OS = nullptr);
/// Skip the current line table and go to the following line table (if
/// present) immediately.
///
/// \param ErrorCallback - report any prologue parsing issues via this
/// callback.
- void skip(function_ref<void(Error)> ErrorCallback = warn);
+ void skip(function_ref<void(Error)> ErrorCallback);
/// Indicates if the parser has parsed as much as possible.
///
@@ -328,12 +329,6 @@ public:
bool Done = false;
};
- /// Helper function for DWARFDebugLine parse functions, to report issues
- /// identified during parsing.
- ///
- /// \param Err The Error to report.
- static void warn(Error Err);
-
private:
struct ParsingState {
ParsingState(struct LineTable *LT);
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index 9a73745fb6b4..da2098e15402 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -73,19 +73,21 @@ public:
uint32_t *Offset);
};
-class DWARFDebugLocDWO {
+class DWARFDebugLoclists {
public:
struct Entry {
- uint64_t Start;
- uint32_t Length;
+ uint8_t Kind;
+ uint64_t Value0;
+ uint64_t Value1;
SmallVector<char, 4> Loc;
};
struct LocationList {
unsigned Offset;
SmallVector<Entry, 2> Entries;
- void dump(raw_ostream &OS, bool IsLittleEndian, unsigned AddressSize,
- const MCRegisterInfo *RegInfo, unsigned Indent) const;
+ void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian,
+ unsigned AddressSize, const MCRegisterInfo *RegInfo,
+ unsigned Indent) const;
};
private:
@@ -98,15 +100,15 @@ private:
bool IsLittleEndian;
public:
- void parse(DataExtractor data);
- void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo,
+ void parse(DataExtractor data, unsigned Version);
+ void dump(raw_ostream &OS, uint64_t BaseAddr, const MCRegisterInfo *RegInfo,
Optional<uint64_t> Offset) const;
/// Return the location list at the given offset or nullptr.
LocationList const *getLocationListAtOffset(uint64_t Offset) const;
- static Optional<LocationList> parseOneLocationList(DataExtractor Data,
- uint32_t *Offset);
+ static Optional<LocationList>
+ parseOneLocationList(DataExtractor Data, unsigned *Offset, unsigned Version);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
index cae4804e61d3..9e1656eb1615 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
#include <cstdint>
#include <vector>
@@ -67,7 +68,8 @@ private:
bool GnuStyle;
public:
- DWARFDebugPubTable(StringRef Data, bool LittleEndian, bool GnuStyle);
+ DWARFDebugPubTable(const DWARFObject &Obj, const DWARFSection &Sec,
+ bool LittleEndian, bool GnuStyle);
void dump(raw_ostream &OS) const;
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index ce7436d9faa3..bc26edf00647 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -18,7 +18,6 @@
namespace llvm {
-struct BaseAddress;
class raw_ostream;
class DWARFDebugRangeList {
@@ -78,7 +77,7 @@ public:
/// list. Has to be passed base address of the compile unit referencing this
/// range list.
DWARFAddressRangesVector
- getAbsoluteRanges(llvm::Optional<BaseAddress> BaseAddr) const;
+ getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
index e2e8ab5ed219..5cc8d789e598 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
@@ -10,6 +10,7 @@
#ifndef LLVM_DEBUGINFO_DWARFDEBUGRNGLISTS_H
#define LLVM_DEBUGINFO_DWARFDEBUGRNGLISTS_H
+#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
@@ -23,6 +24,7 @@ namespace llvm {
class Error;
class raw_ostream;
+class DWARFUnit;
/// A class representing a single range list entry.
struct RangeListEntry : public DWARFListEntryBase {
@@ -35,7 +37,9 @@ struct RangeListEntry : public DWARFListEntryBase {
Error extract(DWARFDataExtractor Data, uint32_t End, uint32_t *OffsetPtr);
void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
- uint64_t &CurrentBase, DIDumpOptions DumpOpts) const;
+ uint64_t &CurrentBase, DIDumpOptions DumpOpts,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress) const;
bool isSentinel() const { return EntryKind == dwarf::DW_RLE_end_of_list; }
};
@@ -44,7 +48,8 @@ class DWARFDebugRnglist : public DWARFListType<RangeListEntry> {
public:
/// Build a DWARFAddressRangesVector from a rangelist.
DWARFAddressRangesVector
- getAbsoluteRanges(llvm::Optional<BaseAddress> BaseAddr) const;
+ getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
+ DWARFUnit &U) const;
};
class DWARFDebugRnglistTable : public DWARFListTableBase<DWARFDebugRnglist> {
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
index c77034f6348f..56d46cd739a2 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -180,6 +180,7 @@ public:
/// \returns a valid DWARFDie instance if the attribute exists, or an invalid
/// DWARFDie object if it doesn't.
DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const;
+ DWARFDie getAttributeValueAsReferencedDie(const DWARFFormValue &V) const;
/// Extract the range base attribute from this DIE as absolute section offset.
///
@@ -404,6 +405,10 @@ public:
Die = Die.getPreviousSibling();
}
+ llvm::DWARFDie::iterator base() const {
+ return llvm::DWARFDie::iterator(AtEnd ? Die : Die.getSibling());
+ }
+
reverse_iterator<llvm::DWARFDie::iterator> &operator++() {
assert(!AtEnd && "Incrementing rend");
llvm::DWARFDie D = Die.getPreviousSibling();
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 1b5f71c946f9..727e853c09fb 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -61,7 +61,6 @@ public:
dwarf::Form getForm() const { return Form; }
uint64_t getRawUValue() const { return Value.uval; }
- uint64_t getSectionIndex() const { return Value.SectionIndex; }
void setForm(dwarf::Form F) { Form = F; }
void setUValue(uint64_t V) { Value.uval = V; }
void setSValue(int64_t V) { Value.sval = V; }
@@ -75,6 +74,10 @@ public:
bool isFormClass(FormClass FC) const;
const DWARFUnit *getUnit() const { return U; }
void dump(raw_ostream &OS, DIDumpOptions DumpOpts = DIDumpOptions()) const;
+ void dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts,
+ SectionedAddress SA) const;
+ static void dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS,
+ DIDumpOptions DumpOpts, uint64_t SectionIndex);
/// Extracts a value in \p Data at offset \p *OffsetPtr. The information
/// in \p FormParams is needed to interpret some forms. The optional
@@ -101,6 +104,7 @@ public:
Optional<int64_t> getAsSignedConstant() const;
Optional<const char *> getAsCString() const;
Optional<uint64_t> getAsAddress() const;
+ Optional<SectionedAddress> getAsSectionedAddress() const;
Optional<uint64_t> getAsSectionOffset() const;
Optional<ArrayRef<uint8_t>> getAsBlock() const;
Optional<uint64_t> getAsCStringOffset() const;
@@ -238,6 +242,13 @@ inline Optional<uint64_t> toAddress(const Optional<DWARFFormValue> &V) {
return None;
}
+inline Optional<SectionedAddress>
+toSectionedAddress(const Optional<DWARFFormValue> &V) {
+ if (V)
+ return V->getAsSectionedAddress();
+ return None;
+}
+
/// Take an optional DWARFFormValue and extract an address.
///
/// \param V an optional DWARFFormValue to attempt to extract the value from.
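
A small sketch, not from the patch, of the new accessor; unlike getAsAddress() it also carries the object-file section index. The helper name and the `Die` variable are illustrative only.

    // Sketch only: fetch DW_AT_low_pc together with its section index.
    llvm::Optional<llvm::SectionedAddress>
    lowPCWithSection(const llvm::DWARFDie &Die) {
      return llvm::toSectionedAddress(Die.find(llvm::dwarf::DW_AT_low_pc));
    }
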
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
index 8d1ac5c83c23..073e02903c39 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
@@ -24,6 +24,7 @@ class DWARFGdbIndex {
uint32_t Version;
uint32_t CuListOffset;
+ uint32_t TuListOffset;
uint32_t AddressAreaOffset;
uint32_t SymbolTableOffset;
uint32_t ConstantPoolOffset;
@@ -34,6 +35,13 @@ class DWARFGdbIndex {
};
SmallVector<CompUnitEntry, 0> CuList;
+ struct TypeUnitEntry {
+ uint64_t Offset;
+ uint64_t TypeOffset;
+ uint64_t TypeSignature;
+ };
+ SmallVector<TypeUnitEntry, 0> TuList;
+
struct AddressEntry {
uint64_t LowAddress; /// The low address.
uint64_t HighAddress; /// The high address.
@@ -55,6 +63,7 @@ class DWARFGdbIndex {
uint32_t StringPoolOffset;
void dumpCUList(raw_ostream &OS) const;
+ void dumpTUList(raw_ostream &OS) const;
void dumpAddressArea(raw_ostream &OS) const;
void dumpSymbolTable(raw_ostream &OS) const;
void dumpConstantPool(raw_ostream &OS) const;
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index ab12f3bc08b0..9b987314f209 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -13,6 +13,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,10 +44,6 @@ protected:
ListEntries Entries;
public:
- // FIXME: We need to consolidate the various verions of "createError"
- // that are used in the DWARF consumer. Until then, this is a workaround.
- Error createError(const char *, const char *, uint32_t);
-
const ListEntries &getEntries() const { return Entries; }
bool empty() const { return Entries.empty(); }
void clear() { Entries.clear(); }
@@ -102,6 +99,7 @@ public:
uint32_t getHeaderOffset() const { return HeaderOffset; }
uint8_t getAddrSize() const { return HeaderData.AddrSize; }
uint32_t getLength() const { return HeaderData.Length; }
+ uint16_t getVersion() const { return HeaderData.Version; }
StringRef getSectionName() const { return SectionName; }
StringRef getListTypeString() const { return ListTypeString; }
dwarf::DwarfFormat getFormat() const { return Format; }
@@ -159,7 +157,10 @@ public:
uint32_t getHeaderOffset() const { return Header.getHeaderOffset(); }
uint8_t getAddrSize() const { return Header.getAddrSize(); }
- void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const;
+ void dump(raw_ostream &OS,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress,
+ DIDumpOptions DumpOpts = {}) const;
/// Return the contents of the offset entry designated by a given index.
Optional<uint32_t> getOffsetEntry(uint32_t Index) const {
@@ -213,7 +214,8 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
StringRef SectionName,
StringRef ListTypeString) {
if (*OffsetPtr < HeaderOffset || *OffsetPtr >= End)
- return createError("invalid %s list offset 0x%" PRIx32,
+ return createStringError(errc::invalid_argument,
+ "invalid %s list offset 0x%" PRIx32,
ListTypeString.data(), *OffsetPtr);
Entries.clear();
while (*OffsetPtr < End) {
@@ -224,14 +226,18 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
if (Entry.isSentinel())
return Error::success();
}
- return createError("no end of list marker detected at end of %s table "
+ return createStringError(errc::illegal_byte_sequence,
+ "no end of list marker detected at end of %s table "
"starting at offset 0x%" PRIx32,
SectionName.data(), HeaderOffset);
}
template <typename DWARFListType>
-void DWARFListTableBase<DWARFListType>::dump(raw_ostream &OS,
- DIDumpOptions DumpOpts) const {
+void DWARFListTableBase<DWARFListType>::dump(
+ raw_ostream &OS,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress,
+ DIDumpOptions DumpOpts) const {
Header.dump(OS, DumpOpts);
OS << HeaderString << "\n";
@@ -250,7 +256,7 @@ void DWARFListTableBase<DWARFListType>::dump(raw_ostream &OS,
for (const auto &List : ListMap)
for (const auto &Entry : List.second.getEntries())
Entry.dump(OS, getAddrSize(), MaxEncodingStringLength, CurrentBase,
- DumpOpts);
+ DumpOpts, LookupPooledAddress);
}
template <typename DWARFListType>
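
The local createError workaround is dropped in favor of llvm::createStringError, which pairs an error code with a printf-style message; a sketch mirroring the extract() hunk above (the Errc.h include added at the top of the header provides llvm::errc; the helper name is illustrative).

    // Sketch only: same pattern as the extract() change above.
    llvm::Error checkListOffset(uint32_t Offset, uint32_t End) {
      if (Offset >= End)
        return llvm::createStringError(llvm::errc::invalid_argument,
                                       "invalid list offset 0x%" PRIx32, Offset);
      return llvm::Error::success();
    }
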
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
index 6e8f370f4aea..d611b5d075c8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFObject.h
@@ -33,11 +33,13 @@ public:
virtual ArrayRef<SectionName> getSectionNames() const { return {}; }
virtual bool isLittleEndian() const = 0;
virtual uint8_t getAddressSize() const { llvm_unreachable("unimplemented"); }
- virtual const DWARFSection &getInfoSection() const { return Dummy; }
+ virtual void
+ forEachInfoSections(function_ref<void(const DWARFSection &)> F) const {}
virtual void
forEachTypesSections(function_ref<void(const DWARFSection &)> F) const {}
virtual StringRef getAbbrevSection() const { return ""; }
virtual const DWARFSection &getLocSection() const { return Dummy; }
+ virtual const DWARFSection &getLoclistsSection() const { return Dummy; }
virtual StringRef getARangeSection() const { return ""; }
virtual StringRef getDebugFrameSection() const { return ""; }
virtual StringRef getEHFrameSection() const { return ""; }
@@ -47,12 +49,13 @@ public:
virtual const DWARFSection &getRangeSection() const { return Dummy; }
virtual const DWARFSection &getRnglistsSection() const { return Dummy; }
virtual StringRef getMacinfoSection() const { return ""; }
- virtual StringRef getPubNamesSection() const { return ""; }
- virtual StringRef getPubTypesSection() const { return ""; }
- virtual StringRef getGnuPubNamesSection() const { return ""; }
- virtual StringRef getGnuPubTypesSection() const { return ""; }
+ virtual const DWARFSection &getPubNamesSection() const { return Dummy; }
+ virtual const DWARFSection &getPubTypesSection() const { return Dummy; }
+ virtual const DWARFSection &getGnuPubNamesSection() const { return Dummy; }
+ virtual const DWARFSection &getGnuPubTypesSection() const { return Dummy; }
virtual const DWARFSection &getStringOffsetSection() const { return Dummy; }
- virtual const DWARFSection &getInfoDWOSection() const { return Dummy; }
+ virtual void
+ forEachInfoDWOSections(function_ref<void(const DWARFSection &)> F) const {}
virtual void
forEachTypesDWOSections(function_ref<void(const DWARFSection &)> F) const {}
virtual StringRef getAbbrevDWOSection() const { return ""; }
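
With getInfoSection()/getInfoDWOSection() turned into visitors, a consumer that previously grabbed the single .debug_info section now enumerates every contribution. A hedged sketch with an illustrative helper name:

    // Sketch only: sum up the size of all .debug_info contributions.
    uint64_t infoBytes(const llvm::DWARFObject &Obj) {
      uint64_t Total = 0;
      Obj.forEachInfoSections(
          [&](const llvm::DWARFSection &S) { Total += S.Data.size(); });
      return Total;
    }
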
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h
index 77045f0794ae..7f8235965297 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h
@@ -23,6 +23,11 @@ struct SectionName {
bool IsNameUnique;
};
+struct SectionedAddress {
+ uint64_t Address;
+ uint64_t SectionIndex;
+};
+
} // end namespace llvm
#endif // LLVM_DEBUGINFO_DWARF_DWARFSECTION_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index cb5a78ee3dbf..8ca5ba13fc23 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -26,19 +26,20 @@ class raw_ostream;
class DWARFTypeUnit : public DWARFUnit {
public:
DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section,
- const DWARFUnitHeader &Header,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
const DWARFSection &LS, bool LE, bool IsDWO,
- const DWARFUnitSectionBase &UnitSection)
- : DWARFUnit(Context, Section, Header, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
- UnitSection) {}
+ const DWARFUnitVector &UnitVector)
+ : DWARFUnit(Context, Section, Header, DA, RS, LocSection, SS, SOS, AOS,
+ LS, LE, IsDWO, UnitVector) {}
uint64_t getTypeHash() const { return getHeader().getTypeHash(); }
uint32_t getTypeOffset() const { return getHeader().getTypeOffset(); }
- void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {});
- static const DWARFSectionKind Section = DW_SECT_TYPES;
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override;
+ // Enable LLVM-style RTTI.
+ static bool classof(const DWARFUnit *U) { return U->isTypeUnit(); }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 988a7958184c..79c3ce1106d5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -72,7 +72,8 @@ public:
/// Parse a unit header from \p debug_info starting at \p offset_ptr.
bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info,
uint32_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO,
- const DWARFUnitIndex *Index = nullptr);
+ const DWARFUnitIndex *Index = nullptr,
+ const DWARFUnitIndex::Entry *Entry = nullptr);
uint32_t getOffset() const { return Offset; }
const dwarf::FormParams &getFormParams() const { return FormParams; }
uint16_t getVersion() const { return FormParams.Version; }
@@ -101,133 +102,66 @@ public:
uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
};
-/// Base class for all DWARFUnitSection classes. This provides the
-/// functionality common to all unit types.
-class DWARFUnitSectionBase {
-public:
- /// Returns the Unit that contains the given section offset in the
- /// same section this Unit originated from.
- virtual DWARFUnit *getUnitForOffset(uint32_t Offset) const = 0;
- virtual DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) = 0;
-
- void parse(DWARFContext &C, const DWARFSection &Section);
- void parseDWO(DWARFContext &C, const DWARFSection &DWOSection,
- bool Lazy = false);
-
-protected:
- ~DWARFUnitSectionBase() = default;
-
- virtual void parseImpl(DWARFContext &Context, const DWARFObject &Obj,
- const DWARFSection &Section,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS,
- StringRef SS, const DWARFSection &SOS,
- const DWARFSection *AOS, const DWARFSection &LS,
- bool isLittleEndian, bool isDWO, bool Lazy) = 0;
-};
-
const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
DWARFSectionKind Kind);
-/// Concrete instance of DWARFUnitSection, specialized for one Unit type.
-template<typename UnitType>
-class DWARFUnitSection final : public SmallVector<std::unique_ptr<UnitType>, 1>,
- public DWARFUnitSectionBase {
- bool Parsed = false;
- std::function<std::unique_ptr<UnitType>(uint32_t)> Parser;
+/// Describe a collection of units. Intended to hold all units either from
+/// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo.
+class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> {
+ std::function<std::unique_ptr<DWARFUnit>(uint32_t, DWARFSectionKind,
+ const DWARFSection *,
+ const DWARFUnitIndex::Entry *)>
+ Parser;
+ int NumInfoUnits = -1;
public:
- using UnitVector = SmallVectorImpl<std::unique_ptr<UnitType>>;
+ using UnitVector = SmallVectorImpl<std::unique_ptr<DWARFUnit>>;
using iterator = typename UnitVector::iterator;
using iterator_range = llvm::iterator_range<typename UnitVector::iterator>;
- UnitType *getUnitForOffset(uint32_t Offset) const override {
- auto *CU = std::upper_bound(
- this->begin(), this->end(), Offset,
- [](uint32_t LHS, const std::unique_ptr<UnitType> &RHS) {
- return LHS < RHS->getNextUnitOffset();
- });
- if (CU != this->end() && (*CU)->getOffset() <= Offset)
- return CU->get();
- return nullptr;
- }
- UnitType *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) override {
- const auto *CUOff = E.getOffset(DW_SECT_INFO);
- if (!CUOff)
- return nullptr;
-
- auto Offset = CUOff->Offset;
-
- auto *CU = std::upper_bound(
- this->begin(), this->end(), CUOff->Offset,
- [](uint32_t LHS, const std::unique_ptr<UnitType> &RHS) {
- return LHS < RHS->getNextUnitOffset();
- });
- if (CU != this->end() && (*CU)->getOffset() <= Offset)
- return CU->get();
-
- if (!Parser)
- return nullptr;
-
- auto U = Parser(Offset);
- if (!U)
- U = nullptr;
-
- auto *NewCU = U.get();
- this->insert(CU, std::move(U));
- return NewCU;
+ DWARFUnit *getUnitForOffset(uint32_t Offset) const;
+ DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
+
+ /// Read units from a .debug_info or .debug_types section. Calls made
+ /// before finishedInfoUnits() are assumed to be for .debug_info sections,
+ /// calls after finishedInfoUnits() are for .debug_types sections. Caller
+ /// must not mix calls to addUnitsForSection and addUnitsForDWOSection.
+ void addUnitsForSection(DWARFContext &C, const DWARFSection &Section,
+ DWARFSectionKind SectionKind);
+ /// Read units from a .debug_info.dwo or .debug_types.dwo section. Calls
+ /// made before finishedInfoUnits() are assumed to be for .debug_info.dwo
+ /// sections, calls after finishedInfoUnits() are for .debug_types.dwo
+ /// sections. Caller must not mix calls to addUnitsForSection and
+ /// addUnitsForDWOSection.
+ void addUnitsForDWOSection(DWARFContext &C, const DWARFSection &DWOSection,
+ DWARFSectionKind SectionKind, bool Lazy = false);
+
+ /// Add an existing DWARFUnit to this UnitVector. This is used by the DWARF
+  /// verifier to process units separately.
+ DWARFUnit *addUnit(std::unique_ptr<DWARFUnit> Unit);
+
+ /// Returns number of all units held by this instance.
+ unsigned getNumUnits() const { return size(); }
+ /// Returns number of units from all .debug_info[.dwo] sections.
+ unsigned getNumInfoUnits() const {
+ return NumInfoUnits == -1 ? size() : NumInfoUnits;
}
+ /// Returns number of units from all .debug_types[.dwo] sections.
+ unsigned getNumTypesUnits() const { return size() - NumInfoUnits; }
+ /// Indicate that parsing .debug_info[.dwo] is done, and remaining units
+ /// will be from .debug_types[.dwo].
+ void finishedInfoUnits() { NumInfoUnits = size(); }
private:
- void parseImpl(DWARFContext &Context, const DWARFObject &Obj,
- const DWARFSection &Section, const DWARFDebugAbbrev *DA,
- const DWARFSection *RS, StringRef SS, const DWARFSection &SOS,
- const DWARFSection *AOS, const DWARFSection &LS, bool LE,
- bool IsDWO, bool Lazy) override {
- if (Parsed)
- return;
- DWARFDataExtractor Data(Obj, Section, LE, 0);
- if (!Parser) {
- const DWARFUnitIndex *Index = nullptr;
- if (IsDWO)
- Index = &getDWARFUnitIndex(Context, UnitType::Section);
- Parser = [=, &Context, &Section, &SOS,
- &LS](uint32_t Offset) -> std::unique_ptr<UnitType> {
- if (!Data.isValidOffset(Offset))
- return nullptr;
- DWARFUnitHeader Header;
- if (!Header.extract(Context, Data, &Offset, UnitType::Section, Index))
- return nullptr;
- auto U = llvm::make_unique<UnitType>(
- Context, Section, Header, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
- *this);
- return U;
- };
- }
- if (Lazy)
- return;
- auto I = this->begin();
- uint32_t Offset = 0;
- while (Data.isValidOffset(Offset)) {
- if (I != this->end() && (*I)->getOffset() == Offset) {
- ++I;
- continue;
- }
- auto U = Parser(Offset);
- if (!U)
- break;
- Offset = U->getNextUnitOffset();
- I = std::next(this->insert(I, std::move(U)));
- }
- Parsed = true;
- }
+ void addUnitsImpl(DWARFContext &Context, const DWARFObject &Obj,
+ const DWARFSection &Section, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
+ StringRef SS, const DWARFSection &SOS,
+ const DWARFSection *AOS, const DWARFSection &LS, bool LE,
+ bool IsDWO, bool Lazy, DWARFSectionKind SectionKind);
};
/// Represents base address of the CU.
-struct BaseAddress {
- uint64_t Address;
- uint64_t SectionIndex;
-};
-
/// Represents a unit's contribution to the string offsets table.
struct StrOffsetsContributionDescriptor {
uint64_t Base = 0;
@@ -261,14 +195,20 @@ class DWARFUnit {
const DWARFDebugAbbrev *Abbrev;
const DWARFSection *RangeSection;
uint32_t RangeSectionBase;
+ /// We either keep track of the location list section or its data, depending
+ /// on whether we are handling a split DWARF section or not.
+ union {
+ const DWARFSection *LocSection;
+ StringRef LocSectionData;
+ };
const DWARFSection &LineSection;
StringRef StringSection;
const DWARFSection &StringOffsetSection;
const DWARFSection *AddrOffsetSection;
uint32_t AddrOffsetSectionBase = 0;
bool isLittleEndian;
- bool isDWO;
- const DWARFUnitSectionBase &UnitSection;
+ bool IsDWO;
+ const DWARFUnitVector &UnitVector;
/// Start, length, and DWARF format of the unit's contribution to the string
/// offsets table (DWARF v5).
@@ -278,7 +218,7 @@ class DWARFUnit {
Optional<DWARFDebugRnglistTable> RngListTable;
mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
- llvm::Optional<BaseAddress> BaseAddr;
+ llvm::Optional<SectionedAddress> BaseAddr;
/// The compile unit debug information entry items.
std::vector<DWARFDebugInfoEntry> DieArray;
@@ -308,28 +248,30 @@ protected:
/// length and form. The given offset is expected to be derived from the unit
/// DIE's DW_AT_str_offsets_base attribute.
Optional<StrOffsetsContributionDescriptor>
- determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
- uint64_t Offset);
+ determineStringOffsetsTableContribution(DWARFDataExtractor &DA);
/// Find the unit's contribution to the string offsets table and determine its
/// length and form. The given offset is expected to be 0 in a dwo file or,
/// in a dwp file, the start of the unit's contribution to the string offsets
/// table section (as determined by the index table).
Optional<StrOffsetsContributionDescriptor>
- determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
- uint64_t Offset);
+ determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA);
public:
DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
- const DWARFUnitHeader &Header,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS,
- const DWARFSection &SOS, const DWARFSection *AOS,
+ const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
+ StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
const DWARFSection &LS, bool LE, bool IsDWO,
- const DWARFUnitSectionBase &UnitSection);
+ const DWARFUnitVector &UnitVector);
virtual ~DWARFUnit();
+ bool isDWOUnit() const { return IsDWO; }
DWARFContext& getContext() const { return Context; }
+ const DWARFSection &getInfoSection() const { return InfoSection; }
+ const DWARFSection *getLocSection() const { return LocSection; }
+ StringRef getLocSectionData() const { return LocSectionData; }
uint32_t getOffset() const { return Header.getOffset(); }
const dwarf::FormParams &getFormParams() const {
return Header.getFormParams();
@@ -342,6 +284,7 @@ public:
}
uint32_t getLength() const { return Header.getLength(); }
uint8_t getUnitType() const { return Header.getUnitType(); }
+ bool isTypeUnit() const { return Header.isTypeUnit(); }
uint32_t getNextUnitOffset() const { return Header.getNextUnitOffset(); }
const DWARFSection &getLineSection() const { return LineSection; }
StringRef getStringSection() const { return StringSection; }
@@ -362,8 +305,8 @@ public:
RangeSectionBase = Base;
}
- bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
- bool getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
+ Optional<SectionedAddress> getAddrOffsetSectionItem(uint32_t Index) const;
+ Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const;
DWARFDataExtractor getDebugInfoExtractor() const;
@@ -433,7 +376,7 @@ public:
llvm_unreachable("Invalid UnitType.");
}
- llvm::Optional<BaseAddress> getBaseAddress();
+ llvm::Optional<SectionedAddress> getBaseAddress();
DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
extractDIEsIfNeeded(ExtractUnitDIEOnly);
@@ -467,7 +410,7 @@ public:
return None;
}
- void collectAddressRanges(DWARFAddressRangesVector &CURanges);
+ Expected<DWARFAddressRangesVector> collectAddressRanges();
/// Returns subprogram DIE with address range encompassing the provided
/// address. The pointer is alive as long as parsed compile unit DIEs are not
@@ -480,8 +423,8 @@ public:
void getInlinedChainForAddress(uint64_t Address,
SmallVectorImpl<DWARFDie> &InlinedChain);
- /// getUnitSection - Return the DWARFUnitSection containing this unit.
- const DWARFUnitSectionBase &getUnitSection() const { return UnitSection; }
+ /// Return the DWARFUnitVector containing this unit.
+ const DWARFUnitVector &getUnitVector() const { return UnitVector; }
/// Returns the number of DIEs in the unit. Parses the unit
/// if necessary.
@@ -541,6 +484,7 @@ public:
return die_iterator_range(DieArray.begin(), DieArray.end());
}
+ virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0;
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const {
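
DWARFUnitVector replaces the templated DWARFUnitSection: one flat vector now holds both compile and type units, with the info/type split tracked via finishedInfoUnits(). A hedged sketch of iterating such a vector (the helper name is illustrative, and the vector is assumed to have been populated with addUnitsForSection):

    // Sketch only: walk all units, info units first, then type units.
    void listUnits(const llvm::DWARFUnitVector &Units, llvm::raw_ostream &OS) {
      OS << Units.getNumInfoUnits() << " info unit(s), "
         << Units.getNumUnits() << " total\n";
      for (const std::unique_ptr<llvm::DWARFUnit> &U : Units)
        OS << "unit at offset " << U->getOffset()
           << (U->isTypeUnit() ? " (type unit)\n" : "\n");
    }
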
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
index 49ed4bb222f3..16be5f9401c0 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
@@ -74,6 +74,7 @@ private:
int InfoColumn = -1;
std::unique_ptr<DWARFSectionKind[]> ColumnKinds;
std::unique_ptr<Entry[]> Rows;
+ mutable std::vector<Entry *> OffsetLookup;
static StringRef getColumnHeader(DWARFSectionKind DS);
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index a829510a219d..e47fbea5646e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include <cstdint>
#include <map>
@@ -96,10 +97,14 @@ private:
/// lies between two valid DIEs.
std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets;
uint32_t NumDebugLineErrors = 0;
+ // Used to relax some checks that do not currently work portably
+ bool IsObjectFile;
+ bool IsMachOObject;
raw_ostream &error() const;
raw_ostream &warn() const;
raw_ostream &note() const;
+ raw_ostream &dump(const DWARFDie &Die, unsigned indent = 0) const;
/// Verifies the abbreviations section.
///
@@ -113,20 +118,20 @@ private:
/// \returns The number of errors that occurred during verification.
unsigned verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev);
- /// Verifies the header of a unit in the .debug_info section.
+ /// Verifies the header of a unit in a .debug_info or .debug_types section.
///
/// This function currently checks for:
/// - Unit is in 32-bit DWARF format. The function can be modified to
/// support 64-bit format.
/// - The DWARF version is valid
/// - The unit type is valid (if unit is in version >=5)
- /// - The unit doesn't extend beyond .debug_info section
+ /// - The unit doesn't extend beyond the containing section
/// - The address size is valid
/// - The offset in the .debug_abbrev section is valid
///
- /// \param DebugInfoData The .debug_info section data
+ /// \param DebugInfoData The section data
/// \param Offset A reference to the offset start of the unit. The offset will
- /// be updated to point to the next unit in .debug_info
+ /// be updated to point to the next unit in the section
/// \param UnitIndex The index of the unit to be verified
/// \param UnitType A reference to the type of the unit
/// \param isUnitDWARF64 A reference to a flag that shows whether the unit is
@@ -137,7 +142,7 @@ private:
uint32_t *Offset, unsigned UnitIndex, uint8_t &UnitType,
bool &isUnitDWARF64);
- /// Verifies the header of a unit in the .debug_info section.
+ /// Verifies the header of a unit in a .debug_info or .debug_types section.
///
/// This function currently verifies:
/// - The debug info attributes.
@@ -146,13 +151,29 @@ private:
/// - That the root DIE is a unit DIE.
/// - If a unit type is provided, that the unit DIE matches the unit type.
/// - The DIE ranges.
+ /// - That call site entries are only nested within subprograms with a
+ /// DW_AT_call attribute.
///
- /// \param Unit The DWARF Unit to verifiy.
- /// \param UnitType An optional unit type which will be used to verify the
- /// type of the unit DIE.
+ /// \param Unit The DWARF Unit to verify.
///
- /// \returns true if the content is verified successfully, false otherwise.
- bool verifyUnitContents(DWARFUnit &Unit, uint8_t UnitType = 0);
+ /// \returns The number of errors that occurred during verification.
+ unsigned verifyUnitContents(DWARFUnit &Unit);
+
+ /// Verifies the unit headers and contents in a .debug_info or .debug_types
+ /// section.
+ ///
+ /// \param S The DWARF Section to verify.
+ /// \param SectionKind The object-file section kind that S comes from.
+ ///
+ /// \returns The number of errors that occurred during verification.
+ unsigned verifyUnitSection(const DWARFSection &S,
+ DWARFSectionKind SectionKind);
+
+ /// Verifies that a call site entry is nested within a subprogram with a
+ /// DW_AT_call attribute.
+ ///
+ /// \returns Number of errors that occurred during verification.
+ unsigned verifyDebugInfoCallSite(const DWARFDie &Die);
/// Verify that all Die ranges are valid.
///
@@ -172,7 +193,7 @@ private:
/// \param AttrValue The DWARF attribute value to check
///
/// \returns NumErrors The number of errors occurred during verification of
- /// attributes' values in a .debug_info section unit
+ /// attributes' values in a unit
unsigned verifyDebugInfoAttribute(const DWARFDie &Die,
DWARFAttribute &AttrValue);
@@ -180,14 +201,14 @@ private:
///
/// This function currently checks for:
/// - All DW_FORM_ref values that are CU relative have valid CU offsets
- /// - All DW_FORM_ref_addr values have valid .debug_info offsets
+ /// - All DW_FORM_ref_addr values have valid section offsets
/// - All DW_FORM_strp values have valid .debug_str offsets
///
/// \param Die The DWARF DIE that owns the attribute value
/// \param AttrValue The DWARF attribute value to check
///
/// \returns NumErrors The number of errors occurred during verification of
- /// attributes' forms in a .debug_info section unit
+ /// attributes' forms in a unit
unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue);
/// Verifies the all valid references that were found when iterating through
@@ -199,7 +220,7 @@ private:
/// CU relative and absolute references.
///
/// \returns NumErrors The number of errors occurred during verification of
- /// references for the .debug_info section
+ /// references for the .debug_info and .debug_types sections
unsigned verifyDebugInfoReferences();
/// Verify the DW_AT_stmt_list encoding and value and ensure that no
@@ -268,8 +289,8 @@ private:
public:
DWARFVerifier(raw_ostream &S, DWARFContext &D,
- DIDumpOptions DumpOpts = DIDumpOptions::getForSingleDIE())
- : OS(S), DCtx(D), DumpOpts(std::move(DumpOpts)) {}
+ DIDumpOptions DumpOpts = DIDumpOptions::getForSingleDIE());
+
/// Verify the information in any of the following sections, if available:
/// .debug_abbrev, debug_abbrev.dwo
///
@@ -280,12 +301,12 @@ public:
/// false otherwise.
bool handleDebugAbbrev();
- /// Verify the information in the .debug_info section.
+ /// Verify the information in the .debug_info and .debug_types sections.
///
- /// Any errors are reported to the stream that was this object was
+ /// Any errors are reported to the stream that this object was
/// constructed with.
///
- /// \returns true if the .debug_info verifies successfully, false otherwise.
+ /// \returns true if all sections verify successfully, false otherwise.
bool handleDebugInfo();
/// Verify the information in the .debug_line section.
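
A small sketch, not from the patch, of driving the verifier; with this change handleDebugInfo() covers .debug_types as well as .debug_info. The wrapper name is illustrative only.

    // Sketch only: verify abbreviations plus all info/type unit sections.
    bool verifyDwarf(llvm::DWARFContext &DCtx) {
      llvm::DWARFVerifier Verifier(llvm::errs(), DCtx);
      bool Success = Verifier.handleDebugAbbrev();
      Success &= Verifier.handleDebugInfo();
      return Success;
    }
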
diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h
index e66aeca3cd45..5c043a7837b3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h
@@ -24,22 +24,28 @@ enum class msf_error_code {
invalid_format,
block_in_use
};
+} // namespace msf
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::msf::msf_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace msf {
+const std::error_category &MSFErrCategory();
+
+inline std::error_code make_error_code(msf_error_code E) {
+ return std::error_code(static_cast<int>(E), MSFErrCategory());
+}
/// Base class for errors originating when parsing raw PDB files
-class MSFError : public ErrorInfo<MSFError> {
+class MSFError : public ErrorInfo<MSFError, StringError> {
public:
+ using ErrorInfo<MSFError, StringError>::ErrorInfo; // inherit constructors
+ MSFError(const Twine &S) : ErrorInfo(S, msf_error_code::unspecified) {}
static char ID;
- MSFError(msf_error_code C);
- MSFError(const std::string &Context);
- MSFError(msf_error_code C, const std::string &Context);
-
- void log(raw_ostream &OS) const override;
- const std::string &getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
-
-private:
- std::string ErrMsg;
- msf_error_code Code;
};
} // namespace msf
} // namespace llvm
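
MSFError now layers on StringError: the std::is_error_code_enum specialization plus make_error_code let an msf_error_code convert implicitly to std::error_code, and the inherited constructors carry both a message and a code. A hedged sketch, assuming the inherited (Twine, error_code) constructor implied by the delegation above; DIAError and PDBError further down follow the same pattern. The function and its condition are illustrative only.

    // Sketch only: raise an MSF error whose category survives convertToErrorCode().
    llvm::Error readBlock(bool Valid) {
      if (!Valid)
        return llvm::make_error<llvm::msf::MSFError>(
            "stream directory is corrupt",
            llvm::msf::msf_error_code::invalid_format);
      return llvm::Error::success();
    }
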
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
index 9713dce362d2..ac7f19637ab1 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
@@ -43,11 +43,6 @@ public:
void reset() override { Enumerator->reset(); }
- ConcreteSymbolEnumerator<ChildType> *clone() const override {
- std::unique_ptr<IPDBEnumSymbols> WrappedClone(Enumerator->clone());
- return new ConcreteSymbolEnumerator<ChildType>(std::move(WrappedClone));
- }
-
private:
std::unique_ptr<IPDBEnumSymbols> Enumerator;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
index 930bea6060b2..881d7329ab66 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
@@ -24,7 +24,6 @@ public:
llvm::Optional<RecordType> getItemAtIndex(uint32_t Index) const override;
bool getNext(RecordType &Record) override;
void reset() override;
- DIADataStream *clone() const override;
private:
CComPtr<IDiaEnumDebugStreamData> StreamData;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
index ffae6645e94b..1f129052d034 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
@@ -27,7 +27,6 @@ public:
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumDebugStreams *clone() const override;
private:
CComPtr<IDiaEnumDebugStreams> Enumerator;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
new file mode 100644
index 000000000000..f3b02f07e648
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
@@ -0,0 +1,36 @@
+//==- DIAEnumFrameData.h --------------------------------------- -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_DIA_DIAENUMFRAMEDATA_H
+#define LLVM_DEBUGINFO_PDB_DIA_DIAENUMFRAMEDATA_H
+
+#include "DIASupport.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
+
+namespace llvm {
+namespace pdb {
+
+class DIAEnumFrameData : public IPDBEnumChildren<IPDBFrameData> {
+public:
+ explicit DIAEnumFrameData(CComPtr<IDiaEnumFrameData> DiaEnumerator);
+
+ uint32_t getChildCount() const override;
+ ChildTypePtr getChildAtIndex(uint32_t Index) const override;
+ ChildTypePtr getNext() override;
+ void reset() override;
+
+private:
+ CComPtr<IDiaEnumFrameData> Enumerator;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
index 39490a4b2209..4669a8d31196 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
@@ -16,22 +16,18 @@
namespace llvm {
namespace pdb {
-class DIASession;
class DIAEnumInjectedSources : public IPDBEnumChildren<IPDBInjectedSource> {
public:
explicit DIAEnumInjectedSources(
- const DIASession &PDBSession,
CComPtr<IDiaEnumInjectedSources> DiaEnumerator);
uint32_t getChildCount() const override;
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumInjectedSources *clone() const override;
private:
- const DIASession &Session;
CComPtr<IDiaEnumInjectedSources> Enumerator;
};
} // namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
index 08f0de124ede..f1cb6268a26d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
@@ -26,7 +26,6 @@ public:
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumLineNumbers *clone() const override;
private:
CComPtr<IDiaEnumLineNumbers> Enumerator;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
index 52c9563b5d5f..ac2ae317d263 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
@@ -28,7 +28,6 @@ public:
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumSectionContribs *clone() const override;
private:
const DIASession &Session;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
index e69d18f5ba37..dac3df06a178 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
@@ -27,7 +27,6 @@ public:
ChildTypePtr getChildAtIndex(uint32_t Index) const override;
ChildTypePtr getNext() override;
void reset() override;
- DIAEnumSourceFiles *clone() const override;
private:
const DIASession &Session;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
index f779cd1f4be3..9689859ae0f8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
@@ -27,7 +27,6 @@ public:
std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
std::unique_ptr<PDBSymbol> getNext() override;
void reset() override;
- DIAEnumSymbols *clone() const override;
private:
const DIASession &Session;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
index 926fcfe69648..f4f856ebb6fd 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
@@ -26,7 +26,6 @@ public:
std::unique_ptr<IPDBTable> getChildAtIndex(uint32_t Index) const override;
std::unique_ptr<IPDBTable> getNext() override;
void reset() override;
- DIAEnumTables *clone() const override;
private:
CComPtr<IDiaEnumTables> Enumerator;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
index 35a39a0df5ca..2b33a65a0a14 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
@@ -23,23 +23,29 @@ enum class dia_error_code {
already_loaded,
debug_info_mismatch,
};
+} // namespace pdb
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::pdb::dia_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace pdb {
+const std::error_category &DIAErrCategory();
+
+inline std::error_code make_error_code(dia_error_code E) {
+ return std::error_code(static_cast<int>(E), DIAErrCategory());
+}
/// Base class for errors originating in DIA SDK, e.g. COM calls
-class DIAError : public ErrorInfo<DIAError> {
+class DIAError : public ErrorInfo<DIAError, StringError> {
public:
+ using ErrorInfo<DIAError, StringError>::ErrorInfo;
+ DIAError(const Twine &S) : ErrorInfo(S, dia_error_code::unspecified) {}
static char ID;
- DIAError(dia_error_code C);
- DIAError(StringRef Context);
- DIAError(dia_error_code C, StringRef Context);
-
- void log(raw_ostream &OS) const override;
- StringRef getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
-
-private:
- std::string ErrMsg;
- dia_error_code Code;
};
-}
-}
+} // namespace pdb
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
new file mode 100644
index 000000000000..0ce6cfc93030
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
@@ -0,0 +1,39 @@
+//===- DIAFrameData.h - DIA Impl. of IPDBFrameData ---------------- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_DIA_DIAFRAMEDATA_H
+#define LLVM_DEBUGINFO_PDB_DIA_DIAFRAMEDATA_H
+
+#include "DIASupport.h"
+#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
+
+namespace llvm {
+namespace pdb {
+
+class DIASession;
+
+class DIAFrameData : public IPDBFrameData {
+public:
+ explicit DIAFrameData(CComPtr<IDiaFrameData> DiaFrameData);
+
+ uint32_t getAddressOffset() const override;
+ uint32_t getAddressSection() const override;
+ uint32_t getLengthBlock() const override;
+ std::string getProgram() const override;
+ uint32_t getRelativeVirtualAddress() const override;
+ uint64_t getVirtualAddress() const override;
+
+private:
+ CComPtr<IDiaFrameData> FrameData;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
index dfb35647055a..5d4f855c63ca 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
@@ -20,7 +20,8 @@ class DIARawSymbol : public IPDBRawSymbol {
public:
DIARawSymbol(const DIASession &PDBSession, CComPtr<IDiaSymbol> DiaSymbol);
- void dump(raw_ostream &OS, int Indent) const override;
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
CComPtr<IDiaSymbol> getDiaSymbol() const { return Symbol; }
@@ -63,25 +64,25 @@ public:
uint32_t getAddressOffset() const override;
uint32_t getAddressSection() const override;
uint32_t getAge() const override;
- uint32_t getArrayIndexTypeId() const override;
+ SymIndexId getArrayIndexTypeId() const override;
uint32_t getBaseDataOffset() const override;
uint32_t getBaseDataSlot() const override;
- uint32_t getBaseSymbolId() const override;
+ SymIndexId getBaseSymbolId() const override;
PDB_BuiltinType getBuiltinType() const override;
uint32_t getBitPosition() const override;
PDB_CallingConv getCallingConvention() const override;
- uint32_t getClassParentId() const override;
+ SymIndexId getClassParentId() const override;
std::string getCompilerName() const override;
uint32_t getCount() const override;
uint32_t getCountLiveRanges() const override;
PDB_Lang getLanguage() const override;
- uint32_t getLexicalParentId() const override;
+ SymIndexId getLexicalParentId() const override;
std::string getLibraryName() const override;
uint32_t getLiveRangeStartAddressOffset() const override;
uint32_t getLiveRangeStartAddressSection() const override;
uint32_t getLiveRangeStartRelativeVirtualAddress() const override;
codeview::RegisterId getLocalBasePointerRegisterId() const override;
- uint32_t getLowerBoundId() const override;
+ SymIndexId getLowerBoundId() const override;
uint32_t getMemorySpaceKind() const override;
std::string getName() const override;
uint32_t getNumberOfAcceleratorPointerTags() const override;
@@ -91,7 +92,7 @@ public:
uint32_t getNumberOfRows() const override;
std::string getObjectFileName() const override;
uint32_t getOemId() const override;
- uint32_t getOemSymbolId() const override;
+ SymIndexId getOemSymbolId() const override;
uint32_t getOffsetInUdt() const override;
PDB_Cpu getPlatform() const override;
uint32_t getRank() const override;
@@ -105,9 +106,9 @@ public:
std::string getSourceFileName() const override;
std::unique_ptr<IPDBLineNumber> getSrcLineOnTypeDefn() const override;
uint32_t getStride() const override;
- uint32_t getSubTypeId() const override;
+ SymIndexId getSubTypeId() const override;
std::string getSymbolsFileName() const override;
- uint32_t getSymIndexId() const override;
+ SymIndexId getSymIndexId() const override;
uint32_t getTargetOffset() const override;
uint32_t getTargetRelativeVirtualAddress() const override;
uint64_t getTargetVirtualAddress() const override;
@@ -115,16 +116,16 @@ public:
uint32_t getTextureSlot() const override;
uint32_t getTimeStamp() const override;
uint32_t getToken() const override;
- uint32_t getTypeId() const override;
+ SymIndexId getTypeId() const override;
uint32_t getUavSlot() const override;
std::string getUndecoratedName() const override;
std::string getUndecoratedNameEx(PDB_UndnameFlags Flags) const override;
- uint32_t getUnmodifiedTypeId() const override;
- uint32_t getUpperBoundId() const override;
+ SymIndexId getUnmodifiedTypeId() const override;
+ SymIndexId getUpperBoundId() const override;
Variant getValue() const override;
uint32_t getVirtualBaseDispIndex() const override;
uint32_t getVirtualBaseOffset() const override;
- uint32_t getVirtualTableShapeId() const override;
+ SymIndexId getVirtualTableShapeId() const override;
std::unique_ptr<PDBSymbolTypeBuiltin>
getVirtualBaseTableType() const override;
PDB_DataKind getDataKind() const override;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h
index a63659439389..592e061a8d83 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIASession.h
@@ -32,7 +32,7 @@ public:
uint64_t getLoadAddress() const override;
bool setLoadAddress(uint64_t Address) override;
std::unique_ptr<PDBSymbolExe> getGlobalScope() override;
- std::unique_ptr<PDBSymbol> getSymbolById(uint32_t SymbolId) const override;
+ std::unique_ptr<PDBSymbol> getSymbolById(SymIndexId SymbolId) const override;
bool addressForVA(uint64_t VA, uint32_t &Section,
uint32_t &Offset) const override;
@@ -85,6 +85,7 @@ public:
std::unique_ptr<IPDBEnumSectionContribs> getSectionContribs() const override;
+ std::unique_ptr<IPDBEnumFrameData> getFrameData() const override;
private:
CComPtr<IDiaSession> Session;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
index 03205a986f1a..997f13f5f30e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
@@ -16,29 +16,37 @@
namespace llvm {
namespace pdb {
-enum class generic_error_code {
- invalid_path = 1,
+enum class pdb_error_code {
+ invalid_utf8_path = 1,
dia_sdk_not_present,
- type_server_not_found,
+ dia_failed_loading,
+ signature_out_of_date,
+ external_cmdline_ref,
unspecified,
};
+} // namespace pdb
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::pdb::pdb_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace pdb {
+const std::error_category &PDBErrCategory();
+
+inline std::error_code make_error_code(pdb_error_code E) {
+ return std::error_code(static_cast<int>(E), PDBErrCategory());
+}
/// Base class for errors originating when parsing raw PDB files
-class GenericError : public ErrorInfo<GenericError> {
+class PDBError : public ErrorInfo<PDBError, StringError> {
public:
+ using ErrorInfo<PDBError, StringError>::ErrorInfo; // inherit constructors
+ PDBError(const Twine &S) : ErrorInfo(S, pdb_error_code::unspecified) {}
static char ID;
- GenericError(generic_error_code C);
- GenericError(StringRef Context);
- GenericError(generic_error_code C, StringRef Context);
-
- void log(raw_ostream &OS) const override;
- StringRef getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
-
-private:
- std::string ErrMsg;
- generic_error_code Code;
};
-}
-}
+} // namespace pdb
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
index 67b5a06d7c59..0d7a286a11a6 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h
@@ -32,7 +32,6 @@ public:
virtual Optional<RecordType> getItemAtIndex(uint32_t Index) const = 0;
virtual bool getNext(RecordType &Record) = 0;
virtual void reset() = 0;
- virtual IPDBDataStream *clone() const = 0;
};
} // end namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index b6b7d95f6282..7017f2600e9b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -10,6 +10,7 @@
#ifndef LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
#define LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
+#include <cassert>
#include <cstdint>
#include <memory>
@@ -27,7 +28,19 @@ public:
virtual ChildTypePtr getChildAtIndex(uint32_t Index) const = 0;
virtual ChildTypePtr getNext() = 0;
virtual void reset() = 0;
- virtual MyType *clone() const = 0;
+};
+
+template <typename ChildType>
+class NullEnumerator : public IPDBEnumChildren<ChildType> {
+ virtual uint32_t getChildCount() const override { return 0; }
+ virtual std::unique_ptr<ChildType>
+ getChildAtIndex(uint32_t Index) const override {
+ return nullptr;
+ }
+ virtual std::unique_ptr<ChildType> getNext() override {
+ return nullptr;
+ }
+ virtual void reset() override {}
};
} // end namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h
new file mode 100644
index 000000000000..74679215b880
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h
@@ -0,0 +1,36 @@
+//===- IPDBFrameData.h - base interface for frame data ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_IPDBFRAMEDATA_H
+#define LLVM_DEBUGINFO_PDB_IPDBFRAMEDATA_H
+
+#include <cstdint>
+#include <string>
+
+namespace llvm {
+namespace pdb {
+
+/// IPDBFrameData defines an interface used to represent the frame data of a
+/// code block.
+class IPDBFrameData {
+public:
+ virtual ~IPDBFrameData();
+
+ virtual uint32_t getAddressOffset() const = 0;
+ virtual uint32_t getAddressSection() const = 0;
+ virtual uint32_t getLengthBlock() const = 0;
+ virtual std::string getProgram() const = 0;
+ virtual uint32_t getRelativeVirtualAddress() const = 0;
+ virtual uint64_t getVirtualAddress() const = 0;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
index bcb2eaa35630..7c818d7cadeb 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
@@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_PDB_IPDBRAWSYMBOL_H
#include "PDBTypes.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -21,9 +22,26 @@ class raw_ostream;
namespace pdb {
+class IPDBSession;
class PDBSymbolTypeVTable;
class PDBSymbolTypeVTableShape;
+enum class PdbSymbolIdField : uint32_t {
+ None = 0,
+ SymIndexId = 1 << 0,
+ LexicalParent = 1 << 1,
+ ClassParent = 1 << 2,
+ Type = 1 << 3,
+ UnmodifiedType = 1 << 4,
+ All = 0xFFFFFFFF,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ All)
+};
+
+void dumpSymbolIdField(raw_ostream &OS, StringRef Name, SymIndexId Value,
+ int Indent, const IPDBSession &Session,
+ PdbSymbolIdField FieldId, PdbSymbolIdField ShowFlags,
+ PdbSymbolIdField RecurseFlags);
+
/// IPDBRawSymbol defines an interface used to represent an arbitrary symbol.
/// It exposes a monolithic interface consisting of accessors for the union of
/// all properties that are valid for any symbol type. This interface is then
@@ -33,7 +51,8 @@ class IPDBRawSymbol {
public:
virtual ~IPDBRawSymbol();
- virtual void dump(raw_ostream &OS, int Indent) const = 0;
+ virtual void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const = 0;
virtual std::unique_ptr<IPDBEnumSymbols>
findChildren(PDB_SymType Type) const = 0;
@@ -74,26 +93,26 @@ public:
virtual uint32_t getAddressOffset() const = 0;
virtual uint32_t getAddressSection() const = 0;
virtual uint32_t getAge() const = 0;
- virtual uint32_t getArrayIndexTypeId() const = 0;
+ virtual SymIndexId getArrayIndexTypeId() const = 0;
virtual uint32_t getBaseDataOffset() const = 0;
virtual uint32_t getBaseDataSlot() const = 0;
- virtual uint32_t getBaseSymbolId() const = 0;
+ virtual SymIndexId getBaseSymbolId() const = 0;
virtual PDB_BuiltinType getBuiltinType() const = 0;
virtual uint32_t getBitPosition() const = 0;
virtual PDB_CallingConv getCallingConvention() const = 0;
- virtual uint32_t getClassParentId() const = 0;
+ virtual SymIndexId getClassParentId() const = 0;
virtual std::string getCompilerName() const = 0;
virtual uint32_t getCount() const = 0;
virtual uint32_t getCountLiveRanges() const = 0;
virtual void getFrontEndVersion(VersionInfo &Version) const = 0;
virtual PDB_Lang getLanguage() const = 0;
- virtual uint32_t getLexicalParentId() const = 0;
+ virtual SymIndexId getLexicalParentId() const = 0;
virtual std::string getLibraryName() const = 0;
virtual uint32_t getLiveRangeStartAddressOffset() const = 0;
virtual uint32_t getLiveRangeStartAddressSection() const = 0;
virtual uint32_t getLiveRangeStartRelativeVirtualAddress() const = 0;
virtual codeview::RegisterId getLocalBasePointerRegisterId() const = 0;
- virtual uint32_t getLowerBoundId() const = 0;
+ virtual SymIndexId getLowerBoundId() const = 0;
virtual uint32_t getMemorySpaceKind() const = 0;
virtual std::string getName() const = 0;
virtual uint32_t getNumberOfAcceleratorPointerTags() const = 0;
@@ -103,7 +122,7 @@ public:
virtual uint32_t getNumberOfRows() const = 0;
virtual std::string getObjectFileName() const = 0;
virtual uint32_t getOemId() const = 0;
- virtual uint32_t getOemSymbolId() const = 0;
+ virtual SymIndexId getOemSymbolId() const = 0;
virtual uint32_t getOffsetInUdt() const = 0;
virtual PDB_Cpu getPlatform() const = 0;
virtual uint32_t getRank() const = 0;
@@ -118,9 +137,9 @@ public:
virtual std::unique_ptr<IPDBLineNumber>
getSrcLineOnTypeDefn() const = 0;
virtual uint32_t getStride() const = 0;
- virtual uint32_t getSubTypeId() const = 0;
+ virtual SymIndexId getSubTypeId() const = 0;
virtual std::string getSymbolsFileName() const = 0;
- virtual uint32_t getSymIndexId() const = 0;
+ virtual SymIndexId getSymIndexId() const = 0;
virtual uint32_t getTargetOffset() const = 0;
virtual uint32_t getTargetRelativeVirtualAddress() const = 0;
virtual uint64_t getTargetVirtualAddress() const = 0;
@@ -128,18 +147,18 @@ public:
virtual uint32_t getTextureSlot() const = 0;
virtual uint32_t getTimeStamp() const = 0;
virtual uint32_t getToken() const = 0;
- virtual uint32_t getTypeId() const = 0;
+ virtual SymIndexId getTypeId() const = 0;
virtual uint32_t getUavSlot() const = 0;
virtual std::string getUndecoratedName() const = 0;
virtual std::string getUndecoratedNameEx(PDB_UndnameFlags Flags) const = 0;
- virtual uint32_t getUnmodifiedTypeId() const = 0;
- virtual uint32_t getUpperBoundId() const = 0;
+ virtual SymIndexId getUnmodifiedTypeId() const = 0;
+ virtual SymIndexId getUpperBoundId() const = 0;
virtual Variant getValue() const = 0;
virtual uint32_t getVirtualBaseDispIndex() const = 0;
virtual uint32_t getVirtualBaseOffset() const = 0;
virtual std::unique_ptr<PDBSymbolTypeBuiltin>
getVirtualBaseTableType() const = 0;
- virtual uint32_t getVirtualTableShapeId() const = 0;
+ virtual SymIndexId getVirtualTableShapeId() const = 0;
virtual PDB_DataKind getDataKind() const = 0;
virtual PDB_SymType getSymTag() const = 0;
virtual codeview::GUID getGuid() const = 0;
@@ -237,6 +256,8 @@ public:
virtual std::string getUnused() const = 0;
};
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
} // namespace pdb
} // namespace llvm
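
PdbSymbolIdField is a bitmask enum (LLVM_MARK_AS_BITMASK_ENUM plus the namespace-level LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE above), so callers select which symbol-id fields dump() prints, and which it recurses into, with ordinary bitwise operators. A sketch with a hypothetical raw symbol `RawSym`:

    // Sketch only: show indices and type ids, recurse into nothing.
    using llvm::pdb::PdbSymbolIdField;
    const PdbSymbolIdField Show =
        PdbSymbolIdField::SymIndexId | PdbSymbolIdField::Type;
    RawSym.dump(llvm::outs(), /*Indent=*/2, Show,
                /*RecurseIdFields=*/PdbSymbolIdField::None);
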
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
index 88ec517bc4a5..88fd02c0a345 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
@@ -30,7 +30,8 @@ public:
virtual uint64_t getLoadAddress() const = 0;
virtual bool setLoadAddress(uint64_t Address) = 0;
virtual std::unique_ptr<PDBSymbolExe> getGlobalScope() = 0;
- virtual std::unique_ptr<PDBSymbol> getSymbolById(uint32_t SymbolId) const = 0;
+ virtual std::unique_ptr<PDBSymbol>
+ getSymbolById(SymIndexId SymbolId) const = 0;
virtual bool addressForVA(uint64_t VA, uint32_t &Section,
uint32_t &Offset) const = 0;
@@ -38,7 +39,7 @@ public:
uint32_t &Offset) const = 0;
template <typename T>
- std::unique_ptr<T> getConcreteSymbolById(uint32_t SymbolId) const {
+ std::unique_ptr<T> getConcreteSymbolById(SymIndexId SymbolId) const {
return unique_dyn_cast_or_null<T>(getSymbolById(SymbolId));
}
@@ -90,6 +91,9 @@ public:
virtual std::unique_ptr<IPDBEnumSectionContribs>
getSectionContribs() const = 0;
+
+ virtual std::unique_ptr<IPDBEnumFrameData>
+ getFrameData() const = 0;
};
} // namespace pdb
} // namespace llvm
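A minimal usage sketch of the retyped lookup API above (illustrative only, not part of the patch): getConcreteSymbolById<T>() forwards to getSymbolById() and performs a checked cast, so callers get a null pointer when the id does not resolve to the expected symbol kind. The session object and the id value are assumptions for the example.

#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
#include <memory>

using namespace llvm::pdb;

// Returns the enum symbol for Id, or nullptr if Id names a different kind.
std::unique_ptr<PDBSymbolTypeEnum> lookupEnum(const IPDBSession &Session,
                                              SymIndexId Id) {
  return Session.getConcreteSymbolById<PDBSymbolTypeEnum>(Id);
}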
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index ce4d07917755..ac7f741afefa 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -51,6 +51,7 @@ public:
void setObjFileName(StringRef Name);
void setFirstSectionContrib(const SectionContrib &SC);
void addSymbol(codeview::CVSymbol Symbol);
+ void addSymbolsInBulk(ArrayRef<uint8_t> BulkSymbols);
void
addDebugSubsection(std::shared_ptr<codeview::DebugSubsection> Subsection);
@@ -91,7 +92,7 @@ private:
std::string ModuleName;
std::string ObjFileName;
std::vector<std::string> SourceFiles;
- std::vector<codeview::CVSymbol> Symbols;
+ std::vector<ArrayRef<uint8_t>> Symbols;
std::vector<std::unique_ptr<codeview::DebugSubsectionRecordBuilder>>
C13Builders;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index 280615bdb507..a3ca607efbef 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -78,7 +78,7 @@ public:
const DbiModuleList &modules() const;
- FixedStreamArray<object::coff_section> getSectionHeaders();
+ FixedStreamArray<object::coff_section> getSectionHeaders() const;
FixedStreamArray<object::FpoData> getFpoRecords();
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
index 51befcdac775..b538de576677 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Support/Error.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
@@ -24,11 +25,15 @@
#include "llvm/Support/Endian.h"
namespace llvm {
+namespace codeview {
+struct FrameData;
+}
namespace msf {
class MSFBuilder;
}
namespace object {
struct coff_section;
+struct FpoData;
}
namespace pdb {
class DbiStream;
@@ -65,6 +70,8 @@ public:
void setGlobalsStreamIndex(uint32_t Index);
void setPublicsStreamIndex(uint32_t Index);
void setSymbolRecordStreamIndex(uint32_t Index);
+ void addNewFpoData(const codeview::FrameData &FD);
+ void addOldFpoData(const object::FpoData &Fpo);
Expected<DbiModuleDescriptorBuilder &> addModuleInfo(StringRef ModuleName);
Error addModuleSourceFile(DbiModuleDescriptorBuilder &Module, StringRef File);
@@ -84,7 +91,8 @@ public:
private:
struct DebugStream {
- ArrayRef<uint8_t> Data;
+ std::function<Error(BinaryStreamWriter &)> WriteFn;
+ uint32_t Size = 0;
uint16_t StreamNumber = kInvalidStreamIndex;
};
@@ -117,6 +125,9 @@ private:
std::vector<std::unique_ptr<DbiModuleDescriptorBuilder>> ModiList;
+ Optional<codeview::DebugFrameDataSubsection> NewFpoData;
+ std::vector<object::FpoData> OldFpoData;
+
StringMap<uint32_t> SourceFileNames;
PDBStringTableBuilder ECNamesBuilder;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
index 1a4f89d607df..4c39ca762b5b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
@@ -61,7 +61,6 @@ public:
void addGlobalSymbol(const codeview::ProcRefSym &Sym);
void addGlobalSymbol(const codeview::DataSym &Sym);
void addGlobalSymbol(const codeview::ConstantSym &Sym);
- void addGlobalSymbol(const codeview::UDTSym &Sym);
void addGlobalSymbol(const codeview::CVSymbol &Sym);
private:
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
index dd04b5c5681d..7f84564ee988 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
@@ -10,18 +10,20 @@
#ifndef LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H
#define LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H
+#include "llvm/ADT/iterator.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Error.h"
-#include "llvm/ADT/iterator.h"
namespace llvm {
namespace pdb {
class DbiStream;
class PDBFile;
+class SymbolStream;
/// Iterator over hash records producing symbol record offsets. Abstracts away
/// the fact that symbol record offsets on disk are off-by-one.
@@ -50,8 +52,9 @@ class GSIHashTable {
public:
const GSIHashHeader *HashHdr;
FixedStreamArray<PSHashRecord> HashRecords;
- ArrayRef<uint8_t> HashBitmap;
+ FixedStreamArray<support::ulittle32_t> HashBitmap;
FixedStreamArray<support::ulittle32_t> HashBuckets;
+ std::array<int32_t, IPHR_HASH + 1> BucketMap;
Error read(BinaryStreamReader &Reader);
@@ -72,6 +75,9 @@ public:
const GSIHashTable &getGlobalsTable() const { return GlobalsTable; }
Error reload();
+ std::vector<std::pair<uint32_t, codeview::CVSymbol>>
+ findRecordsByName(StringRef Name, const SymbolStream &Symbols) const;
+
private:
GSIHashTable GlobalsTable;
std::unique_ptr<msf::MappedBlockStream> Stream;
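A hedged sketch of driving the new name lookup on GlobalsStream (not part of the patch). The GlobalsStream and SymbolStream objects are assumed to have been loaded from a PDBFile elsewhere; the uint32_t in each returned pair is taken here to be the record's offset in the symbol record stream, which this header does not spell out.

#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"

using namespace llvm;
using namespace llvm::pdb;

// Collects every global record whose name matches Name.
std::vector<std::pair<uint32_t, codeview::CVSymbol>>
findGlobals(const GlobalsStream &Globals, const SymbolStream &Symbols,
            StringRef Name) {
  return Globals.findRecordsByName(Name, Symbols);
}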
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
index 419e8ada06f7..101127a355f5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
@@ -35,11 +35,18 @@ public:
InfoStreamBuilder &operator=(const InfoStreamBuilder &) = delete;
void setVersion(PdbRaw_ImplVer V);
+ void addFeature(PdbRaw_FeatureSig Sig);
+
+ // If this is true, the PDB contents are hashed and this hash is used as
+ // PDB GUID and as Signature. The age is always 1.
+ void setHashPDBContentsToGUID(bool B);
+
+ // These only have an effect if hashPDBContentsToGUID() is false.
void setSignature(uint32_t S);
void setAge(uint32_t A);
void setGuid(codeview::GUID G);
- void addFeature(PdbRaw_FeatureSig Sig);
+ bool hashPDBContentsToGUID() const { return HashPDBContentsToGUID; }
uint32_t getAge() const { return Age; }
codeview::GUID getGuid() const { return Guid; }
Optional<uint32_t> getSignature() const { return Signature; }
@@ -60,6 +67,8 @@ private:
Optional<uint32_t> Signature;
codeview::GUID Guid;
+ bool HashPDBContentsToGUID = false;
+
NamedStreamMap &NamedStreams;
};
}
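A hedged sketch of opting in to the reproducible-GUID mode added above (not part of the patch). The builder is assumed to be the InfoStreamBuilder of a PDBFileBuilder configured elsewhere, and the signature value is a placeholder.

#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"

void configureInfoStream(llvm::pdb::InfoStreamBuilder &Builder,
                         bool Reproducible) {
  Builder.setVersion(llvm::pdb::PdbImplVC70);
  if (Reproducible) {
    // GUID and signature are derived from a hash of the PDB contents; the
    // explicit setters below would be ignored and the age forced to 1.
    Builder.setHashPDBContentsToGUID(true);
  } else {
    Builder.setAge(1);
    Builder.setSignature(0x12345678); // placeholder value
  }
}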
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index efc25e0559b9..8d590df288f3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
#include <cstdint>
@@ -43,6 +44,8 @@ public:
symbols(bool *HadError) const;
const codeview::CVSymbolArray &getSymbolArray() const { return SymbolArray; }
+ const codeview::CVSymbolArray
+ getSymbolArrayForScope(uint32_t ScopeBegin) const;
BinarySubstreamRef getSymbolsSubstream() const;
BinarySubstreamRef getC11LinesSubstream() const;
@@ -51,6 +54,8 @@ public:
ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = delete;
+ codeview::CVSymbol readSymbolAtOffset(uint32_t Offset) const;
+
iterator_range<DebugSubsectionIterator> subsections() const;
codeview::DebugSubsectionArray getSubsectionsArray() const {
return Subsections;
@@ -64,7 +69,7 @@ public:
findChecksumsSubsection() const;
private:
- const DbiModuleDescriptor &Mod;
+ DbiModuleDescriptor Mod;
uint32_t Signature;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
index bd5c09e5ff76..3cd465503044 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
@@ -21,11 +21,12 @@ public:
NativeCompilandSymbol(NativeSession &Session, SymIndexId SymbolId,
DbiModuleDescriptor MI);
- std::unique_ptr<NativeRawSymbol> clone() const override;
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
PDB_SymType getSymTag() const override;
bool isEditAndContinueEnabled() const override;
- uint32_t getLexicalParentId() const override;
+ SymIndexId getLexicalParentId() const override;
std::string getLibraryName() const override;
std::string getName() const override;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
new file mode 100644
index 000000000000..4442a1ec41fb
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
@@ -0,0 +1,43 @@
+//==- NativeEnumGlobals.h - Native Global Enumerator impl --------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMGLOBALS_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMGLOBALS_H
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+
+#include <vector>
+
+namespace llvm {
+namespace pdb {
+
+class NativeSession;
+
+class NativeEnumGlobals : public IPDBEnumChildren<PDBSymbol> {
+public:
+ NativeEnumGlobals(NativeSession &Session,
+ std::vector<codeview::SymbolKind> Kinds);
+
+ uint32_t getChildCount() const override;
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
+ std::unique_ptr<PDBSymbol> getNext() override;
+ void reset() override;
+
+private:
+ std::vector<uint32_t> MatchOffsets;
+ uint32_t Index;
+ NativeSession &Session;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
index 6aa1460dbb4e..c268641a1008 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
@@ -11,28 +11,23 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMMODULES_H
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
namespace llvm {
namespace pdb {
-class DbiModuleList;
class NativeSession;
class NativeEnumModules : public IPDBEnumChildren<PDBSymbol> {
public:
- NativeEnumModules(NativeSession &Session, const DbiModuleList &Modules,
- uint32_t Index = 0);
+ NativeEnumModules(NativeSession &Session, uint32_t Index = 0);
uint32_t getChildCount() const override;
std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
std::unique_ptr<PDBSymbol> getNext() override;
void reset() override;
- NativeEnumModules *clone() const override;
private:
NativeSession &Session;
- const DbiModuleList &Modules;
uint32_t Index;
};
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h
deleted file mode 100644
index 41b7b78b8d80..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===- NativeEnumSymbol.h - info about enum type ----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOL_H
-#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOL_H
-
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
-#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-
-namespace llvm {
-namespace pdb {
-
-class NativeEnumSymbol : public NativeRawSymbol,
- public codeview::TypeVisitorCallbacks {
-public:
- NativeEnumSymbol(NativeSession &Session, SymIndexId Id,
- const codeview::CVType &CV);
- ~NativeEnumSymbol() override;
-
- std::unique_ptr<NativeRawSymbol> clone() const override;
-
- std::unique_ptr<IPDBEnumSymbols>
- findChildren(PDB_SymType Type) const override;
-
- Error visitKnownRecord(codeview::CVType &CVR,
- codeview::EnumRecord &Record) override;
- Error visitKnownMember(codeview::CVMemberRecord &CVM,
- codeview::EnumeratorRecord &Record) override;
-
- PDB_SymType getSymTag() const override;
- uint32_t getClassParentId() const override;
- uint32_t getUnmodifiedTypeId() const override;
- bool hasConstructor() const override;
- bool hasAssignmentOperator() const override;
- bool hasCastOperator() const override;
- uint64_t getLength() const override;
- std::string getName() const override;
- bool isNested() const override;
- bool hasOverloadedOperator() const override;
- bool isPacked() const override;
- bool isScoped() const override;
- uint32_t getTypeId() const override;
-
-protected:
- codeview::CVType CV;
- codeview::EnumRecord Record;
-};
-
-} // namespace pdb
-} // namespace llvm
-
-#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOL_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
index e0a5c8d9ad81..f8ac1655dc61 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
@@ -26,23 +26,20 @@ class NativeEnumTypes : public IPDBEnumChildren<PDBSymbol> {
public:
NativeEnumTypes(NativeSession &Session,
codeview::LazyRandomTypeCollection &TypeCollection,
- codeview::TypeLeafKind Kind);
+ std::vector<codeview::TypeLeafKind> Kinds);
+
+ NativeEnumTypes(NativeSession &Session,
+ std::vector<codeview::TypeIndex> Indices);
uint32_t getChildCount() const override;
std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
std::unique_ptr<PDBSymbol> getNext() override;
void reset() override;
- NativeEnumTypes *clone() const override;
private:
- NativeEnumTypes(NativeSession &Session,
- const std::vector<codeview::TypeIndex> &Matches,
- codeview::TypeLeafKind Kind);
-
std::vector<codeview::TypeIndex> Matches;
uint32_t Index;
NativeSession &Session;
- codeview::TypeLeafKind Kind;
};
} // namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
index 587c7ff2b092..f4030da1d026 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
@@ -16,11 +16,14 @@
namespace llvm {
namespace pdb {
+class DbiStream;
+
class NativeExeSymbol : public NativeRawSymbol {
-public:
- NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId);
+ // EXE symbol is the authority on the various symbol types.
+ DbiStream *Dbi = nullptr;
- std::unique_ptr<NativeRawSymbol> clone() const override;
+public:
+ NativeExeSymbol(NativeSession &Session, SymIndexId Id);
std::unique_ptr<IPDBEnumSymbols>
findChildren(PDB_SymType Type) const override;
@@ -30,9 +33,6 @@ public:
codeview::GUID getGuid() const override;
bool hasCTypes() const override;
bool hasPrivateSymbols() const override;
-
-private:
- PDBFile &File;
};
} // namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
index 5b70ecfa2056..6505a7d39573 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
@@ -19,15 +19,16 @@ namespace pdb {
class NativeSession;
-typedef uint32_t SymIndexId;
-
class NativeRawSymbol : public IPDBRawSymbol {
-public:
- NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId);
+ friend class SymbolCache;
+ virtual void initialize() {}
- virtual std::unique_ptr<NativeRawSymbol> clone() const = 0;
+public:
+ NativeRawSymbol(NativeSession &PDBSession, PDB_SymType Tag,
+ SymIndexId SymbolId);
- void dump(raw_ostream &OS, int Indent) const override;
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
std::unique_ptr<IPDBEnumSymbols>
findChildren(PDB_SymType Type) const override;
@@ -68,25 +69,25 @@ public:
uint32_t getAddressOffset() const override;
uint32_t getAddressSection() const override;
uint32_t getAge() const override;
- uint32_t getArrayIndexTypeId() const override;
+ SymIndexId getArrayIndexTypeId() const override;
uint32_t getBaseDataOffset() const override;
uint32_t getBaseDataSlot() const override;
- uint32_t getBaseSymbolId() const override;
+ SymIndexId getBaseSymbolId() const override;
PDB_BuiltinType getBuiltinType() const override;
uint32_t getBitPosition() const override;
PDB_CallingConv getCallingConvention() const override;
- uint32_t getClassParentId() const override;
+ SymIndexId getClassParentId() const override;
std::string getCompilerName() const override;
uint32_t getCount() const override;
uint32_t getCountLiveRanges() const override;
PDB_Lang getLanguage() const override;
- uint32_t getLexicalParentId() const override;
+ SymIndexId getLexicalParentId() const override;
std::string getLibraryName() const override;
uint32_t getLiveRangeStartAddressOffset() const override;
uint32_t getLiveRangeStartAddressSection() const override;
uint32_t getLiveRangeStartRelativeVirtualAddress() const override;
codeview::RegisterId getLocalBasePointerRegisterId() const override;
- uint32_t getLowerBoundId() const override;
+ SymIndexId getLowerBoundId() const override;
uint32_t getMemorySpaceKind() const override;
std::string getName() const override;
uint32_t getNumberOfAcceleratorPointerTags() const override;
@@ -96,7 +97,7 @@ public:
uint32_t getNumberOfRows() const override;
std::string getObjectFileName() const override;
uint32_t getOemId() const override;
- uint32_t getOemSymbolId() const override;
+ SymIndexId getOemSymbolId() const override;
uint32_t getOffsetInUdt() const override;
PDB_Cpu getPlatform() const override;
uint32_t getRank() const override;
@@ -110,9 +111,9 @@ public:
std::string getSourceFileName() const override;
std::unique_ptr<IPDBLineNumber> getSrcLineOnTypeDefn() const override;
uint32_t getStride() const override;
- uint32_t getSubTypeId() const override;
+ SymIndexId getSubTypeId() const override;
std::string getSymbolsFileName() const override;
- uint32_t getSymIndexId() const override;
+ SymIndexId getSymIndexId() const override;
uint32_t getTargetOffset() const override;
uint32_t getTargetRelativeVirtualAddress() const override;
uint64_t getTargetVirtualAddress() const override;
@@ -120,16 +121,16 @@ public:
uint32_t getTextureSlot() const override;
uint32_t getTimeStamp() const override;
uint32_t getToken() const override;
- uint32_t getTypeId() const override;
+ SymIndexId getTypeId() const override;
uint32_t getUavSlot() const override;
std::string getUndecoratedName() const override;
std::string getUndecoratedNameEx(PDB_UndnameFlags Flags) const override;
- uint32_t getUnmodifiedTypeId() const override;
- uint32_t getUpperBoundId() const override;
+ SymIndexId getUnmodifiedTypeId() const override;
+ SymIndexId getUpperBoundId() const override;
Variant getValue() const override;
uint32_t getVirtualBaseDispIndex() const override;
uint32_t getVirtualBaseOffset() const override;
- uint32_t getVirtualTableShapeId() const override;
+ SymIndexId getVirtualTableShapeId() const override;
std::unique_ptr<PDBSymbolTypeBuiltin>
getVirtualBaseTableType() const override;
PDB_DataKind getDataKind() const override;
@@ -230,6 +231,7 @@ public:
protected:
NativeSession &Session;
+ PDB_SymType Tag;
SymIndexId SymbolId;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
index aff7ef2f8f21..4878e47d3121 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
@@ -15,9 +15,8 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
-#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
@@ -25,6 +24,7 @@ namespace llvm {
class MemoryBuffer;
namespace pdb {
class PDBFile;
+class NativeExeSymbol;
class NativeSession : public IPDBSession {
public:
@@ -37,21 +37,10 @@ public:
static Error createFromExe(StringRef Path,
std::unique_ptr<IPDBSession> &Session);
- std::unique_ptr<PDBSymbolCompiland>
- createCompilandSymbol(DbiModuleDescriptor MI);
-
- std::unique_ptr<PDBSymbolTypeEnum>
- createEnumSymbol(codeview::TypeIndex Index);
-
- std::unique_ptr<IPDBEnumSymbols>
- createTypeEnumerator(codeview::TypeLeafKind Kind);
-
- SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI);
-
uint64_t getLoadAddress() const override;
bool setLoadAddress(uint64_t Address) override;
std::unique_ptr<PDBSymbolExe> getGlobalScope() override;
- std::unique_ptr<PDBSymbol> getSymbolById(uint32_t SymbolId) const override;
+ std::unique_ptr<PDBSymbol> getSymbolById(SymIndexId SymbolId) const override;
bool addressForVA(uint64_t VA, uint32_t &Section,
uint32_t &Offset) const override;
@@ -104,14 +93,23 @@ public:
std::unique_ptr<IPDBEnumSectionContribs> getSectionContribs() const override;
+ std::unique_ptr<IPDBEnumFrameData> getFrameData() const override;
+
PDBFile &getPDBFile() { return *Pdb; }
const PDBFile &getPDBFile() const { return *Pdb; }
+ NativeExeSymbol &getNativeGlobalScope() const;
+ SymbolCache &getSymbolCache() { return Cache; }
+ const SymbolCache &getSymbolCache() const { return Cache; }
+
private:
+ void initializeExeSymbol();
+
std::unique_ptr<PDBFile> Pdb;
std::unique_ptr<BumpPtrAllocator> Allocator;
- std::vector<std::unique_ptr<NativeRawSymbol>> SymbolCache;
- DenseMap<codeview::TypeIndex, SymIndexId> TypeIndexToSymbolId;
+
+ SymbolCache Cache;
+ SymIndexId ExeSymbol = 0;
};
} // namespace pdb
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
new file mode 100644
index 000000000000..acc5eb8ff2c2
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
@@ -0,0 +1,51 @@
+//===- NativeSymbolEnumerator.h - info about enumerator values --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+class NativeTypeEnum;
+
+class NativeSymbolEnumerator : public NativeRawSymbol {
+public:
+ NativeSymbolEnumerator(NativeSession &Session, SymIndexId Id,
+ const NativeTypeEnum &Parent,
+ codeview::EnumeratorRecord Record);
+
+ ~NativeSymbolEnumerator() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ SymIndexId getClassParentId() const override;
+ SymIndexId getLexicalParentId() const override;
+ std::string getName() const override;
+ SymIndexId getTypeId() const override;
+ PDB_DataKind getDataKind() const override;
+ PDB_LocType getLocationType() const override;
+ bool isConstType() const override;
+ bool isVolatileType() const override;
+ bool isUnalignedType() const override;
+ Variant getValue() const override;
+
+protected:
+ const NativeTypeEnum &Parent;
+ codeview::EnumeratorRecord Record;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
new file mode 100644
index 000000000000..10e68e6df450
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
@@ -0,0 +1,50 @@
+//===- NativeTypeArray.h ------------------------------------------ C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEARRAY_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEARRAY_H
+
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeSession;
+
+class NativeTypeArray : public NativeRawSymbol {
+public:
+ NativeTypeArray(NativeSession &Session, SymIndexId Id, codeview::TypeIndex TI,
+ codeview::ArrayRecord Record);
+ ~NativeTypeArray() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ SymIndexId getArrayIndexTypeId() const override;
+
+ bool isConstType() const override;
+ bool isUnalignedType() const override;
+ bool isVolatileType() const override;
+
+ uint32_t getCount() const override;
+ SymIndexId getTypeId() const override;
+ uint64_t getLength() const override;
+
+protected:
+ codeview::ArrayRecord Record;
+ codeview::TypeIndex Index;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
index 4f532c6e3829..725dfb89222f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
@@ -1,4 +1,4 @@
-//===- NativeBuiltinSymbol.h -------------------------------------- C++ -*-===//
+//===- NativeTypeBuiltin.h ---------------------------------------- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEBUILTINSYMBOL_H
-#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEBUILTINSYMBOL_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEBUILTIN_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEBUILTIN_H
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
@@ -19,15 +19,15 @@ namespace pdb {
class NativeSession;
-class NativeBuiltinSymbol : public NativeRawSymbol {
+class NativeTypeBuiltin : public NativeRawSymbol {
public:
- NativeBuiltinSymbol(NativeSession &PDBSession, SymIndexId Id,
- PDB_BuiltinType T, uint64_t L);
- ~NativeBuiltinSymbol() override;
+ NativeTypeBuiltin(NativeSession &PDBSession, SymIndexId Id,
+ codeview::ModifierOptions Mods, PDB_BuiltinType T,
+ uint64_t L);
+ ~NativeTypeBuiltin() override;
- virtual std::unique_ptr<NativeRawSymbol> clone() const override;
-
- void dump(raw_ostream &OS, int Indent) const override;
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
PDB_SymType getSymTag() const override;
@@ -39,6 +39,7 @@ public:
protected:
NativeSession &Session;
+ codeview::ModifierOptions Mods;
PDB_BuiltinType Type;
uint64_t Length;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
new file mode 100644
index 000000000000..a5cbefc18111
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
@@ -0,0 +1,75 @@
+//===- NativeTypeEnum.h - info about enum type ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEENUM_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEENUM_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeBuiltin;
+
+class NativeTypeEnum : public NativeRawSymbol {
+public:
+ NativeTypeEnum(NativeSession &Session, SymIndexId Id, codeview::TypeIndex TI,
+ codeview::EnumRecord Record);
+
+ NativeTypeEnum(NativeSession &Session, SymIndexId Id,
+ NativeTypeEnum &UnmodifiedType,
+ codeview::ModifierRecord Modifier);
+ ~NativeTypeEnum() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::unique_ptr<IPDBEnumSymbols>
+ findChildren(PDB_SymType Type) const override;
+
+ PDB_BuiltinType getBuiltinType() const override;
+ PDB_SymType getSymTag() const override;
+ SymIndexId getUnmodifiedTypeId() const override;
+ bool hasConstructor() const override;
+ bool hasAssignmentOperator() const override;
+ bool hasCastOperator() const override;
+ uint64_t getLength() const override;
+ std::string getName() const override;
+ bool isConstType() const override;
+ bool isVolatileType() const override;
+ bool isUnalignedType() const override;
+ bool isNested() const override;
+ bool hasOverloadedOperator() const override;
+ bool hasNestedTypes() const override;
+ bool isIntrinsic() const override;
+ bool isPacked() const override;
+ bool isScoped() const override;
+ SymIndexId getTypeId() const override;
+ bool isRefUdt() const override;
+ bool isValueUdt() const override;
+ bool isInterfaceUdt() const override;
+
+ const NativeTypeBuiltin &getUnderlyingBuiltinType() const;
+ const codeview::EnumRecord &getEnumRecord() const { return *Record; }
+
+protected:
+ codeview::TypeIndex Index;
+ Optional<codeview::EnumRecord> Record;
+ NativeTypeEnum *UnmodifiedType = nullptr;
+ Optional<codeview::ModifierRecord> Modifiers;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEENUM_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
new file mode 100644
index 000000000000..1b1b87f6581f
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
@@ -0,0 +1,74 @@
+//===- NativeTypeFunctionSig.h - info about function signature ---*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeUDT;
+
+class NativeTypeFunctionSig : public NativeRawSymbol {
+protected:
+ void initialize() override;
+
+public:
+ NativeTypeFunctionSig(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::ProcedureRecord Proc);
+
+ NativeTypeFunctionSig(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI,
+ codeview::MemberFunctionRecord MemberFunc);
+
+ ~NativeTypeFunctionSig() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::unique_ptr<IPDBEnumSymbols>
+ findChildren(PDB_SymType Type) const override;
+
+ SymIndexId getClassParentId() const override;
+ PDB_CallingConv getCallingConvention() const override;
+ uint32_t getCount() const override;
+ SymIndexId getTypeId() const override;
+ int32_t getThisAdjust() const override;
+ bool hasConstructor() const override;
+ bool isConstType() const override;
+ bool isConstructorVirtualBase() const override;
+ bool isCxxReturnUdt() const override;
+ bool isUnalignedType() const override;
+ bool isVolatileType() const override;
+
+private:
+ void initializeArgList(codeview::TypeIndex ArgListTI);
+
+ union {
+ codeview::MemberFunctionRecord MemberFunc;
+ codeview::ProcedureRecord Proc;
+ };
+
+ SymIndexId ClassParentId = 0;
+ codeview::TypeIndex Index;
+ codeview::ArgListRecord ArgList;
+ bool IsMemberFunction = false;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
new file mode 100644
index 000000000000..bcb7431fecf1
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
@@ -0,0 +1,61 @@
+//===- NativeTypePointer.h - info about pointer type -------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEPOINTER_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEPOINTER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypePointer : public NativeRawSymbol {
+public:
+ // Create a pointer record for a simple type.
+ NativeTypePointer(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI);
+
+ // Create a pointer record for a non-simple type.
+ NativeTypePointer(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::PointerRecord PR);
+ ~NativeTypePointer() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ SymIndexId getClassParentId() const override;
+ bool isConstType() const override;
+ uint64_t getLength() const override;
+ bool isReference() const override;
+ bool isRValueReference() const override;
+ bool isPointerToDataMember() const override;
+ bool isPointerToMemberFunction() const override;
+ SymIndexId getTypeId() const override;
+ bool isRestrictedType() const override;
+ bool isVolatileType() const override;
+ bool isUnalignedType() const override;
+
+ bool isSingleInheritance() const override;
+ bool isMultipleInheritance() const override;
+ bool isVirtualInheritance() const override;
+
+protected:
+ bool isMemberPointer() const;
+ codeview::TypeIndex TI;
+ Optional<codeview::PointerRecord> Record;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEPOINTER_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
new file mode 100644
index 000000000000..06eb6fcf3764
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
@@ -0,0 +1,42 @@
+//===- NativeTypeTypedef.h - info about typedef ------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeTypedef : public NativeRawSymbol {
+public:
+ // Create a pointer record for a non-simple type.
+ NativeTypeTypedef(NativeSession &Session, SymIndexId Id,
+ codeview::UDTSym Typedef);
+
+ ~NativeTypeTypedef() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::string getName() const override;
+ SymIndexId getTypeId() const override;
+
+protected:
+ codeview::UDTSym Record;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
new file mode 100644
index 000000000000..84821d8731be
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
@@ -0,0 +1,74 @@
+//===- NativeTypeUDT.h - info about class/struct type ------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEUDT_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEUDT_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeUDT : public NativeRawSymbol {
+public:
+ NativeTypeUDT(NativeSession &Session, SymIndexId Id, codeview::TypeIndex TI,
+ codeview::ClassRecord Class);
+
+ NativeTypeUDT(NativeSession &Session, SymIndexId Id, codeview::TypeIndex TI,
+ codeview::UnionRecord Union);
+
+ NativeTypeUDT(NativeSession &Session, SymIndexId Id,
+ NativeTypeUDT &UnmodifiedType,
+ codeview::ModifierRecord Modifier);
+
+ ~NativeTypeUDT() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ std::string getName() const override;
+ SymIndexId getLexicalParentId() const override;
+ SymIndexId getUnmodifiedTypeId() const override;
+ SymIndexId getVirtualTableShapeId() const override;
+ uint64_t getLength() const override;
+ PDB_UdtType getUdtKind() const override;
+ bool hasConstructor() const override;
+ bool isConstType() const override;
+ bool hasAssignmentOperator() const override;
+ bool hasCastOperator() const override;
+ bool hasNestedTypes() const override;
+ bool hasOverloadedOperator() const override;
+ bool isInterfaceUdt() const override;
+ bool isIntrinsic() const override;
+ bool isNested() const override;
+ bool isPacked() const override;
+ bool isRefUdt() const override;
+ bool isScoped() const override;
+ bool isValueUdt() const override;
+ bool isUnalignedType() const override;
+ bool isVolatileType() const override;
+
+protected:
+ codeview::TypeIndex Index;
+
+ Optional<codeview::ClassRecord> Class;
+ Optional<codeview::UnionRecord> Union;
+ NativeTypeUDT *UnmodifiedType = nullptr;
+ codeview::TagRecord *Tag = nullptr;
+ Optional<codeview::ModifierRecord> Modifiers;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEUDT_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
new file mode 100644
index 000000000000..a996f34ef859
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
@@ -0,0 +1,46 @@
+//===- NativeTypeVTShape.h - info about virtual table shape ------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+namespace llvm {
+namespace pdb {
+
+class NativeTypeVTShape : public NativeRawSymbol {
+public:
+ // Create a pointer record for a non-simple type.
+ NativeTypeVTShape(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::VFTableShapeRecord SR);
+
+ ~NativeTypeVTShape() override;
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override;
+
+ bool isConstType() const override;
+ bool isVolatileType() const override;
+ bool isUnalignedType() const override;
+ uint32_t getCount() const override;
+
+protected:
+ codeview::TypeIndex TI;
+ codeview::VFTableShapeRecord Record;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H \ No newline at end of file
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 7f9c4cf9fa83..37458749a8d8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -53,7 +53,9 @@ public:
PDBStringTableBuilder &getStringTableBuilder();
GSIStreamBuilder &getGsiBuilder();
- Error commit(StringRef Filename);
+ // If HashPDBContentsToGUID is true on the InfoStreamBuilder, Guid is filled
+ // with the computed PDB GUID on return.
+ Error commit(StringRef Filename, codeview::GUID *Guid);
Expected<uint32_t> getNamedStreamIndex(StringRef Name) const;
Error addNamedStream(StringRef Name, StringRef Data);
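A hedged sketch of committing through the revised interface (not part of the patch). The builder setup and output path are assumptions; per the comment above, Guid receives the content-derived GUID when HashPDBContentsToGUID was enabled on the InfoStreamBuilder, so the caller can stamp it into the matching executable afterwards.

#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"

llvm::Error writePdb(llvm::pdb::PDBFileBuilder &Builder, llvm::StringRef Path) {
  llvm::codeview::GUID Guid;
  if (llvm::Error E = Builder.commit(Path, &Guid))
    return E;
  // Guid now holds whatever GUID was written into the info stream.
  return llvm::Error::success();
}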
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h
index 3624a7682e38..97d11b4f20d1 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawError.h
@@ -31,23 +31,29 @@ enum class raw_error_code {
stream_too_long,
invalid_tpi_hash,
};
+} // namespace pdb
+} // namespace llvm
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::pdb::raw_error_code> : std::true_type {};
+} // namespace std
+
+namespace llvm {
+namespace pdb {
+const std::error_category &RawErrCategory();
+
+inline std::error_code make_error_code(raw_error_code E) {
+ return std::error_code(static_cast<int>(E), RawErrCategory());
+}
/// Base class for errors originating when parsing raw PDB files
-class RawError : public ErrorInfo<RawError> {
+class RawError : public ErrorInfo<RawError, StringError> {
public:
+ using ErrorInfo<RawError, StringError>::ErrorInfo; // inherit constructors
+ RawError(const Twine &S) : ErrorInfo(S, raw_error_code::unspecified) {}
static char ID;
- RawError(raw_error_code C);
- RawError(const std::string &Context);
- RawError(raw_error_code C, const std::string &Context);
-
- void log(raw_ostream &OS) const override;
- const std::string &getErrorMessage() const;
- std::error_code convertToErrorCode() const override;
-
-private:
- std::string ErrMsg;
- raw_error_code Code;
};
-}
-}
+} // namespace pdb
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h
index 19f592d562e4..8f6d6611c032 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h
@@ -343,7 +343,6 @@ struct SrcHeaderBlockEntry {
char Reserved[8];
};
-constexpr int I = sizeof(SrcHeaderBlockEntry);
static_assert(sizeof(SrcHeaderBlockEntry) == 40, "Incorrect struct size!");
} // namespace pdb
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
new file mode 100644
index 000000000000..08e1d41e6ee9
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
@@ -0,0 +1,148 @@
+//==- SymbolCache.h - Cache of native symbols and ids ------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLCACHE_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLCACHE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/Support/Allocator.h"
+
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace pdb {
+class DbiStream;
+class PDBFile;
+
+class SymbolCache {
+ NativeSession &Session;
+ DbiStream *Dbi = nullptr;
+
+ /// Cache of all stable symbols, indexed by SymIndexId. Just because a
+ /// symbol has been parsed does not imply that it will be stable and have
+ /// an Id. Id allocation is an implementation detail, with the only guarantee
+ /// being that once an Id is allocated, the symbol can be assumed to be
+ /// cached.
+ std::vector<std::unique_ptr<NativeRawSymbol>> Cache;
+
+ /// For type records from the TPI stream which have been parsed and cached,
+ /// stores a mapping to SymIndexId of the cached symbol.
+ DenseMap<codeview::TypeIndex, SymIndexId> TypeIndexToSymbolId;
+
+ /// For field list members which have been parsed and cached, stores a mapping
+ /// from (IndexOfClass, MemberIndex) to the corresponding SymIndexId of the
+ /// cached symbol.
+ DenseMap<std::pair<codeview::TypeIndex, uint32_t>, SymIndexId>
+ FieldListMembersToSymbolId;
+
+ /// List of SymIndexIds for each compiland, indexed by compiland index as they
+ /// appear in the PDB file.
+ std::vector<SymIndexId> Compilands;
+
+ /// Map from global symbol offset to SymIndexId.
+ DenseMap<uint32_t, SymIndexId> GlobalOffsetToSymbolId;
+
+ SymIndexId createSymbolPlaceholder() {
+ SymIndexId Id = Cache.size();
+ Cache.push_back(nullptr);
+ return Id;
+ }
+
+ template <typename ConcreteSymbolT, typename CVRecordT, typename... Args>
+ SymIndexId createSymbolForType(codeview::TypeIndex TI, codeview::CVType CVT,
+ Args &&... ConstructorArgs) {
+ CVRecordT Record;
+ if (auto EC =
+ codeview::TypeDeserializer::deserializeAs<CVRecordT>(CVT, Record)) {
+ consumeError(std::move(EC));
+ return 0;
+ }
+
+ return createSymbol<ConcreteSymbolT>(
+ TI, std::move(Record), std::forward<Args>(ConstructorArgs)...);
+ }
+
+ SymIndexId createSymbolForModifiedType(codeview::TypeIndex ModifierTI,
+ codeview::CVType CVT);
+
+ SymIndexId createSimpleType(codeview::TypeIndex TI,
+ codeview::ModifierOptions Mods);
+
+public:
+ SymbolCache(NativeSession &Session, DbiStream *Dbi);
+
+ template <typename ConcreteSymbolT, typename... Args>
+ SymIndexId createSymbol(Args &&... ConstructorArgs) {
+ SymIndexId Id = Cache.size();
+
+ // Initial construction must not access the cache, since it must be done
+ // atomically.
+ auto Result = llvm::make_unique<ConcreteSymbolT>(
+ Session, Id, std::forward<Args>(ConstructorArgs)...);
+ Result->SymbolId = Id;
+
+ NativeRawSymbol *NRS = static_cast<NativeRawSymbol *>(Result.get());
+ Cache.push_back(std::move(Result));
+
+ // After the item is in the cache, we can do further initialization which
+ // is then allowed to access the cache.
+ NRS->initialize();
+ return Id;
+ }
+
+ std::unique_ptr<IPDBEnumSymbols>
+ createTypeEnumerator(codeview::TypeLeafKind Kind);
+
+ std::unique_ptr<IPDBEnumSymbols>
+ createTypeEnumerator(std::vector<codeview::TypeLeafKind> Kinds);
+
+ std::unique_ptr<IPDBEnumSymbols>
+ createGlobalsEnumerator(codeview::SymbolKind Kind);
+
+ SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI);
+
+ template <typename ConcreteSymbolT, typename... Args>
+ SymIndexId getOrCreateFieldListMember(codeview::TypeIndex FieldListTI,
+ uint32_t Index,
+ Args &&... ConstructorArgs) {
+ SymIndexId SymId = Cache.size();
+ std::pair<codeview::TypeIndex, uint32_t> Key{FieldListTI, Index};
+ auto Result = FieldListMembersToSymbolId.try_emplace(Key, SymId);
+ if (Result.second)
+ SymId =
+ createSymbol<ConcreteSymbolT>(std::forward<Args>(ConstructorArgs)...);
+ else
+ SymId = Result.first->second;
+ return SymId;
+ }
+
+ SymIndexId getOrCreateGlobalSymbolByOffset(uint32_t Offset);
+
+ std::unique_ptr<PDBSymbolCompiland> getOrCreateCompiland(uint32_t Index);
+ uint32_t getNumCompilands() const;
+
+ std::unique_ptr<PDBSymbol> getSymbolById(SymIndexId SymbolId) const;
+
+ NativeRawSymbol &getNativeSymbolById(SymIndexId SymbolId) const;
+
+ template <typename ConcreteT>
+ ConcreteT &getNativeSymbolById(SymIndexId SymbolId) const {
+ return static_cast<ConcreteT &>(getNativeSymbolById(SymbolId));
+ }
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
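A short sketch of the two-phase creation protocol documented in createSymbol() above (illustrative, not part of the patch). The concrete symbol type and record come from the NativeTypeEnum constructor added elsewhere in this change; the point is that construction must not touch the cache, while initialize() may, because by then the new id is already registered.

#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"

using namespace llvm;
using namespace llvm::pdb;

// Allocates a SymIndexId for an enum type record and caches the symbol.
SymIndexId cacheEnum(SymbolCache &Cache, codeview::TypeIndex TI,
                     codeview::EnumRecord ER) {
  // createSymbol() constructs NativeTypeEnum(Session, Id, TI, ER), inserts it
  // into the cache, and only then calls initialize() on the new symbol.
  return Cache.createSymbol<NativeTypeEnum>(TI, std::move(ER));
}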
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
index c1edec7a26fe..c2996ccf1825 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
@@ -18,6 +18,54 @@ namespace pdb {
Expected<uint32_t> hashTypeRecord(const llvm::codeview::CVType &Type);
+struct TagRecordHash {
+ explicit TagRecordHash(codeview::ClassRecord CR, uint32_t Full,
+ uint32_t Forward)
+ : FullRecordHash(Full), ForwardDeclHash(Forward), Class(std::move(CR)) {
+ State = 0;
+ }
+
+ explicit TagRecordHash(codeview::EnumRecord ER, uint32_t Full,
+ uint32_t Forward)
+ : FullRecordHash(Full), ForwardDeclHash(Forward), Enum(std::move(ER)) {
+ State = 1;
+ }
+
+ explicit TagRecordHash(codeview::UnionRecord UR, uint32_t Full,
+ uint32_t Forward)
+ : FullRecordHash(Full), ForwardDeclHash(Forward), Union(std::move(UR)) {
+ State = 2;
+ }
+
+ uint32_t FullRecordHash;
+ uint32_t ForwardDeclHash;
+
+ codeview::TagRecord &getRecord() {
+ switch (State) {
+ case 0:
+ return Class;
+ case 1:
+ return Enum;
+ case 2:
+ return Union;
+ }
+ llvm_unreachable("unreachable!");
+ }
+
+private:
+ union {
+ codeview::ClassRecord Class;
+ codeview::EnumRecord Enum;
+ codeview::UnionRecord Union;
+ };
+
+ uint8_t State = 0;
+};
+
+/// Given a CVType referring to a class, structure, union, or enum, compute
+/// the hash of its forward decl and full decl.
+Expected<TagRecordHash> hashTagRecord(const codeview::CVType &Type);
+
} // end namespace pdb
} // end namespace llvm
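A hedged usage sketch for the new hashTagRecord() helper (not part of the patch). CVT is assumed to be a CVType for a class, struct, union, or enum, as the doc comment above requires; anything else would yield an error.

#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include <utility>

// Returns {hash of the full definition, hash of the forward declaration}.
llvm::Expected<std::pair<uint32_t, uint32_t>>
getTagHashes(const llvm::codeview::CVType &CVT) {
  llvm::Expected<llvm::pdb::TagRecordHash> H = llvm::pdb::hashTagRecord(CVT);
  if (!H)
    return H.takeError();
  return std::make_pair(H->FullRecordHash, H->ForwardDeclHash);
}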
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index b77939929ecf..b76576a7a263 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -58,10 +58,21 @@ public:
codeview::LazyRandomTypeCollection &typeCollection() { return *Types; }
+ Expected<codeview::TypeIndex>
+ findFullDeclForForwardRef(codeview::TypeIndex ForwardRefTI) const;
+
+ std::vector<codeview::TypeIndex> findRecordsByName(StringRef Name) const;
+
+ codeview::CVType getType(codeview::TypeIndex Index);
+
BinarySubstreamRef getTypeRecordsSubstream() const;
Error commit();
+ void buildHashMap();
+
+ bool supportsTypeLookup() const;
+
private:
PDBFile &Pdb;
std::unique_ptr<msf::MappedBlockStream> Stream;
@@ -77,6 +88,8 @@ private:
FixedStreamArray<codeview::TypeIndexOffset> TypeIndexOffsets;
HashTable<support::ulittle32_t> HashAdjusters;
+ std::vector<std::vector<codeview::TypeIndex>> HashMap;
+
const TpiStreamHeader *Header;
};
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
index 3c9a19801f89..aaec71aa8c90 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -12,6 +12,8 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/Support/raw_ostream.h"
+
#include <unordered_map>
namespace llvm {
@@ -24,6 +26,7 @@ using TagStats = std::unordered_map<PDB_SymType, int>;
raw_ostream &operator<<(raw_ostream &OS, const PDB_VariantType &Value);
raw_ostream &operator<<(raw_ostream &OS, const PDB_CallingConv &Conv);
+raw_ostream &operator<<(raw_ostream &OS, const PDB_BuiltinType &Type);
raw_ostream &operator<<(raw_ostream &OS, const PDB_DataKind &Data);
raw_ostream &operator<<(raw_ostream &OS, const codeview::RegisterId &Reg);
raw_ostream &operator<<(raw_ostream &OS, const PDB_LocType &Loc);
@@ -41,6 +44,15 @@ raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
+
+template <typename T>
+void dumpSymbolField(raw_ostream &OS, StringRef Name, T Value, int Indent) {
+ OS << "\n";
+ OS.indent(Indent);
+ OS << Name << ": " << Value;
+}
+
+
} // end namespace pdb
} // end namespace llvm
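A small sketch of the dumpSymbolField() helper added above (illustrative, not part of the patch): it emits a newline, indents, then prints "Name: Value", which is the per-field line format used by the native symbols' dump() implementations.

#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/Support/raw_ostream.h"

void dumpExampleField(llvm::raw_ostream &OS) {
  // Prints a line of the form "  typeId: 4" (two-space indent chosen here).
  llvm::pdb::dumpSymbolField(OS, "typeId", 4u, /*Indent=*/2);
}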
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
index 04373463212b..3a74f7c3aace 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -49,9 +49,22 @@ class IPDBRawSymbol;
class IPDBSession;
#define DECLARE_PDB_SYMBOL_CONCRETE_TYPE(TagValue) \
+private: \
+ using PDBSymbol::PDBSymbol; \
+ friend class PDBSymbol; \
+ \
+public: \
static const PDB_SymType Tag = TagValue; \
static bool classof(const PDBSymbol *S) { return S->getSymTag() == Tag; }
+#define DECLARE_PDB_SYMBOL_CUSTOM_TYPE(Condition) \
+private: \
+ using PDBSymbol::PDBSymbol; \
+ friend class PDBSymbol; \
+ \
+public: \
+ static bool classof(const PDBSymbol *S) { return Condition; }
+
/// PDBSymbol defines the base of the inheritance hierarchy for concrete symbol
/// types (e.g. functions, executables, vtables, etc). All concrete symbol
/// types inherit from PDBSymbol and expose the exact set of methods that are
@@ -59,14 +72,33 @@ class IPDBSession;
/// reference "Lexical and Class Hierarchy of Symbol Types":
/// https://msdn.microsoft.com/en-us/library/370hs6k4.aspx
class PDBSymbol {
+ static std::unique_ptr<PDBSymbol> createSymbol(const IPDBSession &PDBSession,
+ PDB_SymType Tag);
+
protected:
- PDBSymbol(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
- PDBSymbol(PDBSymbol &Symbol);
+ explicit PDBSymbol(const IPDBSession &PDBSession);
+ PDBSymbol(PDBSymbol &&Other);
public:
static std::unique_ptr<PDBSymbol>
- create(const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol);
+ create(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> RawSymbol);
+ static std::unique_ptr<PDBSymbol> create(const IPDBSession &PDBSession,
+ IPDBRawSymbol &RawSymbol);
+
+ template <typename ConcreteT>
+ static std::unique_ptr<ConcreteT>
+ createAs(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> RawSymbol) {
+ std::unique_ptr<PDBSymbol> S = create(PDBSession, std::move(RawSymbol));
+ return unique_dyn_cast_or_null<ConcreteT>(std::move(S));
+ }
+ template <typename ConcreteT>
+ static std::unique_ptr<ConcreteT> createAs(const IPDBSession &PDBSession,
+ IPDBRawSymbol &RawSymbol) {
+ std::unique_ptr<PDBSymbol> S = create(PDBSession, RawSymbol);
+ return unique_dyn_cast_or_null<ConcreteT>(std::move(S));
+ }
virtual ~PDBSymbol();
@@ -80,7 +112,8 @@ public:
/// normally goes on the right side of the symbol.
virtual void dumpRight(PDBSymDumper &Dumper) const {}
- void defaultDump(raw_ostream &OS, int Indent) const;
+ void defaultDump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowFlags,
+ PdbSymbolIdField RecurseFlags) const;
void dumpProperties() const;
void dumpChildStats() const;
@@ -94,8 +127,6 @@ public:
return Enumerator->getNext();
}
- std::unique_ptr<PDBSymbol> clone() const;
-
template <typename T>
std::unique_ptr<ConcreteSymbolEnumerator<T>> findAllChildren() const {
auto BaseIter = RawSymbol->findChildren(T::Tag);
@@ -131,7 +162,8 @@ protected:
}
const IPDBSession &Session;
- std::unique_ptr<IPDBRawSymbol> RawSymbol;
+ std::unique_ptr<IPDBRawSymbol> OwnedRawSymbol;
+ IPDBRawSymbol *RawSymbol = nullptr;
};
} // namespace llvm
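A possible caller-side view of the reworked factory API, illustrative only: create() picks the concrete subclass from the raw symbol's tag, and createAs<>() layers unique_dyn_cast_or_null<> on top, so a tag mismatch yields a null pointer instead of a mis-typed wrapper. The helper name asFunction is hypothetical.

    #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
    #include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
    #include <memory>
    #include <utility>
    using namespace llvm;

    // Returns null unless Raw describes a PDB_SymType::Function symbol.
    std::unique_ptr<pdb::PDBSymbolFunc>
    asFunction(const pdb::IPDBSession &Session,
               std::unique_ptr<pdb::IPDBRawSymbol> Raw) {
      return pdb::PDBSymbol::createAs<pdb::PDBSymbolFunc>(Session, std::move(Raw));
    }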
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
index 3169146e5b12..ef00df15cb0a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
@@ -18,12 +18,9 @@ class raw_ostream;
namespace pdb {
class PDBSymbolAnnotation : public PDBSymbol {
-public:
- PDBSymbolAnnotation(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Annotation)
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
index d81da1eaa023..2cf9c72a8886 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolBlock : public PDBSymbol {
-public:
- PDBSymbolBlock(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Block)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
index 9549089c7eb4..04dbd962ebd4 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
@@ -20,12 +20,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolCompiland : public PDBSymbol {
-public:
- PDBSymbolCompiland(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> CompilandSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Compiland)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(isEditAndContinueEnabled)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
index dba50c42cf81..3d651a464d94 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolCompilandDetails : public PDBSymbol {
-public:
- PDBSymbolCompilandDetails(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::CompilandDetails)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
void getFrontEndVersion(VersionInfo &Version) const {
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
index 7868f0459086..ffc408314d9a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
@@ -18,12 +18,8 @@ namespace llvm {
class raw_ostream;
namespace pdb {
class PDBSymbolCompilandEnv : public PDBSymbol {
-public:
- PDBSymbolCompilandEnv(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::CompilandEnv)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getLexicalParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
index 54f089404262..c29e4c31d3f3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
@@ -23,12 +23,8 @@ namespace pdb {
/// fit anywhere else in the lexical hierarchy.
/// https://msdn.microsoft.com/en-us/library/d88sf09h.aspx
class PDBSymbolCustom : public PDBSymbol {
-public:
- PDBSymbolCustom(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> CustomSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Custom)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
void getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes);
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
index 76b14bf17784..217e1e976e6b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
@@ -21,12 +21,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolData : public PDBSymbol {
-public:
- PDBSymbolData(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> DataSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Data)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAccess)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
index 2c2d74665040..366d0cf4777f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
@@ -20,12 +20,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolExe : public PDBSymbol {
-public:
- PDBSymbolExe(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> ExeSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Exe)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAge)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
index 05d585d25763..129e557c7f25 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
@@ -22,18 +22,14 @@ class raw_ostream;
namespace pdb {
class PDBSymbolFunc : public PDBSymbol {
+ DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Function)
public:
- PDBSymbolFunc(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> FuncSymbol);
-
void dump(PDBSymDumper &Dumper) const override;
bool isDestructor() const;
std::unique_ptr<IPDBEnumChildren<PDBSymbolData>> getArguments() const;
- DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Function)
-
FORWARD_SYMBOL_METHOD(getAccess)
FORWARD_SYMBOL_METHOD(getAddressOffset)
FORWARD_SYMBOL_METHOD(getAddressSection)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
index 3341bd9b30fd..18db8a50fd1b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
@@ -20,12 +20,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolFuncDebugEnd : public PDBSymbol {
-public:
- PDBSymbolFuncDebugEnd(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> FuncDebugEndSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::FuncDebugEnd)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
index 6729838597c8..83d82f0cbcc5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolFuncDebugStart : public PDBSymbol {
-public:
- PDBSymbolFuncDebugStart(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> FuncDebugStartSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::FuncDebugStart)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
index c2b1c28c929e..8b2617fcd757 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolLabel : public PDBSymbol {
-public:
- PDBSymbolLabel(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> LabelSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Label)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
index c9e6ee67c575..9def3edb469a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolPublicSymbol : public PDBSymbol {
-public:
- PDBSymbolPublicSymbol(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> PublicSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::PublicSymbol)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAddressOffset)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
index 614fad86caa8..7bb0555362db 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolThunk : public PDBSymbol {
-public:
- PDBSymbolThunk(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> ThunkSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Thunk)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAccess)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
index 39b7d3b300ea..488f668bdc10 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeArray : public PDBSymbol {
-public:
- PDBSymbolTypeArray(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> ArrayTypeSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::ArrayType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
void dumpRight(PDBSymDumper &Dumper) const override;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
index d607a3d81170..550deedd7504 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
@@ -22,12 +22,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeBaseClass : public PDBSymbol {
-public:
- PDBSymbolTypeBaseClass(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::BaseClass)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getAccess)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
index 5b1863c42a04..e07e88802b8f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeBuiltin : public PDBSymbol {
-public:
- PDBSymbolTypeBuiltin(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::BuiltinType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getBuiltinType)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
index 199b3f8b304e..0d8979c9c5c5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeCustom : public PDBSymbol {
-public:
- PDBSymbolTypeCustom(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::CustomType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getOemId)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
index e635eb5bbf6f..58292a63501f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeDimension : public PDBSymbol {
-public:
- PDBSymbolTypeDimension(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Dimension)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getLowerBoundId)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
index ddbe7e58f183..f463047bb5b5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
@@ -21,12 +21,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeEnum : public PDBSymbol {
-public:
- PDBSymbolTypeEnum(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> EnumTypeSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Enum)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getBuiltinType)
@@ -38,6 +34,7 @@ public:
FORWARD_SYMBOL_METHOD(hasNestedTypes)
FORWARD_SYMBOL_METHOD(getLength)
FORWARD_SYMBOL_ID_METHOD(getLexicalParent)
+ FORWARD_SYMBOL_ID_METHOD(getUnmodifiedType)
FORWARD_SYMBOL_METHOD(getName)
FORWARD_SYMBOL_METHOD(getSrcLineOnTypeDefn)
FORWARD_SYMBOL_METHOD(isNested)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
index 24c13128111f..5b940b0737af 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeFriend : public PDBSymbol {
-public:
- PDBSymbolTypeFriend(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Friend)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getClassParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
index 3855999c473f..074cb418fc82 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeFunctionArg : public PDBSymbol {
-public:
- PDBSymbolTypeFunctionArg(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::FunctionArg)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getClassParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
index abd4cf5effa2..dfdf436197c3 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeFunctionSig : public PDBSymbol {
-public:
- PDBSymbolTypeFunctionSig(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::FunctionSig)
-
+public:
std::unique_ptr<IPDBEnumSymbols> getArguments() const;
void dump(PDBSymDumper &Dumper) const override;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
index 31cf5363dde1..d716abd640c6 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeManaged : public PDBSymbol {
-public:
- PDBSymbolTypeManaged(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::ManagedType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getName)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
index 7612ebac31dd..300d6722fc4d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
@@ -19,16 +19,13 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypePointer : public PDBSymbol {
-public:
- PDBSymbolTypePointer(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::PointerType)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
void dumpRight(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(isConstType)
+ FORWARD_SYMBOL_ID_METHOD(getClassParent)
FORWARD_SYMBOL_METHOD(getLength)
FORWARD_SYMBOL_ID_METHOD(getLexicalParent)
FORWARD_SYMBOL_METHOD(isReference)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
index 16c1d1b88c6d..d6e2a36486d5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeTypedef : public PDBSymbol {
-public:
- PDBSymbolTypeTypedef(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Typedef)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(getBuiltinType)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
index e259b6dca3d5..937dd6c87221 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
@@ -23,17 +23,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeUDT : public PDBSymbol {
-public:
- PDBSymbolTypeUDT(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> UDTSymbol);
-
- std::unique_ptr<PDBSymbolTypeUDT> clone() const {
- return getSession().getConcreteSymbolById<PDBSymbolTypeUDT>(
- getSymIndexId());
- }
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::UDT)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getClassParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
index e270c2b7eb95..6efce4bbd686 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeVTable : public PDBSymbol {
-public:
- PDBSymbolTypeVTable(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> VtblSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::VTable)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getClassParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
index 8acaabea5bb8..8949052b0c0f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
@@ -19,12 +19,8 @@ class raw_ostream;
namespace pdb {
class PDBSymbolTypeVTableShape : public PDBSymbol {
-public:
- PDBSymbolTypeVTableShape(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> VtblShapeSymbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::VTableShape)
-
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_METHOD(isConstType)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
index de43e47badbd..e935ac6ce0dc 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
@@ -18,16 +18,11 @@ class raw_ostream;
namespace pdb {
class PDBSymbolUnknown : public PDBSymbol {
-public:
- PDBSymbolUnknown(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> UnknownSymbol);
+ DECLARE_PDB_SYMBOL_CUSTOM_TYPE(S->getSymTag() == PDB_SymType::None ||
+ S->getSymTag() >= PDB_SymType::Max)
+public:
void dump(PDBSymDumper &Dumper) const override;
-
- static bool classof(const PDBSymbol *S) {
- return (S->getSymTag() == PDB_SymType::None ||
- S->getSymTag() >= PDB_SymType::Max);
- }
};
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
index 70fbd5b84c34..4e8c99fc8d89 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
@@ -19,12 +19,9 @@ class raw_ostream;
namespace pdb {
class PDBSymbolUsingNamespace : public PDBSymbol {
-public:
- PDBSymbolUsingNamespace(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol);
-
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::UsingNamespace)
+public:
void dump(PDBSymDumper &Dumper) const override;
FORWARD_SYMBOL_ID_METHOD(getLexicalParent)
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index da6cb1d26771..917f3ed73910 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -12,6 +12,7 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include <cctype>
#include <cstddef>
@@ -22,6 +23,8 @@
namespace llvm {
namespace pdb {
+typedef uint32_t SymIndexId;
+
class IPDBDataStream;
class IPDBInjectedSource;
class IPDBLineNumber;
@@ -69,6 +72,7 @@ using IPDBEnumLineNumbers = IPDBEnumChildren<IPDBLineNumber>;
using IPDBEnumTables = IPDBEnumChildren<IPDBTable>;
using IPDBEnumInjectedSources = IPDBEnumChildren<IPDBInjectedSource>;
using IPDBEnumSectionContribs = IPDBEnumChildren<IPDBSectionContrib>;
+using IPDBEnumFrameData = IPDBEnumChildren<IPDBFrameData>;
/// Specifies which PDB reader implementation is to be used. Only a value
/// of PDB_ReaderType::DIA is currently supported, but Native is in the works.
@@ -208,6 +212,18 @@ enum class PDB_SymType {
CustomType,
ManagedType,
Dimension,
+ CallSite,
+ InlineSite,
+ BaseInterface,
+ VectorType,
+ MatrixType,
+ HLSLType,
+ Caller,
+ Callee,
+ Export,
+ HeapAllocationSite,
+ CoffGroup,
+ Inlinee,
Max
};
@@ -334,6 +350,36 @@ enum PDB_VariantType {
struct Variant {
Variant() = default;
+ explicit Variant(bool V) : Type(PDB_VariantType::Bool) { Value.Bool = V; }
+ explicit Variant(int8_t V) : Type(PDB_VariantType::Int8) { Value.Int8 = V; }
+ explicit Variant(int16_t V) : Type(PDB_VariantType::Int16) {
+ Value.Int16 = V;
+ }
+ explicit Variant(int32_t V) : Type(PDB_VariantType::Int32) {
+ Value.Int32 = V;
+ }
+ explicit Variant(int64_t V) : Type(PDB_VariantType::Int64) {
+ Value.Int64 = V;
+ }
+ explicit Variant(float V) : Type(PDB_VariantType::Single) {
+ Value.Single = V;
+ }
+ explicit Variant(double V) : Type(PDB_VariantType::Double) {
+ Value.Double = V;
+ }
+ explicit Variant(uint8_t V) : Type(PDB_VariantType::UInt8) {
+ Value.UInt8 = V;
+ }
+ explicit Variant(uint16_t V) : Type(PDB_VariantType::UInt16) {
+ Value.UInt16 = V;
+ }
+ explicit Variant(uint32_t V) : Type(PDB_VariantType::UInt32) {
+ Value.UInt32 = V;
+ }
+ explicit Variant(uint64_t V) : Type(PDB_VariantType::UInt64) {
+ Value.UInt64 = V;
+ }
+
Variant(const Variant &Other) {
*this = Other;
}
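The explicit Variant constructors added above record which union member was written; because every overload takes an exact fixed-width type, the argument's static type selects the slot. A small sketch (variantDemo is hypothetical; Type and Value are the members this struct already exposes):

    #include "llvm/DebugInfo/PDB/PDBTypes.h"
    #include <cassert>
    #include <cstdint>

    void variantDemo() {
      llvm::pdb::Variant V(static_cast<uint32_t>(42)); // picks the UInt32 overload
      assert(V.Type == llvm::pdb::PDB_VariantType::UInt32);
      assert(V.Value.UInt32 == 42);
    }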
diff --git a/contrib/llvm/lib/Demangle/Compiler.h b/contrib/llvm/include/llvm/Demangle/Compiler.h
index 248d6e3a7faa..248d6e3a7faa 100644
--- a/contrib/llvm/lib/Demangle/Compiler.h
+++ b/contrib/llvm/include/llvm/Demangle/Compiler.h
diff --git a/contrib/llvm/include/llvm/Demangle/Demangle.h b/contrib/llvm/include/llvm/Demangle/Demangle.h
index df7753f23b87..4c9dc9569e18 100644
--- a/contrib/llvm/include/llvm/Demangle/Demangle.h
+++ b/contrib/llvm/include/llvm/Demangle/Demangle.h
@@ -7,6 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_DEMANGLE_DEMANGLE_H
+#define LLVM_DEMANGLE_DEMANGLE_H
+
#include <cstddef>
namespace llvm {
@@ -27,8 +30,11 @@ enum : int {
char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
int *status);
+
+
+enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0 };
char *microsoftDemangle(const char *mangled_name, char *buf, size_t *n,
- int *status);
+ int *status, MSDemangleFlags Flags = MSDF_None);
/// "Partial" demangler. This supports demangling a string into an AST
/// (typically an intermediate stage in itaniumDemangle) and querying certain
@@ -86,3 +92,5 @@ private:
void *Context;
};
} // namespace llvm
+
+#endif
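The new MSDemangleFlags parameter defaults to MSDF_None, so existing callers keep compiling unchanged. A minimal standalone use of the extended signature; the mangled name is just an example (?foo@@YAXXZ, i.e. void __cdecl foo(void)):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      // With buf == nullptr the demangler allocates the result; the caller frees it.
      char *Demangled = llvm::microsoftDemangle("?foo@@YAXXZ", nullptr, nullptr,
                                                &Status, llvm::MSDF_None);
      if (Status == llvm::demangle_success && Demangled)
        std::puts(Demangled); // expected: "void __cdecl foo(void)"
      std::free(Demangled);
      return 0;
    }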
diff --git a/contrib/llvm/include/llvm/Demangle/ItaniumDemangle.h b/contrib/llvm/include/llvm/Demangle/ItaniumDemangle.h
new file mode 100644
index 000000000000..0b9187f30a5a
--- /dev/null
+++ b/contrib/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -0,0 +1,5184 @@
+//===------------------------- ItaniumDemangle.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+
+// FIXME: (possibly) incomplete list of features that clang mangles that this
+// file does not yet support:
+// - C++ modules TS
+
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+
+#include <cassert>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <numeric>
+#include <utility>
+
+#define FOR_EACH_NODE_KIND(X) \
+ X(NodeArrayNode) \
+ X(DotSuffix) \
+ X(VendorExtQualType) \
+ X(QualType) \
+ X(ConversionOperatorType) \
+ X(PostfixQualifiedType) \
+ X(ElaboratedTypeSpefType) \
+ X(NameType) \
+ X(AbiTagAttr) \
+ X(EnableIfAttr) \
+ X(ObjCProtoName) \
+ X(PointerType) \
+ X(ReferenceType) \
+ X(PointerToMemberType) \
+ X(ArrayType) \
+ X(FunctionType) \
+ X(NoexceptSpec) \
+ X(DynamicExceptionSpec) \
+ X(FunctionEncoding) \
+ X(LiteralOperator) \
+ X(SpecialName) \
+ X(CtorVtableSpecialName) \
+ X(QualifiedName) \
+ X(NestedName) \
+ X(LocalName) \
+ X(VectorType) \
+ X(PixelVectorType) \
+ X(ParameterPack) \
+ X(TemplateArgumentPack) \
+ X(ParameterPackExpansion) \
+ X(TemplateArgs) \
+ X(ForwardTemplateReference) \
+ X(NameWithTemplateArgs) \
+ X(GlobalQualifiedName) \
+ X(StdQualifiedName) \
+ X(ExpandedSpecialSubstitution) \
+ X(SpecialSubstitution) \
+ X(CtorDtorName) \
+ X(DtorName) \
+ X(UnnamedTypeName) \
+ X(ClosureTypeName) \
+ X(StructuredBindingName) \
+ X(BinaryExpr) \
+ X(ArraySubscriptExpr) \
+ X(PostfixExpr) \
+ X(ConditionalExpr) \
+ X(MemberExpr) \
+ X(EnclosingExpr) \
+ X(CastExpr) \
+ X(SizeofParamPackExpr) \
+ X(CallExpr) \
+ X(NewExpr) \
+ X(DeleteExpr) \
+ X(PrefixExpr) \
+ X(FunctionParam) \
+ X(ConversionExpr) \
+ X(InitListExpr) \
+ X(FoldExpr) \
+ X(ThrowExpr) \
+ X(BoolExpr) \
+ X(IntegerCastExpr) \
+ X(IntegerLiteral) \
+ X(FloatLiteral) \
+ X(DoubleLiteral) \
+ X(LongDoubleLiteral) \
+ X(BracedExpr) \
+ X(BracedRangeExpr)
+
+namespace llvm {
+namespace itanium_demangle {
+// Base class of all AST nodes. The AST is built by the parser, then is
+// traversed by the printLeft/Right functions to produce a demangled string.
+class Node {
+public:
+ enum Kind : unsigned char {
+#define ENUMERATOR(NodeKind) K ## NodeKind,
+ FOR_EACH_NODE_KIND(ENUMERATOR)
+#undef ENUMERATOR
+ };
+
+ /// Three-way bool to track a cached value. Unknown is possible if this node
+ /// has an unexpanded parameter pack below it that may affect this cache.
+ enum class Cache : unsigned char { Yes, No, Unknown, };
+
+private:
+ Kind K;
+
+ // FIXME: Make these protected.
+public:
+ /// Tracks if this node has a component on its right side, in which case we
+ /// need to call printRight.
+ Cache RHSComponentCache;
+
+ /// Track if this node is a (possibly qualified) array type. This can affect
+ /// how we format the output string.
+ Cache ArrayCache;
+
+ /// Track if this node is a (possibly qualified) function type. This can
+ /// affect how we format the output string.
+ Cache FunctionCache;
+
+public:
+ Node(Kind K_, Cache RHSComponentCache_ = Cache::No,
+ Cache ArrayCache_ = Cache::No, Cache FunctionCache_ = Cache::No)
+ : K(K_), RHSComponentCache(RHSComponentCache_), ArrayCache(ArrayCache_),
+ FunctionCache(FunctionCache_) {}
+
+ /// Visit the most-derived object corresponding to this object.
+ template<typename Fn> void visit(Fn F) const;
+
+ // The following function is provided by all derived classes:
+ //
+ // Call F with arguments that, when passed to the constructor of this node,
+ // would construct an equivalent node.
+ //template<typename Fn> void match(Fn F) const;
+
+ bool hasRHSComponent(OutputStream &S) const {
+ if (RHSComponentCache != Cache::Unknown)
+ return RHSComponentCache == Cache::Yes;
+ return hasRHSComponentSlow(S);
+ }
+
+ bool hasArray(OutputStream &S) const {
+ if (ArrayCache != Cache::Unknown)
+ return ArrayCache == Cache::Yes;
+ return hasArraySlow(S);
+ }
+
+ bool hasFunction(OutputStream &S) const {
+ if (FunctionCache != Cache::Unknown)
+ return FunctionCache == Cache::Yes;
+ return hasFunctionSlow(S);
+ }
+
+ Kind getKind() const { return K; }
+
+ virtual bool hasRHSComponentSlow(OutputStream &) const { return false; }
+ virtual bool hasArraySlow(OutputStream &) const { return false; }
+ virtual bool hasFunctionSlow(OutputStream &) const { return false; }
+
+ // Dig through "glue" nodes like ParameterPack and ForwardTemplateReference to
+ // get at a node that actually represents some concrete syntax.
+ virtual const Node *getSyntaxNode(OutputStream &) const {
+ return this;
+ }
+
+ void print(OutputStream &S) const {
+ printLeft(S);
+ if (RHSComponentCache != Cache::No)
+ printRight(S);
+ }
+
+ // Print the "left" side of this Node into OutputStream.
+ virtual void printLeft(OutputStream &) const = 0;
+
+ // Print the "right". This distinction is necessary to represent C++ types
+ // that appear on the RHS of their subtype, such as arrays or functions.
+ // Since most types don't have such a component, provide a default
+ // implementation.
+ virtual void printRight(OutputStream &) const {}
+
+ virtual StringView getBaseName() const { return StringView(); }
+
+  // Silence compiler warnings; this dtor will never be called.
+ virtual ~Node() = default;
+
+#ifndef NDEBUG
+ LLVM_DUMP_METHOD void dump() const;
+#endif
+};
+
+class NodeArray {
+ Node **Elements;
+ size_t NumElements;
+
+public:
+ NodeArray() : Elements(nullptr), NumElements(0) {}
+ NodeArray(Node **Elements_, size_t NumElements_)
+ : Elements(Elements_), NumElements(NumElements_) {}
+
+ bool empty() const { return NumElements == 0; }
+ size_t size() const { return NumElements; }
+
+ Node **begin() const { return Elements; }
+ Node **end() const { return Elements + NumElements; }
+
+ Node *operator[](size_t Idx) const { return Elements[Idx]; }
+
+ void printWithComma(OutputStream &S) const {
+ bool FirstElement = true;
+ for (size_t Idx = 0; Idx != NumElements; ++Idx) {
+ size_t BeforeComma = S.getCurrentPosition();
+ if (!FirstElement)
+ S += ", ";
+ size_t AfterComma = S.getCurrentPosition();
+ Elements[Idx]->print(S);
+
+      // If Elements[Idx] is an empty parameter pack expansion that printed
+      // nothing, erase the comma we just printed.
+ if (AfterComma == S.getCurrentPosition()) {
+ S.setCurrentPosition(BeforeComma);
+ continue;
+ }
+
+ FirstElement = false;
+ }
+ }
+};
+
+struct NodeArrayNode : Node {
+ NodeArray Array;
+ NodeArrayNode(NodeArray Array_) : Node(KNodeArrayNode), Array(Array_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Array); }
+
+ void printLeft(OutputStream &S) const override {
+ Array.printWithComma(S);
+ }
+};
+
+class DotSuffix final : public Node {
+ const Node *Prefix;
+ const StringView Suffix;
+
+public:
+ DotSuffix(const Node *Prefix_, StringView Suffix_)
+ : Node(KDotSuffix), Prefix(Prefix_), Suffix(Suffix_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Prefix, Suffix); }
+
+ void printLeft(OutputStream &s) const override {
+ Prefix->print(s);
+ s += " (";
+ s += Suffix;
+ s += ")";
+ }
+};
+
+class VendorExtQualType final : public Node {
+ const Node *Ty;
+ StringView Ext;
+
+public:
+ VendorExtQualType(const Node *Ty_, StringView Ext_)
+ : Node(KVendorExtQualType), Ty(Ty_), Ext(Ext_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Ext); }
+
+ void printLeft(OutputStream &S) const override {
+ Ty->print(S);
+ S += " ";
+ S += Ext;
+ }
+};
+
+enum FunctionRefQual : unsigned char {
+ FrefQualNone,
+ FrefQualLValue,
+ FrefQualRValue,
+};
+
+enum Qualifiers {
+ QualNone = 0,
+ QualConst = 0x1,
+ QualVolatile = 0x2,
+ QualRestrict = 0x4,
+};
+
+inline Qualifiers operator|=(Qualifiers &Q1, Qualifiers Q2) {
+ return Q1 = static_cast<Qualifiers>(Q1 | Q2);
+}
+
+class QualType : public Node {
+protected:
+ const Qualifiers Quals;
+ const Node *Child;
+
+ void printQuals(OutputStream &S) const {
+ if (Quals & QualConst)
+ S += " const";
+ if (Quals & QualVolatile)
+ S += " volatile";
+ if (Quals & QualRestrict)
+ S += " restrict";
+ }
+
+public:
+ QualType(const Node *Child_, Qualifiers Quals_)
+ : Node(KQualType, Child_->RHSComponentCache,
+ Child_->ArrayCache, Child_->FunctionCache),
+ Quals(Quals_), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child, Quals); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ return Child->hasRHSComponent(S);
+ }
+ bool hasArraySlow(OutputStream &S) const override {
+ return Child->hasArray(S);
+ }
+ bool hasFunctionSlow(OutputStream &S) const override {
+ return Child->hasFunction(S);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ Child->printLeft(S);
+ printQuals(S);
+ }
+
+ void printRight(OutputStream &S) const override { Child->printRight(S); }
+};
+
+class ConversionOperatorType final : public Node {
+ const Node *Ty;
+
+public:
+ ConversionOperatorType(const Node *Ty_)
+ : Node(KConversionOperatorType), Ty(Ty_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "operator ";
+ Ty->print(S);
+ }
+};
+
+class PostfixQualifiedType final : public Node {
+ const Node *Ty;
+ const StringView Postfix;
+
+public:
+ PostfixQualifiedType(Node *Ty_, StringView Postfix_)
+ : Node(KPostfixQualifiedType), Ty(Ty_), Postfix(Postfix_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Postfix); }
+
+ void printLeft(OutputStream &s) const override {
+ Ty->printLeft(s);
+ s += Postfix;
+ }
+};
+
+class NameType final : public Node {
+ const StringView Name;
+
+public:
+ NameType(StringView Name_) : Node(KNameType), Name(Name_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Name); }
+
+ StringView getName() const { return Name; }
+ StringView getBaseName() const override { return Name; }
+
+ void printLeft(OutputStream &s) const override { s += Name; }
+};
+
+class ElaboratedTypeSpefType : public Node {
+ StringView Kind;
+ Node *Child;
+public:
+ ElaboratedTypeSpefType(StringView Kind_, Node *Child_)
+ : Node(KElaboratedTypeSpefType), Kind(Kind_), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Kind, Child); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Kind;
+ S += ' ';
+ Child->print(S);
+ }
+};
+
+struct AbiTagAttr : Node {
+ Node *Base;
+ StringView Tag;
+
+ AbiTagAttr(Node* Base_, StringView Tag_)
+ : Node(KAbiTagAttr, Base_->RHSComponentCache,
+ Base_->ArrayCache, Base_->FunctionCache),
+ Base(Base_), Tag(Tag_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Base, Tag); }
+
+ void printLeft(OutputStream &S) const override {
+ Base->printLeft(S);
+ S += "[abi:";
+ S += Tag;
+ S += "]";
+ }
+};
+
+class EnableIfAttr : public Node {
+ NodeArray Conditions;
+public:
+ EnableIfAttr(NodeArray Conditions_)
+ : Node(KEnableIfAttr), Conditions(Conditions_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Conditions); }
+
+ void printLeft(OutputStream &S) const override {
+ S += " [enable_if:";
+ Conditions.printWithComma(S);
+ S += ']';
+ }
+};
+
+class ObjCProtoName : public Node {
+ const Node *Ty;
+ StringView Protocol;
+
+ friend class PointerType;
+
+public:
+ ObjCProtoName(const Node *Ty_, StringView Protocol_)
+ : Node(KObjCProtoName), Ty(Ty_), Protocol(Protocol_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Protocol); }
+
+ bool isObjCObject() const {
+ return Ty->getKind() == KNameType &&
+ static_cast<const NameType *>(Ty)->getName() == "objc_object";
+ }
+
+ void printLeft(OutputStream &S) const override {
+ Ty->print(S);
+ S += "<";
+ S += Protocol;
+ S += ">";
+ }
+};
+
+class PointerType final : public Node {
+ const Node *Pointee;
+
+public:
+ PointerType(const Node *Pointee_)
+ : Node(KPointerType, Pointee_->RHSComponentCache),
+ Pointee(Pointee_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Pointee); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ return Pointee->hasRHSComponent(S);
+ }
+
+ void printLeft(OutputStream &s) const override {
+ // We rewrite objc_object<SomeProtocol>* into id<SomeProtocol>.
+ if (Pointee->getKind() != KObjCProtoName ||
+ !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
+ Pointee->printLeft(s);
+ if (Pointee->hasArray(s))
+ s += " ";
+ if (Pointee->hasArray(s) || Pointee->hasFunction(s))
+ s += "(";
+ s += "*";
+ } else {
+ const auto *objcProto = static_cast<const ObjCProtoName *>(Pointee);
+ s += "id<";
+ s += objcProto->Protocol;
+ s += ">";
+ }
+ }
+
+ void printRight(OutputStream &s) const override {
+ if (Pointee->getKind() != KObjCProtoName ||
+ !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
+ if (Pointee->hasArray(s) || Pointee->hasFunction(s))
+ s += ")";
+ Pointee->printRight(s);
+ }
+ }
+};
+
+enum class ReferenceKind {
+ LValue,
+ RValue,
+};
+
+// Represents either a LValue or an RValue reference type.
+class ReferenceType : public Node {
+ const Node *Pointee;
+ ReferenceKind RK;
+
+ mutable bool Printing = false;
+
+ // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
+  // rule here is that an rvalue ref to an rvalue ref collapses to an rvalue
+  // ref; any other combination collapses to an lvalue ref.
+ std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const {
+ auto SoFar = std::make_pair(RK, Pointee);
+ for (;;) {
+ const Node *SN = SoFar.second->getSyntaxNode(S);
+ if (SN->getKind() != KReferenceType)
+ break;
+ auto *RT = static_cast<const ReferenceType *>(SN);
+ SoFar.second = RT->Pointee;
+ SoFar.first = std::min(SoFar.first, RT->RK);
+ }
+ return SoFar;
+ }
+
+public:
+ ReferenceType(const Node *Pointee_, ReferenceKind RK_)
+ : Node(KReferenceType, Pointee_->RHSComponentCache),
+ Pointee(Pointee_), RK(RK_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Pointee, RK); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ return Pointee->hasRHSComponent(S);
+ }
+
+ void printLeft(OutputStream &s) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
+ Collapsed.second->printLeft(s);
+ if (Collapsed.second->hasArray(s))
+ s += " ";
+ if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
+ s += "(";
+
+ s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
+ }
+ void printRight(OutputStream &s) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
+ if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
+ s += ")";
+ Collapsed.second->printRight(s);
+ }
+};
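The collapse() walk in ReferenceType mirrors the language's reference-collapsing rule: && applied to && stays &&, every other combination yields &. A compile-time restatement of that rule, independent of the demangler (the aliases LRef and RRef are only for illustration):

    #include <type_traits>

    using LRef = int &;
    using RRef = int &&;
    static_assert(std::is_same<LRef &,  int &>::value,  "& of &   collapses to &");
    static_assert(std::is_same<LRef &&, int &>::value,  "&& of &  collapses to &");
    static_assert(std::is_same<RRef &,  int &>::value,  "& of &&  collapses to &");
    static_assert(std::is_same<RRef &&, int &&>::value, "&& of && stays &&");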
+
+class PointerToMemberType final : public Node {
+ const Node *ClassType;
+ const Node *MemberType;
+
+public:
+ PointerToMemberType(const Node *ClassType_, const Node *MemberType_)
+ : Node(KPointerToMemberType, MemberType_->RHSComponentCache),
+ ClassType(ClassType_), MemberType(MemberType_) {}
+
+ template<typename Fn> void match(Fn F) const { F(ClassType, MemberType); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ return MemberType->hasRHSComponent(S);
+ }
+
+ void printLeft(OutputStream &s) const override {
+ MemberType->printLeft(s);
+ if (MemberType->hasArray(s) || MemberType->hasFunction(s))
+ s += "(";
+ else
+ s += " ";
+ ClassType->print(s);
+ s += "::*";
+ }
+
+ void printRight(OutputStream &s) const override {
+ if (MemberType->hasArray(s) || MemberType->hasFunction(s))
+ s += ")";
+ MemberType->printRight(s);
+ }
+};
+
+class NodeOrString {
+ const void *First;
+ const void *Second;
+
+public:
+ /* implicit */ NodeOrString(StringView Str) {
+ const char *FirstChar = Str.begin();
+ const char *SecondChar = Str.end();
+ if (SecondChar == nullptr) {
+ assert(FirstChar == SecondChar);
+ ++FirstChar, ++SecondChar;
+ }
+ First = static_cast<const void *>(FirstChar);
+ Second = static_cast<const void *>(SecondChar);
+ }
+
+ /* implicit */ NodeOrString(Node *N)
+ : First(static_cast<const void *>(N)), Second(nullptr) {}
+ NodeOrString() : First(nullptr), Second(nullptr) {}
+
+ bool isString() const { return Second && First; }
+ bool isNode() const { return First && !Second; }
+ bool isEmpty() const { return !First && !Second; }
+
+ StringView asString() const {
+ assert(isString());
+ return StringView(static_cast<const char *>(First),
+ static_cast<const char *>(Second));
+ }
+
+ const Node *asNode() const {
+ assert(isNode());
+ return static_cast<const Node *>(First);
+ }
+};
+
+class ArrayType final : public Node {
+ const Node *Base;
+ NodeOrString Dimension;
+
+public:
+ ArrayType(const Node *Base_, NodeOrString Dimension_)
+ : Node(KArrayType,
+ /*RHSComponentCache=*/Cache::Yes,
+ /*ArrayCache=*/Cache::Yes),
+ Base(Base_), Dimension(Dimension_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Base, Dimension); }
+
+ bool hasRHSComponentSlow(OutputStream &) const override { return true; }
+ bool hasArraySlow(OutputStream &) const override { return true; }
+
+ void printLeft(OutputStream &S) const override { Base->printLeft(S); }
+
+ void printRight(OutputStream &S) const override {
+ if (S.back() != ']')
+ S += " ";
+ S += "[";
+ if (Dimension.isString())
+ S += Dimension.asString();
+ else if (Dimension.isNode())
+ Dimension.asNode()->print(S);
+ S += "]";
+ Base->printRight(S);
+ }
+};
+
+class FunctionType final : public Node {
+ const Node *Ret;
+ NodeArray Params;
+ Qualifiers CVQuals;
+ FunctionRefQual RefQual;
+ const Node *ExceptionSpec;
+
+public:
+ FunctionType(const Node *Ret_, NodeArray Params_, Qualifiers CVQuals_,
+ FunctionRefQual RefQual_, const Node *ExceptionSpec_)
+ : Node(KFunctionType,
+ /*RHSComponentCache=*/Cache::Yes, /*ArrayCache=*/Cache::No,
+ /*FunctionCache=*/Cache::Yes),
+ Ret(Ret_), Params(Params_), CVQuals(CVQuals_), RefQual(RefQual_),
+ ExceptionSpec(ExceptionSpec_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(Ret, Params, CVQuals, RefQual, ExceptionSpec);
+ }
+
+ bool hasRHSComponentSlow(OutputStream &) const override { return true; }
+ bool hasFunctionSlow(OutputStream &) const override { return true; }
+
+ // Handle C++'s ... quirky decl grammar by using the left & right
+ // distinction. Consider:
+ // int (*f(float))(char) {}
+ // f is a function that takes a float and returns a pointer to a function
+ // that takes a char and returns an int. If we're trying to print f, start
+  // by printing out the return type's left, then print our parameters, then
+ // finally print right of the return type.
+ void printLeft(OutputStream &S) const override {
+ Ret->printLeft(S);
+ S += " ";
+ }
+
+ void printRight(OutputStream &S) const override {
+ S += "(";
+ Params.printWithComma(S);
+ S += ")";
+ Ret->printRight(S);
+
+ if (CVQuals & QualConst)
+ S += " const";
+ if (CVQuals & QualVolatile)
+ S += " volatile";
+ if (CVQuals & QualRestrict)
+ S += " restrict";
+
+ if (RefQual == FrefQualLValue)
+ S += " &";
+ else if (RefQual == FrefQualRValue)
+ S += " &&";
+
+ if (ExceptionSpec != nullptr) {
+ S += ' ';
+ ExceptionSpec->print(S);
+ }
+ }
+};
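The left/right split described in the FunctionType comment is what lets nested declarator types print inside-out. One way to observe it end to end through the public API, assuming the itaniumDemangle() entry point declared in Demangle.h; the mangled name is illustrative (a template instantiation, so the pointer-to-function return type is encoded and must be printed around the name):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      char *D = llvm::itaniumDemangle("_Z1fIfEPFicET_", nullptr, nullptr, &Status);
      if (Status == llvm::demangle_success && D)
        std::puts(D); // expected: "int (*f<float>(float))(char)"
      std::free(D);
      return 0;
    }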
+
+class NoexceptSpec : public Node {
+ const Node *E;
+public:
+ NoexceptSpec(const Node *E_) : Node(KNoexceptSpec), E(E_) {}
+
+ template<typename Fn> void match(Fn F) const { F(E); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "noexcept(";
+ E->print(S);
+ S += ")";
+ }
+};
+
+class DynamicExceptionSpec : public Node {
+ NodeArray Types;
+public:
+ DynamicExceptionSpec(NodeArray Types_)
+ : Node(KDynamicExceptionSpec), Types(Types_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Types); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "throw(";
+ Types.printWithComma(S);
+ S += ')';
+ }
+};
+
+class FunctionEncoding final : public Node {
+ const Node *Ret;
+ const Node *Name;
+ NodeArray Params;
+ const Node *Attrs;
+ Qualifiers CVQuals;
+ FunctionRefQual RefQual;
+
+public:
+ FunctionEncoding(const Node *Ret_, const Node *Name_, NodeArray Params_,
+ const Node *Attrs_, Qualifiers CVQuals_,
+ FunctionRefQual RefQual_)
+ : Node(KFunctionEncoding,
+ /*RHSComponentCache=*/Cache::Yes, /*ArrayCache=*/Cache::No,
+ /*FunctionCache=*/Cache::Yes),
+ Ret(Ret_), Name(Name_), Params(Params_), Attrs(Attrs_),
+ CVQuals(CVQuals_), RefQual(RefQual_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(Ret, Name, Params, Attrs, CVQuals, RefQual);
+ }
+
+ Qualifiers getCVQuals() const { return CVQuals; }
+ FunctionRefQual getRefQual() const { return RefQual; }
+ NodeArray getParams() const { return Params; }
+ const Node *getReturnType() const { return Ret; }
+
+ bool hasRHSComponentSlow(OutputStream &) const override { return true; }
+ bool hasFunctionSlow(OutputStream &) const override { return true; }
+
+ const Node *getName() const { return Name; }
+
+ void printLeft(OutputStream &S) const override {
+ if (Ret) {
+ Ret->printLeft(S);
+ if (!Ret->hasRHSComponent(S))
+ S += " ";
+ }
+ Name->print(S);
+ }
+
+ void printRight(OutputStream &S) const override {
+ S += "(";
+ Params.printWithComma(S);
+ S += ")";
+ if (Ret)
+ Ret->printRight(S);
+
+ if (CVQuals & QualConst)
+ S += " const";
+ if (CVQuals & QualVolatile)
+ S += " volatile";
+ if (CVQuals & QualRestrict)
+ S += " restrict";
+
+ if (RefQual == FrefQualLValue)
+ S += " &";
+ else if (RefQual == FrefQualRValue)
+ S += " &&";
+
+ if (Attrs != nullptr)
+ Attrs->print(S);
+ }
+};
+
+class LiteralOperator : public Node {
+ const Node *OpName;
+
+public:
+ LiteralOperator(const Node *OpName_)
+ : Node(KLiteralOperator), OpName(OpName_) {}
+
+ template<typename Fn> void match(Fn F) const { F(OpName); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "operator\"\" ";
+ OpName->print(S);
+ }
+};
+
+class SpecialName final : public Node {
+ const StringView Special;
+ const Node *Child;
+
+public:
+ SpecialName(StringView Special_, const Node *Child_)
+ : Node(KSpecialName), Special(Special_), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Special, Child); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Special;
+ Child->print(S);
+ }
+};
+
+class CtorVtableSpecialName final : public Node {
+ const Node *FirstType;
+ const Node *SecondType;
+
+public:
+ CtorVtableSpecialName(const Node *FirstType_, const Node *SecondType_)
+ : Node(KCtorVtableSpecialName),
+ FirstType(FirstType_), SecondType(SecondType_) {}
+
+ template<typename Fn> void match(Fn F) const { F(FirstType, SecondType); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "construction vtable for ";
+ FirstType->print(S);
+ S += "-in-";
+ SecondType->print(S);
+ }
+};
+
+struct NestedName : Node {
+ Node *Qual;
+ Node *Name;
+
+ NestedName(Node *Qual_, Node *Name_)
+ : Node(KNestedName), Qual(Qual_), Name(Name_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Qual, Name); }
+
+ StringView getBaseName() const override { return Name->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ Qual->print(S);
+ S += "::";
+ Name->print(S);
+ }
+};
+
+struct LocalName : Node {
+ Node *Encoding;
+ Node *Entity;
+
+ LocalName(Node *Encoding_, Node *Entity_)
+ : Node(KLocalName), Encoding(Encoding_), Entity(Entity_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Encoding, Entity); }
+
+ void printLeft(OutputStream &S) const override {
+ Encoding->print(S);
+ S += "::";
+ Entity->print(S);
+ }
+};
+
+class QualifiedName final : public Node {
+ // qualifier::name
+ const Node *Qualifier;
+ const Node *Name;
+
+public:
+ QualifiedName(const Node *Qualifier_, const Node *Name_)
+ : Node(KQualifiedName), Qualifier(Qualifier_), Name(Name_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Qualifier, Name); }
+
+ StringView getBaseName() const override { return Name->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ Qualifier->print(S);
+ S += "::";
+ Name->print(S);
+ }
+};
+
+class VectorType final : public Node {
+ const Node *BaseType;
+ const NodeOrString Dimension;
+
+public:
+ VectorType(const Node *BaseType_, NodeOrString Dimension_)
+ : Node(KVectorType), BaseType(BaseType_),
+ Dimension(Dimension_) {}
+
+ template<typename Fn> void match(Fn F) const { F(BaseType, Dimension); }
+
+ void printLeft(OutputStream &S) const override {
+ BaseType->print(S);
+ S += " vector[";
+ if (Dimension.isNode())
+ Dimension.asNode()->print(S);
+ else if (Dimension.isString())
+ S += Dimension.asString();
+ S += "]";
+ }
+};
+
+class PixelVectorType final : public Node {
+ const NodeOrString Dimension;
+
+public:
+ PixelVectorType(NodeOrString Dimension_)
+ : Node(KPixelVectorType), Dimension(Dimension_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Dimension); }
+
+ void printLeft(OutputStream &S) const override {
+ // FIXME: This should demangle as "vector pixel".
+ S += "pixel vector[";
+ S += Dimension.asString();
+ S += "]";
+ }
+};
+
+/// An unexpanded parameter pack (either in the expression or type context). If
+/// this AST is correct, this node will have a ParameterPackExpansion node above
+/// it.
+///
+/// This node is created when some <template-args> are found that apply to an
+/// <encoding>, and is stored in the TemplateParams table. In order for this to
+/// appear in the final AST, it has to be referenced via a <template-param> (i.e.,
+/// T_).
+class ParameterPack final : public Node {
+ NodeArray Data;
+
+  // Set up OutputStream for a pack expansion unless we're already expanding one.
+ void initializePackExpansion(OutputStream &S) const {
+ if (S.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
+ S.CurrentPackMax = static_cast<unsigned>(Data.size());
+ S.CurrentPackIndex = 0;
+ }
+ }
+
+public:
+ ParameterPack(NodeArray Data_) : Node(KParameterPack), Data(Data_) {
+ ArrayCache = FunctionCache = RHSComponentCache = Cache::Unknown;
+ if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
+ return P->ArrayCache == Cache::No;
+ }))
+ ArrayCache = Cache::No;
+ if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
+ return P->FunctionCache == Cache::No;
+ }))
+ FunctionCache = Cache::No;
+ if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
+ return P->RHSComponentCache == Cache::No;
+ }))
+ RHSComponentCache = Cache::No;
+ }
+
+ template<typename Fn> void match(Fn F) const { F(Data); }
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasRHSComponent(S);
+ }
+ bool hasArraySlow(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasArray(S);
+ }
+ bool hasFunctionSlow(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasFunction(S);
+ }
+ const Node *getSyntaxNode(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ return Idx < Data.size() ? Data[Idx]->getSyntaxNode(S) : this;
+ }
+
+ void printLeft(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ if (Idx < Data.size())
+ Data[Idx]->printLeft(S);
+ }
+ void printRight(OutputStream &S) const override {
+ initializePackExpansion(S);
+ size_t Idx = S.CurrentPackIndex;
+ if (Idx < Data.size())
+ Data[Idx]->printRight(S);
+ }
+};
+
+/// A variadic template argument. This node represents an occurrence of
+/// J<something>E in some <template-args>. It isn't itself unexpanded, unless
+/// one of it's Elements is. The parser inserts a ParameterPack into the
+/// TemplateParams table if the <template-args> this pack belongs to apply to an
+/// <encoding>.
+class TemplateArgumentPack final : public Node {
+ NodeArray Elements;
+public:
+ TemplateArgumentPack(NodeArray Elements_)
+ : Node(KTemplateArgumentPack), Elements(Elements_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Elements); }
+
+ NodeArray getElements() const { return Elements; }
+
+ void printLeft(OutputStream &S) const override {
+ Elements.printWithComma(S);
+ }
+};
+
+/// A pack expansion. Below this node, there are some unexpanded ParameterPacks
+/// which each have Child->ParameterPackSize elements.
+class ParameterPackExpansion final : public Node {
+ const Node *Child;
+
+public:
+ ParameterPackExpansion(const Node *Child_)
+ : Node(KParameterPackExpansion), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child); }
+
+ const Node *getChild() const { return Child; }
+
+ void printLeft(OutputStream &S) const override {
+ constexpr unsigned Max = std::numeric_limits<unsigned>::max();
+ SwapAndRestore<unsigned> SavePackIdx(S.CurrentPackIndex, Max);
+ SwapAndRestore<unsigned> SavePackMax(S.CurrentPackMax, Max);
+ size_t StreamPos = S.getCurrentPosition();
+
+ // Print the first element in the pack. If Child contains a ParameterPack,
+ // it will set up S.CurrentPackMax and print the first element.
+ Child->print(S);
+
+ // No ParameterPack was found in Child. This can occur if we've found a pack
+ // expansion on a <function-param>.
+ if (S.CurrentPackMax == Max) {
+ S += "...";
+ return;
+ }
+
+ // We found a ParameterPack, but it has no elements. Erase whatever we may
+ // have printed.
+ if (S.CurrentPackMax == 0) {
+ S.setCurrentPosition(StreamPos);
+ return;
+ }
+
+ // Else, iterate through the rest of the elements in the pack.
+ for (unsigned I = 1, E = S.CurrentPackMax; I < E; ++I) {
+ S += ", ";
+ S.CurrentPackIndex = I;
+ Child->print(S);
+ }
+ }
+};
+
+class TemplateArgs final : public Node {
+ NodeArray Params;
+
+public:
+ TemplateArgs(NodeArray Params_) : Node(KTemplateArgs), Params(Params_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Params); }
+
+ NodeArray getParams() { return Params; }
+
+ void printLeft(OutputStream &S) const override {
+ S += "<";
+ Params.printWithComma(S);
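+ // Add a space so two adjacent '>' characters don't run together as ">>".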
+ if (S.back() == '>')
+ S += " ";
+ S += ">";
+ }
+};
+
+/// A forward-reference to a template argument that was not known at the point
+/// where the template parameter name was parsed in a mangling.
+///
+/// This is created when demangling the name of a specialization of a
+/// conversion function template:
+///
+/// \code
+/// struct A {
+/// template<typename T> operator T*();
+/// };
+/// \endcode
+///
+/// When demangling a specialization of the conversion function template, we
+/// encounter the name of the template (including the \c T) before we reach
+/// the template argument list, so we cannot substitute the parameter name
+/// for the corresponding argument while parsing. Instead, we create a
+/// \c ForwardTemplateReference node that is resolved after we parse the
+/// template arguments.
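+///
+/// As a rough illustration (hypothetical example): when demangling something
+/// like _ZN1AcvPT_IiEEv, the T_ inside "cvPT_" is recorded as a
+/// ForwardTemplateReference (Index 0) and is only bound to "int" once the
+/// trailing "IiE" template argument list has been parsed.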
+struct ForwardTemplateReference : Node {
+ size_t Index;
+ Node *Ref = nullptr;
+
+ // Set while this node is being printed. It is possible (though invalid) for a
+ // forward template reference to refer to itself via a substitution. That
+ // creates a cyclic AST, and printing it would overflow the stack, so bail out
+ // if more than one print* function is active at once.
+ mutable bool Printing = false;
+
+ ForwardTemplateReference(size_t Index_)
+ : Node(KForwardTemplateReference, Cache::Unknown, Cache::Unknown,
+ Cache::Unknown),
+ Index(Index_) {}
+
+ // We don't provide a matcher for these, because the value of the node is
+ // not determined by its construction parameters, and it generally needs
+ // special handling.
+ template<typename Fn> void match(Fn F) const = delete;
+
+ bool hasRHSComponentSlow(OutputStream &S) const override {
+ if (Printing)
+ return false;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ return Ref->hasRHSComponent(S);
+ }
+ bool hasArraySlow(OutputStream &S) const override {
+ if (Printing)
+ return false;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ return Ref->hasArray(S);
+ }
+ bool hasFunctionSlow(OutputStream &S) const override {
+ if (Printing)
+ return false;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ return Ref->hasFunction(S);
+ }
+ const Node *getSyntaxNode(OutputStream &S) const override {
+ if (Printing)
+ return this;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ return Ref->getSyntaxNode(S);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ Ref->printLeft(S);
+ }
+ void printRight(OutputStream &S) const override {
+ if (Printing)
+ return;
+ SwapAndRestore<bool> SavePrinting(Printing, true);
+ Ref->printRight(S);
+ }
+};
+
+struct NameWithTemplateArgs : Node {
+ // name<template_args>
+ Node *Name;
+ Node *TemplateArgs;
+
+ NameWithTemplateArgs(Node *Name_, Node *TemplateArgs_)
+ : Node(KNameWithTemplateArgs), Name(Name_), TemplateArgs(TemplateArgs_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Name, TemplateArgs); }
+
+ StringView getBaseName() const override { return Name->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ Name->print(S);
+ TemplateArgs->print(S);
+ }
+};
+
+class GlobalQualifiedName final : public Node {
+ Node *Child;
+
+public:
+ GlobalQualifiedName(Node* Child_)
+ : Node(KGlobalQualifiedName), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child); }
+
+ StringView getBaseName() const override { return Child->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "::";
+ Child->print(S);
+ }
+};
+
+struct StdQualifiedName : Node {
+ Node *Child;
+
+ StdQualifiedName(Node *Child_) : Node(KStdQualifiedName), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child); }
+
+ StringView getBaseName() const override { return Child->getBaseName(); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "std::";
+ Child->print(S);
+ }
+};
+
+enum class SpecialSubKind {
+ allocator,
+ basic_string,
+ string,
+ istream,
+ ostream,
+ iostream,
+};
+
+class ExpandedSpecialSubstitution final : public Node {
+ SpecialSubKind SSK;
+
+public:
+ ExpandedSpecialSubstitution(SpecialSubKind SSK_)
+ : Node(KExpandedSpecialSubstitution), SSK(SSK_) {}
+
+ template<typename Fn> void match(Fn F) const { F(SSK); }
+
+ StringView getBaseName() const override {
+ switch (SSK) {
+ case SpecialSubKind::allocator:
+ return StringView("allocator");
+ case SpecialSubKind::basic_string:
+ return StringView("basic_string");
+ case SpecialSubKind::string:
+ return StringView("basic_string");
+ case SpecialSubKind::istream:
+ return StringView("basic_istream");
+ case SpecialSubKind::ostream:
+ return StringView("basic_ostream");
+ case SpecialSubKind::iostream:
+ return StringView("basic_iostream");
+ }
+ LLVM_BUILTIN_UNREACHABLE;
+ }
+
+ void printLeft(OutputStream &S) const override {
+ switch (SSK) {
+ case SpecialSubKind::allocator:
+ S += "std::allocator";
+ break;
+ case SpecialSubKind::basic_string:
+ S += "std::basic_string";
+ break;
+ case SpecialSubKind::string:
+ S += "std::basic_string<char, std::char_traits<char>, "
+ "std::allocator<char> >";
+ break;
+ case SpecialSubKind::istream:
+ S += "std::basic_istream<char, std::char_traits<char> >";
+ break;
+ case SpecialSubKind::ostream:
+ S += "std::basic_ostream<char, std::char_traits<char> >";
+ break;
+ case SpecialSubKind::iostream:
+ S += "std::basic_iostream<char, std::char_traits<char> >";
+ break;
+ }
+ }
+};
+
+class SpecialSubstitution final : public Node {
+public:
+ SpecialSubKind SSK;
+
+ SpecialSubstitution(SpecialSubKind SSK_)
+ : Node(KSpecialSubstitution), SSK(SSK_) {}
+
+ template<typename Fn> void match(Fn F) const { F(SSK); }
+
+ StringView getBaseName() const override {
+ switch (SSK) {
+ case SpecialSubKind::allocator:
+ return StringView("allocator");
+ case SpecialSubKind::basic_string:
+ return StringView("basic_string");
+ case SpecialSubKind::string:
+ return StringView("string");
+ case SpecialSubKind::istream:
+ return StringView("istream");
+ case SpecialSubKind::ostream:
+ return StringView("ostream");
+ case SpecialSubKind::iostream:
+ return StringView("iostream");
+ }
+ LLVM_BUILTIN_UNREACHABLE;
+ }
+
+ void printLeft(OutputStream &S) const override {
+ switch (SSK) {
+ case SpecialSubKind::allocator:
+ S += "std::allocator";
+ break;
+ case SpecialSubKind::basic_string:
+ S += "std::basic_string";
+ break;
+ case SpecialSubKind::string:
+ S += "std::string";
+ break;
+ case SpecialSubKind::istream:
+ S += "std::istream";
+ break;
+ case SpecialSubKind::ostream:
+ S += "std::ostream";
+ break;
+ case SpecialSubKind::iostream:
+ S += "std::iostream";
+ break;
+ }
+ }
+};
+
+class CtorDtorName final : public Node {
+ const Node *Basename;
+ const bool IsDtor;
+ const int Variant;
+
+public:
+ CtorDtorName(const Node *Basename_, bool IsDtor_, int Variant_)
+ : Node(KCtorDtorName), Basename(Basename_), IsDtor(IsDtor_),
+ Variant(Variant_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Basename, IsDtor, Variant); }
+
+ void printLeft(OutputStream &S) const override {
+ if (IsDtor)
+ S += "~";
+ S += Basename->getBaseName();
+ }
+};
+
+class DtorName : public Node {
+ const Node *Base;
+
+public:
+ DtorName(const Node *Base_) : Node(KDtorName), Base(Base_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Base); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "~";
+ Base->printLeft(S);
+ }
+};
+
+class UnnamedTypeName : public Node {
+ const StringView Count;
+
+public:
+ UnnamedTypeName(StringView Count_) : Node(KUnnamedTypeName), Count(Count_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Count); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "'unnamed";
+ S += Count;
+ S += "\'";
+ }
+};
+
+class ClosureTypeName : public Node {
+ NodeArray Params;
+ StringView Count;
+
+public:
+ ClosureTypeName(NodeArray Params_, StringView Count_)
+ : Node(KClosureTypeName), Params(Params_), Count(Count_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Params, Count); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "\'lambda";
+ S += Count;
+ S += "\'(";
+ Params.printWithComma(S);
+ S += ")";
+ }
+};
+
+class StructuredBindingName : public Node {
+ NodeArray Bindings;
+public:
+ StructuredBindingName(NodeArray Bindings_)
+ : Node(KStructuredBindingName), Bindings(Bindings_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Bindings); }
+
+ void printLeft(OutputStream &S) const override {
+ S += '[';
+ Bindings.printWithComma(S);
+ S += ']';
+ }
+};
+
+// -- Expression Nodes --
+
+class BinaryExpr : public Node {
+ const Node *LHS;
+ const StringView InfixOperator;
+ const Node *RHS;
+
+public:
+ BinaryExpr(const Node *LHS_, StringView InfixOperator_, const Node *RHS_)
+ : Node(KBinaryExpr), LHS(LHS_), InfixOperator(InfixOperator_), RHS(RHS_) {
+ }
+
+ template<typename Fn> void match(Fn F) const { F(LHS, InfixOperator, RHS); }
+
+ void printLeft(OutputStream &S) const override {
+ // This might be a template argument expression; if so, disambiguate a '>'
+ // operator with an extra pair of parens.
+ if (InfixOperator == ">")
+ S += "(";
+
+ S += "(";
+ LHS->print(S);
+ S += ") ";
+ S += InfixOperator;
+ S += " (";
+ RHS->print(S);
+ S += ")";
+
+ if (InfixOperator == ">")
+ S += ")";
+ }
+};
+
+class ArraySubscriptExpr : public Node {
+ const Node *Op1;
+ const Node *Op2;
+
+public:
+ ArraySubscriptExpr(const Node *Op1_, const Node *Op2_)
+ : Node(KArraySubscriptExpr), Op1(Op1_), Op2(Op2_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Op1, Op2); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Op1->print(S);
+ S += ")[";
+ Op2->print(S);
+ S += "]";
+ }
+};
+
+class PostfixExpr : public Node {
+ const Node *Child;
+ const StringView Operator;
+
+public:
+ PostfixExpr(const Node *Child_, StringView Operator_)
+ : Node(KPostfixExpr), Child(Child_), Operator(Operator_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Child, Operator); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Child->print(S);
+ S += ")";
+ S += Operator;
+ }
+};
+
+class ConditionalExpr : public Node {
+ const Node *Cond;
+ const Node *Then;
+ const Node *Else;
+
+public:
+ ConditionalExpr(const Node *Cond_, const Node *Then_, const Node *Else_)
+ : Node(KConditionalExpr), Cond(Cond_), Then(Then_), Else(Else_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Cond, Then, Else); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Cond->print(S);
+ S += ") ? (";
+ Then->print(S);
+ S += ") : (";
+ Else->print(S);
+ S += ")";
+ }
+};
+
+class MemberExpr : public Node {
+ const Node *LHS;
+ const StringView Kind;
+ const Node *RHS;
+
+public:
+ MemberExpr(const Node *LHS_, StringView Kind_, const Node *RHS_)
+ : Node(KMemberExpr), LHS(LHS_), Kind(Kind_), RHS(RHS_) {}
+
+ template<typename Fn> void match(Fn F) const { F(LHS, Kind, RHS); }
+
+ void printLeft(OutputStream &S) const override {
+ LHS->print(S);
+ S += Kind;
+ RHS->print(S);
+ }
+};
+
+class EnclosingExpr : public Node {
+ const StringView Prefix;
+ const Node *Infix;
+ const StringView Postfix;
+
+public:
+ EnclosingExpr(StringView Prefix_, Node *Infix_, StringView Postfix_)
+ : Node(KEnclosingExpr), Prefix(Prefix_), Infix(Infix_),
+ Postfix(Postfix_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Prefix, Infix, Postfix); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Prefix;
+ Infix->print(S);
+ S += Postfix;
+ }
+};
+
+class CastExpr : public Node {
+ // cast_kind<to>(from)
+ const StringView CastKind;
+ const Node *To;
+ const Node *From;
+
+public:
+ CastExpr(StringView CastKind_, const Node *To_, const Node *From_)
+ : Node(KCastExpr), CastKind(CastKind_), To(To_), From(From_) {}
+
+ template<typename Fn> void match(Fn F) const { F(CastKind, To, From); }
+
+ void printLeft(OutputStream &S) const override {
+ S += CastKind;
+ S += "<";
+ To->printLeft(S);
+ S += ">(";
+ From->printLeft(S);
+ S += ")";
+ }
+};
+
+class SizeofParamPackExpr : public Node {
+ const Node *Pack;
+
+public:
+ SizeofParamPackExpr(const Node *Pack_)
+ : Node(KSizeofParamPackExpr), Pack(Pack_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Pack); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "sizeof...(";
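+ // Wrap the pack in a temporary ParameterPackExpansion so that its elements
+ // are expanded, comma-separated, between the parens.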
+ ParameterPackExpansion PPE(Pack);
+ PPE.printLeft(S);
+ S += ")";
+ }
+};
+
+class CallExpr : public Node {
+ const Node *Callee;
+ NodeArray Args;
+
+public:
+ CallExpr(const Node *Callee_, NodeArray Args_)
+ : Node(KCallExpr), Callee(Callee_), Args(Args_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Callee, Args); }
+
+ void printLeft(OutputStream &S) const override {
+ Callee->print(S);
+ S += "(";
+ Args.printWithComma(S);
+ S += ")";
+ }
+};
+
+class NewExpr : public Node {
+ // new (expr_list) type(init_list)
+ NodeArray ExprList;
+ Node *Type;
+ NodeArray InitList;
+ bool IsGlobal; // ::operator new ?
+ bool IsArray; // new[] ?
+public:
+ NewExpr(NodeArray ExprList_, Node *Type_, NodeArray InitList_, bool IsGlobal_,
+ bool IsArray_)
+ : Node(KNewExpr), ExprList(ExprList_), Type(Type_), InitList(InitList_),
+ IsGlobal(IsGlobal_), IsArray(IsArray_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(ExprList, Type, InitList, IsGlobal, IsArray);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ if (IsGlobal)
+ S += "::operator ";
+ S += "new";
+ if (IsArray)
+ S += "[]";
+ S += ' ';
+ if (!ExprList.empty()) {
+ S += "(";
+ ExprList.printWithComma(S);
+ S += ")";
+ }
+ Type->print(S);
+ if (!InitList.empty()) {
+ S += "(";
+ InitList.printWithComma(S);
+ S += ")";
+ }
+
+ }
+};
+
+class DeleteExpr : public Node {
+ Node *Op;
+ bool IsGlobal;
+ bool IsArray;
+
+public:
+ DeleteExpr(Node *Op_, bool IsGlobal_, bool IsArray_)
+ : Node(KDeleteExpr), Op(Op_), IsGlobal(IsGlobal_), IsArray(IsArray_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Op, IsGlobal, IsArray); }
+
+ void printLeft(OutputStream &S) const override {
+ if (IsGlobal)
+ S += "::";
+ S += "delete";
+ if (IsArray)
+ S += "[] ";
+ Op->print(S);
+ }
+};
+
+class PrefixExpr : public Node {
+ StringView Prefix;
+ Node *Child;
+
+public:
+ PrefixExpr(StringView Prefix_, Node *Child_)
+ : Node(KPrefixExpr), Prefix(Prefix_), Child(Child_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Prefix, Child); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Prefix;
+ S += "(";
+ Child->print(S);
+ S += ")";
+ }
+};
+
+class FunctionParam : public Node {
+ StringView Number;
+
+public:
+ FunctionParam(StringView Number_) : Node(KFunctionParam), Number(Number_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Number); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "fp";
+ S += Number;
+ }
+};
+
+class ConversionExpr : public Node {
+ const Node *Type;
+ NodeArray Expressions;
+
+public:
+ ConversionExpr(const Node *Type_, NodeArray Expressions_)
+ : Node(KConversionExpr), Type(Type_), Expressions(Expressions_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Type, Expressions); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Type->print(S);
+ S += ")(";
+ Expressions.printWithComma(S);
+ S += ")";
+ }
+};
+
+class InitListExpr : public Node {
+ const Node *Ty;
+ NodeArray Inits;
+public:
+ InitListExpr(const Node *Ty_, NodeArray Inits_)
+ : Node(KInitListExpr), Ty(Ty_), Inits(Inits_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Inits); }
+
+ void printLeft(OutputStream &S) const override {
+ if (Ty)
+ Ty->print(S);
+ S += '{';
+ Inits.printWithComma(S);
+ S += '}';
+ }
+};
+
+class BracedExpr : public Node {
+ const Node *Elem;
+ const Node *Init;
+ bool IsArray;
+public:
+ BracedExpr(const Node *Elem_, const Node *Init_, bool IsArray_)
+ : Node(KBracedExpr), Elem(Elem_), Init(Init_), IsArray(IsArray_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Elem, Init, IsArray); }
+
+ void printLeft(OutputStream &S) const override {
+ if (IsArray) {
+ S += '[';
+ Elem->print(S);
+ S += ']';
+ } else {
+ S += '.';
+ Elem->print(S);
+ }
+ if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
+ S += " = ";
+ Init->print(S);
+ }
+};
+
+class BracedRangeExpr : public Node {
+ const Node *First;
+ const Node *Last;
+ const Node *Init;
+public:
+ BracedRangeExpr(const Node *First_, const Node *Last_, const Node *Init_)
+ : Node(KBracedRangeExpr), First(First_), Last(Last_), Init(Init_) {}
+
+ template<typename Fn> void match(Fn F) const { F(First, Last, Init); }
+
+ void printLeft(OutputStream &S) const override {
+ S += '[';
+ First->print(S);
+ S += " ... ";
+ Last->print(S);
+ S += ']';
+ if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
+ S += " = ";
+ Init->print(S);
+ }
+};
+
+class FoldExpr : public Node {
+ const Node *Pack, *Init;
+ StringView OperatorName;
+ bool IsLeftFold;
+
+public:
+ FoldExpr(bool IsLeftFold_, StringView OperatorName_, const Node *Pack_,
+ const Node *Init_)
+ : Node(KFoldExpr), Pack(Pack_), Init(Init_), OperatorName(OperatorName_),
+ IsLeftFold(IsLeftFold_) {}
+
+ template<typename Fn> void match(Fn F) const {
+ F(IsLeftFold, OperatorName, Pack, Init);
+ }
+
+ void printLeft(OutputStream &S) const override {
+ auto PrintPack = [&] {
+ S += '(';
+ ParameterPackExpansion(Pack).print(S);
+ S += ')';
+ };
+
+ S += '(';
+
+ if (IsLeftFold) {
+ // init op ... op pack
+ if (Init != nullptr) {
+ Init->print(S);
+ S += ' ';
+ S += OperatorName;
+ S += ' ';
+ }
+ // ... op pack
+ S += "... ";
+ S += OperatorName;
+ S += ' ';
+ PrintPack();
+ } else { // !IsLeftFold
+ // pack op ...
+ PrintPack();
+ S += ' ';
+ S += OperatorName;
+ S += " ...";
+ // pack op ... op init
+ if (Init != nullptr) {
+ S += ' ';
+ S += OperatorName;
+ S += ' ';
+ Init->print(S);
+ }
+ }
+ S += ')';
+ }
+};
+
+class ThrowExpr : public Node {
+ const Node *Op;
+
+public:
+ ThrowExpr(const Node *Op_) : Node(KThrowExpr), Op(Op_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Op); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "throw ";
+ Op->print(S);
+ }
+};
+
+class BoolExpr : public Node {
+ bool Value;
+
+public:
+ BoolExpr(bool Value_) : Node(KBoolExpr), Value(Value_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Value); }
+
+ void printLeft(OutputStream &S) const override {
+ S += Value ? StringView("true") : StringView("false");
+ }
+};
+
+class IntegerCastExpr : public Node {
+ // ty(integer)
+ const Node *Ty;
+ StringView Integer;
+
+public:
+ IntegerCastExpr(const Node *Ty_, StringView Integer_)
+ : Node(KIntegerCastExpr), Ty(Ty_), Integer(Integer_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Ty, Integer); }
+
+ void printLeft(OutputStream &S) const override {
+ S += "(";
+ Ty->print(S);
+ S += ")";
+ S += Integer;
+ }
+};
+
+class IntegerLiteral : public Node {
+ StringView Type;
+ StringView Value;
+
+public:
+ IntegerLiteral(StringView Type_, StringView Value_)
+ : Node(KIntegerLiteral), Type(Type_), Value(Value_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Type, Value); }
+
+ void printLeft(OutputStream &S) const override {
+ if (Type.size() > 3) {
+ S += "(";
+ S += Type;
+ S += ")";
+ }
+
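+ // A leading 'n' in the mangled value marks a negative literal; print it back
+ // as a '-' sign.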
+ if (Value[0] == 'n') {
+ S += "-";
+ S += Value.dropFront(1);
+ } else
+ S += Value;
+
+ if (Type.size() <= 3)
+ S += Type;
+ }
+};
+
+template <class Float> struct FloatData;
+
+namespace float_literal_impl {
+constexpr Node::Kind getFloatLiteralKind(float *) {
+ return Node::KFloatLiteral;
+}
+constexpr Node::Kind getFloatLiteralKind(double *) {
+ return Node::KDoubleLiteral;
+}
+constexpr Node::Kind getFloatLiteralKind(long double *) {
+ return Node::KLongDoubleLiteral;
+}
+}
+
+template <class Float> class FloatLiteralImpl : public Node {
+ const StringView Contents;
+
+ static constexpr Kind KindForClass =
+ float_literal_impl::getFloatLiteralKind((Float *)nullptr);
+
+public:
+ FloatLiteralImpl(StringView Contents_)
+ : Node(KindForClass), Contents(Contents_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Contents); }
+
+ void printLeft(OutputStream &s) const override {
+ const char *first = Contents.begin();
+ const char *last = Contents.end() + 1;
+
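+ // The mangled literal is a string of hex digit pairs encoding the bytes of
+ // the value; decode it into a native Float and let snprintf format it.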
+ const size_t N = FloatData<Float>::mangled_size;
+ if (static_cast<std::size_t>(last - first) > N) {
+ last = first + N;
+ union {
+ Float value;
+ char buf[sizeof(Float)];
+ };
+ const char *t = first;
+ char *e = buf;
+ for (; t != last; ++t, ++e) {
+ unsigned d1 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
+ : static_cast<unsigned>(*t - 'a' + 10);
+ ++t;
+ unsigned d0 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
+ : static_cast<unsigned>(*t - 'a' + 10);
+ *e = static_cast<char>((d1 << 4) + d0);
+ }
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ std::reverse(buf, e);
+#endif
+ char num[FloatData<Float>::max_demangled_size] = {0};
+ int n = snprintf(num, sizeof(num), FloatData<Float>::spec, value);
+ s += StringView(num, num + n);
+ }
+ }
+};
+
+using FloatLiteral = FloatLiteralImpl<float>;
+using DoubleLiteral = FloatLiteralImpl<double>;
+using LongDoubleLiteral = FloatLiteralImpl<long double>;
+
+/// Visit the node. Calls \c F(P), where \c P is the node cast to the
+/// appropriate derived class.
+template<typename Fn>
+void Node::visit(Fn F) const {
+ switch (K) {
+#define CASE(X) case K ## X: return F(static_cast<const X*>(this));
+ FOR_EACH_NODE_KIND(CASE)
+#undef CASE
+ }
+ assert(0 && "unknown mangling node kind");
+}
+
+/// Determine the kind of a node from its type.
+template<typename NodeT> struct NodeKind;
+#define SPECIALIZATION(X) \
+ template<> struct NodeKind<X> { \
+ static constexpr Node::Kind Kind = Node::K##X; \
+ static constexpr const char *name() { return #X; } \
+ };
+FOR_EACH_NODE_KIND(SPECIALIZATION)
+#undef SPECIALIZATION
+
+#undef FOR_EACH_NODE_KIND
+
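+// A small vector with N elements of inline storage, restricted to POD element
+// types; it spills to malloc'd memory once more than N elements are pushed and
+// terminates on allocation failure rather than throwing.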
+template <class T, size_t N>
+class PODSmallVector {
+ static_assert(std::is_pod<T>::value,
+ "T is required to be a plain old data type");
+
+ T* First;
+ T* Last;
+ T* Cap;
+ T Inline[N];
+
+ bool isInline() const { return First == Inline; }
+
+ void clearInline() {
+ First = Inline;
+ Last = Inline;
+ Cap = Inline + N;
+ }
+
+ void reserve(size_t NewCap) {
+ size_t S = size();
+ if (isInline()) {
+ auto* Tmp = static_cast<T*>(std::malloc(NewCap * sizeof(T)));
+ if (Tmp == nullptr)
+ std::terminate();
+ std::copy(First, Last, Tmp);
+ First = Tmp;
+ } else {
+ First = static_cast<T*>(std::realloc(First, NewCap * sizeof(T)));
+ if (First == nullptr)
+ std::terminate();
+ }
+ Last = First + S;
+ Cap = First + NewCap;
+ }
+
+public:
+ PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {}
+
+ PODSmallVector(const PODSmallVector&) = delete;
+ PODSmallVector& operator=(const PODSmallVector&) = delete;
+
+ PODSmallVector(PODSmallVector&& Other) : PODSmallVector() {
+ if (Other.isInline()) {
+ std::copy(Other.begin(), Other.end(), First);
+ Last = First + Other.size();
+ Other.clear();
+ return;
+ }
+
+ First = Other.First;
+ Last = Other.Last;
+ Cap = Other.Cap;
+ Other.clearInline();
+ }
+
+ PODSmallVector& operator=(PODSmallVector&& Other) {
+ if (Other.isInline()) {
+ if (!isInline()) {
+ std::free(First);
+ clearInline();
+ }
+ std::copy(Other.begin(), Other.end(), First);
+ Last = First + Other.size();
+ Other.clear();
+ return *this;
+ }
+
+ if (isInline()) {
+ First = Other.First;
+ Last = Other.Last;
+ Cap = Other.Cap;
+ Other.clearInline();
+ return *this;
+ }
+
+ std::swap(First, Other.First);
+ std::swap(Last, Other.Last);
+ std::swap(Cap, Other.Cap);
+ Other.clear();
+ return *this;
+ }
+
+ void push_back(const T& Elem) {
+ if (Last == Cap)
+ reserve(size() * 2);
+ *Last++ = Elem;
+ }
+
+ void pop_back() {
+ assert(Last != First && "Popping empty vector!");
+ --Last;
+ }
+
+ void dropBack(size_t Index) {
+ assert(Index <= size() && "dropBack() can't expand!");
+ Last = First + Index;
+ }
+
+ T* begin() { return First; }
+ T* end() { return Last; }
+
+ bool empty() const { return First == Last; }
+ size_t size() const { return static_cast<size_t>(Last - First); }
+ T& back() {
+ assert(Last != First && "Calling back() on empty vector!");
+ return *(Last - 1);
+ }
+ T& operator[](size_t Index) {
+ assert(Index < size() && "Invalid access!");
+ return *(begin() + Index);
+ }
+ void clear() { Last = First; }
+
+ ~PODSmallVector() {
+ if (!isInline())
+ std::free(First);
+ }
+};
+
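+// CRTP base class for the demangling parser: a Derived parser can override
+// individual parse* methods, while Alloc supplies makeNode<T>(...),
+// allocateNodeArray(size_t) and reset() for AST storage.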
+template <typename Derived, typename Alloc> struct AbstractManglingParser {
+ const char *First;
+ const char *Last;
+
+ // Name stack; the parser uses it to hold temporary names as they are parsed,
+ // collapsing multiple names into new nodes to construct the AST. Once the
+ // parser is finished, Names.size() == 1.
+ PODSmallVector<Node *, 32> Names;
+
+ // Substitution table. Itanium supports name substitutions as a means of
+ // compression. The string "S42_" refers to the 44th entry (base-36) in this
+ // table.
+ PODSmallVector<Node *, 32> Subs;
+
+ // Template parameter table. Like the above, but referenced like "T42_".
+ // It is given a smaller inline size than Subs and Names because it usually
+ // stays small.
+ PODSmallVector<Node *, 8> TemplateParams;
+
+ // Set of unresolved forward <template-param> references. These can occur in a
+ // conversion operator's type, and are resolved in the enclosing <encoding>.
+ PODSmallVector<ForwardTemplateReference *, 4> ForwardTemplateRefs;
+
+ bool TryToParseTemplateArgs = true;
+ bool PermitForwardTemplateReferences = false;
+ bool ParsingLambdaParams = false;
+
+ Alloc ASTAllocator;
+
+ AbstractManglingParser(const char *First_, const char *Last_)
+ : First(First_), Last(Last_) {}
+
+ Derived &getDerived() { return static_cast<Derived &>(*this); }
+
+ void reset(const char *First_, const char *Last_) {
+ First = First_;
+ Last = Last_;
+ Names.clear();
+ Subs.clear();
+ TemplateParams.clear();
+ ParsingLambdaParams = false;
+ TryToParseTemplateArgs = true;
+ PermitForwardTemplateReferences = false;
+ ASTAllocator.reset();
+ }
+
+ template <class T, class... Args> Node *make(Args &&... args) {
+ return ASTAllocator.template makeNode<T>(std::forward<Args>(args)...);
+ }
+
+ template <class It> NodeArray makeNodeArray(It begin, It end) {
+ size_t sz = static_cast<size_t>(end - begin);
+ void *mem = ASTAllocator.allocateNodeArray(sz);
+ Node **data = new (mem) Node *[sz];
+ std::copy(begin, end, data);
+ return NodeArray(data, sz);
+ }
+
+ NodeArray popTrailingNodeArray(size_t FromPosition) {
+ assert(FromPosition <= Names.size());
+ NodeArray res =
+ makeNodeArray(Names.begin() + (long)FromPosition, Names.end());
+ Names.dropBack(FromPosition);
+ return res;
+ }
+
+ bool consumeIf(StringView S) {
+ if (StringView(First, Last).startsWith(S)) {
+ First += S.size();
+ return true;
+ }
+ return false;
+ }
+
+ bool consumeIf(char C) {
+ if (First != Last && *First == C) {
+ ++First;
+ return true;
+ }
+ return false;
+ }
+
+ char consume() { return First != Last ? *First++ : '\0'; }
+
+ char look(unsigned Lookahead = 0) {
+ if (static_cast<size_t>(Last - First) <= Lookahead)
+ return '\0';
+ return First[Lookahead];
+ }
+
+ size_t numLeft() const { return static_cast<size_t>(Last - First); }
+
+ StringView parseNumber(bool AllowNegative = false);
+ Qualifiers parseCVQualifiers();
+ bool parsePositiveInteger(size_t *Out);
+ StringView parseBareSourceName();
+
+ bool parseSeqId(size_t *Out);
+ Node *parseSubstitution();
+ Node *parseTemplateParam();
+ Node *parseTemplateArgs(bool TagTemplates = false);
+ Node *parseTemplateArg();
+
+ /// Parse the <expr> production.
+ Node *parseExpr();
+ Node *parsePrefixExpr(StringView Kind);
+ Node *parseBinaryExpr(StringView Kind);
+ Node *parseIntegerLiteral(StringView Lit);
+ Node *parseExprPrimary();
+ template <class Float> Node *parseFloatingLiteral();
+ Node *parseFunctionParam();
+ Node *parseNewExpr();
+ Node *parseConversionExpr();
+ Node *parseBracedExpr();
+ Node *parseFoldExpr();
+
+ /// Parse the <type> production.
+ Node *parseType();
+ Node *parseFunctionType();
+ Node *parseVectorType();
+ Node *parseDecltype();
+ Node *parseArrayType();
+ Node *parsePointerToMemberType();
+ Node *parseClassEnumType();
+ Node *parseQualifiedType();
+
+ Node *parseEncoding();
+ bool parseCallOffset();
+ Node *parseSpecialName();
+
+ /// Holds some extra information about a <name> that is being parsed. This
+ /// information is only pertinent if the <name> refers to an <encoding>.
+ struct NameState {
+ bool CtorDtorConversion = false;
+ bool EndsWithTemplateArgs = false;
+ Qualifiers CVQualifiers = QualNone;
+ FunctionRefQual ReferenceQualifier = FrefQualNone;
+ size_t ForwardTemplateRefsBegin;
+
+ NameState(AbstractManglingParser *Enclosing)
+ : ForwardTemplateRefsBegin(Enclosing->ForwardTemplateRefs.size()) {}
+ };
+
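+ // Bind every ForwardTemplateReference recorded since State was created to its
+ // entry in TemplateParams. Returns true on error (an out-of-range index).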
+ bool resolveForwardTemplateRefs(NameState &State) {
+ size_t I = State.ForwardTemplateRefsBegin;
+ size_t E = ForwardTemplateRefs.size();
+ for (; I < E; ++I) {
+ size_t Idx = ForwardTemplateRefs[I]->Index;
+ if (Idx >= TemplateParams.size())
+ return true;
+ ForwardTemplateRefs[I]->Ref = TemplateParams[Idx];
+ }
+ ForwardTemplateRefs.dropBack(State.ForwardTemplateRefsBegin);
+ return false;
+ }
+
+ /// Parse the <name> production.
+ Node *parseName(NameState *State = nullptr);
+ Node *parseLocalName(NameState *State);
+ Node *parseOperatorName(NameState *State);
+ Node *parseUnqualifiedName(NameState *State);
+ Node *parseUnnamedTypeName(NameState *State);
+ Node *parseSourceName(NameState *State);
+ Node *parseUnscopedName(NameState *State);
+ Node *parseNestedName(NameState *State);
+ Node *parseCtorDtorName(Node *&SoFar, NameState *State);
+
+ Node *parseAbiTags(Node *N);
+
+ /// Parse the <unresolved-name> production.
+ Node *parseUnresolvedName();
+ Node *parseSimpleId();
+ Node *parseBaseUnresolvedName();
+ Node *parseUnresolvedType();
+ Node *parseDestructorName();
+
+ /// Top-level entry point into the parser.
+ Node *parse();
+};
+
+const char* parse_discriminator(const char* first, const char* last);
+
+// <name> ::= <nested-name> // N
+// ::= <local-name> # See Scope Encoding below // Z
+// ::= <unscoped-template-name> <template-args>
+// ::= <unscoped-name>
+//
+// <unscoped-template-name> ::= <unscoped-name>
+// ::= <substitution>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseName(NameState *State) {
+ consumeIf('L'); // extension
+
+ if (look() == 'N')
+ return getDerived().parseNestedName(State);
+ if (look() == 'Z')
+ return getDerived().parseLocalName(State);
+
+ // ::= <unscoped-template-name> <template-args>
+ if (look() == 'S' && look(1) != 't') {
+ Node *S = getDerived().parseSubstitution();
+ if (S == nullptr)
+ return nullptr;
+ if (look() != 'I')
+ return nullptr;
+ Node *TA = getDerived().parseTemplateArgs(State != nullptr);
+ if (TA == nullptr)
+ return nullptr;
+ if (State) State->EndsWithTemplateArgs = true;
+ return make<NameWithTemplateArgs>(S, TA);
+ }
+
+ Node *N = getDerived().parseUnscopedName(State);
+ if (N == nullptr)
+ return nullptr;
+ // ::= <unscoped-template-name> <template-args>
+ if (look() == 'I') {
+ Subs.push_back(N);
+ Node *TA = getDerived().parseTemplateArgs(State != nullptr);
+ if (TA == nullptr)
+ return nullptr;
+ if (State) State->EndsWithTemplateArgs = true;
+ return make<NameWithTemplateArgs>(N, TA);
+ }
+ // ::= <unscoped-name>
+ return N;
+}
+
+// <local-name> := Z <function encoding> E <entity name> [<discriminator>]
+// := Z <function encoding> E s [<discriminator>]
+// := Z <function encoding> Ed [ <parameter number> ] _ <entity name>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseLocalName(NameState *State) {
+ if (!consumeIf('Z'))
+ return nullptr;
+ Node *Encoding = getDerived().parseEncoding();
+ if (Encoding == nullptr || !consumeIf('E'))
+ return nullptr;
+
+ if (consumeIf('s')) {
+ First = parse_discriminator(First, Last);
+ auto *StringLitName = make<NameType>("string literal");
+ if (!StringLitName)
+ return nullptr;
+ return make<LocalName>(Encoding, StringLitName);
+ }
+
+ if (consumeIf('d')) {
+ parseNumber(true);
+ if (!consumeIf('_'))
+ return nullptr;
+ Node *N = getDerived().parseName(State);
+ if (N == nullptr)
+ return nullptr;
+ return make<LocalName>(Encoding, N);
+ }
+
+ Node *Entity = getDerived().parseName(State);
+ if (Entity == nullptr)
+ return nullptr;
+ First = parse_discriminator(First, Last);
+ return make<LocalName>(Encoding, Entity);
+}
+
+// <unscoped-name> ::= <unqualified-name>
+// ::= St <unqualified-name> # ::std::
+// extension ::= StL<unqualified-name>
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseUnscopedName(NameState *State) {
+ if (consumeIf("StL") || consumeIf("St")) {
+ Node *R = getDerived().parseUnqualifiedName(State);
+ if (R == nullptr)
+ return nullptr;
+ return make<StdQualifiedName>(R);
+ }
+ return getDerived().parseUnqualifiedName(State);
+}
+
+// <unqualified-name> ::= <operator-name> [abi-tags]
+// ::= <ctor-dtor-name>
+// ::= <source-name>
+// ::= <unnamed-type-name>
+// ::= DC <source-name>+ E # structured binding declaration
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseUnqualifiedName(NameState *State) {
+ // <ctor-dtor-name>s are special-cased in parseNestedName().
+ Node *Result;
+ if (look() == 'U')
+ Result = getDerived().parseUnnamedTypeName(State);
+ else if (look() >= '1' && look() <= '9')
+ Result = getDerived().parseSourceName(State);
+ else if (consumeIf("DC")) {
+ size_t BindingsBegin = Names.size();
+ do {
+ Node *Binding = getDerived().parseSourceName(State);
+ if (Binding == nullptr)
+ return nullptr;
+ Names.push_back(Binding);
+ } while (!consumeIf('E'));
+ Result = make<StructuredBindingName>(popTrailingNodeArray(BindingsBegin));
+ } else
+ Result = getDerived().parseOperatorName(State);
+ if (Result != nullptr)
+ Result = getDerived().parseAbiTags(Result);
+ return Result;
+}
+
+// <unnamed-type-name> ::= Ut [<nonnegative number>] _
+// ::= <closure-type-name>
+//
+// <closure-type-name> ::= Ul <lambda-sig> E [ <nonnegative number> ] _
+//
+// <lambda-sig> ::= <parameter type>+ # Parameter types or "v" if the lambda has no parameters
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *) {
+ if (consumeIf("Ut")) {
+ StringView Count = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<UnnamedTypeName>(Count);
+ }
+ if (consumeIf("Ul")) {
+ NodeArray Params;
+ SwapAndRestore<bool> SwapParams(ParsingLambdaParams, true);
+ if (!consumeIf("vE")) {
+ size_t ParamsBegin = Names.size();
+ do {
+ Node *P = getDerived().parseType();
+ if (P == nullptr)
+ return nullptr;
+ Names.push_back(P);
+ } while (!consumeIf('E'));
+ Params = popTrailingNodeArray(ParamsBegin);
+ }
+ StringView Count = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<ClosureTypeName>(Params, Count);
+ }
+ return nullptr;
+}
+
+// <source-name> ::= <positive length number> <identifier>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSourceName(NameState *) {
+ size_t Length = 0;
+ if (parsePositiveInteger(&Length))
+ return nullptr;
+ if (numLeft() < Length || Length == 0)
+ return nullptr;
+ StringView Name(First, First + Length);
+ First += Length;
+ if (Name.startsWith("_GLOBAL__N"))
+ return make<NameType>("(anonymous namespace)");
+ return make<NameType>(Name);
+}
+
+// <operator-name> ::= aa # &&
+// ::= ad # & (unary)
+// ::= an # &
+// ::= aN # &=
+// ::= aS # =
+// ::= cl # ()
+// ::= cm # ,
+// ::= co # ~
+// ::= cv <type> # (cast)
+// ::= da # delete[]
+// ::= de # * (unary)
+// ::= dl # delete
+// ::= dv # /
+// ::= dV # /=
+// ::= eo # ^
+// ::= eO # ^=
+// ::= eq # ==
+// ::= ge # >=
+// ::= gt # >
+// ::= ix # []
+// ::= le # <=
+// ::= li <source-name> # operator ""
+// ::= ls # <<
+// ::= lS # <<=
+// ::= lt # <
+// ::= mi # -
+// ::= mI # -=
+// ::= ml # *
+// ::= mL # *=
+// ::= mm # -- (postfix in <expression> context)
+// ::= na # new[]
+// ::= ne # !=
+// ::= ng # - (unary)
+// ::= nt # !
+// ::= nw # new
+// ::= oo # ||
+// ::= or # |
+// ::= oR # |=
+// ::= pm # ->*
+// ::= pl # +
+// ::= pL # +=
+// ::= pp # ++ (postfix in <expression> context)
+// ::= ps # + (unary)
+// ::= pt # ->
+// ::= qu # ?
+// ::= rm # %
+// ::= rM # %=
+// ::= rs # >>
+// ::= rS # >>=
+// ::= ss # <=> C++2a
+// ::= v <digit> <source-name> # vendor extended operator
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseOperatorName(NameState *State) {
+ switch (look()) {
+ case 'a':
+ switch (look(1)) {
+ case 'a':
+ First += 2;
+ return make<NameType>("operator&&");
+ case 'd':
+ case 'n':
+ First += 2;
+ return make<NameType>("operator&");
+ case 'N':
+ First += 2;
+ return make<NameType>("operator&=");
+ case 'S':
+ First += 2;
+ return make<NameType>("operator=");
+ }
+ return nullptr;
+ case 'c':
+ switch (look(1)) {
+ case 'l':
+ First += 2;
+ return make<NameType>("operator()");
+ case 'm':
+ First += 2;
+ return make<NameType>("operator,");
+ case 'o':
+ First += 2;
+ return make<NameType>("operator~");
+ // ::= cv <type> # (cast)
+ case 'v': {
+ First += 2;
+ SwapAndRestore<bool> SaveTemplate(TryToParseTemplateArgs, false);
+ // If we're parsing an encoding, State != nullptr and the conversion
+ // operators' <type> could have a <template-param> that refers to some
+ // <template-arg>s further ahead in the mangled name.
+ SwapAndRestore<bool> SavePermit(PermitForwardTemplateReferences,
+ PermitForwardTemplateReferences ||
+ State != nullptr);
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ if (State) State->CtorDtorConversion = true;
+ return make<ConversionOperatorType>(Ty);
+ }
+ }
+ return nullptr;
+ case 'd':
+ switch (look(1)) {
+ case 'a':
+ First += 2;
+ return make<NameType>("operator delete[]");
+ case 'e':
+ First += 2;
+ return make<NameType>("operator*");
+ case 'l':
+ First += 2;
+ return make<NameType>("operator delete");
+ case 'v':
+ First += 2;
+ return make<NameType>("operator/");
+ case 'V':
+ First += 2;
+ return make<NameType>("operator/=");
+ }
+ return nullptr;
+ case 'e':
+ switch (look(1)) {
+ case 'o':
+ First += 2;
+ return make<NameType>("operator^");
+ case 'O':
+ First += 2;
+ return make<NameType>("operator^=");
+ case 'q':
+ First += 2;
+ return make<NameType>("operator==");
+ }
+ return nullptr;
+ case 'g':
+ switch (look(1)) {
+ case 'e':
+ First += 2;
+ return make<NameType>("operator>=");
+ case 't':
+ First += 2;
+ return make<NameType>("operator>");
+ }
+ return nullptr;
+ case 'i':
+ if (look(1) == 'x') {
+ First += 2;
+ return make<NameType>("operator[]");
+ }
+ return nullptr;
+ case 'l':
+ switch (look(1)) {
+ case 'e':
+ First += 2;
+ return make<NameType>("operator<=");
+ // ::= li <source-name> # operator ""
+ case 'i': {
+ First += 2;
+ Node *SN = getDerived().parseSourceName(State);
+ if (SN == nullptr)
+ return nullptr;
+ return make<LiteralOperator>(SN);
+ }
+ case 's':
+ First += 2;
+ return make<NameType>("operator<<");
+ case 'S':
+ First += 2;
+ return make<NameType>("operator<<=");
+ case 't':
+ First += 2;
+ return make<NameType>("operator<");
+ }
+ return nullptr;
+ case 'm':
+ switch (look(1)) {
+ case 'i':
+ First += 2;
+ return make<NameType>("operator-");
+ case 'I':
+ First += 2;
+ return make<NameType>("operator-=");
+ case 'l':
+ First += 2;
+ return make<NameType>("operator*");
+ case 'L':
+ First += 2;
+ return make<NameType>("operator*=");
+ case 'm':
+ First += 2;
+ return make<NameType>("operator--");
+ }
+ return nullptr;
+ case 'n':
+ switch (look(1)) {
+ case 'a':
+ First += 2;
+ return make<NameType>("operator new[]");
+ case 'e':
+ First += 2;
+ return make<NameType>("operator!=");
+ case 'g':
+ First += 2;
+ return make<NameType>("operator-");
+ case 't':
+ First += 2;
+ return make<NameType>("operator!");
+ case 'w':
+ First += 2;
+ return make<NameType>("operator new");
+ }
+ return nullptr;
+ case 'o':
+ switch (look(1)) {
+ case 'o':
+ First += 2;
+ return make<NameType>("operator||");
+ case 'r':
+ First += 2;
+ return make<NameType>("operator|");
+ case 'R':
+ First += 2;
+ return make<NameType>("operator|=");
+ }
+ return nullptr;
+ case 'p':
+ switch (look(1)) {
+ case 'm':
+ First += 2;
+ return make<NameType>("operator->*");
+ case 'l':
+ First += 2;
+ return make<NameType>("operator+");
+ case 'L':
+ First += 2;
+ return make<NameType>("operator+=");
+ case 'p':
+ First += 2;
+ return make<NameType>("operator++");
+ case 's':
+ First += 2;
+ return make<NameType>("operator+");
+ case 't':
+ First += 2;
+ return make<NameType>("operator->");
+ }
+ return nullptr;
+ case 'q':
+ if (look(1) == 'u') {
+ First += 2;
+ return make<NameType>("operator?");
+ }
+ return nullptr;
+ case 'r':
+ switch (look(1)) {
+ case 'm':
+ First += 2;
+ return make<NameType>("operator%");
+ case 'M':
+ First += 2;
+ return make<NameType>("operator%=");
+ case 's':
+ First += 2;
+ return make<NameType>("operator>>");
+ case 'S':
+ First += 2;
+ return make<NameType>("operator>>=");
+ }
+ return nullptr;
+ case 's':
+ if (look(1) == 's') {
+ First += 2;
+ return make<NameType>("operator<=>");
+ }
+ return nullptr;
+ // ::= v <digit> <source-name> # vendor extended operator
+ case 'v':
+ if (std::isdigit(look(1))) {
+ First += 2;
+ Node *SN = getDerived().parseSourceName(State);
+ if (SN == nullptr)
+ return nullptr;
+ return make<ConversionOperatorType>(SN);
+ }
+ return nullptr;
+ }
+ return nullptr;
+}
+
+// <ctor-dtor-name> ::= C1 # complete object constructor
+// ::= C2 # base object constructor
+// ::= C3 # complete object allocating constructor
+// extension ::= C5 # ?
+// ::= D0 # deleting destructor
+// ::= D1 # complete object destructor
+// ::= D2 # base object destructor
+// extension ::= D5 # ?
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
+ NameState *State) {
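+ // If the name so far is the string/istream/ostream/iostream special
+ // substitution (e.g. "Ss" for std::string), its ctor/dtor is printed against
+ // the expanded basic_* spelling, so swap in an ExpandedSpecialSubstitution.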
+ if (SoFar->getKind() == Node::KSpecialSubstitution) {
+ auto SSK = static_cast<SpecialSubstitution *>(SoFar)->SSK;
+ switch (SSK) {
+ case SpecialSubKind::string:
+ case SpecialSubKind::istream:
+ case SpecialSubKind::ostream:
+ case SpecialSubKind::iostream:
+ SoFar = make<ExpandedSpecialSubstitution>(SSK);
+ if (!SoFar)
+ return nullptr;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (consumeIf('C')) {
+ bool IsInherited = consumeIf('I');
+ if (look() != '1' && look() != '2' && look() != '3' && look() != '5')
+ return nullptr;
+ int Variant = look() - '0';
+ ++First;
+ if (State) State->CtorDtorConversion = true;
+ if (IsInherited) {
+ if (getDerived().parseName(State) == nullptr)
+ return nullptr;
+ }
+ return make<CtorDtorName>(SoFar, false, Variant);
+ }
+
+ if (look() == 'D' &&
+ (look(1) == '0' || look(1) == '1' || look(1) == '2' || look(1) == '5')) {
+ int Variant = look(1) - '0';
+ First += 2;
+ if (State) State->CtorDtorConversion = true;
+ return make<CtorDtorName>(SoFar, true, Variant);
+ }
+
+ return nullptr;
+}
+
+// <nested-name> ::= N [<CV-Qualifiers>] [<ref-qualifier>] <prefix> <unqualified-name> E
+// ::= N [<CV-Qualifiers>] [<ref-qualifier>] <template-prefix> <template-args> E
+//
+// <prefix> ::= <prefix> <unqualified-name>
+// ::= <template-prefix> <template-args>
+// ::= <template-param>
+// ::= <decltype>
+// ::= # empty
+// ::= <substitution>
+// ::= <prefix> <data-member-prefix>
+// extension ::= L
+//
+// <data-member-prefix> := <member source-name> [<template-args>] M
+//
+// <template-prefix> ::= <prefix> <template unqualified-name>
+// ::= <template-param>
+// ::= <substitution>
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseNestedName(NameState *State) {
+ if (!consumeIf('N'))
+ return nullptr;
+
+ Qualifiers CVTmp = parseCVQualifiers();
+ if (State) State->CVQualifiers = CVTmp;
+
+ if (consumeIf('O')) {
+ if (State) State->ReferenceQualifier = FrefQualRValue;
+ } else if (consumeIf('R')) {
+ if (State) State->ReferenceQualifier = FrefQualLValue;
+ } else
+ if (State) State->ReferenceQualifier = FrefQualNone;
+
+ Node *SoFar = nullptr;
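+ // Accumulate name components: the first one becomes SoFar directly, later
+ // ones are wrapped as NestedName(SoFar, Comp).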
+ auto PushComponent = [&](Node *Comp) {
+ if (!Comp) return false;
+ if (SoFar) SoFar = make<NestedName>(SoFar, Comp);
+ else SoFar = Comp;
+ if (State) State->EndsWithTemplateArgs = false;
+ return SoFar != nullptr;
+ };
+
+ if (consumeIf("St")) {
+ SoFar = make<NameType>("std");
+ if (!SoFar)
+ return nullptr;
+ }
+
+ while (!consumeIf('E')) {
+ consumeIf('L'); // extension
+
+ // <data-member-prefix> := <member source-name> [<template-args>] M
+ if (consumeIf('M')) {
+ if (SoFar == nullptr)
+ return nullptr;
+ continue;
+ }
+
+ // ::= <template-param>
+ if (look() == 'T') {
+ if (!PushComponent(getDerived().parseTemplateParam()))
+ return nullptr;
+ Subs.push_back(SoFar);
+ continue;
+ }
+
+ // ::= <template-prefix> <template-args>
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs(State != nullptr);
+ if (TA == nullptr || SoFar == nullptr)
+ return nullptr;
+ SoFar = make<NameWithTemplateArgs>(SoFar, TA);
+ if (!SoFar)
+ return nullptr;
+ if (State) State->EndsWithTemplateArgs = true;
+ Subs.push_back(SoFar);
+ continue;
+ }
+
+ // ::= <decltype>
+ if (look() == 'D' && (look(1) == 't' || look(1) == 'T')) {
+ if (!PushComponent(getDerived().parseDecltype()))
+ return nullptr;
+ Subs.push_back(SoFar);
+ continue;
+ }
+
+ // ::= <substitution>
+ if (look() == 'S' && look(1) != 't') {
+ Node *S = getDerived().parseSubstitution();
+ if (!PushComponent(S))
+ return nullptr;
+ if (SoFar != S)
+ Subs.push_back(S);
+ continue;
+ }
+
+ // Parse an <unqualified-name> that's actually a <ctor-dtor-name>.
+ if (look() == 'C' || (look() == 'D' && look(1) != 'C')) {
+ if (SoFar == nullptr)
+ return nullptr;
+ if (!PushComponent(getDerived().parseCtorDtorName(SoFar, State)))
+ return nullptr;
+ SoFar = getDerived().parseAbiTags(SoFar);
+ if (SoFar == nullptr)
+ return nullptr;
+ Subs.push_back(SoFar);
+ continue;
+ }
+
+ // ::= <prefix> <unqualified-name>
+ if (!PushComponent(getDerived().parseUnqualifiedName(State)))
+ return nullptr;
+ Subs.push_back(SoFar);
+ }
+
+ if (SoFar == nullptr || Subs.empty())
+ return nullptr;
+
+ Subs.pop_back();
+ return SoFar;
+}
+
+// <simple-id> ::= <source-name> [ <template-args> ]
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSimpleId() {
+ Node *SN = getDerived().parseSourceName(/*NameState=*/nullptr);
+ if (SN == nullptr)
+ return nullptr;
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ return make<NameWithTemplateArgs>(SN, TA);
+ }
+ return SN;
+}
+
+// <destructor-name> ::= <unresolved-type> # e.g., ~T or ~decltype(f())
+// ::= <simple-id> # e.g., ~A<2*N>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseDestructorName() {
+ Node *Result;
+ if (std::isdigit(look()))
+ Result = getDerived().parseSimpleId();
+ else
+ Result = getDerived().parseUnresolvedType();
+ if (Result == nullptr)
+ return nullptr;
+ return make<DtorName>(Result);
+}
+
+// <unresolved-type> ::= <template-param>
+// ::= <decltype>
+// ::= <substitution>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseUnresolvedType() {
+ if (look() == 'T') {
+ Node *TP = getDerived().parseTemplateParam();
+ if (TP == nullptr)
+ return nullptr;
+ Subs.push_back(TP);
+ return TP;
+ }
+ if (look() == 'D') {
+ Node *DT = getDerived().parseDecltype();
+ if (DT == nullptr)
+ return nullptr;
+ Subs.push_back(DT);
+ return DT;
+ }
+ return getDerived().parseSubstitution();
+}
+
+// <base-unresolved-name> ::= <simple-id> # unresolved name
+// extension ::= <operator-name> # unresolved operator-function-id
+// extension ::= <operator-name> <template-args> # unresolved operator template-id
+// ::= on <operator-name> # unresolved operator-function-id
+// ::= on <operator-name> <template-args> # unresolved operator template-id
+// ::= dn <destructor-name> # destructor or pseudo-destructor;
+// # e.g. ~X or ~X<N-1>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseBaseUnresolvedName() {
+ if (std::isdigit(look()))
+ return getDerived().parseSimpleId();
+
+ if (consumeIf("dn"))
+ return getDerived().parseDestructorName();
+
+ consumeIf("on");
+
+ Node *Oper = getDerived().parseOperatorName(/*NameState=*/nullptr);
+ if (Oper == nullptr)
+ return nullptr;
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ return make<NameWithTemplateArgs>(Oper, TA);
+ }
+ return Oper;
+}
+
+// <unresolved-name>
+// extension ::= srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
+// ::= [gs] <base-unresolved-name> # x or (with "gs") ::x
+// ::= [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
+// # A::x, N::y, A<T>::z; "gs" means leading "::"
+// ::= sr <unresolved-type> <base-unresolved-name> # T::x / decltype(p)::x
+// extension ::= sr <unresolved-type> <template-args> <base-unresolved-name>
+// # T::N::x /decltype(p)::N::x
+// (ignored) ::= srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
+//
+// <unresolved-qualifier-level> ::= <simple-id>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseUnresolvedName() {
+ Node *SoFar = nullptr;
+
+ // srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
+ // srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
+ if (consumeIf("srN")) {
+ SoFar = getDerived().parseUnresolvedType();
+ if (SoFar == nullptr)
+ return nullptr;
+
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ SoFar = make<NameWithTemplateArgs>(SoFar, TA);
+ if (!SoFar)
+ return nullptr;
+ }
+
+ while (!consumeIf('E')) {
+ Node *Qual = getDerived().parseSimpleId();
+ if (Qual == nullptr)
+ return nullptr;
+ SoFar = make<QualifiedName>(SoFar, Qual);
+ if (!SoFar)
+ return nullptr;
+ }
+
+ Node *Base = getDerived().parseBaseUnresolvedName();
+ if (Base == nullptr)
+ return nullptr;
+ return make<QualifiedName>(SoFar, Base);
+ }
+
+ bool Global = consumeIf("gs");
+
+ // [gs] <base-unresolved-name> # x or (with "gs") ::x
+ if (!consumeIf("sr")) {
+ SoFar = getDerived().parseBaseUnresolvedName();
+ if (SoFar == nullptr)
+ return nullptr;
+ if (Global)
+ SoFar = make<GlobalQualifiedName>(SoFar);
+ return SoFar;
+ }
+
+ // [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
+ if (std::isdigit(look())) {
+ do {
+ Node *Qual = getDerived().parseSimpleId();
+ if (Qual == nullptr)
+ return nullptr;
+ if (SoFar)
+ SoFar = make<QualifiedName>(SoFar, Qual);
+ else if (Global)
+ SoFar = make<GlobalQualifiedName>(Qual);
+ else
+ SoFar = Qual;
+ if (!SoFar)
+ return nullptr;
+ } while (!consumeIf('E'));
+ }
+ // sr <unresolved-type> <base-unresolved-name>
+ // sr <unresolved-type> <template-args> <base-unresolved-name>
+ else {
+ SoFar = getDerived().parseUnresolvedType();
+ if (SoFar == nullptr)
+ return nullptr;
+
+ if (look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ SoFar = make<NameWithTemplateArgs>(SoFar, TA);
+ if (!SoFar)
+ return nullptr;
+ }
+ }
+
+ assert(SoFar != nullptr);
+
+ Node *Base = getDerived().parseBaseUnresolvedName();
+ if (Base == nullptr)
+ return nullptr;
+ return make<QualifiedName>(SoFar, Base);
+}
+
+// <abi-tags> ::= <abi-tag> [<abi-tags>]
+// <abi-tag> ::= B <source-name>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseAbiTags(Node *N) {
+ while (consumeIf('B')) {
+ StringView SN = parseBareSourceName();
+ if (SN.empty())
+ return nullptr;
+ N = make<AbiTagAttr>(N, SN);
+ if (!N)
+ return nullptr;
+ }
+ return N;
+}
+
+// <number> ::= [n] <non-negative decimal integer>
+template <typename Alloc, typename Derived>
+StringView
+AbstractManglingParser<Alloc, Derived>::parseNumber(bool AllowNegative) {
+ const char *Tmp = First;
+ if (AllowNegative)
+ consumeIf('n');
+ if (numLeft() == 0 || !std::isdigit(*First))
+ return StringView();
+ while (numLeft() != 0 && std::isdigit(*First))
+ ++First;
+ return StringView(Tmp, First);
+}
+
+// <positive length number> ::= [0-9]*
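+// Returns true on error (no digit at the current position); on success the
+// parsed value is accumulated into *Out.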
+template <typename Alloc, typename Derived>
+bool AbstractManglingParser<Alloc, Derived>::parsePositiveInteger(size_t *Out) {
+ *Out = 0;
+ if (look() < '0' || look() > '9')
+ return true;
+ while (look() >= '0' && look() <= '9') {
+ *Out *= 10;
+ *Out += static_cast<size_t>(consume() - '0');
+ }
+ return false;
+}
+
+template <typename Alloc, typename Derived>
+StringView AbstractManglingParser<Alloc, Derived>::parseBareSourceName() {
+ size_t Int = 0;
+ if (parsePositiveInteger(&Int) || numLeft() < Int)
+ return StringView();
+ StringView R(First, First + Int);
+ First += Int;
+ return R;
+}
+
+// <function-type> ::= [<CV-qualifiers>] [<exception-spec>] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E
+//
+// <exception-spec> ::= Do # non-throwing exception-specification (e.g., noexcept, throw())
+// ::= DO <expression> E # computed (instantiation-dependent) noexcept
+// ::= Dw <type>+ E # dynamic exception specification with instantiation-dependent types
+//
+// <ref-qualifier> ::= R # & ref-qualifier
+// <ref-qualifier> ::= O # && ref-qualifier
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseFunctionType() {
+ Qualifiers CVQuals = parseCVQualifiers();
+
+ Node *ExceptionSpec = nullptr;
+ if (consumeIf("Do")) {
+ ExceptionSpec = make<NameType>("noexcept");
+ if (!ExceptionSpec)
+ return nullptr;
+ } else if (consumeIf("DO")) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr || !consumeIf('E'))
+ return nullptr;
+ ExceptionSpec = make<NoexceptSpec>(E);
+ if (!ExceptionSpec)
+ return nullptr;
+ } else if (consumeIf("Dw")) {
+ size_t SpecsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return nullptr;
+ Names.push_back(T);
+ }
+ ExceptionSpec =
+ make<DynamicExceptionSpec>(popTrailingNodeArray(SpecsBegin));
+ if (!ExceptionSpec)
+ return nullptr;
+ }
+
+ consumeIf("Dx"); // transaction safe
+
+ if (!consumeIf('F'))
+ return nullptr;
+ consumeIf('Y'); // extern "C"
+ Node *ReturnType = getDerived().parseType();
+ if (ReturnType == nullptr)
+ return nullptr;
+
+ FunctionRefQual ReferenceQualifier = FrefQualNone;
+ size_t ParamsBegin = Names.size();
+ while (true) {
+ if (consumeIf('E'))
+ break;
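+ // 'v' encodes an empty (void) parameter list; consume it without adding a
+ // parameter node.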
+ if (consumeIf('v'))
+ continue;
+ if (consumeIf("RE")) {
+ ReferenceQualifier = FrefQualLValue;
+ break;
+ }
+ if (consumeIf("OE")) {
+ ReferenceQualifier = FrefQualRValue;
+ break;
+ }
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return nullptr;
+ Names.push_back(T);
+ }
+
+ NodeArray Params = popTrailingNodeArray(ParamsBegin);
+ return make<FunctionType>(ReturnType, Params, CVQuals,
+ ReferenceQualifier, ExceptionSpec);
+}
+
+// extension:
+// <vector-type> ::= Dv <positive dimension number> _ <extended element type>
+// ::= Dv [<dimension expression>] _ <element type>
+// <extended element type> ::= <element type>
+// ::= p # AltiVec vector pixel
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseVectorType() {
+ if (!consumeIf("Dv"))
+ return nullptr;
+ if (look() >= '1' && look() <= '9') {
+ StringView DimensionNumber = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ if (consumeIf('p'))
+ return make<PixelVectorType>(DimensionNumber);
+ Node *ElemType = getDerived().parseType();
+ if (ElemType == nullptr)
+ return nullptr;
+ return make<VectorType>(ElemType, DimensionNumber);
+ }
+
+ if (!consumeIf('_')) {
+ Node *DimExpr = getDerived().parseExpr();
+ if (!DimExpr)
+ return nullptr;
+ if (!consumeIf('_'))
+ return nullptr;
+ Node *ElemType = getDerived().parseType();
+ if (!ElemType)
+ return nullptr;
+ return make<VectorType>(ElemType, DimExpr);
+ }
+ Node *ElemType = getDerived().parseType();
+ if (!ElemType)
+ return nullptr;
+ return make<VectorType>(ElemType, StringView());
+}
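+
+// For example, Clang mangles a four-element float vector as "Dv4_f"; the node
+// built here prints along the lines of "float vector[4]".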
+
+// <decltype> ::= Dt <expression> E # decltype of an id-expression or class member access (C++0x)
+// ::= DT <expression> E # decltype of an expression (C++0x)
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseDecltype() {
+ if (!consumeIf('D'))
+ return nullptr;
+ if (!consumeIf('t') && !consumeIf('T'))
+ return nullptr;
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return nullptr;
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<EnclosingExpr>("decltype(", E, ")");
+}
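+
+// For example, "Dtfp_E" (decltype of the first function parameter) comes out
+// as roughly "decltype(fp)".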
+
+// <array-type> ::= A <positive dimension number> _ <element type>
+// ::= A [<dimension expression>] _ <element type>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseArrayType() {
+ if (!consumeIf('A'))
+ return nullptr;
+
+ NodeOrString Dimension;
+
+ if (std::isdigit(look())) {
+ Dimension = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ } else if (!consumeIf('_')) {
+ Node *DimExpr = getDerived().parseExpr();
+ if (DimExpr == nullptr)
+ return nullptr;
+ if (!consumeIf('_'))
+ return nullptr;
+ Dimension = DimExpr;
+ }
+
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<ArrayType>(Ty, Dimension);
+}
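+
+// For example, "A10_i" is a ten-element int array, so "_Z1fPA10_i" demangles
+// to "f(int (*) [10])".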
+
+// <pointer-to-member-type> ::= M <class type> <member type>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parsePointerToMemberType() {
+ if (!consumeIf('M'))
+ return nullptr;
+ Node *ClassType = getDerived().parseType();
+ if (ClassType == nullptr)
+ return nullptr;
+ Node *MemberType = getDerived().parseType();
+ if (MemberType == nullptr)
+ return nullptr;
+ return make<PointerToMemberType>(ClassType, MemberType);
+}
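+
+// For example, "M1Si" is a pointer to an int member of S, so "_Z1fM1Si"
+// demangles to "f(int S::*)".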
+
+// <class-enum-type> ::= <name> # non-dependent type name, dependent type name, or dependent typename-specifier
+// ::= Ts <name> # dependent elaborated type specifier using 'struct' or 'class'
+// ::= Tu <name> # dependent elaborated type specifier using 'union'
+// ::= Te <name> # dependent elaborated type specifier using 'enum'
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseClassEnumType() {
+ StringView ElabSpef;
+ if (consumeIf("Ts"))
+ ElabSpef = "struct";
+ else if (consumeIf("Tu"))
+ ElabSpef = "union";
+ else if (consumeIf("Te"))
+ ElabSpef = "enum";
+
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+
+ if (!ElabSpef.empty())
+ return make<ElaboratedTypeSpefType>(ElabSpef, Name);
+
+ return Name;
+}
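+
+// For example, "Ts3Foo" is the dependent elaborated specifier and prints as
+// roughly "struct Foo"; a plain "3Foo" simply yields the name "Foo".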
+
+// <qualified-type> ::= <qualifiers> <type>
+// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers>
+// <extended-qualifier> ::= U <source-name> [<template-args>] # vendor extended type qualifier
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseQualifiedType() {
+ if (consumeIf('U')) {
+ StringView Qual = parseBareSourceName();
+ if (Qual.empty())
+ return nullptr;
+
+ // FIXME parse the optional <template-args> here!
+
+ // extension ::= U <objc-name> <objc-type> # objc-type<identifier>
+ if (Qual.startsWith("objcproto")) {
+ StringView ProtoSourceName = Qual.dropFront(std::strlen("objcproto"));
+ StringView Proto;
+ {
+ SwapAndRestore<const char *> SaveFirst(First, ProtoSourceName.begin()),
+ SaveLast(Last, ProtoSourceName.end());
+ Proto = parseBareSourceName();
+ }
+ if (Proto.empty())
+ return nullptr;
+ Node *Child = getDerived().parseQualifiedType();
+ if (Child == nullptr)
+ return nullptr;
+ return make<ObjCProtoName>(Child, Proto);
+ }
+
+ Node *Child = getDerived().parseQualifiedType();
+ if (Child == nullptr)
+ return nullptr;
+ return make<VendorExtQualType>(Child, Qual);
+ }
+
+ Qualifiers Quals = parseCVQualifiers();
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ if (Quals != QualNone)
+ Ty = make<QualType>(Ty, Quals);
+ return Ty;
+}
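+
+// For example, "VKi" yields int qualified as const volatile, so "_Z1fPVKi"
+// demangles to "f(int const volatile*)".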
+
+// <type> ::= <builtin-type>
+// ::= <qualified-type>
+// ::= <function-type>
+// ::= <class-enum-type>
+// ::= <array-type>
+// ::= <pointer-to-member-type>
+// ::= <template-param>
+// ::= <template-template-param> <template-args>
+// ::= <decltype>
+// ::= P <type> # pointer
+// ::= R <type> # l-value reference
+// ::= O <type> # r-value reference (C++11)
+// ::= C <type> # complex pair (C99)
+// ::= G <type> # imaginary (C99)
+// ::= <substitution> # See Compression below
+// extension ::= U <objc-name> <objc-type> # objc-type<identifier>
+// extension ::= <vector-type> # <vector-type> starts with Dv
+//
+// <objc-name> ::= <k0 number> objcproto <k1 number> <identifier> # k0 = 9 + <number of digits in k1> + k1
+// <objc-type> ::= <source-name> # PU<11+>objcproto 11objc_object<source-name> 11objc_object -> id<source-name>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseType() {
+ Node *Result = nullptr;
+
+ switch (look()) {
+ // ::= <qualified-type>
+ case 'r':
+ case 'V':
+ case 'K': {
+ unsigned AfterQuals = 0;
+ if (look(AfterQuals) == 'r') ++AfterQuals;
+ if (look(AfterQuals) == 'V') ++AfterQuals;
+ if (look(AfterQuals) == 'K') ++AfterQuals;
+
+ if (look(AfterQuals) == 'F' ||
+ (look(AfterQuals) == 'D' &&
+ (look(AfterQuals + 1) == 'o' || look(AfterQuals + 1) == 'O' ||
+ look(AfterQuals + 1) == 'w' || look(AfterQuals + 1) == 'x'))) {
+ Result = getDerived().parseFunctionType();
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ }
+ case 'U': {
+ Result = getDerived().parseQualifiedType();
+ break;
+ }
+ // <builtin-type> ::= v # void
+ case 'v':
+ ++First;
+ return make<NameType>("void");
+ // ::= w # wchar_t
+ case 'w':
+ ++First;
+ return make<NameType>("wchar_t");
+ // ::= b # bool
+ case 'b':
+ ++First;
+ return make<NameType>("bool");
+ // ::= c # char
+ case 'c':
+ ++First;
+ return make<NameType>("char");
+ // ::= a # signed char
+ case 'a':
+ ++First;
+ return make<NameType>("signed char");
+ // ::= h # unsigned char
+ case 'h':
+ ++First;
+ return make<NameType>("unsigned char");
+ // ::= s # short
+ case 's':
+ ++First;
+ return make<NameType>("short");
+ // ::= t # unsigned short
+ case 't':
+ ++First;
+ return make<NameType>("unsigned short");
+ // ::= i # int
+ case 'i':
+ ++First;
+ return make<NameType>("int");
+ // ::= j # unsigned int
+ case 'j':
+ ++First;
+ return make<NameType>("unsigned int");
+ // ::= l # long
+ case 'l':
+ ++First;
+ return make<NameType>("long");
+ // ::= m # unsigned long
+ case 'm':
+ ++First;
+ return make<NameType>("unsigned long");
+ // ::= x # long long, __int64
+ case 'x':
+ ++First;
+ return make<NameType>("long long");
+ // ::= y # unsigned long long, __int64
+ case 'y':
+ ++First;
+ return make<NameType>("unsigned long long");
+ // ::= n # __int128
+ case 'n':
+ ++First;
+ return make<NameType>("__int128");
+ // ::= o # unsigned __int128
+ case 'o':
+ ++First;
+ return make<NameType>("unsigned __int128");
+ // ::= f # float
+ case 'f':
+ ++First;
+ return make<NameType>("float");
+ // ::= d # double
+ case 'd':
+ ++First;
+ return make<NameType>("double");
+ // ::= e # long double, __float80
+ case 'e':
+ ++First;
+ return make<NameType>("long double");
+ // ::= g # __float128
+ case 'g':
+ ++First;
+ return make<NameType>("__float128");
+ // ::= z # ellipsis
+ case 'z':
+ ++First;
+ return make<NameType>("...");
+
+ // <builtin-type> ::= u <source-name> # vendor extended type
+ case 'u': {
+ ++First;
+ StringView Res = parseBareSourceName();
+ if (Res.empty())
+ return nullptr;
+ return make<NameType>(Res);
+ }
+ case 'D':
+ switch (look(1)) {
+ // ::= Dd # IEEE 754r decimal floating point (64 bits)
+ case 'd':
+ First += 2;
+ return make<NameType>("decimal64");
+ // ::= De # IEEE 754r decimal floating point (128 bits)
+ case 'e':
+ First += 2;
+ return make<NameType>("decimal128");
+ // ::= Df # IEEE 754r decimal floating point (32 bits)
+ case 'f':
+ First += 2;
+ return make<NameType>("decimal32");
+ // ::= Dh # IEEE 754r half-precision floating point (16 bits)
+ case 'h':
+ First += 2;
+ return make<NameType>("decimal16");
+ // ::= Di # char32_t
+ case 'i':
+ First += 2;
+ return make<NameType>("char32_t");
+ // ::= Ds # char16_t
+ case 's':
+ First += 2;
+ return make<NameType>("char16_t");
+ // ::= Da # auto (in dependent new-expressions)
+ case 'a':
+ First += 2;
+ return make<NameType>("auto");
+ // ::= Dc # decltype(auto)
+ case 'c':
+ First += 2;
+ return make<NameType>("decltype(auto)");
+ // ::= Dn # std::nullptr_t (i.e., decltype(nullptr))
+ case 'n':
+ First += 2;
+ return make<NameType>("std::nullptr_t");
+
+ // ::= <decltype>
+ case 't':
+ case 'T': {
+ Result = getDerived().parseDecltype();
+ break;
+ }
+ // extension ::= <vector-type> # <vector-type> starts with Dv
+ case 'v': {
+ Result = getDerived().parseVectorType();
+ break;
+ }
+ // ::= Dp <type> # pack expansion (C++0x)
+ case 'p': {
+ First += 2;
+ Node *Child = getDerived().parseType();
+ if (!Child)
+ return nullptr;
+ Result = make<ParameterPackExpansion>(Child);
+ break;
+ }
+ // Exception specifier on a function type.
+ case 'o':
+ case 'O':
+ case 'w':
+ // Transaction safe function type.
+ case 'x':
+ Result = getDerived().parseFunctionType();
+ break;
+ }
+ break;
+ // ::= <function-type>
+ case 'F': {
+ Result = getDerived().parseFunctionType();
+ break;
+ }
+ // ::= <array-type>
+ case 'A': {
+ Result = getDerived().parseArrayType();
+ break;
+ }
+ // ::= <pointer-to-member-type>
+ case 'M': {
+ Result = getDerived().parsePointerToMemberType();
+ break;
+ }
+ // ::= <template-param>
+ case 'T': {
+ // This could be an elaborated type specifier on a <class-enum-type>.
+ if (look(1) == 's' || look(1) == 'u' || look(1) == 'e') {
+ Result = getDerived().parseClassEnumType();
+ break;
+ }
+
+ Result = getDerived().parseTemplateParam();
+ if (Result == nullptr)
+ return nullptr;
+
+ // Result could be either of:
+ // <type> ::= <template-param>
+ // <type> ::= <template-template-param> <template-args>
+ //
+ // <template-template-param> ::= <template-param>
+ // ::= <substitution>
+ //
+ // If this is followed by some <template-args>, and we're permitted to
+ // parse them, take the second production.
+
+ if (TryToParseTemplateArgs && look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ Result = make<NameWithTemplateArgs>(Result, TA);
+ }
+ break;
+ }
+ // ::= P <type> # pointer
+ case 'P': {
+ ++First;
+ Node *Ptr = getDerived().parseType();
+ if (Ptr == nullptr)
+ return nullptr;
+ Result = make<PointerType>(Ptr);
+ break;
+ }
+ // ::= R <type> # l-value reference
+ case 'R': {
+ ++First;
+ Node *Ref = getDerived().parseType();
+ if (Ref == nullptr)
+ return nullptr;
+ Result = make<ReferenceType>(Ref, ReferenceKind::LValue);
+ break;
+ }
+ // ::= O <type> # r-value reference (C++11)
+ case 'O': {
+ ++First;
+ Node *Ref = getDerived().parseType();
+ if (Ref == nullptr)
+ return nullptr;
+ Result = make<ReferenceType>(Ref, ReferenceKind::RValue);
+ break;
+ }
+ // ::= C <type> # complex pair (C99)
+ case 'C': {
+ ++First;
+ Node *P = getDerived().parseType();
+ if (P == nullptr)
+ return nullptr;
+ Result = make<PostfixQualifiedType>(P, " complex");
+ break;
+ }
+ // ::= G <type> # imaginary (C99)
+ case 'G': {
+ ++First;
+ Node *P = getDerived().parseType();
+ if (P == nullptr)
+ return P;
+ Result = make<PostfixQualifiedType>(P, " imaginary");
+ break;
+ }
+ // ::= <substitution> # See Compression below
+ case 'S': {
+ if (look(1) && look(1) != 't') {
+ Node *Sub = getDerived().parseSubstitution();
+ if (Sub == nullptr)
+ return nullptr;
+
+ // Sub could be either of:
+ // <type> ::= <substitution>
+ // <type> ::= <template-template-param> <template-args>
+ //
+ // <template-template-param> ::= <template-param>
+ // ::= <substitution>
+ //
+ // If this is followed by some <template-args>, and we're permitted to
+ // parse them, take the second production.
+
+ if (TryToParseTemplateArgs && look() == 'I') {
+ Node *TA = getDerived().parseTemplateArgs();
+ if (TA == nullptr)
+ return nullptr;
+ Result = make<NameWithTemplateArgs>(Sub, TA);
+ break;
+ }
+
+ // If all we parsed was a substitution, don't re-insert into the
+ // substitution table.
+ return Sub;
+ }
+ LLVM_FALLTHROUGH;
+ }
+ // ::= <class-enum-type>
+ default: {
+ Result = getDerived().parseClassEnumType();
+ break;
+ }
+ }
+
+ // If we parsed a type, insert it into the substitution table. Note that all
+ // <builtin-type>s and <substitution>s have already bailed out, because they
+ // don't get substitutions.
+ if (Result != nullptr)
+ Subs.push_back(Result);
+ return Result;
+}
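+
+// As a small worked example, "PKc" recurses through this switch: 'P' parses
+// its pointee type, 'K' routes to parseQualifiedType, and 'c' is the builtin
+// char, giving "char const*"; the qualified type and the pointer type are then
+// both recorded in Subs as substitution candidates.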
+
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parsePrefixExpr(StringView Kind) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return nullptr;
+ return make<PrefixExpr>(Kind, E);
+}
+
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseBinaryExpr(StringView Kind) {
+ Node *LHS = getDerived().parseExpr();
+ if (LHS == nullptr)
+ return nullptr;
+ Node *RHS = getDerived().parseExpr();
+ if (RHS == nullptr)
+ return nullptr;
+ return make<BinaryExpr>(LHS, Kind, RHS);
+}
+
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseIntegerLiteral(StringView Lit) {
+ StringView Tmp = parseNumber(true);
+ if (!Tmp.empty() && consumeIf('E'))
+ return make<IntegerLiteral>(Lit, Tmp);
+ return nullptr;
+}
+
+// <CV-Qualifiers> ::= [r] [V] [K]
+template <typename Alloc, typename Derived>
+Qualifiers AbstractManglingParser<Alloc, Derived>::parseCVQualifiers() {
+ Qualifiers CVR = QualNone;
+ if (consumeIf('r'))
+ CVR |= QualRestrict;
+ if (consumeIf('V'))
+ CVR |= QualVolatile;
+ if (consumeIf('K'))
+ CVR |= QualConst;
+ return CVR;
+}
+
+// <function-param> ::= fp <top-level CV-Qualifiers> _ # L == 0, first parameter
+// ::= fp <top-level CV-Qualifiers> <parameter-2 non-negative number> _ # L == 0, second and later parameters
+// ::= fL <L-1 non-negative number> p <top-level CV-Qualifiers> _ # L > 0, first parameter
+// ::= fL <L-1 non-negative number> p <top-level CV-Qualifiers> <parameter-2 non-negative number> _ # L > 0, second and later parameters
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseFunctionParam() {
+ if (consumeIf("fp")) {
+ parseCVQualifiers();
+ StringView Num = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<FunctionParam>(Num);
+ }
+ if (consumeIf("fL")) {
+ if (parseNumber().empty())
+ return nullptr;
+ if (!consumeIf('p'))
+ return nullptr;
+ parseCVQualifiers();
+ StringView Num = parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<FunctionParam>(Num);
+ }
+ return nullptr;
+}
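+
+// For example, "fp_" names the first parameter of the enclosing function type
+// and "fp0_" the second; the "fL" forms carry an extra L-1 nesting-level
+// number before the 'p', as spelled out in the grammar above.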
+
+// [gs] nw <expression>* _ <type> E # new (expr-list) type
+// [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
+// [gs] na <expression>* _ <type> E # new[] (expr-list) type
+// [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
+// <initializer> ::= pi <expression>* E # parenthesized initialization
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseNewExpr() {
+ bool Global = consumeIf("gs");
+ bool IsArray = look(1) == 'a';
+ if (!consumeIf("nw") && !consumeIf("na"))
+ return nullptr;
+ size_t Exprs = Names.size();
+ while (!consumeIf('_')) {
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ Names.push_back(Ex);
+ }
+ NodeArray ExprList = popTrailingNodeArray(Exprs);
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return Ty;
+ if (consumeIf("pi")) {
+ size_t InitsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *Init = getDerived().parseExpr();
+ if (Init == nullptr)
+ return Init;
+ Names.push_back(Init);
+ }
+ NodeArray Inits = popTrailingNodeArray(InitsBegin);
+ return make<NewExpr>(ExprList, Ty, Inits, Global, IsArray);
+ } else if (!consumeIf('E'))
+ return nullptr;
+ return make<NewExpr>(ExprList, Ty, NodeArray(), Global, IsArray);
+}
+
+// cv <type> <expression> # conversion with one argument
+// cv <type> _ <expression>* E # conversion with a different number of arguments
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseConversionExpr() {
+ if (!consumeIf("cv"))
+ return nullptr;
+ Node *Ty;
+ {
+ SwapAndRestore<bool> SaveTemp(TryToParseTemplateArgs, false);
+ Ty = getDerived().parseType();
+ }
+
+ if (Ty == nullptr)
+ return nullptr;
+
+ if (consumeIf('_')) {
+ size_t ExprsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return E;
+ Names.push_back(E);
+ }
+ NodeArray Exprs = popTrailingNodeArray(ExprsBegin);
+ return make<ConversionExpr>(Ty, Exprs);
+ }
+
+ Node *E[1] = {getDerived().parseExpr()};
+ if (E[0] == nullptr)
+ return nullptr;
+ return make<ConversionExpr>(Ty, makeNodeArray(E, E + 1));
+}
+
+// <expr-primary> ::= L <type> <value number> E # integer literal
+// ::= L <type> <value float> E # floating literal
+// ::= L <string type> E # string literal
+// ::= L <nullptr type> E # nullptr literal (i.e., "LDnE")
+// FIXME: ::= L <type> <real-part float> _ <imag-part float> E # complex floating point literal (C 2000)
+// ::= L <mangled-name> E # external name
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseExprPrimary() {
+ if (!consumeIf('L'))
+ return nullptr;
+ switch (look()) {
+ case 'w':
+ ++First;
+ return getDerived().parseIntegerLiteral("wchar_t");
+ case 'b':
+ if (consumeIf("b0E"))
+ return make<BoolExpr>(0);
+ if (consumeIf("b1E"))
+ return make<BoolExpr>(1);
+ return nullptr;
+ case 'c':
+ ++First;
+ return getDerived().parseIntegerLiteral("char");
+ case 'a':
+ ++First;
+ return getDerived().parseIntegerLiteral("signed char");
+ case 'h':
+ ++First;
+ return getDerived().parseIntegerLiteral("unsigned char");
+ case 's':
+ ++First;
+ return getDerived().parseIntegerLiteral("short");
+ case 't':
+ ++First;
+ return getDerived().parseIntegerLiteral("unsigned short");
+ case 'i':
+ ++First;
+ return getDerived().parseIntegerLiteral("");
+ case 'j':
+ ++First;
+ return getDerived().parseIntegerLiteral("u");
+ case 'l':
+ ++First;
+ return getDerived().parseIntegerLiteral("l");
+ case 'm':
+ ++First;
+ return getDerived().parseIntegerLiteral("ul");
+ case 'x':
+ ++First;
+ return getDerived().parseIntegerLiteral("ll");
+ case 'y':
+ ++First;
+ return getDerived().parseIntegerLiteral("ull");
+ case 'n':
+ ++First;
+ return getDerived().parseIntegerLiteral("__int128");
+ case 'o':
+ ++First;
+ return getDerived().parseIntegerLiteral("unsigned __int128");
+ case 'f':
+ ++First;
+ return getDerived().template parseFloatingLiteral<float>();
+ case 'd':
+ ++First;
+ return getDerived().template parseFloatingLiteral<double>();
+ case 'e':
+ ++First;
+ return getDerived().template parseFloatingLiteral<long double>();
+ case '_':
+ if (consumeIf("_Z")) {
+ Node *R = getDerived().parseEncoding();
+ if (R != nullptr && consumeIf('E'))
+ return R;
+ }
+ return nullptr;
+ case 'T':
+ // Invalid mangled name per
+ // http://sourcerytools.com/pipermail/cxx-abi-dev/2011-August/002422.html
+ return nullptr;
+ default: {
+ // might be named type
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return nullptr;
+ StringView N = parseNumber();
+ if (!N.empty()) {
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<IntegerCastExpr>(T, N);
+ }
+ if (consumeIf('E'))
+ return T;
+ return nullptr;
+ }
+ }
+}
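+
+// For example, "Li42E" prints as "42", "Lb1E" as "true", and "L_Z1xE" refers
+// to the external name x.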
+
+// <braced-expression> ::= <expression>
+// ::= di <field source-name> <braced-expression> # .name = expr
+// ::= dx <index expression> <braced-expression> # [expr] = expr
+// ::= dX <range begin expression> <range end expression> <braced-expression>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseBracedExpr() {
+ if (look() == 'd') {
+ switch (look(1)) {
+ case 'i': {
+ First += 2;
+ Node *Field = getDerived().parseSourceName(/*NameState=*/nullptr);
+ if (Field == nullptr)
+ return nullptr;
+ Node *Init = getDerived().parseBracedExpr();
+ if (Init == nullptr)
+ return nullptr;
+ return make<BracedExpr>(Field, Init, /*isArray=*/false);
+ }
+ case 'x': {
+ First += 2;
+ Node *Index = getDerived().parseExpr();
+ if (Index == nullptr)
+ return nullptr;
+ Node *Init = getDerived().parseBracedExpr();
+ if (Init == nullptr)
+ return nullptr;
+ return make<BracedExpr>(Index, Init, /*isArray=*/true);
+ }
+ case 'X': {
+ First += 2;
+ Node *RangeBegin = getDerived().parseExpr();
+ if (RangeBegin == nullptr)
+ return nullptr;
+ Node *RangeEnd = getDerived().parseExpr();
+ if (RangeEnd == nullptr)
+ return nullptr;
+ Node *Init = getDerived().parseBracedExpr();
+ if (Init == nullptr)
+ return nullptr;
+ return make<BracedRangeExpr>(RangeBegin, RangeEnd, Init);
+ }
+ }
+ }
+ return getDerived().parseExpr();
+}
+
+// (not yet in the spec)
+// <fold-expr> ::= fL <binary-operator-name> <expression> <expression>
+// ::= fR <binary-operator-name> <expression> <expression>
+// ::= fl <binary-operator-name> <expression>
+// ::= fr <binary-operator-name> <expression>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseFoldExpr() {
+ if (!consumeIf('f'))
+ return nullptr;
+
+ char FoldKind = look();
+ bool IsLeftFold, HasInitializer;
+ HasInitializer = FoldKind == 'L' || FoldKind == 'R';
+ if (FoldKind == 'l' || FoldKind == 'L')
+ IsLeftFold = true;
+ else if (FoldKind == 'r' || FoldKind == 'R')
+ IsLeftFold = false;
+ else
+ return nullptr;
+ ++First;
+
+ // FIXME: This map is duplicated in parseOperatorName and parseExpr.
+ StringView OperatorName;
+ if (consumeIf("aa")) OperatorName = "&&";
+ else if (consumeIf("an")) OperatorName = "&";
+ else if (consumeIf("aN")) OperatorName = "&=";
+ else if (consumeIf("aS")) OperatorName = "=";
+ else if (consumeIf("cm")) OperatorName = ",";
+ else if (consumeIf("ds")) OperatorName = ".*";
+ else if (consumeIf("dv")) OperatorName = "/";
+ else if (consumeIf("dV")) OperatorName = "/=";
+ else if (consumeIf("eo")) OperatorName = "^";
+ else if (consumeIf("eO")) OperatorName = "^=";
+ else if (consumeIf("eq")) OperatorName = "==";
+ else if (consumeIf("ge")) OperatorName = ">=";
+ else if (consumeIf("gt")) OperatorName = ">";
+ else if (consumeIf("le")) OperatorName = "<=";
+ else if (consumeIf("ls")) OperatorName = "<<";
+ else if (consumeIf("lS")) OperatorName = "<<=";
+ else if (consumeIf("lt")) OperatorName = "<";
+ else if (consumeIf("mi")) OperatorName = "-";
+ else if (consumeIf("mI")) OperatorName = "-=";
+ else if (consumeIf("ml")) OperatorName = "*";
+ else if (consumeIf("mL")) OperatorName = "*=";
+ else if (consumeIf("ne")) OperatorName = "!=";
+ else if (consumeIf("oo")) OperatorName = "||";
+ else if (consumeIf("or")) OperatorName = "|";
+ else if (consumeIf("oR")) OperatorName = "|=";
+ else if (consumeIf("pl")) OperatorName = "+";
+ else if (consumeIf("pL")) OperatorName = "+=";
+ else if (consumeIf("rm")) OperatorName = "%";
+ else if (consumeIf("rM")) OperatorName = "%=";
+ else if (consumeIf("rs")) OperatorName = ">>";
+ else if (consumeIf("rS")) OperatorName = ">>=";
+ else return nullptr;
+
+ Node *Pack = getDerived().parseExpr(), *Init = nullptr;
+ if (Pack == nullptr)
+ return nullptr;
+ if (HasInitializer) {
+ Init = getDerived().parseExpr();
+ if (Init == nullptr)
+ return nullptr;
+ }
+
+ if (IsLeftFold && Init)
+ std::swap(Pack, Init);
+
+ return make<FoldExpr>(IsLeftFold, OperatorName, Pack, Init);
+}
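+
+// For example, "flpl" followed by a pack expression encodes the unary left
+// fold "(... + pack)", while the "fL"/"fR" forms carry an extra initializer
+// operand.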
+
+// <expression> ::= <unary operator-name> <expression>
+// ::= <binary operator-name> <expression> <expression>
+// ::= <ternary operator-name> <expression> <expression> <expression>
+// ::= cl <expression>+ E # call
+// ::= cv <type> <expression> # conversion with one argument
+// ::= cv <type> _ <expression>* E # conversion with a different number of arguments
+// ::= [gs] nw <expression>* _ <type> E # new (expr-list) type
+// ::= [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
+// ::= [gs] na <expression>* _ <type> E # new[] (expr-list) type
+// ::= [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
+// ::= [gs] dl <expression> # delete expression
+// ::= [gs] da <expression> # delete[] expression
+// ::= pp_ <expression> # prefix ++
+// ::= mm_ <expression> # prefix --
+// ::= ti <type> # typeid (type)
+// ::= te <expression> # typeid (expression)
+// ::= dc <type> <expression> # dynamic_cast<type> (expression)
+// ::= sc <type> <expression> # static_cast<type> (expression)
+// ::= cc <type> <expression> # const_cast<type> (expression)
+// ::= rc <type> <expression> # reinterpret_cast<type> (expression)
+// ::= st <type> # sizeof (a type)
+// ::= sz <expression> # sizeof (an expression)
+// ::= at <type> # alignof (a type)
+// ::= az <expression> # alignof (an expression)
+// ::= nx <expression> # noexcept (expression)
+// ::= <template-param>
+// ::= <function-param>
+// ::= dt <expression> <unresolved-name> # expr.name
+// ::= pt <expression> <unresolved-name> # expr->name
+// ::= ds <expression> <expression> # expr.*expr
+// ::= sZ <template-param> # size of a parameter pack
+// ::= sZ <function-param> # size of a function parameter pack
+// ::= sP <template-arg>* E # sizeof...(T), size of a captured template parameter pack from an alias template
+// ::= sp <expression> # pack expansion
+// ::= tw <expression> # throw expression
+// ::= tr # throw with no operand (rethrow)
+// ::= <unresolved-name> # f(p), N::f(p), ::f(p),
+// # freestanding dependent name (e.g., T::x),
+// # objectless nonstatic member reference
+// ::= fL <binary-operator-name> <expression> <expression>
+// ::= fR <binary-operator-name> <expression> <expression>
+// ::= fl <binary-operator-name> <expression>
+// ::= fr <binary-operator-name> <expression>
+// ::= <expr-primary>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
+ bool Global = consumeIf("gs");
+ if (numLeft() < 2)
+ return nullptr;
+
+ switch (*First) {
+ case 'L':
+ return getDerived().parseExprPrimary();
+ case 'T':
+ return getDerived().parseTemplateParam();
+ case 'f': {
+ // Disambiguate a fold expression from a <function-param>.
+ if (look(1) == 'p' || (look(1) == 'L' && std::isdigit(look(2))))
+ return getDerived().parseFunctionParam();
+ return getDerived().parseFoldExpr();
+ }
+ case 'a':
+ switch (First[1]) {
+ case 'a':
+ First += 2;
+ return getDerived().parseBinaryExpr("&&");
+ case 'd':
+ First += 2;
+ return getDerived().parsePrefixExpr("&");
+ case 'n':
+ First += 2;
+ return getDerived().parseBinaryExpr("&");
+ case 'N':
+ First += 2;
+ return getDerived().parseBinaryExpr("&=");
+ case 'S':
+ First += 2;
+ return getDerived().parseBinaryExpr("=");
+ case 't': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<EnclosingExpr>("alignof (", Ty, ")");
+ }
+ case 'z': {
+ First += 2;
+ Node *Ty = getDerived().parseExpr();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<EnclosingExpr>("alignof (", Ty, ")");
+ }
+ }
+ return nullptr;
+ case 'c':
+ switch (First[1]) {
+ // cc <type> <expression> # const_cast<type>(expression)
+ case 'c': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return Ty;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<CastExpr>("const_cast", Ty, Ex);
+ }
+ // cl <expression>+ E # call
+ case 'l': {
+ First += 2;
+ Node *Callee = getDerived().parseExpr();
+ if (Callee == nullptr)
+ return Callee;
+ size_t ExprsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return E;
+ Names.push_back(E);
+ }
+ return make<CallExpr>(Callee, popTrailingNodeArray(ExprsBegin));
+ }
+ case 'm':
+ First += 2;
+ return getDerived().parseBinaryExpr(",");
+ case 'o':
+ First += 2;
+ return getDerived().parsePrefixExpr("~");
+ case 'v':
+ return getDerived().parseConversionExpr();
+ }
+ return nullptr;
+ case 'd':
+ switch (First[1]) {
+ case 'a': {
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<DeleteExpr>(Ex, Global, /*is_array=*/true);
+ }
+ case 'c': {
+ First += 2;
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return T;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<CastExpr>("dynamic_cast", T, Ex);
+ }
+ case 'e':
+ First += 2;
+ return getDerived().parsePrefixExpr("*");
+ case 'l': {
+ First += 2;
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return E;
+ return make<DeleteExpr>(E, Global, /*is_array=*/false);
+ }
+ case 'n':
+ return getDerived().parseUnresolvedName();
+ case 's': {
+ First += 2;
+ Node *LHS = getDerived().parseExpr();
+ if (LHS == nullptr)
+ return nullptr;
+ Node *RHS = getDerived().parseExpr();
+ if (RHS == nullptr)
+ return nullptr;
+ return make<MemberExpr>(LHS, ".*", RHS);
+ }
+ case 't': {
+ First += 2;
+ Node *LHS = getDerived().parseExpr();
+ if (LHS == nullptr)
+ return LHS;
+ Node *RHS = getDerived().parseExpr();
+ if (RHS == nullptr)
+ return nullptr;
+ return make<MemberExpr>(LHS, ".", RHS);
+ }
+ case 'v':
+ First += 2;
+ return getDerived().parseBinaryExpr("/");
+ case 'V':
+ First += 2;
+ return getDerived().parseBinaryExpr("/=");
+ }
+ return nullptr;
+ case 'e':
+ switch (First[1]) {
+ case 'o':
+ First += 2;
+ return getDerived().parseBinaryExpr("^");
+ case 'O':
+ First += 2;
+ return getDerived().parseBinaryExpr("^=");
+ case 'q':
+ First += 2;
+ return getDerived().parseBinaryExpr("==");
+ }
+ return nullptr;
+ case 'g':
+ switch (First[1]) {
+ case 'e':
+ First += 2;
+ return getDerived().parseBinaryExpr(">=");
+ case 't':
+ First += 2;
+ return getDerived().parseBinaryExpr(">");
+ }
+ return nullptr;
+ case 'i':
+ switch (First[1]) {
+ case 'x': {
+ First += 2;
+ Node *Base = getDerived().parseExpr();
+ if (Base == nullptr)
+ return nullptr;
+ Node *Index = getDerived().parseExpr();
+ if (Index == nullptr)
+ return Index;
+ return make<ArraySubscriptExpr>(Base, Index);
+ }
+ case 'l': {
+ First += 2;
+ size_t InitsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseBracedExpr();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
+ }
+ return make<InitListExpr>(nullptr, popTrailingNodeArray(InitsBegin));
+ }
+ }
+ return nullptr;
+ case 'l':
+ switch (First[1]) {
+ case 'e':
+ First += 2;
+ return getDerived().parseBinaryExpr("<=");
+ case 's':
+ First += 2;
+ return getDerived().parseBinaryExpr("<<");
+ case 'S':
+ First += 2;
+ return getDerived().parseBinaryExpr("<<=");
+ case 't':
+ First += 2;
+ return getDerived().parseBinaryExpr("<");
+ }
+ return nullptr;
+ case 'm':
+ switch (First[1]) {
+ case 'i':
+ First += 2;
+ return getDerived().parseBinaryExpr("-");
+ case 'I':
+ First += 2;
+ return getDerived().parseBinaryExpr("-=");
+ case 'l':
+ First += 2;
+ return getDerived().parseBinaryExpr("*");
+ case 'L':
+ First += 2;
+ return getDerived().parseBinaryExpr("*=");
+ case 'm':
+ First += 2;
+ if (consumeIf('_'))
+ return getDerived().parsePrefixExpr("--");
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ return make<PostfixExpr>(Ex, "--");
+ }
+ return nullptr;
+ case 'n':
+ switch (First[1]) {
+ case 'a':
+ case 'w':
+ return getDerived().parseNewExpr();
+ case 'e':
+ First += 2;
+ return getDerived().parseBinaryExpr("!=");
+ case 'g':
+ First += 2;
+ return getDerived().parsePrefixExpr("-");
+ case 't':
+ First += 2;
+ return getDerived().parsePrefixExpr("!");
+ case 'x':
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<EnclosingExpr>("noexcept (", Ex, ")");
+ }
+ return nullptr;
+ case 'o':
+ switch (First[1]) {
+ case 'n':
+ return getDerived().parseUnresolvedName();
+ case 'o':
+ First += 2;
+ return getDerived().parseBinaryExpr("||");
+ case 'r':
+ First += 2;
+ return getDerived().parseBinaryExpr("|");
+ case 'R':
+ First += 2;
+ return getDerived().parseBinaryExpr("|=");
+ }
+ return nullptr;
+ case 'p':
+ switch (First[1]) {
+ case 'm':
+ First += 2;
+ return getDerived().parseBinaryExpr("->*");
+ case 'l':
+ First += 2;
+ return getDerived().parseBinaryExpr("+");
+ case 'L':
+ First += 2;
+ return getDerived().parseBinaryExpr("+=");
+ case 'p': {
+ First += 2;
+ if (consumeIf('_'))
+ return getDerived().parsePrefixExpr("++");
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<PostfixExpr>(Ex, "++");
+ }
+ case 's':
+ First += 2;
+ return getDerived().parsePrefixExpr("+");
+ case 't': {
+ First += 2;
+ Node *L = getDerived().parseExpr();
+ if (L == nullptr)
+ return nullptr;
+ Node *R = getDerived().parseExpr();
+ if (R == nullptr)
+ return nullptr;
+ return make<MemberExpr>(L, "->", R);
+ }
+ }
+ return nullptr;
+ case 'q':
+ if (First[1] == 'u') {
+ First += 2;
+ Node *Cond = getDerived().parseExpr();
+ if (Cond == nullptr)
+ return nullptr;
+ Node *LHS = getDerived().parseExpr();
+ if (LHS == nullptr)
+ return nullptr;
+ Node *RHS = getDerived().parseExpr();
+ if (RHS == nullptr)
+ return nullptr;
+ return make<ConditionalExpr>(Cond, LHS, RHS);
+ }
+ return nullptr;
+ case 'r':
+ switch (First[1]) {
+ case 'c': {
+ First += 2;
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return T;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<CastExpr>("reinterpret_cast", T, Ex);
+ }
+ case 'm':
+ First += 2;
+ return getDerived().parseBinaryExpr("%");
+ case 'M':
+ First += 2;
+ return getDerived().parseBinaryExpr("%=");
+ case 's':
+ First += 2;
+ return getDerived().parseBinaryExpr(">>");
+ case 'S':
+ First += 2;
+ return getDerived().parseBinaryExpr(">>=");
+ }
+ return nullptr;
+ case 's':
+ switch (First[1]) {
+ case 'c': {
+ First += 2;
+ Node *T = getDerived().parseType();
+ if (T == nullptr)
+ return T;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<CastExpr>("static_cast", T, Ex);
+ }
+ case 'p': {
+ First += 2;
+ Node *Child = getDerived().parseExpr();
+ if (Child == nullptr)
+ return nullptr;
+ return make<ParameterPackExpansion>(Child);
+ }
+ case 'r':
+ return getDerived().parseUnresolvedName();
+ case 't': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return Ty;
+ return make<EnclosingExpr>("sizeof (", Ty, ")");
+ }
+ case 'z': {
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<EnclosingExpr>("sizeof (", Ex, ")");
+ }
+ case 'Z':
+ First += 2;
+ if (look() == 'T') {
+ Node *R = getDerived().parseTemplateParam();
+ if (R == nullptr)
+ return nullptr;
+ return make<SizeofParamPackExpr>(R);
+ } else if (look() == 'f') {
+ Node *FP = getDerived().parseFunctionParam();
+ if (FP == nullptr)
+ return nullptr;
+ return make<EnclosingExpr>("sizeof... (", FP, ")");
+ }
+ return nullptr;
+ case 'P': {
+ First += 2;
+ size_t ArgsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ }
+ auto *Pack = make<NodeArrayNode>(popTrailingNodeArray(ArgsBegin));
+ if (!Pack)
+ return nullptr;
+ return make<EnclosingExpr>("sizeof... (", Pack, ")");
+ }
+ }
+ return nullptr;
+ case 't':
+ switch (First[1]) {
+ case 'e': {
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return Ex;
+ return make<EnclosingExpr>("typeid (", Ex, ")");
+ }
+ case 'i': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return Ty;
+ return make<EnclosingExpr>("typeid (", Ty, ")");
+ }
+ case 'l': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ size_t InitsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseBracedExpr();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
+ }
+ return make<InitListExpr>(Ty, popTrailingNodeArray(InitsBegin));
+ }
+ case 'r':
+ First += 2;
+ return make<NameType>("throw");
+ case 'w': {
+ First += 2;
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ return make<ThrowExpr>(Ex);
+ }
+ }
+ return nullptr;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return getDerived().parseUnresolvedName();
+ }
+ return nullptr;
+}
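+
+// For example, "plLi1ELi2E" is the binary '+' applied to the literals 1 and 2
+// and prints as roughly "(1) + (2)"; most of the two-letter codes above map
+// one-to-one onto the operator tables in the Itanium ABI.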
+
+// <call-offset> ::= h <nv-offset> _
+// ::= v <v-offset> _
+//
+// <nv-offset> ::= <offset number>
+// # non-virtual base override
+//
+// <v-offset> ::= <offset number> _ <virtual offset number>
+// # virtual base override, with vcall offset
+template <typename Alloc, typename Derived>
+bool AbstractManglingParser<Alloc, Derived>::parseCallOffset() {
+ // Just scan through the call offset; we never add this information to the
+ // output.
+ if (consumeIf('h'))
+ return parseNumber(true).empty() || !consumeIf('_');
+ if (consumeIf('v'))
+ return parseNumber(true).empty() || !consumeIf('_') ||
+ parseNumber(true).empty() || !consumeIf('_');
+ return true;
+}
+
+// <special-name> ::= TV <type> # virtual table
+// ::= TT <type> # VTT structure (construction vtable index)
+// ::= TI <type> # typeinfo structure
+// ::= TS <type> # typeinfo name (null-terminated byte string)
+// ::= Tc <call-offset> <call-offset> <base encoding>
+// # base is the nominal target function of thunk
+// # first call-offset is 'this' adjustment
+// # second call-offset is result adjustment
+// ::= T <call-offset> <base encoding>
+// # base is the nominal target function of thunk
+// ::= GV <object name> # Guard variable for one-time initialization
+// # No <type>
+// ::= TW <object name> # Thread-local wrapper
+// ::= TH <object name> # Thread-local initialization
+// ::= GR <object name> _ # First temporary
+// ::= GR <object name> <seq-id> _ # Subsequent temporaries
+// extension ::= TC <first type> <number> _ <second type> # construction vtable for second-in-first
+// extension ::= GR <object name> # reference temporary for object
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSpecialName() {
+ switch (look()) {
+ case 'T':
+ switch (look(1)) {
+ // TV <type> # virtual table
+ case 'V': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<SpecialName>("vtable for ", Ty);
+ }
+ // TT <type> # VTT structure (construction vtable index)
+ case 'T': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<SpecialName>("VTT for ", Ty);
+ }
+ // TI <type> # typeinfo structure
+ case 'I': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<SpecialName>("typeinfo for ", Ty);
+ }
+ // TS <type> # typeinfo name (null-terminated byte string)
+ case 'S': {
+ First += 2;
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ return make<SpecialName>("typeinfo name for ", Ty);
+ }
+ // Tc <call-offset> <call-offset> <base encoding>
+ case 'c': {
+ First += 2;
+ if (parseCallOffset() || parseCallOffset())
+ return nullptr;
+ Node *Encoding = getDerived().parseEncoding();
+ if (Encoding == nullptr)
+ return nullptr;
+ return make<SpecialName>("covariant return thunk to ", Encoding);
+ }
+ // extension ::= TC <first type> <number> _ <second type>
+ // # construction vtable for second-in-first
+ case 'C': {
+ First += 2;
+ Node *FirstType = getDerived().parseType();
+ if (FirstType == nullptr)
+ return nullptr;
+ if (parseNumber(true).empty() || !consumeIf('_'))
+ return nullptr;
+ Node *SecondType = getDerived().parseType();
+ if (SecondType == nullptr)
+ return nullptr;
+ return make<CtorVtableSpecialName>(SecondType, FirstType);
+ }
+ // TW <object name> # Thread-local wrapper
+ case 'W': {
+ First += 2;
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+ return make<SpecialName>("thread-local wrapper routine for ", Name);
+ }
+ // TH <object name> # Thread-local initialization
+ case 'H': {
+ First += 2;
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+ return make<SpecialName>("thread-local initialization routine for ", Name);
+ }
+ // T <call-offset> <base encoding>
+ default: {
+ ++First;
+ bool IsVirt = look() == 'v';
+ if (parseCallOffset())
+ return nullptr;
+ Node *BaseEncoding = getDerived().parseEncoding();
+ if (BaseEncoding == nullptr)
+ return nullptr;
+ if (IsVirt)
+ return make<SpecialName>("virtual thunk to ", BaseEncoding);
+ else
+ return make<SpecialName>("non-virtual thunk to ", BaseEncoding);
+ }
+ }
+ case 'G':
+ switch (look(1)) {
+ // GV <object name> # Guard variable for one-time initialization
+ case 'V': {
+ First += 2;
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+ return make<SpecialName>("guard variable for ", Name);
+ }
+ // GR <object name> # reference temporary for object
+ // GR <object name> _ # First temporary
+ // GR <object name> <seq-id> _ # Subsequent temporaries
+ case 'R': {
+ First += 2;
+ Node *Name = getDerived().parseName();
+ if (Name == nullptr)
+ return nullptr;
+ size_t Count;
+ bool ParsedSeqId = !parseSeqId(&Count);
+ if (!consumeIf('_') && ParsedSeqId)
+ return nullptr;
+ return make<SpecialName>("reference temporary for ", Name);
+ }
+ }
+ }
+ return nullptr;
+}
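+
+// For example, "_ZTV1S" demangles to "vtable for S" and "_ZTI1S" to
+// "typeinfo for S".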
+
+// <encoding> ::= <function name> <bare-function-type>
+// ::= <data name>
+// ::= <special-name>
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseEncoding() {
+ if (look() == 'G' || look() == 'T')
+ return getDerived().parseSpecialName();
+
+ auto IsEndOfEncoding = [&] {
+ // The set of chars that can potentially follow an <encoding> (none of which
+ // can start a <type>). Enumerating these allows us to avoid speculative
+ // parsing.
+ return numLeft() == 0 || look() == 'E' || look() == '.' || look() == '_';
+ };
+
+ NameState NameInfo(this);
+ Node *Name = getDerived().parseName(&NameInfo);
+ if (Name == nullptr)
+ return nullptr;
+
+ if (resolveForwardTemplateRefs(NameInfo))
+ return nullptr;
+
+ if (IsEndOfEncoding())
+ return Name;
+
+ Node *Attrs = nullptr;
+ if (consumeIf("Ua9enable_ifI")) {
+ size_t BeforeArgs = Names.size();
+ while (!consumeIf('E')) {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ }
+ Attrs = make<EnableIfAttr>(popTrailingNodeArray(BeforeArgs));
+ if (!Attrs)
+ return nullptr;
+ }
+
+ Node *ReturnType = nullptr;
+ if (!NameInfo.CtorDtorConversion && NameInfo.EndsWithTemplateArgs) {
+ ReturnType = getDerived().parseType();
+ if (ReturnType == nullptr)
+ return nullptr;
+ }
+
+ if (consumeIf('v'))
+ return make<FunctionEncoding>(ReturnType, Name, NodeArray(),
+ Attrs, NameInfo.CVQualifiers,
+ NameInfo.ReferenceQualifier);
+
+ size_t ParamsBegin = Names.size();
+ do {
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ Names.push_back(Ty);
+ } while (!IsEndOfEncoding());
+
+ return make<FunctionEncoding>(ReturnType, Name,
+ popTrailingNodeArray(ParamsBegin),
+ Attrs, NameInfo.CVQualifiers,
+ NameInfo.ReferenceQualifier);
+}
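+
+// For example, the encoding in "_Z3fooiPKc" is the name "foo" followed by the
+// bare function type "iPKc", giving "foo(int, char const*)"; a lone data name
+// such as "_Z3bar" has no trailing type list.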
+
+template <class Float>
+struct FloatData;
+
+template <>
+struct FloatData<float>
+{
+ static const size_t mangled_size = 8;
+ static const size_t max_demangled_size = 24;
+ static constexpr const char* spec = "%af";
+};
+
+template <>
+struct FloatData<double>
+{
+ static const size_t mangled_size = 16;
+ static const size_t max_demangled_size = 32;
+ static constexpr const char* spec = "%a";
+};
+
+template <>
+struct FloatData<long double>
+{
+#if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \
+ defined(__wasm__)
+ static const size_t mangled_size = 32;
+#elif defined(__arm__) || defined(__mips__) || defined(__hexagon__)
+ static const size_t mangled_size = 16;
+#else
+ static const size_t mangled_size = 20; // May need to be adjusted to 16 or 24 on other platforms
+#endif
+ static const size_t max_demangled_size = 40;
+ static constexpr const char *spec = "%LaL";
+};
+
+template <typename Alloc, typename Derived>
+template <class Float>
+Node *AbstractManglingParser<Alloc, Derived>::parseFloatingLiteral() {
+ const size_t N = FloatData<Float>::mangled_size;
+ if (numLeft() <= N)
+ return nullptr;
+ StringView Data(First, First + N);
+ for (char C : Data)
+ if (!std::isxdigit(C))
+ return nullptr;
+ First += N;
+ if (!consumeIf('E'))
+ return nullptr;
+ return make<FloatLiteralImpl<Float>>(Data);
+}
+
+// <seq-id> ::= <0-9A-Z>+
+template <typename Alloc, typename Derived>
+bool AbstractManglingParser<Alloc, Derived>::parseSeqId(size_t *Out) {
+ if (!(look() >= '0' && look() <= '9') &&
+ !(look() >= 'A' && look() <= 'Z'))
+ return true;
+
+ size_t Id = 0;
+ while (true) {
+ if (look() >= '0' && look() <= '9') {
+ Id *= 36;
+ Id += static_cast<size_t>(look() - '0');
+ } else if (look() >= 'A' && look() <= 'Z') {
+ Id *= 36;
+ Id += static_cast<size_t>(look() - 'A') + 10;
+ } else {
+ *Out = Id;
+ return false;
+ }
+ ++First;
+ }
+}
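+
+// Seq-ids are base-36: '0'-'9' then 'A'-'Z', so "A" is 10 and "10" is 36.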
+
+// <substitution> ::= S <seq-id> _
+// ::= S_
+// <substitution> ::= Sa # ::std::allocator
+// <substitution> ::= Sb # ::std::basic_string
+// <substitution> ::= Ss # ::std::basic_string < char,
+// ::std::char_traits<char>,
+// ::std::allocator<char> >
+// <substitution> ::= Si # ::std::basic_istream<char, std::char_traits<char> >
+// <substitution> ::= So # ::std::basic_ostream<char, std::char_traits<char> >
+// <substitution> ::= Sd # ::std::basic_iostream<char, std::char_traits<char> >
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseSubstitution() {
+ if (!consumeIf('S'))
+ return nullptr;
+
+ if (std::islower(look())) {
+ Node *SpecialSub;
+ switch (look()) {
+ case 'a':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::allocator);
+ break;
+ case 'b':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::basic_string);
+ break;
+ case 's':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::string);
+ break;
+ case 'i':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::istream);
+ break;
+ case 'o':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::ostream);
+ break;
+ case 'd':
+ ++First;
+ SpecialSub = make<SpecialSubstitution>(SpecialSubKind::iostream);
+ break;
+ default:
+ return nullptr;
+ }
+ if (!SpecialSub)
+ return nullptr;
+ // Itanium C++ ABI 5.1.2: If a name that would use a built-in <substitution>
+ // has ABI tags, the tags are appended to the substitution; the result is a
+ // substitutable component.
+ Node *WithTags = getDerived().parseAbiTags(SpecialSub);
+ if (WithTags != SpecialSub) {
+ Subs.push_back(WithTags);
+ SpecialSub = WithTags;
+ }
+ return SpecialSub;
+ }
+
+ // ::= S_
+ if (consumeIf('_')) {
+ if (Subs.empty())
+ return nullptr;
+ return Subs[0];
+ }
+
+ // ::= S <seq-id> _
+ size_t Index = 0;
+ if (parseSeqId(&Index))
+ return nullptr;
+ ++Index;
+ if (!consumeIf('_') || Index >= Subs.size())
+ return nullptr;
+ return Subs[Index];
+}
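+
+// For example, "S_" refers to the first recorded substitution candidate
+// (Subs[0]), "S0_" to the second, and "SA_" to the twelfth, using the base-36
+// seq-id above.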
+
+// <template-param> ::= T_ # first template parameter
+// ::= T <parameter-2 non-negative number> _
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseTemplateParam() {
+ if (!consumeIf('T'))
+ return nullptr;
+
+ size_t Index = 0;
+ if (!consumeIf('_')) {
+ if (parsePositiveInteger(&Index))
+ return nullptr;
+ ++Index;
+ if (!consumeIf('_'))
+ return nullptr;
+ }
+
+ // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter list
+ // are mangled as the corresponding artificial template type parameter.
+ if (ParsingLambdaParams)
+ return make<NameType>("auto");
+
+ // If we're in a context where this <template-param> refers to a
+ // <template-arg> further ahead in the mangled name (currently just conversion
+ // operator types), then we should only look it up in the right context.
+ if (PermitForwardTemplateReferences) {
+ Node *ForwardRef = make<ForwardTemplateReference>(Index);
+ if (!ForwardRef)
+ return nullptr;
+ assert(ForwardRef->getKind() == Node::KForwardTemplateReference);
+ ForwardTemplateRefs.push_back(
+ static_cast<ForwardTemplateReference *>(ForwardRef));
+ return ForwardRef;
+ }
+
+ if (Index >= TemplateParams.size())
+ return nullptr;
+ return TemplateParams[Index];
+}
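+
+// For example, "T_" is the first template parameter and "T0_" the second, so
+// "_Z1fIiEvT_" demangles to "void f<int>(int)".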
+
+// <template-arg> ::= <type> # type or template
+// ::= X <expression> E # expression
+// ::= <expr-primary> # simple expressions
+// ::= J <template-arg>* E # argument pack
+// ::= LZ <encoding> E # extension
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parseTemplateArg() {
+ switch (look()) {
+ case 'X': {
+ ++First;
+ Node *Arg = getDerived().parseExpr();
+ if (Arg == nullptr || !consumeIf('E'))
+ return nullptr;
+ return Arg;
+ }
+ case 'J': {
+ ++First;
+ size_t ArgsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ }
+ NodeArray Args = popTrailingNodeArray(ArgsBegin);
+ return make<TemplateArgumentPack>(Args);
+ }
+ case 'L': {
+ // ::= LZ <encoding> E # extension
+ if (look(1) == 'Z') {
+ First += 2;
+ Node *Arg = getDerived().parseEncoding();
+ if (Arg == nullptr || !consumeIf('E'))
+ return nullptr;
+ return Arg;
+ }
+ // ::= <expr-primary> # simple expressions
+ return getDerived().parseExprPrimary();
+ }
+ default:
+ return getDerived().parseType();
+ }
+}
+
+// <template-args> ::= I <template-arg>* E
+// extension: the ABI says <template-arg>+
+template <typename Derived, typename Alloc>
+Node *
+AbstractManglingParser<Derived, Alloc>::parseTemplateArgs(bool TagTemplates) {
+ if (!consumeIf('I'))
+ return nullptr;
+
+ // <template-params> refer to the innermost <template-args>. Clear out any
+ // outer args that we may have inserted into TemplateParams.
+ if (TagTemplates)
+ TemplateParams.clear();
+
+ size_t ArgsBegin = Names.size();
+ while (!consumeIf('E')) {
+ if (TagTemplates) {
+ auto OldParams = std::move(TemplateParams);
+ Node *Arg = getDerived().parseTemplateArg();
+ TemplateParams = std::move(OldParams);
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ Node *TableEntry = Arg;
+ if (Arg->getKind() == Node::KTemplateArgumentPack) {
+ TableEntry = make<ParameterPack>(
+ static_cast<TemplateArgumentPack*>(TableEntry)->getElements());
+ if (!TableEntry)
+ return nullptr;
+ }
+ TemplateParams.push_back(TableEntry);
+ } else {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
+ return nullptr;
+ Names.push_back(Arg);
+ }
+ }
+ return make<TemplateArgs>(popTrailingNodeArray(ArgsBegin));
+}
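+
+// For example, "IiE" is a one-element argument list holding the type int;
+// when TagTemplates is set, each parsed argument is also recorded in
+// TemplateParams so that later "T_" references can resolve to it.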
+
+// <mangled-name> ::= _Z <encoding>
+// ::= <type>
+// extension ::= ___Z <encoding> _block_invoke
+// extension ::= ___Z <encoding> _block_invoke<decimal-digit>+
+// extension ::= ___Z <encoding> _block_invoke_<decimal-digit>+
+template <typename Derived, typename Alloc>
+Node *AbstractManglingParser<Derived, Alloc>::parse() {
+ if (consumeIf("_Z")) {
+ Node *Encoding = getDerived().parseEncoding();
+ if (Encoding == nullptr)
+ return nullptr;
+ if (look() == '.') {
+ Encoding = make<DotSuffix>(Encoding, StringView(First, Last));
+ First = Last;
+ }
+ if (numLeft() != 0)
+ return nullptr;
+ return Encoding;
+ }
+
+ if (consumeIf("___Z")) {
+ Node *Encoding = getDerived().parseEncoding();
+ if (Encoding == nullptr || !consumeIf("_block_invoke"))
+ return nullptr;
+ bool RequireNumber = consumeIf('_');
+ if (parseNumber().empty() && RequireNumber)
+ return nullptr;
+ if (look() == '.')
+ First = Last;
+ if (numLeft() != 0)
+ return nullptr;
+ return make<SpecialName>("invocation function for block in ", Encoding);
+ }
+
+ Node *Ty = getDerived().parseType();
+ if (numLeft() != 0)
+ return nullptr;
+ return Ty;
+}
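+
+// For example, "_Z3fooi" demangles to "foo(int)"; as the grammar above notes,
+// a bare <type> such as "i" (giving "int") is also accepted.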
+
+template <typename Alloc>
+struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> {
+ using AbstractManglingParser<ManglingParser<Alloc>,
+ Alloc>::AbstractManglingParser;
+};
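+
+// A minimal usage sketch: the Alloc parameter is anything providing the
+// makeNode<T>(...)/allocateNodeArray(size_t) interface this parser expects
+// (LLVM's ItaniumDemangle.cpp supplies one such allocator), e.g.
+//
+//   ManglingParser<MyAllocator> P(Mangled, Mangled + std::strlen(Mangled));
+//   if (Node *AST = P.parse())   // nullptr on invalid input
+//     AST->print(S);             // S is an OutputStream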
+
+} // namespace itanium_demangle
+} // namespace llvm
+
+#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/contrib/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/contrib/llvm/include/llvm/Demangle/MicrosoftDemangle.h
new file mode 100644
index 000000000000..97b918fc9459
--- /dev/null
+++ b/contrib/llvm/include/llvm/Demangle/MicrosoftDemangle.h
@@ -0,0 +1,276 @@
+//===------------------------- MicrosoftDemangle.h --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
+#define LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
+
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/MicrosoftDemangleNodes.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+
+#include <utility>
+
+namespace llvm {
+namespace ms_demangle {
+// This memory allocator is extremely fast, but it doesn't call dtors
+// for allocated objects. That means you can't use STL containers
+// (such as std::vector) with this allocator. But it pays off --
+// the demangler is 3x faster with this allocator compared to one with
+// STL containers.
+constexpr size_t AllocUnit = 4096;
+
+class ArenaAllocator {
+ struct AllocatorNode {
+ uint8_t *Buf = nullptr;
+ size_t Used = 0;
+ size_t Capacity = 0;
+ AllocatorNode *Next = nullptr;
+ };
+
+ void addNode(size_t Capacity) {
+ AllocatorNode *NewHead = new AllocatorNode;
+ NewHead->Buf = new uint8_t[Capacity];
+ NewHead->Next = Head;
+ NewHead->Capacity = Capacity;
+ Head = NewHead;
+ NewHead->Used = 0;
+ }
+
+public:
+ ArenaAllocator() { addNode(AllocUnit); }
+
+ ~ArenaAllocator() {
+ while (Head) {
+ assert(Head->Buf);
+ delete[] Head->Buf;
+ AllocatorNode *Next = Head->Next;
+ delete Head;
+ Head = Next;
+ }
+ }
+
+ char *allocUnalignedBuffer(size_t Length) {
+ uint8_t *Buf = Head->Buf + Head->Used;
+
+ Head->Used += Length;
+ if (Head->Used > Head->Capacity) {
+ // It's possible we need a buffer which is larger than our default unit
+ // size, so we need to be careful to add a node with capacity that is at
+ // least as large as what we need.
+ addNode(std::max(AllocUnit, Length));
+ Head->Used = Length;
+ Buf = Head->Buf;
+ }
+
+ return reinterpret_cast<char *>(Buf);
+ }
+
+ template <typename T, typename... Args> T *allocArray(size_t Count) {
+
+ size_t Size = Count * sizeof(T);
+ assert(Head && Head->Buf);
+
+ size_t P = (size_t)Head->Buf + Head->Used;
+ uintptr_t AlignedP =
+ (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1));
+ uint8_t *PP = (uint8_t *)AlignedP;
+ size_t Adjustment = AlignedP - P;
+
+ Head->Used += Size + Adjustment;
+ if (Head->Used < Head->Capacity)
+ return new (PP) T[Count]();
+
+ addNode(AllocUnit);
+ Head->Used = Size;
+ return new (Head->Buf) T[Count]();
+ }
+
+ template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
+
+ size_t Size = sizeof(T);
+ assert(Head && Head->Buf);
+
+ size_t P = (size_t)Head->Buf + Head->Used;
+ uintptr_t AlignedP =
+ (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1));
+ uint8_t *PP = (uint8_t *)AlignedP;
+ size_t Adjustment = AlignedP - P;
+
+ Head->Used += Size + Adjustment;
+ if (Head->Used < Head->Capacity)
+ return new (PP) T(std::forward<Args>(ConstructorArgs)...);
+
+ addNode(AllocUnit);
+ Head->Used = Size;
+ return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
+ }
+
+private:
+ AllocatorNode *Head = nullptr;
+};
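+
+// For example, alloc<T>() above just bumps Head->Used by sizeof(T) plus any
+// alignment padding and placement-news the object into the current block,
+// starting a fresh AllocUnit-sized block when the current one is full; no
+// destructors ever run, as noted above.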
+
+struct BackrefContext {
+ static constexpr size_t Max = 10;
+
+ TypeNode *FunctionParams[Max];
+ size_t FunctionParamCount = 0;
+
+ // The first 10 BackReferences in a mangled name can be back-referenced by
+ // special name @[0-9]. This is storage for the first 10 BackReferences.
+ NamedIdentifierNode *Names[Max];
+ size_t NamesCount = 0;
+};
+
+enum class QualifierMangleMode { Drop, Mangle, Result };
+
+enum NameBackrefBehavior : uint8_t {
+ NBB_None = 0, // don't save any names as backrefs.
+ NBB_Template = 1 << 0, // save template instantiations.
+ NBB_Simple = 1 << 1, // save simple names.
+};
+
+enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder };
+
+// Demangler class takes the main role in demangling symbols.
+// It has a set of functions to parse mangled symbols into Type instances.
+// It also has a set of functions to convert Type instances to strings.
+class Demangler {
+public:
+ Demangler() = default;
+ virtual ~Demangler() = default;
+
+ // You are supposed to call parse() first and then check if Error is true. If
+ // it is false, call output() on the returned SymbolNode to write the
+ // formatted name to the given stream.
+ SymbolNode *parse(StringView &MangledName);
+
+ TagTypeNode *parseTagUniqueName(StringView &MangledName);
+
+ // True if an error occurred.
+ bool Error = false;
+
+ void dumpBackReferences();
+
+private:
+ SymbolNode *demangleEncodedSymbol(StringView &MangledName,
+ QualifiedNameNode *QN);
+
+ VariableSymbolNode *demangleVariableEncoding(StringView &MangledName,
+ StorageClass SC);
+ FunctionSymbolNode *demangleFunctionEncoding(StringView &MangledName);
+
+ Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
+
+ // Parser functions. This is a recursive-descent parser.
+ TypeNode *demangleType(StringView &MangledName, QualifierMangleMode QMM);
+ PrimitiveTypeNode *demanglePrimitiveType(StringView &MangledName);
+ CustomTypeNode *demangleCustomType(StringView &MangledName);
+ TagTypeNode *demangleClassType(StringView &MangledName);
+ PointerTypeNode *demanglePointerType(StringView &MangledName);
+ PointerTypeNode *demangleMemberPointerType(StringView &MangledName);
+ FunctionSignatureNode *demangleFunctionType(StringView &MangledName,
+ bool HasThisQuals);
+
+ ArrayTypeNode *demangleArrayType(StringView &MangledName);
+
+ NodeArrayNode *demangleTemplateParameterList(StringView &MangledName);
+ NodeArrayNode *demangleFunctionParameterList(StringView &MangledName);
+
+ std::pair<uint64_t, bool> demangleNumber(StringView &MangledName);
+ uint64_t demangleUnsigned(StringView &MangledName);
+ int64_t demangleSigned(StringView &MangledName);
+
+ void memorizeString(StringView s);
+ void memorizeIdentifier(IdentifierNode *Identifier);
+
+ /// Allocate a copy of \p Borrowed into memory that we own.
+ StringView copyString(StringView Borrowed);
+
+ QualifiedNameNode *demangleFullyQualifiedTypeName(StringView &MangledName);
+ QualifiedNameNode *demangleFullyQualifiedSymbolName(StringView &MangledName);
+
+ IdentifierNode *demangleUnqualifiedTypeName(StringView &MangledName,
+ bool Memorize);
+ IdentifierNode *demangleUnqualifiedSymbolName(StringView &MangledName,
+ NameBackrefBehavior NBB);
+
+ QualifiedNameNode *demangleNameScopeChain(StringView &MangledName,
+ IdentifierNode *UnqualifiedName);
+ IdentifierNode *demangleNameScopePiece(StringView &MangledName);
+
+ NamedIdentifierNode *demangleBackRefName(StringView &MangledName);
+ IdentifierNode *demangleTemplateInstantiationName(StringView &MangledName,
+ NameBackrefBehavior NBB);
+ IdentifierNode *demangleFunctionIdentifierCode(StringView &MangledName);
+ IdentifierNode *
+ demangleFunctionIdentifierCode(StringView &MangledName,
+ FunctionIdentifierCodeGroup Group);
+ StructorIdentifierNode *demangleStructorIdentifier(StringView &MangledName,
+ bool IsDestructor);
+ ConversionOperatorIdentifierNode *
+ demangleConversionOperatorIdentifier(StringView &MangledName);
+ LiteralOperatorIdentifierNode *
+ demangleLiteralOperatorIdentifier(StringView &MangledName);
+
+ SymbolNode *demangleSpecialIntrinsic(StringView &MangledName);
+ SpecialTableSymbolNode *
+ demangleSpecialTableSymbolNode(StringView &MangledName,
+ SpecialIntrinsicKind SIK);
+ LocalStaticGuardVariableNode *
+ demangleLocalStaticGuard(StringView &MangledName);
+ VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena,
+ StringView &MangledName,
+ StringView VariableName);
+ VariableSymbolNode *
+ demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
+ StringView &MangledName);
+ FunctionSymbolNode *demangleInitFiniStub(StringView &MangledName,
+ bool IsDestructor);
+
+ NamedIdentifierNode *demangleSimpleName(StringView &MangledName,
+ bool Memorize);
+ NamedIdentifierNode *demangleAnonymousNamespaceName(StringView &MangledName);
+ NamedIdentifierNode *demangleLocallyScopedNamePiece(StringView &MangledName);
+ EncodedStringLiteralNode *demangleStringLiteral(StringView &MangledName);
+ FunctionSymbolNode *demangleVcallThunkNode(StringView &MangledName);
+
+ StringView demangleSimpleString(StringView &MangledName, bool Memorize);
+
+ FuncClass demangleFunctionClass(StringView &MangledName);
+ CallingConv demangleCallingConvention(StringView &MangledName);
+ StorageClass demangleVariableStorageClass(StringView &MangledName);
+ bool demangleThrowSpecification(StringView &MangledName);
+ wchar_t demangleWcharLiteral(StringView &MangledName);
+ uint8_t demangleCharLiteral(StringView &MangledName);
+
+ std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
+
+ // Memory allocator.
+ ArenaAllocator Arena;
+
+ // A single type uses one global back-ref table for all function params.
+ // This means back-refs can even go "into" other types. Examples:
+ //
+ // // Second int* is a back-ref to first.
+ // void foo(int *, int*);
+ //
+ // // Second int* is not a back-ref to first (first is not a function param).
+ // int* foo(int*);
+ //
+ // // Second int* is a back-ref to first (ALL function types share the same
+ // // back-ref map).
+ // using F = void(*)(int*);
+ // F G(int *);
+ BackrefContext Backrefs;
+};
+
+} // namespace ms_demangle
+} // namespace llvm
+
+#endif // LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
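
A minimal usage sketch for the interface above, hedged: it assumes StringView is constructible from a C string and that the headers are reachable under llvm/Demangle as spelled below, neither of which is shown in this hunk.

#include "llvm/Demangle/MicrosoftDemangle.h"

#include <cstdio>
#include <string>

using namespace llvm;
using namespace llvm::ms_demangle;

// Parse a Microsoft-mangled name and print the demangled form, if any.
void printDemangled(const char *Mangled) {
  Demangler D;                       // owns the ArenaAllocator for all nodes
  StringView Name(Mangled);          // parse() consumes this view as it goes
  SymbolNode *Sym = D.parse(Name);
  if (D.Error || !Sym)
    return;                          // parse failed; nothing to print
  std::string Pretty = Sym->toString();  // renders the whole node tree
  std::puts(Pretty.c_str());
}

Everything allocated by D lives in its arena, so no per-node cleanup is needed (and no destructors run), exactly as the allocator comment above warns.
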
diff --git a/contrib/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/contrib/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
new file mode 100644
index 000000000000..9e3478e9fd29
--- /dev/null
+++ b/contrib/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -0,0 +1,605 @@
+#ifndef LLVM_SUPPORT_MICROSOFTDEMANGLENODES_H
+#define LLVM_SUPPORT_MICROSOFTDEMANGLENODES_H
+
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/StringView.h"
+#include <array>
+
+class OutputStream;
+
+namespace llvm {
+namespace ms_demangle {
+
+// Storage classes
+enum Qualifiers : uint8_t {
+ Q_None = 0,
+ Q_Const = 1 << 0,
+ Q_Volatile = 1 << 1,
+ Q_Far = 1 << 2,
+ Q_Huge = 1 << 3,
+ Q_Unaligned = 1 << 4,
+ Q_Restrict = 1 << 5,
+ Q_Pointer64 = 1 << 6
+};
+
+enum class StorageClass : uint8_t {
+ None,
+ PrivateStatic,
+ ProtectedStatic,
+ PublicStatic,
+ Global,
+ FunctionLocalStatic,
+};
+
+enum class PointerAffinity { None, Pointer, Reference, RValueReference };
+enum class FunctionRefQualifier { None, Reference, RValueReference };
+
+// Calling conventions
+enum class CallingConv : uint8_t {
+ None,
+ Cdecl,
+ Pascal,
+ Thiscall,
+ Stdcall,
+ Fastcall,
+ Clrcall,
+ Eabi,
+ Vectorcall,
+ Regcall,
+};
+
+enum class ReferenceKind : uint8_t { None, LValueRef, RValueRef };
+
+enum OutputFlags {
+ OF_Default = 0,
+ OF_NoCallingConvention = 1,
+ OF_NoTagSpecifier = 2,
+};
+
+// Types
+enum class PrimitiveKind {
+ Void,
+ Bool,
+ Char,
+ Schar,
+ Uchar,
+ Char16,
+ Char32,
+ Short,
+ Ushort,
+ Int,
+ Uint,
+ Long,
+ Ulong,
+ Int64,
+ Uint64,
+ Wchar,
+ Float,
+ Double,
+ Ldouble,
+ Nullptr,
+};
+
+enum class CharKind {
+ Char,
+ Char16,
+ Char32,
+ Wchar,
+};
+
+enum class IntrinsicFunctionKind : uint8_t {
+ None,
+ New, // ?2 # operator new
+ Delete, // ?3 # operator delete
+ Assign, // ?4 # operator=
+ RightShift, // ?5 # operator>>
+ LeftShift, // ?6 # operator<<
+ LogicalNot, // ?7 # operator!
+ Equals, // ?8 # operator==
+ NotEquals, // ?9 # operator!=
+ ArraySubscript, // ?A # operator[]
+ Pointer, // ?C # operator->
+ Dereference, // ?D # operator*
+ Increment, // ?E # operator++
+ Decrement, // ?F # operator--
+ Minus, // ?G # operator-
+ Plus, // ?H # operator+
+ BitwiseAnd, // ?I # operator&
+ MemberPointer, // ?J # operator->*
+ Divide, // ?K # operator/
+ Modulus, // ?L # operator%
+ LessThan, // ?M operator<
+ LessThanEqual, // ?N operator<=
+ GreaterThan, // ?O operator>
+ GreaterThanEqual, // ?P operator>=
+ Comma, // ?Q operator,
+ Parens, // ?R operator()
+ BitwiseNot, // ?S operator~
+ BitwiseXor, // ?T operator^
+ BitwiseOr, // ?U operator|
+ LogicalAnd, // ?V operator&&
+ LogicalOr, // ?W operator||
+ TimesEqual, // ?X operator*=
+ PlusEqual, // ?Y operator+=
+ MinusEqual, // ?Z operator-=
+ DivEqual, // ?_0 operator/=
+ ModEqual, // ?_1 operator%=
+ RshEqual, // ?_2 operator>>=
+ LshEqual, // ?_3 operator<<=
+ BitwiseAndEqual, // ?_4 operator&=
+ BitwiseOrEqual, // ?_5 operator|=
+ BitwiseXorEqual, // ?_6 operator^=
+ VbaseDtor, // ?_D # vbase destructor
+ VecDelDtor, // ?_E # vector deleting destructor
+ DefaultCtorClosure, // ?_F # default constructor closure
+ ScalarDelDtor, // ?_G # scalar deleting destructor
+ VecCtorIter, // ?_H # vector constructor iterator
+ VecDtorIter, // ?_I # vector destructor iterator
+ VecVbaseCtorIter, // ?_J # vector vbase constructor iterator
+ VdispMap, // ?_K # virtual displacement map
+ EHVecCtorIter, // ?_L # eh vector constructor iterator
+ EHVecDtorIter, // ?_M # eh vector destructor iterator
+ EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
+ CopyCtorClosure, // ?_O # copy constructor closure
+ LocalVftableCtorClosure, // ?_T # local vftable constructor closure
+ ArrayNew, // ?_U operator new[]
+ ArrayDelete, // ?_V operator delete[]
+ ManVectorCtorIter, // ?__A managed vector ctor iterator
+ ManVectorDtorIter, // ?__B managed vector dtor iterator
+ EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator
+ EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iterator
+ VectorCopyCtorIter, // ?__G vector copy constructor iterator
+ VectorVbaseCopyCtorIter, // ?__H vector vbase copy constructor iterator
+ ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy constructor
+ CoAwait, // ?__L co_await
+ Spaceship, // operator<=>
+ MaxIntrinsic
+};
+
+enum class SpecialIntrinsicKind {
+ None,
+ Vftable,
+ Vbtable,
+ Typeof,
+ VcallThunk,
+ LocalStaticGuard,
+ StringLiteralSymbol,
+ UdtReturning,
+ Unknown,
+ DynamicInitializer,
+ DynamicAtexitDestructor,
+ RttiTypeDescriptor,
+ RttiBaseClassDescriptor,
+ RttiBaseClassArray,
+ RttiClassHierarchyDescriptor,
+ RttiCompleteObjLocator,
+ LocalVftable,
+ LocalStaticThreadGuard,
+};
+
+// Function classes
+enum FuncClass : uint16_t {
+ FC_None = 0,
+ FC_Public = 1 << 0,
+ FC_Protected = 1 << 1,
+ FC_Private = 1 << 2,
+ FC_Global = 1 << 3,
+ FC_Static = 1 << 4,
+ FC_Virtual = 1 << 5,
+ FC_Far = 1 << 6,
+ FC_ExternC = 1 << 7,
+ FC_NoParameterList = 1 << 8,
+ FC_VirtualThisAdjust = 1 << 9,
+ FC_VirtualThisAdjustEx = 1 << 10,
+ FC_StaticThisAdjust = 1 << 11,
+};
+
+enum class TagKind { Class, Struct, Union, Enum };
+
+enum class NodeKind {
+ Unknown,
+ Md5Symbol,
+ PrimitiveType,
+ FunctionSignature,
+ Identifier,
+ NamedIdentifier,
+ VcallThunkIdentifier,
+ LocalStaticGuardIdentifier,
+ IntrinsicFunctionIdentifier,
+ ConversionOperatorIdentifier,
+ DynamicStructorIdentifier,
+ StructorIdentifier,
+ LiteralOperatorIdentifier,
+ ThunkSignature,
+ PointerType,
+ TagType,
+ ArrayType,
+ Custom,
+ IntrinsicType,
+ NodeArray,
+ QualifiedName,
+ TemplateParameterReference,
+ EncodedStringLiteral,
+ IntegerLiteral,
+ RttiBaseClassDescriptor,
+ LocalStaticGuardVariable,
+ FunctionSymbol,
+ VariableSymbol,
+ SpecialTableSymbol
+};
+
+struct Node {
+ explicit Node(NodeKind K) : Kind(K) {}
+ virtual ~Node() = default;
+
+ NodeKind kind() const { return Kind; }
+
+ virtual void output(OutputStream &OS, OutputFlags Flags) const = 0;
+
+ std::string toString(OutputFlags Flags = OF_Default) const;
+
+private:
+ NodeKind Kind;
+};
+
+struct TypeNode;
+struct PrimitiveTypeNode;
+struct FunctionSignatureNode;
+struct IdentifierNode;
+struct NamedIdentifierNode;
+struct VcallThunkIdentifierNode;
+struct IntrinsicFunctionIdentifierNode;
+struct LiteralOperatorIdentifierNode;
+struct ConversionOperatorIdentifierNode;
+struct StructorIdentifierNode;
+struct ThunkSignatureNode;
+struct PointerTypeNode;
+struct ArrayTypeNode;
+struct CustomNode;
+struct TagTypeNode;
+struct IntrinsicTypeNode;
+struct NodeArrayNode;
+struct QualifiedNameNode;
+struct TemplateParameterReferenceNode;
+struct EncodedStringLiteralNode;
+struct IntegerLiteralNode;
+struct RttiBaseClassDescriptorNode;
+struct LocalStaticGuardVariableNode;
+struct SymbolNode;
+struct FunctionSymbolNode;
+struct VariableSymbolNode;
+struct SpecialTableSymbolNode;
+
+struct TypeNode : public Node {
+ explicit TypeNode(NodeKind K) : Node(K) {}
+
+ virtual void outputPre(OutputStream &OS, OutputFlags Flags) const = 0;
+ virtual void outputPost(OutputStream &OS, OutputFlags Flags) const = 0;
+
+ void output(OutputStream &OS, OutputFlags Flags) const override {
+ outputPre(OS, Flags);
+ outputPost(OS, Flags);
+ }
+
+ void outputQuals(bool SpaceBefore, bool SpaceAfter) const;
+
+ Qualifiers Quals = Q_None;
+};
+
+struct PrimitiveTypeNode : public TypeNode {
+ explicit PrimitiveTypeNode(PrimitiveKind K)
+ : TypeNode(NodeKind::PrimitiveType), PrimKind(K) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const {}
+
+ PrimitiveKind PrimKind;
+};
+
+struct FunctionSignatureNode : public TypeNode {
+ explicit FunctionSignatureNode(NodeKind K) : TypeNode(K) {}
+ FunctionSignatureNode() : TypeNode(NodeKind::FunctionSignature) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+
+ // Valid if this FunctionTypeNode is the Pointee of a PointerType or
+ // MemberPointerType.
+ PointerAffinity Affinity = PointerAffinity::None;
+
+ // The function's calling convention.
+ CallingConv CallConvention = CallingConv::None;
+
+ // Function flags (global, public, etc.)
+ FuncClass FunctionClass = FC_Global;
+
+ FunctionRefQualifier RefQualifier = FunctionRefQualifier::None;
+
+ // The return type of the function.
+ TypeNode *ReturnType = nullptr;
+
+ // True if this is a C-style ... varargs function.
+ bool IsVariadic = false;
+
+ // Function parameters
+ NodeArrayNode *Params = nullptr;
+
+ // True if the function type is noexcept
+ bool IsNoexcept = false;
+};
+
+struct IdentifierNode : public Node {
+ explicit IdentifierNode(NodeKind K) : Node(K) {}
+
+ NodeArrayNode *TemplateParams = nullptr;
+
+protected:
+ void outputTemplateParameters(OutputStream &OS, OutputFlags Flags) const;
+};
+
+struct VcallThunkIdentifierNode : public IdentifierNode {
+ VcallThunkIdentifierNode() : IdentifierNode(NodeKind::VcallThunkIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ uint64_t OffsetInVTable = 0;
+};
+
+struct DynamicStructorIdentifierNode : public IdentifierNode {
+ DynamicStructorIdentifierNode()
+ : IdentifierNode(NodeKind::DynamicStructorIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ VariableSymbolNode *Variable = nullptr;
+ QualifiedNameNode *Name = nullptr;
+ bool IsDestructor = false;
+};
+
+struct NamedIdentifierNode : public IdentifierNode {
+ NamedIdentifierNode() : IdentifierNode(NodeKind::NamedIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ StringView Name;
+};
+
+struct IntrinsicFunctionIdentifierNode : public IdentifierNode {
+ explicit IntrinsicFunctionIdentifierNode(IntrinsicFunctionKind Operator)
+ : IdentifierNode(NodeKind::IntrinsicFunctionIdentifier),
+ Operator(Operator) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ IntrinsicFunctionKind Operator;
+};
+
+struct LiteralOperatorIdentifierNode : public IdentifierNode {
+ LiteralOperatorIdentifierNode()
+ : IdentifierNode(NodeKind::LiteralOperatorIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ StringView Name;
+};
+
+struct LocalStaticGuardIdentifierNode : public IdentifierNode {
+ LocalStaticGuardIdentifierNode()
+ : IdentifierNode(NodeKind::LocalStaticGuardIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ uint32_t ScopeIndex = 0;
+};
+
+struct ConversionOperatorIdentifierNode : public IdentifierNode {
+ ConversionOperatorIdentifierNode()
+ : IdentifierNode(NodeKind::ConversionOperatorIdentifier) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ // The type that this operator converts to.
+ TypeNode *TargetType = nullptr;
+};
+
+struct StructorIdentifierNode : public IdentifierNode {
+ StructorIdentifierNode() : IdentifierNode(NodeKind::StructorIdentifier) {}
+ explicit StructorIdentifierNode(bool IsDestructor)
+ : IdentifierNode(NodeKind::StructorIdentifier),
+ IsDestructor(IsDestructor) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ // The name of the class that this is a structor of.
+ IdentifierNode *Class = nullptr;
+ bool IsDestructor = false;
+};
+
+struct ThunkSignatureNode : public FunctionSignatureNode {
+ ThunkSignatureNode() : FunctionSignatureNode(NodeKind::ThunkSignature) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+
+ struct ThisAdjustor {
+ uint32_t StaticOffset = 0;
+ int32_t VBPtrOffset = 0;
+ int32_t VBOffsetOffset = 0;
+ int32_t VtordispOffset = 0;
+ };
+
+ ThisAdjustor ThisAdjust;
+};
+
+struct PointerTypeNode : public TypeNode {
+ PointerTypeNode() : TypeNode(NodeKind::PointerType) {}
+ void outputPre(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+
+ // Is this a pointer, reference, or rvalue-reference?
+ PointerAffinity Affinity = PointerAffinity::None;
+
+ // If this is a member pointer, this is the class that the member is in.
+ QualifiedNameNode *ClassParent = nullptr;
+
+ // Represents a type X in "a pointer to X", "a reference to X", or
+ // "rvalue-reference to X"
+ TypeNode *Pointee = nullptr;
+};
+
+struct TagTypeNode : public TypeNode {
+ explicit TagTypeNode(TagKind Tag) : TypeNode(NodeKind::TagType), Tag(Tag) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const;
+
+ QualifiedNameNode *QualifiedName = nullptr;
+ TagKind Tag;
+};
+
+struct ArrayTypeNode : public TypeNode {
+ ArrayTypeNode() : TypeNode(NodeKind::ArrayType) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const;
+
+ void outputDimensionsImpl(OutputStream &OS, OutputFlags Flags) const;
+ void outputOneDimension(OutputStream &OS, OutputFlags Flags, Node *N) const;
+
+ // A list of array dimensions. e.g. [3,4,5] in `int Foo[3][4][5]`
+ NodeArrayNode *Dimensions = nullptr;
+
+ // The type of array element.
+ TypeNode *ElementType = nullptr;
+};
+
+struct IntrinsicNode : public TypeNode {
+ IntrinsicNode() : TypeNode(NodeKind::IntrinsicType) {}
+ void output(OutputStream &OS, OutputFlags Flags) const override {}
+};
+
+struct CustomTypeNode : public TypeNode {
+ CustomTypeNode() : TypeNode(NodeKind::Custom) {}
+
+ void outputPre(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+
+ IdentifierNode *Identifier;
+};
+
+struct NodeArrayNode : public Node {
+ NodeArrayNode() : Node(NodeKind::NodeArray) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const;
+
+ Node **Nodes = nullptr;
+ size_t Count = 0;
+};
+
+struct QualifiedNameNode : public Node {
+ QualifiedNameNode() : Node(NodeKind::QualifiedName) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ NodeArrayNode *Components = nullptr;
+
+ IdentifierNode *getUnqualifiedIdentifier() {
+ Node *LastComponent = Components->Nodes[Components->Count - 1];
+ return static_cast<IdentifierNode *>(LastComponent);
+ }
+};
+
+struct TemplateParameterReferenceNode : public Node {
+ TemplateParameterReferenceNode()
+ : Node(NodeKind::TemplateParameterReference) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ SymbolNode *Symbol = nullptr;
+
+ int ThunkOffsetCount = 0;
+ std::array<int64_t, 3> ThunkOffsets;
+ PointerAffinity Affinity = PointerAffinity::None;
+ bool IsMemberPointer = false;
+};
+
+struct IntegerLiteralNode : public Node {
+ IntegerLiteralNode() : Node(NodeKind::IntegerLiteral) {}
+ IntegerLiteralNode(uint64_t Value, bool IsNegative)
+ : Node(NodeKind::IntegerLiteral), Value(Value), IsNegative(IsNegative) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ uint64_t Value = 0;
+ bool IsNegative = false;
+};
+
+struct RttiBaseClassDescriptorNode : public IdentifierNode {
+ RttiBaseClassDescriptorNode()
+ : IdentifierNode(NodeKind::RttiBaseClassDescriptor) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ uint32_t NVOffset = 0;
+ int32_t VBPtrOffset = 0;
+ uint32_t VBTableOffset = 0;
+ uint32_t Flags = 0;
+};
+
+struct SymbolNode : public Node {
+ explicit SymbolNode(NodeKind K) : Node(K) {}
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+ QualifiedNameNode *Name = nullptr;
+};
+
+struct SpecialTableSymbolNode : public SymbolNode {
+ explicit SpecialTableSymbolNode()
+ : SymbolNode(NodeKind::SpecialTableSymbol) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+ QualifiedNameNode *TargetName = nullptr;
+ Qualifiers Quals;
+};
+
+struct LocalStaticGuardVariableNode : public SymbolNode {
+ LocalStaticGuardVariableNode()
+ : SymbolNode(NodeKind::LocalStaticGuardVariable) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ bool IsVisible = false;
+};
+
+struct EncodedStringLiteralNode : public SymbolNode {
+ EncodedStringLiteralNode() : SymbolNode(NodeKind::EncodedStringLiteral) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ StringView DecodedString;
+ bool IsTruncated = false;
+ CharKind Char = CharKind::Char;
+};
+
+struct VariableSymbolNode : public SymbolNode {
+ VariableSymbolNode() : SymbolNode(NodeKind::VariableSymbol) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ StorageClass SC = StorageClass::None;
+ TypeNode *Type = nullptr;
+};
+
+struct FunctionSymbolNode : public SymbolNode {
+ FunctionSymbolNode() : SymbolNode(NodeKind::FunctionSymbol) {}
+
+ void output(OutputStream &OS, OutputFlags Flags) const override;
+
+ FunctionSignatureNode *Signature = nullptr;
+};
+
+} // namespace ms_demangle
+} // namespace llvm
+
+#endif
\ No newline at end of file
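
The node hierarchy above is consumed by switching on kind() and downcasting; a hypothetical helper (a sketch only, using just the fields declared in this header) might look like:

#include "llvm/Demangle/MicrosoftDemangleNodes.h"

using namespace llvm::ms_demangle;

// Count the parameters of a demangled function symbol, or return -1 if the
// node is not a function symbol at all.
int functionParamCount(const SymbolNode *Sym) {
  if (!Sym || Sym->kind() != NodeKind::FunctionSymbol)
    return -1;
  const auto *FS = static_cast<const FunctionSymbolNode *>(Sym);
  if (!FS->Signature || !FS->Signature->Params)
    return 0;                        // e.g. an empty parameter list
  return static_cast<int>(FS->Signature->Params->Count);
}
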
diff --git a/contrib/llvm/lib/Demangle/StringView.h b/contrib/llvm/include/llvm/Demangle/StringView.h
index a89deda694c2..a89deda694c2 100644
--- a/contrib/llvm/lib/Demangle/StringView.h
+++ b/contrib/llvm/include/llvm/Demangle/StringView.h
diff --git a/contrib/llvm/lib/Demangle/Utility.h b/contrib/llvm/include/llvm/Demangle/Utility.h
index 54cd99e5026b..1d1601c81635 100644
--- a/contrib/llvm/lib/Demangle/Utility.h
+++ b/contrib/llvm/include/llvm/Demangle/Utility.h
@@ -70,22 +70,6 @@ public:
BufferCapacity = BufferCapacity_;
}
- /// Create an OutputStream from a buffer and a size. If either of these are
- /// null a buffer is allocated.
- static OutputStream create(char *StartBuf, size_t *Size, size_t AllocSize) {
- OutputStream Result;
-
- if (!StartBuf || !Size) {
- StartBuf = static_cast<char *>(std::malloc(AllocSize));
- if (StartBuf == nullptr)
- std::terminate();
- Size = &AllocSize;
- }
-
- Result.reset(StartBuf, *Size);
- return Result;
- }
-
/// If a ParameterPackExpansion (or similar type) is encountered, the offset
/// into the pack that we're currently printing.
unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max();
@@ -185,4 +169,19 @@ public:
SwapAndRestore &operator=(const SwapAndRestore &) = delete;
};
+inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
+ size_t InitSize) {
+ size_t BufferSize;
+ if (Buf == nullptr) {
+ Buf = static_cast<char *>(std::malloc(InitSize));
+ if (Buf == nullptr)
+ return false;
+ BufferSize = InitSize;
+ } else
+ BufferSize = *N;
+
+ S.reset(Buf, BufferSize);
+ return true;
+}
+
#endif
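
The removed OutputStream::create and the added initializeOutputStream do the same buffer setup, but the new helper reports allocation failure instead of calling std::terminate(). A caller under the new scheme might look like this sketch (the actual rendering into the stream is elided):

#include "llvm/Demangle/Utility.h"

#include <cstddef>

bool prepareStream(OutputStream &OS, size_t InitSize) {
  // A null Buf asks the helper to malloc InitSize bytes; N is only read when
  // a caller-provided buffer is passed, so nullptr is fine here.
  if (!initializeOutputStream(/*Buf=*/nullptr, /*N=*/nullptr, OS, InitSize))
    return false;                    // malloc failed
  // ... write the demangled output into OS here ...
  return true;
}
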
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h b/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
index 1ce772ccde95..1b08379b8c3b 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
@@ -35,25 +35,6 @@ class ObjectFile;
} // end namespace object
-/// JITEvent_EmittedFunctionDetails - Helper struct for containing information
-/// about a generated machine code function.
-struct JITEvent_EmittedFunctionDetails {
- struct LineStart {
- /// The address at which the current line changes.
- uintptr_t Address;
-
- /// The new location information. These can be translated to DebugLocTuples
- /// using MF->getDebugLocTuple().
- DebugLoc Loc;
- };
-
- /// The machine function the struct contains information for.
- const MachineFunction *MF;
-
- /// The list of line boundary information, sorted by address.
- std::vector<LineStart> LineStarts;
-};
-
/// JITEventListener - Abstract interface for use by the JIT to notify clients
/// about significant events during compilation. For example, to notify
/// profilers and debuggers that need to know where functions have been emitted.
@@ -61,26 +42,26 @@ struct JITEvent_EmittedFunctionDetails {
/// The default implementation of each method does nothing.
class JITEventListener {
public:
- using EmittedFunctionDetails = JITEvent_EmittedFunctionDetails;
+ using ObjectKey = uint64_t;
-public:
JITEventListener() = default;
virtual ~JITEventListener() = default;
- /// NotifyObjectEmitted - Called after an object has been successfully
- /// emitted to memory. NotifyFunctionEmitted will not be called for
+ /// notifyObjectLoaded - Called after an object has had its sections allocated
+ /// and addresses assigned to all symbols. Note: Section memory will not have
+ /// been relocated yet. notifyFunctionLoaded will not be called for
/// individual functions in the object.
///
/// ELF-specific information
/// The ObjectImage contains the generated object image
/// with section headers updated to reflect the address at which sections
/// were loaded and with relocations performed in-place on debug sections.
- virtual void NotifyObjectEmitted(const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) {}
+ virtual void notifyObjectLoaded(ObjectKey K, const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {}
- /// NotifyFreeingObject - Called just before the memory associated with
+ /// notifyFreeingObject - Called just before the memory associated with
/// a previously emitted object is released.
- virtual void NotifyFreeingObject(const object::ObjectFile &Obj) {}
+ virtual void notifyFreeingObject(ObjectKey K) {}
  // Get a pointer to the GDB debugger registration listener.
static JITEventListener *createGDBRegistrationListener();
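
Listeners written against the old NotifyObjectEmitted/NotifyFreeingObject pair now key both events on an ObjectKey rather than on the ObjectFile itself. A minimal sketch of a listener on the new interface:

#include "llvm/ExecutionEngine/JITEventListener.h"

#include <vector>

using namespace llvm;

// Hypothetical listener: remembers which objects are currently loaded. The
// same ObjectKey passed to notifyObjectLoaded later arrives at
// notifyFreeingObject, so no ObjectFile reference needs to be retained.
class LoadTrackingListener : public JITEventListener {
public:
  void notifyObjectLoaded(ObjectKey K, const object::ObjectFile &Obj,
                          const RuntimeDyld::LoadedObjectInfo &L) override {
    LoadedKeys.push_back(K);
  }

  void notifyFreeingObject(ObjectKey K) override {
    // Match K against LoadedKeys and drop it (bookkeeping elided).
  }

private:
  std::vector<ObjectKey> LoadedKeys;
};
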
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h
index 53037c3dbc72..05c9590726df 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h
@@ -23,6 +23,7 @@
#include <set>
#include <string>
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
@@ -32,13 +33,25 @@ class GlobalValue;
namespace object {
-class BasicSymbolRef;
+class SymbolRef;
} // end namespace object
/// Represents an address in the target process's address space.
using JITTargetAddress = uint64_t;
+/// Convert a JITTargetAddress to a pointer.
+template <typename T> T jitTargetAddressToPointer(JITTargetAddress Addr) {
+ static_assert(std::is_pointer<T>::value, "T must be a pointer type");
+ uintptr_t IntPtr = static_cast<uintptr_t>(Addr);
+ assert(IntPtr == Addr && "JITTargetAddress value out of range for uintptr_t");
+ return reinterpret_cast<T>(IntPtr);
+}
+
+template <typename T> JITTargetAddress pointerToJITTargetAddress(T *Ptr) {
+ return static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(Ptr));
+}
+
/// Flags for symbols in the JIT.
class JITSymbolFlags {
public:
@@ -52,8 +65,10 @@ public:
Common = 1U << 2,
Absolute = 1U << 3,
Exported = 1U << 4,
- Lazy = 1U << 5,
- Materializing = 1U << 6
+ Callable = 1U << 5,
+ Lazy = 1U << 6,
+ Materializing = 1U << 7,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Materializing)
};
static JITSymbolFlags stripTransientFlags(JITSymbolFlags Orig) {
@@ -71,6 +86,26 @@ public:
JITSymbolFlags(FlagNames Flags, TargetFlagsType TargetFlags)
: Flags(Flags), TargetFlags(TargetFlags) {}
+ /// Implicitly convert to bool. Returns true if any flag is set.
+ explicit operator bool() const { return Flags != None || TargetFlags != 0; }
+
+ /// Compare for equality.
+ bool operator==(const JITSymbolFlags &RHS) const {
+ return Flags == RHS.Flags && TargetFlags == RHS.TargetFlags;
+ }
+
+ /// Bitwise AND-assignment for FlagNames.
+ JITSymbolFlags &operator&=(const FlagNames &RHS) {
+ Flags &= RHS;
+ return *this;
+ }
+
+ /// Bitwise OR-assignment for FlagNames.
+ JITSymbolFlags &operator|=(const FlagNames &RHS) {
+ Flags |= RHS;
+ return *this;
+ }
+
/// Return true if there was an error retrieving this symbol.
bool hasError() const {
return (Flags & HasError) == HasError;
@@ -109,11 +144,13 @@ public:
return (Flags & Exported) == Exported;
}
- /// Implicitly convert to the underlying flags type.
- operator UnderlyingType&() { return Flags; }
+ /// Returns true if the given symbol is known to be callable.
+ bool isCallable() const { return (Flags & Callable) == Callable; }
- /// Implicitly convert to the underlying flags type.
- operator const UnderlyingType&() const { return Flags; }
+ /// Get the underlying flags value as an integer.
+ UnderlyingType getRawFlagsValue() const {
+ return static_cast<UnderlyingType>(Flags);
+ }
/// Return a reference to the target-specific flags.
TargetFlagsType& getTargetFlags() { return TargetFlags; }
@@ -127,13 +164,28 @@ public:
/// Construct a JITSymbolFlags value based on the flags of the given libobject
/// symbol.
- static JITSymbolFlags fromObjectSymbol(const object::BasicSymbolRef &Symbol);
+ static Expected<JITSymbolFlags>
+ fromObjectSymbol(const object::SymbolRef &Symbol);
private:
- UnderlyingType Flags = None;
+ FlagNames Flags = None;
TargetFlagsType TargetFlags = 0;
};
+inline JITSymbolFlags operator&(const JITSymbolFlags &LHS,
+ const JITSymbolFlags::FlagNames &RHS) {
+ JITSymbolFlags Tmp = LHS;
+ Tmp &= RHS;
+ return Tmp;
+}
+
+inline JITSymbolFlags operator|(const JITSymbolFlags &LHS,
+ const JITSymbolFlags::FlagNames &RHS) {
+ JITSymbolFlags Tmp = LHS;
+ Tmp |= RHS;
+ return Tmp;
+}
+
/// ARM-specific JIT symbol flags.
/// FIXME: This should be moved into a target-specific header.
class ARMJITSymbolFlags {
@@ -147,8 +199,8 @@ public:
operator JITSymbolFlags::TargetFlagsType&() { return Flags; }
- static ARMJITSymbolFlags fromObjectSymbol(
- const object::BasicSymbolRef &Symbol);
+ static ARMJITSymbolFlags fromObjectSymbol(const object::SymbolRef &Symbol);
+
private:
JITSymbolFlags::TargetFlagsType Flags = 0;
};
@@ -293,7 +345,7 @@ class JITSymbolResolver {
public:
using LookupSet = std::set<StringRef>;
using LookupResult = std::map<StringRef, JITEvaluatedSymbol>;
- using LookupFlagsResult = std::map<StringRef, JITSymbolFlags>;
+ using OnResolvedFunction = std::function<void(Expected<LookupResult>)>;
virtual ~JITSymbolResolver() = default;
@@ -302,13 +354,14 @@ public:
///
/// This method will return an error if any of the given symbols can not be
/// resolved, or if the resolution process itself triggers an error.
- virtual Expected<LookupResult> lookup(const LookupSet &Symbols) = 0;
+ virtual void lookup(const LookupSet &Symbols,
+ OnResolvedFunction OnResolved) = 0;
- /// Returns the symbol flags for each of the given symbols.
- ///
- /// This method does NOT return an error if any of the given symbols is
- /// missing. Instead, that symbol will be left out of the result map.
- virtual Expected<LookupFlagsResult> lookupFlags(const LookupSet &Symbols) = 0;
+ /// Returns the subset of the given symbols that should be materialized by
+ /// the caller. Only weak/common symbols should be looked up, as strong
+ /// definitions are implicitly always part of the caller's responsibility.
+ virtual Expected<LookupSet>
+ getResponsibilitySet(const LookupSet &Symbols) = 0;
private:
virtual void anchor();
@@ -320,11 +373,11 @@ public:
/// Performs lookup by, for each symbol, first calling
/// findSymbolInLogicalDylib and if that fails calling
/// findSymbol.
- Expected<LookupResult> lookup(const LookupSet &Symbols) final;
+ void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) final;
/// Performs flags lookup by calling findSymbolInLogicalDylib and
/// returning the flags value for that symbol.
- Expected<LookupFlagsResult> lookupFlags(const LookupSet &Symbols) final;
+ Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) final;
/// This method returns the address of the specified symbol if it exists
/// within the logical dynamic library represented by this JITSymbolResolver.
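
Together, the new Callable flag and the address/pointer helpers let a client turn a resolved symbol into something invocable. A sketch; it assumes JITEvaluatedSymbol's getFlags()/getAddress() accessors, which this hunk does not show:

#include "llvm/ExecutionEngine/JITSymbol.h"

using namespace llvm;

using MainFn = int (*)();

// Return a callable entry point for a resolved symbol, or nullptr if the
// symbol is not known to be callable.
MainFn getEntryPoint(const JITEvaluatedSymbol &Sym) {
  if (!Sym.getFlags().isCallable())   // new Callable flag query
    return nullptr;
  // jitTargetAddressToPointer asserts the address fits in a uintptr_t.
  return jitTargetAddressToPointer<MainFn>(Sym.getAddress());
}
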
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 8bd21a0e3dd6..884878925cde 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -16,6 +16,7 @@
#define LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -23,6 +24,7 @@
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/Layer.h"
+#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
#include "llvm/ExecutionEngine/Orc/Legacy.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
@@ -60,42 +62,73 @@ namespace orc {
class ExtractingIRMaterializationUnit;
-class CompileOnDemandLayer2 : public IRLayer {
- friend class ExtractingIRMaterializationUnit;
+class CompileOnDemandLayer : public IRLayer {
+ friend class PartitioningIRMaterializationUnit;
public:
/// Builder for IndirectStubsManagers.
using IndirectStubsManagerBuilder =
std::function<std::unique_ptr<IndirectStubsManager>()>;
- using GetAvailableContextFunction = std::function<LLVMContext &()>;
+ using GlobalValueSet = std::set<const GlobalValue *>;
- CompileOnDemandLayer2(ExecutionSession &ES, IRLayer &BaseLayer,
- JITCompileCallbackManager &CCMgr,
- IndirectStubsManagerBuilder BuildIndirectStubsManager,
- GetAvailableContextFunction GetAvailableContext);
+ /// Partitioning function.
+ using PartitionFunction =
+ std::function<Optional<GlobalValueSet>(GlobalValueSet Requested)>;
- Error add(VSO &V, VModuleKey K, std::unique_ptr<Module> M) override;
+ /// Off-the-shelf partitioning which compiles all requested symbols (usually
+ /// a single function at a time).
+ static Optional<GlobalValueSet> compileRequested(GlobalValueSet Requested);
- void emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) override;
+ /// Off-the-shelf partitioning which compiles whole modules whenever any
+ /// symbol in them is requested.
+ static Optional<GlobalValueSet> compileWholeModule(GlobalValueSet Requested);
+
+ /// Construct a CompileOnDemandLayer.
+ CompileOnDemandLayer(ExecutionSession &ES, IRLayer &BaseLayer,
+ LazyCallThroughManager &LCTMgr,
+ IndirectStubsManagerBuilder BuildIndirectStubsManager);
+
+ /// Sets the partition function.
+ void setPartitionFunction(PartitionFunction Partition);
+
+ /// Emits the given module. This should not be called by clients: it will be
+ /// called by the JIT when a definition added via the add method is requested.
+ void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
private:
- using StubManagersMap =
- std::map<const VSO *, std::unique_ptr<IndirectStubsManager>>;
+ struct PerDylibResources {
+ public:
+ PerDylibResources(JITDylib &ImplD,
+ std::unique_ptr<IndirectStubsManager> ISMgr)
+ : ImplD(ImplD), ISMgr(std::move(ISMgr)) {}
+ JITDylib &getImplDylib() { return ImplD; }
+ IndirectStubsManager &getISManager() { return *ISMgr; }
+
+ private:
+ JITDylib &ImplD;
+ std::unique_ptr<IndirectStubsManager> ISMgr;
+ };
+
+ using PerDylibResourcesMap = std::map<const JITDylib *, PerDylibResources>;
+
+ PerDylibResources &getPerDylibResources(JITDylib &TargetD);
- IndirectStubsManager &getStubsManager(const VSO &V);
+ void cleanUpModule(Module &M);
- void emitExtractedFunctionsModule(MaterializationResponsibility R,
- std::unique_ptr<Module> M);
+ void expandPartition(GlobalValueSet &Partition);
+
+ void emitPartition(MaterializationResponsibility R, ThreadSafeModule TSM,
+ IRMaterializationUnit::SymbolNameToDefinitionMap Defs);
mutable std::mutex CODLayerMutex;
IRLayer &BaseLayer;
- JITCompileCallbackManager &CCMgr;
+ LazyCallThroughManager &LCTMgr;
IndirectStubsManagerBuilder BuildIndirectStubsManager;
- StubManagersMap StubsMgrs;
- GetAvailableContextFunction GetAvailableContext;
+ PerDylibResourcesMap DylibResources;
+ PartitionFunction Partition = compileRequested;
+ SymbolLinkagePromoter PromoteSymbols;
};
/// Compile-on-demand layer.
@@ -108,7 +141,7 @@ private:
template <typename BaseLayerT,
typename CompileCallbackMgrT = JITCompileCallbackManager,
typename IndirectStubsMgrT = IndirectStubsManager>
-class CompileOnDemandLayer {
+class LegacyCompileOnDemandLayer {
private:
template <typename MaterializerFtor>
class LambdaMaterializer final : public ValueMaterializer {
@@ -158,25 +191,6 @@ private:
return llvm::make_unique<RO>(std::move(ResourcePtr));
}
- class StaticGlobalRenamer {
- public:
- StaticGlobalRenamer() = default;
- StaticGlobalRenamer(StaticGlobalRenamer &&) = default;
- StaticGlobalRenamer &operator=(StaticGlobalRenamer &&) = default;
-
- void rename(Module &M) {
- for (auto &F : M)
- if (F.hasLocalLinkage())
- F.setName("$static." + Twine(NextId++));
- for (auto &G : M.globals())
- if (G.hasLocalLinkage())
- G.setName("$static." + Twine(NextId++));
- }
-
- private:
- unsigned NextId = 0;
- };
-
struct LogicalDylib {
struct SourceModuleEntry {
std::unique_ptr<Module> SourceMod;
@@ -230,7 +244,7 @@ private:
VModuleKey K;
std::shared_ptr<SymbolResolver> BackingResolver;
std::unique_ptr<IndirectStubsMgrT> StubsMgr;
- StaticGlobalRenamer StaticRenamer;
+ SymbolLinkagePromoter PromoteSymbols;
SourceModulesList SourceModules;
std::vector<VModuleKey> BaseLayerVModuleKeys;
};
@@ -251,13 +265,13 @@ public:
std::function<void(VModuleKey K, std::shared_ptr<SymbolResolver> R)>;
/// Construct a compile-on-demand layer instance.
- CompileOnDemandLayer(ExecutionSession &ES, BaseLayerT &BaseLayer,
- SymbolResolverGetter GetSymbolResolver,
- SymbolResolverSetter SetSymbolResolver,
- PartitioningFtor Partition,
- CompileCallbackMgrT &CallbackMgr,
- IndirectStubsManagerBuilderT CreateIndirectStubsManager,
- bool CloneStubsIntoPartitions = true)
+ LegacyCompileOnDemandLayer(ExecutionSession &ES, BaseLayerT &BaseLayer,
+ SymbolResolverGetter GetSymbolResolver,
+ SymbolResolverSetter SetSymbolResolver,
+ PartitioningFtor Partition,
+ CompileCallbackMgrT &CallbackMgr,
+ IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+ bool CloneStubsIntoPartitions = true)
: ES(ES), BaseLayer(BaseLayer),
GetSymbolResolver(std::move(GetSymbolResolver)),
SetSymbolResolver(std::move(SetSymbolResolver)),
@@ -265,7 +279,7 @@ public:
CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)),
CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
- ~CompileOnDemandLayer() {
+ ~LegacyCompileOnDemandLayer() {
// FIXME: Report error on log.
while (!LogicalDylibs.empty())
consumeError(removeModule(LogicalDylibs.begin()->first));
@@ -352,14 +366,9 @@ public:
private:
Error addLogicalModule(LogicalDylib &LD, std::unique_ptr<Module> SrcMPtr) {
- // Rename all static functions / globals to $static.X :
- // This will unique the names across all modules in the logical dylib,
- // simplifying symbol lookup.
- LD.StaticRenamer.rename(*SrcMPtr);
-
- // Bump the linkage and rename any anonymous/private members in SrcM to
- // ensure that everything will resolve properly after we partition SrcM.
- makeAllSymbolsExternallyAccessible(*SrcMPtr);
+ // Rename anonymous globals and promote linkage to ensure that everything
+ // will resolve properly after we partition SrcM.
+ LD.PromoteSymbols(*SrcMPtr);
// Create a logical module handle for SrcM within the logical dylib.
Module &SrcM = *SrcMPtr;
@@ -500,28 +509,29 @@ private:
auto GVsResolver = createSymbolResolver(
[&LD, LegacyLookup](const SymbolNameSet &Symbols) {
- auto SymbolFlags = lookupFlagsWithLegacyFn(Symbols, LegacyLookup);
+ auto RS = getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup);
- if (!SymbolFlags) {
- logAllUnhandledErrors(SymbolFlags.takeError(), errs(),
- "CODLayer/GVsResolver flags lookup failed: ");
- return SymbolFlagsMap();
+ if (!RS) {
+ logAllUnhandledErrors(
+ RS.takeError(), errs(),
+ "CODLayer/GVsResolver responsibility set lookup failed: ");
+ return SymbolNameSet();
}
- if (SymbolFlags->size() == Symbols.size())
- return *SymbolFlags;
+ if (RS->size() == Symbols.size())
+ return *RS;
SymbolNameSet NotFoundViaLegacyLookup;
for (auto &S : Symbols)
- if (!SymbolFlags->count(S))
+ if (!RS->count(S))
NotFoundViaLegacyLookup.insert(S);
- auto SymbolFlags2 =
- LD.BackingResolver->lookupFlags(NotFoundViaLegacyLookup);
+ auto RS2 =
+ LD.BackingResolver->getResponsibilitySet(NotFoundViaLegacyLookup);
- for (auto &KV : SymbolFlags2)
- (*SymbolFlags)[KV.first] = std::move(KV.second);
+ for (auto &S : RS2)
+ (*RS).insert(S);
- return *SymbolFlags;
+ return *RS;
},
[this, &LD,
LegacyLookup](std::shared_ptr<AsynchronousSymbolQuery> Query,
@@ -669,28 +679,29 @@ private:
// Create memory manager and symbol resolver.
auto Resolver = createSymbolResolver(
[&LD, LegacyLookup](const SymbolNameSet &Symbols) {
- auto SymbolFlags = lookupFlagsWithLegacyFn(Symbols, LegacyLookup);
- if (!SymbolFlags) {
- logAllUnhandledErrors(SymbolFlags.takeError(), errs(),
- "CODLayer/SubResolver flags lookup failed: ");
- return SymbolFlagsMap();
+ auto RS = getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup);
+ if (!RS) {
+ logAllUnhandledErrors(
+ RS.takeError(), errs(),
+ "CODLayer/SubResolver responsibility set lookup failed: ");
+ return SymbolNameSet();
}
- if (SymbolFlags->size() == Symbols.size())
- return *SymbolFlags;
+ if (RS->size() == Symbols.size())
+ return *RS;
SymbolNameSet NotFoundViaLegacyLookup;
for (auto &S : Symbols)
- if (!SymbolFlags->count(S))
+ if (!RS->count(S))
NotFoundViaLegacyLookup.insert(S);
- auto SymbolFlags2 =
- LD.BackingResolver->lookupFlags(NotFoundViaLegacyLookup);
+ auto RS2 =
+ LD.BackingResolver->getResponsibilitySet(NotFoundViaLegacyLookup);
- for (auto &KV : SymbolFlags2)
- (*SymbolFlags)[KV.first] = std::move(KV.second);
+ for (auto &S : RS2)
+ (*RS).insert(S);
- return *SymbolFlags;
+ return *RS;
},
[this, &LD, LegacyLookup](std::shared_ptr<AsynchronousSymbolQuery> Q,
SymbolNameSet Symbols) {
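
On the new (non-legacy) layer, compile granularity is now a pluggable policy rather than a template parameter. A configuration sketch, assuming an already-constructed layer (the ExecutionSession, base layer and LazyCallThroughManager wiring is omitted):

#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"

using namespace llvm;
using namespace llvm::orc;

void configurePartitioning(CompileOnDemandLayer &COD) {
  // Built-in policy: pull in the whole module as soon as any symbol is hit.
  COD.setPartitionFunction(CompileOnDemandLayer::compileWholeModule);

  // Or a custom policy with the same shape as compileRequested: map the
  // requested GlobalValues to the subset that should be extracted now.
  COD.setPartitionFunction(
      [](CompileOnDemandLayer::GlobalValueSet Requested)
          -> Optional<CompileOnDemandLayer::GlobalValueSet> {
        return std::move(Requested);
      });
}
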
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
index 213a59124c85..f34f88311ba5 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
-#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
@@ -38,7 +38,7 @@ namespace orc {
/// Simple compile functor: Takes a single IR module and returns an ObjectFile.
/// This compiler supports a single compilation thread and LLVMContext only.
-/// For multithreaded compilation, use MultiThreadedSimpleCompiler below.
+/// For multithreaded compilation, use ConcurrentIRCompiler below.
class SimpleCompiler {
public:
using CompileResult = std::unique_ptr<MemoryBuffer>;
@@ -105,10 +105,10 @@ private:
///
/// This class creates a new TargetMachine and SimpleCompiler instance for each
/// compile.
-class MultiThreadedSimpleCompiler {
+class ConcurrentIRCompiler {
public:
- MultiThreadedSimpleCompiler(JITTargetMachineBuilder JTMB,
- ObjectCache *ObjCache = nullptr)
+ ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
+ ObjectCache *ObjCache = nullptr)
: JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
void setObjectCache(ObjectCache *ObjCache) { this->ObjCache = ObjCache; }
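
For callers the rename is mechanical: MultiThreadedSimpleCompiler becomes ConcurrentIRCompiler with the same constructor shape. A sketch follows; the call operator is not shown in this hunk and is assumed to mirror SimpleCompiler's, taking an IR module and returning an object buffer:

#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/IR/Module.h"

#include <memory>

using namespace llvm;
using namespace llvm::orc;

std::unique_ptr<MemoryBuffer> compileWithFreshTM(JITTargetMachineBuilder JTMB,
                                                 Module &M) {
  // Each invocation builds its own TargetMachine, so this object can be
  // shared across compile threads (hence the new name).
  ConcurrentIRCompiler Compile(std::move(JTMB), /*ObjCache=*/nullptr);
  return Compile(M);
}
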
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 11d7c091947e..39d306e0bd4c 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -18,13 +18,13 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
-#include <list>
-#include <map>
#include <memory>
-#include <set>
#include <vector>
+#define DEBUG_TYPE "orc"
+
namespace llvm {
namespace orc {
@@ -33,7 +33,7 @@ class AsynchronousSymbolQuery;
class ExecutionSession;
class MaterializationUnit;
class MaterializationResponsibility;
-class VSO;
+class JITDylib;
/// VModuleKey provides a unique identifier (allocated and managed by
/// ExecutionSessions) for a module added to the JIT.
@@ -41,36 +41,52 @@ using VModuleKey = uint64_t;
/// A set of symbol names (represented by SymbolStringPtrs for
// efficiency).
-using SymbolNameSet = std::set<SymbolStringPtr>;
-
-/// Render a SymbolNameSet to an ostream.
-raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols);
+using SymbolNameSet = DenseSet<SymbolStringPtr>;
/// A map from symbol names (as SymbolStringPtrs) to JITSymbols
/// (address/flags pairs).
-using SymbolMap = std::map<SymbolStringPtr, JITEvaluatedSymbol>;
-
-/// Render a SymbolMap to an ostream.
-raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols);
+using SymbolMap = DenseMap<SymbolStringPtr, JITEvaluatedSymbol>;
/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
-using SymbolFlagsMap = std::map<SymbolStringPtr, JITSymbolFlags>;
-
-/// Render a SymbolMap to an ostream.
-raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &Symbols);
+using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;
/// A base class for materialization failures that allows the failing
/// symbols to be obtained for logging.
-using SymbolDependenceMap = std::map<VSO *, SymbolNameSet>;
+using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;
+
+/// A list of (JITDylib*, bool) pairs.
+using JITDylibSearchList = std::vector<std::pair<JITDylib *, bool>>;
+
+/// Render a SymbolStringPtr.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym);
+
+/// Render a SymbolNameSet.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols);
+
+/// Render a SymbolFlagsMap entry.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap::value_type &KV);
+
+/// Render a SymbolMap entry.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolMap::value_type &KV);
+
+/// Render a SymbolFlagsMap.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags);
+
+/// Render a SymbolMap.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols);
+
+/// Render a SymbolDependenceMap entry.
+raw_ostream &operator<<(raw_ostream &OS,
+ const SymbolDependenceMap::value_type &KV);
 /// Render a SymbolDependenceMap.
raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps);
-/// A list of VSO pointers.
-using VSOList = std::vector<VSO *>;
+/// Render a MaterializationUnit.
+raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU);
-/// Render a VSOList.
-raw_ostream &operator<<(raw_ostream &OS, const VSOList &VSOs);
+/// Render a JITDylibSearchList.
+raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs);
/// Callback to notify client that symbols have been resolved.
using SymbolsResolvedCallback = std::function<void(Expected<SymbolMap>)>;
@@ -86,7 +102,8 @@ using RegisterDependenciesFunction =
/// are no dependants to register with.
extern RegisterDependenciesFunction NoDependenciesToRegister;
-/// Used to notify a VSO that the given set of symbols failed to materialize.
+/// Used to notify a JITDylib that the given set of symbols failed to
+/// materialize.
class FailedToMaterialize : public ErrorInfo<FailedToMaterialize> {
public:
static char ID;
@@ -114,12 +131,26 @@ private:
SymbolNameSet Symbols;
};
+/// Used to notify clients that a set of symbols could not be removed.
+class SymbolsCouldNotBeRemoved : public ErrorInfo<SymbolsCouldNotBeRemoved> {
+public:
+ static char ID;
+
+ SymbolsCouldNotBeRemoved(SymbolNameSet Symbols);
+ std::error_code convertToErrorCode() const override;
+ void log(raw_ostream &OS) const override;
+ const SymbolNameSet &getSymbols() const { return Symbols; }
+
+private:
+ SymbolNameSet Symbols;
+};
+
/// Tracks responsibility for materialization, and mediates interactions between
-/// MaterializationUnits and VSOs.
+/// MaterializationUnits and JDs.
///
/// An instance of this class is passed to MaterializationUnits when their
/// materialize method is called. It allows MaterializationUnits to resolve and
-/// finalize symbols, or abandon materialization by notifying any unmaterialized
+/// emit symbols, or abandon materialization by notifying any unmaterialized
/// symbols of an error.
class MaterializationResponsibility {
friend class MaterializationUnit;
@@ -130,41 +161,54 @@ public:
/// Destruct a MaterializationResponsibility instance. In debug mode
/// this asserts that all symbols being tracked have been either
- /// finalized or notified of an error.
+ /// emitted or notified of an error.
~MaterializationResponsibility();
- /// Returns the target VSO that these symbols are being materialized
+ /// Returns the target JITDylib that these symbols are being materialized
/// into.
- VSO &getTargetVSO() const { return V; }
+ JITDylib &getTargetJITDylib() const { return JD; }
+
+ /// Returns the VModuleKey for this instance.
+ VModuleKey getVModuleKey() const { return K; }
/// Returns the symbol flags map for this responsibility instance.
- SymbolFlagsMap getSymbols() { return SymbolFlags; }
+ /// Note: The returned flags may have transient flags (Lazy, Materializing)
+ /// set. These should be stripped with JITSymbolFlags::stripTransientFlags
+ /// before using.
+ const SymbolFlagsMap &getSymbols() { return SymbolFlags; }
/// Returns the names of any symbols covered by this
/// MaterializationResponsibility object that have queries pending. This
/// information can be used to return responsibility for unrequested symbols
- /// back to the VSO via the delegate method.
- SymbolNameSet getRequestedSymbols();
-
- /// Resolves the given symbols. Individual calls to this method may
- /// resolve a subset of the symbols, but all symbols must have been
- /// resolved prior to calling finalize.
+ /// back to the JITDylib via the delegate method.
+ SymbolNameSet getRequestedSymbols() const;
+
+ /// Notifies the target JITDylib that the given symbols have been resolved.
+ /// This will update the given symbols' addresses in the JITDylib, and notify
+ /// any pending queries on the given symbols of their resolution. The given
+ /// symbols must be ones covered by this MaterializationResponsibility
+ /// instance. Individual calls to this method may resolve a subset of the
+ /// symbols, but all symbols must have been resolved prior to calling emit.
void resolve(const SymbolMap &Symbols);
- /// Finalizes all symbols tracked by this instance.
- void finalize();
+ /// Notifies the target JITDylib (and any pending queries on that JITDylib)
+ /// that all symbols covered by this MaterializationResponsibility instance
+ /// have been emitted.
+ void emit();
- /// Adds new symbols to the VSO and this responsibility instance.
- /// VSO entries start out in the materializing state.
+ /// Adds new symbols to the JITDylib and this responsibility instance.
+ /// JITDylib entries start out in the materializing state.
///
/// This method can be used by materialization units that want to add
/// additional symbols at materialization time (e.g. stubs, compile
/// callbacks, metadata).
Error defineMaterializing(const SymbolFlagsMap &SymbolFlags);
- /// Notify all unfinalized symbols that an error has occurred.
+ /// Notify all not-yet-emitted symbols covered by this
+ /// MaterializationResponsibility instance that an error has occurred.
 /// This will remove all symbols covered by this MaterializationResponsibility
- /// from V, and send an error to any queries waiting on these symbols.
+ /// from the target JITDylib, and send an error to any queries waiting on
+ /// these symbols.
void failMaterialization();
/// Transfers responsibility to the given MaterializationUnit for all
@@ -177,7 +221,8 @@ public:
/// Delegates responsibility for the given symbols to the returned
/// materialization responsibility. Useful for breaking up work between
/// threads, or different kinds of materialization processes.
- MaterializationResponsibility delegate(const SymbolNameSet &Symbols);
+ MaterializationResponsibility delegate(const SymbolNameSet &Symbols,
+ VModuleKey NewKey = VModuleKey());
void addDependencies(const SymbolStringPtr &Name,
const SymbolDependenceMap &Dependencies);
@@ -186,12 +231,14 @@ public:
void addDependenciesForAll(const SymbolDependenceMap &Dependencies);
private:
- /// Create a MaterializationResponsibility for the given VSO and
+ /// Create a MaterializationResponsibility for the given JITDylib and
/// initial symbols.
- MaterializationResponsibility(VSO &V, SymbolFlagsMap SymbolFlags);
+ MaterializationResponsibility(JITDylib &JD, SymbolFlagsMap SymbolFlags,
+ VModuleKey K);
- VSO &V;
+ JITDylib &JD;
SymbolFlagsMap SymbolFlags;
+ VModuleKey K;
};
/// A MaterializationUnit represents a set of symbol definitions that can
@@ -199,35 +246,41 @@ private:
/// overriding definitions are encountered).
///
/// MaterializationUnits are used when providing lazy definitions of symbols to
-/// VSOs. The VSO will call materialize when the address of a symbol is
-/// requested via the lookup method. The VSO will call discard if a stronger
-/// definition is added or already present.
+/// JITDylibs. The JITDylib will call materialize when the address of a symbol
+/// is requested via the lookup method. The JITDylib will call discard if a
+/// stronger definition is added or already present.
class MaterializationUnit {
public:
- MaterializationUnit(SymbolFlagsMap InitalSymbolFlags)
- : SymbolFlags(std::move(InitalSymbolFlags)) {}
+ MaterializationUnit(SymbolFlagsMap InitalSymbolFlags, VModuleKey K)
+ : SymbolFlags(std::move(InitalSymbolFlags)), K(std::move(K)) {}
virtual ~MaterializationUnit() {}
+ /// Return the name of this materialization unit. Useful for debugging
+ /// output.
+ virtual StringRef getName() const = 0;
+
/// Return the set of symbols that this source provides.
const SymbolFlagsMap &getSymbols() const { return SymbolFlags; }
/// Called by materialization dispatchers (see
/// ExecutionSession::DispatchMaterializationFunction) to trigger
/// materialization of this MaterializationUnit.
- void doMaterialize(VSO &V) {
- materialize(MaterializationResponsibility(V, std::move(SymbolFlags)));
+ void doMaterialize(JITDylib &JD) {
+ materialize(MaterializationResponsibility(JD, std::move(SymbolFlags),
+ std::move(K)));
}
- /// Called by VSOs to notify MaterializationUnits that the given symbol has
- /// been overridden.
- void doDiscard(const VSO &V, SymbolStringPtr Name) {
+ /// Called by JITDylibs to notify MaterializationUnits that the given symbol
+ /// has been overridden.
+ void doDiscard(const JITDylib &JD, const SymbolStringPtr &Name) {
SymbolFlags.erase(Name);
- discard(V, std::move(Name));
+ discard(JD, std::move(Name));
}
protected:
SymbolFlagsMap SymbolFlags;
+ VModuleKey K;
private:
virtual void anchor();
@@ -241,7 +294,7 @@ private:
/// from the source (e.g. if the source is an LLVM IR Module and the
/// symbol is a function, delete the function body or mark it available
/// externally).
- virtual void discard(const VSO &V, SymbolStringPtr Name) = 0;
+ virtual void discard(const JITDylib &JD, const SymbolStringPtr &Name) = 0;
};
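
The materialize/discard contract above can be illustrated with a minimal custom unit. This is only a sketch under the API shown in this header (the class name, symbol name, and address are hypothetical, and it assumes MaterializationResponsibility's resolve/emit methods from this version of the interface); it resolves one symbol to a fixed address on first lookup and has nothing to clean up on discard:

\code{.cpp}
class FixedAddressMaterializationUnit : public MaterializationUnit {
public:
  FixedAddressMaterializationUnit(SymbolStringPtr Name, JITTargetAddress Addr)
      : MaterializationUnit({{Name, JITSymbolFlags::Exported}}, VModuleKey()),
        Name(std::move(Name)), Addr(Addr) {}

  StringRef getName() const override { return "FixedAddressMU"; }

private:
  void materialize(MaterializationResponsibility R) override {
    // Report the address for the symbol, then mark it emitted.
    R.resolve({{Name, JITEvaluatedSymbol(Addr, JITSymbolFlags::Exported)}});
    R.emit();
  }

  void discard(const JITDylib &JD, const SymbolStringPtr &Sym) override {
    // Nothing to free: the definition is just an address.
  }

  SymbolStringPtr Name;
  JITTargetAddress Addr;
};
\endcode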
using MaterializationUnitList =
@@ -253,30 +306,32 @@ using MaterializationUnitList =
/// materialized.
class AbsoluteSymbolsMaterializationUnit : public MaterializationUnit {
public:
- AbsoluteSymbolsMaterializationUnit(SymbolMap Symbols);
+ AbsoluteSymbolsMaterializationUnit(SymbolMap Symbols, VModuleKey K);
+
+ StringRef getName() const override;
private:
void materialize(MaterializationResponsibility R) override;
- void discard(const VSO &V, SymbolStringPtr Name) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
static SymbolFlagsMap extractFlags(const SymbolMap &Symbols);
SymbolMap Symbols;
};
/// Create an AbsoluteSymbolsMaterializationUnit with the given symbols.
-/// Useful for inserting absolute symbols into a VSO. E.g.:
+/// Useful for inserting absolute symbols into a JITDylib. E.g.:
/// \code{.cpp}
-/// VSO &V = ...;
+/// JITDylib &JD = ...;
/// SymbolStringPtr Foo = ...;
/// JITEvaluatedSymbol FooSym = ...;
-/// if (auto Err = V.define(absoluteSymbols({{Foo, FooSym}})))
+/// if (auto Err = JD.define(absoluteSymbols({{Foo, FooSym}})))
/// return Err;
/// \endcode
///
inline std::unique_ptr<AbsoluteSymbolsMaterializationUnit>
-absoluteSymbols(SymbolMap Symbols) {
+absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) {
return llvm::make_unique<AbsoluteSymbolsMaterializationUnit>(
- std::move(Symbols));
+ std::move(Symbols), std::move(K));
}
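
For the new key parameter, a hedged sketch (the session, names, and the 0x1000 address are hypothetical) of tagging an absolute-symbol unit with a freshly allocated VModuleKey:

\code{.cpp}
ExecutionSession ES;
JITDylib &JD = ES.getMainJITDylib();
SymbolStringPtr Foo = ES.intern("foo");
JITEvaluatedSymbol FooSym(0x1000, JITSymbolFlags::Exported);
// The key lets later layers associate resources with this definition.
if (auto Err = JD.define(absoluteSymbols({{Foo, FooSym}}, ES.allocateVModule())))
  ES.reportError(std::move(Err));
\endcode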
struct SymbolAliasMapEntry {
@@ -289,191 +344,87 @@ struct SymbolAliasMapEntry {
};
/// A map of Symbols to (Symbol, Flags) pairs.
-using SymbolAliasMap = std::map<SymbolStringPtr, SymbolAliasMapEntry>;
+using SymbolAliasMap = DenseMap<SymbolStringPtr, SymbolAliasMapEntry>;
/// A materialization unit for symbol aliases. Allows existing symbols to be
/// aliased with alternate flags.
class ReExportsMaterializationUnit : public MaterializationUnit {
public:
- /// SourceVSO is allowed to be nullptr, in which case the source VSO is
- /// taken to be whatever VSO these definitions are materialized in. This
- /// is useful for defining aliases within a VSO.
+ /// SourceJD is allowed to be nullptr, in which case the source JITDylib is
+ /// taken to be whatever JITDylib these definitions are materialized in (and
+ /// MatchNonExported has no effect). This is useful for defining aliases
+ /// within a JITDylib.
///
/// Note: Care must be taken that no sets of aliases form a cycle, as such
/// a cycle will result in a deadlock when any symbol in the cycle is
/// resolved.
- ReExportsMaterializationUnit(VSO *SourceVSO, SymbolAliasMap Aliases);
+ ReExportsMaterializationUnit(JITDylib *SourceJD, bool MatchNonExported,
+ SymbolAliasMap Aliases, VModuleKey K);
+
+ StringRef getName() const override;
private:
void materialize(MaterializationResponsibility R) override;
- void discard(const VSO &V, SymbolStringPtr Name) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases);
- VSO *SourceVSO = nullptr;
+ JITDylib *SourceJD = nullptr;
+ bool MatchNonExported = false;
SymbolAliasMap Aliases;
};
/// Create a ReExportsMaterializationUnit with the given aliases.
-/// Useful for defining symbol aliases.: E.g., given a VSO V containing symbols
-/// "foo" and "bar", we can define aliases "baz" (for "foo") and "qux" (for
-/// "bar") with:
-/// \code{.cpp}
+/// Useful for defining symbol aliases. E.g., given a JITDylib JD containing
+/// symbols "foo" and "bar", we can define aliases "baz" (for "foo") and "qux"
+/// (for "bar") with: \code{.cpp}
/// SymbolStringPtr Baz = ...;
/// SymbolStringPtr Qux = ...;
-/// if (auto Err = V.define(symbolAliases({
+/// if (auto Err = JD.define(symbolAliases({
/// {Baz, { Foo, JITSymbolFlags::Exported }},
/// {Qux, { Bar, JITSymbolFlags::Weak }}}))
/// return Err;
/// \endcode
inline std::unique_ptr<ReExportsMaterializationUnit>
-symbolAliases(SymbolAliasMap Aliases) {
- return llvm::make_unique<ReExportsMaterializationUnit>(nullptr,
- std::move(Aliases));
+symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) {
+ return llvm::make_unique<ReExportsMaterializationUnit>(
+ nullptr, true, std::move(Aliases), std::move(K));
}
-/// Create a materialization unit for re-exporting symbols from another VSO
+/// Create a materialization unit for re-exporting symbols from another JITDylib
/// with alternative names/flags.
+/// If MatchNonExported is true then non-exported symbols from SourceJD can be
+/// re-exported. If it is false, attempts to re-export a non-exported symbol
+/// will result in a "symbol not found" error.
inline std::unique_ptr<ReExportsMaterializationUnit>
-reexports(VSO &SourceV, SymbolAliasMap Aliases) {
- return llvm::make_unique<ReExportsMaterializationUnit>(&SourceV,
- std::move(Aliases));
+reexports(JITDylib &SourceJD, SymbolAliasMap Aliases,
+ bool MatchNonExported = false, VModuleKey K = VModuleKey()) {
+ return llvm::make_unique<ReExportsMaterializationUnit>(
+ &SourceJD, MatchNonExported, std::move(Aliases), std::move(K));
}
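
A hedged sketch of the MatchNonExported flag described above (ES, MainJD, and LibJD are hypothetical):

\code{.cpp}
// Re-export LibJD's "foo" into MainJD as "foo_alias", allowing the alias to
// bind to a non-exported (hidden) definition in LibJD.
SymbolAliasMap Aliases;
Aliases[ES.intern("foo_alias")] = {ES.intern("foo"), JITSymbolFlags::Exported};
if (auto Err = MainJD.define(reexports(LibJD, std::move(Aliases),
                                       /*MatchNonExported=*/true)))
  ES.reportError(std::move(Err));
\endcode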
/// Build a SymbolAliasMap for the common case where you want to re-export
-/// symbols from another VSO with the same linkage/flags.
+/// symbols from another JITDylib with the same linkage/flags.
Expected<SymbolAliasMap>
-buildSimpleReexportsAliasMap(VSO &SourceV, const SymbolNameSet &Symbols);
-
-/// Base utilities for ExecutionSession.
-class ExecutionSessionBase {
- // FIXME: Remove this when we remove the old ORC layers.
- friend class VSO;
+buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols);
+/// ReexportsGenerator can be used with JITDylib::setGenerator to automatically
+/// re-export a subset of the source JITDylib's symbols in the target.
+class ReexportsGenerator {
public:
- /// For reporting errors.
- using ErrorReporter = std::function<void(Error)>;
-
- /// For dispatching MaterializationUnit::materialize calls.
- using DispatchMaterializationFunction =
- std::function<void(VSO &V, std::unique_ptr<MaterializationUnit> MU)>;
-
- /// Construct an ExecutionSessionBase.
- ///
- /// SymbolStringPools may be shared between ExecutionSessions.
- ExecutionSessionBase(std::shared_ptr<SymbolStringPool> SSP = nullptr)
- : SSP(SSP ? std::move(SSP) : std::make_shared<SymbolStringPool>()) {}
-
- /// Returns the SymbolStringPool for this ExecutionSession.
- SymbolStringPool &getSymbolStringPool() const { return *SSP; }
-
- /// Run the given lambda with the session mutex locked.
- template <typename Func> auto runSessionLocked(Func &&F) -> decltype(F()) {
- std::lock_guard<std::recursive_mutex> Lock(SessionMutex);
- return F();
- }
-
- /// Set the error reporter function.
- ExecutionSessionBase &setErrorReporter(ErrorReporter ReportError) {
- this->ReportError = std::move(ReportError);
- return *this;
- }
-
- /// Set the materialization dispatch function.
- ExecutionSessionBase &setDispatchMaterialization(
- DispatchMaterializationFunction DispatchMaterialization) {
- this->DispatchMaterialization = std::move(DispatchMaterialization);
- return *this;
- }
-
- /// Report a error for this execution session.
- ///
- /// Unhandled errors can be sent here to log them.
- void reportError(Error Err) { ReportError(std::move(Err)); }
+ using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
- /// Allocate a module key for a new module to add to the JIT.
- VModuleKey allocateVModule() { return ++LastKey; }
+ /// Create a reexports generator. If an Allow predicate is passed, only
+ /// symbols for which the predicate returns true will be reexported. If no
+ /// Allow predicate is passed, all symbols will be reexported.
+ ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false,
+ SymbolPredicate Allow = SymbolPredicate());
- /// Return a module key to the ExecutionSession so that it can be
- /// re-used. This should only be done once all resources associated
- /// with the original key have been released.
- void releaseVModule(VModuleKey Key) { /* FIXME: Recycle keys */
- }
-
- void legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err);
-
- using LegacyAsyncLookupFunction = std::function<SymbolNameSet(
- std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names)>;
-
- /// A legacy lookup function for JITSymbolResolverAdapter.
- /// Do not use -- this will be removed soon.
- Expected<SymbolMap>
- legacyLookup(ExecutionSessionBase &ES, LegacyAsyncLookupFunction AsyncLookup,
- SymbolNameSet Names, bool WaiUntilReady,
- RegisterDependenciesFunction RegisterDependencies);
-
- /// Search the given VSO list for the given symbols.
- ///
- ///
- /// The OnResolve callback will be called once all requested symbols are
- /// resolved, or if an error occurs prior to resolution.
- ///
- /// The OnReady callback will be called once all requested symbols are ready,
- /// or if an error occurs after resolution but before all symbols are ready.
- ///
- /// If all symbols are found, the RegisterDependencies function will be called
- /// while the session lock is held. This gives clients a chance to register
- /// dependencies for on the queried symbols for any symbols they are
- /// materializing (if a MaterializationResponsibility instance is present,
- /// this can be implemented by calling
- /// MaterializationResponsibility::addDependencies). If there are no
- /// dependenant symbols for this query (e.g. it is being made by a top level
- /// client to get an address to call) then the value NoDependenciesToRegister
- /// can be used.
- void lookup(const VSOList &VSOs, const SymbolNameSet &Symbols,
- SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
- RegisterDependenciesFunction RegisterDependencies);
-
- /// Blocking version of lookup above. Returns the resolved symbol map.
- /// If WaitUntilReady is true (the default), will not return until all
- /// requested symbols are ready (or an error occurs). If WaitUntilReady is
- /// false, will return as soon as all requested symbols are resolved,
- /// or an error occurs. If WaitUntilReady is false and an error occurs
- /// after resolution, the function will return a success value, but the
- /// error will be reported via reportErrors.
- Expected<SymbolMap> lookup(const VSOList &VSOs, const SymbolNameSet &Symbols,
- RegisterDependenciesFunction RegisterDependencies,
- bool WaitUntilReady = true);
-
- /// Materialize the given unit.
- void dispatchMaterialization(VSO &V,
- std::unique_ptr<MaterializationUnit> MU) {
- DispatchMaterialization(V, std::move(MU));
- }
+ SymbolNameSet operator()(JITDylib &JD, const SymbolNameSet &Names);
private:
- static void logErrorsToStdErr(Error Err) {
- logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: ");
- }
-
- static void
- materializeOnCurrentThread(VSO &V, std::unique_ptr<MaterializationUnit> MU) {
- MU->doMaterialize(V);
- }
-
- void runOutstandingMUs();
-
- mutable std::recursive_mutex SessionMutex;
- std::shared_ptr<SymbolStringPool> SSP;
- VModuleKey LastKey = 0;
- ErrorReporter ReportError = logErrorsToStdErr;
- DispatchMaterializationFunction DispatchMaterialization =
- materializeOnCurrentThread;
-
- // FIXME: Remove this (and runOutstandingMUs) once the linking layer works
- // with callbacks from asynchronous queries.
- mutable std::recursive_mutex OutstandingMUsMutex;
- std::vector<std::pair<VSO *, std::unique_ptr<MaterializationUnit>>>
- OutstandingMUs;
+ JITDylib &SourceJD;
+ bool MatchNonExported = false;
+ SymbolPredicate Allow;
};
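
A hedged sketch of installing the generator described above (MainJD and LibJD are hypothetical; the predicate assumes SymbolStringPtr dereferences to a StringRef):

\code{.cpp}
// Automatically re-export LibJD symbols whose names start with "lib_".
MainJD.setGenerator(ReexportsGenerator(
    LibJD, /*MatchNonExported=*/false,
    [](SymbolStringPtr Name) { return (*Name).startswith("lib_"); }));
\endcode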
/// A symbol query that returns results via a callback when results are
@@ -481,8 +432,9 @@ private:
///
/// makes a callback when all symbols are available.
class AsynchronousSymbolQuery {
- friend class ExecutionSessionBase;
- friend class VSO;
+ friend class ExecutionSession;
+ friend class JITDylib;
+ friend class JITSymbolResolverAdapter;
public:
@@ -517,9 +469,9 @@ public:
void handleFullyReady();
private:
- void addQueryDependence(VSO &V, SymbolStringPtr Name);
+ void addQueryDependence(JITDylib &JD, SymbolStringPtr Name);
- void removeQueryDependence(VSO &V, const SymbolStringPtr &Name);
+ void removeQueryDependence(JITDylib &JD, const SymbolStringPtr &Name);
bool canStillFail();
@@ -539,110 +491,118 @@ private:
///
/// Represents a virtual shared object. Instances can not be copied or moved, so
/// their addresses may be used as keys for resource management.
-/// VSO state changes must be made via an ExecutionSession to guarantee that
-/// they are synchronized with respect to other VSO operations.
-class VSO {
+/// JITDylib state changes must be made via an ExecutionSession to guarantee
+/// that they are synchronized with respect to other JITDylib operations.
+class JITDylib {
friend class AsynchronousSymbolQuery;
friend class ExecutionSession;
- friend class ExecutionSessionBase;
friend class MaterializationResponsibility;
public:
- using FallbackDefinitionGeneratorFunction =
- std::function<SymbolNameSet(VSO &Parent, const SymbolNameSet &Names)>;
+ using GeneratorFunction = std::function<SymbolNameSet(
+ JITDylib &Parent, const SymbolNameSet &Names)>;
using AsynchronousSymbolQuerySet =
- std::set<std::shared_ptr<AsynchronousSymbolQuery>>;
+ std::set<std::shared_ptr<AsynchronousSymbolQuery>>;
- VSO(const VSO &) = delete;
- VSO &operator=(const VSO &) = delete;
- VSO(VSO &&) = delete;
- VSO &operator=(VSO &&) = delete;
+ JITDylib(const JITDylib &) = delete;
+ JITDylib &operator=(const JITDylib &) = delete;
+ JITDylib(JITDylib &&) = delete;
+ JITDylib &operator=(JITDylib &&) = delete;
- /// Get the name for this VSO.
- const std::string &getName() const { return VSOName; }
+ /// Get the name for this JITDylib.
+ const std::string &getName() const { return JITDylibName; }
- /// Get a reference to the ExecutionSession for this VSO.
- ExecutionSessionBase &getExecutionSession() const { return ES; }
+ /// Get a reference to the ExecutionSession for this JITDylib.
+ ExecutionSession &getExecutionSession() const { return ES; }
- /// Set a fallback defenition generator. If set, lookup and lookupFlags will
- /// pass the unresolved symbols set to the fallback definition generator,
- /// allowing it to add a new definition to the VSO.
- void setFallbackDefinitionGenerator(
- FallbackDefinitionGeneratorFunction FallbackDefinitionGenerator) {
- this->FallbackDefinitionGenerator = std::move(FallbackDefinitionGenerator);
+ /// Set a definition generator. If set, whenever a symbol fails to resolve
+ /// within this JITDylib, lookup and lookupFlags will pass the unresolved
+ /// symbols set to the definition generator. The generator can optionally
+ /// add a definition for the unresolved symbols to the dylib.
+ void setGenerator(GeneratorFunction DefGenerator) {
+ this->DefGenerator = std::move(DefGenerator);
}
- /// Set the search order to be used when fixing up definitions in VSO.
+ /// Set the search order to be used when fixing up definitions in JITDylib.
/// This will replace the previous search order, and apply to any symbol
- /// resolutions made for definitions in this VSO after the call to
+ /// resolutions made for definitions in this JITDylib after the call to
/// setSearchOrder (even if the definition itself was added before the
/// call).
///
- /// If SearchThisVSOFirst is set, which by default it is, then this VSO will
- /// add itself to the beginning of the SearchOrder (Clients should *not*
- /// put this VSO in the list in this case, to avoid redundant lookups).
+ /// If SearchThisJITDylibFirst is set, which by default it is, then this
+ /// JITDylib will add itself to the beginning of the SearchOrder (Clients
+ /// should *not* put this JITDylib in the list in this case, to avoid
+ /// redundant lookups).
///
- /// If SearchThisVSOFirst is false then the search order will be used as
+ /// If SearchThisJITDylibFirst is false then the search order will be used as
/// given. The main motivation for this feature is to support deliberate
- /// shadowing of symbols in this VSO by a facade VSO. For example, the
- /// facade may resolve function names to stubs, and the stubs may compile
+ /// shadowing of symbols in this JITDylib by a facade JITDylib. For example,
+ /// the facade may resolve function names to stubs, and the stubs may compile
/// lazily by looking up symbols in this dylib. Adding the facade dylib
/// as the first in the search order (instead of this dylib) ensures that
/// definitions within this dylib resolve to the lazy-compiling stubs,
/// rather than immediately materializing the definitions in this dylib.
- void setSearchOrder(VSOList NewSearchOrder, bool SearchThisVSOFirst = true);
+ void setSearchOrder(JITDylibSearchList NewSearchOrder,
+ bool SearchThisJITDylibFirst = true,
+ bool MatchNonExportedInThisDylib = true);
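
A hedged sketch of the facade scenario from the comment above (JD and FacadeJD are hypothetical):

\code{.cpp}
// Resolve JD's definitions through the facade's lazy-compiling stubs first,
// matching non-exported symbols in both, without implicitly prepending JD.
JD.setSearchOrder({{&FacadeJD, true}, {&JD, true}},
                  /*SearchThisJITDylibFirst=*/false);
\endcode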
- /// Add the given VSO to the search order for definitions in this VSO.
- void addToSearchOrder(VSO &V);
+ /// Add the given JITDylib to the search order for definitions in this
+ /// JITDylib.
+ void addToSearchOrder(JITDylib &JD, bool MatcNonExported = false);
- /// Replace OldV with NewV in the search order if OldV is present. Otherwise
- /// this operation is a no-op.
- void replaceInSearchOrder(VSO &OldV, VSO &NewV);
+ /// Replace OldJD with NewJD in the search order if OldJD is present.
+ /// Otherwise this operation is a no-op.
+ void replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD,
+ bool MatchNonExported = false);
- /// Remove the given VSO from the search order for this VSO if it is
+ /// Remove the given JITDylib from the search order for this JITDylib if it is
/// present. Otherwise this operation is a no-op.
- void removeFromSearchOrder(VSO &V);
+ void removeFromSearchOrder(JITDylib &JD);
/// Do something with the search order (run under the session lock).
template <typename Func>
auto withSearchOrderDo(Func &&F)
- -> decltype(F(std::declval<const VSOList &>())) {
- return ES.runSessionLocked([&]() { return F(SearchOrder); });
- }
+ -> decltype(F(std::declval<const JITDylibSearchList &>()));
- /// Define all symbols provided by the materialization unit to be part
- /// of the given VSO.
- template <typename UniquePtrToMaterializationUnit>
- typename std::enable_if<
- std::is_convertible<
- typename std::decay<UniquePtrToMaterializationUnit>::type,
- std::unique_ptr<MaterializationUnit>>::value,
- Error>::type
- define(UniquePtrToMaterializationUnit &&MU) {
- return ES.runSessionLocked([&, this]() -> Error {
- assert(MU && "Can't define with a null MU");
-
- if (auto Err = defineImpl(*MU))
- return Err;
-
- /// defineImpl succeeded.
- auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
- for (auto &KV : UMI->MU->getSymbols())
- UnmaterializedInfos[KV.first] = UMI;
-
- return Error::success();
- });
- }
+ /// Define all symbols provided by the materialization unit to be part of this
+ /// JITDylib.
+ ///
+ /// This overload always takes ownership of the MaterializationUnit. If any
+ /// errors occur, the MaterializationUnit is consumed.
+ template <typename MaterializationUnitType>
+ Error define(std::unique_ptr<MaterializationUnitType> &&MU);
- /// Search the given VSO for the symbols in Symbols. If found, store
+ /// Define all symbols provided by the materialization unit to be part of this
+ /// JITDylib.
+ ///
+ /// This overload only takes ownership of the MaterializationUnit if no error
+ /// is generated. If an error occurs, ownership remains with the caller. This
+ /// may allow the caller to modify the MaterializationUnit to correct the
+ /// issue, then re-call define.
+ template <typename MaterializationUnitType>
+ Error define(std::unique_ptr<MaterializationUnitType> &MU);
+
+ /// Tries to remove the given symbols.
+ ///
+ /// If any symbols are not defined in this JITDylib this method will return
+ /// a SymbolsNotFound error covering the missing symbols.
+ ///
+ /// If all symbols are found but some symbols are in the process of being
+ /// materialized this method will return a SymbolsCouldNotBeRemoved error.
+ ///
+ /// On success, all symbols are removed. On failure, the JITDylib state is
+ /// left unmodified (no symbols are removed).
+ Error remove(const SymbolNameSet &Names);
+
+ /// Search the given JITDylib for the symbols in Symbols. If found, store
/// the flags for each symbol in Flags. Returns any unresolved symbols.
SymbolFlagsMap lookupFlags(const SymbolNameSet &Names);
- /// Dump current VSO state to OS.
+ /// Dump current JITDylib state to OS.
void dump(raw_ostream &OS);
/// FIXME: Remove this when we remove the old ORC layers.
- /// Search the given VSOs in order for the symbols in Symbols. Results
+ /// Search the given JITDylibs in order for the symbols in Symbols. Results
/// (once they become available) will be returned via the given Query.
///
/// If any symbol is not found then the unresolved symbols will be returned,
@@ -664,16 +624,16 @@ private:
};
using UnmaterializedInfosMap =
- std::map<SymbolStringPtr, std::shared_ptr<UnmaterializedInfo>>;
+ DenseMap<SymbolStringPtr, std::shared_ptr<UnmaterializedInfo>>;
struct MaterializingInfo {
AsynchronousSymbolQueryList PendingQueries;
SymbolDependenceMap Dependants;
- SymbolDependenceMap UnfinalizedDependencies;
- bool IsFinalized = false;
+ SymbolDependenceMap UnemittedDependencies;
+ bool IsEmitted = false;
};
- using MaterializingInfosMap = std::map<SymbolStringPtr, MaterializingInfo>;
+ using MaterializingInfosMap = DenseMap<SymbolStringPtr, MaterializingInfo>;
using LookupImplActionFlags = enum {
None = 0,
@@ -682,7 +642,7 @@ private:
LLVM_MARK_AS_BITMASK_ENUM(NotifyFullyReady)
};
- VSO(ExecutionSessionBase &ES, std::string Name);
+ JITDylib(ExecutionSession &ES, std::string Name);
Error defineImpl(MaterializationUnit &MU);
@@ -690,10 +650,12 @@ private:
const SymbolNameSet &Names);
void lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, MaterializationUnitList &MUs);
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs);
void lodgeQueryImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, MaterializationUnitList &MUs);
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs);
LookupImplActionFlags
lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
@@ -703,77 +665,266 @@ private:
void detachQueryHelper(AsynchronousSymbolQuery &Q,
const SymbolNameSet &QuerySymbols);
- void transferFinalizedNodeDependencies(MaterializingInfo &DependantMI,
- const SymbolStringPtr &DependantName,
- MaterializingInfo &FinalizedMI);
+ void transferEmittedNodeDependencies(MaterializingInfo &DependantMI,
+ const SymbolStringPtr &DependantName,
+ MaterializingInfo &EmittedMI);
Error defineMaterializing(const SymbolFlagsMap &SymbolFlags);
void replace(std::unique_ptr<MaterializationUnit> MU);
- SymbolNameSet getRequestedSymbols(const SymbolFlagsMap &SymbolFlags);
+ SymbolNameSet getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const;
void addDependencies(const SymbolStringPtr &Name,
const SymbolDependenceMap &Dependants);
void resolve(const SymbolMap &Resolved);
- void finalize(const SymbolFlagsMap &Finalized);
+ void emit(const SymbolFlagsMap &Emitted);
void notifyFailed(const SymbolNameSet &FailedSymbols);
- ExecutionSessionBase &ES;
- std::string VSOName;
+ ExecutionSession &ES;
+ std::string JITDylibName;
SymbolMap Symbols;
UnmaterializedInfosMap UnmaterializedInfos;
MaterializingInfosMap MaterializingInfos;
- FallbackDefinitionGeneratorFunction FallbackDefinitionGenerator;
- VSOList SearchOrder;
+ GeneratorFunction DefGenerator;
+ JITDylibSearchList SearchOrder;
};
/// An ExecutionSession represents a running JIT program.
-class ExecutionSession : public ExecutionSessionBase {
+class ExecutionSession {
+ // FIXME: Remove this when we remove the old ORC layers.
+ friend class JITDylib;
+
public:
+ /// For reporting errors.
using ErrorReporter = std::function<void(Error)>;
- using DispatchMaterializationFunction =
- std::function<void(VSO &V, std::unique_ptr<MaterializationUnit> MU)>;
+ /// For dispatching MaterializationUnit::materialize calls.
+ using DispatchMaterializationFunction = std::function<void(
+ JITDylib &JD, std::unique_ptr<MaterializationUnit> MU)>;
- /// Construct an ExecutionEngine.
+ /// Construct an ExecutionSession.
///
/// SymbolStringPools may be shared between ExecutionSessions.
- ExecutionSession(std::shared_ptr<SymbolStringPool> SSP = nullptr)
- : ExecutionSessionBase(std::move(SSP)) {}
+ ExecutionSession(std::shared_ptr<SymbolStringPool> SSP = nullptr);
+
+ /// Add a symbol name to the SymbolStringPool and return a pointer to it.
+ SymbolStringPtr intern(StringRef SymName) { return SSP->intern(SymName); }
+
+ /// Returns a shared_ptr to the SymbolStringPool for this ExecutionSession.
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() const { return SSP; }
+
+ /// Run the given lambda with the session mutex locked.
+ template <typename Func> auto runSessionLocked(Func &&F) -> decltype(F()) {
+ std::lock_guard<std::recursive_mutex> Lock(SessionMutex);
+ return F();
+ }
+
+ /// Get the "main" JITDylib, which is created automatically on construction of
+ /// the ExecutionSession.
+ JITDylib &getMainJITDylib();
+
+ /// Add a new JITDylib to this ExecutionSession.
+ JITDylib &createJITDylib(std::string Name,
+ bool AddToMainDylibSearchOrder = true);
+
+ /// Allocate a module key for a new module to add to the JIT.
+ VModuleKey allocateVModule() {
+ return runSessionLocked([this]() { return ++LastKey; });
+ }
+
+ /// Return a module key to the ExecutionSession so that it can be
+ /// re-used. This should only be done once all resources associated
+ /// with the original key have been released.
+ void releaseVModule(VModuleKey Key) { /* FIXME: Recycle keys */
+ }
+
+ /// Set the error reporter function.
+ ExecutionSession &setErrorReporter(ErrorReporter ReportError) {
+ this->ReportError = std::move(ReportError);
+ return *this;
+ }
+
+ /// Report a error for this execution session.
+ ///
+ /// Unhandled errors can be sent here to log them.
+ void reportError(Error Err) { ReportError(std::move(Err)); }
+
+ /// Set the materialization dispatch function.
+ ExecutionSession &setDispatchMaterialization(
+ DispatchMaterializationFunction DispatchMaterialization) {
+ this->DispatchMaterialization = std::move(DispatchMaterialization);
+ return *this;
+ }
+
+ void legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err);
+
+ using LegacyAsyncLookupFunction = std::function<SymbolNameSet(
+ std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names)>;
+
+ /// A legacy lookup function for JITSymbolResolverAdapter.
+ /// Do not use -- this will be removed soon.
+ Expected<SymbolMap>
+ legacyLookup(LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
+ bool WaiUntilReady,
+ RegisterDependenciesFunction RegisterDependencies);
+
+ /// Search the given JITDylib list for the given symbols.
+ ///
+ /// SearchOrder lists the JITDylibs to search. For each dylib, the associated
+ /// boolean indicates whether the search should match against non-exported
+ /// (hidden visibility) symbols in that dylib (true means match against
+ /// non-exported symbols, false means do not match).
+ ///
+ /// The OnResolve callback will be called once all requested symbols are
+ /// resolved, or if an error occurs prior to resolution.
+ ///
+ /// The OnReady callback will be called once all requested symbols are ready,
+ /// or if an error occurs after resolution but before all symbols are ready.
+ ///
+ /// If all symbols are found, the RegisterDependencies function will be called
+ /// while the session lock is held. This gives clients a chance to register
+ /// dependencies on the queried symbols for any symbols they are
+ /// materializing (if a MaterializationResponsibility instance is present,
+ /// this can be implemented by calling
+ /// MaterializationResponsibility::addDependencies). If there are no
+ /// dependent symbols for this query (e.g. it is being made by a top level
+ /// client to get an address to call) then the value NoDependenciesToRegister
+ /// can be used.
+ void lookup(const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
+ SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
+ RegisterDependenciesFunction RegisterDependencies);
- /// Add a new VSO to this ExecutionSession.
- VSO &createVSO(std::string Name);
+ /// Blocking version of lookup above. Returns the resolved symbol map.
+ /// If WaitUntilReady is true (the default), will not return until all
+ /// requested symbols are ready (or an error occurs). If WaitUntilReady is
+ /// false, will return as soon as all requested symbols are resolved,
+ /// or an error occurs. If WaitUntilReady is false and an error occurs
+ /// after resolution, the function will return a success value, but the
+ /// error will be reported via reportErrors.
+ Expected<SymbolMap> lookup(const JITDylibSearchList &SearchOrder,
+ const SymbolNameSet &Symbols,
+ RegisterDependenciesFunction RegisterDependencies =
+ NoDependenciesToRegister,
+ bool WaitUntilReady = true);
+
+ /// Convenience version of blocking lookup.
+ /// Searches each of the JITDylibs in the search order in turn for the given
+ /// symbol.
+ Expected<JITEvaluatedSymbol> lookup(const JITDylibSearchList &SearchOrder,
+ SymbolStringPtr Symbol);
+
+ /// Convenience version of blocking lookup.
+ /// Searches each of the JITDylibs in the search order in turn for the given
+ /// symbol. The search will not find non-exported symbols.
+ Expected<JITEvaluatedSymbol> lookup(ArrayRef<JITDylib *> SearchOrder,
+ SymbolStringPtr Symbol);
+
+ /// Convenience version of blocking lookup.
+ /// Searches each of the JITDylibs in the search order in turn for the given
+ /// symbol. The search will not find non-exported symbols.
+ Expected<JITEvaluatedSymbol> lookup(ArrayRef<JITDylib *> SearchOrder,
+ StringRef Symbol);
+
+ /// Materialize the given unit.
+ void dispatchMaterialization(JITDylib &JD,
+ std::unique_ptr<MaterializationUnit> MU) {
+ LLVM_DEBUG(runSessionLocked([&]() {
+ dbgs() << "Compiling, for " << JD.getName() << ", " << *MU
+ << "\n";
+ }););
+ DispatchMaterialization(JD, std::move(MU));
+ }
+
+ /// Dump the state of all the JITDylibs in this session.
+ void dump(raw_ostream &OS);
private:
- std::vector<std::unique_ptr<VSO>> VSOs;
+ static void logErrorsToStdErr(Error Err) {
+ logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: ");
+ }
+
+ static void
+ materializeOnCurrentThread(JITDylib &JD,
+ std::unique_ptr<MaterializationUnit> MU) {
+ MU->doMaterialize(JD);
+ }
+
+ void runOutstandingMUs();
+
+ mutable std::recursive_mutex SessionMutex;
+ std::shared_ptr<SymbolStringPool> SSP;
+ VModuleKey LastKey = 0;
+ ErrorReporter ReportError = logErrorsToStdErr;
+ DispatchMaterializationFunction DispatchMaterialization =
+ materializeOnCurrentThread;
+
+ std::vector<std::unique_ptr<JITDylib>> JDs;
+
+ // FIXME: Remove this (and runOutstandingMUs) once the linking layer works
+ // with callbacks from asynchronous queries.
+ mutable std::recursive_mutex OutstandingMUsMutex;
+ std::vector<std::pair<JITDylib *, std::unique_ptr<MaterializationUnit>>>
+ OutstandingMUs;
};
-/// Look up the given names in the given VSOs.
-/// VSOs will be searched in order and no VSO pointer may be null.
-/// All symbols must be found within the given VSOs or an error
-/// will be returned.
-Expected<SymbolMap> lookup(const VSOList &VSOs, SymbolNameSet Names);
+template <typename Func>
+auto JITDylib::withSearchOrderDo(Func &&F)
+ -> decltype(F(std::declval<const JITDylibSearchList &>())) {
+ return ES.runSessionLocked([&]() { return F(SearchOrder); });
+}
+
+template <typename MaterializationUnitType>
+Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU) {
+ assert(MU && "Can not define with a null MU");
+ return ES.runSessionLocked([&, this]() -> Error {
+ if (auto Err = defineImpl(*MU))
+ return Err;
-/// Look up a symbol by searching a list of VSOs.
-Expected<JITEvaluatedSymbol> lookup(const VSOList &VSOs, SymbolStringPtr Name);
+ /// defineImpl succeeded.
+ auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
+ for (auto &KV : UMI->MU->getSymbols())
+ UnmaterializedInfos[KV.first] = UMI;
+
+ return Error::success();
+ });
+}
+
+template <typename MaterializationUnitType>
+Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) {
+ assert(MU && "Can not define with a null MU");
+
+ return ES.runSessionLocked([&, this]() -> Error {
+ if (auto Err = defineImpl(*MU))
+ return Err;
+
+ /// defineImpl succeeded.
+ auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
+ for (auto &KV : UMI->MU->getSymbols())
+ UnmaterializedInfos[KV.first] = UMI;
+
+ return Error::success();
+ });
+}
/// Mangles symbol names then uniques them in the context of an
/// ExecutionSession.
class MangleAndInterner {
public:
- MangleAndInterner(ExecutionSessionBase &ES, const DataLayout &DL);
+ MangleAndInterner(ExecutionSession &ES, const DataLayout &DL);
SymbolStringPtr operator()(StringRef Name);
private:
- ExecutionSessionBase &ES;
+ ExecutionSession &ES;
const DataLayout &DL;
};
} // End namespace orc
} // End namespace llvm
+#undef DEBUG_TYPE // "orc"
+
#endif // LLVM_EXECUTIONENGINE_ORC_CORE_H
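
Tying the new Core.h pieces together, a hedged end-to-end sketch (JITDylibSearchList is the (JITDylib*, match-non-exported) pair list used throughout this header; the names and 0x2000 address are hypothetical):

\code{.cpp}
ExecutionSession ES;
JITDylib &LibJD = ES.createJITDylib("libutil");
auto Bar = ES.intern("bar");
cantFail(LibJD.define(absoluteSymbols(
    {{Bar, JITEvaluatedSymbol(0x2000, JITSymbolFlags::Exported)}})));
// Blocking convenience lookup; 'true' also matches non-exported symbols.
auto Sym = ES.lookup(JITDylibSearchList({{&LibJD, true}}), Bar);
if (!Sym)
  ES.reportError(Sym.takeError());
\endcode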
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index e27f6e1e2cd6..88559f822e5d 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -21,7 +21,6 @@
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cstdint>
#include <string>
@@ -39,45 +38,6 @@ class Value;
namespace orc {
-/// A utility class for building TargetMachines for JITs.
-class JITTargetMachineBuilder {
-public:
- JITTargetMachineBuilder(Triple TT);
- static Expected<JITTargetMachineBuilder> detectHost();
- Expected<std::unique_ptr<TargetMachine>> createTargetMachine();
-
- JITTargetMachineBuilder &setArch(std::string Arch) {
- this->Arch = std::move(Arch);
- return *this;
- }
- JITTargetMachineBuilder &setCPU(std::string CPU) {
- this->CPU = std::move(CPU);
- return *this;
- }
- JITTargetMachineBuilder &setRelocationModel(Optional<Reloc::Model> RM) {
- this->RM = std::move(RM);
- return *this;
- }
- JITTargetMachineBuilder &setCodeModel(Optional<CodeModel::Model> CM) {
- this->CM = std::move(CM);
- return *this;
- }
- JITTargetMachineBuilder &
- addFeatures(const std::vector<std::string> &FeatureVec);
- SubtargetFeatures &getFeatures() { return Features; }
- TargetOptions &getOptions() { return Options; }
-
-private:
- Triple TT;
- std::string Arch;
- std::string CPU;
- SubtargetFeatures Features;
- TargetOptions Options;
- Optional<Reloc::Model> RM;
- Optional<CodeModel::Model> CM;
- CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
-};
-
/// This iterator provides a convenient way to iterate over the elements
/// of an llvm.global_ctors/llvm.global_dtors instance.
///
@@ -134,11 +94,11 @@ iterator_range<CtorDtorIterator> getDestructors(const Module &M);
/// Convenience class for recording constructor/destructor names for
/// later execution.
template <typename JITLayerT>
-class CtorDtorRunner {
+class LegacyCtorDtorRunner {
public:
/// Construct a CtorDtorRunner for the given range using the given
/// name mangling function.
- CtorDtorRunner(std::vector<std::string> CtorDtorNames, VModuleKey K)
+ LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames, VModuleKey K)
: CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
/// Run the recorded constructors/destructors through the given JIT
@@ -169,9 +129,9 @@ private:
orc::VModuleKey K;
};
-class CtorDtorRunner2 {
+class CtorDtorRunner {
public:
- CtorDtorRunner2(VSO &V) : V(V) {}
+ CtorDtorRunner(JITDylib &JD) : JD(JD) {}
void add(iterator_range<CtorDtorIterator> CtorDtors);
Error run();
@@ -179,7 +139,7 @@ private:
using CtorDtorList = std::vector<SymbolStringPtr>;
using CtorDtorPriorityMap = std::map<unsigned, CtorDtorList>;
- VSO &V;
+ JITDylib &JD;
CtorDtorPriorityMap CtorDtorsByPriority;
};
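
A hedged sketch of the renamed (non-legacy) runner in use (ES, JD, and Module M are hypothetical; getConstructors is the iterator helper from this header):

\code{.cpp}
CtorDtorRunner CtorRunner(JD);
CtorRunner.add(getConstructors(M)); // record llvm.global_ctors entries by name
// ... after M has been emitted into JD:
if (auto Err = CtorRunner.run())    // look the ctors up in JD and call them
  ES.reportError(std::move(Err));
\endcode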
@@ -217,11 +177,11 @@ protected:
void *DSOHandle);
};
-class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
+class LegacyLocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
public:
/// Create a runtime-overrides class.
template <typename MangleFtorT>
- LocalCXXRuntimeOverrides(const MangleFtorT &Mangle) {
+ LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle) {
addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
}
@@ -242,22 +202,44 @@ private:
StringMap<JITTargetAddress> CXXRuntimeOverrides;
};
-class LocalCXXRuntimeOverrides2 : public LocalCXXRuntimeOverridesBase {
+class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
public:
- Error enable(VSO &V, MangleAndInterner &Mangler);
+ Error enable(JITDylib &JD, MangleAndInterner &Mangler);
};
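
A hedged sketch of the non-legacy overrides in use (JD and a MangleAndInterner named Mangle are assumed):

\code{.cpp}
LocalCXXRuntimeOverrides CXXRuntimeOverrides;
// Defines __dso_handle and __cxa_atexit in JD so atexit handlers registered
// by JIT'd code can be run via runDestructors() instead of at process exit.
if (auto Err = CXXRuntimeOverrides.enable(JD, Mangle))
  ES.reportError(std::move(Err));
\endcode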
/// A utility class to expose symbols found via dlsym to the JIT.
///
-/// If an instance of this class is attached to a VSO as a fallback definition
-/// generator, then any symbol found in the given DynamicLibrary that passes
-/// the 'Allow' predicate will be added to the VSO.
-class DynamicLibraryFallbackGenerator {
+/// If an instance of this class is attached to a JITDylib as a fallback
+/// definition generator, then any symbol found in the given DynamicLibrary that
+/// passes the 'Allow' predicate will be added to the JITDylib.
+class DynamicLibrarySearchGenerator {
public:
using SymbolPredicate = std::function<bool(SymbolStringPtr)>;
- DynamicLibraryFallbackGenerator(sys::DynamicLibrary Dylib,
- const DataLayout &DL, SymbolPredicate Allow);
- SymbolNameSet operator()(VSO &V, const SymbolNameSet &Names);
+
+ /// Create a DynamicLibrarySearchGenerator that searches for symbols in the
+ /// given sys::DynamicLibrary.
+ /// If the Allow predicate is given then only symbols matching the predicate
+ /// will be searched for in the DynamicLibrary. If the predicate is not given
+ /// then all symbols will be searched for.
+ DynamicLibrarySearchGenerator(sys::DynamicLibrary Dylib, const DataLayout &DL,
+ SymbolPredicate Allow = SymbolPredicate());
+
+ /// Permanently loads the library at the given path and, on success, returns
+ /// a DynamicLibrarySearchGenerator that will search it for symbol
+ /// definitions. On failure returns the reason the library failed to load.
+ static Expected<DynamicLibrarySearchGenerator>
+ Load(const char *FileName, const DataLayout &DL,
+ SymbolPredicate Allow = SymbolPredicate());
+
+ /// Creates a DynamicLibrarySearchGenerator that searches for symbols in
+ /// the current process.
+ static Expected<DynamicLibrarySearchGenerator>
+ GetForCurrentProcess(const DataLayout &DL,
+ SymbolPredicate Allow = SymbolPredicate()) {
+ return Load(nullptr, DL, std::move(Allow));
+ }
+
+ SymbolNameSet operator()(JITDylib &JD, const SymbolNameSet &Names);
private:
sys::DynamicLibrary Dylib;
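
A hedged sketch of the common case, exposing the host process's own symbols to JIT'd code (ES and a DataLayout DL are assumed):

\code{.cpp}
auto ProcessSyms = DynamicLibrarySearchGenerator::GetForCurrentProcess(DL);
if (!ProcessSyms)
  ES.reportError(ProcessSyms.takeError());
else
  ES.getMainJITDylib().setGenerator(std::move(*ProcessSyms));
\endcode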
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index ad6481548d59..30d71e69cd70 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -28,21 +28,20 @@ class Module;
namespace orc {
-class IRCompileLayer2 : public IRLayer {
+class IRCompileLayer : public IRLayer {
public:
using CompileFunction =
std::function<Expected<std::unique_ptr<MemoryBuffer>>(Module &)>;
using NotifyCompiledFunction =
- std::function<void(VModuleKey K, std::unique_ptr<Module>)>;
+ std::function<void(VModuleKey K, ThreadSafeModule TSM)>;
- IRCompileLayer2(ExecutionSession &ES, ObjectLayer &BaseLayer,
- CompileFunction Compile);
+ IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
+ CompileFunction Compile);
void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled);
- void emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) override;
+ void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
private:
mutable std::mutex IRLayerMutex;
@@ -57,15 +56,15 @@ private:
/// object file and adds this module file to the layer below, which must
/// implement the object layer concept.
template <typename BaseLayerT, typename CompileFtor>
-class IRCompileLayer {
+class LegacyIRCompileLayer {
public:
/// Callback type for notifications when modules are compiled.
using NotifyCompiledCallback =
std::function<void(VModuleKey K, std::unique_ptr<Module>)>;
- /// Construct an IRCompileLayer with the given BaseLayer, which must
+ /// Construct a LegacyIRCompileLayer with the given BaseLayer, which must
/// implement the ObjectLayer concept.
- IRCompileLayer(
+ LegacyIRCompileLayer(
BaseLayerT &BaseLayer, CompileFtor Compile,
NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback())
: BaseLayer(BaseLayer), Compile(std::move(Compile)),
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 266a0f45b3e4..49e65b9f2a80 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -23,24 +23,24 @@ namespace llvm {
class Module;
namespace orc {
-class IRTransformLayer2 : public IRLayer {
+class IRTransformLayer : public IRLayer {
public:
+ using TransformFunction = std::function<Expected<ThreadSafeModule>(
+ ThreadSafeModule, const MaterializationResponsibility &R)>;
- using TransformFunction =
- std::function<Expected<std::unique_ptr<Module>>(std::unique_ptr<Module>)>;
-
- IRTransformLayer2(ExecutionSession &ES, IRLayer &BaseLayer,
- TransformFunction Transform = identityTransform);
+ IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
+ TransformFunction Transform = identityTransform);
void setTransform(TransformFunction Transform) {
this->Transform = std::move(Transform);
}
- void emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) override;
+ void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
- static std::unique_ptr<Module> identityTransform(std::unique_ptr<Module> M) {
- return M;
+ static ThreadSafeModule
+ identityTransform(ThreadSafeModule TSM,
+ const MaterializationResponsibility &R) {
+ return TSM;
}
private:
@@ -53,11 +53,11 @@ private:
/// This layer applies a user supplied transform to each module that is added,
/// then adds the transformed module to the layer below.
template <typename BaseLayerT, typename TransformFtor>
-class IRTransformLayer {
+class LegacyIRTransformLayer {
public:
- /// Construct an IRTransformLayer with the given BaseLayer
- IRTransformLayer(BaseLayerT &BaseLayer,
+ /// Construct a LegacyIRTransformLayer with the given BaseLayer
+ LegacyIRTransformLayer(BaseLayerT &BaseLayer,
TransformFtor Transform = TransformFtor())
: BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
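
A hedged sketch of the new two-argument transform signature shown above (ES and an IRCompileLayer named CompileLayer are assumed):

\code{.cpp}
IRTransformLayer TransformLayer(
    ES, CompileLayer,
    [](ThreadSafeModule TSM,
       const MaterializationResponsibility &R) -> Expected<ThreadSafeModule> {
      // Optimize the module held by TSM here, then pass it on unchanged.
      return std::move(TSM);
    });
\endcode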
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index 8b0b3fdb7df4..c2527802f6a7 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -47,92 +47,101 @@ class Value;
namespace orc {
-/// Target-independent base class for compile callback management.
-class JITCompileCallbackManager {
+/// Base class for pools of compiler re-entry trampolines.
+/// These trampolines are callable addresses that save all register state
+/// before calling a supplied function to return the trampoline landing
+/// address, then restore all state before jumping to that address. They
+/// are used by various ORC APIs to support lazy compilation.
+class TrampolinePool {
public:
- using CompileFunction = std::function<JITTargetAddress()>;
+ virtual ~TrampolinePool() {}
- /// Construct a JITCompileCallbackManager.
- /// @param ErrorHandlerAddress The address of an error handler in the target
- /// process to be used if a compile callback fails.
- JITCompileCallbackManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
- : ES(ES), CallbacksVSO(ES.createVSO("<Callbacks>")),
- ErrorHandlerAddress(ErrorHandlerAddress) {}
-
- virtual ~JITCompileCallbackManager() = default;
-
- /// Reserve a compile callback.
- Expected<JITTargetAddress> getCompileCallback(CompileFunction Compile);
+ /// Get an available trampoline address.
+ /// Returns an error if no trampoline can be created.
+ virtual Expected<JITTargetAddress> getTrampoline() = 0;
- /// Execute the callback for the given trampoline id. Called by the JIT
- /// to compile functions on demand.
- JITTargetAddress executeCompileCallback(JITTargetAddress TrampolineAddr);
+private:
+ virtual void anchor();
+};
-protected:
- std::vector<JITTargetAddress> AvailableTrampolines;
+/// A trampoline pool for trampolines within the current process.
+template <typename ORCABI> class LocalTrampolinePool : public TrampolinePool {
+public:
+ using GetTrampolineLandingFunction =
+ std::function<JITTargetAddress(JITTargetAddress TrampolineAddr)>;
+
+ /// Creates a LocalTrampolinePool with the given GetTrampolineLanding function.
+ /// Returns an error if this function is unable to correctly allocate, write
+ /// and protect the resolver code block.
+ static Expected<std::unique_ptr<LocalTrampolinePool>>
+ Create(GetTrampolineLandingFunction GetTrampolineLanding) {
+ Error Err = Error::success();
+
+ auto LTP = std::unique_ptr<LocalTrampolinePool>(
+ new LocalTrampolinePool(std::move(GetTrampolineLanding), Err));
+
+ if (Err)
+ return std::move(Err);
+ return std::move(LTP);
+ }
-private:
- Expected<JITTargetAddress> getAvailableTrampolineAddr() {
- if (this->AvailableTrampolines.empty())
+ /// Get a free trampoline. Returns an error if one can not be provided (e.g.
+ /// because the pool is empty and can not be grown).
+ Expected<JITTargetAddress> getTrampoline() override {
+ std::lock_guard<std::mutex> Lock(LTPMutex);
+ if (AvailableTrampolines.empty()) {
if (auto Err = grow())
return std::move(Err);
- assert(!this->AvailableTrampolines.empty() &&
- "Failed to grow available trampolines.");
- JITTargetAddress TrampolineAddr = this->AvailableTrampolines.back();
- this->AvailableTrampolines.pop_back();
+ }
+ assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool");
+ auto TrampolineAddr = AvailableTrampolines.back();
+ AvailableTrampolines.pop_back();
return TrampolineAddr;
}
- // Create new trampolines - to be implemented in subclasses.
- virtual Error grow() = 0;
+ /// Returns the given trampoline to the pool for re-use.
+ void releaseTrampoline(JITTargetAddress TrampolineAddr) {
+ std::lock_guard<std::mutex> Lock(LTPMutex);
+ AvailableTrampolines.push_back(TrampolineAddr);
+ }
- virtual void anchor();
+private:
+ static JITTargetAddress reenter(void *TrampolinePoolPtr, void *TrampolineId) {
+ LocalTrampolinePool<ORCABI> *TrampolinePool =
+ static_cast<LocalTrampolinePool *>(TrampolinePoolPtr);
+ return TrampolinePool->GetTrampolineLanding(static_cast<JITTargetAddress>(
+ reinterpret_cast<uintptr_t>(TrampolineId)));
+ }
- std::mutex CCMgrMutex;
- ExecutionSession &ES;
- VSO &CallbacksVSO;
- JITTargetAddress ErrorHandlerAddress;
- std::map<JITTargetAddress, SymbolStringPtr> AddrToSymbol;
- size_t NextCallbackId = 0;
-};
+ LocalTrampolinePool(GetTrampolineLandingFunction GetTrampolineLanding,
+ Error &Err)
+ : GetTrampolineLanding(std::move(GetTrampolineLanding)) {
-/// Manage compile callbacks for in-process JITs.
-template <typename TargetT>
-class LocalJITCompileCallbackManager : public JITCompileCallbackManager {
-public:
- /// Construct a InProcessJITCompileCallbackManager.
- /// @param ErrorHandlerAddress The address of an error handler in the target
- /// process to be used if a compile callback fails.
- LocalJITCompileCallbackManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
- : JITCompileCallbackManager(ES, ErrorHandlerAddress) {
- /// Set up the resolver block.
+ ErrorAsOutParameter _(&Err);
+
+ /// Try to set up the resolver block.
std::error_code EC;
ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
- TargetT::ResolverCodeSize, nullptr,
+ ORCABI::ResolverCodeSize, nullptr,
sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
- assert(!EC && "Failed to allocate resolver block");
+ if (EC) {
+ Err = errorCodeToError(EC);
+ return;
+ }
- TargetT::writeResolverCode(static_cast<uint8_t *>(ResolverBlock.base()),
- &reenter, this);
+ ORCABI::writeResolverCode(static_cast<uint8_t *>(ResolverBlock.base()),
+ &reenter, this);
EC = sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(),
sys::Memory::MF_READ |
sys::Memory::MF_EXEC);
- assert(!EC && "Failed to mprotect resolver block");
+ if (EC) {
+ Err = errorCodeToError(EC);
+ return;
+ }
}
-private:
- static JITTargetAddress reenter(void *CCMgr, void *TrampolineId) {
- JITCompileCallbackManager *Mgr =
- static_cast<JITCompileCallbackManager *>(CCMgr);
- return Mgr->executeCompileCallback(
- static_cast<JITTargetAddress>(
- reinterpret_cast<uintptr_t>(TrampolineId)));
- }
-
- Error grow() override {
+ Error grow() {
assert(this->AvailableTrampolines.empty() && "Growing prematurely?");
std::error_code EC;
@@ -144,17 +153,17 @@ private:
return errorCodeToError(EC);
unsigned NumTrampolines =
- (sys::Process::getPageSize() - TargetT::PointerSize) /
- TargetT::TrampolineSize;
+ (sys::Process::getPageSize() - ORCABI::PointerSize) /
+ ORCABI::TrampolineSize;
uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base());
- TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(),
- NumTrampolines);
+ ORCABI::writeTrampolines(TrampolineMem, ResolverBlock.base(),
+ NumTrampolines);
for (unsigned I = 0; I < NumTrampolines; ++I)
this->AvailableTrampolines.push_back(
static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(
- TrampolineMem + (I * TargetT::TrampolineSize))));
+ TrampolineMem + (I * ORCABI::TrampolineSize))));
if (auto EC = sys::Memory::protectMappedMemory(
TrampolineBlock.getMemoryBlock(),
@@ -165,8 +174,87 @@ private:
return Error::success();
}
+ GetTrampolineLandingFunction GetTrampolineLanding;
+
+ std::mutex LTPMutex;
sys::OwningMemoryBlock ResolverBlock;
std::vector<sys::OwningMemoryBlock> TrampolineBlocks;
+ std::vector<JITTargetAddress> AvailableTrampolines;
+};
+
+/// Target-independent base class for compile callback management.
+class JITCompileCallbackManager {
+public:
+ using CompileFunction = std::function<JITTargetAddress()>;
+
+ virtual ~JITCompileCallbackManager() = default;
+
+ /// Reserve a compile callback.
+ Expected<JITTargetAddress> getCompileCallback(CompileFunction Compile);
+
+ /// Execute the callback for the given trampoline id. Called by the JIT
+ /// to compile functions on demand.
+ JITTargetAddress executeCompileCallback(JITTargetAddress TrampolineAddr);
+
+protected:
+ /// Construct a JITCompileCallbackManager.
+ JITCompileCallbackManager(std::unique_ptr<TrampolinePool> TP,
+ ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddress)
+ : TP(std::move(TP)), ES(ES),
+ CallbacksJD(ES.createJITDylib("<Callbacks>")),
+ ErrorHandlerAddress(ErrorHandlerAddress) {}
+
+ void setTrampolinePool(std::unique_ptr<TrampolinePool> TP) {
+ this->TP = std::move(TP);
+ }
+
+private:
+ std::mutex CCMgrMutex;
+ std::unique_ptr<TrampolinePool> TP;
+ ExecutionSession &ES;
+ JITDylib &CallbacksJD;
+ JITTargetAddress ErrorHandlerAddress;
+ std::map<JITTargetAddress, SymbolStringPtr> AddrToSymbol;
+ size_t NextCallbackId = 0;
+};
+
+/// Manage compile callbacks for in-process JITs.
+template <typename ORCABI>
+class LocalJITCompileCallbackManager : public JITCompileCallbackManager {
+public:
+ /// Create a new LocalJITCompileCallbackManager.
+ static Expected<std::unique_ptr<LocalJITCompileCallbackManager>>
+ Create(ExecutionSession &ES, JITTargetAddress ErrorHandlerAddress) {
+ Error Err = Error::success();
+ auto CCMgr = std::unique_ptr<LocalJITCompileCallbackManager>(
+ new LocalJITCompileCallbackManager(ES, ErrorHandlerAddress, Err));
+ if (Err)
+ return std::move(Err);
+ return std::move(CCMgr);
+ }
+
+private:
+ /// Construct a LocalJITCompileCallbackManager.
+ /// @param ErrorHandlerAddress The address of an error handler in the target
+ /// process to be used if a compile callback fails.
+ LocalJITCompileCallbackManager(ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddress,
+ Error &Err)
+ : JITCompileCallbackManager(nullptr, ES, ErrorHandlerAddress) {
+ ErrorAsOutParameter _(&Err);
+ auto TP = LocalTrampolinePool<ORCABI>::Create(
+ [this](JITTargetAddress TrampolineAddr) {
+ return executeCompileCallback(TrampolineAddr);
+ });
+
+ if (!TP) {
+ Err = TP.takeError();
+ return;
+ }
+
+ setTrampolinePool(std::move(*TP));
+ }
};
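
A hedged sketch of the new Create factory (OrcX86_64_SysV is the in-process ABI from OrcABISupport.h; compileFoo is a hypothetical function that compiles the body and returns the address to jump to):

\code{.cpp}
auto CCMgr = LocalJITCompileCallbackManager<OrcX86_64_SysV>::Create(
    ES, /*ErrorHandlerAddress=*/0);
if (!CCMgr)
  ES.reportError(CCMgr.takeError());
else {
  // Reserve a callback; the lambda runs the first time the callback address
  // is executed and returns the landing address for the trampoline.
  auto CallbackAddr = (*CCMgr)->getCompileCallback(
      []() -> JITTargetAddress { return compileFoo(); });
  if (!CallbackAddr)
    ES.reportError(CallbackAddr.takeError());
}
\endcode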
/// Base class for managing collections of named indirect stubs.
@@ -207,6 +295,7 @@ class LocalIndirectStubsManager : public IndirectStubsManager {
public:
Error createStub(StringRef StubName, JITTargetAddress StubAddr,
JITSymbolFlags StubFlags) override {
+ std::lock_guard<std::mutex> Lock(StubsMutex);
if (auto Err = reserveStubs(1))
return Err;
@@ -216,6 +305,7 @@ public:
}
Error createStubs(const StubInitsMap &StubInits) override {
+ std::lock_guard<std::mutex> Lock(StubsMutex);
if (auto Err = reserveStubs(StubInits.size()))
return Err;
@@ -227,6 +317,7 @@ public:
}
JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override {
+ std::lock_guard<std::mutex> Lock(StubsMutex);
auto I = StubIndexes.find(Name);
if (I == StubIndexes.end())
return nullptr;
@@ -242,6 +333,7 @@ public:
}
JITEvaluatedSymbol findPointer(StringRef Name) override {
+ std::lock_guard<std::mutex> Lock(StubsMutex);
auto I = StubIndexes.find(Name);
if (I == StubIndexes.end())
return nullptr;
@@ -254,11 +346,15 @@ public:
}
Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override {
+ using AtomicIntPtr = std::atomic<uintptr_t>;
+
+ std::lock_guard<std::mutex> Lock(StubsMutex);
auto I = StubIndexes.find(Name);
assert(I != StubIndexes.end() && "No stub pointer for symbol");
auto Key = I->second.first;
- *IndirectStubsInfos[Key.first].getPtr(Key.second) =
- reinterpret_cast<void *>(static_cast<uintptr_t>(NewAddr));
+ AtomicIntPtr *AtomicStubPtr = reinterpret_cast<AtomicIntPtr *>(
+ IndirectStubsInfos[Key.first].getPtr(Key.second));
+ *AtomicStubPtr = static_cast<uintptr_t>(NewAddr);
return Error::success();
}
@@ -288,6 +384,7 @@ private:
StubIndexes[StubName] = std::make_pair(Key, StubFlags);
}
+ std::mutex StubsMutex;
std::vector<typename TargetT::IndirectStubsInfo> IndirectStubsInfos;
using StubKey = std::pair<uint16_t, uint16_t>;
std::vector<StubKey> FreeStubs;
@@ -299,7 +396,7 @@ private:
/// The given target triple will determine the ABI, and the given
/// ErrorHandlerAddress will be used by the resulting compile callback
/// manager if a compile callback fails.
-std::unique_ptr<JITCompileCallbackManager>
+Expected<std::unique_ptr<JITCompileCallbackManager>>
createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
JITTargetAddress ErrorHandlerAddress);
@@ -325,12 +422,18 @@ GlobalVariable *createImplPointer(PointerType &PT, Module &M, const Twine &Name,
/// indirect call using the given function pointer.
void makeStub(Function &F, Value &ImplPointer);
-/// Raise linkage types and rename as necessary to ensure that all
-/// symbols are accessible for other modules.
-///
-/// This should be called before partitioning a module to ensure that the
-/// partitions retain access to each other's symbols.
-void makeAllSymbolsExternallyAccessible(Module &M);
+/// Promotes private symbols to global hidden, and renames to prevent clashes
+/// with other promoted symbols. The same SymbolPromoter instance should be
+/// used for all symbols to be added to a single JITDylib.
+class SymbolLinkagePromoter {
+public:
+ /// Promote symbols in the given module. Returns the set of global values
+ /// that have been renamed/promoted.
+ std::vector<GlobalValue *> operator()(Module &M);
+
+private:
+ unsigned NextId = 0;
+};
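
A hedged sketch of the promoter described above (M is a hypothetical Module about to be partitioned):

\code{.cpp}
SymbolLinkagePromoter PromoteSymbols;
// Private symbols in M become hidden globals with unique names, so the
// partitions can still reference each other after splitting.
std::vector<GlobalValue *> Promoted = PromoteSymbols(M);
\endcode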
/// Clone a function declaration into a new module.
///
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
new file mode 100644
index 000000000000..eb9b6bf2dea6
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
@@ -0,0 +1,130 @@
+//===- JITTargetMachineBuilder.h - Build TargetMachines for JIT -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A utility for building TargetMachines for JITs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_JITTARGETMACHINEBUILDER_H
+#define LLVM_EXECUTIONENGINE_ORC_JITTARGETMACHINEBUILDER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+/// A utility class for building TargetMachines for JITs.
+class JITTargetMachineBuilder {
+public:
+ /// Create a JITTargetMachineBuilder based on the given triple.
+ ///
+ /// Note: TargetOptions is default-constructed, then EmulatedTLS and
+ /// ExplicitEmulatedTLS are set to true. If EmulatedTLS is not
+ /// required, these values should be reset before calling
+ /// createTargetMachine.
+ JITTargetMachineBuilder(Triple TT);
+
+ /// Create a JITTargetMachineBuilder for the host system.
+ ///
+ /// Note: TargetOptions is default-constructed, then EmulatedTLS and
+ /// ExplicitEmulatedTLS are set to true. If EmulatedTLS is not
+ /// required, these values should be reset before calling
+ /// createTargetMachine.
+ static Expected<JITTargetMachineBuilder> detectHost();
+
+ /// Create a TargetMachine.
+ ///
+ /// This operation will fail if the requested target is not registered,
+ /// in which case see llvm/Support/TargetSelect.h. To JIT IR the Target and
+ /// the target's AsmPrinter must both be registered. To JIT assembly
+ /// (including inline and module level assembly) the target's AsmParser must
+ /// also be registered.
+ Expected<std::unique_ptr<TargetMachine>> createTargetMachine();
+
+ /// Get the default DataLayout for the target.
+ ///
+ /// Note: This is reasonably expensive, as it creates a temporary
+ /// TargetMachine instance under the hood. It is only suitable for use during
+ /// JIT setup.
+ Expected<DataLayout> getDefaultDataLayoutForTarget() {
+ auto TM = createTargetMachine();
+ if (!TM)
+ return TM.takeError();
+ return (*TM)->createDataLayout();
+ }
+
+ /// Set the CPU string.
+ JITTargetMachineBuilder &setCPU(std::string CPU) {
+ this->CPU = std::move(CPU);
+ return *this;
+ }
+
+ /// Set the relocation model.
+ JITTargetMachineBuilder &setRelocationModel(Optional<Reloc::Model> RM) {
+ this->RM = std::move(RM);
+ return *this;
+ }
+
+ /// Set the code model.
+ JITTargetMachineBuilder &setCodeModel(Optional<CodeModel::Model> CM) {
+ this->CM = std::move(CM);
+ return *this;
+ }
+
+ /// Set the LLVM CodeGen optimization level.
+ JITTargetMachineBuilder &setCodeGenOptLevel(CodeGenOpt::Level OptLevel) {
+ this->OptLevel = OptLevel;
+ return *this;
+ }
+
+ /// Add subtarget features.
+ JITTargetMachineBuilder &
+ addFeatures(const std::vector<std::string> &FeatureVec);
+
+ /// Access subtarget features.
+ SubtargetFeatures &getFeatures() { return Features; }
+
+ /// Access subtarget features.
+ const SubtargetFeatures &getFeatures() const { return Features; }
+
+ /// Access TargetOptions.
+ TargetOptions &getOptions() { return Options; }
+
+ /// Access TargetOptions.
+ const TargetOptions &getOptions() const { return Options; }
+
+ /// Access Triple.
+ Triple &getTargetTriple() { return TT; }
+
+ /// Access Triple.
+ const Triple &getTargetTriple() const { return TT; }
+
+private:
+ Triple TT;
+ std::string CPU;
+ SubtargetFeatures Features;
+ TargetOptions Options;
+ Optional<Reloc::Model> RM;
+ Optional<CodeModel::Model> CM;
+ CodeGenOpt::Level OptLevel = CodeGenOpt::None;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_JITTARGETMACHINEBUILDER_H
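// Usage sketch for the builder above (illustrative; error handling is reduced
// to Expected propagation, and the optimization/relocation settings are
// arbitrary example choices, not requirements):
llvm::Expected<std::unique_ptr<llvm::TargetMachine>> buildHostTargetMachine() {
  auto JTMB = llvm::orc::JITTargetMachineBuilder::detectHost();
  if (!JTMB)
    return JTMB.takeError();
  JTMB->setCodeGenOptLevel(llvm::CodeGenOpt::Default)
      .setRelocationModel(llvm::Reloc::PIC_);
  return JTMB->createTargetMachine();
}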
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index df655bd82006..ce3e5d519c73 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -19,9 +19,11 @@
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
+#include "llvm/Support/ThreadPool.h"
namespace llvm {
namespace orc {
@@ -29,44 +31,68 @@ namespace orc {
/// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT.
class LLJIT {
public:
+
+ /// Destruct this instance. If a multi-threaded instance, waits for all
+ /// compile threads to complete.
+ ~LLJIT();
+
/// Create an LLJIT instance.
+ /// If NumCompileThreads is not equal to zero, creates a multi-threaded
+ /// LLJIT with the given number of compile threads.
static Expected<std::unique_ptr<LLJIT>>
- Create(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL);
+ Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+ unsigned NumCompileThreads = 0);
- /// Returns a reference to the ExecutionSession for this JIT instance.
+ /// Returns the ExecutionSession for this instance.
ExecutionSession &getExecutionSession() { return *ES; }
- /// Returns a reference to the VSO representing the JIT'd main program.
- VSO &getMainVSO() { return Main; }
+ /// Returns a reference to the JITDylib representing the JIT'd main program.
+ JITDylib &getMainJITDylib() { return Main; }
+
+ /// Create a new JITDylib with the given name and return a reference to it.
+ JITDylib &createJITDylib(std::string Name) {
+ return ES->createJITDylib(std::move(Name));
+ }
/// Convenience method for defining an absolute symbol.
Error defineAbsolute(StringRef Name, JITEvaluatedSymbol Address);
- /// Adds an IR module to the given VSO.
- Error addIRModule(VSO &V, std::unique_ptr<Module> M);
+ /// Convenience method for defining an
- /// Adds an IR module to the Main VSO.
- Error addIRModule(std::unique_ptr<Module> M) {
- return addIRModule(Main, std::move(M));
+ /// Adds an IR module to the given JITDylib.
+ Error addIRModule(JITDylib &JD, ThreadSafeModule TSM);
+
+ /// Adds an IR module to the Main JITDylib.
+ Error addIRModule(ThreadSafeModule TSM) {
+ return addIRModule(Main, std::move(TSM));
}
- /// Look up a symbol in VSO V by the symbol's linker-mangled name (to look up
- /// symbols based on their IR name use the lookup function instead).
- Expected<JITEvaluatedSymbol> lookupLinkerMangled(VSO &V, StringRef Name);
+ /// Adds an object file to the given JITDylib.
+ Error addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj);
+
+ /// Adds an object file to the given JITDylib.
+ Error addObjectFile(std::unique_ptr<MemoryBuffer> Obj) {
+ return addObjectFile(Main, std::move(Obj));
+ }
- /// Look up a symbol in the main VSO by the symbol's linker-mangled name (to
+ /// Look up a symbol in JITDylib JD by the symbol's linker-mangled name (to
/// look up symbols based on their IR name use the lookup function instead).
+ Expected<JITEvaluatedSymbol> lookupLinkerMangled(JITDylib &JD,
+ StringRef Name);
+
+ /// Look up a symbol in the main JITDylib by the symbol's linker-mangled name
+ /// (to look up symbols based on their IR name use the lookup function
+ /// instead).
Expected<JITEvaluatedSymbol> lookupLinkerMangled(StringRef Name) {
return lookupLinkerMangled(Main, Name);
}
- /// Look up a symbol in VSO V based on its IR symbol name.
- Expected<JITEvaluatedSymbol> lookup(VSO &V, StringRef UnmangledName) {
- return lookupLinkerMangled(V, mangle(UnmangledName));
+ /// Look up a symbol in JITDylib JD based on its IR symbol name.
+ Expected<JITEvaluatedSymbol> lookup(JITDylib &JD, StringRef UnmangledName) {
+ return lookupLinkerMangled(JD, mangle(UnmangledName));
}
- /// Look up a symbol in the main VSO based on its IR symbol name.
+ /// Look up a symbol in the main JITDylib based on its IR symbol name.
Expected<JITEvaluatedSymbol> lookup(StringRef UnmangledName) {
return lookup(Main, UnmangledName);
}
@@ -77,11 +103,18 @@ public:
/// Runs all not-yet-run static destructors.
Error runDestructors() { return DtorRunner.run(); }
+ /// Returns a reference to the ObjLinkingLayer
+ RTDyldObjectLinkingLayer &getObjLinkingLayer() { return ObjLinkingLayer; }
+
protected:
+
+ /// Create an LLJIT instance with a single compile thread.
LLJIT(std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
DataLayout DL);
- std::shared_ptr<RuntimeDyld::MemoryManager> getMemoryManager(VModuleKey K);
+ /// Create an LLJIT instance with multiple compile threads.
+ LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+ DataLayout DL, unsigned NumCompileThreads);
std::string mangle(StringRef UnmangledName);
@@ -90,51 +123,68 @@ protected:
void recordCtorDtors(Module &M);
std::unique_ptr<ExecutionSession> ES;
- VSO &Main;
+ JITDylib &Main;
- std::unique_ptr<TargetMachine> TM;
DataLayout DL;
+ std::unique_ptr<ThreadPool> CompileThreads;
- RTDyldObjectLinkingLayer2 ObjLinkingLayer;
- IRCompileLayer2 CompileLayer;
+ RTDyldObjectLinkingLayer ObjLinkingLayer;
+ IRCompileLayer CompileLayer;
- CtorDtorRunner2 CtorRunner, DtorRunner;
+ CtorDtorRunner CtorRunner, DtorRunner;
};
/// An extended version of LLJIT that supports lazy function-at-a-time
/// compilation of LLVM IR.
class LLLazyJIT : public LLJIT {
public:
+
/// Create an LLLazyJIT instance.
+ /// If NumCompileThreads is not equal to zero, creates a multi-threaded
+ /// LLLazyJIT with the given number of compile threads.
static Expected<std::unique_ptr<LLLazyJIT>>
- Create(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL, LLVMContext &Ctx);
+ Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+ JITTargetAddress ErrorAddr, unsigned NumCompileThreads = 0);
/// Set an IR transform (e.g. pass manager pipeline) to run on each function
/// when it is compiled.
- void setLazyCompileTransform(IRTransformLayer2::TransformFunction Transform) {
+ void setLazyCompileTransform(IRTransformLayer::TransformFunction Transform) {
TransformLayer.setTransform(std::move(Transform));
}
- /// Add a module to be lazily compiled to VSO V.
- Error addLazyIRModule(VSO &V, std::unique_ptr<Module> M);
+ /// Sets the partition function.
+ void
+ setPartitionFunction(CompileOnDemandLayer::PartitionFunction Partition) {
+ CODLayer.setPartitionFunction(std::move(Partition));
+ }
+
+ /// Add a module to be lazily compiled to JITDylib JD.
+ Error addLazyIRModule(JITDylib &JD, ThreadSafeModule M);
- /// Add a module to be lazily compiled to the main VSO.
- Error addLazyIRModule(std::unique_ptr<Module> M) {
+ /// Add a module to be lazily compiled to the main JITDylib.
+ Error addLazyIRModule(ThreadSafeModule M) {
return addLazyIRModule(Main, std::move(M));
}
private:
+
+ // Create a single-threaded LLLazyJIT instance.
LLLazyJIT(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL, LLVMContext &Ctx,
- std::unique_ptr<JITCompileCallbackManager> CCMgr,
+ std::unique_ptr<TargetMachine> TM, DataLayout DL,
+ std::unique_ptr<LazyCallThroughManager> LCTMgr,
+ std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
+
+ // Create a multi-threaded LLLazyJIT instance.
+ LLLazyJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+ DataLayout DL, unsigned NumCompileThreads,
+ std::unique_ptr<LazyCallThroughManager> LCTMgr,
std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
- std::unique_ptr<JITCompileCallbackManager> CCMgr;
+ std::unique_ptr<LazyCallThroughManager> LCTMgr;
std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder;
- IRTransformLayer2 TransformLayer;
- CompileOnDemandLayer2 CODLayer;
+ IRTransformLayer TransformLayer;
+ CompileOnDemandLayer CODLayer;
};
} // End namespace orc
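// Usage sketch for the updated LLJIT interface (illustrative; assumes a parsed
// llvm::Module `M` and its ThreadSafeContext `TSCtx`, and uses two compile
// threads purely as an example):
auto JTMB = llvm::cantFail(llvm::orc::JITTargetMachineBuilder::detectHost());
auto DL = llvm::cantFail(JTMB.getDefaultDataLayoutForTarget());
auto J = llvm::cantFail(llvm::orc::LLJIT::Create(std::move(JTMB), std::move(DL),
                                                 /*NumCompileThreads=*/2));
llvm::cantFail(J->addIRModule(llvm::orc::ThreadSafeModule(std::move(M), TSCtx)));
// Lookup by IR name; the linker mangling is applied internally.
auto MainAddr = llvm::cantFail(J->lookup("main")).getAddress();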
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
index 91bd4fb83e6f..cd797445a2e6 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
@@ -15,7 +15,9 @@
#define LLVM_EXECUTIONENGINE_ORC_LAYER_H
#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
namespace llvm {
namespace orc {
@@ -29,14 +31,32 @@ public:
/// Returns the ExecutionSession for this layer.
ExecutionSession &getExecutionSession() { return ES; }
- /// Adds a MaterializationUnit representing the given IR to the given VSO.
- virtual Error add(VSO &V, VModuleKey K, std::unique_ptr<Module> M);
+ /// Sets the CloneToNewContextOnEmit flag (false by default).
+ ///
+ /// When set, IR modules added to this layer will be cloned on to a new
+ /// context before emit is called. This can be used by clients who want
+ /// to load all IR using one LLVMContext (to save memory via type and
+ /// constant uniquing), but want to move Modules to fresh contexts before
+ /// compiling them to enable concurrent compilation.
+ /// Single threaded clients, or clients who load every module on a new
+ /// context, need not set this.
+ void setCloneToNewContextOnEmit(bool CloneToNewContextOnEmit) {
+ this->CloneToNewContextOnEmit = CloneToNewContextOnEmit;
+ }
+
+ /// Returns the current value of the CloneToNewContextOnEmit flag.
+ bool getCloneToNewContextOnEmit() const { return CloneToNewContextOnEmit; }
+
+ /// Adds a MaterializationUnit representing the given IR to the given
+ /// JITDylib.
+ virtual Error add(JITDylib &JD, ThreadSafeModule TSM,
+ VModuleKey K = VModuleKey());
/// Emit should materialize the given IR.
- virtual void emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) = 0;
+ virtual void emit(MaterializationResponsibility R, ThreadSafeModule TSM) = 0;
private:
+ bool CloneToNewContextOnEmit = false;
ExecutionSession &ES;
};
@@ -50,22 +70,29 @@ public:
/// Create an IRMaterializationLayer. Scans the module to build the
/// SymbolFlags and SymbolToDefinition maps.
- IRMaterializationUnit(ExecutionSession &ES, std::unique_ptr<Module> M);
+ IRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM,
+ VModuleKey K);
/// Create an IRMaterializationLayer from a module, and pre-existing
/// SymbolFlags and SymbolToDefinition maps. The maps must provide
/// entries for each definition in M.
/// This constructor is useful for delegating work from one
/// IRMaterializationUnit to another.
- IRMaterializationUnit(std::unique_ptr<Module> M, SymbolFlagsMap SymbolFlags,
+ IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K,
+ SymbolFlagsMap SymbolFlags,
SymbolNameToDefinitionMap SymbolToDefinition);
+ /// Return the ModuleIdentifier as the name for this MaterializationUnit.
+ StringRef getName() const override;
+
+ const ThreadSafeModule &getModule() const { return TSM; }
+
protected:
- std::unique_ptr<Module> M;
+ ThreadSafeModule TSM;
SymbolNameToDefinitionMap SymbolToDefinition;
private:
- void discard(const VSO &V, SymbolStringPtr Name) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
};
/// MaterializationUnit that materializes modules by calling the 'emit' method
@@ -73,7 +100,8 @@ private:
class BasicIRLayerMaterializationUnit : public IRMaterializationUnit {
public:
BasicIRLayerMaterializationUnit(IRLayer &L, VModuleKey K,
- std::unique_ptr<Module> M);
+ ThreadSafeModule TSM);
+
private:
void materialize(MaterializationResponsibility R) override;
@@ -91,11 +119,13 @@ public:
/// Returns the execution session for this layer.
ExecutionSession &getExecutionSession() { return ES; }
- /// Adds a MaterializationUnit representing the given IR to the given VSO.
- virtual Error add(VSO &V, VModuleKey K, std::unique_ptr<MemoryBuffer> O);
+ /// Adds a MaterializationUnit representing the given IR to the given
+ /// JITDylib.
+ virtual Error add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O,
+ VModuleKey K = VModuleKey());
/// Emit should materialize the given IR.
- virtual void emit(MaterializationResponsibility R, VModuleKey K,
+ virtual void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) = 0;
private:
@@ -106,23 +136,31 @@ private:
/// instance) by calling 'emit' on the given ObjectLayer.
class BasicObjectLayerMaterializationUnit : public MaterializationUnit {
public:
+ static Expected<std::unique_ptr<BasicObjectLayerMaterializationUnit>>
+ Create(ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O);
-
- /// The MemoryBuffer should represent a valid object file.
- /// If there is any chance that the file is invalid it should be validated
- /// prior to constructing a BasicObjectLayerMaterializationUnit.
BasicObjectLayerMaterializationUnit(ObjectLayer &L, VModuleKey K,
- std::unique_ptr<MemoryBuffer> O);
+ std::unique_ptr<MemoryBuffer> O,
+ SymbolFlagsMap SymbolFlags);
+
+ /// Return the buffer's identifier as the name for this MaterializationUnit.
+ StringRef getName() const override;
private:
+
void materialize(MaterializationResponsibility R) override;
- void discard(const VSO &V, SymbolStringPtr Name) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
ObjectLayer &L;
- VModuleKey K;
std::unique_ptr<MemoryBuffer> O;
};
+/// Returns a SymbolFlagsMap for the object file represented by the given
+/// buffer, or an error if the buffer does not contain a valid object file.
+// FIXME: Maybe move to Core.h?
+Expected<SymbolFlagsMap> getObjectSymbolFlags(ExecutionSession &ES,
+ MemoryBufferRef ObjBuffer);
+
} // End namespace orc
} // End namespace llvm
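// Usage sketch for getObjectSymbolFlags above (illustrative; `ES` and the
// std::unique_ptr<MemoryBuffer> `ObjBuffer` are assumed to exist, and the
// fragment is meant to run inside a function that can propagate an Error):
llvm::Expected<llvm::orc::SymbolFlagsMap> Flags =
    llvm::orc::getObjectSymbolFlags(ES, ObjBuffer->getMemBufferRef());
if (!Flags)
  return Flags.takeError(); // the buffer is not a valid object file
for (auto &KV : *Flags)
  llvm::dbgs() << *KV.first << "\n"; // symbol names the object claims to define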
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
new file mode 100644
index 000000000000..b5041325bce2
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
@@ -0,0 +1,195 @@
+//===------ LazyReexports.h -- Utilities for lazy reexports -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Lazy re-exports are similar to normal re-exports, except that for callable
+// symbols the definitions are replaced with trampolines that will look up and
+// call through to the re-exported symbol at runtime. This can be used to
+// enable lazy compilation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_LAZYREEXPORTS_H
+#define LLVM_EXECUTIONENGINE_ORC_LAZYREEXPORTS_H
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
+
+namespace llvm {
+
+class Triple;
+
+namespace orc {
+
+/// Manages a set of 'lazy call-through' trampolines. These are compiler
+/// re-entry trampolines that are pre-bound to look up a given symbol in a given
+/// JITDylib, then jump to that address. Since compilation of symbols is
+/// triggered on first lookup, these call-through trampolines can be used to
+/// implement lazy compilation.
+///
+/// The easiest way to construct these call-throughs is using the lazyReexport
+/// function.
+class LazyCallThroughManager {
+public:
+ /// Clients will want to take some action on first resolution, e.g. updating
+ /// a stub pointer. Instances of this class can be used to implement this.
+ class NotifyResolvedFunction {
+ public:
+ virtual ~NotifyResolvedFunction() {}
+
+ /// Called the first time a lazy call through is executed and the target
+ /// symbol resolved.
+ virtual Error operator()(JITDylib &SourceJD,
+ const SymbolStringPtr &SymbolName,
+ JITTargetAddress ResolvedAddr) = 0;
+
+ private:
+ virtual void anchor();
+ };
+
+ template <typename NotifyResolvedImpl>
+ class NotifyResolvedFunctionImpl : public NotifyResolvedFunction {
+ public:
+ NotifyResolvedFunctionImpl(NotifyResolvedImpl NotifyResolved)
+ : NotifyResolved(std::move(NotifyResolved)) {}
+ Error operator()(JITDylib &SourceJD, const SymbolStringPtr &SymbolName,
+ JITTargetAddress ResolvedAddr) {
+ return NotifyResolved(SourceJD, SymbolName, ResolvedAddr);
+ }
+
+ private:
+ NotifyResolvedImpl NotifyResolved;
+ };
+
+ /// Create a shared NotifyResolvedFunction from a given type that is
+ /// callable with the correct signature.
+ template <typename NotifyResolvedImpl>
+ static std::unique_ptr<NotifyResolvedFunction>
+ createNotifyResolvedFunction(NotifyResolvedImpl NotifyResolved) {
+ return llvm::make_unique<NotifyResolvedFunctionImpl<NotifyResolvedImpl>>(
+ std::move(NotifyResolved));
+ }
+
+ // Return a free call-through trampoline and bind it to look up and call
+ // through to the given symbol.
+ Expected<JITTargetAddress> getCallThroughTrampoline(
+ JITDylib &SourceJD, SymbolStringPtr SymbolName,
+ std::shared_ptr<NotifyResolvedFunction> NotifyResolved);
+
+protected:
+ LazyCallThroughManager(ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr,
+ std::unique_ptr<TrampolinePool> TP);
+
+ JITTargetAddress callThroughToSymbol(JITTargetAddress TrampolineAddr);
+
+ void setTrampolinePool(std::unique_ptr<TrampolinePool> TP) {
+ this->TP = std::move(TP);
+ }
+
+private:
+ using ReexportsMap =
+ std::map<JITTargetAddress, std::pair<JITDylib *, SymbolStringPtr>>;
+
+ using NotifiersMap =
+ std::map<JITTargetAddress, std::shared_ptr<NotifyResolvedFunction>>;
+
+ std::mutex LCTMMutex;
+ ExecutionSession &ES;
+ JITTargetAddress ErrorHandlerAddr;
+ std::unique_ptr<TrampolinePool> TP;
+ ReexportsMap Reexports;
+ NotifiersMap Notifiers;
+};
+
+/// A lazy call-through manager that builds trampolines in the current process.
+class LocalLazyCallThroughManager : public LazyCallThroughManager {
+private:
+ LocalLazyCallThroughManager(ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr)
+ : LazyCallThroughManager(ES, ErrorHandlerAddr, nullptr) {}
+
+ template <typename ORCABI> Error init() {
+ auto TP = LocalTrampolinePool<ORCABI>::Create(
+ [this](JITTargetAddress TrampolineAddr) {
+ return callThroughToSymbol(TrampolineAddr);
+ });
+
+ if (!TP)
+ return TP.takeError();
+
+ setTrampolinePool(std::move(*TP));
+ return Error::success();
+ }
+
+public:
+ /// Create a LocalLazyCallThroughManager using the given ABI. See
+ /// createLocalLazyCallThroughManager.
+ template <typename ORCABI>
+ static Expected<std::unique_ptr<LocalLazyCallThroughManager>>
+ Create(ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr) {
+ auto LLCTM = std::unique_ptr<LocalLazyCallThroughManager>(
+ new LocalLazyCallThroughManager(ES, ErrorHandlerAddr));
+
+ if (auto Err = LLCTM->init<ORCABI>())
+ return std::move(Err);
+
+ return std::move(LLCTM);
+ }
+};
+
+/// Create a LocalLazyCallThroughManager from the given triple and execution
+/// session.
+Expected<std::unique_ptr<LazyCallThroughManager>>
+createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr);
+
+/// A materialization unit that builds lazy re-exports. These are callable
+/// entry points that call through to the given symbols.
+/// Unlike a 'true' re-export, the address of the lazy re-export will not
+/// match the address of the re-exported symbol, but calling it will behave
+/// the same as calling the re-exported symbol.
+class LazyReexportsMaterializationUnit : public MaterializationUnit {
+public:
+ LazyReexportsMaterializationUnit(LazyCallThroughManager &LCTManager,
+ IndirectStubsManager &ISManager,
+ JITDylib &SourceJD,
+ SymbolAliasMap CallableAliases,
+ VModuleKey K);
+
+ StringRef getName() const override;
+
+private:
+ void materialize(MaterializationResponsibility R) override;
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
+ static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases);
+
+ LazyCallThroughManager &LCTManager;
+ IndirectStubsManager &ISManager;
+ JITDylib &SourceJD;
+ SymbolAliasMap CallableAliases;
+ std::shared_ptr<LazyCallThroughManager::NotifyResolvedFunction>
+ NotifyResolved;
+};
+
+/// Define lazy-reexports based on the given SymbolAliasMap. Each lazy re-export
+/// is a callable symbol that will look up and dispatch to the given aliasee on
+/// first call. All subsequent calls will go directly to the aliasee.
+inline std::unique_ptr<LazyReexportsMaterializationUnit>
+lazyReexports(LazyCallThroughManager &LCTManager,
+ IndirectStubsManager &ISManager, JITDylib &SourceJD,
+ SymbolAliasMap CallableAliases, VModuleKey K = VModuleKey()) {
+ return llvm::make_unique<LazyReexportsMaterializationUnit>(
+ LCTManager, ISManager, SourceJD, std::move(CallableAliases),
+ std::move(K));
+}
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_LAZYREEXPORTS_H
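// Usage sketch for lazyReexports above (illustrative; assumes an
// ExecutionSession `ES` with an intern() member, a host Triple `TT`, and
// JITDylibs `JD` and `ImplJD`; the symbol names and flags are examples only):
auto LCTM = llvm::cantFail(llvm::orc::createLocalLazyCallThroughManager(
    TT, ES, /*ErrorHandlerAddr=*/0));
auto ISM = llvm::orc::createLocalIndirectStubsManagerBuilder(TT)();
auto LazyFlags = static_cast<llvm::JITSymbolFlags::FlagNames>(
    llvm::JITSymbolFlags::Exported | llvm::JITSymbolFlags::Callable);
llvm::orc::SymbolAliasMap Aliases;
Aliases[ES.intern("foo")] = {ES.intern("foo_body"), LazyFlags};
llvm::cantFail(JD.define(
    llvm::orc::lazyReexports(*LCTM, *ISM, ImplJD, std::move(Aliases))));
// Calling `foo` now triggers materialization of `foo_body` in ImplJD on first use.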
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h
index 52c8c162ff0b..4c6162ac4b8b 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h
@@ -31,12 +31,12 @@ class SymbolResolver {
public:
virtual ~SymbolResolver() = default;
- /// Returns the flags for each symbol in Symbols that can be found,
- /// along with the set of symbol that could not be found.
- virtual SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) = 0;
+ /// Returns the subset of the given symbols that the caller is responsible for
+ /// materializing.
+ virtual SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) = 0;
/// For each symbol in Symbols that can be found, assigns that symbol's
- /// value in Query. Returns the set of symbols that could not be found.
+ /// value in Query. Returns the set of symbols that could not be found.
virtual SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
SymbolNameSet Symbols) = 0;
@@ -46,16 +46,18 @@ private:
/// Implements SymbolResolver with a pair of supplied function objects
/// for convenience. See createSymbolResolver.
-template <typename LookupFlagsFn, typename LookupFn>
+template <typename GetResponsibilitySetFn, typename LookupFn>
class LambdaSymbolResolver final : public SymbolResolver {
public:
- template <typename LookupFlagsFnRef, typename LookupFnRef>
- LambdaSymbolResolver(LookupFlagsFnRef &&LookupFlags, LookupFnRef &&Lookup)
- : LookupFlags(std::forward<LookupFlagsFnRef>(LookupFlags)),
+ template <typename GetResponsibilitySetFnRef, typename LookupFnRef>
+ LambdaSymbolResolver(GetResponsibilitySetFnRef &&GetResponsibilitySet,
+ LookupFnRef &&Lookup)
+ : GetResponsibilitySet(
+ std::forward<GetResponsibilitySetFnRef>(GetResponsibilitySet)),
Lookup(std::forward<LookupFnRef>(Lookup)) {}
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) final {
- return LookupFlags(Symbols);
+ SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final {
+ return GetResponsibilitySet(Symbols);
}
SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
@@ -64,34 +66,37 @@ public:
}
private:
- LookupFlagsFn LookupFlags;
+ GetResponsibilitySetFn GetResponsibilitySet;
LookupFn Lookup;
};
/// Creates a SymbolResolver implementation from the pair of supplied
/// function objects.
-template <typename LookupFlagsFn, typename LookupFn>
+template <typename GetResponsibilitySetFn, typename LookupFn>
std::unique_ptr<LambdaSymbolResolver<
typename std::remove_cv<
- typename std::remove_reference<LookupFlagsFn>::type>::type,
+ typename std::remove_reference<GetResponsibilitySetFn>::type>::type,
typename std::remove_cv<
typename std::remove_reference<LookupFn>::type>::type>>
-createSymbolResolver(LookupFlagsFn &&LookupFlags, LookupFn &&Lookup) {
+createSymbolResolver(GetResponsibilitySetFn &&GetResponsibilitySet,
+ LookupFn &&Lookup) {
using LambdaSymbolResolverImpl = LambdaSymbolResolver<
typename std::remove_cv<
- typename std::remove_reference<LookupFlagsFn>::type>::type,
+ typename std::remove_reference<GetResponsibilitySetFn>::type>::type,
typename std::remove_cv<
typename std::remove_reference<LookupFn>::type>::type>;
return llvm::make_unique<LambdaSymbolResolverImpl>(
- std::forward<LookupFlagsFn>(LookupFlags), std::forward<LookupFn>(Lookup));
+ std::forward<GetResponsibilitySetFn>(GetResponsibilitySet),
+ std::forward<LookupFn>(Lookup));
}
+/// Legacy adapter. Remove once we kill off the old ORC layers.
class JITSymbolResolverAdapter : public JITSymbolResolver {
public:
JITSymbolResolverAdapter(ExecutionSession &ES, SymbolResolver &R,
MaterializationResponsibility *MR);
- Expected<LookupFlagsResult> lookupFlags(const LookupSet &Symbols) override;
- Expected<LookupResult> lookup(const LookupSet &Symbols) override;
+ Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override;
+ void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) override;
private:
ExecutionSession &ES;
@@ -100,27 +105,29 @@ private:
MaterializationResponsibility *MR;
};
-/// Use the given legacy-style FindSymbol function (i.e. a function that
-/// takes a const std::string& or StringRef and returns a JITSymbol) to
-/// find the flags for each symbol in Symbols and store their flags in
-/// SymbolFlags. If any JITSymbol returned by FindSymbol is in an error
-/// state the function returns immediately with that error, otherwise it
-/// returns the set of symbols not found.
+/// Use the given legacy-style FindSymbol function (i.e. a function that takes
+/// a const std::string& or StringRef and returns a JITSymbol) to get the
+/// subset of symbols that the caller is responsible for materializing. If any
+/// JITSymbol returned by FindSymbol is in an error state the function returns
+/// immediately with that error.
///
-/// Useful for implementing lookupFlags bodies that query legacy resolvers.
+/// Useful for implementing getResponsibilitySet bodies that query legacy
+/// resolvers.
template <typename FindSymbolFn>
-Expected<SymbolFlagsMap> lookupFlagsWithLegacyFn(const SymbolNameSet &Symbols,
- FindSymbolFn FindSymbol) {
- SymbolFlagsMap SymbolFlags;
+Expected<SymbolNameSet>
+getResponsibilitySetWithLegacyFn(const SymbolNameSet &Symbols,
+ FindSymbolFn FindSymbol) {
+ SymbolNameSet Result;
for (auto &S : Symbols) {
- if (JITSymbol Sym = FindSymbol(*S))
- SymbolFlags[S] = Sym.getFlags();
- else if (auto Err = Sym.takeError())
+ if (JITSymbol Sym = FindSymbol(*S)) {
+ if (!Sym.getFlags().isStrong())
+ Result.insert(S);
+ } else if (auto Err = Sym.takeError())
return std::move(Err);
}
- return SymbolFlags;
+ return Result;
}
/// Use the given legacy-style FindSymbol function (i.e. a function that
@@ -177,12 +184,13 @@ public:
: ES(ES), LegacyLookup(std::move(LegacyLookup)),
ReportError(std::move(ReportError)) {}
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) final {
- if (auto SymbolFlags = lookupFlagsWithLegacyFn(Symbols, LegacyLookup))
- return std::move(*SymbolFlags);
+ SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final {
+ if (auto ResponsibilitySet =
+ getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup))
+ return std::move(*ResponsibilitySet);
else {
- ReportError(SymbolFlags.takeError());
- return SymbolFlagsMap();
+ ReportError(ResponsibilitySet.takeError());
+ return SymbolNameSet();
}
}
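// Usage sketch for the legacy-resolver helpers above (illustrative; assumes an
// ExecutionSession `ES` and a legacy `LegacyLookup` callable that takes a
// StringRef and returns a JITSymbol; lookupWithLegacyFn is the companion
// helper declared elsewhere in this header):
auto Resolver = llvm::orc::createSymbolResolver(
    [&](const llvm::orc::SymbolNameSet &Symbols) {
      auto RS = llvm::orc::getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup);
      if (!RS) {
        ES.reportError(RS.takeError());
        return llvm::orc::SymbolNameSet();
      }
      return std::move(*RS);
    },
    [&](std::shared_ptr<llvm::orc::AsynchronousSymbolQuery> Query,
        llvm::orc::SymbolNameSet Symbols) {
      return llvm::orc::lookupWithLegacyFn(ES, *Query, Symbols, LegacyLookup);
    });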
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
index 3dd3cfe05b8d..03fefb69a928 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
@@ -23,10 +23,10 @@ namespace orc {
class NullResolver : public SymbolResolver {
public:
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) override;
+ SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final;
SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
- SymbolNameSet Symbols) override;
+ SymbolNameSet Symbols) final;
};
/// SymbolResolver implementation that rejects all resolution requests.
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index c6b43a9c8ed6..44d6b490e19d 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -23,16 +23,16 @@
namespace llvm {
namespace orc {
-class ObjectTransformLayer2 : public ObjectLayer {
+class ObjectTransformLayer : public ObjectLayer {
public:
using TransformFunction =
std::function<Expected<std::unique_ptr<MemoryBuffer>>(
std::unique_ptr<MemoryBuffer>)>;
- ObjectTransformLayer2(ExecutionSession &ES, ObjectLayer &BaseLayer,
- TransformFunction Transform);
+ ObjectTransformLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
+ TransformFunction Transform);
- void emit(MaterializationResponsibility R, VModuleKey K,
+ void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) override;
private:
@@ -46,11 +46,11 @@ private:
/// immediately applies the user supplied functor to each object, then adds
/// the set of transformed objects to the layer below.
template <typename BaseLayerT, typename TransformFtor>
-class ObjectTransformLayer {
+class LegacyObjectTransformLayer {
public:
/// Construct an ObjectTransformLayer with the given BaseLayer
- ObjectTransformLayer(BaseLayerT &BaseLayer,
- TransformFtor Transform = TransformFtor())
+ LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
+ TransformFtor Transform = TransformFtor())
: BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
/// Apply the transform functor to each object in the object set, then
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index 581c598aff62..a70fc373713d 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -238,7 +238,78 @@ public:
unsigned MinStubs, void *InitialPtrVal);
};
-} // end namespace orc
-} // end namespace llvm
+// @brief Mips32 support.
+//
+// Mips32 supports lazy JITing.
+class OrcMips32_Base {
+public:
+ static const unsigned PointerSize = 4;
+ static const unsigned TrampolineSize = 20;
+ static const unsigned ResolverCodeSize = 0xfc;
+ using IndirectStubsInfo = GenericIndirectStubsInfo<16>;
+
+ using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
+ void *TrampolineId);
+ /// @brief Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
+ static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, unsigned NumTrampolines);
+
+ /// @brief Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr, bool isBigEndian);
+ /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
+ ///
+ /// E.g. Asking for 4 stubs on Mips32, where stubs are 8-bytes, with 4k
+ /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+ /// will return a block of 1024 (2-pages worth).
+ static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, unsigned MinStubs, void *InitialPtrVal);
+};
+
+
+class OrcMips32Le : public OrcMips32_Base {
+public:
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr)
+ { OrcMips32_Base::writeResolverCode(ResolveMem, Reentry, CallbackMgr, false); }
+};
+
+class OrcMips32Be : public OrcMips32_Base {
+public:
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr)
+ { OrcMips32_Base::writeResolverCode(ResolveMem, Reentry, CallbackMgr, true); }
+};
+
+// @brief Mips64 support.
+//
+// Mips64 supports lazy JITing.
+class OrcMips64 {
+public:
+ static const unsigned PointerSize = 8;
+ static const unsigned TrampolineSize = 40;
+ static const unsigned ResolverCodeSize = 0x120;
+
+ using IndirectStubsInfo = GenericIndirectStubsInfo<32>;
+ using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr,
+ void *TrampolineId);
+ /// @brief Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, void *CallbackMgr);
+
+ /// @brief Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
+ static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, unsigned NumTrampolines);
+
+ /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
+ ///
+ /// E.g. Asking for 4 stubs on Mips64, where stubs are 8-bytes, with 4k
+ /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+ /// will return a block of 1024 (2-pages worth).
+ static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, unsigned MinStubs, void *InitialPtrVal);
+};
+ } // end namespace orc
+ } // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
index 45f95f63e70f..3e07f5cf3742 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
@@ -118,30 +118,33 @@ public:
Unmapped.back().RemoteCodeAddr =
Client.reserveMem(Id, CodeSize, CodeAlign);
- LLVM_DEBUG(dbgs() << " code: "
- << format("0x%016x", Unmapped.back().RemoteCodeAddr)
- << " (" << CodeSize << " bytes, alignment "
- << CodeAlign << ")\n");
+ LLVM_DEBUG(
+ dbgs() << " code: "
+ << format("0x%016" PRIx64, Unmapped.back().RemoteCodeAddr)
+ << " (" << CodeSize << " bytes, alignment " << CodeAlign
+ << ")\n");
}
if (RODataSize != 0) {
Unmapped.back().RemoteRODataAddr =
Client.reserveMem(Id, RODataSize, RODataAlign);
- LLVM_DEBUG(dbgs() << " ro-data: "
- << format("0x%016x", Unmapped.back().RemoteRODataAddr)
- << " (" << RODataSize << " bytes, alignment "
- << RODataAlign << ")\n");
+ LLVM_DEBUG(
+ dbgs() << " ro-data: "
+ << format("0x%016" PRIx64, Unmapped.back().RemoteRODataAddr)
+ << " (" << RODataSize << " bytes, alignment " << RODataAlign
+ << ")\n");
}
if (RWDataSize != 0) {
Unmapped.back().RemoteRWDataAddr =
Client.reserveMem(Id, RWDataSize, RWDataAlign);
- LLVM_DEBUG(dbgs() << " rw-data: "
- << format("0x%016x", Unmapped.back().RemoteRWDataAddr)
- << " (" << RWDataSize << " bytes, alignment "
- << RWDataAlign << ")\n");
+ LLVM_DEBUG(
+ dbgs() << " rw-data: "
+ << format("0x%016" PRIx64, Unmapped.back().RemoteRWDataAddr)
+ << " (" << RWDataSize << " bytes, alignment " << RWDataAlign
+ << ")\n");
}
}
@@ -269,9 +272,9 @@ public:
for (auto &Alloc : Allocs) {
NextAddr = alignTo(NextAddr, Alloc.getAlign());
Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextAddr);
- LLVM_DEBUG(dbgs() << " "
- << static_cast<void *>(Alloc.getLocalAddress())
- << " -> " << format("0x%016x", NextAddr) << "\n");
+ LLVM_DEBUG(
+ dbgs() << " " << static_cast<void *>(Alloc.getLocalAddress())
+ << " -> " << format("0x%016" PRIx64, NextAddr) << "\n");
Alloc.setRemoteAddress(NextAddr);
// Only advance NextAddr if it was non-null to begin with,
@@ -293,7 +296,7 @@ public:
LLVM_DEBUG(dbgs() << " copying section: "
<< static_cast<void *>(Alloc.getLocalAddress())
<< " -> "
- << format("0x%016x", Alloc.getRemoteAddress())
+ << format("0x%016" PRIx64, Alloc.getRemoteAddress())
<< " (" << Alloc.getSize() << " bytes)\n";);
if (Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(),
@@ -306,7 +309,8 @@ public:
<< (Permissions & sys::Memory::MF_WRITE ? 'W' : '-')
<< (Permissions & sys::Memory::MF_EXEC ? 'X' : '-')
<< " permissions on block: "
- << format("0x%016x", RemoteSegmentAddr) << "\n");
+ << format("0x%016" PRIx64, RemoteSegmentAddr)
+ << "\n");
if (Client.setProtections(Id, RemoteSegmentAddr, Permissions))
return true;
}
@@ -446,16 +450,24 @@ public:
StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes;
};
- /// Remote compile callback manager.
- class RemoteCompileCallbackManager : public JITCompileCallbackManager {
+ class RemoteTrampolinePool : public TrampolinePool {
public:
- RemoteCompileCallbackManager(OrcRemoteTargetClient &Client,
- ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
- : JITCompileCallbackManager(ES, ErrorHandlerAddress), Client(Client) {}
+ RemoteTrampolinePool(OrcRemoteTargetClient &Client) : Client(Client) {}
+
+ Expected<JITTargetAddress> getTrampoline() override {
+ std::lock_guard<std::mutex> Lock(RTPMutex);
+ if (AvailableTrampolines.empty()) {
+ if (auto Err = grow())
+ return std::move(Err);
+ }
+ assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool");
+ auto TrampolineAddr = AvailableTrampolines.back();
+ AvailableTrampolines.pop_back();
+ return TrampolineAddr;
+ }
private:
- Error grow() override {
+ Error grow() {
JITTargetAddress BlockAddr = 0;
uint32_t NumTrampolines = 0;
if (auto TrampolineInfoOrErr = Client.emitTrampolineBlock())
@@ -470,7 +482,20 @@ public:
return Error::success();
}
+ std::mutex RTPMutex;
OrcRemoteTargetClient &Client;
+ std::vector<JITTargetAddress> AvailableTrampolines;
+ };
+
+ /// Remote compile callback manager.
+ class RemoteCompileCallbackManager : public JITCompileCallbackManager {
+ public:
+ RemoteCompileCallbackManager(OrcRemoteTargetClient &Client,
+ ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddress)
+ : JITCompileCallbackManager(
+ llvm::make_unique<RemoteTrampolinePool>(Client), ES,
+ ErrorHandlerAddress) {}
};
/// Create an OrcRemoteTargetClient.
@@ -489,8 +514,8 @@ public:
/// Call the int(void) function at the given address in the target and return
/// its result.
Expected<int> callIntVoid(JITTargetAddress Addr) {
- LLVM_DEBUG(dbgs() << "Calling int(*)(void) " << format("0x%016x", Addr)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Calling int(*)(void) "
+ << format("0x%016" PRIx64, Addr) << "\n");
return callB<exec::CallIntVoid>(Addr);
}
@@ -499,15 +524,15 @@ public:
Expected<int> callMain(JITTargetAddress Addr,
const std::vector<std::string> &Args) {
LLVM_DEBUG(dbgs() << "Calling int(*)(int, char*[]) "
- << format("0x%016x", Addr) << "\n");
+ << format("0x%016" PRIx64, Addr) << "\n");
return callB<exec::CallMain>(Addr, Args);
}
/// Call the void() function at the given address in the target and wait for
/// it to finish.
Error callVoidVoid(JITTargetAddress Addr) {
- LLVM_DEBUG(dbgs() << "Calling void(*)(void) " << format("0x%016x", Addr)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Calling void(*)(void) "
+ << format("0x%016" PRIx64, Addr) << "\n");
return callB<exec::CallVoidVoid>(Addr);
}
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
index bc0da0f9a730..8db9e317a18a 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
@@ -87,8 +87,7 @@ class SerializationTraits<ChannelT, JITSymbolFlags> {
public:
static Error serialize(ChannelT &C, const JITSymbolFlags &Flags) {
- return serializeSeq(C, static_cast<JITSymbolFlags::UnderlyingType>(Flags),
- Flags.getTargetFlags());
+ return serializeSeq(C, Flags.getRawFlagsValue(), Flags.getTargetFlags());
}
static Error deserialize(ChannelT &C, JITSymbolFlags &Flags) {
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
index 47bd90bb1bad..953b73e10e43 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/Orc/RPCSerialization.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
#include <future>
@@ -207,73 +208,6 @@ private:
namespace detail {
-// FIXME: Remove MSVCPError/MSVCPExpected once MSVC's future implementation
-// supports classes without default constructors.
-#ifdef _MSC_VER
-
-namespace msvc_hacks {
-
-// Work around MSVC's future implementation's use of default constructors:
-// A default constructed value in the promise will be overwritten when the
-// real error is set - so the default constructed Error has to be checked
-// already.
-class MSVCPError : public Error {
-public:
- MSVCPError() { (void)!!*this; }
-
- MSVCPError(MSVCPError &&Other) : Error(std::move(Other)) {}
-
- MSVCPError &operator=(MSVCPError Other) {
- Error::operator=(std::move(Other));
- return *this;
- }
-
- MSVCPError(Error Err) : Error(std::move(Err)) {}
-};
-
-// Work around MSVC's future implementation, similar to MSVCPError.
-template <typename T> class MSVCPExpected : public Expected<T> {
-public:
- MSVCPExpected()
- : Expected<T>(make_error<StringError>("", inconvertibleErrorCode())) {
- consumeError(this->takeError());
- }
-
- MSVCPExpected(MSVCPExpected &&Other) : Expected<T>(std::move(Other)) {}
-
- MSVCPExpected &operator=(MSVCPExpected &&Other) {
- Expected<T>::operator=(std::move(Other));
- return *this;
- }
-
- MSVCPExpected(Error Err) : Expected<T>(std::move(Err)) {}
-
- template <typename OtherT>
- MSVCPExpected(
- OtherT &&Val,
- typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
- nullptr)
- : Expected<T>(std::move(Val)) {}
-
- template <class OtherT>
- MSVCPExpected(
- Expected<OtherT> &&Other,
- typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
- nullptr)
- : Expected<T>(std::move(Other)) {}
-
- template <class OtherT>
- explicit MSVCPExpected(
- Expected<OtherT> &&Other,
- typename std::enable_if<!std::is_convertible<OtherT, T>::value>::type * =
- nullptr)
- : Expected<T>(std::move(Other)) {}
-};
-
-} // end namespace msvc_hacks
-
-#endif // _MSC_VER
-
/// Provides a typedef for a tuple containing the decayed argument types.
template <typename T> class FunctionArgsTuple;
@@ -293,10 +227,10 @@ public:
#ifdef _MSC_VER
// The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<msvc_hacks::MSVCPExpected<RetT>>;
+ using ReturnPromiseType = std::promise<MSVCPExpected<RetT>>;
// The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<msvc_hacks::MSVCPExpected<RetT>>;
+ using ReturnFutureType = std::future<MSVCPExpected<RetT>>;
#else
// The ErrorReturnType wrapped in a std::promise.
using ReturnPromiseType = std::promise<ErrorReturnType>;
@@ -325,10 +259,10 @@ public:
#ifdef _MSC_VER
// The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<msvc_hacks::MSVCPError>;
+ using ReturnPromiseType = std::promise<MSVCPError>;
// The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<msvc_hacks::MSVCPError>;
+ using ReturnFutureType = std::future<MSVCPError>;
#else
// The ErrorReturnType wrapped in a std::promise.
using ReturnPromiseType = std::promise<ErrorReturnType>;
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 48b3f7a58ed7..6f90f0380d95 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -36,7 +36,7 @@
namespace llvm {
namespace orc {
-class RTDyldObjectLinkingLayer2 : public ObjectLayer {
+class RTDyldObjectLinkingLayer : public ObjectLayer {
public:
/// Functor for receiving object-loaded notifications.
using NotifyLoadedFunction =
@@ -44,48 +44,84 @@ public:
const RuntimeDyld::LoadedObjectInfo &)>;
/// Functor for receiving finalization notifications.
- using NotifyFinalizedFunction = std::function<void(VModuleKey)>;
+ using NotifyEmittedFunction = std::function<void(VModuleKey)>;
using GetMemoryManagerFunction =
- std::function<std::shared_ptr<RuntimeDyld::MemoryManager>(VModuleKey)>;
+ std::function<std::unique_ptr<RuntimeDyld::MemoryManager>()>;
/// Construct an ObjectLinkingLayer with the given NotifyLoaded,
- /// and NotifyFinalized functors.
- RTDyldObjectLinkingLayer2(
+ /// and NotifyEmitted functors.
+ RTDyldObjectLinkingLayer(
ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager,
NotifyLoadedFunction NotifyLoaded = NotifyLoadedFunction(),
- NotifyFinalizedFunction NotifyFinalized = NotifyFinalizedFunction());
+ NotifyEmittedFunction NotifyEmitted = NotifyEmittedFunction());
/// Emit the object.
- void emit(MaterializationResponsibility R, VModuleKey K,
+ void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) override;
- /// Map section addresses for the object associated with the
- /// VModuleKey K.
- void mapSectionAddress(VModuleKey K, const void *LocalAddress,
- JITTargetAddress TargetAddr) const;
-
/// Set the 'ProcessAllSections' flag.
///
/// If set to true, all sections in each object file will be allocated using
/// the memory manager, rather than just the sections required for execution.
///
/// This is kludgy, and may be removed in the future.
- void setProcessAllSections(bool ProcessAllSections) {
+ RTDyldObjectLinkingLayer &setProcessAllSections(bool ProcessAllSections) {
this->ProcessAllSections = ProcessAllSections;
+ return *this;
+ }
+
+ /// Instructs this RTDyldObjectLinkingLayer instance to override the symbol flags
+ /// returned by RuntimeDyld for any given object file with the flags supplied
+ /// by the MaterializationResponsibility instance. This is a workaround to
+ /// support symbol visibility in COFF, which does not use the libObject's
+ /// SF_Exported flag. Use only when generating / adding COFF object files.
+ ///
+ /// FIXME: We should be able to remove this if/when COFF properly tracks
+ /// exported symbols.
+ RTDyldObjectLinkingLayer &
+ setOverrideObjectFlagsWithResponsibilityFlags(bool OverrideObjectFlags) {
+ this->OverrideObjectFlags = OverrideObjectFlags;
+ return *this;
+ }
+
+ /// If set, this RTDyldObjectLinkingLayer instance will claim responsibility
+ /// for any symbols provided by a given object file that were not already in
+ /// the MaterializationResponsibility instance. Setting this flag allows
+ /// higher-level program representations (e.g. LLVM IR) to be added based on
+ /// only a subset of the symbols they provide, without having to write
+ /// intervening layers to scan and add the additional symbols. This trades
+ /// diagnostic quality for convenience however: If all symbols are enumerated
+ /// up-front then clashes can be detected and reported early (and usually
+ /// deterministically). If this option is set, clashes for the additional
+ /// symbols may not be detected until late, and detection may depend on
+ /// the flow of control through JIT'd code. Use with care.
+ RTDyldObjectLinkingLayer &
+ setAutoClaimResponsibilityForObjectSymbols(bool AutoClaimObjectSymbols) {
+ this->AutoClaimObjectSymbols = AutoClaimObjectSymbols;
+ return *this;
}
private:
+ Error onObjLoad(VModuleKey K, MaterializationResponsibility &R,
+ object::ObjectFile &Obj,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ std::map<StringRef, JITEvaluatedSymbol> Resolved,
+ std::set<StringRef> &InternalSymbols);
+
+ void onObjEmit(VModuleKey K, MaterializationResponsibility &R, Error Err);
+
mutable std::mutex RTDyldLayerMutex;
GetMemoryManagerFunction GetMemoryManager;
NotifyLoadedFunction NotifyLoaded;
- NotifyFinalizedFunction NotifyFinalized;
- bool ProcessAllSections;
- std::map<VModuleKey, RuntimeDyld *> ActiveRTDylds;
- std::map<VModuleKey, std::shared_ptr<RuntimeDyld::MemoryManager>> MemMgrs;
+ NotifyEmittedFunction NotifyEmitted;
+ bool ProcessAllSections = false;
+ bool OverrideObjectFlags = false;
+ bool AutoClaimObjectSymbols = false;
+ std::vector<std::unique_ptr<RuntimeDyld::MemoryManager>> MemMgrs;
};
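// Usage sketch for the renamed layer above (illustrative; `ES` is an existing
// ExecutionSession, and SectionMemoryManager comes from
// llvm/ExecutionEngine/SectionMemoryManager.h):
llvm::orc::RTDyldObjectLinkingLayer ObjLayer(ES, []() {
  return llvm::make_unique<llvm::SectionMemoryManager>();
});
ObjLayer.setProcessAllSections(true)
    .setOverrideObjectFlagsWithResponsibilityFlags(true); // COFF workaround described above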
-class RTDyldObjectLinkingLayerBase {
+class LegacyRTDyldObjectLinkingLayerBase {
public:
using ObjectPtr = std::unique_ptr<MemoryBuffer>;
@@ -137,10 +173,10 @@ protected:
/// object files to be loaded into memory, linked, and the addresses of their
/// symbols queried. All objects added to this layer can see each other's
/// symbols.
-class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase {
+class LegacyRTDyldObjectLinkingLayer : public LegacyRTDyldObjectLinkingLayerBase {
public:
- using RTDyldObjectLinkingLayerBase::ObjectPtr;
+ using LegacyRTDyldObjectLinkingLayerBase::ObjectPtr;
/// Functor for receiving object-loaded notifications.
using NotifyLoadedFtor =
@@ -161,7 +197,7 @@ private:
template <typename MemoryManagerPtrT>
class ConcreteLinkedObject : public LinkedObject {
public:
- ConcreteLinkedObject(RTDyldObjectLinkingLayer &Parent, VModuleKey K,
+ ConcreteLinkedObject(LegacyRTDyldObjectLinkingLayer &Parent, VModuleKey K,
OwnedObject Obj, MemoryManagerPtrT MemMgr,
std::shared_ptr<SymbolResolver> Resolver,
bool ProcessAllSections)
@@ -175,7 +211,7 @@ private:
}
~ConcreteLinkedObject() override {
- if (this->Parent.NotifyFreed)
+ if (this->Parent.NotifyFreed && ObjForNotify.getBinary())
this->Parent.NotifyFreed(K, *ObjForNotify.getBinary());
MemMgr->deregisterEHFrames();
@@ -249,9 +285,14 @@ private:
consumeError(SymbolName.takeError());
continue;
}
+ // FIXME: Raise an error for bad symbols.
auto Flags = JITSymbolFlags::fromObjectSymbol(Symbol);
+ if (!Flags) {
+ consumeError(Flags.takeError());
+ continue;
+ }
SymbolTable.insert(
- std::make_pair(*SymbolName, JITEvaluatedSymbol(0, Flags)));
+ std::make_pair(*SymbolName, JITEvaluatedSymbol(0, *Flags)));
}
}
@@ -272,7 +313,7 @@ private:
};
VModuleKey K;
- RTDyldObjectLinkingLayer &Parent;
+ LegacyRTDyldObjectLinkingLayer &Parent;
MemoryManagerPtrT MemMgr;
OwnedObject ObjForNotify;
std::unique_ptr<PreFinalizeContents> PFC;
@@ -280,7 +321,7 @@ private:
template <typename MemoryManagerPtrT>
std::unique_ptr<ConcreteLinkedObject<MemoryManagerPtrT>>
- createLinkedObject(RTDyldObjectLinkingLayer &Parent, VModuleKey K,
+ createLinkedObject(LegacyRTDyldObjectLinkingLayer &Parent, VModuleKey K,
OwnedObject Obj, MemoryManagerPtrT MemMgr,
std::shared_ptr<SymbolResolver> Resolver,
bool ProcessAllSections) {
@@ -300,7 +341,7 @@ public:
/// Construct an ObjectLinkingLayer with the given NotifyLoaded,
/// and NotifyFinalized functors.
- RTDyldObjectLinkingLayer(
+ LegacyRTDyldObjectLinkingLayer(
ExecutionSession &ES, ResourcesGetter GetResources,
NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
@@ -402,11 +443,14 @@ public:
private:
ExecutionSession &ES;
- std::map<VModuleKey, std::unique_ptr<LinkedObject>> LinkedObjects;
ResourcesGetter GetResources;
NotifyLoadedFtor NotifyLoaded;
NotifyFinalizedFtor NotifyFinalized;
NotifyFreedFtor NotifyFreed;
+
+ // NB! `LinkedObjects` needs to be destroyed before `NotifyFreed` because
+ // `~ConcreteLinkedObject` calls `NotifyFreed`
+ std::map<VModuleKey, std::unique_ptr<LinkedObject>> LinkedObjects;
bool ProcessAllSections = false;
};
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
index 4c45cfd199dd..717076e25609 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
@@ -14,6 +14,7 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H
#define LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include <atomic>
#include <mutex>
@@ -49,10 +50,13 @@ private:
/// Pointer to a pooled string representing a symbol name.
class SymbolStringPtr {
friend class SymbolStringPool;
+ friend struct DenseMapInfo<SymbolStringPtr>;
friend bool operator==(const SymbolStringPtr &LHS,
const SymbolStringPtr &RHS);
friend bool operator<(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS);
+ static SymbolStringPool::PoolMapEntry Tombstone;
+
public:
SymbolStringPtr() = default;
SymbolStringPtr(const SymbolStringPtr &Other)
@@ -142,6 +146,29 @@ inline bool SymbolStringPool::empty() const {
}
} // end namespace orc
+
+template <>
+struct DenseMapInfo<orc::SymbolStringPtr> {
+
+ static orc::SymbolStringPtr getEmptyKey() {
+ return orc::SymbolStringPtr();
+ }
+
+ static orc::SymbolStringPtr getTombstoneKey() {
+ return orc::SymbolStringPtr(&orc::SymbolStringPtr::Tombstone);
+ }
+
+ static unsigned getHashValue(orc::SymbolStringPtr V) {
+ uintptr_t IV = reinterpret_cast<uintptr_t>(V.S);
+ return unsigned(IV) ^ unsigned(IV >> 9);
+ }
+
+ static bool isEqual(const orc::SymbolStringPtr &LHS,
+ const orc::SymbolStringPtr &RHS) {
+ return LHS.S == RHS.S;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H
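For readers unfamiliar with the DenseMapInfo contract being specialized above: a key type needs an empty key and a tombstone key (both reserved, never inserted), a hash, and an equality test. A hedged sketch with a hypothetical pointer-like key (MyKey and Table are illustrative names, not LLVM's):

#include "llvm/ADT/DenseMap.h"
#include <cstdint>

struct MyKey { void *P; };

namespace llvm {
template <> struct DenseMapInfo<MyKey> {
  // Reserved sentinel values; real keys must never compare equal to these.
  static MyKey getEmptyKey() { return MyKey{nullptr}; }
  static MyKey getTombstoneKey() {
    return MyKey{reinterpret_cast<void *>(uintptr_t(-1))};
  }
  static unsigned getHashValue(const MyKey &K) {
    auto IV = reinterpret_cast<uintptr_t>(K.P);
    return unsigned(IV) ^ unsigned(IV >> 9);
  }
  static bool isEqual(const MyKey &LHS, const MyKey &RHS) {
    return LHS.P == RHS.P;
  }
};
} // end namespace llvm

// With the specialization visible, MyKey works as a DenseMap key:
// llvm::DenseMap<MyKey, int> Table;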
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
new file mode 100644
index 000000000000..bf946de532d3
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
@@ -0,0 +1,163 @@
+//===----------- ThreadSafeModule.h -- Layer interfaces ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Thread safe wrappers and utilities for Module and LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_THREADSAFEMODULEWRAPPER_H
+#define LLVM_EXECUTIONENGINE_ORC_THREADSAFEMODULEWRAPPER_H
+
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compiler.h"
+
+#include <functional>
+#include <memory>
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+/// An LLVMContext together with an associated mutex that can be used to lock
+/// the context to prevent concurrent access by other threads.
+class ThreadSafeContext {
+private:
+ struct State {
+ State(std::unique_ptr<LLVMContext> Ctx) : Ctx(std::move(Ctx)) {}
+
+ std::unique_ptr<LLVMContext> Ctx;
+ std::recursive_mutex Mutex;
+ };
+
+public:
+ // RAII based lock for ThreadSafeContext.
+ class LLVM_NODISCARD Lock {
+ private:
+ using UnderlyingLock = std::lock_guard<std::recursive_mutex>;
+
+ public:
+ Lock(std::shared_ptr<State> S)
+ : S(std::move(S)),
+ L(llvm::make_unique<UnderlyingLock>(this->S->Mutex)) {}
+
+ private:
+ std::shared_ptr<State> S;
+ std::unique_ptr<UnderlyingLock> L;
+ };
+
+ /// Construct a null context.
+ ThreadSafeContext() = default;
+
+ /// Construct a ThreadSafeContext from the given LLVMContext.
+ ThreadSafeContext(std::unique_ptr<LLVMContext> NewCtx)
+ : S(std::make_shared<State>(std::move(NewCtx))) {
+ assert(S->Ctx != nullptr &&
+ "Can not construct a ThreadSafeContext from a nullptr");
+ }
+
+ /// Returns a pointer to the LLVMContext that was used to construct this
+ /// instance, or null if the instance was default constructed.
+ LLVMContext *getContext() { return S ? S->Ctx.get() : nullptr; }
+
+ /// Returns a pointer to the LLVMContext that was used to construct this
+ /// instance, or null if the instance was default constructed.
+ const LLVMContext *getContext() const { return S ? S->Ctx.get() : nullptr; }
+
+ Lock getLock() {
+ assert(S && "Can not lock an empty ThreadSafeContext");
+ return Lock(S);
+ }
+
+private:
+ std::shared_ptr<State> S;
+};
+
+/// An LLVM Module together with a shared ThreadSafeContext.
+class ThreadSafeModule {
+public:
+ /// Default construct a ThreadSafeModule. This results in a null module and
+ /// null context.
+ ThreadSafeModule() = default;
+
+ ThreadSafeModule(ThreadSafeModule &&Other) = default;
+
+ ThreadSafeModule &operator=(ThreadSafeModule &&Other) {
+ // We have to explicitly define this move operator to copy the fields in
+ // reverse order (i.e. module first) to ensure the dependencies are
+ // protected: The old module that is being overwritten must be destroyed
+ // *before* the context that it depends on.
+ // We also need to lock the context to make sure the module tear-down
+ // does not overlap any other work on the context.
+ if (M) {
+ auto L = getContextLock();
+ M = nullptr;
+ }
+ M = std::move(Other.M);
+ TSCtx = std::move(Other.TSCtx);
+ return *this;
+ }
+
+ /// Construct a ThreadSafeModule from a unique_ptr<Module> and a
+ /// unique_ptr<LLVMContext>. This creates a new ThreadSafeContext from the
+ /// given context.
+ ThreadSafeModule(std::unique_ptr<Module> M, std::unique_ptr<LLVMContext> Ctx)
+ : M(std::move(M)), TSCtx(std::move(Ctx)) {}
+
+ /// Construct a ThreadSafeModule from a unique_ptr<Module> and an
+ /// existing ThreadSafeContext.
+ ThreadSafeModule(std::unique_ptr<Module> M, ThreadSafeContext TSCtx)
+ : M(std::move(M)), TSCtx(std::move(TSCtx)) {}
+
+ ~ThreadSafeModule() {
+ // We need to lock the context while we destruct the module.
+ if (M) {
+ auto L = getContextLock();
+ M = nullptr;
+ }
+ }
+
+ /// Get the module wrapped by this ThreadSafeModule.
+ Module *getModule() { return M.get(); }
+
+ /// Get the module wrapped by this ThreadSafeModule.
+ const Module *getModule() const { return M.get(); }
+
+ /// Take out a lock on the ThreadSafeContext for this module.
+ ThreadSafeContext::Lock getContextLock() { return TSCtx.getLock(); }
+
+ /// Boolean conversion: This ThreadSafeModule will evaluate to true if it
+ /// wraps a non-null module.
+ explicit operator bool() {
+ if (M) {
+ assert(TSCtx.getContext() &&
+ "Non-null module must have non-null context");
+ return true;
+ }
+ return false;
+ }
+
+private:
+ std::unique_ptr<Module> M;
+ ThreadSafeContext TSCtx;
+};
+
+using GVPredicate = std::function<bool(const GlobalValue &)>;
+using GVModifier = std::function<void(GlobalValue &)>;
+
+/// Clones the given module on to a new context.
+ThreadSafeModule
+cloneToNewContext(ThreadSafeModule &TSMW,
+ GVPredicate ShouldCloneDef = GVPredicate(),
+ GVModifier UpdateClonedDefSource = GVModifier());
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_THREADSAFEMODULEWRAPPER_H
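A hedged usage sketch for the wrappers defined above: build a Module on a fresh context, hand both to a ThreadSafeModule, and take the context lock before mutating IR that another thread might share. Function and variable names are illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;
using namespace llvm::orc;

ThreadSafeModule makeModule() {
  auto Ctx = llvm::make_unique<LLVMContext>();
  // The module must be built on the same context that the wrapper will own.
  auto M = llvm::make_unique<Module>("demo", *Ctx);
  return ThreadSafeModule(std::move(M), std::move(Ctx));
}

void suffixAllFunctions(ThreadSafeModule &TSM) {
  auto Lock = TSM.getContextLock(); // hold the context mutex for the mutation
  if (Module *M = TSM.getModule())
    for (Function &F : *M)
      F.setName(F.getName() + ".suffix");
}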
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
index 5dd5add1bb39..e419ee05e566 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -250,6 +250,16 @@ public:
void finalizeWithMemoryManagerLocking();
private:
+ friend void
+ jitLinkForORC(object::ObjectFile &Obj,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
+ RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver,
+ bool ProcessAllSections,
+ std::function<Error(std::unique_ptr<LoadedObjectInfo>,
+ std::map<StringRef, JITEvaluatedSymbol>)>
+ OnLoaded,
+ std::function<void(Error)> OnEmitted);
+
// RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
// interface.
std::unique_ptr<RuntimeDyldImpl> Dyld;
@@ -259,6 +269,21 @@ private:
RuntimeDyldCheckerImpl *Checker;
};
+// Asynchronous JIT link for ORC.
+//
+// Warning: This API is experimental and probably should not be used by anyone
+// but ORC's RTDyldObjectLinkingLayer2. Internally it constructs a RuntimeDyld
+// instance and uses continuation passing to perform the fix-up and finalize
+// steps asynchronously.
+void jitLinkForORC(object::ObjectFile &Obj,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
+ RuntimeDyld::MemoryManager &MemMgr,
+ JITSymbolResolver &Resolver, bool ProcessAllSections,
+ std::function<Error(std::unique_ptr<LoadedObjectInfo>,
+ std::map<StringRef, JITEvaluatedSymbol>)>
+ OnLoaded,
+ std::function<void(Error)> OnEmitted);
+
} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
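A sketch of how a caller might drive the continuation-passing API declared above. The callback bodies are placeholders, and generic (C++14) lambdas are used so the exact callback parameter spellings follow whatever the declaration supplies; treat this as illustrative rather than canonical:

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

void linkOneObject(llvm::object::ObjectFile &Obj,
                   std::unique_ptr<llvm::MemoryBuffer> Buf,
                   llvm::RuntimeDyld::MemoryManager &MemMgr,
                   llvm::JITSymbolResolver &Resolver) {
  llvm::jitLinkForORC(
      Obj, std::move(Buf), MemMgr, Resolver, /*ProcessAllSections=*/false,
      [](auto LoadedObjInfo, auto ResolvedSymbols) -> llvm::Error {
        // Record or register the resolved symbols here.
        (void)LoadedObjInfo;
        (void)ResolvedSymbols;
        return llvm::Error::success();
      },
      [](llvm::Error Err) {
        // Runs once fix-up/finalization has completed (or failed).
        llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "jitLink: ");
      });
}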
diff --git a/contrib/llvm/include/llvm/IR/Attributes.h b/contrib/llvm/include/llvm/IR/Attributes.h
index 5aaaaf3c396b..9fc4614af010 100644
--- a/contrib/llvm/include/llvm/IR/Attributes.h
+++ b/contrib/llvm/include/llvm/IR/Attributes.h
@@ -230,29 +230,33 @@ public:
/// Add an argument attribute. Returns a new set because attribute sets are
/// immutable.
- AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C,
+ Attribute::AttrKind Kind) const;
/// Add a target-dependent attribute. Returns a new set because attribute sets
/// are immutable.
- AttributeSet addAttribute(LLVMContext &C, StringRef Kind,
- StringRef Value = StringRef()) const;
+ LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C, StringRef Kind,
+ StringRef Value = StringRef()) const;
/// Add attributes to the attribute set. Returns a new set because attribute
/// sets are immutable.
- AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const;
+ LLVM_NODISCARD AttributeSet addAttributes(LLVMContext &C,
+ AttributeSet AS) const;
/// Remove the specified attribute from this set. Returns a new set because
/// attribute sets are immutable.
- AttributeSet removeAttribute(LLVMContext &C, Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C,
+ Attribute::AttrKind Kind) const;
/// Remove the specified attribute from this set. Returns a new set because
/// attribute sets are immutable.
- AttributeSet removeAttribute(LLVMContext &C, StringRef Kind) const;
+ LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C,
+ StringRef Kind) const;
/// Remove the specified attributes from this set. Returns a new set because
/// attribute sets are immutable.
- AttributeSet removeAttributes(LLVMContext &C,
- const AttrBuilder &AttrsToRemove) const;
+ LLVM_NODISCARD AttributeSet
+ removeAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const;
/// Return the number of attributes in this set.
unsigned getNumAttributes() const;
@@ -375,133 +379,140 @@ public:
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
- StringRef Value = StringRef()) const;
+ LLVM_NODISCARD AttributeList
+ addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
+ StringRef Value = StringRef()) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAttribute(LLVMContext &C, unsigned Index, Attribute A) const;
+ LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
+ Attribute A) const;
/// Add attributes to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) const;
+ LLVM_NODISCARD AttributeList addAttributes(LLVMContext &C, unsigned Index,
+ const AttrBuilder &B) const;
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
- AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo,
- Attribute::AttrKind Kind) const {
+ LLVM_NODISCARD AttributeList addParamAttribute(
+ LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
return addAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
- AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo,
- StringRef Kind,
- StringRef Value = StringRef()) const {
+ LLVM_NODISCARD AttributeList
+ addParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind,
+ StringRef Value = StringRef()) const {
return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value);
}
/// Add an attribute to the attribute list at the given arg indices. Returns a
/// new list because attribute lists are immutable.
- AttributeList addParamAttribute(LLVMContext &C, ArrayRef<unsigned> ArgNos,
- Attribute A) const;
+ LLVM_NODISCARD AttributeList addParamAttribute(LLVMContext &C,
+ ArrayRef<unsigned> ArgNos,
+ Attribute A) const;
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
- AttributeList addParamAttributes(LLVMContext &C, unsigned ArgNo,
- const AttrBuilder &B) const {
+ LLVM_NODISCARD AttributeList addParamAttributes(LLVMContext &C,
+ unsigned ArgNo,
+ const AttrBuilder &B) const {
return addAttributes(C, ArgNo + FirstArgIndex, B);
}
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const;
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind) const;
+ LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
+ StringRef Kind) const;
/// Remove the specified attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &AttrsToRemove) const;
+ LLVM_NODISCARD AttributeList removeAttributes(
+ LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const;
/// Remove all attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeAttributes(LLVMContext &C, unsigned Index) const;
+ LLVM_NODISCARD AttributeList removeAttributes(LLVMContext &C,
+ unsigned Index) const;
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo,
- Attribute::AttrKind Kind) const {
+ LLVM_NODISCARD AttributeList removeParamAttribute(
+ LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo,
- StringRef Kind) const {
+ LLVM_NODISCARD AttributeList removeParamAttribute(LLVMContext &C,
+ unsigned ArgNo,
+ StringRef Kind) const {
return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo,
- const AttrBuilder &AttrsToRemove) const {
+ LLVM_NODISCARD AttributeList removeParamAttributes(
+ LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const {
return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove);
}
/// Remove all attributes at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo) const {
+ LLVM_NODISCARD AttributeList removeParamAttributes(LLVMContext &C,
+ unsigned ArgNo) const {
return removeAttributes(C, ArgNo + FirstArgIndex);
}
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// index. Returns a new list because attribute lists are immutable.
- AttributeList addDereferenceableAttr(LLVMContext &C, unsigned Index,
- uint64_t Bytes) const;
+ LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C,
+ unsigned Index,
+ uint64_t Bytes) const;
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// arg index. Returns a new list because attribute lists are immutable.
- AttributeList addDereferenceableParamAttr(LLVMContext &C, unsigned ArgNo,
- uint64_t Bytes) const {
+ LLVM_NODISCARD AttributeList addDereferenceableParamAttr(
+ LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes);
}
/// Add the dereferenceable_or_null attribute to the attribute set at
/// the given index. Returns a new list because attribute lists are immutable.
- AttributeList addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
- uint64_t Bytes) const;
+ LLVM_NODISCARD AttributeList addDereferenceableOrNullAttr(
+ LLVMContext &C, unsigned Index, uint64_t Bytes) const;
/// Add the dereferenceable_or_null attribute to the attribute set at
/// the given arg index. Returns a new list because attribute lists are
/// immutable.
- AttributeList addDereferenceableOrNullParamAttr(LLVMContext &C,
- unsigned ArgNo,
- uint64_t Bytes) const {
+ LLVM_NODISCARD AttributeList addDereferenceableOrNullParamAttr(
+ LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes);
}
/// Add the allocsize attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAllocSizeAttr(LLVMContext &C, unsigned Index,
- unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg);
+ LLVM_NODISCARD AttributeList
+ addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg,
+ const Optional<unsigned> &NumElemsArg);
/// Add the allocsize attribute to the attribute set at the given arg index.
/// Returns a new list because attribute lists are immutable.
- AttributeList addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo,
- unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg) {
+ LLVM_NODISCARD AttributeList
+ addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg,
+ const Optional<unsigned> &NumElemsArg) {
return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg);
}
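The LLVM_NODISCARD annotations above matter because every one of these methods returns a fresh list; dropping the result is a silent no-op. A short sketch (helper name is illustrative):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

using namespace llvm;

void markFirstArgNonNull(Function &F) {
  AttributeList AL = F.getAttributes();
  AL = AL.addParamAttribute(F.getContext(), /*ArgNo=*/0, Attribute::NonNull);
  F.setAttributes(AL); // write the new, extended list back

  // F.getAttributes().addParamAttribute(...);  // result dropped: no effect,
  // and now flagged thanks to LLVM_NODISCARD.
}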
diff --git a/contrib/llvm/include/llvm/IR/Attributes.td b/contrib/llvm/include/llvm/IR/Attributes.td
index 39978c41ac72..e786d85d05a8 100644
--- a/contrib/llvm/include/llvm/IR/Attributes.td
+++ b/contrib/llvm/include/llvm/IR/Attributes.td
@@ -176,6 +176,14 @@ def SanitizeMemory : EnumAttr<"sanitize_memory">;
/// HWAddressSanitizer is on.
def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress">;
+/// Speculative Load Hardening is enabled.
+///
+/// Note that this uses the default compatibility (always compatible during
+/// inlining) and a conservative merge strategy where inlining an attributed
+/// body will add the attribute to the caller. This ensures that code carrying
+/// this attribute will always be lowered with hardening enabled.
+def SpeculativeLoadHardening : EnumAttr<"speculative_load_hardening">;
+
/// Argument is swift error.
def SwiftError : EnumAttr<"swifterror">;
@@ -232,6 +240,7 @@ def : MergeRule<"setAND<UnsafeFPMathAttr>">;
def : MergeRule<"setOR<NoImplicitFloatAttr>">;
def : MergeRule<"setOR<NoJumpTablesAttr>">;
def : MergeRule<"setOR<ProfileSampleAccurateAttr>">;
+def : MergeRule<"setOR<SpeculativeLoadHardeningAttr>">;
def : MergeRule<"adjustCallerSSPLevel">;
def : MergeRule<"adjustCallerStackProbes">;
def : MergeRule<"adjustCallerStackProbeSize">;
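A hedged sketch of using the new attribute from C++, assuming the Attribute::SpeculativeLoadHardening enumerator that the TableGen entry above generates. Because the merge rule is setOR, a caller that inlines a hardened callee becomes hardened as well:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

using namespace llvm;

void requestHardening(Function &F) {
  F.addFnAttr(Attribute::SpeculativeLoadHardening);
}

bool isHardened(const Function &F) {
  return F.hasFnAttribute(Attribute::SpeculativeLoadHardening);
}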
diff --git a/contrib/llvm/include/llvm/IR/BasicBlock.h b/contrib/llvm/include/llvm/IR/BasicBlock.h
index 1ee19975af75..99eac33f742e 100644
--- a/contrib/llvm/include/llvm/IR/BasicBlock.h
+++ b/contrib/llvm/include/llvm/IR/BasicBlock.h
@@ -38,7 +38,6 @@ class LandingPadInst;
class LLVMContext;
class Module;
class PHINode;
-class TerminatorInst;
class ValueSymbolTable;
/// LLVM Basic Block Representation
@@ -50,12 +49,12 @@ class ValueSymbolTable;
/// represents a label to which a branch can jump.
///
/// A well formed basic block is formed of a list of non-terminating
-/// instructions followed by a single TerminatorInst instruction.
-/// TerminatorInst's may not occur in the middle of basic blocks, and must
-/// terminate the blocks. The BasicBlock class allows malformed basic blocks to
-/// occur because it may be useful in the intermediate stage of constructing or
-/// modifying a program. However, the verifier will ensure that basic blocks
-/// are "well formed".
+/// instructions followed by a single terminator instruction. Terminator
+/// instructions may not occur in the middle of basic blocks, and must terminate
+/// the blocks. The BasicBlock class allows malformed basic blocks to occur
+/// because it may be useful in the intermediate stage of constructing or
+/// modifying a program. However, the verifier will ensure that basic blocks are
+/// "well formed".
class BasicBlock final : public Value, // Basic blocks are data objects also
public ilist_node_with_parent<BasicBlock, Function> {
public:
@@ -120,10 +119,10 @@ public:
/// Returns the terminator instruction if the block is well formed or null
/// if the block is not well formed.
- const TerminatorInst *getTerminator() const LLVM_READONLY;
- TerminatorInst *getTerminator() {
- return const_cast<TerminatorInst *>(
- static_cast<const BasicBlock *>(this)->getTerminator());
+ const Instruction *getTerminator() const LLVM_READONLY;
+ Instruction *getTerminator() {
+ return const_cast<Instruction *>(
+ static_cast<const BasicBlock *>(this)->getTerminator());
}
/// Returns the call instruction calling \@llvm.experimental.deoptimize
@@ -238,6 +237,12 @@ public:
static_cast<const BasicBlock *>(this)->getUniquePredecessor());
}
+ /// Return true if this block has exactly N predecessors.
+ bool hasNPredecessors(unsigned N) const;
+
+ /// Return true if this block has N predecessors or more.
+ bool hasNPredecessorsOrMore(unsigned N) const;
+
/// Return the successor of this block if it has a single successor.
/// Otherwise return a null pointer.
///
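The new predecessor-count queries exist because counting every predecessor walks the whole use list; hasNPredecessors and hasNPredecessorsOrMore can stop early. A brief sketch:

#include "llvm/IR/BasicBlock.h"

using namespace llvm;

bool hasSinglePred(const BasicBlock &BB) {
  return BB.hasNPredecessors(1); // stops after inspecting at most two edges
}

bool isMergePoint(const BasicBlock &BB) {
  return BB.hasNPredecessorsOrMore(2); // stops as soon as a second edge is seen
}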
diff --git a/contrib/llvm/include/llvm/IR/CFG.h b/contrib/llvm/include/llvm/IR/CFG.h
index f4988e7f1fec..8385c4647e12 100644
--- a/contrib/llvm/include/llvm/IR/CFG.h
+++ b/contrib/llvm/include/llvm/IR/CFG.h
@@ -1,4 +1,4 @@
-//===- CFG.h - Process LLVM structures as graphs ----------------*- C++ -*-===//
+//===- CFG.h ----------------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,15 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file defines specializations of GraphTraits that allow Function and
-// BasicBlock graphs to be treated as proper graphs for generic algorithms.
-//
+/// \file
+///
+/// This file provides various utilities for inspecting and working with the
+/// control flow graph in LLVM IR. This includes generic facilities for
+/// iterating successors and predecessors of basic blocks, the successors of
+/// specific terminator instructions, etc. It also defines specializations of
+/// GraphTraits that allow Function and BasicBlock graphs to be treated as
+/// proper graphs for generic algorithms.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_CFG_H
@@ -44,8 +49,13 @@ class PredIterator : public std::iterator<std::forward_iterator_tag,
inline void advancePastNonTerminators() {
// Loop to ignore non-terminator uses (for example BlockAddresses).
- while (!It.atEnd() && !isa<TerminatorInst>(*It))
+ while (!It.atEnd()) {
+ if (auto *Inst = dyn_cast<Instruction>(*It))
+ if (Inst->isTerminator())
+ break;
+
++It;
+ }
}
public:
@@ -63,7 +73,7 @@ public:
inline reference operator*() const {
assert(!It.atEnd() && "pred_iterator out of range!");
- return cast<TerminatorInst>(*It)->getParent();
+ return cast<Instruction>(*It)->getParent();
}
inline pointer *operator->() const { return &operator*(); }
@@ -107,6 +117,8 @@ inline const_pred_iterator pred_end(const BasicBlock *BB) {
inline bool pred_empty(const BasicBlock *BB) {
return pred_begin(BB) == pred_end(BB);
}
+/// Get the number of predecessors of \p BB. This is a linear time operation.
+/// Use \ref BasicBlock::hasNPredecessors() or hasNPredecessorsOrMore if able.
inline unsigned pred_size(const BasicBlock *BB) {
return std::distance(pred_begin(BB), pred_end(BB));
}
@@ -118,16 +130,144 @@ inline pred_const_range predecessors(const BasicBlock *BB) {
}
//===----------------------------------------------------------------------===//
-// BasicBlock succ_iterator helpers
+// Instruction and BasicBlock succ_iterator helpers
//===----------------------------------------------------------------------===//
-using succ_iterator =
- TerminatorInst::SuccIterator<TerminatorInst *, BasicBlock>;
-using succ_const_iterator =
- TerminatorInst::SuccIterator<const TerminatorInst *, const BasicBlock>;
+template <class InstructionT, class BlockT>
+class SuccIterator
+ : public iterator_facade_base<SuccIterator<InstructionT, BlockT>,
+ std::random_access_iterator_tag, BlockT, int,
+ BlockT *, BlockT *> {
+public:
+ using difference_type = int;
+ using pointer = BlockT *;
+ using reference = BlockT *;
+
+private:
+ InstructionT *Inst;
+ int Idx;
+ using Self = SuccIterator<InstructionT, BlockT>;
+
+ inline bool index_is_valid(int Idx) {
+ // Note that we specially support the index of zero being valid even in the
+ // face of a null instruction.
+ return Idx >= 0 && (Idx == 0 || Idx <= (int)Inst->getNumSuccessors());
+ }
+
+ /// Proxy object to allow write access in operator[]
+ class SuccessorProxy {
+ Self It;
+
+ public:
+ explicit SuccessorProxy(const Self &It) : It(It) {}
+
+ SuccessorProxy(const SuccessorProxy &) = default;
+
+ SuccessorProxy &operator=(SuccessorProxy RHS) {
+ *this = reference(RHS);
+ return *this;
+ }
+
+ SuccessorProxy &operator=(reference RHS) {
+ It.Inst->setSuccessor(It.Idx, RHS);
+ return *this;
+ }
+
+ operator reference() const { return *It; }
+ };
+
+public:
+ // begin iterator
+ explicit inline SuccIterator(InstructionT *Inst) : Inst(Inst), Idx(0) {}
+ // end iterator
+ inline SuccIterator(InstructionT *Inst, bool) : Inst(Inst) {
+ if (Inst)
+ Idx = Inst->getNumSuccessors();
+ else
+ // Inst == NULL happens if a basic block is not fully constructed and

+ // consequently getTerminator() returns NULL. In this case we construct
+ // a SuccIterator which describes a basic block that has zero
+ // successors.
+ // Defining SuccIterator for incomplete and malformed CFGs is especially
+ // useful for debugging.
+ Idx = 0;
+ }
+
+ /// This is used to interface between code that wants to
+ /// operate on terminator instructions directly.
+ int getSuccessorIndex() const { return Idx; }
+
+ inline bool operator==(const Self &x) const { return Idx == x.Idx; }
+
+ inline BlockT *operator*() const { return Inst->getSuccessor(Idx); }
+
+ // We use the basic block pointer directly for operator->.
+ inline BlockT *operator->() const { return operator*(); }
+
+ inline bool operator<(const Self &RHS) const {
+ assert(Inst == RHS.Inst && "Cannot compare iterators of different blocks!");
+ return Idx < RHS.Idx;
+ }
+
+ int operator-(const Self &RHS) const {
+ assert(Inst == RHS.Inst && "Cannot compare iterators of different blocks!");
+ return Idx - RHS.Idx;
+ }
+
+ inline Self &operator+=(int RHS) {
+ int NewIdx = Idx + RHS;
+ assert(index_is_valid(NewIdx) && "Iterator index out of bound");
+ Idx = NewIdx;
+ return *this;
+ }
+
+ inline Self &operator-=(int RHS) { return operator+=(-RHS); }
+
+ // Specially implement the [] operation using a proxy object to support
+ // assignment.
+ inline SuccessorProxy operator[](int Offset) {
+ Self TmpIt = *this;
+ TmpIt += Offset;
+ return SuccessorProxy(TmpIt);
+ }
+
+ /// Get the source BlockT of this iterator.
+ inline BlockT *getSource() {
+ assert(Inst && "Source not available, if basic block was malformed");
+ return Inst->getParent();
+ }
+};
+
+template <typename T, typename U> struct isPodLike<SuccIterator<T, U>> {
+ static const bool value = isPodLike<T>::value;
+};
+
+using succ_iterator = SuccIterator<Instruction, BasicBlock>;
+using succ_const_iterator = SuccIterator<const Instruction, const BasicBlock>;
using succ_range = iterator_range<succ_iterator>;
using succ_const_range = iterator_range<succ_const_iterator>;
+inline succ_iterator succ_begin(Instruction *I) { return succ_iterator(I); }
+inline succ_const_iterator succ_begin(const Instruction *I) {
+ return succ_const_iterator(I);
+}
+inline succ_iterator succ_end(Instruction *I) { return succ_iterator(I, true); }
+inline succ_const_iterator succ_end(const Instruction *I) {
+ return succ_const_iterator(I, true);
+}
+inline bool succ_empty(const Instruction *I) {
+ return succ_begin(I) == succ_end(I);
+}
+inline unsigned succ_size(const Instruction *I) {
+ return std::distance(succ_begin(I), succ_end(I));
+}
+inline succ_range successors(Instruction *I) {
+ return succ_range(succ_begin(I), succ_end(I));
+}
+inline succ_const_range successors(const Instruction *I) {
+ return succ_const_range(succ_begin(I), succ_end(I));
+}
+
inline succ_iterator succ_begin(BasicBlock *BB) {
return succ_iterator(BB->getTerminator());
}
@@ -153,11 +293,6 @@ inline succ_const_range successors(const BasicBlock *BB) {
return succ_const_range(succ_begin(BB), succ_end(BB));
}
-template <typename T, typename U>
-struct isPodLike<TerminatorInst::SuccIterator<T, U>> {
- static const bool value = isPodLike<T>::value;
-};
-
//===--------------------------------------------------------------------===//
// GraphTraits specializations for basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
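With SuccIterator now templated over Instruction, successor iteration no longer needs a TerminatorInst: a terminator is simply an Instruction whose isTerminator() is true. A short sketch using the new overloads:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

unsigned countSuccessors(BasicBlock &BB) {
  Instruction *Term = BB.getTerminator(); // now a plain Instruction *
  if (!Term)
    return 0; // malformed/incomplete block: treat as having no successors
  return succ_size(Term); // uses the Instruction-based succ_iterator above
}

void visitSuccessors(Instruction &Terminator) {
  for (BasicBlock *Succ : successors(&Terminator))
    (void)Succ; // visit each successor block here
}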
diff --git a/contrib/llvm/include/llvm/IR/CFGDiff.h b/contrib/llvm/include/llvm/IR/CFGDiff.h
new file mode 100644
index 000000000000..da4373f7bce2
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/CFGDiff.h
@@ -0,0 +1,285 @@
+//===- CFGDiff.h - Define a CFG snapshot. -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines specializations of GraphTraits that allows generic
+// algorithms to see a different snapshot of a CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_CFGDIFF_H
+#define LLVM_IR_CFGDIFF_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/Support/CFGUpdate.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+
+// Two booleans are used to define orders in graphs:
+// InverseGraph defines when we need to reverse the whole graph and is as such
+// also equivalent to applying updates in reverse.
+// InverseEdge defines whether we want to change the edges direction. E.g., for
+// a non-inversed graph, the children are naturally the successors when
+// InverseEdge is false and the predecessors when InverseEdge is true.
+
+// We define two base classes that call into GraphDiff, one for successors
+// (CFGSuccessors), where InverseEdge is false, and one for predecessors
+// (CFGPredecessors), where InverseEdge is true.
+// FIXME: Further refactoring may merge the two base classes into a single one
+// templated / parametrized on using succ_iterator/pred_iterator and false/true
+// for the InverseEdge.
+
+// CFGViewSuccessors and CFGViewPredecessors, both can be parametrized to
+// consider the graph inverted or not (i.e. InverseGraph). Successors
+// implicitly has InverseEdge = false and Predecessors implicitly has
+// InverseEdge = true (see calls to GraphDiff methods in there). The GraphTraits
+// instantiations that follow define the value of InverseGraph.
+
+// GraphTraits instantiations:
+// - GraphDiff<BasicBlock *> is equivalent to InverseGraph = false
+// - GraphDiff<Inverse<BasicBlock *>> is equivalent to InverseGraph = true
+// - second pair item is BasicBlock *, then InverseEdge = false (so it inherits
+// from CFGViewSuccessors).
+// - second pair item is Inverse<BasicBlock *>, then InverseEdge = true (so it
+// inherits from CFGViewPredecessors).
+
+// The 4 GraphTraits are as follows:
+// 1. std::pair<const GraphDiff<BasicBlock *> *, BasicBlock *>> :
+// CFGViewSuccessors<false>
+// Regular CFG, children means successors, InverseGraph = false,
+// InverseEdge = false.
+// 2. std::pair<const GraphDiff<Inverse<BasicBlock *>> *, BasicBlock *>> :
+// CFGViewSuccessors<true>
+// Reverse the graph, get successors but reverse-apply updates,
+// InverseGraph = true, InverseEdge = false.
+// 3. std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>> :
+// CFGViewPredecessors<false>
+// Regular CFG, reverse edges, so children mean predecessors,
+// InverseGraph = false, InverseEdge = true.
+// 4. std::pair<const GraphDiff<Inverse<BasicBlock *>> *, Inverse<BasicBlock *>>
+// : CFGViewPredecessors<true>
+// Reverse the graph and the edges, InverseGraph = true, InverseEdge = true.
+
+namespace llvm {
+
+// GraphDiff defines a CFG snapshot: given a set of Update<NodePtr>, provide
+// utilities to skip edges marked as deleted and return a set of edges marked as
+// newly inserted. The current diff treats the CFG as a graph rather than a
+// multigraph. Added edges are pruned to be unique, and deleted edges will
+// remove all existing edges between two blocks.
+template <typename NodePtr, bool InverseGraph = false> class GraphDiff {
+ using UpdateMapType = SmallDenseMap<NodePtr, SmallVector<NodePtr, 2>>;
+ UpdateMapType SuccInsert;
+ UpdateMapType SuccDelete;
+ UpdateMapType PredInsert;
+ UpdateMapType PredDelete;
+ // Using a singleton empty vector for all BasicBlock requests with no
+ // children.
+ SmallVector<NodePtr, 1> Empty;
+
+ void printMap(raw_ostream &OS, const UpdateMapType &M) const {
+ for (auto Pair : M)
+ for (auto Child : Pair.second) {
+ OS << "(";
+ Pair.first->printAsOperand(OS, false);
+ OS << ", ";
+ Child->printAsOperand(OS, false);
+ OS << ") ";
+ }
+ OS << "\n";
+ }
+
+public:
+ GraphDiff() {}
+ GraphDiff(ArrayRef<cfg::Update<NodePtr>> Updates) {
+ SmallVector<cfg::Update<NodePtr>, 4> LegalizedUpdates;
+ cfg::LegalizeUpdates<NodePtr>(Updates, LegalizedUpdates, InverseGraph);
+ for (auto U : LegalizedUpdates) {
+ if (U.getKind() == cfg::UpdateKind::Insert) {
+ SuccInsert[U.getFrom()].push_back(U.getTo());
+ PredInsert[U.getTo()].push_back(U.getFrom());
+ } else {
+ SuccDelete[U.getFrom()].push_back(U.getTo());
+ PredDelete[U.getTo()].push_back(U.getFrom());
+ }
+ }
+ }
+
+ bool ignoreChild(const NodePtr BB, NodePtr EdgeEnd, bool InverseEdge) const {
+ auto &DeleteChildren =
+ (InverseEdge != InverseGraph) ? PredDelete : SuccDelete;
+ auto It = DeleteChildren.find(BB);
+ if (It == DeleteChildren.end())
+ return false;
+ auto &EdgesForBB = It->second;
+ return llvm::find(EdgesForBB, EdgeEnd) != EdgesForBB.end();
+ }
+
+ iterator_range<typename SmallVectorImpl<NodePtr>::const_iterator>
+ getAddedChildren(const NodePtr BB, bool InverseEdge) const {
+ auto &InsertChildren =
+ (InverseEdge != InverseGraph) ? PredInsert : SuccInsert;
+ auto It = InsertChildren.find(BB);
+ if (It == InsertChildren.end())
+ return make_range(Empty.begin(), Empty.end());
+ return make_range(It->second.begin(), It->second.end());
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << "===== GraphDiff: CFG edge changes to create a CFG snapshot. \n"
+ "===== (Note: notion of children/inverse_children depends on "
+ "the direction of edges and the graph.)\n";
+ OS << "Children to insert:\n\t";
+ printMap(OS, SuccInsert);
+ OS << "Children to delete:\n\t";
+ printMap(OS, SuccDelete);
+ OS << "Inverse_children to insert:\n\t";
+ printMap(OS, PredInsert);
+ OS << "Inverse_children to delete:\n\t";
+ printMap(OS, PredDelete);
+ OS << "\n";
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
+};
+
+template <bool InverseGraph = false> struct CFGViewSuccessors {
+ using DataRef = const GraphDiff<BasicBlock *, InverseGraph> *;
+ using NodeRef = std::pair<DataRef, BasicBlock *>;
+
+ using ExistingChildIterator =
+ WrappedPairNodeDataIterator<succ_iterator, NodeRef, DataRef>;
+ struct DeletedEdgesFilter {
+ BasicBlock *BB;
+ DeletedEdgesFilter(BasicBlock *BB) : BB(BB){};
+ bool operator()(NodeRef N) const {
+ return !N.first->ignoreChild(BB, N.second, false);
+ }
+ };
+ using FilterExistingChildrenIterator =
+ filter_iterator<ExistingChildIterator, DeletedEdgesFilter>;
+
+ using vec_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;
+ using AddNewChildrenIterator =
+ WrappedPairNodeDataIterator<vec_iterator, NodeRef, DataRef>;
+ using ChildIteratorType =
+ concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>;
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ auto InsertVec = N.first->getAddedChildren(N.second, false);
+ // filter iterator init:
+ auto firstit = make_filter_range(
+ make_range<ExistingChildIterator>({succ_begin(N.second), N.first},
+ {succ_end(N.second), N.first}),
+ DeletedEdgesFilter(N.second));
+ // new inserts iterator init:
+ auto secondit = make_range<AddNewChildrenIterator>(
+ {InsertVec.begin(), N.first}, {InsertVec.end(), N.first});
+
+ return concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>(firstit, secondit);
+ }
+
+ static ChildIteratorType child_end(NodeRef N) {
+ auto InsertVec = N.first->getAddedChildren(N.second, false);
+ // filter iterator init:
+ auto firstit = make_filter_range(
+ make_range<ExistingChildIterator>({succ_end(N.second), N.first},
+ {succ_end(N.second), N.first}),
+ DeletedEdgesFilter(N.second));
+ // new inserts iterator init:
+ auto secondit = make_range<AddNewChildrenIterator>(
+ {InsertVec.end(), N.first}, {InsertVec.end(), N.first});
+
+ return concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>(firstit, secondit);
+ }
+};
+
+template <bool InverseGraph = false> struct CFGViewPredecessors {
+ using DataRef = const GraphDiff<BasicBlock *, InverseGraph> *;
+ using NodeRef = std::pair<DataRef, BasicBlock *>;
+
+ using ExistingChildIterator =
+ WrappedPairNodeDataIterator<pred_iterator, NodeRef, DataRef>;
+ struct DeletedEdgesFilter {
+ BasicBlock *BB;
+ DeletedEdgesFilter(BasicBlock *BB) : BB(BB){};
+ bool operator()(NodeRef N) const {
+ return !N.first->ignoreChild(BB, N.second, true);
+ }
+ };
+ using FilterExistingChildrenIterator =
+ filter_iterator<ExistingChildIterator, DeletedEdgesFilter>;
+
+ using vec_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;
+ using AddNewChildrenIterator =
+ WrappedPairNodeDataIterator<vec_iterator, NodeRef, DataRef>;
+ using ChildIteratorType =
+ concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>;
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ auto InsertVec = N.first->getAddedChildren(N.second, true);
+ // filter iterator init:
+ auto firstit = make_filter_range(
+ make_range<ExistingChildIterator>({pred_begin(N.second), N.first},
+ {pred_end(N.second), N.first}),
+ DeletedEdgesFilter(N.second));
+ // new inserts iterator init:
+ auto secondit = make_range<AddNewChildrenIterator>(
+ {InsertVec.begin(), N.first}, {InsertVec.end(), N.first});
+
+ return concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>(firstit, secondit);
+ }
+
+ static ChildIteratorType child_end(NodeRef N) {
+ auto InsertVec = N.first->getAddedChildren(N.second, true);
+ // filter iterator init:
+ auto firstit = make_filter_range(
+ make_range<ExistingChildIterator>({pred_end(N.second), N.first},
+ {pred_end(N.second), N.first}),
+ DeletedEdgesFilter(N.second));
+ // new inserts iterator init:
+ auto secondit = make_range<AddNewChildrenIterator>(
+ {InsertVec.end(), N.first}, {InsertVec.end(), N.first});
+
+ return concat_iterator<NodeRef, FilterExistingChildrenIterator,
+ AddNewChildrenIterator>(firstit, secondit);
+ }
+};
+
+template <>
+struct GraphTraits<
+ std::pair<const GraphDiff<BasicBlock *, false> *, BasicBlock *>>
+ : CFGViewSuccessors<false> {};
+template <>
+struct GraphTraits<
+ std::pair<const GraphDiff<BasicBlock *, true> *, BasicBlock *>>
+ : CFGViewSuccessors<true> {};
+template <>
+struct GraphTraits<
+ std::pair<const GraphDiff<BasicBlock *, false> *, Inverse<BasicBlock *>>>
+ : CFGViewPredecessors<false> {};
+template <>
+struct GraphTraits<
+ std::pair<const GraphDiff<BasicBlock *, true> *, Inverse<BasicBlock *>>>
+ : CFGViewPredecessors<true> {};
+} // end namespace llvm
+
+#endif // LLVM_IR_CFGDIFF_H
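A hedged usage sketch for GraphDiff: stage a pair of CFG updates without touching the IR, then query the snapshot. The three-argument cfg::Update construction is assumed from Support/CFGUpdate.h, and the function is purely illustrative:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFGDiff.h"
#include "llvm/Support/CFGUpdate.h"

using namespace llvm;

void snapshotExample(BasicBlock *A, BasicBlock *B, BasicBlock *C) {
  SmallVector<cfg::Update<BasicBlock *>, 2> Updates;
  Updates.push_back({cfg::UpdateKind::Delete, A, B}); // pretend A->B is gone
  Updates.push_back({cfg::UpdateKind::Insert, A, C}); // pretend A->C exists

  GraphDiff<BasicBlock *> GD(Updates);
  bool Hidden = GD.ignoreChild(A, B, /*InverseEdge=*/false);  // true: edge filtered out
  auto Added = GD.getAddedChildren(A, /*InverseEdge=*/false); // range containing C
  (void)Hidden;
  (void)Added;
}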
diff --git a/contrib/llvm/include/llvm/IR/CallSite.h b/contrib/llvm/include/llvm/IR/CallSite.h
index 2162ccb982b0..a3e78049f4be 100644
--- a/contrib/llvm/include/llvm/IR/CallSite.h
+++ b/contrib/llvm/include/llvm/IR/CallSite.h
@@ -656,10 +656,7 @@ public:
private:
IterTy getCallee() const {
- if (isCall()) // Skip Callee
- return cast<CallInst>(getInstruction())->op_end() - 1;
- else // Skip BB, BB, Callee
- return cast<InvokeInst>(getInstruction())->op_end() - 3;
+ return cast<CallBase>(getInstruction())->op_end() - 1;
}
};
diff --git a/contrib/llvm/include/llvm/IR/CallingConv.h b/contrib/llvm/include/llvm/IR/CallingConv.h
index b9c02d7ed424..49c3be960373 100644
--- a/contrib/llvm/include/llvm/IR/CallingConv.h
+++ b/contrib/llvm/include/llvm/IR/CallingConv.h
@@ -220,6 +220,9 @@ namespace CallingConv {
/// shader if tessellation is in use, or otherwise the vertex shader.
AMDGPU_ES = 96,
+ // Calling convention between AArch64 Advanced SIMD functions
+ AArch64_VectorCall = 97,
+
/// The highest possible calling convention ID. Must be some 2^k - 1.
MaxID = 1023
};
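A small sketch applying the new convention; as with any non-C calling convention, the function and each call site must agree on it:

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

void useVectorPCS(Function &F, CallInst &Call) {
  F.setCallingConv(CallingConv::AArch64_VectorCall);
  Call.setCallingConv(CallingConv::AArch64_VectorCall);
}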
diff --git a/contrib/llvm/include/llvm/IR/Constant.h b/contrib/llvm/include/llvm/IR/Constant.h
index 5fdf0ea00f00..98437f8eff1f 100644
--- a/contrib/llvm/include/llvm/IR/Constant.h
+++ b/contrib/llvm/include/llvm/IR/Constant.h
@@ -114,7 +114,8 @@ public:
/// For aggregates (struct/array/vector) return the constant that corresponds
/// to the specified element if possible, or null if not. This can return null
- /// if the element index is a ConstantExpr, or if 'this' is a constant expr.
+ /// if the element index is a ConstantExpr, if 'this' is a constant expr or
+ /// if the constant does not fit into an uint64_t.
Constant *getAggregateElement(unsigned Elt) const;
Constant *getAggregateElement(Constant *Elt) const;
diff --git a/contrib/llvm/include/llvm/IR/Constants.h b/contrib/llvm/include/llvm/IR/Constants.h
index f9d5ebc560c7..afc93cd61d47 100644
--- a/contrib/llvm/include/llvm/IR/Constants.h
+++ b/contrib/llvm/include/llvm/IR/Constants.h
@@ -290,7 +290,11 @@ public:
static Constant *get(Type* Ty, StringRef Str);
static ConstantFP *get(LLVMContext &Context, const APFloat &V);
- static Constant *getNaN(Type *Ty, bool Negative = false, unsigned type = 0);
+ static Constant *getNaN(Type *Ty, bool Negative = false, uint64_t Payload = 0);
+ static Constant *getQNaN(Type *Ty, bool Negative = false,
+ APInt *Payload = nullptr);
+ static Constant *getSNaN(Type *Ty, bool Negative = false,
+ APInt *Payload = nullptr);
static Constant *getNegativeZero(Type *Ty);
static Constant *getInfinity(Type *Ty, bool Negative = false);
@@ -1114,6 +1118,13 @@ public:
static Constant *getSelect(Constant *C, Constant *V1, Constant *V2,
Type *OnlyIfReducedTy = nullptr);
+ /// get - Return a unary operator constant expression,
+ /// folding if possible.
+ ///
+ /// \param OnlyIfReducedTy see \a getWithOperands() docs.
+ static Constant *get(unsigned Opcode, Constant *C1, unsigned Flags = 0,
+ Type *OnlyIfReducedTy = nullptr);
+
/// get - Return a binary or shift operator constant expression,
/// folding if possible.
///
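A sketch of the extended NaN constructors above: quiet and signaling NaNs, optionally with an explicit payload (the payload is assumed to fit the type's significand):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

Constant *makeNaNs(LLVMContext &Ctx) {
  Type *FloatTy = Type::getFloatTy(Ctx);
  Constant *Quiet = ConstantFP::getQNaN(FloatTy); // default quiet NaN
  APInt Payload(32, 0x1234);                      // illustrative payload bits
  Constant *Signaling =
      ConstantFP::getSNaN(FloatTy, /*Negative=*/false, &Payload);
  (void)Signaling;
  return Quiet;
}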
diff --git a/contrib/llvm/include/llvm/IR/DIBuilder.h b/contrib/llvm/include/llvm/IR/DIBuilder.h
index 06c9421ec1d6..443332b1b23c 100644
--- a/contrib/llvm/include/llvm/IR/DIBuilder.h
+++ b/contrib/llvm/include/llvm/IR/DIBuilder.h
@@ -134,8 +134,8 @@ namespace llvm {
/// \param SplitDebugInlining Whether to emit inline debug info.
/// \param DebugInfoForProfiling Whether to emit extra debug info for
/// profile collection.
- /// \param GnuPubnames Whether to emit .debug_gnu_pubnames section instead
- /// of .debug_pubnames.
+ /// \param NameTableKind Whether to emit .debug_gnu_pubnames,
+ /// .debug_pubnames, or no pubnames at all.
DICompileUnit *
createCompileUnit(unsigned Lang, DIFile *File, StringRef Producer,
bool isOptimized, StringRef Flags, unsigned RV,
@@ -144,7 +144,9 @@ namespace llvm {
DICompileUnit::DebugEmissionKind::FullDebug,
uint64_t DWOId = 0, bool SplitDebugInlining = true,
bool DebugInfoForProfiling = false,
- bool GnuPubnames = false);
+ DICompileUnit::DebugNameTableKind NameTableKind =
+ DICompileUnit::DebugNameTableKind::Default,
+ bool RangesBaseAddress = false);
/// Create a file descriptor to hold debugging information for a file.
/// \param Filename File name.
@@ -188,9 +190,11 @@ namespace llvm {
/// type.
/// \param Name Type name.
/// \param SizeInBits Size of the type.
- /// \param Encoding DWARF encoding code, e.g. dwarf::DW_ATE_float.
+ /// \param Encoding DWARF encoding code, e.g., dwarf::DW_ATE_float.
+ /// \param Flags Optional DWARF attributes, e.g., DW_AT_endianity.
DIBasicType *createBasicType(StringRef Name, uint64_t SizeInBits,
- unsigned Encoding);
+ unsigned Encoding,
+ DINode::DIFlags Flags = DINode::FlagZero);
/// Create debugging information entry for a qualified
/// type, e.g. 'const int'.
@@ -498,11 +502,11 @@ namespace llvm {
/// \param Elements Enumeration elements.
/// \param UnderlyingType Underlying type of a C++11/ObjC fixed enum.
/// \param UniqueIdentifier A unique identifier for the enum.
- /// \param IsFixed Boolean flag indicate if this is C++11/ObjC fixed enum.
+ /// \param IsScoped Boolean flag indicating if this is a C++11/ObjC 'enum class'.
DICompositeType *createEnumerationType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
- DIType *UnderlyingType, StringRef UniqueIdentifier = "", bool IsFixed = false);
+ DIType *UnderlyingType, StringRef UniqueIdentifier = "", bool IsScoped = false);
/// Create subroutine type.
/// \param ParameterTypes An array of subroutine parameter types. This
@@ -580,14 +584,14 @@ namespace llvm {
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DIType *Ty, bool isLocalToUnit,
DIExpression *Expr = nullptr, MDNode *Decl = nullptr,
- uint32_t AlignInBits = 0);
+ MDTuple *templateParams = nullptr, uint32_t AlignInBits = 0);
/// Identical to createGlobalVariable
/// except that the resulting DbgNode is temporary and meant to be RAUWed.
DIGlobalVariable *createTempGlobalVariableFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DIType *Ty, bool isLocalToUnit, MDNode *Decl = nullptr,
- uint32_t AlignInBits = 0);
+ MDTuple *templateParams = nullptr, uint32_t AlignInBits = 0);
/// Create a new descriptor for an auto variable. This is a local variable
/// that is not a subprogram parameter.
@@ -649,29 +653,28 @@ namespace llvm {
/// \param File File where this variable is defined.
/// \param LineNo Line number.
/// \param Ty Function type.
- /// \param isLocalToUnit True if this function is not externally visible.
- /// \param isDefinition True if this is a function definition.
/// \param ScopeLine Set to the beginning of the scope this starts
/// \param Flags e.g. is this function prototyped or not.
/// These flags are used to emit dwarf attributes.
- /// \param isOptimized True if optimization is ON.
+ /// \param SPFlags Additional flags specific to subprograms.
/// \param TParams Function template parameters.
/// \param ThrownTypes Exception types this function may throw.
- DISubprogram *createFunction(
- DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine,
- DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
- DITemplateParameterArray TParams = nullptr,
- DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr);
+ DISubprogram *
+ createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName,
+ DIFile *File, unsigned LineNo, DISubroutineType *Ty,
+ unsigned ScopeLine, DINode::DIFlags Flags = DINode::FlagZero,
+ DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero,
+ DITemplateParameterArray TParams = nullptr,
+ DISubprogram *Decl = nullptr,
+ DITypeArray ThrownTypes = nullptr);
/// Identical to createFunction,
/// except that the resulting DbgNode is meant to be RAUWed.
DISubprogram *createTempFunctionFwdDecl(
DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine,
- DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
+ unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
+ DINode::DIFlags Flags = DINode::FlagZero,
+ DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero,
DITemplateParameterArray TParams = nullptr,
DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr);
@@ -683,10 +686,6 @@ namespace llvm {
/// \param File File where this variable is defined.
/// \param LineNo Line number.
/// \param Ty Function type.
- /// \param isLocalToUnit True if this function is not externally visible..
- /// \param isDefinition True if this is a function definition.
- /// \param Virtuality Attributes describing virtualness. e.g. pure
- /// virtual function.
/// \param VTableIndex Index no of this method in virtual table, or -1u if
/// unrepresentable.
/// \param ThisAdjustment
@@ -695,17 +694,18 @@ namespace llvm {
/// \param VTableHolder Type that holds vtable.
/// \param Flags e.g. is this function prototyped or not.
/// These flags are used to emit dwarf attributes.
- /// \param isOptimized True if optimization is ON.
+ /// \param SPFlags Additional flags specific to subprograms.
/// \param TParams Function template parameters.
/// \param ThrownTypes Exception types this function may throw.
- DISubprogram *createMethod(
- DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned Virtuality = 0, unsigned VTableIndex = 0,
- int ThisAdjustment = 0, DIType *VTableHolder = nullptr,
- DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
- DITemplateParameterArray TParams = nullptr,
- DITypeArray ThrownTypes = nullptr);
+ DISubprogram *
+ createMethod(DIScope *Scope, StringRef Name, StringRef LinkageName,
+ DIFile *File, unsigned LineNo, DISubroutineType *Ty,
+ unsigned VTableIndex = 0, int ThisAdjustment = 0,
+ DIType *VTableHolder = nullptr,
+ DINode::DIFlags Flags = DINode::FlagZero,
+ DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero,
+ DITemplateParameterArray TParams = nullptr,
+ DITypeArray ThrownTypes = nullptr);
/// This creates new descriptor for a namespace with the specified
/// parent scope.
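A hedged sketch of the reworked createFunction call: the former isLocalToUnit/isDefinition/isOptimized booleans now travel as DISubprogram::DISPFlags (see the DebugInfoFlags.def changes below). The SPFlag* enumerator spellings and the name "foo" are assumptions for illustration:

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"

using namespace llvm;

DISubprogram *emitFunctionDI(DIBuilder &DIB, DIScope *Scope, DIFile *File,
                             DISubroutineType *Ty, unsigned Line) {
  DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
                                    DISubprogram::SPFlagLocalToUnit |
                                    DISubprogram::SPFlagOptimized;
  return DIB.createFunction(Scope, "foo", /*LinkageName=*/"", File, Line, Ty,
                            /*ScopeLine=*/Line, DINode::FlagPrototyped,
                            SPFlags);
}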
diff --git a/contrib/llvm/include/llvm/IR/DataLayout.h b/contrib/llvm/include/llvm/IR/DataLayout.h
index d796a65e6129..c144d1c13c34 100644
--- a/contrib/llvm/include/llvm/IR/DataLayout.h
+++ b/contrib/llvm/include/llvm/IR/DataLayout.h
@@ -334,6 +334,9 @@ public:
/// the backends/clients are updated.
unsigned getPointerSize(unsigned AS = 0) const;
+ /// Returns the maximum pointer size over all address spaces.
+ unsigned getMaxPointerSize() const;
+
// Index size used for address calculation.
unsigned getIndexSize(unsigned AS) const;
@@ -361,6 +364,11 @@ public:
return getPointerSize(AS) * 8;
}
+ /// Returns the maximum pointer size over all address spaces.
+ unsigned getMaxPointerSizeInBits() const {
+ return getMaxPointerSize() * 8;
+ }
+
/// Size in bits of index used for address calculation in getelementptr.
unsigned getIndexSizeInBits(unsigned AS) const {
return getIndexSize(AS) * 8;
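A small sketch of the new query: when code must be conservative across address spaces with different pointer widths, size integer work by the maximum:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

APInt widestPointerMask(const DataLayout &DL) {
  unsigned Bits = DL.getMaxPointerSizeInBits(); // max over all address spaces
  return APInt::getAllOnesValue(Bits);
}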
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoFlags.def b/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
index b1f5fac64232..ce117aa452aa 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
+++ b/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
@@ -11,11 +11,20 @@
//
//===----------------------------------------------------------------------===//
-// TODO: Add other DW-based macros.
+#if !(defined HANDLE_DI_FLAG || defined HANDLE_DISP_FLAG)
+#error "Missing macro definition of HANDLE_DI*"
+#endif
+
#ifndef HANDLE_DI_FLAG
-#error "Missing macro definition of HANDLE_DI_FLAG"
+#define HANDLE_DI_FLAG(ID, NAME)
#endif
+#ifndef HANDLE_DISP_FLAG
+#define HANDLE_DISP_FLAG(ID, NAME)
+#endif
+
+// General flags kept in DINode.
+
HANDLE_DI_FLAG(0, Zero) // Use it as zero value.
// For example: void foo(DIFlags Flags = FlagZero).
HANDLE_DI_FLAG(1, Private)
@@ -45,9 +54,12 @@ HANDLE_DI_FLAG((1 << 20), NoReturn)
HANDLE_DI_FLAG((1 << 21), MainSubprogram)
HANDLE_DI_FLAG((1 << 22), TypePassByValue)
HANDLE_DI_FLAG((1 << 23), TypePassByReference)
-HANDLE_DI_FLAG((1 << 24), FixedEnum)
+HANDLE_DI_FLAG((1 << 24), EnumClass)
HANDLE_DI_FLAG((1 << 25), Thunk)
HANDLE_DI_FLAG((1 << 26), Trivial)
+HANDLE_DI_FLAG((1 << 27), BigEndian)
+HANDLE_DI_FLAG((1 << 28), LittleEndian)
+HANDLE_DI_FLAG((1 << 29), AllCallsDescribed)
// To avoid needing a dedicated value for IndirectVirtualBase, we use
// the bitwise or of Virtual and FwdDecl, which does not otherwise
@@ -57,8 +69,29 @@ HANDLE_DI_FLAG((1 << 2) | (1 << 5), IndirectVirtualBase)
#ifdef DI_FLAG_LARGEST_NEEDED
// intended to be used with ADT/BitmaskEnum.h
// NOTE: always must be equal to largest flag, check this when adding new flag
-HANDLE_DI_FLAG((1 << 26), Largest)
+HANDLE_DI_FLAG((1 << 29), Largest)
#undef DI_FLAG_LARGEST_NEEDED
#endif
+// Subprogram-specific flags kept in DISubprogram.
+
+// Use this as a zero/initialization value.
+// For example: void foo(DISPFlags Flags = SPFlagZero).
+HANDLE_DISP_FLAG(0, Zero)
+// Virtuality is a two-bit enum field in the LSB of the word.
+// Values should match DW_VIRTUALITY_*.
+HANDLE_DISP_FLAG(1u, Virtual)
+HANDLE_DISP_FLAG(2u, PureVirtual)
+HANDLE_DISP_FLAG((1u << 2), LocalToUnit)
+HANDLE_DISP_FLAG((1u << 3), Definition)
+HANDLE_DISP_FLAG((1u << 4), Optimized)
+
+#ifdef DISP_FLAG_LARGEST_NEEDED
+// Intended to be used with ADT/BitmaskEnum.h.
+// NOTE: Always must be equal to largest flag, check this when adding new flags.
+HANDLE_DISP_FLAG((1 << 4), Largest)
+#undef DISP_FLAG_LARGEST_NEEDED
+#endif
+
#undef HANDLE_DI_FLAG
+#undef HANDLE_DISP_FLAG
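For readers new to the HANDLE_* pattern above: a client defines one of the macros, includes the .def file, and lets it stamp out enumerators; DISubprogram's DISPFlags is built this way. A hedged sketch with an illustrative enum name:

// Illustrative consumer; the real one lives in DebugInfoMetadata.h.
enum MySPFlags : unsigned {
#define HANDLE_DISP_FLAG(ID, NAME) MySPFlag##NAME = ID,
#include "llvm/IR/DebugInfoFlags.def"
};
// Expands to: MySPFlagZero = 0, MySPFlagVirtual = 1u, MySPFlagPureVirtual = 2u,
// MySPFlagLocalToUnit = (1u << 2), MySPFlagDefinition = (1u << 3),
// MySPFlagOptimized = (1u << 4),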
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
index 820746851104..a461d1bd4fe8 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -713,6 +713,8 @@ public:
bool isTypePassByReference() const {
return getFlags() & FlagTypePassByReference;
}
+ bool isBigEndian() const { return getFlags() & FlagBigEndian; }
+ bool isLittleEndian() const { return getFlags() & FlagLittleEndian; }
static bool classof(const Metadata *MD) {
switch (MD->getMetadataID()) {
@@ -739,40 +741,43 @@ class DIBasicType : public DIType {
DIBasicType(LLVMContext &C, StorageType Storage, unsigned Tag,
uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding,
- ArrayRef<Metadata *> Ops)
+ DIFlags Flags, ArrayRef<Metadata *> Ops)
: DIType(C, DIBasicTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0,
- FlagZero, Ops),
+ Flags, Ops),
Encoding(Encoding) {}
~DIBasicType() = default;
static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag,
StringRef Name, uint64_t SizeInBits,
uint32_t AlignInBits, unsigned Encoding,
- StorageType Storage, bool ShouldCreate = true) {
+ DIFlags Flags, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name),
- SizeInBits, AlignInBits, Encoding, Storage, ShouldCreate);
+ SizeInBits, AlignInBits, Encoding, Flags, Storage,
+ ShouldCreate);
}
static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag,
MDString *Name, uint64_t SizeInBits,
uint32_t AlignInBits, unsigned Encoding,
- StorageType Storage, bool ShouldCreate = true);
+ DIFlags Flags, StorageType Storage,
+ bool ShouldCreate = true);
TempDIBasicType cloneImpl() const {
return getTemporary(getContext(), getTag(), getName(), getSizeInBits(),
- getAlignInBits(), getEncoding());
+ getAlignInBits(), getEncoding(), getFlags());
}
public:
DEFINE_MDNODE_GET(DIBasicType, (unsigned Tag, StringRef Name),
- (Tag, Name, 0, 0, 0))
+ (Tag, Name, 0, 0, 0, FlagZero))
DEFINE_MDNODE_GET(DIBasicType,
(unsigned Tag, StringRef Name, uint64_t SizeInBits,
- uint32_t AlignInBits, unsigned Encoding),
- (Tag, Name, SizeInBits, AlignInBits, Encoding))
+ uint32_t AlignInBits, unsigned Encoding, DIFlags Flags),
+ (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags))
DEFINE_MDNODE_GET(DIBasicType,
(unsigned Tag, MDString *Name, uint64_t SizeInBits,
- uint32_t AlignInBits, unsigned Encoding),
- (Tag, Name, SizeInBits, AlignInBits, Encoding))
+ uint32_t AlignInBits, unsigned Encoding, DIFlags Flags),
+ (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags))
TempDIBasicType clone() const { return cloneImpl(); }
@@ -1162,11 +1167,21 @@ public:
NoDebug = 0,
FullDebug,
LineTablesOnly,
- LastEmissionKind = LineTablesOnly
+ DebugDirectivesOnly,
+ LastEmissionKind = DebugDirectivesOnly
+ };
+
+ enum class DebugNameTableKind : unsigned {
+ Default = 0,
+ GNU = 1,
+ None = 2,
+ LastDebugNameTableKind = None
};
static Optional<DebugEmissionKind> getEmissionKind(StringRef Str);
static const char *emissionKindString(DebugEmissionKind EK);
+ static Optional<DebugNameTableKind> getNameTableKind(StringRef Str);
+ static const char *nameTableKindString(DebugNameTableKind PK);
private:
unsigned SourceLanguage;
@@ -1176,17 +1191,20 @@ private:
uint64_t DWOId;
bool SplitDebugInlining;
bool DebugInfoForProfiling;
- bool GnuPubnames;
+ unsigned NameTableKind;
+ bool RangesBaseAddress;
DICompileUnit(LLVMContext &C, StorageType Storage, unsigned SourceLanguage,
bool IsOptimized, unsigned RuntimeVersion,
unsigned EmissionKind, uint64_t DWOId, bool SplitDebugInlining,
- bool DebugInfoForProfiling, bool GnuPubnames, ArrayRef<Metadata *> Ops)
+ bool DebugInfoForProfiling, unsigned NameTableKind,
+ bool RangesBaseAddress, ArrayRef<Metadata *> Ops)
: DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind),
DWOId(DWOId), SplitDebugInlining(SplitDebugInlining),
- DebugInfoForProfiling(DebugInfoForProfiling), GnuPubnames(GnuPubnames) {
+ DebugInfoForProfiling(DebugInfoForProfiling),
+ NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) {
assert(Storage != Uniqued);
}
~DICompileUnit() = default;
@@ -1200,14 +1218,16 @@ private:
DIGlobalVariableExpressionArray GlobalVariables,
DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros,
uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling,
- bool GnuPubnames, StorageType Storage, bool ShouldCreate = true) {
- return getImpl(
- Context, SourceLanguage, File, getCanonicalMDString(Context, Producer),
- IsOptimized, getCanonicalMDString(Context, Flags), RuntimeVersion,
- getCanonicalMDString(Context, SplitDebugFilename), EmissionKind,
- EnumTypes.get(), RetainedTypes.get(), GlobalVariables.get(),
- ImportedEntities.get(), Macros.get(), DWOId, SplitDebugInlining,
- DebugInfoForProfiling, GnuPubnames, Storage, ShouldCreate);
+ unsigned NameTableKind, bool RangesBaseAddress, StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, SourceLanguage, File,
+ getCanonicalMDString(Context, Producer), IsOptimized,
+ getCanonicalMDString(Context, Flags), RuntimeVersion,
+ getCanonicalMDString(Context, SplitDebugFilename),
+ EmissionKind, EnumTypes.get(), RetainedTypes.get(),
+ GlobalVariables.get(), ImportedEntities.get(), Macros.get(),
+ DWOId, SplitDebugInlining, DebugInfoForProfiling,
+ NameTableKind, RangesBaseAddress, Storage, ShouldCreate);
}
static DICompileUnit *
getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
@@ -1216,17 +1236,17 @@ private:
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *GlobalVariables, Metadata *ImportedEntities,
Metadata *Macros, uint64_t DWOId, bool SplitDebugInlining,
- bool DebugInfoForProfiling, bool GnuPubnames, StorageType Storage,
- bool ShouldCreate = true);
+ bool DebugInfoForProfiling, unsigned NameTableKind,
+ bool RangesBaseAddress, StorageType Storage, bool ShouldCreate = true);
TempDICompileUnit cloneImpl() const {
- return getTemporary(getContext(), getSourceLanguage(), getFile(),
- getProducer(), isOptimized(), getFlags(),
- getRuntimeVersion(), getSplitDebugFilename(),
- getEmissionKind(), getEnumTypes(), getRetainedTypes(),
- getGlobalVariables(), getImportedEntities(),
- getMacros(), DWOId, getSplitDebugInlining(),
- getDebugInfoForProfiling(), getGnuPubnames());
+ return getTemporary(
+ getContext(), getSourceLanguage(), getFile(), getProducer(),
+ isOptimized(), getFlags(), getRuntimeVersion(), getSplitDebugFilename(),
+ getEmissionKind(), getEnumTypes(), getRetainedTypes(),
+ getGlobalVariables(), getImportedEntities(), getMacros(), DWOId,
+ getSplitDebugInlining(), getDebugInfoForProfiling(), getNameTableKind(),
+ getRangesBaseAddress());
}
public:
@@ -1242,11 +1262,11 @@ public:
DIGlobalVariableExpressionArray GlobalVariables,
DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros,
uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling,
- bool GnuPubnames),
+ DebugNameTableKind NameTableKind, bool RangesBaseAddress),
(SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes,
GlobalVariables, ImportedEntities, Macros, DWOId, SplitDebugInlining,
- DebugInfoForProfiling, GnuPubnames))
+ DebugInfoForProfiling, (unsigned)NameTableKind, RangesBaseAddress))
DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(
DICompileUnit,
(unsigned SourceLanguage, Metadata *File, MDString *Producer,
@@ -1254,11 +1274,12 @@ public:
MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes,
Metadata *RetainedTypes, Metadata *GlobalVariables,
Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId,
- bool SplitDebugInlining, bool DebugInfoForProfiling, bool GnuPubnames),
+ bool SplitDebugInlining, bool DebugInfoForProfiling,
+ unsigned NameTableKind, bool RangesBaseAddress),
(SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes,
GlobalVariables, ImportedEntities, Macros, DWOId, SplitDebugInlining,
- DebugInfoForProfiling, GnuPubnames))
+ DebugInfoForProfiling, NameTableKind, RangesBaseAddress))
TempDICompileUnit clone() const { return cloneImpl(); }
@@ -1268,11 +1289,21 @@ public:
DebugEmissionKind getEmissionKind() const {
return (DebugEmissionKind)EmissionKind;
}
+ bool isDebugDirectivesOnly() const {
+ return EmissionKind == DebugDirectivesOnly;
+ }
bool getDebugInfoForProfiling() const { return DebugInfoForProfiling; }
- bool getGnuPubnames() const { return GnuPubnames; }
- StringRef getProducer() const { return getStringOperand(1); }
- StringRef getFlags() const { return getStringOperand(2); }
- StringRef getSplitDebugFilename() const { return getStringOperand(3); }
+ DebugNameTableKind getNameTableKind() const {
+ return (DebugNameTableKind)NameTableKind;
+ }
+  bool getRangesBaseAddress() const { return RangesBaseAddress; }
+  StringRef getProducer() const { return getStringOperand(1); }
+  StringRef getFlags() const { return getStringOperand(2); }
+  StringRef getSplitDebugFilename() const { return getStringOperand(3); }
DICompositeTypeArray getEnumTypes() const {
return cast_or_null<MDTuple>(getRawEnumTypes());
}
@@ -1372,19 +1403,20 @@ class DILocation : public MDNode {
friend class MDNode;
DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned Column, ArrayRef<Metadata *> MDs);
+ unsigned Column, ArrayRef<Metadata *> MDs, bool ImplicitCode);
~DILocation() { dropAllReferences(); }
static DILocation *getImpl(LLVMContext &Context, unsigned Line,
unsigned Column, Metadata *Scope,
- Metadata *InlinedAt, StorageType Storage,
- bool ShouldCreate = true);
+ Metadata *InlinedAt, bool ImplicitCode,
+ StorageType Storage, bool ShouldCreate = true);
static DILocation *getImpl(LLVMContext &Context, unsigned Line,
unsigned Column, DILocalScope *Scope,
- DILocation *InlinedAt, StorageType Storage,
- bool ShouldCreate = true) {
+ DILocation *InlinedAt, bool ImplicitCode,
+ StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Line, Column, static_cast<Metadata *>(Scope),
- static_cast<Metadata *>(InlinedAt), Storage, ShouldCreate);
+ static_cast<Metadata *>(InlinedAt), ImplicitCode, Storage,
+ ShouldCreate);
}
/// With a given unsigned int \p U, use up to 13 bits to represent it.
@@ -1398,6 +1430,9 @@ class DILocation : public MDNode {
/// Reverse transformation as getPrefixEncodingFromUnsigned.
static unsigned getUnsignedFromPrefixEncoding(unsigned U) {
+ if (U & 1)
+ return 0;
+ U >>= 1;
return (U & 0x20) ? (((U >> 1) & 0xfe0) | (U & 0x1f)) : (U & 0x1f);
}
@@ -1413,7 +1448,15 @@ class DILocation : public MDNode {
// Get the raw scope/inlinedAt since it is possible to invoke this on
// a DILocation containing temporary metadata.
return getTemporary(getContext(), getLine(), getColumn(), getRawScope(),
- getRawInlinedAt());
+ getRawInlinedAt(), isImplicitCode());
+ }
+
+ static unsigned encodeComponent(unsigned C) {
+ return (C == 0) ? 1U : (getPrefixEncodingFromUnsigned(C) << 1);
+ }
+
+ static unsigned encodingBits(unsigned C) {
+ return (C == 0) ? 1 : (C > 0x1f ? 14 : 7);
}
public:
@@ -1422,12 +1465,13 @@ public:
DEFINE_MDNODE_GET(DILocation,
(unsigned Line, unsigned Column, Metadata *Scope,
- Metadata *InlinedAt = nullptr),
- (Line, Column, Scope, InlinedAt))
+ Metadata *InlinedAt = nullptr, bool ImplicitCode = false),
+ (Line, Column, Scope, InlinedAt, ImplicitCode))
DEFINE_MDNODE_GET(DILocation,
(unsigned Line, unsigned Column, DILocalScope *Scope,
- DILocation *InlinedAt = nullptr),
- (Line, Column, Scope, InlinedAt))
+ DILocation *InlinedAt = nullptr,
+ bool ImplicitCode = false),
+ (Line, Column, Scope, InlinedAt, ImplicitCode))
/// Return a (temporary) clone of this.
TempDILocation clone() const { return cloneImpl(); }
@@ -1440,6 +1484,15 @@ public:
return cast_or_null<DILocation>(getRawInlinedAt());
}
+  /// Check if the location corresponds to implicit code.
+  /// When the ImplicitCode flag is true, the Instruction carrying this
+  /// DILocation was added by the front-end and was not written explicitly
+  /// by the user (e.g. C++ cleanup code emitted at a closing brace). This
+  /// lets code coverage avoid showing a counter on such "empty" lines.
+ bool isImplicitCode() const { return ImplicitCode; }
+ void setImplicitCode(bool ImplicitCode) { this->ImplicitCode = ImplicitCode; }
+
DIFile *getFile() const { return getScope()->getFile(); }
StringRef getFilename() const { return getScope()->getFilename(); }
StringRef getDirectory() const { return getScope()->getDirectory(); }
@@ -1455,19 +1508,6 @@ public:
return getScope();
}
- /// Check whether this can be discriminated from another location.
- ///
- /// Check \c this can be discriminated from \c RHS in a linetable entry.
- /// Scope and inlined-at chains are not recorded in the linetable, so they
- /// cannot be used to distinguish basic blocks.
- bool canDiscriminate(const DILocation &RHS) const {
- return getLine() != RHS.getLine() ||
- getColumn() != RHS.getColumn() ||
- getDiscriminator() != RHS.getDiscriminator() ||
- getFilename() != RHS.getFilename() ||
- getDirectory() != RHS.getDirectory();
- }
-
/// Get the DWARF discriminator.
///
/// DWARF discriminators distinguish identical file locations between
@@ -1489,20 +1529,35 @@ public:
/// order. If the lowest bit is 1, the current component is empty, and the
/// next component will start in the next bit. Otherwise, the current
/// component is non-empty, and its content starts in the next bit. The
- /// length of each components is either 5 bit or 12 bit: if the 7th bit
+  /// value of each component is encoded in either 5 or 12 bits: if the 7th bit
/// is 0, the bit 2~6 (5 bits) are used to represent the component; if the
/// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to
- /// represent the component.
+  /// represent the component. Thus, the number of bits used for a component
+  /// is either 0 (if it and all following components are empty); 1 (if it is
+  /// empty); 7 (if its value is at most 0x1f, so lsb and msb are both 0); or
+  /// 14 (if its value is at most 0x1ff). Note that the last component is
+  /// also capped at 0x1ff, even when the first two components are 0 and 29
+  /// bits would technically be available.
+ ///
+ /// For precise control over the data being encoded in the discriminator,
+ /// use encodeDiscriminator/decodeDiscriminator.
+ ///
+  /// Read the documentation for {get|set}BaseDiscriminator and
+  /// cloneWithDuplicationFactor before using them, as their behavior has
+  /// side-effects.
inline unsigned getDiscriminator() const;
/// Returns a new DILocation with updated \p Discriminator.
inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
- /// Returns a new DILocation with updated base discriminator \p BD.
- inline const DILocation *setBaseDiscriminator(unsigned BD) const;
+ /// Returns a new DILocation with updated base discriminator \p BD. Only the
+  /// base discriminator is set in the new DILocation; the other encoded values
+ /// are elided.
+ /// If the discriminator cannot be encoded, the function returns None.
+ inline Optional<const DILocation *> setBaseDiscriminator(unsigned BD) const;
- /// Returns the duplication factor stored in the discriminator.
+ /// Returns the duplication factor stored in the discriminator, or 1 if no
+ /// duplication factor (or 0) is encoded.
inline unsigned getDuplicationFactor() const;
/// Returns the copy identifier stored in the discriminator.
@@ -1511,11 +1566,11 @@ public:
/// Returns the base discriminator stored in the discriminator.
inline unsigned getBaseDiscriminator() const;
- /// Returns a new DILocation with duplication factor \p DF encoded in the
- /// discriminator.
- inline const DILocation *cloneWithDuplicationFactor(unsigned DF) const;
-
- enum { NoGeneratedLocation = false, WithGeneratedLocation = true };
+ /// Returns a new DILocation with duplication factor \p DF * current
+ /// duplication factor encoded in the discriminator. The current duplication
+ /// factor is as defined by getDuplicationFactor().
+ /// Returns None if encoding failed.
+ inline Optional<const DILocation *> cloneWithDuplicationFactor(unsigned DF) const;
/// When two instructions are combined into a single instruction we also
/// need to combine the original locations into a single location.
@@ -1531,25 +1586,36 @@ public:
///
/// \p GenerateLocation: Whether the merged location can be generated when
/// \p LocA and \p LocB differ.
- static const DILocation *
- getMergedLocation(const DILocation *LocA, const DILocation *LocB,
- bool GenerateLocation = NoGeneratedLocation);
+ static const DILocation *getMergedLocation(const DILocation *LocA,
+ const DILocation *LocB);
/// Returns the base discriminator for a given encoded discriminator \p D.
static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) {
- if ((D & 1) == 0)
- return getUnsignedFromPrefixEncoding(D >> 1);
- else
- return 0;
+ return getUnsignedFromPrefixEncoding(D);
}
- /// Returns the duplication factor for a given encoded discriminator \p D.
+ /// Raw encoding of the discriminator. APIs such as setBaseDiscriminator or
+ /// cloneWithDuplicationFactor have certain side-effects. This API, in
+ /// conjunction with cloneWithDiscriminator, may be used to encode precisely
+ /// the values provided. \p BD: base discriminator \p DF: duplication factor
+ /// \p CI: copy index
+  /// The result is None if the values cannot be encoded in 32 bits (for
+  /// example, if BD or DF needs more than 12 bits); otherwise it is the
+  /// encoded value.
+ static Optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI);
+
+ /// Raw decoder for values in an encoded discriminator D.
+ static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
+ unsigned &CI);
+
+ /// Returns the duplication factor for a given encoded discriminator \p D, or
+ /// 1 if no value or 0 is encoded.
static unsigned getDuplicationFactorFromDiscriminator(unsigned D) {
D = getNextComponentInDiscriminator(D);
- if (D == 0 || (D & 1))
+ unsigned Ret = getUnsignedFromPrefixEncoding(D);
+ if (Ret == 0)
return 1;
- else
- return getUnsignedFromPrefixEncoding(D >> 1);
+ return Ret;
}
/// Returns the copy identifier for a given encoded discriminator \p D.
@@ -1588,102 +1654,118 @@ class DISubprogram : public DILocalScope {
/// negative.
int ThisAdjustment;
- // Virtuality can only assume three values, so we can pack
- // in 2 bits (none/pure/pure_virtual).
- unsigned Virtuality : 2;
+public:
+ /// Debug info subprogram flags.
+ enum DISPFlags : uint32_t {
+#define HANDLE_DISP_FLAG(ID, NAME) SPFlag##NAME = ID,
+#define DISP_FLAG_LARGEST_NEEDED
+#include "llvm/IR/DebugInfoFlags.def"
+ SPFlagNonvirtual = SPFlagZero,
+ SPFlagVirtuality = SPFlagVirtual | SPFlagPureVirtual,
+ LLVM_MARK_AS_BITMASK_ENUM(SPFlagLargest)
+ };
- // These are boolean flags so one bit is enough.
- // MSVC starts a new container field every time the base
- // type changes so we can't use 'bool' to ensure these bits
- // are packed.
- unsigned IsLocalToUnit : 1;
- unsigned IsDefinition : 1;
- unsigned IsOptimized : 1;
+ static DISPFlags getFlag(StringRef Flag);
+ static StringRef getFlagString(DISPFlags Flag);
- unsigned Padding : 3;
+ /// Split up a flags bitfield for easier printing.
+ ///
+ /// Split \c Flags into \c SplitFlags, a vector of its components. Returns
+ /// any remaining (unrecognized) bits.
+ static DISPFlags splitFlags(DISPFlags Flags,
+ SmallVectorImpl<DISPFlags> &SplitFlags);
+
+ // Helper for converting old bitfields to new flags word.
+ static DISPFlags toSPFlags(bool IsLocalToUnit, bool IsDefinition,
+ bool IsOptimized,
+ unsigned Virtuality = SPFlagNonvirtual) {
+ // We're assuming virtuality is the low-order field.
+ static_assert(
+ int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
+ int(SPFlagPureVirtual) == int(dwarf::DW_VIRTUALITY_pure_virtual),
+ "Virtuality constant mismatch");
+ return static_cast<DISPFlags>(
+ (Virtuality & SPFlagVirtuality) |
+ (IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) |
+ (IsDefinition ? SPFlagDefinition : SPFlagZero) |
+ (IsOptimized ? SPFlagOptimized : SPFlagZero));
+ }
+private:
DIFlags Flags;
+ DISPFlags SPFlags;
DISubprogram(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned ScopeLine, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, DIFlags Flags, bool IsLocalToUnit,
- bool IsDefinition, bool IsOptimized, ArrayRef<Metadata *> Ops)
+ unsigned ScopeLine, unsigned VirtualIndex, int ThisAdjustment,
+ DIFlags Flags, DISPFlags SPFlags, ArrayRef<Metadata *> Ops)
: DILocalScope(C, DISubprogramKind, Storage, dwarf::DW_TAG_subprogram,
Ops),
Line(Line), ScopeLine(ScopeLine), VirtualIndex(VirtualIndex),
- ThisAdjustment(ThisAdjustment), Virtuality(Virtuality),
- IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition),
- IsOptimized(IsOptimized), Flags(Flags) {
+ ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags) {
static_assert(dwarf::DW_VIRTUALITY_max < 4, "Virtuality out of range");
- assert(Virtuality < 4 && "Virtuality out of range");
}
~DISubprogram() = default;
static DISubprogram *
getImpl(LLVMContext &Context, DIScopeRef Scope, StringRef Name,
StringRef LinkageName, DIFile *File, unsigned Line,
- DISubroutineType *Type, bool IsLocalToUnit, bool IsDefinition,
- unsigned ScopeLine, DITypeRef ContainingType, unsigned Virtuality,
+ DISubroutineType *Type, unsigned ScopeLine, DITypeRef ContainingType,
unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
- bool IsOptimized, DICompileUnit *Unit,
+ DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
DINodeArray RetainedNodes, DITypeArray ThrownTypes,
StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
- IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
- Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
- Unit, TemplateParams.get(), Declaration, RetainedNodes.get(),
- ThrownTypes.get(), Storage, ShouldCreate);
+ ScopeLine, ContainingType, VirtualIndex, ThisAdjustment,
+ Flags, SPFlags, Unit, TemplateParams.get(), Declaration,
+ RetainedNodes.get(), ThrownTypes.get(), Storage,
+ ShouldCreate);
}
- static DISubprogram *
- getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
- MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
- Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes,
- Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate = true);
+ static DISubprogram *getImpl(LLVMContext &Context, Metadata *Scope,
+ MDString *Name, MDString *LinkageName,
+ Metadata *File, unsigned Line, Metadata *Type,
+ unsigned ScopeLine, Metadata *ContainingType,
+ unsigned VirtualIndex, int ThisAdjustment,
+ DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
+ Metadata *TemplateParams, Metadata *Declaration,
+ Metadata *RetainedNodes, Metadata *ThrownTypes,
+ StorageType Storage, bool ShouldCreate = true);
TempDISubprogram cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
- getFile(), getLine(), getType(), isLocalToUnit(),
- isDefinition(), getScopeLine(), getContainingType(),
- getVirtuality(), getVirtualIndex(), getThisAdjustment(),
- getFlags(), isOptimized(), getUnit(),
- getTemplateParams(), getDeclaration(), getRetainedNodes(),
- getThrownTypes());
+ getFile(), getLine(), getType(), getScopeLine(),
+ getContainingType(), getVirtualIndex(),
+ getThisAdjustment(), getFlags(), getSPFlags(),
+ getUnit(), getTemplateParams(), getDeclaration(),
+ getRetainedNodes(), getThrownTypes());
}
public:
- DEFINE_MDNODE_GET(DISubprogram,
- (DIScopeRef Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned Line, DISubroutineType *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- DITypeRef ContainingType, unsigned Virtuality,
- unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
- bool IsOptimized, DICompileUnit *Unit,
- DITemplateParameterArray TemplateParams = nullptr,
- DISubprogram *Declaration = nullptr,
- DINodeArray RetainedNodes = nullptr,
- DITypeArray ThrownTypes = nullptr),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, ScopeLine, ContainingType, Virtuality,
- VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit,
- TemplateParams, Declaration, RetainedNodes, ThrownTypes))
+ DEFINE_MDNODE_GET(
+ DISubprogram,
+ (DIScopeRef Scope, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned Line, DISubroutineType *Type, unsigned ScopeLine,
+ DITypeRef ContainingType, unsigned VirtualIndex, int ThisAdjustment,
+ DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit,
+ DITemplateParameterArray TemplateParams = nullptr,
+ DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr,
+ DITypeArray ThrownTypes = nullptr),
+ (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
+ VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
+ Declaration, RetainedNodes, ThrownTypes))
+
DEFINE_MDNODE_GET(
DISubprogram,
(Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File,
- unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
- unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality,
- unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
- bool IsOptimized, Metadata *Unit, Metadata *TemplateParams = nullptr,
- Metadata *Declaration = nullptr, Metadata *RetainedNodes = nullptr,
- Metadata *ThrownTypes = nullptr),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
- ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment,
- Flags, IsOptimized, Unit, TemplateParams, Declaration, RetainedNodes,
- ThrownTypes))
+ unsigned Line, Metadata *Type, unsigned ScopeLine,
+ Metadata *ContainingType, unsigned VirtualIndex, int ThisAdjustment,
+ DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
+ Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr,
+ Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr),
+ (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
+ VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
+ Declaration, RetainedNodes, ThrownTypes))
TempDISubprogram clone() const { return cloneImpl(); }
@@ -1696,14 +1778,15 @@ public:
public:
unsigned getLine() const { return Line; }
- unsigned getVirtuality() const { return Virtuality; }
+ unsigned getVirtuality() const { return getSPFlags() & SPFlagVirtuality; }
unsigned getVirtualIndex() const { return VirtualIndex; }
int getThisAdjustment() const { return ThisAdjustment; }
unsigned getScopeLine() const { return ScopeLine; }
DIFlags getFlags() const { return Flags; }
- bool isLocalToUnit() const { return IsLocalToUnit; }
- bool isDefinition() const { return IsDefinition; }
- bool isOptimized() const { return IsOptimized; }
+ DISPFlags getSPFlags() const { return SPFlags; }
+ bool isLocalToUnit() const { return getSPFlags() & SPFlagLocalToUnit; }
+ bool isDefinition() const { return getSPFlags() & SPFlagDefinition; }
+ bool isOptimized() const { return getSPFlags() & SPFlagOptimized; }
bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isPrivate() const {
@@ -1717,6 +1800,9 @@ public:
}
bool isExplicit() const { return getFlags() & FlagExplicit; }
bool isPrototyped() const { return getFlags() & FlagPrototyped; }
+ bool areAllCallsDescribed() const {
+ return getFlags() & FlagAllCallsDescribed;
+ }
bool isMainSubprogram() const { return getFlags() & FlagMainSubprogram; }
/// Check if this is reference-qualified.
@@ -1953,28 +2039,24 @@ unsigned DILocation::getCopyIdentifier() const {
return getCopyIdentifierFromDiscriminator(getDiscriminator());
}
-const DILocation *DILocation::setBaseDiscriminator(unsigned D) const {
+Optional<const DILocation *> DILocation::setBaseDiscriminator(unsigned D) const {
if (D == 0)
return this;
- else
- return cloneWithDiscriminator(getPrefixEncodingFromUnsigned(D) << 1);
+ if (D > 0xfff)
+ return None;
+ return cloneWithDiscriminator(encodeComponent(D));
}
-const DILocation *DILocation::cloneWithDuplicationFactor(unsigned DF) const {
+Optional<const DILocation *> DILocation::cloneWithDuplicationFactor(unsigned DF) const {
DF *= getDuplicationFactor();
if (DF <= 1)
return this;
unsigned BD = getBaseDiscriminator();
- unsigned CI = getCopyIdentifier() << (DF > 0x1f ? 14 : 7);
- unsigned D = CI | (getPrefixEncodingFromUnsigned(DF) << 1);
-
- if (BD == 0)
- D = (D << 1) | 1;
- else
- D = (D << (BD > 0x1f ? 14 : 7)) | (getPrefixEncodingFromUnsigned(BD) << 1);
-
- return cloneWithDiscriminator(D);
+ unsigned CI = getCopyIdentifier();
+ if (Optional<unsigned> D = encodeDiscriminator(BD, DF, CI))
+ return cloneWithDiscriminator(*D);
+ return None;
}
class DINamespace : public DIScope {
@@ -2515,30 +2597,30 @@ class DIGlobalVariable : public DIVariable {
IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {}
~DIGlobalVariable() = default;
- static DIGlobalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
- StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned Line, DITypeRef Type,
- bool IsLocalToUnit, bool IsDefinition,
- DIDerivedType *StaticDataMemberDeclaration,
- uint32_t AlignInBits, StorageType Storage,
- bool ShouldCreate = true) {
+ static DIGlobalVariable *
+ getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
+ StringRef LinkageName, DIFile *File, unsigned Line, DITypeRef Type,
+ bool IsLocalToUnit, bool IsDefinition,
+ DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams,
+ uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
IsLocalToUnit, IsDefinition, StaticDataMemberDeclaration,
- AlignInBits, Storage, ShouldCreate);
+ cast_or_null<Metadata>(TemplateParams), AlignInBits, Storage,
+ ShouldCreate);
}
static DIGlobalVariable *
getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
- Metadata *StaticDataMemberDeclaration, uint32_t AlignInBits,
- StorageType Storage, bool ShouldCreate = true);
+ Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
+ uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true);
TempDIGlobalVariable cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
getFile(), getLine(), getType(), isLocalToUnit(),
isDefinition(), getStaticDataMemberDeclaration(),
- getAlignInBits());
+ getTemplateParams(), getAlignInBits());
}
public:
@@ -2547,17 +2629,19 @@ public:
DIFile *File, unsigned Line, DITypeRef Type,
bool IsLocalToUnit, bool IsDefinition,
DIDerivedType *StaticDataMemberDeclaration,
- uint32_t AlignInBits),
+ MDTuple *TemplateParams, uint32_t AlignInBits),
(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, StaticDataMemberDeclaration, AlignInBits))
+ IsDefinition, StaticDataMemberDeclaration, TemplateParams,
+ AlignInBits))
DEFINE_MDNODE_GET(DIGlobalVariable,
(Metadata * Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration,
- uint32_t AlignInBits),
+ Metadata *TemplateParams, uint32_t AlignInBits),
(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, StaticDataMemberDeclaration, AlignInBits))
+ IsDefinition, StaticDataMemberDeclaration, TemplateParams,
+ AlignInBits))
TempDIGlobalVariable clone() const { return cloneImpl(); }
@@ -2571,6 +2655,8 @@ public:
MDString *getRawLinkageName() const { return getOperandAs<MDString>(5); }
Metadata *getRawStaticDataMemberDeclaration() const { return getOperand(6); }
+ Metadata *getRawTemplateParams() const { return getOperand(7); }
+ MDTuple *getTemplateParams() const { return getOperandAs<MDTuple>(7); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIGlobalVariableKind;
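Several DILocation discriminator helpers above now return Optional<const DILocation *>, since the prefix encoding can run out of bits. Below is a minimal sketch of adapting a caller, assuming 'Loc' (a const DILocation *) and 'BD' already exist in the caller; the fallback policy is illustrative only.

// Sketch: handling the Optional-returning setBaseDiscriminator added above.
const DILocation *Updated = Loc;   // keep the original location by default
if (Optional<const DILocation *> NewLoc = Loc->setBaseDiscriminator(BD))
  Updated = *NewLoc;               // only the base discriminator is encoded
// If BD needs more than 12 bits it cannot be encoded and NewLoc stays None.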
diff --git a/contrib/llvm/include/llvm/IR/DebugLoc.h b/contrib/llvm/include/llvm/IR/DebugLoc.h
index 9f619ffc5c4d..4f0d7f51b5f9 100644
--- a/contrib/llvm/include/llvm/IR/DebugLoc.h
+++ b/contrib/llvm/include/llvm/IR/DebugLoc.h
@@ -78,7 +78,8 @@ namespace llvm {
///
/// FIXME: Remove this. Users should use DILocation::get().
static DebugLoc get(unsigned Line, unsigned Col, const MDNode *Scope,
- const MDNode *InlinedAt = nullptr);
+ const MDNode *InlinedAt = nullptr,
+ bool ImplicitCode = false);
enum { ReplaceLastInlinedAt = true };
/// Rebuild the entire inlined-at chain for this instruction so that the top of
@@ -112,6 +113,10 @@ namespace llvm {
/// Return \c this as a bar \a MDNode.
MDNode *getAsMDNode() const { return Loc; }
+ /// Check if the DebugLoc corresponds to an implicit code.
+ bool isImplicitCode() const;
+ void setImplicitCode(bool ImplicitCode);
+
bool operator==(const DebugLoc &DL) const { return Loc == DL.Loc; }
bool operator!=(const DebugLoc &DL) const { return Loc != DL.Loc; }
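The DebugLoc wrapper mirrors the new DILocation ImplicitCode bit. A minimal sketch of how a front end might tag compiler-generated cleanup code, assuming 'Line', 'Col', and 'Scope' come from the caller (DebugLoc::get is already flagged above as slated for removal in favor of DILocation::get).

// Sketch: marking a front-end-generated location as implicit code.
DebugLoc DL = DebugLoc::get(Line, Col, Scope, /*InlinedAt=*/nullptr,
                            /*ImplicitCode=*/true);
// Later, e.g. in coverage instrumentation, the flag can be queried back:
bool SkipCounter = DL.isImplicitCode();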
diff --git a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
index 81d4ae84bf01..3a55a7dca7f4 100644
--- a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -101,6 +101,7 @@ private:
/// Severity gives the severity of the diagnostic.
const DiagnosticSeverity Severity;
+ virtual void anchor();
public:
DiagnosticInfo(/* DiagnosticKind */ int Kind, DiagnosticSeverity Severity)
: Kind(Kind), Severity(Severity) {}
@@ -210,6 +211,7 @@ public:
};
class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit {
+ virtual void anchor() override;
public:
DiagnosticInfoStackSize(const Function &Fn, uint64_t StackSize,
DiagnosticSeverity Severity = DS_Warning,
@@ -340,7 +342,7 @@ private:
};
class DiagnosticLocation {
- StringRef Filename;
+ DIFile *File = nullptr;
unsigned Line = 0;
unsigned Column = 0;
@@ -349,14 +351,18 @@ public:
DiagnosticLocation(const DebugLoc &DL);
DiagnosticLocation(const DISubprogram *SP);
- bool isValid() const { return !Filename.empty(); }
- StringRef getFilename() const { return Filename; }
+ bool isValid() const { return File; }
+ /// Return the full path to the file.
+ std::string getAbsolutePath() const;
+ /// Return the file name relative to the compilation directory.
+ StringRef getRelativePath() const;
unsigned getLine() const { return Line; }
unsigned getColumn() const { return Column; }
};
/// Common features for diagnostics with an associated location.
class DiagnosticInfoWithLocationBase : public DiagnosticInfo {
+ virtual void anchor() override;
public:
/// \p Fn is the function where the diagnostic is being emitted. \p Loc is
/// the location information to use in the diagnostic.
@@ -375,9 +381,13 @@ public:
const std::string getLocationStr() const;
/// Return location information for this diagnostic in three parts:
- /// the source file name, line number and column.
- void getLocation(StringRef *Filename, unsigned *Line, unsigned *Column) const;
+ /// the relative source file path, line number and column.
+ void getLocation(StringRef &RelativePath, unsigned &Line,
+ unsigned &Column) const;
+  /// Return the absolute path to the file.
+ std::string getAbsolutePath() const;
+
const Function &getFunction() const { return Fn; }
DiagnosticLocation getLocation() const { return Loc; }
@@ -414,6 +424,7 @@ public:
Argument(StringRef Key, const Value *V);
Argument(StringRef Key, const Type *T);
Argument(StringRef Key, StringRef S);
+ Argument(StringRef Key, const char *S) : Argument(Key, StringRef(S)) {};
Argument(StringRef Key, int N);
Argument(StringRef Key, float N);
Argument(StringRef Key, long N);
@@ -590,6 +601,7 @@ operator<<(RemarkT &R,
/// Common features for diagnostics dealing with optimization remarks
/// that are used by IR passes.
class DiagnosticInfoIROptimization : public DiagnosticInfoOptimizationBase {
+ virtual void anchor() override;
public:
/// \p PassName is the name of the pass emitting this diagnostic. \p
/// RemarkName is a textual identifier for the remark (single-word,
@@ -810,6 +822,7 @@ private:
/// Diagnostic information for optimization analysis remarks related to
/// floating-point non-commutativity.
class OptimizationRemarkAnalysisFPCommute : public OptimizationRemarkAnalysis {
+ virtual void anchor();
public:
/// \p PassName is the name of the pass emitting this diagnostic. If this name
/// matches the regular expression given in -Rpass-analysis=, then the
@@ -851,6 +864,7 @@ private:
/// Diagnostic information for optimization analysis remarks related to
/// pointer aliasing.
class OptimizationRemarkAnalysisAliasing : public OptimizationRemarkAnalysis {
+ virtual void anchor();
public:
/// \p PassName is the name of the pass emitting this diagnostic. If this name
/// matches the regular expression given in -Rpass-analysis=, then the
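DiagnosticLocation now stores a DIFile rather than a bare filename, so remark consumers can choose between the path relative to the compilation directory and the absolute path. A minimal sketch of an updated consumer, assuming 'Diag' is some DiagnosticInfoWithLocationBase-derived remark available in the caller.

// Sketch: using the reference-based getLocation and getAbsolutePath above.
if (Diag.getLocation().isValid()) {
  StringRef RelPath;
  unsigned Line = 0, Column = 0;
  Diag.getLocation(RelPath, Line, Column);   // path relative to the comp dir
  errs() << Diag.getAbsolutePath() << ":" << Line << ":" << Column << "\n";
}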
diff --git a/contrib/llvm/include/llvm/IR/DomTreeUpdater.h b/contrib/llvm/include/llvm/IR/DomTreeUpdater.h
index 81ba670ac0f5..e5bb092d21ca 100644
--- a/contrib/llvm/include/llvm/IR/DomTreeUpdater.h
+++ b/contrib/llvm/include/llvm/IR/DomTreeUpdater.h
@@ -159,11 +159,9 @@ public:
void callbackDeleteBB(BasicBlock *DelBB,
std::function<void(BasicBlock *)> Callback);
- /// Recalculate all available trees.
- /// Under Lazy Strategy, available trees will only be recalculated if there
- /// are pending updates or there is BasicBlock awaiting deletion. Returns true
- /// if at least one tree is recalculated.
- bool recalculate(Function &F);
+ /// Recalculate all available trees and flush all BasicBlocks
+ /// awaiting deletion immediately.
+ void recalculate(Function &F);
/// Flush DomTree updates and return DomTree.
   /// It also flushes out-of-date updates applied by all available trees
diff --git a/contrib/llvm/include/llvm/IR/Dominators.h b/contrib/llvm/include/llvm/IR/Dominators.h
index f9e992b0ef0c..f7da47d07663 100644
--- a/contrib/llvm/include/llvm/IR/Dominators.h
+++ b/contrib/llvm/include/llvm/IR/Dominators.h
@@ -37,15 +37,18 @@ extern template class DomTreeNodeBase<BasicBlock>;
extern template class DominatorTreeBase<BasicBlock, false>; // DomTree
extern template class DominatorTreeBase<BasicBlock, true>; // PostDomTree
+extern template class cfg::Update<BasicBlock *>;
+
namespace DomTreeBuilder {
using BBDomTree = DomTreeBase<BasicBlock>;
using BBPostDomTree = PostDomTreeBase<BasicBlock>;
-extern template struct Update<BasicBlock *>;
-
-using BBUpdates = ArrayRef<Update<BasicBlock *>>;
+using BBUpdates = ArrayRef<llvm::cfg::Update<BasicBlock *>>;
extern template void Calculate<BBDomTree>(BBDomTree &DT);
+extern template void CalculateWithUpdates<BBDomTree>(BBDomTree &DT,
+ BBUpdates U);
+
extern template void Calculate<BBPostDomTree>(BBPostDomTree &DT);
extern template void InsertEdge<BBDomTree>(BBDomTree &DT, BasicBlock *From,
@@ -145,6 +148,9 @@ class DominatorTree : public DominatorTreeBase<BasicBlock, false> {
DominatorTree() = default;
explicit DominatorTree(Function &F) { recalculate(F); }
+ explicit DominatorTree(DominatorTree &DT, DomTreeBuilder::BBUpdates U) {
+ recalculate(*DT.Parent, U);
+ }
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,
@@ -276,94 +282,6 @@ public:
void print(raw_ostream &OS, const Module *M = nullptr) const override;
};
-
-//===-------------------------------------
-/// Class to defer updates to a DominatorTree.
-///
-/// Definition: Applying updates to every edge insertion and deletion is
-/// expensive and not necessary. When one needs the DominatorTree for analysis
-/// they can request a flush() to perform a larger batch update. This has the
-/// advantage of the DominatorTree inspecting the set of updates to find
-/// duplicates or unnecessary subtree updates.
-///
-/// The scope of DeferredDominance operates at a Function level.
-///
-/// It is not necessary for the user to scrub the updates for duplicates or
-/// updates that point to the same block (Delete, BB_A, BB_A). Performance
-/// can be gained if the caller attempts to batch updates before submitting
-/// to applyUpdates(ArrayRef) in cases where duplicate edge requests will
-/// occur.
-///
-/// It is required for the state of the LLVM IR to be applied *before*
-/// submitting updates. The update routines must analyze the current state
-/// between a pair of (From, To) basic blocks to determine if the update
-/// needs to be queued.
-/// Example (good):
-/// TerminatorInstructionBB->removeFromParent();
-/// DDT->deleteEdge(BB, Successor);
-/// Example (bad):
-/// DDT->deleteEdge(BB, Successor);
-/// TerminatorInstructionBB->removeFromParent();
-class DeferredDominance {
-public:
- DeferredDominance(DominatorTree &DT_) : DT(DT_) {}
-
- /// Queues multiple updates and discards duplicates.
- void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates);
-
- /// Helper method for a single edge insertion. It's almost always
- /// better to batch updates and call applyUpdates to quickly remove duplicate
- /// edges. This is best used when there is only a single insertion needed to
- /// update Dominators.
- void insertEdge(BasicBlock *From, BasicBlock *To);
-
- /// Helper method for a single edge deletion. It's almost always better
- /// to batch updates and call applyUpdates to quickly remove duplicate edges.
- /// This is best used when there is only a single deletion needed to update
- /// Dominators.
- void deleteEdge(BasicBlock *From, BasicBlock *To);
-
- /// Delays the deletion of a basic block until a flush() event.
- void deleteBB(BasicBlock *DelBB);
-
- /// Returns true if DelBB is awaiting deletion at a flush() event.
- bool pendingDeletedBB(BasicBlock *DelBB);
-
- /// Returns true if pending DT updates are queued for a flush() event.
- bool pending();
-
- /// Flushes all pending updates and block deletions. Returns a
- /// correct DominatorTree reference to be used by the caller for analysis.
- DominatorTree &flush();
-
- /// Drops all internal state and forces a (slow) recalculation of the
- /// DominatorTree based on the current state of the LLVM IR in F. This should
- /// only be used in corner cases such as the Entry block of F being deleted.
- void recalculate(Function &F);
-
- /// Debug method to help view the state of pending updates.
- LLVM_DUMP_METHOD void dump() const;
-
-private:
- DominatorTree &DT;
- SmallVector<DominatorTree::UpdateType, 16> PendUpdates;
- SmallPtrSet<BasicBlock *, 8> DeletedBBs;
-
- /// Apply an update (Kind, From, To) to the internal queued updates. The
- /// update is only added when determined to be necessary. Checks for
- /// self-domination, unnecessary updates, duplicate requests, and balanced
- /// pairs of requests are all performed. Returns true if the update is
- /// queued and false if it is discarded.
- bool applyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From,
- BasicBlock *To);
-
- /// Performs all pending basic block deletions. We have to defer the deletion
- /// of these blocks until after the DominatorTree updates are applied. The
- /// internal workings of the DominatorTree code expect every update's From
- /// and To blocks to exist and to be a member of the same Function.
- bool flushDelBB();
-};
-
} // end namespace llvm
#endif // LLVM_IR_DOMINATORS_H
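With the DeferredDominance helper removed (DomTreeUpdater, touched earlier in this diff, takes over that role) and cfg::Update<BasicBlock *> now an explicit instantiation, a DominatorTree can be constructed directly from an existing tree plus a batch of CFG updates. A minimal sketch, with 'DT', 'From', and 'To' assumed to exist in the caller.

// Sketch: seeding a DominatorTree with pending CFG edits via the new ctor.
SmallVector<cfg::Update<BasicBlock *>, 4> Updates;
Updates.push_back({cfg::UpdateKind::Insert, From, To});
DominatorTree NewDT(DT, Updates);   // recalculates DT's function with Updates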
diff --git a/contrib/llvm/include/llvm/IR/Function.h b/contrib/llvm/include/llvm/IR/Function.h
index 02e3ecc8e27f..630f47e8bb57 100644
--- a/contrib/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm/include/llvm/IR/Function.h
@@ -120,7 +120,7 @@ private:
/// function is automatically inserted into the end of the function list for
/// the module.
///
- Function(FunctionType *Ty, LinkageTypes Linkage,
+ Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
const Twine &N = "", Module *M = nullptr);
public:
@@ -134,17 +134,31 @@ public:
const Function &getFunction() const { return *this; }
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
+ unsigned AddrSpace, const Twine &N = "",
+ Module *M = nullptr) {
+ return new Function(Ty, Linkage, AddrSpace, N, M);
+ }
+
+ // TODO: remove this once all users have been updated to pass an AddrSpace
+ static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N = "", Module *M = nullptr) {
- return new Function(Ty, Linkage, N, M);
+ return new Function(Ty, Linkage, static_cast<unsigned>(-1), N, M);
}
+ /// Creates a new function and attaches it to a module.
+ ///
+ /// Places the function in the program address space as specified
+ /// by the module's data layout.
+ static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &N, Module &M);
+
// Provide fast operand accessors.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
/// Returns the number of non-debug IR instructions in this function.
/// This is equivalent to the sum of the sizes of each basic block contained
/// within this function.
- unsigned getInstructionCount();
+ unsigned getInstructionCount() const;
/// Returns the FunctionType for me.
FunctionType *getFunctionType() const {
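Function creation now carries an explicit address space, plus a convenience overload that reads the program address space from the module's data layout. A minimal sketch of the two new paths, assuming 'FTy' (a FunctionType *) and a Module 'M' exist in the caller.

// Sketch: the Create overloads added above.
Function *F1 = Function::Create(FTy, GlobalValue::ExternalLinkage,
                                /*AddrSpace=*/0, "f1", &M);
// Reads the program address space from M.getDataLayout():
Function *F2 = Function::Create(FTy, GlobalValue::ExternalLinkage, "f2", M);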
diff --git a/contrib/llvm/include/llvm/IR/GlobalValue.h b/contrib/llvm/include/llvm/IR/GlobalValue.h
index 9d9f4f65a6b5..c07d4051c803 100644
--- a/contrib/llvm/include/llvm/IR/GlobalValue.h
+++ b/contrib/llvm/include/llvm/IR/GlobalValue.h
@@ -189,6 +189,7 @@ public:
GlobalValue(const GlobalValue &) = delete;
unsigned getAlignment() const;
+ unsigned getAddressSpace() const;
enum class UnnamedAddr {
None,
diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h
index 70641ba25d2e..fac2ff46c453 100644
--- a/contrib/llvm/include/llvm/IR/IRBuilder.h
+++ b/contrib/llvm/include/llvm/IR/IRBuilder.h
@@ -651,7 +651,7 @@ public:
ArrayRef<Use> DeoptArgs, ArrayRef<Value *> GCArgs,
const Twine &Name = "");
- // Conveninence function for the common case when CallArgs are filled in using
+ // Convenience function for the common case when CallArgs are filled in using
// makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be .get()'ed to
// get the Value *.
InvokeInst *
@@ -675,32 +675,44 @@ public:
Type *ResultType,
const Twine &Name = "");
+ /// Create a call to intrinsic \p ID with 1 operand which is mangled on its
+ /// type.
+ CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
+ Instruction *FMFSource = nullptr,
+ const Twine &Name = "");
+
/// Create a call to intrinsic \p ID with 2 operands which is mangled on the
/// first type.
- CallInst *CreateBinaryIntrinsic(Intrinsic::ID ID,
- Value *LHS, Value *RHS,
+ CallInst *CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS,
+ Instruction *FMFSource = nullptr,
const Twine &Name = "");
- /// Create a call to intrinsic \p ID with no operands.
- CallInst *CreateIntrinsic(Intrinsic::ID ID,
- Instruction *FMFSource = nullptr,
- const Twine &Name = "");
-
- /// Create a call to intrinsic \p ID with 1 or more operands assuming the
- /// intrinsic and all operands have the same type. If \p FMFSource is
- /// provided, copy fast-math-flags from that instruction to the intrinsic.
- CallInst *CreateIntrinsic(Intrinsic::ID ID, ArrayRef<Value *> Args,
+  /// Create a call to intrinsic \p ID with \p Args, mangled using \p Types. If
+ /// \p FMFSource is provided, copy fast-math-flags from that instruction to
+ /// the intrinsic.
+ CallInst *CreateIntrinsic(Intrinsic::ID ID, ArrayRef<Type *> Types,
+ ArrayRef<Value *> Args,
Instruction *FMFSource = nullptr,
const Twine &Name = "");
/// Create call to the minnum intrinsic.
CallInst *CreateMinNum(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, Name);
+ return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, nullptr, Name);
}
/// Create call to the maxnum intrinsic.
CallInst *CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS, Name);
+ return CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS, nullptr, Name);
+ }
+
+ /// Create call to the minimum intrinsic.
+ CallInst *CreateMinimum(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateBinaryIntrinsic(Intrinsic::minimum, LHS, RHS, nullptr, Name);
+ }
+
+ /// Create call to the maximum intrinsic.
+ CallInst *CreateMaximum(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name);
}
private:
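The intrinsic helpers are reshuffled: CreateIntrinsic now takes the explicit overload types, and dedicated unary/binary helpers (plus the new minimum/maximum wrappers) accept an FMFSource instruction whose fast-math flags are copied onto the created call. A minimal sketch, assuming 'Builder', the values 'X', 'A', 'B', and a fast-math-flagged instruction 'FMFSrc' exist in the caller.

// Sketch: using the reworked intrinsic creation helpers.
CallInst *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FMFSrc);
CallInst *Min = Builder.CreateBinaryIntrinsic(Intrinsic::minnum, A, B, FMFSrc);
Value *Max = Builder.CreateMaximum(A, B, "max");   // new wrapper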
@@ -877,19 +889,59 @@ public:
}
/// Create an invoke instruction.
- InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
- BasicBlock *UnwindDest,
+ InvokeInst *CreateInvoke(FunctionType *Ty, Value *Callee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "") {
+ return Insert(
+ InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args, OpBundles),
+ Name);
+ }
+ InvokeInst *CreateInvoke(FunctionType *Ty, Value *Callee,
+ BasicBlock *NormalDest, BasicBlock *UnwindDest,
ArrayRef<Value *> Args = None,
const Twine &Name = "") {
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
+ return Insert(InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args),
Name);
}
+
+ InvokeInst *CreateInvoke(Function *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "") {
+ return CreateInvoke(Callee->getFunctionType(), Callee, NormalDest,
+ UnwindDest, Args, OpBundles, Name);
+ }
+
+ InvokeInst *CreateInvoke(Function *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest,
+ ArrayRef<Value *> Args = None,
+ const Twine &Name = "") {
+ return CreateInvoke(Callee->getFunctionType(), Callee, NormalDest,
+ UnwindDest, Args, Name);
+ }
+
+ // Deprecated [opaque pointer types]
InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "") {
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
- OpBundles), Name);
+ return CreateInvoke(
+ cast<FunctionType>(
+ cast<PointerType>(Callee->getType())->getElementType()),
+ Callee, NormalDest, UnwindDest, Args, OpBundles, Name);
+ }
+
+ // Deprecated [opaque pointer types]
+ InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest,
+ ArrayRef<Value *> Args = None,
+ const Twine &Name = "") {
+ return CreateInvoke(
+ cast<FunctionType>(
+ cast<PointerType>(Callee->getType())->getElementType()),
+ Callee, NormalDest, UnwindDest, Args, Name);
}
ResumeInst *CreateResume(Value *Exn) {
@@ -1300,22 +1352,35 @@ public:
return Insert(new AllocaInst(Ty, DL.getAllocaAddrSpace(), ArraySize), Name);
}
- /// Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
+ /// Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of
/// converting the string to 'bool' for the isVolatile parameter.
- LoadInst *CreateLoad(Value *Ptr, const char *Name) {
- return Insert(new LoadInst(Ptr), Name);
- }
-
- LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
- return Insert(new LoadInst(Ptr), Name);
+ LoadInst *CreateLoad(Type *Ty, Value *Ptr, const char *Name) {
+ return Insert(new LoadInst(Ty, Ptr), Name);
}
LoadInst *CreateLoad(Type *Ty, Value *Ptr, const Twine &Name = "") {
return Insert(new LoadInst(Ty, Ptr), Name);
}
+ LoadInst *CreateLoad(Type *Ty, Value *Ptr, bool isVolatile,
+ const Twine &Name = "") {
+ return Insert(new LoadInst(Ty, Ptr, Twine(), isVolatile), Name);
+ }
+
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateLoad(Value *Ptr, const char *Name) {
+ return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name);
+ }
+
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
+ return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name);
+ }
+
+ // Deprecated [opaque pointer types]
LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const Twine &Name = "") {
- return Insert(new LoadInst(Ptr, nullptr, isVolatile), Name);
+ return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, isVolatile,
+ Name);
}
StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
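The load builders now take the loaded type explicitly; the pointee-derived overloads remain but are tagged deprecated ahead of opaque pointer types. A minimal sketch of migrating a call site, assuming 'Builder' and an i32* value 'Ptr' exist in the caller.

// Sketch: preferring the typed load overloads added above.
LoadInst *Old = Builder.CreateLoad(Ptr, "v");                    // deprecated
LoadInst *New = Builder.CreateLoad(Builder.getInt32Ty(), Ptr, "v");
LoadInst *Aligned =
    Builder.CreateAlignedLoad(Builder.getInt32Ty(), Ptr, /*Align=*/4, "v");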
@@ -1325,24 +1390,43 @@ public:
/// Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")'
/// correctly, instead of converting the string to 'bool' for the isVolatile
/// parameter.
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
- LoadInst *LI = CreateLoad(Ptr, Name);
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
+ const char *Name) {
+ LoadInst *LI = CreateLoad(Ty, Ptr, Name);
LI->setAlignment(Align);
return LI;
}
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align,
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
const Twine &Name = "") {
- LoadInst *LI = CreateLoad(Ptr, Name);
+ LoadInst *LI = CreateLoad(Ty, Ptr, Name);
LI->setAlignment(Align);
return LI;
}
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile,
- const Twine &Name = "") {
- LoadInst *LI = CreateLoad(Ptr, isVolatile, Name);
+ LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align,
+ bool isVolatile, const Twine &Name = "") {
+ LoadInst *LI = CreateLoad(Ty, Ptr, isVolatile, Name);
LI->setAlignment(Align);
return LI;
}
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, Name);
+ }
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align,
+ const Twine &Name = "") {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, Name);
+ }
+ // Deprecated [opaque pointer types]
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile,
+ const Twine &Name = "") {
+ return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
+ Align, isVolatile, Name);
+ }
+
StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
bool isVolatile = false) {
StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
@@ -1479,50 +1563,69 @@ public:
return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name);
}
- Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
+ Value *CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0,
+ const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
if (auto *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idx), Name);
+ return Insert(Folder.CreateGetElementPtr(Ty, PC, Idx), Name);
- return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idx), Name);
+ return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
}
- Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
+ Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
+ return CreateConstGEP1_64(nullptr, Ptr, Idx0, Name);
+ }
+
+ Value *CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0,
const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
if (auto *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idx), Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idx), Name);
- return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idx), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
}
- Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const Twine &Name = "") {
+ Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
+ const Twine &Name = "") {
+ return CreateConstInBoundsGEP1_64(nullptr, Ptr, Idx0, Name);
+ }
+
+ Value *CreateConstGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
Value *Idxs[] = {
ConstantInt::get(Type::getInt64Ty(Context), Idx0),
ConstantInt::get(Type::getInt64Ty(Context), Idx1)
};
if (auto *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(nullptr, PC, Idxs), Name);
+ return Insert(Folder.CreateGetElementPtr(Ty, PC, Idxs), Name);
- return Insert(GetElementPtrInst::Create(nullptr, Ptr, Idxs), Name);
+ return Insert(GetElementPtrInst::Create(Ty, Ptr, Idxs), Name);
}
- Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const Twine &Name = "") {
+ Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
+ return CreateConstGEP2_64(nullptr, Ptr, Idx0, Idx1, Name);
+ }
+
+ Value *CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0,
+ uint64_t Idx1, const Twine &Name = "") {
Value *Idxs[] = {
ConstantInt::get(Type::getInt64Ty(Context), Idx0),
ConstantInt::get(Type::getInt64Ty(Context), Idx1)
};
if (auto *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(nullptr, PC, Idxs),
- Name);
+ return Insert(Folder.CreateInBoundsGetElementPtr(Ty, PC, Idxs), Name);
- return Insert(GetElementPtrInst::CreateInBounds(nullptr, Ptr, Idxs), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name);
+ }
+
+ Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
+ return CreateConstInBoundsGEP2_64(nullptr, Ptr, Idx0, Idx1, Name);
}
Value *CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx,
@@ -1868,15 +1971,8 @@ public:
return Insert(PHINode::Create(Ty, NumReservedValues), Name);
}
- CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
- const Twine &Name = "", MDNode *FPMathTag = nullptr) {
- auto *PTy = cast<PointerType>(Callee->getType());
- auto *FTy = cast<FunctionType>(PTy->getElementType());
- return CreateCall(FTy, Callee, Args, Name, FPMathTag);
- }
-
CallInst *CreateCall(FunctionType *FTy, Value *Callee,
- ArrayRef<Value *> Args, const Twine &Name = "",
+ ArrayRef<Value *> Args = None, const Twine &Name = "",
MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
if (isa<FPMathOperator>(CI))
@@ -1884,20 +1980,44 @@ public:
return Insert(CI, Name);
}
- CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
+ CallInst *CreateCall(FunctionType *FTy, Value *Callee, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
- CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
+ CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(setFPAttrs(CI, FPMathTag, FMF));
return Insert(CI, Name);
}
- CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
+ CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args = None,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag);
}
+ CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ return CreateCall(Callee->getFunctionType(), Callee, Args, OpBundles, Name,
+ FPMathTag);
+ }
+
+ // Deprecated [opaque pointer types]
+ CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ return CreateCall(
+ cast<FunctionType>(Callee->getType()->getPointerElementType()), Callee,
+ Args, Name, FPMathTag);
+ }
+
+ // Deprecated [opaque pointer types]
+ CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+ return CreateCall(
+ cast<FunctionType>(Callee->getType()->getPointerElementType()), Callee,
+ Args, OpBundles, Name, FPMathTag);
+ }
+
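A short sketch of the reshuffled CreateCall overloads (F, FnTy, FnPtr and Arg
are assumed values): the FunctionType-carrying forms are now the primary API,
while the plain Value* callee forms above are kept only for the deprecation
period:

    // Function* callee: the type is taken from F->getFunctionType().
    CallInst *C1 = B.CreateCall(F, {Arg});
    // Raw callee value: pass the FunctionType explicitly.
    CallInst *C2 = B.CreateCall(FnTy, FnPtr, {Arg});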
Value *CreateSelect(Value *C, Value *True, Value *False,
const Twine &Name = "", Instruction *MDFrom = nullptr) {
if (auto *CC = dyn_cast<Constant>(C))
@@ -2114,11 +2234,12 @@ public:
private:
/// Helper function that creates an assume intrinsic call that
/// represents an alignment assumption on the provided Ptr, Mask, Type
- /// and Offset.
+ /// and Offset. It can sometimes be useful to perform additional logic based
+ /// on this alignment check; for that purpose, the check can also be stored
+ /// into 'TheCheck'.
CallInst *CreateAlignmentAssumptionHelper(const DataLayout &DL,
Value *PtrValue, Value *Mask,
- Type *IntPtrTy,
- Value *OffsetValue) {
+ Type *IntPtrTy, Value *OffsetValue,
+ Value **TheCheck) {
Value *PtrIntValue = CreatePtrToInt(PtrValue, IntPtrTy, "ptrint");
if (OffsetValue) {
@@ -2137,6 +2258,9 @@ private:
Value *Zero = ConstantInt::get(IntPtrTy, 0);
Value *MaskedPtr = CreateAnd(PtrIntValue, Mask, "maskedptr");
Value *InvCond = CreateICmpEQ(MaskedPtr, Zero, "maskcond");
+ if (TheCheck)
+ *TheCheck = InvCond;
+
return CreateAssumption(InvCond);
}
@@ -2147,9 +2271,13 @@ public:
/// An optional offset can be provided, and if it is provided, the offset
/// must be subtracted from the provided pointer to get the pointer with the
/// specified alignment.
+ ///
+ /// It can sometimes be useful to perform additional logic based on this
+ /// alignment check; for that purpose, the check can also be stored into 'TheCheck'.
CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue,
unsigned Alignment,
- Value *OffsetValue = nullptr) {
+ Value *OffsetValue = nullptr,
+ Value **TheCheck = nullptr) {
assert(isa<PointerType>(PtrValue->getType()) &&
"trying to create an alignment assumption on a non-pointer?");
auto *PtrTy = cast<PointerType>(PtrValue->getType());
@@ -2157,7 +2285,7 @@ public:
Value *Mask = ConstantInt::get(IntPtrTy, Alignment > 0 ? Alignment - 1 : 0);
return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
- OffsetValue);
+ OffsetValue, TheCheck);
}
/// Create an assume intrinsic call that represents an alignment
@@ -2167,11 +2295,15 @@ public:
/// must be subtracted from the provided pointer to get the pointer with the
/// specified alignment.
///
+ /// It can sometimes be useful to perform additional logic based on this
+ /// alignment check; for that purpose, the check can also be stored into 'TheCheck'.
+ ///
/// This overload handles the condition where the Alignment is dependent
/// on an existing value rather than a static value.
CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue,
Value *Alignment,
- Value *OffsetValue = nullptr) {
+ Value *OffsetValue = nullptr,
+ Value **TheCheck = nullptr) {
assert(isa<PointerType>(PtrValue->getType()) &&
"trying to create an alignment assumption on a non-pointer?");
auto *PtrTy = cast<PointerType>(PtrValue->getType());
@@ -2189,7 +2321,7 @@ public:
ConstantInt::get(IntPtrTy, 0), "mask");
return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
- OffsetValue);
+ OffsetValue, TheCheck);
}
};
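A sketch of the new out-parameter on the alignment-assumption helpers (DL, Ptr
and B are assumed): callers can now retrieve the i1 "maskcond" compare that
feeds the generated llvm.assume and reuse it for their own logic:

    Value *AlignCheck = nullptr;
    CallInst *Assume = B.CreateAlignmentAssumption(
        DL, Ptr, /*Alignment=*/16, /*OffsetValue=*/nullptr, &AlignCheck);
    // AlignCheck now holds the ICmpEQ emitted for the assumption.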
diff --git a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
index e4ac5d4d88a3..75f80567dbd5 100644
--- a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -58,6 +58,22 @@ void printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name);
/// Return true if a pass is for IR printing.
bool isIRPrintingPass(Pass *P);
+/// isFunctionInPrintList - returns true if a function should be printed via
+/// debugging options like -print-after-all/-print-before-all, i.e. whether
+/// the function's IR should be printed by PrinterPass.
+extern bool isFunctionInPrintList(StringRef FunctionName);
+
+/// forcePrintModuleIR - returns true if IR printing passes should print
+/// module IR (even for local-pass printers, e.g. function passes) to provide
+/// more context, as enabled by the debugging option -print-module-scope.
+extern bool forcePrintModuleIR();
+
+/// shouldPrintBeforePass/shouldPrintAfterPass - hooks for the -print-before,
+/// -print-before-all, -print-after and -print-after-all debugging options;
+/// the StringRef overloads test a specific pass by its command-line argument.
+extern bool shouldPrintBeforePass();
+extern bool shouldPrintBeforePass(StringRef);
+extern bool shouldPrintAfterPass();
+extern bool shouldPrintAfterPass(StringRef);
+
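An illustrative guard that a printer pass might build from these hooks (PassArg,
F and OS are assumed; this is not code from the patch):

    if (shouldPrintAfterPass(PassArg) && isFunctionInPrintList(F.getName())) {
      if (forcePrintModuleIR())
        F.getParent()->print(OS, /*AAW=*/nullptr); // -print-module-scope
      else
        F.print(OS);
    }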
/// Pass for printing a Module as LLVM's text IR assembly.
///
/// Note: This pass is for use with the new pass manager. Use the create...Pass
diff --git a/contrib/llvm/include/llvm/IR/InstVisitor.h b/contrib/llvm/include/llvm/IR/InstVisitor.h
index 65074025a083..c5b4c6f71d7d 100644
--- a/contrib/llvm/include/llvm/IR/InstVisitor.h
+++ b/contrib/llvm/include/llvm/IR/InstVisitor.h
@@ -166,15 +166,6 @@ public:
// Specific Instruction type classes... note that all of the casts are
// necessary because we use the instruction classes as opaque types...
//
- RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitCleanupReturnInst(CleanupReturnInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitCatchReturnInst(CatchReturnInst &I) { DELEGATE(TerminatorInst); }
- RetTy visitCatchSwitchInst(CatchSwitchInst &I) { DELEGATE(TerminatorInst);}
RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);}
@@ -211,10 +202,12 @@ public:
RetTy visitCatchPadInst(CatchPadInst &I) { DELEGATE(FuncletPadInst); }
// Handle the special intrinsic instruction classes.
- RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgInfoIntrinsic);}
- RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgInfoIntrinsic);}
+ RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgVariableIntrinsic);}
+ RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgVariableIntrinsic);}
+ RetTy visitDbgVariableIntrinsic(DbgVariableIntrinsic &I)
+ { DELEGATE(DbgInfoIntrinsic);}
RetTy visitDbgLabelInst(DbgLabelInst &I) { DELEGATE(DbgInfoIntrinsic);}
- RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { DELEGATE(IntrinsicInst); }
+ RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
@@ -234,27 +227,64 @@ public:
return static_cast<SubClass*>(this)->visitCallSite(&I);
}
+ // While terminators don't have a distinct type modeling them, we support
+ // intercepting them with a dedicated visitor callback.
+ RetTy visitReturnInst(ReturnInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitBranchInst(BranchInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitSwitchInst(SwitchInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitIndirectBrInst(IndirectBrInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitResumeInst(ResumeInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitUnreachableInst(UnreachableInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitCleanupReturnInst(CleanupReturnInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitCatchReturnInst(CatchReturnInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitCatchSwitchInst(CatchSwitchInst &I) {
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+ }
+ RetTy visitTerminator(Instruction &I) { DELEGATE(Instruction);}
+
// Next level propagators: If the user does not overload a specific
// instruction type, they can overload one of these to get the whole class
// of instructions...
//
RetTy visitCastInst(CastInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitUnaryOperator(UnaryOperator &I) { DELEGATE(UnaryInstruction);}
RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction);}
RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction);}
- RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction);}
RetTy visitUnaryInstruction(UnaryInstruction &I){ DELEGATE(Instruction);}
- // Provide a special visitor for a 'callsite' that visits both calls and
- // invokes. When unimplemented, properly delegates to either the terminator or
- // regular instruction visitor.
+ // The next level delegation for `CallBase` is slightly more complex in order
+ // to support visiting cases where the call is also a terminator.
+ RetTy visitCallBase(CallBase &I) {
+ if (isa<InvokeInst>(I))
+ return static_cast<SubClass *>(this)->visitTerminator(I);
+
+ DELEGATE(Instruction);
+ }
+
+ // Provide a legacy visitor for a 'callsite' that visits both calls and
+ // invokes.
+ //
+ // Prefer overriding the type system based `CallBase` instead.
RetTy visitCallSite(CallSite CS) {
assert(CS);
Instruction &I = *CS.getInstruction();
- if (CS.isCall())
- DELEGATE(Instruction);
-
- assert(CS.isInvoke());
- DELEGATE(TerminatorInst);
+ DELEGATE(CallBase);
}
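A minimal sketch of a visitor written against the new hooks (CountingVisitor is
a hypothetical client, not part of this change):

    #include "llvm/IR/InstVisitor.h"
    using namespace llvm;

    struct CountingVisitor : public InstVisitor<CountingVisitor> {
      unsigned Terminators = 0, Calls = 0;
      // ret/br/switch/unreachable/... all funnel through here now.
      void visitTerminator(Instruction &I) { ++Terminators; }
      // Both CallInst and InvokeInst reach this via visitCallSite/CallBase.
      void visitCallBase(CallBase &CB) { ++Calls; }
    };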
// If the user wants a 'default' case, they can choose to override this
diff --git a/contrib/llvm/include/llvm/IR/InstrTypes.h b/contrib/llvm/include/llvm/IR/InstrTypes.h
index ad0012048ac9..3f384a6ee40c 100644
--- a/contrib/llvm/include/llvm/IR/InstrTypes.h
+++ b/contrib/llvm/include/llvm/IR/InstrTypes.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
@@ -45,233 +46,9 @@
namespace llvm {
-//===----------------------------------------------------------------------===//
-// TerminatorInst Class
-//===----------------------------------------------------------------------===//
-
-/// Subclasses of this class are all able to terminate a basic
-/// block. Thus, these are all the flow control type of operations.
-///
-class TerminatorInst : public Instruction {
-protected:
- TerminatorInst(Type *Ty, Instruction::TermOps iType,
- Use *Ops, unsigned NumOps,
- Instruction *InsertBefore = nullptr)
- : Instruction(Ty, iType, Ops, NumOps, InsertBefore) {}
-
- TerminatorInst(Type *Ty, Instruction::TermOps iType,
- Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd)
- : Instruction(Ty, iType, Ops, NumOps, InsertAtEnd) {}
-
-public:
- /// Return the number of successors that this terminator has.
- unsigned getNumSuccessors() const;
-
- /// Return the specified successor.
- BasicBlock *getSuccessor(unsigned idx) const;
-
- /// Update the specified successor to point at the provided block.
- void setSuccessor(unsigned idx, BasicBlock *B);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Instruction *I) {
- return I->isTerminator();
- }
- static bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-
- // Returns true if this terminator relates to exception handling.
- bool isExceptional() const {
- switch (getOpcode()) {
- case Instruction::CatchSwitch:
- case Instruction::CatchRet:
- case Instruction::CleanupRet:
- case Instruction::Invoke:
- case Instruction::Resume:
- return true;
- default:
- return false;
- }
- }
-
- //===--------------------------------------------------------------------===//
- // succ_iterator definition
- //===--------------------------------------------------------------------===//
-
- template <class Term, class BB> // Successor Iterator
- class SuccIterator : public std::iterator<std::random_access_iterator_tag, BB,
- int, BB *, BB *> {
- using super =
- std::iterator<std::random_access_iterator_tag, BB, int, BB *, BB *>;
-
- public:
- using pointer = typename super::pointer;
- using reference = typename super::reference;
-
- private:
- Term TermInst;
- unsigned idx;
- using Self = SuccIterator<Term, BB>;
-
- inline bool index_is_valid(unsigned idx) {
- return idx < TermInst->getNumSuccessors();
- }
-
- /// Proxy object to allow write access in operator[]
- class SuccessorProxy {
- Self it;
-
- public:
- explicit SuccessorProxy(const Self &it) : it(it) {}
-
- SuccessorProxy(const SuccessorProxy &) = default;
-
- SuccessorProxy &operator=(SuccessorProxy r) {
- *this = reference(r);
- return *this;
- }
-
- SuccessorProxy &operator=(reference r) {
- it.TermInst->setSuccessor(it.idx, r);
- return *this;
- }
-
- operator reference() const { return *it; }
- };
-
- public:
- // begin iterator
- explicit inline SuccIterator(Term T) : TermInst(T), idx(0) {}
- // end iterator
- inline SuccIterator(Term T, bool) : TermInst(T) {
- if (TermInst)
- idx = TermInst->getNumSuccessors();
- else
- // Term == NULL happens, if a basic block is not fully constructed and
- // consequently getTerminator() returns NULL. In this case we construct
- // a SuccIterator which describes a basic block that has zero
- // successors.
- // Defining SuccIterator for incomplete and malformed CFGs is especially
- // useful for debugging.
- idx = 0;
- }
-
- /// This is used to interface between code that wants to
- /// operate on terminator instructions directly.
- unsigned getSuccessorIndex() const { return idx; }
-
- inline bool operator==(const Self &x) const { return idx == x.idx; }
- inline bool operator!=(const Self &x) const { return !operator==(x); }
-
- inline reference operator*() const { return TermInst->getSuccessor(idx); }
- inline pointer operator->() const { return operator*(); }
-
- inline Self &operator++() {
- ++idx;
- return *this;
- } // Preincrement
-
- inline Self operator++(int) { // Postincrement
- Self tmp = *this;
- ++*this;
- return tmp;
- }
-
- inline Self &operator--() {
- --idx;
- return *this;
- } // Predecrement
- inline Self operator--(int) { // Postdecrement
- Self tmp = *this;
- --*this;
- return tmp;
- }
-
- inline bool operator<(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot compare iterators of different blocks!");
- return idx < x.idx;
- }
-
- inline bool operator<=(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot compare iterators of different blocks!");
- return idx <= x.idx;
- }
- inline bool operator>=(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot compare iterators of different blocks!");
- return idx >= x.idx;
- }
-
- inline bool operator>(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot compare iterators of different blocks!");
- return idx > x.idx;
- }
-
- inline Self &operator+=(int Right) {
- unsigned new_idx = idx + Right;
- assert(index_is_valid(new_idx) && "Iterator index out of bound");
- idx = new_idx;
- return *this;
- }
-
- inline Self operator+(int Right) const {
- Self tmp = *this;
- tmp += Right;
- return tmp;
- }
-
- inline Self &operator-=(int Right) { return operator+=(-Right); }
-
- inline Self operator-(int Right) const { return operator+(-Right); }
-
- inline int operator-(const Self &x) const {
- assert(TermInst == x.TermInst &&
- "Cannot work on iterators of different blocks!");
- int distance = idx - x.idx;
- return distance;
- }
-
- inline SuccessorProxy operator[](int offset) {
- Self tmp = *this;
- tmp += offset;
- return SuccessorProxy(tmp);
- }
-
- /// Get the source BB of this iterator.
- inline BB *getSource() {
- assert(TermInst && "Source not available, if basic block was malformed");
- return TermInst->getParent();
- }
- };
-
- using succ_iterator = SuccIterator<TerminatorInst *, BasicBlock>;
- using succ_const_iterator =
- SuccIterator<const TerminatorInst *, const BasicBlock>;
- using succ_range = iterator_range<succ_iterator>;
- using succ_const_range = iterator_range<succ_const_iterator>;
-
-private:
- inline succ_iterator succ_begin() { return succ_iterator(this); }
- inline succ_const_iterator succ_begin() const {
- return succ_const_iterator(this);
- }
- inline succ_iterator succ_end() { return succ_iterator(this, true); }
- inline succ_const_iterator succ_end() const {
- return succ_const_iterator(this, true);
- }
-
-public:
- inline succ_range successors() {
- return succ_range(succ_begin(), succ_end());
- }
- inline succ_const_range successors() const {
- return succ_const_range(succ_begin(), succ_end());
- }
-};
+namespace Intrinsic {
+enum ID : unsigned;
+}
//===----------------------------------------------------------------------===//
// UnaryInstruction Class
@@ -536,22 +313,6 @@ public:
static BinaryOperator *CreateNot(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd);
- /// Check if the given Value is a NEG, FNeg, or NOT instruction.
- ///
- static bool isNeg(const Value *V);
- static bool isFNeg(const Value *V, bool IgnoreZeroSign=false);
- static bool isNot(const Value *V);
-
- /// Helper functions to extract the unary argument of a NEG, FNEG or NOT
- /// operation implemented via Sub, FSub, or Xor.
- ///
- static const Value *getNegArgument(const Value *BinOp);
- static Value *getNegArgument( Value *BinOp);
- static const Value *getFNegArgument(const Value *BinOp);
- static Value *getFNegArgument( Value *BinOp);
- static const Value *getNotArgument(const Value *BinOp);
- static Value *getNotArgument( Value *BinOp);
-
BinaryOps getOpcode() const {
return static_cast<BinaryOps>(Instruction::getOpcode());
}
@@ -921,7 +682,8 @@ public:
protected:
CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
Value *LHS, Value *RHS, const Twine &Name = "",
- Instruction *InsertBefore = nullptr);
+ Instruction *InsertBefore = nullptr,
+ Instruction *FlagsSource = nullptr);
CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
Value *LHS, Value *RHS, const Twine &Name,
@@ -1147,76 +909,6 @@ struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
-//===----------------------------------------------------------------------===//
-// FuncletPadInst Class
-//===----------------------------------------------------------------------===//
-class FuncletPadInst : public Instruction {
-private:
- FuncletPadInst(const FuncletPadInst &CPI);
-
- explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, Instruction *InsertBefore);
- explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-
- void init(Value *ParentPad, ArrayRef<Value *> Args, const Twine &NameStr);
-
-protected:
- // Note: Instruction needs to be a friend here to call cloneImpl.
- friend class Instruction;
- friend class CatchPadInst;
- friend class CleanupPadInst;
-
- FuncletPadInst *cloneImpl() const;
-
-public:
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// getNumArgOperands - Return the number of funcletpad arguments.
- ///
- unsigned getNumArgOperands() const { return getNumOperands() - 1; }
-
- /// Convenience accessors
-
- /// Return the outer EH-pad this funclet is nested within.
- ///
- /// Note: This returns the associated CatchSwitchInst if this FuncletPadInst
- /// is a CatchPadInst.
- Value *getParentPad() const { return Op<-1>(); }
- void setParentPad(Value *ParentPad) {
- assert(ParentPad);
- Op<-1>() = ParentPad;
- }
-
- /// getArgOperand/setArgOperand - Return/set the i-th funcletpad argument.
- ///
- Value *getArgOperand(unsigned i) const { return getOperand(i); }
- void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
-
- /// arg_operands - iteration adapter for range-for loops.
- op_range arg_operands() { return op_range(op_begin(), op_end() - 1); }
-
- /// arg_operands - iteration adapter for range-for loops.
- const_op_range arg_operands() const {
- return const_op_range(op_begin(), op_end() - 1);
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Instruction *I) { return I->isFuncletPad(); }
- static bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<FuncletPadInst>
- : public VariadicOperandTraits<FuncletPadInst, /*MINARITY=*/1> {};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value)
-
/// A lightweight accessor for an operand bundle meant to be passed
/// around by value.
struct OperandBundleUse {
@@ -1301,54 +993,609 @@ public:
using OperandBundleDef = OperandBundleDefT<Value *>;
using ConstOperandBundleDef = OperandBundleDefT<const Value *>;
-/// A mixin to add operand bundle functionality to llvm instruction
-/// classes.
-///
-/// OperandBundleUser uses the descriptor area co-allocated with the host User
-/// to store some meta information about which operands are "normal" operands,
-/// and which ones belong to some operand bundle.
-///
-/// The layout of an operand bundle user is
-///
-/// +-----------uint32_t End-------------------------------------+
-/// | |
-/// | +--------uint32_t Begin--------------------+ |
-/// | | | |
-/// ^ ^ v v
-/// |------|------|----|----|----|----|----|---------|----|---------|----|-----
-/// | BOI0 | BOI1 | .. | DU | U0 | U1 | .. | BOI0_U0 | .. | BOI1_U0 | .. | Un
-/// |------|------|----|----|----|----|----|---------|----|---------|----|-----
-/// v v ^ ^
-/// | | | |
-/// | +--------uint32_t Begin------------+ |
-/// | |
-/// +-----------uint32_t End-----------------------------+
-///
-///
-/// BOI0, BOI1 ... are descriptions of operand bundles in this User's use list.
-/// These descriptions are installed and managed by this class, and they're all
-/// instances of OperandBundleUser<T>::BundleOpInfo.
-///
-/// DU is an additional descriptor installed by User's 'operator new' to keep
-/// track of the 'BOI0 ... BOIN' co-allocation. OperandBundleUser does not
-/// access or modify DU in any way, it's an implementation detail private to
-/// User.
-///
-/// The regular Use& vector for the User starts at U0. The operand bundle uses
-/// are part of the Use& vector, just like normal uses. In the diagram above,
-/// the operand bundle uses start at BOI0_U0. Each instance of BundleOpInfo has
-/// information about a contiguous set of uses constituting an operand bundle,
-/// and the total set of operand bundle uses themselves form a contiguous set of
-/// uses (i.e. there are no gaps between uses corresponding to individual
-/// operand bundles).
+//===----------------------------------------------------------------------===//
+// CallBase Class
+//===----------------------------------------------------------------------===//
+
+/// Base class for all callable instructions (InvokeInst and CallInst).
+/// Holds everything related to calling a function.
///
-/// This class does not know the location of the set of operand bundle uses
-/// within the use list -- that is decided by the User using this class via the
-/// BeginIdx argument in populateBundleOperandInfos.
+/// All call-like instructions are required to use a common operand layout:
+/// - Zero or more arguments to the call,
+/// - Zero or more operand bundles, each with zero or more operand inputs,
+/// - Zero or more subclass-controlled operands,
+/// - The called function.
///
-/// Currently operand bundle users with hung-off operands are not supported.
-template <typename InstrTy, typename OpIteratorTy> class OperandBundleUser {
+/// This allows this base class to easily access the called function and the
+/// start of the arguments without knowing how many other operands a particular
+/// subclass requires. Note that accessing the end of the argument list isn't
+/// as cheap as most other operations on the base class.
+class CallBase : public Instruction {
+protected:
+ /// The last operand is the called operand.
+ static constexpr int CalledOperandOpEndIdx = -1;
+
+ AttributeList Attrs; ///< parameter attributes for callable
+ FunctionType *FTy;
+
+ template <class... ArgsTy>
+ CallBase(AttributeList const &A, FunctionType *FT, ArgsTy &&... Args)
+ : Instruction(std::forward<ArgsTy>(Args)...), Attrs(A), FTy(FT) {}
+
+ using Instruction::Instruction;
+
+ bool hasDescriptor() const { return Value::HasDescriptor; }
+
+ unsigned getNumSubclassExtraOperands() const {
+ switch (getOpcode()) {
+ case Instruction::Call:
+ return 0;
+ case Instruction::Invoke:
+ return 2;
+ }
+ llvm_unreachable("Invalid opcode!");
+ }
+
public:
+ using Instruction::getContext;
+
+ static bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Call ||
+ I->getOpcode() == Instruction::Invoke;
+ }
+ static bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+ FunctionType *getFunctionType() const { return FTy; }
+
+ void mutateFunctionType(FunctionType *FTy) {
+ Value::mutateType(FTy->getReturnType());
+ this->FTy = FTy;
+ }
+
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// data_operands_begin/data_operands_end - Return iterators iterating over
+ /// the call / invoke argument list and bundle operands. For invokes, this is
+ /// the set of instruction operands except the invoke target and the two
+ /// successor blocks; and for calls this is the set of instruction operands
+ /// except the call target.
+ User::op_iterator data_operands_begin() { return op_begin(); }
+ User::const_op_iterator data_operands_begin() const {
+ return const_cast<CallBase *>(this)->data_operands_begin();
+ }
+ User::op_iterator data_operands_end() {
+ // Walk from the end of the operands over the called operand and any
+ // subclass operands.
+ return op_end() - getNumSubclassExtraOperands() - 1;
+ }
+ User::const_op_iterator data_operands_end() const {
+ return const_cast<CallBase *>(this)->data_operands_end();
+ }
+ iterator_range<User::op_iterator> data_ops() {
+ return make_range(data_operands_begin(), data_operands_end());
+ }
+ iterator_range<User::const_op_iterator> data_ops() const {
+ return make_range(data_operands_begin(), data_operands_end());
+ }
+ bool data_operands_empty() const {
+ return data_operands_end() == data_operands_begin();
+ }
+ unsigned data_operands_size() const {
+ return std::distance(data_operands_begin(), data_operands_end());
+ }
+
+ bool isDataOperand(const Use *U) const {
+ assert(this == U->getUser() &&
+ "Only valid to query with a use of this instruction!");
+ return data_operands_begin() <= U && U < data_operands_end();
+ }
+ bool isDataOperand(Value::const_user_iterator UI) const {
+ return isDataOperand(&UI.getUse());
+ }
+
+ /// Return the iterator pointing to the beginning of the argument list.
+ User::op_iterator arg_begin() { return op_begin(); }
+ User::const_op_iterator arg_begin() const {
+ return const_cast<CallBase *>(this)->arg_begin();
+ }
+
+ /// Return the iterator pointing to the end of the argument list.
+ User::op_iterator arg_end() {
+ // From the end of the data operands, walk backwards past the bundle
+ // operands.
+ return data_operands_end() - getNumTotalBundleOperands();
+ }
+ User::const_op_iterator arg_end() const {
+ return const_cast<CallBase *>(this)->arg_end();
+ }
+
+ /// Iteration adapter for range-for loops.
+ iterator_range<User::op_iterator> args() {
+ return make_range(arg_begin(), arg_end());
+ }
+ iterator_range<User::const_op_iterator> args() const {
+ return make_range(arg_begin(), arg_end());
+ }
+ bool arg_empty() const { return arg_end() == arg_begin(); }
+ unsigned arg_size() const { return arg_end() - arg_begin(); }
+
+ // Legacy API names that duplicate the above and will be removed once users
+ // are migrated.
+ iterator_range<User::op_iterator> arg_operands() {
+ return make_range(arg_begin(), arg_end());
+ }
+ iterator_range<User::const_op_iterator> arg_operands() const {
+ return make_range(arg_begin(), arg_end());
+ }
+ unsigned getNumArgOperands() const { return arg_size(); }
+
+ Value *getArgOperand(unsigned i) const {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return getOperand(i);
+ }
+
+ void setArgOperand(unsigned i, Value *v) {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ setOperand(i, v);
+ }
+
+ /// Wrappers for getting the \c Use of a call argument.
+ const Use &getArgOperandUse(unsigned i) const {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return User::getOperandUse(i);
+ }
+ Use &getArgOperandUse(unsigned i) {
+ assert(i < getNumArgOperands() && "Out of bounds!");
+ return User::getOperandUse(i);
+ }
+
+ bool isArgOperand(const Use *U) const {
+ assert(this == U->getUser() &&
+ "Only valid to query with a use of this instruction!");
+ return arg_begin() <= U && U < arg_end();
+ }
+ bool isArgOperand(Value::const_user_iterator UI) const {
+ return isArgOperand(&UI.getUse());
+ }
+
+ /// Returns true if this CallSite passes the given Value* as an argument to
+ /// the called function.
+ bool hasArgument(const Value *V) const {
+ return llvm::any_of(args(), [V](const Value *Arg) { return Arg == V; });
+ }
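An illustrative walk over the unified operand layout (CB is an assumed
CallBase&): arguments come first, then bundle operands, and the callee is
always the last operand:

    for (Value *Arg : CB.args())
      (void)Arg;                                          // the call arguments
    unsigned BundleOps = CB.getNumTotalBundleOperands();  // operands owned by bundles
    Value *Callee = CB.getCalledOperand();                // last operand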
+
+ Value *getCalledOperand() const { return Op<CalledOperandOpEndIdx>(); }
+
+ // DEPRECATED: This routine will be removed in favor of `getCalledOperand` in
+ // the near future.
+ Value *getCalledValue() const { return getCalledOperand(); }
+
+ const Use &getCalledOperandUse() const { return Op<CalledOperandOpEndIdx>(); }
+ Use &getCalledOperandUse() { return Op<CalledOperandOpEndIdx>(); }
+
+ /// Returns the function called, or null if this is an
+ /// indirect function invocation.
+ Function *getCalledFunction() const {
+ return dyn_cast_or_null<Function>(getCalledOperand());
+ }
+
+ /// Return true if the callsite is an indirect call.
+ bool isIndirectCall() const;
+
+ /// Determine whether the passed iterator points to the callee operand's Use.
+ bool isCallee(Value::const_user_iterator UI) const {
+ return isCallee(&UI.getUse());
+ }
+
+ /// Determine whether this Use is the callee operand's Use.
+ bool isCallee(const Use *U) const { return &getCalledOperandUse() == U; }
+
+ /// Helper to get the caller (the parent function).
+ Function *getCaller();
+ const Function *getCaller() const {
+ return const_cast<CallBase *>(this)->getCaller();
+ }
+
+ /// Returns the intrinsic ID of the intrinsic called or
+ /// Intrinsic::not_intrinsic if the called function is not an intrinsic, or if
+ /// this is an indirect call.
+ Intrinsic::ID getIntrinsicID() const;
+
+ void setCalledOperand(Value *V) { Op<CalledOperandOpEndIdx>() = V; }
+
+ /// Sets the function called, including updating the function type.
+ void setCalledFunction(Value *Fn) {
+ setCalledFunction(
+ cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
+ Fn);
+ }
+
+ /// Sets the function called, including updating to the specified function
+ /// type.
+ void setCalledFunction(FunctionType *FTy, Value *Fn) {
+ this->FTy = FTy;
+ assert(FTy == cast<FunctionType>(
+ cast<PointerType>(Fn->getType())->getElementType()));
+ setCalledOperand(Fn);
+ }
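A sketch of retargeting a call while keeping the recorded FunctionType in sync
(CB and NewF are assumed), which is the form the opaque-pointer work prefers:

    CB.setCalledFunction(NewF->getFunctionType(), NewF);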
+
+ CallingConv::ID getCallingConv() const {
+ return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 2);
+ }
+
+ void setCallingConv(CallingConv::ID CC) {
+ auto ID = static_cast<unsigned>(CC);
+ assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
+ (ID << 2));
+ }
+
+ /// \name Attribute API
+ ///
+ /// These methods access and modify attributes on this call (including
+ /// looking through to the attributes on the called function when necessary).
+ ///@{
+
+ /// Return the parameter attributes for this call.
+ ///
+ AttributeList getAttributes() const { return Attrs; }
+
+ /// Set the parameter attributes for this call.
+ ///
+ void setAttributes(AttributeList A) { Attrs = A; }
+
+ /// Determine whether this call has the given attribute.
+ bool hasFnAttr(Attribute::AttrKind Kind) const {
+ assert(Kind != Attribute::NoBuiltin &&
+ "Use CallBase::isNoBuiltin() to check for Attribute::NoBuiltin");
+ return hasFnAttrImpl(Kind);
+ }
+
+ /// Determine whether this call has the given attribute.
+ bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); }
+
+ /// adds the attribute to the list of attributes.
+ void addAttribute(unsigned i, Attribute::AttrKind Kind) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, Kind);
+ setAttributes(PAL);
+ }
+
+ /// adds the attribute to the list of attributes.
+ void addAttribute(unsigned i, Attribute Attr) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, Attr);
+ setAttributes(PAL);
+ }
+
+ /// Adds the attribute to the indicated argument
+ void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
+ setAttributes(PAL);
+ }
+
+ /// Adds the attribute to the indicated argument
+ void addParamAttr(unsigned ArgNo, Attribute Attr) {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
+ setAttributes(PAL);
+ }
+
+ /// removes the attribute from the list of attributes.
+ void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeAttribute(getContext(), i, Kind);
+ setAttributes(PAL);
+ }
+
+ /// removes the attribute from the list of attributes.
+ void removeAttribute(unsigned i, StringRef Kind) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeAttribute(getContext(), i, Kind);
+ setAttributes(PAL);
+ }
+
+ /// Removes the attribute from the given argument
+ void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
+ setAttributes(PAL);
+ }
+
+ /// Removes the attribute from the given argument
+ void removeParamAttr(unsigned ArgNo, StringRef Kind) {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ AttributeList PAL = getAttributes();
+ PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
+ setAttributes(PAL);
+ }
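A hypothetical use of the per-argument attribute helpers on a call site CB:

    CB.addParamAttr(0, Attribute::NonNull);
    if (CB.paramHasAttr(0, Attribute::NonNull))
      CB.removeParamAttr(0, Attribute::NonNull);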
+
+ /// adds the dereferenceable attribute to the list of attributes.
+ void addDereferenceableAttr(unsigned i, uint64_t Bytes) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+ }
+
+ /// adds the dereferenceable_or_null attribute to the list of
+ /// attributes.
+ void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
+ AttributeList PAL = getAttributes();
+ PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
+ setAttributes(PAL);
+ }
+
+ /// Determine whether the return value has the given attribute.
+ bool hasRetAttr(Attribute::AttrKind Kind) const;
+
+ /// Determine whether the argument or parameter has the given attribute.
+ bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const;
+
+ /// Get the attribute of a given kind at a position.
+ Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
+ return getAttributes().getAttribute(i, Kind);
+ }
+
+ /// Get the attribute of a given kind at a position.
+ Attribute getAttribute(unsigned i, StringRef Kind) const {
+ return getAttributes().getAttribute(i, Kind);
+ }
+
+ /// Get the attribute of a given kind from a given arg
+ Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ return getAttributes().getParamAttr(ArgNo, Kind);
+ }
+
+ /// Get the attribute of a given kind from a given arg
+ Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
+ assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ return getAttributes().getParamAttr(ArgNo, Kind);
+ }
+
+ /// Return true if the data operand at index \p i has the attribute \p Kind.
+ ///
+ /// Data operands include call arguments and values used in operand bundles,
+ /// but does not include the callee operand. This routine dispatches to the
+ /// underlying AttributeList or the OperandBundleUser as appropriate.
+ ///
+ /// The index \p i is interpreted as
+ ///
+ /// \p i == Attribute::ReturnIndex -> the return value
+ /// \p i in [1, arg_size + 1) -> argument number (\p i - 1)
+ /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
+ /// (\p i - 1) in the operand list.
+ bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
+ // Note that we have to add one because `i` isn't zero-indexed.
+ assert(i < (getNumArgOperands() + getNumTotalBundleOperands() + 1) &&
+ "Data operand index out of bounds!");
+
+ // The attribute A can either be directly specified, if the operand in
+ // question is a call argument; or be indirectly implied by the kind of its
+ // containing operand bundle, if the operand is a bundle operand.
+
+ if (i == AttributeList::ReturnIndex)
+ return hasRetAttr(Kind);
+
+ // FIXME: Avoid these i - 1 calculations and update the API to use
+ // zero-based indices.
+ if (i < (getNumArgOperands() + 1))
+ return paramHasAttr(i - 1, Kind);
+
+ assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
+ "Must be either a call argument or an operand bundle!");
+ return bundleOperandHasAttr(i - 1, Kind);
+ }
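A worked mapping of the index scheme above, for a call with two arguments and
one single-input operand bundle (values illustrative; CB is assumed):

    //   i == 0 -> the return value
    //   i == 1 -> argument 0, i == 2 -> argument 1
    //   i == 3 -> the bundle's only operand
    bool ArgNoCapture = CB.dataOperandHasImpliedAttr(1, Attribute::NoCapture);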
+
+ /// Determine whether this data operand is not captured.
+ // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
+ // better indicate that this may return a conservative answer.
+ bool doesNotCapture(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
+ }
+
+ /// Determine whether this argument is passed by value.
+ bool isByValArgument(unsigned ArgNo) const {
+ return paramHasAttr(ArgNo, Attribute::ByVal);
+ }
+
+ /// Determine whether this argument is passed in an alloca.
+ bool isInAllocaArgument(unsigned ArgNo) const {
+ return paramHasAttr(ArgNo, Attribute::InAlloca);
+ }
+
+ /// Determine whether this argument is passed by value or in an alloca.
+ bool isByValOrInAllocaArgument(unsigned ArgNo) const {
+ return paramHasAttr(ArgNo, Attribute::ByVal) ||
+ paramHasAttr(ArgNo, Attribute::InAlloca);
+ }
+
+ /// Determine if there is an inalloca argument. Only the last argument can
+ /// have the inalloca attribute.
+ bool hasInAllocaArgument() const {
+ return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca);
+ }
+
+ // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
+ // better indicate that this may return a conservative answer.
+ bool doesNotAccessMemory(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ }
+
+ // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
+ // better indicate that this may return a conservative answer.
+ bool onlyReadsMemory(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
+ dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ }
+
+ // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
+ // better indicate that this may return a conservative answer.
+ bool doesNotReadMemory(unsigned OpNo) const {
+ return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) ||
+ dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ }
+
+ /// Extract the alignment of the return value.
+ unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
+
+ /// Extract the alignment for a call or parameter (0=unknown).
+ unsigned getParamAlignment(unsigned ArgNo) const {
+ return Attrs.getParamAlignment(ArgNo);
+ }
+
+ /// Extract the number of dereferenceable bytes for a call or
+ /// parameter (0=unknown).
+ uint64_t getDereferenceableBytes(unsigned i) const {
+ return Attrs.getDereferenceableBytes(i);
+ }
+
+ /// Extract the number of dereferenceable_or_null bytes for a call or
+ /// parameter (0=unknown).
+ uint64_t getDereferenceableOrNullBytes(unsigned i) const {
+ return Attrs.getDereferenceableOrNullBytes(i);
+ }
+
+ /// Return true if the return value is known to be not null.
+ /// This may be because it has the nonnull attribute, or because at least
+ /// one byte is dereferenceable and the pointer is in addrspace(0).
+ bool isReturnNonNull() const;
+
+ /// Determine if the return value is marked with NoAlias attribute.
+ bool returnDoesNotAlias() const {
+ return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ }
+
+ /// If one of the arguments has the 'returned' attribute, returns its
+ /// operand value. Otherwise, return nullptr.
+ Value *getReturnedArgOperand() const;
+
+ /// Return true if the call should not be treated as a call to a
+ /// builtin.
+ bool isNoBuiltin() const {
+ return hasFnAttrImpl(Attribute::NoBuiltin) &&
+ !hasFnAttrImpl(Attribute::Builtin);
+ }
+
+ /// Determine if the call requires strict floating point semantics.
+ bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); }
+
+ /// Return true if the call should not be inlined.
+ bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
+ void setIsNoInline() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
+ }
+ /// Determine if the call does not access memory.
+ bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); }
+ void setDoesNotAccessMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ }
+
+ /// Determine if the call does not access or only reads memory.
+ bool onlyReadsMemory() const {
+ return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
+ }
+ void setOnlyReadsMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
+ }
+
+ /// Determine if the call does not access or only writes memory.
+ bool doesNotReadMemory() const {
+ return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
+ }
+ void setDoesNotReadMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly);
+ }
+
+ /// Determine if the call can access memory only using pointers based
+ /// on its arguments.
+ bool onlyAccessesArgMemory() const {
+ return hasFnAttr(Attribute::ArgMemOnly);
+ }
+ void setOnlyAccessesArgMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly);
+ }
+
+ /// Determine if the function may only access memory that is
+ /// inaccessible from the IR.
+ bool onlyAccessesInaccessibleMemory() const {
+ return hasFnAttr(Attribute::InaccessibleMemOnly);
+ }
+ void setOnlyAccessesInaccessibleMemory() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly);
+ }
+
+ /// Determine if the function may only access memory that is
+ /// either inaccessible from the IR or pointed to by its arguments.
+ bool onlyAccessesInaccessibleMemOrArgMem() const {
+ return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+ }
+ void setOnlyAccessesInaccessibleMemOrArgMem() {
+ addAttribute(AttributeList::FunctionIndex,
+ Attribute::InaccessibleMemOrArgMemOnly);
+ }
+ /// Determine if the call cannot return.
+ bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
+ void setDoesNotReturn() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
+ }
+
+ /// Determine if the call should not perform indirect branch tracking.
+ bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); }
+
+ /// Determine if the call cannot unwind.
+ bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
+ void setDoesNotThrow() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+ }
+
+ /// Determine if the invoke cannot be duplicated.
+ bool cannotDuplicate() const { return hasFnAttr(Attribute::NoDuplicate); }
+ void setCannotDuplicate() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate);
+ }
+
+ /// Determine if the invoke is convergent
+ bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
+ void setConvergent() {
+ addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
+ }
+ void setNotConvergent() {
+ removeAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
+ }
+
+ /// Determine if the call returns a structure through first
+ /// pointer argument.
+ bool hasStructRetAttr() const {
+ if (getNumArgOperands() == 0)
+ return false;
+
+ // Be friendly and also check the callee.
+ return paramHasAttr(0, Attribute::StructRet);
+ }
+
+ /// Determine if any call argument is an aggregate passed by value.
+ bool hasByValArgument() const {
+ return Attrs.hasAttrSomewhere(Attribute::ByVal);
+ }
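A sketch of how these predicates are typically combined (Hoistable is a
hypothetical local; the queries consult the call's own attributes, its operand
bundles and finally the callee):

    bool Hoistable =
        CB.onlyReadsMemory() && CB.doesNotThrow() && !CB.isConvergent();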
+
+ ///@}
+ // End of attribute API.
+
+ /// \name Operand Bundle API
+ ///
+ /// This group of methods provides the API to access and manipulate operand
+ /// bundles on this call.
+ /// @{
+
/// Return the number of operand bundles associated with this User.
unsigned getNumOperandBundles() const {
return std::distance(bundle_op_info_begin(), bundle_op_info_end());
@@ -1375,6 +1622,16 @@ public:
Idx < getBundleOperandsEndIndex();
}
+ /// Returns true if the use is a bundle operand.
+ bool isBundleOperand(const Use *U) const {
+ assert(this == U->getUser() &&
+ "Only valid to query with a use of this instruction!");
+ return hasOperandBundles() && isBundleOperand(U - op_begin());
+ }
+ bool isBundleOperand(Value::const_user_iterator UI) const {
+ return isBundleOperand(&UI.getUse());
+ }
+
/// Return the total number of operands (not operand bundles) used by
/// every operand bundle in this OperandBundleUser.
unsigned getNumTotalBundleOperands() const {
@@ -1504,8 +1761,7 @@ public:
/// Return true if \p Other has the same sequence of operand bundle
/// tags with the same number of operands on each one of them as this
/// OperandBundleUser.
- bool hasIdenticalOperandBundleSchema(
- const OperandBundleUser<InstrTy, OpIteratorTy> &Other) const {
+ bool hasIdenticalOperandBundleSchema(const CallBase &Other) const {
if (getNumOperandBundles() != Other.getNumOperandBundles())
return false;
@@ -1524,7 +1780,6 @@ public:
return false;
}
-protected:
/// Is the function attribute S disallowed by some operand bundle on
/// this operand bundle user?
bool isFnAttrDisallowedByOpBundle(StringRef S) const {
@@ -1583,8 +1838,8 @@ protected:
/// OperandBundleUse.
OperandBundleUse
operandBundleFromBundleOpInfo(const BundleOpInfo &BOI) const {
- auto op_begin = static_cast<const InstrTy *>(this)->op_begin();
- ArrayRef<Use> Inputs(op_begin + BOI.Begin, op_begin + BOI.End);
+ auto begin = op_begin();
+ ArrayRef<Use> Inputs(begin + BOI.Begin, begin + BOI.End);
return OperandBundleUse(BOI.Tag, Inputs);
}
@@ -1593,37 +1848,79 @@ protected:
/// Return the start of the list of BundleOpInfo instances associated
/// with this OperandBundleUser.
+ ///
+ /// OperandBundleUser uses the descriptor area co-allocated with the host User
+ /// to store some meta information about which operands are "normal" operands,
+ /// and which ones belong to some operand bundle.
+ ///
+ /// The layout of an operand bundle user is
+ ///
+ /// +-----------uint32_t End-------------------------------------+
+ /// | |
+ /// | +--------uint32_t Begin--------------------+ |
+ /// | | | |
+ /// ^ ^ v v
+ /// |------|------|----|----|----|----|----|---------|----|---------|----|-----
+ /// | BOI0 | BOI1 | .. | DU | U0 | U1 | .. | BOI0_U0 | .. | BOI1_U0 | .. | Un
+ /// |------|------|----|----|----|----|----|---------|----|---------|----|-----
+ /// v v ^ ^
+ /// | | | |
+ /// | +--------uint32_t Begin------------+ |
+ /// | |
+ /// +-----------uint32_t End-----------------------------+
+ ///
+ ///
+ /// BOI0, BOI1 ... are descriptions of operand bundles in this User's use
+ /// list. These descriptions are installed and managed by this class, and
+ /// they're all instances of OperandBundleUser<T>::BundleOpInfo.
+ ///
+ /// DU is an additional descriptor installed by User's 'operator new' to keep
+ /// track of the 'BOI0 ... BOIN' co-allocation. OperandBundleUser does not
+ /// access or modify DU in any way, it's an implementation detail private to
+ /// User.
+ ///
+ /// The regular Use& vector for the User starts at U0. The operand bundle
+ /// uses are part of the Use& vector, just like normal uses. In the diagram
+ /// above, the operand bundle uses start at BOI0_U0. Each instance of
+ /// BundleOpInfo has information about a contiguous set of uses constituting
+ /// an operand bundle, and the total set of operand bundle uses themselves
+ /// form a contiguous set of uses (i.e. there are no gaps between uses
+ /// corresponding to individual operand bundles).
+ ///
+ /// This class does not know the location of the set of operand bundle uses
+ /// within the use list -- that is decided by the User using this class via
+ /// the BeginIdx argument in populateBundleOperandInfos.
+ ///
+ /// Currently operand bundle users with hung-off operands are not supported.
bundle_op_iterator bundle_op_info_begin() {
- if (!static_cast<InstrTy *>(this)->hasDescriptor())
+ if (!hasDescriptor())
return nullptr;
- uint8_t *BytesBegin = static_cast<InstrTy *>(this)->getDescriptor().begin();
+ uint8_t *BytesBegin = getDescriptor().begin();
return reinterpret_cast<bundle_op_iterator>(BytesBegin);
}
/// Return the start of the list of BundleOpInfo instances associated
/// with this OperandBundleUser.
const_bundle_op_iterator bundle_op_info_begin() const {
- auto *NonConstThis =
- const_cast<OperandBundleUser<InstrTy, OpIteratorTy> *>(this);
+ auto *NonConstThis = const_cast<CallBase *>(this);
return NonConstThis->bundle_op_info_begin();
}
/// Return the end of the list of BundleOpInfo instances associated
/// with this OperandBundleUser.
bundle_op_iterator bundle_op_info_end() {
- if (!static_cast<InstrTy *>(this)->hasDescriptor())
+ if (!hasDescriptor())
return nullptr;
- uint8_t *BytesEnd = static_cast<InstrTy *>(this)->getDescriptor().end();
+ uint8_t *BytesEnd = getDescriptor().end();
return reinterpret_cast<bundle_op_iterator>(BytesEnd);
}
/// Return the end of the list of BundleOpInfo instances associated
/// with this OperandBundleUser.
const_bundle_op_iterator bundle_op_info_end() const {
- auto *NonConstThis =
- const_cast<OperandBundleUser<InstrTy, OpIteratorTy> *>(this);
+ auto *NonConstThis = const_cast<CallBase *>(this);
return NonConstThis->bundle_op_info_end();
}
@@ -1643,30 +1940,8 @@ protected:
///
/// Each \p OperandBundleDef instance is tracked by a OperandBundleInfo
/// instance allocated in this User's descriptor.
- OpIteratorTy populateBundleOperandInfos(ArrayRef<OperandBundleDef> Bundles,
- const unsigned BeginIndex) {
- auto It = static_cast<InstrTy *>(this)->op_begin() + BeginIndex;
- for (auto &B : Bundles)
- It = std::copy(B.input_begin(), B.input_end(), It);
-
- auto *ContextImpl = static_cast<InstrTy *>(this)->getContext().pImpl;
- auto BI = Bundles.begin();
- unsigned CurrentIndex = BeginIndex;
-
- for (auto &BOI : bundle_op_infos()) {
- assert(BI != Bundles.end() && "Incorrect allocation?");
-
- BOI.Tag = ContextImpl->getOrInsertBundleTag(BI->getTag());
- BOI.Begin = CurrentIndex;
- BOI.End = CurrentIndex + BI->input_size();
- CurrentIndex = BOI.End;
- BI++;
- }
-
- assert(BI == Bundles.end() && "Incorrect allocation?");
-
- return It;
- }
+ op_iterator populateBundleOperandInfos(ArrayRef<OperandBundleDef> Bundles,
+ const unsigned BeginIndex);
/// Return the BundleOpInfo for the operand at index OpIdx.
///
@@ -1680,6 +1955,7 @@ protected:
llvm_unreachable("Did not find operand bundle for operand!");
}
+protected:
/// Return the total number of values used in \p Bundles.
static unsigned CountBundleInputs(ArrayRef<OperandBundleDef> Bundles) {
unsigned Total = 0;
@@ -1687,8 +1963,102 @@ protected:
Total += B.input_size();
return Total;
}
+
+ /// @}
+ // End of operand bundle API.
+
+private:
+ bool hasFnAttrOnCalledFunction(Attribute::AttrKind Kind) const;
+ bool hasFnAttrOnCalledFunction(StringRef Kind) const;
+
+ template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
+ if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind))
+ return true;
+
+ // Operand bundles override attributes on the called function, but don't
+ // override attributes directly present on the call instruction.
+ if (isFnAttrDisallowedByOpBundle(Kind))
+ return false;
+
+ return hasFnAttrOnCalledFunction(Kind);
+ }
};
+template <>
+struct OperandTraits<CallBase> : public VariadicOperandTraits<CallBase, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallBase, Value)
+
+//===----------------------------------------------------------------------===//
+// FuncletPadInst Class
+//===----------------------------------------------------------------------===//
+class FuncletPadInst : public Instruction {
+private:
+ FuncletPadInst(const FuncletPadInst &CPI);
+
+ explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, Instruction *InsertBefore);
+ explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ void init(Value *ParentPad, ArrayRef<Value *> Args, const Twine &NameStr);
+
+protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+ friend class CatchPadInst;
+ friend class CleanupPadInst;
+
+ FuncletPadInst *cloneImpl() const;
+
+public:
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// getNumArgOperands - Return the number of funcletpad arguments.
+ ///
+ unsigned getNumArgOperands() const { return getNumOperands() - 1; }
+
+ /// Convenience accessors
+
+ /// Return the outer EH-pad this funclet is nested within.
+ ///
+ /// Note: This returns the associated CatchSwitchInst if this FuncletPadInst
+ /// is a CatchPadInst.
+ Value *getParentPad() const { return Op<-1>(); }
+ void setParentPad(Value *ParentPad) {
+ assert(ParentPad);
+ Op<-1>() = ParentPad;
+ }
+
+ /// getArgOperand/setArgOperand - Return/set the i-th funcletpad argument.
+ ///
+ Value *getArgOperand(unsigned i) const { return getOperand(i); }
+ void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
+
+ /// arg_operands - iteration adapter for range-for loops.
+ op_range arg_operands() { return op_range(op_begin(), op_end() - 1); }
+
+ /// arg_operands - iteration adapter for range-for loops.
+ const_op_range arg_operands() const {
+ return const_op_range(op_begin(), op_end() - 1);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Instruction *I) { return I->isFuncletPad(); }
+ static bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<FuncletPadInst>
+ : public VariadicOperandTraits<FuncletPadInst, /*MINARITY=*/1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value)
+
} // end namespace llvm
#endif // LLVM_IR_INSTRTYPES_H
diff --git a/contrib/llvm/include/llvm/IR/Instruction.def b/contrib/llvm/include/llvm/IR/Instruction.def
index 86617299c44a..58e4e2e1d6cc 100644
--- a/contrib/llvm/include/llvm/IR/Instruction.def
+++ b/contrib/llvm/include/llvm/IR/Instruction.def
@@ -32,6 +32,20 @@
#define LAST_TERM_INST(num)
#endif
+#ifndef FIRST_UNARY_INST
+#define FIRST_UNARY_INST(num)
+#endif
+#ifndef HANDLE_UNARY_INST
+#ifndef HANDLE_INST
+#define HANDLE_UNARY_INST(num, opcode, instclass)
+#else
+#define HANDLE_UNARY_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class)
+#endif
+#endif
+#ifndef LAST_UNARY_INST
+#define LAST_UNARY_INST(num)
+#endif
+
#ifndef FIRST_BINARY_INST
#define FIRST_BINARY_INST(num)
#endif
@@ -123,87 +137,96 @@ HANDLE_TERM_INST ( 9, CatchRet , CatchReturnInst)
HANDLE_TERM_INST (10, CatchSwitch , CatchSwitchInst)
LAST_TERM_INST (10)
+// Standard unary operators...
+ FIRST_UNARY_INST(11)
+HANDLE_UNARY_INST(11, FNeg , UnaryOperator)
+ LAST_UNARY_INST(11)
+
// Standard binary operators...
- FIRST_BINARY_INST(11)
-HANDLE_BINARY_INST(11, Add , BinaryOperator)
-HANDLE_BINARY_INST(12, FAdd , BinaryOperator)
-HANDLE_BINARY_INST(13, Sub , BinaryOperator)
-HANDLE_BINARY_INST(14, FSub , BinaryOperator)
-HANDLE_BINARY_INST(15, Mul , BinaryOperator)
-HANDLE_BINARY_INST(16, FMul , BinaryOperator)
-HANDLE_BINARY_INST(17, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(18, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(19, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(20, URem , BinaryOperator)
-HANDLE_BINARY_INST(21, SRem , BinaryOperator)
-HANDLE_BINARY_INST(22, FRem , BinaryOperator)
+ FIRST_BINARY_INST(12)
+HANDLE_BINARY_INST(12, Add , BinaryOperator)
+HANDLE_BINARY_INST(13, FAdd , BinaryOperator)
+HANDLE_BINARY_INST(14, Sub , BinaryOperator)
+HANDLE_BINARY_INST(15, FSub , BinaryOperator)
+HANDLE_BINARY_INST(16, Mul , BinaryOperator)
+HANDLE_BINARY_INST(17, FMul , BinaryOperator)
+HANDLE_BINARY_INST(18, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(19, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(20, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(21, URem , BinaryOperator)
+HANDLE_BINARY_INST(22, SRem , BinaryOperator)
+HANDLE_BINARY_INST(23, FRem , BinaryOperator)
// Logical operators (integer operands)
-HANDLE_BINARY_INST(23, Shl , BinaryOperator) // Shift left (logical)
-HANDLE_BINARY_INST(24, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(25, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(26, And , BinaryOperator)
-HANDLE_BINARY_INST(27, Or , BinaryOperator)
-HANDLE_BINARY_INST(28, Xor , BinaryOperator)
- LAST_BINARY_INST(28)
+HANDLE_BINARY_INST(24, Shl , BinaryOperator) // Shift left (logical)
+HANDLE_BINARY_INST(25, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(26, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(27, And , BinaryOperator)
+HANDLE_BINARY_INST(28, Or , BinaryOperator)
+HANDLE_BINARY_INST(29, Xor , BinaryOperator)
+ LAST_BINARY_INST(29)
// Memory operators...
- FIRST_MEMORY_INST(29)
-HANDLE_MEMORY_INST(29, Alloca, AllocaInst) // Stack management
-HANDLE_MEMORY_INST(30, Load , LoadInst ) // Memory manipulation instrs
-HANDLE_MEMORY_INST(31, Store , StoreInst )
-HANDLE_MEMORY_INST(32, GetElementPtr, GetElementPtrInst)
-HANDLE_MEMORY_INST(33, Fence , FenceInst )
-HANDLE_MEMORY_INST(34, AtomicCmpXchg , AtomicCmpXchgInst )
-HANDLE_MEMORY_INST(35, AtomicRMW , AtomicRMWInst )
- LAST_MEMORY_INST(35)
+ FIRST_MEMORY_INST(30)
+HANDLE_MEMORY_INST(30, Alloca, AllocaInst) // Stack management
+HANDLE_MEMORY_INST(31, Load , LoadInst ) // Memory manipulation instrs
+HANDLE_MEMORY_INST(32, Store , StoreInst )
+HANDLE_MEMORY_INST(33, GetElementPtr, GetElementPtrInst)
+HANDLE_MEMORY_INST(34, Fence , FenceInst )
+HANDLE_MEMORY_INST(35, AtomicCmpXchg , AtomicCmpXchgInst )
+HANDLE_MEMORY_INST(36, AtomicRMW , AtomicRMWInst )
+ LAST_MEMORY_INST(36)
// Cast operators ...
// NOTE: The order matters here because CastInst::isEliminableCastPair
// NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(36)
-HANDLE_CAST_INST(36, Trunc , TruncInst ) // Truncate integers
-HANDLE_CAST_INST(37, ZExt , ZExtInst ) // Zero extend integers
-HANDLE_CAST_INST(38, SExt , SExtInst ) // Sign extend integers
-HANDLE_CAST_INST(39, FPToUI , FPToUIInst ) // floating point -> UInt
-HANDLE_CAST_INST(40, FPToSI , FPToSIInst ) // floating point -> SInt
-HANDLE_CAST_INST(41, UIToFP , UIToFPInst ) // UInt -> floating point
-HANDLE_CAST_INST(42, SIToFP , SIToFPInst ) // SInt -> floating point
-HANDLE_CAST_INST(43, FPTrunc , FPTruncInst ) // Truncate floating point
-HANDLE_CAST_INST(44, FPExt , FPExtInst ) // Extend floating point
-HANDLE_CAST_INST(45, PtrToInt, PtrToIntInst) // Pointer -> Integer
-HANDLE_CAST_INST(46, IntToPtr, IntToPtrInst) // Integer -> Pointer
-HANDLE_CAST_INST(47, BitCast , BitCastInst ) // Type cast
-HANDLE_CAST_INST(48, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast
- LAST_CAST_INST(48)
-
- FIRST_FUNCLETPAD_INST(49)
-HANDLE_FUNCLETPAD_INST(49, CleanupPad, CleanupPadInst)
-HANDLE_FUNCLETPAD_INST(50, CatchPad , CatchPadInst)
- LAST_FUNCLETPAD_INST(50)
+ FIRST_CAST_INST(37)
+HANDLE_CAST_INST(37, Trunc , TruncInst ) // Truncate integers
+HANDLE_CAST_INST(38, ZExt , ZExtInst ) // Zero extend integers
+HANDLE_CAST_INST(39, SExt , SExtInst ) // Sign extend integers
+HANDLE_CAST_INST(40, FPToUI , FPToUIInst ) // floating point -> UInt
+HANDLE_CAST_INST(41, FPToSI , FPToSIInst ) // floating point -> SInt
+HANDLE_CAST_INST(42, UIToFP , UIToFPInst ) // UInt -> floating point
+HANDLE_CAST_INST(43, SIToFP , SIToFPInst ) // SInt -> floating point
+HANDLE_CAST_INST(44, FPTrunc , FPTruncInst ) // Truncate floating point
+HANDLE_CAST_INST(45, FPExt , FPExtInst ) // Extend floating point
+HANDLE_CAST_INST(46, PtrToInt, PtrToIntInst) // Pointer -> Integer
+HANDLE_CAST_INST(47, IntToPtr, IntToPtrInst) // Integer -> Pointer
+HANDLE_CAST_INST(48, BitCast , BitCastInst ) // Type cast
+HANDLE_CAST_INST(49, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast
+ LAST_CAST_INST(49)
+
+ FIRST_FUNCLETPAD_INST(50)
+HANDLE_FUNCLETPAD_INST(50, CleanupPad, CleanupPadInst)
+HANDLE_FUNCLETPAD_INST(51, CatchPad , CatchPadInst)
+ LAST_FUNCLETPAD_INST(51)
// Other operators...
- FIRST_OTHER_INST(51)
-HANDLE_OTHER_INST(51, ICmp , ICmpInst ) // Integer comparison instruction
-HANDLE_OTHER_INST(52, FCmp , FCmpInst ) // Floating point comparison instr.
-HANDLE_OTHER_INST(53, PHI , PHINode ) // PHI node instruction
-HANDLE_OTHER_INST(54, Call , CallInst ) // Call a function
-HANDLE_OTHER_INST(55, Select , SelectInst ) // select instruction
-HANDLE_USER_INST (56, UserOp1, Instruction) // May be used internally in a pass
-HANDLE_USER_INST (57, UserOp2, Instruction) // Internal to passes only
-HANDLE_OTHER_INST(58, VAArg , VAArgInst ) // vaarg instruction
-HANDLE_OTHER_INST(59, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(60, InsertElement, InsertElementInst) // insert into vector
-HANDLE_OTHER_INST(61, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
-HANDLE_OTHER_INST(62, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(63, InsertValue, InsertValueInst) // insert into aggregate
-HANDLE_OTHER_INST(64, LandingPad, LandingPadInst) // Landing pad instruction.
- LAST_OTHER_INST(64)
+ FIRST_OTHER_INST(52)
+HANDLE_OTHER_INST(52, ICmp , ICmpInst ) // Integer comparison instruction
+HANDLE_OTHER_INST(53, FCmp , FCmpInst ) // Floating point comparison instr.
+HANDLE_OTHER_INST(54, PHI , PHINode ) // PHI node instruction
+HANDLE_OTHER_INST(55, Call , CallInst ) // Call a function
+HANDLE_OTHER_INST(56, Select , SelectInst ) // select instruction
+HANDLE_USER_INST (57, UserOp1, Instruction) // May be used internally in a pass
+HANDLE_USER_INST (58, UserOp2, Instruction) // Internal to passes only
+HANDLE_OTHER_INST(59, VAArg , VAArgInst ) // vaarg instruction
+HANDLE_OTHER_INST(60, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(61, InsertElement, InsertElementInst) // insert into vector
+HANDLE_OTHER_INST(62, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
+HANDLE_OTHER_INST(63, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(64, InsertValue, InsertValueInst) // insert into aggregate
+HANDLE_OTHER_INST(65, LandingPad, LandingPadInst) // Landing pad instruction.
+ LAST_OTHER_INST(65)
#undef FIRST_TERM_INST
#undef HANDLE_TERM_INST
#undef LAST_TERM_INST
+#undef FIRST_UNARY_INST
+#undef HANDLE_UNARY_INST
+#undef LAST_UNARY_INST
+
#undef FIRST_BINARY_INST
#undef HANDLE_BINARY_INST
#undef LAST_BINARY_INST
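Editor's note: the opcode renumbering above is mechanical; the substantive addition is the FIRST_UNARY_INST / HANDLE_UNARY_INST / LAST_UNARY_INST group. Clients consume Instruction.def by defining whichever HANDLE_* macros they care about before including it; the file supplies empty defaults for the rest and #undefs everything afterwards. A sketch of that pattern (the function name is made up), visiting only the new unary group:

    #include <cstdio>

    // Only HANDLE_UNARY_INST is defined here; Instruction.def provides empty
    // defaults for the other HANDLE_* macros and #undefs them all at the end.
    static void printUnaryOpcodes() {
    #define HANDLE_UNARY_INST(num, opcode, Class)                              \
      std::printf("%2d  %s -> %s\n", num, #opcode, #Class);
    #include "llvm/IR/Instruction.def"
    }
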
diff --git a/contrib/llvm/include/llvm/IR/Instruction.h b/contrib/llvm/include/llvm/IR/Instruction.h
index 643c2a0761d1..5e78cb1edf02 100644
--- a/contrib/llvm/include/llvm/IR/Instruction.h
+++ b/contrib/llvm/include/llvm/IR/Instruction.h
@@ -127,11 +127,15 @@ public:
const char *getOpcodeName() const { return getOpcodeName(getOpcode()); }
bool isTerminator() const { return isTerminator(getOpcode()); }
+ bool isUnaryOp() const { return isUnaryOp(getOpcode()); }
bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
bool isShift() { return isShift(getOpcode()); }
bool isCast() const { return isCast(getOpcode()); }
bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
+ bool isExceptionalTerminator() const {
+ return isExceptionalTerminator(getOpcode());
+ }
static const char* getOpcodeName(unsigned OpCode);
@@ -139,6 +143,9 @@ public:
return OpCode >= TermOpsBegin && OpCode < TermOpsEnd;
}
+ static inline bool isUnaryOp(unsigned Opcode) {
+ return Opcode >= UnaryOpsBegin && Opcode < UnaryOpsEnd;
+ }
static inline bool isBinaryOp(unsigned Opcode) {
return Opcode >= BinaryOpsBegin && Opcode < BinaryOpsEnd;
}
@@ -182,6 +189,20 @@ public:
return OpCode >= FuncletPadOpsBegin && OpCode < FuncletPadOpsEnd;
}
+ /// Returns true if the OpCode is a terminator related to exception handling.
+ static inline bool isExceptionalTerminator(unsigned OpCode) {
+ switch (OpCode) {
+ case Instruction::CatchSwitch:
+ case Instruction::CatchRet:
+ case Instruction::CleanupRet:
+ case Instruction::Invoke:
+ case Instruction::Resume:
+ return true;
+ default:
+ return false;
+ }
+ }
+
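Editor's note: isExceptionalTerminator gives a name to the "is this terminator EH-related" check that passes previously open-coded. A small sketch (helper name illustrative):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Illustrative helper: does this block end in an EH-related terminator
    // (invoke, resume, catchswitch, catchret, or cleanupret)?
    static bool endsInEHTerminator(const BasicBlock &BB) {
      const Instruction *Term = BB.getTerminator();
      return Term && Term->isExceptionalTerminator();
    }
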
//===--------------------------------------------------------------------===//
// Metadata manipulation.
//===--------------------------------------------------------------------===//
@@ -561,6 +582,10 @@ public:
}
}
+ /// Return true if the instruction is a llvm.lifetime.start or
+ /// llvm.lifetime.end marker.
+ bool isLifetimeStartOrEnd() const;
+
/// Return a pointer to the next non-debug instruction in the same basic
/// block as 'this', or nullptr if no such instruction exists.
const Instruction *getNextNonDebugInstruction() const;
@@ -569,6 +594,14 @@ public:
static_cast<const Instruction *>(this)->getNextNonDebugInstruction());
}
+ /// Return a pointer to the previous non-debug instruction in the same basic
+ /// block as 'this', or nullptr if no such instruction exists.
+ const Instruction *getPrevNonDebugInstruction() const;
+ Instruction *getPrevNonDebugInstruction() {
+ return const_cast<Instruction *>(
+ static_cast<const Instruction *>(this)->getPrevNonDebugInstruction());
+ }
+
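Editor's note: getPrevNonDebugInstruction mirrors the existing forward walk and pairs naturally with the new isLifetimeStartOrEnd query above. A brief sketch combining the two (the helper name is made up):

    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Illustrative helper: step backwards over llvm.dbg.* intrinsics and ask
    // whether the closest "real" predecessor is a lifetime marker.
    static bool followsLifetimeMarker(const Instruction &I) {
      if (const Instruction *Prev = I.getPrevNonDebugInstruction())
        return Prev->isLifetimeStartOrEnd();
      return false;
    }
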
/// Create a copy of 'this' instruction that is identical in all ways except
/// the following:
/// * The instruction has no parent
@@ -611,6 +644,16 @@ public:
/// operands in the corresponding predecessor block.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const;
+ /// Return the number of successors that this instruction has. The instruction
+ /// must be a terminator.
+ unsigned getNumSuccessors() const;
+
+ /// Return the specified successor. This instruction must be a terminator.
+ BasicBlock *getSuccessor(unsigned Idx) const;
+
+ /// Update the specified successor to point at the provided block. This
+ /// instruction must be a terminator.
+ void setSuccessor(unsigned Idx, BasicBlock *BB);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
@@ -627,6 +670,13 @@ public:
#include "llvm/IR/Instruction.def"
};
+ enum UnaryOps {
+#define FIRST_UNARY_INST(N) UnaryOpsBegin = N,
+#define HANDLE_UNARY_INST(N, OPC, CLASS) OPC = N,
+#define LAST_UNARY_INST(N) UnaryOpsEnd = N+1
+#include "llvm/IR/Instruction.def"
+ };
+
enum BinaryOps {
#define FIRST_BINARY_INST(N) BinaryOpsBegin = N,
#define HANDLE_BINARY_INST(N, OPC, CLASS) OPC = N,
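Editor's note: with getNumSuccessors/getSuccessor/setSuccessor now declared on Instruction itself (part of retiring TerminatorInst), generic CFG code no longer needs to cast to a terminator subclass. A sketch of the indexed form (helper name illustrative):

    #include <cassert>

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Illustrative helper: count how many successor edges of a terminator
    // point at Target, using the successor accessors added to Instruction.
    static unsigned countEdgesTo(const Instruction &Term,
                                 const BasicBlock *Target) {
      assert(Term.isTerminator() && "successor accessors need a terminator");
      unsigned N = 0;
      for (unsigned I = 0, E = Term.getNumSuccessors(); I != E; ++I)
        if (Term.getSuccessor(I) == Target)
          ++N;
      return N;
    }
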
diff --git a/contrib/llvm/include/llvm/IR/Instructions.h b/contrib/llvm/include/llvm/IR/Instructions.h
index 9be8bd1a07bc..0ff8f56f213a 100644
--- a/contrib/llvm/include/llvm/IR/Instructions.h
+++ b/contrib/llvm/include/llvm/IR/Instructions.h
@@ -175,47 +175,58 @@ protected:
LoadInst *cloneImpl() const;
public:
- LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore);
- LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
- LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile = false,
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile = false,
- Instruction *InsertBefore = nullptr)
- : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
- NameStr, isVolatile, InsertBefore) {}
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
+ Instruction *InsertBefore = nullptr);
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
- Instruction *InsertBefore = nullptr)
- : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
- NameStr, isVolatile, Align, InsertBefore) {}
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, Instruction *InsertBefore = nullptr);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
- AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System,
- Instruction *InsertBefore = nullptr)
- : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
- NameStr, isVolatile, Align, Order, SSID, InsertBefore) {}
LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
SyncScope::ID SSID = SyncScope::System,
Instruction *InsertBefore = nullptr);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile,
unsigned Align, AtomicOrdering Order, SyncScope::ID SSID,
BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore);
- LoadInst(Value *Ptr, const char *NameStr, BasicBlock *InsertAtEnd);
- LoadInst(Type *Ty, Value *Ptr, const char *NameStr = nullptr,
- bool isVolatile = false, Instruction *InsertBefore = nullptr);
- explicit LoadInst(Value *Ptr, const char *NameStr = nullptr,
- bool isVolatile = false,
+
+ // Deprecated [opaque pointer types]
+ explicit LoadInst(Value *Ptr, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr)
- : LoadInst(cast<PointerType>(Ptr->getType())->getElementType(), Ptr,
- NameStr, isVolatile, InsertBefore) {}
- LoadInst(Value *Ptr, const char *NameStr, bool isVolatile,
- BasicBlock *InsertAtEnd);
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ InsertBefore) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ InsertAtEnd) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ Instruction *InsertBefore = nullptr)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, InsertBefore) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ BasicBlock *InsertAtEnd)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, InsertAtEnd) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ Instruction *InsertBefore = nullptr)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, Align, InsertBefore) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ BasicBlock *InsertAtEnd)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, Align, InsertAtEnd) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System,
+ Instruction *InsertBefore = nullptr)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, Align, Order, SSID, InsertBefore) {}
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align,
+ AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd)
+ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr,
+ isVolatile, Align, Order, SSID, InsertAtEnd) {}
/// Return true if this is a load from a volatile memory location.
bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
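Editor's note: the LoadInst changes above flip the default: every constructor now takes the loaded Type explicitly, and the pointee-type-deriving forms survive only as deprecated forwarding shims marked "[opaque pointer types]". A sketch of the preferred spelling (function name illustrative):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    // Illustrative helper: state the loaded type explicitly instead of
    // relying on Ptr->getType()->getPointerElementType(), which the
    // deprecated overloads above still fall back to.
    static LoadInst *loadI32(Value *Ptr, Instruction *InsertBefore) {
      Type *I32 = Type::getInt32Ty(Ptr->getContext());
      return new LoadInst(I32, Ptr, "val", /*isVolatile=*/false, InsertBefore);
    }
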
@@ -735,6 +746,8 @@ public:
return static_cast<BinOp>(getSubclassDataFromInstruction() >> 5);
}
+ static StringRef getOperationName(BinOp Op);
+
void setOperation(BinOp Operation) {
unsigned short SubclassData = getSubclassDataFromInstruction();
setInstructionSubclassData((SubclassData & 31) |
@@ -1102,6 +1115,71 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
//===----------------------------------------------------------------------===//
+// UnaryOperator Class
+//===----------------------------------------------------------------------===//
+
+/// A unary instruction.
+class UnaryOperator : public UnaryInstruction {
+ void AssertOK();
+
+protected:
+ UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
+ const Twine &Name, Instruction *InsertBefore);
+ UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+
+ UnaryOperator *cloneImpl() const;
+
+public:
+
+ /// Construct a unary instruction, given the opcode and an operand.
+  /// Optionally (if InsertBefore is specified) insert the instruction
+  /// Optionally (if InsertBefore is specified) insert the instruction
+ /// into a BasicBlock right before the specified instruction. The specified
+ /// Instruction is allowed to be a dereferenced end iterator.
+ ///
+ static UnaryOperator *Create(UnaryOps Op, Value *S,
+ const Twine &Name = Twine(),
+ Instruction *InsertBefore = nullptr);
+
+ /// Construct a unary instruction, given the opcode and an operand.
+ /// Also automatically insert this instruction to the end of the
+ /// BasicBlock specified.
+ ///
+ static UnaryOperator *Create(UnaryOps Op, Value *S,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd);
+
+ /// These methods just forward to Create, and are useful when you
+ /// statically know what type of instruction you're going to create. These
+ /// helpers just save some typing.
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryInstruction *Create##OPC(Value *V, \
+ const Twine &Name = "") {\
+ return Create(Instruction::OPC, V, Name);\
+ }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryInstruction *Create##OPC(Value *V, \
+ const Twine &Name, BasicBlock *BB) {\
+ return Create(Instruction::OPC, V, Name, BB);\
+ }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryInstruction *Create##OPC(Value *V, \
+ const Twine &Name, Instruction *I) {\
+ return Create(Instruction::OPC, V, Name, I);\
+ }
+#include "llvm/IR/Instruction.def"
+
+ UnaryOps getOpcode() const {
+ return static_cast<UnaryOps>(Instruction::getOpcode());
+ }
+};
+
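Editor's note: UnaryOperator is the unary counterpart of BinaryOperator; its Create##OPC helpers are stamped out from Instruction.def, so at this point that means CreateFNeg. A short sketch (helper name illustrative):

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Illustrative helper: build an fneg of a floating-point value and insert
    // it before an existing instruction.  Equivalent to
    // UnaryOperator::Create(Instruction::FNeg, FPVal, "neg", InsertBefore).
    static Value *negate(Value *FPVal, Instruction *InsertBefore) {
      return UnaryOperator::CreateFNeg(FPVal, "neg", InsertBefore);
    }
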
+//===----------------------------------------------------------------------===//
// ICmpInst Class
//===----------------------------------------------------------------------===//
@@ -1297,12 +1375,13 @@ public:
/// Constructor with no-insertion semantics
FCmpInst(
- Predicate pred, ///< The predicate to use for the comparison
+ Predicate Pred, ///< The predicate to use for the comparison
Value *LHS, ///< The left-hand-side of the expression
Value *RHS, ///< The right-hand-side of the expression
- const Twine &NameStr = "" ///< Name of the instruction
- ) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::FCmp, pred, LHS, RHS, NameStr) {
+ const Twine &NameStr = "", ///< Name of the instruction
+ Instruction *FlagsSource = nullptr
+ ) : CmpInst(makeCmpResultType(LHS->getType()), Instruction::FCmp, Pred, LHS,
+ RHS, NameStr, nullptr, FlagsSource) {
AssertOK();
}
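Editor's note: the FCmpInst constructor grows a FlagsSource parameter. The intent, as far as this hunk shows, is to let the new compare pick up IR flags (fast-math flags in particular) from an existing instruction instead of starting with none; treat that reading as an assumption, since the implementation is not part of this diff. A hedged sketch:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Hedged sketch: assumes FlagsSource copies the source instruction's
    // fast-math flags onto the new compare.  This is the uninserted
    // constructor, so the caller still has to place the result in a block.
    static FCmpInst *orderedEqualLike(Value *L, Value *R,
                                      Instruction *FlagsFrom) {
      return new FCmpInst(CmpInst::FCMP_OEQ, L, R, "cmp", FlagsFrom);
    }
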
@@ -1350,537 +1429,13 @@ public:
}
};
-class CallInst;
-class InvokeInst;
-
-template <class T> struct CallBaseParent { using type = Instruction; };
-
-template <> struct CallBaseParent<InvokeInst> { using type = TerminatorInst; };
-
-//===----------------------------------------------------------------------===//
-/// Base class for all callable instructions (InvokeInst and CallInst)
-/// Holds everything related to calling a function, abstracting from the base
-/// type @p BaseInstTy and the concrete instruction @p InstTy
-///
-template <class InstTy>
-class CallBase : public CallBaseParent<InstTy>::type,
- public OperandBundleUser<InstTy, User::op_iterator> {
-protected:
- AttributeList Attrs; ///< parameter attributes for callable
- FunctionType *FTy;
- using BaseInstTy = typename CallBaseParent<InstTy>::type;
-
- template <class... ArgsTy>
- CallBase(AttributeList const &A, FunctionType *FT, ArgsTy &&... Args)
- : BaseInstTy(std::forward<ArgsTy>(Args)...), Attrs(A), FTy(FT) {}
- bool hasDescriptor() const { return Value::HasDescriptor; }
-
- using BaseInstTy::BaseInstTy;
-
- using OperandBundleUser<InstTy,
- User::op_iterator>::isFnAttrDisallowedByOpBundle;
- using OperandBundleUser<InstTy, User::op_iterator>::getNumTotalBundleOperands;
- using OperandBundleUser<InstTy, User::op_iterator>::bundleOperandHasAttr;
- using Instruction::getSubclassDataFromInstruction;
- using Instruction::setInstructionSubclassData;
-
-public:
- using Instruction::getContext;
- using OperandBundleUser<InstTy, User::op_iterator>::hasOperandBundles;
- using OperandBundleUser<InstTy,
- User::op_iterator>::getBundleOperandsStartIndex;
-
- static bool classof(const Instruction *I) {
- llvm_unreachable(
- "CallBase is not meant to be used as part of the classof hierarchy");
- }
-
-public:
- /// Return the parameter attributes for this call.
- ///
- AttributeList getAttributes() const { return Attrs; }
-
- /// Set the parameter attributes for this call.
- ///
- void setAttributes(AttributeList A) { Attrs = A; }
-
- FunctionType *getFunctionType() const { return FTy; }
-
- void mutateFunctionType(FunctionType *FTy) {
- Value::mutateType(FTy->getReturnType());
- this->FTy = FTy;
- }
-
- /// Return the number of call arguments.
- ///
- unsigned getNumArgOperands() const {
- return getNumOperands() - getNumTotalBundleOperands() - InstTy::ArgOffset;
- }
-
- /// getArgOperand/setArgOperand - Return/set the i-th call argument.
- ///
- Value *getArgOperand(unsigned i) const {
- assert(i < getNumArgOperands() && "Out of bounds!");
- return getOperand(i);
- }
- void setArgOperand(unsigned i, Value *v) {
- assert(i < getNumArgOperands() && "Out of bounds!");
- setOperand(i, v);
- }
-
- /// Return the iterator pointing to the beginning of the argument list.
- User::op_iterator arg_begin() { return op_begin(); }
-
- /// Return the iterator pointing to the end of the argument list.
- User::op_iterator arg_end() {
- // [ call args ], [ operand bundles ], callee
- return op_end() - getNumTotalBundleOperands() - InstTy::ArgOffset;
- }
-
- /// Iteration adapter for range-for loops.
- iterator_range<User::op_iterator> arg_operands() {
- return make_range(arg_begin(), arg_end());
- }
-
- /// Return the iterator pointing to the beginning of the argument list.
- User::const_op_iterator arg_begin() const { return op_begin(); }
-
- /// Return the iterator pointing to the end of the argument list.
- User::const_op_iterator arg_end() const {
- // [ call args ], [ operand bundles ], callee
- return op_end() - getNumTotalBundleOperands() - InstTy::ArgOffset;
- }
-
- /// Iteration adapter for range-for loops.
- iterator_range<User::const_op_iterator> arg_operands() const {
- return make_range(arg_begin(), arg_end());
- }
-
- /// Wrappers for getting the \c Use of a call argument.
- const Use &getArgOperandUse(unsigned i) const {
- assert(i < getNumArgOperands() && "Out of bounds!");
- return User::getOperandUse(i);
- }
- Use &getArgOperandUse(unsigned i) {
- assert(i < getNumArgOperands() && "Out of bounds!");
- return User::getOperandUse(i);
- }
-
- /// If one of the arguments has the 'returned' attribute, return its
- /// operand value. Otherwise, return nullptr.
- Value *getReturnedArgOperand() const {
- unsigned Index;
-
- if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
- return getArgOperand(Index - AttributeList::FirstArgIndex);
- if (const Function *F = getCalledFunction())
- if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
- Index)
- return getArgOperand(Index - AttributeList::FirstArgIndex);
-
- return nullptr;
- }
-
- User::op_iterator op_begin() {
- return OperandTraits<CallBase>::op_begin(this);
- }
-
- User::const_op_iterator op_begin() const {
- return OperandTraits<CallBase>::op_begin(const_cast<CallBase *>(this));
- }
-
- User::op_iterator op_end() { return OperandTraits<CallBase>::op_end(this); }
-
- User::const_op_iterator op_end() const {
- return OperandTraits<CallBase>::op_end(const_cast<CallBase *>(this));
- }
-
- Value *getOperand(unsigned i_nocapture) const {
- assert(i_nocapture < OperandTraits<CallBase>::operands(this) &&
- "getOperand() out of range!");
- return cast_or_null<Value>(OperandTraits<CallBase>::op_begin(
- const_cast<CallBase *>(this))[i_nocapture]
- .get());
- }
-
- void setOperand(unsigned i_nocapture, Value *Val_nocapture) {
- assert(i_nocapture < OperandTraits<CallBase>::operands(this) &&
- "setOperand() out of range!");
- OperandTraits<CallBase>::op_begin(this)[i_nocapture] = Val_nocapture;
- }
-
- unsigned getNumOperands() const {
- return OperandTraits<CallBase>::operands(this);
- }
- template <int Idx_nocapture> Use &Op() {
- return User::OpFrom<Idx_nocapture>(this);
- }
- template <int Idx_nocapture> const Use &Op() const {
- return User::OpFrom<Idx_nocapture>(this);
- }
-
- /// Return the function called, or null if this is an
- /// indirect function invocation.
- ///
- Function *getCalledFunction() const {
- return dyn_cast<Function>(Op<-InstTy::ArgOffset>());
- }
-
- /// Determine whether this call has the given attribute.
- bool hasFnAttr(Attribute::AttrKind Kind) const {
- assert(Kind != Attribute::NoBuiltin &&
- "Use CallBase::isNoBuiltin() to check for Attribute::NoBuiltin");
- return hasFnAttrImpl(Kind);
- }
-
- /// Determine whether this call has the given attribute.
- bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); }
-
- /// getCallingConv/setCallingConv - Get or set the calling convention of this
- /// function call.
- CallingConv::ID getCallingConv() const {
- return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 2);
- }
- void setCallingConv(CallingConv::ID CC) {
- auto ID = static_cast<unsigned>(CC);
- assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
- setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
- (ID << 2));
- }
-
-
- /// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Kind);
- setAttributes(PAL);
- }
-
- /// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute Attr) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Attr);
- setAttributes(PAL);
- }
-
- /// Adds the attribute to the indicated argument
- void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
- }
-
- /// Adds the attribute to the indicated argument
- void addParamAttr(unsigned ArgNo, Attribute Attr) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
- setAttributes(PAL);
- }
-
- /// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
- }
-
- /// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, StringRef Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
- }
-
- /// Removes the attribute from the given argument
- void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
- }
-
- /// Removes the attribute from the given argument
- void removeParamAttr(unsigned ArgNo, StringRef Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
- }
-
- /// adds the dereferenceable attribute to the list of attributes.
- void addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
- setAttributes(PAL);
- }
-
- /// adds the dereferenceable_or_null attribute to the list of
- /// attributes.
- void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
- setAttributes(PAL);
- }
-
- /// Determine whether the return value has the given attribute.
- bool hasRetAttr(Attribute::AttrKind Kind) const {
- if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
- return true;
-
- // Look at the callee, if available.
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
- return false;
- }
-
- /// Determine whether the argument or parameter has the given attribute.
- bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- assert(ArgNo < getNumArgOperands() && "Param index out of bounds!");
-
- if (Attrs.hasParamAttribute(ArgNo, Kind))
- return true;
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasParamAttribute(ArgNo, Kind);
- return false;
- }
-
- /// Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return getAttributes().getAttribute(i, Kind);
- }
-
- /// Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, StringRef Kind) const {
- return getAttributes().getAttribute(i, Kind);
- }
-
- /// Get the attribute of a given kind from a given arg
- Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- return getAttributes().getParamAttr(ArgNo, Kind);
- }
-
- /// Get the attribute of a given kind from a given arg
- Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- return getAttributes().getParamAttr(ArgNo, Kind);
- }
- /// Return true if the data operand at index \p i has the attribute \p
- /// A.
- ///
- /// Data operands include call arguments and values used in operand bundles,
- /// but does not include the callee operand. This routine dispatches to the
- /// underlying AttributeList or the OperandBundleUser as appropriate.
- ///
- /// The index \p i is interpreted as
- ///
- /// \p i == Attribute::ReturnIndex -> the return value
- /// \p i in [1, arg_size + 1) -> argument number (\p i - 1)
- /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
- /// (\p i - 1) in the operand list.
- bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
- // There are getNumOperands() - (InstTy::ArgOffset - 1) data operands.
- // The last operand is the callee.
- assert(i < (getNumOperands() - InstTy::ArgOffset + 1) &&
- "Data operand index out of bounds!");
-
- // The attribute A can either be directly specified, if the operand in
- // question is a call argument; or be indirectly implied by the kind of its
- // containing operand bundle, if the operand is a bundle operand.
-
- if (i == AttributeList::ReturnIndex)
- return hasRetAttr(Kind);
-
- // FIXME: Avoid these i - 1 calculations and update the API to use
- // zero-based indices.
- if (i < (getNumArgOperands() + 1))
- return paramHasAttr(i - 1, Kind);
-
- assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
- "Must be either a call argument or an operand bundle!");
- return bundleOperandHasAttr(i - 1, Kind);
- }
-
- /// Extract the alignment of the return value.
- unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
-
- /// Extract the alignment for a call or parameter (0=unknown).
- unsigned getParamAlignment(unsigned ArgNo) const {
- return Attrs.getParamAlignment(ArgNo);
- }
-
- /// Extract the number of dereferenceable bytes for a call or
- /// parameter (0=unknown).
- uint64_t getDereferenceableBytes(unsigned i) const {
- return Attrs.getDereferenceableBytes(i);
- }
-
- /// Extract the number of dereferenceable_or_null bytes for a call or
- /// parameter (0=unknown).
- uint64_t getDereferenceableOrNullBytes(unsigned i) const {
- return Attrs.getDereferenceableOrNullBytes(i);
- }
-
- /// Determine if the return value is marked with NoAlias attribute.
- bool returnDoesNotAlias() const {
- return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- }
-
- /// Return true if the call should not be treated as a call to a
- /// builtin.
- bool isNoBuiltin() const {
- return hasFnAttrImpl(Attribute::NoBuiltin) &&
- !hasFnAttrImpl(Attribute::Builtin);
- }
-
- /// Determine if the call requires strict floating point semantics.
- bool isStrictFP() const { return hasFnAttr(Attribute::StrictFP); }
-
- /// Return true if the call should not be inlined.
- bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
- void setIsNoInline() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
- }
- /// Determine if the call does not access memory.
- bool doesNotAccessMemory() const {
- return hasFnAttr(Attribute::ReadNone);
- }
- void setDoesNotAccessMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
- }
-
- /// Determine if the call does not access or only reads memory.
- bool onlyReadsMemory() const {
- return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
- }
- void setOnlyReadsMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
- }
-
- /// Determine if the call does not access or only writes memory.
- bool doesNotReadMemory() const {
- return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
- }
- void setDoesNotReadMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly);
- }
-
-  /// Determine if the call can access memory only using pointers based
- /// on its arguments.
- bool onlyAccessesArgMemory() const {
- return hasFnAttr(Attribute::ArgMemOnly);
- }
- void setOnlyAccessesArgMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly);
- }
-
- /// Determine if the function may only access memory that is
- /// inaccessible from the IR.
- bool onlyAccessesInaccessibleMemory() const {
- return hasFnAttr(Attribute::InaccessibleMemOnly);
- }
- void setOnlyAccessesInaccessibleMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly);
- }
-
- /// Determine if the function may only access memory that is
- /// either inaccessible from the IR or pointed to by its arguments.
- bool onlyAccessesInaccessibleMemOrArgMem() const {
- return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
- }
- void setOnlyAccessesInaccessibleMemOrArgMem() {
- addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOrArgMemOnly);
- }
- /// Determine if the call cannot return.
- bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
- void setDoesNotReturn() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
- }
-
- /// Determine if the call should not perform indirect branch tracking.
- bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); }
-
- /// Determine if the call cannot unwind.
- bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
- void setDoesNotThrow() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- }
-
- /// Determine if the invoke cannot be duplicated.
- bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); }
- void setCannotDuplicate() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate);
- }
-
- /// Determine if the invoke is convergent
- bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
- void setConvergent() {
- addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
- }
- void setNotConvergent() {
- removeAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
- }
-
- /// Determine if the call returns a structure through first
- /// pointer argument.
- bool hasStructRetAttr() const {
- if (getNumArgOperands() == 0)
- return false;
-
- // Be friendly and also check the callee.
- return paramHasAttr(0, Attribute::StructRet);
- }
-
- /// Determine if any call argument is an aggregate passed by value.
- bool hasByValArgument() const {
- return Attrs.hasAttrSomewhere(Attribute::ByVal);
- }
- /// Get a pointer to the function that is invoked by this
- /// instruction.
- const Value *getCalledValue() const { return Op<-InstTy::ArgOffset>(); }
- Value *getCalledValue() { return Op<-InstTy::ArgOffset>(); }
-
- /// Set the function called.
- void setCalledFunction(Value* Fn) {
- setCalledFunction(
- cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
- Fn);
- }
- void setCalledFunction(FunctionType *FTy, Value *Fn) {
- this->FTy = FTy;
- assert(FTy == cast<FunctionType>(
- cast<PointerType>(Fn->getType())->getElementType()));
- Op<-InstTy::ArgOffset>() = Fn;
- }
-
-protected:
- template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
- if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind))
- return true;
-
- // Operand bundles override attributes on the called function, but don't
- // override attributes directly present on the call instruction.
- if (isFnAttrDisallowedByOpBundle(Kind))
- return false;
-
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeList::FunctionIndex,
- Kind);
- return false;
- }
-};
-
//===----------------------------------------------------------------------===//
/// This class represents a function call, abstracting a target
/// machine's calling convention. This class uses low bit of the SubClassData
/// field to indicate whether or not this is a tail call. The rest of the bits
/// hold the calling convention of the call.
///
-class CallInst : public CallBase<CallInst> {
- friend class OperandBundleUser<CallInst, User::op_iterator>;
-
+class CallInst : public CallBase {
CallInst(const CallInst &CI);
/// Construct a CallInst given a range of arguments.
@@ -1889,36 +1444,32 @@ class CallInst : public CallBase<CallInst> {
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
Instruction *InsertBefore);
- inline CallInst(Value *Func, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
- Instruction *InsertBefore)
- : CallInst(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, Bundles, NameStr, InsertBefore) {}
-
- inline CallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
- Instruction *InsertBefore)
- : CallInst(Func, Args, None, NameStr, InsertBefore) {}
+ inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, Instruction *InsertBefore)
+ : CallInst(Ty, Func, Args, None, NameStr, InsertBefore) {}
/// Construct a CallInst given a range of arguments.
/// Construct a CallInst from a range of arguments
- inline CallInst(Value *Func, ArrayRef<Value *> Args,
+ inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
BasicBlock *InsertAtEnd);
- explicit CallInst(Value *F, const Twine &NameStr, Instruction *InsertBefore);
+ explicit CallInst(FunctionType *Ty, Value *F, const Twine &NameStr,
+ Instruction *InsertBefore);
- CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
+ CallInst(FunctionType *ty, Value *F, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
- void init(Value *Func, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
- init(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, Bundles, NameStr);
- }
void init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
- void init(Value *Func, const Twine &NameStr);
+ void init(FunctionType *FTy, Value *Func, const Twine &NameStr);
+
+ /// Compute the number of operands to allocate.
+ static int ComputeNumOperands(int NumArgs, int NumBundleInputs = 0) {
+ // We need one operand for the called function, plus the input operand
+ // counts provided.
+ return 1 + NumArgs + NumBundleInputs;
+ }
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
@@ -1927,29 +1478,15 @@ protected:
CallInst *cloneImpl() const;
public:
- static constexpr int ArgOffset = 1;
-
- static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles = None,
- const Twine &NameStr = "",
+ static CallInst *Create(FunctionType *Ty, Value *F, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- return Create(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, Bundles, NameStr, InsertBefore);
- }
-
- static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr,
- Instruction *InsertBefore = nullptr) {
- return Create(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, None, NameStr, InsertBefore);
+ return new (ComputeNumOperands(0)) CallInst(Ty, F, NameStr, InsertBefore);
}
static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- return new (unsigned(Args.size() + 1))
+ return new (ComputeNumOperands(Args.size()))
CallInst(Ty, Func, Args, None, NameStr, InsertBefore);
}
@@ -1957,39 +1494,107 @@ public:
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- const unsigned TotalOps =
- unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+ const int NumOperands =
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles));
const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
- return new (TotalOps, DescriptorBytes)
+ return new (NumOperands, DescriptorBytes)
CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore);
}
- static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ static CallInst *Create(FunctionType *Ty, Value *F, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new (ComputeNumOperands(0)) CallInst(Ty, F, NameStr, InsertAtEnd);
+ }
+
+ static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return new (ComputeNumOperands(Args.size()))
+ CallInst(Ty, Func, Args, None, NameStr, InsertAtEnd);
+ }
+
+ static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- const unsigned TotalOps =
- unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+ const int NumOperands =
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles));
const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
- return new (TotalOps, DescriptorBytes)
- CallInst(Func, Args, Bundles, NameStr, InsertAtEnd);
+ return new (NumOperands, DescriptorBytes)
+ CallInst(Ty, Func, Args, Bundles, NameStr, InsertAtEnd);
}
- static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ static CallInst *Create(Function *Func, const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(Func->getFunctionType(), Func, NameStr, InsertBefore);
+ }
+
+ static CallInst *Create(Function *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(Func->getFunctionType(), Func, Args, NameStr, InsertBefore);
+ }
+
+ static CallInst *Create(Function *Func, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return Create(Func->getFunctionType(), Func, NameStr, InsertAtEnd);
+ }
+
+ static CallInst *Create(Function *Func, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return new (unsigned(Args.size() + 1))
- CallInst(Func, Args, None, NameStr, InsertAtEnd);
+ return Create(Func->getFunctionType(), Func, Args, NameStr, InsertAtEnd);
}
- static CallInst *Create(Value *F, const Twine &NameStr = "",
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, NameStr, InsertBefore);
+ }
+
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- return new (1) CallInst(F, NameStr, InsertBefore);
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, NameStr, InsertBefore);
}
- static CallInst *Create(Value *F, const Twine &NameStr,
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, Bundles, NameStr, InsertBefore);
+ }
+
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
- return new (1) CallInst(F, NameStr, InsertAtEnd);
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, NameStr, InsertAtEnd);
+ }
+
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, NameStr, InsertAtEnd);
+ }
+
+ // Deprecated [opaque pointer types]
+ static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, Bundles, NameStr, InsertAtEnd);
}
/// Create a clone of \p CI with a different set of operand bundles and
@@ -2080,7 +1685,7 @@ public:
}
/// Check if this call is an inline asm statement.
- bool isInlineAsm() const { return isa<InlineAsm>(Op<-1>()); }
+ bool isInlineAsm() const { return isa<InlineAsm>(getCalledOperand()); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
@@ -2098,32 +1703,25 @@ private:
}
};
-template <>
-struct OperandTraits<CallBase<CallInst>>
- : public VariadicOperandTraits<CallBase<CallInst>, 1> {};
-
-CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
+CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
BasicBlock *InsertAtEnd)
- : CallBase<CallInst>(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) -
- (Args.size() + CountBundleInputs(Bundles) + 1),
- unsigned(Args.size() + CountBundleInputs(Bundles) + 1), InsertAtEnd) {
- init(Func, Args, Bundles, NameStr);
+ : CallBase(Ty->getReturnType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) -
+ (Args.size() + CountBundleInputs(Bundles) + 1),
+ unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
+ InsertAtEnd) {
+ init(Ty, Func, Args, Bundles, NameStr);
}
CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
Instruction *InsertBefore)
- : CallBase<CallInst>(Ty->getReturnType(), Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) -
- (Args.size() + CountBundleInputs(Bundles) + 1),
- unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
- InsertBefore) {
+ : CallBase(Ty->getReturnType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) -
+ (Args.size() + CountBundleInputs(Bundles) + 1),
+ unsigned(Args.size() + CountBundleInputs(Bundles) + 1),
+ InsertBefore) {
init(Ty, Func, Args, Bundles, NameStr);
}
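Editor's note: the CallInst creation surface is reworked the same way as LoadInst above: the FunctionType (or a Function*, from which it is fetched) is stated up front, and the Value*-only overloads remain as deprecated shims that cast through the pointee type. A sketch of the preferred form (helper name illustrative):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Illustrative helper: call a known Function with two arguments.  The
    // Function* overload of Create fetches the FunctionType itself, so no
    // pointee-type cast is needed.
    static CallInst *callWithTwoArgs(Function *Callee, Value *A, Value *B,
                                     Instruction *InsertBefore) {
      Value *Args[] = {A, B};
      return CallInst::Create(Callee, Args, "", InsertBefore);
    }
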
@@ -2456,14 +2054,24 @@ public:
}
/// Return true if this shuffle returns a vector with a different number of
- /// elements than its source elements.
- /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2>
+ /// elements than its source vectors.
+ /// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3>
+ /// shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5>
bool changesLength() const {
unsigned NumSourceElts = Op<0>()->getType()->getVectorNumElements();
unsigned NumMaskElts = getMask()->getType()->getVectorNumElements();
return NumSourceElts != NumMaskElts;
}
+ /// Return true if this shuffle returns a vector with a greater number of
+ /// elements than its source vectors.
+ /// Example: shufflevector <2 x n> A, <2 x n> B, <1,2,3>
+ bool increasesLength() const {
+ unsigned NumSourceElts = Op<0>()->getType()->getVectorNumElements();
+ unsigned NumMaskElts = getMask()->getType()->getVectorNumElements();
+ return NumSourceElts < NumMaskElts;
+ }
+
/// Return true if this shuffle mask chooses elements from exactly one source
/// vector.
/// Example: <7,5,undef,7>
@@ -2497,15 +2105,27 @@ public:
return isIdentityMask(MaskAsInts);
}
- /// Return true if this shuffle mask chooses elements from exactly one source
+ /// Return true if this shuffle chooses elements from exactly one source
/// vector without lane crossings and does not change the number of elements
/// from its input vectors.
/// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
- /// TODO: Optionally allow length-changing shuffles.
bool isIdentity() const {
return !changesLength() && isIdentityMask(getShuffleMask());
}
+ /// Return true if this shuffle lengthens exactly one source vector with
+ /// undefs in the high elements.
+ bool isIdentityWithPadding() const;
+
+ /// Return true if this shuffle extracts the first N elements of exactly one
+ /// source vector.
+ bool isIdentityWithExtract() const;
+
+ /// Return true if this shuffle concatenates its 2 source vectors. This
+  /// returns false if either input is undefined. In that case, the shuffle
+  /// is better classified as an identity with padding operation.
+ bool isConcat() const;
+
/// Return true if this shuffle mask chooses elements from its source vectors
/// without lane crossings. A shuffle using this mask would be
/// equivalent to a vector select with a constant condition operand.
@@ -2625,6 +2245,25 @@ public:
return !changesLength() && isTransposeMask(getMask());
}
+ /// Return true if this shuffle mask is an extract subvector mask.
+ /// A valid extract subvector mask returns a smaller vector from a single
+ /// source operand. The base extraction index is returned as well.
+ static bool isExtractSubvectorMask(ArrayRef<int> Mask, int NumSrcElts,
+ int &Index);
+ static bool isExtractSubvectorMask(const Constant *Mask, int NumSrcElts,
+ int &Index) {
+ assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ SmallVector<int, 16> MaskAsInts;
+ getShuffleMask(Mask, MaskAsInts);
+ return isExtractSubvectorMask(MaskAsInts, NumSrcElts, Index);
+ }
+
+ /// Return true if this shuffle mask is an extract subvector mask.
+ bool isExtractSubvectorMask(int &Index) const {
+ int NumSrcElts = Op<0>()->getType()->getVectorNumElements();
+ return isExtractSubvectorMask(getMask(), NumSrcElts, Index);
+ }
+
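Editor's note: per the comment above, an extract-subvector mask pulls a shorter contiguous run out of a single source operand and reports where that run starts. A small self-check sketch on a literal mask (names illustrative, expected result stated as an assumption from the documented semantics):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Illustrative check: the mask <2, 3> over 4-element sources selects the
    // upper half of the first operand, so the expected extraction index is 2.
    static bool upperHalfIsExtract() {
      int Index = 0;
      const int Mask[] = {2, 3};
      return ShuffleVectorInst::isExtractSubvectorMask(Mask, /*NumSrcElts=*/4,
                                                       Index) &&
             Index == 2;
    }
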
/// Change values in a shuffle permute mask assuming the two vector operands
/// of length InVecNumElts have swapped position.
static void commuteShuffleMask(MutableArrayRef<int> Mask,
@@ -3241,7 +2880,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LandingPadInst, Value)
/// Return a value (possibly void), from a function. Execution
/// does not continue in this function any longer.
///
-class ReturnInst : public TerminatorInst {
+class ReturnInst : public Instruction {
ReturnInst(const ReturnInst &RI);
private:
@@ -3301,8 +2940,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned idx) const {
llvm_unreachable("ReturnInst has no successors!");
}
@@ -3325,7 +2962,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value)
//===---------------------------------------------------------------------------
/// Conditional or Unconditional Branch instruction.
///
-class BranchInst : public TerminatorInst {
+class BranchInst : public Instruction {
/// Ops list - Branches are strange. The operands are ordered:
/// [Cond, FalseDest,] TrueDest. This makes some accessors faster because
/// they don't have to check for cond/uncond branchness. These are mostly
@@ -3354,6 +2991,33 @@ protected:
BranchInst *cloneImpl() const;
public:
+ /// Iterator type that casts an operand to a basic block.
+ ///
+ /// This only makes sense because the successors are stored as adjacent
+ /// operands for branch instructions.
+ struct succ_op_iterator
+ : iterator_adaptor_base<succ_op_iterator, value_op_iterator,
+ std::random_access_iterator_tag, BasicBlock *,
+ ptrdiff_t, BasicBlock *, BasicBlock *> {
+ explicit succ_op_iterator(value_op_iterator I) : iterator_adaptor_base(I) {}
+
+ BasicBlock *operator*() const { return cast<BasicBlock>(*I); }
+ BasicBlock *operator->() const { return operator*(); }
+ };
+
+ /// The const version of `succ_op_iterator`.
+ struct const_succ_op_iterator
+ : iterator_adaptor_base<const_succ_op_iterator, const_value_op_iterator,
+ std::random_access_iterator_tag,
+ const BasicBlock *, ptrdiff_t, const BasicBlock *,
+ const BasicBlock *> {
+ explicit const_succ_op_iterator(const_value_op_iterator I)
+ : iterator_adaptor_base(I) {}
+
+ const BasicBlock *operator*() const { return cast<BasicBlock>(*I); }
+ const BasicBlock *operator->() const { return operator*(); }
+ };
+
static BranchInst *Create(BasicBlock *IfTrue,
Instruction *InsertBefore = nullptr) {
return new(1) BranchInst(IfTrue, InsertBefore);
@@ -3408,6 +3072,18 @@ public:
/// continues to map correctly to each operand.
void swapSuccessors();
+ iterator_range<succ_op_iterator> successors() {
+ return make_range(
+ succ_op_iterator(std::next(value_op_begin(), isConditional() ? 1 : 0)),
+ succ_op_iterator(value_op_end()));
+ }
+
+ iterator_range<const_succ_op_iterator> successors() const {
+ return make_range(const_succ_op_iterator(
+ std::next(value_op_begin(), isConditional() ? 1 : 0)),
+ const_succ_op_iterator(value_op_end()));
+ }
+
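Editor's note: the succ_op_iterator machinery exists so that successors() can hand back BasicBlock* directly while still walking the operand list; for a conditional branch the range starts one past the condition operand. Sketch of the range-for form (helper name illustrative):

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Illustrative helper: does this branch (conditional or not) target BB?
    static bool branchesTo(const BranchInst &BI, const BasicBlock *BB) {
      for (const BasicBlock *Succ : BI.successors())
        if (Succ == BB)
          return true;
      return false;
    }
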
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Br);
@@ -3430,7 +3106,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
//===---------------------------------------------------------------------------
/// Multiway switch
///
-class SwitchInst : public TerminatorInst {
+class SwitchInst : public Instruction {
unsigned ReservedSpace;
// Operand[0] = Value to switch on
@@ -3513,7 +3189,7 @@ public:
/// Returns number of current case.
unsigned getCaseIndex() const { return Index; }
- /// Returns TerminatorInst's successor index for current case successor.
+ /// Returns successor index for current case successor.
unsigned getSuccessorIndex() const {
assert(((unsigned)Index == DefaultPseudoIndex ||
(unsigned)Index < SI->getNumCases()) &&
@@ -3569,7 +3245,7 @@ public:
CaseIteratorImpl(SwitchInstT *SI, unsigned CaseNum) : Case(SI, CaseNum) {}
/// Initializes case iterator for given SwitchInst and for given
- /// TerminatorInst's successor index.
+ /// successor index.
static CaseIteratorImpl fromSuccessorIndex(SwitchInstT *SI,
unsigned SuccessorIndex) {
assert(SuccessorIndex < SI->getNumSuccessors() &&
@@ -3787,7 +3463,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
//===---------------------------------------------------------------------------
/// Indirect Branch Instruction.
///
-class IndirectBrInst : public TerminatorInst {
+class IndirectBrInst : public Instruction {
unsigned ReservedSpace;
// Operand[0] = Address to jump to
@@ -3821,6 +3497,33 @@ protected:
IndirectBrInst *cloneImpl() const;
public:
+ /// Iterator type that casts an operand to a basic block.
+ ///
+ /// This only makes sense because the successors are stored as adjacent
+ /// operands for indirectbr instructions.
+ struct succ_op_iterator
+ : iterator_adaptor_base<succ_op_iterator, value_op_iterator,
+ std::random_access_iterator_tag, BasicBlock *,
+ ptrdiff_t, BasicBlock *, BasicBlock *> {
+ explicit succ_op_iterator(value_op_iterator I) : iterator_adaptor_base(I) {}
+
+ BasicBlock *operator*() const { return cast<BasicBlock>(*I); }
+ BasicBlock *operator->() const { return operator*(); }
+ };
+
+ /// The const version of `succ_op_iterator`.
+ struct const_succ_op_iterator
+ : iterator_adaptor_base<const_succ_op_iterator, const_value_op_iterator,
+ std::random_access_iterator_tag,
+ const BasicBlock *, ptrdiff_t, const BasicBlock *,
+ const BasicBlock *> {
+ explicit const_succ_op_iterator(const_value_op_iterator I)
+ : iterator_adaptor_base(I) {}
+
+ const BasicBlock *operator*() const { return cast<BasicBlock>(*I); }
+ const BasicBlock *operator->() const { return operator*(); }
+ };
+
static IndirectBrInst *Create(Value *Address, unsigned NumDests,
Instruction *InsertBefore = nullptr) {
return new IndirectBrInst(Address, NumDests, InsertBefore);
@@ -3863,6 +3566,16 @@ public:
setOperand(i + 1, NewSucc);
}
+ iterator_range<succ_op_iterator> successors() {
+ return make_range(succ_op_iterator(std::next(value_op_begin())),
+ succ_op_iterator(value_op_end()));
+ }
+
+ iterator_range<const_succ_op_iterator> successors() const {
+ return make_range(const_succ_op_iterator(std::next(value_op_begin())),
+ const_succ_op_iterator(value_op_end()));
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::IndirectBr;
@@ -3885,48 +3598,43 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
/// Invoke instruction. The SubclassData field is used to hold the
/// calling convention of the call.
///
-class InvokeInst : public CallBase<InvokeInst> {
- friend class OperandBundleUser<InvokeInst, User::op_iterator>;
+class InvokeInst : public CallBase {
+ /// The number of operands for this call beyond the called function,
+ /// arguments, and operand bundles.
+ static constexpr int NumExtraOperands = 2;
+
+ /// The index from the end of the operand array to the normal destination.
+ static constexpr int NormalDestOpEndIdx = -3;
+
+ /// The index from the end of the operand array to the unwind destination.
+ static constexpr int UnwindDestOpEndIdx = -2;
InvokeInst(const InvokeInst &BI);
/// Construct an InvokeInst given a range of arguments.
///
/// Construct an InvokeInst from a range of arguments
- inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
- unsigned Values, const Twine &NameStr,
- Instruction *InsertBefore)
- : InvokeInst(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, Bundles, Values, NameStr,
- InsertBefore) {}
-
inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, unsigned Values,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
const Twine &NameStr, Instruction *InsertBefore);
- /// Construct an InvokeInst given a range of arguments.
- ///
- /// Construct an InvokeInst from a range of arguments
- inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
- unsigned Values, const Twine &NameStr,
- BasicBlock *InsertAtEnd);
-
- void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
- const Twine &NameStr) {
- init(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, Bundles, NameStr);
- }
+ inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
- void init(FunctionType *FTy, Value *Func, BasicBlock *IfNormal,
+ void init(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
+ /// Compute the number of operands to allocate.
+ static int ComputeNumOperands(int NumArgs, int NumBundleInputs = 0) {
+ // We need one operand for the called function, plus our extra operands and
+ // the input operand counts provided.
+ return 1 + NumExtraOperands + NumArgs + NumBundleInputs;
+ }
+
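As a worked example of the arithmetic above (values illustrative, not taken from the patch): an invoke of a callee with two arguments and one operand bundle carrying three inputs allocates 1 (callee) + 2 (normal and unwind destinations) + 2 (arguments) + 3 (bundle inputs) = 8 operands.

  // Illustrative check of ComputeNumOperands(NumArgs=2, NumBundleInputs=3):
  //   1 callee + 2 destinations + 2 args + 3 bundle inputs = 8 operands.
  static_assert(1 + 2 + 2 + 3 == 8, "invoke operand allocation example");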
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -3934,69 +3642,125 @@ protected:
InvokeInst *cloneImpl() const;
public:
- static constexpr int ArgOffset = 3;
- static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- return Create(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, None, NameStr,
- InsertBefore);
+ int NumOperands = ComputeNumOperands(Args.size());
+ return new (NumOperands)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, None, NumOperands,
+ NameStr, InsertBefore);
}
- static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- return Create(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, Bundles, NameStr,
- InsertBefore);
+ int NumOperands =
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles));
+ unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (NumOperands, DescriptorBytes)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, NumOperands,
+ NameStr, InsertBefore);
+ }
+
+ static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ int NumOperands = ComputeNumOperands(Args.size());
+ return new (NumOperands)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, None, NumOperands,
+ NameStr, InsertAtEnd);
}
static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ int NumOperands =
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles));
+ unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (NumOperands, DescriptorBytes)
+ InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, NumOperands,
+ NameStr, InsertAtEnd);
+ }
+
+ static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- unsigned Values = unsigned(Args.size()) + 3;
- return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args, None,
- Values, NameStr, InsertBefore);
+ return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
+ None, NameStr, InsertBefore);
}
- static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3;
- unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+ return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
+ Bundles, NameStr, InsertBefore);
+ }
- return new (Values, DescriptorBytes)
- InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, Values,
- NameStr, InsertBefore);
+ static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
+ NameStr, InsertAtEnd);
}
- static InvokeInst *Create(Value *Func,
- BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- unsigned Values = unsigned(Args.size()) + 3;
- return new (Values) InvokeInst(Func, IfNormal, IfException, Args, None,
- Values, NameStr, InsertAtEnd);
+ static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
+ Bundles, NameStr, InsertAtEnd);
}
+ // Deprecated [opaque pointer types]
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ const Twine &NameStr,
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, None, NameStr,
+ InsertBefore);
+ }
+
+ // Deprecated [opaque pointer types]
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, Bundles, NameStr,
+ InsertBefore);
+ }
+
+ // Deprecated [opaque pointer types]
static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3;
- unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, NameStr, InsertAtEnd);
+ }
- return new (Values, DescriptorBytes)
- InvokeInst(Func, IfNormal, IfException, Args, Bundles, Values, NameStr,
- InsertAtEnd);
+ // Deprecated [opaque pointer types]
+ static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, Bundles, NameStr,
+ InsertAtEnd);
}
/// Create a clone of \p II with a different set of operand bundles and
@@ -4017,43 +3781,18 @@ public:
addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
}
- /// Return the function called, or null if this is an
- /// indirect function invocation.
- ///
- Function *getCalledFunction() const {
- return dyn_cast<Function>(Op<-3>());
- }
-
- /// Get a pointer to the function that is invoked by this
- /// instruction
- const Value *getCalledValue() const { return Op<-3>(); }
- Value *getCalledValue() { return Op<-3>(); }
-
- /// Set the function called.
- void setCalledFunction(Value* Fn) {
- setCalledFunction(
- cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
- Fn);
- }
- void setCalledFunction(FunctionType *FTy, Value *Fn) {
- this->FTy = FTy;
- assert(FTy == cast<FunctionType>(
- cast<PointerType>(Fn->getType())->getElementType()));
- Op<-3>() = Fn;
- }
-
// get*Dest - Return the destination basic blocks...
BasicBlock *getNormalDest() const {
- return cast<BasicBlock>(Op<-2>());
+ return cast<BasicBlock>(Op<NormalDestOpEndIdx>());
}
BasicBlock *getUnwindDest() const {
- return cast<BasicBlock>(Op<-1>());
+ return cast<BasicBlock>(Op<UnwindDestOpEndIdx>());
}
void setNormalDest(BasicBlock *B) {
- Op<-2>() = reinterpret_cast<Value*>(B);
+ Op<NormalDestOpEndIdx>() = reinterpret_cast<Value *>(B);
}
void setUnwindDest(BasicBlock *B) {
- Op<-1>() = reinterpret_cast<Value*>(B);
+ Op<UnwindDestOpEndIdx>() = reinterpret_cast<Value *>(B);
}
/// Get the landingpad instruction from the landing pad
@@ -4065,9 +3804,12 @@ public:
return i == 0 ? getNormalDest() : getUnwindDest();
}
- void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
- assert(idx < 2 && "Successor # out of range for invoke!");
- *(&Op<-2>() + idx) = reinterpret_cast<Value*>(NewSucc);
+ void setSuccessor(unsigned i, BasicBlock *NewSucc) {
+ assert(i < 2 && "Successor # out of range for invoke!");
+ if (i == 0)
+ setNormalDest(NewSucc);
+ else
+ setUnwindDest(NewSucc);
}
unsigned getNumSuccessors() const { return 2; }
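A hedged sketch (not part of the patch; function name illustrative) of what the end-relative indices above imply, assuming the CallBase convention that the callee occupies the last operand slot: arguments and bundle operands come first, then the two destinations, then the callee, so the destination accessors are constant-time slot reads rather than searches.

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // For "invoke f(a, b) to %normal unwind %lpad" the operand list ends with
  // [..., %normal (Op<-3>), %lpad (Op<-2>), f (Op<-1>)].
  static BasicBlock *pickInvokeDest(InvokeInst *II, bool WantUnwind) {
    return WantUnwind ? II->getUnwindDest() : II->getNormalDest();
  }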
@@ -4089,36 +3831,26 @@ private:
}
};
-template <>
-struct OperandTraits<CallBase<InvokeInst>>
- : public VariadicOperandTraits<CallBase<InvokeInst>, 3> {};
-
InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, unsigned Values,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
const Twine &NameStr, Instruction *InsertBefore)
- : CallBase<InvokeInst>(Ty->getReturnType(), Instruction::Invoke,
- OperandTraits<CallBase<InvokeInst>>::op_end(this) -
- Values,
- Values, InsertBefore) {
+ : CallBase(Ty->getReturnType(), Instruction::Invoke,
+ OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+ InsertBefore) {
init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr);
}
-InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal,
+InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, unsigned Values,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
const Twine &NameStr, BasicBlock *InsertAtEnd)
- : CallBase<InvokeInst>(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::Invoke,
- OperandTraits<CallBase<InvokeInst>>::op_end(this) - Values, Values,
- InsertAtEnd) {
- init(Func, IfNormal, IfException, Args, Bundles, NameStr);
+ : CallBase(Ty->getReturnType(), Instruction::Invoke,
+ OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+ InsertAtEnd) {
+ init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr);
}
-
//===----------------------------------------------------------------------===//
// ResumeInst Class
//===----------------------------------------------------------------------===//
@@ -4126,7 +3858,7 @@ InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal,
//===---------------------------------------------------------------------------
/// Resume the propagation of an exception.
///
-class ResumeInst : public TerminatorInst {
+class ResumeInst : public Instruction {
ResumeInst(const ResumeInst &RI);
explicit ResumeInst(Value *Exn, Instruction *InsertBefore=nullptr);
@@ -4164,8 +3896,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned idx) const {
llvm_unreachable("ResumeInst has no successors!");
}
@@ -4185,7 +3915,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
//===----------------------------------------------------------------------===//
// CatchSwitchInst Class
//===----------------------------------------------------------------------===//
-class CatchSwitchInst : public TerminatorInst {
+class CatchSwitchInst : public Instruction {
/// The number of operands actually allocated. NumOperands is
/// the number actually in use.
unsigned ReservedSpace;
@@ -4451,7 +4181,7 @@ public:
// CatchReturnInst Class
//===----------------------------------------------------------------------===//
-class CatchReturnInst : public TerminatorInst {
+class CatchReturnInst : public Instruction {
CatchReturnInst(const CatchReturnInst &RI);
CatchReturnInst(Value *CatchPad, BasicBlock *BB, Instruction *InsertBefore);
CatchReturnInst(Value *CatchPad, BasicBlock *BB, BasicBlock *InsertAtEnd);
@@ -4511,8 +4241,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned Idx) const {
assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
return getSuccessor();
@@ -4534,7 +4262,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchReturnInst, Value)
// CleanupReturnInst Class
//===----------------------------------------------------------------------===//
-class CleanupReturnInst : public TerminatorInst {
+class CleanupReturnInst : public Instruction {
private:
CleanupReturnInst(const CleanupReturnInst &RI);
CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
@@ -4607,8 +4335,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned Idx) const {
assert(Idx == 0);
return getUnwindDest();
@@ -4641,7 +4367,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CleanupReturnInst, Value)
/// presence of this instruction indicates some higher level knowledge that the
/// end of the block cannot be reached.
///
-class UnreachableInst : public TerminatorInst {
+class UnreachableInst : public Instruction {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
@@ -4668,8 +4394,6 @@ public:
}
private:
- friend TerminatorInst;
-
BasicBlock *getSuccessor(unsigned idx) const {
llvm_unreachable("UnreachableInst has no successors!");
}
@@ -5248,6 +4972,25 @@ inline Value *getPointerOperand(Value *V) {
return nullptr;
}
+/// A helper function that returns the alignment of a load or store instruction.

+inline unsigned getLoadStoreAlignment(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getAlignment();
+ return cast<StoreInst>(I)->getAlignment();
+}
+
+/// A helper function that returns the address space of the pointer operand of
+/// a load or store instruction.
+inline unsigned getLoadStoreAddressSpace(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerAddressSpace();
+ return cast<StoreInst>(I)->getPointerAddressSpace();
+}
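A small usage sketch (not part of the patch; function name illustrative) showing the intended benefit of these helpers: callers filtering memory accesses no longer duplicate the load/store dispatch.

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Both helpers assert that I is a LoadInst or StoreInst, so a caller only
  // writes the combined query, not two nearly identical branches.
  static bool isUnderalignedFlatAccess(Value *I, unsigned MinAlign,
                                       unsigned FlatAddrSpace) {
    return getLoadStoreAddressSpace(I) == FlatAddrSpace &&
           getLoadStoreAlignment(I) < MinAlign;
  }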
+
} // end namespace llvm
#endif // LLVM_IR_INSTRUCTIONS_H
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
index 6650afcca7fb..80a7a7052574 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
@@ -66,6 +66,27 @@ namespace llvm {
/// This is the common base class for debug info intrinsics.
class DbgInfoIntrinsic : public IntrinsicInst {
public:
+ /// \name Casting methods
+ /// @{
+ static bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_addr:
+ case Intrinsic::dbg_label:
+ return true;
+ default: return false;
+ }
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ /// @}
+ };
+
+ /// This is the common base class for debug info intrinsics for variables.
+ class DbgVariableIntrinsic : public DbgInfoIntrinsic {
+ public:
/// Get the location corresponding to the variable referenced by the debug
/// info intrinsic. Depending on the intrinsic, this could be the
/// variable's value or its address.
@@ -104,7 +125,6 @@ namespace llvm {
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_addr:
- case Intrinsic::dbg_label:
return true;
default: return false;
}
@@ -116,7 +136,7 @@ namespace llvm {
};
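A minimal sketch (not part of the patch; function name illustrative) of how the split hierarchy is typically consumed: dbg.declare, dbg.value, and dbg.addr now share DbgVariableIntrinsic, while dbg.label remains only a DbgInfoIntrinsic, so a location-oriented walk can skip labels with a single dyn_cast.

  #include "llvm/IR/IntrinsicInst.h"
  using namespace llvm;

  // Returns the value or address described by a variable debug intrinsic,
  // or nullptr for anything else (including llvm.dbg.label).
  static Value *variableLocationOrNull(Instruction &I) {
    if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
      return DVI->getVariableLocation();
    return nullptr;
  }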
/// This represents the llvm.dbg.declare instruction.
- class DbgDeclareInst : public DbgInfoIntrinsic {
+ class DbgDeclareInst : public DbgVariableIntrinsic {
public:
Value *getAddress() const { return getVariableLocation(); }
@@ -132,7 +152,7 @@ namespace llvm {
};
/// This represents the llvm.dbg.addr instruction.
- class DbgAddrIntrinsic : public DbgInfoIntrinsic {
+ class DbgAddrIntrinsic : public DbgVariableIntrinsic {
public:
Value *getAddress() const { return getVariableLocation(); }
@@ -147,7 +167,7 @@ namespace llvm {
};
/// This represents the llvm.dbg.value instruction.
- class DbgValueInst : public DbgInfoIntrinsic {
+ class DbgValueInst : public DbgVariableIntrinsic {
public:
Value *getValue() const {
return getVariableLocation(/* AllowNullOp = */ false);
@@ -168,17 +188,13 @@ namespace llvm {
class DbgLabelInst : public DbgInfoIntrinsic {
public:
DILabel *getLabel() const {
- return cast<DILabel>(getRawVariable());
+ return cast<DILabel>(getRawLabel());
}
- Metadata *getRawVariable() const {
+ Metadata *getRawLabel() const {
return cast<MetadataAsValue>(getArgOperand(0))->getMetadata();
}
- Metadata *getRawExpression() const {
- return nullptr;
- }
-
/// Methods for support type inquiry through isa, cast, and dyn_cast:
/// @{
static bool classof(const IntrinsicInst *I) {
@@ -235,6 +251,12 @@ namespace llvm {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
return true;
default: return false;
}
diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td
index 0cec754dd649..64603d8ea030 100644
--- a/contrib/llvm/include/llvm/IR/Intrinsics.td
+++ b/contrib/llvm/include/llvm/IR/Intrinsics.td
@@ -90,6 +90,10 @@ class ReadNone<int argNo> : IntrinsicProperty {
def IntrNoReturn : IntrinsicProperty;
+// IntrCold - Calls to this intrinsic are cold.
+// Parallels the cold attribute on LLVM IR functions.
+def IntrCold : IntrinsicProperty;
+
// IntrNoduplicate - Calls to this intrinsic cannot be duplicated.
// Parallels the noduplicate attribute on LLVM IR functions.
def IntrNoDuplicate : IntrinsicProperty;
@@ -315,11 +319,84 @@ def int_gcwrite : Intrinsic<[],
[llvm_ptr_ty, llvm_ptr_ty, llvm_ptrptr_ty],
[IntrArgMemOnly, NoCapture<1>, NoCapture<2>]>;
+//===------------------- ObjC ARC runtime Intrinsics --------------------===//
+//
+// Note these are to support the Objective-C ARC optimizer which wants to
+// eliminate retains and releases where possible.
+
+def int_objc_autorelease : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_autoreleasePoolPop : Intrinsic<[], [llvm_ptr_ty]>;
+def int_objc_autoreleasePoolPush : Intrinsic<[llvm_ptr_ty], []>;
+def int_objc_autoreleaseReturnValue : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_copyWeak : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_destroyWeak : Intrinsic<[], [llvm_ptrptr_ty]>;
+def int_objc_initWeak : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptrptr_ty,
+ llvm_ptr_ty]>;
+def int_objc_loadWeak : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptrptr_ty]>;
+def int_objc_loadWeakRetained : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptrptr_ty]>;
+def int_objc_moveWeak : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_release : Intrinsic<[], [llvm_ptr_ty]>;
+def int_objc_retain : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainAutorelease : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainAutoreleaseReturnValue : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainAutoreleasedReturnValue : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainBlock : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_storeStrong : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptr_ty]>;
+def int_objc_storeWeak : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptrptr_ty,
+ llvm_ptr_ty]>;
+def int_objc_clang_arc_use : Intrinsic<[],
+ [llvm_vararg_ty]>;
+def int_objc_unsafeClaimAutoreleasedReturnValue : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retainedObject : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_unretainedObject : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_unretainedPointer : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_retain_autorelease : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty]>;
+def int_objc_sync_enter : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty]>;
+def int_objc_sync_exit : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty]>;
+def int_objc_arc_annotation_topdown_bbstart : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_arc_annotation_topdown_bbend : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_arc_annotation_bottomup_bbstart : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[],
+ [llvm_ptrptr_ty,
+ llvm_ptrptr_ty]>;
+
+
//===--------------------- Code Generator Intrinsics ----------------------===//
//
def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
[IntrReadMem], "llvm.read_register">;
def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty],
@@ -337,6 +414,13 @@ def int_localescape : Intrinsic<[], [llvm_vararg_ty]>;
def int_localrecover : Intrinsic<[llvm_ptr_ty],
[llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrNoMem]>;
+
+// Given the frame pointer passed into an SEH filter function, returns a
+// pointer to the local variable area suitable for use with llvm.localrecover.
+def int_eh_recoverfp : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_ptr_ty],
+ [IntrNoMem]>;
+
// Note: we treat stacksave/stackrestore as writemem because we don't otherwise
// model their dependencies on allocas.
def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
@@ -453,6 +537,14 @@ def int_maxnum : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, Commutative]
>;
+def int_minimum : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, Commutative]
+>;
+def int_maximum : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, Commutative]
+>;
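For contrast with llvm.minnum/llvm.maxnum above, a hedged reference sketch of the IEEE-754-2018 semantics llvm.minimum is generally documented to follow (NaN-propagating, and ordering -0.0 below +0.0); illustrative C++ only, not the in-tree lowering:

  #include <cmath>
  #include <limits>

  // llvm.minimum.f64-style behavior: any NaN input yields NaN, and the two
  // signed zeros are ordered (-0.0 is treated as less than +0.0).
  static double ieee_minimum(double A, double B) {
    if (std::isnan(A) || std::isnan(B))
      return std::numeric_limits<double>::quiet_NaN();
    if (A == 0.0 && B == 0.0)
      return std::signbit(A) ? A : B;
    return A < B ? A : B;
  }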
// NOTE: these are internal interfaces.
def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
@@ -557,9 +649,35 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_minnum : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_ceil : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_floor : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+ def int_experimental_constrained_trunc : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
}
// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
-// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round?
+// FIXME: Add intrinsics for fabs and copysign?
//===------------------------- Expect Intrinsics --------------------------===//
@@ -700,6 +818,27 @@ def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
+//===------------------------- Saturation Arithmetic Intrinsics ---------------------===//
+//
+def int_sadd_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+def int_uadd_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+def int_ssub_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_usub_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
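A hedged reference sketch of the clamping behavior these definitions describe, written for 32-bit signed addition (illustrative only; the intrinsics are polymorphic over integer and vector types):

  #include <cstdint>
  #include <limits>

  // llvm.sadd.sat.i32-style behavior: the exact sum, clamped to the signed
  // 32-bit range instead of wrapping.
  static int32_t sadd_sat_i32(int32_t A, int32_t B) {
    const int64_t Wide = static_cast<int64_t>(A) + B;
    if (Wide > std::numeric_limits<int32_t>::max())
      return std::numeric_limits<int32_t>::max();
    if (Wide < std::numeric_limits<int32_t>::min())
      return std::numeric_limits<int32_t>::min();
    return static_cast<int32_t>(Wide);
  }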
+//===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===//
+//
+def int_smul_fix : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+
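Similarly, a hedged sketch of the fixed-point multiply defined above for 32-bit operands, ignoring overflow cases and assuming an arithmetic right shift; the real intrinsic takes the scale as a constant third operand:

  #include <cstdint>

  // llvm.smul.fix.i32-style behavior: multiply in twice the width, then drop
  // Scale fractional bits (both inputs carry Scale fractional bits).
  static int32_t smul_fix_i32(int32_t A, int32_t B, unsigned Scale) {
    const int64_t Wide = static_cast<int64_t>(A) * static_cast<int64_t>(B);
    return static_cast<int32_t>(Wide >> Scale);
  }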
//===------------------------- Memory Use Markers -------------------------===//
//
def int_lifetime_start : Intrinsic<[],
@@ -817,7 +956,7 @@ def int_coro_subfn_addr : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty],
//
def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
GCCBuiltin<"__builtin_flt_rounds">;
-def int_trap : Intrinsic<[], [], [IntrNoReturn]>,
+def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>,
GCCBuiltin<"__builtin_trap">;
def int_debugtrap : Intrinsic<[]>,
GCCBuiltin<"__builtin_debugtrap">;
@@ -830,6 +969,10 @@ def int_experimental_deoptimize : Intrinsic<[llvm_any_ty], [llvm_vararg_ty],
def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty],
[Throws]>;
+// Supports widenable conditions for guards represented as explicit branches.
+def int_experimental_widenable_condition : Intrinsic<[llvm_i1_ty], [],
+ [IntrInaccessibleMemOnly]>;
+
// NOP: calls/invokes to this intrinsic are removed by codegen
def int_donothing : Intrinsic<[], [], [IntrNoMem]>;
@@ -850,6 +993,10 @@ def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>;
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[], "llvm.clear_cache">;
+// Intrinsic to detect whether its argument is a constant.
+def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">;
+
+
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
@@ -1008,3 +1155,4 @@ include "llvm/IR/IntrinsicsAMDGPU.td"
include "llvm/IR/IntrinsicsBPF.td"
include "llvm/IR/IntrinsicsSystemZ.td"
include "llvm/IR/IntrinsicsWebAssembly.td"
+include "llvm/IR/IntrinsicsRISCV.td"
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td b/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 688e863c1afe..ff25750fe399 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -44,6 +44,12 @@ def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, Intri
def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, Intrinsic<[], [llvm_i32_ty]>;
def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, Intrinsic<[], [llvm_i32_ty]>;
+// A space-consuming intrinsic primarily for testing block and jump table
+// placements. The first argument is the number of bytes this "instruction"
+// takes up; the second argument and the return value are essentially chains,
+// used to force ordering during ISel.
+def int_aarch64_space : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;
+
}
//===----------------------------------------------------------------------===//
@@ -154,6 +160,11 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
+
+ class AdvSIMD_FP16FML_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
+ [IntrNoMem]>;
}
// Arithmetic ops
@@ -424,6 +435,12 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
// v8.2-A Dot Product
def int_aarch64_neon_udot : AdvSIMD_Dot_Intrinsic;
def int_aarch64_neon_sdot : AdvSIMD_Dot_Intrinsic;
+
+ // v8.2-A FP16 Fused Multiply-Add Long
+ def int_aarch64_neon_fmlal : AdvSIMD_FP16FML_Intrinsic;
+ def int_aarch64_neon_fmlsl : AdvSIMD_FP16FML_Intrinsic;
+ def int_aarch64_neon_fmlal2 : AdvSIMD_FP16FML_Intrinsic;
+ def int_aarch64_neon_fmlsl2 : AdvSIMD_FP16FML_Intrinsic;
}
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 9f361410b9b8..7913ce828fbc 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -590,7 +590,7 @@ class AMDGPUDimSampleProfile<string opmod,
AMDGPUDimProps dim,
AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
let IsSample = 1;
- let RetTypes = [llvm_anyfloat_ty];
+ let RetTypes = [llvm_any_ty];
let ExtraAddrArgs = sample.ExtraAddrArgs;
let Gradients = sample.Gradients;
let LodClampMip = sample.LodOrClamp;
@@ -683,11 +683,11 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
}
defm int_amdgcn_image_load
- : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_anyfloat_ty], [], [IntrReadMem],
+ : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_any_ty], [], [IntrReadMem],
[SDNPMemOperand]>,
AMDGPUImageDMaskIntrinsic;
defm int_amdgcn_image_load_mip
- : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_anyfloat_ty], [],
+ : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_any_ty], [],
[IntrReadMem], [SDNPMemOperand], 1>,
AMDGPUImageDMaskIntrinsic;
@@ -802,6 +802,14 @@ class AMDGPUBufferLoad : Intrinsic <
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
+def int_amdgcn_s_buffer_load : Intrinsic <
+ [llvm_any_ty],
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // byte offset(SGPR/VGPR/imm)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc)
+ [IntrNoMem]>,
+ AMDGPURsrcIntrinsic<0>;
+
class AMDGPUBufferStore : Intrinsic <
[],
[llvm_anyfloat_ty, // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
@@ -815,6 +823,124 @@ class AMDGPUBufferStore : Intrinsic <
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
+// New buffer intrinsics with separate raw and struct variants. The raw
+// variant never has an index. The struct variant always has an index, even if
+// it is const 0. A struct intrinsic with constant 0 index is different to the
+// corresponding raw intrinsic on gfx9+ because the behavior of bound checking
+// and swizzling changes depending on whether idxen is set in the instruction.
+// These new intrinsics also keep the offset and soffset arguments separate as
+// they behave differently in bounds checking and swizzling.
+class AMDGPURawBufferLoad : Intrinsic <
+ [llvm_any_ty],
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrReadMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
+def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
+
+class AMDGPUStructBufferLoad : Intrinsic <
+ [llvm_any_ty],
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrReadMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
+def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
+
+class AMDGPURawBufferStore : Intrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrWriteMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
+def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
+
+class AMDGPUStructBufferStore : Intrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrWriteMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
+def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
+
+class AMDGPURawBufferAtomic : Intrinsic <
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // src(VGPR)
+ LLVMMatchType<0>, // cmp(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<2, 0>;
+
+class AMDGPUStructBufferAtomic : Intrinsic <
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_sub : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_smin : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_umin : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_smax : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic;
+def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // src(VGPR)
+ LLVMMatchType<0>, // cmp(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<2, 0>;
+
+// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : Intrinsic <
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
[llvm_v4i32_ty, // rsrc(SGPR)
@@ -844,6 +970,54 @@ def int_amdgcn_tbuffer_store : Intrinsic <
[IntrWriteMem], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
+// New tbuffer intrinsics, with:
+// - raw and struct variants
+// - joint format field
+// - joint cachepolicy field
+def int_amdgcn_raw_tbuffer_load : Intrinsic <
+ [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrReadMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+
+def int_amdgcn_raw_tbuffer_store : Intrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrWriteMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+
+def int_amdgcn_struct_tbuffer_load : Intrinsic <
+ [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrReadMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+
+def int_amdgcn_struct_tbuffer_store : Intrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ [IntrWriteMem], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+
class AMDGPUBufferAtomic : Intrinsic <
[llvm_i32_ty],
[llvm_i32_ty, // vdata(VGPR)
@@ -1310,18 +1484,10 @@ def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
[llvm_i64_ty], [IntrConvergent]
>;
-def int_amdgcn_break : Intrinsic<[llvm_i64_ty],
- [llvm_i64_ty], [IntrNoMem, IntrConvergent]
->;
-
def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
[llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;
-def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty],
- [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
->;
-
def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
[llvm_i64_ty], [IntrConvergent]
>;
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td b/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
index 25f4215d68a8..ecc69a679553 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -15,7 +15,7 @@
//
// All Hexagon intrinsics start with "llvm.hexagon.".
let TargetPrefix = "hexagon" in {
- /// Hexagon_Intrinsic - Base class for all Hexagon intrinsics.
+ /// Hexagon_Intrinsic - Base class for the majority of Hexagon intrinsics.
class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
@@ -30,397 +30,6 @@ let TargetPrefix = "hexagon" in {
: Intrinsic<ret_types, param_types, properties>;
}
-//===----------------------------------------------------------------------===//
-//
-// DEF_FUNCTION_TYPE_1(QI_ftype_MEM,BT_BOOL,BT_PTR) ->
-// Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_ptr_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) ->
-// Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i16_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(SI_ftype_SI,BT_INT,BT_INT) ->
-// Hexagon_si_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(DI_ftype_SI,BT_LONGLONG,BT_INT) ->
-// Hexagon_di_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(SI_ftype_DI,BT_INT,BT_LONGLONG) ->
-// Hexagon_si_di_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_di_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(DI_ftype_DI,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_di_di_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_di_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(QI_ftype_QI,BT_BOOL,BT_BOOL) ->
-// Hexagon_qi_qi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_qi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(QI_ftype_SI,BT_BOOL,BT_INT) ->
-// Hexagon_qi_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(DI_ftype_QI,BT_LONGLONG,BT_BOOL) ->
-// Hexagon_di_qi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_qi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_1(SI_ftype_QI,BT_INT,BT_BOOL) ->
-// Hexagon_si_qi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_qi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_SISI,BT_BOOL,BT_INT,BT_INT) ->
-// Hexagon_qi_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(void_ftype_SISI,BT_VOID,BT_INT,BT_INT) ->
-// Hexagon_void_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_void_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_void_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_SISI,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_si_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(USI_ftype_SISI,BT_UINT,BT_INT,BT_INT) ->
-// Hexagon_usi_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_usi_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(DI_ftype_SISI,BT_LONGLONG,BT_INT,BT_INT) ->
-// Hexagon_di_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(UDI_ftype_SISI,BT_ULONGLONG,BT_INT,BT_INT) ->
-// Hexagon_udi_sisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_udi_sisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(DI_ftype_SIDI,BT_LONGLONG,BT_INT,BT_LONGLONG) ->
-// Hexagon_di_sidi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_sidi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(DI_ftype_DISI,BT_LONGLONG,BT_LONGLONG,BT_INT) ->
-// Hexagon_di_disi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_disi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_SIDI,BT_INT,BT_INT,BT_LONGLONG) ->
-// Hexagon_si_sidi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sidi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_DIDI,BT_INT,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_si_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_didi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(DI_ftype_DIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_di_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_didi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(UDI_ftype_DIDI,BT_ULONGLONG,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_udi_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_udi_didi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_DISI,BT_INT,BT_LONGLONG,BT_INT) ->
-// Hexagon_si_disi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_disi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_DIDI,BT_BOOL,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_SIDI,BT_BOOL,BT_INT,BT_LONGLONG) ->
-// Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_sidi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_DISI,BT_BOOL,BT_LONGLONG,BT_INT) ->
-// Hexagon_qi_disi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_disi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_QIQI,BT_BOOL,BT_BOOL,BT_BOOL) ->
-// Hexagon_qi_qiqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_qiqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(QI_ftype_QIQIQI,BT_BOOL,BT_BOOL,BT_BOOL) ->
-// Hexagon_qi_qiqiqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_qiqiqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_i1_ty, llvm_i1_ty, llvm_i1_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_QIQI,BT_INT,BT_BOOL,BT_BOOL) ->
-// Hexagon_si_qiqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_qiqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_2(SI_ftype_QISI,BT_INT,BT_BOOL,BT_INT) ->
-// Hexagon_si_qisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_qisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i1_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(void_ftype_SISISI,BT_VOID,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_void_sisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_void_sisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_void_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_SISISI,BT_INT,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_si_sisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_SISISI,BT_LONGLONG,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_di_sisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_sisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_DISISI,BT_INT,BT_LONGLONG,BT_INT,BT_INT) ->
-// Hexagon_si_disisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_disisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_DISISI,BT_LONGLONG,BT_LONGLONG,BT_INT,BT_INT) ->
-// Hexagon_di_disisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_disisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_SIDISI,BT_INT,BT_INT,BT_LONGLONG,BT_INT) ->
-// Hexagon_si_sidisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sidisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_DIDISI,BT_LONGLONG,BT_LONGLONG,
-// BT_LONGLONG,BT_INT) ->
-// Hexagon_di_didisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_didisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_SIDIDI,BT_INT,BT_INT,BT_LONGLONG,BT_LONGLONG) ->
-// Hexagon_si_sididi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sididi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty,
- llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG,
-// BT_LONGLONG) ->
-// Hexagon_di_dididi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_dididi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_SISIDI,BT_INT,BT_INT,BT_INT,BT_LONGLONG) ->
-// Hexagon_si_sisidi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sisidi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(SI_ftype_QISISI,BT_INT,BT_BOOL,BT_INT,BT_INT) ->
-// Hexagon_si_qisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_qisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_QISISI,BT_LONGLONG,BT_BOOL,BT_INT,BT_INT) ->
-// Hexagon_di_qisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_qisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i1_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_QIDIDI,BT_LONGLONG,BT_BOOL,BT_LONGLONG,
-// BT_LONGLONG) ->
-// Hexagon_di_qididi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_qididi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty,
- llvm_i64_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIQI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG,
-// BT_BOOL) ->
-// Hexagon_di_didiqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_didiqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_4(SI_ftype_SISISISI,BT_INT,BT_INT,BT_INT,BT_INT,BT_INT) ->
-// Hexagon_si_sisisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sisisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// DEF_FUNCTION_TYPE_4(DI_ftype_DIDISISI,BT_LONGLONG,BT_LONGLONG,
-// BT_LONGLONG,BT_INT,BT_INT) ->
-// Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
class Hexagon_mem_memmemsi_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty,
@@ -457,191 +66,6 @@ class Hexagon_mem_memdisisi_Intrinsic<string GCCIntSuffix>
llvm_i32_ty, llvm_i32_ty],
[IntrWriteMem]>;
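// Reading aid (a sketch; the def name below is illustrative only): the
// classes in this file follow the pattern Hexagon_<ret>_<args>_Intrinsic,
// where "si" stands for llvm_i32_ty, "di" for llvm_i64_ty, "qi" for a
// predicate (llvm_i1_ty), "sf" for llvm_float_ty, "df" for llvm_double_ty,
// and "mem" for llvm_ptr_ty. Each class passes three lists to
// Hexagon_Intrinsic: the return type, the operand types, and the intrinsic
// properties (e.g. IntrNoMem, IntrWriteMem, Throws). A hypothetical
// instantiation would look like:
//
// def int_hexagon_A2_example :
// Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_example">;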
-class Hexagon_v256_v256v256_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
- [IntrArgMemOnly]>;
-
-//
-// Hexagon_sf_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_sf_df_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_df_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_double_ty],
- [IntrNoMem]>;
-//
-// Hexagon_sf_di_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_di_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_i64_ty],
- [IntrNoMem]>;
-//
-// Hexagon_df_sf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_sf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_float_ty],
- [IntrNoMem]>;
-//
-// Hexagon_di_sf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_sf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_float_ty],
- [IntrNoMem]>;
-//
-// Hexagon_sf_sf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_sf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_float_ty],
- [IntrNoMem]>;
-//
-// Hexagon_si_sf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_float_ty],
- [IntrNoMem]>;
-//
-// Hexagon_si_df_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_df_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_double_ty],
- [IntrNoMem]>;
-//
-// Hexagon_sf_sfsf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_sfsf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_si_sfsf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sfsf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_si_sfsi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_sfsi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_float_ty, llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_qi_sfqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_qi_sfqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i1_ty], [llvm_float_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// Hexagon_sf_sfsfsf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_sfsfsf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_float_ty, llvm_float_ty,
- llvm_float_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_sf_sfsfsfqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_sf_sfsfsfqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_float_ty], [llvm_float_ty, llvm_float_ty,
- llvm_float_ty,
- llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_di_dididisi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_dididisi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
- llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
-//
-// Hexagon_df_si_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_si_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_df_di_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_di_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_i64_ty],
- [IntrNoMem]>;
-//
-// Hexagon_di_df_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_di_df_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_double_ty],
- [IntrNoMem]>;
-//
-// Hexagon_df_df_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_df_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_double_ty],
- [IntrNoMem]>;
-//
-// Hexagon_df_dfdf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_dfdf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_si_dfdf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_dfdf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_si_dfsi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_si_dfsi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_double_ty, llvm_i32_ty],
- [IntrNoMem, Throws]>;
-//
-//
-// Hexagon_df_dfdfdf_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_dfdfdf_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_double_ty, llvm_double_ty,
- llvm_double_ty],
- [IntrNoMem, Throws]>;
-//
-// Hexagon_df_dfdfdfqi_Intrinsic<string GCCIntSuffix>
-//
-class Hexagon_df_dfdfdfqi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_double_ty], [llvm_double_ty, llvm_double_ty,
- llvm_double_ty,
- llvm_i32_ty],
- [IntrNoMem, Throws]>;
-
-
-// This one below will not be auto-generated,
-// so make sure you don't overwrite this one.
//
// BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4)
//
@@ -699,4204 +123,6 @@ Hexagon_mem_memsisisi_Intrinsic<"circ_sthhi">;
def int_hexagon_circ_stb :
Hexagon_mem_memsisisi_Intrinsic<"circ_stb">;
-
-def int_hexagon_mm256i_vaddw :
-Hexagon_v256_v256v256_Intrinsic<"_mm256i_vaddw">;
-
-
-// This one above will not be auto-generated,
-// so make sure you don't overwrite this one.
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpeq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgt,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgt :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgt">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgtu,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgtu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgtu">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpeqp,QI_ftype_DIDI,2)
-//
-def int_hexagon_C2_cmpeqp :
-Hexagon_si_didi_Intrinsic<"HEXAGON_C2_cmpeqp">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgtp,QI_ftype_DIDI,2)
-//
-def int_hexagon_C2_cmpgtp :
-Hexagon_si_didi_Intrinsic<"HEXAGON_C2_cmpgtp">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgtup,QI_ftype_DIDI,2)
-//
-def int_hexagon_C2_cmpgtup :
-Hexagon_si_didi_Intrinsic<"HEXAGON_C2_cmpgtup">;
-//
-// BUILTIN_INFO(HEXAGON.A4_rcmpeqi,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_rcmpeqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpeqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_rcmpneqi,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_rcmpneqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpneqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_rcmpeq,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_rcmpeq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.A4_rcmpneq,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_rcmpneq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpneq">;
-//
-// BUILTIN_INFO(HEXAGON.C2_bitsset,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_bitsset :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_bitsset">;
-//
-// BUILTIN_INFO(HEXAGON.C2_bitsclr,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_bitsclr :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_bitsclr">;
-//
-// BUILTIN_INFO(HEXAGON.C4_nbitsset,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_nbitsset :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_nbitsset">;
-//
-// BUILTIN_INFO(HEXAGON.C4_nbitsclr,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_nbitsclr :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_nbitsclr">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpeqi,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpeqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpeqi">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgti,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgti :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgti">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgtui,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgtui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgtui">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgei,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgei :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgei">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpgeui,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpgeui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpgeui">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmplt,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmplt :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmplt">;
-//
-// BUILTIN_INFO(HEXAGON.C2_cmpltu,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_cmpltu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_cmpltu">;
-//
-// BUILTIN_INFO(HEXAGON.C2_bitsclri,QI_ftype_SISI,2)
-//
-def int_hexagon_C2_bitsclri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_bitsclri">;
-//
-// BUILTIN_INFO(HEXAGON.C4_nbitsclri,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_nbitsclri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_nbitsclri">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmpneqi,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmpneqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmpneqi">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmpltei,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmpltei :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmpltei">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmplteui,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmplteui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmplteui">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmpneq,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmpneq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmpneq">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmplte,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmplte :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmplte">;
-//
-// BUILTIN_INFO(HEXAGON.C4_cmplteu,QI_ftype_SISI,2)
-//
-def int_hexagon_C4_cmplteu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C4_cmplteu">;
-//
-// BUILTIN_INFO(HEXAGON.C2_and,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_and :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_and">;
-//
-// BUILTIN_INFO(HEXAGON.C2_or,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_or :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_or">;
-//
-// BUILTIN_INFO(HEXAGON.C2_xor,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_xor :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_xor">;
-//
-// BUILTIN_INFO(HEXAGON.C2_andn,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_andn :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_andn">;
-//
-// BUILTIN_INFO(HEXAGON.C2_not,QI_ftype_QI,1)
-//
-def int_hexagon_C2_not :
-Hexagon_si_si_Intrinsic<"HEXAGON_C2_not">;
-//
-// BUILTIN_INFO(HEXAGON.C2_orn,QI_ftype_QIQI,2)
-//
-def int_hexagon_C2_orn :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_C2_orn">;
-//
-// BUILTIN_INFO(HEXAGON.C4_and_and,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_and_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_and_and">;
-//
-// BUILTIN_INFO(HEXAGON.C4_and_or,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_and_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_and_or">;
-//
-// BUILTIN_INFO(HEXAGON.C4_or_and,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_or_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_or_and">;
-//
-// BUILTIN_INFO(HEXAGON.C4_or_or,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_or_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_or_or">;
-//
-// BUILTIN_INFO(HEXAGON.C4_and_andn,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_and_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_and_andn">;
-//
-// BUILTIN_INFO(HEXAGON.C4_and_orn,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_and_orn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_and_orn">;
-//
-// BUILTIN_INFO(HEXAGON.C4_or_andn,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_or_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_or_andn">;
-//
-// BUILTIN_INFO(HEXAGON.C4_or_orn,QI_ftype_QIQIQI,3)
-//
-def int_hexagon_C4_or_orn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_C4_or_orn">;
-//
-// BUILTIN_INFO(HEXAGON.C2_pxfer_map,QI_ftype_QI,1)
-//
-def int_hexagon_C2_pxfer_map :
-Hexagon_si_qi_Intrinsic<"HEXAGON_C2_pxfer_map">;
-//
-// BUILTIN_INFO(HEXAGON.C2_any8,QI_ftype_QI,1)
-//
-def int_hexagon_C2_any8 :
-Hexagon_si_qi_Intrinsic<"HEXAGON_C2_any8">;
-//
-// BUILTIN_INFO(HEXAGON.C2_all8,QI_ftype_QI,1)
-//
-def int_hexagon_C2_all8 :
-Hexagon_si_qi_Intrinsic<"HEXAGON_C2_all8">;
-//
-// BUILTIN_INFO(HEXAGON.C2_vitpack,SI_ftype_QIQI,2)
-//
-def int_hexagon_C2_vitpack :
-Hexagon_si_qiqi_Intrinsic<"HEXAGON_C2_vitpack">;
-//
-// BUILTIN_INFO(HEXAGON.C2_mux,SI_ftype_QISISI,3)
-//
-def int_hexagon_C2_mux :
-Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_mux">;
-//
-// BUILTIN_INFO(HEXAGON.C2_muxii,SI_ftype_QISISI,3)
-//
-def int_hexagon_C2_muxii :
-Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxii">;
-//
-// BUILTIN_INFO(HEXAGON.C2_muxir,SI_ftype_QISISI,3)
-//
-def int_hexagon_C2_muxir :
-Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxir">;
-//
-// BUILTIN_INFO(HEXAGON.C2_muxri,SI_ftype_QISISI,3)
-//
-def int_hexagon_C2_muxri :
-Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxri">;
-//
-// BUILTIN_INFO(HEXAGON.C2_vmux,DI_ftype_QIDIDI,3)
-//
-def int_hexagon_C2_vmux :
-Hexagon_di_qididi_Intrinsic<"HEXAGON_C2_vmux">;
-//
-// BUILTIN_INFO(HEXAGON.C2_mask,DI_ftype_QI,1)
-//
-def int_hexagon_C2_mask :
-Hexagon_di_qi_Intrinsic<"HEXAGON_C2_mask">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpbeq,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpbeq :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpbeq">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbeqi,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpbeqi :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpbeqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbeq_any,QI_ftype_DIDI,2)
-//
-def int_hexagon_A4_vcmpbeq_any :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A4_vcmpbeq_any">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpbgtu,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpbgtu :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpbgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbgtui,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpbgtui :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpbgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbgt,QI_ftype_DIDI,2)
-//
-def int_hexagon_A4_vcmpbgt :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A4_vcmpbgt">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpbgti,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpbgti :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpbgti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbeq,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbeq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbeq">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbeqi,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbeqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbeqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbgtu,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbgtu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbgtui,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbgtui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbgt,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbgt :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbgt">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpbgti,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpbgti :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpbgti">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpheq,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpheq :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpheq">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmphgt,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmphgt :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmphgt">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmphgtu,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmphgtu :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmphgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpheqi,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpheqi :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpheqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmphgti,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmphgti :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmphgti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmphgtui,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmphgtui :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmphgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpheq,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpheq :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpheq">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmphgt,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmphgt :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmphgt">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmphgtu,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmphgtu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmphgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmpheqi,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmpheqi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmpheqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmphgti,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmphgti :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmphgti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cmphgtui,QI_ftype_SISI,2)
-//
-def int_hexagon_A4_cmphgtui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cmphgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpweq,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpweq :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpweq">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpwgt,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpwgt :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpwgt">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vcmpwgtu,QI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vcmpwgtu :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A2_vcmpwgtu">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpweqi,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpweqi :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpweqi">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpwgti,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpwgti :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpwgti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vcmpwgtui,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_vcmpwgtui :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_vcmpwgtui">;
-//
-// BUILTIN_INFO(HEXAGON.A4_boundscheck,QI_ftype_SIDI,2)
-//
-def int_hexagon_A4_boundscheck :
-Hexagon_si_sidi_Intrinsic<"HEXAGON_A4_boundscheck">;
-//
-// BUILTIN_INFO(HEXAGON.A4_tlbmatch,QI_ftype_DISI,2)
-//
-def int_hexagon_A4_tlbmatch :
-Hexagon_si_disi_Intrinsic<"HEXAGON_A4_tlbmatch">;
-//
-// BUILTIN_INFO(HEXAGON.C2_tfrpr,SI_ftype_QI,1)
-//
-def int_hexagon_C2_tfrpr :
-Hexagon_si_qi_Intrinsic<"HEXAGON_C2_tfrpr">;
-//
-// BUILTIN_INFO(HEXAGON.C2_tfrrp,QI_ftype_SI,1)
-//
-def int_hexagon_C2_tfrrp :
-Hexagon_si_si_Intrinsic<"HEXAGON_C2_tfrrp">;
-//
-// BUILTIN_INFO(HEXAGON.C4_fastcorner9,QI_ftype_QIQI,2)
-//
-def int_hexagon_C4_fastcorner9 :
-Hexagon_si_qiqi_Intrinsic<"HEXAGON_C4_fastcorner9">;
-//
-// BUILTIN_INFO(HEXAGON.C4_fastcorner9_not,QI_ftype_QIQI,2)
-//
-def int_hexagon_C4_fastcorner9_not :
-Hexagon_si_qiqi_Intrinsic<"HEXAGON_C4_fastcorner9_not">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_acc_sat_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpy_nac_sat_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_rnd_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_sat_rnd_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_hh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_hh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_hl_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_hl_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_lh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_lh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_ll_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_acc_ll_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_hh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_hh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_hl_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_hl_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_lh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_lh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_ll_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyd_nac_ll_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_hh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_hh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_hl_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_hl_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_lh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_lh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_ll_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_ll_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_hh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_hh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_hl_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_hl_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_lh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_lh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_ll_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyd_rnd_ll_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_acc_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_hh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_hh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_hl_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_hl_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_lh_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_lh_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s0,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_ll_s0 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s1,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_mpyu_nac_ll_s1 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s0,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_hh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s1,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_hh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s0,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_hl_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s1,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_hl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s0,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_lh_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s1,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_lh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s0,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_ll_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s1,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_ll_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_hh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_hh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_hl_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_hl_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_lh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_lh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_ll_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_acc_ll_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_hh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_hh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_hl_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_hl_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_lh_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_lh_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_ll_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_mpyud_nac_ll_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_hh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s1,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_hh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_hl_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s1,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_hl_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_lh_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s1,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_lh_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_ll_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s1,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyud_ll_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpysmi,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpysmi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpysmi">;
-//
-// BUILTIN_INFO(HEXAGON.M2_macsip,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_macsip :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_macsip">;
-//
-// BUILTIN_INFO(HEXAGON.M2_macsin,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_macsin :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_macsin">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyss_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_dpmpyss_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_dpmpyss_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyss_acc_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_dpmpyss_acc_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyss_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyss_nac_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_dpmpyss_nac_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyss_nac_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_s0,UDI_ftype_SISI,2)
-//
-def int_hexagon_M2_dpmpyuu_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_dpmpyuu_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_acc_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_dpmpyuu_acc_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyuu_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_nac_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_dpmpyuu_nac_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyuu_nac_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_up,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_up :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_up_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_up_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpy_up_s1_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpy_up_s1_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up_s1_sat">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyu_up,USI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyu_up :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_up">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpysu_up,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpysu_up :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpysu_up">;
-//
-// BUILTIN_INFO(HEXAGON.M2_dpmpyss_rnd_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_dpmpyss_rnd_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mac_up_s1_sat,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mac_up_s1_sat :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mac_up_s1_sat">;
-//
-// BUILTIN_INFO(HEXAGON.M4_nac_up_s1_sat,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_nac_up_s1_sat :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_nac_up_s1_sat">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyi,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyi">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mpyui,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_mpyui :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyui">;
-//
-// BUILTIN_INFO(HEXAGON.M2_maci,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_maci :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_maci">;
-//
-// BUILTIN_INFO(HEXAGON.M2_acci,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_acci :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_acci">;
-//
-// BUILTIN_INFO(HEXAGON.M2_accii,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_accii :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_accii">;
-//
-// BUILTIN_INFO(HEXAGON.M2_nacci,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_nacci :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_nacci">;
-//
-// BUILTIN_INFO(HEXAGON.M2_naccii,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_naccii :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_naccii">;
-//
-// BUILTIN_INFO(HEXAGON.M2_subacc,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_subacc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_subacc">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyrr_addr,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyrr_addr :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyrr_addr">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyri_addr_u2,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyri_addr_u2 :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addr_u2">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyri_addr,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyri_addr :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addr">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyri_addi,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyri_addi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addi">;
-//
-// BUILTIN_INFO(HEXAGON.M4_mpyrr_addi,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_mpyrr_addi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyrr_addi">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2s_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2s_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2s_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2s_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2s_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2s_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2s_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2s_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2su_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2su_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2su_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2su_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2su_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2su_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2su_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2su_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2su_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2su_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2su_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2su_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0pack,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2s_s0pack :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s0pack">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1pack,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_vmpy2s_s1pack :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s1pack">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_vmac2 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vmpy2es_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vmpy2es_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vmpy2es_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vmpy2es_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2es_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vmac2es_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2es_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vmac2es_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vmac2es,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vmac2es :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrmac_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrmac_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrmac_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrmpy_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrmpy_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrmpy_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s0,SI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vdmpyrs_s0 :
-Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vdmpyrs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s1,SI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vdmpyrs_s1 :
-Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vdmpyrs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vrmpybuu,DI_ftype_DIDI,2)
-//
-def int_hexagon_M5_vrmpybuu :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vrmpybuu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vrmacbuu,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M5_vrmacbuu :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vrmacbuu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vrmpybsu,DI_ftype_DIDI,2)
-//
-def int_hexagon_M5_vrmpybsu :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vrmpybsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vrmacbsu,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M5_vrmacbsu :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vrmacbsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vmpybuu,DI_ftype_SISI,2)
-//
-def int_hexagon_M5_vmpybuu :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M5_vmpybuu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vmpybsu,DI_ftype_SISI,2)
-//
-def int_hexagon_M5_vmpybsu :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M5_vmpybsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vmacbuu,DI_ftype_DISISI,3)
-//
-def int_hexagon_M5_vmacbuu :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M5_vmacbuu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vmacbsu,DI_ftype_DISISI,3)
-//
-def int_hexagon_M5_vmacbsu :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M5_vmacbsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vdmpybsu,DI_ftype_DIDI,2)
-//
-def int_hexagon_M5_vdmpybsu :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vdmpybsu">;
-//
-// BUILTIN_INFO(HEXAGON.M5_vdmacbsu,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M5_vdmacbsu :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vdmacbsu">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmacs_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vdmacs_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vdmacs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmacs_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vdmacs_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vdmacs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmpys_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vdmpys_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vdmpys_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vdmpys_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vdmpys_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vdmpys_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyrs_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyrs_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s0,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyrsc_s0 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyrsc_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacs_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacs_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacs_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacs_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacsc_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacsc_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacsc_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacsc_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpys_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpys_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpys_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpys_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpys_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpys_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpysc_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpysc_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpysc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpysc_s1,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpysc_s1 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpysc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cnacs_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cnacs_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cnacs_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cnacs_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cnacsc_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cnacsc_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cnacsc_s1,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cnacsc_s1 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1,DI_ftype_DISI,2)
-//
-def int_hexagon_M2_vrcmpys_s1 :
-Hexagon_di_disi_Intrinsic<"HEXAGON_M2_vrcmpys_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpys_acc_s1,DI_ftype_DIDISI,3)
-//
-def int_hexagon_M2_vrcmpys_acc_s1 :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_M2_vrcmpys_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1rp,SI_ftype_DISI,2)
-//
-def int_hexagon_M2_vrcmpys_s1rp :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M2_vrcmpys_s1rp">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacls_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacls_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacls_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacls_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmachs_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmachs_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmachs_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmachs_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyl_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyl_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyl_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyl_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyh_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyh_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyh_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyh_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacls_rs0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacls_rs0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacls_rs1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacls_rs1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmachs_rs0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmachs_rs0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmachs_rs1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmachs_rs1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyl_rs0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyl_rs1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyh_rs0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyh_rs1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyeh_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M4_vrmpyeh_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyeh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyeh_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M4_vrmpyeh_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyeh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyeh_acc_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_vrmpyeh_acc_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyeh_acc_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_vrmpyeh_acc_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyoh_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M4_vrmpyoh_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyoh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyoh_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M4_vrmpyoh_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyoh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyoh_acc_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_vrmpyoh_acc_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vrmpyoh_acc_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_vrmpyoh_acc_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_hmmpyl_rs1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_hmmpyl_rs1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyl_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_hmmpyh_rs1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_hmmpyh_rs1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_hmmpyl_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_hmmpyl_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_hmmpyh_s1,SI_ftype_SISI,2)
-//
-def int_hexagon_M2_hmmpyh_s1 :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmaculs_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmaculs_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmaculs_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmaculs_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacuhs_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacuhs_s1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyul_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyul_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyul_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyul_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyuh_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyuh_s1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmaculs_rs0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmaculs_rs1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacuhs_rs0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs1,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_mmacuhs_rs1 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyul_rs0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyul_rs1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyuh_rs0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs1,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_mmpyuh_rs1 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrcmaci_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmaci_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrcmacr_s0 :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmacr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0c,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrcmaci_s0c :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmaci_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0c,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vrcmacr_s0c :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmacr_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmaci_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmaci_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmaci_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmacr_s0,DI_ftype_DISISI,3)
-//
-def int_hexagon_M2_cmacr_s0 :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrcmpyi_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyi_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrcmpyr_s0 :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0c,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrcmpyi_s0c :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyi_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0c,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vrcmpyr_s0c :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyr_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyi_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyi_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpyi_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M2_cmpyr_s0,DI_ftype_SISI,2)
-//
-def int_hexagon_M2_cmpyr_s0 :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpyr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.M4_cmpyi_wh,SI_ftype_DISI,2)
-//
-def int_hexagon_M4_cmpyi_wh :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyi_wh">;
-//
-// BUILTIN_INFO(HEXAGON.M4_cmpyr_wh,SI_ftype_DISI,2)
-//
-def int_hexagon_M4_cmpyr_wh :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyr_wh">;
-//
-// BUILTIN_INFO(HEXAGON.M4_cmpyi_whc,SI_ftype_DISI,2)
-//
-def int_hexagon_M4_cmpyi_whc :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyi_whc">;
-//
-// BUILTIN_INFO(HEXAGON.M4_cmpyr_whc,SI_ftype_DISI,2)
-//
-def int_hexagon_M4_cmpyr_whc :
-Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyr_whc">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_i,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vcmpy_s0_sat_i :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_i">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_r,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vcmpy_s0_sat_r :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_r">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_i,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vcmpy_s1_sat_i :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_i">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_r,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vcmpy_s1_sat_r :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_r">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_i,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vcmac_s0_sat_i :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_i">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_r,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M2_vcmac_s0_sat_r :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vcrotate,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_vcrotate :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_vcrotate">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vrcrotate_acc,DI_ftype_DIDISISI,4)
-//
-def int_hexagon_S4_vrcrotate_acc :
-Hexagon_di_didisisi_Intrinsic<"HEXAGON_S4_vrcrotate_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vrcrotate,DI_ftype_DISISI,3)
-//
-def int_hexagon_S4_vrcrotate :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_S4_vrcrotate">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vcnegh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_vcnegh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_vcnegh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vrcnegh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_vrcnegh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_vrcnegh">;
-//
-// BUILTIN_INFO(HEXAGON.M4_pmpyw,DI_ftype_SISI,2)
-//
-def int_hexagon_M4_pmpyw :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M4_pmpyw">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vpmpyh,DI_ftype_SISI,2)
-//
-def int_hexagon_M4_vpmpyh :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_M4_vpmpyh">;
-//
-// BUILTIN_INFO(HEXAGON.M4_pmpyw_acc,DI_ftype_DISISI,3)
-//
-def int_hexagon_M4_pmpyw_acc :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M4_pmpyw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.M4_vpmpyh_acc,DI_ftype_DISISI,3)
-//
-def int_hexagon_M4_vpmpyh_acc :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_M4_vpmpyh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.A2_add,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_add :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_add">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sub,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_sub :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_sub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addsat,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addsat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subsat,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subsat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addi,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addi">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_l16_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_l16_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_l16_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_l16_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_l16_sat_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_l16_sat_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_l16_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_l16_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_l16_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_l16_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_l16_sat_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_l16_sat_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_sat_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_sat_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_sat_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_addh_h16_sat_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_sat_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_sat_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_sat_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subh_h16_sat_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_aslh,SI_ftype_SI,1)
-//
-def int_hexagon_A2_aslh :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_aslh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_asrh,SI_ftype_SI,1)
-//
-def int_hexagon_A2_asrh :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_asrh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_addp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_addp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addpsat,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_addpsat :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_addpsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_addsp,DI_ftype_SIDI,2)
-//
-def int_hexagon_A2_addsp :
-Hexagon_di_sidi_Intrinsic<"HEXAGON_A2_addsp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_subp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_subp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_neg,SI_ftype_SI,1)
-//
-def int_hexagon_A2_neg :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_neg">;
-//
-// BUILTIN_INFO(HEXAGON.A2_negsat,SI_ftype_SI,1)
-//
-def int_hexagon_A2_negsat :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_negsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_abs,SI_ftype_SI,1)
-//
-def int_hexagon_A2_abs :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_abs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_abssat,SI_ftype_SI,1)
-//
-def int_hexagon_A2_abssat :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_abssat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vconj,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vconj :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vconj">;
-//
-// BUILTIN_INFO(HEXAGON.A2_negp,DI_ftype_DI,1)
-//
-def int_hexagon_A2_negp :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_negp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_absp,DI_ftype_DI,1)
-//
-def int_hexagon_A2_absp :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_absp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_max,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_max :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_max">;
-//
-// BUILTIN_INFO(HEXAGON.A2_maxu,USI_ftype_SISI,2)
-//
-def int_hexagon_A2_maxu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_maxu">;
-//
-// BUILTIN_INFO(HEXAGON.A2_min,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_min :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_min">;
-//
-// BUILTIN_INFO(HEXAGON.A2_minu,USI_ftype_SISI,2)
-//
-def int_hexagon_A2_minu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_minu">;
-//
-// BUILTIN_INFO(HEXAGON.A2_maxp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_maxp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_maxp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_maxup,UDI_ftype_DIDI,2)
-//
-def int_hexagon_A2_maxup :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_maxup">;
-//
-// BUILTIN_INFO(HEXAGON.A2_minp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_minp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_minp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_minup,UDI_ftype_DIDI,2)
-//
-def int_hexagon_A2_minup :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_minup">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfr,SI_ftype_SI,1)
-//
-def int_hexagon_A2_tfr :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_tfr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfrsi,SI_ftype_SI,1)
-//
-def int_hexagon_A2_tfrsi :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_tfrsi">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfrp,DI_ftype_DI,1)
-//
-def int_hexagon_A2_tfrp :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_tfrp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfrpi,DI_ftype_SI,1)
-//
-def int_hexagon_A2_tfrpi :
-Hexagon_di_si_Intrinsic<"HEXAGON_A2_tfrpi">;
-//
-// BUILTIN_INFO(HEXAGON.A2_zxtb,SI_ftype_SI,1)
-//
-def int_hexagon_A2_zxtb :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_zxtb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sxtb,SI_ftype_SI,1)
-//
-def int_hexagon_A2_sxtb :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_sxtb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_zxth,SI_ftype_SI,1)
-//
-def int_hexagon_A2_zxth :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_zxth">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sxth,SI_ftype_SI,1)
-//
-def int_hexagon_A2_sxth :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_sxth">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combinew,DI_ftype_SISI,2)
-//
-def int_hexagon_A2_combinew :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A2_combinew">;
-//
-// BUILTIN_INFO(HEXAGON.A4_combineri,DI_ftype_SISI,2)
-//
-def int_hexagon_A4_combineri :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_combineri">;
-//
-// BUILTIN_INFO(HEXAGON.A4_combineir,DI_ftype_SISI,2)
-//
-def int_hexagon_A4_combineir :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_combineir">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combineii,DI_ftype_SISI,2)
-//
-def int_hexagon_A2_combineii :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A2_combineii">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combine_hh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_combine_hh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_hh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combine_hl,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_combine_hl :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_hl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combine_lh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_combine_lh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_lh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_combine_ll,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_combine_ll :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_ll">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfril,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_tfril :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_tfril">;
-//
-// BUILTIN_INFO(HEXAGON.A2_tfrih,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_tfrih :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_tfrih">;
-//
-// BUILTIN_INFO(HEXAGON.A2_and,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_and :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_and">;
-//
-// BUILTIN_INFO(HEXAGON.A2_or,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_or :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_or">;
-//
-// BUILTIN_INFO(HEXAGON.A2_xor,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_xor :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_xor">;
-//
-// BUILTIN_INFO(HEXAGON.A2_not,SI_ftype_SI,1)
-//
-def int_hexagon_A2_not :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_not">;
-//
-// BUILTIN_INFO(HEXAGON.M2_xor_xacc,SI_ftype_SISISI,3)
-//
-def int_hexagon_M2_xor_xacc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_xor_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.M4_xor_xacc,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_M4_xor_xacc :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_xor_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.A4_andn,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_andn :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_andn">;
-//
-// BUILTIN_INFO(HEXAGON.A4_orn,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_orn :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_orn">;
-//
-// BUILTIN_INFO(HEXAGON.A4_andnp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A4_andnp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A4_andnp">;
-//
-// BUILTIN_INFO(HEXAGON.A4_ornp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A4_ornp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A4_ornp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_addaddi,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_addaddi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addaddi">;
-//
-// BUILTIN_INFO(HEXAGON.S4_subaddi,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_subaddi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subaddi">;
-//
-// BUILTIN_INFO(HEXAGON.M4_and_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_and_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_and">;
-//
-// BUILTIN_INFO(HEXAGON.M4_and_andn,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_and_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_andn">;
-//
-// BUILTIN_INFO(HEXAGON.M4_and_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_and_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_or">;
-//
-// BUILTIN_INFO(HEXAGON.M4_and_xor,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_and_xor :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_xor">;
-//
-// BUILTIN_INFO(HEXAGON.M4_or_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_or_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_and">;
-//
-// BUILTIN_INFO(HEXAGON.M4_or_andn,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_or_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_andn">;
-//
-// BUILTIN_INFO(HEXAGON.M4_or_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_or_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_or">;
-//
-// BUILTIN_INFO(HEXAGON.M4_or_xor,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_or_xor :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S4_or_andix,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_or_andix :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_andix">;
-//
-// BUILTIN_INFO(HEXAGON.S4_or_andi,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_or_andi :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_andi">;
-//
-// BUILTIN_INFO(HEXAGON.S4_or_ori,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_or_ori :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_ori">;
-//
-// BUILTIN_INFO(HEXAGON.M4_xor_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_xor_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_and">;
-//
-// BUILTIN_INFO(HEXAGON.M4_xor_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_xor_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_or">;
-//
-// BUILTIN_INFO(HEXAGON.M4_xor_andn,SI_ftype_SISISI,3)
-//
-def int_hexagon_M4_xor_andn :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_andn">;
-//
-// BUILTIN_INFO(HEXAGON.A2_subri,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_subri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subri">;
-//
-// BUILTIN_INFO(HEXAGON.A2_andir,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_andir :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_andir">;
-//
-// BUILTIN_INFO(HEXAGON.A2_orir,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_orir :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_orir">;
-//
-// BUILTIN_INFO(HEXAGON.A2_andp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_andp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_andp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_orp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_orp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_orp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_xorp,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_xorp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_xorp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_notp,DI_ftype_DI,1)
-//
-def int_hexagon_A2_notp :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_notp">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sxtw,DI_ftype_SI,1)
-//
-def int_hexagon_A2_sxtw :
-Hexagon_di_si_Intrinsic<"HEXAGON_A2_sxtw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sat,SI_ftype_DI,1)
-//
-def int_hexagon_A2_sat :
-Hexagon_si_di_Intrinsic<"HEXAGON_A2_sat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_roundsat,SI_ftype_DI,1)
-//
-def int_hexagon_A2_roundsat :
-Hexagon_si_di_Intrinsic<"HEXAGON_A2_roundsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_sath,SI_ftype_SI,1)
-//
-def int_hexagon_A2_sath :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_sath">;
-//
-// BUILTIN_INFO(HEXAGON.A2_satuh,SI_ftype_SI,1)
-//
-def int_hexagon_A2_satuh :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_satuh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_satub,SI_ftype_SI,1)
-//
-def int_hexagon_A2_satub :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_satub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_satb,SI_ftype_SI,1)
-//
-def int_hexagon_A2_satb :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_satb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddb_map,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddb_map :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddb_map">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddubs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddubs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddubs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddhs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddhs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vadduhs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vadduhs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vadduhs">;
-//
-// BUILTIN_INFO(HEXAGON.A5_vaddhubs,SI_ftype_DIDI,2)
-//
-def int_hexagon_A5_vaddhubs :
-Hexagon_si_didi_Intrinsic<"HEXAGON_A5_vaddhubs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vaddws,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vaddws :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddws">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxaddsubw,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxaddsubw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubw">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxsubaddw,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxsubaddw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddw">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxaddsubh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxaddsubh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubh">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxsubaddh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxsubaddh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddh">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxaddsubhr,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxaddsubhr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubhr">;
-//
-// BUILTIN_INFO(HEXAGON.S4_vxsubaddhr,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_vxsubaddhr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddhr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svavgh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svavgh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svavgh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svavghs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svavghs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svavghs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svnavgh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svnavgh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svnavgh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svaddh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svaddh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svaddh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svaddhs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svaddhs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svaddhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svadduhs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svadduhs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svadduhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svsubh,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svsubh :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svsubhs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svsubhs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_svsubuhs,SI_ftype_SISI,2)
-//
-def int_hexagon_A2_svsubuhs :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubuhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vraddub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vraddub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vraddub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vraddub_acc,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_A2_vraddub_acc :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_A2_vraddub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vraddh,SI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vraddh :
-Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vraddh">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vradduh,SI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vradduh :
-Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vradduh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubb_map,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubb_map :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubb_map">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsububs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsububs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsububs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubhs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubhs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubuhs,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubuhs :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubuhs">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vsubws,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vsubws :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubws">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vabsh,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vabsh :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabsh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vabshsat,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vabshsat :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabshsat">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vabsw,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vabsw :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabsw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vabswsat,DI_ftype_DI,1)
-//
-def int_hexagon_A2_vabswsat :
-Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabswsat">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vabsdiffw,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vabsdiffw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vabsdiffw">;
-//
-// BUILTIN_INFO(HEXAGON.M2_vabsdiffh,DI_ftype_DIDI,2)
-//
-def int_hexagon_M2_vabsdiffh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vabsdiffh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vrsadub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vrsadub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vrsadub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vrsadub_acc,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_A2_vrsadub_acc :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_A2_vrsadub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavguh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavguh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavgh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavgh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavgw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavgw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgwr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgwr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgwr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavgwr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavgwr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgwr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgwcr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgwcr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgwcr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavgwcr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavgwcr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgwcr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavghcr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavghcr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavghcr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavghcr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavghcr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavghcr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavguw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavguw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavguwr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavguwr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguwr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavgubr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavgubr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgubr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavguhr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavguhr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguhr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vavghr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vavghr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavghr">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vnavghr,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vnavghr :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavghr">;
-//
-// BUILTIN_INFO(HEXAGON.A4_round_ri,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_round_ri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_ri">;
-//
-// BUILTIN_INFO(HEXAGON.A4_round_rr,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_round_rr :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_rr">;
-//
-// BUILTIN_INFO(HEXAGON.A4_round_ri_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_round_ri_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_ri_sat">;
-//
-// BUILTIN_INFO(HEXAGON.A4_round_rr_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_round_rr_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_rr_sat">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cround_ri,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_cround_ri :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cround_ri">;
-//
-// BUILTIN_INFO(HEXAGON.A4_cround_rr,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_cround_rr :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cround_rr">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrminh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrminh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminh">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrmaxh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrmaxh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxh">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrminuh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrminuh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminuh">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrmaxuh,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrmaxuh :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxuh">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrminw,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrminw :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminw">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrmaxw,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrmaxw :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxw">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrminuw,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrminuw :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminuw">;
-//
-// BUILTIN_INFO(HEXAGON.A4_vrmaxuw,DI_ftype_DIDISI,3)
-//
-def int_hexagon_A4_vrmaxuw :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxuw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminb,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminb :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxb,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxb :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxb">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxub,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxub :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxub">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminuh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminuh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminuh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxuh,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxuh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxuh">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vminuw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vminuw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminuw">;
-//
-// BUILTIN_INFO(HEXAGON.A2_vmaxuw,DI_ftype_DIDI,2)
-//
-def int_hexagon_A2_vmaxuw :
-Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxuw">;
-//
-// BUILTIN_INFO(HEXAGON.A4_modwrapu,SI_ftype_SISI,2)
-//
-def int_hexagon_A4_modwrapu :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_modwrapu">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfadd,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfadd :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfadd">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfsub,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfsub :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfsub">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfmpy,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfmpy :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmpy">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffma,SF_ftype_SFSFSF,3)
-//
-def int_hexagon_F2_sffma :
-Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffma">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffma_sc,SF_ftype_SFSFSFQI,4)
-//
-def int_hexagon_F2_sffma_sc :
-Hexagon_sf_sfsfsfqi_Intrinsic<"HEXAGON_F2_sffma_sc">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffms,SF_ftype_SFSFSF,3)
-//
-def int_hexagon_F2_sffms :
-Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffms">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffma_lib,SF_ftype_SFSFSF,3)
-//
-def int_hexagon_F2_sffma_lib :
-Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffma_lib">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffms_lib,SF_ftype_SFSFSF,3)
-//
-def int_hexagon_F2_sffms_lib :
-Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffms_lib">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfcmpeq,QI_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfcmpeq :
-Hexagon_si_sfsf_Intrinsic<"HEXAGON_F2_sfcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfcmpgt,QI_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfcmpgt :
-Hexagon_si_sfsf_Intrinsic<"HEXAGON_F2_sfcmpgt">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfcmpge,QI_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfcmpge :
-Hexagon_si_sfsf_Intrinsic<"HEXAGON_F2_sfcmpge">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfcmpuo,QI_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfcmpuo :
-Hexagon_si_sfsf_Intrinsic<"HEXAGON_F2_sfcmpuo">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfmax,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfmax :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmax">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfmin,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sfmin :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmin">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfclass,QI_ftype_SFSI,2)
-//
-def int_hexagon_F2_sfclass :
-Hexagon_si_sfsi_Intrinsic<"HEXAGON_F2_sfclass">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfimm_p,SF_ftype_SI,1)
-//
-def int_hexagon_F2_sfimm_p :
-Hexagon_sf_si_Intrinsic<"HEXAGON_F2_sfimm_p">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sfimm_n,SF_ftype_SI,1)
-//
-def int_hexagon_F2_sfimm_n :
-Hexagon_sf_si_Intrinsic<"HEXAGON_F2_sfimm_n">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffixupn,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sffixupn :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sffixupn">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffixupd,SF_ftype_SFSF,2)
-//
-def int_hexagon_F2_sffixupd :
-Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sffixupd">;
-//
-// BUILTIN_INFO(HEXAGON.F2_sffixupr,SF_ftype_SF,1)
-//
-def int_hexagon_F2_sffixupr :
-Hexagon_sf_sf_Intrinsic<"HEXAGON_F2_sffixupr">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfcmpeq,QI_ftype_DFDF,2)
-//
-def int_hexagon_F2_dfcmpeq :
-Hexagon_si_dfdf_Intrinsic<"HEXAGON_F2_dfcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfcmpgt,QI_ftype_DFDF,2)
-//
-def int_hexagon_F2_dfcmpgt :
-Hexagon_si_dfdf_Intrinsic<"HEXAGON_F2_dfcmpgt">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfcmpge,QI_ftype_DFDF,2)
-//
-def int_hexagon_F2_dfcmpge :
-Hexagon_si_dfdf_Intrinsic<"HEXAGON_F2_dfcmpge">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfcmpuo,QI_ftype_DFDF,2)
-//
-def int_hexagon_F2_dfcmpuo :
-Hexagon_si_dfdf_Intrinsic<"HEXAGON_F2_dfcmpuo">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfclass,QI_ftype_DFSI,2)
-//
-def int_hexagon_F2_dfclass :
-Hexagon_si_dfsi_Intrinsic<"HEXAGON_F2_dfclass">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfimm_p,DF_ftype_SI,1)
-//
-def int_hexagon_F2_dfimm_p :
-Hexagon_df_si_Intrinsic<"HEXAGON_F2_dfimm_p">;
-//
-// BUILTIN_INFO(HEXAGON.F2_dfimm_n,DF_ftype_SI,1)
-//
-def int_hexagon_F2_dfimm_n :
-Hexagon_df_si_Intrinsic<"HEXAGON_F2_dfimm_n">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2df,DF_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2df :
-Hexagon_df_sf_Intrinsic<"HEXAGON_F2_conv_sf2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2sf,SF_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2sf :
-Hexagon_sf_df_Intrinsic<"HEXAGON_F2_conv_df2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_uw2sf,SF_ftype_SI,1)
-//
-def int_hexagon_F2_conv_uw2sf :
-Hexagon_sf_si_Intrinsic<"HEXAGON_F2_conv_uw2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_uw2df,DF_ftype_SI,1)
-//
-def int_hexagon_F2_conv_uw2df :
-Hexagon_df_si_Intrinsic<"HEXAGON_F2_conv_uw2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_w2sf,SF_ftype_SI,1)
-//
-def int_hexagon_F2_conv_w2sf :
-Hexagon_sf_si_Intrinsic<"HEXAGON_F2_conv_w2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_w2df,DF_ftype_SI,1)
-//
-def int_hexagon_F2_conv_w2df :
-Hexagon_df_si_Intrinsic<"HEXAGON_F2_conv_w2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_ud2sf,SF_ftype_DI,1)
-//
-def int_hexagon_F2_conv_ud2sf :
-Hexagon_sf_di_Intrinsic<"HEXAGON_F2_conv_ud2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_ud2df,DF_ftype_DI,1)
-//
-def int_hexagon_F2_conv_ud2df :
-Hexagon_df_di_Intrinsic<"HEXAGON_F2_conv_ud2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_d2sf,SF_ftype_DI,1)
-//
-def int_hexagon_F2_conv_d2sf :
-Hexagon_sf_di_Intrinsic<"HEXAGON_F2_conv_d2sf">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_d2df,DF_ftype_DI,1)
-//
-def int_hexagon_F2_conv_d2df :
-Hexagon_df_di_Intrinsic<"HEXAGON_F2_conv_d2df">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2uw,SI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2uw :
-Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2uw">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2w,SI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2w :
-Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2w">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2ud,DI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2ud :
-Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2ud">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2d,DI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2d :
-Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2d">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2uw,SI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2uw :
-Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2uw">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2w,SI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2w :
-Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2w">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2ud,DI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2ud :
-Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2ud">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2d,DI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2d :
-Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2d">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2uw_chop,SI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2uw_chop :
-Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2uw_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2w_chop,SI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2w_chop :
-Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2w_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2ud_chop,DI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2ud_chop :
-Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2ud_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_sf2d_chop,DI_ftype_SF,1)
-//
-def int_hexagon_F2_conv_sf2d_chop :
-Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2d_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2uw_chop,SI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2uw_chop :
-Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2uw_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2w_chop,SI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2w_chop :
-Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2w_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2ud_chop,DI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2ud_chop :
-Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2ud_chop">;
-//
-// BUILTIN_INFO(HEXAGON.F2_conv_df2d_chop,DI_ftype_DF,1)
-//
-def int_hexagon_F2_conv_df2d_chop :
-Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2d_chop">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_r_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asl_r_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_lsr_r_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsr_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_lsl_r_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsl_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_r_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_r_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_r_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsl_r_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_r_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_r_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_r_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsl_r_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_r_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_r_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_r_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsl_r_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_r_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_r_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_r_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsl_r_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_r_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_r_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_r_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsl_r_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_p_xor,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_r_p_xor :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_p_xor,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_r_p_xor :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_xor,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_r_p_xor :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_xor,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsl_r_p_xor :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_r_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_r_r_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_r_r_sat">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_r_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asl_r_r_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_r_r_sat">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_i_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_lsr_i_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsr_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asl_i_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_i_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_i_p :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_i_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_acc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_acc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_i_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_acc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_acc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_i_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_nac,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_nac :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_i_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_nac,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_nac :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_xacc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_xacc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_xacc,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_xacc :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_xacc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_xacc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_xacc,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_xacc :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_i_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_and,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_and :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asr_i_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_lsr_i_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_or,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_asl_i_r_or :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_i_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_and,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_and :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asr_i_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_lsr_i_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_p_or,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_asl_i_p_or :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_r_sat,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asl_i_r_sat :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_i_r_sat">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_i_r_rnd :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r_rnd">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd_goodsyntax,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_asr_i_r_rnd_goodsyntax :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_rnd,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_p_rnd :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p_rnd">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_p_rnd_goodsyntax,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_p_rnd_goodsyntax :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S4_lsli,SI_ftype_SISI,2)
-//
-def int_hexagon_S4_lsli :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_lsli">;
-//
-// BUILTIN_INFO(HEXAGON.S2_addasl_rrri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_addasl_rrri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_addasl_rrri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_andi_asl_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_andi_asl_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_andi_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_ori_asl_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_ori_asl_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_ori_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_addi_asl_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_addi_asl_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addi_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_subi_asl_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_subi_asl_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subi_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_andi_lsr_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_andi_lsr_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_andi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_ori_lsr_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_ori_lsr_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_ori_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_addi_lsr_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_addi_lsr_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S4_subi_lsr_ri,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_subi_lsr_ri :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.S2_valignib,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_valignib :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_valignib">;
-//
-// BUILTIN_INFO(HEXAGON.S2_valignrb,DI_ftype_DIDIQI,3)
-//
-def int_hexagon_S2_valignrb :
-Hexagon_di_didiqi_Intrinsic<"HEXAGON_S2_valignrb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vspliceib,DI_ftype_DIDISI,3)
-//
-def int_hexagon_S2_vspliceib :
-Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_vspliceib">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsplicerb,DI_ftype_DIDIQI,3)
-//
-def int_hexagon_S2_vsplicerb :
-Hexagon_di_didiqi_Intrinsic<"HEXAGON_S2_vsplicerb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsplatrh,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vsplatrh :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsplatrh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsplatrb,SI_ftype_SI,1)
-//
-def int_hexagon_S2_vsplatrb :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_vsplatrb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_insert,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_insert :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_insert">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tableidxb_goodsyntax,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_tableidxb_goodsyntax :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxb_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tableidxh_goodsyntax,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_tableidxh_goodsyntax :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxh_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tableidxw_goodsyntax,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_tableidxw_goodsyntax :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxw_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tableidxd_goodsyntax,SI_ftype_SISISISI,4)
-//
-def int_hexagon_S2_tableidxd_goodsyntax :
-Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.A4_bitspliti,DI_ftype_SISI,2)
-//
-def int_hexagon_A4_bitspliti :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_bitspliti">;
-//
-// BUILTIN_INFO(HEXAGON.A4_bitsplit,DI_ftype_SISI,2)
-//
-def int_hexagon_A4_bitsplit :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_bitsplit">;
-//
-// BUILTIN_INFO(HEXAGON.S4_extract,SI_ftype_SISISI,3)
-//
-def int_hexagon_S4_extract :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_extract">;
-//
-// BUILTIN_INFO(HEXAGON.S2_extractu,SI_ftype_SISISI,3)
-//
-def int_hexagon_S2_extractu :
-Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_extractu">;
-//
-// BUILTIN_INFO(HEXAGON.S2_insertp,DI_ftype_DIDISISI,4)
-//
-def int_hexagon_S2_insertp :
-Hexagon_di_didisisi_Intrinsic<"HEXAGON_S2_insertp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_extractp,DI_ftype_DISISI,3)
-//
-def int_hexagon_S4_extractp :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_S4_extractp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_extractup,DI_ftype_DISISI,3)
-//
-def int_hexagon_S2_extractup :
-Hexagon_di_disisi_Intrinsic<"HEXAGON_S2_extractup">;
-//
-// BUILTIN_INFO(HEXAGON.S2_insert_rp,SI_ftype_SISIDI,3)
-//
-def int_hexagon_S2_insert_rp :
-Hexagon_si_sisidi_Intrinsic<"HEXAGON_S2_insert_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_extract_rp,SI_ftype_SIDI,2)
-//
-def int_hexagon_S4_extract_rp :
-Hexagon_si_sidi_Intrinsic<"HEXAGON_S4_extract_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_extractu_rp,SI_ftype_SIDI,2)
-//
-def int_hexagon_S2_extractu_rp :
-Hexagon_si_sidi_Intrinsic<"HEXAGON_S2_extractu_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_insertp_rp,DI_ftype_DIDIDI,3)
-//
-def int_hexagon_S2_insertp_rp :
-Hexagon_di_dididi_Intrinsic<"HEXAGON_S2_insertp_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_extractp_rp,DI_ftype_DIDI,2)
-//
-def int_hexagon_S4_extractp_rp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S4_extractp_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_extractup_rp,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_extractup_rp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_extractup_rp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tstbit_i,QI_ftype_SISI,2)
-//
-def int_hexagon_S2_tstbit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_tstbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S4_ntstbit_i,QI_ftype_SISI,2)
-//
-def int_hexagon_S4_ntstbit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_ntstbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S2_setbit_i,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_setbit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_setbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S2_togglebit_i,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_togglebit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_togglebit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clrbit_i,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_clrbit_i :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_clrbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.S2_tstbit_r,QI_ftype_SISI,2)
-//
-def int_hexagon_S2_tstbit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_tstbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S4_ntstbit_r,QI_ftype_SISI,2)
-//
-def int_hexagon_S4_ntstbit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_ntstbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_setbit_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_setbit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_setbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_togglebit_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_togglebit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_togglebit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clrbit_r,SI_ftype_SISI,2)
-//
-def int_hexagon_S2_clrbit_r :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_clrbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_i_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_i_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_r_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S5_asrhub_rnd_sat_goodsyntax,SI_ftype_DISI,2)
-//
-def int_hexagon_S5_asrhub_rnd_sat_goodsyntax :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S5_asrhub_sat,SI_ftype_DISI,2)
-//
-def int_hexagon_S5_asrhub_sat :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S5_asrhub_sat">;
-//
-// BUILTIN_INFO(HEXAGON.S5_vasrhrnd_goodsyntax,DI_ftype_DISI,2)
-//
-def int_hexagon_S5_vasrhrnd_goodsyntax :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_r_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_r_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_vh,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsl_r_vh :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_i_svw_trun,SI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_i_svw_trun :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S2_asr_i_svw_trun">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_svw_trun,SI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_r_svw_trun :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S2_asr_r_svw_trun">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_i_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_i_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_i_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_i_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asr_r_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asr_r_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_asl_r_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_asl_r_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsr_r_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsr_r_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lsl_r_vw,DI_ftype_DISI,2)
-//
-def int_hexagon_S2_lsl_r_vw :
-Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vrndpackwh,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vrndpackwh :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vrndpackwh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vrndpackwhs,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vrndpackwhs :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vrndpackwhs">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsxtbh,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vsxtbh :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsxtbh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vzxtbh,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vzxtbh :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vzxtbh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsathub,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vsathub :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsathub">;
-//
-// BUILTIN_INFO(HEXAGON.S2_svsathub,SI_ftype_SI,1)
-//
-def int_hexagon_S2_svsathub :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_svsathub">;
-//
-// BUILTIN_INFO(HEXAGON.S2_svsathb,SI_ftype_SI,1)
-//
-def int_hexagon_S2_svsathb :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_svsathb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsathb,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vsathb :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsathb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vtrunohb,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vtrunohb :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vtrunohb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vtrunewh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_vtrunewh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_vtrunewh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vtrunowh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_vtrunowh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_vtrunowh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vtrunehb,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vtrunehb :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vtrunehb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsxthw,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vsxthw :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsxthw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vzxthw,DI_ftype_SI,1)
-//
-def int_hexagon_S2_vzxthw :
-Hexagon_di_si_Intrinsic<"HEXAGON_S2_vzxthw">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsatwh,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vsatwh :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsatwh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsatwuh,SI_ftype_DI,1)
-//
-def int_hexagon_S2_vsatwuh :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsatwuh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_packhl,DI_ftype_SISI,2)
-//
-def int_hexagon_S2_packhl :
-Hexagon_di_sisi_Intrinsic<"HEXAGON_S2_packhl">;
-//
-// BUILTIN_INFO(HEXAGON.A2_swiz,SI_ftype_SI,1)
-//
-def int_hexagon_A2_swiz :
-Hexagon_si_si_Intrinsic<"HEXAGON_A2_swiz">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsathub_nopack,DI_ftype_DI,1)
-//
-def int_hexagon_S2_vsathub_nopack :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsathub_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsathb_nopack,DI_ftype_DI,1)
-//
-def int_hexagon_S2_vsathb_nopack :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsathb_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsatwh_nopack,DI_ftype_DI,1)
-//
-def int_hexagon_S2_vsatwh_nopack :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsatwh_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.S2_vsatwuh_nopack,DI_ftype_DI,1)
-//
-def int_hexagon_S2_vsatwuh_nopack :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsatwuh_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.S2_shuffob,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_shuffob :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffob">;
-//
-// BUILTIN_INFO(HEXAGON.S2_shuffeb,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_shuffeb :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffeb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_shuffoh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_shuffoh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffoh">;
-//
-// BUILTIN_INFO(HEXAGON.S2_shuffeh,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_shuffeh :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffeh">;
-//
-// BUILTIN_INFO(HEXAGON.S5_popcountp,SI_ftype_DI,1)
-//
-def int_hexagon_S5_popcountp :
-Hexagon_si_di_Intrinsic<"HEXAGON_S5_popcountp">;
-//
-// BUILTIN_INFO(HEXAGON.S4_parity,SI_ftype_SISI,2)
-//
-def int_hexagon_S4_parity :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_parity">;
-//
-// BUILTIN_INFO(HEXAGON.S2_parityp,SI_ftype_DIDI,2)
-//
-def int_hexagon_S2_parityp :
-Hexagon_si_didi_Intrinsic<"HEXAGON_S2_parityp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_lfsp,DI_ftype_DIDI,2)
-//
-def int_hexagon_S2_lfsp :
-Hexagon_di_didi_Intrinsic<"HEXAGON_S2_lfsp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clbnorm,SI_ftype_SI,1)
-//
-def int_hexagon_S2_clbnorm :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_clbnorm">;
-//
-// BUILTIN_INFO(HEXAGON.S4_clbaddi,SI_ftype_SISI,2)
-//
-def int_hexagon_S4_clbaddi :
-Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_clbaddi">;
-//
-// BUILTIN_INFO(HEXAGON.S4_clbpnorm,SI_ftype_DI,1)
-//
-def int_hexagon_S4_clbpnorm :
-Hexagon_si_di_Intrinsic<"HEXAGON_S4_clbpnorm">;
-//
-// BUILTIN_INFO(HEXAGON.S4_clbpaddi,SI_ftype_DISI,2)
-//
-def int_hexagon_S4_clbpaddi :
-Hexagon_si_disi_Intrinsic<"HEXAGON_S4_clbpaddi">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clb,SI_ftype_SI,1)
-//
-def int_hexagon_S2_clb :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_clb">;
-//
-// BUILTIN_INFO(HEXAGON.S2_cl0,SI_ftype_SI,1)
-//
-def int_hexagon_S2_cl0 :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_cl0">;
-//
-// BUILTIN_INFO(HEXAGON.S2_cl1,SI_ftype_SI,1)
-//
-def int_hexagon_S2_cl1 :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_cl1">;
-//
-// BUILTIN_INFO(HEXAGON.S2_clbp,SI_ftype_DI,1)
-//
-def int_hexagon_S2_clbp :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_clbp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_cl0p,SI_ftype_DI,1)
-//
-def int_hexagon_S2_cl0p :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_cl0p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_cl1p,SI_ftype_DI,1)
-//
-def int_hexagon_S2_cl1p :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_cl1p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_brev,SI_ftype_SI,1)
-//
-def int_hexagon_S2_brev :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_brev">;
-//
-// BUILTIN_INFO(HEXAGON.S2_brevp,DI_ftype_DI,1)
-//
-def int_hexagon_S2_brevp :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_brevp">;
-//
-// BUILTIN_INFO(HEXAGON.S2_ct0,SI_ftype_SI,1)
-//
-def int_hexagon_S2_ct0 :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_ct0">;
-//
-// BUILTIN_INFO(HEXAGON.S2_ct1,SI_ftype_SI,1)
-//
-def int_hexagon_S2_ct1 :
-Hexagon_si_si_Intrinsic<"HEXAGON_S2_ct1">;
-//
-// BUILTIN_INFO(HEXAGON.S2_ct0p,SI_ftype_DI,1)
-//
-def int_hexagon_S2_ct0p :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_ct0p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_ct1p,SI_ftype_DI,1)
-//
-def int_hexagon_S2_ct1p :
-Hexagon_si_di_Intrinsic<"HEXAGON_S2_ct1p">;
-//
-// BUILTIN_INFO(HEXAGON.S2_interleave,DI_ftype_DI,1)
-//
-def int_hexagon_S2_interleave :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">;
-//
-// BUILTIN_INFO(HEXAGON.S2_deinterleave,DI_ftype_DI,1)
-//
-def int_hexagon_S2_deinterleave :
-Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">;
-
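// Note on the block removed above (a reading of the naming convention, for
// orientation only): each BUILTIN_INFO(HEXAGON.<name>,<ret>_ftype_<args>,<n>)
// comment records the GCC-style builtin signature that the following def
// mirrors, where SI is a 32-bit integer, DI a 64-bit integer and <n> the
// argument count. For example, the removed pair
//
//   // BUILTIN_INFO(HEXAGON.S2_asr_i_p,DI_ftype_DISI,2)
//   def int_hexagon_S2_asr_i_p :
//   Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p">;
//
// described an intrinsic returning an i64 and taking (i64, i32) operands; the
// replacement classes added later in this diff spell those LLVM types out in
// their names instead.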
//
// BUILTIN_INFO(HEXAGON.dcfetch_A,v_ftype_DI*,1)
//
@@ -4934,6042 +160,6197 @@ def int_hexagon_S4_stored_locked :
Hexagon_Intrinsic<"HEXAGON_S4_stored_locked", [llvm_i32_ty],
[llvm_ptr64_ty, llvm_i64_ty], [IntrArgMemOnly, NoCapture<0>]>;
-// V60
+def int_hexagon_vmemcpy : Hexagon_Intrinsic<"hexagon_vmemcpy",
+ [], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>]>;
-class Hexagon_v2048v2048_Intrinsic_T<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty],
- [IntrNoMem]>;
-
-// tag : V6_hi_W
-// tag : V6_lo_W
-class Hexagon_v512v1024_Intrinsic_T<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
-
-// tag : V6_hi_W_128B
-// tag : V6_lo_W_128B
-class Hexagon_v1024v2048_Intrinsic_T<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v64i32_ty],
- [IntrNoMem]>;
+def int_hexagon_vmemset : Hexagon_Intrinsic<"hexagon_vmemset",
+ [], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
-class Hexagon_v1024v1024_Intrinsic_T<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
-
-// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1)
-// tag : V6_hi
-def int_hexagon_V6_hi :
-Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_hi">;
+multiclass Hexagon_custom_circ_ld_Intrinsic<LLVMType ElTy> {
+ def NAME#_pci : Hexagon_NonGCC_Intrinsic<
+ [ElTy, llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
+ [IntrArgMemOnly, NoCapture<3>]>;
+ def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
+ [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty],
+ [IntrArgMemOnly, NoCapture<2>]>;
+}
-// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1)
-// tag : V6_lo
-def int_hexagon_V6_lo :
-Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_lo">;
+defm int_hexagon_L2_loadrub : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrb : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadruh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadri : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_L2_loadrd : Hexagon_custom_circ_ld_Intrinsic<llvm_i64_ty>;
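// Illustrative expansion (not an additional definition): each defm above
// instantiates the multiclass once per element type, so int_hexagon_L2_loadri,
// for example, produces records equivalent to
//
//   def int_hexagon_L2_loadri_pci : Hexagon_NonGCC_Intrinsic<
//     [llvm_i32_ty, llvm_ptr_ty],
//     [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
//     [IntrArgMemOnly, NoCapture<3>]>;
//   def int_hexagon_L2_loadri_pcr : Hexagon_NonGCC_Intrinsic<
//     [llvm_i32_ty, llvm_ptr_ty],
//     [llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty],
//     [IntrArgMemOnly, NoCapture<2>]>;
//
// i.e. each circular load returns both the loaded element and a pointer result
// (presumably the post-updated address).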
-// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1)
-// tag : V6_hi_128B
-def int_hexagon_V6_hi_128B :
-Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_hi_128B">;
+multiclass Hexagon_custom_circ_st_Intrinsic<LLVMType ElTy> {
+ def NAME#_pci : Hexagon_NonGCC_Intrinsic<
+ [llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
+ [IntrArgMemOnly, NoCapture<4>]>;
+ def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
+ [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
+ [IntrArgMemOnly, NoCapture<3>]>;
+}
-// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1)
-// tag : V6_lo_128B
-def int_hexagon_V6_lo_128B :
-Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_lo_128B">;
+defm int_hexagon_S2_storerb : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerh : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerf : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storeri : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
+defm int_hexagon_S2_storerd : Hexagon_custom_circ_st_Intrinsic<llvm_i64_ty>;
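// The circular stores follow the same pattern; for instance
// int_hexagon_S2_storeri yields a _pci record equivalent to
//
//   def int_hexagon_S2_storeri_pci : Hexagon_NonGCC_Intrinsic<
//     [llvm_ptr_ty],
//     [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
//     [IntrArgMemOnly, NoCapture<4>]>;
//
// plus a _pcr twin with one fewer i32 operand, mirroring the loads above.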
-// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1)
-// tag : V6_vassignp
-def int_hexagon_V6_vassignp :
-Hexagon_v1024v1024_Intrinsic_T<"HEXAGON_V6_vassignp">;
+// The front-end emits the intrinsic call with only two arguments. The third
+// argument of the builtin is already used by the front-end to write to memory
+// by generating a store.
+class Hexagon_custom_brev_ld_Intrinsic<LLVMType ElTy>
+ : Hexagon_NonGCC_Intrinsic<
+ [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrReadMem]>;
-// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1)
-// tag : V6_vassignp_128B
-def int_hexagon_V6_vassignp_128B :
-Hexagon_v2048v2048_Intrinsic_T<"HEXAGON_V6_vassignp_128B">;
+def int_hexagon_L2_loadrub_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrb_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadruh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadri_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
+def int_hexagon_L2_loadrd_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i64_ty>;
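// Resolving the class above, int_hexagon_L2_loadri_pbr, for example, is
// equivalent to
//
//   def int_hexagon_L2_loadri_pbr : Hexagon_NonGCC_Intrinsic<
//     [llvm_i32_ty, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
//     [IntrReadMem]>;
//
// two operands in, the loaded value and a pointer result out; per the comment
// above, the front-end itself stores that result (presumably the updated
// pointer) through the builtin's third argument.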
+def int_hexagon_S2_storerb_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stb">;
+def int_hexagon_S2_storerh_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sth">;
+def int_hexagon_S2_storerf_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sthhi">;
+def int_hexagon_S2_storeri_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stw">;
+def int_hexagon_S2_storerd_pbr : Hexagon_mem_memdisi_Intrinsic<"brev_std">;
//
-// Hexagon_iii_Intrinsic<string GCCIntSuffix>
-// tag : S6_rol_i_r
-class Hexagon_iii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
-
+// Masked vector stores
//
-// Hexagon_LLiLLii_Intrinsic<string GCCIntSuffix>
-// tag : S6_rol_i_p
-class Hexagon_LLiLLii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty],
- [IntrNoMem]>;
//
-// Hexagon_iiii_Intrinsic<string GCCIntSuffix>
-// tag : S6_rol_i_r_acc
-class Hexagon_iiii_Intrinsic<string GCCIntSuffix>
+// Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
+// tag: V6_vS32b_qpred_ai
+class Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
+ [IntrArgMemOnly]>;
//
-// Hexagon_LLiLLiLLii_Intrinsic<string GCCIntSuffix>
-// tag : S6_rol_i_p_acc
-class Hexagon_LLiLLiLLii_Intrinsic<string GCCIntSuffix>
+// Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
+// tag: V6_vS32b_qpred_ai_128B
+class Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v512v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_valignb
-class Hexagon_v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_qpred_ai :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai">;
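// Written out, that definition is equivalent to
//
//   def int_hexagon_V6_vS32b_qpred_ai :
//   Hexagon_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai",
//     [], [llvm_v512i1_ty, llvm_ptr_ty, llvm_v16i32_ty],
//     [IntrArgMemOnly]>;
//
// a void intrinsic whose operands are a 512-bit predicate, the store address
// and the vector to store.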
-//
-// Hexagon_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_valignb_128B
-class Hexagon_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nqpred_ai :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai">;
-//
-// Hexagon_v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vror
-class Hexagon_v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nt_qpred_ai :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai">;
-//
-// Hexagon_v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vror_128B
-class Hexagon_v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nt_nqpred_ai :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai">;
-//
-// Hexagon_v1024v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vunpackub
-class Hexagon_v1024v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_qpred_ai_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B">;
-//
-// Hexagon_v2048v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vunpackub_128B
-class Hexagon_v2048v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nqpred_ai_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B">;
-//
-// Hexagon_v1024v1024v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vunpackob
-class Hexagon_v1024v1024v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nt_qpred_ai_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B">;
-//
-// Hexagon_v2048v2048v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vunpackob_128B
-class Hexagon_v2048v2048v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vS32b_nt_nqpred_ai_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B">;
-//
-// Hexagon_v512v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vpackeb
-class Hexagon_v512v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstoreq :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">;
-//
-// Hexagon_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vpackeb_128B
-class Hexagon_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorenq :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorenq">;
-//
-// Hexagon_v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpybus_dv_128B
-class Hexagon_v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorentq :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentq">;
-//
-// Hexagon_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpybus_dv_acc_128B
-class Hexagon_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorentnq :
+Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentnq">;
-//
-// Hexagon_v512v512v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhvsat_acc
-class Hexagon_v512v512v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstoreq_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstoreq_128B">;
-//
-// Hexagon_v1024v1024v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhvsat_acc_128B
-class Hexagon_v1024v1024v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorenq_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorenq_128B">;
-//
-// Hexagon_v512v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhisat
-class Hexagon_v512v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorentq_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentq_128B">;
-//
-// Hexagon_v1024v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhisat_128B
-class Hexagon_v1024v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaskedstorentnq_128B :
+Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentnq_128B">;
-//
-// Hexagon_v512v512v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhisat_acc
-class Hexagon_v512v512v1024i_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemiiv512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
+ llvm_v16i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v1024v1024v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vdmpyhisat_acc_128B
-class Hexagon_v1024v1024v2048i_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemiiv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
+ llvm_v32i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v1024v1024ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyubi
-class Hexagon_v1024v1024ii_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemiiv2048_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
+ llvm_v64i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v2048v2048ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyubi_128B
-class Hexagon_v2048v2048ii_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemv64iiiv512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v16i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v1024v1024v1024ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyubi_acc
-class Hexagon_v1024v1024v1024ii_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemv128iiiv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v32i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v2048v2048v2048ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyubi_acc_128B
-class Hexagon_v2048v2048v2048ii_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemv64iiiv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v32i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddb_dv_128B
-class Hexagon_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vvmemv128iiiv2048_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
- [IntrNoMem]>;
+ [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v64i32_ty],
+ [IntrArgMemOnly]>;
-//
-// Hexagon_v1024v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddubh
-class Hexagon_v1024v512v512_Intrinsic<string GCCIntSuffix>
+def int_hexagon_V6_vgathermw :
+Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermw">;
+
+def int_hexagon_V6_vgathermw_128B :
+Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermw_128B">;
+
+def int_hexagon_V6_vgathermh :
+Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermh">;
+
+def int_hexagon_V6_vgathermh_128B :
+Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermh_128B">;
+
+def int_hexagon_V6_vgathermhw :
+Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermhw">;
+
+def int_hexagon_V6_vgathermhw_128B :
+Hexagon_V65_vvmemiiv2048_Intrinsic<"HEXAGON_V6_vgathermhw_128B">;
+
+def int_hexagon_V6_vgathermwq :
+Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermwq">;
+
+def int_hexagon_V6_vgathermwq_128B :
+Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermwq_128B">;
+
+def int_hexagon_V6_vgathermhq :
+Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermhq">;
+
+def int_hexagon_V6_vgathermhq_128B :
+Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhq_128B">;
+
+def int_hexagon_V6_vgathermhwq :
+Hexagon_V65_vvmemv64iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhwq">;
+
+def int_hexagon_V6_vgathermhwq_128B :
+Hexagon_V65_vvmemv128iiiv2048_Intrinsic<"HEXAGON_V6_vgathermhwq_128B">;
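// For reference, the gather definitions above resolve the same way, e.g.
//
//   def int_hexagon_V6_vgathermw :
//   Hexagon_Intrinsic<"HEXAGON_V6_vgathermw",
//     [], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty],
//     [IntrArgMemOnly]>;
//
// void intrinsics taking a destination pointer, two scalar operands and an
// index vector, with the gathered data presumably written through the pointer
// (hence IntrArgMemOnly rather than a vector return value).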
+
+class Hexagon_V65_viiv512v512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+ [], [llvm_i32_ty,llvm_i32_ty,
+ llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddubh_128B
-class Hexagon_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_viiv1024v1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+ [], [llvm_i32_ty,llvm_i32_ty,
+ llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vd0
-class Hexagon_v512_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vv64iiiv512v512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [],
- [IntrNoMem]>;
+ [], [llvm_v512i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v16i32_ty,
+ llvm_v16i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vd0_128B
-class Hexagon_v1024_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vv128iiiv1024v1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [],
- [IntrNoMem]>;
+ [], [llvm_v1024i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v32i32_ty,
+ llvm_v32i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v512v64iv512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddbq
-class Hexagon_v512v64iv512v512_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_viiv1024v512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+ [], [llvm_i32_ty,llvm_i32_ty,
+ llvm_v32i32_ty,llvm_v16i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v1024v128iv1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddbq_128B
-class Hexagon_v1024v128iv1024v1024_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_viiv2048v1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+ [], [llvm_i32_ty,llvm_i32_ty,
+ llvm_v64i32_ty,llvm_v32i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vabsh
-class Hexagon_v512v512_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vv64iiiv1024v512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty],
- [IntrNoMem]>;
+ [], [llvm_v512i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v32i32_ty,
+ llvm_v16i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vabsh_128B
-class Hexagon_v1024v1024_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_vv128iiiv2048v1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
+ [], [llvm_v1024i1_ty,llvm_i32_ty,
+ llvm_i32_ty,llvm_v64i32_ty,
+ llvm_v32i32_ty],
+ [IntrWriteMem]>;
-//
-// Hexagon_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpybv_acc
-class Hexagon_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
+class Hexagon_V65_v2048_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [llvm_v64i32_ty], [],
[IntrNoMem]>;
//
-// Hexagon_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpybv_acc_128B
-class Hexagon_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermw,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermw
+def int_hexagon_V6_vscattermw :
+Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw">;
//
-// Hexagon_v1024v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyub
-class Hexagon_v1024v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermw_128B,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermw_128B
+def int_hexagon_V6_vscattermw_128B :
+Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_128B">;
//
-// Hexagon_v2048v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyub_128B
-class Hexagon_v2048v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermh,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermh
+def int_hexagon_V6_vscattermh :
+Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh">;
//
-// Hexagon_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyub_acc
-class Hexagon_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermh_128B,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermh_128B
+def int_hexagon_V6_vscattermh_128B :
+Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_128B">;
//
-// Hexagon_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyub_acc_128B
-class Hexagon_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermw_add,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermw_add
+def int_hexagon_V6_vscattermw_add :
+Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw_add">;
//
-// Hexagon_v512v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandqrt
-class Hexagon_v512v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermw_add_128B,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermw_add_128B
+def int_hexagon_V6_vscattermw_add_128B :
+Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_add_128B">;
//
-// Hexagon_v1024v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandqrt_128B
-class Hexagon_v1024v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermh_add,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermh_add
+def int_hexagon_V6_vscattermh_add :
+Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh_add">;
//
-// Hexagon_v512v512v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandqrt_acc
-class Hexagon_v512v512v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermh_add_128B,v_ftype_SISIVIVI,4)
+// tag : V6_vscattermh_add_128B
+def int_hexagon_V6_vscattermh_add_128B :
+Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_add_128B">;
//
-// Hexagon_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandqrt_acc_128B
-class Hexagon_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermwq,v_ftype_QVSISIVIVI,5)
+// tag : V6_vscattermwq
+def int_hexagon_V6_vscattermwq :
+Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermwq">;
//
-// Hexagon_v64iv512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvrt
-class Hexagon_v64iv512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermwq_128B,v_ftype_QVSISIVIVI,5)
+// tag : V6_vscattermwq_128B
+def int_hexagon_V6_vscattermwq_128B :
+Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermwq_128B">;
//
-// Hexagon_v128iv1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvrt_128B
-class Hexagon_v128iv1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhq,v_ftype_QVSISIVIVI,5)
+// tag : V6_vscattermhq
+def int_hexagon_V6_vscattermhq :
+Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermhq">;
//
-// Hexagon_v64iv64iv512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvrt_acc
-class Hexagon_v64iv64iv512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhq_128B,v_ftype_QVSISIVIVI,5)
+// tag : V6_vscattermhq_128B
+def int_hexagon_V6_vscattermhq_128B :
+Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermhq_128B">;
//
-// Hexagon_v128iv128iv1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvrt_acc_128B
-class Hexagon_v128iv128iv1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhw,v_ftype_SISIVDVI,4)
+// tag : V6_vscattermhw
+def int_hexagon_V6_vscattermhw :
+Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw">;
//
-// Hexagon_v64iv512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vgtw
-class Hexagon_v64iv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhw_128B,v_ftype_SISIVDVI,4)
+// tag : V6_vscattermhw_128B
+def int_hexagon_V6_vscattermhw_128B :
+Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_128B">;
//
-// Hexagon_v128iv1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vgtw_128B
-class Hexagon_v128iv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhwq,v_ftype_QVSISIVDVI,5)
+// tag : V6_vscattermhwq
+def int_hexagon_V6_vscattermhwq :
+Hexagon_V65_vv64iiiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhwq">;
//
-// Hexagon_v64iv64iv512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vgtw_and
-class Hexagon_v64iv64iv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhwq_128B,v_ftype_QVSISIVDVI,5)
+// tag : V6_vscattermhwq_128B
+def int_hexagon_V6_vscattermhwq_128B :
+Hexagon_V65_vv128iiiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhwq_128B">;
//
-// Hexagon_v128iv128iv1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vgtw_and_128B
-class Hexagon_v128iv128iv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add,v_ftype_SISIVDVI,4)
+// tag : V6_vscattermhw_add
+def int_hexagon_V6_vscattermhw_add :
+Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw_add">;
//
-// Hexagon_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
+// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add_128B,v_ftype_SISIVDVI,4)
+// tag : V6_vscattermhw_add_128B
+def int_hexagon_V6_vscattermhw_add_128B :
+Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B">;
+
+// Auto-generated intrinsics
+
+// tag : S2_vsatwh
+class Hexagon_i32_i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+
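// These auto-generated classes name their signatures directly after the LLVM
// types involved. Taking the tag comment as a guide, the class above would
// back a definition of the form (a sketch of the pattern, not a definition
// added here):
//
//   def int_hexagon_S2_vsatwh :
//   Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsatwh">;
//
// carrying the same i32-result / i64-operand signature as the old
// Hexagon_si_di_Intrinsic form removed earlier in this diff.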
+// tag : V6_vrmpybusv
+class Hexagon_v16i32_v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybusv
+class Hexagon_v32i32_v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vaslw_acc
+class Hexagon_v16i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vaslw_acc
+class Hexagon_v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmux
+class Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmux
+class Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : S2_tableidxd_goodsyntax
+class Hexagon_i32_i32i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandnqrt_acc
+class Hexagon_v16i32_v16i32v512i1i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandnqrt_acc
+class Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybusi
+class Hexagon_v32i32_v32i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybusi
+class Hexagon_v64i32_v64i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vsubb_dv
+class Hexagon_v64i32_v64i32v64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
+ [IntrNoMem]>;
+
+// tag : M2_mpysu_up
+class Hexagon_i32_i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : M2_mpyud_acc_ll_s0
+class Hexagon_i64_i64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : S2_lsr_i_r_nac
+class Hexagon_i32_i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : M2_cmpysc_s0
+class Hexagon_i64_i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_lo
+class Hexagon_v16i32_v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_lo
+class Hexagon_v32i32_v64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v64i32_ty],
+ [IntrNoMem]>;
+
+// tag : S2_shuffoh
+class Hexagon_i64_i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sfmax
+class Hexagon_float_floatfloat_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_float_ty,llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : A2_vabswsat
+class Hexagon_i64_i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag :
+class Hexagon_v32i32_v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_ldnp0
+class Hexagon_v16i32_i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_ldnp0
+class Hexagon_v32i32_i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vdmpyhb
+class Hexagon_v16i32_v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vdmpyhb
+class Hexagon_v32i32_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : A4_vcmphgti
+class Hexagon_i32_i64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag :
+class Hexagon_v32i32_v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : S6_rol_i_p_or
+class Hexagon_i64_i64i64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vgtuh_and
+class Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vgtuh_and
+class Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : A2_abssat
+class Hexagon_i32_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : A2_vcmpwgtu
+class Hexagon_i32_i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vtmpybus_acc
+class Hexagon_v64i32_v64i32v64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_conv_df2uw_chop
+class Hexagon_i32_double_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_double_ty],
+ [IntrNoMem]>;
+
// tag : V6_pred_or
-class Hexagon_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
- [IntrNoMem]>;
+class Hexagon_v512i1_v512i1v512i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_or_128B
-class Hexagon_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
- [IntrNoMem]>;
+// tag : V6_pred_or
+class Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v64iv64i_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_not
-class Hexagon_v64iv64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty],
- [IntrNoMem]>;
+// tag : S2_asr_i_p_rnd_goodsyntax
+class Hexagon_i64_i64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v128iv128i_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_not_128B
-class Hexagon_v128iv128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty],
- [IntrNoMem]>;
+// tag : F2_conv_w2df
+class Hexagon_double_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_double_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_scalar2
-class Hexagon_v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vunpackuh
+class Hexagon_v32i32_v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_scalar2_128B
-class Hexagon_v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vunpackuh
+class Hexagon_v64i32_v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024v64iv512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vswap
-class Hexagon_v1024v64iv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+// tag : V6_vadduhw_acc
+class Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v2048v128iv1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vswap_128B
-class Hexagon_v2048v128iv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+// tag : V6_vadduhw_acc
+class Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vshuffvdd
-class Hexagon_v1024v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : M2_vdmacs_s0
+class Hexagon_i64_i64i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vshuffvdd_128B
-class Hexagon_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vrmpybub_rtt_acc
+class Hexagon_v32i32_v32i32v16i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybub_rtt_acc
+class Hexagon_v64i32_v64i32v32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+// tag : V6_ldu0
+class Hexagon_v16i32_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_ldu0
+class Hexagon_v32i32_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : S4_extract_rp
+class Hexagon_i32_i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vdmpyhsuisat
+class Hexagon_v16i32_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vdmpyhsuisat
+class Hexagon_v32i32_v64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : A2_addsp
+class Hexagon_i64_i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_iv512i_Intrinsic<string GCCIntSuffix>
// tag : V6_extractw
-class Hexagon_iv512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+class Hexagon_i32_v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_iv1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_extractw_128B
-class Hexagon_iv1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_extractw
+class Hexagon_i32_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_lvsplatw
-class Hexagon_v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vlutvwhi
+class Hexagon_v32i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_lvsplatw_128B
-class Hexagon_v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vlutvwhi
+class Hexagon_v64i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vgtuh
+class Hexagon_v512i1_v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vgtuh
+class Hexagon_v1024i1_v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sffma_lib
+class Hexagon_float_floatfloatfloat_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_float_ty,llvm_float_ty,llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : F2_conv_ud2df
+class Hexagon_double_i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_double_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : S2_vzxthw
+class Hexagon_i64_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vtmpyhb
+class Hexagon_v64i32_v64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vshufoeh
+class Hexagon_v32i32_v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vshufoeh
+class Hexagon_v64i32_v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vlut4
+class Hexagon_v16i32_v16i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vlut4
+class Hexagon_v32i32_v32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag :
+class Hexagon_v16i32_v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_conv_uw2sf
+class Hexagon_float_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vswap
+class Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vswap
+class Hexagon_v64i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandnqrt
+class Hexagon_v16i32_v512i1i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandnqrt
+class Hexagon_v32i32_v1024i1i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpyub
+class Hexagon_v64i32_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : A5_ACS
+class Hexagon_i64i32_i64i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty,llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vunpackob
+class Hexagon_v32i32_v32i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vunpackob
+class Hexagon_v64i32_v64i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpyhsat_acc
+class Hexagon_v32i32_v32i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpyhsat_acc
+class Hexagon_v64i32_v64i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vaddcarrysat
+class Hexagon_v16i32_v16i32v16i32v512i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vaddcarrysat
+class Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
// tag : V6_vlutvvb_oracc
-class Hexagon_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+class Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvvb_oracc_128B
-class Hexagon_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vlutvvb_oracc
+class Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybub_rtt
+class Hexagon_v32i32_v16i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrmpybub_rtt
+class Hexagon_v64i32_v32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : A4_addp_c
+class Hexagon_i64i32_i64i64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty,llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrsadubi_acc
+class Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vrsadubi_acc
+class Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_conv_df2sf
+class Hexagon_float_double_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_double_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvqv
+class Hexagon_v16i32_v512i1v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvqv
+class Hexagon_v32i32_v1024i1v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : C2_vmux
+class Hexagon_i64_i32i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty,llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sfcmpeq
+class Hexagon_i32_floatfloat_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_float_ty,llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : V6_vmpahhsat
+class Hexagon_v16i32_v16i32v16i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpahhsat
+class Hexagon_v32i32_v32i32v32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvrt
+class Hexagon_v512i1_v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvrt
+class Hexagon_v1024i1_v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vsubcarry
+class Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v16i32_ty,llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vsubcarry
+class Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B
+ : Hexagon_NonGCC_Intrinsic<
+ [llvm_v32i32_ty,llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sffixupr
+class Hexagon_float_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : V6_vandvrt_acc
+class Hexagon_v512i1_v512i1v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vandvrt_acc
+class Hexagon_v1024i1_v1024i1v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_dfsub
+class Hexagon_double_doubledouble_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_double_ty], [llvm_double_ty,llvm_double_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : V6_vmpyowh_sacc
+class Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vmpyowh_sacc
+class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
+ [IntrNoMem]>;
+
+// tag : S2_insertp
+class Hexagon_i64_i64i64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : F2_sfinvsqrta
+class Hexagon_floati32_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty,llvm_i32_ty], [llvm_float_ty],
+ [IntrNoMem, Throws]>;
+
+// tag : V6_vtran2x2_map
+class Hexagon_v16i32v16i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty,llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
+
+// tag : V6_vtran2x2_map
+class Hexagon_v32i32v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty,llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
// tag : V6_vlutvwh_oracc
-class Hexagon_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+class Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwh_oracc_128B
-class Hexagon_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+// tag : V6_vlutvwh_oracc
+class Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
-// tag: V6_vS32b_qpred_ai
-class Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
- [IntrArgMemOnly]>;
+// tag : F2_dfcmpge
+class Hexagon_i32_doubledouble_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_double_ty,llvm_double_ty],
+ [IntrNoMem, Throws]>;
-//
-// Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
-// tag: V6_vS32b_qpred_ai_128B
-class Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
- [IntrArgMemOnly]>;
+// tag : F2_conv_df2d_chop
+class Hexagon_i64_double_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_double_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r,SI_ftype_SISI,2)
-// tag : S6_rol_i_r
-def int_hexagon_S6_rol_i_r :
-Hexagon_iii_Intrinsic<"HEXAGON_S6_rol_i_r">;
+// tag : F2_conv_sf2w
+class Hexagon_i32_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_float_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p,DI_ftype_DISI,2)
-// tag : S6_rol_i_p
-def int_hexagon_S6_rol_i_p :
-Hexagon_LLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p">;
+// tag : F2_sfclass
+class Hexagon_i32_floati32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_float_ty,llvm_i32_ty],
+ [IntrNoMem, Throws]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_acc,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_acc
-def int_hexagon_S6_rol_i_r_acc :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_acc">;
+// tag : F2_conv_sf2ud_chop
+class Hexagon_i64_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_float_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_acc,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_acc
-def int_hexagon_S6_rol_i_p_acc :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_acc">;
+// tag : V6_pred_scalar2v2
+class Hexagon_v512i1_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_nac,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_nac
-def int_hexagon_S6_rol_i_r_nac :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_nac">;
+// tag : V6_pred_scalar2v2
+class Hexagon_v1024i1_i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_nac,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_nac
-def int_hexagon_S6_rol_i_p_nac :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_nac">;
+// tag : F2_sfrecipa
+class Hexagon_floati32_floatfloat_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty,llvm_i32_ty], [llvm_float_ty,llvm_float_ty],
+ [IntrNoMem, Throws]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_xacc,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_xacc
-def int_hexagon_S6_rol_i_r_xacc :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">;
+// tag : V6_vprefixqh
+class Hexagon_v16i32_v512i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v512i1_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_xacc,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_xacc
-def int_hexagon_S6_rol_i_p_xacc :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">;
+// tag : V6_vprefixqh
+class Hexagon_v32i32_v1024i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v1024i1_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_and,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_and
-def int_hexagon_S6_rol_i_r_and :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_and">;
+// tag : V6_vdmpyhisat_acc
+class Hexagon_v16i32_v16i32v32i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v32i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_r_or,SI_ftype_SISISI,3)
-// tag : S6_rol_i_r_or
-def int_hexagon_S6_rol_i_r_or :
-Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_or">;
+// tag : V6_vdmpyhisat_acc
+class Hexagon_v32i32_v32i32v64i32i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v64i32_ty,llvm_i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_and,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_and
-def int_hexagon_S6_rol_i_p_and :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_and">;
+// tag : F2_conv_ud2sf
+class Hexagon_float_i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S6_rol_i_p_or,DI_ftype_DIDISI,3)
-// tag : S6_rol_i_p_or
-def int_hexagon_S6_rol_i_p_or :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_or">;
+// tag : F2_conv_sf2df
+class Hexagon_double_float_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_double_ty], [llvm_float_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.S2_cabacencbin,DI_ftype_DIDIQI,3)
-// tag : S2_cabacencbin
-def int_hexagon_S2_cabacencbin :
-Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S2_cabacencbin">;
+// tag : F2_sffma_sc
+class Hexagon_float_floatfloatfloati32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_float_ty], [llvm_float_ty,llvm_float_ty,llvm_float_ty,llvm_i32_ty],
+ [IntrNoMem, Throws]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_valignb,VI_ftype_VIVISI,3)
-// tag : V6_valignb
-def int_hexagon_V6_valignb :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignb">;
+// tag : F2_dfclass
+class Hexagon_i32_doublei32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_double_ty,llvm_i32_ty],
+ [IntrNoMem, Throws]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_valignb_128B,VI_ftype_VIVISI,3)
-// tag : V6_valignb_128B
-def int_hexagon_V6_valignb_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignb_128B">;
+// tag : V6_vd0
+class Hexagon_v16i32__Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v16i32_ty], [],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlalignb,VI_ftype_VIVISI,3)
-// tag : V6_vlalignb
-def int_hexagon_V6_vlalignb :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignb">;
+// tag : V6_vd0
+class Hexagon_v32i32__Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v32i32_ty], [],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlalignb_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlalignb_128B
-def int_hexagon_V6_vlalignb_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignb_128B">;
+// tag : V6_vdd0
+class Hexagon_v64i32__Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_valignbi,VI_ftype_VIVISI,3)
-// tag : V6_valignbi
-def int_hexagon_V6_valignbi :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignbi">;
+// tag : S2_insert_rp
+class Hexagon_i32_i32i32i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_valignbi_128B,VI_ftype_VIVISI,3)
-// tag : V6_valignbi_128B
-def int_hexagon_V6_valignbi_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignbi_128B">;
+// tag : V6_vassignp
+class Hexagon_v64i32_v64i32_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v64i32_ty], [llvm_v64i32_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlalignbi,VI_ftype_VIVISI,3)
-// tag : V6_vlalignbi
-def int_hexagon_V6_vlalignbi :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignbi">;
+// tag : A6_vminub_RdP
+class Hexagon_i64i32_i64i64_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty,llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlalignbi_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlalignbi_128B
-def int_hexagon_V6_vlalignbi_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignbi_128B">;
+// tag : V6_pred_not
+class Hexagon_v512i1_v512i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v512i1_ty], [llvm_v512i1_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vror,VI_ftype_VISI,2)
-// tag : V6_vror
-def int_hexagon_V6_vror :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vror">;
+// tag : V6_pred_not
+class Hexagon_v1024i1_v1024i1_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_v1024i1_ty], [llvm_v1024i1_ty],
+ [IntrNoMem]>;
-//
-// BUILTIN_INFO(HEXAGON.V6_vror_128B,VI_ftype_VISI,2)
-// tag : V6_vror_128B
-def int_hexagon_V6_vror_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vror_128B">;
+// V5 Scalar Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackub,VD_ftype_VI,1)
-// tag : V6_vunpackub
-def int_hexagon_V6_vunpackub :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackub">;
+def int_hexagon_S2_asr_r_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackub_128B,VD_ftype_VI,1)
-// tag : V6_vunpackub_128B
-def int_hexagon_V6_vunpackub_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackub_128B">;
+def int_hexagon_S2_vsatwh :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsatwh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackb,VD_ftype_VI,1)
-// tag : V6_vunpackb
-def int_hexagon_V6_vunpackb :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackb">;
+def int_hexagon_S2_tableidxd_goodsyntax :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackb_128B,VD_ftype_VI,1)
-// tag : V6_vunpackb_128B
-def int_hexagon_V6_vunpackb_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackb_128B">;
+def int_hexagon_M2_mpysu_up :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysu_up">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackuh,VD_ftype_VI,1)
-// tag : V6_vunpackuh
-def int_hexagon_V6_vunpackuh :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackuh">;
+def int_hexagon_M2_mpyud_acc_ll_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackuh_128B,VD_ftype_VI,1)
-// tag : V6_vunpackuh_128B
-def int_hexagon_V6_vunpackuh_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackuh_128B">;
+def int_hexagon_M2_mpyud_acc_ll_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackh,VD_ftype_VI,1)
-// tag : V6_vunpackh
-def int_hexagon_V6_vunpackh :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackh">;
+def int_hexagon_M2_cmpysc_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpysc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackh_128B,VD_ftype_VI,1)
-// tag : V6_vunpackh_128B
-def int_hexagon_V6_vunpackh_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackh_128B">;
+def int_hexagon_M2_cmpysc_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpysc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackob,VD_ftype_VDVI,2)
-// tag : V6_vunpackob
-def int_hexagon_V6_vunpackob :
-Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackob">;
+def int_hexagon_M4_cmpyi_whc :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyi_whc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackob_128B,VD_ftype_VDVI,2)
-// tag : V6_vunpackob_128B
-def int_hexagon_V6_vunpackob_128B :
-Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackob_128B">;
+def int_hexagon_M2_mpy_sat_rnd_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackoh,VD_ftype_VDVI,2)
-// tag : V6_vunpackoh
-def int_hexagon_V6_vunpackoh :
-Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackoh">;
+def int_hexagon_M2_mpy_sat_rnd_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vunpackoh_128B,VD_ftype_VDVI,2)
-// tag : V6_vunpackoh_128B
-def int_hexagon_V6_vunpackoh_128B :
-Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackoh_128B">;
+def int_hexagon_S2_tableidxb_goodsyntax :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxb_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackeb,VI_ftype_VIVI,2)
-// tag : V6_vpackeb
-def int_hexagon_V6_vpackeb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeb">;
+def int_hexagon_S2_shuffoh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffoh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackeb_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackeb_128B
-def int_hexagon_V6_vpackeb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeb_128B">;
+def int_hexagon_F2_sfmax :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfmax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackeh,VI_ftype_VIVI,2)
-// tag : V6_vpackeh
-def int_hexagon_V6_vpackeh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeh">;
+def int_hexagon_A2_vabswsat :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabswsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackeh_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackeh_128B
-def int_hexagon_V6_vpackeh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeh_128B">;
+def int_hexagon_S2_asr_i_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackob,VI_ftype_VIVI,2)
-// tag : V6_vpackob
-def int_hexagon_V6_vpackob :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackob">;
+def int_hexagon_S2_asr_i_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackob_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackob_128B
-def int_hexagon_V6_vpackob_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackob_128B">;
+def int_hexagon_A4_combineri :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackoh,VI_ftype_VIVI,2)
-// tag : V6_vpackoh
-def int_hexagon_V6_vpackoh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackoh">;
+def int_hexagon_M2_mpy_nac_sat_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackoh_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackoh_128B
-def int_hexagon_V6_vpackoh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackoh_128B">;
+def int_hexagon_M4_vpmpyh_acc :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M4_vpmpyh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat,VI_ftype_VIVI,2)
-// tag : V6_vpackhub_sat
-def int_hexagon_V6_vpackhub_sat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhub_sat">;
+def int_hexagon_M2_vcmpy_s0_sat_i :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_i">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackhub_sat_128B
-def int_hexagon_V6_vpackhub_sat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhub_sat_128B">;
+def int_hexagon_A2_notp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_notp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackhb_sat,VI_ftype_VIVI,2)
-// tag : V6_vpackhb_sat
-def int_hexagon_V6_vpackhb_sat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhb_sat">;
+def int_hexagon_M2_mpy_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackhb_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackhb_sat_128B
-def int_hexagon_V6_vpackhb_sat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhb_sat_128B">;
+def int_hexagon_M2_mpy_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat,VI_ftype_VIVI,2)
-// tag : V6_vpackwuh_sat
-def int_hexagon_V6_vpackwuh_sat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwuh_sat">;
+def int_hexagon_C4_or_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_or_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackwuh_sat_128B
-def int_hexagon_V6_vpackwuh_sat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwuh_sat_128B">;
+def int_hexagon_M2_vmac2s_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2s_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat,VI_ftype_VIVI,2)
-// tag : V6_vpackwh_sat
-def int_hexagon_V6_vpackwh_sat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwh_sat">;
+def int_hexagon_M2_vmac2s_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2s_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vpackwh_sat_128B
-def int_hexagon_V6_vpackwh_sat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwh_sat_128B">;
+def int_hexagon_S2_brevp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_brevp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vzb,VD_ftype_VI,1)
-// tag : V6_vzb
-def int_hexagon_V6_vzb :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzb">;
+def int_hexagon_M4_pmpyw_acc :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M4_pmpyw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vzb_128B,VD_ftype_VI,1)
-// tag : V6_vzb_128B
-def int_hexagon_V6_vzb_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzb_128B">;
+def int_hexagon_S2_cl1 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_cl1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsb,VD_ftype_VI,1)
-// tag : V6_vsb
-def int_hexagon_V6_vsb :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsb">;
+def int_hexagon_C4_cmplte :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplte">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsb_128B,VD_ftype_VI,1)
-// tag : V6_vsb_128B
-def int_hexagon_V6_vsb_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsb_128B">;
+def int_hexagon_M2_mmpyul_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vzh,VD_ftype_VI,1)
-// tag : V6_vzh
-def int_hexagon_V6_vzh :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzh">;
+def int_hexagon_A2_vaddws :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddws">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vzh_128B,VD_ftype_VI,1)
-// tag : V6_vzh_128B
-def int_hexagon_V6_vzh_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzh_128B">;
+def int_hexagon_A2_maxup :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_maxup">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsh,VD_ftype_VI,1)
-// tag : V6_vsh
-def int_hexagon_V6_vsh :
-Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsh">;
+def int_hexagon_A4_vcmphgti :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsh_128B,VD_ftype_VI,1)
-// tag : V6_vsh_128B
-def int_hexagon_V6_vsh_128B :
-Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsh_128B">;
+def int_hexagon_S2_interleave :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_interleave">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus,VI_ftype_VISI,2)
-// tag : V6_vdmpybus
-def int_hexagon_V6_vdmpybus :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus">;
+def int_hexagon_M2_vrcmpyi_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyi_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_128B,VI_ftype_VISI,2)
-// tag : V6_vdmpybus_128B
-def int_hexagon_V6_vdmpybus_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_128B">;
+def int_hexagon_A2_abssat :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_abssat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc,VI_ftype_VIVISI,3)
-// tag : V6_vdmpybus_acc
-def int_hexagon_V6_vdmpybus_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus_acc">;
+def int_hexagon_A2_vcmpwgtu :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpwgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vdmpybus_acc_128B
-def int_hexagon_V6_vdmpybus_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_acc_128B">;
+def int_hexagon_C2_cmpgtu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv,VD_ftype_VDSI,2)
-// tag : V6_vdmpybus_dv
-def int_hexagon_V6_vdmpybus_dv :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv">;
+def int_hexagon_C2_cmpgtp :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_128B,VD_ftype_VDSI,2)
-// tag : V6_vdmpybus_dv_128B
-def int_hexagon_V6_vdmpybus_dv_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_128B">;
+def int_hexagon_A4_cmphgtui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vdmpybus_dv_acc
-def int_hexagon_V6_vdmpybus_dv_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc">;
+def int_hexagon_C2_cmpgti :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vdmpybus_dv_acc_128B
-def int_hexagon_V6_vdmpybus_dv_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc_128B">;
+def int_hexagon_M2_mpyi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb,VI_ftype_VISI,2)
-// tag : V6_vdmpyhb
-def int_hexagon_V6_vdmpyhb :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb">;
+def int_hexagon_F2_conv_df2uw_chop :
+Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2uw_chop">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_128B,VI_ftype_VISI,2)
-// tag : V6_vdmpyhb_128B
-def int_hexagon_V6_vdmpyhb_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_128B">;
+def int_hexagon_A4_cmpheq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhb_acc
-def int_hexagon_V6_vdmpyhb_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc">;
+def int_hexagon_M2_mpy_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhb_acc_128B
-def int_hexagon_V6_vdmpyhb_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc_128B">;
+def int_hexagon_M2_mpy_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv,VD_ftype_VDSI,2)
-// tag : V6_vdmpyhb_dv
-def int_hexagon_V6_vdmpyhb_dv :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv">;
+def int_hexagon_S2_lsr_i_r_xacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_128B,VD_ftype_VDSI,2)
-// tag : V6_vdmpyhb_dv_128B
-def int_hexagon_V6_vdmpyhb_dv_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_128B">;
+def int_hexagon_S2_vrcnegh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vrcnegh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vdmpyhb_dv_acc
-def int_hexagon_V6_vdmpyhb_dv_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">;
+def int_hexagon_S2_extractup :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S2_extractup">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vdmpyhb_dv_acc_128B
-def int_hexagon_V6_vdmpyhb_dv_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc_128B">;
+def int_hexagon_S2_asr_i_p_rnd_goodsyntax :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat,VI_ftype_VIVI,2)
-// tag : V6_vdmpyhvsat
-def int_hexagon_V6_vdmpyhvsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat">;
+def int_hexagon_S4_ntstbit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vdmpyhvsat_128B
-def int_hexagon_V6_vdmpyhvsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_128B">;
+def int_hexagon_F2_conv_w2sf :
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_conv_w2sf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vdmpyhvsat_acc
-def int_hexagon_V6_vdmpyhvsat_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc">;
+def int_hexagon_C2_not :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_not">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vdmpyhvsat_acc_128B
-def int_hexagon_V6_vdmpyhvsat_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc_128B">;
+def int_hexagon_C2_tfrpr :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_tfrpr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat,VI_ftype_VISI,2)
-// tag : V6_vdmpyhsat
-def int_hexagon_V6_vdmpyhsat :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat">;
+def int_hexagon_M2_mpy_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_128B,VI_ftype_VISI,2)
-// tag : V6_vdmpyhsat_128B
-def int_hexagon_V6_vdmpyhsat_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_128B">;
+def int_hexagon_M2_mpy_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhsat_acc
-def int_hexagon_V6_vdmpyhsat_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc">;
+def int_hexagon_A4_cmpbgt :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhsat_acc_128B
-def int_hexagon_V6_vdmpyhsat_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc_128B">;
+def int_hexagon_S2_asr_r_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat,VI_ftype_VDSI,2)
-// tag : V6_vdmpyhisat
-def int_hexagon_V6_vdmpyhisat :
-Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat">;
+def int_hexagon_A4_rcmpneqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_128B,VI_ftype_VDSI,2)
-// tag : V6_vdmpyhisat_128B
-def int_hexagon_V6_vdmpyhisat_128B :
-Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_128B">;
+def int_hexagon_S2_asl_i_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc,VI_ftype_VIVDSI,3)
-// tag : V6_vdmpyhisat_acc
-def int_hexagon_V6_vdmpyhisat_acc :
-Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc">;
+def int_hexagon_M2_subacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_subacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc_128B,VI_ftype_VIVDSI,3)
-// tag : V6_vdmpyhisat_acc_128B
-def int_hexagon_V6_vdmpyhisat_acc_128B :
-Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc_128B">;
+def int_hexagon_A2_orp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_orp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat,VI_ftype_VISI,2)
-// tag : V6_vdmpyhsusat
-def int_hexagon_V6_vdmpyhsusat :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat">;
+def int_hexagon_M2_mpyu_up :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_up">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_128B,VI_ftype_VISI,2)
-// tag : V6_vdmpyhsusat_128B
-def int_hexagon_V6_vdmpyhsusat_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_128B">;
+def int_hexagon_M2_mpy_acc_sat_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhsusat_acc
-def int_hexagon_V6_vdmpyhsusat_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc">;
+def int_hexagon_S2_asr_i_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vdmpyhsusat_acc_128B
-def int_hexagon_V6_vdmpyhsusat_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc_128B">;
+def int_hexagon_S2_asr_i_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat,VI_ftype_VDSI,2)
-// tag : V6_vdmpyhsuisat
-def int_hexagon_V6_vdmpyhsuisat :
-Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat">;
+def int_hexagon_A4_cmpbgtu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_128B,VI_ftype_VDSI,2)
-// tag : V6_vdmpyhsuisat_128B
-def int_hexagon_V6_vdmpyhsuisat_128B :
-Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">;
+def int_hexagon_A4_vcmpbeq_any :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A4_vcmpbeq_any">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc,VI_ftype_VIVDSI,3)
-// tag : V6_vdmpyhsuisat_acc
-def int_hexagon_V6_vdmpyhsuisat_acc :
-Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc">;
+def int_hexagon_A4_cmpbgti :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc_128B,VI_ftype_VIVDSI,3)
-// tag : V6_vdmpyhsuisat_acc_128B
-def int_hexagon_V6_vdmpyhsuisat_acc_128B :
-Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc_128B">;
+def int_hexagon_M2_mpyd_lh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyb,VD_ftype_VDSI,2)
-// tag : V6_vtmpyb
-def int_hexagon_V6_vtmpyb :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb">;
+def int_hexagon_S2_asl_r_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyb_128B,VD_ftype_VDSI,2)
-// tag : V6_vtmpyb_128B
-def int_hexagon_V6_vtmpyb_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_128B">;
+def int_hexagon_S2_lsr_i_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpyb_acc
-def int_hexagon_V6_vtmpyb_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb_acc">;
+def int_hexagon_A2_addsp :
+Hexagon_i64_i32i64_Intrinsic<"HEXAGON_A2_addsp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpyb_acc_128B
-def int_hexagon_V6_vtmpyb_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_acc_128B">;
+def int_hexagon_S4_vxsubaddw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpybus,VD_ftype_VDSI,2)
-// tag : V6_vtmpybus
-def int_hexagon_V6_vtmpybus :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus">;
+def int_hexagon_A4_vcmpheqi :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpheqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpybus_128B,VD_ftype_VDSI,2)
-// tag : V6_vtmpybus_128B
-def int_hexagon_V6_vtmpybus_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_128B">;
+def int_hexagon_S4_vxsubaddh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpybus_acc
-def int_hexagon_V6_vtmpybus_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus_acc">;
+def int_hexagon_M4_pmpyw :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M4_pmpyw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpybus_acc_128B
-def int_hexagon_V6_vtmpybus_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_acc_128B">;
+def int_hexagon_S2_vsathb :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsathb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyhb,VD_ftype_VDSI,2)
-// tag : V6_vtmpyhb
-def int_hexagon_V6_vtmpyhb :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb">;
+def int_hexagon_S2_asr_r_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_128B,VD_ftype_VDSI,2)
-// tag : V6_vtmpyhb_128B
-def int_hexagon_V6_vtmpyhb_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_128B">;
+def int_hexagon_M2_mpyu_acc_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpyhb_acc
-def int_hexagon_V6_vtmpyhb_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc">;
+def int_hexagon_M2_mpyu_acc_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vtmpyhb_acc_128B
-def int_hexagon_V6_vtmpyhb_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc_128B">;
+def int_hexagon_S2_lsl_r_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub,VI_ftype_VISI,2)
-// tag : V6_vrmpyub
-def int_hexagon_V6_vrmpyub :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub">;
+def int_hexagon_A2_pxorf :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_A2_pxorf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_128B,VI_ftype_VISI,2)
-// tag : V6_vrmpyub_128B
-def int_hexagon_V6_vrmpyub_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_128B">;
+def int_hexagon_C2_cmpgei :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgei">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc,VI_ftype_VIVISI,3)
-// tag : V6_vrmpyub_acc
-def int_hexagon_V6_vrmpyub_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub_acc">;
+def int_hexagon_A2_vsubub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vrmpyub_acc_128B
-def int_hexagon_V6_vrmpyub_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_acc_128B">;
+def int_hexagon_S2_asl_i_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubv,VI_ftype_VIVI,2)
-// tag : V6_vrmpyubv
-def int_hexagon_V6_vrmpyubv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv">;
+def int_hexagon_S2_asl_i_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_128B,VI_ftype_VIVI,2)
-// tag : V6_vrmpyubv_128B
-def int_hexagon_V6_vrmpyubv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_128B">;
+def int_hexagon_A4_vrminuw :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminuw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpyubv_acc
-def int_hexagon_V6_vrmpyubv_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv_acc">;
+def int_hexagon_F2_sffma :
+Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffma">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpyubv_acc_128B
-def int_hexagon_V6_vrmpyubv_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_acc_128B">;
+def int_hexagon_A2_absp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_absp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybv,VI_ftype_VIVI,2)
-// tag : V6_vrmpybv
-def int_hexagon_V6_vrmpybv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv">;
+def int_hexagon_C2_all8 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_all8">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybv_128B,VI_ftype_VIVI,2)
-// tag : V6_vrmpybv_128B
-def int_hexagon_V6_vrmpybv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_128B">;
+def int_hexagon_A4_vrminuh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpybv_acc
-def int_hexagon_V6_vrmpybv_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv_acc">;
+def int_hexagon_F2_sffma_lib :
+Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffma_lib">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpybv_acc_128B
-def int_hexagon_V6_vrmpybv_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_acc_128B">;
+def int_hexagon_M4_vrmpyoh_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubi,VD_ftype_VDSISI,3)
-// tag : V6_vrmpyubi
-def int_hexagon_V6_vrmpyubi :
-Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+def int_hexagon_M4_vrmpyoh_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_128B,VD_ftype_VDSISI,3)
-// tag : V6_vrmpyubi_128B
-def int_hexagon_V6_vrmpyubi_128B :
-Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+def int_hexagon_C2_bitsset :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsset">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc,VD_ftype_VDVDSISI,4)
-// tag : V6_vrmpyubi_acc
-def int_hexagon_V6_vrmpyubi_acc :
-Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+def int_hexagon_M2_mpysip :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysip">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc_128B,VD_ftype_VDVDSISI,4)
-// tag : V6_vrmpyubi_acc_128B
-def int_hexagon_V6_vrmpyubi_acc_128B :
-Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+def int_hexagon_M2_mpysin :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysin">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybus,VI_ftype_VISI,2)
-// tag : V6_vrmpybus
-def int_hexagon_V6_vrmpybus :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus">;
+def int_hexagon_A4_boundscheck :
+Hexagon_i32_i32i64_Intrinsic<"HEXAGON_A4_boundscheck">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybus_128B,VI_ftype_VISI,2)
-// tag : V6_vrmpybus_128B
-def int_hexagon_V6_vrmpybus_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_128B">;
+def int_hexagon_M5_vrmpybuu :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M5_vrmpybuu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc,VI_ftype_VIVISI,3)
-// tag : V6_vrmpybus_acc
-def int_hexagon_V6_vrmpybus_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus_acc">;
+def int_hexagon_C4_fastcorner9 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_fastcorner9">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vrmpybus_acc_128B
-def int_hexagon_V6_vrmpybus_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_acc_128B">;
+def int_hexagon_M2_vrcmpys_s1rp :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M2_vrcmpys_s1rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusi,VD_ftype_VDSISI,3)
-// tag : V6_vrmpybusi
-def int_hexagon_V6_vrmpybusi :
-Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi">;
+def int_hexagon_A2_neg :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_neg">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_128B,VD_ftype_VDSISI,3)
-// tag : V6_vrmpybusi_128B
-def int_hexagon_V6_vrmpybusi_128B :
-Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+def int_hexagon_A2_subsat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc,VD_ftype_VDVDSISI,4)
-// tag : V6_vrmpybusi_acc
-def int_hexagon_V6_vrmpybusi_acc :
-Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+def int_hexagon_S2_asl_r_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_r_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc_128B,VD_ftype_VDVDSISI,4)
-// tag : V6_vrmpybusi_acc_128B
-def int_hexagon_V6_vrmpybusi_acc_128B :
-Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+def int_hexagon_S2_asl_r_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusv,VI_ftype_VIVI,2)
-// tag : V6_vrmpybusv
-def int_hexagon_V6_vrmpybusv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv">;
+def int_hexagon_A2_vnavgh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_128B,VI_ftype_VIVI,2)
-// tag : V6_vrmpybusv_128B
-def int_hexagon_V6_vrmpybusv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_128B">;
+def int_hexagon_M2_mpy_nac_sat_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpybusv_acc
-def int_hexagon_V6_vrmpybusv_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv_acc">;
+def int_hexagon_F2_conv_ud2df :
+Hexagon_double_i64_Intrinsic<"HEXAGON_F2_conv_ud2df">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vrmpybusv_acc_128B
-def int_hexagon_V6_vrmpybusv_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_acc_128B">;
+def int_hexagon_A2_vnavgw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdsaduh,VD_ftype_VDSI,2)
-// tag : V6_vdsaduh
-def int_hexagon_V6_vdsaduh :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh">;
+def int_hexagon_S2_asl_i_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdsaduh_128B,VD_ftype_VDSI,2)
-// tag : V6_vdsaduh_128B
-def int_hexagon_V6_vdsaduh_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_128B">;
+def int_hexagon_S4_subi_lsr_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vdsaduh_acc
-def int_hexagon_V6_vdsaduh_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh_acc">;
+def int_hexagon_S2_vzxthw :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vzxthw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vdsaduh_acc_128B
-def int_hexagon_V6_vdsaduh_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">;
+def int_hexagon_F2_sfadd :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfadd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrsadubi,VD_ftype_VDSISI,3)
-// tag : V6_vrsadubi
-def int_hexagon_V6_vrsadubi :
-Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi">;
+def int_hexagon_A2_sub :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_sub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrsadubi_128B,VD_ftype_VDSISI,3)
-// tag : V6_vrsadubi_128B
-def int_hexagon_V6_vrsadubi_128B :
-Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+def int_hexagon_M2_vmac2su_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2su_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc,VD_ftype_VDVDSISI,4)
-// tag : V6_vrsadubi_acc
-def int_hexagon_V6_vrsadubi_acc :
-Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+def int_hexagon_M2_vmac2su_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2su_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc_128B,VD_ftype_VDVDSISI,4)
-// tag : V6_vrsadubi_acc_128B
-def int_hexagon_V6_vrsadubi_acc_128B :
-Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+def int_hexagon_M2_dpmpyss_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrw,VI_ftype_VISI,2)
-// tag : V6_vasrw
-def int_hexagon_V6_vasrw :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrw">;
+def int_hexagon_S2_insert :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_insert">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrw_128B,VI_ftype_VISI,2)
-// tag : V6_vasrw_128B
-def int_hexagon_V6_vasrw_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_128B">;
+def int_hexagon_S2_packhl :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_S2_packhl">;
+def int_hexagon_A4_vcmpwgti :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslw,VI_ftype_VISI,2)
-// tag : V6_vaslw
-def int_hexagon_V6_vaslw :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslw">;
+def int_hexagon_A2_vavguwr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguwr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslw_128B,VI_ftype_VISI,2)
-// tag : V6_vaslw_128B
-def int_hexagon_V6_vaslw_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_128B">;
+def int_hexagon_S2_asl_r_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrw,VI_ftype_VISI,2)
-// tag : V6_vlsrw
-def int_hexagon_V6_vlsrw :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrw">;
+def int_hexagon_A2_svsubhs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svsubhs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrw_128B,VI_ftype_VISI,2)
-// tag : V6_vlsrw_128B
-def int_hexagon_V6_vlsrw_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrw_128B">;
+def int_hexagon_A2_addh_l16_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwv,VI_ftype_VIVI,2)
-// tag : V6_vasrwv
-def int_hexagon_V6_vasrwv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrwv">;
+def int_hexagon_M4_and_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwv_128B,VI_ftype_VIVI,2)
-// tag : V6_vasrwv_128B
-def int_hexagon_V6_vasrwv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrwv_128B">;
+def int_hexagon_F2_conv_d2df :
+Hexagon_double_i64_Intrinsic<"HEXAGON_F2_conv_d2df">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslwv,VI_ftype_VIVI,2)
-// tag : V6_vaslwv
-def int_hexagon_V6_vaslwv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslwv">;
+def int_hexagon_C2_cmpgtui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslwv_128B,VI_ftype_VIVI,2)
-// tag : V6_vaslwv_128B
-def int_hexagon_V6_vaslwv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslwv_128B">;
+def int_hexagon_A2_vconj :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vconj">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrwv,VI_ftype_VIVI,2)
-// tag : V6_vlsrwv
-def int_hexagon_V6_vlsrwv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrwv">;
+def int_hexagon_S2_lsr_r_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_r_vw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrwv_128B,VI_ftype_VIVI,2)
-// tag : V6_vlsrwv_128B
-def int_hexagon_V6_vlsrwv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrwv_128B">;
+def int_hexagon_S2_lsr_r_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrh,VI_ftype_VISI,2)
-// tag : V6_vasrh
-def int_hexagon_V6_vasrh :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrh">;
+def int_hexagon_A2_subh_l16_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrh_128B,VI_ftype_VISI,2)
-// tag : V6_vasrh_128B
-def int_hexagon_V6_vasrh_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_128B">;
+def int_hexagon_S4_vxsubaddhr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddhr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslh,VI_ftype_VISI,2)
-// tag : V6_vaslh
-def int_hexagon_V6_vaslh :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslh">;
+def int_hexagon_S2_clbp :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_clbp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslh_128B,VI_ftype_VISI,2)
-// tag : V6_vaslh_128B
-def int_hexagon_V6_vaslh_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_128B">;
+def int_hexagon_S2_deinterleave :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_deinterleave">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrh,VI_ftype_VISI,2)
-// tag : V6_vlsrh
-def int_hexagon_V6_vlsrh :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrh">;
+def int_hexagon_C2_any8 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_any8">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrh_128B,VI_ftype_VISI,2)
-// tag : V6_vlsrh_128B
-def int_hexagon_V6_vlsrh_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrh_128B">;
+def int_hexagon_S2_togglebit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhv,VI_ftype_VIVI,2)
-// tag : V6_vasrhv
-def int_hexagon_V6_vasrhv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrhv">;
+def int_hexagon_S2_togglebit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_i">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhv_128B,VI_ftype_VIVI,2)
-// tag : V6_vasrhv_128B
-def int_hexagon_V6_vasrhv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrhv_128B">;
+def int_hexagon_F2_conv_uw2sf :
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_conv_uw2sf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslhv,VI_ftype_VIVI,2)
-// tag : V6_vaslhv
-def int_hexagon_V6_vaslhv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslhv">;
+def int_hexagon_S2_vsathb_nopack :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsathb_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslhv_128B,VI_ftype_VIVI,2)
-// tag : V6_vaslhv_128B
-def int_hexagon_V6_vaslhv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslhv_128B">;
+def int_hexagon_M2_cmacs_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrhv,VI_ftype_VIVI,2)
-// tag : V6_vlsrhv
-def int_hexagon_V6_vlsrhv :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrhv">;
+def int_hexagon_M2_cmacs_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrhv_128B,VI_ftype_VIVI,2)
-// tag : V6_vlsrhv_128B
-def int_hexagon_V6_vlsrhv_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrhv_128B">;
+def int_hexagon_M2_mpy_sat_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwh,VI_ftype_VIVISI,3)
-// tag : V6_vasrwh
-def int_hexagon_V6_vasrwh :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwh">;
+def int_hexagon_M2_mpy_sat_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwh_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwh_128B
-def int_hexagon_V6_vasrwh_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwh_128B">;
+def int_hexagon_M2_mmacuhs_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacuhs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwhsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrwhsat
-def int_hexagon_V6_vasrwhsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhsat">;
+def int_hexagon_M2_mmacuhs_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacuhs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwhsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwhsat_128B
-def int_hexagon_V6_vasrwhsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhsat_128B">;
+def int_hexagon_S2_clrbit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrwhrndsat
-def int_hexagon_V6_vasrwhrndsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhrndsat">;
+def int_hexagon_C4_or_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_or_andn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwhrndsat_128B
-def int_hexagon_V6_vasrwhrndsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhrndsat_128B">;
+def int_hexagon_S2_asl_r_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrwuhsat
-def int_hexagon_V6_vasrwuhsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwuhsat">;
+def int_hexagon_S2_asl_i_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwuhsat_128B
-def int_hexagon_V6_vasrwuhsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwuhsat_128B">;
+def int_hexagon_A4_vcmpwgtui :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgtui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundwh,VI_ftype_VIVI,2)
-// tag : V6_vroundwh
-def int_hexagon_V6_vroundwh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwh">;
+def int_hexagon_M4_vrmpyoh_acc_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundwh_128B,VI_ftype_VIVI,2)
-// tag : V6_vroundwh_128B
-def int_hexagon_V6_vroundwh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwh_128B">;
+def int_hexagon_M4_vrmpyoh_acc_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundwuh,VI_ftype_VIVI,2)
-// tag : V6_vroundwuh
-def int_hexagon_V6_vroundwuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwuh">;
+def int_hexagon_A4_vrmaxh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundwuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vroundwuh_128B
-def int_hexagon_V6_vroundwuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwuh_128B">;
+def int_hexagon_A2_vcmpbeq :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpbeq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhubsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrhubsat
-def int_hexagon_V6_vasrhubsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubsat">;
+def int_hexagon_A2_vcmphgt :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmphgt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhubsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrhubsat_128B
-def int_hexagon_V6_vasrhubsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubsat_128B">;
+def int_hexagon_A2_vnavgwcr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgwcr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrhubrndsat
-def int_hexagon_V6_vasrhubrndsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubrndsat">;
+def int_hexagon_M2_vrcmacr_s0c :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrcmacr_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrhubrndsat_128B
-def int_hexagon_V6_vasrhubrndsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubrndsat_128B">;
+def int_hexagon_A2_vavgwcr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgwcr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrhbrndsat
-def int_hexagon_V6_vasrhbrndsat :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhbrndsat">;
+def int_hexagon_S2_asl_i_p_xacc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrhbrndsat_128B
-def int_hexagon_V6_vasrhbrndsat_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhbrndsat_128B">;
+def int_hexagon_A4_vrmaxw :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundhb,VI_ftype_VIVI,2)
-// tag : V6_vroundhb
-def int_hexagon_V6_vroundhb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhb">;
+def int_hexagon_A2_vnavghr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavghr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundhb_128B,VI_ftype_VIVI,2)
-// tag : V6_vroundhb_128B
-def int_hexagon_V6_vroundhb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhb_128B">;
+def int_hexagon_M4_cmpyi_wh :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyi_wh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundhub,VI_ftype_VIVI,2)
-// tag : V6_vroundhub
-def int_hexagon_V6_vroundhub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhub">;
+def int_hexagon_A2_tfrsi :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrsi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vroundhub_128B,VI_ftype_VIVI,2)
-// tag : V6_vroundhub_128B
-def int_hexagon_V6_vroundhub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhub_128B">;
+def int_hexagon_S2_asr_i_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslw_acc,VI_ftype_VIVISI,3)
-// tag : V6_vaslw_acc
-def int_hexagon_V6_vaslw_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslw_acc">;
+def int_hexagon_A2_svnavgh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svnavgh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslw_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vaslw_acc_128B
-def int_hexagon_V6_vaslw_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_acc_128B">;
+def int_hexagon_S2_lsr_i_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrw_acc,VI_ftype_VIVISI,3)
-// tag : V6_vasrw_acc
-def int_hexagon_V6_vasrw_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrw_acc">;
+def int_hexagon_M2_vmac2 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrw_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrw_acc_128B
-def int_hexagon_V6_vasrw_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_acc_128B">;
+def int_hexagon_A4_vcmphgtui :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgtui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddb,VI_ftype_VIVI,2)
-// tag : V6_vaddb
-def int_hexagon_V6_vaddb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddb">;
+def int_hexagon_A2_svavgh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svavgh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddb_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddb_128B
-def int_hexagon_V6_vaddb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_128B">;
+def int_hexagon_M4_vrmpyeh_acc_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubb,VI_ftype_VIVI,2)
-// tag : V6_vsubb
-def int_hexagon_V6_vsubb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubb">;
+def int_hexagon_M4_vrmpyeh_acc_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubb_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubb_128B
-def int_hexagon_V6_vsubb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_128B">;
+def int_hexagon_S2_lsr_i_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddb_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddb_dv
-def int_hexagon_V6_vaddb_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_dv">;
+def int_hexagon_A2_combine_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddb_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddb_dv_128B
-def int_hexagon_V6_vaddb_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">;
+def int_hexagon_M2_mpy_up :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_up">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubb_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubb_dv
-def int_hexagon_V6_vsubb_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_dv">;
+def int_hexagon_A2_combine_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_hh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubb_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubb_dv_128B
-def int_hexagon_V6_vsubb_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">;
+def int_hexagon_A2_negsat :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_negsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddh,VI_ftype_VIVI,2)
-// tag : V6_vaddh
-def int_hexagon_V6_vaddh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddh">;
+def int_hexagon_M2_mpyd_hl_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddh_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddh_128B
-def int_hexagon_V6_vaddh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_128B">;
+def int_hexagon_M2_mpyd_hl_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubh,VI_ftype_VIVI,2)
-// tag : V6_vsubh
-def int_hexagon_V6_vsubh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubh">;
+def int_hexagon_A4_bitsplit :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitsplit">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubh_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubh_128B
-def int_hexagon_V6_vsubh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_128B">;
+def int_hexagon_A2_vabshsat :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabshsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddh_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddh_dv
-def int_hexagon_V6_vaddh_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_dv">;
+def int_hexagon_M2_mpyui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddh_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddh_dv_128B
-def int_hexagon_V6_vaddh_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
+def int_hexagon_A2_addh_l16_sat_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_sat_ll">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubh_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubh_dv
-def int_hexagon_V6_vsubh_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_dv">;
+def int_hexagon_S2_lsl_r_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubh_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubh_dv_128B
-def int_hexagon_V6_vsubh_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">;
+def int_hexagon_M2_mmpyul_rs0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddw,VI_ftype_VIVI,2)
-// tag : V6_vaddw
-def int_hexagon_V6_vaddw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddw">;
+def int_hexagon_S2_asr_i_r_rnd_goodsyntax :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddw_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddw_128B
-def int_hexagon_V6_vaddw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_128B">;
+def int_hexagon_S2_lsr_r_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubw,VI_ftype_VIVI,2)
-// tag : V6_vsubw
-def int_hexagon_V6_vsubw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubw">;
+def int_hexagon_C2_cmplt :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmplt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubw_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubw_128B
-def int_hexagon_V6_vsubw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_128B">;
+def int_hexagon_M2_cmacr_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddw_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddw_dv
-def int_hexagon_V6_vaddw_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_dv">;
+def int_hexagon_M4_or_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddw_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddw_dv_128B
-def int_hexagon_V6_vaddw_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">;
+def int_hexagon_M4_mpyrr_addi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubw_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubw_dv
-def int_hexagon_V6_vsubw_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_dv">;
+def int_hexagon_S4_or_andi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubw_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubw_dv_128B
-def int_hexagon_V6_vsubw_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">;
+def int_hexagon_M2_mpy_sat_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubsat,VI_ftype_VIVI,2)
-// tag : V6_vaddubsat
-def int_hexagon_V6_vaddubsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddubsat">;
+def int_hexagon_M2_mpy_sat_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddubsat_128B
-def int_hexagon_V6_vaddubsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_128B">;
+def int_hexagon_M4_mpyrr_addr :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddubsat_dv
-def int_hexagon_V6_vaddubsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_dv">;
+def int_hexagon_M2_mmachs_rs0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddubsat_dv_128B
-def int_hexagon_V6_vaddubsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddubsat_dv_128B">;
+def int_hexagon_M2_mmachs_rs1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububsat,VI_ftype_VIVI,2)
-// tag : V6_vsububsat
-def int_hexagon_V6_vsububsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsububsat">;
+def int_hexagon_M2_vrcmpyr_s0c :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyr_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsububsat_128B
-def int_hexagon_V6_vsububsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_128B">;
+def int_hexagon_M2_mpy_acc_sat_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsububsat_dv
-def int_hexagon_V6_vsububsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_dv">;
+def int_hexagon_M2_mpyd_acc_ll_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsububsat_dv_128B
-def int_hexagon_V6_vsububsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsububsat_dv_128B">;
+def int_hexagon_F2_sffixupn :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sffixupn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhsat,VI_ftype_VIVI,2)
-// tag : V6_vadduhsat
-def int_hexagon_V6_vadduhsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vadduhsat">;
+def int_hexagon_M2_mpyd_acc_lh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vadduhsat_128B
-def int_hexagon_V6_vadduhsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_128B">;
+def int_hexagon_M2_mpyd_acc_lh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vadduhsat_dv
-def int_hexagon_V6_vadduhsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_dv">;
+def int_hexagon_M2_mpy_rnd_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vadduhsat_dv_128B
-def int_hexagon_V6_vadduhsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vadduhsat_dv_128B">;
+def int_hexagon_M2_mpy_rnd_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhsat,VI_ftype_VIVI,2)
-// tag : V6_vsubuhsat
-def int_hexagon_V6_vsubuhsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubuhsat">;
+def int_hexagon_A2_vadduhs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vadduhs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubuhsat_128B
-def int_hexagon_V6_vsubuhsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_128B">;
+def int_hexagon_A2_vsubuhs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubuhs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubuhsat_dv
-def int_hexagon_V6_vsubuhsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_dv">;
+def int_hexagon_A2_subh_h16_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubuhsat_dv_128B
-def int_hexagon_V6_vsubuhsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubuhsat_dv_128B">;
+def int_hexagon_A2_subh_h16_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_hh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhsat,VI_ftype_VIVI,2)
-// tag : V6_vaddhsat
-def int_hexagon_V6_vaddhsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddhsat">;
+def int_hexagon_A2_xorp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_xorp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddhsat_128B
-def int_hexagon_V6_vaddhsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_128B">;
+def int_hexagon_A4_tfrpcp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A4_tfrpcp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddhsat_dv
-def int_hexagon_V6_vaddhsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_dv">;
+def int_hexagon_A2_addh_h16_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_lh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddhsat_dv_128B
-def int_hexagon_V6_vaddhsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddhsat_dv_128B">;
+def int_hexagon_A2_addh_h16_sat_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhsat,VI_ftype_VIVI,2)
-// tag : V6_vsubhsat
-def int_hexagon_V6_vsubhsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubhsat">;
+def int_hexagon_A2_addh_h16_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubhsat_128B
-def int_hexagon_V6_vsubhsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
+def int_hexagon_A2_addh_h16_sat_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_sat_hh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubhsat_dv
-def int_hexagon_V6_vsubhsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_dv">;
+def int_hexagon_A2_zxtb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxtb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubhsat_dv_128B
-def int_hexagon_V6_vsubhsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubhsat_dv_128B">;
+def int_hexagon_A2_zxth :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxth">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwsat,VI_ftype_VIVI,2)
-// tag : V6_vaddwsat
-def int_hexagon_V6_vaddwsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddwsat">;
+def int_hexagon_A2_vnavgwr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgwr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddwsat_128B
-def int_hexagon_V6_vaddwsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_128B">;
+def int_hexagon_M4_or_xor :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddwsat_dv
-def int_hexagon_V6_vaddwsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_dv">;
+def int_hexagon_M2_mpyud_acc_hh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddwsat_dv_128B
-def int_hexagon_V6_vaddwsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddwsat_dv_128B">;
+def int_hexagon_M2_mpyud_acc_hh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwsat,VI_ftype_VIVI,2)
-// tag : V6_vsubwsat
-def int_hexagon_V6_vsubwsat :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubwsat">;
+def int_hexagon_M5_vmacbsu :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M5_vmacbsu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubwsat_128B
-def int_hexagon_V6_vsubwsat_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_128B">;
+def int_hexagon_M2_dpmpyuu_acc_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_acc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubwsat_dv
-def int_hexagon_V6_vsubwsat_dv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_dv">;
+def int_hexagon_M2_mpy_rnd_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubwsat_dv_128B
-def int_hexagon_V6_vsubwsat_dv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">;
+def int_hexagon_M2_mpy_rnd_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgub,VI_ftype_VIVI,2)
-// tag : V6_vavgub
-def int_hexagon_V6_vavgub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgub">;
+def int_hexagon_F2_sffms_lib :
+Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffms_lib">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgub_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgub_128B
-def int_hexagon_V6_vavgub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgub_128B">;
+def int_hexagon_C4_cmpneqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgubrnd,VI_ftype_VIVI,2)
-// tag : V6_vavgubrnd
-def int_hexagon_V6_vavgubrnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgubrnd">;
+def int_hexagon_M4_and_xor :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgubrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgubrnd_128B
-def int_hexagon_V6_vavgubrnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgubrnd_128B">;
+def int_hexagon_A2_sat :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_A2_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguh,VI_ftype_VIVI,2)
-// tag : V6_vavguh
-def int_hexagon_V6_vavguh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguh">;
+def int_hexagon_M2_mpyd_nac_lh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguh_128B,VI_ftype_VIVI,2)
-// tag : V6_vavguh_128B
-def int_hexagon_V6_vavguh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguh_128B">;
+def int_hexagon_M2_mpyd_nac_lh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguhrnd,VI_ftype_VIVI,2)
-// tag : V6_vavguhrnd
-def int_hexagon_V6_vavguhrnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguhrnd">;
+def int_hexagon_A2_addsat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguhrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavguhrnd_128B
-def int_hexagon_V6_vavguhrnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguhrnd_128B">;
+def int_hexagon_A2_svavghs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svavghs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgh,VI_ftype_VIVI,2)
-// tag : V6_vavgh
-def int_hexagon_V6_vavgh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgh">;
+def int_hexagon_A2_vrsadub_acc :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_A2_vrsadub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgh_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgh_128B
-def int_hexagon_V6_vavgh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgh_128B">;
+def int_hexagon_C2_bitsclri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavghrnd,VI_ftype_VIVI,2)
-// tag : V6_vavghrnd
-def int_hexagon_V6_vavghrnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavghrnd">;
+def int_hexagon_A2_subh_h16_sat_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_hh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavghrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavghrnd_128B
-def int_hexagon_V6_vavghrnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavghrnd_128B">;
+def int_hexagon_A2_subh_h16_sat_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgh,VI_ftype_VIVI,2)
-// tag : V6_vnavgh
-def int_hexagon_V6_vnavgh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgh">;
+def int_hexagon_M2_mmaculs_rs0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmaculs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgh_128B,VI_ftype_VIVI,2)
-// tag : V6_vnavgh_128B
-def int_hexagon_V6_vnavgh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgh_128B">;
+def int_hexagon_M2_mmaculs_rs1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmaculs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgw,VI_ftype_VIVI,2)
-// tag : V6_vavgw
-def int_hexagon_V6_vavgw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgw">;
+def int_hexagon_M2_vradduh :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vradduh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgw_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgw_128B
-def int_hexagon_V6_vavgw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgw_128B">;
+def int_hexagon_A4_addp_c :
+Hexagon_i64i32_i64i64i32_Intrinsic<"HEXAGON_A4_addp_c">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgwrnd,VI_ftype_VIVI,2)
-// tag : V6_vavgwrnd
-def int_hexagon_V6_vavgwrnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgwrnd">;
+def int_hexagon_C2_xor :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgwrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgwrnd_128B
-def int_hexagon_V6_vavgwrnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgwrnd_128B">;
+def int_hexagon_S2_lsl_r_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgw,VI_ftype_VIVI,2)
-// tag : V6_vnavgw
-def int_hexagon_V6_vnavgw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgw">;
+def int_hexagon_M2_mmpyh_rs1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgw_128B,VI_ftype_VIVI,2)
-// tag : V6_vnavgw_128B
-def int_hexagon_V6_vnavgw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgw_128B">;
+def int_hexagon_M2_mmpyh_rs0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyh_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffub,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffub
-def int_hexagon_V6_vabsdiffub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffub">;
+def int_hexagon_F2_conv_df2ud_chop :
+Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2ud_chop">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffub_128B,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffub_128B
-def int_hexagon_V6_vabsdiffub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffub_128B">;
+def int_hexagon_C4_or_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_or_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffuh
-def int_hexagon_V6_vabsdiffuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffuh">;
+def int_hexagon_S4_vxaddsubhr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxaddsubhr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffuh_128B
-def int_hexagon_V6_vabsdiffuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffuh_128B">;
+def int_hexagon_S2_vsathub :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsathub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffh,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffh
-def int_hexagon_V6_vabsdiffh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffh">;
+def int_hexagon_F2_conv_df2sf :
+Hexagon_float_double_Intrinsic<"HEXAGON_F2_conv_df2sf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffh_128B,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffh_128B
-def int_hexagon_V6_vabsdiffh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffh_128B">;
+def int_hexagon_M2_hmmpyh_rs1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffw,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffw
-def int_hexagon_V6_vabsdiffw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffw">;
+def int_hexagon_M2_hmmpyh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsdiffw_128B,VI_ftype_VIVI,2)
-// tag : V6_vabsdiffw_128B
-def int_hexagon_V6_vabsdiffw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffw_128B">;
+def int_hexagon_A2_vavgwr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgwr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgub,VI_ftype_VIVI,2)
-// tag : V6_vnavgub
-def int_hexagon_V6_vnavgub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgub">;
+def int_hexagon_S2_tableidxh_goodsyntax :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxh_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgub_128B,VI_ftype_VIVI,2)
-// tag : V6_vnavgub_128B
-def int_hexagon_V6_vnavgub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgub_128B">;
+def int_hexagon_A2_sxth :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sxth">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubh,VD_ftype_VIVI,2)
-// tag : V6_vaddubh
-def int_hexagon_V6_vaddubh :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddubh">;
+def int_hexagon_A2_sxtb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sxtb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubh_128B,VD_ftype_VIVI,2)
-// tag : V6_vaddubh_128B
-def int_hexagon_V6_vaddubh_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddubh_128B">;
+def int_hexagon_C4_or_orn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_or_orn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububh,VD_ftype_VIVI,2)
-// tag : V6_vsububh
-def int_hexagon_V6_vsububh :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsububh">;
+def int_hexagon_M2_vrcmaci_s0c :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrcmaci_s0c">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsububh_128B,VD_ftype_VIVI,2)
-// tag : V6_vsububh_128B
-def int_hexagon_V6_vsububh_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsububh_128B">;
+def int_hexagon_A2_sxtw :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_sxtw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhw,VD_ftype_VIVI,2)
-// tag : V6_vaddhw
-def int_hexagon_V6_vaddhw :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddhw">;
+def int_hexagon_M2_vabsdiffh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vabsdiffh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhw_128B,VD_ftype_VIVI,2)
-// tag : V6_vaddhw_128B
-def int_hexagon_V6_vaddhw_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddhw_128B">;
+def int_hexagon_M2_mpy_acc_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhw,VD_ftype_VIVI,2)
-// tag : V6_vsubhw
-def int_hexagon_V6_vsubhw :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubhw">;
+def int_hexagon_M2_mpy_acc_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhw_128B,VD_ftype_VIVI,2)
-// tag : V6_vsubhw_128B
-def int_hexagon_V6_vsubhw_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubhw_128B">;
+def int_hexagon_M2_hmmpyl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhw,VD_ftype_VIVI,2)
-// tag : V6_vadduhw
-def int_hexagon_V6_vadduhw :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vadduhw">;
+def int_hexagon_S2_cl1p :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_cl1p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhw_128B,VD_ftype_VIVI,2)
-// tag : V6_vadduhw_128B
-def int_hexagon_V6_vadduhw_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vadduhw_128B">;
+def int_hexagon_M2_vabsdiffw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vabsdiffw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhw,VD_ftype_VIVI,2)
-// tag : V6_vsubuhw
-def int_hexagon_V6_vsubuhw :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubuhw">;
+def int_hexagon_A4_andnp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A4_andnp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuhw_128B,VD_ftype_VIVI,2)
-// tag : V6_vsubuhw_128B
-def int_hexagon_V6_vsubuhw_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
+def int_hexagon_C2_vmux :
+Hexagon_i64_i32i64i64_Intrinsic<"HEXAGON_C2_vmux">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vd0,VI_ftype_,0)
-// tag : V6_vd0
-def int_hexagon_V6_vd0 :
-Hexagon_v512_Intrinsic<"HEXAGON_V6_vd0">;
+def int_hexagon_S2_parityp :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_S2_parityp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vd0_128B,VI_ftype_,0)
-// tag : V6_vd0_128B
-def int_hexagon_V6_vd0_128B :
-Hexagon_v1024_Intrinsic<"HEXAGON_V6_vd0_128B">;
+def int_hexagon_S2_lsr_i_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddbq
-def int_hexagon_V6_vaddbq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbq">;
+def int_hexagon_S2_asr_i_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddbq_128B
-def int_hexagon_V6_vaddbq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
+def int_hexagon_M2_mpyu_nac_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s0">;
+def int_hexagon_M2_mpyu_nac_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubbq
-def int_hexagon_V6_vsubbq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbq">;
+def int_hexagon_F2_sfcmpeq :
+Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubbq_128B
-def int_hexagon_V6_vsubbq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+def int_hexagon_A2_vaddb_map :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddb_map">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbnq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddbnq
-def int_hexagon_V6_vaddbnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbnq">;
+def int_hexagon_S2_lsr_r_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddbnq_128B
-def int_hexagon_V6_vaddbnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
+def int_hexagon_A2_vcmpheq :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpheq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbnq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubbnq
-def int_hexagon_V6_vsubbnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbnq">;
+def int_hexagon_S2_clbnorm :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_clbnorm">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubbnq_128B
-def int_hexagon_V6_vsubbnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+def int_hexagon_M2_cnacsc_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddhq
-def int_hexagon_V6_vaddhq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhq">;
+def int_hexagon_M2_cnacsc_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddhq_128B
-def int_hexagon_V6_vaddhq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+def int_hexagon_S4_subaddi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subaddi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubhq
-def int_hexagon_V6_vsubhq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhq">;
+def int_hexagon_M2_mpyud_nac_hl_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubhq_128B
-def int_hexagon_V6_vsubhq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+def int_hexagon_M2_mpyud_nac_hl_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhnq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddhnq
-def int_hexagon_V6_vaddhnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhnq">;
+def int_hexagon_S5_vasrhrnd_goodsyntax :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddhnq_128B
-def int_hexagon_V6_vaddhnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+def int_hexagon_S2_tstbit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_r">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhnq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubhnq
-def int_hexagon_V6_vsubhnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhnq">;
+def int_hexagon_S4_vrcrotate :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubhnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubhnq_128B
-def int_hexagon_V6_vsubhnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+def int_hexagon_M2_mmachs_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddwq
-def int_hexagon_V6_vaddwq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwq">;
+def int_hexagon_M2_mmachs_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddwq_128B
-def int_hexagon_V6_vaddwq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
+def int_hexagon_S2_tstbit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubwq
-def int_hexagon_V6_vsubwq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwq">;
+def int_hexagon_M2_mpy_up_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_up_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubwq_128B
-def int_hexagon_V6_vsubwq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
+def int_hexagon_S2_extractu_rp :
+Hexagon_i32_i32i64_Intrinsic<"HEXAGON_S2_extractu_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwnq,VI_ftype_QVVIVI,3)
-// tag : V6_vaddwnq
-def int_hexagon_V6_vaddwnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwnq">;
+def int_hexagon_M2_mmpyuh_rs0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddwnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vaddwnq_128B
-def int_hexagon_V6_vaddwnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
+def int_hexagon_S2_lsr_i_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwnq,VI_ftype_QVVIVI,3)
-// tag : V6_vsubwnq
-def int_hexagon_V6_vsubwnq :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwnq">;
+def int_hexagon_M2_mpy_rnd_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubwnq_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vsubwnq_128B
-def int_hexagon_V6_vsubwnq_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+def int_hexagon_M2_mpy_rnd_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsh,VI_ftype_VI,1)
-// tag : V6_vabsh
-def int_hexagon_V6_vabsh :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh">;
+def int_hexagon_M4_or_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsh_128B,VI_ftype_VI,1)
-// tag : V6_vabsh_128B
-def int_hexagon_V6_vabsh_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_128B">;
+def int_hexagon_M2_mpyu_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsh_sat,VI_ftype_VI,1)
-// tag : V6_vabsh_sat
-def int_hexagon_V6_vabsh_sat :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh_sat">;
+def int_hexagon_M2_mpyu_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsh_sat_128B,VI_ftype_VI,1)
-// tag : V6_vabsh_sat_128B
-def int_hexagon_V6_vabsh_sat_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_sat_128B">;
+def int_hexagon_S2_asl_r_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsw,VI_ftype_VI,1)
-// tag : V6_vabsw
-def int_hexagon_V6_vabsw :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw">;
+def int_hexagon_M2_mpyu_nac_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsw_128B,VI_ftype_VI,1)
-// tag : V6_vabsw_128B
-def int_hexagon_V6_vabsw_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_128B">;
+def int_hexagon_M2_mpyu_nac_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsw_sat,VI_ftype_VI,1)
-// tag : V6_vabsw_sat
-def int_hexagon_V6_vabsw_sat :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw_sat">;
+def int_hexagon_M2_mpy_sat_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsw_sat_128B,VI_ftype_VI,1)
-// tag : V6_vabsw_sat_128B
-def int_hexagon_V6_vabsw_sat_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_sat_128B">;
+def int_hexagon_M2_mpy_sat_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybv,VD_ftype_VIVI,2)
-// tag : V6_vmpybv
-def int_hexagon_V6_vmpybv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv">;
+def int_hexagon_F2_conv_w2df :
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_conv_w2df">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpybv_128B
-def int_hexagon_V6_vmpybv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_128B">;
+def int_hexagon_A2_subh_l16_sat_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_sat_hl">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpybv_acc
-def int_hexagon_V6_vmpybv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv_acc">;
+def int_hexagon_C2_cmpeqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpybv_acc_128B
-def int_hexagon_V6_vmpybv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
+def int_hexagon_S2_asl_i_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyubv,VD_ftype_VIVI,2)
-// tag : V6_vmpyubv
-def int_hexagon_V6_vmpyubv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv">;
+def int_hexagon_S2_vcnegh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_vcnegh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyubv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyubv_128B
-def int_hexagon_V6_vmpyubv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_128B">;
+def int_hexagon_A4_vcmpweqi :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpweqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyubv_acc
-def int_hexagon_V6_vmpyubv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv_acc">;
+def int_hexagon_M2_vdmpyrs_s0 :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vdmpyrs_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyubv_acc_128B
-def int_hexagon_V6_vmpyubv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_acc_128B">;
+def int_hexagon_M2_vdmpyrs_s1 :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vdmpyrs_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybusv,VD_ftype_VIVI,2)
-// tag : V6_vmpybusv
-def int_hexagon_V6_vmpybusv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv">;
+def int_hexagon_M4_xor_xacc :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_xor_xacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybusv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpybusv_128B
-def int_hexagon_V6_vmpybusv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_128B">;
+def int_hexagon_M2_vdmpys_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vdmpys_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpybusv_acc
-def int_hexagon_V6_vmpybusv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv_acc">;
+def int_hexagon_M2_vdmpys_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vdmpys_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpybusv_acc_128B
-def int_hexagon_V6_vmpybusv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_acc_128B">;
+def int_hexagon_A2_vavgubr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgubr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabusv,VD_ftype_VDVD,2)
-// tag : V6_vmpabusv
-def int_hexagon_V6_vmpabusv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabusv">;
+def int_hexagon_M2_mpyu_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabusv_128B,VD_ftype_VDVD,2)
-// tag : V6_vmpabusv_128B
-def int_hexagon_V6_vmpabusv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabusv_128B">;
+def int_hexagon_M2_mpyu_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuuv,VD_ftype_VDVD,2)
-// tag : V6_vmpabuuv
-def int_hexagon_V6_vmpabuuv :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabuuv">;
+def int_hexagon_S2_asl_r_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuuv_128B,VD_ftype_VDVD,2)
-// tag : V6_vmpabuuv_128B
-def int_hexagon_V6_vmpabuuv_128B :
-Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabuuv_128B">;
+def int_hexagon_S2_cl0p :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_cl0p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhv,VD_ftype_VIVI,2)
-// tag : V6_vmpyhv
-def int_hexagon_V6_vmpyhv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv">;
+def int_hexagon_S2_valignib :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignib">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyhv_128B
-def int_hexagon_V6_vmpyhv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_128B">;
+def int_hexagon_F2_sffixupd :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sffixupd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyhv_acc
-def int_hexagon_V6_vmpyhv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv_acc">;
+def int_hexagon_M2_mpy_sat_rnd_hl_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyhv_acc_128B
-def int_hexagon_V6_vmpyhv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_acc_128B">;
+def int_hexagon_M2_mpy_sat_rnd_hl_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhv,VD_ftype_VIVI,2)
-// tag : V6_vmpyuhv
-def int_hexagon_V6_vmpyuhv :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv">;
+def int_hexagon_M2_cmacsc_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacsc_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyuhv_128B
-def int_hexagon_V6_vmpyuhv_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_128B">;
+def int_hexagon_M2_cmacsc_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmacsc_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyuhv_acc
-def int_hexagon_V6_vmpyuhv_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv_acc">;
+def int_hexagon_S2_ct1 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_ct1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyuhv_acc_128B
-def int_hexagon_V6_vmpyuhv_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">;
+def int_hexagon_S2_ct0 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_ct0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs,VI_ftype_VIVI,2)
-// tag : V6_vmpyhvsrs
-def int_hexagon_V6_vmpyhvsrs :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyhvsrs">;
+def int_hexagon_M2_dpmpyuu_nac_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_nac_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyhvsrs_128B
-def int_hexagon_V6_vmpyhvsrs_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhvsrs_128B">;
+def int_hexagon_M2_mmpyul_rs1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhus,VD_ftype_VIVI,2)
-// tag : V6_vmpyhus
-def int_hexagon_V6_vmpyhus :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus">;
+def int_hexagon_S4_ntstbit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_i">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhus_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyhus_128B
-def int_hexagon_V6_vmpyhus_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_128B">;
+def int_hexagon_F2_sffixupr :
+Hexagon_float_float_Intrinsic<"HEXAGON_F2_sffixupr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyhus_acc
-def int_hexagon_V6_vmpyhus_acc :
-Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus_acc">;
+def int_hexagon_S2_asr_r_p_xor :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyhus_acc_128B
-def int_hexagon_V6_vmpyhus_acc_128B :
-Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_acc_128B">;
+def int_hexagon_M2_mpyud_acc_hl_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyih,VI_ftype_VIVI,2)
-// tag : V6_vmpyih
-def int_hexagon_V6_vmpyih :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih">;
+def int_hexagon_M2_mpyud_acc_hl_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyih_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyih_128B
-def int_hexagon_V6_vmpyih_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_128B">;
+def int_hexagon_A2_vcmphgtu :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmphgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyih_acc
-def int_hexagon_V6_vmpyih_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih_acc">;
+def int_hexagon_C2_andn :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_andn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyih_acc_128B
-def int_hexagon_V6_vmpyih_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_acc_128B">;
+def int_hexagon_M2_vmpy2s_s0pack :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s0pack">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyewuh,VI_ftype_VIVI,2)
-// tag : V6_vmpyewuh
-def int_hexagon_V6_vmpyewuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyewuh">;
+def int_hexagon_S4_addaddi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addaddi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyewuh_128B
-def int_hexagon_V6_vmpyewuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyewuh_128B">;
+def int_hexagon_M2_mpyd_acc_ll_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh,VI_ftype_VIVI,2)
-// tag : V6_vmpyowh
-def int_hexagon_V6_vmpyowh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh">;
+def int_hexagon_M2_mpy_acc_sat_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyowh_128B
-def int_hexagon_V6_vmpyowh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_128B">;
+def int_hexagon_A4_rcmpeqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd,VI_ftype_VIVI,2)
-// tag : V6_vmpyowh_rnd
-def int_hexagon_V6_vmpyowh_rnd :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd">;
+def int_hexagon_M4_xor_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyowh_rnd_128B
-def int_hexagon_V6_vmpyowh_rnd_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_128B">;
+def int_hexagon_S2_asl_i_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyowh_sacc
-def int_hexagon_V6_vmpyowh_sacc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_sacc">;
+def int_hexagon_M2_mmpyuh_rs1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyowh_sacc_128B
-def int_hexagon_V6_vmpyowh_sacc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_sacc_128B">;
+def int_hexagon_S2_asr_r_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyowh_rnd_sacc
-def int_hexagon_V6_vmpyowh_rnd_sacc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc">;
+def int_hexagon_A4_round_ri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyowh_rnd_sacc_128B
-def int_hexagon_V6_vmpyowh_rnd_sacc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc_128B">;
+def int_hexagon_A2_max :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_max">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyieoh,VI_ftype_VIVI,2)
-// tag : V6_vmpyieoh
-def int_hexagon_V6_vmpyieoh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyieoh">;
+def int_hexagon_A4_round_rr :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_rr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyieoh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyieoh_128B
-def int_hexagon_V6_vmpyieoh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyieoh_128B">;
+def int_hexagon_A4_combineii :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineii">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh,VI_ftype_VIVI,2)
-// tag : V6_vmpyiewuh
-def int_hexagon_V6_vmpyiewuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh">;
+def int_hexagon_A4_combineir :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineir">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyiewuh_128B
-def int_hexagon_V6_vmpyiewuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_128B">;
+def int_hexagon_C4_and_orn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_orn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiowh,VI_ftype_VIVI,2)
-// tag : V6_vmpyiowh
-def int_hexagon_V6_vmpyiowh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiowh">;
+def int_hexagon_M5_vmacbuu :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M5_vmacbuu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiowh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmpyiowh_128B
-def int_hexagon_V6_vmpyiowh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiowh_128B">;
+def int_hexagon_A4_rcmpeq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyiewh_acc
-def int_hexagon_V6_vmpyiewh_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewh_acc">;
+def int_hexagon_M4_cmpyr_whc :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyr_whc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyiewh_acc_128B
-def int_hexagon_V6_vmpyiewh_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewh_acc_128B">;
+def int_hexagon_S2_lsr_i_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyiewuh_acc
-def int_hexagon_V6_vmpyiewuh_acc :
-Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc">;
+def int_hexagon_S2_vzxtbh :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vzxtbh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc_128B,VI_ftype_VIVIVI,3)
-// tag : V6_vmpyiewuh_acc_128B
-def int_hexagon_V6_vmpyiewuh_acc_128B :
-Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc_128B">;
+def int_hexagon_M2_mmacuhs_rs1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacuhs_rs1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyub,VD_ftype_VISI,2)
-// tag : V6_vmpyub
-def int_hexagon_V6_vmpyub :
-Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub">;
+def int_hexagon_S2_asr_r_r_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyub_128B,VD_ftype_VISI,2)
-// tag : V6_vmpyub_128B
-def int_hexagon_V6_vmpyub_128B :
-Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_128B">;
+def int_hexagon_A2_combinew :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combinew">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpyub_acc
-def int_hexagon_V6_vmpyub_acc :
-Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub_acc">;
+def int_hexagon_M2_mpy_acc_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpyub_acc_128B
-def int_hexagon_V6_vmpyub_acc_128B :
-Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_acc_128B">;
+def int_hexagon_M2_mpy_acc_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybus,VD_ftype_VISI,2)
-// tag : V6_vmpybus
-def int_hexagon_V6_vmpybus :
-Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus">;
+def int_hexagon_M2_cmpyi_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpyi_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybus_128B,VD_ftype_VISI,2)
-// tag : V6_vmpybus_128B
-def int_hexagon_V6_vmpybus_128B :
-Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_128B">;
+def int_hexagon_S2_asl_r_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpybus_acc
-def int_hexagon_V6_vmpybus_acc :
-Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus_acc">;
+def int_hexagon_S4_ori_asl_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpybus_acc_128B
-def int_hexagon_V6_vmpybus_acc_128B :
-Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_acc_128B">;
+def int_hexagon_C4_nbitsset :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsset">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabus,VD_ftype_VDSI,2)
-// tag : V6_vmpabus
-def int_hexagon_V6_vmpabus :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus">;
+def int_hexagon_M2_mpyu_acc_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabus_128B,VD_ftype_VDSI,2)
-// tag : V6_vmpabus_128B
-def int_hexagon_V6_vmpabus_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_128B">;
+def int_hexagon_M2_mpyu_acc_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vmpabus_acc
-def int_hexagon_V6_vmpabus_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus_acc">;
+def int_hexagon_M2_mpyu_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vmpabus_acc_128B
-def int_hexagon_V6_vmpabus_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_acc_128B">;
+def int_hexagon_M2_mpyu_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahb,VD_ftype_VDSI,2)
-// tag : V6_vmpahb
-def int_hexagon_V6_vmpahb :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb">;
+def int_hexagon_A2_addh_l16_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_ll">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahb_128B,VD_ftype_VDSI,2)
-// tag : V6_vmpahb_128B
-def int_hexagon_V6_vmpahb_128B :
-Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_128B">;
+def int_hexagon_S2_lsr_r_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vmpahb_acc
-def int_hexagon_V6_vmpahb_acc :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb_acc">;
+def int_hexagon_A4_modwrapu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_modwrapu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vmpahb_acc_128B
-def int_hexagon_V6_vmpahb_acc_128B :
-Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_acc_128B">;
+def int_hexagon_A4_rcmpneq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyh,VD_ftype_VISI,2)
-// tag : V6_vmpyh
-def int_hexagon_V6_vmpyh :
-Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh">;
+def int_hexagon_M2_mpyd_acc_hh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyh_128B,VD_ftype_VISI,2)
-// tag : V6_vmpyh_128B
-def int_hexagon_V6_vmpyh_128B :
-Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_128B">;
+def int_hexagon_M2_mpyd_acc_hh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpyhsat_acc
-def int_hexagon_V6_vmpyhsat_acc :
-Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc">;
+def int_hexagon_F2_sfimm_p :
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpyhsat_acc_128B
-def int_hexagon_V6_vmpyhsat_acc_128B :
-Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc_128B">;
+def int_hexagon_F2_sfimm_n :
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_n">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhss,VI_ftype_VISI,2)
-// tag : V6_vmpyhss
-def int_hexagon_V6_vmpyhss :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhss">;
+def int_hexagon_M4_cmpyr_wh :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyr_wh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhss_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyhss_128B
-def int_hexagon_V6_vmpyhss_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhss_128B">;
+def int_hexagon_S2_lsl_r_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs,VI_ftype_VISI,2)
-// tag : V6_vmpyhsrs
-def int_hexagon_V6_vmpyhsrs :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhsrs">;
+def int_hexagon_A2_vavgub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyhsrs_128B
-def int_hexagon_V6_vmpyhsrs_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhsrs_128B">;
+def int_hexagon_F2_conv_d2sf :
+Hexagon_float_i64_Intrinsic<"HEXAGON_F2_conv_d2sf">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuh,VD_ftype_VISI,2)
-// tag : V6_vmpyuh
-def int_hexagon_V6_vmpyuh :
-Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh">;
+def int_hexagon_A2_vavguh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuh_128B,VD_ftype_VISI,2)
-// tag : V6_vmpyuh_128B
-def int_hexagon_V6_vmpyuh_128B :
-Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_128B">;
+def int_hexagon_A4_cmpbeqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeqi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpyuh_acc
-def int_hexagon_V6_vmpyuh_acc :
-Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh_acc">;
+def int_hexagon_F2_sfcmpuo :
+Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpuo">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpyuh_acc_128B
-def int_hexagon_V6_vmpyuh_acc_128B :
-Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_acc_128B">;
+def int_hexagon_A2_vavguw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyihb,VI_ftype_VISI,2)
-// tag : V6_vmpyihb
-def int_hexagon_V6_vmpyihb :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb">;
+def int_hexagon_S2_asr_i_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyihb_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyihb_128B
-def int_hexagon_V6_vmpyihb_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_128B">;
+def int_hexagon_S2_vsatwh_nopack :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsatwh_nopack">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyihb_acc
-def int_hexagon_V6_vmpyihb_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb_acc">;
+def int_hexagon_M2_mpyd_hh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyihb_acc_128B
-def int_hexagon_V6_vmpyihb_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_acc_128B">;
+def int_hexagon_M2_mpyd_hh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_hh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwb,VI_ftype_VISI,2)
-// tag : V6_vmpyiwb
-def int_hexagon_V6_vmpyiwb :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb">;
+def int_hexagon_S2_lsl_r_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyiwb_128B
-def int_hexagon_V6_vmpyiwb_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_128B">;
+def int_hexagon_A2_minu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_minu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwb_acc
-def int_hexagon_V6_vmpyiwb_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc">;
+def int_hexagon_M2_mpy_sat_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwb_acc_128B
-def int_hexagon_V6_vmpyiwb_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc_128B">;
+def int_hexagon_M4_or_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_andn">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwh,VI_ftype_VISI,2)
-// tag : V6_vmpyiwh
-def int_hexagon_V6_vmpyiwh :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh">;
+def int_hexagon_A2_minp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_minp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyiwh_128B
-def int_hexagon_V6_vmpyiwh_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_128B">;
+def int_hexagon_S4_or_andix :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andix">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwh_acc
-def int_hexagon_V6_vmpyiwh_acc :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc">;
+def int_hexagon_M2_mpy_rnd_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwh_acc_128B
-def int_hexagon_V6_vmpyiwh_acc_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc_128B">;
+def int_hexagon_M2_mpy_rnd_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vand,VI_ftype_VIVI,2)
-// tag : V6_vand
-def int_hexagon_V6_vand :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vand">;
+def int_hexagon_M2_mmpyuh_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vand_128B,VI_ftype_VIVI,2)
-// tag : V6_vand_128B
-def int_hexagon_V6_vand_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vand_128B">;
+def int_hexagon_M2_mmpyuh_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vor,VI_ftype_VIVI,2)
-// tag : V6_vor
-def int_hexagon_V6_vor :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vor">;
+def int_hexagon_M2_mpy_acc_sat_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vor_128B,VI_ftype_VIVI,2)
-// tag : V6_vor_128B
-def int_hexagon_V6_vor_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vor_128B">;
+def int_hexagon_F2_sfcmpge :
+Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpge">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vxor,VI_ftype_VIVI,2)
-// tag : V6_vxor
-def int_hexagon_V6_vxor :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vxor">;
+def int_hexagon_F2_sfmin :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfmin">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vxor_128B,VI_ftype_VIVI,2)
-// tag : V6_vxor_128B
-def int_hexagon_V6_vxor_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vxor_128B">;
+def int_hexagon_F2_sfcmpgt :
+Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpgt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnot,VI_ftype_VI,1)
-// tag : V6_vnot
-def int_hexagon_V6_vnot :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnot">;
+def int_hexagon_M4_vpmpyh :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M4_vpmpyh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnot_128B,VI_ftype_VI,1)
-// tag : V6_vnot_128B
-def int_hexagon_V6_vnot_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnot_128B">;
+def int_hexagon_M2_mmacuhs_rs0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacuhs_rs0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandqrt,VI_ftype_QVSI,2)
-// tag : V6_vandqrt
-def int_hexagon_V6_vandqrt :
-Hexagon_v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt">;
+def int_hexagon_M2_mpyd_rnd_lh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandqrt_128B,VI_ftype_QVSI,2)
-// tag : V6_vandqrt_128B
-def int_hexagon_V6_vandqrt_128B :
-Hexagon_v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+def int_hexagon_M2_mpyd_rnd_lh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc,VI_ftype_VIQVSI,3)
-// tag : V6_vandqrt_acc
-def int_hexagon_V6_vandqrt_acc :
-Hexagon_v512v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
+def int_hexagon_A2_roundsat :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_A2_roundsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc_128B,VI_ftype_VIQVSI,3)
-// tag : V6_vandqrt_acc_128B
-def int_hexagon_V6_vandqrt_acc_128B :
-Hexagon_v1024v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">;
+def int_hexagon_S2_ct1p :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_ct1p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvrt,QV_ftype_VISI,2)
-// tag : V6_vandvrt
-def int_hexagon_V6_vandvrt :
-Hexagon_v64iv512i_Intrinsic<"HEXAGON_V6_vandvrt">;
+def int_hexagon_S4_extract_rp :
+Hexagon_i32_i32i64_Intrinsic<"HEXAGON_S4_extract_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvrt_128B,QV_ftype_VISI,2)
-// tag : V6_vandvrt_128B
-def int_hexagon_V6_vandvrt_128B :
-Hexagon_v128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_128B">;
+def int_hexagon_S2_lsl_r_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc,QV_ftype_QVVISI,3)
-// tag : V6_vandvrt_acc
-def int_hexagon_V6_vandvrt_acc :
-Hexagon_v64iv64iv512i_Intrinsic<"HEXAGON_V6_vandvrt_acc">;
+def int_hexagon_C4_cmplteui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteui">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc_128B,QV_ftype_QVVISI,3)
-// tag : V6_vandvrt_acc_128B
-def int_hexagon_V6_vandvrt_acc_128B :
-Hexagon_v128iv128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">;
+def int_hexagon_S4_addi_lsr_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_lsr_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw,QV_ftype_VIVI,2)
-// tag : V6_vgtw
-def int_hexagon_V6_vgtw :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtw">;
+def int_hexagon_A4_tfrcpp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A4_tfrcpp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtw_128B
-def int_hexagon_V6_vgtw_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_128B">;
+def int_hexagon_S2_asr_i_svw_trun :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_i_svw_trun">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_and
-def int_hexagon_V6_vgtw_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_and">;
+def int_hexagon_A4_cmphgti :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgti">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_and_128B
-def int_hexagon_V6_vgtw_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_and_128B">;
+def int_hexagon_A4_vrminh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_or
-def int_hexagon_V6_vgtw_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_or">;
+def int_hexagon_A4_vrminw :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_or_128B
-def int_hexagon_V6_vgtw_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_or_128B">;
+def int_hexagon_A4_cmphgtu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_xor
-def int_hexagon_V6_vgtw_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_xor">;
+def int_hexagon_S2_insertp_rp :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_S2_insertp_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtw_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtw_xor_128B
-def int_hexagon_V6_vgtw_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">;
+def int_hexagon_A2_vnavghcr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavghcr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw,QV_ftype_VIVI,2)
-// tag : V6_veqw
-def int_hexagon_V6_veqw :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqw">;
+def int_hexagon_S4_subi_asl_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_asl_ri">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_128B,QV_ftype_VIVI,2)
-// tag : V6_veqw_128B
-def int_hexagon_V6_veqw_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_128B">;
+def int_hexagon_S2_lsl_r_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_and,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_and
-def int_hexagon_V6_veqw_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_and">;
+def int_hexagon_M2_mpy_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_hh_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_and_128B
-def int_hexagon_V6_veqw_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_and_128B">;
+def int_hexagon_A2_vsubws :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubws">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_or,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_or
-def int_hexagon_V6_veqw_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_or">;
+def int_hexagon_A2_sath :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sath">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_or_128B
-def int_hexagon_V6_veqw_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_or_128B">;
+def int_hexagon_S2_asl_r_p_xor :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_xor,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_xor
-def int_hexagon_V6_veqw_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_xor">;
+def int_hexagon_A2_satb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_satb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqw_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqw_xor_128B
-def int_hexagon_V6_veqw_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_xor_128B">;
+def int_hexagon_C2_cmpltu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpltu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth,QV_ftype_VIVI,2)
-// tag : V6_vgth
-def int_hexagon_V6_vgth :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgth">;
+def int_hexagon_S2_insertp :
+Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S2_insertp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_128B,QV_ftype_VIVI,2)
-// tag : V6_vgth_128B
-def int_hexagon_V6_vgth_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_128B">;
+def int_hexagon_M2_mpyd_rnd_ll_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_and
-def int_hexagon_V6_vgth_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_and">;
+def int_hexagon_M2_mpyd_rnd_ll_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_and_128B
-def int_hexagon_V6_vgth_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_and_128B">;
+def int_hexagon_S2_lsr_i_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_nac">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_or
-def int_hexagon_V6_vgth_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_or">;
+def int_hexagon_S2_extractup_rp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_extractup_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_or_128B
-def int_hexagon_V6_vgth_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_or_128B">;
+def int_hexagon_S4_vxaddsubw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxaddsubw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_xor
-def int_hexagon_V6_vgth_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_xor">;
+def int_hexagon_S4_vxaddsubh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxaddsubh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgth_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgth_xor_128B
-def int_hexagon_V6_vgth_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_xor_128B">;
+def int_hexagon_A2_asrh :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_asrh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh,QV_ftype_VIVI,2)
-// tag : V6_veqh
-def int_hexagon_V6_veqh :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqh">;
+def int_hexagon_S4_extractp_rp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_extractp_rp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_128B,QV_ftype_VIVI,2)
-// tag : V6_veqh_128B
-def int_hexagon_V6_veqh_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_128B">;
+def int_hexagon_S2_lsr_r_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_and,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_and
-def int_hexagon_V6_veqh_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_and">;
+def int_hexagon_M2_mpyd_nac_ll_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_and_128B
-def int_hexagon_V6_veqh_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
+def int_hexagon_M2_mpyd_nac_ll_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_or,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_or
-def int_hexagon_V6_veqh_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_or">;
+def int_hexagon_C2_or :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_or_128B
-def int_hexagon_V6_veqh_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_or_128B">;
+def int_hexagon_M2_mmpyul_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_xor,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_xor
-def int_hexagon_V6_veqh_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_xor">;
+def int_hexagon_M2_vrcmacr_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrcmacr_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqh_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqh_xor_128B
-def int_hexagon_V6_veqh_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_xor_128B">;
+def int_hexagon_A2_xor :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb,QV_ftype_VIVI,2)
-// tag : V6_vgtb
-def int_hexagon_V6_vgtb :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtb">;
+def int_hexagon_A2_add :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_add">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtb_128B
-def int_hexagon_V6_vgtb_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_128B">;
+def int_hexagon_A2_vsububs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsububs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_and
-def int_hexagon_V6_vgtb_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_and">;
+def int_hexagon_M2_vmpy2s_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s1">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_and_128B
-def int_hexagon_V6_vgtb_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_and_128B">;
+def int_hexagon_M2_vmpy2s_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_or
-def int_hexagon_V6_vgtb_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_or">;
+def int_hexagon_A2_vraddub_acc :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_A2_vraddub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_or_128B
-def int_hexagon_V6_vgtb_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_or_128B">;
+def int_hexagon_F2_sfinvsqrta :
+Hexagon_floati32_float_Intrinsic<"HEXAGON_F2_sfinvsqrta">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_xor
-def int_hexagon_V6_vgtb_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_xor">;
+def int_hexagon_S2_ct0p :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_ct0p">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtb_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtb_xor_128B
-def int_hexagon_V6_vgtb_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">;
+def int_hexagon_A2_svaddh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svaddh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb,QV_ftype_VIVI,2)
-// tag : V6_veqb
-def int_hexagon_V6_veqb :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqb">;
+def int_hexagon_S2_vcrotate :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_vcrotate">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_128B,QV_ftype_VIVI,2)
-// tag : V6_veqb_128B
-def int_hexagon_V6_veqb_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_128B">;
+def int_hexagon_A2_aslh :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_aslh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_and,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_and
-def int_hexagon_V6_veqb_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_and">;
+def int_hexagon_A2_subh_h16_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_lh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_and_128B
-def int_hexagon_V6_veqb_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_and_128B">;
+def int_hexagon_A2_subh_h16_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_ll">;
+
+def int_hexagon_M2_hmmpyl_rs1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyl_rs1">;
+
+def int_hexagon_S2_asr_r_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_r_p">;
+
+def int_hexagon_S2_vsplatrh :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vsplatrh">;
+
+def int_hexagon_S2_asr_r_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_r_r">;
+
+def int_hexagon_A2_addh_h16_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_hl">;
+
+def int_hexagon_S2_vsplatrb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_vsplatrb">;
+
+def int_hexagon_A2_addh_h16_hh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_hh">;
+
+def int_hexagon_M2_cmpyr_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpyr_s0">;
+
+def int_hexagon_M2_dpmpyss_rnd_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">;
+
+def int_hexagon_C2_muxri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxri">;
+
+def int_hexagon_M2_vmac2es_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vmac2es_s0">;
+
+def int_hexagon_M2_vmac2es_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vmac2es_s1">;
+
+def int_hexagon_C2_pxfer_map :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_pxfer_map">;
+
+def int_hexagon_M2_mpyu_lh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_lh_s1">;
+
+def int_hexagon_M2_mpyu_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_lh_s0">;
+
+def int_hexagon_S2_asl_i_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_or">;
+
+def int_hexagon_M2_mpyd_acc_hl_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s0">;
+
+def int_hexagon_M2_mpyd_acc_hl_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s1">;
+
+def int_hexagon_S2_asr_r_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_nac">;
+
+def int_hexagon_A2_vaddw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddw">;
+
+def int_hexagon_S2_asr_i_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_and">;
+
+def int_hexagon_A2_vaddh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddh">;
+
+def int_hexagon_M2_mpy_nac_sat_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s1">;
+
+def int_hexagon_M2_mpy_nac_sat_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s0">;
+
+def int_hexagon_C2_cmpeqp :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpeqp">;
+
+def int_hexagon_M4_mpyri_addi :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addi">;
+
+def int_hexagon_A2_not :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_not">;
+
+def int_hexagon_S4_andi_lsr_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_lsr_ri">;
+
+def int_hexagon_M2_macsip :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsip">;
+
+def int_hexagon_A2_tfrcrr :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrcrr">;
+
+def int_hexagon_M2_macsin :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsin">;
+
+def int_hexagon_C2_orn :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_orn">;
+
+def int_hexagon_M4_and_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_andn">;
+
+def int_hexagon_F2_sfmpy :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfmpy">;
+
+def int_hexagon_M2_mpyud_nac_hh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s1">;
+
+def int_hexagon_M2_mpyud_nac_hh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s0">;
+
+def int_hexagon_S2_lsr_r_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_acc">;
+
+def int_hexagon_S2_asr_r_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_r_vw">;
+
+def int_hexagon_M4_and_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_or">;
+
+def int_hexagon_S2_asr_r_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_r_vh">;
+
+def int_hexagon_C2_mask :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_C2_mask">;
+
+def int_hexagon_M2_mpy_nac_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s0">;
+
+def int_hexagon_M2_mpy_nac_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s1">;
+
+def int_hexagon_M2_mpy_up_s1_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_up_s1_sat">;
+
+def int_hexagon_A4_vcmpbgt :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A4_vcmpbgt">;
+
+def int_hexagon_M5_vrmacbsu :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M5_vrmacbsu">;
+
+def int_hexagon_S2_tableidxw_goodsyntax :
+Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxw_goodsyntax">;
+
+def int_hexagon_A2_vrsadub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vrsadub">;
+
+def int_hexagon_A2_tfrrcr :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrrcr">;
+
+def int_hexagon_M2_vrcmpys_acc_s1 :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_M2_vrcmpys_acc_s1">;
+
+def int_hexagon_F2_dfcmpge :
+Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpge">;
+
+def int_hexagon_M2_accii :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_accii">;
+
+def int_hexagon_A5_vaddhubs :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A5_vaddhubs">;
+
+def int_hexagon_A2_vmaxw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxw">;
+
+def int_hexagon_A2_vmaxb :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxb">;
+
+def int_hexagon_A2_vmaxh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxh">;
+
+def int_hexagon_S2_vsxthw :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vsxthw">;
+
+def int_hexagon_S4_andi_asl_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_asl_ri">;
+
+def int_hexagon_S2_asl_i_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_nac">;
+
+def int_hexagon_S2_lsl_r_p_xor :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_xor">;
+
+def int_hexagon_C2_cmpgt :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgt">;
+
+def int_hexagon_F2_conv_df2d_chop :
+Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2d_chop">;
+
+def int_hexagon_M2_mpyu_nac_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s0">;
+
+def int_hexagon_M2_mpyu_nac_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s1">;
+
+def int_hexagon_F2_conv_sf2w :
+Hexagon_i32_float_Intrinsic<"HEXAGON_F2_conv_sf2w">;
+
+def int_hexagon_S2_lsr_r_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_or">;
+
+def int_hexagon_F2_sfclass :
+Hexagon_i32_floati32_Intrinsic<"HEXAGON_F2_sfclass">;
+
+def int_hexagon_M2_mpyud_acc_lh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s0">;
+
+def int_hexagon_M4_xor_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_andn">;
+
+def int_hexagon_S2_addasl_rrri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri">;
+
+def int_hexagon_M5_vdmpybsu :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M5_vdmpybsu">;
+
+def int_hexagon_M2_mpyu_nac_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s0">;
+
+def int_hexagon_M2_mpyu_nac_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s1">;
+
+def int_hexagon_A2_addi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi">;
+
+def int_hexagon_A2_addp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addp">;
+
+def int_hexagon_M2_vmpy2s_s1pack :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s1pack">;
+
+def int_hexagon_S4_clbpnorm :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S4_clbpnorm">;
+
+def int_hexagon_A4_round_rr_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_rr_sat">;
+
+def int_hexagon_M2_nacci :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_nacci">;
+
+def int_hexagon_S2_shuffeh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffeh">;
+
+def int_hexagon_S2_lsr_i_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_and">;
+
+def int_hexagon_M2_mpy_sat_rnd_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s1">;
+
+def int_hexagon_M2_mpy_sat_rnd_hh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s0">;
+
+def int_hexagon_F2_conv_sf2uw :
+Hexagon_i32_float_Intrinsic<"HEXAGON_F2_conv_sf2uw">;
+
+def int_hexagon_A2_vsubh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubh">;
+
+def int_hexagon_F2_conv_sf2ud :
+Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2ud">;
+
+def int_hexagon_A2_vsubw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubw">;
+
+def int_hexagon_A2_vcmpwgt :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpwgt">;
+
+def int_hexagon_M4_xor_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_or">;
+
+def int_hexagon_F2_conv_sf2uw_chop :
+Hexagon_i32_float_Intrinsic<"HEXAGON_F2_conv_sf2uw_chop">;
+
+def int_hexagon_S2_asl_r_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_vw">;
+
+def int_hexagon_S2_vsatwuh_nopack :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsatwuh_nopack">;
+
+def int_hexagon_S2_asl_r_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_vh">;
+
+def int_hexagon_A2_svsubuhs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svsubuhs">;
+
+def int_hexagon_M5_vmpybsu :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M5_vmpybsu">;
+
+def int_hexagon_A2_subh_l16_sat_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_sat_ll">;
+
+def int_hexagon_C4_and_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_and">;
+
+def int_hexagon_M2_mpyu_acc_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s1">;
+
+def int_hexagon_M2_mpyu_acc_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s0">;
+
+def int_hexagon_S2_lsr_r_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p">;
+
+def int_hexagon_S2_lsr_r_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r">;
+
+def int_hexagon_A4_subp_c :
+Hexagon_i64i32_i64i64i32_Intrinsic<"HEXAGON_A4_subp_c">;
+
+def int_hexagon_A2_vsubhs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubhs">;
+
+def int_hexagon_C2_vitpack :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_vitpack">;
+
+def int_hexagon_A2_vavguhr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguhr">;
+
+def int_hexagon_S2_vsplicerb :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vsplicerb">;
+
+def int_hexagon_C4_nbitsclr :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclr">;
+
+def int_hexagon_A2_vcmpbgtu :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpbgtu">;
+
+def int_hexagon_M2_cmpys_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpys_s1">;
+
+def int_hexagon_M2_cmpys_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_cmpys_s0">;
+
+def int_hexagon_F2_dfcmpuo :
+Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpuo">;
+
+def int_hexagon_S2_shuffob :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffob">;
+
+def int_hexagon_C2_and :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_and">;
+
+def int_hexagon_S5_popcountp :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S5_popcountp">;
+
+def int_hexagon_S4_extractp :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_extractp">;
+
+def int_hexagon_S2_cl0 :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_cl0">;
+
+def int_hexagon_A4_vcmpbgti :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgti">;
+
+def int_hexagon_M2_mmacls_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_s1">;
+
+def int_hexagon_M2_mmacls_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_s0">;
+
+def int_hexagon_C4_cmpneq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneq">;
+
+def int_hexagon_M2_vmac2es :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vmac2es">;
+
+def int_hexagon_M2_vdmacs_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vdmacs_s0">;
+
+def int_hexagon_M2_vdmacs_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vdmacs_s1">;
+
+def int_hexagon_M2_mpyud_ll_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_ll_s0">;
+
+def int_hexagon_M2_mpyud_ll_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_ll_s1">;
+
+def int_hexagon_S2_clb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_clb">;
+
+def int_hexagon_M2_mpy_nac_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s0">;
+
+def int_hexagon_M2_mpy_nac_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s1">;
+
+def int_hexagon_M2_mpyd_nac_hl_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s1">;
+
+def int_hexagon_M2_mpyd_nac_hl_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s0">;
+
+def int_hexagon_M2_maci :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_maci">;
+
+def int_hexagon_A2_vmaxuh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxuh">;
+
+def int_hexagon_A4_bitspliti :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitspliti">;
+
+def int_hexagon_A2_vmaxub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxub">;
+
+def int_hexagon_M2_mpyud_hh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_hh_s0">;
+
+def int_hexagon_M2_mpyud_hh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_hh_s1">;
+
+def int_hexagon_M2_vrmac_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrmac_s0">;
+
+def int_hexagon_M2_mpy_sat_lh_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s0">;
+
+def int_hexagon_S2_asl_r_r_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_sat">;
+
+def int_hexagon_F2_conv_sf2d :
+Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2d">;
+
+def int_hexagon_S2_asr_r_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_nac">;
+
+def int_hexagon_F2_dfimm_n :
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_n">;
+
+def int_hexagon_A4_cmphgt :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgt">;
+
+def int_hexagon_F2_dfimm_p :
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_p">;
+
+def int_hexagon_M2_mpyud_acc_lh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s1">;
+
+def int_hexagon_M2_vcmpy_s1_sat_r :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_r">;
+
+def int_hexagon_M4_mpyri_addr_u2 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr_u2">;
+
+def int_hexagon_M2_vcmpy_s1_sat_i :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_i">;
+
+def int_hexagon_S2_lsl_r_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_nac">;
+
+def int_hexagon_M5_vrmacbuu :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M5_vrmacbuu">;
+
+def int_hexagon_S5_asrhub_rnd_sat_goodsyntax :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax">;
+
+def int_hexagon_S2_vspliceib :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vspliceib">;
+
+def int_hexagon_M2_dpmpyss_acc_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_acc_s0">;
+
+def int_hexagon_M2_cnacs_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacs_s1">;
+
+def int_hexagon_M2_cnacs_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacs_s0">;
+
+def int_hexagon_A2_maxu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_maxu">;
+
+def int_hexagon_A2_maxp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_maxp">;
+
+def int_hexagon_A2_andir :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir">;
+
+def int_hexagon_F2_sfrecipa :
+Hexagon_floati32_floatfloat_Intrinsic<"HEXAGON_F2_sfrecipa">;
+
+def int_hexagon_A2_combineii :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combineii">;
+
+def int_hexagon_A4_orn :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_orn">;
+
+def int_hexagon_A4_cmpbgtui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtui">;
+
+def int_hexagon_S2_lsr_r_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_or">;
+
+def int_hexagon_A4_vcmpbeqi :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbeqi">;
+
+def int_hexagon_S2_lsl_r_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r">;
+
+def int_hexagon_S2_lsl_r_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p">;
+
+def int_hexagon_A2_or :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_or">;
+
+def int_hexagon_F2_dfcmpeq :
+Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpeq">;
+
+def int_hexagon_C2_cmpeq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeq">;
+
+def int_hexagon_A2_tfrp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_tfrp">;
+
+def int_hexagon_C4_and_andn :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_andn">;
+
+def int_hexagon_S2_vsathub_nopack :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsathub_nopack">;
+
+def int_hexagon_A2_satuh :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_satuh">;
+
+def int_hexagon_A2_satub :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_satub">;
+
+def int_hexagon_M2_vrcmpys_s1 :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_M2_vrcmpys_s1">;
+
+def int_hexagon_S4_or_ori :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_ori">;
+
+def int_hexagon_C4_fastcorner9_not :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_fastcorner9_not">;
+
+def int_hexagon_A2_tfrih :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfrih">;
+
+def int_hexagon_A2_tfril :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfril">;
+
+def int_hexagon_M4_mpyri_addr :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr">;
+
+def int_hexagon_S2_vtrunehb :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vtrunehb">;
+
+def int_hexagon_A2_vabsw :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabsw">;
+
+def int_hexagon_A2_vabsh :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabsh">;
+
+def int_hexagon_F2_sfsub :
+Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfsub">;
+
+def int_hexagon_C2_muxii :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxii">;
+
+def int_hexagon_C2_muxir :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxir">;
+
+def int_hexagon_A2_swiz :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_swiz">;
+
+def int_hexagon_S2_asr_i_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_and">;
+
+def int_hexagon_M2_cmpyrsc_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrsc_s0">;
+
+def int_hexagon_M2_cmpyrsc_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrsc_s1">;
+
+def int_hexagon_A2_vraddub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vraddub">;
+
+def int_hexagon_A4_tlbmatch :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_tlbmatch">;
+
+def int_hexagon_F2_conv_df2w_chop :
+Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2w_chop">;
+
+def int_hexagon_A2_and :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_and">;
+
+def int_hexagon_S2_lsr_r_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_and">;
+
+def int_hexagon_M2_mpy_nac_sat_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s1">;
+
+def int_hexagon_M2_mpy_nac_sat_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s0">;
+
+def int_hexagon_S4_extract :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_extract">;
+
+def int_hexagon_A2_vcmpweq :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpweq">;
+
+def int_hexagon_M2_acci :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_acci">;
+
+def int_hexagon_S2_lsr_i_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc">;
+
+def int_hexagon_S2_lsr_i_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_or">;
+
+def int_hexagon_F2_conv_ud2sf :
+Hexagon_float_i64_Intrinsic<"HEXAGON_F2_conv_ud2sf">;
+
+def int_hexagon_A2_tfr :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfr">;
+
+def int_hexagon_S2_asr_i_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_or">;
+
+def int_hexagon_A2_subri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri">;
+
+def int_hexagon_A4_vrmaxuw :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxuw">;
+
+def int_hexagon_M5_vmpybuu :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M5_vmpybuu">;
+
+def int_hexagon_A4_vrmaxuh :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxuh">;
+
+def int_hexagon_S2_asl_i_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vw">;
+
+def int_hexagon_A2_vavgw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgw">;
+
+def int_hexagon_S2_brev :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_brev">;
+
+def int_hexagon_A2_vavgh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgh">;
+
+def int_hexagon_S2_clrbit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_i">;
+
+def int_hexagon_S2_asl_i_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vh">;
+
+def int_hexagon_S2_lsr_i_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_or">;
+
+def int_hexagon_S2_lsl_r_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_nac">;
+
+def int_hexagon_M2_mmpyl_rs1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_rs1">;
+
+def int_hexagon_M2_mpyud_hl_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_hl_s1">;
+
+def int_hexagon_M2_mmpyl_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_s0">;
+
+def int_hexagon_M2_mmpyl_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_s1">;
+
+def int_hexagon_M2_naccii :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_naccii">;
+
+def int_hexagon_S2_vrndpackwhs :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vrndpackwhs">;
+
+def int_hexagon_S2_vtrunewh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_vtrunewh">;
+
+def int_hexagon_M2_dpmpyss_nac_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_nac_s0">;
+
+def int_hexagon_M2_mpyd_ll_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_ll_s0">;
+
+def int_hexagon_M2_mpyd_ll_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_ll_s1">;
+
+def int_hexagon_M4_mac_up_s1_sat :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mac_up_s1_sat">;
+
+def int_hexagon_S4_vrcrotate_acc :
+Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate_acc">;
+
+def int_hexagon_F2_conv_uw2df :
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_conv_uw2df">;
+
+def int_hexagon_A2_vaddubs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddubs">;
+
+def int_hexagon_S2_asr_r_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_acc">;
+
+def int_hexagon_A2_orir :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir">;
+
+def int_hexagon_A2_andp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_andp">;
+
+def int_hexagon_S2_lfsp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_lfsp">;
+
+def int_hexagon_A2_min :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_min">;
+
+def int_hexagon_M2_mpysmi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi">;
+
+def int_hexagon_M2_vcmpy_s0_sat_r :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_r">;
+
+def int_hexagon_M2_mpyu_acc_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s1">;
+
+def int_hexagon_M2_mpyu_acc_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s0">;
+
+def int_hexagon_S2_asr_r_svw_trun :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_r_svw_trun">;
+
+def int_hexagon_M2_mmpyh_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyh_s0">;
+
+def int_hexagon_M2_mmpyh_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyh_s1">;
+
+def int_hexagon_F2_conv_sf2df :
+Hexagon_double_float_Intrinsic<"HEXAGON_F2_conv_sf2df">;
+
+def int_hexagon_S2_vtrunohb :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vtrunohb">;
+
+def int_hexagon_F2_conv_sf2d_chop :
+Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2d_chop">;
+
+def int_hexagon_M2_mpyd_lh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_lh_s0">;
+
+def int_hexagon_F2_conv_df2w :
+Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2w">;
+
+def int_hexagon_S5_asrhub_sat :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_sat">;
+
+def int_hexagon_S2_asl_i_r_xacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_xacc">;
+
+def int_hexagon_F2_conv_df2d :
+Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2d">;
+
+def int_hexagon_M2_mmaculs_s1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmaculs_s1">;
+
+def int_hexagon_M2_mmaculs_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmaculs_s0">;
+
+def int_hexagon_A2_svadduhs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svadduhs">;
+
+def int_hexagon_F2_conv_sf2w_chop :
+Hexagon_i32_float_Intrinsic<"HEXAGON_F2_conv_sf2w_chop">;
+
+def int_hexagon_S2_svsathub :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_svsathub">;
+
+def int_hexagon_M2_mpyd_rnd_hl_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s1">;
+
+def int_hexagon_M2_mpyd_rnd_hl_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s0">;
+
+def int_hexagon_S2_setbit_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_r">;
+
+def int_hexagon_A2_vavghr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavghr">;
+
+def int_hexagon_F2_sffma_sc :
+Hexagon_float_floatfloatfloati32_Intrinsic<"HEXAGON_F2_sffma_sc">;
+
+def int_hexagon_F2_dfclass :
+Hexagon_i32_doublei32_Intrinsic<"HEXAGON_F2_dfclass">;
+
+def int_hexagon_F2_conv_df2ud :
+Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2ud">;
+
+def int_hexagon_F2_conv_df2uw :
+Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2uw">;
+
+def int_hexagon_M2_cmpyrs_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrs_s0">;
+
+def int_hexagon_M2_cmpyrs_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrs_s1">;
+
+def int_hexagon_C4_cmpltei :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpltei">;
+
+def int_hexagon_C4_cmplteu :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteu">;
+
+def int_hexagon_A2_vsubb_map :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubb_map">;
+
+def int_hexagon_A2_subh_l16_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_ll">;
+
+def int_hexagon_S2_asr_i_r_rnd :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd">;
+
+def int_hexagon_M2_vrmpy_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrmpy_s0">;
+
+def int_hexagon_M2_mpyd_rnd_hh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s1">;
+
+def int_hexagon_M2_mpyd_rnd_hh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s0">;
+
+def int_hexagon_A2_minup :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_minup">;
+
+def int_hexagon_S2_valignrb :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignrb">;
+
+def int_hexagon_S2_asr_r_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_r_p_acc">;
+
+def int_hexagon_M2_mmpyl_rs0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_rs0">;
+
+def int_hexagon_M2_vrcmaci_s0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vrcmaci_s0">;
+
+def int_hexagon_A2_vaddub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddub">;
+
+def int_hexagon_A2_combine_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_lh">;
+
+def int_hexagon_M5_vdmacbsu :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M5_vdmacbsu">;
+
+def int_hexagon_A2_combine_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_ll">;
+
+def int_hexagon_M2_mpyud_hl_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_hl_s0">;
+
+def int_hexagon_M2_vrcmpyi_s0c :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyi_s0c">;
+
+def int_hexagon_S2_asr_i_p_rnd :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd">;
+
+def int_hexagon_A2_addpsat :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addpsat">;
+
+def int_hexagon_A2_svaddhs :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svaddhs">;
+
+def int_hexagon_S4_ori_lsr_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_lsr_ri">;
+
+def int_hexagon_M2_mpy_sat_rnd_ll_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s1">;
+
+def int_hexagon_M2_mpy_sat_rnd_ll_s0 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s0">;
+
+def int_hexagon_A2_vminw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminw">;
+
+def int_hexagon_A2_vminh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminh">;
+
+def int_hexagon_M2_vrcmpyr_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyr_s0">;
+
+def int_hexagon_A2_vminb :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminb">;
+
+def int_hexagon_M2_vcmac_s0_sat_i :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_i">;
+
+def int_hexagon_M2_mpyud_lh_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_lh_s0">;
+
+def int_hexagon_M2_mpyud_lh_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_lh_s1">;
+
+def int_hexagon_S2_asl_r_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_or">;
+
+def int_hexagon_S4_lsli :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_lsli">;
+
+def int_hexagon_S2_lsl_r_vw :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vw">;
+
+def int_hexagon_M2_mpy_hh_s1 :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_hh_s1">;
+
+def int_hexagon_M4_vrmpyeh_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_s0">;
+
+def int_hexagon_M4_vrmpyeh_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_s1">;
+
+def int_hexagon_M2_mpy_nac_lh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s0">;
+
+def int_hexagon_M2_mpy_nac_lh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s1">;
+
+def int_hexagon_M2_vraddh :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vraddh">;
+
+def int_hexagon_C2_tfrrp :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_C2_tfrrp">;
+
+def int_hexagon_M2_mpy_acc_sat_ll_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s0">;
+
+def int_hexagon_M2_mpy_acc_sat_ll_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s1">;
+
+def int_hexagon_S2_vtrunowh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_vtrunowh">;
+
+def int_hexagon_A2_abs :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_abs">;
+
+def int_hexagon_A4_cmpbeq :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeq">;
+
+def int_hexagon_A2_negp :
+Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_negp">;
+
+def int_hexagon_S2_asl_i_r_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_sat">;
+
+def int_hexagon_A2_addh_l16_sat_hl :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_sat_hl">;
+
+def int_hexagon_S2_vsatwuh :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vsatwuh">;
+
+def int_hexagon_F2_dfcmpgt :
+Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpgt">;
+
+def int_hexagon_S2_svsathb :
+Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_svsathb">;
+
+def int_hexagon_C2_cmpgtup :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtup">;
+
+def int_hexagon_A4_cround_ri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_ri">;
+
+def int_hexagon_S4_clbpaddi :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S4_clbpaddi">;
+
+def int_hexagon_A4_cround_rr :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_rr">;
+
+def int_hexagon_C2_mux :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_mux">;
+
+def int_hexagon_M2_dpmpyuu_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_s0">;
+
+def int_hexagon_S2_shuffeb :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffeb">;
+
+def int_hexagon_A2_vminuw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminuw">;
+
+def int_hexagon_A2_vaddhs :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddhs">;
+
+def int_hexagon_S2_insert_rp :
+Hexagon_i32_i32i32i64_Intrinsic<"HEXAGON_S2_insert_rp">;
+
+def int_hexagon_A2_vminuh :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminuh">;
+
+def int_hexagon_A2_vminub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminub">;
+
+def int_hexagon_S2_extractu :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_extractu">;
+
+def int_hexagon_A2_svsubh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svsubh">;
+
+def int_hexagon_S4_clbaddi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_clbaddi">;
+
+def int_hexagon_F2_sffms :
+Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffms">;
+
+def int_hexagon_S2_vsxtbh :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vsxtbh">;
+
+def int_hexagon_M2_mpyud_nac_ll_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s1">;
+
+def int_hexagon_M2_mpyud_nac_ll_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s0">;
+
+def int_hexagon_A2_subp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_subp">;
+
+def int_hexagon_M2_vmpy2es_s1 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vmpy2es_s1">;
+
+def int_hexagon_M2_vmpy2es_s0 :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vmpy2es_s0">;
+
+def int_hexagon_S4_parity :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_parity">;
+
+def int_hexagon_M2_mpy_acc_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s1">;
+
+def int_hexagon_M2_mpy_acc_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s0">;
+
+def int_hexagon_S4_addi_asl_ri :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_asl_ri">;
+
+def int_hexagon_M2_mpyd_nac_hh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s1">;
+
+def int_hexagon_M2_mpyd_nac_hh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s0">;
+
+def int_hexagon_S2_asr_i_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_nac">;
+
+def int_hexagon_A4_cmpheqi :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheqi">;
+
+def int_hexagon_S2_lsr_r_p_xor :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_xor">;
+
+def int_hexagon_M2_mpy_acc_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s1">;
+
+def int_hexagon_M2_mpy_acc_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s0">;
+
+def int_hexagon_F2_conv_sf2ud_chop :
+Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2ud_chop">;
+
+def int_hexagon_C2_cmpgeui :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgeui">;
+
+def int_hexagon_M2_mpy_acc_sat_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s0">;
+
+def int_hexagon_M2_mpy_acc_sat_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s1">;
+
+def int_hexagon_S2_asl_r_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_and">;
+
+def int_hexagon_A2_addh_h16_sat_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_sat_lh">;
+
+def int_hexagon_A2_addh_h16_sat_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_sat_ll">;
+
+def int_hexagon_M4_nac_up_s1_sat :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_nac_up_s1_sat">;
+
+def int_hexagon_M2_mpyud_nac_lh_s1 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s1">;
+
+def int_hexagon_M2_mpyud_nac_lh_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s0">;
+
+def int_hexagon_A4_round_ri_sat :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri_sat">;
+
+def int_hexagon_M2_mpy_nac_hl_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s0">;
+
+def int_hexagon_M2_mpy_nac_hl_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s1">;
+
+def int_hexagon_A2_vavghcr :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavghcr">;
+
+def int_hexagon_M2_mmacls_rs0 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_rs0">;
+
+def int_hexagon_M2_mmacls_rs1 :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_rs1">;
+
+def int_hexagon_M2_cmaci_s0 :
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmaci_s0">;
+
+def int_hexagon_S2_setbit_i :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_i">;
+
+def int_hexagon_S2_asl_i_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_or">;
+
+def int_hexagon_A4_andn :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_andn">;
+
+def int_hexagon_M5_vrmpybsu :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M5_vrmpybsu">;
+
+def int_hexagon_S2_vrndpackwh :
+Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vrndpackwh">;
+
+def int_hexagon_M2_vcmac_s0_sat_r :
+Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_r">;
+
+def int_hexagon_A2_vmaxuw :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxuw">;
+
+def int_hexagon_C2_bitsclr :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclr">;
+
+def int_hexagon_M2_xor_xacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_xor_xacc">;
+
+def int_hexagon_A4_vcmpbgtui :
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgtui">;
+
+def int_hexagon_A4_ornp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A4_ornp">;
+
+def int_hexagon_A2_tfrpi :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_tfrpi">;
+
+def int_hexagon_C4_and_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_or">;
+
+def int_hexagon_M2_mpy_nac_sat_hh_s1 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s1">;
+
+def int_hexagon_M2_mpy_nac_sat_hh_s0 :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s0">;
+
+def int_hexagon_A2_subh_h16_sat_ll :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_ll">;
+
+def int_hexagon_A2_subh_h16_sat_lh :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_lh">;
+
+def int_hexagon_M2_vmpy2su_s1 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2su_s1">;
+
+def int_hexagon_M2_vmpy2su_s0 :
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2su_s0">;
+
+def int_hexagon_S2_asr_i_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc">;
+
+def int_hexagon_C4_nbitsclri :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclri">;
+
+def int_hexagon_S2_lsr_i_vh :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vh">;
+
+def int_hexagon_S2_lsr_i_p_xacc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc">;
+
+// V55 Scalar Instructions.
+
+def int_hexagon_A5_ACS :
+Hexagon_i64i32_i64i64i64_Intrinsic<"HEXAGON_A5_ACS">;
+
+// V60 Scalar Instructions.
+
+def int_hexagon_S6_rol_i_p_and :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_and">;
+
+def int_hexagon_S6_rol_i_r_xacc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">;
+
+def int_hexagon_S6_rol_i_r_and :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_and">;
+
+def int_hexagon_S6_rol_i_r_acc :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_acc">;
+
+def int_hexagon_S6_rol_i_p_xacc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">;
+
+def int_hexagon_S6_rol_i_p :
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S6_rol_i_p">;
+
+def int_hexagon_S6_rol_i_p_nac :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_nac">;
+
+def int_hexagon_S6_rol_i_p_acc :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_acc">;
+
+def int_hexagon_S6_rol_i_r_or :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_or">;
+
+def int_hexagon_S6_rol_i_r :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S6_rol_i_r">;
+
+def int_hexagon_S6_rol_i_r_nac :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_nac">;
+
+def int_hexagon_S6_rol_i_p_or :
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_or">;
+
+// V62 Scalar Instructions.
+
+def int_hexagon_S6_vtrunehb_ppp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S6_vtrunehb_ppp">;
+
+def int_hexagon_V6_ldntnt0 :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_ldntnt0">;
+
+def int_hexagon_M6_vabsdiffub :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M6_vabsdiffub">;
+
+def int_hexagon_S6_vtrunohb_ppp :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S6_vtrunohb_ppp">;
+
+def int_hexagon_M6_vabsdiffb :
+Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M6_vabsdiffb">;
+
+def int_hexagon_A6_vminub_RdP :
+Hexagon_i64i32_i64i64_Intrinsic<"HEXAGON_A6_vminub_RdP">;
+
+def int_hexagon_S6_vsplatrbp :
+Hexagon_i64_i32_Intrinsic<"HEXAGON_S6_vsplatrbp">;
+
+// V65 Scalar Instructions.
+
+def int_hexagon_A6_vcmpbeq_notany :
+Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A6_vcmpbeq_notany">;
+
+// V66 Scalar Instructions.
+
+def int_hexagon_F2_dfsub :
+Hexagon_double_doubledouble_Intrinsic<"HEXAGON_F2_dfsub">;
+
+def int_hexagon_F2_dfadd :
+Hexagon_double_doubledouble_Intrinsic<"HEXAGON_F2_dfadd">;
+
+def int_hexagon_M2_mnaci :
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mnaci">;
+
+def int_hexagon_S2_mask :
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_mask">;
+
+// V60 HVX Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_or,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_or
def int_hexagon_V6_veqb_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_or">;
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_or_128B
def int_hexagon_V6_veqb_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_or_128B">;
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_or_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_xor,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_xor
-def int_hexagon_V6_veqb_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_xor">;
+def int_hexagon_V6_vminub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_veqb_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_veqb_xor_128B
-def int_hexagon_V6_veqb_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_xor_128B">;
+def int_hexagon_V6_vminub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw,QV_ftype_VIVI,2)
-// tag : V6_vgtuw
-def int_hexagon_V6_vgtuw :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw">;
+def int_hexagon_V6_vaslw_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vaslw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtuw_128B
-def int_hexagon_V6_vgtuw_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_128B">;
+def int_hexagon_V6_vaslw_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vaslw_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_and
-def int_hexagon_V6_vgtuw_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_and">;
+def int_hexagon_V6_vmpyhvsrs :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhvsrs">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_and_128B
-def int_hexagon_V6_vgtuw_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">;
+def int_hexagon_V6_vmpyhvsrs_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhvsrs_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_or
-def int_hexagon_V6_vgtuw_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_or">;
+def int_hexagon_V6_vsathub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsathub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_or_128B
-def int_hexagon_V6_vgtuw_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">;
+def int_hexagon_V6_vsathub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsathub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_xor
-def int_hexagon_V6_vgtuw_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_xor">;
+def int_hexagon_V6_vaddh_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddh_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuw_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuw_xor_128B
-def int_hexagon_V6_vgtuw_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">;
+def int_hexagon_V6_vaddh_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh,QV_ftype_VIVI,2)
-// tag : V6_vgtuh
-def int_hexagon_V6_vgtuh :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh">;
+def int_hexagon_V6_vrmpybusi :
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtuh_128B
-def int_hexagon_V6_vgtuh_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_128B">;
+def int_hexagon_V6_vrmpybusi_128B :
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+
+def int_hexagon_V6_vshufoh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufoh">;
+
+def int_hexagon_V6_vshufoh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshufoh_128B">;
+
+def int_hexagon_V6_vasrwv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vasrwv">;
+
+def int_hexagon_V6_vasrwv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vasrwv_128B">;
+
+def int_hexagon_V6_vdmpyhsuisat :
+Hexagon_v16i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat">;
+
+def int_hexagon_V6_vdmpyhsuisat_128B :
+Hexagon_v32i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">;
+
+def int_hexagon_V6_vrsadubi_acc :
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+
+def int_hexagon_V6_vrsadubi_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+
+def int_hexagon_V6_vnavgw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgw">;
+
+def int_hexagon_V6_vnavgw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgw_128B">;
+
+def int_hexagon_V6_vnavgh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgh">;
+
+def int_hexagon_V6_vnavgh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgh_128B">;
+
+def int_hexagon_V6_vavgub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgub">;
+
+def int_hexagon_V6_vavgub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgub_128B">;
+
+def int_hexagon_V6_vsubb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubb">;
+
+def int_hexagon_V6_vsubb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubb_128B">;
+
+def int_hexagon_V6_vgtw_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_and">;
+
+def int_hexagon_V6_vgtw_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_and_128B">;
+
+def int_hexagon_V6_vavgubrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgubrnd">;
+
+def int_hexagon_V6_vavgubrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgubrnd_128B">;
+
+def int_hexagon_V6_vrmpybusv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybusv">;
+
+def int_hexagon_V6_vrmpybusv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybusv_128B">;
+
+def int_hexagon_V6_vsubbnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbnq">;
+
+def int_hexagon_V6_vsubbnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+
+def int_hexagon_V6_vroundhb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vroundhb">;
+
+def int_hexagon_V6_vroundhb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vroundhb_128B">;
+
+def int_hexagon_V6_vadduhsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduhsat_dv">;
+
+def int_hexagon_V6_vadduhsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vadduhsat_dv_128B">;
+
+def int_hexagon_V6_vsububsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsububsat">;
+
+def int_hexagon_V6_vsububsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsububsat_128B">;
+
+def int_hexagon_V6_vmpabus_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpabus_acc">;
+
+def int_hexagon_V6_vmpabus_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vmpabus_acc_128B">;
+
+def int_hexagon_V6_vmux :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vmux">;
+
+def int_hexagon_V6_vmux_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vmux_128B">;
+
+def int_hexagon_V6_vmpyhus :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhus">;
+
+def int_hexagon_V6_vmpyhus_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhus_128B">;
+
+def int_hexagon_V6_vpackeb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackeb">;
+
+def int_hexagon_V6_vpackeb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackeb_128B">;
+
+def int_hexagon_V6_vsubhnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhnq">;
+
+def int_hexagon_V6_vsubhnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+
+def int_hexagon_V6_vavghrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavghrnd">;
+
+def int_hexagon_V6_vavghrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavghrnd_128B">;
+
+def int_hexagon_V6_vtran2x2_map :
+Hexagon_v16i32v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vtran2x2_map">;
+
+def int_hexagon_V6_vtran2x2_map_128B :
+Hexagon_v32i32v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vtran2x2_map_128B">;
+
+def int_hexagon_V6_vdelta :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vdelta">;
+
+def int_hexagon_V6_vdelta_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vdelta_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_and
def int_hexagon_V6_vgtuh_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_and">;
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_and_128B
def int_hexagon_V6_vgtuh_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">;
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_or
-def int_hexagon_V6_vgtuh_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_or">;
+def int_hexagon_V6_vtmpyhb :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyhb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_or_128B
-def int_hexagon_V6_vgtuh_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">;
+def int_hexagon_V6_vtmpyhb_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vtmpyhb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_xor
-def int_hexagon_V6_vgtuh_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_xor">;
+def int_hexagon_V6_vpackob :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackob">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtuh_xor_128B
-def int_hexagon_V6_vgtuh_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">;
+def int_hexagon_V6_vpackob_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackob_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub,QV_ftype_VIVI,2)
-// tag : V6_vgtub
-def int_hexagon_V6_vgtub :
-Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtub">;
+def int_hexagon_V6_vmaxh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_128B,QV_ftype_VIVI,2)
-// tag : V6_vgtub_128B
-def int_hexagon_V6_vgtub_128B :
-Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_128B">;
+def int_hexagon_V6_vmaxh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_and,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_and
-def int_hexagon_V6_vgtub_and :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_and">;
+def int_hexagon_V6_vtmpybus_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vtmpybus_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_and_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_and_128B
-def int_hexagon_V6_vgtub_and_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_and_128B">;
+def int_hexagon_V6_vtmpybus_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vtmpybus_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_or,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_or
-def int_hexagon_V6_vgtub_or :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_or">;
+def int_hexagon_V6_vsubuhsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubuhsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_or_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_or_128B
-def int_hexagon_V6_vgtub_or_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_or_128B">;
+def int_hexagon_V6_vsubuhsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_xor,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_xor
-def int_hexagon_V6_vgtub_xor :
-Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_xor">;
+def int_hexagon_V6_vasrw_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vgtub_xor_128B,QV_ftype_QVVIVI,3)
-// tag : V6_vgtub_xor_128B
-def int_hexagon_V6_vgtub_xor_128B :
-Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">;
+def int_hexagon_V6_vasrw_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrw_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_or,QV_ftype_QVQV,2)
-// tag : V6_pred_or
def int_hexagon_V6_pred_or :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or">;
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_or_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_or_128B
def int_hexagon_V6_pred_or_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_128B">;
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_or_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_and,QV_ftype_QVQV,2)
-// tag : V6_pred_and
-def int_hexagon_V6_pred_and :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and">;
+def int_hexagon_V6_vrmpyub_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vrmpyub_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_and_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_and_128B
-def int_hexagon_V6_pred_and_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_128B">;
+def int_hexagon_V6_vrmpyub_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vrmpyub_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_not,QV_ftype_QV,1)
-// tag : V6_pred_not
-def int_hexagon_V6_pred_not :
-Hexagon_v64iv64i_Intrinsic<"HEXAGON_V6_pred_not">;
+def int_hexagon_V6_lo :
+Hexagon_v16i32_v32i32_Intrinsic<"HEXAGON_V6_lo">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_not_128B,QV_ftype_QV,1)
-// tag : V6_pred_not_128B
-def int_hexagon_V6_pred_not_128B :
-Hexagon_v128iv128i_Intrinsic<"HEXAGON_V6_pred_not_128B">;
+def int_hexagon_V6_lo_128B :
+Hexagon_v32i32_v64i32_Intrinsic<"HEXAGON_V6_lo_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_xor,QV_ftype_QVQV,2)
-// tag : V6_pred_xor
-def int_hexagon_V6_pred_xor :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_xor">;
+def int_hexagon_V6_vsubb_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubb_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_xor_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_xor_128B
-def int_hexagon_V6_pred_xor_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_xor_128B">;
+def int_hexagon_V6_vsubb_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">;
+
+def int_hexagon_V6_vsubhsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhsat_dv">;
+
+def int_hexagon_V6_vsubhsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubhsat_dv_128B">;
+
+def int_hexagon_V6_vmpyiwh :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwh">;
+
+def int_hexagon_V6_vmpyiwh_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwh_128B">;
+
+def int_hexagon_V6_vmpyiwb :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwb">;
+
+def int_hexagon_V6_vmpyiwb_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwb_128B">;
+
+def int_hexagon_V6_ldu0 :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_ldu0">;
+
+def int_hexagon_V6_ldu0_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_ldu0_128B">;
+
+def int_hexagon_V6_vgtuh_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_xor">;
+
+def int_hexagon_V6_vgtuh_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">;
+
+def int_hexagon_V6_vgth_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_or">;
+
+def int_hexagon_V6_vgth_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_or_128B">;
+
+def int_hexagon_V6_vavgh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgh">;
+
+def int_hexagon_V6_vavgh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgh_128B">;
+
+def int_hexagon_V6_vlalignb :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignb">;
+
+def int_hexagon_V6_vlalignb_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignb_128B">;
+
+def int_hexagon_V6_vsh :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vsh">;
+
+def int_hexagon_V6_vsh_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vsh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_and_n,QV_ftype_QVQV,2)
-// tag : V6_pred_and_n
def int_hexagon_V6_pred_and_n :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and_n">;
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_and_n">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_and_n_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_and_n_128B
def int_hexagon_V6_pred_and_n_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_n_128B">;
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_and_n_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_or_n,QV_ftype_QVQV,2)
-// tag : V6_pred_or_n
-def int_hexagon_V6_pred_or_n :
-Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or_n">;
+def int_hexagon_V6_vsb :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vsb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_or_n_128B,QV_ftype_QVQV,2)
-// tag : V6_pred_or_n_128B
-def int_hexagon_V6_pred_or_n_128B :
-Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_n_128B">;
+def int_hexagon_V6_vsb_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vsb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_scalar2,QV_ftype_SI,1)
-// tag : V6_pred_scalar2
-def int_hexagon_V6_pred_scalar2 :
-Hexagon_v64ii_Intrinsic<"HEXAGON_V6_pred_scalar2">;
+def int_hexagon_V6_vroundwuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vroundwuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_scalar2_128B,QV_ftype_SI,1)
-// tag : V6_pred_scalar2_128B
-def int_hexagon_V6_pred_scalar2_128B :
-Hexagon_v128ii_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">;
+def int_hexagon_V6_vroundwuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vroundwuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmux,VI_ftype_QVVIVI,3)
-// tag : V6_vmux
-def int_hexagon_V6_vmux :
-Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vmux">;
+def int_hexagon_V6_vasrhv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vasrhv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmux_128B,VI_ftype_QVVIVI,3)
-// tag : V6_vmux_128B
-def int_hexagon_V6_vmux_128B :
-Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vmux_128B">;
+def int_hexagon_V6_vasrhv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vasrhv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vswap,VD_ftype_QVVIVI,3)
-// tag : V6_vswap
-def int_hexagon_V6_vswap :
-Hexagon_v1024v64iv512v512_Intrinsic<"HEXAGON_V6_vswap">;
+def int_hexagon_V6_vshuffh :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vshuffh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vswap_128B,VD_ftype_QVVIVI,3)
-// tag : V6_vswap_128B
-def int_hexagon_V6_vswap_128B :
-Hexagon_v2048v128iv1024v1024_Intrinsic<"HEXAGON_V6_vswap_128B">;
+def int_hexagon_V6_vshuffh_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vshuffh_128B">;
+
+def int_hexagon_V6_vaddhsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhsat_dv">;
+
+def int_hexagon_V6_vaddhsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddhsat_dv_128B">;
+
+def int_hexagon_V6_vnavgub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgub">;
+
+def int_hexagon_V6_vnavgub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgub_128B">;
+
+def int_hexagon_V6_vrmpybv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybv">;
+
+def int_hexagon_V6_vrmpybv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybv_128B">;
+
+def int_hexagon_V6_vnormamth :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vnormamth">;
+
+def int_hexagon_V6_vnormamth_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vnormamth_128B">;
+
+def int_hexagon_V6_vdmpyhb :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb">;
+
+def int_hexagon_V6_vdmpyhb_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_128B">;
+
+def int_hexagon_V6_vavguh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavguh">;
+
+def int_hexagon_V6_vavguh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavguh_128B">;
+
+def int_hexagon_V6_vlsrwv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vlsrwv">;
+
+def int_hexagon_V6_vlsrwv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vlsrwv_128B">;
+
+def int_hexagon_V6_vlsrhv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vlsrhv">;
+
+def int_hexagon_V6_vlsrhv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vlsrhv_128B">;
+
+def int_hexagon_V6_vdmpyhisat :
+Hexagon_v16i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhisat">;
+
+def int_hexagon_V6_vdmpyhisat_128B :
+Hexagon_v32i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhisat_128B">;
+
+def int_hexagon_V6_vdmpyhvsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vdmpyhvsat">;
+
+def int_hexagon_V6_vdmpyhvsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vdmpyhvsat_128B">;
+
+def int_hexagon_V6_vaddw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddw">;
+
+def int_hexagon_V6_vaddw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddw_128B">;
+
+def int_hexagon_V6_vzh :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vzh">;
+
+def int_hexagon_V6_vzh_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vzh_128B">;
+
+def int_hexagon_V6_vaddh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddh">;
+
+def int_hexagon_V6_vaddh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxub,VI_ftype_VIVI,2)
-// tag : V6_vmaxub
def int_hexagon_V6_vmaxub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxub">;
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxub_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxub_128B
def int_hexagon_V6_vmaxub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxub_128B">;
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminub,VI_ftype_VIVI,2)
-// tag : V6_vminub
-def int_hexagon_V6_vminub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminub">;
+def int_hexagon_V6_vmpyhv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhv_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminub_128B,VI_ftype_VIVI,2)
-// tag : V6_vminub_128B
-def int_hexagon_V6_vminub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminub_128B">;
+def int_hexagon_V6_vmpyhv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhv_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxuh,VI_ftype_VIVI,2)
-// tag : V6_vmaxuh
-def int_hexagon_V6_vmaxuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxuh">;
+def int_hexagon_V6_vadduhsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadduhsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxuh_128B
-def int_hexagon_V6_vmaxuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxuh_128B">;
+def int_hexagon_V6_vadduhsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduhsat_128B">;
+
+def int_hexagon_V6_vshufoeh :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufoeh">;
+
+def int_hexagon_V6_vshufoeh_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshufoeh_128B">;
+
+def int_hexagon_V6_vmpyuhv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyuhv_acc">;
+
+def int_hexagon_V6_vmpyuhv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">;
+
+def int_hexagon_V6_veqh :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh">;
+
+def int_hexagon_V6_veqh_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_128B">;
+
+def int_hexagon_V6_vmpabuuv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpabuuv">;
+
+def int_hexagon_V6_vmpabuuv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vmpabuuv_128B">;
+
+def int_hexagon_V6_vasrwhsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwhsat">;
+
+def int_hexagon_V6_vasrwhsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminuh,VI_ftype_VIVI,2)
-// tag : V6_vminuh
def int_hexagon_V6_vminuh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminuh">;
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vminuh_128B
def int_hexagon_V6_vminuh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminuh_128B">;
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxh,VI_ftype_VIVI,2)
-// tag : V6_vmaxh
-def int_hexagon_V6_vmaxh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxh">;
+def int_hexagon_V6_vror :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vror">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxh_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxh_128B
-def int_hexagon_V6_vmaxh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxh_128B">;
+def int_hexagon_V6_vror_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vror_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminh,VI_ftype_VIVI,2)
-// tag : V6_vminh
-def int_hexagon_V6_vminh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminh">;
+def int_hexagon_V6_vmpyowh_rnd_sacc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminh_128B,VI_ftype_VIVI,2)
-// tag : V6_vminh_128B
-def int_hexagon_V6_vminh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminh_128B">;
+def int_hexagon_V6_vmpyowh_rnd_sacc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxw,VI_ftype_VIVI,2)
-// tag : V6_vmaxw
-def int_hexagon_V6_vmaxw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxw">;
+def int_hexagon_V6_vmaxuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxw_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxw_128B
-def int_hexagon_V6_vmaxw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxw_128B">;
+def int_hexagon_V6_vmaxuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminw,VI_ftype_VIVI,2)
-// tag : V6_vminw
-def int_hexagon_V6_vminw :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminw">;
+def int_hexagon_V6_vabsh_sat :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsh_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminw_128B,VI_ftype_VIVI,2)
-// tag : V6_vminw_128B
-def int_hexagon_V6_vminw_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminw_128B">;
+def int_hexagon_V6_vabsh_sat_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsh_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsathub,VI_ftype_VIVI,2)
-// tag : V6_vsathub
-def int_hexagon_V6_vsathub :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsathub">;
+def int_hexagon_V6_pred_or_n :
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_or_n">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsathub_128B,VI_ftype_VIVI,2)
-// tag : V6_vsathub_128B
-def int_hexagon_V6_vsathub_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsathub_128B">;
+def int_hexagon_V6_pred_or_n_128B :
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_or_n_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsatwh,VI_ftype_VIVI,2)
-// tag : V6_vsatwh
-def int_hexagon_V6_vsatwh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsatwh">;
+def int_hexagon_V6_vdealb :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vdealb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsatwh_128B,VI_ftype_VIVI,2)
-// tag : V6_vsatwh_128B
-def int_hexagon_V6_vsatwh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsatwh_128B">;
+def int_hexagon_V6_vdealb_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vdealb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffeb,VI_ftype_VIVI,2)
-// tag : V6_vshuffeb
-def int_hexagon_V6_vshuffeb :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffeb">;
+def int_hexagon_V6_vmpybusv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpybusv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffeb_128B,VI_ftype_VIVI,2)
-// tag : V6_vshuffeb_128B
-def int_hexagon_V6_vshuffeb_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffeb_128B">;
+def int_hexagon_V6_vmpybusv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybusv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffob,VI_ftype_VIVI,2)
-// tag : V6_vshuffob
-def int_hexagon_V6_vshuffob :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffob">;
+def int_hexagon_V6_vzb :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vzb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffob_128B,VI_ftype_VIVI,2)
-// tag : V6_vshuffob_128B
-def int_hexagon_V6_vshuffob_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffob_128B">;
+def int_hexagon_V6_vzb_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vzb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufeh,VI_ftype_VIVI,2)
-// tag : V6_vshufeh
-def int_hexagon_V6_vshufeh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufeh">;
+def int_hexagon_V6_vdmpybus_dv :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufeh_128B,VI_ftype_VIVI,2)
-// tag : V6_vshufeh_128B
-def int_hexagon_V6_vshufeh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufeh_128B">;
+def int_hexagon_V6_vdmpybus_dv_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoh,VI_ftype_VIVI,2)
-// tag : V6_vshufoh
-def int_hexagon_V6_vshufoh :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufoh">;
+def int_hexagon_V6_vaddbq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoh_128B,VI_ftype_VIVI,2)
-// tag : V6_vshufoh_128B
-def int_hexagon_V6_vshufoh_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufoh_128B">;
+def int_hexagon_V6_vaddbq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffvdd,VD_ftype_VIVISI,3)
-// tag : V6_vshuffvdd
-def int_hexagon_V6_vshuffvdd :
-Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vshuffvdd">;
+def int_hexagon_V6_vaddb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffvdd_128B,VD_ftype_VIVISI,3)
-// tag : V6_vshuffvdd_128B
-def int_hexagon_V6_vshuffvdd_128B :
-Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vshuffvdd_128B">;
+def int_hexagon_V6_vaddb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealvdd,VD_ftype_VIVISI,3)
-// tag : V6_vdealvdd
-def int_hexagon_V6_vdealvdd :
-Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vdealvdd">;
+def int_hexagon_V6_vaddwq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealvdd_128B,VD_ftype_VIVISI,3)
-// tag : V6_vdealvdd_128B
-def int_hexagon_V6_vdealvdd_128B :
-Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vdealvdd_128B">;
+def int_hexagon_V6_vaddwq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoeh,VD_ftype_VIVI,2)
-// tag : V6_vshufoeh
-def int_hexagon_V6_vshufoeh :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeh">;
+def int_hexagon_V6_vasrhubrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhubrndsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoeh_128B,VD_ftype_VIVI,2)
-// tag : V6_vshufoeh_128B
-def int_hexagon_V6_vshufoeh_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeh_128B">;
+def int_hexagon_V6_vasrhubrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrhubrndsat_128B">;
+
+def int_hexagon_V6_vasrhubsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhubsat">;
+
+def int_hexagon_V6_vasrhubsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrhubsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoeb,VD_ftype_VIVI,2)
-// tag : V6_vshufoeb
def int_hexagon_V6_vshufoeb :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeb">;
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufoeb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshufoeb_128B,VD_ftype_VIVI,2)
-// tag : V6_vshufoeb_128B
def int_hexagon_V6_vshufoeb_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeb_128B">;
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshufoeb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealh,VI_ftype_VI,1)
-// tag : V6_vdealh
-def int_hexagon_V6_vdealh :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealh">;
+def int_hexagon_V6_vpackhub_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackhub_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealh_128B,VI_ftype_VI,1)
-// tag : V6_vdealh_128B
-def int_hexagon_V6_vdealh_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealh_128B">;
+def int_hexagon_V6_vpackhub_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackhub_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealb,VI_ftype_VI,1)
-// tag : V6_vdealb
-def int_hexagon_V6_vdealb :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealb">;
+def int_hexagon_V6_vmpyiwh_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealb_128B,VI_ftype_VI,1)
-// tag : V6_vdealb_128B
-def int_hexagon_V6_vdealb_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealb_128B">;
+def int_hexagon_V6_vmpyiwh_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealb4w,VI_ftype_VIVI,2)
-// tag : V6_vdealb4w
-def int_hexagon_V6_vdealb4w :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdealb4w">;
+def int_hexagon_V6_vtmpyb :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdealb4w_128B,VI_ftype_VIVI,2)
-// tag : V6_vdealb4w_128B
-def int_hexagon_V6_vdealb4w_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdealb4w_128B">;
+def int_hexagon_V6_vtmpyb_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vtmpyb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffh,VI_ftype_VI,1)
-// tag : V6_vshuffh
-def int_hexagon_V6_vshuffh :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffh">;
+def int_hexagon_V6_vmpabusv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpabusv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffh_128B,VI_ftype_VI,1)
-// tag : V6_vshuffh_128B
-def int_hexagon_V6_vshuffh_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffh_128B">;
+def int_hexagon_V6_vmpabusv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vmpabusv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffb,VI_ftype_VI,1)
-// tag : V6_vshuffb
-def int_hexagon_V6_vshuffb :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffb">;
+def int_hexagon_V6_pred_and :
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vshuffb_128B,VI_ftype_VI,1)
-// tag : V6_vshuffb_128B
-def int_hexagon_V6_vshuffb_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffb_128B">;
+def int_hexagon_V6_pred_and_128B :
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_and_128B">;
+
+def int_hexagon_V6_vsubwnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwnq">;
+
+def int_hexagon_V6_vsubwnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+
+def int_hexagon_V6_vpackwuh_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackwuh_sat">;
+
+def int_hexagon_V6_vpackwuh_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackwuh_sat_128B">;
+
+def int_hexagon_V6_vswap :
+Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vswap">;
+
+def int_hexagon_V6_vswap_128B :
+Hexagon_v64i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vswap_128B">;
+
+def int_hexagon_V6_vrmpyubv_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpyubv_acc">;
+
+def int_hexagon_V6_vrmpyubv_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpyubv_acc_128B">;
+
+def int_hexagon_V6_vgtb_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_and">;
+
+def int_hexagon_V6_vgtb_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_and_128B">;
+
+def int_hexagon_V6_vaslw :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vaslw">;
+
+def int_hexagon_V6_vaslw_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vaslw_128B">;
+
+def int_hexagon_V6_vpackhb_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackhb_sat">;
+
+def int_hexagon_V6_vpackhb_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackhb_sat_128B">;
+
+def int_hexagon_V6_vmpyih_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyih_acc">;
+
+def int_hexagon_V6_vmpyih_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyih_acc_128B">;
+
+def int_hexagon_V6_vshuffvdd :
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vshuffvdd">;
+
+def int_hexagon_V6_vshuffvdd_128B :
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vshuffvdd_128B">;
+
+def int_hexagon_V6_vaddb_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddb_dv">;
+
+def int_hexagon_V6_vaddb_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">;
+
+def int_hexagon_V6_vunpackub :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vunpackub">;
+
+def int_hexagon_V6_vunpackub_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vunpackub_128B">;
+
+def int_hexagon_V6_vgtuw :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw">;
+
+def int_hexagon_V6_vgtuw_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_128B">;
+
+def int_hexagon_V6_vlutvwh :
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh">;
+
+def int_hexagon_V6_vlutvwh_128B :
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_128B">;
+
+def int_hexagon_V6_vgtub :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub">;
+
+def int_hexagon_V6_vgtub_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_128B">;
+
+def int_hexagon_V6_vmpyowh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh">;
+
+def int_hexagon_V6_vmpyowh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_128B">;
+
+def int_hexagon_V6_vmpyieoh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyieoh">;
+
+def int_hexagon_V6_vmpyieoh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyieoh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_extractw,SI_ftype_VISI,2)
-// tag : V6_extractw
def int_hexagon_V6_extractw :
-Hexagon_iv512i_Intrinsic<"HEXAGON_V6_extractw">;
+Hexagon_i32_v16i32i32_Intrinsic<"HEXAGON_V6_extractw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_extractw_128B,SI_ftype_VISI,2)
-// tag : V6_extractw_128B
def int_hexagon_V6_extractw_128B :
-Hexagon_iv1024i_Intrinsic<"HEXAGON_V6_extractw_128B">;
+Hexagon_i32_v32i32i32_Intrinsic<"HEXAGON_V6_extractw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vinsertwr,VI_ftype_VISI,2)
-// tag : V6_vinsertwr
-def int_hexagon_V6_vinsertwr :
-Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vinsertwr">;
+def int_hexagon_V6_vavgwrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgwrnd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vinsertwr_128B,VI_ftype_VISI,2)
-// tag : V6_vinsertwr_128B
-def int_hexagon_V6_vinsertwr_128B :
-Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vinsertwr_128B">;
+def int_hexagon_V6_vavgwrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgwrnd_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplatw,VI_ftype_SI,1)
-// tag : V6_lvsplatw
-def int_hexagon_V6_lvsplatw :
-Hexagon_v512i_Intrinsic<"HEXAGON_V6_lvsplatw">;
+def int_hexagon_V6_vdmpyhsat_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplatw_128B,VI_ftype_SI,1)
-// tag : V6_lvsplatw_128B
-def int_hexagon_V6_lvsplatw_128B :
-Hexagon_v1024i_Intrinsic<"HEXAGON_V6_lvsplatw_128B">;
+def int_hexagon_V6_vdmpyhsat_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vassign,VI_ftype_VI,1)
-// tag : V6_vassign
-def int_hexagon_V6_vassign :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vassign">;
+def int_hexagon_V6_vgtub_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vassign_128B,VI_ftype_VI,1)
-// tag : V6_vassign_128B
-def int_hexagon_V6_vassign_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vassign_128B">;
+def int_hexagon_V6_vgtub_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">;
+
+def int_hexagon_V6_vmpyub :
+Hexagon_v32i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyub">;
+
+def int_hexagon_V6_vmpyub_128B :
+Hexagon_v64i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyub_128B">;
+
+def int_hexagon_V6_vmpyuh :
+Hexagon_v32i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyuh">;
+
+def int_hexagon_V6_vmpyuh_128B :
+Hexagon_v64i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyuh_128B">;
+
+def int_hexagon_V6_vunpackob :
+Hexagon_v32i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vunpackob">;
+
+def int_hexagon_V6_vunpackob_128B :
+Hexagon_v64i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vunpackob_128B">;
+
+def int_hexagon_V6_vmpahb :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpahb">;
+
+def int_hexagon_V6_vmpahb_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vmpahb_128B">;
+
+def int_hexagon_V6_veqw_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_or">;
+
+def int_hexagon_V6_veqw_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_or_128B">;
+
+def int_hexagon_V6_vandqrt :
+Hexagon_v16i32_v512i1i32_Intrinsic<"HEXAGON_V6_vandqrt">;
+
+def int_hexagon_V6_vandqrt_128B :
+Hexagon_v32i32_v1024i1i32_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+
+def int_hexagon_V6_vxor :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vxor">;
+
+def int_hexagon_V6_vxor_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vxor_128B">;
+
+def int_hexagon_V6_vasrwhrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwhrndsat">;
+
+def int_hexagon_V6_vasrwhrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwhrndsat_128B">;
+
+def int_hexagon_V6_vmpyhsat_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyhsat_acc">;
+
+def int_hexagon_V6_vmpyhsat_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyhsat_acc_128B">;
+
+def int_hexagon_V6_vrmpybus_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vrmpybus_acc">;
+
+def int_hexagon_V6_vrmpybus_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vrmpybus_acc_128B">;
+
+def int_hexagon_V6_vsubhw :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhw">;
+
+def int_hexagon_V6_vsubhw_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhw_128B">;
+
+def int_hexagon_V6_vdealb4w :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vdealb4w">;
+
+def int_hexagon_V6_vdealb4w_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vdealb4w_128B">;
+
+def int_hexagon_V6_vmpyowh_sacc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh_sacc">;
+
+def int_hexagon_V6_vmpyowh_sacc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_sacc_128B">;
+
+def int_hexagon_V6_vmpybv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpybv">;
+
+def int_hexagon_V6_vmpybv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybv_128B">;
+
+def int_hexagon_V6_vabsdiffh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffh">;
+
+def int_hexagon_V6_vabsdiffh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vabsdiffh_128B">;
+
+def int_hexagon_V6_vshuffob :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshuffob">;
+
+def int_hexagon_V6_vshuffob_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshuffob_128B">;
+
+def int_hexagon_V6_vmpyub_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyub_acc">;
+
+def int_hexagon_V6_vmpyub_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyub_acc_128B">;
+
+def int_hexagon_V6_vnormamtw :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vnormamtw">;
+
+def int_hexagon_V6_vnormamtw_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vnormamtw_128B">;
+
+def int_hexagon_V6_vunpackuh :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vunpackuh">;
+
+def int_hexagon_V6_vunpackuh_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vunpackuh_128B">;
+
+def int_hexagon_V6_vgtuh_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_or">;
+
+def int_hexagon_V6_vgtuh_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">;
+
+def int_hexagon_V6_vmpyiewuh_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc">;
+
+def int_hexagon_V6_vmpyiewuh_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc_128B">;
+
+def int_hexagon_V6_vunpackoh :
+Hexagon_v32i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vunpackoh">;
+
+def int_hexagon_V6_vunpackoh_128B :
+Hexagon_v64i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vunpackoh_128B">;
+
+def int_hexagon_V6_vdmpyhsat :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsat">;
+
+def int_hexagon_V6_vdmpyhsat_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsat_128B">;
+
+def int_hexagon_V6_vmpyubv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyubv">;
+
+def int_hexagon_V6_vmpyubv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyubv_128B">;
+
+def int_hexagon_V6_vmpyhss :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyhss">;
+
+def int_hexagon_V6_vmpyhss_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyhss_128B">;
+
+def int_hexagon_V6_hi :
+Hexagon_v16i32_v32i32_Intrinsic<"HEXAGON_V6_hi">;
+
+def int_hexagon_V6_hi_128B :
+Hexagon_v32i32_v64i32_Intrinsic<"HEXAGON_V6_hi_128B">;
+
+def int_hexagon_V6_vasrwuhsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwuhsat">;
+
+def int_hexagon_V6_vasrwuhsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwuhsat_128B">;
+
+def int_hexagon_V6_veqw :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw">;
+
+def int_hexagon_V6_veqw_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_128B">;
+
+def int_hexagon_V6_vdsaduh :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdsaduh">;
+
+def int_hexagon_V6_vdsaduh_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_128B">;
+
+def int_hexagon_V6_vsubw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubw">;
+
+def int_hexagon_V6_vsubw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubw_128B">;
+
+def int_hexagon_V6_vsubw_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubw_dv">;
+
+def int_hexagon_V6_vsubw_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">;
+
+def int_hexagon_V6_veqb_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_and">;
+
+def int_hexagon_V6_veqb_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_and_128B">;
+
+def int_hexagon_V6_vmpyih :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyih">;
+
+def int_hexagon_V6_vmpyih_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyih_128B">;
+
+def int_hexagon_V6_vtmpyb_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyb_acc">;
+
+def int_hexagon_V6_vtmpyb_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vtmpyb_acc_128B">;
+
+def int_hexagon_V6_vrmpybus :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vrmpybus">;
+
+def int_hexagon_V6_vrmpybus_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vrmpybus_128B">;
+
+def int_hexagon_V6_vmpybus_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpybus_acc">;
+
+def int_hexagon_V6_vmpybus_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpybus_acc_128B">;
+
+def int_hexagon_V6_vgth_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_xor">;
+
+def int_hexagon_V6_vgth_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_xor_128B">;
+
+def int_hexagon_V6_vsubhsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhsat">;
+
+def int_hexagon_V6_vsubhsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
+
+def int_hexagon_V6_vrmpyubi_acc :
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+
+def int_hexagon_V6_vrmpyubi_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+
+def int_hexagon_V6_vabsw :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsw">;
+
+def int_hexagon_V6_vabsw_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsw_128B">;
+
+def int_hexagon_V6_vaddwsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwsat_dv">;
+
+def int_hexagon_V6_vaddwsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddwsat_dv_128B">;
+
+def int_hexagon_V6_vlsrw :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vlsrw">;
+
+def int_hexagon_V6_vlsrw_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vlsrw_128B">;
+
+def int_hexagon_V6_vabsh :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsh">;
+
+def int_hexagon_V6_vabsh_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsh_128B">;
+
+def int_hexagon_V6_vlsrh :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vlsrh">;
+
+def int_hexagon_V6_vlsrh_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vlsrh_128B">;
+
+def int_hexagon_V6_valignb :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignb">;
+
+def int_hexagon_V6_valignb_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignb_128B">;
+
+def int_hexagon_V6_vsubhq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhq">;
+
+def int_hexagon_V6_vsubhq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+
+def int_hexagon_V6_vpackoh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackoh">;
+
+def int_hexagon_V6_vpackoh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackoh_128B">;
+
+def int_hexagon_V6_vdmpybus_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_acc">;
+
+def int_hexagon_V6_vdmpybus_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_acc_128B">;
+
+def int_hexagon_V6_vdmpyhvsat_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc">;
+
+def int_hexagon_V6_vdmpyhvsat_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc_128B">;
+
+def int_hexagon_V6_vrmpybv_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybv_acc">;
+
+def int_hexagon_V6_vrmpybv_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybv_acc_128B">;
+
+def int_hexagon_V6_vaddhsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhsat">;
+
+def int_hexagon_V6_vaddhsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcombine,VD_ftype_VIVI,2)
-// tag : V6_vcombine
def int_hexagon_V6_vcombine :
-Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vcombine">;
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcombine">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcombine_128B,VD_ftype_VIVI,2)
-// tag : V6_vcombine_128B
def int_hexagon_V6_vcombine_128B :
-Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vcombine_128B">;
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcombine_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdelta,VI_ftype_VIVI,2)
-// tag : V6_vdelta
-def int_hexagon_V6_vdelta :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdelta">;
+def int_hexagon_V6_vandqrt_acc :
+Hexagon_v16i32_v16i32v512i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdelta_128B,VI_ftype_VIVI,2)
-// tag : V6_vdelta_128B
-def int_hexagon_V6_vdelta_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdelta_128B">;
+def int_hexagon_V6_vandqrt_acc_128B :
+Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrdelta,VI_ftype_VIVI,2)
-// tag : V6_vrdelta
-def int_hexagon_V6_vrdelta :
-Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrdelta">;
+def int_hexagon_V6_vaslhv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaslhv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrdelta_128B,VI_ftype_VIVI,2)
-// tag : V6_vrdelta_128B
-def int_hexagon_V6_vrdelta_128B :
-Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrdelta_128B">;
+def int_hexagon_V6_vaslhv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaslhv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcl0w,VI_ftype_VI,1)
-// tag : V6_vcl0w
-def int_hexagon_V6_vcl0w :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0w">;
+def int_hexagon_V6_vinsertwr :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vinsertwr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcl0w_128B,VI_ftype_VI,1)
-// tag : V6_vcl0w_128B
-def int_hexagon_V6_vcl0w_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0w_128B">;
+def int_hexagon_V6_vinsertwr_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vinsertwr_128B">;
+
+def int_hexagon_V6_vsubh_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubh_dv">;
+
+def int_hexagon_V6_vsubh_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">;
+
+def int_hexagon_V6_vshuffb :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vshuffb">;
+
+def int_hexagon_V6_vshuffb_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vshuffb_128B">;
+
+def int_hexagon_V6_vand :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vand">;
+
+def int_hexagon_V6_vand_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vand_128B">;
+
+def int_hexagon_V6_vmpyhv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhv">;
+
+def int_hexagon_V6_vmpyhv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhv_128B">;
+
+def int_hexagon_V6_vdmpyhsuisat_acc :
+Hexagon_v16i32_v16i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc">;
+
+def int_hexagon_V6_vdmpyhsuisat_acc_128B :
+Hexagon_v32i32_v32i32v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc_128B">;
+
+def int_hexagon_V6_vsububsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsububsat_dv">;
+
+def int_hexagon_V6_vsububsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsububsat_dv_128B">;
+
+def int_hexagon_V6_vgtb_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_xor">;
+
+def int_hexagon_V6_vgtb_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">;
+
+def int_hexagon_V6_vdsaduh_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_acc">;
+
+def int_hexagon_V6_vdsaduh_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">;
+
+def int_hexagon_V6_vrmpyub :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vrmpyub">;
+
+def int_hexagon_V6_vrmpyub_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vrmpyub_128B">;
+
+def int_hexagon_V6_vmpyuh_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyuh_acc">;
+
+def int_hexagon_V6_vmpyuh_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyuh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcl0h,VI_ftype_VI,1)
-// tag : V6_vcl0h
def int_hexagon_V6_vcl0h :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0h">;
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcl0h">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vcl0h_128B,VI_ftype_VI,1)
-// tag : V6_vcl0h_128B
def int_hexagon_V6_vcl0h_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0h_128B">;
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcl0h_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnormamtw,VI_ftype_VI,1)
-// tag : V6_vnormamtw
-def int_hexagon_V6_vnormamtw :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamtw">;
+def int_hexagon_V6_vmpyhus_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyhus_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnormamtw_128B,VI_ftype_VI,1)
-// tag : V6_vnormamtw_128B
-def int_hexagon_V6_vnormamtw_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamtw_128B">;
+def int_hexagon_V6_vmpyhus_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyhus_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnormamth,VI_ftype_VI,1)
-// tag : V6_vnormamth
-def int_hexagon_V6_vnormamth :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamth">;
+def int_hexagon_V6_vmpybv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpybv_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnormamth_128B,VI_ftype_VI,1)
-// tag : V6_vnormamth_128B
-def int_hexagon_V6_vnormamth_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamth_128B">;
+def int_hexagon_V6_vmpybv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
+
+def int_hexagon_V6_vrsadubi :
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi">;
+
+def int_hexagon_V6_vrsadubi_128B :
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+
+def int_hexagon_V6_vdmpyhb_dv_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">;
+
+def int_hexagon_V6_vdmpyhb_dv_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc_128B">;
+
+def int_hexagon_V6_vshufeh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufeh">;
+
+def int_hexagon_V6_vshufeh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshufeh_128B">;
+
+def int_hexagon_V6_vmpyewuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyewuh">;
+
+def int_hexagon_V6_vmpyewuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyewuh_128B">;
+
+def int_hexagon_V6_vmpyhsrs :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyhsrs">;
+
+def int_hexagon_V6_vmpyhsrs_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyhsrs_128B">;
+
+def int_hexagon_V6_vdmpybus_dv_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc">;
+
+def int_hexagon_V6_vdmpybus_dv_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc_128B">;
+
+def int_hexagon_V6_vaddubh :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddubh">;
+
+def int_hexagon_V6_vaddubh_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddubh_128B">;
+
+def int_hexagon_V6_vasrwh :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwh">;
+
+def int_hexagon_V6_vasrwh_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwh_128B">;
+
+def int_hexagon_V6_ld0 :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_ld0">;
+
+def int_hexagon_V6_ld0_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_ld0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpopcounth,VI_ftype_VI,1)
-// tag : V6_vpopcounth
def int_hexagon_V6_vpopcounth :
-Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vpopcounth">;
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vpopcounth">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vpopcounth_128B,VI_ftype_VI,1)
-// tag : V6_vpopcounth_128B
def int_hexagon_V6_vpopcounth_128B :
-Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vpopcounth_128B">;
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vpopcounth_128B">;
+
+def int_hexagon_V6_ldnt0 :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_ldnt0">;
+
+def int_hexagon_V6_ldnt0_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_ldnt0_128B">;
+
+def int_hexagon_V6_vgth_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_and">;
+
+def int_hexagon_V6_vgth_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_and_128B">;
+
+def int_hexagon_V6_vaddubsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddubsat_dv">;
+
+def int_hexagon_V6_vaddubsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddubsat_dv_128B">;
+
+def int_hexagon_V6_vpackeh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackeh">;
+
+def int_hexagon_V6_vpackeh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackeh_128B">;
+
+def int_hexagon_V6_vmpyh :
+Hexagon_v32i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyh">;
+
+def int_hexagon_V6_vmpyh_128B :
+Hexagon_v64i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyh_128B">;
+
+def int_hexagon_V6_vminh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminh">;
+
+def int_hexagon_V6_vminh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminh_128B">;
+
+def int_hexagon_V6_pred_scalar2 :
+Hexagon_v512i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2">;
+
+def int_hexagon_V6_pred_scalar2_128B :
+Hexagon_v1024i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">;
+
+def int_hexagon_V6_vdealh :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vdealh">;
+
+def int_hexagon_V6_vdealh_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vdealh_128B">;
+
+def int_hexagon_V6_vpackwh_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vpackwh_sat">;
+
+def int_hexagon_V6_vpackwh_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vpackwh_sat_128B">;
+
+def int_hexagon_V6_vaslh :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vaslh">;
+
+def int_hexagon_V6_vaslh_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vaslh_128B">;
+
+def int_hexagon_V6_vgtuw_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_and">;
+
+def int_hexagon_V6_vgtuw_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">;
+
+def int_hexagon_V6_vor :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vor">;
+
+def int_hexagon_V6_vor_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vor_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvb
def int_hexagon_V6_vlutvvb :
-Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb">;
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvb_128B
def int_hexagon_V6_vlutvvb_128B :
-Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_128B">;
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_128B">;
+
+def int_hexagon_V6_vmpyiowh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyiowh">;
+
+def int_hexagon_V6_vmpyiowh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyiowh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc,VI_ftype_VIVIVISI,4)
-// tag : V6_vlutvvb_oracc
def int_hexagon_V6_vlutvvb_oracc :
-Hexagon_v512v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc">;
+Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc_128B,VI_ftype_VIVIVISI,4)
-// tag : V6_vlutvvb_oracc_128B
def int_hexagon_V6_vlutvvb_oracc_128B :
-Hexagon_v1024v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc_128B">;
+Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwh
-def int_hexagon_V6_vlutvwh :
-Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh">;
+def int_hexagon_V6_vandvrt :
+Hexagon_v512i1_v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_128B,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwh_128B
-def int_hexagon_V6_vlutvwh_128B :
-Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_128B">;
+def int_hexagon_V6_vandvrt_128B :
+Hexagon_v1024i1_v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc,VD_ftype_VDVIVISI,4)
-// tag : V6_vlutvwh_oracc
-def int_hexagon_V6_vlutvwh_oracc :
-Hexagon_v1024v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc">;
+def int_hexagon_V6_veqh_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc_128B,VD_ftype_VDVIVISI,4)
-// tag : V6_vlutvwh_oracc_128B
-def int_hexagon_V6_vlutvwh_oracc_128B :
-Hexagon_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">;
+def int_hexagon_V6_veqh_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_xor_128B">;
-//
-// Masked vector stores
-//
-def int_hexagon_V6_vS32b_qpred_ai :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai">;
+def int_hexagon_V6_vadduhw :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadduhw">;
-def int_hexagon_V6_vS32b_nqpred_ai :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai">;
+def int_hexagon_V6_vadduhw_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduhw_128B">;
-def int_hexagon_V6_vS32b_nt_qpred_ai :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai">;
+def int_hexagon_V6_vcl0w :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcl0w">;
-def int_hexagon_V6_vS32b_nt_nqpred_ai :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai">;
+def int_hexagon_V6_vcl0w_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcl0w_128B">;
-def int_hexagon_V6_vS32b_qpred_ai_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B">;
+def int_hexagon_V6_vmpyihb :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyihb">;
-def int_hexagon_V6_vS32b_nqpred_ai_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B">;
+def int_hexagon_V6_vmpyihb_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyihb_128B">;
-def int_hexagon_V6_vS32b_nt_qpred_ai_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B">;
+def int_hexagon_V6_vtmpybus :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vtmpybus">;
-def int_hexagon_V6_vS32b_nt_nqpred_ai_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B">;
+def int_hexagon_V6_vtmpybus_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vtmpybus_128B">;
-def int_hexagon_V6_vmaskedstoreq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">;
+def int_hexagon_V6_vd0 :
+Hexagon_v16i32__Intrinsic<"HEXAGON_V6_vd0">;
-def int_hexagon_V6_vmaskedstorenq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorenq">;
+def int_hexagon_V6_vd0_128B :
+Hexagon_v32i32__Intrinsic<"HEXAGON_V6_vd0_128B">;
-def int_hexagon_V6_vmaskedstorentq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentq">;
+def int_hexagon_V6_veqh_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_or">;
-def int_hexagon_V6_vmaskedstorentnq :
-Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentnq">;
+def int_hexagon_V6_veqh_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_or_128B">;
-def int_hexagon_V6_vmaskedstoreq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstoreq_128B">;
+def int_hexagon_V6_vgtw_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_or">;
-def int_hexagon_V6_vmaskedstorenq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorenq_128B">;
+def int_hexagon_V6_vgtw_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_or_128B">;
-def int_hexagon_V6_vmaskedstorentq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentq_128B">;
+def int_hexagon_V6_vdmpybus :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vdmpybus">;
-def int_hexagon_V6_vmaskedstorentnq_128B :
-Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentnq_128B">;
+def int_hexagon_V6_vdmpybus_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpybus_128B">;
-multiclass Hexagon_custom_circ_ld_Intrinsic<LLVMType ElTy> {
- def NAME#_pci : Hexagon_NonGCC_Intrinsic<
- [ElTy, llvm_ptr_ty],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
- [IntrArgMemOnly, NoCapture<3>]>;
- def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
- [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty],
- [IntrArgMemOnly, NoCapture<2>]>;
-}
+def int_hexagon_V6_vgtub_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_or">;
-defm int_hexagon_L2_loadrub : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadrb : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadruh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadrh : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadri : Hexagon_custom_circ_ld_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_L2_loadrd : Hexagon_custom_circ_ld_Intrinsic<llvm_i64_ty>;
+def int_hexagon_V6_vgtub_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_or_128B">;
-multiclass Hexagon_custom_circ_st_Intrinsic<LLVMType ElTy> {
- def NAME#_pci : Hexagon_NonGCC_Intrinsic<
- [llvm_ptr_ty],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
- [IntrArgMemOnly, NoCapture<4>]>;
- def NAME#_pcr : Hexagon_NonGCC_Intrinsic<
- [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, ElTy, llvm_ptr_ty],
- [IntrArgMemOnly, NoCapture<3>]>;
-}
+def int_hexagon_V6_vmpybus :
+Hexagon_v32i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpybus">;
-defm int_hexagon_S2_storerb : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_S2_storerh : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_S2_storerf : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_S2_storeri : Hexagon_custom_circ_st_Intrinsic<llvm_i32_ty>;
-defm int_hexagon_S2_storerd : Hexagon_custom_circ_st_Intrinsic<llvm_i64_ty>;
+def int_hexagon_V6_vmpybus_128B :
+Hexagon_v64i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpybus_128B">;
-// The front-end emits the intrinsic call with only two arguments. The third
-// argument from the builtin is already used by front-end to write to memory
-// by generating a store.
-class Hexagon_custom_brev_ld_Intrinsic<LLVMType ElTy>
- : Hexagon_NonGCC_Intrinsic<
- [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem]>;
+def int_hexagon_V6_vdmpyhb_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_acc">;
-def int_hexagon_L2_loadrub_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadrb_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadruh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadrh_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadri_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i32_ty>;
-def int_hexagon_L2_loadrd_pbr : Hexagon_custom_brev_ld_Intrinsic<llvm_i64_ty>;
+def int_hexagon_V6_vdmpyhb_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_acc_128B">;
-def int_hexagon_S2_storerb_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stb">;
-def int_hexagon_S2_storerh_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sth">;
-def int_hexagon_S2_storerf_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_sthhi">;
-def int_hexagon_S2_storeri_pbr : Hexagon_mem_memsisi_Intrinsic<"brev_stw">;
-def int_hexagon_S2_storerd_pbr : Hexagon_mem_memdisi_Intrinsic<"brev_std">;
+def int_hexagon_V6_vandvrt_acc :
+Hexagon_v512i1_v512i1v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc">;
+def int_hexagon_V6_vandvrt_acc_128B :
+Hexagon_v1024i1_v1024i1v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">;
-///
-/// HexagonV62 intrinsics
-///
+def int_hexagon_V6_vassign :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vassign">;
-//
-// Hexagon_LLiLLiLLi_Intrinsic<string GCCIntSuffix>
-// tag : M6_vabsdiffb
-class Hexagon_LLiLLiLLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vassign_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vassign_128B">;
-//
-// Hexagon_LLii_Intrinsic<string GCCIntSuffix>
-// tag : S6_vsplatrbp
-class Hexagon_LLii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i64_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddwnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwnq">;
-//
-// Hexagon_V62_v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlsrb
-class Hexagon_V62_v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddwnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
-//
-// Hexagon_V62_v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlsrb_128B
-class Hexagon_V62_v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vgtub_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_and">;
-//
-// Hexagon_V62_v512v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vasrwuhrndsat
-class Hexagon_V62_v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vgtub_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_and_128B">;
-//
-// Hexagon_V62_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vasrwuhrndsat_128B
-class Hexagon_V62_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vdmpyhb_dv :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv">;
-//
-// Hexagon_V62_v512v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrounduwuh
-class Hexagon_V62_v512v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vdmpyhb_dv_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_128B">;
-//
-// Hexagon_V62_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrounduwuh_128B
-class Hexagon_V62_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vunpackb :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vunpackb">;
-//
-// Hexagon_V62_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
-// tag : V6_vadduwsat_dv_128B
-class Hexagon_V62_v2048v2048v2048_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vunpackb_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vunpackb_128B">;
-//
-// Hexagon_V62_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddhw_acc
-class Hexagon_V62_v1024v1024v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vunpackh :
+Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vunpackh">;
-//
-// Hexagon_V62_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vaddhw_acc_128B
-class Hexagon_V62_v2048v2048v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vunpackh_128B :
+Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vunpackh_128B">;
-//
-// Hexagon_V62_v1024v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyewuh_64
-class Hexagon_V62_v1024v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpahb_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpahb_acc">;
-//
-// Hexagon_V62_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyewuh_64_128B
-class Hexagon_V62_v2048v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpahb_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vmpahb_acc_128B">;
-//
-// Hexagon_V62_v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpauhb_128B
-class Hexagon_V62_v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddbnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbnq">;
-//
-// Hexagon_V62_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpauhb_acc_128B
-class Hexagon_V62_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddbnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
-//
-// Hexagon_V62_v512v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandnqrt
-class Hexagon_V62_v512v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vlalignbi :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignbi">;
-//
-// Hexagon_V62_v1024v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandnqrt_128B
-class Hexagon_V62_v1024v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vlalignbi_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignbi_128B">;
-//
-// Hexagon_V62_v512v512v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandnqrt_acc
-class Hexagon_V62_v512v512v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v512i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vsatwh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatwh">;
-//
-// Hexagon_V62_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandnqrt_acc_128B
-class Hexagon_V62_v1024v1024v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v1024i1_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vsatwh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsatwh_128B">;
-//
-// Hexagon_V62_v512v64iv512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvqv
-class Hexagon_V62_v512v64iv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vgtuh :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh">;
-//
-// Hexagon_V62_v1024v128iv1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vandvqv_128B
-class Hexagon_V62_v1024v128iv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vgtuh_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_128B">;
-//
-// Hexagon_V62_v64ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_scalar2v2
-class Hexagon_V62_v64ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyihb_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyihb_acc">;
-//
-// Hexagon_V62_v128ii_Intrinsic<string GCCIntSuffix>
-// tag : V6_pred_scalar2v2_128B
-class Hexagon_V62_v128ii_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyihb_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyihb_acc_128B">;
-//
-// Hexagon_V62_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
-// tag : V6_shuffeqw
-class Hexagon_V62_v64iv64iv64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v512i1_ty], [llvm_v512i1_ty,llvm_v512i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrmpybusv_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybusv_acc">;
-//
-// Hexagon_V62_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
-// tag : V6_shuffeqw_128B
-class Hexagon_V62_v128iv128iv128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v1024i1_ty], [llvm_v1024i1_ty,llvm_v1024i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrmpybusv_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybusv_acc_128B">;
-//
-// Hexagon_V62_v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_lvsplath
-class Hexagon_V62_v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrdelta :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrdelta">;
-//
-// Hexagon_V62_v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_lvsplath_128B
-class Hexagon_V62_v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrdelta_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrdelta_128B">;
-//
-// Hexagon_V62_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvvb_oracci
-class Hexagon_V62_v512v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vroundwh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vroundwh">;
-//
-// Hexagon_V62_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvvb_oracci_128B
-class Hexagon_V62_v1024v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vroundwh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vroundwh_128B">;
-//
-// Hexagon_V62_v1024v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwhi
-class Hexagon_V62_v1024v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddw_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddw_dv">;
-//
-// Hexagon_V62_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwhi_128B
-class Hexagon_V62_v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddw_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">;
-//
-// Hexagon_V62_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwh_oracci
-class Hexagon_V62_v1024v1024v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyiwb_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwb_acc">;
-//
-// Hexagon_V62_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlutvwh_oracci_128B
-class Hexagon_V62_v2048v2048v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyiwb_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwb_acc_128B">;
-// Hexagon_v512v64iv512v512v64i_Intrinsic<string GCCIntSuffix>
-// tag: V6_vaddcarry
-class Hexagon_v512v64iv512v512v64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty, llvm_v512i1_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v512i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vsubbq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbq">;
-// Hexagon_v1024v128iv1024v1024v128i_Intrinsic<string GCCIntSuffix>
-// tag: V6_vaddcarry_128B
-class Hexagon_v1024v128iv1024v1024v128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty, llvm_v1024i1_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v1024i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vsubbq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+def int_hexagon_V6_veqh_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_and">;
-//
-// BUILTIN_INFO(HEXAGON.M6_vabsdiffb,DI_ftype_DIDI,2)
-// tag : M6_vabsdiffb
-def int_hexagon_M6_vabsdiffb :
-Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffb">;
+def int_hexagon_V6_veqh_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
-//
-// BUILTIN_INFO(HEXAGON.M6_vabsdiffub,DI_ftype_DIDI,2)
-// tag : M6_vabsdiffub
-def int_hexagon_M6_vabsdiffub :
-Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffub">;
+def int_hexagon_V6_valignbi :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignbi">;
-//
-// BUILTIN_INFO(HEXAGON.S6_vtrunehb_ppp,DI_ftype_DIDI,2)
-// tag : S6_vtrunehb_ppp
-def int_hexagon_S6_vtrunehb_ppp :
-Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunehb_ppp">;
+def int_hexagon_V6_valignbi_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignbi_128B">;
-//
-// BUILTIN_INFO(HEXAGON.S6_vtrunohb_ppp,DI_ftype_DIDI,2)
-// tag : S6_vtrunohb_ppp
-def int_hexagon_S6_vtrunohb_ppp :
-Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunohb_ppp">;
+def int_hexagon_V6_vaddwsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwsat">;
-//
-// BUILTIN_INFO(HEXAGON.S6_vsplatrbp,DI_ftype_SI,1)
-// tag : S6_vsplatrbp
-def int_hexagon_S6_vsplatrbp :
-Hexagon_LLii_Intrinsic<"HEXAGON_S6_vsplatrbp">;
+def int_hexagon_V6_vaddwsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrb,VI_ftype_VISI,2)
-// tag : V6_vlsrb
-def int_hexagon_V6_vlsrb :
-Hexagon_V62_v512v512i_Intrinsic<"HEXAGON_V6_vlsrb">;
+def int_hexagon_V6_veqw_and :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_and">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlsrb_128B,VI_ftype_VISI,2)
-// tag : V6_vlsrb_128B
-def int_hexagon_V6_vlsrb_128B :
-Hexagon_V62_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrb_128B">;
+def int_hexagon_V6_veqw_and_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_and_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwuhrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrwuhrndsat
-def int_hexagon_V6_vasrwuhrndsat :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwuhrndsat">;
+def int_hexagon_V6_vabsdiffub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrwuhrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrwuhrndsat_128B
-def int_hexagon_V6_vasrwuhrndsat_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwuhrndsat_128B">;
+def int_hexagon_V6_vabsdiffub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vabsdiffub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruwuhrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasruwuhrndsat
-def int_hexagon_V6_vasruwuhrndsat :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruwuhrndsat">;
+def int_hexagon_V6_vshuffeb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshuffeb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruwuhrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasruwuhrndsat_128B
-def int_hexagon_V6_vasruwuhrndsat_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruwuhrndsat_128B">;
+def int_hexagon_V6_vshuffeb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vshuffeb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhbsat,VI_ftype_VIVISI,3)
-// tag : V6_vasrhbsat
-def int_hexagon_V6_vasrhbsat :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhbsat">;
+def int_hexagon_V6_vabsdiffuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrhbsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrhbsat_128B
-def int_hexagon_V6_vasrhbsat_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhbsat_128B">;
+def int_hexagon_V6_vabsdiffuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vabsdiffuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrounduwuh,VI_ftype_VIVI,2)
-// tag : V6_vrounduwuh
-def int_hexagon_V6_vrounduwuh :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vrounduwuh">;
+def int_hexagon_V6_veqw_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrounduwuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vrounduwuh_128B
-def int_hexagon_V6_vrounduwuh_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrounduwuh_128B">;
+def int_hexagon_V6_veqw_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_xor_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrounduhub,VI_ftype_VIVI,2)
-// tag : V6_vrounduhub
-def int_hexagon_V6_vrounduhub :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vrounduhub">;
+def int_hexagon_V6_vgth :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrounduhub_128B,VI_ftype_VIVI,2)
-// tag : V6_vrounduhub_128B
-def int_hexagon_V6_vrounduhub_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrounduhub_128B">;
+def int_hexagon_V6_vgth_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduwsat,VI_ftype_VIVI,2)
-// tag : V6_vadduwsat
-def int_hexagon_V6_vadduwsat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vadduwsat">;
+def int_hexagon_V6_vgtuw_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_xor">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduwsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vadduwsat_128B
-def int_hexagon_V6_vadduwsat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduwsat_128B">;
+def int_hexagon_V6_vgtuw_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduwsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vadduwsat_dv
-def int_hexagon_V6_vadduwsat_dv :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduwsat_dv">;
+def int_hexagon_V6_vgtb :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduwsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vadduwsat_dv_128B
-def int_hexagon_V6_vadduwsat_dv_128B :
-Hexagon_V62_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vadduwsat_dv_128B">;
+def int_hexagon_V6_vgtb_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuwsat,VI_ftype_VIVI,2)
-// tag : V6_vsubuwsat
-def int_hexagon_V6_vsubuwsat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vsubuwsat">;
+def int_hexagon_V6_vgtw :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuwsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubuwsat_128B
-def int_hexagon_V6_vsubuwsat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuwsat_128B">;
+def int_hexagon_V6_vgtw_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuwsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubuwsat_dv
-def int_hexagon_V6_vsubuwsat_dv :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuwsat_dv">;
+def int_hexagon_V6_vsubwq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubuwsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubuwsat_dv_128B
-def int_hexagon_V6_vsubuwsat_dv_128B :
-Hexagon_V62_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubuwsat_dv_128B">;
+def int_hexagon_V6_vsubwq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbsat,VI_ftype_VIVI,2)
-// tag : V6_vaddbsat
-def int_hexagon_V6_vaddbsat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vaddbsat">;
+def int_hexagon_V6_vnot :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vnot">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddbsat_128B
-def int_hexagon_V6_vaddbsat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddbsat_128B">;
+def int_hexagon_V6_vnot_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vnot_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vaddbsat_dv
-def int_hexagon_V6_vaddbsat_dv :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddbsat_dv">;
+def int_hexagon_V6_vgtb_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddbsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vaddbsat_dv_128B
-def int_hexagon_V6_vaddbsat_dv_128B :
-Hexagon_V62_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddbsat_dv_128B">;
+def int_hexagon_V6_vgtb_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_or_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbsat,VI_ftype_VIVI,2)
-// tag : V6_vsubbsat
-def int_hexagon_V6_vsubbsat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vsubbsat">;
+def int_hexagon_V6_vgtuw_or :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_or">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbsat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubbsat_128B
-def int_hexagon_V6_vsubbsat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubbsat_128B">;
+def int_hexagon_V6_vgtuw_or_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbsat_dv,VD_ftype_VDVD,2)
-// tag : V6_vsubbsat_dv
-def int_hexagon_V6_vsubbsat_dv :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubbsat_dv">;
+def int_hexagon_V6_vaddubsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddubsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubbsat_dv_128B,VD_ftype_VDVD,2)
-// tag : V6_vsubbsat_dv_128B
-def int_hexagon_V6_vsubbsat_dv_128B :
-Hexagon_V62_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubbsat_dv_128B">;
+def int_hexagon_V6_vaddubsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddubsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddububb_sat,VI_ftype_VIVI,2)
-// tag : V6_vaddububb_sat
-def int_hexagon_V6_vaddububb_sat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vaddububb_sat">;
+def int_hexagon_V6_vmaxw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddububb_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddububb_sat_128B
-def int_hexagon_V6_vaddububb_sat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddububb_sat_128B">;
+def int_hexagon_V6_vmaxw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubububb_sat,VI_ftype_VIVI,2)
-// tag : V6_vsubububb_sat
-def int_hexagon_V6_vsubububb_sat :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vsubububb_sat">;
+def int_hexagon_V6_vaslwv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaslwv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubububb_sat_128B,VI_ftype_VIVI,2)
-// tag : V6_vsubububb_sat_128B
-def int_hexagon_V6_vsubububb_sat_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubububb_sat_128B">;
+def int_hexagon_V6_vaslwv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaslwv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhw_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vaddhw_acc
-def int_hexagon_V6_vaddhw_acc :
-Hexagon_V62_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vaddhw_acc">;
+def int_hexagon_V6_vabsw_sat :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsw_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddhw_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vaddhw_acc_128B
-def int_hexagon_V6_vaddhw_acc_128B :
-Hexagon_V62_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddhw_acc_128B">;
+def int_hexagon_V6_vabsw_sat_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsw_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhw_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vadduhw_acc
-def int_hexagon_V6_vadduhw_acc :
-Hexagon_V62_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vadduhw_acc">;
+def int_hexagon_V6_vsubwsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vadduhw_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vadduhw_acc_128B
-def int_hexagon_V6_vadduhw_acc_128B :
-Hexagon_V62_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vadduhw_acc_128B">;
+def int_hexagon_V6_vsubwsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubh_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vaddubh_acc
-def int_hexagon_V6_vaddubh_acc :
-Hexagon_V62_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vaddubh_acc">;
+def int_hexagon_V6_vroundhub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vroundhub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddubh_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vaddubh_acc_128B
-def int_hexagon_V6_vaddubh_acc_128B :
-Hexagon_V62_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddubh_acc_128B">;
+def int_hexagon_V6_vroundhub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vroundhub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_64,VD_ftype_VIVI,2)
-// tag : V6_vmpyewuh_64
-def int_hexagon_V6_vmpyewuh_64 :
-Hexagon_V62_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyewuh_64">;
+def int_hexagon_V6_vdmpyhisat_acc :
+Hexagon_v16i32_v16i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_64_128B,VD_ftype_VIVI,2)
-// tag : V6_vmpyewuh_64_128B
-def int_hexagon_V6_vmpyewuh_64_128B :
-Hexagon_V62_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyewuh_64_128B">;
+def int_hexagon_V6_vdmpyhisat_acc_128B :
+Hexagon_v32i32_v32i32v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_64_acc,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyowh_64_acc
-def int_hexagon_V6_vmpyowh_64_acc :
-Hexagon_V62_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_64_acc">;
+def int_hexagon_V6_vmpabus :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpabus">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyowh_64_acc_128B,VD_ftype_VDVIVI,3)
-// tag : V6_vmpyowh_64_acc_128B
-def int_hexagon_V6_vmpyowh_64_acc_128B :
-Hexagon_V62_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_64_acc_128B">;
+def int_hexagon_V6_vmpabus_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vmpabus_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhb,VD_ftype_VDSI,2)
-// tag : V6_vmpauhb
-def int_hexagon_V6_vmpauhb :
-Hexagon_V62_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpauhb">;
+def int_hexagon_V6_vassignp :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vassignp">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhb_128B,VD_ftype_VDSI,2)
-// tag : V6_vmpauhb_128B
-def int_hexagon_V6_vmpauhb_128B :
-Hexagon_V62_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpauhb_128B">;
+def int_hexagon_V6_vassignp_128B :
+Hexagon_v64i32_v64i32_Intrinsic<"HEXAGON_V6_vassignp_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhb_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vmpauhb_acc
-def int_hexagon_V6_vmpauhb_acc :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpauhb_acc">;
+def int_hexagon_V6_veqb :
+Hexagon_v512i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhb_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vmpauhb_acc_128B
-def int_hexagon_V6_vmpauhb_acc_128B :
-Hexagon_V62_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpauhb_acc_128B">;
+def int_hexagon_V6_veqb_128B :
+Hexagon_v1024i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwub,VI_ftype_VISI,2)
-// tag : V6_vmpyiwub
-def int_hexagon_V6_vmpyiwub :
-Hexagon_V62_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwub">;
+def int_hexagon_V6_vsububh :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsububh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwub_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyiwub_128B
-def int_hexagon_V6_vmpyiwub_128B :
-Hexagon_V62_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwub_128B">;
+def int_hexagon_V6_vsububh_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsububh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwub_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwub_acc
-def int_hexagon_V6_vmpyiwub_acc :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwub_acc">;
+def int_hexagon_V6_lvsplatw :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_lvsplatw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyiwub_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyiwub_acc_128B
-def int_hexagon_V6_vmpyiwub_acc_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwub_acc_128B">;
+def int_hexagon_V6_lvsplatw_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplatw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandnqrt,VI_ftype_QVSI,2)
-// tag : V6_vandnqrt
-def int_hexagon_V6_vandnqrt :
-Hexagon_V62_v512v64ii_Intrinsic<"HEXAGON_V6_vandnqrt">;
+def int_hexagon_V6_vaddhnq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhnq">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandnqrt_128B,VI_ftype_QVSI,2)
-// tag : V6_vandnqrt_128B
-def int_hexagon_V6_vandnqrt_128B :
-Hexagon_V62_v1024v128ii_Intrinsic<"HEXAGON_V6_vandnqrt_128B">;
+def int_hexagon_V6_vaddhnq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+
+def int_hexagon_V6_vdmpyhsusat :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsusat">;
+
+def int_hexagon_V6_vdmpyhsusat_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsusat_128B">;
+
+def int_hexagon_V6_pred_not :
+Hexagon_v512i1_v512i1_Intrinsic<"HEXAGON_V6_pred_not">;
+
+def int_hexagon_V6_pred_not_128B :
+Hexagon_v1024i1_v1024i1_Intrinsic<"HEXAGON_V6_pred_not_128B">;
+
+def int_hexagon_V6_vlutvwh_oracc :
+Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracc">;
+
+def int_hexagon_V6_vlutvwh_oracc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">;
+
+def int_hexagon_V6_vmpyiewh_acc :
+Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyiewh_acc">;
+
+def int_hexagon_V6_vmpyiewh_acc_128B :
+Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyiewh_acc_128B">;
+
+def int_hexagon_V6_vdealvdd :
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdealvdd">;
+
+def int_hexagon_V6_vdealvdd_128B :
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdealvdd_128B">;
+
+def int_hexagon_V6_vavgw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgw">;
+
+def int_hexagon_V6_vavgw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgw_128B">;
+
+def int_hexagon_V6_vdmpyhsusat_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc">;
+
+def int_hexagon_V6_vdmpyhsusat_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc_128B">;
+
+def int_hexagon_V6_vgtw_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_xor">;
+
+def int_hexagon_V6_vgtw_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">;
+
+def int_hexagon_V6_vtmpyhb_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyhb_acc">;
+
+def int_hexagon_V6_vtmpyhb_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vtmpyhb_acc_128B">;
+
+def int_hexagon_V6_vaddhw :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhw">;
+
+def int_hexagon_V6_vaddhw_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhw_128B">;
+
+def int_hexagon_V6_vaddhq :
+Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhq">;
+
+def int_hexagon_V6_vaddhq_128B :
+Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+
+def int_hexagon_V6_vrmpyubv :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpyubv">;
+
+def int_hexagon_V6_vrmpyubv_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpyubv_128B">;
+
+def int_hexagon_V6_vsubh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubh">;
+
+def int_hexagon_V6_vsubh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubh_128B">;
+
+def int_hexagon_V6_vrmpyubi :
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+
+def int_hexagon_V6_vrmpyubi_128B :
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+
+def int_hexagon_V6_vminw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminw">;
+
+def int_hexagon_V6_vminw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminw_128B">;
+
+def int_hexagon_V6_vmpyubv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyubv_acc">;
+
+def int_hexagon_V6_vmpyubv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyubv_acc_128B">;
+
+def int_hexagon_V6_pred_xor :
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_pred_xor">;
+
+def int_hexagon_V6_pred_xor_128B :
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_pred_xor_128B">;
+
+def int_hexagon_V6_veqb_xor :
+Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_xor">;
+
+def int_hexagon_V6_veqb_xor_128B :
+Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_xor_128B">;
+
+def int_hexagon_V6_vmpyiewuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyiewuh">;
+
+def int_hexagon_V6_vmpyiewuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyiewuh_128B">;
+
+def int_hexagon_V6_vmpybusv_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpybusv_acc">;
+
+def int_hexagon_V6_vmpybusv_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybusv_acc_128B">;
+
+def int_hexagon_V6_vavguhrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavguhrnd">;
+
+def int_hexagon_V6_vavguhrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavguhrnd_128B">;
+
+def int_hexagon_V6_vmpyowh_rnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh_rnd">;
+
+def int_hexagon_V6_vmpyowh_rnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_128B">;
+
+def int_hexagon_V6_vsubwsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwsat">;
+
+def int_hexagon_V6_vsubwsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwsat_128B">;
+
+def int_hexagon_V6_vsubuhw :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubuhw">;
+
+def int_hexagon_V6_vsubuhw_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
+
+def int_hexagon_V6_vrmpybusi_acc :
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+
+def int_hexagon_V6_vrmpybusi_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+
+def int_hexagon_V6_vasrw :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vasrw">;
+
+def int_hexagon_V6_vasrw_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vasrw_128B">;
+
+def int_hexagon_V6_vasrh :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vasrh">;
+
+def int_hexagon_V6_vasrh_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vasrh_128B">;
+
+def int_hexagon_V6_vmpyuhv :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyuhv">;
+
+def int_hexagon_V6_vmpyuhv_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyuhv_128B">;
+
+def int_hexagon_V6_vasrhbrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhbrndsat">;
+
+def int_hexagon_V6_vasrhbrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrhbrndsat_128B">;
+
+def int_hexagon_V6_vsubuhsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuhsat_dv">;
+
+def int_hexagon_V6_vsubuhsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubuhsat_dv_128B">;
+
+def int_hexagon_V6_vabsdiffw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffw">;
+
+def int_hexagon_V6_vabsdiffw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vabsdiffw_128B">;
+
+// V62 HVX Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_vandnqrt_acc,VI_ftype_VIQVSI,3)
-// tag : V6_vandnqrt_acc
def int_hexagon_V6_vandnqrt_acc :
-Hexagon_V62_v512v512v64ii_Intrinsic<"HEXAGON_V6_vandnqrt_acc">;
+Hexagon_v16i32_v16i32v512i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandnqrt_acc_128B,VI_ftype_VIQVSI,3)
-// tag : V6_vandnqrt_acc_128B
def int_hexagon_V6_vandnqrt_acc_128B :
-Hexagon_V62_v1024v1024v128ii_Intrinsic<"HEXAGON_V6_vandnqrt_acc_128B">;
+Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvqv,VI_ftype_QVVI,2)
-// tag : V6_vandvqv
-def int_hexagon_V6_vandvqv :
-Hexagon_V62_v512v64iv512_Intrinsic<"HEXAGON_V6_vandvqv">;
+def int_hexagon_V6_vaddclbh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddclbh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvqv_128B,VI_ftype_QVVI,2)
-// tag : V6_vandvqv_128B
-def int_hexagon_V6_vandvqv_128B :
-Hexagon_V62_v1024v128iv1024_Intrinsic<"HEXAGON_V6_vandvqv_128B">;
+def int_hexagon_V6_vaddclbh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddclbh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvnqv,VI_ftype_QVVI,2)
-// tag : V6_vandvnqv
-def int_hexagon_V6_vandvnqv :
-Hexagon_V62_v512v64iv512_Intrinsic<"HEXAGON_V6_vandvnqv">;
+def int_hexagon_V6_vmpyowh_64_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyowh_64_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vandvnqv_128B,VI_ftype_QVVI,2)
-// tag : V6_vandvnqv_128B
-def int_hexagon_V6_vandvnqv_128B :
-Hexagon_V62_v1024v128iv1024_Intrinsic<"HEXAGON_V6_vandvnqv_128B">;
+def int_hexagon_V6_vmpyowh_64_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyowh_64_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_scalar2v2,QV_ftype_SI,1)
-// tag : V6_pred_scalar2v2
-def int_hexagon_V6_pred_scalar2v2 :
-Hexagon_V62_v64ii_Intrinsic<"HEXAGON_V6_pred_scalar2v2">;
+def int_hexagon_V6_vmpyewuh_64 :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyewuh_64">;
-//
-// BUILTIN_INFO(HEXAGON.V6_pred_scalar2v2_128B,QV_ftype_SI,1)
-// tag : V6_pred_scalar2v2_128B
-def int_hexagon_V6_pred_scalar2v2_128B :
-Hexagon_V62_v128ii_Intrinsic<"HEXAGON_V6_pred_scalar2v2_128B">;
+def int_hexagon_V6_vmpyewuh_64_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyewuh_64_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_shuffeqw,QV_ftype_QVQV,2)
-// tag : V6_shuffeqw
-def int_hexagon_V6_shuffeqw :
-Hexagon_V62_v64iv64iv64i_Intrinsic<"HEXAGON_V6_shuffeqw">;
+def int_hexagon_V6_vsatuwuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatuwuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_shuffeqw_128B,QV_ftype_QVQV,2)
-// tag : V6_shuffeqw_128B
-def int_hexagon_V6_shuffeqw_128B :
-Hexagon_V62_v128iv128iv128i_Intrinsic<"HEXAGON_V6_shuffeqw_128B">;
+def int_hexagon_V6_vsatuwuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsatuwuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_shuffeqh,QV_ftype_QVQV,2)
-// tag : V6_shuffeqh
def int_hexagon_V6_shuffeqh :
-Hexagon_V62_v64iv64iv64i_Intrinsic<"HEXAGON_V6_shuffeqh">;
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_shuffeqh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_shuffeqh_128B,QV_ftype_QVQV,2)
-// tag : V6_shuffeqh_128B
def int_hexagon_V6_shuffeqh_128B :
-Hexagon_V62_v128iv128iv128i_Intrinsic<"HEXAGON_V6_shuffeqh_128B">;
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_shuffeqh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxb,VI_ftype_VIVI,2)
-// tag : V6_vmaxb
-def int_hexagon_V6_vmaxb :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxb">;
+def int_hexagon_V6_shuffeqw :
+Hexagon_v512i1_v512i1v512i1_Intrinsic<"HEXAGON_V6_shuffeqw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmaxb_128B,VI_ftype_VIVI,2)
-// tag : V6_vmaxb_128B
-def int_hexagon_V6_vmaxb_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxb_128B">;
+def int_hexagon_V6_shuffeqw_128B :
+Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<"HEXAGON_V6_shuffeqw_128B">;
+
+def int_hexagon_V6_ldcnpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnpnt0">;
+
+def int_hexagon_V6_ldcnpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnpnt0_128B">;
+
+def int_hexagon_V6_vsubcarry :
+Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic;
+
+def int_hexagon_V6_vsubcarry_128B :
+Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B;
+
+def int_hexagon_V6_vasrhbsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhbsat">;
+
+def int_hexagon_V6_vasrhbsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrhbsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminb,VI_ftype_VIVI,2)
-// tag : V6_vminb
def int_hexagon_V6_vminb :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vminb">;
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vminb_128B,VI_ftype_VIVI,2)
-// tag : V6_vminb_128B
def int_hexagon_V6_vminb_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminb_128B">;
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vminb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsatuwuh,VI_ftype_VIVI,2)
-// tag : V6_vsatuwuh
-def int_hexagon_V6_vsatuwuh :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vsatuwuh">;
+def int_hexagon_V6_vmpauhb_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpauhb_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsatuwuh_128B,VI_ftype_VIVI,2)
-// tag : V6_vsatuwuh_128B
-def int_hexagon_V6_vsatuwuh_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsatuwuh_128B">;
+def int_hexagon_V6_vmpauhb_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vmpauhb_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplath,VI_ftype_SI,1)
-// tag : V6_lvsplath
-def int_hexagon_V6_lvsplath :
-Hexagon_V62_v512i_Intrinsic<"HEXAGON_V6_lvsplath">;
+def int_hexagon_V6_vaddhw_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhw_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplath_128B,VI_ftype_SI,1)
-// tag : V6_lvsplath_128B
-def int_hexagon_V6_lvsplath_128B :
-Hexagon_V62_v1024i_Intrinsic<"HEXAGON_V6_lvsplath_128B">;
+def int_hexagon_V6_vaddhw_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhw_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplatb,VI_ftype_SI,1)
-// tag : V6_lvsplatb
-def int_hexagon_V6_lvsplatb :
-Hexagon_V62_v512i_Intrinsic<"HEXAGON_V6_lvsplatb">;
+def int_hexagon_V6_vlsrb :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vlsrb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_lvsplatb_128B,VI_ftype_SI,1)
-// tag : V6_lvsplatb_128B
-def int_hexagon_V6_lvsplatb_128B :
-Hexagon_V62_v1024i_Intrinsic<"HEXAGON_V6_lvsplatb_128B">;
+def int_hexagon_V6_vlsrb_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vlsrb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddclbw,VI_ftype_VIVI,2)
-// tag : V6_vaddclbw
-def int_hexagon_V6_vaddclbw :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vaddclbw">;
+def int_hexagon_V6_vlutvwhi :
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddclbw_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddclbw_128B
-def int_hexagon_V6_vaddclbw_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddclbw_128B">;
+def int_hexagon_V6_vlutvwhi_128B :
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddclbh,VI_ftype_VIVI,2)
-// tag : V6_vaddclbh
-def int_hexagon_V6_vaddclbh :
-Hexagon_V62_v512v512v512_Intrinsic<"HEXAGON_V6_vaddclbh">;
+def int_hexagon_V6_vaddububb_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddububb_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddclbh_128B,VI_ftype_VIVI,2)
-// tag : V6_vaddclbh_128B
-def int_hexagon_V6_vaddclbh_128B :
-Hexagon_V62_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddclbh_128B">;
+def int_hexagon_V6_vaddububb_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddububb_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvbi,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvbi
-def int_hexagon_V6_vlutvvbi :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvbi">;
+def int_hexagon_V6_vsubbsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvbi_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvbi_128B
-def int_hexagon_V6_vlutvvbi_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvbi_128B">;
+def int_hexagon_V6_vsubbsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubbsat_dv_128B">;
+
+def int_hexagon_V6_ldtp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldtp0">;
+
+def int_hexagon_V6_ldtp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtp0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracci,VI_ftype_VIVIVISI,4)
-// tag : V6_vlutvvb_oracci
def int_hexagon_V6_vlutvvb_oracci :
-Hexagon_V62_v512v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_oracci">;
+Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracci_128B,VI_ftype_VIVIVISI,4)
-// tag : V6_vlutvvb_oracci_128B
def int_hexagon_V6_vlutvvb_oracci_128B :
-Hexagon_V62_v1024v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B">;
+Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwhi,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwhi
-def int_hexagon_V6_vlutvwhi :
-Hexagon_V62_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwhi">;
+def int_hexagon_V6_vsubuwsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuwsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwhi_128B,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwhi_128B
-def int_hexagon_V6_vlutvwhi_128B :
-Hexagon_V62_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwhi_128B">;
+def int_hexagon_V6_vsubuwsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubuwsat_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracci,VD_ftype_VDVIVISI,4)
-// tag : V6_vlutvwh_oracci
-def int_hexagon_V6_vlutvwh_oracci :
-Hexagon_V62_v1024v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_oracci">;
+def int_hexagon_V6_ldpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldpnt0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracci_128B,VD_ftype_VDVIVISI,4)
-// tag : V6_vlutvwh_oracci_128B
-def int_hexagon_V6_vlutvwh_oracci_128B :
-Hexagon_V62_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B">;
+def int_hexagon_V6_ldpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldpnt0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_nm,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvb_nm
-def int_hexagon_V6_vlutvvb_nm :
-Hexagon_V62_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_nm">;
+def int_hexagon_V6_vandvnqv :
+Hexagon_v16i32_v512i1v16i32_Intrinsic<"HEXAGON_V6_vandvnqv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvvb_nm_128B,VI_ftype_VIVISI,3)
-// tag : V6_vlutvvb_nm_128B
-def int_hexagon_V6_vlutvvb_nm_128B :
-Hexagon_V62_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_nm_128B">;
+def int_hexagon_V6_vandvnqv_128B :
+Hexagon_v32i32_v1024i1v32i32_Intrinsic<"HEXAGON_V6_vandvnqv_128B">;
+
+def int_hexagon_V6_lvsplatb :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_lvsplatb">;
+
+def int_hexagon_V6_lvsplatb_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplatb_128B">;
+
+def int_hexagon_V6_lvsplath :
+Hexagon_v16i32_i32_Intrinsic<"HEXAGON_V6_lvsplath">;
+
+def int_hexagon_V6_lvsplath_128B :
+Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplath_128B">;
+
+def int_hexagon_V6_ldtpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldtpnt0">;
+
+def int_hexagon_V6_ldtpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtpnt0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_nm,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwh_nm
def int_hexagon_V6_vlutvwh_nm :
-Hexagon_V62_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_nm">;
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_nm">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlutvwh_nm_128B,VD_ftype_VIVISI,3)
-// tag : V6_vlutvwh_nm_128B
def int_hexagon_V6_vlutvwh_nm_128B :
-Hexagon_V62_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_nm_128B">;
-
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddcarry,VI_ftype_VIVIQV,3)
-// tag: V6_vaddcarry
-def int_hexagon_V6_vaddcarry :
-Hexagon_v512v64iv512v512v64i_Intrinsic<"HEXAGON_v6_vaddcarry">;
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_nm_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaddcarry_128B,VI_ftype_VIVIQV,3)
-// tag: V6_vaddcarry_128B
-def int_hexagon_V6_vaddcarry_128B :
-Hexagon_v1024v128iv1024v1024v128i_Intrinsic<"HEXAGON_v6_vaddcarry_128B">;
+def int_hexagon_V6_ldnpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldnpnt0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubcarry,VI_ftype_VIVIQV,3)
-// tag: V6_vsubcarry
-def int_hexagon_V6_vsubcarry :
-Hexagon_v512v64iv512v512v64i_Intrinsic<"HEXAGON_v6_vsubcarry">;
+def int_hexagon_V6_ldnpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldnpnt0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vsubcarry_128B,VI_ftype_VIVIQV,3)
-// tag: V6_vsubcarry_128B
-def int_hexagon_V6_vsubcarry_128B :
-Hexagon_v1024v128iv1024v1024v128i_Intrinsic<"HEXAGON_v6_vsubcarry_128B">;
+def int_hexagon_V6_vmpauhb :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpauhb">;
+def int_hexagon_V6_vmpauhb_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vmpauhb_128B">;
-///
-/// HexagonV65 intrinsics
-///
+def int_hexagon_V6_ldtnp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldtnp0">;
-//
-// Hexagon_V65_iLLiLLi_Intrinsic<string GCCIntSuffix>
-// tag : A6_vcmpbeq_notany
-class Hexagon_V65_iLLiLLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldtnp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtnp0_128B">;
-//
-// Hexagon_V65_v1024v512LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyub_rtt
-class Hexagon_V65_v1024v512LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrounduhub :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrounduhub">;
-//
-// Hexagon_V65_v2048v1024LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyub_rtt_128B
-class Hexagon_V65_v2048v1024LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vrounduhub_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrounduhub_128B">;
-//
-// Hexagon_V65_v1024v1024v512LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyub_rtt_acc
-class Hexagon_V65_v1024v1024v512LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vadduhw_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vadduhw_acc">;
-//
-// Hexagon_V65_v2048v2048v1024LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vrmpyub_rtt_acc_128B
-class Hexagon_V65_v2048v2048v1024LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vadduhw_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduhw_acc_128B">;
-//
-// Hexagon_V65_v512v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vasruwuhsat
-class Hexagon_V65_v512v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldcp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldcp0">;
-//
-// Hexagon_V65_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vasruwuhsat_128B
-class Hexagon_V65_v1024v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldcp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcp0_128B">;
-//
-// Hexagon_V65_v512v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vavguw
-class Hexagon_V65_v512v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vadduwsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadduwsat">;
-//
-// Hexagon_V65_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vavguw_128B
-class Hexagon_V65_v1024v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vadduwsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduwsat_128B">;
-//
-// Hexagon_V65_v512v512_Intrinsic<string GCCIntSuffix>
-// tag : V6_vabsb
-class Hexagon_V65_v512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldtnpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldtnpnt0">;
-//
-// Hexagon_V65_v1024v1024_Intrinsic<string GCCIntSuffix>
-// tag : V6_vabsb_128B
-class Hexagon_V65_v1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_ldtnpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtnpnt0_128B">;
-//
-// Hexagon_V65_v1024v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpabuu
-class Hexagon_V65_v1024v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddbsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbsat">;
-//
-// Hexagon_V65_v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpabuu_128B
-class Hexagon_V65_v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddbsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbsat_128B">;
-//
-// Hexagon_V65_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpabuu_acc_128B
-class Hexagon_V65_v2048v2048v2048i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vandnqrt :
+Hexagon_v16i32_v512i1i32_Intrinsic<"HEXAGON_V6_vandnqrt">;
-//
-// Hexagon_V65_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyh_acc
-class Hexagon_V65_v1024v1024v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vandnqrt_128B :
+Hexagon_v32i32_v1024i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_128B">;
-//
-// Hexagon_V65_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyh_acc_128B
-class Hexagon_V65_v2048v2048v1024i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyiwub_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub_acc">;
-//
-// Hexagon_V65_v512v512v512LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpahhsat
-class Hexagon_V65_v512v512v512LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmpyiwub_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub_acc_128B">;
-//
-// Hexagon_V65_v1024v1024v1024LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpahhsat_128B
-class Hexagon_V65_v1024v1024v1024LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaxb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmaxb">;
-//
-// Hexagon_V65_v512v512LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlut4
-class Hexagon_V65_v512v512LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vmaxb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmaxb_128B">;
-//
-// Hexagon_V65_v1024v1024LLi_Intrinsic<string GCCIntSuffix>
-// tag : V6_vlut4_128B
-class Hexagon_V65_v1024v1024LLi_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i64_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vandvqv :
+Hexagon_v16i32_v512i1v16i32_Intrinsic<"HEXAGON_V6_vandvqv">;
-//
-// Hexagon_V65_v512v512i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vmpyuhe
-class Hexagon_V65_v512v512i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vandvqv_128B :
+Hexagon_v32i32_v1024i1v32i32_Intrinsic<"HEXAGON_V6_vandvqv_128B">;
-//
-// Hexagon_V65_v512v64i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vprefixqb
-class Hexagon_V65_v512v64i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v16i32_ty], [llvm_v512i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddcarry :
+Hexagon_custom_v16i32v512i1_v16i32v16i32v512i1_Intrinsic;
-//
-// Hexagon_V65_v1024v128i_Intrinsic<string GCCIntSuffix>
-// tag : V6_vprefixqb_128B
-class Hexagon_V65_v1024v128i_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v32i32_ty], [llvm_v1024i1_ty],
- [IntrNoMem]>;
+def int_hexagon_V6_vaddcarry_128B :
+Hexagon_custom_v32i32v1024i1_v32i32v32i32v1024i1_Intrinsic_128B;
-//
-// BUILTIN_INFO(HEXAGON.A6_vcmpbeq_notany,QI_ftype_DIDI,2)
-// tag : A6_vcmpbeq_notany
-def int_hexagon_A6_vcmpbeq_notany :
-Hexagon_V65_iLLiLLi_Intrinsic<"HEXAGON_A6_vcmpbeq_notany">;
+def int_hexagon_V6_vasrwuhrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrwuhrndsat">;
-//
-// BUILTIN_INFO(HEXAGON.A6_vcmpbeq_notany_128B,QI_ftype_DIDI,2)
-// tag : A6_vcmpbeq_notany_128B
-def int_hexagon_A6_vcmpbeq_notany_128B :
-Hexagon_V65_iLLiLLi_Intrinsic<"HEXAGON_A6_vcmpbeq_notany_128B">;
+def int_hexagon_V6_vasrwuhrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwuhrndsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt,VD_ftype_VIDI,2)
-// tag : V6_vrmpyub_rtt
-def int_hexagon_V6_vrmpyub_rtt :
-Hexagon_V65_v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt">;
+def int_hexagon_V6_vlutvvbi :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_128B,VD_ftype_VIDI,2)
-// tag : V6_vrmpyub_rtt_128B
-def int_hexagon_V6_vrmpyub_rtt_128B :
-Hexagon_V65_v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_128B">;
+def int_hexagon_V6_vlutvvbi_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_acc,VD_ftype_VDVIDI,3)
-// tag : V6_vrmpyub_rtt_acc
-def int_hexagon_V6_vrmpyub_rtt_acc :
-Hexagon_V65_v1024v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc">;
+def int_hexagon_V6_vsubuwsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubuwsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_acc_128B,VD_ftype_VDVIDI,3)
-// tag : V6_vrmpyub_rtt_acc_128B
-def int_hexagon_V6_vrmpyub_rtt_acc_128B :
-Hexagon_V65_v2048v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
+def int_hexagon_V6_vsubuwsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuwsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt,VD_ftype_VIDI,2)
-// tag : V6_vrmpybub_rtt
-def int_hexagon_V6_vrmpybub_rtt :
-Hexagon_V65_v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt">;
+def int_hexagon_V6_vaddbsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_128B,VD_ftype_VIDI,2)
-// tag : V6_vrmpybub_rtt_128B
-def int_hexagon_V6_vrmpybub_rtt_128B :
-Hexagon_V65_v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_128B">;
+def int_hexagon_V6_vaddbsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddbsat_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_acc,VD_ftype_VDVIDI,3)
-// tag : V6_vrmpybub_rtt_acc
-def int_hexagon_V6_vrmpybub_rtt_acc :
-Hexagon_V65_v1024v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc">;
+def int_hexagon_V6_ldnp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldnp0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_acc_128B,VD_ftype_VDVIDI,3)
-// tag : V6_vrmpybub_rtt_acc_128B
-def int_hexagon_V6_vrmpybub_rtt_acc_128B :
-Hexagon_V65_v2048v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc_128B">;
+def int_hexagon_V6_ldnp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldnp0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruwuhsat,VI_ftype_VIVISI,3)
-// tag : V6_vasruwuhsat
-def int_hexagon_V6_vasruwuhsat :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruwuhsat">;
+def int_hexagon_V6_vasruwuhrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasruwuhrndsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruwuhsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasruwuhsat_128B
-def int_hexagon_V6_vasruwuhsat_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruwuhsat_128B">;
+def int_hexagon_V6_vasruwuhrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasruwuhrndsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruhubsat,VI_ftype_VIVISI,3)
-// tag : V6_vasruhubsat
-def int_hexagon_V6_vasruhubsat :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruhubsat">;
+def int_hexagon_V6_vrounduwuh :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrounduwuh">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruhubsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasruhubsat_128B
-def int_hexagon_V6_vasruhubsat_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruhubsat_128B">;
+def int_hexagon_V6_vrounduwuh_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrounduwuh_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruhubrndsat,VI_ftype_VIVISI,3)
-// tag : V6_vasruhubrndsat
-def int_hexagon_V6_vasruhubrndsat :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruhubrndsat">;
+def int_hexagon_V6_vlutvvb_nm :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_nm">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasruhubrndsat_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasruhubrndsat_128B
-def int_hexagon_V6_vasruhubrndsat_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruhubrndsat_128B">;
+def int_hexagon_V6_vlutvvb_nm_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_nm_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslh_acc,VI_ftype_VIVISI,3)
-// tag : V6_vaslh_acc
-def int_hexagon_V6_vaslh_acc :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslh_acc">;
+def int_hexagon_V6_pred_scalar2v2 :
+Hexagon_v512i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vaslh_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vaslh_acc_128B
-def int_hexagon_V6_vaslh_acc_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_acc_128B">;
+def int_hexagon_V6_pred_scalar2v2_128B :
+Hexagon_v1024i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrh_acc,VI_ftype_VIVISI,3)
-// tag : V6_vasrh_acc
-def int_hexagon_V6_vasrh_acc :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrh_acc">;
+def int_hexagon_V6_ldp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldp0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vasrh_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vasrh_acc_128B
-def int_hexagon_V6_vasrh_acc_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_acc_128B">;
+def int_hexagon_V6_ldp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldp0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguw,VI_ftype_VIVI,2)
-// tag : V6_vavguw
-def int_hexagon_V6_vavguw :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavguw">;
+def int_hexagon_V6_vaddubh_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddubh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguw_128B,VI_ftype_VIVI,2)
-// tag : V6_vavguw_128B
-def int_hexagon_V6_vavguw_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguw_128B">;
+def int_hexagon_V6_vaddubh_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddubh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguwrnd,VI_ftype_VIVI,2)
-// tag : V6_vavguwrnd
-def int_hexagon_V6_vavguwrnd :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavguwrnd">;
+def int_hexagon_V6_vaddclbw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddclbw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavguwrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavguwrnd_128B
-def int_hexagon_V6_vavguwrnd_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguwrnd_128B">;
+def int_hexagon_V6_vaddclbw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddclbw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgb,VI_ftype_VIVI,2)
-// tag : V6_vavgb
-def int_hexagon_V6_vavgb :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavgb">;
+def int_hexagon_V6_ldcpnt0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldcpnt0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgb_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgb_128B
-def int_hexagon_V6_vavgb_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgb_128B">;
+def int_hexagon_V6_ldcpnt0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcpnt0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgbrnd,VI_ftype_VIVI,2)
-// tag : V6_vavgbrnd
-def int_hexagon_V6_vavgbrnd :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavgbrnd">;
+def int_hexagon_V6_vadduwsat_dv :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadduwsat_dv">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vavgbrnd_128B,VI_ftype_VIVI,2)
-// tag : V6_vavgbrnd_128B
-def int_hexagon_V6_vavgbrnd_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgbrnd_128B">;
+def int_hexagon_V6_vadduwsat_dv_128B :
+Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vadduwsat_dv_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgb,VI_ftype_VIVI,2)
-// tag : V6_vnavgb
-def int_hexagon_V6_vnavgb :
-Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgb">;
+def int_hexagon_V6_vmpyiwub :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vnavgb_128B,VI_ftype_VIVI,2)
-// tag : V6_vnavgb_128B
-def int_hexagon_V6_vnavgb_128B :
-Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgb_128B">;
+def int_hexagon_V6_vmpyiwub_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyiwub_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsb,VI_ftype_VI,1)
-// tag : V6_vabsb
-def int_hexagon_V6_vabsb :
-Hexagon_V65_v512v512_Intrinsic<"HEXAGON_V6_vabsb">;
+def int_hexagon_V6_vsubububb_sat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubububb_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsb_128B,VI_ftype_VI,1)
-// tag : V6_vabsb_128B
-def int_hexagon_V6_vabsb_128B :
-Hexagon_V65_v1024v1024_Intrinsic<"HEXAGON_V6_vabsb_128B">;
+def int_hexagon_V6_vsubububb_sat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubububb_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsb_sat,VI_ftype_VI,1)
-// tag : V6_vabsb_sat
-def int_hexagon_V6_vabsb_sat :
-Hexagon_V65_v512v512_Intrinsic<"HEXAGON_V6_vabsb_sat">;
+def int_hexagon_V6_ldcnp0 :
+Hexagon_v16i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnp0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vabsb_sat_128B,VI_ftype_VI,1)
-// tag : V6_vabsb_sat_128B
-def int_hexagon_V6_vabsb_sat_128B :
-Hexagon_V65_v1024v1024_Intrinsic<"HEXAGON_V6_vabsb_sat_128B">;
+def int_hexagon_V6_ldcnp0_128B :
+Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnp0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuu,VD_ftype_VDSI,2)
-// tag : V6_vmpabuu
-def int_hexagon_V6_vmpabuu :
-Hexagon_V65_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabuu">;
+def int_hexagon_V6_vlutvwh_oracci :
+Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuu_128B,VD_ftype_VDSI,2)
-// tag : V6_vmpabuu_128B
-def int_hexagon_V6_vmpabuu_128B :
-Hexagon_V65_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabuu_128B">;
+def int_hexagon_V6_vlutvwh_oracci_128B :
+Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuu_acc,VD_ftype_VDVDSI,3)
-// tag : V6_vmpabuu_acc
-def int_hexagon_V6_vmpabuu_acc :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabuu_acc">;
+def int_hexagon_V6_vsubbsat :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpabuu_acc_128B,VD_ftype_VDVDSI,3)
-// tag : V6_vmpabuu_acc_128B
-def int_hexagon_V6_vmpabuu_acc_128B :
-Hexagon_V65_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabuu_acc_128B">;
+def int_hexagon_V6_vsubbsat_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyh_acc,VD_ftype_VDVISI,3)
-// tag : V6_vmpyh_acc
-def int_hexagon_V6_vmpyh_acc :
-Hexagon_V65_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh_acc">;
+// V65 HVX Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyh_acc_128B,VD_ftype_VDVISI,3)
-// tag : V6_vmpyh_acc_128B
-def int_hexagon_V6_vmpyh_acc_128B :
-Hexagon_V65_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_acc_128B">;
+def int_hexagon_V6_vasruhubrndsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasruhubrndsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahhsat,VI_ftype_VIVIDI,3)
-// tag : V6_vmpahhsat
-def int_hexagon_V6_vmpahhsat :
-Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpahhsat">;
+def int_hexagon_V6_vasruhubrndsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasruhubrndsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpahhsat_128B,VI_ftype_VIVIDI,3)
-// tag : V6_vmpahhsat_128B
-def int_hexagon_V6_vmpahhsat_128B :
-Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpahhsat_128B">;
+def int_hexagon_V6_vrmpybub_rtt :
+Hexagon_v32i32_v16i32i64_Intrinsic<"HEXAGON_V6_vrmpybub_rtt">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhuhsat,VI_ftype_VIVIDI,3)
-// tag : V6_vmpauhuhsat
-def int_hexagon_V6_vmpauhuhsat :
-Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpauhuhsat">;
+def int_hexagon_V6_vrmpybub_rtt_128B :
+Hexagon_v64i32_v32i32i64_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpauhuhsat_128B,VI_ftype_VIVIDI,3)
-// tag : V6_vmpauhuhsat_128B
-def int_hexagon_V6_vmpauhuhsat_128B :
-Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpauhuhsat_128B">;
+def int_hexagon_V6_vmpahhsat :
+Hexagon_v16i32_v16i32v16i32i64_Intrinsic<"HEXAGON_V6_vmpahhsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpsuhuhsat,VI_ftype_VIVIDI,3)
-// tag : V6_vmpsuhuhsat
-def int_hexagon_V6_vmpsuhuhsat :
-Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpsuhuhsat">;
+def int_hexagon_V6_vmpahhsat_128B :
+Hexagon_v32i32_v32i32v32i32i64_Intrinsic<"HEXAGON_V6_vmpahhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpsuhuhsat_128B,VI_ftype_VIVIDI,3)
-// tag : V6_vmpsuhuhsat_128B
-def int_hexagon_V6_vmpsuhuhsat_128B :
-Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpsuhuhsat_128B">;
+def int_hexagon_V6_vavguwrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavguwrnd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlut4,VI_ftype_VIDI,2)
-// tag : V6_vlut4
-def int_hexagon_V6_vlut4 :
-Hexagon_V65_v512v512LLi_Intrinsic<"HEXAGON_V6_vlut4">;
+def int_hexagon_V6_vavguwrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavguwrnd_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vlut4_128B,VI_ftype_VIDI,2)
-// tag : V6_vlut4_128B
-def int_hexagon_V6_vlut4_128B :
-Hexagon_V65_v1024v1024LLi_Intrinsic<"HEXAGON_V6_vlut4_128B">;
+def int_hexagon_V6_vnavgb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhe,VI_ftype_VISI,2)
-// tag : V6_vmpyuhe
-def int_hexagon_V6_vmpyuhe :
-Hexagon_V65_v512v512i_Intrinsic<"HEXAGON_V6_vmpyuhe">;
+def int_hexagon_V6_vnavgb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_128B,VI_ftype_VISI,2)
-// tag : V6_vmpyuhe_128B
-def int_hexagon_V6_vmpyuhe_128B :
-Hexagon_V65_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyuhe_128B">;
+def int_hexagon_V6_vasrh_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_acc,VI_ftype_VIVISI,3)
-// tag : V6_vmpyuhe_acc
-def int_hexagon_V6_vmpyuhe_acc :
-Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyuhe_acc">;
+def int_hexagon_V6_vasrh_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_acc_128B,VI_ftype_VIVISI,3)
-// tag : V6_vmpyuhe_acc_128B
-def int_hexagon_V6_vmpyuhe_acc_128B :
-Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyuhe_acc_128B">;
+def int_hexagon_V6_vmpauhuhsat :
+Hexagon_v16i32_v16i32v16i32i64_Intrinsic<"HEXAGON_V6_vmpauhuhsat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqb,VI_ftype_QV,1)
-// tag : V6_vprefixqb
-def int_hexagon_V6_vprefixqb :
-Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqb">;
+def int_hexagon_V6_vmpauhuhsat_128B :
+Hexagon_v32i32_v32i32v32i32i64_Intrinsic<"HEXAGON_V6_vmpauhuhsat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqb_128B,VI_ftype_QV,1)
-// tag : V6_vprefixqb_128B
-def int_hexagon_V6_vprefixqb_128B :
-Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqb_128B">;
+def int_hexagon_V6_vmpyh_acc :
+Hexagon_v32i32_v32i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyh_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqh,VI_ftype_QV,1)
-// tag : V6_vprefixqh
-def int_hexagon_V6_vprefixqh :
-Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqh">;
+def int_hexagon_V6_vmpyh_acc_128B :
+Hexagon_v64i32_v64i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyh_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqh_128B,VI_ftype_QV,1)
-// tag : V6_vprefixqh_128B
-def int_hexagon_V6_vprefixqh_128B :
-Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqh_128B">;
+def int_hexagon_V6_vrmpybub_rtt_acc :
+Hexagon_v32i32_v32i32v16i32i64_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqw,VI_ftype_QV,1)
-// tag : V6_vprefixqw
-def int_hexagon_V6_vprefixqw :
-Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqw">;
+def int_hexagon_V6_vrmpybub_rtt_acc_128B :
+Hexagon_v64i32_v64i32v32i32i64_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vprefixqw_128B,VI_ftype_QV,1)
-// tag : V6_vprefixqw_128B
-def int_hexagon_V6_vprefixqw_128B :
-Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqw_128B">;
+def int_hexagon_V6_vavgb :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgb">;
+def int_hexagon_V6_vavgb_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgb_128B">;
-// The scatter/gather ones below will not be generated from iset.py. Make sure
-// you don't overwrite these.
-class Hexagon_V65_vvmemiiv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
- llvm_v16i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vaslh_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vaslh_acc">;
-class Hexagon_V65_vvmemiiv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
- llvm_v32i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vaslh_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vaslh_acc_128B">;
-class Hexagon_V65_vvmemiiv2048_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_i32_ty,llvm_i32_ty,
- llvm_v64i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vavguw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavguw">;
-class Hexagon_V65_vvmemv64iiiv512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v16i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vavguw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavguw_128B">;
-class Hexagon_V65_vvmemv128iiiv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v32i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vlut4 :
+Hexagon_v16i32_v16i32i64_Intrinsic<"HEXAGON_V6_vlut4">;
-class Hexagon_V65_vvmemv64iiiv1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v512i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v32i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vlut4_128B :
+Hexagon_v32i32_v32i32i64_Intrinsic<"HEXAGON_V6_vlut4_128B">;
-class Hexagon_V65_vvmemv128iiiv2048_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_ptr_ty,llvm_v1024i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v64i32_ty],
- [IntrArgMemOnly]>;
+def int_hexagon_V6_vmpyuhe_acc :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vmpyuhe_acc">;
-def int_hexagon_V6_vgathermw :
-Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermw">;
+def int_hexagon_V6_vmpyuhe_acc_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpyuhe_acc_128B">;
-def int_hexagon_V6_vgathermw_128B :
-Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermw_128B">;
+def int_hexagon_V6_vrmpyub_rtt :
+Hexagon_v32i32_v16i32i64_Intrinsic<"HEXAGON_V6_vrmpyub_rtt">;
-def int_hexagon_V6_vgathermh :
-Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermh">;
+def int_hexagon_V6_vrmpyub_rtt_128B :
+Hexagon_v64i32_v32i32i64_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_128B">;
-def int_hexagon_V6_vgathermh_128B :
-Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermh_128B">;
+def int_hexagon_V6_vmpsuhuhsat :
+Hexagon_v16i32_v16i32v16i32i64_Intrinsic<"HEXAGON_V6_vmpsuhuhsat">;
-def int_hexagon_V6_vgathermhw :
-Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermhw">;
+def int_hexagon_V6_vmpsuhuhsat_128B :
+Hexagon_v32i32_v32i32v32i32i64_Intrinsic<"HEXAGON_V6_vmpsuhuhsat_128B">;
-def int_hexagon_V6_vgathermhw_128B :
-Hexagon_V65_vvmemiiv2048_Intrinsic<"HEXAGON_V6_vgathermhw_128B">;
+def int_hexagon_V6_vasruhubsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasruhubsat">;
-def int_hexagon_V6_vgathermwq :
-Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermwq">;
+def int_hexagon_V6_vasruhubsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasruhubsat_128B">;
-def int_hexagon_V6_vgathermwq_128B :
-Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermwq_128B">;
+def int_hexagon_V6_vmpyuhe :
+Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vmpyuhe">;
-def int_hexagon_V6_vgathermhq :
-Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermhq">;
+def int_hexagon_V6_vmpyuhe_128B :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpyuhe_128B">;
-def int_hexagon_V6_vgathermhq_128B :
-Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhq_128B">;
+def int_hexagon_V6_vrmpyub_rtt_acc :
+Hexagon_v32i32_v32i32v16i32i64_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc">;
-def int_hexagon_V6_vgathermhwq :
-Hexagon_V65_vvmemv64iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhwq">;
+def int_hexagon_V6_vrmpyub_rtt_acc_128B :
+Hexagon_v64i32_v64i32v32i32i64_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
-def int_hexagon_V6_vgathermhwq_128B :
-Hexagon_V65_vvmemv128iiiv2048_Intrinsic<"HEXAGON_V6_vgathermhwq_128B">;
+def int_hexagon_V6_vasruwuhsat :
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasruwuhsat">;
-class Hexagon_V65_viiv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_i32_ty,llvm_i32_ty,
- llvm_v16i32_ty,llvm_v16i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vasruwuhsat_128B :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasruwuhsat_128B">;
-class Hexagon_V65_viiv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_i32_ty,llvm_i32_ty,
- llvm_v32i32_ty,llvm_v32i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vmpabuu_acc :
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vmpabuu_acc">;
-class Hexagon_V65_vv64iiiv512v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v16i32_ty,
- llvm_v16i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vmpabuu_acc_128B :
+Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vmpabuu_acc_128B">;
-class Hexagon_V65_vv128iiiv1024v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v32i32_ty,
- llvm_v32i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqw :
+Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqw">;
-class Hexagon_V65_viiv1024v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_i32_ty,llvm_i32_ty,
- llvm_v32i32_ty,llvm_v16i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqw_128B :
+Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqw_128B">;
-class Hexagon_V65_viiv2048v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_i32_ty,llvm_i32_ty,
- llvm_v64i32_ty,llvm_v32i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqh :
+Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqh">;
-class Hexagon_V65_vv64iiiv1024v512_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v512i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v32i32_ty,
- llvm_v16i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqh_128B :
+Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqh_128B">;
-class Hexagon_V65_vv128iiiv2048v1024_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [], [llvm_v1024i1_ty,llvm_i32_ty,
- llvm_i32_ty,llvm_v64i32_ty,
- llvm_v32i32_ty],
- [IntrWriteMem]>;
+def int_hexagon_V6_vprefixqb :
+Hexagon_v16i32_v512i1_Intrinsic<"HEXAGON_V6_vprefixqb">;
-class Hexagon_V65_v2048_Intrinsic<string GCCIntSuffix>
- : Hexagon_Intrinsic<GCCIntSuffix,
- [llvm_v64i32_ty], [],
- [IntrNoMem]>;
+def int_hexagon_V6_vprefixqb_128B :
+Hexagon_v32i32_v1024i1_Intrinsic<"HEXAGON_V6_vprefixqb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermw,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermw
-def int_hexagon_V6_vscattermw :
-Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw">;
+def int_hexagon_V6_vabsb :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsb">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermw_128B,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermw_128B
-def int_hexagon_V6_vscattermw_128B :
-Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_128B">;
+def int_hexagon_V6_vabsb_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsb_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermh,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermh
-def int_hexagon_V6_vscattermh :
-Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh">;
+def int_hexagon_V6_vavgbrnd :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vavgbrnd">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermh_128B,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermh_128B
-def int_hexagon_V6_vscattermh_128B :
-Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_128B">;
+def int_hexagon_V6_vavgbrnd_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vavgbrnd_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermw_add,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermw_add
-def int_hexagon_V6_vscattermw_add :
-Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw_add">;
+def int_hexagon_V6_vdd0 :
+Hexagon_v32i32__Intrinsic<"HEXAGON_V6_vdd0">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermw_add_128B,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermw_add_128B
-def int_hexagon_V6_vscattermw_add_128B :
-Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_add_128B">;
+def int_hexagon_V6_vdd0_128B :
+Hexagon_v64i32__Intrinsic<"HEXAGON_V6_vdd0_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermh_add,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermh_add
-def int_hexagon_V6_vscattermh_add :
-Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh_add">;
+def int_hexagon_V6_vmpabuu :
+Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vmpabuu">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermh_add_128B,v_ftype_SISIVIVI,4)
-// tag : V6_vscattermh_add_128B
-def int_hexagon_V6_vscattermh_add_128B :
-Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_add_128B">;
+def int_hexagon_V6_vmpabuu_128B :
+Hexagon_v64i32_v64i32i32_Intrinsic<"HEXAGON_V6_vmpabuu_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermwq,v_ftype_QVSISIVIVI,5)
-// tag : V6_vscattermwq
-def int_hexagon_V6_vscattermwq :
-Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermwq">;
+def int_hexagon_V6_vabsb_sat :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsb_sat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermwq_128B,v_ftype_QVSISIVIVI,5)
-// tag : V6_vscattermwq_128B
-def int_hexagon_V6_vscattermwq_128B :
-Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermwq_128B">;
+def int_hexagon_V6_vabsb_sat_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabsb_sat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhq,v_ftype_QVSISIVIVI,5)
-// tag : V6_vscattermhq
-def int_hexagon_V6_vscattermhq :
-Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermhq">;
+// V66 HVX Instructions.
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhq_128B,v_ftype_QVSISIVIVI,5)
-// tag : V6_vscattermhq_128B
-def int_hexagon_V6_vscattermhq_128B :
-Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermhq_128B">;
+def int_hexagon_V6_vaddcarrysat :
+Hexagon_v16i32_v16i32v16i32v512i1_Intrinsic<"HEXAGON_V6_vaddcarrysat">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhw,v_ftype_SISIVDVI,4)
-// tag : V6_vscattermhw
-def int_hexagon_V6_vscattermhw :
-Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw">;
+def int_hexagon_V6_vaddcarrysat_128B :
+Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<"HEXAGON_V6_vaddcarrysat_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhw_128B,v_ftype_SISIVDVI,4)
-// tag : V6_vscattermhw_128B
-def int_hexagon_V6_vscattermhw_128B :
-Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_128B">;
+def int_hexagon_V6_vasr_into :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vasr_into">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhwq,v_ftype_QVSISIVDVI,5)
-// tag : V6_vscattermhwq
-def int_hexagon_V6_vscattermhwq :
-Hexagon_V65_vv64iiiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhwq">;
+def int_hexagon_V6_vasr_into_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vasr_into_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhwq_128B,v_ftype_QVSISIVDVI,5)
-// tag : V6_vscattermhwq_128B
-def int_hexagon_V6_vscattermhwq_128B :
-Hexagon_V65_vv128iiiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhwq_128B">;
+def int_hexagon_V6_vsatdw :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatdw">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add,v_ftype_SISIVDVI,4)
-// tag : V6_vscattermhw_add
-def int_hexagon_V6_vscattermhw_add :
-Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw_add">;
+def int_hexagon_V6_vsatdw_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsatdw_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add_128B,v_ftype_SISIVDVI,4)
-// tag : V6_vscattermhw_add_128B
-def int_hexagon_V6_vscattermhw_add_128B :
-Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B">;
+def int_hexagon_V6_vrotr :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrotr">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdd0,VD_ftype_,0)
-// tag : V6_vdd0
-def int_hexagon_V6_vdd0 :
-Hexagon_v1024_Intrinsic<"HEXAGON_V6_vdd0">;
+def int_hexagon_V6_vrotr_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vrotr_128B">;
-//
-// BUILTIN_INFO(HEXAGON.V6_vdd0_128B,VD_ftype_,0)
-// tag : V6_vdd0_128B
-def int_hexagon_V6_vdd0_128B :
-Hexagon_V65_v2048_Intrinsic<"HEXAGON_V6_vdd0_128B">;
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 3433aaa402eb..62b2e8f77e7d 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -83,6 +83,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_fmaf128_round_to_odd
: GCCBuiltin<"__builtin_fmaf128_round_to_odd">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
+ def int_ppc_scalar_extract_expq
+ : GCCBuiltin<"__builtin_vsx_scalar_extract_expq">,
+ Intrinsic <[llvm_i64_ty], [llvm_f128_ty], [IntrNoMem]>;
+ def int_ppc_scalar_insert_exp_qp
+ : GCCBuiltin<"__builtin_vsx_scalar_insert_exp_qp">,
+ Intrinsic <[llvm_f128_ty], [llvm_f128_ty, llvm_i64_ty], [IntrNoMem]>;
}
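The two quad-precision intrinsics added here map onto IEEE binary128 exponent manipulation: extract takes an f128 and yields the exponent as an i64, insert takes an f128 plus an i64 and yields a new f128. Below is a minimal sketch of emitting them through IRBuilder; the Intrinsic:: enum names are assumed to follow the usual TableGen derivation from the defs above, and roundTripExponent/Val/Exp are hypothetical names used only for illustration.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: read the exponent field of an fp128 value, then splice a new one in.
    // Val is an fp128 SSA value and Exp an i64 value provided by the caller.
    Value *roundTripExponent(Module &M, IRBuilder<> &B, Value *Val, Value *Exp) {
      Function *Extract =
          Intrinsic::getDeclaration(&M, Intrinsic::ppc_scalar_extract_expq);
      Function *Insert =
          Intrinsic::getDeclaration(&M, Intrinsic::ppc_scalar_insert_exp_qp);
      Value *OldExp = B.CreateCall(Extract, {Val}); // i64 exponent field
      (void)OldExp;                                 // kept only to show the type
      return B.CreateCall(Insert, {Val, Exp});      // f128 with Exp spliced in
    }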
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsRISCV.td b/contrib/llvm/include/llvm/IR/IntrinsicsRISCV.td
new file mode 100644
index 000000000000..0ac7348b56db
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -0,0 +1,44 @@
+//===- IntrinsicsRISCV.td - Defines RISCV intrinsics -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the RISCV-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "riscv" in {
+
+//===----------------------------------------------------------------------===//
+// Atomics
+
+class MaskedAtomicRMW32Intrinsic
+ : Intrinsic<[llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>]>;
+
+class MaskedAtomicRMW32WithSextIntrinsic
+ : Intrinsic<[llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>]>;
+
+def int_riscv_masked_atomicrmw_xchg_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_add_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_sub_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_nand_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_max_i32 : MaskedAtomicRMW32WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_min_i32 : MaskedAtomicRMW32WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_umax_i32 : MaskedAtomicRMW32Intrinsic;
+def int_riscv_masked_atomicrmw_umin_i32 : MaskedAtomicRMW32Intrinsic;
+
+def int_riscv_masked_cmpxchg_i32
+ : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>]>;
+
+} // TargetPrefix = "riscv"
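These masked atomic intrinsics are normally generated by the RISC-V backend's own atomic expansion, not by front ends, so the sketch below is purely illustrative of the operand layout suggested by the classes above (pointer, operand value, mask, then an ordering immediate; the sext variant adds a shift amount). The enum name, the interpretation of the trailing operand, and the choice of an i32* for the overloaded llvm_anyptr_ty slot are all assumptions.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/AtomicOrdering.h"

    using namespace llvm;

    // Sketch: build a call to llvm.riscv.masked.atomicrmw.add.i32.
    // Ptr is an i32* value; Incr and Mask are i32 values supplied by the caller.
    Value *emitMaskedAtomicAdd(Module &M, IRBuilder<> &B, Value *Ptr,
                               Value *Incr, Value *Mask) {
      // The intrinsic is overloaded only on the pointer type (llvm_anyptr_ty).
      Function *F = Intrinsic::getDeclaration(
          &M, Intrinsic::riscv_masked_atomicrmw_add_i32, {Ptr->getType()});
      // Assumed: the trailing i32 encodes the atomic memory ordering.
      Value *Ordering = B.getInt32(
          static_cast<uint32_t>(AtomicOrdering::SequentiallyConsistent));
      return B.CreateCall(F, {Ptr, Incr, Mask, Ordering});
    }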
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 7afc755a1e37..b015650906e0 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -24,17 +24,16 @@ def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty],
[llvm_i32_ty, LLVMMatchType<0>],
[]>;
-// These are the old names.
-def int_wasm_mem_size : Intrinsic<[llvm_anyint_ty],
- [llvm_i32_ty],
- [IntrReadMem]>;
-def int_wasm_mem_grow : Intrinsic<[llvm_anyint_ty],
- [llvm_i32_ty, LLVMMatchType<0>],
- []>;
+//===----------------------------------------------------------------------===//
+// Saturating float-to-int conversions
+//===----------------------------------------------------------------------===//
-// These are the old old names. They also lack the immediate field.
-def int_wasm_current_memory : Intrinsic<[llvm_anyint_ty], [], [IntrReadMem]>;
-def int_wasm_grow_memory : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
+def int_wasm_trunc_saturate_signed : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyfloat_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyfloat_ty],
+ [IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
// Exception handling intrinsics
@@ -60,8 +59,57 @@ def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
// WebAssembly EH must maintain the landingpads in the order assigned to them
// by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is
// used in order to give them the indices in WasmEHPrepare.
-def int_wasm_landingpad_index: Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+def int_wasm_landingpad_index: Intrinsic<[], [llvm_token_ty, llvm_i32_ty],
+ [IntrNoMem]>;
// Returns LSDA address of the current function.
def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-}
+
+//===----------------------------------------------------------------------===//
+// Atomic intrinsics
+//===----------------------------------------------------------------------===//
+
+// wait / notify
+def int_wasm_atomic_wait_i32 :
+ Intrinsic<[llvm_i32_ty],
+ [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
+ IntrHasSideEffects],
+ "", [SDNPMemOperand]>;
+def int_wasm_atomic_wait_i64 :
+ Intrinsic<[llvm_i32_ty],
+ [LLVMPointerType<llvm_i64_ty>, llvm_i64_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
+ IntrHasSideEffects],
+ "", [SDNPMemOperand]>;
+def int_wasm_atomic_notify:
+ Intrinsic<[llvm_i32_ty], [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty],
+ [IntrInaccessibleMemOnly, NoCapture<0>, IntrHasSideEffects], "",
+ [SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
+// SIMD intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_wasm_sub_saturate_signed :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_sub_saturate_unsigned :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_bitselect :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_anytrue :
+ Intrinsic<[llvm_i32_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_alltrue :
+ Intrinsic<[llvm_i32_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+} // TargetPrefix = "wasm"
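Of the WebAssembly additions, the wait/notify pair is the one whose operand order is easiest to get wrong: wait takes a pointer, the expected value and a 64-bit timeout, while notify takes a pointer and a waiter count. A hedged IRBuilder sketch follows; the enum names are assumed from the defs, the timeout and return-value semantics are those of the wasm threads proposal, and emitWaitThenNotify is a hypothetical helper.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: wait on a 32-bit location, then wake at most one waiter.
    // Ptr is an i32*, Expected an i32, Timeout an i64 (negative means no timeout).
    Value *emitWaitThenNotify(Module &M, IRBuilder<> &B, Value *Ptr,
                              Value *Expected, Value *Timeout) {
      Function *Wait =
          Intrinsic::getDeclaration(&M, Intrinsic::wasm_atomic_wait_i32);
      Function *Notify =
          Intrinsic::getDeclaration(&M, Intrinsic::wasm_atomic_notify);
      Value *Result = B.CreateCall(Wait, {Ptr, Expected, Timeout});
      B.CreateCall(Notify, {Ptr, B.getInt32(1)}); // wake up to one waiter
      return Result; // 0 = woken, 1 = value mismatch, 2 = timed out
    }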
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
index 905afc130d8f..8d8cc8e97678 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -27,12 +27,6 @@ let TargetPrefix = "x86" in {
// Marks the EH guard slot node created in LLVM IR prior to code generation.
def int_x86_seh_ehguard : Intrinsic<[], [llvm_ptr_ty], []>;
-
- // Given a pointer to the end of an EH registration object, returns the true
- // parent frame address that can be used with llvm.localrecover.
- def int_x86_seh_recoverfp : Intrinsic<[llvm_ptr_ty],
- [llvm_ptr_ty, llvm_ptr_ty],
- [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -53,8 +47,8 @@ let TargetPrefix = "x86" in {
let TargetPrefix = "x86" in {
def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
Intrinsic<[llvm_i64_ty], [], []>;
- def int_x86_rdtscp : GCCBuiltin<"__builtin_ia32_rdtscp">,
- Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrArgMemOnly]>;
+ def int_x86_rdtscp :
+ Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
}
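The rdtscp rewrite is representative of several changes in this import: instead of storing the auxiliary TSC_AUX value through a pointer argument, the intrinsic now returns both results as a two-element struct. A minimal sketch of consuming the new form (the enum name follows the def; emitRdtscp is a hypothetical helper):

    #include <utility>

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: read the time-stamp counter and the TSC_AUX MSR in one call.
    std::pair<Value *, Value *> emitRdtscp(Module &M, IRBuilder<> &B) {
      Function *Rdtscp = Intrinsic::getDeclaration(&M, Intrinsic::x86_rdtscp);
      Value *Call = B.CreateCall(Rdtscp);
      Value *TSC = B.CreateExtractValue(Call, 0); // i64 time-stamp counter
      Value *Aux = B.CreateExtractValue(Call, 1); // i32 IA32_TSC_AUX contents
      return {TSC, Aux};
    }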
// Read Performance-Monitoring Counter.
@@ -364,30 +358,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
@@ -1336,21 +1306,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// BITALG bits shuffle
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_vpshufbitqmb_128 :
- GCCBuiltin<"__builtin_ia32_vpshufbitqmb128_mask">,
- Intrinsic<[llvm_i16_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_vpshufbitqmb_256 :
- GCCBuiltin<"__builtin_ia32_vpshufbitqmb256_mask">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_vpshufbitqmb_512 :
- GCCBuiltin<"__builtin_ia32_vpshufbitqmb512_mask">,
- Intrinsic<[llvm_i64_ty],
- [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
- [IntrNoMem]>;
+ def int_x86_avx512_vpshufbitqmb_128 :
+ Intrinsic<[llvm_v16i1_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_vpshufbitqmb_256 :
+ Intrinsic<[llvm_v32i1_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_vpshufbitqmb_512 :
+ Intrinsic<[llvm_v64i1_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -1358,30 +1319,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
@@ -1518,18 +1455,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmultishift_qb_128:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmultishift_qb_256:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmultishift_qb_512:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmultishift_qb_128:
+ GCCBuiltin<"__builtin_ia32_vpmultishiftqb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmultishift_qb_256:
+ GCCBuiltin<"__builtin_ia32_vpmultishiftqb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmultishift_qb_512:
+ GCCBuiltin<"__builtin_ia32_vpmultishiftqb512">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
}
// Pack ops.
@@ -1739,83 +1673,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
-
- def int_x86_avx512_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty], [IntrNoMem]>;
-
- def int_x86_avx512_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128">,
- Intrinsic<[llvm_v4i32_ty] , [llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256">,
- Intrinsic<[llvm_v8i32_ty] , [llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512">,
- Intrinsic<[llvm_v16i32_ty] , [llvm_v16i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_q_128 : GCCBuiltin<"__builtin_ia32_prolq128">,
- Intrinsic<[llvm_v2i64_ty] , [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256">,
- Intrinsic<[llvm_v4i64_ty] , [llvm_v4i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512">,
- Intrinsic<[llvm_v8i64_ty] , [llvm_v8i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
-
-
- def int_x86_avx512_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
-
}
// Gather ops
@@ -2187,32 +2044,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-
- def int_x86_xop_vprotb : GCCBuiltin<"__builtin_ia32_vprotb">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotd : GCCBuiltin<"__builtin_ia32_vprotd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotq : GCCBuiltin<"__builtin_ia32_vprotq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotw : GCCBuiltin<"__builtin_ia32_vprotw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotbi : GCCBuiltin<"__builtin_ia32_vprotbi">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotdi : GCCBuiltin<"__builtin_ia32_vprotdi">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotqi : GCCBuiltin<"__builtin_ia32_vprotqi">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotwi : GCCBuiltin<"__builtin_ia32_vprotwi">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
def int_x86_xop_vpshab :
GCCBuiltin<"__builtin_ia32_vpshab">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@@ -2750,24 +2581,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// ADX
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_addcarryx_u32: GCCBuiltin<"__builtin_ia32_addcarryx_u32">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_addcarryx_u64: GCCBuiltin<"__builtin_ia32_addcarryx_u64">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_addcarry_u32: GCCBuiltin<"__builtin_ia32_addcarry_u32">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_addcarry_u64: GCCBuiltin<"__builtin_ia32_addcarry_u64">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_subborrow_u32: GCCBuiltin<"__builtin_ia32_subborrow_u32">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_x86_subborrow_u64: GCCBuiltin<"__builtin_ia32_subborrow_u64">,
- Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
- llvm_ptr_ty], [IntrArgMemOnly]>;
+ def int_x86_addcarry_32:
+ Intrinsic<[llvm_i8_ty, llvm_i32_ty],
+ [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_addcarry_64:
+ Intrinsic<[llvm_i8_ty, llvm_i64_ty],
+ [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_subborrow_32:
+ Intrinsic<[llvm_i8_ty, llvm_i32_ty],
+ [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_subborrow_64:
+ Intrinsic<[llvm_i8_ty, llvm_i64_ty],
+ [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
}
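The ADX intrinsics follow the same pattern: the carry-out and the sum now come back as an {i8, i32} or {i8, i64} pair, and the pointer argument (and with it IntrArgMemOnly) is gone. As a hedged illustration, the sketch below chains two 32-bit add-with-carry calls to add two 64-bit values in halves; the enum name follows the def, and the splitting into halves is only for demonstration.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: 64-bit addition built from two llvm.x86.addcarry.32 calls,
    // feeding the carry out of the low half into the high half.
    Value *emitAdd64ViaAdc32(Module &M, IRBuilder<> &B, Value *A, Value *C) {
      Function *Adc = Intrinsic::getDeclaration(&M, Intrinsic::x86_addcarry_32);
      Value *ALo = B.CreateTrunc(A, B.getInt32Ty());
      Value *AHi = B.CreateTrunc(B.CreateLShr(A, 32), B.getInt32Ty());
      Value *CLo = B.CreateTrunc(C, B.getInt32Ty());
      Value *CHi = B.CreateTrunc(B.CreateLShr(C, 32), B.getInt32Ty());

      Value *Lo = B.CreateCall(Adc, {B.getInt8(0), ALo, CLo});
      Value *Carry = B.CreateExtractValue(Lo, 0); // i8 carry out of the low half
      Value *Hi = B.CreateCall(Adc, {Carry, AHi, CHi});

      Value *LoSum = B.CreateZExt(B.CreateExtractValue(Lo, 1), B.getInt64Ty());
      Value *HiSum = B.CreateZExt(B.CreateExtractValue(Hi, 1), B.getInt64Ty());
      return B.CreateOr(B.CreateShl(HiSum, 32), LoSum);
    }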
//===----------------------------------------------------------------------===//
@@ -2787,6 +2612,36 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
//===----------------------------------------------------------------------===//
// AVX512
+// Mask ops
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_kadd_b :
+ Intrinsic<[llvm_v8i1_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_w :
+ Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_d :
+ Intrinsic<[llvm_v32i1_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_kadd_q :
+ Intrinsic<[llvm_v64i1_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_ktestc_b :
+ Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_w :
+ Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_d :
+ Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_q :
+ Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_ktestz_b :
+ Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_w :
+ Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_d :
+ Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_q :
+ Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+}
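The new mask-register intrinsics operate on vXi1 vectors rather than the integer-typed masks the removed variants used, so callers bitcast between the scalar mask and the vector form. A hedged sketch for the 16-bit case (enum name from the def; emitKAddW and the i16-in/i16-out convention are illustrative assumptions):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: add two 16-bit k-register masks with llvm.x86.avx512.kadd.w.
    Value *emitKAddW(Module &M, IRBuilder<> &B, Value *MaskA, Value *MaskB) {
      Type *V16I1 = VectorType::get(B.getInt1Ty(), 16);
      Function *KAdd = Intrinsic::getDeclaration(&M, Intrinsic::x86_avx512_kadd_w);
      Value *A = B.CreateBitCast(MaskA, V16I1);    // i16 -> v16i1
      Value *Bm = B.CreateBitCast(MaskB, V16I1);
      Value *Sum = B.CreateCall(KAdd, {A, Bm});
      return B.CreateBitCast(Sum, B.getInt16Ty()); // back to an i16 mask
    }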
+
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
@@ -3677,78 +3532,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
// Integer arithmetic ops
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_padds_b_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_b_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_w_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_w_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_b_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_b_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_b_512 : GCCBuiltin<"__builtin_ia32_paddusb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_w_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_w_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_b_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_b_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_w_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_w_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_b_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_b_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_b_512 : GCCBuiltin<"__builtin_ia32_psubusb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_w_128 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_w_256 : // FIXME: remove this intrinsic
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem, Commutative]>;
@@ -3780,6 +3563,7 @@ let TargetPrefix = "x86" in {
// Gather and Scatter ops
let TargetPrefix = "x86" in {
+ // NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
@@ -3912,6 +3696,7 @@ let TargetPrefix = "x86" in {
[IntrReadMem, IntrArgMemOnly]>;
// scatter
+ // NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
@@ -4072,6 +3857,239 @@ let TargetPrefix = "x86" in {
llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
}
+// AVX512 gather/scatter intrinsics that use vXi1 masks.
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_mask_gather_dpd_512 :
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
+ llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_dps_512 :
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty,
+ llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_qpd_512 :
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
+ llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_qps_512 :
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty,
+ llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+
+ def int_x86_avx512_mask_gather_dpq_512 :
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
+ llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_dpi_512 :
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
+ llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_qpq_512 :
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
+ llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_x86_avx512_mask_gather_qpi_512 :
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty,
+ llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div2_df :
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div2_di :
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div4_df :
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div4_di :
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div4_sf :
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div4_si :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div8_sf :
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3div8_si :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv2_df :
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv2_di :
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv4_df :
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv4_di :
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv4_sf :
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv4_si :
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv8_sf :
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_gather3siv8_si :
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatter_dpd_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_dps_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
+ llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpd_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qps_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+
+ def int_x86_avx512_mask_scatter_dpq_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
+ llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_dpi_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
+ llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpq_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,llvm_v8i64_ty, llvm_v8i64_ty,
+ llvm_i32_ty],
+ [IntrArgMemOnly]>;
+ def int_x86_avx512_mask_scatter_qpi_512 :
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i32_ty,
+ llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv2_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv2_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv4_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv8_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scatterdiv8_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv2_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv2_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_df :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_di :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv4_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv8_sf :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+
+ def int_x86_avx512_mask_scattersiv8_si :
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
+ [IntrArgMemOnly]>;
+}
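Compared with the deprecated forms flagged above, the only interface change in these gathers and scatters is the mask operand: a vXi1 vector instead of a scalar integer. A hedged sketch for the 16-lane single-precision gather (operand order read off the def: pass-through, base pointer, index vector, mask, scale; the helper name and the scale of 4 are illustrative choices):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: gather 16 floats through 32-bit indices under a v16i1 mask.
    // Src is the v16f32 pass-through, Base an i8* base address, Index a v16i32,
    // Mask a v16i1; a scale of 4 addresses float-sized elements.
    Value *emitMaskedGatherDps(Module &M, IRBuilder<> &B, Value *Src,
                               Value *Base, Value *Index, Value *Mask) {
      Function *Gather = Intrinsic::getDeclaration(
          &M, Intrinsic::x86_avx512_mask_gather_dps_512);
      return B.CreateCall(Gather, {Src, Base, Index, Mask, B.getInt32(4)});
    }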
+
// AVX-512 conflict detection instruction
// Instructions that count the number of leading zero bits
let TargetPrefix = "x86" in {
@@ -4273,237 +4291,6 @@ let TargetPrefix = "x86" in {
llvm_i8_ty], [IntrNoMem]>;
}
-// VBMI2 Concat & Shift
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_vpshld_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshldq512">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshldq256">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshldq128">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshld_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshldd512">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshldd256">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshldd128">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshld_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshldw512">,
- Intrinsic<[llvm_v32i16_ty],
- [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshldw256">,
- Intrinsic<[llvm_v16i16_ty],
- [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshld_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshldw128">,
- Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshrd_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdq512">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdq256">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdq128">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshrd_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdd512">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdd256">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdd128">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_vpshrd_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdw512">,
- Intrinsic<[llvm_v32i16_ty],
- [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdw256">,
- Intrinsic<[llvm_v16i16_ty],
- [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_vpshrd_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdw128">,
- Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshldv_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvw128_maskz">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvw256_maskz">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvw512_maskz">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshldv_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvq128_maskz">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvq256_maskz">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvq512_maskz">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshldv_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshldvd128_maskz">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshldvd256_maskz">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshldv_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshldv_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshldvd512_maskz">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshrdv_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_w_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw128_maskz">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_w_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw256_maskz">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_w_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvw512_maskz">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshrdv_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_q_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq128_maskz">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_q_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq256_maskz">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_q_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvq512_maskz">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpshrdv_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_d_128 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd128_maskz">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_d_256 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd256_maskz">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpshrdv_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpshrdv_d_512 :
- GCCBuiltin<"__builtin_ia32_vpshrdvd512_maskz">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-}
-
// truncate
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_pmov_qb_128 :
diff --git a/contrib/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm/include/llvm/IR/LLVMContext.h
index ebd445553167..bd7097b39a3e 100644
--- a/contrib/llvm/include/llvm/IR/LLVMContext.h
+++ b/contrib/llvm/include/llvm/IR/LLVMContext.h
@@ -102,6 +102,7 @@ public:
MD_associated = 22, // "associated"
MD_callees = 23, // "callees"
MD_irr_loop = 24, // "irr_loop"
+ MD_access_group = 25, // "llvm.access.group"
};
/// Known operand bundle tag IDs, which always have the same value. All
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManager.h b/contrib/llvm/include/llvm/IR/LegacyPassManager.h
index 9a376a151505..5257a0eed488 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassManager.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassManager.h
@@ -98,9 +98,6 @@ private:
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_STDCXX_CONVERSION_FUNCTIONS(legacy::PassManagerBase, LLVMPassManagerRef)
-/// If -time-passes has been specified, report the timings immediately and then
-/// reset the timers to zero.
-void reportAndResetTimings();
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
index f6752f2817ba..51a2eb2a146d 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
@@ -406,11 +406,23 @@ public:
/// Set the initial size of the module if the user has specified that they
/// want remarks for size.
/// Returns 0 if the remark was not requested.
- unsigned initSizeRemarkInfo(Module &M);
+ unsigned initSizeRemarkInfo(
+ Module &M,
+ StringMap<std::pair<unsigned, unsigned>> &FunctionToInstrCount);
/// Emit a remark signifying that the number of IR instructions in the module
/// changed.
- void emitInstrCountChangedRemark(Pass *P, Module &M, unsigned CountBefore);
+ /// \p F is optionally passed by passes which run on Functions, and thus
+ /// always know whether or not a non-empty function is available.
+ ///
+ /// \p FunctionToInstrCount maps the name of a \p Function to a pair. The
+ /// first member of the pair is the IR count of the \p Function before running
+ /// \p P, and the second member is the IR count of the \p Function after
+ /// running \p P.
+ void emitInstrCountChangedRemark(
+ Pass *P, Module &M, int64_t Delta, unsigned CountBefore,
+ StringMap<std::pair<unsigned, unsigned>> &FunctionToInstrCount,
+ Function *F = nullptr);
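Taken together, the two hooks bracket a pass run: initSizeRemarkInfo snapshots the per-function instruction counts before the pass, and emitInstrCountChangedRemark reports the delta afterwards. The sketch below only mirrors that documented contract; the surrounding driver code, including the use of Module::getInstructionCount(), is an assumption for illustration rather than how the pass managers are literally written.

    #include <utility>

    #include "llvm/ADT/StringMap.h"
    #include "llvm/IR/LegacyPassManagers.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    // Sketch: drive the size-remark hooks around a single pass run.
    void runWithSizeRemarks(PMDataManager &PMD, Pass *P, Module &M) {
      StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
      unsigned CountBefore = PMD.initSizeRemarkInfo(M, FunctionToInstrCount);

      // ... run P over M here ...

      unsigned CountAfter = M.getInstructionCount(); // assumed helper
      int64_t Delta = static_cast<int64_t>(CountAfter) -
                      static_cast<int64_t>(CountBefore);
      if (Delta != 0)
        PMD.emitInstrCountChangedRemark(P, M, Delta, CountBefore,
                                        FunctionToInstrCount);
    }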
protected:
// Top level manager.
@@ -508,7 +520,6 @@ public:
}
};
-Timer *getPassTimer(Pass *);
}
#endif
diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h
index 9ac97f4224ac..be82c4efc115 100644
--- a/contrib/llvm/include/llvm/IR/Metadata.h
+++ b/contrib/llvm/include/llvm/IR/Metadata.h
@@ -66,9 +66,11 @@ protected:
enum StorageType { Uniqued, Distinct, Temporary };
/// Storage flag for non-uniqued, otherwise unowned, metadata.
- unsigned char Storage;
+ unsigned char Storage : 7;
// TODO: expose remaining bits to subclasses.
+ unsigned char ImplicitCode : 1;
+
unsigned short SubclassData16 = 0;
unsigned SubclassData32 = 0;
@@ -80,7 +82,7 @@ public:
protected:
Metadata(unsigned ID, StorageType Storage)
- : SubclassID(ID), Storage(Storage) {
+ : SubclassID(ID), Storage(Storage), ImplicitCode(false) {
static_assert(sizeof(*this) == 8, "Metadata fields poorly packed");
}
@@ -1316,10 +1318,11 @@ public:
//===----------------------------------------------------------------------===//
/// A tuple of MDNodes.
///
-/// Despite its name, a NamedMDNode isn't itself an MDNode. NamedMDNodes belong
-/// to modules, have names, and contain lists of MDNodes.
+/// Despite its name, a NamedMDNode isn't itself an MDNode.
+///
+/// NamedMDNodes are named module-level entities that contain lists of MDNodes.
///
-/// TODO: Inherit from Metadata.
+/// It is illegal for a NamedMDNode to appear as an operand of an MDNode.
class NamedMDNode : public ilist_node<NamedMDNode> {
friend class LLVMContextImpl;
friend class Module;
@@ -1420,6 +1423,9 @@ public:
}
};
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_ISA_CONVERSION_FUNCTIONS(NamedMDNode, LLVMNamedMDNodeRef)
+
} // end namespace llvm
#endif // LLVM_IR_METADATA_H
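As the reworded comment stresses, a NamedMDNode is a named, module-level list of MDNodes and can never itself be an MDNode operand. A minimal sketch of the usual access pattern (the metadata name "example.notes" and the helper are arbitrary):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: attach a string note to a module through a named metadata list.
    void addModuleNote(Module &M, StringRef Note) {
      LLVMContext &Ctx = M.getContext();
      // Created on first use; subsequent calls return the same list.
      NamedMDNode *NMD = M.getOrInsertNamedMetadata("example.notes");
      NMD->addOperand(MDNode::get(Ctx, {MDString::get(Ctx, Note)}));
    }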
diff --git a/contrib/llvm/include/llvm/IR/Module.h b/contrib/llvm/include/llvm/IR/Module.h
index a405f7df3efe..9ef35f1f73cd 100644
--- a/contrib/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm/include/llvm/IR/Module.h
@@ -16,6 +16,7 @@
#define LLVM_IR_MODULE_H
#include "llvm-c/Types.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -48,6 +49,7 @@ class MemoryBuffer;
class RandomNumberGenerator;
template <class PtrType> class SmallPtrSetImpl;
class StructType;
+class VersionTuple;
/// A Module instance is used to store all the information related to an
/// LLVM module. Modules are the top level container of all other LLVM
@@ -365,6 +367,11 @@ public:
return getOrInsertFunction(Name, AttributeList{}, RetTy, Args...);
}
+ // Avoid an incorrect argument ordering that would otherwise compile but misbehave.
+ template <typename... ArgsTy>
+ Constant *getOrInsertFunction(StringRef Name, AttributeList AttributeList,
+ FunctionType *Invalid, ArgsTy... Args) = delete;
+
/// Look up the specified function in the module symbol table. If it does not
/// exist, return null.
Function *getFunction(StringRef Name) const;
@@ -401,11 +408,15 @@ public:
}
/// Look up the specified global in the module symbol table.
- /// 1. If it does not exist, add a declaration of the global and return it.
- /// 2. Else, the global exists but has the wrong type: return the function
- /// with a constantexpr cast to the right type.
- /// 3. Finally, if the existing global is the correct declaration, return
- /// the existing global.
+ /// If it does not exist, invoke a callback to create a declaration of the
+ /// global and return it. The global is constantexpr casted to the expected
+ /// type if necessary.
+ Constant *
+ getOrInsertGlobal(StringRef Name, Type *Ty,
+ function_ref<GlobalVariable *()> CreateGlobalCallback);
+
+ /// Look up the specified global in the module symbol table. If required, this
+ /// overload constructs the global variable using its constructor's defaults.
Constant *getOrInsertGlobal(StringRef Name, Type *Ty);
/// @}
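The callback overload only invokes the callback when the named global is absent; either way the result is the existing or newly created global, constantexpr-cast to the requested type if an existing declaration has a different type. A hedged sketch (the name "my_counter", the linkage and the zero initializer are arbitrary choices):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: fetch, or lazily create, a module-level i32 counter.
    Constant *getCounter(Module &M) {
      Type *I32 = Type::getInt32Ty(M.getContext());
      return M.getOrInsertGlobal("my_counter", I32, [&] {
        // Only reached when "my_counter" does not exist yet.
        return new GlobalVariable(M, I32, /*isConstant=*/false,
                                  GlobalValue::InternalLinkage,
                                  ConstantInt::get(I32, 0), "my_counter");
      });
    }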
@@ -840,6 +851,17 @@ public:
void setPIELevel(PIELevel::Level PL);
/// @}
+ /// @}
+ /// @name Utility functions for querying and setting the code model
+ /// @{
+
+ /// Returns the code model (tiny, small, kernel, medium or large)
+ Optional<CodeModel::Model> getCodeModel() const;
+
+ /// Set the code model (tiny, small, kernel, medium or large)
+ void setCodeModel(CodeModel::Model CL);
+ /// @}
+
/// @name Utility functions for querying and setting PGO summary
/// @{
@@ -856,6 +878,17 @@ public:
/// Set that PLT should be avoided for RTLib calls.
void setRtLibUseGOT();
+ /// @name Utility functions for querying and setting the build SDK version
+ /// @{
+
+ /// Attach a build SDK version metadata to this module.
+ void setSDKVersion(const VersionTuple &V);
+
+ /// Get the build SDK version metadata.
+ ///
+ /// An empty version is returned if no such metadata is attached.
+ VersionTuple getSDKVersion() const;
+ /// @}
/// Take ownership of the given memory buffer.
void setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB);
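The code-model and SDK-version additions earlier in this header are simple module-level attributes with get/set pairs. A hedged sketch of using both (the specific model and the 10.14.4 version are arbitrary example values):

    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/CodeGen.h"
    #include "llvm/Support/VersionTuple.h"

    using namespace llvm;

    // Sketch: record a code model and a build SDK version on a module,
    // then read them back.
    void tagModule(Module &M) {
      M.setCodeModel(CodeModel::Small);
      M.setSDKVersion(VersionTuple(10, 14, 4));

      if (Optional<CodeModel::Model> CM = M.getCodeModel())
        (void)*CM;                          // CodeModel::Small in this sketch
      VersionTuple SDK = M.getSDKVersion(); // empty if no metadata is attached
      (void)SDK;
    }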
diff --git a/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h b/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
index fdf3d4b5f1ce..a1acee494475 100644
--- a/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Allocator.h"
@@ -99,6 +100,22 @@ struct CalleeInfo {
}
};
+inline const char *getHotnessName(CalleeInfo::HotnessType HT) {
+ switch (HT) {
+ case CalleeInfo::HotnessType::Unknown:
+ return "unknown";
+ case CalleeInfo::HotnessType::Cold:
+ return "cold";
+ case CalleeInfo::HotnessType::None:
+ return "none";
+ case CalleeInfo::HotnessType::Hot:
+ return "hot";
+ case CalleeInfo::HotnessType::Critical:
+ return "critical";
+ }
+ llvm_unreachable("invalid hotness");
+}
+
class GlobalValueSummary;
using GlobalValueSummaryList = std::vector<std::unique_ptr<GlobalValueSummary>>;
@@ -146,13 +163,13 @@ using GlobalValueSummaryMapTy =
/// Struct that holds a reference to a particular GUID in a global value
/// summary.
struct ValueInfo {
- PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 1, bool>
- RefAndFlag;
+ PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 2, int>
+ RefAndFlags;
ValueInfo() = default;
ValueInfo(bool HaveGVs, const GlobalValueSummaryMapTy::value_type *R) {
- RefAndFlag.setPointer(R);
- RefAndFlag.setInt(HaveGVs);
+ RefAndFlags.setPointer(R);
+ RefAndFlags.setInt(HaveGVs);
}
operator bool() const { return getRef(); }
@@ -172,10 +189,12 @@ struct ValueInfo {
: getRef()->second.U.Name;
}
- bool haveGVs() const { return RefAndFlag.getInt(); }
+ bool haveGVs() const { return RefAndFlags.getInt() & 0x1; }
+ bool isReadOnly() const { return RefAndFlags.getInt() & 0x2; }
+ void setReadOnly() { RefAndFlags.setInt(RefAndFlags.getInt() | 0x2); }
const GlobalValueSummaryMapTy::value_type *getRef() const {
- return RefAndFlag.getPointer();
+ return RefAndFlags.getPointer();
}
bool isDSOLocal() const;
@@ -391,6 +410,7 @@ public:
return const_cast<GlobalValueSummary &>(
static_cast<const AliasSummary *>(this)->getAliasee());
}
+ bool hasAliaseeGUID() const { return AliaseeGUID != 0; }
const GlobalValue::GUID &getAliaseeGUID() const {
assert(AliaseeGUID && "Unexpected missing aliasee GUID");
return AliaseeGUID;
@@ -460,13 +480,17 @@ public:
TypeCheckedLoadConstVCalls;
};
- /// Function attribute flags. Used to track if a function accesses memory,
- /// recurses or aliases.
+ /// Flags specific to function summaries.
struct FFlags {
+ // Function attribute flags. Used to track if a function accesses memory,
+ // recurses or aliases.
unsigned ReadNone : 1;
unsigned ReadOnly : 1;
unsigned NoRecurse : 1;
unsigned ReturnDoesNotAlias : 1;
+
+ // Indicate if the global value cannot be inlined.
+ unsigned NoInline : 1;
};
/// Create an empty FunctionSummary (with specified call edges).
@@ -477,8 +501,9 @@ public:
FunctionSummary::GVFlags(
GlobalValue::LinkageTypes::AvailableExternallyLinkage,
/*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
- 0, FunctionSummary::FFlags{}, std::vector<ValueInfo>(),
- std::move(Edges), std::vector<GlobalValue::GUID>(),
+ /*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
+ std::vector<ValueInfo>(), std::move(Edges),
+ std::vector<GlobalValue::GUID>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::ConstVCall>(),
@@ -493,10 +518,14 @@ private:
/// during the initial compile step when the summary index is first built.
unsigned InstCount;
- /// Function attribute flags. Used to track if a function accesses memory,
- /// recurses or aliases.
+ /// Function summary specific flags.
FFlags FunFlags;
+ /// The synthesized entry count of the function.
+ /// This is only populated during the ThinLink phase and remains unused while
+ /// generating per-module summaries.
+ uint64_t EntryCount = 0;
+
/// List of <CalleeValueInfo, CalleeInfo> call edge pairs from this function.
std::vector<EdgeTy> CallGraphEdgeList;
@@ -504,14 +533,15 @@ private:
public:
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
- std::vector<ValueInfo> Refs, std::vector<EdgeTy> CGEdges,
+ uint64_t EntryCount, std::vector<ValueInfo> Refs,
+ std::vector<EdgeTy> CGEdges,
std::vector<GlobalValue::GUID> TypeTests,
std::vector<VFuncId> TypeTestAssumeVCalls,
std::vector<VFuncId> TypeCheckedLoadVCalls,
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
std::vector<ConstVCall> TypeCheckedLoadConstVCalls)
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
- InstCount(NumInsts), FunFlags(FunFlags),
+ InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
CallGraphEdgeList(std::move(CGEdges)) {
if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() ||
!TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() ||
@@ -522,18 +552,26 @@ public:
std::move(TypeTestAssumeConstVCalls),
std::move(TypeCheckedLoadConstVCalls)});
}
+ // Gets the number of immutable refs in RefEdgeList
+ unsigned immutableRefCount() const;
/// Check if this is a function summary.
static bool classof(const GlobalValueSummary *GVS) {
return GVS->getSummaryKind() == FunctionKind;
}
- /// Get function attribute flags.
+ /// Get function summary flags.
FFlags fflags() const { return FunFlags; }
/// Get the instruction count recorded for this function.
unsigned instCount() const { return InstCount; }
+ /// Get the synthetic entry count for this function.
+ uint64_t entryCount() const { return EntryCount; }
+
+ /// Set the synthetic entry count for this function.
+ void setEntryCount(uint64_t EC) { EntryCount = EC; }
+
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
@@ -631,19 +669,30 @@ template <> struct DenseMapInfo<FunctionSummary::ConstVCall> {
/// Global variable summary information to aid decisions and
/// implementation of importing.
///
-/// Currently this doesn't add anything to the base \p GlobalValueSummary,
-/// but is a placeholder as additional info may be added to the summary
-/// for variables.
+/// A global variable summary has an extra flag telling whether the variable
+/// is modified during the program run. This affects ThinLTO
+/// internalization.
class GlobalVarSummary : public GlobalValueSummary {
-
public:
- GlobalVarSummary(GVFlags Flags, std::vector<ValueInfo> Refs)
- : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)) {}
+ struct GVarFlags {
+ GVarFlags(bool ReadOnly = false) : ReadOnly(ReadOnly) {}
+
+ unsigned ReadOnly : 1;
+ } VarFlags;
+
+ GlobalVarSummary(GVFlags Flags, GVarFlags VarFlags,
+ std::vector<ValueInfo> Refs)
+ : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)),
+ VarFlags(VarFlags) {}
/// Check if this is a global variable summary.
static bool classof(const GlobalValueSummary *GVS) {
return GVS->getSummaryKind() == GlobalVarKind;
}
+
+ GVarFlags varflags() const { return VarFlags; }
+ void setReadOnly(bool RO) { VarFlags.ReadOnly = RO; }
+ bool isReadOnly() const { return VarFlags.ReadOnly; }
};
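A sketch of how the new read-only bit could be populated and consumed (Flags, Refs and the GV-based heuristic are assumptions; the real decision is made by the summary builder and by propagateConstants() during the thin link):

    auto S = llvm::make_unique<GlobalVarSummary>(
        Flags, GlobalVarSummary::GVarFlags(/*ReadOnly=*/GV.isConstant()),
        std::move(Refs));
    // Later, e.g. after ModuleSummaryIndex::propagateConstants() has run:
    if (S->isReadOnly()) {
      // The variable is a candidate for ThinLTO internalization.
    }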
struct TypeTestResolution {
@@ -737,6 +786,11 @@ using ModulePathStringTableTy = StringMap<std::pair<uint64_t, ModuleHash>>;
/// a particular module, and provide efficient access to their summary.
using GVSummaryMapTy = DenseMap<GlobalValue::GUID, GlobalValueSummary *>;
+/// Map of a type GUID to type id string and summary (multimap used
+/// in case of GUID conflicts).
+using TypeIdSummaryMapTy =
+ std::multimap<GlobalValue::GUID, std::pair<std::string, TypeIdSummary>>;
+
/// Class to hold module path string table and global value map,
/// and encapsulate methods for operating on them.
class ModuleSummaryIndex {
@@ -748,9 +802,9 @@ private:
/// Holds strings for combined index, mapping to the corresponding module ID.
ModulePathStringTableTy ModulePathStringTable;
- /// Mapping from type identifiers to summary information for that type
- /// identifier.
- std::map<std::string, TypeIdSummary> TypeIdMap;
+ /// Mapping from type identifier GUIDs to type identifier and its summary
+ /// information.
+ TypeIdSummaryMapTy TypeIdMap;
/// Mapping from original ID to GUID. If original ID can map to multiple
/// GUIDs, it will be mapped to 0.
@@ -761,6 +815,9 @@ private:
/// considered live.
bool WithGlobalValueDeadStripping = false;
+ /// Indicates that summary-based synthetic entry count propagation has run
+ bool HasSyntheticEntryCounts = false;
+
/// Indicates that distributed backend should skip compilation of the
/// module. Flag is suppose to be set by distributed ThinLTO indexing
/// when it detected that the module is not needed during the final
@@ -774,6 +831,13 @@ private:
/// union.
bool HaveGVs;
+ // True if the index was created for a module compiled with -fsplit-lto-unit.
+ bool EnableSplitLTOUnit;
+
+ // True if some of the modules were compiled with -fsplit-lto-unit and
+ // some were not. Set when the combined index is created during the thin link.
+ bool PartiallySplitLTOUnits = false;
+
std::set<std::string> CfiFunctionDefs;
std::set<std::string> CfiFunctionDecls;
@@ -793,7 +857,9 @@ private:
public:
// See HaveGVs variable comment.
- ModuleSummaryIndex(bool HaveGVs) : HaveGVs(HaveGVs), Saver(Alloc) {}
+ ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false)
+ : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), Saver(Alloc) {
+ }
bool haveGVs() const { return HaveGVs; }
@@ -873,6 +939,9 @@ public:
WithGlobalValueDeadStripping = true;
}
+ bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
+ void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
+
bool skipModuleByDistributedBackend() const {
return SkipModuleByDistributedBackend;
}
@@ -880,6 +949,12 @@ public:
SkipModuleByDistributedBackend = true;
}
+ bool enableSplitLTOUnit() const { return EnableSplitLTOUnit; }
+ void setEnableSplitLTOUnit() { EnableSplitLTOUnit = true; }
+
+ bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; }
+ void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; }
+
bool isGlobalValueLive(const GlobalValueSummary *GVS) const {
return !WithGlobalValueDeadStripping || GVS->isLive();
}
@@ -905,7 +980,7 @@ public:
// Save a string in the Index. Use before passing Name to
// getOrInsertValueInfo when the string isn't owned elsewhere (e.g. on the
// module's Strtab).
- StringRef saveString(std::string String) { return Saver.save(String); }
+ StringRef saveString(StringRef String) { return Saver.save(String); }
/// Return a ValueInfo for \p GUID setting value \p Name.
ValueInfo getOrInsertValueInfo(GlobalValue::GUID GUID, StringRef Name) {
@@ -1063,23 +1138,29 @@ public:
return ModulePathStringTable.count(M.getModuleIdentifier());
}
- const std::map<std::string, TypeIdSummary> &typeIds() const {
- return TypeIdMap;
- }
+ const TypeIdSummaryMapTy &typeIds() const { return TypeIdMap; }
- /// This accessor should only be used when exporting because it can mutate the
- /// map.
+ /// Return an existing or new TypeIdSummary entry for \p TypeId.
+ /// This accessor can mutate the map and therefore should not be used in
+ /// the ThinLTO backends.
TypeIdSummary &getOrInsertTypeIdSummary(StringRef TypeId) {
- return TypeIdMap[TypeId];
+ auto TidIter = TypeIdMap.equal_range(GlobalValue::getGUID(TypeId));
+ for (auto It = TidIter.first; It != TidIter.second; ++It)
+ if (It->second.first == TypeId)
+ return It->second.second;
+ auto It = TypeIdMap.insert(
+ {GlobalValue::getGUID(TypeId), {TypeId, TypeIdSummary()}});
+ return It->second.second;
}
/// This returns either a pointer to the type id summary (if present in the
/// summary map) or null (if not present). This may be used when importing.
const TypeIdSummary *getTypeIdSummary(StringRef TypeId) const {
- auto I = TypeIdMap.find(TypeId);
- if (I == TypeIdMap.end())
- return nullptr;
- return &I->second;
+ auto TidIter = TypeIdMap.equal_range(GlobalValue::getGUID(TypeId));
+ for (auto It = TidIter.first; It != TidIter.second; ++It)
+ if (It->second.first == TypeId)
+ return &It->second.second;
+ return nullptr;
}
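A lookup sketch for the GUID-keyed multimap used above (the Index object and the mangled type-id string are assumptions):

    // Both accessors hash TypeId to a GUID first, then resolve possible GUID
    // collisions by comparing the stored type-id string.
    if (const TypeIdSummary *TIS = Index.getTypeIdSummary("_ZTS7MyClass")) {
      // Inspect *TIS (type test / WPD resolutions) without mutating the map.
    }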
/// Collect for the given module the list of functions it defines
@@ -1103,11 +1184,15 @@ public:
/// Print out strongly connected components for debugging.
void dumpSCCs(raw_ostream &OS);
+
+ /// Analyze index and detect unmodified globals
+ void propagateConstants(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
};
/// GraphTraits definition to build SCC for the index
template <> struct GraphTraits<ValueInfo> {
typedef ValueInfo NodeRef;
+ using EdgeRef = FunctionSummary::EdgeTy &;
static NodeRef valueInfoFromEdge(FunctionSummary::EdgeTy &P) {
return P.first;
@@ -1116,6 +1201,8 @@ template <> struct GraphTraits<ValueInfo> {
mapped_iterator<std::vector<FunctionSummary::EdgeTy>::iterator,
decltype(&valueInfoFromEdge)>;
+ using ChildEdgeIteratorType = std::vector<FunctionSummary::EdgeTy>::iterator;
+
static NodeRef getEntryNode(ValueInfo V) { return V; }
static ChildIteratorType child_begin(NodeRef N) {
@@ -1137,6 +1224,26 @@ template <> struct GraphTraits<ValueInfo> {
cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
return ChildIteratorType(F->CallGraphEdgeList.end(), &valueInfoFromEdge);
}
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ if (!N.getSummaryList().size()) // handle external function
+ return FunctionSummary::ExternalNode.CallGraphEdgeList.begin();
+
+ FunctionSummary *F =
+ cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+ return F->CallGraphEdgeList.begin();
+ }
+
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+ if (!N.getSummaryList().size()) // handle external function
+ return FunctionSummary::ExternalNode.CallGraphEdgeList.end();
+
+ FunctionSummary *F =
+ cast<FunctionSummary>(N.getSummaryList().front()->getBaseObject());
+ return F->CallGraphEdgeList.end();
+ }
+
+ static NodeRef edge_dest(EdgeRef E) { return E.first; }
};
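A rough sketch of walking call edges through the new edge accessors (N is an assumed ValueInfo node; errs() is used purely for illustration):

    using GT = GraphTraits<ValueInfo>;
    for (auto EI = GT::child_edge_begin(N), EE = GT::child_edge_end(N);
         EI != EE; ++EI) {
      ValueInfo Callee = GT::edge_dest(*EI);
      errs() << "call edge to " << Callee.name() << "\n";
    }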
template <>
@@ -1152,6 +1259,14 @@ struct GraphTraits<ModuleSummaryIndex *> : public GraphTraits<ValueInfo> {
}
};
+static inline bool canImportGlobalVar(GlobalValueSummary *S) {
+ assert(isa<GlobalVarSummary>(S->getBaseObject()));
+
+ // We don't import GVs with references, because that can result
+ // in promotion of local variables in the source module.
+ return !GlobalValue::isInterposableLinkage(S->linkage()) &&
+ !S->notEligibleToImport() && S->refs().empty();
+}
} // end namespace llvm
#endif // LLVM_IR_MODULESUMMARYINDEX_H
diff --git a/contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index 1b339ab32cf1..a88ee26b51c3 100644
--- a/contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/contrib/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -195,7 +195,6 @@ template <> struct MappingTraits<FunctionSummaryYaml> {
} // End yaml namespace
} // End llvm namespace
-LLVM_YAML_IS_STRING_MAP(TypeIdSummary)
LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml)
namespace llvm {
@@ -225,7 +224,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
GlobalValueSummary::GVFlags(
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
- 0, FunctionSummary::FFlags{}, Refs,
+ /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs,
ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
std::move(FSum.TypeTestAssumeVCalls),
std::move(FSum.TypeCheckedLoadVCalls),
@@ -258,6 +257,18 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
}
};
+template <> struct CustomMappingTraits<TypeIdSummaryMapTy> {
+ static void inputOne(IO &io, StringRef Key, TypeIdSummaryMapTy &V) {
+ TypeIdSummary TId;
+ io.mapRequired(Key.str().c_str(), TId);
+ V.insert({GlobalValue::getGUID(Key), {Key, TId}});
+ }
+ static void output(IO &io, TypeIdSummaryMapTy &V) {
+ for (auto TidIter = V.begin(); TidIter != V.end(); TidIter++)
+ io.mapRequired(TidIter->second.first.c_str(), TidIter->second.second);
+ }
+};
+
template <> struct MappingTraits<ModuleSummaryIndex> {
static void mapping(IO &io, ModuleSummaryIndex& index) {
io.mapOptional("GlobalValueMap", index.GlobalValueMap);
diff --git a/contrib/llvm/include/llvm/IR/Operator.h b/contrib/llvm/include/llvm/IR/Operator.h
index 939cec7f4aa4..6b387bbcccb1 100644
--- a/contrib/llvm/include/llvm/IR/Operator.h
+++ b/contrib/llvm/include/llvm/IR/Operator.h
@@ -364,19 +364,26 @@ public:
/// precision.
float getFPAccuracy() const;
- static bool classof(const Instruction *I) {
- return I->getType()->isFPOrFPVectorTy() ||
- I->getOpcode() == Instruction::FCmp;
- }
-
- static bool classof(const ConstantExpr *CE) {
- return CE->getType()->isFPOrFPVectorTy() ||
- CE->getOpcode() == Instruction::FCmp;
- }
-
static bool classof(const Value *V) {
- return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
- (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ unsigned Opcode;
+ if (auto *I = dyn_cast<Instruction>(V))
+ Opcode = I->getOpcode();
+ else if (auto *CE = dyn_cast<ConstantExpr>(V))
+ Opcode = CE->getOpcode();
+ else
+ return false;
+
+ switch (Opcode) {
+ case Instruction::FCmp:
+ return true;
+ // non math FP Operators (no FMF)
+ case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::InsertElement:
+ return false;
+ default:
+ return V->getType()->isFPOrFPVectorTy();
+ }
}
};
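With the opcode-based classof above, FP-typed insert/extract/shuffle no longer qualify; typical queries are unaffected (V is an assumed Value pointer):

    if (auto *FPOp = dyn_cast<FPMathOperator>(V)) {
      // Only genuine FP math (including fcmp) reaches this point now.
      if (FPOp->hasAllowReassoc()) {
        // Reassociation-based folds are allowed on this operation.
      }
    }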
diff --git a/contrib/llvm/include/llvm/IR/PassInstrumentation.h b/contrib/llvm/include/llvm/IR/PassInstrumentation.h
new file mode 100644
index 000000000000..08dac1c4a274
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/PassInstrumentation.h
@@ -0,0 +1,207 @@
+//===- llvm/IR/PassInstrumentation.h ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Pass Instrumentation classes that provide
+/// instrumentation points into the pass execution by PassManager.
+///
+/// There are two main classes:
+/// - PassInstrumentation provides a set of instrumentation points for
+/// pass managers to call on.
+///
+/// - PassInstrumentationCallbacks registers callbacks and provides access
+/// to them for PassInstrumentation.
+///
+/// A PassInstrumentation object is used as the result of
+/// PassInstrumentationAnalysis (so it is intended to be easily copyable).
+///
+/// Intended scheme of use for Pass Instrumentation is as follows:
+/// - register instrumentation callbacks in PassInstrumentationCallbacks
+/// instance. PassBuilder provides helper for that.
+///
+/// - register PassInstrumentationAnalysis with all the PassManagers.
+/// PassBuilder handles that automatically when registering analyses.
+///
+/// - Pass Manager requests PassInstrumentationAnalysis from analysis manager
+/// and gets PassInstrumentation as its result.
+///
+/// - Pass Manager invokes PassInstrumentation entry points appropriately,
+/// passing StringRef identification ("name") of the pass currently being
+/// executed and IRUnit it works on. There can be different schemes of
+/// providing names in future, currently it is just a name() of the pass.
+///
+/// - PassInstrumentation wraps address of IRUnit into llvm::Any and passes
+/// control to all the registered callbacks. Note that we specifically wrap
+/// 'const IRUnitT*' so as to avoid any accidental changes to IR in
+/// instrumenting callbacks.
+///
+/// - Some instrumentation points (BeforePass) allow control over the
+/// execution of a pass. For those, a callback returning false means the
+/// pass will not be executed.
+///
+/// TODO: currently there is no way for a pass to opt-out of execution control
+/// (e.g. become unskippable). PassManager is the only entity that determines
+/// how pass instrumentation affects pass execution.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_PASSINSTRUMENTATION_H
+#define LLVM_IR_PASSINSTRUMENTATION_H
+
+#include "llvm/ADT/Any.h"
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/TypeName.h"
+#include <type_traits>
+
+namespace llvm {
+
+class PreservedAnalyses;
+
+/// This class manages callback registration and provides a way for
+/// PassInstrumentation to pass control to the registered callbacks.
+class PassInstrumentationCallbacks {
+public:
+ // Before/After callbacks accept IRUnits whenever appropriate, so they need
+ // to take them as constant pointers, wrapped with llvm::Any.
+ // For the case when IRUnit has been invalidated there is a different
+ // callback to use - AfterPassInvalidated.
+ // TODO: currently AfterPassInvalidated does not accept IRUnit, since passing
+ // already invalidated IRUnit is unsafe. There are ways to handle invalidated IRUnits
+ // in a safe way, and we might pursue that as soon as there is a useful instrumentation
+ // that needs it.
+ using BeforePassFunc = bool(StringRef, Any);
+ using AfterPassFunc = void(StringRef, Any);
+ using AfterPassInvalidatedFunc = void(StringRef);
+ using BeforeAnalysisFunc = void(StringRef, Any);
+ using AfterAnalysisFunc = void(StringRef, Any);
+
+public:
+ PassInstrumentationCallbacks() {}
+
+ /// Copying PassInstrumentationCallbacks is not intended.
+ PassInstrumentationCallbacks(const PassInstrumentationCallbacks &) = delete;
+ void operator=(const PassInstrumentationCallbacks &) = delete;
+
+ template <typename CallableT> void registerBeforePassCallback(CallableT C) {
+ BeforePassCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT> void registerAfterPassCallback(CallableT C) {
+ AfterPassCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT>
+ void registerAfterPassInvalidatedCallback(CallableT C) {
+ AfterPassInvalidatedCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT>
+ void registerBeforeAnalysisCallback(CallableT C) {
+ BeforeAnalysisCallbacks.emplace_back(std::move(C));
+ }
+
+ template <typename CallableT>
+ void registerAfterAnalysisCallback(CallableT C) {
+ AfterAnalysisCallbacks.emplace_back(std::move(C));
+ }
+
+private:
+ friend class PassInstrumentation;
+
+ SmallVector<llvm::unique_function<BeforePassFunc>, 4> BeforePassCallbacks;
+ SmallVector<llvm::unique_function<AfterPassFunc>, 4> AfterPassCallbacks;
+ SmallVector<llvm::unique_function<AfterPassInvalidatedFunc>, 4>
+ AfterPassInvalidatedCallbacks;
+ SmallVector<llvm::unique_function<BeforeAnalysisFunc>, 4>
+ BeforeAnalysisCallbacks;
+ SmallVector<llvm::unique_function<AfterAnalysisFunc>, 4>
+ AfterAnalysisCallbacks;
+};
+
+/// This class provides instrumentation entry points for the Pass Manager,
+/// doing calls to callbacks registered in PassInstrumentationCallbacks.
+class PassInstrumentation {
+ PassInstrumentationCallbacks *Callbacks;
+
+public:
+ /// The Callbacks object is not owned by PassInstrumentation; its lifetime
+ /// should at least match the lifetime of the corresponding
+ /// PassInstrumentationAnalysis (which usually lasts until the end of the
+ /// current compilation).
+ PassInstrumentation(PassInstrumentationCallbacks *CB = nullptr)
+ : Callbacks(CB) {}
+
+ /// BeforePass instrumentation point - takes \p Pass instance to be executed
+ /// and constant reference to IR it operates on. \returns true if the pass is
+ /// allowed to be executed.
+ template <typename IRUnitT, typename PassT>
+ bool runBeforePass(const PassT &Pass, const IRUnitT &IR) const {
+ if (!Callbacks)
+ return true;
+
+ bool ShouldRun = true;
+ for (auto &C : Callbacks->BeforePassCallbacks)
+ ShouldRun &= C(Pass.name(), llvm::Any(&IR));
+ return ShouldRun;
+ }
+
+ /// AfterPass instrumentation point - takes \p Pass instance that has
+ /// just been executed and constant reference to \p IR it operates on.
+ /// \p IR is guaranteed to be valid at this point.
+ template <typename IRUnitT, typename PassT>
+ void runAfterPass(const PassT &Pass, const IRUnitT &IR) const {
+ if (Callbacks)
+ for (auto &C : Callbacks->AfterPassCallbacks)
+ C(Pass.name(), llvm::Any(&IR));
+ }
+
+ /// AfterPassInvalidated instrumentation point - takes \p Pass instance
+ /// that has just been executed. For use when IR has been invalidated
+ /// by \p Pass execution.
+ template <typename IRUnitT, typename PassT>
+ void runAfterPassInvalidated(const PassT &Pass) const {
+ if (Callbacks)
+ for (auto &C : Callbacks->AfterPassInvalidatedCallbacks)
+ C(Pass.name());
+ }
+
+ /// BeforeAnalysis instrumentation point - takes \p Analysis instance
+ /// to be executed and constant reference to IR it operates on.
+ template <typename IRUnitT, typename PassT>
+ void runBeforeAnalysis(const PassT &Analysis, const IRUnitT &IR) const {
+ if (Callbacks)
+ for (auto &C : Callbacks->BeforeAnalysisCallbacks)
+ C(Analysis.name(), llvm::Any(&IR));
+ }
+
+ /// AfterAnalysis instrumentation point - takes \p Analysis instance
+ /// that has just been executed and constant reference to IR it operated on.
+ template <typename IRUnitT, typename PassT>
+ void runAfterAnalysis(const PassT &Analysis, const IRUnitT &IR) const {
+ if (Callbacks)
+ for (auto &C : Callbacks->AfterAnalysisCallbacks)
+ C(Analysis.name(), llvm::Any(&IR));
+ }
+
+ /// Handle invalidation from the pass manager when PassInstrumentation
+ /// is used as the result of PassInstrumentationAnalysis.
+ ///
+ /// On attempt to invalidate just return false. There is nothing to become
+ /// invalid here.
+ template <typename IRUnitT, typename... ExtraArgsT>
+ bool invalidate(IRUnitT &, const class llvm::PreservedAnalyses &,
+ ExtraArgsT...) {
+ return false;
+ }
+};
+
+} // namespace llvm
+
+#endif
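A minimal sketch of registering callbacks through this interface (the name filter and the logging are assumptions made for illustration):

    PassInstrumentationCallbacks PIC;
    PIC.registerBeforePassCallback([](StringRef PassID, Any) {
      // Returning false asks the pass manager to skip this pass.
      return !PassID.startswith("NoOp");
    });
    PIC.registerAfterPassCallback([](StringRef PassID, Any) {
      errs() << "ran: " << PassID << "\n";
    });
    // Pass managers obtain a PassInstrumentation wrapping &PIC via the
    // PassInstrumentationAnalysis declared in PassManager.h below.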
diff --git a/contrib/llvm/include/llvm/IR/PassManager.h b/contrib/llvm/include/llvm/IR/PassManager.h
index a5d4aaf71c0e..738a2242eea0 100644
--- a/contrib/llvm/include/llvm/IR/PassManager.h
+++ b/contrib/llvm/include/llvm/IR/PassManager.h
@@ -44,6 +44,7 @@
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManagerInternal.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TypeName.h"
@@ -402,6 +403,43 @@ struct AnalysisInfoMixin : PassInfoMixin<DerivedT> {
}
};
+namespace detail {
+
+/// Actual unpacker of extra arguments in getAnalysisResult,
+/// passes only those tuple arguments that are mentioned in index_sequence.
+template <typename PassT, typename IRUnitT, typename AnalysisManagerT,
+ typename... ArgTs, size_t... Ns>
+typename PassT::Result
+getAnalysisResultUnpackTuple(AnalysisManagerT &AM, IRUnitT &IR,
+ std::tuple<ArgTs...> Args,
+ llvm::index_sequence<Ns...>) {
+ (void)Args;
+ return AM.template getResult<PassT>(IR, std::get<Ns>(Args)...);
+}
+
+/// Helper for *partial* unpacking of extra arguments in getAnalysisResult.
+///
+/// Arguments passed in tuple come from PassManager, so they might have extra
+/// arguments after those AnalysisManager's ExtraArgTs ones that we need to
+/// pass to getResult.
+template <typename PassT, typename IRUnitT, typename... AnalysisArgTs,
+ typename... MainArgTs>
+typename PassT::Result
+getAnalysisResult(AnalysisManager<IRUnitT, AnalysisArgTs...> &AM, IRUnitT &IR,
+ std::tuple<MainArgTs...> Args) {
+ return (getAnalysisResultUnpackTuple<
+ PassT, IRUnitT>)(AM, IR, Args,
+ llvm::index_sequence_for<AnalysisArgTs...>{});
+}
+
+} // namespace detail
+
+// Forward declare the pass instrumentation analysis explicitly queried in
+// generic PassManager code.
+// FIXME: figure out a way to move PassInstrumentationAnalysis into its own
+// header.
+class PassInstrumentationAnalysis;
+
/// Manages a sequence of passes over a particular unit of IR.
///
/// A pass manager contains a sequence of passes to run over a particular unit
@@ -445,15 +483,34 @@ public:
ExtraArgTs... ExtraArgs) {
PreservedAnalyses PA = PreservedAnalyses::all();
+ // Request PassInstrumentation from the analysis manager; we will use it to
+ // run the instrumenting callbacks for the passes later.
+ // Here we use a std::tuple wrapper over getResult which helps to extract
+ // AnalysisManager's arguments out of the whole ExtraArgs set.
+ PassInstrumentation PI =
+ detail::getAnalysisResult<PassInstrumentationAnalysis>(
+ AM, IR, std::tuple<ExtraArgTs...>(ExtraArgs...));
+
if (DebugLogging)
dbgs() << "Starting " << getTypeName<IRUnitT>() << " pass manager run.\n";
for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
+ auto *P = Passes[Idx].get();
if (DebugLogging)
- dbgs() << "Running pass: " << Passes[Idx]->name() << " on "
- << IR.getName() << "\n";
+ dbgs() << "Running pass: " << P->name() << " on " << IR.getName()
+ << "\n";
- PreservedAnalyses PassPA = Passes[Idx]->run(IR, AM, ExtraArgs...);
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, and skip its execution completely if asked to (a callback returned
+ // false).
+ if (!PI.runBeforePass<IRUnitT>(*P, IR))
+ continue;
+
+ PreservedAnalyses PassPA = P->run(IR, AM, ExtraArgs...);
+
+ // Call onto PassInstrumentation's AfterPass callbacks immediately after
+ // running the pass.
+ PI.runAfterPass<IRUnitT>(*P, IR);
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
@@ -510,6 +567,32 @@ extern template class PassManager<Function>;
/// Convenience typedef for a pass manager over functions.
using FunctionPassManager = PassManager<Function>;
+/// Pseudo-analysis pass that exposes the \c PassInstrumentation to pass
+/// managers. Goes before AnalysisManager definition to provide its
+/// internals (e.g. PassInstrumentationAnalysis::ID) for use there if needed.
+/// FIXME: figure out a way to move PassInstrumentationAnalysis into its own
+/// header.
+class PassInstrumentationAnalysis
+ : public AnalysisInfoMixin<PassInstrumentationAnalysis> {
+ friend AnalysisInfoMixin<PassInstrumentationAnalysis>;
+ static AnalysisKey Key;
+
+ PassInstrumentationCallbacks *Callbacks;
+
+public:
+ /// PassInstrumentationCallbacks object is shared, owned by something else,
+ /// not this analysis.
+ PassInstrumentationAnalysis(PassInstrumentationCallbacks *Callbacks = nullptr)
+ : Callbacks(Callbacks) {}
+
+ using Result = PassInstrumentation;
+
+ template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
+ Result run(IRUnitT &, AnalysisManagerT &, ExtraArgTs &&...) {
+ return PassInstrumentation(Callbacks);
+ }
+};
+
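How the pseudo-analysis above is typically exposed to an analysis manager (PIC is an assumed PassInstrumentationCallbacks instance; PassBuilder performs this registration automatically when registering analyses):

    ModuleAnalysisManager MAM;
    MAM.registerPass([&] { return PassInstrumentationAnalysis(&PIC); });
    // PassManager::run then fetches it with
    //   AM.getResult<PassInstrumentationAnalysis>(IR);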
/// A container for analyses that lazily runs them and caches their
/// results.
///
@@ -860,9 +943,18 @@ private:
if (DebugLogging)
dbgs() << "Running analysis: " << P.name() << " on " << IR.getName()
<< "\n";
+
+ PassInstrumentation PI;
+ if (ID != PassInstrumentationAnalysis::ID()) {
+ PI = getResult<PassInstrumentationAnalysis>(IR, ExtraArgs...);
+ PI.runBeforeAnalysis(P, IR);
+ }
+
AnalysisResultListT &ResultList = AnalysisResultLists[&IR];
ResultList.emplace_back(ID, P.run(IR, *this, ExtraArgs...));
+ PI.runAfterAnalysis(P, IR);
+
// P.run may have inserted elements into AnalysisResults and invalidated
// RI.
RI = AnalysisResults.find({ID, &IR});
@@ -930,7 +1022,7 @@ using FunctionAnalysisManager = AnalysisManager<Function>;
/// analysis manager over an "inner" IR unit. The inner unit must be contained
/// in the outer unit.
///
-/// Fore example, InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> is
+/// For example, InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> is
/// an analysis over Modules (the "outer" unit) that provides access to a
/// Function analysis manager. The FunctionAnalysisManager is the "inner"
/// manager being proxied, and Functions are the "inner" unit. The inner/outer
@@ -1192,13 +1284,24 @@ public:
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ // Request PassInstrumentation from the analysis manager; we will use it to
+ // run the instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
PreservedAnalyses PA = PreservedAnalyses::all();
for (Function &F : M) {
if (F.isDeclaration())
continue;
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, and skip its execution completely if asked to (a callback returned
+ // false).
+ if (!PI.runBeforePass<Function>(Pass, F))
+ continue;
PreservedAnalyses PassPA = Pass.run(F, FAM);
+ PI.runAfterPass(Pass, F);
+
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
@@ -1302,10 +1405,26 @@ public:
RepeatedPass(int Count, PassT P) : Count(Count), P(std::move(P)) {}
template <typename IRUnitT, typename AnalysisManagerT, typename... Ts>
- PreservedAnalyses run(IRUnitT &Arg, AnalysisManagerT &AM, Ts &&... Args) {
+ PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, Ts &&... Args) {
+
+ // Request PassInstrumentation from the analysis manager; we will use it to
+ // run the instrumenting callbacks for the passes later.
+ // Here we use a std::tuple wrapper over getResult which helps to extract
+ // AnalysisManager's arguments out of the whole Args set.
+ PassInstrumentation PI =
+ detail::getAnalysisResult<PassInstrumentationAnalysis>(
+ AM, IR, std::tuple<Ts...>(Args...));
+
auto PA = PreservedAnalyses::all();
- for (int i = 0; i < Count; ++i)
- PA.intersect(P.run(Arg, AM, std::forward<Ts>(Args)...));
+ for (int i = 0; i < Count; ++i) {
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, and skip its execution completely if asked to (a callback returned
+ // false).
+ if (!PI.runBeforePass<IRUnitT>(P, IR))
+ continue;
+ PA.intersect(P.run(IR, AM, std::forward<Ts>(Args)...));
+ PI.runAfterPass(P, IR);
+ }
return PA;
}
diff --git a/contrib/llvm/include/llvm/IR/PassManagerInternal.h b/contrib/llvm/include/llvm/IR/PassManagerInternal.h
index 16a3258b4121..5ad68be62742 100644
--- a/contrib/llvm/include/llvm/IR/PassManagerInternal.h
+++ b/contrib/llvm/include/llvm/IR/PassManagerInternal.h
@@ -48,7 +48,7 @@ struct PassConcept {
ExtraArgTs... ExtraArgs) = 0;
/// Polymorphic method to access the name of a pass.
- virtual StringRef name() = 0;
+ virtual StringRef name() const = 0;
};
/// A template wrapper used to implement the polymorphic API.
@@ -80,7 +80,7 @@ struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> {
return Pass.run(IR, AM, ExtraArgs...);
}
- StringRef name() override { return PassT::name(); }
+ StringRef name() const override { return PassT::name(); }
PassT Pass;
};
@@ -250,7 +250,7 @@ struct AnalysisPassConcept {
ExtraArgTs... ExtraArgs) = 0;
/// Polymorphic method to access the name of a pass.
- virtual StringRef name() = 0;
+ virtual StringRef name() const = 0;
};
/// Wrapper to model the analysis pass concept.
@@ -290,13 +290,14 @@ struct AnalysisPassModel : AnalysisPassConcept<IRUnitT, PreservedAnalysesT,
AnalysisResultConcept<IRUnitT, PreservedAnalysesT, InvalidatorT>>
run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
ExtraArgTs... ExtraArgs) override {
- return llvm::make_unique<ResultModelT>(Pass.run(IR, AM, ExtraArgs...));
+ return llvm::make_unique<ResultModelT>(
+ Pass.run(IR, AM, std::forward<ExtraArgTs>(ExtraArgs)...));
}
/// The model delegates to a static \c PassT::name method.
///
/// The returned string ref must point to constant immutable data!
- StringRef name() override { return PassT::name(); }
+ StringRef name() const override { return PassT::name(); }
PassT Pass;
};
diff --git a/contrib/llvm/include/llvm/IR/PassTimingInfo.h b/contrib/llvm/include/llvm/IR/PassTimingInfo.h
new file mode 100644
index 000000000000..e9945f997f43
--- /dev/null
+++ b/contrib/llvm/include/llvm/IR/PassTimingInfo.h
@@ -0,0 +1,108 @@
+//===- PassTimingInfo.h - pass execution timing -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This header defines classes/functions to handle pass execution timing
+/// information with interfaces for both pass managers.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_PASSTIMINGINFO_H
+#define LLVM_IR_PASSTIMINGINFO_H
+
+#include "llvm/ADT/Any.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/TypeName.h"
+#include <memory>
+namespace llvm {
+
+class Pass;
+class PassInstrumentationCallbacks;
+
+/// If -time-passes has been specified, report the timings immediately and then
+/// reset the timers to zero.
+void reportAndResetTimings();
+
+/// Request the timer for this legacy-pass-manager's pass instance.
+Timer *getPassTimer(Pass *);
+
+/// If the user specifies the -time-passes argument on an LLVM tool command line
+/// then the value of this boolean will be true, otherwise false.
+/// This is the storage for the -time-passes option.
+extern bool TimePassesIsEnabled;
+
+/// This class implements -time-passes functionality for the new pass manager.
+/// It provides the pass-instrumentation callbacks that measure the pass
+/// execution time. They collect timing info into individual timers as
+/// passes are being run. At the end of its life-time it prints the resulting
+/// timing report.
+class TimePassesHandler {
+ /// Value of this type is capable of uniquely identifying pass invocations.
+ /// It is a pair of a string Pass-Identifier (which for now is common
+ /// to all the instances of a given pass) and a sequential invocation counter.
+ using PassInvocationID = std::pair<StringRef, unsigned>;
+
+ /// A group of all pass-timing timers.
+ TimerGroup TG;
+
+ /// Map of timers for pass invocations
+ DenseMap<PassInvocationID, std::unique_ptr<Timer>> TimingData;
+
+ /// Map that counts invocations of passes, for use in UniqPassID construction.
+ StringMap<unsigned> PassIDCountMap;
+
+ /// Stack of currently active timers.
+ SmallVector<Timer *, 8> TimerStack;
+
+ bool Enabled;
+
+public:
+ TimePassesHandler(bool Enabled = TimePassesIsEnabled);
+
+ /// Destructor handles the print action if it has not been handled before.
+ ~TimePassesHandler() {
+ // Destroying the timers in TimingData first flushes all of their collected
+ // data into the TG timer group member, which later prints itself when it is
+ // destroyed.
+ TimingData.clear();
+ }
+
+ /// Prints out timing information and then resets the timers.
+ void print();
+
+ // We intend this to be unique per-compilation, thus no copies.
+ TimePassesHandler(const TimePassesHandler &) = delete;
+ void operator=(const TimePassesHandler &) = delete;
+
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+private:
+ /// Dumps information for running/triggered timers, useful for debugging
+ LLVM_DUMP_METHOD void dump() const;
+
+ /// Returns the new timer for each new run of the pass.
+ Timer &getPassTimer(StringRef PassID);
+
+ /// Returns the incremented counter for the next invocation of \p PassID.
+ unsigned nextPassID(StringRef PassID) { return ++PassIDCountMap[PassID]; }
+
+ void startTimer(StringRef PassID);
+ void stopTimer(StringRef PassID);
+
+ // Implementation of pass instrumentation callbacks.
+ bool runBeforePass(StringRef PassID);
+ void runAfterPass(StringRef PassID);
+};
+
+} // namespace llvm
+
+#endif
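A sketch of wiring the handler into pass instrumentation from a tool's setup code (an assumed driver-side snippet, not part of this header):

    PassInstrumentationCallbacks PIC;
    TimePassesHandler TimePasses;       // enabled iff -time-passes was given
    TimePasses.registerCallbacks(PIC);
    // ... build pipelines and run them with an analysis manager exposing PIC ...
    TimePasses.print();                 // report and reset; the destructor also prints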
diff --git a/contrib/llvm/include/llvm/IR/PatternMatch.h b/contrib/llvm/include/llvm/IR/PatternMatch.h
index af0616cd8221..120fc253b908 100644
--- a/contrib/llvm/include/llvm/IR/PatternMatch.h
+++ b/contrib/llvm/include/llvm/IR/PatternMatch.h
@@ -31,7 +31,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
@@ -215,6 +214,7 @@ template <typename Predicate> struct cst_pred_ty : public Predicate {
// Non-splat vector constant: check each element for a match.
unsigned NumElts = V->getType()->getVectorNumElements();
assert(NumElts != 0 && "Constant vector with no elements?");
+ bool HasNonUndefElements = false;
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = C->getAggregateElement(i);
if (!Elt)
@@ -224,8 +224,9 @@ template <typename Predicate> struct cst_pred_ty : public Predicate {
auto *CI = dyn_cast<ConstantInt>(Elt);
if (!CI || !this->isValue(CI->getValue()))
return false;
+ HasNonUndefElements = true;
}
- return true;
+ return HasNonUndefElements;
}
}
return false;
@@ -272,6 +273,7 @@ template <typename Predicate> struct cstfp_pred_ty : public Predicate {
// Non-splat vector constant: check each element for a match.
unsigned NumElts = V->getType()->getVectorNumElements();
assert(NumElts != 0 && "Constant vector with no elements?");
+ bool HasNonUndefElements = false;
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = C->getAggregateElement(i);
if (!Elt)
@@ -281,8 +283,9 @@ template <typename Predicate> struct cstfp_pred_ty : public Predicate {
auto *CF = dyn_cast<ConstantFP>(Elt);
if (!CF || !this->isValue(CF->getValueAPF()))
return false;
+ HasNonUndefElements = true;
}
- return true;
+ return HasNonUndefElements;
}
}
return false;
@@ -659,11 +662,39 @@ inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
}
+template <typename Op_t> struct FNeg_match {
+ Op_t X;
+
+ FNeg_match(const Op_t &Op) : X(Op) {}
+ template <typename OpTy> bool match(OpTy *V) {
+ auto *FPMO = dyn_cast<FPMathOperator>(V);
+ if (!FPMO || FPMO->getOpcode() != Instruction::FSub)
+ return false;
+ if (FPMO->hasNoSignedZeros()) {
+ // With 'nsz', any zero goes.
+ if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
+ return false;
+ } else {
+ // Without 'nsz', we need fsub -0.0, X exactly.
+ if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
+ return false;
+ }
+ return X.match(FPMO->getOperand(1));
+ }
+};
+
/// Match 'fneg X' as 'fsub -0.0, X'.
+template <typename OpTy>
+inline FNeg_match<OpTy>
+m_FNeg(const OpTy &X) {
+ return FNeg_match<OpTy>(X);
+}
+
+/// Match 'fneg X' as 'fsub +-0.0, X'.
template <typename RHS>
-inline BinaryOp_match<cstfp_pred_ty<is_neg_zero_fp>, RHS, Instruction::FSub>
-m_FNeg(const RHS &X) {
- return m_FSub(m_NegZeroFP(), X);
+inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
+m_FNegNSZ(const RHS &X) {
+ return m_FSub(m_AnyZeroFP(), X);
}
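Caller-side usage is unchanged by this rewrite; a small sketch (I is an assumed Value or Instruction pointer):

    Value *X;
    if (match(I, m_FNeg(m_Value(X)))) {
      // Matches 'fsub -0.0, X', and also 'fsub 0.0, X' when the fsub has 'nsz';
      // X is bound to the negated operand.
    }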
template <typename LHS, typename RHS>
@@ -991,116 +1022,111 @@ m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
}
//===----------------------------------------------------------------------===//
-// Matchers for SelectInst classes
+// Matchers for instructions with a given opcode and number of operands.
//
-template <typename Cond_t, typename LHS_t, typename RHS_t>
-struct SelectClass_match {
- Cond_t C;
- LHS_t L;
- RHS_t R;
+/// Matches instructions with Opcode and one operand.
+template <typename T0, unsigned Opcode> struct OneOps_match {
+ T0 Op1;
- SelectClass_match(const Cond_t &Cond, const LHS_t &LHS, const RHS_t &RHS)
- : C(Cond), L(LHS), R(RHS) {}
+ OneOps_match(const T0 &Op1) : Op1(Op1) {}
template <typename OpTy> bool match(OpTy *V) {
- if (auto *I = dyn_cast<SelectInst>(V))
- return C.match(I->getOperand(0)) && L.match(I->getOperand(1)) &&
- R.match(I->getOperand(2));
+ if (V->getValueID() == Value::InstructionVal + Opcode) {
+ auto *I = cast<Instruction>(V);
+ return Op1.match(I->getOperand(0));
+ }
return false;
}
};
+/// Matches instructions with Opcode and two operands.
+template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
+ T0 Op1;
+ T1 Op2;
+
+ TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ if (V->getValueID() == Value::InstructionVal + Opcode) {
+ auto *I = cast<Instruction>(V);
+ return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
+ }
+ return false;
+ }
+};
+
+/// Matches instructions with Opcode and three operands.
+template <typename T0, typename T1, typename T2, unsigned Opcode>
+struct ThreeOps_match {
+ T0 Op1;
+ T1 Op2;
+ T2 Op3;
+
+ ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
+ : Op1(Op1), Op2(Op2), Op3(Op3) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ if (V->getValueID() == Value::InstructionVal + Opcode) {
+ auto *I = cast<Instruction>(V);
+ return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
+ Op3.match(I->getOperand(2));
+ }
+ return false;
+ }
+};
+
+/// Matches SelectInst.
template <typename Cond, typename LHS, typename RHS>
-inline SelectClass_match<Cond, LHS, RHS> m_Select(const Cond &C, const LHS &L,
- const RHS &R) {
- return SelectClass_match<Cond, LHS, RHS>(C, L, R);
+inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
+m_Select(const Cond &C, const LHS &L, const RHS &R) {
+ return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
}
/// This matches a select of two constants, e.g.:
/// m_SelectCst<-1, 0>(m_Value(V))
template <int64_t L, int64_t R, typename Cond>
-inline SelectClass_match<Cond, constantint_match<L>, constantint_match<R>>
+inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
+ Instruction::Select>
m_SelectCst(const Cond &C) {
return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
}
-//===----------------------------------------------------------------------===//
-// Matchers for InsertElementInst classes
-//
-
+/// Matches InsertElementInst.
template <typename Val_t, typename Elt_t, typename Idx_t>
-struct InsertElementClass_match {
- Val_t V;
- Elt_t E;
- Idx_t I;
-
- InsertElementClass_match(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
- : V(Val), E(Elt), I(Idx) {}
-
- template <typename OpTy> bool match(OpTy *VV) {
- if (auto *II = dyn_cast<InsertElementInst>(VV))
- return V.match(II->getOperand(0)) && E.match(II->getOperand(1)) &&
- I.match(II->getOperand(2));
- return false;
- }
-};
-
-template <typename Val_t, typename Elt_t, typename Idx_t>
-inline InsertElementClass_match<Val_t, Elt_t, Idx_t>
+inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
- return InsertElementClass_match<Val_t, Elt_t, Idx_t>(Val, Elt, Idx);
+ return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
+ Val, Elt, Idx);
}
-//===----------------------------------------------------------------------===//
-// Matchers for ExtractElementInst classes
-//
-
-template <typename Val_t, typename Idx_t> struct ExtractElementClass_match {
- Val_t V;
- Idx_t I;
-
- ExtractElementClass_match(const Val_t &Val, const Idx_t &Idx)
- : V(Val), I(Idx) {}
-
- template <typename OpTy> bool match(OpTy *VV) {
- if (auto *II = dyn_cast<ExtractElementInst>(VV))
- return V.match(II->getOperand(0)) && I.match(II->getOperand(1));
- return false;
- }
-};
-
+/// Matches ExtractElementInst.
template <typename Val_t, typename Idx_t>
-inline ExtractElementClass_match<Val_t, Idx_t>
+inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
m_ExtractElement(const Val_t &Val, const Idx_t &Idx) {
- return ExtractElementClass_match<Val_t, Idx_t>(Val, Idx);
+ return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
}
-//===----------------------------------------------------------------------===//
-// Matchers for ShuffleVectorInst classes
-//
-
+/// Matches ShuffleVectorInst.
template <typename V1_t, typename V2_t, typename Mask_t>
-struct ShuffleVectorClass_match {
- V1_t V1;
- V2_t V2;
- Mask_t M;
-
- ShuffleVectorClass_match(const V1_t &v1, const V2_t &v2, const Mask_t &m)
- : V1(v1), V2(v2), M(m) {}
+inline ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>
+m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) {
+ return ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>(v1, v2,
+ m);
+}
- template <typename OpTy> bool match(OpTy *V) {
- if (auto *SI = dyn_cast<ShuffleVectorInst>(V))
- return V1.match(SI->getOperand(0)) && V2.match(SI->getOperand(1)) &&
- M.match(SI->getOperand(2));
- return false;
- }
-};
+/// Matches LoadInst.
+template <typename OpTy>
+inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
+ return OneOps_match<OpTy, Instruction::Load>(Op);
+}
-template <typename V1_t, typename V2_t, typename Mask_t>
-inline ShuffleVectorClass_match<V1_t, V2_t, Mask_t>
-m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) {
- return ShuffleVectorClass_match<V1_t, V2_t, Mask_t>(v1, v2, m);
+/// Matches StoreInst.
+template <typename ValueOpTy, typename PointerOpTy>
+inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
+m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
+ return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
+ PointerOp);
}
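The rewritten matchers are drop-in replacements for the deleted class-based ones; for example (I is an assumed Value pointer):

    Value *StoredVal, *Ptr;
    if (match(I, m_Store(m_Value(StoredVal), m_Value(Ptr)))) {
      // I is a StoreInst; StoredVal and Ptr are bound to its operands.
    }
    Value *LoadPtr;
    if (match(I, m_Load(m_Value(LoadPtr)))) {
      // I is a LoadInst reading from LoadPtr.
    }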
//===----------------------------------------------------------------------===//
@@ -1181,54 +1207,6 @@ inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
}
//===----------------------------------------------------------------------===//
-// Matcher for LoadInst classes
-//
-
-template <typename Op_t> struct LoadClass_match {
- Op_t Op;
-
- LoadClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
-
- template <typename OpTy> bool match(OpTy *V) {
- if (auto *LI = dyn_cast<LoadInst>(V))
- return Op.match(LI->getPointerOperand());
- return false;
- }
-};
-
-/// Matches LoadInst.
-template <typename OpTy> inline LoadClass_match<OpTy> m_Load(const OpTy &Op) {
- return LoadClass_match<OpTy>(Op);
-}
-
-//===----------------------------------------------------------------------===//
-// Matcher for StoreInst classes
-//
-
-template <typename ValueOp_t, typename PointerOp_t> struct StoreClass_match {
- ValueOp_t ValueOp;
- PointerOp_t PointerOp;
-
- StoreClass_match(const ValueOp_t &ValueOpMatch,
- const PointerOp_t &PointerOpMatch) :
- ValueOp(ValueOpMatch), PointerOp(PointerOpMatch) {}
-
- template <typename OpTy> bool match(OpTy *V) {
- if (auto *LI = dyn_cast<StoreInst>(V))
- return ValueOp.match(LI->getValueOperand()) &&
- PointerOp.match(LI->getPointerOperand());
- return false;
- }
-};
-
-/// Matches StoreInst.
-template <typename ValueOpTy, typename PointerOpTy>
-inline StoreClass_match<ValueOpTy, PointerOpTy>
-m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
- return StoreClass_match<ValueOpTy, PointerOpTy>(ValueOp, PointerOp);
-}
-
-//===----------------------------------------------------------------------===//
// Matchers for control flow.
//
@@ -1507,8 +1485,10 @@ template <typename Opnd_t> struct Argument_match {
Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
template <typename OpTy> bool match(OpTy *V) {
- CallSite CS(V);
- return CS.isCall() && Val.match(CS.getArgument(OpI));
+ // FIXME: Should likely be switched to use `CallBase`.
+ if (const auto *CI = dyn_cast<CallInst>(V))
+ return Val.match(CI->getArgOperand(OpI));
+ return false;
}
};
diff --git a/contrib/llvm/include/llvm/IR/RuntimeLibcalls.def b/contrib/llvm/include/llvm/IR/RuntimeLibcalls.def
index 7ed90d959f01..89005120cdc1 100644
--- a/contrib/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/contrib/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -83,6 +83,9 @@ HANDLE_LIBCALL(UDIVREM_I64, nullptr)
HANDLE_LIBCALL(UDIVREM_I128, nullptr)
HANDLE_LIBCALL(NEG_I32, "__negsi2")
HANDLE_LIBCALL(NEG_I64, "__negdi2")
+HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
+HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
+HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
// Floating-point
HANDLE_LIBCALL(ADD_F32, "__addsf3")
@@ -125,6 +128,11 @@ HANDLE_LIBCALL(SQRT_F64, "sqrt")
HANDLE_LIBCALL(SQRT_F80, "sqrtl")
HANDLE_LIBCALL(SQRT_F128, "sqrtl")
HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl")
+HANDLE_LIBCALL(CBRT_F32, "cbrtf")
+HANDLE_LIBCALL(CBRT_F64, "cbrt")
+HANDLE_LIBCALL(CBRT_F80, "cbrtl")
+HANDLE_LIBCALL(CBRT_F128, "cbrtl")
+HANDLE_LIBCALL(CBRT_PPCF128, "cbrtl")
HANDLE_LIBCALL(LOG_F32, "logf")
HANDLE_LIBCALL(LOG_F64, "log")
HANDLE_LIBCALL(LOG_F80, "logl")
diff --git a/contrib/llvm/include/llvm/IR/TypeBuilder.h b/contrib/llvm/include/llvm/IR/TypeBuilder.h
deleted file mode 100644
index d2c6f00079da..000000000000
--- a/contrib/llvm/include/llvm/IR/TypeBuilder.h
+++ /dev/null
@@ -1,407 +0,0 @@
-//===---- llvm/TypeBuilder.h - Builder for LLVM types -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the TypeBuilder class, which is used as a convenient way to
-// create LLVM types with a consistent and simplified interface.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_IR_TYPEBUILDER_H
-#define LLVM_IR_TYPEBUILDER_H
-
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/LLVMContext.h"
-#include <climits>
-
-namespace llvm {
-
-/// TypeBuilder - This provides a uniform API for looking up types
-/// known at compile time. To support cross-compilation, we define a
-/// series of tag types in the llvm::types namespace, like i<N>,
-/// ieee_float, ppc_fp128, etc. TypeBuilder<T, false> allows T to be
-/// any of these, a native C type (whose size may depend on the host
-/// compiler), or a pointer, function, or struct type built out of
-/// these. TypeBuilder<T, true> removes native C types from this set
-/// to guarantee that its result is suitable for cross-compilation.
-/// We define the primitive types, pointer types, and functions up to
-/// 5 arguments here, but to use this class with your own types,
-/// you'll need to specialize it. For example, say you want to call a
-/// function defined externally as:
-///
-/// \code{.cpp}
-///
-/// struct MyType {
-/// int32 a;
-/// int32 *b;
-/// void *array[1]; // Intended as a flexible array.
-/// };
-/// int8 AFunction(struct MyType *value);
-///
-/// \endcode
-///
-/// You'll want to use
-/// Function::Create(TypeBuilder<types::i<8>(MyType*), true>::get(), ...)
-/// to declare the function, but when you first try this, your compiler will
-/// complain that TypeBuilder<MyType, true>::get() doesn't exist. To fix this,
-/// write:
-///
-/// \code{.cpp}
-///
-/// namespace llvm {
-/// template<bool xcompile> class TypeBuilder<MyType, xcompile> {
-/// public:
-/// static StructType *get(LLVMContext &Context) {
-/// // If you cache this result, be sure to cache it separately
-/// // for each LLVMContext.
-/// return StructType::get(
-/// TypeBuilder<types::i<32>, xcompile>::get(Context),
-/// TypeBuilder<types::i<32>*, xcompile>::get(Context),
-/// TypeBuilder<types::i<8>*[], xcompile>::get(Context),
-/// nullptr);
-/// }
-///
-/// // You may find this a convenient place to put some constants
-/// // to help with getelementptr. They don't have any effect on
-/// // the operation of TypeBuilder.
-/// enum Fields {
-/// FIELD_A,
-/// FIELD_B,
-/// FIELD_ARRAY
-/// };
-/// }
-/// } // namespace llvm
-///
-/// \endcode
-///
-/// TypeBuilder cannot handle recursive types or types you only know at runtime.
-/// If you try to give it a recursive type, it will deadlock, infinitely
-/// recurse, or do something similarly undesirable.
-template<typename T, bool cross_compilable> class TypeBuilder {};
-
-// Types for use with cross-compilable TypeBuilders. These correspond
-// exactly with an LLVM-native type.
-namespace types {
-/// i<N> corresponds to the LLVM IntegerType with N bits.
-template<uint32_t num_bits> class i {};
-
-// The following classes represent the LLVM floating types.
-class ieee_float {};
-class ieee_double {};
-class x86_fp80 {};
-class fp128 {};
-class ppc_fp128 {};
-// X86 MMX.
-class x86_mmx {};
-} // namespace types
-
-// LLVM doesn't have const or volatile types.
-template<typename T, bool cross> class TypeBuilder<const T, cross>
- : public TypeBuilder<T, cross> {};
-template<typename T, bool cross> class TypeBuilder<volatile T, cross>
- : public TypeBuilder<T, cross> {};
-template<typename T, bool cross> class TypeBuilder<const volatile T, cross>
- : public TypeBuilder<T, cross> {};
-
-// Pointers
-template<typename T, bool cross> class TypeBuilder<T*, cross> {
-public:
- static PointerType *get(LLVMContext &Context) {
- return PointerType::getUnqual(TypeBuilder<T,cross>::get(Context));
- }
-};
-
-/// There is no support for references
-template<typename T, bool cross> class TypeBuilder<T&, cross> {};
-
-// Arrays
-template<typename T, size_t N, bool cross> class TypeBuilder<T[N], cross> {
-public:
- static ArrayType *get(LLVMContext &Context) {
- return ArrayType::get(TypeBuilder<T, cross>::get(Context), N);
- }
-};
-/// LLVM uses an array of length 0 to represent an unknown-length array.
-template<typename T, bool cross> class TypeBuilder<T[], cross> {
-public:
- static ArrayType *get(LLVMContext &Context) {
- return ArrayType::get(TypeBuilder<T, cross>::get(Context), 0);
- }
-};
-
-// Define the C integral types only for TypeBuilder<T, false>.
-//
-// C integral types do not have a defined size. It would be nice to use the
-// stdint.h-defined typedefs that do have defined sizes, but we'd run into the
-// following problem:
-//
-// On an ILP32 machine, stdint.h might define:
-//
-// typedef int int32_t;
-// typedef long long int64_t;
-// typedef long size_t;
-//
-// If we defined TypeBuilder<int32_t> and TypeBuilder<int64_t>, then any use of
-// TypeBuilder<size_t> would fail. We couldn't define TypeBuilder<size_t> in
-// addition to the defined-size types because we'd get duplicate definitions on
-// platforms where stdint.h instead defines:
-//
-// typedef int int32_t;
-// typedef long long int64_t;
-// typedef int size_t;
-//
-// So we define all the primitive C types and nothing else.
-#define DEFINE_INTEGRAL_TYPEBUILDER(T) \
-template<> class TypeBuilder<T, false> { \
-public: \
- static IntegerType *get(LLVMContext &Context) { \
- return IntegerType::get(Context, sizeof(T) * CHAR_BIT); \
- } \
-}; \
-template<> class TypeBuilder<T, true> { \
- /* We provide a definition here so users don't accidentally */ \
- /* define these types to work. */ \
-}
-DEFINE_INTEGRAL_TYPEBUILDER(char);
-DEFINE_INTEGRAL_TYPEBUILDER(signed char);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned char);
-DEFINE_INTEGRAL_TYPEBUILDER(short);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned short);
-DEFINE_INTEGRAL_TYPEBUILDER(int);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned int);
-DEFINE_INTEGRAL_TYPEBUILDER(long);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned long);
-#ifdef _MSC_VER
-DEFINE_INTEGRAL_TYPEBUILDER(__int64);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned __int64);
-#else /* _MSC_VER */
-DEFINE_INTEGRAL_TYPEBUILDER(long long);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned long long);
-#endif /* _MSC_VER */
-#undef DEFINE_INTEGRAL_TYPEBUILDER
-
-template<uint32_t num_bits, bool cross>
-class TypeBuilder<types::i<num_bits>, cross> {
-public:
- static IntegerType *get(LLVMContext &C) {
- return IntegerType::get(C, num_bits);
- }
-};
-
-template<> class TypeBuilder<float, false> {
-public:
- static Type *get(LLVMContext& C) {
- return Type::getFloatTy(C);
- }
-};
-template<> class TypeBuilder<float, true> {};
-
-template<> class TypeBuilder<double, false> {
-public:
- static Type *get(LLVMContext& C) {
- return Type::getDoubleTy(C);
- }
-};
-template<> class TypeBuilder<double, true> {};
-
-template<bool cross> class TypeBuilder<types::ieee_float, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getFloatTy(C); }
-};
-template<bool cross> class TypeBuilder<types::ieee_double, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getDoubleTy(C); }
-};
-template<bool cross> class TypeBuilder<types::x86_fp80, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::fp128, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getFP128Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::ppc_fp128, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::x86_mmx, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getX86_MMXTy(C); }
-};
-
-template<bool cross> class TypeBuilder<void, cross> {
-public:
- static Type *get(LLVMContext &C) {
- return Type::getVoidTy(C);
- }
-};
-
-/// void* is disallowed in LLVM types, but it occurs often enough in C code that
-/// we special case it.
-template<> class TypeBuilder<void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<const void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<volatile void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<const volatile void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-
-template<typename R, bool cross> class TypeBuilder<R(), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), false);
- }
-};
-template<typename R, typename A1, bool cross> class TypeBuilder<R(A1), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-template<typename R, typename A1, typename A2, bool cross>
-class TypeBuilder<R(A1, A2), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-template<typename R, typename A1, typename A2, typename A3, bool cross>
-class TypeBuilder<R(A1, A2, A3), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- bool cross>
-class TypeBuilder<R(A1, A2, A3, A4), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- typename A5, bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, A5), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- TypeBuilder<A5, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, bool cross> class TypeBuilder<R(...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), true);
- }
-};
-template<typename R, typename A1, bool cross>
-class TypeBuilder<R(A1, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), params, true);
- }
-};
-template<typename R, typename A1, typename A2, bool cross>
-class TypeBuilder<R(A1, A2, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-template<typename R, typename A1, typename A2, typename A3, bool cross>
-class TypeBuilder<R(A1, A2, A3, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- typename A5, bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, A5, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- TypeBuilder<A5, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-} // namespace llvm
-
-#endif
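With TypeBuilder gone, the types from the MyType example in the removed comment are built directly with the core IR type APIs. A minimal sketch, assuming an LLVMContext Context and a Module M (both, like MyType itself, are hypothetical and not part of this patch):

    // Literal struct matching the removed example: { i32, i32*, [0 x i8*] }.
    StructType *MyTypeTy = StructType::get(
        Context, {IntegerType::get(Context, 32),
                  PointerType::getUnqual(IntegerType::get(Context, 32)),
                  ArrayType::get(
                      PointerType::getUnqual(IntegerType::get(Context, 8)), 0)});
    // Direct equivalent of TypeBuilder<types::i<8>(MyType*), true>::get(Context).
    FunctionType *AFunctionTy = FunctionType::get(
        IntegerType::get(Context, 8), {PointerType::getUnqual(MyTypeTy)},
        /*isVarArg=*/false);
    Function *AFunction = Function::Create(
        AFunctionTy, Function::ExternalLinkage, "AFunction", &M);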
diff --git a/contrib/llvm/include/llvm/IR/Value.h b/contrib/llvm/include/llvm/IR/Value.h
index f396db995ab0..4f3a45c684fc 100644
--- a/contrib/llvm/include/llvm/IR/Value.h
+++ b/contrib/llvm/include/llvm/IR/Value.h
@@ -254,7 +254,8 @@ public:
private:
void destroyValueName();
- void doRAUW(Value *New, bool NoMetadata);
+ enum class ReplaceMetadataUses { No, Yes };
+ void doRAUW(Value *New, ReplaceMetadataUses);
void setNameImpl(const Twine &Name);
public:
diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h
index d67b1d48f274..037c0dbb56ec 100644
--- a/contrib/llvm/include/llvm/InitializePasses.h
+++ b/contrib/llvm/include/llvm/InitializePasses.h
@@ -85,6 +85,7 @@ void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&);
void initializeBranchRelaxationPass(PassRegistry&);
void initializeBreakCriticalEdgesPass(PassRegistry&);
void initializeBreakFalseDepsPass(PassRegistry&);
+void initializeCanonicalizeAliasesLegacyPassPass(PassRegistry &);
void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&);
void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&);
void initializeCFGPrinterLegacyPassPass(PassRegistry&);
@@ -103,6 +104,7 @@ void initializeCodeGenPreparePass(PassRegistry&);
void initializeConstantHoistingLegacyPassPass(PassRegistry&);
void initializeConstantMergeLegacyPassPass(PassRegistry&);
void initializeConstantPropagationPass(PassRegistry&);
+void initializeControlHeightReductionLegacyPassPass(PassRegistry&);
void initializeCorrelatedValuePropagationPass(PassRegistry&);
void initializeCostModelAnalysisPass(PassRegistry&);
void initializeCrossDSOCFIPass(PassRegistry&);
@@ -119,7 +121,6 @@ void initializeDependenceAnalysisPass(PassRegistry&);
void initializeDependenceAnalysisWrapperPassPass(PassRegistry&);
void initializeDetectDeadLanesPass(PassRegistry&);
void initializeDivRemPairsLegacyPassPass(PassRegistry&);
-void initializeDivergenceAnalysisPass(PassRegistry&);
void initializeDomOnlyPrinterPass(PassRegistry&);
void initializeDomOnlyViewerPass(PassRegistry&);
void initializeDomPrinterPass(PassRegistry&);
@@ -140,6 +141,7 @@ void initializeExpandISelPseudosPass(PassRegistry&);
void initializeExpandMemCmpPassPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
+void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
@@ -161,6 +163,7 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&);
void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
+void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
void initializeHWAddressSanitizerPass(PassRegistry&);
void initializeIPCPPass(PassRegistry&);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
@@ -181,6 +184,7 @@ void initializeInstrProfilingLegacyPassPass(PassRegistry&);
void initializeInstructionCombiningPassPass(PassRegistry&);
void initializeInstructionSelectPass(PassRegistry&);
void initializeInterleavedAccessPass(PassRegistry&);
+void initializeInterleavedLoadCombinePass(PassRegistry &);
void initializeInternalizeLegacyPassPass(PassRegistry&);
void initializeIntervalPartitionPass(PassRegistry&);
void initializeJumpThreadingPass(PassRegistry&);
@@ -191,9 +195,11 @@ void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&);
void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&);
void initializeLazyValueInfoPrinterPass(PassRegistry&);
void initializeLazyValueInfoWrapperPassPass(PassRegistry&);
+void initializeLegacyDivergenceAnalysisPass(PassRegistry&);
void initializeLegacyLICMPassPass(PassRegistry&);
void initializeLegacyLoopSinkPassPass(PassRegistry&);
void initializeLegalizerPass(PassRegistry&);
+void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &);
void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&);
void initializeLintPass(PassRegistry&);
void initializeLiveDebugValuesPass(PassRegistry&);
@@ -203,7 +209,7 @@ void initializeLiveRangeShrinkPass(PassRegistry&);
void initializeLiveRegMatrixPass(PassRegistry&);
void initializeLiveStacksPass(PassRegistry&);
void initializeLiveVariablesPass(PassRegistry&);
-void initializeLoadStoreVectorizerPass(PassRegistry&);
+void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry&);
void initializeLoaderPassPass(PassRegistry&);
void initializeLocalStackSlotPassPass(PassRegistry&);
void initializeLocalizerPass(PassRegistry&);
@@ -269,7 +275,7 @@ void initializeMemDerefPrinterPass(PassRegistry&);
void initializeMemoryDependenceWrapperPassPass(PassRegistry&);
void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&);
void initializeMemorySSAWrapperPassPass(PassRegistry&);
-void initializeMemorySanitizerPass(PassRegistry&);
+void initializeMemorySanitizerLegacyPassPass(PassRegistry&);
void initializeMergeFunctionsPass(PassRegistry&);
void initializeMergeICmpsPass(PassRegistry&);
void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
@@ -352,7 +358,7 @@ void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
void initializeSanitizerCoverageModulePass(PassRegistry&);
void initializeScalarEvolutionWrapperPassPass(PassRegistry&);
void initializeScalarizeMaskedMemIntrinPass(PassRegistry&);
-void initializeScalarizerPass(PassRegistry&);
+void initializeScalarizerLegacyPassPass(PassRegistry&);
void initializeScavengerTestPass(PassRegistry&);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&);
void initializeSeparateConstOffsetFromGEPPass(PassRegistry&);
@@ -369,6 +375,8 @@ void initializeSpillPlacementPass(PassRegistry&);
void initializeStackColoringPass(PassRegistry&);
void initializeStackMapLivenessPass(PassRegistry&);
void initializeStackProtectorPass(PassRegistry&);
+void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &);
+void initializeStackSafetyInfoWrapperPassPass(PassRegistry &);
void initializeStackSlotColoringPass(PassRegistry&);
void initializeStraightLineStrengthReducePass(PassRegistry&);
void initializeStripDeadDebugInfoPass(PassRegistry&);
@@ -384,7 +392,7 @@ void initializeTailDuplicatePass(PassRegistry&);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);
void initializeTargetPassConfigPass(PassRegistry&);
void initializeTargetTransformInfoWrapperPassPass(PassRegistry&);
-void initializeThreadSanitizerPass(PassRegistry&);
+void initializeThreadSanitizerLegacyPassPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
void initializeUnifyFunctionExitNodesPass(PassRegistry&);
@@ -394,6 +402,7 @@ void initializeUnreachableMachineBlockElimPass(PassRegistry&);
void initializeVerifierLegacyPassPass(PassRegistry&);
void initializeVirtRegMapPass(PassRegistry&);
void initializeVirtRegRewriterPass(PassRegistry&);
+void initializeWarnMissedTransformationsLegacyPass(PassRegistry &);
void initializeWasmEHPreparePass(PassRegistry&);
void initializeWholeProgramDevirtPass(PassRegistry&);
void initializeWinEHPreparePass(PassRegistry&);
diff --git a/contrib/llvm/include/llvm/LTO/Config.h b/contrib/llvm/include/llvm/LTO/Config.h
index 57bba5e34840..7058602c3ee2 100644
--- a/contrib/llvm/include/llvm/LTO/Config.h
+++ b/contrib/llvm/include/llvm/LTO/Config.h
@@ -49,6 +49,10 @@ struct Config {
/// Use the new pass manager
bool UseNewPM = false;
+ /// Flag to indicate that the optimizer should not assume builtins are present
+ /// on the target.
+ bool Freestanding = false;
+
/// Disable entirely the optimizer, including importing for ThinLTO
bool CodeGenOnly = false;
@@ -73,6 +77,9 @@ struct Config {
/// Sample PGO profile path.
std::string SampleProfile;
+ /// Name remapping file for profile data.
+ std::string ProfileRemapping;
+
/// The directory to store .dwo files.
std::string DwoDir;
diff --git a/contrib/llvm/include/llvm/LTO/LTO.h b/contrib/llvm/include/llvm/LTO/LTO.h
index 7d6beab6b441..534d9b6f3f2a 100644
--- a/contrib/llvm/include/llvm/LTO/LTO.h
+++ b/contrib/llvm/include/llvm/LTO/LTO.h
@@ -40,13 +40,13 @@ class Module;
class Target;
class raw_pwrite_stream;
-/// Resolve Weak and LinkOnce values in the \p Index. Linkage changes recorded
-/// in the index and the ThinLTO backends must apply the changes to the Module
-/// via thinLTOResolveWeakForLinkerModule.
+/// Resolve linkage for prevailing symbols in the \p Index. Linkage changes
+/// recorded in the index and the ThinLTO backends must apply the changes to
+/// the module via thinLTOResolvePrevailingInModule.
///
/// This is done for correctness (if value exported, ensure we always
/// emit a copy), and compile-time optimization (allow drop of duplicates).
-void thinLTOResolveWeakForLinkerInIndex(
+void thinLTOResolvePrevailingInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
@@ -60,6 +60,19 @@ void thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(StringRef, GlobalValue::GUID)> isExported);
+/// Computes a unique hash for the Module considering the current list of
+/// export/import and other global analysis results.
+/// The hash is produced in \p Key.
+void computeLTOCacheKey(
+ SmallString<40> &Key, const lto::Config &Conf,
+ const ModuleSummaryIndex &Index, StringRef ModuleID,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ const GVSummaryMapTy &DefinedGlobals,
+ const std::set<GlobalValue::GUID> &CfiFunctionDefs = {},
+ const std::set<GlobalValue::GUID> &CfiFunctionDecls = {});
+
namespace lto {
/// Given the original \p Path to an output file, replace any path
@@ -387,6 +400,9 @@ private:
Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache);
mutable bool CalledGetMaxTasks = false;
+
+ // Use Optional to distinguish false from not yet initialized.
+ Optional<bool> EnableSplitLTOUnit;
};
/// The resolution for a symbol. The linker must provide a SymbolResolution for
diff --git a/contrib/llvm/include/llvm/LTO/SummaryBasedOptimizations.h b/contrib/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
new file mode 100644
index 000000000000..ad3a8e7dc77b
--- /dev/null
+++ b/contrib/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
@@ -0,0 +1,17 @@
+//=- llvm/LTO/SummaryBasedOptimizations.h -Link time optimizations-*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
+#define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
+namespace llvm {
+class ModuleSummaryIndex;
+void computeSyntheticCounts(ModuleSummaryIndex &Index);
+
+} // namespace llvm
+#endif
diff --git a/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
index f48ab02863a5..8f23b7cb4574 100644
--- a/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -48,6 +48,9 @@
#include <string>
#include <vector>
+/// Enable global value internalization in LTO.
+extern llvm::cl::opt<bool> EnableLTOInternalization;
+
namespace llvm {
template <typename T> class ArrayRef;
class LLVMContext;
@@ -233,7 +236,7 @@ private:
unsigned OptLevel = 2;
lto_diagnostic_handler_t DiagHandler = nullptr;
void *DiagContext = nullptr;
- bool ShouldInternalize = true;
+ bool ShouldInternalize = EnableLTOInternalization;
bool ShouldEmbedUselists = false;
bool ShouldRestoreGlobalsLinkage = false;
TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
diff --git a/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index b32a972542c8..d4c69a1ce260 100644
--- a/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -187,7 +187,7 @@ public:
/// Cache policy: the maximum size for the cache directory in bytes. A value
/// over the amount of available space on the disk will be reduced to the
/// amount of available space. A value of 0 will be ignored.
- void setCacheMaxSizeBytes(unsigned MaxSizeBytes) {
+ void setCacheMaxSizeBytes(uint64_t MaxSizeBytes) {
if (MaxSizeBytes)
CacheOptions.Policy.MaxSizeBytes = MaxSizeBytes;
}
@@ -273,8 +273,8 @@ public:
/**
* Compute and emit the imported files for module at \p ModulePath.
*/
- static void emitImports(StringRef ModulePath, StringRef OutputName,
- ModuleSummaryIndex &Index);
+ void emitImports(Module &Module, StringRef OutputName,
+ ModuleSummaryIndex &Index);
/**
* Perform cross-module importing for the module identified by
@@ -285,8 +285,8 @@ public:
/**
* Compute the list of summaries needed for importing into module.
*/
- static void gatherImportedSummariesForModule(
- StringRef ModulePath, ModuleSummaryIndex &Index,
+ void gatherImportedSummariesForModule(
+ Module &Module, ModuleSummaryIndex &Index,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
/**
@@ -299,11 +299,6 @@ public:
*/
void optimize(Module &Module);
- /**
- * Perform ThinLTO CodeGen.
- */
- std::unique_ptr<MemoryBuffer> codegen(Module &Module);
-
/**@}*/
private:
diff --git a/contrib/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm/include/llvm/LinkAllPasses.h
index bd432c58b613..0851c2f8d265 100644
--- a/contrib/llvm/include/llvm/LinkAllPasses.h
+++ b/contrib/llvm/include/llvm/LinkAllPasses.h
@@ -50,6 +50,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
@@ -88,13 +89,13 @@ namespace {
(void) llvm::createCalledValuePropagationPass();
(void) llvm::createConstantMergePass();
(void) llvm::createConstantPropagationPass();
+ (void) llvm::createControlHeightReductionLegacyPass();
(void) llvm::createCostModelAnalysisPass();
(void) llvm::createDeadArgEliminationPass();
(void) llvm::createDeadCodeEliminationPass();
(void) llvm::createDeadInstEliminationPass();
(void) llvm::createDeadStoreEliminationPass();
(void) llvm::createDependenceAnalysisWrapperPass();
- (void) llvm::createDivergenceAnalysisPass();
(void) llvm::createDomOnlyPrinterPass();
(void) llvm::createDomPrinterPass();
(void) llvm::createDomOnlyViewerPass();
@@ -121,6 +122,7 @@ namespace {
(void) llvm::createInstructionCombiningPass();
(void) llvm::createInternalizePass();
(void) llvm::createLCSSAPass();
+ (void) llvm::createLegacyDivergenceAnalysisPass();
(void) llvm::createLICMPass();
(void) llvm::createLoopSinkPass();
(void) llvm::createLazyValueInfoPass();
@@ -218,6 +220,7 @@ namespace {
(void) llvm::createFloat2IntPass();
(void) llvm::createEliminateAvailableExternallyPass();
(void) llvm::createScalarizeMaskedMemIntrinPass();
+ (void) llvm::createWarnMissedTransformationsPass();
(void)new llvm::IntervalPartition();
(void)new llvm::ScalarEvolutionWrapperPass();
@@ -227,7 +230,8 @@ namespace {
llvm::TargetLibraryInfo TLI(TLII);
llvm::AliasAnalysis AA(TLI);
llvm::AliasSetTracker X(AA);
- X.add(nullptr, 0, llvm::AAMDNodes()); // for -print-alias-sets
+ X.add(nullptr, llvm::LocationSize::unknown(),
+ llvm::AAMDNodes()); // for -print-alias-sets
(void) llvm::AreStatisticsEnabled();
(void) llvm::sys::RunningOnValgrind();
}
diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h b/contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h
index bc46cfdf4c4c..71c6ee28df70 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmInfoWasm.h
@@ -19,6 +19,6 @@ class MCAsmInfoWasm : public MCAsmInfo {
protected:
MCAsmInfoWasm();
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCAsmMacro.h b/contrib/llvm/include/llvm/MC/MCAsmMacro.h
index 09b32c7ea333..135fa4f2e33d 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmMacro.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmMacro.h
@@ -52,7 +52,7 @@ public:
Pipe, PipePipe, Caret,
Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
Less, LessEqual, LessLess, LessGreater,
- Greater, GreaterEqual, GreaterGreater, At,
+ Greater, GreaterEqual, GreaterGreater, At, MinusGreater,
// MIPS unary expression operators such as %neg.
PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
diff --git a/contrib/llvm/include/llvm/MC/MCAssembler.h b/contrib/llvm/include/llvm/MC/MCAssembler.h
index 0f9499d705e4..986c6e17548f 100644
--- a/contrib/llvm/include/llvm/MC/MCAssembler.h
+++ b/contrib/llvm/include/llvm/MC/MCAssembler.h
@@ -23,6 +23,7 @@
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -94,6 +95,8 @@ public:
unsigned Major;
unsigned Minor;
unsigned Update;
+ /// An optional version of the SDK that was used to build the source.
+ VersionTuple SDKVersion;
};
private:
@@ -255,20 +258,24 @@ public:
/// MachO deployment target version information.
const VersionInfoType &getVersionInfo() const { return VersionInfo; }
void setVersionMin(MCVersionMinType Type, unsigned Major, unsigned Minor,
- unsigned Update) {
+ unsigned Update,
+ VersionTuple SDKVersion = VersionTuple()) {
VersionInfo.EmitBuildVersion = false;
VersionInfo.TypeOrPlatform.Type = Type;
VersionInfo.Major = Major;
VersionInfo.Minor = Minor;
VersionInfo.Update = Update;
+ VersionInfo.SDKVersion = SDKVersion;
}
void setBuildVersion(MachO::PlatformType Platform, unsigned Major,
- unsigned Minor, unsigned Update) {
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion = VersionTuple()) {
VersionInfo.EmitBuildVersion = true;
VersionInfo.TypeOrPlatform.Platform = Platform;
VersionInfo.Major = Major;
VersionInfo.Minor = Minor;
VersionInfo.Update = Update;
+ VersionInfo.SDKVersion = SDKVersion;
}
/// Reuse an assembler instance
diff --git a/contrib/llvm/include/llvm/MC/MCCodeView.h b/contrib/llvm/include/llvm/MC/MCCodeView.h
index 1d9e3c6698cf..cef03a409f95 100644
--- a/contrib/llvm/include/llvm/MC/MCCodeView.h
+++ b/contrib/llvm/include/llvm/MC/MCCodeView.h
@@ -30,6 +30,7 @@ class CodeViewContext;
/// Instances of this class represent the information from a
/// .cv_loc directive.
class MCCVLoc {
+ const MCSymbol *Label = nullptr;
uint32_t FunctionId;
uint32_t FileNum;
uint32_t Line;
@@ -39,15 +40,17 @@ class MCCVLoc {
private: // CodeViewContext manages these
friend class CodeViewContext;
- MCCVLoc(unsigned functionid, unsigned fileNum, unsigned line, unsigned column,
- bool prologueend, bool isstmt)
- : FunctionId(functionid), FileNum(fileNum), Line(line), Column(column),
- PrologueEnd(prologueend), IsStmt(isstmt) {}
+ MCCVLoc(const MCSymbol *Label, unsigned functionid, unsigned fileNum,
+ unsigned line, unsigned column, bool prologueend, bool isstmt)
+ : Label(Label), FunctionId(functionid), FileNum(fileNum), Line(line),
+ Column(column), PrologueEnd(prologueend), IsStmt(isstmt) {}
// Allow the default copy constructor and assignment operator to be used
// for an MCCVLoc object.
public:
+ const MCSymbol *getLabel() const { return Label; }
+
unsigned getFunctionId() const { return FunctionId; }
/// Get the FileNum of this MCCVLoc.
@@ -62,6 +65,8 @@ public:
bool isPrologueEnd() const { return PrologueEnd; }
bool isStmt() const { return IsStmt; }
+ void setLabel(const MCSymbol *L) { Label = L; }
+
void setFunctionId(unsigned FID) { FunctionId = FID; }
/// Set the FileNum of this MCCVLoc.
@@ -80,31 +85,6 @@ public:
void setIsStmt(bool IS) { IsStmt = IS; }
};
-/// Instances of this class represent the line information for
-/// the CodeView line table entries. Which is created after a machine
-/// instruction is assembled and uses an address from a temporary label
-/// created at the current address in the current section and the info from
-/// the last .cv_loc directive seen as stored in the context.
-class MCCVLineEntry : public MCCVLoc {
- const MCSymbol *Label;
-
-private:
- // Allow the default copy constructor and assignment operator to be used
- // for an MCCVLineEntry object.
-
-public:
- // Constructor to create an MCCVLineEntry given a symbol and the dwarf loc.
- MCCVLineEntry(const MCSymbol *Label, const MCCVLoc loc)
- : MCCVLoc(loc), Label(Label) {}
-
- const MCSymbol *getLabel() const { return Label; }
-
- // This is called when an instruction is assembled into the specified
- // section and if there is information from the last .cv_loc directive that
- // has yet to have a line entry made for it is made.
- static void Make(MCObjectStreamer *MCOS);
-};
-
/// Information describing a function or inlined call site introduced by
/// .cv_func_id or .cv_inline_site_id. Accumulates information from .cv_loc
/// directives used with this function's id or the id of an inlined call site
@@ -183,32 +163,20 @@ public:
/// and sets CVLocSeen. When the next instruction is assembled an entry
/// in the line number table with this information and the address of the
/// instruction will be created.
- void setCurrentCVLoc(unsigned FunctionId, unsigned FileNo, unsigned Line,
- unsigned Column, bool PrologueEnd, bool IsStmt) {
- CurrentCVLoc.setFunctionId(FunctionId);
- CurrentCVLoc.setFileNum(FileNo);
- CurrentCVLoc.setLine(Line);
- CurrentCVLoc.setColumn(Column);
- CurrentCVLoc.setPrologueEnd(PrologueEnd);
- CurrentCVLoc.setIsStmt(IsStmt);
- CVLocSeen = true;
- }
-
- bool getCVLocSeen() { return CVLocSeen; }
- void clearCVLocSeen() { CVLocSeen = false; }
-
- const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; }
+ void recordCVLoc(MCContext &Ctx, const MCSymbol *Label, unsigned FunctionId,
+ unsigned FileNo, unsigned Line, unsigned Column,
+ bool PrologueEnd, bool IsStmt);
bool isValidCVFileNumber(unsigned FileNumber);
/// Add a line entry.
- void addLineEntry(const MCCVLineEntry &LineEntry);
+ void addLineEntry(const MCCVLoc &LineEntry);
- std::vector<MCCVLineEntry> getFunctionLineEntries(unsigned FuncId);
+ std::vector<MCCVLoc> getFunctionLineEntries(unsigned FuncId);
std::pair<size_t, size_t> getLineExtent(unsigned FuncId);
- ArrayRef<MCCVLineEntry> getLinesForExtent(size_t L, size_t R);
+ ArrayRef<MCCVLoc> getLinesForExtent(size_t L, size_t R);
/// Emits a line table substream.
void emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId,
@@ -226,7 +194,7 @@ public:
void encodeInlineLineTable(MCAsmLayout &Layout,
MCCVInlineLineTableFragment &F);
- void
+ MCFragment *
emitDefRange(MCObjectStreamer &OS,
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion);
@@ -247,10 +215,6 @@ public:
std::pair<StringRef, unsigned> addToStringTable(StringRef S);
private:
- /// The current CodeView line information from the last .cv_loc directive.
- MCCVLoc CurrentCVLoc = MCCVLoc(0, 0, 0, 0, false, true);
- bool CVLocSeen = false;
-
/// Map from string to string table offset.
StringMap<unsigned> StringTable;
@@ -286,8 +250,8 @@ private:
/// id.
std::map<unsigned, std::pair<size_t, size_t>> MCCVLineStartStop;
- /// A collection of MCCVLineEntry for each section.
- std::vector<MCCVLineEntry> MCCVLines;
+ /// A collection of MCCVLoc for each section.
+ std::vector<MCCVLoc> MCCVLines;
/// All known functions and inlined call sites, indexed by function id.
std::vector<MCCVFunctionInfo> Functions;
diff --git a/contrib/llvm/include/llvm/MC/MCContext.h b/contrib/llvm/include/llvm/MC/MCContext.h
index a712e2d95cbc..3b8ac8b79e21 100644
--- a/contrib/llvm/include/llvm/MC/MCContext.h
+++ b/contrib/llvm/include/llvm/MC/MCContext.h
@@ -298,10 +298,6 @@ namespace llvm {
CodeViewContext &getCVContext();
- /// Clear the current cv_loc, if there is one. Avoids lazily creating a
- /// CodeViewContext if none is needed.
- void clearCVLocSeen();
-
void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; }
void setUseNamesOnTempLabels(bool Value) { UseNamesOnTempLabels = Value; }
diff --git a/contrib/llvm/include/llvm/MC/MCDwarf.h b/contrib/llvm/include/llvm/MC/MCDwarf.h
index 2bfaf19cf2c6..7b96e9aaca89 100644
--- a/contrib/llvm/include/llvm/MC/MCDwarf.h
+++ b/contrib/llvm/include/llvm/MC/MCDwarf.h
@@ -430,6 +430,7 @@ public:
OpUndefined,
OpRegister,
OpWindowSave,
+ OpNegateRAState,
OpGnuArgsSize
};
@@ -509,6 +510,11 @@ public:
return MCCFIInstruction(OpWindowSave, L, 0, 0, "");
}
+ /// .cfi_negate_ra_state AArch64 negate RA state.
+ static MCCFIInstruction createNegateRAState(MCSymbol *L) {
+ return MCCFIInstruction(OpNegateRAState, L, 0, 0, "");
+ }
+
/// .cfi_restore says that the rule for Register is now the same as it
/// was at the beginning of the function, after all initial instructions added
/// by .cfi_startproc were executed.
@@ -593,6 +599,7 @@ struct MCDwarfFrameInfo {
bool IsSignalFrame = false;
bool IsSimple = false;
unsigned RAReg = static_cast<unsigned>(INT_MAX);
+ bool IsBKeyFrame = false;
};
class MCDwarfFrameEmitter {
diff --git a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
index bff58fef6af9..f226d6a45a5a 100644
--- a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
@@ -73,6 +74,8 @@ public:
switch (OSType) {
case Triple::CloudABI:
return ELF::ELFOSABI_CLOUDABI;
+ case Triple::HermitCore:
+ return ELF::ELFOSABI_STANDALONE;
case Triple::PS4:
case Triple::FreeBSD:
return ELF::ELFOSABI_FREEBSD;
@@ -90,6 +93,8 @@ public:
virtual void sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs);
+ virtual void addTargetSectionFlags(MCContext &Ctx, MCSectionELF &Sec);
+
/// \name Accessors
/// @{
uint8_t getOSABI() const { return OSABI; }
diff --git a/contrib/llvm/include/llvm/MC/MCExpr.h b/contrib/llvm/include/llvm/MC/MCExpr.h
index 3fd58a169d4b..8cb6b86fd672 100644
--- a/contrib/llvm/include/llvm/MC/MCExpr.h
+++ b/contrib/llvm/include/llvm/MC/MCExpr.h
@@ -286,7 +286,9 @@ public:
VK_Hexagon_IE_GOT,
VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr
+ VK_WebAssembly_GLOBAL, // Global object index
VK_WebAssembly_TYPEINDEX,// Type table index
+ VK_WebAssembly_EVENT, // Event index
VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
diff --git a/contrib/llvm/include/llvm/MC/MCInst.h b/contrib/llvm/include/llvm/MC/MCInst.h
index 67bb11a70387..d501b686bb2e 100644
--- a/contrib/llvm/include/llvm/MC/MCInst.h
+++ b/contrib/llvm/include/llvm/MC/MCInst.h
@@ -208,6 +208,8 @@ public:
/// string.
void dump_pretty(raw_ostream &OS, const MCInstPrinter *Printer = nullptr,
StringRef Separator = " ") const;
+ void dump_pretty(raw_ostream &OS, StringRef Name,
+ StringRef Separator = " ") const;
};
inline raw_ostream& operator<<(raw_ostream &OS, const MCOperand &MO) {
diff --git a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
index e1673208d875..200f10f7d64b 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -23,6 +23,7 @@
namespace llvm {
class MCRegisterInfo;
+class Triple;
class MCInstrAnalysis {
protected:
@@ -87,24 +88,77 @@ public:
const MCInst &Inst,
APInt &Writes) const;
- /// Returns true if \param Inst is a dependency breaking instruction for the
- /// given subtarget.
+ /// Returns true if MI is a dependency breaking zero-idiom for the given
+ /// subtarget.
+ ///
+ /// Mask is used to identify input operands that have their dependency
+ /// broken. Each bit of the mask is associated with a specific input operand.
+ /// Bits associated with explicit input operands are laid out first in the
+ /// mask; implicit operands come after explicit operands.
+ ///
+ /// Dependencies are broken only for operands that have their corresponding bit
+ /// set. Operands that have their bit cleared, or that don't have a
+ /// corresponding bit in the mask, don't have their dependency broken. Note
+ /// that Mask may not be big enough to describe all operands. The assumption
+ /// for operands that don't have a corresponding bit in the mask is that those
+ /// are still data dependent.
+ ///
+ /// The only exception to the rule is when Mask has all zeroes.
+ /// A zero mask means: dependencies are broken for all explicit register
+ /// operands.
+ virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask,
+ unsigned CPUID) const {
+ return false;
+ }
+
+ /// Returns true if MI is a dependency breaking instruction for the
+ /// subtarget associated with CPUID.
///
/// The value computed by a dependency breaking instruction is not dependent
/// on the inputs. An example of dependency breaking instruction on X86 is
/// `XOR %eax, %eax`.
- /// TODO: In future, we could implement an alternative approach where this
- /// method returns `true` if the input instruction is not dependent on
- /// some/all of its input operands. An APInt mask could then be used to
- /// identify independent operands.
- virtual bool isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const;
+ ///
+ /// If MI is a dependency breaking instruction for subtarget CPUID, then Mask
+ /// can be inspected to identify independent operands.
+ ///
+ /// Essentially, each bit of the mask corresponds to an input operand.
+ /// Explicit operands are laid out first in the mask; implicit operands follow
+ /// explicit operands. Bits are set for operands that are independent.
+ ///
+ /// Note that the number of bits in Mask may not be equivalent to the sum of
+ /// explicit and implicit operands in MI. Operands that don't have a
+ /// corresponding bit in Mask are assumed "not independent".
+ ///
+ /// The only exception is when Mask is all zeroes. That means: explicit
+ /// input operands of MI are independent.
+ virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask,
+ unsigned CPUID) const {
+ return isZeroIdiom(MI, Mask, CPUID);
+ }
+
+ /// Returns true if MI is a candidate for move elimination.
+ ///
+ /// Different subtargets may apply different constraints to optimizable
+ /// register moves. For example, on most X86 subtargets, a candidate for move
+ /// elimination cannot specify the same register for both source and
+ /// destination.
+ virtual bool isOptimizableRegisterMove(const MCInst &MI,
+ unsigned CPUID) const {
+ return false;
+ }
/// Given a branch instruction try to get the address the branch
/// targets. Return true on success, and the address in Target.
virtual bool
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;
+
+ /// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries.
+ virtual std::vector<std::pair<uint64_t, uint64_t>>
+ findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA, const Triple &TargetTriple) const {
+ return {};
+ }
};
} // end namespace llvm
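The mask contract documented above is easiest to see from the consumer side. A minimal sketch (MCIA, Inst and CPUID stand for a target's MCInstrAnalysis, the instruction being analyzed, and the subtarget id; all three are assumed inputs, not part of this patch):

    llvm::APInt Mask;
    llvm::SmallVector<unsigned, 4> IndependentOps;
    if (MCIA.isDependencyBreaking(Inst, Mask, CPUID)) {
      if (Mask.isNullValue()) {
        // Zero mask: dependencies are broken for all explicit register operands.
      } else {
        // Bits cover explicit operands first, then implicit ones; a set bit
        // marks an independent operand, operands without a bit stay dependent.
        for (unsigned I = 0, E = Mask.getBitWidth(); I != E; ++I)
          if (Mask[I])
            IndependentOps.push_back(I);
      }
    }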
diff --git a/contrib/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
index 3e000a2210e9..61e7d09afbcb 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
@@ -120,6 +120,7 @@ enum Flag {
HasOptionalDef,
Pseudo,
Return,
+ EHScopeReturn,
Call,
Barrier,
Terminator,
@@ -150,7 +151,8 @@ enum Flag {
InsertSubreg,
Convergent,
Add,
- Trap
+ Trap,
+ VariadicOpsAreDefs,
};
}
@@ -382,6 +384,11 @@ public:
/// additional values.
bool isConvergent() const { return Flags & (1ULL << MCID::Convergent); }
+ /// Return true if variadic operands of this instruction are definitions.
+ bool variadicOpsAreDefs() const {
+ return Flags & (1ULL << MCID::VariadicOpsAreDefs);
+ }
+
//===--------------------------------------------------------------------===//
// Side Effect Analysis
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
index 3a27ef8c8fee..f8142ccd8ac5 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/VersionTuple.h"
namespace llvm {
class MCContext;
@@ -42,12 +43,11 @@ protected:
/// dwarf unwind.
bool OmitDwarfIfHaveCompactUnwind;
- /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values
- /// for EH.
- unsigned PersonalityEncoding;
- unsigned LSDAEncoding;
- unsigned FDECFIEncoding;
- unsigned TTypeEncoding;
+ /// FDE CFI encoding. Controls the encoding of the begin label in the
+ /// .eh_frame section. Unlike the LSDA encoding, personality encoding, and
+ /// type encodings, this is something that the assembler just "knows" about
+ /// its target.
+ unsigned FDECFIEncoding = 0;
/// Compact unwind encoding indicating that we should emit only an EH frame.
unsigned CompactUnwindDwarfEHFrameOnly;
@@ -118,6 +118,8 @@ protected:
MCSection *DwarfAddrSection;
/// The DWARF v5 range list section.
MCSection *DwarfRnglistsSection;
+ /// The DWARF v5 locations list section.
+ MCSection *DwarfLoclistsSection;
/// The DWARF v5 range list section for fission.
MCSection *DwarfRnglistsDWOSection;
@@ -226,10 +228,7 @@ public:
return CommDirectiveSupportsAlignment;
}
- unsigned getPersonalityEncoding() const { return PersonalityEncoding; }
- unsigned getLSDAEncoding() const { return LSDAEncoding; }
unsigned getFDEEncoding() const { return FDECFIEncoding; }
- unsigned getTTypeEncoding() const { return TTypeEncoding; }
unsigned getCompactUnwindDwarfEHFrameOnly() const {
return CompactUnwindDwarfEHFrameOnly;
@@ -243,6 +242,9 @@ public:
MCSection *getCompactUnwindSection() const { return CompactUnwindSection; }
MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
+ MCSection *getDwarfInfoSection(uint64_t Hash) const {
+ return getDwarfComdatSection(".debug_info", Hash);
+ }
MCSection *getDwarfLineSection() const { return DwarfLineSection; }
MCSection *getDwarfLineStrSection() const { return DwarfLineStrSection; }
MCSection *getDwarfFrameSection() const { return DwarfFrameSection; }
@@ -262,6 +264,7 @@ public:
MCSection *getDwarfARangesSection() const { return DwarfARangesSection; }
MCSection *getDwarfRangesSection() const { return DwarfRangesSection; }
MCSection *getDwarfRnglistsSection() const { return DwarfRnglistsSection; }
+ MCSection *getDwarfLoclistsSection() const { return DwarfLoclistsSection; }
MCSection *getDwarfMacinfoSection() const { return DwarfMacinfoSection; }
MCSection *getDwarfDebugNamesSection() const {
@@ -278,7 +281,9 @@ public:
return DwarfAccelTypesSection;
}
MCSection *getDwarfInfoDWOSection() const { return DwarfInfoDWOSection; }
- MCSection *getDwarfTypesSection(uint64_t Hash) const;
+ MCSection *getDwarfTypesSection(uint64_t Hash) const {
+ return getDwarfComdatSection(".debug_types", Hash);
+ }
MCSection *getDwarfTypesDWOSection() const { return DwarfTypesDWOSection; }
MCSection *getDwarfAbbrevDWOSection() const { return DwarfAbbrevDWOSection; }
MCSection *getDwarfStrDWOSection() const { return DwarfStrDWOSection; }
@@ -386,14 +391,22 @@ private:
bool PositionIndependent;
MCContext *Ctx;
Triple TT;
+ VersionTuple SDKVersion;
void initMachOMCObjectFileInfo(const Triple &T);
void initELFMCObjectFileInfo(const Triple &T, bool Large);
void initCOFFMCObjectFileInfo(const Triple &T);
void initWasmMCObjectFileInfo(const Triple &T);
+ MCSection *getDwarfComdatSection(const char *Name, uint64_t Hash) const;
public:
const Triple &getTargetTriple() const { return TT; }
+
+ void setSDKVersion(const VersionTuple &TheSDKVersion) {
+ SDKVersion = TheSDKVersion;
+ }
+
+ const VersionTuple &getSDKVersion() const { return SDKVersion; }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
index 035206dce939..892909656c15 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -39,12 +39,21 @@ class MCObjectStreamer : public MCStreamer {
bool EmitEHFrame;
bool EmitDebugFrame;
SmallVector<MCSymbol *, 2> PendingLabels;
+ struct PendingMCFixup {
+ const MCSymbol *Sym;
+ MCFixup Fixup;
+ MCDataFragment *DF;
+ PendingMCFixup(const MCSymbol *McSym, MCDataFragment *F, MCFixup McFixup)
+ : Sym(McSym), Fixup(McFixup), DF(F) {}
+ };
+ SmallVector<PendingMCFixup, 2> PendingFixups;
virtual void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo&) = 0;
void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
MCSymbol *EmitCFILabel() override;
void EmitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI);
+ void resolvePendingFixups();
protected:
MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
@@ -179,7 +188,9 @@ public:
///
/// Emit the absolute difference between \c Hi and \c Lo, as long as we can
/// compute it. Currently, that requires that both symbols are in the same
- /// data fragment. Otherwise, do nothing and return \c false.
+ /// data fragment and that the target has not specified that diff expressions
+ /// require relocations to be emitted. Otherwise, do nothing and return
+ /// \c false.
///
/// \pre Offset of \c Hi is greater than the offset \c Lo.
void emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
diff --git a/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
index 207183a69b0e..2e9b8dfa3b26 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer {
StringRef CurBuf;
bool IsAtStartOfLine = true;
bool IsAtStartOfStatement = true;
- bool IsParsingMSInlineAsm = false;
bool IsPeeking = false;
protected:
@@ -44,7 +43,6 @@ public:
~AsmLexer() override;
void setBuffer(StringRef Buf, const char *ptr = nullptr);
- void setParsingMSInlineAsm(bool V) { IsParsingMSInlineAsm = V; }
StringRef LexUntilEndOfStatement() override;
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 10550b3370e8..ea13d1cdc09f 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -50,9 +50,9 @@ protected: // Can only create subclasses.
bool SkipSpace = true;
bool AllowAtInIdentifier;
bool IsAtStartOfStatement = true;
+ bool LexMasmIntegers = false;
AsmCommentConsumer *CommentConsumer = nullptr;
- bool AltMacroMode;
MCAsmLexer();
virtual AsmToken LexToken() = 0;
@@ -67,17 +67,9 @@ public:
MCAsmLexer &operator=(const MCAsmLexer &) = delete;
virtual ~MCAsmLexer();
- bool IsaAltMacroMode() {
- return AltMacroMode;
- }
-
- void SetAltMacroMode(bool AltMacroSet) {
- AltMacroMode = AltMacroSet;
- }
-
/// Consume the next token from the input stream and return it.
///
- /// The lexer will continuosly return the end-of-file token once the end of
+ /// The lexer will continuously return the end-of-file token once the end of
/// the main input file has been reached.
const AsmToken &Lex() {
assert(!CurTok.empty());
@@ -155,6 +147,10 @@ public:
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
this->CommentConsumer = CommentConsumer;
}
+
+ /// Set whether to lex masm-style binary and hex literals. They look like
+ /// 0b1101 and 0ABCh respectively.
+ void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 0d56f36fbae8..b80289878e6e 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -122,17 +122,18 @@ public:
private:
MCTargetAsmParser *TargetParser = nullptr;
- unsigned ShowParsedOperands : 1;
-
protected: // Can only create subclasses.
MCAsmParser();
+ SmallVector<MCPendingError, 0> PendingErrors;
+
/// Flag tracking whether any errors have been encountered.
bool HadError = false;
+
/// Enable print [latency:throughput] in output file.
bool EnablePrintSchedInfo = false;
- SmallVector<MCPendingError, 1> PendingErrors;
+ bool ShowParsedOperands = false;
public:
MCAsmParser(const MCAsmParser &) = delete;
@@ -166,7 +167,7 @@ public:
void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; }
void setEnablePrintSchedInfo(bool Value) { EnablePrintSchedInfo = Value; }
- bool shouldPrintSchedInfo() { return EnablePrintSchedInfo; }
+ bool shouldPrintSchedInfo() const { return EnablePrintSchedInfo; }
/// Run the parser on the input source buffer.
virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index 135b5fab07ce..ccf13a6a4fb4 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -476,6 +476,9 @@ public:
return nullptr;
}
+ // For actions that have to be performed before a label is emitted
+ virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
+
virtual void onLabelParsed(MCSymbol *Symbol) {}
/// Ensure that all previously parsed instructions have been emitted to the
@@ -487,6 +490,9 @@ public:
MCContext &Ctx) {
return nullptr;
}
+
+ // For any checks or cleanups at the end of parsing.
+ virtual void onEndOfFile() {}
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
index 6edfc30b0aa6..8d8c677c77ea 100644
--- a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -41,7 +41,6 @@ public:
const uint16_t RegsSize;
const uint16_t RegSetSize;
const uint16_t ID;
- const uint16_t PhysRegSize;
const int8_t CopyCost;
const bool Allocatable;
@@ -80,11 +79,6 @@ public:
return contains(Reg1) && contains(Reg2);
}
- /// Return the size of the physical register in bytes.
- unsigned getPhysRegSize() const { return PhysRegSize; }
- /// Temporary function to allow out-of-tree targets to switch.
- unsigned getSize() const { return getPhysRegSize(); }
-
/// getCopyCost - Return the cost of copying a value between two registers in
/// this class. A negative number means the register class is very expensive
/// to copy e.g. status flag register classes.
diff --git a/contrib/llvm/include/llvm/MC/MCSchedule.h b/contrib/llvm/include/llvm/MC/MCSchedule.h
index f2f1dfb36918..689ac73cbdd1 100644
--- a/contrib/llvm/include/llvm/MC/MCSchedule.h
+++ b/contrib/llvm/include/llvm/MC/MCSchedule.h
@@ -142,6 +142,7 @@ struct MCSchedClassDesc {
struct MCRegisterCostEntry {
unsigned RegisterClassID;
unsigned Cost;
+ bool AllowMoveElimination;
};
/// A register file descriptor.
@@ -159,6 +160,12 @@ struct MCRegisterFileDesc {
uint16_t NumRegisterCostEntries;
// Index of the first cost entry in MCExtraProcessorInfo::RegisterCostTable.
uint16_t RegisterCostEntryIdx;
+ // A value of zero means: there is no limit on the number of moves that can be
+ // eliminated every cycle.
+ uint16_t MaxMovesEliminatedPerCycle;
+ // True if this register file only knows how to optimize register moves from
+ // known zero registers.
+ bool AllowZeroMoveEliminationOnly;
};
/// Provide extra details about the machine processor.
@@ -176,18 +183,8 @@ struct MCExtraProcessorInfo {
unsigned NumRegisterFiles;
const MCRegisterCostEntry *RegisterCostTable;
unsigned NumRegisterCostEntries;
-
- struct PfmCountersInfo {
- // An optional name of a performance counter that can be used to measure
- // cycles.
- const char *CycleCounter;
-
- // For each MCProcResourceDesc defined by the processor, an optional list of
- // names of performance counters that can be used to measure the resource
- // utilization.
- const char **IssueCounters;
- };
- PfmCountersInfo PfmCounters;
+ unsigned LoadQueueID;
+ unsigned StoreQueueID;
};
/// Machine model for scheduling, bundling, and heuristics.
diff --git a/contrib/llvm/include/llvm/MC/MCSection.h b/contrib/llvm/include/llvm/MC/MCSection.h
index ba5c60d3ba58..eb210b4e9dfa 100644
--- a/contrib/llvm/include/llvm/MC/MCSection.h
+++ b/contrib/llvm/include/llvm/MC/MCSection.h
@@ -78,6 +78,10 @@ private:
/// Whether this section has had instructions emitted into it.
bool HasInstructions : 1;
+ /// Whether this section has had data emitted into it.
+ /// Right now this is only used by the ARM backend.
+ bool HasData : 1;
+
bool IsRegistered : 1;
MCDummyFragment DummyFragment;
@@ -137,6 +141,9 @@ public:
bool hasInstructions() const { return HasInstructions; }
void setHasInstructions(bool Value) { HasInstructions = Value; }
+ bool hasData() const { return HasData; }
+ void setHasData(bool Value) { HasData = Value; }
+
bool isRegistered() const { return IsRegistered; }
void setIsRegistered(bool Value) { IsRegistered = Value; }
diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h
index e4d0dc03b87c..f613d3a1943f 100644
--- a/contrib/llvm/include/llvm/MC/MCStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCStreamer.h
@@ -28,6 +28,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <cstdint>
#include <memory>
@@ -109,6 +110,11 @@ public:
virtual void emitValue(const MCExpr *Value);
+ /// Emit the bytes in \p Data into the output.
+ ///
+ /// This is used to emit bytes in \p Data as sequence of .byte directives.
+ virtual void emitRawBytes(StringRef Data);
+
virtual void finish();
};
@@ -193,10 +199,6 @@ class MCStreamer {
WinEH::FrameInfo *CurrentWinFrameInfo;
- /// Retreive the current frame info if one is available and it is not yet
- /// closed. Otherwise, issue an error and return null.
- WinEH::FrameInfo *EnsureValidWinFrameInfo(SMLoc Loc);
-
/// Tracks an index to represent the order a symbol was emitted in.
/// Zero means we did not emit that symbol.
DenseMap<const MCSymbol *, unsigned> SymbolOrdering;
@@ -219,10 +221,6 @@ protected:
virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame);
- /// When emitting an object file, create and emit a real label. When emitting
- /// textual assembly, this should do nothing to avoid polluting our output.
- virtual MCSymbol *EmitCFILabel();
-
WinEH::FrameInfo *getCurrentWinFrameInfo() {
return CurrentWinFrameInfo;
}
@@ -231,6 +229,9 @@ protected:
virtual void EmitRawTextImpl(StringRef String);
+ /// Returns true if the .cv_loc directive is in the right section.
+ bool checkCVLocSection(unsigned FuncId, unsigned FileNo, SMLoc Loc);
+
public:
MCStreamer(const MCStreamer &) = delete;
MCStreamer &operator=(const MCStreamer &) = delete;
@@ -258,6 +259,14 @@ public:
return TargetStreamer.get();
}
+ /// When emitting an object file, create and emit a real label. When emitting
+ /// textual assembly, this should do nothing to avoid polluting our output.
+ virtual MCSymbol *EmitCFILabel();
+
+ /// Retrieve the current frame info if one is available and it is not yet
+ /// closed. Otherwise, issue an error and return null.
+ WinEH::FrameInfo *EnsureValidWinFrameInfo(SMLoc Loc);
+
unsigned getNumFrameInfos() { return DwarfFrameInfos.size(); }
ArrayRef<MCDwarfFrameInfo> getDwarfFrameInfos() const {
return DwarfFrameInfos;
@@ -444,14 +453,17 @@ public:
/// Specify the Mach-O minimum deployment target version.
virtual void EmitVersionMin(MCVersionMinType Type, unsigned Major,
- unsigned Minor, unsigned Update) {}
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {}
/// Emit/Specify Mach-O build version command.
/// \p Platform should be one of MachO::PlatformType.
virtual void EmitBuildVersion(unsigned Platform, unsigned Major,
- unsigned Minor, unsigned Update) {}
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {}
- void EmitVersionForTarget(const Triple &Target);
+ void EmitVersionForTarget(const Triple &Target,
+ const VersionTuple &SDKVersion);
/// Note in the output that the specified \p Func is a Thumb mode
/// function (ARM target only).
@@ -794,6 +806,8 @@ public:
Optional<StringRef> Source,
unsigned CUID = 0);
+ virtual void EmitCFIBKeyFrame();
+
/// This implements the DWARF2 '.loc fileno lineno ...' assembler
/// directive.
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -867,7 +881,7 @@ public:
virtual MCSymbol *getDwarfLineTableSymbol(unsigned CUID);
virtual void EmitCFISections(bool EH, bool Debug);
- void EmitCFIStartProc(bool IsSimple);
+ void EmitCFIStartProc(bool IsSimple, SMLoc Loc = SMLoc());
void EmitCFIEndProc();
virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
virtual void EmitCFIDefCfaOffset(int64_t Offset);
@@ -888,9 +902,15 @@ public:
virtual void EmitCFIUndefined(int64_t Register);
virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
virtual void EmitCFIWindowSave();
+ virtual void EmitCFINegateRAState();
virtual void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
virtual void EmitWinCFIEndProc(SMLoc Loc = SMLoc());
+ /// This is used on platforms, such as Windows on ARM64, that require function
+ /// or funclet sizes to be emitted in .xdata before the End marker is emitted
+ /// for the frame. We cannot use the End marker to indicate that the frame is
+ /// active, because it has not been set yet at the point where .xdata is emitted.
+ virtual void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc());
virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc());
virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc());
virtual void EmitWinCFIPushReg(unsigned Register, SMLoc Loc = SMLoc());
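The hunk above threads an SDK version (as a VersionTuple) through the Mach-O version-min and build-version directives. A minimal caller sketch under the new signature; Streamer and the 10.14 values are hypothetical, chosen only to illustrate the added parameter:

    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/VersionTuple.h"

    // Hypothetical front-end code emitting the Darwin deployment/SDK version.
    llvm::Triple TheTriple("x86_64-apple-macosx10.14");
    llvm::VersionTuple SDKVersion(10, 14);   // assumed SDK version, illustration only
    Streamer.EmitVersionForTarget(TheTriple, SDKVersion);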
diff --git a/contrib/llvm/include/llvm/MC/MCSymbolWasm.h b/contrib/llvm/include/llvm/MC/MCSymbolWasm.h
index e043453dc732..8e66dc881d0f 100644
--- a/contrib/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/contrib/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -20,12 +20,9 @@ class MCSymbolWasm : public MCSymbol {
bool IsHidden = false;
bool IsComdat = false;
std::string ModuleName;
- SmallVector<wasm::ValType, 1> Returns;
- SmallVector<wasm::ValType, 4> Params;
- wasm::WasmGlobalType GlobalType;
- bool ParamsSet = false;
- bool ReturnsSet = false;
- bool GlobalTypeSet = false;
+ wasm::WasmSignature *Signature = nullptr;
+ Optional<wasm::WasmGlobalType> GlobalType;
+ Optional<wasm::WasmEventType> EventType;
/// An expression describing how to calculate the size of a symbol. If a
/// symbol has no size this field will be NULL.
@@ -35,8 +32,7 @@ public:
// Use a module name of "env" for now, for compatibility with existing tools.
// This is temporary, and may change, as the ABI is not yet stable.
MCSymbolWasm(const StringMapEntry<bool> *Name, bool isTemporary)
- : MCSymbol(SymbolKindWasm, Name, isTemporary),
- ModuleName("env") {}
+ : MCSymbol(SymbolKindWasm, Name, isTemporary), ModuleName("env") {}
static bool classof(const MCSymbol *S) { return S->isWasm(); }
const MCExpr *getSize() const { return SymbolSize; }
@@ -46,6 +42,7 @@ public:
bool isData() const { return Type == wasm::WASM_SYMBOL_TYPE_DATA; }
bool isGlobal() const { return Type == wasm::WASM_SYMBOL_TYPE_GLOBAL; }
bool isSection() const { return Type == wasm::WASM_SYMBOL_TYPE_SECTION; }
+ bool isEvent() const { return Type == wasm::WASM_SYMBOL_TYPE_EVENT; }
wasm::WasmSymbolType getType() const { return Type; }
void setType(wasm::WasmSymbolType type) { Type = type; }
@@ -61,37 +58,22 @@ public:
const StringRef getModuleName() const { return ModuleName; }
void setModuleName(StringRef Name) { ModuleName = Name; }
- const SmallVector<wasm::ValType, 1> &getReturns() const {
- assert(ReturnsSet);
- return Returns;
- }
-
- void setReturns(SmallVectorImpl<wasm::ValType> &&Rets) {
- ReturnsSet = true;
- Returns = std::move(Rets);
- }
-
- const SmallVector<wasm::ValType, 4> &getParams() const {
- assert(ParamsSet);
- return Params;
- }
-
- void setParams(SmallVectorImpl<wasm::ValType> &&Pars) {
- ParamsSet = true;
- Params = std::move(Pars);
- }
+ const wasm::WasmSignature *getSignature() const { return Signature; }
+ void setSignature(wasm::WasmSignature *Sig) { Signature = Sig; }
const wasm::WasmGlobalType &getGlobalType() const {
- assert(GlobalTypeSet);
- return GlobalType;
+ assert(GlobalType.hasValue());
+ return GlobalType.getValue();
}
+ void setGlobalType(wasm::WasmGlobalType GT) { GlobalType = GT; }
- void setGlobalType(wasm::WasmGlobalType GT) {
- GlobalTypeSet = true;
- GlobalType = GT;
+ const wasm::WasmEventType &getEventType() const {
+ assert(EventType.hasValue());
+ return EventType.getValue();
}
+ void setEventType(wasm::WasmEventType ET) { EventType = ET; }
};
-} // end namespace llvm
+} // end namespace llvm
#endif // LLVM_MC_MCSYMBOLWASM_H
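With signatures and global/event types now stored behind pointer/Optional members, the getters assert when the matching setter was never called. A hedged usage sketch; WasmSym is a hypothetical MCSymbolWasm*, and the wasm::WasmGlobalType fields are assumed to be those declared in BinaryFormat/Wasm.h:

    #include "llvm/MC/MCSymbolWasm.h"

    wasm::WasmGlobalType GT;
    GT.Type = wasm::WASM_TYPE_I32;   // assumed enumerator from BinaryFormat/Wasm.h
    GT.Mutable = true;
    WasmSym->setGlobalType(GT);                                   // fills the Optional
    const wasm::WasmGlobalType &Read = WasmSym->getGlobalType();  // asserts if never set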
diff --git a/contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h b/contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h
index e45030f302ff..6b788cfe96b9 100644
--- a/contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h
+++ b/contrib/llvm/include/llvm/MC/MCWasmObjectWriter.h
@@ -51,6 +51,6 @@ std::unique_ptr<MCObjectWriter>
createWasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW,
raw_pwrite_stream &OS);
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/MC/MCWin64EH.h b/contrib/llvm/include/llvm/MC/MCWin64EH.h
index 83ea738de8c3..1a9f6f403d7c 100644
--- a/contrib/llvm/include/llvm/MC/MCWin64EH.h
+++ b/contrib/llvm/include/llvm/MC/MCWin64EH.h
@@ -56,6 +56,14 @@ public:
void Emit(MCStreamer &Streamer) const override;
void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI) const override;
};
+
+class ARM64UnwindEmitter : public WinEH::UnwindEmitter {
+public:
+ void Emit(MCStreamer &Streamer) const override;
+ void EmitUnwindInfo(MCStreamer &Streamer,
+ WinEH::FrameInfo *FI) const override;
+};
+
}
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/MC/MCWinEH.h b/contrib/llvm/include/llvm/MC/MCWinEH.h
index 4ca52a6654eb..98ef0367a11d 100644
--- a/contrib/llvm/include/llvm/MC/MCWinEH.h
+++ b/contrib/llvm/include/llvm/MC/MCWinEH.h
@@ -10,6 +10,7 @@
#ifndef LLVM_MC_MCWINEH_H
#define LLVM_MC_MCWINEH_H
+#include "llvm/ADT/MapVector.h"
#include <vector>
namespace llvm {
@@ -20,9 +21,9 @@ class MCSymbol;
namespace WinEH {
struct Instruction {
const MCSymbol *Label;
- const unsigned Offset;
- const unsigned Register;
- const unsigned Operation;
+ unsigned Offset;
+ unsigned Register;
+ unsigned Operation;
Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off)
: Label(L), Offset(Off), Register(Reg), Operation(Op) {}
@@ -31,6 +32,7 @@ struct Instruction {
struct FrameInfo {
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
+ const MCSymbol *FuncletOrFuncEnd = nullptr;
const MCSymbol *ExceptionHandler = nullptr;
const MCSymbol *Function = nullptr;
const MCSymbol *PrologEnd = nullptr;
@@ -43,6 +45,7 @@ struct FrameInfo {
int LastFrameInst = -1;
const FrameInfo *ChainedParent = nullptr;
std::vector<Instruction> Instructions;
+ MapVector<MCSymbol*, std::vector<Instruction>> EpilogMap;
FrameInfo() = default;
FrameInfo(const MCSymbol *Function, const MCSymbol *BeginFuncEHLabel)
diff --git a/contrib/llvm/tools/llvm-mca/Context.h b/contrib/llvm/include/llvm/MCA/Context.h
index cf483fa7b37d..6b2bee0fdc42 100644
--- a/contrib/llvm/tools/llvm-mca/Context.h
+++ b/contrib/llvm/include/llvm/MCA/Context.h
@@ -11,21 +11,22 @@
/// This file defines a class for holding ownership of various simulated
/// hardware units. A Context also provides a utility routine for constructing
/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
-/// stages).
+/// stages.
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_CONTEXT_H
-#define LLVM_TOOLS_LLVM_MCA_CONTEXT_H
-#include "HardwareUnit.h"
-#include "InstrBuilder.h"
-#include "Pipeline.h"
-#include "SourceMgr.h"
+#ifndef LLVM_MCA_CONTEXT_H
+#define LLVM_MCA_CONTEXT_H
+
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/SourceMgr.h"
#include <memory>
+namespace llvm {
namespace mca {
/// This is a convenience struct to hold the parameters necessary for creating
@@ -43,13 +44,12 @@ struct PipelineOptions {
};
class Context {
- llvm::SmallVector<std::unique_ptr<HardwareUnit>, 4> Hardware;
- const llvm::MCRegisterInfo &MRI;
- const llvm::MCSubtargetInfo &STI;
+ SmallVector<std::unique_ptr<HardwareUnit>, 4> Hardware;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
public:
- Context(const llvm::MCRegisterInfo &R, const llvm::MCSubtargetInfo &S)
- : MRI(R), STI(S) {}
+ Context(const MCRegisterInfo &R, const MCSubtargetInfo &S) : MRI(R), STI(S) {}
Context(const Context &C) = delete;
Context &operator=(const Context &C) = delete;
@@ -65,4 +65,5 @@ public:
};
} // namespace mca
-#endif // LLVM_TOOLS_LLVM_MCA_CONTEXT_H
+} // namespace llvm
+#endif // LLVM_MCA_CONTEXT_H
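After the move into include/llvm/MCA the class sits in the llvm::mca namespace and no longer spells out llvm:: qualifiers. A minimal construction sketch; MRI and STI are assumed to come from an already-initialized target, and building the default pipeline is omitted:

    #include "llvm/MCA/Context.h"

    // MRI and STI are hypothetical, obtained elsewhere via the TargetRegistry.
    llvm::mca::Context Ctx(MRI, STI);   // owns the simulated hardware units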
diff --git a/contrib/llvm/tools/llvm-mca/HWEventListener.h b/contrib/llvm/include/llvm/MCA/HWEventListener.h
index aa3e6dcf19a0..3b32b2cd6577 100644
--- a/contrib/llvm/tools/llvm-mca/HWEventListener.h
+++ b/contrib/llvm/include/llvm/MCA/HWEventListener.h
@@ -12,13 +12,14 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H
-#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H
+#ifndef LLVM_MCA_HWEVENTLISTENER_H
+#define LLVM_MCA_HWEVENTLISTENER_H
-#include "Instruction.h"
#include "llvm/ADT/ArrayRef.h"
-#include <utility>
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+namespace llvm {
namespace mca {
// An HWInstructionEvent represents state changes of instructions that
@@ -62,30 +63,41 @@ class HWInstructionIssuedEvent : public HWInstructionEvent {
public:
using ResourceRef = std::pair<uint64_t, uint64_t>;
HWInstructionIssuedEvent(const InstRef &IR,
- llvm::ArrayRef<std::pair<ResourceRef, double>> UR)
+ ArrayRef<std::pair<ResourceRef, ResourceCycles>> UR)
: HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {}
- llvm::ArrayRef<std::pair<ResourceRef, double>> UsedResources;
+ ArrayRef<std::pair<ResourceRef, ResourceCycles>> UsedResources;
};
class HWInstructionDispatchedEvent : public HWInstructionEvent {
public:
- HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef<unsigned> Regs)
+ HWInstructionDispatchedEvent(const InstRef &IR, ArrayRef<unsigned> Regs,
+ unsigned UOps)
: HWInstructionEvent(HWInstructionEvent::Dispatched, IR),
- UsedPhysRegs(Regs) {}
+ UsedPhysRegs(Regs), MicroOpcodes(UOps) {}
// Number of physical register allocated for this instruction. There is one
// entry per register file.
- llvm::ArrayRef<unsigned> UsedPhysRegs;
+ ArrayRef<unsigned> UsedPhysRegs;
+ // Number of micro opcodes dispatched.
+ // This field is often set to the total number of micro-opcodes specified by
+ // the instruction descriptor of IR.
+ // The only exception is when IR declares a number of micro opcodes
+ // which exceeds the processor DispatchWidth, and - by construction - it
+ // requires multiple cycles to be fully dispatched. In that particular case,
+ // the dispatch logic would generate more than one dispatch event (one per
+ // cycle), and each event would declare how many micro opcodes are effectively
+ // been dispatched to the schedulers.
+ unsigned MicroOpcodes;
};
class HWInstructionRetiredEvent : public HWInstructionEvent {
public:
- HWInstructionRetiredEvent(const InstRef &IR, llvm::ArrayRef<unsigned> Regs)
+ HWInstructionRetiredEvent(const InstRef &IR, ArrayRef<unsigned> Regs)
: HWInstructionEvent(HWInstructionEvent::Retired, IR),
FreedPhysRegs(Regs) {}
// Number of register writes that have been architecturally committed. There
// is one entry per register file.
- llvm::ArrayRef<unsigned> FreedPhysRegs;
+ ArrayRef<unsigned> FreedPhysRegs;
};
// A HWStallEvent represents a pipeline stall caused by the lack of hardware
@@ -127,9 +139,11 @@ public:
virtual void onResourceAvailable(const ResourceRef &RRef) {}
// Events generated by the Scheduler when buffered resources are
- // consumed/freed.
- virtual void onReservedBuffers(llvm::ArrayRef<unsigned> Buffers) {}
- virtual void onReleasedBuffers(llvm::ArrayRef<unsigned> Buffers) {}
+ // consumed/freed for an instruction.
+ virtual void onReservedBuffers(const InstRef &Inst,
+ ArrayRef<unsigned> Buffers) {}
+ virtual void onReleasedBuffers(const InstRef &Inst,
+ ArrayRef<unsigned> Buffers) {}
virtual ~HWEventListener() {}
@@ -137,5 +151,6 @@ private:
virtual void anchor();
};
} // namespace mca
+} // namespace llvm
-#endif
+#endif // LLVM_MCA_HWEVENTLISTENER_H
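Because onReservedBuffers/onReleasedBuffers now also receive the instruction involved, a listener can attribute buffer pressure to individual instructions. A hedged sketch of a custom listener; the counting logic is illustrative only and not part of the patch:

    #include "llvm/MCA/HWEventListener.h"

    class BufferPressureListener : public llvm::mca::HWEventListener {
      unsigned ReservedBufferEvents = 0;

    public:
      void onReservedBuffers(const llvm::mca::InstRef &Inst,
                             llvm::ArrayRef<unsigned> Buffers) override {
        // Count buffered resources consumed by each dispatched instruction.
        ReservedBufferEvents += Buffers.size();
      }
    };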
diff --git a/contrib/llvm/tools/llvm-mca/HardwareUnit.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/HardwareUnit.h
index e8c496ab967a..104a2009f219 100644
--- a/contrib/llvm/tools/llvm-mca/HardwareUnit.h
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/HardwareUnit.h
@@ -13,9 +13,10 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H
-#define LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H
+#ifndef LLVM_MCA_HARDWAREUNIT_H
+#define LLVM_MCA_HARDWAREUNIT_H
+namespace llvm {
namespace mca {
class HardwareUnit {
@@ -28,4 +29,5 @@ public:
};
} // namespace mca
-#endif // LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H
+} // namespace llvm
+#endif // LLVM_MCA_HARDWAREUNIT_H
diff --git a/contrib/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
new file mode 100644
index 000000000000..e217fc50f780
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -0,0 +1,207 @@
+//===------------------------- LSUnit.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A Load/Store unit class that models load/store queues and that implements
+/// a simple weak memory consistency model.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_LSUNIT_H
+#define LLVM_MCA_LSUNIT_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+
+namespace llvm {
+namespace mca {
+
+class InstRef;
+class Scheduler;
+
+/// A Load/Store Unit implementing a load and store queues.
+///
+/// This class implements a load queue and a store queue to emulate the
+/// out-of-order execution of memory operations.
+/// Each load (or store) consumes an entry in the load (or store) queue.
+///
+/// Rules are:
+/// 1) A younger load is allowed to pass an older load only if there are no
+/// stores or barriers in between the two loads.
+/// 2) A younger store is not allowed to pass an older store.
+/// 3) A younger store is not allowed to pass an older load.
+/// 4) A younger load is allowed to pass an older store only if the load does
+/// not alias with the store.
+///
+/// This class optimistically assumes that loads don't alias store operations.
+/// Under this assumption, younger loads are always allowed to pass older
+/// stores (this only affects rule 4).
+/// Essentially, this class doesn't perform any sort of alias analysis to
+/// identify aliasing loads and stores.
+///
+/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
+/// set to `false` by the constructor of LSUnit.
+///
+/// Note that this class doesn't know about the existence of different memory
+/// types for memory operations (example: write-through, write-combining, etc.).
+/// Derived classes are responsible for implementing that extra knowledge, and
+/// provide different sets of rules for loads and stores by overriding method
+/// `isReady()`.
+/// To emulate a write-combining memory type, rule 2 must be relaxed in a
+/// derived class to enable the reordering of non-aliasing store operations.
+///
+/// No assumptions are made by this class on the size of the store buffer. This
+/// class doesn't know how to identify cases where store-to-load forwarding may
+/// occur.
+///
+/// LSUnit doesn't attempt to predict whether a load or store hits or misses
+/// the L1 cache. To be more specific, LSUnit doesn't know anything about
+/// cache hierarchy and memory types.
+/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
+/// scheduling model provides an "optimistic" load-to-use latency (which usually
+/// matches the load-to-use latency for when there is a hit in the L1D).
+/// Derived classes may expand this knowledge.
+///
+/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
+/// memory-barrier like instructions.
+/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
+/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
+/// serializes loads without forcing a flush of the load queue.
+/// Similarly, instructions that both `mayStore` and have `unmodeled side
+/// effects` are treated like store barriers. A full memory
+/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
+/// effects. This is obviously inaccurate, but this is the best that we can do
+/// at the moment.
+///
+/// Each load/store barrier consumes one entry in the load/store queue. A
+/// load/store barrier enforces ordering of loads/stores:
+/// - A younger load cannot pass a load barrier.
+/// - A younger store cannot pass a store barrier.
+///
+/// A younger load has to wait for the memory load barrier to execute.
+/// A load/store barrier is "executed" when it becomes the oldest entry in
+/// the load/store queue(s). That also means, all the older loads/stores have
+/// already been executed.
+class LSUnit : public HardwareUnit {
+ // Load queue size.
+ // LQ_Size == 0 means that there are infinite slots in the load queue.
+ unsigned LQ_Size;
+
+ // Store queue size.
+ // SQ_Size == 0 means that there are infinite slots in the store queue.
+ unsigned SQ_Size;
+
+ // If true, loads will never alias with stores. This is the default.
+ bool NoAlias;
+
+ // When a `MayLoad` instruction is dispatched to the schedulers for execution,
+ // the LSUnit reserves an entry in the `LoadQueue` for it.
+ //
+ // LoadQueue keeps track of all the loads that are in-flight. A load
+ // instruction is eventually removed from the LoadQueue when it reaches
+ // completion stage. That means, a load leaves the queue whe it is 'executed',
+ // and its value can be forwarded on the data path to outside units.
+ //
+ // This class doesn't know about the latency of a load instruction. So, it
+ // conservatively/pessimistically assumes that the latency of a load opcode
+ // matches the instruction latency.
+ //
+ // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
+ // and load/store conflicts, the latency of a load is determined by the depth
+ // of the load pipeline. So, we could use field `LoadLatency` in the
+ // MCSchedModel to model that latency.
+ // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
+ // L1D, and it usually already accounts for any extra latency due to data
+ // forwarding.
+ // When doing throughput analysis, `LoadLatency` is likely to
+ // be a better predictor of load latency than instruction latency. This is
+ // particularly true when simulating code with temporal/spatial locality of
+ // memory accesses.
+ // Using `LoadLatency` (instead of the instruction latency) is also expected
+ // to improve the load queue allocation for long latency instructions with
+ // folded memory operands (See PR39829).
+ //
+ // FIXME: On some processors, load/store operations are split into multiple
+ // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
+ // not 256-bit data types. So, a 256-bit load is effectively split into two
+ // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
+ // simplicity, this class optimistically assumes that a load instruction only
+ // consumes one entry in the LoadQueue. Similarly, store instructions only
+ // consume a single entry in the StoreQueue.
+ // In future, we should reassess the quality of this design, and consider
+ // alternative approaches that let instructions specify the number of
+ // load/store queue entries which they consume at dispatch stage (See
+ // PR39830).
+ SmallSet<unsigned, 16> LoadQueue;
+ SmallSet<unsigned, 16> StoreQueue;
+
+ void assignLQSlot(unsigned Index);
+ void assignSQSlot(unsigned Index);
+ bool isReadyNoAlias(unsigned Index) const;
+
+ // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
+ // conservatively treated as a store barrier. It forces older stores to be
+ // executed before newer stores are issued.
+ SmallSet<unsigned, 8> StoreBarriers;
+
+ // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
+ // conservatively treated as a load barrier. It forces older loads to execute
+ // before newer loads are issued.
+ SmallSet<unsigned, 8> LoadBarriers;
+
+ bool isSQEmpty() const { return StoreQueue.empty(); }
+ bool isLQEmpty() const { return LoadQueue.empty(); }
+ bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
+ bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
+
+public:
+ LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0,
+ bool AssumeNoAlias = false);
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+
+ enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL };
+
+ // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
+ // IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
+ Status isAvailable(const InstRef &IR) const;
+
+ // Allocates load/store queue resources for IR.
+ //
+ // This method assumes that a previous call to `isAvailable(IR)` returned
+ // LSU_AVAILABLE, and that IR is a memory operation.
+ void dispatch(const InstRef &IR);
+
+ // By default, rules are:
+ // 1. A store may not pass a previous store.
+ // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
+ // 3. A load may pass a previous load.
+ // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
+ // 5. A load has to wait until an older load barrier is fully executed.
+ // 6. A store has to wait until an older store barrier is fully executed.
+ virtual bool isReady(const InstRef &IR) const;
+
+ // Load and store instructions are tracked by their corresponding queues from
+ // dispatch until the "instruction executed" event.
+ // Only when a load instruction reaches the 'Executed' stage, its value
+ // becomes available to the users. At that point, the load no longer needs to
+ // be tracked by the load queue.
+ // FIXME: For simplicity, we optimistically assume a similar behavior for
+ // store instructions. In practice, store operations don't tend to leave the
+ // store queue until they reach the 'Retired' stage (See PR39830).
+ void onInstructionExecuted(const InstRef &IR);
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_LSUNIT_H
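The comment block above spells out the ordering rules; the calling sequence expected from a scheduler is roughly isAvailable, dispatch, isReady, then onInstructionExecuted. A hedged sketch, where SM is an assumed MCSchedModel and IR an assumed InstRef (queue sizes of zero mean unbounded queues):

    llvm::mca::LSUnit LSU(SM, /*LQ=*/0, /*SQ=*/0, /*AssumeNoAlias=*/false);
    if (LSU.isAvailable(IR) == llvm::mca::LSUnit::LSU_AVAILABLE) {
      LSU.dispatch(IR);               // reserves a load and/or store queue entry
      if (LSU.isReady(IR)) {
        // ... issue IR to the underlying pipelines ...
      }
    }
    LSU.onInstructionExecuted(IR);    // releases the queue entry once IR executes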
diff --git a/contrib/llvm/tools/llvm-mca/RegisterFile.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
index 349e9789b6ee..c23ab0389234 100644
--- a/contrib/llvm/tools/llvm-mca/RegisterFile.h
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -14,14 +14,17 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H
-#define LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H
+#ifndef LLVM_MCA_REGISTER_FILE_H
+#define LLVM_MCA_REGISTER_FILE_H
-#include "HardwareUnit.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/Support/Error.h"
+namespace llvm {
namespace mca {
class ReadState;
@@ -31,12 +34,15 @@ class WriteRef;
/// Manages hardware register files, and tracks register definitions for
/// register renaming purposes.
class RegisterFile : public HardwareUnit {
- const llvm::MCRegisterInfo &MRI;
+ const MCRegisterInfo &MRI;
- // Each register file is associated with an instance of
- // RegisterMappingTracker.
- // A RegisterMappingTracker keeps track of the number of physical registers
- // which have been dynamically allocated by the simulator.
+ // class RegisterMappingTracker is a physical register file (PRF) descriptor.
+ // There is one RegisterMappingTracker for every PRF definition in the
+ // scheduling model.
+ //
+ // An instance of RegisterMappingTracker tracks the number of physical
+ // registers available for renaming. It also tracks the number of register
+ // moves eliminated per cycle.
struct RegisterMappingTracker {
// The total number of physical registers that are available in this
// register file for register renaming purpouses. A value of zero for this
@@ -46,8 +52,28 @@ class RegisterFile : public HardwareUnit {
// Number of physical registers that are currently in use.
unsigned NumUsedPhysRegs;
- RegisterMappingTracker(unsigned NumPhysRegisters)
- : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0) {}
+ // Maximum number of register moves that can be eliminated by this PRF every
+ // cycle. A value of zero means that there is no limit on the number of
+ // moves which can be eliminated every cycle.
+ const unsigned MaxMoveEliminatedPerCycle;
+
+ // Number of register moves eliminated during this cycle.
+ //
+ // This value is increased by one every time a register move is eliminated.
+ // Every new cycle, this value is reset to zero.
+ // A move can be eliminated only if MaxMoveEliminatedPerCycle is zero, or if
+ // NumMoveEliminated is less than MaxMoveEliminatedPerCycle.
+ unsigned NumMoveEliminated;
+
+ // If set, move elimination is restricted to zero-register moves only.
+ bool AllowZeroMoveEliminationOnly;
+
+ RegisterMappingTracker(unsigned NumPhysRegisters,
+ unsigned MaxMoveEliminated = 0U,
+ bool AllowZeroMoveElimOnly = false)
+ : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0),
+ MaxMoveEliminatedPerCycle(MaxMoveEliminated), NumMoveEliminated(0U),
+ AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly) {}
};
// A vector of register file descriptors. This set always contains at least
@@ -59,7 +85,7 @@ class RegisterFile : public HardwareUnit {
//
// Users can limit the number of physical registers that are available in
// regsiter file #0 specifying command line flag `-register-file-size=<uint>`.
- llvm::SmallVector<RegisterMappingTracker, 4> RegisterFiles;
+ SmallVector<RegisterMappingTracker, 4> RegisterFiles;
// This type is used to propagate information about the owner of a register,
// and the cost of allocating it in the PRF. Register cost is defined as the
@@ -70,20 +96,42 @@ class RegisterFile : public HardwareUnit {
// registers. So, the cost of allocating a YMM register in BtVer2 is 2.
using IndexPlusCostPairTy = std::pair<unsigned, unsigned>;
- // Struct RegisterRenamingInfo maps registers to register files.
- // There is a RegisterRenamingInfo object for every register defined by
- // the target. RegisteRenamingInfo objects are stored into vector
- // RegisterMappings, and register IDs can be used to reference them.
+ // Struct RegisterRenamingInfo is used to map logical registers to register
+ // files.
+ //
+ // There is a RegisterRenamingInfo object for every logical register defined
+ // by the target. RegisterRenamingInfo objects are stored into vector
+ // `RegisterMappings`, and MCPhysReg IDs can be used to reference
+ // elements in that vector.
+ //
+ // Each RegisterRenamingInfo is owned by a PRF, and field `IndexPlusCost`
+ // specifies both the owning PRF, as well as the number of physical registers
+ // consumed at register renaming stage.
+ //
+ // Field `AllowMoveElimination` is set for registers that are used as
+ // destination by optimizable register moves.
+ //
+ // Field `AliasRegID` is set by writes from register moves that have been
+ // eliminated at register renaming stage. A move eliminated at register
+ // renaming stage is effectively bypassed, and its write aliases the source
+ // register definition.
struct RegisterRenamingInfo {
IndexPlusCostPairTy IndexPlusCost;
- llvm::MCPhysReg RenameAs;
+ MCPhysReg RenameAs;
+ MCPhysReg AliasRegID;
+ bool AllowMoveElimination;
+ RegisterRenamingInfo()
+ : IndexPlusCost(std::make_pair(0U, 1U)), RenameAs(0U), AliasRegID(0U),
+ AllowMoveElimination(false) {}
};
// RegisterMapping objects are mainly used to track physical register
- // definitions. There is a RegisterMapping for every register defined by the
- // Target. For each register, a RegisterMapping pair contains a descriptor of
- // the last register write (in the form of a WriteRef object), as well as a
- // RegisterRenamingInfo to quickly identify owning register files.
+ // definitions and resolve data dependencies.
+ //
+ // Every register declared by the Target is associated with an instance of
+ // RegisterMapping. RegisterMapping objects keep track of writes to a logical
+ // register. That information is used by class RegisterFile to resolve data
+ // dependencies, and correctly set latencies for register uses.
//
// This implementation does not allow overlapping register files. The only
// register file that is allowed to overlap with other register files is
@@ -91,9 +139,13 @@ class RegisterFile : public HardwareUnit {
// at most one register file.
using RegisterMapping = std::pair<WriteRef, RegisterRenamingInfo>;
- // This map contains one entry for each register defined by the target.
+ // There is one entry for each register defined by the target.
std::vector<RegisterMapping> RegisterMappings;
+ // Used to track zero registers. There is one bit for each register defined by
+ // the target. Bits are set for registers that are known to be zero.
+ APInt ZeroRegisters;
+
// This method creates a new register file descriptor.
// The new register file owns all of the registers declared by register
// classes in the 'RegisterClasses' set.
@@ -108,43 +160,56 @@ class RegisterFile : public HardwareUnit {
// Here FPRegisterFile contains all the registers defined by register class
// VR128RegClass and VR256RegClass. FPRegisterFile implements 60
// registers which can be used for register renaming purpose.
- void
- addRegisterFile(llvm::ArrayRef<llvm::MCRegisterCostEntry> RegisterClasses,
- unsigned NumPhysRegs);
+ void addRegisterFile(const MCRegisterFileDesc &RF,
+ ArrayRef<MCRegisterCostEntry> Entries);
// Consumes physical registers in each register file specified by the
// `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`.
void allocatePhysRegs(const RegisterRenamingInfo &Entry,
- llvm::MutableArrayRef<unsigned> UsedPhysRegs);
+ MutableArrayRef<unsigned> UsedPhysRegs);
// Releases previously allocated physical registers from the register file(s).
// This method is called from `invalidateRegisterMapping()`.
void freePhysRegs(const RegisterRenamingInfo &Entry,
- llvm::MutableArrayRef<unsigned> FreedPhysRegs);
+ MutableArrayRef<unsigned> FreedPhysRegs);
+
+ // Collects writes that are in a RAW dependency with RS.
+ // This method is called from `addRegisterRead()`.
+ void collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const;
// Create an instance of RegisterMappingTracker for every register file
// specified by the processor model.
// If no register file is specified, then this method creates a default
// register file with an unbounded number of physical registers.
- void initialize(const llvm::MCSchedModel &SM, unsigned NumRegs);
+ void initialize(const MCSchedModel &SM, unsigned NumRegs);
public:
- RegisterFile(const llvm::MCSchedModel &SM, const llvm::MCRegisterInfo &mri,
+ RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
unsigned NumRegs = 0);
// This method updates the register mappings inserting a new register
// definition. This method is also responsible for updating the number of
// allocated physical registers in each register file modified by the write.
- // No physical regiser is allocated when flag ShouldAllocatePhysRegs is set.
- void addRegisterWrite(WriteRef Write,
- llvm::MutableArrayRef<unsigned> UsedPhysRegs,
- bool ShouldAllocatePhysRegs = true);
+ // No physical register is allocated if this write is from a zero-idiom.
+ void addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs);
+
+ // Collect writes that are in a data dependency with RS, and update RS
+ // internal state.
+ void addRegisterRead(ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const;
// Removes write \param WS from the register mappings.
// Physical registers may be released to reflect this update.
+ // No registers are released if this write is from a zero-idiom.
void removeRegisterWrite(const WriteState &WS,
- llvm::MutableArrayRef<unsigned> FreedPhysRegs,
- bool ShouldFreePhysRegs = true);
+ MutableArrayRef<unsigned> FreedPhysRegs);
+
+ // Returns true if a move from RS to WS can be eliminated.
+ // On success, it updates WriteState by setting flag `WS.isEliminated`.
+ // If RS is a read from a zero register, and WS is eliminated, then
+ // `WS.WritesZero` is also set, so that method addRegisterWrite() would not
+ // reserve a physical register for it.
+ bool tryEliminateMove(WriteState &WS, ReadState &RS);
// Checks if there are enough physical registers in the register files.
// Returns a "response mask" where each bit represents the response from a
@@ -155,18 +220,20 @@ public:
//
// Current implementation can simulate up to 32 register files (including the
// special register file at index #0).
- unsigned isAvailable(llvm::ArrayRef<unsigned> Regs) const;
- void collectWrites(llvm::SmallVectorImpl<WriteRef> &Writes,
- unsigned RegID) const;
- void updateOnRead(ReadState &RS, unsigned RegID);
+ unsigned isAvailable(ArrayRef<unsigned> Regs) const;
+ // Returns the number of PRFs implemented by this processor.
unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
+ // Notify each PRF that a new cycle just started.
+ void cycleStart();
+
#ifndef NDEBUG
void dump() const;
#endif
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H
+#endif // LLVM_MCA_REGISTER_FILE_H
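isAvailable returns a per-register-file "response mask", one bit per PRF. A hedged sketch of how a dispatch stage might use it, assuming (as llvm-mca's dispatch logic does) that a non-zero mask means at least one register file ran out of physical registers; PRF and the register IDs are hypothetical:

    #include "llvm/ADT/SmallVector.h"

    // Regs would collect the register IDs written by the instruction being dispatched.
    llvm::SmallVector<unsigned, 4> Regs = { /* register IDs defined by IR */ };
    unsigned Mask = PRF.isAvailable(Regs);
    if (Mask) {
      // Some register file cannot satisfy the request: stall dispatch this cycle.
    }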
diff --git a/contrib/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
new file mode 100644
index 000000000000..549a46c247fe
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -0,0 +1,410 @@
+//===--------------------- ResourceManager.h --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The classes here represent processor resource units and their management
+/// strategy. These classes are managed by the Scheduler.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_RESOURCE_MANAGER_H
+#define LLVM_MCA_RESOURCE_MANAGER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+/// Used to notify the internal state of a processor resource.
+///
+/// A processor resource is available if it is not reserved, and there are
+/// available slots in the buffer. A processor resource is unavailable if it
+/// is either reserved, or the associated buffer is full. A processor resource
+/// with a buffer size of -1 is always available if it is not reserved.
+///
+/// Values of type ResourceStateEvent are returned by method
+/// ResourceState::isBufferAvailable(), which is used to query the internal
+/// state of a resource.
+///
+/// The naming convention for resource state events is:
+/// * Event names start with prefix RS_
+/// * Prefix RS_ is followed by a string describing the actual resource state.
+enum ResourceStateEvent {
+ RS_BUFFER_AVAILABLE,
+ RS_BUFFER_UNAVAILABLE,
+ RS_RESERVED
+};
+
+/// Resource allocation strategy used by hardware scheduler resources.
+class ResourceStrategy {
+ ResourceStrategy(const ResourceStrategy &) = delete;
+ ResourceStrategy &operator=(const ResourceStrategy &) = delete;
+
+public:
+ ResourceStrategy() {}
+ virtual ~ResourceStrategy();
+
+ /// Selects a processor resource unit from a ReadyMask.
+ virtual uint64_t select(uint64_t ReadyMask) = 0;
+
+ /// Called by the ResourceManager when a processor resource group, or a
+ /// processor resource with multiple units has become unavailable.
+ ///
+ /// The default strategy uses this information to bias its selection logic.
+ virtual void used(uint64_t ResourceMask) {}
+};
+
+/// Default resource allocation strategy used by processor resource groups and
+/// processor resources with multiple units.
+class DefaultResourceStrategy final : public ResourceStrategy {
+ /// A Mask of resource unit identifiers.
+ ///
+ /// There is one bit set for every available resource unit.
+ /// It defaults to the value of field ResourceSizeMask in ResourceState.
+ const uint64_t ResourceUnitMask;
+
+ /// A simple round-robin selector for processor resource units.
+ /// Each bit of this mask identifies a sub resource within a group.
+ ///
+ /// As an example, let's assume that this is a default policy for a
+ /// processor resource group composed by the following three units:
+ /// ResourceA -- 0b001
+ /// ResourceB -- 0b010
+ /// ResourceC -- 0b100
+ ///
+ /// Field NextInSequenceMask is used to select the next unit from the set of
+ /// resource units. It defaults to the value of field `ResourceUnitMasks` (in
+ /// this example, it defaults to mask '0b111').
+ ///
+ /// The round-robin selector would first select 'ResourceC', then
+ /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the
+ /// corresponding bit in NextInSequenceMask is cleared. For example, if
+ /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes
+ /// 0b011.
+ ///
+ /// When NextInSequenceMask becomes zero, it is automatically reset to the
+ /// default value (i.e. ResourceUnitMask).
+ uint64_t NextInSequenceMask;
+
+ /// This field is used to track resource units that are used (i.e. selected)
+ /// by groups other than the one associated with this strategy object.
+ ///
+ /// In LLVM processor resource groups are allowed to partially (or fully)
+ /// overlap. That means the same unit may be visible to multiple groups.
+ /// This field keeps track of uses that have originated from outside of
+ /// this group. The idea is to bias the selection strategy, so that resources
+ /// that haven't been used by other groups get prioritized.
+ ///
+ /// The end goal is to (try to) keep the resource distribution as uniform as
+ /// possible. By construction, this mask only tracks one level of resource
+ /// usage. Therefore, this strategy is expected to be less accurate when the
+ /// same units are used multiple times by other groups within a single round of
+ /// select.
+ ///
+ /// Note: an LRU selector would have better accuracy at the cost of being
+ /// slightly more expensive (mostly in terms of runtime cost). Methods
+ /// 'select' and 'used' are always in the hot execution path of llvm-mca.
+ /// Therefore, a slow implementation of 'select' would have a negative impact
+ /// on the overall performance of the tool.
+ uint64_t RemovedFromNextInSequence;
+
+public:
+ DefaultResourceStrategy(uint64_t UnitMask)
+ : ResourceStrategy(), ResourceUnitMask(UnitMask),
+ NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {}
+ virtual ~DefaultResourceStrategy() = default;
+
+ uint64_t select(uint64_t ReadyMask) override;
+ void used(uint64_t Mask) override;
+};
+
+/// A processor resource descriptor.
+///
+/// There is an instance of this class for every processor resource defined by
+/// the machine scheduling model.
+/// Objects of class ResourceState dynamically track the usage of processor
+/// resource units.
+class ResourceState {
+ /// An index to the MCProcResourceDesc entry in the processor model.
+ const unsigned ProcResourceDescIndex;
+ /// A resource mask. This is generated by the tool with the help of
+ /// function `mca::computeProcResourceMasks' (see Support.h).
+ ///
+ /// Field ResourceMask only has one bit set if this resource state describes a
+ /// processor resource unit (i.e. this is not a group). That means, we can
+ /// quickly check if a resource is a group by simply counting the number of
+ /// bits that are set in the mask.
+ ///
+ /// The most significant bit of a mask (MSB) uniquely identifies a resource.
+ /// Remaining bits are used to describe the composition of a group (Group).
+ ///
+ /// Example (little endian):
+ /// Resource | Mask | MSB | Group
+ /// ---------+------------+------------+------------
+ /// A | 0b000001 | 0b000001 | 0b000000
+ /// | | |
+ /// B | 0b000010 | 0b000010 | 0b000000
+ /// | | |
+ /// C | 0b010000 | 0b010000 | 0b000000
+ /// | | |
+ /// D | 0b110010 | 0b100000 | 0b010010
+ ///
+ /// In this example, resources A, B and C are processor resource units.
+ /// Only resource D is a group resource, and it contains resources B and C.
+ /// That is because MSB(B) and MSB(C) are both contained within Group(D).
+ const uint64_t ResourceMask;
+
+ /// A ProcResource can have multiple units.
+ ///
+ /// For processor resource groups this field is a mask of contained resource
+ /// units. It is obtained from ResourceMask by clearing the highest set bit.
+ /// The number of resource units in a group can be simply computed as the
+ /// population count of this field.
+ ///
+ /// For normal (i.e. non-group) resources, the number of bits set in this mask
+ /// is equivalent to the number of units declared by the processor model (see
+ /// field 'NumUnits' in 'ProcResourceUnits').
+ uint64_t ResourceSizeMask;
+
+ /// A mask of ready units.
+ uint64_t ReadyMask;
+
+ /// Buffered resources will have this field set to a positive number. A
+ /// buffered resource behaves like a reservation station
+ /// implementing its own buffer for out-of-order execution.
+ ///
+ /// A BufferSize of 1 is used by scheduler resources that force in-order
+ /// execution.
+ ///
+ /// A BufferSize of 0 is used to model in-order issue/dispatch resources.
+ /// Since in-order issue/dispatch resources don't implement buffers, dispatch
+ /// events coincide with issue events.
+ /// Also, no other instruction can be dispatched/issued while this resource is
+ /// in use. Only when all the "resource cycles" are consumed (after the issue
+ /// event) can a new instruction be dispatched.
+ const int BufferSize;
+
+ /// Available slots in the buffer (zero, if this is not a buffered resource).
+ unsigned AvailableSlots;
+
+ /// This field is set if this resource is currently reserved.
+ ///
+ /// Resources can be reserved for a number of cycles.
+ /// Instructions can still be dispatched to reserved resources. However,
+ /// instructions dispatched to a reserved resource cannot be issued to the
+ /// underlying units (i.e. pipelines) until the resource is released.
+ bool Unavailable;
+
+ const bool IsAGroup;
+
+ /// Checks for the availability of unit 'SubResMask' in the group.
+ bool isSubResourceReady(uint64_t SubResMask) const {
+ return ReadyMask & SubResMask;
+ }
+
+public:
+ ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask);
+
+ unsigned getProcResourceID() const { return ProcResourceDescIndex; }
+ uint64_t getResourceMask() const { return ResourceMask; }
+ uint64_t getReadyMask() const { return ReadyMask; }
+ int getBufferSize() const { return BufferSize; }
+
+ bool isBuffered() const { return BufferSize > 0; }
+ bool isInOrder() const { return BufferSize == 1; }
+
+ /// Returns true if this is an in-order dispatch/issue resource.
+ bool isADispatchHazard() const { return BufferSize == 0; }
+ bool isReserved() const { return Unavailable; }
+
+ void setReserved() { Unavailable = true; }
+ void clearReserved() { Unavailable = false; }
+
+ /// Returns true if this resource is not reserved, and if there are at least
+ /// `NumUnits` available units.
+ bool isReady(unsigned NumUnits = 1) const;
+
+ bool isAResourceGroup() const { return IsAGroup; }
+
+ bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
+
+ void markSubResourceAsUsed(uint64_t ID) {
+ assert(isSubResourceReady(ID));
+ ReadyMask ^= ID;
+ }
+
+ void releaseSubResource(uint64_t ID) {
+ assert(!isSubResourceReady(ID));
+ ReadyMask ^= ID;
+ }
+
+ unsigned getNumUnits() const {
+ return isAResourceGroup() ? 1U : countPopulation(ResourceSizeMask);
+ }
+
+ /// Checks if there is an available slot in the resource buffer.
+ ///
+ /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if
+ /// there is a slot available.
+ ///
+ /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it
+ /// is reserved.
+ ///
+ /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots.
+ ResourceStateEvent isBufferAvailable() const;
+
+ /// Reserve a slot in the buffer.
+ void reserveBuffer() {
+ if (AvailableSlots)
+ AvailableSlots--;
+ }
+
+ /// Release a slot in the buffer.
+ void releaseBuffer() {
+ if (BufferSize > 0)
+ AvailableSlots++;
+ assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
+ }
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+/// A resource unit identifier.
+///
+/// This is used to identify a specific processor resource unit using a pair
+/// of indices where the 'first' index is a processor resource mask, and the
+/// 'second' index is an index for a "sub-resource" (i.e. unit).
+typedef std::pair<uint64_t, uint64_t> ResourceRef;
+
+// First: a MCProcResourceDesc index identifying a buffered resource.
+// Second: max number of buffer entries used in this resource.
+typedef std::pair<unsigned, unsigned> BufferUsageEntry;
+
+/// A resource manager for processor resource units and groups.
+///
+/// This class owns all the ResourceState objects, and it is responsible for
+/// acting on requests from a Scheduler by updating the internal state of
+/// ResourceState objects.
+/// This class doesn't know about instruction itineraries and functional units.
+/// In future, it can be extended to support itineraries too through the same
+/// public interface.
+class ResourceManager {
+ // Set of resources available on the subtarget.
+ //
+ // There is an instance of ResourceState for every resource declared by the
+ // target scheduling model.
+ //
+ // Elements of this vector are ordered by resource kind. In particular,
+ // resource units take precedence over resource groups.
+ //
+ // The index of a processor resource in this vector depends on the value of
+ // its mask (see the description of field ResourceState::ResourceMask). In
+ // particular, it is computed as the position of the most significant bit set
+ // (MSB) in the mask plus one (since we want to ignore the invalid resource
+ // descriptor at index zero).
+ //
+ // Example (little endian):
+ //
+ // Resource | Mask | MSB | Index
+ // ---------+---------+---------+-------
+ // A | 0b00001 | 0b00001 | 1
+ // | | |
+ // B | 0b00100 | 0b00100 | 3
+ // | | |
+ // C | 0b10010 | 0b10000 | 5
+ //
+ //
+ // The same index is also used to address elements within vector `Strategies`
+ // and vector `Resource2Groups`.
+ std::vector<std::unique_ptr<ResourceState>> Resources;
+ std::vector<std::unique_ptr<ResourceStrategy>> Strategies;
+
+ // Used to quickly identify groups that own a particular resource unit.
+ std::vector<uint64_t> Resource2Groups;
+
+ // A table to map processor resource IDs to processor resource masks.
+ SmallVector<uint64_t, 8> ProcResID2Mask;
+
+ // Keeps track of which resources are busy, and how many cycles are left
+ // before those become usable again.
+ SmallDenseMap<ResourceRef, unsigned> BusyResources;
+
+ // Returns the actual resource unit that will be used.
+ ResourceRef selectPipe(uint64_t ResourceID);
+
+ void use(const ResourceRef &RR);
+ void release(const ResourceRef &RR);
+
+ unsigned getNumUnits(uint64_t ResourceID) const;
+
+ // Overrides the selection strategy for the processor resource with the given
+ // mask.
+ void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+ uint64_t ResourceMask);
+
+public:
+ ResourceManager(const MCSchedModel &SM);
+ virtual ~ResourceManager() = default;
+
+ // Overrides the selection strategy for the resource at index ResourceID in
+ // the MCProcResourceDesc table.
+ void setCustomStrategy(std::unique_ptr<ResourceStrategy> S,
+ unsigned ResourceID) {
+ assert(ResourceID < ProcResID2Mask.size() &&
+ "Invalid resource index in input!");
+ return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]);
+ }
+
+ // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
+ // there are enough available slots in the buffers.
+ ResourceStateEvent canBeDispatched(ArrayRef<uint64_t> Buffers) const;
+
+ // Return the processor resource identifier associated to this Mask.
+ unsigned resolveResourceMask(uint64_t Mask) const;
+
+ // Consume a slot in every buffered resource from array 'Buffers'. Resource
+ // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
+ void reserveBuffers(ArrayRef<uint64_t> Buffers);
+
+ // Release buffer entries previously allocated by method reserveBuffers.
+ void releaseBuffers(ArrayRef<uint64_t> Buffers);
+
+ // Reserve a processor resource. A reserved resource is not available for
+ // instruction issue until it is released.
+ void reserveResource(uint64_t ResourceID);
+
+ // Release a previously reserved processor resource.
+ void releaseResource(uint64_t ResourceID);
+
+ bool canBeIssued(const InstrDesc &Desc) const;
+
+ void issueInstruction(
+ const InstrDesc &Desc,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
+
+ void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed);
+
+#ifndef NDEBUG
+ void dump() const {
+ for (const std::unique_ptr<ResourceState> &Resource : Resources)
+ Resource->dump();
+ }
+#endif
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_RESOURCE_MANAGER_H
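The mask encoding documented above (the most significant set bit identifies the resource, the remaining bits name the units of a group, and the vector index is the MSB position plus one) can be decoded with plain bit arithmetic. A small sketch under those assumptions, using the group-D example mask from the comment:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Decode the example mask for group D (0b110010) from the table above.
    static void decodeResourceMask(uint64_t Mask) {
      unsigned MSBIdx = llvm::Log2_64(Mask);            // 5: bit position of the MSB
      uint64_t MSB = uint64_t(1) << MSBIdx;             // 0b100000 uniquely identifies D
      uint64_t GroupUnits = Mask ^ MSB;                 // 0b010010: members B and C
      bool IsGroup = llvm::countPopulation(Mask) > 1;   // true: D is a resource group
      unsigned Index = MSBIdx + 1;                      // 6: slot in the Resources vector
      (void)GroupUnits; (void)IsGroup; (void)Index;
    }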
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnit.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
index 8acc8bcc98fe..71360e984ade 100644
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnit.h
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -12,14 +12,15 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
-#define LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#ifndef LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#define LLVM_MCA_RETIRE_CONTROL_UNIT_H
-#include "HardwareUnit.h"
-#include "Instruction.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/Instruction.h"
#include <vector>
+namespace llvm {
namespace mca {
/// This class tracks which instructions are in-flight (i.e., dispatched but not
@@ -62,15 +63,19 @@ private:
std::vector<RUToken> Queue;
public:
- RetireControlUnit(const llvm::MCSchedModel &SM);
+ RetireControlUnit(const MCSchedModel &SM);
- bool isFull() const { return !AvailableSlots; }
bool isEmpty() const { return AvailableSlots == Queue.size(); }
bool isAvailable(unsigned Quantity = 1) const {
// Some instructions may declare a number of uOps which exceeds the size
// of the reorder buffer. To avoid problems, cap the amount of slots to
// the size of the reorder buffer.
Quantity = std::min(Quantity, static_cast<unsigned>(Queue.size()));
+
+ // Further normalize the number of micro opcodes for instructions that
+ // declare zero opcodes. This should match the behavior of method
+ // reserveSlot().
+ Quantity = std::max(Quantity, 1U);
return AvailableSlots >= Quantity;
}
@@ -94,5 +99,6 @@ public:
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H
+#endif // LLVM_MCA_RETIRE_CONTROL_UNIT_H
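The normalization added above clamps the requested slot count into the range [1, Queue.size()]: an instruction declaring zero micro opcodes still consumes one reorder-buffer slot, and one declaring more micro opcodes than the buffer holds is capped at the buffer size. A tiny worked sketch; the 224-entry size is only an example value:

    #include <algorithm>

    // Mirrors the clamping performed in RetireControlUnit::isAvailable().
    static unsigned normalizeSlots(unsigned Quantity, unsigned ReorderBufferSize) {
      Quantity = std::min(Quantity, ReorderBufferSize);  // cap at the buffer size
      return std::max(Quantity, 1U);                     // always use at least one slot
    }
    // normalizeSlots(0, 224) == 1;  normalizeSlots(300, 224) == 224.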
diff --git a/contrib/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/contrib/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
new file mode 100644
index 000000000000..351ea4827df9
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -0,0 +1,214 @@
+//===--------------------- Scheduler.h ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A scheduler for Processor Resource Units and Processor Resource Groups.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SCHEDULER_H
+#define LLVM_MCA_SCHEDULER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Support.h"
+
+namespace llvm {
+namespace mca {
+
+class SchedulerStrategy {
+public:
+ SchedulerStrategy() = default;
+ virtual ~SchedulerStrategy();
+
+ /// Returns true if Lhs should take priority over Rhs.
+ ///
+ /// This method is used by class Scheduler to select the "best" ready
+ /// instruction to issue to the underlying pipelines.
+ virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0;
+};
+
+/// Default instruction selection strategy used by class Scheduler.
+class DefaultSchedulerStrategy : public SchedulerStrategy {
+ /// This method ranks instructions based on their age, and the number of known
+ /// users. The lower the rank value, the better.
+ int computeRank(const InstRef &Lhs) const {
+ return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers();
+ }
+
+public:
+ DefaultSchedulerStrategy() = default;
+ virtual ~DefaultSchedulerStrategy();
+
+ bool compare(const InstRef &Lhs, const InstRef &Rhs) const override {
+ int LhsRank = computeRank(Lhs);
+ int RhsRank = computeRank(Rhs);
+
+ /// Prioritize older instructions over younger instructions to minimize the
+ /// pressure on the reorder buffer.
+ if (LhsRank == RhsRank)
+ return Lhs.getSourceIndex() < Rhs.getSourceIndex();
+ return LhsRank < RhsRank;
+ }
+};
+
+/// Class Scheduler is responsible for issuing instructions to pipeline
+/// resources.
+///
+/// Internally, it delegates to a ResourceManager the management of processor
+/// resources. This class is also responsible for tracking the progress of
+/// instructions from the dispatch stage, until the write-back stage.
+///
+/// An instruction dispatched to the Scheduler is initially placed into either
+/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input
+/// operands.
+///
+/// An instruction is moved from the WaitSet to the ReadySet when register
+/// operands become available, and all memory dependencies are met.
+/// Instructions that are moved from the WaitSet to the ReadySet transition
+/// in state from 'IS_AVAILABLE' to 'IS_READY'.
+///
+/// On every cycle, the Scheduler checks if it can promote instructions from the
+/// WaitSet to the ReadySet.
+///
+/// An Instruction is moved from the ReadySet to the `IssuedSet` when it is
+/// issued to one or more pipelines. This event also causes an instruction state
+/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction
+/// leaves the IssuedSet when it reaches the write-back stage.
+class Scheduler : public HardwareUnit {
+ LSUnit &LSU;
+
+ // Instruction selection strategy for this Scheduler.
+ std::unique_ptr<SchedulerStrategy> Strategy;
+
+ // Hardware resources that are managed by this scheduler.
+ std::unique_ptr<ResourceManager> Resources;
+
+ std::vector<InstRef> WaitSet;
+ std::vector<InstRef> ReadySet;
+ std::vector<InstRef> IssuedSet;
+
+ /// Verify the given selection strategy and set the Strategy member
+ /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is
+ /// used.
+ void initializeStrategy(std::unique_ptr<SchedulerStrategy> S);
+
+ /// Issue an instruction without updating the ready queue.
+ void issueInstructionImpl(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
+
+ // Identify instructions that have finished executing, and remove them from
+ // the IssuedSet. References to executed instructions are added to input
+ // vector 'Executed'.
+ void updateIssuedSet(SmallVectorImpl<InstRef> &Executed);
+
+ // Try to promote instructions from WaitSet to ReadySet.
+ // Promoted instructions are added to the input vector 'Ready'.
+ void promoteToReadySet(SmallVectorImpl<InstRef> &Ready);
+
+public:
+ Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
+ : Scheduler(Model, Lsu, nullptr) {}
+
+ Scheduler(const MCSchedModel &Model, LSUnit &Lsu,
+ std::unique_ptr<SchedulerStrategy> SelectStrategy)
+ : Scheduler(make_unique<ResourceManager>(Model), Lsu,
+ std::move(SelectStrategy)) {}
+
+ Scheduler(std::unique_ptr<ResourceManager> RM, LSUnit &Lsu,
+ std::unique_ptr<SchedulerStrategy> SelectStrategy)
+ : LSU(Lsu), Resources(std::move(RM)) {
+ initializeStrategy(std::move(SelectStrategy));
+ }
+
+ // Stalls generated by the scheduler.
+ enum Status {
+ SC_AVAILABLE,
+ SC_LOAD_QUEUE_FULL,
+ SC_STORE_QUEUE_FULL,
+ SC_BUFFERS_FULL,
+ SC_DISPATCH_GROUP_STALL,
+ };
+
+ /// Checks if the instruction in 'IR' can be dispatched and returns an answer
+ /// in the form of a Status value.
+ ///
+ /// The DispatchStage is responsible for querying the Scheduler before
+ /// dispatching new instructions. This routine is used to perform such a
+ /// query. If the instruction 'IR' can be dispatched, then SC_AVAILABLE is
+ /// returned; otherwise the returned Status value identifies the stall type.
+ /// Internally, it also checks if the load/store unit is available.
+ Status isAvailable(const InstRef &IR) const;
+
+ /// Reserves buffer and LSUnit queue resources that are necessary to issue
+ /// this instruction.
+ ///
+ /// Once dispatched, instruction IR may become eligible to be issued to the
+ /// underlying pipelines. Note that this operation cannot fail; it assumes
+ /// that a previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
+ void dispatch(const InstRef &IR);
+
+ /// Returns true if IR is ready to be executed by the underlying pipelines.
+ /// This method assumes that IR has been previously dispatched.
+ bool isReady(const InstRef &IR) const;
+
+ /// Issues an instruction and populates a vector of used pipeline resources,
+ /// and a vector of instructions that transitioned to the ready state as a
+ /// result of this event.
+ void issueInstruction(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Used,
+ SmallVectorImpl<InstRef> &Ready);
+
+ /// Returns true if IR has to be issued immediately, or if IR is a zero
+ /// latency instruction.
+ bool mustIssueImmediately(const InstRef &IR) const;
+
+ /// This routine notifies the Scheduler that a new cycle just started.
+ ///
+ /// It notifies the underlying ResourceManager that a new cycle just started.
+ /// Vector `Freed` is populated with ResourceRef objects for resources that
+ /// have changed state, and that are now available to new instructions.
+ /// Executed instructions are added to vector Executed, while vector Ready is
+ /// populated with instructions that have become ready in this new cycle.
+ void cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
+ SmallVectorImpl<InstRef> &Ready,
+ SmallVectorImpl<InstRef> &Executed);
+
+ /// Convert a resource mask into a valid llvm processor resource identifier.
+ unsigned getResourceID(uint64_t Mask) const {
+ return Resources->resolveResourceMask(Mask);
+ }
+
+ /// Select the next instruction to issue from the ReadySet. Returns an invalid
+ /// instruction reference if there are no ready instructions, or if processor
+ /// resources are not available.
+ InstRef select();
+
+#ifndef NDEBUG
+ // Dumps the internal state of this Scheduler (i.e. its instruction queues).
+ void dump() const;
+
+ // Performs a sanity check. It should only be called when we know that 'IR'
+ // is not in any of the scheduler's instruction queues.
+ void sanityCheck(const InstRef &IR) const {
+ assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!");
+ assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!");
+ assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!");
+ }
+#endif // !NDEBUG
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SCHEDULER_H
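The SchedulerStrategy interface above is the extension point for the issue policy. As a purely illustrative sketch (not part of this patch; the PreferYoungestStrategy class and the createSchedulerWithCustomStrategy helper are invented), a client could rank ready instructions differently and hand the policy to the Scheduler through its third constructor:

#include "llvm/ADT/STLExtras.h"
#include "llvm/MCA/HardwareUnits/Scheduler.h"
#include <memory>

namespace {
// Invented policy: prefer younger instructions (higher source index).
class PreferYoungestStrategy : public llvm::mca::SchedulerStrategy {
public:
  bool compare(const llvm::mca::InstRef &Lhs,
               const llvm::mca::InstRef &Rhs) const override {
    return Lhs.getSourceIndex() > Rhs.getSourceIndex();
  }
};
} // namespace

// The Scheduler takes ownership of the strategy object.
std::unique_ptr<llvm::mca::Scheduler>
createSchedulerWithCustomStrategy(const llvm::MCSchedModel &SM,
                                  llvm::mca::LSUnit &LSU) {
  return llvm::make_unique<llvm::mca::Scheduler>(
      SM, LSU, llvm::make_unique<PreferYoungestStrategy>());
}

For reference, the DefaultSchedulerStrategy ranks by source index minus the number of known users, so an older instruction with many waiting consumers wins over a younger one with none.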
diff --git a/contrib/llvm/include/llvm/MCA/InstrBuilder.h b/contrib/llvm/include/llvm/MCA/InstrBuilder.h
new file mode 100644
index 000000000000..5f998db5e4ce
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/InstrBuilder.h
@@ -0,0 +1,77 @@
+//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A builder class for instructions that are statically analyzed by llvm-mca.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INSTRBUILDER_H
+#define LLVM_MCA_INSTRBUILDER_H
+
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+/// A builder class that knows how to construct Instruction objects.
+///
+/// Every llvm-mca Instruction is described by an object of class InstrDesc.
+/// An InstrDesc describes which registers are read/written by the instruction,
+/// as well as the instruction latency and hardware resources consumed.
+///
+/// This class is used by the tool to construct Instructions and instruction
+/// descriptors (i.e. InstrDesc objects).
+/// Information from the machine scheduling model is used to identify processor
+/// resources that are consumed by an instruction.
+class InstrBuilder {
+ const MCSubtargetInfo &STI;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCInstrAnalysis *MCIA;
+ SmallVector<uint64_t, 8> ProcResourceMasks;
+
+ DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors;
+ DenseMap<const MCInst *, std::unique_ptr<const InstrDesc>> VariantDescriptors;
+
+ bool FirstCallInst;
+ bool FirstReturnInst;
+
+ Expected<const InstrDesc &> createInstrDescImpl(const MCInst &MCI);
+ Expected<const InstrDesc &> getOrCreateInstrDesc(const MCInst &MCI);
+
+ InstrBuilder(const InstrBuilder &) = delete;
+ InstrBuilder &operator=(const InstrBuilder &) = delete;
+
+ void populateWrites(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+ void populateReads(InstrDesc &ID, const MCInst &MCI, unsigned SchedClassID);
+ Error verifyInstrDesc(const InstrDesc &ID, const MCInst &MCI) const;
+
+public:
+ InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
+ const MCRegisterInfo &RI, const MCInstrAnalysis *IA);
+
+ void clear() {
+ VariantDescriptors.shrink_and_clear();
+ FirstCallInst = true;
+ FirstReturnInst = true;
+ }
+
+ Expected<std::unique_ptr<Instruction>> createInstruction(const MCInst &MCI);
+};
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_INSTRBUILDER_H
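A hedged usage sketch (the lowerBlock helper below is invented for illustration): the builder is typically driven over a sequence of MCInst, with failures propagated through llvm::Expected.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/InstrBuilder.h"
#include <memory>
#include <vector>

// Hypothetical helper: lower a block of MCInst into mca::Instruction objects.
static llvm::Error
lowerBlock(llvm::mca::InstrBuilder &IB, llvm::ArrayRef<llvm::MCInst> Insts,
           std::vector<std::unique_ptr<llvm::mca::Instruction>> &Out) {
  for (const llvm::MCInst &MCI : Insts) {
    llvm::Expected<std::unique_ptr<llvm::mca::Instruction>> InstOrErr =
        IB.createInstruction(MCI);
    if (!InstOrErr)
      return InstOrErr.takeError();
    Out.push_back(std::move(*InstOrErr));
  }
  return llvm::Error::success();
}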
diff --git a/contrib/llvm/tools/llvm-mca/Instruction.h b/contrib/llvm/include/llvm/MCA/Instruction.h
index 3b2f90528f2e..b91610c64d85 100644
--- a/contrib/llvm/tools/llvm-mca/Instruction.h
+++ b/contrib/llvm/include/llvm/MCA/Instruction.h
@@ -13,9 +13,12 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H
-#define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H
+#ifndef LLVM_MCA_INSTRUCTION_H
+#define LLVM_MCA_INSTRUCTION_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MathExtras.h"
#ifndef NDEBUG
@@ -23,8 +26,8 @@
#endif
#include <memory>
-#include <set>
-#include <vector>
+
+namespace llvm {
namespace mca {
@@ -85,7 +88,7 @@ class ReadState;
/// register write. It also tracks how many cycles are left before the write
/// back stage.
class WriteState {
- const WriteDescriptor &WD;
+ const WriteDescriptor *WD;
// On instruction issue, this field is set equal to the write latency.
// Before instruction issue, this field defaults to -512, a special
// value that represents an "unknown" number of cycles.
@@ -97,43 +100,94 @@ class WriteState {
// field RegisterID from WD.
unsigned RegisterID;
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
+
// True if this write implicitly clears the upper portion of RegisterID's
// super-registers.
bool ClearsSuperRegs;
+ // True if this write is from a dependency breaking zero-idiom instruction.
+ bool WritesZero;
+
+ // True if this write has been eliminated at register renaming stage.
+ // Example: a register move doesn't consume scheduler/pipeline resources if
+ // it is eliminated at register renaming stage. It still consumes
+ // decode bandwidth, and ROB entries.
+ bool IsEliminated;
+
// This field is set if this is a partial register write, and it has a false
// dependency on any previous write of the same register (or a portion of it).
// DependentWrite must be able to complete before this write completes, so
// that we don't break the WAW, and the two writes can be merged together.
const WriteState *DependentWrite;
+ // A partial write that is in a false dependency with this write.
+ WriteState *PartialWrite;
+
+ unsigned DependentWriteCyclesLeft;
+
// A list of dependent reads. Users is a set of dependent
// reads. A dependent read is added to the set only if CyclesLeft
// is "unknown". As soon as CyclesLeft is 'known', each user in the set
// gets notified with the actual CyclesLeft.
// The 'second' element of a pair is a "ReadAdvance" number of cycles.
- std::set<std::pair<ReadState *, int>> Users;
+ SmallVector<std::pair<ReadState *, int>, 4> Users;
public:
WriteState(const WriteDescriptor &Desc, unsigned RegID,
- bool clearsSuperRegs = false)
- : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
- ClearsSuperRegs(clearsSuperRegs), DependentWrite(nullptr) {}
- WriteState(const WriteState &Other) = delete;
- WriteState &operator=(const WriteState &Other) = delete;
+ bool clearsSuperRegs = false, bool writesZero = false)
+ : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
+ ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
+ IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
+ DependentWriteCyclesLeft(0) {}
+
+ WriteState(const WriteState &Other) = default;
+ WriteState &operator=(const WriteState &Other) = default;
int getCyclesLeft() const { return CyclesLeft; }
- unsigned getWriteResourceID() const { return WD.SClassOrWriteResourceID; }
+ unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
unsigned getRegisterID() const { return RegisterID; }
- unsigned getLatency() const { return WD.Latency; }
+ unsigned getRegisterFileID() const { return PRFID; }
+ unsigned getLatency() const { return WD->Latency; }
void addUser(ReadState *Use, int ReadAdvance);
- unsigned getNumUsers() const { return Users.size(); }
+ void addUser(WriteState *Use);
+
+ unsigned getDependentWriteCyclesLeft() const {
+ return DependentWriteCyclesLeft;
+ }
+
+ unsigned getNumUsers() const {
+ unsigned NumUsers = Users.size();
+ if (PartialWrite)
+ ++NumUsers;
+ return NumUsers;
+ }
+
bool clearsSuperRegisters() const { return ClearsSuperRegs; }
+ bool isWriteZero() const { return WritesZero; }
+ bool isEliminated() const { return IsEliminated; }
+ bool isExecuted() const {
+ return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
+ }
const WriteState *getDependentWrite() const { return DependentWrite; }
- void setDependentWrite(const WriteState *Write) { DependentWrite = Write; }
+ void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
+ void writeStartEvent(unsigned Cycles) {
+ DependentWriteCyclesLeft = Cycles;
+ DependentWrite = nullptr;
+ }
+
+ void setWriteZero() { WritesZero = true; }
+ void setEliminated() {
+ assert(Users.empty() && "Write is in an inconsistent state.");
+ CyclesLeft = 0;
+ IsEliminated = true;
+ }
+
+ void setPRF(unsigned PRF) { PRFID = PRF; }
// On every cycle, update CyclesLeft and notify dependent users.
void cycleEvent();
@@ -149,9 +203,11 @@ public:
/// A read may be dependent on more than one write. This occurs when some
/// writes only partially update the register associated to this read.
class ReadState {
- const ReadDescriptor &RD;
+ const ReadDescriptor *RD;
// Physical register identifier associated with this read.
unsigned RegisterID;
+ // Physical register file that serves register RegisterID.
+ unsigned PRFID;
// Number of writes that contribute to the definition of RegisterID.
// In the absence of partial register updates, the number of DependentWrites
// cannot be more than one.
@@ -168,20 +224,27 @@ class ReadState {
// This field is set to true only if there are no dependent writes, and
// there are no `CyclesLeft' to wait.
bool IsReady;
+ // True if this is a read from a known zero register.
+ bool IsZero;
+ // True if this register read is from a dependency-breaking instruction.
+ bool IndependentFromDef;
public:
ReadState(const ReadDescriptor &Desc, unsigned RegID)
- : RD(Desc), RegisterID(RegID), DependentWrites(0),
- CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true) {}
- ReadState(const ReadState &Other) = delete;
- ReadState &operator=(const ReadState &Other) = delete;
+ : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
+ CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
+ IsZero(false), IndependentFromDef(false) {}
- const ReadDescriptor &getDescriptor() const { return RD; }
- unsigned getSchedClass() const { return RD.SchedClassID; }
+ const ReadDescriptor &getDescriptor() const { return *RD; }
+ unsigned getSchedClass() const { return RD->SchedClassID; }
unsigned getRegisterID() const { return RegisterID; }
+ unsigned getRegisterFileID() const { return PRFID; }
bool isReady() const { return IsReady; }
- bool isImplicitRead() const { return RD.isImplicitRead(); }
+ bool isImplicitRead() const { return RD->isImplicitRead(); }
+
+ bool isIndependentFromDef() const { return IndependentFromDef; }
+ void setIndependentFromDef() { IndependentFromDef = true; }
void cycleEvent();
void writeStartEvent(unsigned Cycles);
@@ -189,6 +252,10 @@ public:
DependentWrites = Writes;
IsReady = !Writes;
}
+
+ bool isReadZero() const { return IsZero; }
+ void setReadZero() { IsZero = true; }
+ void setPRF(unsigned ID) { PRFID = ID; }
};
/// A sequence of cycles.
@@ -224,7 +291,7 @@ public:
bool isValid() const { return Begin <= End; }
unsigned size() const { return End - Begin; };
- void Subtract(unsigned Cycles) {
+ void subtract(unsigned Cycles) {
assert(End >= Cycles);
End -= Cycles;
}
@@ -253,15 +320,16 @@ struct ResourceUsage {
/// An instruction descriptor
struct InstrDesc {
- std::vector<WriteDescriptor> Writes; // Implicit writes are at the end.
- std::vector<ReadDescriptor> Reads; // Implicit reads are at the end.
+ SmallVector<WriteDescriptor, 4> Writes; // Implicit writes are at the end.
+ SmallVector<ReadDescriptor, 4> Reads; // Implicit reads are at the end.
// For every resource used by an instruction of this kind, this vector
// reports the number of "consumed cycles".
- std::vector<std::pair<uint64_t, ResourceUsage>> Resources;
+ SmallVector<std::pair<uint64_t, ResourceUsage>, 4> Resources;
// A list of buffered resources consumed by this instruction.
- std::vector<uint64_t> Buffers;
+ SmallVector<uint64_t, 4> Buffers;
+
unsigned MaxLatency;
// Number of MicroOps for this instruction.
unsigned NumMicroOps;
@@ -269,18 +337,74 @@ struct InstrDesc {
bool MayLoad;
bool MayStore;
bool HasSideEffects;
+ bool BeginGroup;
+ bool EndGroup;
+
+ // True if all buffered resources are in-order, and there is at least one
+ // buffer which is a dispatch hazard (BufferSize = 0).
+ bool MustIssueImmediately;
// A zero latency instruction doesn't consume any scheduler resources.
bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
+
+ InstrDesc() = default;
+ InstrDesc(const InstrDesc &Other) = delete;
+ InstrDesc &operator=(const InstrDesc &Other) = delete;
+};
+
+/// Base class for instructions consumed by the simulation pipeline.
+///
+/// This class tracks data dependencies as well as generic properties
+/// of the instruction.
+class InstructionBase {
+ const InstrDesc &Desc;
+
+ // This field is set for instructions that are candidates for move
+ // elimination. For more information about move elimination, see the
+ // definition of RegisterMappingTracker in RegisterFile.h
+ bool IsOptimizableMove;
+
+ // Output dependencies.
+ // One entry per each implicit and explicit register definition.
+ SmallVector<WriteState, 4> Defs;
+
+ // Input dependencies.
+ // One entry per each implicit and explicit register use.
+ SmallVector<ReadState, 4> Uses;
+
+public:
+ InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
+
+ SmallVectorImpl<WriteState> &getDefs() { return Defs; }
+ const ArrayRef<WriteState> getDefs() const { return Defs; }
+ SmallVectorImpl<ReadState> &getUses() { return Uses; }
+ const ArrayRef<ReadState> getUses() const { return Uses; }
+ const InstrDesc &getDesc() const { return Desc; }
+
+ unsigned getLatency() const { return Desc.MaxLatency; }
+
+ bool hasDependentUsers() const {
+ return any_of(Defs,
+ [](const WriteState &Def) { return Def.getNumUsers() > 0; });
+ }
+
+ unsigned getNumUsers() const {
+ unsigned NumUsers = 0;
+ for (const WriteState &Def : Defs)
+ NumUsers += Def.getNumUsers();
+ return NumUsers;
+ }
+
+ // Returns true if this instruction is a candidate for move elimination.
+ bool isOptimizableMove() const { return IsOptimizableMove; }
+ void setOptimizableMove() { IsOptimizableMove = true; }
};
/// An instruction propagated through the simulated instruction pipeline.
///
/// This class is used to monitor changes to the internal state of instructions
/// that are sent to the various components of the simulated hardware pipeline.
-class Instruction {
- const InstrDesc &Desc;
-
+class Instruction : public InstructionBase {
enum InstrStage {
IS_INVALID, // Instruction in an invalid state.
IS_AVAILABLE, // Instruction dispatched but operands are not ready.
@@ -300,46 +424,14 @@ class Instruction {
// Retire Unit token ID for this instruction.
unsigned RCUTokenID;
- bool IsDepBreaking;
-
- using UniqueDef = std::unique_ptr<WriteState>;
- using UniqueUse = std::unique_ptr<ReadState>;
- using VecDefs = std::vector<UniqueDef>;
- using VecUses = std::vector<UniqueUse>;
-
- // Output dependencies.
- // One entry per each implicit and explicit register definition.
- VecDefs Defs;
-
- // Input dependencies.
- // One entry per each implicit and explicit register use.
- VecUses Uses;
-
public:
Instruction(const InstrDesc &D)
- : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0),
- IsDepBreaking(false) {}
- Instruction(const Instruction &Other) = delete;
- Instruction &operator=(const Instruction &Other) = delete;
-
- VecDefs &getDefs() { return Defs; }
- const VecDefs &getDefs() const { return Defs; }
- VecUses &getUses() { return Uses; }
- const VecUses &getUses() const { return Uses; }
- const InstrDesc &getDesc() const { return Desc; }
+ : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
+ RCUTokenID(0) {}
+
unsigned getRCUTokenID() const { return RCUTokenID; }
int getCyclesLeft() const { return CyclesLeft; }
- bool isDependencyBreaking() const { return IsDepBreaking; }
- void setDependencyBreaking() { IsDepBreaking = true; }
-
- unsigned getNumUsers() const {
- unsigned NumUsers = 0;
- for (const UniqueDef &Def : Defs)
- NumUsers += Def->getNumUsers();
- return NumUsers;
- }
-
// Transition to the dispatch stage, and assign a RCUToken to this
// instruction. The RCUToken is used to track the completion of every
// register write performed by this instruction.
@@ -363,6 +455,15 @@ public:
bool isExecuted() const { return Stage == IS_EXECUTED; }
bool isRetired() const { return Stage == IS_RETIRED; }
+ bool isEliminated() const {
+ return isReady() && getDefs().size() &&
+ all_of(getDefs(),
+ [](const WriteState &W) { return W.isEliminated(); });
+ }
+
+ // Forces a transition from state IS_AVAILABLE to state IS_EXECUTED.
+ void forceExecuted();
+
void retire() {
assert(isExecuted() && "Instruction is in an invalid state!");
Stage = IS_RETIRED;
@@ -374,26 +475,32 @@ public:
/// An InstRef contains both a SourceMgr index and Instruction pair. The index
/// is used as a unique identifier for the instruction. MCA will make use of
/// this index as a key throughout MCA.
-class InstRef : public std::pair<unsigned, Instruction *> {
+class InstRef {
+ std::pair<unsigned, Instruction *> Data;
+
public:
- InstRef() : std::pair<unsigned, Instruction *>(0, nullptr) {}
- InstRef(unsigned Index, Instruction *I)
- : std::pair<unsigned, Instruction *>(Index, I) {}
+ InstRef() : Data(std::make_pair(0, nullptr)) {}
+ InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
+
+ bool operator==(const InstRef &Other) const { return Data == Other.Data; }
+
+ unsigned getSourceIndex() const { return Data.first; }
+ Instruction *getInstruction() { return Data.second; }
+ const Instruction *getInstruction() const { return Data.second; }
- unsigned getSourceIndex() const { return first; }
- Instruction *getInstruction() { return second; }
- const Instruction *getInstruction() const { return second; }
+ /// Returns true if this references a valid instruction.
+ operator bool() const { return Data.second != nullptr; }
- /// Returns true if this InstRef has been populated.
- bool isValid() const { return second != nullptr; }
+ /// Invalidate this reference.
+ void invalidate() { Data.second = nullptr; }
#ifndef NDEBUG
- void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); }
+ void print(raw_ostream &OS) const { OS << getSourceIndex(); }
#endif
};
#ifndef NDEBUG
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InstRef &IR) {
+inline raw_ostream &operator<<(raw_ostream &OS, const InstRef &IR) {
IR.print(OS);
return OS;
}
@@ -415,20 +522,30 @@ public:
unsigned getSourceIndex() const { return Data.first; }
const WriteState *getWriteState() const { return Data.second; }
WriteState *getWriteState() { return Data.second; }
- void invalidate() { Data = std::make_pair(INVALID_IID, nullptr); }
-
- bool isValid() const {
- return Data.first != INVALID_IID && Data.second != nullptr;
+ void invalidate() { Data.second = nullptr; }
+ bool isWriteZero() const {
+ assert(isValid() && "Invalid null WriteState found!");
+ return getWriteState()->isWriteZero();
}
- bool operator==(const WriteRef &Other) const {
- return Data == Other.Data;
+
+ /// Returns true if this register write has been executed, and the new
+ /// register value is therefore available to users.
+ bool isAvailable() const {
+ if (getSourceIndex() == INVALID_IID)
+ return false;
+ const WriteState *WS = getWriteState();
+ return !WS || WS->isExecuted();
}
+ bool isValid() const { return Data.first != INVALID_IID && Data.second; }
+ bool operator==(const WriteRef &Other) const { return Data == Other.Data; }
+
#ifndef NDEBUG
void dump() const;
#endif
};
} // namespace mca
+} // namespace llvm
-#endif
+#endif // LLVM_MCA_INSTRUCTION_H
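The reworked InstRef models validity through operator bool and invalidate() instead of the old isValid() query. A minimal, illustrative fragment (consumeInstruction is an invented name) showing how a stage is expected to handle a reference:

#include "llvm/MCA/Instruction.h"

// Illustration only: typical handling of an InstRef inside a pipeline stage.
void consumeInstruction(llvm::mca::InstRef &IR) {
  if (!IR)
    return; // Default-constructed or already invalidated reference.

  llvm::mca::Instruction &IS = *IR.getInstruction();
  if (IS.isExecuted()) {
    // The slot can be recycled once the instruction has left this stage.
    IR.invalidate();
  }
}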
diff --git a/contrib/llvm/tools/llvm-mca/Pipeline.h b/contrib/llvm/include/llvm/MCA/Pipeline.h
index 6916e422be39..acd256060bdd 100644
--- a/contrib/llvm/tools/llvm-mca/Pipeline.h
+++ b/contrib/llvm/include/llvm/MCA/Pipeline.h
@@ -13,18 +13,18 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINE_H
-#define LLVM_TOOLS_LLVM_MCA_PIPELINE_H
+#ifndef LLVM_MCA_PIPELINE_H
+#define LLVM_MCA_PIPELINE_H
-#include "Scheduler.h"
-#include "Stage.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/Stage.h"
+#include "llvm/Support/Error.h"
+namespace llvm {
namespace mca {
class HWEventListener;
-class HWInstructionEvent;
-class HWStallEvent;
/// A pipeline for a specific subtarget.
///
@@ -55,25 +55,25 @@ class Pipeline {
Pipeline &operator=(const Pipeline &P) = delete;
/// An ordered list of stages that define this instruction pipeline.
- llvm::SmallVector<std::unique_ptr<Stage>, 8> Stages;
+ SmallVector<std::unique_ptr<Stage>, 8> Stages;
std::set<HWEventListener *> Listeners;
unsigned Cycles;
- void preExecuteStages();
- bool executeStages(InstRef &IR);
- void postExecuteStages();
- void runCycle();
-
+ Error runCycle();
bool hasWorkToProcess();
void notifyCycleBegin();
void notifyCycleEnd();
public:
Pipeline() : Cycles(0) {}
- void appendStage(std::unique_ptr<Stage> S) { Stages.push_back(std::move(S)); }
- void run();
+ void appendStage(std::unique_ptr<Stage> S);
+
+ /// Returns the total number of simulated cycles.
+ Expected<unsigned> run();
+
void addEventListener(HWEventListener *Listener);
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_PIPELINE_H
+#endif // LLVM_MCA_PIPELINE_H
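run() now reports the number of simulated cycles wrapped in llvm::Expected instead of returning void. A hedged sketch of a driver (assembleAndRun is invented; the concrete stages are assumed to be built elsewhere):

#include "llvm/MCA/Pipeline.h"
#include <memory>
#include <vector>

// Hypothetical driver routine.
static llvm::Expected<unsigned>
assembleAndRun(std::vector<std::unique_ptr<llvm::mca::Stage>> Stages,
               llvm::mca::HWEventListener *Listener) {
  llvm::mca::Pipeline P;
  for (std::unique_ptr<llvm::mca::Stage> &S : Stages)
    P.appendStage(std::move(S));
  if (Listener)
    P.addEventListener(Listener);
  // On success, run() returns the total number of simulated cycles.
  return P.run();
}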
diff --git a/contrib/llvm/include/llvm/MCA/SourceMgr.h b/contrib/llvm/include/llvm/MCA/SourceMgr.h
new file mode 100644
index 000000000000..5e0ca6419f5d
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/SourceMgr.h
@@ -0,0 +1,57 @@
+//===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements class SourceMgr. Class SourceMgr abstracts the input
+/// code sequence (a sequence of MCInst), and assigns unique identifiers to
+/// every instruction in the sequence.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SOURCEMGR_H
+#define LLVM_MCA_SOURCEMGR_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace mca {
+
+class Instruction;
+
+typedef std::pair<unsigned, const Instruction &> SourceRef;
+
+class SourceMgr {
+ using UniqueInst = std::unique_ptr<Instruction>;
+ ArrayRef<UniqueInst> Sequence;
+ unsigned Current;
+ const unsigned Iterations;
+ static const unsigned DefaultIterations = 100;
+
+public:
+ SourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
+ : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {}
+
+ unsigned getNumIterations() const { return Iterations; }
+ unsigned size() const { return Sequence.size(); }
+ bool hasNext() const { return Current < (Iterations * Sequence.size()); }
+ void updateNext() { ++Current; }
+
+ SourceRef peekNext() const {
+ assert(hasNext() && "Already at end of sequence!");
+ return SourceRef(Current, *Sequence[Current % Sequence.size()]);
+ }
+
+ using const_iterator = ArrayRef<UniqueInst>::const_iterator;
+ const_iterator begin() const { return Sequence.begin(); }
+ const_iterator end() const { return Sequence.end(); }
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SOURCEMGR_H
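The traversal protocol is hasNext()/peekNext()/updateNext(); peekNext() does not advance the cursor. A small illustrative loop (countSimulatedInstructions is an invented name):

#include "llvm/MCA/SourceMgr.h"

// Illustration only: visit every simulated instruction exactly once.
static unsigned countSimulatedInstructions(llvm::mca::SourceMgr &SM) {
  unsigned Count = 0;
  while (SM.hasNext()) {
    llvm::mca::SourceRef SR = SM.peekNext();
    (void)SR.first;  // Unique identifier assigned to this instruction.
    (void)SR.second; // The mca::Instruction to be simulated.
    SM.updateNext();
    ++Count;
  }
  return Count; // Equals getNumIterations() * size().
}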
diff --git a/contrib/llvm/tools/llvm-mca/DispatchStage.h b/contrib/llvm/include/llvm/MCA/Stages/DispatchStage.h
index 4262a241c08c..f015cd7522eb 100644
--- a/contrib/llvm/tools/llvm-mca/DispatchStage.h
+++ b/contrib/llvm/include/llvm/MCA/Stages/DispatchStage.h
@@ -16,21 +16,20 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H
+#ifndef LLVM_MCA_DISPATCH_STAGE_H
+#define LLVM_MCA_DISPATCH_STAGE_H
-#include "HWEventListener.h"
-#include "Instruction.h"
-#include "RegisterFile.h"
-#include "RetireControlUnit.h"
-#include "Stage.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Stages/Stage.h"
+namespace llvm {
namespace mca {
-class Scheduler;
-
// Implements the hardware dispatch logic.
//
// This class is responsible for the dispatch stage, in which instructions are
@@ -49,58 +48,46 @@ class Scheduler;
//
// If the number of micro opcodes exceeds DispatchWidth, then the instruction
// is dispatched in multiple cycles.
-class DispatchStage : public Stage {
+class DispatchStage final : public Stage {
unsigned DispatchWidth;
unsigned AvailableEntries;
unsigned CarryOver;
- const llvm::MCSubtargetInfo &STI;
+ InstRef CarriedOver;
+ const MCSubtargetInfo &STI;
RetireControlUnit &RCU;
RegisterFile &PRF;
- Scheduler &SC;
-
- bool checkRCU(const InstRef &IR);
- bool checkPRF(const InstRef &IR);
- bool checkScheduler(const InstRef &IR);
- void dispatch(InstRef IR);
- void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI);
-
- void notifyInstructionDispatched(const InstRef &IR,
- llvm::ArrayRef<unsigned> UsedPhysRegs);
- bool isAvailable(unsigned NumEntries) const {
- return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth;
- }
+ bool checkRCU(const InstRef &IR) const;
+ bool checkPRF(const InstRef &IR) const;
+ bool canDispatch(const InstRef &IR) const;
+ Error dispatch(InstRef IR);
- bool canDispatch(const InstRef &IR) {
- assert(isAvailable(IR.getInstruction()->getDesc().NumMicroOps));
- return checkRCU(IR) && checkPRF(IR) && checkScheduler(IR);
- }
+ void updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI);
- void collectWrites(llvm::SmallVectorImpl<WriteRef> &Vec,
- unsigned RegID) const {
- return PRF.collectWrites(Vec, RegID);
- }
+ void notifyInstructionDispatched(const InstRef &IR,
+ ArrayRef<unsigned> UsedPhysRegs,
+ unsigned uOps) const;
public:
- DispatchStage(const llvm::MCSubtargetInfo &Subtarget,
- const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize,
+ DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
unsigned MaxDispatchWidth, RetireControlUnit &R,
- RegisterFile &F, Scheduler &Sched)
+ RegisterFile &F)
: DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
- CarryOver(0U), STI(Subtarget), RCU(R), PRF(F), SC(Sched) {}
+ CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {}
+
+ bool isAvailable(const InstRef &IR) const override;
- // We can always try to dispatch, so returning false is okay in this case.
- // The retire stage, which controls the RCU, might have items to complete but
- // RetireStage::hasWorkToComplete will check for that case.
- virtual bool hasWorkToComplete() const override final { return false; }
- virtual void cycleStart() override final;
- virtual bool execute(InstRef &IR) override final;
- void notifyDispatchStall(const InstRef &IR, unsigned EventType);
+ // The dispatch logic internally doesn't buffer instructions. So there is
+ // never work to do at the beginning of every cycle.
+ bool hasWorkToComplete() const override { return false; }
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
#ifndef NDEBUG
void dump() const;
#endif
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H
+#endif // LLVM_MCA_DISPATCH_STAGE_H
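Compared to the old in-tree tool header, the stage no longer takes a Scheduler reference or a register file size. A hypothetical factory (createDispatchStage is invented; the retire control unit and register file are assumed to be owned by the caller) showing the revised constructor:

#include "llvm/ADT/STLExtras.h"
#include "llvm/MCA/Stages/DispatchStage.h"
#include <memory>

// Hypothetical factory; illustrates the new constructor signature only.
static std::unique_ptr<llvm::mca::DispatchStage>
createDispatchStage(const llvm::MCSubtargetInfo &STI,
                    const llvm::MCRegisterInfo &MRI, unsigned DispatchWidth,
                    llvm::mca::RetireControlUnit &RCU,
                    llvm::mca::RegisterFile &PRF) {
  return llvm::make_unique<llvm::mca::DispatchStage>(STI, MRI, DispatchWidth,
                                                     RCU, PRF);
}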
diff --git a/contrib/llvm/include/llvm/MCA/Stages/EntryStage.h b/contrib/llvm/include/llvm/MCA/Stages/EntryStage.h
new file mode 100644
index 000000000000..cd9a65b8cc2b
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/Stages/EntryStage.h
@@ -0,0 +1,52 @@
+//===---------------------- EntryStage.h ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Entry stage of an instruction pipeline. Its sole
+/// purpose in life is to pick instructions in sequence and move them to the
+/// next pipeline stage.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_ENTRY_STAGE_H
+#define LLVM_MCA_ENTRY_STAGE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/SourceMgr.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class EntryStage final : public Stage {
+ InstRef CurrentInstruction;
+ SmallVector<std::unique_ptr<Instruction>, 16> Instructions;
+ SourceMgr &SM;
+ unsigned NumRetired;
+
+ // Updates the program counter, and sets 'CurrentInstruction'.
+ void getNextInstruction();
+
+ EntryStage(const EntryStage &Other) = delete;
+ EntryStage &operator=(const EntryStage &Other) = delete;
+
+public:
+ EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { }
+
+ bool isAvailable(const InstRef &IR) const override;
+ bool hasWorkToComplete() const override;
+ Error execute(InstRef &IR) override;
+ Error cycleStart() override;
+ Error cycleEnd() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_ENTRY_STAGE_H
diff --git a/contrib/llvm/include/llvm/MCA/Stages/ExecuteStage.h b/contrib/llvm/include/llvm/MCA/Stages/ExecuteStage.h
new file mode 100644
index 000000000000..8cb287e06d9f
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/Stages/ExecuteStage.h
@@ -0,0 +1,80 @@
+//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the execution stage of a default instruction pipeline.
+///
+/// The ExecuteStage is responsible for managing the hardware scheduler
+/// and issuing notifications that an instruction has been executed.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_EXECUTE_STAGE_H
+#define LLVM_MCA_EXECUTE_STAGE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+class ExecuteStage final : public Stage {
+ Scheduler &HWS;
+
+ Error issueInstruction(InstRef &IR);
+
+ // Called at the beginning of each cycle to issue already dispatched
+ // instructions to the underlying pipelines.
+ Error issueReadyInstructions();
+
+ // Used to notify instructions eliminated at register renaming stage.
+ Error handleInstructionEliminated(InstRef &IR);
+
+ ExecuteStage(const ExecuteStage &Other) = delete;
+ ExecuteStage &operator=(const ExecuteStage &Other) = delete;
+
+public:
+ ExecuteStage(Scheduler &S) : Stage(), HWS(S) {}
+
+ // This stage works under the assumption that the Pipeline will eventually
+ // execute a retire stage. We don't need to check if pipelines and/or
+ // schedulers have instructions to process, because those instructions are
+ // also tracked by the retire control unit. That means
+ // RetireStage::hasWorkToComplete() is responsible for checking if there
+ // are still instructions in-flight in the out-of-order backend.
+ bool hasWorkToComplete() const override { return false; }
+ bool isAvailable(const InstRef &IR) const override;
+
+ // Notifies the scheduler that a new cycle just started.
+ //
+ // It is also responsible for notifying listeners about instruction state
+ // changes, and about processor resources freed by the scheduler.
+ // Instructions that transitioned to the 'Executed' state are automatically
+ // moved to the next stage (i.e. RetireStage).
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
+
+ void notifyInstructionIssued(
+ const InstRef &IR,
+ MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const;
+ void notifyInstructionExecuted(const InstRef &IR) const;
+ void notifyInstructionReady(const InstRef &IR) const;
+ void notifyResourceAvailable(const ResourceRef &RR) const;
+
+ // Notify listeners that buffered resources have been consumed or freed.
+ void notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_EXECUTE_STAGE_H
diff --git a/contrib/llvm/tools/llvm-mca/InstructionTables.h b/contrib/llvm/include/llvm/MCA/Stages/InstructionTables.h
index 18e019988430..34e338f0ce6b 100644
--- a/contrib/llvm/tools/llvm-mca/InstructionTables.h
+++ b/contrib/llvm/include/llvm/MCA/Stages/InstructionTables.h
@@ -14,30 +14,33 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H
-#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H
+#ifndef LLVM_MCA_INSTRUCTIONTABLES_H
+#define LLVM_MCA_INSTRUCTIONTABLES_H
-#include "InstrBuilder.h"
-#include "Scheduler.h"
-#include "Stage.h"
-#include "View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/Stage.h"
+#include "llvm/MCA/Support.h"
+namespace llvm {
namespace mca {
-class InstructionTables : public Stage {
- const llvm::MCSchedModel &SM;
- InstrBuilder &IB;
- llvm::SmallVector<std::pair<ResourceRef, double>, 4> UsedResources;
+class InstructionTables final : public Stage {
+ const MCSchedModel &SM;
+ SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
+ SmallVector<uint64_t, 8> Masks;
public:
- InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder)
- : Stage(), SM(Model), IB(Builder) {}
+ InstructionTables(const MCSchedModel &Model)
+ : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) {
+ computeProcResourceMasks(Model, Masks);
+ }
- bool hasWorkToComplete() const override final { return false; }
- bool execute(InstRef &IR) override final;
+ bool hasWorkToComplete() const override { return false; }
+ Error execute(InstRef &IR) override;
};
} // namespace mca
+} // namespace llvm
-#endif
+#endif // LLVM_MCA_INSTRUCTIONTABLES_H
diff --git a/contrib/llvm/tools/llvm-mca/RetireStage.h b/contrib/llvm/include/llvm/MCA/Stages/RetireStage.h
index 8cf672d92c6e..2051ce5c86ad 100644
--- a/contrib/llvm/tools/llvm-mca/RetireStage.h
+++ b/contrib/llvm/include/llvm/MCA/Stages/RetireStage.h
@@ -8,41 +8,41 @@
//===----------------------------------------------------------------------===//
/// \file
///
-/// This file defines the retire stage of an instruction pipeline.
+/// This file defines the retire stage of a default instruction pipeline.
/// The RetireStage represents the process logic that interacts with the
/// simulated RetireControlUnit hardware.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H
+#ifndef LLVM_MCA_RETIRE_STAGE_H
+#define LLVM_MCA_RETIRE_STAGE_H
-#include "RegisterFile.h"
-#include "RetireControlUnit.h"
-#include "Stage.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/Stages/Stage.h"
+namespace llvm {
namespace mca {
-class RetireStage : public Stage {
+class RetireStage final : public Stage {
// Owner will go away when we move listeners/eventing to the stages.
RetireControlUnit &RCU;
RegisterFile &PRF;
+ RetireStage(const RetireStage &Other) = delete;
+ RetireStage &operator=(const RetireStage &Other) = delete;
+
public:
RetireStage(RetireControlUnit &R, RegisterFile &F)
: Stage(), RCU(R), PRF(F) {}
- RetireStage(const RetireStage &Other) = delete;
- RetireStage &operator=(const RetireStage &Other) = delete;
- virtual bool hasWorkToComplete() const override final {
- return !RCU.isEmpty();
- }
- virtual void cycleStart() override final;
- virtual bool execute(InstRef &IR) override final { return true; }
- void notifyInstructionRetired(const InstRef &IR);
- void onInstructionExecuted(unsigned TokenID);
+ bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
+ Error cycleStart() override;
+ Error execute(InstRef &IR) override;
+ void notifyInstructionRetired(const InstRef &IR) const;
};
} // namespace mca
+} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H
+#endif // LLVM_MCA_RETIRE_STAGE_H
diff --git a/contrib/llvm/tools/llvm-mca/Stage.h b/contrib/llvm/include/llvm/MCA/Stages/Stage.h
index 9dbdcd89a33b..fc7ab569bb0f 100644
--- a/contrib/llvm/tools/llvm-mca/Stage.h
+++ b/contrib/llvm/include/llvm/MCA/Stages/Stage.h
@@ -13,64 +13,76 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_STAGE_H
+#ifndef LLVM_MCA_STAGE_H
+#define LLVM_MCA_STAGE_H
-#include "HWEventListener.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Error.h"
#include <set>
+namespace llvm {
namespace mca {
class InstRef;
class Stage {
+ Stage *NextInSequence;
+ std::set<HWEventListener *> Listeners;
+
Stage(const Stage &Other) = delete;
Stage &operator=(const Stage &Other) = delete;
- std::set<HWEventListener *> Listeners;
protected:
const std::set<HWEventListener *> &getListeners() const { return Listeners; }
public:
- Stage();
- virtual ~Stage() = default;
+ Stage() : NextInSequence(nullptr) {}
+ virtual ~Stage();
+
+ /// Returns true if this stage can execute IR during this cycle.
+ virtual bool isAvailable(const InstRef &IR) const { return true; }
- /// Called prior to preExecute to ensure that the stage has items that it
- /// is to process. For example, a FetchStage might have more instructions
- /// that need to be processed, or a RCU might have items that have yet to
- /// retire.
+ /// Returns true if some instructions are still executing in this stage.
virtual bool hasWorkToComplete() const = 0;
/// Called once at the start of each cycle. This can be used as a setup
/// phase to prepare for the executions during the cycle.
- virtual void cycleStart() {}
+ virtual Error cycleStart() { return ErrorSuccess(); }
/// Called once at the end of each cycle.
- virtual void cycleEnd() {}
+ virtual Error cycleEnd() { return ErrorSuccess(); }
- /// Called prior to executing the list of stages.
- /// This can be called multiple times per cycle.
- virtual void preExecute() {}
+ /// The primary action that this stage performs on instruction IR.
+ virtual Error execute(InstRef &IR) = 0;
- /// Called as a cleanup and finalization phase after each execution.
- /// This will only be called if all stages return a success from their
- /// execute callback. This can be called multiple times per cycle.
- virtual void postExecute() {}
+ void setNextInSequence(Stage *NextStage) {
+ assert(!NextInSequence && "This stage already has a NextInSequence!");
+ NextInSequence = NextStage;
+ }
+
+ bool checkNextStage(const InstRef &IR) const {
+ return NextInSequence && NextInSequence->isAvailable(IR);
+ }
- /// The primary action that this stage performs.
- /// Returning false prevents successor stages from having their 'execute'
- /// routine called. This can be called multiple times during a single cycle.
- virtual bool execute(InstRef &IR) = 0;
+ /// Called when an instruction is ready to move to the next pipeline stage.
+ ///
+ /// Stages are responsible for moving instructions to their immediate
+ /// successor stages.
+ Error moveToTheNextStage(InstRef &IR) {
+ assert(checkNextStage(IR) && "Next stage is not ready!");
+ return NextInSequence->execute(IR);
+ }
/// Add a listener to receive callbacks during the execution of this stage.
void addListener(HWEventListener *Listener);
/// Notify listeners of a particular hardware event.
- template <typename EventT> void notifyEvent(const EventT &Event) {
+ template <typename EventT> void notifyEvent(const EventT &Event) const {
for (HWEventListener *Listener : Listeners)
Listener->onEvent(Event);
}
};
} // namespace mca
-#endif // LLVM_TOOLS_LLVM_MCA_STAGE_H
+} // namespace llvm
+#endif // LLVM_MCA_STAGE_H
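Stages now report failures through llvm::Error, and moving an instruction downstream is explicit via checkNextStage()/moveToTheNextStage(). A skeletal, purely illustrative custom stage (PassThroughStage is invented, not part of LLVM):

#include "llvm/MCA/Stages/Stage.h"

namespace {
// Invented example: a stage that simply forwards instructions downstream.
class PassThroughStage : public llvm::mca::Stage {
public:
  bool hasWorkToComplete() const override { return false; }

  llvm::Error execute(llvm::mca::InstRef &IR) override {
    // moveToTheNextStage() asserts that the successor accepts IR, so check
    // availability first.
    if (!checkNextStage(IR))
      return llvm::ErrorSuccess();
    return moveToTheNextStage(IR);
  }
};
} // namespace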
diff --git a/contrib/llvm/include/llvm/MCA/Support.h b/contrib/llvm/include/llvm/MCA/Support.h
new file mode 100644
index 000000000000..7b0c5bf3a486
--- /dev/null
+++ b/contrib/llvm/include/llvm/MCA/Support.h
@@ -0,0 +1,119 @@
+//===--------------------- Support.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Helper functions used by various pipeline components.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_SUPPORT_H
+#define LLVM_MCA_SUPPORT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace mca {
+
+template <typename T>
+class InstructionError : public ErrorInfo<InstructionError<T>> {
+public:
+ static char ID;
+ std::string Message;
+ const T &Inst;
+
+ InstructionError(std::string M, const T &MCI)
+ : Message(std::move(M)), Inst(MCI) {}
+
+ void log(raw_ostream &OS) const override { OS << Message; }
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+};
+
+template <typename T> char InstructionError<T>::ID;
+
+/// This class represents the number of cycles per resource (fractions of
+/// cycles). That quantity is managed here as a ratio, and accessed via the
+/// double cast-operator below. The two quantities, number of cycles and
+/// number of resources, are kept separate. This is used by the
+/// ResourcePressureView to calculate the average resource cycles
+/// per instruction/iteration.
+class ResourceCycles {
+ unsigned Numerator, Denominator;
+
+public:
+ ResourceCycles() : Numerator(0), Denominator(1) {}
+ ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1)
+ : Numerator(Cycles), Denominator(ResourceUnits) {}
+
+ operator double() const {
+ assert(Denominator && "Invalid denominator (must be non-zero).");
+ return (Denominator == 1) ? Numerator : (double)Numerator / Denominator;
+ }
+
+ // Add the components of RHS to this instance. Instead of calculating
+ // the final value here, we keep track of the numerator and denominator
+ // separately, to reduce floating point error.
+ ResourceCycles &operator+=(const ResourceCycles &RHS) {
+ if (Denominator == RHS.Denominator)
+ Numerator += RHS.Numerator;
+ else {
+ // Create a common denominator for LHS and RHS by calculating the least
+ // common multiple from the GCD.
+ unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator);
+ unsigned LCM = (Denominator * RHS.Denominator) / GCD;
+ unsigned LHSNumerator = Numerator * (LCM / Denominator);
+ unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
+ Numerator = LHSNumerator + RHSNumerator;
+ Denominator = LCM;
+ }
+ return *this;
+ }
+};
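A worked example of the accumulation above (illustrative code, not part of this header; it assumes the ResourceCycles declaration is visible): three cycles spread over a two-unit group plus one cycle on a single unit yield 3/2 + 1 = 2.5 cycles.

// 3 cycles over 2 resource units, then 1 cycle over a single unit.
static double exampleResourceCycles() {
  llvm::mca::ResourceCycles RC(3, 2);
  RC += llvm::mca::ResourceCycles(1);
  return static_cast<double>(RC); // 2.5
}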
+
+/// Populates vector Masks with processor resource masks.
+///
+/// The number of bits set in a mask depends on the processor resource type.
+/// Each processor resource mask has at least one bit set. For groups, the
+/// number of bits set in the mask is equal to the cardinality of the group plus
+/// one. Excluding the most significant bit, the remaining bits in the mask
+/// identify processor resources that are part of the group.
+///
+/// Example:
+///
+/// ResourceA -- Mask: 0b001
+/// ResourceB -- Mask: 0b010
+/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111
+///
+/// ResourceAB is a processor resource group containing ResourceA and ResourceB.
+/// Each resource mask uniquely identifies a resource; both ResourceA and
+/// ResourceB only have one bit set.
+/// ResourceAB is a group; excluding the most significant bit in the mask, the
+/// remaining bits identify the composition of the group.
+///
+/// Resource masks are used by the ResourceManager to solve set membership
+/// problems with simple bit manipulation operations.
+void computeProcResourceMasks(const MCSchedModel &SM,
+ MutableArrayRef<uint64_t> Masks);
+
+/// Compute the reciprocal block throughput from a set of processor resource
+/// cycles. The reciprocal block throughput is computed as the MAX between:
+/// - NumMicroOps / DispatchWidth
+/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource).
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+ unsigned NumMicroOps,
+ ArrayRef<unsigned> ProcResourceUsage);
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_SUPPORT_H
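To make the mask encoding documented for computeProcResourceMasks concrete, here is a self-contained restatement of the three-resource example from the comment (the constant names are invented):

#include <cstdint>

constexpr uint64_t ResourceA = 0x1;                          // Mask: 0b001
constexpr uint64_t ResourceB = 0x2;                          // Mask: 0b010
// The group gets a fresh bit (0b100) plus the bits of its members.
constexpr uint64_t ResourceAB = 0x4 | ResourceA | ResourceB; // Mask: 0b111

// Clearing the group bit leaves exactly the members, so set membership
// reduces to a bitwise AND.
static_assert((ResourceAB & ~uint64_t(0x4)) == (ResourceA | ResourceB),
              "ResourceAB is composed of ResourceA and ResourceB");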
diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h
index 6caadea0175b..b753d261a0fc 100644
--- a/contrib/llvm/include/llvm/Object/COFF.h
+++ b/contrib/llvm/include/llvm/Object/COFF.h
@@ -594,6 +594,8 @@ enum class coff_guard_flags : uint32_t {
FidTableHasFlags = 0x10000000, // Indicates that fid tables are 5 bytes
};
+enum class frame_type : uint16_t { Fpo = 0, Trap = 1, Tss = 2, NonFpo = 3 };
+
struct coff_load_config_code_integrity {
support::ulittle16_t Flags;
support::ulittle16_t Catalog;
@@ -883,6 +885,7 @@ public:
assert(is64());
return reinterpret_cast<const coff_load_configuration64 *>(LoadConfig);
}
+ StringRef getRelocationTypeName(uint16_t Type) const;
protected:
void moveSymbolNext(DataRefImpl &Symb) const override;
@@ -968,6 +971,9 @@ public:
return nullptr;
return reinterpret_cast<const dos_header *>(base());
}
+ std::error_code getCOFFHeader(const coff_file_header *&Res) const;
+ std::error_code
+ getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const;
std::error_code getPE32Header(const pe32_header *&Res) const;
std::error_code getPE32PlusHeader(const pe32plus_header *&Res) const;
std::error_code getDataDirectory(uint32_t index,
@@ -1016,6 +1022,8 @@ public:
ArrayRef<uint8_t> getSymbolAuxData(COFFSymbolRef Symbol) const;
+ uint32_t getSymbolIndex(COFFSymbolRef Symbol) const;
+
size_t getSymbolTableEntrySize() const {
if (COFFHeader)
return sizeof(coff_symbol16);
@@ -1059,6 +1067,8 @@ public:
bool isRelocatableObject() const override;
bool is64() const { return PE32PlusHeader; }
+ StringRef mapDebugSectionName(StringRef Name) const override;
+
static bool classof(const Binary *v) { return v->isCOFF(); }
};
@@ -1227,7 +1237,7 @@ struct FpoData {
bool useBP() const { return (Attributes >> 10) & 1; }
// cbFrame: frame pointer
- int getFP() const { return Attributes >> 14; }
+ frame_type getFP() const { return static_cast<frame_type>(Attributes >> 14); }
};
} // end namespace object
diff --git a/contrib/llvm/include/llvm/Object/ELF.h b/contrib/llvm/include/llvm/Object/ELF.h
index 752d468fd25b..bcdc190cc7dc 100644
--- a/contrib/llvm/include/llvm/Object/ELF.h
+++ b/contrib/llvm/include/llvm/Object/ELF.h
@@ -32,7 +32,7 @@ namespace llvm {
namespace object {
StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type);
-uint32_t getELFRelrRelocationType(uint32_t Machine);
+uint32_t getELFRelativeRelocationType(uint32_t Machine);
StringRef getELFSectionTypeName(uint32_t Machine, uint32_t Type);
// Subclasses of ELFFile may need this for template instantiation
@@ -113,7 +113,7 @@ public:
StringRef getRelocationTypeName(uint32_t Type) const;
void getRelocationTypeName(uint32_t Type,
SmallVectorImpl<char> &Result) const;
- uint32_t getRelrRelocationType() const;
+ uint32_t getRelativeRelocationType() const;
const char *getDynamicTagAsString(unsigned Arch, uint64_t Type) const;
const char *getDynamicTagAsString(uint64_t Type) const;
@@ -415,8 +415,8 @@ void ELFFile<ELFT>::getRelocationTypeName(uint32_t Type,
}
template <class ELFT>
-uint32_t ELFFile<ELFT>::getRelrRelocationType() const {
- return getELFRelrRelocationType(getHeader()->e_machine);
+uint32_t ELFFile<ELFT>::getRelativeRelocationType() const {
+ return getELFRelativeRelocationType(getHeader()->e_machine);
}
template <class ELFT>
diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
index 2c0905d545a7..0f620681cd99 100644
--- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
@@ -86,6 +86,8 @@ public:
void setARMSubArch(Triple &TheTriple) const override;
virtual uint16_t getEType() const = 0;
+
+ std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const;
};
class ELFSectionRef : public SectionRef {
@@ -258,6 +260,8 @@ protected:
bool isSectionData(DataRefImpl Sec) const override;
bool isSectionBSS(DataRefImpl Sec) const override;
bool isSectionVirtual(DataRefImpl Sec) const override;
+ bool isBerkeleyText(DataRefImpl Sec) const override;
+ bool isBerkeleyData(DataRefImpl Sec) const override;
relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
relocation_iterator section_rel_end(DataRefImpl Sec) const override;
std::vector<SectionRef> dynamic_relocation_sections() const override;
@@ -331,9 +335,10 @@ protected:
// A symbol is exported if its binding is either GLOBAL or WEAK, and its
// visibility is either DEFAULT or PROTECTED. All other symbols are not
// exported.
- return ((Binding == ELF::STB_GLOBAL || Binding == ELF::STB_WEAK) &&
- (Visibility == ELF::STV_DEFAULT ||
- Visibility == ELF::STV_PROTECTED));
+ return (
+ (Binding == ELF::STB_GLOBAL || Binding == ELF::STB_WEAK ||
+ Binding == ELF::STB_GNU_UNIQUE) &&
+ (Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_PROTECTED));
}
// This flag is used for classof, to distinguish ELFObjectFile from
@@ -757,6 +762,20 @@ bool ELFObjectFile<ELFT>::isSectionVirtual(DataRefImpl Sec) const {
}
template <class ELFT>
+bool ELFObjectFile<ELFT>::isBerkeleyText(DataRefImpl Sec) const {
+ return getSection(Sec)->sh_flags & ELF::SHF_ALLOC &&
+ (getSection(Sec)->sh_flags & ELF::SHF_EXECINSTR ||
+ !(getSection(Sec)->sh_flags & ELF::SHF_WRITE));
+}
+
+template <class ELFT>
+bool ELFObjectFile<ELFT>::isBerkeleyData(DataRefImpl Sec) const {
+ const Elf_Shdr *EShdr = getSection(Sec);
+ return !isBerkeleyText(Sec) && EShdr->sh_type != ELF::SHT_NOBITS &&
+ EShdr->sh_flags & ELF::SHF_ALLOC;
+}
+
+template <class ELFT>
relocation_iterator
ELFObjectFile<ELFT>::section_rel_begin(DataRefImpl Sec) const {
DataRefImpl RelData;
@@ -1019,6 +1038,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return "ELF32-lanai";
case ELF::EM_MIPS:
return "ELF32-mips";
+ case ELF::EM_MSP430:
+ return "ELF32-msp430";
case ELF::EM_PPC:
return "ELF32-ppc";
case ELF::EM_RISCV:
@@ -1089,6 +1110,8 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
default:
report_fatal_error("Invalid ELFCLASS!");
}
+ case ELF::EM_MSP430:
+ return Triple::msp430;
case ELF::EM_PPC:
return Triple::ppc;
case ELF::EM_PPC64:
diff --git a/contrib/llvm/include/llvm/Object/ELFTypes.h b/contrib/llvm/include/llvm/Object/ELFTypes.h
index fb386120e34d..ec3c8e7bae46 100644
--- a/contrib/llvm/include/llvm/Object/ELFTypes.h
+++ b/contrib/llvm/include/llvm/Object/ELFTypes.h
@@ -605,13 +605,12 @@ public:
}
/// Get the note's descriptor.
- ArrayRef<Elf_Word> getDesc() const {
+ ArrayRef<uint8_t> getDesc() const {
if (!Nhdr.n_descsz)
- return ArrayRef<Elf_Word>();
- return ArrayRef<Elf_Word>(
- reinterpret_cast<const Elf_Word *>(
- reinterpret_cast<const uint8_t *>(&Nhdr) + sizeof(Nhdr) +
- alignTo<Elf_Nhdr_Impl<ELFT>::Align>(Nhdr.n_namesz)),
+ return ArrayRef<uint8_t>();
+ return ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(&Nhdr) + sizeof(Nhdr) +
+ alignTo<Elf_Nhdr_Impl<ELFT>::Align>(Nhdr.n_namesz),
Nhdr.n_descsz);
}
@@ -643,14 +642,19 @@ class Elf_Note_Iterator_Impl
// container, either cleanly or with an overflow error.
void advanceNhdr(const uint8_t *NhdrPos, size_t NoteSize) {
RemainingSize -= NoteSize;
- if (RemainingSize == 0u)
+ if (RemainingSize == 0u) {
+ // Ensure that if the iterator walks to the end, the error is checked
+ // afterwards.
+ *Err = Error::success();
Nhdr = nullptr;
- else if (sizeof(*Nhdr) > RemainingSize)
+ } else if (sizeof(*Nhdr) > RemainingSize)
stopWithOverflowError();
else {
Nhdr = reinterpret_cast<const Elf_Nhdr_Impl<ELFT> *>(NhdrPos + NoteSize);
if (Nhdr->getSize() > RemainingSize)
stopWithOverflowError();
+ else
+ *Err = Error::success();
}
}
@@ -658,6 +662,7 @@ class Elf_Note_Iterator_Impl
explicit Elf_Note_Iterator_Impl(Error &Err) : Err(&Err) {}
Elf_Note_Iterator_Impl(const uint8_t *Start, size_t Size, Error &Err)
: RemainingSize(Size), Err(&Err) {
+ consumeError(std::move(Err));
assert(Start && "ELF note iterator starting at NULL");
advanceNhdr(Start, 0u);
}
@@ -671,6 +676,10 @@ public:
return *this;
}
bool operator==(Elf_Note_Iterator_Impl Other) const {
+ if (!Nhdr && Other.Err)
+ (void)(bool)(*Other.Err);
+ if (!Other.Nhdr && Err)
+ (void)(bool)(*Err);
return Nhdr == Other.Nhdr;
}
bool operator!=(Elf_Note_Iterator_Impl Other) const {
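With descriptors now exposed as raw bytes and the iterator reporting back through the Error it was constructed with, a consumer loop looks roughly like the sketch below. This is a hedged example, not code from this change: it assumes an ELFFile<ELFT> named Obj, a PT_NOTE program header Phdr, and the ELFFile<ELFT>::notes(Phdr, Err) accessor and typename ELFT::Phdr alias.

    // Count the notes in one PT_NOTE segment, propagating iterator errors.
    template <class ELFT>
    static llvm::Expected<unsigned>
    countNotes(const llvm::object::ELFFile<ELFT> &Obj,
               const typename ELFT::Phdr &Phdr) {
      unsigned N = 0;
      llvm::Error Err = llvm::Error::success();
      for (auto &&Note : Obj.notes(Phdr, Err)) {
        llvm::ArrayRef<uint8_t> Desc = Note.getDesc(); // descriptor as raw bytes
        (void)Desc;
        ++N;
      }
      if (Err) // truncated or overflowing notes surface here
        return std::move(Err);
      return N;
    }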
diff --git a/contrib/llvm/include/llvm/Object/Error.h b/contrib/llvm/include/llvm/Object/Error.h
index eb938338715d..a15f8b9236eb 100644
--- a/contrib/llvm/include/llvm/Object/Error.h
+++ b/contrib/llvm/include/llvm/Object/Error.h
@@ -50,6 +50,7 @@ inline std::error_code make_error_code(object_error e) {
/// Currently inherits from ECError for easy interoperability with
/// std::error_code, but this will be removed in the future.
class BinaryError : public ErrorInfo<BinaryError, ECError> {
+ virtual void anchor();
public:
static char ID;
BinaryError() {
diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h
index 159c1765ab86..c2f4f4062934 100644
--- a/contrib/llvm/include/llvm/Object/MachO.h
+++ b/contrib/llvm/include/llvm/Object/MachO.h
@@ -356,7 +356,7 @@ public:
basic_symbol_iterator symbol_end() const override;
// MachO specific.
- basic_symbol_iterator getSymbolByIndex(unsigned Index) const;
+ symbol_iterator getSymbolByIndex(unsigned Index) const;
uint64_t getSymbolIndex(DataRefImpl Symb) const;
section_iterator section_begin() const override;
@@ -616,6 +616,9 @@ public:
case MachO::PLATFORM_TVOS: return "tvos";
case MachO::PLATFORM_WATCHOS: return "watchos";
case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+ case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
+ case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
+ case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
default:
std::string ret;
raw_string_ostream ss(ret);
diff --git a/contrib/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm/include/llvm/Object/ObjectFile.h
index 02d62e8e4879..036c99cb6baf 100644
--- a/contrib/llvm/include/llvm/Object/ObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ObjectFile.h
@@ -104,13 +104,25 @@ public:
uint64_t getAlignment() const;
bool isCompressed() const;
+ /// Whether this section contains instructions.
bool isText() const;
+ /// Whether this section contains data, not instructions.
bool isData() const;
+ /// Whether this section contains BSS uninitialized data.
bool isBSS() const;
bool isVirtual() const;
bool isBitcode() const;
bool isStripped() const;
+ /// Whether this section will be placed in the text segment, according to the
+ /// Berkeley size format. This is true if the section is allocatable, and
+ /// contains either code or readonly data.
+ bool isBerkeleyText() const;
+ /// Whether this section will be placed in the data segment, according to the
+ /// Berkeley size format. This is true if the section is allocatable and
+ /// contains data (e.g. PROGBITS), but is not text.
+ bool isBerkeleyData() const;
+
bool containsSymbol(SymbolRef S) const;
relocation_iterator relocation_begin() const;
@@ -238,6 +250,8 @@ protected:
virtual bool isSectionVirtual(DataRefImpl Sec) const = 0;
virtual bool isSectionBitcode(DataRefImpl Sec) const;
virtual bool isSectionStripped(DataRefImpl Sec) const;
+ virtual bool isBerkeleyText(DataRefImpl Sec) const;
+ virtual bool isBerkeleyData(DataRefImpl Sec) const;
virtual relocation_iterator section_rel_begin(DataRefImpl Sec) const = 0;
virtual relocation_iterator section_rel_end(DataRefImpl Sec) const = 0;
virtual section_iterator getRelocatedSection(DataRefImpl Sec) const;
@@ -449,6 +463,14 @@ inline bool SectionRef::isStripped() const {
return OwningObject->isSectionStripped(SectionPimpl);
}
+inline bool SectionRef::isBerkeleyText() const {
+ return OwningObject->isBerkeleyText(SectionPimpl);
+}
+
+inline bool SectionRef::isBerkeleyData() const {
+ return OwningObject->isBerkeleyData(SectionPimpl);
+}
+
inline relocation_iterator SectionRef::relocation_begin() const {
return OwningObject->section_rel_begin(SectionPimpl);
}
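The two Berkeley predicates let tools reproduce size(1)'s Berkeley totals without format-specific logic. A minimal sketch, assuming ObjFile is an already-loaded llvm::object::ObjectFile:

    uint64_t Text = 0, Data = 0, Bss = 0;
    for (const llvm::object::SectionRef &Sec : ObjFile.sections()) {
      if (Sec.isBerkeleyText())
        Text += Sec.getSize();   // allocatable code or read-only data
      else if (Sec.isBerkeleyData())
        Data += Sec.getSize();   // allocatable, initialized data
      else if (Sec.isBSS())
        Bss += Sec.getSize();    // allocatable but occupies no file space
    }
    // `size -B` style total: Text + Data + Bss.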
diff --git a/contrib/llvm/include/llvm/Object/RelocVisitor.h b/contrib/llvm/include/llvm/Object/RelocVisitor.h
index 008e109f6679..9a978de2e599 100644
--- a/contrib/llvm/include/llvm/Object/RelocVisitor.h
+++ b/contrib/llvm/include/llvm/Object/RelocVisitor.h
@@ -129,6 +129,8 @@ private:
case ELF::R_X86_64_NONE:
return 0;
case ELF::R_X86_64_64:
+ case ELF::R_X86_64_DTPOFF32:
+ case ELF::R_X86_64_DTPOFF64:
return Value + getELFAddend(R);
case ELF::R_X86_64_PC32:
return Value + getELFAddend(R) - R.getOffset();
@@ -333,6 +335,7 @@ private:
case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
// For wasm section, its offset at 0 -- ignoring Value
return 0;
}
diff --git a/contrib/llvm/include/llvm/Object/Wasm.h b/contrib/llvm/include/llvm/Object/Wasm.h
index fd34e45feb62..ed857652a048 100644
--- a/contrib/llvm/include/llvm/Object/Wasm.h
+++ b/contrib/llvm/include/llvm/Object/Wasm.h
@@ -18,10 +18,11 @@
#define LLVM_OBJECT_WASM_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
@@ -36,13 +37,16 @@ namespace object {
class WasmSymbol {
public:
WasmSymbol(const wasm::WasmSymbolInfo &Info,
- const wasm::WasmSignature *FunctionType,
- const wasm::WasmGlobalType *GlobalType)
- : Info(Info), FunctionType(FunctionType), GlobalType(GlobalType) {}
+ const wasm::WasmGlobalType *GlobalType,
+ const wasm::WasmEventType *EventType,
+ const wasm::WasmSignature *Signature)
+ : Info(Info), GlobalType(GlobalType), EventType(EventType),
+ Signature(Signature) {}
const wasm::WasmSymbolInfo &Info;
- const wasm::WasmSignature *FunctionType;
const wasm::WasmGlobalType *GlobalType;
+ const wasm::WasmEventType *EventType;
+ const wasm::WasmSignature *Signature;
bool isTypeFunction() const {
return Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION;
@@ -58,6 +62,8 @@ public:
return Info.Kind == wasm::WASM_SYMBOL_TYPE_SECTION;
}
+ bool isTypeEvent() const { return Info.Kind == wasm::WASM_SYMBOL_TYPE_EVENT; }
+
bool isDefined() const { return !isUndefined(); }
bool isUndefined() const {
@@ -98,9 +104,9 @@ public:
struct WasmSection {
WasmSection() = default;
- uint32_t Type = 0; // Section type (See below)
- uint32_t Offset = 0; // Offset with in the file
- StringRef Name; // Section name (User-defined sections only)
+ uint32_t Type = 0; // Section type (See below)
+  uint32_t Offset = 0;       // Offset within the file

+ StringRef Name; // Section name (User-defined sections only)
ArrayRef<uint8_t> Content; // Section content
std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section
};
@@ -119,19 +125,21 @@ public:
const WasmSymbol &getWasmSymbol(const DataRefImpl &Symb) const;
const WasmSymbol &getWasmSymbol(const SymbolRef &Symbol) const;
const WasmSection &getWasmSection(const SectionRef &Section) const;
- const wasm::WasmRelocation &getWasmRelocation(const RelocationRef& Ref) const;
+ const wasm::WasmRelocation &getWasmRelocation(const RelocationRef &Ref) const;
static bool classof(const Binary *v) { return v->isWasm(); }
+ const wasm::WasmDylinkInfo &dylinkInfo() const { return DylinkInfo; }
ArrayRef<wasm::WasmSignature> types() const { return Signatures; }
ArrayRef<uint32_t> functionTypes() const { return FunctionTypes; }
ArrayRef<wasm::WasmImport> imports() const { return Imports; }
ArrayRef<wasm::WasmTable> tables() const { return Tables; }
ArrayRef<wasm::WasmLimits> memories() const { return Memories; }
ArrayRef<wasm::WasmGlobal> globals() const { return Globals; }
+ ArrayRef<wasm::WasmEvent> events() const { return Events; }
ArrayRef<wasm::WasmExport> exports() const { return Exports; }
ArrayRef<WasmSymbol> syms() const { return Symbols; }
- const wasm::WasmLinkingData& linkingData() const { return LinkingData; }
+ const wasm::WasmLinkingData &linkingData() const { return LinkingData; }
uint32_t getNumberOfSymbols() const { return Symbols.size(); }
ArrayRef<wasm::WasmElemSegment> elements() const { return ElemSegments; }
ArrayRef<WasmSegment> dataSegments() const { return DataSegments; }
@@ -140,6 +148,7 @@ public:
uint32_t startFunction() const { return StartFunction; }
uint32_t getNumImportedGlobals() const { return NumImportedGlobals; }
uint32_t getNumImportedFunctions() const { return NumImportedFunctions; }
+ uint32_t getNumImportedEvents() const { return NumImportedEvents; }
void moveSymbolNext(DataRefImpl &Symb) const override;
@@ -151,7 +160,7 @@ public:
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
- uint64_t getWasmSymbolValue(const WasmSymbol& Sym) const;
+ uint64_t getWasmSymbolValue(const WasmSymbol &Sym) const;
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
@@ -192,6 +201,7 @@ public:
Triple::ArchType getArch() const override;
SubtargetFeatures getFeatures() const override;
bool isRelocatableObject() const override;
+ bool isSharedObject() const;
struct ReadContext {
const uint8_t *Start;
@@ -204,12 +214,16 @@ private:
bool isDefinedFunctionIndex(uint32_t Index) const;
bool isValidGlobalIndex(uint32_t Index) const;
bool isDefinedGlobalIndex(uint32_t Index) const;
+ bool isValidEventIndex(uint32_t Index) const;
+ bool isDefinedEventIndex(uint32_t Index) const;
bool isValidFunctionSymbol(uint32_t Index) const;
bool isValidGlobalSymbol(uint32_t Index) const;
+ bool isValidEventSymbol(uint32_t Index) const;
bool isValidDataSymbol(uint32_t Index) const;
bool isValidSectionSymbol(uint32_t Index) const;
wasm::WasmFunction &getDefinedFunction(uint32_t Index);
wasm::WasmGlobal &getDefinedGlobal(uint32_t Index);
+ wasm::WasmEvent &getDefinedEvent(uint32_t Index);
const WasmSection &getWasmSection(DataRefImpl Ref) const;
const wasm::WasmRelocation &getWasmRelocation(DataRefImpl Ref) const;
@@ -225,6 +239,7 @@ private:
Error parseTableSection(ReadContext &Ctx);
Error parseMemorySection(ReadContext &Ctx);
Error parseGlobalSection(ReadContext &Ctx);
+ Error parseEventSection(ReadContext &Ctx);
Error parseExportSection(ReadContext &Ctx);
Error parseStartSection(ReadContext &Ctx);
Error parseElemSection(ReadContext &Ctx);
@@ -232,6 +247,7 @@ private:
Error parseDataSection(ReadContext &Ctx);
// Custom section types
+ Error parseDylinkSection(ReadContext &Ctx);
Error parseNameSection(ReadContext &Ctx);
Error parseLinkingSection(ReadContext &Ctx);
Error parseLinkingSectionSymtab(ReadContext &Ctx);
@@ -240,11 +256,13 @@ private:
wasm::WasmObjectHeader Header;
std::vector<WasmSection> Sections;
+ wasm::WasmDylinkInfo DylinkInfo;
std::vector<wasm::WasmSignature> Signatures;
std::vector<uint32_t> FunctionTypes;
std::vector<wasm::WasmTable> Tables;
std::vector<wasm::WasmLimits> Memories;
std::vector<wasm::WasmGlobal> Globals;
+ std::vector<wasm::WasmEvent> Events;
std::vector<wasm::WasmImport> Imports;
std::vector<wasm::WasmExport> Exports;
std::vector<wasm::WasmElemSegment> ElemSegments;
@@ -254,18 +272,63 @@ private:
std::vector<wasm::WasmFunctionName> DebugNames;
uint32_t StartFunction = -1;
bool HasLinkingSection = false;
+ bool HasDylinkSection = false;
wasm::WasmLinkingData LinkingData;
uint32_t NumImportedGlobals = 0;
uint32_t NumImportedFunctions = 0;
+ uint32_t NumImportedEvents = 0;
uint32_t CodeSection = 0;
uint32_t DataSection = 0;
uint32_t GlobalSection = 0;
+ uint32_t EventSection = 0;
+};
+
+class WasmSectionOrderChecker {
+public:
+ // We define orders for all core wasm sections and known custom sections.
+ enum : int {
+ // Core sections
+ // The order of standard sections is precisely given by the spec.
+ WASM_SEC_ORDER_TYPE = 1,
+ WASM_SEC_ORDER_IMPORT = 2,
+ WASM_SEC_ORDER_FUNCTION = 3,
+ WASM_SEC_ORDER_TABLE = 4,
+ WASM_SEC_ORDER_MEMORY = 5,
+ WASM_SEC_ORDER_GLOBAL = 6,
+ WASM_SEC_ORDER_EVENT = 7,
+ WASM_SEC_ORDER_EXPORT = 8,
+ WASM_SEC_ORDER_START = 9,
+ WASM_SEC_ORDER_ELEM = 10,
+ WASM_SEC_ORDER_DATACOUNT = 11,
+ WASM_SEC_ORDER_CODE = 12,
+ WASM_SEC_ORDER_DATA = 13,
+
+ // Custom sections
+ // "dylink" should be the very first section in the module
+ WASM_SEC_ORDER_DYLINK = 0,
+ // "linking" section requires DATA section in order to validate data symbols
+ WASM_SEC_ORDER_LINKING = 100,
+ // Must come after "linking" section in order to validate reloc indexes.
+ WASM_SEC_ORDER_RELOC = 101,
+ // "name" section must appear after DATA. Comes after "linking" to allow
+ // symbol table to set default function name.
+ WASM_SEC_ORDER_NAME = 102,
+ // "producers" section must appear after "name" section.
+ WASM_SEC_ORDER_PRODUCERS = 103
+ };
+
+ bool isValidSectionOrder(unsigned ID, StringRef CustomSectionName = "");
+
+private:
+  int LastOrder = -1; // Order of the most recently seen known section
+
+ // Returns -1 for unknown sections.
+ int getSectionOrder(unsigned ID, StringRef CustomSectionName = "");
};
} // end namespace object
-inline raw_ostream &operator<<(raw_ostream &OS,
- const object::WasmSymbol &Sym) {
+inline raw_ostream &operator<<(raw_ostream &OS, const object::WasmSymbol &Sym) {
Sym.print(OS);
return OS;
}
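A hedged sketch of how the order checker is meant to be driven while a module's sections are scanned; Sections stands in for the parsed section list and the error reporting is only illustrative:

    llvm::object::WasmSectionOrderChecker Checker;
    for (const llvm::object::WasmSection &Sec : Sections) {
      if (!Checker.isValidSectionOrder(Sec.Type, Sec.Name))
        return llvm::make_error<llvm::object::GenericBinaryError>(
            "out-of-order section", llvm::object::object_error::parse_failed);
    }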
diff --git a/contrib/llvm/include/llvm/Object/WasmTraits.h b/contrib/llvm/include/llvm/Object/WasmTraits.h
index ebcd00b15227..049d72f79e41 100644
--- a/contrib/llvm/include/llvm/Object/WasmTraits.h
+++ b/contrib/llvm/include/llvm/Object/WasmTraits.h
@@ -24,14 +24,20 @@ template <typename T> struct DenseMapInfo;
// Traits for using WasmSignature in a DenseMap.
template <> struct DenseMapInfo<wasm::WasmSignature> {
static wasm::WasmSignature getEmptyKey() {
- return wasm::WasmSignature{{}, 1};
+ wasm::WasmSignature Sig;
+ Sig.State = wasm::WasmSignature::Empty;
+ return Sig;
}
static wasm::WasmSignature getTombstoneKey() {
- return wasm::WasmSignature{{}, 2};
+ wasm::WasmSignature Sig;
+ Sig.State = wasm::WasmSignature::Tombstone;
+ return Sig;
}
static unsigned getHashValue(const wasm::WasmSignature &Sig) {
- unsigned H = hash_value(Sig.ReturnType);
- for (int32_t Param : Sig.ParamTypes)
+ uintptr_t H = hash_value(Sig.State);
+ for (auto Ret : Sig.Returns)
+ H = hash_combine(H, Ret);
+ for (auto Param : Sig.Params)
H = hash_combine(H, Param);
return H;
}
diff --git a/contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h b/contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h
index 78f021fc0386..253c627dd683 100644
--- a/contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h
+++ b/contrib/llvm/include/llvm/ObjectYAML/COFFYAML.h
@@ -58,7 +58,13 @@ LLVM_YAML_STRONG_TYPEDEF(uint8_t, AuxSymbolType)
struct Relocation {
uint32_t VirtualAddress;
uint16_t Type;
+
+ // Normally a Relocation can refer to the symbol via its name.
+ // It can also use a direct symbol table index instead (with no name
+  // specified), allowing disambiguation between multiple symbols with the
+ // same name or crafting intentionally broken files for testing.
StringRef SymbolName;
+ Optional<uint32_t> SymbolTableIndex;
};
struct Section {
diff --git a/contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h b/contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h
index 6fc69735f1c7..f2b0c35521f0 100644
--- a/contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/contrib/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -68,6 +68,7 @@ struct FileHeader {
ELF_ELFCLASS Class;
ELF_ELFDATA Data;
ELF_ELFOSABI OSABI;
+ llvm::yaml::Hex8 ABIVersion;
ELF_ET Type;
ELF_EM Machine;
ELF_EF Flags;
@@ -123,6 +124,7 @@ struct Section {
StringRef Link;
StringRef Info;
llvm::yaml::Hex64 AddressAlign;
+ Optional<llvm::yaml::Hex64> EntSize;
Section(SectionKind Kind) : Kind(Kind) {}
virtual ~Section();
diff --git a/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h b/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h
index 8cd08e520560..406dd7cb515f 100644
--- a/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h
+++ b/contrib/llvm/include/llvm/ObjectYAML/WasmYAML.h
@@ -74,6 +74,12 @@ struct Global {
wasm::WasmInitExpr InitExpr;
};
+struct Event {
+ uint32_t Index;
+ uint32_t Attribute;
+ uint32_t SigIndex;
+};
+
struct Import {
StringRef Module;
StringRef Field;
@@ -83,6 +89,7 @@ struct Import {
Global GlobalImport;
Table TableImport;
Limits Memory;
+ Event EventImport;
};
};
@@ -176,6 +183,21 @@ struct CustomSection : Section {
yaml::BinaryRef Payload;
};
+struct DylinkSection : CustomSection {
+ DylinkSection() : CustomSection("dylink") {}
+
+ static bool classof(const Section *S) {
+ auto C = dyn_cast<CustomSection>(S);
+ return C && C->Name == "dylink";
+ }
+
+ uint32_t MemorySize;
+ uint32_t MemoryAlignment;
+ uint32_t TableSize;
+ uint32_t TableAlignment;
+ std::vector<StringRef> Needed;
+};
+
struct NameSection : CustomSection {
NameSection() : CustomSection("name") {}
@@ -262,6 +284,16 @@ struct GlobalSection : Section {
std::vector<Global> Globals;
};
+struct EventSection : Section {
+ EventSection() : Section(wasm::WASM_SEC_EVENT) {}
+
+ static bool classof(const Section *S) {
+ return S->Type == wasm::WASM_SEC_EVENT;
+ }
+
+ std::vector<Event> Events;
+};
+
struct ExportSection : Section {
ExportSection() : Section(wasm::WASM_SEC_EXPORT) {}
@@ -339,6 +371,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::InitFunction)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::ComdatEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Comdat)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Event)
namespace llvm {
namespace yaml {
@@ -471,6 +504,10 @@ template <> struct ScalarEnumerationTraits<WasmYAML::RelocType> {
static void enumeration(IO &IO, WasmYAML::RelocType &Kind);
};
+template <> struct MappingTraits<WasmYAML::Event> {
+ static void mapping(IO &IO, WasmYAML::Event &Event);
+};
+
} // end namespace yaml
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Option/OptTable.h b/contrib/llvm/include/llvm/Option/OptTable.h
index 743c4772c98c..fdb05d8a15af 100644
--- a/contrib/llvm/include/llvm/Option/OptTable.h
+++ b/contrib/llvm/include/llvm/Option/OptTable.h
@@ -217,8 +217,8 @@ public:
/// Render the help text for an option table.
///
/// \param OS - The stream to write the help text to.
- /// \param Name - The name to use in the usage line.
- /// \param Title - The title to use in the usage line.
+ /// \param Usage - USAGE: Usage
+ /// \param Title - OVERVIEW: Title
/// \param FlagsToInclude - If non-zero, only include options with any
/// of these flags set.
/// \param FlagsToExclude - Exclude options with any of these flags set.
@@ -226,11 +226,11 @@ public:
/// that don't have help texts. By default, we display
/// only options that are not hidden and have help
/// texts.
- void PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
+ void PrintHelp(raw_ostream &OS, const char *Usage, const char *Title,
unsigned FlagsToInclude, unsigned FlagsToExclude,
bool ShowAllAliases) const;
- void PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
+ void PrintHelp(raw_ostream &OS, const char *Usage, const char *Title,
bool ShowHidden = false, bool ShowAllAliases = false) const;
};
diff --git a/contrib/llvm/include/llvm/Pass.h b/contrib/llvm/include/llvm/Pass.h
index d65347d611ea..5935a0853d32 100644
--- a/contrib/llvm/include/llvm/Pass.h
+++ b/contrib/llvm/include/llvm/Pass.h
@@ -356,17 +356,6 @@ protected:
/// This is the storage for the -time-passes option.
extern bool TimePassesIsEnabled;
-/// isFunctionInPrintList - returns true if a function should be printed via
-// debugging options like -print-after-all/-print-before-all.
-// Tells if the function IR should be printed by PrinterPass.
-extern bool isFunctionInPrintList(StringRef FunctionName);
-
-/// forcePrintModuleIR - returns true if IR printing passes should
-// be printing module IR (even for local-pass printers e.g. function-pass)
-// to provide more context, as enabled by debugging option -print-module-scope
-// Tells if IR printer should be printing module IR
-extern bool forcePrintModuleIR();
-
} // end namespace llvm
// Include support files that contain important APIs commonly used by Passes,
diff --git a/contrib/llvm/include/llvm/Passes/PassBuilder.h b/contrib/llvm/include/llvm/Passes/PassBuilder.h
index 24a93bc76af5..fa59345a02cf 100644
--- a/contrib/llvm/include/llvm/Passes/PassBuilder.h
+++ b/contrib/llvm/include/llvm/Passes/PassBuilder.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Error.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include <vector>
@@ -32,10 +33,13 @@ class ModuleSummaryIndex;
/// A struct capturing PGO tunables.
struct PGOOptions {
PGOOptions(std::string ProfileGenFile = "", std::string ProfileUseFile = "",
- std::string SampleProfileFile = "", bool RunProfileGen = false,
- bool SamplePGOSupport = false)
+ std::string SampleProfileFile = "",
+ std::string ProfileRemappingFile = "",
+ bool RunProfileGen = false, bool SamplePGOSupport = false)
: ProfileGenFile(ProfileGenFile), ProfileUseFile(ProfileUseFile),
- SampleProfileFile(SampleProfileFile), RunProfileGen(RunProfileGen),
+ SampleProfileFile(SampleProfileFile),
+ ProfileRemappingFile(ProfileRemappingFile),
+ RunProfileGen(RunProfileGen),
SamplePGOSupport(SamplePGOSupport || !SampleProfileFile.empty()) {
assert((RunProfileGen ||
!SampleProfileFile.empty() ||
@@ -45,6 +49,7 @@ struct PGOOptions {
std::string ProfileGenFile;
std::string ProfileUseFile;
std::string SampleProfileFile;
+ std::string ProfileRemappingFile;
bool RunProfileGen;
bool SamplePGOSupport;
};
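A hedged construction sketch showing where the new remapping file slots in; the file names are placeholders, not defaults shipped by LLVM, and `using namespace llvm;` is assumed:

    // (ProfileGen, ProfileUse, SampleProfile, ProfileRemapping, RunProfileGen, SamplePGOSupport)
    PGOOptions UseWithRemap(/*ProfileGenFile=*/"",
                            /*ProfileUseFile=*/"code.profdata",
                            /*SampleProfileFile=*/"",
                            /*ProfileRemappingFile=*/"code.remap",
                            /*RunProfileGen=*/false,
                            /*SamplePGOSupport=*/false);
    PassBuilder PB(/*TM=*/nullptr, UseWithRemap);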
@@ -58,6 +63,7 @@ struct PGOOptions {
class PassBuilder {
TargetMachine *TM;
Optional<PGOOptions> PGOOpt;
+ PassInstrumentationCallbacks *PIC;
public:
/// A struct to capture parsed pass pipeline names.
@@ -172,8 +178,9 @@ public:
};
explicit PassBuilder(TargetMachine *TM = nullptr,
- Optional<PGOOptions> PGOOpt = None)
- : TM(TM), PGOOpt(PGOOpt) {}
+ Optional<PGOOptions> PGOOpt = None,
+ PassInstrumentationCallbacks *PIC = nullptr)
+ : TM(TM), PGOOpt(PGOOpt), PIC(PIC) {}
/// Cross register the analysis managers through their proxies.
///
@@ -378,8 +385,9 @@ public:
/// If the sequence of passes aren't all the exact same kind of pass, it will
/// be an error. You cannot mix different levels implicitly, you must
/// explicitly form a pass manager in which to nest passes.
- bool parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText,
- bool VerifyEachPass = true, bool DebugLogging = false);
+ Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText,
+ bool VerifyEachPass = true,
+ bool DebugLogging = false);
/// {{@ Parse a textual pass pipeline description into a specific PassManager
///
@@ -388,12 +396,15 @@ public:
/// this is the valid pipeline text:
///
/// function(lpass)
- bool parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText,
- bool VerifyEachPass = true, bool DebugLogging = false);
- bool parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText,
- bool VerifyEachPass = true, bool DebugLogging = false);
- bool parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText,
- bool VerifyEachPass = true, bool DebugLogging = false);
+ Error parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText,
+ bool VerifyEachPass = true,
+ bool DebugLogging = false);
+ Error parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText,
+ bool VerifyEachPass = true,
+ bool DebugLogging = false);
+ Error parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText,
+ bool VerifyEachPass = true,
+ bool DebugLogging = false);
/// @}}
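Since these parse entry points now return llvm::Error instead of bool, call sites switch to the standard Error idiom. A hedged sketch, assuming `using namespace llvm;`, a PassBuilder PB whose analysis managers are already cross-registered, and an example pipeline string:

    ModulePassManager MPM;
    if (Error Err = PB.parsePassPipeline(MPM, "default<O2>")) {
      errs() << "invalid pipeline: " << toString(std::move(Err)) << "\n";
      return 1;
    }
    MPM.run(M, MAM); // M (Module) and MAM (ModuleAnalysisManager) assumed to exist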
/// Parse a textual alias analysis pipeline into the provided AA manager.
@@ -411,7 +422,7 @@ public:
/// Returns false if the text cannot be parsed cleanly. The specific state of
/// the \p AA manager is unspecified if such an error is encountered and this
/// returns false.
- bool parseAAPipeline(AAManager &AA, StringRef PipelineText);
+ Error parseAAPipeline(AAManager &AA, StringRef PipelineText);
/// Register a callback for a default optimizer pipeline extension
/// point
@@ -490,6 +501,18 @@ public:
PipelineStartEPCallbacks.push_back(C);
}
+ /// Register a callback for a default optimizer pipeline extension point
+ ///
+ /// This extension point allows adding optimizations at the very end of the
+ /// function optimization pipeline. A key difference between this and the
+ /// legacy PassManager's OptimizerLast callback is that this extension point
+ /// is not triggered at O0. Extensions to the O0 pipeline should append their
+ /// passes to the end of the overall pipeline.
+ void registerOptimizerLastEPCallback(
+ const std::function<void(FunctionPassManager &, OptimizationLevel)> &C) {
+ OptimizerLastEPCallbacks.push_back(C);
+ }
+
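A hedged sketch of registering this extension point; the added pass is only illustrative, not part of this change, and `using namespace llvm;` is assumed:

    PB.registerOptimizerLastEPCallback(
        [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel /*Level*/) {
          FPM.addPass(SimplifyCFGPass()); // illustrative late cleanup pass
        });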
/// Register a callback for parsing an AliasAnalysis Name to populate
/// the given AAManager \p AA
void registerParseAACallback(
@@ -559,33 +582,34 @@ private:
static Optional<std::vector<PipelineElement>>
parsePipelineText(StringRef Text);
- bool parseModulePass(ModulePassManager &MPM, const PipelineElement &E,
+ Error parseModulePass(ModulePassManager &MPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging);
+ Error parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E,
bool VerifyEachPass, bool DebugLogging);
- bool parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E,
+ Error parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging);
+ Error parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
bool VerifyEachPass, bool DebugLogging);
- bool parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging);
- bool parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging);
bool parseAAPassName(AAManager &AA, StringRef Name);
- bool parseLoopPassPipeline(LoopPassManager &LPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass, bool DebugLogging);
- bool parseFunctionPassPipeline(FunctionPassManager &FPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass, bool DebugLogging);
- bool parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+ Error parseLoopPassPipeline(LoopPassManager &LPM,
ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass, bool DebugLogging);
- bool parseModulePassPipeline(ModulePassManager &MPM,
+ Error parseFunctionPassPipeline(FunctionPassManager &FPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass, bool DebugLogging);
+ Error parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass, bool DebugLogging);
+ Error parseModulePassPipeline(ModulePassManager &MPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass, bool DebugLogging);
void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
OptimizationLevel Level, bool RunProfileGen,
std::string ProfileGenFile,
- std::string ProfileUseFile);
+ std::string ProfileUseFile,
+ std::string ProfileRemappingFile);
void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel);
@@ -602,6 +626,8 @@ private:
CGSCCOptimizerLateEPCallbacks;
SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
VectorizerStartEPCallbacks;
+ SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
+ OptimizerLastEPCallbacks;
// Module callbacks
SmallVector<std::function<void(ModulePassManager &)>, 2>
PipelineStartEPCallbacks;
diff --git a/contrib/llvm/include/llvm/Passes/StandardInstrumentations.h b/contrib/llvm/include/llvm/Passes/StandardInstrumentations.h
new file mode 100644
index 000000000000..8c6f5e1e22f7
--- /dev/null
+++ b/contrib/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -0,0 +1,70 @@
+//===- StandardInstrumentations.h ------------------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This header defines a class that provides bookkeeping for all standard
+/// (i.e. in-tree) pass instrumentations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASSES_STANDARDINSTRUMENTATIONS_H
+#define LLVM_PASSES_STANDARDINSTRUMENTATIONS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/PassTimingInfo.h"
+
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class Module;
+
+/// Instrumentation to print IR before/after passes.
+///
+/// Needs state to be able to print the module after a pass that invalidates
+/// the IR unit (typically a Loop or SCC).
+class PrintIRInstrumentation {
+public:
+ PrintIRInstrumentation() = default;
+ ~PrintIRInstrumentation();
+
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+private:
+ bool printBeforePass(StringRef PassID, Any IR);
+ void printAfterPass(StringRef PassID, Any IR);
+ void printAfterPassInvalidated(StringRef PassID);
+
+ using PrintModuleDesc = std::tuple<const Module *, std::string, StringRef>;
+
+ void pushModuleDesc(StringRef PassID, Any IR);
+ PrintModuleDesc popModuleDesc(StringRef PassID);
+
+  /// Stack of Module descriptions, enough to print the module after a given
+ /// pass.
+ SmallVector<PrintModuleDesc, 2> ModuleDescStack;
+ bool StoreModuleDesc = false;
+};
+
+/// This class provides an interface to register all the standard pass
+/// instrumentations and manages their state (if any).
+class StandardInstrumentations {
+ PrintIRInstrumentation PrintIR;
+ TimePassesHandler TimePasses;
+
+public:
+ StandardInstrumentations() = default;
+
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+};
+} // namespace llvm
+
+#endif
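A hedged sketch of the expected wiring in a driver: build the callbacks object, let the standard instrumentations register themselves, and hand the callbacks to PassBuilder (whose constructor gained the PIC parameter in this same change); `using namespace llvm;` is assumed:

    PassInstrumentationCallbacks PIC;
    StandardInstrumentations SI;
    SI.registerCallbacks(PIC);          // -print-before/after-all, -time-passes, ...
    PassBuilder PB(/*TM=*/nullptr, /*PGOOpt=*/None, &PIC);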
diff --git a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index e820f71cb6d5..beaa36553287 100644
--- a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -510,7 +510,6 @@ class CoverageMapping {
DenseMap<size_t, DenseSet<size_t>> RecordProvenance;
std::vector<FunctionRecord> Functions;
std::vector<std::pair<std::string, uint64_t>> FuncHashMismatches;
- std::vector<std::pair<std::string, uint64_t>> FuncCounterMismatches;
CoverageMapping() = default;
@@ -537,9 +536,7 @@ public:
///
/// This is a count of functions whose profile is out of date or otherwise
/// can't be associated with any coverage information.
- unsigned getMismatchedCount() const {
- return FuncHashMismatches.size() + FuncCounterMismatches.size();
- }
+ unsigned getMismatchedCount() const { return FuncHashMismatches.size(); }
/// A hash mismatch occurs when a profile record for a symbol does not have
/// the same hash as a coverage mapping record for the same symbol. This
@@ -549,14 +546,6 @@ public:
return FuncHashMismatches;
}
- /// A counter mismatch occurs when there is an error when evaluating the
- /// counter expressions in a coverage mapping record. This returns a list of
- /// counter mismatches, where each mismatch is a pair of the symbol name and
- /// the number of valid evaluated counter expressions.
- ArrayRef<std::pair<std::string, uint64_t>> getCounterMismatches() const {
- return FuncCounterMismatches;
- }
-
/// Returns a lexicographically sorted, unique list of files that are
/// covered.
std::vector<StringRef> getUniqueSourceFiles() const;
diff --git a/contrib/llvm/include/llvm/ProfileData/GCOV.h b/contrib/llvm/include/llvm/ProfileData/GCOV.h
index 8500401e44ad..a088f63a6915 100644
--- a/contrib/llvm/include/llvm/ProfileData/GCOV.h
+++ b/contrib/llvm/include/llvm/ProfileData/GCOV.h
@@ -24,9 +24,11 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
#include <utility>
@@ -266,13 +268,14 @@ struct GCOVEdge {
GCOVBlock &Src;
GCOVBlock &Dst;
uint64_t Count = 0;
+ uint64_t CyclesCount = 0;
};
/// GCOVFunction - Collects function information.
class GCOVFunction {
public:
- using BlockIterator = pointee_iterator<SmallVectorImpl<
- std::unique_ptr<GCOVBlock>>::const_iterator>;
+ using BlockIterator = pointee_iterator<
+ SmallVectorImpl<std::unique_ptr<GCOVBlock>>::const_iterator>;
GCOVFunction(GCOVFile &P) : Parent(P) {}
@@ -322,6 +325,9 @@ class GCOVBlock {
public:
using EdgeIterator = SmallVectorImpl<GCOVEdge *>::const_iterator;
+ using BlockVector = SmallVector<const GCOVBlock *, 4>;
+ using BlockVectorLists = SmallVector<BlockVector, 4>;
+ using Edges = SmallVector<GCOVEdge *, 4>;
GCOVBlock(GCOVFunction &P, uint32_t N) : Parent(P), Number(N) {}
~GCOVBlock();
@@ -365,6 +371,16 @@ public:
void dump() const;
void collectLineCounts(FileInfo &FI);
+ static uint64_t getCycleCount(const Edges &Path);
+ static void unblock(const GCOVBlock *U, BlockVector &Blocked,
+ BlockVectorLists &BlockLists);
+ static bool lookForCircuit(const GCOVBlock *V, const GCOVBlock *Start,
+ Edges &Path, BlockVector &Blocked,
+ BlockVectorLists &BlockLists,
+ const BlockVector &Blocks, uint64_t &Count);
+ static void getCyclesCount(const BlockVector &Blocks, uint64_t &Count);
+ static uint64_t getLineCount(const BlockVector &Blocks);
+
private:
GCOVFunction &Parent;
uint32_t Number;
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
index 206142b3565a..dc45021fc47d 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
@@ -544,9 +544,9 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) {
void InstrProfSymtab::finalizeSymtab() {
if (Sorted)
return;
- llvm::sort(MD5NameMap.begin(), MD5NameMap.end(), less_first());
- llvm::sort(MD5FuncMap.begin(), MD5FuncMap.end(), less_first());
- llvm::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first());
+ llvm::sort(MD5NameMap, less_first());
+ llvm::sort(MD5FuncMap, less_first());
+ llvm::sort(AddrToMD5Map, less_first());
AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
AddrToMD5Map.end());
Sorted = true;
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h b/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
index efc22dcd0d9a..08d782276117 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -349,12 +349,17 @@ using OnDiskHashTableImplV3 =
OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
template <typename HashTableImpl>
+class InstrProfReaderItaniumRemapper;
+
+template <typename HashTableImpl>
class InstrProfReaderIndex : public InstrProfReaderIndexBase {
private:
std::unique_ptr<HashTableImpl> HashTable;
typename HashTableImpl::data_iterator RecordIterator;
uint64_t FormatVersion;
+ friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
+
public:
InstrProfReaderIndex(const unsigned char *Buckets,
const unsigned char *const Payload,
@@ -386,13 +391,26 @@ public:
}
};
+/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
+class InstrProfReaderRemapper {
+public:
+ virtual ~InstrProfReaderRemapper() {}
+ virtual Error populateRemappings() { return Error::success(); }
+ virtual Error getRecords(StringRef FuncName,
+ ArrayRef<NamedInstrProfRecord> &Data) = 0;
+};
+
/// Reader for the indexed binary instrprof format.
class IndexedInstrProfReader : public InstrProfReader {
private:
/// The profile data file contents.
std::unique_ptr<MemoryBuffer> DataBuffer;
+ /// The profile remapping file contents.
+ std::unique_ptr<MemoryBuffer> RemappingBuffer;
/// The index into the profile data.
std::unique_ptr<InstrProfReaderIndexBase> Index;
+ /// The profile remapping file contents.
+ std::unique_ptr<InstrProfReaderRemapper> Remapper;
/// Profile summary data.
std::unique_ptr<ProfileSummary> Summary;
// Index to the current record in the record array.
@@ -404,8 +422,11 @@ private:
const unsigned char *Cur);
public:
- IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
- : DataBuffer(std::move(DataBuffer)), RecordIndex(0) {}
+ IndexedInstrProfReader(
+ std::unique_ptr<MemoryBuffer> DataBuffer,
+ std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
+ : DataBuffer(std::move(DataBuffer)),
+ RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
@@ -434,10 +455,11 @@ public:
/// Factory method to create an indexed reader.
static Expected<std::unique_ptr<IndexedInstrProfReader>>
- create(const Twine &Path);
+ create(const Twine &Path, const Twine &RemappingPath = "");
static Expected<std::unique_ptr<IndexedInstrProfReader>>
- create(std::unique_ptr<MemoryBuffer> Buffer);
+ create(std::unique_ptr<MemoryBuffer> Buffer,
+ std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
// Used for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness) {
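A hedged sketch of creating an indexed reader together with a symbol remapping file; both paths are placeholders and `using namespace llvm;` is assumed:

    auto ReaderOrErr =
        IndexedInstrProfReader::create("default.profdata", "symbols.remap");
    if (!ReaderOrErr)
      return ReaderOrErr.takeError(); // or report and bail out
    std::unique_ptr<IndexedInstrProfReader> Reader = std::move(ReaderOrErr.get());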
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
index 0cd6dd2c2c0e..927dfd246878 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
@@ -49,7 +49,8 @@ enum class sampleprof_error {
unsupported_writing_format,
truncated_name_table,
not_implemented,
- counter_overflow
+ counter_overflow,
+ ostream_seek_unsupported
};
inline std::error_code make_error_code(sampleprof_error E) {
@@ -293,6 +294,9 @@ public:
/// with the maximum total sample count.
const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
StringRef CalleeName) const {
+ std::string CalleeGUID;
+ CalleeName = getRepInFormat(CalleeName, Format, CalleeGUID);
+
auto iter = CallsiteSamples.find(Loc);
if (iter == CallsiteSamples.end())
return nullptr;
@@ -377,30 +381,53 @@ public:
/// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
/// to \p S.
void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
- uint64_t Threshold, bool isCompact) const {
+ uint64_t Threshold) const {
if (TotalSamples <= Threshold)
return;
- S.insert(Function::getGUID(Name));
+ S.insert(getGUID(Name));
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
for (const auto &BS : BodySamples)
for (const auto &TS : BS.second.getCallTargets())
if (TS.getValue() > Threshold) {
- Function *Callee = M->getFunction(TS.getKey());
+ const Function *Callee =
+ M->getFunction(getNameInModule(TS.getKey(), M));
if (!Callee || !Callee->getSubprogram())
- S.insert(isCompact ? std::stol(TS.getKey().data())
- : Function::getGUID(TS.getKey()));
+ S.insert(getGUID(TS.getKey()));
}
for (const auto &CS : CallsiteSamples)
for (const auto &NameFS : CS.second)
- NameFS.second.findInlinedFunctions(S, M, Threshold, isCompact);
+ NameFS.second.findInlinedFunctions(S, M, Threshold);
}
/// Set the name of the function.
void setName(StringRef FunctionName) { Name = FunctionName; }
/// Return the function name.
- const StringRef &getName() const { return Name; }
+ StringRef getName() const { return Name; }
+
+ /// Return the original function name if it exists in Module \p M.
+ StringRef getFuncNameInModule(const Module *M) const {
+ return getNameInModule(Name, M);
+ }
+
+ /// Translate \p Name into its original name in Module.
+ /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
+ /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
+ /// is actually GUID of the original function name. getNameInModule will
+ /// translate \p Name in current FunctionSamples into its original name.
+ /// If the original name doesn't exist in \p M, return empty StringRef.
+ StringRef getNameInModule(StringRef Name, const Module *M) const {
+ if (Format != SPF_Compact_Binary)
+ return Name;
+ // Expect CurrentModule to be initialized by GUIDToFuncNameMapper.
+ if (M != CurrentModule)
+ llvm_unreachable("Input Module should be the same as CurrentModule");
+ auto iter = GUIDToFuncNameMap.find(std::stoull(Name.data()));
+ if (iter == GUIDToFuncNameMap.end())
+ return StringRef();
+ return iter->second;
+ }
/// Returns the line offset to the start line of the subprogram.
/// We assume that a single function will not exceed 65535 LOC.
@@ -417,6 +444,54 @@ public:
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *findFunctionSamples(const DILocation *DIL) const;
+ static SampleProfileFormat Format;
+ /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
+ /// all the function symbols defined or declared in CurrentModule.
+ static DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
+ static Module *CurrentModule;
+
+ class GUIDToFuncNameMapper {
+ public:
+ GUIDToFuncNameMapper(Module &M) {
+ if (Format != SPF_Compact_Binary)
+ return;
+
+ for (const auto &F : M) {
+ StringRef OrigName = F.getName();
+ GUIDToFuncNameMap.insert({Function::getGUID(OrigName), OrigName});
+      /// Local to global var promotion used by optimizations like ThinLTO
+      /// will rename the var and add a suffix like ".llvm.xxx" to the
+ /// original local name. In sample profile, the suffixes of function
+ /// names are all stripped. Since it is possible that the mapper is
+ /// built in post-thin-link phase and var promotion has been done,
+ /// we need to add the substring of function name without the suffix
+ /// into the GUIDToFuncNameMap.
+ auto pos = OrigName.find('.');
+ if (pos != StringRef::npos) {
+ StringRef NewName = OrigName.substr(0, pos);
+ GUIDToFuncNameMap.insert({Function::getGUID(NewName), NewName});
+ }
+ }
+ CurrentModule = &M;
+ }
+
+ ~GUIDToFuncNameMapper() {
+ if (Format != SPF_Compact_Binary)
+ return;
+
+ GUIDToFuncNameMap.clear();
+ CurrentModule = nullptr;
+ }
+ };
+
+ // Assume the input \p Name is a name coming from FunctionSamples itself.
+ // If the format is SPF_Compact_Binary, the name is already a GUID and we
+  // don't want to return the GUID of a GUID.
+ static uint64_t getGUID(StringRef Name) {
+ return (Format == SPF_Compact_Binary) ? std::stoull(Name.data())
+ : Function::getGUID(Name);
+ }
+
private:
/// Mangled name of the function.
StringRef Name;
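For SPF_Compact_Binary profiles the names inside FunctionSamples are GUID strings, so the intended pattern is to keep a GUIDToFuncNameMapper alive while querying samples and to go through getFuncNameInModule() whenever a human-readable name is needed. A hedged sketch, assuming `using namespace llvm;` and `using namespace sampleprof;`, a loaded SampleProfileReader Reader, and the Module M being compiled:

    FunctionSamples::GUIDToFuncNameMapper Mapper(M); // no-op for non-compact formats
    for (Function &F : M)
      if (FunctionSamples *FS = Reader->getSamplesFor(F))
        errs() << FS->getFuncNameInModule(&M) << ": "
               << FS->getTotalSamples() << " samples\n";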
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h b/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
index 0617b05e8d4f..5cc729e42cc8 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -222,6 +222,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SymbolRemappingReader.h"
#include <algorithm>
#include <cstdint>
#include <memory>
@@ -279,6 +280,8 @@ public:
/// Print the profile for \p FName on stream \p OS.
void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
+ virtual void collectFuncsToUse(const Module &M) {}
+
/// Print all the profiles on stream \p OS.
void dump(raw_ostream &OS = dbgs());
@@ -287,11 +290,16 @@ public:
// The function name may have been updated by adding suffix. In sample
// profile, the function names are all stripped, so we need to strip
// the function name suffix before matching with profile.
- StringRef Fname = F.getName().split('.').first;
+ return getSamplesFor(F.getName().split('.').first);
+ }
+
+ /// Return the samples collected for function \p F.
+ virtual FunctionSamples *getSamplesFor(StringRef Fname) {
std::string FGUID;
Fname = getRepInFormat(Fname, getFormat(), FGUID);
- if (Profiles.count(Fname))
- return &Profiles[Fname];
+ auto It = Profiles.find(Fname);
+ if (It != Profiles.end())
+ return &It->second;
return nullptr;
}
@@ -335,6 +343,12 @@ protected:
/// Profile summary information.
std::unique_ptr<ProfileSummary> Summary;
+ /// Take ownership of the summary of this reader.
+ static std::unique_ptr<ProfileSummary>
+ takeSummary(SampleProfileReader &Reader) {
+ return std::move(Reader.Summary);
+ }
+
/// Compute summary for this profile.
void computeSummary();
@@ -364,7 +378,7 @@ public:
: SampleProfileReader(std::move(B), C, Format) {}
/// Read and validate the file header.
- std::error_code readHeader() override;
+ virtual std::error_code readHeader() override;
/// Read sample profiles from the associated file.
std::error_code read() override;
@@ -378,6 +392,10 @@ protected:
/// \returns the read value.
template <typename T> ErrorOr<T> readNumber();
+  /// Read a numeric value of type T from the profile. The value is stored
+  /// in raw, unencoded form.
+ template <typename T> ErrorOr<T> readUnencodedNumber();
+
/// Read a string from the profile.
///
/// If an error occurs during decoding, a diagnostic message is emitted and
@@ -392,6 +410,9 @@ protected:
/// Return true if we've reached the end of file.
bool at_eof() const { return Data >= End; }
+ /// Read the next function profile instance.
+ std::error_code readFuncProfile();
+
/// Read the contents of the given profile instance.
std::error_code readProfile(FunctionSamples &FProfile);
@@ -436,10 +457,17 @@ class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
private:
/// Function name table.
std::vector<std::string> NameTable;
+  /// The table mapping from function name to the offset of its FunctionSamples
+  /// record from the start of the file.
+ DenseMap<StringRef, uint64_t> FuncOffsetTable;
+ /// The set containing the functions to use when compiling a module.
+ DenseSet<StringRef> FuncsToUse;
virtual std::error_code verifySPMagic(uint64_t Magic) override;
virtual std::error_code readNameTable() override;
/// Read a string indirectly via the name table.
virtual ErrorOr<StringRef> readStringFromTable() override;
+ virtual std::error_code readHeader() override;
+ std::error_code readFuncOffsetTable();
public:
SampleProfileReaderCompactBinary(std::unique_ptr<MemoryBuffer> B,
@@ -448,6 +476,12 @@ public:
/// \brief Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);
+
+ /// Read samples only for functions to use.
+ std::error_code read() override;
+
+ /// Collect functions to be used when compiling Module \p M.
+ void collectFuncsToUse(const Module &M) override;
};
using InlineCallStack = SmallVector<FunctionSamples *, 10>;
@@ -503,6 +537,44 @@ protected:
static const uint32_t GCOVTagAFDOFunction = 0xac000000;
};
+/// A profile data reader proxy that remaps the profile data from another
+/// sample profile data reader, by applying a provided set of equivalences
+/// between components of the symbol names in the profile.
+class SampleProfileReaderItaniumRemapper : public SampleProfileReader {
+public:
+ SampleProfileReaderItaniumRemapper(
+ std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
+ std::unique_ptr<SampleProfileReader> Underlying)
+ : SampleProfileReader(std::move(B), C, Underlying->getFormat()) {
+ Profiles = std::move(Underlying->getProfiles());
+ Summary = takeSummary(*Underlying);
+ // Keep the underlying reader alive; the profile data may contain
+ // StringRefs referencing names in its name table.
+ UnderlyingReader = std::move(Underlying);
+ }
+
+ /// Create a remapped sample profile from the given remapping file and
+ /// underlying samples.
+ static ErrorOr<std::unique_ptr<SampleProfileReader>>
+ create(const Twine &Filename, LLVMContext &C,
+ std::unique_ptr<SampleProfileReader> Underlying);
+
+ /// Read and validate the file header.
+ std::error_code readHeader() override { return sampleprof_error::success; }
+
+ /// Read remapping file and apply it to the sample profile.
+ std::error_code read() override;
+
+ /// Return the samples collected for function \p F.
+ FunctionSamples *getSamplesFor(StringRef FunctionName) override;
+ using SampleProfileReader::getSamplesFor;
+
+private:
+ SymbolRemappingReader Remappings;
+ DenseMap<SymbolRemappingReader::Key, FunctionSamples*> SampleMap;
+ std::unique_ptr<SampleProfileReader> UnderlyingReader;
+};
+
} // end namespace sampleprof
} // end namespace llvm
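A hedged sketch of stacking the remapper on top of an existing reader; the file names are placeholders, Ctx is an LLVMContext owned by the caller, and `using namespace llvm;` and `using namespace sampleprof;` are assumed:

    auto ReaderOrErr = SampleProfileReader::create("foo.samples", Ctx);
    if (std::error_code EC = ReaderOrErr.getError())
      return EC;
    auto RemappedOrErr = SampleProfileReaderItaniumRemapper::create(
        "foo.remap", Ctx, std::move(*ReaderOrErr));
    // On success, *RemappedOrErr answers getSamplesFor() using remapped names.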
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h b/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 74dc839ff049..d5ac6e53e4f7 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -42,7 +42,7 @@ public:
/// Write all the sample profiles in the given map of samples.
///
/// \returns status code of the file update operation.
- std::error_code write(const StringMap<FunctionSamples> &ProfileMap);
+ virtual std::error_code write(const StringMap<FunctionSamples> &ProfileMap);
raw_ostream &getOutputStream() { return *OutputStream; }
@@ -103,14 +103,15 @@ private:
/// Sample-based profile writer (binary format).
class SampleProfileWriterBinary : public SampleProfileWriter {
public:
- std::error_code write(const FunctionSamples &S) override;
+ virtual std::error_code write(const FunctionSamples &S) override;
SampleProfileWriterBinary(std::unique_ptr<raw_ostream> &OS)
: SampleProfileWriter(OS) {}
protected:
virtual std::error_code writeNameTable() = 0;
virtual std::error_code writeMagicIdent() = 0;
- std::error_code writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ virtual std::error_code
+ writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
std::error_code writeSummary();
std::error_code writeNameIdx(StringRef FName);
std::error_code writeBody(const FunctionSamples &S);
@@ -135,12 +136,56 @@ protected:
virtual std::error_code writeMagicIdent() override;
};
+// CompactBinary is a compact form of the binary profile format. It reduces
+// both the profile size and the load time needed when compiling. It has two
+// major differences from the Binary format.
+// 1. It represents all the strings in the name table using MD5 hashes.
+// 2. It saves a function offset table which maps each function name index to
+// the offset of its function profile from the start of the binary profile,
+// so for function profiles that are not needed when compiling a module the
+// profile reader doesn't have to read them, which saves compile time when
+// the profile is huge.
+// The layout of the compact format is shown as follows:
+//
+// Part1: Profile header, the same as binary format, containing magic
+// number, version, summary, name table...
+// Part2: Function Offset Table Offset, which saves the position of
+// Part4.
+// Part3: Function profile collection
+// function1 profile start
+// ....
+// function2 profile start
+// ....
+// function3 profile start
+// ....
+// ......
+// Part4: Function Offset Table
+// function1 name index --> function1 profile start
+// function2 name index --> function2 profile start
+// function3 name index --> function3 profile start
+//
+// We need Part2 because profile reader can use it to find out and read
+// function offset table without reading Part3 first.
class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary {
using SampleProfileWriterBinary::SampleProfileWriterBinary;
+public:
+ virtual std::error_code write(const FunctionSamples &S) override;
+ virtual std::error_code
+ write(const StringMap<FunctionSamples> &ProfileMap) override;
+
protected:
+  /// The table mapping from function name to the offset of its FunctionSamples
+  /// record from the start of the profile.
+  MapVector<StringRef, uint64_t> FuncOffsetTable;
+  /// The offset of the slot to be filled with the offset of FuncOffsetTable
+  /// from the start of the profile.
+ uint64_t TableOffset;
virtual std::error_code writeNameTable() override;
virtual std::error_code writeMagicIdent() override;
+ virtual std::error_code
+ writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ std::error_code writeFuncOffsetTable();
};
} // end namespace sampleprof
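A hedged sketch of producing the compact format through the existing factory; the output path is a placeholder, ProfileMap is assumed to be the StringMap<FunctionSamples> being written, and `using namespace llvm;` and `using namespace sampleprof;` are assumed:

    auto WriterOrErr =
        SampleProfileWriter::create("out.compact.prof", SPF_Compact_Binary);
    if (std::error_code EC = WriterOrErr.getError())
      return EC;
    // write() emits Part1-Part4 described above, including the function offset table.
    if (std::error_code EC = (*WriterOrErr)->write(ProfileMap))
      return EC;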
diff --git a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
index 6772e5f9b734..e03297b7c3c3 100644
--- a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -40,6 +40,11 @@ AARCH64_ARCH("armv8.4-a", ARMV8_4A, "8.4-A", "v8.4a",
(AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD))
+AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD))
#undef AARCH64_ARCH
#ifndef AARCH64_ARCH_EXT_NAME
@@ -60,10 +65,16 @@ AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
+AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
+AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
+AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
+AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
+AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
+AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME
@@ -91,8 +102,8 @@ AARCH64_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
-AARCH64_CPU_NAME("exynos-m4", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+AARCH64_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD))
AARCH64_CPU_NAME("falkor", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_RDM))
AARCH64_CPU_NAME("saphira", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -109,6 +120,9 @@ AARCH64_CPU_NAME("thunderxt81", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("thunderxt83", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+AARCH64_CPU_NAME("tsv110", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_PROFILE | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
+ AArch64::AEK_DOTPROD))
// Invalid CPU
AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID)
#undef AARCH64_CPU_NAME
diff --git a/contrib/llvm/include/llvm/Support/AArch64TargetParser.h b/contrib/llvm/include/llvm/Support/AArch64TargetParser.h
new file mode 100644
index 000000000000..76b77d474428
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -0,0 +1,124 @@
+//===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise AArch64 hardware features
+// such as FPU/CPU/ARCH and extension names.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AARCH64TARGETPARSERCOMMON_H
+#define LLVM_SUPPORT_AARCH64TARGETPARSERCOMMON_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ARMTargetParser.h"
+#include <vector>
+
+// FIXME: This should be made into a class design, to avoid duplication.
+namespace llvm {
+namespace AArch64 {
+
+// Arch extension modifiers for CPUs.
+enum ArchExtKind : unsigned {
+ AEK_INVALID = 0,
+ AEK_NONE = 1,
+ AEK_CRC = 1 << 1,
+ AEK_CRYPTO = 1 << 2,
+ AEK_FP = 1 << 3,
+ AEK_SIMD = 1 << 4,
+ AEK_FP16 = 1 << 5,
+ AEK_PROFILE = 1 << 6,
+ AEK_RAS = 1 << 7,
+ AEK_LSE = 1 << 8,
+ AEK_SVE = 1 << 9,
+ AEK_DOTPROD = 1 << 10,
+ AEK_RCPC = 1 << 11,
+ AEK_RDM = 1 << 12,
+ AEK_SM4 = 1 << 13,
+ AEK_SHA3 = 1 << 14,
+ AEK_SHA2 = 1 << 15,
+ AEK_AES = 1 << 16,
+ AEK_FP16FML = 1 << 17,
+ AEK_RAND = 1 << 18,
+ AEK_MTE = 1 << 19,
+ AEK_SSBS = 1 << 20,
+ AEK_SB = 1 << 21,
+ AEK_PREDRES = 1 << 22,
+};
+
+enum class ArchKind {
+#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
+#include "AArch64TargetParser.def"
+};
+
+const ARM::ArchNames<ArchKind> AArch64ARCHNames[] = {
+#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, \
+ ARCH_BASE_EXT) \
+ {NAME, \
+ sizeof(NAME) - 1, \
+ CPU_ATTR, \
+ sizeof(CPU_ATTR) - 1, \
+ SUB_ARCH, \
+ sizeof(SUB_ARCH) - 1, \
+ ARM::FPUKind::ARCH_FPU, \
+ ARCH_BASE_EXT, \
+ AArch64::ArchKind::ID, \
+ ARCH_ATTR},
+#include "AArch64TargetParser.def"
+};
+
+const ARM::ExtName AArch64ARCHExtNames[] = {
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+ {NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE},
+#include "AArch64TargetParser.def"
+};
+
+const ARM::CpuNames<ArchKind> AArch64CPUNames[] = {
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ {NAME, sizeof(NAME) - 1, AArch64::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT},
+#include "AArch64TargetParser.def"
+};
+
+const ArchKind ArchKinds[] = {
+#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
+ ArchKind::ID,
+#include "AArch64TargetParser.def"
+};
+
+// FIXME: These should be moved to TargetTuple once it exists
+bool getExtensionFeatures(unsigned Extensions,
+ std::vector<StringRef> &Features);
+bool getArchFeatures(ArchKind AK, std::vector<StringRef> &Features);
+
+StringRef getArchName(ArchKind AK);
+unsigned getArchAttr(ArchKind AK);
+StringRef getCPUAttr(ArchKind AK);
+StringRef getSubArch(ArchKind AK);
+StringRef getArchExtName(unsigned ArchExtKind);
+StringRef getArchExtFeature(StringRef ArchExt);
+
+// Information by Name
+unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
+unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
+StringRef getDefaultCPU(StringRef Arch);
+ArchKind getCPUArchKind(StringRef CPU);
+
+// Parser
+ArchKind parseArch(StringRef Arch);
+ArchExtKind parseArchExt(StringRef ArchExt);
+ArchKind parseCPUArch(StringRef CPU);
+// Used by target parser tests
+void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
+
+bool isX18ReservedByDefault(const Triple &TT);
+
+} // namespace AArch64
+} // namespace llvm
+
+#endif
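
A rough usage sketch of the parser API declared in the new header above (not taken from the upstream sources): look up a CPU, take its default extensions, and expand them to subtarget feature strings. The CPU name "tsv110" comes from the .def changes earlier in this diff; everything else is assumed to behave as the declarations suggest.

#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

static void printDefaultFeatures(llvm::StringRef CPU) { // e.g. "tsv110"
  using namespace llvm;
  AArch64::ArchKind AK = AArch64::parseCPUArch(CPU);
  unsigned Extensions = AArch64::getDefaultExtensions(CPU, AK);
  std::vector<StringRef> Features;
  if (AArch64::getExtensionFeatures(Extensions, Features))
    for (StringRef F : Features)
      outs() << CPU << " enables " << F << "\n"; // e.g. "+dotprod"
}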
diff --git a/contrib/llvm/include/llvm/Support/AMDGPUMetadata.h b/contrib/llvm/include/llvm/Support/AMDGPUMetadata.h
index 667fb3f3da43..84851c07499d 100644
--- a/contrib/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/contrib/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -431,6 +431,21 @@ std::error_code fromString(std::string String, Metadata &HSAMetadata);
/// Converts \p HSAMetadata to \p String.
std::error_code toString(Metadata HSAMetadata, std::string &String);
+//===----------------------------------------------------------------------===//
+// HSA metadata for v3 code object.
+//===----------------------------------------------------------------------===//
+namespace V3 {
+/// HSA metadata major version.
+constexpr uint32_t VersionMajor = 1;
+/// HSA metadata minor version.
+constexpr uint32_t VersionMinor = 0;
+
+/// HSA metadata beginning assembler directive.
+constexpr char AssemblerDirectiveBegin[] = ".amdgpu_metadata";
+/// HSA metadata ending assembler directive.
+constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_metadata";
+} // end namespace V3
+
} // end namespace HSAMD
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.def b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
index 78f5410fb733..9e844e2b464d 100644
--- a/contrib/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
@@ -106,6 +106,11 @@ ARM_ARCH("armv8.4-a", ARMV8_4A, "8.4-A", "v8.4a",
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD))
+ARM_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -152,6 +157,8 @@ ARM_ARCH_EXT_NAME("iwmmxt", ARM::AEK_IWMMXT, nullptr, nullptr)
ARM_ARCH_EXT_NAME("iwmmxt2", ARM::AEK_IWMMXT2, nullptr, nullptr)
ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr, nullptr)
ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml")
+ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb")
#undef ARM_ARCH_EXT_NAME
#ifndef ARM_HW_DIV_NAME
@@ -202,10 +209,9 @@ ARM_CPU_NAME("arm926ej-s", ARMV5TEJ, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("arm1136j-s", ARMV6, FK_NONE, false, ARM::AEK_NONE)
ARM_CPU_NAME("arm1136jf-s", ARMV6, FK_VFPV2, true, ARM::AEK_NONE)
ARM_CPU_NAME("arm1136jz-s", ARMV6, FK_NONE, false, ARM::AEK_NONE)
-ARM_CPU_NAME("arm1176j-s", ARMV6K, FK_NONE, true, ARM::AEK_NONE)
-ARM_CPU_NAME("arm1176jz-s", ARMV6KZ, FK_NONE, false, ARM::AEK_NONE)
-ARM_CPU_NAME("mpcore", ARMV6K, FK_VFPV2, false, ARM::AEK_NONE)
+ARM_CPU_NAME("mpcore", ARMV6K, FK_VFPV2, true, ARM::AEK_NONE)
ARM_CPU_NAME("mpcorenovfp", ARMV6K, FK_NONE, false, ARM::AEK_NONE)
+ARM_CPU_NAME("arm1176jz-s", ARMV6KZ, FK_NONE, false, ARM::AEK_NONE)
ARM_CPU_NAME("arm1176jzf-s", ARMV6KZ, FK_VFPV2, true, ARM::AEK_NONE)
ARM_CPU_NAME("arm1156t2-s", ARMV6T2, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("arm1156t2f-s", ARMV6T2, FK_VFPV2, false, ARM::AEK_NONE)
@@ -260,7 +266,8 @@ ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
-ARM_CPU_NAME("exynos-m4", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
+ARM_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
// Non-standard Arch names.
ARM_CPU_NAME("iwmmxt", IWMMXT, FK_NONE, true, ARM::AEK_NONE)
diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.h b/contrib/llvm/include/llvm/Support/ARMTargetParser.h
new file mode 100644
index 000000000000..71acc0dc72d0
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.h
@@ -0,0 +1,264 @@
+//===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise ARM hardware features
+// such as FPU/CPU/ARCH/extensions and specific support such as HWDIV.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ARMTARGETPARSER_H
+#define LLVM_SUPPORT_ARMTARGETPARSER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include <vector>
+
+namespace llvm {
+namespace ARM {
+
+// Arch extension modifiers for CPUs.
+// Note that this is not the same as the AArch64 list
+enum ArchExtKind : unsigned {
+ AEK_INVALID = 0,
+ AEK_NONE = 1,
+ AEK_CRC = 1 << 1,
+ AEK_CRYPTO = 1 << 2,
+ AEK_FP = 1 << 3,
+ AEK_HWDIVTHUMB = 1 << 4,
+ AEK_HWDIVARM = 1 << 5,
+ AEK_MP = 1 << 6,
+ AEK_SIMD = 1 << 7,
+ AEK_SEC = 1 << 8,
+ AEK_VIRT = 1 << 9,
+ AEK_DSP = 1 << 10,
+ AEK_FP16 = 1 << 11,
+ AEK_RAS = 1 << 12,
+ AEK_SVE = 1 << 13,
+ AEK_DOTPROD = 1 << 14,
+ AEK_SHA2 = 1 << 15,
+ AEK_AES = 1 << 16,
+ AEK_FP16FML = 1 << 17,
+ AEK_SB = 1 << 18,
+ // Unsupported extensions.
+ AEK_OS = 0x8000000,
+ AEK_IWMMXT = 0x10000000,
+ AEK_IWMMXT2 = 0x20000000,
+ AEK_MAVERICK = 0x40000000,
+ AEK_XSCALE = 0x80000000,
+};
+
+// List of Arch Extension names.
+// FIXME: TableGen this.
+struct ExtName {
+ const char *NameCStr;
+ size_t NameLength;
+ unsigned ID;
+ const char *Feature;
+ const char *NegFeature;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+
+const ExtName ARCHExtNames[] = {
+#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+ {NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE},
+#include "ARMTargetParser.def"
+};
+
+// List of HWDiv names (use getHWDivSynonym) and which architectural
+// features they correspond to (use getHWDivFeatures).
+// FIXME: TableGen this.
+const struct {
+ const char *NameCStr;
+ size_t NameLength;
+ unsigned ID;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+} HWDivNames[] = {
+#define ARM_HW_DIV_NAME(NAME, ID) {NAME, sizeof(NAME) - 1, ID},
+#include "ARMTargetParser.def"
+};
+
+// Arch names.
+enum class ArchKind {
+#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
+#include "ARMTargetParser.def"
+};
+
+// List of CPU names and their arches.
+// The same CPU can have multiple arches and can be default on multiple arches.
+// When finding the Arch for a CPU, first-found prevails. Sort them accordingly.
+// When this becomes table-generated, we'd probably need two tables.
+// FIXME: TableGen this.
+template <typename T> struct CpuNames {
+ const char *NameCStr;
+ size_t NameLength;
+ T ArchID;
+ bool Default; // is $Name the default CPU for $ArchID ?
+ unsigned DefaultExtensions;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+
+const CpuNames<ArchKind> CPUNames[] = {
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ {NAME, sizeof(NAME) - 1, ARM::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT},
+#include "ARMTargetParser.def"
+};
+
+// FPU names.
+enum FPUKind {
+#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) KIND,
+#include "ARMTargetParser.def"
+ FK_LAST
+};
+
+// FPU Version
+enum class FPUVersion {
+ NONE,
+ VFPV2,
+ VFPV3,
+ VFPV3_FP16,
+ VFPV4,
+ VFPV5
+};
+
+// An FPU name restricts the FPU in one of three ways:
+enum class FPURestriction {
+ None = 0, ///< No restriction
+ D16, ///< Only 16 D registers
+ SP_D16 ///< Only single-precision instructions, with 16 D registers
+};
+
+// An FPU name implies one of three levels of Neon support:
+enum class NeonSupportLevel {
+ None = 0, ///< No Neon
+ Neon, ///< Neon
+ Crypto ///< Neon with Crypto
+};
+
+// ISA kinds.
+enum class ISAKind { INVALID = 0, ARM, THUMB, AARCH64 };
+
+// Endianness
+// FIXME: BE8 vs. BE32?
+enum class EndianKind { INVALID = 0, LITTLE, BIG };
+
+// v6/v7/v8 Profile
+enum class ProfileKind { INVALID = 0, A, R, M };
+
+// List of canonical FPU names (use getFPUSynonym) and which architectural
+// features they correspond to (use getFPUFeatures).
+// FIXME: TableGen this.
+// The entries must appear in the order listed in ARM::FPUKind for correct
+// indexing
+struct FPUName {
+ const char *NameCStr;
+ size_t NameLength;
+ FPUKind ID;
+ FPUVersion FPUVer;
+ NeonSupportLevel NeonSupport;
+ FPURestriction Restriction;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+
+static const FPUName FPUNames[] = {
+#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) \
+ {NAME, sizeof(NAME) - 1, KIND, VERSION, NEON_SUPPORT, RESTRICTION},
+#include "llvm/Support/ARMTargetParser.def"
+};
+
+// List of canonical arch names (use getArchSynonym).
+// This table also provides the build attribute fields for CPU arch
+// and Arch ID, according to the Addenda to the ARM ABI, chapters
+// 2.4 and 2.3.5.2 respectively.
+// FIXME: SubArch values were simplified to fit into the expectations
+// of the triples and are not conforming with their official names.
+// Check to see if the expectation should be changed.
+// FIXME: TableGen this.
+template <typename T> struct ArchNames {
+ const char *NameCStr;
+ size_t NameLength;
+ const char *CPUAttrCStr;
+ size_t CPUAttrLength;
+ const char *SubArchCStr;
+ size_t SubArchLength;
+ unsigned DefaultFPU;
+ unsigned ArchBaseExtensions;
+ T ID;
+ ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes.
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+
+ // CPU class in build attributes.
+ StringRef getCPUAttr() const { return StringRef(CPUAttrCStr, CPUAttrLength); }
+
+ // Sub-Arch name.
+ StringRef getSubArch() const { return StringRef(SubArchCStr, SubArchLength); }
+};
+
+static const ArchNames<ArchKind> ARCHNames[] = {
+#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, \
+ ARCH_BASE_EXT) \
+ {NAME, sizeof(NAME) - 1, \
+ CPU_ATTR, sizeof(CPU_ATTR) - 1, \
+ SUB_ARCH, sizeof(SUB_ARCH) - 1, \
+ ARCH_FPU, ARCH_BASE_EXT, \
+ ArchKind::ID, ARCH_ATTR},
+#include "llvm/Support/ARMTargetParser.def"
+};
+
+// Information by ID
+StringRef getFPUName(unsigned FPUKind);
+FPUVersion getFPUVersion(unsigned FPUKind);
+NeonSupportLevel getFPUNeonSupportLevel(unsigned FPUKind);
+FPURestriction getFPURestriction(unsigned FPUKind);
+
+// FIXME: These should be moved to TargetTuple once it exists
+bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
+bool getHWDivFeatures(unsigned HWDivKind, std::vector<StringRef> &Features);
+bool getExtensionFeatures(unsigned Extensions,
+ std::vector<StringRef> &Features);
+
+StringRef getArchName(ArchKind AK);
+unsigned getArchAttr(ArchKind AK);
+StringRef getCPUAttr(ArchKind AK);
+StringRef getSubArch(ArchKind AK);
+StringRef getArchExtName(unsigned ArchExtKind);
+StringRef getArchExtFeature(StringRef ArchExt);
+StringRef getHWDivName(unsigned HWDivKind);
+
+// Information by Name
+unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
+unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
+StringRef getDefaultCPU(StringRef Arch);
+StringRef getCanonicalArchName(StringRef Arch);
+StringRef getFPUSynonym(StringRef FPU);
+StringRef getArchSynonym(StringRef Arch);
+
+// Parser
+unsigned parseHWDiv(StringRef HWDiv);
+unsigned parseFPU(StringRef FPU);
+ArchKind parseArch(StringRef Arch);
+unsigned parseArchExt(StringRef ArchExt);
+ArchKind parseCPUArch(StringRef CPU);
+ISAKind parseArchISA(StringRef Arch);
+EndianKind parseArchEndian(StringRef Arch);
+ProfileKind parseArchProfile(StringRef Arch);
+unsigned parseArchVersion(StringRef Arch);
+
+void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
+StringRef computeDefaultTargetABI(const Triple &TT, StringRef CPU);
+
+} // namespace ARM
+} // namespace llvm
+
+#endif
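
A similar hedged sketch for the ARM-side declarations above, this time resolving an FPU name to its subtarget features; "neon-fp-armv8" is assumed to be a valid entry in ARMTargetParser.def.

#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

static void printFPUFeatures(llvm::StringRef Name) { // e.g. "neon-fp-armv8"
  using namespace llvm;
  unsigned FPUKind = ARM::parseFPU(ARM::getFPUSynonym(Name));
  std::vector<StringRef> Features;
  if (!ARM::getFPUFeatures(FPUKind, Features))
    return; // unknown FPU name
  outs() << ARM::getFPUName(FPUKind) << ":";
  for (StringRef F : Features)
    outs() << " " << F;
  outs() << "\n";
}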
diff --git a/contrib/llvm/include/llvm/Support/ARMWinEH.h b/contrib/llvm/include/llvm/Support/ARMWinEH.h
index 1463629f45dc..60174503ad49 100644
--- a/contrib/llvm/include/llvm/Support/ARMWinEH.h
+++ b/contrib/llvm/include/llvm/Support/ARMWinEH.h
@@ -207,6 +207,8 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
/// ExceptionDataRecord - An entry in the table of exception data (.xdata)
///
+/// The format on ARM is:
+///
/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
/// +-------+---------+-+-+-+---+-----------------------------------+
@@ -215,6 +217,16 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
/// | Reserved |Ex. Code Words| (Extended Epilogue Count) |
/// +-------+--------+--------------+-------------------------------+
///
+/// The format on ARM64 is:
+///
+/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+/// +---------+---------+-+-+---+-----------------------------------+
+/// | C Wrd | Epi Cnt |E|X|Ver| Function Length |
+/// +---------+------+--'-'-'---'---+-------------------------------+
+/// | Reserved |Ex. Code Words| (Extended Epilogue Count) |
+/// +-------+--------+--------------+-------------------------------+
+///
/// Function Length : 18-bit field indicating the total length of the function
/// in bytes divided by 2. If a function is larger than
/// 512KB, then multiple pdata and xdata records must be used.
@@ -225,7 +237,7 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
/// header
/// F : 1-bit field indicating that the record describes a function fragment
/// (implies that no prologue is present, and prologue processing should be
-/// skipped)
+/// skipped) (ARM only)
/// Epilogue Count : 5-bit field that differs in meaning based on the E field.
///
/// If E is set, then this field specifies the index of the
@@ -235,33 +247,43 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
/// scopes. If more than 31 scopes exist, then this field and
/// the Code Words field must both be set to 0 to indicate that
/// an extension word is required.
-/// Code Words : 4-bit field that species the number of 32-bit words needed to
-/// contain all the unwind codes. If more than 15 words (63 code
-/// bytes) are required, then this field and the Epilogue Count
-/// field must both be set to 0 to indicate that an extension word
-/// is required.
+/// Code Words : 4-bit (5-bit on ARM64) field that specifies the number of
+/// 32-bit words needed to contain all the unwind codes. If more
+/// than 15 words (31 words on ARM64) are required, then this field
+/// and the Epilogue Count field must both be set to 0 to indicate
+/// that an extension word is required.
/// Extended Epilogue Count, Extended Code Words :
/// Valid only if Epilog Count and Code Words are both
/// set to 0. Provides an 8-bit extended code word
/// count and 16-bits for epilogue count
///
+/// The epilogue scope format on ARM is:
+///
/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
/// +----------------+------+---+---+-------------------------------+
/// | Ep Start Idx | Cond |Res| Epilogue Start Offset |
/// +----------------+------+---+-----------------------------------+
///
+/// The epilogue scope format on ARM64 is:
+///
+/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+/// +-------------------+-------+---+-------------------------------+
+/// | Ep Start Idx | Res | Epilogue Start Offset |
+/// +-------------------+-------+-----------------------------------+
+///
/// If the E bit is unset in the header, the header is followed by a series of
/// epilogue scopes, which are sorted by their offset.
///
/// Epilogue Start Offset: 18-bit field encoding the offset of epilogue relative
/// to the start of the function in bytes divided by two
/// Res : 2-bit field reserved for future expansion (must be set to 0)
-/// Condition : 4-bit field providing the condition under which the epilogue is
-/// executed. Unconditional epilogues should set this field to 0xe.
-/// Epilogues must be entirely conditional or unconditional, and in
-/// Thumb-2 mode. The epilogue beings with the first instruction
-/// after the IT opcode.
+/// Condition : (ARM only) 4-bit field providing the condition under which the
+/// epilogue is executed. Unconditional epilogues should set this
+/// field to 0xe. Epilogues must be entirely conditional or
+/// unconditional, and in Thumb-2 mode. The epilogue begins with
+/// the first instruction after the IT opcode.
/// Epilogue Start Index : 8-bit field indicating the byte index of the first
/// unwind code describing the epilogue
///
@@ -293,18 +315,33 @@ struct EpilogueScope {
const support::ulittle32_t ES;
EpilogueScope(const support::ulittle32_t Data) : ES(Data) {}
+ // Same for both ARM and AArch64.
uint32_t EpilogueStartOffset() const {
return (ES & 0x0003ffff);
}
- uint8_t Res() const {
+
+ // Different implementations for ARM and AArch64.
+ uint8_t ResARM() const {
return ((ES & 0x000c0000) >> 18);
}
+
+ uint8_t ResAArch64() const {
+ return ((ES & 0x000f0000) >> 18);
+ }
+
+ // Condition is only applicable to ARM.
uint8_t Condition() const {
return ((ES & 0x00f00000) >> 20);
}
- uint8_t EpilogueStartIndex() const {
+
+ // Different implementations for ARM and AArch64.
+ uint8_t EpilogueStartIndexARM() const {
return ((ES & 0xff000000) >> 24);
}
+
+ uint16_t EpilogueStartIndexAArch64() const {
+ return ((ES & 0xffc00000) >> 22);
+ }
};
struct ExceptionDataRecord;
@@ -312,13 +349,23 @@ inline size_t HeaderWords(const ExceptionDataRecord &XR);
struct ExceptionDataRecord {
const support::ulittle32_t *Data;
+ bool isAArch64;
- ExceptionDataRecord(const support::ulittle32_t *Data) : Data(Data) {}
+ ExceptionDataRecord(const support::ulittle32_t *Data, bool isAArch64) :
+ Data(Data), isAArch64(isAArch64) {}
uint32_t FunctionLength() const {
return (Data[0] & 0x0003ffff);
}
+ uint32_t FunctionLengthInBytesARM() const {
+ return FunctionLength() << 1;
+ }
+
+ uint32_t FunctionLengthInBytesAArch64() const {
+ return FunctionLength() << 2;
+ }
+
uint8_t Vers() const {
return (Data[0] & 0x000C0000) >> 18;
}
@@ -332,18 +379,25 @@ struct ExceptionDataRecord {
}
bool F() const {
+ assert(!isAArch64 && "Fragments are only supported on ARMv7 WinEH");
return ((Data[0] & 0x00400000) >> 22);
}
uint8_t EpilogueCount() const {
- if (HeaderWords(*this) == 1)
+ if (HeaderWords(*this) == 1) {
+ if (isAArch64)
+ return (Data[0] & 0x07C00000) >> 22;
return (Data[0] & 0x0f800000) >> 23;
+ }
return Data[1] & 0x0000ffff;
}
uint8_t CodeWords() const {
- if (HeaderWords(*this) == 1)
+ if (HeaderWords(*this) == 1) {
+ if (isAArch64)
+ return (Data[0] & 0xf8000000) >> 27;
return (Data[0] & 0xf0000000) >> 28;
+ }
return (Data[1] & 0x00ff0000) >> 16;
}
@@ -373,6 +427,8 @@ struct ExceptionDataRecord {
};
inline size_t HeaderWords(const ExceptionDataRecord &XR) {
+ if (XR.isAArch64)
+ return (XR.Data[0] & 0xffc00000) ? 1 : 2;
return (XR.Data[0] & 0xff800000) ? 1 : 2;
}
}
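
A small sketch of decoding one packed .xdata header with the updated record type above, choosing the ARM or ARM64 field layout at run time. The accessors come from this header; the namespace and the caller-supplied word buffer are assumptions.

#include "llvm/Support/ARMWinEH.h"
#include "llvm/Support/raw_ostream.h"

static void dumpXDataHeader(const llvm::support::ulittle32_t *Words,
                            bool IsAArch64) {
  using namespace llvm::ARM::WinEH;
  ExceptionDataRecord XR(Words, IsAArch64);
  llvm::outs() << "function length (bytes): "
               << (IsAArch64 ? XR.FunctionLengthInBytesAArch64()
                             : XR.FunctionLengthInBytesARM())
               << ", epilogue count: " << unsigned(XR.EpilogueCount())
               << ", code words: " << unsigned(XR.CodeWords())
               << ", header words: " << HeaderWords(XR) << "\n";
}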
diff --git a/contrib/llvm/include/llvm/Support/Allocator.h b/contrib/llvm/include/llvm/Support/Allocator.h
index 184ac491b1f1..42d08378a677 100644
--- a/contrib/llvm/include/llvm/Support/Allocator.h
+++ b/contrib/llvm/include/llvm/Support/Allocator.h
@@ -21,6 +21,7 @@
#ifndef LLVM_SUPPORT_ALLOCATOR_H
#define LLVM_SUPPORT_ALLOCATOR_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -283,6 +284,60 @@ public:
size_t GetNumSlabs() const { return Slabs.size() + CustomSizedSlabs.size(); }
+ /// \return An index uniquely and reproducibly identifying
+ /// an input pointer \p Ptr in the given allocator.
+ /// The returned value is negative iff the object is inside a custom-size
+ /// slab.
+ /// Returns an empty optional if the pointer is not found in the allocator.
+ llvm::Optional<int64_t> identifyObject(const void *Ptr) {
+ const char *P = static_cast<const char *>(Ptr);
+ int64_t InSlabIdx = 0;
+ for (size_t Idx = 0, E = Slabs.size(); Idx < E; Idx++) {
+ const char *S = static_cast<const char *>(Slabs[Idx]);
+ if (P >= S && P < S + computeSlabSize(Idx))
+ return InSlabIdx + static_cast<int64_t>(P - S);
+ InSlabIdx += static_cast<int64_t>(computeSlabSize(Idx));
+ }
+
+ // Use negative index to denote custom sized slabs.
+ int64_t InCustomSizedSlabIdx = -1;
+ for (size_t Idx = 0, E = CustomSizedSlabs.size(); Idx < E; Idx++) {
+ const char *S = static_cast<const char *>(CustomSizedSlabs[Idx].first);
+ size_t Size = CustomSizedSlabs[Idx].second;
+ if (P >= S && P < S + Size)
+ return InCustomSizedSlabIdx - static_cast<int64_t>(P - S);
+ InCustomSizedSlabIdx -= static_cast<int64_t>(Size);
+ }
+ return None;
+ }
+
+ /// A wrapper around identifyObject that additionally asserts that
+ /// the object is indeed within the allocator.
+ /// \return An index uniquely and reproducibly identifying
+ /// an input pointer \p Ptr in the given allocator.
+ int64_t identifyKnownObject(const void *Ptr) {
+ Optional<int64_t> Out = identifyObject(Ptr);
+ assert(Out && "Wrong allocator used");
+ return *Out;
+ }
+
+ /// A wrapper around identifyKnownObject. Accepts type information
+ /// about the object and produces a smaller identifier by relying on
+ /// the alignment information. Note that sub-classes may have different
+ /// alignment, so the most base class should be passed as template parameter
+ /// in order to obtain correct results. For that reason automatic template
+ /// parameter deduction is disabled.
+ /// \return An index uniquely and reproducibly identifying
+ /// an input pointer \p Ptr in the given allocator. This identifier is
+ /// different from the ones produced by identifyObject and
+ /// identifyAlignedObject.
+ template <typename T>
+ int64_t identifyKnownAlignedObject(const void *Ptr) {
+ int64_t Out = identifyKnownObject(Ptr);
+ assert(Out % alignof(T) == 0 && "Wrong alignment information");
+ return Out / alignof(T);
+ }
+
size_t getTotalMemory() const {
size_t TotalMemory = 0;
for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I)
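
A minimal sketch of the new identification helpers added above, assuming a default BumpPtrAllocator: each allocation gets a stable, allocator-relative index, and identifyKnownAlignedObject scales that index by the type's alignment.

#include "llvm/Support/Allocator.h"
#include <cassert>
#include <cstdint>

static void identifyExample() {
  llvm::BumpPtrAllocator Alloc;
  int *A = Alloc.Allocate<int>();
  int *B = Alloc.Allocate<int>();
  int64_t RawA = Alloc.identifyKnownObject(A);              // byte index in a slab
  int64_t IdxA = Alloc.identifyKnownAlignedObject<int>(A);  // scaled by alignof(int)
  int64_t IdxB = Alloc.identifyKnownAlignedObject<int>(B);
  assert(RawA % alignof(int) == 0 && IdxA != IdxB &&
         "distinct objects receive distinct, aligned identifiers");
  (void)RawA; (void)IdxA; (void)IdxB;
}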
diff --git a/contrib/llvm/include/llvm/Support/BinaryStreamArray.h b/contrib/llvm/include/llvm/Support/BinaryStreamArray.h
index d1571cb37fc6..7c110fcb6a4b 100644
--- a/contrib/llvm/include/llvm/Support/BinaryStreamArray.h
+++ b/contrib/llvm/include/llvm/Support/BinaryStreamArray.h
@@ -96,21 +96,32 @@ public:
explicit VarStreamArray(const Extractor &E) : E(E) {}
- explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {}
+ explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
+ : Stream(Stream), Skew(Skew) {}
- VarStreamArray(BinaryStreamRef Stream, const Extractor &E)
- : Stream(Stream), E(E) {}
+ VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
+ : Stream(Stream), E(E), Skew(Skew) {}
Iterator begin(bool *HadError = nullptr) const {
- return Iterator(*this, E, HadError);
+ return Iterator(*this, E, Skew, nullptr);
}
bool valid() const { return Stream.valid(); }
+ uint32_t skew() const { return Skew; }
Iterator end() const { return Iterator(E); }
bool empty() const { return Stream.getLength() == 0; }
+ VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
+ uint32_t End) const {
+ assert(Begin >= Skew);
+ // We should never cut off the beginning of the stream since it might be
+ // skewed, meaning the initial bytes are important.
+ BinaryStreamRef NewStream = Stream.slice(0, End);
+ return {NewStream, E, Begin};
+ }
+
/// given an offset into the array's underlying stream, return an
/// iterator to the record at that offset. This is considered unsafe
/// since the behavior is undefined if \p Offset does not refer to the
@@ -123,11 +134,17 @@ public:
Extractor &getExtractor() { return E; }
BinaryStreamRef getUnderlyingStream() const { return Stream; }
- void setUnderlyingStream(BinaryStreamRef S) { Stream = S; }
+ void setUnderlyingStream(BinaryStreamRef S, uint32_t Skew = 0) {
+ Stream = S;
+ this->Skew = Skew;
+ }
+
+ void drop_front() { Skew += begin()->length(); }
private:
BinaryStreamRef Stream;
Extractor E;
+ uint32_t Skew;
};
template <typename ValueType, typename Extractor>
@@ -139,10 +156,6 @@ class VarStreamArrayIterator
public:
VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
- bool *HadError)
- : VarStreamArrayIterator(Array, E, 0, HadError) {}
-
- VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
uint32_t Offset, bool *HadError)
: IterRef(Array.Stream.drop_front(Offset)), Extract(E),
Array(&Array), AbsOffset(Offset), HadError(HadError) {
diff --git a/contrib/llvm/include/llvm/Support/BinaryStreamReader.h b/contrib/llvm/include/llvm/Support/BinaryStreamReader.h
index fe77b550c453..392958de30d5 100644
--- a/contrib/llvm/include/llvm/Support/BinaryStreamReader.h
+++ b/contrib/llvm/include/llvm/Support/BinaryStreamReader.h
@@ -203,11 +203,12 @@ public:
/// \returns a success error code if the data was successfully read, otherwise
/// returns an appropriate error code.
template <typename T, typename U>
- Error readArray(VarStreamArray<T, U> &Array, uint32_t Size) {
+ Error readArray(VarStreamArray<T, U> &Array, uint32_t Size,
+ uint32_t Skew = 0) {
BinaryStreamRef S;
if (auto EC = readStreamRef(S, Size))
return EC;
- Array.setUnderlyingStream(S);
+ Array.setUnderlyingStream(S, Skew);
return Error::success();
}
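
A hedged, templated sketch of the new Skew parameter above: records are assumed to start HeaderBytes into the stream, but the leading bytes are kept so record offsets stay absolute. RecordT and ExtractorT stand in for whatever concrete types a client already uses with VarStreamArray.

#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <cstdint>

template <typename RecordT, typename ExtractorT>
llvm::Error readSkewedArray(llvm::BinaryStreamReader &Reader,
                            llvm::VarStreamArray<RecordT, ExtractorT> &Array,
                            uint32_t Size, uint32_t HeaderBytes) {
  // Iteration will begin at offset HeaderBytes, not at offset 0.
  if (auto EC = Reader.readArray(Array, Size, /*Skew=*/HeaderBytes))
    return EC;
  // substream() keeps the skewed prefix, so this slice stays well-formed.
  auto Tail = Array.substream(Array.skew(), Size);
  (void)Tail;
  return llvm::Error::success();
}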
diff --git a/contrib/llvm/include/llvm/Support/BuryPointer.h b/contrib/llvm/include/llvm/Support/BuryPointer.h
new file mode 100644
index 000000000000..53f1f395b922
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/BuryPointer.h
@@ -0,0 +1,30 @@
+//===- llvm/Support/BuryPointer.h - Memory Manipulation/Leak ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_BURYPOINTER_H
+#define LLVM_SUPPORT_BURYPOINTER_H
+
+#include <memory>
+
+namespace llvm {
+
+// In tools that will exit soon anyway, going through the process of explicitly
+// deallocating resources can be unnecessary - better to leak the resources and
+// let the OS clean them up when the process ends. Use this function to ensure
+// the memory is not misdiagnosed as an unintentional leak by leak detection
+// tools (this is achieved by preserving pointers to the object in a globally
+// visible array).
+void BuryPointer(const void *Ptr);
+template <typename T> void BuryPointer(std::unique_ptr<T> Ptr) {
+ BuryPointer(Ptr.release());
+}
+
+} // namespace llvm
+
+#endif
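
A short sketch of the intended use described in the comment above: a tool that is about to exit hands off an expensive-to-destroy object instead of destructing it, so leak checkers stay quiet. FrontendData is a hypothetical type.

#include "llvm/Support/BuryPointer.h"
#include <memory>

struct FrontendData { /* large object graph, expensive to tear down */ };

static void finishAndExitSoon(std::unique_ptr<FrontendData> Data) {
  // Deliberately "leak" the object, keeping a globally visible pointer to it
  // so leak-detection tools do not report it.
  llvm::BuryPointer(std::move(Data));
}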
diff --git a/contrib/llvm/include/llvm/Support/CFGUpdate.h b/contrib/llvm/include/llvm/Support/CFGUpdate.h
new file mode 100644
index 000000000000..63c24a3d2a20
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/CFGUpdate.h
@@ -0,0 +1,118 @@
+//===- CFGUpdate.h - Encode a CFG Edge Update. ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a CFG Edge Update: Insert or Delete, and two Nodes as the
+// Edge ends.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CFGUPDATE_H
+#define LLVM_SUPPORT_CFGUPDATE_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace cfg {
+enum class UpdateKind : unsigned char { Insert, Delete };
+
+template <typename NodePtr> class Update {
+ using NodeKindPair = PointerIntPair<NodePtr, 1, UpdateKind>;
+ NodePtr From;
+ NodeKindPair ToAndKind;
+
+public:
+ Update(UpdateKind Kind, NodePtr From, NodePtr To)
+ : From(From), ToAndKind(To, Kind) {}
+
+ UpdateKind getKind() const { return ToAndKind.getInt(); }
+ NodePtr getFrom() const { return From; }
+ NodePtr getTo() const { return ToAndKind.getPointer(); }
+ bool operator==(const Update &RHS) const {
+ return From == RHS.From && ToAndKind == RHS.ToAndKind;
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << (getKind() == UpdateKind::Insert ? "Insert " : "Delete ");
+ getFrom()->printAsOperand(OS, false);
+ OS << " -> ";
+ getTo()->printAsOperand(OS, false);
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
+};
+
+// LegalizeUpdates function simplifies updates assuming a graph structure.
+// This function serves double purpose:
+// a) It removes redundant updates, which makes it easier to reverse-apply
+// them when traversing CFG.
+// b) It optimizes away updates that cancel each other out, as the end result
+// is the same.
+template <typename NodePtr>
+void LegalizeUpdates(ArrayRef<Update<NodePtr>> AllUpdates,
+ SmallVectorImpl<Update<NodePtr>> &Result,
+ bool InverseGraph) {
+ // Count the total number of insertions of each edge.
+ // Each insertion adds 1 and deletion subtracts 1. The end number should be
+ // one of {-1 (deletion), 0 (NOP), +1 (insertion)}. Otherwise, the sequence
+ // of updates contains multiple updates of the same kind and we assert for
+ // that case.
+ SmallDenseMap<std::pair<NodePtr, NodePtr>, int, 4> Operations;
+ Operations.reserve(AllUpdates.size());
+
+ for (const auto &U : AllUpdates) {
+ NodePtr From = U.getFrom();
+ NodePtr To = U.getTo();
+ if (InverseGraph)
+ std::swap(From, To); // Reverse edge for postdominators.
+
+ Operations[{From, To}] += (U.getKind() == UpdateKind::Insert ? 1 : -1);
+ }
+
+ Result.clear();
+ Result.reserve(Operations.size());
+ for (auto &Op : Operations) {
+ const int NumInsertions = Op.second;
+ assert(std::abs(NumInsertions) <= 1 && "Unbalanced operations!");
+ if (NumInsertions == 0)
+ continue;
+ const UpdateKind UK =
+ NumInsertions > 0 ? UpdateKind::Insert : UpdateKind::Delete;
+ Result.push_back({UK, Op.first.first, Op.first.second});
+ }
+
+ // Make the order consistent by not relying on pointer values within the
+ // set. Reuse the old Operations map.
+ // In the future, we should sort by something else to minimize the amount
+ // of work needed to perform the series of updates.
+ for (size_t i = 0, e = AllUpdates.size(); i != e; ++i) {
+ const auto &U = AllUpdates[i];
+ if (!InverseGraph)
+ Operations[{U.getFrom(), U.getTo()}] = int(i);
+ else
+ Operations[{U.getTo(), U.getFrom()}] = int(i);
+ }
+
+ llvm::sort(Result,
+ [&Operations](const Update<NodePtr> &A, const Update<NodePtr> &B) {
+ return Operations[{A.getFrom(), A.getTo()}] >
+ Operations[{B.getFrom(), B.getTo()}];
+ });
+}
+
+} // end namespace cfg
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_CFGUPDATE_H
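
A sketch of the legalization behaviour documented above: an insertion and a deletion of the same edge cancel, so only the net update survives. NodePtr is left generic; any CFG node pointer type should work.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CFGUpdate.h"

template <typename NodePtr>
void legalizeExample(NodePtr A, NodePtr B, NodePtr C) {
  using U = llvm::cfg::Update<NodePtr>;
  llvm::SmallVector<U, 4> Raw = {
      {llvm::cfg::UpdateKind::Insert, A, B},
      {llvm::cfg::UpdateKind::Delete, A, B},  // cancels the insertion above
      {llvm::cfg::UpdateKind::Insert, B, C}};
  llvm::SmallVector<U, 4> Legal;
  llvm::cfg::LegalizeUpdates<NodePtr>(Raw, Legal, /*InverseGraph=*/false);
  // Legal now contains only the Insert B -> C update.
}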
diff --git a/contrib/llvm/include/llvm/Support/Chrono.h b/contrib/llvm/include/llvm/Support/Chrono.h
index 994068af3771..57677e8d5cf1 100644
--- a/contrib/llvm/include/llvm/Support/Chrono.h
+++ b/contrib/llvm/include/llvm/Support/Chrono.h
@@ -47,6 +47,14 @@ toTimePoint(std::time_t T) {
return time_point_cast<seconds>(system_clock::from_time_t(T));
}
+/// Convert a std::time_t + nanoseconds to a TimePoint
+LLVM_ATTRIBUTE_ALWAYS_INLINE inline TimePoint<>
+toTimePoint(std::time_t T, uint32_t nsec) {
+ using namespace std::chrono;
+ return time_point_cast<nanoseconds>(system_clock::from_time_t(T))
+ + nanoseconds(nsec);
+}
+
} // namespace sys
raw_ostream &operator<<(raw_ostream &OS, sys::TimePoint<> TP);
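
A small sketch of the new overload above, assuming a POSIX-style timespec as the source of the seconds/nanoseconds pair.

#include "llvm/Support/Chrono.h"
#include <cstdint>
#include <ctime>

static llvm::sys::TimePoint<> fromTimespec(const timespec &TS) {
  // Nanosecond-precision conversion instead of the seconds-only overload.
  return llvm::sys::toTimePoint(TS.tv_sec,
                                static_cast<uint32_t>(TS.tv_nsec));
}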
diff --git a/contrib/llvm/include/llvm/Support/CodeGen.h b/contrib/llvm/include/llvm/Support/CodeGen.h
index 5f9e33129587..22e74167266c 100644
--- a/contrib/llvm/include/llvm/Support/CodeGen.h
+++ b/contrib/llvm/include/llvm/Support/CodeGen.h
@@ -25,7 +25,7 @@ namespace llvm {
// Code model types.
namespace CodeModel {
// Sync changes with CodeGenCWrappers.h.
- enum Model { Small, Kernel, Medium, Large };
+ enum Model { Tiny, Small, Kernel, Medium, Large };
}
namespace PICLevel {
@@ -57,6 +57,11 @@ namespace llvm {
};
}
+ // Specify effect of frame pointer elimination optimization.
+ namespace FramePointer {
+ enum FP {All, NonLeaf, None};
+ }
+
} // end llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Support/CommandLine.h b/contrib/llvm/include/llvm/Support/CommandLine.h
index 799b41fbf8b0..a8ad89384d17 100644
--- a/contrib/llvm/include/llvm/Support/CommandLine.h
+++ b/contrib/llvm/include/llvm/Support/CommandLine.h
@@ -56,9 +56,18 @@ namespace cl {
// Returns true on success. Otherwise, this will print the error message to
// stderr and exit if \p Errs is not set (nullptr by default), or print the
// error message to \p Errs and return false if \p Errs is provided.
+//
+// If EnvVar is not nullptr, command-line options are also parsed from the
+// environment variable named by EnvVar. Precedence is given to occurrences
+// from argv. This precedence is currently implemented by parsing argv after
+// the environment variable, so it is only implemented correctly for options
+// that give precedence to later occurrences. If your program supports options
+// that give precedence to earlier occurrences, you will need to extend this
+// function to support it correctly.
bool ParseCommandLineOptions(int argc, const char *const *argv,
StringRef Overview = "",
- raw_ostream *Errs = nullptr);
+ raw_ostream *Errs = nullptr,
+ const char *EnvVar = nullptr);
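
A hedged sketch of the new EnvVar parameter documented above: options from the named environment variable are parsed before argv, so the argv occurrences win where later occurrences take precedence. The variable name MYTOOL_OPTIONS is illustrative, not something LLVM defines.

#include "llvm/Support/CommandLine.h"

int main(int argc, char **argv) {
  // Also honours options from $MYTOOL_OPTIONS, with argv taking precedence.
  llvm::cl::ParseCommandLineOptions(argc, argv, "my tool\n",
                                    /*Errs=*/nullptr,
                                    /*EnvVar=*/"MYTOOL_OPTIONS");
  return 0;
}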
//===----------------------------------------------------------------------===//
// ParseEnvironmentOptions - Environment variable option processing alternate
@@ -147,6 +156,9 @@ enum OptionHidden { // Control whether -help shows this option
// enabled, and used, the value for the flag comes from the suffix of the
// argument.
//
+// AlwaysPrefix - Only allow the behavior enabled by the Prefix flag and reject
+// the Option=Value form.
+//
// Grouping - With this option enabled, multiple letter options are allowed to
// bunch together with only a single hyphen for the whole group. This allows
// emulation of the behavior that ls uses for example: ls -la === ls -l -a
@@ -156,7 +168,8 @@ enum FormattingFlags {
NormalFormatting = 0x00, // Nothing special
Positional = 0x01, // Is a positional argument, no '-' required
Prefix = 0x02, // Can this option directly prefix its value?
- Grouping = 0x03 // Can this option group with other options?
+ AlwaysPrefix = 0x03, // Can this option only directly prefix its value?
+ Grouping = 0x04 // Can this option group with other options?
};
enum MiscFlags { // Miscellaneous flags to adjust argument
@@ -256,7 +269,7 @@ class Option {
// detail representing the non-value
unsigned Value : 2;
unsigned HiddenFlag : 2; // enum OptionHidden
- unsigned Formatting : 2; // enum FormattingFlags
+ unsigned Formatting : 3; // enum FormattingFlags
unsigned Misc : 3;
unsigned Position = 0; // Position of last occurrence of the option
unsigned AdditionalVals = 0; // Greater than 0 for multi-valued option.
diff --git a/contrib/llvm/include/llvm/Support/Compiler.h b/contrib/llvm/include/llvm/Support/Compiler.h
index 4de815fe61d7..14e4d6e97140 100644
--- a/contrib/llvm/include/llvm/Support/Compiler.h
+++ b/contrib/llvm/include/llvm/Support/Compiler.h
@@ -133,6 +133,19 @@
#define LLVM_NODISCARD
#endif
+// Indicate that a non-static, non-const C++ member function reinitializes
+// the entire object to a known state, independent of the previous state of
+// the object.
+//
+// The clang-tidy check bugprone-use-after-move recognizes this attribute as a
+// marker that a moved-from object has left the indeterminate state and can be
+// reused.
+#if __has_cpp_attribute(clang::reinitializes)
+#define LLVM_ATTRIBUTE_REINITIALIZES [[clang::reinitializes]]
+#else
+#define LLVM_ATTRIBUTE_REINITIALIZES
+#endif
+
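A short sketch of where the new attribute is meant to appear: on a member function that puts a (possibly moved-from) object back into a fully known state. The Buffer class is illustrative only.

#include "llvm/Support/Compiler.h"
#include <vector>

class Buffer {
  std::vector<char> Data;

public:
  // bugprone-use-after-move treats the object as usable again after clear().
  LLVM_ATTRIBUTE_REINITIALIZES void clear() { Data.clear(); }
};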
// Some compilers warn about unused functions. When a function is sometimes
// used or not depending on build settings (e.g. a function only called from
// within "assert"), this attribute can be used to suppress such warnings.
@@ -519,7 +532,7 @@ namespace llvm {
/// reduced default alignment.
inline void *allocate_buffer(size_t Size, size_t Alignment) {
return ::operator new(Size
-#if __cpp_aligned_new
+#ifdef __cpp_aligned_new
,
std::align_val_t(Alignment)
#endif
@@ -535,11 +548,11 @@ inline void *allocate_buffer(size_t Size, size_t Alignment) {
/// most likely using the above helper.
inline void deallocate_buffer(void *Ptr, size_t Size, size_t Alignment) {
::operator delete(Ptr
-#if __cpp_sized_deallocation
+#ifdef __cpp_sized_deallocation
,
Size
#endif
-#if __cpp_aligned_new
+#ifdef __cpp_aligned_new
,
std::align_val_t(Alignment)
#endif
diff --git a/contrib/llvm/include/llvm/Support/Compression.h b/contrib/llvm/include/llvm/Support/Compression.h
index 2d191abe4b1a..f7258f4bf8f8 100644
--- a/contrib/llvm/include/llvm/Support/Compression.h
+++ b/contrib/llvm/include/llvm/Support/Compression.h
@@ -23,17 +23,15 @@ class StringRef;
namespace zlib {
-enum CompressionLevel {
- NoCompression,
- DefaultCompression,
- BestSpeedCompression,
- BestSizeCompression
-};
+static constexpr int NoCompression = 0;
+static constexpr int BestSpeedCompression = 1;
+static constexpr int DefaultCompression = 6;
+static constexpr int BestSizeCompression = 9;
bool isAvailable();
Error compress(StringRef InputBuffer, SmallVectorImpl<char> &CompressedBuffer,
- CompressionLevel Level = DefaultCompression);
+ int Level = DefaultCompression);
Error uncompress(StringRef InputBuffer, char *UncompressedBuffer,
size_t &UncompressedSize);
@@ -49,4 +47,3 @@ uint32_t crc32(StringRef Buffer);
} // End of namespace llvm
#endif
-
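
A sketch of the call after the change above: the level is now a plain int, so either the named constants or any zlib level from 0 to 9 can be passed. The error text is illustrative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"

static llvm::Error compressBestSize(llvm::StringRef Input,
                                    llvm::SmallVectorImpl<char> &Out) {
  if (!llvm::zlib::isAvailable())
    return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                   "zlib is not available");
  return llvm::zlib::compress(Input, Out, llvm::zlib::BestSizeCompression);
}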
diff --git a/contrib/llvm/include/llvm/Support/Debug.h b/contrib/llvm/include/llvm/Support/Debug.h
index 980abfb0e8da..df86dbb82414 100644
--- a/contrib/llvm/include/llvm/Support/Debug.h
+++ b/contrib/llvm/include/llvm/Support/Debug.h
@@ -94,6 +94,10 @@ extern bool VerifyDomInfo;
///
extern bool VerifyLoopInfo;
+/// Enables verification of MemorySSA.
+///
+extern bool VerifyMemorySSA;
+
///\}
/// EnableDebugBuffering - This defaults to false. If true, the debug
diff --git a/contrib/llvm/include/llvm/Support/DebugCounter.h b/contrib/llvm/include/llvm/Support/DebugCounter.h
index 83bd5a06c94a..6eadd5c6aeff 100644
--- a/contrib/llvm/include/llvm/Support/DebugCounter.h
+++ b/contrib/llvm/include/llvm/Support/DebugCounter.h
@@ -55,6 +55,8 @@ namespace llvm {
class DebugCounter {
public:
+ ~DebugCounter();
+
/// Returns a reference to the singleton instance.
static DebugCounter &instance();
diff --git a/contrib/llvm/include/llvm/Support/Error.h b/contrib/llvm/include/llvm/Support/Error.h
index 8015cab45a06..ee2cbeec97a8 100644
--- a/contrib/llvm/include/llvm/Support/Error.h
+++ b/contrib/llvm/include/llvm/Support/Error.h
@@ -14,8 +14,9 @@
#ifndef LLVM_SUPPORT_ERROR_H
#define LLVM_SUPPORT_ERROR_H
-#include "llvm/ADT/SmallVector.h"
+#include "llvm-c/Error.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/abi-breaking.h"
@@ -155,9 +156,10 @@ private:
/// they're moved-assigned or constructed from Success values that have already
/// been checked. This enforces checking through all levels of the call stack.
class LLVM_NODISCARD Error {
- // ErrorList needs to be able to yank ErrorInfoBase pointers out of this
- // class to add to the error list.
+ // Both ErrorList and FileError need to be able to yank ErrorInfoBase
+ // pointers out of this class to add to the error list.
friend class ErrorList;
+ friend class FileError;
// handleErrors needs to be able to set the Checked flag.
template <typename... HandlerTs>
@@ -167,6 +169,9 @@ class LLVM_NODISCARD Error {
// error.
template <typename T> friend class Expected;
+ // wrap needs to be able to steal the payload.
+ friend LLVMErrorRef wrap(Error);
+
protected:
/// Create a success value. Prefer using 'Error::success()' for readability
Error() {
@@ -317,7 +322,7 @@ private:
/// Subclass of Error for the sole purpose of identifying the success path in
/// the type system. This allows to catch invalid conversion to Expected<T> at
/// compile time.
-class ErrorSuccess : public Error {};
+class ErrorSuccess final : public Error {};
inline ErrorSuccess Error::success() { return ErrorSuccess(); }
@@ -339,6 +344,8 @@ template <typename ErrT, typename... ArgTs> Error make_error(ArgTs &&... Args) {
template <typename ThisErrT, typename ParentErrT = ErrorInfoBase>
class ErrorInfo : public ParentErrT {
public:
+ using ParentErrT::ParentErrT; // inherit constructors
+
static const void *classID() { return &ThisErrT::ID; }
const void *dynamicClassID() const override { return &ThisErrT::ID; }
@@ -946,10 +953,14 @@ Expected<T> handleExpected(Expected<T> ValOrErr, RecoveryFtor &&RecoveryPath,
/// will be printed before the first one is logged. A newline will be printed
/// after each error.
///
+/// This function is compatible with the helpers from Support/WithColor.h. You
+/// can pass any of them as the OS. Please consider using them instead of
+/// including 'error: ' in the ErrorBanner.
+///
/// This is useful in the base level of your program to allow clean termination
/// (allowing clean deallocation of resources, etc.), while reporting error
/// information to the user.
-void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner);
+void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner = {});
/// Write all error messages (if any) in E to a string. The newline character
/// is used to separate error messages.
@@ -1055,6 +1066,8 @@ private:
class ECError : public ErrorInfo<ECError> {
friend Error errorCodeToError(std::error_code);
+ virtual void anchor() override;
+
public:
void setErrorCode(std::error_code EC) { this->EC = EC; }
std::error_code convertToErrorCode() const override { return EC; }
@@ -1106,10 +1119,33 @@ template <typename T> ErrorOr<T> expectedToErrorOr(Expected<T> &&E) {
/// StringError is useful in cases where the client is not expected to be able
/// to consume the specific error message programmatically (for example, if the
/// error message is to be presented to the user).
+///
+/// StringError can also be used when additional information is to be printed
+/// along with an error_code message. Depending on the constructor called, this
+/// class can either display:
+/// 1. the error_code message (ECError behavior)
+/// 2. a string
+/// 3. the error_code message and a string
+///
+/// These behaviors are useful when subtyping is required; for example, when a
+/// specific library needs an explicit error type. In the example below,
+/// PDBError is derived from StringError:
+///
+/// @code{.cpp}
+/// Expected<int> foo() {
+/// return llvm::make_error<PDBError>(pdb_error_code::dia_failed_loading,
+/// "Additional information");
+/// }
+/// @endcode
+///
class StringError : public ErrorInfo<StringError> {
public:
static char ID;
+ // Prints EC + S and converts to EC
+ StringError(std::error_code EC, const Twine &S = Twine());
+
+ // Prints S and converts to EC
StringError(const Twine &S, std::error_code EC);
void log(raw_ostream &OS) const override;
@@ -1120,6 +1156,7 @@ public:
private:
std::string Msg;
std::error_code EC;
+ const bool PrintMsgOnly = false;
};
/// Create formatted StringError object.
@@ -1134,6 +1171,53 @@ Error createStringError(std::error_code EC, char const *Fmt,
Error createStringError(std::error_code EC, char const *Msg);
+/// This class wraps a filename and another Error.
+///
+/// In some cases, an error needs to live alongside a 'source' name, in order to
+/// show more detailed information to the user.
+class FileError final : public ErrorInfo<FileError> {
+
+ friend Error createFileError(std::string, Error);
+
+public:
+ void log(raw_ostream &OS) const override {
+ assert(Err && !FileName.empty() && "Trying to log after takeError().");
+ OS << "'" << FileName << "': ";
+ Err->log(OS);
+ }
+
+ Error takeError() { return Error(std::move(Err)); }
+
+ std::error_code convertToErrorCode() const override;
+
+ // Used by ErrorInfo::classID.
+ static char ID;
+
+private:
+ FileError(std::string F, std::unique_ptr<ErrorInfoBase> E) {
+ assert(E && "Cannot create FileError from Error success value.");
+ assert(!F.empty() &&
+ "The file name provided to FileError must not be empty.");
+ FileName = F;
+ Err = std::move(E);
+ }
+
+ static Error build(std::string F, Error E) {
+ return Error(std::unique_ptr<FileError>(new FileError(F, E.takePayload())));
+ }
+
+ std::string FileName;
+ std::unique_ptr<ErrorInfoBase> Err;
+};
+
+/// Concatenate a source file path and/or name with an Error. The resulting
+/// Error is unchecked.
+inline Error createFileError(std::string F, Error E) {
+ return FileError::build(F, std::move(E));
+}
+
+Error createFileError(std::string F, ErrorSuccess) = delete;
+
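A sketch of attaching a file name to an existing Error with the helper above; the path and the inner message are illustrative.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"

static llvm::Error openFailed(llvm::StringRef Path) {
  llvm::Error Inner = llvm::createStringError(llvm::inconvertibleErrorCode(),
                                              "permission denied");
  // Logged as: 'Path': permission denied
  return llvm::createFileError(Path.str(), std::move(Inner));
}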
/// Helper for check-and-exit error handling.
///
/// For tool use only. NOT FOR USE IN LIBRARY CODE.
@@ -1183,6 +1267,17 @@ private:
std::function<int(const Error &)> GetExitCode;
};
+/// Conversion from Error to LLVMErrorRef for C error bindings.
+inline LLVMErrorRef wrap(Error Err) {
+ return reinterpret_cast<LLVMErrorRef>(Err.takePayload().release());
+}
+
+/// Conversion from LLVMErrorRef to Error for C error bindings.
+inline Error unwrap(LLVMErrorRef ErrRef) {
+ return Error(std::unique_ptr<ErrorInfoBase>(
+ reinterpret_cast<ErrorInfoBase *>(ErrRef)));
+}
+
} // end namespace llvm
#endif // LLVM_SUPPORT_ERROR_H
diff --git a/contrib/llvm/include/llvm/Support/ErrorHandling.h b/contrib/llvm/include/llvm/Support/ErrorHandling.h
index 39cbfed2436a..fec39e59a717 100644
--- a/contrib/llvm/include/llvm/Support/ErrorHandling.h
+++ b/contrib/llvm/include/llvm/Support/ErrorHandling.h
@@ -112,8 +112,8 @@ void install_out_of_memory_new_handler();
/// in the unwind chain.
///
/// If no error handler is installed (default), then a bad_alloc exception
-/// is thrown, if LLVM is compiled with exception support, otherwise an assertion
-/// is called.
+/// is thrown, if LLVM is compiled with exception support, otherwise an
+/// assertion is called.
void report_bad_alloc_error(const char *Reason, bool GenCrashDiag = true);
/// This function calls abort(), and prints the optional message to stderr.
diff --git a/contrib/llvm/include/llvm/Support/FileCheck.h b/contrib/llvm/include/llvm/Support/FileCheck.h
new file mode 100644
index 000000000000..4061a26e22c5
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/FileCheck.h
@@ -0,0 +1,282 @@
+//==-- llvm/Support/FileCheck.h ---------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file has some utilities to use FileCheck as an API
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FILECHECK_H
+#define LLVM_SUPPORT_FILECHECK_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/SourceMgr.h"
+#include <vector>
+#include <map>
+
+namespace llvm {
+
+/// Contains info about various FileCheck options.
+struct FileCheckRequest {
+ std::vector<std::string> CheckPrefixes;
+ bool NoCanonicalizeWhiteSpace = false;
+ std::vector<std::string> ImplicitCheckNot;
+ std::vector<std::string> GlobalDefines;
+ bool AllowEmptyInput = false;
+ bool MatchFullLines = false;
+ bool EnableVarScope = false;
+ bool AllowDeprecatedDagOverlap = false;
+ bool Verbose = false;
+ bool VerboseVerbose = false;
+};
+
+
+//===----------------------------------------------------------------------===//
+// Pattern Handling Code.
+//===----------------------------------------------------------------------===//
+
+namespace Check {
+
+enum FileCheckKind {
+ CheckNone = 0,
+ CheckPlain,
+ CheckNext,
+ CheckSame,
+ CheckNot,
+ CheckDAG,
+ CheckLabel,
+ CheckEmpty,
+
+ /// Indicates the pattern only matches the end of file. This is used for
+ /// trailing CHECK-NOTs.
+ CheckEOF,
+
+ /// Marks when parsing found a -NOT check combined with another CHECK suffix.
+ CheckBadNot,
+
+ /// Marks when parsing found a -COUNT directive with invalid count value.
+ CheckBadCount
+};
+
+class FileCheckType {
+ FileCheckKind Kind;
+ int Count; ///< optional Count for some checks
+
+public:
+ FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {}
+ FileCheckType(const FileCheckType &) = default;
+
+ operator FileCheckKind() const { return Kind; }
+
+ int getCount() const { return Count; }
+ FileCheckType &setCount(int C);
+
+ std::string getDescription(StringRef Prefix) const;
+};
+}
+
+struct FileCheckDiag;
+
+class FileCheckPattern {
+ SMLoc PatternLoc;
+
+ /// A fixed string to match as the pattern or empty if this pattern requires
+ /// a regex match.
+ StringRef FixedStr;
+
+ /// A regex string to match as the pattern or empty if this pattern requires
+ /// a fixed string to match.
+ std::string RegExStr;
+
+ /// Entries in this vector map to uses of a variable in the pattern, e.g.
+ /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and
+ /// we'll get an entry in this vector that tells us to insert the value of
+ /// bar at offset 3.
+ std::vector<std::pair<StringRef, unsigned>> VariableUses;
+
+ /// Maps definitions of variables to their parenthesized capture numbers.
+ ///
+ /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
+ /// 1.
+ std::map<StringRef, unsigned> VariableDefs;
+
+ Check::FileCheckType CheckTy;
+
+ /// Contains the number of the line this pattern is defined on.
+ unsigned LineNumber;
+
+public:
+ explicit FileCheckPattern(Check::FileCheckType Ty)
+ : CheckTy(Ty) {}
+
+ /// Returns the location in source code.
+ SMLoc getLoc() const { return PatternLoc; }
+
+ bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
+ unsigned LineNumber, const FileCheckRequest &Req);
+ size_t Match(StringRef Buffer, size_t &MatchLen,
+ StringMap<StringRef> &VariableTable) const;
+ void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
+ const StringMap<StringRef> &VariableTable,
+ SMRange MatchRange = None) const;
+ void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
+ const StringMap<StringRef> &VariableTable,
+ std::vector<FileCheckDiag> *Diags) const;
+
+ bool hasVariable() const {
+ return !(VariableUses.empty() && VariableDefs.empty());
+ }
+
+ Check::FileCheckType getCheckTy() const { return CheckTy; }
+
+ int getCount() const { return CheckTy.getCount(); }
+
+private:
+ bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
+ void AddBackrefToRegEx(unsigned BackrefNum);
+ unsigned
+ ComputeMatchDistance(StringRef Buffer,
+ const StringMap<StringRef> &VariableTable) const;
+ bool EvaluateExpression(StringRef Expr, std::string &Value) const;
+ size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
+};
+
+//===----------------------------------------------------------------------===//
+/// Summary of a FileCheck diagnostic.
+//===----------------------------------------------------------------------===//
+
+struct FileCheckDiag {
+ /// What is the FileCheck directive for this diagnostic?
+ Check::FileCheckType CheckTy;
+ /// Where is the FileCheck directive for this diagnostic?
+ unsigned CheckLine, CheckCol;
+ /// What type of match result does this diagnostic describe?
+ ///
+ /// A directive's supplied pattern is said to be either expected or excluded
+ /// depending on whether the pattern must have or must not have a match in
+ /// order for the directive to succeed. For example, a CHECK directive's
+ /// pattern is expected, and a CHECK-NOT directive's pattern is excluded.
+ /// All match result types whose names end with "Excluded" are for excluded
+ /// patterns, and all others are for expected patterns.
+ ///
+ /// There might be more than one match result for a single pattern. For
+ /// example, there might be several discarded matches
+ /// (MatchFoundButDiscarded) before either a good match
+ /// (MatchFoundAndExpected) or a failure to match (MatchNoneButExpected),
+ /// and there might be a fuzzy match (MatchFuzzy) after the latter.
+ enum MatchType {
+ /// Indicates a good match for an expected pattern.
+ MatchFoundAndExpected,
+ /// Indicates a match for an excluded pattern.
+ MatchFoundButExcluded,
+ /// Indicates a match for an expected pattern, but the match is on the
+ /// wrong line.
+ MatchFoundButWrongLine,
+ /// Indicates a discarded match for an expected pattern.
+ MatchFoundButDiscarded,
+ /// Indicates no match for an excluded pattern.
+ MatchNoneAndExcluded,
+ /// Indicates no match for an expected pattern, but this might follow good
+ /// matches when multiple matches are expected for the pattern, or it might
+ /// follow discarded matches for the pattern.
+ MatchNoneButExpected,
+ /// Indicates a fuzzy match that serves as a suggestion for the next
+ /// intended match for an expected pattern with too few or no good matches.
+ MatchFuzzy,
+ } MatchTy;
+ /// The search range if MatchTy is MatchNoneAndExcluded or
+ /// MatchNoneButExpected, or the match range otherwise.
+ unsigned InputStartLine;
+ unsigned InputStartCol;
+ unsigned InputEndLine;
+ unsigned InputEndCol;
+ FileCheckDiag(const SourceMgr &SM, const Check::FileCheckType &CheckTy,
+ SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange);
+};
+
+//===----------------------------------------------------------------------===//
+// Check Strings.
+//===----------------------------------------------------------------------===//
+
+/// A check that we found in the input file.
+struct FileCheckString {
+ /// The pattern to match.
+ FileCheckPattern Pat;
+
+ /// Which prefix name this check matched.
+ StringRef Prefix;
+
+ /// The location in the match file that the check string was specified.
+ SMLoc Loc;
+
+ /// All of the strings that are disallowed from occurring between this match
+ /// string and the previous one (or start of file).
+ std::vector<FileCheckPattern> DagNotStrings;
+
+ FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L)
+ : Pat(P), Prefix(S), Loc(L) {}
+
+ size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
+ size_t &MatchLen, StringMap<StringRef> &VariableTable,
+ FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const;
+
+ bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
+ bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
+ bool CheckNot(const SourceMgr &SM, StringRef Buffer,
+ const std::vector<const FileCheckPattern *> &NotStrings,
+ StringMap<StringRef> &VariableTable,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const;
+ size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
+ std::vector<const FileCheckPattern *> &NotStrings,
+ StringMap<StringRef> &VariableTable,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const;
+};
+
+/// The FileCheck class takes a request and exposes methods that use the
+/// information it carries.
+class FileCheck {
+ FileCheckRequest Req;
+
+public:
+ FileCheck(FileCheckRequest Req) : Req(Req) {}
+
+ // Combines the check prefixes into a single regex so that we can efficiently
+ // scan for any of the set.
+ //
+ // The semantics are that the longest match wins, which matches our regex
+ // library.
+ Regex buildCheckPrefixRegex();
+
+ /// Read the check file, which specifies the sequence of expected strings.
+ ///
+ /// The strings are added to the CheckStrings vector. Returns true in case of
+ /// an error, false otherwise.
+ bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+ std::vector<FileCheckString> &CheckStrings);
+
+ bool ValidateCheckPrefixes();
+
+ /// Canonicalize whitespace in the file. Line endings are replaced with
+ /// UNIX-style '\n'.
+ StringRef CanonicalizeFile(MemoryBuffer &MB,
+ SmallVectorImpl<char> &OutputBuffer);
+
+ /// Check the input to FileCheck provided in the \p Buffer against the \p
+ /// CheckStrings read from the check file.
+ ///
+ /// Returns false if the input fails to satisfy the checks.
+ bool CheckInput(SourceMgr &SM, StringRef Buffer,
+ ArrayRef<FileCheckString> CheckStrings,
+ std::vector<FileCheckDiag> *Diags = nullptr);
+};
+} // namespace llvm
+#endif
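A minimal sketch of how a driver might use the FileCheck class declared above (assuming the header is llvm/Support/FileCheck.h); the FileCheckRequest configuration and error reporting are elided, and the flow simply follows the doc comments: canonicalize, read the check file, then check the input.

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileCheck.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;

static bool runFileCheck(FileCheckRequest Req,
                         std::unique_ptr<MemoryBuffer> CheckFile,
                         std::unique_ptr<MemoryBuffer> InputFile) {
  FileCheck FC(Req);
  if (!FC.ValidateCheckPrefixes())
    return false;
  Regex PrefixRE = FC.buildCheckPrefixRegex();

  SourceMgr SM;
  SmallString<4096> CheckFileBuffer;
  StringRef CheckFileText = FC.CanonicalizeFile(*CheckFile, CheckFileBuffer);
  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
                            CheckFileText, CheckFile->getBufferIdentifier()),
                        SMLoc());

  std::vector<FileCheckString> CheckStrings;
  if (FC.ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
    return false; // ReadCheckFile returns true on error.

  SmallString<4096> InputFileBuffer;
  StringRef InputFileText = FC.CanonicalizeFile(*InputFile, InputFileBuffer);
  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
                            InputFileText, InputFile->getBufferIdentifier()),
                        SMLoc());

  std::vector<FileCheckDiag> Diags;
  return FC.CheckInput(SM, InputFileText, CheckStrings, &Diags);
}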
diff --git a/contrib/llvm/include/llvm/Support/FileOutputBuffer.h b/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
index ee8cbb730878..68226ca55502 100644
--- a/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
+++ b/contrib/llvm/include/llvm/Support/FileOutputBuffer.h
@@ -76,6 +76,10 @@ public:
/// deallocates the buffer and the target file is never written.
virtual ~FileOutputBuffer() {}
+ /// This removes the temporary file (unless it already was committed)
+ /// but keeps the memory mapping alive.
+ virtual void discard() {}
+
protected:
FileOutputBuffer(StringRef Path) : FinalPath(Path) {}
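A small sketch of the new discard() hook: write into a FileOutputBuffer and either commit it or drop the temporary file. The create(Path, Size) factory and getBufferStart()/commit() are assumed from the rest of this header (they are not shown in the hunk above).

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/FileOutputBuffer.h"
#include <algorithm>
using namespace llvm;

void writeOrDiscard(StringRef Path, ArrayRef<uint8_t> Data, bool Keep) {
  Expected<std::unique_ptr<FileOutputBuffer>> BufOrErr =
      FileOutputBuffer::create(Path, Data.size());
  if (!BufOrErr) {
    consumeError(BufOrErr.takeError());
    return;
  }
  std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufOrErr);
  std::copy(Data.begin(), Data.end(), Buf->getBufferStart());
  if (Keep)
    consumeError(Buf->commit()); // Rename the temporary into place.
  else
    Buf->discard();              // Remove the temp file; the mapping stays valid.
}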
diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h
index 02db4596bf1c..d2042f51d8c1 100644
--- a/contrib/llvm/include/llvm/Support/FileSystem.h
+++ b/contrib/llvm/include/llvm/Support/FileSystem.h
@@ -160,6 +160,8 @@ protected:
#if defined(LLVM_ON_UNIX)
time_t fs_st_atime = 0;
time_t fs_st_mtime = 0;
+ uint32_t fs_st_atime_nsec = 0;
+ uint32_t fs_st_mtime_nsec = 0;
uid_t fs_st_uid = 0;
gid_t fs_st_gid = 0;
off_t fs_st_size = 0;
@@ -180,9 +182,12 @@ public:
explicit basic_file_status(file_type Type) : Type(Type) {}
#if defined(LLVM_ON_UNIX)
- basic_file_status(file_type Type, perms Perms, time_t ATime, time_t MTime,
+ basic_file_status(file_type Type, perms Perms, time_t ATime,
+ uint32_t ATimeNSec, time_t MTime, uint32_t MTimeNSec,
uid_t UID, gid_t GID, off_t Size)
- : fs_st_atime(ATime), fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID),
+ : fs_st_atime(ATime), fs_st_mtime(MTime),
+ fs_st_atime_nsec(ATimeNSec), fs_st_mtime_nsec(MTimeNSec),
+ fs_st_uid(UID), fs_st_gid(GID),
fs_st_size(Size), Type(Type), Perms(Perms) {}
#elif defined(_WIN32)
basic_file_status(file_type Type, perms Perms, uint32_t LastAccessTimeHigh,
@@ -199,7 +204,20 @@ public:
// getters
file_type type() const { return Type; }
perms permissions() const { return Perms; }
+
+ /// The file access time as reported from the underlying file system.
+ ///
+ /// Also see comments on \c getLastModificationTime() related to the precision
+ /// of the returned value.
TimePoint<> getLastAccessedTime() const;
+
+ /// The file modification time as reported from the underlying file system.
+ ///
+ /// The returned value allows for nanosecond precision but the actual
+ /// resolution is an implementation detail of the underlying file system.
+ /// There is no guarantee for what kind of resolution you can expect; the
+ /// resolution can differ across platforms and even across mountpoints on the
+ /// same machine.
TimePoint<> getLastModificationTime() const;
#if defined(LLVM_ON_UNIX)
@@ -247,8 +265,11 @@ public:
#if defined(LLVM_ON_UNIX)
file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino,
- time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size)
- : basic_file_status(Type, Perms, ATime, MTime, UID, GID, Size),
+ time_t ATime, uint32_t ATimeNSec,
+ time_t MTime, uint32_t MTimeNSec,
+ uid_t UID, gid_t GID, off_t Size)
+ : basic_file_status(Type, Perms, ATime, ATimeNSec, MTime, MTimeNSec,
+ UID, GID, Size),
fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino) {}
#elif defined(_WIN32)
file_status(file_type Type, perms Perms, uint32_t LinkCount,
@@ -281,10 +302,7 @@ public:
/// relative/../path => <current-directory>/relative/../path
///
/// @param path A path that is modified to be an absolute path.
-/// @returns errc::success if \a path has been made absolute, otherwise a
-/// platform-specific error_code.
-std::error_code make_absolute(const Twine &current_directory,
- SmallVectorImpl<char> &path);
+void make_absolute(const Twine &current_directory, SmallVectorImpl<char> &path);
/// Make \a path an absolute path.
///
@@ -349,6 +367,12 @@ std::error_code create_hard_link(const Twine &to, const Twine &from);
std::error_code real_path(const Twine &path, SmallVectorImpl<char> &output,
bool expand_tilde = false);
+/// Expands ~ expressions to the user's home directory. On Unix ~user
+/// directories are resolved as well.
+///
+/// @param path The path to resolve.
+void expand_tilde(const Twine &path, SmallVectorImpl<char> &output);
+
/// Get the current path.
///
/// @param result Holds the current path on return.
@@ -666,7 +690,15 @@ inline std::error_code file_size(const Twine &Path, uint64_t &Result) {
/// @returns errc::success if the file times were successfully set, otherwise a
/// platform-specific error_code or errc::function_not_supported on
/// platforms where the functionality isn't available.
-std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time);
+std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
+ TimePoint<> ModificationTime);
+
+/// Simpler version that sets both file modification and access time to the same
+/// time.
+inline std::error_code setLastAccessAndModificationTime(int FD,
+ TimePoint<> Time) {
+ return setLastAccessAndModificationTime(FD, Time, Time);
+}
/// Is status available?
///
@@ -693,7 +725,7 @@ enum CreationDisposition : unsigned {
/// * If it does not already exist, create a new file.
CD_CreateNew = 1,
- /// CD_OpenAlways - When opening a file:
+ /// CD_OpenExisting - When opening a file:
/// * If it already exists, open the file with the offset set to 0.
/// * If it does not already exist, fail.
CD_OpenExisting = 2,
@@ -1092,38 +1124,51 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr);
/// @name Iterators
/// @{
-/// directory_entry - A single entry in a directory. Caches the status either
-/// from the result of the iteration syscall, or the first time status is
-/// called.
+/// directory_entry - A single entry in a directory.
class directory_entry {
+ // FIXME: different platforms make different information available "for free"
+ // when traversing a directory. The design of this class wraps most of the
+ // information in basic_file_status, so on platforms where we can't populate
+ // that whole structure, callers end up paying for a stat().
+ // std::filesystem::directory_entry may be a better model.
std::string Path;
- bool FollowSymlinks;
- basic_file_status Status;
+ file_type Type; // Most platforms can provide this.
+ bool FollowSymlinks; // Affects the behavior of status().
+ basic_file_status Status; // If available.
public:
- explicit directory_entry(const Twine &path, bool follow_symlinks = true,
- basic_file_status st = basic_file_status())
- : Path(path.str()), FollowSymlinks(follow_symlinks), Status(st) {}
+ explicit directory_entry(const Twine &Path, bool FollowSymlinks = true,
+ file_type Type = file_type::type_unknown,
+ basic_file_status Status = basic_file_status())
+ : Path(Path.str()), Type(Type), FollowSymlinks(FollowSymlinks),
+ Status(Status) {}
directory_entry() = default;
- void assign(const Twine &path, basic_file_status st = basic_file_status()) {
- Path = path.str();
- Status = st;
- }
-
- void replace_filename(const Twine &filename,
- basic_file_status st = basic_file_status());
+ void replace_filename(const Twine &Filename, file_type Type,
+ basic_file_status Status = basic_file_status());
const std::string &path() const { return Path; }
+ // Get basic information about the entry's file (a subset of fs::status()).
+ // On most platforms this is a stat() call.
+ // On Windows the information was already retrieved from the directory.
ErrorOr<basic_file_status> status() const;
+ // Get the type of this file.
+ // On most platforms (Linux/Mac/Windows/BSD), this was already retrieved.
+ // On some platforms (e.g. Solaris) this is a stat() call.
+ file_type type() const {
+ if (Type != file_type::type_unknown)
+ return Type;
+ auto S = status();
+ return S ? S->type() : file_type::type_unknown;
+ }
- bool operator==(const directory_entry& rhs) const { return Path == rhs.Path; }
- bool operator!=(const directory_entry& rhs) const { return !(*this == rhs); }
- bool operator< (const directory_entry& rhs) const;
- bool operator<=(const directory_entry& rhs) const;
- bool operator> (const directory_entry& rhs) const;
- bool operator>=(const directory_entry& rhs) const;
+ bool operator==(const directory_entry& RHS) const { return Path == RHS.Path; }
+ bool operator!=(const directory_entry& RHS) const { return !(*this == RHS); }
+ bool operator< (const directory_entry& RHS) const;
+ bool operator<=(const directory_entry& RHS) const;
+ bool operator> (const directory_entry& RHS) const;
+ bool operator>=(const directory_entry& RHS) const;
};
namespace detail {
@@ -1161,7 +1206,6 @@ public:
SmallString<128> path_storage;
ec = detail::directory_iterator_construct(
*State, path.toStringRef(path_storage), FollowSymlinks);
- update_error_code_for_current_entry(ec);
}
explicit directory_iterator(const directory_entry &de, std::error_code &ec,
@@ -1170,7 +1214,6 @@ public:
State = std::make_shared<detail::DirIterState>();
ec = detail::directory_iterator_construct(
*State, de.path(), FollowSymlinks);
- update_error_code_for_current_entry(ec);
}
/// Construct end iterator.
@@ -1179,7 +1222,6 @@ public:
// No operator++ because we need error_code.
directory_iterator &increment(std::error_code &ec) {
ec = directory_iterator_increment(*State);
- update_error_code_for_current_entry(ec);
return *this;
}
@@ -1199,26 +1241,6 @@ public:
bool operator!=(const directory_iterator &RHS) const {
return !(*this == RHS);
}
- // Other members as required by
- // C++ Std, 24.1.1 Input iterators [input.iterators]
-
-private:
- // Checks if current entry is valid and populates error code. For example,
- // current entry may not exist due to broken symbol links.
- void update_error_code_for_current_entry(std::error_code &ec) {
- // Bail out if error has already occured earlier to avoid overwriting it.
- if (ec)
- return;
-
- // Empty directory entry is used to mark the end of an interation, it's not
- // an error.
- if (State->CurrentEntry == directory_entry())
- return;
-
- ErrorOr<basic_file_status> status = State->CurrentEntry.status();
- if (!status)
- ec = status.getError();
- }
};
namespace detail {
@@ -1256,8 +1278,15 @@ public:
if (State->HasNoPushRequest)
State->HasNoPushRequest = false;
else {
- ErrorOr<basic_file_status> status = State->Stack.top()->status();
- if (status && is_directory(*status)) {
+ file_type type = State->Stack.top()->type();
+ if (type == file_type::symlink_file && Follow) {
+ // Resolve the symlink: is it a directory to recurse into?
+ ErrorOr<basic_file_status> status = State->Stack.top()->status();
+ if (status)
+ type = status->type();
+ // Otherwise broken symlink, and we'll continue.
+ }
+ if (type == file_type::directory_file) {
State->Stack.push(directory_iterator(*State->Stack.top(), ec, Follow));
if (State->Stack.top() != end_itr) {
++State->Level;
@@ -1321,8 +1350,6 @@ public:
bool operator!=(const recursive_directory_iterator &RHS) const {
return !(*this == RHS);
}
- // Other members as required by
- // C++ Std, 24.1.1 Input iterators [input.iterators]
};
/// @}
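A sketch combining two of the additions above: expand_tilde() and the cheap directory_entry::type() accessor used through a recursive_directory_iterator. Error handling is minimal and the directory layout is assumed, not taken from the patch.

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
using namespace llvm;

unsigned countRegularFiles(const Twine &Dir) {
  SmallString<256> Path;
  sys::fs::expand_tilde(Dir, Path); // e.g. "~/src" -> "/home/<user>/src" on Unix.

  unsigned N = 0;
  std::error_code EC;
  for (sys::fs::recursive_directory_iterator I(Path, EC), E; I != E && !EC;
       I.increment(EC)) {
    // type() is usually filled in during iteration; it only falls back to a
    // stat() on platforms that cannot report the type for free.
    if (I->type() == sys::fs::file_type::regular_file)
      ++N;
  }
  return N;
}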
diff --git a/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h b/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h
index 56dda430efda..e8bd90f50941 100644
--- a/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h
+++ b/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h
@@ -21,6 +21,8 @@ class Error;
namespace detail {
class format_adapter {
+ virtual void anchor();
+
protected:
virtual ~format_adapter() {}
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h
index c716e4a4d300..b3018bac310a 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTree.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h
@@ -24,6 +24,14 @@
#ifndef LLVM_SUPPORT_GENERICDOMTREE_H
#define LLVM_SUPPORT_GENERICDOMTREE_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CFGUpdate.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -32,13 +40,6 @@
#include <type_traits>
#include <utility>
#include <vector>
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -192,6 +193,10 @@ template <typename DomTreeT>
void Calculate(DomTreeT &DT);
template <typename DomTreeT>
+void CalculateWithUpdates(DomTreeT &DT,
+ ArrayRef<typename DomTreeT::UpdateType> Updates);
+
+template <typename DomTreeT>
void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To);
@@ -199,36 +204,6 @@ template <typename DomTreeT>
void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To);
-// UpdateKind and Update are used by the batch update API and it's easiest to
-// define them here.
-enum class UpdateKind : unsigned char { Insert, Delete };
-
-template <typename NodePtr>
-struct Update {
- using NodeKindPair = PointerIntPair<NodePtr, 1, UpdateKind>;
-
- NodePtr From;
- NodeKindPair ToAndKind;
-
- Update(UpdateKind Kind, NodePtr From, NodePtr To)
- : From(From), ToAndKind(To, Kind) {}
-
- UpdateKind getKind() const { return ToAndKind.getInt(); }
- NodePtr getFrom() const { return From; }
- NodePtr getTo() const { return ToAndKind.getPointer(); }
- bool operator==(const Update &RHS) const {
- return From == RHS.From && ToAndKind == RHS.ToAndKind;
- }
-
- friend raw_ostream &operator<<(raw_ostream &OS, const Update &U) {
- OS << (U.getKind() == UpdateKind::Insert ? "Insert " : "Delete ");
- U.getFrom()->printAsOperand(OS, false);
- OS << " -> ";
- U.getTo()->printAsOperand(OS, false);
- return OS;
- }
-};
-
template <typename DomTreeT>
void ApplyUpdates(DomTreeT &DT,
ArrayRef<typename DomTreeT::UpdateType> Updates);
@@ -254,8 +229,8 @@ class DominatorTreeBase {
using ParentType = typename std::remove_pointer<ParentPtr>::type;
static constexpr bool IsPostDominator = IsPostDom;
- using UpdateType = DomTreeBuilder::Update<NodePtr>;
- using UpdateKind = DomTreeBuilder::UpdateKind;
+ using UpdateType = cfg::Update<NodePtr>;
+ using UpdateKind = cfg::UpdateKind;
static constexpr UpdateKind Insert = UpdateKind::Insert;
static constexpr UpdateKind Delete = UpdateKind::Delete;
@@ -759,6 +734,11 @@ public:
DomTreeBuilder::Calculate(*this);
}
+ void recalculate(ParentType &Func, ArrayRef<UpdateType> Updates) {
+ Parent = &Func;
+ DomTreeBuilder::CalculateWithUpdates(*this, Updates);
+ }
+
/// verify - checks if the tree is correct. There are 3 level of verification:
/// - Full -- verifies if the tree is correct by making sure all the
/// properties (including the parent and the sibling property)
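A sketch of the new recalculate-with-updates entry point on the IR DominatorTree; From and To are hypothetical blocks between which an edge is about to be inserted. The call runs DomTreeBuilder::CalculateWithUpdates, which legalizes the updates and seeds the batch-update bookkeeping before the from-scratch calculation.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

void rebuildDomTree(Function &F, BasicBlock *From, BasicBlock *To,
                    DominatorTree &DT) {
  SmallVector<DominatorTree::UpdateType, 1> Updates;
  Updates.push_back({DominatorTree::Insert, From, To});
  // Calculate the tree while the pending edge insertion is registered as a
  // future update (see CalculateWithUpdates above).
  DT.recalculate(F, Updates);
}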
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 977f209f92b3..971e8305a112 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -71,6 +71,7 @@ struct SemiNCAInfo {
DenseMap<NodePtr, InfoRec> NodeToInfo;
using UpdateT = typename DomTreeT::UpdateType;
+ using UpdateKind = typename DomTreeT::UpdateKind;
struct BatchUpdateInfo {
SmallVector<UpdateT, 4> Updates;
using NodePtrAndKind = PointerIntPair<NodePtr, 1, UpdateKind>;
@@ -1166,7 +1167,8 @@ struct SemiNCAInfo {
}
BatchUpdateInfo BUI;
- LegalizeUpdates(Updates, BUI.Updates);
+ LLVM_DEBUG(dbgs() << "Legalizing " << BUI.Updates.size() << " updates\n");
+ cfg::LegalizeUpdates<NodePtr>(Updates, BUI.Updates, IsPostDom);
const size_t NumLegalized = BUI.Updates.size();
BUI.FutureSuccessors.reserve(NumLegalized);
@@ -1182,8 +1184,11 @@ struct SemiNCAInfo {
LLVM_DEBUG(dbgs() << "About to apply " << NumLegalized << " updates\n");
LLVM_DEBUG(if (NumLegalized < 32) for (const auto &U
- : reverse(BUI.Updates)) dbgs()
- << '\t' << U << "\n");
+ : reverse(BUI.Updates)) {
+ dbgs() << "\t";
+ U.dump();
+ dbgs() << "\n";
+ });
LLVM_DEBUG(dbgs() << "\n");
// Recalculate the DominatorTree when the number of updates
@@ -1207,76 +1212,11 @@ struct SemiNCAInfo {
ApplyNextUpdate(DT, BUI);
}
- // This function serves double purpose:
- // a) It removes redundant updates, which makes it easier to reverse-apply
- // them when traversing CFG.
- // b) It optimizes away updates that cancel each other out, as the end result
- // is the same.
- //
- // It relies on the property of the incremental updates that says that the
- // order of updates doesn't matter. This allows us to reorder them and end up
- // with the exact same DomTree every time.
- //
- // Following the same logic, the function doesn't care about the order of
- // input updates, so it's OK to pass it an unordered sequence of updates, that
- // doesn't make sense when applied sequentially, eg. performing double
- // insertions or deletions and then doing an opposite update.
- //
- // In the future, it should be possible to schedule updates in way that
- // minimizes the amount of work needed done during incremental updates.
- static void LegalizeUpdates(ArrayRef<UpdateT> AllUpdates,
- SmallVectorImpl<UpdateT> &Result) {
- LLVM_DEBUG(dbgs() << "Legalizing " << AllUpdates.size() << " updates\n");
- // Count the total number of inserions of each edge.
- // Each insertion adds 1 and deletion subtracts 1. The end number should be
- // one of {-1 (deletion), 0 (NOP), +1 (insertion)}. Otherwise, the sequence
- // of updates contains multiple updates of the same kind and we assert for
- // that case.
- SmallDenseMap<std::pair<NodePtr, NodePtr>, int, 4> Operations;
- Operations.reserve(AllUpdates.size());
-
- for (const auto &U : AllUpdates) {
- NodePtr From = U.getFrom();
- NodePtr To = U.getTo();
- if (IsPostDom) std::swap(From, To); // Reverse edge for postdominators.
-
- Operations[{From, To}] += (U.getKind() == UpdateKind::Insert ? 1 : -1);
- }
-
- Result.clear();
- Result.reserve(Operations.size());
- for (auto &Op : Operations) {
- const int NumInsertions = Op.second;
- assert(std::abs(NumInsertions) <= 1 && "Unbalanced operations!");
- if (NumInsertions == 0) continue;
- const UpdateKind UK =
- NumInsertions > 0 ? UpdateKind::Insert : UpdateKind::Delete;
- Result.push_back({UK, Op.first.first, Op.first.second});
- }
-
- // Make the order consistent by not relying on pointer values within the
- // set. Reuse the old Operations map.
- // In the future, we should sort by something else to minimize the amount
- // of work needed to perform the series of updates.
- for (size_t i = 0, e = AllUpdates.size(); i != e; ++i) {
- const auto &U = AllUpdates[i];
- if (!IsPostDom)
- Operations[{U.getFrom(), U.getTo()}] = int(i);
- else
- Operations[{U.getTo(), U.getFrom()}] = int(i);
- }
-
- llvm::sort(Result.begin(), Result.end(),
- [&Operations](const UpdateT &A, const UpdateT &B) {
- return Operations[{A.getFrom(), A.getTo()}] >
- Operations[{B.getFrom(), B.getTo()}];
- });
- }
-
static void ApplyNextUpdate(DomTreeT &DT, BatchUpdateInfo &BUI) {
assert(!BUI.Updates.empty() && "No updates to apply!");
UpdateT CurrentUpdate = BUI.Updates.pop_back_val();
- LLVM_DEBUG(dbgs() << "Applying update: " << CurrentUpdate << "\n");
+ LLVM_DEBUG(dbgs() << "Applying update: ");
+ LLVM_DEBUG(CurrentUpdate.dump(); dbgs() << "\n");
// Move to the next snapshot of the CFG by removing the reverse-applied
// current update. Since updates are performed in the same order they are
@@ -1460,10 +1400,9 @@ struct SemiNCAInfo {
// Make a copy and sort it such that it is possible to check if there are
// no gaps between DFS numbers of adjacent children.
SmallVector<TreeNodePtr, 8> Children(Node->begin(), Node->end());
- llvm::sort(Children.begin(), Children.end(),
- [](const TreeNodePtr Ch1, const TreeNodePtr Ch2) {
- return Ch1->getDFSNumIn() < Ch2->getDFSNumIn();
- });
+ llvm::sort(Children, [](const TreeNodePtr Ch1, const TreeNodePtr Ch2) {
+ return Ch1->getDFSNumIn() < Ch2->getDFSNumIn();
+ });
auto PrintChildrenError = [Node, &Children, PrintNodeAndDFSNums](
const TreeNodePtr FirstCh, const TreeNodePtr SecondCh) {
@@ -1650,6 +1589,25 @@ void Calculate(DomTreeT &DT) {
SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, nullptr);
}
+template <typename DomTreeT>
+void CalculateWithUpdates(DomTreeT &DT,
+ ArrayRef<typename DomTreeT::UpdateType> Updates) {
+ // TODO: Move BUI creation in common method, reuse in ApplyUpdates.
+ typename SemiNCAInfo<DomTreeT>::BatchUpdateInfo BUI;
+ LLVM_DEBUG(dbgs() << "Legalizing " << BUI.Updates.size() << " updates\n");
+ cfg::LegalizeUpdates<typename DomTreeT::NodePtr>(Updates, BUI.Updates,
+ DomTreeT::IsPostDominator);
+ const size_t NumLegalized = BUI.Updates.size();
+ BUI.FutureSuccessors.reserve(NumLegalized);
+ BUI.FuturePredecessors.reserve(NumLegalized);
+ for (auto &U : BUI.Updates) {
+ BUI.FutureSuccessors[U.getFrom()].push_back({U.getTo(), U.getKind()});
+ BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()});
+ }
+
+ SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, &BUI);
+}
+
template <class DomTreeT>
void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
typename DomTreeT::NodePtr To) {
diff --git a/contrib/llvm/include/llvm/Support/GraphWriter.h b/contrib/llvm/include/llvm/Support/GraphWriter.h
index c9a9f409c522..02d98bec16e2 100644
--- a/contrib/llvm/include/llvm/Support/GraphWriter.h
+++ b/contrib/llvm/include/llvm/Support/GraphWriter.h
@@ -27,6 +27,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
@@ -320,14 +321,32 @@ raw_ostream &WriteGraph(raw_ostream &O, const GraphType &G,
std::string createGraphFilename(const Twine &Name, int &FD);
+/// Writes the graph into the provided \p Filename.
+/// If \p Filename is empty, a random filename is generated.
+/// \return The resulting filename, or an empty string if writing
+/// failed.
template <typename GraphType>
std::string WriteGraph(const GraphType &G, const Twine &Name,
- bool ShortNames = false, const Twine &Title = "") {
+ bool ShortNames = false,
+ const Twine &Title = "",
+ std::string Filename = "") {
int FD;
// Windows can't always handle long paths, so limit the length of the name.
std::string N = Name.str();
N = N.substr(0, std::min<std::size_t>(N.size(), 140));
- std::string Filename = createGraphFilename(N, FD);
+ if (Filename.empty()) {
+ Filename = createGraphFilename(N, FD);
+ } else {
+ std::error_code EC = sys::fs::openFileForWrite(Filename, FD);
+
+ // Writing over an existing file is not considered an error.
+ if (EC == std::errc::file_exists) {
+ errs() << "file exists, overwriting" << "\n";
+ } else if (EC) {
+ errs() << "error writing into file" << "\n";
+ return "";
+ }
+ }
raw_fd_ostream O(FD, /*shouldClose=*/ true);
if (FD == -1) {
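A sketch of the new explicit-Filename overload of WriteGraph, using the DOT traits for Function from llvm/Analysis/CFGPrinter.h (an assumption, not part of this patch): an empty path falls back to a generated temporary name, and an empty return value means the file could not be opened.

#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Support/GraphWriter.h"
using namespace llvm;

std::string dumpCFG(const Function &F, std::string Path) {
  std::string Title = ("CFG for '" + F.getName() + "' function").str();
  return WriteGraph(&F, F.getName(), /*ShortNames=*/false, Title,
                    std::move(Path));
}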
diff --git a/contrib/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h b/contrib/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h
new file mode 100644
index 000000000000..34eb9f7deaaf
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h
@@ -0,0 +1,93 @@
+//===--- ItaniumManglingCanonicalizer.h -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a class for computing equivalence classes of mangled names
+// given a set of equivalences between name fragments.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
+#define LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstddef>
+
+namespace llvm {
+/// Canonicalizer for mangled names.
+///
+/// This class allows specifying a list of "equivalent" manglings. For example,
+/// you can specify that Ss is equivalent to
+/// NSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE
+/// and then manglings that refer to libstdc++'s 'std::string' will be
+/// considered equivalent to manglings that are the same except that they refer
+/// to libc++'s 'std::string'.
+///
+/// This can be used when data (eg, profiling data) is available for a version
+/// of a program built in a different configuration, with correspondingly
+/// different manglings.
+class ItaniumManglingCanonicalizer {
+public:
+ ItaniumManglingCanonicalizer();
+ ItaniumManglingCanonicalizer(const ItaniumManglingCanonicalizer &) = delete;
+ void operator=(const ItaniumManglingCanonicalizer &) = delete;
+ ~ItaniumManglingCanonicalizer();
+
+ enum class EquivalenceError {
+ Success,
+
+ /// Both the equivalent manglings have already been used as components of
+ /// some other mangling we've looked at. It's too late to add this
+ /// equivalence.
+ ManglingAlreadyUsed,
+
+ /// The first equivalent mangling is invalid.
+ InvalidFirstMangling,
+
+ /// The second equivalent mangling is invalid.
+ InvalidSecondMangling,
+ };
+
+ enum class FragmentKind {
+ /// The mangling fragment is a <name> (or a predefined <substitution>).
+ Name,
+ /// The mangling fragment is a <type>.
+ Type,
+ /// The mangling fragment is an <encoding>.
+ Encoding,
+ };
+
+ /// Add an equivalence between \p First and \p Second. Both manglings must
+ /// live at least as long as the canonicalizer.
+ EquivalenceError addEquivalence(FragmentKind Kind, StringRef First,
+ StringRef Second);
+
+ using Key = uintptr_t;
+
+ /// Form a canonical key for the specified mangling. The key will be the
+ /// same for all equivalent manglings, and different for any two
+ /// non-equivalent manglings, but is otherwise unspecified.
+ ///
+ /// Returns Key() if (and only if) the mangling is not a valid Itanium C++
+ /// ABI mangling.
+ ///
+ /// The string denoted by Mangling must live as long as the canonicalizer.
+ Key canonicalize(StringRef Mangling);
+
+ /// Find a canonical key for the specified mangling, if one has already been
+ /// formed. Otherwise returns Key().
+ Key lookup(StringRef Mangling);
+
+private:
+ struct Impl;
+ Impl *P;
+};
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
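A sketch built from the doc comment above: register the Ss <-> libc++ std::string equivalence and then test whether two manglings fall into the same class. String literals satisfy the lifetime requirement; callers must keep A and B alive as long as the canonicalizer.

#include "llvm/Support/ItaniumManglingCanonicalizer.h"
using namespace llvm;

bool equivalentManglings(StringRef A, StringRef B) {
  ItaniumManglingCanonicalizer C;
  (void)C.addEquivalence(
      ItaniumManglingCanonicalizer::FragmentKind::Type, "Ss",
      "NSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE");
  ItaniumManglingCanonicalizer::Key KA = C.canonicalize(A);
  ItaniumManglingCanonicalizer::Key KB = C.canonicalize(B);
  // Key() means "not a valid Itanium mangling"; never treat that as a match.
  return KA != ItaniumManglingCanonicalizer::Key() && KA == KB;
}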
diff --git a/contrib/llvm/include/llvm/Support/JSON.h b/contrib/llvm/include/llvm/Support/JSON.h
index da3c5ea0b25d..7a04fd52bc50 100644
--- a/contrib/llvm/include/llvm/Support/JSON.h
+++ b/contrib/llvm/include/llvm/Support/JSON.h
@@ -294,9 +294,13 @@ public:
Value(json::Array &&Elements) : Type(T_Array) {
create<json::Array>(std::move(Elements));
}
+ template <typename Elt>
+ Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
Value(json::Object &&Properties) : Type(T_Object) {
create<json::Object>(std::move(Properties));
}
+ template <typename Elt>
+ Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
// Strings: types with value semantics. Must be valid UTF-8.
Value(std::string V) : Type(T_String) {
if (LLVM_UNLIKELY(!isUTF8(V))) {
@@ -452,7 +456,10 @@ private:
new (reinterpret_cast<T *>(Union.buffer)) T(std::forward<U>(V)...);
}
template <typename T> T &as() const {
- return *reinterpret_cast<T *>(Union.buffer);
+ // Using this two-step static_cast via void * instead of reinterpret_cast
+ // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
+ void *Storage = static_cast<void *>(Union.buffer);
+ return *static_cast<T *>(Storage);
}
template <typename Indenter>
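A small sketch of the two constructors added above: a std::vector becomes a JSON array and a string-keyed std::map becomes a JSON object without any manual conversion.

#include "llvm/Support/JSON.h"
#include <map>
#include <string>
#include <vector>
using namespace llvm;

json::Value describe() {
  std::vector<int> Primes = {2, 3, 5, 7};
  std::map<std::string, std::string> Authors = {{"llvm", "many"}};
  return json::Object{{"primes", Primes},    // -> [2,3,5,7]
                      {"authors", Authors}}; // -> {"llvm":"many"}
}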
diff --git a/contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h b/contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h
index a0a5a52d206e..2a1075c9a48d 100644
--- a/contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h
+++ b/contrib/llvm/include/llvm/Support/LowLevelTypeImpl.h
@@ -147,6 +147,7 @@ public:
bool operator!=(const LLT &RHS) const { return !(*this == RHS); }
friend struct DenseMapInfo<LLT>;
+ friend class GISelInstProfileBuilder;
private:
/// LLT is packed into 64 bits as follows:
@@ -231,6 +232,11 @@ private:
maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo);
}
}
+
+ uint64_t getUniqueRAWLLTData() const {
+ return ((uint64_t)RawData) << 2 | ((uint64_t)IsPointer) << 1 |
+ ((uint64_t)IsVector);
+ }
};
inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) {
@@ -250,8 +256,7 @@ template<> struct DenseMapInfo<LLT> {
return Invalid;
}
static inline unsigned getHashValue(const LLT &Ty) {
- uint64_t Val = ((uint64_t)Ty.RawData) << 2 | ((uint64_t)Ty.IsPointer) << 1 |
- ((uint64_t)Ty.IsVector);
+ uint64_t Val = Ty.getUniqueRAWLLTData();
return DenseMapInfo<uint64_t>::getHashValue(Val);
}
static bool isEqual(const LLT &LHS, const LLT &RHS) {
diff --git a/contrib/llvm/include/llvm/Support/MSVCErrorWorkarounds.h b/contrib/llvm/include/llvm/Support/MSVCErrorWorkarounds.h
new file mode 100644
index 000000000000..053ecf64d1e9
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/MSVCErrorWorkarounds.h
@@ -0,0 +1,84 @@
+//===--- MSVCErrorWorkarounds.h - Enable future<Error> in MSVC --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MSVC's promise/future implementation requires types to be default
+// constructible, so this header provides analogues of Error and Expected
+// that are default constructed in a safely destructible state.
+//
+// FIXME: Kill off this header and migrate all users to Error/Expected once we
+// move to MSVC versions that support non-default-constructible types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MSVCERRORWORKAROUNDS_H
+#define LLVM_SUPPORT_MSVCERRORWORKAROUNDS_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+// A default-constructible llvm::Error that is suitable for use with MSVC's
+// std::future implementation which requires default constructible types.
+class MSVCPError : public Error {
+public:
+ MSVCPError() { (void)!!*this; }
+
+ MSVCPError(MSVCPError &&Other) : Error(std::move(Other)) {}
+
+ MSVCPError &operator=(MSVCPError Other) {
+ Error::operator=(std::move(Other));
+ return *this;
+ }
+
+ MSVCPError(Error Err) : Error(std::move(Err)) {}
+};
+
+// A default-constructible llvm::Expected that is suitable for use with MSVC's
+// std::future implementation, which requires default constructible types.
+template <typename T> class MSVCPExpected : public Expected<T> {
+public:
+ MSVCPExpected()
+ : Expected<T>(make_error<StringError>("", inconvertibleErrorCode())) {
+ consumeError(this->takeError());
+ }
+
+ MSVCPExpected(MSVCPExpected &&Other) : Expected<T>(std::move(Other)) {}
+
+ MSVCPExpected &operator=(MSVCPExpected &&Other) {
+ Expected<T>::operator=(std::move(Other));
+ return *this;
+ }
+
+ MSVCPExpected(Error Err) : Expected<T>(std::move(Err)) {}
+
+ template <typename OtherT>
+ MSVCPExpected(
+ OtherT &&Val,
+ typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Val)) {}
+
+ template <class OtherT>
+ MSVCPExpected(
+ Expected<OtherT> &&Other,
+ typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Other)) {}
+
+ template <class OtherT>
+ explicit MSVCPExpected(
+ Expected<OtherT> &&Other,
+ typename std::enable_if<!std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Other)) {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_MSVCERRORWORKAROUNDS_H
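A sketch of the intended pattern: because MSVC's std::promise/std::future need default-constructible types, an Expected<T> result is passed through a future as MSVCPExpected<T>. computeRemotely() is a hypothetical producer, not part of this header.

#include "llvm/Support/MSVCErrorWorkarounds.h"
#include <future>
using namespace llvm;

Expected<int> computeRemotely(); // Hypothetical; defined elsewhere.

std::future<MSVCPExpected<int>> computeAsync() {
  std::promise<MSVCPExpected<int>> P;
  std::future<MSVCPExpected<int>> F = P.get_future();
  // Expected<int> converts implicitly to the default-constructible wrapper.
  P.set_value(computeRemotely());
  return F;
}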
diff --git a/contrib/llvm/include/llvm/Support/Path.h b/contrib/llvm/include/llvm/Support/Path.h
index c4cc93721d7e..76de887b7cb4 100644
--- a/contrib/llvm/include/llvm/Support/Path.h
+++ b/contrib/llvm/include/llvm/Support/Path.h
@@ -361,22 +361,6 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result);
/// @result True if a home directory is set, false otherwise.
bool home_directory(SmallVectorImpl<char> &result);
-/// Get the user's cache directory.
-///
-/// Expect the resulting path to be a directory shared with other
-/// applications/services used by the user. Params \p Path1 to \p Path3 can be
-/// used to append additional directory names to the resulting path. Recommended
-/// pattern is <user_cache_directory>/<vendor>/<application>.
-///
-/// @param Result Holds the resulting path.
-/// @param Path1 Additional path to be appended to the user's cache directory
-/// path. "" can be used to append nothing.
-/// @param Path2 Second additional path to be appended.
-/// @param Path3 Third additional path to be appended.
-/// @result True if a cache directory path is set, false otherwise.
-bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1,
- const Twine &Path2 = "", const Twine &Path3 = "");
-
/// Has root name?
///
/// root_name != ""
diff --git a/contrib/llvm/include/llvm/Support/ScopedPrinter.h b/contrib/llvm/include/llvm/Support/ScopedPrinter.h
index 062439b4f7db..34c1a287ee10 100644
--- a/contrib/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/contrib/llvm/include/llvm/Support/ScopedPrinter.h
@@ -138,7 +138,7 @@ public:
}
}
- llvm::sort(SetFlags.begin(), SetFlags.end(), &flagName<TFlag>);
+ llvm::sort(SetFlags, &flagName<TFlag>);
startLine() << Label << " [ (" << hex(Value) << ")\n";
for (const auto &Flag : SetFlags) {
diff --git a/contrib/llvm/include/llvm/Support/SymbolRemappingReader.h b/contrib/llvm/include/llvm/Support/SymbolRemappingReader.h
new file mode 100644
index 000000000000..b457b9e817e4
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/SymbolRemappingReader.h
@@ -0,0 +1,133 @@
+//===- SymbolRemappingReader.h - Read symbol remapping file -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions needed for reading and applying symbol
+// remapping files.
+//
+// Support is provided only for the Itanium C++ name mangling scheme for now.
+//
+// NOTE: If you are making changes to this file format, please remember
+// to document them in the Clang documentation at
+// tools/clang/docs/UsersManual.rst.
+//
+// File format
+// -----------
+//
+// The symbol remappings are written as an ASCII text file. Blank lines and
+// lines starting with a # are ignored. All other lines specify a kind of
+// mangled name fragment, along with two fragments of that kind that should
+// be treated as equivalent, separated by spaces.
+//
+// See http://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling for a
+// description of the Itanium name mangling scheme.
+//
+// The accepted fragment kinds are:
+//
+// * name A <name>, such as 6foobar or St3__1
+// * type A <type>, such as Ss or N4llvm9StringRefE
+// * encoding An <encoding> (a complete mangling without the leading _Z)
+//
+// For example:
+//
+// # Ignore int / long differences to treat symbols from 32-bit and 64-bit
+// # builds with differing size_t / ptrdiff_t / intptr_t as equivalent.
+// type i l
+// type j m
+//
+// # Ignore differences between libc++ and libstdc++, and between libstdc++'s
+// # C++98 and C++11 ABIs.
+// name 3std St3__1
+// name 3std St7__cxx11
+//
+// # Remap a function overload to a specialization of a template (including
+// # any local symbols declared within it).
+// encoding N2NS1fEi N2NS1fIiEEvT_
+//
+// # Substitutions must be remapped separately from namespace 'std' for now.
+// name Sa NSt3__19allocatorE
+// name Sb NSt3__112basic_stringE
+// type Ss NSt3__112basic_stringIcSt11char_traitsIcESaE
+// # ...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
+#define LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ItaniumManglingCanonicalizer.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+
+class SymbolRemappingParseError : public ErrorInfo<SymbolRemappingParseError> {
+public:
+ SymbolRemappingParseError(StringRef File, int64_t Line, Twine Message)
+ : File(File), Line(Line), Message(Message.str()) {}
+
+ void log(llvm::raw_ostream &OS) const override {
+ OS << File << ':' << Line << ": " << Message;
+ }
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+
+ StringRef getFileName() const { return File; }
+ int64_t getLineNum() const { return Line; }
+ StringRef getMessage() const { return Message; }
+
+ static char ID;
+
+private:
+ std::string File;
+ int64_t Line;
+ std::string Message;
+};
+
+/// Reader for symbol remapping files.
+///
+/// Remaps the symbol names in profile data to match those in the program
+/// according to a set of rules specified in a given file.
+class SymbolRemappingReader {
+public:
+ /// Read remappings from the given buffer, which must live as long as
+ /// the remapper.
+ Error read(MemoryBuffer &B);
+
+ /// A Key represents an equivalence class of symbol names.
+ using Key = uintptr_t;
+
+ /// Construct a key for the given symbol, or return an existing one if an
+ /// equivalent name has already been inserted. The symbol name must live
+ /// as long as the remapper.
+ ///
+ /// The result will be Key() if the name cannot be remapped (typically
+ /// because it is not a valid mangled name).
+ Key insert(StringRef FunctionName) {
+ return Canonicalizer.canonicalize(FunctionName);
+ }
+
+ /// Map the given symbol name into the key for the corresponding equivalence
+ /// class.
+ ///
+ /// The result will typically be Key() if no equivalent symbol has been
+ /// inserted, but this is not guaranteed: a Key different from all keys ever
+ /// returned by \c insert may be returned instead.
+ Key lookup(StringRef FunctionName) {
+ return Canonicalizer.lookup(FunctionName);
+ }
+
+private:
+ ItaniumManglingCanonicalizer Canonicalizer;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
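A sketch of reading a remapping file and testing whether two mangled names land in the same equivalence class; as the comments above require, the buffer and the symbol names outlive the reader here.

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SymbolRemappingReader.h"
using namespace llvm;

Expected<bool> sameEquivalenceClass(StringRef RemapPath, StringRef SymA,
                                    StringRef SymB) {
  auto BufOrErr = MemoryBuffer::getFile(RemapPath);
  if (!BufOrErr)
    return errorCodeToError(BufOrErr.getError());

  SymbolRemappingReader Reader;
  if (Error E = Reader.read(**BufOrErr))
    return std::move(E);

  SymbolRemappingReader::Key KA = Reader.insert(SymA);
  SymbolRemappingReader::Key KB = Reader.insert(SymB);
  // Key() means the name could not be remapped (e.g. not a valid mangling).
  return KA != SymbolRemappingReader::Key() && KA == KB;
}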
diff --git a/contrib/llvm/include/llvm/Support/TargetOpcodes.def b/contrib/llvm/include/llvm/Support/TargetOpcodes.def
index 63491a5f01d2..3e8193a5cdcf 100644
--- a/contrib/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/contrib/llvm/include/llvm/Support/TargetOpcodes.def
@@ -258,6 +258,17 @@ HANDLE_TARGET_OPCODE(G_INSERT)
/// larger register.
HANDLE_TARGET_OPCODE(G_MERGE_VALUES)
+/// Generic instruction to create a vector value from a number of scalar
+/// components.
+HANDLE_TARGET_OPCODE(G_BUILD_VECTOR)
+
+/// Generic instruction to create a vector value from a number of scalar
+/// components, which have types larger than the result vector elt type.
+HANDLE_TARGET_OPCODE(G_BUILD_VECTOR_TRUNC)
+
+/// Generic instruction to create a vector by concatenating multiple vectors.
+HANDLE_TARGET_OPCODE(G_CONCAT_VECTORS)
+
/// Generic pointer to int conversion.
HANDLE_TARGET_OPCODE(G_PTRTOINT)
@@ -268,6 +279,12 @@ HANDLE_TARGET_OPCODE(G_INTTOPTR)
/// COPY is the relevant instruction.
HANDLE_TARGET_OPCODE(G_BITCAST)
+/// INTRINSIC trunc intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
+
+/// INTRINSIC round intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUND)
+
/// Generic load (including anyext load)
HANDLE_TARGET_OPCODE(G_LOAD)
@@ -356,10 +373,18 @@ HANDLE_TARGET_OPCODE(G_FCMP)
/// Generic select.
HANDLE_TARGET_OPCODE(G_SELECT)
+/// Generic unsigned add instruction, consuming the normal operands and
+/// producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_UADDO)
+
/// Generic unsigned add instruction, consuming the normal operands plus a carry
/// flag, and similarly producing the result and a carry flag.
HANDLE_TARGET_OPCODE(G_UADDE)
+/// Generic unsigned sub instruction, consuming the normal operands and
+/// producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_USUBO)
+
/// Generic unsigned subtract instruction, consuming the normal operands plus a
/// carry flag, and similarly producing the result and a carry flag.
HANDLE_TARGET_OPCODE(G_USUBE)
@@ -368,10 +393,18 @@ HANDLE_TARGET_OPCODE(G_USUBE)
/// flag.
HANDLE_TARGET_OPCODE(G_SADDO)
+/// Generic signed add instruction, consuming the normal operands plus a carry
+/// flag, and similarly producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_SADDE)
+
/// Generic signed subtract instruction, producing the result and a signed
/// overflow flag.
HANDLE_TARGET_OPCODE(G_SSUBO)
+/// Generic signed sub instruction, consuming the normal operands plus a carry
+/// flag, and similarly producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_SSUBE)
+
/// Generic unsigned multiply instruction, producing the result and a signed
/// overflow flag.
HANDLE_TARGET_OPCODE(G_UMULO)
@@ -421,6 +454,9 @@ HANDLE_TARGET_OPCODE(G_FLOG)
/// Floating point base-2 logarithm of a value.
HANDLE_TARGET_OPCODE(G_FLOG2)
+/// Floating point base-10 logarithm of a value.
+HANDLE_TARGET_OPCODE(G_FLOG10)
+
/// Generic FP negation.
HANDLE_TARGET_OPCODE(G_FNEG)
@@ -464,9 +500,27 @@ HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT)
/// Generic shufflevector.
HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR)
+/// Generic count trailing zeroes.
+HANDLE_TARGET_OPCODE(G_CTTZ)
+
+/// Same as above, undefined for zero inputs.
+HANDLE_TARGET_OPCODE(G_CTTZ_ZERO_UNDEF)
+
+/// Generic count leading zeroes.
+HANDLE_TARGET_OPCODE(G_CTLZ)
+
+/// Same as above, undefined for zero inputs.
+HANDLE_TARGET_OPCODE(G_CTLZ_ZERO_UNDEF)
+
+/// Generic count bits.
+HANDLE_TARGET_OPCODE(G_CTPOP)
+
/// Generic byte swap.
HANDLE_TARGET_OPCODE(G_BSWAP)
+/// Floating point ceil.
+HANDLE_TARGET_OPCODE(G_FCEIL)
+
/// Generic AddressSpaceCast.
HANDLE_TARGET_OPCODE(G_ADDRSPACE_CAST)
diff --git a/contrib/llvm/include/llvm/Support/TargetParser.h b/contrib/llvm/include/llvm/Support/TargetParser.h
index 08ad42dda3eb..ace11ed410a3 100644
--- a/contrib/llvm/include/llvm/Support/TargetParser.h
+++ b/contrib/llvm/include/llvm/Support/TargetParser.h
@@ -18,211 +18,20 @@
// FIXME: vector is used because that's what clang uses for subtarget feature
// lists, but SmallVector would probably be better
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ARMTargetParser.h"
+#include "llvm/Support/AArch64TargetParser.h"
#include <vector>
namespace llvm {
class StringRef;
-// Target specific information into their own namespaces. These should be
-// generated from TableGen because the information is already there, and there
-// is where new information about targets will be added.
+// Target specific information in their own namespaces.
+// (ARM/AArch64 are declared in ARM/AArch64TargetParser.h)
+// These should be generated from TableGen because the information is already
+// there, and that is where new information about targets will be added.
// FIXME: To TableGen this we need to make some table generated files available
// even if the back-end is not compiled with LLVM, plus we need to create a new
// back-end to TableGen to create these clean tables.
-namespace ARM {
-
-// FPU Version
-enum class FPUVersion {
- NONE,
- VFPV2,
- VFPV3,
- VFPV3_FP16,
- VFPV4,
- VFPV5
-};
-
-// An FPU name restricts the FPU in one of three ways:
-enum class FPURestriction {
- None = 0, ///< No restriction
- D16, ///< Only 16 D registers
- SP_D16 ///< Only single-precision instructions, with 16 D registers
-};
-
-// An FPU name implies one of three levels of Neon support:
-enum class NeonSupportLevel {
- None = 0, ///< No Neon
- Neon, ///< Neon
- Crypto ///< Neon with Crypto
-};
-
-// FPU names.
-enum FPUKind {
-#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) KIND,
-#include "ARMTargetParser.def"
- FK_LAST
-};
-
-// Arch names.
-enum class ArchKind {
-#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
-#include "ARMTargetParser.def"
-};
-
-// Arch extension modifiers for CPUs.
-enum ArchExtKind : unsigned {
- AEK_INVALID = 0,
- AEK_NONE = 1,
- AEK_CRC = 1 << 1,
- AEK_CRYPTO = 1 << 2,
- AEK_FP = 1 << 3,
- AEK_HWDIVTHUMB = 1 << 4,
- AEK_HWDIVARM = 1 << 5,
- AEK_MP = 1 << 6,
- AEK_SIMD = 1 << 7,
- AEK_SEC = 1 << 8,
- AEK_VIRT = 1 << 9,
- AEK_DSP = 1 << 10,
- AEK_FP16 = 1 << 11,
- AEK_RAS = 1 << 12,
- AEK_SVE = 1 << 13,
- AEK_DOTPROD = 1 << 14,
- AEK_SHA2 = 1 << 15,
- AEK_AES = 1 << 16,
- // Unsupported extensions.
- AEK_OS = 0x8000000,
- AEK_IWMMXT = 0x10000000,
- AEK_IWMMXT2 = 0x20000000,
- AEK_MAVERICK = 0x40000000,
- AEK_XSCALE = 0x80000000,
-};
-
-// ISA kinds.
-enum class ISAKind { INVALID = 0, ARM, THUMB, AARCH64 };
-
-// Endianness
-// FIXME: BE8 vs. BE32?
-enum class EndianKind { INVALID = 0, LITTLE, BIG };
-
-// v6/v7/v8 Profile
-enum class ProfileKind { INVALID = 0, A, R, M };
-
-StringRef getCanonicalArchName(StringRef Arch);
-
-// Information by ID
-StringRef getFPUName(unsigned FPUKind);
-FPUVersion getFPUVersion(unsigned FPUKind);
-NeonSupportLevel getFPUNeonSupportLevel(unsigned FPUKind);
-FPURestriction getFPURestriction(unsigned FPUKind);
-
-// FIXME: These should be moved to TargetTuple once it exists
-bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
-bool getHWDivFeatures(unsigned HWDivKind, std::vector<StringRef> &Features);
-bool getExtensionFeatures(unsigned Extensions,
- std::vector<StringRef> &Features);
-
-StringRef getArchName(ArchKind AK);
-unsigned getArchAttr(ArchKind AK);
-StringRef getCPUAttr(ArchKind AK);
-StringRef getSubArch(ArchKind AK);
-StringRef getArchExtName(unsigned ArchExtKind);
-StringRef getArchExtFeature(StringRef ArchExt);
-StringRef getHWDivName(unsigned HWDivKind);
-
-// Information by Name
-unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
-unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
-StringRef getDefaultCPU(StringRef Arch);
-
-// Parser
-unsigned parseHWDiv(StringRef HWDiv);
-unsigned parseFPU(StringRef FPU);
-ArchKind parseArch(StringRef Arch);
-unsigned parseArchExt(StringRef ArchExt);
-ArchKind parseCPUArch(StringRef CPU);
-void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
-ISAKind parseArchISA(StringRef Arch);
-EndianKind parseArchEndian(StringRef Arch);
-ProfileKind parseArchProfile(StringRef Arch);
-unsigned parseArchVersion(StringRef Arch);
-
-StringRef computeDefaultTargetABI(const Triple &TT, StringRef CPU);
-
-} // namespace ARM
-
-// FIXME:This should be made into class design,to avoid dupplication.
-namespace AArch64 {
-
-// Arch names.
-enum class ArchKind {
-#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
-#include "AArch64TargetParser.def"
-};
-
-// Arch extension modifiers for CPUs.
-enum ArchExtKind : unsigned {
- AEK_INVALID = 0,
- AEK_NONE = 1,
- AEK_CRC = 1 << 1,
- AEK_CRYPTO = 1 << 2,
- AEK_FP = 1 << 3,
- AEK_SIMD = 1 << 4,
- AEK_FP16 = 1 << 5,
- AEK_PROFILE = 1 << 6,
- AEK_RAS = 1 << 7,
- AEK_LSE = 1 << 8,
- AEK_SVE = 1 << 9,
- AEK_DOTPROD = 1 << 10,
- AEK_RCPC = 1 << 11,
- AEK_RDM = 1 << 12,
- AEK_SM4 = 1 << 13,
- AEK_SHA3 = 1 << 14,
- AEK_SHA2 = 1 << 15,
- AEK_AES = 1 << 16,
-};
-
-StringRef getCanonicalArchName(StringRef Arch);
-
-// Information by ID
-StringRef getFPUName(unsigned FPUKind);
-ARM::FPUVersion getFPUVersion(unsigned FPUKind);
-ARM::NeonSupportLevel getFPUNeonSupportLevel(unsigned FPUKind);
-ARM::FPURestriction getFPURestriction(unsigned FPUKind);
-
-// FIXME: These should be moved to TargetTuple once it exists
-bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
-bool getExtensionFeatures(unsigned Extensions,
- std::vector<StringRef> &Features);
-bool getArchFeatures(ArchKind AK, std::vector<StringRef> &Features);
-
-StringRef getArchName(ArchKind AK);
-unsigned getArchAttr(ArchKind AK);
-StringRef getCPUAttr(ArchKind AK);
-StringRef getSubArch(ArchKind AK);
-StringRef getArchExtName(unsigned ArchExtKind);
-StringRef getArchExtFeature(StringRef ArchExt);
-unsigned checkArchVersion(StringRef Arch);
-
-// Information by Name
-unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
-unsigned getDefaultExtensions(StringRef CPU, ArchKind AK);
-StringRef getDefaultCPU(StringRef Arch);
-AArch64::ArchKind getCPUArchKind(StringRef CPU);
-
-// Parser
-unsigned parseFPU(StringRef FPU);
-AArch64::ArchKind parseArch(StringRef Arch);
-ArchExtKind parseArchExt(StringRef ArchExt);
-ArchKind parseCPUArch(StringRef CPU);
-void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
-ARM::ISAKind parseArchISA(StringRef Arch);
-ARM::EndianKind parseArchEndian(StringRef Arch);
-ARM::ProfileKind parseArchProfile(StringRef Arch);
-unsigned parseArchVersion(StringRef Arch);
-
-bool isX18ReservedByDefault(const Triple &TT);
-
-} // namespace AArch64
-
namespace X86 {
// This should be kept in sync with libcc/compiler-rt as its included by clang
@@ -266,6 +75,96 @@ enum ProcessorFeatures {
} // namespace X86
+namespace AMDGPU {
+
+/// GPU kinds supported by the AMDGPU target.
+enum GPUKind : uint32_t {
+ // Not specified processor.
+ GK_NONE = 0,
+
+ // R600-based processors.
+ GK_R600 = 1,
+ GK_R630 = 2,
+ GK_RS880 = 3,
+ GK_RV670 = 4,
+ GK_RV710 = 5,
+ GK_RV730 = 6,
+ GK_RV770 = 7,
+ GK_CEDAR = 8,
+ GK_CYPRESS = 9,
+ GK_JUNIPER = 10,
+ GK_REDWOOD = 11,
+ GK_SUMO = 12,
+ GK_BARTS = 13,
+ GK_CAICOS = 14,
+ GK_CAYMAN = 15,
+ GK_TURKS = 16,
+
+ GK_R600_FIRST = GK_R600,
+ GK_R600_LAST = GK_TURKS,
+
+ // AMDGCN-based processors.
+ GK_GFX600 = 32,
+ GK_GFX601 = 33,
+
+ GK_GFX700 = 40,
+ GK_GFX701 = 41,
+ GK_GFX702 = 42,
+ GK_GFX703 = 43,
+ GK_GFX704 = 44,
+
+ GK_GFX801 = 50,
+ GK_GFX802 = 51,
+ GK_GFX803 = 52,
+ GK_GFX810 = 53,
+
+ GK_GFX900 = 60,
+ GK_GFX902 = 61,
+ GK_GFX904 = 62,
+ GK_GFX906 = 63,
+ GK_GFX909 = 65,
+
+ GK_AMDGCN_FIRST = GK_GFX600,
+ GK_AMDGCN_LAST = GK_GFX909,
+};
+
+/// Instruction set architecture version.
+struct IsaVersion {
+ unsigned Major;
+ unsigned Minor;
+ unsigned Stepping;
+};
+
+// This isn't comprehensive for now, just things that are needed from the
+// frontend driver.
+enum ArchFeatureKind : uint32_t {
+ FEATURE_NONE = 0,
+
+ // These features only exist for r600, and are implied true for amdgcn.
+ FEATURE_FMA = 1 << 1,
+ FEATURE_LDEXP = 1 << 2,
+ FEATURE_FP64 = 1 << 3,
+
+ // Common features.
+ FEATURE_FAST_FMA_F32 = 1 << 4,
+ FEATURE_FAST_DENORMAL_F32 = 1 << 5
+};
+
+StringRef getArchNameAMDGCN(GPUKind AK);
+StringRef getArchNameR600(GPUKind AK);
+StringRef getCanonicalArchName(StringRef Arch);
+GPUKind parseArchAMDGCN(StringRef CPU);
+GPUKind parseArchR600(StringRef CPU);
+unsigned getArchAttrAMDGCN(GPUKind AK);
+unsigned getArchAttrR600(GPUKind AK);
+
+void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
+void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
+
+IsaVersion getIsaVersion(StringRef GPU);
+
+} // namespace AMDGPU
+
} // namespace llvm
#endif
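A sketch of the new AMDGPU helpers: map a -mcpu string to its canonical gfx name and ISA version. The printed format is illustrative only.

#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void printAMDGCNInfo(StringRef CPU) {
  AMDGPU::GPUKind Kind = AMDGPU::parseArchAMDGCN(CPU);
  if (Kind == AMDGPU::GK_NONE) {
    errs() << "unknown amdgcn processor: " << CPU << "\n";
    return;
  }
  StringRef Canonical = AMDGPU::getArchNameAMDGCN(Kind);
  AMDGPU::IsaVersion V = AMDGPU::getIsaVersion(Canonical);
  outs() << Canonical << " -> ISA v" << V.Major << "." << V.Minor << "."
         << V.Stepping << "\n";
}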
diff --git a/contrib/llvm/include/llvm/Support/Threading.h b/contrib/llvm/include/llvm/Support/Threading.h
index e8021f648b0d..ba7ece5e72ba 100644
--- a/contrib/llvm/include/llvm/Support/Threading.h
+++ b/contrib/llvm/include/llvm/Support/Threading.h
@@ -27,7 +27,8 @@
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) && \
(defined(_LIBCPP_VERSION) || \
- !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__ppc__)))
+ !(defined(__NetBSD__) || defined(__OpenBSD__) || \
+ (defined(__ppc__) || defined(__PPC__))))
// std::call_once from libc++ is used on all Unix platforms. Other
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
diff --git a/contrib/llvm/include/llvm/Support/Timer.h b/contrib/llvm/include/llvm/Support/Timer.h
index bfffbc3157b1..a11c3ce3ff22 100644
--- a/contrib/llvm/include/llvm/Support/Timer.h
+++ b/contrib/llvm/include/llvm/Support/Timer.h
@@ -206,15 +206,23 @@ public:
Description.assign(NewDescription.begin(), NewDescription.end());
}
- /// Print any started timers in this group and zero them.
+ /// Print any started timers in this group.
void print(raw_ostream &OS);
- /// This static method prints all timers and clears them all out.
+ /// Clear all timers in this group.
+ void clear();
+
+ /// This static method prints all timers.
static void printAll(raw_ostream &OS);
+ /// Clear out all timers. This is mostly used to disable automatic
+ /// printing on shutdown, when timers have already been printed explicitly
+ /// using \c printAll or \c printJSONValues.
+ static void clearAll();
+
const char *printJSONValues(raw_ostream &OS, const char *delim);
- /// Prints all timers as JSON key/value pairs, and clears them all out.
+ /// Prints all timers as JSON key/value pairs.
static const char *printAllJSONValues(raw_ostream &OS, const char *delim);
/// Ensure global timer group lists are initialized. This function is mostly
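A sketch of the new print/clear split described above: print every timer group once, then clear them so the automatic report at shutdown stays silent.

#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void reportTimersOnce() {
  TimerGroup::printAll(errs()); // Printing no longer zeroes the timers.
  TimerGroup::clearAll();       // Suppress the automatic print on shutdown.
}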
diff --git a/contrib/llvm/include/llvm/Support/VirtualFileSystem.h b/contrib/llvm/include/llvm/Support/VirtualFileSystem.h
new file mode 100644
index 000000000000..61c3d2f46e9c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -0,0 +1,764 @@
+//===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines the virtual file system interface vfs::FileSystem.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H
+#define LLVM_SUPPORT_VIRTUALFILESYSTEM_H
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include <cassert>
+#include <cstdint>
+#include <ctime>
+#include <memory>
+#include <stack>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class MemoryBuffer;
+
+namespace vfs {
+
+/// The result of a \p status operation.
+class Status {
+ std::string Name;
+ llvm::sys::fs::UniqueID UID;
+ llvm::sys::TimePoint<> MTime;
+ uint32_t User;
+ uint32_t Group;
+ uint64_t Size;
+ llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error;
+ llvm::sys::fs::perms Perms;
+
+public:
+ // FIXME: remove when files support multiple names
+ bool IsVFSMapped = false;
+
+ Status() = default;
+ Status(const llvm::sys::fs::file_status &Status);
+ Status(StringRef Name, llvm::sys::fs::UniqueID UID,
+ llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
+ uint64_t Size, llvm::sys::fs::file_type Type,
+ llvm::sys::fs::perms Perms);
+
+ /// Get a copy of a Status with a different name.
+ static Status copyWithNewName(const Status &In, StringRef NewName);
+ static Status copyWithNewName(const llvm::sys::fs::file_status &In,
+ StringRef NewName);
+
+ /// Returns the name that should be used for this file or directory.
+ StringRef getName() const { return Name; }
+
+ /// @name Status interface from llvm::sys::fs
+ /// @{
+ llvm::sys::fs::file_type getType() const { return Type; }
+ llvm::sys::fs::perms getPermissions() const { return Perms; }
+ llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; }
+ llvm::sys::fs::UniqueID getUniqueID() const { return UID; }
+ uint32_t getUser() const { return User; }
+ uint32_t getGroup() const { return Group; }
+ uint64_t getSize() const { return Size; }
+ /// @}
+ /// @name Status queries
+ /// These are static queries in llvm::sys::fs.
+ /// @{
+ bool equivalent(const Status &Other) const;
+ bool isDirectory() const;
+ bool isRegularFile() const;
+ bool isOther() const;
+ bool isSymlink() const;
+ bool isStatusKnown() const;
+ bool exists() const;
+ /// @}
+};
+
+/// Represents an open file.
+class File {
+public:
+ /// Destroy the file after closing it (if open).
+ /// Sub-classes should generally call close() inside their destructors. We
+ /// cannot do that from the base class, since close is virtual.
+ virtual ~File();
+
+ /// Get the status of the file.
+ virtual llvm::ErrorOr<Status> status() = 0;
+
+ /// Get the name of the file
+ virtual llvm::ErrorOr<std::string> getName() {
+ if (auto Status = status())
+ return Status->getName().str();
+ else
+ return Status.getError();
+ }
+
+ /// Get the contents of the file as a \p MemoryBuffer.
+ virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+ getBuffer(const Twine &Name, int64_t FileSize = -1,
+ bool RequiresNullTerminator = true, bool IsVolatile = false) = 0;
+
+ /// Closes the file.
+ virtual std::error_code close() = 0;
+};
+
+/// A member of a directory, yielded by a directory_iterator.
+/// Only information available on most platforms is included.
+class directory_entry {
+ std::string Path;
+ llvm::sys::fs::file_type Type;
+
+public:
+ directory_entry() = default;
+ directory_entry(std::string Path, llvm::sys::fs::file_type Type)
+ : Path(std::move(Path)), Type(Type) {}
+
+ llvm::StringRef path() const { return Path; }
+ llvm::sys::fs::file_type type() const { return Type; }
+};
+
+namespace detail {
+
+/// An interface for virtual file systems to provide an iterator over the
+/// (non-recursive) contents of a directory.
+struct DirIterImpl {
+ virtual ~DirIterImpl();
+
+ /// Sets \c CurrentEntry to the next entry in the directory on success,
+ /// to directory_entry() at end, or returns a system-defined \c error_code.
+ virtual std::error_code increment() = 0;
+
+ directory_entry CurrentEntry;
+};
+
+} // namespace detail
+
+/// An input iterator over the entries in a virtual path, similar to
+/// llvm::sys::fs::directory_iterator.
+class directory_iterator {
+ std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy
+
+public:
+ directory_iterator(std::shared_ptr<detail::DirIterImpl> I)
+ : Impl(std::move(I)) {
+ assert(Impl.get() != nullptr && "requires non-null implementation");
+ if (Impl->CurrentEntry.path().empty())
+ Impl.reset(); // Normalize the end iterator to Impl == nullptr.
+ }
+
+ /// Construct an 'end' iterator.
+ directory_iterator() = default;
+
+ /// Equivalent to operator++, with an error code.
+ directory_iterator &increment(std::error_code &EC) {
+ assert(Impl && "attempting to increment past end");
+ EC = Impl->increment();
+ if (Impl->CurrentEntry.path().empty())
+ Impl.reset(); // Normalize the end iterator to Impl == nullptr.
+ return *this;
+ }
+
+ const directory_entry &operator*() const { return Impl->CurrentEntry; }
+ const directory_entry *operator->() const { return &Impl->CurrentEntry; }
+
+ bool operator==(const directory_iterator &RHS) const {
+ if (Impl && RHS.Impl)
+ return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path();
+ return !Impl && !RHS.Impl;
+ }
+ bool operator!=(const directory_iterator &RHS) const {
+ return !(*this == RHS);
+ }
+};
+
+class FileSystem;
+
+namespace detail {
+
+/// Keeps state for the recursive_directory_iterator.
+struct RecDirIterState {
+ std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
+ bool HasNoPushRequest = false;
+};
+
+} // end namespace detail
+
+/// An input iterator over the recursive contents of a virtual path,
+/// similar to llvm::sys::fs::recursive_directory_iterator.
+class recursive_directory_iterator {
+ FileSystem *FS;
+ std::shared_ptr<detail::RecDirIterState>
+ State; // Input iterator semantics on copy.
+
+public:
+ recursive_directory_iterator(FileSystem &FS, const Twine &Path,
+ std::error_code &EC);
+
+ /// Construct an 'end' iterator.
+ recursive_directory_iterator() = default;
+
+ /// Equivalent to operator++, with an error code.
+ recursive_directory_iterator &increment(std::error_code &EC);
+
+ const directory_entry &operator*() const { return *State->Stack.top(); }
+ const directory_entry *operator->() const { return &*State->Stack.top(); }
+
+ bool operator==(const recursive_directory_iterator &Other) const {
+ return State == Other.State; // identity
+ }
+ bool operator!=(const recursive_directory_iterator &RHS) const {
+ return !(*this == RHS);
+ }
+
+ /// Gets the current level. Starting path is at level 0.
+ int level() const {
+ assert(!State->Stack.empty() &&
+ "Cannot get level without any iteration state");
+ return State->Stack.size() - 1;
+ }
+
+ void no_push() { State->HasNoPushRequest = true; }
+};
+
+/// The virtual file system interface.
+class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> {
+public:
+ virtual ~FileSystem();
+
+ /// Get the status of the entry at \p Path, if one exists.
+ virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0;
+
+ /// Get a \p File object for the file at \p Path, if one exists.
+ virtual llvm::ErrorOr<std::unique_ptr<File>>
+ openFileForRead(const Twine &Path) = 0;
+
+ /// This is a convenience method that opens a file, gets its content and then
+ /// closes the file.
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+ getBufferForFile(const Twine &Name, int64_t FileSize = -1,
+ bool RequiresNullTerminator = true, bool IsVolatile = false);
+
+ /// Get a directory_iterator for \p Dir.
+ /// \note The 'end' iterator is directory_iterator().
+ virtual directory_iterator dir_begin(const Twine &Dir,
+ std::error_code &EC) = 0;
+
+ /// Set the working directory. This will affect all following operations on
+ /// this file system and may propagate down for nested file systems.
+ virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0;
+
+ /// Get the working directory of this file system.
+ virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0;
+
+  /// Gets the real path of \p Path, e.g. collapses all . and .. patterns and
+  /// resolves symlinks. For the real file system this uses
+  /// `llvm::sys::fs::real_path`. Returns errc::operation_not_permitted if not
+  /// implemented by the subclass.
+ virtual std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const;
+
+ /// Check whether a file exists. Provided for convenience.
+ bool exists(const Twine &Path);
+
+ /// Is the file mounted on a local filesystem?
+ virtual std::error_code isLocal(const Twine &Path, bool &Result);
+
+ /// Make \a Path an absolute path.
+ ///
+ /// Makes \a Path absolute using the current directory if it is not already.
+ /// An empty \a Path will result in the current directory.
+ ///
+ /// /absolute/path => /absolute/path
+ /// relative/../path => <current-directory>/relative/../path
+ ///
+ /// \param Path A path that is modified to be an absolute path.
+ /// \returns success if \a path has been made absolute, otherwise a
+ /// platform-specific error_code.
+ std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
+};
+
+/// Gets a \p vfs::FileSystem for the 'real' file system, as seen by
+/// the operating system.
+IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
+
+/// A file system that allows overlaying one \p AbstractFileSystem on top
+/// of another.
+///
+/// Consists of a stack of >=1 \p FileSystem objects, which are treated as being
+/// one merged file system. When there is a directory that exists in more than
+/// one file system, the \p OverlayFileSystem contains a directory containing
+/// the union of their contents. The attributes (permissions, etc.) of the
+/// top-most (most recently added) directory are used. When there is a file
+/// that exists in more than one file system, the file in the top-most file
+/// system overrides the other(s).
+class OverlayFileSystem : public FileSystem {
+ using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>;
+
+ /// The stack of file systems, implemented as a list in order of
+ /// their addition.
+ FileSystemList FSList;
+
+public:
+ OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base);
+
+ /// Pushes a file system on top of the stack.
+ void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS);
+
+ llvm::ErrorOr<Status> status(const Twine &Path) override;
+ llvm::ErrorOr<std::unique_ptr<File>>
+ openFileForRead(const Twine &Path) override;
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+ std::error_code isLocal(const Twine &Path, bool &Result) override;
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override;
+
+ using iterator = FileSystemList::reverse_iterator;
+ using const_iterator = FileSystemList::const_reverse_iterator;
+
+ /// Get an iterator pointing to the most recently added file system.
+ iterator overlays_begin() { return FSList.rbegin(); }
+ const_iterator overlays_begin() const { return FSList.rbegin(); }
+
+ /// Get an iterator pointing one-past the least recently added file
+ /// system.
+ iterator overlays_end() { return FSList.rend(); }
+ const_iterator overlays_end() const { return FSList.rend(); }
+};
+
+/// By default, this delegates all calls to the underlying file system. This
+/// is useful when derived file systems want to override some calls and still
+/// proxy other calls.
+class ProxyFileSystem : public FileSystem {
+public:
+ explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
+ : FS(std::move(FS)) {}
+
+ llvm::ErrorOr<Status> status(const Twine &Path) override {
+ return FS->status(Path);
+ }
+ llvm::ErrorOr<std::unique_ptr<File>>
+ openFileForRead(const Twine &Path) override {
+ return FS->openFileForRead(Path);
+ }
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
+ return FS->dir_begin(Dir, EC);
+ }
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
+ return FS->getCurrentWorkingDirectory();
+ }
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
+ return FS->setCurrentWorkingDirectory(Path);
+ }
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override {
+ return FS->getRealPath(Path, Output);
+ }
+ std::error_code isLocal(const Twine &Path, bool &Result) override {
+ return FS->isLocal(Path, Result);
+ }
+
+protected:
+ FileSystem &getUnderlyingFS() { return *FS; }
+
+private:
+ IntrusiveRefCntPtr<FileSystem> FS;
+
+ virtual void anchor();
+};
+
+namespace detail {
+
+class InMemoryDirectory;
+class InMemoryFile;
+
+} // namespace detail
+
+/// An in-memory file system.
+class InMemoryFileSystem : public FileSystem {
+ std::unique_ptr<detail::InMemoryDirectory> Root;
+ std::string WorkingDirectory;
+ bool UseNormalizedPaths = true;
+
+  /// If HardLinkTarget is non-null, a hard link is created to the To path,
+  /// which must be a file. If it is null, this behaves like the public addFile.
+ bool addFile(const Twine &Path, time_t ModificationTime,
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ Optional<uint32_t> User, Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::file_type> Type,
+ Optional<llvm::sys::fs::perms> Perms,
+ const detail::InMemoryFile *HardLinkTarget);
+
+public:
+ explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
+ ~InMemoryFileSystem() override;
+
+ /// Add a file containing a buffer or a directory to the VFS with a
+ /// path. The VFS owns the buffer. If present, User, Group, Type
+ /// and Perms apply to the newly-created file or directory.
+ /// \return true if the file or directory was successfully added,
+ /// false if the file or directory already exists in the file system with
+ /// different contents.
+ bool addFile(const Twine &Path, time_t ModificationTime,
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ Optional<uint32_t> User = None, Optional<uint32_t> Group = None,
+ Optional<llvm::sys::fs::file_type> Type = None,
+ Optional<llvm::sys::fs::perms> Perms = None);
+
+ /// Add a hard link to a file.
+ /// Here hard links are not intended to be fully equivalent to the classical
+ /// filesystem. Both the hard link and the file share the same buffer and
+ /// status (and thus have the same UniqueID). Because of this there is no way
+ /// to distinguish between the link and the file after the link has been
+ /// added.
+ ///
+ /// The To path must be an existing file or a hardlink. The From file must not
+ /// have been added before. The To Path must not be a directory. The From Node
+ /// is added as a hard link which points to the resolved file of To Node.
+ /// \return true if the above condition is satisfied and hardlink was
+ /// successfully created, false otherwise.
+ bool addHardLink(const Twine &From, const Twine &To);
+
+ /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
+ /// If present, User, Group, Type and Perms apply to the newly-created file
+ /// or directory.
+ /// \return true if the file or directory was successfully added,
+ /// false if the file or directory already exists in the file system with
+ /// different contents.
+ bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
+ llvm::MemoryBuffer *Buffer, Optional<uint32_t> User = None,
+ Optional<uint32_t> Group = None,
+ Optional<llvm::sys::fs::file_type> Type = None,
+ Optional<llvm::sys::fs::perms> Perms = None);
+
+ std::string toString() const;
+
+ /// Return true if this file system normalizes . and .. in paths.
+ bool useNormalizedPaths() const { return UseNormalizedPaths; }
+
+ llvm::ErrorOr<Status> status(const Twine &Path) override;
+ llvm::ErrorOr<std::unique_ptr<File>>
+ openFileForRead(const Twine &Path) override;
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
+
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
+ return WorkingDirectory;
+ }
+ /// Canonicalizes \p Path by combining with the current working
+ /// directory and normalizing the path (e.g. remove dots). If the current
+ /// working directory is not set, this returns errc::operation_not_permitted.
+ ///
+  /// This doesn't resolve symlinks, as they are not supported by the in-memory
+  /// file system.
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override;
+ std::error_code isLocal(const Twine &Path, bool &Result) override;
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+};
+
+/// Get a globally unique ID for a virtual file or directory.
+llvm::sys::fs::UniqueID getNextVirtualUniqueID();
+
+/// Gets a \p FileSystem for a virtual file system described in YAML
+/// format.
+IntrusiveRefCntPtr<FileSystem>
+getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ llvm::SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath, void *DiagContext = nullptr,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
+
+struct YAMLVFSEntry {
+ template <typename T1, typename T2>
+ YAMLVFSEntry(T1 &&VPath, T2 &&RPath)
+ : VPath(std::forward<T1>(VPath)), RPath(std::forward<T2>(RPath)) {}
+ std::string VPath;
+ std::string RPath;
+};
+
+class VFSFromYamlDirIterImpl;
+class RedirectingFileSystemParser;
+
+/// A virtual file system parsed from a YAML file.
+///
+/// Currently, this class allows creating virtual directories and mapping
+/// virtual file paths to existing external files, available in \c ExternalFS.
+///
+/// The basic structure of the parsed file is:
+/// \verbatim
+/// {
+/// 'version': <version number>,
+/// <optional configuration>
+/// 'roots': [
+/// <directory entries>
+/// ]
+/// }
+/// \endverbatim
+///
+/// All configuration options are optional.
+/// 'case-sensitive': <boolean, default=true>
+/// 'use-external-names': <boolean, default=true>
+/// 'overlay-relative': <boolean, default=false>
+/// 'fallthrough': <boolean, default=true>
+///
+/// Virtual directories are represented as
+/// \verbatim
+/// {
+/// 'type': 'directory',
+/// 'name': <string>,
+/// 'contents': [ <file or directory entries> ]
+/// }
+/// \endverbatim
+///
+/// The default attributes for virtual directories are:
+/// \verbatim
+/// MTime = now() when created
+/// Perms = 0777
+/// User = Group = 0
+/// Size = 0
+/// UniqueID = unspecified unique value
+/// \endverbatim
+///
+/// Re-mapped files are represented as
+/// \verbatim
+/// {
+/// 'type': 'file',
+/// 'name': <string>,
+/// 'use-external-name': <boolean> # Optional
+/// 'external-contents': <path to external file>
+/// }
+/// \endverbatim
+///
+/// and inherit their attributes from the external contents.
+///
+/// In both cases, the 'name' field may contain multiple path components (e.g.
+/// /path/to/file). However, any directory that contains more than one child
+/// must be uniquely represented by a directory entry.
+class RedirectingFileSystem : public vfs::FileSystem {
+public:
+ enum EntryKind { EK_Directory, EK_File };
+
+ /// A single file or directory in the VFS.
+ class Entry {
+ EntryKind Kind;
+ std::string Name;
+
+ public:
+ Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {}
+ virtual ~Entry() = default;
+
+ StringRef getName() const { return Name; }
+ EntryKind getKind() const { return Kind; }
+ };
+
+ class RedirectingDirectoryEntry : public Entry {
+ std::vector<std::unique_ptr<Entry>> Contents;
+ Status S;
+
+ public:
+ RedirectingDirectoryEntry(StringRef Name,
+ std::vector<std::unique_ptr<Entry>> Contents,
+ Status S)
+ : Entry(EK_Directory, Name), Contents(std::move(Contents)),
+ S(std::move(S)) {}
+ RedirectingDirectoryEntry(StringRef Name, Status S)
+ : Entry(EK_Directory, Name), S(std::move(S)) {}
+
+ Status getStatus() { return S; }
+
+ void addContent(std::unique_ptr<Entry> Content) {
+ Contents.push_back(std::move(Content));
+ }
+
+ Entry *getLastContent() const { return Contents.back().get(); }
+
+ using iterator = decltype(Contents)::iterator;
+
+ iterator contents_begin() { return Contents.begin(); }
+ iterator contents_end() { return Contents.end(); }
+
+ static bool classof(const Entry *E) { return E->getKind() == EK_Directory; }
+ };
+
+ class RedirectingFileEntry : public Entry {
+ public:
+ enum NameKind { NK_NotSet, NK_External, NK_Virtual };
+
+ private:
+ std::string ExternalContentsPath;
+ NameKind UseName;
+
+ public:
+ RedirectingFileEntry(StringRef Name, StringRef ExternalContentsPath,
+ NameKind UseName)
+ : Entry(EK_File, Name), ExternalContentsPath(ExternalContentsPath),
+ UseName(UseName) {}
+
+ StringRef getExternalContentsPath() const { return ExternalContentsPath; }
+
+    /// Whether to use the external path as the name for this file.
+ bool useExternalName(bool GlobalUseExternalName) const {
+ return UseName == NK_NotSet ? GlobalUseExternalName
+ : (UseName == NK_External);
+ }
+
+ NameKind getUseName() const { return UseName; }
+
+ static bool classof(const Entry *E) { return E->getKind() == EK_File; }
+ };
+
+private:
+ friend class VFSFromYamlDirIterImpl;
+ friend class RedirectingFileSystemParser;
+
+ /// The root(s) of the virtual file system.
+ std::vector<std::unique_ptr<Entry>> Roots;
+
+ /// The file system to use for external references.
+ IntrusiveRefCntPtr<FileSystem> ExternalFS;
+
+ /// If IsRelativeOverlay is set, this represents the directory
+ /// path that should be prefixed to each 'external-contents' entry
+ /// when reading from YAML files.
+ std::string ExternalContentsPrefixDir;
+
+ /// @name Configuration
+ /// @{
+
+ /// Whether to perform case-sensitive comparisons.
+ ///
+ /// Currently, case-insensitive matching only works correctly with ASCII.
+ bool CaseSensitive = true;
+
+ /// IsRelativeOverlay marks whether a ExternalContentsPrefixDir path must
+ /// be prefixed in every 'external-contents' when reading from YAML files.
+ bool IsRelativeOverlay = false;
+
+  /// Whether to use the value of 'external-contents' for the
+ /// names of files. This global value is overridable on a per-file basis.
+ bool UseExternalNames = true;
+
+ /// Whether to attempt a file lookup in external file system after it wasn't
+ /// found in VFS.
+ bool IsFallthrough = true;
+ /// @}
+
+  /// Virtual file paths and external files may be canonicalized, i.e. stored
+  /// without "..", "." and "./" in their paths. FIXME: some unittests currently
+  /// fail on win32 when using remove_dots and remove_leading_dotslash on paths.
+ bool UseCanonicalizedPaths =
+#ifdef _WIN32
+ false;
+#else
+ true;
+#endif
+
+ RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS)
+ : ExternalFS(std::move(ExternalFS)) {}
+
+ /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly
+ /// recursing into the contents of \p From if it is a directory.
+ ErrorOr<Entry *> lookupPath(llvm::sys::path::const_iterator Start,
+ llvm::sys::path::const_iterator End,
+ Entry *From) const;
+
+  /// Get the status of a given \c Entry.
+ ErrorOr<Status> status(const Twine &Path, Entry *E);
+
+public:
+ /// Looks up \p Path in \c Roots.
+ ErrorOr<Entry *> lookupPath(const Twine &Path) const;
+
+ /// Parses \p Buffer, which is expected to be in YAML format and
+ /// returns a virtual file system representing its contents.
+ static RedirectingFileSystem *
+ create(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
+ void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);
+
+ ErrorOr<Status> status(const Twine &Path) override;
+ ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
+
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override;
+
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
+
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+
+ std::error_code isLocal(const Twine &Path, bool &Result) override;
+
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
+
+ void setExternalContentsPrefixDir(StringRef PrefixDir);
+
+ StringRef getExternalContentsPrefixDir() const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+ LLVM_DUMP_METHOD void dumpEntry(Entry *E, int NumSpaces = 0) const;
+#endif
+};
+
+/// Collect all pairs of <virtual path, real path> entries from the
+/// \p YAMLFilePath. This is used by the module dependency collector to forward
+/// the entries into the reproducer output VFS YAML file.
+void collectVFSFromYAML(
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
+ SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
+ void *DiagContext = nullptr,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
+
+class YAMLVFSWriter {
+ std::vector<YAMLVFSEntry> Mappings;
+ Optional<bool> IsCaseSensitive;
+ Optional<bool> IsOverlayRelative;
+ Optional<bool> UseExternalNames;
+ std::string OverlayDir;
+
+public:
+ YAMLVFSWriter() = default;
+
+ void addFileMapping(StringRef VirtualPath, StringRef RealPath);
+
+ void setCaseSensitivity(bool CaseSensitive) {
+ IsCaseSensitive = CaseSensitive;
+ }
+
+ void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; }
+
+ void setOverlayDir(StringRef OverlayDirectory) {
+ IsOverlayRelative = true;
+ OverlayDir.assign(OverlayDirectory.str());
+ }
+
+ const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; }
+
+ void write(llvm::raw_ostream &OS);
+};
+
+} // namespace vfs
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H
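Editor's note: a hedged usage sketch for the interfaces declared in this new header: put an InMemoryFileSystem on top of the real file system via OverlayFileSystem and read a file through the combined view. The path and file contents are invented for illustration; the calls themselves are the ones declared above. RedirectingFileSystem instances are typically built from a YAML overlay description via getVFSFromYAML rather than constructed directly.

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    void demoOverlayVFS() {
      using namespace llvm;
      // In-memory layer holding a single synthetic file.
      IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMem(
          new vfs::InMemoryFileSystem());
      InMem->addFile("/virtual/hello.txt", /*ModificationTime=*/0,
                     MemoryBuffer::getMemBuffer("hello from the VFS\n"));

      // The in-memory layer is pushed on top of the real file system; lookups
      // consult the most recently added layer first.
      IntrusiveRefCntPtr<vfs::OverlayFileSystem> Overlay(
          new vfs::OverlayFileSystem(vfs::getRealFileSystem()));
      Overlay->pushOverlay(InMem);

      if (auto Buf = Overlay->getBufferForFile("/virtual/hello.txt"))
        outs() << (*Buf)->getBuffer();
      else
        errs() << "lookup failed: " << Buf.getError().message() << "\n";
    }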
diff --git a/contrib/llvm/include/llvm/Support/Win64EH.h b/contrib/llvm/include/llvm/Support/Win64EH.h
index 928eb906de0c..e27bf1b3a1a5 100644
--- a/contrib/llvm/include/llvm/Support/Win64EH.h
+++ b/contrib/llvm/include/llvm/Support/Win64EH.h
@@ -33,7 +33,24 @@ enum UnwindOpcodes {
UOP_SaveNonVolBig,
UOP_SaveXMM128 = 8,
UOP_SaveXMM128Big,
- UOP_PushMachFrame
+ UOP_PushMachFrame,
+ // The following set of unwind opcodes is for ARM64. They are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+ UOP_AllocMedium,
+ UOP_SaveFPLRX,
+ UOP_SaveFPLR,
+ UOP_SaveReg,
+ UOP_SaveRegX,
+ UOP_SaveRegP,
+ UOP_SaveRegPX,
+ UOP_SaveFReg,
+ UOP_SaveFRegX,
+ UOP_SaveFRegP,
+ UOP_SaveFRegPX,
+ UOP_SetFP,
+ UOP_AddFP,
+ UOP_Nop,
+ UOP_End
};
/// UnwindCode - This union describes a single operation in a function prolog,
diff --git a/contrib/llvm/include/llvm/Support/WithColor.h b/contrib/llvm/include/llvm/Support/WithColor.h
index 85fc5fa0cf14..76842d1c3dc8 100644
--- a/contrib/llvm/include/llvm/Support/WithColor.h
+++ b/contrib/llvm/include/llvm/Support/WithColor.h
@@ -29,23 +29,49 @@ enum class HighlightColor {
Macro,
Error,
Warning,
- Note
+ Note,
+ Remark
};
/// An RAII object that temporarily switches an output stream to a specific
/// color.
class WithColor {
raw_ostream &OS;
- /// Determine whether colors should be displayed.
- bool colorsEnabled(raw_ostream &OS);
+ bool DisableColors;
public:
/// To be used like this: WithColor(OS, HighlightColor::String) << "text";
- WithColor(raw_ostream &OS, HighlightColor S);
+ /// @param OS The output stream
+ /// @param S Symbolic name for syntax element to color
+ /// @param DisableColors Whether to ignore color changes regardless of -color
+ /// and support in OS
+ WithColor(raw_ostream &OS, HighlightColor S, bool DisableColors = false);
+ /// To be used like this: WithColor(OS, raw_ostream::Black) << "text";
+ /// @param OS The output stream
+ /// @param Color ANSI color to use, the special SAVEDCOLOR can be used to
+ /// change only the bold attribute, and keep colors untouched
+ /// @param Bold Bold/brighter text, default false
+ /// @param BG If true, change the background, default: change foreground
+ /// @param DisableColors Whether to ignore color changes regardless of -color
+ /// and support in OS
+ WithColor(raw_ostream &OS,
+ raw_ostream::Colors Color = raw_ostream::SAVEDCOLOR,
+ bool Bold = false, bool BG = false, bool DisableColors = false)
+ : OS(OS), DisableColors(DisableColors) {
+ changeColor(Color, Bold, BG);
+ }
~WithColor();
raw_ostream &get() { return OS; }
operator raw_ostream &() { return OS; }
+ template <typename T> WithColor &operator<<(T &O) {
+ OS << O;
+ return *this;
+ }
+ template <typename T> WithColor &operator<<(const T &O) {
+ OS << O;
+ return *this;
+ }
/// Convenience method for printing "error: " to stderr.
static raw_ostream &error();
@@ -53,13 +79,36 @@ public:
static raw_ostream &warning();
/// Convenience method for printing "note: " to stderr.
static raw_ostream &note();
+ /// Convenience method for printing "remark: " to stderr.
+ static raw_ostream &remark();
/// Convenience method for printing "error: " to the given stream.
- static raw_ostream &error(raw_ostream &OS, StringRef Prefix = "");
+ static raw_ostream &error(raw_ostream &OS, StringRef Prefix = "",
+ bool DisableColors = false);
/// Convenience method for printing "warning: " to the given stream.
- static raw_ostream &warning(raw_ostream &OS, StringRef Prefix = "");
+ static raw_ostream &warning(raw_ostream &OS, StringRef Prefix = "",
+ bool DisableColors = false);
/// Convenience method for printing "note: " to the given stream.
- static raw_ostream &note(raw_ostream &OS, StringRef Prefix = "");
+ static raw_ostream &note(raw_ostream &OS, StringRef Prefix = "",
+ bool DisableColors = false);
+ /// Convenience method for printing "remark: " to the given stream.
+ static raw_ostream &remark(raw_ostream &OS, StringRef Prefix = "",
+ bool DisableColors = false);
+
+ /// Determine whether colors are displayed.
+ bool colorsEnabled();
+
+ /// Change the color of text that will be output from this point forward.
+ /// @param Color ANSI color to use, the special SAVEDCOLOR can be used to
+ /// change only the bold attribute, and keep colors untouched
+ /// @param Bold Bold/brighter text, default false
+ /// @param BG If true, change the background, default: change foreground
+ WithColor &changeColor(raw_ostream::Colors Color, bool Bold = false,
+ bool BG = false);
+
+ /// Reset the colors to terminal defaults. Call this when you are done
+ /// outputting colored text, or before program exit.
+ WithColor &resetColor();
};
} // end namespace llvm
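Editor's note: a small sketch exercising the WithColor additions in this hunk, i.e. the new remark() helper, the raw_ostream-style constructor, the streaming operators, and explicit changeColor/resetColor. The message text is illustrative only.

    #include "llvm/Support/WithColor.h"
    #include "llvm/Support/raw_ostream.h"

    void printDiagnostics() {
      // Prefix helpers write a colored prefix and hand back the stream for the
      // rest of the message.
      llvm::WithColor::remark() << "loop vectorized\n";
      llvm::WithColor::error(llvm::errs(), "mytool") << "something went wrong\n";

      // RAII form: the color is active for the lifetime of the object, and the
      // new operator<< overloads let it be used like a raw_ostream.
      llvm::WithColor Highlight(llvm::outs(), llvm::HighlightColor::Warning);
      Highlight << "highlighted text";
      Highlight.changeColor(llvm::raw_ostream::GREEN, /*Bold=*/true);
      Highlight << " now green and bold";
      Highlight.resetColor();
      llvm::outs() << "\n";
    }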
diff --git a/contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 185b357efef5..466dd309909a 100644
--- a/contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/contrib/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -414,7 +414,7 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_R16, "2-byte") \
ENUM_ENTRY(TYPE_R32, "4-byte") \
ENUM_ENTRY(TYPE_R64, "8-byte") \
- ENUM_ENTRY(TYPE_IMM, "immediate operand") \
+ ENUM_ENTRY(TYPE_IMM, "immediate operand") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
ENUM_ENTRY(TYPE_AVX512ICC, "1-byte immediate operand for AVX512 icmp") \
diff --git a/contrib/llvm/include/llvm/Support/X86TargetParser.def b/contrib/llvm/include/llvm/Support/X86TargetParser.def
index e4af0657a350..e9bede545d3f 100644
--- a/contrib/llvm/include/llvm/Support/X86TargetParser.def
+++ b/contrib/llvm/include/llvm/Support/X86TargetParser.def
@@ -34,17 +34,20 @@ X86_VENDOR(VENDOR_AMD, "amd")
#ifndef X86_CPU_TYPE
#define X86_CPU_TYPE(ARCHNAME, ENUM)
#endif
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("bonnell", INTEL_BONNELL, "bonnell", "atom")
-X86_CPU_TYPE_COMPAT ("core2", INTEL_CORE2, "core2")
-X86_CPU_TYPE_COMPAT ("nehalem", INTEL_COREI7, "corei7")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("amdfam10", AMDFAM10H, "amdfam10h", "amdfam10")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("bdver1", AMDFAM15H, "amdfam15h", "amdfam15")
-X86_CPU_TYPE_COMPAT_WITH_ALIAS("silvermont", INTEL_SILVERMONT, "silvermont", "slm")
-X86_CPU_TYPE_COMPAT ("knl", INTEL_KNL, "knl")
-X86_CPU_TYPE_COMPAT ("btver1", AMD_BTVER1, "btver1")
-X86_CPU_TYPE_COMPAT ("btver2", AMD_BTVER2, "btver2")
-X86_CPU_TYPE_COMPAT ("znver1", AMDFAM17H, "amdfam17h")
-X86_CPU_TYPE_COMPAT ("knm", INTEL_KNM, "knm")
+X86_CPU_TYPE_COMPAT_WITH_ALIAS("bonnell", INTEL_BONNELL, "bonnell", "atom")
+X86_CPU_TYPE_COMPAT ("core2", INTEL_CORE2, "core2")
+X86_CPU_TYPE_COMPAT ("nehalem", INTEL_COREI7, "corei7")
+X86_CPU_TYPE_COMPAT_WITH_ALIAS("amdfam10", AMDFAM10H, "amdfam10h", "amdfam10")
+X86_CPU_TYPE_COMPAT_WITH_ALIAS("bdver1", AMDFAM15H, "amdfam15h", "amdfam15")
+X86_CPU_TYPE_COMPAT_WITH_ALIAS("silvermont", INTEL_SILVERMONT, "silvermont", "slm")
+X86_CPU_TYPE_COMPAT ("knl", INTEL_KNL, "knl")
+X86_CPU_TYPE_COMPAT ("btver1", AMD_BTVER1, "btver1")
+X86_CPU_TYPE_COMPAT ("btver2", AMD_BTVER2, "btver2")
+X86_CPU_TYPE_COMPAT ("znver1", AMDFAM17H, "amdfam17h")
+X86_CPU_TYPE_COMPAT ("knm", INTEL_KNM, "knm")
+X86_CPU_TYPE_COMPAT ("goldmont", INTEL_GOLDMONT, "goldmont")
+X86_CPU_TYPE_COMPAT ("goldmont-plus", INTEL_GOLDMONT_PLUS, "goldmont-plus")
+X86_CPU_TYPE_COMPAT ("tremont", INTEL_TREMONT, "tremont")
// Entries below this are not in libgcc/compiler-rt.
X86_CPU_TYPE ("i386", INTEL_i386)
X86_CPU_TYPE ("i486", INTEL_i486)
@@ -64,9 +67,6 @@ X86_CPU_TYPE ("athlon", AMD_ATHLON)
X86_CPU_TYPE ("athlon-xp", AMD_ATHLON_XP)
X86_CPU_TYPE ("k8", AMD_K8)
X86_CPU_TYPE ("k8-sse3", AMD_K8SSE3)
-X86_CPU_TYPE ("goldmont", INTEL_GOLDMONT)
-X86_CPU_TYPE ("goldmont-plus", INTEL_GOLDMONT_PLUS)
-X86_CPU_TYPE ("tremont", INTEL_TREMONT)
#undef X86_CPU_TYPE_COMPAT_WITH_ALIAS
#undef X86_CPU_TYPE_COMPAT
#undef X86_CPU_TYPE
@@ -97,9 +97,12 @@ X86_CPU_SUBTYPE_COMPAT("broadwell", INTEL_COREI7_BROADWELL, "broadwell
X86_CPU_SUBTYPE_COMPAT("skylake", INTEL_COREI7_SKYLAKE, "skylake")
X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-avx512")
X86_CPU_SUBTYPE_COMPAT("cannonlake", INTEL_COREI7_CANNONLAKE, "cannonlake")
+X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
+X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
// Entries below this are not in libgcc/compiler-rt.
X86_CPU_SUBTYPE ("core2", INTEL_CORE2_65)
X86_CPU_SUBTYPE ("penryn", INTEL_CORE2_45)
+X86_CPU_SUBTYPE ("cascadelake", INTEL_COREI7_CASCADELAKE)
X86_CPU_SUBTYPE ("k6", AMDPENTIUM_K6)
X86_CPU_SUBTYPE ("k6-2", AMDPENTIUM_K62)
X86_CPU_SUBTYPE ("k6-3", AMDPENTIUM_K63)
@@ -147,11 +150,16 @@ X86_FEATURE_COMPAT(27, FEATURE_AVX512IFMA, "avx512ifma")
X86_FEATURE_COMPAT(28, FEATURE_AVX5124VNNIW, "avx5124vnniw")
X86_FEATURE_COMPAT(29, FEATURE_AVX5124FMAPS, "avx5124fmaps")
X86_FEATURE_COMPAT(30, FEATURE_AVX512VPOPCNTDQ, "avx512vpopcntdq")
+X86_FEATURE_COMPAT(31, FEATURE_AVX512VBMI2, "avx512vbmi2")
+X86_FEATURE_COMPAT(32, FEATURE_GFNI, "gfni")
+X86_FEATURE_COMPAT(33, FEATURE_VPCLMULQDQ, "vpclmulqdq")
+X86_FEATURE_COMPAT(34, FEATURE_AVX512VNNI, "avx512vnni")
+X86_FEATURE_COMPAT(35, FEATURE_AVX512BITALG, "avx512bitalg")
// Features below here are not in libgcc/compiler-rt.
-X86_FEATURE (32, FEATURE_MOVBE)
-X86_FEATURE (33, FEATURE_ADX)
-X86_FEATURE (34, FEATURE_EM64T)
-X86_FEATURE (35, FEATURE_CLFLUSHOPT)
-X86_FEATURE (36, FEATURE_SHA)
+X86_FEATURE (64, FEATURE_MOVBE)
+X86_FEATURE (65, FEATURE_ADX)
+X86_FEATURE (66, FEATURE_EM64T)
+X86_FEATURE (67, FEATURE_CLFLUSHOPT)
+X86_FEATURE (68, FEATURE_SHA)
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
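Editor's note: X86TargetParser.def is an X-macro table; a consumer defines the X86_CPU_TYPE* macros it cares about, includes the file, and each entry above expands in place (the #undef lines at the end reset the macros). A hedged sketch of one way such a table is consumed; the enum and namespace names below are invented, only the macro arities come from the entries above.

    // Hypothetical consumer: build an enum of every CPU type in the table.
    namespace demo {
    enum CPUType {
    #define X86_CPU_TYPE(ARCHNAME, ENUM) ENUM,
    #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) ENUM,
    #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) ENUM,
    #include "llvm/Support/X86TargetParser.def"
      CPU_TYPE_MAX
    };
    } // namespace demo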
diff --git a/contrib/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm/include/llvm/Support/YAMLTraits.h
index 4b8c4e958288..3d790e96fff7 100644
--- a/contrib/llvm/include/llvm/Support/YAMLTraits.h
+++ b/contrib/llvm/include/llvm/Support/YAMLTraits.h
@@ -27,6 +27,7 @@
#include <cctype>
#include <cstddef>
#include <cstdint>
+#include <iterator>
#include <map>
#include <memory>
#include <new>
@@ -38,6 +39,12 @@
namespace llvm {
namespace yaml {
+enum class NodeKind : uint8_t {
+ Scalar,
+ Map,
+ Sequence,
+};
+
struct EmptyContext {};
/// This class should be specialized by any type that needs to be converted
@@ -144,14 +151,14 @@ struct ScalarTraits {
// Must provide:
//
// Function to write the value as a string:
- //static void output(const T &value, void *ctxt, llvm::raw_ostream &out);
+ // static void output(const T &value, void *ctxt, llvm::raw_ostream &out);
//
// Function to convert a string to a value. Returns the empty
// StringRef on success or an error string if string is malformed:
- //static StringRef input(StringRef scalar, void *ctxt, T &value);
+ // static StringRef input(StringRef scalar, void *ctxt, T &value);
//
// Function to determine if the value should be quoted.
- //static QuotingType mustQuote(StringRef);
+ // static QuotingType mustQuote(StringRef);
};
/// This class should be specialized by type that requires custom conversion
@@ -162,7 +169,7 @@ struct ScalarTraits {
/// static void output(const MyType &Value, void*, llvm::raw_ostream &Out)
/// {
/// // stream out custom formatting
-/// Out << Val;
+/// Out << Value;
/// }
/// static StringRef input(StringRef Scalar, void*, MyType &Value) {
/// // parse scalar and set `value`
@@ -180,6 +187,47 @@ struct BlockScalarTraits {
// Function to convert a string to a value. Returns the empty
// StringRef on success or an error string if string is malformed:
// static StringRef input(StringRef Scalar, void *ctxt, T &Value);
+ //
+ // Optional:
+ // static StringRef inputTag(T &Val, std::string Tag)
+ // static void outputTag(const T &Val, raw_ostream &Out)
+};
+
+/// This class should be specialized by type that requires custom conversion
+/// to/from a YAML scalar with optional tags. For example:
+///
+/// template <>
+/// struct TaggedScalarTraits<MyType> {
+/// static void output(const MyType &Value, void*, llvm::raw_ostream
+/// &ScalarOut, llvm::raw_ostream &TagOut)
+/// {
+/// // stream out custom formatting including optional Tag
+///     ScalarOut << Value;
+/// }
+/// static StringRef input(StringRef Scalar, StringRef Tag, void*, MyType
+/// &Value) {
+/// // parse scalar and set `value`
+/// // return empty string on success, or error string
+/// return StringRef();
+/// }
+/// static QuotingType mustQuote(const MyType &Value, StringRef) {
+/// return QuotingType::Single;
+/// }
+/// };
+template <typename T> struct TaggedScalarTraits {
+ // Must provide:
+ //
+ // Function to write the value and tag as strings:
+ // static void output(const T &Value, void *ctx, llvm::raw_ostream &ScalarOut,
+ // llvm::raw_ostream &TagOut);
+ //
+ // Function to convert a string to a value. Returns the empty
+ // StringRef on success or an error string if string is malformed:
+ // static StringRef input(StringRef Scalar, StringRef Tag, void *ctxt, T
+ // &Value);
+ //
+ // Function to determine if the value should be quoted.
+ // static QuotingType mustQuote(const T &Value, StringRef Scalar);
};
/// This class should be specialized by any type that needs to be converted
@@ -233,6 +281,31 @@ struct CustomMappingTraits {
// static void output(IO &io, T &elem);
};
+/// This class should be specialized by any type that can be represented as
+/// a scalar, map, or sequence, decided dynamically. For example:
+///
+/// typedef std::unique_ptr<MyBase> MyPoly;
+///
+/// template<>
+/// struct PolymorphicTraits<MyPoly> {
+/// static NodeKind getKind(const MyPoly &poly) {
+/// return poly->getKind();
+/// }
+/// static MyScalar& getAsScalar(MyPoly &poly) {
+/// if (!poly || !isa<MyScalar>(poly))
+/// poly.reset(new MyScalar());
+/// return *cast<MyScalar>(poly.get());
+/// }
+/// // ...
+/// };
+template <typename T> struct PolymorphicTraits {
+ // Must provide:
+ // static NodeKind getKind(const T &poly);
+ // static scalar_type &getAsScalar(T &poly);
+ // static map_type &getAsMap(T &poly);
+ // static sequence_type &getAsSequence(T &poly);
+};
+
// Only used for better diagnostics of missing traits
template <typename T>
struct MissingTrait;
@@ -249,7 +322,6 @@ struct has_ScalarEnumerationTraits
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<ScalarEnumerationTraits<T>>(nullptr)) == 1);
};
@@ -266,7 +338,6 @@ struct has_ScalarBitSetTraits
template <typename U>
static double test(...);
-public:
static bool const value = (sizeof(test<ScalarBitSetTraits<T>>(nullptr)) == 1);
};
@@ -286,7 +357,6 @@ struct has_ScalarTraits
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<ScalarTraits<T>>(nullptr, nullptr, nullptr)) == 1);
};
@@ -305,11 +375,28 @@ struct has_BlockScalarTraits
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<BlockScalarTraits<T>>(nullptr, nullptr)) == 1);
};
+// Test if TaggedScalarTraits<T> is defined on type T.
+template <class T> struct has_TaggedScalarTraits {
+ using Signature_input = StringRef (*)(StringRef, StringRef, void *, T &);
+ using Signature_output = void (*)(const T &, void *, raw_ostream &,
+ raw_ostream &);
+ using Signature_mustQuote = QuotingType (*)(const T &, StringRef);
+
+ template <typename U>
+ static char test(SameType<Signature_input, &U::input> *,
+ SameType<Signature_output, &U::output> *,
+ SameType<Signature_mustQuote, &U::mustQuote> *);
+
+ template <typename U> static double test(...);
+
+ static bool const value =
+ (sizeof(test<TaggedScalarTraits<T>>(nullptr, nullptr, nullptr)) == 1);
+};
+
// Test if MappingContextTraits<T> is defined on type T.
template <class T, class Context> struct has_MappingTraits {
using Signature_mapping = void (*)(class IO &, T &, Context &);
@@ -320,7 +407,6 @@ template <class T, class Context> struct has_MappingTraits {
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<MappingContextTraits<T, Context>>(nullptr)) == 1);
};
@@ -334,7 +420,6 @@ template <class T> struct has_MappingTraits<T, EmptyContext> {
template <typename U> static double test(...);
-public:
static bool const value = (sizeof(test<MappingTraits<T>>(nullptr)) == 1);
};
@@ -348,7 +433,6 @@ template <class T, class Context> struct has_MappingValidateTraits {
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<MappingContextTraits<T, Context>>(nullptr)) == 1);
};
@@ -362,7 +446,6 @@ template <class T> struct has_MappingValidateTraits<T, EmptyContext> {
template <typename U> static double test(...);
-public:
static bool const value = (sizeof(test<MappingTraits<T>>(nullptr)) == 1);
};
@@ -378,7 +461,6 @@ struct has_SequenceMethodTraits
template <typename U>
static double test(...);
-public:
static bool const value = (sizeof(test<SequenceTraits<T>>(nullptr)) == 1);
};
@@ -394,7 +476,6 @@ struct has_CustomMappingTraits
template <typename U>
static double test(...);
-public:
static bool const value =
(sizeof(test<CustomMappingTraits<T>>(nullptr)) == 1);
};
@@ -424,7 +505,6 @@ struct has_FlowTraits<T, true>
template<typename C>
static char (&f(...))[2];
-public:
static bool const value = sizeof(f<Derived>(nullptr)) == 2;
};
@@ -445,50 +525,114 @@ struct has_DocumentListTraits
template <typename U>
static double test(...);
-public:
static bool const value = (sizeof(test<DocumentListTraits<T>>(nullptr))==1);
};
-inline bool isNumber(StringRef S) {
- static const char OctalChars[] = "01234567";
- if (S.startswith("0") &&
- S.drop_front().find_first_not_of(OctalChars) == StringRef::npos)
- return true;
+template <class T> struct has_PolymorphicTraits {
+ using Signature_getKind = NodeKind (*)(const T &);
- if (S.startswith("0o") &&
- S.drop_front(2).find_first_not_of(OctalChars) == StringRef::npos)
- return true;
+ template <typename U>
+ static char test(SameType<Signature_getKind, &U::getKind> *);
- static const char HexChars[] = "0123456789abcdefABCDEF";
- if (S.startswith("0x") &&
- S.drop_front(2).find_first_not_of(HexChars) == StringRef::npos)
- return true;
+ template <typename U> static double test(...);
- static const char DecChars[] = "0123456789";
- if (S.find_first_not_of(DecChars) == StringRef::npos)
- return true;
+ static bool const value = (sizeof(test<PolymorphicTraits<T>>(nullptr)) == 1);
+};
- if (S.equals(".inf") || S.equals(".Inf") || S.equals(".INF"))
- return true;
+inline bool isNumeric(StringRef S) {
+ const static auto skipDigits = [](StringRef Input) {
+ return Input.drop_front(
+ std::min(Input.find_first_not_of("0123456789"), Input.size()));
+ };
- Regex FloatMatcher("^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
- if (FloatMatcher.match(S))
+ // Make S.front() and S.drop_front().front() (if S.front() is [+-]) calls
+ // safe.
+ if (S.empty() || S.equals("+") || S.equals("-"))
+ return false;
+
+ if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
return true;
- return false;
-}
+ // Infinity and decimal numbers can be prefixed with sign.
+ StringRef Tail = (S.front() == '-' || S.front() == '+') ? S.drop_front() : S;
-inline bool isNumeric(StringRef S) {
- if ((S.front() == '-' || S.front() == '+') && isNumber(S.drop_front()))
+ // Check for infinity first, because checking for hex and oct numbers is more
+ // expensive.
+ if (Tail.equals(".inf") || Tail.equals(".Inf") || Tail.equals(".INF"))
return true;
- if (isNumber(S))
- return true;
+ // Section 10.3.2 Tag Resolution
+ // YAML 1.2 Specification prohibits Base 8 and Base 16 numbers prefixed with
+ // [-+], so S should be used instead of Tail.
+ if (S.startswith("0o"))
+ return S.size() > 2 &&
+ S.drop_front(2).find_first_not_of("01234567") == StringRef::npos;
+
+ if (S.startswith("0x"))
+ return S.size() > 2 && S.drop_front(2).find_first_not_of(
+ "0123456789abcdefABCDEF") == StringRef::npos;
+
+ // Parse float: [-+]? (\. [0-9]+ | [0-9]+ (\. [0-9]* )?) ([eE] [-+]? [0-9]+)?
+ S = Tail;
+
+  // Handle the case where the number starts with '.' and hence needs at least
+  // one digit after the dot (as opposed to a number with digits before the
+  // dot), but doesn't have one.
+ if (S.startswith(".") &&
+ (S.equals(".") ||
+ (S.size() > 1 && std::strchr("0123456789", S[1]) == nullptr)))
+ return false;
+
+ if (S.startswith("E") || S.startswith("e"))
+ return false;
+
+ enum ParseState {
+ Default,
+ FoundDot,
+ FoundExponent,
+ };
+ ParseState State = Default;
- if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
+ S = skipDigits(S);
+
+ // Accept decimal integer.
+ if (S.empty())
return true;
- return false;
+ if (S.front() == '.') {
+ State = FoundDot;
+ S = S.drop_front();
+ } else if (S.front() == 'e' || S.front() == 'E') {
+ State = FoundExponent;
+ S = S.drop_front();
+ } else {
+ return false;
+ }
+
+ if (State == FoundDot) {
+ S = skipDigits(S);
+ if (S.empty())
+ return true;
+
+ if (S.front() == 'e' || S.front() == 'E') {
+ State = FoundExponent;
+ S = S.drop_front();
+ } else {
+ return false;
+ }
+ }
+
+ assert(State == FoundExponent && "Should have found exponent at this point.");
+ if (S.empty())
+ return false;
+
+ if (S.front() == '+' || S.front() == '-') {
+ S = S.drop_front();
+ if (S.empty())
+ return false;
+ }
+
+ return skipDigits(S).empty();
}
inline bool isNull(StringRef S) {
@@ -535,7 +679,6 @@ inline QuotingType needsQuotes(StringRef S) {
// Safe scalar characters.
case '_':
case '-':
- case '/':
case '^':
case '.':
case ',':
@@ -552,6 +695,12 @@ inline QuotingType needsQuotes(StringRef S) {
// DEL (0x7F) are excluded from the allowed character range.
case 0x7F:
return QuotingType::Double;
+ // Forward slash is allowed to be unquoted, but we quote it anyway. We have
+ // many tests that use FileCheck against YAML output, and this output often
+ // contains paths. If we quote backslashes but not forward slashes then
+ // paths will come out either quoted or unquoted depending on which platform
+ // the test is run on, making FileCheck comparisons difficult.
+ case '/':
default: {
// C0 control block (0x0 - 0x1F) is excluded from the allowed character
// range.
@@ -578,10 +727,12 @@ struct missingTraits
!has_ScalarBitSetTraits<T>::value &&
!has_ScalarTraits<T>::value &&
!has_BlockScalarTraits<T>::value &&
+ !has_TaggedScalarTraits<T>::value &&
!has_MappingTraits<T, Context>::value &&
!has_SequenceTraits<T>::value &&
!has_CustomMappingTraits<T>::value &&
- !has_DocumentListTraits<T>::value> {};
+ !has_DocumentListTraits<T>::value &&
+ !has_PolymorphicTraits<T>::value> {};
template <typename T, typename Context>
struct validatedMappingTraits
@@ -635,6 +786,9 @@ public:
virtual void scalarString(StringRef &, QuotingType) = 0;
virtual void blockScalarString(StringRef &) = 0;
+ virtual void scalarTag(std::string &) = 0;
+
+ virtual NodeKind getNodeKind() = 0;
virtual void setError(const Twine &) = 0;
@@ -869,6 +1023,31 @@ yamlize(IO &YamlIO, T &Val, bool, EmptyContext &Ctx) {
}
}
+template <typename T>
+typename std::enable_if<has_TaggedScalarTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
+ if (io.outputting()) {
+ std::string ScalarStorage, TagStorage;
+ raw_string_ostream ScalarBuffer(ScalarStorage), TagBuffer(TagStorage);
+ TaggedScalarTraits<T>::output(Val, io.getContext(), ScalarBuffer,
+ TagBuffer);
+ io.scalarTag(TagBuffer.str());
+ StringRef ScalarStr = ScalarBuffer.str();
+ io.scalarString(ScalarStr,
+ TaggedScalarTraits<T>::mustQuote(Val, ScalarStr));
+ } else {
+ std::string Tag;
+ io.scalarTag(Tag);
+ StringRef Str;
+ io.scalarString(Str, QuotingType::None);
+ StringRef Result =
+ TaggedScalarTraits<T>::input(Str, Tag, io.getContext(), Val);
+ if (!Result.empty()) {
+ io.setError(Twine(Result));
+ }
+ }
+}
+
template <typename T, typename Context>
typename std::enable_if<validatedMappingTraits<T, Context>::value, void>::type
yamlize(IO &io, T &Val, bool, Context &Ctx) {
@@ -925,6 +1104,20 @@ yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
}
template <typename T>
+typename std::enable_if<has_PolymorphicTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
+ switch (io.outputting() ? PolymorphicTraits<T>::getKind(Val)
+ : io.getNodeKind()) {
+ case NodeKind::Scalar:
+ return yamlize(io, PolymorphicTraits<T>::getAsScalar(Val), true, Ctx);
+ case NodeKind::Map:
+ return yamlize(io, PolymorphicTraits<T>::getAsMap(Val), true, Ctx);
+ case NodeKind::Sequence:
+ return yamlize(io, PolymorphicTraits<T>::getAsSequence(Val), true, Ctx);
+ }
+}
+
+template <typename T>
typename std::enable_if<missingTraits<T, EmptyContext>::value, void>::type
yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
@@ -1202,6 +1395,8 @@ private:
void endBitSetScalar() override;
void scalarString(StringRef &, QuotingType) override;
void blockScalarString(StringRef &) override;
+ void scalarTag(std::string &) override;
+ NodeKind getNodeKind() override;
void setError(const Twine &message) override;
bool canElideEmptySequence() override;
@@ -1347,6 +1542,8 @@ public:
void endBitSetScalar() override;
void scalarString(StringRef &, QuotingType) override;
void blockScalarString(StringRef &) override;
+ void scalarTag(std::string &) override;
+ NodeKind getNodeKind() override;
void setError(const Twine &message) override;
bool canElideEmptySequence() override;
@@ -1366,14 +1563,21 @@ private:
void flowKey(StringRef Key);
enum InState {
- inSeq,
- inFlowSeq,
+ inSeqFirstElement,
+ inSeqOtherElement,
+ inFlowSeqFirstElement,
+ inFlowSeqOtherElement,
inMapFirstKey,
inMapOtherKey,
inFlowMapFirstKey,
inFlowMapOtherKey
};
+ static bool inSeqAnyElement(InState State);
+ static bool inFlowSeqAnyElement(InState State);
+ static bool inMapAnyKey(InState State);
+ static bool inFlowMapAnyKey(InState State);
+
raw_ostream &Out;
int WrapColumn;
SmallVector<InState, 8> StateStack;
@@ -1509,6 +1713,16 @@ operator>>(Input &In, T &Val) {
return In;
}
+// Define non-member operator>> so that Input can stream in a polymorphic type.
+template <typename T>
+inline typename std::enable_if<has_PolymorphicTraits<T>::value, Input &>::type
+operator>>(Input &In, T &Val) {
+ EmptyContext Ctx;
+ if (In.setCurrentDocument())
+ yamlize(In, Val, true, Ctx);
+ return In;
+}
+
// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
@@ -1597,6 +1811,24 @@ operator<<(Output &Out, T &Val) {
return Out;
}
+// Define non-member operator<< so that Output can stream out a polymorphic
+// type.
+template <typename T>
+inline typename std::enable_if<has_PolymorphicTraits<T>::value, Output &>::type
+operator<<(Output &Out, T &Val) {
+ EmptyContext Ctx;
+ Out.beginDocuments();
+ if (Out.preflightDocument(0)) {
+ // FIXME: The parser does not support explicit documents terminated with a
+ // plain scalar; the end-marker is included as part of the scalar token.
+    assert(PolymorphicTraits<T>::getKind(Val) != NodeKind::Scalar &&
+           "plain scalar documents are not supported");
+ yamlize(Out, Val, true, Ctx);
+ Out.postflightDocument();
+ }
+ Out.endDocuments();
+ return Out;
+}
+
// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
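Editor's note: a hedged sketch of a TaggedScalarTraits specialization matching the entry points documented in this hunk. The Port type, the "!port" tag, and the error strings are invented for illustration; a value with this trait can then appear wherever yamlize handles elements, e.g. inside mapped or sequenced types.

    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"

    // Toy value that round-trips as a YAML scalar carrying a custom tag.
    struct Port { unsigned Number = 0; };

    namespace llvm {
    namespace yaml {

    template <> struct TaggedScalarTraits<Port> {
      static void output(const Port &Value, void *, raw_ostream &ScalarOut,
                         raw_ostream &TagOut) {
        TagOut << "!port";
        ScalarOut << Value.Number;
      }
      static StringRef input(StringRef Scalar, StringRef Tag, void *,
                             Port &Value) {
        if (!Tag.empty() && Tag != "!port")
          return "unexpected tag";
        if (Scalar.getAsInteger(10, Value.Number))
          return "expected an unsigned integer";
        return StringRef(); // empty result means success
      }
      static QuotingType mustQuote(const Port &, StringRef) {
        return QuotingType::None;
      }
    };

    } // namespace yaml
    } // namespace llvm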
diff --git a/contrib/llvm/include/llvm/Support/raw_ostream.h b/contrib/llvm/include/llvm/Support/raw_ostream.h
index b9ea9b5817f2..d062e716209d 100644
--- a/contrib/llvm/include/llvm/Support/raw_ostream.h
+++ b/contrib/llvm/include/llvm/Support/raw_ostream.h
@@ -367,12 +367,18 @@ class raw_fd_ostream : public raw_pwrite_stream {
int FD;
bool ShouldClose;
+ bool SupportsSeeking;
+
+#ifdef _WIN32
+ /// True if this fd refers to a Windows console device. Mintty and other
+ /// terminal emulators are TTYs, but they are not consoles.
+ bool IsWindowsConsole = false;
+#endif
+
std::error_code EC;
uint64_t pos;
- bool SupportsSeeking;
-
/// See raw_ostream::write_impl.
void write_impl(const char *Ptr, size_t Size) override;
@@ -548,6 +554,8 @@ class buffer_ostream : public raw_svector_ostream {
raw_ostream &OS;
SmallVector<char, 0> Buffer;
+ virtual void anchor() override;
+
public:
buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer), OS(OS) {}
~buffer_ostream() override { OS << str(); }
diff --git a/contrib/llvm/include/llvm/Support/type_traits.h b/contrib/llvm/include/llvm/Support/type_traits.h
index 55d84f138f07..e7b8f2517b8a 100644
--- a/contrib/llvm/include/llvm/Support/type_traits.h
+++ b/contrib/llvm/include/llvm/Support/type_traits.h
@@ -30,9 +30,10 @@ namespace llvm {
template <typename T>
struct isPodLike {
// std::is_trivially_copyable is available in libc++ with clang, libstdc++
- // that comes with GCC 5.
+ // that comes with GCC 5. MSVC 2015 and newer also have
+ // std::is_trivially_copyable.
#if (__has_feature(is_trivially_copyable) && defined(_LIBCPP_VERSION)) || \
- (defined(__GNUC__) && __GNUC__ >= 5)
+ (defined(__GNUC__) && __GNUC__ >= 5) || defined(_MSC_VER)
// If the compiler supports the is_trivially_copyable trait use it, as it
// matches the definition of isPodLike closely.
static const bool value = std::is_trivially_copyable<T>::value;
diff --git a/contrib/llvm/include/llvm/TableGen/StringMatcher.h b/contrib/llvm/include/llvm/TableGen/StringMatcher.h
index 09d2092d43b0..3aa3540d616d 100644
--- a/contrib/llvm/include/llvm/TableGen/StringMatcher.h
+++ b/contrib/llvm/include/llvm/TableGen/StringMatcher.h
@@ -23,12 +23,11 @@ namespace llvm {
class raw_ostream;
-/// StringMatcher - Given a list of strings and code to execute when they match,
-/// output a simple switch tree to classify the input string.
+/// Given a list of strings and code to execute when they match, output a
+/// simple switch tree to classify the input string.
///
-/// If a match is found, the code in Vals[i].second is executed; control must
+/// If a match is found, the code in Matches[i].second is executed; control must
/// not exit this code fragment. If nothing matches, execution falls through.
-///
class StringMatcher {
public:
using StringPair = std::pair<std::string, std::string>;
diff --git a/contrib/llvm/include/llvm/Target/CodeGenCWrappers.h b/contrib/llvm/include/llvm/Target/CodeGenCWrappers.h
index e9a990569d36..3ad77c5d5e00 100644
--- a/contrib/llvm/include/llvm/Target/CodeGenCWrappers.h
+++ b/contrib/llvm/include/llvm/Target/CodeGenCWrappers.h
@@ -31,6 +31,8 @@ inline Optional<CodeModel::Model> unwrap(LLVMCodeModel Model, bool &JIT) {
LLVM_FALLTHROUGH;
case LLVMCodeModelDefault:
return None;
+ case LLVMCodeModelTiny:
+ return CodeModel::Tiny;
case LLVMCodeModelSmall:
return CodeModel::Small;
case LLVMCodeModelKernel:
@@ -45,6 +47,8 @@ inline Optional<CodeModel::Model> unwrap(LLVMCodeModel Model, bool &JIT) {
inline LLVMCodeModel wrap(CodeModel::Model Model) {
switch (Model) {
+ case CodeModel::Tiny:
+ return LLVMCodeModelTiny;
case CodeModel::Small:
return LLVMCodeModelSmall;
case CodeModel::Kernel:
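Editor's note: a minimal sketch of the new Tiny code-model mapping through the C-API wrappers changed above; the include paths are the usual locations for these declarations and are an assumption here.

    #include "llvm-c/TargetMachine.h"
    #include "llvm/Target/CodeGenCWrappers.h"
    #include <cassert>

    void checkTinyRoundTrip() {
      bool JIT = false;
      // LLVMCodeModelTiny now unwraps to CodeModel::Tiny ...
      llvm::Optional<llvm::CodeModel::Model> CM =
          llvm::unwrap(LLVMCodeModelTiny, JIT);
      assert(CM && *CM == llvm::CodeModel::Tiny);
      // ... and wraps back to the same C enumerator.
      assert(llvm::wrap(*CM) == LLVMCodeModelTiny);
    }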
diff --git a/contrib/llvm/include/llvm/Target/GenericOpcodes.td b/contrib/llvm/include/llvm/Target/GenericOpcodes.td
index 79cc1e4d9eee..045fe2520047 100644
--- a/contrib/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/contrib/llvm/include/llvm/Target/GenericOpcodes.td
@@ -120,6 +120,36 @@ def G_VAARG : GenericInstruction {
let mayStore = 1;
}
+def G_CTLZ : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CTLZ_ZERO_UNDEF : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CTTZ : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CTTZ_ZERO_UNDEF : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CTPOP : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
def G_BSWAP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src);
@@ -281,6 +311,14 @@ def G_PTR_MASK : GenericInstruction {
// Overflow ops
//------------------------------------------------------------------------------
+// Generic unsigned addition producing a carry flag.
+def G_UADDO : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
// Generic unsigned addition consuming and producing a carry flag.
def G_UADDE : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
@@ -296,6 +334,19 @@ def G_SADDO : GenericInstruction {
let isCommutable = 1;
}
+// Generic signed addition consuming and producing a carry flag.
+def G_SADDE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
+ let hasSideEffects = 0;
+}
+
+// Generic unsigned subtraction producing a carry flag.
+def G_USUBO : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
// Generic unsigned subtraction consuming and producing a carry flag.
def G_USUBE : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
@@ -303,13 +354,20 @@ def G_USUBE : GenericInstruction {
let hasSideEffects = 0;
}
-// Generic unsigned subtraction producing a carry flag.
+// Generic signed subtraction producing a carry flag.
def G_SSUBO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
}
+// Generic signed subtraction consuming and producing a carry flag.
+def G_SSUBE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
+ let hasSideEffects = 0;
+}
+
// Generic unsigned multiplication producing a carry flag.
def G_UMULO : GenericInstruction {
let OutOperandList = (outs type0:$dst, type1:$carry_out);
@@ -482,6 +540,35 @@ def G_FLOG2 : GenericInstruction {
let hasSideEffects = 0;
}
+// Floating point base-10 logarithm of a value.
+def G_FLOG10 : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point ceiling of a value.
+def G_FCEIL : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+//------------------------------------------------------------------------------
+// Opcodes for LLVM Intrinsics
+//------------------------------------------------------------------------------
+def G_INTRINSIC_TRUNC : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+def G_INTRINSIC_ROUND : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
//------------------------------------------------------------------------------
// Memory ops
//------------------------------------------------------------------------------
@@ -576,6 +663,9 @@ def G_EXTRACT : GenericInstruction {
// Extract multiple registers specified size, starting from blocks given by
// indexes. This will almost certainly be mapped to sub-register COPYs after
// register banks have been selected.
+// The output operands are always ordered from lowest bits to highest:
+// %bits_0_7:(s8), %bits_8_15:(s8),
+// %bits_16_23:(s8), %bits_24_31:(s8) = G_UNMERGE_VALUES %0:(s32)
def G_UNMERGE_VALUES : GenericInstruction {
let OutOperandList = (outs type0:$dst0, variable_ops);
let InOperandList = (ins type1:$src);
@@ -589,13 +679,38 @@ def G_INSERT : GenericInstruction {
let hasSideEffects = 0;
}
-/// Concatenate multiple registers of the same size into a wider register.
+// Concatenate multiple registers of the same size into a wider register.
+// The input operands are always ordered from lowest bits to highest:
+// %0:(s32) = G_MERGE_VALUES %bits_0_7:(s8), %bits_8_15:(s8),
+// %bits_16_23:(s8), %bits_24_31:(s8)
def G_MERGE_VALUES : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src0, variable_ops);
let hasSideEffects = 0;
}
+/// Create a vector from multiple scalar registers.
+def G_BUILD_VECTOR : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src0, variable_ops);
+ let hasSideEffects = 0;
+}
+
+/// Like G_BUILD_VECTOR, but truncates the larger operand types to fit the
+/// destination vector elt type.
+def G_BUILD_VECTOR_TRUNC : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src0, variable_ops);
+ let hasSideEffects = 0;
+}
+
+/// Create a vector by concatenating vectors together.
+def G_CONCAT_VECTORS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src0, variable_ops);
+ let hasSideEffects = 0;
+}
+
// Intrinsic without side effects.
def G_INTRINSIC : GenericInstruction {
let OutOperandList = (outs);
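The operand ordering documented above for G_UNMERGE_VALUES and G_MERGE_VALUES (pieces always listed from the lowest bits to the highest) corresponds to the following standalone round-trip sketch:

    #include <array>
    #include <cassert>
    #include <cstdint>

    // Split an s32 into four s8 pieces, lowest bits first, as G_UNMERGE_VALUES does.
    std::array<uint8_t, 4> unmergeS32(uint32_t V) {
      return {static_cast<uint8_t>(V),          // bits 0-7
              static_cast<uint8_t>(V >> 8),     // bits 8-15
              static_cast<uint8_t>(V >> 16),    // bits 16-23
              static_cast<uint8_t>(V >> 24)};   // bits 24-31
    }

    // Reassemble the pieces in the same order, as G_MERGE_VALUES does.
    uint32_t mergeS32(const std::array<uint8_t, 4> &Pieces) {
      return static_cast<uint32_t>(Pieces[0]) |
             (static_cast<uint32_t>(Pieces[1]) << 8) |
             (static_cast<uint32_t>(Pieces[2]) << 16) |
             (static_cast<uint32_t>(Pieces[3]) << 24);
    }

    int main() { assert(mergeS32(unmergeS32(0x12345678u)) == 0x12345678u); }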
diff --git a/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index d487759a4852..31d26361260d 100644
--- a/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/contrib/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -83,6 +83,13 @@ def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
def : GINodeEquiv<G_BR, br>;
def : GINodeEquiv<G_BSWAP, bswap>;
+def : GINodeEquiv<G_CTLZ, ctlz>;
+def : GINodeEquiv<G_CTTZ, cttz>;
+def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>;
+def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
+def : GINodeEquiv<G_CTPOP, ctpop>;
+def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_FCEIL, fceil>;
// Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
// complications that tablegen must take care of. For example, Predicates such
diff --git a/contrib/llvm/include/llvm/Target/Target.td b/contrib/llvm/include/llvm/Target/Target.td
index b746505d2a45..e4b827babb92 100644
--- a/contrib/llvm/include/llvm/Target/Target.td
+++ b/contrib/llvm/include/llvm/Target/Target.td
@@ -439,6 +439,7 @@ class Instruction {
// instruction.
bit isReturn = 0; // Is this instruction a return instruction?
bit isBranch = 0; // Is this instruction a branch instruction?
+ bit isEHScopeReturn = 0; // Does this instruction end an EH scope?
bit isIndirectBranch = 0; // Is this instruction an indirect branch?
bit isCompare = 0; // Is this instruction a comparison instruction?
bit isMoveImm = 0; // Is this instruction a move immediate instruction?
@@ -478,6 +479,7 @@ class Instruction {
bit isInsertSubreg = 0; // Is this instruction a kind of insert subreg?
// If so, make sure to override
// TargetInstrInfo::getInsertSubregLikeInputs.
+ bit variadicOpsAreDefs = 0; // Are variadic operands definitions?
// Does the instruction have side effects that are not captured by any
// operands of the instruction or other flags?
@@ -1103,7 +1105,7 @@ def FAULTING_OP : StandardPseudoInstruction {
let isBranch = 1;
}
def PATCHABLE_OP : StandardPseudoInstruction {
- let OutOperandList = (outs unknown:$dst);
+ let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let usesCustomInserter = 1;
let mayLoad = 1;
@@ -1163,8 +1165,8 @@ def PATCHABLE_TYPED_EVENT_CALL : StandardPseudoInstruction {
let hasSideEffects = 1;
}
def FENTRY_CALL : StandardPseudoInstruction {
- let OutOperandList = (outs unknown:$dst);
- let InOperandList = (ins variable_ops);
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
let AsmString = "# FEntry call";
let usesCustomInserter = 1;
let mayLoad = 1;
@@ -1554,3 +1556,8 @@ include "llvm/Target/GlobalISel/Target.td"
// Pull in the common support for the Global ISel DAG-based selector generation.
//
include "llvm/Target/GlobalISel/SelectionDAGCompat.td"
+
+//===----------------------------------------------------------------------===//
+// Pull in the common support for Pfm Counters generation.
+//
+include "llvm/Target/TargetPfmCounters.td"
diff --git a/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td b/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td
index 8d57cae02d22..4b2c57b34c2e 100644
--- a/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td
+++ b/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td
@@ -7,29 +7,39 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines MCInstPredicate classes and its subclasses.
+// This file defines class MCInstPredicate and its subclasses.
//
-// MCInstPredicate is used to describe constraints on the opcode/operand(s) of
-// an instruction. Each MCInstPredicate class has a well-known semantic, and it
-// is used by a PredicateExpander to generate code for MachineInstr and/or
-// MCInst.
-//
-// MCInstPredicate definitions can be used to construct MCSchedPredicate
-// definitions. An MCSchedPredicate can be used in place of a SchedPredicate
-// when defining SchedReadVariant and SchedWriteVariant used by a processor
-// scheduling model.
+// MCInstPredicate definitions are used by target scheduling models to describe
+// constraints on instructions.
//
-// Here is an example of MCInstPredicate definition:
+// Here is an example of an MCInstPredicate definition in tablegen:
//
// def MCInstPredicateExample : CheckAll<[
// CheckOpcode<[BLR]>,
// CheckIsRegOperand<0>,
// CheckNot<CheckRegOperand<0, LR>>]>;
//
-// Predicate `MCInstPredicateExample` checks that the machine instruction in
-// input is a BLR, and that operand at index 0 is register `LR`.
+// The syntax for MCInstPredicate is declarative, and predicate definitions can
+// be composed together in order to generate more complex constraints.
+//
+// The `CheckAll` from the example defines a composition of three different
+// predicates. Definition `MCInstPredicateExample` identifies instructions
+// whose opcode is BLR, and whose first operand is a register different from
+// register `LR`.
+//
+// Every MCInstPredicate class has a well-known semantic in tablegen. For
+// example, `CheckOpcode` is a special type of predicate used to describe a
+// constraint on the value of an instruction opcode.
//
-// That predicate could be used to rewrite the following definition (from
+// MCInstPredicate definitions are typically used by scheduling models to
+// construct MCSchedPredicate definitions (see the definition of class
+// MCSchedPredicate in llvm/Target/TargetSchedule.td).
+// In particular, an MCSchedPredicate can be used instead of a SchedPredicate
+// when defining the set of SchedReadVariant and SchedWriteVariant of a
+// processor scheduling model.
+//
+// The `MCInstPredicateExample` definition above is equivalent to (and could
+// therefore replace) the following definition from a previous ExynosM3 model (see
// AArch64SchedExynosM3.td):
//
// def M3BranchLinkFastPred : SchedPredicate<[{
@@ -37,22 +47,13 @@
// MI->getOperand(0).isReg() &&
// MI->getOperand(0).getReg() != AArch64::LR}]>;
//
-// MCInstPredicate definitions are used to construct MCSchedPredicate (see the
-// definition of class MCSchedPredicate in llvm/Target/TargetSchedule.td). An
-// MCSchedPredicate can be used by a `SchedVar` to associate a predicate with a
-// list of SchedReadWrites. Note that `SchedVar` are used to create SchedVariant
-// definitions.
-//
-// Each MCInstPredicate class has a well known semantic. For example,
-// `CheckOpcode` is only used to check the instruction opcode value.
-//
-// MCInstPredicate classes allow the definition of predicates in a declarative
-// way. These predicates don't require a custom block of C++, and can be used
-// to define conditions on instructions without being bound to a particular
+// The main advantage of using MCInstPredicate instead of SchedPredicate is
+// portability: users don't need to specify predicates in C++. As a consequence
+// of this, MCInstPredicate definitions are not bound to a particular
// representation (i.e. MachineInstr vs MCInst).
//
-// It also means that tablegen backends must know how to parse and expand them
-// into code that works on MCInst (or MachineInst).
+// Tablegen backends know how to expand MCInstPredicate definitions into actual
+// C++ code that works on MachineInstr (and/or MCInst).
//
// Instances of class PredicateExpander (see utils/Tablegen/PredicateExpander.h)
// know how to expand a predicate. For each MCInstPredicate class, there must be
@@ -68,6 +69,7 @@
// Forward declarations.
class Instruction;
+class SchedMachineModel;
// A generic machine instruction predicate.
class MCInstPredicate;
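As a reading aid for the header comment above: a CheckAll composition behaves like a logical AND over its sub-predicates. A standalone C++ sketch of MCInstPredicateExample, using simplified stand-in types and placeholder opcode/register numbers (this is not PredicateExpander output):

    #include <functional>
    #include <vector>

    struct FakeInst {
      unsigned Opcode;
      bool Op0IsReg;
      unsigned Op0Reg;
    };

    using Pred = std::function<bool(const FakeInst &)>;

    // CheckAll: every sub-check must hold.
    Pred checkAll(std::vector<Pred> Seq) {
      return [Seq](const FakeInst &MI) {
        for (const Pred &P : Seq)
          if (!P(MI))
            return false;
        return true;
      };
    }

    constexpr unsigned BLR = 1, LR = 30; // placeholder encodings
    const Pred ExamplePred = checkAll({
        [](const FakeInst &MI) { return MI.Opcode == BLR; }, // CheckOpcode<[BLR]>
        [](const FakeInst &MI) { return MI.Op0IsReg; },      // CheckIsRegOperand<0>
        [](const FakeInst &MI) { return MI.Op0Reg != LR; }});// CheckNot<CheckRegOperand<0, LR>>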
@@ -104,28 +106,50 @@ class CheckSameRegOperand<int First, int Second> : MCInstPredicate {
int SecondIndex = Second;
}
+// Base class for checks on register/immediate operands.
+// It allows users to define checks like:
+// MyFunction(MI->getOperand(Index).getImm()) == Val;
+//
+// In the example above, `MyFunction` is a function that takes as input an
+// immediate operand value, and returns another value. Field `FunctionMapper` is
+// the name of the function to call on the operand value.
+class CheckOperandBase<int Index, string Fn = ""> : MCOperandPredicate<Index> {
+ string FunctionMapper = Fn;
+}
+
// Check that the machine register operand at position `Index` references
// register R. This predicate assumes that we already checked that the machine
// operand at position `Index` is a register operand.
-class CheckRegOperand<int Index, Register R> : MCOperandPredicate<Index> {
+class CheckRegOperand<int Index, Register R> : CheckOperandBase<Index> {
Register Reg = R;
}
// Check if register operand at index `Index` is the invalid register.
-class CheckInvalidRegOperand<int Index> : MCOperandPredicate<Index>;
+class CheckInvalidRegOperand<int Index> : CheckOperandBase<Index>;
// Check that the operand at position `Index` is immediate `Imm`.
-class CheckImmOperand<int Index, int Imm> : MCOperandPredicate<Index> {
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperand<int Index, int Imm> : CheckOperandBase<Index> {
int ImmVal = Imm;
}
// Similar to CheckImmOperand, however the immediate is not a literal number.
// This is useful when we want to compare the value of an operand against an
// enum value, and we know the actual integer value of that enum.
-class CheckImmOperand_s<int Index, string Value> : MCOperandPredicate<Index> {
+class CheckImmOperand_s<int Index, string Value> : CheckOperandBase<Index> {
string ImmVal = Value;
}
+// Expands to a call to `FunctionMapper` if field `FunctionMapper` is set.
+// Otherwise, it expands to a CheckNot<CheckInvalidRegOperand<Index>>.
+class CheckRegOperandSimple<int Index> : CheckOperandBase<Index>;
+
+// Expands to a call to `FunctionMapper` if field `FunctionMapper` is set.
+// Otherwise, it simply evaluates to TruePred.
+class CheckImmOperandSimple<int Index> : CheckOperandBase<Index>;
+
// Check that the operand at position `Index` is immediate value zero.
class CheckZeroOperand<int Index> : CheckImmOperand<Index, 0>;
@@ -169,18 +193,53 @@ class CheckAll<list<MCInstPredicate> Sequence>
class CheckAny<list<MCInstPredicate> Sequence>
: CheckPredicateSequence<Sequence>;
-// Check that a call to method `Name` in class "XXXGenInstrInfo" (where XXX is
-// the `Target` name) returns true.
+
+// Used to expand the body of a function predicate. See the definition of
+// TIIPredicate below.
+class MCStatement;
+
+// Expands to a return statement. The return expression is a boolean expression
+// described by a MCInstPredicate.
+class MCReturnStatement<MCInstPredicate predicate> : MCStatement {
+ MCInstPredicate Pred = predicate;
+}
+
+// Used to automatically construct cases of a switch statement where the switch
+// variable is an instruction opcode. There is a 'case' for every opcode in the
+// `opcodes` list, and each case is associated with MCStatement `caseStmt`.
+class MCOpcodeSwitchCase<list<Instruction> opcodes, MCStatement caseStmt> {
+ list<Instruction> Opcodes = opcodes;
+ MCStatement CaseStmt = caseStmt;
+}
+
+// Expands to a switch statement. The switch variable is an instruction opcode.
+// The auto-generated switch is populated by a number of cases based on the
+// input `cases` list. A default case is automatically generated, and it
+// evaluates to `default`.
+class MCOpcodeSwitchStatement<list<MCOpcodeSwitchCase> cases,
+ MCStatement default> : MCStatement {
+ list<MCOpcodeSwitchCase> Cases = cases;
+ MCStatement DefaultCase = default;
+}
+
+// Base class for function predicates.
+class FunctionPredicateBase<string name, MCStatement body> {
+ string FunctionName = name;
+ MCStatement Body = body;
+}
+
+// Check that a call to method `Name` in class "XXXInstrInfo" (where XXX is
+// the name of a target) returns true.
//
// TIIPredicate definitions are used to model calls to the target-specific
// InstrInfo. A TIIPredicate is treated specially by the InstrInfoEmitter
// tablegen backend, which will use it to automatically generate a definition in
-// the target specific `GenInstrInfo` class.
-class TIIPredicate<string Target, string Name, MCInstPredicate P> : MCInstPredicate {
- string TargetName = Target;
- string FunctionName = Name;
- MCInstPredicate Pred = P;
-}
+// the target specific `InstrInfo` class.
+//
+// There cannot be multiple TIIPredicate definitions with the same name for the
+// same target.
+class TIIPredicate<string Name, MCStatement body>
+ : FunctionPredicateBase<Name, body>, MCInstPredicate;
// A function predicate that takes as input a machine instruction, and returns
// a boolean value.
@@ -195,3 +254,106 @@ class CheckFunctionPredicate<string MCInstFn, string MachineInstrFn> : MCInstPre
string MCInstFnName = MCInstFn;
string MachineInstrFnName = MachineInstrFn;
}
+
+// Used to classify machine instructions based on a machine instruction
+// predicate.
+//
+// Let IC be an InstructionEquivalenceClass definition, and MI a machine
+// instruction. We say that MI belongs to the equivalence class described by IC
+// if and only if the following two conditions are met:
+// a) MI's opcode is in the `opcodes` set, and
+// b) `Predicate` evaluates to true when applied to MI.
+//
+// Instances of this class can be used by processor scheduling models to
+// describe instructions that have a property in common. For example,
+// InstructionEquivalenceClass definitions can be used to identify the set of
+// dependency breaking instructions for a processor model.
+//
+// An (optional) list of operand indices can be used to further describe
+// properties that apply to instruction operands. For example, it can be used to
+// identify register uses of a dependency breaking instruction that are not in
+// a RAW dependency.
+class InstructionEquivalenceClass<list<Instruction> opcodes,
+ MCInstPredicate pred,
+ list<int> operands = []> {
+ list<Instruction> Opcodes = opcodes;
+ MCInstPredicate Predicate = pred;
+ list<int> OperandIndices = operands;
+}
+
+// Used by processor models to describe dependency breaking instructions.
+//
+// This is mainly an alias for InstructionEquivalenceClass. Input operand
+// `BrokenDeps` identifies the set of "broken dependencies". There is one bit
+// for each implicit and explicit input operand. An empty set of broken
+// dependencies means: "explicit input register operands are independent."
+class DepBreakingClass<list<Instruction> opcodes, MCInstPredicate pred,
+ list<int> BrokenDeps = []>
+ : InstructionEquivalenceClass<opcodes, pred, BrokenDeps>;
+
+// A function descriptor used to describe the signature of a predicate method
+// which will be expanded by the STIPredicateExpander into a tablegen'd
+// XXXGenSubtargetInfo class member definition (here, XXX is a target name).
+//
+// It describes the signature of a TargetSubtarget hook, as well as a few extra
+// properties. Examples of extra properties are:
+// - The default return value for the auto-generated function hook.
+// - A list of subtarget hooks (Delegates) that are called from this function.
+//
+class STIPredicateDecl<string name, MCInstPredicate default = FalsePred,
+ bit overrides = 1, bit expandForMC = 1,
+ bit updatesOpcodeMask = 0,
+ list<STIPredicateDecl> delegates = []> {
+ string Name = name;
+
+ MCInstPredicate DefaultReturnValue = default;
+
+ // True if this method is declared as virtual in class TargetSubtargetInfo.
+ bit OverridesBaseClassMember = overrides;
+
+ // True if we need an equivalent predicate function in the MC layer.
+ bit ExpandForMC = expandForMC;
+
+ // True if the autogenerated method has an extra in/out APInt param used as a
+ // mask of operands.
+ bit UpdatesOpcodeMask = updatesOpcodeMask;
+
+ // A list of STIPredicates used by this definition to delegate part of the
+ // computation. For example, STIPredicateFunction `isDependencyBreaking()`
+ // delegates to `isZeroIdiom()` part of its computation.
+ list<STIPredicateDecl> Delegates = delegates;
+}
+
+// A predicate function definition member of class `XXXGenSubtargetInfo`.
+//
+// If `Declaration.ExpandForMC` is true, then SubtargetEmitter
+// will also expand another definition of this method that accepts an MCInst.
+class STIPredicate<STIPredicateDecl declaration,
+ list<InstructionEquivalenceClass> classes> {
+ STIPredicateDecl Declaration = declaration;
+ list<InstructionEquivalenceClass> Classes = classes;
+ SchedMachineModel SchedModel = ?;
+}
+
+// Convenience classes and definitions used by processor scheduling models to
+// describe dependency breaking instructions and move elimination candidates.
+let UpdatesOpcodeMask = 1 in {
+
+def IsZeroIdiomDecl : STIPredicateDecl<"isZeroIdiom">;
+
+let Delegates = [IsZeroIdiomDecl] in
+def IsDepBreakingDecl : STIPredicateDecl<"isDependencyBreaking">;
+
+} // UpdatesOpcodeMask
+
+def IsOptimizableRegisterMoveDecl
+ : STIPredicateDecl<"isOptimizableRegisterMove">;
+
+class IsZeroIdiomFunction<list<DepBreakingClass> classes>
+ : STIPredicate<IsZeroIdiomDecl, classes>;
+
+class IsDepBreakingFunction<list<DepBreakingClass> classes>
+ : STIPredicate<IsDepBreakingDecl, classes>;
+
+class IsOptimizableRegisterMove<list<InstructionEquivalenceClass> classes>
+ : STIPredicate<IsOptimizableRegisterMoveDecl, classes>;
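A standalone sketch of the membership rule stated for InstructionEquivalenceClass above (both the opcode-set test and the predicate must hold; the types are stand-ins for the tablegen records, not LLVM classes):

    #include <functional>
    #include <set>

    struct SketchInst {
      unsigned Opcode;
    };

    struct EquivalenceClassSketch {
      std::set<unsigned> Opcodes;                        // the `opcodes` list
      std::function<bool(const SketchInst &)> Predicate; // the `pred` check

      // MI belongs to the class iff conditions a) and b) from the comment hold.
      bool contains(const SketchInst &MI) const {
        return Opcodes.count(MI.Opcode) && Predicate(MI);
      }
    };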
diff --git a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index dbdfd4139a0f..e80f2bf82f26 100644
--- a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -45,6 +45,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
protected:
bool SupportIndirectSymViaGOTPCRel = false;
bool SupportGOTPCRelWithOffset = true;
+ bool SupportDebugThreadLocalLocation = true;
+
+ /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values
+ /// for EH.
+ unsigned PersonalityEncoding = 0;
+ unsigned LSDAEncoding = 0;
+ unsigned TTypeEncoding = 0;
/// This section contains the static constructor pointer list.
MCSection *StaticCtorSection = nullptr;
@@ -135,6 +142,10 @@ public:
const TargetMachine &TM,
MachineModuleInfo *MMI) const;
+ unsigned getPersonalityEncoding() const { return PersonalityEncoding; }
+ unsigned getLSDAEncoding() const { return LSDAEncoding; }
+ unsigned getTTypeEncoding() const { return TTypeEncoding; }
+
const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
MCStreamer &Streamer) const;
@@ -170,6 +181,11 @@ public:
return SupportGOTPCRelWithOffset;
}
+ /// Target supports TLS offset relocation in debug section?
+ bool supportDebugThreadLocalLocation() const {
+ return SupportDebugThreadLocalLocation;
+ }
+
/// Get the target specific PC relative GOT entry relocation
virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
const MCValue &MV,
@@ -185,6 +201,12 @@ public:
virtual void emitLinkerFlagsForUsed(raw_ostream &OS,
const GlobalValue *GV) const {}
+ /// If supported, return the section to use for the llvm.commandline
+ /// metadata. Otherwise, return nullptr.
+ virtual MCSection *getSectionForCommandLines() const {
+ return nullptr;
+ }
+
protected:
virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO,
SectionKind Kind,
diff --git a/contrib/llvm/include/llvm/Target/TargetMachine.h b/contrib/llvm/include/llvm/Target/TargetMachine.h
index 1ca68c8df63a..3eafcc25583a 100644
--- a/contrib/llvm/include/llvm/Target/TargetMachine.h
+++ b/contrib/llvm/include/llvm/Target/TargetMachine.h
@@ -84,11 +84,10 @@ protected: // Can only create subclasses.
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
/// Contains target specific asm information.
- const MCAsmInfo *AsmInfo;
-
- const MCRegisterInfo *MRI;
- const MCInstrInfo *MII;
- const MCSubtargetInfo *STI;
+ std::unique_ptr<const MCAsmInfo> AsmInfo;
+ std::unique_ptr<const MCRegisterInfo> MRI;
+ std::unique_ptr<const MCInstrInfo> MII;
+ std::unique_ptr<const MCSubtargetInfo> STI;
unsigned RequireStructuredCFG : 1;
unsigned O0WantsFastISel : 1;
@@ -160,11 +159,11 @@ public:
void resetTargetOptions(const Function &F) const;
/// Return target specific asm information.
- const MCAsmInfo *getMCAsmInfo() const { return AsmInfo; }
+ const MCAsmInfo *getMCAsmInfo() const { return AsmInfo.get(); }
- const MCRegisterInfo *getMCRegisterInfo() const { return MRI; }
- const MCInstrInfo *getMCInstrInfo() const { return MII; }
- const MCSubtargetInfo *getMCSubtargetInfo() const { return STI; }
+ const MCRegisterInfo *getMCRegisterInfo() const { return MRI.get(); }
+ const MCInstrInfo *getMCInstrInfo() const { return MII.get(); }
+ const MCSubtargetInfo *getMCSubtargetInfo() const { return STI.get(); }
/// If intrinsic information is available, return it. If not, return null.
virtual const TargetIntrinsicInfo *getIntrinsicInfo() const {
@@ -202,6 +201,9 @@ public:
bool getO0WantsFastISel() { return O0WantsFastISel; }
void setO0WantsFastISel(bool Enable) { O0WantsFastISel = Enable; }
void setGlobalISel(bool Enable) { Options.EnableGlobalISel = Enable; }
+ void setGlobalISelAbort(GlobalISelAbortMode Mode) {
+ Options.GlobalISelAbort = Mode;
+ }
void setMachineOutliner(bool Enable) {
Options.EnableMachineOutliner = Enable;
}
@@ -285,18 +287,6 @@ public:
void getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV,
Mangler &Mang, bool MayAlwaysUsePrivate = false) const;
MCSymbol *getSymbol(const GlobalValue *GV) const;
-
- /// True if the target uses physical regs at Prolog/Epilog insertion
- /// time. If true (most machines), all vregs must be allocated before
- /// PEI. If false (virtual-register machines), then callee-save register
- /// spilling and scavenging are not needed or used.
- virtual bool usesPhysRegsForPEI() const { return true; }
-
- /// True if the target wants to use interprocedural register allocation by
- /// default. The -enable-ipra flag can be used to override this.
- virtual bool useIPRA() const {
- return false;
- }
};
/// This class describes a target machine that is implemented with the LLVM
@@ -350,8 +340,37 @@ public:
bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out,
raw_pwrite_stream *DwoOut, CodeGenFileType FileTYpe,
MCContext &Context);
+
+ /// True if the target uses physical regs at Prolog/Epilog insertion
+ /// time. If true (most machines), all vregs must be allocated before
+ /// PEI. If false (virtual-register machines), then callee-save register
+ /// spilling and scavenging are not needed or used.
+ virtual bool usesPhysRegsForPEI() const { return true; }
+
+ /// True if the target wants to use interprocedural register allocation by
+ /// default. The -enable-ipra flag can be used to override this.
+ virtual bool useIPRA() const {
+ return false;
+ }
};
+/// Helper method for getting the code model, returning Default if
+/// CM does not have a value. The tiny and kernel models will produce
+/// an error, so targets that support them or require more complex code model
+/// selection logic should implement and call their own getEffectiveCodeModel.
+inline CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
+ CodeModel::Model Default) {
+ if (CM) {
+ // By default, targets do not support the tiny and kernel models.
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
+ return *CM;
+ }
+ return Default;
+}
+
} // end namespace llvm
#endif // LLVM_TARGET_TARGETMACHINE_H
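The helper's dispatch can be mirrored in a self-contained sketch (std::optional and an exception stand in here for llvm::Optional and report_fatal_error):

    #include <optional>
    #include <stdexcept>

    enum class Model { Tiny, Small, Kernel, Medium, Large };

    Model getEffectiveCodeModelSketch(std::optional<Model> CM, Model Default) {
      if (CM) {
        // Generic targets reject the tiny and kernel models.
        if (*CM == Model::Tiny)
          throw std::runtime_error("Target does not support the tiny CodeModel");
        if (*CM == Model::Kernel)
          throw std::runtime_error("Target does not support the kernel CodeModel");
        return *CM;
      }
      return Default;
    }

A target's TargetMachine constructor would then typically pass the optional code model it was handed together with its own default, along the lines of getEffectiveCodeModel(CM, CodeModel::Small).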
diff --git a/contrib/llvm/include/llvm/Target/TargetOptions.h b/contrib/llvm/include/llvm/Target/TargetOptions.h
index 07ed773de55e..b18101d92833 100644
--- a/contrib/llvm/include/llvm/Target/TargetOptions.h
+++ b/contrib/llvm/include/llvm/Target/TargetOptions.h
@@ -96,6 +96,14 @@ namespace llvm {
SCE // Tune debug info for SCE targets (e.g. PS4).
};
+ /// Enable abort calls when global instruction selection fails to lower/select
+ /// an instruction.
+ enum class GlobalISelAbortMode {
+ Disable, // Disable the abort.
+ Enable, // Enable the abort.
+ DisableWithDiag // Disable the abort but emit a diagnostic on failure.
+ };
+
class TargetOptions {
public:
TargetOptions()
@@ -192,6 +200,10 @@ namespace llvm {
/// EnableGlobalISel - This flag enables global instruction selection.
unsigned EnableGlobalISel : 1;
+ /// EnableGlobalISelAbort - Control abort behaviour when global instruction
+ /// selection fails to lower/select an instruction.
+ GlobalISelAbortMode GlobalISelAbort = GlobalISelAbortMode::Enable;
+
/// UseInitArray - Use .init_array instead of .ctors for static
/// constructors.
unsigned UseInitArray : 1;
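A minimal sketch of how a selector might react to a lowering failure under each mode; the diagnostic text and the fall-back-to-another-selector behaviour are assumptions for illustration, not the actual GlobalISel failure path:

    #include <cstdio>
    #include <cstdlib>

    enum class GlobalISelAbortModeSketch { Disable, Enable, DisableWithDiag };

    void handleSelectionFailure(GlobalISelAbortModeSketch Mode) {
      switch (Mode) {
      case GlobalISelAbortModeSketch::Enable:
        std::fprintf(stderr, "unable to lower/select instruction\n");
        std::abort(); // Abort compilation on failure.
      case GlobalISelAbortModeSketch::DisableWithDiag:
        std::fprintf(stderr, "GlobalISel failed; another selector will run\n");
        break; // Diagnose, but keep going.
      case GlobalISelAbortModeSketch::Disable:
        break; // Silently let the pipeline fall back.
      }
    }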
diff --git a/contrib/llvm/include/llvm/Target/TargetPfmCounters.td b/contrib/llvm/include/llvm/Target/TargetPfmCounters.td
new file mode 100644
index 000000000000..dac150f03445
--- /dev/null
+++ b/contrib/llvm/include/llvm/Target/TargetPfmCounters.td
@@ -0,0 +1,50 @@
+//===- TargetPfmCounters.td - Target Pfm Counters -*- tablegen ----------*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces for performance counters.
+
+// Definition of a hardware counter from libpfm identifiers.
+class PfmCounter<string counter> {
+ // The name of the counter that measures events.
+ // The name can be "some_counter + some_other_counter", in which case the
+ // measured value is the sum of events on these counters.
+ string Counter = counter;
+}
+
+// Issue counters can be tied to a ProcResource
+class PfmIssueCounter<string resource_name, string counter>
+ : PfmCounter<counter> {
+ // The name of the ProcResource on which uops are issued. This is used by
+ // llvm-exegesis to compare measurements with values in the SchedModels.
+ // If the CPU has a sched model, this should correspond to the name of a
+ // ProcResource.
+ string ResourceName = resource_name;
+}
+
+def NoPfmCounter : PfmCounter <""> {}
+
+// Set of PfmCounters for measuring sched model characteristics.
+class ProcPfmCounters {
+ // Processors can define how to measure cycles by defining a CycleCounter.
+ PfmCounter CycleCounter = NoPfmCounter;
+ // Processors can define how to measure uops by defining a UopsCounter.
+ PfmCounter UopsCounter = NoPfmCounter;
+ // Processors can define how to measure issued uops by defining IssueCounters.
+ list<PfmIssueCounter> IssueCounters = [];
+}
+
+// A binding of a set of counters to a CPU.
+class PfmCountersBinding<string cpu_name, ProcPfmCounters counters> {
+ string CpuName = cpu_name;
+ ProcPfmCounters Counters = counters;
+}
+
+// Declares the default binding for unbound CPUs for the target.
+class PfmCountersDefaultBinding<ProcPfmCounters counters>
+ : PfmCountersBinding<"", counters> {}
diff --git a/contrib/llvm/include/llvm/Target/TargetSchedule.td b/contrib/llvm/include/llvm/Target/TargetSchedule.td
index 6fd2d5b78e54..808e183f5a5f 100644
--- a/contrib/llvm/include/llvm/Target/TargetSchedule.td
+++ b/contrib/llvm/include/llvm/Target/TargetSchedule.td
@@ -182,8 +182,7 @@ class ProcResourceKind;
//
// SchedModel ties these units to a processor for any stand-alone defs
// of this class.
-class ProcResourceUnits<ProcResourceKind kind, int num,
- list<string> pfmCounters> {
+class ProcResourceUnits<ProcResourceKind kind, int num> {
ProcResourceKind Kind = kind;
int NumUnits = num;
ProcResourceKind Super = ?;
@@ -198,8 +197,8 @@ def EponymousProcResourceKind : ProcResourceKind;
// Subtargets typically define processor resource kind and number of
// units in one place.
-class ProcResource<int num, list<string> pfmCounters = []> : ProcResourceKind,
- ProcResourceUnits<EponymousProcResourceKind, num, pfmCounters>;
+class ProcResource<int num> : ProcResourceKind,
+ ProcResourceUnits<EponymousProcResourceKind, num>;
class ProcResGroup<list<ProcResource> resources> : ProcResourceKind {
list<ProcResource> Resources = resources;
@@ -374,7 +373,11 @@ class SchedPredicate<code pred> : SchedPredicateBase {
SchedMachineModel SchedModel = ?;
code Predicate = pred;
}
-def NoSchedPred : SchedPredicate<[{true}]>;
+
+// Define a predicate to be typically used as the default case in a
+// SchedVariant. If the SchedVariant does not use any other predicate based on
+// MCSchedPredicate, this is the default scheduling case used by llvm-mca.
+def NoSchedPred : MCSchedPredicate<TruePred>;
// Associate a predicate with a list of SchedReadWrites. By default,
// the selected SchedReadWrites are still associated with a single
@@ -461,6 +464,10 @@ class SchedAlias<SchedReadWrite match, SchedReadWrite alias> {
// - The number of physical registers which can be used for register renaming
// purpose.
// - The cost of a register rename.
+// - The set of registers that allow move elimination.
+// - The maximum number of moves that can be eliminated every cycle.
+// - Whether move elimination is limited to register moves whose input
+// is known to be zero.
//
// The cost of a rename is the number of physical registers allocated by the
// register alias table to map the new definition. By default, register can be
@@ -507,11 +514,35 @@ class SchedAlias<SchedReadWrite match, SchedReadWrite alias> {
// partial write is combined with the previous super-register definition. We
// should add support for these cases, and correctly model merge problems with
// partial register accesses.
+//
+// Field MaxMovesEliminatedPerCycle specifies how many moves can be eliminated
+// every cycle. A default value of zero for that field means: there is no limit
+// to the number of moves that can be eliminated by this register file.
+//
+// An instruction MI is a candidate for move elimination if a call to
+// method TargetSubtargetInfo::isOptimizableRegisterMove(MI) returns true (see
+// llvm/CodeGen/TargetSubtargetInfo.h, and llvm/MC/MCInstrAnalysis.h).
+//
+// Subtargets can instantiate tablegen class IsOptimizableRegisterMove (see
+// llvm/Target/TargetInstrPredicate.td) to customize the set of move elimination
+// candidates. By default, no instruction is a valid move elimination candidate.
+//
+// A register move MI is eliminated only if:
+// - MI is a move elimination candidate.
+// - The destination register is from a register class that allows move
+// elimination (see field `AllowMoveElimination` below).
+// - Constraints on the move kind, and the maximum number of moves that can be
+// eliminated per cycle are all met.
+
class RegisterFile<int numPhysRegs, list<RegisterClass> Classes = [],
- list<int> Costs = []> {
+ list<int> Costs = [], list<bit> AllowMoveElim = [],
+ int MaxMoveElimPerCy = 0, bit AllowZeroMoveElimOnly = 0> {
list<RegisterClass> RegClasses = Classes;
list<int> RegCosts = Costs;
+ list<bit> AllowMoveElimination = AllowMoveElim;
int NumPhysRegs = numPhysRegs;
+ int MaxMovesEliminatedPerCycle = MaxMoveElimPerCy;
+ bit AllowZeroMoveEliminationOnly = AllowZeroMoveElimOnly;
SchedMachineModel SchedModel = ?;
}
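The elimination constraints listed in the comment above can be summarised by a small standalone check (simplified stand-in types; the actual implementation does not take this form):

    // Per-instruction facts needed by the check.
    struct MoveElimCandidate {
      bool IsCandidate;     // i.e. isOptimizableRegisterMove(MI) returned true
      bool ClassAllowsElim; // AllowMoveElimination bit for the destination class
      bool InputKnownZero;  // only relevant when AllowZeroMoveEliminationOnly is set
    };

    bool canEliminate(const MoveElimCandidate &MI, unsigned EliminatedThisCycle,
                      unsigned MaxPerCycle, bool ZeroMoveOnly) {
      if (!MI.IsCandidate || !MI.ClassAllowsElim)
        return false;
      if (ZeroMoveOnly && !MI.InputKnownZero)
        return false;
      // MaxMovesEliminatedPerCycle == 0 means "no per-cycle limit".
      return MaxPerCycle == 0 || EliminatedThisCycle < MaxPerCycle;
    }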
@@ -531,23 +562,12 @@ class RetireControlUnit<int bufferSize, int retirePerCycle> {
SchedMachineModel SchedModel = ?;
}
-// Allow the definition of hardware counters.
-class PfmCounter {
+// Base class for Load/StoreQueue. It is used to identify processor resources
+// which describe load/store queues in the LS unit.
+class MemoryQueue<ProcResource PR> {
+ ProcResource QueueDescriptor = PR;
SchedMachineModel SchedModel = ?;
}
-// Each processor can define how to measure cycles by defining a
-// PfmCycleCounter.
-class PfmCycleCounter<string counter> : PfmCounter {
- string Counter = counter;
-}
-
-// Each ProcResourceUnits can define how to measure issued uops by defining
-// a PfmIssueCounter.
-class PfmIssueCounter<ProcResourceUnits resource, list<string> counters>
- : PfmCounter{
- // The resource units on which uops are issued.
- ProcResourceUnits Resource = resource;
- // The list of counters that measure issue events.
- list<string> Counters = counters;
-}
+class LoadQueue<ProcResource LDQueue> : MemoryQueue<LDQueue>;
+class StoreQueue<ProcResource STQueue> : MemoryQueue<STQueue>;
diff --git a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
index 4ba4d821225d..eb5a14bd21b8 100644
--- a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -116,12 +116,18 @@ def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc.
def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl
SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>
]>;
+def SDTIntShiftDOp: SDTypeProfile<1, 3, [ // fshl, fshr
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
def SDTIntSatNoShOp : SDTypeProfile<1, 2, [ // ssat with no shift
SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0>
]>;
+def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
def SDTFPBinOp : SDTypeProfile<1, 2, [ // fadd, fmul, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
@@ -162,7 +168,7 @@ def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg
]>;
def SDTExtInvec : SDTypeProfile<1, 1, [ // sext_invec
SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>, SDTCisVec<1>,
- SDTCisOpSmallerThanOp<1, 0>, SDTCisSameSizeAs<0,1>
+ SDTCisOpSmallerThanOp<1, 0>
]>;
def SDTSetCC : SDTypeProfile<1, 3, [ // setcc
@@ -217,7 +223,7 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
]>;
def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store
- SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<1, 2>
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>
]>;
def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load
@@ -225,16 +231,6 @@ def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load
SDTCisSameNumEltsAs<0, 2>
]>;
-def SDTMaskedGather: SDTypeProfile<2, 3, [ // masked gather
- SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
- SDTCisPtrTy<4>, SDTCVecEltisVT<1, i1>, SDTCisSameNumEltsAs<0, 1>
-]>;
-
-def SDTMaskedScatter: SDTypeProfile<1, 3, [ // masked scatter
- SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameNumEltsAs<0, 1>,
- SDTCVecEltisVT<0, i1>, SDTCisPtrTy<3>
-]>;
-
def SDTVecShuffle : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
@@ -360,6 +356,8 @@ def sra : SDNode<"ISD::SRA" , SDTIntShiftOp>;
def shl : SDNode<"ISD::SHL" , SDTIntShiftOp>;
def rotl : SDNode<"ISD::ROTL" , SDTIntShiftOp>;
def rotr : SDNode<"ISD::ROTR" , SDTIntShiftOp>;
+def fshl : SDNode<"ISD::FSHL" , SDTIntShiftDOp>;
+def fshr : SDNode<"ISD::FSHR" , SDTIntShiftDOp>;
def and : SDNode<"ISD::AND" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def or : SDNode<"ISD::OR" , SDTIntBinOp,
@@ -383,6 +381,12 @@ def umin : SDNode<"ISD::UMIN" , SDTIntBinOp,
def umax : SDNode<"ISD::UMAX" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
+def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
+def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
+def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>;
+def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>;
+def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
+
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>;
@@ -416,8 +420,14 @@ def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def fmaxnum : SDNode<"ISD::FMAXNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
-def fminnan : SDNode<"ISD::FMINNAN" , SDTFPBinOp>;
-def fmaxnan : SDNode<"ISD::FMAXNAN" , SDTFPBinOp>;
+def fminnum_ieee : SDNode<"ISD::FMINNUM_IEEE", SDTFPBinOp,
+ [SDNPCommutative]>;
+def fmaxnum_ieee : SDNode<"ISD::FMAXNUM_IEEE", SDTFPBinOp,
+ [SDNPCommutative]>;
+def fminimum : SDNode<"ISD::FMINIMUM" , SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def fmaximum : SDNode<"ISD::FMAXIMUM" , SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
def fgetsign : SDNode<"ISD::FGETSIGN" , SDTFPToIntOp>;
def fcanonicalize : SDNode<"ISD::FCANONICALIZE", SDTFPUnaryOp>;
def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>;
@@ -510,10 +520,6 @@ def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def masked_scatter : SDNode<"ISD::MSCATTER", SDTMaskedScatter,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def masked_gather : SDNode<"ISD::MGATHER", SDTMaskedGather,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
// and truncst (see below).
@@ -630,6 +636,15 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
code ImmediateCode = [{}];
SDNodeXForm OperandTransform = xform;
+ // When this is set, the PredicateCode may refer to a constant Operands
+ // vector which contains the captured nodes of the DAG, in the order listed
+ // by the Operands field above.
+ //
+ // This is useful when Fragments involves associative / commutative
+ // operators: a single piece of code can easily refer to all operands even
+ // when re-associated / commuted variants of the fragment are matched.
+ bit PredicateCodeUsesOperands = 0;
+
// Define a few pre-packaged predicates. This helps GlobalISel import
// existing rules from SelectionDAG for many common cases.
// They will be tested prior to the code in pred and must not be used in
@@ -1067,6 +1082,15 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
let MemoryVT = f32;
}
+def nonvolatile_load : PatFrag<(ops node:$ptr),
+ (load node:$ptr), [{
+ return !cast<LoadSDNode>(N)->isVolatile();
+}]>;
+def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return !cast<StoreSDNode>(N)->isVolatile();
+}]>;
+
// nontemporal store fragments.
def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
diff --git a/contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h b/contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h
index 96264ac81dc4..b2975ec395d5 100644
--- a/contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h
+++ b/contrib/llvm/include/llvm/Testing/Support/SupportHelpers.h
@@ -10,10 +10,13 @@
#ifndef LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H
#define LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H
-#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_os_ostream.h"
#include "gtest/gtest-printers.h"
+#include <string>
+
namespace llvm {
namespace detail {
struct ErrorHolder {
@@ -52,6 +55,10 @@ void PrintTo(const ExpectedHolder<T> &Item, std::ostream *Out) {
}
}
} // namespace detail
+
+namespace unittest {
+SmallString<128> getInputFileDirectory(const char *Argv0);
+}
} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/TextAPI/ELF/ELFStub.h b/contrib/llvm/include/llvm/TextAPI/ELF/ELFStub.h
new file mode 100644
index 000000000000..fa54e6f8b711
--- /dev/null
+++ b/contrib/llvm/include/llvm/TextAPI/ELF/ELFStub.h
@@ -0,0 +1,69 @@
+//===- ELFStub.h ------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-----------------------------------------------------------------------===/
+///
+/// \file
+/// This file defines an internal representation of an ELF stub.
+///
+//===-----------------------------------------------------------------------===/
+
+#ifndef LLVM_TEXTAPI_ELF_ELFSTUB_H
+#define LLVM_TEXTAPI_ELF_ELFSTUB_H
+
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/VersionTuple.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+namespace elfabi {
+
+typedef uint16_t ELFArch;
+
+enum class ELFSymbolType {
+ NoType = ELF::STT_NOTYPE,
+ Object = ELF::STT_OBJECT,
+ Func = ELF::STT_FUNC,
+ TLS = ELF::STT_TLS,
+
+ // Type information is 4 bits, so 16 is safely out of range.
+ Unknown = 16,
+};
+
+struct ELFSymbol {
+ ELFSymbol(std::string SymbolName) : Name(SymbolName) {}
+ std::string Name;
+ uint64_t Size;
+ ELFSymbolType Type;
+ bool Undefined;
+ bool Weak;
+ Optional<std::string> Warning;
+ bool operator<(const ELFSymbol &RHS) const {
+ return Name < RHS.Name;
+ }
+};
+
+// A cumulative representation of ELF stubs.
+// Both textual and binary stubs are read into and written from this object.
+class ELFStub {
+// TODO: Add support for symbol versioning.
+public:
+ VersionTuple TbeVersion;
+ Optional<std::string> SoName;
+ ELFArch Arch;
+ std::vector<std::string> NeededLibs;
+ std::set<ELFSymbol> Symbols;
+
+ ELFStub() {}
+ ELFStub(const ELFStub &Stub);
+ ELFStub(ELFStub &&Stub);
+};
+} // end namespace elfabi
+} // end namespace llvm
+
+#endif // LLVM_TEXTAPI_ELF_ELFSTUB_H
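A hypothetical usage sketch of this representation; it assumes the LLVM headers and the TextAPI library are available, the field names come from the class above, and the soname, symbol, and architecture values are made up:

    #include "llvm/TextAPI/ELF/ELFStub.h"
    #include <string>

    using namespace llvm::elfabi;

    ELFStub makeExampleStub() {
      ELFStub Stub;
      Stub.TbeVersion = llvm::VersionTuple(1, 0);
      Stub.SoName = std::string("libexample.so.1"); // hypothetical soname
      Stub.Arch = llvm::ELF::EM_X86_64;             // ELFArch is a raw e_machine value
      Stub.NeededLibs.push_back("libc.so.6");

      ELFSymbol Sym("foo");                         // hypothetical exported function
      Sym.Size = 0;
      Sym.Type = ELFSymbolType::Func;
      Sym.Undefined = false;
      Sym.Weak = false;
      Stub.Symbols.insert(Sym);
      return Stub;
    }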
diff --git a/contrib/llvm/include/llvm/TextAPI/ELF/TBEHandler.h b/contrib/llvm/include/llvm/TextAPI/ELF/TBEHandler.h
new file mode 100644
index 000000000000..91521c656fa2
--- /dev/null
+++ b/contrib/llvm/include/llvm/TextAPI/ELF/TBEHandler.h
@@ -0,0 +1,45 @@
+//===- TBEHandler.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-----------------------------------------------------------------------===/
+///
+/// \file
+/// This file declares an interface for reading and writing .tbe (text-based
+/// ELF) files.
+///
+//===-----------------------------------------------------------------------===/
+
+#ifndef LLVM_TEXTAPI_ELF_TBEHANDLER_H
+#define LLVM_TEXTAPI_ELF_TBEHANDLER_H
+
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+
+namespace llvm {
+
+class raw_ostream;
+class Error;
+class StringRef;
+class VersionTuple;
+
+namespace elfabi {
+
+class ELFStub;
+
+const VersionTuple TBEVersionCurrent(1, 0);
+
+/// Attempts to read an ELF interface file from a StringRef buffer.
+Expected<std::unique_ptr<ELFStub>> readTBEFromBuffer(StringRef Buf);
+
+/// Attempts to write an ELF interface file to a raw_ostream.
+Error writeTBEToOutputStream(raw_ostream &OS, const ELFStub &Stub);
+
+} // end namespace elfabi
+} // end namespace llvm
+
+#endif // LLVM_TEXTAPI_ELF_TBEHANDLER_H
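A hypothetical round-trip sketch using the two entry points declared above; it assumes the LLVM headers and libraries are available and only uses the declared Expected/Error signatures:

    #include "llvm/TextAPI/ELF/ELFStub.h"
    #include "llvm/TextAPI/ELF/TBEHandler.h"
    #include "llvm/Support/raw_ostream.h"

    // Parse a .tbe buffer and immediately re-emit it to stdout.
    llvm::Error dumpStub(llvm::StringRef Buf) {
      llvm::Expected<std::unique_ptr<llvm::elfabi::ELFStub>> StubOrErr =
          llvm::elfabi::readTBEFromBuffer(Buf);
      if (!StubOrErr)
        return StubOrErr.takeError();
      return llvm::elfabi::writeTBEToOutputStream(llvm::outs(), **StubOrErr);
    }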
diff --git a/contrib/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm/include/llvm/Transforms/IPO.h
index ebc76bf82118..11d363b1200b 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO.h
@@ -202,6 +202,11 @@ Pass *createReversePostOrderFunctionAttrsPass();
ModulePass *createMergeFunctionsPass();
//===----------------------------------------------------------------------===//
+/// createHotColdSplittingPass - This pass outlines cold blocks into separate
+/// functions.
+ModulePass *createHotColdSplittingPass();
+
+//===----------------------------------------------------------------------===//
/// createPartialInliningPass - This pass inlines parts of functions.
///
ModulePass *createPartialInliningPass();
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index dc9f18c79410..901fed7a0fa4 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -32,7 +32,8 @@ class Pass;
enum MemoryAccessKind {
MAK_ReadNone = 0,
MAK_ReadOnly = 1,
- MAK_MayWrite = 2
+ MAK_MayWrite = 2,
+ MAK_WriteOnly = 3
};
/// Returns the memory access properties of this copy of the function.
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 120a34e15933..c2103b637266 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -37,13 +37,61 @@ public:
/// containing all the GUIDs of all functions to import for a source module.
using FunctionsToImportTy = std::unordered_set<GlobalValue::GUID>;
+ /// The different reasons selectCallee will choose not to import a
+ /// candidate.
+ enum ImportFailureReason {
+ None,
+ // We can encounter a global variable instead of a function in rare
+ // situations with SamplePGO. See comments where this failure type is
+ // set for more details.
+ GlobalVar,
+ // Found to be globally dead, so we don't bother importing.
+ NotLive,
+ // Instruction count over the current threshold.
+ TooLarge,
+ // Don't import something with interposable linkage as we can't inline it
+ // anyway.
+ InterposableLinkage,
+ // Generally we won't end up failing due to this reason, as we expect
+ // to find at least one summary for the GUID that is global or a local
+ // in the referenced module for direct calls.
+ LocalLinkageNotInModule,
+ // This corresponds to the NotEligibleToImport being set on the summary,
+ // which can happen in a few different cases (e.g. local that can't be
+ // renamed or promoted because it is referenced on a llvm*.used variable).
+ NotEligible,
+ // This corresponds to NoInline being set on the function summary,
+ // which will happen if it is known that the inliner will not be able
+ // to inline the function (e.g. it is marked with a NoInline attribute).
+ NoInline
+ };
+
+ /// Information optionally tracked for candidates the importer decided
+ /// not to import. Used for optional stat printing.
+ struct ImportFailureInfo {
+ // The ValueInfo corresponding to the candidate. We save an index hash
+ // table lookup for each GUID by stashing this here.
+ ValueInfo VI;
+ // The maximum call edge hotness for all failed imports of this candidate.
+ CalleeInfo::HotnessType MaxHotness;
+ // The most recent reason for failing to import (doesn't necessarily correspond
+ // to the attempt with the maximum hotness).
+ ImportFailureReason Reason;
+ // The number of times we tried to import candidate but failed.
+ unsigned Attempts;
+ ImportFailureInfo(ValueInfo VI, CalleeInfo::HotnessType MaxHotness,
+ ImportFailureReason Reason, unsigned Attempts)
+ : VI(VI), MaxHotness(MaxHotness), Reason(Reason), Attempts(Attempts) {}
+ };
+
/// Map of callee GUID considered for import into a given module to a pair
/// consisting of the largest threshold applied when deciding whether to
/// import it and, if we decided to import, a pointer to the summary instance
/// imported. If we decided not to import, the summary will be nullptr.
using ImportThresholdsTy =
DenseMap<GlobalValue::GUID,
- std::pair<unsigned, const GlobalValueSummary *>>;
+ std::tuple<unsigned, const GlobalValueSummary *,
+ std::unique_ptr<ImportFailureInfo>>>;
/// The map contains an entry for every module to import from, the key being
/// the module identifier to pass to the ModuleLoader. The value is the set of
@@ -128,6 +176,14 @@ void computeDeadSymbols(
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing);
+/// Compute dead symbols and run constant propagation in combined index
+/// after that.
+void computeDeadSymbolsWithConstProp(
+ ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
+ function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
+ bool ImportEnabled);
+
/// Converts value \p GV to declaration, or replaces with a declaration if
/// it is an alias. Returns true if converted, false if replaced.
bool convertToDeclaration(GlobalValue &GV);
@@ -153,10 +209,10 @@ std::error_code EmitImportsFiles(
StringRef ModulePath, StringRef OutputFilename,
const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
-/// Resolve WeakForLinker values in \p TheModule based on the information
+/// Resolve prevailing symbol linkages in \p TheModule based on the information
/// recorded in the summaries during global summary-based analysis.
-void thinLTOResolveWeakForLinkerModule(Module &TheModule,
- const GVSummaryMapTy &DefinedGlobals);
+void thinLTOResolvePrevailingInModule(Module &TheModule,
+ const GVSummaryMapTy &DefinedGlobals);
/// Internalize \p TheModule based on the information recorded in the summaries
/// during global summary-based analysis.
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h b/contrib/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
new file mode 100644
index 000000000000..57e9a9e69187
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -0,0 +1,31 @@
+//===- HotColdSplitting.h ---- Outline Cold Regions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This pass outlines cold regions to a separate function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H
+#define LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+/// Pass to outline cold regions.
+class HotColdSplittingPass : public PassInfoMixin<HotColdSplittingPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_HOTCOLDSPLITTING_H
+
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h
index cd5a0563898e..af4a933ec1f6 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/SampleProfile.h
@@ -25,13 +25,16 @@ class Module;
/// The sample profiler data loader pass.
class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
public:
- SampleProfileLoaderPass(std::string File = "", bool IsThinLTOPreLink = false)
- : ProfileFileName(File), IsThinLTOPreLink(IsThinLTOPreLink) {}
+ SampleProfileLoaderPass(std::string File = "", std::string RemappingFile = "",
+ bool IsThinLTOPreLink = false)
+ : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
+ IsThinLTOPreLink(IsThinLTOPreLink) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
std::string ProfileFileName;
+ std::string ProfileRemappingFileName;
bool IsThinLTOPreLink;
};
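For illustration, a minimal sketch of how the extended constructor might be wired into a new-pass-manager pipeline; the profile and remapping file names, and the pre-built ModuleAnalysisManager MAM, are placeholders rather than part of this change:

// Hedged sketch: assumes M is a Module and MAM is a ModuleAnalysisManager
// already populated (e.g. via PassBuilder::registerModuleAnalyses).
ModulePassManager MPM;
MPM.addPass(SampleProfileLoaderPass("app.prof", "app.remap",
                                    /*IsThinLTOPreLink=*/false));
MPM.run(M, MAM);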
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
index 4a346c8d7450..017cab0a7750 100644
--- a/contrib/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
@@ -24,9 +24,11 @@
namespace llvm {
+class Triple;
class FunctionPass;
class ModulePass;
class OptimizationRemarkEmitter;
+class Comdat;
/// Instrumentation passes often insert conditional checks into entry blocks.
/// Call this function before splitting the entry block to move instructions
@@ -36,6 +38,17 @@ class OptimizationRemarkEmitter;
BasicBlock::iterator PrepareToSplitEntryBlock(BasicBlock &BB,
BasicBlock::iterator IP);
+// Create a constant for Str so that we can pass it to the run-time lib.
+GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
+ bool AllowMerging,
+ const char *NamePrefix = "");
+
+// Returns F.getComdat() if it exists.
+// Otherwise creates a new comdat, sets F's comdat, and returns it.
+// Returns nullptr on failure.
+Comdat *GetOrCreateFunctionComdat(Function &F, Triple &T,
+ const std::string &ModuleId);
+
// Insert GCOV profiling instrumentation
struct GCOVOptions {
static GCOVOptions getDefault();
@@ -64,6 +77,12 @@ struct GCOVOptions {
// Emit the exit block immediately after the start block, rather than after
// all of the function body's blocks.
bool ExitBlockBeforeBody;
+
+ // Regexes separated by a semi-colon to filter the files to instrument.
+ std::string Filter;
+
+ // Regexes separated by a semi-colon to filter the files to not instrument.
+ std::string Exclude;
};
ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
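The new Filter/Exclude fields hold semicolon-separated regexes. A hypothetical configuration (patterns invented purely for illustration) might look like:

GCOVOptions Opts = GCOVOptions::getDefault();
Opts.Filter = "src/.*\\.c;src/.*\\.cpp";  // instrument only these sources
Opts.Exclude = ".*third_party.*";         // never instrument vendored code
ModulePass *Profiler = createGCOVProfilerPass(Opts);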
@@ -111,6 +130,9 @@ struct InstrProfOptions {
// Do counter register promotion
bool DoCounterPromotion = false;
+ // Use atomic profile counter increments.
+ bool Atomic = false;
+
// Name of the profile file to use as output
std::string InstrProfileOutput;
@@ -127,18 +149,12 @@ FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false,
bool UseAfterScope = false);
ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false,
bool Recover = false,
- bool UseGlobalsGC = true);
-
-// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
-FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0,
- bool Recover = false);
+ bool UseGlobalsGC = true,
+ bool UseOdrIndicator = true);
FunctionPass *createHWAddressSanitizerPass(bool CompileKernel = false,
bool Recover = false);
-// Insert ThreadSanitizer (race detection) instrumentation
-FunctionPass *createThreadSanitizerPass();
-
// Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation
ModulePass *createDataFlowSanitizerPass(
const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
@@ -206,7 +222,6 @@ static inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) {
assert(Scaled <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits");
return Scaled;
}
-
} // end namespace llvm
#endif // LLVM_TRANSFORMS_INSTRUMENTATION_H
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h b/contrib/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
new file mode 100644
index 000000000000..460342d1631b
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
@@ -0,0 +1,31 @@
+//===- ControlHeightReduction.h - Control Height Reduction ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class ControlHeightReductionPass :
+ public PassInfoMixin<ControlHeightReductionPass> {
+public:
+ ControlHeightReductionPass();
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/contrib/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
new file mode 100644
index 000000000000..54f0e2f78230
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -0,0 +1,48 @@
+//===- Transforms/Instrumentation/MemorySanitizer.h - MSan Pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the memory sanitizer pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
+FunctionPass *createMemorySanitizerLegacyPassPass(int TrackOrigins = 0,
+ bool Recover = false,
+ bool EnableKmsan = false);
+
+/// A function pass for msan instrumentation.
+///
+/// Instruments functions to detect uninitialized reads. This function pass
+/// inserts calls to runtime library functions. If the functions aren't declared
+/// yet, the pass inserts the declarations. Otherwise the existing globals are
+/// used.
+struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> {
+ MemorySanitizerPass(int TrackOrigins = 0, bool Recover = false,
+ bool EnableKmsan = false)
+ : TrackOrigins(TrackOrigins), Recover(Recover), EnableKmsan(EnableKmsan) {
+ }
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+private:
+ int TrackOrigins;
+ bool Recover;
+ bool EnableKmsan;
+};
+}
+
+#endif /* LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H */
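A rough sketch of using both entry points; the parameter values are examples only, and M plus the pass managers are assumed to exist in the caller:

// New pass manager: origin tracking level 2, no error recovery, userspace MSan.
FunctionPassManager FPM;
FPM.addPass(MemorySanitizerPass(/*TrackOrigins=*/2, /*Recover=*/false,
                                /*EnableKmsan=*/false));

// Legacy pass manager equivalent via the renamed factory function.
legacy::FunctionPassManager LegacyFPM(&M);
LegacyFPM.add(createMemorySanitizerLegacyPassPass(/*TrackOrigins=*/2));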
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
index c0b37c470b74..fdc5df68a669 100644
--- a/contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
@@ -36,12 +36,14 @@ public:
/// The profile annotation (profile-instr-use) pass for IR based PGO.
class PGOInstrumentationUse : public PassInfoMixin<PGOInstrumentationUse> {
public:
- PGOInstrumentationUse(std::string Filename = "");
+ PGOInstrumentationUse(std::string Filename = "",
+ std::string RemappingFilename = "");
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
std::string ProfileFileName;
+ std::string ProfileRemappingFileName;
};
/// The indirect function call promotion pass.
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/contrib/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
new file mode 100644
index 000000000000..701e2e6ec89e
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -0,0 +1,33 @@
+//===- Transforms/Instrumentation/ThreadSanitizer.h - TSan Pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the thread sanitizer pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+// Insert ThreadSanitizer (race detection) instrumentation
+FunctionPass *createThreadSanitizerLegacyPassPass();
+
+/// A function pass for tsan instrumentation.
+///
+/// Instruments functions to detect data races. This function pass inserts
+/// calls to runtime library functions. If the functions aren't declared yet,
+/// the pass inserts the declarations. Otherwise the existing globals are used.
+struct ThreadSanitizerPass : public PassInfoMixin<ThreadSanitizerPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // namespace llvm
+#endif /* LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H */
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm/include/llvm/Transforms/Scalar.h
index 9491e1bbac93..8fcf9296ba47 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar.h
@@ -26,7 +26,6 @@ class ModulePass;
class Pass;
class GetElementPtrInst;
class PassInfo;
-class TerminatorInst;
class TargetLowering;
class TargetMachine;
@@ -184,11 +183,12 @@ Pass *createLoopInstSimplifyPass();
//
// LoopUnroll - This pass is a simple loop unrolling pass.
//
-Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1,
+Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
+ int Threshold = -1, int Count = -1,
int AllowPartial = -1, int Runtime = -1,
int UpperBound = -1, int AllowPeeling = -1);
// Create an unrolling pass for full unrolling that uses exact trip count only.
-Pass *createSimpleLoopUnrollPass(int OptLevel = 2);
+Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false);
//===----------------------------------------------------------------------===//
//
@@ -394,12 +394,6 @@ FunctionPass *createPartiallyInlineLibCallsPass();
//===----------------------------------------------------------------------===//
//
-// ScalarizerPass - Converts vector operations into scalar operations
-//
-FunctionPass *createScalarizerPass();
-
-//===----------------------------------------------------------------------===//
-//
// SeparateConstOffsetFromGEP - Split GEPs for better CSE
//
FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false);
@@ -477,6 +471,7 @@ FunctionPass *createLoopDataPrefetchPass();
///===---------------------------------------------------------------------===//
ModulePass *createNameAnonGlobalPass();
+ModulePass *createCanonicalizeAliasesPass();
//===----------------------------------------------------------------------===//
//
@@ -491,6 +486,13 @@ FunctionPass *createLibCallsShrinkWrapPass();
// primarily to help other loop passes.
//
Pass *createLoopSimplifyCFGPass();
+
+//===----------------------------------------------------------------------===//
+//
+// WarnMissedTransformations - This pass emits warnings for leftover forced
+// transformations.
+//
+Pass *createWarnMissedTransformationsPass();
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
index 84589bf4db99..ba32e122fa10 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -38,6 +38,7 @@
#define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
@@ -50,8 +51,10 @@ class BasicBlock;
class BlockFrequencyInfo;
class Constant;
class ConstantInt;
+class ConstantExpr;
class DominatorTree;
class Function;
+class GlobalVariable;
class Instruction;
class TargetTransformInfo;
@@ -74,10 +77,15 @@ using ConstantUseListType = SmallVector<ConstantUser, 8>;
/// Keeps track of a constant candidate and its uses.
struct ConstantCandidate {
ConstantUseListType Uses;
+ // If the candidate is a ConstantExpr (currently only constant GEP expressions
+ // whose base pointers are GlobalVariables are supported), ConstInt records
+ // its offset from the base GV, and ConstExpr tracks the candidate GEP expr.
ConstantInt *ConstInt;
+ ConstantExpr *ConstExpr;
unsigned CumulativeCost = 0;
- ConstantCandidate(ConstantInt *ConstInt) : ConstInt(ConstInt) {}
+ ConstantCandidate(ConstantInt *ConstInt, ConstantExpr *ConstExpr=nullptr) :
+ ConstInt(ConstInt), ConstExpr(ConstExpr) {}
/// Add the user to the use list and update the cost.
void addUser(Instruction *Inst, unsigned Idx, unsigned Cost) {
@@ -91,16 +99,21 @@ struct ConstantCandidate {
struct RebasedConstantInfo {
ConstantUseListType Uses;
Constant *Offset;
+ Type *Ty;
- RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset)
- : Uses(std::move(Uses)), Offset(Offset) {}
+ RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset,
+ Type *Ty=nullptr) : Uses(std::move(Uses)), Offset(Offset), Ty(Ty) {}
};
using RebasedConstantListType = SmallVector<RebasedConstantInfo, 4>;
/// A base constant and all its rebased constants.
struct ConstantInfo {
- ConstantInt *BaseConstant;
+ // If the candidate is a ConstantExpr (currently only constant GEP expressions
+ // whose base pointers are GlobalVariables are supported), BaseInt records
+ // its offset from the base GV, and BaseExpr tracks the candidate GEP expr.
+ ConstantInt *BaseInt;
+ ConstantExpr *BaseExpr;
RebasedConstantListType RebasedConstants;
};
@@ -115,29 +128,43 @@ public:
BlockFrequencyInfo *BFI, BasicBlock &Entry);
void releaseMemory() {
- ConstantVec.clear();
ClonedCastMap.clear();
- ConstCandVec.clear();
+ ConstIntCandVec.clear();
+ for (auto MapEntry : ConstGEPCandMap)
+ MapEntry.second.clear();
+ ConstGEPCandMap.clear();
+ ConstIntInfoVec.clear();
+ for (auto MapEntry : ConstGEPInfoMap)
+ MapEntry.second.clear();
+ ConstGEPInfoMap.clear();
}
private:
- using ConstCandMapType = DenseMap<ConstantInt *, unsigned>;
- using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
+ using ConstPtrUnionType = PointerUnion<ConstantInt *, ConstantExpr *>;
+ using ConstCandMapType = DenseMap<ConstPtrUnionType, unsigned>;
const TargetTransformInfo *TTI;
DominatorTree *DT;
BlockFrequencyInfo *BFI;
+ LLVMContext *Ctx;
+ const DataLayout *DL;
BasicBlock *Entry;
/// Keeps track of constant candidates found in the function.
- ConstCandVecType ConstCandVec;
+ using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
+ using GVCandVecMapType = DenseMap<GlobalVariable *, ConstCandVecType>;
+ ConstCandVecType ConstIntCandVec;
+ GVCandVecMapType ConstGEPCandMap;
+
+ /// These are the final constants we decided to hoist.
+ using ConstInfoVecType = SmallVector<consthoist::ConstantInfo, 8>;
+ using GVInfoVecMapType = DenseMap<GlobalVariable *, ConstInfoVecType>;
+ ConstInfoVecType ConstIntInfoVec;
+ GVInfoVecMapType ConstGEPInfoMap;
/// Keep track of cast instructions we already cloned.
SmallDenseMap<Instruction *, Instruction *> ClonedCastMap;
- /// These are the final constants we decided to hoist.
- SmallVector<consthoist::ConstantInfo, 8> ConstantVec;
-
Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const;
SmallPtrSet<Instruction *, 8>
findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const;
@@ -145,19 +172,27 @@ private:
Instruction *Inst, unsigned Idx,
ConstantInt *ConstInt);
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
+ Instruction *Inst, unsigned Idx,
+ ConstantExpr *ConstExpr);
+ void collectConstantCandidates(ConstCandMapType &ConstCandMap,
Instruction *Inst, unsigned Idx);
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
Instruction *Inst);
void collectConstantCandidates(Function &Fn);
void findAndMakeBaseConstant(ConstCandVecType::iterator S,
- ConstCandVecType::iterator E);
+ ConstCandVecType::iterator E,
+ SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec);
unsigned maximizeConstantsInRange(ConstCandVecType::iterator S,
ConstCandVecType::iterator E,
ConstCandVecType::iterator &MaxCostItr);
- void findBaseConstants();
- void emitBaseConstants(Instruction *Base, Constant *Offset,
+ // If BaseGV is nullptr, find base among Constant Integer candidates;
+ // otherwise find base among constant GEPs sharing BaseGV as base pointer.
+ void findBaseConstants(GlobalVariable *BaseGV);
+ void emitBaseConstants(Instruction *Base, Constant *Offset, Type *Ty,
const consthoist::ConstantUser &ConstUser);
- bool emitBaseConstants();
+ // If BaseGV is nullptr, emit Constant Integer base; otherwise emit
+ // constant GEP base.
+ bool emitBaseConstants(GlobalVariable *BaseGV);
void deleteDeadCastInst() const;
bool optimizeConstants(Function &Fn);
};
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h b/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
index b9de07ec9279..9827678b89f2 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -22,13 +22,14 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <cstdint>
#include <utility>
#include <vector>
@@ -158,11 +159,8 @@ private:
AssumptionCache *AC;
SetVector<BasicBlock *> DeadBlocks;
OptimizationRemarkEmitter *ORE;
- // Maps a block to the topmost instruction with implicit control flow in it.
- DenseMap<const BasicBlock *, const Instruction *>
- FirstImplicitControlFlowInsts;
+ ImplicitControlFlowTracking *ICF;
- OrderedInstructions *OI;
ValueTable VN;
/// A mapping from value numbers to lists of Value*'s that
@@ -183,7 +181,12 @@ private:
// Map the block to reversed postorder traversal number. It is used to
// find back edge easily.
- DenseMap<const BasicBlock *, uint32_t> BlockRPONumber;
+ DenseMap<AssertingVH<BasicBlock>, uint32_t> BlockRPONumber;
+
+ // This is set 'true' initially and also when new blocks have been added to
+ // the function being analyzed. This boolean is used to control the updating
+ // of BlockRPONumber prior to accessing the contents of BlockRPONumber.
+ bool InvalidBlockRPONumbers = true;
using LoadDepVect = SmallVector<NonLocalDepResult, 64>;
using AvailValInBlkVect = SmallVector<gvn::AvailableValueInBlock, 64>;
@@ -240,7 +243,7 @@ private:
}
// List of critical edges to be split between iterations.
- SmallVector<std::pair<TerminatorInst *, unsigned>, 4> toSplit;
+ SmallVector<std::pair<Instruction *, unsigned>, 4> toSplit;
// Helper functions of redundant load elimination
bool processLoad(LoadInst *L);
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index b3493a292498..9894345645a1 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -23,6 +23,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/ValueHandle.h"
#include <memory>
#include <utility>
@@ -34,7 +35,7 @@ class BinaryOperator;
class BranchInst;
class CmpInst;
class Constant;
-class DeferredDominance;
+class DomTreeUpdater;
class Function;
class Instruction;
class IntrinsicInst;
@@ -78,7 +79,7 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
AliasAnalysis *AA;
- DeferredDominance *DDT;
+ DomTreeUpdater *DTU;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = false;
@@ -88,29 +89,16 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
#else
SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders;
#endif
- DenseSet<std::pair<Value *, BasicBlock *>> RecursionSet;
unsigned BBDupThreshold;
- // RAII helper for updating the recursion stack.
- struct RecursionSetRemover {
- DenseSet<std::pair<Value *, BasicBlock *>> &TheSet;
- std::pair<Value *, BasicBlock *> ThePair;
-
- RecursionSetRemover(DenseSet<std::pair<Value *, BasicBlock *>> &S,
- std::pair<Value *, BasicBlock *> P)
- : TheSet(S), ThePair(P) {}
-
- ~RecursionSetRemover() { TheSet.erase(ThePair); }
- };
-
public:
JumpThreadingPass(int T = -1);
// Glue for old PM.
bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_,
- AliasAnalysis *AA_, DeferredDominance *DDT_,
- bool HasProfileData_, std::unique_ptr<BlockFrequencyInfo> BFI_,
+ AliasAnalysis *AA_, DomTreeUpdater *DTU_, bool HasProfileData_,
+ std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -127,11 +115,21 @@ public:
bool DuplicateCondBranchOnPHIIntoPred(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs);
+ bool ComputeValueKnownInPredecessorsImpl(
+ Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result,
+ jumpthreading::ConstantPreference Preference,
+ DenseSet<std::pair<Value *, BasicBlock *>> &RecursionSet,
+ Instruction *CxtI = nullptr);
bool
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
jumpthreading::PredValueInfo &Result,
jumpthreading::ConstantPreference Preference,
- Instruction *CxtI = nullptr);
+ Instruction *CxtI = nullptr) {
+ DenseSet<std::pair<Value *, BasicBlock *>> RecursionSet;
+ return ComputeValueKnownInPredecessorsImpl(V, BB, Result, Preference,
+ RecursionSet, CxtI);
+ }
+
bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
jumpthreading::ConstantPreference Preference,
Instruction *CxtI = nullptr);
@@ -141,7 +139,11 @@ public:
bool ProcessImpliedCondition(BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
+ void UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI,
+ PHINode *SIUse, unsigned Idx);
+
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+ bool TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB);
bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
bool ProcessGuards(BasicBlock *BB);
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index 5f61c39b5530..46ebb74c413c 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -276,7 +276,15 @@ public:
// pass pipeline to put loops into their canonical form. Note that we can
// directly build up function analyses after this as the function pass
// manager handles all the invalidation at that layer.
- PreservedAnalyses PA = LoopCanonicalizationFPM.run(F, AM);
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F);
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // canonicalization pipeline.
+ if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) {
+ PA = LoopCanonicalizationFPM.run(F, AM);
+ PI.runAfterPass<Function>(LoopCanonicalizationFPM, F);
+ }
// Get the loop structure for this function
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
@@ -337,8 +345,19 @@ public:
assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) &&
"Loops must remain in LCSSA form!");
#endif
-
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns
+ // false).
+ if (!PI.runBeforePass<Loop>(Pass, *L))
+ continue;
PreservedAnalyses PassPA = Pass.run(*L, LAM, LAR, Updater);
+
+ // Do not pass deleted Loop into the instrumentation.
+ if (Updater.skipCurrentLoop())
+ PI.runAfterPassInvalidated<Loop>(Pass);
+ else
+ PI.runAfterPass<Loop>(Pass, *L);
+
// FIXME: We should verify the set of analyses relevant to Loop passes
// are preserved.
@@ -364,8 +383,8 @@ public:
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<ScalarEvolutionAnalysis>();
- // FIXME: Uncomment this when all loop passes preserve MemorySSA
- // PA.preserve<MemorySSAAnalysis>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
// FIXME: What we really want to do here is preserve an AA category, but
// that concept doesn't exist yet.
PA.preserve<AAManager>();
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index 9848e0d54f2b..e38e983cc9eb 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -10,6 +10,7 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
@@ -23,23 +24,90 @@ class LPMUpdater;
class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
const int OptLevel;
+ /// If false, use a cost model to determine whether unrolling of a loop is
+ /// profitable. If true, only loops that explicitly request unrolling via
+ /// metadata are considered. All other loops are skipped.
+ const bool OnlyWhenForced;
+
public:
- explicit LoopFullUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
+ explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false)
+ : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
+/// A set of parameters used to control various transforms performed by the
+/// LoopUnroll pass. Each of the boolean parameters can be set to:
+/// true - enabling the transformation.
+/// false - disabling the transformation.
+/// None - relying on a global default.
+///
+/// There is also an OptLevel parameter, which is used for additional loop unroll
+/// tuning.
+///
+/// Intended use is to create a default object, modify parameters with
+/// additional setters and then pass it to LoopUnrollPass.
+///
+struct LoopUnrollOptions {
+ Optional<bool> AllowPartial;
+ Optional<bool> AllowPeeling;
+ Optional<bool> AllowRuntime;
+ Optional<bool> AllowUpperBound;
+ int OptLevel;
+
+ /// If false, use a cost model to determine whether unrolling of a loop is
+ /// profitable. If true, only loops that explicitly request unrolling via
+ /// metadata are considered. All other loops are skipped.
+ bool OnlyWhenForced;
+
+ LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false)
+ : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
+
+ /// Enables or disables partial unrolling. When disabled only full unrolling
+ /// is allowed.
+ LoopUnrollOptions &setPartial(bool Partial) {
+ AllowPartial = Partial;
+ return *this;
+ }
+
+ /// Enables or disables unrolling of loops with runtime trip count.
+ LoopUnrollOptions &setRuntime(bool Runtime) {
+ AllowRuntime = Runtime;
+ return *this;
+ }
+
+ /// Enables or disables loop peeling.
+ LoopUnrollOptions &setPeeling(bool Peeling) {
+ AllowPeeling = Peeling;
+ return *this;
+ }
+
+ /// Enables or disables the use of trip count upper bound
+ /// in loop unrolling.
+ LoopUnrollOptions &setUpperBound(bool UpperBound) {
+ AllowUpperBound = UpperBound;
+ return *this;
+ }
+
+ // Sets "optimization level" tuning parameter for loop unrolling.
+ LoopUnrollOptions &setOptLevel(int O) {
+ OptLevel = O;
+ return *this;
+ }
+};
+
/// Loop unroll pass that will support both full and partial unrolling.
/// It is a function pass to have access to function and module analyses.
/// It will also put loops into canonical form (simplified and LCSSA).
class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
- const int OptLevel;
+ LoopUnrollOptions UnrollOpts;
public:
/// This uses the target information (or flags) to control the thresholds for
/// different unrolling strategies but supports all of them.
- explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
+ explicit LoopUnrollPass(LoopUnrollOptions UnrollOpts = {})
+ : UnrollOpts(UnrollOpts) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
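LoopUnrollOptions is a small builder, so a caller can chain the setters before handing it to the pass. A sketch with illustrative (non-default) values; FPM is an assumed FunctionPassManager:

LoopUnrollOptions UnrollOpts(/*OptLevel=*/3, /*OnlyWhenForced=*/true);
UnrollOpts.setPartial(true).setRuntime(false).setPeeling(true);

FunctionPassManager FPM;
FPM.addPass(LoopUnrollPass(UnrollOpts));  // supports full and partial unrolling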
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h b/contrib/llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
new file mode 100644
index 000000000000..41b4aada2baa
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
@@ -0,0 +1,47 @@
+//===-- MakeGuardsExplicit.h - Turn guard intrinsics into guard branches --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the @llvm.experimental.guard intrinsic to the new form of
+// guard represented as widenable explicit branch to the deopt block. The
+// difference between this pass and LowerGuardIntrinsic is that after this pass
+// a guard represented as the intrinsic:
+//
+// call void(i1, ...) @llvm.experimental.guard(i1 %old_cond) [ "deopt"() ]
+//
+// is transformed into a guard represented as a widenable explicit branch:
+//
+// %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+// br i1 (%old_cond & %widenable_cond), label %guarded, label %deopt
+//
+// Here:
+// - The semantics of @llvm.experimental.widenable.condition allows replacing
+// %widenable_cond with the construction (%widenable_cond & %any_other_cond)
+// without loss of correctness;
+// - %guarded is the lower part of old guard intrinsic's parent block split by
+// the intrinsic call;
+// - %deopt is a block containing a sole call to @llvm.experimental.deoptimize
+// intrinsic.
+//
+// Therefore, this branch preserves the property of widenability.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_SCALAR_MAKEGUARDSEXPLICIT_H
+#define LLVM_TRANSFORMS_SCALAR_MAKEGUARDSEXPLICIT_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct MakeGuardsExplicitPass : public PassInfoMixin<MakeGuardsExplicitPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif //LLVM_TRANSFORMS_SCALAR_MAKEGUARDSEXPLICIT_H
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h b/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
index 2a294c95a17b..0abbb32fde6a 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
@@ -21,15 +21,17 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SCCP_H
#define LLVM_TRANSFORMS_SCALAR_SCCP_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Utils/PredicateInfo.h"
namespace llvm {
-class Function;
+class PostDominatorTree;
/// This pass performs function-level constant propagation and merging.
class SCCPPass : public PassInfoMixin<SCCPPass> {
@@ -37,7 +39,15 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-bool runIPSCCP(Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI);
+/// Helper struct for bundling up the analysis results per function for IPSCCP.
+struct AnalysisResultsForFn {
+ std::unique_ptr<PredicateInfo> PredInfo;
+ DominatorTree *DT;
+ PostDominatorTree *PDT;
+};
+
+bool runIPSCCP(Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ function_ref<AnalysisResultsForFn(Function &)> getAnalysis);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_SCALAR_SCCP_H
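The new runIPSCCP signature pushes analysis construction onto the caller. A hedged sketch of the callback, assuming a new-pass-manager caller with a FunctionAnalysisManager FAM and a TargetLibraryInfo TLI; the exact analysis plumbing is an assumption, not part of this header:

auto GetAnalysis = [&](Function &F) -> AnalysisResultsForFn {
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  return {llvm::make_unique<PredicateInfo>(
              F, DT, FAM.getResult<AssumptionAnalysis>(F)),
          &DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)};
};
bool Changed = runIPSCCP(M, M.getDataLayout(), &TLI, GetAnalysis);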
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/contrib/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
new file mode 100644
index 000000000000..1a0b9a2b638c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -0,0 +1,35 @@
+//===- Scalarizer.h --- Scalarize vector operations -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass converts vector operations into scalar operations, in order
+/// to expose optimization opportunities on the individual scalar operations.
+/// It is mainly intended for targets that do not have vector units, but it
+/// may also be useful for revectorizing code to different vector widths.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
+#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Create a legacy pass manager instance of the Scalarizer pass
+FunctionPass *createScalarizerPass();
+
+}
+
+#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/contrib/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
new file mode 100644
index 000000000000..018b22a932e6
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -0,0 +1,38 @@
+//===- WarnMissedTransforms.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit warnings if forced code transformations have not been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
+#define LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Function;
+class Loop;
+class LPMUpdater;
+
+// New pass manager boilerplate.
+class WarnMissedTransformationsPass
+ : public PassInfoMixin<WarnMissedTransformationsPass> {
+public:
+ explicit WarnMissedTransformationsPass() {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+// Legacy pass manager boilerplate.
+Pass *createWarnMissedTransformationsPass();
+void initializeWarnMissedTransformationsLegacyPass(PassRegistry &);
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils.h b/contrib/llvm/include/llvm/Transforms/Utils.h
index 0d997ce17b83..378552775c77 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils.h
@@ -113,6 +113,13 @@ extern char &LoopSimplifyID;
/// This function returns a new pass that downgrades the debug info in the
/// module to line tables only.
ModulePass *createStripNonLineTableDebugInfoPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ControlHeightReduction - Merges conditional blocks of code and reduces the
+// number of conditional branches in the hot paths based on profiles.
+//
+FunctionPass *createControlHeightReductionLegacyPass();
}
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 3dfc73b64842..5b16a2c0d0b1 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/InstrTypes.h"
#include <cassert>
@@ -27,19 +28,27 @@ namespace llvm {
class BlockFrequencyInfo;
class BranchProbabilityInfo;
-class DeferredDominance;
class DominatorTree;
+class DomTreeUpdater;
class Function;
class Instruction;
class LoopInfo;
class MDNode;
class MemoryDependenceResults;
+class MemorySSAUpdater;
class ReturnInst;
class TargetLibraryInfo;
class Value;
/// Delete the specified block, which must have no predecessors.
-void DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT = nullptr);
+void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
+
+/// Delete the specified blocks \p BBs. The set of deleted blocks must have no
+/// predecessors that are not being deleted themselves. \p BBs must contain no
+/// duplicate blocks. If there are loops among this set of blocks, all relevant
+/// loop info updates should be done before this function is called.
+void DeleteDeadBlocks(SmallVectorImpl<BasicBlock *> &BBs,
+ DomTreeUpdater *DTU = nullptr);
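A small sketch (assumed caller context: a Function F and a DominatorTree DT) of batching unreachable blocks and letting a lazy DomTreeUpdater apply the queued updates:

DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
SmallVector<BasicBlock *, 8> DeadBBs;
for (BasicBlock &BB : F)
  if (!DT.isReachableFromEntry(&BB))
    DeadBBs.push_back(&BB);  // predecessors of unreachable blocks are also
                             // unreachable, so the precondition holds
DeleteDeadBlocks(DeadBBs, &DTU);
DTU.flush();                 // apply the pending dominator tree updates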
/// We know that BB has one predecessor. If there are any single-entry PHI nodes
/// in it, fold them away. This handles the case when all entries to the PHI
@@ -56,10 +65,10 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr);
/// Attempts to merge a block into its predecessor, if possible. The return
/// value indicates success or failure.
-bool MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT = nullptr,
+bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
LoopInfo *LI = nullptr,
- MemoryDependenceResults *MemDep = nullptr,
- DeferredDominance *DDT = nullptr);
+ MemorySSAUpdater *MSSAU = nullptr,
+ MemoryDependenceResults *MemDep = nullptr);
/// Replace all uses of an instruction (specified by BI) with a value, then
/// remove and delete the original instruction.
@@ -84,13 +93,15 @@ void ReplaceInstWithInst(Instruction *From, Instruction *To);
struct CriticalEdgeSplittingOptions {
DominatorTree *DT;
LoopInfo *LI;
+ MemorySSAUpdater *MSSAU;
bool MergeIdenticalEdges = false;
bool DontDeleteUselessPHIs = false;
bool PreserveLCSSA = false;
CriticalEdgeSplittingOptions(DominatorTree *DT = nullptr,
- LoopInfo *LI = nullptr)
- : DT(DT), LI(LI) {}
+ LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr)
+ : DT(DT), LI(LI), MSSAU(MSSAU) {}
CriticalEdgeSplittingOptions &setMergeIdenticalEdges() {
MergeIdenticalEdges = true;
@@ -124,7 +135,7 @@ struct CriticalEdgeSplittingOptions {
/// IndirectBrInst. Splitting these edges will almost always create an invalid
/// program because the address of the new block won't be the one that is jumped
/// to.
-BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+BasicBlock *SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
const CriticalEdgeSplittingOptions &Options =
CriticalEdgeSplittingOptions());
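Sketch of the extended options struct in use; DT, LI, MSSAU, BB and SuccIdx are assumed from the caller and the example is illustrative only:

CriticalEdgeSplittingOptions Options(DT, LI, MSSAU);
Options.setMergeIdenticalEdges();
BasicBlock *NewBB =
    SplitCriticalEdge(BB->getTerminator(), SuccIdx, Options);
// NewBB (null if the edge could not be split) sits on the formerly critical
// edge, and MemorySSA has been kept in sync through MSSAU.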
@@ -144,7 +155,7 @@ inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI,
const CriticalEdgeSplittingOptions &Options =
CriticalEdgeSplittingOptions()) {
bool MadeChange = false;
- TerminatorInst *TI = (*PI)->getTerminator();
+ Instruction *TI = (*PI)->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (TI->getSuccessor(i) == Succ)
MadeChange |= !!SplitCriticalEdge(TI, i, Options);
@@ -158,7 +169,7 @@ inline BasicBlock *
SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst,
const CriticalEdgeSplittingOptions &Options =
CriticalEdgeSplittingOptions()) {
- TerminatorInst *TI = Src->getTerminator();
+ Instruction *TI = Src->getTerminator();
unsigned i = 0;
while (true) {
assert(i != TI->getNumSuccessors() && "Edge doesn't exist!");
@@ -176,14 +187,16 @@ unsigned SplitAllCriticalEdges(Function &F,
/// Split the edge connecting specified block.
BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To,
- DominatorTree *DT = nullptr, LoopInfo *LI = nullptr);
+ DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
/// Split the specified block at the specified instruction - everything before
/// SplitPt stays in Old and everything starting with SplitPt moves to a new
/// block. The two blocks are joined by an unconditional branch and the loop
/// info is updated.
BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT = nullptr, LoopInfo *LI = nullptr);
+ DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
/// This method introduces at least one new basic block into the function and
/// moves some of the predecessors of BB to be predecessors of the new block.
@@ -203,6 +216,7 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix,
DominatorTree *DT = nullptr,
LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr,
bool PreserveLCSSA = false);
/// This method transforms the landing pad, OrigBB, by introducing two new basic
@@ -216,20 +230,19 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
/// no other analyses. In particular, it does not preserve LoopSimplify
/// (because it's complicated to handle the case where one of the edges being
/// split is an exit of a loop with other exits).
-void SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock *> Preds,
- const char *Suffix, const char *Suffix2,
- SmallVectorImpl<BasicBlock *> &NewBBs,
- DominatorTree *DT = nullptr,
- LoopInfo *LI = nullptr,
- bool PreserveLCSSA = false);
+void SplitLandingPadPredecessors(
+ BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix,
+ const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs,
+ DominatorTree *DT = nullptr, LoopInfo *LI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr, bool PreserveLCSSA = false);
/// This method duplicates the specified return instruction into a predecessor
/// which ends in an unconditional branch. If the return instruction returns a
/// value defined by a PHI, propagate the right value into the return. It
/// returns the new return instruction in the predecessor.
ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
- BasicBlock *Pred);
+ BasicBlock *Pred,
+ DomTreeUpdater *DTU = nullptr);
/// Split the containing block at the specified instruction - everything before
/// SplitBefore stays in the old basic block, and the rest of the instructions
@@ -251,11 +264,11 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
/// Returns the NewBasicBlock's terminator.
///
/// Updates DT and LI if given.
-TerminatorInst *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
- bool Unreachable,
- MDNode *BranchWeights = nullptr,
- DominatorTree *DT = nullptr,
- LoopInfo *LI = nullptr);
+Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights = nullptr,
+ DominatorTree *DT = nullptr,
+ LoopInfo *LI = nullptr);
/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
/// but also creates the ElseBlock.
@@ -272,8 +285,8 @@ TerminatorInst *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
/// SplitBefore
/// Tail
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
- TerminatorInst **ThenTerm,
- TerminatorInst **ElseTerm,
+ Instruction **ThenTerm,
+ Instruction **ElseTerm,
MDNode *BranchWeights = nullptr);
/// Check whether BB is the merge point of a if-region.
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index eafe07f49284..28efce6ac3fb 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -37,6 +37,12 @@ namespace llvm {
LibFunc DoubleFn, LibFunc FloatFn,
LibFunc LongDoubleFn);
+ /// Get the name of the overloaded unary floating point function
+ /// corresponding to \a Ty.
+ StringRef getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn);
+
/// Return V if it is an i8*, otherwise cast it to i8*.
Value *castToCStr(Value *V, IRBuilder<> &B);
@@ -94,6 +100,13 @@ namespace llvm {
Value *emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
const AttributeList &Attrs);
+ /// Emit a call to the unary function DoubleFn, FloatFn or LongDoubleFn,
+ /// depending on the type of Op.
+ Value *emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilder<> &B,
+ const AttributeList &Attrs);
+
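Illustrative only: emitting the correctly-suffixed libm call for an operand's type with the new overload; Op, TLI, B and Attrs are assumed from the caller's context.

// Picks sin, sinf or sinl based on Op's floating point type.
Value *Sin = emitUnaryFloatFnCall(Op, TLI, LibFunc_sin, LibFunc_sinf,
                                  LibFunc_sinl, B, Attrs);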
/// Emit a call to the binary function named 'Name' (e.g. 'fmin'). This
/// function is known to take type matching 'Op1' and 'Op2' and return one
/// value with the same type. If 'Op1/Op2' are long double, 'l' is added as
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h b/contrib/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h
new file mode 100644
index 000000000000..f23263783fec
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h
@@ -0,0 +1,32 @@
+//===-- CanonicalizeAliases.h - Alias Canonicalization Pass -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file canonicalizes aliases.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_CANONICALIZE_ALIASES_H
+#define LLVM_TRANSFORMS_UTILS_CANONICALIZE_ALIASES_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Simple pass that canonicalizes aliases.
+class CanonicalizeAliasesPass : public PassInfoMixin<CanonicalizeAliasesPass> {
+public:
+ CanonicalizeAliasesPass() = default;
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_CANONICALIZE_ALIASES_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
index 7531fb2d69b3..f5e997324fc8 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -46,9 +47,9 @@ class LoopInfo;
class Module;
class ProfileSummaryInfo;
class ReturnInst;
+class DomTreeUpdater;
/// Return an exact copy of the specified module
-///
std::unique_ptr<Module> CloneModule(const Module &M);
std::unique_ptr<Module> CloneModule(const Module &M, ValueToValueMapTy &VMap);
@@ -60,17 +61,15 @@ std::unique_ptr<Module>
CloneModule(const Module &M, ValueToValueMapTy &VMap,
function_ref<bool(const GlobalValue *)> ShouldCloneDefinition);
-/// ClonedCodeInfo - This struct can be used to capture information about code
+/// This struct can be used to capture information about code
/// being cloned, while it is being cloned.
struct ClonedCodeInfo {
- /// ContainsCalls - This is set to true if the cloned code contains a normal
- /// call instruction.
+ /// This is set to true if the cloned code contains a normal call instruction.
bool ContainsCalls = false;
- /// ContainsDynamicAllocas - This is set to true if the cloned code contains
- /// a 'dynamic' alloca. Dynamic allocas are allocas that are either not in
- /// the entry block or they are in the entry block but are not a constant
- /// size.
+ /// This is set to true if the cloned code contains a 'dynamic' alloca.
+ /// Dynamic allocas are allocas that are either not in the entry block or they
+ /// are in the entry block but are not a constant size.
bool ContainsDynamicAllocas = false;
/// All cloned call sites that have operand bundles attached are appended to
@@ -81,7 +80,7 @@ struct ClonedCodeInfo {
ClonedCodeInfo() = default;
};
-/// CloneBasicBlock - Return a copy of the specified basic block, but without
+/// Return a copy of the specified basic block, but without
/// embedding the block into a particular function. The block returned is an
/// exact copy of the specified basic block, without any remapping having been
/// performed. Because of this, this is only suitable for applications where
@@ -108,13 +107,12 @@ struct ClonedCodeInfo {
/// If you would like to collect additional information about the cloned
/// function, you can specify a ClonedCodeInfo object with the optional fifth
/// parameter.
-///
BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix = "", Function *F = nullptr,
ClonedCodeInfo *CodeInfo = nullptr,
DebugInfoFinder *DIFinder = nullptr);
-/// CloneFunction - Return a copy of the specified function and add it to that
+/// Return a copy of the specified function and add it to that
/// function's module. Also, any references specified in the VMap are changed
/// to refer to their mapped value instead of the original one. If any of the
/// arguments to the function are in the VMap, the arguments are deleted from
@@ -153,7 +151,7 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = nullptr);
-/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
/// effect of this is to copy significantly less code in cases where (for
/// example) a function call with constant arguments is inlined, and those
@@ -171,8 +169,8 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ClonedCodeInfo *CodeInfo = nullptr,
Instruction *TheCall = nullptr);
-/// InlineFunctionInfo - This class captures the data input to the
-/// InlineFunction call, and records the auxiliary results produced by it.
+/// This class captures the data input to the InlineFunction call, and records
+/// the auxiliary results produced by it.
class InlineFunctionInfo {
public:
explicit InlineFunctionInfo(CallGraph *cg = nullptr,
@@ -184,19 +182,19 @@ public:
: CG(cg), GetAssumptionCache(GetAssumptionCache), PSI(PSI),
CallerBFI(CallerBFI), CalleeBFI(CalleeBFI) {}
- /// CG - If non-null, InlineFunction will update the callgraph to reflect the
+ /// If non-null, InlineFunction will update the callgraph to reflect the
/// changes it makes.
CallGraph *CG;
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
ProfileSummaryInfo *PSI;
BlockFrequencyInfo *CallerBFI, *CalleeBFI;
- /// StaticAllocas - InlineFunction fills this in with all static allocas that
- /// get copied into the caller.
+ /// InlineFunction fills this in with all static allocas that get copied into
+ /// the caller.
SmallVector<AllocaInst *, 4> StaticAllocas;
- /// InlinedCalls - InlineFunction fills this in with callsites that were
- /// inlined from the callee. This is only filled in if CG is non-null.
+ /// InlineFunction fills this in with callsites that were inlined from the
+ /// callee. This is only filled in if CG is non-null.
SmallVector<WeakTrackingVH, 8> InlinedCalls;
/// All of the new call sites inlined into the caller.
@@ -213,7 +211,7 @@ public:
}
};
-/// InlineFunction - This function inlines the called function into the basic
+/// This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
/// though.
@@ -232,13 +230,16 @@ public:
/// and all varargs at the callsite will be passed to any calls to
/// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs
/// are only used by ForwardVarArgsTo.
-bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
-bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
-bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR = nullptr, bool InsertLifetime = true,
- Function *ForwardVarArgsTo = nullptr);
+InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR = nullptr,
+ bool InsertLifetime = true);
+InlineResult InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR = nullptr,
+ bool InsertLifetime = true);
+InlineResult InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR = nullptr,
+ bool InsertLifetime = true,
+ Function *ForwardVarArgsTo = nullptr);
/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
/// Blocks.
@@ -262,11 +263,12 @@ void remapInstructionsInBlocks(const SmallVectorImpl<BasicBlock *> &Blocks,
/// we replace them with the uses of corresponding Phi inputs. ValueMapping
/// is used to map the original instructions from BB to their newly-created
/// copies. Returns the split block.
-BasicBlock *
-DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
- Instruction *StopAt,
- ValueToValueMapTy &ValueMapping,
- DominatorTree *DT = nullptr);
+BasicBlock *DuplicateInstructionsInSplitBetween(BasicBlock *BB,
+ BasicBlock *PredBB,
+ Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping,
+ DomTreeUpdater &DTU);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 0e5254acb0d3..fee79fdc3bff 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include <limits>
namespace llvm {
@@ -26,6 +27,7 @@ class BasicBlock;
class BlockFrequency;
class BlockFrequencyInfo;
class BranchProbabilityInfo;
+class CallInst;
class DominatorTree;
class Function;
class Instruction;
@@ -64,6 +66,11 @@ class Value;
unsigned NumExitBlocks = std::numeric_limits<unsigned>::max();
Type *RetTy;
+ // Suffix to use when creating the extracted function (appended to the
+ // original function name + "."). If empty, the default is to use the entry
+ // block label if it is non-empty, otherwise "extracted".
+ std::string Suffix;
+
public:
/// Create a code extractor for a sequence of blocks.
///
@@ -78,7 +85,8 @@ class Value;
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr,
- bool AllowVarArgs = false, bool AllowAlloca = false);
+ bool AllowVarArgs = false, bool AllowAlloca = false,
+ std::string Suffix = "");
/// Create a code extractor for a loop body.
///
@@ -86,7 +94,8 @@ class Value;
/// block sequence of the loop.
CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false,
BlockFrequencyInfo *BFI = nullptr,
- BranchProbabilityInfo *BPI = nullptr);
+ BranchProbabilityInfo *BPI = nullptr,
+ std::string Suffix = "");
/// Perform the extraction, returning the new function.
///
@@ -139,7 +148,8 @@ class Value;
BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock);
private:
- void severSplitPHINodes(BasicBlock *&Header);
+ void severSplitPHINodesOfEntry(BasicBlock *&Header);
+ void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);
void splitReturnBlocks();
Function *constructFunction(const ValueSet &inputs,
@@ -155,10 +165,9 @@ class Value;
DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
BranchProbabilityInfo *BPI);
- void emitCallAndSwitchStatement(Function *newFunction,
- BasicBlock *newHeader,
- ValueSet &inputs,
- ValueSet &outputs);
+ CallInst *emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *newHeader,
+ ValueSet &inputs, ValueSet &outputs);
};
} // end namespace llvm
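As a rough illustration of the new Suffix parameter, the sketch below outlines a region into a function whose name ends in ".cold"; the helper name, the choice of flags, and the extractCodeRegion() entry point are assumptions made for the example rather than part of this change.

#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

using namespace llvm;

// Outline the blocks in `Region` into a new function whose name ends with
// ".cold" instead of the default entry-block-label / "extracted" suffix.
static Function *outlineColdRegion(ArrayRef<BasicBlock *> Region,
                                   DominatorTree &DT) {
  CodeExtractor CE(Region, &DT, /*AggregateArgs=*/false, /*BFI=*/nullptr,
                   /*BPI=*/nullptr, /*AllowVarArgs=*/false,
                   /*AllowAlloca=*/false, /*Suffix=*/"cold");
  // The extraction entry point may return nullptr if the region cannot be
  // extracted.
  return CE.extractCodeRegion();
}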
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index b9fbef04cdc3..e24398b90012 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -114,6 +114,9 @@ bool renameModuleForThinLTO(
Module &M, const ModuleSummaryIndex &Index,
SetVector<GlobalValue *> *GlobalsToImport = nullptr);
+/// Compute synthetic function entry counts.
+void computeSyntheticCounts(ModuleSummaryIndex &Index);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/GuardUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/GuardUtils.h
new file mode 100644
index 000000000000..537045edafe4
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/GuardUtils.h
@@ -0,0 +1,30 @@
+//===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform transformations related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_GUARDUTILS_H
+#define LLVM_TRANSFORMS_UTILS_GUARDUTILS_H
+
+namespace llvm {
+
+class CallInst;
+class Function;
+
+/// Splits control flow at the point of \p Guard, replacing it with an explicit
+/// branch on the condition of the guard's first argument. The taken branch
+/// then goes to the block that contains \p Guard's successors, and the
+/// non-taken branch goes to a newly-created deopt block that contains a sole
+/// call to the deoptimize function \p DeoptIntrinsic.
+void makeGuardControlFlowExplicit(Function *DeoptIntrinsic, CallInst *Guard);
+
+} // llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_GUARDUTILS_H
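To illustrate the contract documented above, the following hedged sketch lowers a single guard call; how the deoptimize intrinsic declaration is obtained (here via Intrinsic::getDeclaration for the void overload) is an assumption of the example.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/GuardUtils.h"

using namespace llvm;

// Lower one guard intrinsic call into explicit control flow.
static void lowerGuard(CallInst *Guard) {
  Module *M = Guard->getModule();
  // Declaration of the deoptimize intrinsic (assumed here to be the void
  // overload) that the newly created deopt block will call.
  Function *DeoptIntrinsic = Intrinsic::getDeclaration(
      M, Intrinsic::experimental_deoptimize,
      {Type::getVoidTy(M->getContext())});
  // Replaces the guard with a conditional branch on its first argument; the
  // non-taken edge jumps to a block containing a sole deoptimize call.
  makeGuardControlFlowExplicit(DeoptIntrinsic, Guard);
}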
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
index b8df32565723..ec8b0eda3641 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
@@ -26,6 +26,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
@@ -43,7 +44,7 @@ class AssumptionCache;
class BasicBlock;
class BranchInst;
class CallInst;
-class DbgInfoIntrinsic;
+class DbgVariableIntrinsic;
class DbgValueInst;
class DIBuilder;
class Function;
@@ -51,6 +52,7 @@ class Instruction;
class LazyValueInfo;
class LoadInst;
class MDNode;
+class MemorySSAUpdater;
class PHINode;
class StoreInst;
class TargetLibraryInfo;
@@ -120,7 +122,7 @@ struct SimplifyCFGOptions {
/// DeleteDeadConditions is true.
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false,
const TargetLibraryInfo *TLI = nullptr,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr);
//===----------------------------------------------------------------------===//
// Local dead code elimination.
@@ -140,8 +142,9 @@ bool wouldInstructionBeTriviallyDead(Instruction *I,
/// If the specified value is a trivially dead instruction, delete it.
/// If that makes any of its operands trivially dead, delete them too,
/// recursively. Return true if any instructions were deleted.
-bool RecursivelyDeleteTriviallyDeadInstructions(Value *V,
- const TargetLibraryInfo *TLI = nullptr);
+bool RecursivelyDeleteTriviallyDeadInstructions(
+ Value *V, const TargetLibraryInfo *TLI = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
/// Delete all of the instructions in `DeadInsts`, and all other instructions
/// that deleting these in turn causes to be trivially dead.
@@ -153,7 +156,7 @@ bool RecursivelyDeleteTriviallyDeadInstructions(Value *V,
/// empty afterward.
void RecursivelyDeleteTriviallyDeadInstructions(
SmallVectorImpl<Instruction *> &DeadInsts,
- const TargetLibraryInfo *TLI = nullptr);
+ const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr);
/// If the specified value is an effectively dead PHI node, due to being a
/// def-use chain of single-use nodes that either forms a cycle or is terminated
@@ -171,6 +174,12 @@ bool RecursivelyDeleteDeadPHINode(PHINode *PN,
bool SimplifyInstructionsInBlock(BasicBlock *BB,
const TargetLibraryInfo *TLI = nullptr);
+/// Replace all the uses of an SSA value in @llvm.dbg intrinsics with
+/// undef. This is useful for signaling, e.g., that a variable has been
+/// found dead and hence is unavailable at a given program point.
+/// Returns true if the dbg values have been changed.
+bool replaceDbgUsesWithUndef(Instruction *I);
+
//===----------------------------------------------------------------------===//
// Control Flow Graph Restructuring.
//
@@ -187,20 +196,19 @@ bool SimplifyInstructionsInBlock(BasicBlock *BB,
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the 'and' to 0.
void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr);
/// BB is a block with one predecessor and its predecessor is known to have one
/// successor (BB!). Eliminate the edge between them, moving the instructions in
/// the predecessor into BB. This deletes the predecessor block.
-void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT = nullptr,
- DeferredDominance *DDT = nullptr);
+void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
/// BB is known to contain an unconditional branch, and contains no instructions
/// other than PHI nodes, potential debug intrinsics and the branch. If
/// possible, eliminate BB by rewriting all the predecessors to branch to the
/// successor block and return true. If we can't transform, return false.
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr);
/// Check for and eliminate duplicate PHI nodes in this block. This doesn't try
/// to be clever about PHI nodes which differ only in the order of the incoming
@@ -270,17 +278,17 @@ inline unsigned getKnownAlignment(Value *V, const DataLayout &DL,
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
StoreInst *SI, DIBuilder &Builder);
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
LoadInst *LI, DIBuilder &Builder);
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
PHINode *LI, DIBuilder &Builder);
/// Lowers llvm.dbg.declare intrinsics into appropriate set of
@@ -294,13 +302,13 @@ void insertDebugValuesForPHIs(BasicBlock *BB,
/// Finds all intrinsics declaring local variables as living in the memory that
/// 'V' points to. This may include a mix of dbg.declare and
/// dbg.addr intrinsics.
-TinyPtrVector<DbgInfoIntrinsic *> FindDbgAddrUses(Value *V);
+TinyPtrVector<DbgVariableIntrinsic *> FindDbgAddrUses(Value *V);
/// Finds the llvm.dbg.value intrinsics describing a value.
void findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V);
/// Finds the debug info intrinsics describing a value.
-void findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgInsts, Value *V);
+void findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V);
/// Replaces llvm.dbg.declare instruction when the address it
/// describes is replaced with a new value. If Deref is true, an
@@ -359,7 +367,7 @@ unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);
/// instruction, making it and the rest of the code in the block dead.
unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap,
bool PreserveLCSSA = false,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr);
/// Convert the CallInst to InvokeInst with the specified unwind edge basic
/// block. This also splits the basic block where CI is located, because
@@ -374,24 +382,36 @@ BasicBlock *changeToInvokeAndSplitBasicBlock(CallInst *CI,
///
/// \param BB Block whose terminator will be replaced. Its terminator must
/// have an unwind successor.
-void removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT = nullptr);
+void removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
/// Remove all blocks that can not be reached from the function's entry.
///
/// Returns true if any basic block was removed.
bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr,
- DeferredDominance *DDT = nullptr);
+ DomTreeUpdater *DTU = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
-/// Combine the metadata of two instructions so that K can replace J
+/// Combine the metadata of two instructions so that K can replace J. Some
+/// metadata kinds can only be kept if K does not move, meaning it dominated
+/// J in the original IR.
///
/// Metadata not listed as known via KnownIDs is removed
-void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs);
+void combineMetadata(Instruction *K, const Instruction *J,
+ ArrayRef<unsigned> KnownIDs, bool DoesKMove);
/// Combine the metadata of two instructions so that K can replace J. This
-/// specifically handles the case of CSE-like transformations.
+/// specifically handles the case of CSE-like transformations. Some
+/// metadata can only be kept if K dominates J. For this to be correct,
+/// K cannot be hoisted.
///
/// Unknown metadata is removed.
-void combineMetadataForCSE(Instruction *K, const Instruction *J);
+void combineMetadataForCSE(Instruction *K, const Instruction *J,
+ bool DoesKMove);
+
+/// Patch the replacement so that it is not more restrictive than the value
+/// being replaced. It assumes that the replacement does not get moved from
+/// its original position.
+void patchReplacementInstruction(Instruction *I, Value *Repl);
// Replace each use of 'From' with 'To', if that use does not belong to basic
// block where 'From' is defined. Returns the number of replacements made.
@@ -429,6 +449,18 @@ void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
LoadInst &NewLI);
+/// Remove the debug intrinsic instructions for the given instruction.
+void dropDebugUsers(Instruction &I);
+
+/// Hoist all of the instructions in block \p BB to the dominating block
+/// \p DomBlock, by moving its instructions to the insertion point \p InsertPt.
+///
+/// The moved instructions receive the insertion point debug location values
+/// (DILocations) and their debug intrinsic instructions (dbg.values) are
+/// removed.
+void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
+ BasicBlock *BB);
+
//===----------------------------------------------------------------------===//
// Intrinsic pattern matching
//
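The utilities above now take a DomTreeUpdater (and, in some cases, a MemorySSAUpdater) instead of the old DeferredDominance. A minimal sketch of driving two of them with a lazily flushed updater follows; the helper name and the decision not to maintain MemorySSA are assumptions of the example.

#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

static bool foldAndCleanUp(Function &F, DominatorTree &DT) {
  // Batch CFG updates and apply them to the dominator tree lazily.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool Changed = false;
  for (BasicBlock &BB : F)
    Changed |= ConstantFoldTerminator(&BB, /*DeleteDeadConditions=*/true,
                                      /*TLI=*/nullptr, &DTU);
  // Remove whatever became unreachable; no MemorySSA is maintained here.
  Changed |= removeUnreachableBlocks(F, /*LVI=*/nullptr, &DTU,
                                     /*MSSAU=*/nullptr);
  DTU.flush();
  return Changed;
}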
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
index 231e5bbb6dee..cd5bc4301018 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -20,6 +20,7 @@ class AssumptionCache;
class DominatorTree;
class Loop;
class LoopInfo;
+class MemorySSAUpdater;
class ScalarEvolution;
struct SimplifyQuery;
class TargetTransformInfo;
@@ -32,8 +33,8 @@ class TargetTransformInfo;
/// LoopRotation. If it is true, the profitability heuristic will be ignored.
bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE,
- const SimplifyQuery &SQ, bool RotationOnly,
- unsigned Threshold, bool IsUtilMode);
+ MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
+ bool RotationOnly, unsigned Threshold, bool IsUtilMode);
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index eb4c99102a63..8c2527b6ae68 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -23,6 +23,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
@@ -40,6 +41,7 @@ class BasicBlock;
class DataLayout;
class Loop;
class LoopInfo;
+class MemorySSAUpdater;
class OptimizationRemarkEmitter;
class PredicatedScalarEvolution;
class PredIteratorCache;
@@ -48,318 +50,6 @@ class SCEV;
class TargetLibraryInfo;
class TargetTransformInfo;
-
-/// The RecurrenceDescriptor is used to identify recurrence variables in a
-/// loop. Reduction is a special case of recurrence that has uses of the
-/// recurrence variable outside the loop. The method isReductionPHI identifies
-/// reductions that are basic recurrences.
-///
-/// Basic recurrences are defined as the summation, product, OR, AND, XOR, min,
-/// or max of a set of terms. For example: for(i=0; i<n; i++) { total +=
-/// array[i]; } is a summation of array elements. Basic recurrences are a
-/// special case of chains of recurrences (CR). See ScalarEvolution for CR
-/// references.
-
-/// This struct holds information about recurrence variables.
-class RecurrenceDescriptor {
-public:
- /// This enum represents the kinds of recurrences that we support.
- enum RecurrenceKind {
- RK_NoRecurrence, ///< Not a recurrence.
- RK_IntegerAdd, ///< Sum of integers.
- RK_IntegerMult, ///< Product of integers.
- RK_IntegerOr, ///< Bitwise or logical OR of numbers.
- RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
- RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
- RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
- RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult, ///< Product of floats.
- RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
- };
-
- // This enum represents the kind of minmax recurrence.
- enum MinMaxRecurrenceKind {
- MRK_Invalid,
- MRK_UIntMin,
- MRK_UIntMax,
- MRK_SIntMin,
- MRK_SIntMax,
- MRK_FloatMin,
- MRK_FloatMax
- };
-
- RecurrenceDescriptor() = default;
-
- RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
- MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
- bool Signed, SmallPtrSetImpl<Instruction *> &CI)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
- UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
- CastInsts.insert(CI.begin(), CI.end());
- }
-
- /// This POD struct holds information about a potential recurrence operation.
- class InstDesc {
- public:
- InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
- : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
- UnsafeAlgebraInst(UAI) {}
-
- InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr)
- : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K),
- UnsafeAlgebraInst(UAI) {}
-
- bool isRecurrence() { return IsRecurrence; }
-
- bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
-
- Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
-
- MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; }
-
- Instruction *getPatternInst() { return PatternLastInst; }
-
- private:
- // Is this instruction a recurrence candidate.
- bool IsRecurrence;
- // The last instruction in a min/max pattern (select of the select(icmp())
- // pattern), or the current recurrence instruction otherwise.
- Instruction *PatternLastInst;
- // If this is a min/max pattern the comparison predicate.
- MinMaxRecurrenceKind MinMaxKind;
- // Recurrence has unsafe algebra.
- Instruction *UnsafeAlgebraInst;
- };
-
- /// Returns a struct describing if the instruction 'I' can be a recurrence
- /// variable of type 'Kind'. If the recurrence is a min/max pattern of
- /// select(icmp()) this function advances the instruction pointer 'I' from the
- /// compare instruction to the select instruction and stores this pointer in
- /// 'PatternLastInst' member of the returned struct.
- static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
- InstDesc &Prev, bool HasFunNoNaNAttr);
-
- /// Returns true if instruction I has multiple uses in Insts
- static bool hasMultipleUsesOf(Instruction *I,
- SmallPtrSetImpl<Instruction *> &Insts);
-
- /// Returns true if all uses of the instruction I are within the Set.
- static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set);
-
- /// Returns a struct describing if the instruction is a
- /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
- /// or max(X, Y).
- static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev);
-
- /// Returns identity corresponding to the RecurrenceKind.
- static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp);
-
- /// Returns the opcode of binary operation corresponding to the
- /// RecurrenceKind.
- static unsigned getRecurrenceBinOp(RecurrenceKind Kind);
-
- /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
- static Value *createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK,
- Value *Left, Value *Right);
-
- /// Returns true if Phi is a reduction of type Kind and adds it to the
- /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
- /// non-null, the minimal bit width needed to compute the reduction will be
- /// computed.
- static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
- bool HasFunNoNaNAttr,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB = nullptr,
- AssumptionCache *AC = nullptr,
- DominatorTree *DT = nullptr);
-
- /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
- /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
- /// non-null, the minimal bit width needed to compute the reduction will be
- /// computed.
- static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB = nullptr,
- AssumptionCache *AC = nullptr,
- DominatorTree *DT = nullptr);
-
- /// Returns true if Phi is a first-order recurrence. A first-order recurrence
- /// is a non-reduction recurrence relation in which the value of the
- /// recurrence in the current loop iteration equals a value defined in the
- /// previous iteration. \p SinkAfter includes pairs of instructions where the
- /// first will be rescheduled to appear after the second if/when the loop is
- /// vectorized. It may be augmented with additional pairs if needed in order
- /// to handle Phi as a first-order recurrence.
- static bool
- isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- DenseMap<Instruction *, Instruction *> &SinkAfter,
- DominatorTree *DT);
-
- RecurrenceKind getRecurrenceKind() { return Kind; }
-
- MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
-
- TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
-
- Instruction *getLoopExitInstr() { return LoopExitInstr; }
-
- /// Returns true if the recurrence has unsafe algebra which requires a relaxed
- /// floating-point model.
- bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
-
- /// Returns first unsafe algebra instruction in the PHI node's use-chain.
- Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
-
- /// Returns true if the recurrence kind is an integer kind.
- static bool isIntegerRecurrenceKind(RecurrenceKind Kind);
-
- /// Returns true if the recurrence kind is a floating point kind.
- static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind);
-
- /// Returns true if the recurrence kind is an arithmetic kind.
- static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
-
- /// Returns the type of the recurrence. This type can be narrower than the
- /// actual type of the Phi if the recurrence has been type-promoted.
- Type *getRecurrenceType() { return RecurrenceType; }
-
- /// Returns a reference to the instructions used for type-promoting the
- /// recurrence.
- SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; }
-
- /// Returns true if all source operands of the recurrence are SExtInsts.
- bool isSigned() { return IsSigned; }
-
-private:
- // The starting value of the recurrence.
- // It does not have to be zero!
- TrackingVH<Value> StartValue;
- // The instruction whose value is used outside the loop.
- Instruction *LoopExitInstr = nullptr;
- // The kind of the recurrence.
- RecurrenceKind Kind = RK_NoRecurrence;
- // If this a min/max recurrence the kind of recurrence.
- MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
- // First occurrence of unsafe algebra in the PHI's use-chain.
- Instruction *UnsafeAlgebraInst = nullptr;
- // The type of the recurrence.
- Type *RecurrenceType = nullptr;
- // True if all source operands of the recurrence are SExtInsts.
- bool IsSigned = false;
- // Instructions used for type-promoting the recurrence.
- SmallPtrSet<Instruction *, 8> CastInsts;
-};
-
-/// A struct for saving information about induction variables.
-class InductionDescriptor {
-public:
- /// This enum represents the kinds of inductions that we support.
- enum InductionKind {
- IK_NoInduction, ///< Not an induction variable.
- IK_IntInduction, ///< Integer induction variable. Step = C.
- IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
- IK_FpInduction ///< Floating point induction variable.
- };
-
-public:
- /// Default constructor - creates an invalid induction.
- InductionDescriptor() = default;
-
- /// Get the consecutive direction. Returns:
- /// 0 - unknown or non-consecutive.
- /// 1 - consecutive and increasing.
- /// -1 - consecutive and decreasing.
- int getConsecutiveDirection() const;
-
- /// Compute the transformed value of Index at offset StartValue using step
- /// StepValue.
- /// For integer induction, returns StartValue + Index * StepValue.
- /// For pointer induction, returns StartValue[Index * StepValue].
- /// FIXME: The newly created binary instructions should contain nsw/nuw
- /// flags, which can be found from the original scalar operations.
- Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE,
- const DataLayout& DL) const;
-
- Value *getStartValue() const { return StartValue; }
- InductionKind getKind() const { return IK; }
- const SCEV *getStep() const { return Step; }
- ConstantInt *getConstIntStepValue() const;
-
- /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
- /// induction, the induction descriptor \p D will contain the data describing
- /// this induction. If by some other means the caller has a better SCEV
- /// expression for \p Phi than the one returned by the ScalarEvolution
- /// analysis, it can be passed through \p Expr. If the def-use chain
- /// associated with the phi includes casts (that we know we can ignore
- /// under proper runtime checks), they are passed through \p CastsToIgnore.
- static bool
- isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
- InductionDescriptor &D, const SCEV *Expr = nullptr,
- SmallVectorImpl<Instruction *> *CastsToIgnore = nullptr);
-
- /// Returns true if \p Phi is a floating point induction in the loop \p L.
- /// If \p Phi is an induction, the induction descriptor \p D will contain
- /// the data describing this induction.
- static bool isFPInductionPHI(PHINode *Phi, const Loop* L,
- ScalarEvolution *SE, InductionDescriptor &D);
-
- /// Returns true if \p Phi is a loop \p L induction, in the context associated
- /// with the run-time predicate of PSE. If \p Assume is true, this can add
- /// further SCEV predicates to \p PSE in order to prove that \p Phi is an
- /// induction.
- /// If \p Phi is an induction, \p D will contain the data describing this
- /// induction.
- static bool isInductionPHI(PHINode *Phi, const Loop* L,
- PredicatedScalarEvolution &PSE,
- InductionDescriptor &D, bool Assume = false);
-
- /// Returns true if the induction type is FP and the binary operator does
- /// not have the "fast-math" property. Such operation requires a relaxed FP
- /// mode.
- bool hasUnsafeAlgebra() {
- return InductionBinOp && !cast<FPMathOperator>(InductionBinOp)->isFast();
- }
-
- /// Returns induction operator that does not have "fast-math" property
- /// and requires FP unsafe mode.
- Instruction *getUnsafeAlgebraInst() {
- if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast())
- return nullptr;
- return InductionBinOp;
- }
-
- /// Returns binary opcode of the induction operator.
- Instruction::BinaryOps getInductionOpcode() const {
- return InductionBinOp ? InductionBinOp->getOpcode() :
- Instruction::BinaryOpsEnd;
- }
-
- /// Returns a reference to the type cast instructions in the induction
- /// update chain, that are redundant when guarded with a runtime
- /// SCEV overflow check.
- const SmallVectorImpl<Instruction *> &getCastInsts() const {
- return RedundantCasts;
- }
-
-private:
- /// Private constructor - used by \c isInductionPHI.
- InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
- BinaryOperator *InductionBinOp = nullptr,
- SmallVectorImpl<Instruction *> *Casts = nullptr);
-
- /// Start value.
- TrackingVH<Value> StartValue;
- /// Induction kind.
- InductionKind IK = IK_NoInduction;
- /// Step value.
- const SCEV *Step = nullptr;
- // Instruction that advances induction variable.
- BinaryOperator *InductionBinOp = nullptr;
- // Instructions used for type-casts of the induction variable,
- // that are redundant when guarded with a runtime SCEV overflow check.
- SmallVector<Instruction *, 2> RedundantCasts;
-};
-
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA);
@@ -420,7 +110,7 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, TargetTransformInfo *, Loop *,
- AliasSetTracker *, LoopSafetyInfo *,
+ AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *ORE);
/// Walk the specified region of the CFG (defined by all blocks
@@ -433,7 +123,8 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
/// ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, Loop *, AliasSetTracker *,
- LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);
+ MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ OptimizationRemarkEmitter *ORE);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
@@ -462,7 +153,8 @@ bool promoteLoopAccessesToScalars(const SmallSetVector<Value *, 8> &,
SmallVectorImpl<Instruction *> &,
PredIteratorCache &, LoopInfo *,
DominatorTree *, const TargetLibraryInfo *,
- Loop *, AliasSetTracker *, LoopSafetyInfo *,
+ Loop *, AliasSetTracker *,
+ ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *);
/// Does a BFS from a given node to all of its children inside a given loop.
@@ -478,9 +170,80 @@ SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
-Optional<const MDOperand *> findStringMetadataForLoop(Loop *TheLoop,
+Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name);
+/// Find named metadata for a loop with an integer value.
+llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
+
+/// Create a new loop identifier for a loop created from a loop transformation.
+///
+/// @param OrigLoopID The loop ID of the loop before the transformation.
+/// @param FollowupAttrs List of attribute names that contain attributes to be
+/// added to the new loop ID.
+/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
+/// from the original loop. The following values
+/// are considered:
+/// nullptr : Inherit all attributes from @p OrigLoopID.
+/// "" : Do not inherit any attribute from @p OrigLoopID; only use
+/// those specified by a followup attribute.
+/// "<prefix>": Inherit all attributes except those which start with
+/// <prefix>; commonly used to remove metadata for the
+/// applied transformation.
+/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
+/// None.
+///
+/// @return The loop ID for the after-transformation loop. The following values
+/// can be returned:
+/// None : No followup attribute was found; it is up to the
+/// transformation to choose attributes that make sense.
+/// @p OrigLoopID: The original identifier can be reused.
+/// nullptr : The new loop has no attributes.
+/// MDNode* : A new unique loop identifier.
+Optional<MDNode *>
+makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
+ const char *InheritOptionsAttrsPrefix = "",
+ bool AlwaysNew = false);
+
+/// Look for the loop attribute that disables all transformation heuristics.
+bool hasDisableAllTransformsHint(const Loop *L);
+
+/// The mode sets how eager a transformation should be applied.
+enum TransformationMode {
+ /// The pass can use heuristics to determine whether a transformation should
+ /// be applied.
+ TM_Unspecified,
+
+ /// The transformation should be applied without considering a cost model.
+ TM_Enable,
+
+ /// The transformation should not be applied.
+ TM_Disable,
+
+ /// Force is a flag and should not be used alone.
+ TM_Force = 0x04,
+
+ /// The transformation was directed by the user, e.g. by a #pragma in
+ /// the source code. If the transformation could not be applied, a
+ /// warning should be emitted.
+ TM_ForcedByUser = TM_Enable | TM_Force,
+
+ /// The transformation must not be applied. For instance, `#pragma clang loop
+ /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
+ /// general loop metadata, it must not be dropped. Most passes should not
+ /// behave differently under TM_Disable and TM_SuppressedByUser.
+ TM_SuppressedByUser = TM_Disable | TM_Force
+};
+
+/// @{
+/// Get the mode for LLVM's supported loop transformations.
+TransformationMode hasUnrollTransformation(Loop *L);
+TransformationMode hasUnrollAndJamTransformation(Loop *L);
+TransformationMode hasVectorizeTransformation(Loop *L);
+TransformationMode hasDistributeTransformation(Loop *L);
+TransformationMode hasLICMVersioningTransformation(Loop *L);
+/// @}
+
/// Set input string into loop metadata by keeping other values intact.
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
unsigned V = 0);
@@ -490,6 +253,11 @@ void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
/// estimate can not be made.
Optional<unsigned> getLoopEstimatedTripCount(Loop *L);
+/// Check whether the inner loop (L) backedge count is known to be invariant on
+/// all iterations of its outer loop. If the loop has no parent, this is
+/// trivially true.
+bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);
+
/// Helper to consistently add the set of standard passes to a loop pass's \c
/// AnalysisUsage.
///
@@ -497,18 +265,25 @@ Optional<unsigned> getLoopEstimatedTripCount(Loop *L);
/// getAnalysisUsage.
void getLoopAnalysisUsage(AnalysisUsage &AU);
-/// Returns true if the hoister and sinker can handle this instruction.
-/// If SafetyInfo is null, we are checking for sinking instructions from
-/// preheader to loop body (no speculation).
-/// If SafetyInfo is not null, we are checking for hoisting/sinking
-/// instructions from loop body to preheader/exit. Check if the instruction
-/// can execute speculatively.
+/// Returns true if it is legal to hoist or sink this instruction disregarding the
+/// possible introduction of faults. Reasoning about potential faulting
+/// instructions is the responsibility of the caller since it is challenging to
+/// do efficiently from within this routine.
+/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
+/// target executes at most once per execution of the loop body. This is used
+/// to assess the legality of duplicating atomic loads. Generally, this is
+/// true when moving out of loop and not true when moving into loops.
/// If \p ORE is set use it to emit optimization remarks.
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
- LoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
+Value *createMinMaxOp(IRBuilder<> &Builder,
+ RecurrenceDescriptor::MinMaxRecurrenceKind RK,
+ Value *Left, Value *Right);
+
/// Generates an ordered vector reduction using extracts to reduce the value.
Value *
getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op,
@@ -527,12 +302,12 @@ Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
/// additional information supplied in \p Flags.
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
-Value *
-createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
- unsigned Opcode, Value *Src,
- TargetTransformInfo::ReductionFlags Flags =
- TargetTransformInfo::ReductionFlags(),
- ArrayRef<Value *> RedOps = None);
+Value *createSimpleTargetReduction(IRBuilder<> &B,
+ const TargetTransformInfo *TTI,
+ unsigned Opcode, Value *Src,
+ TargetTransformInfo::ReductionFlags Flags =
+ TargetTransformInfo::ReductionFlags(),
+ ArrayRef<Value *> RedOps = None);
/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are
@@ -548,6 +323,23 @@ Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
/// Flag set: NSW, NUW, exact, and all of fast-math.
void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
+/// Returns true if we can prove that \p S is defined and always negative in
+/// loop \p L.
+bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
+
+/// Returns true if we can prove that \p S is defined and always non-negative in
+/// loop \p L.
+bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE);
+
+/// Returns true if \p S is defined and is never equal to signed/unsigned max.
+bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed);
+
+/// Returns true if \p S is defined and is never equal to signed/unsigned min.
+bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
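A sketch of how a transformation might combine makeFollowupLoopID with the unroll followup attribute names declared further down in UnrollLoop.h; the helper name and the chosen inherit prefix are illustrative assumptions.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"

using namespace llvm;

static void tagUnrolledLoop(Loop &Unrolled, MDNode *OrigLoopID) {
  // Inherit all attributes except the just-consumed "llvm.loop.unroll."
  // options, and append whatever followup_unrolled requested.
  Optional<MDNode *> NewLoopID = makeFollowupLoopID(
      OrigLoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupUnrolled},
      /*InheritOptionsAttrsPrefix=*/"llvm.loop.unroll.");
  // None means no followup attribute was found and the pass may pick its own
  // metadata; nullptr means the new loop carries no attributes at all.
  if (NewLoopID && *NewLoopID)
    Unrolled.setLoopID(*NewLoopID);
}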
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 14615c25d093..fee492be2a90 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -58,6 +58,24 @@ std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
StringRef VersionCheckName = StringRef());
+/// Creates the sanitizer constructor and init functions lazily. If they
+/// already exist, this function returns them. Otherwise it calls \c
+/// createSanitizerCtorAndInitFunctions; in that case the
+/// FunctionsCreatedCallback is invoked, passing the new Ctor and Init function.
+///
+/// \return Returns pair of pointers to constructor, and init functions
+/// respectively.
+std::pair<Function *, Function *> getOrCreateSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+ StringRef VersionCheckName = StringRef());
+
+// Creates and returns a sanitizer init function with no arguments if it doesn't
+// exist, and adds it to the global constructors list. Otherwise it returns the
+// existing function.
+Function *getOrCreateInitFunction(Module &M, StringRef Name);
+
/// Rename all the anon globals in the module using a hash computed from
/// the list of public globals in the module.
bool nameUnamedGlobals(Module &M);
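A hedged sketch of the lazy variant in use; the ctor/init names, the priority, and the callback body are assumptions of the example, not values implied by this change.

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <tuple>

using namespace llvm;

static void ensureRuntimeInit(Module &M) {
  Function *Ctor = nullptr, *Init = nullptr;
  std::tie(Ctor, Init) = getOrCreateSanitizerCtorAndInitFunctions(
      M, /*CtorName=*/"mysan.module_ctor", /*InitName=*/"__mysan_init",
      /*InitArgTypes=*/{}, /*InitArgs=*/{},
      // Runs only when the ctor/init pair had to be created just now.
      [&](Function *NewCtor, Function *NewInit) {
        appendToGlobalCtors(M, NewCtor, /*Priority=*/0);
      });
  (void)Ctor;
  (void)Init;
}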
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index b53eda7e5a42..2fc38089f3f1 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -60,6 +60,7 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/OrderedInstructions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -76,7 +77,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index d007f909c6a4..025bcd44e310 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -77,21 +77,34 @@ private:
OptimizationRemarkEmitter &ORE;
bool UnsafeFPShrink;
function_ref<void(Instruction *, Value *)> Replacer;
+ function_ref<void(Instruction *)> Eraser;
/// Internal wrapper for RAUW that is the default implementation.
///
/// Other users may provide an alternate function with this signature instead
/// of this one.
- static void replaceAllUsesWithDefault(Instruction *I, Value *With);
+ static void replaceAllUsesWithDefault(Instruction *I, Value *With) {
+ I->replaceAllUsesWith(With);
+ }
+
+ /// Internal wrapper for eraseFromParent that is the default implementation.
+ static void eraseFromParentDefault(Instruction *I) { I->eraseFromParent(); }
/// Replace an instruction's uses with a value using our replacer.
void replaceAllUsesWith(Instruction *I, Value *With);
+ /// Erase an instruction from its parent with our eraser.
+ void eraseFromParent(Instruction *I);
+
+ Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B);
+
public:
- LibCallSimplifier(const DataLayout &DL, const TargetLibraryInfo *TLI,
- OptimizationRemarkEmitter &ORE,
- function_ref<void(Instruction *, Value *)> Replacer =
- &replaceAllUsesWithDefault);
+ LibCallSimplifier(
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ OptimizationRemarkEmitter &ORE,
+ function_ref<void(Instruction *, Value *)> Replacer =
+ &replaceAllUsesWithDefault,
+ function_ref<void(Instruction *)> Eraser = &eraseFromParentDefault);
/// optimizeCall - Take the given call instruction and return a more
/// optimal value to replace the instruction with or 0 if a more
@@ -131,8 +144,8 @@ private:
// Math Library Optimizations
Value *optimizeCAbs(CallInst *CI, IRBuilder<> &B);
- Value *optimizeCos(CallInst *CI, IRBuilder<> &B);
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
+ Value *replacePowWithExp(CallInst *Pow, IRBuilder<> &B);
Value *replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B);
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B);
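The new Eraser hook mirrors the existing Replacer hook. A sketch of a caller that defers both replacement and erasure into its own dead-instruction list follows; the helper signature and the exact optimizeCall entry point are assumed for illustration.

#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <vector>

using namespace llvm;

static Value *simplifyOneCall(CallInst *CI, const DataLayout &DL,
                              const TargetLibraryInfo *TLI,
                              OptimizationRemarkEmitter &ORE,
                              std::vector<Instruction *> &DeadInsts) {
  // Defer both RAUW and erasure so the caller's worklist stays consistent.
  auto Replacer = [&](Instruction *I, Value *With) {
    I->replaceAllUsesWith(With);
    DeadInsts.push_back(I);
  };
  auto Eraser = [&](Instruction *I) { DeadInsts.push_back(I); };
  LibCallSimplifier Simplifier(DL, TLI, ORE, Replacer, Eraser);
  return Simplifier.optimizeCall(CI);
}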
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index a6b84af068a5..70e936d75008 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -35,6 +35,15 @@ class ScalarEvolution;
using NewLoopsMap = SmallDenseMap<const Loop *, Loop *, 4>;
+/// @{
+/// Metadata attribute names
+const char *const LLVMLoopUnrollFollowupAll = "llvm.loop.unroll.followup_all";
+const char *const LLVMLoopUnrollFollowupUnrolled =
+ "llvm.loop.unroll.followup_unrolled";
+const char *const LLVMLoopUnrollFollowupRemainder =
+ "llvm.loop.unroll.followup_remainder";
+/// @}
+
const Loop* addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
BasicBlock *ClonedBB, LoopInfo *LI,
NewLoopsMap &NewLoops);
@@ -61,15 +70,16 @@ LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
unsigned PeelCount, bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
+ OptimizationRemarkEmitter *ORE, bool PreserveLCSSA,
+ Loop **RemainderLoop = nullptr);
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder, bool UnrollRemainder,
- LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC,
- bool PreserveLCSSA);
+ LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA,
+ Loop **ResultLoop = nullptr);
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
@@ -84,7 +94,8 @@ LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
unsigned TripMultiple, bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE);
+ OptimizationRemarkEmitter *ORE,
+ Loop **EpilogueLoop = nullptr);
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
DependenceInfo &DI);
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize.h b/contrib/llvm/include/llvm/Transforms/Vectorize.h
index 950af7ffe05f..70f9a2e0741b 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize.h
@@ -110,8 +110,8 @@ struct VectorizeConfig {
//
// LoopVectorize - Create a loop vectorization pass.
//
-Pass *createLoopVectorizePass(bool NoUnrolling = false,
- bool AlwaysVectorize = true);
+Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced = false,
+ bool VectorizeOnlyWhenForced = false);
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h b/contrib/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
new file mode 100644
index 000000000000..6b37d7093c44
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
@@ -0,0 +1,27 @@
+//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H
+#define LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class LoadStoreVectorizerPass : public PassInfoMixin<LoadStoreVectorizerPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Create a legacy pass manager instance of the LoadStoreVectorizer pass
+Pass *createLoadStoreVectorizerPass();
+
+}
+
+#endif /* LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H */
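A minimal sketch of running the new-pass-manager wrapper declared here; building the FunctionAnalysisManager is assumed to happen elsewhere.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"

using namespace llvm;

static PreservedAnalyses vectorizeLoadsAndStores(Function &F,
                                                 FunctionAnalysisManager &FAM) {
  FunctionPassManager FPM;
  // Merge adjacent loads and stores into vector accesses where profitable.
  FPM.addPass(LoadStoreVectorizerPass());
  return FPM.run(F, FAM);
}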
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 224879cdba52..5c7bba048607 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -95,7 +95,7 @@ public:
FK_Enabled = 1, ///< Forcing enabled.
};
- LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
+ LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
OptimizationRemarkEmitter &ORE);
/// Mark the loop L as already vectorized by setting the width to 1.
@@ -105,7 +105,8 @@ public:
writeHintsToMetadata(Hints);
}
- bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const;
+ bool allowVectorization(Function *F, Loop *L,
+ bool VectorizeOnlyWhenForced) const;
/// Dumps all the hint information.
void emitRemarkWithHints() const;
@@ -113,7 +114,12 @@ public:
unsigned getWidth() const { return Width.Value; }
unsigned getInterleave() const { return Interleave.Value; }
unsigned getIsVectorized() const { return IsVectorized.Value; }
- enum ForceKind getForce() const { return (ForceKind)Force.Value; }
+ enum ForceKind getForce() const {
+ if ((ForceKind)Force.Value == FK_Undefined &&
+ hasDisableAllTransformsHint(TheLoop))
+ return FK_Disabled;
+ return (ForceKind)Force.Value;
+ }
/// If hints are provided that force vectorization, use the AlwaysPrint
/// pass name to force the frontend to print the diagnostic.
@@ -241,6 +247,10 @@ public:
/// If false, good old LV code.
bool canVectorize(bool UseVPlanNativePath);
+ /// Return true if we can vectorize this loop while folding its tail by
+ /// masking.
+ bool canFoldTailByMasking();
+
/// Returns the primary induction variable.
PHINode *getPrimaryInduction() { return PrimaryInduction; }
@@ -332,6 +342,11 @@ private:
/// If false, good old LV code.
bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
+ /// Set up outer loop inductions by checking Phis in outer loop header for
+ /// supported inductions (int inductions). Return false if any of these Phis
+ /// is not a supported induction or if we fail to find an induction.
+ bool setupOuterLoopInductions();
+
/// Return true if the pre-header, exiting and latch blocks of \p Lp
/// (non-recursive) are considered legal for vectorization.
/// Temporarily taking UseVPlanNativePath parameter. If true, take
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index d79d84691803..d9c4f7b023c1 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -78,12 +78,13 @@ class TargetTransformInfo;
/// The LoopVectorize Pass.
struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
- bool DisableUnrolling = false;
+ /// If false, consider all loops for interleaving.
+ /// If true, only loops that explicitly request interleaving are considered.
+ bool InterleaveOnlyWhenForced = false;
- /// If true, consider all loops for vectorization.
- /// If false, only loops that explicitly request vectorization are
- /// considered.
- bool AlwaysVectorize = true;
+ /// If false, consider all loops for vectorization.
+ /// If true, only loops that explicitly request vectorization are considered.
+ bool VectorizeOnlyWhenForced = false;
ScalarEvolution *SE;
LoopInfo *LI;
diff --git a/contrib/llvm/include/llvm/XRay/BlockIndexer.h b/contrib/llvm/include/llvm/XRay/BlockIndexer.h
new file mode 100644
index 000000000000..b42fa17f3fb7
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/BlockIndexer.h
@@ -0,0 +1,69 @@
+//===- BlockIndexer.h - FDR Block Indexing Visitor ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which generates a mapping between a
+// thread and a range of records representing a block.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_XRAY_BLOCKINDEXER_H_
+#define LLVM_LIB_XRAY_BLOCKINDEXER_H_
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/XRay/FDRRecords.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace xray {
+
+// The BlockIndexer will gather all related records associated with a
+// process+thread and group them by 'Block'.
+class BlockIndexer : public RecordVisitor {
+public:
+ struct Block {
+ uint64_t ProcessID;
+ int32_t ThreadID;
+ WallclockRecord *WallclockTime;
+ std::vector<Record *> Records;
+ };
+
+ // This maps the process + thread combination to a sequence of blocks.
+ using Index = DenseMap<std::pair<uint64_t, int32_t>, std::vector<Block>>;
+
+private:
+ Index &Indices;
+
+ Block CurrentBlock{0, 0, nullptr, {}};
+
+public:
+ explicit BlockIndexer(Index &I) : RecordVisitor(), Indices(I) {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+ /// The flush() function will clear out the current state of the visitor, to
+ /// allow for explicitly flushing a block's records to the currently
+ /// recognized thread and process combination.
+ Error flush();
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_LIB_XRAY_BLOCKINDEXER_H_
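A sketch of indexing an already-loaded record sequence; the Record::apply() dispatch used here is assumed from the visitor interface in FDRRecords.h rather than shown in this diff.

#include "llvm/Support/Error.h"
#include "llvm/XRay/BlockIndexer.h"
#include "llvm/XRay/FDRRecords.h"
#include <memory>
#include <vector>

using namespace llvm;
using namespace llvm::xray;

static Error indexLog(std::vector<std::unique_ptr<Record>> &Records,
                      BlockIndexer::Index &Index) {
  BlockIndexer Indexer(Index);
  for (auto &R : Records)
    // Each record dispatches into the matching visit() overload and gets
    // grouped under the current (process, thread) block.
    if (auto E = R->apply(Indexer))
      return E;
  // Flush the trailing block, which has no explicit end marker of its own.
  return Indexer.flush();
}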
diff --git a/contrib/llvm/include/llvm/XRay/BlockPrinter.h b/contrib/llvm/include/llvm/XRay/BlockPrinter.h
new file mode 100644
index 000000000000..bfb21e239517
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/BlockPrinter.h
@@ -0,0 +1,62 @@
+//===- BlockPrinter.h - FDR Block Pretty Printer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which formats a block of records for
+// easier human consumption.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_BLOCKPRINTER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_BLOCKPRINTER_H_
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/RecordPrinter.h"
+
+namespace llvm {
+namespace xray {
+
+class BlockPrinter : public RecordVisitor {
+ enum class State {
+ Start,
+ Preamble,
+ Metadata,
+ Function,
+ Arg,
+ CustomEvent,
+ End,
+ };
+
+ raw_ostream &OS;
+ RecordPrinter &RP;
+ State CurrentState = State::Start;
+
+public:
+ explicit BlockPrinter(raw_ostream &O, RecordPrinter &P)
+ : RecordVisitor(), OS(O), RP(P) {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+ void reset() { CurrentState = State::Start; }
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_BLOCKPRINTER_H_
diff --git a/contrib/llvm/include/llvm/XRay/BlockVerifier.h b/contrib/llvm/include/llvm/XRay/BlockVerifier.h
new file mode 100644
index 000000000000..46371c13891a
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/BlockVerifier.h
@@ -0,0 +1,72 @@
+//===- BlockVerifier.h - FDR Block Verifier -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which verifies a sequence of records
+// associated with a block, following the FDR mode log format's specifications.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_BLOCKVERIFIER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_BLOCKVERIFIER_H_
+
+#include "llvm/XRay/FDRRecords.h"
+#include <array>
+#include <bitset>
+
+namespace llvm {
+namespace xray {
+
+class BlockVerifier : public RecordVisitor {
+public:
+ // We force State elements to be size_t, to be used as indices for containers.
+ enum class State : std::size_t {
+ Unknown,
+ BufferExtents,
+ NewBuffer,
+ WallClockTime,
+ PIDEntry,
+ NewCPUId,
+ TSCWrap,
+ CustomEvent,
+ TypedEvent,
+ Function,
+ CallArg,
+ EndOfBuffer,
+ StateMax,
+ };
+
+private:
+ // We keep track of the current record seen by the verifier.
+ State CurrentRecord = State::Unknown;
+
+ // Transitions the current record to the new record, records an error on
+ // invalid transitions.
+ Error transition(State To);
+
+public:
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+ Error verify();
+ void reset();
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_BLOCKVERIFIER_H_
diff --git a/contrib/llvm/include/llvm/XRay/FDRLogBuilder.h b/contrib/llvm/include/llvm/XRay/FDRLogBuilder.h
new file mode 100644
index 000000000000..b5e9ed5c406b
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRLogBuilder.h
@@ -0,0 +1,41 @@
+//===- FDRLogBuilder.h - XRay FDR Log Building Utility --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRLOGBUILDER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_FDRLOGBUILDER_H_
+
+#include "llvm/XRay/FDRRecords.h"
+
+namespace llvm {
+namespace xray {
+
+/// The LogBuilder class allows for creating ad-hoc collections of records
+/// through the `add<...>(...)` function. An example use of this API is in
+/// crafting arbitrary sequences of records:
+///
+/// auto Records = LogBuilder()
+/// .add<BufferExtents>(256)
+/// .add<NewBufferRecord>(1)
+/// .consume();
+///
+class LogBuilder {
+ std::vector<std::unique_ptr<Record>> Records;
+
+public:
+ template <class R, class... T> LogBuilder &add(T &&... A) {
+ Records.emplace_back(new R(std::forward<T>(A)...));
+ return *this;
+ }
+
+ std::vector<std::unique_ptr<Record>> consume() { return std::move(Records); }
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_FDRLOGBUILDER_H_
diff --git a/contrib/llvm/include/llvm/XRay/FDRRecordConsumer.h b/contrib/llvm/include/llvm/XRay/FDRRecordConsumer.h
new file mode 100644
index 000000000000..e856e1540558
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRRecordConsumer.h
@@ -0,0 +1,55 @@
+//===- FDRRecordConsumer.h - XRay Flight Data Recorder Mode Records -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRRECORDCONSUMER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_FDRRECORDCONSUMER_H_
+
+#include "llvm/Support/Error.h"
+#include "llvm/XRay/FDRRecords.h"
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+namespace llvm {
+namespace xray {
+
+class RecordConsumer {
+public:
+ virtual Error consume(std::unique_ptr<Record> R) = 0;
+ virtual ~RecordConsumer() = default;
+};
+
+// This consumer will collect all the records into a vector of records, in
+// arrival order.
+class LogBuilderConsumer : public RecordConsumer {
+ std::vector<std::unique_ptr<Record>> &Records;
+
+public:
+ explicit LogBuilderConsumer(std::vector<std::unique_ptr<Record>> &R)
+ : RecordConsumer(), Records(R) {}
+
+ Error consume(std::unique_ptr<Record> R) override;
+};
+
+// A PipelineConsumer applies a set of visitors to every consumed Record, in
+// the order in which the visitors were added to the pipeline.
+class PipelineConsumer : public RecordConsumer {
+ std::vector<RecordVisitor *> Visitors;
+
+public:
+ PipelineConsumer(std::initializer_list<RecordVisitor *> V)
+ : RecordConsumer(), Visitors(V) {}
+
+ Error consume(std::unique_ptr<Record> R) override;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_FDRRECORDCONSUMER_H_
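
A sketch of how the two consumers are meant to be combined with visitors added elsewhere in this change (RecordPrinter and BlockVerifier come from the other new headers; llvm::outs() is from llvm/Support/raw_ostream.h):

    // Collect records verbatim, in arrival order.
    std::vector<std::unique_ptr<llvm::xray::Record>> Collected;
    llvm::xray::LogBuilderConsumer Collector(Collected);

    // Or fan each record out to several visitors, applied in this order.
    llvm::xray::RecordPrinter Printer(llvm::outs(), "\n");
    llvm::xray::BlockVerifier Verifier;
    llvm::xray::PipelineConsumer Pipeline({&Printer, &Verifier});
    // if (auto E = Pipeline.consume(std::move(R)))
    //   ... handle the error ...
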
diff --git a/contrib/llvm/include/llvm/XRay/FDRRecordProducer.h b/contrib/llvm/include/llvm/XRay/FDRRecordProducer.h
new file mode 100644
index 000000000000..efdba2a67b7b
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRRecordProducer.h
@@ -0,0 +1,51 @@
+//===- FDRRecordProducer.h - XRay FDR Mode Record Producer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRRECORDPRODUCER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_FDRRECORDPRODUCER_H_
+
+#include "llvm/Support/Error.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/XRayRecord.h"
+#include <memory>
+
+namespace llvm {
+namespace xray {
+
+class RecordProducer {
+public:
+ /// All producer implementations must yield either an Error or a non-nullptr
+ /// unique_ptr<Record>.
+ virtual Expected<std::unique_ptr<Record>> produce() = 0;
+ virtual ~RecordProducer() = default;
+};
+
+class FileBasedRecordProducer : public RecordProducer {
+ const XRayFileHeader &Header;
+ DataExtractor &E;
+ uint32_t &OffsetPtr;
+ uint32_t CurrentBufferBytes = 0;
+
+ // Helper function which gets the next record by speculatively reading
+ // through the log until it finds a buffer extents record.
+ Expected<std::unique_ptr<Record>> findNextBufferExtent();
+
+public:
+ FileBasedRecordProducer(const XRayFileHeader &FH, DataExtractor &DE,
+ uint32_t &OP)
+ : Header(FH), E(DE), OffsetPtr(OP) {}
+
+ /// This producer encapsulates the logic for loading a File-backed
+ /// RecordProducer hidden behind a DataExtractor.
+ Expected<std::unique_ptr<Record>> produce() override;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_FDRRECORDPRODUCER_H_
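
A sketch of the intended produce/consume loop, under the assumption that Data holds the raw bytes of an FDR log; readBinaryFormatHeader is declared in FileHeaderReader.h below, and the DataExtractor calls come from llvm/Support/DataExtractor.h:

    // A sketch only: drive the producer until the extractor is exhausted.
    llvm::Error loadRecords(llvm::StringRef Data, llvm::xray::RecordConsumer &C) {
      llvm::DataExtractor DE(Data, /*IsLittleEndian=*/true, /*AddressSize=*/8);
      uint32_t OffsetPtr = 0;
      auto HeaderOrErr = llvm::xray::readBinaryFormatHeader(DE, OffsetPtr);
      if (!HeaderOrErr)
        return HeaderOrErr.takeError();
      llvm::xray::FileBasedRecordProducer P(*HeaderOrErr, DE, OffsetPtr);
      while (DE.isValidOffset(OffsetPtr)) {
        auto R = P.produce();  // yields a non-null record or an Error
        if (!R)
          return R.takeError();
        if (auto E = C.consume(std::move(*R)))
          return E;
      }
      return llvm::Error::success();
    }
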
diff --git a/contrib/llvm/include/llvm/XRay/FDRRecords.h b/contrib/llvm/include/llvm/XRay/FDRRecords.h
new file mode 100644
index 000000000000..8a84f4d0c1fb
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRRecords.h
@@ -0,0 +1,450 @@
+//===- FDRRecords.h - XRay Flight Data Recorder Mode Records --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define types and operations on these types that represent the different kinds
+// of records we encounter in XRay flight data recorder mode traces.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_XRAY_FDRRECORDS_H_
+#define LLVM_LIB_XRAY_FDRRECORDS_H_
+
+#include <cstdint>
+#include <string>
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/XRay/XRayRecord.h"
+
+namespace llvm {
+namespace xray {
+
+class RecordVisitor;
+class RecordInitializer;
+
+class Record {
+public:
+ enum class RecordKind {
+ RK_Metadata,
+ RK_Metadata_BufferExtents,
+ RK_Metadata_WallClockTime,
+ RK_Metadata_NewCPUId,
+ RK_Metadata_TSCWrap,
+ RK_Metadata_CustomEvent,
+ RK_Metadata_CustomEventV5,
+ RK_Metadata_CallArg,
+ RK_Metadata_PIDEntry,
+ RK_Metadata_NewBuffer,
+ RK_Metadata_EndOfBuffer,
+ RK_Metadata_TypedEvent,
+ RK_Metadata_LastMetadata,
+ RK_Function,
+ };
+
+ static StringRef kindToString(RecordKind K);
+
+private:
+ const RecordKind T;
+
+public:
+ Record(const Record &) = delete;
+ Record(Record &&) = delete;
+ Record &operator=(const Record &) = delete;
+ Record &operator=(Record &&) = delete;
+ explicit Record(RecordKind T) : T(T) {}
+
+ RecordKind getRecordType() const { return T; }
+
+ // Each Record should be able to apply an abstract visitor, and choose the
+ // appropriate function in the visitor to invoke, given its own type.
+ virtual Error apply(RecordVisitor &V) = 0;
+
+ virtual ~Record() = default;
+};
+
+class MetadataRecord : public Record {
+public:
+ enum class MetadataType : unsigned {
+ Unknown,
+ BufferExtents,
+ WallClockTime,
+ NewCPUId,
+ TSCWrap,
+ CustomEvent,
+ CallArg,
+ PIDEntry,
+ NewBuffer,
+ EndOfBuffer,
+ TypedEvent,
+ };
+
+protected:
+ static constexpr int kMetadataBodySize = 15;
+ friend class RecordInitializer;
+
+private:
+ const MetadataType MT;
+
+public:
+ explicit MetadataRecord(RecordKind T, MetadataType M) : Record(T), MT(M) {}
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() >= RecordKind::RK_Metadata &&
+ R->getRecordType() <= RecordKind::RK_Metadata_LastMetadata;
+ }
+
+ MetadataType metadataType() const { return MT; }
+
+ virtual ~MetadataRecord() = default;
+};
+
+// What follows are the specific metadata record types, each of which
+// encapsulates the information associated with one kind of metadata record in
+// an FDR mode log.
+class BufferExtents : public MetadataRecord {
+ uint64_t Size = 0;
+ friend class RecordInitializer;
+
+public:
+ BufferExtents()
+ : MetadataRecord(RecordKind::RK_Metadata_BufferExtents,
+ MetadataType::BufferExtents) {}
+
+ explicit BufferExtents(uint64_t S)
+ : MetadataRecord(RecordKind::RK_Metadata_BufferExtents,
+ MetadataType::BufferExtents),
+ Size(S) {}
+
+ uint64_t size() const { return Size; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_BufferExtents;
+ }
+};
+
+class WallclockRecord : public MetadataRecord {
+ uint64_t Seconds = 0;
+ uint32_t Nanos = 0;
+ friend class RecordInitializer;
+
+public:
+ WallclockRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_WallClockTime,
+ MetadataType::WallClockTime) {}
+
+ explicit WallclockRecord(uint64_t S, uint32_t N)
+ : MetadataRecord(RecordKind::RK_Metadata_WallClockTime,
+ MetadataType::WallClockTime),
+ Seconds(S), Nanos(N) {}
+
+ uint64_t seconds() const { return Seconds; }
+ uint32_t nanos() const { return Nanos; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_WallClockTime;
+ }
+};
+
+class NewCPUIDRecord : public MetadataRecord {
+ uint16_t CPUId = 0;
+ uint64_t TSC = 0;
+ friend class RecordInitializer;
+
+public:
+ NewCPUIDRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_NewCPUId,
+ MetadataType::NewCPUId) {}
+
+ NewCPUIDRecord(uint16_t C, uint64_t T)
+ : MetadataRecord(RecordKind::RK_Metadata_NewCPUId,
+ MetadataType::NewCPUId),
+ CPUId(C), TSC(T) {}
+
+ uint16_t cpuid() const { return CPUId; }
+
+ uint64_t tsc() const { return TSC; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_NewCPUId;
+ }
+};
+
+class TSCWrapRecord : public MetadataRecord {
+ uint64_t BaseTSC = 0;
+ friend class RecordInitializer;
+
+public:
+ TSCWrapRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_TSCWrap, MetadataType::TSCWrap) {
+ }
+
+ explicit TSCWrapRecord(uint64_t B)
+ : MetadataRecord(RecordKind::RK_Metadata_TSCWrap, MetadataType::TSCWrap),
+ BaseTSC(B) {}
+
+ uint64_t tsc() const { return BaseTSC; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_TSCWrap;
+ }
+};
+
+class CustomEventRecord : public MetadataRecord {
+ int32_t Size = 0;
+ uint64_t TSC = 0;
+ uint16_t CPU = 0;
+ std::string Data{};
+ friend class RecordInitializer;
+
+public:
+ CustomEventRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_CustomEvent,
+ MetadataType::CustomEvent) {}
+
+ explicit CustomEventRecord(uint64_t S, uint64_t T, uint16_t C, std::string D)
+ : MetadataRecord(RecordKind::RK_Metadata_CustomEvent,
+ MetadataType::CustomEvent),
+ Size(S), TSC(T), CPU(C), Data(std::move(D)) {}
+
+ int32_t size() const { return Size; }
+ uint64_t tsc() const { return TSC; }
+ uint16_t cpu() const { return CPU; }
+ StringRef data() const { return Data; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_CustomEvent;
+ }
+};
+
+class CustomEventRecordV5 : public MetadataRecord {
+ int32_t Size = 0;
+ int32_t Delta = 0;
+ std::string Data{};
+ friend class RecordInitializer;
+
+public:
+ CustomEventRecordV5()
+ : MetadataRecord(RecordKind::RK_Metadata_CustomEventV5,
+ MetadataType::CustomEvent) {}
+
+ explicit CustomEventRecordV5(int32_t S, int32_t D, std::string P)
+ : MetadataRecord(RecordKind::RK_Metadata_CustomEventV5,
+ MetadataType::CustomEvent),
+ Size(S), Delta(D), Data(std::move(P)) {}
+
+ int32_t size() const { return Size; }
+ int32_t delta() const { return Delta; }
+ StringRef data() const { return Data; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_CustomEventV5;
+ }
+};
+
+class TypedEventRecord : public MetadataRecord {
+ int32_t Size = 0;
+ int32_t Delta = 0;
+ uint16_t EventType = 0;
+ std::string Data{};
+ friend class RecordInitializer;
+
+public:
+ TypedEventRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_TypedEvent,
+ MetadataType::TypedEvent) {}
+
+ explicit TypedEventRecord(int32_t S, int32_t D, uint16_t E, std::string P)
+ : MetadataRecord(RecordKind::RK_Metadata_TypedEvent,
+ MetadataType::TypedEvent),
+ Size(S), Delta(D), Data(std::move(P)) {}
+
+ int32_t size() const { return Size; }
+ int32_t delta() const { return Delta; }
+ uint16_t eventType() const { return EventType; }
+ StringRef data() const { return Data; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_TypedEvent;
+ }
+};
+
+class CallArgRecord : public MetadataRecord {
+ uint64_t Arg;
+ friend class RecordInitializer;
+
+public:
+ CallArgRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_CallArg, MetadataType::CallArg) {
+ }
+
+ explicit CallArgRecord(uint64_t A)
+ : MetadataRecord(RecordKind::RK_Metadata_CallArg, MetadataType::CallArg),
+ Arg(A) {}
+
+ uint64_t arg() const { return Arg; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_CallArg;
+ }
+};
+
+class PIDRecord : public MetadataRecord {
+ int32_t PID = 0;
+ friend class RecordInitializer;
+
+public:
+ PIDRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_PIDEntry,
+ MetadataType::PIDEntry) {}
+
+ explicit PIDRecord(int32_t P)
+ : MetadataRecord(RecordKind::RK_Metadata_PIDEntry,
+ MetadataType::PIDEntry),
+ PID(P) {}
+
+ int32_t pid() const { return PID; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_PIDEntry;
+ }
+};
+
+class NewBufferRecord : public MetadataRecord {
+ int32_t TID = 0;
+ friend class RecordInitializer;
+
+public:
+ NewBufferRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_NewBuffer,
+ MetadataType::NewBuffer) {}
+
+ explicit NewBufferRecord(int32_t T)
+ : MetadataRecord(RecordKind::RK_Metadata_NewBuffer,
+ MetadataType::NewBuffer),
+ TID(T) {}
+
+ int32_t tid() const { return TID; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_NewBuffer;
+ }
+};
+
+class EndBufferRecord : public MetadataRecord {
+public:
+ EndBufferRecord()
+ : MetadataRecord(RecordKind::RK_Metadata_EndOfBuffer,
+ MetadataType::EndOfBuffer) {}
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Metadata_EndOfBuffer;
+ }
+};
+
+class FunctionRecord : public Record {
+ RecordTypes Kind;
+ int32_t FuncId;
+ uint32_t Delta;
+ friend class RecordInitializer;
+
+ static constexpr unsigned kFunctionRecordSize = 8;
+
+public:
+ FunctionRecord() : Record(RecordKind::RK_Function) {}
+
+ explicit FunctionRecord(RecordTypes K, int32_t F, uint32_t D)
+ : Record(RecordKind::RK_Function), Kind(K), FuncId(F), Delta(D) {}
+
+ // A function record is a concrete record type which has a number of common
+ // properties.
+ RecordTypes recordType() const { return Kind; }
+ int32_t functionId() const { return FuncId; }
+ uint32_t delta() const { return Delta; }
+
+ Error apply(RecordVisitor &V) override;
+
+ static bool classof(const Record *R) {
+ return R->getRecordType() == RecordKind::RK_Function;
+ }
+};
+
+class RecordVisitor {
+public:
+ virtual ~RecordVisitor() = default;
+
+ // Support all specific kinds of records:
+ virtual Error visit(BufferExtents &) = 0;
+ virtual Error visit(WallclockRecord &) = 0;
+ virtual Error visit(NewCPUIDRecord &) = 0;
+ virtual Error visit(TSCWrapRecord &) = 0;
+ virtual Error visit(CustomEventRecord &) = 0;
+ virtual Error visit(CallArgRecord &) = 0;
+ virtual Error visit(PIDRecord &) = 0;
+ virtual Error visit(NewBufferRecord &) = 0;
+ virtual Error visit(EndBufferRecord &) = 0;
+ virtual Error visit(FunctionRecord &) = 0;
+ virtual Error visit(CustomEventRecordV5 &) = 0;
+ virtual Error visit(TypedEventRecord &) = 0;
+};
+
+class RecordInitializer : public RecordVisitor {
+ DataExtractor &E;
+ uint32_t &OffsetPtr;
+ uint16_t Version;
+
+public:
+ static constexpr uint16_t DefaultVersion = 5u;
+
+ explicit RecordInitializer(DataExtractor &DE, uint32_t &OP, uint16_t V)
+ : RecordVisitor(), E(DE), OffsetPtr(OP), Version(V) {}
+
+ explicit RecordInitializer(DataExtractor &DE, uint32_t &OP)
+ : RecordInitializer(DE, OP, DefaultVersion) {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_LIB_XRAY_FDRRECORDS_H_
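
Because the hierarchy carries LLVM-style RTTI through the classof hooks above, records can also be inspected with isa/dyn_cast instead of a full RecordVisitor; a small sketch:

    // Count function-entry records in an already-loaded block.
    size_t countEntryRecords(
        const std::vector<std::unique_ptr<llvm::xray::Record>> &Records) {
      size_t Entries = 0;
      for (const auto &R : Records)
        if (const auto *F = llvm::dyn_cast<llvm::xray::FunctionRecord>(R.get()))
          if (F->recordType() == llvm::xray::RecordTypes::ENTER)
            ++Entries;
      return Entries;
    }
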
diff --git a/contrib/llvm/include/llvm/XRay/FDRTraceExpander.h b/contrib/llvm/include/llvm/XRay/FDRTraceExpander.h
new file mode 100644
index 000000000000..02a21bed5ce9
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRTraceExpander.h
@@ -0,0 +1,63 @@
+//===- FDRTraceExpander.h - XRay FDR Mode Log Expander --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// We define an FDR record visitor which can reconstitute XRayRecord instances
+// from a sequence of FDR mode records presented in arrival order, handing each
+// reconstituted record to a caller-provided callback.
+//
+//===----------------------------------------------------------------------===//
+#ifndef INCLUDE_LLVM_XRAY_FDRTRACEEXPANDER_H_
+#define INCLUDE_LLVM_XRAY_FDRTRACEEXPANDER_H_
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/XRayRecord.h"
+
+namespace llvm {
+namespace xray {
+
+class TraceExpander : public RecordVisitor {
+ // Type-erased callback for handling individual XRayRecord instances.
+ function_ref<void(const XRayRecord &)> C;
+ int32_t PID = 0;
+ int32_t TID = 0;
+ uint64_t BaseTSC = 0;
+ XRayRecord CurrentRecord{0, 0, RecordTypes::ENTER, 0, 0, 0, 0, {}, {}};
+ uint16_t CPUId = 0;
+ uint16_t LogVersion = 0;
+ bool BuildingRecord = false;
+ bool IgnoringRecords = false;
+
+ void resetCurrentRecord();
+
+public:
+ explicit TraceExpander(function_ref<void(const XRayRecord &)> F, uint16_t L)
+ : RecordVisitor(), C(std::move(F)), LogVersion(L) {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+ // Must be called after all the records have been processed, to handle the
+ // most recent record generated.
+ Error flush();
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // INCLUDE_LLVM_XRAY_FDRTRACEEXPANDER_H_
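
A sketch of expanding one block's records into XRayRecord values via a callback; the log version and the accumulating lambda are illustrative assumptions:

    std::vector<llvm::xray::XRayRecord> Out;
    auto Accumulate = [&](const llvm::xray::XRayRecord &R) { Out.push_back(R); };
    llvm::xray::TraceExpander Expander(Accumulate, /*LogVersion=*/5);
    // for (auto &R : Records)
    //   if (auto E = R->apply(Expander)) ... handle the error ...
    // if (auto E = Expander.flush()) ... handle the error ...  // last record
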
diff --git a/contrib/llvm/include/llvm/XRay/FDRTraceWriter.h b/contrib/llvm/include/llvm/XRay/FDRTraceWriter.h
new file mode 100644
index 000000000000..7b3b5fa25eff
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FDRTraceWriter.h
@@ -0,0 +1,56 @@
+//===- FDRTraceWriter.h - XRay FDR Trace Writer -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A utility that can write out XRay FDR Mode formatted trace files.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRTRACEWRITER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_FDRTRACEWRITER_H_
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/XRayRecord.h"
+
+namespace llvm {
+namespace xray {
+
+/// The FDRTraceWriter allows us to hand-craft an XRay Flight Data Recorder
+/// (FDR) mode log file. This is used primarily for testing, generating
+/// sequences of FDR records that can be read/processed. It can also be used to
+/// generate various kinds of execution traces without using the XRay runtime.
+/// Note that this writer does not do any validation, but uses the types of
+/// records defined in the FDRRecords.h file.
+class FDRTraceWriter : public RecordVisitor {
+public:
+ // Construct an FDRTraceWriter associated with an output stream.
+ explicit FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H);
+ ~FDRTraceWriter();
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+
+private:
+ support::endian::Writer OS;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_FDRTRACEWRITER_H_
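
A sketch of serializing hand-crafted records into an in-memory buffer; the XRayFileHeader field values are illustrative assumptions, and raw_string_ostream comes from llvm/Support/raw_ostream.h:

    std::string Buffer;
    llvm::raw_string_ostream OS(Buffer);
    llvm::xray::XRayFileHeader H;
    H.Version = 5;
    H.Type = 1;              // FDR mode log (illustrative)
    H.ConstantTSC = true;
    H.NonstopTSC = true;
    llvm::xray::FDRTraceWriter Writer(OS, H);
    // for (auto &R : Records)
    //   if (auto E = R->apply(Writer)) ... handle the error ...
    OS.flush();              // Buffer now holds an FDR-formatted log
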
diff --git a/contrib/llvm/include/llvm/XRay/FileHeaderReader.h b/contrib/llvm/include/llvm/XRay/FileHeaderReader.h
new file mode 100644
index 000000000000..3b8809bdbb34
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/FileHeaderReader.h
@@ -0,0 +1,33 @@
+//===- FileHeaderReader.h - XRay Trace File Header Reading Function -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares functions that can load an XRay log header from various
+// sources.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_XRAY_FILEHEADERREADER_H_
+#define LLVM_LIB_XRAY_FILEHEADERREADER_H_
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/XRay/XRayRecord.h"
+#include <cstdint>
+
+namespace llvm {
+namespace xray {
+
+/// Convenience function for loading the file header given a data extractor at a
+/// specified offset.
+Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
+ uint32_t &OffsetPtr);
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_LIB_XRAY_FILEHEADERREADER_H_
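
A minimal sketch of reading the header out of a raw buffer, assuming Data holds the contents of an XRay log:

    llvm::Expected<llvm::xray::XRayFileHeader> readHeader(llvm::StringRef Data) {
      llvm::DataExtractor HeaderExtractor(Data, /*IsLittleEndian=*/true,
                                          /*AddressSize=*/8);
      uint32_t OffsetPtr = 0;
      // On success, OffsetPtr is advanced to just past the header.
      return llvm::xray::readBinaryFormatHeader(HeaderExtractor, OffsetPtr);
    }
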
diff --git a/contrib/llvm/include/llvm/XRay/Profile.h b/contrib/llvm/include/llvm/XRay/Profile.h
new file mode 100644
index 000000000000..9365630358e8
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/Profile.h
@@ -0,0 +1,150 @@
+//===- Profile.h - XRay Profile Abstraction -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the XRay Profile class representing the latency profile generated by
+// XRay's profiling mode.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_XRAY_PROFILE_H
+#define LLVM_XRAY_PROFILE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+namespace xray {
+
+class Profile;
+
+// We forward declare the Trace type for turning a Trace into a Profile.
+class Trace;
+
+/// This function will attempt to load an XRay Profiling Mode profile from the
+/// provided |Filename|.
+///
+/// For any errors encountered while loading the profile data from |Filename|,
+/// this function will return an Error describing the failure.
+Expected<Profile> loadProfile(StringRef Filename);
+
+/// This algorithm will merge two Profile instances into a single Profile
+/// instance, aggregating blocks by Thread ID.
+Profile mergeProfilesByThread(const Profile &L, const Profile &R);
+
+/// This algorithm will merge two Profile instances into a single Profile
+/// instance, aggregating blocks by function call stack.
+Profile mergeProfilesByStack(const Profile &L, const Profile &R);
+
+/// This function takes a Trace and creates a Profile instance from it.
+Expected<Profile> profileFromTrace(const Trace &T);
+
+/// Profile instances are thread-compatible.
+class Profile {
+public:
+ using ThreadID = uint64_t;
+ using PathID = unsigned;
+ using FuncID = int32_t;
+
+ struct Data {
+ uint64_t CallCount;
+ uint64_t CumulativeLocalTime;
+ };
+
+ struct Block {
+ ThreadID Thread;
+ std::vector<std::pair<PathID, Data>> PathData;
+ };
+
+ /// Provides a sequence of function IDs from a previously interned PathID.
+ ///
+ /// Returns an error if |P| has not previously been interned into the Profile.
+ ///
+ Expected<std::vector<FuncID>> expandPath(PathID P) const;
+
+ /// The stack represented in |P| must be in stack order (leaf to root). Calls
+ /// with the same sequence of function IDs always return the same PathID.
+ PathID internPath(ArrayRef<FuncID> P);
+
+ /// Appends a fully-formed Block instance into the Profile.
+ ///
+ /// Returns an error condition in the following cases:
+ ///
+ /// - The PathData component of the Block is empty
+ ///
+ Error addBlock(Block &&B);
+
+ Profile() = default;
+ ~Profile() = default;
+
+ Profile(Profile &&O) noexcept
+ : Blocks(std::move(O.Blocks)), NodeStorage(std::move(O.NodeStorage)),
+ Roots(std::move(O.Roots)), PathIDMap(std::move(O.PathIDMap)),
+ NextID(O.NextID) {}
+
+ Profile &operator=(Profile &&O) noexcept {
+ Blocks = std::move(O.Blocks);
+ NodeStorage = std::move(O.NodeStorage);
+ Roots = std::move(O.Roots);
+ PathIDMap = std::move(O.PathIDMap);
+ NextID = O.NextID;
+ return *this;
+ }
+
+ Profile(const Profile &);
+ Profile &operator=(const Profile &);
+
+ friend void swap(Profile &L, Profile &R) {
+ using std::swap;
+ swap(L.Blocks, R.Blocks);
+ swap(L.NodeStorage, R.NodeStorage);
+ swap(L.Roots, R.Roots);
+ swap(L.PathIDMap, R.PathIDMap);
+ swap(L.NextID, R.NextID);
+ }
+
+private:
+ using BlockList = std::list<Block>;
+
+ struct TrieNode {
+ FuncID Func = 0;
+ std::vector<TrieNode *> Callees{};
+ TrieNode *Caller = nullptr;
+ PathID ID = 0;
+ };
+
+ // List of blocks associated with a Profile.
+ BlockList Blocks;
+
+ // List of TrieNode elements we've seen.
+ std::list<TrieNode> NodeStorage;
+
+ // List of call stack roots.
+ SmallVector<TrieNode *, 4> Roots;
+
+ // Reverse mapping between a PathID to a TrieNode*.
+ DenseMap<PathID, TrieNode *> PathIDMap;
+
+ // Used to identify paths.
+ PathID NextID = 1;
+
+public:
+ using const_iterator = BlockList::const_iterator;
+ const_iterator begin() const { return Blocks.begin(); }
+ const_iterator end() const { return Blocks.end(); }
+ bool empty() const { return Blocks.empty(); }
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif
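
A sketch of the intended flow: intern a leaf-to-root call stack, attach data to a block, then expand the path back into function IDs; the IDs and counters are made up:

    llvm::Error buildProfile() {
      llvm::xray::Profile P;
      // Leaf-to-root: function 3 was called by 2, which was called by 1.
      auto Path = P.internPath({3, 2, 1});
      llvm::xray::Profile::Block B;
      B.Thread = 42;
      llvm::xray::Profile::Data D{/*CallCount=*/1, /*CumulativeLocalTime=*/100};
      B.PathData.emplace_back(Path, D);
      if (auto E = P.addBlock(std::move(B)))
        return E;                             // rejected if PathData were empty
      auto FuncsOrErr = P.expandPath(Path);   // yields {3, 2, 1} again on success
      if (!FuncsOrErr)
        return FuncsOrErr.takeError();
      return llvm::Error::success();
    }
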
diff --git a/contrib/llvm/include/llvm/XRay/RecordPrinter.h b/contrib/llvm/include/llvm/XRay/RecordPrinter.h
new file mode 100644
index 000000000000..649c64ab6f5c
--- /dev/null
+++ b/contrib/llvm/include/llvm/XRay/RecordPrinter.h
@@ -0,0 +1,50 @@
+//===- RecordPrinter.h - FDR Record Printer -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which prints an individual record's
+// data in an ad-hoc format, suitable for human inspection.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_INCLUDE_LLVM_XRAY_RECORDPRINTER_H_
+#define LLVM_INCLUDE_LLVM_XRAY_RECORDPRINTER_H_
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/XRay/FDRRecords.h"
+
+namespace llvm {
+namespace xray {
+
+class RecordPrinter : public RecordVisitor {
+ raw_ostream &OS;
+ std::string Delim;
+
+public:
+ explicit RecordPrinter(raw_ostream &O, std::string D)
+ : RecordVisitor(), OS(O), Delim(std::move(D)) {}
+
+ explicit RecordPrinter(raw_ostream &O) : RecordPrinter(O, "") {}
+
+ Error visit(BufferExtents &) override;
+ Error visit(WallclockRecord &) override;
+ Error visit(NewCPUIDRecord &) override;
+ Error visit(TSCWrapRecord &) override;
+ Error visit(CustomEventRecord &) override;
+ Error visit(CallArgRecord &) override;
+ Error visit(PIDRecord &) override;
+ Error visit(NewBufferRecord &) override;
+ Error visit(EndBufferRecord &) override;
+ Error visit(FunctionRecord &) override;
+ Error visit(CustomEventRecordV5 &) override;
+ Error visit(TypedEventRecord &) override;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_INCLUDE_LLVM_XRAY_RECORDPRINTER_H_
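
A short sketch of printing a single hand-built record to standard output; llvm::outs() and consumeError come from LLVM's Support library:

    llvm::xray::RecordPrinter Printer(llvm::outs(), "\n");
    llvm::xray::WallclockRecord WC(/*Seconds=*/1, /*Nanos=*/2);
    if (auto E = WC.apply(Printer))
      llvm::consumeError(std::move(E));  // printing failures are non-fatal here
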
diff --git a/contrib/llvm/include/llvm/XRay/Trace.h b/contrib/llvm/include/llvm/XRay/Trace.h
index 6b033d686b06..924addd1560d 100644
--- a/contrib/llvm/include/llvm/XRay/Trace.h
+++ b/contrib/llvm/include/llvm/XRay/Trace.h
@@ -17,8 +17,8 @@
#include <vector>
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/XRay/XRayRecord.h"
namespace llvm {
@@ -46,25 +46,35 @@ namespace xray {
///
class Trace {
XRayFileHeader FileHeader;
- std::vector<XRayRecord> Records;
+ using RecordVector = std::vector<XRayRecord>;
+ RecordVector Records;
typedef std::vector<XRayRecord>::const_iterator citerator;
- friend Expected<Trace> loadTraceFile(StringRef, bool);
+ friend Expected<Trace> loadTrace(const DataExtractor &, bool);
public:
+ using size_type = RecordVector::size_type;
+ using value_type = RecordVector::value_type;
+ using const_iterator = RecordVector::const_iterator;
+
/// Provides access to the loaded XRay trace file header.
const XRayFileHeader &getFileHeader() const { return FileHeader; }
- citerator begin() const { return Records.begin(); }
- citerator end() const { return Records.end(); }
- size_t size() const { return Records.size(); }
+ const_iterator begin() const { return Records.begin(); }
+ const_iterator end() const { return Records.end(); }
+ bool empty() const { return Records.empty(); }
+ size_type size() const { return Records.size(); }
};
/// This function will attempt to load XRay trace records from the provided
/// |Filename|.
Expected<Trace> loadTraceFile(StringRef Filename, bool Sort = false);
+/// This function will attempt to load XRay trace records from the provided
+/// DataExtractor.
+Expected<Trace> loadTrace(const DataExtractor &Extractor, bool Sort = false);
+
} // namespace xray
} // namespace llvm
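
The file-based entry point keeps working as before; a sketch of loading a trace and walking it with the new iterator typedefs (the counting logic is illustrative):

    llvm::Expected<size_t> countFunctionEntries(llvm::StringRef Filename) {
      auto TraceOrErr = llvm::xray::loadTraceFile(Filename, /*Sort=*/true);
      if (!TraceOrErr)
        return TraceOrErr.takeError();
      size_t Entries = 0;
      for (const auto &R : *TraceOrErr)
        if (R.Type == llvm::xray::RecordTypes::ENTER)
          ++Entries;
      return Entries;
    }
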
diff --git a/contrib/llvm/include/llvm/XRay/XRayRecord.h b/contrib/llvm/include/llvm/XRay/XRayRecord.h
index 76873447f170..7685ec95838a 100644
--- a/contrib/llvm/include/llvm/XRay/XRayRecord.h
+++ b/contrib/llvm/include/llvm/XRay/XRayRecord.h
@@ -17,6 +17,7 @@
#include <cstdint>
#include <vector>
+#include <string>
namespace llvm {
namespace xray {
@@ -54,10 +55,23 @@ struct XRayFileHeader {
/// This may or may not correspond to actual record types in the raw trace (as
/// the loader implementation may synthesize this information in the process of
/// of loading).
-enum class RecordTypes { ENTER, EXIT, TAIL_EXIT, ENTER_ARG };
+enum class RecordTypes {
+ ENTER,
+ EXIT,
+ TAIL_EXIT,
+ ENTER_ARG,
+ CUSTOM_EVENT,
+ TYPED_EVENT
+};
+/// An XRayRecord is the denormalized view of data associated with a trace.
+/// These records may not correspond to actual entries in the raw traces, but
+/// they are the logical representation of records in a higher-level event log.
struct XRayRecord {
- /// The type of record.
+ /// RecordType values are used as "sub-types" which have meaning in the
+ /// context of the `Type` below. For function call and custom event records,
+ /// the RecordType is always 0, while for typed events we store the type in
+ /// the RecordType field.
uint16_t RecordType;
/// The CPU where the thread is running. We assume number of CPUs <= 65536.
@@ -66,7 +80,7 @@ struct XRayRecord {
/// Identifies the type of record.
RecordTypes Type;
- /// The function ID for the record.
+ /// The function ID for the record, if this is a function call record.
int32_t FuncId;
/// Get the full 8 bytes of the TSC when we get the log record.
@@ -80,6 +94,9 @@ struct XRayRecord {
/// The function call arguments.
std::vector<uint64_t> CallArgs;
+
+ /// For custom and typed events, we provide the raw data from the trace.
+ std::string Data;
};
} // namespace xray
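
With the new enumerators, a typed event can be expressed directly in the denormalized form; a sketch using the member order shown in FDRTraceExpander.h, with made-up values:

    // Member order mirrors the XRayRecord initialization in FDRTraceExpander.h.
    llvm::xray::XRayRecord R{/*RecordType=*/7, /*CPU=*/0,
                             llvm::xray::RecordTypes::TYPED_EVENT,
                             /*FuncId=*/0, /*TSC=*/10001, /*TId=*/42,
                             /*PId=*/1, /*CallArgs=*/{}, /*Data=*/"payload"};
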
diff --git a/contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h b/contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h
index 0de9ea0968e6..6150196ed98d 100644
--- a/contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h
+++ b/contrib/llvm/include/llvm/XRay/YAMLXRayRecord.h
@@ -39,6 +39,7 @@ struct YAMLXRayRecord {
uint32_t TId;
uint32_t PId;
std::vector<uint64_t> CallArgs;
+ std::string Data;
};
struct YAMLXRayTrace {
@@ -58,6 +59,8 @@ template <> struct ScalarEnumerationTraits<xray::RecordTypes> {
IO.enumCase(Type, "function-exit", xray::RecordTypes::EXIT);
IO.enumCase(Type, "function-tail-exit", xray::RecordTypes::TAIL_EXIT);
IO.enumCase(Type, "function-enter-arg", xray::RecordTypes::ENTER_ARG);
+ IO.enumCase(Type, "custom-event", xray::RecordTypes::CUSTOM_EVENT);
+ IO.enumCase(Type, "typed-event", xray::RecordTypes::TYPED_EVENT);
}
};
@@ -73,16 +76,16 @@ template <> struct MappingTraits<xray::YAMLXRayFileHeader> {
template <> struct MappingTraits<xray::YAMLXRayRecord> {
static void mapping(IO &IO, xray::YAMLXRayRecord &Record) {
- // FIXME: Make this type actually be descriptive
IO.mapRequired("type", Record.RecordType);
- IO.mapRequired("func-id", Record.FuncId);
+ IO.mapOptional("func-id", Record.FuncId);
IO.mapOptional("function", Record.Function);
IO.mapOptional("args", Record.CallArgs);
IO.mapRequired("cpu", Record.CPU);
- IO.mapRequired("thread", Record.TId);
+ IO.mapOptional("thread", Record.TId, 0U);
IO.mapOptional("process", Record.PId, 0U);
IO.mapRequired("kind", Record.Type);
IO.mapRequired("tsc", Record.TSC);
+ IO.mapOptional("data", Record.Data);
}
static constexpr bool flow = true;
diff --git a/contrib/llvm/include/llvm/module.extern.modulemap b/contrib/llvm/include/llvm/module.extern.modulemap
new file mode 100644
index 000000000000..8acda137e044
--- /dev/null
+++ b/contrib/llvm/include/llvm/module.extern.modulemap
@@ -0,0 +1,5 @@
+module LLVM_Extern_Config_Def {}
+module LLVM_Extern_IR_Attributes_Gen {}
+module LLVM_Extern_IR_Intrinsics_Gen {}
+module LLVM_Extern_IR_Intrinsics_Enum {}
+module LLVM_Extern_Utils_DataTypes {}
diff --git a/contrib/llvm/include/llvm/module.install.modulemap b/contrib/llvm/include/llvm/module.install.modulemap
new file mode 100644
index 000000000000..ac73a8612326
--- /dev/null
+++ b/contrib/llvm/include/llvm/module.install.modulemap
@@ -0,0 +1,27 @@
+
+module LLVM_Extern_Config_Def {
+ textual header "Config/AsmParsers.def"
+ textual header "Config/AsmPrinters.def"
+ textual header "Config/Disassemblers.def"
+ textual header "Config/Targets.def"
+ export *
+}
+
+module LLVM_Extern_IR_Attributes_Gen {
+ textual header "IR/Attributes.gen"
+ textual header "IR/Attributes.inc"
+}
+
+module LLVM_Extern_IR_Intrinsics_Gen {
+ textual header "IR/Intrinsics.gen"
+ textual header "IR/Intrinsics.inc"
+}
+
+module LLVM_Extern_IR_Intrinsics_Enum {
+ textual header "IR/IntrinsicEnums.inc"
+}
+
+module LLVM_Extern_Utils_DataTypes {
+ header "Support/DataTypes.h"
+ export *
+}
diff --git a/contrib/llvm/include/llvm/module.modulemap b/contrib/llvm/include/llvm/module.modulemap
index 649cdf3b0a89..bcc12534ec85 100644
--- a/contrib/llvm/include/llvm/module.modulemap
+++ b/contrib/llvm/include/llvm/module.modulemap
@@ -7,7 +7,11 @@ module LLVM_Analysis {
textual header "Analysis/TargetLibraryInfo.def"
}
-module LLVM_AsmParser { requires cplusplus umbrella "AsmParser" module * { export * } }
+module LLVM_AsmParser {
+ requires cplusplus
+ umbrella "AsmParser"
+ module * { export * }
+}
// A module covering CodeGen/ and Target/. These are intertwined
// and codependent, and thus notionally form a single module.
@@ -27,14 +31,21 @@ module LLVM_Backend {
textual header "CodeGen/CommandFlags.inc"
textual header "CodeGen/DIEValue.def"
}
+}
- module Target {
- umbrella "Target"
- module * { export * }
- }
+// FIXME: Make this as a submodule of LLVM_Backend again.
+// Doing so causes a linker error in clang-format.
+module LLVM_Backend_Target {
+ umbrella "Target"
+ module * { export * }
+}
+
+module LLVM_Bitcode {
+ requires cplusplus
+ umbrella "Bitcode"
+ module * { export * }
}
-module LLVM_Bitcode { requires cplusplus umbrella "Bitcode" module * { export * } }
module LLVM_BinaryFormat {
requires cplusplus
@@ -52,6 +63,7 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/ELFRelocs/i386.def"
textual header "BinaryFormat/ELFRelocs/Lanai.def"
textual header "BinaryFormat/ELFRelocs/Mips.def"
+ textual header "BinaryFormat/ELFRelocs/MSP430.def"
textual header "BinaryFormat/ELFRelocs/PowerPC64.def"
textual header "BinaryFormat/ELFRelocs/PowerPC.def"
textual header "BinaryFormat/ELFRelocs/RISCV.def"
@@ -59,9 +71,15 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/ELFRelocs/SystemZ.def"
textual header "BinaryFormat/ELFRelocs/x86_64.def"
textual header "BinaryFormat/WasmRelocs.def"
+ textual header "BinaryFormat/MsgPack.def"
}
-module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } }
+module LLVM_Config {
+ requires cplusplus
+ umbrella "Config"
+ extern module LLVM_Extern_Config_Def "module.extern.modulemap"
+ module * { export * }
+}
module LLVM_DebugInfo {
requires cplusplus
@@ -87,12 +105,14 @@ module LLVM_DebugInfo_PDB {
// FIXME: There should be a better way to specify this.
exclude header "DebugInfo/PDB/DIA/DIADataStream.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+ exclude header "DebugInfo/PDB/DIA/DIAEnumFrameData.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumInjectedSources.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumSectionContribs.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumSourceFiles.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumSymbols.h"
exclude header "DebugInfo/PDB/DIA/DIAEnumTables.h"
+ exclude header "DebugInfo/PDB/DIA/DIAFrameData.h"
exclude header "DebugInfo/PDB/DIA/DIAInjectedSource.h"
exclude header "DebugInfo/PDB/DIA/DIALineNumber.h"
exclude header "DebugInfo/PDB/DIA/DIARawSymbol.h"
@@ -177,7 +197,11 @@ module LLVM_intrinsic_gen {
// Attributes.h
module IR_Argument { header "IR/Argument.h" export * }
- module IR_Attributes { header "IR/Attributes.h" export * }
+ module IR_Attributes {
+ header "IR/Attributes.h"
+ extern module LLVM_Extern_IR_Attributes_Gen "module.extern.modulemap"
+ export *
+ }
module IR_CallSite { header "IR/CallSite.h" export * }
module IR_ConstantFolder { header "IR/ConstantFolder.h" export * }
module IR_GlobalVariable { header "IR/GlobalVariable.h" export * }
@@ -192,6 +216,7 @@ module LLVM_intrinsic_gen {
// Intrinsics.h
module IR_CFG { header "IR/CFG.h" export * }
+ module IR_CFGDiff { header "IR/CFGDiff.h" export * }
module IR_ConstantRange { header "IR/ConstantRange.h" export * }
module IR_Dominators { header "IR/Dominators.h" export * }
module Analysis_PostDominators { header "Analysis/PostDominators.h" export * }
@@ -202,7 +227,12 @@ module LLVM_intrinsic_gen {
module IR_Verifier { header "IR/Verifier.h" export * }
module IR_InstIterator { header "IR/InstIterator.h" export * }
module IR_InstVisitor { header "IR/InstVisitor.h" export * }
- module IR_Intrinsics { header "IR/Intrinsics.h" export * }
+ module IR_Intrinsics {
+ header "IR/Intrinsics.h"
+ extern module LLVM_Extern_IR_Intrinsics_Gen "module.extern.modulemap"
+ extern module LLVM_Extern_IR_Intrinsics_Enum "module.extern.modulemap"
+ export *
+ }
module IR_IntrinsicInst { header "IR/IntrinsicInst.h" export * }
module IR_PatternMatch { header "IR/PatternMatch.h" export * }
module IR_Statepoint { header "IR/Statepoint.h" export * }
@@ -224,9 +254,23 @@ module LLVM_IR {
textual header "IR/RuntimeLibcalls.def"
}
-module LLVM_IRReader { requires cplusplus umbrella "IRReader" module * { export * } }
-module LLVM_LineEditor { requires cplusplus umbrella "LineEditor" module * { export * } }
-module LLVM_LTO { requires cplusplus umbrella "LTO" module * { export * } }
+module LLVM_IRReader {
+ requires cplusplus
+ umbrella "IRReader"
+ module * { export * }
+}
+
+module LLVM_LineEditor {
+ requires cplusplus
+ umbrella "LineEditor"
+ module * { export * }
+}
+
+module LLVM_LTO {
+ requires cplusplus
+ umbrella "LTO"
+ module * { export * }
+}
module LLVM_MC {
requires cplusplus
@@ -253,7 +297,11 @@ module LLVM_Object {
module * { export * }
}
-module LLVM_Option { requires cplusplus umbrella "Option" module * { export * } }
+module LLVM_Option {
+ requires cplusplus
+ umbrella "Option"
+ module * { export * }
+}
module LLVM_ProfileData {
requires cplusplus
@@ -271,7 +319,11 @@ module LLVM_Support_TargetRegistry {
export *
}
-module LLVM_TableGen { requires cplusplus umbrella "TableGen" module * { export * } }
+module LLVM_TableGen {
+ requires cplusplus
+ umbrella "TableGen"
+ module * { export * }
+}
module LLVM_Transforms {
requires cplusplus
@@ -279,6 +331,8 @@ module LLVM_Transforms {
module * { export * }
}
+extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap"
+
// A module covering ADT/ and Support/. These are intertwined and
// codependent, and notionally form a single module.
module LLVM_Utils {
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index a6585df949f8..3446aef39938 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -40,7 +40,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -118,11 +117,11 @@ bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
return false;
}
-ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getArgModRefInfo(CS, ArgIdx));
+ Result = intersectModRef(Result, AA->getArgModRefInfo(Call, ArgIdx));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -132,11 +131,11 @@ ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
return Result;
}
-ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
// We may have two calls.
- if (auto CS = ImmutableCallSite(I)) {
+ if (const auto *Call1 = dyn_cast<CallBase>(I)) {
// Check if the two calls modify the same memory.
- return getModRefInfo(CS, Call);
+ return getModRefInfo(Call1, Call2);
} else if (I->isFenceLike()) {
// If this is a fence, just return ModRef.
return ModRefInfo::ModRef;
@@ -146,19 +145,19 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I);
- ModRefInfo MR = getModRefInfo(Call, DefLoc);
+ ModRefInfo MR = getModRefInfo(Call2, DefLoc);
if (isModOrRefSet(MR))
return setModAndRef(MR);
}
return ModRefInfo::NoModRef;
}
-ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(CS, Loc));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -167,7 +166,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
- auto MRB = getModRefBehavior(CS);
+ auto MRB = getModRefBehavior(Call);
if (MRB == FMRB_DoesNotAccessMemory ||
MRB == FMRB_OnlyAccessesInaccessibleMem)
return ModRefInfo::NoModRef;
@@ -178,20 +177,19 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
Result = clearRef(Result);
if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
- bool DoesAlias = false;
bool IsMustAlias = true;
ModRefInfo AllArgsMask = ModRefInfo::NoModRef;
if (doesAccessArgPointees(MRB)) {
- for (auto AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) {
+ for (auto AI = Call->arg_begin(), AE = Call->arg_end(); AI != AE; ++AI) {
const Value *Arg = *AI;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned ArgIdx = std::distance(CS.arg_begin(), AI);
- MemoryLocation ArgLoc = MemoryLocation::getForArgument(CS, ArgIdx, TLI);
+ unsigned ArgIdx = std::distance(Call->arg_begin(), AI);
+ MemoryLocation ArgLoc =
+ MemoryLocation::getForArgument(Call, ArgIdx, TLI);
AliasResult ArgAlias = alias(ArgLoc, Loc);
if (ArgAlias != NoAlias) {
- ModRefInfo ArgMask = getArgModRefInfo(CS, ArgIdx);
- DoesAlias = true;
+ ModRefInfo ArgMask = getArgModRefInfo(Call, ArgIdx);
AllArgsMask = unionModRef(AllArgsMask, ArgMask);
}
// Conservatively clear IsMustAlias unless only MustAlias is found.
@@ -199,7 +197,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
}
}
// Return NoModRef if no alias found with any argument.
- if (!DoesAlias)
+ if (isNoModRef(AllArgsMask))
return ModRefInfo::NoModRef;
// Logical & between other AA analyses and argument analysis.
Result = intersectModRef(Result, AllArgsMask);
@@ -215,12 +213,12 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
return Result;
}
-ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(CS1, CS2));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -230,59 +228,61 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
- // If CS1 or CS2 are readnone, they don't interact.
- auto CS1B = getModRefBehavior(CS1);
- if (CS1B == FMRB_DoesNotAccessMemory)
+ // If Call1 or Call2 are readnone, they don't interact.
+ auto Call1B = getModRefBehavior(Call1);
+ if (Call1B == FMRB_DoesNotAccessMemory)
return ModRefInfo::NoModRef;
- auto CS2B = getModRefBehavior(CS2);
- if (CS2B == FMRB_DoesNotAccessMemory)
+ auto Call2B = getModRefBehavior(Call2);
+ if (Call2B == FMRB_DoesNotAccessMemory)
return ModRefInfo::NoModRef;
// If they both only read from memory, there is no dependence.
- if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
+ if (onlyReadsMemory(Call1B) && onlyReadsMemory(Call2B))
return ModRefInfo::NoModRef;
- // If CS1 only reads memory, the only dependence on CS2 can be
- // from CS1 reading memory written by CS2.
- if (onlyReadsMemory(CS1B))
+ // If Call1 only reads memory, the only dependence on Call2 can be
+ // from Call1 reading memory written by Call2.
+ if (onlyReadsMemory(Call1B))
Result = clearMod(Result);
- else if (doesNotReadMemory(CS1B))
+ else if (doesNotReadMemory(Call1B))
Result = clearRef(Result);
- // If CS2 only access memory through arguments, accumulate the mod/ref
- // information from CS1's references to the memory referenced by
- // CS2's arguments.
- if (onlyAccessesArgPointees(CS2B)) {
- if (!doesAccessArgPointees(CS2B))
+ // If Call2 only access memory through arguments, accumulate the mod/ref
+ // information from Call1's references to the memory referenced by
+ // Call2's arguments.
+ if (onlyAccessesArgPointees(Call2B)) {
+ if (!doesAccessArgPointees(Call2B))
return ModRefInfo::NoModRef;
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
- for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ for (auto I = Call2->arg_begin(), E = Call2->arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
- auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI);
-
- // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the
- // dependence of CS1 on that location is the inverse:
- // - If CS2 modifies location, dependence exists if CS1 reads or writes.
- // - If CS2 only reads location, dependence exists if CS1 writes.
- ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx);
+ unsigned Call2ArgIdx = std::distance(Call2->arg_begin(), I);
+ auto Call2ArgLoc =
+ MemoryLocation::getForArgument(Call2, Call2ArgIdx, TLI);
+
+ // ArgModRefC2 indicates what Call2 might do to Call2ArgLoc, and the
+ // dependence of Call1 on that location is the inverse:
+ // - If Call2 modifies location, dependence exists if Call1 reads or
+ // writes.
+ // - If Call2 only reads location, dependence exists if Call1 writes.
+ ModRefInfo ArgModRefC2 = getArgModRefInfo(Call2, Call2ArgIdx);
ModRefInfo ArgMask = ModRefInfo::NoModRef;
- if (isModSet(ArgModRefCS2))
+ if (isModSet(ArgModRefC2))
ArgMask = ModRefInfo::ModRef;
- else if (isRefSet(ArgModRefCS2))
+ else if (isRefSet(ArgModRefC2))
ArgMask = ModRefInfo::Mod;
- // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use
+ // ModRefC1 indicates what Call1 might do to Call2ArgLoc, and we use
// above ArgMask to update dependence info.
- ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc);
- ArgMask = intersectModRef(ArgMask, ModRefCS1);
+ ModRefInfo ModRefC1 = getModRefInfo(Call1, Call2ArgLoc);
+ ArgMask = intersectModRef(ArgMask, ModRefC1);
// Conservatively clear IsMustAlias unless only MustAlias is found.
- IsMustAlias &= isMustSet(ModRefCS1);
+ IsMustAlias &= isMustSet(ModRefC1);
R = intersectModRef(unionModRef(R, ArgMask), Result);
if (R == Result) {
@@ -300,31 +300,32 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
return IsMustAlias ? setMust(R) : clearMust(R);
}
- // If CS1 only accesses memory through arguments, check if CS2 references
- // any of the memory referenced by CS1's arguments. If not, return NoModRef.
- if (onlyAccessesArgPointees(CS1B)) {
- if (!doesAccessArgPointees(CS1B))
+ // If Call1 only accesses memory through arguments, check if Call2 references
+ // any of the memory referenced by Call1's arguments. If not, return NoModRef.
+ if (onlyAccessesArgPointees(Call1B)) {
+ if (!doesAccessArgPointees(Call1B))
return ModRefInfo::NoModRef;
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
- for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+ for (auto I = Call1->arg_begin(), E = Call1->arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
- auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI);
-
- // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might
- // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If
- // CS1 might Ref, then we care only about a Mod by CS2.
- ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx);
- ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc);
- if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) ||
- (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2)))
- R = intersectModRef(unionModRef(R, ArgModRefCS1), Result);
+ unsigned Call1ArgIdx = std::distance(Call1->arg_begin(), I);
+ auto Call1ArgLoc =
+ MemoryLocation::getForArgument(Call1, Call1ArgIdx, TLI);
+
+ // ArgModRefC1 indicates what Call1 might do to Call1ArgLoc; if Call1
+ // might Mod Call1ArgLoc, then we care about either a Mod or a Ref by
+ // Call2. If Call1 might Ref, then we care only about a Mod by Call2.
+ ModRefInfo ArgModRefC1 = getArgModRefInfo(Call1, Call1ArgIdx);
+ ModRefInfo ModRefC2 = getModRefInfo(Call2, Call1ArgLoc);
+ if ((isModSet(ArgModRefC1) && isModOrRefSet(ModRefC2)) ||
+ (isRefSet(ArgModRefC1) && isModSet(ModRefC2)))
+ R = intersectModRef(unionModRef(R, ArgModRefC1), Result);
// Conservatively clear IsMustAlias unless only MustAlias is found.
- IsMustAlias &= isMustSet(ModRefCS2);
+ IsMustAlias &= isMustSet(ModRefC2);
if (R == Result) {
// On early exit, not all args were checked, cannot set Must.
@@ -344,11 +345,11 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
return Result;
}
-FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) {
+FunctionModRefBehavior AAResults::getModRefBehavior(const CallBase *Call) {
FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
for (const auto &AA : AAs) {
- Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS));
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(Call));
// Early-exit the moment we reach the bottom of the lattice.
if (Result == FMRB_DoesNotAccessMemory)
@@ -560,8 +561,8 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
isa<Constant>(Object))
return ModRefInfo::ModRef;
- ImmutableCallSite CS(I);
- if (!CS.getInstruction() || CS.getInstruction() == Object)
+ const auto *Call = dyn_cast<CallBase>(I);
+ if (!Call || Call == Object)
return ModRefInfo::ModRef;
if (PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
@@ -574,14 +575,14 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
// Set flag only if no May found and all operands processed.
- for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
+ for (auto CI = Call->data_operands_begin(), CE = Call->data_operands_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(ArgNo) &&
- ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo)))
+ (!Call->doesNotCapture(ArgNo) && ArgNo < Call->getNumArgOperands() &&
+ !Call->isByValArgument(ArgNo)))
continue;
AliasResult AR = alias(MemoryLocation(*CI), MemoryLocation(Object));
@@ -593,9 +594,9 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
IsMustAlias = false;
if (AR == NoAlias)
continue;
- if (CS.doesNotAccessMemory(ArgNo))
+ if (Call->doesNotAccessMemory(ArgNo))
continue;
- if (CS.onlyReadsMemory(ArgNo)) {
+ if (Call->onlyReadsMemory(ArgNo)) {
R = ModRefInfo::Ref;
continue;
}
@@ -642,28 +643,6 @@ AnalysisKey AAManager::Key;
namespace {
-/// A wrapper pass for external alias analyses. This just squirrels away the
-/// callback used to run any analyses and register their results.
-struct ExternalAAWrapperPass : ImmutablePass {
- using CallbackT = std::function<void(Pass &, Function &, AAResults &)>;
-
- CallbackT CB;
-
- static char ID;
-
- ExternalAAWrapperPass() : ImmutablePass(ID) {
- initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
-
- explicit ExternalAAWrapperPass(CallbackT CB)
- : ImmutablePass(ID), CB(std::move(CB)) {
- initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
} // end anonymous namespace
@@ -799,8 +778,8 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
}
bool llvm::isNoAliasCall(const Value *V) {
- if (auto CS = ImmutableCallSite(V))
- return CS.hasRetAttr(Attribute::NoAlias);
+ if (const auto *Call = dyn_cast<CallBase>(V))
+ return Call->hasRetAttr(Attribute::NoAlias);
return false;
}
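
The mechanical pattern behind most of the hunks above is the replacement of ImmutableCallSite with a direct dyn_cast to CallBase; a sketch, assuming I is an Instruction pointer in scope (not code taken verbatim from the change):

    // Before: ImmutableCallSite CS(I); if (CS) { ... CS.onlyReadsMemory(ArgNo) ... }
    // After:
    if (const auto *Call = llvm::dyn_cast<llvm::CallBase>(I)) {
      for (unsigned ArgNo = 0; ArgNo < Call->getNumArgOperands(); ++ArgNo) {
        if (Call->doesNotAccessMemory(ArgNo))
          continue;                  // this argument is not accessed at all
        if (Call->onlyReadsMemory(ArgNo))
          continue;                  // may only be read through this call
        // otherwise the call may read or write through this argument
      }
    }
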
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 764ae9160350..85dd4fe95b33 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -66,11 +66,10 @@ static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I,
}
}
-static inline void PrintModRefResults(const char *Msg, bool P, CallSite CSA,
- CallSite CSB, Module *M) {
+static inline void PrintModRefResults(const char *Msg, bool P, CallBase *CallA,
+ CallBase *CallB, Module *M) {
if (PrintAll || P) {
- errs() << " " << Msg << ": " << *CSA.getInstruction() << " <-> "
- << *CSB.getInstruction() << '\n';
+ errs() << " " << Msg << ": " << *CallA << " <-> " << *CallB << '\n';
}
}
@@ -98,7 +97,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
++FunctionCount;
SetVector<Value *> Pointers;
- SmallSetVector<CallSite, 16> CallSites;
+ SmallSetVector<CallBase *, 16> Calls;
SetVector<Value *> Loads;
SetVector<Value *> Stores;
@@ -114,16 +113,16 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
- if (auto CS = CallSite(&Inst)) {
- Value *Callee = CS.getCalledValue();
+ if (auto *Call = dyn_cast<CallBase>(&Inst)) {
+ Value *Callee = Call->getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
Pointers.insert(Callee);
// Consider formals.
- for (Use &DataOp : CS.data_ops())
+ for (Use &DataOp : Call->data_ops())
if (isInterestingPointer(DataOp))
Pointers.insert(DataOp);
- CallSites.insert(CS);
+ Calls.insert(Call);
} else {
// Consider all operands.
for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
@@ -136,19 +135,21 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias ||
PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef)
errs() << "Function: " << F.getName() << ": " << Pointers.size()
- << " pointers, " << CallSites.size() << " call sites\n";
+ << " pointers, " << Calls.size() << " call sites\n";
// iterate over the worklist, and run the full (n^2)/2 disambiguations
for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
- uint64_t I1Size = MemoryLocation::UnknownSize;
+ auto I1Size = LocationSize::unknown();
Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
- if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);
+ if (I1ElTy->isSized())
+ I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- uint64_t I2Size = MemoryLocation::UnknownSize;
- Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
- if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
+ auto I2Size = LocationSize::unknown();
+ Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized())
+ I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size);
switch (AR) {
@@ -228,49 +229,48 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
}
// Mod/ref alias analysis: compare all pairs of calls and values
- for (CallSite C : CallSites) {
- Instruction *I = C.getInstruction();
-
+ for (CallBase *Call : Calls) {
for (auto Pointer : Pointers) {
- uint64_t Size = MemoryLocation::UnknownSize;
+ auto Size = LocationSize::unknown();
Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType();
- if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
+ if (ElTy->isSized())
+ Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
- switch (AA.getModRefInfo(C, Pointer, Size)) {
+ switch (AA.getModRefInfo(Call, Pointer, Size)) {
case ModRefInfo::NoModRef:
- PrintModRefResults("NoModRef", PrintNoModRef, I, Pointer,
+ PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer,
F.getParent());
++NoModRefCount;
break;
case ModRefInfo::Mod:
- PrintModRefResults("Just Mod", PrintMod, I, Pointer, F.getParent());
+ PrintModRefResults("Just Mod", PrintMod, Call, Pointer, F.getParent());
++ModCount;
break;
case ModRefInfo::Ref:
- PrintModRefResults("Just Ref", PrintRef, I, Pointer, F.getParent());
+ PrintModRefResults("Just Ref", PrintRef, Call, Pointer, F.getParent());
++RefCount;
break;
case ModRefInfo::ModRef:
- PrintModRefResults("Both ModRef", PrintModRef, I, Pointer,
+ PrintModRefResults("Both ModRef", PrintModRef, Call, Pointer,
F.getParent());
++ModRefCount;
break;
case ModRefInfo::Must:
- PrintModRefResults("Must", PrintMust, I, Pointer, F.getParent());
+ PrintModRefResults("Must", PrintMust, Call, Pointer, F.getParent());
++MustCount;
break;
case ModRefInfo::MustMod:
- PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, I, Pointer,
+ PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, Call, Pointer,
F.getParent());
++MustModCount;
break;
case ModRefInfo::MustRef:
- PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, I, Pointer,
+ PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, Call, Pointer,
F.getParent());
++MustRefCount;
break;
case ModRefInfo::MustModRef:
- PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, I,
+ PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, Call,
Pointer, F.getParent());
++MustModRefCount;
break;
@@ -279,44 +279,46 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
}
// Mod/ref alias analysis: compare all pairs of calls
- for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
- for (auto D = CallSites.begin(); D != Ce; ++D) {
- if (D == C)
+ for (CallBase *CallA : Calls) {
+ for (CallBase *CallB : Calls) {
+ if (CallA == CallB)
continue;
- switch (AA.getModRefInfo(*C, *D)) {
+ switch (AA.getModRefInfo(CallA, CallB)) {
case ModRefInfo::NoModRef:
- PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ PrintModRefResults("NoModRef", PrintNoModRef, CallA, CallB,
+ F.getParent());
++NoModRefCount;
break;
case ModRefInfo::Mod:
- PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ PrintModRefResults("Just Mod", PrintMod, CallA, CallB, F.getParent());
++ModCount;
break;
case ModRefInfo::Ref:
- PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ PrintModRefResults("Just Ref", PrintRef, CallA, CallB, F.getParent());
++RefCount;
break;
case ModRefInfo::ModRef:
- PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ PrintModRefResults("Both ModRef", PrintModRef, CallA, CallB,
+ F.getParent());
++ModRefCount;
break;
case ModRefInfo::Must:
- PrintModRefResults("Must", PrintMust, *C, *D, F.getParent());
+ PrintModRefResults("Must", PrintMust, CallA, CallB, F.getParent());
++MustCount;
break;
case ModRefInfo::MustMod:
- PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, *C, *D,
+ PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, CallA, CallB,
F.getParent());
++MustModCount;
break;
case ModRefInfo::MustRef:
- PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, *C, *D,
+ PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, CallA, CallB,
F.getParent());
++MustRefCount;
break;
case ModRefInfo::MustModRef:
- PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, *C, *D,
- F.getParent());
+ PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, CallA,
+ CallB, F.getParent());
++MustModRefCount;
break;
}
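
As a side note on the LocationSize conversions in the evaluator hunks above, a minimal sketch of the pattern now used to size a pointer operand (the helper name is hypothetical; only the DataLayout and MemoryLocation APIs this file already includes are assumed):

// Sized pointee types get a precise byte count from the DataLayout; unsized
// (e.g. opaque) pointee types fall back to an unknown location size.
static LocationSize getPointerOperandSize(const Value *Ptr,
                                          const DataLayout &DL) {
  Type *ElTy = cast<PointerType>(Ptr->getType())->getElementType();
  return ElTy->isSized() ? LocationSize::precise(DL.getTypeStoreSize(ElTy))
                         : LocationSize::unknown();
}
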
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 8f903fa4f1e8..f6ad704cc914 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -13,9 +13,9 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -24,6 +24,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -55,7 +56,6 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
// Update the alias and access types of this set...
Access |= AS.Access;
Alias |= AS.Alias;
- Volatile |= AS.Volatile;
if (Alias == SetMustAlias) {
// Check that these two merged sets really are must aliases. Since both
@@ -113,10 +113,9 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
if (AliasSet *Fwd = AS->Forward) {
Fwd->dropRef(*this);
AS->Forward = nullptr;
- }
-
- if (AS->Alias == AliasSet::SetMayAlias)
- TotalMayAliasSetSize -= AS->size();
+ } else // Update TotalMayAliasSetSize only if not forwarding.
+ if (AS->Alias == AliasSet::SetMayAlias)
+ TotalMayAliasSetSize -= AS->size();
AliasSets.erase(AS);
}
@@ -169,7 +168,12 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
addRef();
UnknownInsts.emplace_back(I);
- if (!I->mayWriteToMemory()) {
+ // Guards are marked as modifying memory for control flow modelling purposes,
+ // but don't actually modify any specific memory location.
+ using namespace PatternMatch;
+ bool MayWriteMemory = I->mayWriteToMemory() && !isGuard(I) &&
+ !(I->use_empty() && match(I, m_Intrinsic<Intrinsic::invariant_start>()));
+ if (!MayWriteMemory) {
Alias = SetMayAlias;
Access |= RefAccess;
return;
@@ -226,12 +230,13 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
if (AliasAny)
return true;
- if (!Inst->mayReadOrWriteMemory())
- return false;
+ assert(Inst->mayReadOrWriteMemory() &&
+ "Instruction must either read or write memory.");
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (auto *UnknownInst = getUnknownInst(i)) {
- ImmutableCallSite C1(UnknownInst), C2(Inst);
+ const auto *C1 = dyn_cast<CallBase>(UnknownInst);
+ const auto *C2 = dyn_cast<CallBase>(Inst);
if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) ||
isModOrRefSet(AA.getModRefInfo(C2, C1)))
return true;
@@ -246,6 +251,31 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
return false;
}
+Instruction* AliasSet::getUniqueInstruction() {
+ if (AliasAny)
+ // May have collapsed alias set
+ return nullptr;
+ if (begin() != end()) {
+ if (!UnknownInsts.empty())
+ // Another instruction found
+ return nullptr;
+ if (std::next(begin()) != end())
+ // Another instruction found
+ return nullptr;
+ Value *Addr = begin()->getValue();
+ assert(!Addr->user_empty() &&
+ "where's the instruction which added this pointer?");
+ if (std::next(Addr->user_begin()) != Addr->user_end())
+ // Another instruction found -- this is really restrictive
+ // TODO: generalize!
+ return nullptr;
+ return cast<Instruction>(*(Addr->user_begin()));
+ }
+ if (1 != UnknownInsts.size())
+ return nullptr;
+ return cast<Instruction>(UnknownInsts[0]);
+}
+
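
A hypothetical caller of the new AliasSet::getUniqueInstruction() helper (none appears in this diff; AS is assumed to be an AliasSet reference in scope) might use it along these lines:

// If the alias set is known to be touched by exactly one instruction, a
// client can reason about that single access directly instead of summarizing
// the whole set; here we merely check that it is a simple store.
if (Instruction *UniqueI = AS.getUniqueInstruction())
  if (auto *SI = dyn_cast<StoreInst>(UniqueI))
    if (SI->isSimple())
      ; // candidate for further, per-instruction reasoning
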
void AliasSetTracker::clear() {
// Delete all the PointerRec entries.
for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
@@ -280,13 +310,6 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
return FoundSet;
}
-bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
- for (const AliasSet &AS : *this)
- if (!AS.Forward && AS.aliasesUnknownInst(Inst, AA))
- return true;
- return false;
-}
-
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = nullptr;
for (iterator I = begin(), E = end(); I != E;) {
@@ -295,17 +318,18 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
continue;
if (!FoundSet) // If this is the first alias set ptr can go into.
FoundSet = &*Cur; // Remember it.
- else if (!Cur->Forward) // Otherwise, we must merge the sets.
+ else // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
return FoundSet;
}
-/// getAliasSetForPointer - Return the alias set that the specified pointer
-/// lives in.
-AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
- LocationSize Size,
- const AAMDNodes &AAInfo) {
+AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
+
+ Value * const Pointer = const_cast<Value*>(MemLoc.Ptr);
+ const LocationSize Size = MemLoc.Size;
+ const AAMDNodes &AAInfo = MemLoc.AATags;
+
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
if (AliasAnyAS) {
@@ -351,83 +375,32 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
void AliasSetTracker::add(Value *Ptr, LocationSize Size,
const AAMDNodes &AAInfo) {
- addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess);
+ addPointer(MemoryLocation(Ptr, Size, AAInfo), AliasSet::NoAccess);
}
void AliasSetTracker::add(LoadInst *LI) {
- if (isStrongerThanMonotonic(LI->getOrdering())) return addUnknown(LI);
-
- AAMDNodes AAInfo;
- LI->getAAMetadata(AAInfo);
-
- AliasSet::AccessLattice Access = AliasSet::RefAccess;
- const DataLayout &DL = LI->getModule()->getDataLayout();
- AliasSet &AS = addPointer(LI->getOperand(0),
- DL.getTypeStoreSize(LI->getType()), AAInfo, Access);
- if (LI->isVolatile()) AS.setVolatile();
+ if (isStrongerThanMonotonic(LI->getOrdering()))
+ return addUnknown(LI);
+ addPointer(MemoryLocation::get(LI), AliasSet::RefAccess);
}
void AliasSetTracker::add(StoreInst *SI) {
- if (isStrongerThanMonotonic(SI->getOrdering())) return addUnknown(SI);
-
- AAMDNodes AAInfo;
- SI->getAAMetadata(AAInfo);
-
- AliasSet::AccessLattice Access = AliasSet::ModAccess;
- const DataLayout &DL = SI->getModule()->getDataLayout();
- Value *Val = SI->getOperand(0);
- AliasSet &AS = addPointer(
- SI->getOperand(1), DL.getTypeStoreSize(Val->getType()), AAInfo, Access);
- if (SI->isVolatile()) AS.setVolatile();
+ if (isStrongerThanMonotonic(SI->getOrdering()))
+ return addUnknown(SI);
+ addPointer(MemoryLocation::get(SI), AliasSet::ModAccess);
}
void AliasSetTracker::add(VAArgInst *VAAI) {
- AAMDNodes AAInfo;
- VAAI->getAAMetadata(AAInfo);
-
- addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
- AliasSet::ModRefAccess);
+ addPointer(MemoryLocation::get(VAAI), AliasSet::ModRefAccess);
}
void AliasSetTracker::add(AnyMemSetInst *MSI) {
- AAMDNodes AAInfo;
- MSI->getAAMetadata(AAInfo);
-
- uint64_t Len;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet &AS =
- addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
- auto *MS = dyn_cast<MemSetInst>(MSI);
- if (MS && MS->isVolatile())
- AS.setVolatile();
+ addPointer(MemoryLocation::getForDest(MSI), AliasSet::ModAccess);
}
void AliasSetTracker::add(AnyMemTransferInst *MTI) {
- AAMDNodes AAInfo;
- MTI->getAAMetadata(AAInfo);
-
- uint64_t Len;
- if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet &ASSrc =
- addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess);
-
- AliasSet &ASDst =
- addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
-
- auto* MT = dyn_cast<MemTransferInst>(MTI);
- if (MT && MT->isVolatile()) {
- ASSrc.setVolatile();
- ASDst.setVolatile();
- }
+ addPointer(MemoryLocation::getForDest(MTI), AliasSet::ModAccess);
+ addPointer(MemoryLocation::getForSource(MTI), AliasSet::RefAccess);
}
void AliasSetTracker::addUnknown(Instruction *Inst) {
@@ -471,6 +444,46 @@ void AliasSetTracker::add(Instruction *I) {
return add(MSI);
if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(I))
return add(MTI);
+
+ // Handle all calls with known mod/ref sets generically
+ if (auto *Call = dyn_cast<CallBase>(I))
+ if (Call->onlyAccessesArgMemory()) {
+ auto getAccessFromModRef = [](ModRefInfo MRI) {
+ if (isRefSet(MRI) && isModSet(MRI))
+ return AliasSet::ModRefAccess;
+ else if (isModSet(MRI))
+ return AliasSet::ModAccess;
+ else if (isRefSet(MRI))
+ return AliasSet::RefAccess;
+ else
+ return AliasSet::NoAccess;
+ };
+
+ ModRefInfo CallMask = createModRefInfo(AA.getModRefBehavior(Call));
+
+ // Some intrinsics are marked as modifying memory for control flow
+ // modelling purposes, but don't actually modify any specific memory
+ // location.
+ using namespace PatternMatch;
+ if (Call->use_empty() &&
+ match(Call, m_Intrinsic<Intrinsic::invariant_start>()))
+ CallMask = clearMod(CallMask);
+
+ for (auto IdxArgPair : enumerate(Call->args())) {
+ int ArgIdx = IdxArgPair.index();
+ const Value *Arg = IdxArgPair.value();
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ MemoryLocation ArgLoc =
+ MemoryLocation::getForArgument(Call, ArgIdx, nullptr);
+ ModRefInfo ArgMask = AA.getArgModRefInfo(Call, ArgIdx);
+ ArgMask = intersectModRef(CallMask, ArgMask);
+ if (!isNoModRef(ArgMask))
+ addPointer(ArgLoc, getAccessFromModRef(ArgMask));
+ }
+ return;
+ }
+
return addUnknown(I);
}
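
To make the per-argument mask intersection in the hunk above concrete, a small worked sketch with purely illustrative attribute choices: a call that only writes argument memory, combined with a parameter that is only read, contributes nothing to the alias sets.

// CallMask comes from the call's overall behavior (write-only here), ArgMask
// from the individual parameter (read-only here); their intersection carries
// no mod/ref bits, so addPointer() is skipped for that argument.
ModRefInfo CallMask = ModRefInfo::Mod; // e.g. a writeonly, argmemonly callee
ModRefInfo ArgMask = ModRefInfo::Ref;  // e.g. a readonly pointer parameter
ModRefInfo Combined = intersectModRef(CallMask, ArgMask);
assert(isNoModRef(Combined) && "write-only call x read-only arg adds nothing");
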
@@ -496,12 +509,10 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
add(Inst);
// Loop over all of the pointers in this alias set.
- for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
- AliasSet &NewAS =
- addPointer(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo(),
- (AliasSet::AccessLattice)AS.Access);
- if (AS.isVolatile()) NewAS.setVolatile();
- }
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI)
+ addPointer(
+ MemoryLocation(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo()),
+ (AliasSet::AccessLattice)AS.Access);
}
}
@@ -594,10 +605,9 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() {
return *AliasAnyAS;
}
-AliasSet &AliasSetTracker::addPointer(Value *P, LocationSize Size,
- const AAMDNodes &AAInfo,
+AliasSet &AliasSetTracker::addPointer(MemoryLocation Loc,
AliasSet::AccessLattice E) {
- AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo);
+ AliasSet &AS = getAliasSetFor(Loc);
AS.Access |= E;
if (!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold)) {
@@ -623,7 +633,6 @@ void AliasSet::print(raw_ostream &OS) const {
case ModRefAccess: OS << "Mod/Ref "; break;
default: llvm_unreachable("Bad value for Access!");
}
- if (isVolatile()) OS << "[volatile] ";
if (Forward)
OS << " forwarding to " << (void*)Forward;
@@ -632,7 +641,10 @@ void AliasSet::print(raw_ostream &OS) const {
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I != begin()) OS << ", ";
I.getPointer()->printAsOperand(OS << "(");
- OS << ", " << I.getSize() << ")";
+ if (I.getSize() == LocationSize::unknown())
+ OS << ", unknown)";
+ else
+ OS << ", " << I.getSize() << ")";
}
}
if (!UnknownInsts.empty()) {
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 30576cf1ae10..bb8742123a0f 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -39,7 +39,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeDependenceAnalysisWrapperPassPass(Registry);
initializeDelinearizationPass(Registry);
initializeDemandedBitsWrapperPassPass(Registry);
- initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierWrapperPassPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -58,6 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLazyValueInfoPrinterPass(Registry);
+ initializeLegacyDivergenceAnalysisPass(Registry);
initializeLintPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
@@ -77,6 +77,8 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeRegionOnlyPrinterPass(Registry);
initializeSCEVAAWrapperPassPass(Registry);
initializeScalarEvolutionWrapperPassPass(Registry);
+ initializeStackSafetyGlobalInfoWrapperPassPass(Registry);
+ initializeStackSafetyInfoWrapperPassPass(Registry);
initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAAWrapperPassPass(Registry);
initializeScopedNoAliasAAWrapperPassPass(Registry);
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index f9ecbc043261..332eeaa00e73 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -31,7 +31,6 @@
#include "llvm/Analysis/PhiValues.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -68,6 +67,16 @@ using namespace llvm;
/// Enable analysis of recursive PHI nodes.
static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
cl::init(false));
+
+/// By default, even on 32-bit architectures we use 64-bit integers for
+/// calculations. This will allow us to more-aggressively decompose indexing
+/// expressions calculated using i64 values (e.g., long long in C) which is
+/// common enough to worry about.
+static cl::opt<bool> ForceAtLeast64Bits("basicaa-force-at-least-64b",
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> DoubleCalcBits("basicaa-double-calc-bits",
+ cl::Hidden, cl::init(false));
+
/// SearchLimitReached / SearchTimes shows how often the limit of
/// to decompose GEPs is reached. It will affect the precision
/// of basic alias analysis.
@@ -134,7 +143,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
/// Returns true if the pointer is one which would have been considered an
/// escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
- if (ImmutableCallSite(V))
+ if (isa<CallBase>(V))
return true;
if (isa<Argument>(V))
@@ -381,13 +390,22 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
}
/// To ensure a pointer offset fits in an integer of size PointerSize
-/// (in bits) when that size is smaller than 64. This is an issue in
-/// particular for 32b programs with negative indices that rely on two's
-/// complement wrap-arounds for precise alias information.
-static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) {
- assert(PointerSize <= 64 && "Invalid PointerSize!");
- unsigned ShiftBits = 64 - PointerSize;
- return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits;
+/// (in bits) when that size is smaller than the maximum pointer size. This is
+/// an issue in particular for 32b pointers with negative indices that rely on
+/// two's complement wrap-arounds for precise alias information when the
+/// maximum pointer size is 64b.
+static APInt adjustToPointerSize(APInt Offset, unsigned PointerSize) {
+ assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
+ unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
+ return (Offset << ShiftBits).ashr(ShiftBits);
+}
+
+static unsigned getMaxPointerSize(const DataLayout &DL) {
+ unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
+ if (MaxPointerSize < 64 && ForceAtLeast64Bits) MaxPointerSize = 64;
+ if (DoubleCalcBits) MaxPointerSize *= 2;
+
+ return MaxPointerSize;
}
/// If V is a symbolic pointer expression, decompose it into a base pointer
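
A small worked sketch of the sign-extension trick adjustToPointerSize() relies on, with made-up values: a 32-bit index that wrapped to 0xFFFFFFFF during 64-bit GEP math is recovered as the -1 it represents on a 32-bit target.

// Shift the excess high bits out and arithmetic-shift them back in, so the
// value is re-interpreted as a signed PointerSize-bit quantity.
APInt Offset(/*numBits=*/64, 0xFFFFFFFFULL); // "-1" seen through 32-bit math
unsigned PointerSize = 32;
unsigned ShiftBits = Offset.getBitWidth() - PointerSize; // 32
APInt Adjusted = (Offset << ShiftBits).ashr(ShiftBits);
assert(Adjusted.getSExtValue() == -1 && "wrapped 32-bit offset recovered");
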
@@ -410,8 +428,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
unsigned MaxLookup = MaxLookupSearchDepth;
SearchTimes++;
- Decomposed.StructOffset = 0;
- Decomposed.OtherOffset = 0;
+ unsigned MaxPointerSize = getMaxPointerSize(DL);
Decomposed.VarIndices.clear();
do {
// See if this is a bitcast or GEP.
@@ -436,7 +453,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
if (!GEPOp) {
- if (auto CS = ImmutableCallSite(V)) {
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
// CaptureTracking can know about special capturing properties of some
// intrinsics like launder.invariant.group, that can't be expressed with
// the attributes, but have properties like returning aliasing pointer.
@@ -446,7 +463,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
V = RP;
continue;
}
@@ -501,13 +518,15 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (CIdx->isZero())
continue;
Decomposed.OtherOffset +=
- DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue();
+ (DL.getTypeAllocSize(GTI.getIndexedType()) *
+ CIdx->getValue().sextOrSelf(MaxPointerSize))
+ .sextOrTrunc(MaxPointerSize);
continue;
}
GepHasConstantOffset = false;
- uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType());
+ APInt Scale(MaxPointerSize, DL.getTypeAllocSize(GTI.getIndexedType()));
unsigned ZExtBits = 0, SExtBits = 0;
// If the integer type is smaller than the pointer size, it is implicitly
@@ -519,20 +538,34 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
bool NSW = true, NUW = true;
+ const Value *OrigIndex = Index;
Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
SExtBits, DL, 0, AC, DT, NSW, NUW);
- // All GEP math happens in the width of the pointer type,
- // so we can truncate the value to 64-bits as we don't handle
- // currently pointers larger than 64 bits and we would crash
- // later. TODO: Make `Scale` an APInt to avoid this problem.
- if (IndexScale.getBitWidth() > 64)
- IndexScale = IndexScale.sextOrTrunc(64);
-
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
- Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale;
- Scale *= IndexScale.getSExtValue();
+
+ // It can be the case that, even though C1*V+C2 does not overflow for
+ // relevant values of V, (C2*Scale) can overflow. In that case, we cannot
+ // decompose the expression in this way.
+ //
+ // FIXME: C1*Scale and the other operations in the decomposed
+ // (C1*Scale)*V+C2*Scale can also overflow. We should check for this
+ // possibility.
+ APInt WideScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize*2) *
+ Scale.sext(MaxPointerSize*2);
+ if (WideScaledOffset.getMinSignedBits() > MaxPointerSize) {
+ Index = OrigIndex;
+ IndexScale = 1;
+ IndexOffset = 0;
+
+ ZExtBits = SExtBits = 0;
+ if (PointerSize > Width)
+ SExtBits += PointerSize - Width;
+ } else {
+ Decomposed.OtherOffset += IndexOffset.sextOrTrunc(MaxPointerSize) * Scale;
+ Scale *= IndexScale.sextOrTrunc(MaxPointerSize);
+ }
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
@@ -552,9 +585,8 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// pointer size.
Scale = adjustToPointerSize(Scale, PointerSize);
- if (Scale) {
- VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
- static_cast<int64_t>(Scale)};
+ if (!!Scale) {
+ VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale};
Decomposed.VarIndices.push_back(Entry);
}
}
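
The overflow fallback added a hunk earlier (the WideScaledOffset check) can be illustrated with hypothetical numbers: an index whose constant part is 2^62, scaled by an 8-byte element size, needs more than 64 signed bits, so the linear decomposition is abandoned for that index.

// The product is formed at twice MaxPointerSize so the overflow itself is
// representable; if it needs more than MaxPointerSize signed bits, the
// decomposed form (C1*Scale)*V + C2*Scale would be meaningless.
unsigned MaxPointerSize = 64;
APInt IndexOffset(MaxPointerSize, uint64_t(1) << 62); // C2
APInt Scale(MaxPointerSize, 8);                       // element size in bytes
APInt WideScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize * 2) *
                         Scale.sext(MaxPointerSize * 2);
assert(WideScaledOffset.getMinSignedBits() > MaxPointerSize &&
       "constant part of the scaled index no longer fits the offset width");
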
@@ -640,8 +672,8 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
}
/// Returns the behavior when calling the given call site.
-FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
- if (CS.doesNotAccessMemory())
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const CallBase *Call) {
+ if (Call->doesNotAccessMemory())
// Can't do better than this.
return FMRB_DoesNotAccessMemory;
@@ -649,23 +681,23 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
// If the callsite knows it only reads memory, don't return worse
// than that.
- if (CS.onlyReadsMemory())
+ if (Call->onlyReadsMemory())
Min = FMRB_OnlyReadsMemory;
- else if (CS.doesNotReadMemory())
+ else if (Call->doesNotReadMemory())
Min = FMRB_DoesNotReadMemory;
- if (CS.onlyAccessesArgMemory())
+ if (Call->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
- else if (CS.onlyAccessesInaccessibleMemory())
+ else if (Call->onlyAccessesInaccessibleMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem);
- else if (CS.onlyAccessesInaccessibleMemOrArgMem())
+ else if (Call->onlyAccessesInaccessibleMemOrArgMem())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem);
- // If CS has operand bundles then aliasing attributes from the function it
- // calls do not directly apply to the CallSite. This can be made more
- // precise in the future.
- if (!CS.hasOperandBundles())
- if (const Function *F = CS.getCalledFunction())
+ // If the call has operand bundles then aliasing attributes from the function
+ // it calls do not directly apply to the call. This can be made more precise
+ // in the future.
+ if (!Call->hasOperandBundles())
+ if (const Function *F = Call->getCalledFunction())
Min =
FunctionModRefBehavior(Min & getBestAAResults().getModRefBehavior(F));
@@ -698,9 +730,9 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
}
/// Returns true if this is a writeonly (i.e Mod only) parameter.
-static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
+static bool isWriteOnlyParam(const CallBase *Call, unsigned ArgIdx,
const TargetLibraryInfo &TLI) {
- if (CS.paramHasAttr(ArgIdx, Attribute::WriteOnly))
+ if (Call->paramHasAttr(ArgIdx, Attribute::WriteOnly))
return true;
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -710,7 +742,8 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
// FIXME Consider handling this in InferFunctionAttr.cpp together with other
// attributes.
LibFunc F;
- if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
+ if (Call->getCalledFunction() &&
+ TLI.getLibFunc(*Call->getCalledFunction(), F) &&
F == LibFunc_memset_pattern16 && TLI.has(F))
if (ArgIdx == 0)
return true;
@@ -722,23 +755,23 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
return false;
}
-ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+ModRefInfo BasicAAResult::getArgModRefInfo(const CallBase *Call,
unsigned ArgIdx) {
// Checking for known builtin intrinsics and target library functions.
- if (isWriteOnlyParam(CS, ArgIdx, TLI))
+ if (isWriteOnlyParam(Call, ArgIdx, TLI))
return ModRefInfo::Mod;
- if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly))
+ if (Call->paramHasAttr(ArgIdx, Attribute::ReadOnly))
return ModRefInfo::Ref;
- if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone))
+ if (Call->paramHasAttr(ArgIdx, Attribute::ReadNone))
return ModRefInfo::NoModRef;
- return AAResultBase::getArgModRefInfo(CS, ArgIdx);
+ return AAResultBase::getArgModRefInfo(Call, ArgIdx);
}
-static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) {
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+static bool isIntrinsicCall(const CallBase *Call, Intrinsic::ID IID) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call);
return II && II->getIntrinsicID() == IID;
}
@@ -794,9 +827,9 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
/// Since we only look at local properties of this function, we really can't
/// say much about this query. We do, however, use simple "address taken"
/// analysis on local objects.
-ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
- assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
+ assert(notDifferentParent(Call, Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
@@ -807,15 +840,21 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// contents of the alloca into argument registers or stack slots, so there is
// no lifetime issue.
if (isa<AllocaInst>(Object))
- if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (const CallInst *CI = dyn_cast<CallInst>(Call))
if (CI->isTailCall() &&
!CI->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return ModRefInfo::NoModRef;
+ // Stack restore is able to modify unescaped dynamic allocas. Assume it may
+ // modify them even though the alloca is not escaped.
+ if (auto *AI = dyn_cast<AllocaInst>(Object))
+ if (!AI->isStaticAlloca() && isIntrinsicCall(Call, Intrinsic::stackrestore))
+ return ModRefInfo::Mod;
+
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
- if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ if (!isa<Constant>(Object) && Call != Object &&
isNonEscapingLocalObject(Object)) {
// Optimistically assume that call doesn't touch Object and check this
@@ -824,19 +863,20 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
bool IsMustAlias = true;
unsigned OperandNo = 0;
- for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
+ for (auto CI = Call->data_operands_begin(), CE = Call->data_operands_end();
CI != CE; ++CI, ++OperandNo) {
// Only look at the no-capture or byval pointer arguments. If this
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(OperandNo) &&
- OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
+ (!Call->doesNotCapture(OperandNo) &&
+ OperandNo < Call->getNumArgOperands() &&
+ !Call->isByValArgument(OperandNo)))
continue;
// Call doesn't access memory through this operand, so we don't care
// if it aliases with Object.
- if (CS.doesNotAccessMemory(OperandNo))
+ if (Call->doesNotAccessMemory(OperandNo))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -850,12 +890,12 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
continue;
// Operand aliases 'Object', but call doesn't modify it. Strengthen
// initial assumption and keep looking in case if there are more aliases.
- if (CS.onlyReadsMemory(OperandNo)) {
+ if (Call->onlyReadsMemory(OperandNo)) {
Result = setRef(Result);
continue;
}
// Operand aliases 'Object' but call only writes into it.
- if (CS.doesNotReadMemory(OperandNo)) {
+ if (Call->doesNotReadMemory(OperandNo)) {
Result = setMod(Result);
continue;
}
@@ -879,17 +919,16 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
}
}
- // If the CallSite is to malloc or calloc, we can assume that it doesn't
+ // If the call is to malloc or calloc, we can assume that it doesn't
// modify any IR visible value. This is only valid because we assume these
// routines do not read values visible in the IR. TODO: Consider special
// casing realloc and strdup routines which access only their arguments as
// well. Or alternatively, replace all of this with inaccessiblememonly once
// that's implemented fully.
- auto *Inst = CS.getInstruction();
- if (isMallocOrCallocLikeFn(Inst, &TLI)) {
+ if (isMallocOrCallocLikeFn(Call, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
- if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
+ if (getBestAAResults().alias(MemoryLocation(Call), Loc) == NoAlias)
return ModRefInfo::NoModRef;
}
@@ -897,7 +936,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// operands, i.e., source and destination of any given memcpy must no-alias.
// If Loc must-aliases either one of these two locations, then it necessarily
// no-aliases the other.
- if (auto *Inst = dyn_cast<AnyMemCpyInst>(CS.getInstruction())) {
+ if (auto *Inst = dyn_cast<AnyMemCpyInst>(Call)) {
AliasResult SrcAA, DestAA;
if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
@@ -921,7 +960,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
- if (isIntrinsicCall(CS, Intrinsic::assume))
+ if (isIntrinsicCall(Call, Intrinsic::assume))
return ModRefInfo::NoModRef;
// Like assumes, guard intrinsics are also marked as arbitrarily writing so
@@ -931,7 +970,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// *Unlike* assumes, guard intrinsics are modeled as reading memory since the
// heap state at the point the guard is issued needs to be consistent in case
// the guard invokes the "deopt" continuation.
- if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
+ if (isIntrinsicCall(Call, Intrinsic::experimental_guard))
return ModRefInfo::Ref;
// Like assumes, invariant.start intrinsics were also marked as arbitrarily
@@ -957,20 +996,20 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// The transformation will cause the second store to be ignored (based on
// rules of invariant.start) and print 40, while the first program always
// prints 50.
- if (isIntrinsicCall(CS, Intrinsic::invariant_start))
+ if (isIntrinsicCall(Call, Intrinsic::invariant_start))
return ModRefInfo::Ref;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
- if (isIntrinsicCall(CS1, Intrinsic::assume) ||
- isIntrinsicCall(CS2, Intrinsic::assume))
+ if (isIntrinsicCall(Call1, Intrinsic::assume) ||
+ isIntrinsicCall(Call2, Intrinsic::assume))
return ModRefInfo::NoModRef;
// Like assumes, guard intrinsics are also marked as arbitrarily writing so
@@ -984,26 +1023,26 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
// NB! This function is *not* commutative, so we special-case two
// possibilities for guard intrinsics.
- if (isIntrinsicCall(CS1, Intrinsic::experimental_guard))
- return isModSet(createModRefInfo(getModRefBehavior(CS2)))
+ if (isIntrinsicCall(Call1, Intrinsic::experimental_guard))
+ return isModSet(createModRefInfo(getModRefBehavior(Call2)))
? ModRefInfo::Ref
: ModRefInfo::NoModRef;
- if (isIntrinsicCall(CS2, Intrinsic::experimental_guard))
- return isModSet(createModRefInfo(getModRefBehavior(CS1)))
+ if (isIntrinsicCall(Call2, Intrinsic::experimental_guard))
+ return isModSet(createModRefInfo(getModRefBehavior(Call1)))
? ModRefInfo::Mod
: ModRefInfo::NoModRef;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
/// both having the exact same pointer operand.
static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
- LocationSize V1Size,
+ LocationSize MaybeV1Size,
const GEPOperator *GEP2,
- LocationSize V2Size,
+ LocationSize MaybeV2Size,
const DataLayout &DL) {
assert(GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
@@ -1019,10 +1058,13 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// If we don't know the size of the accesses through both GEPs, we can't
// determine whether the struct fields accessed can't alias.
- if (V1Size == MemoryLocation::UnknownSize ||
- V2Size == MemoryLocation::UnknownSize)
+ if (MaybeV1Size == LocationSize::unknown() ||
+ MaybeV2Size == LocationSize::unknown())
return MayAlias;
+ const uint64_t V1Size = MaybeV1Size.getValue();
+ const uint64_t V2Size = MaybeV2Size.getValue();
+
ConstantInt *C1 =
dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
ConstantInt *C2 =
@@ -1030,8 +1072,12 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// If the last (struct) indices are constants and are equal, the other indices
// might be also be dynamically equal, so the GEPs can alias.
- if (C1 && C2 && C1->getSExtValue() == C2->getSExtValue())
- return MayAlias;
+ if (C1 && C2) {
+ unsigned BitWidth = std::max(C1->getBitWidth(), C2->getBitWidth());
+ if (C1->getValue().sextOrSelf(BitWidth) ==
+ C2->getValue().sextOrSelf(BitWidth))
+ return MayAlias;
+ }
// Find the last-indexed type of the GEP, i.e., the type you'd get if
// you stripped the last index.
@@ -1114,6 +1160,10 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
return MayAlias;
}
+ if (C1->getValue().getActiveBits() > 64 ||
+ C2->getValue().getActiveBits() > 64)
+ return MayAlias;
+
// We know that:
// - both GEPs begin indexing from the exact same pointer;
// - the last indices in both GEPs are constants, indexing into a struct;
@@ -1179,11 +1229,13 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// than (%alloca - 1), and so is not inbounds, a contradiction.
bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
- LocationSize ObjectAccessSize) {
+ LocationSize MaybeObjectAccessSize) {
// If the object access size is unknown, or the GEP isn't inbounds, bail.
- if (ObjectAccessSize == MemoryLocation::UnknownSize || !GEPOp->isInBounds())
+ if (MaybeObjectAccessSize == LocationSize::unknown() || !GEPOp->isInBounds())
return false;
+ const uint64_t ObjectAccessSize = MaybeObjectAccessSize.getValue();
+
// We need the object to be an alloca or a globalvariable, and want to know
// the offset of the pointer from the object precisely, so no variable
// indices are allowed.
@@ -1192,8 +1244,8 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
!DecompObject.VarIndices.empty())
return false;
- int64_t ObjectBaseOffset = DecompObject.StructOffset +
- DecompObject.OtherOffset;
+ APInt ObjectBaseOffset = DecompObject.StructOffset +
+ DecompObject.OtherOffset;
// If the GEP has no variable indices, we know the precise offset
// from the base, then use it. If the GEP has variable indices,
@@ -1201,10 +1253,11 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
// false in that case.
if (!DecompGEP.VarIndices.empty())
return false;
- int64_t GEPBaseOffset = DecompGEP.StructOffset;
+
+ APInt GEPBaseOffset = DecompGEP.StructOffset;
GEPBaseOffset += DecompGEP.OtherOffset;
- return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize);
+ return GEPBaseOffset.sge(ObjectBaseOffset + (int64_t)ObjectAccessSize);
}
/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
@@ -1219,13 +1272,17 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2) {
DecomposedGEP DecompGEP1, DecompGEP2;
+ unsigned MaxPointerSize = getMaxPointerSize(DL);
+ DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
+ DecompGEP2.StructOffset = DecompGEP2.OtherOffset = APInt(MaxPointerSize, 0);
+
bool GEP1MaxLookupReached =
DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT);
bool GEP2MaxLookupReached =
DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT);
- int64_t GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
- int64_t GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
+ APInt GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
+ APInt GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
"DecomposeGEPExpression returned a result different from "
@@ -1248,8 +1305,8 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
return NoAlias;
// Do the base pointers alias?
AliasResult BaseAlias =
- aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, AAMDNodes(),
- UnderlyingV2, MemoryLocation::UnknownSize, AAMDNodes());
+ aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(),
+ UnderlyingV2, LocationSize::unknown(), AAMDNodes());
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
@@ -1308,13 +1365,12 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
- if (V1Size == MemoryLocation::UnknownSize &&
- V2Size == MemoryLocation::UnknownSize)
+ if (V1Size == LocationSize::unknown() && V2Size == LocationSize::unknown())
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
- AAMDNodes(), V2, MemoryLocation::UnknownSize,
- V2AAInfo, nullptr, UnderlyingV2);
+ AliasResult R =
+ aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(), V2,
+ LocationSize::unknown(), V2AAInfo, nullptr, UnderlyingV2);
if (R != MustAlias) {
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1344,9 +1400,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// that the objects are partially overlapping. If the difference is
// greater, we know they do not overlap.
if (GEP1BaseOffset != 0 && DecompGEP1.VarIndices.empty()) {
- if (GEP1BaseOffset >= 0) {
- if (V2Size != MemoryLocation::UnknownSize) {
- if ((uint64_t)GEP1BaseOffset < V2Size)
+ if (GEP1BaseOffset.sge(0)) {
+ if (V2Size != LocationSize::unknown()) {
+ if (GEP1BaseOffset.ult(V2Size.getValue()))
return PartialAlias;
return NoAlias;
}
@@ -1359,9 +1415,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// GEP1 V2
// We need to know that V2Size is not unknown, otherwise we might have
// stripped a gep with negative index ('gep <ptr>, -1, ...).
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize) {
- if (-(uint64_t)GEP1BaseOffset < V1Size)
+ if (V1Size != LocationSize::unknown() &&
+ V2Size != LocationSize::unknown()) {
+ if ((-GEP1BaseOffset).ult(V1Size.getValue()))
return PartialAlias;
return NoAlias;
}
@@ -1369,7 +1425,7 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
}
if (!DecompGEP1.VarIndices.empty()) {
- uint64_t Modulo = 0;
+ APInt Modulo(MaxPointerSize, 0);
bool AllPositive = true;
for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
@@ -1377,7 +1433,7 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// Grab the least significant bit set in any of the scales. We
// don't need std::abs here (even if the scale's negative) as we'll
// be ^'ing Modulo with itself later.
- Modulo |= (uint64_t)DecompGEP1.VarIndices[i].Scale;
+ Modulo |= DecompGEP1.VarIndices[i].Scale;
if (AllPositive) {
// If the Value could change between cycles, then any reasoning about
@@ -1398,9 +1454,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// If the variable begins with a zero then we know it's
// positive, regardless of whether the value is signed or
// unsigned.
- int64_t Scale = DecompGEP1.VarIndices[i].Scale;
+ APInt Scale = DecompGEP1.VarIndices[i].Scale;
AllPositive =
- (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+ (SignKnownZero && Scale.sge(0)) || (SignKnownOne && Scale.slt(0));
}
}
@@ -1409,16 +1465,18 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// We can compute the difference between the two addresses
// mod Modulo. Check whether that difference guarantees that the
// two locations do not alias.
- uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
- V1Size <= Modulo - ModOffset)
+ APInt ModOffset = GEP1BaseOffset & (Modulo - 1);
+ if (V1Size != LocationSize::unknown() &&
+ V2Size != LocationSize::unknown() && ModOffset.uge(V2Size.getValue()) &&
+ (Modulo - ModOffset).uge(V1Size.getValue()))
return NoAlias;
// If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
// If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
// don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
- if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+ if (AllPositive && GEP1BaseOffset.sgt(0) &&
+ V2Size != LocationSize::unknown() &&
+ GEP1BaseOffset.uge(V2Size.getValue()))
return NoAlias;
if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
@@ -1598,7 +1656,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
// unknown to represent all the possible values the GEP could advance the
// pointer to.
if (isRecursive)
- PNSize = MemoryLocation::UnknownSize;
+ PNSize = LocationSize::unknown();
AliasResult Alias =
aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
@@ -1632,7 +1690,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
const Value *O1, const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
- if (V1Size == 0 || V2Size == 0)
+ if (V1Size.isZero() || V2Size.isZero())
return NoAlias;
// Strip off any casts if they exist.
@@ -1706,10 +1764,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
bool NullIsValidLocation = NullPointerIsDefined(&F);
- if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O2, V1Size, DL, TLI, NullIsValidLocation)) ||
- (V2Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O1, V2Size, DL, TLI, NullIsValidLocation)))
+ if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI,
+ NullIsValidLocation)) ||
+ (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI,
+ NullIsValidLocation)))
return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
@@ -1767,10 +1825,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// If both pointers are pointing into the same object and one of them
// accesses the entire object, then the accesses must overlap in some way.
if (O1 == O2)
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize &&
- (isObjectSize(O1, V1Size, DL, TLI, NullIsValidLocation) ||
- isObjectSize(O2, V2Size, DL, TLI, NullIsValidLocation)))
+ if (V1Size.isPrecise() && V2Size.isPrecise() &&
+ (isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) ||
+ isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation)))
return AliasCache[Locs] = PartialAlias;
// Recurse back into the best AA results we have, potentially with refined
@@ -1825,7 +1882,7 @@ void BasicAAResult::GetIndexDifference(
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
const Value *V = Src[i].V;
unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
- int64_t Scale = Src[i].Scale;
+ APInt Scale = Src[i].Scale;
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
@@ -1845,7 +1902,7 @@ void BasicAAResult::GetIndexDifference(
}
// If we didn't consume this entry, add it to the end of the Dest list.
- if (Scale) {
+ if (!!Scale) {
VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
Dest.push_back(Entry);
}
@@ -1853,13 +1910,16 @@ void BasicAAResult::GetIndexDifference(
}
bool BasicAAResult::constantOffsetHeuristic(
- const SmallVectorImpl<VariableGEPIndex> &VarIndices, LocationSize V1Size,
- LocationSize V2Size, int64_t BaseOffset, AssumptionCache *AC,
- DominatorTree *DT) {
- if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
- V2Size == MemoryLocation::UnknownSize)
+ const SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ LocationSize MaybeV1Size, LocationSize MaybeV2Size, APInt BaseOffset,
+ AssumptionCache *AC, DominatorTree *DT) {
+ if (VarIndices.size() != 2 || MaybeV1Size == LocationSize::unknown() ||
+ MaybeV2Size == LocationSize::unknown())
return false;
+ const uint64_t V1Size = MaybeV1Size.getValue();
+ const uint64_t V2Size = MaybeV2Size.getValue();
+
const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
@@ -1896,14 +1956,15 @@ bool BasicAAResult::constantOffsetHeuristic(
// the minimum distance between %i and %i + 5 is 3.
APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
MinDiff = APIntOps::umin(MinDiff, Wrapped);
- uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+ APInt MinDiffBytes =
+ MinDiff.zextOrTrunc(Var0.Scale.getBitWidth()) * Var0.Scale.abs();
// We can't definitely say whether GEP1 is before or after V2 due to wrapping
// arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
// values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
// V2Size can fit in the MinDiffBytes gap.
- return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
- V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+ return MinDiffBytes.uge(V1Size + BaseOffset.abs()) &&
+ MinDiffBytes.uge(V2Size + BaseOffset.abs());
}
//===----------------------------------------------------------------------===//
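
For the constantOffsetHeuristic() hunk above, a worked sketch of the wrap-aware distance with a hypothetical 3-bit index and 4-byte scale: offsets 0 and 5 are only min(5, 8 - 5) = 3 index steps apart, i.e. 12 bytes after scaling.

// MinDiff is the smaller of the forward and the wrapped-around distance
// between the two constant offsets, measured in the index's own bit width.
APInt V0Offset(3, 5), V1Offset(3, 0);
APInt MinDiff = V0Offset - V1Offset;        // 5
APInt Wrapped = -MinDiff;                   // 3 in 3-bit arithmetic
MinDiff = APIntOps::umin(MinDiff, Wrapped); // 3
APInt Scale(64, 4);                         // bytes per index step
APInt MinDiffBytes =
    MinDiff.zextOrTrunc(Scale.getBitWidth()) * Scale.abs();
assert(MinDiffBytes == 12 && "guaranteed gap between the two accesses");
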
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 41c295895213..ef27c36517ea 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -252,8 +252,8 @@ void BlockFrequencyInfo::setBlockFreqAndScale(
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
-void BlockFrequencyInfo::view() const {
- ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+void BlockFrequencyInfo::view(StringRef title) const {
+ ViewGraph(const_cast<BlockFrequencyInfo *>(this), title);
}
const Function *BlockFrequencyInfo::getFunction() const {
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 3d095068e7ff..08ebcc47a807 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -156,9 +156,9 @@ static void combineWeight(Weight &W, const Weight &OtherW) {
static void combineWeightsBySorting(WeightList &Weights) {
// Sort so edges to the same node are adjacent.
- llvm::sort(Weights.begin(), Weights.end(),
- [](const Weight &L,
- const Weight &R) { return L.TargetNode < R.TargetNode; });
+ llvm::sort(Weights, [](const Weight &L, const Weight &R) {
+ return L.TargetNode < R.TargetNode;
+ });
// Combine adjacent edges.
WeightList::iterator O = Weights.begin();
@@ -573,7 +573,9 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
- BlockCount = BlockCount.udiv(EntryFreq);
+ // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
+ // lshr by 1 gives EntryFreq/2.
+ BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
return BlockCount.getLimitedValue();
}
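
The change above is plain add-half-then-divide rounding; with made-up numbers, a scaled count of 7 against an entry frequency of 2 now reports 4 instead of the previously truncated 3.

// Adding EntryFreq/2 (lshr by 1, since EntryFreq is unsigned) before the
// unsigned division rounds to nearest instead of toward zero.
APInt ScaledCount(128, 7);
APInt EntryFreq(128, 2);
APInt Rounded = (ScaledCount + EntryFreq.lshr(1)).udiv(EntryFreq);
assert(Rounded == 4 && "7 / 2 rounds to nearest, not toward zero");
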
@@ -705,7 +707,7 @@ static void findIrreducibleHeaders(
"Expected irreducible CFG; -loop-info is likely invalid");
if (Headers.size() == InSCC.size()) {
// Every block is a header.
- llvm::sort(Headers.begin(), Headers.end());
+ llvm::sort(Headers);
return;
}
@@ -740,8 +742,8 @@ static void findIrreducibleHeaders(
Others.push_back(Irr.Node);
LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
}
- llvm::sort(Headers.begin(), Headers.end());
- llvm::sort(Others.begin(), Others.end());
+ llvm::sort(Headers);
+ llvm::sort(Others);
}
static void createIrreducibleLoop(
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 54a657073f0f..7f544b27fe9d 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -135,7 +135,7 @@ static const uint32_t IH_NONTAKEN_WEIGHT = 1;
/// Add \p BB to PostDominatedByUnreachable set if applicable.
void
BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0) {
if (isa<UnreachableInst>(TI) ||
// If this block is terminated by a call to
@@ -167,7 +167,7 @@ BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
void
BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
assert(!PostDominatedByColdCall.count(BB));
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0)
return;
@@ -202,7 +202,7 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
/// Predict that a successor which leads necessarily to an
/// unreachable-terminated block as extremely unlikely.
bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
(void) TI;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
assert(!isa<InvokeInst>(TI) &&
@@ -246,7 +246,7 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
// heuristic. The probability of the edge coming to unreachable block is
// set to min of metadata and unreachable heuristic.
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI)))
return false;
@@ -348,7 +348,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
/// Return true if we could compute the weights for cold edges.
/// Return false, otherwise.
bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
(void) TI;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
assert(!isa<InvokeInst>(TI) &&
diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp
index a319be8092f9..aa880a62b754 100644
--- a/contrib/llvm/lib/Analysis/CFG.cpp
+++ b/contrib/llvm/lib/Analysis/CFG.cpp
@@ -71,7 +71,7 @@ void llvm::FindFunctionBackedges(const Function &F,
/// successor.
unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
const BasicBlock *Succ) {
- const TerminatorInst *Term = BB->getTerminator();
+ const Instruction *Term = BB->getTerminator();
#ifndef NDEBUG
unsigned e = Term->getNumSuccessors();
#endif
@@ -85,8 +85,9 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
/// isCriticalEdge - Return true if the specified edge is a critical edge.
/// Critical edges are edges from a block with multiple successors to a block
/// with multiple predecessors.
-bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges) {
+ assert(TI->isTerminator() && "Must be a terminator to have successors!");
assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
if (TI->getNumSuccessors() == 1) return false;
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
index 5b170dfa7903..6d01e9d5d447 100644
--- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a '-dot-cfg' analysis pass, which emits the
-// cfg.<fnname>.dot file for each function in the program, with a graph of the
-// CFG for that function.
+// This file defines a `-dot-cfg` analysis pass, which emits the
+// `<prefix>.<fnname>.dot` file for each function in the program, with a graph
+// of the CFG for that function. The default value for `<prefix>` is `cfg` but
+// can be customized as needed.
//
// The other main feature of this file is that it implements the
// Function::viewCFG method, which is useful for debugging passes which operate
@@ -27,6 +28,10 @@ static cl::opt<std::string> CFGFuncName(
cl::desc("The name of a function (or its substring)"
" whose CFG is viewed/printed."));
+static cl::opt<std::string> CFGDotFilenamePrefix(
+ "cfg-dot-filename-prefix", cl::Hidden,
+ cl::desc("The prefix used for the CFG dot file names."));
+
namespace {
struct CFGViewerLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
@@ -90,7 +95,8 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
static void writeCFGToDotFile(Function &F, bool CFGOnly = false) {
if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
return;
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
+ std::string Filename =
+ (CFGDotFilenamePrefix + "." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 194983418b08..1c61dd369a05 100644
--- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -395,7 +395,7 @@ populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
}
// Sort AliasList for faster lookup
- llvm::sort(AliasList.begin(), AliasList.end());
+ llvm::sort(AliasList);
}
}
@@ -479,7 +479,7 @@ static void populateExternalRelations(
}
// Remove duplicates in ExtRelations
- llvm::sort(ExtRelations.begin(), ExtRelations.end());
+ llvm::sort(ExtRelations);
ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
ExtRelations.end());
}
@@ -515,10 +515,9 @@ CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
return None;
}
-bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
- LocationSize LHSSize,
- const Value *RHS,
- LocationSize RHSSize) const {
+bool CFLAndersAAResult::FunctionInfo::mayAlias(
+ const Value *LHS, LocationSize MaybeLHSSize, const Value *RHS,
+ LocationSize MaybeRHSSize) const {
assert(LHS && RHS);
// Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
@@ -557,11 +556,14 @@ bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
OffsetValue{RHS, 0}, Comparator);
if (RangePair.first != RangePair.second) {
- // Be conservative about UnknownSize
- if (LHSSize == MemoryLocation::UnknownSize ||
- RHSSize == MemoryLocation::UnknownSize)
+ // Be conservative about unknown sizes
+ if (MaybeLHSSize == LocationSize::unknown() ||
+ MaybeRHSSize == LocationSize::unknown())
return true;
+ const uint64_t LHSSize = MaybeLHSSize.getValue();
+ const uint64_t RHSSize = MaybeRHSSize.getValue();
+
for (const auto &OVal : make_range(RangePair)) {
// Be conservative about UnknownOffset
if (OVal.Offset == UnknownOffset)
diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h
index 86812009da7c..12121d717433 100644
--- a/contrib/llvm/lib/Analysis/CFLGraph.h
+++ b/contrib/llvm/lib/Analysis/CFLGraph.h
@@ -594,7 +594,7 @@ template <typename CFLAA> class CFLGraphBuilder {
// Determines whether or not an instruction is useless to us (e.g.
// FenceInst)
static bool hasUsefulEdges(Instruction *Inst) {
- bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
+ bool IsNonInvokeRetTerminator = Inst->isTerminator() &&
!isa<InvokeInst>(Inst) &&
!isa<ReturnInst>(Inst);
return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
index b325afb8e7c5..fd2292ced017 100644
--- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -54,6 +54,11 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM,
LazyCallGraph &G, CGSCCUpdateResult &UR) {
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI =
+ AM.getResult<PassInstrumentationAnalysis>(InitialC, G);
+
PreservedAnalyses PA = PreservedAnalyses::all();
if (DebugLogging)
@@ -67,8 +72,18 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
if (DebugLogging)
dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns false).
+ if (!PI.runBeforePass(*Pass, *C))
+ continue;
+
PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C);
+
// Update the SCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
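A hedged sketch of how a client could use the BeforePass hook this hunk starts honouring: the callback receives the pass name and the IR unit (as llvm::Any), and returning false asks the manager to skip the pass. The filter condition below is purely illustrative, and wiring the callbacks object into a pass builder is omitted.

#include "llvm/ADT/Any.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassInstrumentation.h"
using namespace llvm;

void installSkipFilter(PassInstrumentationCallbacks &PIC) {
  PIC.registerBeforePassCallback([](StringRef PassID, Any) {
    // Returning false here makes the CGSCC pass manager above 'continue'
    // past the pass without running it.
    return !PassID.contains("NoOpCGSCCPass");
  });
}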
diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
index cbdf5f63c557..0da678e1611b 100644
--- a/contrib/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -97,8 +97,7 @@ void CallGraph::print(raw_ostream &OS) const {
for (const auto &I : *this)
Nodes.push_back(I.second.get());
- llvm::sort(Nodes.begin(), Nodes.end(),
- [](CallGraphNode *LHS, CallGraphNode *RHS) {
+ llvm::sort(Nodes, [](CallGraphNode *LHS, CallGraphNode *RHS) {
if (Function *LF = LHS->getFunction())
if (Function *RF = RHS->getFunction())
return LF->getName() < RF->getName();
diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 4c33c420b65d..0aed57a39387 100644
--- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -22,11 +22,13 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -123,24 +125,34 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
Module &M = CG.getModule();
if (!PM) {
- CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
+ CallGraphSCCPass *CGSP = (CallGraphSCCPass *)P;
if (!CallGraphUpToDate) {
DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
CallGraphUpToDate = true;
}
{
- unsigned InstrCount = 0;
+ unsigned InstrCount, SCCCount = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
TimeRegion PassTimer(getPassTimer(CGSP));
if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
Changed = CGSP->runOnSCC(CurSCC);
- // If the pass modified the module, it may have modified the instruction
- // count of the module. Try emitting a remark.
- if (EmitICRemark)
- emitInstrCountChangedRemark(P, M, InstrCount);
+ if (EmitICRemark) {
+ // FIXME: Add getInstructionCount to CallGraphSCC.
+ SCCCount = M.getInstructionCount();
+ // Is there a difference in the number of instructions in the module?
+ if (SCCCount != InstrCount) {
+ // Yep. Emit a remark and update InstrCount.
+ int64_t Delta =
+ static_cast<int64_t>(SCCCount) - static_cast<int64_t>(InstrCount);
+ emitInstrCountChangedRemark(P, M, Delta, InstrCount,
+ FunctionToInstrCount);
+ InstrCount = SCCCount;
+ }
+ }
}
// After the CGSCCPass is done, when assertions are enabled, use
@@ -621,23 +633,40 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
bool BannerPrinted = false;
- auto PrintBannerOnce = [&] () {
+ auto PrintBannerOnce = [&]() {
if (BannerPrinted)
return;
OS << Banner;
BannerPrinted = true;
- };
+ };
+
+ bool NeedModule = llvm::forcePrintModuleIR();
+ if (isFunctionInPrintList("*") && NeedModule) {
+ PrintBannerOnce();
+ OS << "\n";
+ SCC.getCallGraph().getModule().print(OS, nullptr);
+ return false;
+ }
+ bool FoundFunction = false;
for (CallGraphNode *CGN : SCC) {
if (Function *F = CGN->getFunction()) {
if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) {
- PrintBannerOnce();
- F->print(OS);
+ FoundFunction = true;
+ if (!NeedModule) {
+ PrintBannerOnce();
+ F->print(OS);
+ }
}
} else if (isFunctionInPrintList("*")) {
PrintBannerOnce();
OS << "\nPrinting <null> Function\n";
}
}
+ if (NeedModule && FoundFunction) {
+ PrintBannerOnce();
+ OS << "\n";
+ SCC.getCallGraph().getModule().print(OS, nullptr);
+ }
return false;
}
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index d4f73bdb4361..669f4f2835fa 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -23,7 +23,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -158,7 +157,8 @@ namespace {
/// storing the value (or part of it) into memory anywhere automatically
/// counts as capturing it or not.
bool llvm::PointerMayBeCaptured(const Value *V,
- bool ReturnCaptures, bool StoreCaptures) {
+ bool ReturnCaptures, bool StoreCaptures,
+ unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
@@ -169,7 +169,7 @@ bool llvm::PointerMayBeCaptured(const Value *V,
(void)StoreCaptures;
SimpleCaptureTracker SCT(ReturnCaptures);
- PointerMayBeCaptured(V, &SCT);
+ PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
return SCT.Captured;
}
@@ -186,13 +186,15 @@ bool llvm::PointerMayBeCaptured(const Value *V,
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI,
- OrderedBasicBlock *OBB) {
+ OrderedBasicBlock *OBB,
+ unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
bool UseNewOBB = OBB == nullptr;
if (!DT)
- return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures);
+ return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures,
+ MaxUsesToExplore);
if (UseNewOBB)
OBB = new OrderedBasicBlock(I->getParent());
@@ -200,29 +202,25 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
// with StoreCaptures.
CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB);
- PointerMayBeCaptured(V, &CB);
+ PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
if (UseNewOBB)
delete OBB;
return CB.Captured;
}
-/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
-/// a cache. Then we can move the code from BasicAliasAnalysis into
-/// that path, and remove this threshold.
-static int const Threshold = 20;
-
-void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
+void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
+ unsigned MaxUsesToExplore) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
- SmallVector<const Use *, Threshold> Worklist;
- SmallSet<const Use *, Threshold> Visited;
+ SmallVector<const Use *, DefaultMaxUsesToExplore> Worklist;
+ SmallSet<const Use *, DefaultMaxUsesToExplore> Visited;
auto AddUses = [&](const Value *V) {
- int Count = 0;
+ unsigned Count = 0;
for (const Use &U : V->uses()) {
// If there are lots of uses, conservatively say that the value
// is captured to avoid taking too much compile time.
- if (Count++ >= Threshold)
+ if (Count++ >= MaxUsesToExplore)
return Tracker->tooManyUses();
if (!Visited.insert(&U).second)
continue;
@@ -241,11 +239,12 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
switch (I->getOpcode()) {
case Instruction::Call:
case Instruction::Invoke: {
- CallSite CS(I);
+ auto *Call = cast<CallBase>(I);
// Not captured if the callee is readonly, doesn't return a copy through
// its return value and doesn't unwind (a readonly function can leak bits
// by throwing an exception or not depending on the input value).
- if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+ if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
+ Call->getType()->isVoidTy())
break;
// The pointer is not captured if returned pointer is not captured.
@@ -253,14 +252,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// marked with nocapture do not capture. This means that places like
// GetUnderlyingObject in ValueTracking or DecomposeGEPExpression
// in BasicAA also need to know about this property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS)) {
- AddUses(I);
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) {
+ AddUses(Call);
break;
}
// Volatile operations effectively capture the memory location that they
// load and store to.
- if (auto *MI = dyn_cast<MemIntrinsic>(I))
+ if (auto *MI = dyn_cast<MemIntrinsic>(Call))
if (MI->isVolatile())
if (Tracker->captured(U))
return;
@@ -272,13 +271,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// that loading a value from a pointer does not cause the pointer to be
// captured, even though the loaded value might be the pointer itself
// (think of self-referential objects).
- CallSite::data_operand_iterator B =
- CS.data_operands_begin(), E = CS.data_operands_end();
- for (CallSite::data_operand_iterator A = B; A != E; ++A)
- if (A->get() == V && !CS.doesNotCapture(A - B))
+ for (auto IdxOpPair : enumerate(Call->data_ops())) {
+ int Idx = IdxOpPair.index();
+ Value *A = IdxOpPair.value();
+ if (A == V && !Call->doesNotCapture(Idx))
// The parameter is not marked 'nocapture' - captured.
if (Tracker->captured(U))
return;
+ }
break;
}
case Instruction::Load:
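A hedged sketch of calling the extended capture-tracking entry point from this hunk; the wrapper name and the use limit are illustrative, only the signature comes from the change above.

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Value.h"
using namespace llvm;

bool mayBeCapturedCheaply(const Value *Ptr) {
  // Count returns and stores as captures, but inspect at most 16 uses
  // before giving up and conservatively answering "captured".
  return PointerMayBeCaptured(Ptr, /*ReturnCaptures=*/true,
                              /*StoreCaptures=*/true,
                              /*MaxUsesToExplore=*/16);
}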
diff --git a/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp b/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp
index 159c1a2d135a..27071babec5c 100644
--- a/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -40,28 +40,28 @@ unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
}
}
-Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- CmpInst::Predicate &NewICmpPred) {
+Constant *llvm::getPredForICmpCode(unsigned Code, bool Sign, Type *OpTy,
+ CmpInst::Predicate &Pred) {
switch (Code) {
default: llvm_unreachable("Illegal ICmp code!");
case 0: // False.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
- case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
- case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
- case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
- case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
- case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 0);
+ case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: Pred = ICmpInst::ICMP_EQ; break;
+ case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: Pred = ICmpInst::ICMP_NE; break;
+ case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
case 7: // True.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 1);
}
return nullptr;
}
-bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
- return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
- (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
- (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+bool llvm::predicatesFoldable(ICmpInst::Predicate P1, ICmpInst::Predicate P2) {
+ return (CmpInst::isSigned(P1) == CmpInst::isSigned(P2)) ||
+ (CmpInst::isSigned(P1) && ICmpInst::isEquality(P2)) ||
+ (CmpInst::isSigned(P2) && ICmpInst::isEquality(P1));
}
bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
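A hedged sketch of the renamed helper above: decoding ICmp code 3 with a signed interpretation yields ICMP_SGE, while codes 0 and 7 fold directly to false/true constants. The surrounding function and context are illustrative.

#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

void decodeGreaterOrEqual(LLVMContext &Ctx) {
  CmpInst::Predicate Pred;
  Constant *Folded = getPredForICmpCode(/*Code=*/3, /*Sign=*/true,
                                        Type::getInt32Ty(Ctx), Pred);
  // Folded is nullptr for codes 1..6; Pred now holds ICmpInst::ICMP_SGE.
  (void)Folded;
  (void)Pred;
}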
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index c5281c57bc19..5da29d6d2372 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -347,9 +347,20 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
// We're simulating a load through a pointer that was bitcast to point to
// a different type, so we can try to walk down through the initial
- // elements of an aggregate to see if some part of th e aggregate is
+ // elements of an aggregate to see if some part of the aggregate is
// castable to implement the "load" semantic model.
- C = C->getAggregateElement(0u);
+ if (SrcTy->isStructTy()) {
+ // Struct types might have leading zero-length elements like [0 x i32],
+ // which are certainly not what we are looking for, so skip them.
+ unsigned Elem = 0;
+ Constant *ElemC;
+ do {
+ ElemC = C->getAggregateElement(Elem++);
+ } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()) == 0);
+ C = ElemC;
+ } else {
+ C = C->getAggregateElement(0u);
+ }
} while (C);
return nullptr;
@@ -960,10 +971,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
NewIdxs.size() > *LastIRIndex) {
InRangeIndex = LastIRIndex;
for (unsigned I = 0; I <= *LastIRIndex; ++I)
- if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) {
- InRangeIndex = None;
- break;
- }
+ if (NewIdxs[I] != InnermostGEP->getOperand(I + 1))
+ return nullptr;
}
// Create a GEP.
@@ -985,11 +994,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
/// returned, if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
-///
-/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/inrange
-/// etc information, due to only being passed an opcode and operands. Constant
-/// folding using this function strips this information.
-///
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
@@ -1370,6 +1374,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
case Intrinsic::log:
case Intrinsic::log2:
case Intrinsic::log10:
@@ -1389,6 +1395,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::copysign:
@@ -1402,6 +1410,10 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::usub_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
case Intrinsic::bitreverse:
@@ -1413,6 +1425,23 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ case Intrinsic::is_constant:
return true;
default:
return false;
@@ -1553,7 +1582,7 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V,
/// result. Returns null if the conversion cannot be performed, otherwise
/// returns the Constant value resulting from the conversion.
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
- Type *Ty) {
+ Type *Ty, bool IsSigned) {
// All of these conversion intrinsics form an integer of at most 64bits.
unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
@@ -1565,11 +1594,11 @@ Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
: APFloat::rmNearestTiesToEven;
APFloat::opStatus status =
Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth,
- /*isSigned=*/true, mode, &isExact);
+ IsSigned, mode, &isExact);
if (status != APFloat::opOK &&
(!roundTowardZero || status != APFloat::opInexact))
return nullptr;
- return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+ return ConstantInt::get(Ty, UIntVal, IsSigned);
}
double getValueAsDouble(ConstantFP *Op) {
@@ -1587,14 +1616,49 @@ double getValueAsDouble(ConstantFP *Op) {
return APF.convertToDouble();
}
+static bool isManifestConstant(const Constant *c) {
+ if (isa<ConstantData>(c)) {
+ return true;
+ } else if (isa<ConstantAggregate>(c) || isa<ConstantExpr>(c)) {
+ for (const Value *subc : c->operand_values()) {
+ if (!isManifestConstant(cast<Constant>(subc)))
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
+ if (auto *CI = dyn_cast<ConstantInt>(Op)) {
+ C = &CI->getValue();
+ return true;
+ }
+ if (isa<UndefValue>(Op)) {
+ C = nullptr;
+ return true;
+ }
+ return false;
+}
+
Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI,
ImmutableCallSite CS) {
if (Operands.size() == 1) {
+ if (IntrinsicID == Intrinsic::is_constant) {
+ // We know we have a "Constant" argument. But we want to only
+ // return true for manifest constants, not those that depend on
+ // constants with unknowable values, e.g. GlobalValue or BlockAddress.
+ if (isManifestConstant(Operands[0]))
+ return ConstantInt::getTrue(Ty->getContext());
+ return nullptr;
+ }
if (isa<UndefValue>(Operands[0])) {
- // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN
- if (IntrinsicID == Intrinsic::cos)
+ // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
+ // ctpop() is between 0 and bitwidth, pick 0 for undef.
+ if (IntrinsicID == Intrinsic::cos ||
+ IntrinsicID == Intrinsic::ctpop)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::bswap ||
IntrinsicID == Intrinsic::bitreverse ||
@@ -1849,7 +1913,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty);
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
break;
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
@@ -1858,7 +1923,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty);
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
break;
}
}
@@ -1899,6 +1965,18 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
}
+ if (IntrinsicID == Intrinsic::minimum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
+ }
+
+ if (IntrinsicID == Intrinsic::maximum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
+ }
+
if (!TLI)
return nullptr;
if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
@@ -1931,58 +2009,149 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return nullptr;
}
- if (auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
- if (auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ if (Operands[0]->getType()->isIntegerTy() &&
+ Operands[1]->getType()->isIntegerTy()) {
+ const APInt *C0, *C1;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1))
+ return nullptr;
+
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // Even if both operands are undef, we cannot fold muls to undef
+ // in the general case. For example, on i2 there are no inputs
+ // that would produce { i2 -1, i1 true } as the result.
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow: {
+ if (!C0 || !C1)
+ return UndefValue::get(Ty);
+
+ APInt Res;
+ bool Overflow;
switch (IntrinsicID) {
- default: break;
+ default: llvm_unreachable("Invalid case");
case Intrinsic::sadd_with_overflow:
+ Res = C0->sadd_ov(*C1, Overflow);
+ break;
case Intrinsic::uadd_with_overflow:
+ Res = C0->uadd_ov(*C1, Overflow);
+ break;
case Intrinsic::ssub_with_overflow:
+ Res = C0->ssub_ov(*C1, Overflow);
+ break;
case Intrinsic::usub_with_overflow:
+ Res = C0->usub_ov(*C1, Overflow);
+ break;
case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow: {
- APInt Res;
- bool Overflow;
- switch (IntrinsicID) {
- default: llvm_unreachable("Invalid case");
- case Intrinsic::sadd_with_overflow:
- Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::uadd_with_overflow:
- Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::ssub_with_overflow:
- Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::usub_with_overflow:
- Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::smul_with_overflow:
- Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::umul_with_overflow:
- Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
- break;
- }
- Constant *Ops[] = {
- ConstantInt::get(Ty->getContext(), Res),
- ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
- };
- return ConstantStruct::get(cast<StructType>(Ty), Ops);
- }
- case Intrinsic::cttz:
- if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
- return UndefValue::get(Ty);
- return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
- case Intrinsic::ctlz:
- if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef.
- return UndefValue::get(Ty);
- return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
+ Res = C0->smul_ov(*C1, Overflow);
+ break;
+ case Intrinsic::umul_with_overflow:
+ Res = C0->umul_ov(*C1, Overflow);
+ break;
}
+ Constant *Ops[] = {
+ ConstantInt::get(Ty->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(Ty), Ops);
+ }
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getAllOnesValue(Ty);
+ if (IntrinsicID == Intrinsic::uadd_sat)
+ return ConstantInt::get(Ty, C0->uadd_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->sadd_sat(*C1));
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::usub_sat)
+ return ConstantInt::get(Ty, C0->usub_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->ssub_sat(*C1));
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ assert(C1 && "Must be constant int");
+
+ // cttz(0, 1) and ctlz(0, 1) are undef.
+ if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+ return UndefValue::get(Ty);
+ if (!C0)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::cttz)
+ return ConstantInt::get(Ty, C0->countTrailingZeros());
+ else
+ return ConstantInt::get(Ty, C0->countLeadingZeros());
}
return nullptr;
}
+
+ // Support ConstantVector in case we have an Undef in the top.
+ if ((isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) &&
+ // Check for default rounding mode.
+ // FIXME: Support other rounding modes?
+ isa<ConstantInt>(Operands[1]) &&
+ cast<ConstantInt>(Operands[1])->getValue() == 4) {
+ auto *Op = cast<Constant>(Operands[0]);
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/false);
+ break;
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/false);
+ break;
+ }
+ }
return nullptr;
}
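A small standalone sketch of the APInt arithmetic the new folding paths above rely on, with concrete i8 values (nothing here depends on IR objects):

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt A(8, 200), B(8, 100);
  bool Overflow;
  // uadd_with_overflow: 200 + 100 wraps to 44 in i8 and reports overflow.
  APInt Sum = A.uadd_ov(B, Overflow);
  assert(Overflow && Sum.getZExtValue() == 44);
  // uadd_sat: the same addition saturates at the unsigned maximum, 255.
  assert(A.uadd_sat(B).getZExtValue() == 255);
  // cttz with a non-zero argument folds to countTrailingZeros(): 8 -> 3.
  assert(APInt(8, 8).countTrailingZeros() == 3);
  return 0;
}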
@@ -2010,6 +2179,36 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
}
+ if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
+ const APInt *C0, *C1, *C2;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1) ||
+ !getConstIntOrUndef(Operands[2], C2))
+ return nullptr;
+
+ bool IsRight = IntrinsicID == Intrinsic::fshr;
+ if (!C2)
+ return Operands[IsRight ? 1 : 0];
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+
+ // The shift amount is interpreted as modulo the bitwidth. If the shift
+ // amount is effectively 0, avoid UB due to oversized inverse shift below.
+ unsigned BitWidth = C2->getBitWidth();
+ unsigned ShAmt = C2->urem(BitWidth);
+ if (!ShAmt)
+ return Operands[IsRight ? 1 : 0];
+
+ // (C0 << ShlAmt) | (C1 >> LshrAmt)
+ unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
+ unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
+ if (!C0)
+ return ConstantInt::get(Ty, C1->lshr(LshrAmt));
+ if (!C1)
+ return ConstantInt::get(Ty, C0->shl(ShlAmt));
+ return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
+ }
+
return nullptr;
}
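A worked example of the funnel-shift folding above, again as a standalone sketch over APInt with concrete i8 values:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt C0(8, 0xAB), C1(8, 0xCD);
  unsigned BitWidth = 8;
  unsigned ShAmt = 4 % BitWidth; // the shift amount is taken modulo the width
  // fshl(C0, C1, 4) == (C0 << 4) | (C1 >> (8 - 4)) == 0xB0 | 0x0C == 0xBC
  APInt Res = C0.shl(ShAmt) | C1.lshr(BitWidth - ShAmt);
  assert(Res.getZExtValue() == 0xBC);
  return 0;
}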
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
index e7637cd88327..34f785fb02be 100644
--- a/contrib/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -21,8 +21,7 @@
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -39,6 +38,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
@@ -50,6 +50,7 @@
#include <cstdint>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "demanded-bits"
@@ -78,13 +79,14 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
}
static bool isAlwaysLive(Instruction *I) {
- return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- I->isEHPad() || I->mayHaveSideEffects();
+ return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
+ I->mayHaveSideEffects();
}
void DemandedBits::determineLiveOperandBits(
- const Instruction *UserI, const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2) {
+ const Instruction *UserI, const Value *Val, unsigned OperandNo,
+ const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2,
+ bool &KnownBitsComputed) {
unsigned BitWidth = AB.getBitWidth();
// We're called once per operand, but for some instructions, we need to
@@ -95,7 +97,11 @@ void DemandedBits::determineLiveOperandBits(
// provided here.
auto ComputeKnownBits =
[&](unsigned BitWidth, const Value *V1, const Value *V2) {
- const DataLayout &DL = I->getModule()->getDataLayout();
+ if (KnownBitsComputed)
+ return;
+ KnownBitsComputed = true;
+
+ const DataLayout &DL = UserI->getModule()->getDataLayout();
Known = KnownBits(BitWidth);
computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT);
@@ -127,7 +133,7 @@ void DemandedBits::determineLiveOperandBits(
// We need some output bits, so we need all bits of the
// input to the left of, and including, the leftmost bit
// known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
+ ComputeKnownBits(BitWidth, Val, nullptr);
AB = APInt::getHighBitsSet(BitWidth,
std::min(BitWidth, Known.countMaxLeadingZeros()+1));
}
@@ -137,11 +143,33 @@ void DemandedBits::determineLiveOperandBits(
// We need some output bits, so we need all bits of the
// input to the right of, and including, the rightmost bit
// known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
+ ComputeKnownBits(BitWidth, Val, nullptr);
AB = APInt::getLowBitsSet(BitWidth,
std::min(BitWidth, Known.countMaxTrailingZeros()+1));
}
break;
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ const APInt *SA;
+ if (OperandNo == 2) {
+ // Shift amount is modulo the bitwidth. For powers of two we have
+ // SA % BW == SA & (BW - 1).
+ if (isPowerOf2_32(BitWidth))
+ AB = BitWidth - 1;
+ } else if (match(II->getOperand(2), m_APInt(SA))) {
+ // Normalize to funnel shift left. APInt shifts of BitWidth are well-
+ // defined, so no need to special-case zero shifts here.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ if (OperandNo == 0)
+ AB = AOut.lshr(ShiftAmt);
+ else if (OperandNo == 1)
+ AB = AOut.shl(BitWidth - ShiftAmt);
+ }
+ break;
+ }
}
break;
case Instruction::Add:
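A hedged sketch of the operand-demand rule above for a concrete fshl on i8 with a constant shift amount of 3; the values are illustrative. Note how operand 1 ends up with no demanded bits, which is exactly the situation the new DeadUses set records.

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  unsigned BitWidth = 8, ShiftAmt = 3; // already normalized for fshl
  // Suppose only the top four output bits are demanded downstream.
  APInt AOut = APInt::getHighBitsSet(BitWidth, 4); // 0xF0
  // Operand 0 produces output bits [7:3], so its demand shifts right by 3.
  APInt AB0 = AOut.lshr(ShiftAmt); // 0x1E
  // Operand 1 only produces output bits [2:0], none of which are demanded,
  // so it has no demanded bits at all (a dead use).
  APInt AB1 = AOut.shl(BitWidth - ShiftAmt); // 0x00
  assert(AB0.getZExtValue() == 0x1E && AB1.isNullValue());
  return 0;
}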
@@ -153,8 +181,9 @@ void DemandedBits::determineLiveOperandBits(
AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
break;
case Instruction::Shl:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.lshr(ShiftAmt);
@@ -166,10 +195,12 @@ void DemandedBits::determineLiveOperandBits(
else if (S->hasNoUnsignedWrap())
AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::LShr:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
@@ -178,10 +209,12 @@ void DemandedBits::determineLiveOperandBits(
if (cast<LShrOperator>(UserI)->isExact())
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::AShr:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
// Because the high input bit is replicated into the
@@ -196,6 +229,7 @@ void DemandedBits::determineLiveOperandBits(
if (cast<AShrOperator>(UserI)->isExact())
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::And:
AB = AOut;
@@ -204,14 +238,11 @@ void DemandedBits::determineLiveOperandBits(
// other operand are dead (unless they're both zero, in which
// case they can't both be dead, so just mark the LHS bits as
// dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
+ if (OperandNo == 0)
AB &= ~Known2.Zero;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ else
AB &= ~(Known.Zero & ~Known2.Zero);
- }
break;
case Instruction::Or:
AB = AOut;
@@ -220,14 +251,11 @@ void DemandedBits::determineLiveOperandBits(
// other operand are dead (unless they're both one, in which
// case they can't both be dead, so just mark the LHS bits as
// dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
+ if (OperandNo == 0)
AB &= ~Known2.One;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ else
AB &= ~(Known.One & ~Known2.One);
- }
break;
case Instruction::Xor:
case Instruction::PHI:
@@ -253,6 +281,15 @@ void DemandedBits::determineLiveOperandBits(
if (OperandNo != 0)
AB = AOut;
break;
+ case Instruction::ExtractElement:
+ if (OperandNo == 0)
+ AB = AOut;
+ break;
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ if (OperandNo == 0 || OperandNo == 1)
+ AB = AOut;
+ break;
}
}
@@ -275,8 +312,9 @@ void DemandedBits::performAnalysis() {
Visited.clear();
AliveBits.clear();
+ DeadUses.clear();
- SmallVector<Instruction*, 128> Worklist;
+ SmallSetVector<Instruction*, 16> Worklist;
// Collect the set of "root" instructions that are known live.
for (Instruction &I : instructions(F)) {
@@ -288,9 +326,10 @@ void DemandedBits::performAnalysis() {
// bits and add the instruction to the work list. For other instructions
// add their operands to the work list (for integer values operands, mark
// all bits as live).
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
- Worklist.push_back(&I);
+ Type *T = I.getType();
+ if (T->isIntOrIntVectorTy()) {
+ if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
+ Worklist.insert(&I);
continue;
}
@@ -298,9 +337,10 @@ void DemandedBits::performAnalysis() {
// Non-integer-typed instructions...
for (Use &OI : I.operands()) {
if (Instruction *J = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
- AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
- Worklist.push_back(J);
+ Type *T = J->getType();
+ if (T->isIntOrIntVectorTy())
+ AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+ Worklist.insert(J);
}
}
// To save memory, we don't add I to the Visited set here. Instead, we
@@ -315,35 +355,51 @@ void DemandedBits::performAnalysis() {
LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
APInt AOut;
- if (UserI->getType()->isIntegerTy()) {
+ if (UserI->getType()->isIntOrIntVectorTy()) {
AOut = AliveBits[UserI];
- LLVM_DEBUG(dbgs() << " Alive Out: " << AOut);
+ LLVM_DEBUG(dbgs() << " Alive Out: 0x"
+ << Twine::utohexstr(AOut.getLimitedValue()));
}
LLVM_DEBUG(dbgs() << "\n");
- if (!UserI->getType()->isIntegerTy())
+ if (!UserI->getType()->isIntOrIntVectorTy())
Visited.insert(UserI);
KnownBits Known, Known2;
+ bool KnownBitsComputed = false;
// Compute the set of alive bits for each operand. These are anded into the
// existing set, if any, and if that changes the set of alive bits, the
// operand is added to the work-list.
for (Use &OI : UserI->operands()) {
- if (Instruction *I = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
- unsigned BitWidth = IT->getBitWidth();
- APInt AB = APInt::getAllOnesValue(BitWidth);
- if (UserI->getType()->isIntegerTy() && !AOut &&
- !isAlwaysLive(UserI)) {
- AB = APInt(BitWidth, 0);
- } else {
- // If all bits of the output are dead, then all bits of the input
- // Bits of each operand that are used to compute alive bits of the
- // output are alive, all others are dead.
- determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
- Known, Known2);
- }
+ // We also want to detect dead uses of arguments, but will only store
+ // demanded bits for instructions.
+ Instruction *I = dyn_cast<Instruction>(OI);
+ if (!I && !isa<Argument>(OI))
+ continue;
+
+ Type *T = OI->getType();
+ if (T->isIntOrIntVectorTy()) {
+ unsigned BitWidth = T->getScalarSizeInBits();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+ if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
+ !isAlwaysLive(UserI)) {
+ // If all bits of the output are dead, then all bits of the input
+ // are also dead.
+ AB = APInt(BitWidth, 0);
+ } else {
+ // Bits of each operand that are used to compute alive bits of the
+ // output are alive, all others are dead.
+ determineLiveOperandBits(UserI, OI, OI.getOperandNo(), AOut, AB,
+ Known, Known2, KnownBitsComputed);
+
+ // Keep track of uses which have no demanded bits.
+ if (AB.isNullValue())
+ DeadUses.insert(&OI);
+ else
+ DeadUses.erase(&OI);
+ }
+ if (I) {
// If we've added to the set of alive bits (or the operand has not
// been previously visited), then re-queue the operand to be visited
// again.
@@ -355,11 +411,11 @@ void DemandedBits::performAnalysis() {
APInt ABNew = AB | ABPrev;
if (ABNew != ABPrev || ABI == AliveBits.end()) {
AliveBits[I] = std::move(ABNew);
- Worklist.push_back(I);
+ Worklist.insert(I);
}
- } else if (!Visited.count(I)) {
- Worklist.push_back(I);
}
+ } else if (I && !Visited.count(I)) {
+ Worklist.insert(I);
}
}
}
@@ -368,11 +424,13 @@ void DemandedBits::performAnalysis() {
APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
- const DataLayout &DL = I->getModule()->getDataLayout();
auto Found = AliveBits.find(I);
if (Found != AliveBits.end())
return Found->second;
- return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
+
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ return APInt::getAllOnesValue(
+ DL.getTypeSizeInBits(I->getType()->getScalarType()));
}
bool DemandedBits::isInstructionDead(Instruction *I) {
@@ -382,6 +440,31 @@ bool DemandedBits::isInstructionDead(Instruction *I) {
!isAlwaysLive(I);
}
+bool DemandedBits::isUseDead(Use *U) {
+ // We only track integer uses, everything else is assumed live.
+ if (!(*U)->getType()->isIntOrIntVectorTy())
+ return false;
+
+ // Uses by always-live instructions are never dead.
+ Instruction *UserI = cast<Instruction>(U->getUser());
+ if (isAlwaysLive(UserI))
+ return false;
+
+ performAnalysis();
+ if (DeadUses.count(U))
+ return true;
+
+ // If no output bits are demanded, no input bits are demanded and the use
+ // is dead. These uses might not be explicitly present in the DeadUses map.
+ if (UserI->getType()->isIntOrIntVectorTy()) {
+ auto Found = AliveBits.find(UserI);
+ if (Found != AliveBits.end() && Found->second.isNullValue())
+ return true;
+ }
+
+ return false;
+}
+
void DemandedBits::print(raw_ostream &OS) {
performAnalysis();
for (auto &KV : AliveBits) {
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index 79c2728d5620..3f4dfa52e1da 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -194,6 +194,13 @@ void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
dumpExampleDependence(OS, info.get());
}
+PreservedAnalyses
+DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";
+ dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F));
+ return PreservedAnalyses::all();
+}
+
//===----------------------------------------------------------------------===//
// Dependence methods
@@ -633,8 +640,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
const MemoryLocation &LocB) {
// Check the original locations (minus size) for noalias, which can happen for
// tbaa, incompatible underlying object locations, etc.
- MemoryLocation LocAS(LocA.Ptr, MemoryLocation::UnknownSize, LocA.AATags);
- MemoryLocation LocBS(LocB.Ptr, MemoryLocation::UnknownSize, LocB.AATags);
+ MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags);
+ MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags);
if (AA->alias(LocAS, LocBS) == NoAlias)
return NoAlias;
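A hedged sketch of the LocationSize usage the hunk above switches to; the wrapper function is illustrative and mirrors how underlyingObjectsAlias() drops the size before querying alias analysis.

#include "llvm/Analysis/MemoryLocation.h"
using namespace llvm;

MemoryLocation withoutSize(const MemoryLocation &Loc) {
  // Keep the pointer and AA metadata but replace the size with "unknown",
  // which takes the place of the raw MemoryLocation::UnknownSize constant.
  return MemoryLocation(Loc.Ptr, LocationSize::unknown(), Loc.AATags);
}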
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
index f5f1874c9303..7ba23854a3cc 100644
--- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements divergence analysis which determines whether a branch
-// in a GPU program is divergent.It can help branch optimizations such as jump
+// This file implements a general divergence analysis for loop vectorization
+// and GPU programs. It determines which branches and values in a loop or GPU
+// program are divergent. It can help branch optimizations such as jump
// threading and loop unswitching to make better decisions.
//
// GPU programs typically use the SIMD execution model, where multiple threads
@@ -16,25 +17,29 @@
// code contains divergent branches (i.e., threads in a group do not agree on
// which path of the branch to take), the group of threads has to execute all
// the paths from that branch with different subsets of threads enabled until
-// they converge at the immediately post-dominating BB of the paths.
+// they re-converge.
//
// Due to this execution model, some optimizations such as jump
-// threading and loop unswitching can be unfortunately harmful when performed on
-// divergent branches. Therefore, an analysis that computes which branches in a
-// GPU program are divergent can help the compiler to selectively run these
-// optimizations.
+// threading and loop unswitching can interfere with thread re-convergence.
+// Therefore, an analysis that computes which branches in a GPU program are
+// divergent can help the compiler to selectively run these optimizations.
//
-// This file defines divergence analysis which computes a conservative but
-// non-trivial approximation of all divergent branches in a GPU program. It
-// partially implements the approach described in
+// This implementation is derived from the Vectorization Analysis of the
+// Region Vectorizer (RV). That implementation in turn is based on the approach
+// described in
//
-// Divergence Analysis
-// Sampaio, Souza, Collange, Pereira
-// TOPLAS '13
+// Improving Performance of OpenCL on CPUs
+// Ralf Karrenberg and Sebastian Hack
+// CC '12
//
-// The divergence analysis identifies the sources of divergence (e.g., special
-// variables that hold the thread ID), and recursively marks variables that are
-// data or sync dependent on a source of divergence as divergent.
+// This DivergenceAnalysis implementation is generic in the sense that it does
+// not itself identify original sources of divergence.
+// Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
+// (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence
+// (e.g., special variables that hold the thread ID or the iteration variable).
+//
+// The generic implementation propagates divergence to variables that are data
+// or sync dependent on a source of divergence.
//
// While data dependency is a well-known concept, the notion of sync dependency
// is worth more explanation. Sync dependence characterizes the control flow
@@ -54,287 +59,399 @@
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
-// The current implementation has the following limitations:
+// The sync dependence detection (which branch induces divergence in which join
+// points) is implemented in the SyncDependenceAnalysis.
+//
+// The current DivergenceAnalysis implementation has the following limitations:
// 1. intra-procedural. It conservatively considers the arguments of a
// non-kernel-entry function and the return value of a function call as
// divergent.
// 2. memory as black box. It conservatively considers values loaded from
// generic or local address as divergent. This can be improved by leveraging
-// pointer analysis.
+// pointer analysis and/or by modelling non-escaping memory objects in SSA
+// as done in RV.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
+
using namespace llvm;
-#define DEBUG_TYPE "divergence"
-
-namespace {
-
-class DivergencePropagator {
-public:
- DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, DenseSet<const Value *> &DV)
- : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
- void populateWithSourcesOfDivergence();
- void propagate();
-
-private:
- // A helper function that explores data dependents of V.
- void exploreDataDependency(Value *V);
- // A helper function that explores sync dependents of TI.
- void exploreSyncDependency(TerminatorInst *TI);
- // Computes the influence region from Start to End. This region includes all
- // basic blocks on any simple path from Start to End.
- void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion);
- // Finds all users of I that are outside the influence region, and add these
- // users to Worklist.
- void findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
-
- Function &F;
- TargetTransformInfo &TTI;
- DominatorTree &DT;
- PostDominatorTree &PDT;
- std::vector<Value *> Worklist; // Stack for DFS.
- DenseSet<const Value *> &DV; // Stores all divergent values.
-};
-
-void DivergencePropagator::populateWithSourcesOfDivergence() {
- Worklist.clear();
- DV.clear();
- for (auto &I : instructions(F)) {
- if (TTI.isSourceOfDivergence(&I)) {
- Worklist.push_back(&I);
- DV.insert(&I);
- }
+#define DEBUG_TYPE "divergence-analysis"
+
+// class DivergenceAnalysis
+DivergenceAnalysis::DivergenceAnalysis(
+ const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
+ const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
+ : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
+ IsLCSSAForm(IsLCSSAForm) {}
+
+void DivergenceAnalysis::markDivergent(const Value &DivVal) {
+ assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
+ assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
+ DivergentValues.insert(&DivVal);
+}
+
+void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
+ UniformOverrides.insert(&UniVal);
+}
+
+bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const {
+ if (Term.getNumSuccessors() <= 1)
+ return false;
+ if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
+ assert(BranchTerm->isConditional());
+ return isDivergent(*BranchTerm->getCondition());
}
- for (auto &Arg : F.args()) {
- if (TTI.isSourceOfDivergence(&Arg)) {
- Worklist.push_back(&Arg);
- DV.insert(&Arg);
- }
+ if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
+ return isDivergent(*SwitchTerm->getCondition());
+ }
+ if (isa<InvokeInst>(Term)) {
+ return false; // ignore abnormal executions through landingpad
}
+
+ llvm_unreachable("unexpected terminator");
}
-void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
- // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
- // immediate post dominator are divergent. This rule handles if-then-else
- // patterns. For example,
- //
- // if (tid < 5)
- // a1 = 1;
- // else
- // a2 = 2;
- // a = phi(a1, a2); // sync dependent on (tid < 5)
- BasicBlock *ThisBB = TI->getParent();
-
- // Unreachable blocks may not be in the dominator tree.
- if (!DT.isReachableFromEntry(ThisBB))
- return;
+bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
+ // TODO function calls with side effects, etc
+ for (const auto &Op : I.operands()) {
+ if (isDivergent(*Op))
+ return true;
+ }
+ return false;
+}
- // If the function has no exit blocks or doesn't reach any exit blocks, the
- // post dominator may be null.
- DomTreeNode *ThisNode = PDT.getNode(ThisBB);
- if (!ThisNode)
- return;
+bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
+ const Value &Val) const {
+ const auto *Inst = dyn_cast<const Instruction>(&Val);
+ if (!Inst)
+ return false;
+ // check whether any divergent loop carrying Val terminates before control
+ // proceeds to ObservingBlock
+ for (const auto *Loop = LI.getLoopFor(Inst->getParent());
+ Loop != RegionLoop && !Loop->contains(&ObservingBlock);
+ Loop = Loop->getParentLoop()) {
+ if (DivergentLoops.find(Loop) != DivergentLoops.end())
+ return true;
+ }
- BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
- if (IPostDom == nullptr)
- return;
+ return false;
+}
- for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
- // A PHINode is uniform if it returns the same value no matter which path is
- // taken.
- if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
- Worklist.push_back(&*I);
+bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
+ // Join of divergent disjoint paths in Phi's parent block
+ if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
+ return true;
}
- // Propagation rule 2: if a value defined in a loop is used outside, the user
- // is sync dependent on the condition of the loop exits that dominate the
- // user. For example,
- //
- // int i = 0;
- // do {
- // i++;
- // if (foo(i)) ... // uniform
- // } while (i < tid);
- // if (bar(i)) ... // divergent
+ // An incoming value could be divergent by itself.
+ // Otherwise, an incoming value could be uniform within the loop
+ // that carries its definition but still appear divergent when
+ // observed from outside the loop. This happens when divergent loop
+ // exits drop definitions of that uniform value in different iterations.
//
- // A program may contain unstructured loops. Therefore, we cannot leverage
- // LoopInfo, which only recognizes natural loops.
- //
- // The algorithm used here handles both natural and unstructured loops. Given
- // a branch TI, we first compute its influence region, the union of all simple
- // paths from TI to its immediate post dominator (IPostDom). Then, we search
- // for all the values defined in the influence region but used outside. All
- // these users are sync dependent on TI.
- DenseSet<BasicBlock *> InfluenceRegion;
- computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
- // An insight that can speed up the search process is that all the in-region
- // values that are used outside must dominate TI. Therefore, instead of
- // searching every basic blocks in the influence region, we search all the
- // dominators of TI until it is outside the influence region.
- BasicBlock *InfluencedBB = ThisBB;
- while (InfluenceRegion.count(InfluencedBB)) {
- for (auto &I : *InfluencedBB)
- findUsersOutsideInfluenceRegion(I, InfluenceRegion);
- DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
- if (IDomNode == nullptr)
- break;
- InfluencedBB = IDomNode->getBlock();
+ // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
+ // if (i % thread_id == 0) break; // divergent loop exit
+ // }
+ // int divI = i; // divI is divergent
+ for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
+ const auto *InVal = Phi.getIncomingValue(i);
+ if (isDivergent(*Phi.getIncomingValue(i)) ||
+ isTemporalDivergent(*Phi.getParent(), *InVal)) {
+ return true;
+ }
}
+ return false;
}
-void DivergencePropagator::findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
- for (User *U : I.users()) {
- Instruction *UserInst = cast<Instruction>(U);
- if (!InfluenceRegion.count(UserInst->getParent())) {
- if (DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
+bool DivergenceAnalysis::inRegion(const Instruction &I) const {
+ return I.getParent() && inRegion(*I.getParent());
+}
+
+bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
+ return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
+}
+
+// marks all users of loop-carried values of the loop headed by LoopHeader as
+// divergent
+void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
+ auto *DivLoop = LI.getLoopFor(&LoopHeader);
+ assert(DivLoop && "loopHeader is not actually part of a loop");
+
+ SmallVector<BasicBlock *, 8> TaintStack;
+ DivLoop->getExitBlocks(TaintStack);
+
+ // Beyond the exit blocks, potential users of loop-carried values can be
+ // anywhere in the dominance region of DivLoop (including its fringes for
+ // phi nodes).
+ DenseSet<const BasicBlock *> Visited;
+ for (auto *Block : TaintStack) {
+ Visited.insert(Block);
+ }
+ Visited.insert(&LoopHeader);
+
+ while (!TaintStack.empty()) {
+ auto *UserBlock = TaintStack.back();
+ TaintStack.pop_back();
+
+ // don't spread divergence beyond the region
+ if (!inRegion(*UserBlock))
+ continue;
+
+ assert(!DivLoop->contains(UserBlock) &&
+ "irreducible control flow detected");
+
+ // phi nodes at the fringes of the dominance region
+ if (!DT.dominates(&LoopHeader, UserBlock)) {
+ // all PHI nodes of UserBlock become divergent
+ for (auto &Phi : UserBlock->phis()) {
+ Worklist.push_back(&Phi);
+ }
+ continue;
+ }
+
+ // taint outside users of values carried by DivLoop
+ for (auto &I : *UserBlock) {
+ if (isAlwaysUniform(I))
+ continue;
+ if (isDivergent(I))
+ continue;
+
+ for (auto &Op : I.operands()) {
+ auto *OpInst = dyn_cast<Instruction>(&Op);
+ if (!OpInst)
+ continue;
+ if (DivLoop->contains(OpInst->getParent())) {
+ markDivergent(I);
+ pushUsers(I);
+ break;
+ }
+ }
+ }
+
+ // visit all blocks in the dominance region
+ for (auto *SuccBlock : successors(UserBlock)) {
+ if (!Visited.insert(SuccBlock).second) {
+ continue;
+ }
+ TaintStack.push_back(SuccBlock);
}
}
}
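+
+// Illustrative sketch (hypothetical block/value names, not taken from the
+// sources above): if the loop below is divergent, the outside use of %v is
+// tainted even though %v is uniform inside the loop, because threads may
+// leave the loop in different iterations:
+//   loop:                              ; divergent loop
+//     %v = add i32 %v.prev, 1          ; uniform within the loop
+//     br i1 %exit.cond, label %exit, label %loop
+//   exit:
+//     %u = mul i32 %v, 2               ; outside user becomes divergent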
-// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
-// to the influence region.
-static void
-addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion,
- std::vector<BasicBlock *> &InfluenceStack) {
- for (BasicBlock *Succ : successors(ThisBB)) {
- if (Succ != End && InfluenceRegion.insert(Succ).second)
- InfluenceStack.push_back(Succ);
+void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
+ for (const auto &Phi : Block.phis()) {
+ if (isDivergent(Phi))
+ continue;
+ Worklist.push_back(&Phi);
}
}
-void DivergencePropagator::computeInfluenceRegion(
- BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion) {
- assert(PDT.properlyDominates(End, Start) &&
- "End does not properly dominate Start");
-
- // The influence region starts from the end of "Start" to the beginning of
- // "End". Therefore, "Start" should not be in the region unless "Start" is in
- // a loop that doesn't contain "End".
- std::vector<BasicBlock *> InfluenceStack;
- addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
- while (!InfluenceStack.empty()) {
- BasicBlock *BB = InfluenceStack.back();
- InfluenceStack.pop_back();
- addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
+void DivergenceAnalysis::pushUsers(const Value &V) {
+ for (const auto *User : V.users()) {
+ const auto *UserInst = dyn_cast<const Instruction>(User);
+ if (!UserInst)
+ continue;
+
+ if (isDivergent(*UserInst))
+ continue;
+
+ // only compute divergence inside the region of interest
+ if (!inRegion(*UserInst))
+ continue;
+ Worklist.push_back(UserInst);
}
}
-void DivergencePropagator::exploreDataDependency(Value *V) {
- // Follow def-use chains of V.
- for (User *U : V->users()) {
- Instruction *UserInst = cast<Instruction>(U);
- if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
+bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
+ const Loop *BranchLoop) {
+ LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");
+
+ // ignore divergence outside the region
+ if (!inRegion(JoinBlock)) {
+ return false;
+ }
+
+ // push non-divergent phi nodes in JoinBlock to the worklist
+ pushPHINodes(JoinBlock);
+
+ // JoinBlock is a divergent loop exit
+ if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
+ return true;
}
+
+ // disjoint-paths divergent at JoinBlock
+ markBlockJoinDivergent(JoinBlock);
+ return false;
}
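+
+// Illustrative sketch (hypothetical names): for a divergent branch, both
+// disjoint paths meet again at %join, so %join is marked join divergent and
+// its phi nodes are pushed to the worklist:
+//   br i1 %div.cond, label %then, label %else
+//   ...
+//   join:
+//     %p = phi i32 [ %a, %then ], [ %b, %else ]   ; becomes divergent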
-void DivergencePropagator::propagate() {
- // Traverse the dependency graph using DFS.
- while (!Worklist.empty()) {
- Value *V = Worklist.back();
- Worklist.pop_back();
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
- // Terminators with less than two successors won't introduce sync
- // dependency. Ignore them.
- if (TI->getNumSuccessors() > 1)
- exploreSyncDependency(TI);
+void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {
+ LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");
+
+ markDivergent(Term);
+
+ const auto *BranchLoop = LI.getLoopFor(Term.getParent());
+
+ // whether there is a divergent loop exit from BranchLoop (if any)
+ bool IsBranchLoopDivergent = false;
+
+ // iterate over all blocks reachable by disjoint paths from Term within the
+ // loop; this also covers loop exits that become divergent due to Term.
+ for (const auto *JoinBlock : SDA.join_blocks(Term)) {
+ IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
+ }
+
+ // The branch's loop becomes a divergent loop due to the divergent branch Term
+ if (IsBranchLoopDivergent) {
+ assert(BranchLoop);
+ if (!DivergentLoops.insert(BranchLoop).second) {
+ return;
}
- exploreDataDependency(V);
+ propagateLoopDivergence(*BranchLoop);
}
}
-} /// end namespace anonymous
+void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
+ LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");
+
+ // don't propagate beyond region
+ if (!inRegion(*ExitingLoop.getHeader()))
+ return;
-// Register this pass.
-char DivergenceAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
+ const auto *BranchLoop = ExitingLoop.getParentLoop();
-FunctionPass *llvm::createDivergenceAnalysisPass() {
- return new DivergenceAnalysis();
-}
+ // Uses of loop-carried values could occur anywhere
+ // within the dominance region of the definition. All loop-carried
+ // definitions are dominated by the loop header (reducible control).
+ // Thus all users have to be in the dominance region of the loop header,
+ // except PHI nodes that can also live at the fringes of the dom region
+ // (incoming defining value).
+ if (!IsLCSSAForm)
+ taintLoopLiveOuts(*ExitingLoop.getHeader());
+
+ // whether there is a divergent loop exit from BranchLoop (if any)
+ bool IsBranchLoopDivergent = false;
+
+ // iterate over all blocks reachable by disjoint paths from the exits of
+ // ExitingLoop; this also covers loop exits (of BranchLoop) that in turn
+ // become divergent.
+ for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
+ IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
+ }
-void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.setPreservesAll();
+ // The branch loop is divergent due to the divergent loop exit in ExitingLoop
+ if (IsBranchLoopDivergent) {
+ assert(BranchLoop);
+ if (!DivergentLoops.insert(BranchLoop).second) {
+ return;
+ }
+ propagateLoopDivergence(*BranchLoop);
+ }
}
-bool DivergenceAnalysis::runOnFunction(Function &F) {
- auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
- if (TTIWP == nullptr)
- return false;
+void DivergenceAnalysis::compute() {
+ for (auto *DivVal : DivergentValues) {
+ pushUsers(*DivVal);
+ }
- TargetTransformInfo &TTI = TTIWP->getTTI(F);
- // Fast path: if the target does not have branch divergence, we do not mark
- // any branch as divergent.
- if (!TTI.hasBranchDivergence())
- return false;
+ // propagate divergence
+ while (!Worklist.empty()) {
+ const Instruction &I = *Worklist.back();
+ Worklist.pop_back();
- DivergentValues.clear();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- DivergencePropagator DP(F, TTI,
- getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- PDT, DivergentValues);
- DP.populateWithSourcesOfDivergence();
- DP.propagate();
- LLVM_DEBUG(
- dbgs() << "\nAfter divergence analysis on " << F.getName() << ":\n";
- print(dbgs(), F.getParent())
- );
- return false;
+ // maintain uniformity of overrides
+ if (isAlwaysUniform(I))
+ continue;
+
+ bool WasDivergent = isDivergent(I);
+ if (WasDivergent)
+ continue;
+
+ // propagate divergence caused by terminator
+ if (I.isTerminator()) {
+ if (updateTerminator(I)) {
+ // propagate control divergence to affected instructions
+ propagateBranchDivergence(I);
+ continue;
+ }
+ }
+
+ // update divergence of I due to divergent operands
+ bool DivergentUpd = false;
+ const auto *Phi = dyn_cast<const PHINode>(&I);
+ if (Phi) {
+ DivergentUpd = updatePHINode(*Phi);
+ } else {
+ DivergentUpd = updateNormalInstruction(I);
+ }
+
+ // propagate value divergence to users
+ if (DivergentUpd) {
+ markDivergent(I);
+ pushUsers(I);
+ }
+ }
+}
+
+bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
+ return UniformOverrides.find(&V) != UniformOverrides.end();
+}
+
+bool DivergenceAnalysis::isDivergent(const Value &V) const {
+ return DivergentValues.find(&V) != DivergentValues.end();
}
void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.empty())
return;
- const Value *FirstDivergentValue = *DivergentValues.begin();
- const Function *F;
- if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
- F = Arg->getParent();
- } else if (const Instruction *I =
- dyn_cast<Instruction>(FirstDivergentValue)) {
- F = I->getParent()->getParent();
- } else {
- llvm_unreachable("Only arguments and instructions can be divergent");
+ // Iterate instructions using instructions() to ensure a deterministic order.
+ for (auto &I : instructions(F)) {
+ if (isDivergent(I))
+ OS << "DIVERGENT:" << I << '\n';
}
+}
- // Dumps all divergent values in F, arguments and then instructions.
- for (auto &Arg : F->args()) {
- OS << (DivergentValues.count(&Arg) ? "DIVERGENT: " : " ");
- OS << Arg << "\n";
+// class GPUDivergenceAnalysis
+GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ const LoopInfo &LI,
+ const TargetTransformInfo &TTI)
+ : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) {
+ for (auto &I : instructions(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ DA.markDivergent(I);
+ } else if (TTI.isAlwaysUniform(&I)) {
+ DA.addUniformOverride(I);
+ }
}
- // Iterate instructions using instructions() to ensure a deterministic order.
- for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
- auto &BB = *BI;
- OS << "\n " << BB.getName() << ":\n";
- for (auto &I : BB.instructionsWithoutDebug()) {
- OS << (DivergentValues.count(&I) ? "DIVERGENT: " : " ");
- OS << I << "\n";
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ DA.markDivergent(Arg);
}
}
- OS << "\n";
+
+ DA.compute();
+}
+
+bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
+ return DA.isDivergent(val);
+}
+
+void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
+ OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
+ DA.print(OS, mod);
+ OS << "}\n";
}
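+
+// Minimal usage sketch (illustrative only; the surrounding pass boilerplate
+// and variable names are assumed, not part of this file):
+//   GPUDivergenceAnalysis GDA(F, DT, PDT, LI, TTI);
+//   for (auto &I : instructions(F))
+//     if (GDA.isDivergent(I))
+//       dbgs() << "divergent: " << I << "\n";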
diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
index 2d35a3fa9118..0df73aeebbdc 100644
--- a/contrib/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
@@ -120,7 +120,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
<< "\'.\n");
BasicBlock *SuccColor = Color;
- TerminatorInst *Terminator = Visiting->getTerminator();
+ Instruction *Terminator = Visiting->getTerminator();
if (auto *CatchRet = dyn_cast<CatchReturnInst>(Terminator)) {
Value *ParentPad = CatchRet->getCatchSwitchParentPad();
if (isa<ConstantTokenNone>(ParentPad))
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
index 2c503609d96b..b28abcadca4a 100644
--- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -255,11 +255,11 @@ FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
}
FunctionModRefBehavior
-GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
+GlobalsAAResult::getModRefBehavior(const CallBase *Call) {
FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
- if (!CS.hasOperandBundles())
- if (const Function *F = CS.getCalledFunction())
+ if (!Call->hasOperandBundles())
+ if (const Function *F = Call->getCalledFunction())
if (FunctionInfo *FI = getFunctionInfo(F)) {
if (!isModOrRefSet(FI->getModRefInfo()))
Min = FMRB_DoesNotAccessMemory;
@@ -267,7 +267,7 @@ GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
Min = FMRB_OnlyReadsMemory;
}
- return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min);
}
/// Returns the function info for the function, or null if we don't have
@@ -366,14 +366,14 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else if (Operator::getOpcode(I) == Instruction::BitCast) {
if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
return true;
- } else if (auto CS = CallSite(I)) {
+ } else if (auto *Call = dyn_cast<CallBase>(I)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
- if (CS.isDataOperand(&U)) {
+ if (Call->isDataOperand(&U)) {
// Detect calls to free.
- if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
+ if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) {
if (Writers)
- Writers->insert(CS->getParent()->getParent());
+ Writers->insert(Call->getParent()->getParent());
} else {
return true; // Argument of an unknown call.
}
@@ -576,15 +576,15 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We handle calls specially because the graph-relevant aspects are
// handled above.
- if (auto CS = CallSite(&I)) {
- if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) {
// FIXME: It is completely unclear why this is necessary and not
// handled by the above graph code.
FI.addModRefInfo(ModRefInfo::ModRef);
- } else if (Function *Callee = CS.getCalledFunction()) {
+ } else if (Function *Callee = Call->getCalledFunction()) {
// The callgraph doesn't include intrinsic calls.
if (Callee->isIntrinsic()) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (isa<DbgInfoIntrinsic>(Call))
// Don't let dbg intrinsics affect alias info.
continue;
@@ -885,16 +885,16 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
return AAResultBase::alias(LocA, LocB);
}
-ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
const GlobalValue *GV) {
- if (CS.doesNotAccessMemory())
+ if (Call->doesNotAccessMemory())
return ModRefInfo::NoModRef;
ModRefInfo ConservativeResult =
- CS.onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef;
+ Call->onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef;
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
- for (auto &A : CS.args()) {
+ for (auto &A : Call->args()) {
SmallVector<Value*, 4> Objects;
GetUnderlyingObjects(A, Objects, DL);
@@ -914,7 +914,7 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
return ModRefInfo::NoModRef;
}
-ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
ModRefInfo Known = ModRefInfo::ModRef;
@@ -923,15 +923,15 @@ ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
if (const GlobalValue *GV =
dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
if (GV->hasLocalLinkage())
- if (const Function *F = CS.getCalledFunction())
+ if (const Function *F = Call->getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
if (const FunctionInfo *FI = getFunctionInfo(F))
Known = unionModRef(FI->getModRefInfoForGlobal(*GV),
- getModRefInfoForArgument(CS, GV));
+ getModRefInfoForArgument(Call, GV));
if (!isModOrRefSet(Known))
return ModRefInfo::NoModRef; // No need to query other mod/ref analyses
- return intersectModRef(Known, AAResultBase::getModRefInfo(CS, Loc));
+ return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc));
}
GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
diff --git a/contrib/llvm/lib/Analysis/GuardUtils.cpp b/contrib/llvm/lib/Analysis/GuardUtils.cpp
new file mode 100644
index 000000000000..08fa6abeafb5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/GuardUtils.cpp
@@ -0,0 +1,21 @@
+//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform analyses related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/PatternMatch.h"
+
+using namespace llvm;
+
+bool llvm::isGuard(const User *U) {
+ using namespace llvm::PatternMatch;
+ return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
+}
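+
+// Illustrative note (hypothetical value names): isGuard matches calls to the
+// guard intrinsic, e.g.
+//   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]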
diff --git a/contrib/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm/lib/Analysis/IVDescriptors.cpp
new file mode 100644
index 000000000000..aaebc4a481ec
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IVDescriptors.cpp
@@ -0,0 +1,1089 @@
+//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file "describes" induction and recurrence variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "iv-descriptors"
+
+bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Set) {
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
+ return true;
+}
+
+bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_IntegerOr:
+ case RK_IntegerAnd:
+ case RK_IntegerXor:
+ case RK_IntegerMinMax:
+ return true;
+ }
+ return false;
+}
+
+bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
+ return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
+}
+
+bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_FloatAdd:
+ case RK_FloatMult:
+ return true;
+ }
+ return false;
+}
+
+/// Determines if Phi may have been type-promoted. If Phi has a single user
+/// that ANDs the Phi with a type mask, return the user. RT is updated to
+/// account for the narrower bit width represented by the mask, and the AND
+/// instruction is added to CI.
+static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+ if (!Phi->hasOneUse())
+ return Phi;
+
+ const APInt *M = nullptr;
+ Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
+
+ // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
+ // with a new integer type of the corresponding bit width.
+ if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
+ int32_t Bits = (*M + 1).exactLogBase2();
+ if (Bits > 0) {
+ RT = IntegerType::get(Phi->getContext(), Bits);
+ Visited.insert(Phi);
+ CI.insert(J);
+ return J;
+ }
+ }
+ return Phi;
+}
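+
+// Illustrative example (hypothetical names): a phi that InstCombine has
+// type-promoted to i32 but that is masked back to 8 bits by its single user:
+//   %p    = phi i32 [ 0, %ph ], [ %next, %loop ]
+//   %and  = and i32 %p, 255            ; single user of %p; mask 2^8-1 -> i8
+//   %next = add i32 %and, %val
+// lookThroughAnd returns %and, sets RT to i8, and records %and in CI.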
+
+/// Compute the minimal bit width needed to represent a reduction whose exit
+/// instruction is given by Exit.
+static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
+ DemandedBits *DB,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ bool IsSigned = false;
+ const DataLayout &DL = Exit->getModule()->getDataLayout();
+ uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
+
+ if (DB) {
+ // Use the demanded bits analysis to determine the bits that are live out
+ // of the exit instruction, rounding up to the nearest power of two. If the
+ // use of demanded bits results in a smaller bit width, we know the value
+ // must be positive (i.e., IsSigned = false), because if this were not the
+ // case, the sign bit would have been demanded.
+ auto Mask = DB->getDemandedBits(Exit);
+ MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
+ }
+
+ if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
+ // If demanded bits wasn't able to limit the bit width, we can try to use
+ // value tracking instead. This can be the case, for example, if the value
+ // may be negative.
+ auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
+ auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
+ MaxBitWidth = NumTypeBits - NumSignBits;
+ KnownBits Bits = computeKnownBits(Exit, DL);
+ if (!Bits.isNonNegative()) {
+ // If the value is not known to be non-negative, we set IsSigned to true,
+ // meaning that we will use sext instructions instead of zext
+ // instructions to restore the original type.
+ IsSigned = true;
+ if (!Bits.isNegative())
+ // If the value is not known to be negative, we don't know what the
+ // upper bit is, and therefore, we don't know what kind of extend we
+ // will need. In this case, just increase the bit width by one bit and
+ // use sext.
+ ++MaxBitWidth;
+ }
+ }
+ if (!isPowerOf2_64(MaxBitWidth))
+ MaxBitWidth = NextPowerOf2(MaxBitWidth);
+
+ return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
+ IsSigned);
+}
+
+/// Collect cast instructions that can be ignored in the vectorizer's cost
+/// model, given a reduction exit value and the minimal type in which the
+/// reduction can be represented.
+static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
+ Type *RecurrenceType,
+ SmallPtrSetImpl<Instruction *> &Casts) {
+
+ SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
+ Worklist.push_back(Exit);
+
+ while (!Worklist.empty()) {
+ Instruction *Val = Worklist.pop_back_val();
+ Visited.insert(Val);
+ if (auto *Cast = dyn_cast<CastInst>(Val))
+ if (Cast->getSrcTy() == RecurrenceType) {
+ // If the source type of a cast instruction is equal to the recurrence
+ // type, it will be eliminated, and should be ignored in the vectorizer
+ // cost model.
+ Casts.insert(Cast);
+ continue;
+ }
+
+ // Add all operands to the work list if they are loop-varying values that
+ // we haven't yet visited.
+ for (Value *O : cast<User>(Val)->operands())
+ if (auto *I = dyn_cast<Instruction>(O))
+ if (TheLoop->contains(I) && !Visited.count(I))
+ Worklist.push_back(I);
+ }
+}
+
+bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
+ Loop *TheLoop, bool HasFunNoNaNAttr,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Reduction variables are only found in the loop header block.
+ if (Phi->getParent() != TheLoop->getHeader())
+ return false;
+
+ // Obtain the reduction start value from the value that comes from the loop
+ // preheader.
+ Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
+
+ // ExitInstruction is the single value which is used outside the loop.
+ // We only allow for a single reduction value to be used outside the loop.
+ // This includes users of the reduction variables, which form a cycle
+ // that ends in the phi node.
+ Instruction *ExitInstruction = nullptr;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
+
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
+
+ // To recognize min/max patterns formed by an icmp/select sequence, we store
+ // the number of instructions we saw from the recognized min/max pattern,
+ // to make sure we only see exactly the two instructions.
+ unsigned NumCmpSelectPatternInst = 0;
+ InstDesc ReduxDesc(false, nullptr);
+
+ // Data used for determining if the recurrence has been type-promoted.
+ Type *RecurrenceType = Phi->getType();
+ SmallPtrSet<Instruction *, 4> CastInsts;
+ Instruction *Start = Phi;
+ bool IsSigned = false;
+
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ SmallVector<Instruction *, 8> Worklist;
+
+ // Return early if the recurrence kind does not match the type of Phi. If the
+ // recurrence kind is arithmetic, we attempt to look through AND operations
+ // resulting from the type promotion performed by InstCombine. Vector
+ // operations are not limited to the legal integer widths, so we may be able
+ // to evaluate the reduction in the narrower width.
+ if (RecurrenceType->isFloatingPointTy()) {
+ if (!isFloatingPointRecurrenceKind(Kind))
+ return false;
+ } else {
+ if (!isIntegerRecurrenceKind(Kind))
+ return false;
+ if (isArithmeticRecurrenceKind(Kind))
+ Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+ }
+
+ Worklist.push_back(Start);
+ VisitedInsts.insert(Start);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+ // - One use of reduction value (safe).
+ // - Multiple use of reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By instructions outside of the loop (safe).
+ // * One value may have several outside users, but all outside
+ // uses must be of the same value.
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
+ return false;
+
+ bool IsAPhi = isa<PHINode>(Cur);
+
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
+
+ // Reductions of instructions such as Div and Sub are only possible if the
+ // LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds. We ignore
+ // the starting value (the Phi or an AND instruction if the Phi has been
+ // type-promoted).
+ if (Cur != Start) {
+ ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isRecurrence())
+ return false;
+ }
+
+ bool IsASelect = isa<SelectInst>(Cur);
+
+ // A conditional reduction operation must only have 2 or fewer uses in
+ // VisitedInsts.
+ if (IsASelect && (Kind == RK_FloatAdd || Kind == RK_FloatMult) &&
+ hasMultipleUsesOf(Cur, VisitedInsts, 2))
+ return false;
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && !IsASelect && Kind != RK_IntegerMinMax &&
+ Kind != RK_FloatMinMax && hasMultipleUsesOf(Cur, VisitedInsts, 1))
+ return false;
+
+ // All inputs to a PHI node must be a reduction value.
+ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax &&
+ (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi && Cur != Start;
+
+ // Process users of current instruction. Push non-PHI nodes after PHI nodes
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Check if we found the exit user.
+ BasicBlock *Parent = UI->getParent();
+ if (!TheLoop->contains(Parent)) {
+ // If we already know this instruction is used externally, move on to
+ // the next user.
+ if (ExitInstruction == Cur)
+ continue;
+
+ // Exit if you find multiple values used outside or if the header phi
+ // node is being used. In this case the user uses the value of the
+ // previous iteration, in which case we would lose "VF-1" iterations of
+ // the reduction operation if we vectorize.
+ if (ExitInstruction != nullptr || Cur == Phi)
+ return false;
+
+ // The instruction used by an outside user must be the last instruction
+ // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (!is_contained(Phi->operands(), Cur))
+ return false;
+
+ ExitInstruction = Cur;
+ continue;
+ }
+
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ InstDesc IgnoredVal(false, nullptr);
+ if (VisitedInsts.insert(UI).second) {
+ if (isa<PHINode>(UI))
+ PHIs.push_back(UI);
+ else
+ NonPHIs.push_back(UI);
+ } else if (!isa<PHINode>(UI) &&
+ ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
+ !isa<SelectInst>(UI)) ||
+ (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
+ !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())))
+ return false;
+
+ // Remember that we completed the cycle.
+ if (UI == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
+
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
+
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
+
+ if (Start != Phi) {
+ // If the starting value is not the same as the phi node, we speculatively
+ // looked through an 'and' instruction when evaluating a potential
+ // arithmetic reduction to determine if it may have been type-promoted.
+ //
+ // We now compute the minimal bit width that is required to represent the
+ // reduction. If this is the same width that was indicated by the 'and', we
+ // can represent the reduction in the smaller type. The 'and' instruction
+ // will be eliminated since it will essentially be a cast instruction that
+ // can be ignored in the cost model. If we compute a different type than we
+ // did when evaluating the 'and', the 'and' will not be eliminated, and we
+ // will end up with different kinds of operations in the recurrence
+ // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
+ // the case.
+ //
+ // The vectorizer relies on InstCombine to perform the actual
+ // type-shrinking. It does this by inserting instructions to truncate the
+ // exit value of the reduction to the width indicated by RecurrenceType and
+ // then extend this value back to the original width. If IsSigned is false,
+ // a 'zext' instruction will be generated; otherwise, a 'sext' will be
+ // used.
+ //
+ // TODO: We should not rely on InstCombine to rewrite the reduction in the
+ // smaller type. We should just generate a correctly typed expression
+ // to begin with.
+ Type *ComputedType;
+ std::tie(ComputedType, IsSigned) =
+ computeRecurrenceType(ExitInstruction, DB, AC, DT);
+ if (ComputedType != RecurrenceType)
+ return false;
+
+ // The recurrence expression will be represented in a narrower type. If
+ // there are any cast instructions that will be unnecessary, collect them
+ // in CastInsts. Note that the 'and' instruction was already included in
+ // this list.
+ //
+ // TODO: A better way to represent this may be to tag in some way all the
+ // instructions that are a part of the reduction. The vectorizer cost
+ // model could then apply the recurrence type to these instructions,
+ // without needing a white list of instructions to ignore.
+ collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
+ }
+
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
+
+ // The ExitInstruction (the instruction allowed to have out-of-loop users)
+ // is saved as part of the RecurrenceDescriptor.
+
+ // Save the description of this reduction variable.
+ RecurrenceDescriptor RD(
+ RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+ ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
+ RedDes = RD;
+
+ return true;
+}
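+
+// Illustrative example (hypothetical names) of an integer add reduction that
+// this scan accepts:
+//   loop:
+//     %sum      = phi i32 [ 0, %ph ], [ %sum.next, %loop ]
+//     %sum.next = add i32 %sum, %val        ; the single reduction operation
+//   exit:
+//     ... single out-of-loop user of %sum.next (the ExitInstruction)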
+
+/// Returns a recurrence descriptor for the instruction if it is a
+/// Select(ICmp(X, Y), X, Y) pattern corresponding to a min(X, Y) or max(X, Y).
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
+
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+ "Expect a select instruction");
+ Instruction *Cmp = nullptr;
+ SelectInst *Select = nullptr;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
+ return InstDesc(false, I);
+ return InstDesc(Select, Prev.getMinMaxKind());
+ }
+
+ // Only handle single use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return InstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return InstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return InstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMax);
+
+ return InstDesc(false, I);
+}
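+
+// Illustrative example (hypothetical names): a signed-max step matched by the
+// pattern above:
+//   %cmp      = icmp sgt i32 %x, %max
+//   %max.next = select i1 %cmp, i32 %x, i32 %max
+// which yields InstDesc(%max.next, MRK_SIntMax).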
+
+/// Returns a recurrence descriptor if the select instruction has users in the
+/// compare-and-add reduction pattern below. The select instruction argument is
+/// the last one in the sequence.
+///
+/// %sum.1 = phi ...
+/// ...
+/// %cmp = fcmp pred %0, %CFP
+/// %add = fadd %0, %sum.1
+/// %sum.2 = select %cmp, %add, %sum.1
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isConditionalRdxPattern(
+ RecurrenceKind Kind, Instruction *I) {
+ SelectInst *SI = dyn_cast<SelectInst>(I);
+ if (!SI)
+ return InstDesc(false, I);
+
+ CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
+ // Only handle single use cases for now.
+ if (!CI || !CI->hasOneUse())
+ return InstDesc(false, I);
+
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+ // For now, handle only the case where exactly one of the select's operands
+ // is a PHI node.
+ if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
+ (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
+ return InstDesc(false, I);
+
+ Instruction *I1 =
+ isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
+ : dyn_cast<Instruction>(TrueVal);
+ if (!I1 || !I1->isBinaryOp())
+ return InstDesc(false, I);
+
+ Value *Op1, *Op2;
+ if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
+ m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
+ I1->isFast())
+ return InstDesc(Kind == RK_FloatAdd, SI);
+
+ if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
+ return InstDesc(Kind == RK_FloatMult, SI);
+
+ return InstDesc(false, I);
+}
+
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ InstDesc &Prev, bool HasFunNoNaNAttr) {
+ bool FP = I->getType()->isFloatingPointTy();
+ Instruction *UAI = Prev.getUnsafeAlgebraInst();
+ if (!UAI && FP && !I->isFast())
+ UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
+
+ switch (I->getOpcode()) {
+ default:
+ return InstDesc(false, I);
+ case Instruction::PHI:
+ return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
+ case Instruction::Sub:
+ case Instruction::Add:
+ return InstDesc(Kind == RK_IntegerAdd, I);
+ case Instruction::Mul:
+ return InstDesc(Kind == RK_IntegerMult, I);
+ case Instruction::And:
+ return InstDesc(Kind == RK_IntegerAnd, I);
+ case Instruction::Or:
+ return InstDesc(Kind == RK_IntegerOr, I);
+ case Instruction::Xor:
+ return InstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FMul:
+ return InstDesc(Kind == RK_FloatMult, I, UAI);
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ return InstDesc(Kind == RK_FloatAdd, I, UAI);
+ case Instruction::Select:
+ if (Kind == RK_FloatAdd || Kind == RK_FloatMult)
+ return isConditionalRdxPattern(Kind, I);
+ LLVM_FALLTHROUGH;
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return InstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
+}
+
+bool RecurrenceDescriptor::hasMultipleUsesOf(
+ Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,
+ unsigned MaxNumUses) {
+ unsigned NumUses = 0;
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
+ ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > MaxNumUses)
+ return true;
+ }
+
+ return false;
+}
+bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB, AssumptionCache *AC,
+ DominatorTree *DT) {
+
+ BasicBlock *Header = TheLoop->getHeader();
+ Function &F = *Header->getParent();
+ bool HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+
+ if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi
+ << "\n");
+ return true;
+ }
+ // Not a reduction of known type.
+ return false;
+}
+
+bool RecurrenceDescriptor::isFirstOrderRecurrence(
+ PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
+
+ // Ensure the phi node is in the loop header and has two incoming values.
+ if (Phi->getParent() != TheLoop->getHeader() ||
+ Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Ensure the loop has a preheader and a single latch block. The loop
+ // vectorizer will need the latch to set up the next iteration of the loop.
+ auto *Preheader = TheLoop->getLoopPreheader();
+ auto *Latch = TheLoop->getLoopLatch();
+ if (!Preheader || !Latch)
+ return false;
+
+ // Ensure the phi node's incoming blocks are the loop preheader and latch.
+ if (Phi->getBasicBlockIndex(Preheader) < 0 ||
+ Phi->getBasicBlockIndex(Latch) < 0)
+ return false;
+
+ // Get the previous value. The previous value comes from the latch edge while
+ // the initial value comes from the preheader edge.
+ auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
+ SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
+ return false;
+
+ // Ensure every user of the phi node is dominated by the previous value.
+ // The dominance requirement ensures the loop vectorizer will not need to
+ // vectorize the initial value prior to the first iteration of the loop.
+ // TODO: Consider extending this sinking to handle other kinds of instructions
+ // and expressions, beyond sinking a single cast past Previous.
+ if (Phi->hasOneUse()) {
+ auto *I = Phi->user_back();
+ if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
+ DT->dominates(Previous, I->user_back())) {
+ if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
+ SinkAfter[I] = Previous;
+ return true;
+ }
+ }
+
+ for (User *U : Phi->users())
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (!DT->dominates(Previous, I))
+ return false;
+ }
+
+ return true;
+}
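+
+// Illustrative example (hypothetical names) of a first-order recurrence,
+// where each iteration uses the value produced by the previous one:
+//   for.body:
+//     %prev = phi i32 [ %init, %ph ], [ %cur, %for.body ]
+//     %cur  = load i32, i32* %ptr
+//     %out  = add i32 %cur, %prev     ; user of %prev dominated by %cur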
+
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
+ Type *Tp) {
+ switch (K) {
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
+ // Adding, Xoring, Oring zero to a number does not change it.
+ return ConstantInt::get(Tp, 0);
+ case RK_IntegerMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantInt::get(Tp, 1);
+ case RK_IntegerAnd:
+ // AND-ing a number with an all-1 value does not change it.
+ return ConstantInt::get(Tp, -1, true);
+ case RK_FloatMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantFP::get(Tp, 1.0L);
+ case RK_FloatAdd:
+ // Adding zero to a number does not change it.
+ return ConstantFP::get(Tp, 0.0L);
+ default:
+ llvm_unreachable("Unknown recurrence kind");
+ }
+}
+
+/// This function translates the recurrence kind to an LLVM binary operator.
+unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
+ switch (Kind) {
+ case RK_IntegerAdd:
+ return Instruction::Add;
+ case RK_IntegerMult:
+ return Instruction::Mul;
+ case RK_IntegerOr:
+ return Instruction::Or;
+ case RK_IntegerAnd:
+ return Instruction::And;
+ case RK_IntegerXor:
+ return Instruction::Xor;
+ case RK_FloatMult:
+ return Instruction::FMul;
+ case RK_FloatAdd:
+ return Instruction::FAdd;
+ case RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case RK_FloatMinMax:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unknown recurrence operation");
+ }
+}
+
+InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
+ const SCEV *Step, BinaryOperator *BOp,
+ SmallVectorImpl<Instruction *> *Casts)
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
+ assert(IK != IK_NoInduction && "Not an induction");
+
+ // Start value type should match the induction kind and the value
+ // itself should not be null.
+ assert(StartValue && "StartValue is null");
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
+ "StartValue is not a pointer for pointer induction");
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
+ "StartValue is not an integer for integer induction");
+
+ // Check the Step Value. It should be a non-zero integer value.
+ assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
+ "Step value is zero");
+
+ assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
+ "Step value should be constant for pointer induction");
+ assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
+ "StepValue is not an integer");
+
+ assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
+ "StepValue is not FP for FpInduction");
+ assert((IK != IK_FpInduction ||
+ (InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub))) &&
+ "Binary opcode should be specified for FP induction");
+
+ if (Casts) {
+ for (auto &Inst : *Casts) {
+ RedundantCasts.push_back(Inst);
+ }
+ }
+}
+
+int InductionDescriptor::getConsecutiveDirection() const {
+ ConstantInt *ConstStep = getConstIntStepValue();
+ if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
+ return ConstStep->getSExtValue();
+ return 0;
+}
+
+ConstantInt *InductionDescriptor::getConstIntStepValue() const {
+ if (isa<SCEVConstant>(Step))
+ return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
+ return nullptr;
+}
+
+bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ ScalarEvolution *SE,
+ InductionDescriptor &D) {
+
+ // Here we only handle FP induction variables.
+ assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
+
+ if (TheLoop->getHeader() != Phi->getParent())
+ return false;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi if it has a unique entry value and a unique backedge value.
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+ Value *BEValue = nullptr, *StartValue = nullptr;
+ if (TheLoop->contains(Phi->getIncomingBlock(0))) {
+ BEValue = Phi->getIncomingValue(0);
+ StartValue = Phi->getIncomingValue(1);
+ } else {
+ assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
+ "Unexpected Phi node in the loop");
+ BEValue = Phi->getIncomingValue(1);
+ StartValue = Phi->getIncomingValue(0);
+ }
+
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
+ if (!BOp)
+ return false;
+
+ Value *Addend = nullptr;
+ if (BOp->getOpcode() == Instruction::FAdd) {
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+ else if (BOp->getOperand(1) == Phi)
+ Addend = BOp->getOperand(0);
+ } else if (BOp->getOpcode() == Instruction::FSub)
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+
+ if (!Addend)
+ return false;
+
+ // The addend should be loop invariant
+ if (auto *I = dyn_cast<Instruction>(Addend))
+ if (TheLoop->contains(I))
+ return false;
+
+ // FP Step has unknown SCEV
+ const SCEV *Step = SE->getUnknown(Addend);
+ D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
+ return true;
+}
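+
+// Illustrative example (hypothetical names) of a float induction recognized
+// here, with a loop-invariant step:
+//   %x      = phi float [ 0.0, %ph ], [ %x.next, %loop ]
+//   %x.next = fadd float %x, %step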
+
+/// This function is called when we suspect that the update-chain of a phi node
+/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts,
+/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
+/// predicate P under which the SCEV expression for the phi can be the
+/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
+/// cast instructions that are involved in the update-chain of this induction.
+/// A caller that adds the required runtime predicate can be free to drop these
+/// cast instructions, and compute the phi using \p AR (instead of some scev
+/// expression with casts).
+///
+/// For example, without a predicate the scev expression can take the following
+/// form:
+/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
+///
+/// It corresponds to the following IR sequence:
+/// %for.body:
+/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+/// %casted_phi = "ExtTrunc i64 %x"
+/// %add = add i64 %casted_phi, %step
+///
+/// where %x is given in \p PN,
+/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
+/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
+/// several forms, for example, such as:
+/// ExtTrunc1: %casted_phi = and %x, 2^n-1
+/// or:
+/// ExtTrunc2: %t = shl %x, m
+/// %casted_phi = ashr %t, m
+///
+/// If we are able to find such sequence, we return the instructions
+/// we found, namely %casted_phi and the instructions on its use-def chain up
+/// to the phi (not including the phi).
+static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
+ const SCEVUnknown *PhiScev,
+ const SCEVAddRecExpr *AR,
+ SmallVectorImpl<Instruction *> &CastInsts) {
+
+ assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+ auto *PN = cast<PHINode>(PhiScev->getValue());
+ assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
+ const Loop *L = AR->getLoop();
+
+ // Find any cast instructions that participate in the def-use chain of
+ // PhiScev in the loop.
+ // FORNOW/TODO: We currently expect the def-use chain to include only
+ // two-operand instructions, where one of the operands is an invariant.
+ // createAddRecFromPHIWithCasts() currently does not support anything more
+ // involved than that, so we keep the search simple. This can be
+ // extended/generalized as needed.
+
+ auto getDef = [&](const Value *Val) -> Value * {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
+ if (!BinOp)
+ return nullptr;
+ Value *Op0 = BinOp->getOperand(0);
+ Value *Op1 = BinOp->getOperand(1);
+ Value *Def = nullptr;
+ if (L->isLoopInvariant(Op0))
+ Def = Op1;
+ else if (L->isLoopInvariant(Op1))
+ Def = Op0;
+ return Def;
+ };
+
+ // Look for the instruction that defines the induction via the
+ // loop backedge.
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+ Value *Val = PN->getIncomingValueForBlock(Latch);
+ if (!Val)
+ return false;
+
+ // Follow the def-use chain until the induction phi is reached.
+ // If on the way we encounter a Value that has the same SCEV Expr as the
+ // phi node, we can consider the instructions we visit from that point
+ // as part of the cast-sequence that can be ignored.
+ bool InCastSequence = false;
+ auto *Inst = dyn_cast<Instruction>(Val);
+ while (Val != PN) {
+ // If we encountered a phi node other than PN, or if we left the loop,
+ // we bail out.
+ if (!Inst || !L->contains(Inst)) {
+ return false;
+ }
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
+ if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
+ InCastSequence = true;
+ if (InCastSequence) {
+ // Only the last instruction in the cast sequence is expected to have
+ // uses outside the induction def-use chain.
+ if (!CastInsts.empty())
+ if (!Inst->hasOneUse())
+ return false;
+ CastInsts.push_back(Inst);
+ }
+ Val = getDef(Val);
+ if (!Val)
+ return false;
+ Inst = dyn_cast<Instruction>(Val);
+ }
+
+ return InCastSequence;
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ PredicatedScalarEvolution &PSE,
+ InductionDescriptor &D, bool Assume) {
+ Type *PhiTy = Phi->getType();
+
+ // Handle integer and pointer induction variables.
+ // We also handle FP induction, but we do not try to build a recurrence
+ // expression from the PHI node in place.
+
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
+ !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
+ return false;
+
+ if (PhiTy->isFloatingPointTy())
+ return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
+
+ const SCEV *PhiScev = PSE.getSCEV(Phi);
+ const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ // We need this expression to be an AddRecExpr.
+ if (Assume && !AR)
+ AR = PSE.getAsAddRec(Phi);
+
+ if (!AR) {
+ LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ // Record any Cast instructions that participate in the induction update
+ const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+ // If we started from an UnknownSCEV, and managed to build an addRecurrence
+ // only after enabling Assume with PSCEV, this means we may have encountered
+ // cast instructions that required adding a runtime check in order to
+ // guarantee the correctness of the AddRecurrence representation of the
+ // induction.
+ if (PhiScev != AR && SymbolicPhi) {
+ SmallVector<Instruction *, 2> Casts;
+ if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+ }
+
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
+}
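For orientation (not part of the patch), a minimal sketch of how a loop transform might use the predicated overload above; the pass state (PSE, the loop L) is assumed to be set up already and the surrounding code is hypothetical:

// Hypothetical caller: classify header PHIs, letting PSE add runtime
// predicates (Assume = true); casts recorded in the descriptor become
// droppable once the caller emits that predicate.
for (PHINode &Phi : L->getHeader()->phis()) {
  InductionDescriptor D;
  if (InductionDescriptor::isInductionPHI(&Phi, L, PSE, D, /*Assume=*/true)) {
    // D now describes the start value, kind (e.g. IK_IntInduction or
    // IK_PtrInduction) and step of the induction.
  }
}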
+
+bool InductionDescriptor::isInductionPHI(
+ PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+ InductionDescriptor &D, const SCEV *Expr,
+ SmallVectorImpl<Instruction *> *CastsToIgnore) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
+ // Check that the PHI is consecutive.
+ const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ if (!AR) {
+ LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ if (AR->getLoop() != TheLoop) {
+ // FIXME: We should treat this as a uniform. Unfortunately, we
+ // don't currently know how to handle uniform PHIs.
+ LLVM_DEBUG(
+ dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
+ return false;
+ }
+
+ Value *StartValue =
+ Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+ // Calculate the pointer stride and check if it is consecutive.
+ // The stride may be a constant or a loop invariant integer value.
+ const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+ if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
+ return false;
+
+ if (PhiTy->isIntegerTy()) {
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr,
+ CastsToIgnore);
+ return true;
+ }
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ // The step of a pointer induction must be a constant.
+ if (!ConstStep)
+ return false;
+
+ ConstantInt *CV = ConstStep->getValue();
+ Type *PointerElementType = PhiTy->getPointerElementType();
+ // The pointer stride cannot be determined if the pointer element type is not
+ // sized.
+ if (!PointerElementType->isSized())
+ return false;
+
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ if (!Size)
+ return false;
+
+ int64_t CVSize = CV->getSExtValue();
+ if (CVSize % Size)
+ return false;
+ auto *StepValue =
+ SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
+ return true;
+}
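A standalone sketch of the stride arithmetic performed just above (illustrative only): the byte step must be an exact multiple of the element allocation size, and the recorded stride is expressed in elements.

#include <cstdint>

// E.g. an i32* phi stepped by 8 bytes per iteration yields a stride of 2;
// a step of 6 bytes fails the divisibility check and the phi is rejected.
static bool strideFromByteStep(int64_t CVSize, int64_t Size, int64_t &Stride) {
  if (!Size || CVSize % Size) // unsized element or non-multiple byte step
    return false;
  Stride = CVSize / Size;     // stride in elements, may be negative
  return true;
}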
diff --git a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 4659c0a00629..d6e6e76af03c 100644
--- a/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -15,7 +15,7 @@
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index a6cccc3b5910..6ddb3cbc01a3 100644
--- a/contrib/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -30,6 +31,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
@@ -137,7 +139,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool HasReturn;
bool HasIndirectBr;
bool HasUninlineableIntrinsic;
- bool UsesVarArgs;
+ bool InitsVargArgs;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -227,7 +229,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
BlockFrequencyInfo *CallerBFI);
// Custom analysis routines.
- bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
+ InlineResult analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
@@ -282,7 +285,7 @@ public:
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasUninlineableIntrinsic(false), UsesVarArgs(false), AllocatedSize(0),
+ HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
@@ -290,7 +293,7 @@ public:
NumInstructionsSimplified(0), SROACostSavings(0),
SROACostSavingsLost(0) {}
- bool analyzeCall(CallSite CS);
+ InlineResult analyzeCall(CallSite CS);
int getThreshold() { return Threshold; }
int getCost() { return Cost; }
@@ -719,6 +722,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
case Instruction::FPToSI:
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
Cost += InlineConstants::CallPenalty;
+ break;
default:
break;
}
@@ -1238,8 +1242,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
HasUninlineableIntrinsic = true;
return false;
case Intrinsic::vastart:
- case Intrinsic::vaend:
- UsesVarArgs = true;
+ InitsVargArgs = true;
return false;
}
}
@@ -1541,8 +1544,9 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
- SmallPtrSetImpl<const Value *> &EphValues) {
+InlineResult
+CallAnalyzer::analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
@@ -1574,16 +1578,29 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
using namespace ore;
// If visiting this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr || HasUninlineableIntrinsic || UsesVarArgs) {
+ InlineResult IR;
+ if (IsRecursiveCall)
+ IR = "recursive";
+ else if (ExposesReturnsTwice)
+ IR = "exposes returns twice";
+ else if (HasDynamicAlloca)
+ IR = "dynamic alloca";
+ else if (HasIndirectBr)
+ IR = "indirect branch";
+ else if (HasUninlineableIntrinsic)
+ IR = "uninlinable intrinsic";
+ else if (InitsVargArgs)
+ IR = "varargs";
+ if (!IR) {
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
- << NV("Callee", &F)
- << " has uninlinable pattern and cost is not fully computed";
+ << NV("Callee", &F) << " has uninlinable pattern ("
+ << NV("InlineResult", IR.message)
+ << ") and cost is not fully computed";
});
- return false;
+ return IR;
}
// If the caller is a recursive function then we don't want to inline
@@ -1591,15 +1608,15 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// the caller stack usage dramatically.
if (IsCallerRecursive &&
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
+ InlineResult IR = "recursive and allocates too much stack space";
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
- << NV("Callee", &F)
- << " is recursive and allocates too much stack space. Cost is "
- "not fully computed";
+ << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
+ << ". Cost is not fully computed";
});
- return false;
+ return IR;
}
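The InlineResult used in these hunks converts from a message string (failure) and tests false on failure, as the code above shows. A tiny standalone analogue of that idiom, with names that are ours rather than LLVM's:

#include <cstdio>

struct Result {                      // analogue of InlineResult
  const char *message = nullptr;     // nullptr means success
  Result(const char *M = nullptr) : message(M) {}
  explicit operator bool() const { return message == nullptr; }
};

static Result checkBlock(bool HasDynamicAlloca) {
  if (HasDynamicAlloca)
    return "dynamic alloca";         // failure carrying a reason
  return {};                         // success
}

int main() {
  Result R = checkBlock(true);
  if (!R)
    std::printf("not inlining: %s\n", R.message);
}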
// Check if we've past the maximum possible threshold so we don't spin in
@@ -1695,7 +1712,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
-bool CallAnalyzer::analyzeCall(CallSite CS) {
+InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
++NumCallsAnalyzed;
// Perform some tweaks to the cost and threshold based on the direct
@@ -1714,6 +1731,13 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Update the threshold based on callsite properties
updateThreshold(CS, F);
+ // While Threshold depends on commandline options that can take negative
+ // values, we want to enforce the invariant that the computed threshold and
+ // bonuses are non-negative.
+ assert(Threshold >= 0);
+ assert(SingleBBBonus >= 0);
+ assert(VectorBonus >= 0);
+
// Speculatively apply all possible bonuses to Threshold. If cost exceeds
// this Threshold any time, and cost cannot decrease, we can stop processing
// the rest of the function body.
@@ -1730,7 +1754,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Check if we're done. This can happen due to bonuses and penalties.
if (Cost >= Threshold && !ComputeFullInlineCost)
- return false;
+ return "high cost";
if (F.empty())
return true;
@@ -1809,14 +1833,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// site. If the blockaddress escapes the function, e.g., via a global
// variable, inlining may lead to an invalid cross-function reference.
if (BB->hasAddressTaken())
- return false;
+ return "blockaddress";
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail out.
- if (!analyzeBlock(BB, EphValues))
- return false;
+ InlineResult IR = analyzeBlock(BB, EphValues);
+ if (!IR)
+ return IR;
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
@@ -1867,7 +1892,25 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
- return false;
+ return "noduplicate";
+
+ // Loops generally behave a lot like calls: they act as barriers to code
+ // motion, require a certain amount of setup, etc. So when optimising for
+ // size, we penalise any call sites whose callee contains loops. We do this
+ // after all other costs here, so we will likely only be dealing with
+ // relatively small functions (and hence DT and LI will hopefully be cheap).
+ if (Caller->optForMinSize()) {
+ DominatorTree DT(F);
+ LoopInfo LI(DT);
+ int NumLoops = 0;
+ for (Loop *L : LI) {
+ // Ignore loops that will not be executed
+ if (DeadBlocks.count(L->getHeader()))
+ continue;
+ NumLoops++;
+ }
+ Cost += NumLoops * InlineConstants::CallPenalty;
+ }
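As a quick worked example of the penalty just added: a callee with three loops whose headers are all live adds 3 * CallPenalty to the cost when the caller is optimising for size; assuming the usual value of 25 for InlineConstants::CallPenalty (an assumption, the constant's value is not shown in this diff), that is an extra 75.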
// We applied the maximum possible vector bonus at the beginning. Now,
// subtract the excess bonus, if any, from the Threshold before
@@ -1961,7 +2004,7 @@ InlineCost llvm::getInlineCost(
// Cannot inline indirect calls.
if (!Callee)
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("indirect call");
// Never inline calls with byval arguments that do not have the alloca
// address space. Since byval arguments can be replaced with a copy to an
@@ -1973,54 +2016,59 @@ InlineCost llvm::getInlineCost(
if (CS.isByValArgument(I)) {
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
if (PTy->getAddressSpace() != AllocaAS)
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("byval arguments without alloca"
+ " address space");
}
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
- return llvm::InlineCost::getAlways();
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getAlways("always inline attribute");
+ return llvm::InlineCost::getNever("inapplicable always inline attribute");
}
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
Function *Caller = CS.getCaller();
if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("conflicting attributes");
// Don't inline this call if the caller has the optnone attribute.
if (Caller->hasFnAttribute(Attribute::OptimizeNone))
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("optnone attribute");
// Don't inline a function that treats null pointer as valid into a caller
// that does not have this attribute.
if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("nullptr definitions incompatible");
+
+ // Don't inline functions which can be interposed at link-time.
+ if (Callee->isInterposable())
+ return llvm::InlineCost::getNever("interposable");
+
+ // Don't inline functions marked noinline.
+ if (Callee->hasFnAttribute(Attribute::NoInline))
+ return llvm::InlineCost::getNever("noinline function attribute");
- // Don't inline functions which can be interposed at link-time. Don't inline
- // functions marked noinline or call sites marked noinline.
- // Note: inlining non-exact non-interposable functions is fine, since we know
- // we have *a* correct implementation of the source level function.
- if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
- CS.isNoInline())
- return llvm::InlineCost::getNever();
+ // Don't inline call sites marked noinline.
+ if (CS.isNoInline())
+ return llvm::InlineCost::getNever("noinline call site attribute");
LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "... (caller:" << Caller->getName() << ")\n");
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
Params);
- bool ShouldInline = CA.analyzeCall(CS);
+ InlineResult ShouldInline = CA.analyzeCall(CS);
LLVM_DEBUG(CA.dump());
// Check if there was a reason to force inlining or no inlining.
if (!ShouldInline && CA.getCost() < CA.getThreshold())
- return InlineCost::getNever();
+ return InlineCost::getNever(ShouldInline.message);
if (ShouldInline && CA.getCost() >= CA.getThreshold())
- return InlineCost::getAlways();
+ return InlineCost::getAlways("empty function");
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
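For context, a sketch of how an inliner-style client might consume the result now that forced decisions carry a reason. This is a non-compilable fragment: the analysis arguments are the author's guess at the surrounding caller, and attemptInlining/reportSkip are placeholders.

InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
                              GetBFI, PSI, /*ORE=*/nullptr);
if (IC.isAlways())
  attemptInlining(CS);            // forced, e.g. always_inline
else if (IC.isNever())
  reportSkip(CS);                 // the reason travels with the InlineCost
else if (IC.getCost() < IC.getThreshold())
  attemptInlining(CS);            // profitable under the current threshold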
@@ -2058,9 +2106,8 @@ bool llvm::isInlineViable(Function &F) {
// Disallow inlining functions that call @llvm.localescape. Doing this
// correctly would require major changes to the inliner.
case llvm::Intrinsic::localescape:
- // Disallow inlining of functions that access VarArgs.
+ // Disallow inlining of functions that initialize VarArgs with va_start.
case llvm::Intrinsic::vastart:
- case llvm::Intrinsic::vaend:
return false;
}
}
diff --git a/contrib/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/contrib/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
new file mode 100644
index 000000000000..816126f407ca
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -0,0 +1,157 @@
+//===-- InstructionPrecedenceTracking.cpp -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implements a class that can mark certain instructions as "special"
+// (e.g. as having implicit control flow, writing memory, or having another
+// interesting property) and then efficiently answer queries of the form:
+// 1. Are there any special instructions in the block of interest?
+// 2. Return the first special instruction in the given block;
+// 3. Check if the given instruction is preceded by the first special
+// instruction in the same block.
+// The class provides caching that allows these queries to be answered
+// quickly. The user must make sure that the cached data is invalidated
+// properly whenever the contents of a tracked block change.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool> ExpensiveAsserts(
+ "ipt-expensive-asserts",
+ cl::desc("Perform expensive assert validation on every query to Instruction"
+ " Precedence Tracking"),
+ cl::init(false), cl::Hidden);
+#endif
+
+const Instruction *InstructionPrecedenceTracking::getFirstSpecialInstruction(
+ const BasicBlock *BB) {
+#ifndef NDEBUG
+ // If there is a bug connected to an invalid cache, turn on ExpensiveAsserts
+ // to catch the situation as early as possible.
+ if (ExpensiveAsserts)
+ validateAll();
+ else
+ validate(BB);
+#endif
+
+ if (FirstSpecialInsts.find(BB) == FirstSpecialInsts.end()) {
+ fill(BB);
+ assert(FirstSpecialInsts.find(BB) != FirstSpecialInsts.end() && "Must be!");
+ }
+ return FirstSpecialInsts[BB];
+}
+
+bool InstructionPrecedenceTracking::hasSpecialInstructions(
+ const BasicBlock *BB) {
+ return getFirstSpecialInstruction(BB) != nullptr;
+}
+
+bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction(
+ const Instruction *Insn) {
+ const Instruction *MaybeFirstSpecial =
+ getFirstSpecialInstruction(Insn->getParent());
+ return MaybeFirstSpecial && OI.dominates(MaybeFirstSpecial, Insn);
+}
+
+void InstructionPrecedenceTracking::fill(const BasicBlock *BB) {
+ FirstSpecialInsts.erase(BB);
+ for (auto &I : *BB)
+ if (isSpecialInstruction(&I)) {
+ FirstSpecialInsts[BB] = &I;
+ return;
+ }
+
+ // Mark this block as having no special instructions.
+ FirstSpecialInsts[BB] = nullptr;
+}
+
+#ifndef NDEBUG
+void InstructionPrecedenceTracking::validate(const BasicBlock *BB) const {
+ auto It = FirstSpecialInsts.find(BB);
+ // Bail if we don't have anything cached for this block.
+ if (It == FirstSpecialInsts.end())
+ return;
+
+ for (const Instruction &Insn : *BB)
+ if (isSpecialInstruction(&Insn)) {
+ assert(It->second == &Insn &&
+ "Cached first special instruction is wrong!");
+ return;
+ }
+
+ assert(It->second == nullptr &&
+ "Block is marked as having special instructions but in fact it has "
+ "none!");
+}
+
+void InstructionPrecedenceTracking::validateAll() const {
+ // Check that for every known block the cached value is correct.
+ for (auto &It : FirstSpecialInsts)
+ validate(It.first);
+}
+#endif
+
+void InstructionPrecedenceTracking::insertInstructionTo(const Instruction *Inst,
+ const BasicBlock *BB) {
+ if (isSpecialInstruction(Inst))
+ FirstSpecialInsts.erase(BB);
+ OI.invalidateBlock(BB);
+}
+
+void InstructionPrecedenceTracking::removeInstruction(const Instruction *Inst) {
+ if (isSpecialInstruction(Inst))
+ FirstSpecialInsts.erase(Inst->getParent());
+ OI.invalidateBlock(Inst->getParent());
+}
+
+void InstructionPrecedenceTracking::clear() {
+ for (auto It : FirstSpecialInsts)
+ OI.invalidateBlock(It.first);
+ FirstSpecialInsts.clear();
+#ifndef NDEBUG
+ // The map should be valid after clearing (at least empty).
+ validateAll();
+#endif
+}
+
+bool ImplicitControlFlowTracking::isSpecialInstruction(
+ const Instruction *Insn) const {
+ // If a block's instruction doesn't always pass control to its successor
+ // instruction, mark the block as having implicit control flow. We use this
+ // to avoid wrong assumptions of the sort "if A is executed and B
+ // post-dominates A, then B is also executed". This is not true if there is
+ // an implicit control flow instruction (e.g. a guard) between them.
+ //
+ // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+ // for volatile stores and loads because they can trap. The discussion on
+ // whether or not it is correct is still ongoing. We might want to get rid
+ // of this logic in the future. Anyway, trapping instructions shouldn't
+ // introduce implicit control flow, so we explicitly allow them here. This
+ // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
+ if (isGuaranteedToTransferExecutionToSuccessor(Insn))
+ return false;
+ if (isa<LoadInst>(Insn)) {
+ assert(cast<LoadInst>(Insn)->isVolatile() &&
+ "Non-volatile load should transfer execution to successor!");
+ return false;
+ }
+ if (isa<StoreInst>(Insn)) {
+ assert(cast<StoreInst>(Insn)->isVolatile() &&
+ "Non-volatile store should transfer execution to successor!");
+ return false;
+ }
+ return true;
+}
+
+bool MemoryWriteTracking::isSpecialInstruction(
+ const Instruction *Insn) const {
+ return Insn->mayWriteToMemory();
+}
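MemoryWriteTracking above shows how thin a client of this cache can be. As a purely hypothetical further client (not in the patch, with the constructor plumbing to the base class elided because it is not shown in this diff), one could track the first call in each block the same way:

class CallTracking : public InstructionPrecedenceTracking {
public:
  // Any call or invoke counts as "special"; the base class then answers
  // "first call in block" / "is I preceded by a call" queries with caching.
  virtual bool isSpecialInstruction(const Instruction *Insn) const {
    return isa<CallInst>(Insn) || isa<InvokeInst>(Insn);
  }
};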
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 5e72798d459a..ccf907c144f0 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -861,8 +861,10 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// (X / Y) * Y -> X if the division is exact.
Value *X = nullptr;
- if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
- match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y)
+ if (Q.IIQ.UseInstrInfo &&
+ (match(Op0,
+ m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
+ match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0)))))) // Y * (X / Y)
return X;
// i1 mul -> and.
@@ -1035,8 +1037,8 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) {
auto *Mul = cast<OverflowingBinaryOperator>(Op0);
// If the Mul does not overflow, then we are good to go.
- if ((IsSigned && Mul->hasNoSignedWrap()) ||
- (!IsSigned && Mul->hasNoUnsignedWrap()))
+ if ((IsSigned && Q.IIQ.hasNoSignedWrap(Mul)) ||
+ (!IsSigned && Q.IIQ.hasNoUnsignedWrap(Mul)))
return X;
// If X has the form X = A / Y, then X * Y cannot overflow.
if ((IsSigned && match(X, m_SDiv(m_Value(), m_Specific(Op1)))) ||
@@ -1094,10 +1096,11 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
return Op0;
// (X << Y) % X -> 0
- if ((Opcode == Instruction::SRem &&
- match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
- (Opcode == Instruction::URem &&
- match(Op0, m_NUWShl(m_Specific(Op1), m_Value()))))
+ if (Q.IIQ.UseInstrInfo &&
+ ((Opcode == Instruction::SRem &&
+ match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
+ (Opcode == Instruction::URem &&
+ match(Op0, m_NUWShl(m_Specific(Op1), m_Value())))))
return Constant::getNullValue(Op0->getType());
// If the operation is with the result of a select instruction, check whether
@@ -1295,7 +1298,8 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// (X >> A) << A -> X
Value *X;
- if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
+ if (Q.IIQ.UseInstrInfo &&
+ match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
return X;
// shl nuw i8 C, %x -> C iff C has sign bit set.
@@ -1365,7 +1369,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
// (X << A) >> A -> X
Value *X;
- if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
+ if (Q.IIQ.UseInstrInfo && match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
return X;
// Arithmetic shifting an all-sign-bit value is a no-op.
@@ -1552,7 +1556,8 @@ static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
}
-static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) & (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
@@ -1563,13 +1568,13 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
return nullptr;
- auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ auto *AddInst = cast<OverflowingBinaryOperator>(Op0->getOperand(0));
if (AddInst->getOperand(1) != Op1->getOperand(1))
return nullptr;
Type *ITy = Op0->getType();
- bool isNSW = AddInst->hasNoSignedWrap();
- bool isNUW = AddInst->hasNoUnsignedWrap();
+ bool isNSW = IIQ.hasNoSignedWrap(AddInst);
+ bool isNUW = IIQ.hasNoUnsignedWrap(AddInst);
const APInt Delta = *C1 - *C0;
if (C0->isStrictlyPositive()) {
@@ -1598,7 +1603,8 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
return X;
if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
@@ -1615,15 +1621,16 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ))
return X;
return nullptr;
}
-static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) | (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
@@ -1639,8 +1646,8 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
Type *ITy = Op0->getType();
- bool isNSW = AddInst->hasNoSignedWrap();
- bool isNUW = AddInst->hasNoUnsignedWrap();
+ bool isNSW = IIQ.hasNoSignedWrap(AddInst);
+ bool isNUW = IIQ.hasNoUnsignedWrap(AddInst);
const APInt Delta = *C1 - *C0;
if (C0->isStrictlyPositive()) {
@@ -1669,7 +1676,8 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
return X;
if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
@@ -1686,15 +1694,16 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ))
return X;
return nullptr;
}
-static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
+static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI,
+ FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if (LHS0->getType() != RHS0->getType())
@@ -1711,8 +1720,8 @@ static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
// (fcmp uno NNAN, X) | (fcmp uno Y, X) --> fcmp uno Y, X
// (fcmp uno X, NNAN) | (fcmp uno X, Y) --> fcmp uno X, Y
// (fcmp uno X, NNAN) | (fcmp uno Y, X) --> fcmp uno Y, X
- if ((isKnownNeverNaN(LHS0) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
- (isKnownNeverNaN(LHS1) && (LHS0 == RHS0 || LHS0 == RHS1)))
+ if ((isKnownNeverNaN(LHS0, TLI) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
+ (isKnownNeverNaN(LHS1, TLI) && (LHS0 == RHS0 || LHS0 == RHS1)))
return RHS;
// (fcmp ord X, Y) & (fcmp ord NNAN, X) --> fcmp ord X, Y
@@ -1723,15 +1732,16 @@ static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
// (fcmp uno Y, X) | (fcmp uno NNAN, X) --> fcmp uno Y, X
// (fcmp uno X, Y) | (fcmp uno X, NNAN) --> fcmp uno X, Y
// (fcmp uno Y, X) | (fcmp uno X, NNAN) --> fcmp uno Y, X
- if ((isKnownNeverNaN(RHS0) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
- (isKnownNeverNaN(RHS1) && (RHS0 == LHS0 || RHS0 == LHS1)))
+ if ((isKnownNeverNaN(RHS0, TLI) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
+ (isKnownNeverNaN(RHS1, TLI) && (RHS0 == LHS0 || RHS0 == LHS1)))
return LHS;
}
return nullptr;
}
-static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) {
+static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
+ Value *Op0, Value *Op1, bool IsAnd) {
// Look through casts of the 'and' operands to find compares.
auto *Cast0 = dyn_cast<CastInst>(Op0);
auto *Cast1 = dyn_cast<CastInst>(Op1);
@@ -1745,13 +1755,13 @@ static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) {
auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
if (ICmp0 && ICmp1)
- V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1) :
- simplifyOrOfICmps(ICmp0, ICmp1);
+ V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ)
+ : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ);
auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
if (FCmp0 && FCmp1)
- V = simplifyAndOrOfFCmps(FCmp0, FCmp1, IsAnd);
+ V = simplifyAndOrOfFCmps(Q.TLI, FCmp0, FCmp1, IsAnd);
if (!V)
return nullptr;
@@ -1831,7 +1841,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op1;
}
- if (Value *V = simplifyAndOrOfCmps(Op0, Op1, true))
+ if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
return V;
// Try some generic simplifications for associative operations.
@@ -1981,7 +1991,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
return Op0;
- if (Value *V = simplifyAndOrOfCmps(Op0, Op1, false))
+ if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
// Try some generic simplifications for associative operations.
@@ -2142,13 +2152,15 @@ static Constant *
computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, CmpInst::Predicate Pred,
AssumptionCache *AC, const Instruction *CxtI,
- Value *LHS, Value *RHS) {
+ const InstrInfoQuery &IIQ, Value *LHS, Value *RHS) {
// First, skip past any trivial no-ops.
LHS = LHS->stripPointerCasts();
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
- if (llvm::isKnownNonZero(LHS, DL) && isa<ConstantPointerNull>(RHS) &&
+ if (llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr,
+ IIQ.UseInstrInfo) &&
+ isa<ConstantPointerNull>(RHS) &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
@@ -2413,12 +2425,12 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT: {
@@ -2463,17 +2475,18 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
/// Many binary operators with a constant operand have an easy-to-compute
/// range of outputs. This can be used to fold a comparison to always true or
/// always false.
-static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
+static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper,
+ const InstrInfoQuery &IIQ) {
unsigned Width = Lower.getBitWidth();
const APInt *C;
switch (BO.getOpcode()) {
case Instruction::Add:
if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
// FIXME: If we have both nuw and nsw, we should reduce the range further.
- if (BO.hasNoUnsignedWrap()) {
+ if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
// 'add nuw x, C' produces [C, UINT_MAX].
Lower = *C;
- } else if (BO.hasNoSignedWrap()) {
+ } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
if (C->isNegative()) {
// 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
Lower = APInt::getSignedMinValue(Width);
@@ -2506,7 +2519,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && BO.isExact())
+ if (!C->isNullValue() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
if (C->isNegative()) {
// 'ashr C, x' produces [C, C >> (Width-1)]
@@ -2527,7 +2540,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
} else if (match(BO.getOperand(0), m_APInt(C))) {
// 'lshr C, x' produces [C >> (Width-1), C].
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && BO.isExact())
+ if (!C->isNullValue() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
Lower = C->lshr(ShiftAmount);
Upper = *C + 1;
@@ -2536,7 +2549,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
case Instruction::Shl:
if (match(BO.getOperand(0), m_APInt(C))) {
- if (BO.hasNoUnsignedWrap()) {
+ if (IIQ.hasNoUnsignedWrap(&BO)) {
// 'shl nuw C, x' produces [C, C << CLZ(C)]
Lower = *C;
Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
@@ -2617,8 +2630,72 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
}
}
+/// Some intrinsics with a constant operand have an easy-to-compute range of
+/// outputs. This can be used to fold a comparison to always true or always
+/// false.
+static void setLimitsForIntrinsic(IntrinsicInst &II, APInt &Lower,
+ APInt &Upper) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ // uadd.sat(x, C) produces [C, UINT_MAX].
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C)))
+ Lower = *C;
+ break;
+ case Intrinsic::sadd_sat:
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ break;
+ case Intrinsic::usub_sat:
+ // usub.sat(C, x) produces [0, C].
+ if (match(II.getOperand(0), m_APInt(C)))
+ Upper = *C + 1;
+ // usub.sat(x, C) produces [0, UINT_MAX - C].
+ else if (match(II.getOperand(1), m_APInt(C)))
+ Upper = APInt::getMaxValue(Width) - *C + 1;
+ break;
+ case Intrinsic::ssub_sat:
+ if (match(II.getOperand(0), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = *C - APInt::getSignedMinValue(Width) + 1;
+ } else {
+ // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
+ Lower = *C - APInt::getSignedMaxValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ } else if (match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
+ Lower = APInt::getSignedMinValue(Width) - *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ } else {
+ // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) - *C + 1;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
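A standalone brute-force check of the first of these ranges (illustrative, i8 only): uadd.sat(x, C) always lands in [C, UINT_MAX], which is what lets a later comparison against a disjoint range fold to a constant.

#include <cassert>
#include <cstdint>

static uint8_t uadd_sat8(uint8_t X, uint8_t C) {
  unsigned Sum = unsigned(X) + unsigned(C);
  return Sum > 255 ? 255 : uint8_t(Sum);
}

int main() {
  const uint8_t C = 200;
  for (unsigned X = 0; X <= 255; ++X) {
    uint8_t R = uadd_sat8(uint8_t(X), C);
    assert(R >= C);     // lower bound recorded by setLimitsForIntrinsic
    assert(!(R < 100)); // so "icmp ult (uadd.sat x, 200), 100" is always false
  }
}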
+
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS) {
+ Value *RHS, const InstrInfoQuery &IIQ) {
Type *ITy = GetCompareTy(RHS); // The return type.
Value *X;
@@ -2649,13 +2726,15 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
APInt Lower = APInt(Width, 0);
APInt Upper = APInt(Width, 0);
if (auto *BO = dyn_cast<BinaryOperator>(LHS))
- setLimitsForBinOp(*BO, Lower, Upper);
+ setLimitsForBinOp(*BO, Lower, Upper, IIQ);
+ else if (auto *II = dyn_cast<IntrinsicInst>(LHS))
+ setLimitsForIntrinsic(*II, Lower, Upper);
ConstantRange LHS_CR =
Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
if (auto *I = dyn_cast<Instruction>(LHS))
- if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ if (auto *Ranges = IIQ.getMetadata(I, LLVMContext::MD_range))
LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
if (!LHS_CR.isFullSet()) {
@@ -2688,16 +2767,20 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
B = LBO->getOperand(1);
NoLHSWrapProblem =
ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ (CmpInst::isUnsigned(Pred) &&
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO))) ||
+ (CmpInst::isSigned(Pred) &&
+ Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)));
}
if (RBO && RBO->getOpcode() == Instruction::Add) {
C = RBO->getOperand(0);
D = RBO->getOperand(1);
NoRHSWrapProblem =
ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ (CmpInst::isUnsigned(Pred) &&
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(RBO))) ||
+ (CmpInst::isSigned(Pred) &&
+ Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RBO)));
}
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
@@ -2915,7 +2998,8 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// - The shift is nuw, we can't shift out the one bit.
// - CI2 is one
// - CI isn't zero
- if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() ||
+ if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
CI2Val->isOneValue() || !CI->isZero()) {
if (Pred == ICmpInst::ICMP_EQ)
return ConstantInt::getFalse(RHS->getContext());
@@ -2939,29 +3023,31 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
break;
case Instruction::UDiv:
case Instruction::LShr:
- if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact())
+ if (ICmpInst::isSigned(Pred) || !Q.IIQ.isExact(LBO) ||
+ !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::SDiv:
- if (!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact())
+ if (!ICmpInst::isEquality(Pred) || !Q.IIQ.isExact(LBO) ||
+ !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::AShr:
- if (!LBO->isExact() || !RBO->isExact())
+ if (!Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::Shl: {
- bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
- bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+ bool NUW = Q.IIQ.hasNoUnsignedWrap(LBO) && Q.IIQ.hasNoUnsignedWrap(RBO);
+ bool NSW = Q.IIQ.hasNoSignedWrap(LBO) && Q.IIQ.hasNoSignedWrap(RBO);
if (!NUW && !NSW)
break;
if (!NSW && ICmpInst::isSigned(Pred))
@@ -2976,6 +3062,44 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
+static Value *simplifyICmpWithAbsNabs(CmpInst::Predicate Pred, Value *Op0,
+ Value *Op1) {
+ // We need a comparison with a constant.
+ const APInt *C;
+ if (!match(Op1, m_APInt(C)))
+ return nullptr;
+
+ // matchSelectPattern returns the negation part of an abs pattern in SP1.
+ // If the negate has an NSW flag, abs(INT_MIN) is undefined. Without that
+ // constraint, we can't make a contiguous range for the result of abs.
+ ICmpInst::Predicate AbsPred = ICmpInst::BAD_ICMP_PREDICATE;
+ Value *SP0, *SP1;
+ SelectPatternFlavor SPF = matchSelectPattern(Op0, SP0, SP1).Flavor;
+ if (SPF == SelectPatternFlavor::SPF_ABS &&
+ cast<Instruction>(SP1)->hasNoSignedWrap())
+ // The result of abs(X) is >= 0 (with nsw).
+ AbsPred = ICmpInst::ICMP_SGE;
+ if (SPF == SelectPatternFlavor::SPF_NABS)
+ // The result of -abs(X) is <= 0.
+ AbsPred = ICmpInst::ICMP_SLE;
+
+ if (AbsPred == ICmpInst::BAD_ICMP_PREDICATE)
+ return nullptr;
+
+ // If there is no intersection between abs/nabs and the range of this icmp,
+ // the icmp must be false. If the abs/nabs range is a subset of the icmp
+ // range, the icmp must be true.
+ APInt Zero = APInt::getNullValue(C->getBitWidth());
+ ConstantRange AbsRange = ConstantRange::makeExactICmpRegion(AbsPred, Zero);
+ ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(Pred, *C);
+ if (AbsRange.intersectWith(CmpRange).isEmptySet())
+ return getFalse(GetCompareTy(Op0));
+ if (CmpRange.contains(AbsRange))
+ return getTrue(GetCompareTy(Op0));
+
+ return nullptr;
+}
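A standalone illustration of the range fact this relies on: when the negate carries nsw, abs(X) can never be INT_MIN, so its result is always non-negative and a comparison like "icmp slt (abs X), 0" folds to false.

#include <cassert>
#include <cstdint>

int main() {
  // INT64_MIN is excluded here, matching the nsw requirement discussed above.
  for (int64_t X = -100000; X <= 100000; ++X) {
    int64_t A = X < 0 ? -X : X; // the abs select pattern
    assert(A >= 0);             // hence the abs range is [0, SINT_MAX]
  }
}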
+
/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
@@ -3209,7 +3333,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
return V;
- if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS))
+ if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS, Q.IIQ))
return V;
// If both operands have range metadata, use the metadata
@@ -3218,8 +3342,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
auto RHS_Instr = cast<Instruction>(RHS);
auto LHS_Instr = cast<Instruction>(LHS);
- if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
- LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ if (Q.IIQ.getMetadata(RHS_Instr, LLVMContext::MD_range) &&
+ Q.IIQ.getMetadata(LHS_Instr, LLVMContext::MD_range)) {
auto RHS_CR = getConstantRangeFromMetadata(
*RHS_Instr->getMetadata(LLVMContext::MD_range));
auto LHS_CR = getConstantRangeFromMetadata(
@@ -3397,7 +3521,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// icmp eq|ne X, Y -> false|true if X != Y
if (ICmpInst::isEquality(Pred) &&
- isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo)) {
return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy);
}
@@ -3407,11 +3531,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
return V;
+ if (Value *V = simplifyICmpWithAbsNabs(Pred, LHS, RHS))
+ return V;
+
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
- if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, LHS,
- RHS))
+ if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI,
+ Q.IIQ, LHS, RHS))
return C;
if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
@@ -3420,7 +3547,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
Q.DL.getTypeSizeInBits(CRHS->getType()))
if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI,
- CLHS->getPointerOperand(),
+ Q.IIQ, CLHS->getPointerOperand(),
CRHS->getPointerOperand()))
return C;
@@ -3491,13 +3618,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return getTrue(RetTy);
- // UNO/ORD predicates can be trivially folded if NaNs are ignored.
- if (FMF.noNaNs()) {
- if (Pred == FCmpInst::FCMP_UNO)
- return getFalse(RetTy);
- if (Pred == FCmpInst::FCMP_ORD)
- return getTrue(RetTy);
- }
+ // Fold (un)ordered comparison if we can determine there are no NaNs.
+ if (Pred == FCmpInst::FCMP_UNO || Pred == FCmpInst::FCMP_ORD)
+ if (FMF.noNaNs() ||
+ (isKnownNeverNaN(LHS, Q.TLI) && isKnownNeverNaN(RHS, Q.TLI)))
+ return ConstantInt::get(RetTy, Pred == FCmpInst::FCMP_ORD);
// NaN is unordered; NaN is not ordered.
assert((FCmpInst::isOrdered(Pred) || FCmpInst::isUnordered(Pred)) &&
@@ -3552,12 +3677,19 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
if (C->isZero()) {
switch (Pred) {
+ case FCmpInst::FCMP_OGE:
+ if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return getTrue(RetTy);
+ break;
case FCmpInst::FCMP_UGE:
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return getTrue(RetTy);
break;
+ case FCmpInst::FCMP_ULT:
+ if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return getFalse(RetTy);
+ break;
case FCmpInst::FCMP_OLT:
- // X < 0
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return getFalse(RetTy);
break;
@@ -3634,11 +3766,10 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
//
// We can't replace %sel with %add unless we strip away the flags.
if (isa<OverflowingBinaryOperator>(B))
- if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap())
- return nullptr;
- if (isa<PossiblyExactOperator>(B))
- if (B->isExact())
+ if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B))
return nullptr;
+ if (isa<PossiblyExactOperator>(B) && Q.IIQ.isExact(B))
+ return nullptr;
if (MaxRecurse) {
if (B->getOperand(0) == Op)
@@ -3772,6 +3903,28 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
Pred == ICmpInst::ICMP_EQ))
return V;
+
+ // Test for zero-shift-guard-ops around funnel shifts. These are used to
+ // avoid UB from oversized shifts in raw IR rotate patterns, but the
+ // intrinsics do not have that problem.
+ Value *ShAmt;
+ auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(),
+ m_Value(ShAmt)),
+ m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X),
+ m_Value(ShAmt)));
+ // (ShAmt != 0) ? fshl(X, *, ShAmt) : X --> fshl(X, *, ShAmt)
+ // (ShAmt != 0) ? fshr(*, X, ShAmt) : X --> fshr(*, X, ShAmt)
+ // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
+ // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
+ if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
+ return Pred == ICmpInst::ICMP_NE ? TrueVal : X;
+
+ // (ShAmt == 0) ? X : fshl(X, *, ShAmt) --> fshl(X, *, ShAmt)
+ // (ShAmt == 0) ? X : fshr(*, X, ShAmt) --> fshr(*, X, ShAmt)
+ // (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X
+ // (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X
+ if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt)
+ return Pred == ICmpInst::ICMP_EQ ? FalseVal : X;
}
// Check for other compares that behave like bit test.
@@ -3809,6 +3962,34 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
return nullptr;
}
+/// Try to simplify a select instruction when its condition operand is a
+/// floating-point comparison.
+static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F) {
+ FCmpInst::Predicate Pred;
+ if (!match(Cond, m_FCmp(Pred, m_Specific(T), m_Specific(F))) &&
+ !match(Cond, m_FCmp(Pred, m_Specific(F), m_Specific(T))))
+ return nullptr;
+
+ // TODO: The transform may not be valid with -0.0. An incomplete way of
+ // testing for that possibility is to check if at least one operand is a
+ // non-zero constant.
+ const APFloat *C;
+ if ((match(T, m_APFloat(C)) && C->isNonZero()) ||
+ (match(F, m_APFloat(C)) && C->isNonZero())) {
+ // (T == F) ? T : F --> F
+ // (F == T) ? T : F --> F
+ if (Pred == FCmpInst::FCMP_OEQ)
+ return F;
+
+ // (T != F) ? T : F --> T
+ // (F != T) ? T : F --> T
+ if (Pred == FCmpInst::FCMP_UNE)
+ return T;
+ }
+
+ return nullptr;
+}
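A standalone illustration of the -0.0 caveat mentioned in the TODO above: with T = +0.0 and F = -0.0 the compare is true, so the original select yields +0.0, while folding to F would flip the sign of zero; requiring a non-zero constant operand sidesteps this.

#include <cassert>
#include <cmath>

int main() {
  double T = +0.0, F = -0.0;
  assert(T == F);                // an oeq compare of T and F is satisfied
  double Sel = (T == F) ? T : F; // the un-folded select produces +0.0
  assert(!std::signbit(Sel) && std::signbit(F));
}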
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -3845,9 +4026,16 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
return V;
+ if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal))
+ return V;
+
if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
return V;
+ Optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
+ if (Imp)
+ return *Imp ? TrueVal : FalseVal;
+
return nullptr;
}
@@ -4359,6 +4547,14 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))))
return ConstantFP::getNullValue(Op0->getType());
+ // (X - Y) + Y --> X
+ // Y + (X - Y) --> X
+ Value *X;
+ if (FMF.noSignedZeros() && FMF.allowReassoc() &&
+ (match(Op0, m_FSub(m_Value(X), m_Specific(Op1))) ||
+ match(Op1, m_FSub(m_Value(X), m_Specific(Op0)))))
+ return X;
+
return nullptr;
}
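A standalone illustration of why the new (X - Y) + Y --> X fold is gated on nsz (reassoc covers the intermediate-rounding case): with X = -0.0 and Y = +0.0 the unfused computation produces +0.0, so folding to X would change the sign of zero.

#include <cassert>
#include <cmath>

int main() {
  double X = -0.0, Y = +0.0;
  double Exact = (X - Y) + Y;    // (-0.0 - 0.0) + 0.0 == +0.0
  assert(std::signbit(X) && !std::signbit(Exact));
}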
@@ -4396,6 +4592,13 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (FMF.noNaNs() && Op0 == Op1)
return Constant::getNullValue(Op0->getType());
+ // Y - (Y - X) --> X
+ // (X + Y) - Y --> X
+ if (FMF.noSignedZeros() && FMF.allowReassoc() &&
+ (match(Op1, m_FSub(m_Specific(Op0), m_Value(X))) ||
+ match(Op0, m_c_FAdd(m_Specific(Op1), m_Value(X)))))
+ return X;
+
return nullptr;
}
@@ -4476,10 +4679,8 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// -X / X -> -1.0 and
// X / -X -> -1.0 are legal when NaNs are ignored.
// We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
- if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) &&
- BinaryOperator::getFNegArgument(Op0) == Op1) ||
- (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) &&
- BinaryOperator::getFNegArgument(Op1) == Op0))
+ if (match(Op0, m_FNegNSZ(m_Specific(Op1))) ||
+ match(Op1, m_FNegNSZ(m_Specific(Op0))))
return ConstantFP::get(Op0->getType(), -1.0);
}
@@ -4781,6 +4982,40 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
return Constant::getNullValue(ReturnType);
break;
+ case Intrinsic::uadd_sat:
+ // sat(MAX + X) -> MAX
+ // sat(X + MAX) -> MAX
+ if (match(Op0, m_AllOnes()) || match(Op1, m_AllOnes()))
+ return Constant::getAllOnesValue(ReturnType);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_sat:
+ // sat(X + undef) -> -1
+ // sat(undef + X) -> -1
+ // For unsigned: Assume undef is MAX, thus we saturate to MAX (-1).
+ // For signed: Assume undef is ~X, in which case X + ~X = -1.
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return Constant::getAllOnesValue(ReturnType);
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+ // 0 + X -> X
+ if (match(Op0, m_Zero()))
+ return Op1;
+ break;
+ case Intrinsic::usub_sat:
+ // sat(0 - X) -> 0, sat(X - MAX) -> 0
+ if (match(Op0, m_Zero()) || match(Op1, m_AllOnes()))
+ return Constant::getNullValue(ReturnType);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::ssub_sat:
+ // X - X -> 0, X - undef -> 0, undef - X -> 0
+ if (Op0 == Op1 || match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return Constant::getNullValue(ReturnType);
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+ break;
case Intrinsic::load_relative:
if (auto *C0 = dyn_cast<Constant>(Op0))
if (auto *C1 = dyn_cast<Constant>(Op1))
@@ -4798,10 +5033,51 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
break;
case Intrinsic::maxnum:
case Intrinsic::minnum:
- // If one argument is NaN, return the other argument.
- if (match(Op0, m_NaN())) return Op1;
- if (match(Op1, m_NaN())) return Op0;
+ case Intrinsic::maximum:
+ case Intrinsic::minimum: {
+ // If the arguments are the same, this is a no-op.
+ if (Op0 == Op1) return Op0;
+
+ // If one argument is undef, return the other argument.
+ if (match(Op0, m_Undef()))
+ return Op1;
+ if (match(Op1, m_Undef()))
+ return Op0;
+
+ // If one argument is NaN, return other or NaN appropriately.
+ bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
+ if (match(Op0, m_NaN()))
+ return PropagateNaN ? Op0 : Op1;
+ if (match(Op1, m_NaN()))
+ return PropagateNaN ? Op1 : Op0;
+
+ // Min/max of the same operation with common operand:
+ // m(m(X, Y)), X --> m(X, Y) (4 commuted variants)
+ if (auto *M0 = dyn_cast<IntrinsicInst>(Op0))
+ if (M0->getIntrinsicID() == IID &&
+ (M0->getOperand(0) == Op1 || M0->getOperand(1) == Op1))
+ return Op0;
+ if (auto *M1 = dyn_cast<IntrinsicInst>(Op1))
+ if (M1->getIntrinsicID() == IID &&
+ (M1->getOperand(0) == Op0 || M1->getOperand(1) == Op0))
+ return Op1;
+
+ // min(X, -Inf) --> -Inf (and commuted variant)
+ // max(X, +Inf) --> +Inf (and commuted variant)
+ bool UseNegInf = IID == Intrinsic::minnum || IID == Intrinsic::minimum;
+ const APFloat *C;
+ if ((match(Op0, m_APFloat(C)) && C->isInfinity() &&
+ C->isNegative() == UseNegInf) ||
+ (match(Op1, m_APFloat(C)) && C->isInfinity() &&
+ C->isNegative() == UseNegInf))
+ return ConstantFP::getInfinity(ReturnType, UseNegInf);
+
+ // TODO: minnum(nnan x, inf) -> x
+ // TODO: minnum(nnan ninf x, flt_max) -> x
+ // TODO: maxnum(nnan x, -inf) -> x
+ // TODO: maxnum(nnan ninf x, -flt_max) -> x
break;
+ }
default:
break;
}
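A standalone sketch of the NaN rules these cases implement: minnum/maxnum follow IEEE-754 fmin/fmax and return the non-NaN operand, whereas the newer minimum/maximum intrinsics propagate the NaN.

#include <cassert>
#include <cmath>
#include <limits>

// Propagating semantics, as the "maximum" intrinsic is documented to behave.
static double maximumSemantics(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN();
  return A > B ? A : B;
}

int main() {
  double QNaN = std::nan("");
  assert(std::fmax(QNaN, 1.0) == 1.0);             // maxnum-style: NaN dropped
  assert(std::isnan(maximumSemantics(QNaN, 1.0))); // maximum-style: NaN wins
}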
@@ -4836,7 +5112,16 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
- Value *ShAmtArg = ArgBegin[2];
+ Value *Op0 = ArgBegin[0], *Op1 = ArgBegin[1], *ShAmtArg = ArgBegin[2];
+
+ // If both operands are undef, the result is undef.
+ if (match(Op0, m_Undef()) && match(Op1, m_Undef()))
+ return UndefValue::get(F->getReturnType());
+
+ // If shift amount is undef, assume it is zero.
+ if (match(ShAmtArg, m_Undef()))
+ return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+
const APInt *ShAmtC;
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
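A standalone reference model of i32 fshl (an illustration, not LLVM's implementation) showing why a zero shift amount, which is how an undef amount is treated above, simply returns the first operand:

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t A, uint32_t B, uint32_t Sh) {
  Sh %= 32;                         // the intrinsic uses the amount mod width
  if (Sh == 0)
    return A;                       // no shift: the high word of A:B is just A
  return (A << Sh) | (B >> (32 - Sh));
}

int main() {
  assert(fshl32(0xAABBCCDD, 0x11223344, 0) == 0xAABBCCDD);
  assert(fshl32(0xAABBCCDD, 0x11223344, 8) == 0xBBCCDD11);
}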
@@ -4923,18 +5208,20 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
I->getFastMathFlags(), Q);
break;
case Instruction::Add:
- Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
I->getFastMathFlags(), Q);
break;
case Instruction::Sub:
- Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
@@ -4964,17 +5251,18 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
I->getFastMathFlags(), Q);
break;
case Instruction::Shl:
- Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::And:
Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q);
@@ -5100,7 +5388,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
- if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
+ if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
!I->mayHaveSideEffects())
I->eraseFromParent();
} else {
@@ -5129,7 +5417,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
- if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
+ if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
!I->mayHaveSideEffects())
I->eraseFromParent();
}
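
The simplifyBinaryIntrinsic hunk above leans on the different NaN rules of the four floating-point min/max intrinsics: minnum/maxnum return the non-NaN operand, while the new minimum/maximum intrinsics propagate the NaN. A standalone sketch of that rule in plain C++ (illustrative only, not LLVM code):

#include <cmath>
#include <cstdio>

enum class MinMaxKind { MinNum, MaxNum, Minimum, Maximum };

// Folds a min/max of two doubles following the intrinsic semantics assumed by
// the patch: minimum/maximum propagate NaN, minnum/maxnum drop it.
double fold(MinMaxKind K, double A, double B) {
  bool PropagateNaN = K == MinMaxKind::Minimum || K == MinMaxKind::Maximum;
  if (std::isnan(A))
    return PropagateNaN ? A : B;
  if (std::isnan(B))
    return PropagateNaN ? B : A;
  bool IsMin = K == MinMaxKind::MinNum || K == MinMaxKind::Minimum;
  return IsMin ? (A < B ? A : B) : (A > B ? A : B);
}

int main() {
  double NaN = std::nan("");
  std::printf("minnum(NaN, 2)  = %f\n", fold(MinMaxKind::MinNum, NaN, 2.0));  // 2.000000
  std::printf("minimum(NaN, 2) = %f\n", fold(MinMaxKind::Minimum, NaN, 2.0)); // nan
  return 0;
}
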
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
index e7751d32aab3..000fe5ddad54 100644
--- a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -17,6 +17,7 @@
#include <queue>
namespace llvm {
+
template <class NodeTy, bool IsPostDom>
void IDFCalculator<NodeTy, IsPostDom>::calculate(
SmallVectorImpl<BasicBlock *> &PHIBlocks) {
@@ -61,29 +62,39 @@ void IDFCalculator<NodeTy, IsPostDom>::calculate(
BasicBlock *BB = Node->getBlock();
// Succ is the successor in the direction we are calculating IDF, so it is
// successor for IDF, and predecessor for Reverse IDF.
- for (auto *Succ : children<NodeTy>(BB)) {
+ auto DoWork = [&](BasicBlock *Succ) {
DomTreeNode *SuccNode = DT.getNode(Succ);
// Quickly skip all CFG edges that are also dominator tree edges instead
// of catching them below.
if (SuccNode->getIDom() == Node)
- continue;
+ return;
const unsigned SuccLevel = SuccNode->getLevel();
if (SuccLevel > RootLevel)
- continue;
+ return;
if (!VisitedPQ.insert(SuccNode).second)
- continue;
+ return;
BasicBlock *SuccBB = SuccNode->getBlock();
if (useLiveIn && !LiveInBlocks->count(SuccBB))
- continue;
+ return;
PHIBlocks.emplace_back(SuccBB);
if (!DefBlocks->count(SuccBB))
PQ.push(std::make_pair(
SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
+ };
+
+ if (GD) {
+ for (auto Pair : children<
+ std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, NodeTy>>(
+ {GD, BB}))
+ DoWork(Pair.second);
+ } else {
+ for (auto *Succ : children<NodeTy>(BB))
+ DoWork(Succ);
}
for (auto DomChild : *Node) {
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
index b1d585bfc683..3f22ada803c9 100644
--- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -619,7 +619,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(
// If the merge range is empty, then adding the edge didn't actually form any
// new cycles. We're done.
- if (MergeRange.begin() == MergeRange.end()) {
+ if (empty(MergeRange)) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN->setEdgeKind(TargetN, Edge::Call);
return false; // No new cycle.
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index ee0148e0d795..110c085d3f35 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -420,6 +421,8 @@ namespace {
BasicBlock *BB);
bool solveBlockValueSelect(ValueLatticeElement &BBLV, SelectInst *S,
BasicBlock *BB);
+ Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I,
+ BasicBlock *BB);
bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
@@ -634,8 +637,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
if (auto *CI = dyn_cast<CastInst>(BBI))
return solveBlockValueCast(Res, CI, BB);
- BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
- if (BO && isa<ConstantInt>(BO->getOperand(1)))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
return solveBlockValueBinaryOp(Res, BO, BB);
}
@@ -951,6 +953,25 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV,
return true;
}
+Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op,
+ Instruction *I,
+ BasicBlock *BB) {
+ if (!hasBlockValue(I->getOperand(Op), BB))
+ if (pushBlockValue(std::make_pair(BB, I->getOperand(Op))))
+ return None;
+
+ const unsigned OperandBitWidth =
+ DL.getTypeSizeInBits(I->getOperand(Op)->getType());
+ ConstantRange Range = ConstantRange(OperandBitWidth);
+ if (hasBlockValue(I->getOperand(Op), BB)) {
+ ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I);
+ if (Val.isConstantRange())
+ Range = Val.getConstantRange();
+ }
+ return Range;
+}
+
bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
CastInst *CI,
BasicBlock *BB) {
@@ -981,21 +1002,11 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
// Figure out the range of the LHS. If that fails, we still apply the
// transfer rule on the full set since we may be able to locally infer
// interesting facts.
- if (!hasBlockValue(CI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, CI->getOperand(0))))
- // More work to do before applying this transfer rule.
- return false;
-
- const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(CI->getOperand(0)->getType());
- ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(CI->getOperand(0), BB)) {
- ValueLatticeElement LHSVal = getBlockValue(CI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal,
- CI);
- if (LHSVal.isConstantRange())
- LHSRange = LHSVal.getConstantRange();
- }
+ Optional<ConstantRange> LHSRes = getRangeForOperand(0, CI, BB);
+ if (!LHSRes.hasValue())
+ // More work to do before applying this transfer rule.
+ return false;
+ ConstantRange LHSRange = LHSRes.getValue();
const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
@@ -1037,27 +1048,19 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
return true;
};
- // Figure out the range of the LHS. If that fails, use a conservative range,
- // but apply the transfer rule anyways. This lets us pick up facts from
- // expressions like "and i32 (call i32 @foo()), 32"
- if (!hasBlockValue(BO->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BO->getOperand(0))))
- // More work to do before applying this transfer rule.
- return false;
+ // Figure out the ranges of the operands. If that fails, use a
+ // conservative range, but apply the transfer rule anyways. This
+ // lets us pick up facts from expressions like "and i32 (call i32
+ // @foo()), 32"
+ Optional<ConstantRange> LHSRes = getRangeForOperand(0, BO, BB);
+ Optional<ConstantRange> RHSRes = getRangeForOperand(1, BO, BB);
- const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BO->getOperand(0)->getType());
- ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BO->getOperand(0), BB)) {
- ValueLatticeElement LHSVal = getBlockValue(BO->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal,
- BO);
- if (LHSVal.isConstantRange())
- LHSRange = LHSVal.getConstantRange();
- }
+ if (!LHSRes.hasValue() || !RHSRes.hasValue())
+ // More work to do before applying this transfer rule.
+ return false;
- ConstantInt *RHS = cast<ConstantInt>(BO->getOperand(1));
- ConstantRange RHSRange = ConstantRange(RHS->getValue());
+ ConstantRange LHSRange = LHSRes.getValue();
+ ConstantRange RHSRange = RHSRes.getValue();
// NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
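
The LazyValueInfo hunks above factor the per-operand range computation into getRangeForOperand: it returns None while block values still need to be computed, and otherwise a conservative full range that is narrowed when a cached block value exists; with both operands going through the helper, solveBlockValueBinaryOp no longer requires a ConstantInt right-hand side. A rough standalone sketch of that control flow, in plain C++17 with made-up types rather than the LLVM API:

#include <algorithm>
#include <cstdint>
#include <map>
#include <optional>

struct Range { int64_t Lo, Hi; };                  // stand-in for ConstantRange

struct Solver {
  std::map<int, Range> BlockValues;                // operand index -> known range
  bool MoreWorkQueued = false;

  // Mirrors getRangeForOperand: nullopt means "retry after more propagation".
  std::optional<Range> rangeForOperand(int Op) {
    auto It = BlockValues.find(Op);
    if (It == BlockValues.end() && MoreWorkQueued)
      return std::nullopt;
    Range R{INT64_MIN, INT64_MAX};                 // conservative default
    if (It != BlockValues.end())
      R = It->second;                              // narrow with the cached value
    return R;
  }

  // Mirrors solveBlockValueBinaryOp: both operands now use the helper.
  bool solveBinaryOp(Range &Out) {
    std::optional<Range> LHS = rangeForOperand(0);
    std::optional<Range> RHS = rangeForOperand(1);
    if (!LHS || !RHS)
      return false;                                // more work to do first
    // Toy combine rule; the real code applies the opcode's ConstantRange
    // transfer function instead.
    Out = {std::min(LHS->Lo, RHS->Lo), std::max(LHS->Hi, RHS->Hi)};
    return true;
  }
};
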
diff --git a/contrib/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
new file mode 100644
index 000000000000..5540859ebdda
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -0,0 +1,391 @@
+//===- LegacyDivergenceAnalysis.cpp - Legacy Divergence Analysis Impl ----===//
+//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements divergence analysis which determines whether a branch
+// in a GPU program is divergent. It can help branch optimizations such as jump
+// threading and loop unswitching to make better decisions.
+//
+// GPU programs typically use the SIMD execution model, where multiple threads
+// in the same execution group have to execute in lock-step. Therefore, if the
+// code contains divergent branches (i.e., threads in a group do not agree on
+// which path of the branch to take), the group of threads has to execute all
+// the paths from that branch with different subsets of threads enabled until
+// they converge at the immediately post-dominating BB of the paths.
+//
+// Due to this execution model, some optimizations such as jump
+// threading and loop unswitching can be unfortunately harmful when performed on
+// divergent branches. Therefore, an analysis that computes which branches in a
+// GPU program are divergent can help the compiler to selectively run these
+// optimizations.
+//
+// This file defines divergence analysis which computes a conservative but
+// non-trivial approximation of all divergent branches in a GPU program. It
+// partially implements the approach described in
+//
+// Divergence Analysis
+// Sampaio, Souza, Collange, Pereira
+// TOPLAS '13
+//
+// The divergence analysis identifies the sources of divergence (e.g., special
+// variables that hold the thread ID), and recursively marks variables that are
+// data or sync dependent on a source of divergence as divergent.
+//
+// While data dependency is a well-known concept, the notion of sync dependency
+// is worth more explanation. Sync dependence characterizes the control flow
+// aspect of the propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// The current implementation has the following limitations:
+// 1. intra-procedural. It conservatively considers the arguments of a
+// non-kernel-entry function and the return value of a function call as
+// divergent.
+// 2. memory as black box. It conservatively considers values loaded from
+// generic or local address as divergent. This can be improved by leveraging
+// pointer analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "divergence"
+
+// transparently use the GPUDivergenceAnalysis
+static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
+ cl::Hidden,
+ cl::desc("turn the LegacyDivergenceAnalysis into "
+ "a wrapper for GPUDivergenceAnalysis"));
+
+namespace {
+
+class DivergencePropagator {
+public:
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+ PostDominatorTree &PDT, DenseSet<const Value *> &DV)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ void populateWithSourcesOfDivergence();
+ void propagate();
+
+private:
+ // A helper function that explores data dependents of V.
+ void exploreDataDependency(Value *V);
+ // A helper function that explores sync dependents of TI.
+ void exploreSyncDependency(Instruction *TI);
+ // Computes the influence region from Start to End. This region includes all
+ // basic blocks on any simple path from Start to End.
+ void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion);
+ // Finds all users of I that are outside the influence region, and adds these
+ // users to Worklist.
+ void findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
+
+ Function &F;
+ TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ std::vector<Value *> Worklist; // Stack for DFS.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+ Worklist.clear();
+ DV.clear();
+ for (auto &I : instructions(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ Worklist.push_back(&I);
+ DV.insert(&I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ Worklist.push_back(&Arg);
+ DV.insert(&Arg);
+ }
+ }
+}
+
+void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
+ // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+ // immediate post dominator are divergent. This rule handles if-then-else
+ // patterns. For example,
+ //
+ // if (tid < 5)
+ // a1 = 1;
+ // else
+ // a2 = 2;
+ // a = phi(a1, a2); // sync dependent on (tid < 5)
+ BasicBlock *ThisBB = TI->getParent();
+
+ // Unreachable blocks may not be in the dominator tree.
+ if (!DT.isReachableFromEntry(ThisBB))
+ return;
+
+ // If the function has no exit blocks or doesn't reach any exit blocks, the
+ // post dominator may be null.
+ DomTreeNode *ThisNode = PDT.getNode(ThisBB);
+ if (!ThisNode)
+ return;
+
+ BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
+ if (IPostDom == nullptr)
+ return;
+
+ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+ // A PHINode is uniform if it returns the same value no matter which path is
+ // taken.
+ if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
+ Worklist.push_back(&*I);
+ }
+
+ // Propagation rule 2: if a value defined in a loop is used outside, the user
+ // is sync dependent on the condition of the loop exits that dominate the
+ // user. For example,
+ //
+ // int i = 0;
+ // do {
+ // i++;
+ // if (foo(i)) ... // uniform
+ // } while (i < tid);
+ // if (bar(i)) ... // divergent
+ //
+ // A program may contain unstructured loops. Therefore, we cannot leverage
+ // LoopInfo, which only recognizes natural loops.
+ //
+ // The algorithm used here handles both natural and unstructured loops. Given
+ // a branch TI, we first compute its influence region, the union of all simple
+ // paths from TI to its immediate post dominator (IPostDom). Then, we search
+ // for all the values defined in the influence region but used outside. All
+ // these users are sync dependent on TI.
+ DenseSet<BasicBlock *> InfluenceRegion;
+ computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+ // An insight that can speed up the search process is that all the in-region
+ // values that are used outside must dominate TI. Therefore, instead of
+ // searching every basic block in the influence region, we search all the
+ // dominators of TI until it is outside the influence region.
+ BasicBlock *InfluencedBB = ThisBB;
+ while (InfluenceRegion.count(InfluencedBB)) {
+ for (auto &I : *InfluencedBB)
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+ if (IDomNode == nullptr)
+ break;
+ InfluencedBB = IDomNode->getBlock();
+ }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!InfluenceRegion.count(UserInst->getParent())) {
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+ }
+}
+
+// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
+// to the influence region.
+static void
+addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion,
+ std::vector<BasicBlock *> &InfluenceStack) {
+ for (BasicBlock *Succ : successors(ThisBB)) {
+ if (Succ != End && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+ BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion) {
+ assert(PDT.properlyDominates(End, Start) &&
+ "End does not properly dominate Start");
+
+ // The influence region starts from the end of "Start" to the beginning of
+ // "End". Therefore, "Start" should not be in the region unless "Start" is in
+ // a loop that doesn't contain "End".
+ std::vector<BasicBlock *> InfluenceStack;
+ addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
+ while (!InfluenceStack.empty()) {
+ BasicBlock *BB = InfluenceStack.back();
+ InfluenceStack.pop_back();
+ addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
+ }
+}
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+ // Follow def-use chains of V.
+ for (User *U : V->users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+}
+
+void DivergencePropagator::propagate() {
+ // Traverse the dependency graph using DFS.
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // Terminators with fewer than two successors won't introduce sync
+ // dependency. Ignore them.
+ if (I->isTerminator() && I->getNumSuccessors() > 1)
+ exploreSyncDependency(I);
+ }
+ exploreDataDependency(V);
+ }
+}
+
+} // namespace
+
+// Register this pass.
+char LegacyDivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence",
+ "Legacy Divergence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence",
+ "Legacy Divergence Analysis", false, true)
+
+FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
+ return new LegacyDivergenceAnalysis();
+}
+
+void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ if (UseGPUDA)
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
+ const Function &F) const {
+ if (!UseGPUDA)
+ return false;
+
+ // GPUDivergenceAnalysis requires a reducible CFG.
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ using RPOTraversal = ReversePostOrderTraversal<const Function *>;
+ RPOTraversal FuncRPOT(&F);
+ return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
+ const LoopInfo>(FuncRPOT, LI);
+}
+
+bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ gpuDA = nullptr;
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+
+ if (shouldUseGPUDivergenceAnalysis(F)) {
+ // run the new GPU divergence analysis
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
+
+ } else {
+ // run LLVM's existing DivergenceAnalysis
+ DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ }
+
+ LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
+ << ":\n";
+ print(dbgs(), F.getParent()));
+
+ return false;
+}
+
+bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
+ if (gpuDA) {
+ return gpuDA->isDivergent(*V);
+ }
+ return DivergentValues.count(V);
+}
+
+void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+ if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
+ return;
+
+ const Function *F = nullptr;
+ if (!DivergentValues.empty()) {
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+ } else if (gpuDA) {
+ F = &gpuDA->getFunction();
+ }
+ if (!F)
+ return;
+
+ // Dumps all divergent values in F, arguments and then instructions.
+ for (auto &Arg : F->args()) {
+ OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
+ OS << Arg << "\n";
+ }
+ // Walk the basic blocks and their instructions in program order so the output is deterministic.
+ for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
+ auto &BB = *BI;
+ OS << "\n " << BB.getName() << ":\n";
+ for (auto &I : BB.instructionsWithoutDebug()) {
+ OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
+ OS << I << "\n";
+ }
+ }
+ OS << "\n";
+}
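
For reference, the heart of the pass added above is a worklist DFS: seed it with the sources of divergence, then mark everything reachable along def-use edges as divergent. The sync-dependence propagation through control flow is what the rest of the file adds on top. A standalone sketch of just the data-dependence part, using illustrative graph types rather than the LLVM classes:

#include <set>
#include <vector>

struct Value {
  std::vector<Value *> Users;        // def-use edges
  bool SourceOfDivergence = false;   // e.g. a thread-ID read
};

std::set<const Value *> computeDivergentValues(const std::vector<Value *> &AllValues) {
  std::set<const Value *> DV;
  std::vector<Value *> Worklist;
  // populateWithSourcesOfDivergence
  for (Value *V : AllValues)
    if (V->SourceOfDivergence) {
      DV.insert(V);
      Worklist.push_back(V);
    }
  // propagate: follow def-use chains, DFS via an explicit stack
  while (!Worklist.empty()) {
    Value *V = Worklist.back();
    Worklist.pop_back();
    for (Value *U : V->Users)
      if (DV.insert(U).second)       // newly divergent -> keep exploring
        Worklist.push_back(U);
  }
  return DV;
}
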
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index db919bd233bf..5d0a627f8426 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -330,12 +330,12 @@ void Lint::visitCallSite(CallSite CS) {
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
// overlap is not distinguished from the case where nothing is known.
- uint64_t Size = 0;
+ auto Size = LocationSize::unknown();
if (const ConstantInt *Len =
dyn_cast<ConstantInt>(findValue(MCI->getLength(),
/*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
- Size = Len->getValue().getZExtValue();
+ Size = LocationSize::precise(Len->getValue().getZExtValue());
Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
MustAlias,
"Undefined behavior: memcpy source and destination overlap", &I);
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index d319d4c249d3..8129795bc0c1 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -107,8 +107,8 @@ static bool isDereferenceableAndAlignedPointer(
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size,
DL, CtxI, DT, Visited);
- if (auto CS = ImmutableCallSite(V))
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS))
+ if (const auto *Call = dyn_cast<CallBase>(V))
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call))
return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT,
Visited);
@@ -345,7 +345,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
const DataLayout &DL = ScanBB->getModule()->getDataLayout();
// Try to get the store size for the type.
- uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
+ auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
Value *StrippedPtr = Ptr->stripPointerCasts();
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index a24d66011b8d..7f3480f512ab 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -342,7 +342,7 @@ void RuntimePointerChecking::groupChecks(
//
// The above case requires that we have an UnknownDependence between
// accesses to the same underlying object. This cannot happen unless
- // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+ // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
// is also false. In this case we will use the fallback path and create
// separate checking groups for all pointers.
@@ -420,7 +420,7 @@ void RuntimePointerChecking::groupChecks(
// We've computed the grouped checks for this partition.
// Save the results and continue with the next one.
- std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
+ llvm::copy(Groups, std::back_inserter(CheckingGroups));
}
}
@@ -509,7 +509,7 @@ public:
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
+ AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, false));
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
@@ -518,7 +518,7 @@ public:
/// Register a store.
void addStore(MemoryLocation &Loc) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
+ AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, true));
}
@@ -556,7 +556,7 @@ public:
/// perform dependency checking.
///
/// Note that this can later be cleared if we retry memcheck analysis without
- /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
+ /// dependency checking (i.e. FoundNonConstantDistanceDependence).
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
@@ -604,8 +604,8 @@ private:
///
/// Note that, this is different from isDependencyCheckNeeded. When we retry
/// memcheck analysis without dependency checking
- /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
- /// while this remains set if we have potentially dependent accesses.
+ /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
+ /// cleared while this remains set if we have potentially dependent accesses.
bool IsRTCheckAnalysisNeeded;
/// The SCEV predicate containing all the SCEV-related assumptions.
@@ -1221,18 +1221,20 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return X == PtrSCEVB;
}
-bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+MemoryDepChecker::VectorizationSafetyStatus
+MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
case NoDep:
case Forward:
case BackwardVectorizable:
- return true;
+ return VectorizationSafetyStatus::Safe;
case Unknown:
+ return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
- return false;
+ return VectorizationSafetyStatus::Unsafe;
}
llvm_unreachable("unexpected DepType!");
}
@@ -1317,6 +1319,11 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
+void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
+ if (Status < S)
+ Status = S;
+}
+
/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
@@ -1485,7 +1492,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::NoDep;
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
- ShouldRetryWithRuntimeCheck = true;
+ FoundNonConstantDistanceDependence = true;
return Dependence::Unknown;
}
@@ -1652,7 +1659,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
Dependence::DepType Type =
isDependent(*A.first, A.second, *B.first, B.second, Strides);
- SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+ mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
// dependences. In that case return as soon as we find the first
@@ -1669,7 +1676,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
<< "Too many dependences, stopped recording\n");
}
}
- if (!RecordDependences && !SafeForVectorization)
+ if (!RecordDependences && !isSafeForVectorization())
return false;
}
++OI;
@@ -1679,7 +1686,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
}
LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
- return SafeForVectorization;
+ return isSafeForVectorization();
}
SmallVector<Instruction *, 4>
@@ -1862,10 +1869,17 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// writes and between reads and writes, but not between reads and reads.
ValueSet Seen;
+ // Record uniform store addresses to identify if we have multiple stores
+ // to the same address.
+ ValueSet UniformStores;
+
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
- // Check for store to loop invariant address.
- StoreToLoopInvariantAddress |= isUniform(Ptr);
+
+ if (isUniform(Ptr))
+ HasDependenceInvolvingLoopInvariantAddress |=
+ !UniformStores.insert(Ptr).second;
+
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr).second) {
@@ -1907,6 +1921,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
IsReadOnlyPtr = true;
}
+ // See if there is an unsafe dependency between a load from a uniform address
+ // and a store to the same uniform address.
+ if (UniformStores.count(Ptr)) {
+ LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
+ "load and uniform store to the same address!\n");
+ HasDependenceInvolvingLoopInvariantAddress = true;
+ }
+
MemoryLocation Loc = MemoryLocation::get(LD);
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
@@ -2265,7 +2287,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
- StoreToLoopInvariantAddress(false) {
+ HasDependenceInvolvingLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(AA, LI, TLI, DT);
}
@@ -2297,8 +2319,8 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
PtrRtChecking->print(OS, Depth);
OS << "\n";
- OS.indent(Depth) << "Store to invariant address was "
- << (StoreToLoopInvariantAddress ? "" : "not ")
+ OS.indent(Depth) << "Non vectorizable stores to invariant address were "
+ << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
OS.indent(Depth) << "SCEV assumptions:\n";
diff --git a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
index 074023a7e1e2..2a3b29d7fbca 100644
--- a/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -147,8 +147,8 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<LoopAnalysis>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
PA.preserve<ScalarEvolutionAnalysis>();
- // FIXME: Uncomment this when all loop passes preserve MemorySSA
- // PA.preserve<MemorySSAAnalysis>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
// FIXME: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
PA.preserve<AAManager>();
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 3f78456b3586..ef2b1257015c 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -213,33 +214,21 @@ bool Loop::isSafeToClone() const {
MDNode *Loop::getLoopID() const {
MDNode *LoopID = nullptr;
- if (BasicBlock *Latch = getLoopLatch()) {
- LoopID = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
- } else {
- assert(!getLoopLatch() &&
- "The loop should have no single latch at this point");
- // Go through each predecessor of the loop header and check the
- // terminator for the metadata.
- BasicBlock *H = getHeader();
- for (BasicBlock *BB : this->blocks()) {
- TerminatorInst *TI = BB->getTerminator();
- MDNode *MD = nullptr;
-
- // Check if this terminator branches to the loop header.
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H) {
- MD = TI->getMetadata(LLVMContext::MD_loop);
- break;
- }
- }
- if (!MD)
- return nullptr;
- if (!LoopID)
- LoopID = MD;
- else if (MD != LoopID)
- return nullptr;
- }
+ // Go through the latch blocks and check the terminator for the metadata.
+ SmallVector<BasicBlock *, 4> LatchesBlocks;
+ getLoopLatches(LatchesBlocks);
+ for (BasicBlock *BB : LatchesBlocks) {
+ Instruction *TI = BB->getTerminator();
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
+
+ if (!MD)
+ return nullptr;
+
+ if (!LoopID)
+ LoopID = MD;
+ else if (MD != LoopID)
+ return nullptr;
}
if (!LoopID || LoopID->getNumOperands() == 0 ||
LoopID->getOperand(0) != LoopID)
@@ -248,23 +237,19 @@ MDNode *Loop::getLoopID() const {
}
void Loop::setLoopID(MDNode *LoopID) const {
- assert(LoopID && "Loop ID should not be null");
- assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");
+ assert((!LoopID || LoopID->getNumOperands() > 0) &&
+ "Loop ID needs at least one operand");
+ assert((!LoopID || LoopID->getOperand(0) == LoopID) &&
+ "Loop ID should refer to itself");
- if (BasicBlock *Latch = getLoopLatch()) {
- Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
- return;
- }
-
- assert(!getLoopLatch() &&
- "The loop should have no single latch at this point");
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
- TerminatorInst *TI = BB->getTerminator();
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H)
+ Instruction *TI = BB->getTerminator();
+ for (BasicBlock *Successor : successors(TI)) {
+ if (Successor == H) {
TI->setMetadata(LLVMContext::MD_loop, LoopID);
+ break;
+ }
}
}
}
@@ -308,16 +293,50 @@ bool Loop::isAnnotatedParallel() const {
if (!DesiredLoopIdMetadata)
return false;
+ MDNode *ParallelAccesses =
+ findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
+ SmallPtrSet<MDNode *, 4>
+ ParallelAccessGroups; // For scalable 'contains' check.
+ if (ParallelAccesses) {
+ for (const MDOperand &MD : drop_begin(ParallelAccesses->operands(), 1)) {
+ MDNode *AccGroup = cast<MDNode>(MD.get());
+ assert(isValidAsAccessGroup(AccGroup) &&
+ "List item must be an access group");
+ ParallelAccessGroups.insert(AccGroup);
+ }
+ }
+
// The loop branch contains the parallel loop metadata. In order to ensure
// that any parallel-loop-unaware optimization pass hasn't added loop-carried
// dependencies (thus converted the loop back to a sequential loop), check
- // that all the memory instructions in the loop contain parallelism metadata
- // that point to the same unique "loop id metadata" the loop branch does.
+ // that all the memory instructions in the loop belong to an access group that
+ // is parallel to this loop.
for (BasicBlock *BB : this->blocks()) {
for (Instruction &I : *BB) {
if (!I.mayReadOrWriteMemory())
continue;
+ if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
+ auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
+ if (AG->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+ return ParallelAccessGroups.count(AG);
+ }
+
+ for (const MDOperand &AccessListItem : AG->operands()) {
+ MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+ assert(isValidAsAccessGroup(AccGroup) &&
+ "List item must be an access group");
+ if (ParallelAccessGroups.count(AccGroup))
+ return true;
+ }
+ return false;
+ };
+
+ if (ContainsAccessGroup(AccessGroup))
+ continue;
+ }
+
// The memory instruction can refer to the loop identifier metadata
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
@@ -708,6 +727,40 @@ void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) {
}
}
+MDNode *llvm::findOptionMDForLoopID(MDNode *LoopID, StringRef Name) {
+ // No loop metadata node, no loop properties.
+ if (!LoopID)
+ return nullptr;
+
+ // First operand should refer to the metadata node itself, for legacy reasons.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ // Iterate over the metadata node operands and look for MDString metadata.
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD || MD->getNumOperands() < 1)
+ continue;
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+ // Return the operand node if MDString holds expected metadata.
+ if (Name.equals(S->getString()))
+ return MD;
+ }
+
+ // Loop property not found.
+ return nullptr;
+}
+
+MDNode *llvm::findOptionMDForLoop(const Loop *TheLoop, StringRef Name) {
+ return findOptionMDForLoopID(TheLoop->getLoopID(), Name);
+}
+
+bool llvm::isValidAsAccessGroup(MDNode *Node) {
+ return Node->getNumOperands() == 0 && Node->isDistinct();
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 07a151ce0fce..a68f114b83a0 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -193,8 +194,14 @@ bool LPPassManager::runOnFunction(Function &F) {
}
// Walk Loops
- unsigned InstrCount = 0;
+ unsigned InstrCount, FunctionSize = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
+ // Collect the initial size of the module and the function we're looking at.
+ if (EmitICRemark) {
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+ FunctionSize = F.getInstructionCount();
+ }
while (!LQ.empty()) {
CurrentLoopDeleted = false;
CurrentLoop = LQ.back();
@@ -209,17 +216,28 @@ bool LPPassManager::runOnFunction(Function &F) {
initializeAnalysisImpl(P);
+ bool LocalChanged = false;
{
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
TimeRegion PassTimer(getPassTimer(P));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
- Changed |= P->runOnLoop(CurrentLoop, *this);
- if (EmitICRemark)
- emitInstrCountChangedRemark(P, M, InstrCount);
+ LocalChanged = P->runOnLoop(CurrentLoop, *this);
+ Changed |= LocalChanged;
+ if (EmitICRemark) {
+ unsigned NewSize = F.getInstructionCount();
+ // Update the size of the function, emit a remark, and update the
+ // size of the module.
+ if (NewSize != FunctionSize) {
+ int64_t Delta = static_cast<int64_t>(NewSize) -
+ static_cast<int64_t>(FunctionSize);
+ emitInstrCountChangedRemark(P, M, Delta, InstrCount,
+ FunctionToInstrCount, &F);
+ InstrCount = static_cast<int64_t>(InstrCount) + Delta;
+ FunctionSize = NewSize;
+ }
+ }
}
- if (Changed)
+ if (LocalChanged)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
CurrentLoopDeleted ? "<deleted loop>"
: CurrentLoop->getName());
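
The LPPassManager hunk above also changes how instruction-count remarks are produced: rather than re-counting the whole module around every pass, it snapshots the function size once and, after each pass, emits a remark only when the size actually changed, adjusting the running module total by the delta. A rough standalone sketch of that bookkeeping (plain C++; printf stands in for the remark emission):

#include <cstdint>
#include <cstdio>

struct SizeRemarks {
  int64_t ModuleCount;      // running instruction count for the module
  unsigned FunctionSize;    // last observed size of the current function

  void afterPass(const char *PassName, unsigned NewSize) {
    if (NewSize == FunctionSize)
      return;               // pass did not change the function; no remark
    int64_t Delta = int64_t(NewSize) - int64_t(FunctionSize);
    std::printf("%s changed instruction count by %lld (module total now %lld)\n",
                PassName, (long long)Delta, (long long)(ModuleCount + Delta));
    ModuleCount += Delta;
    FunctionSize = NewSize;
  }
};
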
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
index 5a6bbd7b2ac6..907b321b231a 100644
--- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -13,7 +13,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
@@ -106,9 +105,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
if (!Res.isNonLocal()) {
Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
static_cast<BasicBlock *>(nullptr)));
- } else if (auto CS = CallSite(Inst)) {
+ } else if (auto *Call = dyn_cast<CallBase>(Inst)) {
const MemoryDependenceResults::NonLocalDepInfo &NLDI =
- MDA.getNonLocalCallDependency(CS);
+ MDA.getNonLocalCallDependency(Call);
DepSet &InstDeps = Deps[Inst];
for (const NonLocalDepEntry &I : NLDI) {
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index feae53c54ecb..e22182b99e11 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -31,7 +31,6 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -182,8 +181,8 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
}
/// Private helper for finding the local dependencies of a call site.
-MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
- CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
+MemDepResult MemoryDependenceResults::getCallDependencyFrom(
+ CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
BasicBlock *BB) {
unsigned Limit = BlockScanLimit;
@@ -205,21 +204,21 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
ModRefInfo MR = GetLocation(Inst, Loc, TLI);
if (Loc.Ptr) {
// A simple instruction.
- if (isModOrRefSet(AA.getModRefInfo(CS, Loc)))
+ if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
return MemDepResult::getClobber(Inst);
continue;
}
- if (auto InstCS = CallSite(Inst)) {
+ if (auto *CallB = dyn_cast<CallBase>(Inst)) {
// If these two calls do not interfere, look past it.
- if (isNoModRef(AA.getModRefInfo(CS, InstCS))) {
- // If the two calls are the same, return InstCS as a Def, so that
- // CS can be found redundant and eliminated.
+ if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
+ // If the two calls are the same, return Inst as a Def, so that
+ // Call can be found redundant and eliminated.
if (isReadOnlyCall && !isModSet(MR) &&
- CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ Call->isIdenticalToWhenDefined(CallB))
return MemDepResult::getDef(Inst);
- // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // Otherwise if the two calls don't interact (e.g. CallB is readnone)
// keep scanning.
continue;
} else
@@ -750,11 +749,10 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
LocalCache = getPointerDependencyFrom(
MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
- } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
- CallSite QueryCS(QueryInst);
- bool isReadOnly = AA.onlyReadsMemory(QueryCS);
- LocalCache = getCallSiteDependencyFrom(
- QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
+ } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
+ bool isReadOnly = AA.onlyReadsMemory(QueryCall);
+ LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
+ ScanPos->getIterator(), QueryParent);
} else
// Non-memory instruction.
LocalCache = MemDepResult::getUnknown();
@@ -780,11 +778,11 @@ static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
#endif
const MemoryDependenceResults::NonLocalDepInfo &
-MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
- assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
+ assert(getDependency(QueryCall).isNonLocal() &&
"getNonLocalCallDependency should only be used on calls with "
"non-local deps!");
- PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCall];
NonLocalDepInfo &Cache = CacheP.first;
// This is the set of blocks that need to be recomputed. In the cached case,
@@ -807,21 +805,21 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.push_back(Entry.getBB());
// Sort the cache so that we can do fast binary search lookups below.
- llvm::sort(Cache.begin(), Cache.end());
+ llvm::sort(Cache);
++NumCacheDirtyNonLocal;
// cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
// << Cache.size() << " cached: " << *QueryInst;
} else {
// Seed DirtyBlocks with each of the preds of QueryInst's block.
- BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ BasicBlock *QueryBB = QueryCall->getParent();
for (BasicBlock *Pred : PredCache.get(QueryBB))
DirtyBlocks.push_back(Pred);
++NumUncacheNonLocal;
}
// isReadonlyCall - If this is a read-only call, we can be more aggressive.
- bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
+ bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
SmallPtrSet<BasicBlock *, 32> Visited;
@@ -865,8 +863,8 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
if (Instruction *Inst = ExistingResult->getResult().getInst()) {
ScanPos = Inst->getIterator();
// We're removing QueryInst's use of Inst.
- RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
- QueryCS.getInstruction());
+ RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
+ QueryCall);
}
}
@@ -874,8 +872,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
MemDepResult Dep;
if (ScanPos != DirtyBB->begin()) {
- Dep =
- getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
+ Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
// No dependence found. If this is the entry block of the function, it is
// a clobber, otherwise it is unknown.
@@ -897,7 +894,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
// Keep the ReverseNonLocalDeps map up to date so we can efficiently
// update this when we remove instructions.
if (Instruction *Inst = Dep.getInst())
- ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ ReverseNonLocalDeps[Inst].insert(QueryCall);
} else {
// If the block *is* completely transparent to the load, we need to check
@@ -1070,7 +1067,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
break;
default:
// Added many values, do a full scale sort.
- llvm::sort(Cache.begin(), Cache.end());
+ llvm::sort(Cache);
break;
}
}
@@ -1113,21 +1110,36 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we already have a cache entry for this CacheKey, we may need to do some
// work to reconcile the cache entry and the current query.
if (!Pair.second) {
- if (CacheInfo->Size < Loc.Size) {
- // The query's Size is greater than the cached one. Throw out the
- // cached data and proceed with the query at the greater size.
- CacheInfo->Pair = BBSkipFirstBlockPair();
- CacheInfo->Size = Loc.Size;
- for (auto &Entry : CacheInfo->NonLocalDeps)
- if (Instruction *Inst = Entry.getResult().getInst())
- RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
- CacheInfo->NonLocalDeps.clear();
- } else if (CacheInfo->Size > Loc.Size) {
- // This query's Size is less than the cached one. Conservatively restart
- // the query using the greater size.
- return getNonLocalPointerDepFromBB(
- QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
- StartBB, Result, Visited, SkipFirstBlock);
+ if (CacheInfo->Size != Loc.Size) {
+ bool ThrowOutEverything;
+ if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
+ // FIXME: We may be able to do better in the face of results with mixed
+ // precision. We don't appear to get them in practice, though, so just
+ // be conservative.
+ ThrowOutEverything =
+ CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
+ CacheInfo->Size.getValue() < Loc.Size.getValue();
+ } else {
+ // For our purposes, unknown size > all others.
+ ThrowOutEverything = !Loc.Size.hasValue();
+ }
+
+ if (ThrowOutEverything) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (auto &Entry : CacheInfo->NonLocalDeps)
+ if (Instruction *Inst = Entry.getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(
+ QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
+ StartBB, Result, Visited, SkipFirstBlock);
+ }
}
// If the query's AATags are inconsistent with the cached one,
@@ -1572,7 +1584,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
// RemInst can't be the terminator if it has local stuff depending on it.
- assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
+ assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
"Nothing can locally depend on a terminator");
for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
@@ -1662,7 +1674,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
// Re-sort the NonLocalDepInfo. Changing the dirty entry to its
// subsequent value may invalidate the sortedness.
- llvm::sort(NLPDI.begin(), NLPDI.end());
+ llvm::sort(NLPDI);
}
ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
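
With LocationSize in the non-local pointer cache, the code above has to reconcile a cached size with the queried one: when they differ, it discards the cached entries if the query is effectively larger (or the precision flags disagree), and otherwise reruns the query at the larger cached size. A standalone sketch of just that predicate, using std::optional as a stand-in for "unknown" (plain C++17, not the LLVM types):

#include <cstdint>
#include <optional>

struct Sz {
  std::optional<uint64_t> Bytes;  // nullopt = unknown size
  bool Precise = false;
};

// True when the cached data must be thrown away and recomputed at the query's
// size; false when the query should be restarted at the larger cached size.
bool shouldThrowOutCache(const Sz &Cached, const Sz &Query) {
  if (Cached.Bytes && Query.Bytes)
    // Mixed precision is handled conservatively; otherwise compare the values.
    return Cached.Precise != Query.Precise || *Cached.Bytes < *Query.Bytes;
  // An unknown size ranks above every known size.
  return !Query.Bytes;
}
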
diff --git a/contrib/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
index 55924db284ec..27e8d72b8e89 100644
--- a/contrib/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryLocation.cpp
@@ -18,13 +18,28 @@
#include "llvm/IR/Type.h"
using namespace llvm;
+void LocationSize::print(raw_ostream &OS) const {
+ OS << "LocationSize::";
+ if (*this == unknown())
+ OS << "unknown";
+ else if (*this == mapEmpty())
+ OS << "mapEmpty";
+ else if (*this == mapTombstone())
+ OS << "mapTombstone";
+ else if (isPrecise())
+ OS << "precise(" << getValue() << ')';
+ else
+ OS << "upperBound(" << getValue() << ')';
+}
+
MemoryLocation MemoryLocation::get(const LoadInst *LI) {
AAMDNodes AATags;
LI->getAAMetadata(AATags);
const auto &DL = LI->getModule()->getDataLayout();
- return MemoryLocation(LI->getPointerOperand(),
- DL.getTypeStoreSize(LI->getType()), AATags);
+ return MemoryLocation(
+ LI->getPointerOperand(),
+ LocationSize::precise(DL.getTypeStoreSize(LI->getType())), AATags);
}
MemoryLocation MemoryLocation::get(const StoreInst *SI) {
@@ -33,7 +48,8 @@ MemoryLocation MemoryLocation::get(const StoreInst *SI) {
const auto &DL = SI->getModule()->getDataLayout();
return MemoryLocation(SI->getPointerOperand(),
- DL.getTypeStoreSize(SI->getValueOperand()->getType()),
+ LocationSize::precise(DL.getTypeStoreSize(
+ SI->getValueOperand()->getType())),
AATags);
}
@@ -41,7 +57,8 @@ MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
AAMDNodes AATags;
VI->getAAMetadata(AATags);
- return MemoryLocation(VI->getPointerOperand(), UnknownSize, AATags);
+ return MemoryLocation(VI->getPointerOperand(), LocationSize::unknown(),
+ AATags);
}
MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
@@ -49,9 +66,10 @@ MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
CXI->getAAMetadata(AATags);
const auto &DL = CXI->getModule()->getDataLayout();
- return MemoryLocation(
- CXI->getPointerOperand(),
- DL.getTypeStoreSize(CXI->getCompareOperand()->getType()), AATags);
+ return MemoryLocation(CXI->getPointerOperand(),
+ LocationSize::precise(DL.getTypeStoreSize(
+ CXI->getCompareOperand()->getType())),
+ AATags);
}
MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
@@ -60,7 +78,8 @@ MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
const auto &DL = RMWI->getModule()->getDataLayout();
return MemoryLocation(RMWI->getPointerOperand(),
- DL.getTypeStoreSize(RMWI->getValOperand()->getType()),
+ LocationSize::precise(DL.getTypeStoreSize(
+ RMWI->getValOperand()->getType())),
AATags);
}
@@ -73,9 +92,9 @@ MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
}
MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
- uint64_t Size = UnknownSize;
+ auto Size = LocationSize::unknown();
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Size = C->getValue().getZExtValue();
+ Size = LocationSize::precise(C->getValue().getZExtValue());
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
@@ -94,9 +113,9 @@ MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
}
MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
- uint64_t Size = UnknownSize;
+ auto Size = LocationSize::unknown();
if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
- Size = C->getValue().getZExtValue();
+ Size = LocationSize::precise(C->getValue().getZExtValue());
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
@@ -106,15 +125,15 @@ MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
return MemoryLocation(MI->getRawDest(), Size, AATags);
}
-MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
+MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
unsigned ArgIdx,
- const TargetLibraryInfo &TLI) {
+ const TargetLibraryInfo *TLI) {
AAMDNodes AATags;
- CS->getAAMetadata(AATags);
- const Value *Arg = CS.getArgument(ArgIdx);
+ Call->getAAMetadata(AATags);
+ const Value *Arg = Call->getArgOperand(ArgIdx);
// We may be able to produce an exact size for known intrinsics.
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call)) {
const DataLayout &DL = II->getModule()->getDataLayout();
switch (II->getIntrinsicID()) {
@@ -126,7 +145,8 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
- return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
break;
case Intrinsic::lifetime_start:
@@ -134,23 +154,37 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
case Intrinsic::invariant_start:
assert(ArgIdx == 1 && "Invalid argument index");
return MemoryLocation(
- Arg, cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AATags);
+ Arg,
+ LocationSize::precise(
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
+ AATags);
case Intrinsic::invariant_end:
+ // The first argument to an invariant.end is a "descriptor" type (e.g. a
+ // pointer to an empty struct) which is never actually dereferenced.
+ if (ArgIdx == 0)
+ return MemoryLocation(Arg, LocationSize::precise(0), AATags);
assert(ArgIdx == 2 && "Invalid argument index");
return MemoryLocation(
- Arg, cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AATags);
+ Arg,
+ LocationSize::precise(
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()),
+ AATags);
case Intrinsic::arm_neon_vld1:
assert(ArgIdx == 0 && "Invalid argument index");
// LLVM's vld1 and vst1 intrinsics currently only support a single
// vector register.
- return MemoryLocation(Arg, DL.getTypeStoreSize(II->getType()), AATags);
+ return MemoryLocation(
+ Arg, LocationSize::precise(DL.getTypeStoreSize(II->getType())),
+ AATags);
case Intrinsic::arm_neon_vst1:
assert(ArgIdx == 0 && "Invalid argument index");
- return MemoryLocation(
- Arg, DL.getTypeStoreSize(II->getArgOperand(1)->getType()), AATags);
+ return MemoryLocation(Arg,
+ LocationSize::precise(DL.getTypeStoreSize(
+ II->getArgOperand(1)->getType())),
+ AATags);
}
}
@@ -159,16 +193,20 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
LibFunc F;
- if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
- F == LibFunc_memset_pattern16 && TLI.has(F)) {
+ if (TLI && Call->getCalledFunction() &&
+ TLI->getLibFunc(*Call->getCalledFunction(), F) &&
+ F == LibFunc_memset_pattern16 && TLI->has(F)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
if (ArgIdx == 1)
- return MemoryLocation(Arg, 16, AATags);
- if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
- return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ return MemoryLocation(Arg, LocationSize::precise(16), AATags);
+ if (const ConstantInt *LenCI =
+ dyn_cast<ConstantInt>(Call->getArgOperand(2)))
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
- return MemoryLocation(CS.getArgument(ArgIdx), UnknownSize, AATags);
+ return MemoryLocation(Call->getArgOperand(ArgIdx), LocationSize::unknown(),
+ AATags);
}
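As an aside (not part of the patch), the LocationSize values threaded through above replace raw uint64_t sizes; a minimal sketch of how a caller collapses one back to a byte count, assuming the LLVM headers imported in this tree:

#include "llvm/Analysis/MemoryLocation.h"
using namespace llvm;

// Collapse a LocationSize back to a byte count, falling back to
// MemoryLocation::UnknownSize when nothing is known statically.
static uint64_t sizeOrUnknown(LocationSize Size) {
  // hasValue() guards getValue(), which would assert on the unknown case.
  return Size.hasValue() ? Size.getValue() : MemoryLocation::UnknownSize;
}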
diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp
index 6e49a39926a2..6a5567ed765b 100644
--- a/contrib/llvm/lib/Analysis/MemorySSA.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp
@@ -30,7 +30,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
@@ -77,9 +76,15 @@ static cl::opt<unsigned> MaxCheckLimit(
cl::desc("The maximum number of stores/phis MemorySSA"
"will consider trying to walk past (default = 100)"));
-static cl::opt<bool>
- VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
- cl::desc("Verify MemorySSA in legacy printer pass."));
+// Always verify MemorySSA if expensive checking is enabled.
+#ifdef EXPENSIVE_CHECKS
+bool llvm::VerifyMemorySSA = true;
+#else
+bool llvm::VerifyMemorySSA = false;
+#endif
+static cl::opt<bool, true>
+ VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
+ cl::Hidden, cl::desc("Enable verification of MemorySSA."));
namespace llvm {
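For context, a hedged sketch of the cl::location pattern used for VerifyMemorySSA above: the option writes through to an ordinary global, so code (here, the EXPENSIVE_CHECKS default) can set it independently of the command line. The names MyVerifyFlag and -my-verify-flag below are purely illustrative.

#include "llvm/Support/CommandLine.h"

bool MyVerifyFlag = false; // external storage; can also be toggled from code
static llvm::cl::opt<bool, true>
    MyVerifyFlagOpt("my-verify-flag", llvm::cl::location(MyVerifyFlag),
                    llvm::cl::Hidden,
                    llvm::cl::desc("Illustrative verification toggle."));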
@@ -125,9 +130,9 @@ public:
: MemoryLocOrCall(MUD->getMemoryInst()) {}
MemoryLocOrCall(Instruction *Inst) {
- if (ImmutableCallSite(Inst)) {
+ if (auto *C = dyn_cast<CallBase>(Inst)) {
IsCall = true;
- CS = ImmutableCallSite(Inst);
+ Call = C;
} else {
IsCall = false;
// There is no such thing as a memorylocation for a fence inst, and it is
@@ -139,9 +144,9 @@ public:
explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {}
- ImmutableCallSite getCS() const {
+ const CallBase *getCall() const {
assert(IsCall);
- return CS;
+ return Call;
}
MemoryLocation getLoc() const {
@@ -156,16 +161,17 @@ public:
if (!IsCall)
return Loc == Other.Loc;
- if (CS.getCalledValue() != Other.CS.getCalledValue())
+ if (Call->getCalledValue() != Other.Call->getCalledValue())
return false;
- return CS.arg_size() == Other.CS.arg_size() &&
- std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin());
+ return Call->arg_size() == Other.Call->arg_size() &&
+ std::equal(Call->arg_begin(), Call->arg_end(),
+ Other.Call->arg_begin());
}
private:
union {
- ImmutableCallSite CS;
+ const CallBase *Call;
MemoryLocation Loc;
};
};
@@ -191,9 +197,9 @@ template <> struct DenseMapInfo<MemoryLocOrCall> {
hash_code hash =
hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue(
- MLOC.getCS().getCalledValue()));
+ MLOC.getCall()->getCalledValue()));
- for (const Value *Arg : MLOC.getCS().args())
+ for (const Value *Arg : MLOC.getCall()->args())
hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg));
return hash;
}
@@ -246,13 +252,13 @@ struct ClobberAlias {
// Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being
// ignored if IsClobber = false.
-static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
+static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
const MemoryLocation &UseLoc,
const Instruction *UseInst,
AliasAnalysis &AA) {
Instruction *DefInst = MD->getMemoryInst();
assert(DefInst && "Defining instruction not actually an instruction");
- ImmutableCallSite UseCS(UseInst);
+ const auto *UseCall = dyn_cast<CallBase>(UseInst);
Optional<AliasResult> AR;
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
@@ -265,7 +271,7 @@ static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
// context.
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
- if (UseCS)
+ if (UseCall)
return {false, NoAlias};
AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc);
return {AR != NoAlias, AR};
@@ -279,8 +285,8 @@ static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
}
}
- if (UseCS) {
- ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
+ if (UseCall) {
+ ModRefInfo I = AA.getModRefInfo(DefInst, UseCall);
AR = isMustSet(I) ? MustAlias : MayAlias;
return {isModOrRefSet(I), AR};
}
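Sketch of the query made above (assumptions: AA, DefInst and Call come from a walk like this one): alias analysis is asked whether the call-site user reads or writes memory touched by the defining instruction.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static bool callMayClobberOrRead(AliasAnalysis &AA, const Instruction *DefInst,
                                 const CallBase *Call) {
  // Any Mod or Ref bit set means the call matters for the clobber query.
  ModRefInfo MRI = AA.getModRefInfo(DefInst, Call);
  return isModOrRefSet(MRI);
}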
@@ -326,11 +332,12 @@ struct UpwardsMemoryQuery {
// The MemoryAccess we actually got called with, used to test local domination
const MemoryAccess *OriginalAccess = nullptr;
Optional<AliasResult> AR = MayAlias;
+ bool SkipSelfAccess = false;
UpwardsMemoryQuery() = default;
UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
- : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
+ : IsCall(isa<CallBase>(Inst)), Inst(Inst), OriginalAccess(Access) {
if (!IsCall)
StartingLoc = MemoryLocation::get(Inst);
}
@@ -370,13 +377,15 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
-/// \param MSSA The MemorySSA isntance that Start and ClobberAt belong to.
+/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
-static void LLVM_ATTRIBUTE_UNUSED
-checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
+/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
+static void
+checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
const MemoryLocation &StartLoc, const MemorySSA &MSSA,
- const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
+ const UpwardsMemoryQuery &Query, AliasAnalysis &AA,
+ bool AllowImpreciseClobber = false) {
assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
if (MSSA.isLiveOnEntryDef(Start)) {
@@ -386,21 +395,21 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
}
bool FoundClobber = false;
- DenseSet<MemoryAccessPair> VisitedPhis;
- SmallVector<MemoryAccessPair, 8> Worklist;
+ DenseSet<ConstMemoryAccessPair> VisitedPhis;
+ SmallVector<ConstMemoryAccessPair, 8> Worklist;
Worklist.emplace_back(Start, StartLoc);
// Walk all paths from Start to ClobberAt, while looking for clobbers. If one
// is found, complain.
while (!Worklist.empty()) {
- MemoryAccessPair MAP = Worklist.pop_back_val();
+ auto MAP = Worklist.pop_back_val();
// All we care about is that nothing from Start to ClobberAt clobbers Start.
// We learn nothing from revisiting nodes.
if (!VisitedPhis.insert(MAP).second)
continue;
- for (MemoryAccess *MA : def_chain(MAP.first)) {
+ for (const auto *MA : def_chain(MAP.first)) {
if (MA == ClobberAt) {
- if (auto *MD = dyn_cast<MemoryDef>(MA)) {
+ if (const auto *MD = dyn_cast<MemoryDef>(MA)) {
// instructionClobbersQuery isn't essentially free, so don't use `|=`,
// since it won't let us short-circuit.
//
@@ -422,19 +431,39 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
// We should never hit liveOnEntry, unless it's the clobber.
assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
- if (auto *MD = dyn_cast<MemoryDef>(MA)) {
- (void)MD;
+ if (const auto *MD = dyn_cast<MemoryDef>(MA)) {
+ // If Start is a Def, skip self.
+ if (MD == Start)
+ continue;
+
assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA)
.IsClobber &&
"Found clobber before reaching ClobberAt!");
continue;
}
+ if (const auto *MU = dyn_cast<MemoryUse>(MA)) {
+ (void)MU;
+ assert(MU == Start &&
+ "Can only find use in def chain if Start is a use");
+ continue;
+ }
+
assert(isa<MemoryPhi>(MA));
- Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end());
+ Worklist.append(
+ upward_defs_begin({const_cast<MemoryAccess *>(MA), MAP.second}),
+ upward_defs_end());
}
}
+ // If the verify is done following an optimization, it's possible that
+ // ClobberAt was a conservative clobber which we can now infer is not a
+ // true clobbering access. Don't fail the verify if that's the case.
+ // We do have accesses that claim they're optimized, but could be optimized
+ // further. Updating all these can be expensive, so allow it for now (FIXME).
+ if (AllowImpreciseClobber)
+ return;
+
// If ClobberAt is a MemoryPhi, we can assume something above it acted as a
// clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
@@ -507,13 +536,13 @@ class ClobberWalker {
///
/// This does not test for whether StopAt is a clobber
UpwardsWalkResult
- walkToPhiOrClobber(DefPath &Desc,
- const MemoryAccess *StopAt = nullptr) const {
+ walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
+ const MemoryAccess *SkipStopAt = nullptr) const {
assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
for (MemoryAccess *Current : def_chain(Desc.Last)) {
Desc.Last = Current;
- if (Current == StopAt)
+ if (Current == StopAt || Current == SkipStopAt)
return {Current, false, MayAlias};
if (auto *MD = dyn_cast<MemoryDef>(Current)) {
@@ -591,9 +620,16 @@ class ClobberWalker {
if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
continue;
- UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere);
+ const MemoryAccess *SkipStopWhere = nullptr;
+ if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
+ assert(isa<MemoryDef>(Query->OriginalAccess));
+ SkipStopWhere = Query->OriginalAccess;
+ }
+
+ UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere,
+ /*SkipStopAt=*/SkipStopWhere);
if (Res.IsKnownClobber) {
- assert(Res.Result != StopWhere);
+ assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);
// If this wasn't a cache hit, we hit a clobber when walking. That's a
// failure.
TerminatedPath Term{Res.Result, PathIndex};
@@ -605,10 +641,13 @@ class ClobberWalker {
continue;
}
- if (Res.Result == StopWhere) {
+ if (Res.Result == StopWhere || Res.Result == SkipStopWhere) {
// We've hit our target. Save this path off for if we want to continue
- // walking.
- NewPaused.push_back(PathIndex);
+ // walking. If we are in the mode of skipping the OriginalAccess, and
+ // we've reached back to the OriginalAccess, do not save path, we've
+ // just looped back to self.
+ if (Res.Result != SkipStopWhere)
+ NewPaused.push_back(PathIndex);
continue;
}
@@ -879,7 +918,8 @@ public:
}
#ifdef EXPENSIVE_CHECKS
- checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
+ if (!Q.SkipSelfAccess)
+ checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
#endif
return Result;
}
@@ -907,28 +947,76 @@ struct RenamePassData {
namespace llvm {
+class MemorySSA::ClobberWalkerBase {
+ ClobberWalker Walker;
+ MemorySSA *MSSA;
+
+public:
+ ClobberWalkerBase(MemorySSA *M, AliasAnalysis *A, DominatorTree *D)
+ : Walker(*M, *A, *D), MSSA(M) {}
+
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
+ const MemoryLocation &);
+ // Second argument (bool) defines whether the clobber search should skip the
+ // original queried access. If true, there will be a follow-up query searching
+ // for a clobber access past "self". Note that the Optimized access is not
+ // updated if a new clobber is found by this SkipSelf search. If this
+ // additional query becomes heavily used we may decide to cache the result.
+ // Walker instantiations will decide how to set the SkipSelf bool.
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, bool);
+ void verify(const MemorySSA *MSSA) { Walker.verify(MSSA); }
+};
+
/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
/// longer does caching on its own, but the name has been retained for the
/// moment.
class MemorySSA::CachingWalker final : public MemorySSAWalker {
- ClobberWalker Walker;
-
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
+ ClobberWalkerBase *Walker;
public:
- CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
+ CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
+ : MemorySSAWalker(M), Walker(W) {}
~CachingWalker() override = default;
using MemorySSAWalker::getClobberingMemoryAccess;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
- const MemoryLocation &) override;
- void invalidateInfo(MemoryAccess *) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override;
+
+ void invalidateInfo(MemoryAccess *MA) override {
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MUD->resetOptimized();
+ }
void verify(const MemorySSA *MSSA) override {
MemorySSAWalker::verify(MSSA);
- Walker.verify(MSSA);
+ Walker->verify(MSSA);
+ }
+};
+
+class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
+ ClobberWalkerBase *Walker;
+
+public:
+ SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
+ : MemorySSAWalker(M), Walker(W) {}
+ ~SkipSelfWalker() override = default;
+
+ using MemorySSAWalker::getClobberingMemoryAccess;
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override;
+
+ void invalidateInfo(MemoryAccess *MA) override {
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MUD->resetOptimized();
+ }
+
+ void verify(const MemorySSA *MSSA) override {
+ MemorySSAWalker::verify(MSSA);
+ Walker->verify(MSSA);
}
};
@@ -1067,7 +1155,7 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
: AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
- NextID(0) {
+ SkipWalker(nullptr), NextID(0) {
buildMemorySSA();
}
@@ -1398,10 +1486,25 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
if (Walker)
return Walker.get();
- Walker = llvm::make_unique<CachingWalker>(this, AA, DT);
+ if (!WalkerBase)
+ WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+
+ Walker = llvm::make_unique<CachingWalker>(this, WalkerBase.get());
return Walker.get();
}
+MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
+ if (SkipWalker)
+ return SkipWalker.get();
+
+ if (!WalkerBase)
+ WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+
+ SkipWalker = llvm::make_unique<SkipSelfWalker>(this, WalkerBase.get());
+ return SkipWalker.get();
+ }
+
+
// This is a helper function used by the creation routines. It places NewAccess
// into the access and defs lists for a given basic block, at the given
// insertion point.
@@ -1465,15 +1568,25 @@ void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
BlockNumberingValid.erase(BB);
}
+void MemorySSA::prepareForMoveTo(MemoryAccess *What, BasicBlock *BB) {
+ // Keep it in the lookup tables, remove from the lists
+ removeFromLists(What, false);
+
+ // Note that moving should implicitly invalidate the optimized state of a
+ // MemoryUse (and Phis can't be optimized). However, it doesn't do so for a
+ // MemoryDef.
+ if (auto *MD = dyn_cast<MemoryDef>(What))
+ MD->resetOptimized();
+ What->setBlock(BB);
+}
+
// Move What before Where in the IR. The end result is that What will belong to
// the right lists and have the right Block set, but will not otherwise be
// correct. It will not have the right defining access, and if it is a def,
// things below it will not properly be updated.
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
AccessList::iterator Where) {
- // Keep it in the lookup tables, remove from the lists
- removeFromLists(What, false);
- What->setBlock(BB);
+ prepareForMoveTo(What, BB);
insertIntoListsBefore(What, BB, Where);
}
@@ -1489,8 +1602,7 @@ void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB,
assert(Inserted && "Cannot move a Phi to a block that already has one");
}
- removeFromLists(What, false);
- What->setBlock(BB);
+ prepareForMoveTo(What, BB);
insertIntoListsForBlock(What, BB, Point);
}
@@ -1504,9 +1616,10 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
}
MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
- MemoryAccess *Definition) {
+ MemoryAccess *Definition,
+ const MemoryUseOrDef *Template) {
assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
- MemoryUseOrDef *NewAccess = createNewAccess(I);
+ MemoryUseOrDef *NewAccess = createNewAccess(I, Template);
assert(
NewAccess != nullptr &&
"Tried to create a memory access for a non-memory touching instruction");
@@ -1529,7 +1642,8 @@ static inline bool isOrdered(const Instruction *I) {
}
/// Helper function to create new memory accesses
-MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
+MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
+ const MemoryUseOrDef *Template) {
// The assume intrinsic has a control dependency which we model by claiming
// that it writes arbitrarily. Ignore that fake memory dependency here.
// FIXME: Replace this special casing with a more accurate modelling of
@@ -1538,18 +1652,31 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
if (II->getIntrinsicID() == Intrinsic::assume)
return nullptr;
- // Find out what affect this instruction has on memory.
- ModRefInfo ModRef = AA->getModRefInfo(I, None);
- // The isOrdered check is used to ensure that volatiles end up as defs
- // (atomics end up as ModRef right now anyway). Until we separate the
- // ordering chain from the memory chain, this enables people to see at least
- // some relative ordering to volatiles. Note that getClobberingMemoryAccess
- // will still give an answer that bypasses other volatile loads. TODO:
- // Separate memory aliasing and ordering into two different chains so that we
- // can precisely represent both "what memory will this read/write/is clobbered
- // by" and "what instructions can I move this past".
- bool Def = isModSet(ModRef) || isOrdered(I);
- bool Use = isRefSet(ModRef);
+ bool Def, Use;
+ if (Template) {
+ Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
+ Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
+#if !defined(NDEBUG)
+ ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ bool DefCheck, UseCheck;
+ DefCheck = isModSet(ModRef) || isOrdered(I);
+ UseCheck = isRefSet(ModRef);
+ assert(Def == DefCheck && (Def || Use == UseCheck) && "Invalid template");
+#endif
+ } else {
+ // Find out what effect this instruction has on memory.
+ ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ // The isOrdered check is used to ensure that volatiles end up as defs
+ // (atomics end up as ModRef right now anyway). Until we separate the
+ // ordering chain from the memory chain, this enables people to see at least
+ // some relative ordering to volatiles. Note that getClobberingMemoryAccess
+ // will still give an answer that bypasses other volatile loads. TODO:
+ // Separate memory aliasing and ordering into two different chains so that
+ // we can precisely represent both "what memory will this read/write/is
+ // clobbered by" and "what instructions can I move this past".
+ Def = isModSet(ModRef) || isOrdered(I);
+ Use = isRefSet(ModRef);
+ }
// It's possible for an instruction to not modify memory at all. During
// construction, we ignore them.
@@ -1652,6 +1779,34 @@ void MemorySSA::verifyMemorySSA() const {
verifyOrdering(F);
verifyDominationNumbers(F);
Walker->verify(this);
+ verifyClobberSanity(F);
+}
+
+/// Check sanity of the clobbering instruction for access MA.
+void MemorySSA::checkClobberSanityAccess(const MemoryAccess *MA) const {
+ if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
+ if (!MUD->isOptimized())
+ return;
+ auto *I = MUD->getMemoryInst();
+ auto Loc = MemoryLocation::getOrNone(I);
+ if (Loc == None)
+ return;
+ auto *Clobber = MUD->getOptimized();
+ UpwardsMemoryQuery Q(I, MUD);
+ checkClobberSanity(MUD, Clobber, *Loc, *this, Q, *AA, true);
+ }
+}
+
+void MemorySSA::verifyClobberSanity(const Function &F) const {
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+ for (const BasicBlock &BB : F) {
+ const AccessList *Accesses = getBlockAccesses(&BB);
+ if (!Accesses)
+ continue;
+ for (const MemoryAccess &MA : *Accesses)
+ checkClobberSanityAccess(&MA);
+ }
+#endif
}
/// Verify that all of the blocks we believe to have valid domination numbers
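A small usage sketch (assuming MSSA is the analysis result for the current function and that the header declares the VerifyMemorySSA flag defined earlier in this file): passes typically gate full verification behind the flag, since it is expensive.

#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

static void maybeVerify(MemorySSA &MSSA) {
  // True by default only in EXPENSIVE_CHECKS builds, or with -verify-memoryssa.
  if (VerifyMemorySSA)
    MSSA.verifyMemorySSA();
}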
@@ -1695,6 +1850,7 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const {
/// Verify that the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
void MemorySSA::verifyOrdering(Function &F) const {
+#ifndef NDEBUG
// Walk all the blocks, comparing what the lookups think and what the access
// lists think, as well as the order in the blocks vs the order in the access
// lists.
@@ -1753,6 +1909,7 @@ void MemorySSA::verifyOrdering(Function &F) const {
}
ActualDefs.clear();
}
+#endif
}
/// Verify the domination properties of MemorySSA by checking that each
@@ -1795,6 +1952,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list
void MemorySSA::verifyDefUses(Function &F) const {
+#ifndef NDEBUG
for (BasicBlock &B : F) {
// Phi nodes are attached to basic blocks
if (MemoryPhi *Phi = getMemoryAccess(&B)) {
@@ -1815,14 +1973,7 @@ void MemorySSA::verifyDefUses(Function &F) const {
}
}
}
-}
-
-MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
- return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
-}
-
-MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
- return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
+#endif
}
/// Perform a local numbering on blocks so that instruction ordering can be
@@ -2055,25 +2206,11 @@ void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
-MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
- DominatorTree *D)
- : MemorySSAWalker(M), Walker(*M, *A, *D) {}
-
-void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
- MUD->resetOptimized();
-}
-
-/// Walk the use-def chains starting at \p MA and find
+/// Walk the use-def chains starting at \p StartingAccess and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
- MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
- return Walker.findClobber(StartingAccess, Q);
-}
-
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
if (isa<MemoryPhi>(StartingAccess))
return StartingAccess;
@@ -2086,7 +2223,7 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
// Conservatively, fences are always clobbers, so don't perform the walk if we
// hit a fence.
- if (!ImmutableCallSite(I) && I->isFenceLike())
+ if (!isa<CallBase>(I) && I->isFenceLike())
return StartingUseOrDef;
UpwardsMemoryQuery Q;
@@ -2097,11 +2234,12 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
// Unlike the other function, do not walk to the def of a def, because we are
// handed something we already believe is the clobbering access.
+ // We never set SkipSelf to true in Q in this method.
MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
? StartingUseOrDef->getDefiningAccess()
: StartingUseOrDef;
- MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
+ MemoryAccess *Clobber = Walker.findClobber(DefiningAccess, Q);
LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
@@ -2110,26 +2248,33 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
}
MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
+ bool SkipSelf) {
auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
// If this is a MemoryPhi, we can't do anything.
if (!StartingAccess)
return MA;
+ bool IsOptimized = false;
+
// If this is an already optimized use or def, return the optimized result.
// Note: Currently, we store the optimized def result in a separate field,
// since we can't use the defining access.
- if (StartingAccess->isOptimized())
- return StartingAccess->getOptimized();
+ if (StartingAccess->isOptimized()) {
+ if (!SkipSelf || !isa<MemoryDef>(StartingAccess))
+ return StartingAccess->getOptimized();
+ IsOptimized = true;
+ }
const Instruction *I = StartingAccess->getMemoryInst();
- UpwardsMemoryQuery Q(I, StartingAccess);
// We can't sanely do anything with a fence, since they conservatively clobber
// all memory, and have no locations to get pointers from to try to
// disambiguate.
- if (!Q.IsCall && I->isFenceLike())
+ if (!isa<CallBase>(I) && I->isFenceLike())
return StartingAccess;
+ UpwardsMemoryQuery Q(I, StartingAccess);
+
if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
StartingAccess->setOptimized(LiveOnEntry);
@@ -2137,33 +2282,71 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
return LiveOnEntry;
}
- // Start with the thing we already think clobbers this location
- MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+ MemoryAccess *OptimizedAccess;
+ if (!IsOptimized) {
+ // Start with the thing we already think clobbers this location
+ MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+
+ // At this point, DefiningAccess may be the live on entry def.
+ // If it is, we will not get a better result.
+ if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
+ StartingAccess->setOptimized(DefiningAccess);
+ StartingAccess->setOptimizedAccessType(None);
+ return DefiningAccess;
+ }
- // At this point, DefiningAccess may be the live on entry def.
- // If it is, we will not get a better result.
- if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
- StartingAccess->setOptimized(DefiningAccess);
- StartingAccess->setOptimizedAccessType(None);
- return DefiningAccess;
- }
+ OptimizedAccess = Walker.findClobber(DefiningAccess, Q);
+ StartingAccess->setOptimized(OptimizedAccess);
+ if (MSSA->isLiveOnEntryDef(OptimizedAccess))
+ StartingAccess->setOptimizedAccessType(None);
+ else if (Q.AR == MustAlias)
+ StartingAccess->setOptimizedAccessType(MustAlias);
+ } else
+ OptimizedAccess = StartingAccess->getOptimized();
- MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
- LLVM_DEBUG(dbgs() << *DefiningAccess << "\n");
- LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
- LLVM_DEBUG(dbgs() << *Result << "\n");
-
- StartingAccess->setOptimized(Result);
- if (MSSA->isLiveOnEntryDef(Result))
- StartingAccess->setOptimizedAccessType(None);
- else if (Q.AR == MustAlias)
- StartingAccess->setOptimizedAccessType(MustAlias);
+ LLVM_DEBUG(dbgs() << *StartingAccess << "\n");
+ LLVM_DEBUG(dbgs() << "Optimized Memory SSA clobber for " << *I << " is ");
+ LLVM_DEBUG(dbgs() << *OptimizedAccess << "\n");
+
+ MemoryAccess *Result;
+ if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) &&
+ isa<MemoryDef>(StartingAccess)) {
+ assert(isa<MemoryDef>(Q.OriginalAccess));
+ Q.SkipSelfAccess = true;
+ Result = Walker.findClobber(OptimizedAccess, Q);
+ } else
+ Result = OptimizedAccess;
+
+ LLVM_DEBUG(dbgs() << "Result Memory SSA clobber [SkipSelf = " << SkipSelf);
+ LLVM_DEBUG(dbgs() << "] for " << *I << " is " << *Result << "\n");
return Result;
}
MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+ return Walker->getClobberingMemoryAccessBase(MA, false);
+}
+
+MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc);
+}
+
+MemoryAccess *
+MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+ return Walker->getClobberingMemoryAccessBase(MA, true);
+}
+
+MemoryAccess *
+MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc);
+}
+
+MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
return Use->getDefiningAccess();
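Illustrative only (not from the patch): with the wiring above, a client holding a MemorySSA instance can ask either walker for a clobber; the skip-self variant is the new piece and will walk past the queried MemoryDef when the ordinary answer loops back to it. MSSA and MD are assumed to be a valid analysis result and one of its defs.

#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

static void queryBothWalkers(MemorySSA &MSSA, MemoryDef *MD) {
  // Default walker: caches its answer on MD as the "optimized" access.
  MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(MD);
  // Skip-self walker: may continue past MD; the extra result is not cached.
  MemoryAccess *PastSelf =
      MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MD);
  (void)Clobber;
  (void)PastSelf;
}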
diff --git a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
index abe2b3c25a58..6c817d203684 100644
--- a/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/contrib/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------===//
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -91,7 +93,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// FIXME: Figure out whether this is dead code and if so remove it.
if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
// These will have been filled in by the recursive read we did above.
- std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
+ llvm::copy(PhiOps, Phi->op_begin());
std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
}
} else {
@@ -264,16 +266,15 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end();
UI != UE;) {
Use &U = *UI++;
- // Leave the uses alone
- if (isa<MemoryUse>(U.getUser()))
+ // Leave the MemoryUses alone.
+ // Also make sure we skip ourselves to avoid self references.
+ if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
continue;
U.set(MD);
}
}
// and that def is now our defining access.
- // We change them in this order otherwise we will appear in the use list
- // above and reset ourselves.
MD->setDefiningAccess(DefBefore);
SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
@@ -392,6 +393,522 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
}
}
+void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+ MPhi->unorderedDeleteIncomingBlock(From);
+ if (MPhi->getNumIncomingValues() == 1)
+ removeMemoryAccess(MPhi);
+ }
+}
+
+void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
+ BasicBlock *To) {
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+ bool Found = false;
+ MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) {
+ if (From != B)
+ return false;
+ if (Found)
+ return true;
+ Found = true;
+ return false;
+ });
+ if (MPhi->getNumIncomingValues() == 1)
+ removeMemoryAccess(MPhi);
+ }
+}
+
+void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+ const ValueToValueMapTy &VMap,
+ PhiToDefMap &MPhiMap) {
+ auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
+ MemoryAccess *InsnDefining = MA;
+ if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
+ if (!MSSA->isLiveOnEntryDef(DefMUD)) {
+ Instruction *DefMUDI = DefMUD->getMemoryInst();
+ assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+ if (Instruction *NewDefMUDI =
+ cast_or_null<Instruction>(VMap.lookup(DefMUDI)))
+ InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
+ }
+ } else {
+ MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
+ if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
+ InsnDefining = NewDefPhi;
+ }
+ assert(InsnDefining && "Defining instruction cannot be nullptr.");
+ return InsnDefining;
+ };
+
+ const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+ if (!Acc)
+ return;
+ for (const MemoryAccess &MA : *Acc) {
+ if (const MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&MA)) {
+ Instruction *Insn = MUD->getMemoryInst();
+ // Entry does not exist if the clone of the block did not clone all
+ // instructions. This occurs in LoopRotate when cloning instructions
+ // from the old header to the old preheader. The cloned instruction may
+ // also be a simplified Value, not an Instruction (see LoopRotate).
+ if (Instruction *NewInsn =
+ dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
+ MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
+ NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD);
+ MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
+ }
+ }
+ }
+}
+
+void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+ ArrayRef<BasicBlock *> ExitBlocks,
+ const ValueToValueMapTy &VMap,
+ bool IgnoreIncomingWithNoClones) {
+ PhiToDefMap MPhiMap;
+
+ auto FixPhiIncomingValues = [&](MemoryPhi *Phi, MemoryPhi *NewPhi) {
+ assert(Phi && NewPhi && "Invalid Phi nodes.");
+ BasicBlock *NewPhiBB = NewPhi->getBlock();
+ SmallPtrSet<BasicBlock *, 4> NewPhiBBPreds(pred_begin(NewPhiBB),
+ pred_end(NewPhiBB));
+ for (unsigned It = 0, E = Phi->getNumIncomingValues(); It < E; ++It) {
+ MemoryAccess *IncomingAccess = Phi->getIncomingValue(It);
+ BasicBlock *IncBB = Phi->getIncomingBlock(It);
+
+ if (BasicBlock *NewIncBB = cast_or_null<BasicBlock>(VMap.lookup(IncBB)))
+ IncBB = NewIncBB;
+ else if (IgnoreIncomingWithNoClones)
+ continue;
+
+ // Now we have IncBB, and will need to add incoming from it to NewPhi.
+
+ // If IncBB is not a predecessor of NewPhiBB, then do not add it.
+ // NewPhiBB was cloned without that edge.
+ if (!NewPhiBBPreds.count(IncBB))
+ continue;
+
+ // Determine incoming value and add it as incoming from IncBB.
+ if (MemoryUseOrDef *IncMUD = dyn_cast<MemoryUseOrDef>(IncomingAccess)) {
+ if (!MSSA->isLiveOnEntryDef(IncMUD)) {
+ Instruction *IncI = IncMUD->getMemoryInst();
+ assert(IncI && "Found MemoryUseOrDef with no Instruction.");
+ if (Instruction *NewIncI =
+ cast_or_null<Instruction>(VMap.lookup(IncI))) {
+ IncMUD = MSSA->getMemoryAccess(NewIncI);
+ assert(IncMUD &&
+ "MemoryUseOrDef cannot be null, all preds processed.");
+ }
+ }
+ NewPhi->addIncoming(IncMUD, IncBB);
+ } else {
+ MemoryPhi *IncPhi = cast<MemoryPhi>(IncomingAccess);
+ if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi))
+ NewPhi->addIncoming(NewDefPhi, IncBB);
+ else
+ NewPhi->addIncoming(IncPhi, IncBB);
+ }
+ }
+ };
+
+ auto ProcessBlock = [&](BasicBlock *BB) {
+ BasicBlock *NewBlock = cast_or_null<BasicBlock>(VMap.lookup(BB));
+ if (!NewBlock)
+ return;
+
+ assert(!MSSA->getWritableBlockAccesses(NewBlock) &&
+ "Cloned block should have no accesses");
+
+ // Add MemoryPhi.
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB)) {
+ MemoryPhi *NewPhi = MSSA->createMemoryPhi(NewBlock);
+ MPhiMap[MPhi] = NewPhi;
+ }
+ // Update Uses and Defs.
+ cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
+ };
+
+ for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ ProcessBlock(BB);
+
+ for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+ if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
+ FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
+}
+
+void MemorySSAUpdater::updateForClonedBlockIntoPred(
+ BasicBlock *BB, BasicBlock *P1, const ValueToValueMapTy &VM) {
+ // All defs/phis from outside BB that are used in BB, are valid uses in P1.
+ // Since those defs/phis must have dominated BB, and also dominate P1.
+ // Defs from BB being used in BB will be replaced with the cloned defs from
+ // VM. The uses of BB's Phi (if it exists) in BB will be replaced by the
+ // incoming def into the Phi from P1.
+ PhiToDefMap MPhiMap;
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+ MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
+ cloneUsesAndDefs(BB, P1, VM, MPhiMap);
+}
+
+template <typename Iter>
+void MemorySSAUpdater::privateUpdateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks, Iter ValuesBegin, Iter ValuesEnd,
+ DominatorTree &DT) {
+ SmallVector<CFGUpdate, 4> Updates;
+ // Update/insert phis in all successors of exit blocks.
+ for (auto *Exit : ExitBlocks)
+ for (const ValueToValueMapTy *VMap : make_range(ValuesBegin, ValuesEnd))
+ if (BasicBlock *NewExit = cast_or_null<BasicBlock>(VMap->lookup(Exit))) {
+ BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+ Updates.push_back({DT.Insert, NewExit, ExitSucc});
+ }
+ applyInsertUpdates(Updates, DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap,
+ DominatorTree &DT) {
+ const ValueToValueMapTy *const Arr[] = {&VMap};
+ privateUpdateExitBlocksForClonedLoop(ExitBlocks, std::begin(Arr),
+ std::end(Arr), DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT) {
+ auto GetPtr = [&](const std::unique_ptr<ValueToValueMapTy> &I) {
+ return I.get();
+ };
+ using MappedIteratorType =
+ mapped_iterator<const std::unique_ptr<ValueToValueMapTy> *,
+ decltype(GetPtr)>;
+ auto MapBegin = MappedIteratorType(VMaps.begin(), GetPtr);
+ auto MapEnd = MappedIteratorType(VMaps.end(), GetPtr);
+ privateUpdateExitBlocksForClonedLoop(ExitBlocks, MapBegin, MapEnd, DT);
+}
+
+void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT) {
+ SmallVector<CFGUpdate, 4> RevDeleteUpdates;
+ SmallVector<CFGUpdate, 4> InsertUpdates;
+ for (auto &Update : Updates) {
+ if (Update.getKind() == DT.Insert)
+ InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+ else
+ RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+ }
+
+ if (!RevDeleteUpdates.empty()) {
+ // Update for inserted edges: use NewDT and snapshot the CFG as if the
+ // deletes had not occurred.
+ // FIXME: This creates a new DT, so it's more expensive to mix
+ // deletes/inserts than to do just inserts. We can do an incremental update
+ // on the DT to revert the deletes, then re-delete the edges. Teaching DT to
+ // do this is part of a pending cleanup.
+ DominatorTree NewDT(DT, RevDeleteUpdates);
+ GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
+ applyInsertUpdates(InsertUpdates, NewDT, &GD);
+ } else {
+ GraphDiff<BasicBlock *> GD;
+ applyInsertUpdates(InsertUpdates, DT, &GD);
+ }
+
+ // Update for deleted edges
+ for (auto &Update : RevDeleteUpdates)
+ removeEdge(Update.getFrom(), Update.getTo());
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT) {
+ GraphDiff<BasicBlock *> GD;
+ applyInsertUpdates(Updates, DT, &GD);
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT,
+ const GraphDiff<BasicBlock *> *GD) {
+ // Get recursive last Def, assuming well formed MSSA and updated DT.
+ auto GetLastDef = [&](BasicBlock *BB) -> MemoryAccess * {
+ while (true) {
+ MemorySSA::DefsList *Defs = MSSA->getWritableBlockDefs(BB);
+ // Return last Def or Phi in BB, if it exists.
+ if (Defs)
+ return &*(--Defs->end());
+
+ // Check number of predecessors, we only care if there's more than one.
+ unsigned Count = 0;
+ BasicBlock *Pred = nullptr;
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+ Pred = Pair.second;
+ Count++;
+ if (Count == 2)
+ break;
+ }
+
+ // If BB has multiple predecessors, get last definition from IDom.
+ if (Count != 1) {
+ // [SimpleLoopUnswitch] If BB is a dead block, about to be deleted, its
+ // DT is invalidated. Return LoE as its last def. This will be added to
+ // MemoryPhi node, and later deleted when the block is deleted.
+ if (!DT.getNode(BB))
+ return MSSA->getLiveOnEntryDef();
+ if (auto *IDom = DT.getNode(BB)->getIDom())
+ if (IDom->getBlock() != BB) {
+ BB = IDom->getBlock();
+ continue;
+ }
+ return MSSA->getLiveOnEntryDef();
+ } else {
+ // Single predecessor, BB cannot be dead. GetLastDef of Pred.
+ assert(Count == 1 && Pred && "Single predecessor expected.");
+ BB = Pred;
+ }
+ };
+ llvm_unreachable("Unable to get last definition.");
+ };
+
+ // Get nearest IDom given a set of blocks.
+ // TODO: this can be optimized by starting the search at the node with the
+ // lowest level (highest in the tree).
+ auto FindNearestCommonDominator =
+ [&](const SmallSetVector<BasicBlock *, 2> &BBSet) -> BasicBlock * {
+ BasicBlock *PrevIDom = *BBSet.begin();
+ for (auto *BB : BBSet)
+ PrevIDom = DT.findNearestCommonDominator(PrevIDom, BB);
+ return PrevIDom;
+ };
+
+ // Get all blocks that dominate PrevIDom, stop when reaching CurrIDom. Do not
+ // include CurrIDom.
+ auto GetNoLongerDomBlocks =
+ [&](BasicBlock *PrevIDom, BasicBlock *CurrIDom,
+ SmallVectorImpl<BasicBlock *> &BlocksPrevDom) {
+ if (PrevIDom == CurrIDom)
+ return;
+ BlocksPrevDom.push_back(PrevIDom);
+ BasicBlock *NextIDom = PrevIDom;
+ while (BasicBlock *UpIDom =
+ DT.getNode(NextIDom)->getIDom()->getBlock()) {
+ if (UpIDom == CurrIDom)
+ break;
+ BlocksPrevDom.push_back(UpIDom);
+ NextIDom = UpIDom;
+ }
+ };
+
+ // Map a BB to its predecessors: added + previously existing. To get a
+ // deterministic order, store predecessors as SetVectors. The order in each
+ // will be defined by the order in Updates (fixed) and the order given by
+ // children<> (also fixed). Since we further iterate over these ordered sets,
+ // we lose the information of multiple edges possibly existing between two
+ // blocks, so we'll keep an EdgeCount map for that.
+ // An alternate implementation could keep an unordered set for the predecessors,
+ // traverse either Updates or children<> each time to get the deterministic
+ // order, and drop the usage of EdgeCount. This alternate approach would still
+ // require querying the maps for each predecessor, and the children<> call has
+ // additional computation inside for creating the snapshot-graph predecessors.
+ // As such, we favor using a little additional storage and less compute time.
+ // This decision can be revisited if we find the alternative more favorable.
+
+ struct PredInfo {
+ SmallSetVector<BasicBlock *, 2> Added;
+ SmallSetVector<BasicBlock *, 2> Prev;
+ };
+ SmallDenseMap<BasicBlock *, PredInfo> PredMap;
+
+ for (auto &Edge : Updates) {
+ BasicBlock *BB = Edge.getTo();
+ auto &AddedBlockSet = PredMap[BB].Added;
+ AddedBlockSet.insert(Edge.getFrom());
+ }
+
+ // Store all existing predecessors for each BB; at least one must exist.
+ SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, int> EdgeCountMap;
+ SmallPtrSet<BasicBlock *, 2> NewBlocks;
+ for (auto &BBPredPair : PredMap) {
+ auto *BB = BBPredPair.first;
+ const auto &AddedBlockSet = BBPredPair.second.Added;
+ auto &PrevBlockSet = BBPredPair.second.Prev;
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+ BasicBlock *Pi = Pair.second;
+ if (!AddedBlockSet.count(Pi))
+ PrevBlockSet.insert(Pi);
+ EdgeCountMap[{Pi, BB}]++;
+ }
+
+ if (PrevBlockSet.empty()) {
+ assert(pred_size(BB) == AddedBlockSet.size() && "Duplicate edges added.");
+ LLVM_DEBUG(
+ dbgs()
+ << "Adding a predecessor to a block with no predecessors. "
+ "This must be an edge added to a new, likely cloned, block. "
+ "Its memory accesses must be already correct, assuming completed "
+ "via the updateExitBlocksForClonedLoop API. "
+ "Assert a single such edge is added so no phi addition or "
+ "additional processing is required.\n");
+ assert(AddedBlockSet.size() == 1 &&
+ "Can only handle adding one predecessor to a new block.");
+ // Need to remove new blocks from PredMap. Remove below to not invalidate
+ // iterator here.
+ NewBlocks.insert(BB);
+ }
+ }
+ // Nothing to process for new/cloned blocks.
+ for (auto *BB : NewBlocks)
+ PredMap.erase(BB);
+
+ SmallVector<BasicBlock *, 8> BlocksToProcess;
+ SmallVector<BasicBlock *, 16> BlocksWithDefsToReplace;
+
+ // First create MemoryPhis in all blocks that don't have one. Create in the
+ // order found in Updates, not in PredMap, to get deterministic numbering.
+ for (auto &Edge : Updates) {
+ BasicBlock *BB = Edge.getTo();
+ if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
+ MSSA->createMemoryPhi(BB);
+ }
+
+ // Now we'll fill in the MemoryPhis with the right incoming values.
+ for (auto &BBPredPair : PredMap) {
+ auto *BB = BBPredPair.first;
+ const auto &PrevBlockSet = BBPredPair.second.Prev;
+ const auto &AddedBlockSet = BBPredPair.second.Added;
+ assert(!PrevBlockSet.empty() &&
+ "At least one previous predecessor must exist.");
+
+ // TODO: if this becomes a bottleneck, we can save on GetLastDef calls by
+ // keeping this map before the loop. We can reuse already populated entries
+ // if an edge is added from the same predecessor to two different blocks,
+ // and this does happen in rotate. Note that the map needs to be updated
+ // when deleting non-necessary phis below, if the phi is in the map by
+ // replacing the value with DefP1.
+ SmallDenseMap<BasicBlock *, MemoryAccess *> LastDefAddedPred;
+ for (auto *AddedPred : AddedBlockSet) {
+ auto *DefPn = GetLastDef(AddedPred);
+ assert(DefPn != nullptr && "Unable to find last definition.");
+ LastDefAddedPred[AddedPred] = DefPn;
+ }
+
+ MemoryPhi *NewPhi = MSSA->getMemoryAccess(BB);
+ // If Phi is not empty, add an incoming edge from each added pred. Must
+ // still compute blocks with defs to replace for this block below.
+ if (NewPhi->getNumOperands()) {
+ for (auto *Pred : AddedBlockSet) {
+ auto *LastDefForPred = LastDefAddedPred[Pred];
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(LastDefForPred, Pred);
+ }
+ } else {
+ // Pick any existing predecessor and get its definition. All other
+ // existing predecessors should have the same one, since no phi existed.
+ auto *P1 = *PrevBlockSet.begin();
+ MemoryAccess *DefP1 = GetLastDef(P1);
+
+ // Check DefP1 against all Defs in LastDefPredPair. If all the same,
+ // nothing to add.
+ bool InsertPhi = false;
+ for (auto LastDefPredPair : LastDefAddedPred)
+ if (DefP1 != LastDefPredPair.second) {
+ InsertPhi = true;
+ break;
+ }
+ if (!InsertPhi) {
+ // Since NewPhi may be used in other newly added Phis, replace all uses
+ // of NewPhi with the definition coming from all predecessors (DefP1),
+ // before deleting it.
+ NewPhi->replaceAllUsesWith(DefP1);
+ removeMemoryAccess(NewPhi);
+ continue;
+ }
+
+ // Update Phi with new values for new predecessors and old value for all
+ // other predecessors. Since AddedBlockSet and PrevBlockSet are ordered
+ // sets, the order of entries in NewPhi is deterministic.
+ for (auto *Pred : AddedBlockSet) {
+ auto *LastDefForPred = LastDefAddedPred[Pred];
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(LastDefForPred, Pred);
+ }
+ for (auto *Pred : PrevBlockSet)
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(DefP1, Pred);
+
+ // Insert BB in the set of blocks that now have definition. We'll use this
+ // to compute IDF and add Phis there next.
+ BlocksToProcess.push_back(BB);
+ }
+
+ // Get all blocks that used to dominate BB and no longer do after adding
+ // AddedBlockSet, where PrevBlockSet are the previously known predecessors.
+ assert(DT.getNode(BB)->getIDom() && "BB does not have valid idom");
+ BasicBlock *PrevIDom = FindNearestCommonDominator(PrevBlockSet);
+ assert(PrevIDom && "Previous IDom should exist");
+ BasicBlock *NewIDom = DT.getNode(BB)->getIDom()->getBlock();
+ assert(NewIDom && "BB should have a new valid idom");
+ assert(DT.dominates(NewIDom, PrevIDom) &&
+ "New idom should dominate old idom");
+ GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace);
+ }
+
+ // Compute IDF and add Phis in all IDF blocks that do not have one.
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ if (!BlocksToProcess.empty()) {
+ ForwardIDFCalculator IDFs(DT);
+ SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(),
+ BlocksToProcess.end());
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.calculate(IDFBlocks);
+ for (auto *BBIDF : IDFBlocks) {
+ if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) {
+ // Update existing Phi.
+ // FIXME: some updates may be redundant, try to optimize and skip some.
+ for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
+ IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
+ } else {
+ IDFPhi = MSSA->createMemoryPhi(BBIDF);
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
+ BasicBlock *Pi = Pair.second;
+ IDFPhi->addIncoming(GetLastDef(Pi), Pi);
+ }
+ }
+ }
+ }
+
+ // Now for all defs in BlocksWithDefsToReplace, if there are uses they no
+ // longer dominate, replace those with the closest dominating def.
+ // This will also update optimized accesses, as they're also uses.
+ for (auto *BlockWithDefsToReplace : BlocksWithDefsToReplace) {
+ if (auto DefsList = MSSA->getWritableBlockDefs(BlockWithDefsToReplace)) {
+ for (auto &DefToReplaceUses : *DefsList) {
+ BasicBlock *DominatingBlock = DefToReplaceUses.getBlock();
+ Value::use_iterator UI = DefToReplaceUses.use_begin(),
+ E = DefToReplaceUses.use_end();
+ for (; UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser());
+ if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
+ BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
+ if (!DT.dominates(DominatingBlock, DominatedBlock))
+ U.set(GetLastDef(DominatedBlock));
+ } else {
+ BasicBlock *DominatedBlock = Usr->getBlock();
+ if (!DT.dominates(DominatingBlock, DominatedBlock)) {
+ if (auto *DomBlPhi = MSSA->getMemoryAccess(DominatedBlock))
+ U.set(DomBlPhi);
+ else {
+ auto *IDom = DT.getNode(DominatedBlock)->getIDom();
+ assert(IDom && "Block must have a valid IDom.");
+ U.set(GetLastDef(IDom->getBlock()));
+ }
+ cast<MemoryUseOrDef>(Usr)->resetOptimized();
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
// Move What before Where in the MemorySSA IR.
template <class WhereType>
void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
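A hedged usage sketch of the new update entry points (assumptions: MSSAU wraps this function's MemorySSA, DT already reflects the freshly inserted edge From->To, and DominatorTree::UpdateType is the same cfg::Update<BasicBlock *> type as the updater's CFGUpdate):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

static void notifyEdgeInserted(MemorySSAUpdater &MSSAU, DominatorTree &DT,
                               BasicBlock *From, BasicBlock *To) {
  SmallVector<DominatorTree::UpdateType, 1> Updates;
  Updates.push_back({DT.Insert, From, To});
  // applyInsertUpdates expects DT to already contain the inserted edges;
  // applyUpdates also accepts a mix of insertions and deletions.
  MSSAU.applyInsertUpdates(Updates, DT);
}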
@@ -498,13 +1015,14 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
}
void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
- BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds) {
+ BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds,
+ bool IdenticalEdgesWereMerged) {
assert(!MSSA->getWritableBlockAccesses(New) &&
"Access list should be null for a new block.");
MemoryPhi *Phi = MSSA->getMemoryAccess(Old);
if (!Phi)
return;
- if (pred_size(Old) == 1) {
+ if (Old->hasNPredecessors(1)) {
assert(pred_size(New) == Preds.size() &&
"Should have moved all predecessors.");
MSSA->moveTo(Phi, New, MemorySSA::Beginning);
@@ -513,9 +1031,17 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
"new immediate predecessor.");
MemoryPhi *NewPhi = MSSA->createMemoryPhi(New);
SmallPtrSet<BasicBlock *, 16> PredsSet(Preds.begin(), Preds.end());
+ // Currently only support the case of removing a single incoming edge when
+ // identical edges were not merged.
+ if (!IdenticalEdgesWereMerged)
+ assert(PredsSet.size() == Preds.size() &&
+ "If identical edges were not merged, we cannot have duplicate "
+ "blocks in the predecessors");
Phi->unorderedDeleteIncomingIf([&](MemoryAccess *MA, BasicBlock *B) {
if (PredsSet.count(B)) {
NewPhi->addIncoming(MA, B);
+ if (!IdenticalEdgesWereMerged)
+ PredsSet.erase(B);
return true;
}
return false;
@@ -578,9 +1104,9 @@ void MemorySSAUpdater::removeBlocks(
const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
// First delete all uses of BB in MemoryPhis.
for (BasicBlock *BB : DeadBlocks) {
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
assert(TI && "Basic block expected to have a terminator instruction");
- for (BasicBlock *Succ : TI->successors())
+ for (BasicBlock *Succ : successors(TI))
if (!DeadBlocks.count(Succ))
if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) {
MP->unorderedDeleteIncomingBlock(BB);
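Finally, an illustrative caller for the edge-removal helper added in this file (assuming MSSAU tracks the function whose branch From->To has just been removed from the IR):

#include "llvm/Analysis/MemorySSAUpdater.h"
using namespace llvm;

static void onEdgeDeleted(MemorySSAUpdater &MSSAU, BasicBlock *From,
                          BasicBlock *To) {
  // Drops From's incoming value from To's MemoryPhi; if only one incoming
  // value remains, the Phi itself is removed.
  MSSAU.removeEdge(From, To);
}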
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index ce099ed2f391..87f76d43bb1e 100644
--- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -158,7 +158,8 @@ static void addIntrinsicToSummary(
SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
- SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls) {
+ SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
+ DominatorTree &DT) {
switch (CI->getCalledFunction()->getIntrinsicID()) {
case Intrinsic::type_test: {
auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
@@ -183,7 +184,7 @@ static void addIntrinsicToSummary(
SmallVector<DevirtCallSite, 4> DevirtCalls;
SmallVector<CallInst *, 4> Assumes;
- findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
for (auto &Call : DevirtCalls)
addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
TypeTestAssumeConstVCalls);
@@ -203,7 +204,7 @@ static void addIntrinsicToSummary(
SmallVector<Instruction *, 4> Preds;
bool HasNonCallUses = false;
findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
- HasNonCallUses, CI);
+ HasNonCallUses, CI, DT);
// Any non-call uses of the result of llvm.type.checked.load will
// prevent us from optimizing away the llvm.type.test.
if (HasNonCallUses)
@@ -219,11 +220,19 @@ static void addIntrinsicToSummary(
}
}
-static void
-computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
- const Function &F, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, bool HasLocalsInUsedOrAsm,
- DenseSet<GlobalValue::GUID> &CantBePromoted) {
+static bool isNonVolatileLoad(const Instruction *I) {
+ if (const auto *LI = dyn_cast<LoadInst>(I))
+ return !LI->isVolatile();
+
+ return false;
+}
+
+static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+ const Function &F, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, DominatorTree &DT,
+ bool HasLocalsInUsedOrAsm,
+ DenseSet<GlobalValue::GUID> &CantBePromoted,
+ bool IsThinLTO) {
// Summary not currently supported for anonymous functions, they should
// have been named.
assert(F.hasName());
@@ -244,6 +253,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Add personality function, prefix data and prologue data to function's ref
// list.
findRefEdges(Index, &F, RefEdges, Visited);
+ std::vector<const Instruction *> NonVolatileLoads;
bool HasInlineAsmMaybeReferencingInternal = false;
for (const BasicBlock &BB : F)
@@ -251,6 +261,13 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (isa<DbgInfoIntrinsic>(I))
continue;
++NumInsts;
+ if (isNonVolatileLoad(&I)) {
+ // Postpone processing of non-volatile load instructions;
+ // see the comments below.
+ Visited.insert(&I);
+ NonVolatileLoads.push_back(&I);
+ continue;
+ }
findRefEdges(Index, &I, RefEdges, Visited);
auto CS = ImmutableCallSite(&I);
if (!CS)
@@ -284,7 +301,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (CI && CalledFunction->isIntrinsic()) {
addIntrinsicToSummary(
CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
- TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls);
+ TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
continue;
}
// We should have named any anonymous globals
@@ -340,6 +357,24 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
+ // By now we have processed all instructions in the function except
+ // non-volatile loads. All new refs we add in the loop below
+ // are obviously constant. All constant refs are grouped at the
+ // end of the RefEdges vector, so we can use a single integer value
+ // to identify them.
+ unsigned RefCnt = RefEdges.size();
+ for (const Instruction *I : NonVolatileLoads) {
+ Visited.erase(I);
+ findRefEdges(Index, I, RefEdges, Visited);
+ }
+ std::vector<ValueInfo> Refs = RefEdges.takeVector();
+ // A regular LTO module doesn't participate in ThinLTO importing,
+ // so no reference from it can be read-only, since that would
+ // require importing the variable as a local copy.
+ if (IsThinLTO)
+ for (; RefCnt < Refs.size(); ++RefCnt)
+ Refs[RefCnt].setReadOnly();
+
// Explicitly add hot edges to enforce importing for designated GUIDs for
// sample PGO, to enable the same inlines as the profiled optimized binary.
for (auto &I : F.getImportGUIDs())
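The hunk above postpones non-volatile loads, appends their references after every other reference, and then marks only that trailing segment read-only, and only for ThinLTO modules. A hedged sketch of the same process-defer-append-tag-the-tail shape, using plain containers instead of the ModuleSummaryIndex types (Ref and buildRefs are invented names):

    #include <cstddef>
    #include <string>
    #include <vector>

    struct Ref {
      std::string Target;
      bool ReadOnly;
    };

    // Ordinary references are collected first; references coming only from
    // non-volatile loads are deferred and appended afterwards, so the
    // read-only candidates form a contiguous tail of the vector.
    std::vector<Ref> buildRefs(const std::vector<std::string> &Ordinary,
                               const std::vector<std::string> &FromNonVolatileLoads,
                               bool IsThinLTO) {
      std::vector<Ref> Refs;
      for (const auto &T : Ordinary)
        Refs.push_back({T, false});
      std::size_t RefCnt = Refs.size();      // First index of the deferred tail.
      for (const auto &T : FromNonVolatileLoads)
        Refs.push_back({T, false});
      // Regular LTO modules never import variables, so the read-only marking
      // only makes sense for ThinLTO, as in the hunk above.
      if (IsThinLTO)
        for (; RefCnt < Refs.size(); ++RefCnt)
          Refs[RefCnt].ReadOnly = true;
      return Refs;
    }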
@@ -350,22 +385,18 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
- NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
- // Inliner doesn't handle variadic functions.
- // FIXME: refactor this to use the same code that inliner is using.
- F.isVarArg() ||
- // Don't try to import functions with noinline attribute.
- F.getAttributes().hasFnAttribute(Attribute::NoInline);
+ NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
/* Live = */ false, F.isDSOLocal());
FunctionSummary::FFlags FunFlags{
F.hasFnAttribute(Attribute::ReadNone),
F.hasFnAttribute(Attribute::ReadOnly),
- F.hasFnAttribute(Attribute::NoRecurse),
- F.returnDoesNotAlias(),
- };
+ F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
+ // FIXME: refactor this to use the same code that inliner is using.
+ // Don't try to import functions with noinline attribute.
+ F.getAttributes().hasFnAttribute(Attribute::NoInline)};
auto FuncSummary = llvm::make_unique<FunctionSummary>(
- Flags, NumInsts, FunFlags, RefEdges.takeVector(),
+ Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
CallGraphEdges.takeVector(), TypeTests.takeVector(),
TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
TypeTestAssumeConstVCalls.takeVector(),
@@ -384,8 +415,13 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
/* Live = */ false, V.isDSOLocal());
- auto GVarSummary =
- llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+
+ // Don't mark variables we won't be able to internalize as read-only.
+ GlobalVarSummary::GVarFlags VarFlags(
+ !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
+ !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass());
+ auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
+ RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
if (HasBlockAddress)
@@ -421,7 +457,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
ProfileSummaryInfo *PSI) {
assert(PSI);
- ModuleSummaryIndex Index(/*HaveGVs=*/true);
+ bool EnableSplitLTOUnit = false;
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("EnableSplitLTOUnit")))
+ EnableSplitLTOUnit = MD->getZExtValue();
+ ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);
// Identify the local values in the llvm.used and llvm.compiler.used sets,
// which should not be exported as they would then require renaming and
@@ -473,13 +513,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (Function *F = dyn_cast<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
llvm::make_unique<FunctionSummary>(
- GVFlags, 0,
+ GVFlags, /*InstCount=*/0,
FunctionSummary::FFlags{
F->hasFnAttribute(Attribute::ReadNone),
F->hasFnAttribute(Attribute::ReadOnly),
F->hasFnAttribute(Attribute::NoRecurse),
- F->returnDoesNotAlias()},
- ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{},
+ F->returnDoesNotAlias(),
+ /* NoInline = */ false},
+ /*EntryCount=*/0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
ArrayRef<FunctionSummary::VFuncId>{},
ArrayRef<FunctionSummary::VFuncId>{},
@@ -488,33 +530,40 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
- llvm::make_unique<GlobalVarSummary>(GVFlags,
- ArrayRef<ValueInfo>{});
+ llvm::make_unique<GlobalVarSummary>(
+ GVFlags, GlobalVarSummary::GVarFlags(),
+ ArrayRef<ValueInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
}
});
}
+ bool IsThinLTO = true;
+ if (auto *MD =
+ mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
+ IsThinLTO = MD->getZExtValue();
+
// Compute summaries for all functions defined in module, and save in the
// index.
for (auto &F : M) {
if (F.isDeclaration())
continue;
+ DominatorTree DT(const_cast<Function &>(F));
BlockFrequencyInfo *BFI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFIPtr;
if (GetBFICallback)
BFI = GetBFICallback(F);
else if (F.hasProfileData()) {
- LoopInfo LI{DominatorTree(const_cast<Function &>(F))};
+ LoopInfo LI{DT};
BranchProbabilityInfo BPI{F, LI};
BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
BFI = BFIPtr.get();
}
- computeFunctionSummary(Index, M, F, BFI, PSI,
+ computeFunctionSummary(Index, M, F, BFI, PSI, DT,
!LocalsUsed.empty() || HasLocalInlineAsmSymbol,
- CantBePromoted);
+ CantBePromoted, IsThinLTO);
}
// Compute summaries for all variables defined in module, and save in the
@@ -545,11 +594,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
setLiveRoot(Index, "llvm.global_dtors");
setLiveRoot(Index, "llvm.global.annotations");
- bool IsThinLTO = true;
- if (auto *MD =
- mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
- IsThinLTO = MD->getZExtValue();
-
for (auto &GlobalList : Index) {
// Ignore entries for references that are undefined in the current module.
if (GlobalList.second.SummaryList.empty())
@@ -619,7 +663,7 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
}
bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
- auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
Index.emplace(buildModuleSummaryIndex(
M,
[this](const Function &F) {
@@ -627,7 +671,7 @@ bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
*const_cast<Function *>(&F))
.getBFI());
},
- &PSI));
+ PSI));
return false;
}
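Both EnableSplitLTOUnit and ThinLTO are read with the same module-flag idiom in the hunks above. A small sketch of that idiom as a helper; getIntModuleFlagOrDefault is an invented name, while mdconst::extract_or_null, Module::getModuleFlag and ConstantInt are the APIs the diff itself uses:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    #include <cstdint>

    using namespace llvm;

    // Read an integer module flag, falling back to Default when the flag is
    // absent or is not a constant integer.
    static uint64_t getIntModuleFlagOrDefault(const Module &M, StringRef Name,
                                              uint64_t Default) {
      if (auto *MD =
              mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(Name)))
        return MD->getZExtValue();
      return Default;
    }

    // Usage matching the hunks above:
    //   bool IsThinLTO = getIntModuleFlagOrDefault(M, "ThinLTO", /*Default=*/1);
    //   bool EnableSplitLTOUnit =
    //       getIntModuleFlagOrDefault(M, "EnableSplitLTOUnit", /*Default=*/0);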
diff --git a/contrib/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm/lib/Analysis/MustExecute.cpp
index 8e85366b4618..180c38ddacc2 100644
--- a/contrib/llvm/lib/Analysis/MustExecute.cpp
+++ b/contrib/llvm/lib/Analysis/MustExecute.cpp
@@ -22,20 +22,32 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-/// Computes loop safety information, checks loop body & header
-/// for the possibility of may throw exception.
-///
-void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
+const DenseMap<BasicBlock *, ColorVector> &
+LoopSafetyInfo::getBlockColors() const {
+ return BlockColors;
+}
+
+void LoopSafetyInfo::copyColors(BasicBlock *New, BasicBlock *Old) {
+ ColorVector &ColorsForNewBlock = BlockColors[New];
+ ColorVector &ColorsForOldBlock = BlockColors[Old];
+ ColorsForNewBlock = ColorsForOldBlock;
+}
+
+bool SimpleLoopSafetyInfo::blockMayThrow(const BasicBlock *BB) const {
+ (void)BB;
+ return anyBlockMayThrow();
+}
+
+bool SimpleLoopSafetyInfo::anyBlockMayThrow() const {
+ return MayThrow;
+}
+
+void SimpleLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) {
assert(CurLoop != nullptr && "CurLoop can't be null");
BasicBlock *Header = CurLoop->getHeader();
- // Setting default safety values.
- SafetyInfo->MayThrow = false;
- SafetyInfo->HeaderMayThrow = false;
// Iterate over header and compute safety info.
- SafetyInfo->HeaderMayThrow =
- !isGuaranteedToTransferExecutionToSuccessor(Header);
-
- SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
+ HeaderMayThrow = !isGuaranteedToTransferExecutionToSuccessor(Header);
+ MayThrow = HeaderMayThrow;
// Iterate over loop instructions and compute safety info.
// Skip header as it has been computed and stored in HeaderMayThrow.
// The first block in loopinfo.Blocks is guaranteed to be the header.
@@ -43,23 +55,59 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
"First block must be header");
for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
BBE = CurLoop->block_end();
- (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
- SafetyInfo->MayThrow |=
- !isGuaranteedToTransferExecutionToSuccessor(*BB);
+ (BB != BBE) && !MayThrow; ++BB)
+ MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(*BB);
+
+ computeBlockColors(CurLoop);
+}
+bool ICFLoopSafetyInfo::blockMayThrow(const BasicBlock *BB) const {
+ return ICF.hasICF(BB);
+}
+
+bool ICFLoopSafetyInfo::anyBlockMayThrow() const {
+ return MayThrow;
+}
+
+void ICFLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) {
+ assert(CurLoop != nullptr && "CurLoop can't be null");
+ ICF.clear();
+ MW.clear();
+ MayThrow = false;
+ // Figure out the fact that at least one block may throw.
+ for (auto &BB : CurLoop->blocks())
+ if (ICF.hasICF(&*BB)) {
+ MayThrow = true;
+ break;
+ }
+ computeBlockColors(CurLoop);
+}
+
+void ICFLoopSafetyInfo::insertInstructionTo(const Instruction *Inst,
+ const BasicBlock *BB) {
+ ICF.insertInstructionTo(Inst, BB);
+ MW.insertInstructionTo(Inst, BB);
+}
+
+void ICFLoopSafetyInfo::removeInstruction(const Instruction *Inst) {
+ ICF.removeInstruction(Inst);
+ MW.removeInstruction(Inst);
+}
+
+void LoopSafetyInfo::computeBlockColors(const Loop *CurLoop) {
// Compute funclet colors if we might sink/hoist in a function with a funclet
// personality routine.
Function *Fn = CurLoop->getHeader()->getParent();
if (Fn->hasPersonalityFn())
if (Constant *PersonalityFn = Fn->getPersonalityFn())
if (isScopedEHPersonality(classifyEHPersonality(PersonalityFn)))
- SafetyInfo->BlockColors = colorEHFunclets(*Fn);
+ BlockColors = colorEHFunclets(*Fn);
}
/// Return true if we can prove that the given ExitBlock is not reached on the
/// first iteration of the given loop. That is, the backedge of the loop must
/// be executed before the ExitBlock is executed in any dynamic execution trace.
-static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
+static bool CanProveNotTakenFirstIteration(const BasicBlock *ExitBlock,
const DominatorTree *DT,
const Loop *CurLoop) {
auto *CondExitBlock = ExitBlock->getSinglePredecessor();
@@ -99,15 +147,94 @@ static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
return SimpleCst->isAllOnesValue();
}
+/// Collect all blocks from \p CurLoop which lie on all possible paths from
+/// the header of \p CurLoop (inclusive) to BB (exclusive) into the set
+/// \p Predecessors. If \p BB is the header, \p Predecessors will be empty.
+static void collectTransitivePredecessors(
+ const Loop *CurLoop, const BasicBlock *BB,
+ SmallPtrSetImpl<const BasicBlock *> &Predecessors) {
+ assert(Predecessors.empty() && "Garbage in predecessors set?");
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+ if (BB == CurLoop->getHeader())
+ return;
+ SmallVector<const BasicBlock *, 4> WorkList;
+ for (auto *Pred : predecessors(BB)) {
+ Predecessors.insert(Pred);
+ WorkList.push_back(Pred);
+ }
+ while (!WorkList.empty()) {
+ auto *Pred = WorkList.pop_back_val();
+ assert(CurLoop->contains(Pred) && "Should only reach loop blocks!");
+ // We are not interested in backedges, and we don't want to leave the loop.
+ if (Pred == CurLoop->getHeader())
+ continue;
+ // TODO: If BB lies in an inner loop of CurLoop, this will traverse over all
+ // blocks of this inner loop, even those that are always executed AFTER the
+ // BB. It may make our analysis more conservative than it could be, see test
+ // @nested and @nested_no_throw in test/Analysis/MustExecute/loop-header.ll.
+ // We can ignore the backedges of all loops containing BB to get a slightly
+ // more optimistic result.
+ for (auto *PredPred : predecessors(Pred))
+ if (Predecessors.insert(PredPred).second)
+ WorkList.push_back(PredPred);
+ }
+}
+
+bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
+ const BasicBlock *BB,
+ const DominatorTree *DT) const {
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+
+ // Fast path: header is always reached once the loop is entered.
+ if (BB == CurLoop->getHeader())
+ return true;
+
+ // Collect all transitive predecessors of BB in the same loop. This set will
+ // be a subset of the blocks within the loop.
+ SmallPtrSet<const BasicBlock *, 4> Predecessors;
+ collectTransitivePredecessors(CurLoop, BB, Predecessors);
+
+ // Make sure that all successors of all predecessors of BB are either:
+ // 1) BB,
+ // 2) Also predecessors of BB,
+ // 3) Exit blocks which are not taken on 1st iteration.
+ // Memoize blocks we've already checked.
+ SmallPtrSet<const BasicBlock *, 4> CheckedSuccessors;
+ for (auto *Pred : Predecessors) {
+ // Predecessor block may throw, so it has a side exit.
+ if (blockMayThrow(Pred))
+ return false;
+ for (auto *Succ : successors(Pred))
+ if (CheckedSuccessors.insert(Succ).second &&
+ Succ != BB && !Predecessors.count(Succ))
+ // By discharging conditions that are not executed on the 1st iteration,
+ // we guarantee that *at least* on the first iteration all paths from
+ // header that *may* execute will lead us to the block of interest. So
+ // that if we had virtually peeled one iteration away, in this peeled
+ // iteration the set of predecessors would contain only paths from
+ // header to BB without any exiting edges that may execute.
+ //
+ // TODO: We only do it for exiting edges currently. We could use the
+ // same function to skip some of the edges within the loop if we know
+ // that they will not be taken on the 1st iteration.
+ //
+ // TODO: If we somehow know the number of iterations in loop, the same
+ // check may be done for any arbitrary N-th iteration as long as N is
+ // not greater than minimum number of iterations in this loop.
+ if (CurLoop->contains(Succ) ||
+ !CanProveNotTakenFirstIteration(Succ, DT, CurLoop))
+ return false;
+ }
+
+ // All predecessors can only lead us to BB.
+ return true;
+}
+
/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
-bool llvm::isGuaranteedToExecute(const Instruction &Inst,
- const DominatorTree *DT, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo) {
- // We have to check to make sure that the instruction dominates all
- // of the exit blocks. If it doesn't, then there is a path out of the loop
- // which does not execute this instruction, so we can't hoist it.
-
+bool SimpleLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const {
// If the instruction is in the header block for the loop (which is very
// common), it is always guaranteed to dominate the exit blocks. Since this
// is a common case, and can save some work, check it now.
@@ -116,52 +243,48 @@ bool llvm::isGuaranteedToExecute(const Instruction &Inst,
// Inst unless we can prove that Inst comes before the potential implicit
// exit. At the moment, we use a (cheap) hack for the common case where
// the instruction of interest is the first one in the block.
- return !SafetyInfo->HeaderMayThrow ||
- Inst.getParent()->getFirstNonPHIOrDbg() == &Inst;
+ return !HeaderMayThrow ||
+ Inst.getParent()->getFirstNonPHIOrDbg() == &Inst;
- // Somewhere in this loop there is an instruction which may throw and make us
- // exit the loop.
- if (SafetyInfo->MayThrow)
- return false;
+ // If there is a path from header to exit or latch that doesn't lead to our
+ // instruction's block, return false.
+ return allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT);
+}
- // Note: There are two styles of reasoning intermixed below for
- // implementation efficiency reasons. They are:
- // 1) If we can prove that the instruction dominates all exit blocks, then we
- // know the instruction must have executed on *some* iteration before we
- // exit. We do not prove *which* iteration the instruction must execute on.
- // 2) If we can prove that the instruction dominates the latch and all exits
- // which might be taken on the first iteration, we know the instruction must
- // execute on the first iteration. This second style allows a conditional
- // exit before the instruction of interest which is provably not taken on the
- // first iteration. This is a quite common case for range check like
- // patterns. TODO: support loops with multiple latches.
-
- const bool InstDominatesLatch =
- CurLoop->getLoopLatch() != nullptr &&
- DT->dominates(Inst.getParent(), CurLoop->getLoopLatch());
-
- // Get the exit blocks for the current loop.
- SmallVector<BasicBlock *, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
-
- // Verify that the block dominates each of the exit blocks of the loop.
- for (BasicBlock *ExitBlock : ExitBlocks)
- if (!DT->dominates(Inst.getParent(), ExitBlock))
- if (!InstDominatesLatch ||
- !CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop))
- return false;
-
- // As a degenerate case, if the loop is statically infinite then we haven't
- // proven anything since there are no exit blocks.
- if (ExitBlocks.empty())
- return false;
+bool ICFLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const {
+ return !ICF.isDominatedByICFIFromSameBlock(&Inst) &&
+ allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT);
+}
+
+bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const BasicBlock *BB,
+ const Loop *CurLoop) const {
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
- // FIXME: In general, we have to prove that the loop isn't an infinite loop.
- // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
- // just a special case of this.)
+ // Fast path: there are no instructions before header.
+ if (BB == CurLoop->getHeader())
+ return true;
+
+ // Collect all transitive predecessors of BB in the same loop. This set will
+ // be a subset of the blocks within the loop.
+ SmallPtrSet<const BasicBlock *, 4> Predecessors;
+ collectTransitivePredecessors(CurLoop, BB, Predecessors);
+ // Find out whether any instruction in any of the predecessors could write
+ // to memory.
+ for (auto *Pred : Predecessors)
+ if (MW.mayWriteToMemory(Pred))
+ return false;
return true;
}
+bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const Instruction &I,
+ const Loop *CurLoop) const {
+ auto *BB = I.getParent();
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+ return !MW.isDominatedByMemoryWriteFromSameBlock(&I) &&
+ doesNotWriteMemoryBefore(BB, CurLoop);
+}
namespace {
struct MustExecutePrinter : public FunctionPass {
@@ -195,9 +318,9 @@ static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) {
// TODO: merge these two routines. For the moment, we display the best
// result obtained by *either* implementation. This is a bit unfair since no
// caller actually gets the full power at the moment.
- LoopSafetyInfo LSI;
- computeLoopSafetyInfo(&LSI, L);
- return isGuaranteedToExecute(I, DT, L, &LSI) ||
+ SimpleLoopSafetyInfo LSI;
+ LSI.computeLoopSafetyInfo(L);
+ return LSI.isGuaranteedToExecute(I, DT, L) ||
isGuaranteedToExecuteForEveryIteration(&I, L);
}
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 096ea661ecb6..95ae1a6e744f 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -106,12 +106,12 @@ FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
return AAResultBase::getModRefBehavior(F);
}
-ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableARCOpts)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
- switch (GetBasicARCInstKind(CS.getInstruction())) {
+ switch (GetBasicARCInstKind(Call)) {
case ARCInstKind::Retain:
case ARCInstKind::RetainRV:
case ARCInstKind::Autorelease:
@@ -128,7 +128,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
break;
}
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
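The ObjCARCAAResult hunk is one instance of the tree-wide move from ImmutableCallSite to CallBase: alias queries now take the call instruction directly. A hedged caller-side sketch; queryCallModRef is an invented helper, while CallBase, dyn_cast and AAResults::getModRefInfo are the interfaces the hunk itself relies on:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // If I is a call of any kind, ask alias analysis what it may do to Loc;
    // otherwise answer conservatively.
    static ModRefInfo queryCallModRef(AAResults &AA, const Instruction &I,
                                      const MemoryLocation &Loc) {
      if (const auto *Call = dyn_cast<CallBase>(&I))
        return AA.getModRefInfo(Call, Loc);
      return ModRefInfo::ModRef;
    }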
diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
index f268e2a9abdd..31c432711834 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -85,97 +85,73 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
}
ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- // No (mandatory) arguments.
- if (AI == AE)
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush)
- .Case("clang.arc.use", ARCInstKind::IntrinsicUser)
- .Default(ARCInstKind::CallOrUser);
-
- // One argument.
- const Argument *A0 = &*AI++;
- if (AI == AE) {
- // Argument is a pointer.
- PointerType *PTy = dyn_cast<PointerType>(A0->getType());
- if (!PTy)
- return ARCInstKind::CallOrUser;
-
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_retain", ARCInstKind::Retain)
- .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
- .Case("objc_unsafeClaimAutoreleasedReturnValue", ARCInstKind::ClaimRV)
- .Case("objc_retainBlock", ARCInstKind::RetainBlock)
- .Case("objc_release", ARCInstKind::Release)
- .Case("objc_autorelease", ARCInstKind::Autorelease)
- .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
- .Case("objc_retainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
- .Case("objc_retain_autorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",
- ARCInstKind::FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", ARCInstKind::User)
- .Case("objc_sync_exit", ARCInstKind::User)
- .Default(ARCInstKind::CallOrUser);
-
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
- .Case("objc_loadWeak", ARCInstKind::LoadWeak)
- .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
- .Default(ARCInstKind::CallOrUser);
-
- // Anything else with one argument.
+ Intrinsic::ID ID = F->getIntrinsicID();
+ switch (ID) {
+ default:
return ARCInstKind::CallOrUser;
+ case Intrinsic::objc_autorelease:
+ return ARCInstKind::Autorelease;
+ case Intrinsic::objc_autoreleasePoolPop:
+ return ARCInstKind::AutoreleasepoolPop;
+ case Intrinsic::objc_autoreleasePoolPush:
+ return ARCInstKind::AutoreleasepoolPush;
+ case Intrinsic::objc_autoreleaseReturnValue:
+ return ARCInstKind::AutoreleaseRV;
+ case Intrinsic::objc_copyWeak:
+ return ARCInstKind::CopyWeak;
+ case Intrinsic::objc_destroyWeak:
+ return ARCInstKind::DestroyWeak;
+ case Intrinsic::objc_initWeak:
+ return ARCInstKind::InitWeak;
+ case Intrinsic::objc_loadWeak:
+ return ARCInstKind::LoadWeak;
+ case Intrinsic::objc_loadWeakRetained:
+ return ARCInstKind::LoadWeakRetained;
+ case Intrinsic::objc_moveWeak:
+ return ARCInstKind::MoveWeak;
+ case Intrinsic::objc_release:
+ return ARCInstKind::Release;
+ case Intrinsic::objc_retain:
+ return ARCInstKind::Retain;
+ case Intrinsic::objc_retainAutorelease:
+ return ARCInstKind::FusedRetainAutorelease;
+ case Intrinsic::objc_retainAutoreleaseReturnValue:
+ return ARCInstKind::FusedRetainAutoreleaseRV;
+ case Intrinsic::objc_retainAutoreleasedReturnValue:
+ return ARCInstKind::RetainRV;
+ case Intrinsic::objc_retainBlock:
+ return ARCInstKind::RetainBlock;
+ case Intrinsic::objc_storeStrong:
+ return ARCInstKind::StoreStrong;
+ case Intrinsic::objc_storeWeak:
+ return ARCInstKind::StoreWeak;
+ case Intrinsic::objc_clang_arc_use:
+ return ARCInstKind::IntrinsicUser;
+ case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
+ return ARCInstKind::ClaimRV;
+ case Intrinsic::objc_retainedObject:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_unretainedObject:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_unretainedPointer:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_retain_autorelease:
+ return ARCInstKind::FusedRetainAutorelease;
+ case Intrinsic::objc_sync_enter:
+ return ARCInstKind::User;
+ case Intrinsic::objc_sync_exit:
+ return ARCInstKind::User;
+ case Intrinsic::objc_arc_annotation_topdown_bbstart:
+ case Intrinsic::objc_arc_annotation_topdown_bbend:
+ case Intrinsic::objc_arc_annotation_bottomup_bbstart:
+ case Intrinsic::objc_arc_annotation_bottomup_bbend:
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses, which would
+ // make the state of the pointers they are attempting to
+ // elucidate incorrect.
+ return ARCInstKind::None;
}
-
- // Two arguments, first is i8**.
- const Argument *A1 = &*AI++;
- if (AI == AE)
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
- if (Pte->getElementType()->isIntegerTy(8))
- if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- Type *ETy1 = PTy1->getElementType();
- // Second argument is i8*
- if (ETy1->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_storeWeak", ARCInstKind::StoreWeak)
- .Case("objc_initWeak", ARCInstKind::InitWeak)
- .Case("objc_storeStrong", ARCInstKind::StoreStrong)
- .Default(ARCInstKind::CallOrUser);
- // Second argument is i8**.
- if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
- if (Pte1->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_moveWeak", ARCInstKind::MoveWeak)
- .Case("objc_copyWeak", ARCInstKind::CopyWeak)
- // Ignore annotation calls. This is important to stop the
- // optimizer from treating annotations as uses which would
- // make the state of the pointers they are attempting to
- // elucidate to be incorrect.
- .Case("llvm.arc.annotation.topdown.bbstart",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.topdown.bbend",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.bottomup.bbstart",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.bottomup.bbend",
- ARCInstKind::None)
- .Default(ARCInstKind::CallOrUser);
- }
-
- // Anything else.
- return ARCInstKind::CallOrUser;
}
// A whitelist of intrinsics that we know do not use objc pointers or decrement
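GetFunctionClass now keys off Function::getIntrinsicID() instead of matching runtime-function names and argument shapes with StringSwitch, since the objc_* runtime calls are modelled as intrinsics in this import. A minimal sketch of the same classification shape; isAutoreleasePoolIntrinsic is an invented helper, the intrinsic IDs are ones listed above:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"

    using namespace llvm;

    // Look up the intrinsic ID once and switch on it, with a conservative
    // default for anything that is not a recognized runtime intrinsic.
    static bool isAutoreleasePoolIntrinsic(const Function *F) {
      switch (F->getIntrinsicID()) {
      case Intrinsic::objc_autoreleasePoolPush:
      case Intrinsic::objc_autoreleasePoolPop:
        return true;
      default:
        return false; // Treat unknown callees conservatively.
      }
    }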
diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
index 6c47651eae9e..5f4fe0f7dda2 100644
--- a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
+++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp
@@ -37,6 +37,8 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
const Instruction *Inst = nullptr;
assert(!(LastInstFound == BB->end() && NextInstPos != 0) &&
"Instruction supposed to be in NumberedInsts");
+ assert(A->getParent() == BB && "Instruction supposed to be in the block!");
+ assert(B->getParent() == BB && "Instruction supposed to be in the block!");
// Start the search with the instruction found in the last lookup round.
auto II = BB->begin();
@@ -65,6 +67,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
assert(A->getParent() == B->getParent() &&
"Instructions must be in the same basic block!");
+ assert(A->getParent() == BB && "Instructions must be in the tracked block!");
// First we lookup the instructions. If they don't exist, lookup will give us
// back ::end(). If they both exist, we compare the numbers. Otherwise, if NA
diff --git a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp b/contrib/llvm/lib/Analysis/OrderedInstructions.cpp
index 6d0b96f6aa8a..7b155208c02e 100644
--- a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
+++ b/contrib/llvm/lib/Analysis/OrderedInstructions.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
+#include "llvm/Analysis/OrderedInstructions.h"
using namespace llvm;
bool OrderedInstructions::localDominates(const Instruction *InstA,
diff --git a/contrib/llvm/lib/Analysis/PhiValues.cpp b/contrib/llvm/lib/Analysis/PhiValues.cpp
index ef121815d2cf..729227c86697 100644
--- a/contrib/llvm/lib/Analysis/PhiValues.cpp
+++ b/contrib/llvm/lib/Analysis/PhiValues.cpp
@@ -14,6 +14,16 @@
using namespace llvm;
+void PhiValues::PhiValuesCallbackVH::deleted() {
+ PV->invalidateValue(getValPtr());
+}
+
+void PhiValues::PhiValuesCallbackVH::allUsesReplacedWith(Value *) {
+ // We could potentially update the cached values we have with the new value,
+ // but it's simpler to just treat the old value as invalidated.
+ PV->invalidateValue(getValPtr());
+}
+
bool PhiValues::invalidate(Function &, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
// PhiValues is invalidated if it isn't preserved.
@@ -46,6 +56,7 @@ void PhiValues::processPhi(const PHINode *Phi,
DepthMap[Phi] = DepthNumber;
// Recursively process the incoming phis of this phi.
+ TrackedValues.insert(PhiValuesCallbackVH(const_cast<PHINode *>(Phi), this));
for (Value *PhiOp : Phi->incoming_values()) {
if (PHINode *PhiPhiOp = dyn_cast<PHINode>(PhiOp)) {
// Recurse if the phi has not yet been visited.
@@ -56,6 +67,8 @@ void PhiValues::processPhi(const PHINode *Phi,
// phi are part of the same component, so adjust the depth number.
if (!ReachableMap.count(DepthMap[PhiPhiOp]))
DepthMap[Phi] = std::min(DepthMap[Phi], DepthMap[PhiPhiOp]);
+ } else {
+ TrackedValues.insert(PhiValuesCallbackVH(PhiOp, this));
}
}
@@ -122,6 +135,10 @@ void PhiValues::invalidateValue(const Value *V) {
NonPhiReachableMap.erase(N);
ReachableMap.erase(N);
}
+ // This value is no longer tracked
+ auto It = TrackedValues.find_as(V);
+ if (It != TrackedValues.end())
+ TrackedValues.erase(It);
}
void PhiValues::releaseMemory() {
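PhiValuesCallbackVH above is the standard CallbackVH idiom: the analysis keeps one handle per tracked Value and drops cached results when that value is deleted or replaced (RAUW). A hedged sketch of the idiom with an invented CachingAnalysis owner; CallbackVH, deleted() and allUsesReplacedWith() are the real hooks:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Value.h"
    #include "llvm/IR/ValueHandle.h"

    using namespace llvm;

    struct CachingAnalysis {
      DenseMap<const Value *, int> ResultCache;

      class InvalidatingVH final : public CallbackVH {
        CachingAnalysis *Owner;

        void deleted() override {
          // The value is going away: forget everything we cached about it.
          Owner->ResultCache.erase(getValPtr());
          setValPtr(nullptr);
        }
        void allUsesReplacedWith(Value *) override {
          // Treating the old value as invalidated is simpler than rewriting
          // the cache to refer to the replacement.
          Owner->ResultCache.erase(getValPtr());
        }

      public:
        InvalidatingVH(Value *V, CachingAnalysis *O) : CallbackVH(V), Owner(O) {}
      };
    };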
diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index fb591f5d6a69..1d70c75f2e1c 100644
--- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -39,11 +39,6 @@ static cl::opt<int> ProfileSummaryCutoffCold(
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
-static cl::opt<bool> ProfileSampleAccurate(
- "profile-sample-accurate", cl::Hidden, cl::init(false),
- cl::desc("If the sample profile is accurate, we will mark all un-sampled "
- "callsite as cold. Otherwise, treat un-sampled callsites as if "
- "we have no profile."));
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
cl::init(15000), cl::ZeroOrMore,
@@ -51,6 +46,18 @@ static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
+// The next two options override the counts derived from summary computation and
+// are useful for debugging purposes.
+static cl::opt<int> ProfileSummaryHotCount(
+ "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
+ cl::desc("A fixed hot count that overrides the count derived from"
+ " profile-summary-cutoff-hot"));
+
+static cl::opt<int> ProfileSummaryColdCount(
+ "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
+ cl::desc("A fixed cold count that overrides the count derived from"
+ " profile-summary-cutoff-cold"));
+
// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
uint64_t Percentile) {
@@ -139,7 +146,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F,
return true;
}
for (const auto &BB : *F)
- if (isHotBB(&BB, &BFI))
+ if (isHotBlock(&BB, &BFI))
return true;
return false;
}
@@ -168,7 +175,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
return false;
}
for (const auto &BB : *F)
- if (!isColdBB(&BB, &BFI))
+ if (!isColdBlock(&BB, &BFI))
return false;
return true;
}
@@ -198,9 +205,15 @@ void ProfileSummaryInfo::computeThresholds() {
auto &HotEntry =
getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot);
HotCountThreshold = HotEntry.MinCount;
+ if (ProfileSummaryHotCount.getNumOccurrences() > 0)
+ HotCountThreshold = ProfileSummaryHotCount;
auto &ColdEntry =
getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffCold);
ColdCountThreshold = ColdEntry.MinCount;
+ if (ProfileSummaryColdCount.getNumOccurrences() > 0)
+ ColdCountThreshold = ProfileSummaryColdCount;
+ assert(ColdCountThreshold <= HotCountThreshold &&
+ "Cold count threshold cannot exceed hot count threshold!");
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
}
@@ -226,23 +239,23 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() {
if (!HotCountThreshold)
computeThresholds();
- return HotCountThreshold && HotCountThreshold.getValue();
+ return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX;
}
uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() {
if (!ColdCountThreshold)
computeThresholds();
- return ColdCountThreshold && ColdCountThreshold.getValue();
+ return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
}
-bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
- auto Count = BFI->getBlockProfileCount(B);
+bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(BB);
return Count && isHotCount(*Count);
}
-bool ProfileSummaryInfo::isColdBB(const BasicBlock *B,
+bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
BlockFrequencyInfo *BFI) {
- auto Count = BFI->getBlockProfileCount(B);
+ auto Count = BFI->getBlockProfileCount(BB);
return Count && isColdCount(*Count);
}
@@ -260,11 +273,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
// In SamplePGO, if the caller has been sampled, and there is no profile
// annotated on the callsite, we consider the callsite as cold.
- // If there is no profile for the caller, and we know the profile is
- // accurate, we consider the callsite as cold.
- return (hasSampleProfile() &&
- (CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
- CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
+ return hasSampleProfile() && CS.getCaller()->hasProfileData();
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
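The new profile-summary-hot-count and profile-summary-cold-count flags only take effect when they were actually passed on the command line, which getNumOccurrences() distinguishes from the default value. A hedged sketch of that pattern with an invented flag name:

    #include "llvm/Support/CommandLine.h"
    #include <cstdint>

    using namespace llvm;

    static cl::opt<int> HotCountOverride(
        "example-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
        cl::desc("Fixed hot count overriding the summary-derived value"));

    // Only replace the summary-derived threshold when the flag was given.
    static uint64_t applyOverride(uint64_t Computed) {
      if (HotCountOverride.getNumOccurrences() > 0)
        return static_cast<uint64_t>(HotCountOverride);
      return Computed;
    }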
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
index ed17df2e7e93..a101ff109199 100644
--- a/contrib/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/OptBisect.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 0e715b8814ff..e5134f2eeda9 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -112,6 +112,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -162,6 +163,11 @@ static cl::opt<bool>
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
+static cl::opt<bool> VerifyIR(
+ "scev-verify-ir", cl::Hidden,
+ cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"),
+ cl::init(false));
+
static cl::opt<unsigned> MulOpsInlineThreshold(
"scev-mulops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
@@ -204,7 +210,7 @@ static cl::opt<unsigned>
static cl::opt<unsigned>
MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
cl::desc("Max coefficients in AddRec during evolving"),
- cl::init(16));
+ cl::init(8));
//===----------------------------------------------------------------------===//
// SCEV class definitions
@@ -692,10 +698,6 @@ static int CompareSCEVComplexity(
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
- // Compare NoWrap flags.
- if (LA->getNoWrapFlags() != RA->getNoWrapFlags())
- return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags();
-
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
@@ -720,10 +722,6 @@ static int CompareSCEVComplexity(
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
- // Compare NoWrap flags.
- if (LC->getNoWrapFlags() != RC->getNoWrapFlags())
- return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags();
-
for (unsigned i = 0; i != LNumOps; ++i) {
int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
LC->getOperand(i), RC->getOperand(i), DT,
@@ -2767,6 +2765,29 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
+ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ SCEVAddRecExpr *S =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator)
+ SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
+ UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *
ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
@@ -3045,7 +3066,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
- const SCEV *Term = getZero(Ty);
+ SmallVector <const SCEV *, 7> SumOps;
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
@@ -3060,12 +3081,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);
- Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2,
- SCEV::FlagAnyWrap, Depth + 1),
- SCEV::FlagAnyWrap, Depth + 1);
+ SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2,
+ SCEV::FlagAnyWrap, Depth + 1));
}
}
- AddRecOps.push_back(Term);
+ if (SumOps.empty())
+ SumOps.push_back(getZero(Ty));
+ AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
@@ -3416,24 +3438,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// Okay, it looks like we really DO need an addrec expr. Check to see if we
// already have one, otherwise create a new one.
- FoldingSetNodeID ID;
- ID.AddInteger(scAddRecExpr);
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- ID.AddPointer(Operands[i]);
- ID.AddPointer(L);
- void *IP = nullptr;
- SCEVAddRecExpr *S =
- static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
- if (!S) {
- const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
- std::uninitialized_copy(Operands.begin(), Operands.end(), O);
- S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
- O, Operands.size(), L);
- UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
- }
- S->setNoWrapFlags(Flags);
- return S;
+ return getOrCreateAddRecExpr(Operands, L, Flags);
}
const SCEV *
@@ -7080,7 +7085,7 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
return getCouldNotCompute();
bool IsOnlyExit = (L->getExitingBlock() != nullptr);
- TerminatorInst *Term = ExitingBlock->getTerminator();
+ Instruction *Term = ExitingBlock->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
@@ -8344,69 +8349,273 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}
-/// Find the roots of the quadratic equation for the given quadratic chrec
-/// {L,+,M,+,N}. This returns either the two roots (which might be the same) or
-/// two SCEVCouldNotCompute objects.
-static Optional<std::pair<const SCEVConstant *,const SCEVConstant *>>
-SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+/// For a given quadratic addrec, generate coefficients of the corresponding
+/// quadratic equation, multiplied by a common value to ensure that they are
+/// integers.
+/// The returned value is a tuple { A, B, C, M, BitWidth }, where
+/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
+/// were multiplied by, and BitWidth is the bit width of the original addrec
+/// coefficients.
+/// This function returns None if the addrec coefficients are not compile-
+/// time constants.
+static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
+GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+ LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
+ << *AddRec << '\n');
// We currently can only solve this if the coefficients are constants.
- if (!LC || !MC || !NC)
+ if (!LC || !MC || !NC) {
+ LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
return None;
+ }
- uint32_t BitWidth = LC->getAPInt().getBitWidth();
- const APInt &L = LC->getAPInt();
- const APInt &M = MC->getAPInt();
- const APInt &N = NC->getAPInt();
- APInt Two(BitWidth, 2);
-
- // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
+ APInt L = LC->getAPInt();
+ APInt M = MC->getAPInt();
+ APInt N = NC->getAPInt();
+ assert(!N.isNullValue() && "This is not a quadratic addrec");
+
+ unsigned BitWidth = LC->getAPInt().getBitWidth();
+ unsigned NewWidth = BitWidth + 1;
+ LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
+ << BitWidth << '\n');
+ // The sign-extension (as opposed to a zero-extension) here matches the
+ // extension used in SolveQuadraticEquationWrap (with the same motivation).
+ N = N.sext(NewWidth);
+ M = M.sext(NewWidth);
+ L = L.sext(NewWidth);
+
+ // The increments are M, M+N, M+2N, ..., so the accumulated values are
+ // L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
+ // L+M, L+2M+N, L+3M+3N, ...
+ // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
+ //
+ // The equation Acc = 0 is then
+ // L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0.
+ // In a quadratic form it becomes:
+ // N n^2 + (2M-N) n + 2L = 0.
+
+ APInt A = N;
+ APInt B = 2 * M - A;
+ APInt C = 2 * L;
+ APInt T = APInt(NewWidth, 2);
+ LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
+ << "x + " << C << ", coeff bw: " << NewWidth
+ << ", multiplied by " << T << '\n');
+ return std::make_tuple(A, B, C, T, BitWidth);
+}
+
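The comment block in GetQuadraticEquation derives the coefficients from the chrec {L,+,M,+,N}: after n iterations the value is L + nM + n(n-1)/2*N, and clearing the fraction gives N*n^2 + (2M-N)*n + 2L = 0, i.e. A = N, B = 2M-N, C = 2L with multiplier 2. A tiny self-contained check of that algebra on plain integers (illustrative values, not the APInt code):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Chrec {L,+,M,+,N}: value after n iterations is L + n*M + n*(n-1)/2 * N.
      const int64_t L = 6, M = -5, N = 2;
      // Quadratic form derived above, everything multiplied by 2:
      // A*n^2 + B*n + C with A = N, B = 2M - N, C = 2L.
      const int64_t A = N, B = 2 * M - N, C = 2 * L;
      for (int64_t n = 0; n < 10; ++n) {
        int64_t Chrec = L + n * M + n * (n - 1) / 2 * N;
        assert(2 * Chrec == A * n * n + B * n + C);
      }
      return 0;
    }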
+/// Helper function to compare optional APInts:
+/// (a) if X and Y both exist, return min(X, Y),
+/// (b) if neither X nor Y exist, return None,
+/// (c) if exactly one of X and Y exists, return that value.
+static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
+ if (X.hasValue() && Y.hasValue()) {
+ unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
+ APInt XW = X->sextOrSelf(W);
+ APInt YW = Y->sextOrSelf(W);
+ return XW.slt(YW) ? *X : *Y;
+ }
+ if (!X.hasValue() && !Y.hasValue())
+ return None;
+ return X.hasValue() ? *X : *Y;
+}
- // The A coefficient is N/2
- APInt A = N.sdiv(Two);
+/// Helper function to truncate an optional APInt to a given BitWidth.
+/// When solving addrec-related equations, it is preferable to return a value
+/// that has the same bit width as the original addrec's coefficients. If the
+/// solution fits in the original bit width, truncate it (except for i1).
+/// Returning a value of a different bit width may inhibit some optimizations.
+///
+/// In general, a solution to a quadratic equation generated from an addrec
+/// may require BW+1 bits, where BW is the bit width of the addrec's
+/// coefficients. The reason is that the coefficients of the quadratic
+/// equation are BW+1 bits wide (to avoid truncation when converting from
+/// the addrec to the equation).
+static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
+ if (!X.hasValue())
+ return None;
+ unsigned W = X->getBitWidth();
+ if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
+ return X->trunc(BitWidth);
+ return X;
+}
+
+/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n
+/// iterations. The values L, M, N are assumed to be signed, and they
+/// should all have the same bit widths.
+/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW,
+/// where BW is the bit width of the addrec's coefficients.
+/// If the calculated value is a BW-bit integer (for BW > 1), it will be
+/// returned as such, otherwise the bit width of the returned value may
+/// be greater than BW.
+///
+/// This function returns None if
+/// (a) the addrec coefficients are not constant, or
+/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases
+/// like x^2 = 5, no integer solutions exist, in other cases an integer
+/// solution may exist, but SolveQuadraticEquationWrap may fail to find it.
+static Optional<APInt>
+SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ APInt A, B, C, M;
+ unsigned BitWidth;
+ auto T = GetQuadraticEquation(AddRec);
+ if (!T.hasValue())
+ return None;
- // The B coefficient is M-N/2
- APInt B = M;
- B -= A; // A is the same as N/2.
+ std::tie(A, B, C, M, BitWidth) = *T;
+ LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
+ Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
+ if (!X.hasValue())
+ return None;
- // The C coefficient is L.
- const APInt& C = L;
+ ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
+ ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE);
+ if (!V->isZero())
+ return None;
- // Compute the B^2-4ac term.
- APInt SqrtTerm = B;
- SqrtTerm *= B;
- SqrtTerm -= 4 * (A * C);
+ return TruncIfPossible(X, BitWidth);
+}
- if (SqrtTerm.isNegative()) {
- // The loop is provably infinite.
+/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n
+/// iterations. The values M, N are assumed to be signed, and they
+/// should all have the same bit widths.
+/// Find the least n such that c(n) does not belong to the given range,
+/// while c(n-1) does.
+///
+/// This function returns None if
+/// (a) the addrec coefficients are not constant, or
+/// (b) SolveQuadraticEquationWrap was unable to find a solution for the
+/// bounds of the range.
+static Optional<APInt>
+SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
+ const ConstantRange &Range, ScalarEvolution &SE) {
+ assert(AddRec->getOperand(0)->isZero() &&
+ "Starting value of addrec should be 0");
+ LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
+ << Range << ", addrec " << *AddRec << '\n');
+ // This case is handled in getNumIterationsInRange. Here we can assume that
+ // we start in the range.
+ assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
+ "Addrec's initial value should be in range");
+
+ APInt A, B, C, M;
+ unsigned BitWidth;
+ auto T = GetQuadraticEquation(AddRec);
+ if (!T.hasValue())
return None;
- }
- // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
- // integer value or else APInt::sqrt() will assert.
- APInt SqrtVal = SqrtTerm.sqrt();
+ // Be careful about the return value: there can be two reasons for not
+ // returning an actual number. First, if no solutions to the equations
+ // were found, and second, if the solutions don't leave the given range.
+ // The first case means that the actual solution is "unknown", the second
+ // means that it's known, but not valid. If the solution is unknown, we
+ // cannot make any conclusions.
+ // Return a pair: the optional solution and a flag indicating if the
+ // solution was found.
+ auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> {
+ // Solve for signed overflow and unsigned overflow, pick the lower
+ // solution.
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
+ << Bound << " (before multiplying by " << M << ")\n");
+ Bound *= M; // The quadratic equation multiplier.
+
+ Optional<APInt> SO = None;
+ if (BitWidth > 1) {
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
+ "signed overflow\n");
+ SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
+ }
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
+ "unsigned overflow\n");
+ Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
+ BitWidth+1);
+
+ auto LeavesRange = [&] (const APInt &X) {
+ ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
+ ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
+ if (Range.contains(V0->getValue()))
+ return false;
+ // X should be at least 1, so X-1 is non-negative.
+ ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
+ ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
+ if (Range.contains(V1->getValue()))
+ return true;
+ return false;
+ };
- // Compute the two solutions for the quadratic formula.
- // The divisions must be performed as signed divisions.
- APInt NegB = -std::move(B);
- APInt TwoA = std::move(A);
- TwoA <<= 1;
- if (TwoA.isNullValue())
- return None;
+ // If SolveQuadraticEquationWrap returns None, it means that there can
+ // be a solution, but the function failed to find it. We cannot treat it
+ // as "no solution".
+ if (!SO.hasValue() || !UO.hasValue())
+ return { None, false };
+
+ // Check the smaller value first to see if it leaves the range.
+ // At this point, both SO and UO must have values.
+ Optional<APInt> Min = MinOptional(SO, UO);
+ if (LeavesRange(*Min))
+ return { Min, true };
+ Optional<APInt> Max = Min == SO ? UO : SO;
+ if (LeavesRange(*Max))
+ return { Max, true };
+
+ // Solutions were found, but were eliminated, hence the "true".
+ return { None, true };
+ };
- LLVMContext &Context = SE.getContext();
+ std::tie(A, B, C, M, BitWidth) = *T;
+ // Lower bound is inclusive, subtract 1 to represent the exiting value.
+ APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
+ APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
+ auto SL = SolveForBoundary(Lower);
+ auto SU = SolveForBoundary(Upper);
+ // If either of the solutions was unknown, no meaningful conclusions can
+ // be made.
+ if (!SL.second || !SU.second)
+ return None;
- ConstantInt *Solution1 =
- ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
- ConstantInt *Solution2 =
- ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+ // Claim: The correct solution is not some value between Min and Max.
+ //
+ // Justification: Assuming that Min and Max are different values, one of
+ // them is when the first signed overflow happens, the other is when the
+ // first unsigned overflow happens. Crossing the range boundary is only
+ // possible via an overflow (treating 0 as a special case of it, modeling
+ // an overflow as crossing k*2^W for some k).
+ //
+ // The interesting case here is when Min was eliminated as an invalid
+ // solution, but Max was not. The argument is that if there was another
+ // overflow between Min and Max, it would also have been eliminated if
+ // it was considered.
+ //
+ // For a given boundary, it is possible to have two overflows of the same
+ // type (signed/unsigned) without having the other type in between: this
+ // can happen when the vertex of the parabola is between the iterations
+ // corresponding to the overflows. This is only possible when the two
+ // overflows cross k*2^W for the same k. In such case, if the second one
+ // left the range (and was the first one to do so), the first overflow
+ // would have to enter the range, which would mean that either we had left
+ // the range before or that we started outside of it. Both of these cases
+ // are contradictions.
+ //
+ // Claim: In the case where SolveForBoundary returns None, the correct
+ // solution is not some value between the Max for this boundary and the
+ // Min of the other boundary.
+ //
+ // Justification: Assume that we had such Max_A and Min_B corresponding
+ // to range boundaries A and B and such that Max_A < Min_B. If there was
+ // a solution between Max_A and Min_B, it would have to be caused by an
+ // overflow corresponding to either A or B. It cannot correspond to B,
+ // since Min_B is the first occurrence of such an overflow. If it
+ // corresponded to A, it would have to be either a signed or an unsigned
+ // overflow that is larger than both eliminated overflows for A. But
+ // between the eliminated overflows and this overflow, the values would
+ // cover the entire value space, thus crossing the other boundary, which
+ // is a contradiction.
- return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
- cast<SCEVConstant>(SE.getConstant(Solution2)));
+ return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
}
ScalarEvolution::ExitLimit
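The boundary-crossing argument in the comments above can be made concrete with a small standalone sketch (illustrative only: plain 64-bit integers and brute force instead of APInt and the closed-form SolveQuadraticEquationWrap; the function names are invented). It finds the first iteration at which a wrapping quadratic chrec {L,+,M,+,N} leaves a half-open range, which is the quantity SolveForBoundary approximates from the signed and unsigned overflow sides:

#include <cstdint>
#include <optional>

// Value of the chrec {L,+,M,+,N} at iteration I, reduced mod 2^W (W < 64).
uint64_t chrecAt(uint64_t L, uint64_t M, uint64_t N, uint64_t I, unsigned W) {
  uint64_t Mask = (1ull << W) - 1;
  return (L + M * I + N * (I * (I - 1) / 2)) & Mask;
}

// First iteration whose value lies outside [Lo, Hi). Brute force, so only
// feasible for tiny W, but it makes "first boundary crossing" concrete.
std::optional<uint64_t> firstExit(uint64_t L, uint64_t M, uint64_t N,
                                  uint64_t Lo, uint64_t Hi, unsigned W) {
  for (uint64_t I = 0, E = 1ull << W; I != E; ++I) {
    uint64_t V = chrecAt(L, M, N, I, W);
    if (V < Lo || V >= Hi)
      return I;
  }
  return std::nullopt;
}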
@@ -8441,23 +8650,12 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
- if (auto Roots = SolveQuadraticEquation(AddRec, *this)) {
- const SCEVConstant *R1 = Roots->first;
- const SCEVConstant *R2 = Roots->second;
- // Pick the smallest positive root value.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
- CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
- if (!CB->getZExtValue())
- std::swap(R1, R2); // R1 is the minimum root now.
-
- // We can only use this value if the chrec ends up with an exact zero
- // value at this index. When solving for "X*X != 5", for example, we
- // should not accept a root of 2.
- const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
- if (Val->isZero())
- // We found a quadratic root!
- return ExitLimit(R1, R1, false, Predicates);
- }
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
+ const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
+ return ExitLimit(R, R, false, Predicates);
}
return getCouldNotCompute();
}
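A quick standalone check of the "X*X != 5" example from the comment (ordinary signed arithmetic here, not APInt) shows why no root is accepted: the chrec for X*X - 5 is {-5,+,1,+,2}, and it is never exactly zero at an integer iteration, so the exit count cannot be computed:

#include <cstdio>

int main() {
  // {-5,+,1,+,2}(I) = -5 + I + 2*I*(I-1)/2 = I*I - 5
  auto Chrec = [](long I) { return -5 + 1 * I + 2 * (I * (I - 1) / 2); };
  for (long I = 0; I < 4; ++I)
    std::printf("iter %ld -> %ld\n", I, Chrec(I)); // prints -5 -4 -1 4; never 0
  return 0;
}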
@@ -8617,7 +8815,13 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
unsigned Depth) {
bool Changed = false;
-
+ // Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
+ // '0 != 0'.
+ auto TrivialCase = [&](bool TriviallyTrue) {
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = TriviallyTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ return true;
+ };
// If we hit the max recursion limit bail out.
if (Depth >= 3)
return false;
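A hedged sketch of the canonical form produced by the TrivialCase helper above (the enum and struct here are made up, not the SCEV/ICmpInst types): both operands become the constant zero, and only the predicate distinguishes a provably-true comparison from a provably-false one:

#include <cstdint>

enum class Pred { EQ, NE, ULT };

struct Cmp { Pred P; uint64_t LHS, RHS; };

Cmp canonicalizeTrivial(bool TriviallyTrue) {
  // Both operands become the constant 0; only the predicate differs.
  return {TriviallyTrue ? Pred::EQ : Pred::NE, 0, 0};
}

// Example: "x <u 0" can never be true, so it is canonicalized to
// canonicalizeTrivial(false), i.e. the comparison 0 != 0.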
@@ -8629,9 +8833,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
if (ConstantExpr::getICmp(Pred,
LHSC->getValue(),
RHSC->getValue())->isNullValue())
- goto trivially_false;
+ return TrivialCase(false);
else
- goto trivially_true;
+ return TrivialCase(true);
}
// Otherwise swap the operands to put the constant on the right.
std::swap(LHS, RHS);
@@ -8661,9 +8865,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
if (!ICmpInst::isEquality(Pred)) {
ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
if (ExactCR.isFullSet())
- goto trivially_true;
+ return TrivialCase(true);
else if (ExactCR.isEmptySet())
- goto trivially_false;
+ return TrivialCase(false);
APInt NewRHS;
CmpInst::Predicate NewPred;
@@ -8699,7 +8903,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// The "Should have been caught earlier!" messages refer to the fact
// that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
// should have fired on the corresponding cases, and canonicalized the
- // check to trivially_true or trivially_false.
+ // check to a trivial case.
case ICmpInst::ICMP_UGE:
assert(!RA.isMinValue() && "Should have been caught earlier!");
@@ -8732,9 +8936,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// Check for obvious equality.
if (HasSameValue(LHS, RHS)) {
if (ICmpInst::isTrueWhenEqual(Pred))
- goto trivially_true;
+ return TrivialCase(true);
if (ICmpInst::isFalseWhenEqual(Pred))
- goto trivially_false;
+ return TrivialCase(false);
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
@@ -8802,18 +9006,6 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
return Changed;
-
-trivially_true:
- // Return 0 == 0.
- LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
- Pred = ICmpInst::ICMP_EQ;
- return true;
-
-trivially_false:
- // Return 0 != 0.
- LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
- Pred = ICmpInst::ICMP_NE;
- return true;
}
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
@@ -9184,6 +9376,11 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return true;
+ if (VerifyIR)
+ assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
+ "This cannot be done on broken IR!");
+
+
if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
@@ -9289,6 +9486,10 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return false;
+ if (VerifyIR)
+ assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
+ "This cannot be done on broken IR!");
+
// Both LHS and RHS must be available at loop entry.
assert(isAvailableAtLoopEntry(LHS, L) &&
"LHS is not available at Loop Entry");
@@ -10565,52 +10766,11 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
- } else if (isQuadratic()) {
- // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
- // quadratic equation to solve it. To do this, we must frame our problem in
- // terms of figuring out when zero is crossed, instead of when
- // Range.getUpper() is crossed.
- SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
- NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
- const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
-
- // Next, solve the constructed addrec
- if (auto Roots =
- SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE)) {
- const SCEVConstant *R1 = Roots->first;
- const SCEVConstant *R2 = Roots->second;
- // Pick the smallest positive root value.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
- ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
- if (!CB->getZExtValue())
- std::swap(R1, R2); // R1 is the minimum root now.
-
- // Make sure the root is not off by one. The returned iteration should
- // not be in the range, but the previous one should be. When solving
- // for "X*X < 5", for example, we should not return a root of 2.
- ConstantInt *R1Val =
- EvaluateConstantChrecAtConstant(this, R1->getValue(), SE);
- if (Range.contains(R1Val->getValue())) {
- // The next iteration must be out of the range...
- ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
-
- R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
- if (!Range.contains(R1Val->getValue()))
- return SE.getConstant(NextVal);
- return SE.getCouldNotCompute(); // Something strange happened
- }
+ }
- // If R1 was not in the range, then it is a good return value. Make
- // sure that R1-1 WAS in the range though, just in case.
- ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
- R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
- if (Range.contains(R1Val->getValue()))
- return R1;
- return SE.getCouldNotCompute(); // Something strange happened
- }
- }
+ if (isQuadratic()) {
+ if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
+ return SE.getConstant(S.getValue());
}
return SE.getCouldNotCompute();
@@ -10920,7 +11080,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
// Put larger terms first.
- llvm::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+ llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
return numberOfTerms(LHS) > numberOfTerms(RHS);
});
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 7bea994121c8..289d4f8ae49a 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -27,7 +27,7 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
- if (LocA.Size == 0 || LocB.Size == 0)
+ if (LocA.Size.isZero() || LocB.Size.isZero())
return NoAlias;
// This is SCEVAAResult. Get the SCEVs!
@@ -43,8 +43,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
if (SE.getEffectiveSCEVType(AS->getType()) ==
SE.getEffectiveSCEVType(BS->getType())) {
unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
- APInt ASizeInt(BitWidth, LocA.Size);
- APInt BSizeInt(BitWidth, LocB.Size);
+ APInt ASizeInt(BitWidth, LocA.Size.hasValue()
+ ? LocA.Size.getValue()
+ : MemoryLocation::UnknownSize);
+ APInt BSizeInt(BitWidth, LocB.Size.hasValue()
+ ? LocB.Size.getValue()
+ : MemoryLocation::UnknownSize);
// Compute the difference between the two pointers.
const SCEV *BA = SE.getMinusSCEV(BS, AS);
@@ -78,10 +82,10 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
Value *BO = GetBaseValue(BS);
if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
if (alias(MemoryLocation(AO ? AO : LocA.Ptr,
- AO ? +MemoryLocation::UnknownSize : LocA.Size,
+ AO ? LocationSize::unknown() : LocA.Size,
AO ? AAMDNodes() : LocA.AATags),
MemoryLocation(BO ? BO : LocB.Ptr,
- BO ? +MemoryLocation::UnknownSize : LocB.Size,
+ BO ? LocationSize::unknown() : LocB.Size,
BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
return NoAlias;
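The hunk above is part of the switch from raw sizes to LocationSize. A toy stand-in (not the LLVM type) shows why the code now has to ask hasValue() before building an APInt: an unknown size no longer compares equal to any integer, so the unknown case must be mapped to MemoryLocation::UnknownSize explicitly:

#include <cstdint>
#include <optional>

struct ToyLocationSize {
  std::optional<uint64_t> Value; // nullopt means "unknown"
  bool hasValue() const { return Value.has_value(); }
  uint64_t getValue() const { return *Value; }
  bool isZero() const { return Value && *Value == 0; }
};

// Mirrors the ?: expressions above: known sizes pass through, unknown sizes
// become an explicit sentinel value.
uint64_t toRawSize(const ToyLocationSize &S, uint64_t UnknownSize) {
  return S.hasValue() ? S.getValue() : UnknownSize;
}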
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 8f89389c4b5d..ca5cf1663b83 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1867,7 +1867,7 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Phis.push_back(&PN);
if (TTI)
- llvm::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
+ llvm::sort(Phis, [](Value *LHS, Value *RHS) {
// Put pointers at the back and make sure pointer < pointer = false.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index f12275aff387..9a581fe46afc 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -95,39 +95,36 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
return AAResultBase::alias(LocA, LocB);
}
-ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
- if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
- LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Loc.AATags.Scope,
+ Call->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- if (!mayAliasInScopes(
- CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- Loc.AATags.NoAlias))
+ if (!mayAliasInScopes(Call->getMetadata(LLVMContext::MD_alias_scope),
+ Loc.AATags.NoAlias))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
- if (!mayAliasInScopes(
- CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope),
+ Call2->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- if (!mayAliasInScopes(
- CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Call2->getMetadata(LLVMContext::MD_alias_scope),
+ Call1->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
diff --git a/contrib/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm/lib/Analysis/StackSafetyAnalysis.cpp
new file mode 100644
index 000000000000..66b03845864f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -0,0 +1,673 @@
+//===- StackSafetyAnalysis.cpp - Stack memory safety analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-safety"
+
+static cl::opt<int> StackSafetyMaxIterations("stack-safety-max-iterations",
+ cl::init(20), cl::Hidden);
+
+namespace {
+
+/// Rewrite an SCEV expression for a memory access address to an expression that
+/// represents offset from the given alloca.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+ const Value *AllocaPtr;
+
+public:
+ AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+ : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
+
+ const SCEV *visit(const SCEV *Expr) {
+ // Only re-write the expression if the alloca is used in an addition
+ // expression (it can be used in other types of expressions if it's cast to
+ // an int and passed as an argument).
+ if (!isa<SCEVAddRecExpr>(Expr) && !isa<SCEVAddExpr>(Expr) &&
+ !isa<SCEVUnknown>(Expr))
+ return Expr;
+ return SCEVRewriteVisitor<AllocaOffsetRewriter>::visit(Expr);
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ // FIXME: look through one or several levels of definitions?
+ // This can be inttoptr(AllocaPtr) and SCEV would not unwrap
+ // it for us.
+ if (Expr->getValue() == AllocaPtr)
+ return SE.getZero(Expr->getType());
+ return Expr;
+ }
+};
+
+/// Describes use of an address as a function call argument.
+struct PassAsArgInfo {
+ /// Function being called.
+ const GlobalValue *Callee = nullptr;
+ /// Index of the argument which passes the address.
+ size_t ParamNo = 0;
+ // Offset range of the address from the base address (alloca or calling
+ // function argument).
+ // Range should never be set to the empty set; that is an invalid access
+ // range that can cause the empty set to be propagated with ConstantRange::add.
+ ConstantRange Offset;
+ PassAsArgInfo(const GlobalValue *Callee, size_t ParamNo, ConstantRange Offset)
+ : Callee(Callee), ParamNo(ParamNo), Offset(Offset) {}
+
+ StringRef getName() const { return Callee->getName(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const PassAsArgInfo &P) {
+ return OS << "@" << P.getName() << "(arg" << P.ParamNo << ", " << P.Offset
+ << ")";
+}
+
+/// Describe uses of address (alloca or parameter) inside of the function.
+struct UseInfo {
+ // Access range of the address (alloca or parameter).
+ // It is allowed to be the empty set when there are no known accesses.
+ ConstantRange Range;
+
+ // List of calls which pass the address as an argument.
+ SmallVector<PassAsArgInfo, 4> Calls;
+
+ explicit UseInfo(unsigned PointerSize) : Range{PointerSize, false} {}
+
+ void updateRange(ConstantRange R) { Range = Range.unionWith(R); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const UseInfo &U) {
+ OS << U.Range;
+ for (auto &Call : U.Calls)
+ OS << ", " << Call;
+ return OS;
+}
+
+struct AllocaInfo {
+ const AllocaInst *AI = nullptr;
+ uint64_t Size = 0;
+ UseInfo Use;
+
+ AllocaInfo(unsigned PointerSize, const AllocaInst *AI, uint64_t Size)
+ : AI(AI), Size(Size), Use(PointerSize) {}
+
+ StringRef getName() const { return AI->getName(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const AllocaInfo &A) {
+ return OS << A.getName() << "[" << A.Size << "]: " << A.Use;
+}
+
+struct ParamInfo {
+ const Argument *Arg = nullptr;
+ UseInfo Use;
+
+ explicit ParamInfo(unsigned PointerSize, const Argument *Arg)
+ : Arg(Arg), Use(PointerSize) {}
+
+ StringRef getName() const { return Arg ? Arg->getName() : "<N/A>"; }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ParamInfo &P) {
+ return OS << P.getName() << "[]: " << P.Use;
+}
+
+/// Calculate the allocation size of a given alloca. Returns 0 if the
+/// size cannot be statically determined.
+uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C)
+ return 0;
+ Size *= C->getZExtValue();
+ }
+ return Size;
+}
+
+} // end anonymous namespace
+
+/// Describes uses of allocas and parameters inside of a single function.
+struct StackSafetyInfo::FunctionInfo {
+ // May be a Function or a GlobalAlias
+ const GlobalValue *GV = nullptr;
+ // Information about alloca uses.
+ SmallVector<AllocaInfo, 4> Allocas;
+ // Information about parameter uses.
+ SmallVector<ParamInfo, 4> Params;
+ // TODO: describe return value as depending on one or more of its arguments.
+
+ // StackSafetyDataFlowAnalysis counter stored here for faster access.
+ int UpdateCount = 0;
+
+ FunctionInfo(const StackSafetyInfo &SSI) : FunctionInfo(*SSI.Info) {}
+
+ explicit FunctionInfo(const Function *F) : GV(F){};
+ // Creates FunctionInfo that forwards all the parameters to the aliasee.
+ explicit FunctionInfo(const GlobalAlias *A);
+
+ FunctionInfo(FunctionInfo &&) = default;
+
+ bool IsDSOLocal() const { return GV->isDSOLocal(); };
+
+ bool IsInterposable() const { return GV->isInterposable(); };
+
+ StringRef getName() const { return GV->getName(); }
+
+ void print(raw_ostream &O) const {
+ // TODO: Consider different printout format after
+ // StackSafetyDataFlowAnalysis. Calls and parameters are irrelevant then.
+ O << " @" << getName() << (IsDSOLocal() ? "" : " dso_preemptable")
+ << (IsInterposable() ? " interposable" : "") << "\n";
+ O << " args uses:\n";
+ for (auto &P : Params)
+ O << " " << P << "\n";
+ O << " allocas uses:\n";
+ for (auto &AS : Allocas)
+ O << " " << AS << "\n";
+ }
+
+private:
+ FunctionInfo(const FunctionInfo &) = default;
+};
+
+StackSafetyInfo::FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) {
+ unsigned PointerSize = A->getParent()->getDataLayout().getPointerSizeInBits();
+ const GlobalObject *Aliasee = A->getBaseObject();
+ const FunctionType *Type = cast<FunctionType>(Aliasee->getValueType());
+ // 'Forward' all parameters of this alias to the aliasee
+ for (unsigned ArgNo = 0; ArgNo < Type->getNumParams(); ArgNo++) {
+ Params.emplace_back(PointerSize, nullptr);
+ UseInfo &US = Params.back().Use;
+ US.Calls.emplace_back(Aliasee, ArgNo, ConstantRange(APInt(PointerSize, 0)));
+ }
+}
+
+namespace {
+
+class StackSafetyLocalAnalysis {
+ const Function &F;
+ const DataLayout &DL;
+ ScalarEvolution &SE;
+ unsigned PointerSize = 0;
+
+ const ConstantRange UnknownRange;
+
+ ConstantRange offsetFromAlloca(Value *Addr, const Value *AllocaPtr);
+ ConstantRange getAccessRange(Value *Addr, const Value *AllocaPtr,
+ uint64_t AccessSize);
+ ConstantRange getMemIntrinsicAccessRange(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr);
+
+ bool analyzeAllUses(const Value *Ptr, UseInfo &AS);
+
+ ConstantRange getRange(uint64_t Lower, uint64_t Upper) const {
+ return ConstantRange(APInt(PointerSize, Lower), APInt(PointerSize, Upper));
+ }
+
+public:
+ StackSafetyLocalAnalysis(const Function &F, ScalarEvolution &SE)
+ : F(F), DL(F.getParent()->getDataLayout()), SE(SE),
+ PointerSize(DL.getPointerSizeInBits()),
+ UnknownRange(PointerSize, true) {}
+
+ // Run the transformation on the associated function.
+ StackSafetyInfo run();
+};
+
+ConstantRange
+StackSafetyLocalAnalysis::offsetFromAlloca(Value *Addr,
+ const Value *AllocaPtr) {
+ if (!SE.isSCEVable(Addr->getType()))
+ return UnknownRange;
+
+ AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+ ConstantRange Offset = SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize);
+ assert(!Offset.isEmptySet());
+ return Offset;
+}
+
+ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr,
+ const Value *AllocaPtr,
+ uint64_t AccessSize) {
+ if (!SE.isSCEVable(Addr->getType()))
+ return UnknownRange;
+
+ AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+
+ ConstantRange AccessStartRange =
+ SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize);
+ ConstantRange SizeRange = getRange(0, AccessSize);
+ ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+ assert(!AccessRange.isEmptySet());
+ return AccessRange;
+}
+
+ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
+ const MemIntrinsic *MI, const Use &U, const Value *AllocaPtr) {
+ if (auto MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != U && MTI->getRawDest() != U)
+ return getRange(0, 1);
+ } else {
+ if (MI->getRawDest() != U)
+ return getRange(0, 1);
+ }
+ const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+ // Non-constant size => unsafe. FIXME: try SCEV getRange.
+ if (!Len)
+ return UnknownRange;
+ ConstantRange AccessRange = getAccessRange(U, AllocaPtr, Len->getZExtValue());
+ return AccessRange;
+}
+
+/// The function analyzes all local uses of Ptr (alloca or argument) and
+/// calculates local access range and all function calls where it was used.
+bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) {
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(Ptr);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load: {
+ US.updateRange(
+ getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
+ break;
+ }
+
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store: {
+ if (V == I->getOperand(0)) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ US.updateRange(UnknownRange);
+ return false;
+ }
+ US.updateRange(getAccessRange(
+ UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
+ break;
+ }
+
+ case Instruction::Ret:
+ // Information leak.
+ // FIXME: Process parameters correctly. This is a leak only if we return
+ // alloca.
+ US.updateRange(UnknownRange);
+ return false;
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ ImmutableCallSite CS(I);
+
+ if (I->isLifetimeStartOrEnd())
+ break;
+
+ if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ US.updateRange(getMemIntrinsicAccessRange(MI, UI, Ptr));
+ break;
+ }
+
+ // FIXME: consult devirt?
+ // Do not follow aliases, otherwise we could inadvertently follow
+ // dso_preemptable aliases or aliases with interposable linkage.
+ const GlobalValue *Callee = dyn_cast<GlobalValue>(
+ CS.getCalledValue()->stripPointerCastsNoFollowAliases());
+ if (!Callee) {
+ US.updateRange(UnknownRange);
+ return false;
+ }
+
+ assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee));
+
+ ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) {
+ if (A->get() == V) {
+ ConstantRange Offset = offsetFromAlloca(UI, Ptr);
+ US.Calls.emplace_back(Callee, A - B, Offset);
+ }
+ }
+
+ break;
+ }
+
+ default:
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+ }
+ }
+
+ return true;
+}
+
+StackSafetyInfo StackSafetyLocalAnalysis::run() {
+ StackSafetyInfo::FunctionInfo Info(&F);
+ assert(!F.isDeclaration() &&
+ "Can't run StackSafety on a function declaration");
+
+ LLVM_DEBUG(dbgs() << "[StackSafety] " << F.getName() << "\n");
+
+ for (auto &I : instructions(F)) {
+ if (auto AI = dyn_cast<AllocaInst>(&I)) {
+ Info.Allocas.emplace_back(PointerSize, AI,
+ getStaticAllocaAllocationSize(AI));
+ AllocaInfo &AS = Info.Allocas.back();
+ analyzeAllUses(AI, AS.Use);
+ }
+ }
+
+ for (const Argument &A : make_range(F.arg_begin(), F.arg_end())) {
+ Info.Params.emplace_back(PointerSize, &A);
+ ParamInfo &PS = Info.Params.back();
+ analyzeAllUses(&A, PS.Use);
+ }
+
+ LLVM_DEBUG(dbgs() << "[StackSafety] done\n");
+ LLVM_DEBUG(Info.print(dbgs()));
+ return StackSafetyInfo(std::move(Info));
+}
+
+class StackSafetyDataFlowAnalysis {
+ using FunctionMap =
+ std::map<const GlobalValue *, StackSafetyInfo::FunctionInfo>;
+
+ FunctionMap Functions;
+ // Callee-to-Caller multimap.
+ DenseMap<const GlobalValue *, SmallVector<const GlobalValue *, 4>> Callers;
+ SetVector<const GlobalValue *> WorkList;
+
+ unsigned PointerSize = 0;
+ const ConstantRange UnknownRange;
+
+ ConstantRange getArgumentAccessRange(const GlobalValue *Callee,
+ unsigned ParamNo) const;
+ bool updateOneUse(UseInfo &US, bool UpdateToFullSet);
+ void updateOneNode(const GlobalValue *Callee,
+ StackSafetyInfo::FunctionInfo &FS);
+ void updateOneNode(const GlobalValue *Callee) {
+ updateOneNode(Callee, Functions.find(Callee)->second);
+ }
+ void updateAllNodes() {
+ for (auto &F : Functions)
+ updateOneNode(F.first, F.second);
+ }
+ void runDataFlow();
+ void verifyFixedPoint();
+
+public:
+ StackSafetyDataFlowAnalysis(
+ Module &M, std::function<const StackSafetyInfo &(Function &)> FI);
+ StackSafetyGlobalInfo run();
+};
+
+StackSafetyDataFlowAnalysis::StackSafetyDataFlowAnalysis(
+ Module &M, std::function<const StackSafetyInfo &(Function &)> FI)
+ : PointerSize(M.getDataLayout().getPointerSizeInBits()),
+ UnknownRange(PointerSize, true) {
+ // Without ThinLTO, run the local analysis for every function in the TU and
+ // then run the DFA.
+ for (auto &F : M.functions())
+ if (!F.isDeclaration())
+ Functions.emplace(&F, FI(F));
+ for (auto &A : M.aliases())
+ if (isa<Function>(A.getBaseObject()))
+ Functions.emplace(&A, StackSafetyInfo::FunctionInfo(&A));
+}
+
+ConstantRange
+StackSafetyDataFlowAnalysis::getArgumentAccessRange(const GlobalValue *Callee,
+ unsigned ParamNo) const {
+ auto IT = Functions.find(Callee);
+ // Unknown callee (outside of LTO domain or an indirect call).
+ if (IT == Functions.end())
+ return UnknownRange;
+ const StackSafetyInfo::FunctionInfo &FS = IT->second;
+ // The definition of this symbol may not be the definition in this linkage
+ // unit.
+ if (!FS.IsDSOLocal() || FS.IsInterposable())
+ return UnknownRange;
+ if (ParamNo >= FS.Params.size()) // possibly vararg
+ return UnknownRange;
+ return FS.Params[ParamNo].Use.Range;
+}
+
+bool StackSafetyDataFlowAnalysis::updateOneUse(UseInfo &US,
+ bool UpdateToFullSet) {
+ bool Changed = false;
+ for (auto &CS : US.Calls) {
+ assert(!CS.Offset.isEmptySet() &&
+ "Param range can't be empty-set, invalid offset range");
+
+ ConstantRange CalleeRange = getArgumentAccessRange(CS.Callee, CS.ParamNo);
+ CalleeRange = CalleeRange.add(CS.Offset);
+ if (!US.Range.contains(CalleeRange)) {
+ Changed = true;
+ if (UpdateToFullSet)
+ US.Range = UnknownRange;
+ else
+ US.Range = US.Range.unionWith(CalleeRange);
+ }
+ }
+ return Changed;
+}
+
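A toy sketch of the propagation step updateOneUse performs (the interval type and helper names are invented; the real code uses ConstantRange, which also models wrapping): the caller's known access range is widened by the callee's argument range shifted by the call-site offset, and the returned flag plays the role of "Changed" in the data flow:

#include <algorithm>
#include <cstdint>

struct Interval { uint64_t Lo = 0, Hi = 0; }; // [Lo, Hi)

Interval shift(Interval I, uint64_t Offset) { return {I.Lo + Offset, I.Hi + Offset}; }

// Grow Caller until it covers CalleeShifted; report whether anything changed,
// which is what puts callers back on the work list.
bool growToCover(Interval &Caller, Interval CalleeShifted) {
  Interval Old = Caller;
  Caller.Lo = std::min(Caller.Lo, CalleeShifted.Lo);
  Caller.Hi = std::max(Caller.Hi, CalleeShifted.Hi);
  return Caller.Lo != Old.Lo || Caller.Hi != Old.Hi;
}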
+void StackSafetyDataFlowAnalysis::updateOneNode(
+ const GlobalValue *Callee, StackSafetyInfo::FunctionInfo &FS) {
+ bool UpdateToFullSet = FS.UpdateCount > StackSafetyMaxIterations;
+ bool Changed = false;
+ for (auto &AS : FS.Allocas)
+ Changed |= updateOneUse(AS.Use, UpdateToFullSet);
+ for (auto &PS : FS.Params)
+ Changed |= updateOneUse(PS.Use, UpdateToFullSet);
+
+ if (Changed) {
+ LLVM_DEBUG(dbgs() << "=== update [" << FS.UpdateCount
+ << (UpdateToFullSet ? ", full-set" : "") << "] "
+ << FS.getName() << "\n");
+ // Callers of this function may need updating.
+ for (auto &CallerID : Callers[Callee])
+ WorkList.insert(CallerID);
+
+ ++FS.UpdateCount;
+ }
+}
+
+void StackSafetyDataFlowAnalysis::runDataFlow() {
+ Callers.clear();
+ WorkList.clear();
+
+ SmallVector<const GlobalValue *, 16> Callees;
+ for (auto &F : Functions) {
+ Callees.clear();
+ StackSafetyInfo::FunctionInfo &FS = F.second;
+ for (auto &AS : FS.Allocas)
+ for (auto &CS : AS.Use.Calls)
+ Callees.push_back(CS.Callee);
+ for (auto &PS : FS.Params)
+ for (auto &CS : PS.Use.Calls)
+ Callees.push_back(CS.Callee);
+
+ llvm::sort(Callees);
+ Callees.erase(std::unique(Callees.begin(), Callees.end()), Callees.end());
+
+ for (auto &Callee : Callees)
+ Callers[Callee].push_back(F.first);
+ }
+
+ updateAllNodes();
+
+ while (!WorkList.empty()) {
+ const GlobalValue *Callee = WorkList.back();
+ WorkList.pop_back();
+ updateOneNode(Callee);
+ }
+}
+
+void StackSafetyDataFlowAnalysis::verifyFixedPoint() {
+ WorkList.clear();
+ updateAllNodes();
+ assert(WorkList.empty());
+}
+
+StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() {
+ runDataFlow();
+ LLVM_DEBUG(verifyFixedPoint());
+
+ StackSafetyGlobalInfo SSI;
+ for (auto &F : Functions)
+ SSI.emplace(F.first, std::move(F.second));
+ return SSI;
+}
+
+void print(const StackSafetyGlobalInfo &SSI, raw_ostream &O, const Module &M) {
+ size_t Count = 0;
+ for (auto &F : M.functions())
+ if (!F.isDeclaration()) {
+ SSI.find(&F)->second.print(O);
+ O << "\n";
+ ++Count;
+ }
+ for (auto &A : M.aliases()) {
+ SSI.find(&A)->second.print(O);
+ O << "\n";
+ ++Count;
+ }
+ assert(Count == SSI.size() && "Unexpected functions in the result");
+}
+
+} // end anonymous namespace
+
+StackSafetyInfo::StackSafetyInfo() = default;
+StackSafetyInfo::StackSafetyInfo(StackSafetyInfo &&) = default;
+StackSafetyInfo &StackSafetyInfo::operator=(StackSafetyInfo &&) = default;
+
+StackSafetyInfo::StackSafetyInfo(FunctionInfo &&Info)
+ : Info(new FunctionInfo(std::move(Info))) {}
+
+StackSafetyInfo::~StackSafetyInfo() = default;
+
+void StackSafetyInfo::print(raw_ostream &O) const { Info->print(O); }
+
+AnalysisKey StackSafetyAnalysis::Key;
+
+StackSafetyInfo StackSafetyAnalysis::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ StackSafetyLocalAnalysis SSLA(F, AM.getResult<ScalarEvolutionAnalysis>(F));
+ return SSLA.run();
+}
+
+PreservedAnalyses StackSafetyPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "'Stack Safety Local Analysis' for function '" << F.getName() << "'\n";
+ AM.getResult<StackSafetyAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
+
+char StackSafetyInfoWrapperPass::ID = 0;
+
+StackSafetyInfoWrapperPass::StackSafetyInfoWrapperPass() : FunctionPass(ID) {
+ initializeStackSafetyInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+void StackSafetyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void StackSafetyInfoWrapperPass::print(raw_ostream &O, const Module *M) const {
+ SSI.print(O);
+}
+
+bool StackSafetyInfoWrapperPass::runOnFunction(Function &F) {
+ StackSafetyLocalAnalysis SSLA(
+ F, getAnalysis<ScalarEvolutionWrapperPass>().getSE());
+ SSI = StackSafetyInfo(SSLA.run());
+ return false;
+}
+
+AnalysisKey StackSafetyGlobalAnalysis::Key;
+
+StackSafetyGlobalInfo
+StackSafetyGlobalAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ StackSafetyDataFlowAnalysis SSDFA(
+ M, [&FAM](Function &F) -> const StackSafetyInfo & {
+ return FAM.getResult<StackSafetyAnalysis>(F);
+ });
+ return SSDFA.run();
+}
+
+PreservedAnalyses StackSafetyGlobalPrinterPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ OS << "'Stack Safety Analysis' for module '" << M.getName() << "'\n";
+ print(AM.getResult<StackSafetyGlobalAnalysis>(M), OS, M);
+ return PreservedAnalyses::all();
+}
+
+char StackSafetyGlobalInfoWrapperPass::ID = 0;
+
+StackSafetyGlobalInfoWrapperPass::StackSafetyGlobalInfoWrapperPass()
+ : ModulePass(ID) {
+ initializeStackSafetyGlobalInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void StackSafetyGlobalInfoWrapperPass::print(raw_ostream &O,
+ const Module *M) const {
+ ::print(SSI, O, *M);
+}
+
+void StackSafetyGlobalInfoWrapperPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<StackSafetyInfoWrapperPass>();
+}
+
+bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) {
+ StackSafetyDataFlowAnalysis SSDFA(
+ M, [this](Function &F) -> const StackSafetyInfo & {
+ return getAnalysis<StackSafetyInfoWrapperPass>(F).getResult();
+ });
+ SSI = SSDFA.run();
+ return false;
+}
+
+static const char LocalPassArg[] = "stack-safety-local";
+static const char LocalPassName[] = "Stack Safety Local Analysis";
+INITIALIZE_PASS_BEGIN(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName,
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName,
+ false, true)
+
+static const char GlobalPassName[] = "Stack Safety Analysis";
+INITIALIZE_PASS_BEGIN(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE,
+ GlobalPassName, false, false)
+INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass)
+INITIALIZE_PASS_END(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE,
+ GlobalPassName, false, false)
diff --git a/contrib/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
new file mode 100644
index 000000000000..e1a7e4476d12
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -0,0 +1,380 @@
+//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
+//--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an algorithm that returns for a divergent branch
+// the set of basic blocks whose phi nodes become divergent due to divergent
+// control. These are the blocks that are reachable by two disjoint paths from
+// the branch or loop exits that have a reaching path that is disjoint from a
+// path to the loop latch.
+//
+// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
+// control-induced divergence in phi nodes.
+//
+// -- Summary --
+// The SyncDependenceAnalysis lazily computes sync dependences [3].
+// The analysis evaluates the disjoint path criterion [2] by a reduction
+// to SSA construction. The SSA construction algorithm is implemented as
+// a simple data-flow analysis [1].
+//
+// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
+// [2] "Efficiently Computing Static Single Assignment Form
+// and the Control Dependence Graph", TOPLAS '91,
+// Cytron, Ferrante, Rosen, Wegman and Zadeck
+// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
+// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
+//
+// -- Sync dependence --
+// Sync dependence [4] characterizes the control flow aspect of the
+// propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// -- Reduction to SSA construction --
+// There are two disjoint paths from A to X, if a certain variant of SSA
+// construction places a phi node in X under the following set-up scheme [2].
+//
+// This variant of SSA construction ignores incoming undef values.
+// That is paths from the entry without a definition do not result in
+// phi nodes.
+//
+// entry
+// / \
+// A \
+// / \ Y
+// B C /
+// \ / \ /
+// D E
+// \ /
+// F
+// Assume that A contains a divergent branch. We are interested
+// in the set of all blocks where each block is reachable from A
+// via two disjoint paths. This would be the set {D, F} in this
+// case.
+// To generally reduce this query to SSA construction we introduce
+// a virtual variable x and assign to x different values in each
+// successor block of A.
+// entry
+// / \
+// A \
+// / \ Y
+// x = 0 x = 1 /
+// \ / \ /
+// D E
+// \ /
+// F
+// Our flavor of SSA construction for x will construct the following
+// entry
+// / \
+// A \
+// / \ Y
+// x0 = 0 x1 = 1 /
+// \ / \ /
+// x2=phi E
+// \ /
+// x3=phi
+// The blocks D and F contain phi nodes and are thus each reachable
+// by two disjoint paths from A.
+//
+// -- Remarks --
+// In case of loop exits we need to check the disjoint path criterion for loops
+// [2]. To this end, we check whether the definition of x differs between the
+// loop exit and the loop header (_after_ SSA construction).
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/SyncDependenceAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+
+#include <stack>
+#include <unordered_set>
+
+#define DEBUG_TYPE "sync-dependence"
+
+namespace llvm {
+
+ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
+
+SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ const LoopInfo &LI)
+ : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
+
+SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
+
+using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
+
+// divergence propagator for reducible CFGs
+struct DivergencePropagator {
+ const FunctionRPOT &FuncRPOT;
+ const DominatorTree &DT;
+ const PostDominatorTree &PDT;
+ const LoopInfo &LI;
+
+ // identified join points
+ std::unique_ptr<ConstBlockSet> JoinBlocks;
+
+ // reached loop exits (by a path disjoint to a path to the loop header)
+ SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
+
+ // if DefMap[B] == C then C is the dominating definition at block B
+ // if DefMap[B] ~ undef then we haven't seen B yet
+ // if DefMap[B] == B then B is a join point of disjoint paths from X or B is
+ // an immediate successor of X (initial value).
+ using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
+ DefiningBlockMap DefMap;
+
+ // all blocks with pending visits
+ std::unordered_set<const BasicBlock *> PendingUpdates;
+
+ DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI)
+ : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
+ JoinBlocks(new ConstBlockSet) {}
+
+ // set the definition at @block and mark @block as pending for a visit
+ void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
+ bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
+ if (WasAdded)
+ PendingUpdates.insert(&Block);
+ }
+
+ void printDefs(raw_ostream &Out) {
+ Out << "Propagator::DefMap {\n";
+ for (const auto *Block : FuncRPOT) {
+ auto It = DefMap.find(Block);
+ Out << Block->getName() << " : ";
+ if (It == DefMap.end()) {
+ Out << "\n";
+ } else {
+ const auto *DefBlock = It->second;
+ Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
+ }
+ }
+ Out << "}\n";
+ }
+
+ // process @succBlock with reaching definition @defBlock
+ // the original divergent branch was in @parentLoop (if any)
+ void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
+ const BasicBlock &DefBlock) {
+
+ // @succBlock is a loop exit
+ if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
+ DefMap.emplace(&SuccBlock, &DefBlock);
+ ReachedLoopExits.insert(&SuccBlock);
+ return;
+ }
+
+ // first reaching def?
+ auto ItLastDef = DefMap.find(&SuccBlock);
+ if (ItLastDef == DefMap.end()) {
+ addPending(SuccBlock, DefBlock);
+ return;
+ }
+
+ // a join of at least two definitions
+ if (ItLastDef->second != &DefBlock) {
+ // do we know this join already?
+ if (!JoinBlocks->insert(&SuccBlock).second)
+ return;
+
+ // update the definition
+ addPending(SuccBlock, SuccBlock);
+ }
+ }
+
+ // find all blocks reachable by two disjoint paths from @rootTerm.
+ // This method works for both divergent terminators and loops with
+ // divergent exits.
+ // @rootBlock is either the block containing the branch or the header of the
+ // divergent loop.
+ // @nodeSuccessors is the set of successors of the node (Loop or Terminator)
+ // headed by @rootBlock.
+ // @parentLoop is the parent loop of the Loop or the loop that contains the
+ // Terminator.
+ template <typename SuccessorIterable>
+ std::unique_ptr<ConstBlockSet>
+ computeJoinPoints(const BasicBlock &RootBlock,
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
+ assert(JoinBlocks);
+
+ // immediate post dominator (no join block beyond that block)
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+
+ // bootstrap with branch targets
+ for (const auto *SuccBlock : NodeSuccessors) {
+ DefMap.emplace(SuccBlock, SuccBlock);
+
+ if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
+ // immediate loop exit from node.
+ ReachedLoopExits.insert(SuccBlock);
+ continue;
+ } else {
+ // regular successor
+ PendingUpdates.insert(SuccBlock);
+ }
+ }
+
+ auto ItBeginRPO = FuncRPOT.begin();
+
+ // skip until term (TODO RPOT won't let us start at @term directly)
+ for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {}
+
+ auto ItEndRPO = FuncRPOT.end();
+ assert(ItBeginRPO != ItEndRPO);
+
+ // propagate definitions at the immediate successors of the node in RPO
+ auto ItBlockRPO = ItBeginRPO;
+ while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
+ const auto *Block = *ItBlockRPO;
+
+ // skip @block if not pending update
+ auto ItPending = PendingUpdates.find(Block);
+ if (ItPending == PendingUpdates.end())
+ continue;
+ PendingUpdates.erase(ItPending);
+
+ // propagate definition at @block to its successors
+ auto ItDef = DefMap.find(Block);
+ const auto *DefBlock = ItDef->second;
+ assert(DefBlock);
+
+ auto *BlockLoop = LI.getLoopFor(Block);
+ if (ParentLoop &&
+ (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
+ // if the successor is the header of a nested loop pretend it's a
+ // single node with the loop's exits as successors
+ SmallVector<BasicBlock *, 4> BlockLoopExits;
+ BlockLoop->getExitBlocks(BlockLoopExits);
+ for (const auto *BlockLoopExit : BlockLoopExits) {
+ visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
+ }
+
+ } else {
+ // the successors are either on the same loop level or loop exits
+ for (const auto *SuccBlock : successors(Block)) {
+ visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
+ }
+ }
+ }
+
+ // We need to know the definition at the parent loop header to decide
+ // whether the definition at the header is different from the definition at
+ // the loop exits, which would indicate divergent loop exits.
+ //
+ // A // loop header
+ // |
+ // B // nested loop header
+ // |
+ // C -> X (exit from B loop) -..-> (A latch)
+ // |
+ // D -> back to B (B latch)
+ // |
+ // proper exit from both loops
+ //
+ // D post-dominates B as it is the only proper exit from the "A loop".
+ // If C has a divergent branch, propagation will therefore stop at D.
+ // That implies that B will never receive a definition.
+ // But that definition can only be the same as at D (D itself in this case)
+ // because all paths to anywhere have to pass through D.
+ //
+ const BasicBlock *ParentLoopHeader =
+ ParentLoop ? ParentLoop->getHeader() : nullptr;
+ if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
+ DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
+ }
+
+ // analyze reached loop exits
+ if (!ReachedLoopExits.empty()) {
+ assert(ParentLoop);
+ const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
+ LLVM_DEBUG(printDefs(dbgs()));
+ assert(HeaderDefBlock && "no definition in header of carrying loop");
+
+ for (const auto *ExitBlock : ReachedLoopExits) {
+ auto ItExitDef = DefMap.find(ExitBlock);
+ assert((ItExitDef != DefMap.end()) &&
+ "no reaching def at reachable loop exit");
+ if (ItExitDef->second != HeaderDefBlock) {
+ JoinBlocks->insert(ExitBlock);
+ }
+ }
+ }
+
+ return std::move(JoinBlocks);
+ }
+};
+
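The SSA reduction described in the header comment can be sketched standalone (simplifying assumptions: an acyclic CFG, a precomputed topological order, and string block names; the real propagator uses the function's RPO and handles nested loops and loop exits): each successor of the divergent branch starts with its own definition, definitions are forwarded along edges, and any block where two different definitions meet is a join block:

#include <map>
#include <set>
#include <string>
#include <vector>

using CFG = std::map<std::string, std::vector<std::string>>; // block -> successors

// Topo lists every block in topological order; every block needs a map entry
// (possibly empty). Returns the blocks reachable from Branch by two disjoint
// paths.
std::set<std::string> joinBlocks(const CFG &Succs,
                                 const std::vector<std::string> &Topo,
                                 const std::string &Branch) {
  std::map<std::string, std::string> Def; // reaching "definition" per block
  std::set<std::string> Joins;
  for (const std::string &S : Succs.at(Branch))
    Def[S] = S;                               // distinct value per branch target
  for (const std::string &B : Topo) {
    auto ItB = Def.find(B);
    if (ItB == Def.end())
      continue;                               // not reached from the branch
    for (const std::string &S : Succs.at(B)) {
      auto ItS = Def.find(S);
      if (ItS == Def.end())
        Def[S] = ItB->second;                 // first reaching definition
      else if (ItS->second != ItB->second) {
        Joins.insert(S);                      // two different defs meet here
        ItS->second = S;                      // the join acts as a fresh def
      }
    }
  }
  return Joins;
}

// For the diamond A -> {B, C}, B -> D, C -> D (Topo = A, B, C, D) this
// returns {D}, matching the phi placement in the example above.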
+const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
+ using LoopExitVec = SmallVector<BasicBlock *, 4>;
+ LoopExitVec LoopExits;
+ Loop.getExitBlocks(LoopExits);
+ if (LoopExits.size() < 1) {
+ return EmptyBlockSet;
+ }
+
+ // already available in cache?
+ auto ItCached = CachedLoopExitJoins.find(&Loop);
+ if (ItCached != CachedLoopExitJoins.end())
+ return *ItCached->second;
+
+ // compute all join points
+ DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
+ auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop());
+
+ auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
+ assert(ItInserted.second);
+ return *ItInserted.first->second;
+}
+
+const ConstBlockSet &
+SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
+ // trivial case
+ if (Term.getNumSuccessors() < 1) {
+ return EmptyBlockSet;
+ }
+
+ // already available in cache?
+ auto ItCached = CachedBranchJoins.find(&Term);
+ if (ItCached != CachedBranchJoins.end())
+ return *ItCached->second;
+
+ // compute all join points
+ DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
+ const auto &TermBlock = *Term.getParent();
+ auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+
+ auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
+ assert(ItInserted.second);
+ return *ItInserted.first->second;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp
index b085fa274d7f..c2d7bb11a4cf 100644
--- a/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/contrib/llvm/lib/Analysis/SyntheticCountsUtils.cpp
@@ -14,22 +14,21 @@
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
using namespace llvm;
// Given an SCC, propagate entry counts along the edge of the SCC nodes.
template <typename CallGraphType>
void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
- const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
- AddCountTy AddCount) {
+ const SccTy &SCC, GetProfCountTy GetProfCount, AddCountTy AddCount) {
- SmallPtrSet<NodeRef, 8> SCCNodes;
+ DenseSet<NodeRef> SCCNodes;
SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges;
for (auto &Node : SCC)
@@ -54,17 +53,13 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
// This ensures that the order of
// traversal of nodes within the SCC doesn't affect the final result.
- DenseMap<NodeRef, uint64_t> AdditionalCounts;
+ DenseMap<NodeRef, Scaled64> AdditionalCounts;
for (auto &E : SCCEdges) {
- auto OptRelFreq = GetRelBBFreq(E.second);
- if (!OptRelFreq)
+ auto OptProfCount = GetProfCount(E.first, E.second);
+ if (!OptProfCount)
continue;
- Scaled64 RelFreq = OptRelFreq.getValue();
- auto Caller = E.first;
auto Callee = CGT::edge_dest(E.second);
- RelFreq *= Scaled64(GetCount(Caller), 0);
- uint64_t AdditionalCount = RelFreq.toInt<uint64_t>();
- AdditionalCounts[Callee] += AdditionalCount;
+ AdditionalCounts[Callee] += OptProfCount.getValue();
}
// Update the counts for the nodes in the SCC.
@@ -73,14 +68,11 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
// Now update the counts for nodes outside the SCC.
for (auto &E : NonSCCEdges) {
- auto OptRelFreq = GetRelBBFreq(E.second);
- if (!OptRelFreq)
+ auto OptProfCount = GetProfCount(E.first, E.second);
+ if (!OptProfCount)
continue;
- Scaled64 RelFreq = OptRelFreq.getValue();
- auto Caller = E.first;
auto Callee = CGT::edge_dest(E.second);
- RelFreq *= Scaled64(GetCount(Caller), 0);
- AddCount(Callee, RelFreq.toInt<uint64_t>());
+ AddCount(Callee, OptProfCount.getValue());
}
}
@@ -94,8 +86,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
template <typename CallGraphType>
void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
- GetRelBBFreqTy GetRelBBFreq,
- GetCountTy GetCount,
+ GetProfCountTy GetProfCount,
AddCountTy AddCount) {
std::vector<SccTy> SCCs;
@@ -107,7 +98,8 @@ void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
// The scc iterator returns the scc in bottom-up order, so reverse the SCCs
// and call propagateFromSCC.
for (auto &SCC : reverse(SCCs))
- propagateFromSCC(SCC, GetRelBBFreq, GetCount, AddCount);
+ propagateFromSCC(SCC, GetProfCount, AddCount);
}
template class llvm::SyntheticCountsUtils<const CallGraph *>;
+template class llvm::SyntheticCountsUtils<ModuleSummaryIndex *>;
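A hedged sketch of the arithmetic the new GetProfCount callback is expected to fold together (names and the double stand-in for ScaledNumber are assumptions): where the old interface multiplied a relative block frequency by the caller's count at every edge, the callback now returns that product directly:

#include <optional>

using Scaled = double; // stand-in for ScaledNumber<uint64_t>

std::optional<Scaled> getProfCount(Scaled CallerCount,
                                   std::optional<Scaled> RelBBFreq) {
  if (!RelBBFreq)
    return std::nullopt;           // no frequency info for this edge
  return *RelBBFreq * CallerCount; // what AdditionalCounts accumulates
}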
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 102135fbf313..4643f75da42d 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -413,17 +413,17 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_flsll);
}
- // The following functions are available on Linux,
- // but Android uses bionic instead of glibc.
- if (!T.isOSLinux() || T.isAndroid()) {
+ // The following functions are only available on GNU/Linux (using glibc).
+ // Linux variants without glibc (e.g., bionic, musl) may have some subset.
+ if (!T.isOSLinux() || !T.isGNUEnvironment()) {
TLI.setUnavailable(LibFunc_dunder_strdup);
TLI.setUnavailable(LibFunc_dunder_strtok_r);
TLI.setUnavailable(LibFunc_dunder_isoc99_scanf);
TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf);
TLI.setUnavailable(LibFunc_under_IO_getc);
TLI.setUnavailable(LibFunc_under_IO_putc);
- // But, Android has memalign.
- if (!T.isAndroid())
+ // But, Android and musl have memalign.
+ if (!T.isAndroid() && !T.isMusl())
TLI.setUnavailable(LibFunc_memalign);
TLI.setUnavailable(LibFunc_fopen64);
TLI.setUnavailable(LibFunc_fseeko64);
@@ -613,6 +613,24 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
unsigned NumParams = FTy.getNumParams();
switch (F) {
+ case LibFunc_execl:
+ case LibFunc_execlp:
+ case LibFunc_execle:
+ return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
+ case LibFunc_execv:
+ case LibFunc_execvp:
+ return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
+ case LibFunc_execvP:
+ case LibFunc_execvpe:
+ case LibFunc_execve:
+ return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getParamType(2)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
case LibFunc_strlen:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy());
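For reference, the C declarations these parameter-count checks correspond to (standard POSIX forms, with execvP the BSD variant and execvpe the GNU extension; listed only to make the NumParams tests above concrete):

// Variadic forms: at least two pointer parameters, then the NULL-terminated list.
int execl(const char *path, const char *arg, ...);
int execlp(const char *file, const char *arg, ...);
int execle(const char *path, const char *arg, ...);
// Exactly two pointer parameters.
int execv(const char *path, char *const argv[]);
int execvp(const char *file, char *const argv[]);
// Exactly three pointer parameters.
int execve(const char *path, char *const argv[], char *const envp[]);
int execvpe(const char *file, char *const argv[], char *const envp[]);
int execvP(const char *file, const char *search_path, char *const argv[]);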
@@ -863,6 +881,8 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
+ case LibFunc_fork:
+ return (NumParams == 0 && FTy.getReturnType()->isIntegerTy(32));
case LibFunc_fdopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
@@ -1399,10 +1419,10 @@ static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
- llvm::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
+ llvm::sort(VectorDescs, compareByScalarFnName);
ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
- llvm::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
+ llvm::sort(ScalarDescs, compareByVectorFnName);
}
void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index 7233a86e5daf..9151d46c6cce 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -268,6 +268,10 @@ bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
return TTIImpl->enableInterleavedAccessVectorization();
}
+bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
+ return TTIImpl->enableMaskedInterleavedAccessVectorization();
+}
+
bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
@@ -384,6 +388,55 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
+TargetTransformInfo::OperandValueKind
+TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
+ OperandValueKind OpInfo = OK_AnyValue;
+ OpProps = OP_None;
+
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().isPowerOf2())
+ OpProps = OP_PowerOf2;
+ return OK_UniformConstantValue;
+ }
+
+ // A broadcast shuffle creates a uniform value.
+ // TODO: Add support for non-zero index broadcasts.
+ // TODO: Add support for different source vector width.
+ if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
+ if (ShuffleInst->isZeroEltSplat())
+ OpInfo = OK_UniformValue;
+
+ const Value *Splat = getSplatValue(V);
+
+ // Check for a splat of a constant or for a non uniform vector of constants
+ // and check if the constant(s) are all powers of two.
+ if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+ OpInfo = OK_NonUniformConstantValue;
+ if (Splat) {
+ OpInfo = OK_UniformConstantValue;
+ if (auto *CI = dyn_cast<ConstantInt>(Splat))
+ if (CI->getValue().isPowerOf2())
+ OpProps = OP_PowerOf2;
+ } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ OpProps = OP_PowerOf2;
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
+ if (CI->getValue().isPowerOf2())
+ continue;
+ OpProps = OP_None;
+ break;
+ }
+ }
+ }
+
+ // Check for a splat of a uniform value. This is not loop aware, so return
+ // true only for the obviously uniform cases (argument, globalvalue)
+ if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+ OpInfo = OK_UniformValue;
+
+ return OpInfo;
+}
+
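A toy classifier mirroring the decisions made by getOperandInfo above for a plain vector of integers (the enum names are invented, not the TargetTransformInfo values): a splatted constant is a uniform constant, mixed constants are non-uniform, and the power-of-two property survives only if every element has it:

#include <cstdint>
#include <vector>

enum class Kind { AnyValue, UniformConstant, NonUniformConstant };

struct OperandInfo { Kind K = Kind::AnyValue; bool AllPowerOfTwo = false; };

bool isPow2(uint64_t V) { return V && !(V & (V - 1)); }

OperandInfo classify(const std::vector<uint64_t> &Elts) {
  OperandInfo Info;
  if (Elts.empty())
    return Info;
  bool Uniform = true, AllPow2 = true;
  for (uint64_t V : Elts) {
    Uniform = Uniform && (V == Elts.front());
    AllPow2 = AllPow2 && isPow2(V);
  }
  Info.K = Uniform ? Kind::UniformConstant : Kind::NonUniformConstant;
  Info.AllPowerOfTwo = AllPow2;
  return Info;
}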
int TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
@@ -472,9 +525,12 @@ int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
int TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) const {
+ unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) const {
int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond,
+ UseMaskForGaps);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -569,6 +625,12 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
+bool TargetTransformInfo::areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+ return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
+}
+
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedLoadLegal(Mode, Ty);
@@ -630,49 +692,6 @@ int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
-static TargetTransformInfo::OperandValueKind
-getOperandInfo(Value *V, TargetTransformInfo::OperandValueProperties &OpProps) {
- TargetTransformInfo::OperandValueKind OpInfo =
- TargetTransformInfo::OK_AnyValue;
- OpProps = TargetTransformInfo::OP_None;
-
- if (auto *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->getValue().isPowerOf2())
- OpProps = TargetTransformInfo::OP_PowerOf2;
- return TargetTransformInfo::OK_UniformConstantValue;
- }
-
- const Value *Splat = getSplatValue(V);
-
- // Check for a splat of a constant or for a non uniform vector of constants
- // and check if the constant(s) are all powers of two.
- if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
- OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
- if (Splat) {
- OpInfo = TargetTransformInfo::OK_UniformConstantValue;
- if (auto *CI = dyn_cast<ConstantInt>(Splat))
- if (CI->getValue().isPowerOf2())
- OpProps = TargetTransformInfo::OP_PowerOf2;
- } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
- OpProps = TargetTransformInfo::OP_PowerOf2;
- for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
- if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
- if (CI->getValue().isPowerOf2())
- continue;
- OpProps = TargetTransformInfo::OP_None;
- break;
- }
- }
- }
-
- // Check for a splat of a uniform value. This is not loop aware, so return
- // true only for the obviously uniform cases (argument, globalvalue)
- if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
- OpInfo = TargetTransformInfo::OK_UniformValue;
-
- return OpInfo;
-}
-
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
unsigned Level) {
// We don't need a shuffle if we just want to have element 0 in position 0 of
@@ -1101,14 +1120,20 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
}
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- // TODO: Identify and add costs for insert/extract subvector, etc.
+ Type *Ty = Shuffle->getType();
+ Type *SrcTy = Shuffle->getOperand(0)->getType();
+
+ // TODO: Identify and add costs for insert subvector, etc.
+ int SubIndex;
+ if (Shuffle->isExtractSubvectorMask(SubIndex))
+ return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);
+
if (Shuffle->changesLength())
return -1;
if (Shuffle->isIdentity())
return 0;
- Type *Ty = Shuffle->getType();
if (Shuffle->isReverse())
return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);
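As an illustration (not part of the commit): the new TargetTransformInfo::getOperandInfo hook above distinguishes uniform constants, non-uniform constant vectors and uniform run-time values, and separately tracks a power-of-two property for the arithmetic cost model. A minimal standalone sketch of that classification in plain C++, using stand-in types rather than LLVM's:

#include <cstdint>
#include <vector>

// Illustrative only: mirrors the OperandValueKind / OperandValueProperties
// split that getOperandInfo reports to the cost model.
enum Kind { AnyValue, UniformConstant, NonUniformConstant };
enum Props { None, PowerOf2 };

static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

// Classify a constant vector operand given as its lanes (empty = unknown).
Kind classify(const std::vector<uint64_t> &Lanes, Props &P) {
  P = None;
  if (Lanes.empty())
    return AnyValue;                        // nothing known about the operand
  bool AllEqual = true, AllPow2 = true;
  for (uint64_t L : Lanes) {
    AllEqual &= (L == Lanes.front());
    AllPow2 &= isPow2(L);
  }
  if (AllEqual) {                           // splat, e.g. <8, 8, 8, 8>
    if (isPow2(Lanes.front()))
      P = PowerOf2;
    return UniformConstant;
  }
  if (AllPow2)                              // e.g. <2, 4, 8, 16>
    P = PowerOf2;
  return NonUniformConstant;
}

Under this scheme a splat like <8, 8, 8, 8> reports a uniform power-of-two constant, while <2, 4, 8, 16> is non-uniform but keeps the power-of-two property.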
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 25a154edf4ac..83974da30a54 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -399,20 +399,20 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
}
FunctionModRefBehavior
-TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
+TypeBasedAAResult::getModRefBehavior(const CallBase *Call) {
if (!EnableTBAA)
- return AAResultBase::getModRefBehavior(CS);
+ return AAResultBase::getModRefBehavior(Call);
FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
- if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
(isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
Min = FMRB_OnlyReadsMemory;
- return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min);
}
FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
@@ -420,33 +420,30 @@ FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
return AAResultBase::getModRefBehavior(F);
}
-ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
if (const MDNode *L = Loc.AATags.TBAA)
- if (const MDNode *M =
- CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
- if (const MDNode *M1 =
- CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if (const MDNode *M2 =
- CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
bool MDNode::isTBAAVtableAccess() const {
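As an illustration (not part of the commit): the CallBase-based getModRefBehavior above still combines the TBAA-derived bound with the base analysis by a bitwise AND of behavior masks, keeping only what both agree on. A tiny sketch with made-up flag values, not LLVM's real FunctionModRefBehavior enumerators:

#include <cstdio>

// Made-up flag layout; the real FunctionModRefBehavior bits are defined in
// AliasAnalysis.h.
enum Behavior : unsigned {
  OnlyReadsMemory = 0x1,                   // may read, never writes
  OnlyAccessesArgs = 0x2,                  // touches argument memory only
  UnknownBehavior = 0x3                    // all bits set: no information
};

int main() {
  unsigned Base = UnknownBehavior;         // what the base analysis reports
  unsigned Min = OnlyReadsMemory;          // what !tbaa on the call implies
  unsigned Result = Base & Min;            // intersect: keep the stronger claim
  std::printf("%#x\n", Result);            // 0x1 -> the call only reads memory
  return 0;
}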
diff --git a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 6871e4887c9e..bd13a43b8d46 100644
--- a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
@@ -22,11 +23,21 @@ using namespace llvm;
// Search for virtual calls that call FPtr and add them to DevirtCalls.
static void
findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- bool *HasNonCallUses, Value *FPtr, uint64_t Offset) {
+ bool *HasNonCallUses, Value *FPtr, uint64_t Offset,
+ const CallInst *CI, DominatorTree &DT) {
for (const Use &U : FPtr->uses()) {
- Value *User = U.getUser();
+ Instruction *User = cast<Instruction>(U.getUser());
+ // Ignore this instruction if it is not dominated by the type intrinsic
+ // being analyzed. Otherwise we may transform a call sharing the same
+ // vtable pointer incorrectly. Specifically, this situation can arise
+ // after indirect call promotion and inlining, where we may have uses
+ // of the vtable pointer guarded by a function pointer check, and a fallback
+ // indirect call.
+ if (!DT.dominates(CI, User))
+ continue;
if (isa<BitCastInst>(User)) {
- findCallsAtConstantOffset(DevirtCalls, HasNonCallUses, User, Offset);
+ findCallsAtConstantOffset(DevirtCalls, HasNonCallUses, User, Offset, CI,
+ DT);
} else if (auto CI = dyn_cast<CallInst>(User)) {
DevirtCalls.push_back({Offset, CI});
} else if (auto II = dyn_cast<InvokeInst>(User)) {
@@ -38,23 +49,23 @@ findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
}
// Search for virtual calls that load from VPtr and add them to DevirtCalls.
-static void
-findLoadCallsAtConstantOffset(const Module *M,
- SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- Value *VPtr, int64_t Offset) {
+static void findLoadCallsAtConstantOffset(
+ const Module *M, SmallVectorImpl<DevirtCallSite> &DevirtCalls, Value *VPtr,
+ int64_t Offset, const CallInst *CI, DominatorTree &DT) {
for (const Use &U : VPtr->uses()) {
Value *User = U.getUser();
if (isa<BitCastInst>(User)) {
- findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset);
+ findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset, CI, DT);
} else if (isa<LoadInst>(User)) {
- findCallsAtConstantOffset(DevirtCalls, nullptr, User, Offset);
+ findCallsAtConstantOffset(DevirtCalls, nullptr, User, Offset, CI, DT);
} else if (auto GEP = dyn_cast<GetElementPtrInst>(User)) {
// Take into account the GEP offset.
if (VPtr == GEP->getPointerOperand() && GEP->hasAllConstantIndices()) {
SmallVector<Value *, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
int64_t GEPOffset = M->getDataLayout().getIndexedOffsetInType(
GEP->getSourceElementType(), Indices);
- findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset + GEPOffset);
+ findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset + GEPOffset,
+ CI, DT);
}
}
}
@@ -62,7 +73,8 @@ findLoadCallsAtConstantOffset(const Module *M,
void llvm::findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI) {
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI,
+ DominatorTree &DT) {
assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test);
const Module *M = CI->getParent()->getParent()->getParent();
@@ -79,15 +91,15 @@ void llvm::findDevirtualizableCallsForTypeTest(
// If we found any, search for virtual calls based on %p and add them to
// DevirtCalls.
if (!Assumes.empty())
- findLoadCallsAtConstantOffset(M, DevirtCalls,
- CI->getArgOperand(0)->stripPointerCasts(), 0);
+ findLoadCallsAtConstantOffset(
+ M, DevirtCalls, CI->getArgOperand(0)->stripPointerCasts(), 0, CI, DT);
}
void llvm::findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
- const CallInst *CI) {
+ const CallInst *CI, DominatorTree &DT) {
assert(CI->getCalledFunction()->getIntrinsicID() ==
Intrinsic::type_checked_load);
@@ -114,5 +126,5 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
for (Value *LoadedPtr : LoadedPtrs)
findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr,
- Offset->getZExtValue());
+ Offset->getZExtValue(), CI, DT);
}
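As a hedged usage sketch (not part of the commit): both entry points above now require a DominatorTree so that only uses dominated by the type intrinsic are considered. Assuming the updated signatures shown in this diff and standard LLVM headers, a caller would look roughly like this; collectDevirtCandidates is a made-up name:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// TypeTestCI is assumed to be a call to llvm.type.test inside F.
void collectDevirtCandidates(Function &F, const CallInst *TypeTestCI) {
  DominatorTree DT(F);                         // dominance info for F
  SmallVector<DevirtCallSite, 4> DevirtCalls;  // virtual calls we may rewrite
  SmallVector<CallInst *, 4> Assumes;          // llvm.assume users of the test
  findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, TypeTestCI, DT);
  // Uses not dominated by TypeTestCI were skipped above, so a fallback
  // indirect call that merely shares the vtable pointer is no longer
  // misreported as a devirtualization candidate.
}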
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index edd46c5fe362..0446426c0e66 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -118,14 +119,18 @@ struct Query {
/// (all of which can call computeKnownBits), and so on.
std::array<const Value *, MaxDepth> Excluded;
+ /// If true, it is safe to use metadata during simplification.
+ InstrInfoQuery IIQ;
+
unsigned NumExcluded = 0;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
- : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {}
+ const DominatorTree *DT, bool UseInstrInfo,
+ OptimizationRemarkEmitter *ORE = nullptr)
+ : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
Query(const Query &Q, const Value *NewExcl)
- : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
+ : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),
NumExcluded(Q.NumExcluded) {
Excluded = Q.Excluded;
Excluded[NumExcluded++] = NewExcl;
@@ -165,9 +170,9 @@ void llvm::computeKnownBits(const Value *V, KnownBits &Known,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
::computeKnownBits(V, Known, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
+ Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}
static KnownBits computeKnownBits(const Value *V, unsigned Depth,
@@ -177,15 +182,16 @@ KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE) {
- return ::computeKnownBits(V, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
+ OptimizationRemarkEmitter *ORE,
+ bool UseInstrInfo) {
+ return ::computeKnownBits(
+ V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}
bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
assert(LHS->getType() == RHS->getType() &&
"LHS and RHS should have the same type");
assert(LHS->getType()->isIntOrIntVectorTy() &&
@@ -201,8 +207,8 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
KnownBits LHSKnown(IT->getBitWidth());
KnownBits RHSKnown(IT->getBitWidth());
- computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT);
- computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT);
+ computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
+ computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
@@ -222,69 +228,71 @@ static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q);
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
- bool OrZero,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+ bool OrZero, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::isKnownToBeAPowerOfTwo(
+ V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::isKnownNonZero(V, Depth,
+ Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
- unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ KnownBits Known =
+ computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
return Known.isNonNegative();
}
bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+ const DominatorTree *DT, bool UseInstrInfo) {
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isStrictlyPositive();
// TODO: We're doing two recursive queries here. We should factor this such
// that only a single query is needed.
- return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) &&
- isKnownNonZero(V, DL, Depth, AC, CxtI, DT);
+ return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
+ isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
}
bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ const DominatorTree *DT, bool UseInstrInfo) {
+ KnownBits Known =
+ computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
return Known.isNegative();
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
- const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownNonEqual(V1, V2, Query(DL, AC,
- safeCxtI(V1, safeCxtI(V2, CxtI)),
- DT));
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ return ::isKnownNonEqual(V1, V2,
+ Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT,
+ UseInstrInfo, /*ORE=*/nullptr));
}
static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q);
bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
- const DataLayout &DL,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI, const DominatorTree *DT) {
- return ::MaskedValueIsZero(V, Mask, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::MaskedValueIsZero(
+ V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
@@ -293,8 +301,9 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::ComputeNumSignBits(
+ V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
@@ -965,7 +974,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
switch (I->getOpcode()) {
default: break;
case Instruction::Load:
- if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
+ if (MDNode *MD =
+ Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, Known);
break;
case Instruction::And: {
@@ -1014,7 +1024,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::Mul: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
Known2, Depth, Q);
break;
@@ -1082,7 +1092,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// RHS from matchSelectPattern returns the negation part of abs pattern.
// If the negate has an NSW flag we can assume the sign bit of the result
// will be 0 because that makes abs(INT_MIN) undefined.
- if (cast<Instruction>(RHS)->hasNoSignedWrap())
+ if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
MaxHighZeros = 1;
}
@@ -1151,7 +1161,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
}
case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
APInt KZResult = KnownZero << ShiftAmt;
KZResult.setLowBits(ShiftAmt); // Low bits known 0.
@@ -1202,13 +1212,13 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::Sub: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
Known, Known2, Depth, Q);
break;
}
case Instruction::Add: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
Known, Known2, Depth, Q);
break;
@@ -1369,7 +1379,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Known3.countMinTrailingZeros()));
auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
- if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
+ if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
// If initial value of recurrence is nonnegative, and we are adding
// a nonnegative number with nsw, the result can only be nonnegative
// or poison value regardless of the number of times we execute the
@@ -1442,7 +1452,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// If range metadata is attached to this call, set known bits from that,
// and then intersect with known bits based on other properties of the
// function.
- if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
+ if (MDNode *MD =
+ Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, Known);
if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
computeKnownBits(RV, Known2, Depth + 1, Q);
@@ -1495,6 +1506,27 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// of bits which might be set provided by popcnt KnownOne2.
break;
}
+ case Intrinsic::fshr:
+ case Intrinsic::fshl: {
+ const APInt *SA;
+ if (!match(I->getOperand(2), m_APInt(SA)))
+ break;
+
+ // Normalize to funnel shift left.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ KnownBits Known3(Known);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);
+
+ Known.Zero =
+ Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
+ Known.One =
+ Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
+ break;
+ }
case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);
break;
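A worked example (not part of the commit) of the new fshl/fshr known-bits case above, in plain C++ so it stands alone: a funnel shift left takes the high word of the concatenation a:b, so the operands' known masks shift and OR together exactly as the code computes them.

#include <cstdint>
#include <cstdio>

int main() {
  // fshl(a, b, 8) on i32 == (a << 8) | (b >> 24).
  // Suppose we only know: a's low 16 bits are zero, b's top 8 bits are one.
  uint32_t KnownZeroA = 0x0000FFFFu;       // bits of 'a' known to be 0
  uint32_t KnownOneB = 0xFF000000u;        // bits of 'b' known to be 1
  unsigned Shift = 8, Bits = 32;           // nonzero shift keeps >> in range

  uint32_t KnownZero = KnownZeroA << Shift;          // 0x00FFFF00
  uint32_t KnownOne = KnownOneB >> (Bits - Shift);   // 0x000000FF
  std::printf("zero=%#010x one=%#010x\n", KnownZero, KnownOne);
  return 0;
}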
@@ -1722,7 +1754,8 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
// either the original power-of-two, a larger power-of-two or zero.
if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
- if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
+ if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
+ Q.IIQ.hasNoSignedWrap(VOBO)) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
@@ -1860,19 +1893,41 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
(Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
continue;
+ SmallVector<const User *, 4> WorkList;
+ SmallPtrSet<const User *, 4> Visited;
for (auto *CmpU : U->users()) {
- if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) {
- assert(BI->isConditional() && "uses a comparison!");
+ assert(WorkList.empty() && "Should be!");
+ if (Visited.insert(CmpU).second)
+ WorkList.push_back(CmpU);
+
+ while (!WorkList.empty()) {
+ auto *Curr = WorkList.pop_back_val();
+
+ // If a user is an AND, add all its users to the work list. We only
+ // propagate "pred != null" condition through AND because it is only
+ // correct to assume that all conditions of AND are met in true branch.
+ // TODO: Support similar logic of OR and EQ predicate?
+ if (Pred == ICmpInst::ICMP_NE)
+ if (auto *BO = dyn_cast<BinaryOperator>(Curr))
+ if (BO->getOpcode() == Instruction::And) {
+ for (auto *BOU : BO->users())
+ if (Visited.insert(BOU).second)
+ WorkList.push_back(BOU);
+ continue;
+ }
- BasicBlock *NonNullSuccessor =
- BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
- BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
- if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
+ assert(BI->isConditional() && "uses a comparison!");
+
+ BasicBlock *NonNullSuccessor =
+ BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
+ BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
+ if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ return true;
+ } else if (Pred == ICmpInst::ICMP_NE && isGuard(Curr) &&
+ DT->dominates(cast<Instruction>(Curr), CtxI)) {
return true;
- } else if (Pred == ICmpInst::ICMP_NE &&
- match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) &&
- DT->dominates(cast<Instruction>(CmpU), CtxI)) {
- return true;
+ }
}
}
}
@@ -1937,7 +1992,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
if (auto *I = dyn_cast<Instruction>(V)) {
- if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) {
+ if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
// If the possible ranges don't contain zero, then the value is
// definitely non-zero.
if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
@@ -1965,13 +2020,13 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// A Load tagged with nonnull metadata is never null.
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
- if (LI->getMetadata(LLVMContext::MD_nonnull))
+ if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull))
return true;
- if (auto CS = ImmutableCallSite(V)) {
- if (CS.isReturnNonNull())
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
+ if (Call->isReturnNonNull())
return true;
- if (const auto *RP = getArgumentAliasingToReturnedPointer(CS))
+ if (const auto *RP = getArgumentAliasingToReturnedPointer(Call))
return isKnownNonZero(RP, Depth, Q);
}
}
@@ -2003,7 +2058,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
- if (BO->hasNoUnsignedWrap())
+ if (Q.IIQ.hasNoUnsignedWrap(BO))
return isKnownNonZero(X, Depth, Q);
KnownBits Known(BitWidth);
@@ -2078,7 +2133,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
- if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
+ if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) &&
isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q))
return true;
}
@@ -2100,7 +2155,8 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
if (!C->isZero() && !C->isNegative()) {
ConstantInt *X;
- if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
+ if (Q.IIQ.UseInstrInfo &&
+ (match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
!X->isNegative())
return true;
@@ -2174,6 +2230,36 @@ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
return Mask.isSubsetOf(Known.Zero);
}
+// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
+// Returns the input and lower/upper bounds.
+static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
+ const APInt *&CLow, const APInt *&CHigh) {
+ assert(isa<Operator>(Select) &&
+ cast<Operator>(Select)->getOpcode() == Instruction::Select &&
+ "Input should be a Select!");
+
+ const Value *LHS, *RHS, *LHS2, *RHS2;
+ SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
+ if (SPF != SPF_SMAX && SPF != SPF_SMIN)
+ return false;
+
+ if (!match(RHS, m_APInt(CLow)))
+ return false;
+
+ SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
+ if (getInverseMinMaxFlavor(SPF) != SPF2)
+ return false;
+
+ if (!match(RHS2, m_APInt(CHigh)))
+ return false;
+
+ if (SPF == SPF_SMIN)
+ std::swap(CLow, CHigh);
+
+ In = LHS2;
+ return CLow->sle(*CHigh);
+}
+
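A worked example (not part of the commit) for the clamp matcher above and its use in ComputeNumSignBits in a later hunk: for smax(smin(x, 127), -128) on i32, both bounds have 25 identical leading bits, so the select is known to have at least 25 sign bits. A standalone check of that arithmetic:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Number of identical top bits (the "sign bits") of a 32-bit value.
static unsigned numSignBits(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  uint32_t Sign = U >> 31;
  unsigned N = 1;
  for (int Bit = 30; Bit >= 0 && ((U >> Bit) & 1u) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  int32_t CLow = -128, CHigh = 127;        // clamp bounds
  unsigned Min = std::min(numSignBits(CLow), numSignBits(CHigh));
  std::printf("%u\n", Min);                // prints 25
  return 0;
}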
/// For vector constants, loop over the elements and find the constant with the
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
@@ -2335,11 +2421,19 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
}
break;
- case Instruction::Select:
+ case Instruction::Select: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ const Value *X;
+ const APInt *CLow, *CHigh;
+ if (isSignedMinMaxClamp(U, X, CLow, CHigh))
+ return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+
Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
if (Tmp == 1) break;
Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
return std::min(Tmp, Tmp2);
+ }
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
@@ -2437,6 +2531,44 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// valid for all elements of the vector (for example if vector is sign
// extended, shifted, etc).
return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+
+ case Instruction::ShuffleVector: {
+ // TODO: This is copied almost directly from the SelectionDAG version of
+ // ComputeNumSignBits. It would be better if we could share common
+ // code. If not, make sure that changes are translated to the DAG.
+
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ auto *Shuf = cast<ShuffleVectorInst>(U);
+ int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
+ int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int M = Shuf->getMaskValue(i);
+ assert(M < NumElts * 2 && "Invalid shuffle mask constant");
+ // For undef elements, we don't know anything about the common state of
+ // the shuffle result.
+ if (M == -1)
+ return 1;
+ if (M < NumElts)
+ DemandedLHS.setBit(M % NumElts);
+ else
+ DemandedRHS.setBit(M % NumElts);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= V->getType()->getScalarSizeInBits() &&
+ "Failed to determine minimum sign bits");
+ return Tmp;
+ }
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -2722,6 +2854,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
break;
// sqrt(-0.0) = -0.0, no other negative results are possible.
case Intrinsic::sqrt:
+ case Intrinsic::canonicalize:
return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
// fabs(x) != -0.0
case Intrinsic::fabs:
@@ -2817,14 +2950,20 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
default:
break;
case Intrinsic::maxnum:
- return (isKnownNeverNaN(I->getOperand(0)) &&
+ return (isKnownNeverNaN(I->getOperand(0), TLI) &&
cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI,
SignBitOnly, Depth + 1)) ||
- (isKnownNeverNaN(I->getOperand(1)) &&
+ (isKnownNeverNaN(I->getOperand(1), TLI) &&
cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
SignBitOnly, Depth + 1));
+ case Intrinsic::maximum:
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+ Depth + 1) ||
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+ Depth + 1);
case Intrinsic::minnum:
+ case Intrinsic::minimum:
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1) &&
cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
@@ -2885,7 +3024,8 @@ bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
}
-bool llvm::isKnownNeverNaN(const Value *V) {
+bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
+ unsigned Depth) {
assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
// If we're told that NaNs won't happen, assume they won't.
@@ -2893,13 +3033,60 @@ bool llvm::isKnownNeverNaN(const Value *V) {
if (FPMathOp->hasNoNaNs())
return true;
- // TODO: Handle instructions and potentially recurse like other 'isKnown'
- // functions. For example, the result of sitofp is never NaN.
-
// Handle scalar constants.
if (auto *CFP = dyn_cast<ConstantFP>(V))
return !CFP->isNaN();
+ if (Depth == MaxDepth)
+ return false;
+
+ if (auto *Inst = dyn_cast<Instruction>(V)) {
+ switch (Inst->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FSub:
+ case Instruction::FDiv:
+ case Instruction::FRem: {
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ }
+ case Instruction::Select: {
+ return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
+ isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
+ }
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ return true;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1);
+ default:
+ break;
+ }
+ }
+
+ if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::canonicalize:
+ case Intrinsic::fabs:
+ case Intrinsic::copysign:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1);
+ case Intrinsic::sqrt:
+ return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
+ CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
+ default:
+ return false;
+ }
+ }
+
// Bail out for constant expressions, but try to handle vector constants.
if (!V->getType()->isVectorTy() || !isa<Constant>(V))
return false;
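A toy mirror (not part of the commit) of the recursion added to isKnownNeverNaN above: integer-to-FP conversions can never produce NaN, a select is NaN-free only if both arms are, and a plain fadd cannot yet be proven NaN-free because inf + -inf is NaN (hence the isKnownNeverInfinity TODO).

// Toy expression mirror of the recursion above; illustrative only.
struct Expr {
  enum Op { IntToFP, FPExt, Select, FAdd, Unknown } Kind;
  const Expr *A = nullptr, *B = nullptr;   // operands where relevant
};

static bool neverNaN(const Expr *E, unsigned Depth = 0) {
  if (!E || Depth == 6)                    // mirror of the MaxDepth cut-off
    return false;
  switch (E->Kind) {
  case Expr::IntToFP: return true;                        // sitofp/uitofp
  case Expr::FPExt:   return neverNaN(E->A, Depth + 1);   // widening keeps non-NaN
  case Expr::Select:  return neverNaN(E->A, Depth + 1) && // both arms must be
                             neverNaN(E->B, Depth + 1);   // NaN-free
  default:            return false;        // e.g. FAdd: inf + -inf could be NaN
  }
}

int main() {
  Expr X{Expr::IntToFP}, Y{Expr::IntToFP};
  Expr S{Expr::Select, &X, &Y};
  return neverNaN(&S) ? 0 : 1;             // 0: the select is NaN-free
}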
@@ -2920,62 +3107,92 @@ bool llvm::isKnownNeverNaN(const Value *V) {
return true;
}
-/// If the specified value can be set by repeating the same byte in memory,
-/// return the i8 value that it is represented with. This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
Value *llvm::isBytewiseValue(Value *V) {
+
// All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType()->isIntegerTy(8)) return V;
+ if (V->getType()->isIntegerTy(8))
+ return V;
+
+ LLVMContext &Ctx = V->getContext();
+
+ // Undef don't care.
+ auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
+ if (isa<UndefValue>(V))
+ return UndefInt8;
+
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C) {
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return nullptr;
+ }
// Handle 'null' ConstantArrayZero etc.
- if (Constant *C = dyn_cast<Constant>(V))
- if (C->isNullValue())
- return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(Ctx));
- // Constant float and double values can be handled as integer values if the
+ // Constant floating-point values can be handled as integer values if the
// corresponding integer value is "byteable". An important case is 0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType()->isFloatTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
- if (CFP->getType()->isDoubleTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ Type *Ty = nullptr;
+ if (CFP->getType()->isHalfTy())
+ Ty = Type::getInt16Ty(Ctx);
+ else if (CFP->getType()->isFloatTy())
+ Ty = Type::getInt32Ty(Ctx);
+ else if (CFP->getType()->isDoubleTy())
+ Ty = Type::getInt64Ty(Ctx);
// Don't handle long double formats, which have strange constraints.
+ return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
}
// We can handle constant integers that are multiple of 8 bits.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
if (CI->getBitWidth() % 8 == 0) {
assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
-
if (!CI->getValue().isSplat(8))
return nullptr;
- return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
+ return ConstantInt::get(Ctx, CI->getValue().trunc(8));
}
}
- // A ConstantDataArray/Vector is splatable if all its members are equal and
- // also splatable.
- if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
- Value *Elt = CA->getElementAsConstant(0);
- Value *Val = isBytewiseValue(Elt);
- if (!Val)
+ auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
+ if (LHS == RHS)
+ return LHS;
+ if (!LHS || !RHS)
return nullptr;
+ if (LHS == UndefInt8)
+ return RHS;
+ if (RHS == UndefInt8)
+ return LHS;
+ return nullptr;
+ };
- for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
- if (CA->getElementAsConstant(I) != Elt)
+ if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
+ Value *Val = UndefInt8;
+ for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
+ if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
return nullptr;
+ return Val;
+ }
+
+ if (isa<ConstantVector>(C)) {
+ Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
+ return Splat ? isBytewiseValue(Splat) : nullptr;
+ }
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ Value *Val = UndefInt8;
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
+ if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
+ return nullptr;
return Val;
}
- // Conceptually, we could handle things like:
- // %a = zext i8 %X to i16
- // %b = shl i16 %a, 8
- // %c = or i16 %a, %b
- // but until there is an example that actually needs this, it doesn't seem
- // worth worrying about.
+ // Don't try to handle the handful of other constants.
return nullptr;
}
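A toy mirror (not part of the commit) of the Merge lambda above: undef acts as a wildcard byte, two equal bytes merge to themselves, and two different concrete bytes make the whole value non-bytewise. Here -1 stands in for the undef byte:

#include <cstdio>
#include <initializer_list>
#include <optional>

// Toy mirror of the Merge lambda; an empty optional means "not bytewise".
static std::optional<int> merge(std::optional<int> L, std::optional<int> R) {
  if (!L || !R) return std::nullopt;       // one side already failed
  if (*L == *R) return L;                  // same byte on both sides
  if (*L == -1) return R;                  // undef adopts the other side
  if (*R == -1) return L;
  return std::nullopt;                     // two different concrete bytes
}

int main() {
  // <i8 7, i8 undef, i8 7> merges lane by lane down to the byte 7.
  std::optional<int> V = -1;               // start from "undef don't care"
  for (int Byte : {7, -1, 7})
    V = merge(V, Byte);
  std::printf("%d\n", V ? *V : 256);       // prints 7
  return 0;
}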
@@ -3172,7 +3389,14 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
- ByteOffset += GEPOffset.getSExtValue();
+ APInt OrigByteOffset(ByteOffset);
+ ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth());
+ if (ByteOffset.getMinSignedBits() > 64) {
+ // Stop traversal if the pointer offset wouldn't fit into int64_t
+ // (this should be removed if Offset is updated to an APInt)
+ ByteOffset = OrigByteOffset;
+ break;
+ }
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
@@ -3400,21 +3624,21 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
return Len == ~0ULL ? 1 : Len;
}
-const Value *llvm::getArgumentAliasingToReturnedPointer(ImmutableCallSite CS) {
- assert(CS &&
- "getArgumentAliasingToReturnedPointer only works on nonnull CallSite");
- if (const Value *RV = CS.getReturnedArgOperand())
+const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
+ assert(Call &&
+ "getArgumentAliasingToReturnedPointer only works on nonnull calls");
+ if (const Value *RV = Call->getReturnedArgOperand())
return RV;
// This can be used only as an aliasing property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS))
- return CS.getArgOperand(0);
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call))
+ return Call->getArgOperand(0);
return nullptr;
}
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- ImmutableCallSite CS) {
- return CS.getIntrinsicID() == Intrinsic::launder_invariant_group ||
- CS.getIntrinsicID() == Intrinsic::strip_invariant_group;
+ const CallBase *Call) {
+ return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
+ Call->getIntrinsicID() == Intrinsic::strip_invariant_group;
}
/// \p PN defines a loop-variant pointer to an object. Check if the
@@ -3462,7 +3686,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// An alloca can't be further simplified.
return V;
} else {
- if (auto CS = CallSite(V)) {
+ if (auto *Call = dyn_cast<CallBase>(V)) {
// CaptureTracking can know about special capturing properties of some
// intrinsics like launder.invariant.group, that can't be expressed with
// the attributes, but have properties like returning aliasing pointer.
@@ -3472,7 +3696,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
V = RP;
continue;
}
@@ -3605,8 +3829,7 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II) return false;
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
}
return true;
@@ -3703,12 +3926,10 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
}
-OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+OverflowResult llvm::computeOverflowForUnsignedMul(
+ const Value *LHS, const Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -3718,8 +3939,10 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
KnownBits LHSKnown(BitWidth);
KnownBits RHSKnown(BitWidth);
- computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
- computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
+ computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
+ UseInstrInfo);
+ computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
+ UseInstrInfo);
// Note that underestimating the number of zero bits gives a more
// conservative answer.
unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
@@ -3750,12 +3973,11 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+OverflowResult
+llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -3784,33 +4006,33 @@ OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
// product is exactly the minimum negative number.
// E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
// For simplicity we just check if at least one side is not negative.
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
return OverflowResult::NeverOverflows;
}
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+OverflowResult llvm::computeOverflowForUnsignedAdd(
+ const Value *LHS, const Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
// The sign bit is set in both cases: this MUST overflow.
- // Create a simple add instruction, and insert it into the struct.
return OverflowResult::AlwaysOverflows;
}
if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
// The sign bit is clear in both cases: this CANNOT overflow.
- // Create a simple add instruction, and insert it into the struct.
return OverflowResult::NeverOverflows;
}
}
@@ -3927,11 +4149,18 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
- if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
- return OverflowResult::NeverOverflows;
+ if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+
+ // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
+ if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
+ return OverflowResult::NeverOverflows;
+
+ // If the LHS is non-negative and the RHS negative, we always wrap.
+ if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
+ return OverflowResult::AlwaysOverflows;
+ }
return OverflowResult::MayOverflow;
}
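A quick numeric check (not part of the commit) of the extra case added above: when the LHS sign bit is known set and the RHS sign bit is known clear, LHS >= 2^31 > RHS, so the unsigned subtraction cannot wrap; with the roles swapped it always wraps.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t LHS = 0x80000001u;              // sign bit set  -> at least 2^31
  uint32_t RHS = 0x7FFFFFFFu;              // sign bit clear -> below 2^31
  std::printf("never wraps: %u\n", LHS - RHS);   // small positive result

  // Swap the known signs and the subtraction must wrap around.
  std::printf("always wraps: %u\n", RHS - LHS);  // huge value: wrapped
  return 0;
}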
@@ -4244,12 +4473,34 @@ static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isNaN();
+
+ if (auto *C = dyn_cast<ConstantDataVector>(V)) {
+ if (!C->getElementType()->isFloatingPointTy())
+ return false;
+ for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
+ if (C->getElementAsAPFloat(I).isNaN())
+ return false;
+ }
+ return true;
+ }
+
return false;
}
static bool isKnownNonZero(const Value *V) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isZero();
+
+ if (auto *C = dyn_cast<ConstantDataVector>(V)) {
+ if (!C->getElementType()->isFloatingPointTy())
+ return false;
+ for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
+ if (C->getElementAsAPFloat(I).isZero())
+ return false;
+ }
+ return true;
+ }
+
return false;
}
@@ -4541,6 +4792,27 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
Value *TrueVal, Value *FalseVal,
Value *&LHS, Value *&RHS,
unsigned Depth) {
+ if (CmpInst::isFPPredicate(Pred)) {
+ // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
+ // 0.0 operand, set the compare's 0.0 operands to that same value for the
+ // purpose of identifying min/max. Disregard vector constants with undefined
+ // elements because those can not be back-propagated for analysis.
+ Value *OutputZeroVal = nullptr;
+ if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
+ !cast<Constant>(TrueVal)->containsUndefElement())
+ OutputZeroVal = TrueVal;
+ else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
+ !cast<Constant>(FalseVal)->containsUndefElement())
+ OutputZeroVal = FalseVal;
+
+ if (OutputZeroVal) {
+ if (match(CmpLHS, m_AnyZeroFP()))
+ CmpLHS = OutputZeroVal;
+ if (match(CmpRHS, m_AnyZeroFP()))
+ CmpRHS = OutputZeroVal;
+ }
+ }
+
LHS = CmpLHS;
RHS = CmpRHS;
@@ -4970,21 +5242,16 @@ static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
return IsMatchingOps || IsSwappedOps;
}
-/// Return true if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS BRHS" is
-/// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS
-/// BRHS" is false. Otherwise, return None if we can't infer anything.
+/// Return true if "icmp1 APred X, Y" implies "icmp2 BPred X, Y" is true.
+/// Return false if "icmp1 APred X, Y" implies "icmp2 BPred X, Y" is false.
+/// Otherwise, return None if we can't infer anything.
static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
- const Value *ALHS,
- const Value *ARHS,
CmpInst::Predicate BPred,
- const Value *BLHS,
- const Value *BRHS,
- bool IsSwappedOps) {
- // Canonicalize the operands so they're matching.
- if (IsSwappedOps) {
- std::swap(BLHS, BRHS);
+ bool AreSwappedOps) {
+ // Canonicalize the predicate as if the operands were not commuted.
+ if (AreSwappedOps)
BPred = ICmpInst::getSwappedPredicate(BPred);
- }
+
if (CmpInst::isImpliedTrueByMatchingCmp(APred, BPred))
return true;
if (CmpInst::isImpliedFalseByMatchingCmp(APred, BPred))
@@ -4993,15 +5260,14 @@ static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
return None;
}
-/// Return true if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS C2" is
-/// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS
-/// C2" is false. Otherwise, return None if we can't infer anything.
+/// Return true if "icmp APred X, C1" implies "icmp BPred X, C2" is true.
+/// Return false if "icmp APred X, C1" implies "icmp BPred X, C2" is false.
+/// Otherwise, return None if we can't infer anything.
static Optional<bool>
-isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
+isImpliedCondMatchingImmOperands(CmpInst::Predicate APred,
const ConstantInt *C1,
CmpInst::Predicate BPred,
- const Value *BLHS, const ConstantInt *C2) {
- assert(ALHS == BLHS && "LHS operands must match.");
+ const ConstantInt *C2) {
ConstantRange DomCR =
ConstantRange::makeExactICmpRegion(APred, C1->getValue());
ConstantRange CR =
@@ -5033,10 +5299,10 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
ICmpInst::Predicate BPred = RHS->getPredicate();
// Can we infer anything when the two compares have matching operands?
- bool IsSwappedOps;
- if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, IsSwappedOps)) {
+ bool AreSwappedOps;
+ if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, AreSwappedOps)) {
if (Optional<bool> Implication = isImpliedCondMatchingOperands(
- APred, ALHS, ARHS, BPred, BLHS, BRHS, IsSwappedOps))
+ APred, BPred, AreSwappedOps))
return Implication;
// No amount of additional analysis will infer the second condition, so
// early exit.
@@ -5047,8 +5313,7 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
// constants (not necessarily matching)?
if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) {
if (Optional<bool> Implication = isImpliedCondMatchingImmOperands(
- APred, ALHS, cast<ConstantInt>(ARHS), BPred, BLHS,
- cast<ConstantInt>(BRHS)))
+ APred, cast<ConstantInt>(ARHS), BPred, cast<ConstantInt>(BRHS)))
return Implication;
// No amount of additional analysis will infer the second condition, so
// early exit.
@@ -5133,3 +5398,35 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
}
return None;
}
+
+Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
+ const Instruction *ContextI,
+ const DataLayout &DL) {
+ assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
+ if (!ContextI || !ContextI->getParent())
+ return None;
+
+ // TODO: This is a poor/cheap way to determine dominance. Should we use a
+ // dominator tree (eg, from a SimplifyQuery) instead?
+ const BasicBlock *ContextBB = ContextI->getParent();
+ const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
+ if (!PredBB)
+ return None;
+
+ // We need a conditional branch in the predecessor.
+ Value *PredCond;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
+ return None;
+
+ // The branch should get simplified. Don't bother simplifying this condition.
+ if (TrueBB == FalseBB)
+ return None;
+
+ assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
+ "Predecessor block does not point to successor?");
+
+ // Is this condition implied by the predecessor condition?
+ bool CondIsTrue = TrueBB == ContextBB;
+ return isImpliedCondition(PredCond, Cond, DL, CondIsTrue);
+}
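A hedged usage sketch (not part of the commit) of the new isImpliedByDomCondition helper above; foldRedundantCondition and its logic are made up for illustration, only the helper's signature comes from this diff:

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Replace a conditional branch whose condition is already decided by the
// unique predecessor's branch.
static bool foldRedundantCondition(BranchInst *BI, const DataLayout &DL) {
  if (!BI->isConditional())
    return false;
  Value *Cond = BI->getCondition();
  if (Optional<bool> Known = isImpliedByDomCondition(Cond, BI, DL)) {
    BI->setCondition(ConstantInt::getBool(BI->getContext(), *Known));
    return true;
  }
  return false;
}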
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index d73d24736439..5656a19d7e0d 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -25,16 +26,30 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#define DEBUG_TYPE "vectorutils"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-/// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// Maximum factor for an interleaved memory access.
+static cl::opt<unsigned> MaxInterleaveGroupFactor(
+ "max-interleave-group-factor", cl::Hidden,
+ cl::desc("Maximum factor for an interleaved access group (default = 8)"),
+ cl::init(8));
+
+/// Return true if all of the intrinsic's arguments and return type are scalars
+/// for the scalar form of the intrinsic and vectors for the vector form of the
+/// intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::sqrt:
+ case Intrinsic::bswap: // Begin integer bit-manipulation.
+ case Intrinsic::bitreverse:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
@@ -45,6 +60,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
@@ -52,15 +69,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
- case Intrinsic::bswap:
- case Intrinsic::bitreverse:
- case Intrinsic::ctpop:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
case Intrinsic::powi:
+ case Intrinsic::canonicalize:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
return true;
default:
return false;
@@ -270,9 +287,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
}
// Extract a value from a vector add operation with a constant zero.
- Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
- if (Constant *Elt = Con->getAggregateElement(EltNo))
+ // TODO: Use getBinOpIdentity() to generalize this.
+ Value *Val; Constant *C;
+ if (match(V, m_Add(m_Value(Val), m_Constant(C))))
+ if (Constant *Elt = C->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
@@ -450,16 +468,100 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
return MinBWs;
}
+/// Add all access groups in @p AccGroups to @p List.
+template <typename ListT>
+static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
+ // Interpret an access group as a list containing itself.
+ if (AccGroups->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
+ List.insert(AccGroups);
+ return;
+ }
+
+ for (auto &AccGroupListOp : AccGroups->operands()) {
+ auto *Item = cast<MDNode>(AccGroupListOp.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ List.insert(Item);
+ }
+}
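Illustrative sketch (not part of the patch): the two metadata shapes this helper accepts, built with the C++ API. A lone access group is a distinct, operand-less MDNode; a list node's operands are themselves access groups. All names below are placeholders.

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    LLVMContext Ctx;
    MDNode *GroupA = MDNode::getDistinct(Ctx, {});        // one access group
    MDNode *GroupB = MDNode::getDistinct(Ctx, {});        // another access group
    MDNode *List   = MDNode::get(Ctx, {GroupA, GroupB});  // a list of two groups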
+
+MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
+ if (!AccGroups1)
+ return AccGroups2;
+ if (!AccGroups2)
+ return AccGroups1;
+ if (AccGroups1 == AccGroups2)
+ return AccGroups1;
+
+ SmallSetVector<Metadata *, 4> Union;
+ addToAccessGroupList(Union, AccGroups1);
+ addToAccessGroupList(Union, AccGroups2);
+
+ if (Union.size() == 0)
+ return nullptr;
+ if (Union.size() == 1)
+ return cast<MDNode>(Union.front());
+
+ LLVMContext &Ctx = AccGroups1->getContext();
+ return MDNode::get(Ctx, Union.getArrayRef());
+}
+
+MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
+ const Instruction *Inst2) {
+ bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
+ bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
+
+ if (!MayAccessMem1 && !MayAccessMem2)
+ return nullptr;
+ if (!MayAccessMem1)
+ return Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MayAccessMem2)
+ return Inst1->getMetadata(LLVMContext::MD_access_group);
+
+ MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
+ MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MD1 || !MD2)
+ return nullptr;
+ if (MD1 == MD2)
+ return MD1;
+
+ // Use set for scalable 'contains' check.
+ SmallPtrSet<Metadata *, 4> AccGroupSet2;
+ addToAccessGroupList(AccGroupSet2, MD2);
+
+ SmallVector<Metadata *, 4> Intersection;
+ if (MD1->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
+ if (AccGroupSet2.count(MD1))
+ Intersection.push_back(MD1);
+ } else {
+ for (const MDOperand &Node : MD1->operands()) {
+ auto *Item = cast<MDNode>(Node.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ if (AccGroupSet2.count(Item))
+ Intersection.push_back(Item);
+ }
+ }
+
+ if (Intersection.size() == 0)
+ return nullptr;
+ if (Intersection.size() == 1)
+ return cast<MDNode>(Intersection.front());
+
+ LLVMContext &Ctx = Inst1->getContext();
+ return MDNode::get(Ctx, Intersection);
+}
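Illustrative sketch (not part of the patch) of how a transform that merges two memory instructions might use the new helper; Inst1, Inst2 and NewInst are placeholder pointers, not names from this change:

    // Keep parallel-access metadata only if both originals carried it and the
    // intersection of their access groups is non-empty.
    if (MDNode *Common = intersectAccessGroups(Inst1, Inst2))
      NewInst->setMetadata(LLVMContext::MD_access_group, Common);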
+
/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
- for (auto Kind :
- {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
+ for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_access_group}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -480,6 +582,9 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
+ case LLVMContext::MD_access_group:
+ MD = intersectAccessGroups(Inst, IJ);
+ break;
default:
llvm_unreachable("unhandled metadata");
}
@@ -491,6 +596,36 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
return Inst;
}
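Illustrative usage sketch (not part of the patch): a vectorizer that folded the scalar instructions in ScalarLoads into a single WideLoad would typically forward their metadata, now including !llvm.access.group, like this (both names are placeholders):

    SmallVector<Value *, 8> VL(ScalarLoads.begin(), ScalarLoads.end());
    propagateMetadata(WideLoad, VL);  // conservatively merges the kinds listed above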
+Constant *
+llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+ const InterleaveGroup<Instruction> &Group) {
+ // All 1's means mask is not needed.
+ if (Group.getNumMembers() == Group.getFactor())
+ return nullptr;
+
+ // TODO: support reversed access.
+ assert(!Group.isReverse() && "Reversed group not supported.");
+
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < Group.getFactor(); ++j) {
+ unsigned HasMember = Group.getMember(j) ? 1 : 0;
+ Mask.push_back(Builder.getInt1(HasMember));
+ }
+
+ return ConstantVector::get(Mask);
+}
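Worked example of the mask produced above: with VF = 4 and an interleave group of factor 3 whose members sit at indices 0 and 2 (index 1 is a gap), the per-member pattern <1, 0, 1> is emitted once per lane, giving the i1 mask <1,0,1, 1,0,1, 1,0,1, 1,0,1>.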
+
+Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
+ unsigned ReplicationFactor, unsigned VF) {
+ SmallVector<Constant *, 16> MaskVec;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < ReplicationFactor; j++)
+ MaskVec.push_back(Builder.getInt32(i));
+
+ return ConstantVector::get(MaskVec);
+}
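For comparison, createReplicatedMask() with ReplicationFactor = 3 and VF = 2 yields the shuffle mask <0, 0, 0, 1, 1, 1>: each lane index of the VF-wide source vector is repeated ReplicationFactor times.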
+
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
unsigned NumVecs) {
SmallVector<Constant *, 16> Mask;
@@ -575,3 +710,364 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
return ResList[0];
}
+
+bool InterleavedAccessInfo::isStrided(int Stride) {
+ unsigned Factor = std::abs(Stride);
+ return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
+}
+
+void InterleavedAccessInfo::collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides) {
+ auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+
+ // Since it's desired that the load/store instructions be maintained in
+ // "program order" for the interleaved access analysis, we have to visit the
+ // blocks in the loop in reverse postorder (i.e., in a topological order).
+ // Such an ordering will ensure that any load/store that may be executed
+ // before a second load/store will precede the second load/store in
+ // AccessStrideInfo.
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
+ for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
+ for (auto &I : *BB) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (!LI && !SI)
+ continue;
+
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
+ // pointers (even those that end up in groups with no gaps) will be overly
+ // conservative. For full groups, wrapping should be ok since if we would
+ // wrap around the address space we would do a memory access at nullptr
+ // even without the transformation. The wrapping checks are therefore
+ // deferred until after we've formed the interleaved groups.
+ int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false);
+
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+ // An alignment of 0 means target ABI alignment.
+ unsigned Align = getLoadStoreAlignment(&I);
+ if (!Align)
+ Align = DL.getABITypeAlignment(PtrTy->getElementType());
+
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+ }
+}
+
+// Analyze interleaved accesses and collect them into interleaved load and
+// store groups.
+//
+// When generating code for an interleaved load group, we effectively hoist all
+// loads in the group to the location of the first load in program order. When
+// generating code for an interleaved store group, we sink all stores to the
+// location of the last store. This code motion can change the order of load
+// and store instructions and may break dependences.
+//
+// The code generation strategy mentioned above ensures that we won't violate
+// any write-after-read (WAR) dependences.
+//
+// E.g., for the WAR dependence:  a = A[i];  // (1)
+//                                A[i] = b;  // (2)
+//
+// The store group of (2) is always inserted at or below (2), and the load
+// group of (1) is always inserted at or above (1). Thus, the instructions will
+// never be reordered. All other dependences are checked to ensure the
+// correctness of the instruction reordering.
+//
+// The algorithm visits all memory accesses in the loop in bottom-up program
+// order. Program order is established by traversing the blocks in the loop in
+// reverse postorder when collecting the accesses.
+//
+// We visit the memory accesses in bottom-up order because it can simplify the
+// construction of store groups in the presence of write-after-write (WAW)
+// dependences.
+//
+// E.g., for the WAW dependence:  A[i] = a;      // (1)
+//                                A[i] = b;      // (2)
+//                                A[i + 1] = c;  // (3)
+//
+// We will first create a store group with (3) and (2). (1) can't be added to
+// this group because it and (2) are dependent. However, (1) can be grouped
+// with other accesses that may precede it in program order. Note that a
+// bottom-up order does not imply that WAW dependences should not be checked.
+void InterleavedAccessInfo::analyzeInterleaving(
+ bool EnablePredicatedInterleavedMemAccesses) {
+ LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+ const ValueToValueMap &Strides = LAI->getSymbolicStrides();
+
+ // Holds all accesses with a constant stride.
+ MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
+ collectConstStrideAccesses(AccessStrideInfo, Strides);
+
+ if (AccessStrideInfo.empty())
+ return;
+
+ // Collect the dependences in the loop.
+ collectDependences();
+
+ // Holds all interleaved store groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
+ // Holds all interleaved load groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
+
+ // Search in bottom-up program order for pairs of accesses (A and B) that can
+ // form interleaved load or store groups. In the algorithm below, access A
+ // precedes access B in program order. We initialize a group for B in the
+ // outer loop of the algorithm, and then in the inner loop, we attempt to
+ // insert each A into B's group if:
+ //
+ // 1. A and B have the same stride,
+ // 2. A and B have the same memory object size, and
+ // 3. A belongs in B's group according to its distance from B.
+ //
+ // Special care is taken to ensure group formation will not break any
+ // dependences.
+ for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
+ BI != E; ++BI) {
+ Instruction *B = BI->first;
+ StrideDescriptor DesB = BI->second;
+
+ // Initialize a group for B if it has an allowable stride. Even if we don't
+ // create a group for B, we continue with the bottom-up algorithm to ensure
+ // we don't break any of B's dependences.
+ InterleaveGroup<Instruction> *Group = nullptr;
+ if (isStrided(DesB.Stride) &&
+ (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
+ Group = getInterleaveGroup(B);
+ if (!Group) {
+ LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
+ << '\n');
+ Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+ }
+ if (B->mayWriteToMemory())
+ StoreGroups.insert(Group);
+ else
+ LoadGroups.insert(Group);
+ }
+
+ for (auto AI = std::next(BI); AI != E; ++AI) {
+ Instruction *A = AI->first;
+ StrideDescriptor DesA = AI->second;
+
+ // Our code motion strategy implies that we can't have dependences
+ // between accesses in an interleaved group and other accesses located
+ // between the first and last member of the group. Note that this also
+ // means that a group can't have more than one member at a given offset.
+ // The accesses in a group can have dependences with other accesses, but
+ // we must ensure we don't extend the boundaries of the group such that
+ // we encompass those dependent accesses.
+ //
+ // For example, assume we have the sequence of accesses shown below in a
+ // stride-2 loop:
+ //
+ //      (1, 2) is a group | A[i]   = a;  // (1)
+ //                        | A[i-1] = b;  // (2) |
+ //                                        A[i-3] = c;  // (3)
+ //                          A[i]   = d;  // (4) | (2, 4) is not a group
+ //
+ // Because accesses (2) and (3) are dependent, we can group (2) with (1)
+ // but not with (4). If we did, the dependent access (3) would be within
+ // the boundaries of the (2, 4) group.
+ if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
+ // If a dependence exists and A is already in a group, we know that A
+ // must be a store since A precedes B and WAR dependences are allowed.
+ // Thus, A would be sunk below B. We release A's group to prevent this
+ // illegal code motion. A will then be free to form another group with
+ // instructions that precede it.
+ if (isInterleaved(A)) {
+ InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
+ StoreGroups.remove(StoreGroup);
+ releaseGroup(StoreGroup);
+ }
+
+ // If a dependence exists and A is not already in a group (or it was
+ // and we just released it), B might be hoisted above A (if B is a
+ // load) or another store might be sunk below A (if B is a store). In
+ // either case, we can't add additional instructions to B's group. B
+ // will only form a group with instructions that it precedes.
+ break;
+ }
+
+ // At this point, we've checked for illegal code motion. If either A or B
+ // isn't strided, there's nothing left to do.
+ if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
+ continue;
+
+ // Ignore A if it's already in a group or isn't the same kind of memory
+ // operation as B.
+ // Note that mayReadFromMemory() isn't mutually exclusive with
+ // mayWriteToMemory() in the case of atomic loads. We shouldn't see those
+ // here; canVectorizeMemory() should have returned false, unless we asked
+ // for optimization remarks.
+ if (isInterleaved(A) ||
+ (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
+ (A->mayWriteToMemory() != B->mayWriteToMemory()))
+ continue;
+
+ // Check rules 1 and 2. Ignore A if its stride or size is different from
+ // that of B.
+ if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
+ continue;
+
+ // Ignore A if the memory objects of A and B don't belong to the same
+ // address space.
+ if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
+ continue;
+
+ // Calculate the distance from A to B.
+ const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
+ if (!DistToB)
+ continue;
+ int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
+
+ // Check rule 3. Ignore A if its distance to B is not a multiple of the
+ // size.
+ if (DistanceToB % static_cast<int64_t>(DesB.Size))
+ continue;
+
+ // All members of a predicated interleave-group must have the same predicate,
+ // and currently must reside in the same BB.
+ BasicBlock *BlockA = A->getParent();
+ BasicBlock *BlockB = B->getParent();
+ if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
+ (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
+ continue;
+
+ // The index of A is the index of B plus A's distance to B in multiples
+ // of the size.
+ int IndexA =
+ Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
+
+ // Try to insert A into B's group.
+ if (Group->insertMember(A, IndexA, DesA.Align)) {
+ LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
+ << " into the interleave group with" << *B
+ << '\n');
+ InterleaveGroupMap[A] = Group;
+
+ // Set the first load in program order as the insert position.
+ if (A->mayReadFromMemory())
+ Group->setInsertPos(A);
+ }
+ } // Iteration over A accesses.
+ } // Iteration over B accesses.
+
+ // Remove interleaved store groups with gaps.
+ for (auto *Group : StoreGroups)
+ if (Group->getNumMembers() != Group->getFactor()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
+ releaseGroup(Group);
+ }
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // not check wrapping (see documentation there).
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true but making sure we don't exceed the threshold
+ // of runtime SCEV assumptions checks (thereby potentially failing to
+ // vectorize altogether).
+ // Additional optional optimizations:
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // wrap then we can deduce that all pointers in the group don't wrap.
+ // This means that we can forcefully peel the loop in order to only have to
+ // check the first pointer for no-wrap. Once we change to Assume=true we will
+ // only need at most one runtime check per interleaved group.
+ for (auto *Group : LoadGroups) {
+ // Case 1: A full group. We can skip the checks; for full groups, if the wide
+ // load would wrap around the address space we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Case 2: If the first and last members of the group don't wrap, this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist),
+ // and group member Factor - 1; if the latter doesn't exist we rely on
+ // peeling (if it is a non-reversed access -- see Case 3).
+ Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "first group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
+ if (LastMember) {
+ Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "last group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ }
+ } else {
+ // Case 3: A non-reversed interleaved load group with gaps: We need
+ // to execute at least one scalar epilogue iteration. This will ensure
+ // we don't speculatively access memory out-of-bounds. We only need
+ // to look for a member at index factor - 1, since every group must have
+ // a member at index zero.
+ if (Group->isReverse()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "a reverse access with gaps.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ LLVM_DEBUG(
+ dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
+ }
+ }
+}
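Illustrative example (not part of the patch) of a loop this analysis is meant to recognize, assuming it is otherwise vectorizable: both loads have stride 2 and equal size and sit one element apart, so they can form a full factor-2 load group, while the unit-stride store is not interleaved.

    void sum_pairs(int *a, const int *b, int n) {
      for (int i = 0; i < n; ++i)
        a[i] = b[2 * i] + b[2 * i + 1];  // candidate interleave group of factor 2
    }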
+
+void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
+ // If no group had triggered the requirement to create an epilogue loop,
+ // there is nothing to do.
+ if (!requiresScalarEpilogue())
+ return;
+
+ // Avoid releasing a Group twice.
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
+ for (auto &I : InterleaveGroupMap) {
+ InterleaveGroup<Instruction> *Group = I.second;
+ if (Group->requiresScalarEpilogue())
+ DelSet.insert(Group);
+ }
+ for (auto *Ptr : DelSet) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Invalidate candidate interleaved group due to gaps that "
+ "require a scalar epilogue (not allowed under optsize) and cannot "
+ "be masked (not enabled). \n");
+ releaseGroup(Ptr);
+ }
+
+ RequiresScalarEpilogue = false;
+}
+
+template <typename InstT>
+void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
+ llvm_unreachable("addMetadata can only be used for Instruction");
+}
+
+namespace llvm {
+template <>
+void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
+ SmallVector<Value *, 4> VL;
+ std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
+ [](std::pair<int, Instruction *> p) { return p.second; });
+ propagateMetadata(NewInst, VL);
+}
+}
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
index da9855ff630b..eab7ec819536 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -592,6 +592,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
+ KEYWORD(aarch64_vector_pcs);
KEYWORD(msp430_intrcc);
KEYWORD(avr_intrcc);
KEYWORD(avr_signalcc);
@@ -678,6 +679,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(sanitize_hwaddress);
KEYWORD(sanitize_thread);
KEYWORD(sanitize_memory);
+ KEYWORD(speculative_load_hardening);
KEYWORD(swifterror);
KEYWORD(swiftself);
KEYWORD(uwtable);
@@ -738,6 +740,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(readOnly);
KEYWORD(noRecurse);
KEYWORD(returnDoesNotAlias);
+ KEYWORD(noInline);
KEYWORD(calls);
KEYWORD(callee);
KEYWORD(hotness);
@@ -785,6 +788,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(info);
KEYWORD(byte);
KEYWORD(bit);
+ KEYWORD(varFlags);
#undef KEYWORD
@@ -820,6 +824,8 @@ lltok::Kind LLLexer::LexIdentifier() {
} \
} while (false)
+ INSTKEYWORD(fneg, FNeg);
+
INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
@@ -899,17 +905,27 @@ lltok::Kind LLLexer::LexIdentifier() {
return lltok::DIFlag;
}
+ if (Keyword.startswith("DISPFlag")) {
+ StrVal.assign(Keyword.begin(), Keyword.end());
+ return lltok::DISPFlag;
+ }
+
if (Keyword.startswith("CSK_")) {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::ChecksumKind;
}
if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
- Keyword == "LineTablesOnly") {
+ Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly") {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::EmissionKind;
}
+ if (Keyword == "GNU" || Keyword == "None" || Keyword == "Default") {
+ StrVal.assign(Keyword.begin(), Keyword.end());
+ return lltok::NameTableKind;
+ }
+
// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
// the CFE to avoid forcing it to deal with 64-bit numbers.
if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index 7cf74dd16f5a..ee634505581e 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -1276,6 +1276,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
B.addAttribute(Attribute::SanitizeThread); break;
case lltok::kw_sanitize_memory:
B.addAttribute(Attribute::SanitizeMemory); break;
+ case lltok::kw_speculative_load_hardening:
+ B.addAttribute(Attribute::SpeculativeLoadHardening);
+ break;
case lltok::kw_strictfp: B.addAttribute(Attribute::StrictFP); break;
case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break;
@@ -1317,7 +1320,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy,
const std::string &Name) {
if (auto *FT = dyn_cast<FunctionType>(PTy->getElementType()))
- return Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
+ return Function::Create(FT, GlobalValue::ExternalWeakLinkage,
+ PTy->getAddressSpace(), Name, M);
else
return new GlobalVariable(*M, PTy->getElementType(), false,
GlobalValue::ExternalWeakLinkage, nullptr, Name,
@@ -1325,11 +1329,33 @@ static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy,
PTy->getAddressSpace());
}
+Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
+ Value *Val, bool IsCall) {
+ if (Val->getType() == Ty)
+ return Val;
+ // For calls we also accept variables in the program address space.
+ Type *SuggestedTy = Ty;
+ if (IsCall && isa<PointerType>(Ty)) {
+ Type *TyInProgAS = cast<PointerType>(Ty)->getElementType()->getPointerTo(
+ M->getDataLayout().getProgramAddressSpace());
+ SuggestedTy = TyInProgAS;
+ if (Val->getType() == TyInProgAS)
+ return Val;
+ }
+ if (Ty->isLabelTy())
+ Error(Loc, "'" + Name + "' is not a basic block");
+ else
+ Error(Loc, "'" + Name + "' defined with type '" +
+ getTypeString(Val->getType()) + "' but expected '" +
+ getTypeString(SuggestedTy) + "'");
+ return nullptr;
+}
+
/// GetGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
- LocTy Loc) {
+ LocTy Loc, bool IsCall) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
Error(Loc, "global variable reference must have pointer type");
@@ -1349,12 +1375,9 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
}
// If we have the value in the symbol table or fwd-ref table, return it.
- if (Val) {
- if (Val->getType() == Ty) return Val;
- Error(Loc, "'@" + Name + "' defined with type '" +
- getTypeString(Val->getType()) + "'");
- return nullptr;
- }
+ if (Val)
+ return cast_or_null<GlobalValue>(
+ checkValidVariableType(Loc, "@" + Name, Ty, Val, IsCall));
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, Name);
@@ -1362,7 +1385,8 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
return FwdVal;
}
-GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
+GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc,
+ bool IsCall) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
Error(Loc, "global variable reference must have pointer type");
@@ -1380,12 +1404,9 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
}
// If we have the value in the symbol table or fwd-ref table, return it.
- if (Val) {
- if (Val->getType() == Ty) return Val;
- Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
- getTypeString(Val->getType()) + "'");
- return nullptr;
- }
+ if (Val)
+ return cast_or_null<GlobalValue>(
+ checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val, IsCall));
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, "");
@@ -1500,8 +1521,8 @@ bool LLParser::ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM) {
/// ParseOptionalAddrSpace
/// := /*empty*/
/// := 'addrspace' '(' uint32 ')'
-bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
- AddrSpace = 0;
+bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS) {
+ AddrSpace = DefaultAS;
if (!EatIfPresent(lltok::kw_addrspace))
return false;
return ParseToken(lltok::lparen, "expected '(' in address space") ||
@@ -1601,6 +1622,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_sanitize_hwaddress:
case lltok::kw_sanitize_memory:
case lltok::kw_sanitize_thread:
+ case lltok::kw_speculative_load_hardening:
case lltok::kw_ssp:
case lltok::kw_sspreq:
case lltok::kw_sspstrong:
@@ -1697,6 +1719,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_sanitize_hwaddress:
case lltok::kw_sanitize_memory:
case lltok::kw_sanitize_thread:
+ case lltok::kw_speculative_load_hardening:
case lltok::kw_ssp:
case lltok::kw_sspreq:
case lltok::kw_sspstrong:
@@ -1851,6 +1874,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'arm_apcscc'
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
+/// ::= 'aarch64_vector_pcs'
/// ::= 'msp430_intrcc'
/// ::= 'avr_intrcc'
/// ::= 'avr_signalcc'
@@ -1894,6 +1918,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
+ case lltok::kw_aarch64_vector_pcs:CC = CallingConv::AArch64_VectorCall; break;
case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
case lltok::kw_avr_intrcc: CC = CallingConv::AVR_INTR; break;
case lltok::kw_avr_signalcc: CC = CallingConv::AVR_SIGNAL; break;
@@ -2741,19 +2766,6 @@ bool LLParser::PerFunctionState::FinishFunction() {
return false;
}
-static bool isValidVariableType(Module *M, Type *Ty, Value *Val, bool IsCall) {
- if (Val->getType() == Ty)
- return true;
- // For calls we also accept variables in the program address space
- if (IsCall && isa<PointerType>(Ty)) {
- Type *TyInProgAS = cast<PointerType>(Ty)->getElementType()->getPointerTo(
- M->getDataLayout().getProgramAddressSpace());
- if (Val->getType() == TyInProgAS)
- return true;
- }
- return false;
-}
-
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
@@ -2771,16 +2783,8 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty,
}
// If we have the value in the symbol table or fwd-ref table, return it.
- if (Val) {
- if (isValidVariableType(P.M, Ty, Val, IsCall))
- return Val;
- if (Ty->isLabelTy())
- P.Error(Loc, "'%" + Name + "' is not a basic block");
- else
- P.Error(Loc, "'%" + Name + "' defined with type '" +
- getTypeString(Val->getType()) + "'");
- return nullptr;
- }
+ if (Val)
+ return P.checkValidVariableType(Loc, "%" + Name, Ty, Val, IsCall);
// Don't make placeholders with invalid type.
if (!Ty->isFirstClassType()) {
@@ -2814,16 +2818,8 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc,
}
// If we have the value in the symbol table or fwd-ref table, return it.
- if (Val) {
- if (isValidVariableType(P.M, Ty, Val, IsCall))
- return Val;
- if (Ty->isLabelTy())
- P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
- else
- P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
- getTypeString(Val->getType()) + "'");
- return nullptr;
- }
+ if (Val)
+ return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val, IsCall);
if (!Ty->isFirstClassType()) {
P.Error(Loc, "invalid use of a non-first-class type");
@@ -3299,7 +3295,31 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ID.Kind = ValID::t_Constant;
return false;
}
-
+
+ // Unary Operators.
+ case lltok::kw_fneg: {
+ unsigned Opc = Lex.getUIntVal();
+ Constant *Val;
+ Lex.Lex();
+ if (ParseToken(lltok::lparen, "expected '(' in unary constantexpr") ||
+ ParseGlobalTypeAndValue(Val) ||
+ ParseToken(lltok::rparen, "expected ')' in unary constantexpr"))
+ return true;
+
+ // Check that the type is valid for the operator.
+ switch (Opc) {
+ case Instruction::FNeg:
+ if (!Val->getType()->isFPOrFPVectorTy())
+ return Error(ID.Loc, "constexpr requires fp operands");
+ break;
+ default: llvm_unreachable("Unknown unary operator!");
+ }
+ unsigned Flags = 0;
+ Constant *C = ConstantExpr::get(Opc, Val, Flags);
+ ID.ConstantVal = C;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
// Binary Operators.
case lltok::kw_add:
case lltok::kw_fadd:
@@ -3718,10 +3738,21 @@ struct EmissionKindField : public MDUnsignedField {
EmissionKindField() : MDUnsignedField(0, DICompileUnit::LastEmissionKind) {}
};
+struct NameTableKindField : public MDUnsignedField {
+ NameTableKindField()
+ : MDUnsignedField(
+ 0, (unsigned)
+ DICompileUnit::DebugNameTableKind::LastDebugNameTableKind) {}
+};
+
struct DIFlagField : public MDFieldImpl<DINode::DIFlags> {
DIFlagField() : MDFieldImpl(DINode::FlagZero) {}
};
+struct DISPFlagField : public MDFieldImpl<DISubprogram::DISPFlags> {
+ DISPFlagField() : MDFieldImpl(DISubprogram::SPFlagZero) {}
+};
+
struct MDSignedField : public MDFieldImpl<int64_t> {
int64_t Min;
int64_t Max;
@@ -3938,6 +3969,25 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, EmissionKindField &Result
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+ NameTableKindField &Result) {
+ if (Lex.getKind() == lltok::APSInt)
+ return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
+
+ if (Lex.getKind() != lltok::NameTableKind)
+ return TokError("expected nameTable kind");
+
+ auto Kind = DICompileUnit::getNameTableKind(Lex.getStrVal());
+ if (!Kind)
+ return TokError("invalid nameTable kind" + Twine(" '") + Lex.getStrVal() +
+ "'");
+ assert(((unsigned)*Kind) <= Result.Max && "Expected valid nameTable kind");
+ Result.assign((unsigned)*Kind);
+ Lex.Lex();
+ return false;
+}
+
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
DwarfAttEncodingField &Result) {
if (Lex.getKind() == lltok::APSInt)
return ParseMDField(Loc, Name, static_cast<MDUnsignedField &>(Result));
@@ -3995,6 +4045,46 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
return false;
}
+/// DISPFlagField
+/// ::= uint32
+/// ::= DISPFlagVector
+/// ::= DISPFlagVector '|' DISPFlag* '|' uint32
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DISPFlagField &Result) {
+
+ // Parser for a single flag.
+ auto parseFlag = [&](DISubprogram::DISPFlags &Val) {
+ if (Lex.getKind() == lltok::APSInt && !Lex.getAPSIntVal().isSigned()) {
+ uint32_t TempVal = static_cast<uint32_t>(Val);
+ bool Res = ParseUInt32(TempVal);
+ Val = static_cast<DISubprogram::DISPFlags>(TempVal);
+ return Res;
+ }
+
+ if (Lex.getKind() != lltok::DISPFlag)
+ return TokError("expected debug info flag");
+
+ Val = DISubprogram::getFlag(Lex.getStrVal());
+ if (!Val)
+ return TokError(Twine("invalid subprogram debug info flag '") +
+ Lex.getStrVal() + "'");
+ Lex.Lex();
+ return false;
+ };
+
+ // Parse the flags and combine them together.
+ DISubprogram::DISPFlags Combined = DISubprogram::SPFlagZero;
+ do {
+ DISubprogram::DISPFlags Val;
+ if (parseFlag(Val))
+ return true;
+ Combined |= Val;
+ } while (EatIfPresent(lltok::bar));
+
+ Result.assign(Combined);
+ return false;
+}
+
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
MDSignedField &Result) {
@@ -4206,18 +4296,21 @@ bool LLParser::ParseSpecializedMDNode(MDNode *&N, bool IsDistinct) {
(IsDistinct ? CLASS::getDistinct ARGS : CLASS::get ARGS)
/// ParseDILocationFields:
-/// ::= !DILocation(line: 43, column: 8, scope: !5, inlinedAt: !6)
+/// ::= !DILocation(line: 43, column: 8, scope: !5, inlinedAt: !6,
+/// isImplicitCode: true)
bool LLParser::ParseDILocation(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(line, LineField, ); \
OPTIONAL(column, ColumnField, ); \
REQUIRED(scope, MDField, (/* AllowNull */ false)); \
- OPTIONAL(inlinedAt, MDField, );
+ OPTIONAL(inlinedAt, MDField, ); \
+ OPTIONAL(isImplicitCode, MDBoolField, (false));
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(
- DILocation, (Context, line.Val, column.Val, scope.Val, inlinedAt.Val));
+ Result =
+ GET_OR_DISTINCT(DILocation, (Context, line.Val, column.Val, scope.Val,
+ inlinedAt.Val, isImplicitCode.Val));
return false;
}
@@ -4281,19 +4374,21 @@ bool LLParser::ParseDIEnumerator(MDNode *&Result, bool IsDistinct) {
}
/// ParseDIBasicType:
-/// ::= !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32)
+/// ::= !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32,
+/// encoding: DW_ATE_encoding, flags: 0)
bool LLParser::ParseDIBasicType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \
OPTIONAL(name, MDStringField, ); \
OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
- OPTIONAL(encoding, DwarfAttEncodingField, );
+ OPTIONAL(encoding, DwarfAttEncodingField, ); \
+ OPTIONAL(flags, DIFlagField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DIBasicType, (Context, tag.Val, name.Val, size.Val,
- align.Val, encoding.Val));
+ align.Val, encoding.Val, flags.Val));
return false;
}
@@ -4446,7 +4541,8 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
OPTIONAL(dwoId, MDUnsignedField, ); \
OPTIONAL(splitDebugInlining, MDBoolField, = true); \
OPTIONAL(debugInfoForProfiling, MDBoolField, = false); \
- OPTIONAL(gnuPubnames, MDBoolField, = false);
+ OPTIONAL(nameTableKind, NameTableKindField, ); \
+ OPTIONAL(debugBaseAddress, MDBoolField, = false);
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4454,7 +4550,8 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val,
runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val,
retainedTypes.Val, globals.Val, imports.Val, macros.Val, dwoId.Val,
- splitDebugInlining.Val, debugInfoForProfiling.Val, gnuPubnames.Val);
+ splitDebugInlining.Val, debugInfoForProfiling.Val, nameTableKind.Val,
+ debugBaseAddress.Val);
return false;
}
@@ -4464,8 +4561,8 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
/// isDefinition: true, scopeLine: 8, containingType: !3,
/// virtuality: DW_VIRTUALTIY_pure_virtual,
/// virtualIndex: 10, thisAdjustment: 4, flags: 11,
-/// isOptimized: false, templateParams: !4, declaration: !5,
-/// retainedNodes: !6, thrownTypes: !7)
+/// spFlags: 10, isOptimized: false, templateParams: !4,
+/// declaration: !5, retainedNodes: !6, thrownTypes: !7)
bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
auto Loc = Lex.getLoc();
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
@@ -4483,26 +4580,31 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
OPTIONAL(virtualIndex, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(thisAdjustment, MDSignedField, (0, INT32_MIN, INT32_MAX)); \
OPTIONAL(flags, DIFlagField, ); \
+ OPTIONAL(spFlags, DISPFlagField, ); \
OPTIONAL(isOptimized, MDBoolField, ); \
OPTIONAL(unit, MDField, ); \
OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
- OPTIONAL(retainedNodes, MDField, ); \
+ OPTIONAL(retainedNodes, MDField, ); \
OPTIONAL(thrownTypes, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- if (isDefinition.Val && !IsDistinct)
+ // An explicit spFlags field takes precedence over individual fields in
+ // older IR versions.
+ DISubprogram::DISPFlags SPFlags =
+ spFlags.Seen ? spFlags.Val
+ : DISubprogram::toSPFlags(isLocal.Val, isDefinition.Val,
+ isOptimized.Val, virtuality.Val);
+ if ((SPFlags & DISubprogram::SPFlagDefinition) && !IsDistinct)
return Lex.Error(
Loc,
- "missing 'distinct', required for !DISubprogram when 'isDefinition'");
-
+ "missing 'distinct', required for !DISubprogram that is a Definition");
Result = GET_OR_DISTINCT(
DISubprogram,
(Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val,
- type.Val, isLocal.Val, isDefinition.Val, scopeLine.Val,
- containingType.Val, virtuality.Val, virtualIndex.Val, thisAdjustment.Val,
- flags.Val, isOptimized.Val, unit.Val, templateParams.Val,
+ type.Val, scopeLine.Val, containingType.Val, virtualIndex.Val,
+ thisAdjustment.Val, flags.Val, SPFlags, unit.Val, templateParams.Val,
declaration.Val, retainedNodes.Val, thrownTypes.Val));
return false;
}
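With this change a textual !DISubprogram can either spell the packed field directly, e.g. spFlags: DISPFlagDefinition | DISPFlagOptimized, or keep the legacy booleans (isDefinition: true, isOptimized: true), from which the parser derives the same value via DISubprogram::toSPFlags(); when both appear, the explicit spFlags wins. The flag spellings follow the DISPFlag prefix recognized by the lexer change above.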
@@ -4637,7 +4739,8 @@ bool LLParser::ParseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
/// ParseDIGlobalVariable:
/// ::= !DIGlobalVariable(scope: !0, name: "foo", linkageName: "foo",
/// file: !1, line: 7, type: !2, isLocal: false,
-/// isDefinition: true, declaration: !3, align: 8)
+/// isDefinition: true, templateParams: !3,
+/// declaration: !4, align: 8)
bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \
@@ -4648,15 +4751,17 @@ bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(type, MDField, ); \
OPTIONAL(isLocal, MDBoolField, ); \
OPTIONAL(isDefinition, MDBoolField, (true)); \
+ OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(DIGlobalVariable,
- (Context, scope.Val, name.Val, linkageName.Val,
- file.Val, line.Val, type.Val, isLocal.Val,
- isDefinition.Val, declaration.Val, align.Val));
+ Result =
+ GET_OR_DISTINCT(DIGlobalVariable,
+ (Context, scope.Val, name.Val, linkageName.Val, file.Val,
+ line.Val, type.Val, isLocal.Val, isDefinition.Val,
+ declaration.Val, templateParams.Val, align.Val));
return false;
}
@@ -4912,10 +5017,10 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return false;
}
case ValID::t_GlobalName:
- V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
+ V = GetGlobalVal(ID.StrVal, Ty, ID.Loc, IsCall);
return V == nullptr;
case ValID::t_GlobalID:
- V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
+ V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc, IsCall);
return V == nullptr;
case ValID::t_APSInt:
if (!Ty->isIntegerTy())
@@ -5058,8 +5163,8 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
/// FunctionHeader
/// ::= OptionalLinkage OptionalPreemptionSpecifier OptionalVisibility
/// OptionalCallingConv OptRetAttrs OptUnnamedAddr Type GlobalName
-/// '(' ArgList ')' OptFuncAttrs OptSection OptionalAlign OptGC
-/// OptionalPrefix OptionalPrologue OptPersonalityFn
+/// '(' ArgList ')' OptAddrSpace OptFuncAttrs OptSection OptionalAlign
+/// OptGC OptionalPrefix OptionalPrologue OptPersonalityFn
bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Parse the linkage.
LocTy LinkageLoc = Lex.getLoc();
@@ -5137,6 +5242,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
unsigned Alignment;
std::string GC;
GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
+ unsigned AddrSpace = 0;
Constant *Prefix = nullptr;
Constant *Prologue = nullptr;
Constant *PersonalityFn = nullptr;
@@ -5144,6 +5250,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (ParseArgumentList(ArgList, isVarArg) ||
ParseOptionalUnnamedAddr(UnnamedAddr) ||
+ ParseOptionalProgramAddrSpace(AddrSpace) ||
ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false,
BuiltinLoc) ||
(EatIfPresent(lltok::kw_section) &&
@@ -5188,7 +5295,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
FunctionType *FT =
FunctionType::get(RetType, ParamTypeList, isVarArg);
- PointerType *PFT = PointerType::getUnqual(FT);
+ PointerType *PFT = PointerType::get(FT, AddrSpace);
Fn = nullptr;
if (!FunctionName.empty()) {
@@ -5202,8 +5309,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
"function as global value!");
if (Fn->getType() != PFT)
return Error(FRVI->second.second, "invalid forward reference to "
- "function '" + FunctionName + "' with wrong type!");
-
+ "function '" + FunctionName + "' with wrong type: "
+ "expected '" + getTypeString(PFT) + "' but was '" +
+ getTypeString(Fn->getType()) + "'");
ForwardRefVals.erase(FRVI);
} else if ((Fn = M->getFunction(FunctionName))) {
// Reject redefinitions.
@@ -5221,16 +5329,21 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn = cast<Function>(I->second.first);
if (Fn->getType() != PFT)
return Error(NameLoc, "type of definition and forward reference of '@" +
- Twine(NumberedVals.size()) + "' disagree");
+ Twine(NumberedVals.size()) + "' disagree: "
+ "expected '" + getTypeString(PFT) + "' but was '" +
+ getTypeString(Fn->getType()) + "'");
ForwardRefValIDs.erase(I);
}
}
if (!Fn)
- Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M);
+ Fn = Function::Create(FT, GlobalValue::ExternalLinkage, AddrSpace,
+ FunctionName, M);
else // Move the forward-reference to the correct spot in the module.
M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn);
+ assert(Fn->getAddressSpace() == AddrSpace && "Created function in wrong AS");
+
if (FunctionName.empty())
NumberedVals.push_back(Fn);
@@ -5419,7 +5532,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// Set the name on the instruction.
if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
- } while (!isa<TerminatorInst>(Inst));
+ } while (!Inst->isTerminator());
return false;
}
@@ -5454,6 +5567,16 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
case lltok::kw_catchpad: return ParseCatchPad(Inst, PFS);
case lltok::kw_cleanuppad: return ParseCleanupPad(Inst, PFS);
+ // Unary Operators.
+ case lltok::kw_fneg: {
+ FastMathFlags FMF = EatFastMathFlagsIfPresent();
+ int Res = ParseUnaryOp(Inst, PFS, KeywordVal, 2);
+ if (Res != 0)
+ return Res;
+ if (FMF.any())
+ Inst->setFastMathFlags(FMF);
+ return false;
+ }
// Binary Operators.
case lltok::kw_add:
case lltok::kw_sub:
@@ -5749,6 +5872,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
std::vector<unsigned> FwdRefAttrGrps;
LocTy NoBuiltinLoc;
unsigned CC;
+ unsigned InvokeAddrSpace;
Type *RetType = nullptr;
LocTy RetTypeLoc;
ValID CalleeID;
@@ -5757,6 +5881,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
BasicBlock *NormalBB, *UnwindBB;
if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
+ ParseOptionalProgramAddrSpace(InvokeAddrSpace) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
@@ -5788,8 +5913,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
- /*IsCall=*/true))
+ if (ConvertValIDToValue(PointerType::get(Ty, InvokeAddrSpace), CalleeID,
+ Callee, &PFS, /*IsCall=*/true))
return true;
// Set up the Attribute for the function.
@@ -6024,6 +6149,43 @@ bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
}
//===----------------------------------------------------------------------===//
+// Unary Operators.
+//===----------------------------------------------------------------------===//
+
+/// ParseUnaryOp
+/// ::= UnaryOp TypeAndValue
+///
+/// If OperandType is 0, then any FP or integer operand is allowed. If it is 1,
+/// then any integer operand is allowed; if it is 2, any FP operand is allowed.
+bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
+ unsigned Opc, unsigned OperandType) {
+ LocTy Loc; Value *LHS;
+ if (ParseTypeAndValue(LHS, Loc, PFS))
+ return true;
+
+ bool Valid;
+ switch (OperandType) {
+ default: llvm_unreachable("Unknown operand type!");
+ case 0: // int or FP.
+ Valid = LHS->getType()->isIntOrIntVectorTy() ||
+ LHS->getType()->isFPOrFPVectorTy();
+ break;
+ case 1:
+ Valid = LHS->getType()->isIntOrIntVectorTy();
+ break;
+ case 2:
+ Valid = LHS->getType()->isFPOrFPVectorTy();
+ break;
+ }
+
+ if (!Valid)
+ return Error(Loc, "invalid operand type for instruction");
+
+ Inst = UnaryOperator::Create((Instruction::UnaryOps)Opc, LHS);
+ return false;
+}
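Illustrative sketch (not part of the patch) of what this path builds for input such as fneg fast float %x, using the same UnaryOperator API the code above calls; buildFNeg is a made-up helper name:

    static Instruction *buildFNeg(Value *X) {  // X: any FP or FP-vector value
      Instruction *Neg = UnaryOperator::Create(Instruction::FNeg, X);
      FastMathFlags FMF;
      FMF.setFast();
      Neg->setFastMathFlags(FMF);  // mirrors the 'fast' handling in ParseInstruction
      return Neg;
    }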
+
+//===----------------------------------------------------------------------===//
// Binary Operators.
//===----------------------------------------------------------------------===//
@@ -6332,6 +6494,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
AttrBuilder RetAttrs, FnAttrs;
std::vector<unsigned> FwdRefAttrGrps;
LocTy BuiltinLoc;
+ unsigned CallAddrSpace;
unsigned CC;
Type *RetType = nullptr;
LocTy RetTypeLoc;
@@ -6348,6 +6511,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
FastMathFlags FMF = EatFastMathFlagsIfPresent();
if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
+ ParseOptionalProgramAddrSpace(CallAddrSpace) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
ParseValID(CalleeID) ||
ParseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail,
@@ -6380,8 +6544,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Look up the callee.
Value *Callee;
- if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
- /*IsCall=*/true))
+ if (ConvertValIDToValue(PointerType::get(Ty, CallAddrSpace), CalleeID, Callee,
+ &PFS, /*IsCall=*/true))
return true;
// Set up the Attribute for the function.
@@ -6685,8 +6849,13 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
return Error(PtrLoc, "atomicrmw operand must be a pointer");
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
return Error(ValLoc, "atomicrmw value and pointer type do not match");
- if (!Val->getType()->isIntegerTy())
- return Error(ValLoc, "atomicrmw operand must be an integer");
+
+ if (!Val->getType()->isIntegerTy()) {
+ return Error(ValLoc, "atomicrmw " +
+ AtomicRMWInst::getOperationName(Operation) +
+ " operand must be an integer");
+ }
+
unsigned Size = Val->getType()->getPrimitiveSizeInBits();
if (Size < 8 || (Size & (Size - 1)))
return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
@@ -7350,8 +7519,14 @@ bool LLParser::ParseArgs(std::vector<uint64_t> &Args) {
return false;
}
-static ValueInfo EmptyVI =
- ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-8);
+static const auto FwdVIRef = (GlobalValueSummaryMapTy::value_type *)-8;
+
+static void resolveFwdRef(ValueInfo *Fwd, ValueInfo &Resolved) {
+ bool ReadOnly = Fwd->isReadOnly();
+ *Fwd = Resolved;
+ if (ReadOnly)
+ Fwd->setReadOnly();
+}
/// Stores the given Name/GUID and associated summary into the Index.
/// Also updates any forward references to the associated entry ID.
@@ -7387,9 +7562,9 @@ void LLParser::AddGlobalValueToIndex(
auto FwdRefVIs = ForwardRefValueInfos.find(ID);
if (FwdRefVIs != ForwardRefValueInfos.end()) {
for (auto VIRef : FwdRefVIs->second) {
- assert(*VIRef.first == EmptyVI &&
+ assert(VIRef.first->getRef() == FwdVIRef &&
"Forward referenced ValueInfo expected to be empty");
- *VIRef.first = VI;
+ resolveFwdRef(VIRef.first, VI);
}
ForwardRefValueInfos.erase(FwdRefVIs);
}
@@ -7552,8 +7727,8 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
return true;
auto FS = llvm::make_unique<FunctionSummary>(
- GVFlags, InstCount, FFlags, std::move(Refs), std::move(Calls),
- std::move(TypeIdInfo.TypeTests),
+ GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs),
+ std::move(Calls), std::move(TypeIdInfo.TypeTests),
std::move(TypeIdInfo.TypeTestAssumeVCalls),
std::move(TypeIdInfo.TypeCheckedLoadVCalls),
std::move(TypeIdInfo.TypeTestAssumeConstVCalls),
@@ -7579,11 +7754,14 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
/*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
/*Live=*/false, /*IsLocal=*/false);
+ GlobalVarSummary::GVarFlags GVarFlags(/*ReadOnly*/ false);
std::vector<ValueInfo> Refs;
if (ParseToken(lltok::colon, "expected ':' here") ||
ParseToken(lltok::lparen, "expected '(' here") ||
ParseModuleReference(ModulePath) ||
- ParseToken(lltok::comma, "expected ',' here") || ParseGVFlags(GVFlags))
+ ParseToken(lltok::comma, "expected ',' here") || ParseGVFlags(GVFlags) ||
+ ParseToken(lltok::comma, "expected ',' here") ||
+ ParseGVarFlags(GVarFlags))
return true;
// Parse optional refs field
@@ -7595,7 +7773,8 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
if (ParseToken(lltok::rparen, "expected ')' here"))
return true;
- auto GS = llvm::make_unique<GlobalVarSummary>(GVFlags, std::move(Refs));
+ auto GS =
+ llvm::make_unique<GlobalVarSummary>(GVFlags, GVarFlags, std::move(Refs));
GS->setModulePath(ModulePath);
@@ -7640,7 +7819,7 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
AS->setModulePath(ModulePath);
// Record forward reference if the aliasee is not parsed yet.
- if (AliaseeVI == EmptyVI) {
+ if (AliaseeVI.getRef() == FwdVIRef) {
auto FwdRef = ForwardRefAliasees.insert(
std::make_pair(GVId, std::vector<std::pair<AliasSummary *, LocTy>>()));
FwdRef.first->second.push_back(std::make_pair(AS.get(), Loc));
@@ -7667,6 +7846,7 @@ bool LLParser::ParseFlag(unsigned &Val) {
/// := 'funcFlags' ':' '(' ['readNone' ':' Flag]?
/// [',' 'readOnly' ':' Flag]? [',' 'noRecurse' ':' Flag]?
/// [',' 'returnDoesNotAlias' ':' Flag]? ')'
+/// [',' 'noInline' ':' Flag]? ')'
bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
assert(Lex.getKind() == lltok::kw_funcFlags);
Lex.Lex();
@@ -7702,6 +7882,12 @@ bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
return true;
FFlags.ReturnDoesNotAlias = Val;
break;
+ case lltok::kw_noInline:
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Val))
+ return true;
+ FFlags.NoInline = Val;
+ break;
default:
return Error(Lex.getLoc(), "expected function flag type");
}
@@ -7755,7 +7941,7 @@ bool LLParser::ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
// Keep track of the Call array index needing a forward reference.
// We will save the location of the ValueInfo needing an update, but
// can only do so once the std::vector is finalized.
- if (VI == EmptyVI)
+ if (VI.getRef() == FwdVIRef)
IdToIndexMap[GVId].push_back(std::make_pair(Calls.size(), Loc));
Calls.push_back(FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, RelBF)});
@@ -7767,7 +7953,7 @@ bool LLParser::ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
// of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
for (auto P : I.second) {
- assert(Calls[P.first].first == EmptyVI &&
+ assert(Calls[P.first].first.getRef() == FwdVIRef &&
"Forward referenced ValueInfo expected to be empty");
auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
@@ -7818,28 +8004,42 @@ bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
ParseToken(lltok::lparen, "expected '(' in refs"))
return true;
- IdToIndexMapType IdToIndexMap;
- // Parse each ref edge
- do {
+ struct ValueContext {
ValueInfo VI;
- LocTy Loc = Lex.getLoc();
unsigned GVId;
- if (ParseGVReference(VI, GVId))
+ LocTy Loc;
+ };
+ std::vector<ValueContext> VContexts;
+ // Parse each ref edge
+ do {
+ ValueContext VC;
+ VC.Loc = Lex.getLoc();
+ if (ParseGVReference(VC.VI, VC.GVId))
return true;
+ VContexts.push_back(VC);
+ } while (EatIfPresent(lltok::comma));
+
+ // Sort value contexts so that ones with readonly ValueInfo are at the end
+ // of the VContexts vector. This is needed to match immutableRefCount() behavior.
+ llvm::sort(VContexts, [](const ValueContext &VC1, const ValueContext &VC2) {
+ return VC1.VI.isReadOnly() < VC2.VI.isReadOnly();
+ });
+ IdToIndexMapType IdToIndexMap;
+ for (auto &VC : VContexts) {
// Keep track of the Refs array index needing a forward reference.
// We will save the location of the ValueInfo needing an update, but
// can only do so once the std::vector is finalized.
- if (VI == EmptyVI)
- IdToIndexMap[GVId].push_back(std::make_pair(Refs.size(), Loc));
- Refs.push_back(VI);
- } while (EatIfPresent(lltok::comma));
+ if (VC.VI.getRef() == FwdVIRef)
+ IdToIndexMap[VC.GVId].push_back(std::make_pair(Refs.size(), VC.Loc));
+ Refs.push_back(VC.VI);
+ }
// Now that the Refs vector is finalized, it is safe to save the locations
// of any forward GV references that need updating later.
for (auto I : IdToIndexMap) {
for (auto P : I.second) {
- assert(Refs[P.first] == EmptyVI &&
+ assert(Refs[P.first].getRef() == FwdVIRef &&
"Forward referenced ValueInfo expected to be empty");
auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
@@ -8027,12 +8227,18 @@ bool LLParser::ParseConstVCallList(
}
/// ConstVCall
-/// ::= VFuncId, Args
+/// ::= '(' VFuncId ',' Args ')'
bool LLParser::ParseConstVCall(FunctionSummary::ConstVCall &ConstVCall,
IdToIndexMapType &IdToIndexMap, unsigned Index) {
- if (ParseVFuncId(ConstVCall.VFunc, IdToIndexMap, Index) ||
- ParseToken(lltok::comma, "expected ',' here") ||
- ParseArgs(ConstVCall.Args))
+ if (ParseToken(lltok::lparen, "expected '(' here") ||
+ ParseVFuncId(ConstVCall.VFunc, IdToIndexMap, Index))
+ return true;
+
+ if (EatIfPresent(lltok::comma))
+ if (ParseArgs(ConstVCall.Args))
+ return true;
+
+ if (ParseToken(lltok::rparen, "expected ')' here"))
return true;
return false;
@@ -8119,6 +8325,27 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
return false;
}
+/// GVarFlags
+/// ::= 'varFlags' ':' '(' 'readonly' ':' Flag ')'
+bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
+ assert(Lex.getKind() == lltok::kw_varFlags);
+ Lex.Lex();
+
+ unsigned Flag;
+ if (ParseToken(lltok::colon, "expected ':' here") ||
+ ParseToken(lltok::lparen, "expected '(' here") ||
+ ParseToken(lltok::kw_readonly, "expected 'readonly' here") ||
+ ParseToken(lltok::colon, "expected ':' here"))
+ return true;
+
+ ParseFlag(Flag);
+ GVarFlags.ReadOnly = Flag;
+
+ if (ParseToken(lltok::rparen, "expected ')' here"))
+ return true;
+ return false;
+}
+
/// ModuleReference
/// ::= 'module' ':' UInt
bool LLParser::ParseModuleReference(StringRef &ModulePath) {
@@ -8139,18 +8366,20 @@ bool LLParser::ParseModuleReference(StringRef &ModulePath) {
/// GVReference
/// ::= SummaryID
bool LLParser::ParseGVReference(ValueInfo &VI, unsigned &GVId) {
+ bool ReadOnly = EatIfPresent(lltok::kw_readonly);
if (ParseToken(lltok::SummaryID, "expected GV ID"))
return true;
GVId = Lex.getUIntVal();
-
// Check if we already have a VI for this GV
if (GVId < NumberedValueInfos.size()) {
- assert(NumberedValueInfos[GVId] != EmptyVI);
+ assert(NumberedValueInfos[GVId].getRef() != FwdVIRef);
VI = NumberedValueInfos[GVId];
} else
// We will create a forward reference to the stored location.
- VI = EmptyVI;
+ VI = ValueInfo(false, FwdVIRef);
+ if (ReadOnly)
+ VI.setReadOnly();
return false;
}
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
index 811f96418fa5..5a0fc297265d 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -202,8 +202,9 @@ namespace llvm {
/// GetGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- GlobalValue *GetGlobalVal(const std::string &Name, Type *Ty, LocTy Loc);
- GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
+ GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc,
+ bool IsCall);
+ GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
/// Get a Comdat with the specified name, creating a forward reference
/// record if needed.
@@ -267,7 +268,11 @@ namespace llvm {
bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalUnnamedAddr(GlobalVariable::UnnamedAddr &UnnamedAddr);
- bool ParseOptionalAddrSpace(unsigned &AddrSpace);
+ bool ParseOptionalAddrSpace(unsigned &AddrSpace, unsigned DefaultAS = 0);
+ bool ParseOptionalProgramAddrSpace(unsigned &AddrSpace) {
+ return ParseOptionalAddrSpace(
+ AddrSpace, M->getDataLayout().getProgramAddressSpace());
+ };
bool ParseOptionalParamAttrs(AttrBuilder &B);
bool ParseOptionalReturnAttrs(AttrBuilder &B);
bool ParseOptionalLinkage(unsigned &Res, bool &HasLinkage,
@@ -347,6 +352,7 @@ namespace llvm {
bool ParseVariableSummary(std::string Name, GlobalValue::GUID, unsigned ID);
bool ParseAliasSummary(std::string Name, GlobalValue::GUID, unsigned ID);
bool ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags);
+ bool ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags);
bool ParseOptionalFFlags(FunctionSummary::FFlags &FFlags);
bool ParseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls);
bool ParseHotness(CalleeInfo::HotnessType &Hotness);
@@ -448,6 +454,9 @@ namespace llvm {
bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS, bool IsCall);
+ Value *checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
+ Value *Val, bool IsCall);
+
bool parseConstantValue(Type *Ty, Constant *&C);
bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
@@ -563,6 +572,8 @@ namespace llvm {
bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
+ unsigned OperandType);
bool ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
unsigned OperandType);
bool ParseLogical(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
index 8d8c7e99656e..c2e2795a9467 100644
--- a/contrib/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -139,6 +139,7 @@ enum Kind {
kw_arm_apcscc,
kw_arm_aapcscc,
kw_arm_aapcs_vfpcc,
+ kw_aarch64_vector_pcs,
kw_msp430_intrcc,
kw_avr_intrcc,
kw_avr_signalcc,
@@ -219,6 +220,7 @@ enum Kind {
kw_sret,
kw_sanitize_thread,
kw_sanitize_memory,
+ kw_speculative_load_hardening,
kw_strictfp,
kw_swifterror,
kw_swiftself,
@@ -268,6 +270,7 @@ enum Kind {
kw_umin,
// Instruction Opcodes (Opcode in UIntVal).
+ kw_fneg,
kw_add,
kw_fadd,
kw_sub,
@@ -367,6 +370,7 @@ enum Kind {
kw_readOnly,
kw_noRecurse,
kw_returnDoesNotAlias,
+ kw_noInline,
kw_calls,
kw_callee,
kw_hotness,
@@ -414,6 +418,7 @@ enum Kind {
kw_info,
kw_byte,
kw_bit,
+ kw_varFlags,
// Unsigned Valued tokens (UIntVal).
GlobalID, // @42
@@ -434,8 +439,10 @@ enum Kind {
DwarfLang, // DW_LANG_foo
DwarfCC, // DW_CC_foo
EmissionKind, // lineTablesOnly
+ NameTableKind, // GNU
DwarfOp, // DW_OP_foo
DIFlag, // DIFlagFoo
+ DISPFlag, // DISPFlagFoo
DwarfMacinfo, // DW_MACINFO_foo
ChecksumKind, // CSK_foo
diff --git a/contrib/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/contrib/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
new file mode 100644
index 000000000000..b789f646b5f6
--- /dev/null
+++ b/contrib/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -0,0 +1,324 @@
+//===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implements a verifier for AMDGPU HSA metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/Support/AMDGPUMetadata.h"
+
+namespace llvm {
+namespace AMDGPU {
+namespace HSAMD {
+namespace V3 {
+
+bool MetadataVerifier::verifyScalar(
+ msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
+ auto ScalarPtr = dyn_cast<msgpack::ScalarNode>(&Node);
+ if (!ScalarPtr)
+ return false;
+ auto &Scalar = *ScalarPtr;
+ // Do not output extraneous tags for types we know from the spec.
+ Scalar.IgnoreTag = true;
+ if (Scalar.getScalarKind() != SKind) {
+ if (Strict)
+ return false;
+ // If we are not strict, we interpret string values as "implicitly typed"
+ // and attempt to coerce them to the expected type here.
+ if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String)
+ return false;
+ std::string StringValue = Scalar.getString();
+ Scalar.setScalarKind(SKind);
+ if (Scalar.inputYAML(StringValue) != StringRef())
+ return false;
+ }
+ if (verifyValue)
+ return verifyValue(Scalar);
+ return true;
+}
+
+bool MetadataVerifier::verifyInteger(msgpack::Node &Node) {
+ if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt))
+ if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int))
+ return false;
+ return true;
+}
+
+bool MetadataVerifier::verifyArray(
+ msgpack::Node &Node, function_ref<bool(msgpack::Node &)> verifyNode,
+ Optional<size_t> Size) {
+ auto ArrayPtr = dyn_cast<msgpack::ArrayNode>(&Node);
+ if (!ArrayPtr)
+ return false;
+ auto &Array = *ArrayPtr;
+ if (Size && Array.size() != *Size)
+ return false;
+ for (auto &Item : Array)
+ if (!verifyNode(*Item.get()))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verifyEntry(
+ msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ function_ref<bool(msgpack::Node &)> verifyNode) {
+ auto Entry = MapNode.find(Key);
+ if (Entry == MapNode.end())
+ return !Required;
+ return verifyNode(*Entry->second.get());
+}
+
+bool MetadataVerifier::verifyScalarEntry(
+ msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
+ return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) {
+ return verifyScalar(Node, SKind, verifyValue);
+ });
+}
+
+bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode &MapNode,
+ StringRef Key, bool Required) {
+ return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ });
+}
+
+bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
+ auto ArgsMapPtr = dyn_cast<msgpack::MapNode>(&Node);
+ if (!ArgsMapPtr)
+ return false;
+ auto &ArgsMap = *ArgsMapPtr;
+
+ if (!verifyScalarEntry(ArgsMap, ".name", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".type_name", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".size", true))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".offset", true))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".value_kind", true,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("by_value", true)
+ .Case("global_buffer", true)
+ .Case("dynamic_shared_pointer", true)
+ .Case("sampler", true)
+ .Case("image", true)
+ .Case("pipe", true)
+ .Case("queue", true)
+ .Case("hidden_global_offset_x", true)
+ .Case("hidden_global_offset_y", true)
+ .Case("hidden_global_offset_z", true)
+ .Case("hidden_none", true)
+ .Case("hidden_printf_buffer", true)
+ .Case("hidden_default_queue", true)
+ .Case("hidden_completion_action", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".value_type", true,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("struct", true)
+ .Case("i8", true)
+ .Case("u8", true)
+ .Case("i16", true)
+ .Case("u16", true)
+ .Case("f16", true)
+ .Case("i32", true)
+ .Case("u32", true)
+ .Case("f32", true)
+ .Case("i64", true)
+ .Case("u64", true)
+ .Case("f64", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".pointee_align", false))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".address_space", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("private", true)
+ .Case("global", true)
+ .Case("constant", true)
+ .Case("local", true)
+ .Case("generic", true)
+ .Case("region", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".access", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("read_only", true)
+ .Case("write_only", true)
+ .Case("read_write", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".actual_access", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("read_only", true)
+ .Case("write_only", true)
+ .Case("read_write", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_const", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_restrict", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_volatile", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_pipe", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
+ auto KernelMapPtr = dyn_cast<msgpack::MapNode>(&Node);
+ if (!KernelMapPtr)
+ return false;
+ auto &KernelMap = *KernelMapPtr;
+
+ if (!verifyScalarEntry(KernelMap, ".name", true,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".symbol", true,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".language", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("OpenCL C", true)
+ .Case("OpenCL C++", true)
+ .Case("HCC", true)
+ .Case("HIP", true)
+ .Case("OpenMP", true)
+ .Case("Assembler", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyEntry(
+ KernelMap, ".language_version", false, [this](msgpack::Node &Node) {
+ return verifyArray(
+ Node,
+ [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyKernelArgs(Node);
+ });
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".reqd_workgroup_size", false,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node,
+ [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ },
+ 3);
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".workgroup_size_hint", false,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node,
+ [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ },
+ 3);
+ }))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".vec_type_hint", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".device_enqueue_symbol", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".group_segment_fixed_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".sgpr_count", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".vgpr_count", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".max_flat_workgroup_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".sgpr_spill_count", false))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".vgpr_spill_count", false))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verify(msgpack::Node &HSAMetadataRoot) {
+ auto RootMapPtr = dyn_cast<msgpack::MapNode>(&HSAMetadataRoot);
+ if (!RootMapPtr)
+ return false;
+ auto &RootMap = *RootMapPtr;
+
+ if (!verifyEntry(
+ RootMap, "amdhsa.version", true, [this](msgpack::Node &Node) {
+ return verifyArray(
+ Node,
+ [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+ }))
+ return false;
+ if (!verifyEntry(
+ RootMap, "amdhsa.printf", false, [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyScalar(Node, msgpack::ScalarNode::SK_String);
+ });
+ }))
+ return false;
+ if (!verifyEntry(RootMap, "amdhsa.kernels", true,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyKernel(Node);
+ });
+ }))
+ return false;
+
+ return true;
+}
+
+} // end namespace V3
+} // end namespace HSAMD
+} // end namespace AMDGPU
+} // end namespace llvm
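
The verifier above consumes a msgpack::Node document tree, the in-memory MessagePack model that MsgPackTypes.cpp adds further down in this change. A minimal driver sketch, not part of the patch; the MetadataVerifier constructor taking a single strictness flag is an assumption inferred from the Strict member used in verifyScalar():

    // Sketch only; needs AMDGPUMetadataVerifier.h, MsgPackReader.h and
    // MsgPackTypes.h (the latter two are introduced later in this change).
    static bool verifyHSABlob(llvm::StringRef Blob) {
      llvm::msgpack::Reader MPReader(Blob);
      auto RootOrErr = llvm::msgpack::Node::read(MPReader);
      if (!RootOrErr) {
        llvm::consumeError(RootOrErr.takeError()); // malformed MessagePack
        return false;
      }
      if (!*RootOrErr)                             // empty document
        return false;
      auto Root = std::move(**RootOrErr);          // shared_ptr to the root node
      // Assumed constructor: MetadataVerifier(bool Strict).
      llvm::AMDGPU::HSAMD::V3::MetadataVerifier Verifier(/*Strict=*/false);
      return Verifier.verify(*Root);               // expects the amdhsa.* root map
    }
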
diff --git a/contrib/llvm/lib/BinaryFormat/Dwarf.cpp b/contrib/llvm/lib/BinaryFormat/Dwarf.cpp
index 5984de73ae63..46f8056774b7 100644
--- a/contrib/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/contrib/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -13,6 +13,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -300,7 +301,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
switch (Language) {
default:
return StringRef();
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
case DW_LANG_##NAME: \
return "DW_LANG_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
@@ -309,7 +310,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
unsigned llvm::dwarf::getLanguage(StringRef LanguageString) {
return StringSwitch<unsigned>(LanguageString)
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
.Case("DW_LANG_" #NAME, DW_LANG_##NAME)
#include "llvm/BinaryFormat/Dwarf.def"
.Default(0);
@@ -319,7 +320,7 @@ unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) {
switch (Lang) {
default:
return 0;
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
case DW_LANG_##NAME: \
return VERSION;
#include "llvm/BinaryFormat/Dwarf.def"
@@ -330,13 +331,24 @@ unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) {
switch (Lang) {
default:
return 0;
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
case DW_LANG_##NAME: \
return DWARF_VENDOR_##VENDOR;
#include "llvm/BinaryFormat/Dwarf.def"
}
}
+Optional<unsigned> llvm::dwarf::LanguageLowerBound(dwarf::SourceLanguage Lang) {
+ switch (Lang) {
+ default:
+ return None;
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
+ case DW_LANG_##NAME: \
+ return LOWER_BOUND;
+#include "llvm/BinaryFormat/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::CaseString(unsigned Case) {
switch (Case) {
case DW_ID_case_sensitive:
@@ -455,14 +467,32 @@ StringRef llvm::dwarf::RangeListEncodingString(unsigned Encoding) {
}
}
-StringRef llvm::dwarf::CallFrameString(unsigned Encoding) {
+StringRef llvm::dwarf::CallFrameString(unsigned Encoding,
+ Triple::ArchType Arch) {
+ assert(Arch != llvm::Triple::ArchType::UnknownArch);
+#define SELECT_AARCH64 (Arch == llvm::Triple::aarch64_be || Arch == llvm::Triple::aarch64)
+#define SELECT_MIPS64 Arch == llvm::Triple::mips64
+#define SELECT_SPARC (Arch == llvm::Triple::sparc || Arch == llvm::Triple::sparcv9)
+#define SELECT_X86 (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+#define HANDLE_DW_CFA(ID, NAME)
+#define HANDLE_DW_CFA_PRED(ID, NAME, PRED) \
+ if (ID == Encoding && PRED) \
+ return "DW_CFA_" #NAME;
+#include "llvm/BinaryFormat/Dwarf.def"
+
switch (Encoding) {
default:
return StringRef();
+#define HANDLE_DW_CFA_PRED(ID, NAME, PRED)
#define HANDLE_DW_CFA(ID, NAME) \
case DW_CFA_##NAME: \
return "DW_CFA_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
+
+#undef SELECT_X86
+#undef SELECT_SPARC
+#undef SELECT_MIPS64
+#undef SELECT_AARCH64
}
}
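
CallFrameString() now takes the target architecture so that the new HANDLE_DW_CFA_PRED entries can disambiguate vendor-specific encodings (DW_CFA_AARCH64_negate_ra_state shares its opcode with the SPARC-only DW_CFA_GNU_window_save). A caller-side sketch, not part of the patch, using the arch-independent DW_CFA_def_cfa:

    // Needs llvm/BinaryFormat/Dwarf.h and llvm/ADT/Triple.h.
    static llvm::StringRef nameDefCFA() {
      return llvm::dwarf::CallFrameString(llvm::dwarf::DW_CFA_def_cfa,
                                          llvm::Triple::x86_64); // "DW_CFA_def_cfa"
    }
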
diff --git a/contrib/llvm/lib/BinaryFormat/Magic.cpp b/contrib/llvm/lib/BinaryFormat/Magic.cpp
index 5a339583fca1..78efa6ec87be 100644
--- a/contrib/llvm/lib/BinaryFormat/Magic.cpp
+++ b/contrib/llvm/lib/BinaryFormat/Magic.cpp
@@ -206,7 +206,7 @@ file_magic llvm::identify_magic(StringRef Magic) {
}
std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
- auto FileOrError = MemoryBuffer::getFile(Path);
+ auto FileOrError = MemoryBuffer::getFile(Path, -1LL, false);
if (!FileOrError)
return FileOrError.getError();
diff --git a/contrib/llvm/lib/BinaryFormat/MsgPackReader.cpp b/contrib/llvm/lib/BinaryFormat/MsgPackReader.cpp
new file mode 100644
index 000000000000..b510fdba9608
--- /dev/null
+++ b/contrib/llvm/lib/BinaryFormat/MsgPackReader.cpp
@@ -0,0 +1,255 @@
+//===- MsgPackReader.cpp - Simple MsgPack reader ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a MessagePack reader.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackReader.h"
+#include "llvm/BinaryFormat/MsgPack.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace msgpack;
+
+Reader::Reader(MemoryBufferRef InputBuffer)
+ : InputBuffer(InputBuffer), Current(InputBuffer.getBufferStart()),
+ End(InputBuffer.getBufferEnd()) {}
+
+Reader::Reader(StringRef Input) : Reader({Input, "MsgPack"}) {}
+
+Expected<bool> Reader::read(Object &Obj) {
+ if (Current == End)
+ return false;
+
+ uint8_t FB = static_cast<uint8_t>(*Current++);
+
+ switch (FB) {
+ case FirstByte::Nil:
+ Obj.Kind = Type::Nil;
+ return true;
+ case FirstByte::True:
+ Obj.Kind = Type::Boolean;
+ Obj.Bool = true;
+ return true;
+ case FirstByte::False:
+ Obj.Kind = Type::Boolean;
+ Obj.Bool = false;
+ return true;
+ case FirstByte::Int8:
+ Obj.Kind = Type::Int;
+ return readInt<int8_t>(Obj);
+ case FirstByte::Int16:
+ Obj.Kind = Type::Int;
+ return readInt<int16_t>(Obj);
+ case FirstByte::Int32:
+ Obj.Kind = Type::Int;
+ return readInt<int32_t>(Obj);
+ case FirstByte::Int64:
+ Obj.Kind = Type::Int;
+ return readInt<int64_t>(Obj);
+ case FirstByte::UInt8:
+ Obj.Kind = Type::UInt;
+ return readUInt<uint8_t>(Obj);
+ case FirstByte::UInt16:
+ Obj.Kind = Type::UInt;
+ return readUInt<uint16_t>(Obj);
+ case FirstByte::UInt32:
+ Obj.Kind = Type::UInt;
+ return readUInt<uint32_t>(Obj);
+ case FirstByte::UInt64:
+ Obj.Kind = Type::UInt;
+ return readUInt<uint64_t>(Obj);
+ case FirstByte::Float32:
+ Obj.Kind = Type::Float;
+ if (sizeof(float) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Float32 with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Float = BitsToFloat(endian::read<uint32_t, Endianness>(Current));
+ Current += sizeof(float);
+ return true;
+ case FirstByte::Float64:
+ Obj.Kind = Type::Float;
+ if (sizeof(double) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Float64 with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Float = BitsToDouble(endian::read<uint64_t, Endianness>(Current));
+ Current += sizeof(double);
+ return true;
+ case FirstByte::Str8:
+ Obj.Kind = Type::String;
+ return readRaw<uint8_t>(Obj);
+ case FirstByte::Str16:
+ Obj.Kind = Type::String;
+ return readRaw<uint16_t>(Obj);
+ case FirstByte::Str32:
+ Obj.Kind = Type::String;
+ return readRaw<uint32_t>(Obj);
+ case FirstByte::Bin8:
+ Obj.Kind = Type::Binary;
+ return readRaw<uint8_t>(Obj);
+ case FirstByte::Bin16:
+ Obj.Kind = Type::Binary;
+ return readRaw<uint16_t>(Obj);
+ case FirstByte::Bin32:
+ Obj.Kind = Type::Binary;
+ return readRaw<uint32_t>(Obj);
+ case FirstByte::Array16:
+ Obj.Kind = Type::Array;
+ return readLength<uint16_t>(Obj);
+ case FirstByte::Array32:
+ Obj.Kind = Type::Array;
+ return readLength<uint32_t>(Obj);
+ case FirstByte::Map16:
+ Obj.Kind = Type::Map;
+ return readLength<uint16_t>(Obj);
+ case FirstByte::Map32:
+ Obj.Kind = Type::Map;
+ return readLength<uint32_t>(Obj);
+ case FirstByte::FixExt1:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext1);
+ case FirstByte::FixExt2:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext2);
+ case FirstByte::FixExt4:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext4);
+ case FirstByte::FixExt8:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext8);
+ case FirstByte::FixExt16:
+ Obj.Kind = Type::Extension;
+ return createExt(Obj, FixLen::Ext16);
+ case FirstByte::Ext8:
+ Obj.Kind = Type::Extension;
+ return readExt<uint8_t>(Obj);
+ case FirstByte::Ext16:
+ Obj.Kind = Type::Extension;
+ return readExt<uint16_t>(Obj);
+ case FirstByte::Ext32:
+ Obj.Kind = Type::Extension;
+ return readExt<uint32_t>(Obj);
+ }
+
+ if ((FB & FixBitsMask::NegativeInt) == FixBits::NegativeInt) {
+ Obj.Kind = Type::Int;
+ int8_t I;
+ static_assert(sizeof(I) == sizeof(FB), "Unexpected type sizes");
+ memcpy(&I, &FB, sizeof(FB));
+ Obj.Int = I;
+ return true;
+ }
+
+ if ((FB & FixBitsMask::PositiveInt) == FixBits::PositiveInt) {
+ Obj.Kind = Type::UInt;
+ Obj.UInt = FB;
+ return true;
+ }
+
+ if ((FB & FixBitsMask::String) == FixBits::String) {
+ Obj.Kind = Type::String;
+ uint8_t Size = FB & ~FixBitsMask::String;
+ return createRaw(Obj, Size);
+ }
+
+ if ((FB & FixBitsMask::Array) == FixBits::Array) {
+ Obj.Kind = Type::Array;
+ Obj.Length = FB & ~FixBitsMask::Array;
+ return true;
+ }
+
+ if ((FB & FixBitsMask::Map) == FixBits::Map) {
+ Obj.Kind = Type::Map;
+ Obj.Length = FB & ~FixBitsMask::Map;
+ return true;
+ }
+
+ return make_error<StringError>(
+ "Invalid first byte", std::make_error_code(std::errc::invalid_argument));
+}
+
+template <class T> Expected<bool> Reader::readRaw(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Raw with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ T Size = endian::read<T, Endianness>(Current);
+ Current += sizeof(T);
+ return createRaw(Obj, Size);
+}
+
+template <class T> Expected<bool> Reader::readInt(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Int with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Int = static_cast<int64_t>(endian::read<T, Endianness>(Current));
+ Current += sizeof(T);
+ return true;
+}
+
+template <class T> Expected<bool> Reader::readUInt(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Int with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.UInt = static_cast<uint64_t>(endian::read<T, Endianness>(Current));
+ Current += sizeof(T);
+ return true;
+}
+
+template <class T> Expected<bool> Reader::readLength(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Map/Array with invalid length",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Length = static_cast<size_t>(endian::read<T, Endianness>(Current));
+ Current += sizeof(T);
+ return true;
+}
+
+template <class T> Expected<bool> Reader::readExt(Object &Obj) {
+ if (sizeof(T) > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Ext with invalid length",
+ std::make_error_code(std::errc::invalid_argument));
+ T Size = endian::read<T, Endianness>(Current);
+ Current += sizeof(T);
+ return createExt(Obj, Size);
+}
+
+Expected<bool> Reader::createRaw(Object &Obj, uint32_t Size) {
+ if (Size > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Raw with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Raw = StringRef(Current, Size);
+ Current += Size;
+ return true;
+}
+
+Expected<bool> Reader::createExt(Object &Obj, uint32_t Size) {
+ if (Current == End)
+ return make_error<StringError>(
+ "Invalid Ext with no type",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Extension.Type = *Current++;
+ if (Size > remainingSpace())
+ return make_error<StringError>(
+ "Invalid Ext with insufficient payload",
+ std::make_error_code(std::errc::invalid_argument));
+ Obj.Extension.Bytes = StringRef(Current, Size);
+ Current += Size;
+ return true;
+}
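
A usage sketch for the reader above, not part of the patch. Reader::read() returns false at end of buffer and an Error on malformed input; for Map and Array it only reports the length, with the elements delivered by subsequent read() calls, so the loop below simply counts every map header in the stream:

    // Needs llvm/BinaryFormat/MsgPackReader.h.
    static llvm::Expected<unsigned> countMapHeaders(llvm::StringRef Blob) {
      llvm::msgpack::Reader MPReader(Blob);
      llvm::msgpack::Object Obj;
      unsigned Maps = 0;
      while (true) {
        auto ContinueOrErr = MPReader.read(Obj);
        if (!ContinueOrErr)
          return ContinueOrErr.takeError(); // propagate "Invalid ..." errors
        if (!*ContinueOrErr)
          return Maps;                      // clean end of buffer
        if (Obj.Kind == llvm::msgpack::Type::Map)
          ++Maps;                           // Obj.Length holds the entry count
      }
    }
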
diff --git a/contrib/llvm/lib/BinaryFormat/MsgPackTypes.cpp b/contrib/llvm/lib/BinaryFormat/MsgPackTypes.cpp
new file mode 100644
index 000000000000..4a8f70b10fb8
--- /dev/null
+++ b/contrib/llvm/lib/BinaryFormat/MsgPackTypes.cpp
@@ -0,0 +1,303 @@
+//===- MsgPackTypes.cpp - MsgPack Types -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implementation of types representing MessagePack "documents".
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackTypes.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+namespace llvm {
+namespace msgpack {
+void ScalarNode::anchor() {}
+void ArrayNode::anchor() {}
+void MapNode::anchor() {}
+}
+}
+
+Expected<OptNodePtr> Node::readArray(Reader &MPReader, size_t Length) {
+ auto A = std::make_shared<ArrayNode>();
+ for (size_t I = 0; I < Length; ++I) {
+ auto OptNodeOrErr = Node::read(MPReader);
+ if (auto Err = OptNodeOrErr.takeError())
+ return std::move(Err);
+ if (!*OptNodeOrErr)
+ return make_error<StringError>(
+ "Insufficient array elements",
+ std::make_error_code(std::errc::invalid_argument));
+ A->push_back(std::move(**OptNodeOrErr));
+ }
+ return OptNodePtr(std::move(A));
+}
+
+Expected<OptNodePtr> Node::readMap(Reader &MPReader, size_t Length) {
+ auto M = std::make_shared<MapNode>();
+ for (size_t I = 0; I < Length; ++I) {
+ auto OptKeyOrErr = Node::read(MPReader);
+ if (auto Err = OptKeyOrErr.takeError())
+ return std::move(Err);
+ if (!*OptKeyOrErr)
+ return make_error<StringError>(
+ "Insufficient map elements",
+ std::make_error_code(std::errc::invalid_argument));
+ auto OptValOrErr = Node::read(MPReader);
+ if (auto Err = OptValOrErr.takeError())
+ return std::move(Err);
+ if (!*OptValOrErr)
+ return make_error<StringError>(
+ "Insufficient map elements",
+ std::make_error_code(std::errc::invalid_argument));
+ auto *Key = dyn_cast<ScalarNode>((*OptKeyOrErr)->get());
+ if (!Key)
+ return make_error<StringError>(
+ "Only string map keys are supported",
+ std::make_error_code(std::errc::invalid_argument));
+ if (Key->getScalarKind() != ScalarNode::SK_String)
+ return make_error<StringError>(
+ "Only string map keys are supported",
+ std::make_error_code(std::errc::invalid_argument));
+ M->try_emplace(Key->getString(), std::move(**OptValOrErr));
+ }
+ return OptNodePtr(std::move(M));
+}
+
+Expected<OptNodePtr> Node::read(Reader &MPReader) {
+ Object Obj;
+
+ auto ContinueOrErr = MPReader.read(Obj);
+ if (auto Err = ContinueOrErr.takeError())
+ return std::move(Err);
+ if (!*ContinueOrErr)
+ return None;
+
+ switch (Obj.Kind) {
+ case Type::Int:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Int));
+ case Type::UInt:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.UInt));
+ case Type::Nil:
+ return OptNodePtr(std::make_shared<ScalarNode>());
+ case Type::Boolean:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Bool));
+ case Type::Float:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Float));
+ case Type::String:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Raw));
+ case Type::Binary:
+ return OptNodePtr(std::make_shared<ScalarNode>(Obj.Raw));
+ case Type::Array:
+ return Node::readArray(MPReader, Obj.Length);
+ case Type::Map:
+ return Node::readMap(MPReader, Obj.Length);
+ case Type::Extension:
+ return make_error<StringError>(
+ "Extension types are not supported",
+ std::make_error_code(std::errc::invalid_argument));
+ }
+ llvm_unreachable("msgpack::Type not handled");
+}
+
+void ScalarNode::destroy() {
+ switch (SKind) {
+ case SK_String:
+ case SK_Binary:
+ StringValue.~basic_string();
+ break;
+ default:
+ // POD types do not require destruction
+ break;
+ }
+}
+
+ScalarNode::ScalarNode(int64_t IntValue)
+ : Node(NK_Scalar), SKind(SK_Int), IntValue(IntValue) {}
+
+ScalarNode::ScalarNode(int32_t IntValue)
+ : ScalarNode(static_cast<int64_t>(IntValue)) {}
+
+ScalarNode::ScalarNode(uint64_t UIntValue)
+ : Node(NK_Scalar), SKind(SK_UInt), UIntValue(UIntValue) {}
+
+ScalarNode::ScalarNode(uint32_t IntValue)
+ : ScalarNode(static_cast<uint64_t>(IntValue)) {}
+
+ScalarNode::ScalarNode() : Node(NK_Scalar), SKind(SK_Nil) {}
+
+ScalarNode::ScalarNode(bool BoolValue)
+ : Node(NK_Scalar), SKind(SK_Boolean), BoolValue(BoolValue) {}
+
+ScalarNode::ScalarNode(double FloatValue)
+ : Node(NK_Scalar), SKind(SK_Float), FloatValue(FloatValue) {}
+
+ScalarNode::ScalarNode(StringRef StringValue)
+ : Node(NK_Scalar), SKind(SK_String) {
+ new (&this->StringValue) std::string(StringValue);
+}
+
+ScalarNode::ScalarNode(const char *StringValue)
+ : ScalarNode(StringRef(StringValue)) {}
+
+ScalarNode::ScalarNode(std::string &&StringValue)
+ : Node(NK_Scalar), SKind(SK_String) {
+ new (&this->StringValue) std::string(StringValue);
+}
+
+ScalarNode::ScalarNode(MemoryBufferRef BinaryValue)
+ : Node(NK_Scalar), SKind(SK_Binary) {
+ new (&StringValue) std::string(BinaryValue.getBuffer());
+}
+
+ScalarNode::~ScalarNode() { destroy(); }
+
+ScalarNode &ScalarNode::operator=(ScalarNode &&RHS) {
+ destroy();
+ switch (SKind = RHS.SKind) {
+ case SK_Int:
+ IntValue = RHS.IntValue;
+ break;
+ case SK_UInt:
+ UIntValue = RHS.UIntValue;
+ break;
+ case SK_Boolean:
+ BoolValue = RHS.BoolValue;
+ break;
+ case SK_Float:
+ FloatValue = RHS.FloatValue;
+ break;
+ case SK_String:
+ case SK_Binary:
+ new (&StringValue) std::string(std::move(RHS.StringValue));
+ break;
+ case SK_Nil:
+ // pass
+ break;
+ }
+ return *this;
+}
+
+StringRef ScalarNode::inputYAML(StringRef ScalarStr) {
+ switch (SKind) {
+ case SK_Int:
+ return yaml::ScalarTraits<int64_t>::input(ScalarStr, nullptr, IntValue);
+ case SK_UInt:
+ return yaml::ScalarTraits<uint64_t>::input(ScalarStr, nullptr, UIntValue);
+ case SK_Nil:
+ return StringRef();
+ case SK_Boolean:
+ return yaml::ScalarTraits<bool>::input(ScalarStr, nullptr, BoolValue);
+ case SK_Float:
+ return yaml::ScalarTraits<double>::input(ScalarStr, nullptr, FloatValue);
+ case SK_Binary:
+ case SK_String:
+ return yaml::ScalarTraits<std::string>::input(ScalarStr, nullptr,
+ StringValue);
+ }
+ llvm_unreachable("unrecognized ScalarKind");
+}
+
+void ScalarNode::outputYAML(raw_ostream &OS) const {
+ switch (SKind) {
+ case SK_Int:
+ yaml::ScalarTraits<int64_t>::output(IntValue, nullptr, OS);
+ break;
+ case SK_UInt:
+ yaml::ScalarTraits<uint64_t>::output(UIntValue, nullptr, OS);
+ break;
+ case SK_Nil:
+ yaml::ScalarTraits<StringRef>::output("", nullptr, OS);
+ break;
+ case SK_Boolean:
+ yaml::ScalarTraits<bool>::output(BoolValue, nullptr, OS);
+ break;
+ case SK_Float:
+ yaml::ScalarTraits<double>::output(FloatValue, nullptr, OS);
+ break;
+ case SK_Binary:
+ case SK_String:
+ yaml::ScalarTraits<std::string>::output(StringValue, nullptr, OS);
+ break;
+ }
+}
+
+yaml::QuotingType ScalarNode::mustQuoteYAML(StringRef ScalarStr) const {
+ switch (SKind) {
+ case SK_Int:
+ return yaml::ScalarTraits<int64_t>::mustQuote(ScalarStr);
+ case SK_UInt:
+ return yaml::ScalarTraits<uint64_t>::mustQuote(ScalarStr);
+ case SK_Nil:
+ return yaml::ScalarTraits<StringRef>::mustQuote(ScalarStr);
+ case SK_Boolean:
+ return yaml::ScalarTraits<bool>::mustQuote(ScalarStr);
+ case SK_Float:
+ return yaml::ScalarTraits<double>::mustQuote(ScalarStr);
+ case SK_Binary:
+ case SK_String:
+ return yaml::ScalarTraits<std::string>::mustQuote(ScalarStr);
+ }
+ llvm_unreachable("unrecognized ScalarKind");
+}
+
+const char *ScalarNode::IntTag = "!int";
+const char *ScalarNode::NilTag = "!nil";
+const char *ScalarNode::BooleanTag = "!bool";
+const char *ScalarNode::FloatTag = "!float";
+const char *ScalarNode::StringTag = "!str";
+const char *ScalarNode::BinaryTag = "!bin";
+
+StringRef ScalarNode::getYAMLTag() const {
+ switch (SKind) {
+ case SK_Int:
+ return IntTag;
+ case SK_UInt:
+ return IntTag;
+ case SK_Nil:
+ return NilTag;
+ case SK_Boolean:
+ return BooleanTag;
+ case SK_Float:
+ return FloatTag;
+ case SK_String:
+ return StringTag;
+ case SK_Binary:
+ return BinaryTag;
+ }
+ llvm_unreachable("unrecognized ScalarKind");
+}
+
+void ScalarNode::write(Writer &MPWriter) {
+ switch (SKind) {
+ case SK_Int:
+ MPWriter.write(IntValue);
+ break;
+ case SK_UInt:
+ MPWriter.write(UIntValue);
+ break;
+ case SK_Nil:
+ MPWriter.writeNil();
+ break;
+ case SK_Boolean:
+ MPWriter.write(BoolValue);
+ break;
+ case SK_Float:
+ MPWriter.write(FloatValue);
+ break;
+ case SK_String:
+ MPWriter.write(StringValue);
+ break;
+ case SK_Binary:
+ MPWriter.write(MemoryBufferRef(StringValue, ""));
+ break;
+ }
+}
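
To connect the document model to the writer added next, a minimal sketch that builds a one-entry map and serializes it. It is not part of the patch and makes two assumptions: that MapNode::write is defined inline in MsgPackTypes.h (only ScalarNode::write needs the out-of-line definition above), and that the writer's Compatible flag is best passed explicitly rather than relying on a default argument:

    static void emitOneEntryMap(llvm::raw_ostream &OS) {
      auto Map = std::make_shared<llvm::msgpack::MapNode>();
      Map->try_emplace("wavefront_size",
                       std::make_shared<llvm::msgpack::ScalarNode>(uint64_t(64)));
      llvm::msgpack::Writer MPWriter(OS, /*Compatible=*/false);
      Map->write(MPWriter); // assumed to emit writeMapSize(1), the key, then the value
    }
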
diff --git a/contrib/llvm/lib/BinaryFormat/MsgPackWriter.cpp b/contrib/llvm/lib/BinaryFormat/MsgPackWriter.cpp
new file mode 100644
index 000000000000..d024bb0fcdb2
--- /dev/null
+++ b/contrib/llvm/lib/BinaryFormat/MsgPackWriter.cpp
@@ -0,0 +1,209 @@
+//===- MsgPackWriter.cpp - Simple MsgPack writer ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a MessagePack writer.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackWriter.h"
+#include "llvm/BinaryFormat/MsgPack.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+Writer::Writer(raw_ostream &OS, bool Compatible)
+ : EW(OS, Endianness), Compatible(Compatible) {}
+
+void Writer::writeNil() { EW.write(FirstByte::Nil); }
+
+void Writer::write(bool b) { EW.write(b ? FirstByte::True : FirstByte::False); }
+
+void Writer::write(int64_t i) {
+ if (i >= 0) {
+ write(static_cast<uint64_t>(i));
+ return;
+ }
+
+ if (i >= FixMin::NegativeInt) {
+ EW.write(static_cast<int8_t>(i));
+ return;
+ }
+
+ if (i >= INT8_MIN) {
+ EW.write(FirstByte::Int8);
+ EW.write(static_cast<int8_t>(i));
+ return;
+ }
+
+ if (i >= INT16_MIN) {
+ EW.write(FirstByte::Int16);
+ EW.write(static_cast<int16_t>(i));
+ return;
+ }
+
+ if (i >= INT32_MIN) {
+ EW.write(FirstByte::Int32);
+ EW.write(static_cast<int32_t>(i));
+ return;
+ }
+
+ EW.write(FirstByte::Int64);
+ EW.write(i);
+}
+
+void Writer::write(uint64_t u) {
+ if (u <= FixMax::PositiveInt) {
+ EW.write(static_cast<uint8_t>(u));
+ return;
+ }
+
+ if (u <= UINT8_MAX) {
+ EW.write(FirstByte::UInt8);
+ EW.write(static_cast<uint8_t>(u));
+ return;
+ }
+
+ if (u <= UINT16_MAX) {
+ EW.write(FirstByte::UInt16);
+ EW.write(static_cast<uint16_t>(u));
+ return;
+ }
+
+ if (u <= UINT32_MAX) {
+ EW.write(FirstByte::UInt32);
+ EW.write(static_cast<uint32_t>(u));
+ return;
+ }
+
+ EW.write(FirstByte::UInt64);
+ EW.write(u);
+}
+
+void Writer::write(double d) {
+ // Encode as a Float32 when |d| lies within float's normal range; otherwise use Float64.
+ double a = std::fabs(d);
+ if (a >= std::numeric_limits<float>::min() &&
+ a <= std::numeric_limits<float>::max()) {
+ EW.write(FirstByte::Float32);
+ EW.write(static_cast<float>(d));
+ } else {
+ EW.write(FirstByte::Float64);
+ EW.write(d);
+ }
+}
+
+void Writer::write(StringRef s) {
+ size_t Size = s.size();
+
+ if (Size <= FixMax::String)
+ EW.write(static_cast<uint8_t>(FixBits::String | Size));
+ else if (!Compatible && Size <= UINT8_MAX) {
+ EW.write(FirstByte::Str8);
+ EW.write(static_cast<uint8_t>(Size));
+ } else if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Str16);
+ EW.write(static_cast<uint16_t>(Size));
+ } else {
+ assert(Size <= UINT32_MAX && "String object too long to be encoded");
+ EW.write(FirstByte::Str32);
+ EW.write(static_cast<uint32_t>(Size));
+ }
+
+ EW.OS << s;
+}
+
+void Writer::write(MemoryBufferRef Buffer) {
+ assert(!Compatible && "Attempt to write Bin format in compatible mode");
+
+ size_t Size = Buffer.getBufferSize();
+
+ if (Size <= UINT8_MAX) {
+ EW.write(FirstByte::Bin8);
+ EW.write(static_cast<uint8_t>(Size));
+ } else if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Bin16);
+ EW.write(static_cast<uint16_t>(Size));
+ } else {
+ assert(Size <= UINT32_MAX && "Binary object too long to be encoded");
+ EW.write(FirstByte::Bin32);
+ EW.write(static_cast<uint32_t>(Size));
+ }
+
+ EW.OS.write(Buffer.getBufferStart(), Size);
+}
+
+void Writer::writeArraySize(uint32_t Size) {
+ if (Size <= FixMax::Array) {
+ EW.write(static_cast<uint8_t>(FixBits::Array | Size));
+ return;
+ }
+
+ if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Array16);
+ EW.write(static_cast<uint16_t>(Size));
+ return;
+ }
+
+ EW.write(FirstByte::Array32);
+ EW.write(Size);
+}
+
+void Writer::writeMapSize(uint32_t Size) {
+ if (Size <= FixMax::Map) {
+ EW.write(static_cast<uint8_t>(FixBits::Map | Size));
+ return;
+ }
+
+ if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Map16);
+ EW.write(static_cast<uint16_t>(Size));
+ return;
+ }
+
+ EW.write(FirstByte::Map32);
+ EW.write(Size);
+}
+
+void Writer::writeExt(int8_t Type, MemoryBufferRef Buffer) {
+ size_t Size = Buffer.getBufferSize();
+
+ switch (Size) {
+ case FixLen::Ext1:
+ EW.write(FirstByte::FixExt1);
+ break;
+ case FixLen::Ext2:
+ EW.write(FirstByte::FixExt2);
+ break;
+ case FixLen::Ext4:
+ EW.write(FirstByte::FixExt4);
+ break;
+ case FixLen::Ext8:
+ EW.write(FirstByte::FixExt8);
+ break;
+ case FixLen::Ext16:
+ EW.write(FirstByte::FixExt16);
+ break;
+ default:
+ if (Size <= UINT8_MAX) {
+ EW.write(FirstByte::Ext8);
+ EW.write(static_cast<uint8_t>(Size));
+ } else if (Size <= UINT16_MAX) {
+ EW.write(FirstByte::Ext16);
+ EW.write(static_cast<uint16_t>(Size));
+ } else {
+ assert(Size <= UINT32_MAX && "Ext size too large to be encoded");
+ EW.write(FirstByte::Ext32);
+ EW.write(static_cast<uint32_t>(Size));
+ }
+ }
+
+ EW.write(Type);
+ EW.OS.write(Buffer.getBufferStart(), Size);
+}
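
The same data can be emitted without the Node layer by driving the writer directly; every call below appears in the implementation above. A sketch, not part of the patch:

    // Needs llvm/BinaryFormat/MsgPackWriter.h and llvm/Support/raw_ostream.h.
    static std::string encodeExample() {
      std::string Out;
      llvm::raw_string_ostream OS(Out);
      llvm::msgpack::Writer MPWriter(OS, /*Compatible=*/false);
      MPWriter.writeMapSize(2);
      MPWriter.write(llvm::StringRef(".name"));
      MPWriter.write(llvm::StringRef("kernel0"));
      MPWriter.write(llvm::StringRef(".wavefront_size"));
      MPWriter.write(uint64_t(64)); // small enough for a single fixuint byte
      return OS.str();
    }
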
diff --git a/contrib/llvm/lib/BinaryFormat/Wasm.cpp b/contrib/llvm/lib/BinaryFormat/Wasm.cpp
index 35360d0ae4f0..94d40bf02a39 100644
--- a/contrib/llvm/lib/BinaryFormat/Wasm.cpp
+++ b/contrib/llvm/lib/BinaryFormat/Wasm.cpp
@@ -19,13 +19,17 @@ std::string llvm::wasm::toString(wasm::WasmSymbolType type) {
return "WASM_SYMBOL_TYPE_DATA";
case wasm::WASM_SYMBOL_TYPE_SECTION:
return "WASM_SYMBOL_TYPE_SECTION";
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
+ return "WASM_SYMBOL_TYPE_EVENT";
}
llvm_unreachable("unknown symbol type");
}
std::string llvm::wasm::relocTypetoString(uint32_t type) {
switch (type) {
-#define WASM_RELOC(NAME, VALUE) case VALUE: return #NAME;
+#define WASM_RELOC(NAME, VALUE) \
+ case VALUE: \
+ return #NAME;
#include "llvm/BinaryFormat/WasmRelocs.def"
#undef WASM_RELOC
default:
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index c45b441238bc..fe051e7a9125 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -876,6 +876,7 @@ static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) {
Flags.ReadOnly = (RawFlags >> 1) & 0x1;
Flags.NoRecurse = (RawFlags >> 2) & 0x1;
Flags.ReturnDoesNotAlias = (RawFlags >> 3) & 0x1;
+ Flags.NoInline = (RawFlags >> 4) & 0x1;
return Flags;
}
@@ -897,6 +898,11 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local);
}
+// Decode the flags for GlobalVariable in the summary
+static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) {
+ return GlobalVarSummary::GVarFlags((RawFlags & 0x1) ? true : false);
+}
+
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
switch (Val) {
default: // Map unknown visibilities to default.
@@ -963,6 +969,20 @@ static int getDecodedCastOpcode(unsigned Val) {
}
}
+static int getDecodedUnaryOpcode(unsigned Val, Type *Ty) {
+ bool IsFP = Ty->isFPOrFPVectorTy();
+ // UnOps are only valid for int/fp or vector of int/fp types
+ if (!IsFP && !Ty->isIntOrIntVectorTy())
+ return -1;
+
+ switch (Val) {
+ default:
+ return -1;
+ case bitc::UNOP_NEG:
+ return IsFP ? Instruction::FNeg : -1;
+ }
+}
+
static int getDecodedBinaryOpcode(unsigned Val, Type *Ty) {
bool IsFP = Ty->isFPOrFPVectorTy();
// BinOps are only valid for int/fp or vector of int/fp types
@@ -1165,6 +1185,8 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
case Attribute::NoCfCheck: return 1ULL << 57;
case Attribute::OptForFuzzing: return 1ULL << 58;
case Attribute::ShadowCallStack: return 1ULL << 59;
+ case Attribute::SpeculativeLoadHardening:
+ return 1ULL << 60;
case Attribute::Dereferenceable:
llvm_unreachable("dereferenceable attribute not supported in raw format");
break;
@@ -1389,6 +1411,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::SanitizeThread;
case bitc::ATTR_KIND_SANITIZE_MEMORY:
return Attribute::SanitizeMemory;
+ case bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING:
+ return Attribute::SpeculativeLoadHardening;
case bitc::ATTR_KIND_SWIFT_ERROR:
return Attribute::SwiftError;
case bitc::ATTR_KIND_SWIFT_SELF:
@@ -2312,6 +2336,19 @@ Error BitcodeReader::parseConstants() {
}
break;
}
+ case bitc::CST_CODE_CE_UNOP: { // CE_UNOP: [opcode, opval]
+ if (Record.size() < 2)
+ return error("Invalid record");
+ int Opc = getDecodedUnaryOpcode(Record[0], CurTy);
+ if (Opc < 0) {
+ V = UndefValue::get(CurTy); // Unknown unop.
+ } else {
+ Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
+ unsigned Flags = 0;
+ V = ConstantExpr::get(Opc, LHS, Flags);
+ }
+ break;
+ }
case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
if (Record.size() < 3)
return error("Invalid record");
@@ -2938,7 +2975,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
// v1: [type, callingconv, isproto, linkage, paramattr, alignment, section,
// visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat,
- // prefixdata, personalityfn, preemption specifier] (name in VST)
+ // prefixdata, personalityfn, preemption specifier, addrspace] (name in VST)
// v2: [strtab_offset, strtab_size, v1]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
@@ -2957,8 +2994,12 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
if (CC & ~CallingConv::MaxID)
return error("Invalid calling convention ID");
- Function *Func =
- Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule);
+ unsigned AddrSpace = TheModule->getDataLayout().getProgramAddressSpace();
+ if (Record.size() > 16)
+ AddrSpace = Record[16];
+
+ Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ AddrSpace, Name, TheModule);
Func->setCallingConv(CC);
bool isProto = Record[2];
@@ -3508,24 +3549,47 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned Line = Record[0], Col = Record[1];
unsigned ScopeID = Record[2], IAID = Record[3];
+ bool isImplicitCode = Record.size() == 5 && Record[4];
MDNode *Scope = nullptr, *IA = nullptr;
if (ScopeID) {
- Scope = MDLoader->getMDNodeFwdRefOrNull(ScopeID - 1);
+ Scope = dyn_cast_or_null<MDNode>(
+ MDLoader->getMetadataFwdRefOrLoad(ScopeID - 1));
if (!Scope)
return error("Invalid record");
}
if (IAID) {
- IA = MDLoader->getMDNodeFwdRefOrNull(IAID - 1);
+ IA = dyn_cast_or_null<MDNode>(
+ MDLoader->getMetadataFwdRefOrLoad(IAID - 1));
if (!IA)
return error("Invalid record");
}
- LastLoc = DebugLoc::get(Line, Col, Scope, IA);
+ LastLoc = DebugLoc::get(Line, Col, Scope, IA, isImplicitCode);
I->setDebugLoc(LastLoc);
I = nullptr;
continue;
}
+ case bitc::FUNC_CODE_INST_UNOP: { // UNOP: [opval, ty, opcode]
+ unsigned OpNum = 0;
+ Value *LHS;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ OpNum+1 > Record.size())
+ return error("Invalid record");
+ int Opc = getDecodedUnaryOpcode(Record[OpNum++], LHS->getType());
+ if (Opc == -1)
+ return error("Invalid record");
+ I = UnaryOperator::Create((Instruction::UnaryOps)Opc, LHS);
+ InstructionList.push_back(I);
+ if (OpNum < Record.size()) {
+ if (isa<FPMathOperator>(I)) {
+ FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
+ if (FMF.any())
+ I->setFastMathFlags(FMF);
+ }
+ }
+ break;
+ }
case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode]
unsigned OpNum = 0;
Value *LHS, *RHS;
@@ -3656,16 +3720,16 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("EXTRACTVAL: Invalid type");
if ((unsigned)Index != Index)
return error("Invalid value");
- if (IsStruct && Index >= CurTy->subtypes().size())
+ if (IsStruct && Index >= CurTy->getStructNumElements())
return error("EXTRACTVAL: Invalid struct index");
if (IsArray && Index >= CurTy->getArrayNumElements())
return error("EXTRACTVAL: Invalid array index");
EXTRACTVALIdx.push_back((unsigned)Index);
if (IsStruct)
- CurTy = CurTy->subtypes()[Index];
+ CurTy = CurTy->getStructElementType(Index);
else
- CurTy = CurTy->subtypes()[0];
+ CurTy = CurTy->getArrayElementType();
}
I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
@@ -3698,16 +3762,16 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("INSERTVAL: Invalid type");
if ((unsigned)Index != Index)
return error("Invalid value");
- if (IsStruct && Index >= CurTy->subtypes().size())
+ if (IsStruct && Index >= CurTy->getStructNumElements())
return error("INSERTVAL: Invalid struct index");
if (IsArray && Index >= CurTy->getArrayNumElements())
return error("INSERTVAL: Invalid array index");
INSERTVALIdx.push_back((unsigned)Index);
if (IsStruct)
- CurTy = CurTy->subtypes()[Index];
+ CurTy = CurTy->getStructElementType(Index);
else
- CurTy = CurTy->subtypes()[0];
+ CurTy = CurTy->getArrayElementType();
}
if (CurTy != Val->getType())
@@ -4616,7 +4680,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
CurBB->getInstList().push_back(I);
// If this was a terminator instruction, move to the next block.
- if (isa<TerminatorInst>(I)) {
+ if (I->isTerminator()) {
++CurBBNo;
CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : nullptr;
}
@@ -4854,7 +4918,7 @@ void ModuleSummaryIndexBitcodeReader::setValueGUID(
ValueIdToValueInfoMap[ValueID] = std::make_pair(
TheIndex.getOrInsertValueInfo(
ValueGUID,
- UseStrtab ? ValueName : TheIndex.saveString(ValueName.str())),
+ UseStrtab ? ValueName : TheIndex.saveString(ValueName)),
OriginalNameID);
}
@@ -5160,6 +5224,12 @@ static void parseTypeIdSummaryRecord(ArrayRef<uint64_t> Record,
parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId);
}
+static void setImmutableRefs(std::vector<ValueInfo> &Refs, unsigned Count) {
+ // Read-only refs are at the end of the refs list.
+ for (unsigned RefNo = Refs.size() - Count; RefNo < Refs.size(); ++RefNo)
+ Refs[RefNo].setReadOnly();
+}
+
// Eagerly parse the entire summary block. This populates the GlobalValueSummary
// objects in the index.
Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
@@ -5177,9 +5247,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
- if (Version < 1 || Version > 4)
+ if (Version < 1 || Version > 6)
return error("Invalid summary version " + Twine(Version) +
- ", 1, 2, 3 or 4 expected");
+ ". Version should be in the range [1-6].");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
@@ -5224,15 +5294,30 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
break;
case bitc::FS_FLAGS: { // [flags]
uint64_t Flags = Record[0];
- // Scan flags (set only on the combined index).
- assert(Flags <= 0x3 && "Unexpected bits in flag");
+ // Scan flags.
+ assert(Flags <= 0x1f && "Unexpected bits in flag");
// 1 bit: WithGlobalValueDeadStripping flag.
+ // Set on combined index only.
if (Flags & 0x1)
TheIndex.setWithGlobalValueDeadStripping();
// 1 bit: SkipModuleByDistributedBackend flag.
+ // Set on combined index only.
if (Flags & 0x2)
TheIndex.setSkipModuleByDistributedBackend();
+ // 1 bit: HasSyntheticEntryCounts flag.
+ // Set on combined index only.
+ if (Flags & 0x4)
+ TheIndex.setHasSyntheticEntryCounts();
+ // 1 bit: DisableSplitLTOUnit flag.
+ // Set on per module indexes. It is up to the client to validate
+ // the consistency of this flag across modules being linked.
+ if (Flags & 0x8)
+ TheIndex.setEnableSplitLTOUnit();
+ // 1 bit: PartiallySplitLTOUnits flag.
+ // Set on combined index only.
+ if (Flags & 0x10)
+ TheIndex.setPartiallySplitLTOUnits();
break;
}
case bitc::FS_VALUE_GUID: { // [valueid, refguid]
@@ -5258,11 +5343,16 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
unsigned InstCount = Record[2];
uint64_t RawFunFlags = 0;
unsigned NumRefs = Record[3];
+ unsigned NumImmutableRefs = 0;
int RefListStartIndex = 4;
if (Version >= 4) {
RawFunFlags = Record[3];
NumRefs = Record[4];
RefListStartIndex = 5;
+ if (Version >= 5) {
+ NumImmutableRefs = Record[5];
+ RefListStartIndex = 6;
+ }
}
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
@@ -5281,9 +5371,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::vector<FunctionSummary::EdgeTy> Calls = makeCallList(
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
IsOldProfileFormat, HasProfile, HasRelBF);
+ setImmutableRefs(Refs, NumImmutableRefs);
auto FS = llvm::make_unique<FunctionSummary>(
- Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
- std::move(Calls), std::move(PendingTypeTests),
+ Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
+ std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
@@ -5329,14 +5420,21 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
TheIndex.addGlobalValueSummary(GUID.first, std::move(AS));
break;
}
- // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid]
+ // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags, n x valueid]
case bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS: {
unsigned ValueID = Record[0];
uint64_t RawFlags = Record[1];
+ unsigned RefArrayStart = 2;
+ GlobalVarSummary::GVarFlags GVF;
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
+ if (Version >= 5) {
+ GVF = getDecodedGVarFlags(Record[2]);
+ RefArrayStart = 3;
+ }
std::vector<ValueInfo> Refs =
- makeRefList(ArrayRef<uint64_t>(Record).slice(2));
- auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs));
+ makeRefList(ArrayRef<uint64_t>(Record).slice(RefArrayStart));
+ auto FS =
+ llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
FS->setModulePath(getThisModule()->first());
auto GUID = getValueInfoFromValueId(ValueID);
FS->setOriginalName(GUID.second);
@@ -5354,13 +5452,25 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
uint64_t RawFlags = Record[2];
unsigned InstCount = Record[3];
uint64_t RawFunFlags = 0;
+ uint64_t EntryCount = 0;
unsigned NumRefs = Record[4];
+ unsigned NumImmutableRefs = 0;
int RefListStartIndex = 5;
if (Version >= 4) {
RawFunFlags = Record[4];
- NumRefs = Record[5];
RefListStartIndex = 6;
+ size_t NumRefsIndex = 5;
+ if (Version >= 5) {
+ RefListStartIndex = 7;
+ if (Version >= 6) {
+ NumRefsIndex = 6;
+ EntryCount = Record[5];
+ RefListStartIndex = 8;
+ }
+ NumImmutableRefs = Record[RefListStartIndex - 1];
+ }
+ NumRefs = Record[NumRefsIndex];
}
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
@@ -5374,9 +5484,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
IsOldProfileFormat, HasProfile, false);
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
+ setImmutableRefs(Refs, NumImmutableRefs);
auto FS = llvm::make_unique<FunctionSummary>(
- Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
- std::move(Edges), std::move(PendingTypeTests),
+ Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
+ std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
std::move(PendingTypeTestAssumeVCalls),
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
@@ -5422,10 +5533,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
unsigned ValueID = Record[0];
uint64_t ModuleId = Record[1];
uint64_t RawFlags = Record[2];
+ unsigned RefArrayStart = 3;
+ GlobalVarSummary::GVarFlags GVF;
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
+ if (Version >= 5) {
+ GVF = getDecodedGVarFlags(Record[3]);
+ RefArrayStart = 4;
+ }
std::vector<ValueInfo> Refs =
- makeRefList(ArrayRef<uint64_t>(Record).slice(3));
- auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs));
+ makeRefList(ArrayRef<uint64_t>(Record).slice(RefArrayStart));
+ auto FS =
+ llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
LastSeenSummary = FS.get();
FS->setModulePath(ModuleIdMap[ModuleId]);
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
@@ -5811,6 +5929,46 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
return std::move(Index);
}
+static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
+ unsigned ID) {
+ if (Stream.EnterSubBlock(ID))
+ return error("Invalid record");
+ SmallVector<uint64_t, 64> Record;
+
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ // If no flags record found, conservatively return true to mimic
+ // behavior before this flag was added.
+ return true;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Look for the FS_FLAGS record.
+ Record.clear();
+ auto BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::FS_FLAGS: { // [flags]
+ uint64_t Flags = Record[0];
+ // Scan flags.
+ assert(Flags <= 0x1f && "Unexpected bits in flag");
+
+ return Flags & 0x8;
+ }
+ }
+ }
+ llvm_unreachable("Exit infinite loop");
+}
+
// Check if the given bitcode buffer contains a global value summary block.
Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
BitstreamCursor Stream(Buffer);
@@ -5826,14 +5984,27 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false};
+ return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false,
+ /*EnableSplitLTOUnit=*/false};
case BitstreamEntry::SubBlock:
- if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID)
- return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true};
+ if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) {
+ Expected<bool> EnableSplitLTOUnit =
+ getEnableSplitLTOUnitFlag(Stream, Entry.ID);
+ if (!EnableSplitLTOUnit)
+ return EnableSplitLTOUnit.takeError();
+ return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true,
+ *EnableSplitLTOUnit};
+ }
- if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID)
- return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true};
+ if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) {
+ Expected<bool> EnableSplitLTOUnit =
+ getEnableSplitLTOUnitFlag(Stream, Entry.ID);
+ if (!EnableSplitLTOUnit)
+ return EnableSplitLTOUnit.takeError();
+ return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true,
+ *EnableSplitLTOUnit};
+ }
// Ignore other sub-blocks.
if (Stream.SkipBlock())
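getEnableSplitLTOUnitFlag() extracts a single bit of the FS_FLAGS word so that getLTOInfo() can report it without parsing the whole summary block. A sketch of the full flag layout, pieced together from the assert above and the writer changes further down in BitcodeWriter.cpp (the meaning of bit 0x1 is an assumption; its condition is outside this diff):

    // FS_FLAGS bit layout (summary index flags):
    //   0x01  combined index: dead stripping performed        (assumed, pre-existing)
    //   0x02  combined index: skipModuleByDistributedBackend  (pre-existing)
    //   0x04  combined index: hasSyntheticEntryCounts         (new)
    //   0x08  enableSplitLTOUnit                              (new, the bit read here)
    //   0x10  combined index: partiallySplitLTOUnits          (new)
    bool EnableSplitLTOUnit = (Flags & 0x8) != 0;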
diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 011c41e2cecd..3289aa0acddd 100644
--- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -650,10 +650,6 @@ public:
return MetadataList.getMetadataFwdRef(ID);
}
- MDNode *getMDNodeFwdRefOrNull(unsigned Idx) {
- return MetadataList.getMDNodeFwdRefOrNull(Idx);
- }
-
DISubprogram *lookupSubprogramForFunction(Function *F) {
return FunctionsWithSPs.lookup(F);
}
@@ -772,7 +768,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
// It is acknowledged by 'TODO: Inherit from Metadata' in the
// NamedMDNode class definition.
MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
- assert(MD && "Invalid record");
+ assert(MD && "Invalid metadata: expect fwd ref to MDNode");
NMD->addOperand(MD);
}
break;
@@ -1049,7 +1045,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
for (unsigned i = 0; i != Size; ++i) {
MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
if (!MD)
- return error("Invalid record");
+ return error("Invalid named metadata: expect fwd ref to MDNode");
NMD->addOperand(MD);
}
break;
@@ -1139,7 +1135,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_LOCATION: {
- if (Record.size() != 5)
+ if (Record.size() != 5 && Record.size() != 6)
return error("Invalid record");
IsDistinct = Record[0];
@@ -1147,8 +1143,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
unsigned Column = Record[2];
Metadata *Scope = getMD(Record[3]);
Metadata *InlinedAt = getMDOrNull(Record[4]);
+ bool ImplicitCode = Record.size() == 6 && Record[5];
MetadataList.assignValue(
- GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)),
+ GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt,
+ ImplicitCode)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1211,14 +1209,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_BASIC_TYPE: {
- if (Record.size() != 6)
+ if (Record.size() < 6 || Record.size() > 7)
return error("Invalid record");
IsDistinct = Record[0];
+ DINode::DIFlags Flags = (Record.size() > 6) ?
+ static_cast<DINode::DIFlags>(Record[6]) : DINode::FlagZero;
+
MetadataList.assignValue(
GET_OR_DISTINCT(DIBasicType,
(Context, Record[1], getMDString(Record[2]), Record[3],
- Record[4], Record[5])),
+ Record[4], Record[5], Flags)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1308,7 +1309,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, Tag, Name, File, Line, Scope, BaseType,
SizeInBits, AlignInBits, OffsetInBits, Flags,
Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier));
+ Identifier, Discriminator));
if (!IsNotUsedInTypeRef && Identifier)
MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
@@ -1390,7 +1391,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Record.size() <= 14 ? 0 : Record[14],
Record.size() <= 16 ? true : Record[16],
Record.size() <= 17 ? false : Record[17],
- Record.size() <= 18 ? false : Record[18]);
+ Record.size() <= 18 ? 0 : Record[18],
+ Record.size() <= 19 ? 0 : Record[19]);
MetadataList.assignValue(CU, NextMetadataNo);
NextMetadataNo++;
@@ -1404,20 +1406,43 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Record.size() < 18 || Record.size() > 21)
return error("Invalid record");
- IsDistinct =
- (Record[0] & 1) || Record[8]; // All definitions should be distinct.
+ bool HasSPFlags = Record[0] & 4;
+ DISubprogram::DISPFlags SPFlags =
+ HasSPFlags
+ ? static_cast<DISubprogram::DISPFlags>(Record[9])
+ : DISubprogram::toSPFlags(
+ /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
+ /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11]);
+
+ // All definitions should be distinct.
+ IsDistinct = (Record[0] & 1) || (SPFlags & DISubprogram::SPFlagDefinition);
// Version 1 has a Function as Record[15].
// Version 2 has removed Record[15].
// Version 3 has the Unit as Record[15].
// Version 4 added thisAdjustment.
- bool HasUnit = Record[0] >= 2;
- if (HasUnit && Record.size() < 19)
+ // Version 5 repacked flags into DISPFlags, changing many element numbers.
+ bool HasUnit = Record[0] & 2;
+ if (!HasSPFlags && HasUnit && Record.size() < 19)
return error("Invalid record");
- Metadata *CUorFn = getMDOrNull(Record[15]);
- unsigned Offset = Record.size() >= 19 ? 1 : 0;
- bool HasFn = Offset && !HasUnit;
- bool HasThisAdj = Record.size() >= 20;
- bool HasThrownTypes = Record.size() >= 21;
+ if (HasSPFlags && !HasUnit)
+ return error("Invalid record");
+ // Accommodate older formats.
+ bool HasFn = false;
+ bool HasThisAdj = true;
+ bool HasThrownTypes = true;
+ unsigned OffsetA = 0;
+ unsigned OffsetB = 0;
+ if (!HasSPFlags) {
+ OffsetA = 2;
+ OffsetB = 2;
+ if (Record.size() >= 19) {
+ HasFn = !HasUnit;
+ OffsetB++;
+ }
+ HasThisAdj = Record.size() >= 20;
+ HasThrownTypes = Record.size() >= 21;
+ }
+ Metadata *CUorFn = getMDOrNull(Record[12 + OffsetB]);
DISubprogram *SP = GET_OR_DISTINCT(
DISubprogram,
(Context,
@@ -1427,20 +1452,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[4]), // file
Record[5], // line
getMDOrNull(Record[6]), // type
- Record[7], // isLocal
- Record[8], // isDefinition
- Record[9], // scopeLine
- getDITypeRefOrNull(Record[10]), // containingType
- Record[11], // virtuality
- Record[12], // virtualIndex
- HasThisAdj ? Record[19] : 0, // thisAdjustment
- static_cast<DINode::DIFlags>(Record[13]), // flags
- Record[14], // isOptimized
+ Record[7 + OffsetA], // scopeLine
+ getDITypeRefOrNull(Record[8 + OffsetA]), // containingType
+ Record[10 + OffsetA], // virtualIndex
+ HasThisAdj ? Record[16 + OffsetB] : 0, // thisAdjustment
+ static_cast<DINode::DIFlags>(Record[11 + OffsetA]),// flags
+ SPFlags, // SPFlags
HasUnit ? CUorFn : nullptr, // unit
- getMDOrNull(Record[15 + Offset]), // templateParams
- getMDOrNull(Record[16 + Offset]), // declaration
- getMDOrNull(Record[17 + Offset]), // retainedNodes
- HasThrownTypes ? getMDOrNull(Record[20]) : nullptr // thrownTypes
+ getMDOrNull(Record[13 + OffsetB]), // templateParams
+ getMDOrNull(Record[14 + OffsetB]), // declaration
+ getMDOrNull(Record[15 + OffsetB]), // retainedNodes
+ HasThrownTypes ? getMDOrNull(Record[17 + OffsetB])
+ : nullptr // thrownTypes
));
MetadataList.assignValue(SP, NextMetadataNo);
NextMetadataNo++;
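The offset juggling above is easier to read as two layouts. When Record[0] & 4 (HasSPFlags) is set, isLocalToUnit/isDefinition/virtuality/isOptimized are packed into one DISPFlags word and everything after the type field shifts down; the writer in BitcodeWriter.cpp below now always sets that bit. Reconstructed from the reads above:

    // HasSPFlags layout:
    //   [1] scope [2] name [3] linkageName [4] file [5] line [6] type
    //   [7] scopeLine [8] containingType [9] SPFlags [10] virtualIndex
    //   [11] flags [12] unit [13] templateParams [14] declaration
    //   [15] retainedNodes [16] thisAdjustment [17] thrownTypes
    // Legacy layout (no HasSPFlags):
    //   [7] isLocalToUnit [8] isDefinition [9] scopeLine [10] containingType
    //   [11] virtuality [12] virtualIndex [13] flags [14] isOptimized
    //   [15] unit (or function in version 1), then templateParams, declaration,
    //   retainedNodes, [19] thisAdjustment and [20] thrownTypes when present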
@@ -1557,21 +1580,35 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_GLOBAL_VAR: {
- if (Record.size() < 11 || Record.size() > 12)
+ if (Record.size() < 11 || Record.size() > 13)
return error("Invalid record");
IsDistinct = Record[0] & 1;
unsigned Version = Record[0] >> 1;
- if (Version == 1) {
+ if (Version == 2) {
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(
+ DIGlobalVariable,
+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]), Record[11])),
+ NextMetadataNo);
+
+ NextMetadataNo++;
+ } else if (Version == 1) {
+ // No upgrade necessary. A null field will be introduced to indicate
+ // that no parameter information is available.
MetadataList.assignValue(
GET_OR_DISTINCT(DIGlobalVariable,
(Context, getMDOrNull(Record[1]),
getMDString(Record[2]), getMDString(Record[3]),
getMDOrNull(Record[4]), Record[5],
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[10]), Record[11])),
+ getMDOrNull(Record[10]), nullptr, Record[11])),
NextMetadataNo);
+
NextMetadataNo++;
} else if (Version == 0) {
// Upgrade old metadata, which stored a global variable reference or a
@@ -1602,7 +1639,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, getMDOrNull(Record[1]), getMDString(Record[2]),
getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[10]), AlignInBits));
+ getMDOrNull(Record[10]), nullptr, AlignInBits));
DIGlobalVariableExpression *DGVE = nullptr;
if (Attach || Expr)
@@ -1814,7 +1851,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseGlobalObjectAttachment(
return error("Invalid ID");
MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[I + 1]);
if (!MD)
- return error("Invalid metadata attachment");
+ return error("Invalid metadata attachment: expect fwd ref to MDNode");
GO.addMetadata(K->second, *MD);
}
return Error::success();
@@ -1984,10 +2021,6 @@ Metadata *MetadataLoader::getMetadataFwdRefOrLoad(unsigned Idx) {
return Pimpl->getMetadataFwdRefOrLoad(Idx);
}
-MDNode *MetadataLoader::getMDNodeFwdRefOrNull(unsigned Idx) {
- return Pimpl->getMDNodeFwdRefOrNull(Idx);
-}
-
DISubprogram *MetadataLoader::lookupSubprogramForFunction(Function *F) {
return Pimpl->lookupSubprogramForFunction(F);
}
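For METADATA_GLOBAL_VAR the versions decoded above share fields [1]..[8] (scope, name, linkageName, file, line, type, isLocalToUnit, isDefinition) and differ only in the tail:

    // Version 1: [9] unused expression slot  [10] staticDataMemberDeclaration  [11] alignInBits
    // Version 2: [9] staticDataMemberDeclaration  [10] templateParams (new)    [11] alignInBits

Version-1 records are read with a null templateParams, and version-0 records still go through the old upgrade path that splits off a DIGlobalVariableExpression.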
diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
index f23dcc06cc94..07a77a086f32 100644
--- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
+++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
@@ -65,9 +65,7 @@ public:
/// necessary.
Metadata *getMetadataFwdRefOrLoad(unsigned Idx);
- MDNode *getMDNodeFwdRefOrNull(unsigned Idx);
-
- /// Return the DISubprogra metadata for a Function if any, null otherwise.
+ /// Return the DISubprogram metadata for a Function if any, null otherwise.
DISubprogram *lookupSubprogramForFunction(Function *F);
/// Parse a `METADATA_ATTACHMENT` block for a function.
diff --git a/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp b/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp
index 1ab22b5cc3d1..b3945a37408f 100644
--- a/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp
@@ -144,7 +144,7 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
void BitcodeReaderValueList::resolveConstantForwardRefs() {
// Sort the values by-pointer so that they are efficient to look up with a
// binary search.
- llvm::sort(ResolveConstants.begin(), ResolveConstants.end());
+ llvm::sort(ResolveConstants);
SmallVector<Constant *, 64> NewOps;
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 87b47dc354b5..ba4f932e2e6d 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -112,6 +112,8 @@ enum {
// FUNCTION_BLOCK abbrev id's.
FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ FUNCTION_INST_UNOP_ABBREV,
+ FUNCTION_INST_UNOP_FLAGS_ABBREV,
FUNCTION_INST_BINOP_ABBREV,
FUNCTION_INST_BINOP_FLAGS_ABBREV,
FUNCTION_INST_CAST_ABBREV,
@@ -513,6 +515,13 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) {
}
}
+static unsigned getEncodedUnaryOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown binary instruction!");
+ case Instruction::FNeg: return bitc::UNOP_NEG;
+ }
+}
+
static unsigned getEncodedBinaryOpcode(unsigned Opcode) {
switch (Opcode) {
default: llvm_unreachable("Unknown binary instruction!");
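getEncodedUnaryOpcode() is the writer-side half of the new unary operator (FNeg) bitcode support; UNOP_NEG is the only encoding so far. The llvm_unreachable text still says "binary", which looks like a copy-paste from getEncodedBinaryOpcode below. For orientation, the reader needs the inverse mapping, roughly as follows (hypothetical sketch, not part of this diff):

    static int getDecodedUnaryOpcode(unsigned Val) {
      switch (Val) {
      default:
        return -1; // unknown encoding
      case bitc::UNOP_NEG:
        return Instruction::FNeg;
      }
    }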
@@ -690,6 +699,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_SANITIZE_THREAD;
case Attribute::SanitizeMemory:
return bitc::ATTR_KIND_SANITIZE_MEMORY;
+ case Attribute::SpeculativeLoadHardening:
+ return bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING;
case Attribute::SwiftError:
return bitc::ATTR_KIND_SWIFT_ERROR;
case Attribute::SwiftSelf:
@@ -969,6 +980,7 @@ static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) {
RawFlags |= (Flags.ReadOnly << 1);
RawFlags |= (Flags.NoRecurse << 2);
RawFlags |= (Flags.ReturnDoesNotAlias << 3);
+ RawFlags |= (Flags.NoInline << 4);
return RawFlags;
}
@@ -988,6 +1000,11 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
return RawFlags;
}
+static uint64_t getEncodedGVarFlags(GlobalVarSummary::GVarFlags Flags) {
+ uint64_t RawFlags = Flags.ReadOnly;
+ return RawFlags;
+}
+
static unsigned getEncodedVisibility(const GlobalValue &GV) {
switch (GV.getVisibility()) {
case GlobalValue::DefaultVisibility: return 0;
@@ -1264,7 +1281,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// FUNCTION: [strtab offset, strtab size, type, callingconv, isproto,
// linkage, paramattrs, alignment, section, visibility, gc,
// unnamed_addr, prologuedata, dllstorageclass, comdat,
- // prefixdata, personalityfn, DSO_Local]
+ // prefixdata, personalityfn, DSO_Local, addrspace]
Vals.push_back(addToStrtab(F.getName()));
Vals.push_back(F.getName().size());
Vals.push_back(VE.getTypeID(F.getFunctionType()));
@@ -1287,6 +1304,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
F.hasPersonalityFn() ? (VE.getValueID(F.getPersonalityFn()) + 1) : 0);
Vals.push_back(F.isDSOLocal());
+ Vals.push_back(F.getAddressSpace());
+
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
Vals.clear();
@@ -1399,6 +1418,7 @@ unsigned ModuleBitcodeWriter::createDILocationAbbrev() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
return Stream.EmitAbbrev(std::move(Abbv));
}
@@ -1413,6 +1433,7 @@ void ModuleBitcodeWriter::writeDILocation(const DILocation *N,
Record.push_back(N->getColumn());
Record.push_back(VE.getMetadataID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt()));
+ Record.push_back(N->isImplicitCode());
Stream.EmitRecord(bitc::METADATA_LOCATION, Record, Abbrev);
Record.clear();
@@ -1486,6 +1507,7 @@ void ModuleBitcodeWriter::writeDIBasicType(const DIBasicType *N,
Record.push_back(N->getSizeInBits());
Record.push_back(N->getAlignInBits());
Record.push_back(N->getEncoding());
+ Record.push_back(N->getFlags());
Stream.EmitRecord(bitc::METADATA_BASIC_TYPE, Record, Abbrev);
Record.clear();
@@ -1602,7 +1624,7 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
Record.push_back(VE.getMetadataOrNullID(N->getMacros().get()));
Record.push_back(N->getSplitDebugInlining());
Record.push_back(N->getDebugInfoForProfiling());
- Record.push_back(N->getGnuPubnames());
+ Record.push_back((unsigned)N->getNameTableKind());
Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
Record.clear();
@@ -1611,22 +1633,20 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
- uint64_t HasUnitFlag = 1 << 1;
- Record.push_back(N->isDistinct() | HasUnitFlag);
+ const uint64_t HasUnitFlag = 1 << 1;
+ const uint64_t HasSPFlagsFlag = 1 << 2;
+ Record.push_back(uint64_t(N->isDistinct()) | HasUnitFlag | HasSPFlagsFlag);
Record.push_back(VE.getMetadataOrNullID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
Record.push_back(N->getLine());
Record.push_back(VE.getMetadataOrNullID(N->getType()));
- Record.push_back(N->isLocalToUnit());
- Record.push_back(N->isDefinition());
Record.push_back(N->getScopeLine());
Record.push_back(VE.getMetadataOrNullID(N->getContainingType()));
- Record.push_back(N->getVirtuality());
+ Record.push_back(N->getSPFlags());
Record.push_back(N->getVirtualIndex());
Record.push_back(N->getFlags());
- Record.push_back(N->isOptimized());
Record.push_back(VE.getMetadataOrNullID(N->getRawUnit()));
Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
Record.push_back(VE.getMetadataOrNullID(N->getDeclaration()));
@@ -1738,7 +1758,7 @@ void ModuleBitcodeWriter::writeDITemplateValueParameter(
void ModuleBitcodeWriter::writeDIGlobalVariable(
const DIGlobalVariable *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
- const uint64_t Version = 1 << 1;
+ const uint64_t Version = 2 << 1;
Record.push_back((uint64_t)N->isDistinct() | Version);
Record.push_back(VE.getMetadataOrNullID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
@@ -1748,8 +1768,8 @@ void ModuleBitcodeWriter::writeDIGlobalVariable(
Record.push_back(VE.getMetadataOrNullID(N->getType()));
Record.push_back(N->isLocalToUnit());
Record.push_back(N->isDefinition());
- Record.push_back(/* expr */ 0);
Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams()));
Record.push_back(N->getAlignInBits());
Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
@@ -2376,6 +2396,16 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(Flags);
}
break;
+ case Instruction::FNeg: {
+ assert(CE->getNumOperands() == 1 && "Unknown constant expr!");
+ Code = bitc::CST_CODE_CE_UNOP;
+ Record.push_back(getEncodedUnaryOpcode(CE->getOpcode()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ uint64_t Flags = getOptimizationFlags(CE);
+ if (Flags != 0)
+ Record.push_back(Flags);
+ break;
+ }
case Instruction::GetElementPtr: {
Code = bitc::CST_CODE_CE_GEP;
const auto *GO = cast<GEPOperator>(C);
@@ -2548,7 +2578,19 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
}
}
break;
-
+ case Instruction::FNeg: {
+ Code = bitc::FUNC_CODE_INST_UNOP;
+ if (!pushValueAndType(I.getOperand(0), InstID, Vals))
+ AbbrevToUse = FUNCTION_INST_UNOP_ABBREV;
+ Vals.push_back(getEncodedUnaryOpcode(I.getOpcode()));
+ uint64_t Flags = getOptimizationFlags(&I);
+ if (Flags != 0) {
+ if (AbbrevToUse == FUNCTION_INST_UNOP_ABBREV)
+ AbbrevToUse = FUNCTION_INST_UNOP_FLAGS_ABBREV;
+ Vals.push_back(Flags);
+ }
+ break;
+ }
case Instruction::GetElementPtr: {
Code = bitc::FUNC_CODE_INST_GEP;
AbbrevToUse = FUNCTION_INST_GEP_ABBREV;
@@ -3088,6 +3130,7 @@ void ModuleBitcodeWriter::writeFunction(
Vals.push_back(DL->getColumn());
Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
+ Vals.push_back(DL->isImplicitCode());
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
Vals.clear();
@@ -3208,6 +3251,25 @@ void ModuleBitcodeWriter::writeBlockInfo() {
FUNCTION_INST_LOAD_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
}
+ { // INST_UNOP abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
+ FUNCTION_INST_UNOP_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
+ { // INST_UNOP_FLAGS abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // flags
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
+ FUNCTION_INST_UNOP_FLAGS_ABBREV)
+ llvm_unreachable("Unexpected abbrev ordering!");
+ }
{ // INST_BINOP abbrev for FUNCTION_BLOCK.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
@@ -3353,14 +3415,10 @@ void IndexBitcodeWriter::writeModStrings() {
/// Write the function type metadata related records that need to appear before
/// a function summary entry (whether per-module or combined).
-static void writeFunctionTypeMetadataRecords(
- BitstreamWriter &Stream, FunctionSummary *FS,
- std::set<GlobalValue::GUID> &ReferencedTypeIds) {
- if (!FS->type_tests().empty()) {
+static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
+ FunctionSummary *FS) {
+ if (!FS->type_tests().empty())
Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
- for (auto &TT : FS->type_tests())
- ReferencedTypeIds.insert(TT);
- }
SmallVector<uint64_t, 64> Record;
@@ -3372,7 +3430,6 @@ static void writeFunctionTypeMetadataRecords(
for (auto &VF : VFs) {
Record.push_back(VF.GUID);
Record.push_back(VF.Offset);
- ReferencedTypeIds.insert(VF.GUID);
}
Stream.EmitRecord(Ty, Record);
};
@@ -3387,7 +3444,6 @@ static void writeFunctionTypeMetadataRecords(
for (auto &VC : VCs) {
Record.clear();
Record.push_back(VC.VFunc.GUID);
- ReferencedTypeIds.insert(VC.VFunc.GUID);
Record.push_back(VC.VFunc.Offset);
Record.insert(Record.end(), VC.Args.begin(), VC.Args.end());
Stream.EmitRecord(Ty, Record);
@@ -3400,6 +3456,33 @@ static void writeFunctionTypeMetadataRecords(
FS->type_checked_load_const_vcalls());
}
+/// Collect type IDs from type tests used by function.
+static void
+getReferencedTypeIds(FunctionSummary *FS,
+ std::set<GlobalValue::GUID> &ReferencedTypeIds) {
+ if (!FS->type_tests().empty())
+ for (auto &TT : FS->type_tests())
+ ReferencedTypeIds.insert(TT);
+
+ auto GetReferencedTypesFromVFuncIdVec =
+ [&](ArrayRef<FunctionSummary::VFuncId> VFs) {
+ for (auto &VF : VFs)
+ ReferencedTypeIds.insert(VF.GUID);
+ };
+
+ GetReferencedTypesFromVFuncIdVec(FS->type_test_assume_vcalls());
+ GetReferencedTypesFromVFuncIdVec(FS->type_checked_load_vcalls());
+
+ auto GetReferencedTypesFromConstVCallVec =
+ [&](ArrayRef<FunctionSummary::ConstVCall> VCs) {
+ for (auto &VC : VCs)
+ ReferencedTypeIds.insert(VC.VFunc.GUID);
+ };
+
+ GetReferencedTypesFromConstVCallVec(FS->type_test_assume_const_vcalls());
+ GetReferencedTypesFromConstVCallVec(FS->type_checked_load_const_vcalls());
+}
+
static void writeWholeProgramDevirtResolutionByArg(
SmallVector<uint64_t, 64> &NameVals, const std::vector<uint64_t> &args,
const WholeProgramDevirtResolution::ByArg &ByArg) {
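Type-id bookkeeping is now split from emission: writeFunctionTypeMetadataRecords() only writes the FS_TYPE_* records, and the new getReferencedTypeIds() collects the referenced GUIDs. The per-module writer only needs the former; the combined writer calls both, and can also collect type ids from an alias's aliasee function summary (see the FS_COMBINED_ALIAS hunk further down).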
@@ -3453,13 +3536,13 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
- std::set<GlobalValue::GUID> ReferencedTypeIds;
- writeFunctionTypeMetadataRecords(Stream, FS, ReferencedTypeIds);
+ writeFunctionTypeMetadataRecords(Stream, FS);
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
NameVals.push_back(FS->refs().size());
+ NameVals.push_back(FS->immutableRefCount());
for (auto &RI : FS->refs())
NameVals.push_back(VE.getValueID(RI.getValue()));
@@ -3501,6 +3584,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
NameVals.push_back(VE.getValueID(&V));
GlobalVarSummary *VS = cast<GlobalVarSummary>(Summary);
NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
+ NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
unsigned SizeBeforeRefs = NameVals.size();
for (auto &RI : VS->refs())
@@ -3517,7 +3601,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some record are
// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 4;
+static const uint64_t INDEX_VERSION = 6;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
@@ -3534,6 +3618,13 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
+ // Write the index flags.
+ uint64_t Flags = 0;
+ // Bits 1-3 are set only in the combined index, skip them.
+ if (Index->enableSplitLTOUnit())
+ Flags |= 0x8;
+ Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
+
if (Index->begin() == Index->end()) {
Stream.ExitBlock();
return;
@@ -3552,6 +3643,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3568,6 +3660,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid [, rel_block_freq])
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3646,6 +3739,12 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Flags |= 0x1;
if (Index.skipModuleByDistributedBackend())
Flags |= 0x2;
+ if (Index.hasSyntheticEntryCounts())
+ Flags |= 0x4;
+ if (Index.enableSplitLTOUnit())
+ Flags |= 0x8;
+ if (Index.partiallySplitLTOUnits())
+ Flags |= 0x10;
Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
for (const auto &GVI : valueIds()) {
@@ -3661,7 +3760,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3676,6 +3777,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3748,6 +3850,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.push_back(*ValueId);
NameVals.push_back(Index.getModuleId(VS->modulePath()));
NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
+ NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
for (auto &RI : VS->refs()) {
auto RefValueId = getValueId(RI.getGUID());
if (!RefValueId)
@@ -3764,25 +3867,32 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto *FS = cast<FunctionSummary>(S);
- writeFunctionTypeMetadataRecords(Stream, FS, ReferencedTypeIds);
+ writeFunctionTypeMetadataRecords(Stream, FS);
+ getReferencedTypeIds(FS, ReferencedTypeIds);
NameVals.push_back(*ValueId);
NameVals.push_back(Index.getModuleId(FS->modulePath()));
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
+ NameVals.push_back(FS->entryCount());
+
// Fill in below
- NameVals.push_back(0);
+ NameVals.push_back(0); // numrefs
+ NameVals.push_back(0); // immutablerefcnt
- unsigned Count = 0;
+ unsigned Count = 0, ImmutableRefCnt = 0;
for (auto &RI : FS->refs()) {
auto RefValueId = getValueId(RI.getGUID());
if (!RefValueId)
continue;
NameVals.push_back(*RefValueId);
+ if (RI.isReadOnly())
+ ImmutableRefCnt++;
Count++;
}
- NameVals[5] = Count;
+ NameVals[6] = Count;
+ NameVals[7] = ImmutableRefCnt;
bool HasProfileData = false;
for (auto &EI : FS->calls()) {
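The two placeholders pushed above live at fixed offsets because the combined function summary prefix now has eight fixed fields; this is why the patch-up below moves from NameVals[5] to indices 6 and 7:

    // NameVals prefix written above:
    //   [0] valueid  [1] modid  [2] flags  [3] instcount
    //   [4] fflags   [5] entrycount
    //   [6] numrefs           (patched after walking the refs)
    //   [7] numimmutablerefs  (patched after walking the refs)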
@@ -3851,6 +3961,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.EmitRecord(bitc::FS_COMBINED_ALIAS, NameVals, FSAliasAbbrev);
NameVals.clear();
MaybeEmitOriginalName(*AS);
+
+ if (auto *FS = dyn_cast<FunctionSummary>(&AS->getAliasee()))
+ getReferencedTypeIds(FS, ReferencedTypeIds);
}
if (!Index.cfiFunctionDefs().empty()) {
@@ -3871,12 +3984,13 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.clear();
}
- if (!Index.typeIds().empty()) {
- for (auto &S : Index.typeIds()) {
- // Skip if not referenced in any GV summary within this index file.
- if (!ReferencedTypeIds.count(GlobalValue::getGUID(S.first)))
- continue;
- writeTypeIdSummaryRecord(NameVals, StrtabBuilder, S.first, S.second);
+ // Walk the GUIDs that were referenced, and write the
+ // corresponding type id records.
+ for (auto &T : ReferencedTypeIds) {
+ auto TidIter = Index.typeIds().equal_range(T);
+ for (auto It = TidIter.first; It != TidIter.second; ++It) {
+ writeTypeIdSummaryRecord(NameVals, StrtabBuilder, It->second.first,
+ It->second.second);
Stream.EmitRecord(bitc::FS_TYPE_ID, NameVals);
NameVals.clear();
}
@@ -3926,7 +4040,7 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
if (ModHash)
// Save the written hash value.
- std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash));
+ llvm::copy(Vals, std::begin(*ModHash));
}
}
diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index d473741e8ceb..deb04f1bb36c 100644
--- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -184,7 +184,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
return;
bool IsGlobalValue = OM.isGlobalValue(ID);
- llvm::sort(List.begin(), List.end(), [&](const Entry &L, const Entry &R) {
+ llvm::sort(List, [&](const Entry &L, const Entry &R) {
const Use *LU = L.first;
const Use *RU = R.first;
if (LU == RU)
@@ -745,7 +745,7 @@ void ValueEnumerator::organizeMetadata() {
// and then sort by the original/current ID. Since the IDs are guaranteed to
// be unique, the result of std::sort will be deterministic. There's no need
// for std::stable_sort.
- llvm::sort(Order.begin(), Order.end(), [this](MDIndex LHS, MDIndex RHS) {
+ llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID);
});
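Several call sites in this import move from the iterator-pair form to the range form of the STLExtras helpers; the range form simply forwards begin()/end(), so behavior is unchanged. For instance:

    llvm::sort(Order.begin(), Order.end(), Cmp);   // before
    llvm::sort(Order, Cmp);                        // after
    std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash));  // before
    llvm::copy(Vals, std::begin(*ModHash));                             // after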
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 79f11def38f7..797f05ee5cf3 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -471,7 +471,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
- const TerminatorInst *Term = ExitBB->getTerminator();
+ const Instruction *Term = ExitBB->getTerminator();
const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
// The block must end in a return statement or unreachable.
@@ -496,6 +496,10 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// Debug info intrinsics do not get in the way of tail call optimization.
if (isa<DbgInfoIntrinsic>(BBI))
continue;
+ // A lifetime end intrinsic should not stop tail call optimization.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
!isSafeToSpeculativelyExecute(&*BBI))
return false;
@@ -519,10 +523,12 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
AttributeList::ReturnIndex);
- // Noalias is completely benign as far as calling convention goes, it
- // shouldn't affect whether the call is a tail call.
+ // NoAlias and NonNull are completely benign as far as calling convention
+ // goes, they shouldn't affect whether the call is a tail call.
CallerAttrs.removeAttribute(Attribute::NoAlias);
CalleeAttrs.removeAttribute(Attribute::NoAlias);
+ CallerAttrs.removeAttribute(Attribute::NonNull);
+ CalleeAttrs.removeAttribute(Attribute::NonNull);
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
@@ -540,6 +546,21 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
CalleeAttrs.removeAttribute(Attribute::SExt);
}
+ // Drop sext and zext return attributes if the result is not used.
+ // This enables tail calls for code like:
+ //
+ // define void @caller() {
+ // entry:
+ // %unused_result = tail call zeroext i1 @callee()
+ // br label %retlabel
+ // retlabel:
+ // ret void
+ // }
+ if (I->use_empty()) {
+ CalleeAttrs.removeAttribute(Attribute::SExt);
+ CalleeAttrs.removeAttribute(Attribute::ZExt);
+ }
+
// If they're still different, there's some facet we don't understand
// (currently only "inreg", but in future who knows). It may be OK but the
// only safe option is to reject the tail call.
@@ -650,7 +671,7 @@ static void collectEHScopeMembers(
// Returns are boundaries where scope transfer can occur, don't follow
// successors.
- if (Visiting->isReturnBlock())
+ if (Visiting->isEHScopeReturnBlock())
continue;
for (const MachineBasicBlock *Succ : Visiting->successors())
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 20b0b8d3feab..95875ccb8a0b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -553,19 +554,31 @@ void llvm::emitDWARF5AccelTable(
AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents,
const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) {
std::vector<MCSymbol *> CompUnits;
+ SmallVector<unsigned, 1> CUIndex(CUs.size());
+ int Count = 0;
for (const auto &CU : enumerate(CUs)) {
+ if (CU.value()->getCUNode()->getNameTableKind() ==
+ DICompileUnit::DebugNameTableKind::None)
+ continue;
+ CUIndex[CU.index()] = Count++;
assert(CU.index() == CU.value()->getUniqueID());
const DwarfCompileUnit *MainCU =
DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get();
CompUnits.push_back(MainCU->getLabelBegin());
}
+ if (CompUnits.empty())
+ return;
+
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugNamesSection());
+
Contents.finalize(Asm, "names");
Dwarf5AccelTableWriter<DWARF5AccelTableData>(
Asm, Contents, CompUnits,
- [&DD](const DWARF5AccelTableData &Entry) {
+ [&](const DWARF5AccelTableData &Entry) {
const DIE *CUDie = Entry.getDie().getUnitDie();
- return DD.lookupCU(CUDie)->getUniqueID();
+ return CUIndex[DD.lookupCU(CUDie)->getUniqueID()];
})
.emit();
}
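emitDWARF5AccelTable() now skips compile units whose DICompileUnit opted out of a name table (DebugNameTableKind::None), renumbers the remaining units densely through CUIndex so the per-entry CU index stays valid, switches to the .debug_names section itself, and emits nothing if no unit participates. In IR this is driven by the nameTableKind: field, e.g. (illustrative metadata, not taken from this diff):

    !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1,
                                 emissionKind: FullDebug, nameTableKind: None)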
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index c8305ad9c547..042243b79259 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -27,29 +27,35 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
- Asm.OutStreamer->SwitchSection(Section);
-
uint64_t Length = sizeof(uint16_t) // version
+ sizeof(uint8_t) // address_size
+ sizeof(uint8_t) // segment_selector_size
+ AddrSize * Pool.size(); // entries
+ Asm.OutStreamer->AddComment("Length of contribution");
Asm.emitInt32(Length); // TODO: Support DWARF64 format.
+ Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
+ Asm.OutStreamer->AddComment("Address size");
Asm.emitInt8(AddrSize);
+ Asm.OutStreamer->AddComment("Segment selector size");
Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
}
// Emit addresses into the section given.
void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
- if (Asm.getDwarfVersion() >= 5)
- emitHeader(Asm, AddrSection);
-
- if (Pool.empty())
+ if (isEmpty())
return;
// Start the dwarf addr section.
Asm.OutStreamer->SwitchSection(AddrSection);
+ if (Asm.getDwarfVersion() >= 5)
+ emitHeader(Asm, AddrSection);
+
+ // Define the symbol that marks the start of the contribution.
+ // It is referenced via DW_AT_addr_base.
+ Asm.OutStreamer->EmitLabel(AddressTableBaseSym);
+
// Order the address pool entries by ID
SmallVector<const MCExpr *, 64> Entries(Pool.size());
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
index d5008fab5563..2209c7eb50ed 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -51,8 +51,14 @@ public:
void resetUsedFlag() { HasBeenUsed = false; }
+ MCSymbol *getLabel() { return AddressTableBaseSym; }
+ void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
+
private:
void emitHeader(AsmPrinter &Asm, MCSection *Section);
+
+ /// Symbol designates the start of the contribution to the address table.
+ MCSymbol *AddressTableBaseSym = nullptr;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9bbc77b3056b..7070451e3330 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AsmPrinter.h"
-#include "AsmPrinterHandler.h"
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
#include "llvm/ADT/APFloat.h"
@@ -32,8 +32,10 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -52,6 +54,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -260,7 +263,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
const Triple &Target = TM.getTargetTriple();
- OutStreamer->EmitVersionForTarget(Target);
+ OutStreamer->EmitVersionForTarget(Target, M.getSDKVersion());
// Allow the target to emit any magic that it wants at the start of the file.
EmitStartOfAsmFile(M);
@@ -355,7 +358,7 @@ bool AsmPrinter::doInitialization(Module &M) {
}
break;
case ExceptionHandling::Wasm:
- // TODO to prevent warning
+ ES = new WasmException(this);
break;
}
if (ES)
@@ -363,7 +366,7 @@ bool AsmPrinter::doInitialization(Module &M) {
DWARFGroupName, DWARFGroupDescription));
if (mdconst::extract_or_null<ConstantInt>(
- MMI->getModule()->getModuleFlag("cfguard")))
+ MMI->getModule()->getModuleFlag("cfguardtable")))
Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription));
@@ -627,8 +630,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
-void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
- unsigned Size) const {
+void AsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
OutStreamer->EmitValue(Value, Size);
}
@@ -749,18 +751,30 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
const MachineFrameInfo &MFI = MF->getFrameInfo();
bool Commented = false;
+ auto getSize =
+ [&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) {
+ unsigned Size = 0;
+ for (auto A : Accesses)
+ if (MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex()))
+ Size += A->getSize();
+ return Size;
+ };
+
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand *, 2> Accesses;
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload";
Commented = true;
}
- } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- CommentOS << MMO->getSize() << "-byte Folded Reload";
+ } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
+ if (auto Size = getSize(Accesses)) {
+ CommentOS << Size << "-byte Folded Reload";
Commented = true;
}
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
@@ -769,9 +783,9 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
CommentOS << MMO->getSize() << "-byte Spill";
Commented = true;
}
- } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- CommentOS << MMO->getSize() << "-byte Folded Spill";
+ } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
+ if (auto Size = getSize(Accesses)) {
+ CommentOS << Size << "-byte Folded Spill";
Commented = true;
}
}
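The spill/reload comment logic now asks hasLoadFromStackSlot/hasStoreToStackSlot for the full list of memory accesses instead of a single memoperand and frame index, and sums the sizes of those that hit spill slots, so a folded access touching several slots is reported with its combined byte count.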
@@ -1066,6 +1080,10 @@ void AsmPrinter::EmitFunctionBody() {
++NumInstsInFunction;
}
+ // If there is a pre-instruction symbol, emit a label for it here.
+ if (MCSymbol *S = MI.getPreInstrSymbol())
+ OutStreamer->EmitLabel(S);
+
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -1117,6 +1135,10 @@ void AsmPrinter::EmitFunctionBody() {
break;
}
+ // If there is a post-instruction symbol, emit a label for it here.
+ if (MCSymbol *S = MI.getPostInstrSymbol())
+ OutStreamer->EmitLabel(S);
+
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -1394,6 +1416,33 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
+ MachineModuleInfoCOFF &MMICOFF =
+ MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoCOFF::SymbolListTy Stubs = MMICOFF.GetGVStubList();
+ if (!Stubs.empty()) {
+ const DataLayout &DL = M.getDataLayout();
+
+ for (const auto &Stub : Stubs) {
+ SmallString<256> SectionName = StringRef(".rdata$");
+ SectionName += Stub.first->getName();
+ OutStreamer->SwitchSection(OutContext.getCOFFSection(
+ SectionName,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT,
+ SectionKind::getReadOnly(), Stub.first->getName(),
+ COFF::IMAGE_COMDAT_SELECT_ANY));
+ EmitAlignment(Log2_32(DL.getPointerSize()));
+ OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global);
+ OutStreamer->EmitLabel(Stub.first);
+ OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
+ DL.getPointerSize());
+ }
+ }
+ }
+
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -1450,6 +1499,9 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit llvm.ident metadata in an '.ident' directive.
EmitModuleIdents(M);
+ // Emit bytes for llvm.commandline metadata.
+ EmitModuleCommandLines(M);
+
// Emit __morestack address if needed for indirect calls.
if (MMI->usesMorestackAddr()) {
unsigned Align = 1;
@@ -1534,7 +1586,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit address-significance attributes for all globals.
OutStreamer->EmitAddrsig();
for (const GlobalValue &GV : M.global_values())
- if (!GV.isThreadLocal() && !GV.getName().startswith("llvm.") &&
+ if (!GV.use_empty() && !GV.isThreadLocal() &&
+ !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") &&
!GV.hasAtLeastLocalUnnamedAddr())
OutStreamer->EmitAddrsigSym(getSymbol(&GV));
}
@@ -1958,6 +2011,29 @@ void AsmPrinter::EmitModuleIdents(Module &M) {
}
}
+void AsmPrinter::EmitModuleCommandLines(Module &M) {
+ MCSection *CommandLine = getObjFileLowering().getSectionForCommandLines();
+ if (!CommandLine)
+ return;
+
+ const NamedMDNode *NMD = M.getNamedMetadata("llvm.commandline");
+ if (!NMD || !NMD->getNumOperands())
+ return;
+
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(CommandLine);
+ OutStreamer->EmitZeros(1);
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ assert(N->getNumOperands() == 1 &&
+ "llvm.commandline metadata entry can have only one operand");
+ const MDString *S = cast<MDString>(N->getOperand(0));
+ OutStreamer->EmitBytes(S->getString());
+ OutStreamer->EmitZeros(1);
+ }
+ OutStreamer->PopSection();
+}
+
//===--------------------------------------------------------------------===//
// Emission and print routines
//
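EmitModuleCommandLines() mirrors EmitModuleIdents(): it copies each string from the !llvm.commandline named metadata into the command-line section provided by the object-file lowering (on ELF this is .GCC.command.line), separating entries with NUL bytes. The metadata it consumes looks roughly like this and is typically produced by clang's -frecord-command-line (illustrative, not from this diff):

    !llvm.commandline = !{!0}
    !0 = !{!"clang -O2 -g -frecord-command-line foo.c"}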
@@ -2927,11 +3003,6 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (!S.usesMetadata())
return nullptr;
- assert(!S.useStatepoints() && "statepoints do not currently support custom"
- " stackmap formats, please see the documentation for a description of"
- " the default format. If you really need a custom serialized format,"
- " please file a bug");
-
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
gcp_map_type::iterator GCPI = GCMap.find(&S);
if (GCPI != GCMap.end())
@@ -2952,6 +3023,27 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
}
+void AsmPrinter::emitStackMaps(StackMaps &SM) {
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ bool NeedsDefault = false;
+ if (MI->begin() == MI->end())
+ // No GC strategy, use the default format.
+ NeedsDefault = true;
+ else
+ for (auto &I : *MI) {
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ if (MP->emitStackMaps(SM, *this))
+ continue;
+ // The strategy doesn't have printer or doesn't emit custom stack maps.
+ // Use the default format.
+ NeedsDefault = true;
+ }
+
+ if (NeedsDefault)
+ SM.serializeToStackMapSection();
+}
+
/// Pin vtable to this file.
AsmPrinterHandler::~AsmPrinterHandler() = default;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 605588470670..afce3ad3133b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -212,6 +212,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpWindowSave:
OutStreamer->EmitCFIWindowSave();
break;
+ case MCCFIInstruction::OpNegateRAState:
+ OutStreamer->EmitCFINegateRAState();
+ break;
case MCCFIInstruction::OpSameValue:
OutStreamer->EmitCFISameValue(Inst.getRegister());
break;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4159eb19423a..62103e3107c0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -71,6 +71,42 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
}
+unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
+ const MDNode *LocMDNode) const {
+ if (!DiagInfo) {
+ DiagInfo = make_unique<SrcMgrDiagInfo>();
+
+ MCContext &Context = MMI->getContext();
+ Context.setInlineSourceManager(&DiagInfo->SrcMgr);
+
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
+ DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
+ }
+ }
+
+ SourceMgr &SrcMgr = DiagInfo->SrcMgr;
+
+ std::unique_ptr<MemoryBuffer> Buffer;
+ // The inline asm source manager will outlive AsmStr, so make a copy of the
+ // string for SourceMgr to own.
+ Buffer = MemoryBuffer::getMemBufferCopy(AsmStr, "<inline asm>");
+
+ // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+ unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+
+ // Store LocMDNode in DiagInfo, using BufNum as an identifier.
+ if (LocMDNode) {
+ DiagInfo->LocInfos.resize(BufNum);
+ DiagInfo->LocInfos[BufNum - 1] = LocMDNode;
+ }
+
+ return BufNum;
+}
+
+
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
const MCTargetOptions &MCOptions,
@@ -98,39 +134,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
return;
}
- if (!DiagInfo) {
- DiagInfo = make_unique<SrcMgrDiagInfo>();
+ unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
+ DiagInfo->SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
- MCContext &Context = MMI->getContext();
- Context.setInlineSourceManager(&DiagInfo->SrcMgr);
-
- LLVMContext &LLVMCtx = MMI->getModule()->getContext();
- if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
- DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
- DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
- DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
- }
- }
-
- SourceMgr &SrcMgr = DiagInfo->SrcMgr;
- SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
-
- std::unique_ptr<MemoryBuffer> Buffer;
- // The inline asm source manager will outlive Str, so make a copy of the
- // string for SourceMgr to own.
- Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
-
- // Tell SrcMgr about this buffer, it takes ownership of the buffer.
- unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
-
- // Store LocMDNode in DiagInfo, using BufNum as an identifier.
- if (LocMDNode) {
- DiagInfo->LocInfos.resize(BufNum);
- DiagInfo->LocInfos[BufNum-1] = LocMDNode;
- }
-
- std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
+ std::unique_ptr<MCAsmParser> Parser(createMCAsmParser(
+ DiagInfo->SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
// Do not use assembler-level information for parsing inline assembly.
OutStreamer->setUseAssemblerInfoForParsing(false);
@@ -148,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
+ // Enable lexing Masm binary and hex integer literals in intel inline
+ // assembly.
if (Dialect == InlineAsm::AD_Intel)
- // We need this flag to be able to parse numbers like "0bH"
- Parser->setParsingInlineAsm(true);
+ Parser->getLexer().setLexMasmIntegers(true);
if (MF) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
@@ -519,6 +528,44 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
MCOptions.SanitizeAddress =
MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress);
+ // Emit warnings if we use reserved registers on the clobber list, as
+ // that might give surprising results.
+ std::vector<std::string> RestrRegs;
+ // Start with the first operand descriptor, and iterate over them.
+ for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands();
+ I < NumOps; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (MO.isImm()) {
+ unsigned Flags = MO.getImm();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber &&
+ !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) {
+ RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg()));
+ }
+ // Skip to one before the next operand descriptor, if it exists.
+ I += InlineAsm::getNumOperandRegisters(Flags);
+ }
+ }
+
+ if (!RestrRegs.empty()) {
+ unsigned BufNum = addInlineAsmDiagBuffer(OS.str(), LocMD);
+ auto &SrcMgr = DiagInfo->SrcMgr;
+ SMLoc Loc = SMLoc::getFromPointer(
+ SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin());
+
+ std::string Msg = "inline asm clobber list contains reserved registers: ";
+ for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) {
+ if(I != RestrRegs.begin())
+ Msg += ", ";
+ Msg += *I;
+ }
+ std::string Note = "Reserved registers on the clobber list may not be "
+ "preserved across the asm statement, and clobbering them may "
+ "lead to undefined behaviour.";
+ SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
+ SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
+ }
+
EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
MI->getInlineAsmDialect());
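The new check walks the inline-asm operand descriptors, collects every clobbered register that TargetRegisterInfo::isAsmClobberable() rejects (typically reserved registers such as the stack or frame pointer), and reports them through the inline-asm SourceMgr as a warning plus an explanatory note. On targets that implement the hook, something like the following may now warn (illustrative example, not from this diff):

    void f() {
      // clobbering a reserved register such as the stack pointer
      __asm__ volatile("" ::: "sp");
    }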
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8c5c5478d01a..8cabad4ad312 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -31,6 +31,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -43,6 +44,7 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
@@ -72,6 +74,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -90,8 +93,20 @@
using namespace llvm;
using namespace llvm::codeview;
-static cl::opt<bool> EmitDebugGlobalHashes("emit-codeview-ghash-section",
- cl::ReallyHidden, cl::init(false));
+static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
+ switch (Type) {
+ case Triple::ArchType::x86:
+ return CPUType::Pentium3;
+ case Triple::ArchType::x86_64:
+ return CPUType::X64;
+ case Triple::ArchType::thumb:
+ return CPUType::Thumb;
+ case Triple::ArchType::aarch64:
+ return CPUType::ARM64;
+ default:
+ report_fatal_error("target architecture doesn't map to a CodeView CPUType");
+ }
+}
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
: DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
@@ -100,11 +115,21 @@ CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
!AP->getObjFileLowering().getCOFFDebugSymbolsSection()) {
Asm = nullptr;
+ MMI->setDebugInfoAvailability(false);
return;
}
-
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
+
+ TheCPU =
+ mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
+
+ collectGlobalVariableInfo();
+
+ // Check if we should emit type record hashes.
+ ConstantInt *GH = mdconst::extract_or_null<ConstantInt>(
+ MMI->getModule()->getModuleFlag("CodeViewGHash"));
+ EmitDebugGlobalHashes = GH && !GH->isZero();
}
StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
@@ -116,7 +141,9 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
// If this is a Unix-style path, just use it as is. Don't try to canonicalize
// it textually because one of the path components could be a symlink.
- if (!Dir.empty() && Dir[0] == '/') {
+ if (Dir.startswith("/") || Filename.startswith("/")) {
+ if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix))
+ return Filename;
Filepath = Dir;
if (Dir.back() != '/')
Filepath += '/';
@@ -337,6 +364,36 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
return recordTypeIndexForDINode(SP, TI);
}
+static bool isTrivial(const DICompositeType *DCTy) {
+ return ((DCTy->getFlags() & DINode::FlagTrivial) == DINode::FlagTrivial);
+}
+
+static FunctionOptions
+getFunctionOptions(const DISubroutineType *Ty,
+ const DICompositeType *ClassTy = nullptr,
+ StringRef SPName = StringRef("")) {
+ FunctionOptions FO = FunctionOptions::None;
+ const DIType *ReturnTy = nullptr;
+ if (auto TypeArray = Ty->getTypeArray()) {
+ if (TypeArray.size())
+ ReturnTy = TypeArray[0].resolve();
+ }
+
+ if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) {
+ if (!isTrivial(ReturnDCTy))
+ FO |= FunctionOptions::CxxReturnUdt;
+ }
+
+ // DISubroutineType is unnamed. Use the DISubprogram's name, i.e. SPName, in the comparison.
+ if (ClassTy && !isTrivial(ClassTy) && SPName == ClassTy->getName()) {
+ FO |= FunctionOptions::Constructor;
+
+ // TODO: Set the FunctionOptions::ConstructorWithVirtualBases flag when applicable.
+
+ }
+ return FO;
+}
+
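As a rough illustration of when getFunctionOptions sets these flags (the types and names below are made up, not taken from the change): a method returning a non-trivial class picks up CxxReturnUdt, and a member function named after its own non-trivial class picks up Constructor.

struct NonTrivial {
  NonTrivial();        // user-provided constructor, so DINode::FlagTrivial is absent
  ~NonTrivial();
  int Value;
};

struct Holder {
  Holder();            // name matches the enclosing non-trivial class
                       //   => FunctionOptions::Constructor
  NonTrivial get();    // returns a non-trivial UDT
                       //   => FunctionOptions::CxxReturnUdt
};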
TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
const DICompositeType *Class) {
// Always use the method declaration as the key for the function type. The
@@ -356,8 +413,10 @@ TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
// member function type.
TypeLoweringScope S(*this);
const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0;
+
+ FunctionOptions FO = getFunctionOptions(SP->getType(), Class, SP->getName());
TypeIndex TI = lowerTypeMemberFunction(
- SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod);
+ SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod, FO);
return recordTypeIndexForDINode(SP, TI, Class);
}
@@ -508,6 +567,11 @@ void CodeViewDebug::endModule() {
OS.AddComment("String table");
OS.EmitCVStringTableDirective();
+ // Emit S_BUILDINFO, which points to LF_BUILDINFO. Put this in its own symbol
+ // subsection in the generic .debug$S section at the end. There is no
+ // particular reason for this ordering other than to match MSVC.
+ emitBuildInfo();
+
// Emit type information and hashes last, so that any types we translate while
// emitting function info are included.
emitTypeInformation();
@@ -669,30 +733,8 @@ static Version parseVersion(StringRef Name) {
return V;
}
-static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
- switch (Type) {
- case Triple::ArchType::x86:
- return CPUType::Pentium3;
- case Triple::ArchType::x86_64:
- return CPUType::X64;
- case Triple::ArchType::thumb:
- return CPUType::Thumb;
- case Triple::ArchType::aarch64:
- return CPUType::ARM64;
- default:
- report_fatal_error("target architecture doesn't map to a CodeView CPUType");
- }
-}
-
void CodeViewDebug::emitCompilerInformation() {
- MCContext &Context = MMI->getContext();
- MCSymbol *CompilerBegin = Context.createTempSymbol(),
- *CompilerEnd = Context.createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(CompilerEnd, CompilerBegin, 2);
- OS.EmitLabel(CompilerBegin);
- OS.AddComment("Record kind: S_COMPILE3");
- OS.EmitIntValue(SymbolKind::S_COMPILE3, 2);
+ MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3);
uint32_t Flags = 0;
NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
@@ -707,9 +749,7 @@ void CodeViewDebug::emitCompilerInformation() {
OS.EmitIntValue(Flags, 4);
OS.AddComment("CPUType");
- CPUType CPU =
- mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
- OS.EmitIntValue(static_cast<uint64_t>(CPU), 2);
+ OS.EmitIntValue(static_cast<uint64_t>(TheCPU), 2);
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
@@ -733,7 +773,48 @@ void CodeViewDebug::emitCompilerInformation() {
OS.AddComment("Null-terminated compiler version string");
emitNullTerminatedSymbolName(OS, CompilerVersion);
- OS.EmitLabel(CompilerEnd);
+ endSymbolRecord(CompilerEnd);
+}
+
+static TypeIndex getStringIdTypeIdx(GlobalTypeTableBuilder &TypeTable,
+ StringRef S) {
+ StringIdRecord SIR(TypeIndex(0x0), S);
+ return TypeTable.writeLeafType(SIR);
+}
+
+void CodeViewDebug::emitBuildInfo() {
+ // First, make LF_BUILDINFO. It's a sequence of strings with various bits of
+ // build info. The known prefix is:
+ // - Absolute path of current directory
+ // - Compiler path
+ // - Main source file path, relative to CWD or absolute
+ // - Type server PDB file
+ // - Canonical compiler command line
+ // If frontend and backend compilation are separated (think llc or LTO), it's
+ // not clear if the compiler path should refer to the executable for the
+ // frontend or the backend. Leave it blank for now.
+ TypeIndex BuildInfoArgs[BuildInfoRecord::MaxArgs] = {};
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin(); // FIXME: Multiple CUs.
+ const auto *CU = cast<DICompileUnit>(Node);
+ const DIFile *MainSourceFile = CU->getFile();
+ BuildInfoArgs[BuildInfoRecord::CurrentDirectory] =
+ getStringIdTypeIdx(TypeTable, MainSourceFile->getDirectory());
+ BuildInfoArgs[BuildInfoRecord::SourceFile] =
+ getStringIdTypeIdx(TypeTable, MainSourceFile->getFilename());
+ // FIXME: Path to compiler and command line. PDB is intentionally blank unless
+ // we implement /Zi type servers.
+ BuildInfoRecord BIR(BuildInfoArgs);
+ TypeIndex BuildInfoIndex = TypeTable.writeLeafType(BIR);
+
+ // Make a new .debug$S subsection for the S_BUILDINFO record, which points
+ // from the module symbols into the type stream.
+ MCSymbol *BISubsecEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
+ MCSymbol *BIEnd = beginSymbolRecord(SymbolKind::S_BUILDINFO);
+ OS.AddComment("LF_BUILDINFO index");
+ OS.EmitIntValue(BuildInfoIndex.getIndex(), 4);
+ endSymbolRecord(BIEnd);
+ endCVSubsection(BISubsecEnd);
}
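For a hypothetical compile of /home/user/foo.c, the record built here would conceptually hold:

// LF_BUILDINFO slots as populated by this change (paths are hypothetical):
//   BuildInfoArgs[BuildInfoRecord::CurrentDirectory] -> LF_STRING_ID "/home/user"
//   BuildInfoArgs[BuildInfoRecord::SourceFile]       -> LF_STRING_ID "foo.c"
//   The compiler path, PDB path, and command line stay as empty TypeIndex
//   values until the FIXMEs above are addressed.
// The S_BUILDINFO symbol then simply stores the TypeIndex of this LF_BUILDINFO.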
void CodeViewDebug::emitInlineeLinesSubsection() {
@@ -773,18 +854,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
const DILocation *InlinedAt,
const InlineSite &Site) {
- MCSymbol *InlineBegin = MMI->getContext().createTempSymbol(),
- *InlineEnd = MMI->getContext().createTempSymbol();
-
assert(TypeIndices.count({Site.Inlinee, nullptr}));
TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}];
// SymbolRecord
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 2); // RecordLength
- OS.EmitLabel(InlineBegin);
- OS.AddComment("Record kind: S_INLINESITE");
- OS.EmitIntValue(SymbolKind::S_INLINESITE, 2); // RecordKind
+ MCSymbol *InlineEnd = beginSymbolRecord(SymbolKind::S_INLINESITE);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4);
@@ -799,9 +873,9 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
FI.Begin, FI.End);
- OS.EmitLabel(InlineEnd);
+ endSymbolRecord(InlineEnd);
- emitLocalVariableList(Site.InlinedLocals);
+ emitLocalVariableList(FI, Site.InlinedLocals);
// Recurse on child inlined call sites before closing the scope.
for (const DILocation *ChildSite : Site.ChildSites) {
@@ -812,10 +886,7 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
}
// Close the scope.
- OS.AddComment("Record length");
- OS.EmitIntValue(2, 2); // RecordLength
- OS.AddComment("Record kind: S_INLINESITE_END");
- OS.EmitIntValue(SymbolKind::S_INLINESITE_END, 2); // RecordKind
+ emitEndSymbolRecord(SymbolKind::S_INLINESITE_END);
}
void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
@@ -850,13 +921,7 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
// Emit S_THUNK32
- MCSymbol *ThunkRecordBegin = MMI->getContext().createTempSymbol(),
- *ThunkRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(ThunkRecordEnd, ThunkRecordBegin, 2);
- OS.EmitLabel(ThunkRecordBegin);
- OS.AddComment("Record kind: S_THUNK32");
- OS.EmitIntValue(unsigned(SymbolKind::S_THUNK32), 2);
+ MCSymbol *ThunkRecordEnd = beginSymbolRecord(SymbolKind::S_THUNK32);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4);
OS.AddComment("PtrEnd");
@@ -874,17 +939,13 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
OS.AddComment("Function name");
emitNullTerminatedSymbolName(OS, FuncName);
// Additional fields specific to the thunk ordinal would go here.
- OS.EmitLabel(ThunkRecordEnd);
+ endSymbolRecord(ThunkRecordEnd);
// Local variables/inlined routines are purposely omitted here. The point of
// marking this as a thunk is so Visual Studio will NOT stop in this routine.
// Emit S_PROC_ID_END
- const unsigned RecordLengthForSymbolEnd = 2;
- OS.AddComment("Record length");
- OS.EmitIntValue(RecordLengthForSymbolEnd, 2);
- OS.AddComment("Record kind: S_PROC_ID_END");
- OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+ emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
endCVSubsection(SymbolsEnd);
}
@@ -927,19 +988,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Symbol subsection for " + Twine(FuncName));
MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
{
- MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
- *ProcRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2);
- OS.EmitLabel(ProcRecordBegin);
-
- if (GV->hasLocalLinkage()) {
- OS.AddComment("Record kind: S_LPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
- } else {
- OS.AddComment("Record kind: S_GPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
- }
+ SymbolKind ProcKind = GV->hasLocalLinkage() ? SymbolKind::S_LPROC32_ID
+ : SymbolKind::S_GPROC32_ID;
+ MCSymbol *ProcRecordEnd = beginSymbolRecord(ProcKind);
// These fields are filled in by tools like CVPACK which run after the fact.
OS.AddComment("PtrParent");
@@ -968,9 +1019,28 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function name");
// Truncate the name so we won't overflow the record length field.
emitNullTerminatedSymbolName(OS, FuncName);
- OS.EmitLabel(ProcRecordEnd);
+ endSymbolRecord(ProcRecordEnd);
- emitLocalVariableList(FI.Locals);
+ MCSymbol *FrameProcEnd = beginSymbolRecord(SymbolKind::S_FRAMEPROC);
+ // Subtract out the CSR size since MSVC excludes that and we include it.
+ OS.AddComment("FrameSize");
+ OS.EmitIntValue(FI.FrameSize - FI.CSRSize, 4);
+ OS.AddComment("Padding");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Offset of padding");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Bytes of callee saved registers");
+ OS.EmitIntValue(FI.CSRSize, 4);
+ OS.AddComment("Exception handler offset");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Exception handler section");
+ OS.EmitIntValue(0, 2);
+ OS.AddComment("Flags (defines frame register)");
+ OS.EmitIntValue(uint32_t(FI.FrameProcOpts), 4);
+ endSymbolRecord(FrameProcEnd);
+
+ emitLocalVariableList(FI, FI.Locals);
+ emitGlobalVariableList(FI.Globals);
emitLexicalBlockList(FI.ChildBlocks, FI);
// Emit inlined call site information. Only emit functions inlined directly
@@ -986,13 +1056,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
for (auto Annot : FI.Annotations) {
MCSymbol *Label = Annot.first;
MDTuple *Strs = cast<MDTuple>(Annot.second);
- MCSymbol *AnnotBegin = MMI->getContext().createTempSymbol(),
- *AnnotEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(AnnotEnd, AnnotBegin, 2);
- OS.EmitLabel(AnnotBegin);
- OS.AddComment("Record kind: S_ANNOTATION");
- OS.EmitIntValue(SymbolKind::S_ANNOTATION, 2);
+ MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION);
OS.EmitCOFFSecRel32(Label, /*Offset=*/0);
// FIXME: Make sure we don't overflow the max record size.
OS.EmitCOFFSectionIndex(Label);
@@ -1004,17 +1068,14 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString");
OS.EmitBytes(StringRef(Str.data(), Str.size() + 1));
}
- OS.EmitLabel(AnnotEnd);
+ endSymbolRecord(AnnotEnd);
}
if (SP != nullptr)
emitDebugInfoForUDTs(LocalUDTs);
// We're done with this function.
- OS.AddComment("Record length");
- OS.EmitIntValue(0x0002, 2);
- OS.AddComment("Record kind: S_PROC_ID_END");
- OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+ emitEndSymbolRecord(SymbolKind::S_PROC_ID_END);
}
endCVSubsection(SymbolsEnd);
@@ -1034,21 +1095,8 @@ CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
return DR;
}
-CodeViewDebug::LocalVarDefRange
-CodeViewDebug::createDefRangeGeneral(uint16_t CVRegister, bool InMemory,
- int Offset, bool IsSubfield,
- uint16_t StructOffset) {
- LocalVarDefRange DR;
- DR.InMemory = InMemory;
- DR.DataOffset = Offset;
- DR.IsSubfield = IsSubfield;
- DR.StructOffset = StructOffset;
- DR.CVRegister = CVRegister;
- return DR;
-}
-
void CodeViewDebug::collectVariableInfoFromMFTable(
- DenseSet<InlinedVariable> &Processed) {
+ DenseSet<InlinedEntity> &Processed) {
const MachineFunction &MF = *Asm->MF;
const TargetSubtargetInfo &TSI = MF.getSubtarget();
const TargetFrameLowering *TFI = TSI.getFrameLowering();
@@ -1060,7 +1108,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
- Processed.insert(InlinedVariable(VI.Var, VI.Loc->getInlinedAt()));
+ Processed.insert(InlinedEntity(VI.Var, VI.Loc->getInlinedAt()));
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
@@ -1196,15 +1244,15 @@ void CodeViewDebug::calculateRanges(
}
void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
- DenseSet<InlinedVariable> Processed;
+ DenseSet<InlinedEntity> Processed;
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(Processed);
for (const auto &I : DbgValues) {
- InlinedVariable IV = I.first;
+ InlinedEntity IV = I.first;
if (Processed.count(IV))
continue;
- const DILocalVariable *DIVar = IV.first;
+ const DILocalVariable *DIVar = cast<DILocalVariable>(IV.first);
const DILocation *InlinedAt = IV.second;
// Instruction ranges, specifying where IV is accessible.
@@ -1228,6 +1276,9 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
}
void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
+ const TargetSubtargetInfo &TSI = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
const Function &GV = MF->getFunction();
auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()});
assert(Insertion.second && "function already has info");
@@ -1235,6 +1286,66 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn->FuncId = NextFuncId++;
CurFn->Begin = Asm->getFunctionBegin();
+ // The S_FRAMEPROC record reports the stack size, and how many bytes of
+ // callee-saved registers were used. For targets that don't use a PUSH
+ // instruction (AArch64), this will be zero.
+ CurFn->CSRSize = MFI.getCVBytesOfCalleeSavedRegisters();
+ CurFn->FrameSize = MFI.getStackSize();
+ CurFn->OffsetAdjustment = MFI.getOffsetAdjustment();
+ CurFn->HasStackRealignment = TRI->needsStackRealignment(*MF);
+
+ // For this function S_FRAMEPROC record, figure out which codeview register
+ // will be the frame pointer.
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::None; // None.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::None; // None.
+ if (CurFn->FrameSize > 0) {
+ if (!TSI.getFrameLowering()->hasFP(*MF)) {
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::StackPtr;
+ } else {
+ // If there is an FP, parameters are always relative to it.
+ CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::FramePtr;
+ if (CurFn->HasStackRealignment) {
+ // If the stack needs realignment, locals are relative to SP or VFRAME.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
+ } else {
+ // Otherwise, locals are relative to EBP, and we probably have VLAs or
+ // other stack adjustments.
+ CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::FramePtr;
+ }
+ }
+ }
+
+ // Compute other frame procedure options.
+ FrameProcedureOptions FPO = FrameProcedureOptions::None;
+ if (MFI.hasVarSizedObjects())
+ FPO |= FrameProcedureOptions::HasAlloca;
+ if (MF->exposesReturnsTwice())
+ FPO |= FrameProcedureOptions::HasSetJmp;
+ // FIXME: Set HasLongJmp if we ever track that info.
+ if (MF->hasInlineAsm())
+ FPO |= FrameProcedureOptions::HasInlineAssembly;
+ if (GV.hasPersonalityFn()) {
+ if (isAsynchronousEHPersonality(
+ classifyEHPersonality(GV.getPersonalityFn())))
+ FPO |= FrameProcedureOptions::HasStructuredExceptionHandling;
+ else
+ FPO |= FrameProcedureOptions::HasExceptionHandling;
+ }
+ if (GV.hasFnAttribute(Attribute::InlineHint))
+ FPO |= FrameProcedureOptions::MarkedInline;
+ if (GV.hasFnAttribute(Attribute::Naked))
+ FPO |= FrameProcedureOptions::Naked;
+ if (MFI.hasStackProtectorIndex())
+ FPO |= FrameProcedureOptions::SecurityChecks;
+ FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
+ FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
+ if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.optForSize() &&
+ !GV.hasFnAttribute(Attribute::OptimizeNone))
+ FPO |= FrameProcedureOptions::OptimizedForSpeed;
+ // FIXME: Set GuardCfg when it is implemented.
+ CurFn->FrameProcOpts = FPO;
+
OS.EmitCVFuncIdDirective(CurFn->FuncId);
// Find the end of the function prolog. First known non-DBG_VALUE and
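The two shifted ORs above place the chosen frame-pointer registers into bits 14-15 (locals) and 16-17 (parameters) of the S_FRAMEPROC flags word. A standalone sketch of that packing with plain integers, outside the codeview enums (the 0-3 encoding of none / stack pointer / frame pointer / base pointer is an assumption mirroring EncodedFramePtrReg):

#include <cstdint>

uint32_t packFramePtrRegs(uint32_t Flags, uint32_t LocalFP, uint32_t ParamFP) {
  Flags |= (LocalFP & 0x3u) << 14; // local-variable frame pointer field
  Flags |= (ParamFP & 0x3u) << 16; // parameter frame pointer field
  return Flags;
}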
@@ -1358,6 +1469,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
case dwarf::DW_TAG_union_type:
return lowerTypeUnion(cast<DICompositeType>(Ty));
case dwarf::DW_TAG_unspecified_type:
+ if (Ty->getName() == "decltype(nullptr)")
+ return TypeIndex::NullptrT();
return TypeIndex::None();
default:
// Use the null type index.
@@ -1552,6 +1665,9 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty,
break;
}
+ if (Ty->isObjectPointer())
+ PO |= PointerOptions::Const;
+
PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
return TypeTable.writeLeafType(PR);
}
@@ -1702,49 +1818,54 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
- ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
- ArgTypeIndices.size(), ArgListIndex);
+ FunctionOptions FO = getFunctionOptions(Ty);
+ ProcedureRecord Procedure(ReturnTypeIndex, CC, FO, ArgTypeIndices.size(),
+ ArgListIndex);
return TypeTable.writeLeafType(Procedure);
}
TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
const DIType *ClassTy,
int ThisAdjustment,
- bool IsStaticMethod) {
+ bool IsStaticMethod,
+ FunctionOptions FO) {
// Lower the containing class type.
TypeIndex ClassType = getTypeIndex(ClassTy);
- SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
- for (DITypeRef ArgTypeRef : Ty->getTypeArray())
- ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+ DITypeRefArray ReturnAndArgs = Ty->getTypeArray();
- // MSVC uses type none for variadic argument.
- if (ReturnAndArgTypeIndices.size() > 1 &&
- ReturnAndArgTypeIndices.back() == TypeIndex::Void()) {
- ReturnAndArgTypeIndices.back() = TypeIndex::None();
- }
- TypeIndex ReturnTypeIndex = TypeIndex::Void();
- ArrayRef<TypeIndex> ArgTypeIndices = None;
- if (!ReturnAndArgTypeIndices.empty()) {
- auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices);
- ReturnTypeIndex = ReturnAndArgTypesRef.front();
- ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
- }
+ unsigned Index = 0;
+ SmallVector<TypeIndex, 8> ArgTypeIndices;
+ TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+
+ // If the first argument is a pointer type and this isn't a static method,
+ // treat it as the special 'this' parameter, which is encoded separately from
+ // the arguments.
TypeIndex ThisTypeIndex;
- if (!IsStaticMethod && !ArgTypeIndices.empty()) {
- ThisTypeIndex = ArgTypeIndices.front();
- ArgTypeIndices = ArgTypeIndices.drop_front();
+ if (!IsStaticMethod && ReturnAndArgs.size() > Index) {
+ if (const DIDerivedType *PtrTy =
+ dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index].resolve())) {
+ if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) {
+ ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty);
+ Index++;
+ }
+ }
}
+ while (Index < ReturnAndArgs.size())
+ ArgTypeIndices.push_back(getTypeIndex(ReturnAndArgs[Index++]));
+
+ // MSVC uses type none for variadic argument.
+ if (!ArgTypeIndices.empty() && ArgTypeIndices.back() == TypeIndex::Void())
+ ArgTypeIndices.back() = TypeIndex::None();
+
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
- // TODO: Need to use the correct values for FunctionOptions.
- MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC,
- FunctionOptions::None, ArgTypeIndices.size(),
- ArgListIndex, ThisAdjustment);
+ MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FO,
+ ArgTypeIndices.size(), ArgListIndex, ThisAdjustment);
return TypeTable.writeLeafType(MFR);
}
@@ -1825,12 +1946,20 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
CO |= ClassOptions::Nested;
- // Put the Scoped flag on function-local types.
- for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
- Scope = Scope->getScope().resolve()) {
- if (isa<DISubprogram>(Scope)) {
+ // Put the Scoped flag on function-local types. MSVC sets this flag for an
+ // enum type only when it has an immediate function scope. Clang never puts enums
+ // inside DILexicalBlock scopes. Enum types, as generated by clang, are
+ // always in function, class, or file scopes.
+ if (Ty->getTag() == dwarf::DW_TAG_enumeration_type) {
+ if (ImmediateScope && isa<DISubprogram>(ImmediateScope))
CO |= ClassOptions::Scoped;
- break;
+ } else {
+ for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
+ Scope = Scope->getScope().resolve()) {
+ if (isa<DISubprogram>(Scope)) {
+ CO |= ClassOptions::Scoped;
+ break;
+ }
}
}
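In source terms, the distinction drawn above is roughly (illustrative C++):

void f() {
  enum E { A, B };      // immediate scope is the DISubprogram for f()
                        //   => ClassOptions::Scoped under the new enum rule
  struct S { int X; };  // non-enum local type: the old walk up the scope
                        //   chain still finds f() and sets Scoped
}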
@@ -1930,6 +2059,7 @@ void CodeViewDebug::clear() {
GlobalUDTs.clear();
TypeIndices.clear();
CompleteTypeIndices.clear();
+ ScopeGlobals.clear();
}
void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
@@ -2275,6 +2405,32 @@ TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
return recordTypeIndexForDINode(Ty, TI, ClassTy);
}
+codeview::TypeIndex
+CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
+ const DISubroutineType *SubroutineTy) {
+ assert(PtrTy->getTag() == dwarf::DW_TAG_pointer_type &&
+ "this type must be a pointer type");
+
+ PointerOptions Options = PointerOptions::None;
+ if (SubroutineTy->getFlags() & DINode::DIFlags::FlagLValueReference)
+ Options = PointerOptions::LValueRefThisPointer;
+ else if (SubroutineTy->getFlags() & DINode::DIFlags::FlagRValueReference)
+ Options = PointerOptions::RValueRefThisPointer;
+
+ // Check if we've already translated this type. If there is no ref qualifier
+ // on the function then we look up this pointer type with no associated class
+ // so that the TypeIndex for the this pointer can be shared with the type
+ // index for other pointers to this class type. If there is a ref qualifier
+ // then we lookup the pointer using the subroutine as the parent type.
+ auto I = TypeIndices.find({PtrTy, SubroutineTy});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ TypeLoweringScope S(*this);
+ TypeIndex TI = lowerTypePointer(PtrTy, Options);
+ return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy);
+}
+
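The ref-qualifier cases handled above correspond to C++ member functions declared with & or &&, e.g. (illustrative):

struct S {
  void plain();   // no ref-qualifier: shares the plain this-pointer type
  void lref() &;  // FlagLValueReference => PointerOptions::LValueRefThisPointer
  void rref() &&; // FlagRValueReference => PointerOptions::RValueRefThisPointer
};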
TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
DIType *Ty = TypeRef.resolve();
PointerRecord PR(getTypeIndex(Ty),
@@ -2292,6 +2448,14 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
if (!Ty)
return TypeIndex::Void();
+ // Look through typedefs when getting the complete type index. Call
+ // getTypeIndex on the typedef to ensure that any UDTs are accumulated and are
+ // emitted only once.
+ if (Ty->getTag() == dwarf::DW_TAG_typedef)
+ (void)getTypeIndex(Ty);
+ while (Ty->getTag() == dwarf::DW_TAG_typedef)
+ Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+
// If this is a non-record type, the complete type index is the same as the
// normal type index. Just call getTypeIndex.
switch (Ty->getTag()) {
@@ -2360,35 +2524,40 @@ void CodeViewDebug::emitDeferredCompleteTypes() {
}
}
-void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) {
+void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
+ ArrayRef<LocalVariable> Locals) {
// Get the sorted list of parameters and emit them first.
SmallVector<const LocalVariable *, 6> Params;
for (const LocalVariable &L : Locals)
if (L.DIVar->isParameter())
Params.push_back(&L);
- llvm::sort(Params.begin(), Params.end(),
- [](const LocalVariable *L, const LocalVariable *R) {
- return L->DIVar->getArg() < R->DIVar->getArg();
- });
+ llvm::sort(Params, [](const LocalVariable *L, const LocalVariable *R) {
+ return L->DIVar->getArg() < R->DIVar->getArg();
+ });
for (const LocalVariable *L : Params)
- emitLocalVariable(*L);
+ emitLocalVariable(FI, *L);
// Next emit all non-parameters in the order that we found them.
for (const LocalVariable &L : Locals)
if (!L.DIVar->isParameter())
- emitLocalVariable(L);
+ emitLocalVariable(FI, L);
}
-void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
- // LocalSym record, see SymbolRecord.h for more info.
- MCSymbol *LocalBegin = MMI->getContext().createTempSymbol(),
- *LocalEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(LocalEnd, LocalBegin, 2);
- OS.EmitLabel(LocalBegin);
+/// Only call this on endian-specific types like ulittle16_t and little32_t, or
+/// structs composed of them.
+template <typename T>
+static void copyBytesForDefRange(SmallString<20> &BytePrefix,
+ SymbolKind SymKind, const T &DefRangeHeader) {
+ BytePrefix.resize(2 + sizeof(T));
+ ulittle16_t SymKindLE = ulittle16_t(SymKind);
+ memcpy(&BytePrefix[0], &SymKindLE, 2);
+ memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T));
+}
- OS.AddComment("Record kind: S_LOCAL");
- OS.EmitIntValue(unsigned(SymbolKind::S_LOCAL), 2);
+void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
+ const LocalVariable &Var) {
+ // LocalSym record, see SymbolRecord.h for more info.
+ MCSymbol *LocalEnd = beginSymbolRecord(SymbolKind::S_LOCAL);
LocalSymFlags Flags = LocalSymFlags::None;
if (Var.DIVar->isParameter())
@@ -2405,7 +2574,7 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
OS.EmitIntValue(static_cast<uint16_t>(Flags), 2);
// Truncate the name so we won't overflow the record length field.
emitNullTerminatedSymbolName(OS, Var.DIVar->getName());
- OS.EmitLabel(LocalEnd);
+ endSymbolRecord(LocalEnd);
// Calculate the on disk prefix of the appropriate def range record. The
// records and on disk formats are described in SymbolRecords.h. BytePrefix
@@ -2414,45 +2583,53 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
for (const LocalVarDefRange &DefRange : Var.DefRanges) {
BytePrefix.clear();
if (DefRange.InMemory) {
- uint16_t RegRelFlags = 0;
- if (DefRange.IsSubfield) {
- RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
- (DefRange.StructOffset
- << DefRangeRegisterRelSym::OffsetInParentShift);
+ int Offset = DefRange.DataOffset;
+ unsigned Reg = DefRange.CVRegister;
+
+ // 32-bit x86 call sequences often use PUSH instructions, which disrupt
+ // ESP-relative offsets. Use the virtual frame pointer, VFRAME or $T0,
+ // instead. In frames without stack realignment, $T0 will be the CFA.
+ if (RegisterId(Reg) == RegisterId::ESP) {
+ Reg = unsigned(RegisterId::VFRAME);
+ Offset += FI.OffsetAdjustment;
+ }
+
+ // If we can use the chosen frame pointer for the frame and this isn't a
+ // sliced aggregate, use the smaller S_DEFRANGE_FRAMEPOINTER_REL record.
+ // Otherwise, use S_DEFRANGE_REGISTER_REL.
+ EncodedFramePtrReg EncFP = encodeFramePtrReg(RegisterId(Reg), TheCPU);
+ if (!DefRange.IsSubfield && EncFP != EncodedFramePtrReg::None &&
+ (bool(Flags & LocalSymFlags::IsParameter)
+ ? (EncFP == FI.EncodedParamFramePtrReg)
+ : (EncFP == FI.EncodedLocalFramePtrReg))) {
+ little32_t FPOffset = little32_t(Offset);
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset);
+ } else {
+ uint16_t RegRelFlags = 0;
+ if (DefRange.IsSubfield) {
+ RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
+ (DefRange.StructOffset
+ << DefRangeRegisterRelSym::OffsetInParentShift);
+ }
+ DefRangeRegisterRelSym::Header DRHdr;
+ DRHdr.Register = Reg;
+ DRHdr.Flags = RegRelFlags;
+ DRHdr.BasePointerOffset = Offset;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr);
}
- DefRangeRegisterRelSym Sym(S_DEFRANGE_REGISTER_REL);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.Flags = RegRelFlags;
- Sym.Hdr.BasePointerOffset = DefRange.DataOffset;
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL);
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&Sym.Hdr), sizeof(Sym.Hdr));
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
if (DefRange.IsSubfield) {
- // Unclear what matters here.
- DefRangeSubfieldRegisterSym Sym(S_DEFRANGE_SUBFIELD_REGISTER);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.MayHaveNoName = 0;
- Sym.Hdr.OffsetInParent = DefRange.StructOffset;
-
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_SUBFIELD_REGISTER);
- BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
- sizeof(SymKind));
- BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
- sizeof(Sym.Hdr));
+ DefRangeSubfieldRegisterSym::Header DRHdr;
+ DRHdr.Register = DefRange.CVRegister;
+ DRHdr.MayHaveNoName = 0;
+ DRHdr.OffsetInParent = DefRange.StructOffset;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr);
} else {
- // Unclear what matters here.
- DefRangeRegisterSym Sym(S_DEFRANGE_REGISTER);
- Sym.Hdr.Register = DefRange.CVRegister;
- Sym.Hdr.MayHaveNoName = 0;
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
- BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
- sizeof(SymKind));
- BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
- sizeof(Sym.Hdr));
+ DefRangeRegisterSym::Header DRHdr;
+ DRHdr.Register = DefRange.CVRegister;
+ DRHdr.MayHaveNoName = 0;
+ copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr);
}
}
OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
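The ESP-to-VFRAME rewrite above amounts to rebasing an ESP-relative offset by the per-function offset adjustment before emitting the def range. As a standalone sketch (the numbers and sign conventions are hypothetical; the real adjustment comes from MachineFrameInfo):

int rebaseToVFrame(int ESPOffset, int OffsetAdjustment) {
  // Mirror of "Offset += FI.OffsetAdjustment" above, with Reg switched to VFRAME.
  return ESPOffset + OffsetAdjustment;
}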
@@ -2469,15 +2646,7 @@ void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
/// lexical block scope.
void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
const FunctionInfo& FI) {
- MCSymbol *RecordBegin = MMI->getContext().createTempSymbol(),
- *RecordEnd = MMI->getContext().createTempSymbol();
-
- // Lexical block symbol record.
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(RecordEnd, RecordBegin, 2); // Record Length
- OS.EmitLabel(RecordBegin);
- OS.AddComment("Record kind: S_BLOCK32");
- OS.EmitIntValue(SymbolKind::S_BLOCK32, 2); // Record Kind
+ MCSymbol *RecordEnd = beginSymbolRecord(SymbolKind::S_BLOCK32);
OS.AddComment("PtrParent");
OS.EmitIntValue(0, 4); // PtrParent
OS.AddComment("PtrEnd");
@@ -2490,19 +2659,17 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol
OS.AddComment("Lexical block name");
emitNullTerminatedSymbolName(OS, Block.Name); // Name
- OS.EmitLabel(RecordEnd);
+ endSymbolRecord(RecordEnd);
// Emit variables local to this lexical block.
- emitLocalVariableList(Block.Locals);
+ emitLocalVariableList(FI, Block.Locals);
+ emitGlobalVariableList(Block.Globals);
// Emit lexical blocks contained within this block.
emitLexicalBlockList(Block.Children, FI);
// Close the lexical block scope.
- OS.AddComment("Record length");
- OS.EmitIntValue(2, 2); // Record Length
- OS.AddComment("Record kind: S_END");
- OS.EmitIntValue(SymbolKind::S_END, 2); // Record Kind
+ emitEndSymbolRecord(SymbolKind::S_END);
}
/// Convenience routine for collecting lexical block information for a list
@@ -2510,9 +2677,10 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
void CodeViewDebug::collectLexicalBlockInfo(
SmallVectorImpl<LexicalScope *> &Scopes,
SmallVectorImpl<LexicalBlock *> &Blocks,
- SmallVectorImpl<LocalVariable> &Locals) {
+ SmallVectorImpl<LocalVariable> &Locals,
+ SmallVectorImpl<CVGlobalVariable> &Globals) {
for (LexicalScope *Scope : Scopes)
- collectLexicalBlockInfo(*Scope, Blocks, Locals);
+ collectLexicalBlockInfo(*Scope, Blocks, Locals, Globals);
}
/// Populate the lexical blocks and local variable lists of the parent with
@@ -2520,45 +2688,58 @@ void CodeViewDebug::collectLexicalBlockInfo(
void CodeViewDebug::collectLexicalBlockInfo(
LexicalScope &Scope,
SmallVectorImpl<LexicalBlock *> &ParentBlocks,
- SmallVectorImpl<LocalVariable> &ParentLocals) {
+ SmallVectorImpl<LocalVariable> &ParentLocals,
+ SmallVectorImpl<CVGlobalVariable> &ParentGlobals) {
if (Scope.isAbstractScope())
return;
- auto LocalsIter = ScopeVariables.find(&Scope);
- if (LocalsIter == ScopeVariables.end()) {
- // This scope does not contain variables and can be eliminated.
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
- return;
- }
- SmallVectorImpl<LocalVariable> &Locals = LocalsIter->second;
-
+ // Gather information about the lexical scope including local variables,
+ // global variables, and address ranges.
+ bool IgnoreScope = false;
+ auto LI = ScopeVariables.find(&Scope);
+ SmallVectorImpl<LocalVariable> *Locals =
+ LI != ScopeVariables.end() ? &LI->second : nullptr;
+ auto GI = ScopeGlobals.find(Scope.getScopeNode());
+ SmallVectorImpl<CVGlobalVariable> *Globals =
+ GI != ScopeGlobals.end() ? GI->second.get() : nullptr;
const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode());
- if (!DILB) {
- // This scope is not a lexical block and can be eliminated, but keep any
- // local variables it contains.
- ParentLocals.append(Locals.begin(), Locals.end());
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
- return;
- }
-
const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges();
- if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) {
- // This lexical block scope has too many address ranges to represent in the
- // current CodeView format or does not have a valid address range.
- // Eliminate this lexical scope and promote any locals it contains to the
- // parent scope.
- //
- // For lexical scopes with multiple address ranges you may be tempted to
- // construct a single range covering every instruction where the block is
- // live and everything in between. Unfortunately, Visual Studio only
- // displays variables from the first matching lexical block scope. If the
- // first lexical block contains exception handling code or cold code which
- // is moved to the bottom of the routine creating a single range covering
- // nearly the entire routine, then it will hide all other lexical blocks
- // and the variables they contain.
- //
- ParentLocals.append(Locals.begin(), Locals.end());
- collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+
+ // Ignore lexical scopes which do not contain variables.
+ if (!Locals && !Globals)
+ IgnoreScope = true;
+
+ // Ignore lexical scopes which are not lexical blocks.
+ if (!DILB)
+ IgnoreScope = true;
+
+ // Ignore scopes which have too many address ranges to represent in the
+ // current CodeView format or do not have a valid address range.
+ //
+ // For lexical scopes with multiple address ranges you may be tempted to
+ // construct a single range covering every instruction where the block is
+ // live and everything in between. Unfortunately, Visual Studio only
+ // displays variables from the first matching lexical block scope. If the
+ // first lexical block contains exception handling code or cold code which
+ // is moved to the bottom of the routine creating a single range covering
+ // nearly the entire routine, then it will hide all other lexical blocks
+ // and the variables they contain.
+ if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second))
+ IgnoreScope = true;
+
+ if (IgnoreScope) {
+ // This scope can be safely ignored and eliminating it will reduce the
+ // size of the debug information. Be sure to collect any variable and scope
+ // information from this scope or any of its children and collapse them
+ // into the parent scope.
+ if (Locals)
+ ParentLocals.append(Locals->begin(), Locals->end());
+ if (Globals)
+ ParentGlobals.append(Globals->begin(), Globals->end());
+ collectLexicalBlockInfo(Scope.getChildren(),
+ ParentBlocks,
+ ParentLocals,
+ ParentGlobals);
return;
}
@@ -2569,8 +2750,8 @@ void CodeViewDebug::collectLexicalBlockInfo(
if (!BlockInsertion.second)
return;
- // Create a lexical block containing the local variables and collect the
- // the lexical block information for the children.
+ // Create a lexical block containing the variables and collect the
+ // lexical block information for the children.
const InsnRange &Range = Ranges.front();
assert(Range.first && Range.second);
LexicalBlock &Block = BlockInsertion.first->second;
@@ -2579,9 +2760,15 @@ void CodeViewDebug::collectLexicalBlockInfo(
assert(Block.Begin && "missing label for scope begin");
assert(Block.End && "missing label for scope end");
Block.Name = DILB->getName();
- Block.Locals = std::move(Locals);
+ if (Locals)
+ Block.Locals = std::move(*Locals);
+ if (Globals)
+ Block.Globals = std::move(*Globals);
ParentBlocks.push_back(&Block);
- collectLexicalBlockInfo(Scope.getChildren(), Block.Children, Block.Locals);
+ collectLexicalBlockInfo(Scope.getChildren(),
+ Block.Children,
+ Block.Locals,
+ Block.Globals);
}
void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
@@ -2593,7 +2780,10 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
// Build the lexical block structure to emit for this routine.
if (LexicalScope *CFS = LScopes.getCurrentFunctionScope())
- collectLexicalBlockInfo(*CFS, CurFn->ChildBlocks, CurFn->Locals);
+ collectLexicalBlockInfo(*CFS,
+ CurFn->ChildBlocks,
+ CurFn->Locals,
+ CurFn->Globals);
// Clear the scope and variable information from the map which will not be
// valid after we have finished processing this routine. This also prepares
@@ -2660,30 +2850,57 @@ void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
OS.EmitValueToAlignment(4);
}
+static StringRef getSymbolName(SymbolKind SymKind) {
+ for (const EnumEntry<SymbolKind> &EE : getSymbolTypeNames())
+ if (EE.Value == SymKind)
+ return EE.Name;
+ return "";
+}
+
+MCSymbol *CodeViewDebug::beginSymbolRecord(SymbolKind SymKind) {
+ MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
+ *EndLabel = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
+ OS.EmitLabel(BeginLabel);
+ if (OS.isVerboseAsm())
+ OS.AddComment("Record kind: " + getSymbolName(SymKind));
+ OS.EmitIntValue(unsigned(SymKind), 2);
+ return EndLabel;
+}
+
+void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) {
+ // MSVC does not pad out symbol records to four bytes, but LLVM does to avoid
+ // an extra copy of every symbol record in LLD. This increases object file
+ // size by less than 1% in the clang build, and is compatible with the Visual
+ // C++ linker.
+ OS.EmitValueToAlignment(4);
+ OS.EmitLabel(SymEnd);
+}
+
+void CodeViewDebug::emitEndSymbolRecord(SymbolKind EndKind) {
+ OS.AddComment("Record length");
+ OS.EmitIntValue(2, 2);
+ if (OS.isVerboseAsm())
+ OS.AddComment("Record kind: " + getSymbolName(EndKind));
+ OS.EmitIntValue(unsigned(EndKind), 2); // Record Kind
+}
+
void CodeViewDebug::emitDebugInfoForUDTs(
ArrayRef<std::pair<std::string, const DIType *>> UDTs) {
for (const auto &UDT : UDTs) {
const DIType *T = UDT.second;
assert(shouldEmitUdt(T));
- MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(),
- *UDTRecordEnd = MMI->getContext().createTempSymbol();
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(UDTRecordEnd, UDTRecordBegin, 2);
- OS.EmitLabel(UDTRecordBegin);
-
- OS.AddComment("Record kind: S_UDT");
- OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2);
-
+ MCSymbol *UDTRecordEnd = beginSymbolRecord(SymbolKind::S_UDT);
OS.AddComment("Type");
OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4);
-
emitNullTerminatedSymbolName(OS, UDT.first);
- OS.EmitLabel(UDTRecordEnd);
+ endSymbolRecord(UDTRecordEnd);
}
}
-void CodeViewDebug::emitDebugInfoForGlobals() {
+void CodeViewDebug::collectGlobalVariableInfo() {
DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *>
GlobalMap;
for (const GlobalVariable &GV : MMI->getModule()->globals()) {
@@ -2696,42 +2913,56 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
for (const MDNode *Node : CUs->operands()) {
const auto *CU = cast<DICompileUnit>(Node);
-
- // First, emit all globals that are not in a comdat in a single symbol
- // substream. MSVC doesn't like it if the substream is empty, so only open
- // it if we have at least one global to emit.
- switchToDebugSectionForSymbol(nullptr);
- MCSymbol *EndLabel = nullptr;
for (const auto *GVE : CU->getGlobalVariables()) {
- if (const auto *GV = GlobalMap.lookup(GVE))
- if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
- if (!EndLabel) {
- OS.AddComment("Symbol subsection for globals");
- EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
- }
- // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV));
- }
+ const auto *GV = GlobalMap.lookup(GVE);
+ if (!GV || GV->isDeclarationForLinker())
+ continue;
+ const DIGlobalVariable *DIGV = GVE->getVariable();
+ DIScope *Scope = DIGV->getScope();
+ SmallVector<CVGlobalVariable, 1> *VariableList;
+ if (Scope && isa<DILocalScope>(Scope)) {
+ // Locate a global variable list for this scope, creating one if
+ // necessary.
+ auto Insertion = ScopeGlobals.insert(
+ {Scope, std::unique_ptr<GlobalVariableList>()});
+ if (Insertion.second)
+ Insertion.first->second = llvm::make_unique<GlobalVariableList>();
+ VariableList = Insertion.first->second.get();
+ } else if (GV->hasComdat())
+ // Emit this global variable into a COMDAT section.
+ VariableList = &ComdatVariables;
+ else
+ // Emit this global variable in a single global symbol section.
+ VariableList = &GlobalVariables;
+ CVGlobalVariable CVGV = {DIGV, GV};
+ VariableList->emplace_back(std::move(CVGV));
}
- if (EndLabel)
- endCVSubsection(EndLabel);
+ }
+}
- // Second, emit each global that is in a comdat into its own .debug$S
- // section along with its own symbol substream.
- for (const auto *GVE : CU->getGlobalVariables()) {
- if (const auto *GV = GlobalMap.lookup(GVE)) {
- if (GV->hasComdat()) {
- MCSymbol *GVSym = Asm->getSymbol(GV);
- OS.AddComment("Symbol subsection for " +
- Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
- switchToDebugSectionForSymbol(GVSym);
- EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
- // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym);
- endCVSubsection(EndLabel);
- }
- }
- }
+void CodeViewDebug::emitDebugInfoForGlobals() {
+ // First, emit all globals that are not in a comdat in a single symbol
+ // substream. MSVC doesn't like it if the substream is empty, so only open
+ // it if we have at least one global to emit.
+ switchToDebugSectionForSymbol(nullptr);
+ if (!GlobalVariables.empty()) {
+ OS.AddComment("Symbol subsection for globals");
+ MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
+ emitGlobalVariableList(GlobalVariables);
+ endCVSubsection(EndLabel);
+ }
+
+ // Second, emit each global that is in a comdat into its own .debug$S
+ // section along with its own symbol substream.
+ for (const CVGlobalVariable &CVGV : ComdatVariables) {
+ MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+ OS.AddComment("Symbol subsection for " +
+ Twine(GlobalValue::dropLLVMManglingEscape(CVGV.GV->getName())));
+ switchToDebugSectionForSymbol(GVSym);
+ MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+ endCVSubsection(EndLabel);
}
}
@@ -2747,34 +2978,26 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() {
}
}
+// Emit each global variable in the specified array.
+void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
+ for (const CVGlobalVariable &CVGV : Globals) {
+ MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+ }
+}
+
void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
const GlobalVariable *GV,
MCSymbol *GVSym) {
- // DataSym record, see SymbolRecord.h for more info.
- // FIXME: Thread local data, etc
- MCSymbol *DataBegin = MMI->getContext().createTempSymbol(),
- *DataEnd = MMI->getContext().createTempSymbol();
- const unsigned FixedLengthOfThisRecord = 12;
- OS.AddComment("Record length");
- OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);
- OS.EmitLabel(DataBegin);
- if (DIGV->isLocalToUnit()) {
- if (GV->isThreadLocal()) {
- OS.AddComment("Record kind: S_LTHREAD32");
- OS.EmitIntValue(unsigned(SymbolKind::S_LTHREAD32), 2);
- } else {
- OS.AddComment("Record kind: S_LDATA32");
- OS.EmitIntValue(unsigned(SymbolKind::S_LDATA32), 2);
- }
- } else {
- if (GV->isThreadLocal()) {
- OS.AddComment("Record kind: S_GTHREAD32");
- OS.EmitIntValue(unsigned(SymbolKind::S_GTHREAD32), 2);
- } else {
- OS.AddComment("Record kind: S_GDATA32");
- OS.EmitIntValue(unsigned(SymbolKind::S_GDATA32), 2);
- }
- }
+ // DataSym record, see SymbolRecord.h for more info. Thread local data
+ // happens to have the same format as global data.
+ SymbolKind DataSym = GV->isThreadLocal()
+ ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
+ : SymbolKind::S_GTHREAD32)
+ : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
+ : SymbolKind::S_GDATA32);
+ MCSymbol *DataEnd = beginSymbolRecord(DataSym);
OS.AddComment("Type");
OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
OS.AddComment("DataOffset");
@@ -2782,6 +3005,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
- emitNullTerminatedSymbolName(OS, DIGV->getName(), FixedLengthOfThisRecord);
- OS.EmitLabel(DataEnd);
+ const unsigned LengthOfDataRecord = 12;
+ emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
+ endSymbolRecord(DataEnd);
}
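The four-way kind selection above maps the linkage and TLS-ness of a global onto the CodeView data symbols, e.g. for a translation unit like (illustrative):

int GlobalVar;                    // external, non-TLS      -> S_GDATA32
static int FileLocalVar;          // internal, non-TLS      -> S_LDATA32
thread_local int TlsVar;          // external, thread-local -> S_GTHREAD32
static thread_local int TlsLocal; // internal, thread-local -> S_LTHREAD32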
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 6a0da5f993d0..21557ed1be35 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -14,14 +14,14 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
-#include "DbgValueHistoryCalculator.h"
-#include "DebugHandlerBase.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
@@ -54,6 +54,12 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
BumpPtrAllocator Allocator;
codeview::GlobalTypeTableBuilder TypeTable;
+ /// Whether to emit type record hashes into .debug$H.
+ bool EmitDebugGlobalHashes = false;
+
+ /// The codeview CPU type used by the translation unit.
+ codeview::CPUType TheCPU;
+
/// Represents the most general definition range.
struct LocalVarDefRange {
/// Indicates that variable data is stored in memory relative to the
@@ -85,10 +91,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
};
static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
- static LocalVarDefRange createDefRangeGeneral(uint16_t CVRegister,
- bool InMemory, int Offset,
- bool IsSubfield,
- uint16_t StructOffset);
/// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
struct LocalVariable {
@@ -97,6 +99,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
bool UseReferenceType = false;
};
+ struct CVGlobalVariable {
+ const DIGlobalVariable *DIGV;
+ const GlobalVariable *GV;
+ };
+
struct InlineSite {
SmallVector<LocalVariable, 1> InlinedLocals;
SmallVector<const DILocation *, 1> ChildSites;
@@ -110,6 +117,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
// Combines information from DILexicalBlock and LexicalScope.
struct LexicalBlock {
SmallVector<LocalVariable, 1> Locals;
+ SmallVector<CVGlobalVariable, 1> Globals;
SmallVector<LexicalBlock *, 1> Children;
const MCSymbol *Begin;
const MCSymbol *End;
@@ -132,6 +140,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<const DILocation *, 1> ChildSites;
SmallVector<LocalVariable, 1> Locals;
+ SmallVector<CVGlobalVariable, 1> Globals;
std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks;
@@ -144,6 +153,33 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
unsigned LastFileId = 0;
+
+ /// Number of bytes allocated in the prologue for all local stack objects.
+ unsigned FrameSize = 0;
+
+ /// Number of bytes of parameters on the stack.
+ unsigned ParamSize = 0;
+
+ /// Number of bytes pushed to save CSRs.
+ unsigned CSRSize = 0;
+
+ /// Adjustment to apply on x86 when using the VFRAME frame pointer.
+ int OffsetAdjustment = 0;
+
+ /// Two-bit value indicating which register is the designated frame pointer
+ /// register for local variables. Included in S_FRAMEPROC.
+ codeview::EncodedFramePtrReg EncodedLocalFramePtrReg =
+ codeview::EncodedFramePtrReg::None;
+
+ /// Two-bit value indicating which register is the designated frame pointer
+ /// register for stack parameters. Included in S_FRAMEPROC.
+ codeview::EncodedFramePtrReg EncodedParamFramePtrReg =
+ codeview::EncodedFramePtrReg::None;
+
+ codeview::FrameProcedureOptions FrameProcOpts;
+
+ bool HasStackRealignment = false;
+
bool HaveLineInfo = false;
};
FunctionInfo *CurFn = nullptr;
@@ -154,6 +190,17 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
// and LexicalBlocks.
DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables;
+ // Map to separate global variables according to the lexical scope they
+ // belong in. A null local scope represents the global scope.
+ typedef SmallVector<CVGlobalVariable, 1> GlobalVariableList;
+ DenseMap<const DIScope*, std::unique_ptr<GlobalVariableList> > ScopeGlobals;
+
+ // Array of global variables which need to be emitted into a COMDAT section.
+ SmallVector<CVGlobalVariable, 1> ComdatVariables;
+
+ // Array of non-COMDAT global variables.
+ SmallVector<CVGlobalVariable, 1> GlobalVariables;
+
/// The set of comdat .debug$S sections that we've seen so far. Each section
/// must start with a magic version number that must only be emitted once.
/// This set tracks which sections we've already opened.
@@ -249,6 +296,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitCompilerInformation();
+ void emitBuildInfo();
+
void emitInlineeLinesSubsection();
void emitDebugInfoForThunk(const Function *GV,
@@ -257,13 +306,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
- void emitDebugInfoForGlobals();
-
void emitDebugInfoForRetainedTypes();
void
emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs);
+ void emitDebugInfoForGlobals();
+ void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
const GlobalVariable *GV, MCSymbol *GVSym);
@@ -271,36 +320,49 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Returns an end label for use with endCVSubsection when the subsection is
/// finished.
MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind);
-
void endCVSubsection(MCSymbol *EndLabel);
+ /// Opens a symbol record of the given kind. Returns an end label for use with
+ /// endSymbolRecord.
+ MCSymbol *beginSymbolRecord(codeview::SymbolKind Kind);
+ void endSymbolRecord(MCSymbol *SymEnd);
+
+ /// Emits an S_END, S_INLINESITE_END, or S_PROC_ID_END record. These records
+ /// are empty, so we emit them with a simpler assembly sequence that doesn't
+ /// involve labels.
+ void emitEndSymbolRecord(codeview::SymbolKind EndKind);
+
void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
const InlineSite &Site);
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+ void collectGlobalVariableInfo();
void collectVariableInfo(const DISubprogram *SP);
- void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed);
+ void collectVariableInfoFromMFTable(DenseSet<InlinedEntity> &Processed);
// Construct the lexical block tree for a routine, pruning empty lexical
// scopes, and populate it with local variables.
void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes,
SmallVectorImpl<LexicalBlock *> &Blocks,
- SmallVectorImpl<LocalVariable> &Locals);
+ SmallVectorImpl<LocalVariable> &Locals,
+ SmallVectorImpl<CVGlobalVariable> &Globals);
void collectLexicalBlockInfo(LexicalScope &Scope,
SmallVectorImpl<LexicalBlock *> &ParentBlocks,
- SmallVectorImpl<LocalVariable> &ParentLocals);
+ SmallVectorImpl<LocalVariable> &ParentLocals,
+ SmallVectorImpl<CVGlobalVariable> &ParentGlobals);
/// Records information about a local variable in the appropriate scope. In
/// particular, locals from inlined code live inside the inlining site.
void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS);
/// Emits local variables in the appropriate order.
- void emitLocalVariableList(ArrayRef<LocalVariable> Locals);
+ void emitLocalVariableList(const FunctionInfo &FI,
+ ArrayRef<LocalVariable> Locals);
/// Emits an S_LOCAL record and its associated defined ranges.
- void emitLocalVariable(const LocalVariable &Var);
+ void emitLocalVariable(const FunctionInfo &FI, const LocalVariable &Var);
/// Emits a sequence of lexical block scopes and their children.
void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
@@ -314,6 +376,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
DITypeRef ClassTyRef = DITypeRef());
+ codeview::TypeIndex
+ getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
+ const DISubroutineType *SubroutineTy);
+
codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef);
codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
@@ -340,10 +406,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
- codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
- const DIType *ClassTy,
- int ThisAdjustment,
- bool IsStaticMethod);
+ codeview::TypeIndex lowerTypeMemberFunction(
+ const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment,
+ bool IsStaticMethod,
+ codeview::FunctionOptions FO = codeview::FunctionOptions::None);
codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty);
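
The CodeViewDebug.h hunk above pairs beginSymbolRecord/endSymbolRecord because CodeView symbol records begin with a length field that is only known once the payload has been written; the real emitter brackets the record with MC labels and lets the assembler compute the difference. As a rough, self-contained sketch of that idea — an in-memory buffer with a backpatched length, simplified names and layout that are assumptions rather than the LLVM implementation:

#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified sketch: emit a length-prefixed "symbol record" into a byte
// buffer, backpatching the 16-bit length once the payload size is known.
// The real emitter uses begin/end labels so the assembler does this.
class RecordWriter {
  std::vector<uint8_t> Buf;
  std::size_t LenFixup = 0;   // position of the 2-byte length placeholder

public:
  void beginRecord(uint16_t Kind) {
    LenFixup = Buf.size();
    Buf.push_back(0); Buf.push_back(0);   // length placeholder
    Buf.push_back(uint8_t(Kind));         // record kind, little-endian
    Buf.push_back(uint8_t(Kind >> 8));
  }
  void writeByte(uint8_t B) { Buf.push_back(B); }
  void endRecord() {
    // The length covers everything after the length field itself.
    uint16_t Len = uint16_t(Buf.size() - LenFixup - 2);
    Buf[LenFixup] = uint8_t(Len);
    Buf[LenFixup + 1] = uint8_t(Len >> 8);
  }
  const std::vector<uint8_t> &bytes() const { return Buf; }
};

emitEndSymbolRecord can avoid this machinery entirely because, as the comment in the hunk notes, S_END-style records carry no payload, so their size is a constant.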
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 570424a79c81..e27659494f08 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -414,6 +414,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_ref_udata:
case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
case dwarf::DW_FORM_udata:
Asm->EmitULEB128(Integer);
return;
@@ -440,6 +442,8 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_ref_udata:
case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_addrx:
+ case dwarf::DW_FORM_rnglistx:
case dwarf::DW_FORM_udata:
return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata:
@@ -461,7 +465,7 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitDebugThreadLocal(Expr, SizeOf(AP, Form));
+ AP->EmitDebugValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
@@ -585,8 +589,7 @@ void DIEString::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_string) {
- for (char ch : S)
- AP->emitInt8(ch);
+ AP->OutStreamer->EmitBytes(S);
AP->emitInt8(0);
return;
}
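
The DIE.cpp hunks add DW_FORM_addrx and DW_FORM_rnglistx to the set of forms emitted and sized as ULEB128 values. For reference, ULEB128 encoding and its size calculation look roughly like this standalone sketch (not LLVM's LEB128 helpers):

#include <cstdint>
#include <vector>

// Encode an unsigned value as ULEB128: 7 bits per byte, high bit set on
// every byte except the last.
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;           // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

// Number of bytes the encoding above produces; this is what SizeOf() needs
// for forms such as DW_FORM_addrx, DW_FORM_rnglistx, and DW_FORM_udata.
static unsigned uleb128Size(uint64_t Value) {
  unsigned Size = 0;
  do {
    ++Size;
    Value >>= 7;
  } while (Value != 0);
  return Size;
}

Because the encoding is variable-length, the size must be recomputed from the value rather than taken from a fixed form width.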
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 25518a339c61..09867822c30a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp --------------===//
+//===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DbgValueHistoryCalculator.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -42,7 +42,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) {
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
}
-void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
+void DbgValueHistoryMap::startInstrRange(InlinedEntity Var,
const MachineInstr &MI) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
@@ -57,7 +57,7 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
Ranges.push_back(std::make_pair(&MI, nullptr));
}
-void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
+void DbgValueHistoryMap::endInstrRange(InlinedEntity Var,
const MachineInstr &MI) {
auto &Ranges = VarInstrRanges[Var];
// Verify that the current instruction range is not yet closed.
@@ -68,7 +68,7 @@ void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
Ranges.back().second = &MI;
}
-unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
+unsigned DbgValueHistoryMap::getRegisterForVar(InlinedEntity Var) const {
const auto &I = VarInstrRanges.find(Var);
if (I == VarInstrRanges.end())
return 0;
@@ -78,17 +78,22 @@ unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
return isDescribedByReg(*Ranges.back().first);
}
+void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
+ assert(MI.isDebugLabel() && "not a DBG_LABEL");
+ LabelInstr[Label] = &MI;
+}
+
namespace {
// Maps physreg numbers to the variables they describe.
-using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
-using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>;
+using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>;
} // end anonymous namespace
// Claim that @Var is not described by @RegNo anymore.
static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
- InlinedVariable Var) {
+ InlinedEntity Var) {
const auto &I = RegVars.find(RegNo);
assert(RegNo != 0U && I != RegVars.end());
auto &VarSet = I->second;
@@ -102,7 +107,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
// Claim that @Var is now described by @RegNo.
static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
- InlinedVariable Var) {
+ InlinedEntity Var) {
assert(RegNo != 0U);
auto &VarSet = RegVars[RegNo];
assert(!is_contained(VarSet, Var));
@@ -187,9 +192,10 @@ static void collectChangingRegs(const MachineFunction *MF,
}
}
-void llvm::calculateDbgValueHistory(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &Result) {
+void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &DbgValues,
+ DbgLabelInstrMap &DbgLabels) {
BitVector ChangingRegs(TRI->getNumRegs());
collectChangingRegs(MF, TRI, ChangingRegs);
@@ -210,14 +216,14 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
if (TRI->isVirtualRegister(MO.getReg()))
- clobberRegisterUses(RegVars, MO.getReg(), Result, MI);
+ clobberRegisterUses(RegVars, MO.getReg(), DbgValues, MI);
// If this is a register def operand, it may end a debug value
// range.
else {
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
++AI)
if (ChangingRegs.test(*AI))
- clobberRegisterUses(RegVars, *AI, Result, MI);
+ clobberRegisterUses(RegVars, *AI, DbgValues, MI);
}
} else if (MO.isRegMask()) {
// If this is a register mask operand, clobber all debug values in
@@ -226,7 +232,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// Don't consider SP to be clobbered by register masks.
if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
MO.clobbersPhysReg(I)) {
- clobberRegisterUses(RegVars, I, Result, MI);
+ clobberRegisterUses(RegVars, I, DbgValues, MI);
}
}
}
@@ -234,26 +240,34 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
continue;
}
- // Skip DBG_LABEL instructions.
- if (MI.isDebugLabel())
- continue;
-
- assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
- // Use the base variable (without any DW_OP_piece expressions)
- // as index into History. The full variables including the
- // piece expressions are attached to the MI.
- const DILocalVariable *RawVar = MI.getDebugVariable();
- assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
- "Expected inlined-at fields to agree");
- InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
-
- if (unsigned PrevReg = Result.getRegisterForVar(Var))
- dropRegDescribedVar(RegVars, PrevReg, Var);
-
- Result.startInstrRange(Var, MI);
-
- if (unsigned NewReg = isDescribedByReg(MI))
- addRegDescribedVar(RegVars, NewReg, Var);
+ if (MI.isDebugValue()) {
+ assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
+ // Use the base variable (without any DW_OP_piece expressions)
+ // as index into History. The full variables including the
+ // piece expressions are attached to the MI.
+ const DILocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt());
+
+ if (unsigned PrevReg = DbgValues.getRegisterForVar(Var))
+ dropRegDescribedVar(RegVars, PrevReg, Var);
+
+ DbgValues.startInstrRange(Var, MI);
+
+ if (unsigned NewReg = isDescribedByReg(MI))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ } else if (MI.isDebugLabel()) {
+ assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!");
+ const DILabel *RawLabel = MI.getDebugLabel();
+ assert(RawLabel->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ // When collecting debug information for labels, there is no MCSymbol
+ // generated for it. So, we keep MachineInstr in DbgLabels in order
+ // to query MCSymbol afterward.
+ InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt());
+ DbgLabels.addInstr(L, MI);
+ }
}
// Make sure locations for register-described variables are valid only
@@ -264,7 +278,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
auto CurElem = I++; // CurElem can be erased below.
if (TRI->isVirtualRegister(CurElem->first) ||
ChangingRegs.test(CurElem->first))
- clobberRegisterUses(RegVars, CurElem, Result, MBB.back());
+ clobberRegisterUses(RegVars, CurElem, DbgValues, MBB.back());
}
}
}
@@ -274,10 +288,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
dbgs() << "DbgValueHistoryMap:\n";
for (const auto &VarRangePair : *this) {
- const InlinedVariable &Var = VarRangePair.first;
+ const InlinedEntity &Var = VarRangePair.first;
const InstrRanges &Ranges = VarRangePair.second;
- const DILocalVariable *LocalVar = Var.first;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first);
const DILocation *Location = Var.second;
dbgs() << " - " << LocalVar->getName() << " at ";
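
The renaming from InlinedVariable to InlinedEntity in the hunks above widens the history key from a local variable to any debug node, so one key type can serve both DBG_VALUE ranges and DBG_LABEL instructions. A rough shape of that key/value arrangement, with placeholder types standing in for the LLVM metadata and machine-instruction classes:

#include <map>
#include <utility>
#include <vector>

// Placeholder types standing in for DINode / DILocation / MachineInstr.
struct Node {};        // base of both "variable" and "label" metadata
struct Location {};    // inlined-at location
struct Instr {};       // a machine instruction

// Key: the debug entity plus the inlined-at location it was instantiated at.
using InlinedEntity = std::pair<const Node *, const Location *>;

// DBG_VALUE history: per entity, a list of [start, end) instruction ranges.
using InstrRange   = std::pair<const Instr *, const Instr *>;
using InstrRanges  = std::vector<InstrRange>;
using ValueHistory = std::map<InlinedEntity, InstrRanges>;

// DBG_LABEL map: per label entity, the instruction that defines it, kept so
// an MCSymbol can be queried for it later.
using LabelInstrMap = std::map<InlinedEntity, const Instr *>;

Keeping the MachineInstr for labels, rather than a symbol, matches the comment in the hunk: no MCSymbol exists yet when the history is collected, so it is requested and looked up afterwards.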
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 82e14dc13cb1..551cd36d1984 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DebugHandlerBase.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -125,6 +125,21 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
+// Return the function-local offset of an instruction.
+const MCExpr *
+DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) {
+ MCContext &MC = Asm->OutContext;
+
+ MCSymbol *Start = Asm->getFunctionBegin();
+ const auto *StartRef = MCSymbolRefExpr::create(Start, MC);
+
+ MCSymbol *AfterInsn = getLabelAfterInsn(MI);
+ assert(AfterInsn && "Expected label after instruction");
+ const auto *AfterRef = MCSymbolRefExpr::create(AfterInsn, MC);
+
+ return MCBinaryExpr::createSub(AfterRef, StartRef, MC);
+}
+
/// If this type is derived from a base type then return base type size.
uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
DIType *Ty = TyRef.resolve();
@@ -190,8 +205,9 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// Calculate history for local variables.
assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
- calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
- DbgValues);
+ assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!");
+ calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
+ DbgValues, DbgLabels);
LLVM_DEBUG(DbgValues.dump());
// Request labels for the full history.
@@ -229,6 +245,12 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
}
}
+ // Ensure there is a symbol before DBG_LABEL.
+ for (const auto &I : DbgLabels) {
+ const MachineInstr *MI = I.second;
+ requestLabelBeforeInsn(MI);
+ }
+
PrevInstLoc = DebugLoc();
PrevLabel = Asm->getFunctionBegin();
beginFunctionImpl(MF);
@@ -296,6 +318,7 @@ void DebugHandlerBase::endFunction(const MachineFunction *MF) {
if (hasDebugInfo(MMI, MF))
endFunctionImpl(MF);
DbgValues.clear();
+ DbgLabels.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
}
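
getFunctionLocalOffsetAfterInsn, added above, returns the position of an instruction inside its function as a symbolic difference between two labels; the value only becomes a number once the assembler has laid out the section. A toy model of that deferred subtraction (the real code builds an MCBinaryExpr over two MCSymbolRefExprs):

#include <cassert>
#include <cstdint>
#include <optional>

// Toy symbol whose address is only filled in at "layout" time.
struct Symbol {
  std::optional<uint64_t> Address;
};

// Deferred After - Start over two symbols; evaluable once both are laid out.
struct LabelDiff {
  const Symbol *After;
  const Symbol *Start;
  uint64_t evaluate() const {
    assert(After->Address && Start->Address && "symbols not laid out yet");
    return *After->Address - *Start->Address;
  }
};

// Usage sketch:
//   Symbol FuncBegin, AfterCall;
//   LabelDiff ReturnPC{&AfterCall, &FuncBegin};
//   ... layout assigns addresses ...
//   uint64_t Off = ReturnPC.evaluate();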
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index ac49657b68fa..befa4b941c8d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -139,7 +139,7 @@ public:
// Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
- llvm::sort(Values.begin(), Values.end());
+ llvm::sort(Values);
Values.erase(
std::unique(
Values.begin(), Values.end(), [](const Value &A, const Value &B) {
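
The DebugLocEntry.h change only switches to llvm::sort's range overload; the surrounding sort-then-unique idiom is the standard one, shown here with plain std:: algorithms on an illustrative value type that orders and deduplicates by fragment offset:

#include <algorithm>
#include <vector>

struct Piece {
  unsigned Offset;   // fragment offset in bits
  int Payload;       // whatever the entry carries
  bool operator<(const Piece &RHS) const { return Offset < RHS.Offset; }
};

// Sort by offset, then drop all but the first entry at each offset.
static void sortUniquePieces(std::vector<Piece> &Values) {
  std::sort(Values.begin(), Values.end());
  Values.erase(std::unique(Values.begin(), Values.end(),
                           [](const Piece &A, const Piece &B) {
                             return A.Offset == B.Offset;
                           }),
               Values.end());
}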
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 32271a0ef24a..1dca3f0fce5b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -69,14 +69,16 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
// pool from the skeleton - maybe even in non-fission (possibly fewer
// relocations by sharing them in the pool, but we have other ideas about how
// to reduce the number of relocations as well/instead).
- if (!DD->useSplitDwarf() || !Skeleton)
+ if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
unsigned idx = DD->getAddressPool().getIndex(Label);
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_GNU_addr_index,
+ Die.addValue(DIEValueAllocator, Attribute,
+ DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
+ : dwarf::DW_FORM_GNU_addr_index,
DIEInteger(idx));
}
@@ -160,6 +162,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
+ if (MDTuple *TP = GV->getTemplateParams())
+ addTemplateParams(*VariableDIE, DINodeArray(TP));
+
// Add location.
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
@@ -186,6 +191,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
if (!Global && (!Expr || !Expr->isConstant()))
continue;
+ if (Global && Global->isThreadLocal() &&
+ !Asm->getObjFileLowering().supportDebugThreadLocalLocation())
+ continue;
+
if (!Loc) {
addToAccelTable = true;
Loc = new (DIEValueAllocator) DIELoc;
@@ -245,13 +254,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addLinkageName(*VariableDIE, GV->getLinkageName());
if (addToAccelTable) {
- DD->addAccelName(GV->getName(), *VariableDIE);
+ DD->addAccelName(*CUNode, GV->getName(), *VariableDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
DD->useAllLinkageNames())
- DD->addAccelName(GV->getLinkageName(), *VariableDIE);
+ DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
}
return VariableDIE;
@@ -268,6 +277,7 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
(&CURanges.back().getEnd()->getSection() !=
&Range.getEnd()->getSection())) {
CURanges.push_back(Range);
+ DD->addSectionLabel(Range.getStart());
return;
}
@@ -275,6 +285,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
}
void DwarfCompileUnit::initStmtList() {
+ if (CUNode->isDebugDirectivesOnly())
+ return;
+
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -341,7 +354,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
- DD->addSubprogramNames(SP, *SPDie);
+ DD->addSubprogramNames(*CUNode, SP, *SPDie);
return *SPDie;
}
@@ -412,24 +425,29 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
? TLOF.getDwarfRnglistsSection()->getBeginSymbol()
: TLOF.getDwarfRangesSection()->getBeginSymbol();
- RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));
+ HasRangeLists = true;
+
+ // Add the range list to the set of ranges to be emitted.
+ auto IndexAndList =
+ (DD->getDwarfVersion() < 5 && Skeleton ? Skeleton->DU : DU)
+ ->addRange(*(Skeleton ? Skeleton : this), std::move(Range));
+
+ uint32_t Index = IndexAndList.first;
+ auto &List = *IndexAndList.second;
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
// FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under
// fission until we support the forms using the .debug_addr section
// (DW_RLE_startx_endx etc.).
- if (isDwoUnit()) {
- if (DD->getDwarfVersion() < 5)
- addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
- RangeSectionSym);
- } else {
+ if (DD->getDwarfVersion() >= 5)
+ addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index);
+ else if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ else
addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
RangeSectionSym);
- }
-
- // Add the range list to the set of ranges to be emitted.
- (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List));
}
void DwarfCompileUnit::attachRangesOrLowHighPC(
@@ -479,7 +497,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
- DD->addSubprogramNames(InlinedSP, *ScopeDIE);
+ DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE);
return ScopeDIE;
}
@@ -506,6 +524,18 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
return D;
}
+DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL,
+ const LexicalScope &Scope) {
+ auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag());
+ insertDIE(DL.getLabel(), LabelDie);
+ DL.setDIE(*LabelDie);
+
+ if (Scope.isAbstractScope())
+ applyLabelAttributes(DL, *LabelDie);
+
+ return LabelDie;
+}
+
DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
bool Abstract) {
// Define variable debug information entry.
@@ -699,13 +729,17 @@ DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
if (HasNonScopeChildren)
*HasNonScopeChildren = !Children.empty();
+ for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
+ Children.push_back(constructLabelDIE(*DL, *Scope));
+
for (LexicalScope *LS : Scope->getChildren())
constructScopeDIE(LS, Children);
return ObjectPointer;
}
-void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) {
+DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
+ LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
if (Scope) {
@@ -728,6 +762,8 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, Lexi
!includeMinimalInlineScopes())
ScopeDIE.addChild(
DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
+
+ return ScopeDIE;
}
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
@@ -782,6 +818,32 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
+DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
+ const DISubprogram &CalleeSP,
+ bool IsTail,
+ const MCExpr *PCOffset) {
+ // Insert a call site entry DIE within ScopeDIE.
+ DIE &CallSiteDIE =
+ createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr);
+
+ // For the purposes of showing tail call frames in backtraces, a key piece of
+ // information is DW_AT_call_origin, a pointer to the callee DIE.
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE);
+
+ if (IsTail) {
+ // Attach DW_AT_call_tail_call to tail calls for standards compliance.
+ addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call);
+ } else {
+ // Attach the return PC to allow the debugger to disambiguate call paths
+ // from one function to another.
+ assert(PCOffset && "Missing return PC information for a call");
+ addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset);
+ }
+ return CallSiteDIE;
+}
+
DIE *DwarfCompileUnit::constructImportedEntityDIE(
const DIImportedEntity *Module) {
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
@@ -824,40 +886,51 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
}
}
-void DwarfCompileUnit::finishVariableDefinition(const DbgVariable &Var) {
- DbgVariable *AbsVar = getExistingAbstractVariable(
- InlinedVariable(Var.getVariable(), Var.getInlinedAt()));
- auto *VariableDie = Var.getDIE();
- if (AbsVar && AbsVar->getDIE()) {
- addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
- *AbsVar->getDIE());
- } else
- applyVariableAttributes(Var, *VariableDie);
-}
+void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) {
+ DbgEntity *AbsEntity = getExistingAbstractEntity(Entity->getEntity());
-DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(InlinedVariable IV) {
- const DILocalVariable *Cleansed;
- return getExistingAbstractVariable(IV, Cleansed);
+ auto *Die = Entity->getDIE();
+ /// Label may be used to generate DW_AT_low_pc, so put it outside
+ /// if/else block.
+ const DbgLabel *Label = nullptr;
+ if (AbsEntity && AbsEntity->getDIE()) {
+ addDIEEntry(*Die, dwarf::DW_AT_abstract_origin, *AbsEntity->getDIE());
+ Label = dyn_cast<const DbgLabel>(Entity);
+ } else {
+ if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity))
+ applyVariableAttributes(*Var, *Die);
+ else if ((Label = dyn_cast<const DbgLabel>(Entity)))
+ applyLabelAttributes(*Label, *Die);
+ else
+ llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel.");
+ }
+
+ if (Label)
+ if (const auto *Sym = Label->getSymbol())
+ addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym);
}
-// Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(
- InlinedVariable IV, const DILocalVariable *&Cleansed) {
- // More then one inlined variable corresponds to one abstract variable.
- Cleansed = IV.first;
- auto &AbstractVariables = getAbstractVariables();
- auto I = AbstractVariables.find(Cleansed);
- if (I != AbstractVariables.end())
+DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) {
+ auto &AbstractEntities = getAbstractEntities();
+ auto I = AbstractEntities.find(Node);
+ if (I != AbstractEntities.end())
return I->second.get();
return nullptr;
}
-void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,
- LexicalScope *Scope) {
+void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
+ LexicalScope *Scope) {
assert(Scope && Scope->isAbstractScope());
- auto AbsDbgVariable = llvm::make_unique<DbgVariable>(Var, /* IA */ nullptr);
- DU->addScopeVariable(Scope, AbsDbgVariable.get());
- getAbstractVariables()[Var] = std::move(AbsDbgVariable);
+ auto &Entity = getAbstractEntities()[Node];
+ if (isa<const DILocalVariable>(Node)) {
+ Entity = llvm::make_unique<DbgVariable>(
+ cast<const DILocalVariable>(Node), nullptr /* IA */);;
+ DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
+ } else if (isa<const DILabel>(Node)) {
+ Entity = llvm::make_unique<DbgLabel>(
+ cast<const DILabel>(Node), nullptr /* IA */);
+ DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get()));
+ }
}
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
@@ -876,13 +949,18 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
}
bool DwarfCompileUnit::hasDwarfPubSections() const {
- // Opting in to GNU Pubnames/types overrides the default to ensure these are
- // generated for things like Gold's gdb_index generation.
- if (CUNode->getGnuPubnames())
+ switch (CUNode->getNameTableKind()) {
+ case DICompileUnit::DebugNameTableKind::None:
+ return false;
+ // Opting in to GNU Pubnames/types overrides the default to ensure these are
+ // generated for things like Gold's gdb_index generation.
+ case DICompileUnit::DebugNameTableKind::GNU:
return true;
-
- return DD->tuneForGDB() && DD->usePubSections() &&
- !includeMinimalInlineScopes();
+ case DICompileUnit::DebugNameTableKind::Default:
+ return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
+ !CUNode->isDebugDirectivesOnly();
+ }
+ llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
}
/// addGlobalName - Add a new global name to the compile unit.
@@ -939,8 +1017,6 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
"block byref variable without a complex expression");
if (DV.hasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
- else if (DV.isBlockByrefVariable())
- addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
else
addAddress(Die, dwarf::DW_AT_location, Location);
}
@@ -1012,12 +1088,27 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
addFlag(VariableDie, dwarf::DW_AT_artificial);
}
+void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label,
+ DIE &LabelDie) {
+ StringRef Name = Label.getName();
+ if (!Name.empty())
+ addString(LabelDie, dwarf::DW_AT_name, Name);
+ const auto *DILabel = Label.getLabel();
+ addSourceLine(LabelDie, DILabel);
+}
+
/// Add a Dwarf expression attribute data and value.
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
const MCExpr *Expr) {
Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr));
}
+void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute,
+ const MCExpr *Expr) {
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
+ DIEExpr(Expr));
+}
+
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
const DISubprogram *SP, DIE &SPDie) {
auto *SPDecl = SP->getDeclaration();
@@ -1034,3 +1125,12 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
+
+void DwarfCompileUnit::addAddrTableBase() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ MCSymbol *Label = DD->getAddressPool().getLabel();
+ addSectionLabel(getUnitDie(),
+ getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
+ : dwarf::DW_AT_GNU_addr_base,
+ Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
+}
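
Several of the DwarfCompileUnit.cpp hunks follow the same pattern: use the standardized DWARF v5 attribute or form when the emitted version is at least 5, and fall back to the GNU extension otherwise. Sketched as a standalone helper using the published constant values from the DWARF v5 and GNU specifications:

#include <cstdint>

// DW_FORM_GNU_addr_index is the pre-v5 GNU extension; DW_FORM_addrx is the
// standardized DWARF v5 equivalent.
enum Form : uint16_t {
  DW_FORM_addrx          = 0x1b,
  DW_FORM_GNU_addr_index = 0x1f01,
};

enum Attribute : uint16_t {
  DW_AT_addr_base     = 0x73,
  DW_AT_GNU_addr_base = 0x2133,
};

// Version-dependent selection, mirroring the pattern used for address-pool
// references and for the address table base attribute.
static Form addrIndexForm(unsigned DwarfVersion) {
  return DwarfVersion >= 5 ? DW_FORM_addrx : DW_FORM_GNU_addr_index;
}
static Attribute addrBaseAttr(unsigned DwarfVersion) {
  return DwarfVersion >= 5 ? DW_AT_addr_base : DW_AT_GNU_addr_base;
}

The same version check is what selects DW_FORM_rnglistx in addScopeRangeList instead of the older section-relative range forms.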
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 51e1558fe4a3..9ec22f68c12f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -14,7 +14,6 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
-#include "DbgValueHistoryCalculator.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -23,6 +22,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -44,6 +44,7 @@ class MDNode;
class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
+ bool HasRangeLists = false;
/// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
/// the need to search for it in applyStmtList.
@@ -69,10 +70,6 @@ class DwarfCompileUnit final : public DwarfUnit {
/// GlobalTypes - A map of globally visible types for this unit.
StringMap<const DIE *> GlobalTypes;
- // List of range lists for a given compile unit, separate from the ranges for
- // the CU itself.
- SmallVector<RangeSpanList, 1> CURangeLists;
-
// List of ranges for a given compile unit.
SmallVector<RangeSpan, 2> CURanges;
@@ -81,7 +78,7 @@ class DwarfCompileUnit final : public DwarfUnit {
const MCSymbol *BaseAddress = nullptr;
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// DWO ID for correlating skeleton and split units.
uint64_t DWOId = 0;
@@ -98,16 +95,17 @@ class DwarfCompileUnit final : public DwarfUnit {
return DU->getAbstractSPDies();
}
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
- return AbstractVariables;
- return DU->getAbstractVariables();
+ return AbstractEntities;
+ return DU->getAbstractEntities();
}
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
+ bool hasRangeLists() const { return HasRangeLists; }
unsigned getUniqueID() const { return UniqueID; }
DwarfCompileUnit *getSkeleton() const {
@@ -194,30 +192,39 @@ public:
DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
DIE *&ObjectPointer);
+ /// Construct a DIE for the given DbgLabel.
+ DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
+
/// A helper function to create children of a Scope DIE.
DIE *createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren = nullptr);
/// Construct a DIE for this subprogram scope.
- void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);
+ DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
+ LexicalScope *Scope);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+ /// Construct a call site entry DIE describing a call within \p Scope to a
+ /// callee described by \p CalleeSP. \p IsTail specifies whether the call is
+ /// a tail call. \p PCOffset must be non-zero for non-tail calls or be the
+ /// function-local offset to PC value after the call instruction.
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP,
+ bool IsTail, const MCExpr *PCOffset);
+
/// Construct import_module DIE.
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
void finishSubprogramDefinition(const DISubprogram *SP);
- void finishVariableDefinition(const DbgVariable &Var);
+ void finishEntityDefinition(const DbgEntity *Entity);
/// Find abstract variable associated with Var.
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
- const DILocalVariable *&Cleansed);
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
- void createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope);
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
+ DbgEntity *getExistingAbstractEntity(const DINode *Node);
+ void createAbstractEntity(const DINode *Node, LexicalScope *Scope);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
@@ -236,6 +243,9 @@ public:
void emitHeader(bool UseOffsets) override;
+ /// Add the DW_AT_addr_base attribute to the unit DIE.
+ void addAddrTableBase();
+
MCSymbol *getLabelBegin() const {
assert(getSection());
return LabelBegin;
@@ -285,13 +295,13 @@ public:
/// Add a Dwarf expression attribute data and value.
void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+ /// Add an attribute containing an address expression to \p Die.
+ void addAddressExpr(DIE &Die, dwarf::Attribute Attribute, const MCExpr *Expr);
+
void applySubprogramAttributesToDefinition(const DISubprogram *SP,
DIE &SPDie);
- /// getRangeLists - Get the vector of range lists.
- const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
- return (Skeleton ? Skeleton : this)->CURangeLists;
- }
+ void applyLabelAttributes(const DbgLabel &Label, DIE &LabelDie);
/// getRanges - Get the list of ranges for this unit.
const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
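
The header changes above replace the DbgVariable-only maps with DbgEntity ones so that variables and labels share a single ownership and lookup path, with dyn_cast distinguishing them in the .cpp hunks. The class shape behind that, reduced to a self-contained kind-tagged hierarchy (LLVM uses its own isa/dyn_cast support rather than this hand-rolled version):

#include <memory>
#include <vector>

// Minimal stand-in for the DbgEntity / DbgVariable / DbgLabel split.
class DbgEntity {
public:
  enum Kind { DbgVariableKind, DbgLabelKind };
  explicit DbgEntity(Kind K) : EntityKind(K) {}
  virtual ~DbgEntity() = default;
  Kind getKind() const { return EntityKind; }

private:
  Kind EntityKind;
};

class DbgVariable : public DbgEntity {
public:
  DbgVariable() : DbgEntity(DbgVariableKind) {}
  static bool classof(const DbgEntity *E) {
    return E->getKind() == DbgVariableKind;
  }
};

class DbgLabel : public DbgEntity {
public:
  DbgLabel() : DbgEntity(DbgLabelKind) {}
  static bool classof(const DbgEntity *E) {
    return E->getKind() == DbgLabelKind;
  }
};

// One container owns both kinds; callers branch on the kind when emitting.
using ConcreteEntities = std::vector<std::unique_ptr<DbgEntity>>;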
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 19b3afef34b5..1de2ffb6cfa1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
@@ -130,11 +131,6 @@ DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden,
cl::init(Default));
static cl::opt<bool>
- NoDwarfPubSections("no-dwarf-pub-sections", cl::Hidden,
- cl::desc("Disable emission of DWARF pub sections."),
- cl::init(false));
-
-static cl::opt<bool>
NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden,
cl::desc("Disable emission .debug_ranges section."),
cl::init(false));
@@ -188,12 +184,12 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
}
bool DbgVariable::isBlockByrefVariable() const {
- assert(Var && "Invalid complex DbgVariable!");
- return Var->getType().resolve()->isBlockByrefStruct();
+ assert(getVariable() && "Invalid complex DbgVariable!");
+ return getVariable()->getType().resolve()->isBlockByrefStruct();
}
const DIType *DbgVariable::getType() const {
- DIType *Ty = Var->getType().resolve();
+ DIType *Ty = getVariable()->getType().resolve();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (Ty->isBlockByrefStruct()) {
@@ -246,7 +242,7 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
return A.Expr->isFragment();
}) &&
"multiple FI expressions without DW_OP_LLVM_fragment");
- llvm::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
+ llvm::sort(FrameIndexExprs,
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
B.Expr->getFragmentInfo()->OffsetInBits;
@@ -258,8 +254,8 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
void DbgVariable::addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
- assert(V.Var == Var && "conflicting variable");
- assert(V.IA == IA && "conflicting inlined-at location");
+ assert(V.getVariable() == getVariable() && "conflicting variable");
+ assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
@@ -355,7 +351,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfVersion =
TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
- UsePubSections = !NoDwarfPubSections && !TT.isNVPTX();
UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
// Use sections as references. Force for NVPTX.
@@ -421,30 +416,35 @@ static StringRef getObjCMethodName(StringRef In) {
}
// Add the various names to the Dwarf accelerator table names.
-void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
+void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
+ const DISubprogram *SP, DIE &Die) {
+ if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ return;
+
if (!SP->isDefinition())
return;
if (SP->getName() != "")
- addAccelName(SP->getName(), Die);
+ addAccelName(CU, SP->getName(), Die);
// If the linkage name is different than the name, go ahead and output that as
// well into the name table. Only do that if we are going to actually emit
// that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
(useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))
- addAccelName(SP->getLinkageName(), Die);
+ addAccelName(CU, SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
if (isObjCClass(SP->getName())) {
StringRef Class, Category;
getObjCClassCategory(SP->getName(), Class, Category);
- addAccelObjC(Class, Die);
+ addAccelObjC(CU, Class, Die);
if (Category != "")
- addAccelObjC(Category, Die);
+ addAccelObjC(CU, Category, Die);
// Also add the base method name to the name table.
- addAccelName(getObjCMethodName(SP->getName()), Die);
+ addAccelName(CU, getObjCMethodName(SP->getName()), Die);
}
}
@@ -503,6 +503,64 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
+void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
+ DwarfCompileUnit &CU, DIE &ScopeDIE,
+ const MachineFunction &MF) {
+ // Add a call site-related attribute (DWARF5, Sec. 3.3.1.3). Do this only if
+ // the subprogram is required to have one.
+ if (!SP.areAllCallsDescribed() || !SP.isDefinition())
+ return;
+
+ // Use DW_AT_call_all_calls to express that call site entries are present
+ // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls
+ // because one of its requirements is not met: call site entries for
+ // optimized-out calls are elided.
+ CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls);
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "TargetInstrInfo not found: cannot label tail calls");
+
+ // Emit call site entries for each call or tail call in the function.
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB.instrs()) {
+ // Skip instructions which aren't calls. Both calls and tail-calling jump
+ // instructions (e.g TAILJMPd64) are classified correctly here.
+ if (!MI.isCall())
+ continue;
+
+ // TODO: Add support for targets with delay slots (see: beginInstruction).
+ if (MI.hasDelaySlot())
+ return;
+
+ // If this is a direct call, find the callee's subprogram.
+ const MachineOperand &CalleeOp = MI.getOperand(0);
+ if (!CalleeOp.isGlobal())
+ continue;
+ const Function *CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
+ if (!CalleeDecl || !CalleeDecl->getSubprogram())
+ continue;
+
+ // TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
+ // TODO: Add support for indirect calls.
+
+ bool IsTail = TII->isTailCall(MI);
+
+ // For tail calls, no return PC information is needed. For regular calls,
+ // the return PC is needed to disambiguate paths in the call graph which
+ // could lead to some target function.
+ const MCExpr *PCOffset =
+ IsTail ? nullptr : getFunctionLocalOffsetAfterInsn(&MI);
+
+ assert((IsTail || PCOffset) && "Call without return PC information");
+ LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
+ << CalleeDecl->getName() << (IsTail ? " [tail]" : "")
+ << "\n");
+ CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(),
+ IsTail, PCOffset);
+ }
+ }
+}
+
void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
if (!U.hasDwarfPubSections())
return;
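
The DwarfDebug::constructCallSiteEntryDIEs function added in the hunk above scans each function for direct calls and records, per call, the callee, whether it is a tail call, and (for non-tail calls) the function-local return-PC offset. Reduced to a standalone sketch over placeholder instruction records, with names that are illustrative assumptions only:

#include <cstdint>
#include <string>
#include <vector>

// Reduced model of the call-site scan.
struct Insn {
  bool IsCall = false;
  bool IsTailCall = false;
  std::string Callee;        // empty for indirect calls
  uint64_t OffsetAfter = 0;  // function-local offset just past the insn
};

struct CallSiteEntry {
  std::string Callee;
  bool IsTail;
  uint64_t ReturnPC;         // meaningful only when !IsTail
};

static std::vector<CallSiteEntry>
collectCallSites(const std::vector<Insn> &Fn) {
  std::vector<CallSiteEntry> Out;
  for (const Insn &I : Fn) {
    if (!I.IsCall || I.Callee.empty())   // skip non-calls, indirect calls
      continue;
    Out.push_back({I.Callee, I.IsTailCall,
                   I.IsTailCall ? 0 : I.OffsetAfter});
  }
  return Out;
}

Tail calls get DW_AT_call_tail_call instead of a return PC in the real code, which is why the offset is left unset for them here.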
@@ -510,41 +568,14 @@ void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
}
-// Create new DwarfCompileUnit for the given metadata node with tag
-// DW_TAG_compile_unit.
-DwarfCompileUnit &
-DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
- if (auto *CU = CUMap.lookup(DIUnit))
- return *CU;
- StringRef FN = DIUnit->getFilename();
- CompilationDir = DIUnit->getDirectory();
-
- auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
- InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
- DwarfCompileUnit &NewCU = *OwnedUnit;
+void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
+ DwarfCompileUnit &NewCU) {
DIE &Die = NewCU.getUnitDie();
- InfoHolder.addUnit(std::move(OwnedUnit));
- if (useSplitDwarf()) {
- NewCU.setSkeleton(constructSkeletonCU(NewCU));
- NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
- Asm->TM.Options.MCOptions.SplitDwarfFile);
- }
-
- for (auto *IE : DIUnit->getImportedEntities())
- NewCU.addImportedEntity(IE);
-
- // LTO with assembly output shares a single line table amongst multiple CUs.
- // To avoid the compilation directory being ambiguous, let the line table
- // explicitly describe the directory of all files, never relying on the
- // compilation directory.
- if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
- Asm->OutStreamer->emitDwarfFile0Directive(
- CompilationDir, FN, NewCU.getMD5AsBytes(DIUnit->getFile()),
- DIUnit->getSource(), NewCU.getUniqueID());
+ StringRef FN = DIUnit->getFilename();
StringRef Producer = DIUnit->getProducer();
StringRef Flags = DIUnit->getFlags();
- if (!Flags.empty()) {
+ if (!Flags.empty() && !useAppleExtensionAttributes()) {
std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
} else
@@ -582,11 +613,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
dwarf::DW_FORM_data1, RVer);
}
- if (useSplitDwarf())
- NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
- else
- NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
-
if (DIUnit->getDWOId()) {
// This CU is either a clang module DWO or a skeleton CU.
NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
@@ -596,9 +622,44 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
DIUnit->getSplitDebugFilename());
}
+}
+// Create new DwarfCompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
+DwarfCompileUnit &
+DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
+ if (auto *CU = CUMap.lookup(DIUnit))
+ return *CU;
+
+ CompilationDir = DIUnit->getDirectory();
+
+ auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
+ InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ InfoHolder.addUnit(std::move(OwnedUnit));
+
+ for (auto *IE : DIUnit->getImportedEntities())
+ NewCU.addImportedEntity(IE);
+
+ // LTO with assembly output shares a single line table amongst multiple CUs.
+ // To avoid the compilation directory being ambiguous, let the line table
+ // explicitly describe the directory of all files, never relying on the
+ // compilation directory.
+ if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
+ Asm->OutStreamer->emitDwarfFile0Directive(
+ CompilationDir, DIUnit->getFilename(),
+ NewCU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(),
+ NewCU.getUniqueID());
+
+ if (useSplitDwarf()) {
+ NewCU.setSkeleton(constructSkeletonCU(NewCU));
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
+ } else {
+ finishUnitAttributes(DIUnit, NewCU);
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ }
CUMap.insert({DIUnit, &NewCU});
- CUDieMap.insert({&Die, &NewCU});
+ CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
}
@@ -613,22 +674,21 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
- llvm::sort(GVEs.begin(), GVEs.end(),
- [](DwarfCompileUnit::GlobalExpr A,
- DwarfCompileUnit::GlobalExpr B) {
- // Sort order: first null exprs, then exprs without fragment
- // info, then sort by fragment offset in bits.
- // FIXME: Come up with a more comprehensive comparator so
- // the sorting isn't non-deterministic, and so the following
- // std::unique call works correctly.
- if (!A.Expr || !B.Expr)
- return !!B.Expr;
- auto FragmentA = A.Expr->getFragmentInfo();
- auto FragmentB = B.Expr->getFragmentInfo();
- if (!FragmentA || !FragmentB)
- return !!FragmentB;
- return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
- });
+ llvm::sort(
+ GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
+ // Sort order: first null exprs, then exprs without fragment
+ // info, then sort by fragment offset in bits.
+ // FIXME: Come up with a more comprehensive comparator so
+ // the sorting isn't non-deterministic, and so the following
+ // std::unique call works correctly.
+ if (!A.Expr || !B.Expr)
+ return !!B.Expr;
+ auto FragmentA = A.Expr->getFragmentInfo();
+ auto FragmentB = B.Expr->getFragmentInfo();
+ if (!FragmentA || !FragmentB)
+ return !!FragmentB;
+ return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
+ });
GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A,
DwarfCompileUnit::GlobalExpr B) {
@@ -644,15 +704,18 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
void DwarfDebug::beginModule() {
NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
DWARFGroupDescription, TimePassesIsEnabled);
- if (DisableDebugInfoPrinting)
+ if (DisableDebugInfoPrinting) {
+ MMI->setDebugInfoAvailability(false);
return;
+ }
const Module *M = MMI->getModule();
unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
// Tell MMI whether we have debug info.
- MMI->setDebugInfoAvailability(NumDebugCUs > 0);
+ assert(MMI->hasDebugInfo() == (NumDebugCUs > 0) &&
+ "DebugInfoAvailabilty initialized unexpectedly");
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
@@ -670,11 +733,24 @@ void DwarfDebug::beginModule() {
(useSplitDwarf() ? SkeletonHolder : InfoHolder)
.setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base"));
- // Create the symbol that designates the start of the DWARF v5 range list
- // table. It is located past the header and before the offsets table.
- if (getDwarfVersion() >= 5)
- (useSplitDwarf() ? SkeletonHolder : InfoHolder)
- .setRnglistsTableBaseSym(Asm->createTempSymbol("rnglists_table_base"));
+
+ // Create the symbols that designates the start of the DWARF v5 range list
+ // and locations list tables. They are located past the table headers.
+ if (getDwarfVersion() >= 5) {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.setRnglistsTableBaseSym(
+ Asm->createTempSymbol("rnglists_table_base"));
+ Holder.setLoclistsTableBaseSym(
+ Asm->createTempSymbol("loclists_table_base"));
+
+ if (useSplitDwarf())
+ InfoHolder.setRnglistsTableBaseSym(
+ Asm->createTempSymbol("rnglists_dwo_table_base"));
+ }
+
+ // Create the symbol that points to the first entry following the debug
+ // address table (.debug_addr) header.
+ AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
// FIXME: Move local imported entities into a list attached to the
@@ -728,16 +804,16 @@ void DwarfDebug::beginModule() {
}
}
-void DwarfDebug::finishVariableDefinitions() {
- for (const auto &Var : ConcreteVariables) {
- DIE *VariableDie = Var->getDIE();
- assert(VariableDie);
+void DwarfDebug::finishEntityDefinitions() {
+ for (const auto &Entity : ConcreteEntities) {
+ DIE *Die = Entity->getDIE();
+ assert(Die);
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
- // in the ConcreteVariables list, rather than looking it up again here.
+ // in the ConcreteEntities list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
- DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
+ DwarfCompileUnit *Unit = CUDieMap.lookup(Die->getUnitDie());
assert(Unit);
- Unit->finishVariableDefinition(*Var);
+ Unit->finishEntityDefinition(Entity.get());
}
}
@@ -755,7 +831,7 @@ void DwarfDebug::finalizeModuleInfo() {
finishSubprogramDefinitions();
- finishVariableDefinitions();
+ finishEntityDefinitions();
// Include the DWO file name in the hash if there's more than one CU.
// This handles ThinLTO's situation where imported CUs may very easily be
@@ -768,6 +844,8 @@ void DwarfDebug::finalizeModuleInfo() {
// all other generation.
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
+ if (TheCU.getCUNode()->isDebugDirectivesOnly())
+ continue;
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
TheCU.constructContainingTypeDIEs();
@@ -776,7 +854,12 @@ void DwarfDebug::finalizeModuleInfo() {
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
- if (useSplitDwarf()) {
+ if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) {
+ finishUnitAttributes(TheCU.getCUNode(), TheCU);
+ TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
+ SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie());
@@ -789,18 +872,14 @@ void DwarfDebug::finalizeModuleInfo() {
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
}
- // We don't keep track of which addresses are used in which CU so this
- // is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty()) {
- const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
- SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
- Sym, Sym);
- }
- if (getDwarfVersion() < 5 && !SkCU->getRangeLists().empty()) {
+
+ if (getDwarfVersion() < 5 && !SkeletonHolder.getRangeLists().empty()) {
const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
Sym, Sym);
}
+ } else if (SkCU) {
+ finishUnitAttributes(SkCU->getCUNode(), *SkCU);
}
// If we have code split among multiple sections or non-contiguous
@@ -810,6 +889,14 @@ void DwarfDebug::finalizeModuleInfo() {
// .subsections_via_symbols in mach-o. This would mean turning on
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if (!AddrPool.isEmpty() &&
+ (getDwarfVersion() >= 5 ||
+ (SkCU && !empty(TheCU.getUnitDie().children()))))
+ U.addAddrTableBase();
+
if (unsigned NumRanges = TheCU.getRanges().size()) {
if (NumRanges > 1 && useRangesSection())
// A DW_AT_low_pc attribute may also be specified in combination with
@@ -822,9 +909,13 @@ void DwarfDebug::finalizeModuleInfo() {
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
- if (getDwarfVersion() >= 5 && !useSplitDwarf() &&
- !U.getRangeLists().empty())
- U.addRnglistsBase();
+ if (getDwarfVersion() >= 5) {
+ if (U.hasRangeLists())
+ U.addRnglistsBase();
+
+ if (!DebugLocs.getLists().empty() && !useSplitDwarf())
+ U.addLoclistsBase();
+ }
auto *CUNode = cast<DICompileUnit>(P.first);
// If compile Unit has macros, emit "DW_AT_macro_info" attribute.
@@ -888,9 +979,11 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
- emitDebugAddr();
+ emitDebugRangesDWO();
}
+ emitDebugAddr();
+
// Emit info into the dwarf accelerator table sections.
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
@@ -915,38 +1008,37 @@ void DwarfDebug::endModule() {
// FIXME: AbstractVariables.clear();
}
-void DwarfDebug::ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *ScopeNode) {
- const DILocalVariable *Cleansed = nullptr;
- if (CU.getExistingAbstractVariable(IV, Cleansed))
+void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *ScopeNode) {
+ if (CU.getExistingAbstractEntity(Node))
return;
- CU.createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
+ CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope(
cast<DILocalScope>(ScopeNode)));
}
-void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU,
- InlinedVariable IV, const MDNode *ScopeNode) {
- const DILocalVariable *Cleansed = nullptr;
- if (CU.getExistingAbstractVariable(IV, Cleansed))
+void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
+ const DINode *Node, const MDNode *ScopeNode) {
+ if (CU.getExistingAbstractEntity(Node))
return;
if (LexicalScope *Scope =
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
- CU.createAbstractVariable(Cleansed, Scope);
+ CU.createAbstractEntity(Node, Scope);
}
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
- DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) {
- SmallDenseMap<InlinedVariable, DbgVariable *> MFVars;
+ DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
+ SmallDenseMap<InlinedEntity, DbgVariable *> MFVars;
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
- InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt());
+ InlinedEntity Var(VI.Var, VI.Loc->getInlinedAt());
Processed.insert(Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
@@ -954,14 +1046,15 @@ void DwarfDebug::collectVariableInfoFromMFTable(
if (!Scope)
continue;
- ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode());
- auto RegVar = llvm::make_unique<DbgVariable>(Var.first, Var.second);
+ ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
+ auto RegVar = llvm::make_unique<DbgVariable>(
+ cast<DILocalVariable>(Var.first), Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (DbgVariable *DbgVar = MFVars.lookup(Var))
DbgVar->addMMIEntry(*RegVar);
else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
MFVars.insert({Var, RegVar.get()});
- ConcreteVariables.push_back(std::move(RegVar));
+ ConcreteEntities.push_back(std::move(RegVar));
}
}
}
@@ -1087,6 +1180,18 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
auto Value = getDebugLocValue(Begin);
+
+ // Omit entries with empty ranges as they do not have any effect in DWARF.
+ if (StartLabel == EndLabel) {
+ // If this is a fragment, we must still add the value to the list of
+ // open ranges, since it may describe non-overlapping parts of the
+ // variable.
+ if (DIExpr->isFragment())
+ OpenRanges.push_back(Value);
+ LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
+ continue;
+ }
+
DebugLocEntry Loc(StartLabel, EndLabel, Value);
bool couldMerge = false;
@@ -1126,14 +1231,26 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
}
}
-DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
- LexicalScope &Scope,
- InlinedVariable IV) {
- ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode());
- ConcreteVariables.push_back(
- llvm::make_unique<DbgVariable>(IV.first, IV.second));
- InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
- return ConcreteVariables.back().get();
+DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
+ const DINode *Node,
+ const DILocation *Location,
+ const MCSymbol *Sym) {
+ ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode());
+ if (isa<const DILocalVariable>(Node)) {
+ ConcreteEntities.push_back(
+ llvm::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ Location));
+ InfoHolder.addScopeVariable(&Scope,
+ cast<DbgVariable>(ConcreteEntities.back().get()));
+ } else if (isa<const DILabel>(Node)) {
+ ConcreteEntities.push_back(
+ llvm::make_unique<DbgLabel>(cast<const DILabel>(Node),
+ Location, Sym));
+ InfoHolder.addScopeLabel(&Scope,
+ cast<DbgLabel>(ConcreteEntities.back().get()));
+ }
+ return ConcreteEntities.back().get();
}
/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
@@ -1195,14 +1312,14 @@ static bool validThroughout(LexicalScopes &LScopes,
}
// Find variables for each lexical scope.
-void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
- const DISubprogram *SP,
- DenseSet<InlinedVariable> &Processed) {
+void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
+ const DISubprogram *SP,
+ DenseSet<InlinedEntity> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(TheCU, Processed);
for (const auto &I : DbgValues) {
- InlinedVariable IV = I.first;
+ InlinedEntity IV = I.first;
if (Processed.count(IV))
continue;
@@ -1212,16 +1329,18 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
continue;
LexicalScope *Scope = nullptr;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(IV.first);
if (const DILocation *IA = IV.second)
- Scope = LScopes.findInlinedScope(IV.first->getScope(), IA);
+ Scope = LScopes.findInlinedScope(LocalVar->getScope(), IA);
else
- Scope = LScopes.findLexicalScope(IV.first->getScope());
+ Scope = LScopes.findLexicalScope(LocalVar->getScope());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
Processed.insert(IV);
- DbgVariable *RegVar = createConcreteVariable(TheCU, *Scope, IV);
+ DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
+ *Scope, LocalVar, IV.second));
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
@@ -1247,20 +1366,53 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
// unique identifiers, so don't bother resolving the type with the
// identifier map.
const DIBasicType *BT = dyn_cast<DIBasicType>(
- static_cast<const Metadata *>(IV.first->getType()));
+ static_cast<const Metadata *>(LocalVar->getType()));
// Finalize the entry by lowering it into a DWARF bytestream.
for (auto &Entry : Entries)
Entry.finalize(*Asm, List, BT);
}
- // Collect info for variables that were optimized out.
+ // For each InlinedEntity collected from DBG_LABEL instructions, convert it
+ // to a DWARF-related DbgLabel.
+ for (const auto &I : DbgLabels) {
+ InlinedEntity IL = I.first;
+ const MachineInstr *MI = I.second;
+ if (MI == nullptr)
+ continue;
+
+ LexicalScope *Scope = nullptr;
+ const DILabel *Label = cast<DILabel>(IL.first);
+ // Get the inlined DILocation if this is an inlined label.
+ if (const DILocation *IA = IL.second)
+ Scope = LScopes.findInlinedScope(Label->getScope(), IA);
+ else
+ Scope = LScopes.findLexicalScope(Label->getScope());
+ // If label scope is not found then skip this label.
+ if (!Scope)
+ continue;
+
+ Processed.insert(IL);
+ /// At this point, the temporary label has been created.
+ /// Save the temporary label in the DbgLabel entity so the
+ /// actual address can be emitted when generating the DWARF DIE.
+ MCSymbol *Sym = getLabelBeforeInsn(MI);
+ createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym);
+ }
+
+ // Collect info for variables/labels that were optimized out.
for (const DINode *DN : SP->getRetainedNodes()) {
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+ LexicalScope *Scope = nullptr;
if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
- if (Processed.insert(InlinedVariable(DV, nullptr)).second)
- if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
- createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
+ Scope = LScopes.findLexicalScope(DV->getScope());
+ } else if (auto *DL = dyn_cast<DILabel>(DN)) {
+ Scope = LScopes.findLexicalScope(DL->getScope());
}
+
+ if (Scope)
+ createConcreteEntity(TheCU, *Scope, DN, nullptr);
}
}
@@ -1284,6 +1436,11 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
+ // Request a label after the call in order to emit AT_return_pc information
+ // in call site entries. TODO: Add support for targets with delay slots.
+ if (SP->areAllCallsDescribed() && MI->isCall() && !MI->hasDelaySlot())
+ requestLabelAfterInsn(MI);
+
if (DL == PrevInstLoc) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
@@ -1416,9 +1573,14 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
assert(!FnScope || SP == FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
+ if (TheCU.getCUNode()->isDebugDirectivesOnly()) {
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ return;
+ }
- DenseSet<InlinedVariable> ProcessedVars;
- collectVariableInfo(TheCU, SP, ProcessedVars);
+ DenseSet<InlinedEntity> Processed;
+ collectEntityInfo(TheCU, SP, Processed);
// Add the range of this function to the list of ranges for the CU.
TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
@@ -1442,31 +1604,41 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
auto *SP = cast<DISubprogram>(AScope->getScopeNode());
for (const DINode *DN : SP->getRetainedNodes()) {
- if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
- // Collect info for variables that were optimized out.
- if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
- continue;
- ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr),
- DV->getScope());
- assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
- && "ensureAbstractVariableIsCreated inserted abstract scopes");
- }
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+
+ const MDNode *Scope = nullptr;
+ if (auto *DV = dyn_cast<DILocalVariable>(DN))
+ Scope = DV->getScope();
+ else if (auto *DL = dyn_cast<DILabel>(DN))
+ Scope = DL->getScope();
+ else
+ llvm_unreachable("Unexpected DI type!");
+
+ // Collect info for variables/labels that were optimized out.
+ ensureAbstractEntityIsCreated(TheCU, DN, Scope);
+ assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
+ && "ensureAbstractEntityIsCreated inserted abstract scopes");
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
ProcessedSPNodes.insert(SP);
- TheCU.constructSubprogramScopeDIE(SP, FnScope);
+ DIE &ScopeDIE = TheCU.constructSubprogramScopeDIE(SP, FnScope);
if (auto *SkelCU = TheCU.getSkeleton())
if (!LScopes.getAbstractScopesList().empty() &&
TheCU.getCUNode()->getSplitDebugInlining())
SkelCU->constructSubprogramScopeDIE(SP, FnScope);
+ // Construct call site entries.
+ constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF);
+
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
+ InfoHolder.getScopeLabels().clear();
PrevLabel = nullptr;
CurFn = nullptr;
}
@@ -1530,8 +1702,6 @@ void DwarfDebug::emitAccelDebugNames() {
if (getUnits().empty())
return;
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfDebugNamesSection());
emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits());
}
@@ -1636,7 +1806,8 @@ void DwarfDebug::emitDebugPubSections() {
if (!TheU->hasDwarfPubSections())
continue;
- bool GnuStyle = TheU->getCUNode()->getGnuPubnames();
+ bool GnuStyle = TheU->getCUNode()->getNameTableKind() ==
+ DICompileUnit::DebugNameTableKind::GNU;
Asm->OutStreamer->SwitchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
@@ -1692,8 +1863,8 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
Asm->OutStreamer->AddComment(
- Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
- dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+ Twine("Attributes: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) +
+ ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
Asm->emitInt8(Desc.toBits());
}
@@ -1759,6 +1930,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
void DebugLocEntry::finalize(const AsmPrinter &AP,
DebugLocStream::ListBuilder &List,
const DIBasicType *BT) {
+ assert(Begin != End && "unexpected location list entry with empty range");
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
@@ -1791,25 +1963,119 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
emitDebugLocEntry(Streamer, Entry);
}
-// Emit locations into the debug loc section.
+// Emit the common part of the DWARF 5 range/locations list tables header.
+static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder,
+ MCSymbol *TableStart,
+ MCSymbol *TableEnd) {
+ // Build the table header, which starts with the length field.
+ Asm->OutStreamer->AddComment("Length");
+ Asm->EmitLabelDifference(TableEnd, TableStart, 4);
+ Asm->OutStreamer->EmitLabel(TableStart);
+ // Version number (DWARF v5 and later).
+ Asm->OutStreamer->AddComment("Version");
+ Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion());
+ // Address size.
+ Asm->OutStreamer->AddComment("Address size");
+ Asm->emitInt8(Asm->MAI->getCodePointerSize());
+ // Segment selector size.
+ Asm->OutStreamer->AddComment("Segment selector size");
+ Asm->emitInt8(0);
+}
+
+// Emit the header of a DWARF 5 range list table. Returns the symbol
+// that designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
+ const DwarfFile &Holder) {
+ MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
+ MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
+ emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+
+ Asm->OutStreamer->AddComment("Offset entry count");
+ Asm->emitInt32(Holder.getRangeLists().size());
+ Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym());
+
+ for (const RangeSpanList &List : Holder.getRangeLists())
+ Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(),
+ 4);
+
+ return TableEnd;
+}
+
+// Emit the header of a DWARF 5 locations list table. Returns the symbol that
+// designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
+ const DwarfFile &Holder) {
+ MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start");
+ MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end");
+ emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+
+ // FIXME: Generate the offsets table and use DW_FORM_loclistx with the
+ // DW_AT_loclists_base attribute. Until then set the number of offsets to 0.
+ Asm->OutStreamer->AddComment("Offset entry count");
+ Asm->emitInt32(0);
+ Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym());
+
+ return TableEnd;
+}
+
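The two helpers above share a contract with their callers: the header emits the Length field as a label difference, and the returned end symbol must be emitted after the table body for that difference to resolve. A minimal sketch of the pairing, not part of the patch (the Holder name is illustrative):

  MCSymbol *TableEnd = emitLoclistsTableHeader(Asm, Holder); // Length, version, sizes
  // ... emit each location list and its DW_LLE_* entries here ...
  Asm->OutStreamer->EmitLabel(TableEnd); // resolves the Length at the top of the table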
+// Emit locations into the .debug_loc/.debug_loclists section.
void DwarfDebug::emitDebugLoc() {
if (DebugLocs.getLists().empty())
return;
- // Start the dwarf loc section.
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocSection());
+ bool IsLocLists = getDwarfVersion() >= 5;
+ MCSymbol *TableEnd = nullptr;
+ if (IsLocLists) {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLoclistsSection());
+ TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
+ : InfoHolder);
+ } else {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ }
+
unsigned char Size = Asm->MAI->getCodePointerSize();
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
+
const DwarfCompileUnit *CU = List.CU;
+ const MCSymbol *Base = CU->getBaseAddress();
for (const auto &Entry : DebugLocs.getEntries(List)) {
- // Set up the range. This range is relative to the entry point of the
- // compile unit. This is a hard coded 0 for low_pc when we're emitting
- // ranges, or the DW_AT_low_pc on the compile unit otherwise.
- if (auto *Base = CU->getBaseAddress()) {
- Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
- Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ if (Base) {
+ // Set up the range. This range is relative to the entry point of the
+ // compile unit. This is a hard coded 0 for low_pc when we're emitting
+ // ranges, or the DW_AT_low_pc on the compile unit otherwise.
+ if (IsLocLists) {
+ Asm->OutStreamer->AddComment("DW_LLE_offset_pair");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1);
+ Asm->OutStreamer->AddComment(" starting offset");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base);
+ Asm->OutStreamer->AddComment(" ending offset");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base);
+ } else {
+ Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
+ Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ }
+
+ emitDebugLocEntryLocation(Entry);
+ continue;
+ }
+
+ // We have no base address.
+ if (IsLocLists) {
+ // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or
+ // DW_LLE_startx_length if there is only a single range.
+ // That should reduce the size of the debug data emitted.
+ // For now just use the DW_LLE_startx_length for all cases.
+ Asm->OutStreamer->AddComment("DW_LLE_startx_length");
+ Asm->emitInt8(dwarf::DW_LLE_startx_length);
+ Asm->OutStreamer->AddComment(" start idx");
+ Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym));
+ Asm->OutStreamer->AddComment(" length");
+ Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym);
} else {
Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size);
Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
@@ -1817,9 +2083,20 @@ void DwarfDebug::emitDebugLoc() {
emitDebugLocEntryLocation(Entry);
}
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
+
+ if (IsLocLists) {
+ // .debug_loclists section ends with DW_LLE_end_of_list.
+ Asm->OutStreamer->AddComment("DW_LLE_end_of_list");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1);
+ } else {
+ // Terminate the .debug_loc list with two 0 values.
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
}
+
+ if (TableEnd)
+ Asm->OutStreamer->EmitLabel(TableEnd);
}
void DwarfDebug::emitDebugLocDWO() {
@@ -1828,10 +2105,13 @@ void DwarfDebug::emitDebugLocDWO() {
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
- // Just always use start_length for now - at least that's one address
- // rather than two. We could get fancier and try to, say, reuse an
- // address we know we've emitted elsewhere (the start of the function?
- // The start of the CU or CU subrange that encloses this range?)
+ // GDB only supports startx_length in pre-standard split-DWARF.
+ // (in v5 standard loclists, it currently* /only/ supports base_address +
+ // offset_pair, so the implementations can't really share much since they
+ // need to use different representations)
+ // * as of October 2018, at least
+ // Ideally/in v5, this could use SectionLabels to reuse existing addresses
+ // in the address pool to minimize object size/relocations.
Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
@@ -1939,10 +2219,9 @@ void DwarfDebug::emitDebugARanges() {
}
// Sort the CU list (again, to ensure consistent output order).
- llvm::sort(CUs.begin(), CUs.end(),
- [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
- return A->getUniqueID() < B->getUniqueID();
- });
+ llvm::sort(CUs, [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
+ return A->getUniqueID() < B->getUniqueID();
+ });
// Emit an arange table for each CU we used.
for (DwarfCompileUnit *CU : CUs) {
@@ -2006,10 +2285,10 @@ void DwarfDebug::emitDebugARanges() {
}
/// Emit a single range list. We handle both DWARF v5 and earlier.
-static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
+static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
- auto DwarfVersion = CU->getDwarfVersion();
+ auto DwarfVersion = DD.getDwarfVersion();
// Emit our symbol so we can find the beginning of the range.
Asm->OutStreamer->EmitLabel(List.getSym());
// Gather all the ranges that apply to the same section so they can share
@@ -2021,7 +2300,8 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
for (const RangeSpan &Range : List.getRanges())
SectionRanges[&Range.getStart()->getSection()].push_back(&Range);
- auto *CUBase = CU->getBaseAddress();
+ const DwarfCompileUnit &CU = List.getCU();
+ const MCSymbol *CUBase = CU.getBaseAddress();
bool BaseIsSet = false;
for (const auto &P : SectionRanges) {
// Don't bother with a base address entry if there's only one range in
@@ -2031,19 +2311,23 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
// or optnone where there may be holes in a single CU's section
// contributions.
auto *Base = CUBase;
- if (!Base && P.second.size() > 1 &&
- (UseDwarfRangesBaseAddressSpecifier || DwarfVersion >= 5)) {
+ if (!Base && (P.second.size() > 1 || DwarfVersion < 5) &&
+ (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) {
BaseIsSet = true;
// FIXME/use care: This may not be a useful base address if it's not
// the lowest address/range in this object.
Base = P.second.front()->getStart();
if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_base_address");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_address, 1);
- } else
+ Base = DD.getSectionLabel(&Base->getSection());
+ Asm->OutStreamer->AddComment("DW_RLE_base_addressx");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1);
+ Asm->OutStreamer->AddComment(" base address index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Base));
+ } else {
Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->AddComment(" base address");
- Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ Asm->OutStreamer->AddComment(" base address");
+ Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ }
} else if (BaseIsSet && DwarfVersion < 5) {
BaseIsSet = false;
assert(!Base);
@@ -2070,10 +2354,10 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
Asm->EmitLabelDifference(End, Base, Size);
}
} else if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_start_length");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_start_length, 1);
- Asm->OutStreamer->AddComment(" start");
- Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->AddComment("DW_RLE_startx_length");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1);
+ Asm->OutStreamer->AddComment(" start index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin));
Asm->OutStreamer->AddComment(" length");
Asm->EmitLabelDifferenceAsULEB128(End, Begin);
} else {
@@ -2092,31 +2376,13 @@ static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
}
}
-// Emit the header of a DWARF 5 range list table. Returns the symbol that
-// designates the end of the table for the caller to emit when the table is
-// complete.
-static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, DwarfFile &Holder) {
- // The length is described by a starting label right after the length field
- // and an end label.
- MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
- MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
- // Build the range table header, which starts with the length field.
- Asm->EmitLabelDifference(TableEnd, TableStart, 4);
- Asm->OutStreamer->EmitLabel(TableStart);
- // Version number (DWARF v5 and later).
- Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion());
- // Address size.
- Asm->emitInt8(Asm->MAI->getCodePointerSize());
- // Segment selector size.
- Asm->emitInt8(0);
-
- MCSymbol *RnglistTableBaseSym = Holder.getRnglistsTableBaseSym();
+static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
+ const DwarfFile &Holder, MCSymbol *TableEnd) {
+ for (const RangeSpanList &List : Holder.getRangeLists())
+ emitRangeList(DD, Asm, List);
- // FIXME: Generate the offsets table and use DW_FORM_rnglistx with the
- // DW_AT_ranges attribute. Until then set the number of offsets to 0.
- Asm->emitInt32(0);
- Asm->OutStreamer->EmitLabel(RnglistTableBaseSym);
- return TableEnd;
+ if (TableEnd)
+ Asm->OutStreamer->EmitLabel(TableEnd);
}
/// Emit address ranges into the .debug_ranges section or into the DWARF v5
@@ -2125,46 +2391,52 @@ void DwarfDebug::emitDebugRanges() {
if (CUMap.empty())
return;
- auto NoRangesPresent = [this]() {
- return llvm::all_of(
- CUMap, [](const decltype(CUMap)::value_type &Pair) {
- return Pair.second->getRangeLists().empty();
- });
- };
+ const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- if (!useRangesSection()) {
- assert(NoRangesPresent() && "No debug ranges expected.");
+ if (Holder.getRangeLists().empty())
return;
- }
- if (NoRangesPresent())
- return;
+ assert(useRangesSection());
+ assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
// Start the dwarf ranges section.
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5) {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRnglistsSection());
- TableEnd = emitRnglistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
- : InfoHolder);
+ TableEnd = emitRnglistsTableHeader(Asm, Holder);
} else
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
- // Grab the specific ranges for the compile units in the module.
- for (const auto &I : CUMap) {
- DwarfCompileUnit *TheCU = I.second;
+ emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
+}
- if (auto *Skel = TheCU->getSkeleton())
- TheCU = Skel;
+void DwarfDebug::emitDebugRangesDWO() {
+ assert(useSplitDwarf());
- // Iterate over the misc ranges for the compile units in the module.
- for (const RangeSpanList &List : TheCU->getRangeLists())
- emitRangeList(Asm, TheCU, List);
- }
+ if (CUMap.empty())
+ return;
- if (TableEnd)
- Asm->OutStreamer->EmitLabel(TableEnd);
+ const auto &Holder = InfoHolder;
+
+ if (Holder.getRangeLists().empty())
+ return;
+
+ assert(getDwarfVersion() >= 5);
+ assert(useRangesSection());
+ assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
+
+ // Start the dwarf ranges section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
+ MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder);
+
+ emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
@@ -2206,12 +2478,19 @@ void DwarfDebug::emitDebugMacinfo() {
if (CUMap.empty())
return;
+ if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }))
+ return;
+
// Start the dwarf macinfo section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfMacinfoSection());
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
+ if (TheCU.getCUNode()->isDebugDirectivesOnly())
+ continue;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
@@ -2229,8 +2508,6 @@ void DwarfDebug::emitDebugMacinfo() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU) {
- NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
- Asm->TM.Options.MCOptions.SplitDwarfFile);
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -2298,9 +2575,8 @@ void DwarfDebug::emitDebugStrDWO() {
OffSec, /* UseRelativeOffsets = */ false);
}
-// Emit DWO addresses.
+// Emit address pool.
void DwarfDebug::emitDebugAddr() {
- assert(useSplitDwarf() && "No split dwarf?");
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
}
@@ -2356,10 +2632,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
NewTU.setTypeSignature(Signature);
Ins.first->second = Signature;
- if (useSplitDwarf())
- NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
- else {
- NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ if (useSplitDwarf()) {
+ MCSection *Section =
+ getDwarfVersion() <= 4
+ ? Asm->getObjFileLowering().getDwarfTypesDWOSection()
+ : Asm->getObjFileLowering().getDwarfInfoDWOSection();
+ NewTU.setSection(Section);
+ } else {
+ MCSection *Section =
+ getDwarfVersion() <= 4
+ ? Asm->getObjFileLowering().getDwarfTypesSection(Signature)
+ : Asm->getObjFileLowering().getDwarfInfoSection(Signature);
+ NewTU.setSection(Section);
// Non-split type units reuse the compile unit's line table.
CU.applyStmtList(UnitDie);
}
@@ -2408,14 +2692,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// AccelTableKind::Apple, we use the table we got as an argument). If
// accelerator tables are disabled, this function does nothing.
template <typename DataT>
-void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
+void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
+ AccelTable<DataT> &AppleAccel, StringRef Name,
const DIE &Die) {
if (getAccelTableKind() == AccelTableKind::None)
return;
+ if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ return;
+
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- DwarfStringPoolEntryRef Ref =
- Holder.getStringPool().getEntry(*Asm, Name);
+ DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name);
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
@@ -2431,24 +2719,36 @@ void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
}
}
-void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
- addAccelNameImpl(AccelNames, Name, Die);
+void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(CU, AccelNames, Name, Die);
}
-void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
+void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
// ObjC names go only into the Apple accelerator tables.
if (getAccelTableKind() == AccelTableKind::Apple)
- addAccelNameImpl(AccelObjC, Name, Die);
+ addAccelNameImpl(CU, AccelObjC, Name, Die);
}
-void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
- addAccelNameImpl(AccelNamespace, Name, Die);
+void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die) {
+ addAccelNameImpl(CU, AccelNamespace, Name, Die);
}
-void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
- addAccelNameImpl(AccelTypes, Name, Die);
+void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die, char Flags) {
+ addAccelNameImpl(CU, AccelTypes, Name, Die);
}
uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
+
+void DwarfDebug::addSectionLabel(const MCSymbol *Sym) {
+ SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym));
+}
+
+const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
+ return SectionLabels.find(S)->second;
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index abf2e43b1312..8a31e989b289 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,8 +15,6 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "AddressPool.h"
-#include "DbgValueHistoryCalculator.h"
-#include "DebugHandlerBase.h"
#include "DebugLocStream.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
@@ -31,6 +29,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AccelTable.h"
+#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -62,6 +62,47 @@ class MDNode;
class Module;
//===----------------------------------------------------------------------===//
+/// This class is defined as the common parent of DbgVariable and DbgLabel
+/// so that polymorphism can be leveraged to factor out code common to
+/// DbgVariable and DbgLabel.
+class DbgEntity {
+ const DINode *Entity;
+ const DILocation *InlinedAt;
+ DIE *TheDIE = nullptr;
+ unsigned SubclassID;
+
+public:
+ enum DbgEntityKind {
+ DbgVariableKind,
+ DbgLabelKind
+ };
+
+ DbgEntity(const DINode *N, const DILocation *IA, unsigned ID)
+ : Entity(N), InlinedAt(IA), SubclassID(ID) {}
+ virtual ~DbgEntity() {}
+
+ /// Accessors.
+ /// @{
+ const DINode *getEntity() const { return Entity; }
+ const DILocation *getInlinedAt() const { return InlinedAt; }
+ DIE *getDIE() const { return TheDIE; }
+ unsigned getDbgEntityID() const { return SubclassID; }
+ /// @}
+
+ void setDIE(DIE &D) { TheDIE = &D; }
+
+ static bool classof(const DbgEntity *N) {
+ switch (N->getDbgEntityID()) {
+ default:
+ return false;
+ case DbgVariableKind:
+ case DbgLabelKind:
+ return true;
+ }
+ }
+};
+
+//===----------------------------------------------------------------------===//
/// This class is used to track local variable information.
///
/// Variables can be created from allocas, in which case they're generated from
@@ -73,10 +114,7 @@ class Module;
/// single instruction use \a MInsn and (optionally) a single entry of \a Expr.
///
/// Variables that have been optimized out use none of these fields.
-class DbgVariable {
- const DILocalVariable *Var; /// Variable Descriptor.
- const DILocation *IA; /// Inlined at location.
- DIE *TheDIE = nullptr; /// Variable DIE.
+class DbgVariable : public DbgEntity {
unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
@@ -93,7 +131,7 @@ public:
/// Creates a variable without any DW_AT_location. Call \a initializeMMI()
/// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
DbgVariable(const DILocalVariable *V, const DILocation *IA)
- : Var(V), IA(IA) {}
+ : DbgEntity(V, IA, DbgVariableKind) {}
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
@@ -111,8 +149,9 @@ public:
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!MInsn && "Already initialized?");
- assert(Var == DbgValue->getDebugVariable() && "Wrong variable");
- assert(IA == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at");
+ assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
+ assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
+ "Wrong inlined-at");
MInsn = DbgValue;
if (auto *E = DbgValue->getDebugExpression())
@@ -121,19 +160,18 @@ public:
}
// Accessors.
- const DILocalVariable *getVariable() const { return Var; }
- const DILocation *getInlinedAt() const { return IA; }
+ const DILocalVariable *getVariable() const {
+ return cast<DILocalVariable>(getEntity());
+ }
const DIExpression *getSingleExpression() const {
assert(MInsn && FrameIndexExprs.size() <= 1);
return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
- void setDIE(DIE &D) { TheDIE = &D; }
- DIE *getDIE() const { return TheDIE; }
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
- StringRef getName() const { return Var->getName(); }
+ StringRef getName() const { return getVariable()->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
/// Get the FI entries, sorted by fragment offset.
ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
@@ -143,7 +181,7 @@ public:
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
// FIXME: Why don't we just infer this tag and store it all along?
- if (Var->isParameter())
+ if (getVariable()->isParameter())
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
@@ -151,7 +189,7 @@ public:
/// Return true if DbgVariable is artificial.
bool isArtificial() const {
- if (Var->isArtificial())
+ if (getVariable()->isArtificial())
return true;
if (getType()->isArtificial())
return true;
@@ -159,7 +197,7 @@ public:
}
bool isObjectPointer() const {
- if (Var->isObjectPointer())
+ if (getVariable()->isObjectPointer())
return true;
if (getType()->isObjectPointer())
return true;
@@ -178,6 +216,45 @@ public:
bool isBlockByrefVariable() const;
const DIType *getType() const;
+ static bool classof(const DbgEntity *N) {
+ return N->getDbgEntityID() == DbgVariableKind;
+ }
+
+private:
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
+ return Ref.resolve();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// This class is used to track label information.
+///
+/// Labels are collected from \c DBG_LABEL instructions.
+class DbgLabel : public DbgEntity {
+ const MCSymbol *Sym; /// Symbol before DBG_LABEL instruction.
+
+public:
+ /// We need MCSymbol information to generate DW_AT_low_pc.
+ DbgLabel(const DILabel *L, const DILocation *IA, const MCSymbol *Sym = nullptr)
+ : DbgEntity(L, IA, DbgLabelKind), Sym(Sym) {}
+
+ /// Accessors.
+ /// @{
+ const DILabel *getLabel() const { return cast<DILabel>(getEntity()); }
+ const MCSymbol *getSymbol() const { return Sym; }
+
+ StringRef getName() const { return getLabel()->getName(); }
+ /// @}
+
+ /// Translate tag to proper Dwarf tag.
+ dwarf::Tag getTag() const {
+ return dwarf::DW_TAG_label;
+ }
+
+ static bool classof(const DbgEntity *N) {
+ return N->getDbgEntityID() == DbgLabelKind;
+ }
+
private:
template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
return Ref.resolve();
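The new hierarchy is discriminated with LLVM-style RTTI through the classof hooks above. A minimal sketch of how a consumer branches on the two subclasses; visitEntity is a hypothetical helper, not part of this change:

  void visitEntity(const DbgEntity *Entity) {
    if (const auto *Var = dyn_cast<DbgVariable>(Entity)) {
      // Becomes DW_TAG_variable or DW_TAG_formal_parameter.
      (void)Var->getVariable();
    } else if (const auto *Label = dyn_cast<DbgLabel>(Entity)) {
      // Becomes DW_TAG_label; its MCSymbol is later used for DW_AT_low_pc.
      (void)Label->getSymbol();
    }
  }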
@@ -217,8 +294,8 @@ class DwarfDebug : public DebugHandlerBase {
/// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
- /// Collection of abstract variables.
- SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
+ /// Collection of concrete variables/labels.
+ SmallVector<std::unique_ptr<DbgEntity>, 64> ConcreteEntities;
/// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
/// can refer to them in spite of insertions into this list.
@@ -250,6 +327,8 @@ class DwarfDebug : public DebugHandlerBase {
/// used to keep track of which types we have emitted type units for.
DenseMap<const MDNode *, uint64_t> TypeSignatures;
+ DenseMap<const MCSection *, const MCSymbol *> SectionLabels;
+
SmallVector<
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
@@ -266,9 +345,6 @@ class DwarfDebug : public DebugHandlerBase {
/// Use inlined strings.
bool UseInlineStrings = false;
- /// Whether to emit DWARF pub sections or not.
- bool UsePubSections = true;
-
/// Allow emission of .debug_ranges section.
bool UseRangesSection = true;
@@ -332,24 +408,33 @@ class DwarfDebug : public DebugHandlerBase {
return InfoHolder.getUnits();
}
- using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
+ using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
- void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *Scope);
- void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV,
- const MDNode *Scope);
+ void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *Scope);
+ void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
+ const DINode *Node,
+ const MDNode *Scope);
- DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU,
- LexicalScope &Scope, InlinedVariable IV);
+ DbgEntity *createConcreteEntity(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
+ const DINode *Node,
+ const DILocation *Location,
+ const MCSymbol *Sym = nullptr);
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
+ /// Construct DIEs for call site entries describing the calls in \p MF.
+ void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
+ DIE &ScopeDIE, const MachineFunction &MF);
+
template <typename DataT>
- void addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
- const DIE &Die);
+ void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel,
+ StringRef Name, const DIE &Die);
- void finishVariableDefinitions();
+ void finishEntityDefinitions();
void finishSubprogramDefinitions();
@@ -407,9 +492,7 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit address ranges into a debug ranges section.
void emitDebugRanges();
-
- /// Emit range lists into a DWARF v5 debug rnglists section.
- void emitDebugRnglists();
+ void emitDebugRangesDWO();
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
@@ -457,6 +540,8 @@ class DwarfDebug : public DebugHandlerBase {
/// Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit);
+ void finishUnitAttributes(const DICompileUnit *DIUnit,
+ DwarfCompileUnit &NewCU);
/// Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
@@ -469,8 +554,8 @@ class DwarfDebug : public DebugHandlerBase {
unsigned Flags);
/// Populate LexicalScope entries with variables' info.
- void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
- DenseSet<InlinedVariable> &ProcessedVars);
+ void collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
+ DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
/// function that describe the same variable.
@@ -479,7 +564,7 @@ class DwarfDebug : public DebugHandlerBase {
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
- DenseSet<InlinedVariable> &P);
+ DenseSet<InlinedEntity> &P);
/// Emit the reference to the section.
void emitSectionReference(const DwarfCompileUnit &CU);
@@ -543,9 +628,6 @@ public:
/// Returns whether to use inline strings.
bool useInlineStrings() const { return UseInlineStrings; }
- /// Returns whether GNU pub sections should be emitted.
- bool usePubSections() const { return UsePubSections; }
-
/// Returns whether ranges section should be emitted.
bool useRangesSection() const { return UseRangesSection; }
@@ -608,17 +690,20 @@ public:
return Ref.resolve();
}
- void addSubprogramNames(const DISubprogram *SP, DIE &Die);
+ void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
+ DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
- void addAccelName(StringRef Name, const DIE &Die);
+ void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die);
- void addAccelObjC(StringRef Name, const DIE &Die);
+ void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die);
- void addAccelNamespace(StringRef Name, const DIE &Die);
+ void addAccelNamespace(const DICompileUnit &CU, StringRef Name,
+ const DIE &Die);
- void addAccelType(StringRef Name, const DIE &Die, char Flags);
+ void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die,
+ char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
@@ -640,6 +725,9 @@ public:
bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
/// @}
+
+ void addSectionLabel(const MCSymbol *Sym);
+ const MCSymbol *getSectionLabel(const MCSection *S);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index d8d1a5e8f841..19c350afbf17 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -24,6 +24,20 @@
using namespace llvm;
+void DwarfExpression::emitConstu(uint64_t Value) {
+ if (Value < 32)
+ emitOp(dwarf::DW_OP_lit0 + Value);
+ else if (Value == std::numeric_limits<uint64_t>::max()) {
+ // Only do this for 64-bit values as the DWARF expression stack uses
+ // target-address-size values.
+ emitOp(dwarf::DW_OP_lit0);
+ emitOp(dwarf::DW_OP_not);
+ } else {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Value);
+ }
+}
+
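The values below are only examples; they illustrate which encodings the branches in emitConstu select.

  // Not part of the patch: expected lowering for a few sample inputs.
  //   emitConstu(5)          -> DW_OP_lit5
  //   emitConstu(31)         -> DW_OP_lit31
  //   emitConstu(32)         -> DW_OP_constu 32   (ULEB128 operand)
  //   emitConstu(UINT64_MAX) -> DW_OP_lit0, DW_OP_not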
void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
assert((LocationKind == Unknown || LocationKind == Register) &&
@@ -72,14 +86,12 @@ void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
}
void DwarfExpression::addShr(unsigned ShiftBy) {
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(ShiftBy);
+ emitConstu(ShiftBy);
emitOp(dwarf::DW_OP_shr);
}
void DwarfExpression::addAnd(unsigned Mask) {
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Mask);
+ emitConstu(Mask);
emitOp(dwarf::DW_OP_and);
}
@@ -181,8 +193,7 @@ void DwarfExpression::addSignedConstant(int64_t Value) {
void DwarfExpression::addUnsignedConstant(uint64_t Value) {
assert(LocationKind == Implicit || LocationKind == Unknown);
LocationKind = Implicit;
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Value);
+ emitConstu(Value);
}
void DwarfExpression::addUnsignedConstant(const APInt &Value) {
@@ -243,10 +254,9 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// Don't emit locations that cannot be expressed without DW_OP_stack_value.
if (DwarfVersion < 4)
- if (std::any_of(ExprCursor.begin(), ExprCursor.end(),
- [](DIExpression::ExprOperand Op) -> bool {
- return Op.getOp() == dwarf::DW_OP_stack_value;
- })) {
+ if (any_of(ExprCursor, [](DIExpression::ExprOperand Op) -> bool {
+ return Op.getOp() == dwarf::DW_OP_stack_value;
+ })) {
DwarfRegs.clear();
LocationKind = Unknown;
return false;
@@ -373,8 +383,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
break;
case dwarf::DW_OP_constu:
assert(LocationKind != Register);
- emitOp(dwarf::DW_OP_constu);
- emitUnsigned(Op->getArg(0));
+ emitConstu(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
LocationKind = Implicit;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 0637d952eba4..91568ba6d107 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -138,6 +138,9 @@ protected:
/// Emit a raw unsigned value.
virtual void emitUnsigned(uint64_t Value) = 0;
+ /// Emit a normalized unsigned constant.
+ void emitConstu(uint64_t Value);
+
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
@@ -187,7 +190,7 @@ protected:
/// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available
/// until DWARF 4, so we will continue to generate DW_OP_constu <const> for
/// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const
- /// <const> actually describes a value at a constant addess, not a constant
+ /// <const> actually describes a value at a constant address, not a constant
/// value. However, in the past there was no better way to describe a
/// constant value, so the producers and consumers started to rely on
/// heuristics to disambiguate the value vs. location status of the
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 049f349b009a..78ccad481411 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -36,13 +36,20 @@ void DwarfFile::emitUnits(bool UseOffsets) {
}
void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
- DIE &Die = TheU->getUnitDie();
- MCSection *USection = TheU->getSection();
- Asm->OutStreamer->SwitchSection(USection);
+ if (TheU->getCUNode()->isDebugDirectivesOnly())
+ return;
+ MCSection *S = TheU->getSection();
+
+ if (!S)
+ return;
+
+ Asm->OutStreamer->SwitchSection(S);
TheU->emitHeader(UseOffsets);
+ Asm->emitDwarfDIE(TheU->getUnitDie());
- Asm->emitDwarfDIE(Die);
+ if (MCSymbol *EndLabel = TheU->getEndLabel())
+ Asm->OutStreamer->EmitLabel(EndLabel);
}
// Compute the size and offset for each DIE.
@@ -53,6 +60,9 @@ void DwarfFile::computeSizeAndOffsets() {
// Iterate over each compile unit and set the size and offsets for each
// DIE within each compile unit. All offsets are CU relative.
for (const auto &TheU : CUs) {
+ if (TheU->getCUNode()->isDebugDirectivesOnly())
+ continue;
+
TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
@@ -98,3 +108,15 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
}
return true;
}
+
+void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) {
+ SmallVectorImpl<DbgLabel *> &Labels = ScopeLabels[LS];
+ Labels.push_back(Label);
+}
+
+std::pair<uint32_t, RangeSpanList *>
+DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) {
+ CURangeLists.push_back(
+ RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R)));
+ return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back());
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 8dfbc4e1c434..51acca8c1e53 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -24,12 +24,44 @@
namespace llvm {
class AsmPrinter;
+class DbgEntity;
class DbgVariable;
+class DbgLabel;
class DwarfCompileUnit;
class DwarfUnit;
class LexicalScope;
class MCSection;
+// Data structure to hold a range for range lists.
+class RangeSpan {
+public:
+ RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
+ const MCSymbol *getStart() const { return Start; }
+ const MCSymbol *getEnd() const { return End; }
+ void setEnd(const MCSymbol *E) { End = E; }
+
+private:
+ const MCSymbol *Start, *End;
+};
+
+class RangeSpanList {
+private:
+ // Symbol used to locate this particular span within the debug_range section.
+ MCSymbol *RangeSym;
+ const DwarfCompileUnit *CU;
+ // List of ranges.
+ SmallVector<RangeSpan, 2> Ranges;
+
+public:
+ RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU,
+ SmallVector<RangeSpan, 2> Ranges)
+ : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {}
+ MCSymbol *getSym() const { return RangeSym; }
+ const DwarfCompileUnit &getCU() const { return *CU; }
+ const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
+ void addRange(RangeSpan Range) { Ranges.push_back(Range); }
+};
+
class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
@@ -44,6 +76,10 @@ class DwarfFile {
DwarfStringPool StrPool;
+ // List of range lists for a given compile unit, separate from the ranges for
+ // the CU itself.
+ SmallVector<RangeSpanList, 1> CURangeLists;
+
/// DWARF v5: The symbol that designates the start of the contribution to
/// the string offsets table. The contribution is shared by all units.
MCSymbol *StringOffsetsStartSym = nullptr;
@@ -52,6 +88,10 @@ class DwarfFile {
/// The table is shared by all units.
MCSymbol *RnglistsTableBaseSym = nullptr;
+ /// DWARF v5: The symbol that designates the base of the locations list table.
+ /// The table is shared by all units.
+ MCSymbol *LoclistsTableBaseSym = nullptr;
+
/// The variables of a lexical scope.
struct ScopeVars {
/// We need to sort Args by ArgNo and check for duplicates. This could also
@@ -62,9 +102,13 @@ class DwarfFile {
/// Collection of DbgVariables of each lexical scope.
DenseMap<LexicalScope *, ScopeVars> ScopeVariables;
+ /// Collection of DbgLabels of each lexical scope.
+ using LabelList = SmallVector<DbgLabel *, 4>;
+ DenseMap<LexicalScope *, LabelList> ScopeLabels;
+
// Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
/// be shared across CUs, that is why we keep the map here instead
@@ -78,6 +122,14 @@ public:
return CUs;
}
+ std::pair<uint32_t, RangeSpanList *> addRange(const DwarfCompileUnit &CU,
+ SmallVector<RangeSpan, 2> R);
+
+ /// getRangeLists - Get the vector of range lists.
+ const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+ return CURangeLists;
+ }
+
/// Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
@@ -112,26 +164,33 @@ public:
DwarfStringPool &getStringPool() { return StrPool; }
MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; }
-
void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; }
MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; }
-
void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; }
+ MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; }
+ void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; }
+
/// \returns false if the variable was merged with a previous one.
bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+ void addScopeLabel(LexicalScope *LS, DbgLabel *Label);
+
DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() {
return ScopeVariables;
}
+ DenseMap<LexicalScope *, LabelList> &getScopeLabels() {
+ return ScopeLabels;
+ }
+
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
return AbstractSPDies;
}
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
- return AbstractVariables;
+ DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
+ return AbstractEntities;
}
void insertDIE(const MDNode *TypeMD, DIE *Die) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index a61fa83cfb03..02016534a774 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -24,25 +24,39 @@ DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm,
: Pool(A), Prefix(Prefix),
ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {}
-DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
- StringRef Str) {
+StringMapEntry<DwarfStringPool::EntryTy> &
+DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
auto I = Pool.insert(std::make_pair(Str, EntryTy()));
+ auto &Entry = I.first->second;
if (I.second) {
- auto &Entry = I.first->second;
- Entry.Index = Pool.size() - 1;
+ Entry.Index = EntryTy::NotIndexed;
Entry.Offset = NumBytes;
Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
NumBytes += Str.size() + 1;
assert(NumBytes > Entry.Offset && "Unexpected overflow");
}
- return EntryRef(*I.first);
+ return *I.first;
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto &MapEntry = getEntryImpl(Asm, Str);
+ return EntryRef(MapEntry, false);
+}
+
+DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto &MapEntry = getEntryImpl(Asm, Str);
+ if (!MapEntry.getValue().isIndexed())
+ MapEntry.getValue().Index = NumIndexedStrings++;
+ return EntryRef(MapEntry, true);
}
void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
MCSection *Section,
MCSymbol *StartSym) {
- if (empty())
+ if (getNumIndexedStrings() == 0)
return;
Asm.OutStreamer->SwitchSection(Section);
unsigned EntrySize = 4;
@@ -51,7 +65,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
// table. The header consists of an entry with the contribution's
// size (not including the size of the length field), the DWARF version and
// 2 bytes of padding.
- Asm.emitInt32(size() * EntrySize + 4);
+ Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4);
Asm.emitInt16(Asm.getDwarfVersion());
Asm.emitInt16(0);
// Define the symbol that marks the start of the contribution. It is
@@ -69,12 +83,17 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
// Start the dwarf str section.
Asm.OutStreamer->SwitchSection(StrSection);
- // Get all of the string pool entries and put them in an array by their ID so
- // we can sort them.
- SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size());
+ // Get all of the string pool entries and sort them by their offset.
+ SmallVector<const StringMapEntry<EntryTy> *, 64> Entries;
+ Entries.reserve(Pool.size());
for (const auto &E : Pool)
- Entries[E.getValue().Index] = &E;
+ Entries.push_back(&E);
+
+ llvm::sort(Entries, [](const StringMapEntry<EntryTy> *A,
+ const StringMapEntry<EntryTy> *B) {
+ return A->getValue().Offset < B->getValue().Offset;
+ });
for (const auto &Entry : Entries) {
assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) &&
@@ -93,6 +112,14 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
// If we've got an offset section go ahead and emit that now as well.
if (OffsetSection) {
+ // Now only take the indexed entries and put them in an array by their ID so
+ // we can emit them in order.
+ Entries.resize(NumIndexedStrings);
+ for (const auto &Entry : Pool) {
+ if (Entry.getValue().isIndexed())
+ Entries[Entry.getValue().Index] = &Entry;
+ }
+
Asm.OutStreamer->SwitchSection(OffsetSection);
unsigned size = 4; // FIXME: DWARF64 is 8.
for (const auto &Entry : Entries)
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 6e6988ea4ad4..f484540d8d37 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -30,8 +30,11 @@ class DwarfStringPool {
StringMap<EntryTy, BumpPtrAllocator &> Pool;
StringRef Prefix;
unsigned NumBytes = 0;
+ unsigned NumIndexedStrings = 0;
bool ShouldCreateSymbols;
+ StringMapEntry<EntryTy> &getEntryImpl(AsmPrinter &Asm, StringRef Str);
+
public:
using EntryRef = DwarfStringPoolEntryRef;
@@ -48,8 +51,15 @@ public:
unsigned size() const { return Pool.size(); }
+ unsigned getNumIndexedStrings() const { return NumIndexedStrings; }
+
/// Get a reference to an entry in the string pool.
EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
+
+ /// Same as getEntry, except that you can use EntryRef::getIndex to obtain a
+ /// unique ID of this entry (e.g., for use in indexed forms like
+ /// DW_FORM_strx).
+ EntryRef getIndexedEntry(AsmPrinter &Asm, StringRef Str);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 600f4a78fda0..80b365f1aa43 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -234,15 +234,23 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
+ if (CUNode->isDebugDirectivesOnly())
+ return;
+
if (DD->useInlineStrings()) {
Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string,
new (DIEValueAllocator)
DIEInlineString(String, DIEValueAllocator));
return;
}
- auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String);
dwarf::Form IxForm =
isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
+
+ auto StringPoolEntry =
+ useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index
+ ? DU->getStringPool().getIndexedEntry(*Asm, String)
+ : DU->getStringPool().getEntry(*Asm, String);
+
// For DWARF v5 and beyond, use the smallest strx? form possible.
if (useSegmentedStringOffsetsTable()) {
IxForm = dwarf::DW_FORM_strx1;
@@ -307,14 +315,21 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
}
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
- if (!DD->useSplitDwarf()) {
- addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Die, dwarf::DW_FORM_udata, Sym);
- } else {
+ if (DD->getDwarfVersion() >= 5) {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
+ addUInt(Die, dwarf::DW_FORM_addrx, DD->getAddressPool().getIndex(Sym));
+ return;
+ }
+
+ if (DD->useSplitDwarf()) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
addUInt(Die, dwarf::DW_FORM_GNU_addr_index,
DD->getAddressPool().getIndex(Sym));
+ return;
}
+
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, dwarf::DW_FORM_udata, Sym);
}
void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
@@ -401,6 +416,12 @@ void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
addSourceLine(Die, SP->getLine(), SP->getFile());
}
+void DwarfUnit::addSourceLine(DIE &Die, const DILabel *L) {
+ assert(L);
+
+ addSourceLine(Die, L->getLine(), L->getFile());
+}
+
void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
assert(Ty);
@@ -413,138 +434,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
-/* Byref variables, in Blocks, are declared by the programmer as "SomeType
- VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
- gives the variable VarName either the struct, or a pointer to the struct, as
- its type. This is necessary for various behind-the-scenes things the
- compiler needs to do with by-reference variables in Blocks.
-
- However, as far as the original *programmer* is concerned, the variable
- should still have type 'SomeType', as originally declared.
-
- The function getBlockByrefType dives into the __Block_byref_x_VarName
- struct to find the original type of the variable, which is then assigned to
- the variable's Debug Information Entry as its real type. So far, so good.
- However now the debugger will expect the variable VarName to have the type
- SomeType. So we need the location attribute for the variable to be an
- expression that explains to the debugger how to navigate through the
- pointers and struct to find the actual variable of type SomeType.
-
- The following function does just that. We start by getting
- the "normal" location for the variable. This will be the location
- of either the struct __Block_byref_x_VarName or the pointer to the
- struct __Block_byref_x_VarName.
-
- The struct will look something like:
-
- struct __Block_byref_x_VarName {
- ... <various fields>
- struct __Block_byref_x_VarName *forwarding;
- ... <various other fields>
- SomeType VarName;
- ... <maybe more fields>
- };
-
- If we are given the struct directly (as our starting point) we
- need to tell the debugger to:
-
- 1). Add the offset of the forwarding field.
-
- 2). Follow that pointer to get the real __Block_byref_x_VarName
- struct to use (the real one may have been copied onto the heap).
-
- 3). Add the offset for the field VarName, to find the actual variable.
-
- If we started with a pointer to the struct, then we need to
- dereference that pointer first, before the other steps.
- Translating this into DWARF ops, we will need to append the following
- to the current location description for the variable:
-
- DW_OP_deref -- optional, if we start with a pointer
- DW_OP_plus_uconst <forward_fld_offset>
- DW_OP_deref
- DW_OP_plus_uconst <varName_fld_offset>
-
- That is what this function does. */
-
-void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location) {
- const DIType *Ty = DV.getType();
- const DIType *TmpTy = Ty;
- uint16_t Tag = Ty->getTag();
- bool isPointer = false;
-
- StringRef varName = DV.getName();
-
- if (Tag == dwarf::DW_TAG_pointer_type) {
- auto *DTy = cast<DIDerivedType>(Ty);
- TmpTy = resolve(DTy->getBaseType());
- isPointer = true;
- }
-
- // Find the __forwarding field and the variable field in the __Block_byref
- // struct.
- DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements();
- const DIDerivedType *varField = nullptr;
- const DIDerivedType *forwardingField = nullptr;
-
- for (unsigned i = 0, N = Fields.size(); i < N; ++i) {
- auto *DT = cast<DIDerivedType>(Fields[i]);
- StringRef fieldName = DT->getName();
- if (fieldName == "__forwarding")
- forwardingField = DT;
- else if (fieldName == varName)
- varField = DT;
- }
-
- // Get the offsets for the forwarding field and the variable field.
- unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3;
- unsigned varFieldOffset = varField->getOffsetInBits() >> 2;
-
- // Decode the original location, and use that as the start of the byref
- // variable's location.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- if (Location.isIndirect())
- DwarfExpr.setMemoryLocationKind();
-
- SmallVector<uint64_t, 6> Ops;
- // If we started with a pointer to the __Block_byref... struct, then
- // the first thing we need to do is dereference the pointer (DW_OP_deref).
- if (isPointer)
- Ops.push_back(dwarf::DW_OP_deref);
-
- // Next add the offset for the '__forwarding' field:
- // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
- // adding the offset if it's 0.
- if (forwardingFieldOffset > 0) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(forwardingFieldOffset);
- }
-
- // Now dereference the __forwarding field to get to the real __Block_byref
- // struct: DW_OP_deref.
- Ops.push_back(dwarf::DW_OP_deref);
-
- // Now that we've got the real __Block_byref... struct, add the offset
- // for the variable's field to get to the location of the actual variable:
- // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
- if (varFieldOffset > 0) {
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(varFieldOffset);
- }
-
- DIExpressionCursor Cursor(Ops);
- const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
- if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
- return;
- DwarfExpr.addExpression(std::move(Cursor));
-
- // Now attach the location information to the DIE.
- addBlock(Die, Attribute, DwarfExpr.finalize());
-}
-
/// Return true if type encoding is unsigned.
static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
@@ -787,7 +676,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
- DD->addAccelType(Ty->getName(), TyDIE, Flags);
+ DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
isa<DINamespace>(Context))
@@ -851,6 +740,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+
+ if (BTy->isBigEndian())
+ addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big);
+ else if (BTy->isLittleEndian())
+ addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little);
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
@@ -1155,7 +1049,7 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
addString(NDie, dwarf::DW_AT_name, NS->getName());
else
Name = "(anonymous namespace)";
- DD->addAccelNamespace(Name, NDie);
+ DD->addAccelNamespace(*CUNode, Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
if (NS->getExportSymbols())
addFlag(NDie, dwarf::DW_AT_export_symbols);
@@ -1404,7 +1298,7 @@ DIE *DwarfUnit::getIndexTyDie() {
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_unsigned);
- DD->addAccelType(Name, *IndexTyDie, /*Flags*/ 0);
+ DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
return IndexTyDie;
}
@@ -1467,7 +1361,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (DTy) {
if (DD->getDwarfVersion() >= 3)
addType(Buffer, DTy);
- if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagFixedEnum))
+ if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass))
addFlag(Buffer, dwarf::DW_AT_enum_class);
}
@@ -1659,7 +1553,14 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
- Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
+ if (!DD->useSectionsAsReferences()) {
+ StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_";
+ MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start");
+ EndLabel = Asm->createTempSymbol(Prefix + "end");
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->OutStreamer->EmitLabel(BeginLabel);
+ } else
+ Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
unsigned Version = DD->getDwarfVersion();
@@ -1761,3 +1662,12 @@ void DwarfUnit::addRnglistsBase() {
DU->getRnglistsTableBaseSym(),
TLOF.getDwarfRnglistsSection()->getBeginSymbol());
}
+
+void DwarfUnit::addLoclistsBase() {
+ assert(DD->getDwarfVersion() >= 5 &&
+ "DW_AT_loclists_base requires DWARF version 5 or later");
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base,
+ DU->getLoclistsTableBaseSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 69696f626536..a59ebb7c1465 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -35,33 +35,6 @@ class ConstantFP;
class DbgVariable;
class DwarfCompileUnit;
-// Data structure to hold a range for range lists.
-class RangeSpan {
-public:
- RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
- const MCSymbol *getStart() const { return Start; }
- const MCSymbol *getEnd() const { return End; }
- void setEnd(const MCSymbol *E) { End = E; }
-
-private:
- const MCSymbol *Start, *End;
-};
-
-class RangeSpanList {
-private:
- // Index for locating within the debug_range section this particular span.
- MCSymbol *RangeSym;
- // List of ranges.
- SmallVector<RangeSpan, 2> Ranges;
-
-public:
- RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges)
- : RangeSym(Sym), Ranges(std::move(Ranges)) {}
- MCSymbol *getSym() const { return RangeSym; }
- const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
- void addRange(RangeSpan Range) { Ranges.push_back(Range); }
-};
-
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
/// source file.
@@ -76,6 +49,9 @@ protected:
/// Target of Dwarf emission.
AsmPrinter *Asm;
+ /// Emitted at the end of the CU and used to compute the CU Length field.
+ MCSymbol *EndLabel = nullptr;
+
// Holders for some common dwarf information.
DwarfDebug *DD;
DwarfFile *DU;
@@ -109,6 +85,7 @@ protected:
public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
+ MCSymbol *getEndLabel() const { return EndLabel; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
@@ -213,6 +190,7 @@ public:
void addSourceLine(DIE &Die, const DILocalVariable *V);
void addSourceLine(DIE &Die, const DIGlobalVariable *G);
void addSourceLine(DIE &Die, const DISubprogram *SP);
+ void addSourceLine(DIE &Die, const DILabel *L);
void addSourceLine(DIE &Die, const DIType *Ty);
void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
@@ -298,6 +276,9 @@ public:
/// Add the DW_AT_rnglists_base attribute to the unit DIE.
void addRnglistsBase();
+ /// Add the DW_AT_loclists_base attribute to the unit DIE.
+ void addLoclistsBase();
+
virtual DwarfCompileUnit &getCU() = 0;
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 65de9d7e65a4..7599121de2b0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -99,7 +99,7 @@ void EHStreamer::computeActionsTable(
FirstActions.reserve(LandingPads.size());
int FirstAction = 0;
- unsigned SizeActions = 0;
+ unsigned SizeActions = 0; // Total size of all action entries for a function
const LandingPadInfo *PrevLPI = nullptr;
for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
@@ -107,23 +107,24 @@ void EHStreamer::computeActionsTable(
const LandingPadInfo *LPI = *I;
const std::vector<int> &TypeIds = LPI->TypeIds;
unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
- unsigned SizeSiteActions = 0;
+ unsigned SizeSiteActions = 0; // Total size of all entries for a landingpad
if (NumShared < TypeIds.size()) {
- unsigned SizeAction = 0;
+ // Size of one action entry (typeid + next action)
+ unsigned SizeActionEntry = 0;
unsigned PrevAction = (unsigned)-1;
if (NumShared) {
unsigned SizePrevIds = PrevLPI->TypeIds.size();
assert(Actions.size());
PrevAction = Actions.size() - 1;
- SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) +
- getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeActionEntry = getSLEB128Size(Actions[PrevAction].NextAction) +
+ getSLEB128Size(Actions[PrevAction].ValueForTypeID);
for (unsigned j = NumShared; j != SizePrevIds; ++j) {
assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
- SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
- SizeAction += -Actions[PrevAction].NextAction;
+ SizeActionEntry -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeActionEntry += -Actions[PrevAction].NextAction;
PrevAction = Actions[PrevAction].Previous;
}
}
@@ -136,9 +137,9 @@ void EHStreamer::computeActionsTable(
isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
- int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
- SizeAction = SizeTypeID + getSLEB128Size(NextAction);
- SizeSiteActions += SizeAction;
+ int NextAction = SizeActionEntry ? -(SizeActionEntry + SizeTypeID) : 0;
+ SizeActionEntry = SizeTypeID + getSLEB128Size(NextAction);
+ SizeSiteActions += SizeActionEntry;
ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
Actions.push_back(Action);
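A hedged sketch of the size bookkeeping in this hunk, with a local slebSize helper standing in for llvm::getSLEB128Size and an illustrative list of type ids: each action record is a typeid plus a self-relative next-action offset, and NextAction points backwards by the size of the record appended just before it.

    #include <cstdint>
    #include <cstdio>

    // Number of bytes a value occupies when encoded as SLEB128
    // (assumes arithmetic right shift of negative values).
    static unsigned slebSize(int64_t Value) {
      unsigned Size = 0;
      bool More;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        More = !((Value == 0 && (Byte & 0x40) == 0) ||
                 (Value == -1 && (Byte & 0x40) != 0));
        ++Size;
      } while (More);
      return Size;
    }

    int main() {
      int TypeIds[] = {1, 2, 3};     // catch clauses of one landing pad
      unsigned SizeActionEntry = 0;  // size of the previously appended record
      unsigned SizeSiteActions = 0;  // total size for this landing pad
      for (int ValueForTypeID : TypeIds) {
        unsigned SizeTypeID = slebSize(ValueForTypeID);
        // 0 terminates the chain; otherwise hop back over the previous record.
        int NextAction = SizeActionEntry ? -int(SizeActionEntry + SizeTypeID) : 0;
        SizeActionEntry = SizeTypeID + slebSize(NextAction);
        SizeSiteActions += SizeActionEntry;
        std::printf("typeid=%d next=%d entry=%u total=%u\n", ValueForTypeID,
                    NextAction, SizeActionEntry, SizeSiteActions);
      }
    }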
@@ -146,7 +147,7 @@ void EHStreamer::computeActionsTable(
}
// Record the first action of the landing pad site.
- FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ FirstAction = SizeActions + SizeSiteActions - SizeActionEntry + 1;
} // else identical - re-use previous FirstAction
// Information used when creating the call-site table. The action record
@@ -344,7 +345,9 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// unwound and handling continues.
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed base 1.
-void EHStreamer::emitExceptionTable() {
+///
+/// Returns the starting symbol of an exception table.
+MCSymbol *EHStreamer::emitExceptionTable() {
const MachineFunction *MF = Asm->MF;
const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
const std::vector<unsigned> &FilterIds = MF->getFilterIds();
@@ -359,9 +362,9 @@ void EHStreamer::emitExceptionTable() {
LandingPads.push_back(&PadInfos[i]);
// Order landing pads lexicographically by type id.
- llvm::sort(LandingPads.begin(), LandingPads.end(),
- [](const LandingPadInfo *L,
- const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; });
+ llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) {
+ return L->TypeIds < R->TypeIds;
+ });
// Compute the actions table and gather the first action index for each
// landing pad site.
@@ -374,6 +377,7 @@ void EHStreamer::emitExceptionTable() {
computeCallSiteTable(CallSites, LandingPads, FirstActions);
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
unsigned CallSiteEncoding =
IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128;
bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
@@ -456,8 +460,8 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
Asm->OutStreamer->EmitLabel(CstBeginLabel);
- // SjLj Exception handling
- if (IsSJLJ) {
+ // SjLj / Wasm Exception handling
+ if (IsSJLJ || IsWasm) {
unsigned idx = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
@@ -603,6 +607,7 @@ void EHStreamer::emitExceptionTable() {
}
Asm->EmitAlignment(2);
+ return GCCETSym;
}
void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index b89421a1e067..ce912d032c6d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -14,8 +14,8 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
-#include "AsmPrinterHandler.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -85,9 +85,10 @@ protected:
/// zero for the landing pad and the action. Calls marked 'nounwind' have
/// no entry and must not be contained in the try-range of any entry - they
/// form gaps in the table. Entries must be ordered by try-range address.
- void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- const SmallVectorImpl<unsigned> &FirstActions);
+ virtual void computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions);
/// Emit landing pads and actions.
///
@@ -108,7 +109,9 @@ protected:
/// found the frame is unwound and handling continues.
/// 3. Type id table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed base 1.
- void emitExceptionTable();
+ ///
+ /// Returns the starting symbol of an exception table.
+ MCSymbol *emitExceptionTable();
virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 49cc376fcc98..34677ecc9e69 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -15,10 +15,10 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 59a57ed30d10..3479a00def23 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -15,9 +15,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
new file mode 100644
index 000000000000..527e5ae50146
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -0,0 +1,97 @@
+//===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing WebAssembly exception info into asm
+// files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WasmException.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+void WasmException::endModule() {
+ // This is the symbol used in 'throw' and 'if_except' instructions to denote
+ // this is a C++ exception. This symbol has to be emitted somewhere once in
+ // the module. Check if the symbol has already been created, i.e., we have at
+ // least one 'throw' or 'if_except' instruction in the module, and emit the
+ // symbol only if so.
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
+ if (Asm->OutContext.lookupSymbol(NameStr)) {
+ MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception");
+ Asm->OutStreamer->EmitLabel(ExceptionSym);
+ }
+}
+
+void WasmException::markFunctionEnd() {
+ // Get rid of any dead landing pads.
+ if (!Asm->MF->getLandingPads().empty()) {
+ auto *NonConstMF = const_cast<MachineFunction *>(Asm->MF);
+ // Wasm does not set BeginLabel and EndLabel information for landing pads,
+ // so we should set the second argument to false.
+ NonConstMF->tidyLandingPads(nullptr, /* TidyIfNoBeginLabels */ false);
+ }
+}
+
+void WasmException::endFunction(const MachineFunction *MF) {
+ bool ShouldEmitExceptionTable = false;
+ for (const LandingPadInfo &Info : MF->getLandingPads()) {
+ if (MF->hasWasmLandingPadIndex(Info.LandingPadBlock)) {
+ ShouldEmitExceptionTable = true;
+ break;
+ }
+ }
+ if (!ShouldEmitExceptionTable)
+ return;
+ MCSymbol *LSDALabel = emitExceptionTable();
+ assert(LSDALabel && ".GCC_exception_table has not been emitted!");
+
+ // Wasm requires every data section symbol to have a .size set. So we emit an
+ // end marker and set the size as the difference between the start and the end
+ // marker.
+ MCSymbol *LSDAEndLabel = Asm->createTempSymbol("GCC_except_table_end");
+ Asm->OutStreamer->EmitLabel(LSDAEndLabel);
+ MCContext &OutContext = Asm->OutStreamer->getContext();
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(LSDAEndLabel, OutContext),
+ MCSymbolRefExpr::create(LSDALabel, OutContext), OutContext);
+ Asm->OutStreamer->emitELFSize(LSDALabel, SizeExp);
+}
+
+// Compute the call-site table for wasm EH. Even though we use the same function
+// name to share the common routines, a call site entry in the table corresponds
+// not to a call site for possibly-throwing functions but to a landing pad. In
+// wasm EH the VM is responsible for stack unwinding. After an exception occurs
+// and the stack is unwound, control is transferred to the wasm 'catch'
+// instruction by the VM, after which the personality function is called from
+// the compiler-generated code. Refer to the WasmEHPrepare pass for more
+// information.
+void WasmException::computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ MachineFunction &MF = *Asm->MF;
+ for (unsigned I = 0, N = LandingPads.size(); I < N; ++I) {
+ const LandingPadInfo *Info = LandingPads[I];
+ MachineBasicBlock *LPad = Info->LandingPadBlock;
+ // We don't emit LSDA for single catch (...).
+ if (!MF.hasWasmLandingPadIndex(LPad))
+ continue;
+ // Wasm EH must maintain the EH pads in the order assigned to them by the
+ // WasmEHPrepare pass.
+ unsigned LPadIndex = MF.getWasmLandingPadIndex(LPad);
+ CallSiteEntry Site = {nullptr, nullptr, Info, FirstActions[I]};
+ if (CallSites.size() < LPadIndex + 1)
+ CallSites.resize(LPadIndex + 1);
+ CallSites[LPadIndex] = Site;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.h
new file mode 100644
index 000000000000..cbdb42457cf8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -0,0 +1,42 @@
+//===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing WebAssembly exception info into asm
+// files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
+
+#include "EHStreamer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer {
+public:
+ WasmException(AsmPrinter *A) : EHStreamer(A) {}
+
+ void endModule() override;
+ void beginFunction(const MachineFunction *MF) override {}
+ virtual void markFunctionEnd() override;
+ void endFunction(const MachineFunction *MF) override;
+
+protected:
+ // Compute the call site table for wasm EH.
+ void computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) override;
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 124e8f04bfad..28f119e35966 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -14,7 +14,7 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
-#include "AsmPrinterHandler.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index eff73a58d8d2..cf8e8c69bc2a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -42,6 +42,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
// MSVC's EH tables are always composed of 32-bit words. All known 64-bit
// platforms use an imagerel32 relocation to refer to symbols.
useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
+ isAArch64 = Asm->TM.getTargetTriple().isAArch64();
}
WinException::~WinException() {}
@@ -242,6 +243,17 @@ void WinException::endFunclet() {
if (F.hasPersonalityFn())
Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
+ // On funclet exit, we emit a fake "function" end marker, so that the call
+ // to EmitWinEHHandlerData below can calculate the size of the funclet or
+ // function.
+ if (isAArch64) {
+ Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
+ Asm->OutStreamer->getCurrentSectionOnly());
+ Asm->OutStreamer->SwitchSection(XData);
+ }
+
// Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer->EmitWinEHHandlerData();
@@ -286,7 +298,10 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
return create32bitRef(Asm->getSymbol(GV));
}
-const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+const MCExpr *WinException::getLabel(const MCSymbol *Label) {
+ if (isAArch64)
+ return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
return MCBinaryExpr::createAdd(create32bitRef(Label),
MCConstantExpr::create(1, Asm->OutContext),
Asm->OutContext);
@@ -531,7 +546,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
};
// Emit a label assignment with the SEH frame offset so we can use it for
- // llvm.x86.seh.recoverfp.
+ // llvm.eh.recoverfp.
StringRef FLinkageName =
GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
MCSymbol *ParentFrameOffset =
@@ -588,7 +603,6 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
const MCSymbol *EndLabel, int State) {
auto &OS = *Asm->OutStreamer;
MCContext &Ctx = Asm->OutContext;
-
bool VerboseAsm = OS.isVerboseAsm();
auto AddComment = [&](const Twine &Comment) {
if (VerboseAsm)
@@ -613,9 +627,9 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
}
AddComment("LabelStart");
- OS.EmitValue(getLabelPlusOne(BeginLabel), 4);
+ OS.EmitValue(getLabel(BeginLabel), 4);
AddComment("LabelEnd");
- OS.EmitValue(getLabelPlusOne(EndLabel), 4);
+ OS.EmitValue(getLabel(EndLabel), 4);
AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
: "CatchAll");
OS.EmitValue(FilterOrFinally, 4);
@@ -799,7 +813,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
// TypeDescriptor *Type;
// int32_t CatchObjOffset;
// void (*Handler)();
- // int32_t ParentFrameOffset; // x64 only
+ // int32_t ParentFrameOffset; // x64 and AArch64 only
// };
OS.EmitLabel(HandlerMapXData);
for (const WinEHHandlerType &HT : TBME.HandlerArray) {
@@ -901,7 +915,7 @@ void WinException::computeIP2StateTable(
ChangeLabel = StateChange.PreviousEndLabel;
// Emit an entry indicating that PCs after 'Label' have this EH state.
IPToStateTable.push_back(
- std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState));
+ std::make_pair(getLabel(ChangeLabel), StateChange.NewState));
// FIXME: assert that NewState is between CatchLow and CatchHigh.
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
index eed3c4453ffc..37c796f89765 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -38,6 +38,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// True if this is a 64-bit target and we should use image relative offsets.
bool useImageRel32 = false;
+ /// True if we are generating exception handling on Windows for ARM64.
+ bool isAArch64 = false;
+
/// Pointer to the current funclet entry BB.
const MachineBasicBlock *CurrentFuncletEntry = nullptr;
@@ -65,14 +68,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable);
- /// Emits the label used with llvm.x86.seh.recoverfp, which is used by
+ /// Emits the label used with llvm.eh.recoverfp, which is used by
/// outlined funclets.
void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
StringRef FLinkageName);
const MCExpr *create32bitRef(const MCSymbol *Value);
const MCExpr *create32bitRef(const GlobalValue *GV);
- const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+ const MCExpr *getLabel(const MCSymbol *Label);
const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
const MCSymbol *OffsetFrom);
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index e28fc6fb9d4f..95581c09dd1c 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -88,7 +88,10 @@ namespace {
void expandPartwordAtomicRMW(
AtomicRMWInst *I,
TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+ AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
+ void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
static Value *insertRMWCmpXchgLoop(
@@ -96,6 +99,7 @@ namespace {
AtomicOrdering MemOpOrder,
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
CreateCmpXchgInstFun CreateCmpXchg);
+ bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool isIdempotentRMW(AtomicRMWInst *RMWI);
@@ -258,7 +262,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
isAcquireOrStronger(RMWI->getOrdering()))) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(AtomicOrdering::Monotonic);
- } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+ } else if (CASI &&
+ TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+ TargetLoweringBase::AtomicExpansionKind::None &&
(isReleaseOrStronger(CASI->getSuccessOrdering()) ||
isAcquireOrStronger(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -306,6 +312,16 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
MadeChange = true;
} else {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(RMWI);
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ if (ValueSize < MinCASSize &&
+ (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And)) {
+ RMWI = widenPartwordAtomicRMW(RMWI);
+ MadeChange = true;
+ }
+
MadeChange |= tryExpandAtomicRMW(RMWI);
}
} else if (CASI) {
@@ -322,16 +338,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange = true;
}
- unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
- unsigned ValueSize = getAtomicOpSize(CASI);
- if (ValueSize < MinCASSize) {
- assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
- "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
- expandPartwordCmpXchg(CASI);
- } else {
- if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
- MadeChange |= expandAtomicCmpXchg(CASI);
- }
+ MadeChange |= tryExpandAtomicCmpXchg(CASI);
}
}
return MadeChange;
@@ -400,8 +407,9 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
return expandAtomicLoadToLL(LI);
case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
- llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -563,6 +571,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
}
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
+ expandAtomicRMWToMaskedIntrinsic(AI);
+ return true;
+ }
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
@@ -651,6 +663,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
IRBuilder<> &Builder, Value *Loaded,
Value *Shifted_Inc, Value *Inc,
const PartwordMaskValues &PMV) {
+ // TODO: update to use
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
+ // to merge bits from two values without requiring PMV.Inv_Mask.
switch (Op) {
case AtomicRMWInst::Xchg: {
Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
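The bit hack referenced in the TODO above can be sketched as follows (standalone example, not the pass itself; maskedMerge and the literal values are illustrative): bits from the shifted operand are merged into the loaded word with one XOR/AND/XOR, so the inverse mask never has to be materialized.

    #include <cassert>
    #include <cstdint>

    // MaskedMerge: take bits from B where Mask is 1, bits from A elsewhere.
    static uint32_t maskedMerge(uint32_t A, uint32_t B, uint32_t Mask) {
      return A ^ ((A ^ B) & Mask);
    }

    int main() {
      uint32_t Loaded = 0xAABBCCDD;     // current wide word
      uint32_t ShiftedInc = 0x00EE0000; // new value already shifted into its lane
      uint32_t Mask = 0x00FF0000;       // lane occupied by the narrow atomic
      uint32_t Merged = maskedMerge(Loaded, ShiftedInc, Mask);
      // Equivalent to (Loaded & ~Mask) | (ShiftedInc & Mask), without ~Mask.
      assert(Merged == ((Loaded & ~Mask) | (ShiftedInc & Mask)));
      assert(Merged == 0xAAEECCDD);
    }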
@@ -659,12 +674,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
}
case AtomicRMWInst::Or:
case AtomicRMWInst::Xor:
- // Or/Xor won't affect any other bits, so can just be done
- // directly.
- return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ case AtomicRMWInst::And:
+ llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
- case AtomicRMWInst::And:
case AtomicRMWInst::Nand: {
// The other arithmetic ops need to be masked into place.
Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
@@ -733,6 +746,41 @@ void AtomicExpand::expandPartwordAtomicRMW(
AI->eraseFromParent();
}
+// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
+AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
+ IRBuilder<> Builder(AI);
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+
+ assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
+ Op == AtomicRMWInst::And) &&
+ "Unable to widen operation");
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *ValOperand_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+
+ Value *NewOperand;
+
+ if (Op == AtomicRMWInst::And)
+ NewOperand =
+ Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
+ else
+ NewOperand = ValOperand_Shifted;
+
+ AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
+ NewOperand, AI->getOrdering());
+
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+ return NewAI;
+}
+
void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
// The basic idea here is that we're expanding a cmpxchg of a
// smaller memory size up to a word-sized cmpxchg. To do this, we
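A minimal plain-integer sketch (not IR) of why the widened operand differs for 'and' in widenPartwordAtomicRMW above: or/xor only need zeros outside the narrow lane to leave the neighbouring bytes alone, whereas 'and' needs ones there, which is what OR-ing in Inv_Mask achieves. The values below are illustrative.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Word = 0x11223344;  // aligned word containing the i8 at bits 8..15
      uint32_t Mask = 0x0000FF00;  // lane of the narrow atomic
      uint32_t InvMask = ~Mask;
      uint8_t  Val = 0x0F;         // operand of the original i8 atomicrmw
      uint32_t Shifted = uint32_t(Val) << 8; // ValOperand_Shifted

      // or/xor: zeros outside the lane already leave the other bytes alone.
      assert((Word | Shifted) >> 16 == Word >> 16);
      assert((Word ^ Shifted) >> 16 == Word >> 16);

      // and: the operand must be all-ones outside the lane, i.e. InvMask | Shifted.
      uint32_t AndOperand = InvMask | Shifted;
      uint32_t After = Word & AndOperand;
      assert(After >> 16 == Word >> 16);             // neighbours preserved
      assert(((After >> 8) & 0xFF) == (0x33 & Val)); // lane got the i8 'and'
    }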
@@ -870,6 +918,62 @@ void AtomicExpand::expandAtomicOpToLLSC(
I->eraseFromParent();
}
+void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
+ IRBuilder<> Builder(AI);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ // The value operand must be sign-extended for signed min/max so that the
+ // target's signed comparison instructions can be used. Otherwise, just
+ // zero-ext.
+ Instruction::CastOps CastOp = Instruction::ZExt;
+ AtomicRMWInst::BinOp RMWOp = AI->getOperation();
+ if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
+ CastOp = Instruction::SExt;
+
+ Value *ValOperand_Shifted = Builder.CreateShl(
+ Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+ Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
+ Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
+ AI->getOrdering());
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+}
+
+void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
+ IRBuilder<> Builder(CI);
+
+ PartwordMaskValues PMV = createMaskInstrs(
+ Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *CmpVal_Shifted = Builder.CreateShl(
+ Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
+ "CmpVal_Shifted");
+ Value *NewVal_Shifted = Builder.CreateShl(
+ Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
+ "NewVal_Shifted");
+ Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
+ Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
+ CI->getSuccessOrdering());
+ Value *FinalOldVal = Builder.CreateTrunc(
+ Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+
+ Value *Res = UndefValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
+ Value *Success = Builder.CreateICmpEQ(
+ CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+}
+
Value *AtomicExpand::insertRMWLLSCLoop(
IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
AtomicOrdering MemOpOrder,
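A hedged integer model of the success test in expandAtomicCmpXchgToMaskedIntrinsic above (the intrinsic's return value is modeled by a plain word; all names and values are illustrative): the returned wide word is compared against the shifted expected value only within the masked lane, so bits outside the lane cannot cause a spurious failure.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Mask = 0x0000FF00;          // lane of the i8 cmpxchg
      uint32_t CmpValShifted = 0x33u << 8; // expected value, shifted into the lane
      // Wide word returned by the masked cmpxchg intrinsic; the neighbouring
      // bytes may hold anything.
      uint32_t OldVal = 0x99AA3377;

      bool Success = (CmpValShifted == (OldVal & Mask));
      assert(Success); // lane matched even though the other bytes differ

      uint32_t FinalOldVal = (OldVal >> 8) & 0xFF; // truncated old lane value
      assert(FinalOldVal == 0x33);
    }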
@@ -1275,6 +1379,28 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
return NewLoaded;
}
+bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(CI);
+
+ switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ if (ValueSize < MinCASSize)
+ expandPartwordCmpXchg(CI);
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+ assert(ValueSize >= MinCASSize &&
+ "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
+ return expandAtomicCmpXchg(CI);
+ }
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
+ expandAtomicCmpXchgToMaskedIntrinsic(CI);
+ return true;
+ }
+}
+
// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index c7a0c6457164..efbfd5f4ab2c 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -298,7 +298,7 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
/// Whether MI should be counted as an instruction when calculating common tail.
static bool countsAsInstruction(const MachineInstr &MI) {
- return !(MI.isDebugValue() || MI.isCFIInstruction());
+ return !(MI.isDebugInstr() || MI.isCFIInstruction());
}
/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
@@ -865,7 +865,7 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
// Merge MMOs from memory operations in the common block.
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
- MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
+ MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI});
// Drop undef flags if they aren't present in all merged instructions.
for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
MachineOperand &MO = MBBICommon->getOperand(I);
@@ -1363,9 +1363,9 @@ static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII,
MachineBasicBlock &PredMBB) {
auto InsertBefore = PredMBB.getFirstTerminator();
for (MachineInstr &MI : MBB.instrs())
- if (MI.isDebugValue()) {
+ if (MI.isDebugInstr()) {
TII->duplicate(PredMBB, InsertBefore, MI);
- LLVM_DEBUG(dbgs() << "Copied debug value from empty block to pred: "
+ LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to pred: "
<< MI);
}
}
@@ -1375,9 +1375,9 @@ static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII,
MachineBasicBlock &SuccMBB) {
auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin());
for (MachineInstr &MI : MBB.instrs())
- if (MI.isDebugValue()) {
+ if (MI.isDebugInstr()) {
TII->duplicate(SuccMBB, InsertBefore, MI);
- LLVM_DEBUG(dbgs() << "Copied debug value from empty block to succ: "
+ LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to succ: "
<< MI);
}
}
diff --git a/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 7f098cb71657..210699cbf239 100644
--- a/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
}
bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+ unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
unsigned Clearance = RDA->getClearance(MI, reg);
LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
diff --git a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
index 3a9b20aa661d..93939e573b7b 100644
--- a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
+++ b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/BuiltinGCs.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
@@ -28,10 +28,8 @@ namespace {
class ErlangGC : public GCStrategy {
public:
ErlangGC() {
- InitRoots = false;
- NeededSafePoints = 1 << GC::PostCall;
+ NeededSafePoints = true;
UsesMetadata = true;
- CustomRoots = false;
}
};
@@ -41,7 +39,7 @@ public:
class OcamlGC : public GCStrategy {
public:
OcamlGC() {
- NeededSafePoints = 1 << GC::PostCall;
+ NeededSafePoints = true;
UsesMetadata = true;
}
};
@@ -56,10 +54,7 @@ public:
/// while introducing only minor runtime overhead.
class ShadowStackGC : public GCStrategy {
public:
- ShadowStackGC() {
- InitRoots = true;
- CustomRoots = true;
- }
+ ShadowStackGC() {}
};
/// A GCStrategy which serves as an example for the usage of a statepoint based
@@ -74,10 +69,8 @@ public:
UseStatepoints = true;
// These options are all gc.root specific, we specify them so that the
// gc.root lowering code doesn't run.
- InitRoots = false;
- NeededSafePoints = 0;
+ NeededSafePoints = false;
UsesMetadata = false;
- CustomRoots = false;
}
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
@@ -108,10 +101,8 @@ public:
UseStatepoints = true;
// These options are all gc.root specific, we specify them so that the
// gc.root lowering code doesn't run.
- InitRoots = false;
- NeededSafePoints = 0;
+ NeededSafePoints = false;
UsesMetadata = false;
- CustomRoots = false;
}
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
@@ -136,9 +127,5 @@ static GCRegistry::Add<StatepointGC> D("statepoint-example",
"an example strategy for statepoint");
static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC");
-// Provide hooks to ensure the containing library is fully loaded.
-void llvm::linkErlangGC() {}
-void llvm::linkOcamlGC() {}
-void llvm::linkShadowStackGC() {}
-void llvm::linkStatepointExampleGC() {}
-void llvm::linkCoreCLRGC() {}
+// Provide hook to ensure the containing library is fully loaded.
+void llvm::linkAllBuiltinGCs() {}
diff --git a/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 00ebf63fc174..c4799855a2b3 100644
--- a/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -207,6 +207,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
case MCCFIInstruction::OpUndefined:
case MCCFIInstruction::OpRegister:
case MCCFIInstruction::OpWindowSave:
+ case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpGnuArgsSize:
break;
}
@@ -317,6 +318,10 @@ unsigned CFIInstrInserter::verify(MachineFunction &MF) {
// outgoing offset and register values of CurrMBB
if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset ||
SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) {
+ // Inconsistent offsets/registers are ok for 'noreturn' blocks because
+ // we don't generate epilogues inside such blocks.
+ if (SuccMBBInfo.MBB->succ_empty() && !SuccMBBInfo.MBB->isReturnBlock())
+ continue;
report(CurrMBBInfo, SuccMBBInfo);
ErrorNum++;
}
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 57541182cab2..02347b9f0b5c 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -70,15 +70,6 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
return sub == hsub ? hreg : 0;
const TargetRegisterClass *rc = mri.getRegClass(reg);
- if (!tri.enableMultipleCopyHints()) {
- // Only allow physreg hints in rc.
- if (sub == 0)
- return rc->contains(hreg) ? hreg : 0;
-
- // reg:sub should match the physreg hreg.
- return tri.getMatchingSuperReg(hreg, sub, rc);
- }
-
unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
if (rc->contains(CopiedPReg))
return CopiedPReg;
@@ -199,31 +190,19 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
unsigned Reg;
float Weight;
bool IsPhys;
- unsigned HintOrder;
- CopyHint(unsigned R, float W, bool P, unsigned HR) :
- Reg(R), Weight(W), IsPhys(P), HintOrder(HR) {}
+ CopyHint(unsigned R, float W, bool P) :
+ Reg(R), Weight(W), IsPhys(P) {}
bool operator<(const CopyHint &rhs) const {
// Always prefer any physreg hint.
if (IsPhys != rhs.IsPhys)
return (IsPhys && !rhs.IsPhys);
if (Weight != rhs.Weight)
return (Weight > rhs.Weight);
-
- // This is just a temporary way to achive NFC for targets that don't
- // enable multiple copy hints. HintOrder should be removed when all
- // targets return true in enableMultipleCopyHints().
- return (HintOrder < rhs.HintOrder);
-
-#if 0 // Should replace the HintOrder check, see above.
- // (just for the purpose of maintaining the set)
- return Reg < rhs.Reg;
-#endif
+ return Reg < rhs.Reg; // Tie-breaker.
}
};
std::set<CopyHint> CopyHints;
- // Temporary: see comment for HintOrder above.
- unsigned CopyHintOrder = 0;
for (MachineRegisterInfo::reg_instr_iterator
I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
I != E; ) {
@@ -263,8 +242,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
}
// Get allocation hints from copies.
- if (!mi->isCopy() ||
- (TargetHint.first != 0 && !tri.enableMultipleCopyHints()))
+ if (!mi->isCopy())
continue;
unsigned hint = copyHint(mi, li.reg, tri, mri);
if (!hint)
@@ -275,8 +253,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// FIXME: we probably shouldn't use floats at all.
volatile float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint))
- CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint),
- (tri.enableMultipleCopyHints() ? hint : CopyHintOrder++)));
+ CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint)));
}
Hint.clear();
@@ -287,13 +264,13 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
if (TargetHint.first == 0 && TargetHint.second)
mri.clearSimpleHint(li.reg);
+ std::set<unsigned> HintedRegs;
for (auto &Hint : CopyHints) {
- if (TargetHint.first != 0 && Hint.Reg == TargetHint.second)
- // Don't add again the target-type hint.
+ if (!HintedRegs.insert(Hint.Reg).second ||
+ (TargetHint.first != 0 && Hint.Reg == TargetHint.second))
+ // Don't add the same reg twice or the target-type hint again.
continue;
mri.addRegAllocationHint(li.reg, Hint.Reg);
- if (!tri.enableMultipleCopyHints())
- break;
}
// Weakly boost the spill weight of hinted registers.
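A small standalone sketch of the ordering change above, using a stripped-down CopyHint with only the relevant fields: without a deterministic tie-breaker, two distinct registers with equal weight and kind compare equivalent under std::set's strict weak ordering and one of them would be silently dropped; the Reg field restores a total order, and a separate HintedRegs set deduplicates registers inserted with different weights.

    #include <cassert>
    #include <set>

    struct CopyHint {
      unsigned Reg;
      float Weight;
      bool IsPhys;
      bool operator<(const CopyHint &RHS) const {
        if (IsPhys != RHS.IsPhys)
          return IsPhys && !RHS.IsPhys; // physregs first
        if (Weight != RHS.Weight)
          return Weight > RHS.Weight;   // heavier hints first
        return Reg < RHS.Reg;           // tie-breaker keeps distinct regs distinct
      }
    };

    int main() {
      std::set<CopyHint> Hints;
      Hints.insert({5, 1.0f, false});
      Hints.insert({7, 1.0f, false}); // same weight/kind, different register
      assert(Hints.size() == 2);      // without the Reg tie-breaker this would be 1

      // The same register can still appear twice (different weights), so the
      // emission loop walks the set and skips registers it has already hinted.
      Hints.insert({5, 2.0f, false});
      std::set<unsigned> HintedRegs;
      unsigned Emitted = 0;
      for (const auto &H : Hints)
        if (HintedRegs.insert(H.Reg).second)
          ++Emitted;
      assert(Emitted == 2);
    }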
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 2f845354c570..66166482c78b 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -42,6 +42,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);
+ initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index be685b26a9ea..c35f8666fa3c 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -278,7 +278,7 @@ class TypePromotionTransaction;
/// Keep track of GEPs accessing the same data structures such as structs or
/// arrays that are candidates to be split later because of their large
/// size.
- DenseMap<
+ MapVector<
AssertingVH<Value>,
SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
LargeOffsetGEPMap;
@@ -321,6 +321,24 @@ class TypePromotionTransaction;
}
private:
+ template <typename F>
+ void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
+ // Substituting can cause recursive simplifications, which can invalidate
+ // our iterator. Use a WeakTrackingVH to hold onto it in case this
+ // happens.
+ Value *CurValue = &*CurInstIterator;
+ WeakTrackingVH IterHandle(CurValue);
+
+ f();
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurValue) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ }
+
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -398,7 +416,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
OptSize = F.optForSize();
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
@@ -426,11 +444,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
- // llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
- // find a node corresponding to the value.
- EverMadeChange |= placeDbgValues(F);
-
if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F);
@@ -441,11 +454,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
- SeenChainsForSExt.clear();
- ValToSExtendedUses.clear();
- RemovedInsts.clear();
- LargeOffsetGEPMap.clear();
- LargeOffsetGEPID.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -465,6 +473,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
I->deleteValue();
EverMadeChange |= MadeChange;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
+ LargeOffsetGEPMap.clear();
+ LargeOffsetGEPID.clear();
}
SunkAddrs.clear();
@@ -518,6 +531,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
+ // Do this last to clean up use-before-def scenarios introduced by other
+ // preparatory transforms.
+ EverMadeChange |= placeDbgValues(F);
+
return EverMadeChange;
}
@@ -651,7 +668,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
isa<IndirectBrInst>(Pred->getTerminator())))
return true;
- if (BB->getTerminator() != BB->getFirstNonPHI())
+ if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
return true;
// We use a simple cost heuristic which determine skipping merging is
@@ -1165,11 +1182,15 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+ DebugLoc Loc = CI->getDebugLoc();
auto *UAddWithOverflow =
CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ UAddWithOverflow->setDebugLoc(Loc);
auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ UAdd->setDebugLoc(Loc);
auto *Overflow =
ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+ Overflow->setDebugLoc(Loc);
CI->replaceAllUsesWith(Overflow);
AddI->replaceAllUsesWith(UAdd);
@@ -1402,6 +1423,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
else
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
"", &*InsertPt);
+ InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
// Sink the trunc
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
@@ -1410,6 +1432,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
TruncI->getType(), "", &*TruncInsertPt);
+ InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
MadeChange = true;
@@ -1501,6 +1524,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
else
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
"", &*InsertPt);
+ InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
MadeChange = true;
}
@@ -1510,8 +1534,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
}
// If we removed all uses, nuke the shift.
- if (ShiftI->use_empty())
+ if (ShiftI->use_empty()) {
+ salvageDebugInfo(*ShiftI);
ShiftI->eraseFromParent();
+ }
return MadeChange;
}
@@ -1682,21 +1708,18 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// Lower all uses of llvm.objectsize.*
ConstantInt *RetVal =
lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
- // Substituting this can cause recursive simplifications, which can
- // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case
- // this
- // happens.
- Value *CurValue = &*CurInstIterator;
- WeakTrackingVH IterHandle(CurValue);
-
- replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- if (IterHandle != CurValue) {
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
+ return true;
+ }
+ case Intrinsic::is_constant: {
+ // If is_constant hasn't folded away yet, lower it to false now.
+ Constant *RetVal = ConstantInt::get(II->getType(), 0);
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
return true;
}
case Intrinsic::aarch64_stlxr:
@@ -1713,11 +1736,22 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
return true;
}
case Intrinsic::launder_invariant_group:
- case Intrinsic::strip_invariant_group:
- II->replaceAllUsesWith(II->getArgOperand(0));
+ case Intrinsic::strip_invariant_group: {
+ Value *ArgVal = II->getArgOperand(0);
+ auto it = LargeOffsetGEPMap.find(II);
+ if (it != LargeOffsetGEPMap.end()) {
+ // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
+ // Make sure not to have to deal with iterator invalidation
+ // after possibly adding ArgVal to LargeOffsetGEPMap.
+ auto GEPs = std::move(it->second);
+ LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
+ LargeOffsetGEPMap.erase(II);
+ }
+
+ II->replaceAllUsesWith(ArgVal);
II->eraseFromParent();
return true;
-
+ }
case Intrinsic::cttz:
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
@@ -1863,15 +1897,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
CallInst *CI = TailCalls[i];
CallSite CS(CI);
- // Conservatively require the attributes of the call to match those of the
- // return. Ignore noalias because it doesn't affect the call sequence.
- AttributeList CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
- .removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
- .removeAttribute(Attribute::NoAlias))
- continue;
-
// Make sure the call instruction is followed by an unconditional branch to
// the return block.
BasicBlock *CallBB = CI->getParent();
@@ -2337,6 +2362,8 @@ class TypePromotionTransaction {
/// Keep track of the original uses (pair Instruction, Index).
SmallVector<InstructionAndIdx, 4> OriginalUses;
+ /// Keep track of the debug users.
+ SmallVector<DbgValueInst *, 1> DbgValues;
using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
@@ -2350,6 +2377,10 @@ class TypePromotionTransaction {
Instruction *UserI = cast<Instruction>(U.getUser());
OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
}
+ // Record the debug uses separately. They are not in the instruction's
+ // use list, but they are replaced by RAUW.
+ findDbgValues(DbgValues, Inst);
+
// Now, we can replace the uses.
Inst->replaceAllUsesWith(New);
}
@@ -2362,6 +2393,15 @@ class TypePromotionTransaction {
UseIt != EndIt; ++UseIt) {
UseIt->Inst->setOperand(UseIt->Idx, Inst);
}
+ // RAUW has replaced all original uses with references to the new value,
+ // including the debug uses. Since we are undoing the replacements,
+ // the original debug uses must also be reinstated to maintain the
+ // correctness and utility of debug value instructions.
+ for (auto *DVI: DbgValues) {
+ LLVMContext &Ctx = Inst->getType()->getContext();
+ auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
+ DVI->setOperand(0, MV);
+ }
}
};
@@ -2632,15 +2672,159 @@ private:
Value *PromotedOperand) const;
};
+class PhiNodeSet;
+
+/// An iterator for PhiNodeSet.
+class PhiNodeSetIterator {
+ PhiNodeSet * const Set;
+ size_t CurrentIndex = 0;
+
+public:
+ /// The constructor. Start should point to either a valid element, or be equal
+ /// to the size of the underlying SmallVector of the PhiNodeSet.
+ PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
+ PHINode * operator*() const;
+ PhiNodeSetIterator& operator++();
+ bool operator==(const PhiNodeSetIterator &RHS) const;
+ bool operator!=(const PhiNodeSetIterator &RHS) const;
+};
+
+/// Keeps a set of PHINodes.
+///
+/// This is a minimal set implementation for a specific use case:
+/// It is very fast when there are very few elements, but also provides good
+/// performance when there are many. It is similar to SmallPtrSet, but also
+/// provides iteration by insertion order, which is deterministic and stable
+/// across runs. It is also similar to SmallSetVector, but allows removing
+/// elements in O(1) time. This is achieved by not actually removing the element
+/// from the underlying vector, so it comes at the cost of using more memory,
+/// but that is fine, since PhiNodeSets are used as short-lived objects.
+class PhiNodeSet {
+ friend class PhiNodeSetIterator;
+
+ using MapType = SmallDenseMap<PHINode *, size_t, 32>;
+ using iterator = PhiNodeSetIterator;
+
+ /// Keeps the elements in the order of their insertion in the underlying
+ /// vector. To achieve constant time removal, it never deletes any element.
+ SmallVector<PHINode *, 32> NodeList;
+
+ /// Keeps the elements in the underlying set implementation. This (and not the
+ /// NodeList defined above) is the source of truth on whether an element
+ /// is actually in the collection.
+ MapType NodeMap;
+
+ /// Points to the first valid (not deleted) element when the set is not empty
+ /// and the value is not zero. Equals the size of the underlying vector
+ /// when the set is empty. When the value is 0, as in the beginning, the
+ /// first element may or may not be valid.
+ size_t FirstValidElement = 0;
+
+public:
+ /// Inserts a new element to the collection.
+ /// \returns true if the element is actually added, i.e. was not in the
+ /// collection before the operation.
+ bool insert(PHINode *Ptr) {
+ if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
+ NodeList.push_back(Ptr);
+ return true;
+ }
+ return false;
+ }
+
+ /// Removes the element from the collection.
+ /// \returns whether the element is actually removed, i.e. was in the
+ /// collection before the operation.
+ bool erase(PHINode *Ptr) {
+ auto it = NodeMap.find(Ptr);
+ if (it != NodeMap.end()) {
+ NodeMap.erase(Ptr);
+ SkipRemovedElements(FirstValidElement);
+ return true;
+ }
+ return false;
+ }
+
+ /// Removes all elements and clears the collection.
+ void clear() {
+ NodeMap.clear();
+ NodeList.clear();
+ FirstValidElement = 0;
+ }
+
+ /// \returns an iterator that will iterate the elements in the order of
+ /// insertion.
+ iterator begin() {
+ if (FirstValidElement == 0)
+ SkipRemovedElements(FirstValidElement);
+ return PhiNodeSetIterator(this, FirstValidElement);
+ }
+
+ /// \returns an iterator that points to the end of the collection.
+ iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
+
+ /// Returns the number of elements in the collection.
+ size_t size() const {
+ return NodeMap.size();
+ }
+
+ /// \returns 1 if the given element is in the collection, and 0 if otherwise.
+ size_t count(PHINode *Ptr) const {
+ return NodeMap.count(Ptr);
+ }
+
+private:
+ /// Updates the CurrentIndex so that it will point to a valid element.
+ ///
+ /// If the element of NodeList at CurrentIndex is valid, it does not
+ /// change it. If there are no more valid elements, it updates CurrentIndex
+ /// to point to the end of the NodeList.
+ void SkipRemovedElements(size_t &CurrentIndex) {
+ while (CurrentIndex < NodeList.size()) {
+ auto it = NodeMap.find(NodeList[CurrentIndex]);
+ // If the element has been deleted and added again later, NodeMap will
+ // point to a different index, so CurrentIndex will still be invalid.
+ if (it != NodeMap.end() && it->second == CurrentIndex)
+ break;
+ ++CurrentIndex;
+ }
+ }
+};
+
+PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
+ : Set(Set), CurrentIndex(Start) {}
+
+PHINode * PhiNodeSetIterator::operator*() const {
+ assert(CurrentIndex < Set->NodeList.size() &&
+ "PhiNodeSet access out of range");
+ return Set->NodeList[CurrentIndex];
+}
+
+PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
+ assert(CurrentIndex < Set->NodeList.size() &&
+ "PhiNodeSet access out of range");
+ ++CurrentIndex;
+ Set->SkipRemovedElements(CurrentIndex);
+ return *this;
+}
+
+bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
+ return CurrentIndex == RHS.CurrentIndex;
+}
+
+bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
+ return !((*this) == RHS);
+}
+
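A minimal usage sketch, not part of the patch (P1 and P2 stand for pre-existing PHINode pointers); erasing an element only tombstones its vector slot, so the remaining elements are still visited in insertion order:

    PhiNodeSet Set;
    Set.insert(P1);            // true: newly inserted
    Set.insert(P2);
    Set.insert(P1);            // false: already present
    Set.erase(P2);             // O(1): removed from NodeMap, slot kept in NodeList
    assert(Set.size() == 1 && Set.count(P1) == 1);
    for (PHINode *P : Set)     // begin()/end() skip tombstoned slots
      (void)P;                 // visits P1 only, in insertion order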
/// Keep track of simplification of Phi nodes.
/// Accept the set of all phi nodes and erase phi node from this set
/// if it is simplified.
class SimplificationTracker {
DenseMap<Value *, Value *> Storage;
const SimplifyQuery &SQ;
- // Tracks newly created Phi nodes. We use a SetVector to get deterministic
- // order when iterating over the set in MatchPhiSet.
- SmallSetVector<PHINode *, 32> AllPhiNodes;
+ // Tracks newly created Phi nodes. The elements are iterated by insertion
+ // order.
+ PhiNodeSet AllPhiNodes;
// Tracks newly created Select nodes.
SmallPtrSet<SelectInst *, 32> AllSelectNodes;
@@ -2672,7 +2856,7 @@ public:
Put(PI, V);
PI->replaceAllUsesWith(V);
if (auto *PHI = dyn_cast<PHINode>(PI))
- AllPhiNodes.remove(PHI);
+ AllPhiNodes.erase(PHI);
if (auto *Select = dyn_cast<SelectInst>(PI))
AllSelectNodes.erase(Select);
PI->eraseFromParent();
@@ -2695,11 +2879,11 @@ public:
assert(Get(To) == To && "Replacement PHI node is already replaced.");
Put(From, To);
From->replaceAllUsesWith(To);
- AllPhiNodes.remove(From);
+ AllPhiNodes.erase(From);
From->eraseFromParent();
}
- SmallSetVector<PHINode *, 32>& newPhiNodes() { return AllPhiNodes; }
+ PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
@@ -2727,8 +2911,7 @@ public:
/// A helper class for combining addressing modes.
class AddressingModeCombiner {
- typedef std::pair<Value *, BasicBlock *> ValueInBB;
- typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping;
+ typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
typedef std::pair<PHINode *, PHINode *> PHIPair;
private:
@@ -2748,10 +2931,10 @@ private:
const SimplifyQuery &SQ;
/// Original Address.
- ValueInBB Original;
+ Value *Original;
public:
- AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue)
+ AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
: CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
/// Get the combined AddrMode
@@ -2847,46 +3030,40 @@ public:
}
private:
- /// Initialize Map with anchor values. For address seen in some BB
+ /// Initialize Map with anchor values. For each address seen,
/// we record the value of the differing field of that address.
- /// If address is not an instruction than basic block is set to null.
/// At the same time we find a common type for the differing fields we will
/// use to create new Phi/Select nodes. Keep it in CommonType field.
/// Return false if there is no common type found.
bool initializeMap(FoldAddrToValueMapping &Map) {
// Keep track of keys where the value is null. We will need to replace it
// with constant null when we know the common type.
- SmallVector<ValueInBB, 2> NullValue;
+ SmallVector<Value *, 2> NullValue;
Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
for (auto &AM : AddrModes) {
- BasicBlock *BB = nullptr;
- if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue))
- BB = I->getParent();
-
Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
if (DV) {
auto *Type = DV->getType();
if (CommonType && CommonType != Type)
return false;
CommonType = Type;
- Map[{ AM.OriginalValue, BB }] = DV;
+ Map[AM.OriginalValue] = DV;
} else {
- NullValue.push_back({ AM.OriginalValue, BB });
+ NullValue.push_back(AM.OriginalValue);
}
}
assert(CommonType && "At least one non-null value must be!");
- for (auto VIBB : NullValue)
- Map[VIBB] = Constant::getNullValue(CommonType);
+ for (auto *V : NullValue)
+ Map[V] = Constant::getNullValue(CommonType);
return true;
}
- /// We have mapping between value A and basic block where value A
- /// seen to other value B where B was a field in addressing mode represented
- /// by A. Also we have an original value C representing an address in some
- /// basic block. Traversing from C through phi and selects we ended up with
- /// A's in a map. This utility function tries to find a value V which is a
- /// field in addressing mode C and traversing through phi nodes and selects
- /// we will end up in corresponded values B in a map.
+ /// We have a mapping between value A and another value B, where B was a field
+ /// in the addressing mode represented by A. We also have an original value C
+ /// representing the address we start with. Traversing from C through phis and
+ /// selects we ended up with the A's in the map. This utility function tries to
+ /// find a value V which is a field in addressing mode C such that, traversing
+ /// through phi nodes and selects, we end up at the corresponding values B in
+ /// the map.
/// The utility will create new Phi/Select nodes if needed.
// The simple example looks as follows:
// BB1:
@@ -2899,22 +3076,24 @@ private:
// p = phi [p1, BB1], [p2, BB2]
// v = load p
// Map is
- // <p1, BB1> -> b1
- // <p2, BB2> -> b2
+ // p1 -> b1
+ // p2 -> b2
// Request is
- // <p, BB3> -> ?
- // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3
+ // p -> ?
+ // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
Value *findCommon(FoldAddrToValueMapping &Map) {
// Tracks the simplification of newly created phi nodes. The reason we use
// this mapping is because we will add new created Phi nodes in AddrToBase.
// Simplification of Phi nodes is recursive, so some Phi node may
- // be simplified after we added it to AddrToBase.
+ // be simplified after we added it to AddrToBase. In reality this
+ // simplification is possible only if original phi/selects were not
+ // simplified yet.
// Using this mapping we can find the current value in AddrToBase.
SimplificationTracker ST(SQ);
// First step, DFS to create PHI nodes for all intermediate blocks.
// Also fill traverse order for the second step.
- SmallVector<ValueInBB, 32> TraverseOrder;
+ SmallVector<Value *, 32> TraverseOrder;
InsertPlaceholders(Map, TraverseOrder, ST);
// Second Step, fill new nodes by merged values and simplify if possible.
@@ -2944,7 +3123,7 @@ private:
/// Matcher tracks the matched Phi nodes.
bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
SmallSetVector<PHIPair, 8> &Matcher,
- SmallSetVector<PHINode *, 32> &PhiNodesToMatch) {
+ PhiNodeSet &PhiNodesToMatch) {
SmallVector<PHIPair, 8> WorkList;
Matcher.insert({ PHI, Candidate });
WorkList.push_back({ PHI, Candidate });
@@ -2993,11 +3172,12 @@ private:
/// Returns false if this matching fails and creation of new Phi is disabled.
bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
unsigned &PhiNotMatchedCount) {
- // Use a SetVector for Matched to make sure we do replacements (ReplacePhi)
- // in a deterministic order below.
+ // Matched and PhiNodesToMatch iterate their elements in a deterministic
+ // order, so the replacements (ReplacePhi) are also done in a deterministic
+ // order.
SmallSetVector<PHIPair, 8> Matched;
SmallPtrSet<PHINode *, 8> WillNotMatch;
- SmallSetVector<PHINode *, 32> &PhiNodesToMatch = ST.newPhiNodes();
+ PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
while (PhiNodesToMatch.size()) {
PHINode *PHI = *PhiNodesToMatch.begin();
@@ -3032,129 +3212,86 @@ private:
// Just remove all seen values in matcher. They will not match anything.
PhiNotMatchedCount += WillNotMatch.size();
for (auto *P : WillNotMatch)
- PhiNodesToMatch.remove(P);
+ PhiNodesToMatch.erase(P);
}
return true;
}
- /// Fill the placeholder with values from predecessors and simplify it.
+ /// Fill the placeholders with values from predecessors and simplify them.
void FillPlaceholders(FoldAddrToValueMapping &Map,
- SmallVectorImpl<ValueInBB> &TraverseOrder,
+ SmallVectorImpl<Value *> &TraverseOrder,
SimplificationTracker &ST) {
while (!TraverseOrder.empty()) {
- auto Current = TraverseOrder.pop_back_val();
+ Value *Current = TraverseOrder.pop_back_val();
assert(Map.find(Current) != Map.end() && "No node to fill!!!");
- Value *CurrentValue = Current.first;
- BasicBlock *CurrentBlock = Current.second;
Value *V = Map[Current];
if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
// CurrentValue also must be Select.
- auto *CurrentSelect = cast<SelectInst>(CurrentValue);
+ auto *CurrentSelect = cast<SelectInst>(Current);
auto *TrueValue = CurrentSelect->getTrueValue();
- ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue)
- ? CurrentBlock
- : nullptr };
- assert(Map.find(TrueItem) != Map.end() && "No True Value!");
- Select->setTrueValue(ST.Get(Map[TrueItem]));
+ assert(Map.find(TrueValue) != Map.end() && "No True Value!");
+ Select->setTrueValue(ST.Get(Map[TrueValue]));
auto *FalseValue = CurrentSelect->getFalseValue();
- ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue)
- ? CurrentBlock
- : nullptr };
- assert(Map.find(FalseItem) != Map.end() && "No False Value!");
- Select->setFalseValue(ST.Get(Map[FalseItem]));
+ assert(Map.find(FalseValue) != Map.end() && "No False Value!");
+ Select->setFalseValue(ST.Get(Map[FalseValue]));
} else {
// Must be a Phi node then.
PHINode *PHI = cast<PHINode>(V);
+ auto *CurrentPhi = dyn_cast<PHINode>(Current);
// Fill the Phi node with values from predecessors.
- bool IsDefinedInThisBB =
- cast<Instruction>(CurrentValue)->getParent() == CurrentBlock;
- auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue);
- for (auto B : predecessors(CurrentBlock)) {
- Value *PV = IsDefinedInThisBB
- ? CurrentPhi->getIncomingValueForBlock(B)
- : CurrentValue;
- ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr };
- assert(Map.find(item) != Map.end() && "No predecessor Value!");
- PHI->addIncoming(ST.Get(Map[item]), B);
+ for (auto B : predecessors(PHI->getParent())) {
+ Value *PV = CurrentPhi->getIncomingValueForBlock(B);
+ assert(Map.find(PV) != Map.end() && "No predecessor Value!");
+ PHI->addIncoming(ST.Get(Map[PV]), B);
}
}
- // Simplify if possible.
Map[Current] = ST.Simplify(V);
}
}
- /// Starting from value recursively iterates over predecessors up to known
- /// ending values represented in a map. For each traversed block inserts
- /// a placeholder Phi or Select.
+ /// Starting from original value recursively iterates over def-use chain up to
+ /// known ending values represented in a map. For each traversed phi/select
+ /// inserts a placeholder Phi or Select.
/// Reports all new created Phi/Select nodes by adding them to set.
- /// Also reports and order in what basic blocks have been traversed.
+ /// Also reports the order in which values have been traversed.
void InsertPlaceholders(FoldAddrToValueMapping &Map,
- SmallVectorImpl<ValueInBB> &TraverseOrder,
+ SmallVectorImpl<Value *> &TraverseOrder,
SimplificationTracker &ST) {
- SmallVector<ValueInBB, 32> Worklist;
- assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) &&
+ SmallVector<Value *, 32> Worklist;
+ assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
"Address must be a Phi or Select node");
auto *Dummy = UndefValue::get(CommonType);
Worklist.push_back(Original);
while (!Worklist.empty()) {
- auto Current = Worklist.pop_back_val();
- // If value is not an instruction it is something global, constant,
- // parameter and we can say that this value is observable in any block.
- // Set block to null to denote it.
- // Also please take into account that it is how we build anchors.
- if (!isa<Instruction>(Current.first))
- Current.second = nullptr;
+ Value *Current = Worklist.pop_back_val();
// if it is already visited or it is an ending value then skip it.
if (Map.find(Current) != Map.end())
continue;
TraverseOrder.push_back(Current);
- Value *CurrentValue = Current.first;
- BasicBlock *CurrentBlock = Current.second;
// CurrentValue must be a Phi node or select. All others must be covered
// by anchors.
- Instruction *CurrentI = cast<Instruction>(CurrentValue);
- bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock;
-
- unsigned PredCount = pred_size(CurrentBlock);
- // if Current Value is not defined in this basic block we are interested
- // in values in predecessors.
- if (!IsDefinedInThisBB) {
- assert(PredCount && "Unreachable block?!");
- PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
- &CurrentBlock->front());
- Map[Current] = PHI;
- ST.insertNewPhi(PHI);
- // Add all predecessors in work list.
- for (auto B : predecessors(CurrentBlock))
- Worklist.push_back({ CurrentValue, B });
- continue;
- }
- // Value is defined in this basic block.
- if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) {
+ if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
// Is it OK to get metadata from CurrentSelect?!
// Create a Select placeholder with dummy value.
- SelectInst *Select =
- SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy,
- OrigSelect->getName(), OrigSelect, OrigSelect);
+ SelectInst *Select = SelectInst::Create(
+ CurrentSelect->getCondition(), Dummy, Dummy,
+ CurrentSelect->getName(), CurrentSelect, CurrentSelect);
Map[Current] = Select;
ST.insertNewSelect(Select);
- // We are interested in True and False value in this basic block.
- Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock });
- Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock });
+ // We are interested in True and False values.
+ Worklist.push_back(CurrentSelect->getTrueValue());
+ Worklist.push_back(CurrentSelect->getFalseValue());
} else {
// It must be a Phi node then.
- auto *CurrentPhi = cast<PHINode>(CurrentI);
- // Create new Phi node for merge of bases.
- assert(PredCount && "Unreachable block?!");
- PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
- &CurrentBlock->front());
+ PHINode *CurrentPhi = cast<PHINode>(Current);
+ unsigned PredCount = CurrentPhi->getNumIncomingValues();
+ PHINode *PHI =
+ PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
Map[Current] = PHI;
ST.insertNewPhi(PHI);
-
- // Add all predecessors in work list.
- for (auto B : predecessors(CurrentBlock))
- Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B });
+ for (Value *P : CurrentPhi->incoming_values())
+ Worklist.push_back(P);
}
}
}
@@ -3843,8 +3980,13 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
} else {
uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue() * TypeSize;
- } else if (TypeSize) { // Scales of zero don't do anything.
+ const APInt &CVal = CI->getValue();
+ if (CVal.getMinSignedBits() <= 64) {
+ ConstantOffset += CVal.getSExtValue() * TypeSize;
+ continue;
+ }
+ }
+ if (TypeSize) { // Scales of zero don't do anything.
// We only allow one variable index at the moment.
if (VariableOperand != -1)
return false;
@@ -4368,7 +4510,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool PhiOrSelectSeen = false;
SmallVector<Instruction*, 16> AddrModeInsts;
const SimplifyQuery SQ(*DL, TLInfo);
- AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() });
+ AddressingModeCombiner AddrModes(SQ, Addr);
TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
@@ -4985,8 +5127,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
};
// Sorting all the GEPs of the same data structures based on the offsets.
- llvm::sort(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end(),
- compareGEPOffset);
+ llvm::sort(LargeOffsetGEPs, compareGEPOffset);
LargeOffsetGEPs.erase(
std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
LargeOffsetGEPs.end());
@@ -5019,11 +5160,11 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
}
// Generate a new GEP to replace the current one.
- IRBuilder<> Builder(GEP);
+ LLVMContext &Ctx = GEP->getContext();
Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
Type *I8PtrTy =
- Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
- Type *I8Ty = Builder.getInt8Ty();
+ Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
+ Type *I8Ty = Type::getInt8Ty(Ctx);
if (!NewBaseGEP) {
// Create a new base if we don't have one yet. Find the insertion
@@ -5059,6 +5200,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewGEPBases.insert(NewBaseGEP);
}
+ IRBuilder<> Builder(GEP);
Value *NewGEP = NewBaseGEP;
if (Offset == BaseOffset) {
if (GEP->getType() != I8PtrTy)
@@ -5587,6 +5729,10 @@ static Value *getTrueOrFalseValue(
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
+ // If branch conversion isn't desirable, exit early.
+ if (DisableSelectToBranch || OptSize || !TLI)
+ return false;
+
// Find all consecutive select instructions that share the same condition.
SmallVector<SelectInst *, 2> ASI;
ASI.push_back(SI);
@@ -5608,8 +5754,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
- if (DisableSelectToBranch || OptSize || !TLI || VectorCond ||
- SI->getMetadata(LLVMContext::MD_unpredictable))
+ if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
return false;
TargetLowering::SelectSupportKind SelectKind;
@@ -5672,6 +5817,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
EndBlock->getParent(), EndBlock);
TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ TrueBranch->setDebugLoc(SI->getDebugLoc());
}
auto *TrueInst = cast<Instruction>(SI->getTrueValue());
TrueInst->moveBefore(TrueBranch);
@@ -5681,6 +5827,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
EndBlock->getParent(), EndBlock);
FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
}
auto *FalseInst = cast<Instruction>(SI->getFalseValue());
FalseInst->moveBefore(FalseBranch);
@@ -5695,7 +5842,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
EndBlock->getParent(), EndBlock);
- BranchInst::Create(EndBlock, FalseBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
}
// Insert the real conditional branch based on the original condition.
@@ -5730,6 +5878,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
PN->takeName(SI);
PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+ PN->setDebugLoc(SI->getDebugLoc());
SI->replaceAllUsesWith(PN);
SI->eraseFromParent();
@@ -5841,6 +5990,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
+ ExtInst->setDebugLoc(SI->getDebugLoc());
SI->setCondition(ExtInst);
for (auto Case : SI->cases()) {
APInt NarrowConst = Case.getCaseValue()->getValue();
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index cd302e78cc3e..68034afe98d5 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -250,8 +250,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
LLVM_DEBUG({
dbgs() << "Scheduling DAG of the packetize region\n";
- for (SUnit &SU : VLIWScheduler->SUnits)
- SU.dumpAll(VLIWScheduler);
+ VLIWScheduler->dump();
});
// Generate MI -> SU map.
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 098afd885f2f..364e1f030942 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -398,6 +398,13 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
return false;
}
+ // Make sure the analyzed branch is conditional; one of the successors
+ // could be a landing pad. (Empty landing pads can be generated on Windows.)
+ if (Cond.empty()) {
+ LLVM_DEBUG(dbgs() << "AnalyzeBranch found an unconditional branch.\n");
+ return false;
+ }
+
// AnalyzeBranch doesn't set FBB on a fall-through branch.
// Make sure it is always set.
FBB = TBB == Succ0 ? Succ1 : Succ0;
diff --git a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
index d7562cbf1e90..ee7683adbcdd 100644
--- a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -66,23 +66,18 @@ class MemCmpExpansion {
// Represents the decomposition in blocks of the expansion. For example,
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
// 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
- // TODO(courbet): Involve the target more in this computation. On X86, 7
- // bytes can be done more efficiently with two overlaping 4-byte loads than
- // covering the interval with [{4, 0},{2, 4},{1, 6}}.
struct LoadEntry {
LoadEntry(unsigned LoadSize, uint64_t Offset)
: LoadSize(LoadSize), Offset(Offset) {
- assert(Offset % LoadSize == 0 && "invalid load entry");
}
- uint64_t getGEPIndex() const { return Offset / LoadSize; }
-
// The size of the load for this block, in bytes.
- const unsigned LoadSize;
- // The offset of this load WRT the base pointer, in bytes.
- const uint64_t Offset;
+ unsigned LoadSize;
+ // The offset of this load from the base pointer, in bytes.
+ uint64_t Offset;
};
- SmallVector<LoadEntry, 8> LoadSequence;
+ using LoadEntryVector = SmallVector<LoadEntry, 8>;
+ LoadEntryVector LoadSequence;
void createLoadCmpBlocks();
void createResultBlock();
@@ -92,13 +87,23 @@ class MemCmpExpansion {
void emitLoadCompareBlock(unsigned BlockIndex);
void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
unsigned &LoadIndex);
- void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex);
+ void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase();
Value *getMemCmpEqZeroOneBlock();
Value *getMemCmpOneBlock();
+ Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,
+ uint64_t OffsetBytes);
+
+ static LoadEntryVector
+ computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+ unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
+ static LoadEntryVector
+ computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
+ unsigned MaxNumLoads,
+ unsigned &NumLoadsNonOneByte);
- public:
+public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
@@ -110,6 +115,76 @@ class MemCmpExpansion {
Value *getMemCmpExpansion();
};
+MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
+ uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+ const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
+ NumLoadsNonOneByte = 0;
+ LoadEntryVector LoadSequence;
+ uint64_t Offset = 0;
+ while (Size && !LoadSizes.empty()) {
+ const unsigned LoadSize = LoadSizes.front();
+ const uint64_t NumLoadsForThisSize = Size / LoadSize;
+ if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+ // Do not expand if the total number of loads is larger than what the
+ // target allows. Note that it's important that we exit before completing
+ // the expansion to avoid using a ton of memory to store the expansion for
+ // large sizes.
+ return {};
+ }
+ if (NumLoadsForThisSize > 0) {
+ for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+ LoadSequence.push_back({LoadSize, Offset});
+ Offset += LoadSize;
+ }
+ if (LoadSize > 1)
+ ++NumLoadsNonOneByte;
+ Size = Size % LoadSize;
+ }
+ LoadSizes = LoadSizes.drop_front();
+ }
+ return LoadSequence;
+}
+
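As a worked example of the greedy decomposition (illustrative values: Size = 33, LoadSizes = {16, 8, 4, 2, 1}, MaxNumLoads large enough):

    // 33 / 16 = 2 loads of 16 bytes at offsets 0 and 16; Size becomes 33 % 16 = 1.
    // 8, 4 and 2 each divide 1 zero times; finally one 1-byte load at offset 32.
    //   LoadSequence = [{16, 0}, {16, 16}, {1, 32}],  NumLoadsNonOneByte = 1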
+MemCmpExpansion::LoadEntryVector
+MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
+ const unsigned MaxLoadSize,
+ const unsigned MaxNumLoads,
+ unsigned &NumLoadsNonOneByte) {
+ // These are already handled by the greedy approach.
+ if (Size < 2 || MaxLoadSize < 2)
+ return {};
+
+ // We try to do as many non-overlapping loads as possible starting from the
+ // beginning.
+ const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
+ assert(NumNonOverlappingLoads && "there must be at least one load");
+ // There remain 0 to (MaxLoadSize - 1) bytes to load; this will be done with
+ // an overlapping load.
+ Size = Size - NumNonOverlappingLoads * MaxLoadSize;
+ // Bail if we do not need an overlapping load; this is already handled by
+ // the greedy approach.
+ if (Size == 0)
+ return {};
+ // Bail if the number of loads (non-overlapping + potential overlapping one)
+ // is larger than the max allowed.
+ if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
+ return {};
+
+ // Add non-overlapping loads.
+ LoadEntryVector LoadSequence;
+ uint64_t Offset = 0;
+ for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
+ LoadSequence.push_back({MaxLoadSize, Offset});
+ Offset += MaxLoadSize;
+ }
+
+ // Add the last overlapping load.
+ assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
+ LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
+ NumLoadsNonOneByte = 1;
+ return LoadSequence;
+}
+
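A worked example of the overlapping variant (illustrative values: Size = 7, MaxLoadSize = 4, MaxNumLoads >= 2):

    //   NumNonOverlappingLoads = 7 / 4 = 1       -> {4, 0}, Offset becomes 4
    //   remaining Size = 7 - 4 = 3               -> last load at 4 - (4 - 3) = 3
    //   LoadSequence = [{4, 0}, {4, 3}]          (byte 3 is simply read twice)

The greedy decomposition of the same 7 bytes would need three loads ({4, 0}, {2, 4}, {1, 6}), so the two overlapping loads are preferred when the target allows them.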
// Initialize the basic block structure required for expansion of memcmp call
// with given maximum load size and memcmp size parameter.
// This structure includes:
@@ -133,38 +208,31 @@ MemCmpExpansion::MemCmpExpansion(
Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
- size_t LoadSizeIndex = 0;
- while (LoadSizeIndex < Options.LoadSizes.size() &&
- Options.LoadSizes[LoadSizeIndex] > Size) {
- ++LoadSizeIndex;
+ llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
+ while (!LoadSizes.empty() && LoadSizes.front() > Size) {
+ LoadSizes = LoadSizes.drop_front();
}
- this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
+ assert(!LoadSizes.empty() && "cannot load Size bytes");
+ MaxLoadSize = LoadSizes.front();
// Compute the decomposition.
- uint64_t CurSize = Size;
- uint64_t Offset = 0;
- while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
- const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
- assert(LoadSize > 0 && "zero load size");
- const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
- if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
- // Do not expand if the total number of loads is larger than what the
- // target allows. Note that it's important that we exit before completing
- // the expansion to avoid using a ton of memory to store the expansion for
- // large sizes.
- LoadSequence.clear();
- return;
- }
- if (NumLoadsForThisSize > 0) {
- for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
- LoadSequence.push_back({LoadSize, Offset});
- Offset += LoadSize;
- }
- if (LoadSize > 1) {
- ++NumLoadsNonOneByte;
- }
- CurSize = CurSize % LoadSize;
+ unsigned GreedyNumLoadsNonOneByte = 0;
+ LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
+ GreedyNumLoadsNonOneByte);
+ NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
+ assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ // If we allow overlapping loads and the load sequence is not already optimal,
+ // use overlapping loads.
+ if (Options.AllowOverlappingLoads &&
+ (LoadSequence.empty() || LoadSequence.size() > 2)) {
+ unsigned OverlappingNumLoadsNonOneByte = 0;
+ auto OverlappingLoads = computeOverlappingLoadSequence(
+ Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
+ if (!OverlappingLoads.empty() &&
+ (LoadSequence.empty() ||
+ OverlappingLoads.size() < LoadSequence.size())) {
+ LoadSequence = OverlappingLoads;
+ NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
}
- ++LoadSizeIndex;
}
assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
}
@@ -189,30 +257,32 @@ void MemCmpExpansion::createResultBlock() {
EndBlock->getParent(), EndBlock);
}
+/// Return a pointer to an element of type `LoadSizeType` at offset
+/// `OffsetBytes`.
+Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
+ Type *LoadSizeType,
+ uint64_t OffsetBytes) {
+ if (OffsetBytes > 0) {
+ auto *ByteType = Type::getInt8Ty(CI->getContext());
+ Source = Builder.CreateGEP(
+ ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
+ ConstantInt::get(ByteType, OffsetBytes));
+ }
+ return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
+}
+
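In plain pointer terms, the helper above byte-offsets through an i8* view and then reinterprets the result as a pointer to the load type. A rough C++ analogue (hypothetical, for illustration only; a 64-bit load type is assumed):

    #include <cstdint>
    // Mirror of the emitted IR: GEP by OffsetBytes over i8, then bitcast.
    static const uint64_t *ptrToU64AtOffset(const void *Src, uint64_t OffsetBytes) {
      return reinterpret_cast<const uint64_t *>(
          static_cast<const char *>(Src) + OffsetBytes);
    }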
// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters at the given
// byte offset. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
- unsigned GEPIndex) {
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
+ unsigned OffsetBytes) {
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using the GEPIndex.
- if (GEPIndex != 0) {
- Source1 = Builder.CreateGEP(LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, GEPIndex));
- Source2 = Builder.CreateGEP(LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, GEPIndex));
- }
+ Value *Source1 =
+ getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);
+ Value *Source2 =
+ getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
@@ -270,24 +340,10 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
IntegerType *LoadSizeType =
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
- // Get the base address using a GEP.
- if (CurLoadEntry.Offset != 0) {
- Source1 = Builder.CreateGEP(
- LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- Source2 = Builder.CreateGEP(
- LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- }
+ Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
+ CurLoadEntry.Offset);
+ Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
+ CurLoadEntry.Offset);
// Get a constant or load a value for each source address.
Value *LoadSrc1 = nullptr;
@@ -378,8 +434,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
if (CurLoadEntry.LoadSize == 1) {
- MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex,
- CurLoadEntry.getGEPIndex());
+ MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
return;
}
@@ -388,25 +443,12 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
- Value *Source1 = CI->getArgOperand(0);
- Value *Source2 = CI->getArgOperand(1);
-
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
- // Cast source to LoadSizeType*.
- if (Source1->getType() != LoadSizeType)
- Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
- if (Source2->getType() != LoadSizeType)
- Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
- // Get the base address using a GEP.
- if (CurLoadEntry.Offset != 0) {
- Source1 = Builder.CreateGEP(
- LoadSizeType, Source1,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- Source2 = Builder.CreateGEP(
- LoadSizeType, Source2,
- ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
- }
+ Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
+ CurLoadEntry.Offset);
+ Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
+ CurLoadEntry.Offset);
// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
@@ -694,7 +736,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
if (SizeVal == 0) {
return false;
}
-
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index bc747fc610f8..f2a2bcbb94b1 100644
--- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -97,6 +97,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
if (MI->allDefsAreDead()) {
MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
return true;
}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
index fe3d29657942..1c80556dfef5 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -103,16 +103,6 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<GCModuleInfo>();
}
-static const char *DescKind(GC::PointKind Kind) {
- switch (Kind) {
- case GC::PreCall:
- return "pre-call";
- case GC::PostCall:
- return "post-call";
- }
- llvm_unreachable("Invalid point kind");
-}
-
bool Printer::runOnFunction(Function &F) {
if (F.hasGC())
return false;
@@ -129,7 +119,7 @@ bool Printer::runOnFunction(Function &F) {
for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE;
++PI) {
- OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind)
+ OS << "\t" << PI->Label->getName() << ": " << "post-call"
<< ", live = {";
for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index 31ddeadbd97a..e8ccd84b0b93 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -38,7 +38,7 @@ namespace {
/// directed by the GCStrategy. It also performs automatic root initialization
/// and custom intrinsic lowering.
class LowerIntrinsics : public FunctionPass {
- bool PerformDefaultLowering(Function &F, GCStrategy &S);
+ bool DoLowering(Function &F, GCStrategy &S);
public:
static char ID;
@@ -102,13 +102,6 @@ void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTreeWrapperPass>();
}
-static bool NeedsDefaultLoweringPass(const GCStrategy &C) {
- // Default lowering is necessary only if read or write barriers have a default
- // action. The default for roots is no action.
- return !C.customWriteBarrier() || !C.customReadBarrier() ||
- C.initializeRoots();
-}
-
/// doInitialization - If this module uses the GC intrinsics, find them now.
bool LowerIntrinsics::doInitialization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -148,8 +141,7 @@ static bool CouldBecomeSafePoint(Instruction *I) {
return true;
}
-static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
- unsigned Count) {
+static bool InsertRootInitializers(Function &F, ArrayRef<AllocaInst *> Roots) {
// Scroll past alloca instructions.
BasicBlock::iterator IP = F.getEntryBlock().begin();
while (isa<AllocaInst>(IP))
@@ -166,12 +158,12 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
// Add root initializers.
bool MadeChange = false;
- for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
- if (!InitedRoots.count(*I)) {
+ for (AllocaInst *Root : Roots)
+ if (!InitedRoots.count(Root)) {
StoreInst *SI = new StoreInst(
- ConstantPointerNull::get(cast<PointerType>((*I)->getAllocatedType())),
- *I);
- SI->insertAfter(*I);
+ ConstantPointerNull::get(cast<PointerType>(Root->getAllocatedType())),
+ Root);
+ SI->insertAfter(Root);
MadeChange = true;
}
@@ -188,64 +180,59 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
GCStrategy &S = FI.getStrategy();
- bool MadeChange = false;
-
- if (NeedsDefaultLoweringPass(S))
- MadeChange |= PerformDefaultLowering(F, S);
-
- return MadeChange;
+ return DoLowering(F, S);
}
-bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
- bool LowerWr = !S.customWriteBarrier();
- bool LowerRd = !S.customReadBarrier();
- bool InitRoots = S.initializeRoots();
-
+/// Lower barriers out of existence (if the associated GCStrategy hasn't
+/// already done so...), and insert initializing stores to roots as a defensive
+/// measure. Given we're going to report all roots live at all safepoints, we
+/// need to be able to ensure each root has been initialized by the point the
+/// first safepoint is reached. This really should have been done by the
+/// frontend, but the old API made this non-obvious, so we do a potentially
+/// redundant store just in case.
+bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
SmallVector<AllocaInst *, 32> Roots;
bool MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
- Function *F = CI->getCalledFunction();
- switch (F->getIntrinsicID()) {
- case Intrinsic::gcwrite:
- if (LowerWr) {
- // Replace a write barrier with a simple store.
- Value *St =
- new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
- CI->replaceAllUsesWith(St);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcread:
- if (LowerRd) {
- // Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
- Ld->takeName(CI);
- CI->replaceAllUsesWith(Ld);
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::gcroot:
- if (InitRoots) {
- // Initialize the GC root, but do not delete the intrinsic. The
- // backend needs the intrinsic to flag the stack slot.
- Roots.push_back(
- cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
- }
- break;
- default:
- continue;
- }
-
+ for (BasicBlock &BB : F)
+ for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) {
+ IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++);
+ if (!CI)
+ continue;
+
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::gcwrite: {
+ // Replace a write barrier with a simple store.
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
MadeChange = true;
+ break;
+ }
+ case Intrinsic::gcread: {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ MadeChange = true;
+ break;
+ }
+ case Intrinsic::gcroot: {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ break;
+ }
}
}
- }
if (Roots.size())
- MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+ MadeChange |= InsertRootInitializers(F, Roots);
return MadeChange;
}
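In effect the rewritten loop performs the following lowering (argument positions taken from the calls above; shown schematically, not as verbatim IR):

    //   llvm.gcwrite(Value, Obj, SlotPtr) -> store Value, SlotPtr
    //   llvm.gcread(Obj, SlotPtr)         -> load SlotPtr   (takes over the call's name)
    //   llvm.gcroot(Slot, Metadata)       -> kept as-is; Slot is recorded so
    //                                        InsertRootInitializers can store null into it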
@@ -276,26 +263,18 @@ MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
}
void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
- // Find the return address (next instruction), too, so as to bracket the call
- // instruction.
+ // Find the return address (next instruction), since that's what will be on
+ // the stack when the call is suspended and we need to inspect the stack.
MachineBasicBlock::iterator RAI = CI;
++RAI;
- if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
- MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
- FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
- }
-
- if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
- MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
- FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
- }
+ MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(Label, CI->getDebugLoc());
}
void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
- for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
- ++BBI)
- for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end();
MI != ME; ++MI)
if (MI->isCall()) {
// Do not treat tail or sibling call sites as safe points. This is
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
new file mode 100644
index 000000000000..89c525c5ba15
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -0,0 +1,370 @@
+//===- CSEInfo.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "cseinfo"
+
+using namespace llvm;
+char llvm::GISelCSEAnalysisWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
+ "Analysis containing CSE Info", false, true)
+INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
+ "Analysis containing CSE Info", false, true)
+
+/// -------- UniqueMachineInstr -------------//
+
+void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) {
+ GISelInstProfileBuilder(ID, MI->getMF()->getRegInfo()).addNodeID(MI);
+}
+/// -----------------------------------------
+
+/// --------- CSEConfig ---------- ///
+bool CSEConfig::shouldCSEOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_TRUNC:
+ return true;
+ }
+ return false;
+}
+
+bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
+ return Opc == TargetOpcode::G_CONSTANT;
+}
+/// -----------------------------------------
+
+/// -------- GISelCSEInfo -------------//
+void GISelCSEInfo::setMF(MachineFunction &MF) {
+ this->MF = &MF;
+ this->MRI = &MF.getRegInfo();
+}
+
+GISelCSEInfo::~GISelCSEInfo() {}
+
+bool GISelCSEInfo::isUniqueMachineInstValid(
+ const UniqueMachineInstr &UMI) const {
+ // Should we check here and assert that the instruction has been fully
+ // constructed?
+ // FIXME: Any other checks required to be done here? Remove this method if
+ // none.
+ return true;
+}
+
+void GISelCSEInfo::invalidateUniqueMachineInstr(UniqueMachineInstr *UMI) {
+ bool Removed = CSEMap.RemoveNode(UMI);
+ (void)Removed;
+ assert(Removed && "Invalidation called on invalid UMI");
+ // FIXME: Should UMI be deallocated/destroyed?
+}
+
+UniqueMachineInstr *GISelCSEInfo::getNodeIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos) {
+ auto *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (Node) {
+ if (!isUniqueMachineInstValid(*Node)) {
+ invalidateUniqueMachineInstr(Node);
+ return nullptr;
+ }
+
+ if (Node->MI->getParent() != MBB)
+ return nullptr;
+ }
+ return Node;
+}
+
+void GISelCSEInfo::insertNode(UniqueMachineInstr *UMI, void *InsertPos) {
+ handleRecordedInsts();
+ assert(UMI);
+ UniqueMachineInstr *MaybeNewNode = UMI;
+ if (InsertPos)
+ CSEMap.InsertNode(UMI, InsertPos);
+ else
+ MaybeNewNode = CSEMap.GetOrInsertNode(UMI);
+ if (MaybeNewNode != UMI) {
+ // A similar node exists in the folding set. Let's ignore this one.
+ return;
+ }
+ assert(InstrMapping.count(UMI->MI) == 0 &&
+ "This instruction should not be in the map");
+ InstrMapping[UMI->MI] = MaybeNewNode;
+}
+
+UniqueMachineInstr *GISelCSEInfo::getUniqueInstrForMI(const MachineInstr *MI) {
+ assert(shouldCSE(MI->getOpcode()) && "Trying to CSE an unsupported Node");
+ auto *Node = new (UniqueInstrAllocator) UniqueMachineInstr(MI);
+ return Node;
+}
+
+void GISelCSEInfo::insertInstr(MachineInstr *MI, void *InsertPos) {
+ assert(MI);
+ // If it exists in temporary insts, remove it.
+ TemporaryInsts.remove(MI);
+ auto *Node = getUniqueInstrForMI(MI);
+ insertNode(Node, InsertPos);
+}
+
+MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID,
+ MachineBasicBlock *MBB,
+ void *&InsertPos) {
+ handleRecordedInsts();
+ if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) {
+ LLVM_DEBUG(dbgs() << "CSEInfo: Found Instr " << *Inst->MI << "\n";);
+ return const_cast<MachineInstr *>(Inst->MI);
+ }
+ return nullptr;
+}
+
+void GISelCSEInfo::countOpcodeHit(unsigned Opc) {
+#ifndef NDEBUG
+ if (OpcodeHitTable.count(Opc))
+ OpcodeHitTable[Opc] += 1;
+ else
+ OpcodeHitTable[Opc] = 1;
+#endif
+ // Else do nothing.
+}
+
+void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) {
+ if (shouldCSE(MI->getOpcode())) {
+ TemporaryInsts.insert(MI);
+ LLVM_DEBUG(dbgs() << "CSEInfo: Recording new MI" << *MI << "\n";);
+ }
+}
+
+void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) {
+ assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE");
+ auto *UMI = InstrMapping.lookup(MI);
+ LLVM_DEBUG(dbgs() << "CSEInfo: Handling recorded MI" << *MI << "\n";);
+ if (UMI) {
+ // Invalidate this MI.
+ invalidateUniqueMachineInstr(UMI);
+ InstrMapping.erase(MI);
+ }
+ /// Now insert the new instruction.
+ if (UMI) {
+ /// We'll reuse the same UniqueMachineInstr to avoid the new
+ /// allocation.
+ *UMI = UniqueMachineInstr(MI);
+ insertNode(UMI, nullptr);
+ } else {
+ /// This is a new instruction. Allocate a new UniqueMachineInstr and
+ /// Insert.
+ insertInstr(MI);
+ }
+}
+
+void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) {
+ if (auto *UMI = InstrMapping.lookup(MI)) {
+ invalidateUniqueMachineInstr(UMI);
+ InstrMapping.erase(MI);
+ }
+ TemporaryInsts.remove(MI);
+}
+
+void GISelCSEInfo::handleRecordedInsts() {
+ while (!TemporaryInsts.empty()) {
+ auto *MI = TemporaryInsts.pop_back_val();
+ handleRecordedInst(MI);
+ }
+}
+
+bool GISelCSEInfo::shouldCSE(unsigned Opc) const {
+ // Only GISel opcodes are CSEable
+ if (!isPreISelGenericOpcode(Opc))
+ return false;
+ assert(CSEOpt.get() && "CSEConfig not set");
+ return CSEOpt->shouldCSEOpc(Opc);
+}
+
+void GISelCSEInfo::erasingInstr(MachineInstr &MI) { handleRemoveInst(&MI); }
+void GISelCSEInfo::createdInstr(MachineInstr &MI) { recordNewInstruction(&MI); }
+void GISelCSEInfo::changingInstr(MachineInstr &MI) {
+ // For now, perform erase, followed by insert.
+ erasingInstr(MI);
+ createdInstr(MI);
+}
+void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); }
+
+void GISelCSEInfo::analyze(MachineFunction &MF) {
+ setMF(MF);
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ for (MachineInstr &MI : MBB) {
+ if (!shouldCSE(MI.getOpcode()))
+ continue;
+ LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI << "\n";);
+ insertInstr(&MI);
+ }
+ }
+}
+
+void GISelCSEInfo::releaseMemory() {
+ // print();
+ CSEMap.clear();
+ InstrMapping.clear();
+ UniqueInstrAllocator.Reset();
+ TemporaryInsts.clear();
+ CSEOpt.reset();
+ MRI = nullptr;
+ MF = nullptr;
+#ifndef NDEBUG
+ OpcodeHitTable.clear();
+#endif
+}
+
+void GISelCSEInfo::print() {
+#ifndef NDEBUG
+ for (auto &It : OpcodeHitTable) {
+ dbgs() << "CSE Count for Opc " << It.first << " : " << It.second << "\n";
+ };
+#endif
+}
+// -----------------------------------------
+// ---- Profiling methods for FoldingSetNode --- //
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeID(const MachineInstr *MI) const {
+ addNodeIDMBB(MI->getParent());
+ addNodeIDOpcode(MI->getOpcode());
+ for (auto &Op : MI->operands())
+ addNodeIDMachineOperand(Op);
+ addNodeIDFlag(MI->getFlags());
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDOpcode(unsigned Opc) const {
+ ID.AddInteger(Opc);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const LLT &Ty) const {
+ uint64_t Val = Ty.getUniqueRAWLLTData();
+ ID.AddInteger(Val);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const TargetRegisterClass *RC) const {
+ ID.AddPointer(RC);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const RegisterBank *RB) const {
+ ID.AddPointer(RB);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDImmediate(int64_t Imm) const {
+ ID.AddInteger(Imm);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegNum(unsigned Reg) const {
+ ID.AddInteger(Reg);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDRegType(const unsigned Reg) const {
+ addNodeIDMachineOperand(MachineOperand::CreateReg(Reg, false));
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDMBB(const MachineBasicBlock *MBB) const {
+ ID.AddPointer(MBB);
+ return *this;
+}
+
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
+ if (Flag)
+ ID.AddInteger(Flag);
+ return *this;
+}
+
+const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
+ const MachineOperand &MO) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (!MO.isDef())
+ addNodeIDRegNum(Reg);
+ LLT Ty = MRI.getType(Reg);
+ if (Ty.isValid())
+ addNodeIDRegType(Ty);
+ auto *RB = MRI.getRegBankOrNull(Reg);
+ if (RB)
+ addNodeIDRegType(RB);
+ auto *RC = MRI.getRegClassOrNull(Reg);
+ if (RC)
+ addNodeIDRegType(RC);
+ assert(!MO.isImplicit() && "Unhandled case");
+ } else if (MO.isImm())
+ ID.AddInteger(MO.getImm());
+ else if (MO.isCImm())
+ ID.AddPointer(MO.getCImm());
+ else if (MO.isFPImm())
+ ID.AddPointer(MO.getFPImm());
+ else if (MO.isPredicate())
+ ID.AddInteger(MO.getPredicate());
+ else
+ llvm_unreachable("Unhandled operand type");
+ // Handle other types
+ return *this;
+}
+
+GISelCSEInfo &GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfig> CSEOpt,
+ bool Recompute) {
+ if (!AlreadyComputed || Recompute) {
+ Info.setCSEConfig(std::move(CSEOpt));
+ Info.analyze(*MF);
+ AlreadyComputed = true;
+ }
+ return Info;
+}
+void GISelCSEAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool GISelCSEAnalysisWrapperPass::runOnMachineFunction(MachineFunction &MF) {
+ releaseMemory();
+ Wrapper.setMF(MF);
+ return false;
+}
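The CSE bookkeeping added above boils down to a simple contract: profile an instruction's basic block, opcode and source operands into a key, look the key up before building anything, and reuse the existing definition on a hit. The following is only a minimal standalone sketch of that idea; the Instr and SimpleCSEMap names are invented for illustration and are not LLVM's FoldingSet/UniqueMachineInstr API.

// Standalone sketch only: invented Instr/SimpleCSEMap types.
#include <cstdint>
#include <iostream>
#include <map>
#include <tuple>
#include <vector>

struct Instr {
  int Block;                 // parent block; the CSE above is per-block
  unsigned Opcode;
  std::vector<uint64_t> Ops; // source operands, as the profile records them
};

// The key mirrors what the profile builder hashes: block, opcode, operands.
using Key = std::tuple<int, unsigned, std::vector<uint64_t>>;

class SimpleCSEMap {
  std::map<Key, const Instr *> Map; // stand-in for the folding set

public:
  // Return an equivalent, previously seen instruction if there is one;
  // otherwise memoize this instruction and return it.
  const Instr *getOrInsert(const Instr &I) {
    Key K{I.Block, I.Opcode, I.Ops};
    auto It = Map.find(K);
    if (It != Map.end())
      return It->second;          // hit: the caller reuses the old definition
    Map.emplace(std::move(K), &I); // miss: remember the new instruction
    return &I;
  }
};

int main() {
  Instr A{0, 51, {1, 2}}; // block 0, an arbitrary opcode, source vregs 1 and 2
  Instr B{0, 51, {1, 2}}; // structurally identical instruction
  SimpleCSEMap CSE;
  std::cout << (CSE.getOrInsert(A) == CSE.getOrInsert(B)) << "\n"; // prints 1
}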
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
new file mode 100644
index 000000000000..863efe0c3e34
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -0,0 +1,231 @@
+//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the CSEMIRBuilder class which CSEs as it builds
+/// instructions.
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+
+using namespace llvm;
+
+bool CSEMIRBuilder::dominates(MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) const {
+ auto MBBEnd = getMBB().end();
+ if (B == MBBEnd)
+ return true;
+ assert(A->getParent() == B->getParent() &&
+ "Iterators should be in same block");
+ const MachineBasicBlock *BBA = A->getParent();
+ MachineBasicBlock::const_iterator I = BBA->begin();
+ for (; &*I != A && &*I != B; ++I)
+ ;
+ return &*I == A;
+}
+
+MachineInstrBuilder
+CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
+ void *&NodeInsertPos) {
+ GISelCSEInfo *CSEInfo = getCSEInfo();
+ assert(CSEInfo && "Can't get here without setting CSEInfo");
+ MachineBasicBlock *CurMBB = &getMBB();
+ MachineInstr *MI =
+ CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos);
+ if (MI) {
+ auto CurrPos = getInsertPt();
+ if (!dominates(MI, CurrPos))
+ CurMBB->splice(CurrPos, CurMBB, MI);
+ return MachineInstrBuilder(getMF(), MI);
+ }
+ return MachineInstrBuilder();
+}
+
+bool CSEMIRBuilder::canPerformCSEForOpc(unsigned Opc) const {
+ const GISelCSEInfo *CSEInfo = getCSEInfo();
+ if (!CSEInfo || !CSEInfo->shouldCSE(Opc))
+ return false;
+ return true;
+}
+
+void CSEMIRBuilder::profileDstOp(const DstOp &Op,
+ GISelInstProfileBuilder &B) const {
+ switch (Op.getDstOpKind()) {
+ case DstOp::DstType::Ty_RC:
+ B.addNodeIDRegType(Op.getRegClass());
+ break;
+ default:
+ B.addNodeIDRegType(Op.getLLTTy(*getMRI()));
+ break;
+ }
+}
+
+void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
+ GISelInstProfileBuilder &B) const {
+ switch (Op.getSrcOpKind()) {
+ case SrcOp::SrcType::Ty_Predicate:
+ B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
+ break;
+ default:
+ B.addNodeIDRegType(Op.getReg());
+ break;
+ }
+}
+
+void CSEMIRBuilder::profileMBBOpcode(GISelInstProfileBuilder &B,
+ unsigned Opc) const {
+ // First add the MBB (Local CSE).
+ B.addNodeIDMBB(&getMBB());
+ // Then add the opcode.
+ B.addNodeIDOpcode(Opc);
+}
+
+void CSEMIRBuilder::profileEverything(unsigned Opc, ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags,
+ GISelInstProfileBuilder &B) const {
+
+ profileMBBOpcode(B, Opc);
+ // Then add the DstOps.
+ profileDstOps(DstOps, B);
+ // Then add the SrcOps.
+ profileSrcOps(SrcOps, B);
+ // Add Flags if passed in.
+ if (Flags)
+ B.addNodeIDFlag(*Flags);
+}
+
+MachineInstrBuilder CSEMIRBuilder::memoizeMI(MachineInstrBuilder MIB,
+ void *NodeInsertPos) {
+ assert(canPerformCSEForOpc(MIB->getOpcode()) &&
+ "Attempting to CSE illegal op");
+ MachineInstr *MIBInstr = MIB;
+ getCSEInfo()->insertInstr(MIBInstr, NodeInsertPos);
+ return MIB;
+}
+
+bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) {
+ if (DstOps.size() == 1)
+ return true; // always possible to emit copy to just 1 vreg.
+
+ return std::all_of(DstOps.begin(), DstOps.end(), [](const DstOp &Op) {
+ DstOp::DstType DT = Op.getDstOpKind();
+ return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC;
+ });
+}
+
+MachineInstrBuilder
+CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
+ MachineInstrBuilder &MIB) {
+  assert(checkCopyToDefsPossible(DstOps) &&
+         "Impossible to return a single MIB with copies to multiple defs");
+ if (DstOps.size() == 1) {
+ const DstOp &Op = DstOps[0];
+ if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg)
+ return buildCopy(Op.getReg(), MIB->getOperand(0).getReg());
+ }
+ return MIB;
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
+ ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flag) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_SREM: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 2 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
+ SrcOps[1].getReg(), *getMRI()))
+ return buildConstant(DstOps[0], Cst->getSExtValue());
+ break;
+ }
+ }
+ bool CanCopy = checkCopyToDefsPossible(DstOps);
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+  // If we can CSE this instruction, but it involves generating copies to
+  // multiple regs, give up. This frequently happens to UNMERGEs.
+ if (!CanCopy) {
+ auto MIB = MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ // CSEInfo would have tracked this instruction. Remove it from the temporary
+ // insts.
+ getCSEInfo()->handleRemoveInst(&*MIB);
+ return MIB;
+ }
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileEverything(Opc, DstOps, SrcOps, Flag, ProfBuilder);
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired(DstOps, MIB);
+ }
+ // This instruction does not exist in the CSEInfo. Build it and CSE it.
+ MachineInstrBuilder NewMIB =
+ MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps, Flag);
+ return memoizeMI(NewMIB, InsertPos);
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
+ const ConstantInt &Val) {
+ constexpr unsigned Opc = TargetOpcode::G_CONSTANT;
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildConstant(Res, Val);
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileMBBOpcode(ProfBuilder, Opc);
+ profileDstOp(Res, ProfBuilder);
+ ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateCImm(&Val));
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired({Res}, MIB);
+ }
+ MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val);
+ return memoizeMI(NewMIB, InsertPos);
+}
+
+MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) {
+ constexpr unsigned Opc = TargetOpcode::G_FCONSTANT;
+ if (!canPerformCSEForOpc(Opc))
+ return MachineIRBuilder::buildFConstant(Res, Val);
+ FoldingSetNodeID ID;
+ GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
+ void *InsertPos = nullptr;
+ profileMBBOpcode(ProfBuilder, Opc);
+ profileDstOp(Res, ProfBuilder);
+ ProfBuilder.addNodeIDMachineOperand(MachineOperand::CreateFPImm(&Val));
+ MachineInstrBuilder MIB = getDominatingInstrForID(ID, InsertPos);
+ if (MIB) {
+ // Handle generating copies here.
+ return generateCopiesIfRequired({Res}, MIB);
+ }
+ MachineInstrBuilder NewMIB = MachineIRBuilder::buildFConstant(Res, Val);
+ return memoizeMI(NewMIB, InsertPos);
+}
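Besides looking up dominating equivalents, the new builder also folds integer binary ops whose sources are already known constants (the G_ADD..G_SREM cases above delegate to ConstantFoldBinOp). Below is a rough standalone sketch of that build-time folding; the Builder struct and its KnownConstants map are invented stand-ins for querying MachineRegisterInfo, not the real interface.

// Standalone sketch only: invented Builder/KnownConstants.
#include <cstdint>
#include <iostream>
#include <optional>
#include <unordered_map>

enum Opcode { Add, Sub, Mul };

struct Builder {
  std::unordered_map<int, int64_t> KnownConstants; // vreg -> constant value

  // Fold only when both sources are already known constants; otherwise
  // report "can't fold" and let the caller emit the instruction.
  std::optional<int64_t> constantFold(Opcode Opc, int LHS, int RHS) const {
    auto L = KnownConstants.find(LHS);
    auto R = KnownConstants.find(RHS);
    if (L == KnownConstants.end() || R == KnownConstants.end())
      return std::nullopt;
    switch (Opc) {
    case Add: return L->second + R->second;
    case Sub: return L->second - R->second;
    case Mul: return L->second * R->second;
    }
    return std::nullopt;
  }
};

int main() {
  Builder B;
  B.KnownConstants[1] = 4; // vreg 1 holds the constant 4
  B.KnownConstants[2] = 5; // vreg 2 holds the constant 5
  if (auto Cst = B.constantFold(Add, 1, 2))
    std::cout << "folded to constant " << *Cst << "\n"; // prints 9
  if (!B.constantFold(Add, 1, 3))
    std::cout << "vreg 3 is not constant, emit the add instead\n";
}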
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 07de31bec660..724ecedf3b3f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -23,6 +23,8 @@
using namespace llvm;
+void CallLowering::anchor() {}
+
bool CallLowering::lowerCall(
MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
@@ -164,7 +166,6 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
// nop in big-endian situations.
return ValReg;
case CCValAssign::AExt: {
- assert(!VA.getLocVT().isVector() && "unexpected vector extend");
auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
return MIB->getOperand(0).getReg();
}
@@ -181,3 +182,5 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
}
llvm_unreachable("unable to extend register");
}
+
+void CallLowering::ValueHandler::anchor() {}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 0bc5b87de150..45b0e36fd7d9 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -1,4 +1,4 @@
-//===-- lib/CodeGen/GlobalISel/GICombiner.cpp -----------------------===//
+//===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
-#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -25,20 +28,76 @@
using namespace llvm;
+namespace {
+/// This class acts as the glue that joins the CombinerHelper to the overall
+/// Combine algorithm. The CombinerHelper is intended to report the
+/// modifications it makes to the MIR to the GISelChangeObserver and the
+/// observer subclass will act on these events. In this case, instruction
+/// erasure will cancel any future visits to the erased instruction and
+/// instruction creation will schedule that instruction for a future visit.
+/// Other Combiner implementations may require more complex behaviour from
+/// their GISelChangeObserver subclass.
+class WorkListMaintainer : public GISelChangeObserver {
+ using WorkListTy = GISelWorkList<512>;
+ WorkListTy &WorkList;
+  /// Instructions that have been created but that we only want to report once
+  /// their operands have been added. This is only maintained if debug output
+  /// is requested.
+ SmallPtrSet<const MachineInstr *, 4> CreatedInstrs;
+
+public:
+ WorkListMaintainer(WorkListTy &WorkList)
+ : GISelChangeObserver(), WorkList(WorkList) {}
+ virtual ~WorkListMaintainer() {
+ }
+
+ void erasingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Erased: " << MI << "\n");
+ WorkList.remove(&MI);
+ }
+ void createdInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n");
+ WorkList.insert(&MI);
+ LLVM_DEBUG(CreatedInstrs.insert(&MI));
+ }
+ void changingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n");
+ WorkList.insert(&MI);
+ }
+ void changedInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n");
+ WorkList.insert(&MI);
+ }
+
+ void reportFullyCreatedInstrs() {
+ LLVM_DEBUG(for (const auto *MI
+ : CreatedInstrs) {
+ dbgs() << "Created: ";
+ MI->print(dbgs());
+ });
+ LLVM_DEBUG(CreatedInstrs.clear());
+ }
+};
+}
+
Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
: CInfo(Info), TPC(TPC) {
(void)this->TPC; // FIXME: Remove when used.
}
-bool Combiner::combineMachineInstrs(MachineFunction &MF) {
+bool Combiner::combineMachineInstrs(MachineFunction &MF,
+ GISelCSEInfo *CSEInfo) {
// If the ISel pipeline failed, do not bother running this pass.
// FIXME: Should this be here or in individual combiner passes.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
+ Builder =
+ CSEInfo ? make_unique<CSEMIRBuilder>() : make_unique<MachineIRBuilder>();
MRI = &MF.getRegInfo();
- Builder.setMF(MF);
+ Builder->setMF(MF);
+ if (CSEInfo)
+ Builder->setCSEInfo(CSEInfo);
LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n');
@@ -46,6 +105,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
bool MFChanged = false;
bool Changed;
+ MachineIRBuilder &B = *Builder.get();
do {
// Collect all instructions. Do a post order traversal for basic blocks and
@@ -53,6 +113,11 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
// down RPOT.
Changed = false;
GISelWorkList<512> WorkList;
+ WorkListMaintainer Observer(WorkList);
+ GISelObserverWrapper WrapperObserver(&Observer);
+ if (CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
continue;
@@ -71,8 +136,9 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF) {
// Main Loop. Process the instructions here.
while (!WorkList.empty()) {
MachineInstr *CurrInst = WorkList.pop_back_val();
- LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";);
- Changed |= CInfo.combine(*CurrInst, Builder);
+ LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
+ Changed |= CInfo.combine(WrapperObserver, *CurrInst, B);
+ Observer.reportFullyCreatedInstrs();
}
MFChanged |= Changed;
} while (Changed);
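The worklist loop above relies on the observer to keep the worklist honest: instructions a combine erases must never be revisited, and instructions a combine creates get scheduled for a later visit. The following is a small self-contained sketch of that contract; Instr, WorkList and Maintainer are invented names, not the GISelWorkList/GISelChangeObserver API.

// Standalone sketch only: invented Instr/WorkList/Maintainer types.
#include <algorithm>
#include <iostream>
#include <vector>

struct Instr { int Id; };

class WorkList {
  std::vector<Instr *> Items;

public:
  void insert(Instr *I) {
    if (std::find(Items.begin(), Items.end(), I) == Items.end())
      Items.push_back(I);
  }
  void remove(Instr *I) {
    Items.erase(std::remove(Items.begin(), Items.end(), I), Items.end());
  }
  bool empty() const { return Items.empty(); }
  Instr *pop() { Instr *I = Items.back(); Items.pop_back(); return I; }
};

// Plays the role of the WorkListMaintainer above: react to what combines did.
struct Maintainer {
  WorkList &WL;
  void erasingInstr(Instr &I) { WL.remove(&I); }  // never revisit erased MIs
  void createdInstr(Instr &I) { WL.insert(&I); }  // revisit newly built MIs
};

int main() {
  Instr A{1}, B{2}, C{3};
  WorkList WL;
  Maintainer Obs{WL};
  WL.insert(&A);
  WL.insert(&B);
  Obs.erasingInstr(A);  // a combine deleted A
  Obs.createdInstr(C);  // a combine built C
  while (!WL.empty())
    std::cout << "visit " << WL.pop()->Id << "\n"; // visits 3, then 2
}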
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 44e904a6391b..b1c5670a6dec 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1,4 +1,4 @@
-//== ---lib/CodeGen/GlobalISel/GICombinerHelper.cpp --------------------- == //
+//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,44 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
-#define DEBUG_TYPE "gi-combine"
+#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
-CombinerHelper::CombinerHelper(MachineIRBuilder &B) :
- Builder(B), MRI(Builder.getMF().getRegInfo()) {}
+CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
+ MachineIRBuilder &B)
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
+
+void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
+ unsigned ToReg) const {
+ Observer.changingAllUsesOfReg(MRI, FromReg);
+
+ if (MRI.constrainRegAttrs(ToReg, FromReg))
+ MRI.replaceRegWith(FromReg, ToReg);
+ else
+ Builder.buildCopy(ToReg, FromReg);
+
+ Observer.finishedChangingAllUsesOfReg();
+}
+
+void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
+ MachineOperand &FromRegOp,
+ unsigned ToReg) const {
+ assert(FromRegOp.getParent() && "Expected an operand in an MI");
+ Observer.changingInstr(*FromRegOp.getParent());
+
+ FromRegOp.setReg(ToReg);
+
+ Observer.changedInstr(*FromRegOp.getParent());
+}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
@@ -30,12 +57,279 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
// a(sx) = COPY b(sx) -> Replace all uses of a with b.
if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) {
MI.eraseFromParent();
- MRI.replaceRegWith(DstReg, SrcReg);
+ replaceRegWith(MRI, DstReg, SrcReg);
return true;
}
return false;
}
+namespace {
+struct PreferredTuple {
+ LLT Ty; // The result type of the extend.
+ unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
+ MachineInstr *MI;
+};
+
+/// Select a preference between two uses. CurrentUse is the current preference
+/// while the *ForCandidate arguments describe the candidate under
+/// consideration.
+PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+ const LLT &TyForCandidate,
+ unsigned OpcodeForCandidate,
+ MachineInstr *MIForCandidate) {
+ if (!CurrentUse.Ty.isValid()) {
+ if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
+ CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ return CurrentUse;
+ }
+
+ // We permit the extend to hoist through basic blocks but this is only
+ // sensible if the target has extending loads. If you end up lowering back
+ // into a load and extend during the legalizer then the end result is
+ // hoisting the extend up to the load.
+
+ // Prefer defined extensions to undefined extensions as these are more
+ // likely to reduce the number of instructions.
+ if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
+ CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
+ return CurrentUse;
+ else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
+ OpcodeForCandidate != TargetOpcode::G_ANYEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+
+ // Prefer sign extensions to zero extensions as sign-extensions tend to be
+ // more expensive.
+ if (CurrentUse.Ty == TyForCandidate) {
+ if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
+ OpcodeForCandidate == TargetOpcode::G_ZEXT)
+ return CurrentUse;
+ else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
+ OpcodeForCandidate == TargetOpcode::G_SEXT)
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ }
+
+ // This is potentially target specific. We've chosen the largest type
+ // because G_TRUNC is usually free. One potential catch with this is that
+ // some targets have a reduced number of larger registers than smaller
+ // registers and this choice potentially increases the live-range for the
+ // larger value.
+ if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
+ return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
+ }
+ return CurrentUse;
+}
+
+/// Find a suitable place to insert some instructions and insert them. This
+/// function accounts for special cases like inserting before a PHI node.
+/// The current strategy for inserting before PHI's is to duplicate the
+/// instructions for each predecessor. However, while that's ok for G_TRUNC
+/// on most targets since it generally requires no code, other targets/cases may
+/// want to try harder to find a dominating block.
+static void InsertInsnsWithoutSideEffectsBeforeUse(
+ MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
+ std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)>
+ Inserter) {
+ MachineInstr &UseMI = *UseMO.getParent();
+
+ MachineBasicBlock *InsertBB = UseMI.getParent();
+
+ // If the use is a PHI then we want the predecessor block instead.
+ if (UseMI.isPHI()) {
+ MachineOperand *PredBB = std::next(&UseMO);
+ InsertBB = PredBB->getMBB();
+ }
+
+ // If the block is the same block as the def then we want to insert just after
+ // the def instead of at the start of the block.
+ if (InsertBB == DefMI.getParent()) {
+ MachineBasicBlock::iterator InsertPt = &DefMI;
+ Inserter(InsertBB, std::next(InsertPt));
+ return;
+ }
+
+ // Otherwise we want the start of the BB
+ Inserter(InsertBB, InsertBB->getFirstNonPHI());
+}
+} // end anonymous namespace
+
+bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
+ struct InsertionPoint {
+ MachineOperand *UseMO;
+ MachineBasicBlock *InsertIntoBB;
+ MachineBasicBlock::iterator InsertBefore;
+ InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore)
+ : UseMO(UseMO), InsertIntoBB(InsertIntoBB), InsertBefore(InsertBefore) {
+ }
+ };
+
+ // We match the loads and follow the uses to the extend instead of matching
+ // the extends and following the def to the load. This is because the load
+ // must remain in the same position for correctness (unless we also add code
+ // to find a safe place to sink it) whereas the extend is freely movable.
+ // It also prevents us from duplicating the load for the volatile case or just
+ // for performance.
+
+ if (MI.getOpcode() != TargetOpcode::G_LOAD &&
+ MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
+ MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
+ return false;
+
+ auto &LoadValue = MI.getOperand(0);
+ assert(LoadValue.isReg() && "Result wasn't a register?");
+
+ LLT LoadValueTy = MRI.getType(LoadValue.getReg());
+ if (!LoadValueTy.isScalar())
+ return false;
+
+ // Find the preferred type aside from the any-extends (unless it's the only
+ // one) and non-extending ops. We'll emit an extending load to that type and
+ // and emit a variant of (extend (trunc X)) for the others according to the
+ // relative type sizes. At the same time, pick an extend to use based on the
+ // extend involved in the chosen type.
+ unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
+ ? TargetOpcode::G_ANYEXT
+ : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
+ PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr};
+ for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
+ if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
+ UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
+ UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
+ Preferred = ChoosePreferredUse(Preferred,
+ MRI.getType(UseMI.getOperand(0).getReg()),
+ UseMI.getOpcode(), &UseMI);
+ }
+ }
+
+ // There were no extends
+ if (!Preferred.MI)
+ return false;
+  // It should be impossible to choose an extend without selecting a different
+ // type since by definition the result of an extend is larger.
+ assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
+
+ LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
+
+ // Rewrite the load to the chosen extending load.
+ unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+ Observer.changingInstr(MI);
+ MI.setDesc(
+ Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
+ ? TargetOpcode::G_SEXTLOAD
+ : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
+ ? TargetOpcode::G_ZEXTLOAD
+ : TargetOpcode::G_LOAD));
+
+ // Rewrite all the uses to fix up the types.
+ SmallVector<MachineInstr *, 1> ScheduleForErase;
+ SmallVector<InsertionPoint, 4> ScheduleForInsert;
+ for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
+ MachineInstr *UseMI = UseMO.getParent();
+
+ // If the extend is compatible with the preferred extend then we should fix
+ // up the type and extend so that it uses the preferred use.
+ if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
+ UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
+ unsigned UseDstReg = UseMI->getOperand(0).getReg();
+ MachineOperand &UseSrcMO = UseMI->getOperand(1);
+ const LLT &UseDstTy = MRI.getType(UseDstReg);
+ if (UseDstReg != ChosenDstReg) {
+ if (Preferred.Ty == UseDstTy) {
+ // If the use has the same type as the preferred use, then merge
+ // the vregs and erase the extend. For example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s32) = G_SEXT %1(s8)
+ // %3:_(s32) = G_ANYEXT %1(s8)
+ // ... = ... %3(s32)
+ // rewrites to:
+ // %2:_(s32) = G_SEXTLOAD ...
+ // ... = ... %2(s32)
+ replaceRegWith(MRI, UseDstReg, ChosenDstReg);
+ ScheduleForErase.push_back(UseMO.getParent());
+ } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
+ // If the preferred size is smaller, then keep the extend but extend
+ // from the result of the extending load. For example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s32) = G_SEXT %1(s8)
+ // %3:_(s64) = G_ANYEXT %1(s8)
+ // ... = ... %3(s64)
+          // rewrites to:
+ // %2:_(s32) = G_SEXTLOAD ...
+ // %3:_(s64) = G_ANYEXT %2:_(s32)
+ // ... = ... %3(s64)
+ replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
+ } else {
+ // If the preferred size is large, then insert a truncate. For
+ // example:
+ // %1:_(s8) = G_LOAD ...
+ // %2:_(s64) = G_SEXT %1(s8)
+ // %3:_(s32) = G_ZEXT %1(s8)
+ // ... = ... %3(s32)
+          // rewrites to:
+          //    %2:_(s64) = G_SEXTLOAD ...
+          //    %4:_(s8) = G_TRUNC %2:_(s64)
+          //    %3:_(s32) = G_ZEXT %4:_(s8)
+          //    ... = ... %3(s32)
+ InsertInsnsWithoutSideEffectsBeforeUse(
+ Builder, MI, UseMO,
+ [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore) {
+ ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
+ });
+ }
+ continue;
+ }
+ // The use is (one of) the uses of the preferred use we chose earlier.
+ // We're going to update the load to def this value later so just erase
+ // the old extend.
+ ScheduleForErase.push_back(UseMO.getParent());
+ continue;
+ }
+
+ // The use isn't an extend. Truncate back to the type we originally loaded.
+ // This is free on many targets.
+ InsertInsnsWithoutSideEffectsBeforeUse(
+ Builder, MI, UseMO,
+ [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore) {
+ ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
+ });
+ }
+
+ DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+ for (auto &InsertionInfo : ScheduleForInsert) {
+ MachineOperand *UseMO = InsertionInfo.UseMO;
+ MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
+ MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;
+
+ MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
+ if (PreviouslyEmitted) {
+ Observer.changingInstr(*UseMO->getParent());
+ UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
+ Observer.changedInstr(*UseMO->getParent());
+ continue;
+ }
+
+ Builder.setInsertPt(*InsertIntoBB, InsertBefore);
+ unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+ MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+ EmittedInsns[InsertIntoBB] = NewMI;
+ replaceRegOpWith(MRI, *UseMO, NewDstReg);
+ }
+ for (auto &EraseMI : ScheduleForErase) {
+ Observer.erasingInstr(*EraseMI);
+ EraseMI->eraseFromParent();
+ }
+ MI.getOperand(0).setReg(ChosenDstReg);
+ Observer.changedInstr(MI);
+
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
- return tryCombineCopy(MI);
+ if (tryCombineCopy(MI))
+ return true;
+ return tryCombineExtendingLoads(MI);
}
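The extending-loads combine above hinges on ChoosePreferredUse picking one extend for the load to absorb. The snippet below is only a simplified standalone rendering of those preferences (keep a real sext/zext over an anyext, keep sext over zext at equal width, otherwise take the widest type); Use and Ext are invented types, and the real routine also reasons about hoisting across blocks.

// Simplified standalone sketch only: invented Ext/Use types.
#include <iostream>
#include <vector>

enum Ext { AnyExt, ZExt, SExt };

struct Use {
  unsigned Bits; // width of the extend's result
  Ext Kind;
};

static Use choosePreferred(const std::vector<Use> &Uses) {
  Use Best{0, AnyExt};
  for (const Use &U : Uses) {
    if (Best.Bits == 0) { Best = U; continue; }
    if (U.Kind == AnyExt && Best.Kind != AnyExt) continue;              // keep defined ext
    if (Best.Kind == AnyExt && U.Kind != AnyExt) { Best = U; continue; }
    if (U.Bits == Best.Bits) {                                          // same width
      if (Best.Kind == SExt && U.Kind == ZExt) continue;                // keep sext
      if (Best.Kind == ZExt && U.Kind == SExt) { Best = U; continue; }
    }
    if (U.Bits > Best.Bits) Best = U;                                   // widest wins
  }
  return Best;
}

int main() {
  // A load whose value feeds a 32-bit zext, a 32-bit sext and a 64-bit anyext.
  std::vector<Use> Uses = {{32, ZExt}, {32, SExt}, {64, AnyExt}};
  Use P = choosePreferred(Uses);
  const char *KindStr =
      P.Kind == SExt ? "sext" : P.Kind == ZExt ? "zext" : "anyext";
  std::cout << P.Bits << " " << KindStr << "\n"; // prints "32 sext"
  // The load then becomes a 32-bit sextload and the 64-bit any-extend is
  // rewritten to extend the new 32-bit result instead.
}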
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
new file mode 100644
index 000000000000..c693acbbf10b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -0,0 +1,40 @@
+//===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common code for observing and reacting to changes made
+// to machine instructions at the generic (GlobalISel) level.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+void GISelChangeObserver::changingAllUsesOfReg(
+ const MachineRegisterInfo &MRI, unsigned Reg) {
+ for (auto &ChangingMI : MRI.use_instructions(Reg)) {
+ changingInstr(ChangingMI);
+ ChangingAllUsesOfReg.insert(&ChangingMI);
+ }
+}
+
+void GISelChangeObserver::finishedChangingAllUsesOfReg() {
+ for (auto *ChangedMI : ChangingAllUsesOfReg)
+ changedInstr(*ChangedMI);
+}
+
+RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
+ MachineFunction::Delegate *Del)
+ : MF(MF), Delegate(Del) {
+ // Register this as the delegate for handling insertions and deletions of
+ // instructions.
+ MF.setDelegate(Del);
+}
+
+RAIIDelegateInstaller::~RAIIDelegateInstaller() { MF.resetDelegate(Delegate); }
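RAIIDelegateInstaller is the usual RAII pattern applied to the function's delegate hook: install the observer on construction and guarantee its removal when the scope ends, even on early returns. A minimal standalone sketch of that pattern follows; Function, Delegate and Printer are invented stand-ins, not MachineFunction's interface.

// Standalone sketch only: invented Function/Delegate types.
#include <cassert>
#include <iostream>

struct Delegate {
  virtual ~Delegate() = default;
  virtual void createdInstr(int Id) = 0;
};

struct Function {
  Delegate *Del = nullptr;
  void setDelegate(Delegate *D) { assert(!Del && "delegate already set"); Del = D; }
  void resetDelegate(Delegate *D) { assert(Del == D); Del = nullptr; }
  void createInstr(int Id) { if (Del) Del->createdInstr(Id); }
};

// Install the delegate for exactly one scope; the destructor removes it.
class RAIIDelegateInstaller {
  Function &F;
  Delegate *Del;

public:
  RAIIDelegateInstaller(Function &Fn, Delegate *D) : F(Fn), Del(D) { F.setDelegate(D); }
  ~RAIIDelegateInstaller() { F.resetDelegate(Del); }
};

struct Printer : Delegate {
  void createdInstr(int Id) override { std::cout << "created " << Id << "\n"; }
};

int main() {
  Function F;
  Printer P;
  {
    RAIIDelegateInstaller Install(F, &P);
    F.createInstr(1); // observed: prints "created 1"
  }
  F.createInstr(2);   // delegate already removed: prints nothing
}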
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 75496fba0449..95f6274aa068 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -75,11 +76,16 @@
using namespace llvm;
+static cl::opt<bool>
+ EnableCSEInIRTranslator("enable-cse-in-irtranslator",
+ cl::desc("Should enable CSE in irtranslator"),
+ cl::Optional, cl::init(false));
char IRTranslator::ID = 0;
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
@@ -104,9 +110,44 @@ IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
+#ifndef NDEBUG
+namespace {
+/// Verify that every instruction created has the same DILocation as the
+/// instruction being translated.
+class DILocationVerifier : public GISelChangeObserver {
+ const Instruction *CurrInst = nullptr;
+
+public:
+ DILocationVerifier() = default;
+ ~DILocationVerifier() = default;
+
+ const Instruction *getCurrentInst() const { return CurrInst; }
+ void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
+
+ void erasingInstr(MachineInstr &MI) override {}
+ void changingInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {}
+
+ void createdInstr(MachineInstr &MI) override {
+ assert(getCurrentInst() && "Inserted instruction without a current MI");
+
+ // Only print the check message if we're actually checking it.
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
+ << " was copied to " << MI);
+#endif
+ assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
+ "Line info was not transferred to all instructions");
+ }
+};
+} // namespace
+#endif // ifndef NDEBUG
+
+
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -185,7 +226,7 @@ ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
unsigned Idx = 0;
while (auto Elt = C.getAggregateElement(Idx++)) {
auto EltRegs = getOrCreateVRegs(*Elt);
- std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs));
+ llvm::copy(EltRegs, std::back_inserter(*VRegs));
}
} else {
assert(SplitTys.size() == 1 && "unexpectedly split LLT");
@@ -279,7 +320,12 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ if (isa<Instruction>(U)) {
+ MachineInstr *FBinOpMI = FBinOp.getInstr();
+ const Instruction &I = cast<Instruction>(U);
+ FBinOpMI->copyIRFlags(I);
+ }
return true;
}
@@ -295,6 +341,13 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
}
+bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(1)));
+ return true;
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
const CmpInst *CI = dyn_cast<CmpInst>(&U);
@@ -312,8 +365,10 @@ bool IRTranslator::translateCompare(const User &U,
else if (Pred == CmpInst::FCMP_TRUE)
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
- else
- MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
+ else {
+ auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
+ FCmp->copyIRFlags(*CI);
+ }
return true;
}
@@ -323,14 +378,16 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
const Value *Ret = RI.getReturnValue();
if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
Ret = nullptr;
+
+ ArrayRef<unsigned> VRegs;
+ if (Ret)
+ VRegs = getOrCreateVRegs(*Ret);
+
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
- // FIXME: this interface should simplify when CallLowering gets adapted to
- // multiple VRegs per Value.
- unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0;
- return CLI->lowerReturn(MIRBuilder, Ret, VReg);
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -353,7 +410,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
MIRBuilder.buildBr(TgtBB);
// Link successors.
- for (const BasicBlock *Succ : BrInst.successors())
+ for (const BasicBlock *Succ : successors(&BrInst))
CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -413,7 +470,7 @@ bool IRTranslator::translateIndirectBr(const User &U,
// Link successors.
MachineBasicBlock &CurBB = MIRBuilder.getMBB();
- for (const BasicBlock *Succ : BrInst.successors())
+ for (const BasicBlock *Succ : successors(&BrInst))
CurBB.addSuccessor(&getMBB(*Succ));
return true;
@@ -544,8 +601,15 @@ bool IRTranslator::translateSelect(const User &U,
ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
- for (unsigned i = 0; i < ResRegs.size(); ++i)
- MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+ const SelectInst &SI = cast<SelectInst>(U);
+ const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition());
+ for (unsigned i = 0; i < ResRegs.size(); ++i) {
+ auto Select =
+ MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+ if (Cmp && isa<FPMathOperator>(Cmp)) {
+ Select->copyIRFlags(*Cmp);
+ }
+ }
return true;
}
@@ -704,29 +768,22 @@ void IRTranslator::getStackGuard(unsigned DstReg,
return;
MachinePointerInfo MPInfo(Global);
- MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
- *MemRefs =
+ MachineMemOperand *MemRef =
MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
DL->getPointerABIAlignment(0));
- MIB.setMemRefs(MemRefs, MemRefs + 1);
+ MIB.setMemRefs({MemRef});
}
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
- auto MIB = MIRBuilder.buildInstr(Op)
- .addDef(ResRegs[0])
- .addDef(ResRegs[1])
- .addUse(getOrCreateVReg(*CI.getOperand(0)))
- .addUse(getOrCreateVReg(*CI.getOperand(1)));
-
- if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) {
- unsigned Zero = getOrCreateVReg(
- *Constant::getNullValue(Type::getInt1Ty(CI.getContext())));
- MIB.addUse(Zero);
- }
+ MIRBuilder.buildInstr(Op)
+ .addDef(ResRegs[0])
+ .addDef(ResRegs[1])
+ .addUse(getOrCreateVReg(*CI.getOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getOperand(1)));
return true;
}
@@ -763,9 +820,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// instructions (in fact, they get ignored if they *do* exist).
MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
getOrCreateFrameIndex(*AI), DI.getDebugLoc());
- } else
- MIRBuilder.buildDirectDbgValue(getOrCreateVReg(*Address),
- DI.getVariable(), DI.getExpression());
+ } else {
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
+ }
+ return true;
+ }
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
+ assert(DI.getLabel() && "Missing label");
+
+ assert(DI.getLabel()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+
+ MIRBuilder.buildDbgLabel(DI.getLabel());
return true;
}
case Intrinsic::vaend:
@@ -807,55 +878,86 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
case Intrinsic::uadd_with_overflow:
- return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder);
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
case Intrinsic::sadd_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
case Intrinsic::usub_with_overflow:
- return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder);
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
case Intrinsic::ssub_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
case Intrinsic::umul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
- case Intrinsic::pow:
- MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
+ case Intrinsic::pow: {
+ auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ Pow->copyIRFlags(CI);
return true;
- case Intrinsic::exp:
- MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
+ }
+ case Intrinsic::exp: {
+ auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Exp->copyIRFlags(CI);
return true;
- case Intrinsic::exp2:
- MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
+ }
+ case Intrinsic::exp2: {
+ auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Exp2->copyIRFlags(CI);
return true;
- case Intrinsic::log:
- MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
+ }
+ case Intrinsic::log: {
+ auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log->copyIRFlags(CI);
return true;
- case Intrinsic::log2:
- MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
+ }
+ case Intrinsic::log2: {
+ auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log2->copyIRFlags(CI);
return true;
- case Intrinsic::fabs:
- MIRBuilder.buildInstr(TargetOpcode::G_FABS)
+ }
+ case Intrinsic::log10: {
+ auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Log10->copyIRFlags(CI);
return true;
- case Intrinsic::fma:
- MIRBuilder.buildInstr(TargetOpcode::G_FMA)
+ }
+ case Intrinsic::fabs: {
+ auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ Fabs->copyIRFlags(CI);
+ return true;
+ }
+ case Intrinsic::trunc:
+ MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::round:
+ MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::fma: {
+ auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA)
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)))
.addUse(getOrCreateVReg(*CI.getArgOperand(1)))
.addUse(getOrCreateVReg(*CI.getArgOperand(2)));
+ FMA->copyIRFlags(CI);
return true;
+ }
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -867,11 +969,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
- MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2);
+ auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2});
+ FMA->copyIRFlags(CI);
} else {
LLT Ty = getLLTForType(*CI.getType(), *DL);
- auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1);
- MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2);
+ auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1});
+ FMul->copyIRFlags(CI);
+ auto FAdd = MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2});
+ FAdd->copyIRFlags(CI);
}
return true;
}
@@ -893,6 +998,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
return true;
}
+ case Intrinsic::is_constant:
+ // If this wasn't constant-folded away by now, then it's not a
+ // constant.
+ MIRBuilder.buildConstant(getOrCreateVReg(CI), 0);
+ return true;
case Intrinsic::stackguard:
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
@@ -902,15 +1012,50 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(GuardVal, MIRBuilder);
AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
+ int FI = getOrCreateFrameIndex(*Slot);
+ MF->getFrameInfo().setStackProtectorIndex(FI);
+
MIRBuilder.buildStore(
GuardVal, getOrCreateVReg(*Slot),
- *MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*MF,
- getOrCreateFrameIndex(*Slot)),
- MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
- PtrTy.getSizeInBits() / 8, 8));
+ *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile,
+ PtrTy.getSizeInBits() / 8, 8));
return true;
}
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz: {
+ ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
+ bool isTrailing = ID == Intrinsic::cttz;
+ unsigned Opcode = isTrailing
+ ? Cst->isZero() ? TargetOpcode::G_CTTZ
+ : TargetOpcode::G_CTTZ_ZERO_UNDEF
+ : Cst->isZero() ? TargetOpcode::G_CTLZ
+ : TargetOpcode::G_CTLZ_ZERO_UNDEF;
+ MIRBuilder.buildInstr(Opcode)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
+ case Intrinsic::ctpop: {
+ MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
+ case Intrinsic::invariant_start: {
+ LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
+ unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildUndef(Undef);
+ return true;
+ }
+ case Intrinsic::invariant_end:
+ return true;
+ case Intrinsic::ceil:
+ MIRBuilder.buildInstr(TargetOpcode::G_FCEIL)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
}
return false;
}
@@ -1101,7 +1246,6 @@ bool IRTranslator::translateLandingPad(const User &U,
const LandingPadInst &LP = cast<LandingPadInst>(U);
MachineBasicBlock &MBB = MIRBuilder.getMBB();
- addLandingPadInfo(LP, MBB);
MBB.setIsEHPad();
@@ -1279,7 +1423,22 @@ bool IRTranslator::translateExtractElement(const User &U,
}
unsigned Res = getOrCreateVReg(U);
unsigned Val = getOrCreateVReg(*U.getOperand(0));
- unsigned Idx = getOrCreateVReg(*U.getOperand(1));
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
+ unsigned Idx = 0;
+ if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
+ if (CI->getBitWidth() != PreferredVecIdxWidth) {
+ APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
+ auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
+ Idx = getOrCreateVReg(*NewIdxCI);
+ }
+ }
+ if (!Idx)
+ Idx = getOrCreateVReg(*U.getOperand(1));
+ if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
+ const LLT &VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+ Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx)->getOperand(0).getReg();
+ }
MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
return true;
}
@@ -1299,7 +1458,7 @@ bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
SmallVector<MachineInstr *, 4> Insts;
for (auto Reg : getOrCreateVRegs(PI)) {
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
Insts.push_back(MIB.getInstr());
}
@@ -1402,9 +1561,18 @@ bool IRTranslator::translateAtomicRMW(const User &U,
}
void IRTranslator::finishPendingPhis() {
+#ifndef NDEBUG
+ DILocationVerifier Verifier;
+ GISelObserverWrapper WrapperObserver(&Verifier);
+ RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+#endif // ifndef NDEBUG
for (auto &Phi : PendingPHIs) {
const PHINode *PI = Phi.first;
ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
+ EntryBuilder->setDebugLoc(PI->getDebugLoc());
+#ifndef NDEBUG
+ Verifier.setCurrentInst(PI);
+#endif // ifndef NDEBUG
// All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
// won't create extra control flow here, otherwise we need to find the
@@ -1442,10 +1610,12 @@ bool IRTranslator::valueIsSplit(const Value &V,
}
bool IRTranslator::translate(const Instruction &Inst) {
- CurBuilder.setDebugLoc(Inst.getDebugLoc());
+ CurBuilder->setDebugLoc(Inst.getDebugLoc());
+ EntryBuilder->setDebugLoc(Inst.getDebugLoc());
switch(Inst.getOpcode()) {
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ return translate##OPCODE(Inst, *CurBuilder.get());
#include "llvm/IR/Instruction.def"
default:
return false;
@@ -1454,11 +1624,11 @@ bool IRTranslator::translate(const Instruction &Inst) {
bool IRTranslator::translate(const Constant &C, unsigned Reg) {
if (auto CI = dyn_cast<ConstantInt>(&C))
- EntryBuilder.buildConstant(Reg, *CI);
+ EntryBuilder->buildConstant(Reg, *CI);
else if (auto CF = dyn_cast<ConstantFP>(&C))
- EntryBuilder.buildFConstant(Reg, *CF);
+ EntryBuilder->buildFConstant(Reg, *CF);
else if (isa<UndefValue>(C))
- EntryBuilder.buildUndef(Reg);
+ EntryBuilder->buildUndef(Reg);
else if (isa<ConstantPointerNull>(C)) {
// As we are trying to build a constant val of 0 into a pointer,
// insert a cast to make them correct with respect to types.
@@ -1466,35 +1636,36 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
- EntryBuilder.buildCast(Reg, ZeroReg);
+ EntryBuilder->buildCast(Reg, ZeroReg);
} else if (auto GV = dyn_cast<GlobalValue>(&C))
- EntryBuilder.buildGlobalValue(Reg, GV);
+ EntryBuilder->buildGlobalValue(Reg, GV);
else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
if (!CAZ->getType()->isVectorTy())
return false;
// Return the scalar if it is a <1 x Ty> vector.
if (CAZ->getNumElements() == 1)
return translate(*CAZ->getElementValue(0u), Reg);
- std::vector<unsigned> Ops;
+ SmallVector<unsigned, 4> Ops;
for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
Constant &Elt = *CAZ->getElementValue(i);
Ops.push_back(getOrCreateVReg(Elt));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
// Return the scalar if it is a <1 x Ty> vector.
if (CV->getNumElements() == 1)
return translate(*CV->getElementAsConstant(0), Reg);
- std::vector<unsigned> Ops;
+ SmallVector<unsigned, 4> Ops;
for (unsigned i = 0; i < CV->getNumElements(); ++i) {
Constant &Elt = *CV->getElementAsConstant(i);
Ops.push_back(getOrCreateVReg(Elt));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
switch(CE->getOpcode()) {
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: \
+ return translate##OPCODE(*CE, *EntryBuilder.get());
#include "llvm/IR/Instruction.def"
default:
return false;
@@ -1506,9 +1677,9 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
}
- EntryBuilder.buildMerge(Reg, Ops);
+ EntryBuilder->buildBuildVector(Reg, Ops);
} else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
- EntryBuilder.buildBlockAddress(Reg, BA);
+ EntryBuilder->buildBlockAddress(Reg, BA);
} else
return false;
@@ -1525,8 +1696,8 @@ void IRTranslator::finalizeFunction() {
// MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
// to avoid accessing free’d memory (in runOnMachineFunction) and to avoid
// destroying it twice (in ~IRTranslator() and ~LLVMContext())
- EntryBuilder = MachineIRBuilder();
- CurBuilder = MachineIRBuilder();
+ EntryBuilder.reset();
+ CurBuilder.reset();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -1534,12 +1705,30 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
const Function &F = MF->getFunction();
if (F.empty())
return false;
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ // Set the CSEConfig and run the analysis.
+ GISelCSEInfo *CSEInfo = nullptr;
+ TPC = &getAnalysis<TargetPassConfig>();
+ bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None;
+ // Disable CSE for O0.
+ bool EnableCSE = !IsO0 && EnableCSEInIRTranslator;
+ if (EnableCSE) {
+ EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
+ CSEInfo = &Wrapper.get(std::move(Config));
+ EntryBuilder->setCSEInfo(CSEInfo);
+ CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ CurBuilder->setCSEInfo(CSEInfo);
+ } else {
+ EntryBuilder = make_unique<MachineIRBuilder>();
+ CurBuilder = make_unique<MachineIRBuilder>();
+ }
CLI = MF->getSubtarget().getCallLowering();
- CurBuilder.setMF(*MF);
- EntryBuilder.setMF(*MF);
+ CurBuilder->setMF(*MF);
+ EntryBuilder->setMF(*MF);
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
- TPC = &getAnalysis<TargetPassConfig>();
ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
@@ -1558,7 +1747,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Setup a separate basic-block for the arguments and constants
MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
MF->push_back(EntryBB);
- EntryBuilder.setMBB(*EntryBB);
+ EntryBuilder->setMBB(*EntryBB);
// Create all blocks, in IR order, to preserve the layout.
for (const BasicBlock &BB: F) {
@@ -1595,7 +1784,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
+ if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
@@ -1612,38 +1801,54 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
assert(VRegs.empty() && "VRegs already populated?");
VRegs.push_back(VArg);
} else {
- unpackRegs(*ArgIt, VArg, EntryBuilder);
+ unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
}
ArgIt++;
}
// Need to visit defs before uses when translating instructions.
- ReversePostOrderTraversal<const Function *> RPOT(&F);
- for (const BasicBlock *BB : RPOT) {
- MachineBasicBlock &MBB = getMBB(*BB);
- // Set the insertion point of all the following translations to
- // the end of this basic block.
- CurBuilder.setMBB(MBB);
-
- for (const Instruction &Inst : *BB) {
- if (translate(Inst))
- continue;
-
- OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- Inst.getDebugLoc(), BB);
- R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
-
- if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
- std::string InstStrStorage;
- raw_string_ostream InstStr(InstStrStorage);
- InstStr << Inst;
+ GISelObserverWrapper WrapperObserver;
+ if (EnableCSE && CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ {
+ ReversePostOrderTraversal<const Function *> RPOT(&F);
+#ifndef NDEBUG
+ DILocationVerifier Verifier;
+ WrapperObserver.addObserver(&Verifier);
+#endif // ifndef NDEBUG
+ RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
+ for (const BasicBlock *BB : RPOT) {
+ MachineBasicBlock &MBB = getMBB(*BB);
+ // Set the insertion point of all the following translations to
+ // the end of this basic block.
+ CurBuilder->setMBB(MBB);
+
+ for (const Instruction &Inst : *BB) {
+#ifndef NDEBUG
+ Verifier.setCurrentInst(&Inst);
+#endif // ifndef NDEBUG
+ if (translate(Inst))
+ continue;
+
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ Inst.getDebugLoc(), BB);
+ R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
+
+ if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << Inst;
+
+ R << ": '" << InstStr.str() << "'";
+ }
- R << ": '" << InstStr.str() << "'";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
}
-
- reportTranslationError(*MF, *TPC, *ORE, R);
- return false;
}
+#ifndef NDEBUG
+ WrapperObserver.removeObserver(&Verifier);
+#endif
}
finishPendingPhis();
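
Note: the IRTranslator changes above heap-allocate the builders (a plain MachineIRBuilder at -O0, a CSEMIRBuilder otherwise) and register a GISelObserverWrapper as the function's delegate so that the CSE info and, in asserts builds, the DILocation verifier are notified of every instruction the translator creates. As a rough, self-contained sketch of that fan-out pattern (the names Inst, Observer, ObserverWrapper and Counter are illustrative stand-ins, not the LLVM classes):

#include <algorithm>
#include <cassert>
#include <vector>

struct Inst { int Opcode; };

// Stand-in for GISelChangeObserver: gets told about every created instruction.
struct Observer {
  virtual ~Observer() = default;
  virtual void createdInstr(Inst &I) = 0;
};

// Stand-in for GISelObserverWrapper: fans one notification out to every
// registered observer (CSE info, debug-location verifier, ...).
class ObserverWrapper : public Observer {
  std::vector<Observer *> Observers;
public:
  void addObserver(Observer *O) { Observers.push_back(O); }
  void removeObserver(Observer *O) {
    Observers.erase(std::remove(Observers.begin(), Observers.end(), O),
                    Observers.end());
  }
  void createdInstr(Inst &I) override {
    for (Observer *O : Observers)
      O->createdInstr(I);
  }
};

struct Counter : Observer {
  int Seen = 0;
  void createdInstr(Inst &) override { ++Seen; }
};

int main() {
  ObserverWrapper W;
  Counter A, B;
  W.addObserver(&A);
  W.addObserver(&B);
  Inst I{1};
  W.createdInstr(I);            // both observers see the new instruction
  assert(A.Seen == 1 && B.Seen == 1);
  W.removeObserver(&B);
  W.createdInstr(I);
  assert(A.Seen == 2 && B.Seen == 1);
}
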
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5e77fcbb0ed9..38913e4afcba 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -80,5 +80,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
return true;
return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
- MI.implicit_operands().begin() == MI.implicit_operands().end();
+ empty(MI.implicit_operands());
}
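
Note: the InstructionSelector hunk above only swaps an explicit begin() == end() comparison for the empty() range helper. A minimal stand-alone equivalent of such a helper (a sketch assuming member begin()/end(), not the STLExtras implementation; the name range_empty is chosen here to avoid clashing with std or llvm):

#include <cassert>
#include <vector>

// True when the range has no elements, mirroring the begin() == end()
// comparison it replaces.
template <typename RangeT>
bool range_empty(const RangeT &R) {
  return R.begin() == R.end();
}

int main() {
  std::vector<int> V;
  assert(range_empty(V));
  V.push_back(1);
  assert(!range_empty(V));
}
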
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 344f573a67f5..94eab9ae00c8 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -45,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet(
SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit;
return [=](const LegalityQuery &Query) {
TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
- Query.MMODescrs[MMOIdx].Size};
+ Query.MMODescrs[MMOIdx].SizeInBits};
return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) !=
TypesAndMemSize.end();
};
@@ -82,7 +82,7 @@ LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
- return !isPowerOf2_32(Query.MMODescrs[MMOIdx].Size /* In Bytes */);
+ return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
};
}
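
Note: the LegalityPredicates hunk renames the MMO size field to SizeInBits, so memSizeInBytesNotPow2 now converts to bytes before the power-of-two test. A small self-contained sketch of that check (assuming 8-bit bytes; isPow2 stands in for isPowerOf2_32):

#include <cassert>
#include <cstdint>

// True when Value is a non-zero power of two (same contract as isPowerOf2_32).
static bool isPow2(uint32_t Value) {
  return Value != 0 && (Value & (Value - 1)) == 0;
}

// The memory size arrives in bits; the legality test is on the byte size.
static bool memSizeInBytesNotPow2(uint32_t SizeInBits) {
  return !isPow2(SizeInBits / 8);
}

int main() {
  assert(!memSizeInBytesNotPow2(32)); // 4 bytes: power of two
  assert(memSizeInBytesNotPow2(24));  // 3 bytes: not a power of two
}
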
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 9a2aac998a84..84131e59948c 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -16,6 +16,9 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -32,11 +35,17 @@
using namespace llvm;
+static cl::opt<bool>
+ EnableCSEInLegalizer("enable-cse-in-legalizer",
+ cl::desc("Should enable CSE in Legalizer"),
+ cl::Optional, cl::init(false));
+
char Legalizer::ID = 0;
INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
@@ -47,6 +56,8 @@ Legalizer::Legalizer() : MachineFunctionPass(ID) {
void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ AU.addPreserved<GISelCSEAnalysisWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -64,9 +75,54 @@ static bool isArtifact(const MachineInstr &MI) {
case TargetOpcode::G_SEXT:
case TargetOpcode::G_MERGE_VALUES:
case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
return true;
}
}
+using InstListTy = GISelWorkList<256>;
+using ArtifactListTy = GISelWorkList<128>;
+
+namespace {
+class LegalizerWorkListManager : public GISelChangeObserver {
+ InstListTy &InstList;
+ ArtifactListTy &ArtifactList;
+
+public:
+ LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts)
+ : InstList(Insts), ArtifactList(Arts) {}
+
+ void createdInstr(MachineInstr &MI) override {
+ // Only legalize pre-isel generic instructions.
+    // The legalization process could generate target-specific pseudo
+    // instructions with generic types. Don't record them.
+ if (isPreISelGenericOpcode(MI.getOpcode())) {
+ if (isArtifact(MI))
+ ArtifactList.insert(&MI);
+ else
+ InstList.insert(&MI);
+ }
+ LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI);
+ }
+
+ void erasingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << ".. .. Erasing: " << MI);
+ InstList.remove(&MI);
+ ArtifactList.remove(&MI);
+ }
+
+ void changingInstr(MachineInstr &MI) override {
+ LLVM_DEBUG(dbgs() << ".. .. Changing MI: " << MI);
+ }
+
+ void changedInstr(MachineInstr &MI) override {
+ // When insts change, we want to revisit them to legalize them again.
+ // We'll consider them the same as created.
+ LLVM_DEBUG(dbgs() << ".. .. Changed MI: " << MI);
+ createdInstr(MI);
+ }
+};
+} // namespace
bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
@@ -76,15 +132,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
init(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
- LegalizerHelper Helper(MF);
const size_t NumBlocks = MF.size();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Populate Insts
- GISelWorkList<256> InstList;
- GISelWorkList<128> ArtifactList;
+ InstListTy InstList;
+ ArtifactListTy ArtifactList;
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
// Perform legalization bottom up so we can DCE as we legalize.
// Traverse BB in RPOT and within each basic block, add insts top down,
@@ -103,24 +160,34 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
InstList.insert(&MI);
}
}
- Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
- // Only legalize pre-isel generic instructions.
- // Legalization process could generate Target specific pseudo
- // instructions with generic types. Don't record them
- if (isPreISelGenericOpcode(MI->getOpcode())) {
- if (isArtifact(*MI))
- ArtifactList.insert(MI);
- else
- InstList.insert(MI);
- }
- LLVM_DEBUG(dbgs() << ".. .. New MI: " << *MI;);
- });
+ std::unique_ptr<MachineIRBuilder> MIRBuilder;
+ GISelCSEInfo *CSEInfo = nullptr;
+ bool IsO0 = TPC.getOptLevel() == CodeGenOpt::Level::None;
+ // Disable CSE for O0.
+ bool EnableCSE = !IsO0 && EnableCSEInLegalizer;
+ if (EnableCSE) {
+ MIRBuilder = make_unique<CSEMIRBuilder>();
+ std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
+ CSEInfo = &Wrapper.get(std::move(Config));
+ MIRBuilder->setCSEInfo(CSEInfo);
+ } else
+ MIRBuilder = make_unique<MachineIRBuilder>();
+ // This observer keeps the worklist updated.
+ LegalizerWorkListManager WorkListObserver(InstList, ArtifactList);
+ // We want both WorkListObserver as well as CSEInfo to observe all changes.
+ // Use the wrapper observer.
+ GISelObserverWrapper WrapperObserver(&WorkListObserver);
+ if (EnableCSE && CSEInfo)
+ WrapperObserver.addObserver(CSEInfo);
+ // Now install the observer as the delegate to MF.
+ // This will keep all the observers notified about new insertions/deletions.
+ RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
+ LegalizerHelper Helper(MF, WrapperObserver, *MIRBuilder.get());
const LegalizerInfo &LInfo(Helper.getLegalizerInfo());
- LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo);
- auto RemoveDeadInstFromLists = [&InstList,
- &ArtifactList](MachineInstr *DeadMI) {
- InstList.remove(DeadMI);
- ArtifactList.remove(DeadMI);
+ LegalizationArtifactCombiner ArtCombiner(*MIRBuilder.get(), MF.getRegInfo(),
+ LInfo);
+ auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
+ WrapperObserver.erasingInstr(*DeadMI);
};
bool Changed = false;
do {
@@ -138,7 +205,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Error out if we couldn't legalize this instruction. We may want to
// fall back to DAG ISel instead in the future.
if (Res == LegalizerHelper::UnableToLegalize) {
- Helper.MIRBuilder.stopRecordingInsertions();
+ Helper.MIRBuilder.stopObservingChanges();
reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
"unable to legalize instruction", MI);
return false;
@@ -149,7 +216,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *ArtifactList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ LLVM_DEBUG(dbgs() << MI << "Is dead\n");
RemoveDeadInstFromLists(&MI);
MI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
@@ -157,7 +224,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr *, 4> DeadInstructions;
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI);
+ LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
RemoveDeadInstFromLists(DeadMI);
DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
}
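
Note: the Legalizer changes above replace the insertion-recording callback with a LegalizerWorkListManager observer: newly created generic instructions are filed into either the artifact worklist or the general worklist, erased instructions are dropped from both, and changed instructions are requeued like new ones. A stripped-down sketch of that routing, using ordinary containers rather than GISelWorkList (all names here are illustrative):

#include <cassert>
#include <unordered_set>

struct Inst { bool IsArtifact; };
using WorkList = std::unordered_set<Inst *>;

// Routes change notifications into the two worklists.
struct WorkListManager {
  WorkList &InstList, &ArtifactList;
  void createdInstr(Inst &I) {
    (I.IsArtifact ? ArtifactList : InstList).insert(&I);
  }
  void erasingInstr(Inst &I) {
    InstList.erase(&I);
    ArtifactList.erase(&I);
  }
  void changedInstr(Inst &I) { createdInstr(I); } // revisit like a new inst
};

int main() {
  WorkList Insts, Arts;
  WorkListManager M{Insts, Arts};
  Inst A{true}, B{false};
  M.createdInstr(A);
  M.createdInstr(B);
  assert(Arts.count(&A) && Insts.count(&B));
  M.erasingInstr(A);
  assert(Arts.empty());
}
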
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 87086af121b7..b3fc94cdec60 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -15,24 +15,37 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-
#define DEBUG_TYPE "legalizer"
using namespace llvm;
using namespace LegalizeActions;
-LegalizerHelper::LegalizerHelper(MachineFunction &MF)
- : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
+LegalizerHelper::LegalizerHelper(MachineFunction &MF,
+ GISelChangeObserver &Observer,
+ MachineIRBuilder &Builder)
+ : MIRBuilder(Builder), MRI(MF.getRegInfo()),
+ LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
MIRBuilder.setMF(MF);
+ MIRBuilder.setChangeObserver(Observer);
}
+LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
+ GISelChangeObserver &Observer,
+ MachineIRBuilder &B)
+ : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
+ MIRBuilder.setMF(MF);
+ MIRBuilder.setChangeObserver(Observer);
+}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
@@ -59,8 +72,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
case Custom:
LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
- return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
- : UnableToLegalize;
+ return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
+ : UnableToLegalize;
default:
LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
@@ -77,17 +90,20 @@ void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
case TargetOpcode::G_SDIV:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::SDIV_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
case TargetOpcode::G_UDIV:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::UDIV_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
case TargetOpcode::G_SREM:
- assert(Size == 32 && "Unsupported size");
- return RTLIB::SREM_I32;
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
case TargetOpcode::G_UREM:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
assert(Size == 32 && "Unsupported size");
- return RTLIB::UREM_I32;
+ return RTLIB::CTLZ_I32;
case TargetOpcode::G_FADD:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
@@ -184,8 +200,9 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
- case TargetOpcode::G_UREM: {
- Type *HLTy = Type::getInt32Ty(Ctx);
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ Type *HLTy = IntegerType::get(Ctx, Size);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized)
return Status;
@@ -289,7 +306,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
for (int i = 0; i < NumParts; ++i)
DstRegs.push_back(
MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -319,7 +341,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
CarryIn = CarryOut;
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -375,7 +400,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(SegReg);
}
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -436,7 +465,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
}
assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -462,12 +495,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
unsigned SrcReg = 0;
unsigned Adjustment = i * NarrowSize / 8;
+ unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
- MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
- MMO.getOrdering(), MMO.getFailureOrdering());
+ NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
+ MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
Adjustment);
@@ -477,7 +510,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
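
Note: in the narrowed load/store hunks above, every split access now derives its alignment from the original alignment and its byte offset via MinAlign, instead of letting only the first part keep the original alignment. MinAlign(A, B) is the greatest power of two dividing both arguments, i.e. the lowest set bit of A | B, which is the alignment a pointer keeps after being offset by B bytes from an A-aligned address. A sketch with the usual bit trick (an assumption-level rendering, not a copy of MathExtras.h):

#include <cassert>
#include <cstdint>

// Lowest set bit of A | B; MinAlign(A, 0) == A.
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  assert(MinAlign(16, 0) == 16); // first split part keeps full alignment
  assert(MinAlign(16, 8) == 8);  // an 8-byte offset drops it to 8
  assert(MinAlign(16, 4) == 4);
}
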
@@ -504,12 +540,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
for (int i = 0; i < NumParts; ++i) {
unsigned DstReg = 0;
unsigned Adjustment = i * NarrowSize / 8;
+ unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
- MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
- MMO.getOrdering(), MMO.getFailureOrdering());
+ NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
+ MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
Adjustment);
@@ -537,11 +573,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_OR: {
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
// Legalize bitwise operation:
// A = BinOp<Ty> B, C
// into:
@@ -580,11 +621,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// Do the operation on each small part.
for (int i = 0; i < NumParts; ++i)
- MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);
+ MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
+ {SrcsReg1[i], SrcsReg2[i]});
// Gather the destination registers into the final destination.
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ if(MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
@@ -594,7 +639,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
unsigned OpIdx, unsigned ExtOpcode) {
MachineOperand &MO = MI.getOperand(OpIdx);
- auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg());
+ auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
MO.setReg(ExtB->getOperand(0).getReg());
}
@@ -603,7 +648,7 @@ void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
MachineOperand &MO = MI.getOperand(OpIdx);
unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt);
+ MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
MO.setReg(DstExt);
}
@@ -614,6 +659,69 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBO: {
+ if (TypeIdx == 1)
+ return UnableToLegalize; // TODO
+ auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
+ {MI.getOperand(2).getReg()});
+ auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
+ {MI.getOperand(3).getReg()});
+ unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
+ ? TargetOpcode::G_ADD
+ : TargetOpcode::G_SUB;
+ // Do the arithmetic in the larger type.
+ auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
+ LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
+ APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
+ auto AndOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_AND, {WideTy},
+ {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
+ // There is no overflow if the AndOp is the same as NewOp.
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
+ AndOp);
+ // Now trunc the NewOp to the original result.
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTPOP: {
+ // First ZEXT the input.
+ auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
+ LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
+ if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+ // The count is the same in the larger type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ auto TopBit =
+ APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
+ MIBSrc = MIRBuilder.buildInstr(
+ TargetOpcode::G_OR, {WideTy},
+ {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
+ }
+ // Perform the operation at the larger size.
+ auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+ // This is already the correct result for CTPOP and CTTZs
+ if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
+ MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+ // The correct result is NewOp - (Difference in widety and current ty).
+ unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
+ MIBNewOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_SUB, {WideTy},
+ {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
+ }
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ // Make the original instruction a trunc now, and update its source.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
+ MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
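
Note: the new G_UADDO/G_USUBO widening above zero-extends both operands, performs the arithmetic in the wider type, masks the result down to the original width, reports overflow when the masked value differs from the wide result, and truncates for the final value. The same idea in plain integer code, detecting 8-bit unsigned-add overflow with 16-bit arithmetic (an illustration of the technique, not the generated MIR):

#include <cassert>
#include <cstdint>

// Returns true if A + B overflows 8 bits; *Res receives the truncated sum.
static bool uaddo8_via_16(uint8_t A, uint8_t B, uint8_t *Res) {
  uint16_t Wide = uint16_t(A) + uint16_t(B); // the add in the wide type
  uint16_t Masked = Wide & 0xFF;             // mask with all-ones of the narrow type
  *Res = uint8_t(Wide);                      // truncate back to the result type
  return Masked != Wide;                     // overflow iff masking changed the value
}

int main() {
  uint8_t R;
  assert(!uaddo8_via_16(100, 27, &R) && R == 127);
  assert(uaddo8_via_16(200, 100, &R) && R == 44); // 300 wraps to 44 and overflows
}
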
@@ -624,87 +732,100 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SHL:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
// The "number of bits to shift" operand must preserve its value as an
// unsigned integer:
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ASHR:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
// The "number of bits to shift" operand must preserve its value as an
// unsigned integer:
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
case TargetOpcode::G_LSHR:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- // Perform operation at larger width (any extension is fine here, high bits
- // don't affect the result) and then truncate the result back to the
- // original type.
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changingInstr(MI);
+ if (TypeIdx == 0) {
+ // Perform operation at larger width (any extension is fine here, high
+ // bits don't affect the result) and then truncate the result back to the
+ // original type.
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ // Explicit extension is required here since high bits affect the result.
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ }
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
if (TypeIdx != 0)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SITOFP:
if (TypeIdx != 1)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UITOFP:
if (TypeIdx != 1)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INSERT:
if (TypeIdx != 0)
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_LOAD:
@@ -717,8 +838,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
LLVM_FALLTHROUGH;
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
+ Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_STORE: {
@@ -726,18 +848,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
WideTy != LLT::scalar(8))
return UnableToLegalize;
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
+ Observer.changingInstr(MI);
SrcMO.setCImm(ConstantInt::get(Ctx, Val));
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FCONSTANT: {
@@ -755,28 +879,38 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
default:
llvm_unreachable("Unhandled fp widen type");
}
+ Observer.changingInstr(MI);
SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_BRCOND:
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FCMP:
+ Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
}
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ICMP:
+ Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
@@ -787,18 +921,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(MI, WideTy, 2, ExtOpcode);
widenScalarSrc(MI, WideTy, 3, ExtOpcode);
}
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_GEP:
assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
+ Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
assert(TypeIdx == 0 && "Expecting only Idx 0");
+ Observer.changingInstr(MI);
for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
@@ -808,9 +944,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
widenScalarDst(MI, WideTy);
- MIRBuilder.recordInsertion(&MI);
+ Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ if (TypeIdx != 2)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+
+ case TargetOpcode::G_FCEIL:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
@@ -984,6 +1136,30 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTPOP:
+ return lowerBitCount(MI, TypeIdx, Ty);
+ case G_UADDE: {
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned CarryOut = MI.getOperand(1).getReg();
+ unsigned LHS = MI.getOperand(2).getReg();
+ unsigned RHS = MI.getOperand(3).getReg();
+ unsigned CarryIn = MI.getOperand(4).getReg();
+
+ unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
+ unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
+
+ MIRBuilder.buildAdd(TmpRes, LHS, RHS);
+ MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
+ MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
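
Note: the new G_UADDE lowering above computes LHS + RHS, adds the zero-extended carry-in, and derives the carry-out from an unsigned compare against LHS. For reference, a conventional full-width add-with-carry in plain C++ that tracks a carry out of either addition is sketched below; it illustrates the general operation, not a transcription of the MIR sequence above:

#include <cassert>
#include <cstdint>

// 32-bit add with carry-in; returns the sum and sets *CarryOut.
static uint32_t adde32(uint32_t A, uint32_t B, bool CarryIn, bool *CarryOut) {
  uint32_t Tmp = A + B;                  // first addition
  bool C1 = Tmp < A;                     // carried out of the first addition?
  uint32_t Res = Tmp + (CarryIn ? 1 : 0);
  bool C2 = Res < Tmp;                   // carried out of adding the carry-in?
  *CarryOut = C1 || C2;                  // at most one of the two can carry
  return Res;
}

int main() {
  bool C;
  assert(adde32(0xFFFFFFFFu, 0, true, &C) == 0 && C);
  assert(adde32(1, 2, false, &C) == 3 && !C);
}
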
@@ -993,10 +1169,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
// FIXME: Don't know how to handle secondary types yet.
if (TypeIdx != 0)
return UnableToLegalize;
+
+ MIRBuilder.setInstr(MI);
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
- case TargetOpcode::G_ADD: {
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ SmallVector<unsigned, 2> DstRegs;
+
unsigned NarrowSize = NarrowTy.getSizeInBits();
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Size = MRI.getType(DstReg).getSizeInBits();
@@ -1006,7 +1186,29 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
if (Size % NarrowSize != 0)
return UnableToLegalize;
- MIRBuilder.setInstr(MI);
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(TmpReg);
+ DstRegs.push_back(TmpReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_ADD: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ int NumParts = Size / NarrowSize;
+ // FIXME: Don't know how to handle the situation where the small vectors
+ // aren't all the same size yet.
+ if (Size % NarrowSize != 0)
+ return UnableToLegalize;
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
@@ -1018,9 +1220,164 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
DstRegs.push_back(DstReg);
}
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE: {
+ bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+ unsigned ValReg = MI.getOperand(0).getReg();
+ unsigned AddrReg = MI.getOperand(1).getReg();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned Size = MRI.getType(ValReg).getSizeInBits();
+ unsigned NumParts = Size / NarrowSize;
+
+ SmallVector<unsigned, 8> NarrowRegs;
+ if (!IsLoad)
+ extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
+
+ const LLT OffsetTy =
+ LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+ MachineFunction &MF = *MI.getMF();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
+ unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
+ unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
+ unsigned NewAddrReg = 0;
+ MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
+ MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
+ MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
+ NarrowTy.getSizeInBits() / 8, Alignment);
+ if (IsLoad) {
+ unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
+ NarrowRegs.push_back(Dst);
+ MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
+ } else {
+ MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
+ }
+ }
+ if (IsLoad) {
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
+ else
+ MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
+ }
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ unsigned Opc = MI.getOpcode();
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ auto isSupported = [this](const LegalityQuery &Q) {
+ auto QAction = LI.getAction(Q).Action;
+ return QAction == Legal || QAction == Libcall || QAction == Custom;
+ };
+ switch (Opc) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
+ // This trivially expands to CTLZ.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CTLZ: {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Len = Ty.getSizeInBits();
+ if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
+ // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
+ auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
+ {Ty}, {SrcReg});
+ auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
+ auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
+ auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ SrcReg, MIBZero);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
+ MIBCtlzZU);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // for now, we do this:
+ // NewLen = NextPowerOf2(Len);
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+    // Up to NewLen/2
+ // return Len - popcount(x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
+ unsigned Op = SrcReg;
+ unsigned NewLen = PowerOf2Ceil(Len);
+ for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
+ auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
+ auto MIBOp = MIRBuilder.buildInstr(
+ TargetOpcode::G_OR, {Ty},
+ {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
+ {Op, MIBShiftAmt})});
+ Op = MIBOp->getOperand(0).getReg();
+ }
+ auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
+ MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
+ {MIRBuilder.buildConstant(Ty, Len), MIBPop});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
+ // This trivially expands to CTTZ.
+ Observer.changingInstr(MI);
+ MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_CTTZ: {
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Len = Ty.getSizeInBits();
+ if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
+ // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
+ // zero.
+ auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
+ {Ty}, {SrcReg});
+ auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
+ auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
+ auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+ SrcReg, MIBZero);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
+ MIBCttzZU);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
+ auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
+ auto MIBNot =
+ MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
+ auto MIBTmp = MIRBuilder.buildInstr(
+ TargetOpcode::G_AND, {Ty},
+ {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
+ {SrcReg, MIBCstNeg1})});
+ if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
+ auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
+ MIRBuilder.buildInstr(
+ TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
+ {MIBCstLen,
+ MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
+ MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
+ return Legalized;
+ }
}
}
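
Note: lowerBitCount above falls back to the classic expansions when no better instruction is available: CTLZ is lowered by smearing the highest set bit downwards with an or/shift cascade and subtracting a population count, and CTTZ by taking popcount(~x & (x - 1)), both per the cited Hacker's Delight. The same expansions for 32-bit values in plain C++ (a self-contained sketch with a naive popcount):

#include <cassert>
#include <cstdint>

static unsigned popcount32(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1) // clear the lowest set bit each iteration
    ++N;
  return N;
}

// Count leading zeros: smear the highest set bit down, then count set bits.
static unsigned ctlz32(uint32_t X) {
  X |= X >> 1;
  X |= X >> 2;
  X |= X >> 4;
  X |= X >> 8;
  X |= X >> 16;
  return 32 - popcount32(X);
}

// Count trailing zeros: ~x & (x - 1) keeps exactly the bits below the lowest set bit.
static unsigned cttz32(uint32_t X) {
  return popcount32(~X & (X - 1));
}

int main() {
  assert(ctlz32(1) == 31 && ctlz32(0x80000000u) == 0 && ctlz32(0) == 32);
  assert(cttz32(8) == 3 && cttz32(1) == 0 && cttz32(0) == 32);
}
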
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index ae061b64a38c..fa36ede5b976 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -51,7 +52,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
OS << Opcode << ", MMOs={";
for (const auto &MMODescr : MMODescrs) {
- OS << MMODescr.Size << ", ";
+ OS << MMODescr.SizeInBits << ", ";
}
OS << "}";
@@ -219,7 +220,7 @@ void LegalizerInfo::computeTables() {
Opcode, TypeIdx, ElementSize,
moreToWiderTypesAndLessToWidest(NumElementsActions));
}
- llvm::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
+ llvm::sort(ElementSizesSeen);
SizeChangeStrategy VectorElementSizeChangeStrategy =
&unsupportedForDifferentSizes;
if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
@@ -298,8 +299,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
std::initializer_list<unsigned> Opcodes) {
unsigned Representative = *Opcodes.begin();
- assert(Opcodes.begin() != Opcodes.end() &&
- Opcodes.begin() + 1 != Opcodes.end() &&
+ assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
"Initializer list must have at least two opcodes");
for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I)
@@ -376,7 +376,8 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
}
bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
return false;
}
@@ -584,7 +585,7 @@ const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB)
if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
- return &MI;
+ return &MI;
}
return nullptr;
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 3271b54aa830..1f5611061994 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -10,6 +10,7 @@
/// This file implements the MachineIRBuidler class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -22,73 +23,72 @@
using namespace llvm;
-void MachineIRBuilderBase::setMF(MachineFunction &MF) {
+void MachineIRBuilder::setMF(MachineFunction &MF) {
State.MF = &MF;
State.MBB = nullptr;
State.MRI = &MF.getRegInfo();
State.TII = MF.getSubtarget().getInstrInfo();
State.DL = DebugLoc();
State.II = MachineBasicBlock::iterator();
- State.InsertedInstr = nullptr;
+ State.Observer = nullptr;
}
-void MachineIRBuilderBase::setMBB(MachineBasicBlock &MBB) {
+void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) {
State.MBB = &MBB;
State.II = MBB.end();
assert(&getMF() == MBB.getParent() &&
"Basic block is in a different function");
}
-void MachineIRBuilderBase::setInstr(MachineInstr &MI) {
+void MachineIRBuilder::setInstr(MachineInstr &MI) {
assert(MI.getParent() && "Instruction is not part of a basic block");
setMBB(*MI.getParent());
State.II = MI.getIterator();
}
-void MachineIRBuilderBase::setInsertPt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator II) {
+void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; }
+
+void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II) {
assert(MBB.getParent() == &getMF() &&
"Basic block is in a different function");
State.MBB = &MBB;
State.II = II;
}
-void MachineIRBuilderBase::recordInsertion(MachineInstr *InsertedInstr) const {
- if (State.InsertedInstr)
- State.InsertedInstr(InsertedInstr);
+void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const {
+ if (State.Observer)
+ State.Observer->createdInstr(*InsertedInstr);
}
-void MachineIRBuilderBase::recordInsertions(
- std::function<void(MachineInstr *)> Inserted) {
- State.InsertedInstr = std::move(Inserted);
+void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) {
+ State.Observer = &Observer;
}
-void MachineIRBuilderBase::stopRecordingInsertions() {
- State.InsertedInstr = nullptr;
-}
+void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; }
//------------------------------------------------------------------------------
// Build instruction variants.
//------------------------------------------------------------------------------
-MachineInstrBuilder MachineIRBuilderBase::buildInstr(unsigned Opcode) {
+MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) {
return insertInstr(buildInstrNoInsert(Opcode));
}
-MachineInstrBuilder MachineIRBuilderBase::buildInstrNoInsert(unsigned Opcode) {
+MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode));
return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::insertInstr(MachineInstrBuilder MIB) {
+MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
getMBB().insert(getInsertPt(), MIB);
recordInsertion(MIB);
return MIB;
}
MachineInstrBuilder
-MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
- const MDNode *Expr) {
+MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -99,8 +99,9 @@ MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
/*IsIndirect*/ false, Reg, Variable, Expr));
}
-MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue(
- unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder
+MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -111,9 +112,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue(
/*IsIndirect*/ true, Reg, Variable, Expr));
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable,
- const MDNode *Expr) {
+MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
+ const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -126,8 +127,9 @@ MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable,
.addMetadata(Expr);
}
-MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue(
- const Constant &C, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
+ const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(
@@ -149,16 +151,24 @@ MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue(
return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
}
-MachineInstrBuilder MachineIRBuilderBase::buildFrameIndex(unsigned Res,
- int Idx) {
+MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
+ assert(isa<DILabel>(Label) && "not a label");
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(State.DL) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = buildInstr(TargetOpcode::DBG_LABEL);
+
+ return MIB.addMetadata(Label);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
return buildInstr(TargetOpcode::G_FRAME_INDEX)
.addDef(Res)
.addFrameIndex(Idx);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) {
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
+ const GlobalValue *GV) {
assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
assert(getMRI()->getType(Res).getAddressSpace() ==
GV->getType()->getAddressSpace() &&
@@ -169,17 +179,14 @@ MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) {
.addGlobalAddress(GV);
}
-void MachineIRBuilderBase::validateBinaryOp(unsigned Res, unsigned Op0,
- unsigned Op1) {
- assert((getMRI()->getType(Res).isScalar() ||
- getMRI()->getType(Res).isVector()) &&
- "invalid operand type");
- assert(getMRI()->getType(Res) == getMRI()->getType(Op0) &&
- getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");
+void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
+ const LLT &Op1) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0 && Res == Op1) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0,
- unsigned Op1) {
+MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
+ unsigned Op1) {
assert(getMRI()->getType(Res).isPointer() &&
getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
assert(getMRI()->getType(Op1).isScalar() && "invalid offset type");
@@ -191,8 +198,8 @@ MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0,
}
Optional<MachineInstrBuilder>
-MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0,
- const LLT &ValueTy, uint64_t Value) {
+MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
+ const LLT &ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -208,9 +215,8 @@ MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0,
return buildGEP(Res, Op0, TmpReg);
}
-MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res,
- unsigned Op0,
- uint32_t NumBits) {
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+ uint32_t NumBits) {
assert(getMRI()->getType(Res).isPointer() &&
getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
@@ -220,24 +226,23 @@ MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res,
.addImm(NumBits);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBr(MachineBasicBlock &Dest) {
+MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBrIndirect(unsigned Tgt) {
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
-MachineInstrBuilder MachineIRBuilderBase::buildCopy(unsigned Res, unsigned Op) {
- assert(getMRI()->getType(Res) == LLT() || getMRI()->getType(Op) == LLT() ||
- getMRI()->getType(Res) == getMRI()->getType(Op));
- return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::COPY, Res, Op);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) {
- LLT Ty = getMRI()->getType(Res);
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ const ConstantInt &Val) {
+ LLT Ty = Res.getLLTTy(*getMRI());
assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
@@ -246,48 +251,55 @@ MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) {
NewVal = ConstantInt::get(getMF().getFunction().getContext(),
Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
- return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);
+ auto MIB = buildInstr(TargetOpcode::G_CONSTANT);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addCImm(NewVal);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildConstant(unsigned Res,
- int64_t Val) {
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ int64_t Val) {
auto IntN = IntegerType::get(getMF().getFunction().getContext(),
- getMRI()->getType(Res).getSizeInBits());
+ Res.getLLTTy(*getMRI()).getSizeInBits());
ConstantInt *CI = ConstantInt::get(IntN, Val, true);
return buildConstant(Res, *CI);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildFConstant(unsigned Res, const ConstantFP &Val) {
- assert(getMRI()->getType(Res).isScalar() && "invalid operand type");
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ const ConstantFP &Val) {
+ assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
- return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val);
+ auto MIB = buildInstr(TargetOpcode::G_FCONSTANT);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addFPImm(&Val);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildFConstant(unsigned Res,
- double Val) {
- LLT DstTy = getMRI()->getType(Res);
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ double Val) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
auto &Ctx = getMF().getFunction().getContext();
auto *CFP =
ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits()));
return buildFConstant(Res, *CFP);
}
-MachineInstrBuilder MachineIRBuilderBase::buildBrCond(unsigned Tst,
- MachineBasicBlock &Dest) {
+MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
+ MachineBasicBlock &Dest) {
assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilderBase::buildLoad(unsigned Res, unsigned Addr,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
+ MachineMemOperand &MMO) {
return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res,
- unsigned Addr, MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
+ unsigned Res,
+ unsigned Addr,
+ MachineMemOperand &MMO) {
assert(getMRI()->getType(Res).isValid() && "invalid operand type");
assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
@@ -297,9 +309,8 @@ MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val,
- unsigned Addr,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
+ MachineMemOperand &MMO) {
assert(getMRI()->getType(Val).isValid() && "invalid operand type");
assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
@@ -309,83 +320,73 @@ MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilderBase::buildUAdde(unsigned Res,
- unsigned CarryOut,
- unsigned Op0, unsigned Op1,
- unsigned CarryIn) {
- assert(getMRI()->getType(Res).isScalar() && "invalid operand type");
- assert(getMRI()->getType(Res) == getMRI()->getType(Op0) &&
- getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");
- assert(getMRI()->getType(CarryOut).isScalar() && "invalid operand type");
- assert(getMRI()->getType(CarryOut) == getMRI()->getType(CarryIn) &&
- "type mismatch");
-
- return buildInstr(TargetOpcode::G_UADDE)
- .addDef(Res)
- .addDef(CarryOut)
- .addUse(Op0)
- .addUse(Op1)
- .addUse(CarryIn);
+MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res,
+ const DstOp &CarryOut,
+ const SrcOp &Op0,
+ const SrcOp &Op1,
+ const SrcOp &CarryIn) {
+ return buildInstr(TargetOpcode::G_UADDE, {Res, CarryOut},
+ {Op0, Op1, CarryIn});
}
-MachineInstrBuilder MachineIRBuilderBase::buildAnyExt(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_ANYEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildSExt(unsigned Res, unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildSExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_SEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildZExt(unsigned Res, unsigned Op) {
- validateTruncExt(Res, Op, true);
- return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildExtOrTrunc(unsigned ExtOpc,
- unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
+ const DstOp &Res,
+ const SrcOp &Op) {
assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc ||
TargetOpcode::G_SEXT == ExtOpc) &&
"Expecting Extending Opc");
- assert(getMRI()->getType(Res).isScalar() ||
- getMRI()->getType(Res).isVector());
- assert(getMRI()->getType(Res).isScalar() == getMRI()->getType(Op).isScalar());
+ assert(Res.getLLTTy(*getMRI()).isScalar() ||
+ Res.getLLTTy(*getMRI()).isVector());
+ assert(Res.getLLTTy(*getMRI()).isScalar() ==
+ Op.getLLTTy(*getMRI()).isScalar());
unsigned Opcode = TargetOpcode::COPY;
- if (getMRI()->getType(Res).getSizeInBits() >
- getMRI()->getType(Op).getSizeInBits())
+ if (Res.getLLTTy(*getMRI()).getSizeInBits() >
+ Op.getLLTTy(*getMRI()).getSizeInBits())
Opcode = ExtOpc;
- else if (getMRI()->getType(Res).getSizeInBits() <
- getMRI()->getType(Op).getSizeInBits())
+ else if (Res.getLLTTy(*getMRI()).getSizeInBits() <
+ Op.getLLTTy(*getMRI()).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
else
- assert(getMRI()->getType(Res) == getMRI()->getType(Op));
+ assert(Res.getLLTTy(*getMRI()) == Op.getLLTTy(*getMRI()));
- return buildInstr(Opcode).addDef(Res).addUse(Op);
+ return buildInstr(Opcode, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildSExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildZExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildAnyExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(const DstOp &Res,
+ const SrcOp &Op) {
return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst,
- unsigned Src) {
- LLT SrcTy = getMRI()->getType(Src);
- LLT DstTy = getMRI()->getType(Dst);
+MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
+ const SrcOp &Src) {
+ LLT SrcTy = Src.getLLTTy(*getMRI());
+ LLT DstTy = Dst.getLLTTy(*getMRI());
if (SrcTy == DstTy)
return buildCopy(Dst, Src);
@@ -399,11 +400,11 @@ MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst,
Opcode = TargetOpcode::G_BITCAST;
}
- return buildInstr(Opcode).addDef(Dst).addUse(Src);
+ return buildInstr(Opcode, Dst, Src);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) {
+MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+ uint64_t Index) {
#ifndef NDEBUG
assert(getMRI()->getType(Src).isValid() && "invalid operand type");
assert(getMRI()->getType(Res).isValid() && "invalid operand type");
@@ -424,8 +425,8 @@ MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) {
.addImm(Index);
}
-void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
- ArrayRef<uint64_t> Indices) {
+void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
@@ -465,56 +466,67 @@ void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
}
}
-MachineInstrBuilder MachineIRBuilderBase::buildUndef(unsigned Res) {
- return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);
+MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
+ return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {});
}
-MachineInstrBuilder MachineIRBuilderBase::buildMerge(unsigned Res,
- ArrayRef<unsigned> Ops) {
-
-#ifndef NDEBUG
- assert(!Ops.empty() && "invalid trivial sequence");
- LLT Ty = getMRI()->getType(Ops[0]);
- for (auto Reg : Ops)
- assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list");
- assert(Ops.size() * getMRI()->getType(Ops[0]).getSizeInBits() ==
- getMRI()->getType(Res).getSizeInBits() &&
- "input operands do not cover output register");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+  // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
+}
- if (Ops.size() == 1)
- return buildCast(Res, Ops[0]);
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
+ const SrcOp &Op) {
+ // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<DstOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
+}
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
- MIB.addDef(Res);
- for (unsigned i = 0; i < Ops.size(); ++i)
- MIB.addUse(Ops[i]);
- return MIB;
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+ const SrcOp &Op) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<DstOp>,
+ // we need some temporary storage for the DstOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildUnmerge(ArrayRef<unsigned> Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
-#ifndef NDEBUG
- assert(!Res.empty() && "invalid trivial sequence");
- LLT Ty = getMRI()->getType(Res[0]);
- for (auto Reg : Res)
- assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list");
- assert(Res.size() * getMRI()->getType(Res[0]).getSizeInBits() ==
- getMRI()->getType(Op).getSizeInBits() &&
- "input operands do not cover output register");
-#endif
+MachineInstrBuilder
+MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
+ ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
+}
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_UNMERGE_VALUES);
- for (unsigned i = 0; i < Res.size(); ++i)
- MIB.addDef(Res[i]);
- MIB.addUse(Op);
- return MIB;
+MachineInstrBuilder
+MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<unsigned> Ops) {
+ // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+  // we need some temporary storage for the SrcOp objects. Here we use a
+ // sufficiently large SmallVector to not go through the heap.
+ SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec);
}
-MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res,
- unsigned Src, unsigned Op,
- unsigned Index) {
+MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
+ unsigned Op, unsigned Index) {
assert(Index + getMRI()->getType(Op).getSizeInBits() <=
getMRI()->getType(Res).getSizeInBits() &&
"insertion past the end of a register");
@@ -531,9 +543,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res,
.addImm(Index);
}
-MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID,
- unsigned Res,
- bool HasSideEffects) {
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ unsigned Res,
+ bool HasSideEffects) {
auto MIB =
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
: TargetOpcode::G_INTRINSIC);
@@ -543,133 +555,52 @@ MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID,
return MIB;
}
-MachineInstrBuilder MachineIRBuilderBase::buildTrunc(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, false);
- return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildFPTrunc(unsigned Res,
- unsigned Op) {
- validateTruncExt(Res, Op, false);
- return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res,
+ const SrcOp &Op) {
+ return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op);
}
-MachineInstrBuilder MachineIRBuilderBase::buildICmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- assert(getMRI()->getType(Op0) == getMRI()->getType(Op0) && "type mismatch");
- assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
- if (getMRI()->getType(Op0).isScalar() || getMRI()->getType(Op0).isPointer())
- assert(getMRI()->getType(Res).isScalar() && "type mismatch");
- else
- assert(getMRI()->getType(Res).isVector() &&
- getMRI()->getType(Res).getNumElements() ==
- getMRI()->getType(Op0).getNumElements() &&
- "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_ICMP)
- .addDef(Res)
- .addPredicate(Pred)
- .addUse(Op0)
- .addUse(Op1);
+MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
+ const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1});
}
-MachineInstrBuilder MachineIRBuilderBase::buildFCmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- assert((getMRI()->getType(Op0).isScalar() ||
- getMRI()->getType(Op0).isVector()) &&
- "invalid operand type");
- assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch");
- assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
- if (getMRI()->getType(Op0).isScalar())
- assert(getMRI()->getType(Res).isScalar() && "type mismatch");
- else
- assert(getMRI()->getType(Res).isVector() &&
- getMRI()->getType(Res).getNumElements() ==
- getMRI()->getType(Op0).getNumElements() &&
- "type mismatch");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
+ const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
- return buildInstr(TargetOpcode::G_FCMP)
- .addDef(Res)
- .addPredicate(Pred)
- .addUse(Op0)
- .addUse(Op1);
+ return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1});
}
-MachineInstrBuilder MachineIRBuilderBase::buildSelect(unsigned Res,
- unsigned Tst,
- unsigned Op0,
- unsigned Op1) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
- "invalid operand type");
- assert(ResTy == getMRI()->getType(Op0) && ResTy == getMRI()->getType(Op1) &&
- "type mismatch");
- if (ResTy.isScalar() || ResTy.isPointer())
- assert(getMRI()->getType(Tst).isScalar() && "type mismatch");
- else
- assert((getMRI()->getType(Tst).isScalar() ||
- (getMRI()->getType(Tst).isVector() &&
- getMRI()->getType(Tst).getNumElements() ==
- getMRI()->getType(Op0).getNumElements())) &&
- "type mismatch");
-#endif
+MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res,
+ const SrcOp &Tst,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
- return buildInstr(TargetOpcode::G_SELECT)
- .addDef(Res)
- .addUse(Tst)
- .addUse(Op0)
- .addUse(Op1);
+ return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1});
}
MachineInstrBuilder
-MachineIRBuilderBase::buildInsertVectorElement(unsigned Res, unsigned Val,
- unsigned Elt, unsigned Idx) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- LLT ValTy = getMRI()->getType(Val);
- LLT EltTy = getMRI()->getType(Elt);
- LLT IdxTy = getMRI()->getType(Idx);
- assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
- assert(IdxTy.isScalar() && "invalid operand type");
- assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
- assert(ResTy.getElementType() == EltTy && "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT)
- .addDef(Res)
- .addUse(Val)
- .addUse(Elt)
- .addUse(Idx);
+MachineIRBuilder::buildInsertVectorElement(const DstOp &Res, const SrcOp &Val,
+ const SrcOp &Elt, const SrcOp &Idx) {
+ return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT, Res, {Val, Elt, Idx});
}
MachineInstrBuilder
-MachineIRBuilderBase::buildExtractVectorElement(unsigned Res, unsigned Val,
- unsigned Idx) {
-#ifndef NDEBUG
- LLT ResTy = getMRI()->getType(Res);
- LLT ValTy = getMRI()->getType(Val);
- LLT IdxTy = getMRI()->getType(Idx);
- assert(ValTy.isVector() && "invalid operand type");
- assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type");
- assert(IdxTy.isScalar() && "invalid operand type");
- assert(ValTy.getElementType() == ResTy && "type mismatch");
-#endif
-
- return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT)
- .addDef(Res)
- .addUse(Val)
- .addUse(Idx);
+MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val,
+ const SrcOp &Idx) {
+ return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT, Res, {Val, Idx});
}
-MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess(
+MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal,
unsigned NewVal, MachineMemOperand &MMO) {
#ifndef NDEBUG
@@ -697,9 +628,9 @@ MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess(
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
- unsigned CmpVal, unsigned NewVal,
- MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
+ unsigned CmpVal, unsigned NewVal,
+ MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
LLT AddrTy = getMRI()->getType(Addr);
@@ -721,10 +652,11 @@ MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
.addMemOperand(&MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
- unsigned Addr, unsigned Val,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
+ unsigned OldValRes,
+ unsigned Addr,
+ unsigned Val,
+ MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
LLT AddrTy = getMRI()->getType(Addr);
@@ -743,74 +675,75 @@ MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val,
MMO);
}
-MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWOr(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(unsigned OldValRes,
+ unsigned Addr,
+ unsigned Val,
+ MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
+MachineIRBuilder::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
#ifndef NDEBUG
assert(getMRI()->getType(Res).isPointer() && "invalid res type");
#endif
@@ -818,12 +751,9 @@ MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA);
}
-void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
- bool IsExtend) {
+void MachineIRBuilder::validateTruncExt(const LLT &DstTy, const LLT &SrcTy,
+ bool IsExtend) {
#ifndef NDEBUG
- LLT SrcTy = getMRI()->getType(Src);
- LLT DstTy = getMRI()->getType(Dst);
-
if (DstTy.isVector()) {
assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
@@ -839,3 +769,236 @@ void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
"invalid widening trunc");
#endif
}
+
+void MachineIRBuilder::validateSelectOp(const LLT &ResTy, const LLT &TstTy,
+ const LLT &Op0Ty, const LLT &Op1Ty) {
+#ifndef NDEBUG
+ assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+ "invalid operand type");
+ assert((ResTy == Op0Ty && ResTy == Op1Ty) && "type mismatch");
+ if (ResTy.isScalar() || ResTy.isPointer())
+ assert(TstTy.isScalar() && "type mismatch");
+ else
+ assert((TstTy.isScalar() ||
+ (TstTy.isVector() &&
+ TstTy.getNumElements() == Op0Ty.getNumElements())) &&
+ "type mismatch");
+#endif
+}
+
+MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
+ ArrayRef<DstOp> DstOps,
+ ArrayRef<SrcOp> SrcOps,
+ Optional<unsigned> Flags) {
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_SELECT: {
+ assert(DstOps.size() == 1 && "Invalid select");
+ assert(SrcOps.size() == 3 && "Invalid select");
+ validateSelectOp(
+ DstOps[0].getLLTTy(*getMRI()), SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI()));
+ break;
+ }
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UREM:
+  case TargetOpcode::G_SREM: {
+    // All these are binary ops.
+    assert(DstOps.size() == 1 && "Invalid Dst");
+    assert(SrcOps.size() == 2 && "Invalid Srcs");
+    validateBinaryOp(DstOps[0].getLLTTy(*getMRI()),
+                     SrcOps[0].getLLTTy(*getMRI()),
+                     SrcOps[1].getLLTTy(*getMRI()));
+    break;
+  }
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_ANYEXT:
+    assert(DstOps.size() == 1 && "Invalid Dst");
+    assert(SrcOps.size() == 1 && "Invalid Srcs");
+    validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
+                     SrcOps[0].getLLTTy(*getMRI()), true);
+    break;
+  case TargetOpcode::G_TRUNC:
+  case TargetOpcode::G_FPTRUNC:
+    assert(DstOps.size() == 1 && "Invalid Dst");
+    assert(SrcOps.size() == 1 && "Invalid Srcs");
+    validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
+                     SrcOps[0].getLLTTy(*getMRI()), false);
+    break;
+ case TargetOpcode::COPY:
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ assert(DstOps[0].getLLTTy(*getMRI()) == LLT() ||
+ SrcOps[0].getLLTTy(*getMRI()) == LLT() ||
+ DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI()));
+ break;
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ assert(DstOps.size() == 1 && "Invalid Dst Operands");
+ assert(SrcOps.size() == 3 && "Invalid Src Operands");
+ // For F/ICMP, the first src operand is the predicate, followed by
+ // the two comparands.
+ assert(SrcOps[0].getSrcOpKind() == SrcOp::SrcType::Ty_Predicate &&
+ "Expecting predicate");
+ assert([&]() -> bool {
+ CmpInst::Predicate Pred = SrcOps[0].getPredicate();
+ return Opc == TargetOpcode::G_ICMP ? CmpInst::isIntPredicate(Pred)
+ : CmpInst::isFPPredicate(Pred);
+ }() && "Invalid predicate");
+ assert(SrcOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ assert([&]() -> bool {
+ LLT Op0Ty = SrcOps[1].getLLTTy(*getMRI());
+ LLT DstTy = DstOps[0].getLLTTy(*getMRI());
+ if (Op0Ty.isScalar() || Op0Ty.isPointer())
+ return DstTy.isScalar();
+ else
+ return DstTy.isVector() &&
+ DstTy.getNumElements() == Op0Ty.getNumElements();
+ }() && "Type Mismatch");
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ assert(!DstOps.empty() && "Invalid trivial sequence");
+ assert(SrcOps.size() == 1 && "Invalid src for Unmerge");
+ assert(std::all_of(DstOps.begin(), DstOps.end(),
+ [&, this](const DstOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ DstOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in output list");
+ assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input operands do not cover output register");
+ break;
+ }
+ case TargetOpcode::G_MERGE_VALUES: {
+ assert(!SrcOps.empty() && "invalid trivial sequence");
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+ "input operands do not cover output register");
+ if (SrcOps.size() == 1)
+ return buildCast(DstOps[0], SrcOps[0]);
+ if (DstOps[0].getLLTTy(*getMRI()).isVector())
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps);
+ break;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ assert(DstOps.size() == 1 && "Invalid Dst size");
+ assert(SrcOps.size() == 2 && "Invalid Src size");
+ assert(SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type");
+ assert((DstOps[0].getLLTTy(*getMRI()).isScalar() ||
+ DstOps[0].getLLTTy(*getMRI()).isPointer()) &&
+ "Invalid operand type");
+ assert(SrcOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand type");
+ assert(SrcOps[0].getLLTTy(*getMRI()).getElementType() ==
+ DstOps[0].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ break;
+ }
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ assert(DstOps.size() == 1 && "Invalid dst size");
+ assert(SrcOps.size() == 3 && "Invalid src size");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ SrcOps[0].getLLTTy(*getMRI()).isVector() && "Invalid operand type");
+ assert(DstOps[0].getLLTTy(*getMRI()).getElementType() ==
+ SrcOps[1].getLLTTy(*getMRI()) &&
+ "Type mismatch");
+ assert(SrcOps[2].getLLTTy(*getMRI()).isScalar() && "Invalid index");
+ assert(DstOps[0].getLLTTy(*getMRI()).getNumElements() ==
+ SrcOps[0].getLLTTy(*getMRI()).getNumElements() &&
+ "Type mismatch");
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+    assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "Res type must be a vector");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+           "input scalars do not exactly cover the output vector register");
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+    assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+ assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
+ "Res type must be a vector");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
+ "type mismatch in input list");
+ if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits())
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps);
+ break;
+ }
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ assert(DstOps.size() == 1 && "Invalid DstOps");
+    assert(SrcOps.size() >= 2 && "Must have at least 2 operands");
+ assert(std::all_of(SrcOps.begin(), SrcOps.end(),
+ [&, this](const SrcOp &Op) {
+ return (Op.getLLTTy(*getMRI()).isVector() &&
+ Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI()));
+ }) &&
+ "type mismatch in input list");
+ assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
+ DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
+           "input vectors do not exactly cover the output vector register");
+ break;
+ }
+ case TargetOpcode::G_UADDE: {
+ assert(DstOps.size() == 2 && "Invalid no of dst operands");
+ assert(SrcOps.size() == 3 && "Invalid no of src operands");
+ assert(DstOps[0].getLLTTy(*getMRI()).isScalar() && "Invalid operand");
+ assert((DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI())) &&
+ (DstOps[0].getLLTTy(*getMRI()) == SrcOps[1].getLLTTy(*getMRI())) &&
+ "Invalid operand");
+ assert(DstOps[1].getLLTTy(*getMRI()).isScalar() && "Invalid operand");
+ assert(DstOps[1].getLLTTy(*getMRI()) == SrcOps[2].getLLTTy(*getMRI()) &&
+ "type mismatch");
+ break;
+ }
+ }
+
+ auto MIB = buildInstr(Opc);
+ for (const DstOp &Op : DstOps)
+ Op.addDefToMIB(*getMRI(), MIB);
+ for (const SrcOp &Op : SrcOps)
+ Op.addSrcToMIB(MIB);
+ if (Flags)
+ MIB->setFlags(*Flags);
+ return MIB;
+}
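
For illustration, a minimal usage sketch of the DstOp/SrcOp-based buildInstr()
introduced above. The helper function and virtual register parameters below are
hypothetical and not part of this patch; they only show how the convenience
wrappers earlier in this file funnel into this one entry point.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Hypothetical helper: emit a compare and an add through the new interface.
// DstOp/SrcOp are constructed implicitly from the vreg numbers, and
// buildInstr() runs the per-opcode operand checks from the switch above.
static void emitCmpAndAdd(MachineIRBuilder &B, unsigned DstCmp, unsigned DstAdd,
                          unsigned Src0, unsigned Src1) {
  // G_ICMP takes a predicate SrcOp followed by the two comparands.
  B.buildInstr(TargetOpcode::G_ICMP, {DstCmp}, {CmpInst::ICMP_EQ, Src0, Src1});
  // Binary ops are checked by validateBinaryOp() before being emitted.
  B.buildInstr(TargetOpcode::G_ADD, {DstAdd}, {Src0, Src1});
}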
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 9e2d48d1dc42..dcc8b7cc23c5 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -115,8 +115,8 @@ bool RegBankSelect::assignmentMatch(
// By default we assume we will have to repair something.
OnlyAssign = false;
// Each part of a break down needs to end up in a different register.
- // In other word, Reg assignement does not match.
- if (ValMapping.NumBreakDowns > 1)
+  // In other words, Reg assignment does not match.
+ if (ValMapping.NumBreakDowns != 1)
return false;
const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
@@ -140,7 +140,7 @@ bool RegBankSelect::repairReg(
return false;
assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
// An empty range of new register means no repairing.
- assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
+ assert(!empty(NewVRegs) && "We should not have to repair");
// Assume we are repairing a use and thus, the original reg will be
// the source of the repairing.
@@ -528,7 +528,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
bool RegBankSelect::applyMapping(
MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
- // OpdMapper will hold all the information needed for the rewritting.
+ // OpdMapper will hold all the information needed for the rewriting.
RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);
// First, place the repairing code.
@@ -714,18 +714,23 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
// - Terminators must be the last instructions:
// * Before, move the insert point before the first terminator.
  //              * After, we have to split the outgoing edges.
- unsigned Reg = MO.getReg();
if (Before) {
// Check whether Reg is defined by any terminator.
- MachineBasicBlock::iterator It = MI;
- for (auto Begin = MI.getParent()->begin();
- --It != Begin && It->isTerminator();)
- if (It->modifiesRegister(Reg, &TRI)) {
- // Insert the repairing code right after the definition.
- addInsertPoint(*It, /*Before*/ false);
- return;
- }
- addInsertPoint(*It, /*Before*/ true);
+ MachineBasicBlock::reverse_iterator It = MI;
+ auto REnd = MI.getParent()->rend();
+
+ for (; It != REnd && It->isTerminator(); ++It) {
+ assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
+ "copy insertion in middle of terminators not handled");
+ }
+
+ if (It == REnd) {
+ addInsertPoint(*MI.getParent()->begin(), true);
+ return;
+ }
+
+ // We are sure to be right before the first terminator.
+ addInsertPoint(*It, /*Before*/ false);
return;
}
// Make sure Reg is not redefined by other terminators, otherwise
@@ -733,7 +738,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
++It != End;)
// The machine verifier should reject this kind of code.
- assert(It->modifiesRegister(Reg, &TRI) && "Do not know where to split");
+    assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
+           "Do not know where to split");
  // Split each outgoing edge.
MachineBasicBlock &Src = *MI.getParent();
for (auto &Succ : Src.successors())
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index dd15567ef1c1..28404e52d6ea 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -426,7 +426,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
"This mapping is too complex for this function");
iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
- if (NewRegs.begin() == NewRegs.end()) {
+ if (empty(NewRegs)) {
LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1a5f88743d5f..59cbf93e7cd1 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -137,7 +137,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
// If we can move an instruction, we can remove it. Otherwise, it has
// a side-effect of some sort.
bool SawStore = false;
- if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore))
+ if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore) && !MI.isPHI())
return false;
// Instructions without side-effects are dead iff they only define dead vregs.
@@ -235,6 +235,57 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
return APF;
}
+Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
+ const unsigned Op2,
+ const MachineRegisterInfo &MRI) {
+ auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
+ if (MaybeOp1Cst && MaybeOp2Cst) {
+ LLT Ty = MRI.getType(Op1);
+ APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
+ APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
+ switch (Opcode) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ return C1 + C2;
+ case TargetOpcode::G_AND:
+ return C1 & C2;
+ case TargetOpcode::G_ASHR:
+ return C1.ashr(C2);
+ case TargetOpcode::G_LSHR:
+ return C1.lshr(C2);
+ case TargetOpcode::G_MUL:
+ return C1 * C2;
+ case TargetOpcode::G_OR:
+ return C1 | C2;
+ case TargetOpcode::G_SHL:
+ return C1 << C2;
+ case TargetOpcode::G_SUB:
+ return C1 - C2;
+ case TargetOpcode::G_XOR:
+ return C1 ^ C2;
+ case TargetOpcode::G_UDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.udiv(C2);
+ case TargetOpcode::G_SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return C1.sdiv(C2);
+ case TargetOpcode::G_UREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.urem(C2);
+ case TargetOpcode::G_SREM:
+ if (!C2.getBoolValue())
+ break;
+ return C1.srem(C2);
+ }
+ }
+ return None;
+}
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
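
For illustration, a small usage sketch of the ConstantFoldBinOp() helper added
above. The combine function below is hypothetical (not part of this patch) and
only demonstrates the intended call pattern, assuming the declaration lives in
the GlobalISel Utils header.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical combine: if both inputs of a G_ADD are constant vregs, fold
// them with ConstantFoldBinOp() and rewrite the result as a G_CONSTANT.
static bool foldConstantAdd(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) {
  if (MI.getOpcode() != TargetOpcode::G_ADD)
    return false;
  Optional<APInt> Folded = ConstantFoldBinOp(MI.getOpcode(),
                                             MI.getOperand(1).getReg(),
                                             MI.getOperand(2).getReg(), MRI);
  if (!Folded)
    return false;
  // Insert the replacement right before the old instruction, then drop it.
  B.setInstr(MI);
  B.buildConstant(MI.getOperand(0).getReg(), Folded->getSExtValue());
  MI.eraseFromParent();
  return true;
}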
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 9f7f5e392a9a..d3364952f244 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -461,6 +461,8 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
unsigned CurIdx = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getValueType();
+
+ // Make sure we use the same alignment AsmPrinter would use.
unsigned Align = DL.getPreferredAlignment(Globals[j]);
unsigned Padding = alignTo(MergedSize, Align) - MergedSize;
MergedSize += Padding;
@@ -516,6 +518,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
GlobalVariable::NotThreadLocal, AddrSpace);
MergedGV->setAlignment(MaxAlign);
+ MergedGV->setSection(Globals[i]->getSection());
const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
@@ -599,16 +602,15 @@ bool GlobalMerge::doInitialization(Module &M) {
IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
auto &DL = M.getDataLayout();
- DenseMap<unsigned, SmallVector<GlobalVariable *, 16>> Globals, ConstGlobals,
- BSSGlobals;
+ DenseMap<std::pair<unsigned, StringRef>, SmallVector<GlobalVariable *, 16>>
+ Globals, ConstGlobals, BSSGlobals;
bool Changed = false;
setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
- if (GV.isDeclaration() || GV.isThreadLocal() ||
- GV.hasSection() || GV.hasImplicitSection())
+ if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasImplicitSection())
continue;
// It's not safe to merge globals that may be preempted
@@ -623,6 +625,7 @@ bool GlobalMerge::doInitialization(Module &M) {
assert(PT && "Global variable is not a pointer!");
unsigned AddressSpace = PT->getAddressSpace();
+ StringRef Section = GV.getSection();
// Ignore all 'special' globals.
if (GV.getName().startswith("llvm.") ||
@@ -636,27 +639,27 @@ bool GlobalMerge::doInitialization(Module &M) {
Type *Ty = GV.getValueType();
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
if (TM &&
- TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
- BSSGlobals[AddressSpace].push_back(&GV);
+ TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS())
+ BSSGlobals[{AddressSpace, Section}].push_back(&GV);
else if (GV.isConstant())
- ConstGlobals[AddressSpace].push_back(&GV);
+ ConstGlobals[{AddressSpace, Section}].push_back(&GV);
else
- Globals[AddressSpace].push_back(&GV);
+ Globals[{AddressSpace, Section}].push_back(&GV);
}
}
for (auto &P : Globals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, false, P.first);
+ Changed |= doMerge(P.second, M, false, P.first.first);
for (auto &P : BSSGlobals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, false, P.first);
+ Changed |= doMerge(P.second, M, false, P.first.first);
if (EnableGlobalMergeOnConst)
for (auto &P : ConstGlobals)
if (P.second.size() > 1)
- Changed |= doMerge(P.second, M, true, P.first);
+ Changed |= doMerge(P.second, M, true, P.first.first);
return Changed;
}
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index f12d00071b24..ceeba639ee09 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -273,7 +273,7 @@ namespace {
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> *LaterRedefs = nullptr);
+ SmallSet<MCPhysReg, 4> *LaterRedefs = nullptr);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
bool IgnoreBr = false);
@@ -1366,12 +1366,12 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
// Before stepping forward past MI, remember which regs were live
// before MI. This is needed to set the Undef flag only when reg is
// dead.
- SparseSet<unsigned> LiveBeforeMI;
+ SparseSet<MCPhysReg, identity<MCPhysReg>> LiveBeforeMI;
LiveBeforeMI.setUniverse(TRI->getNumRegs());
for (unsigned Reg : Redefs)
LiveBeforeMI.insert(Reg);
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Clobbers;
Redefs.stepForward(MI, Clobbers);
// Now add the implicit uses for each of the clobbered values.
@@ -1444,7 +1444,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
Redefs.init(*TRI);
if (MRI->tracksLiveness()) {
- // Initialize liveins to the first BB. These are potentiall redefined by
+ // Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.addLiveIns(CvtMBB);
Redefs.addLiveIns(NextMBB);
@@ -1740,7 +1740,7 @@ bool IfConverter::IfConvertDiamondCommon(
if (MRI->tracksLiveness()) {
for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>, 4> Dummy;
Redefs.stepForward(MI, Dummy);
}
}
@@ -1806,13 +1806,13 @@ bool IfConverter::IfConvertDiamondCommon(
// generate:
// sub r0, r1, #1
// addne r0, r1, #1
- SmallSet<unsigned, 4> RedefsByFalse;
- SmallSet<unsigned, 4> ExtUses;
+ SmallSet<MCPhysReg, 4> RedefsByFalse;
+ SmallSet<MCPhysReg, 4> ExtUses;
if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {
for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) {
if (FI.isDebugInstr())
continue;
- SmallVector<unsigned, 4> Defs;
+ SmallVector<MCPhysReg, 4> Defs;
for (const MachineOperand &MO : FI.operands()) {
if (!MO.isReg())
continue;
@@ -1830,7 +1830,7 @@ bool IfConverter::IfConvertDiamondCommon(
}
}
- for (unsigned Reg : Defs) {
+ for (MCPhysReg Reg : Defs) {
if (!ExtUses.count(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -1976,7 +1976,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
static bool MaySpeculate(const MachineInstr &MI,
- SmallSet<unsigned, 4> &LaterRedefs) {
+ SmallSet<MCPhysReg, 4> &LaterRedefs) {
bool SawStore = true;
if (!MI.isSafeToMove(nullptr, SawStore))
return false;
@@ -1999,7 +1999,7 @@ static bool MaySpeculate(const MachineInstr &MI,
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> *LaterRedefs) {
+ SmallSet<MCPhysReg, 4> *LaterRedefs) {
bool AnyUnpred = false;
bool MaySpec = LaterRedefs != nullptr;
for (MachineInstr &I : make_range(BBI.BB->begin(), E)) {
@@ -2148,7 +2148,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// Calculate the edge probability for the edge from ToBBI.BB to Succ,
// which is a portion of the edge probability from FromMBB to Succ. The
// portion ratio is the edge probability from ToBBI.BB to FromMBB (if
- // FromBBI is a successor of ToBBI.BB. See comment below for excepion).
+ // FromBBI is a successor of ToBBI.BB. See comment below for exception).
NewProb = MBPI->getEdgeProbability(&FromMBB, Succ);
// To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 0a447bc613b1..f411ee6745d0 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -90,7 +90,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// A data type for representing the result computed by \c
/// computeDependence. States whether it is okay to reorder the
/// instruction passed to \c computeDependence with at most one
- /// depednency.
+ /// dependency.
struct DependenceResult {
/// Can we actually re-order \p MI with \p Insts (see \c
/// computeDependence).
@@ -344,11 +344,11 @@ ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
return AR_MayAlias;
continue;
}
- llvm::AliasResult AAResult = AA->alias(
- MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
- MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
- MMO2->getAAInfo()));
+ llvm::AliasResult AAResult =
+ AA->alias(MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
+ MMO1->getAAInfo()),
+ MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
+ MMO2->getAAInfo()));
if (AAResult != NoAlias)
return AR_MayAlias;
}
@@ -360,10 +360,10 @@ ImplicitNullChecks::SuitabilityResult
ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) ||
- BaseReg != PointerReg)
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) ||
+ !BaseOp->isReg() || BaseOp->getReg() != PointerReg)
return SR_Unsuitable;
// We want the mem access to be issued at a sane offset from PointerReg,
@@ -651,7 +651,7 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr(
}
}
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
return MIB;
}
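
A brief sketch of the getMemOperandWithOffset() query that isSuitableMemoryOp()
now relies on above: the base comes back as a MachineOperand (it may be a frame
index rather than a register), so callers check isReg() before comparing
registers. The helper function and its name below are hypothetical, not part of
this patch.

#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// Returns true if MI is a memory access whose base operand is PointerReg.
static bool accessesViaReg(MachineInstr &MI, unsigned PointerReg,
                           const TargetInstrInfo *TII,
                           const TargetRegisterInfo *TRI) {
  MachineOperand *BaseOp;
  int64_t Offset;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
    return false;
  return BaseOp->isReg() && BaseOp->getReg() == PointerReg;
}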
diff --git a/contrib/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
new file mode 100644
index 000000000000..989fa164ad2d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -0,0 +1,1359 @@
+//===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+//
+// This file defines the interleaved-load-combine pass. The pass searches for
+// ShuffleVectorInst instructions that perform interleaving loads. If a matching
+// pattern is found, it adds a combined load and further instructions in a
+// pattern that is detectable by InterleavedAccessPass. The old instructions are
+// left dead to be removed later. The pass is specifically designed to be
+// executed just before InterleavedAccessPass to find any left-over instances
+// that are not detected by earlier passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <algorithm>
+#include <cassert>
+#include <list>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "interleaved-load-combine"
+
+namespace {
+
+/// Statistic counter
+STATISTIC(NumInterleavedLoadCombine, "Number of combined loads");
+
+/// Option to disable the pass
+static cl::opt<bool> DisableInterleavedLoadCombine(
+ "disable-" DEBUG_TYPE, cl::init(false), cl::Hidden,
+ cl::desc("Disable combining of interleaved loads"));
+
+struct VectorInfo;
+
+struct InterleavedLoadCombineImpl {
+public:
+ InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA,
+ TargetMachine &TM)
+ : F(F), DT(DT), MSSA(MSSA),
+ TLI(*TM.getSubtargetImpl(F)->getTargetLowering()),
+ TTI(TM.getTargetTransformInfo(F)) {}
+
+ /// Scan the function for interleaved load candidates and execute the
+ /// replacement if applicable.
+ bool run();
+
+private:
+ /// Function this pass is working on
+ Function &F;
+
+ /// Dominator Tree Analysis
+ DominatorTree &DT;
+
+ /// Memory Alias Analyses
+ MemorySSA &MSSA;
+
+ /// Target Lowering Information
+ const TargetLowering &TLI;
+
+ /// Target Transform Information
+ const TargetTransformInfo TTI;
+
+  /// Find the instruction in the set LIs that dominates all others; return nullptr
+ /// if there is none.
+ LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs);
+
+ /// Replace interleaved load candidates. It does additional
+ /// analyses if this makes sense. Returns true on success and false
+  /// if nothing has been changed.
+ bool combine(std::list<VectorInfo> &InterleavedLoad,
+ OptimizationRemarkEmitter &ORE);
+
+ /// Given a set of VectorInfo containing candidates for a given interleave
+ /// factor, find a set that represents a 'factor' interleaved load.
+ bool findPattern(std::list<VectorInfo> &Candidates,
+ std::list<VectorInfo> &InterleavedLoad, unsigned Factor,
+ const DataLayout &DL);
+}; // InterleavedLoadCombine
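
A minimal sketch of how the implementation struct above is driven. The actual
pass wrapper presumably appears later in this file and is not reproduced in
this excerpt, so the driver function below is made up for illustration; it only
uses the constructor, run() method, and cl::opt shown above.

// Hypothetical driver: bail out when the pass is disabled, otherwise let the
// per-function implementation scan for interleaved load candidates.
static bool runImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA,
                    TargetMachine &TM) {
  if (DisableInterleavedLoadCombine)
    return false;
  return InterleavedLoadCombineImpl(F, DT, MSSA, TM).run();
}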
+
+/// First Order Polynomial on an n-Bit Integer Value
+///
+/// Polynomial(Value) = Value * B + A + E*2^(n-e)
+///
+/// A and B are the coefficients. E*2^(n-e) is an error within 'e' most
+/// significant bits. It is introduced if an exact computation cannot be proven
+/// (e.g. division by 2).
+///
+/// As part of this optimization multiple loads will be combined. It is necessary
+/// to prove that loads are within some relative offset of each other. This
+/// class is used to prove relative offsets of values loaded from memory.
+///
+/// Representing an integer in this form is sound since addition in two's
+/// complement is associative (trivial) and multiplication distributes over the
+/// addition (see Proof(1) in Polynomial::mul). Further, both operations
+/// commute.
+//
+// Example:
+// declare @fn(i64 %IDX, <4 x float>* %PTR) {
+// %Pa1 = add i64 %IDX, 2
+// %Pa2 = lshr i64 %Pa1, 1
+// %Pa3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pa2
+// %Va = load <4 x float>, <4 x float>* %Pa3
+//
+// %Pb1 = add i64 %IDX, 4
+// %Pb2 = lshr i64 %Pb1, 1
+// %Pb3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pb2
+// %Vb = load <4 x float>, <4 x float>* %Pb3
+// ... }
+//
+// The goal is to prove that two loads load consecutive addresses.
+//
+// In this case the polynomials are constructed by the following
+// steps.
+//
+// The number tag #e specifies the error bits.
+//
+// Pa_0 = %IDX #0
+// Pa_1 = %IDX + 2 #0 | add 2
+// Pa_2 = %IDX/2 + 1 #1 | lshr 1
+// Pa_3 = %IDX/2 + 1 #1 | GEP, step signext to i64
+// Pa_4 = (%IDX/2)*16 + 16 #0 | GEP, multiply index by sizeof(4) for floats
+// Pa_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components
+//
+// Pb_0 = %IDX #0
+//  Pb_1 = %IDX + 4            #0 | add 4
+// Pb_2 = %IDX/2 + 2 #1 | lshr 1
+// Pb_3 = %IDX/2 + 2 #1 | GEP, step signext to i64
+// Pb_4 = (%IDX/2)*16 + 32 #0 | GEP, multiply index by sizeof(4) for floats
+//  Pb_5 = (%IDX/2)*16 + 32    #0 | GEP, add offset of leading components
+//
+// Pb_5 - Pa_5 = 16 #0 | subtract to get the offset
+//
+// Remark: %PTR is not maintained within this class. So in this instance the
+// offset of 16 can only be assumed if the pointers are equal.
+//
+class Polynomial {
+ /// Operations on B
+ enum BOps {
+ LShr,
+ Mul,
+ SExt,
+ Trunc,
+ };
+
+ /// Number of Error Bits e
+ unsigned ErrorMSBs;
+
+ /// Value
+ Value *V;
+
+ /// Coefficient B
+ SmallVector<std::pair<BOps, APInt>, 4> B;
+
+ /// Coefficient A
+ APInt A;
+
+public:
+ Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() {
+ IntegerType *Ty = dyn_cast<IntegerType>(V->getType());
+ if (Ty) {
+ ErrorMSBs = 0;
+ this->V = V;
+ A = APInt(Ty->getBitWidth(), 0);
+ }
+ }
+
+ Polynomial(const APInt &A, unsigned ErrorMSBs = 0)
+ : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(A) {}
+
+ Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)
+ : ErrorMSBs(ErrorMSBs), V(NULL), B(), A(BitWidth, A) {}
+
+ Polynomial() : ErrorMSBs((unsigned)-1), V(NULL), B(), A() {}
+
+ /// Increment and clamp the number of undefined bits.
+ void incErrorMSBs(unsigned amt) {
+ if (ErrorMSBs == (unsigned)-1)
+ return;
+
+ ErrorMSBs += amt;
+ if (ErrorMSBs > A.getBitWidth())
+ ErrorMSBs = A.getBitWidth();
+ }
+
+ /// Decrement and clamp the number of undefined bits.
+ void decErrorMSBs(unsigned amt) {
+ if (ErrorMSBs == (unsigned)-1)
+ return;
+
+ if (ErrorMSBs > amt)
+ ErrorMSBs -= amt;
+ else
+ ErrorMSBs = 0;
+ }
+
+ /// Apply an add on the polynomial
+ Polynomial &add(const APInt &C) {
+ // Note: Addition is associative in two's complement even when in case of
+ // signed overflow.
+ //
+ // Error bits can only propagate into higher significant bits. As these are
+ // already regarded as undefined, there is no change.
+ //
+ // Theorem: Adding a constant to a polynomial does not change the error
+ // term.
+ //
+ // Proof:
+ //
+ // Since the addition is associative and commutes:
+ //
+ // (B + A + E*2^(n-e)) + C = B + (A + C) + E*2^(n-e)
+ // [qed]
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ A += C;
+ return *this;
+ }
+
+ /// Apply a multiplication onto the polynomial.
+ Polynomial &mul(const APInt &C) {
+ // Note: Multiplication distributes over the addition
+ //
+ // Theorem: Multiplication distributes over the addition
+ //
+ // Proof(1):
+ //
+    //  (B+A)*C =
+ // = (B + A) + (B + A) + .. {C Times}
+ // addition is associative and commutes, hence
+ // = B + B + .. {C Times} .. + A + A + .. {C times}
+ // = B*C + A*C
+ // (see (function add) for signed values and overflows)
+ // [qed]
+ //
+    // Theorem: If C has c trailing zeros, error bits in A or B are shifted out
+ // to the left.
+ //
+ // Proof(2):
+ //
+ // Let B' and A' be the n-Bit inputs with some unknown errors EA,
+ // EB at e leading bits. B' and A' can be written down as:
+ //
+ // B' = B + 2^(n-e)*EB
+ // A' = A + 2^(n-e)*EA
+ //
+ // Let C' be an input with c trailing zero bits. C' can be written as
+ //
+ // C' = C*2^c
+ //
+ // Therefore we can compute the result by using distributivity and
+ // commutativity.
+ //
+ // (B'*C' + A'*C') = [B + 2^(n-e)*EB] * C' + [A + 2^(n-e)*EA] * C' =
+ // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' =
+ // = (B'+A') * C' =
+ // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' =
+ // = [B + A + 2^(n-e)*EB + 2^(n-e)*EA] * C' =
+ // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C' =
+ // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C*2^c =
+ // = (B + A) * C' + C*(EB + EA)*2^(n-e)*2^c =
+ //
+ // Let EC be the final error with EC = C*(EB + EA)
+ //
+ // = (B + A)*C' + EC*2^(n-e)*2^c =
+ // = (B + A)*C' + EC*2^(n-(e-c))
+ //
+ // Since EC is multiplied by 2^(n-(e-c)) the resulting error contains c
+ // less error bits than the input. c bits are shifted out to the left.
+ // [qed]
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ // Multiplying by one is a no-op.
+ if (C.isOneValue()) {
+ return *this;
+ }
+
+ // Multiplying by zero removes the coefficient B and defines all bits.
+ if (C.isNullValue()) {
+ ErrorMSBs = 0;
+ deleteB();
+ }
+
+ // See Proof(2): Trailing zero bits indicate a left shift. This removes
+ // leading bits from the result even if they are undefined.
+ decErrorMSBs(C.countTrailingZeros());
+
+ A *= C;
+ pushBOperation(Mul, C);
+ return *this;
+ }
+
+ /// Apply a logical shift right on the polynomial
+ Polynomial &lshr(const APInt &C) {
+ // Theorem(1): (B + A + E*2^(n-e)) >> 1 => (B >> 1) + (A >> 1) + E'*2^(n-e')
+ // where
+ // e' = e + 1,
+ // E is a e-bit number,
+ // E' is a e'-bit number,
+ // holds under the following precondition:
+ // pre(1): A % 2 = 0
+ // pre(2): e < n, (see Theorem(2) for the trivial case with e=n)
+ // where >> expresses a logical shift to the right, with adding zeros.
+ //
+    // We need to show that for every E there is an E'
+ //
+ // B = b_h * 2^(n-1) + b_m * 2 + b_l
+ // A = a_h * 2^(n-1) + a_m * 2 (pre(1))
+ //
+ // where a_h, b_h, b_l are single bits, and a_m, b_m are (n-2) bit numbers
+ //
+ // Let X = (B + A + E*2^(n-e)) >> 1
+ // Let Y = (B >> 1) + (A >> 1) + E*2^(n-e) >> 1
+ //
+ // X = [B + A + E*2^(n-e)] >> 1 =
+ // = [ b_h * 2^(n-1) + b_m * 2 + b_l +
+ // + a_h * 2^(n-1) + a_m * 2 +
+ // + E * 2^(n-e) ] >> 1 =
+ //
+    //   The sum is built by putting the overflow of [a_m + b_m] into the term
+ // 2^(n-1). As there are no more bits beyond 2^(n-1) the overflow within
+ // this bit is discarded. This is expressed by % 2.
+ //
+ // The bit in position 0 cannot overflow into the term (b_m + a_m).
+ //
+ // = [ ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-1) +
+ // + ((b_m + a_m) % 2^(n-2)) * 2 +
+ // + b_l + E * 2^(n-e) ] >> 1 =
+ //
+ // The shift is computed by dividing the terms by 2 and by cutting off
+ // b_l.
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-(e+1)) =
+ //
+ // by the definition in the Theorem e+1 = e'
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-e') =
+ //
+ // Compute Y by applying distributivity first
+ //
+ // Y = (B >> 1) + (A >> 1) + E*2^(n-e') =
+ // = (b_h * 2^(n-1) + b_m * 2 + b_l) >> 1 +
+ // + (a_h * 2^(n-1) + a_m * 2) >> 1 +
+ // + E * 2^(n-e) >> 1 =
+ //
+ // Again, the shift is computed by dividing the terms by 2 and by cutting
+ // off b_l.
+ //
+ // = b_h * 2^(n-2) + b_m +
+ // + a_h * 2^(n-2) + a_m +
+ // + E * 2^(n-(e+1)) =
+ //
+    //   Again, the sum is built by putting the overflow of [a_m + b_m] into
+    //   the term 2^(n-1). But this time there is room for a second bit in the
+    //   term 2^(n-2); we add this bit to a new term and denote it o_h in a
+ // second step.
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] >> 1) * 2^(n-1) +
+ // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-(e+1)) =
+ //
+ // Let o_h = [b_h + a_h + (b_m + a_m) >> (n-2)] >> 1
+ // Further replace e+1 by e'.
+ //
+ // = o_h * 2^(n-1) +
+ // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E * 2^(n-e') =
+ //
+ // Move o_h into the error term and construct E'. To ensure that there is
+ // no 2^x with negative x, this step requires pre(2) (e < n).
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + o_h * 2^(e'-1) * 2^(n-e') + | pre(2), move 2^(e'-1)
+ // | out of the old exponent
+ // + E * 2^(n-e') =
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + [o_h * 2^(e'-1) + E] * 2^(n-e') + | move 2^(e'-1) out of
+ // | the old exponent
+ //
+ // Let E' = o_h * 2^(e'-1) + E
+ //
+ // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) +
+ // + ((b_m + a_m) % 2^(n-2)) +
+ // + E' * 2^(n-e')
+ //
+    // Because X and Y differ only in their error terms, and E' can be
+    // constructed as shown, the theorem holds.
+ // [qed]
+ //
+    // For completeness, in the case e=n it is also required to show that
+ // distributivity can be applied.
+ //
+ // In this case Theorem(1) transforms to (the pre-condition on A can also be
+ // dropped)
+ //
+ // Theorem(2): (B + A + E) >> 1 => (B >> 1) + (A >> 1) + E'
+ // where
+ // A, B, E, E' are two's complement numbers with the same bit
+ // width
+ //
+ // Let A + B + E = X
+ // Let (B >> 1) + (A >> 1) = Y
+ //
+ // Therefore we need to show that for every X and Y there is an E' which
+ // makes the equation
+ //
+ // X = Y + E'
+ //
+ // hold. This is trivially the case for E' = X - Y.
+ //
+ // [qed]
+ //
+    // Remark: Distributing lshr with an arbitrary number n can be expressed as
+ // ((((B + A) lshr 1) lshr 1) ... ) {n times}.
+ // This construction induces n additional error bits at the left.
+
+ if (C.getBitWidth() != A.getBitWidth()) {
+ ErrorMSBs = (unsigned)-1;
+ return *this;
+ }
+
+ if (C.isNullValue())
+ return *this;
+
+ // Test if the result will be zero
+ unsigned shiftAmt = C.getZExtValue();
+ if (shiftAmt >= C.getBitWidth())
+ return mul(APInt(C.getBitWidth(), 0));
+
+    // The proof that the shiftAmt LSBs are zero for at least one summand is
+    // only possible for the constant summand A.
+    //
+    // If this can be proven, add shiftAmt to the error counter
+    // `ErrorMSBs`. Otherwise set all bits as undefined.
+ if (A.countTrailingZeros() < shiftAmt)
+ ErrorMSBs = A.getBitWidth();
+ else
+ incErrorMSBs(shiftAmt);
+
+ // Apply the operation.
+ pushBOperation(LShr, C);
+ A = A.lshr(shiftAmt);
+
+ return *this;
+ }
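+  // Worked example for lshr() above (illustrative, hypothetical values):
+  // with n = 8, A = 4, e = 1 and a shift by 1, pre(1) and pre(2) of
+  // Theorem(1) hold; A.countTrailingZeros() = 2 >= 1, so A becomes 2 and
+  // incErrorMSBs(1) widens the error term from one to two undefined MSBs,
+  // i.e. e' = e + 1 as the proof predicts.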
+
+ /// Apply a sign-extend or truncate operation on the polynomial.
+ Polynomial &sextOrTrunc(unsigned n) {
+ if (n < A.getBitWidth()) {
+      // Truncate: Clearly undefined bits on the MSB side are removed
+      // if there are any.
+ decErrorMSBs(A.getBitWidth() - n);
+ A = A.trunc(n);
+ pushBOperation(Trunc, APInt(sizeof(n) * 8, n));
+ }
+ if (n > A.getBitWidth()) {
+      // Extend: Clearly extending first and adding later is different
+      // from adding first and extending later in all extended bits.
+ incErrorMSBs(n - A.getBitWidth());
+ A = A.sext(n);
+ pushBOperation(SExt, APInt(sizeof(n) * 8, n));
+ }
+
+ return *this;
+ }
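+  // Example for sextOrTrunc() above (illustrative, hypothetical values):
+  // truncating an i64 polynomial with ErrorMSBs = 3 to i32 cuts 32 bits off
+  // the MSB side, so decErrorMSBs(32) saturates ErrorMSBs at 0. Sign-extending
+  // back to i64 then marks the 32 newly created MSBs as undefined again via
+  // incErrorMSBs(32).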
+
+ /// Test if there is a coefficient B.
+ bool isFirstOrder() const { return V != nullptr; }
+
+  /// Test whether the coefficients B of two Polynomials are equal.
+ bool isCompatibleTo(const Polynomial &o) const {
+    // The polynomials use different bit widths.
+ if (A.getBitWidth() != o.A.getBitWidth())
+ return false;
+
+    // If neither Polynomial has a coefficient B, they are compatible.
+ if (!isFirstOrder() && !o.isFirstOrder())
+ return true;
+
+ // The index variable is different.
+ if (V != o.V)
+ return false;
+
+ // Check the operations.
+ if (B.size() != o.B.size())
+ return false;
+
+ auto ob = o.B.begin();
+ for (auto &b : B) {
+ if (b != *ob)
+ return false;
+ ob++;
+ }
+
+ return true;
+ }
+
+ /// Subtract two polynomials, return an undefined polynomial if
+ /// subtraction is not possible.
+ Polynomial operator-(const Polynomial &o) const {
+ // Return an undefined polynomial if incompatible.
+ if (!isCompatibleTo(o))
+ return Polynomial();
+
+ // If the polynomials are compatible (meaning they have the same
+    // coefficient B), B is eliminated. Thus a polynomial solely
+    // containing A is returned.
+ return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs));
+ }
+
+  /// Subtract a constant from a polynomial.
+ Polynomial operator-(uint64_t C) const {
+ Polynomial Result(*this);
+ Result.A -= C;
+ return Result;
+ }
+
+  /// Add a constant to a polynomial.
+ Polynomial operator+(uint64_t C) const {
+ Polynomial Result(*this);
+ Result.A += C;
+ return Result;
+ }
+
+ /// Returns true if it can be proven that two Polynomials are equal.
+ bool isProvenEqualTo(const Polynomial &o) {
+ // Subtract both polynomials and test if it is fully defined and zero.
+ Polynomial r = *this - o;
+ return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue());
+ }
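+  // Example for isProvenEqualTo() above (illustrative, hypothetical
+  // polynomials): (%i Mul 4) + 8 compared with itself subtracts to a fully
+  // defined constant 0 and is proven equal; compared with (%j Mul 4) + 8 the
+  // index values differ, the subtraction returns an undefined polynomial and
+  // the proof fails.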
+
+ /// Print the polynomial into a stream.
+ void print(raw_ostream &OS) const {
+ OS << "[{#ErrBits:" << ErrorMSBs << "} ";
+
+ if (V) {
+ for (auto b : B)
+ OS << "(";
+ OS << "(" << *V << ") ";
+
+ for (auto b : B) {
+ switch (b.first) {
+ case LShr:
+ OS << "LShr ";
+ break;
+ case Mul:
+ OS << "Mul ";
+ break;
+ case SExt:
+ OS << "SExt ";
+ break;
+ case Trunc:
+ OS << "Trunc ";
+ break;
+ }
+
+ OS << b.second << ") ";
+ }
+ }
+
+ OS << "+ " << A << "]";
+ }
+
+private:
+ void deleteB() {
+ V = nullptr;
+ B.clear();
+ }
+
+ void pushBOperation(const BOps Op, const APInt &C) {
+ if (isFirstOrder()) {
+ B.push_back(std::make_pair(Op, C));
+ return;
+ }
+ }
+};
+
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const Polynomial &S) {
+ S.print(OS);
+ return OS;
+}
+#endif
+
+/// VectorInfo stores the following abstract information for each vector
+/// element:
+///
+/// 1) the memory address loaded into the element as a Polynomial,
+/// 2) a set of load instructions necessary to construct the vector,
+/// 3) a set of all other instructions that are necessary to create the
+///    vector, and
+/// 4) a pointer value that can be used as a relative base for all elements.
+struct VectorInfo {
+private:
+ VectorInfo(const VectorInfo &c) : VTy(c.VTy) {
+ llvm_unreachable(
+ "Copying VectorInfo is neither implemented nor necessary,");
+ }
+
+public:
+ /// Information of a Vector Element
+ struct ElementInfo {
+ /// Offset Polynomial.
+ Polynomial Ofs;
+
+    /// The load instruction used to load the entry. LI is null if the pointer
+    /// of the load instruction does not point to the entry.
+ LoadInst *LI;
+
+ ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr)
+ : Ofs(Offset), LI(LI) {}
+ };
+
+ /// Basic-block the load instructions are within
+ BasicBlock *BB;
+
+  /// Pointer value of all participating load instructions
+ Value *PV;
+
+ /// Participating load instructions
+ std::set<LoadInst *> LIs;
+
+ /// Participating instructions
+ std::set<Instruction *> Is;
+
+ /// Final shuffle-vector instruction
+ ShuffleVectorInst *SVI;
+
+ /// Information of the offset for each vector element
+ ElementInfo *EI;
+
+ /// Vector Type
+ VectorType *const VTy;
+
+ VectorInfo(VectorType *VTy)
+ : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) {
+ EI = new ElementInfo[VTy->getNumElements()];
+ }
+
+ virtual ~VectorInfo() { delete[] EI; }
+
+ unsigned getDimension() const { return VTy->getNumElements(); }
+
+ /// Test if the VectorInfo can be part of an interleaved load with the
+ /// specified factor.
+ ///
+  /// \param Factor Factor of the interleave
+  /// \param DL The target's DataLayout
+ ///
+ /// \returns true if this is possible and false if not
+ bool isInterleaved(unsigned Factor, const DataLayout &DL) const {
+ unsigned Size = DL.getTypeAllocSize(VTy->getElementType());
+ for (unsigned i = 1; i < getDimension(); i++) {
+ if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) {
+ return false;
+ }
+ }
+ return true;
+ }
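+  // Example for isInterleaved() above (illustrative, hypothetical values):
+  // for a <4 x i32> VectorInfo and Factor = 2, Size = 4, so the element
+  // offsets must be provably Ofs0, Ofs0 + 8, Ofs0 + 16 and Ofs0 + 24, i.e.
+  // the elements are loaded with a stride of Factor * Size bytes.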
+
+ /// Recursively computes the vector information stored in V.
+ ///
+ /// This function delegates the work to specialized implementations
+ ///
+ /// \param V Value to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) {
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
+ if (SVI)
+ return computeFromSVI(SVI, Result, DL);
+ LoadInst *LI = dyn_cast<LoadInst>(V);
+ if (LI)
+ return computeFromLI(LI, Result, DL);
+ BitCastInst *BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ return computeFromBCI(BCI, Result, DL);
+ return false;
+ }
+
+ /// BitCastInst specialization to compute the vector information.
+ ///
+ /// \param BCI BitCastInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result,
+ const DataLayout &DL) {
+ Instruction *Op = dyn_cast<Instruction>(BCI->getOperand(0));
+
+ if (!Op)
+ return false;
+
+ VectorType *VTy = dyn_cast<VectorType>(Op->getType());
+ if (!VTy)
+ return false;
+
+    // We can only cast from larger to smaller vectors.
+ if (Result.VTy->getNumElements() % VTy->getNumElements())
+ return false;
+
+ unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements();
+ unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType());
+ unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType());
+
+ if (NewSize * Factor != OldSize)
+ return false;
+
+ VectorInfo Old(VTy);
+ if (!compute(Op, Old, DL))
+ return false;
+
+ for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) {
+ for (unsigned j = 0; j < Factor; j++) {
+ Result.EI[i + j] =
+ ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize,
+ j == 0 ? Old.EI[i / Factor].LI : nullptr);
+ }
+ }
+
+ Result.BB = Old.BB;
+ Result.PV = Old.PV;
+ Result.LIs.insert(Old.LIs.begin(), Old.LIs.end());
+ Result.Is.insert(Old.Is.begin(), Old.Is.end());
+ Result.Is.insert(BCI);
+ Result.SVI = nullptr;
+
+ return true;
+ }
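+  // Example for computeFromBCI() above (illustrative, hypothetical types):
+  // a bitcast from <2 x i32> to <4 x i16> has Factor = 2, OldSize = 4 and
+  // NewSize = 2; each old element is split into two ElementInfos whose
+  // offsets differ by NewSize bytes, and only the first of each pair keeps
+  // the originating LoadInst.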
+
+ /// ShuffleVectorInst specialization to compute vector information.
+ ///
+ /// \param SVI ShuffleVectorInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// Compute the left and the right side vector information and merge them by
+ /// applying the shuffle operation. This function also ensures that the left
+  /// and right side have compatible loads. This means that all loads are
+  /// within the same basic block and are based on the same pointer.
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,
+ const DataLayout &DL) {
+ VectorType *ArgTy = dyn_cast<VectorType>(SVI->getOperand(0)->getType());
+ assert(ArgTy && "ShuffleVector Operand is not a VectorType");
+
+ // Compute the left hand vector information.
+ VectorInfo LHS(ArgTy);
+ if (!compute(SVI->getOperand(0), LHS, DL))
+ LHS.BB = nullptr;
+
+ // Compute the right hand vector information.
+ VectorInfo RHS(ArgTy);
+ if (!compute(SVI->getOperand(1), RHS, DL))
+ RHS.BB = nullptr;
+
+ // Neither operand produced sensible results?
+ if (!LHS.BB && !RHS.BB)
+ return false;
+ // Only RHS produced sensible results?
+ else if (!LHS.BB) {
+ Result.BB = RHS.BB;
+ Result.PV = RHS.PV;
+ }
+ // Only LHS produced sensible results?
+ else if (!RHS.BB) {
+ Result.BB = LHS.BB;
+ Result.PV = LHS.PV;
+ }
+ // Both operands produced sensible results?
+ else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) {
+ Result.BB = LHS.BB;
+ Result.PV = LHS.PV;
+ }
+ // Both operands produced sensible results but they are incompatible.
+ else {
+ return false;
+ }
+
+ // Merge and apply the operation on the offset information.
+ if (LHS.BB) {
+ Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end());
+ Result.Is.insert(LHS.Is.begin(), LHS.Is.end());
+ }
+ if (RHS.BB) {
+ Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end());
+ Result.Is.insert(RHS.Is.begin(), RHS.Is.end());
+ }
+ Result.Is.insert(SVI);
+ Result.SVI = SVI;
+
+ int j = 0;
+ for (int i : SVI->getShuffleMask()) {
+ assert((i < 2 * (signed)ArgTy->getNumElements()) &&
+ "Invalid ShuffleVectorInst (index out of bounds)");
+
+ if (i < 0)
+ Result.EI[j] = ElementInfo();
+ else if (i < (signed)ArgTy->getNumElements()) {
+ if (LHS.BB)
+ Result.EI[j] = LHS.EI[i];
+ else
+ Result.EI[j] = ElementInfo();
+ } else {
+ if (RHS.BB)
+ Result.EI[j] = RHS.EI[i - ArgTy->getNumElements()];
+ else
+ Result.EI[j] = ElementInfo();
+ }
+ j++;
+ }
+
+ return true;
+ }
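+  // Example for computeFromSVI() above (illustrative, hypothetical mask):
+  // shuffling two <2 x i32> operands with the mask <0, 2, 1, 3> picks
+  // LHS.EI[0], RHS.EI[0], LHS.EI[1], RHS.EI[1]; a negative (undef) mask
+  // entry produces a default ElementInfo with an undefined offset.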
+
+ /// LoadInst specialization to compute vector information.
+ ///
+ /// This function also acts as abort condition to the recursion.
+ ///
+ /// \param LI LoadInst to operate on
+ /// \param Result Result of the computation
+ ///
+ /// \returns false if no sensible information can be gathered.
+ static bool computeFromLI(LoadInst *LI, VectorInfo &Result,
+ const DataLayout &DL) {
+ Value *BasePtr;
+ Polynomial Offset;
+
+ if (LI->isVolatile())
+ return false;
+
+ if (LI->isAtomic())
+ return false;
+
+ // Get the base polynomial
+ computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr, DL);
+
+ Result.BB = LI->getParent();
+ Result.PV = BasePtr;
+ Result.LIs.insert(LI);
+ Result.Is.insert(LI);
+
+ for (unsigned i = 0; i < Result.getDimension(); i++) {
+ Value *Idx[2] = {
+ ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(LI->getContext()), i),
+ };
+ int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, makeArrayRef(Idx, 2));
+ Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);
+ }
+
+ return true;
+ }
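+  // Example for computeFromLI() above (illustrative, hypothetical IR): for
+  // `load <4 x i32>, <4 x i32>* %p` the polynomial of %p is extended with
+  // the per-element byte offsets 0, 4, 8 and 12 computed by
+  // getIndexedOffsetInType, and only element 0 records the LoadInst itself.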
+
+ /// Recursively compute polynomial of a value.
+ ///
+ /// \param BO Input binary operation
+ /// \param Result Result polynomial
+ static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) {
+ Value *LHS = BO.getOperand(0);
+ Value *RHS = BO.getOperand(1);
+
+ // Find the RHS Constant if any
+ ConstantInt *C = dyn_cast<ConstantInt>(RHS);
+ if ((!C) && BO.isCommutative()) {
+ C = dyn_cast<ConstantInt>(LHS);
+ if (C)
+ std::swap(LHS, RHS);
+ }
+
+ switch (BO.getOpcode()) {
+ case Instruction::Add:
+ if (!C)
+ break;
+
+ computePolynomial(*LHS, Result);
+ Result.add(C->getValue());
+ return;
+
+ case Instruction::LShr:
+ if (!C)
+ break;
+
+ computePolynomial(*LHS, Result);
+ Result.lshr(C->getValue());
+ return;
+
+ default:
+ break;
+ }
+
+ Result = Polynomial(&BO);
+ }
+
+ /// Recursively compute polynomial of a value
+ ///
+ /// \param V input value
+ /// \param Result result polynomial
+ static void computePolynomial(Value &V, Polynomial &Result) {
+ if (isa<BinaryOperator>(&V))
+ computePolynomialBinOp(*dyn_cast<BinaryOperator>(&V), Result);
+ else
+ Result = Polynomial(&V);
+ }
+
+ /// Compute the Polynomial representation of a Pointer type.
+ ///
+ /// \param Ptr input pointer value
+ /// \param Result result polynomial
+ /// \param BasePtr pointer the polynomial is based on
+ /// \param DL Datalayout of the target machine
+ static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result,
+ Value *&BasePtr,
+ const DataLayout &DL) {
+ // Not a pointer type? Return an undefined polynomial
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr.getType());
+    if (!PtrTy) {
+      Result = Polynomial();
+      BasePtr = nullptr;
+      return;
+    }
+ unsigned PointerBits =
+ DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());
+
+    // Skip pointer casts. Return a zero polynomial otherwise.
+ if (isa<CastInst>(&Ptr)) {
+ CastInst &CI = *cast<CastInst>(&Ptr);
+ switch (CI.getOpcode()) {
+ case Instruction::BitCast:
+ computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL);
+ break;
+ default:
+ BasePtr = &Ptr;
+        Result = Polynomial(PointerBits, 0);
+ break;
+ }
+ }
+    // Resolve GetElementPtrInst.
+ else if (isa<GetElementPtrInst>(&Ptr)) {
+ GetElementPtrInst &GEP = *cast<GetElementPtrInst>(&Ptr);
+
+ APInt BaseOffset(PointerBits, 0);
+
+ // Check if we can compute the Offset with accumulateConstantOffset
+ if (GEP.accumulateConstantOffset(DL, BaseOffset)) {
+ Result = Polynomial(BaseOffset);
+ BasePtr = GEP.getPointerOperand();
+ return;
+ } else {
+ // Otherwise we allow that the last index operand of the GEP is
+ // non-constant.
+ unsigned idxOperand, e;
+ SmallVector<Value *, 4> Indices;
+ for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e;
+ idxOperand++) {
+ ConstantInt *IDX = dyn_cast<ConstantInt>(GEP.getOperand(idxOperand));
+ if (!IDX)
+ break;
+ Indices.push_back(IDX);
+ }
+
+ // It must also be the last operand.
+ if (idxOperand + 1 != e) {
+ Result = Polynomial();
+ BasePtr = nullptr;
+ return;
+ }
+
+ // Compute the polynomial of the index operand.
+ computePolynomial(*GEP.getOperand(idxOperand), Result);
+
+ // Compute base offset from zero based index, excluding the last
+ // variable operand.
+ BaseOffset =
+ DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices);
+
+ // Apply the operations of GEP to the polynomial.
+ unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType());
+ Result.sextOrTrunc(PointerBits);
+ Result.mul(APInt(PointerBits, ResultSize));
+ Result.add(BaseOffset);
+ BasePtr = GEP.getPointerOperand();
+ }
+ }
+ // All other instructions are handled by using the value as base pointer and
+ // a zero polynomial.
+ else {
+ BasePtr = &Ptr;
+    Result =
+        Polynomial(DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()), 0);
+ }
+ }
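+  // Example for computePolynomialFromPointer() above (illustrative,
+  // hypothetical IR): for `%q = getelementptr i32, i32* %p, i64 %i` the only
+  // index is non-constant and also the last operand, so the polynomial of %i
+  // is sign-extended/truncated to the index width, multiplied by the element
+  // size 4 and combined with BasePtr = %p; a GEP with all-constant indices is
+  // folded into a constant offset via accumulateConstantOffset instead.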
+
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const {
+ if (PV)
+ OS << *PV;
+ else
+ OS << "(none)";
+ OS << " + ";
+ for (unsigned i = 0; i < getDimension(); i++)
+ OS << ((i == 0) ? "[" : ", ") << EI[i].Ofs;
+ OS << "]";
+ }
+#endif
+};
+
+} // anonymous namespace
+
+bool InterleavedLoadCombineImpl::findPattern(
+ std::list<VectorInfo> &Candidates, std::list<VectorInfo> &InterleavedLoad,
+ unsigned Factor, const DataLayout &DL) {
+ for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) {
+ unsigned i;
+    // Try to find an interleaved load using candidate C0 as the first line.
+ unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType());
+
+ // List containing iterators pointing to the VectorInfos of the candidates
+ std::vector<std::list<VectorInfo>::iterator> Res(Factor, Candidates.end());
+
+ for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) {
+ if (C->VTy != C0->VTy)
+ continue;
+ if (C->BB != C0->BB)
+ continue;
+ if (C->PV != C0->PV)
+ continue;
+
+      // Check if the current candidate matches any of the Factor - 1
+      // remaining lines.
+ for (i = 1; i < Factor; i++) {
+ if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) {
+ Res[i] = C;
+ }
+ }
+
+ for (i = 1; i < Factor; i++) {
+ if (Res[i] == Candidates.end())
+ break;
+ }
+ if (i == Factor) {
+ Res[0] = C0;
+ break;
+ }
+ }
+
+ if (Res[0] != Candidates.end()) {
+ // Move the result into the output
+ for (unsigned i = 0; i < Factor; i++) {
+ InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]);
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
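+
+// Example for findPattern() above (illustrative, hypothetical candidates):
+// with Factor = 2 and <4 x i32> candidates, Size = 4, so a candidate whose
+// first-element offset is provably Ofs0 + 4 fills line i = 1 relative to a
+// candidate starting at Ofs0; once all Factor lines are found the group is
+// spliced into InterleavedLoad.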
+
+LoadInst *
+InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
+ assert(!LIs.empty() && "No load instructions given.");
+
+ // All LIs are within the same BB. Select the first for a reference.
+ BasicBlock *BB = (*LIs.begin())->getParent();
+ BasicBlock::iterator FLI =
+ std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool {
+ return is_contained(LIs, &I);
+ });
+ assert(FLI != BB->end());
+
+ return cast<LoadInst>(FLI);
+}
+
+bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
+ OptimizationRemarkEmitter &ORE) {
+ LLVM_DEBUG(dbgs() << "Checking interleaved load\n");
+
+ // The insertion point is the LoadInst which loads the first values. The
+  // following tests are used to prove that the combined load can be inserted
+ // just before InsertionPoint.
+ LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI;
+
+ // Test if the offset is computed
+ if (!InsertionPoint)
+ return false;
+
+ std::set<LoadInst *> LIs;
+ std::set<Instruction *> Is;
+ std::set<Instruction *> SVIs;
+
+ unsigned InterleavedCost;
+ unsigned InstructionCost = 0;
+
+ // Get the interleave factor
+ unsigned Factor = InterleavedLoad.size();
+
+ // Merge all input sets used in analysis
+ for (auto &VI : InterleavedLoad) {
+ // Generate a set of all load instructions to be combined
+ LIs.insert(VI.LIs.begin(), VI.LIs.end());
+
+    // Generate a set of all instructions taking part in the interleaved
+    // load. This list excludes the instructions necessary for the
+    // polynomial construction.
+ Is.insert(VI.Is.begin(), VI.Is.end());
+
+ // Generate the set of the final ShuffleVectorInst.
+ SVIs.insert(VI.SVI);
+ }
+
+ // There is nothing to combine.
+ if (LIs.size() < 2)
+ return false;
+
+  // Test if all participating instructions will be dead after the
+  // transformation. If intermediate results are used, no performance gain can
+  // be expected. Also sum the cost of the instructions being left dead.
+ for (auto &I : Is) {
+ // Compute the old cost
+ InstructionCost +=
+ TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+
+ // The final SVIs are allowed not to be dead, all uses will be replaced
+ if (SVIs.find(I) != SVIs.end())
+ continue;
+
+ // If there are users outside the set to be eliminated, we abort the
+ // transformation. No gain can be expected.
+ for (const auto &U : I->users()) {
+ if (Is.find(dyn_cast<Instruction>(U)) == Is.end())
+ return false;
+ }
+ }
+
+ // We know that all LoadInst are within the same BB. This guarantees that
+ // either everything or nothing is loaded.
+ LoadInst *First = findFirstLoad(LIs);
+
+  // To be sure that the loads can be combined, iterate over all loads and
+  // test that the corresponding defining access dominates the first LI. This
+  // guarantees that there are no aliasing stores in between the loads.
+ auto FMA = MSSA.getMemoryAccess(First);
+ for (auto LI : LIs) {
+ auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess();
+ if (!MSSA.dominates(MADef, FMA))
+ return false;
+ }
+ assert(!LIs.empty() && "There are no LoadInst to combine");
+
+  // It is necessary that the insertion point dominates all final
+  // ShuffleVectorInsts.
+ for (auto &VI : InterleavedLoad) {
+ if (!DT.dominates(InsertionPoint, VI.SVI))
+ return false;
+ }
+
+  // All checks are done. Add instructions detectable by InterleavedAccessPass.
+  // The old instructions are left dead.
+ IRBuilder<> Builder(InsertionPoint);
+ Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();
+ unsigned ElementsPerSVI =
+ InterleavedLoad.front().SVI->getType()->getNumElements();
+ VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI);
+
+ SmallVector<unsigned, 4> Indices;
+ for (unsigned i = 0; i < Factor; i++)
+ Indices.push_back(i);
+ InterleavedCost = TTI.getInterleavedMemoryOpCost(
+ Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(),
+ InsertionPoint->getPointerAddressSpace());
+
+ if (InterleavedCost >= InstructionCost) {
+ return false;
+ }
+
+ // Create a pointer cast for the wide load.
+ auto CI = Builder.CreatePointerCast(InsertionPoint->getOperand(0),
+ ILTy->getPointerTo(),
+ "interleaved.wide.ptrcast");
+
+ // Create the wide load and update the MemorySSA.
+ auto LI = Builder.CreateAlignedLoad(CI, InsertionPoint->getAlignment(),
+ "interleaved.wide.load");
+ auto MSSAU = MemorySSAUpdater(&MSSA);
+ MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
+ LI, nullptr, MSSA.getMemoryAccess(InsertionPoint)));
+ MSSAU.insertUse(MSSALoad);
+
+ // Create the final SVIs and replace all uses.
+ int i = 0;
+ for (auto &VI : InterleavedLoad) {
+ SmallVector<uint32_t, 4> Mask;
+ for (unsigned j = 0; j < ElementsPerSVI; j++)
+ Mask.push_back(i + j * Factor);
+
+ Builder.SetInsertPoint(VI.SVI);
+ auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()),
+ Mask, "interleaved.shuffle");
+ VI.SVI->replaceAllUsesWith(SVI);
+ i++;
+ }
+
+ NumInterleavedLoadCombine++;
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Combined Interleaved Load", LI)
+ << "Load interleaved combined with factor "
+ << ore::NV("Factor", Factor);
+ });
+
+ return true;
+}
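+
+// Example for combine() above (illustrative, hypothetical shapes): combining
+// two <4 x i32> candidates with Factor = 2 emits one wide <8 x i32> load
+// plus two shufflevectors with masks <0,2,4,6> and <1,3,5,7>; the rewrite is
+// only done when the TTI cost of the interleaved load is below the summed
+// cost of the instructions it leaves dead.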
+
+bool InterleavedLoadCombineImpl::run() {
+ OptimizationRemarkEmitter ORE(&F);
+ bool changed = false;
+ unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor();
+
+ auto &DL = F.getParent()->getDataLayout();
+
+ // Start with the highest factor to avoid combining and recombining.
+ for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) {
+ std::list<VectorInfo> Candidates;
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) {
+
+ Candidates.emplace_back(SVI->getType());
+
+ if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) {
+ Candidates.pop_back();
+ continue;
+ }
+
+ if (!Candidates.back().isInterleaved(Factor, DL)) {
+ Candidates.pop_back();
+ }
+ }
+ }
+ }
+
+ std::list<VectorInfo> InterleavedLoad;
+ while (findPattern(Candidates, InterleavedLoad, Factor, DL)) {
+ if (combine(InterleavedLoad, ORE)) {
+ changed = true;
+ } else {
+ // Remove the first element of the Interleaved Load but put the others
+ // back on the list and continue searching
+ Candidates.splice(Candidates.begin(), InterleavedLoad,
+ std::next(InterleavedLoad.begin()),
+ InterleavedLoad.end());
+ }
+ InterleavedLoad.clear();
+ }
+ }
+
+ return changed;
+}
+
+namespace {
+/// This pass combines interleaved loads into a pattern detectable by
+/// InterleavedAccessPass.
+struct InterleavedLoadCombine : public FunctionPass {
+ static char ID;
+
+ InterleavedLoadCombine() : FunctionPass(ID) {
+ initializeInterleavedLoadCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Interleaved Load Combine Pass";
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (DisableInterleavedLoadCombine)
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName()
+ << "\n");
+
+ return InterleavedLoadCombineImpl(
+ F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<MemorySSAWrapperPass>().getMSSA(),
+ TPC->getTM<TargetMachine>())
+ .run();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+};
+} // anonymous namespace
+
+char InterleavedLoadCombine::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+ InterleavedLoadCombine, DEBUG_TYPE,
+ "Combine interleaved loads into wide loads and shufflevector instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(
+ InterleavedLoadCombine, DEBUG_TYPE,
+ "Combine interleaved loads into wide loads and shufflevector instructions",
+ false, false)
+
+FunctionPass *
+llvm::createInterleavedLoadCombinePass() {
+ auto P = new InterleavedLoadCombine();
+ return P;
+}
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 2cd389ce2c11..52e832cc38c1 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -40,14 +40,14 @@ static cl::opt<bool> EnableTrapUnreachable("trap-unreachable",
cl::desc("Enable generating trap for unreachable"));
void LLVMTargetMachine::initAsmInfo() {
- MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
- MII = TheTarget.createMCInstrInfo();
+ MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
+ MII.reset(TheTarget.createMCInstrInfo());
// FIXME: Having an MCSubtargetInfo on the target machine is a hack due
// to some backends having subtarget feature dependent module level
// code generation. This is similar to the hack in the AsmPrinter for
// module level assembly etc.
- STI = TheTarget.createMCSubtargetInfo(getTargetTriple().str(), getTargetCPU(),
- getTargetFeatureString());
+ STI.reset(TheTarget.createMCSubtargetInfo(
+ getTargetTriple().str(), getTargetCPU(), getTargetFeatureString()));
MCAsmInfo *TmpAsmInfo =
TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str());
@@ -71,7 +71,7 @@ void LLVMTargetMachine::initAsmInfo() {
if (Options.ExceptionModel != ExceptionHandling::None)
TmpAsmInfo->setExceptionsType(Options.ExceptionModel);
- AsmInfo = TmpAsmInfo;
+ AsmInfo.reset(TmpAsmInfo);
}
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
@@ -95,29 +95,22 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) {
}
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
-static MCContext *
-addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
- bool DisableVerify, bool &WillCompleteCodeGenPipeline,
- raw_pwrite_stream &Out, MachineModuleInfo *MMI) {
+static TargetPassConfig *
+addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM,
+ bool DisableVerify, MachineModuleInfo &MMI) {
// Targets may override createPassConfig to provide a target-specific
// subclass.
- TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ TargetPassConfig *PassConfig = TM.createPassConfig(PM);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
- WillCompleteCodeGenPipeline = PassConfig->willCompleteCodeGenPipeline();
PM.add(PassConfig);
- if (!MMI)
- MMI = new MachineModuleInfo(TM);
- PM.add(MMI);
+ PM.add(&MMI);
if (PassConfig->addISelPasses())
return nullptr;
PassConfig->addMachinePasses();
PassConfig->setInitialized();
- if (!WillCompleteCodeGenPipeline)
- PM.add(createPrintMIRPass(Out));
-
- return &MMI->getContext();
+ return PassConfig;
}
bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
@@ -201,14 +194,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
bool DisableVerify,
MachineModuleInfo *MMI) {
// Add common CodeGen passes.
- bool WillCompleteCodeGenPipeline = true;
- MCContext *Context = addPassesToGenerateCode(
- this, PM, DisableVerify, WillCompleteCodeGenPipeline, Out, MMI);
- if (!Context)
+ if (!MMI)
+ MMI = new MachineModuleInfo(this);
+ TargetPassConfig *PassConfig =
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ if (!PassConfig)
return true;
- if (WillCompleteCodeGenPipeline &&
- addAsmPrinter(PM, Out, DwoOut, FileType, *Context))
+ if (!TargetPassConfig::willCompleteCodeGenPipeline()) {
+ PM.add(createPrintMIRPass(Out));
+ } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext()))
return true;
PM.add(createFreeMachineFunctionPass());
@@ -224,14 +219,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- bool WillCompleteCodeGenPipeline = true;
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify,
- WillCompleteCodeGenPipeline, Out,
- /*MachineModuleInfo*/ nullptr);
- if (!Ctx)
+ MachineModuleInfo *MMI = new MachineModuleInfo(this);
+ TargetPassConfig *PassConfig =
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ if (!PassConfig)
return true;
- assert(WillCompleteCodeGenPipeline && "CodeGen pipeline has been altered");
+ assert(TargetPassConfig::willCompleteCodeGenPipeline() &&
+ "Cannot emit MC with limited codegen pipeline");
+ Ctx = &MMI->getContext();
if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index 5dbce841cfd5..f9f33a98a9d1 100644
--- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -145,9 +145,9 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
LLVM_DUMP_METHOD void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
dbgs() << "Latency Priority Queue\n";
dbgs() << " Number of Queue Entries: " << Queue.size() << "\n";
- for (auto const &SU : Queue) {
+ for (const SUnit *SU : Queue) {
dbgs() << " ";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index 417bd9d5aebe..fc0ebea2d36c 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -258,7 +258,8 @@ private:
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited);
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks);
bool ExtendRanges(MachineFunction &MF);
@@ -323,8 +324,10 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
raw_ostream &Out) const {
Out << '\n' << msg << '\n';
for (const MachineBasicBlock &BB : MF) {
- const auto &L = V.lookup(&BB);
- Out << "MBB: " << BB.getName() << ":\n";
+ const VarLocSet &L = V.lookup(&BB);
+ if (L.empty())
+ continue;
+ Out << "MBB: " << BB.getNumber() << ":\n";
for (unsigned VLL : L) {
const VarLoc &VL = VarLocIDs[VLL];
Out << " Var: " << VL.Var.getVar()->getName();
@@ -470,16 +473,21 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
int FI;
- const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand*, 1> Accesses;
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
// To identify a spill instruction, use the same criteria as in AsmPrinter.
- if (!((TII->isStoreToStackSlotPostFE(MI, FI) ||
- TII->hasStoreToStackSlot(MI, MMO, FI)) &&
- FrameInfo.isSpillSlotObjectIndex(FI)))
+ if (!((TII->isStoreToStackSlotPostFE(MI, FI) &&
+ FrameInfo.isSpillSlotObjectIndex(FI)) ||
+ (TII->hasStoreToStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, [&FrameInfo](const MachineMemOperand *MMO) {
+ return FrameInfo.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue())
+ ->getFrameIndex());
+ }))))
return false;
auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
@@ -599,7 +607,7 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
LLVM_DEBUG(for (unsigned ID
: OpenRanges.getVarLocs()) {
// Copy OpenRanges to OutLocs, if not already present.
- dbgs() << "Add to OutLocs: ";
+ dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
VarLocIDs[ID].dump();
});
VarLocSet &VLS = OutLocs[CurMBB];
@@ -626,10 +634,12 @@ bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
/// This routine joins the analysis results of all incoming edges in @MBB by
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB reside in the same location.
-bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
- VarLocInMBB &InLocs, const VarLocMap &VarLocIDs,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited) {
- LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
+bool LiveDebugValues::join(
+ MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
+ const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) {
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
VarLocSet InLocsT; // Temporary incoming locations.
@@ -641,8 +651,11 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
// Ignore unvisited predecessor blocks. As we are processing
// the blocks in reverse post-order any unvisited block can
// be considered to not remove any incoming values.
- if (!Visited.count(p))
+ if (!Visited.count(p)) {
+ LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber()
+ << "\n");
continue;
+ }
auto OL = OutLocs.find(p);
// Join is null in case of empty OutLocs from any of the pred.
if (OL == OutLocs.end())
@@ -654,14 +667,32 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
InLocsT = OL->second;
else
InLocsT &= OL->second;
+
+ LLVM_DEBUG({
+ if (!InLocsT.empty()) {
+ for (auto ID : InLocsT)
+ dbgs() << " gathered candidate incoming var: "
+ << VarLocIDs[ID].Var.getVar()->getName() << "\n";
+ }
+ });
+
NumVisited++;
}
// Filter out DBG_VALUES that are out of scope.
VarLocSet KillSet;
- for (auto ID : InLocsT)
- if (!VarLocIDs[ID].dominates(MBB))
- KillSet.set(ID);
+ bool IsArtificial = ArtificialBlocks.count(&MBB);
+ if (!IsArtificial) {
+ for (auto ID : InLocsT) {
+ if (!VarLocIDs[ID].dominates(MBB)) {
+ KillSet.set(ID);
+ LLVM_DEBUG({
+ auto Name = VarLocIDs[ID].Var.getVar()->getName();
+ dbgs() << " killing " << Name << ", it doesn't dominate MBB\n";
+ });
+ }
+ }
+ }
InLocsT.intersectWithComplement(KillSet);
// As we are processing blocks in reverse post-order we
@@ -712,6 +743,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with spills.
+ // Blocks which are artificial, i.e. blocks which exclusively contain
+ // instructions without locations, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
std::priority_queue<unsigned int, std::vector<unsigned int>,
@@ -733,6 +768,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
dontTransferChanges);
+ auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
+ if (const DebugLoc &DL = MI.getDebugLoc())
+ return DL.getLine() != 0;
+ return false;
+ };
+ for (auto &MBB : MF)
+ if (none_of(MBB.instrs(), hasNonArtificialLocation))
+ ArtificialBlocks.insert(&MBB);
+
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
"OutLocs after initialization", dbgs()));
@@ -758,7 +802,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
- MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited);
+ MBBJoined =
+ join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks);
Visited.insert(MBB);
if (MBBJoined) {
MBBJoined = false;
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 3ff03ec4a7ee..d0d889782a35 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -132,14 +132,18 @@ private:
unsigned WasIndirect : 1;
};
-/// LocMap - Map of where a user value is live, and its location.
+/// Map of where a user value is live, and its location.
using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>;
+/// Map of stack slot offsets for spilled locations.
+/// Non-spilled locations are not added to the map.
+using SpillOffsetMap = DenseMap<unsigned, unsigned>;
+
namespace {
class LDVImpl;
-/// UserValue - A user value is a part of a debug info user variable.
+/// A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
/// holds part of a user variable. The part is identified by a byte offset.
@@ -166,26 +170,26 @@ class UserValue {
/// lexical scope.
SmallSet<SlotIndex, 2> trimmedDefs;
- /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ /// Insert a DBG_VALUE into MBB at Idx for LocNo.
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
- SlotIndex StopIdx,
- DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS,
+ SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
+ unsigned SpillOffset, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI);
- /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// Replace OldLocNo ranges with NewRegs ranges where NewRegs
/// is live. Returns true if any changes were made.
bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
LiveIntervals &LIS);
public:
- /// UserValue - Create a new UserValue.
+ /// Create a new UserValue.
UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
LocMap::Allocator &alloc)
: Variable(var), Expression(expr), dl(std::move(L)), leader(this),
locInts(alloc) {}
- /// getLeader - Get the leader of this value's equivalence class.
+ /// Get the leader of this value's equivalence class.
UserValue *getLeader() {
UserValue *l = leader;
while (l != l->leader)
@@ -193,10 +197,10 @@ public:
return leader = l;
}
- /// getNext - Return the next UserValue in the equivalence class.
+ /// Return the next UserValue in the equivalence class.
UserValue *getNext() const { return next; }
- /// match - Does this UserValue match the parameters?
+ /// Does this UserValue match the parameters?
bool match(const DILocalVariable *Var, const DIExpression *Expr,
const DILocation *IA) const {
// FIXME: The fragment should be part of the equivalence class, but not
@@ -204,7 +208,7 @@ public:
return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
}
- /// merge - Merge equivalence classes.
+ /// Merge equivalence classes.
static UserValue *merge(UserValue *L1, UserValue *L2) {
L2 = L2->getLeader();
if (!L1)
@@ -256,10 +260,10 @@ public:
return locations.size() - 1;
}
- /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ /// Ensure that all virtual register locations are mapped.
void mapVirtRegs(LDVImpl *LDV);
- /// addDef - Add a definition point to this value.
+ /// Add a definition point to this value.
void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
// Add a singular (Idx,Idx) -> Loc mapping.
@@ -271,63 +275,71 @@ public:
I.setValue(Loc);
}
- /// extendDef - Extend the current definition as far as possible down.
+ /// Extend the current definition as far as possible down.
+ ///
/// Stop when meeting an existing def or when leaving the live
- /// range of VNI.
- /// End points where VNI is no longer live are added to Kills.
- /// @param Idx Starting point for the definition.
- /// @param Loc Location number to propagate.
- /// @param LR Restrict liveness to where LR has the value VNI. May be null.
- /// @param VNI When LR is not null, this is the value to restrict to.
- /// @param Kills Append end points of VNI's live range to Kills.
- /// @param LIS Live intervals analysis.
+ /// range of VNI. End points where VNI is no longer live are added to Kills.
+ ///
+ /// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
+ /// data-flow analysis to propagate them beyond basic block boundaries.
+ ///
+ /// \param Idx Starting point for the definition.
+ /// \param Loc Location number to propagate.
+ /// \param LR Restrict liveness to where LR has the value VNI. May be null.
+ /// \param VNI When LR is not null, this is the value to restrict to.
+ /// \param [out] Kills Append end points of VNI's live range to Kills.
+ /// \param LIS Live intervals analysis.
void extendDef(SlotIndex Idx, DbgValueLocation Loc,
LiveRange *LR, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS);
- /// addDefsFromCopies - The value in LI/LocNo may be copies to other
- /// registers. Determine if any of the copies are available at the kill
- /// points, and add defs if possible.
- /// @param LI Scan for copies of the value in LI->reg.
- /// @param LocNo Location number of LI->reg.
- /// @param WasIndirect Indicates if the original use of LI->reg was indirect
- /// @param Kills Points where the range of LocNo could be extended.
- /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+  /// The value in LI/LocNo may be copied to other registers. Determine if
+ /// any of the copies are available at the kill points, and add defs if
+ /// possible.
+ ///
+ /// \param LI Scan for copies of the value in LI->reg.
+ /// \param LocNo Location number of LI->reg.
+ /// \param WasIndirect Indicates if the original use of LI->reg was indirect
+ /// \param Kills Points where the range of LocNo could be extended.
+ /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
void addDefsFromCopies(
LiveInterval *LI, unsigned LocNo, bool WasIndirect,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS);
- /// computeIntervals - Compute the live intervals of all locations after
- /// collecting all their def points.
+ /// Compute the live intervals of all locations after collecting all their
+ /// def points.
void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
LiveIntervals &LIS, LexicalScopes &LS);
- /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
LiveIntervals &LIS);
- /// rewriteLocations - Rewrite virtual register locations according to the
- /// provided virtual register map. Record which locations were spilled.
- void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
- BitVector &SpilledLocations);
+ /// Rewrite virtual register locations according to the provided virtual
+ /// register map. Record the stack slot offsets for the locations that
+ /// were spilled.
+ void rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ SpillOffsetMap &SpillOffsets);
- /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ /// Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
- const BitVector &SpilledLocations);
+ const SpillOffsetMap &SpillOffsets);
- /// getDebugLoc - Return DebugLoc of this UserValue.
+ /// Return DebugLoc of this UserValue.
DebugLoc getDebugLoc() { return dl;}
void print(raw_ostream &, const TargetRegisterInfo *);
};
-/// LDVImpl - Implementation of the LiveDebugVariables pass.
+/// Implementation of the LiveDebugVariables pass.
class LDVImpl {
LiveDebugVariables &pass;
LocMap::Allocator allocator;
@@ -341,7 +353,7 @@ class LDVImpl {
/// Whether the machine function is modified during the pass.
bool ModifiedMF = false;
- /// userValues - All allocated UserValue instances.
+ /// All allocated UserValue instances.
SmallVector<std::unique_ptr<UserValue>, 8> userValues;
/// Map virtual register to eq class leader.
@@ -352,27 +364,31 @@ class LDVImpl {
using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
UVMap userVarMap;
- /// getUserValue - Find or create a UserValue.
+ /// Find or create a UserValue.
UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
const DebugLoc &DL);
- /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ /// Find the EC leader for VirtReg or null.
UserValue *lookupVirtReg(unsigned VirtReg);
- /// handleDebugValue - Add DBG_VALUE instruction to our maps.
- /// @param MI DBG_VALUE instruction
- /// @param Idx Last valid SLotIndex before instruction.
- /// @return True if the DBG_VALUE instruction should be deleted.
+ /// Add DBG_VALUE instruction to our maps.
+ ///
+ /// \param MI DBG_VALUE instruction
+  /// \param Idx Last valid SlotIndex before instruction.
+ ///
+ /// \returns True if the DBG_VALUE instruction should be deleted.
bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
- /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
- /// a UserValue def for each instruction.
- /// @param mf MachineFunction to be scanned.
- /// @return True if any debug values were found.
+ /// Collect and erase all DBG_VALUE instructions, adding a UserValue def
+ /// for each instruction.
+ ///
+ /// \param mf MachineFunction to be scanned.
+ ///
+ /// \returns True if any debug values were found.
bool collectDebugValues(MachineFunction &mf);
- /// computeIntervals - Compute the live intervals of all user values after
- /// collecting all their def points.
+ /// Compute the live intervals of all user values after collecting all
+ /// their def points.
void computeIntervals();
public:
@@ -380,7 +396,7 @@ public:
bool runOnMachineFunction(MachineFunction &mf);
- /// clear - Release all memory.
+ /// Release all memory.
void clear() {
MF = nullptr;
userValues.clear();
@@ -393,13 +409,13 @@ public:
ModifiedMF = false;
}
- /// mapVirtReg - Map virtual register to an equivalence class.
+ /// Map virtual register to an equivalence class.
void mapVirtReg(unsigned VirtReg, UserValue *EC);
- /// splitRegister - Replace all references to OldReg with NewRegs.
+ /// Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
- /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ /// Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM);
void print(raw_ostream&);
@@ -578,30 +594,33 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
MachineBasicBlock *MBB = &*MFI;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
MBBI != MBBE;) {
- if (!MBBI->isDebugValue()) {
+ // Use the first debug instruction in the sequence to get a SlotIndex
+ // for following consecutive debug instructions.
+ if (!MBBI->isDebugInstr()) {
++MBBI;
continue;
}
- // DBG_VALUE has no slot index, use the previous instruction instead.
+      // Debug instructions have no slot index. Use the previous
+ // non-debug instruction's SlotIndex as its SlotIndex.
SlotIndex Idx =
MBBI == MBB->begin()
? LIS->getMBBStartIdx(MBB)
: LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot();
- // Handle consecutive DBG_VALUE instructions with the same slot index.
+ // Handle consecutive debug instructions with the same slot index.
do {
- if (handleDebugValue(*MBBI, Idx)) {
+ // Only handle DBG_VALUE in handleDebugValue(). Skip all other
+ // kinds of debug instructions.
+ if (MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) {
MBBI = MBB->erase(MBBI);
Changed = true;
} else
++MBBI;
- } while (MBBI != MBBE && MBBI->isDebugValue());
+ } while (MBBI != MBBE && MBBI->isDebugInstr());
}
}
return Changed;
}
-/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
-/// data-flow analysis to propagate them beyond basic block boundaries.
void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS) {
@@ -752,7 +771,15 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
}
SmallVector<SlotIndex, 16> Kills;
extendDef(Idx, Loc, LI, VNI, &Kills, LIS);
- if (LI)
+ // FIXME: Handle sub-registers in addDefsFromCopies. The problem is that
+ // if the original location for example is %vreg0:sub_hi, and we find a
+ // full register copy in addDefsFromCopies (at the moment it only handles
+ // full register copies), then we must add the sub1 sub-register index to
+ // the new location. However, that is only possible if the new virtual
+ // register is of the same regclass (or if there is an equivalent
+ // sub-register in that regclass). For now, simply skip handling copies if
+ // a sub-register is involved.
+ if (LI && !LocMO.getSubReg())
addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
LIS);
continue;
@@ -1039,8 +1066,10 @@ splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) {
static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
}
-void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
- BitVector &SpilledLocations) {
+void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ SpillOffsetMap &SpillOffsets) {
// Build a set of new locations with new numbers so we can coalesce our
// IntervalMap if two vreg intervals collapse to the same physical location.
// Use MapVector instead of SetVector because MapVector::insert returns the
@@ -1049,10 +1078,11 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// FIXME: This will be problematic if we ever support direct and indirect
// frame index locations, i.e. expressing both variables in memory and
// 'int x, *px = &x'. The "spilled" bit must become part of the location.
- MapVector<MachineOperand, bool> NewLocations;
+ MapVector<MachineOperand, std::pair<bool, unsigned>> NewLocations;
SmallVector<unsigned, 4> LocNoMap(locations.size());
for (unsigned I = 0, E = locations.size(); I != E; ++I) {
bool Spilled = false;
+ unsigned SpillOffset = 0;
MachineOperand Loc = locations[I];
// Only virtual registers are rewritten.
if (Loc.isReg() && Loc.getReg() &&
@@ -1065,7 +1095,16 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// non-existent sub-register, and %noreg is exactly what we want.
Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
} else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
- // FIXME: Translate SubIdx to a stackslot offset.
+ // Retrieve the stack slot offset.
+ unsigned SpillSize;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *TRC = MRI.getRegClass(VirtReg);
+ bool Success = TII.getStackSlotRange(TRC, Loc.getSubReg(), SpillSize,
+ SpillOffset, MF);
+
+ // FIXME: Invalidate the location if the offset couldn't be calculated.
+ (void)Success;
+
Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
Spilled = true;
} else {
@@ -1076,20 +1115,22 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// Insert this location if it doesn't already exist and record a mapping
// from the old number to the new number.
- auto InsertResult = NewLocations.insert({Loc, Spilled});
+ auto InsertResult = NewLocations.insert({Loc, {Spilled, SpillOffset}});
unsigned NewLocNo = std::distance(NewLocations.begin(), InsertResult.first);
LocNoMap[I] = NewLocNo;
}
- // Rewrite the locations and record which ones were spill slots.
+ // Rewrite the locations and record the stack slot offsets for spills.
locations.clear();
- SpilledLocations.clear();
- SpilledLocations.resize(NewLocations.size());
+ SpillOffsets.clear();
for (auto &Pair : NewLocations) {
+ bool Spilled;
+ unsigned SpillOffset;
+ std::tie(Spilled, SpillOffset) = Pair.second;
locations.push_back(Pair.first);
- if (Pair.second) {
+ if (Spilled) {
unsigned NewLocNo = std::distance(&*NewLocations.begin(), &Pair);
- SpilledLocations.set(NewLocNo);
+ SpillOffsets[NewLocNo] = SpillOffset;
}
}
@@ -1158,10 +1199,9 @@ findNextInsertLocation(MachineBasicBlock *MBB,
}
void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
- SlotIndex StopIdx,
- DbgValueLocation Loc, bool Spilled,
- LiveIntervals &LIS,
- const TargetInstrInfo &TII,
+ SlotIndex StopIdx, DbgValueLocation Loc,
+ bool Spilled, unsigned SpillOffset,
+ LiveIntervals &LIS, const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI) {
SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB);
// Only search within the current MBB.
@@ -1184,12 +1224,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// If the location was spilled, the new DBG_VALUE will be indirect. If the
// original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate
- // that the original virtual register was a pointer.
+ // that the original virtual register was a pointer. Also, add the stack slot
+ // offset for the spilled register to the expression.
const DIExpression *Expr = Expression;
bool IsIndirect = Loc.wasIndirect();
if (Spilled) {
- if (IsIndirect)
- Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+ auto Deref = IsIndirect ? DIExpression::WithDeref : DIExpression::NoDeref;
+ Expr =
+ DIExpression::prepend(Expr, DIExpression::NoDeref, SpillOffset, Deref);
IsIndirect = true;
}
@@ -1208,14 +1250,17 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
- const BitVector &SpilledLocations) {
+ const SpillOffsetMap &SpillOffsets) {
MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
SlotIndex Start = I.start();
SlotIndex Stop = I.stop();
DbgValueLocation Loc = I.value();
- bool Spilled = !Loc.isUndef() ? SpilledLocations.test(Loc.locNo()) : false;
+ auto SpillIt =
+ !Loc.isUndef() ? SpillOffsets.find(Loc.locNo()) : SpillOffsets.end();
+ bool Spilled = SpillIt != SpillOffsets.end();
+ unsigned SpillOffset = Spilled ? SpillIt->second : 0;
// If the interval start was trimmed to the lexical scope insert the
// DBG_VALUE at the previous index (otherwise it appears after the
@@ -1228,7 +1273,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
+ insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII,
+ TRI);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
while (Stop > MBBEnd) {
@@ -1238,7 +1284,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
break;
MBBEnd = LIS.getMBBEndIdx(&*MBB);
LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
- insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
+ insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, SpillOffset, LIS, TII,
+ TRI);
}
LLVM_DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
@@ -1253,11 +1300,11 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
if (!MF)
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- BitVector SpilledLocations;
+ SpillOffsetMap SpillOffsets;
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
LLVM_DEBUG(userValues[i]->print(dbgs(), TRI));
- userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations);
- userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations);
+ userValues[i]->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
}
EmitDone = true;
}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
index aa35880b063a..0060399c2b04 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -39,13 +39,6 @@ public:
LiveDebugVariables();
~LiveDebugVariables() override;
- /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
- /// @param OldReg Old virtual register that is going away.
- /// @param NewReg New register holding the user variables.
- /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
- /// register.
- void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
-
/// splitRegister - Move any user variables in OldReg to the live ranges in
/// NewRegs where they are live. Mark the values as unavailable where no new
/// register is live.
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 83dd982587c6..2340b6abd87c 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -1310,17 +1310,17 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
MachineOperand &MO = *RI;
MachineInstr *MI = RI->getParent();
++RI;
- // DBG_VALUE instructions don't have slot indexes, so get the index of the
- // instruction before them.
- // Normally, DBG_VALUE instructions are removed before this function is
- // called, but it is not a requirement.
- SlotIndex Idx;
- if (MI->isDebugValue())
- Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
- else
- Idx = LIS.getInstructionIndex(*MI);
- LiveQueryResult LRQ = LI.Query(Idx);
- const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ const VNInfo *VNI;
+ if (MI->isDebugValue()) {
+ // DBG_VALUE instructions don't have slot indexes, so get the index of
+ // the instruction before them. The value is defined there too.
+ SlotIndex Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
+ VNI = LI.Query(Idx).valueOut();
+ } else {
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ LiveQueryResult LRQ = LI.Query(Idx);
+ VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ }
// In the case of an <undef> use that isn't tied to any def, VNI will be
// NULL. If the use is tied to a def, VNI will be the defined value.
if (!VNI)
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index 86c6c8e29f9a..619643acb6d3 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -29,8 +29,8 @@ using namespace llvm;
/// The clobbers set will be the list of live registers clobbered
/// by the regmask.
void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) {
- SparseSet<unsigned>::iterator LRI = LiveRegs.begin();
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> *Clobbers) {
+ RegisterSet::iterator LRI = LiveRegs.begin();
while (LRI != LiveRegs.end()) {
if (MO.clobbersPhysReg(*LRI)) {
if (Clobbers)
@@ -83,7 +83,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
/// on accurate kill flags. If possible use stepBackward() instead of this
/// function.
void LivePhysRegs::stepForward(const MachineInstr &MI,
- SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
+ SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg() && !O->isDebug()) {
@@ -142,7 +142,7 @@ LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
#endif
bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
- unsigned Reg) const {
+ MCPhysReg Reg) const {
if (LiveRegs.count(Reg))
return false;
if (MRI.isReserved(Reg))
@@ -157,7 +157,7 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
for (const auto &LI : MBB.liveins()) {
- unsigned Reg = LI.PhysReg;
+ MCPhysReg Reg = LI.PhysReg;
LaneBitmask Mask = LI.LaneMask;
MCSubRegIndexIterator S(Reg, TRI);
assert(Mask.any() && "Invalid livein mask");
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 04324943dfad..70e135ab1aff 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -364,7 +364,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
#ifndef NDEBUG
if (MBB->pred_empty()) {
MBB->getParent()->verify();
- errs() << "Use of " << printReg(PhysReg)
+ errs() << "Use of " << printReg(PhysReg, MRI->getTargetRegisterInfo())
<< " does not have a corresponding definition on every path:\n";
const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
if (MI != nullptr)
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index f90ce0c8cd2a..795028e97929 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -328,7 +328,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Sort the frame references by local offset.
// Use frame index as a tie-breaker in case MI's have the same offset.
- llvm::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+ llvm::sort(FrameReferenceInsns);
MachineBasicBlock *Entry = &Fn.front();
diff --git a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index fa43d13b1b85..f17c23619ed5 100644
--- a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -134,10 +134,10 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
}
- llvm::sort(StringInstrMap.begin(), StringInstrMap.end(),
- [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
- return (a.first < b.first);
- });
+ llvm::sort(StringInstrMap,
+ [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
+ return (a.first < b.first);
+ });
for (auto &II : StringInstrMap) {
@@ -677,8 +677,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
std::vector<MachineInstr *> VisitedMIs;
- std::copy(Candidates.begin(), Candidates.end(),
- std::back_inserter(VisitedMIs));
+ llvm::copy(Candidates, std::back_inserter(VisitedMIs));
std::vector<TypedVReg> VRegs;
for (auto candidate : Candidates) {
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index da05c9a22785..265877c2f5b4 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -202,6 +202,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("contract", MIToken::kw_contract)
.Case("afn", MIToken::kw_afn)
.Case("reassoc", MIToken::kw_reassoc)
+ .Case("nuw" , MIToken::kw_nuw)
+ .Case("nsw" , MIToken::kw_nsw)
+ .Case("exact" , MIToken::kw_exact)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
@@ -217,6 +220,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("undefined", MIToken::kw_cfi_undefined)
.Case("register", MIToken::kw_cfi_register)
.Case("window_save", MIToken::kw_cfi_window_save)
+ .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state)
.Case("blockaddress", MIToken::kw_blockaddress)
.Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
@@ -245,6 +249,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("successors", MIToken::kw_successors)
.Case("floatpred", MIToken::kw_floatpred)
.Case("intpred", MIToken::kw_intpred)
+ .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
+ .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
+ .Case("unknown-size", MIToken::kw_unknown_size)
.Default(MIToken::Identifier);
}
@@ -460,6 +467,53 @@ static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
ErrorCallback);
}
+static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "<mcsymbol ";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ auto Start = C;
+ C.advance(Rule.size());
+
+ // Try a simple unquoted name.
+ if (C.peek() != '"') {
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ StringRef String = Start.upto(C).drop_front(Rule.size());
+ if (C.peek() != '>') {
+ ErrorCallback(C.location(),
+ "expected the '<mcsymbol ...' to be closed by a '>'");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ C.advance();
+
+ Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String);
+ return C;
+ }
+
+ // Otherwise lex out a quoted name.
+ Cursor R = lexStringConstant(C, ErrorCallback);
+ if (!R) {
+ ErrorCallback(C.location(),
+ "unable to parse quoted string from opening quote");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ StringRef String = Start.upto(R).drop_front(Rule.size());
+ if (R.peek() != '>') {
+ ErrorCallback(R.location(),
+ "expected the '<mcsymbol ...' to be closed by a '>'");
+ Token.reset(MIToken::Error, Start.remaining());
+ return Start;
+ }
+ R.advance();
+
+ Token.reset(MIToken::MCSymbol, Start.upto(R))
+ .setOwnedStringValue(unescapeQuotedString(String));
+ return R;
+}
+
static bool isValidHexFloatingPointPrefix(char C) {
return C == 'H' || C == 'K' || C == 'L' || C == 'M';
}
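
Examples of inputs the maybeLexMCSymbol rule added above accepts (symbol names invented, not taken from an existing test): an unquoted name made of identifier characters, or a quoted name for anything else, both closed by '>':

    // Hedged examples, assuming llvm::StringRef.
    StringRef Simple = "<mcsymbol .Lpre_sym>";            // unquoted identifier form
    StringRef Quoted = "<mcsymbol \"sym with spaces\">";  // quoted form
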
@@ -523,6 +577,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
.Case("!noalias", MIToken::md_noalias)
.Case("!range", MIToken::md_range)
.Case("!DIExpression", MIToken::md_diexpr)
+ .Case("!DILocation", MIToken::md_dilocation)
.Default(MIToken::Error);
}
@@ -657,6 +712,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
return R.remaining();
+ if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback))
+ return R.remaining();
if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
return R.remaining();
if (Cursor R = maybeLexNumericalLiteral(C, Token))
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index e21c71532f79..ceff79087d81 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -71,6 +71,9 @@ struct MIToken {
kw_contract,
kw_afn,
kw_reassoc,
+ kw_nuw,
+ kw_nsw,
+ kw_exact,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
@@ -86,6 +89,7 @@ struct MIToken {
kw_cfi_restore_state,
kw_cfi_undefined,
kw_cfi_window_save,
+ kw_cfi_aarch64_negate_ra_sign_state,
kw_blockaddress,
kw_intrinsic,
kw_target_index,
@@ -113,6 +117,9 @@ struct MIToken {
kw_successors,
kw_floatpred,
kw_intpred,
+ kw_pre_instr_symbol,
+ kw_post_instr_symbol,
+ kw_unknown_size,
// Named metadata keywords
md_tbaa,
@@ -120,6 +127,7 @@ struct MIToken {
md_noalias,
md_range,
md_diexpr,
+ md_dilocation,
// Identifier tokens
Identifier,
@@ -132,6 +140,7 @@ struct MIToken {
NamedGlobalValue,
GlobalValue,
ExternalSymbol,
+ MCSymbol,
// Other tokens
IntegerLiteral,
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index a61e7872f1ae..6f2d8bb53ac8 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MIRPrinter.h"
@@ -54,6 +55,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -221,8 +223,10 @@ public:
bool parseSubRegisterIndexOperand(MachineOperand &Dest);
bool parseJumpTableIndexOperand(MachineOperand &Dest);
bool parseExternalSymbolOperand(MachineOperand &Dest);
+ bool parseMCSymbolOperand(MachineOperand &Dest);
bool parseMDNode(MDNode *&Node);
bool parseDIExpression(MDNode *&Expr);
+ bool parseDILocation(MDNode *&Expr);
bool parseMetadataOperand(MachineOperand &Dest);
bool parseCFIOffset(int &Offset);
bool parseCFIRegister(unsigned &Reg);
@@ -250,6 +254,7 @@ public:
bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID);
bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
+ bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
private:
/// Convert the integer literal in the current token into an unsigned integer.
@@ -346,6 +351,9 @@ private:
/// Return true if the name isn't a name of a target MMO flag.
bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
+ /// Get or create an MCSymbol for a given name.
+ MCSymbol *getOrCreateMCSymbol(StringRef Name);
+
/// parseStringConstant
/// ::= StringConstant
bool parseStringConstant(std::string &Result);
@@ -737,12 +745,16 @@ bool MIParser::parse(MachineInstr *&MI) {
return true;
// Parse the remaining machine operands.
- while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+ while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
+ Token.isNot(MIToken::kw_post_instr_symbol) &&
+ Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
Optional<unsigned> TiedDefIdx;
if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
return true;
+ if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg())
+ MO.setIsDebug();
Operands.push_back(
ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
@@ -753,14 +765,29 @@ bool MIParser::parse(MachineInstr *&MI) {
lex();
}
+ MCSymbol *PreInstrSymbol = nullptr;
+ if (Token.is(MIToken::kw_pre_instr_symbol))
+ if (parsePreOrPostInstrSymbol(PreInstrSymbol))
+ return true;
+ MCSymbol *PostInstrSymbol = nullptr;
+ if (Token.is(MIToken::kw_post_instr_symbol))
+ if (parsePreOrPostInstrSymbol(PostInstrSymbol))
+ return true;
+
DebugLoc DebugLocation;
if (Token.is(MIToken::kw_debug_location)) {
lex();
- if (Token.isNot(MIToken::exclaim))
- return error("expected a metadata node after 'debug-location'");
MDNode *Node = nullptr;
- if (parseMDNode(Node))
- return true;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(Node))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(Node))
+ return true;
+ } else
+ return error("expected a metadata node after 'debug-location'");
+ if (!isa<DILocation>(Node))
+ return error("referenced metadata is not a DILocation");
DebugLocation = DebugLoc(Node);
}
@@ -795,12 +822,12 @@ bool MIParser::parse(MachineInstr *&MI) {
MI->addOperand(MF, Operand.Operand);
if (assignRegisterTies(*MI, Operands))
return true;
- if (MemOperands.empty())
- return false;
- MachineInstr::mmo_iterator MemRefs =
- MF.allocateMemRefsArray(MemOperands.size());
- std::copy(MemOperands.begin(), MemOperands.end(), MemRefs);
- MI->setMemRefs(MemRefs, MemRefs + MemOperands.size());
+ if (PreInstrSymbol)
+ MI->setPreInstrSymbol(MF, PreInstrSymbol);
+ if (PostInstrSymbol)
+ MI->setPostInstrSymbol(MF, PostInstrSymbol);
+ if (!MemOperands.empty())
+ MI->setMemRefs(MF, MemOperands);
return false;
}
@@ -876,6 +903,9 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
} else if (Token.is(MIToken::md_diexpr)) {
if (parseDIExpression(Node))
return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(Node))
+ return true;
} else
return error("expected a metadata node");
if (Token.isNot(MIToken::Eof))
@@ -945,7 +975,10 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_arcp) ||
Token.is(MIToken::kw_contract) ||
Token.is(MIToken::kw_afn) ||
- Token.is(MIToken::kw_reassoc)) {
+ Token.is(MIToken::kw_reassoc) ||
+ Token.is(MIToken::kw_nuw) ||
+ Token.is(MIToken::kw_nsw) ||
+ Token.is(MIToken::kw_exact)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -965,6 +998,12 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::FmAfn;
if (Token.is(MIToken::kw_reassoc))
Flags |= MachineInstr::FmReassoc;
+ if (Token.is(MIToken::kw_nuw))
+ Flags |= MachineInstr::NoUWrap;
+ if (Token.is(MIToken::kw_nsw))
+ Flags |= MachineInstr::NoSWrap;
+ if (Token.is(MIToken::kw_exact))
+ Flags |= MachineInstr::IsExact;
lex();
}
@@ -1573,6 +1612,16 @@ bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::MCSymbol));
+ MCSymbol *Symbol = getOrCreateMCSymbol(Token.stringValue());
+ lex();
+ Dest = MachineOperand::CreateMCSymbol(Symbol);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::SubRegisterIndex));
StringRef Name = Token.stringValue();
@@ -1643,6 +1692,109 @@ bool MIParser::parseDIExpression(MDNode *&Expr) {
return false;
}
+bool MIParser::parseDILocation(MDNode *&Loc) {
+ assert(Token.is(MIToken::md_dilocation));
+ lex();
+
+ bool HaveLine = false;
+ unsigned Line = 0;
+ unsigned Column = 0;
+ MDNode *Scope = nullptr;
+ MDNode *InlinedAt = nullptr;
+ bool ImplicitCode = false;
+
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ if (Token.isNot(MIToken::rparen)) {
+ do {
+ if (Token.is(MIToken::Identifier)) {
+ if (Token.stringValue() == "line") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ Line = Token.integerValue().getZExtValue();
+ HaveLine = true;
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "column") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ Column = Token.integerValue().getZExtValue();
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "scope") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (parseMDNode(Scope))
+ return error("expected metadata node");
+ if (!isa<DIScope>(Scope))
+ return error("expected DIScope node");
+ continue;
+ }
+ if (Token.stringValue() == "inlinedAt") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.is(MIToken::exclaim)) {
+ if (parseMDNode(InlinedAt))
+ return true;
+ } else if (Token.is(MIToken::md_dilocation)) {
+ if (parseDILocation(InlinedAt))
+ return true;
+ } else
+ return error("expected metadata node");
+ if (!isa<DILocation>(InlinedAt))
+ return error("expected DILocation node");
+ continue;
+ }
+ if (Token.stringValue() == "isImplicitCode") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (!Token.is(MIToken::Identifier))
+ return error("expected true/false");
+ // As far as I can see, we don't have any existing need for parsing
+ // true/false in MIR yet. Do it ad-hoc until there's something else
+ // that needs it.
+ if (Token.stringValue() == "true")
+ ImplicitCode = true;
+ else if (Token.stringValue() == "false")
+ ImplicitCode = false;
+ else
+ return error("expected true/false");
+ lex();
+ continue;
+ }
+ }
+ return error(Twine("invalid DILocation argument '") +
+ Token.stringValue() + "'");
+ } while (consumeIfPresent(MIToken::comma));
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (!HaveLine)
+ return error("DILocation requires line number");
+ if (!Scope)
+ return error("DILocation requires a scope");
+
+ Loc = DILocation::get(MF.getFunction().getContext(), Line, Column, Scope,
+ InlinedAt, ImplicitCode);
+ return false;
+}
+
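
Putting the pieces together (all values invented): a MIR line such as "debug-location !DILocation(line: 3, column: 7, scope: !5)" is accepted by this parser and lowered through the DILocation::get call above, the equivalent of:

    // Hedged sketch; Ctx and Scope stand in for the parsed context and scope node.
    MDNode *Loc = DILocation::get(Ctx, /*Line=*/3, /*Column=*/7, Scope,
                                  /*InlinedAt=*/nullptr,
                                  /*ImplicitCode=*/false);
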
bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
MDNode *Node = nullptr;
if (Token.is(MIToken::exclaim)) {
@@ -1779,6 +1931,9 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
case MIToken::kw_cfi_window_save:
CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
break;
+ case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ break;
case MIToken::kw_cfi_escape: {
std::string Values;
if (parseCFIEscapeValues(Values))
@@ -2050,6 +2205,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseJumpTableIndexOperand(Dest);
case MIToken::ExternalSymbol:
return parseExternalSymbolOperand(Dest);
+ case MIToken::MCSymbol:
+ return parseMCSymbolOperand(Dest);
case MIToken::SubRegisterIndex:
return parseSubRegisterIndexOperand(Dest);
case MIToken::md_diexpr:
@@ -2069,6 +2226,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::kw_cfi_restore_state:
case MIToken::kw_cfi_undefined:
case MIToken::kw_cfi_window_save:
+ case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
return parseCFIOperand(Dest);
case MIToken::kw_blockaddress:
return parseBlockAddressOperand(Dest);
@@ -2423,7 +2581,7 @@ bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) {
return false;
}
- return error("expected an atomic scope, ordering or a size integer literal");
+ return error("expected an atomic scope, ordering or a size specification");
}
bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
@@ -2462,11 +2620,17 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseOptionalAtomicOrdering(FailureOrder))
return true;
- if (Token.isNot(MIToken::IntegerLiteral))
- return error("expected the size integer literal after memory operation");
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::kw_unknown_size))
+ return error("expected the size integer literal or 'unknown-size' after "
+ "memory operation");
uint64_t Size;
- if (getUint64(Size))
- return true;
+ if (Token.is(MIToken::IntegerLiteral)) {
+ if (getUint64(Size))
+ return true;
+ } else if (Token.is(MIToken::kw_unknown_size)) {
+ Size = MemoryLocation::UnknownSize;
+ }
lex();
MachinePointerInfo Ptr = MachinePointerInfo();
@@ -2483,7 +2647,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseMachinePointerInfo(Ptr))
return true;
}
- unsigned BaseAlignment = Size;
+ unsigned BaseAlignment = (Size != MemoryLocation::UnknownSize ? Size : 1);
AAMDNodes AAInfo;
MDNode *Range = nullptr;
while (consumeIfPresent(MIToken::comma)) {
@@ -2529,6 +2693,24 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
return false;
}
+bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
+ assert((Token.is(MIToken::kw_pre_instr_symbol) ||
+ Token.is(MIToken::kw_post_instr_symbol)) &&
+ "Invalid token for a pre- post-instruction symbol!");
+ lex();
+ if (Token.isNot(MIToken::MCSymbol))
+ return error("expected a symbol after 'pre-instr-symbol'");
+ Symbol = getOrCreateMCSymbol(Token.stringValue());
+ lex();
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
+
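
A sketch of the round trip this enables (symbol name invented): after lexing "<mcsymbol .Lfoo>" following 'pre-instr-symbol', the parser recreates the symbol through MCContext and re-attaches it to the instruction, which the printer later emits in the same form:

    // Hedged sketch; MF and MI are the function and instruction being parsed.
    MCSymbol *Sym = MF.getContext().getOrCreateSymbol(".Lfoo");
    MI->setPreInstrSymbol(MF, Sym);  // printed back as "pre-instr-symbol <mcsymbol .Lfoo>"
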
void MIParser::initNames2InstrOpCodes() {
if (!Names2InstrOpCodes.empty())
return;
@@ -2759,6 +2941,15 @@ bool MIParser::getMMOTargetFlag(StringRef Name,
return false;
}
+MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
+ // FIXME: Currently we can't recognize temporary or local symbols and call all
+ // of the appropriate forms to create them. However, this handles basic cases
+ // well as most of the special aspects are recognized by a prefix on their
+ // name, and the input names should already be unique. For test cases, keeping
+ // the symbol name out of the symbol table isn't terribly important.
+ return MF.getContext().getOrCreateSymbol(Name);
+}
+
bool MIParser::parseStringConstant(std::string &Result) {
if (Token.isNot(MIToken::StringConstant))
return error("expected string constant");
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 3d2db97acb48..00da92a92ec6 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -355,6 +355,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
+ MF.setHasWinCFI(YamlMF.HasWinCFI);
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
@@ -580,6 +581,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
MFI.setHasCalls(YamlMFI.HasCalls);
if (YamlMFI.MaxCallFrameSize != ~0u)
MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
+ MFI.setCVBytesOfCalleeSavedRegisters(YamlMFI.CVBytesOfCalleeSavedRegisters);
MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index bf8cd1489ec5..d9dcc428943f 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -50,6 +50,7 @@
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -195,6 +196,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Name = MF.getName();
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
+ YamlMF.HasWinCFI = MF.hasWinCFI();
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
@@ -327,6 +329,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.HasCalls = MFI.hasCalls();
YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed()
? MFI.getMaxCallFrameSize() : ~0u;
+ YamlMFI.CVBytesOfCalleeSavedRegisters =
+ MFI.getCVBytesOfCalleeSavedRegisters();
YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
@@ -397,18 +401,20 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
yaml::StringValue Reg;
printRegMIR(CSInfo.getReg(), Reg, TRI);
- auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
- "Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- if (StackObject.IsFixed) {
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
- } else {
- YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
+ if (!CSInfo.isSpilledToReg()) {
+ auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ if (StackObject.IsFixed) {
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
+ CSInfo.isRestored();
+ } else {
+ YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
+ CSInfo.isRestored();
+ }
}
}
for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
@@ -694,6 +700,12 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "afn ";
if (MI.getFlag(MachineInstr::FmReassoc))
OS << "reassoc ";
+ if (MI.getFlag(MachineInstr::NoUWrap))
+ OS << "nuw ";
+ if (MI.getFlag(MachineInstr::NoSWrap))
+ OS << "nsw ";
+ if (MI.getFlag(MachineInstr::IsExact))
+ OS << "exact ";
OS << TII->getName(MI.getOpcode());
if (I < E)
@@ -708,6 +720,23 @@ void MIPrinter::print(const MachineInstr &MI) {
NeedComma = true;
}
+ // Print any optional symbols attached to this instruction as-if they were
+ // operands.
+ if (MCSymbol *PreInstrSymbol = MI.getPreInstrSymbol()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " pre-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PreInstrSymbol);
+ NeedComma = true;
+ }
+ if (MCSymbol *PostInstrSymbol = MI.getPostInstrSymbol()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " post-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PostInstrSymbol);
+ NeedComma = true;
+ }
+
if (const DebugLoc &DL = MI.getDebugLoc()) {
if (NeedComma)
OS << ',';
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 38e8369dc739..03771bc5dae1 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -110,6 +110,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
// use/def lists.
MachineFunction *MF = Parent->getParent();
N->AddRegOperandsToUseLists(MF->getRegInfo());
+ MF->handleInsertion(*N);
}
/// When we remove an instruction from a basic block list, we update its parent
@@ -118,8 +119,10 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() && "machine instruction not in a basic block");
// Remove from the use/def lists.
- if (MachineFunction *MF = N->getMF())
+ if (MachineFunction *MF = N->getMF()) {
+ MF->handleRemoval(*N);
N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+ }
N->setParent(nullptr);
}
@@ -359,7 +362,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print human readable probabilities as comments.
OS << "; ";
for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
- const BranchProbability &BP = *getProbabilityIterator(I);
+ const BranchProbability &BP = getSuccProbability(I);
if (I != succ_begin())
OS << ", ";
OS << printMBBReference(**I) << '('
@@ -458,7 +461,7 @@ bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
}
void MachineBasicBlock::sortUniqueLiveIns() {
- llvm::sort(LiveIns.begin(), LiveIns.end(),
+ llvm::sort(LiveIns,
[](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
return LI0.PhysReg < LI1.PhysReg;
});
@@ -1375,13 +1378,53 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
unsigned Neighborhood) const {
unsigned N = Neighborhood;
- // Start by searching backwards from Before, looking for kills, reads or defs.
+ // Try searching forwards from Before, looking for reads or defs.
const_iterator I(Before);
+ for (; I != end() && N > 0; ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ --N;
+
+ MachineOperandIteratorBase::PhysRegInfo Info =
+ ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+
+ // Register is live when we read it here.
+ if (Info.Read)
+ return LQR_Live;
+ // Register is dead if we can fully overwrite or clobber it here.
+ if (Info.FullyDefined || Info.Clobbered)
+ return LQR_Dead;
+ }
+
+ // If we reached the end, it is safe to clobber Reg at the end of a block if
+ // no successor has it live in.
+ if (I == end()) {
+ for (MachineBasicBlock *S : successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LI : S->liveins()) {
+ if (TRI->regsOverlap(LI.PhysReg, Reg))
+ return LQR_Live;
+ }
+ }
+
+ return LQR_Dead;
+ }
+
+
+ N = Neighborhood;
+
+ // Start by searching backwards from Before, looking for kills, reads or defs.
+ I = const_iterator(Before);
// If this is the first insn in the block, don't search backwards.
if (I != begin()) {
do {
--I;
+ if (I->isDebugInstr())
+ continue;
+
+ --N;
+
MachineOperandIteratorBase::PhysRegInfo Info =
ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
@@ -1406,39 +1449,20 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
// Register must be live if we read it.
if (Info.Read)
return LQR_Live;
- } while (I != begin() && --N > 0);
+
+ } while (I != begin() && N > 0);
}
// Did we get to the start of the block?
if (I == begin()) {
// If so, the register's state is definitely defined by the live-in state.
- for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid();
- ++RAI)
- if (isLiveIn(*RAI))
+ for (const MachineBasicBlock::RegisterMaskPair &LI : liveins())
+ if (TRI->regsOverlap(LI.PhysReg, Reg))
return LQR_Live;
return LQR_Dead;
}
- N = Neighborhood;
-
- // Try searching forwards from Before, looking for reads or defs.
- I = const_iterator(Before);
- // If this is the last insn in the block, don't search forwards.
- if (I != end()) {
- for (++I; I != end() && N > 0; ++I, --N) {
- MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
-
- // Register is live when we read it here.
- if (Info.Read)
- return LQR_Live;
- // Register is dead if we can fully overwrite or clobber it here.
- if (Info.FullyDefined || Info.Clobbered)
- return LQR_Dead;
- }
- }
-
// At this point we have no idea of the liveness of the register.
return LQR_Unknown;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 21350df624e7..4fee9c4ea027 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -316,7 +316,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// A type for a block filter set.
using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
- /// Pair struct containing basic block and taildup profitiability
+ /// Pair struct containing basic block and taildup profitability
struct BlockAndTailDupResult {
MachineBasicBlock *BB;
bool ShouldTailDup;
@@ -2497,7 +2497,8 @@ void MachineBlockPlacement::alignBlocks() {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F->getFunction().optForSize())
+ if (F->getFunction().optForMinSize() ||
+ (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize()))
return;
BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
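
With this change, block alignment is still performed under plain -Os when the target opts in through the hook consulted above; a hedged sketch of such an override (the target class name is invented):

    // Sketch only; assumes the hook name used in the call above is a virtual
    // member of the target lowering base class.
    class MyTargetLowering : public TargetLowering {
    public:
      bool alignLoopsWithOptSize() const override { return true; }
    };
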
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 6c92b1d426d6..6ee8571c28aa 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -180,6 +180,10 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
continue;
LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
LLVM_DEBUG(dbgs() << "*** to: " << *MI);
+
+ // Update matching debug values.
+ DefMI->changeDebugValuesDefReg(SrcReg);
+
// Propagate SrcReg of copies to MI.
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
@@ -231,6 +235,21 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return false;
}
+static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
+ const MachineFunction &MF,
+ const TargetRegisterInfo &TRI) {
+ // MachineRegisterInfo::isConstantPhysReg directly called by
+ // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
+ // reserved registers to be frozen. That doesn't cause a problem post-ISel as
+ // most (if not all) targets freeze reserved registers right after ISel.
+ //
+ // It does cause issues mid-GlobalISel, however, hence the additional
+ // reservedRegsFrozen check.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return TRI.isCallerPreservedPhysReg(Reg, MF) ||
+ (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
+}
+
/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
/// physical registers (except for dead defs of physical registers). It also
/// returns the physical register def by reference if it's the only one and the
@@ -250,7 +269,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Reading either caller preserved or constant physregs is ok.
- if (!MRI->isCallerPreservedOrConstPhysReg(Reg))
+ if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index 0c6efff7bb40..f51b482e20e3 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -231,6 +231,8 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
// Get the first instruction that uses MO
MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
RI++;
+ if (RI == MRI->reg_end())
+ continue;
MachineInstr *UseMO = RI->getParent();
unsigned LatencyOp = 0;
if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 3bf8147a06c3..19879fe89007 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -74,58 +74,154 @@ DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
namespace {
-using RegList = SmallVector<unsigned, 4>;
-using SourceMap = DenseMap<unsigned, RegList>;
-using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;
-
- class MachineCopyPropagation : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- const MachineRegisterInfo *MRI;
-
- public:
- static char ID; // Pass identification, replacement for typeid
+class CopyTracker {
+ struct CopyInfo {
+ MachineInstr *MI;
+ SmallVector<unsigned, 4> DefRegs;
+ bool Avail;
+ };
- MachineCopyPropagation() : MachineFunctionPass(ID) {
- initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ DenseMap<unsigned, CopyInfo> Copies;
+
+public:
+ /// Mark all of the given registers and their subregisters as unavailable for
+ /// copying.
+ void markRegsUnavailable(ArrayRef<unsigned> Regs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned Reg : Regs) {
+ // Source of copy is no longer available for propagation.
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+ auto CI = Copies.find(*RUI);
+ if (CI != Copies.end())
+ CI->second.Avail = false;
+ }
}
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
+ /// Clobber a single register, removing it from the tracker's copy maps.
+ void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+ auto I = Copies.find(*RUI);
+ if (I != Copies.end()) {
+ // When we clobber the source of a copy, we need to clobber everything
+ // it defined.
+ markRegsUnavailable(I->second.DefRegs, TRI);
+ // When we clobber the destination of a copy, we need to clobber the
+ // whole register it defined.
+ if (MachineInstr *MI = I->second.MI)
+ markRegsUnavailable({MI->getOperand(0).getReg()}, TRI);
+ // Now we can erase the copy.
+ Copies.erase(I);
+ }
}
+ }
+
+ /// Add this copy's registers into the tracker's copy maps.
+ void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
+ assert(MI->isCopy() && "Tracking non-copy?");
+
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
- bool runOnMachineFunction(MachineFunction &MF) override;
+ // Remember Def is defined by the copy.
+ for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
+ Copies[*RUI] = {MI, {}, true};
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
+ // Remember source that's copied to Def. Once it's clobbered, then
+ // it's no longer available for copy propagation.
+ for (MCRegUnitIterator RUI(Src, &TRI); RUI.isValid(); ++RUI) {
+ auto I = Copies.insert({*RUI, {nullptr, {}, false}});
+ auto &Copy = I.first->second;
+ if (!is_contained(Copy.DefRegs, Def))
+ Copy.DefRegs.push_back(Def);
}
+ }
+
+ bool hasAnyCopies() {
+ return !Copies.empty();
+ }
- private:
- void ClobberRegister(unsigned Reg);
- void ReadRegister(unsigned Reg);
- void CopyPropagateBlock(MachineBasicBlock &MBB);
- bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
- void forwardUses(MachineInstr &MI);
- bool isForwardableRegClassCopy(const MachineInstr &Copy,
- const MachineInstr &UseI, unsigned UseIdx);
- bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+ MachineInstr *findCopyForUnit(unsigned RegUnit, const TargetRegisterInfo &TRI,
+ bool MustBeAvailable = false) {
+ auto CI = Copies.find(RegUnit);
+ if (CI == Copies.end())
+ return nullptr;
+ if (MustBeAvailable && !CI->second.Avail)
+ return nullptr;
+ return CI->second.MI;
+ }
- /// Candidates for deletion.
- SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
+ MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ // We check the first RegUnit here, since we'll only be interested in the
+ // copy if it copies the entire register anyway.
+ MCRegUnitIterator RUI(Reg, &TRI);
+ MachineInstr *AvailCopy =
+ findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true);
+ if (!AvailCopy ||
+ !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg))
+ return nullptr;
+
+ // Check that the available copy isn't clobbered by any regmasks between
+ // itself and the destination.
+ unsigned AvailSrc = AvailCopy->getOperand(1).getReg();
+ unsigned AvailDef = AvailCopy->getOperand(0).getReg();
+ for (const MachineInstr &MI :
+ make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef))
+ return nullptr;
+
+ return AvailCopy;
+ }
- /// Def -> available copies map.
- Reg2MIMap AvailCopyMap;
+ void clear() {
+ Copies.clear();
+ }
+};
- /// Def -> copies map.
- Reg2MIMap CopyMap;
+class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ const MachineRegisterInfo *MRI;
- /// Src -> Def map
- SourceMap SrcMap;
+public:
+ static char ID; // Pass identification, replacement for typeid
- bool Changed;
- };
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ void ClobberRegister(unsigned Reg);
+ void ReadRegister(unsigned Reg);
+ void CopyPropagateBlock(MachineBasicBlock &MBB);
+ bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+ void forwardUses(MachineInstr &MI);
+ bool isForwardableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI, unsigned UseIdx);
+ bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+
+ /// Candidates for deletion.
+ SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
+
+ CopyTracker Tracker;
+
+ bool Changed;
+};
} // end anonymous namespace
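
The register-unit-based CopyTracker above replaces the three per-register maps; a hedged usage sketch mirroring how CopyPropagateBlock drives it further down in this diff (instruction and register names invented):

    // Not part of the diff; illustrates the tracker's life cycle per block.
    CopyTracker Tracker;
    Tracker.trackCopy(CopyMI, *TRI);        // CopyMI must be a COPY, e.g. $ebx = COPY $eax
    if (MachineInstr *Avail = Tracker.findAvailCopy(UseMI, UseReg, *TRI)) {
      // forward Avail->getOperand(1).getReg() into UseMI
    }
    Tracker.clobberRegister(DefReg, *TRI);  // any later def invalidates matching entries
    Tracker.clear();                        // reset at the end of the block
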
@@ -136,54 +232,13 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
-/// Remove any entry in \p Map where the register is a subregister or equal to
-/// a register contained in \p Regs.
-static void removeRegsFromMap(Reg2MIMap &Map, const RegList &Regs,
- const TargetRegisterInfo &TRI) {
- for (unsigned Reg : Regs) {
- // Source of copy is no longer available for propagation.
- for (MCSubRegIterator SR(Reg, &TRI, true); SR.isValid(); ++SR)
- Map.erase(*SR);
- }
-}
-
-/// Remove any entry in \p Map that is marked clobbered in \p RegMask.
-/// The map will typically have a lot fewer entries than the regmask clobbers,
-/// so this is more efficient than iterating the clobbered registers and calling
-/// ClobberRegister() on them.
-static void removeClobberedRegsFromMap(Reg2MIMap &Map,
- const MachineOperand &RegMask) {
- for (Reg2MIMap::iterator I = Map.begin(), E = Map.end(), Next; I != E;
- I = Next) {
- Next = std::next(I);
- unsigned Reg = I->first;
- if (RegMask.clobbersPhysReg(Reg))
- Map.erase(I);
- }
-}
-
-void MachineCopyPropagation::ClobberRegister(unsigned Reg) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- CopyMap.erase(*AI);
- AvailCopyMap.erase(*AI);
-
- SourceMap::iterator SI = SrcMap.find(*AI);
- if (SI != SrcMap.end()) {
- removeRegsFromMap(AvailCopyMap, SI->second, *TRI);
- SrcMap.erase(SI);
- }
- }
-}
-
void MachineCopyPropagation::ReadRegister(unsigned Reg) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- Reg2MIMap::iterator CI = CopyMap.find(*AI);
- if (CI != CopyMap.end()) {
- LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: ";
- CI->second->dump());
- MaybeDeadCopies.remove(CI->second);
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
+ if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
+ MaybeDeadCopies.remove(Copy);
}
}
}
@@ -219,15 +274,14 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
return false;
// Search for an existing copy.
- Reg2MIMap::iterator CI = AvailCopyMap.find(Def);
- if (CI == AvailCopyMap.end())
+ MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI);
+ if (!PrevCopy)
return false;
// Check that the existing copy uses the correct sub registers.
- MachineInstr &PrevCopy = *CI->second;
- if (PrevCopy.getOperand(0).isDead())
+ if (PrevCopy->getOperand(0).isDead())
return false;
- if (!isNopCopy(PrevCopy, Src, Def, TRI))
+ if (!isNopCopy(*PrevCopy, Src, Def, TRI))
return false;
LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
@@ -238,7 +292,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
unsigned CopyDef = Copy.getOperand(0).getReg();
assert(CopyDef == Src || CopyDef == Def);
for (MachineInstr &MI :
- make_range(PrevCopy.getIterator(), Copy.getIterator()))
+ make_range(PrevCopy->getIterator(), Copy.getIterator()))
MI.clearRegisterKills(CopyDef, TRI);
Copy.eraseFromParent();
@@ -314,7 +368,7 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
/// Look for available copies whose destination register is used by \p MI and
/// replace the use in \p MI with the copy's source register.
void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
- if (AvailCopyMap.empty())
+ if (!Tracker.hasAnyCopies())
return;
// Look for non-tied explicit vreg uses that have an active COPY
@@ -341,13 +395,12 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!MOUse.isRenamable())
continue;
- auto CI = AvailCopyMap.find(MOUse.getReg());
- if (CI == AvailCopyMap.end())
+ MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg(), *TRI);
+ if (!Copy)
continue;
- MachineInstr &Copy = *CI->second;
- unsigned CopyDstReg = Copy.getOperand(0).getReg();
- const MachineOperand &CopySrc = Copy.getOperand(1);
+ unsigned CopyDstReg = Copy->getOperand(0).getReg();
+ const MachineOperand &CopySrc = Copy->getOperand(1);
unsigned CopySrcReg = CopySrc.getReg();
// FIXME: Don't handle partial uses of wider COPYs yet.
@@ -362,7 +415,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg))
continue;
- if (!isForwardableRegClassCopy(Copy, MI, OpIdx))
+ if (!isForwardableRegClassCopy(*Copy, MI, OpIdx))
continue;
if (hasImplicitOverlap(MI, MOUse))
@@ -376,7 +429,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
<< "\n with " << printReg(CopySrcReg, TRI)
- << "\n in " << MI << " from " << Copy);
+ << "\n in " << MI << " from " << *Copy);
MOUse.setReg(CopySrcReg);
if (!CopySrc.isRenamable())
@@ -386,7 +439,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
// Clear kill markers that may have been invalidated.
for (MachineInstr &KMI :
- make_range(Copy.getIterator(), std::next(MI.getIterator())))
+ make_range(Copy->getIterator(), std::next(MI.getIterator())))
KMI.clearRegisterKills(CopySrcReg, TRI);
++NumCopyForwards;
@@ -459,28 +512,17 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// %xmm2 = copy %xmm0
// ...
// %xmm2 = copy %xmm9
- ClobberRegister(Def);
+ Tracker.clobberRegister(Def, *TRI);
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
- // Remember Def is defined by the copy.
- for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid();
- ++SR) {
- CopyMap[*SR] = MI;
- AvailCopyMap[*SR] = MI;
- }
-
- // Remember source that's copied to Def. Once it's clobbered, then
- // it's no longer available for copy propagation.
- RegList &DestList = SrcMap[Src];
- if (!is_contained(DestList, Def))
- DestList.push_back(Def);
+ Tracker.trackCopy(MI, *TRI);
continue;
}
@@ -494,7 +536,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// later.
if (MO.isTied())
ReadRegister(Reg);
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
forwardUses(*MI);
@@ -541,6 +583,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
MaybeDead->dump());
+ // Make sure we invalidate any entries in the copy maps before erasing
+ // the instruction.
+ Tracker.clobberRegister(Reg, *TRI);
+
// erase() will return the next valid iterator pointing to the next
// element after the erased one.
DI = MaybeDeadCopies.erase(DI);
@@ -548,22 +594,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
Changed = true;
++NumDeletes;
}
-
- removeClobberedRegsFromMap(AvailCopyMap, *RegMask);
- removeClobberedRegsFromMap(CopyMap, *RegMask);
- for (SourceMap::iterator I = SrcMap.begin(), E = SrcMap.end(), Next;
- I != E; I = Next) {
- Next = std::next(I);
- if (RegMask->clobbersPhysReg(I->first)) {
- removeRegsFromMap(AvailCopyMap, I->second, *TRI);
- SrcMap.erase(I);
- }
- }
}
// Any previous copy definition or reading the Defs is no longer available.
for (unsigned Reg : Defs)
- ClobberRegister(Reg);
+ Tracker.clobberRegister(Reg, *TRI);
}
// If MBB doesn't have successors, delete the copies whose defs are not used.
@@ -574,6 +609,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
+
+ // Update matching debug values.
+ assert(MaybeDead->isCopy());
+ MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg());
+
MaybeDead->eraseFromParent();
Changed = true;
++NumDeletes;
@@ -581,9 +621,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
MaybeDeadCopies.clear();
- AvailCopyMap.clear();
- CopyMap.clear();
- SrcMap.clear();
+ Tracker.clear();
}
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index dd668bcf6193..3495319670a5 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -99,6 +99,9 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
llvm_unreachable("Invalid machine function property");
}
+// Pin the vtable to this file.
+void MachineFunction::Delegate::anchor() {}
+
void MachineFunctionProperties::print(raw_ostream &OS) const {
const char *Separator = "";
for (BitVector::size_type I = 0; I < Properties.size(); ++I) {
@@ -127,7 +130,8 @@ static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
return STI->getFrameLowering()->getStackAlignment();
}
-MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target,
+MachineFunction::MachineFunction(const Function &F,
+ const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI,
unsigned FunctionNum, MachineModuleInfo &mmi)
: F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) {
@@ -135,6 +139,16 @@ MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target,
init();
}
+void MachineFunction::handleInsertion(MachineInstr &MI) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleInsertion(MI);
+}
+
+void MachineFunction::handleRemoval(MachineInstr &MI) {
+ if (TheDelegate)
+ TheDelegate->MF_HandleRemoval(MI);
+}
+
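
The new delegate hooks let a pass observe every instruction insertion and removal; a minimal sketch, assuming the MF_HandleInsertion/MF_HandleRemoval callbacks invoked above are virtual members of MachineFunction::Delegate and that some registration call (not shown in this hunk) installs the delegate:

    // Hedged sketch; the registration step is assumed, not part of this diff.
    struct InstrCounter : public MachineFunction::Delegate {
      unsigned NumInserted = 0, NumRemoved = 0;
      void MF_HandleInsertion(MachineInstr &MI) override { (void)MI; ++NumInserted; }
      void MF_HandleRemoval(MachineInstr &MI) override { (void)MI; ++NumRemoved; }
    };
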
void MachineFunction::init() {
// Assume the function starts in SSA form with correct liveness.
Properties.set(MachineFunctionProperties::Property::IsSSA);
@@ -233,6 +247,11 @@ void MachineFunction::clear() {
WinEHInfo->~WinEHFuncInfo();
Allocator.Deallocate(WinEHInfo);
}
+
+ if (WasmEHInfo) {
+ WasmEHInfo->~WasmEHFuncInfo();
+ Allocator.Deallocate(WasmEHInfo);
+ }
}
const DataLayout &MachineFunction::getDataLayout() const {
@@ -406,82 +425,17 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MMO->getOrdering(), MMO->getFailureOrdering());
}
-MachineInstr::mmo_iterator
-MachineFunction::allocateMemRefsArray(unsigned long Num) {
- return Allocator.Allocate<MachineMemOperand *>(Num);
-}
-
-std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
-MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End) {
- // Count the number of load mem refs.
- unsigned Num = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
- if ((*I)->isLoad())
- ++Num;
-
- // Allocate a new array and populate it with the load information.
- MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
- unsigned Index = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
- if ((*I)->isLoad()) {
- if (!(*I)->isStore())
- // Reuse the MMO.
- Result[Index] = *I;
- else {
- // Clone the MMO and unset the store flag.
- MachineMemOperand *JustLoad =
- getMachineMemOperand((*I)->getPointerInfo(),
- (*I)->getFlags() & ~MachineMemOperand::MOStore,
- (*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo(), nullptr,
- (*I)->getSyncScopeID(), (*I)->getOrdering(),
- (*I)->getFailureOrdering());
- Result[Index] = JustLoad;
- }
- ++Index;
- }
- }
- return std::make_pair(Result, Result + Num);
-}
-
-std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
-MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
- MachineInstr::mmo_iterator End) {
- // Count the number of load mem refs.
- unsigned Num = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
- if ((*I)->isStore())
- ++Num;
-
- // Allocate a new array and populate it with the store information.
- MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
- unsigned Index = 0;
- for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
- if ((*I)->isStore()) {
- if (!(*I)->isLoad())
- // Reuse the MMO.
- Result[Index] = *I;
- else {
- // Clone the MMO and unset the load flag.
- MachineMemOperand *JustStore =
- getMachineMemOperand((*I)->getPointerInfo(),
- (*I)->getFlags() & ~MachineMemOperand::MOLoad,
- (*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo(), nullptr,
- (*I)->getSyncScopeID(), (*I)->getOrdering(),
- (*I)->getFailureOrdering());
- Result[Index] = JustStore;
- }
- ++Index;
- }
- }
- return std::make_pair(Result, Result + Num);
+MachineInstr::ExtraInfo *
+MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol,
+ MCSymbol *PostInstrSymbol) {
+ return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
+ PostInstrSymbol);
}
const char *MachineFunction::createExternalSymbolName(StringRef Name) {
char *Dest = Allocator.Allocate<char>(Name.size() + 1);
- std::copy(Name.begin(), Name.end(), Dest);
+ llvm::copy(Name, Dest);
Dest[Name.size()] = 0;
return Dest;
}
@@ -678,6 +632,46 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
MCSymbol *LandingPadLabel = Ctx.createTempSymbol();
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
LP.LandingPadLabel = LandingPadLabel;
+
+ const Instruction *FirstI = LandingPad->getBasicBlock()->getFirstNonPHI();
+ if (const auto *LPI = dyn_cast<LandingPadInst>(FirstI)) {
+ if (const auto *PF =
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()))
+ getMMI().addPersonality(PF);
+
+ if (LPI->isCleanup())
+ addCleanup(LandingPad);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100%
+ // correct, but we need to do it this way because of how the DWARF EH
+ // emitter processes the clauses.
+ for (unsigned I = LPI->getNumClauses(); I != 0; --I) {
+ Value *Val = LPI->getClause(I - 1);
+ if (LPI->isCatch(I - 1)) {
+ addCatchTypeInfo(LandingPad,
+ dyn_cast<GlobalValue>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ auto *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalValue *, 4> FilterList;
+ for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
+ II != IE; ++II)
+ FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+
+ addFilterTypeInfo(LandingPad, FilterList);
+ }
+ }
+
+ } else if (const auto *CPI = dyn_cast<CatchPadInst>(FirstI)) {
+ for (unsigned I = CPI->getNumArgOperands(); I != 0; --I) {
+ Value *TypeInfo = CPI->getArgOperand(I - 1)->stripPointerCasts();
+ addCatchTypeInfo(LandingPad, dyn_cast<GlobalValue>(TypeInfo));
+ }
+
+ } else {
+ assert(isa<CleanupPadInst>(FirstI) && "Invalid landingpad!");
+ }
+
return LandingPadLabel;
}
@@ -697,7 +691,8 @@ void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad,
LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
}
-void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+void MachineFunction::tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap,
+ bool TidyIfNoBeginLabels) {
for (unsigned i = 0; i != LandingPads.size(); ) {
LandingPadInfo &LandingPad = LandingPads[i];
if (LandingPad.LandingPadLabel &&
@@ -712,24 +707,25 @@ void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
continue;
}
- for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
- MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
- MCSymbol *EndLabel = LandingPad.EndLabels[j];
- if ((BeginLabel->isDefined() ||
- (LPMap && (*LPMap)[BeginLabel] != 0)) &&
- (EndLabel->isDefined() ||
- (LPMap && (*LPMap)[EndLabel] != 0))) continue;
-
- LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
- LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
- --j;
- --e;
- }
+ if (TidyIfNoBeginLabels) {
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() || (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() || (LPMap && (*LPMap)[EndLabel] != 0)))
+ continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j;
+ --e;
+ }
- // Remove landing pads with no try-ranges.
- if (LandingPads[i].BeginLabels.empty()) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
}
// If there is no landing pad, ensure that the list of typeids is empty.
@@ -806,36 +802,6 @@ try_next:;
return FilterID;
}
-void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) {
- MachineFunction &MF = *MBB.getParent();
- if (const auto *PF = dyn_cast<Function>(
- I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
- MF.getMMI().addPersonality(PF);
-
- if (I.isCleanup())
- MF.addCleanup(&MBB);
-
- // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
- // but we need to do it this way because of how the DWARF EH emitter
- // processes the clauses.
- for (unsigned i = I.getNumClauses(); i != 0; --i) {
- Value *Val = I.getClause(i - 1);
- if (I.isCatch(i - 1)) {
- MF.addCatchTypeInfo(&MBB,
- dyn_cast<GlobalValue>(Val->stripPointerCasts()));
- } else {
- // Add filters in a list.
- Constant *CVal = cast<Constant>(Val);
- SmallVector<const GlobalValue *, 4> FilterList;
- for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
- II != IE; ++II)
- FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
-
- MF.addFilterTypeInfo(&MBB, FilterList);
- }
- }
-}
-
/// \}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 67ac95740e3e..5db4e299fa70 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -23,11 +23,13 @@
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
using namespace llvm;
+using namespace ore;
Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
@@ -57,9 +59,43 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
llvm_unreachable("MachineFunctionProperties check failed");
}
#endif
+ // Collect the MI count of the function before the pass.
+ unsigned CountBefore, CountAfter;
+
+ // Check if the user asked for size remarks.
+ bool ShouldEmitSizeRemarks =
+ F.getParent()->shouldEmitInstrCountChangedRemark();
+
+ // If we want size remarks, collect the number of MachineInstrs in our
+ // MachineFunction before the pass runs.
+ if (ShouldEmitSizeRemarks)
+ CountBefore = MF.getInstructionCount();
bool RV = runOnMachineFunction(MF);
+ if (ShouldEmitSizeRemarks) {
+ // We wanted size remarks. Check if there was a change to the number of
+ // MachineInstrs in the module. Emit a remark if there was a change.
+ CountAfter = MF.getInstructionCount();
+ if (CountBefore != CountAfter) {
+ MachineOptimizationRemarkEmitter MORE(MF, nullptr);
+ MORE.emit([&]() {
+ int64_t Delta = static_cast<int64_t>(CountAfter) -
+ static_cast<int64_t>(CountBefore);
+ MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
+ MF.getFunction().getSubprogram(),
+ &MF.front());
+ R << NV("Pass", getPassName())
+ << ": Function: " << NV("Function", F.getName()) << ": "
+ << "MI Instruction count changed from "
+ << NV("MIInstrsBefore", CountBefore) << " to "
+ << NV("MIInstrsAfter", CountAfter)
+ << "; Delta: " << NV("Delta", Delta);
+ return R;
+ });
+ }
+ }
+
MFProps.set(SetProperties);
MFProps.reset(ClearedProperties);
return RV;
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 55d9defced3a..9c96ba748778 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,6 +39,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
+ AU.addUsedIfAvailable<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 96fcfdb72ad7..764a84c7e132 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -131,8 +132,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
- debugLoc(MI.getDebugLoc()) {
+ : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -315,71 +315,201 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
--NumOperands;
}
-/// addMemOperand - Add a MachineMemOperand to the machine instruction.
-/// This function should be used only occasionally. The setMemRefs function
-/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::dropMemRefs(MachineFunction &MF) {
+ if (memoperands_empty())
+ return;
+
+ // See if we can just drop all of our extra info.
+ if (!getPreInstrSymbol() && !getPostInstrSymbol()) {
+ Info.clear();
+ return;
+ }
+ if (!getPostInstrSymbol()) {
+ Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
+ return;
+ }
+ if (!getPreInstrSymbol()) {
+ Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
+ return;
+ }
+
+ // Otherwise allocate a fresh extra info with just these symbols.
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo({}, getPreInstrSymbol(), getPostInstrSymbol()));
+}
+
+void MachineInstr::setMemRefs(MachineFunction &MF,
+ ArrayRef<MachineMemOperand *> MMOs) {
+ if (MMOs.empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Try to store a single MMO inline.
+ if (MMOs.size() == 1 && !getPreInstrSymbol() && !getPostInstrSymbol()) {
+ Info.set<EIIK_MMO>(MMOs[0]);
+ return;
+ }
+
+ // Otherwise create an extra info struct with all of our info.
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(MMOs, getPreInstrSymbol(), getPostInstrSymbol()));
+}
+
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
- mmo_iterator OldMemRefs = MemRefs;
- unsigned OldNumMemRefs = NumMemRefs;
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ MMOs.append(memoperands_begin(), memoperands_end());
+ MMOs.push_back(MO);
+ setMemRefs(MF, MMOs);
+}
- unsigned NewNum = NumMemRefs + 1;
- mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) {
+ if (this == &MI)
+ // Nothing to do for a self-clone!
+ return;
- std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
- NewMemRefs[NewNum - 1] = MO;
- setMemRefs(NewMemRefs, NewMemRefs + NewNum);
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning memory refrences!");
+ // See if we can just steal the extra info already allocated for the
+ // instruction. We can do this whenever the pre- and post-instruction symbols
+ // are the same (including null).
+ if (getPreInstrSymbol() == MI.getPreInstrSymbol() &&
+ getPostInstrSymbol() == MI.getPostInstrSymbol()) {
+ Info = MI.Info;
+ return;
+ }
+
+ // Otherwise, fall back on a copy-based clone.
+ setMemRefs(MF, MI.memoperands());
}
/// Check to see if the MMOs pointed to by the two MemRefs arrays are
/// identical.
-static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) {
- auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end();
- auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end();
- if ((E1 - I1) != (E2 - I2))
+static bool hasIdenticalMMOs(ArrayRef<MachineMemOperand *> LHS,
+ ArrayRef<MachineMemOperand *> RHS) {
+ if (LHS.size() != RHS.size())
return false;
- for (; I1 != E1; ++I1, ++I2) {
- if (**I1 != **I2)
- return false;
+
+ auto LHSPointees = make_pointee_range(LHS);
+ auto RHSPointees = make_pointee_range(RHS);
+ return std::equal(LHSPointees.begin(), LHSPointees.end(),
+ RHSPointees.begin());
+}
+
+void MachineInstr::cloneMergedMemRefs(MachineFunction &MF,
+ ArrayRef<const MachineInstr *> MIs) {
+ // Try handling easy numbers of MIs with simpler mechanisms.
+ if (MIs.empty()) {
+ dropMemRefs(MF);
+ return;
}
- return true;
+ if (MIs.size() == 1) {
+ cloneMemRefs(MF, *MIs[0]);
+ return;
+ }
+ // Because an empty memoperands list provides *no* information and must be
+ // handled conservatively (assuming the instruction can do anything), the only
+ // way to merge with it is to drop all other memoperands.
+ if (MIs[0]->memoperands_empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Handle the general case.
+ SmallVector<MachineMemOperand *, 2> MergedMMOs;
+ // Start with the first instruction.
+ assert(&MF == MIs[0]->getMF() &&
+ "Invalid machine functions when cloning memory references!");
+ MergedMMOs.append(MIs[0]->memoperands_begin(), MIs[0]->memoperands_end());
+ // Now walk all the other instructions and accumulate any different MMOs.
+ for (const MachineInstr &MI : make_pointee_range(MIs.slice(1))) {
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning memory references!");
+
+ // Skip MIs with identical operands to the first. This is a somewhat
+ // arbitrary hack but will catch common cases without being quadratic.
+ // TODO: We could fully implement merge semantics here if needed.
+ if (hasIdenticalMMOs(MIs[0]->memoperands(), MI.memoperands()))
+ continue;
+
+ // Because an empty memoperands list provides *no* information and must be
+ // handled conservatively (assuming the instruction can do anything), the
+ // only way to merge with it is to drop all other memoperands.
+ if (MI.memoperands_empty()) {
+ dropMemRefs(MF);
+ return;
+ }
+
+ // Otherwise accumulate these into our temporary buffer of the merged state.
+ MergedMMOs.append(MI.memoperands_begin(), MI.memoperands_end());
+ }
+
+ setMemRefs(MF, MergedMMOs);
}
-std::pair<MachineInstr::mmo_iterator, unsigned>
-MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
+void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+ MCSymbol *OldSymbol = getPreInstrSymbol();
+ if (OldSymbol == Symbol)
+ return;
+ if (OldSymbol && !Symbol) {
+ // We're removing a symbol rather than adding one. Try to clean up any
+ // extra info carried around.
+ if (Info.is<EIIK_PreInstrSymbol>()) {
+ Info.clear();
+ return;
+ }
- // If either of the incoming memrefs are empty, we must be conservative and
- // treat this as if we've exhausted our space for memrefs and dropped them.
- if (memoperands_empty() || Other.memoperands_empty())
- return std::make_pair(nullptr, 0);
+ if (memoperands_empty()) {
+ assert(getPostInstrSymbol() &&
+ "Should never have only a single symbol allocated out-of-line!");
+ Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
+ return;
+ }
- // If both instructions have identical memrefs, we don't need to merge them.
- // Since many instructions have a single memref, and we tend to merge things
- // like pairs of loads from the same location, this catches a large number of
- // cases in practice.
- if (hasIdenticalMMOs(*this, Other))
- return std::make_pair(MemRefs, NumMemRefs);
+ // Otherwise fall back on the generic update.
+ } else if (!Info || Info.is<EIIK_PreInstrSymbol>()) {
+ // If we don't have any other extra info, we can store this inline.
+ Info.set<EIIK_PreInstrSymbol>(Symbol);
+ return;
+ }
- // TODO: consider uniquing elements within the operand lists to reduce
- // space usage and fall back to conservative information less often.
- size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs;
+ // Otherwise, allocate a full new set of extra info.
+ // FIXME: Maybe we should make the symbols in the extra info mutable?
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(memoperands(), Symbol, getPostInstrSymbol()));
+}
- // If we don't have enough room to store this many memrefs, be conservative
- // and drop them. Otherwise, we'd fail asserts when trying to add them to
- // the new instruction.
- if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs))
- return std::make_pair(nullptr, 0);
+void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
+ MCSymbol *OldSymbol = getPostInstrSymbol();
+ if (OldSymbol == Symbol)
+ return;
+ if (OldSymbol && !Symbol) {
+ // We're removing a symbol rather than adding one. Try to clean up any
+ // extra info carried around.
+ if (Info.is<EIIK_PostInstrSymbol>()) {
+ Info.clear();
+ return;
+ }
+
+ if (memoperands_empty()) {
+ assert(getPreInstrSymbol() &&
+ "Should never have only a single symbol allocated out-of-line!");
+ Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
+ return;
+ }
- MachineFunction *MF = getMF();
- mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
- mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
- MemBegin);
- MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(),
- MemEnd);
- assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
- "missing memrefs");
+ // Otherwise fall back on the generic update.
+ } else if (!Info || Info.is<EIIK_PostInstrSymbol>()) {
+ // If we don't have any other extra info, we can store this inline.
+ Info.set<EIIK_PostInstrSymbol>(Symbol);
+ return;
+ }
- return std::make_pair(MemBegin, CombinedNumMemRefs);
+ // Otherwise, allocate a full new set of extra info.
+ // FIXME: Maybe we should make the symbols in the extra info mutable?
+ Info.set<EIIK_OutOfLine>(
+ MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol));
}
uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
@@ -388,7 +518,42 @@ uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
return getFlags() | Other.getFlags();
}
-bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+void MachineInstr::copyIRFlags(const Instruction &I) {
+ // Copy the wrapping flags.
+ if (const OverflowingBinaryOperator *OB =
+ dyn_cast<OverflowingBinaryOperator>(&I)) {
+ if (OB->hasNoSignedWrap())
+ setFlag(MachineInstr::MIFlag::NoSWrap);
+ if (OB->hasNoUnsignedWrap())
+ setFlag(MachineInstr::MIFlag::NoUWrap);
+ }
+
+ // Copy the exact flag.
+ if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I))
+ if (PE->isExact())
+ setFlag(MachineInstr::MIFlag::IsExact);
+
+ // Copy the fast-math flags.
+ if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) {
+ const FastMathFlags Flags = FP->getFastMathFlags();
+ if (Flags.noNaNs())
+ setFlag(MachineInstr::MIFlag::FmNoNans);
+ if (Flags.noInfs())
+ setFlag(MachineInstr::MIFlag::FmNoInfs);
+ if (Flags.noSignedZeros())
+ setFlag(MachineInstr::MIFlag::FmNsz);
+ if (Flags.allowReciprocal())
+ setFlag(MachineInstr::MIFlag::FmArcp);
+ if (Flags.allowContract())
+ setFlag(MachineInstr::MIFlag::FmContract);
+ if (Flags.approxFunc())
+ setFlag(MachineInstr::MIFlag::FmAfn);
+ if (Flags.allowReassoc())
+ setFlag(MachineInstr::MIFlag::FmReassoc);
+ }
+}
+
+bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
if (MII->getDesc().getFlags() & Mask) {
@@ -768,9 +933,7 @@ int MachineInstr::findRegisterUseOperandIdx(
unsigned MOReg = MO.getReg();
if (!MOReg)
continue;
- if (MOReg == Reg || (TRI && TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- TargetRegisterInfo::isPhysicalRegister(Reg) &&
- TRI->isSubRegister(MOReg, Reg)))
+ if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg)))
if (!isKill || MO.isKill())
return i;
}
@@ -1050,10 +1213,13 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
int64_t OffsetA = MMOa->getOffset();
int64_t OffsetB = MMOb->getOffset();
-
int64_t MinOffset = std::min(OffsetA, OffsetB);
- int64_t WidthA = MMOa->getSize();
- int64_t WidthB = MMOb->getSize();
+
+ uint64_t WidthA = MMOa->getSize();
+ uint64_t WidthB = MMOb->getSize();
+ bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
+ bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
+
const Value *ValA = MMOa->getValue();
const Value *ValB = MMOb->getValue();
bool SameVal = (ValA && ValB && (ValA == ValB));
@@ -1069,6 +1235,8 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
}
if (SameVal) {
+ if (!KnownWidthA || !KnownWidthB)
+ return true;
int64_t MaxOffset = std::max(OffsetA, OffsetB);
int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
return (MinOffset + LowWidth > MaxOffset);
@@ -1083,13 +1251,15 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
- int64_t Overlapa = WidthA + OffsetA - MinOffset;
- int64_t Overlapb = WidthB + OffsetB - MinOffset;
+ int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
+ : MemoryLocation::UnknownSize;
+ int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
+ : MemoryLocation::UnknownSize;
AliasResult AAResult = AA->alias(
- MemoryLocation(ValA, Overlapa,
+ MemoryLocation(ValA, OverlapA,
UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(ValB, Overlapb,
+ MemoryLocation(ValB, OverlapB,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != NoAlias);
@@ -1294,7 +1464,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
SmallBitVector PrintedTypes(8);
- bool ShouldPrintRegisterTies = hasComplexRegisterTies();
+ bool ShouldPrintRegisterTies = IsStandalone || hasComplexRegisterTies();
auto getTiedOperandIdx = [&](unsigned OpIdx) {
if (!ShouldPrintRegisterTies)
return 0U;
@@ -1343,6 +1513,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "afn ";
if (getFlag(MachineInstr::FmReassoc))
OS << "reassoc ";
+ if (getFlag(MachineInstr::NoUWrap))
+ OS << "nuw ";
+ if (getFlag(MachineInstr::NoSWrap))
+ OS << "nsw ";
+ if (getFlag(MachineInstr::IsExact))
+ OS << "exact ";
// Print the opcode name.
if (TII)
@@ -1486,6 +1662,25 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
}
+ // Print any optional symbols attached to this instruction as if they were
+ // operands.
+ if (MCSymbol *PreInstrSymbol = getPreInstrSymbol()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " pre-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PreInstrSymbol);
+ }
+ if (MCSymbol *PostInstrSymbol = getPostInstrSymbol()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " post-instr-symbol ";
+ MachineOperand::printSymbol(OS, *PostInstrSymbol);
+ }
+
if (!SkipDebugLoc) {
if (const DebugLoc &DL = getDebugLoc()) {
if (!FirstOp)
@@ -1605,7 +1800,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
// Trim unneeded kill operands.
while (!DeadOps.empty()) {
unsigned OpIdx = DeadOps.back();
- if (getOperand(OpIdx).isImplicit())
+ if (getOperand(OpIdx).isImplicit() &&
+ (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
RemoveOperand(OpIdx);
else
getOperand(OpIdx).setIsKill(false);
@@ -1669,7 +1865,8 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
// Trim unneeded dead operands.
while (!DeadOps.empty()) {
unsigned OpIdx = DeadOps.back();
- if (getOperand(OpIdx).isImplicit())
+ if (getOperand(OpIdx).isImplicit() &&
+ (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
RemoveOperand(OpIdx);
else
getOperand(OpIdx).setIsDead(false);
@@ -1876,3 +2073,30 @@ void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) {
Orig.getOperand(1).ChangeToImmediate(0U);
Orig.getOperand(3).setMetadata(Expr);
}
+
+void MachineInstr::collectDebugValues(
+ SmallVectorImpl<MachineInstr *> &DbgValues) {
+ MachineInstr &MI = *this;
+ if (!MI.getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI; ++DI;
+ for (MachineBasicBlock::iterator DE = MI.getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
+ DbgValues.push_back(&*DI);
+ }
+}
+
+void MachineInstr::changeDebugValuesDefReg(unsigned Reg) {
+ // Collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValues;
+ collectDebugValues(DbgValues);
+
+ // Propagate Reg to debug value instructions.
+ for (auto *DBI : DbgValues)
+ DBI->getOperand(0).setReg(Reg);
+}
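The memoperand hunks above replace the old allocate-and-copy array interface with ArrayRef-based setters that decide between inline and out-of-line storage internally. A rough usage sketch only (MF, MI, OtherMI and NewMMO are placeholder names, not taken from this patch):

    SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                             MI.memoperands_end());
    MMOs.push_back(NewMMO);        // extra operand to attach
    MI.setMemRefs(MF, MMOs);       // stored inline for a single MMO, out-of-line otherwise
    OtherMI.cloneMemRefs(MF, MI);  // reuses MI's ExtraInfo when the pre/post symbols match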
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index ed16a2b6084c..ae378cc8c464 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -105,6 +105,16 @@ bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
return llvm::finalizeBundles(MF);
}
+/// Return the first found DebugLoc that has a DILocation, given a range of
+/// instructions. The search range is from FirstMI to LastMI (exclusive). If no
+/// DILocation is found, then an empty location is returned.
+static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ for (auto MII = FirstMI; MII != LastMI; ++MII)
+ if (MII->getDebugLoc().get())
+ return MII->getDebugLoc();
+ return DebugLoc();
+}
/// finalizeBundle - Finalize a machine instruction bundle which includes
/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
@@ -123,7 +133,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MachineInstrBuilder MIB =
- BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE));
+ BuildMI(MF, getDebugLoc(FirstMI, LastMI), TII->get(TargetOpcode::BUNDLE));
Bundle.prepend(MIB);
SmallVector<unsigned, 32> LocalDefs;
@@ -135,9 +145,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
SmallSet<unsigned, 8> KilledUseSet;
SmallSet<unsigned, 8> UndefUseSet;
SmallVector<MachineOperand*, 4> Defs;
- for (; FirstMI != LastMI; ++FirstMI) {
- for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = FirstMI->getOperand(i);
+ for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
if (!MO.isReg())
continue;
if (MO.isDef()) {
@@ -215,6 +225,15 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
getImplRegState(true));
}
+
+ // Set FrameSetup/FrameDestroy for the bundle. If any of the instructions got
+ // the property, then also set it on the bundle.
+ for (auto MII = FirstMI; MII != LastMI; ++MII) {
+ if (MII->getFlag(MachineInstr::FrameSetup))
+ MIB.setMIFlag(MachineInstr::FrameSetup);
+ if (MII->getFlag(MachineInstr::FrameDestroy))
+ MIB.setMIFlag(MachineInstr::FrameDestroy);
+ }
}
/// finalizeBundle - Same functionality as the previous finalizeBundle except
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index 7332b7162030..58fd1f238420 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -463,8 +463,12 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
if (PhysRegDefs.test(*AS))
PhysRegClobbers.set(*AS);
- PhysRegDefs.set(*AS);
}
+ // Need a second loop because MCRegAliasIterator can visit the same
+ // register twice.
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
+ PhysRegDefs.set(*AS);
+
if (PhysRegClobbers.test(Reg))
// MI defined register is seen defined by another instruction in
// the loop, it cannot be a LICM candidate.
@@ -497,8 +501,7 @@ void MachineLICMBase::HoistRegionPostRA() {
// Walk the entire region, count number of defs for each register, and
// collect potential LICM candidates.
- const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (MachineBasicBlock *BB : Blocks) {
+ for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
@@ -570,8 +573,7 @@ void MachineLICMBase::HoistRegionPostRA() {
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
void MachineLICMBase::AddToLiveIns(unsigned Reg) {
- const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (MachineBasicBlock *BB : Blocks) {
+ for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
for (MachineInstr &MI : *BB) {
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 639cd80768fc..6ef8de88f8b1 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -194,7 +194,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
}
-MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
: ImmutablePass(ID), TM(*TM),
Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
TM->getObjFileLowering(), nullptr, false) {
@@ -206,10 +206,11 @@ MachineModuleInfo::~MachineModuleInfo() = default;
bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
CurCallSite = 0;
- DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
+ UsesVAFloatArgument = UsesMorestackAddr = false;
HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
TheModule = &M;
+ DbgInfoAvailable = !empty(M.debug_compile_units());
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
index 07b173bc94f8..7b4f64bfe60d 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
+void MachineModuleInfoCOFF::anchor() {}
using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>;
static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) {
diff --git a/contrib/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm/lib/CodeGen/MachineOperand.cpp
index 8098333832b4..05e51e1873cf 100644
--- a/contrib/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineOperand.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -460,7 +461,8 @@ static void printIRValueReference(raw_ostream &OS, const Value &V,
printLLVMNameWithoutPrefix(OS, V.getName());
return;
}
- MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V));
+ int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
+ MachineOperand::printIRSlotNumber(OS, Slot);
}
static void printSyncScope(raw_ostream &OS, const LLVMContext &Context,
@@ -695,6 +697,11 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
if (MCSymbol *Label = CFI.getLabel())
MachineOperand::printSymbol(OS, *Label);
break;
+ case MCCFIInstruction::OpNegateRAState:
+ OS << "negate_ra_sign_state ";
+ if (MCSymbol *Label = CFI.getLabel())
+ MachineOperand::printSymbol(OS, *Label);
+ break;
default:
// TODO: Print the other CFI Operations.
OS << "<unserializable cfi directive>";
@@ -742,10 +749,10 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "undef ";
if (isEarlyClobber())
OS << "early-clobber ";
- if (isDebug())
- OS << "debug-use ";
if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())
OS << "renamable ";
+ // isDebug() is exactly true for register operands of a DBG_VALUE. So we
+ // simply infer it when parsing and do not need to print it.
const MachineRegisterInfo *MRI = nullptr;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -1078,7 +1085,11 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (getFailureOrdering() != AtomicOrdering::NotAtomic)
OS << toIRString(getFailureOrdering()) << ' ';
- OS << getSize();
+ if (getSize() == MemoryLocation::UnknownSize)
+ OS << "unknown-size";
+ else
+ OS << getSize();
+
if (const Value *Val = getValue()) {
OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
printIRValueReference(OS, *Val, MST);
diff --git a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
index a712afec0959..ad96c0e579e4 100644
--- a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -128,9 +128,6 @@ struct SuffixTreeNode {
/// mapping by tacking that character on the end of the current string.
DenseMap<unsigned, SuffixTreeNode *> Children;
- /// A flag set to false if the node has been pruned from the tree.
- bool IsInTree = true;
-
/// The start index of this node's substring in the main string.
unsigned StartIdx = EmptyIdx;
@@ -167,15 +164,6 @@ struct SuffixTreeNode {
/// construction algorithm O(N^2) rather than O(N).
SuffixTreeNode *Link = nullptr;
- /// The parent of this node. Every node except for the root has a parent.
- SuffixTreeNode *Parent = nullptr;
-
- /// The number of times this node's string appears in the tree.
- ///
- /// This is equal to the number of leaf children of the string. It represents
- /// the number of suffixes that the node's string is a prefix of.
- unsigned OccurrenceCount = 0;
-
/// The length of the string formed by concatenating the edge labels from the
/// root to this node.
unsigned ConcatLen = 0;
@@ -200,9 +188,8 @@ struct SuffixTreeNode {
return *EndIdx - StartIdx + 1;
}
- SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
- SuffixTreeNode *Parent)
- : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
+ SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
+ : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
SuffixTreeNode() {}
};
@@ -231,14 +218,18 @@ struct SuffixTreeNode {
/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
class SuffixTree {
public:
- /// Stores each leaf node in the tree.
- ///
- /// This is used for finding outlining candidates.
- std::vector<SuffixTreeNode *> LeafVector;
-
/// Each element is an integer representing an instruction in the module.
ArrayRef<unsigned> Str;
+ /// A repeated substring in the tree.
+ struct RepeatedSubstring {
+ /// The length of the string.
+ unsigned Length;
+
+ /// The start indices of each occurrence.
+ std::vector<unsigned> StartIndices;
+ };
+
private:
/// Maintains each node in the tree.
SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
@@ -291,7 +282,7 @@ private:
assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
+ SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
Parent.Children[Edge] = N;
return N;
@@ -314,7 +305,7 @@ private:
unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, E, Root, Parent);
+ SuffixTreeNode(StartIdx, E, Root);
if (Parent)
Parent->Children[Edge] = N;
@@ -322,41 +313,27 @@ private:
}
/// Set the suffix indices of the leaves to the start indices of their
- /// respective suffixes. Also stores each leaf in \p LeafVector at its
- /// respective suffix index.
+ /// respective suffixes.
///
/// \param[in] CurrNode The node currently being visited.
- /// \param CurrIdx The current index of the string being visited.
- void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
+ /// \param CurrNodeLen The concatenation of all node sizes from the root to
+ /// this node. Used to produce suffix indices.
+ void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
- // Store the length of the concatenation of all strings from the root to
- // this node.
- if (!CurrNode.isRoot()) {
- if (CurrNode.ConcatLen == 0)
- CurrNode.ConcatLen = CurrNode.size();
-
- if (CurrNode.Parent)
- CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
- }
-
+ // Store the concatenation of lengths down from the root.
+ CurrNode.ConcatLen = CurrNodeLen;
// Traverse the tree depth-first.
for (auto &ChildPair : CurrNode.Children) {
assert(ChildPair.second && "Node had a null child!");
- setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
+ setSuffixIndices(*ChildPair.second,
+ CurrNodeLen + ChildPair.second->size());
}
- // Is this node a leaf?
- if (IsLeaf) {
- // If yes, give it a suffix index and bump its parent's occurrence count.
- CurrNode.SuffixIdx = Str.size() - CurrIdx;
- assert(CurrNode.Parent && "CurrNode had no parent!");
- CurrNode.Parent->OccurrenceCount++;
-
- // Store the leaf in the leaf vector for pruning later.
- LeafVector[CurrNode.SuffixIdx] = &CurrNode;
- }
+ // Is this node a leaf? If it is, give it a suffix index.
+ if (IsLeaf)
+ CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
}
/// Construct the suffix tree for the prefix of the input ending at
@@ -461,7 +438,6 @@ private:
// Make the old node a child of the split node and update its start
// index. This is the node n from the diagram.
NextNode->StartIdx += Active.Len;
- NextNode->Parent = SplitNode;
SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
// SplitNode is an internal node, update the suffix link.
@@ -495,9 +471,7 @@ public:
/// \param Str The string to construct the suffix tree for.
SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
- Root->IsInTree = true;
Active.Node = Root;
- LeafVector = std::vector<SuffixTreeNode *>(Str.size());
// Keep track of the number of suffixes we have to add of the current
// prefix.
@@ -518,6 +492,117 @@ public:
assert(Root && "Root node can't be nullptr!");
setSuffixIndices(*Root, 0);
}
+
+
+ /// Iterator for finding all repeated substrings in the suffix tree.
+ struct RepeatedSubstringIterator {
+ private:
+ /// The current node we're visiting.
+ SuffixTreeNode *N = nullptr;
+
+ /// The repeated substring associated with this node.
+ RepeatedSubstring RS;
+
+ /// The nodes left to visit.
+ std::vector<SuffixTreeNode *> ToVisit;
+
+ /// The minimum length of a repeated substring to find.
+ /// Since we're outlining, we want at least two instructions in the range.
+ /// FIXME: This may not be true for targets like X86 which support many
+ /// instruction lengths.
+ const unsigned MinLength = 2;
+
+ /// Move the iterator to the next repeated substring.
+ void advance() {
+ // Clear the current state. If we're at the end of the range, then this
+ // is the state we want to be in.
+ RS = RepeatedSubstring();
+ N = nullptr;
+
+ // Each leaf node represents a repeat of a string.
+ std::vector<SuffixTreeNode *> LeafChildren;
+
+ // Continue visiting nodes until we find one which repeats more than once.
+ while (!ToVisit.empty()) {
+ SuffixTreeNode *Curr = ToVisit.back();
+ ToVisit.pop_back();
+ LeafChildren.clear();
+
+ // Keep track of the length of the string associated with the node. If
+ // it's too short, we'll quit.
+ unsigned Length = Curr->ConcatLen;
+
+ // Iterate over each child, saving internal nodes for visiting, and
+ // leaf nodes in LeafChildren. Internal nodes represent individual
+ // strings, which may repeat.
+ for (auto &ChildPair : Curr->Children) {
+ // Save all of this node's children for processing.
+ if (!ChildPair.second->isLeaf())
+ ToVisit.push_back(ChildPair.second);
+
+ // It's not an internal node, so it must be a leaf. If we have a
+ // long enough string, then save the leaf children.
+ else if (Length >= MinLength)
+ LeafChildren.push_back(ChildPair.second);
+ }
+
+ // The root never represents a repeated substring. If we're looking at
+ // that, then skip it.
+ if (Curr->isRoot())
+ continue;
+
+ // Do we have any repeated substrings?
+ if (LeafChildren.size() >= 2) {
+ // Yes. Update the state to reflect this, and then bail out.
+ N = Curr;
+ RS.Length = Length;
+ for (SuffixTreeNode *Leaf : LeafChildren)
+ RS.StartIndices.push_back(Leaf->SuffixIdx);
+ break;
+ }
+ }
+
+ // At this point, either RS is an empty RepeatedSubstring, or it was
+ // set in the above loop. Similarly, N is either nullptr, or the node
+ // associated with RS.
+ }
+
+ public:
+ /// Return the current repeated substring.
+ RepeatedSubstring &operator*() { return RS; }
+
+ RepeatedSubstringIterator &operator++() {
+ advance();
+ return *this;
+ }
+
+ RepeatedSubstringIterator operator++(int I) {
+ RepeatedSubstringIterator It(*this);
+ advance();
+ return It;
+ }
+
+ bool operator==(const RepeatedSubstringIterator &Other) {
+ return N == Other.N;
+ }
+ bool operator!=(const RepeatedSubstringIterator &Other) {
+ return !(*this == Other);
+ }
+
+ RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
+ // Do we have a non-null node?
+ if (N) {
+ // Yes. At the first step, we need to visit all of N's children.
+ // Note: This means that we visit N last.
+ ToVisit.push_back(N);
+ advance();
+ }
+ }
+};
+
+ typedef RepeatedSubstringIterator iterator;
+ iterator begin() { return iterator(Root); }
+ iterator end() { return iterator(nullptr); }
};
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
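A rough consumption sketch for the iterator added above; handleOccurrence is a hypothetical consumer, while the construction line mirrors the one used further down in this patch:

    // Walk every repeated substring the tree exposes and hand each
    // occurrence (start index plus length) to a consumer.
    SuffixTree ST(Mapper.UnsignedVec);
    for (SuffixTree::RepeatedSubstring &RS : ST)
      for (unsigned StartIdx : RS.StartIndices)
        handleOccurrence(StartIdx, RS.Length);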
@@ -537,9 +622,8 @@ struct InstructionMapper {
DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
InstructionIntegerMap;
- /// Corresponcence from unsigned integers to \p MachineInstrs.
- /// Inverse of \p InstructionIntegerMap.
- DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
+ /// Correspondence between \p MachineBasicBlocks and target-defined flags.
+ DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
/// The vector of unsigned integers that the module is mapped to.
std::vector<unsigned> UnsignedVec;
@@ -548,17 +632,39 @@ struct InstructionMapper {
/// at index i in \p UnsignedVec for each index i.
std::vector<MachineBasicBlock::iterator> InstrList;
+ // Set if we added an illegal number in the previous step.
+ // Since each illegal number is unique, we only need one of them between
+ // each range of legal numbers. This lets us make sure we don't add more
+ // than one illegal number per range.
+ bool AddedIllegalLastTime = false;
+
/// Maps \p *It to a legal integer.
///
- /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
- /// \p IntegerInstructionMap, and \p LegalInstrNumber.
+ /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
+ /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
///
/// \returns The integer that \p *It was mapped to.
- unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned mapToLegalUnsigned(
+ MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
+ bool &HaveLegalRange, unsigned &NumLegalInBlock,
+ std::vector<unsigned> &UnsignedVecForMBB,
+ std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ // We added something legal, so we should unset the AddedIllegalLastTime
+ // flag.
+ AddedIllegalLastTime = false;
+
+ // If we have at least two adjacent legal instructions (which may have
+ // invisible instructions in between), remember that.
+ if (CanOutlineWithPrevInstr)
+ HaveLegalRange = true;
+ CanOutlineWithPrevInstr = true;
+
+ // Keep track of the number of legal instructions we insert.
+ NumLegalInBlock++;
// Get the integer for this instruction or give it the current
// LegalInstrNumber.
- InstrList.push_back(It);
+ InstrListForMBB.push_back(It);
MachineInstr &MI = *It;
bool WasInserted;
DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
@@ -568,12 +674,10 @@ struct InstructionMapper {
unsigned MINumber = ResultIt->second;
// There was an insertion.
- if (WasInserted) {
+ if (WasInserted)
LegalInstrNumber++;
- IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
- }
- UnsignedVec.push_back(MINumber);
+ UnsignedVecForMBB.push_back(MINumber);
// Make sure we don't overflow or use any integers reserved by the DenseMap.
if (LegalInstrNumber >= IllegalInstrNumber)
@@ -589,14 +693,26 @@ struct InstructionMapper {
/// Maps \p *It to an illegal integer.
///
- /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
+ /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
+ /// IllegalInstrNumber.
///
/// \returns The integer that \p *It was mapped to.
- unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
+ bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
+ std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ // Can't outline an illegal instruction. Set the flag.
+ CanOutlineWithPrevInstr = false;
+
+ // Only add one illegal number per range of legal numbers.
+ if (AddedIllegalLastTime)
+ return IllegalInstrNumber;
+
+ // Remember that we added an illegal number last time.
+ AddedIllegalLastTime = true;
unsigned MINumber = IllegalInstrNumber;
- InstrList.push_back(It);
- UnsignedVec.push_back(IllegalInstrNumber);
+ InstrListForMBB.push_back(It);
+ UnsignedVecForMBB.push_back(IllegalInstrNumber);
IllegalInstrNumber--;
assert(LegalInstrNumber < IllegalInstrNumber &&
@@ -623,40 +739,78 @@ struct InstructionMapper {
/// \param TII \p TargetInstrInfo for the function.
void convertToUnsignedVec(MachineBasicBlock &MBB,
const TargetInstrInfo &TII) {
- unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
+ unsigned Flags = 0;
+
+ // Don't even map in this case.
+ if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
+ return;
+
+ // Store info for the MBB for later outlining.
+ MBBFlagsMap[&MBB] = Flags;
+
+ MachineBasicBlock::iterator It = MBB.begin();
- for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
- It++) {
+ // The number of instructions in this block that will be considered for
+ // outlining.
+ unsigned NumLegalInBlock = 0;
+ // True if we have at least two legal instructions which aren't separated
+ // by an illegal instruction.
+ bool HaveLegalRange = false;
+
+ // True if we can perform outlining given the last mapped (non-invisible)
+ // instruction. This lets us know if we have a legal range.
+ bool CanOutlineWithPrevInstr = false;
+
+ // FIXME: Should this all just be handled in the target, rather than using
+ // repeated calls to getOutliningType?
+ std::vector<unsigned> UnsignedVecForMBB;
+ std::vector<MachineBasicBlock::iterator> InstrListForMBB;
+
+ for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) {
// Keep track of where this instruction is in the module.
switch (TII.getOutliningType(It, Flags)) {
case InstrType::Illegal:
- mapToIllegalUnsigned(It);
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
+ UnsignedVecForMBB, InstrListForMBB);
break;
case InstrType::Legal:
- mapToLegalUnsigned(It);
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
break;
case InstrType::LegalTerminator:
- mapToLegalUnsigned(It);
- InstrList.push_back(It);
- UnsignedVec.push_back(IllegalInstrNumber);
- IllegalInstrNumber--;
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
+ // The instruction also acts as a terminator, so we have to record that
+ // in the string.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
break;
case InstrType::Invisible:
+ // Normally this is set by mapTo(Blah)Unsigned, but we just want to
+ // skip this instruction. So, unset the flag here.
+ AddedIllegalLastTime = false;
break;
}
}
- // After we're done every insertion, uniquely terminate this part of the
- // "string". This makes sure we won't match across basic block or function
- // boundaries since the "end" is encoded uniquely and thus appears in no
- // repeated substring.
- InstrList.push_back(MBB.end());
- UnsignedVec.push_back(IllegalInstrNumber);
- IllegalInstrNumber--;
+ // Are there enough legal instructions in the block for outlining to be
+ // possible?
+ if (HaveLegalRange) {
+ // After we're done every insertion, uniquely terminate this part of the
+ // "string". This makes sure we won't match across basic block or function
+ // boundaries since the "end" is encoded uniquely and thus appears in no
+ // repeated substring.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
+ InstrListForMBB.end());
+ UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
+ UnsignedVecForMBB.end());
+ }
}
InstructionMapper() {
@@ -692,9 +846,6 @@ struct MachineOutliner : public ModulePass {
/// Set when the pass is constructed in TargetPassConfig.
bool RunOnAllFunctions = true;
- // Collection of IR functions created by the outliner.
- std::vector<Function *> CreatedIRFunctions;
-
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -717,7 +868,8 @@ struct MachineOutliner : public ModulePass {
/// Remark output explaining that a function was outlined.
void emitOutlinedFunctionRemark(OutlinedFunction &OF);
- /// Find all repeated substrings that satisfy the outlining cost model.
+ /// Find all repeated substrings that satisfy the outlining cost model by
+ /// constructing a suffix tree.
///
/// If a substring appears at least twice, then it must be represented by
/// an internal node which appears in at least two suffixes. Each suffix
@@ -726,73 +878,25 @@ struct MachineOutliner : public ModulePass {
/// internal node represents a beneficial substring, then we use each of
/// its leaf children to find the locations of its substring.
///
- /// \param ST A suffix tree to query.
/// \param Mapper Contains outlining mapping information.
- /// \param[out] CandidateList Filled with candidates representing each
- /// beneficial substring.
/// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
/// each type of candidate.
- ///
- /// \returns The length of the longest candidate found.
- unsigned
- findCandidates(SuffixTree &ST,
- InstructionMapper &Mapper,
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList);
-
- /// Replace the sequences of instructions represented by the
- /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
- /// described in \p FunctionList.
+ void findCandidates(InstructionMapper &Mapper,
+ std::vector<OutlinedFunction> &FunctionList);
+
+ /// Replace the sequences of instructions represented by \p OutlinedFunctions
+ /// with calls to functions.
///
/// \param M The module we are outlining from.
- /// \param CandidateList A list of candidates to be outlined.
/// \param FunctionList A list of functions to be inserted into the module.
/// \param Mapper Contains the instruction mappings for the module.
- bool outline(Module &M,
- const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
+ bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
InstructionMapper &Mapper);
/// Creates a function for \p OF and inserts it into the module.
- MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
- InstructionMapper &Mapper);
-
- /// Find potential outlining candidates and store them in \p CandidateList.
- ///
- /// For each type of potential candidate, also build an \p OutlinedFunction
- /// struct containing the information to build the function for that
- /// candidate.
- ///
- /// \param[out] CandidateList Filled with outlining candidates for the module.
- /// \param[out] FunctionList Filled with functions corresponding to each type
- /// of \p Candidate.
- /// \param ST The suffix tree for the module.
- ///
- /// \returns The length of the longest candidate found. 0 if there are none.
- unsigned
- buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- SuffixTree &ST, InstructionMapper &Mapper);
-
- /// Helper function for pruneOverlaps.
- /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
- void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
-
- /// Remove any overlapping candidates that weren't handled by the
- /// suffix tree's pruning method.
- ///
- /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
- /// If a short candidate is chosen for outlining, then a longer candidate
- /// which has that short candidate as a suffix is chosen, the tree's pruning
- /// method will not find it. Thus, we need to prune before outlining as well.
- ///
- /// \param[in,out] CandidateList A list of outlining candidates.
- /// \param[in,out] FunctionList A list of functions to be outlined.
- /// \param Mapper Contains instruction mapping info for outlining.
- /// \param MaxCandidateLen The length of the longest candidate.
- void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper, unsigned MaxCandidateLen);
+ MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
+ InstructionMapper &Mapper,
+ unsigned Name);
/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
@@ -802,13 +906,31 @@ struct MachineOutliner : public ModulePass {
/// function for remark emission.
DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
DISubprogram *SP;
- for (const std::shared_ptr<Candidate> &C : OF.Candidates)
- if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram()))
+ for (const Candidate &C : OF.Candidates)
+ if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
return SP;
return nullptr;
}
-};
+ /// Populate an \p InstructionMapper with instruction-to-integer mappings.
+ /// These are used to construct a suffix tree.
+ void populateMapper(InstructionMapper &Mapper, Module &M,
+ MachineModuleInfo &MMI);
+
+ /// Initialize information necessary to output a size remark.
+ /// FIXME: This should be handled by the pass manager, not the outliner.
+ /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
+ /// pass manager.
+ void initSizeRemarkInfo(
+ const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount);
+
+ /// Emit the remark.
+ // FIXME: This should be handled by the pass manager, not the outliner.
+ void emitInstrCountChangedRemark(
+ const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount);
+};
} // Anonymous namespace.
char MachineOutliner::ID = 0;
@@ -828,6 +950,10 @@ INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
void MachineOutliner::emitNotOutliningCheaperRemark(
unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
OutlinedFunction &OF) {
+ // FIXME: Right now, we arbitrarily choose some Candidate from the
+ // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
+ // We should probably sort these by function name or something to make sure
+ // the remarks are stable.
Candidate &C = CandidatesForRepeatedSeq.front();
MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
MORE.emit([&]() {
@@ -861,7 +987,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
MBB->findDebugLoc(MBB->begin()), MBB);
R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
- << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
+ << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
<< "from " << NV("NumOccurrences", OF.getOccurrenceCount())
<< " locations. "
<< "(Found at: ";
@@ -869,12 +995,8 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
// Tell the user the other places the candidate was found.
for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
- // Skip over things that were pruned.
- if (!OF.Candidates[i]->InCandidateList)
- continue;
-
R << NV((Twine("StartLoc") + Twine(i)).str(),
- OF.Candidates[i]->front()->getDebugLoc());
+ OF.Candidates[i].front()->getDebugLoc());
if (i != e - 1)
R << ", ";
}
@@ -884,95 +1006,65 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MORE.emit(R);
}
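Editorial sketch (not part of the patch): the "Saved ... bytes" figure in the remark above comes from OutlinedFunction::getBenefit(). The snippet below shows the arithmetic it roughly implements, assuming the usual cost model of one copy of the sequence plus a frame overhead plus a per-call overhead; all byte counts are invented for illustration and the exact fields are target-filled by getOutliningCandidateInfo.

#include <cstdio>

int main() {
  unsigned SequenceSize = 24;  // bytes of the repeated sequence
  unsigned Occurrences = 3;    // how many places it appears
  unsigned CallOverhead = 4;   // bytes of the call left at each site
  unsigned FrameOverhead = 8;  // bytes of prologue/epilogue in the new function

  unsigned NotOutlinedCost = Occurrences * SequenceSize;
  unsigned OutlinedCost =
      SequenceSize + FrameOverhead + Occurrences * CallOverhead;
  unsigned Benefit =
      NotOutlinedCost < OutlinedCost ? 0 : NotOutlinedCost - OutlinedCost;
  std::printf("Saved %u bytes by outlining from %u locations\n", Benefit,
              Occurrences);
  return 0;
}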
-unsigned MachineOutliner::findCandidates(
- SuffixTree &ST, InstructionMapper &Mapper,
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList) {
- CandidateList.clear();
+void
+MachineOutliner::findCandidates(InstructionMapper &Mapper,
+ std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
- unsigned MaxLen = 0;
-
- // FIXME: Visit internal nodes instead of leaves.
- for (SuffixTreeNode *Leaf : ST.LeafVector) {
- assert(Leaf && "Leaves in LeafVector cannot be null!");
- if (!Leaf->IsInTree)
- continue;
-
- assert(Leaf->Parent && "All leaves must have parents!");
- SuffixTreeNode &Parent = *(Leaf->Parent);
-
- // If it doesn't appear enough, or we already outlined from it, skip it.
- if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
- continue;
-
- // Figure out if this candidate is beneficial.
- unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
-
- // Too short to be beneficial; skip it.
- // FIXME: This isn't necessarily true for, say, X86. If we factor in
- // instruction lengths we need more information than this.
- if (StringLen < 2)
- continue;
-
- // If this is a beneficial class of candidate, then every one is stored in
- // this vector.
- std::vector<Candidate> CandidatesForRepeatedSeq;
-
- // Figure out the call overhead for each instance of the sequence.
- for (auto &ChildPair : Parent.Children) {
- SuffixTreeNode *M = ChildPair.second;
-
- if (M && M->IsInTree && M->isLeaf()) {
- // Never visit this leaf again.
- M->IsInTree = false;
- unsigned StartIdx = M->SuffixIdx;
- unsigned EndIdx = StartIdx + StringLen - 1;
+ SuffixTree ST(Mapper.UnsignedVec);
- // Trick: Discard some candidates that would be incompatible with the
- // ones we've already found for this sequence. This will save us some
- // work in candidate selection.
- //
- // If two candidates overlap, then we can't outline them both. This
- // happens when we have candidates that look like, say
- //
- // AA (where each "A" is an instruction).
- //
- // We might have some portion of the module that looks like this:
- // AAAAAA (6 A's)
- //
- // In this case, there are 5 different copies of "AA" in this range, but
- // at most 3 can be outlined. If only outlining 3 of these is going to
- // be unbeneficial, then we ought to not bother.
- //
- // Note that two things DON'T overlap when they look like this:
- // start1...end1 .... start2...end2
- // That is, one must either
- // * End before the other starts
- // * Start after the other ends
- if (std::all_of(CandidatesForRepeatedSeq.begin(),
- CandidatesForRepeatedSeq.end(),
- [&StartIdx, &EndIdx](const Candidate &C) {
- return (EndIdx < C.getStartIdx() ||
- StartIdx > C.getEndIdx());
- })) {
- // It doesn't overlap with anything, so we can outline it.
- // Each sequence is over [StartIt, EndIt].
- // Save the candidate and its location.
-
- MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
- MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
-
- CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
- EndIt, StartIt->getParent(),
- FunctionList.size());
- }
+  // First, find all of the repeated substrings in the tree of minimum
+  // length 2.
+ std::vector<Candidate> CandidatesForRepeatedSeq;
+ for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
+ CandidatesForRepeatedSeq.clear();
+ SuffixTree::RepeatedSubstring RS = *It;
+ unsigned StringLen = RS.Length;
+ for (const unsigned &StartIdx : RS.StartIndices) {
+ unsigned EndIdx = StartIdx + StringLen - 1;
+ // Trick: Discard some candidates that would be incompatible with the
+ // ones we've already found for this sequence. This will save us some
+ // work in candidate selection.
+ //
+ // If two candidates overlap, then we can't outline them both. This
+ // happens when we have candidates that look like, say
+ //
+ // AA (where each "A" is an instruction).
+ //
+ // We might have some portion of the module that looks like this:
+ // AAAAAA (6 A's)
+ //
+ // In this case, there are 5 different copies of "AA" in this range, but
+ // at most 3 can be outlined. If only outlining 3 of these is going to
+ // be unbeneficial, then we ought to not bother.
+ //
+ // Note that two things DON'T overlap when they look like this:
+ // start1...end1 .... start2...end2
+ // That is, one must either
+ // * End before the other starts
+ // * Start after the other ends
+ if (std::all_of(
+ CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
+ [&StartIdx, &EndIdx](const Candidate &C) {
+ return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
+ })) {
+ // It doesn't overlap with anything, so we can outline it.
+ // Each sequence is over [StartIt, EndIt].
+ // Save the candidate and its location.
+
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ MachineBasicBlock *MBB = StartIt->getParent();
+
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
+ EndIt, MBB, FunctionList.size(),
+ Mapper.MBBFlagsMap[MBB]);
}
}
// We've found something we might want to outline.
// Create an OutlinedFunction to store it and check if it'd be beneficial
// to outline.
- if (CandidatesForRepeatedSeq.empty())
+ if (CandidatesForRepeatedSeq.size() < 2)
continue;
// Arbitrarily choose a TII from the first candidate.
@@ -983,179 +1075,33 @@ unsigned MachineOutliner::findCandidates(
OutlinedFunction OF =
TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
- // If we deleted every candidate, then there's nothing to outline.
- if (OF.Candidates.empty())
+ // If we deleted too many candidates, then there's nothing worth outlining.
+ // FIXME: This should take target-specified instruction sizes into account.
+ if (OF.Candidates.size() < 2)
continue;
- std::vector<unsigned> Seq;
- for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
- Seq.push_back(ST.Str[i]);
- OF.Sequence = Seq;
- OF.Name = FunctionList.size();
-
// Is it better to outline this candidate than not?
if (OF.getBenefit() < 1) {
emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
continue;
}
- if (StringLen > MaxLen)
- MaxLen = StringLen;
-
- // The function is beneficial. Save its candidates to the candidate list
- // for pruning.
- for (std::shared_ptr<Candidate> &C : OF.Candidates)
- CandidateList.push_back(C);
FunctionList.push_back(OF);
-
- // Move to the next function.
- Parent.IsInTree = false;
- }
-
- return MaxLen;
-}
-
-// Remove C from the candidate space, and update its OutlinedFunction.
-void MachineOutliner::prune(Candidate &C,
- std::vector<OutlinedFunction> &FunctionList) {
- // Get the OutlinedFunction associated with this Candidate.
- OutlinedFunction &F = FunctionList[C.FunctionIdx];
-
- // Update C's associated function's occurrence count.
- F.decrement();
-
- // Remove C from the CandidateList.
- C.InCandidateList = false;
-
- LLVM_DEBUG(dbgs() << "- Removed a Candidate \n";
- dbgs() << "--- Num fns left for candidate: "
- << F.getOccurrenceCount() << "\n";
- dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
- << "\n";);
-}
-
-void MachineOutliner::pruneOverlaps(
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
- unsigned MaxCandidateLen) {
-
- // Return true if this candidate became unbeneficial for outlining in a
- // previous step.
- auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) {
-
- // Check if the candidate was removed in a previous step.
- if (!C.InCandidateList)
- return true;
-
- // C must be alive. Check if we should remove it.
- if (FunctionList[C.FunctionIdx].getBenefit() < 1) {
- prune(C, FunctionList);
- return true;
- }
-
- // C is in the list, and F is still beneficial.
- return false;
- };
-
- // TODO: Experiment with interval trees or other interval-checking structures
- // to lower the time complexity of this function.
- // TODO: Can we do better than the simple greedy choice?
- // Check for overlaps in the range.
- // This is O(MaxCandidateLen * CandidateList.size()).
- for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
- It++) {
- Candidate &C1 = **It;
-
- // If C1 was already pruned, or its function is no longer beneficial for
- // outlining, move to the next candidate.
- if (ShouldSkipCandidate(C1))
- continue;
-
- // The minimum start index of any candidate that could overlap with this
- // one.
- unsigned FarthestPossibleIdx = 0;
-
- // Either the index is 0, or it's at most MaxCandidateLen indices away.
- if (C1.getStartIdx() > MaxCandidateLen)
- FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
-
- // Compare against the candidates in the list that start at most
- // FarthestPossibleIdx indices away from C1. There are at most
- // MaxCandidateLen of these.
- for (auto Sit = It + 1; Sit != Et; Sit++) {
- Candidate &C2 = **Sit;
-
- // Is this candidate too far away to overlap?
- if (C2.getStartIdx() < FarthestPossibleIdx)
- break;
-
- // If C2 was already pruned, or its function is no longer beneficial for
- // outlining, move to the next candidate.
- if (ShouldSkipCandidate(C2))
- continue;
-
- // Do C1 and C2 overlap?
- //
- // Not overlapping:
- // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
- //
- // We sorted our candidate list so C2Start <= C1Start. We know that
- // C2End > C2Start since each candidate has length >= 2. Therefore, all we
- // have to check is C2End < C2Start to see if we overlap.
- if (C2.getEndIdx() < C1.getStartIdx())
- continue;
-
- // C1 and C2 overlap.
- // We need to choose the better of the two.
- //
- // Approximate this by picking the one which would have saved us the
- // most instructions before any pruning.
-
- // Is C2 a better candidate?
- if (C2.Benefit > C1.Benefit) {
- // Yes, so prune C1. Since C1 is dead, we don't have to compare it
- // against anything anymore, so break.
- prune(C1, FunctionList);
- break;
- }
-
- // Prune C2 and move on to the next candidate.
- prune(C2, FunctionList);
- }
}
}
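Editorial sketch (not part of the patch): the disjointness filter in findCandidates above is easier to picture on bare indices. For a length-2 sequence repeated at five overlapping start indices (the "AAAAAA" case from the comment), only every other start survives. Plain standalone C++, not the outliner's own types; Range and the index values are made up for illustration.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Range { unsigned Start, End; };

int main() {
  const std::vector<unsigned> Starts = {0, 1, 2, 3, 4}; // 5 copies of "AA"
  const unsigned Len = 2;
  std::vector<Range> Kept;
  for (unsigned S : Starts) {
    unsigned E = S + Len - 1;
    // Same predicate as in findCandidates: keep the range only if it
    // overlaps nothing we already kept.
    bool Disjoint = std::all_of(Kept.begin(), Kept.end(), [&](const Range &R) {
      return E < R.Start || S > R.End;
    });
    if (Disjoint)
      Kept.push_back({S, E});
  }
  std::printf("kept %zu of %zu copies\n", Kept.size(), Starts.size()); // 3 of 5
  return 0;
}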
-unsigned MachineOutliner::buildCandidateList(
- std::vector<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
- InstructionMapper &Mapper) {
-
- std::vector<unsigned> CandidateSequence; // Current outlining candidate.
- unsigned MaxCandidateLen = 0; // Length of the longest candidate.
-
- MaxCandidateLen =
- findCandidates(ST, Mapper, CandidateList, FunctionList);
-
- // Sort the candidates in decending order. This will simplify the outlining
- // process when we have to remove the candidates from the mapping by
- // allowing us to cut them out without keeping track of an offset.
- std::stable_sort(
- CandidateList.begin(), CandidateList.end(),
- [](const std::shared_ptr<Candidate> &LHS,
- const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; });
-
- return MaxCandidateLen;
-}
-
MachineFunction *
-MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
- InstructionMapper &Mapper) {
+MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
+ InstructionMapper &Mapper,
+ unsigned Name) {
// Create the function name. This should be unique. For now, just hash the
// module name and include it in the function name plus the number of this
// function.
std::ostringstream NameStream;
- NameStream << "OUTLINED_FUNCTION_" << OF.Name;
+ // FIXME: We should have a better naming scheme. This should be stable,
+ // regardless of changes to the outliner's cost model/traversal order.
+ NameStream << "OUTLINED_FUNCTION_" << Name;
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
@@ -1176,8 +1122,14 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
F->addFnAttr(Attribute::OptimizeForSize);
F->addFnAttr(Attribute::MinSize);
- // Save F so that we can add debug info later if we need to.
- CreatedIRFunctions.push_back(F);
+ // Include target features from an arbitrary candidate for the outlined
+ // function. This makes sure the outlined function knows what kinds of
+ // instructions are going into it. This is fine, since all parent functions
+ // must necessarily support the instructions that are in the outlined region.
+ Candidate &FirstCand = OF.Candidates.front();
+ const Function &ParentFn = FirstCand.getMF()->getFunction();
+ if (ParentFn.hasFnAttribute("target-features"))
+ F->addFnAttr(ParentFn.getFnAttribute("target-features"));
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
@@ -1192,12 +1144,10 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
- // Copy over the instructions for the function using the integer mappings in
- // its sequence.
- for (unsigned Str : OF.Sequence) {
- MachineInstr *NewMI =
- MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
- NewMI->dropMemRefs();
+ for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
+ ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ NewMI->dropMemRefs(MF);
// Don't keep debug information for outlined instructions.
NewMI->setDebugLoc(DebugLoc());
@@ -1206,6 +1156,10 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
TII.buildOutlinedFrame(MBB, MF, OF);
+ // Outlined functions shouldn't preserve liveness.
+ MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
+ MF.getRegInfo().freezeReservedRegs(MF);
+
// If there's a DISubprogram associated with this outlined function, then
// emit debug info for the outlined function.
if (DISubprogram *SP = getSubprogramOrNull(OF)) {
@@ -1214,118 +1168,127 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
DIBuilder DB(M, true, CU);
DIFile *Unit = SP->getFile();
Mangler Mg;
-
- // Walk over each IR function we created in the outliner and create
- // DISubprograms for each function.
- for (Function *F : CreatedIRFunctions) {
- // Get the mangled name of the function for the linkage name.
- std::string Dummy;
- llvm::raw_string_ostream MangledNameStream(Dummy);
- Mg.getNameWithPrefix(MangledNameStream, F, false);
-
- DISubprogram *SP = DB.createFunction(
- Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
- Unit /* File */,
- 0 /* Line 0 is reserved for compiler-generated code. */,
- DB.createSubroutineType(
- DB.getOrCreateTypeArray(None)), /* void type */
- false, true, 0, /* Line 0 is reserved for compiler-generated code. */
- DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
- true /* Outlined code is optimized code by definition. */);
-
- // Don't add any new variables to the subprogram.
- DB.finalizeSubprogram(SP);
-
- // Attach subprogram to the function.
- F->setSubprogram(SP);
- }
-
+ // Get the mangled name of the function for the linkage name.
+ std::string Dummy;
+ llvm::raw_string_ostream MangledNameStream(Dummy);
+ Mg.getNameWithPrefix(MangledNameStream, F, false);
+
+ DISubprogram *OutlinedSP = DB.createFunction(
+ Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
+ Unit /* File */,
+ 0 /* Line 0 is reserved for compiler-generated code. */,
+ DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
+ 0, /* Line 0 is reserved for compiler-generated code. */
+ DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
+ /* Outlined code is optimized code by definition. */
+ DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
+
+ // Don't add any new variables to the subprogram.
+ DB.finalizeSubprogram(OutlinedSP);
+
+ // Attach subprogram to the function.
+ F->setSubprogram(OutlinedSP);
// We're done with the DIBuilder.
DB.finalize();
}
- // Outlined functions shouldn't preserve liveness.
- MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
- MF.getRegInfo().freezeReservedRegs(MF);
return &MF;
}
-bool MachineOutliner::outline(
- Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
- std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) {
+bool MachineOutliner::outline(Module &M,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper) {
bool OutlinedSomething = false;
- // Replace the candidates with calls to their respective outlined functions.
- for (const std::shared_ptr<Candidate> &Cptr : CandidateList) {
- Candidate &C = *Cptr;
- // Was the candidate removed during pruneOverlaps?
- if (!C.InCandidateList)
- continue;
- // If not, then look at its OutlinedFunction.
- OutlinedFunction &OF = FunctionList[C.FunctionIdx];
+ // Number to append to the current outlined function.
+ unsigned OutlinedFunctionNum = 0;
- // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
+ // Sort by benefit. The most beneficial functions should be outlined first.
+ std::stable_sort(
+ FunctionList.begin(), FunctionList.end(),
+ [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
+ return LHS.getBenefit() > RHS.getBenefit();
+ });
+
+ // Walk over each function, outlining them as we go along. Functions are
+ // outlined greedily, based off the sort above.
+ for (OutlinedFunction &OF : FunctionList) {
+ // If we outlined something that overlapped with a candidate in a previous
+ // step, then we can't outline from it.
+ erase_if(OF.Candidates, [&Mapper](Candidate &C) {
+ return std::any_of(
+ Mapper.UnsignedVec.begin() + C.getStartIdx(),
+ Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
+ [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
+ });
+
+ // If we made it unbeneficial to outline this function, skip it.
if (OF.getBenefit() < 1)
continue;
- // Does this candidate have a function yet?
- if (!OF.MF) {
- OF.MF = createOutlinedFunction(M, OF, Mapper);
- emitOutlinedFunctionRemark(OF);
- FunctionsCreated++;
- }
-
+ // It's beneficial. Create the function and outline its sequence's
+ // occurrences.
+ OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
+ emitOutlinedFunctionRemark(OF);
+ FunctionsCreated++;
+ OutlinedFunctionNum++; // Created a function, move to the next name.
MachineFunction *MF = OF.MF;
- MachineBasicBlock &MBB = *C.getMBB();
- MachineBasicBlock::iterator StartIt = C.front();
- MachineBasicBlock::iterator EndIt = C.back();
- assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!");
- assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!");
-
const TargetSubtargetInfo &STI = MF->getSubtarget();
const TargetInstrInfo &TII = *STI.getInstrInfo();
- // Insert a call to the new function and erase the old sequence.
- auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C);
-
- // If the caller tracks liveness, then we need to make sure that anything
- // we outline doesn't break liveness assumptions.
- // The outlined functions themselves currently don't track liveness, but
- // we should make sure that the ranges we yank things out of aren't
- // wrong.
- if (MBB.getParent()->getProperties().hasProperty(
- MachineFunctionProperties::Property::TracksLiveness)) {
- // Helper lambda for adding implicit def operands to the call instruction.
- auto CopyDefs = [&CallInst](MachineInstr &MI) {
- for (MachineOperand &MOP : MI.operands()) {
- // Skip over anything that isn't a register.
- if (!MOP.isReg())
- continue;
-
- // If it's a def, add it to the call instruction.
- if (MOP.isDef())
- CallInst->addOperand(
- MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */
- true /* isImp = true */));
- }
- };
+ // Replace occurrences of the sequence with calls to the new function.
+ for (Candidate &C : OF.Candidates) {
+ MachineBasicBlock &MBB = *C.getMBB();
+ MachineBasicBlock::iterator StartIt = C.front();
+ MachineBasicBlock::iterator EndIt = C.back();
+
+ // Insert the call.
+ auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
+
+ // If the caller tracks liveness, then we need to make sure that
+ // anything we outline doesn't break liveness assumptions. The outlined
+ // functions themselves currently don't track liveness, but we should
+ // make sure that the ranges we yank things out of aren't wrong.
+ if (MBB.getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness)) {
+ // Helper lambda for adding implicit def operands to the call
+ // instruction.
+ auto CopyDefs = [&CallInst](MachineInstr &MI) {
+ for (MachineOperand &MOP : MI.operands()) {
+ // Skip over anything that isn't a register.
+ if (!MOP.isReg())
+ continue;
+
+ // If it's a def, add it to the call instruction.
+ if (MOP.isDef())
+ CallInst->addOperand(MachineOperand::CreateReg(
+ MOP.getReg(), true, /* isDef = true */
+ true /* isImp = true */));
+ }
+ };
+        // Copy over the defs in the outlined range.
+        // First inst in outlined range <-- Anything that's defined in this
+        // ...                           .. range has to be added as an implicit
+        // Last inst in outlined range  <-- def to the call instruction.
+ std::for_each(CallInst, std::next(EndIt), CopyDefs);
+ }
- // Copy over the defs in the outlined range.
- // First inst in outlined range <-- Anything that's defined in this
- // ... .. range has to be added as an implicit
- // Last inst in outlined range <-- def to the call instruction.
- std::for_each(CallInst, std::next(EndIt), CopyDefs);
- }
+ // Erase from the point after where the call was inserted up to, and
+ // including, the final instruction in the sequence.
+ // Erase needs one past the end, so we need std::next there too.
+ MBB.erase(std::next(StartIt), std::next(EndIt));
- // Erase from the point after where the call was inserted up to, and
- // including, the final instruction in the sequence.
- // Erase needs one past the end, so we need std::next there too.
- MBB.erase(std::next(StartIt), std::next(EndIt));
- OutlinedSomething = true;
+ // Keep track of what we removed by marking them all as -1.
+ std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
+ Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
+ [](unsigned &I) { I = static_cast<unsigned>(-1); });
+ OutlinedSomething = true;
- // Statistics.
- NumOutlined++;
+ // Statistics.
+ NumOutlined++;
+ }
}
LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
@@ -1333,34 +1296,8 @@ bool MachineOutliner::outline(
return OutlinedSomething;
}
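Editorial sketch (not part of the patch): the interaction between the benefit sort, the erase_if over already-consumed indices, and the -1 markers in outline() above is easier to see in isolation. The sketch below replays the same greedy bookkeeping over a plain vector of indices; the types and names are simplified stand-ins, not the pass's own, and the size check is a simplified stand-in for the getBenefit() < 1 recheck.

#include <algorithm>
#include <vector>

struct CandSketch { unsigned Start, End; };
struct FnSketch { unsigned Benefit; std::vector<CandSketch> Cands; };

void greedyOutlineSketch(std::vector<FnSketch> &Fns,
                         std::vector<unsigned> &Mapped) {
  // Most beneficial functions first, as in outline() above.
  std::stable_sort(Fns.begin(), Fns.end(),
                   [](const FnSketch &L, const FnSketch &R) {
                     return L.Benefit > R.Benefit;
                   });
  for (FnSketch &F : Fns) {
    // Drop candidates overlapping anything already outlined (marked -1).
    F.Cands.erase(
        std::remove_if(F.Cands.begin(), F.Cands.end(),
                       [&](const CandSketch &C) {
                         return std::any_of(
                             Mapped.begin() + C.Start,
                             Mapped.begin() + C.End + 1,
                             [](unsigned V) { return V == unsigned(-1); });
                       }),
        F.Cands.end());
    if (F.Cands.size() < 2)
      continue; // simplified stand-in for the benefit recheck
    // "Outline" the survivors: consume their ranges so later, less
    // beneficial functions can't reuse them.
    for (const CandSketch &C : F.Cands)
      std::fill(Mapped.begin() + C.Start, Mapped.begin() + C.End + 1,
                unsigned(-1));
  }
}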
-bool MachineOutliner::runOnModule(Module &M) {
- // Check if there's anything in the module. If it's empty, then there's
- // nothing to outline.
- if (M.empty())
- return false;
-
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
-
- // If the user passed -enable-machine-outliner=always or
- // -enable-machine-outliner, the pass will run on all functions in the module.
- // Otherwise, if the target supports default outlining, it will run on all
- // functions deemed by the target to be worth outlining from by default. Tell
- // the user how the outliner is running.
- LLVM_DEBUG(
- dbgs() << "Machine Outliner: Running on ";
- if (RunOnAllFunctions)
- dbgs() << "all functions";
- else
- dbgs() << "target-default functions";
- dbgs() << "\n"
- );
-
- // If the user specifies that they want to outline from linkonceodrs, set
- // it here.
- OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
-
- InstructionMapper Mapper;
-
+void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
+ MachineModuleInfo &MMI) {
// Build instruction mappings for each function in the module. Start by
// iterating over each Function in M.
for (Function &F : M) {
@@ -1395,7 +1332,11 @@ bool MachineOutliner::runOnModule(Module &M) {
for (MachineBasicBlock &MBB : *MF) {
// If there isn't anything in MBB, then there's no point in outlining from
// it.
- if (MBB.empty())
+ // If there are fewer than 2 instructions in the MBB, then it can't ever
+ // contain something worth outlining.
+      // FIXME: This should be based off of the maximum size in bytes of an
+      // outlined call versus the size in bytes of the MBB.
+ if (MBB.empty() || MBB.size() < 2)
continue;
// Check if MBB could be the target of an indirect branch. If it is, then
@@ -1407,21 +1348,133 @@ bool MachineOutliner::runOnModule(Module &M) {
Mapper.convertToUnsignedVec(MBB, *TII);
}
}
+}
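Editorial sketch (not part of the patch): InstructionMapper turns each MachineInstr into an integer so the suffix tree can treat the whole module as one long string. Legal, identical instructions share an id; illegal instructions each get a fresh, never-reused id so they can never participate in a match. The standalone sketch below illustrates that idea only; hashing real MachineInstrs is more involved, the string keys are placeholders, and the starting sentinel value is illustrative.

#include <string>
#include <unordered_map>
#include <vector>

struct MapperSketch {
  std::unordered_map<std::string, unsigned> InstrToId;
  std::vector<unsigned> UnsignedVec; // the "string" the suffix tree consumes
  unsigned NextLegalId = 0;
  unsigned NextIllegalId = static_cast<unsigned>(-3); // counts downward

  void mapLegal(const std::string &Instr) {
    auto Ins = InstrToId.emplace(Instr, NextLegalId);
    if (Ins.second)
      ++NextLegalId;                            // first time we've seen this
    UnsignedVec.push_back(Ins.first->second);   // identical instrs share an id
  }

  void mapIllegal() {
    // Unique id per illegal instruction: it can never match anything else,
    // so no candidate will ever straddle it.
    UnsignedVec.push_back(NextIllegalId--);
  }
};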
- // Construct a suffix tree, use it to find candidates, and then outline them.
- SuffixTree ST(Mapper.UnsignedVec);
- std::vector<std::shared_ptr<Candidate>> CandidateList;
+void MachineOutliner::initSizeRemarkInfo(
+ const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount) {
+ // Collect instruction counts for every function. We'll use this to emit
+ // per-function size remarks later.
+ for (const Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // We only care about MI counts here. If there's no MachineFunction at this
+ // point, then there won't be after the outliner runs, so let's move on.
+ if (!MF)
+ continue;
+ FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
+ }
+}
+
+void MachineOutliner::emitInstrCountChangedRemark(
+ const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount) {
+ // Iterate over each function in the module and emit remarks.
+ // Note that we won't miss anything by doing this, because the outliner never
+ // deletes functions.
+ for (const Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // The outliner never deletes functions. If we don't have a MF here, then we
+ // didn't have one prior to outlining either.
+ if (!MF)
+ continue;
+
+ std::string Fname = F.getName();
+ unsigned FnCountAfter = MF->getInstructionCount();
+ unsigned FnCountBefore = 0;
+
+ // Check if the function was recorded before.
+ auto It = FunctionToInstrCount.find(Fname);
+
+ // Did we have a previously-recorded size? If yes, then set FnCountBefore
+ // to that.
+ if (It != FunctionToInstrCount.end())
+ FnCountBefore = It->second;
+
+ // Compute the delta and emit a remark if there was a change.
+ int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
+ static_cast<int64_t>(FnCountBefore);
+ if (FnDelta == 0)
+ continue;
+
+ MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
+ DiagnosticLocation(),
+ &MF->front());
+ R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
+ << ": Function: "
+ << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
+ << ": MI instruction count changed from "
+ << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
+ FnCountBefore)
+ << " to "
+ << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
+ FnCountAfter)
+ << "; Delta: "
+ << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
+ return R;
+ });
+ }
+}
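Editorial sketch (not part of the patch): the before/after bookkeeping driving these size remarks is a map lookup plus a signed subtraction. A small standalone illustration; std::map stands in for StringMap, and the function names and counts below are invented.

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

int main() {
  std::map<std::string, unsigned> Before = {{"foo", 120}, {"bar", 40}};
  std::map<std::string, unsigned> After = {
      {"foo", 95}, {"bar", 40}, {"OUTLINED_FUNCTION_0", 12}};

  for (const auto &KV : After) {
    unsigned CountBefore = 0; // functions created by the outliner start at 0
    auto It = Before.find(KV.first);
    if (It != Before.end())
      CountBefore = It->second;
    int64_t Delta =
        static_cast<int64_t>(KV.second) - static_cast<int64_t>(CountBefore);
    if (Delta == 0)
      continue; // only remark on functions whose MI count actually changed
    std::printf("%s: %u -> %u (delta %lld)\n", KV.first.c_str(), CountBefore,
                KV.second, static_cast<long long>(Delta));
  }
  return 0;
}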
+
+bool MachineOutliner::runOnModule(Module &M) {
+ // Check if there's anything in the module. If it's empty, then there's
+ // nothing to outline.
+ if (M.empty())
+ return false;
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+
+ // If the user passed -enable-machine-outliner=always or
+ // -enable-machine-outliner, the pass will run on all functions in the module.
+ // Otherwise, if the target supports default outlining, it will run on all
+ // functions deemed by the target to be worth outlining from by default. Tell
+ // the user how the outliner is running.
+ LLVM_DEBUG(
+ dbgs() << "Machine Outliner: Running on ";
+ if (RunOnAllFunctions)
+ dbgs() << "all functions";
+ else
+ dbgs() << "target-default functions";
+ dbgs() << "\n"
+ );
+
+ // If the user specifies that they want to outline from linkonceodrs, set
+ // it here.
+ OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
+ InstructionMapper Mapper;
+
+ // Prepare instruction mappings for the suffix tree.
+ populateMapper(Mapper, M, MMI);
std::vector<OutlinedFunction> FunctionList;
// Find all of the outlining candidates.
- unsigned MaxCandidateLen =
- buildCandidateList(CandidateList, FunctionList, ST, Mapper);
-
- // Remove candidates that overlap with other candidates.
- pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen);
+ findCandidates(Mapper, FunctionList);
+
+ // If we've requested size remarks, then collect the MI counts of every
+ // function before outlining, and the MI counts after outlining.
+ // FIXME: This shouldn't be in the outliner at all; it should ultimately be
+ // the pass manager's responsibility.
+  // This could easily be placed in outline() instead, but since we ultimately
+  // don't want it in the outliner at all, it's kept separate here for now.
+
+ // Check if we want size remarks.
+ bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
+ StringMap<unsigned> FunctionToInstrCount;
+ if (ShouldEmitSizeRemarks)
+ initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
// Outline each of the candidates and return true if something was outlined.
- bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
+ bool OutlinedSomething = outline(M, FunctionList, Mapper);
+
+ // If we outlined something, we definitely changed the MI count of the
+ // module. If we've asked for size remarks, then output them.
+ // FIXME: This should be in the pass manager.
+ if (ShouldEmitSizeRemarks && OutlinedSomething)
+ emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount);
return OutlinedSomething;
}
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
deleted file mode 100644
index 3ee3e40b27e2..000000000000
--- a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- CodeGen/MachineInstr.cpp ------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the machine function pass registry for register allocators
-// and instruction schedulers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachinePassRegistry.h"
-
-using namespace llvm;
-
-void MachinePassRegistryListener::anchor() { }
-
-/// setDefault - Set the default constructor by name.
-void MachinePassRegistry::setDefault(StringRef Name) {
- MachinePassCtor Ctor = nullptr;
- for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
- if (R->getName() == Name) {
- Ctor = R->getCtor();
- break;
- }
- }
- assert(Ctor && "Unregistered pass name");
- setDefault(Ctor);
-}
-
-/// Add - Adds a function pass to the registration list.
-///
-void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
- Node->setNext(List);
- List = Node;
- if (Listener) Listener->NotifyAdd(Node->getName(),
- Node->getCtor(),
- Node->getDescription());
-}
-
-
-/// Remove - Removes a function pass from the registration list.
-///
-void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
- for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
- if (*I == Node) {
- if (Listener) Listener->NotifyRemove(Node->getName());
- *I = (*I)->getNext();
- break;
- }
- }
-}
diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
index 9bb00aaef86d..4d451bdd7f69 100644
--- a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -9,34 +9,6 @@
//
// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
//
-// Software pipelining (SWP) is an instruction scheduling technique for loops
-// that overlap loop iterations and exploits ILP via a compiler transformation.
-//
-// Swing Modulo Scheduling is an implementation of software pipelining
-// that generates schedules that are near optimal in terms of initiation
-// interval, register requirements, and stage count. See the papers:
-//
-// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
-// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
-// Conference on Parallel Architectures and Compilation Techiniques.
-//
-// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
-// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
-// Transactions on Computers, Vol. 50, No. 3, 2001.
-//
-// "An Implementation of Swing Modulo Scheduling With Extensions for
-// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
-// Urbana-Chambpain, 2005.
-//
-//
-// The SMS algorithm consists of three main steps after computing the minimal
-// initiation interval (MII).
-// 1) Analyze the dependence graph and compute information about each
-// instruction in the graph.
-// 2) Order the nodes (instructions) by priority based upon the heuristics
-// described in the algorithm.
-// 3) Attempt to schedule the nodes in the specified order using the MII.
-//
// This SMS implementation is a target-independent back-end pass. When enabled,
// the pass runs just prior to the register allocation pass, while the machine
// IR is in SSA form. If software pipelining is successful, then the original
@@ -83,13 +55,11 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -171,552 +141,15 @@ static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
cl::ReallyHidden, cl::init(false),
cl::ZeroOrMore, cl::desc("Ignore RecMII"));
-namespace {
-
-class NodeSet;
-class SMSchedule;
-
-/// The main class in the implementation of the target independent
-/// software pipeliner pass.
-class MachinePipeliner : public MachineFunctionPass {
-public:
- MachineFunction *MF = nullptr;
- const MachineLoopInfo *MLI = nullptr;
- const MachineDominatorTree *MDT = nullptr;
- const InstrItineraryData *InstrItins;
- const TargetInstrInfo *TII = nullptr;
- RegisterClassInfo RegClassInfo;
-
-#ifndef NDEBUG
- static int NumTries;
-#endif
-
- /// Cache the target analysis information about the loop.
- struct LoopInfo {
- MachineBasicBlock *TBB = nullptr;
- MachineBasicBlock *FBB = nullptr;
- SmallVector<MachineOperand, 4> BrCond;
- MachineInstr *LoopInductionVar = nullptr;
- MachineInstr *LoopCompare = nullptr;
- };
- LoopInfo LI;
-
- static char ID;
-
- MachinePipeliner() : MachineFunctionPass(ID) {
- initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
-private:
- void preprocessPhiNodes(MachineBasicBlock &B);
- bool canPipelineLoop(MachineLoop &L);
- bool scheduleLoop(MachineLoop &L);
- bool swingModuloScheduler(MachineLoop &L);
-};
-
-/// This class builds the dependence graph for the instructions in a loop,
-/// and attempts to schedule the instructions using the SMS algorithm.
-class SwingSchedulerDAG : public ScheduleDAGInstrs {
- MachinePipeliner &Pass;
- /// The minimum initiation interval between iterations for this schedule.
- unsigned MII = 0;
- /// Set to true if a valid pipelined schedule is found for the loop.
- bool Scheduled = false;
- MachineLoop &Loop;
- LiveIntervals &LIS;
- const RegisterClassInfo &RegClassInfo;
-
- /// A toplogical ordering of the SUnits, which is needed for changing
- /// dependences and iterating over the SUnits.
- ScheduleDAGTopologicalSort Topo;
-
- struct NodeInfo {
- int ASAP = 0;
- int ALAP = 0;
- int ZeroLatencyDepth = 0;
- int ZeroLatencyHeight = 0;
-
- NodeInfo() = default;
- };
- /// Computed properties for each node in the graph.
- std::vector<NodeInfo> ScheduleInfo;
-
- enum OrderKind { BottomUp = 0, TopDown = 1 };
- /// Computed node ordering for scheduling.
- SetVector<SUnit *> NodeOrder;
-
- using NodeSetType = SmallVector<NodeSet, 8>;
- using ValueMapTy = DenseMap<unsigned, unsigned>;
- using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
- using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
-
- /// Instructions to change when emitting the final schedule.
- DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
-
- /// We may create a new instruction, so remember it because it
- /// must be deleted when the pass is finished.
- SmallPtrSet<MachineInstr *, 4> NewMIs;
-
- /// Ordered list of DAG postprocessing steps.
- std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
-
- /// Helper class to implement Johnson's circuit finding algorithm.
- class Circuits {
- std::vector<SUnit> &SUnits;
- SetVector<SUnit *> Stack;
- BitVector Blocked;
- SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
- SmallVector<SmallVector<int, 4>, 16> AdjK;
- unsigned NumPaths;
- static unsigned MaxPaths;
-
- public:
- Circuits(std::vector<SUnit> &SUs)
- : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
-
- /// Reset the data structures used in the circuit algorithm.
- void reset() {
- Stack.clear();
- Blocked.reset();
- B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
- NumPaths = 0;
- }
-
- void createAdjacencyStructure(SwingSchedulerDAG *DAG);
- bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
- void unblock(int U);
- };
-
-public:
- SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
- const RegisterClassInfo &rci)
- : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
- RegClassInfo(rci), Topo(SUnits, &ExitSU) {
- P.MF->getSubtarget().getSMSMutations(Mutations);
- }
-
- void schedule() override;
- void finishBlock() override;
-
- /// Return true if the loop kernel has been scheduled.
- bool hasNewSchedule() { return Scheduled; }
-
- /// Return the earliest time an instruction may be scheduled.
- int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
-
- /// Return the latest time an instruction my be scheduled.
- int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
-
- /// The mobility function, which the number of slots in which
- /// an instruction may be scheduled.
- int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
-
- /// The depth, in the dependence graph, for a node.
- unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
-
- /// The maximum unweighted length of a path from an arbitrary node to the
- /// given node in which each edge has latency 0
- int getZeroLatencyDepth(SUnit *Node) {
- return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
- }
-
- /// The height, in the dependence graph, for a node.
- unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
-
- /// The maximum unweighted length of a path from the given node to an
- /// arbitrary node in which each edge has latency 0
- int getZeroLatencyHeight(SUnit *Node) {
- return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
- }
-
- /// Return true if the dependence is a back-edge in the data dependence graph.
- /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
- /// using an anti dependence from a Phi to an instruction.
- bool isBackedge(SUnit *Source, const SDep &Dep) {
- if (Dep.getKind() != SDep::Anti)
- return false;
- return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
- }
-
- bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
-
- /// The distance function, which indicates that operation V of iteration I
- /// depends on operations U of iteration I-distance.
- unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
- // Instructions that feed a Phi have a distance of 1. Computing larger
- // values for arrays requires data dependence information.
- if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
- return 1;
- return 0;
- }
-
- /// Set the Minimum Initiation Interval for this schedule attempt.
- void setMII(unsigned mii) { MII = mii; }
-
- void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
-
- void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
-
- /// Return the new base register that was stored away for the changed
- /// instruction.
- unsigned getInstrBaseReg(SUnit *SU) {
- DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
- InstrChanges.find(SU);
- if (It != InstrChanges.end())
- return It->second.first;
- return 0;
- }
-
- void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
- Mutations.push_back(std::move(Mutation));
- }
-
-private:
- void addLoopCarriedDependences(AliasAnalysis *AA);
- void updatePhiDependences();
- void changeDependences();
- unsigned calculateResMII();
- unsigned calculateRecMII(NodeSetType &RecNodeSets);
- void findCircuits(NodeSetType &NodeSets);
- void fuseRecs(NodeSetType &NodeSets);
- void removeDuplicateNodes(NodeSetType &NodeSets);
- void computeNodeFunctions(NodeSetType &NodeSets);
- void registerPressureFilter(NodeSetType &NodeSets);
- void colocateNodeSets(NodeSetType &NodeSets);
- void checkNodeSets(NodeSetType &NodeSets);
- void groupRemainingNodes(NodeSetType &NodeSets);
- void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
- SetVector<SUnit *> &NodesAdded);
- void computeNodeOrder(NodeSetType &NodeSets);
- void checkValidNodeOrder(const NodeSetType &Circuits) const;
- bool schedulePipeline(SMSchedule &Schedule);
- void generatePipelinedLoop(SMSchedule &Schedule);
- void generateProlog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &PrologBBs);
- void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
- void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
- MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum,
- unsigned CurStageNum, bool IsLast);
- void removeDeadInstructions(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs);
- void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule);
- void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
- ValueMapTy *VRMap);
- bool computeDelta(MachineInstr &MI, unsigned &Delta);
- void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
- unsigned Num);
- MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum);
- MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
- unsigned InstStageNum,
- SMSchedule &Schedule);
- void updateInstruction(MachineInstr *NewMI, bool LastDef,
- unsigned CurStageNum, unsigned InstrStageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap);
- MachineInstr *findDefInLoop(unsigned Reg);
- unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
- unsigned LoopStage, ValueMapTy *VRMap,
- MachineBasicBlock *BB);
- void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
- SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap);
- void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
- InstrMapTy &InstrMap, unsigned CurStageNum,
- unsigned PhiNum, MachineInstr *Phi,
- unsigned OldReg, unsigned NewReg,
- unsigned PrevReg = 0);
- bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
- unsigned &OffsetPos, unsigned &NewBase,
- int64_t &NewOffset);
- void postprocessDAG();
-};
-
-/// A NodeSet contains a set of SUnit DAG nodes with additional information
-/// that assigns a priority to the set.
-class NodeSet {
- SetVector<SUnit *> Nodes;
- bool HasRecurrence = false;
- unsigned RecMII = 0;
- int MaxMOV = 0;
- unsigned MaxDepth = 0;
- unsigned Colocate = 0;
- SUnit *ExceedPressure = nullptr;
- unsigned Latency = 0;
-
-public:
- using iterator = SetVector<SUnit *>::const_iterator;
-
- NodeSet() = default;
- NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
- Latency = 0;
- for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
- for (const SDep &Succ : Nodes[i]->Succs)
- if (Nodes.count(Succ.getSUnit()))
- Latency += Succ.getLatency();
- }
-
- bool insert(SUnit *SU) { return Nodes.insert(SU); }
-
- void insert(iterator S, iterator E) { Nodes.insert(S, E); }
-
- template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
- return Nodes.remove_if(P);
- }
-
- unsigned count(SUnit *SU) const { return Nodes.count(SU); }
-
- bool hasRecurrence() { return HasRecurrence; };
-
- unsigned size() const { return Nodes.size(); }
-
- bool empty() const { return Nodes.empty(); }
-
- SUnit *getNode(unsigned i) const { return Nodes[i]; };
-
- void setRecMII(unsigned mii) { RecMII = mii; };
-
- void setColocate(unsigned c) { Colocate = c; };
-
- void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
-
- bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
-
- int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
-
- int getRecMII() { return RecMII; }
-
- /// Summarize node functions for the entire node set.
- void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
- for (SUnit *SU : *this) {
- MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
- MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
- }
- }
-
- unsigned getLatency() { return Latency; }
-
- unsigned getMaxDepth() { return MaxDepth; }
-
- void clear() {
- Nodes.clear();
- RecMII = 0;
- HasRecurrence = false;
- MaxMOV = 0;
- MaxDepth = 0;
- Colocate = 0;
- ExceedPressure = nullptr;
- }
-
- operator SetVector<SUnit *> &() { return Nodes; }
-
- /// Sort the node sets by importance. First, rank them by recurrence MII,
- /// then by mobility (least mobile done first), and finally by depth.
- /// Each node set may contain a colocate value which is used as the first
- /// tie breaker, if it's set.
- bool operator>(const NodeSet &RHS) const {
- if (RecMII == RHS.RecMII) {
- if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
- return Colocate < RHS.Colocate;
- if (MaxMOV == RHS.MaxMOV)
- return MaxDepth > RHS.MaxDepth;
- return MaxMOV < RHS.MaxMOV;
- }
- return RecMII > RHS.RecMII;
- }
-
- bool operator==(const NodeSet &RHS) const {
- return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
- MaxDepth == RHS.MaxDepth;
- }
-
- bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
-
- iterator begin() { return Nodes.begin(); }
- iterator end() { return Nodes.end(); }
-
- void print(raw_ostream &os) const {
- os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
- << " depth " << MaxDepth << " col " << Colocate << "\n";
- for (const auto &I : Nodes)
- os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
- os << "\n";
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
-#endif
-};
-
-/// This class represents the scheduled code. The main data structure is a
-/// map from scheduled cycle to instructions. During scheduling, the
-/// data structure explicitly represents all stages/iterations. When
-/// the algorithm finshes, the schedule is collapsed into a single stage,
-/// which represents instructions from different loop iterations.
-///
-/// The SMS algorithm allows negative values for cycles, so the first cycle
-/// in the schedule is the smallest cycle value.
-class SMSchedule {
-private:
- /// Map from execution cycle to instructions.
- DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
-
- /// Map from instruction to execution cycle.
- std::map<SUnit *, int> InstrToCycle;
-
- /// Map for each register and the max difference between its uses and def.
- /// The first element in the pair is the max difference in stages. The
- /// second is true if the register defines a Phi value and loop value is
- /// scheduled before the Phi.
- std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
-
- /// Keep track of the first cycle value in the schedule. It starts
- /// as zero, but the algorithm allows negative values.
- int FirstCycle = 0;
-
- /// Keep track of the last cycle value in the schedule.
- int LastCycle = 0;
-
- /// The initiation interval (II) for the schedule.
- int InitiationInterval = 0;
-
- /// Target machine information.
- const TargetSubtargetInfo &ST;
-
- /// Virtual register information.
- MachineRegisterInfo &MRI;
-
- std::unique_ptr<DFAPacketizer> Resources;
-
-public:
- SMSchedule(MachineFunction *mf)
- : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
- Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
-
- void reset() {
- ScheduledInstrs.clear();
- InstrToCycle.clear();
- RegToStageDiff.clear();
- FirstCycle = 0;
- LastCycle = 0;
- InitiationInterval = 0;
- }
-
- /// Set the initiation interval for this schedule.
- void setInitiationInterval(int ii) { InitiationInterval = ii; }
-
- /// Return the first cycle in the completed schedule. This
- /// can be a negative value.
- int getFirstCycle() const { return FirstCycle; }
-
- /// Return the last cycle in the finalized schedule.
- int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
-
- /// Return the cycle of the earliest scheduled instruction in the dependence
- /// chain.
- int earliestCycleInChain(const SDep &Dep);
-
- /// Return the cycle of the latest scheduled instruction in the dependence
- /// chain.
- int latestCycleInChain(const SDep &Dep);
+namespace llvm {
- void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
- int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
- bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
+// A command line option to enable the CopyToPhi DAG mutation.
+cl::opt<bool>
+ SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
+ cl::init(true), cl::ZeroOrMore,
+ cl::desc("Enable CopyToPhi DAG Mutation"));
- /// Iterators for the cycle to instruction map.
- using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
- using const_sched_iterator =
- DenseMap<int, std::deque<SUnit *>>::const_iterator;
-
- /// Return true if the instruction is scheduled at the specified stage.
- bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
- return (stageScheduled(SU) == (int)StageNum);
- }
-
- /// Return the stage for a scheduled instruction. Return -1 if
- /// the instruction has not been scheduled.
- int stageScheduled(SUnit *SU) const {
- std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
- if (it == InstrToCycle.end())
- return -1;
- return (it->second - FirstCycle) / InitiationInterval;
- }
-
- /// Return the cycle for a scheduled instruction. This function normalizes
- /// the first cycle to be 0.
- unsigned cycleScheduled(SUnit *SU) const {
- std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
- assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
- return (it->second - FirstCycle) % InitiationInterval;
- }
-
- /// Return the maximum stage count needed for this schedule.
- unsigned getMaxStageCount() {
- return (LastCycle - FirstCycle) / InitiationInterval;
- }
-
- /// Return the max. number of stages/iterations that can occur between a
- /// register definition and its uses.
- unsigned getStagesForReg(int Reg, unsigned CurStage) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
- return 1;
- return Stages.first;
- }
-
- /// The number of stages for a Phi is a little different than other
- /// instructions. The minimum value computed in RegToStageDiff is 1
- /// because we assume the Phi is needed for at least 1 iteration.
- /// This is not the case if the loop value is scheduled prior to the
- /// Phi in the same stage. This function returns the number of stages
- /// or iterations needed between the Phi definition and any uses.
- unsigned getStagesForPhi(int Reg) {
- std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
- if (Stages.second)
- return Stages.first;
- return Stages.first - 1;
- }
-
- /// Return the instructions that are scheduled at the specified cycle.
- std::deque<SUnit *> &getInstructions(int cycle) {
- return ScheduledInstrs[cycle];
- }
-
- bool isValidSchedule(SwingSchedulerDAG *SSD);
- void finalizeSchedule(SwingSchedulerDAG *SSD);
- void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
- std::deque<SUnit *> &Insts);
- bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
- bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
- MachineOperand &MO);
- void print(raw_ostream &os) const;
- void dump() const;
-};
-
-} // end anonymous namespace
+} // end namespace llvm
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
char MachinePipeliner::ID = 0;
@@ -884,12 +317,9 @@ void SwingSchedulerDAG::schedule() {
addLoopCarriedDependences(AA);
updatePhiDependences();
Topo.InitDAGTopologicalSorting();
- postprocessDAG();
changeDependences();
- LLVM_DEBUG({
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this);
- });
+ postprocessDAG();
+ LLVM_DEBUG(dump());
NodeSetType NodeSets;
findCircuits(NodeSets);
@@ -1101,11 +531,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
// First, perform the cheaper check that compares the base register.
// If they are the same and the load offset is less than the store
// offset, then mark the dependence as loop carried potentially.
- unsigned BaseReg1, BaseReg2;
+ MachineOperand *BaseOp1, *BaseOp2;
int64_t Offset1, Offset2;
- if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
- TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
- if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+ if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
+ TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
+ if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+ (int)Offset1 < (int)Offset2) {
assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
"What happened to the chain edge?");
SDep Dep(Load, SDep::Barrier);
@@ -1139,9 +570,9 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
continue;
}
AliasResult AAResult = AA->alias(
- MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
+ MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
+ MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
MMO2->getAAInfo()));
if (AAResult != NoAlias) {
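For reference, a short sketch of the updated aliasing query shown above: LocationSize::unknown() replaces the raw MemoryLocation::UnknownSize constant. AA, the pointers and the AATags values are assumed to be in scope; this mirrors the call in the hunk rather than defining new API.
    AliasResult AR = AA->alias(
        MemoryLocation(Ptr1, LocationSize::unknown(), AATags1),
        MemoryLocation(Ptr2, LocationSize::unknown(), AATags2));
    if (AR != NoAlias) {
      // Conservatively treat the two accesses as possibly overlapping.
    }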
@@ -1298,6 +729,7 @@ void SwingSchedulerDAG::changeDependences() {
// Add a dependence between the new instruction and the instruction
// that defines the new base.
SDep Dep(&I, SDep::Anti, NewBase);
+ Topo.AddPred(LastSU, &I);
LastSU->addPred(Dep);
// Remember the base and offset information so that we can update the
@@ -1509,9 +941,9 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
}
OutputDeps[N] = BackEdge;
}
- // Do not process a boundary node and a back-edge is processed only
- // if it goes to a Phi.
- if (SI.getSUnit()->isBoundaryNode() ||
+ // Do not process a boundary node or an artificial node.
+ // A back-edge is processed only if it goes to a Phi.
+ if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
(SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
continue;
int N = SI.getSUnit()->NodeNum;
@@ -1535,7 +967,7 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
}
}
}
- // Add back-eges in the adjacency matrix for the output dependences.
+ // Add back-edges in the adjacency matrix for the output dependences.
for (auto &OD : OutputDeps)
if (!Added.test(OD.second)) {
AdjK[OD.first].push_back(OD.second);
@@ -1564,7 +996,8 @@ bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
++NumPaths;
break;
} else if (!Blocked.test(W)) {
- if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
+ if (circuit(W, S, NodeSets,
+ Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
F = true;
}
}
@@ -1604,7 +1037,7 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
// but we do this to find the circuits, and then change them back.
swapAntiDependences(SUnits);
- Circuits Cir(SUnits);
+ Circuits Cir(SUnits, Topo);
// Create the adjacency structure.
Cir.createAdjacencyStructure(this);
for (int i = 0, e = SUnits.size(); i != e; ++i) {
@@ -1616,6 +1049,85 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
swapAntiDependences(SUnits);
}
+// Create artificial dependencies between the source of a COPY/REG_SEQUENCE
+// that is loop-carried to the USE in the next iteration. This helps the
+// pipeliner avoid additional copies across iterations. An artificial
+// dependence edge is added from USE to the SOURCE of the COPY/REG_SEQUENCE.
+
+// PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
+// SRCOfCopy------True-Dep---> COPY/REG_SEQUENCE
+// PHI-------True-Dep------> USEOfPhi
+
+// The mutation creates
+// USEOfPHI -------Artificial-Dep---> SRCOfCopy
+
+// Overall this ensures that USEOfPHI is scheduled before SRCOfCopy (since
+// USE is now a predecessor), which in turn means the COPY/REG_SEQUENCE is
+// scheduled late enough to avoid additional copies across iterations. The
+// resulting scheduling order would be
+// USEOfPHI --- SRCOfCopy --- COPY/REG_SEQUENCE.
+
+void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
+ for (SUnit &SU : DAG->SUnits) {
+ // Find the COPY/REG_SEQUENCE instruction.
+ if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
+ continue;
+
+ // Record the loop carried PHIs.
+ SmallVector<SUnit *, 4> PHISUs;
+ // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
+ SmallVector<SUnit *, 4> SrcSUs;
+
+ for (auto &Dep : SU.Preds) {
+ SUnit *TmpSU = Dep.getSUnit();
+ MachineInstr *TmpMI = TmpSU->getInstr();
+ SDep::Kind DepKind = Dep.getKind();
+ // Save the loop carried PHI.
+ if (DepKind == SDep::Anti && TmpMI->isPHI())
+ PHISUs.push_back(TmpSU);
+ // Save the source of COPY/REG_SEQUENCE.
+ // If the source has no predecessors, we will end up creating cycles.
+ else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
+ SrcSUs.push_back(TmpSU);
+ }
+
+ if (PHISUs.size() == 0 || SrcSUs.size() == 0)
+ continue;
+
+ // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
+ // SUnit to the container.
+ SmallVector<SUnit *, 8> UseSUs;
+ for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
+ for (auto &Dep : (*I)->Succs) {
+ if (Dep.getKind() != SDep::Data)
+ continue;
+
+ SUnit *TmpSU = Dep.getSUnit();
+ MachineInstr *TmpMI = TmpSU->getInstr();
+ if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
+ PHISUs.push_back(TmpSU);
+ continue;
+ }
+ UseSUs.push_back(TmpSU);
+ }
+ }
+
+ if (UseSUs.size() == 0)
+ continue;
+
+ SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
+ // Add the artificial dependencies if it does not form a cycle.
+ for (auto I : UseSUs) {
+ for (auto Src : SrcSUs) {
+ if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
+ Src->addPred(SDep(I, SDep::Artificial));
+ SDAG->Topo.AddPred(Src, I);
+ }
+ }
+ }
+ }
+}
+
/// Return true for DAG nodes that we ignore when computing the cost functions.
/// We ignore the back-edge recurrence in order to avoid unbounded recursion
/// in the calculation of the ASAP, ALAP, etc functions.
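A hedged sketch of how a mutation such as the CopyToPhiMutation added above is typically attached to the DAG. The actual registration point is not part of this hunk, so this only illustrates the usual ScheduleDAGInstrs::addMutation idiom with the llvm::make_unique helper of this LLVM version.
    // Attach the post-processing mutation once, before scheduling; it then
    // runs as part of postprocessDAG().
    addMutation(llvm::make_unique<CopyToPhiMutation>());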
@@ -1638,8 +1150,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
E = Topo.end();
I != E; ++I) {
- SUnit *SU = &SUnits[*I];
- SU->dump(this);
+ const SUnit &SU = SUnits[*I];
+ dumpNode(SU);
}
});
@@ -1864,8 +1376,7 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
RecRPTracker.closeBottom();
std::vector<SUnit *> SUnits(NS.begin(), NS.end());
- llvm::sort(SUnits.begin(), SUnits.end(),
- [](const SUnit *A, const SUnit *B) {
+ llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
return A->NodeNum > B->NodeNum;
});
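The range-based llvm::sort overload used in this hunk (declared in llvm/ADT/STLExtras.h) takes the container directly instead of an iterator pair; a tiny sketch of both forms, with NodeNums as a hypothetical container.
    SmallVector<unsigned, 8> NodeNums = {3, 1, 2};
    llvm::sort(NodeNums);                                                 // Ascending, uses operator<.
    llvm::sort(NodeNums, [](unsigned A, unsigned B) { return A > B; });   // Custom comparator.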
@@ -2672,7 +2183,7 @@ void SwingSchedulerDAG::generateExistingPhis(
else if (PrologStage >= AccessStage + StageDiff + np &&
VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
- // Check if the Phi has already been scheduled, but the loop intruction
+ // Check if the Phi has already been scheduled, but the loop instruction
// is either another Phi, or doesn't occur in the loop.
else if (PrologStage >= AccessStage + StageDiff + np) {
// If the Phi references another Phi, we need to examine the other
@@ -2725,7 +2236,7 @@ void SwingSchedulerDAG::generateExistingPhis(
VRMap[PrevStage - np + 1].count(Def))
PhiOp2 = VRMap[PrevStage - np + 1][Def];
// Use the loop value defined in the kernel.
- else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
+ else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
// Use the value defined by the Phi, unless we're generating the first
@@ -2739,35 +2250,38 @@ void SwingSchedulerDAG::generateExistingPhis(
// references another Phi, and the other Phi is scheduled in an
// earlier stage. We can try to reuse an existing Phi up until the last
// stage of the current Phi.
- if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) {
- int LVNumStages = Schedule.getStagesForPhi(LoopVal);
- int StageDiff = (StageScheduled - LoopValStage);
- LVNumStages -= StageDiff;
- // Make sure the loop value Phi has been processed already.
- if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
- NewReg = PhiOp2;
- unsigned ReuseStage = CurStageNum;
- if (Schedule.isLoopCarried(this, *PhiInst))
- ReuseStage -= LVNumStages;
- // Check if the Phi to reuse has been generated yet. If not, then
- // there is nothing to reuse.
- if (VRMap[ReuseStage - np].count(LoopVal)) {
- NewReg = VRMap[ReuseStage - np][LoopVal];
-
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, Def, NewReg);
- // Update the map with the new Phi name.
- VRMap[CurStageNum - np][Def] = NewReg;
- PhiOp2 = NewReg;
- if (VRMap[LastStageNum - np - 1].count(LoopVal))
- PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
-
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
- continue;
+ if (LoopDefIsPhi) {
+ if (static_cast<int>(PrologStage - np) >= StageScheduled) {
+ int LVNumStages = Schedule.getStagesForPhi(LoopVal);
+ int StageDiff = (StageScheduled - LoopValStage);
+ LVNumStages -= StageDiff;
+ // Make sure the loop value Phi has been processed already.
+ if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
+ NewReg = PhiOp2;
+ unsigned ReuseStage = CurStageNum;
+ if (Schedule.isLoopCarried(this, *PhiInst))
+ ReuseStage -= LVNumStages;
+ // Check if the Phi to reuse has been generated yet. If not, then
+ // there is nothing to reuse.
+ if (VRMap[ReuseStage - np].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage - np][LoopVal];
+
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, Def, NewReg);
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ PhiOp2 = NewReg;
+ if (VRMap[LastStageNum - np - 1].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ continue;
+ }
}
- } else if (InKernel && StageDiff > 0 &&
- VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+ }
+ if (InKernel && StageDiff > 0 &&
+ VRMap[CurStageNum - StageDiff - np].count(LoopVal))
PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
}
@@ -3143,11 +2657,16 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
/// during each iteration. Set Delta to the amount of the change.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
return false;
+ if (!BaseOp->isReg())
+ return false;
+
+ unsigned BaseReg = BaseOp->getReg();
+
MachineRegisterInfo &MRI = MF.getRegInfo();
// Check if there is a Phi. If so, get the definition in the loop.
MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
@@ -3175,28 +2694,26 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
return;
// If the instruction has memory operands, then adjust the offset
// when the instruction appears in different stages.
- unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
- if (NumRefs == 0)
+ if (NewMI.memoperands_empty())
return;
- MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
- unsigned Refs = 0;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
for (MachineMemOperand *MMO : NewMI.memoperands()) {
if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
(!MMO->getValue())) {
- NewMemRefs[Refs++] = MMO;
+ NewMMOs.push_back(MMO);
continue;
}
unsigned Delta;
if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
int64_t AdjOffset = Delta * Num;
- NewMemRefs[Refs++] =
- MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
} else {
- NewMI.dropMemRefs();
- return;
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
}
}
- NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
+ NewMI.setMemRefs(MF, NewMMOs);
}
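A compact sketch of the new memory-operand update idiom used above: the adjusted operands are collected in a SmallVector and handed to setMemRefs(), which now takes the MachineFunction (for allocation) and an ArrayRef rather than an iterator pair. MI and MF are assumed to be in scope.
    SmallVector<MachineMemOperand *, 2> NewMMOs;
    for (MachineMemOperand *MMO : MI.memoperands())
      NewMMOs.push_back(MF.getMachineMemOperand(MMO, /*Offset=*/0, MMO->getSize()));
    MI.setMemRefs(MF, NewMMOs);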
/// Clone the instruction for the new pipelined loop and update the
@@ -3552,19 +3069,19 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
return true;
- unsigned BaseRegS, BaseRegD;
+ MachineOperand *BaseOpS, *BaseOpD;
int64_t OffsetS, OffsetD;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
- !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
+ if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
+ !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
return true;
- if (BaseRegS != BaseRegD)
+ if (!BaseOpS->isIdenticalTo(*BaseOpD))
return true;
// Check that the base register is incremented by a constant value for each
// iteration.
- MachineInstr *Def = MRI.getVRegDef(BaseRegS);
+ MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
if (!Def || !Def->isPHI())
return true;
unsigned InitVal = 0;
@@ -3983,7 +3500,7 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
};
// sort, so that we can perform a binary search
- llvm::sort(Indices.begin(), Indices.end(), CompareKey);
+ llvm::sort(Indices, CompareKey);
bool Valid = true;
(void)Valid;
@@ -4193,6 +3710,14 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
LLVM_DEBUG(dump(););
}
+void NodeSet::print(raw_ostream &os) const {
+ os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
+ << " depth " << MaxDepth << " col " << Colocate << "\n";
+ for (const auto &I : Nodes)
+ os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
+ os << "\n";
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the schedule information to the given output.
void SMSchedule::print(raw_ostream &os) const {
@@ -4211,4 +3736,9 @@ void SMSchedule::print(raw_ostream &os) const {
/// Utility function used for debugging to print the schedule.
LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
+
#endif
+
+
+
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index f632a9bd457f..6e5ca45d5e5e 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -93,36 +93,29 @@ bool
MachineRegisterInfo::constrainRegAttrs(unsigned Reg,
unsigned ConstrainingReg,
unsigned MinNumRegs) {
- auto const *OldRC = getRegClassOrNull(Reg);
- auto const *RC = getRegClassOrNull(ConstrainingReg);
- // A virtual register at any point must have either a low-level type
- // or a class assigned, but not both. The only exception is the internals of
- // GlobalISel's instruction selection pass, which is allowed to temporarily
- // introduce registers with types and classes both.
- assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type");
- assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both");
- assert((RC || getType(ConstrainingReg).isValid()) &&
- "ConstrainingReg has neither class nor type");
- assert((!RC || !getType(ConstrainingReg).isValid()) &&
- "ConstrainingReg has class and type both");
- if (OldRC && RC)
- return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs);
- // If one of the virtual registers is generic (used in generic machine
- // instructions, has a low-level type, doesn't have a class), and the other is
- // concrete (used in target specific instructions, doesn't have a low-level
- // type, has a class), we can not unify them.
- if (OldRC || RC)
+ const LLT RegTy = getType(Reg);
+ const LLT ConstrainingRegTy = getType(ConstrainingReg);
+ if (RegTy.isValid() && ConstrainingRegTy.isValid() &&
+ RegTy != ConstrainingRegTy)
return false;
- // At this point, both registers are guaranteed to have a valid low-level
- // type, and they must agree.
- if (getType(Reg) != getType(ConstrainingReg))
- return false;
- auto const *OldRB = getRegBankOrNull(Reg);
- auto const *RB = getRegBankOrNull(ConstrainingReg);
- if (OldRB)
- return !RB || RB == OldRB;
- if (RB)
- setRegBank(Reg, *RB);
+ const auto ConstrainingRegCB = getRegClassOrRegBank(ConstrainingReg);
+ if (!ConstrainingRegCB.isNull()) {
+ const auto RegCB = getRegClassOrRegBank(Reg);
+ if (RegCB.isNull())
+ setRegClassOrRegBank(Reg, ConstrainingRegCB);
+ else if (RegCB.is<const TargetRegisterClass *>() !=
+ ConstrainingRegCB.is<const TargetRegisterClass *>())
+ return false;
+ else if (RegCB.is<const TargetRegisterClass *>()) {
+ if (!::constrainRegClass(
+ *this, Reg, RegCB.get<const TargetRegisterClass *>(),
+ ConstrainingRegCB.get<const TargetRegisterClass *>(), MinNumRegs))
+ return false;
+ } else if (RegCB != ConstrainingRegCB)
+ return false;
+ }
+ if (ConstrainingRegTy.isValid())
+ setType(Reg, ConstrainingRegTy);
return true;
}
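The rewritten constrainRegAttrs leans on the class-or-bank PointerUnion returned by getRegClassOrRegBank(); a hedged sketch of how such a value is usually inspected, with Reg and MRI assumed to be in scope.
    const auto &RCB = MRI.getRegClassOrRegBank(Reg);
    if (RCB.isNull()) {
      // Generic virtual register: only a low-level type (LLT) is recorded.
    } else if (RCB.is<const TargetRegisterClass *>()) {
      const TargetRegisterClass *RC = RCB.get<const TargetRegisterClass *>();
      (void)RC;  // Concrete register class.
    } else {
      const RegisterBank *RB = RCB.get<const RegisterBank *>();
      (void)RB;  // GlobalISel register bank.
    }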
@@ -177,11 +170,17 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}
+unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg,
+ StringRef Name) {
+ unsigned Reg = createIncompleteVirtualRegister(Name);
+ VRegInfo[Reg].first = VRegInfo[VReg].first;
+ setType(Reg, getType(VReg));
+ if (TheDelegate)
+ TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ return Reg;
+}
+
void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
- // Check that VReg doesn't have a class.
- assert((getRegClassOrRegBank(VReg).isNull() ||
- !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) &&
- "Can't set the size of a non-generic virtual register");
VRegToType.grow(VReg);
VRegToType[VReg] = Ty;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index 502d18f08f93..90dad9d399fe 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -41,6 +41,7 @@
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -100,8 +101,11 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
cl::desc("Only schedule this MBB#"));
+static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
+ cl::desc("Print schedule DAGs"));
#else
-static bool ViewMISchedDAGs = false;
+static const bool ViewMISchedDAGs = false;
+static const bool PrintDAGs = false;
#endif // NDEBUG
/// Avoid quadratic complexity in unusually large basic blocks by limiting the
@@ -237,7 +241,8 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-MachinePassRegistry MachineSchedRegistry::Registry;
+MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
+ MachineSchedRegistry::Registry;
/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
@@ -633,7 +638,7 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -670,7 +675,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -764,10 +769,8 @@ void ScheduleDAGMI::schedule() {
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
- LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
- for (const SUnit &SU
- : SUnits) SU.dumpAll(this);
- if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
if (ViewMISchedDAGs) viewGraph();
// Initialize the strategy before modifying the DAG.
@@ -920,7 +923,7 @@ void ScheduleDAGMI::placeDebugValues() {
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
if (SUnit *SU = getSUnit(&(*MI)))
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "Missing SUnit\n";
}
@@ -1171,6 +1174,29 @@ void ScheduleDAGMILive::updatePressureDiffs(
}
}
+void ScheduleDAGMILive::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits) {
+ dumpNodeAll(SU);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
+ dbgs() << '\n';
+ }
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
@@ -1197,22 +1223,8 @@ void ScheduleDAGMILive::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
- for (const SUnit &SU
- : SUnits) {
- SU.dumpAll(this);
- if (ShouldTrackPressure) {
- dbgs() << " Pressure Diff : ";
- getPressureDiff(&SU).dump(*TRI);
- }
- dbgs() << " Single Issue : ";
- if (SchedModel.mustBeginGroup(SU.getInstr()) &&
- SchedModel.mustEndGroup(SU.getInstr()))
- dbgs() << "true;";
- else
- dbgs() << "false;";
- dbgs() << '\n';
- } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -1472,15 +1484,40 @@ namespace {
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
SUnit *SU;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
- : SU(su), BaseReg(reg), Offset(ofs) {}
+ MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
+ : SU(su), BaseOp(Op), Offset(ofs) {}
+
+ bool operator<(const MemOpInfo &RHS) const {
+ if (BaseOp->getType() != RHS.BaseOp->getType())
+ return BaseOp->getType() < RHS.BaseOp->getType();
+
+ if (BaseOp->isReg())
+ return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
+ std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
+ RHS.SU->NodeNum);
+ if (BaseOp->isFI()) {
+ const MachineFunction &MF =
+ *BaseOp->getParent()->getParent()->getParent();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ bool StackGrowsDown = TFI.getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown;
+ // Can't use tuple comparison here since we might need to use a
+ // different order when the stack grows down.
+ if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
+ return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
+ : BaseOp->getIndex() < RHS.BaseOp->getIndex();
+
+ if (Offset != RHS.Offset)
+ return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+
+ return SU->NodeNum < RHS.SU->NodeNum;
+ }
- bool operator<(const MemOpInfo&RHS) const {
- return std::tie(BaseReg, Offset, SU->NodeNum) <
- std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
+ llvm_unreachable("MemOpClusterMutation only supports register or frame "
+ "index bases.");
}
};
@@ -1536,21 +1573,21 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
- MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
+ if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
}
if (MemOpRecords.size() < 2)
return;
- llvm::sort(MemOpRecords.begin(), MemOpRecords.end());
+ llvm::sort(MemOpRecords);
unsigned ClusterLength = 1;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
SUnit *SUa = MemOpRecords[Idx].SU;
SUnit *SUb = MemOpRecords[Idx+1].SU;
- if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
- *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
+ if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
+ *MemOpRecords[Idx + 1].BaseOp,
ClusterLength) &&
DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
@@ -2397,6 +2434,52 @@ initResourceDelta(const ScheduleDAGMI *DAG,
}
}
+/// Compute remaining latency. We need this both to determine whether the
+/// overall schedule has become latency-limited and whether the instructions
+/// outside this zone are resource or latency limited.
+///
+/// The "dependent" latency is updated incrementally during scheduling as the
+/// max height/depth of scheduled nodes minus the cycles since it was
+/// scheduled:
+/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
+///
+/// The "independent" latency is the max ready queue depth:
+/// ILat = max N.depth for N in Available|Pending
+///
+/// RemainingLatency is the greater of independent and dependent latency.
+///
+/// These computations are expensive, especially in DAGs with many edges, so
+/// only do them if necessary.
+static unsigned computeRemLatency(SchedBoundary &CurrZone) {
+ unsigned RemLatency = CurrZone.getDependentLatency();
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Available.elements()));
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+ return RemLatency;
+}
+
+/// Returns true if the current cycle plus remaining latency is greater than
+/// the critical path in the scheduling region.
+bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
+ SchedBoundary &CurrZone,
+ bool ComputeRemLatency,
+ unsigned &RemLatency) const {
+ // The current cycle is already greater than the critical path, so we are
+ // already latency limited and don't need to compute the remaining latency.
+ if (CurrZone.getCurrCycle() > Rem.CriticalPath)
+ return true;
+
+ // If we haven't scheduled anything yet, then we aren't latency limited.
+ if (CurrZone.getCurrCycle() == 0)
+ return false;
+
+ if (ComputeRemLatency)
+ RemLatency = computeRemLatency(CurrZone);
+
+ return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
+}
+
/// Set the CandPolicy given a scheduling zone given the current resources and
/// latencies inside and outside the zone.
void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
@@ -2406,46 +2489,32 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
// inside and outside this zone. Potential stalls should be considered before
// following this policy.
- // Compute remaining latency. We need this both to determine whether the
- // overall schedule has become latency-limited and whether the instructions
- // outside this zone are resource or latency limited.
- //
- // The "dependent" latency is updated incrementally during scheduling as the
- // max height/depth of scheduled nodes minus the cycles since it was
- // scheduled:
- // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
- //
- // The "independent" latency is the max ready queue depth:
- // ILat = max N.depth for N in Available|Pending
- //
- // RemainingLatency is the greater of independent and dependent latency.
- unsigned RemLatency = CurrZone.getDependentLatency();
- RemLatency = std::max(RemLatency,
- CurrZone.findMaxLatency(CurrZone.Available.elements()));
- RemLatency = std::max(RemLatency,
- CurrZone.findMaxLatency(CurrZone.Pending.elements()));
-
// Compute the critical resource outside the zone.
unsigned OtherCritIdx = 0;
unsigned OtherCount =
OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
bool OtherResLimited = false;
- if (SchedModel->hasInstrSchedModel())
+ unsigned RemLatency = 0;
+ bool RemLatencyComputed = false;
+ if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
+ RemLatency = computeRemLatency(CurrZone);
+ RemLatencyComputed = true;
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
OtherCount, RemLatency);
+ }
// Schedule aggressively for latency in PostRA mode. We don't check for
// acyclic latency during PostRA, and highly out-of-order processors will
// skip PostRA scheduling.
- if (!OtherResLimited) {
- if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
- Policy.ReduceLatency |= true;
- LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
- << " RemainingLatency " << RemLatency << " + "
- << CurrZone.getCurrCycle() << "c > CritPath "
- << Rem.CriticalPath << "\n");
- }
+ if (!OtherResLimited &&
+ (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
+ RemLatency))) {
+ Policy.ReduceLatency |= true;
+ LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
+ << " RemainingLatency " << RemLatency << " + "
+ << CurrZone.getCurrCycle() << "c > CritPath "
+ << Rem.CriticalPath << "\n");
}
// If the same resource is limiting inside and outside the zone, do nothing.
if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
@@ -2473,7 +2542,7 @@ const char *GenericSchedulerBase::getReasonStr(
switch (Reason) {
case NoCand: return "NOCAND ";
case Only1: return "ONLY1 ";
- case PhysRegCopy: return "PREG-COPY ";
+ case PhysReg: return "PHYS-REG ";
case RegExcess: return "REG-EXCESS";
case RegCritical: return "REG-CRIT ";
case Stall: return "STALL ";
@@ -2809,24 +2878,41 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) {
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
-int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+int biasPhysReg(const SUnit *SU, bool isTop) {
const MachineInstr *MI = SU->getInstr();
- if (!MI->isCopy())
- return 0;
- unsigned ScheduledOper = isTop ? 1 : 0;
- unsigned UnscheduledOper = isTop ? 0 : 1;
- // If we have already scheduled the physreg produce/consumer, immediately
- // schedule the copy.
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(ScheduledOper).getReg()))
- return 1;
- // If the physreg is at the boundary, defer it. Otherwise schedule it
- // immediately to free the dependent. We can hoist the copy later.
- bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(UnscheduledOper).getReg()))
- return AtBoundary ? -1 : 1;
+ if (MI->isCopy()) {
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+ // If we have already scheduled the physreg produce/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ }
+
+ if (MI->isMoveImmediate()) {
+ // If we have a move immediate and all successors have been assigned, bias
+ // towards scheduling this later. Make sure all register defs are to
+ // physical registers.
+ bool DoBias = true;
+ for (const MachineOperand &Op : MI->defs()) {
+ if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
+ DoBias = false;
+ break;
+ }
+ }
+
+ if (DoBias)
+ return isTop ? -1 : 1;
+ }
+
return 0;
}
} // end namespace llvm
@@ -2887,9 +2973,9 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
return;
}
- if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
- biasPhysRegCopy(Cand.SU, Cand.AtTop),
- TryCand, Cand, PhysRegCopy))
+ // Bias physreg defs and copies toward their uses and definitions, respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
return;
// Avoid exceeding the target's limit.
@@ -3136,7 +3222,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
return SU;
}
-void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
++InsertPos;
@@ -3151,10 +3237,10 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
continue;
MachineInstr *Copy = DepSU->getInstr();
- if (!Copy->isCopy())
+ if (!Copy->isCopy() && !Copy->isMoveImmediate())
continue;
LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
- Dep.getSUnit()->dump(DAG));
+ DAG->dumpNode(*Dep.getSUnit()));
DAG->moveInstruction(Copy, InsertPos);
}
}
@@ -3165,18 +3251,18 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
/// does.
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
-/// them here. See comments in biasPhysRegCopy.
+/// them here. See comments in biasPhysReg.
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
- reschedulePhysRegCopies(SU, true);
+ reschedulePhysReg(SU, true);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
- reschedulePhysRegCopies(SU, false);
+ reschedulePhysReg(SU, false);
}
}
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 1fd40f757351..cdc597db6401 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -513,25 +513,6 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return true;
}
-/// collectDebgValues - Scan instructions following MI and collect any
-/// matching DBG_VALUEs.
-static void collectDebugValues(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DbgValues) {
- DbgValues.clear();
- if (!MI.getOperand(0).isReg())
- return;
-
- MachineBasicBlock::iterator DI = MI; ++DI;
- for (MachineBasicBlock::iterator DE = MI.getParent()->end();
- DI != DE; ++DI) {
- if (!DI->isDebugValue())
- return;
- if (DI->getOperand(0).isReg() &&
- DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
- DbgValues.push_back(&*DI);
- }
-}
-
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -735,9 +716,12 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
!PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
return false;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ return false;
+
+ if (!BaseOp->isReg())
return false;
if (!(MI.mayLoad() && !MI.isPredicable()))
@@ -750,15 +734,21 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
(MBP.Predicate == MachineBranchPredicate::PRED_NE ||
MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
- MBP.LHS.getReg() == BaseReg;
+ MBP.LHS.getReg() == BaseOp->getReg();
}
-/// Sink an instruction and its associated debug instructions.
+/// Sink an instruction and its associated debug instructions. If the debug
+/// instructions to be sunk are already known, they can be provided in DbgVals.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
- MachineBasicBlock::iterator InsertPos) {
- // Collect matching debug values.
+ MachineBasicBlock::iterator InsertPos,
+ SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) {
+ // If debug values are provided, use those; otherwise call collectDebugValues.
SmallVector<MachineInstr *, 2> DbgValuesToSink;
- collectDebugValues(MI, DbgValuesToSink);
+ if (DbgVals)
+ DbgValuesToSink.insert(DbgValuesToSink.begin(),
+ DbgVals->begin(), DbgVals->end());
+ else
+ MI.collectDebugValues(DbgValuesToSink);
// If we cannot find a location to use (merge with), then we erase the debug
// location to prevent debug-info driven tools from potentially reporting
@@ -970,6 +960,9 @@ private:
/// Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+ /// Track DBG_VALUEs of (unmodified) register units.
+ DenseMap<unsigned, TinyPtrVector<MachineInstr*>> SeenDbgInstrs;
+
/// Sink Copy instructions unused in the same block close to their uses in
/// successors.
bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
@@ -1056,8 +1049,11 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
SmallVectorImpl<unsigned> &UsedOpsInCopy,
SmallVectorImpl<unsigned> &DefedRegsInCopy) {
- for (auto DefReg : DefedRegsInCopy)
- SuccBB->removeLiveIn(DefReg);
+ MachineFunction &MF = *SuccBB->getParent();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (unsigned DefReg : DefedRegsInCopy)
+ for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
+ SuccBB->removeLiveIn(*S);
for (auto U : UsedOpsInCopy) {
unsigned Reg = MI->getOperand(U).getReg();
if (!SuccBB->isLiveIn(Reg))
@@ -1121,11 +1117,34 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// block and the current instruction.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
+ SeenDbgInstrs.clear();
for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
MachineInstr *MI = &*I;
++I;
+ // Track the operand index for use in Copy.
+ SmallVector<unsigned, 2> UsedOpsInCopy;
+ // Track the register number defed in Copy.
+ SmallVector<unsigned, 2> DefedRegsInCopy;
+
+ // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
+ // for DBG_VALUEs later, record them when they're encountered.
+ if (MI->isDebugValue()) {
+ auto &MO = MI->getOperand(0);
+ if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) {
+ // Bail if we can already tell the sink would be rejected, rather
+ // than needlessly accumulating lots of DBG_VALUEs.
+ if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ ModifiedRegUnits, UsedRegUnits))
+ continue;
+
+ // Record debug use of this register.
+ SeenDbgInstrs[MO.getReg()].push_back(MI);
+ }
+ continue;
+ }
+
if (MI->isDebugInstr())
continue;
@@ -1139,11 +1158,6 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
continue;
}
- // Track the operand index for use in Copy.
- SmallVector<unsigned, 2> UsedOpsInCopy;
- // Track the register number defed in Copy.
- SmallVector<unsigned, 2> DefedRegsInCopy;
-
// Don't sink the COPY if it would violate a register dependency.
if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
@@ -1165,11 +1179,21 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
"Unexpected predecessor");
+ // Collect DBG_VALUEs that must sink with this copy.
+ SmallVector<MachineInstr *, 4> DbgValsToSink;
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned reg = MO.getReg();
+ for (auto *MI : SeenDbgInstrs.lookup(reg))
+ DbgValsToSink.push_back(MI);
+ }
+
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
- performSink(*MI, *SuccBB, InsertPos);
+ performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink);
updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
Changed = true;
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 79ca6adf95c4..e62ed3094651 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -218,8 +218,7 @@ computeHeightResources(const MachineBasicBlock *MBB) {
// The trace tail is done.
if (!TBI->Succ) {
TBI->Tail = MBB->getNumber();
- std::copy(PRCycles.begin(), PRCycles.end(),
- ProcResourceHeights.begin() + PROffset);
+ llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
return;
}
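llvm::copy, used in this hunk, is the range-based counterpart of std::copy from llvm/ADT/STLExtras.h; an equivalent sketch with hypothetical containers.
    std::vector<unsigned> Src = {1, 2, 3};
    std::vector<unsigned> Dest(Src.size());
    llvm::copy(Src, Dest.begin());  // Same as std::copy(Src.begin(), Src.end(), Dest.begin()).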
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index 318776136e24..534d3699db29 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -23,6 +23,7 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
+#include "LiveRangeCalc.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -108,6 +109,7 @@ namespace {
using RegMap = DenseMap<unsigned, const MachineInstr *>;
using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
+ const MachineInstr *FirstNonPHI;
const MachineInstr *FirstTerminator;
BlockSet FunctionBlocks;
@@ -248,6 +250,7 @@ namespace {
void report_context(const LiveRange::Segment &S) const;
void report_context(const VNInfo &VNI) const;
void report_context(SlotIndex Pos) const;
+ void report_context(MCPhysReg PhysReg) const;
void report_context_liverange(const LiveRange &LR) const;
void report_context_lanemask(LaneBitmask LaneMask) const;
void report_context_vreg(unsigned VReg) const;
@@ -261,6 +264,7 @@ namespace {
LaneBitmask LaneMask = LaneBitmask::getNone());
void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ bool SubRangeCheck = false,
LaneBitmask LaneMask = LaneBitmask::getNone());
void markReachable(const MachineBasicBlock *MBB);
@@ -362,6 +366,13 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
const bool isFunctionFailedISel = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel);
+
+ // If we're mid-GlobalISel and we already triggered the fallback path then
+ // it's expected that the MIR is somewhat broken but that's ok since we'll
+ // reset it and clear the FailedISel attribute in ResetMachineFunctions.
+ if (isFunctionFailedISel)
+ return foundErrors;
+
isFunctionRegBankSelected =
!isFunctionFailedISel &&
MF.getProperties().hasProperty(
@@ -530,6 +541,10 @@ void MachineVerifier::report_context_liverange(const LiveRange &LR) const {
errs() << "- liverange: " << LR << '\n';
}
+void MachineVerifier::report_context(MCPhysReg PReg) const {
+ errs() << "- p. register: " << printReg(PReg, TRI) << '\n';
+}
+
void MachineVerifier::report_context_vreg(unsigned VReg) const {
errs() << "- v. register: " << printReg(VReg, TRI) << '\n';
}
@@ -599,6 +614,7 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i,
void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
+ FirstNonPHI = nullptr;
if (!MF->getProperties().hasProperty(
MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) {
@@ -608,6 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
MBB->getIterator() != MBB->getParent()->begin()) {
report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
+ report_context(LI.PhysReg);
}
}
}
@@ -666,7 +683,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// out the bottom of the function.
} else if (MBB->succ_size() == LandingPadSuccs.size()) {
// It's possible that the block legitimately ends with a noreturn
- // call or an unreachable, in which case it won't actuall fall
+ // call or an unreachable, in which case it won't actually fall
// out of the block.
} else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional fall-through but doesn't have "
@@ -767,7 +784,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
"isn't a terminator instruction!", MBB);
}
if (Cond.empty()) {
- report("MBB exits via conditinal branch/branch but there's no "
+ report("MBB exits via conditional branch/branch but there's no "
"condition!", MBB);
}
} else {
@@ -880,9 +897,15 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
<< MI->getNumOperands() << " given.\n";
}
- if (MI->isPHI() && MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::NoPHIs))
- report("Found PHI instruction with NoPHIs property set", MI);
+ if (MI->isPHI()) {
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs))
+ report("Found PHI instruction with NoPHIs property set", MI);
+
+ if (FirstNonPHI)
+ report("Found PHI instruction after non-PHI", MI);
+ } else if (FirstNonPHI == nullptr)
+ FirstNonPHI = MI;
// Check the tied operands.
if (MI->isInlineAsm())
@@ -1038,6 +1061,89 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_MERGE_VALUES: {
+ // G_MERGE_VALUES should only be used to merge scalars into a larger scalar,
+ // e.g. s2N = MERGE sN, sN
+ // Merging multiple scalars into a vector is not allowed, should use
+ // G_BUILD_VECTOR for that.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (DstTy.isVector() || SrcTy.isVector())
+ report("G_MERGE_VALUES cannot operate on vectors", MI);
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(MI->getNumOperands()-1).getReg());
+ // For now G_UNMERGE can split vectors.
+ for (unsigned i = 0; i < MI->getNumOperands()-1; ++i) {
+ if (MRI->getType(MI->getOperand(i).getReg()) != DstTy)
+ report("G_UNMERGE_VALUES destination types do not match", MI);
+ }
+ if (SrcTy.getSizeInBits() !=
+ (DstTy.getSizeInBits() * (MI->getNumOperands() - 1))) {
+ report("G_UNMERGE_VALUES source operand does not cover dest operands",
+ MI);
+ }
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR: {
+ // Source types must be scalars, dest type a vector. Total size of scalars
+ // must match the dest vector size.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || SrcEltTy.isVector())
+ report("G_BUILD_VECTOR must produce a vector from scalar operands", MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
+ }
+ if (DstTy.getSizeInBits() !=
+ SrcEltTy.getSizeInBits() * (MI->getNumOperands() - 1))
+ report("G_BUILD_VECTOR src operands total size don't match dest "
+ "size.",
+ MI);
+ break;
+ }
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
+ // Source types must be scalars, dest type a vector. Scalar types must be
+ // larger than the dest vector elt type, as this is a truncating operation.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || SrcEltTy.isVector())
+ report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands",
+ MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous",
+ MI);
+ }
+ if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits())
+ report("G_BUILD_VECTOR_TRUNC source operand types are not larger than "
+ "dest elt type",
+ MI);
+ break;
+ }
+ case TargetOpcode::G_CONCAT_VECTORS: {
+ // Source types should be vectors, and total size should match the dest
+ // vector size.
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isVector() || !SrcTy.isVector())
+ report("G_CONCAT_VECTOR requires vector source and destination operands",
+ MI);
+ for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
+ if (MRI->getType(MI->getOperand(1).getReg()) !=
+ MRI->getType(MI->getOperand(i).getReg()))
+ report("G_CONCAT_VECTOR source operand types are not homogeneous", MI);
+ }
+ if (DstTy.getNumElements() !=
+ SrcTy.getNumElements() * (MI->getNumOperands() - 1))
+ report("G_CONCAT_VECTOR num dest and source elements should match", MI);
+ break;
+ }
case TargetOpcode::COPY: {
if (foundErrors)
break;
@@ -1395,7 +1501,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
- LaneBitmask LaneMask) {
+ bool SubRangeCheck, LaneBitmask LaneMask) {
if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
assert(VNI && "NULL valno is not allowed");
if (VNI->def != DefIdx) {
@@ -1419,25 +1525,14 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
if (MO->isDead()) {
LiveQueryResult LRQ = LR.Query(DefIdx);
if (!LRQ.isDeadDef()) {
- // In case of physregs we can have a non-dead definition on another
- // operand.
- bool otherDef = false;
- if (!TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
- const MachineInstr &MI = *MO->getParent();
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || MO.isDead())
- continue;
- unsigned Reg = MO.getReg();
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (*Units == VRegOrUnit) {
- otherDef = true;
- break;
- }
- }
- }
- }
-
- if (!otherDef) {
+ assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) &&
+ "Expecting a virtual register.");
+ // A dead subreg def only tells us that the specific subreg is dead. There
+ // could be other non-dead defs of other subregs, or we could have other
+ // parts of the register being live through the instruction. So unless we
+ // are checking liveness for a subrange it is ok for the live range to
+ // continue, given that we have a dead def of a subregister.
+ if (SubRangeCheck || MO->getSubReg() == 0) {
report("Live range continues after dead def flag", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
@@ -1532,10 +1627,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// get a report for its operand.
if (Bad) {
for (const MachineOperand &MOP : MI->uses()) {
- if (!MOP.isReg())
+ if (!MOP.isReg() || !MOP.isImplicit())
continue;
- if (!MOP.isImplicit())
+
+ if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg()))
continue;
+
for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
++SubRegs) {
if (*SubRegs == Reg) {
@@ -1593,7 +1690,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
for (const LiveInterval::SubRange &SR : LI.subranges()) {
if ((SR.LaneMask & MOMask).none())
continue;
- checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask);
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
}
}
} else {
@@ -2116,6 +2213,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Skip this block.
++MFI;
}
+
+ SmallVector<SlotIndex, 4> Undefs;
+ if (LaneMask.any()) {
+ LiveInterval &OwnerLI = LiveInts->getInterval(Reg);
+ OwnerLI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes);
+ }
+
while (true) {
assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
@@ -2141,7 +2245,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// instruction with subregister intervals
// only one of the subregisters (not necessarily the current one) needs to
// be defined.
- if (!PVNI && (LaneMask.none() || !IsPHI) ) {
+ if (!PVNI && (LaneMask.none() || !IsPHI)) {
+ if (LiveRangeCalc::isJointlyDominated(*PI, Undefs, *Indexes))
+ continue;
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
diff --git a/contrib/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
index 62dadbba0c1a..82b6d642c73b 100644
--- a/contrib/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
@@ -67,8 +67,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SI.setLatency(0);
LLVM_DEBUG(
- dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
- SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
+ dbgs() << "Macro fuse: "; DAG.dumpNodeName(FirstSU); dbgs() << " - ";
+ DAG.dumpNodeName(SecondSU); dbgs() << " / ";
dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - "
<< DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';);
@@ -80,8 +80,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
if (SI.isWeak() || isHazard(SI) ||
SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))
continue;
- LLVM_DEBUG(dbgs() << " Bind "; SecondSU.print(dbgs(), &DAG);
- dbgs() << " - "; SU->print(dbgs(), &DAG); dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(SecondSU);
+ dbgs() << " - "; DAG.dumpNodeName(*SU); dbgs() << '\n';);
DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
}
@@ -92,8 +92,8 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
SUnit *SU = SI.getSUnit();
if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))
continue;
- LLVM_DEBUG(dbgs() << " Bind "; SU->print(dbgs(), &DAG); dbgs() << " - ";
- FirstSU.print(dbgs(), &DAG); dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << " Bind "; DAG.dumpNodeName(*SU); dbgs() << " - ";
+ DAG.dumpNodeName(FirstSU); dbgs() << '\n';);
DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
}
// ExitSU comes last by design, which acts like an implicit dependency
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index befa8422d399..770f6c5b0403 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -90,10 +90,10 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
}
/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
-/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
/// SingleValReg is zero on entry, it is set to the register with the single
-/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
-/// have been scanned.
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned. PHIs may form a single cycle, several cycles, or chains.
bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
unsigned &SingleValReg,
InstrSet &PHIsInCycle) {
@@ -119,8 +119,10 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
if (SrcMI && SrcMI->isCopy() &&
!SrcMI->getOperand(0).getSubReg() &&
!SrcMI->getOperand(1).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
- SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) {
+ SrcReg = SrcMI->getOperand(1).getReg();
+ SrcMI = MRI->getVRegDef(SrcReg);
+ }
if (!SrcMI)
return false;
@@ -129,7 +131,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
return false;
} else {
// Fail if there is more than one non-phi/non-move register.
- if (SingleValReg != 0)
+ if (SingleValReg != 0 && SingleValReg != SrcReg)
return false;
SingleValReg = SrcReg;
}
@@ -180,6 +182,9 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
continue;
+ // Clear kill flags on SingleValReg in case it was taken from a copy instruction.
+ MRI->clearKillFlags(SingleValReg);
+
MRI->replaceRegWith(OldReg, SingleValReg);
MI->eraseFromParent();
++NumPHICycles;
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index 7a5c20000066..b9801c6fd97b 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -153,8 +153,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // Split critical edges to help the coalescer. This does not yet support
- // updating LiveIntervals, so we disable it.
+ // Split critical edges to help the coalescer.
if (!DisableEdgeSplitting && (LV || LIS)) {
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
for (auto &MBB : MF)
@@ -197,12 +196,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
- MachineBasicBlock &MBB) {
+ MachineBasicBlock &MBB) {
if (MBB.empty() || !MBB.front().isPHI())
return false; // Quick exit for basic blocks without PHIs.
- // Get an iterator to the first instruction after the last PHI node (this may
- // also be the end of the basic block).
+ // Get an iterator to the last PHI node.
MachineBasicBlock::iterator LastPHIIt =
std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
@@ -212,26 +210,26 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
return true;
}
-/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs.
+/// Return true if all defs of VirtReg are implicit-defs.
/// This includes registers with no defs.
static bool isImplicitlyDefined(unsigned VirtReg,
- const MachineRegisterInfo *MRI) {
- for (MachineInstr &DI : MRI->def_instructions(VirtReg))
+ const MachineRegisterInfo &MRI) {
+ for (MachineInstr &DI : MRI.def_instructions(VirtReg))
if (!DI.isImplicitDef())
return false;
return true;
}
-/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
-/// are implicit_def's.
-static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
- const MachineRegisterInfo *MRI) {
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
- if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
+/// Return true if all sources of the phi node are implicit_def's, or undef's.
+static bool allPhiOperandsUndefined(const MachineInstr &MPhi,
+ const MachineRegisterInfo &MRI) {
+ for (unsigned I = 1, E = MPhi.getNumOperands(); I != E; I += 2) {
+ const MachineOperand &MO = MPhi.getOperand(I);
+ if (!isImplicitlyDefined(MO.getReg(), MRI) && !MO.isUndef())
return false;
+ }
return true;
}
-
/// LowerPHINode - Lower the PHI node at the top of the specified block.
void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator LastPHIIt) {
@@ -256,8 +254,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// after any remaining phi nodes) which copies the new incoming register
// into the phi node destination.
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- if (isSourceDefinedByImplicitDef(MPhi, MRI))
- // If all sources of a PHI node are implicit_def, just emit an
+ if (allPhiOperandsUndefined(*MPhi, *MRI))
+ // If all sources of a PHI node are implicit_def or undef uses, just emit an
// implicit_def instead of a copy.
BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
@@ -374,7 +372,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
- isImplicitlyDefined(SrcReg, MRI);
+ isImplicitlyDefined(SrcReg, *MRI);
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 215da630caf4..dd0a5fe1b39d 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -256,7 +256,7 @@ void SchedulePostRATDList::exitRegion() {
LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
}
@@ -414,11 +414,7 @@ void SchedulePostRATDList::schedule() {
postprocessDAG();
LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
- LLVM_DEBUG(for (const SUnit &SU
- : SUnits) {
- SU.dumpAll(this);
- dbgs() << '\n';
- });
+ LLVM_DEBUG(dump());
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
@@ -465,7 +461,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -502,7 +498,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() &&
diff --git a/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 8f88ef78828a..b0e9ac03612d 100644
--- a/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -7,13 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass implements IR lowering for the llvm.load.relative intrinsic.
+// This pass implements IR lowering for the llvm.load.relative and llvm.objc.*
+// intrinsics.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -55,11 +57,129 @@ static bool lowerLoadRelative(Function &F) {
return Changed;
}
+static bool lowerObjCCall(Function &F, const char *NewFn,
+ bool setNonLazyBind = false) {
+ if (F.use_empty())
+ return false;
+
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = F.getParent();
+ Constant* FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
+
+ if (Function* Fn = dyn_cast<Function>(FCache)) {
+ Fn->setLinkage(F.getLinkage());
+ if (setNonLazyBind && !Fn->isWeakForLinker()) {
+ // If we have Native ARC, set nonlazybind attribute for these APIs for
+ // performance.
+ Fn->addFnAttr(Attribute::NonLazyBind);
+ }
+ }
+
+ for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
+ auto *CI = dyn_cast<CallInst>(I->getUser());
+ assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
+ ++I;
+
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
+ SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ NewCI->setTailCallKind(CI->getTailCallKind());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ }
+
+ return true;
+}
+
static bool lowerIntrinsics(Module &M) {
bool Changed = false;
for (Function &F : M) {
- if (F.getName().startswith("llvm.load.relative."))
+ if (F.getName().startswith("llvm.load.relative.")) {
Changed |= lowerLoadRelative(F);
+ continue;
+ }
+ switch (F.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::objc_autorelease:
+ Changed |= lowerObjCCall(F, "objc_autorelease");
+ break;
+ case Intrinsic::objc_autoreleasePoolPop:
+ Changed |= lowerObjCCall(F, "objc_autoreleasePoolPop");
+ break;
+ case Intrinsic::objc_autoreleasePoolPush:
+ Changed |= lowerObjCCall(F, "objc_autoreleasePoolPush");
+ break;
+ case Intrinsic::objc_autoreleaseReturnValue:
+ Changed |= lowerObjCCall(F, "objc_autoreleaseReturnValue");
+ break;
+ case Intrinsic::objc_copyWeak:
+ Changed |= lowerObjCCall(F, "objc_copyWeak");
+ break;
+ case Intrinsic::objc_destroyWeak:
+ Changed |= lowerObjCCall(F, "objc_destroyWeak");
+ break;
+ case Intrinsic::objc_initWeak:
+ Changed |= lowerObjCCall(F, "objc_initWeak");
+ break;
+ case Intrinsic::objc_loadWeak:
+ Changed |= lowerObjCCall(F, "objc_loadWeak");
+ break;
+ case Intrinsic::objc_loadWeakRetained:
+ Changed |= lowerObjCCall(F, "objc_loadWeakRetained");
+ break;
+ case Intrinsic::objc_moveWeak:
+ Changed |= lowerObjCCall(F, "objc_moveWeak");
+ break;
+ case Intrinsic::objc_release:
+ Changed |= lowerObjCCall(F, "objc_release", true);
+ break;
+ case Intrinsic::objc_retain:
+ Changed |= lowerObjCCall(F, "objc_retain", true);
+ break;
+ case Intrinsic::objc_retainAutorelease:
+ Changed |= lowerObjCCall(F, "objc_retainAutorelease");
+ break;
+ case Intrinsic::objc_retainAutoreleaseReturnValue:
+ Changed |= lowerObjCCall(F, "objc_retainAutoreleaseReturnValue");
+ break;
+ case Intrinsic::objc_retainAutoreleasedReturnValue:
+ Changed |= lowerObjCCall(F, "objc_retainAutoreleasedReturnValue");
+ break;
+ case Intrinsic::objc_retainBlock:
+ Changed |= lowerObjCCall(F, "objc_retainBlock");
+ break;
+ case Intrinsic::objc_storeStrong:
+ Changed |= lowerObjCCall(F, "objc_storeStrong");
+ break;
+ case Intrinsic::objc_storeWeak:
+ Changed |= lowerObjCCall(F, "objc_storeWeak");
+ break;
+ case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
+ Changed |= lowerObjCCall(F, "objc_unsafeClaimAutoreleasedReturnValue");
+ break;
+ case Intrinsic::objc_retainedObject:
+ Changed |= lowerObjCCall(F, "objc_retainedObject");
+ break;
+ case Intrinsic::objc_unretainedObject:
+ Changed |= lowerObjCCall(F, "objc_unretainedObject");
+ break;
+ case Intrinsic::objc_unretainedPointer:
+ Changed |= lowerObjCCall(F, "objc_unretainedPointer");
+ break;
+ case Intrinsic::objc_retain_autorelease:
+ Changed |= lowerObjCCall(F, "objc_retain_autorelease");
+ break;
+ case Intrinsic::objc_sync_enter:
+ Changed |= lowerObjCCall(F, "objc_sync_enter");
+ break;
+ case Intrinsic::objc_sync_exit:
+ Changed |= lowerObjCCall(F, "objc_sync_exit");
+ break;
+ }
}
return Changed;
}
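
For readers skimming the hunk above, the per-call-site rewrite done by lowerObjCCall can be pictured with the short sketch below. It is illustrative only and not part of the patch: rewriteObjCCallSite is a hypothetical name, RuntimeFn stands for the Constant returned by getOrInsertFunction in the code above, and the function-lookup and nonlazybind handling are omitted.

static void rewriteObjCCallSite(CallInst *CI, Constant *RuntimeFn) {
  // Build the replacement call right before the old intrinsic call.
  IRBuilder<> Builder(CI->getParent(), CI->getIterator());
  SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
  CallInst *NewCI = Builder.CreateCall(RuntimeFn, Args);
  NewCI->setName(CI->getName());
  NewCI->setTailCallKind(CI->getTailCallKind()); // preserve the tail-call kind
  // Redirect users, e.g. a call to llvm.objc.retain now uses objc_retain.
  if (!CI->use_empty())
    CI->replaceAllUsesWith(NewCI);
  CI->eraseFromParent();
}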
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index fc62c8caf59e..23754e487a18 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -75,6 +75,10 @@ using namespace llvm;
using MBBVector = SmallVector<MachineBasicBlock *, 4>;
+STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
+STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
+
+
namespace {
class PEI : public MachineFunctionPass {
@@ -168,6 +172,7 @@ using StackObjSet = SmallSetVector<int, 8>;
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &MF) {
+ NumFuncSeen++;
const Function &F = MF.getFunction();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -357,6 +362,11 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
// Now that we know which registers need to be saved and restored, allocate
// stack slots for them.
for (auto &CS : CSI) {
+ // If the target has spilled this register to another register, we don't
+ // need to allocate a stack slot.
+ if (CS.isSpilledToReg())
+ continue;
+
unsigned Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
@@ -454,7 +464,22 @@ static void updateLiveness(MachineFunction &MF) {
if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
}
+    // If a callee-saved register is spilled to another register rather than
+    // to the stack, the destination register has to be marked as live for
+ // each MBB between the prologue and epilogue so that it is not clobbered
+ // before it is reloaded in the epilogue. The Visited set contains all
+ // blocks outside of the region delimited by prologue/epilogue.
+ if (CSI[i].isSpilledToReg()) {
+ for (MachineBasicBlock &MBB : MF) {
+ if (Visited.count(&MBB))
+ continue;
+ MCPhysReg DstReg = CSI[i].getDstReg();
+ if (!MBB.isLiveIn(DstReg))
+ MBB.addLiveIn(DstReg);
+ }
+ }
}
+
}
/// Insert restore code for the callee-saved registers used in the function.
@@ -530,6 +555,9 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (!CSI.empty()) {
+ if (!MFI.hasCalls())
+ NumLeafFuncWithSpills++;
+
for (MachineBasicBlock *SaveBlock : SaveBlocks) {
insertCSRSaves(*SaveBlock, CSI);
// Update the live-in information of all the blocks up to the save
@@ -1090,7 +1118,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
MachineOperand &Offset = MI.getOperand(i + 1);
int refOffset = TFI->getFrameIndexReferencePreferSP(
MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
- Offset.setImm(Offset.getImm() + refOffset);
+ Offset.setImm(Offset.getImm() + refOffset + SPAdj);
MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
continue;
}
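
As a side note on the callee-saved-register hunks above, the new isSpilledToReg/getDstReg handling can be summarized by the sketch below. handleCSR, Size and Align are made-up names used only for illustration, and the real updateLiveness additionally skips the blocks recorded in the Visited set; this is not code from the patch.

static void handleCSR(MachineFunction &MF, MachineFrameInfo &MFI,
                      CalleeSavedInfo &CS, unsigned Size, unsigned Align) {
  if (CS.isSpilledToReg()) {
    // Saved in another register: no frame object is needed, but the
    // destination register must stay live between prologue and epilogue.
    MCPhysReg DstReg = CS.getDstReg();
    for (MachineBasicBlock &MBB : MF)
      if (!MBB.isLiveIn(DstReg))
        MBB.addLiveIn(DstReg);
    return;
  }
  // Saved on the stack: allocate a spill slot as before.
  CS.setFrameIdx(MFI.CreateSpillStackObject(Size, Align));
}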
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 86fd87450521..6ca8d86e3f8e 100644
--- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -25,7 +25,7 @@ static const char *const PSVNames[] = {
"Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
"GlobalValueCallEntry", "ExternalSymbolCallEntry"};
-PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII)
+PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
: Kind(Kind) {
AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind);
}
@@ -81,7 +81,7 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
}
CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(
- PSVKind Kind, const TargetInstrInfo &TII)
+ unsigned Kind, const TargetInstrInfo &TII)
: PseudoSourceValue(Kind, TII) {}
bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
diff --git a/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 050fef5d25ed..a9f0a9387297 100644
--- a/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -157,7 +157,7 @@ bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
  // Sorting all reaching defs found for a certain reg unit in a given BB.
for (MBBDefsInfo &MBBDefs : MBBReachingDefs) {
for (MBBRegUnitDefs &RegUnitDefs : MBBDefs)
- llvm::sort(RegUnitDefs.begin(), RegUnitDefs.end());
+ llvm::sort(RegUnitDefs);
}
return false;
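
The ReachingDefAnalysis hunk is a mechanical switch to the range overload of llvm::sort. A minimal sketch of the two equivalent spellings, using a made-up vector name:

SmallVector<int, 8> Defs = {3, 1, 2};
llvm::sort(Defs.begin(), Defs.end()); // iterator form, as before the change
llvm::sort(Defs);                     // range form used after the change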
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index 7b57c6cbcdb8..eb3a4e481f5d 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -54,7 +54,7 @@ using namespace llvm;
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumCopies, "Number of copies coalesced");
+STATISTIC(NumCoalesced, "Number of copies coalesced");
static RegisterRegAlloc
fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -88,7 +88,7 @@ namespace {
unsigned short LastOpNum = 0; ///< OpNum on LastUse.
bool Dirty = false; ///< Register needs spill.
- explicit LiveReg(unsigned v) : VirtReg(v) {}
+ explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {}
unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
@@ -96,14 +96,13 @@ namespace {
};
using LiveRegMap = SparseSet<LiveReg>;
-
/// This map contains entries for each virtual register that is currently
/// available in a physical register.
LiveRegMap LiveVirtRegs;
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>> LiveDbgValueMap;
+ DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
- /// Track the state of a physical register.
+ /// State of a physical register.
enum RegState {
/// A disabled register is not available for allocation, but an alias may
/// be in use. A register can only be moved out of the disabled state if
@@ -123,18 +122,18 @@ namespace {
/// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- /// One of the RegState enums, or a virtreg.
+ /// Maps each physical register to a RegState enum or a virtual register.
std::vector<unsigned> PhysRegState;
SmallVector<unsigned, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
- /// Set of register units.
- using UsedInInstrSet = SparseSet<unsigned>;
-
+ using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
/// Set of register units that are used in the current instruction, and so
/// cannot be allocated.
- UsedInInstrSet UsedInInstr;
+ RegUnitSet UsedInInstr;
+
+ void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
@@ -150,12 +149,8 @@ namespace {
return false;
}
- /// This flag is set when LiveRegMap will be cleared completely after
- /// spilling all live registers. LiveRegMap entries should not be erased.
- bool isBulkSpilling = false;
-
enum : unsigned {
- spillClean = 1,
+ spillClean = 50,
spillDirty = 100,
spillImpossible = ~0u
};
@@ -180,16 +175,18 @@ namespace {
private:
bool runOnMachineFunction(MachineFunction &MF) override;
+
void allocateBasicBlock(MachineBasicBlock &MBB);
+ void allocateInstruction(MachineInstr &MI);
+ void handleDebugValue(MachineInstr &MI);
void handleThroughOperands(MachineInstr &MI,
SmallVectorImpl<unsigned> &VirtDead);
- int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass &RC);
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
- void killVirtReg(LiveRegMap::iterator LRI);
+ void killVirtReg(LiveReg &LR);
void killVirtReg(unsigned VirtReg);
- void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
void usePhysReg(MachineOperand &MO);
@@ -206,15 +203,19 @@ namespace {
return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
}
- LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg);
- LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
- unsigned Hint);
- LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint);
- LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+ MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
+ unsigned Hint);
+ LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
+ unsigned Hint);
void spillAll(MachineBasicBlock::iterator MI);
- bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+
+ int getStackSpaceFor(unsigned VirtReg);
+ void spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg AssignedReg, bool Kill);
+ void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg PhysReg);
void dumpState();
};
@@ -226,10 +227,13 @@ char RegAllocFast::ID = 0;
INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
+void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
+ PhysRegState[PhysReg] = NewState;
+}
+
/// This allocates space for the specified virtual register to be held on the
/// stack.
-int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
- const TargetRegisterClass &RC) {
+int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
// Already has space allocated?
@@ -237,6 +241,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
return SS;
// Allocate a new stack object for this spill location...
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
unsigned Size = TRI->getSpillSize(RC);
unsigned Align = TRI->getSpillAlignment(RC);
int FrameIdx = MFI->CreateSpillStackObject(Size, Align);
@@ -246,6 +251,46 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
return FrameIdx;
}
+/// Insert spill instruction for \p AssignedReg before \p Before. Update
+/// DBG_VALUEs with \p VirtReg operands with the stack slot.
+void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg AssignedReg, bool Kill) {
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
+ << " in " << printReg(AssignedReg, TRI));
+ int FI = getStackSpaceFor(VirtReg);
+ LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
+
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
+ ++NumStores;
+
+ // If this register is used by DBG_VALUE then insert new DBG_VALUE to
+ // identify spilled location as the place to find corresponding variable's
+ // value.
+ SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg];
+ for (MachineInstr *DBG : LRIDbgValues) {
+ MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI);
+ assert(NewDV->getParent() == MBB && "dangling parent pointer");
+ (void)NewDV;
+ LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
+ }
+  // Now that this register has been spilled, there should not be any DBG_VALUE
+  // pointing to this register, because they all point to the spilled value
+  // now.
+ LRIDbgValues.clear();
+}
+
+/// Insert reload instruction for \p PhysReg before \p Before.
+void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ MCPhysReg PhysReg) {
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
+ << printReg(PhysReg, TRI) << '\n');
+ int FI = getStackSpaceFor(VirtReg);
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI);
+ ++NumLoads;
+}
+
/// Return true if MO is the only remaining reference to its virtual register,
/// and it is guaranteed to be a block-local register.
bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
@@ -281,14 +326,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) {
}
/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) {
- addKillFlag(*LRI);
- assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+void RegAllocFast::killVirtReg(LiveReg &LR) {
+ addKillFlag(LR);
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
"Broken RegState mapping");
- PhysRegState[LRI->PhysReg] = regFree;
- // Erase from LiveVirtRegs unless we're spilling in bulk.
- if (!isBulkSpilling)
- LiveVirtRegs.erase(LRI);
+ setPhysRegState(LR.PhysReg, regFree);
+ LR.PhysReg = 0;
}
/// Mark virtreg as no longer available.
@@ -296,8 +339,8 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- if (LRI != LiveVirtRegs.end())
- killVirtReg(LRI);
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
+ killVirtReg(*LRI);
}
/// This method spills the value specified by VirtReg into the corresponding
@@ -307,63 +350,41 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
- spillVirtReg(MI, LRI);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Spilling unmapped virtual register");
+ spillVirtReg(MI, *LRI);
}
/// Do the actual work of spilling.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
- LiveRegMap::iterator LRI) {
- LiveReg &LR = *LRI;
- assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
+ assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
// instruction, not on the spill.
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- LLVM_DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) << " in "
- << printReg(LR.PhysReg, TRI));
- const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg);
- int FI = getStackSpaceFor(LRI->VirtReg, RC);
- LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n");
- TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI);
- ++NumStores; // Update statistics
-
- // If this register is used by DBG_VALUE then insert new DBG_VALUE to
- // identify spilled location as the place to find corresponding variable's
- // value.
- SmallVectorImpl<MachineInstr *> &LRIDbgValues =
- LiveDbgValueMap[LRI->VirtReg];
- for (MachineInstr *DBG : LRIDbgValues) {
- MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);
- assert(NewDV->getParent() == MBB && "dangling parent pointer");
- (void)NewDV;
- LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:"
- << "\n"
- << *NewDV);
- }
- // Now this register is spilled there is should not be any DBG_VALUE
- // pointing to this register because they are all pointing to spilled value
- // now.
- LRIDbgValues.clear();
+
+ spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
+
if (SpillKill)
LR.LastUse = nullptr; // Don't kill register again
}
- killVirtReg(LRI);
+ killVirtReg(LR);
}
/// Spill all dirty virtregs without killing them.
void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
- if (LiveVirtRegs.empty()) return;
- isBulkSpilling = true;
+ if (LiveVirtRegs.empty())
+ return;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end();
- I != E; ++I)
- spillVirtReg(MI, I);
+ for (LiveReg &LR : LiveVirtRegs) {
+ if (!LR.PhysReg)
+ continue;
+ spillVirtReg(MI, LR);
+ }
LiveVirtRegs.clear();
- isBulkSpilling = false;
}
/// Handle the direct use of a physical register. Check that the register is
@@ -417,12 +438,12 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- PhysRegState[Alias] = regFree;
+ setPhysRegState(Alias, regFree);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
// Some other alias was in the working set - clear it.
- PhysRegState[Alias] = regDisabled;
+ setPhysRegState(Alias, regDisabled);
break;
default:
llvm_unreachable("Instruction uses an alias of an allocated register");
@@ -430,7 +451,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
}
// All aliases are disabled, bring register into working set.
- PhysRegState[PhysReg] = regFree;
+ setPhysRegState(PhysReg, regFree);
MO.setIsKill();
}
@@ -448,12 +469,12 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
LLVM_FALLTHROUGH;
case regFree:
case regReserved:
- PhysRegState[PhysReg] = NewState;
+ setPhysRegState(PhysReg, NewState);
return;
}
// This is a disabled register, disable all aliases.
- PhysRegState[PhysReg] = NewState;
+ setPhysRegState(PhysReg, NewState);
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
MCPhysReg Alias = *AI;
switch (unsigned VirtReg = PhysRegState[Alias]) {
@@ -464,7 +485,7 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
LLVM_FALLTHROUGH;
case regFree:
case regReserved:
- PhysRegState[Alias] = regDisabled;
+ setPhysRegState(Alias, regDisabled);
if (TRI->isSuperRegister(PhysReg, Alias))
return;
break;
@@ -472,9 +493,9 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
}
}
-/// Return the cost of spilling clearing out PhysReg and aliases so it is
-/// free for allocation. Returns 0 when PhysReg is free or disabled with all
-/// aliases disabled - it can be allocated directly.
+/// Return the cost of spilling clearing out PhysReg and aliases so it is free
+/// for allocation. Returns 0 when PhysReg is free or disabled with all aliases
+/// disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
if (isRegUsedInInstr(PhysReg)) {
@@ -492,9 +513,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
<< printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- return I->Dirty ? spillDirty : spillClean;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ return LRI->Dirty ? spillDirty : spillClean;
}
}
@@ -512,9 +534,10 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
case regReserved:
return spillImpossible;
default: {
- LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- Cost += I->Dirty ? spillDirty : spillClean;
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ Cost += LRI->Dirty ? spillDirty : spillClean;
break;
}
}
@@ -526,31 +549,27 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
- LLVM_DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to "
- << printReg(PhysReg, TRI) << "\n");
- PhysRegState[PhysReg] = LR.VirtReg;
- assert(!LR.PhysReg && "Already assigned a physreg");
+ unsigned VirtReg = LR.VirtReg;
+ LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
+ << printReg(PhysReg, TRI) << '\n');
+ assert(LR.PhysReg == 0 && "Already assigned a physreg");
+ assert(PhysReg != 0 && "Trying to assign no register");
LR.PhysReg = PhysReg;
-}
-
-RegAllocFast::LiveRegMap::iterator
-RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) {
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ setPhysRegState(PhysReg, VirtReg);
}
/// Allocates a physical register for VirtReg.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
- LiveRegMap::iterator LRI, unsigned Hint) {
- const unsigned VirtReg = LRI->VirtReg;
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
+ const unsigned VirtReg = LR.VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
- // Take hint when possible.
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
+ << " in class " << TRI->getRegClassName(&RC) << '\n');
+
+ // Take hint when possible.
if (TargetRegisterInfo::isPhysicalRegister(Hint) &&
MRI->isAllocatable(Hint) && RC.contains(Hint)) {
// Ignore the hint if we would have to spill a dirty register.
@@ -558,67 +577,62 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
if (Cost < spillDirty) {
if (Cost)
definePhysReg(MI, Hint, regFree);
- // definePhysReg may kill virtual registers and modify LiveVirtRegs.
- // That invalidates LRI, so run a new lookup for VirtReg.
- return assignVirtToPhysReg(VirtReg, Hint);
+ assignVirtToPhysReg(LR, Hint);
+ return;
}
}
// First try to find a completely free register.
- ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(&RC);
- for (MCPhysReg PhysReg : AO) {
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ for (MCPhysReg PhysReg : AllocationOrder) {
if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ assignVirtToPhysReg(LR, PhysReg);
+ return;
}
}
- LLVM_DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from "
- << TRI->getRegClassName(&RC) << "\n");
-
- unsigned BestReg = 0;
+ MCPhysReg BestReg = 0;
unsigned BestCost = spillImpossible;
- for (MCPhysReg PhysReg : AO) {
+ for (MCPhysReg PhysReg : AllocationOrder) {
+ LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
unsigned Cost = calcSpillCost(PhysReg);
- LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n");
- LLVM_DEBUG(dbgs() << "\tCost: " << Cost << "\n");
- LLVM_DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
- // Cost is 0 when all aliases are already disabled.
+ LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
+    // Immediately take a register with cost 0.
if (Cost == 0) {
- assignVirtToPhysReg(*LRI, PhysReg);
- return LRI;
+ assignVirtToPhysReg(LR, PhysReg);
+ return;
+ }
+ if (Cost < BestCost) {
+ BestReg = PhysReg;
+ BestCost = Cost;
}
- if (Cost < BestCost)
- BestReg = PhysReg, BestCost = Cost;
}
- if (BestReg) {
- definePhysReg(MI, BestReg, regFree);
- // definePhysReg may kill virtual registers and modify LiveVirtRegs.
- // That invalidates LRI, so run a new lookup for VirtReg.
- return assignVirtToPhysReg(VirtReg, BestReg);
+ if (!BestReg) {
+ // Nothing we can do: Report an error and keep going with an invalid
+ // allocation.
+ if (MI.isInlineAsm())
+ MI.emitError("inline assembly requires more registers than available");
+ else
+ MI.emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AllocationOrder.begin(), regFree);
+ assignVirtToPhysReg(LR, *AllocationOrder.begin());
+ return;
}
- // Nothing we can do. Report an error and keep going with a bad allocation.
- if (MI.isInlineAsm())
- MI.emitError("inline assembly requires more registers than available");
- else
- MI.emitError("ran out of registers during register allocation");
- definePhysReg(MI, *AO.begin(), regFree);
- return assignVirtToPhysReg(VirtReg, *AO.begin());
+ definePhysReg(MI, BestReg, regFree);
+ assignVirtToPhysReg(LR, BestReg);
}
/// Allocates a register for VirtReg and mark it as dirty.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- if (New) {
+ if (!LRI->PhysReg) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
@@ -627,7 +641,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
}
- LRI = allocVirtReg(MI, LRI, Hint);
+ allocVirtReg(MI, *LRI, Hint);
} else if (LRI->LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
@@ -639,40 +653,35 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
markRegUsedInInstr(LRI->PhysReg);
- return LRI;
+ return LRI->PhysReg;
}
/// Make sure VirtReg is available in a physreg and return it.
-RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
- unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
MachineOperand &MO = MI.getOperand(OpNum);
- if (New) {
- LRI = allocVirtReg(MI, LRI, Hint);
- const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
- LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
- << printReg(LRI->PhysReg, TRI) << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI);
- ++NumLoads;
+ if (!LRI->PhysReg) {
+ allocVirtReg(MI, *LRI, Hint);
+ reload(MI, VirtReg, LRI->PhysReg);
} else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
- LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n');
if (MO.isUse())
MO.setIsKill();
else
MO.setIsDead();
} else if (MO.isKill()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n');
MO.setIsKill(false);
} else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n');
MO.setIsDead(false);
}
} else if (MO.isKill()) {
@@ -680,25 +689,24 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
// register would be killed immediately, and there might be a second use:
// %foo = OR killed %x, %x
// This would cause a second reload of %x into a different register.
- LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n');
MO.setIsKill(false);
} else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n');
MO.setIsDead(false);
}
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = &MI;
LRI->LastOpNum = OpNum;
markRegUsedInInstr(LRI->PhysReg);
- return LRI;
+ return *LRI;
}
/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This
/// may invalidate any operand pointers. Return true if the operand kills its
/// register.
-bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,
+bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MCPhysReg PhysReg) {
- MachineOperand &MO = MI.getOperand(OpNum);
bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
@@ -761,7 +769,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
SmallVector<unsigned, 8> PartialDefs;
LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
@@ -770,17 +778,17 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
<< ") is tied to operand " << MI.findTiedOperandIdx(I)
<< ".\n");
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LRI->PhysReg;
- setPhysReg(MI, I, PhysReg);
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ MCPhysReg PhysReg = LR.PhysReg;
+ setPhysReg(MI, MO, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
} else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
- LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n');
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
- PartialDefs.push_back(LRI->PhysReg);
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
+ PartialDefs.push_back(LR.PhysReg);
}
}
@@ -793,9 +801,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (!MO.isEarlyClobber())
continue;
// Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LRI->PhysReg;
- if (setPhysReg(MI, I, PhysReg))
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0);
+ if (setPhysReg(MI, MI.getOperand(I), PhysReg))
VirtDead.push_back(Reg);
}
@@ -828,11 +835,12 @@ void RegAllocFast::dumpState() {
break;
default: {
dbgs() << '=' << printReg(PhysRegState[Reg]);
- LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
- assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
- if (I->Dirty)
+ LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
+ assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
+ "Missing VirtReg entry");
+ if (LRI->Dirty)
dbgs() << "*";
- assert(I->PhysReg == Reg && "Bad inverse map");
+ assert(LRI->PhysReg == Reg && "Bad inverse map");
break;
}
}
@@ -841,6 +849,8 @@ void RegAllocFast::dumpState() {
// Check that LiveVirtRegs is the inverse.
for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
e = LiveVirtRegs.end(); i != e; ++i) {
+ if (!i->PhysReg)
+ continue;
assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
"Bad map key");
assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
@@ -850,6 +860,199 @@ void RegAllocFast::dumpState() {
}
#endif
+void RegAllocFast::allocateInstruction(MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrcReg = 0;
+ unsigned CopyDstReg = 0;
+ unsigned CopySrcSub = 0;
+ unsigned CopyDstSub = 0;
+ if (MI.isCopy()) {
+ CopyDstReg = MI.getOperand(0).getReg();
+ CopySrcReg = MI.getOperand(1).getReg();
+ CopyDstSub = MI.getOperand(0).getSubReg();
+ CopySrcSub = MI.getOperand(1).getSubReg();
+ }
+
+ // Track registers used by instruction.
+ UsedInInstr.clear();
+
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
+ }
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
+ continue;
+ }
+ if (!MRI->isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg,
+ (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDstReg = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
+ }
+
+ // Second scan.
+ // Allocate virtreg uses.
+ for (unsigned I = 0; I != VirtOpEnd; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
+ MCPhysReg PhysReg = LR.PhysReg;
+ CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, MO, PhysReg))
+ killVirtReg(LR);
+ }
+ }
+
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.clear();
+ if (hasEarlyClobbers) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MO.isTied()) continue;
+ markRegUsedInInstr(Reg);
+ }
+ }
+
+ unsigned DefOpEnd = MI.getNumOperands();
+ if (MI.isCall()) {
+ // Spill all virtregs before a call. This serves one purpose: If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots.
+ // Note: although this is appealing to just consider all definitions
+ // as call-clobbered, this is not correct because some of those
+ // definitions may be used later on and we do not want to reuse
+ // those for virtual registers in between.
+ LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned I = 0; I != DefOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!MRI->isAllocatable(Reg)) continue;
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ continue;
+ }
+ MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
+ if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDstReg = 0; // cancel coalescing;
+ } else
+ CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+  // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned VirtReg : VirtDead)
+ killVirtReg(VirtReg);
+ VirtDead.clear();
+
+ LLVM_DEBUG(dbgs() << "<< " << MI);
+ if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
+ LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n");
+ Coalesced.push_back(&MI);
+ }
+}
+
+void RegAllocFast::handleDebugValue(MachineInstr &MI) {
+ MachineOperand &MO = MI.getOperand(0);
+
+ // Ignore DBG_VALUEs that aren't based on virtual registers. These are
+ // mostly constants and frame indices.
+ if (!MO.isReg())
+ return;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ // See if this virtual register has already been allocated to a physical
+ // register or spilled to a stack slot.
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
+ setPhysReg(MI, MO, LRI->PhysReg);
+ } else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS);
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI);
+ return;
+ }
+
+ // We can't allocate a physreg for a DebugValue, sorry!
+ LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+
+ // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
+ // that future spills of Reg will have DBG_VALUEs.
+ LiveDbgValueMap[Reg].push_back(&MI);
+}
+
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -869,206 +1072,19 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// Otherwise, sequentially allocate each instruction in the MBB.
for (MachineInstr &MI : MBB) {
- const MCInstrDesc &MCID = MI.getDesc();
- LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState());
+ LLVM_DEBUG(
+ dbgs() << "\n>> " << MI << "Regs:";
+ dumpState()
+ );
- // Debug values are not allowed to change codegen in any way.
+ // Special handling for debug values. Note that they are not allowed to
+ // affect codegen of the other instructions in any way.
if (MI.isDebugValue()) {
- MachineInstr *DebugMI = &MI;
- MachineOperand &MO = DebugMI->getOperand(0);
-
- // Ignore DBG_VALUEs that aren't based on virtual registers. These are
- // mostly constants and frame indices.
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // See if this virtual register has already been allocated to a physical
- // register or spilled to a stack slot.
- LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
- if (LRI != LiveVirtRegs.end())
- setPhysReg(*DebugMI, 0, LRI->PhysReg);
- else {
- int SS = StackSlotForVirtReg[Reg];
- if (SS != -1) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- updateDbgValueForSpill(*DebugMI, SS);
- LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:"
- << "\t" << *DebugMI);
- continue;
- }
-
- // We can't allocate a physreg for a DebugValue, sorry!
- LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
- MO.setReg(0);
- }
-
- // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
- // that future spills of Reg will have DBG_VALUEs.
- LiveDbgValueMap[Reg].push_back(DebugMI);
+ handleDebugValue(MI);
continue;
}
- if (MI.isDebugLabel())
- continue;
-
- // If this is a copy, we may be able to coalesce.
- unsigned CopySrcReg = 0;
- unsigned CopyDstReg = 0;
- unsigned CopySrcSub = 0;
- unsigned CopyDstSub = 0;
- if (MI.isCopy()) {
- CopyDstReg = MI.getOperand(0).getReg();
- CopySrcReg = MI.getOperand(1).getReg();
- CopyDstSub = MI.getOperand(0).getSubReg();
- CopySrcSub = MI.getOperand(1).getSubReg();
- }
-
- // Track registers used by instruction.
- UsedInInstr.clear();
-
- // First scan.
- // Mark physreg uses and early clobbers as used.
- // Find the end of the virtreg operands
- unsigned VirtOpEnd = 0;
- bool hasTiedOps = false;
- bool hasEarlyClobbers = false;
- bool hasPartialRedefs = false;
- bool hasPhysDefs = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- // Make sure MRI knows about registers clobbered by regmasks.
- if (MO.isRegMask()) {
- MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- continue;
- }
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg) continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- VirtOpEnd = i+1;
- if (MO.isUse()) {
- hasTiedOps = hasTiedOps ||
- MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
- } else {
- if (MO.isEarlyClobber())
- hasEarlyClobbers = true;
- if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
- hasPartialRedefs = true;
- }
- continue;
- }
- if (!MRI->isAllocatable(Reg)) continue;
- if (MO.isUse()) {
- usePhysReg(MO);
- } else if (MO.isEarlyClobber()) {
- definePhysReg(MI, Reg,
- (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
- hasEarlyClobbers = true;
- } else
- hasPhysDefs = true;
- }
-
- // The instruction may have virtual register operands that must be allocated
- // the same register at use-time and def-time: early clobbers and tied
- // operands. If there are also physical defs, these registers must avoid
- // both physical defs and uses, making them more constrained than normal
- // operands.
- // Similarly, if there are multiple defs and tied operands, we must make
- // sure the same register is allocated to uses and defs.
- // We didn't detect inline asm tied operands above, so just make this extra
- // pass for all inline asm.
- if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
- handleThroughOperands(MI, VirtDead);
- // Don't attempt coalescing when we have funny stuff going on.
- CopyDstReg = 0;
- // Pretend we have early clobbers so the use operands get marked below.
- // This is not necessary for the common case of a single tied use.
- hasEarlyClobbers = true;
- }
-
- // Second scan.
- // Allocate virtreg uses.
- for (unsigned I = 0; I != VirtOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
- if (MO.isUse()) {
- LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg);
- MCPhysReg PhysReg = LRI->PhysReg;
- CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
- if (setPhysReg(MI, I, PhysReg))
- killVirtReg(LRI);
- }
- }
-
- // Track registers defined by instruction - early clobbers and tied uses at
- // this point.
- UsedInInstr.clear();
- if (hasEarlyClobbers) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- // Look for physreg defs and tied uses.
- if (!MO.isDef() && !MO.isTied()) continue;
- markRegUsedInInstr(Reg);
- }
- }
-
- unsigned DefOpEnd = MI.getNumOperands();
- if (MI.isCall()) {
- // Spill all virtregs before a call. This serves one purpose: If an
- // exception is thrown, the landing pad is going to expect to find
- // registers in their spill slots.
- // Note: although this is appealing to just consider all definitions
- // as call-clobbered, this is not correct because some of those
- // definitions may be used later on and we do not want to reuse
- // those for virtual registers in between.
- LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
- spillAll(MI);
- }
-
- // Third scan.
- // Allocate defs and collect dead defs.
- for (unsigned I = 0; I != DefOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
- continue;
- unsigned Reg = MO.getReg();
-
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!MRI->isAllocatable(Reg)) continue;
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
- continue;
- }
- LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg);
- MCPhysReg PhysReg = LRI->PhysReg;
- if (setPhysReg(MI, I, PhysReg)) {
- VirtDead.push_back(Reg);
- CopyDstReg = 0; // cancel coalescing;
- } else
- CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
- }
-
- // Kill dead defs after the scan to ensure that multiple defs of the same
- // register are allocated identically. We didn't need to do this for uses
- // because we are crerating our own kill flags, and they are always at the
- // last use.
- for (unsigned VirtReg : VirtDead)
- killVirtReg(VirtReg);
- VirtDead.clear();
-
- if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
- LLVM_DEBUG(dbgs() << "-- coalescing: " << MI);
- Coalesced.push_back(&MI);
- } else {
- LLVM_DEBUG(dbgs() << "<< " << MI);
- }
+ allocateInstruction(MI);
}
// Spill all physical registers holding virtual registers now.
@@ -1079,12 +1095,11 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// LiveVirtRegs might refer to the instrs.
for (MachineInstr *MI : Coalesced)
MBB.erase(MI);
- NumCopies += Coalesced.size();
+ NumCoalesced += Coalesced.size();
LLVM_DEBUG(MBB.dump());
}
-/// Allocates registers for a function.
bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
<< "********** Function: " << MF.getName() << '\n');
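
To summarize the reshuffled selection logic in the RegAllocFast changes above: allocVirtReg now tries the hint, then the first completely free register in allocation order, and finally the occupied register that is cheapest to spill. A hypothetical condensation of that last part follows; pickPhysReg, IsFree and CalcSpillCost are made-up names standing in for the checks in the real code, and the hint handling is omitted.

static MCPhysReg pickPhysReg(ArrayRef<MCPhysReg> AllocationOrder,
                             llvm::function_ref<bool(MCPhysReg)> IsFree,
                             llvm::function_ref<unsigned(MCPhysReg)> CalcSpillCost) {
  // First pass: take the first register that is completely free.
  for (MCPhysReg PhysReg : AllocationOrder)
    if (IsFree(PhysReg))
      return PhysReg;
  // Second pass: pick the cheapest register to evict.
  MCPhysReg BestReg = 0;
  unsigned BestCost = ~0u; // corresponds to spillImpossible
  for (MCPhysReg PhysReg : AllocationOrder) {
    unsigned Cost = CalcSpillCost(PhysReg);
    if (Cost == 0)
      return PhysReg; // nothing live there, take it immediately
    if (Cost < BestCost) {
      BestReg = PhysReg;
      BestCost = Cost;
    }
  }
  return BestReg; // 0 means no register could be found
}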
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 3333e1f2fb8b..81b21b442437 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -318,7 +318,7 @@ class RAGreedy : public MachineFunctionPass,
/// Track new eviction.
/// The Evictor vreg has evicted the Evictee vreg from Physreg.
- /// \param PhysReg The phisical register Evictee was evicted from.
+ /// \param PhysReg The physical register Evictee was evicted from.
/// \param Evictor The evictor Vreg that evicted Evictee.
/// \param Evictee The evictee Vreg.
void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
@@ -449,8 +449,8 @@ private:
BlockFrequency calcSpillCost();
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
- void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
- void growRegion(GlobalSplitCandidate &Cand);
+ bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ bool growRegion(GlobalSplitCandidate &Cand);
bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
@@ -1183,7 +1183,10 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
BC.Number = BI.MBB->getNumber();
Intf.moveToBlock(BC.Number);
BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
- BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = (BI.LiveOut &&
+ !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
+ ? SpillPlacement::PrefReg
+ : SpillPlacement::DontCare;
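// Note: when the last instruction touching the register in this block is an
// IMPLICIT_DEF, the live-out value is undefined, so there is no benefit in
// asking the spill placer to prefer a register at the block exit.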
BC.ChangesValue = BI.FirstDef.isValid();
if (!Intf.hasInterference())
@@ -1203,6 +1206,13 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
} else if (Intf.first() < BI.LastInstr) {
++Ins;
}
+
+ // Abort if the spill cannot be inserted at the MBB's start.
+ if (((BC.Entry == SpillPlacement::MustSpill) ||
+ (BC.Entry == SpillPlacement::PrefSpill)) &&
+ SlotIndex::isEarlierInstr(BI.FirstInstr,
+ SA->getFirstSplitPoint(BC.Number)))
+ return false;
}
// Interference for the live-out value.
@@ -1232,7 +1242,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
-void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
ArrayRef<unsigned> Blocks) {
const unsigned GroupSize = 8;
SpillPlacement::BlockConstraint BCS[GroupSize];
@@ -1256,6 +1266,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
assert(B < GroupSize && "Array overflow");
BCS[B].Number = Number;
+ // Abort if the spill cannot be inserted at the MBB's start.
+ MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
+ if (!MBB->empty() &&
+ SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()),
+ SA->getFirstSplitPoint(Number)))
+ return false;
// Interference for the live-in value.
if (Intf.first() <= Indexes->getMBBStartIdx(Number))
BCS[B].Entry = SpillPlacement::MustSpill;
@@ -1276,9 +1292,10 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
SpillPlacer->addConstraints(makeArrayRef(BCS, B));
SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ return true;
}
-void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Keep track of through blocks that have not been added to SpillPlacer.
BitVector Todo = SA->getThroughBlocks();
SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
@@ -1314,9 +1331,10 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Compute through constraints from the interference, or assume that all
// through blocks prefer spilling when forming compact regions.
auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
- if (Cand.PhysReg)
- addThroughConstraints(Cand.Intf, NewBlocks);
- else
+ if (Cand.PhysReg) {
+ if (!addThroughConstraints(Cand.Intf, NewBlocks))
+ return false;
+ } else
// Provide a strong negative bias on through blocks to prevent unwanted
// liveness on loop backedges.
SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
@@ -1326,6 +1344,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
SpillPlacer->iterate();
}
LLVM_DEBUG(dbgs() << ", v=" << Visited);
+ return true;
}
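// Note: addThroughConstraints() and growRegion() now report failure by
// returning false when a required spill would have to land before a block's
// first valid split point; the callers below (calcCompactRegion and
// calculateRegionSplitCost) simply give up on that split candidate.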
/// calcCompactRegion - Compute the set of edge bundles that should be live
@@ -1356,7 +1375,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
return false;
}
- growRegion(Cand);
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ return false;
+ }
+
SpillPlacer->finish();
if (!Cand.LiveBundles.any()) {
@@ -1886,7 +1909,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
});
continue;
}
- growRegion(Cand);
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ continue;
+ }
SpillPlacer->finish();
@@ -2188,7 +2214,11 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs) {
- assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ // TODO: the function currently only handles a single UseBlock; it should be
+ // possible to generalize.
+ if (SA->getUseBlocks().size() != 1)
+ return 0;
+
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
// Note that it is possible to have an interval that is live-in or live-out
@@ -3120,18 +3150,23 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
// Handle blocks that were not included in subloops.
if (Loops->getLoopFor(MBB) == L)
for (MachineInstr &MI : *MBB) {
- const MachineMemOperand *MMO;
+ SmallVector<const MachineMemOperand *, 2> Accesses;
+ auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
+ return MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex());
+ };
if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
++Reloads;
- else if (TII->hasLoadFromStackSlot(MI, MMO, FI) &&
- MFI.isSpillSlotObjectIndex(FI))
+ else if (TII->hasLoadFromStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess))
++FoldedReloads;
else if (TII->isStoreToStackSlot(MI, FI) &&
MFI.isSpillSlotObjectIndex(FI))
++Spills;
- else if (TII->hasStoreToStackSlot(MI, MMO, FI) &&
- MFI.isSpillSlotObjectIndex(FI))
+ else if (TII->hasStoreToStackSlot(MI, Accesses) &&
+ llvm::any_of(Accesses, isSpillSlotAccess))
++FoldedSpills;
}
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index f1c442ac38ae..66c7c5cd7dbf 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -81,7 +81,7 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- const TargetMachine &TM = MF.getTarget();
+ const LLVMTargetMachine &TM = MF.getTarget();
LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
<< " -------------------- \n");
@@ -166,28 +166,27 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
}
// Insert any register fully saved via subregisters.
- for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
- if (SavedRegs.test(PReg))
- continue;
-
- // Check if PReg is fully covered by its subregs.
- bool CoveredBySubRegs = false;
- for (const TargetRegisterClass *RC : TRI.regclasses())
- if (RC->CoveredBySubRegs && RC->contains(PReg)) {
- CoveredBySubRegs = true;
- break;
- }
- if (!CoveredBySubRegs)
- continue;
-
- // Add PReg to SavedRegs if all subregs are saved.
- bool AllSubRegsSaved = true;
- for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
- if (!SavedRegs.test(*SR)) {
- AllSubRegsSaved = false;
- break;
- }
- if (AllSubRegsSaved)
- SavedRegs.set(PReg);
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ if (!RC->CoveredBySubRegs)
+ continue;
+
+ for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
+ if (SavedRegs.test(PReg))
+ continue;
+
+ // Check if PReg is fully covered by its subregs.
+ if (!RC->contains(PReg))
+ continue;
+
+ // Add PReg to SavedRegs if all subregs are saved.
+ bool AllSubRegsSaved = true;
+ for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
+ if (!SavedRegs.test(*SR)) {
+ AllSubRegsSaved = false;
+ break;
+ }
+ if (AllSubRegsSaved)
+ SavedRegs.set(PReg);
+ }
}
}
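// Note: the loop nest above is inverted relative to the old code; register
// classes without the CoveredBySubRegs property are now skipped once up
// front instead of being re-scanned for every physical register.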
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index cad13a60efd2..2a06d5e95fbb 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,6 +16,7 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -69,6 +70,7 @@ STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(NumInflated , "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
+STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
static cl::opt<bool> EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
@@ -94,6 +96,15 @@ VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
cl::Hidden);
+static cl::opt<unsigned> LateRematUpdateThreshold(
+ "late-remat-update-threshold", cl::Hidden,
+ cl::desc("During rematerialization for a copy, if the def instruction has "
+ "many other copy uses to be rematerialized, delay the multiple "
+ "separate live interval update work and do them all at once after "
+ "all those rematerialization are done. It will save a lot of "
+ "repeated work. "),
+ cl::init(100));
+
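// Note: as a cl::opt this threshold can be tuned from the command line (an
// invocation roughly like "llc -late-remat-update-threshold=10 ..." is an
// illustration only, not part of this change); lowering it makes the batched
// late update kick in for defs with fewer rematerialized copy uses.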
namespace {
class RegisterCoalescer : public MachineFunctionPass,
@@ -137,6 +148,11 @@ namespace {
/// Virtual registers to be considered for register class inflation.
SmallVector<unsigned, 8> InflateRegs;
+ /// The collection of live intervals which should have been updated
+ /// immediately after rematerialization but delayed until
+ /// lateLiveIntervalUpdate is called.
+ DenseSet<unsigned> ToBeUpdated;
+
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
@@ -157,6 +173,13 @@ namespace {
/// was made.
bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
+ /// If one def has many copy-like uses, and those copy uses are all
+ /// rematerialized, the live interval updates needed for those
+ /// rematerializations will be delayed and done all at once instead
+ /// of being done multiple times. This is to save compile time because
+ /// live interval updates are costly.
+ void lateLiveIntervalUpdate();
+
/// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
/// src/dst of the copy instruction CopyMI. This returns true if the copy
/// was successfully coalesced away. If it is not currently possible to
@@ -203,8 +226,12 @@ namespace {
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
- /// This returns true if an interval was modified.
- bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+ /// This returns a pair of two flags:
+ /// - the first element is true if an interval was modified,
+ /// - the second element is true if the destination interval needs
+ /// to be shrunk after deleting the copy.
+ std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI);
/// We found a copy which can be moved to its less frequent predecessor.
bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
@@ -258,6 +285,7 @@ namespace {
/// mentioned method returns true.
void shrinkToUses(LiveInterval *LI,
SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+ NumShrinkToUses++;
if (LIS->shrinkToUses(LI, Dead)) {
/// Check whether or not \p LI is composed by multiple connected
/// components and if that is the case, fix that.
@@ -662,17 +690,32 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
/// Copy segments with value number @p SrcValNo from liverange @p Src to live
/// range @Dst and use value number @p DstValNo there.
-static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
- const LiveRange &Src, const VNInfo *SrcValNo) {
+static std::pair<bool,bool>
+addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
+ const VNInfo *SrcValNo) {
+ bool Changed = false;
+ bool MergedWithDead = false;
for (const LiveRange::Segment &S : Src.segments) {
if (S.valno != SrcValNo)
continue;
- Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
- }
+ // This is adding a segment from Src that ends in a copy that is about
+ // to be removed. This segment is going to be merged with a pre-existing
+ // segment in Dst. This works, except in cases when the corresponding
+ // segment in Dst is dead. For example: adding [192r,208r:1) from Src
+ // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
+ // Recognize such cases, so that the segments can be shrunk.
+ LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
+ LiveRange::Segment &Merged = *Dst.addSegment(Added);
+ if (Merged.end.isDead())
+ MergedWithDead = true;
+ Changed = true;
+ }
+ return std::make_pair(Changed, MergedWithDead);
}
-bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
+std::pair<bool,bool>
+RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
assert(!CP.isPhys());
LiveInterval &IntA =
@@ -710,19 +753,19 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && !AValNo->isUnused() && "COPY source not live");
if (AValNo->isPHIDef())
- return false;
+ return { false, false };
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
- return false;
+ return { false, false };
if (!DefMI->isCommutable())
- return false;
+ return { false, false };
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
assert(DefIdx != -1);
unsigned UseOpIdx;
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
- return false;
+ return { false, false };
// FIXME: The code below tries to commute 'UseOpIdx' operand with some other
// commutable operand which is expressed by 'CommuteAnyOperandIndex' value
@@ -735,17 +778,17 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// op#2<->op#3) of commute transformation should be considered/tried here.
unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
- return false;
+ return { false, false };
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
- return false;
+ return { false, false };
// Make sure there are no other definitions of IntB that would reach the
// uses which the new definition can reach.
if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
- return false;
+ return { false, false };
// If some of the uses of IntA.reg are already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.
@@ -758,7 +801,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
// If this use is tied to a def, we can't rewrite the register.
if (UseMI->isRegTiedToDefOperand(OpNo))
- return false;
+ return { false, false };
}
LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
@@ -770,11 +813,11 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *NewMI =
TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
- return false;
+ return { false, false };
if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
!MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
- return false;
+ return { false, false };
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
MachineBasicBlock::iterator Pos = DefMI;
@@ -848,37 +891,58 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// Extend BValNo by merging in IntA live segments of AValNo. Val# definition
// is updated.
+ bool ShrinkB = false;
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- if (IntB.hasSubRanges()) {
+ if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
if (!IntA.hasSubRanges()) {
LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
IntA.createSubRangeFrom(Allocator, Mask, IntA);
+ } else if (!IntB.hasSubRanges()) {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg);
+ IntB.createSubRangeFrom(Allocator, Mask, IntB);
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
+ LaneBitmask MaskA;
for (LiveInterval::SubRange &SA : IntA.subranges()) {
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
assert(ASubValNo != nullptr);
+ MaskA |= SA.LaneMask;
IntB.refineSubRanges(Allocator, SA.LaneMask,
- [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
+ [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB]
+ (LiveInterval::SubRange &SR) {
VNInfo *BSubValNo = SR.empty()
? SR.getNextValue(CopyIdx, Allocator)
: SR.getVNInfoAt(CopyIdx);
assert(BSubValNo != nullptr);
- addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ ShrinkB |= P.second;
+ if (P.first)
+ BSubValNo->def = ASubValNo->def;
});
}
+ // Go over all subranges of IntB that have not been covered by IntA,
+ // and delete the segments starting at CopyIdx. This can happen if
+ // IntA has undef lanes that are defined in IntB.
+ for (LiveInterval::SubRange &SB : IntB.subranges()) {
+ if ((SB.LaneMask & MaskA).any())
+ continue;
+ if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
+ if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
+ SB.removeSegment(*S, true);
+ }
}
BValNo->def = AValNo->def;
- addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
+ auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
+ ShrinkB |= P.second;
LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
LIS->removeVRegDefAt(IntA, AValNo->def);
LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
++numCommutes;
- return true;
+ return { true, ShrinkB };
}
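// Note: the second element of the returned pair tells the caller that the
// destination interval may now contain dead segments; the joinCopy() hunk
// further down unpacks it with std::tie and calls shrinkToUses on the
// destination interval when it is set.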
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
@@ -1067,6 +1131,20 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
assert(BValNo && "All sublanes should be live");
LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
BValNo->markUnused();
+ // We can have a situation where the result of the original copy is live,
+ // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
+ // the copy appear as an endpoint from pruneValue(), but we don't want it
+ // to because the copy has been removed. We can go ahead and remove that
+ // endpoint; there is no other situation here where there could be a use at
+ // the same place, as we know that the copy is a full copy.
+ for (unsigned I = 0; I != EndPoints.size(); ) {
+ if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
+ EndPoints[I] = EndPoints.back();
+ EndPoints.pop_back();
+ continue;
+ }
+ ++I;
+ }
LIS->extendToIndices(SR, EndPoints);
}
// If any dead defs were extended, truncate them.
@@ -1107,7 +1185,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
- assert(ValNo && "CopyMI input register not live");
+ if (!ValNo)
+ return false;
if (ValNo->isPHIDef() || ValNo->isUnused())
return false;
MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
@@ -1365,24 +1444,40 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
++NumReMats;
- // The source interval can become smaller because we removed a use.
- shrinkToUses(&SrcInt, &DeadDefs);
- if (!DeadDefs.empty()) {
- // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
- // to describe DstReg instead.
+ // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
+ // to describe DstReg instead.
+ if (MRI->use_nodbg_empty(SrcReg)) {
for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugValue()) {
- UseMO.setReg(DstReg);
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ UseMO.substPhysReg(DstReg, *TRI);
+ else
+ UseMO.setReg(DstReg);
// Move the debug value directly after the def of the rematerialized
// value in DstReg.
MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
}
}
- eliminateDeadDefs();
}
+ if (ToBeUpdated.count(SrcReg))
+ return true;
+
+ unsigned NumCopyUses = 0;
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
+ if (UseMO.getParent()->isCopyLike())
+ NumCopyUses++;
+ }
+ if (NumCopyUses < LateRematUpdateThreshold) {
+ // The source interval can become smaller because we removed a use.
+ shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+ } else {
+ ToBeUpdated.insert(SrcReg);
+ }
return true;
}
@@ -1751,9 +1846,18 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// If we can eliminate the copy without merging the live segments, do so
// now.
if (!CP.isPartial() && !CP.isPhys()) {
- if (adjustCopiesBackFrom(CP, CopyMI) ||
- removeCopyByCommutingDef(CP, CopyMI)) {
+ bool Changed = adjustCopiesBackFrom(CP, CopyMI);
+ bool Shrink = false;
+ if (!Changed)
+ std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
+ if (Changed) {
deleteInstr(CopyMI);
+ if (Shrink) {
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ LiveInterval &DstLI = LIS->getInterval(DstReg);
+ shrinkToUses(&DstLI);
+ LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
+ }
LLVM_DEBUG(dbgs() << "\tTrivial!\n");
return true;
}
@@ -1806,6 +1910,13 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
LI.removeEmptySubRanges();
}
+
+ // CP.getSrcReg()'s live interval has been merged into CP.getDstReg()'s live
+ // interval. Since CP.getSrcReg() is in the ToBeUpdated set and its live
+ // interval is not up-to-date, we need to update the merged live interval here.
+ if (ToBeUpdated.count(CP.getSrcReg()))
+ ShrinkMainRange = true;
+
if (ShrinkMainRange) {
LiveInterval &LI = LIS->getInterval(CP.getDstReg());
shrinkToUses(&LI);
@@ -2397,8 +2508,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// We normally expect IMPLICIT_DEF values to be live only until the end
// of their block. If the value is really live longer and gets pruned in
// another block, this flag is cleared again.
+ //
+ // Clearing the valid lanes is deferred until we are sure this value can
+ // be erased.
V.ErasableImplicitDef = true;
- V.ValidLanes &= ~V.WriteLanes;
}
}
}
@@ -2453,20 +2566,25 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
Other.computeAssignment(V.OtherVNI->id, *this);
Val &OtherV = Other.Vals[V.OtherVNI->id];
- // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
- // This shouldn't normally happen, but ProcessImplicitDefs can leave such
- // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
- // technically.
- //
- // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
- // to erase the IMPLICIT_DEF instruction.
- if (OtherV.ErasableImplicitDef && DefMI &&
- DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
- LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
- << " extends into "
- << printMBBReference(*DefMI->getParent())
- << ", keeping it.\n");
- OtherV.ErasableImplicitDef = false;
+ if (OtherV.ErasableImplicitDef) {
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into "
+ << printMBBReference(*DefMI->getParent())
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ } else {
+ // We deferred clearing these lanes in case we needed to save them.
+ OtherV.ValidLanes &= ~OtherV.WriteLanes;
+ }
}
// Allow overlapping PHI values. Any real interference would show up in a
@@ -2509,6 +2627,12 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
return CR_Erase;
}
+ // The remaining checks apply to the lanes, which aren't tracked here. This
+ // was already decided to be OK via the following CR_Replace condition.
+ if (SubRangeJoin)
+ return CR_Replace;
+
// If the lanes written by this instruction were all undef in OtherVNI, it is
// still safe to join the live ranges. This can't be done with a simple value
// mapping, though - OtherVNI will map to multiple values:
@@ -2590,8 +2714,18 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
Val &OtherV = Other.Vals[V.OtherVNI->id];
// We cannot erase an IMPLICIT_DEF if we don't have valid values for all
// its lanes.
- if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
+ if (OtherV.ErasableImplicitDef &&
+ TrackSubRegLiveness &&
+ (OtherV.WriteLanes & ~V.ValidLanes).any()) {
+ LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
+
OtherV.ErasableImplicitDef = false;
+ // The valid lanes written by the implicit_def were speculatively cleared
+ // before, so make this more conservative. It may be better to track this,
+ // but I haven't found a testcase where it matters.
+ OtherV.ValidLanes = LaneBitmask::getAll();
+ }
+
OtherV.Pruned = true;
LLVM_FALLTHROUGH;
}
@@ -3290,6 +3424,18 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
|| LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
}
+void RegisterCoalescer::lateLiveIntervalUpdate() {
+ for (unsigned reg : ToBeUpdated) {
+ if (!LIS->hasInterval(reg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(reg);
+ shrinkToUses(&LI, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
+ }
+ ToBeUpdated.clear();
+}
+
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
@@ -3459,12 +3605,14 @@ void RegisterCoalescer::joinAllIntervals() {
}
copyCoalesceInMBB(MBBs[i].MBB);
}
+ lateLiveIntervalUpdate();
coalesceLocals();
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
while (copyCoalesceWorkList(WorkList))
/* empty */ ;
+ lateLiveIntervalUpdate();
}
void RegisterCoalescer::releaseMemory() {
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 51414de518fd..1099e468e885 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -681,8 +681,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
PressureDiff::iterator J;
for (J = std::next(I); J != E && J->isValid(); ++J, ++I)
*I = *J;
- if (J != E)
- *I = *J;
+ *I = PressureChange();
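// Note: after shifting the remaining valid entries down by one, the vacated
// trailing slot is reset to an invalid PressureChange; the old code skipped
// this when the array was completely full, leaving a stale duplicate entry.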
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 6a31118cc562..6b9880a8913f 100644
--- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -40,7 +40,7 @@ INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
char PhysicalRegisterUsageInfo::ID = 0;
-void PhysicalRegisterUsageInfo::setTargetMachine(const TargetMachine &TM) {
+void PhysicalRegisterUsageInfo::setTargetMachine(const LLVMTargetMachine &TM) {
this->TM = &TM;
}
@@ -81,7 +81,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
// sort the vector to print analysis in alphabetical order of function name.
llvm::sort(
- FPRMPairVector.begin(), FPRMPairVector.end(),
+ FPRMPairVector,
[](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
return A->first->getName() < B->first->getName();
});
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
index cbbbf7c385aa..c356fb57ac6d 100644
--- a/contrib/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -260,8 +260,14 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
const Value *AllocaPtr,
uint64_t AllocaSize) {
- // All MemIntrinsics have destination address in Arg0 and size in Arg2.
- if (MI->getRawDest() != U) return true;
+ if (auto MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != U && MTI->getRawDest() != U)
+ return true;
+ } else {
+ if (MI->getRawDest() != U)
+ return true;
+ }
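// Note: for memcpy/memmove (MemTransferInst) the access is now also checked
// when the unsafe alloca is only the source operand; previously any use that
// was not the destination pointer was unconditionally treated as safe.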
+
const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
// Non-constant size => unsafe. FIXME: try SCEV getRange.
if (!Len) return false;
@@ -318,11 +324,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
case Instruction::Invoke: {
ImmutableCallSite CS(I);
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)
- continue;
- }
+ if (I->isLifetimeStartOrEnd())
+ continue;
if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
@@ -775,6 +778,10 @@ bool SafeStack::run() {
++NumUnsafeStackRestorePointsFunctions;
IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
+ // Calls must always have a debug location, or else inlining breaks. So
+ // we explicitly set an artificial debug location here.
+ if (DISubprogram *SP = F.getSubprogram())
+ IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP));
if (SafeStackUsePointerAddress) {
Value *Fn = F.getParent()->getOrInsertFunction(
"__safestack_pointer_address", StackPtrTy->getPointerTo(0));
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
index 329458778a98..726c38002817 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
@@ -46,11 +46,10 @@ const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
}
bool StackColoring::readMarker(Instruction *I, bool *IsStart) {
- auto *II = dyn_cast<IntrinsicInst>(I);
- if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end))
+ if (!I->isLifetimeStartOrEnd())
return false;
+ auto *II = cast<IntrinsicInst>(I);
*IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start;
return true;
}
@@ -172,7 +171,9 @@ void StackColoring::calculateLocalLiveness() {
BitVector LocalLiveIn;
for (auto *PredBB : predecessors(BB)) {
LivenessMap::const_iterator I = BlockLiveness.find(PredBB);
- assert(I != BlockLiveness.end() && "Predecessor not found");
+ // If a predecessor is unreachable, ignore it.
+ if (I == BlockLiveness.end())
+ continue;
LocalLiveIn |= I->second.LiveOut;
}
diff --git a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 9387722bfebd..2684f92b3a93 100644
--- a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -77,6 +77,21 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
return new ScalarizeMaskedMemIntrin();
}
+static bool isConstantIntVector(Value *Mask) {
+ Constant *C = dyn_cast<Constant>(Mask);
+ if (!C)
+ return false;
+
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *CElt = C->getAggregateElement(i);
+ if (!CElt || !isa<ConstantInt>(CElt))
+ return false;
+ }
+
+ return true;
+}
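// Note: unlike the old isa<ConstantVector> test, this helper also accepts
// constant masks that are not ConstantVector nodes (e.g. an all-zero
// ConstantAggregateZero), while conservatively rejecting masks that contain
// undef elements, since those are not ConstantInts.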
+
// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
// <16 x i1> %mask, <16 x i32> %passthru)
@@ -85,32 +100,29 @@ FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
//
// %1 = bitcast i8* %addr to i32*
// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.load, label %else
+// br i1 %2, label %cond.load, label %else
//
// cond.load: ; preds = %0
-// %4 = getelementptr i32* %1, i32 0
-// %5 = load i32* %4
-// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// %3 = getelementptr i32* %1, i32 0
+// %4 = load i32* %3
+// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
// br label %else
//
// else: ; preds = %0, %cond.load
-// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
-// %7 = extractelement <16 x i1> %mask, i32 1
-// %8 = icmp eq i1 %7, true
-// br i1 %8, label %cond.load1, label %else2
+// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
+// %6 = extractelement <16 x i1> %mask, i32 1
+// br i1 %6, label %cond.load1, label %else2
//
// cond.load1: ; preds = %else
-// %9 = getelementptr i32* %1, i32 1
-// %10 = load i32* %9
-// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// %7 = getelementptr i32* %1, i32 1
+// %8 = load i32* %7
+// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
// br label %else2
//
// else2: ; preds = %else, %cond.load1
-// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
-// %12 = extractelement <16 x i1> %mask, i32 2
-// %13 = icmp eq i1 %12, true
-// br i1 %13, label %cond.load4, label %else5
+// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
+// %10 = extractelement <16 x i1> %mask, i32 2
+// br i1 %10, label %cond.load4, label %else5
//
static void scalarizeMaskedLoad(CallInst *CI) {
Value *Ptr = CI->getArgOperand(0);
@@ -119,25 +131,19 @@ static void scalarizeMaskedLoad(CallInst *CI) {
Value *Src0 = CI->getArgOperand(3);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
- assert(VecType && "Unexpected return type of masked load intrinsic");
+ VectorType *VecType = cast<VectorType>(CI->getType());
- Type *EltTy = CI->getType()->getVectorElementType();
+ Type *EltTy = VecType->getElementType();
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
// Short-cut if the mask is all-true.
- bool IsAllOnesMask =
- isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
CI->replaceAllUsesWith(NewI);
CI->eraseFromParent();
@@ -145,21 +151,19 @@ static void scalarizeMaskedLoad(CallInst *CI) {
}
// Adjust alignment for the scalar instruction.
- AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);
+ AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
- if (isa<ConstantVector>(Mask)) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
@@ -167,35 +171,21 @@ static void scalarizeMaskedLoad(CallInst *CI) {
VResult =
Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_load = icmp eq i1 %mask_1, true
- // br i1 %to_load, label %cond.load, label %else
+ // br i1 %mask_1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate =
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
// Create "cond" block
//
@@ -203,30 +193,34 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// %Elt = load i32* %EltAddr
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
- CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
+ "cond.load");
Builder.SetInsertPoint(InsertPt);
Value *Gep =
Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock =
CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
+ BasicBlock *PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
+ // Create the phi to join the new and previous value.
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
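// Note: because VResult is now seeded with the passthru value and a phi is
// created at the top of every new "else" block, the final select on the mask
// that the old code emitted after the loop is no longer needed.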
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
@@ -238,24 +232,22 @@ static void scalarizeMaskedLoad(CallInst *CI) {
//
// %1 = bitcast i8* %addr to i32*
// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.store, label %else
+// br i1 %2, label %cond.store, label %else
//
// cond.store: ; preds = %0
-// %4 = extractelement <16 x i32> %val, i32 0
-// %5 = getelementptr i32* %1, i32 0
-// store i32 %4, i32* %5
+// %3 = extractelement <16 x i32> %val, i32 0
+// %4 = getelementptr i32* %1, i32 0
+// store i32 %3, i32* %4
// br label %else
//
// else: ; preds = %0, %cond.store
-// %6 = extractelement <16 x i1> %mask, i32 1
-// %7 = icmp eq i1 %6, true
-// br i1 %7, label %cond.store1, label %else2
+// %5 = extractelement <16 x i1> %mask, i32 1
+// br i1 %5, label %cond.store1, label %else2
//
// cond.store1: ; preds = %else
-// %8 = extractelement <16 x i32> %val, i32 1
-// %9 = getelementptr i32* %1, i32 1
-// store i32 %8, i32* %9
+// %6 = extractelement <16 x i32> %val, i32 1
+// %7 = getelementptr i32* %1, i32 1
+// store i32 %6, i32* %7
// br label %else2
// . . .
static void scalarizeMaskedStore(CallInst *CI) {
@@ -265,8 +257,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
Value *Mask = CI->getArgOperand(3);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(Src->getType());
- assert(VecType && "Unexpected data type in masked store intrinsic");
+ VectorType *VecType = cast<VectorType>(Src->getType());
Type *EltTy = VecType->getElementType();
@@ -277,26 +268,23 @@ static void scalarizeMaskedStore(CallInst *CI) {
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
// Short-cut if the mask is all-true.
- bool IsAllOnesMask =
- isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
+ if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
Builder.CreateAlignedStore(Src, Ptr, AlignVal);
CI->eraseFromParent();
return;
}
// Adjust alignment for the scalar instruction.
- AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits() / 8);
+ AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
// Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
- if (isa<ConstantVector>(Mask)) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
Value *Gep =
@@ -311,13 +299,10 @@ static void scalarizeMaskedStore(CallInst *CI) {
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_store = icmp eq i1 %mask_1, true
- // br i1 %to_store, label %cond.store, label %else
+ // br i1 %mask_1, label %cond.store, label %else
//
Value *Predicate =
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
// Create "cond" block
//
@@ -339,7 +324,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
IfBlock = NewIfBlock;
}
@@ -352,30 +337,28 @@ static void scalarizeMaskedStore(CallInst *CI) {
// to a chain of basic blocks, loading elements one-by-one if
// the appropriate mask bit is set
//
-// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> %Mask, i32 0
-// % ToLoad0 = icmp eq i1 % Mask0, true
-// br i1 % ToLoad0, label %cond.load, label %else
+// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// br i1 %Mask0, label %cond.load, label %else
//
// cond.load:
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// % Load0 = load i32, i32* % Ptr0, align 4
-// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// %Load0 = load i32, i32* %Ptr0, align 4
+// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
// br label %else
//
// else:
-// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
-// % Mask1 = extractelement <16 x i1> %Mask, i32 1
-// % ToLoad1 = icmp eq i1 % Mask1, true
-// br i1 % ToLoad1, label %cond.load1, label %else2
+// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// br i1 %Mask1, label %cond.load1, label %else2
//
// cond.load1:
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// % Load1 = load i32, i32* % Ptr1, align 4
-// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// %Load1 = load i32, i32* %Ptr1, align 4
+// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
// br label %else2
// . . .
-// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
static void scalarizeMaskedGather(CallInst *CI) {
Value *Ptrs = CI->getArgOperand(0);
@@ -383,32 +366,24 @@ static void scalarizeMaskedGather(CallInst *CI) {
Value *Mask = CI->getArgOperand(2);
Value *Src0 = CI->getArgOperand(3);
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
-
- assert(VecType && "Unexpected return type of masked load intrinsic");
+ VectorType *VecType = cast<VectorType>(CI->getType());
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
unsigned VectorWidth = VecType->getNumElements();
// Shorten the way if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
"Ptr" + Twine(Idx));
@@ -417,35 +392,20 @@ static void scalarizeMaskedGather(CallInst *CI) {
VResult = Builder.CreateInsertElement(
VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
- // %ToLoad1 = icmp eq i1 %Mask1, true
- // br i1 %ToLoad1, label %cond.load, label %else
+ // br i1 %Mask1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
"Mask" + Twine(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToLoad" + Twine(Idx));
// Create "cond" block
//
@@ -453,31 +413,33 @@ static void scalarizeMaskedGather(CallInst *CI) {
// %Elt = load i32* %EltAddr
// VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
//
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
"Ptr" + Twine(Idx));
LoadInst *Load =
Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
- "Res" + Twine(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
+ BasicBlock *PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
@@ -487,26 +449,24 @@ static void scalarizeMaskedGather(CallInst *CI) {
// to a chain of basic blocks, that stores elements one-by-one if
// the appropriate mask bit is set.
//
-// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> % Mask, i32 0
-// % ToStore0 = icmp eq i1 % Mask0, true
-// br i1 %ToStore0, label %cond.store, label %else
+// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// br i1 %Mask0, label %cond.store, label %else
//
// cond.store:
-// % Elt0 = extractelement <16 x i32> %Src, i32 0
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// store i32 %Elt0, i32* % Ptr0, align 4
+// %Elt0 = extractelement <16 x i32> %Src, i32 0
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* %Ptr0, align 4
// br label %else
//
// else:
-// % Mask1 = extractelement <16 x i1> % Mask, i32 1
-// % ToStore1 = icmp eq i1 % Mask1, true
-// br i1 % ToStore1, label %cond.store1, label %else2
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// br i1 %Mask1, label %cond.store1, label %else2
//
// cond.store1:
-// % Elt1 = extractelement <16 x i32> %Src, i32 1
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// store i32 % Elt1, i32* % Ptr1, align 4
+// %Elt1 = extractelement <16 x i32> %Src, i32 1
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 %Elt1, i32* %Ptr1, align 4
// br label %else2
// . . .
static void scalarizeMaskedScatter(CallInst *CI) {
@@ -531,11 +491,9 @@ static void scalarizeMaskedScatter(CallInst *CI) {
unsigned VectorWidth = Src->getType()->getVectorNumElements();
// Shorten the way if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
+ if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
"Elt" + Twine(Idx));
@@ -546,24 +504,21 @@ static void scalarizeMaskedScatter(CallInst *CI) {
CI->eraseFromParent();
return;
}
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
- // % ToStore = icmp eq i1 % Mask1, true
- // br i1 % ToStore, label %cond.store, label %else
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
+ // br i1 %Mask1, label %cond.store, label %else
//
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
"Mask" + Twine(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToStore" + Twine(Idx));
// Create "cond" block
//
- // % Elt1 = extractelement <16 x i32> %Src, i32 1
- // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
- // %store i32 % Elt1, i32* % Ptr1
+ // %Elt1 = extractelement <16 x i32> %Src, i32 1
+ // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+ // %store i32 %Elt1, i32* %Ptr1
//
BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
Builder.SetInsertPoint(InsertPt);
@@ -578,7 +533,7 @@ static void scalarizeMaskedScatter(CallInst *CI) {
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
OldBr->eraseFromParent();
IfBlock = NewIfBlock;
}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 46064012d9d8..6c135b3d69d6 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -68,39 +68,36 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
return &TII->get(Node->getMachineOpcode());
}
-LLVM_DUMP_METHOD
-raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+LLVM_DUMP_METHOD void SDep::dump(const TargetRegisterInfo *TRI) const {
switch (getKind()) {
- case Data: OS << "Data"; break;
- case Anti: OS << "Anti"; break;
- case Output: OS << "Out "; break;
- case Order: OS << "Ord "; break;
+ case Data: dbgs() << "Data"; break;
+ case Anti: dbgs() << "Anti"; break;
+ case Output: dbgs() << "Out "; break;
+ case Order: dbgs() << "Ord "; break;
}
switch (getKind()) {
case Data:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
if (TRI && isAssignedRegDep())
- OS << " Reg=" << printReg(getReg(), TRI);
+ dbgs() << " Reg=" << printReg(getReg(), TRI);
break;
case Anti:
case Output:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
break;
case Order:
- OS << " Latency=" << getLatency();
+ dbgs() << " Latency=" << getLatency();
switch(Contents.OrdKind) {
- case Barrier: OS << " Barrier"; break;
+ case Barrier: dbgs() << " Barrier"; break;
case MayAliasMem:
- case MustAliasMem: OS << " Memory"; break;
- case Artificial: OS << " Artificial"; break;
- case Weak: OS << " Weak"; break;
- case Cluster: OS << " Cluster"; break;
+ case MustAliasMem: dbgs() << " Memory"; break;
+ case Artificial: dbgs() << " Artificial"; break;
+ case Weak: dbgs() << " Weak"; break;
+ case Cluster: dbgs() << " Cluster"; break;
}
break;
}
-
- return OS;
}
bool SUnit::addPred(const SDep &D, bool Required) {
@@ -337,33 +334,7 @@ void SUnit::biasCriticalPath() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
-raw_ostream &SUnit::print(raw_ostream &OS,
- const SUnit *Entry, const SUnit *Exit) const {
- if (this == Entry)
- OS << "EntrySU";
- else if (this == Exit)
- OS << "ExitSU";
- else
- OS << "SU(" << NodeNum << ")";
- return OS;
-}
-
-LLVM_DUMP_METHOD
-raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const {
- return print(OS, &G->EntrySU, &G->ExitSU);
-}
-
-LLVM_DUMP_METHOD
-void SUnit::dump(const ScheduleDAG *G) const {
- print(dbgs(), G);
- dbgs() << ": ";
- G->dumpNode(this);
-}
-
-LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
- dump(G);
-
+LLVM_DUMP_METHOD void SUnit::dumpAttributes() const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
if (WeakPredsLeft)
@@ -374,21 +345,38 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << getDepth() << "\n";
dbgs() << " Height : " << getHeight() << "\n";
+}
+
+LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeName(const SUnit &SU) const {
+ if (&SU == &EntrySU)
+ dbgs() << "EntrySU";
+ else if (&SU == &ExitSU)
+ dbgs() << "ExitSU";
+ else
+ dbgs() << "SU(" << SU.NodeNum << ")";
+}
- if (Preds.size() != 0) {
+LLVM_DUMP_METHOD void ScheduleDAG::dumpNodeAll(const SUnit &SU) const {
+ dumpNode(SU);
+ SU.dumpAttributes();
+ if (SU.Preds.size() > 0) {
dbgs() << " Predecessors:\n";
- for (const SDep &Dep : Preds) {
+ for (const SDep &Dep : SU.Preds) {
dbgs() << " ";
- Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
- Dep.print(dbgs(), G->TRI); dbgs() << '\n';
+ dumpNodeName(*Dep.getSUnit());
+ dbgs() << ": ";
+ Dep.dump(TRI);
+ dbgs() << '\n';
}
}
- if (Succs.size() != 0) {
+ if (SU.Succs.size() > 0) {
dbgs() << " Successors:\n";
- for (const SDep &Dep : Succs) {
+ for (const SDep &Dep : SU.Succs) {
dbgs() << " ";
- Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
- Dep.print(dbgs(), G->TRI); dbgs() << '\n';
+ dumpNodeName(*Dep.getSUnit());
+ dbgs() << ": ";
+ Dep.dump(TRI);
+ dbgs() << '\n';
}
}
}
@@ -406,7 +394,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
}
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has not been scheduled!\n";
AnyNotSched = true;
}
@@ -415,7 +403,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
unsigned(std::numeric_limits<int>::max())) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
@@ -424,7 +412,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
if (SUnit.NumSuccsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has successors left!\n";
AnyNotSched = true;
}
@@ -432,7 +420,7 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
if (SUnit.NumPredsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnit.dump(this);
+ dumpNode(SUnit);
dbgs() << "has predecessors left!\n";
AnyNotSched = true;
}
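
The hunks above move per-node dumping out of SUnit (print/dump/dumpAll) and into ScheduleDAG itself (dumpNodeName/dumpNodeAll plus a node-local dumpAttributes), so a node no longer needs a back-pointer to its graph just to be printed. A minimal standalone sketch of the same refactoring shape, using hypothetical Node/Graph types rather than LLVM's classes:

#include <cstdio>
#include <vector>

// Toy stand-ins for SUnit/ScheduleDAG; names are illustrative only.
struct Node {
  unsigned Num;
  std::vector<unsigned> Preds, Succs;
  // Node-local attributes can still be printed by the node itself...
  void dumpAttributes() const {
    std::printf("  # preds left : %zu\n  # succs left : %zu\n",
                Preds.size(), Succs.size());
  }
};

struct Graph {
  std::vector<Node> Nodes;
  // ...but naming and full dumps live on the graph, which is what knows the
  // entry/exit sentinels and the target info needed to print edges.
  void dumpNodeName(const Node &N) const { std::printf("SU(%u)", N.Num); }
  void dumpNodeAll(const Node &N) const {
    dumpNodeName(N);
    std::printf(":\n");
    N.dumpAttributes();
  }
  void dump() const {
    for (const Node &N : Nodes)
      dumpNodeAll(N);
  }
};

int main() {
  Graph G{{{0, {}, {1}}, {1, {0}, {}}}};
  G.dump();
}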
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index d1c5ddabb975..99406ed1496a 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -234,6 +234,11 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = MF.getSubtarget();
+ // Only use any non-zero latency for real defs/uses, in contrast to
+ // "fake" operands added by regalloc.
+ const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
+ bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
+ !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
@@ -257,11 +262,18 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
}
- Dep.setLatency(
- SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
- UseOp));
+ const MCInstrDesc *UseMIDesc =
+ (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
+ bool ImplicitPseudoUse =
+ (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
+ !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
+ if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
+ Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp));
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ } else
+ Dep.setLatency(0);
- ST.adjustSchedDependency(SU, UseSU, Dep);
UseSU->addPred(Dep);
}
}
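
The added check suppresses scheduling latency on physical-register dependencies whose def or use is only an implicit operand appended after the instruction's declared operands, i.e. added by the register allocator rather than listed in the instruction descriptor. A rough standalone model of that test, with made-up types (InstrDesc here is hypothetical, not LLVM's MCInstrDesc):

#include <algorithm>
#include <vector>

// Hypothetical stand-in for an instruction descriptor: the statically
// declared operand count plus the physical registers it implicitly defines.
struct InstrDesc {
  unsigned NumDeclaredOperands;
  std::vector<unsigned> ImplicitDefs;
  bool hasImplicitDefOfPhysReg(unsigned Reg) const {
    return std::find(ImplicitDefs.begin(), ImplicitDefs.end(), Reg) !=
           ImplicitDefs.end();
  }
};

// An operand is a "pseudo" implicit def if it sits past the declared
// operands and the descriptor does not know about it either; in that case
// the patch sets the edge latency to 0 instead of querying the sched model.
bool isImplicitPseudoDef(const InstrDesc &Desc, unsigned OperIdx,
                         unsigned Reg) {
  return OperIdx >= Desc.NumDeclaredOperands &&
         !Desc.hasImplicitDefOfPhysReg(Reg);
}

int main() {
  InstrDesc D{2, {7}};
  // Operand index 3 on a register the descriptor does not implicitly define:
  return isImplicitPseudoDef(D, 3, 8) ? 0 : 1;
}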
@@ -996,7 +1008,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
for (auto &I : loads)
for (auto *SU : I.second)
NodeNums.push_back(SU->NodeNum);
- llvm::sort(NodeNums.begin(), NodeNums.end());
+ llvm::sort(NodeNums);
// The N last elements in NodeNums will be removed, and the SU with
// the lowest NodeNum of them will become the new BarrierChain to
@@ -1097,10 +1109,22 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
}
}
-void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
- // Cannot completely remove virtual function even in release mode.
+void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dumpNodeName(SU);
+ dbgs() << ": ";
+ SU.getInstr()->dump();
+#endif
+}
+
+void ScheduleDAGInstrs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- SU->getInstr()->dump();
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
#endif
}
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b8bfe69a76e1..4301372179b8 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -157,8 +157,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (!freeUnits) {
LLVM_DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
- LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
- LLVM_DEBUG(DAG->dumpNode(SU));
+ LLVM_DEBUG(DAG->dumpNode(*SU));
return Hazard;
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a5c0b7750410..ff5505c97721 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
@@ -83,6 +84,7 @@ STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
+STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
@@ -249,6 +251,11 @@ namespace {
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ // Scalars have size 0 to distinguish from singleton vectors.
+ SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
+ bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
+ bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
+
/// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
@@ -257,8 +264,9 @@ namespace {
/// \param EltNo index of the vector element to load.
/// \param OriginalLoad load that EVE came from to be replaced.
/// \returns EVE on success SDValue() on failure.
- SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
- SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
+ SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -285,6 +293,8 @@ namespace {
SDValue visitADD(SDNode *N);
SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
+ SDValue visitADDSAT(SDNode *N);
+ SDValue visitSUBSAT(SDNode *N);
SDValue visitADDC(SDNode *N);
SDValue visitUADDO(SDNode *N);
SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
@@ -318,6 +328,7 @@ namespace {
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
+ SDValue visitFunnelShift(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
@@ -350,6 +361,7 @@ namespace {
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitFPOW(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
@@ -364,6 +376,8 @@ namespace {
SDValue visitFFLOOR(SDNode *N);
SDValue visitFMINNUM(SDNode *N);
SDValue visitFMAXNUM(SDNode *N);
+ SDValue visitFMINIMUM(SDNode *N);
+ SDValue visitFMAXIMUM(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -393,7 +407,7 @@ namespace {
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1);
+ SDValue N1, SDNodeFlags Flags);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -401,11 +415,14 @@ namespace {
SDValue foldVSelectOfConstants(SDNode *N);
SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
- SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
+ SDValue convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
@@ -455,7 +472,6 @@ namespace {
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
- SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
@@ -482,6 +498,10 @@ namespace {
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
+ // Helper for findBetterNeighborChains. Walk up the store chain, adding
+ // additional chained stores that do not overlap and can be parallelized.
+ bool parallelizeChainedStores(StoreSDNode *St);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -515,7 +535,7 @@ namespace {
EVT &MemVT, unsigned ShAmt = 0);
/// Used by BackwardsPropagateMask to find suitable loads.
- bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
+ bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
@@ -865,12 +885,6 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-static SDValue peekThroughBitcast(SDValue V) {
- while (V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- return V;
-}
-
// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
@@ -901,50 +915,23 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
return true;
}
-// Determines if it is a constant null integer or a splatted vector of a
-// constant null integer (with no undefs).
-// Build vector implicit truncation is not an issue for null values.
-static bool isNullConstantOrNullSplatConstant(SDValue N) {
- // TODO: may want to use peekThroughBitcast() here.
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isNullValue();
- return false;
-}
-
-// Determines if it is a constant integer of one or a splatted vector of a
-// constant integer of one (with no undefs).
-// Do not permit build vector implicit truncation.
-static bool isOneConstantOrOneSplatConstant(SDValue N) {
- // TODO: may want to use peekThroughBitcast() here.
- unsigned BitWidth = N.getScalarValueSizeInBits();
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
- return false;
-}
-
-// Determines if it is a constant integer of all ones or a splatted vector of a
-// constant integer of all ones (with no undefs).
-// Do not permit build vector implicit truncation.
-static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
- N = peekThroughBitcast(N);
- unsigned BitWidth = N.getScalarValueSizeInBits();
- if (ConstantSDNode *Splat = isConstOrConstSplat(N))
- return Splat->isAllOnesValue() &&
- Splat->getAPIntValue().getBitWidth() == BitWidth;
- return false;
-}
-
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
-static bool isAnyConstantBuildVector(const SDNode *N) {
- return ISD::isBuildVectorOfConstantSDNodes(N) ||
- ISD::isBuildVectorOfConstantFPSDNodes(N);
+static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ return isConstantOrConstantVector(V, NoOpaques) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1) {
+ SDValue N1, SDNodeFlags Flags) {
+ // Don't reassociate reductions.
+ if (Flags.hasVectorReduction())
+ return SDValue();
+
EVT VT = N0.getValueType();
- if (N0.getOpcode() == Opc) {
+ if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
@@ -964,7 +951,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
}
}
- if (N1.getOpcode() == Opc) {
+ if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
@@ -1501,6 +1488,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
+ case ISD::SADDSAT:
+ case ISD::UADDSAT: return visitADDSAT(N);
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: return visitSUBSAT(N);
case ISD::ADDC: return visitADDC(N);
case ISD::UADDO: return visitUADDO(N);
case ISD::SUBC: return visitSUBC(N);
@@ -1532,6 +1523,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::FSHL:
+ case ISD::FSHR: return visitFunnelShift(N);
case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
@@ -1564,6 +1557,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::FPOW: return visitFPOW(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
@@ -1576,6 +1570,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FFLOOR: return visitFFLOOR(N);
case ISD::FMINNUM: return visitFMINNUM(N);
case ISD::FMAXNUM: return visitFMAXNUM(N);
+ case ISD::FMINIMUM: return visitFMINIMUM(N);
+ case ISD::FMAXIMUM: return visitFMAXIMUM(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
@@ -1855,8 +1851,11 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
// can be tried again once they have new operands.
AddUsersToWorklist(N);
do {
+ // Do as a single replacement to avoid rewalking use lists.
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
+ Ops.push_back(N->getOperand(i));
+ DAG.ReplaceAllUsesWith(N, Ops.data());
} while (!N->use_empty());
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -1870,17 +1869,7 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
}
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
- auto BinOpcode = BO->getOpcode();
- assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
- BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
- BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
- BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
- BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
- BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
- BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
- BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
- BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
- "Unexpected binary operator");
+ assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
@@ -1910,11 +1899,11 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
// propagate non constant operands into select. I.e.:
// and (select Cond, 0, -1), X --> select Cond, 0, X
// or X, (select Cond, -1, 0) --> select Cond, -1, X
- bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
- (isNullConstantOrNullSplatConstant(CT) ||
- isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
- (isNullConstantOrNullSplatConstant(CF) ||
- isAllOnesConstantOrAllOnesSplatConstant(CF));
+ auto BinOpcode = BO->getOpcode();
+ bool CanFoldNonConst =
+ (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+ (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
+ (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
@@ -2009,10 +1998,8 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return SDValue();
// The shift must be of a 'not' value.
- // TODO: Use isBitwiseNot() if it works with vectors.
SDValue Not = ShiftOp.getOperand(0);
- if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
- !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+ if (!Not.hasOneUse() || !isBitwiseNot(Not))
return SDValue();
// The shift must be moving the sign bit to the least-significant-bit.
@@ -2085,7 +2072,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// add (zext i1 X), -1 -> sext (not i1 X)
// because most (?) targets generate better code for the zext form.
if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
- isOneConstantOrOneSplatConstant(N1)) {
+ isOneOrOneSplat(N1)) {
SDValue X = N0.getOperand(0);
if ((!LegalOperations ||
(TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
@@ -2110,17 +2097,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return NewSel;
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
+ if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N0.getOperand(0)))
+ if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
@@ -2178,7 +2163,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
// fold (add (xor a, -1), 1) -> (sub 0, a)
- if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
@@ -2191,6 +2176,49 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitADDSAT(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (add_sat x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ }
+
+ // fold (add_sat x, undef) -> -1
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ // canonicalize constant to RHS
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, VT, N1, N0);
+ // fold (add_sat c1, c2) -> c3
+ return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (add_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ // If it cannot overflow, transform into an add.
+ if (Opcode == ISD::UADDSAT)
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+
+ return SDValue();
+}
+
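
The new visitADDSAT folds are easiest to see on plain saturating arithmetic: add_sat with a zero operand is the identity, and if the add provably cannot overflow it is just an ordinary add. (The undef fold above relies on the combiner being free to pick the undef value so that the result saturates to all-ones.) A small self-contained illustration of the unsigned case, independent of LLVM:

#include <cassert>
#include <cstdint>

// Unsigned saturating add on 8 bits.
uint8_t uadd_sat(uint8_t a, uint8_t b) {
  unsigned s = unsigned(a) + unsigned(b);
  return s > 0xFF ? 0xFF : uint8_t(s);
}

int main() {
  // fold (add_sat x, 0) -> x
  for (unsigned x = 0; x <= 0xFF; ++x)
    assert(uadd_sat(uint8_t(x), 0) == x);

  // If the add provably cannot overflow, saturating add == plain add.
  // Here both inputs are <= 0x7F, so the sum always fits in 8 bits.
  for (unsigned x = 0; x <= 0x7F; ++x)
    for (unsigned y = 0; y <= 0x7F; ++y)
      assert(uadd_sat(uint8_t(x), uint8_t(y)) == uint8_t(x + y));
  return 0;
}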
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
bool Masked = false;
@@ -2235,7 +2263,7 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0,
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
@@ -2248,8 +2276,7 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits &&
- isOneConstantOrOneSplatConstant(N1->getOperand(1)))
+ if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
}
@@ -2380,7 +2407,7 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
- if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
DAG.getConstant(0, DL, VT),
N0.getOperand(0));
@@ -2539,8 +2566,7 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
- SelectionDAG &DAG, bool LegalOperations,
- bool LegalTypes) {
+ SelectionDAG &DAG, bool LegalOperations) {
if (!VT.isVector())
return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
@@ -2567,7 +2593,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// fold (sub c1, c2) -> c1-c2
@@ -2586,7 +2612,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
- if (isNullConstantOrNullSplatConstant(N0)) {
+ if (isNullOrNullSplat(N0)) {
unsigned BitWidth = VT.getScalarSizeInBits();
// Right-shifting everything out but the sign bit followed by negation is
// the same as flipping arithmetic/logical shift type without the negation:
@@ -2617,12 +2643,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (isAllOnesConstantOrAllOnesSplatConstant(N0))
+ if (isAllOnesOrAllOnesSplat(N0))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (A - (0-B)) -> A+B
- if (N1.getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
// fold A-(A-B) -> B
@@ -2676,14 +2701,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
+ isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
}
if (N1.getOperand(1).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
+ isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0),
N1.getOperand(1).getOperand(1));
@@ -2756,6 +2781,43 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // TODO SimplifyVBinOp
+
+ // fold (sub_sat x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub_sat x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (sub_sat x, x) -> 0
+ if (N0 == N1)
+ return DAG.getConstant(0, DL, VT);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // fold (sub_sat c1, c2) -> c3
+ return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
+ N1.getNode());
+ }
+
+ // fold (sub_sat x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+
+ return SDValue();
+}
+
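
visitSUBSAT mirrors the add case, but its undef fold differs: sub_sat with an undef operand becomes 0 (undef can always be chosen equal to the other operand), whereas add_sat with undef becomes all-ones. A quick sanity check of the x - x and x - 0 identities for unsigned saturating subtract, again just plain arithmetic:

#include <cassert>
#include <cstdint>

// Unsigned saturating subtract on 8 bits: clamps at 0 instead of wrapping.
uint8_t usub_sat(uint8_t a, uint8_t b) { return a > b ? uint8_t(a - b) : 0; }

int main() {
  for (unsigned x = 0; x <= 0xFF; ++x) {
    assert(usub_sat(uint8_t(x), uint8_t(x)) == 0); // (sub_sat x, x) -> 0
    assert(usub_sat(uint8_t(x), 0) == x);          // (sub_sat x, 0) -> x
  }
  return 0;
}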
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2931,6 +2993,39 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
+ // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+ // mul x, (2^N + 1) --> add (shl x, N), x
+ // mul x, (2^N - 1) --> sub (shl x, N), x
+ // Examples: x * 33 --> (x << 5) + x
+ // x * 15 --> (x << 4) - x
+ // x * -33 --> -((x << 5) + x)
+ // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
+ if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+ // TODO: We could handle more general decomposition of any constant by
+ // having the target set a limit on number of ops and making a
+ // callback to determine that sequence (similar to sqrt expansion).
+ unsigned MathOp = ISD::DELETED_NODE;
+ APInt MulC = ConstValue1.abs();
+ if ((MulC - 1).isPowerOf2())
+ MathOp = ISD::ADD;
+ else if ((MulC + 1).isPowerOf2())
+ MathOp = ISD::SUB;
+
+ if (MathOp != ISD::DELETED_NODE) {
+ unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
+ : (MulC + 1).logBase2();
+ assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
+ "Not expecting multiply-by-constant that could have simplified");
+ SDLoc DL(N);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ShAmt, DL, VT));
+ SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
+ if (ConstValue1.isNegative())
+ R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
+ return R;
+ }
+ }
+
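
The decomposition above only needs the constant's absolute value to be a power of two plus or minus one; for a negative constant the result is negated afterwards. A standalone check of the identities listed in the comment (nothing LLVM-specific is assumed):

#include <cassert>
#include <cstdint>

// mul x, (2^N + 1) --> (x << N) + x
// mul x, (2^N - 1) --> (x << N) - x
// For a negative constant, decompose |C| and negate the result.
int32_t mulByConst(int32_t x, int32_t c, unsigned shAmt, bool isSub) {
  int32_t r = isSub ? (x << shAmt) - x : (x << shAmt) + x;
  return c < 0 ? -r : r;
}

int main() {
  for (int32_t x = 0; x <= 100; ++x) {
    assert(mulByConst(x, 33, 5, /*isSub=*/false) == x * 33);  // 33 = 2^5 + 1
    assert(mulByConst(x, 15, 4, /*isSub=*/true) == x * 15);   // 15 = 2^4 - 1
    assert(mulByConst(x, -33, 5, /*isSub=*/false) == x * -33);
    assert(mulByConst(x, -15, 4, /*isSub=*/true) == x * -15);
  }
  return 0;
}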
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N0.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
@@ -2974,7 +3069,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
+ if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
@@ -3076,7 +3171,16 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- if (DAG.isUndef(N->getOpcode(), {N0, N1}))
+ unsigned Opc = N->getOpcode();
+ bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // X / undef -> undef
+ // X % undef -> undef
+ // X / 0 -> undef
+ // X % 0 -> undef
+ // NOTE: This includes vectors where any divisor element is zero/undef.
+ if (DAG.isUndef(Opc, {N0, N1}))
return DAG.getUNDEF(VT);
// undef / X -> 0
@@ -3084,6 +3188,26 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
+ // 0 / X -> 0
+ // 0 % X -> 0
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ if (N0C && N0C->isNullValue())
+ return N0;
+
+ // X / X -> 1
+ // X % X -> 0
+ if (N0 == N1)
+ return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
+
+ // X / 1 -> X
+ // X % 1 -> 0
+ // If this is a boolean op (single-bit element type), we can't have
+ // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+ // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
+ // it's a 1.
+ if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+ return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
+
return SDValue();
}
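
The identities newly handled by simplifyDivRem hold for any non-zero divisor; a zero divisor is undefined for div/rem, so the combiner is free to produce the same results in that case as well. A plain-arithmetic restatement:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 1; x <= 1000; ++x) {
    assert(0u / x == 0u && 0u % x == 0u); // 0 / X -> 0,  0 % X -> 0
    assert(x / x == 1u && x % x == 0u);   // X / X -> 1,  X % X -> 0
    assert(x / 1u == x && x % 1u == 0u);  // X / 1 -> X,  X % 1 -> 0
  }
  // An i1 (boolean) divisor can only be 0 or 1; division by zero is
  // undefined, so the combiner may assume such a divisor is 1.
  return 0;
}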
@@ -3105,9 +3229,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
- // fold (sdiv X, 1) -> X
- if (N1C && N1C->isOne())
- return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
@@ -3128,8 +3249,19 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
- if (SDValue V = visitSDIVLike(N0, N1, N))
+ if (SDValue V = visitSDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+ // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
return V;
+ }
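
When the optimized divide is produced and a matching srem node already exists in the DAG, the remainder users are rewritten in terms of the quotient. That relies on the usual truncating-division identity, which C++ integer division also guarantees for both signs:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t a = -50; a <= 50; ++a)
    for (int32_t b = -50; b <= 50; ++b) {
      if (b == 0)
        continue;
      int32_t quotient = a / b;
      // Remainder = Dividend - (Quotient * Divisor), for both signs.
      assert(a % b == a - quotient * b);
    }
  return 0;
}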
// sdiv, srem -> sdivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
@@ -3148,8 +3280,6 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT CCVT = getSetCCResultType(VT);
unsigned BitWidth = VT.getScalarSizeInBits();
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
@@ -3166,8 +3296,7 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
- if (!N->getFlags().hasExact() &&
- ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
+ if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
@@ -3218,7 +3347,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -3245,9 +3375,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
- // fold (udiv X, 1) -> X
- if (N1C && N1C->isOne())
- return N0;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
if (N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -3260,8 +3387,19 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (SDValue V = visitUDIVLike(N0, N1, N))
+ if (SDValue V = visitUDIVLike(N0, N1, N)) {
+ // If the corresponding remainder node exists, update its users with
+ // (Dividend - (Quotient * Divisor)).
+ if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
+ { N0, N1 })) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
return V;
+ }
// sdiv, srem -> sdivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
@@ -3278,8 +3416,6 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
-
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -3311,7 +3447,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (isConstantOrConstantVector(N1) &&
+ !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -3380,8 +3517,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
- if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
- OptimizedDiv.getOpcode() != ISD::SDIVREM) {
+ if (OptimizedDiv.getNode()) {
+ // If the equivalent Div node also exists, update its users.
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
+ { N0, N1 }))
+ CombineTo(DivNode, OptimizedDiv);
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(OptimizedDiv.getNode());
@@ -3468,6 +3609,19 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
+ // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
+ SDLoc DL(N);
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ SDValue SRLAmt = DAG.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
+
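
The new mulhu fold uses the fact that the high half of x * 2^c is just x shifted right by (bitwidth - c). A quick 32-bit check; the c = 0 case (multiply by 1) is skipped here since its high half is simply zero and is presumably simplified by other folds:

#include <cassert>
#include <cstdint>

// High 32 bits of a 32x32 -> 64 unsigned multiply.
uint32_t mulhu32(uint32_t a, uint32_t b) {
  return uint32_t((uint64_t(a) * uint64_t(b)) >> 32);
}

int main() {
  uint32_t samples[] = {0u, 1u, 0x1234u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : samples)
    for (unsigned c = 1; c < 32; ++c)
      // mulhu x, (1 << c)  ==  x >> (32 - c)
      assert(mulhu32(x, 1u << c) == x >> (32 - c));
  return 0;
}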
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
@@ -3495,18 +3649,16 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
bool HiExists = N->hasAnyUseOfValue(1);
- if (!HiExists &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
+ if (!HiExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
return CombineTo(N, Res, Res);
}
// If the low half is not needed, just compute the high half.
bool LoExists = N->hasAnyUseOfValue(0);
- if (!LoExists &&
- (!LegalOperations ||
- TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ if (!LoExists && (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
return CombineTo(N, Res, Res);
}
@@ -3522,7 +3674,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
- TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
return CombineTo(N, LoOpt, LoOpt);
}
@@ -3532,7 +3684,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
- TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
return CombineTo(N, HiOpt, HiOpt);
}
@@ -3664,59 +3816,94 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return SDValue();
}
-/// If this is a binary operator with two operands of the same opcode, try to
-/// simplify it.
-SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+/// If this is a bitwise logic instruction and both operands have the same
+/// opcode, try to sink the other opcode after the logic instruction.
+SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+ unsigned LogicOpcode = N->getOpcode();
+ unsigned HandOpcode = N0.getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+ LogicOpcode == ISD::XOR) && "Expected logic opcode");
+ assert(HandOpcode == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
- if (N0.getNumOperands() == 0) return SDValue();
-
- // For each of OP in AND/OR/XOR:
- // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
- // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
- // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
- //
- // do not sink logical op inside of a vector extend, since it may combine
- // into a vsetcc.
- EVT Op0VT = N0.getOperand(0).getValueType();
- if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND ||
- N0.getOpcode() == ISD::BSWAP ||
- // Avoid infinite looping with PromoteIntBinOp.
- (N0.getOpcode() == ISD::ANY_EXTEND &&
- (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
- (N0.getOpcode() == ISD::TRUNCATE &&
- (!TLI.isZExtFree(VT, Op0VT) ||
- !TLI.isTruncateFree(Op0VT, VT)) &&
- TLI.isTypeLegal(Op0VT))) &&
- !VT.isVector() &&
- Op0VT == N1.getOperand(0).getValueType() &&
- (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
- SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0), N1.getOperand(0));
- AddToWorklist(ORNode.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
- }
-
- // For each of OP in SHL/SRL/SRA/AND...
- // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
- // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
- // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
- if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
- N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ if (N0.getNumOperands() == 0)
+ return SDValue();
+
+ // FIXME: We should check number of uses of the operands to not increase
+ // the instruction count for all transforms.
+
+ // Handle size-changing casts.
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N1.getOperand(0);
+ EVT XVT = X.getValueType();
+ SDLoc DL(N);
+ if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
+ HandOpcode == ISD::SIGN_EXTEND) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching integer source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization. Don't ever
+ // create an unsupported vector op.
+ if ((VT.isVector() || LegalOperations) &&
+ !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
+ return SDValue();
+ // Avoid infinite looping with PromoteIntBinOp.
+ // TODO: Should we apply desirable/legal constraints to all opcodes?
+ if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
+ !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ return SDValue();
+ // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
+ if (HandOpcode == ISD::TRUNCATE) {
+ // If both operands have other uses, this transform would create extra
+ // instructions without eliminating anything.
+ if (!N0.hasOneUse() && !N1.hasOneUse())
+ return SDValue();
+ // We need matching source types.
+ if (XVT != Y.getValueType())
+ return SDValue();
+ // Don't create an illegal op during or after legalization.
+ if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
+ return SDValue();
+ // Be extra careful sinking truncate. If it's free, there's no benefit in
+ // widening a binop. Also, don't create a logic op on an illegal type.
+ if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
+ return SDValue();
+ if (!TLI.isTypeLegal(XVT))
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
+ }
+
+ // For binops SHL/SRL/SRA/AND:
+ // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
+ if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
+ HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
N0.getOperand(1) == N1.getOperand(1)) {
- SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0), N1.getOperand(0));
- AddToWorklist(ORNode.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ORNode, N0.getOperand(1));
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
+ }
+
+ // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
+ if (HandOpcode == ISD::BSWAP) {
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
}
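
The rewritten hoistLogicOpWithSameOpcodeHands handles several shapes, but the underlying bit identities are independent of the DAG machinery: zero- and sign-extension commute with the bitwise logic ops, and a logic op of two shifts by the same amount can be done before the shift. A standalone spot check of the zero-extend and shift cases:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t xs[] = {0x00, 0x0F, 0x5A, 0xFF};
  for (uint8_t x : xs)
    for (uint8_t y : xs) {
      // logic_op (zext x), (zext y) --> zext (logic_op x, y)
      assert((uint32_t(x) & uint32_t(y)) == uint32_t(uint8_t(x & y)));
      assert((uint32_t(x) | uint32_t(y)) == uint32_t(uint8_t(x | y)));
      assert((uint32_t(x) ^ uint32_t(y)) == uint32_t(uint8_t(x ^ y)));
      // logic_op (x << z), (y << z) --> (logic_op x, y) << z
      for (unsigned z = 0; z < 8; ++z)
        assert(uint8_t((x << z) & (y << z)) == uint8_t(uint8_t(x & y) << z));
    }
  return 0;
}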
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
@@ -3726,21 +3913,12 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// we don't want to undo this promotion.
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
- if ((N0.getOpcode() == ISD::BITCAST ||
- N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
Level <= AfterLegalizeTypes) {
- SDValue In0 = N0.getOperand(0);
- SDValue In1 = N1.getOperand(0);
- EVT In0Ty = In0.getValueType();
- EVT In1Ty = In1.getValueType();
- SDLoc DL(N);
- // If both incoming values are integers, and the original types are the
- // same.
- if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
- SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
- SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
- AddToWorklist(Op.getNode());
- return BC;
+ // Input types must be integer and the same.
+ if (XVT.isInteger() && XVT == Y.getValueType()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ return DAG.getNode(HandOpcode, DL, VT, Logic);
}
}
@@ -3756,61 +3934,44 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// If both shuffles use the same mask, and both shuffles have the same first
// or second operand, then it might still be profitable to move the shuffle
// after the xor/and/or operation.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
- ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
- ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
-
- assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+ auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
+ assert(X.getValueType() == Y.getValueType() &&
"Inputs to shuffles are not the same type");
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
// Check also that shuffles have only one use to avoid introducing extra
// instructions.
- if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
- SVN0->getMask().equals(SVN1->getMask())) {
- SDValue ShOp = N0->getOperand(1);
-
- // Don't try to fold this node if it requires introducing a
- // build vector of all zeros that might be illegal at this stage.
- if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
- if (!LegalTypes)
- ShOp = DAG.getConstant(0, SDLoc(N), VT);
- else
- ShOp = SDValue();
- }
+ if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
+ !SVN0->getMask().equals(SVN1->getMask()))
+ return SDValue();
- // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
- // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
- // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
- if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
- SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- N0->getOperand(0), N1->getOperand(0));
- AddToWorklist(NewNode.getNode());
- return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
- SVN0->getMask());
- }
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ SDValue ShOp = N0.getOperand(1);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
- // Don't try to fold this node if it requires introducing a
- // build vector of all zeros that might be illegal at this stage.
- ShOp = N0->getOperand(0);
- if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
- if (!LegalTypes)
- ShOp = DAG.getConstant(0, SDLoc(N), VT);
- else
- ShOp = SDValue();
- }
+ // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
+ if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
+ }
- // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
- // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
- // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
- if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
- SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- N0->getOperand(1), N1->getOperand(1));
- AddToWorklist(NewNode.getNode());
- return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
- SVN0->getMask());
- }
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ ShOp = N0.getOperand(0);
+ if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
+ ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
+
+ // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
+ if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
+ SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
+ N1.getOperand(1));
+ return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
}
}
@@ -3846,8 +4007,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
bool IsInteger = OpVT.isInteger();
if (LR == RR && CC0 == CC1 && IsInteger) {
- bool IsZero = isNullConstantOrNullSplatConstant(LR);
- bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
+ bool IsZero = isNullOrNullSplat(LR);
+ bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
// All bits clear?
bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
@@ -4149,7 +4310,7 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
}
bool DAGCombiner::SearchForAndLoads(SDNode *N,
- SmallPtrSetImpl<LoadSDNode*> &Loads,
+ SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask,
SDNode *&NodeToMask) {
@@ -4186,7 +4347,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Use LE to convert equal sized loads to zext.
if (ExtVT.bitsLE(Load->getMemoryVT()))
- Loads.insert(Load);
+ Loads.push_back(Load);
continue;
}
@@ -4251,7 +4412,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
if (isa<LoadSDNode>(N->getOperand(0)))
return false;
- SmallPtrSet<LoadSDNode*, 8> Loads;
+ SmallVector<LoadSDNode*, 8> Loads;
SmallPtrSet<SDNode*, 2> NodesWithConsts;
SDNode *FixupNode = nullptr;
if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
@@ -4399,7 +4560,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
@@ -4414,7 +4575,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return NewSel;
// reassociate and
- if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
+ if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
@@ -4563,9 +4724,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Res = ReduceLoadWidth(N)) {
LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
-
AddToWorklist(N);
- CombineTo(LN0, Res, Res.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
return SDValue(N, 0);
}
}
@@ -4585,8 +4745,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// Masking the negated extension of a boolean is just the zero-extended
// boolean:
@@ -4596,7 +4756,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Note: the SimplifyDemandedBits fold below can make an information-losing
// transform, and then we have no way to find this better fold.
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
- if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
+ if (isNullOrNullSplat(N0.getOperand(0))) {
SDValue SubRHS = N0.getOperand(1);
if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
@@ -5124,16 +5284,16 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
+ if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
- // iff (c1 & c2) != 0.
- auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
- return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
+ // iff (c1 & c2) != 0 or c1/c2 are undef.
+ auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
+ return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
if (SDValue COR = DAG.FoldConstantArithmetic(
ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
@@ -5144,8 +5304,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
@@ -5257,9 +5417,9 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// Compute the shift amount we need to extract to complete the rotate.
const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
- APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
- if (NeededShiftAmt.isNegative())
+ if (OppShiftCst->getAPIntValue().ugt(VTWidth))
return SDValue();
+ APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
// Normalize the bitwidth of the two mul/udiv/shift constant operands.
APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
APInt OppLHSAmt = OppLHSCst->getAPIntValue();
@@ -5340,8 +5500,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
unsigned MaskLoBits = 0;
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
- KnownBits Known;
- DAG.computeKnownBits(Neg.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
if (NegC->getAPIntValue().getActiveBits() <= Bits &&
((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
@@ -5363,8 +5522,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// Pos'. The truncation is redundant for the purpose of the equality.
if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
- KnownBits Known;
- DAG.computeKnownBits(Pos.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
MaskLoBits))
@@ -5894,7 +6052,7 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
assert(N->getOpcode() == ISD::XOR);
// Don't touch 'not' (i.e. where y = -1).
- if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
+ if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
return SDValue();
EVT VT = N->getValueType(0);
@@ -5911,7 +6069,7 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
SDValue Xor0 = Xor.getOperand(0);
SDValue Xor1 = Xor.getOperand(1);
// Don't touch 'not' (i.e. where y = -1).
- if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
+ if (isAllOnesOrAllOnesSplat(Xor1))
return false;
if (Other == Xor0)
std::swap(Xor0, Xor1);
@@ -5977,8 +6135,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// fold (xor x, undef) -> undef
if (N0.isUndef())
return N0;
@@ -5988,11 +6147,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -6001,19 +6160,18 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return NewSel;
// reassociate xor
- if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
+ if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
- bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- isInt);
-
+ LHS.getValueType().isInteger());
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
- switch (N0.getOpcode()) {
+ switch (N0Opcode) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
@@ -6026,54 +6184,74 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
- if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
- N0.getNode()->hasOneUse() &&
+ if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
- SDLoc DL(N0);
- V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
- DAG.getConstant(1, DL, V.getValueType()));
+ SDLoc DL0(N0);
+ V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
+ DAG.getConstant(1, DL0, V.getValueType()));
AddToWorklist(V.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
- (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
- unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
+ return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (isAllOnesConstant(N1) && N0.hasOneUse() &&
- (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
- unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
+ return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
// fold (xor (and x, y), y) -> (and (not x), y)
- if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- N0->getOperand(1) == N1) {
- SDValue X = N0->getOperand(0);
+ if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
+ SDValue X = N0.getOperand(0);
SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
AddToWorklist(NotX.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
+ return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
+ }
+
+ if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
+ ConstantSDNode *XorC = isConstOrConstSplat(N1);
+ ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ if (XorC && ShiftC) {
+ // Don't crash on an oversized shift. We can't guarantee that a bogus
+ // shift has been simplified to undef.
+ uint64_t ShiftAmt = ShiftC->getLimitedValue();
+ if (ShiftAmt < BitWidth) {
+ APInt Ones = APInt::getAllOnesValue(BitWidth);
+ Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
+ if (XorC->getAPIntValue() == Ones) {
+ // If the xor constant is a shifted -1, do a 'not' before the shift:
+ // xor (X << ShiftC), XorC --> (not X) << ShiftC
+ // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
+ SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
+ return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
+ }
+ }
+ }
}
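
A quick way to convince yourself of the shifted-not identity used in the block above: xor'ing a shifted value with the correspondingly shifted all-ones constant is the same as shifting the complemented value. The snippet below is a standalone C++ spot check of that identity, not LLVM code.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 0xdeadbeefu, 0xffffffffu};
  for (uint32_t X : Vals) {
    for (unsigned S = 0; S < 32; ++S) {
      uint32_t Ones = ~0u;
      assert(((X << S) ^ (Ones << S)) == (~X << S)); // shl form of the fold
      assert(((X >> S) ^ (Ones >> S)) == (~X >> S)); // srl form of the fold
    }
  }
  return 0;
}
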
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
- SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
- SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
+ SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
+ SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
SDValue S0 = S.getOperand(0);
@@ -6081,14 +6259,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
unsigned OpSizeInBits = VT.getScalarSizeInBits();
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
if (C->getAPIntValue() == (OpSizeInBits - 1))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ return DAG.getNode(ISD::ABS, DL, VT, S0);
}
}
}
// fold (xor x, x) -> 0
if (N0 == N1)
- return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (xor (shl 1, x), -1) -> (rotl ~1, x)
// Here is a concrete example of this equivalence:
@@ -6108,17 +6286,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// consistent result.
// - Pushing the zero left requires shifting one bits in from the right.
// A rotate left of ~1 is a nice way of achieving the desired result.
- if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
- && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
- SDLoc DL(N);
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
+ isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
N0.getOperand(1));
}
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
- if (N0.getOpcode() == N1.getOpcode())
- if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
- return Tmp;
+ if (N0Opcode == N1.getOpcode())
+ if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
+ return V;
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
@@ -6134,6 +6311,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+ // Do not turn a 'not' into a regular xor.
+ if (isBitwiseNot(N->getOperand(0)))
+ return SDValue();
+
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -6191,7 +6372,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
return SDValue();
}
- if (!TLI.isDesirableToCommuteWithShift(LHS))
+ if (!TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
// Fold the constants, shifting the binop RHS by the shift amount.
@@ -6239,9 +6420,16 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
unsigned Bitsize = VT.getScalarSizeInBits();
// fold (rot x, 0) -> x
- if (isNullConstantOrNullSplatConstant(N1))
+ if (isNullOrNullSplat(N1))
return N0;
+ // fold (rot x, c) -> x iff (c % BitSize) == 0
+ if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
+ APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
+ if (DAG.MaskedValueIsZero(N1, ModuloMask))
+ return N0;
+ }
+
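
The new modulo fold above relies on a rotate by any multiple of the bit width being an identity. A minimal scalar sketch with a hand-rolled helper (the name Rotl32 is ours, not an LLVM API):

#include <cassert>
#include <cstdint>

// Hand-rolled 32-bit rotate-left.
static uint32_t Rotl32(uint32_t X, unsigned C) {
  C &= 31;
  return C ? (X << C) | (X >> (32 - C)) : X;
}

int main() {
  const uint32_t X = 0x12345678u;
  const unsigned Amts[] = {0u, 32u, 64u, 96u}; // multiples of the bit width
  for (unsigned C : Amts)
    assert(Rotl32(X, C) == X); // rot by a multiple of BitSize is an identity
  return 0;
}
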
// fold (rot x, c) -> (rot x, c % BitSize)
if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
if (Cst->getAPIntValue().uge(Bitsize)) {
@@ -6284,6 +6472,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6318,22 +6509,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
- // fold (shl 0, x) -> 0
- if (isNullConstantOrNullSplatConstant(N0))
- return N0;
- // fold (shl x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (shl x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
- // fold (shl undef, x) -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6454,7 +6629,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+ TLI.shouldFoldShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
if (c1 < OpSizeInBits) {
@@ -6495,7 +6671,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
AddToWorklist(Shl0.getNode());
@@ -6522,6 +6699,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6542,16 +6722,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
- // fold (sra x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (sra x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6571,31 +6741,30 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+ // clamp (add c1, c2) to max shift.
if (N0.getOpcode() == ISD::SRA) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
+ EVT ShiftSVT = ShiftVT.getScalarType();
+ SmallVector<SDValue, 16> ShiftValues;
- auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
+ auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
- return (c1 + c2).uge(OpSizeInBits);
- };
- if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
-
- auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
- APInt c1 = LHS->getAPIntValue();
- APInt c2 = RHS->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
- return (c1 + c2).ult(OpSizeInBits);
+ APInt Sum = c1 + c2;
+ unsigned ShiftSum =
+ Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
+ ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
+ return true;
};
- if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
- SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
+ SDValue ShiftValue;
+ if (VT.isVector())
+ ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
+ else
+ ShiftValue = ShiftValues[0];
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
}
}
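
The rewritten SRA-of-SRA case folds two arithmetic shifts into one and clamps the combined amount at BitWidth - 1 instead of producing undef. A small standalone check of that clamping identity (plain C++, relying on signed >> being an arithmetic shift, as it is on all common targets):

#include <cassert>
#include <cstdint>

// Arithmetic right shift with the same clamp the combine applies.
static int32_t Sra32(int32_t X, unsigned Amt) {
  return X >> (Amt > 31 ? 31 : Amt);
}

int main() {
  const int32_t Vals[] = {0, 1, -1, 123456789, INT32_MIN};
  for (int32_t X : Vals)
    for (unsigned C1 = 0; C1 < 32; ++C1)
      for (unsigned C2 = 0; C2 < 32; ++C2)
        assert(Sra32(Sra32(X, C1), C2) == Sra32(X, C1 + C2));
  return 0;
}
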
@@ -6689,6 +6858,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -6703,19 +6875,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
- // fold (srl 0, x) -> 0
- if (isNullConstantOrNullSplatConstant(N0))
- return N0;
- // fold (srl x, c >= size(x)) -> undef
- // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
- auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
- return Val->getAPIntValue().uge(OpSizeInBits);
- };
- if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
- return DAG.getUNDEF(VT);
- // fold (srl x, 0) -> x
- if (N1C && N1C->isNullValue())
- return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6819,8 +6978,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
- KnownBits Known;
- DAG.computeKnownBits(N0.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
@@ -6906,6 +7064,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ bool IsFSHL = N->getOpcode() == ISD::FSHL;
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // fold (fshl N0, N1, 0) -> N0
+ // fold (fshr N0, N1, 0) -> N1
+ if (isPowerOf2_32(BitWidth))
+ if (DAG.MaskedValueIsZero(
+ N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
+ return IsFSHL ? N0 : N1;
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ if (Cst->getAPIntValue().uge(BitWidth)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
+ DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+ }
+ }
+
+ // fold (fshl N0, N0, N2) -> (rotl N0, N2)
+ // fold (fshr N0, N0, N2) -> (rotr N0, N2)
+ // TODO: Investigate flipping this rotate if only one rotate direction is
+ // legal; if funnel shift is legal as well, we might be better off avoiding
+ // the non-constant (BW - N2) shift amount.
+ unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (N0 == N1 && hasOperation(RotOpc, VT))
+ return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+
+ return SDValue();
+}
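
For readers unfamiliar with ISD::FSHL/FSHR, the helper below is a rough scalar model of the semantics the new visitor assumes: the concatenation of the two operands is shifted by the amount modulo the bit width, and with both operands equal the operation degenerates to a rotate. Fshl32 and Rotl32 are our illustrations, not LLVM functions.

#include <cassert>
#include <cstdint>

// Rough scalar model of ISD::FSHL for i32: shift the concatenation A:B left
// by C % 32 and keep the upper half.
static uint32_t Fshl32(uint32_t A, uint32_t B, unsigned C) {
  C &= 31;
  if (C == 0)
    return A; // fold (fshl A, B, 0) -> A
  return (A << C) | (B >> (32 - C));
}

static uint32_t Rotl32(uint32_t X, unsigned C) {
  C &= 31;
  return C ? (X << C) | (X >> (32 - C)) : X;
}

int main() {
  const uint32_t X = 0x80000001u;
  for (unsigned C = 0; C < 64; ++C)
    assert(Fshl32(X, X, C) == Rotl32(X, C)); // fshl X, X, C == rotl X, C
  return 0;
}
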
+
SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7012,6 +7205,16 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
+// FIXME: This should be checking for no signed zeros on individual operands, as
+// well as no nans.
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
+ const TargetOptions &Options = DAG.getTarget().Options;
+ EVT VT = LHS.getValueType();
+
+ return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
+}
+
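
The guard above matters because select-of-compare and fminnum/fmaxnum only agree when no NaN can appear: an ordered compare is false for NaNs, so the select falls through to the other operand. A tiny scalar illustration (plain C++, using libm's fmin semantics):

#include <cassert>
#include <cmath>

// "select (x < y), x, y" written out as plain C++.
static double SelectLT(double X, double Y) { return X < Y ? X : Y; }

int main() {
  assert(SelectLT(1.0, 2.0) == std::fmin(1.0, 2.0));
  assert(SelectLT(-3.5, 2.0) == std::fmin(-3.5, 2.0));
  // With a NaN operand the two forms diverge, which is why the combine is
  // guarded by the known-never-NaN checks above.
  double N = std::nan("");
  assert(std::isnan(SelectLT(7.0, N))); // compare is false, select yields NaN
  assert(std::fmin(7.0, N) == 7.0);     // fminnum-style semantics drop it
  return 0;
}
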
/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
@@ -7020,6 +7223,7 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
switch (CC) {
case ISD::SETOLT:
case ISD::SETOLE:
@@ -7027,8 +7231,15 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETLE:
case ISD::SETULT:
case ISD::SETULE: {
+ // Since it's known never NaN to get here already, either fminnum or
+ // fminnum_ieee are OK. Try the IEEE version first, since fminnum is
+ // expanded in terms of it.
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
- if (TLI.isOperationLegal(Opcode, VT))
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
@@ -7038,8 +7249,12 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE: {
+ unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
+ return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
+
unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
- if (TLI.isOperationLegal(Opcode, VT))
+ if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
@@ -7150,15 +7365,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
EVT VT0 = N0.getValueType();
SDLoc DL(N);
- // fold (select C, X, X) -> X
- if (N1 == N2)
- return N1;
-
- if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
- // fold (select true, X, Y) -> X
- // fold (select false, X, Y) -> Y
- return !N0C->isNullValue() ? N1 : N2;
- }
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or C, Y)
@@ -7264,32 +7472,54 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
}
- // fold selects based on a setcc into other things, such as min/max/abs
+ // Fold selects based on a setcc into other things, such as min/max/abs.
if (N0.getOpcode() == ISD::SETCC) {
- // select x, y (fcmp lt x, y) -> fminnum x, y
- // select x, y (fcmp gt x, y) -> fmaxnum x, y
- //
- // This is OK if we don't care about what happens if either operand is a
- // NaN.
- //
-
- // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
- // no signed zeros as well as no nans.
- const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
- DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ // select (fcmp lt x, y), x, y -> fminnum x, y
+ // select (fcmp gt x, y), x, y -> fmaxnum x, y
+ //
+ // This is OK if we don't care what happens if either operand is a NaN.
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
+ if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
+ CC, TLI, DAG))
return FMinMax;
+
+ // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
+ // This is conservatively limited to pre-legal-operations to give targets
+ // a chance to reverse the transform if they want to do that. Also, it is
+ // unlikely that the pattern would be formed late, so it's probably not
+ // worth going through the other checks.
+ if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
+ CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
+ N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
+ auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
+ auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
+ if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
+ // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
+ // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
+ //
+ // The IR equivalent of this transform would have this form:
+ // %a = add %x, C
+ // %c = icmp ugt %x, ~C
+ // %r = select %c, -1, %a
+ // =>
+ // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
+ // %u0 = extractvalue %u, 0
+ // %u1 = extractvalue %u, 1
+ // %r = select %u1, -1, %u0
+ SDVTList VTs = DAG.getVTList(VT, VT0);
+ SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
+ return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
+ }
}
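
The IR-level comment above already sketches the transform; the standalone snippet below checks the underlying arithmetic fact that "x > ~C ? -1 : x + C" is exactly an unsigned add that clamps to all-ones on overflow, which is what the uaddo + select pair computes. Helper names are ours.

#include <cassert>
#include <cstdint>

// The select form matched by the combine.
static uint32_t SatAddSelect(uint32_t X, uint32_t C) {
  return X > ~C ? 0xffffffffu : X + C;
}

// The uaddo + select form it is rewritten into.
static uint32_t SatAddOverflow(uint32_t X, uint32_t C) {
  uint32_t Sum = X + C;          // uaddo.0
  bool Ov = Sum < X;             // uaddo.1 (carry out)
  return Ov ? 0xffffffffu : Sum; // select uaddo.1, -1, uaddo.0
}

int main() {
  const uint32_t Vals[] = {0u, 1u, 0x7fffffffu, 0xfffffff0u, 0xffffffffu};
  for (uint32_t X : Vals)
    for (uint32_t C : Vals)
      assert(SatAddSelect(X, C) == SatAddOverflow(X, C));
  return 0;
}
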
- if ((!LegalOperations &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
- TLI.isOperationLegal(ISD::SELECT_CC, VT))
- return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
- N0.getOperand(1), N1, N2, N0.getOperand(2));
+ if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
+ (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
+ N0.getOperand(2));
+
return SimplifySelect(DL, N0, N1, N2);
}
@@ -7388,7 +7618,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
TargetLowering::TypeSplitVector)
return SDValue();
- SDValue MaskLo, MaskHi, Lo, Hi;
+ SDValue MaskLo, MaskHi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
EVT LoVT, HiVT;
@@ -7416,17 +7646,15 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
Alignment, MSC->getAAInfo(), MSC->getRanges());
SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
- Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
- DL, OpsLo, MMO);
+ SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ DataLo.getValueType(), DL, OpsLo, MMO);
- SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
- Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ // The order of the scatter operations after the split is well defined: the
+ // "Hi" part comes after the "Lo" part, so the two operations are chained
+ // one after another.
+ SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
@@ -7525,9 +7753,9 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Src0 = MGT->getValue();
- SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+ SDValue PassThru = MGT->getPassThru();
+ SDValue PassThruLo, PassThruHi;
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
@@ -7550,11 +7778,11 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
+ SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
MMO);
- SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
+ SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
MMO);
@@ -7599,9 +7827,9 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Src0 = MLD->getSrc0();
- SDValue Src0Lo, Src0Hi;
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+ SDValue PassThru = MLD->getPassThru();
+ SDValue PassThruLo, PassThruHi;
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
@@ -7625,8 +7853,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
- ISD::NON_EXTLOAD, MLD->isExpandingLoad());
+ Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
+ MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MLD->isExpandingLoad());
@@ -7637,8 +7865,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
MLD->getAAInfo(), MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
- ISD::NON_EXTLOAD, MLD->isExpandingLoad());
+ Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
+ MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -7717,9 +7945,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDLoc DL(N);
- // fold (vselect C, X, X) -> X
- if (N1 == N2)
- return N1;
+ if (SDValue V = DAG.simplifySelect(N0, N1, N2))
+ return V;
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
@@ -7754,12 +7981,26 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
+ // vselect x, y (fcmp lt x, y) -> fminnum x, y
+ // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+ EVT VT = N->getValueType(0);
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (SDValue FMinMax = combineMinNumMaxNum(
+ DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ return FMinMax;
+ }
+
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
// TODO: This should be extended to handle any constant.
// TODO: This could be extended to handle non-loading patterns, but that
// requires thorough testing to avoid regressions.
- if (isNullConstantOrNullSplatConstant(RHS)) {
+ if (isNullOrNullSplat(RHS)) {
EVT NarrowVT = LHS.getValueType();
EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
EVT SetCCVT = getSetCCResultType(LHS.getValueType());
@@ -7902,9 +8143,8 @@ SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
-static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
- SelectionDAG &DAG, bool LegalTypes,
- bool LegalOperations) {
+static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7918,16 +8158,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// fold (zext c1) -> c1
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
- return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
+ return DAG.getNode(Opcode, SDLoc(N), VT, N0);
// fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
EVT SVT = VT.getScalarType();
- if (!(VT.isVector() &&
- (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
+ if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
- return nullptr;
+ return SDValue();
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
@@ -7936,10 +8175,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
- for (unsigned i=0; i != NumElts; ++i) {
- SDValue Op = N0->getOperand(i);
- if (Op->isUndef()) {
- Elts.push_back(DAG.getUNDEF(SVT));
+ // For zero-extensions, UNDEF elements are still guaranteed to have their
+ // upper bits set to zero.
+ bool IsZext =
+ Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.isUndef()) {
+ Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
continue;
}
@@ -7953,7 +8197,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
}
- return DAG.getBuildVector(VT, DL, Elts).getNode();
+ return DAG.getBuildVector(VT, DL, Elts);
}
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
@@ -8269,7 +8513,7 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- if ((LegalOperations || LN0->isVolatile()) &&
+ if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return {};
@@ -8359,9 +8603,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
@@ -8498,21 +8741,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// if this is the case.
EVT SVT = getSetCCResultType(N00VT);
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == SVT.getSizeInBits())
- return DAG.getSetCC(DL, VT, N00, N01, CC);
-
- // If the desired elements are smaller or larger than the source
- // elements, we can use a matching integer vector type and then
- // truncate/sign extend.
- EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
- if (SVT == MatchingVecType) {
- SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
- return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ // If we already have the desired type, don't change it.
+ if (SVT != N0.getValueType()) {
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
+
+ // If the desired elements are smaller or larger than the source
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
+ }
}
}
@@ -8569,40 +8815,37 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
KnownBits &Known) {
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
- DAG.computeKnownBits(Op, Known);
+ Known = DAG.computeKnownBits(Op);
return true;
}
- if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
- cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ if (N.getOpcode() != ISD::SETCC ||
+ N.getValueType().getScalarType() != MVT::i1 ||
+ cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
return false;
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
- if (isNullConstant(Op0))
+ if (isNullOrNullSplat(Op0))
Op = Op1;
- else if (isNullConstant(Op1))
+ else if (isNullOrNullSplat(Op1))
Op = Op0;
else
return false;
- DAG.computeKnownBits(Op, Known);
+ Known = DAG.computeKnownBits(Op);
- if (!(Known.Zero | 1).isAllOnesValue())
- return false;
-
- return true;
+ return (Known.Zero | 1).isAllOnesValue();
}
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
@@ -8613,17 +8856,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (zext x) or
// (zext (truncate x)) -> (truncate x)
// This is valid when the truncated bits of x are already zero.
- // FIXME: We should extend this to work for vectors too.
SDValue Op;
KnownBits Known;
- if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
+ if (isTruncateOf(DAG, N0, Op, Known)) {
APInt TruncatedBits =
- (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
- APInt(Op.getValueSizeInBits(), 0) :
- APInt::getBitsSet(Op.getValueSizeInBits(),
- N0.getValueSizeInBits(),
- std::min(Op.getValueSizeInBits(),
- VT.getSizeInBits()));
+ (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
+ APInt(Op.getScalarValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getScalarValueSizeInBits(),
+ N0.getScalarValueSizeInBits(),
+ std::min(Op.getScalarValueSizeInBits(),
+ VT.getScalarSizeInBits()));
if (TruncatedBits.isSubsetOf(Known.Zero))
return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
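
The vector-enabled fold above replaces zext(trunc(x)) with x when the bits dropped by the truncate are already known zero. A trivial scalar sketch of why that is sound:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 0x7fu, 0xffu}; // high 24 bits known zero
  for (uint32_t X : Vals) {
    uint32_t ZextTrunc = uint32_t(uint8_t(X)); // trunc to i8, zext to i32
    assert(ZextTrunc == X);                    // fold back to X itself
  }
  return 0;
}
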
@@ -8851,9 +9093,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
@@ -8968,17 +9209,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/any extend
- else {
- EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
- SDValue VsetCC =
- DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
- }
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
+ SDValue VsetCC =
+ DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -9025,6 +9265,26 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
+ // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
+ // than X, just move the AssertZext in front of the truncate and drop the
+ // AssertSext.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::AssertSext &&
+ Opcode == ISD::AssertZext) {
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+ "Asserting zero/sign-extended bits to a type larger than the "
+ "truncated destination does not provide information");
+
+ if (AssertVT.bitsLT(BigA_AssertVT)) {
+ SDLoc DL(N);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), N1);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
+ }
+ }
+
return SDValue();
}
@@ -9046,6 +9306,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (VT.isVector())
return SDValue();
+ unsigned ShAmt = 0;
+ bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
@@ -9073,15 +9335,25 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!AndC || !AndC->getAPIntValue().isMask())
+ if (!AndC)
+ return SDValue();
+
+ const APInt &Mask = AndC->getAPIntValue();
+ unsigned ActiveBits = 0;
+ if (Mask.isMask()) {
+ ActiveBits = Mask.countTrailingOnes();
+ } else if (Mask.isShiftedMask()) {
+ ShAmt = Mask.countTrailingZeros();
+ APInt ShiftedMask = Mask.lshr(ShAmt);
+ ActiveBits = ShiftedMask.countTrailingOnes();
+ HasShiftedOffset = true;
+ } else
return SDValue();
- unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
ExtType = ISD::ZEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
}
- unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
@@ -9150,13 +9422,16 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (DAG.getDataLayout().isBigEndian()) {
+ auto AdjustBigEndianShift = [&](unsigned ShAmt) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
- ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
- }
+ return LVTStoreBits - EVTStoreBits - ShAmt;
+ };
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
@@ -9204,6 +9479,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
+ if (HasShiftedOffset) {
+ // Recalculate the shift amount after it has been altered to calculate
+ // the offset.
+ if (DAG.getDataLayout().isBigEndian())
+ ShAmt = AdjustBigEndianShift(ShAmt);
+
+ // We're using a shifted mask, so the load now has an offset. This means
+ // that the data has been loaded into lower bits of the register than it
+ // would have been before, so shl the loaded data back into the correct
+ // position in the register.
+ SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+ Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ }
+
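
The shifted-mask case added to ReduceLoadWidth rests on a simple bit identity: masking with a shifted mask equals extracting the narrow field (what the narrowed zero-extending load produces) and shifting it back into position. A standalone check, unrelated to LLVM's own tests:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mask = 0x00ffff00u; // a shifted mask (isShiftedMask)
  const unsigned ShAmt = 8;          // its count of trailing zeros
  const uint32_t Narrow = 0xffffu;   // ActiveBits ones
  const uint32_t Vals[] = {0u, 0x12345678u, 0xffffffffu, 0xdead00ffu};
  for (uint32_t X : Vals) {
    uint32_t Field = (X >> ShAmt) & Narrow; // the narrowed, zero-extended load
    assert((X & Mask) == (Field << ShAmt)); // shl back into position
  }
  return 0;
}
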
// Return the new loaded value.
return Result;
}
@@ -9235,12 +9525,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
- // if x is small enough.
+ // if x is small enough or if we know that x has more than 1 sign bit and the
+ // sign_extend_inreg is extending from one of them.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getScalarValueSizeInBits() <= EVTBits &&
+ unsigned N00Bits = N00.getScalarValueSizeInBits();
+ if ((N00Bits <= EVTBits ||
+ (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
@@ -9250,7 +9543,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
- return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+ N0.getOperand(0));
}
// fold (sext_in_reg (zext x)) -> (sext x)
@@ -9345,9 +9639,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
if (N0.isUndef())
return DAG.getUNDEF(VT);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
return SDValue();
}
@@ -9359,9 +9655,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
if (N0.isUndef())
return DAG.getUNDEF(VT);
- if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
- LegalOperations))
- return SDValue(Res, 0);
+ if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+ return Res;
+
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
return SDValue();
}
@@ -9458,8 +9756,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
SDValue Amt = N0.getOperand(1);
- KnownBits Known;
- DAG.computeKnownBits(Amt, Known);
+ KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
SDLoc SL(N);
@@ -9636,6 +9933,32 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ // Narrow a suitable binary operation with a non-opaque constant operand by
+ // moving it ahead of the truncate. This is limited to pre-legalization
+ // because targets may prefer a wider type during later combines and invert
+ // this transform.
+ switch (N0.getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ if (!LegalOperations && N0.hasOneUse() &&
+ (isConstantOrConstantVector(N0.getOperand(0), true) ||
+ isConstantOrConstantVector(N0.getOperand(1), true))) {
+ // TODO: We already restricted this to pre-legalization, but for vectors
+ // we are extra cautious to not create an unsupported operation.
+ // Target-specific changes are likely needed to avoid regressions here.
+ if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
+ SDLoc DL(N);
+ SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
+ SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
+ return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+ }
+ }
+ }
+
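
The narrowing above is sound because truncation distributes over these six operations: the low bits of a sum, difference, product, or bitwise op depend only on the low bits of the inputs. A quick scalar check (the math is kept in uint32_t to sidestep C++ integer-promotion pitfalls):

#include <cassert>
#include <cstdint>

// Truncation to 16 bits, kept in uint32_t on purpose.
static uint32_t Trunc16(uint32_t V) { return V & 0xffffu; }

int main() {
  const uint32_t As[] = {0u, 0x1234u, 0xffffabcdu};
  const uint32_t Bs[] = {1u, 0x00ff00ffu, 0xffffffffu};
  for (uint32_t A : As)
    for (uint32_t B : Bs) {
      assert(Trunc16(A + B) == Trunc16(Trunc16(A) + Trunc16(B)));
      assert(Trunc16(A - B) == Trunc16(Trunc16(A) - Trunc16(B)));
      assert(Trunc16(A * B) == Trunc16(Trunc16(A) * Trunc16(B)));
      assert(Trunc16(A & B) == Trunc16(Trunc16(A) & Trunc16(B)));
      assert(Trunc16(A | B) == Trunc16(Trunc16(A) | Trunc16(B)));
      assert(Trunc16(A ^ B) == Trunc16(Trunc16(A) ^ Trunc16(B)));
    }
  return 0;
}
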
return SDValue();
}
@@ -9694,11 +10017,11 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
return SDValue();
- // TODO: Use splat values for the constant-checking below and remove this
- // restriction.
+ // TODO: Handle cases where the integer constant is a different scalar
+ // bitwidth than the FP type.
SDValue N0 = N->getOperand(0);
EVT SourceVT = N0.getValueType();
- if (SourceVT.isVector())
+ if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();
unsigned FPOpcode;
@@ -9706,25 +10029,35 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
switch (N0.getOpcode()) {
case ISD::AND:
FPOpcode = ISD::FABS;
- SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
+ SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
case ISD::XOR:
FPOpcode = ISD::FNEG;
- SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
+ break;
+ case ISD::OR:
+ FPOpcode = ISD::FABS;
+ SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
- // TODO: ISD::OR --> ISD::FNABS?
default:
return SDValue();
}
// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
+ // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
+ // fneg (fabs X)
SDValue LogicOp0 = N0.getOperand(0);
- ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
LogicOp0.getOpcode() == ISD::BITCAST &&
- LogicOp0->getOperand(0).getValueType() == VT)
- return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
+ LogicOp0.getOperand(0).getValueType() == VT) {
+ SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
+ NumFPLogicOpsConv++;
+ if (N0.getOpcode() == ISD::OR)
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
+ return FPOp;
+ }
return SDValue();
}
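
The new ISD::OR case maps OR-of-sign-mask onto fneg(fabs x). A scalar demonstration of that bit-pattern identity, using memcpy for the bitcasts so the example stays well-defined C++ (the helper is ours):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// OR the IEEE-754 sign bit into a float's bit pattern.
static float OrSignBit(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits |= 0x80000000u; // the SignMask used by the combine
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}

int main() {
  const float Vals[] = {0.0f, 1.5f, -2.25f, 123456.0f};
  for (float X : Vals)
    assert(OrSignBit(X) == -std::fabs(X)); // or(SignMask) == fneg(fabs X)
  return 0;
}
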
@@ -9737,33 +10070,32 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return DAG.getUNDEF(VT);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
- // Only do this before legalize, since afterward the target may be depending
- // on the bitconvert.
+ // Only do this before legalize types, since we might create an illegal
+ // scalar type. Even if we knew we wouldn't create an illegal scalar type
+ // we can only do this before legalize ops, since the target may be
+ // depending on the bitcast.
// First check to see if this is all constant.
if (!LegalTypes &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
- VT.isVector()) {
- bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
-
- EVT DestEltVT = N->getValueType(0).getVectorElementType();
- assert(!DestEltVT.isVector() &&
- "Element type of vector ValueType must not be vector!");
- if (isSimple)
- return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
- }
+ VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
+ VT.getVectorElementType());
// If the input is a constant, let getNode fold it.
- // We always need to check that this is just a fp -> int or int -> conversion
- // otherwise we will get back N which will confuse the caller into thinking
- // we used CombineTo. This can block target combines from running. If we can't
- // allowed legal operations, we need to ensure the resulting operation will be
- // legal.
- // TODO: Maybe we should check that the return value isn't N explicitly?
- if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
- (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
- return DAG.getBitcast(VT, N0);
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ // If we can't allow illegal operations, we need to check that this is just
+ // an fp -> int or int -> fp conversion and that the resulting operation
+ // will be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT))) {
+ SDValue C = DAG.getBitcast(VT, N0);
+ if (C.getNode() != N)
+ return C;
+ }
+ }
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
@@ -9772,12 +10104,16 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (conv (load x)) -> (load (conv*)x)
// If the resultant load doesn't need a higher alignment than the original!
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- // Do not change the width of a volatile load.
- !cast<LoadSDNode>(N0)->isVolatile() &&
// Do not remove the cast if the types differ in endian layout.
TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned OrigAlign = LN0->getAlignment();
@@ -9934,7 +10270,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// float vectors bitcast to integer vectors) into shuffles.
// bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
- N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
!(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
@@ -10000,15 +10336,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// If this is a conversion of N elements of one type to N elements of another
// type, convert each element. This handles FP<->INT cases.
if (SrcBitSize == DstBitSize) {
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- BV->getValueType(0).getVectorNumElements());
-
- // Due to the FP element handling below calling this routine recursively,
- // we can end up with a scalar-to-vector node here.
- if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
- DAG.getBitcast(DstEltVT, BV->getOperand(0)));
-
SmallVector<SDValue, 8> Ops;
for (SDValue Op : BV->op_values()) {
// If the vector element type is not legal, the BUILD_VECTOR operands
@@ -10018,6 +10345,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
Ops.push_back(DAG.getBitcast(DstEltVT, Op));
AddToWorklist(Ops.back().getNode());
}
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
return DAG.getBuildVector(VT, SDLoc(BV), Ops);
}
@@ -10651,17 +10980,18 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
- // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
+ // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
+ // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
- auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
- if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
- if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ if (C->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
}
return SDValue();
};
@@ -10671,29 +11001,30 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
if (SDValue FMA = FuseFADD(N1, N0, Flags))
return FMA;
- // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
- // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
- // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
- // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
+ // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
+ // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
+ // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
+ // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
- auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
- if (XC0 && XC0->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y, Flags);
- if (XC0 && XC0->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
-
- auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
- if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
- if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
+ if (C0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y, Flags);
+ if (C0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ }
+ if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
+ if (C1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ if (C1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
+ }
}
return SDValue();
};
@@ -10706,14 +11037,6 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
return SDValue();
}
-static bool isFMulNegTwo(SDValue &N) {
- if (N.getOpcode() != ISD::FMUL)
- return false;
- if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
- return CFP->isExactlyValue(-2.0);
- return false;
-}
-
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -10737,6 +11060,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
+ ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
+ if (N1C && N1C->isZero())
+ if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
+ return N0;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -10752,23 +11081,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
- // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
- // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
- if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
- (isFMulNegTwo(N1) && N1.hasOneUse())) {
- bool N1IsFMul = isFMulNegTwo(N1);
- SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
- }
+ auto isFMulNegTwo = [](SDValue FMul) {
+ if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
+ return false;
+ auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
+ return C && C->isExactlyValue(-2.0);
+ };
- ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
- if (N1C && N1C->isZero()) {
- if (N1C->isNegative() || Options.UnsafeFPMath ||
- Flags.hasNoSignedZeros()) {
- // fold (fadd A, 0) -> A
- return N0;
- }
+ // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N0)) {
+ SDValue B = N0.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
+ }
+ // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
+ if (isFMulNegTwo(N1)) {
+ SDValue B = N1.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
}
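
Unlike many FP combines here, the fmul-by--2.0 rewrite needs no fast-math flags because it is exact in IEEE arithmetic: multiplying by -2.0 and adding a value to itself are both exact, so A + B*-2.0 and A - (B + B) round identically. A small spot check:

#include <cassert>

int main() {
  const double As[] = {0.0, 1.0, 3.141592653589793, -1e300};
  const double Bs[] = {0.0, 0.1, -2.5, 1e-300};
  for (double A : As)
    for (double B : Bs)
      assert(A + B * -2.0 == A - (B + B)); // both sides round identically
  return 0;
}
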
// No FP constant should be created after legalization as Instruction
@@ -10887,8 +11217,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -10920,9 +11250,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return DAG.getConstantFP(0.0f, DL, VT);
}
- // (fsub 0, B) -> -B
+ // (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
- if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
+ if (N0CFP->isNegative() ||
+ (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
@@ -10930,27 +11261,22 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ if ((Options.UnsafeFPMath ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
+ && N1.getOpcode() == ISD::FADD) {
+ // X - (X + Y) -> -Y
+ if (N0 == N1->getOperand(0))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
+ // X - (Y + X) -> -Y
+ if (N0 == N1->getOperand(1))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
+ }
+
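
In contrast, the X - (X + Y) -> -Y rewrite above is only performed under UnsafeFPMath or reassoc+nsz because it is not an exact IEEE identity; the rounding of X + Y can swallow Y entirely. A one-line counterexample:

#include <cassert>

int main() {
  double X = 1e16, Y = 1.0;
  assert(X - (X + Y) == 0.0); // X + Y rounds back to X, so the fsub gives 0.0
  // The rewritten form -Y would give -1.0 instead; accepting that difference
  // is exactly what the reassociation / no-signed-zeros gate permits.
  return 0;
}
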
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
- // If 'unsafe math' is enabled, fold lots of things.
- if (Options.UnsafeFPMath) {
- // (fsub x, (fadd x, y)) -> (fneg y)
- // (fsub x, (fadd y, x)) -> (fneg y)
- if (N1.getOpcode() == ISD::FADD) {
- SDValue N10 = N1->getOperand(0);
- SDValue N11 = N1->getOperand(1);
-
- if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N11, DAG, LegalOperations);
-
- if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N10, DAG, LegalOperations);
- }
- }
-
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());
@@ -10963,8 +11289,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -11002,26 +11328,16 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
- if (N0.getOpcode() == ISD::FMUL) {
- // Fold scalars or any vector constants (not just splats).
- // This fold is done in general by InstCombine, but extra fmul insts
- // may have been generated during lowering.
+ if (isConstantFPBuildVectorOrConstantFP(N1) &&
+ N0.getOpcode() == ISD::FMUL) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
- auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
- auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
-
- // Check 1: Make sure that the first operand of the inner multiply is NOT
- // a constant. Otherwise, we may induce infinite looping.
- if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
- // Check 2: Make sure that the second operand of the inner multiply and
- // the second operand of the outer multiply are constants.
- if ((N1CFP && isConstOrConstSplatFP(N01)) ||
- (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
- }
+ // Avoid an infinite loop by making sure that N00 is not a constant
+ // (the inner multiply has not been constant folded yet).
+ if (isConstantFPBuildVectorOrConstantFP(N01) &&
+ !isConstantFPBuildVectorOrConstantFP(N00)) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
}
}
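Folding the two constants together is only valid under UnsafeFPMath/reassoc because FP multiplication is not associative. A standalone example where the two evaluation orders differ (the intermediate product overflows):

#include <cstdio>
int main() {
  double X = 1e308;
  printf("%g\n", (X * 10.0) * 0.1); // inf: the inner product overflows
  printf("%g\n", X * (10.0 * 0.1)); // 1e+308: the constants fold to 1.0 first
  return 0;
}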
@@ -11445,15 +11761,15 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
- if (N1CFP) {
- const APFloat &V = N1CFP->getValueAPF();
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
+ const APFloat &V = N1C->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
@@ -11489,6 +11805,72 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFPOW(SDNode *N) {
+ ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
+ if (!ExponentC)
+ return SDValue();
+
+ // Try to convert x ** (1/3) into cube root.
+ // TODO: Handle the various flavors of long double.
+ // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
+ // Some range near 1/3 should be fine.
+ EVT VT = N->getValueType(0);
+ if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
+ (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
+ // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
+ // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
+ // pow(-val, 1/3) = nan; cbrt(-val) = -num.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf nnan afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Do not create a cbrt() libcall if the target does not have it, and do not
+ // turn a pow that has lowering support into a cbrt() libcall.
+ if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
+ (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
+ DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
+ return SDValue();
+
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+ }
+
+ // Try to convert x ** (1/4) into square roots.
+ // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
+ // TODO: This could be extended (using a target hook) to handle smaller
+ // power-of-2 fractional exponents.
+ if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
+ // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
+ // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
+ // For regular numbers, rounding may cause the results to differ.
+ // Therefore, we require { nsz ninf afn } for this transform.
+ // TODO: We could select out the special cases if we don't have nsz/ninf.
+ SDNodeFlags Flags = N->getFlags();
+ if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
+ !Flags.hasApproximateFuncs())
+ return SDValue();
+
+ // Don't double the number of libcalls. We are trying to inline fast code.
+ if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
+ return SDValue();
+
+ // Assume that libcalls are the smallest code.
+ // TODO: This restriction should probably be lifted for vectors.
+ if (DAG.getMachineFunction().getFunction().optForSize())
+ return SDValue();
+
+ // pow(X, 0.25) --> sqrt(sqrt(X))
+ SDLoc DL(N);
+ SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
+ return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ }
+
+ return SDValue();
+}
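For positive, finite inputs the folded forms agree with pow; the required flags exist because -0.0, -inf, NaN, and negative inputs behave differently, as the comments above note. A minimal check against the C math library (assumed available on the host):

#include <cmath>
#include <cstdio>
int main() {
  double x = 64.0;
  printf("%g %g\n", pow(x, 1.0 / 3.0), cbrt(x));  // both print 4
  printf("%g %g\n", pow(x, 0.25), sqrt(sqrt(x))); // both print 2.82843
  return 0;
}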
+
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
// This optimization is guarded by a function attribute because it may produce
@@ -11538,8 +11920,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
// but UINT_TO_FP is legal on this target, try to convert.
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
- TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
+ hasOperation(ISD::UINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to UINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
@@ -11595,8 +11977,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
// but SINT_TO_FP is legal on this target, try to convert.
- if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
- TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
+ hasOperation(ISD::SINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to SINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
@@ -11917,7 +12299,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
+static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
+ APFloat (*Op)(const APFloat &, const APFloat &)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
@@ -11927,36 +12310,31 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
+ return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
}
// Canonicalize to constant on RHS.
if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
return SDValue();
}
-SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
- const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
+ return visitFMinMax(DAG, N, minnum);
+}
- if (N0CFP && N1CFP) {
- const APFloat &C0 = N0CFP->getValueAPF();
- const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
- }
+SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
+ return visitFMinMax(DAG, N, maxnum);
+}
- // Canonicalize to constant on RHS.
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
+SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
+ return visitFMinMax(DAG, N, minimum);
+}
- return SDValue();
+SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
+ return visitFMinMax(DAG, N, maximum);
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
@@ -11976,11 +12354,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
- // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
- // constant pool values.
- if (!TLI.isFAbsFree(VT) &&
- N0.getOpcode() == ISD::BITCAST &&
- N0.getNode()->hasOneUse()) {
+ // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
+ if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
@@ -12512,8 +12887,15 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (TryNext)
continue;
- // Check for #2
- if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ // Check for #2.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Ptr is predecessor to both N and Op.
+ Visited.insert(Ptr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(Op);
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
SDValue Result = isLoad
? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
BasePtr, Offset, AM)
@@ -12571,6 +12953,157 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
+static inline int numVectorEltsOrZero(EVT T) {
+ return T.isVector() ? T.getVectorNumElements() : 0;
+}
+
+bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
+ Val = ST->getValue();
+ EVT STType = Val.getValueType();
+ EVT STMemType = ST->getMemoryVT();
+ if (STType == STMemType)
+ return true;
+ if (isTypeLegal(STMemType))
+ return false; // fail.
+ if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
+ TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
+ Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
+ STType.isInteger() && STMemType.isInteger()) {
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
+ return true;
+ }
+ if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
+ Val = DAG.getBitcast(STMemType, Val);
+ return true;
+ }
+ return false; // fail.
+}
+
+bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
+ EVT LDMemType = LD->getMemoryVT();
+ EVT LDType = LD->getValueType(0);
+ assert(Val.getValueType() == LDMemType &&
+ "Attempting to extend value of non-matching type");
+ if (LDType == LDMemType)
+ return true;
+ if (LDMemType.isInteger() && LDType.isInteger()) {
+ switch (LD->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ Val = DAG.getBitcast(LDType, Val);
+ return true;
+ case ISD::EXTLOAD:
+ Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::SEXTLOAD:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ case ISD::ZEXTLOAD:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
+ return true;
+ }
+ }
+ return false;
+}
+
+SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
+ if (OptLevel == CodeGenOpt::None || LD->isVolatile())
+ return SDValue();
+ SDValue Chain = LD->getOperand(0);
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
+ if (!ST || ST->isVolatile())
+ return SDValue();
+
+ EVT LDType = LD->getValueType(0);
+ EVT LDMemType = LD->getMemoryVT();
+ EVT STMemType = ST->getMemoryVT();
+ EVT STType = ST->getValue().getValueType();
+
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
+ int64_t Offset;
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ return SDValue();
+
+ // Normalize for endianness. After this, Offset=0 will denote that the least
+ // significant bit in the loaded value maps to the least significant bit in
+ // the stored value. With Offset=n (for n > 0) the loaded value starts at the
+ // nth least significant byte of the stored value.
+ if (DAG.getDataLayout().isBigEndian())
+ Offset = (STMemType.getStoreSizeInBits() -
+ LDMemType.getStoreSizeInBits()) / 8 - Offset;
+
+ // Check that the stored value covers all bits that are loaded.
+ bool STCoversLD =
+ (Offset >= 0) &&
+ (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+
+ auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
+ if (LD->isIndexed()) {
+ bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
+ LD->getAddressingMode() == ISD::POST_DEC);
+ unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
+ SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
+ LD->getOperand(1), LD->getOperand(2));
+ SDValue Ops[] = {Val, Idx, Chain};
+ return CombineTo(LD, Ops, 3);
+ }
+ return CombineTo(LD, Val, Chain);
+ };
+
+ if (!STCoversLD)
+ return SDValue();
+
+ // Memory as copy space (potentially masked).
+ if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
+ // Simple case: Direct non-truncating forwarding
+ if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ return ReplaceLd(LD, ST->getValue(), Chain);
+ // Can we model the truncate and extension with an and mask?
+ if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
+ !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
+ // Mask to size of LDMemType
+ auto Mask =
+ DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
+ STMemType.getSizeInBits()),
+ SDLoc(ST), STType);
+ auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
+ return ReplaceLd(LD, Val, Chain);
+ }
+ }
+
+ // TODO: Deal with nonzero offset.
+ if (LD->getBasePtr().isUndef() || Offset != 0)
+ return SDValue();
+ // Model necessary truncations / extensions.
+ SDValue Val;
+ // Truncate Value To Stored Memory Size.
+ do {
+ if (!getTruncatedStoreValue(ST, Val))
+ continue;
+ if (!isTypeLegal(LDMemType))
+ continue;
+ if (STMemType != LDMemType) {
+ // TODO: Support vectors? This requires extract_subvector/bitcast.
+ if (!STMemType.isVector() && !LDMemType.isVector() &&
+ STMemType.isInteger() && LDMemType.isInteger())
+ Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
+ else
+ continue;
+ }
+ if (!extendLoadedValueToExtension(LD, Val))
+ continue;
+ return ReplaceLd(LD, Val, Chain);
+ } while (false);
+
+ // On failure, cleanup dead nodes we may have created.
+ if (Val->use_empty())
+ deleteAndRecombine(Val.getNode());
+ return SDValue();
+}
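A sketch of the masked forwarding path with assumed types: an i32 value stored through a truncating i16 store and read back with a zero-extending i16 load can be replaced by an AND with the low 16 bits of the stored value.

#include <cassert>
#include <cstdint>
int main() {
  uint32_t Stored = 0xAABBCCDD;        // value fed to the truncating i16 store
  uint32_t Mask = (1u << 16) - 1;      // APInt::getLowBitsSet(32, 16)
  uint32_t Forwarded = Stored & Mask;  // stands in for the zextload i16
  assert(Forwarded == 0x0000CCDDu);
  return 0;
}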
+
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -12637,17 +13170,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// If this load is directly stored, replace the load value with the stored
// value.
- // TODO: Handle store large -> read small portion.
- // TODO: Handle TRUNCSTORE/LOADEXT
- if (OptLevel != CodeGenOpt::None &&
- ISD::isNormalLoad(N) && !LD->isVolatile()) {
- if (ISD::isNON_TRUNCStore(Chain.getNode())) {
- StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
- if (PrevST->getBasePtr() == Ptr &&
- PrevST->getValue().getValueType() == N->getValueType(0))
- return CombineTo(N, PrevST->getOperand(1), Chain);
- }
- }
+ if (auto V = ForwardStoreValueToDirectLoad(LD))
+ return V;
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
@@ -13055,8 +13579,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
- llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
- [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
});
@@ -13689,7 +14212,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SDValue Val = St->getValue();
// If constant is of the wrong type, convert it now.
if (MemVT != Val.getValueType()) {
- Val = peekThroughBitcast(Val);
+ Val = peekThroughBitcasts(Val);
// Deal with constants of wrong size.
if (ElementSizeBits != Val.getValueSizeInBits()) {
EVT IntMemVT =
@@ -13715,7 +14238,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
// All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
// type MemVT. If the underlying value is not the correct
// type, but it is an extraction of an appropriate vector we
@@ -13725,26 +14248,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if ((MemVT != Val.getValueType()) &&
(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
- SDValue Vec = Val.getOperand(0);
EVT MemVTScalarTy = MemVT.getScalarType();
- SDValue Idx = Val.getOperand(1);
// We may need to add a bitcast here to get types to line up.
- if (MemVTScalarTy != Vec.getValueType()) {
- unsigned Elts = Vec.getValueType().getSizeInBits() /
- MemVTScalarTy.getSizeInBits();
- if (Val.getValueType().isVector() && MemVT.isVector()) {
- unsigned IdxC = cast<ConstantSDNode>(Idx)->getZExtValue();
- unsigned NewIdx =
- ((uint64_t)IdxC * MemVT.getVectorNumElements()) / Elts;
- Idx = DAG.getConstant(NewIdx, SDLoc(Val), Idx.getValueType());
- }
- EVT NewVecTy =
- EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
- Vec = DAG.getBitcast(NewVecTy, Vec);
+ if (MemVTScalarTy != Val.getValueType().getScalarType()) {
+ Val = DAG.getBitcast(MemVT, Val);
+ } else {
+ unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT;
+ SDValue Vec = Val.getOperand(0);
+ SDValue Idx = Val.getOperand(1);
+ Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
}
- auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
- : ISD::EXTRACT_VECTOR_ELT;
- Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
}
Ops.push_back(Val);
}
@@ -13769,7 +14283,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
- Val = peekThroughBitcast(Val);
+ Val = peekThroughBitcasts(Val);
StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
StoreInt |= C->getAPIntValue()
@@ -13832,7 +14346,7 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
EVT MemVT = St->getMemoryVT();
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return;
@@ -13866,7 +14380,7 @@ void DAGCombiner::getStoreMergeCandidates(
int64_t &Offset) -> bool {
if (Other->isVolatile() || Other->isIndexed())
return false;
- SDValue Val = peekThroughBitcast(Other->getValue());
+ SDValue Val = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
@@ -13973,11 +14487,12 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
Worklist.push_back(RootNode);
while (!Worklist.empty()) {
auto N = Worklist.pop_back_val();
+ if (!Visited.insert(N).second)
+ continue; // Already present in Visited.
if (N->getOpcode() == ISD::TokenFactor) {
for (SDValue Op : N->ops())
Worklist.push_back(Op.getNode());
}
- Visited.insert(N);
}
// Don't count pruning nodes towards max.
@@ -13990,14 +14505,14 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// in candidate selection and can be
// safely ignored
// * Value (Op 1) -> Cycles may happen (e.g. through load chains)
- // * Address (Op 2) -> Merged addresses may only vary by a fixed constant
- // and so no cycles are possible.
- // * (Op 3) -> appears to always be undef. Cannot be source of cycle.
- //
- // Thus we need only check predecessors of the value operands.
- auto *Op = N->getOperand(1).getNode();
- if (Visited.insert(Op).second)
- Worklist.push_back(Op);
+ // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
+ // but aren't necessarily from the same base node, so
+ // cycles are possible (e.g. via indexed store).
+ // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
+ // non-indexed stores). Not constant on all targets (e.g. ARM)
+ // and so can participate in a cycle.
+ for (unsigned j = 1; j < N->getNumOperands(); ++j)
+ Worklist.push_back(N->getOperand(j).getNode());
}
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
@@ -14030,7 +14545,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Perform an early exit check. Do not bother looking at stored values that
// are not constants, loads, or extracted vector elements.
- SDValue StoredVal = peekThroughBitcast(St->getValue());
+ SDValue StoredVal = peekThroughBitcasts(St->getValue());
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
@@ -14051,10 +14566,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Sort the memory operands according to their distance from the
// base pointer.
- llvm::sort(StoreNodes.begin(), StoreNodes.end(),
- [](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- });
+ llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
// Store Merge attempts to merge the lowest stores. This generally
// works out as if successful, as the remaining stores are checked
@@ -14299,7 +14813,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = peekThroughBitcast(St->getValue());
+ SDValue Val = peekThroughBitcasts(St->getValue());
LoadSDNode *Ld = cast<LoadSDNode>(Val);
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
@@ -14647,8 +15161,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
EVT SVT = Value.getOperand(0).getValueType();
+ // If the store is volatile, we only want to change the store type if the
+ // resulting store is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
if (((!LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+ TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
unsigned OrigAlign = ST->getAlignment();
bool Fast = false;
@@ -14699,7 +15218,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
- Value.getValueType().isInteger()) {
+ Value.getValueType().isInteger() &&
+ (!isa<ConstantSDNode>(Value) ||
+ !cast<ConstantSDNode>(Value)->isOpaque())) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
@@ -14983,6 +15504,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return InVec;
EVT VT = InVec.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
@@ -14990,12 +15512,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
return InVec;
- // We must know which element is being inserted for folds below here.
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
- if (!IndexC)
+ if (!IndexC) {
+ // If this is variable insert to undef vector, it might be better to splat:
+ // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
+ if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
+ SmallVector<SDValue, 8> Ops(NumElts, InVal);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
return SDValue();
- unsigned Elt = IndexC->getZExtValue();
+ }
+ // We must know which element is being inserted for folds below here.
+ unsigned Elt = IndexC->getZExtValue();
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
@@ -15033,11 +15562,11 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
Ops.append(InVec.getNode()->op_begin(),
InVec.getNode()->op_end());
} else if (InVec.isUndef()) {
- unsigned NElts = VT.getVectorNumElements();
- Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
} else {
return SDValue();
}
+ assert(Ops.size() == NumElts && "Unexpected vector size");
// Insert the element
if (Elt < Ops.size()) {
@@ -15051,8 +15580,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return DAG.getBuildVector(VT, DL, Ops);
}
-SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
- SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
+ SDValue EltNo,
+ LoadSDNode *OriginalLoad) {
assert(!OriginalLoad->isVolatile());
EVT ResultVT = EVE->getValueType(0);
@@ -15134,70 +15664,132 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
return SDValue(EVE, 0);
}
+/// Transform a vector binary operation into a scalar binary operation by moving
+/// the math/logic after an extract element of a vector.
+static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
+ bool LegalOperations) {
+ SDValue Vec = ExtElt->getOperand(0);
+ SDValue Index = ExtElt->getOperand(1);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+ return SDValue();
+
+ // Targets may want to avoid this to prevent an expensive register transfer.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldScalarizeBinop(Vec))
+ return SDValue();
+
+ // Extracting an element of a vector constant is constant-folded, so this
+ // transform is just replacing a vector op with a scalar op while moving the
+ // extract.
+ SDValue Op0 = Vec.getOperand(0);
+ SDValue Op1 = Vec.getOperand(1);
+ if (isAnyConstantBuildVector(Op0, true) ||
+ isAnyConstantBuildVector(Op1, true)) {
+ // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
+ // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
+ SDLoc DL(ExtElt);
+ EVT VT = ExtElt->getValueType(0);
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
+ return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
+ }
+
+ return SDValue();
+}
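The transform pays off because extracting a lane of the constant operand constant-folds away, leaving a single scalar op. A scalar model of the fold with assumed values:

#include <cassert>
int main() {
  int X[4] = {10, 20, 30, 40};
  int C[4] = {1, 2, 3, 4};
  int Wide[4];
  for (int i = 0; i != 4; ++i)
    Wide[i] = X[i] + C[i];        // the vector binop
  // extractelt (add X, C), 2 --> add (extractelt X, 2), C[2]
  assert(Wide[2] == X[2] + C[2]);
  return 0;
}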
+
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
- // (vextract (scalar_to_vector val, 0) -> val
- SDValue InVec = N->getOperand(0);
- EVT VT = InVec.getValueType();
- EVT NVT = N->getValueType(0);
+ SDValue VecOp = N->getOperand(0);
+ SDValue Index = N->getOperand(1);
+ EVT ScalarVT = N->getValueType(0);
+ EVT VecVT = VecOp.getValueType();
+ if (VecOp.isUndef())
+ return DAG.getUNDEF(ScalarVT);
- if (InVec.isUndef())
- return DAG.getUNDEF(NVT);
+ // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
+ //
+ // This only really matters if the index is non-constant since other combines
+ // on the constant elements already work.
+ SDLoc DL(N);
+ if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ Index == VecOp.getOperand(2)) {
+ SDValue Elt = VecOp.getOperand(1);
+ return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
+ }
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
- SDValue InOp = InVec.getOperand(0);
- if (InOp.getValueType() != NVT) {
- assert(InOp.getValueType().isInteger() && NVT.isInteger());
- return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
+ SDValue InOp = VecOp.getOperand(0);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
}
- SDValue EltNo = N->getOperand(1);
- ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
-
// extract_vector_elt of out-of-bounds element -> UNDEF
- if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
- return DAG.getUNDEF(NVT);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (IndexC && IndexC->getAPIntValue().uge(NumElts))
+ return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (build_vector x, y), 1 -> y
- if (ConstEltNo &&
- InVec.getOpcode() == ISD::BUILD_VECTOR &&
- TLI.isTypeLegal(VT) &&
- (InVec.hasOneUse() ||
- TLI.aggressivelyPreferBuildVectorSources(VT))) {
- SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
+ if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VecVT) &&
+ (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
+ SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
EVT InEltVT = Elt.getValueType();
// Sometimes build_vector's scalar input types do not match result type.
- if (NVT == InEltVT)
+ if (ScalarVT == InEltVT)
return Elt;
// TODO: It may be useful to truncate if free if the build_vector implicitly
// converts.
}
- // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
- bool isLE = DAG.getDataLayout().isLittleEndian();
- unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
- if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
- ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
- SDValue BCSrc = InVec.getOperand(0);
- if (BCSrc.getValueType().isScalarInteger())
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
+ // TODO: These transforms should not require the 'hasOneUse' restriction, but
+ // there are regressions on multiple targets without it. We can end up with a
+ // mess of scalar and vector code if we reduce only part of the DAG to scalar.
+ if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
+ VecOp.hasOneUse()) {
+ // The vector index of the LSBs of the source depends on the endianness.
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ unsigned ExtractIndex = IndexC->getZExtValue();
+ // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
+ unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
+ SDValue BCSrc = VecOp.getOperand(0);
+ if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
+
+ if (LegalTypes && BCSrc.getValueType().isInteger() &&
+ BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
+ // trunc i64 X to i32
+ SDValue X = BCSrc.getOperand(0);
+ assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
+ "Extract element and scalar to vector can't change element type "
+ "from FP to integer.");
+ unsigned XBitWidth = X.getValueSizeInBits();
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
+
+ // An extract element return value type can be wider than its vector
+ // operand element type. In that case, the high bits are undefined, so
+ // it's possible that we may need to extend rather than truncate.
+ if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
+ assert(XBitWidth % VecEltBitWidth == 0 &&
+ "Scalar bitwidth must be a multiple of vector element bitwidth");
+ return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
+ }
+ }
}
- // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
- //
- // This only really matters if the index is non-constant since other combines
- // on the constant elements already work.
- if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
- EltNo == InVec.getOperand(2)) {
- SDValue Elt = InVec.getOperand(1);
- return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
- }
+ if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+ return BO;
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
@@ -15205,30 +15797,29 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// patterns. For example on AVX, extracting elements from a wide vector
// without using extract_subvector. However, if we can find an underlying
// scalar value, then we can always use that.
- if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
- int NumElem = VT.getVectorNumElements();
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
// Find the new index to extract from.
- int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
+ int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
- return DAG.getUNDEF(NVT);
+ return DAG.getUNDEF(ScalarVT);
// Select the right vector half to extract from.
SDValue SVInVec;
- if (OrigElt < NumElem) {
- SVInVec = InVec->getOperand(0);
+ if (OrigElt < (int)NumElts) {
+ SVInVec = VecOp.getOperand(0);
} else {
- SVInVec = InVec->getOperand(1);
- OrigElt -= NumElem;
+ SVInVec = VecOp.getOperand(1);
+ OrigElt -= NumElts;
}
if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
SDValue InOp = SVInVec.getOperand(OrigElt);
- if (InOp.getValueType() != NVT) {
- assert(InOp.getValueType().isInteger() && NVT.isInteger());
- InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+ if (InOp.getValueType() != ScalarVT) {
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
@@ -15239,136 +15830,131 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (!LegalOperations ||
// FIXME: Should really be just isOperationLegalOrCustom.
- TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
- TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
+ TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
- DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
+ DAG.getConstant(OrigElt, DL, IndexTy));
}
}
// If only EXTRACT_VECTOR_ELT nodes use the source vector we can
// simplify it based on the (valid) extraction indices.
- if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+ if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- Use->getOperand(0) == InVec &&
+ Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
- APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
- for (SDNode *Use : InVec->uses()) {
+ APInt DemandedElts = APInt::getNullValue(NumElts);
+ for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
- if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+ if (CstElt->getAPIntValue().ult(NumElts))
DemandedElts.setBit(CstElt->getZExtValue());
}
- if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+ if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
+ // We simplified the vector operand of this extract element. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
return SDValue(N, 0);
+ }
}
- bool BCNumEltsChanged = false;
- EVT ExtVT = VT.getVectorElementType();
- EVT LVT = ExtVT;
-
+ // Everything under here is trying to match an extract of a loaded value.
// If the result of load has to be truncated, then it's not necessarily
// profitable.
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VecVT.getVectorElementType();
+ EVT LVT = ExtVT;
+ if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
return SDValue();
- if (InVec.getOpcode() == ISD::BITCAST) {
+ if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
+ if (!VecOp.hasOneUse())
return SDValue();
- EVT BCVT = InVec.getOperand(0).getValueType();
+ EVT BCVT = VecOp.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ if (NumElts != BCVT.getVectorNumElements())
BCNumEltsChanged = true;
- InVec = InVec.getOperand(0);
+ VecOp = VecOp.getOperand(0);
ExtVT = BCVT.getVectorElementType();
}
- // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
- if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
- ISD::isNormalLoad(InVec.getNode()) &&
- !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
- SDValue Index = N->getOperand(1);
- if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
- if (!OrigLoad->isVolatile()) {
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
- OrigLoad);
- }
- }
+ // extract (vector load $addr), i --> load $addr + i * size
+ if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
+ ISD::isNormalLoad(VecOp.getNode()) &&
+ !Index->hasPredecessor(VecOp.getNode())) {
+ auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
+ if (VecLoad && !VecLoad->isVolatile())
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
- if (!LegalOperations) return SDValue();
+ if (!LegalOperations || !IndexC)
+ return SDValue();
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+ int Elt = IndexC->getZExtValue();
+ LoadSDNode *LN0 = nullptr;
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ VecOp.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
- if (ConstEltNo) {
- int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
+ }
+ if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
- LoadSDNode *LN0 = nullptr;
- const ShuffleVectorSDNode *SVN = nullptr;
- if (ISD::isNormalLoad(InVec.getNode())) {
- LN0 = cast<LoadSDNode>(InVec);
- } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- InVec.getOperand(0).getValueType() == ExtVT &&
- ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
+ // Don't duplicate a load with other uses.
+ if (!VecOp.hasOneUse())
+ return SDValue();
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
- LN0 = cast<LoadSDNode>(InVec.getOperand(0));
- } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
- // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
- // =>
- // (load $addr+1*size)
+ // Select the input vector, guarding against out of range extract vector.
+ int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
+ VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
+ if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- // If the bit convert changed the number of elements, it is unsafe
- // to examine the mask.
- if (BCNumEltsChanged)
+ if (!VecOp.hasOneUse())
return SDValue();
- // Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
- int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
- InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
-
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- InVec = InVec.getOperand(0);
- }
- if (ISD::isNormalLoad(InVec.getNode())) {
- LN0 = cast<LoadSDNode>(InVec);
- Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
- EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
- }
+ VecOp = VecOp.getOperand(0);
}
+ if (ISD::isNormalLoad(VecOp.getNode())) {
+ LN0 = cast<LoadSDNode>(VecOp);
+ Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
+ Index = DAG.getConstant(Elt, DL, Index.getValueType());
+ }
+ }
- // Make sure we found a non-volatile load and the extractelement is
- // the only use.
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
- return SDValue();
-
- // If Idx was -1 above, Elt is going to be -1, so just return undef.
- if (Elt == -1)
- return DAG.getUNDEF(LVT);
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ return SDValue();
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
- }
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
- return SDValue();
+ return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
}
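The bitcast folds above pick the element holding the low bits of the scalar source (BCTruncElt), which is lane 0 only on little-endian targets. A standalone sketch, assuming a little-endian host:

#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  uint64_t X = 0x1122334455667788ULL;
  uint32_t Elts[2];
  std::memcpy(Elts, &X, sizeof(X));            // models bitcast i64 -> v2i32
  assert(Elts[0] == static_cast<uint32_t>(X)); // lane 0 == trunc on little-endian
  return 0;
}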
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
@@ -15484,77 +16070,6 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
return DAG.getBitcast(VT, BV);
}
-SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
- EVT VT = N->getValueType(0);
-
- unsigned NumInScalars = N->getNumOperands();
- SDLoc DL(N);
-
- EVT SrcVT = MVT::Other;
- unsigned Opcode = ISD::DELETED_NODE;
- unsigned NumDefs = 0;
-
- for (unsigned i = 0; i != NumInScalars; ++i) {
- SDValue In = N->getOperand(i);
- unsigned Opc = In.getOpcode();
-
- if (Opc == ISD::UNDEF)
- continue;
-
- // If all scalar values are floats and converted from integers.
- if (Opcode == ISD::DELETED_NODE &&
- (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
- Opcode = Opc;
- }
-
- if (Opc != Opcode)
- return SDValue();
-
- EVT InVT = In.getOperand(0).getValueType();
-
- // If all scalar values are typed differently, bail out. It's chosen to
- // simplify BUILD_VECTOR of integer types.
- if (SrcVT == MVT::Other)
- SrcVT = InVT;
- if (SrcVT != InVT)
- return SDValue();
- NumDefs++;
- }
-
- // If the vector has just one element defined, it's not worth to fold it into
- // a vectorized one.
- if (NumDefs < 2)
- return SDValue();
-
- assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
- && "Should only handle conversion from integer to float.");
- assert(SrcVT != MVT::Other && "Cannot determine source type!");
-
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
-
- if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
- return SDValue();
-
- // Just because the floating-point vector type is legal does not necessarily
- // mean that the corresponding integer vector type is.
- if (!isTypeLegal(NVT))
- return SDValue();
-
- SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0; i != NumInScalars; ++i) {
- SDValue In = N->getOperand(i);
-
- if (In.isUndef())
- Opnds.push_back(DAG.getUNDEF(SrcVT));
- else
- Opnds.push_back(In.getOperand(0));
- }
- SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
- AddToWorklist(BV.getNode());
-
- return DAG.getNode(Opcode, DL, VT, BV);
-}
-
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
@@ -15676,6 +16191,78 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
return Shuffle;
}
+static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
+ assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+
+ // First, determine where the build vector is not undef.
+ // TODO: We could extend this to handle zero elements as well as undefs.
+ int NumBVOps = BV->getNumOperands();
+ int ZextElt = -1;
+ for (int i = 0; i != NumBVOps; ++i) {
+ SDValue Op = BV->getOperand(i);
+ if (Op.isUndef())
+ continue;
+ if (ZextElt == -1)
+ ZextElt = i;
+ else
+ return SDValue();
+ }
+ // Bail out if there's no non-undef element.
+ if (ZextElt == -1)
+ return SDValue();
+
+ // The build vector contains some number of undef elements and exactly
+ // one other element. That other element must be a zero-extended scalar
+ // extracted from a vector at a constant index to turn this into a shuffle.
+ // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+ SDValue Zext = BV->getOperand(ZextElt);
+ if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
+ Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+ return SDValue();
+
+ // The zero-extended width must be a multiple of the source width.
+ SDValue Extract = Zext.getOperand(0);
+ unsigned DestSize = Zext.getValueSizeInBits();
+ unsigned SrcSize = Extract.getValueSizeInBits();
+ if (DestSize % SrcSize != 0)
+ return SDValue();
+
+ // Create a shuffle mask that will combine the extracted element with zeros
+ // and undefs.
+ int ZextRatio = DestSize / SrcSize;
+ int NumMaskElts = NumBVOps * ZextRatio;
+ SmallVector<int, 32> ShufMask(NumMaskElts, -1);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ if (i / ZextRatio == ZextElt) {
+ // The low bits of the (potentially translated) extracted element map to
+ // the source vector. The high bits map to zero. We will use a zero vector
+ // as the 2nd source operand of the shuffle, so use the 1st element of
+ // that vector (mask value is number-of-elements) for the high bits.
+ if (i % ZextRatio == 0)
+ ShufMask[i] = Extract.getConstantOperandVal(1);
+ else
+ ShufMask[i] = NumMaskElts;
+ }
+
+ // Undef elements of the build vector remain undef because we initialize
+ // the shuffle mask with -1.
+ }
+
+ // Turn this into a shuffle with zero if that's legal.
+ EVT VecVT = Extract.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
+ return SDValue();
+
+ // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
+ // bitcast (shuffle V, ZeroVec, VectorMask)
+ SDLoc DL(BV);
+ SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
+ SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
+ ShufMask);
+ return DAG.getBitcast(BV->getValueType(0), Shuf);
+}
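A worked example of the mask construction under assumed types: BV is v4i64 whose only defined operand (ZextElt = 0) is zext i32 to i64 of (extractelt v8i32 V, 5), so ZextRatio = 2 and the mask takes lane 5 of V for the low half of that element and lane 0 of the zero vector (mask value NumMaskElts) for the high half:

#include <cstdio>
#include <vector>
int main() {
  int NumBVOps = 4, ZextElt = 0, ZextRatio = 64 / 32, ExtractIdx = 5;
  int NumMaskElts = NumBVOps * ZextRatio;
  std::vector<int> ShufMask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i)
    if (i / ZextRatio == ZextElt)
      ShufMask[i] = (i % ZextRatio == 0) ? ExtractIdx : NumMaskElts;
  for (int M : ShufMask)
    printf("%d ", M);   // prints: 5 8 -1 -1 -1 -1 -1 -1
  printf("\n");
  return 0;
}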
+
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
@@ -15687,6 +16274,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
if (!isTypeLegal(VT))
return SDValue();
+ if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
+ return V;
+
// May only combine to shuffle after legalize if shuffle is legal.
if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
@@ -15950,7 +16540,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// TODO: Maybe this is useful for non-splat too?
if (!LegalOperations) {
if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
- Splat = peekThroughBitcast(Splat);
+ Splat = peekThroughBitcasts(Splat);
EVT SrcVT = Splat.getValueType();
if (SrcVT.isVector()) {
unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
@@ -16001,9 +16591,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
- if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
- return V;
-
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
@@ -16085,8 +16672,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
SmallVector<int, 8> Mask;
for (SDValue Op : N->ops()) {
- // Peek through any bitcast.
- Op = peekThroughBitcast(Op);
+ Op = peekThroughBitcasts(Op);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (Op.isUndef()) {
@@ -16103,9 +16689,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
// We want the EVT of the original extraction to correctly scale the
// extraction index.
EVT ExtVT = ExtVec.getValueType();
-
- // Peek through any bitcast.
- ExtVec = peekThroughBitcast(ExtVec);
+ ExtVec = peekThroughBitcasts(ExtVec);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (ExtVec.isUndef()) {
@@ -16169,11 +16753,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
- // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
- if (In->getOpcode() == ISD::BITCAST &&
- !In->getOperand(0).getValueType().isVector()) {
- SDValue Scalar = In->getOperand(0);
+ SDValue Scalar = peekThroughOneUseBitcasts(In);
+ // concat_vectors(scalar_to_vector(scalar), undef) ->
+ // scalar_to_vector(scalar)
+ if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Scalar.hasOneUse()) {
+ EVT SVT = Scalar.getValueType().getVectorElementType();
+ if (SVT == Scalar.getOperand(0).getValueType())
+ Scalar = Scalar.getOperand(0);
+ }
+
+ // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
+ if (!Scalar.getValueType().isVector()) {
// If the bitcast type isn't legal, it might be a trunc of a legal type;
// look through the trunc so we can still do the transform:
// concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
@@ -16182,7 +16774,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
Scalar = Scalar->getOperand(0);
- EVT SclTy = Scalar->getValueType(0);
+ EVT SclTy = Scalar.getValueType();
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
@@ -16310,60 +16902,93 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
-/// If we are extracting a subvector produced by a wide binary operator with at
-/// at least one operand that was the result of a vector concatenation, then try
-/// to use the narrow vector operands directly to avoid the concatenation and
-/// extraction.
+/// If we are extracting a subvector produced by a wide binary operator try
+/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
// The extract index must be a constant, so we can map it to a concat operand.
- auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!ExtractIndex)
- return SDValue();
-
- // Only handle the case where we are doubling and then halving. A larger ratio
- // may require more than two narrow binops to replace the wide binop.
- EVT VT = Extract->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
- "Extract index is not a multiple of the vector length.");
- if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
+ auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!ExtractIndexC)
return SDValue();
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
- SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
-
- // TODO: The motivating case for this transform is an x86 AVX1 target. That
- // target has temptingly almost legal versions of bitwise logic ops in 256-bit
- // flavors, but no other 256-bit integer support. This could be extended to
- // handle any binop, but that may require fixing/adding other folds to avoid
- // codegen regressions.
- unsigned BOpcode = BinOp.getOpcode();
- if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
+ if (!ISD::isBinaryOp(BinOp.getNode()))
return SDValue();
- // The binop must be a vector type, so we can chop it in half.
+ // The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
if (!WideBVT.isVector())
return SDValue();
+ EVT VT = Extract->getValueType(0);
+ unsigned ExtractIndex = ExtractIndexC->getZExtValue();
+ assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
+ "Extract index is not a multiple of the vector length.");
+
+ // Bail out if this is not a proper multiple width extraction.
+ unsigned WideWidth = WideBVT.getSizeInBits();
+ unsigned NarrowWidth = VT.getSizeInBits();
+ if (WideWidth % NarrowWidth != 0)
+ return SDValue();
+
+ // Bail out if we are extracting a fraction of a single operation. This can
+ // occur because we potentially looked through a bitcast of the binop.
+ unsigned NarrowingRatio = WideWidth / NarrowWidth;
+ unsigned WideNumElts = WideBVT.getVectorNumElements();
+ if (WideNumElts % NarrowingRatio != 0)
+ return SDValue();
+
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
- WideBVT.getVectorNumElements() / 2);
+ WideNumElts / NarrowingRatio);
+ unsigned BOpcode = BinOp.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
- // Peek through bitcasts of the binary operator operands if needed.
- SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
- SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
+ // If extraction is cheap, we don't need to look at the binop operands
+ // for concat ops. The narrow binop alone makes this transform profitable.
+ // We can't just reuse the original extract index operand because we may have
+ // bitcasted.
+ unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
+ unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
+ EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
+ BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
+ // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
+ SDLoc DL(Extract);
+ SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), NewExtIndex);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), NewExtIndex);
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
+ BinOp.getNode()->getFlags());
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
+
+ // Only handle the case where we are doubling and then halving. A larger ratio
+ // may require more than two narrow binops to replace the wide binop.
+ if (NarrowingRatio != 2)
+ return SDValue();
+
+ // TODO: The motivating case for this transform is an x86 AVX1 target. That
+ // target has temptingly almost legal versions of bitwise logic ops in 256-bit
+ // flavors, but no other 256-bit integer support. This could be extended to
+ // handle any binop, but that may require fixing/adding other folds to avoid
+ // codegen regressions.
+ if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ return SDValue();
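+  // For example (editorial note): AVX1 provides 256-bit logic ops through the
+  // floating-point domain (vandps/vorps/vxorps on ymm registers) while other
+  // 256-bit integer arithmetic must be split, so only AND/OR/XOR are narrowed
+  // here.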
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
// TODO: Should we also handle INSERT_SUBVECTOR patterns?
+ SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
+ SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
bool ConcatL =
LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
bool ConcatR =
@@ -16372,11 +16997,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
// If one of the binop operands was not the result of a concat, we must
- // extract a half-sized operand for our new narrow binop. We can't just reuse
- // the original extract index operand because we may have bitcasted.
- unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
- unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
- EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ // extract a half-sized operand for our new narrow binop.
SDLoc DL(Extract);
// extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
@@ -16404,17 +17025,19 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
if (DAG.getDataLayout().isBigEndian())
return SDValue();
- // TODO: The one-use check is overly conservative. Check the cost of the
- // extract instead or remove that condition entirely.
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
- !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
+ return SDValue();
+
+ // Allow targets to opt-out.
+ EVT VT = Extract->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
- EVT VT = Extract->getValueType(0);
SDLoc DL(Extract);
SDValue BaseAddr = Ld->getOperand(1);
unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
@@ -16447,9 +17070,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Vi if possible
// Only operand 0 is checked as 'concat' assumes all inputs of the same
// type.
- if (V->getOpcode() == ISD::CONCAT_VECTORS &&
+ if (V.getOpcode() == ISD::CONCAT_VECTORS &&
isa<ConstantSDNode>(N->getOperand(1)) &&
- V->getOperand(0).getValueType() == NVT) {
+ V.getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
@@ -16457,13 +17080,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return V->getOperand(Idx / NumElems);
}
- // Skip bitcasting
- V = peekThroughBitcast(V);
+ V = peekThroughBitcasts(V);
// If the input is a build vector. Try to make a smaller build vector.
- if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ if (V.getOpcode() == ISD::BUILD_VECTOR) {
if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- EVT InVT = V->getValueType(0);
+ EVT InVT = V.getValueType();
unsigned ExtractSize = NVT.getSizeInBits();
unsigned EltSize = InVT.getScalarSizeInBits();
// Only do this if we won't split any elements.
@@ -16496,16 +17118,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
- if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
// being extracted are of same size.
- EVT SmallVT = V->getOperand(1).getValueType();
+ EVT SmallVT = V.getOperand(1).getValueType();
if (!NVT.bitsEq(SmallVT))
return SDValue();
// Only handle cases where both indexes are constants.
- ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
if (InsIdx && ExtIdx) {
// Combine:
@@ -16515,11 +17137,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// otherwise => (extract_subvec V1, ExtIdx)
if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
- return DAG.getBitcast(NVT, V->getOperand(1));
+ return DAG.getBitcast(NVT, V.getOperand(1));
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
- DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
- N->getOperand(1));
+ DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
+ N->getOperand(1));
}
}
@@ -16620,14 +17242,17 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SDValue N0 = SVN->getOperand(0);
SDValue N1 = SVN->getOperand(1);
- if (!N0->hasOneUse() || !N1->hasOneUse())
+ if (!N0->hasOneUse())
return SDValue();
// If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
// discussed above.
if (!N1.isUndef()) {
- bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
- bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+ if (!N1->hasOneUse())
+ return SDValue();
+
+ bool N0AnyConst = isAnyConstantBuildVector(N0);
+ bool N1AnyConst = isAnyConstantBuildVector(N1);
if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
return SDValue();
if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
@@ -16693,8 +17318,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI,
- bool LegalOperations,
- bool LegalTypes) {
+ bool LegalOperations) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
@@ -16730,11 +17354,14 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
- if (!LegalTypes || TLI.isTypeLegal(OutVT))
+ // Never create an illegal type. Only create unsupported operations if we
+ // are pre-legalization.
+ if (TLI.isTypeLegal(OutVT))
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
return DAG.getBitcast(VT,
- DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
+ DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
+ SDLoc(SVN), OutVT, N0));
}
return SDValue();
@@ -16754,7 +17381,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
if (!VT.isInteger() || IsBigEndian)
return SDValue();
- SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
+ SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
@@ -17039,7 +17666,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue(N, 0);
// Match shuffles that can be converted to any_vector_extend_in_reg.
- if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
@@ -17057,7 +17684,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
return Res;
@@ -17067,15 +17694,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N1.isUndef() && Level < AfterLegalizeVectorOps &&
TLI.isTypeLegal(VT)) {
-
- // Peek through the bitcast only if there is one user.
- SDValue BC0 = N0;
- while (BC0.getOpcode() == ISD::BITCAST) {
- if (!BC0.hasOneUse())
- break;
- BC0 = BC0.getOperand(0);
- }
-
auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
if (Scale == 1)
return SmallVector<int, 8>(Mask.begin(), Mask.end());
@@ -17086,7 +17704,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
NewMask.push_back(M < 0 ? -1 : Scale * M + s);
return NewMask;
};
-
+
+ SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
EVT SVT = VT.getScalarType();
EVT InnerVT = BC0->getValueType(0);
@@ -17329,12 +17948,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N1.isUndef())
return N0;
- // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
- // us to pull BITCASTs from input to output.
- if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
- if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
- return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
-
// If this is an insert of an extracted vector into an undef vector, we can
// just use the input to the extract.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
@@ -17382,6 +17995,14 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
+ // Eliminate an intermediate insert into an undef vector:
+ // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
+ // insert_subvector undef, X, N2
+ if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
+ N1.getOperand(1), N2);
+
if (!isa<ConstantSDNode>(N2))
return SDValue();
@@ -17417,6 +18038,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
+ // Simplify source operands based on insertion.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -17454,7 +18079,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
- SDValue RHS = peekThroughBitcast(N->getOperand(1));
+ SDValue RHS = peekThroughBitcasts(N->getOperand(1));
SDLoc DL(N);
// Make sure we're not running after operation legalization where it
@@ -17684,31 +18309,64 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
LLD->getBasePtr().getValueType()))
return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
+ return false;
+
// Check that the select condition doesn't reach either load. If so,
// folding this will induce a cycle into the DAG. If not, this is safe to
// xform, so create a select of the addresses.
+
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
+  // Always fail if LLD and RLD are not independent. TheSelect is a
+  // predecessor to all nodes in question, so we need not search past it.
+
+ Visited.insert(TheSelect);
+ Worklist.push_back(LLD);
+ Worklist.push_back(RLD);
+
+ if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
+ return false;
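+  // Editorial note: seeding Visited with TheSelect stops the search at the
+  // select itself, and the Visited/Worklist state is intentionally kept alive
+  // so the later checks against the condition operands only traverse nodes
+  // that have not been visited yet.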
+
SDValue Addr;
if (TheSelect->getOpcode() == ISD::SELECT) {
+    // We cannot do this optimization if any node in {LLD, RLD} is a
+    // predecessor of any node in {LLD, RLD, CondNode}. As we've already
+    // compared the loads, we only need to check whether CondNode is a
+    // successor of one of the loads. We can further skip the check when a
+    // load's chain value has no uses.
SDNode *CondNode = TheSelect->getOperand(0).getNode();
- if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
- (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
- return false;
- // The loads must not depend on one another.
- if (LLD->isPredecessorOf(RLD) ||
- RLD->isPredecessorOf(LLD))
+ Worklist.push_back(CondNode);
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
+
Addr = DAG.getSelect(SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
RLD->getBasePtr());
} else { // Otherwise SELECT_CC
+    // We cannot do this optimization if any node in {LLD, RLD} is a
+    // predecessor of any node in {LLD, RLD, CondLHS, CondRHS}. As we've
+    // already compared the loads, we only need to check whether CondLHS or
+    // CondRHS is a successor of one of the loads. We can further skip the
+    // check when a load's chain value has no uses.
+
SDNode *CondLHS = TheSelect->getOperand(0).getNode();
SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+ Worklist.push_back(CondLHS);
+ Worklist.push_back(CondRHS);
if ((LLD->hasAnyUseOfValue(1) &&
- (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+ SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
(RLD->hasAnyUseOfValue(1) &&
- (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
@@ -17823,6 +18481,63 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
+/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+/// in it. This may be a win when the constant is not otherwise available
+/// because it replaces two constant pool loads with one.
+SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
+ const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+ return SDValue();
+
+ // If we are before legalize types, we want the other legalization to happen
+ // first (for example, to avoid messing with soft float).
+ auto *TV = dyn_cast<ConstantFPSDNode>(N2);
+ auto *FV = dyn_cast<ConstantFPSDNode>(N3);
+ EVT VT = N2.getValueType();
+ if (!TV || !FV || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // If a constant can be materialized without loads, this does not make sense.
+ if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
+ TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
+ TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
+ return SDValue();
+
+ // If both constants have multiple uses, then we won't need to do an extra
+ // load. The values are likely around in registers for other users.
+ if (!TV->hasOneUse() && !FV->hasOneUse())
+ return SDValue();
+
+ Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue()) };
+ Type *FPTy = Elts[0]->getType();
+ const DataLayout &TD = DAG.getDataLayout();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get offsets to the 0 and 1 elements of the array, so we can select between
+ // them.
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
+ SDValue Cond =
+ DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
+ AddToWorklist(Cond.getNode());
+ SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
+ AddToWorklist(CstOffset.getNode());
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
+ AddToWorklist(CPIdx.getNode());
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(
+ DAG.getMachineFunction()), Alignment);
+}
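+// Example of the intended lowering (editorial illustration): for
+// "select_cc a, b, 1.0f, 2.0f, cc" this builds the constant pool array
+// {2.0f, 1.0f}, selects an offset of (a cc b) ? 4 : 0, and issues a single
+// load from the pool base plus that offset, replacing two FP constant loads.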
+
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
@@ -17831,75 +18546,26 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// (x ? y : y) -> y.
if (N2 == N3) return N2;
+ EVT CmpOpVT = N0.getValueType();
EVT VT = N2.getValueType();
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
- // Determine if the condition we're dealing with is constant
- SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
- N0, N1, CC, DL, false);
+ // Determine if the condition we're dealing with is constant.
+ SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
+ false);
if (SCC.getNode()) AddToWorklist(SCC.getNode());
- if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
return !SCCC->isNullValue() ? N2 : N3;
}
- // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
- // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
- // in it. This is a win when the constant is not otherwise available because
- // it replaces two constant pool loads with one. We only do this if the FP
- // type is known to be legal, because if it isn't, then we are before legalize
- // types an we want the other legalization to happen first (e.g. to avoid
- // messing with soft float) and if the ConstantFP is not legal, because if
- // it is legal, we may not need to store the FP constant in a constant pool.
- if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
- if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
- if (TLI.isTypeLegal(N2.getValueType()) &&
- (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
- TargetLowering::Legal &&
- !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
- !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
- // If both constants have multiple uses, then we won't need to do an
- // extra load, they are likely around in registers for other users.
- (TV->hasOneUse() || FV->hasOneUse())) {
- Constant *Elts[] = {
- const_cast<ConstantFP*>(FV->getConstantFPValue()),
- const_cast<ConstantFP*>(TV->getConstantFPValue())
- };
- Type *FPTy = Elts[0]->getType();
- const DataLayout &TD = DAG.getDataLayout();
-
- // Create a ConstantArray of the two constants.
- Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
- SDValue CPIdx =
- DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
- TD.getPrefTypeAlignment(FPTy));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
-
- // Get the offsets to the 0 and 1 element of the array so that we can
- // select between them.
- SDValue Zero = DAG.getIntPtrConstant(0, DL);
- unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
- SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
-
- SDValue Cond = DAG.getSetCC(DL,
- getSetCCResultType(N0.getValueType()),
- N0, N1, CC);
- AddToWorklist(Cond.getNode());
- SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
- Cond, One, Zero);
- AddToWorklist(CstOffset.getNode());
- CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
- CstOffset);
- AddToWorklist(CPIdx.getNode());
- return DAG.getLoad(
- TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- Alignment);
- }
- }
+ if (SDValue V =
+ convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
+ return V;
if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
return V;
@@ -17913,7 +18579,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
- ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
@@ -17934,48 +18600,48 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
// fold select C, 16, 0 -> shl C, 4
- if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents(N0.getValueType()) ==
- TargetLowering::ZeroOrOneBooleanContent) {
+ bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
+ bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
+
+ if ((Fold || Swap) &&
+ TLI.getBooleanContents(CmpOpVT) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
+
+ if (Swap) {
+ CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
+ std::swap(N2C, N3C);
+ }
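+    // Editorial example: "select_cc a, b, 0, 16, eq" is handled by inverting
+    // the condition to ne and swapping the constants, after which the
+    // "select C, 16, 0 -> shl C, 4" logic below applies unchanged.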
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
if (NotExtCompare && N2C->isOne())
return SDValue();
- // Get a SetCC of the condition
- // NOTE: Don't create a SETCC if it's not legal on this target.
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
- SDValue Temp, SCC;
- // cast from setcc result type to select result type
- if (LegalTypes) {
- SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
- N0, N1, CC);
- if (N2.getValueType().bitsLT(SCC.getValueType()))
- Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
- N2.getValueType());
- else
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
- N2.getValueType(), SCC);
- } else {
- SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
- N2.getValueType(), SCC);
- }
+ SDValue Temp, SCC;
+ // zext (setcc n0, n1)
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
+ if (VT.bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ } else {
+ SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ }
- AddToWorklist(SCC.getNode());
- AddToWorklist(Temp.getNode());
+ AddToWorklist(SCC.getNode());
+ AddToWorklist(Temp.getNode());
- if (N2C->isOne())
- return Temp;
+ if (N2C->isOne())
+ return Temp;
- // shl setcc result by log2 n2c
- return DAG.getNode(
- ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
- getShiftAmountTy(Temp.getValueType())));
- }
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ SDLoc(Temp),
+ getShiftAmountTy(Temp.getValueType())));
}
// Check to see if this is an integer abs.
@@ -17995,18 +18661,16 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
- EVT XType = N0.getValueType();
- if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
SDLoc DL(N0);
- SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
- N0,
- DAG.getConstant(XType.getSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
- SDValue Add = DAG.getNode(ISD::ADD, DL,
- XType, N0, Shift);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
+ DL,
+ getShiftAmountTy(CmpOpVT)));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
}
}
@@ -18067,21 +18731,14 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
- ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
- if (!C)
- return SDValue();
-
- // Avoid division by zero.
- if (C->isNullValue())
- return SDValue();
-
SmallVector<SDNode *, 8> Built;
- SDValue S =
- TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
+ if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
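+// Editorial note: the divisor validation (non-zero constant or constant
+// splat) that used to live here is assumed to have moved into
+// TargetLowering::BuildSDIV/BuildUDIV, which now take the node directly and
+// report any helper nodes they create through the Built vector.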
/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
@@ -18096,11 +18753,13 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
SmallVector<SDNode *, 8> Built;
- SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built);
+ if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
@@ -18113,21 +18772,14 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
- ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
- if (!C)
- return SDValue();
-
- // Avoid division by zero.
- if (C->isNullValue())
- return SDValue();
-
SmallVector<SDNode *, 8> Built;
- SDValue S =
- TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
+ if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
- for (SDNode *N : Built)
- AddToWorklist(N);
- return S;
+ return SDValue();
}
/// Determines the LogBase2 value for a non-null input value using the
@@ -18583,6 +19235,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// TODO: Replace with std::monostate when we move to C++17.
+struct UnitT { } Unit;
+bool operator==(const UnitT &, const UnitT &) { return true; }
+bool operator!=(const UnitT &, const UnitT &) { return false; }
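+// Editorial note: UnitT is a payload-free stand-in value (hence the TODO about
+// std::monostate) so the IntervalMap below can serve as a plain interval set;
+// the trivial equality operators let adjacent intervals holding the same value
+// coalesce on insertion.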
+
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
@@ -18595,13 +19252,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
-bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
- return false;
+
+bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ StoreSDNode *STChain = St;
+  // Intervals records which offsets from BaseIndex have been covered. In the
+  // common case, every store writes to the address range immediately adjacent
+  // to the covered one and is thus merged with the existing interval on insert.
+
+ using IMap =
+ llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
+ IMap::Allocator A;
+ IMap Intervals(A);
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -18611,76 +19277,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
- SmallVector<StoreSDNode *, 8> ChainedStores;
- ChainedStores.push_back(St);
+ // Add ST's interval.
+ Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
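+  // Editorial example: an i32 store at BaseIndex covers the half-open byte
+  // interval [0, 4); a chained i32 store found below at offset 4 extends the
+  // coverage to [0, 8), while one overlapping, say, [2, 6) stops the walk.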
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- StoreSDNode *Index = St;
- while (Index) {
+ while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
// If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ if (!SDValue(Chain, 0)->hasOneUse())
break;
-
- if (Index->isVolatile() || Index->isIndexed())
+ if (Chain->isVolatile() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
-
+ const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
// Check that the base pointer is the same as the original one.
- if (!BasePtr.equalBaseIndex(Ptr, DAG))
+ int64_t Offset;
+ if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
+ break;
+ int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
+ // Make sure we don't overlap with other intervals by checking the ones to
+ // the left or right before inserting.
+ auto I = Intervals.find(Offset);
+ // If there's a next interval, we should end before it.
+ if (I != Intervals.end() && I.start() < (Offset + Length))
+ break;
+ // If there's a previous interval, we should start after it.
+ if (I != Intervals.begin() && (--I).stop() <= Offset)
break;
+ Intervals.insert(Offset, Offset + Length, Unit);
- // Walk up the chain to find the next store node, ignoring any
- // intermediate loads. Any other kind of node will halt the loop.
- SDNode *NextInChain = Index->getChain().getNode();
- while (true) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- if (STn->isVolatile() || STn->isIndexed()) {
- Index = nullptr;
- break;
- }
- ChainedStores.push_back(STn);
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }// end while
+ ChainedStores.push_back(Chain);
+ STChain = Chain;
}
- // At this point, ChainedStores lists all of the Store nodes
- // reachable by iterating up through chain nodes matching the above
- // conditions. For each such store identified, try to find an
- // earlier chain to attach the store to which won't violate the
- // required ordering.
- bool MadeChangeToSt = false;
- SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+ // If we didn't find a chained store, exit.
+ if (ChainedStores.size() == 0)
+ return false;
+
+ // Improve all chained stores (St and ChainedStores members) starting from
+  // where the store chain ended and return a single TokenFactor.
+ SDValue NewChain = STChain->getChain();
+ SmallVector<SDValue, 8> TFOps;
+ for (unsigned I = ChainedStores.size(); I;) {
+ StoreSDNode *S = ChainedStores[--I];
+ SDValue BetterChain = FindBetterChain(S, NewChain);
+ S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
+ S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
+ TFOps.push_back(SDValue(S, 0));
+ ChainedStores[I] = S;
+ }
+
+ // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
+ SDValue BetterChain = FindBetterChain(St, NewChain);
+ SDValue NewST;
+ if (St->isTruncatingStore())
+ NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemoryVT(),
+ St->getMemOperand());
+ else
+ NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
+ St->getBasePtr(), St->getMemOperand());
- for (StoreSDNode *ChainedStore : ChainedStores) {
- SDValue Chain = ChainedStore->getChain();
- SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+ TFOps.push_back(NewST);
- if (Chain != BetterChain) {
- if (ChainedStore == St)
- MadeChangeToSt = true;
- BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
- }
- }
+ // If we improved every element of TFOps, then we've lost the dependence on
+ // NewChain to successors of St and we need to add it back to TFOps. Do so at
+ // the beginning to keep relative order consistent with FindBetterChains.
+ auto hasImprovedChain = [&](SDValue ST) -> bool {
+ return ST->getOperand(0) != NewChain;
+ };
+ bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
+ if (AddNewChain)
+ TFOps.insert(TFOps.begin(), NewChain);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+ CombineTo(St, TF);
+
+ AddToWorklist(STChain);
+  // Add TF operands to the worklist in reverse order.
+ for (auto I = TF->getNumOperands(); I;)
+ AddToWorklist(TF->getOperand(--I).getNode());
+ AddToWorklist(TF.getNode());
+ return true;
+}
+
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
- // Do all replacements after finding the replacements to make to avoid making
- // the chains more complicated by introducing new TokenFactors.
- for (auto Replacement : BetterChains)
- replaceStoreChain(Replacement.first, Replacement.second);
+ const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
- return MadeChangeToSt;
+ // We must have a base and an offset.
+ if (!BasePtr.getBase().getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.getBase().isUndef())
+ return false;
+
+ // Directly improve a chain of disjoint stores starting at St.
+ if (parallelizeChainedStores(St))
+ return true;
+
+  // Improve St's chain.
+ SDValue BetterChain = FindBetterChain(St, St->getChain());
+ if (St->getChain() != BetterChain) {
+ replaceStoreChain(St, BetterChain);
+ return true;
+ }
+ return false;
}
/// This is the entry point for the file.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 795ade588b8f..a9a3c44ea0c9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -89,6 +89,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -110,6 +111,7 @@
#include <utility>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "isel"
@@ -545,6 +547,15 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
"Invalid iterator!");
while (I != E) {
+ if (LastFlushPoint == I)
+ LastFlushPoint = E;
+ if (SavedInsertPt == I)
+ SavedInsertPt = E;
+ if (EmitStartPt == I)
+ EmitStartPt = E.isValid() ? &*E : nullptr;
+ if (LastLocalValue == I)
+ LastLocalValue = E.isValid() ? &*E : nullptr;
+
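+    // Editorial note: the updates above keep FastISel's cached flush and
+    // insert points from dangling once the instruction they referenced is
+    // erased below.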
MachineInstr *Dead = &*I;
++I;
Dead->eraseFromParent();
@@ -1426,6 +1437,18 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
}
return true;
}
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst *DI = cast<DbgLabelInst>(II);
+ assert(DI->getLabel() && "Missing label");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
+ return true;
+ }
case Intrinsic::objectsize: {
ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
@@ -1436,6 +1459,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
+ case Intrinsic::is_constant: {
+ Constant *ResCI = ConstantInt::get(II->getType(), 0);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
@@ -1565,7 +1596,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
- if (isa<TerminatorInst>(I)) {
+ if (I->isTerminator()) {
if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
// PHI node handling may have generated local value instructions,
// even though it failed to handle all PHI nodes.
@@ -1629,7 +1660,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
DbgLoc = DebugLoc();
// Undo phi node updates, because they will be added again by SelectionDAG.
- if (isa<TerminatorInst>(I)) {
+ if (I->isTerminator()) {
// PHI node handling may have generated local value instructions.
// We remove them because SelectionDAGISel will generate them again.
removeDeadLocalValueCode(SavedLastLocalValue);
@@ -1680,7 +1711,10 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
/// Emit an FNeg operation.
bool FastISel::selectFNeg(const User *I) {
- unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ Value *X;
+ if (!match(I, m_FNeg(m_Value(X))))
+ return false;
+ unsigned OpReg = getRegForValue(X);
if (!OpReg)
return false;
bool OpRegIsKill = hasTrivialKill(I);
@@ -1770,11 +1804,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
return selectBinaryOp(I, ISD::SUB);
- case Instruction::FSub:
+ case Instruction::FSub:
// FNeg is currently represented in LLVM IR as a special case of FSub.
- if (BinaryOperator::isFNeg(I))
- return selectFNeg(I);
- return selectBinaryOp(I, ISD::FSUB);
+ return selectFNeg(I) || selectBinaryOp(I, ISD::FSUB);
case Instruction::Mul:
return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
@@ -2211,7 +2243,7 @@ unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
/// might result in multiple MBB's for one BB. As such, the start of the
/// BB might correspond to a different MBB than the end.
bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
- const TerminatorInst *TI = LLVMBB->getTerminator();
+ const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d3c31911d677..fba728625b07 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -579,9 +579,18 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const
const Value *
FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
if (VirtReg2Value.empty()) {
+ SmallVector<EVT, 4> ValueVTs;
for (auto &P : ValueMap) {
- VirtReg2Value[P.second] = P.first;
+ ValueVTs.clear();
+ ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(),
+ P.first->getType(), ValueVTs);
+ unsigned Reg = P.second;
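+      // Editorial example: an IR value of type i128 on a 64-bit target yields
+      // two i64 registers, so both Reg and Reg + 1 must map back to P.first;
+      // the previous code recorded only the first virtual register.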
+ for (EVT VT : ValueVTs) {
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ VirtReg2Value[Reg++] = P.first;
+ }
}
}
- return VirtReg2Value[Vreg];
+ return VirtReg2Value.lookup(Vreg);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index d6171f3177d7..6a6114677cc2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -524,7 +524,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
Reg = R->getReg();
DefMI = nullptr;
} else {
- Reg = getVR(Node->getOperand(0), VRBaseMap);
+ Reg = R ? R->getReg() : getVR(Node->getOperand(0), VRBaseMap);
DefMI = MRI->getVRegDef(Reg);
}
@@ -652,6 +652,12 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
unsigned NumOps = Node->getNumOperands();
+ // If the input pattern has a chain, then the root of the corresponding
+ // output pattern will get a chain as well. This can happen to be a
+ // REG_SEQUENCE (which is not "guarded" by countOperands/CountResults).
+ if (NumOps && Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ --NumOps; // Ignore chain if it exists.
+
assert((NumOps & 1) == 1 &&
"REG_SEQUENCE must have an odd number of operands!");
for (unsigned i = 1; i != NumOps; ++i) {
@@ -694,6 +700,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
+ SD->setIsEmitted();
+
+ if (SD->isInvalidated()) {
+ // An invalidated SDNode must generate an undef DBG_VALUE: although the
+    // original value is no longer computed, earlier DBG_VALUEs' live ranges
+ // must not leak into later code.
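+    // Editorial note: register operand 0 is assumed to print as $noreg, so the
+    // instruction built below reads "DBG_VALUE $noreg, $noreg, Var, Expr",
+    // i.e. an explicitly undefined location for the variable.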
+ auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.addReg(0U);
+ MIB.addReg(0U, RegState::Debug);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ return &*MIB;
+ }
+
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
@@ -735,6 +755,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MIB.addImm(CI->getSExtValue());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
MIB.addFPImm(CF);
+ } else if (isa<ConstantPointerNull>(V)) {
+ // Note: This assumes that all nullptr constants are zero-valued.
+ MIB.addImm(0);
} else {
// Could be an Undef. In any case insert an Undef so we can see what we
// dropped.
@@ -868,6 +891,15 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasAllowReassociation())
MI->setFlag(MachineInstr::MIFlag::FmReassoc);
+
+ if (Flags.hasNoUnsignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoUWrap);
+
+ if (Flags.hasNoSignedWrap())
+ MI->setFlag(MachineInstr::MIFlag::NoSWrap);
+
+ if (Flags.hasExact())
+ MI->setFlag(MachineInstr::MIFlag::IsExact);
}
// Emit all of the actual operands of this instruction, adding them to the
@@ -886,9 +918,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
RegState::EarlyClobber);
- // Transfer all of the memory reference descriptions of this instruction.
- MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
- cast<MachineSDNode>(Node)->memoperands_end());
+ // Set the memory reference descriptions of this instruction now that it is
+ // part of the function.
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands());
// Insert the instruction into position in the block. This needs to
// happen before any custom inserter hook is called so that the
@@ -950,7 +982,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
// Finally mark unused registers as dead.
- if (!UsedRegs.empty() || II.getImplicitDefs())
+ if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 36c436918916..d3aea37f944d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -176,7 +176,6 @@ private:
SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
- SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -239,7 +238,7 @@ public:
} // end anonymous namespace
/// Return a vector shuffle operation which
-/// performs the same shuffe in terms of order or result bytes, but on a type
+/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(
@@ -1060,6 +1059,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
case ISD::ADDROFRETURNADDR:
+ case ISD::SPONENTRY:
// These operations lie about being legal: when they claim to be legal,
// they should actually be custom-lowered.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1094,6 +1094,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -1107,6 +1108,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -1114,6 +1121,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0));
break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ }
+ case ISD::SMULFIX: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
+ case ISD::MSCATTER:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::MSTORE:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1148,6 +1176,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
}
break;
+ case ISD::FSHL:
+ case ISD::FSHR:
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
case ISD::SHL_PARTS: {
@@ -1247,6 +1277,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// Caches for hasPredecessorHelper
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Op.getNode());
Worklist.push_back(Idx.getNode());
SDValue StackPtr, Ch;
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
@@ -2299,9 +2330,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
EVT DestVT,
const SDLoc &dl) {
+ EVT SrcVT = Op0.getValueType();
+
// TODO: Should any fast-math-flags be set for the created nodes?
LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
- if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
"expansion\n");
@@ -2346,116 +2379,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// subtract the bias
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
// final result
- SDValue Result;
- // handle final rounding
- if (DestVT == MVT::f64) {
- // do nothing
- Result = Sub;
- } else if (DestVT.bitsLT(MVT::f64)) {
- Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
- DAG.getIntPtrConstant(0, dl));
- } else if (DestVT.bitsGT(MVT::f64)) {
- Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
- }
+ SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
- // Implementation of unsigned i64 to f64 following the algorithm in
- // __floatundidf in compiler_rt. This implementation has the advantage
- // of performing rounding correctly, both in the default rounding mode
- // and in all alternate rounding modes.
- // TODO: Generalize this for use with other types.
- if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
- LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
- SDValue TwoP52 =
- DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
- SDValue TwoP84PlusTwoP52 =
- DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl,
- MVT::f64);
- SDValue TwoP84 =
- DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64);
-
- SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
- SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
- DAG.getConstant(32, dl, MVT::i64));
- SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
- SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
- SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
- SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
- TwoP84PlusTwoP52);
- return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
- }
-
- // TODO: Generalize this for use with other types.
- if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
- LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
- // For unsigned conversions, convert them to signed conversions using the
- // algorithm from the x86_64 __floatundidf in compiler_rt.
- if (!isSigned) {
- SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
-
- SDValue ShiftConst = DAG.getConstant(
- 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout()));
- SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, dl, MVT::i64);
- SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
-
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
- SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
-
- // TODO: This really should be implemented using a branch rather than a
- // select. We happen to get lucky and machinesink does the right
- // thing most of the time. This would be a good candidate for a
- //pseudo-op, or, even better, for whole-function isel.
- SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
- Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
- return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast);
- }
-
- // Otherwise, implement the fully general conversion.
-
- SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
- DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64));
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
- DAG.getConstant(UINT64_C(0x800), dl, MVT::i64));
- SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
- DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64));
- SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2,
- DAG.getConstant(UINT64_C(0), dl, MVT::i64),
- ISD::SETNE);
- SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0);
- SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0,
- DAG.getConstant(UINT64_C(0x0020000000000000), dl,
- MVT::i64),
- ISD::SETUGE);
- SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0);
- EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout());
-
- SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
- DAG.getConstant(32, dl, SHVT));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
- SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
- SDValue TwoP32 =
- DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl,
- MVT::f64);
- SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
- SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
- SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
- SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
- return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
- DAG.getIntPtrConstant(0, dl));
- }
-
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
- SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()),
- Op0,
- DAG.getConstant(0, dl, Op0.getValueType()),
- ISD::SETLT);
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
+ DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
SDValue Zero = DAG.getIntPtrConstant(0, dl),
Four = DAG.getIntPtrConstant(4, dl);
SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(),
@@ -2465,7 +2398,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// as a negative number. To counteract this, the dynamic code adds an
// offset depending on the data type.
uint64_t FF;
- switch (Op0.getSimpleValueType().SimpleTy) {
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
@@ -2614,22 +2547,22 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
// swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT));
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
}
@@ -2705,126 +2638,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
}
}
-/// Expand the specified bitcount instruction into operations.
-SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
- const SDLoc &dl) {
- switch (Opc) {
- default: llvm_unreachable("Cannot expand this yet!");
- case ISD::CTPOP: {
- EVT VT = Op.getValueType();
- EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- unsigned Len = VT.getSizeInBits();
-
- assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
- "CTPOP not implemented for this type.");
-
- // This is the "best" algorithm from
- // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-
- SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)),
- dl, VT);
- SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)),
- dl, VT);
- SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)),
- dl, VT);
- SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)),
- dl, VT);
-
- // v = v - ((v >> 1) & 0x55555555...)
- Op = DAG.getNode(ISD::SUB, dl, VT, Op,
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(1, dl, ShVT)),
- Mask55));
- // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
- Op = DAG.getNode(ISD::ADD, dl, VT,
- DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(2, dl, ShVT)),
- Mask33));
- // v = (v + (v >> 4)) & 0x0F0F0F0F...
- Op = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::ADD, dl, VT, Op,
- DAG.getNode(ISD::SRL, dl, VT, Op,
- DAG.getConstant(4, dl, ShVT))),
- Mask0F);
- // v = (v * 0x01010101...) >> (Len - 8)
- Op = DAG.getNode(ISD::SRL, dl, VT,
- DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, dl, ShVT));
-
- return Op;
- }
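// The removed CTPOP path is the classic parallel bit count; the same steps for
// a scalar 32-bit value look like this (sketch, not part of the patch):
#include <cstdint>
static unsigned PopCount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // sum adjacent bit pairs
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // sum nibbles
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // sum bytes
  return (V * 0x01010101u) >> 24;                   // add bytes; Len - 8 == 24
}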
- case ISD::CTLZ_ZERO_UNDEF:
- // This trivially expands to CTLZ.
- return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
- case ISD::CTLZ: {
- EVT VT = Op.getValueType();
- unsigned Len = VT.getSizeInBits();
-
- if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
- EVT SetCCVT = getSetCCResultType(VT);
- SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(Len, dl, VT), CTLZ);
- }
-
- // for now, we do this:
- // x = x | (x >> 1);
- // x = x | (x >> 2);
- // ...
- // x = x | (x >>16);
- // x = x | (x >>32); // for 64-bit input
- // return popcount(~x);
- //
- // Ref: "Hacker's Delight" by Henry Warren
- EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) {
- SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
- Op = DAG.getNode(ISD::OR, dl, VT, Op,
- DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
- }
- Op = DAG.getNOT(dl, Op, VT);
- return DAG.getNode(ISD::CTPOP, dl, VT, Op);
- }
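// The CTLZ fallback above smears the leading one bit into every lower position
// and then counts the zeros that remain; for a 32-bit scalar (sketch only,
// using the compiler builtin in place of the CTPOP node):
#include <cstdint>
static unsigned CountLeadingZeros32(uint32_t X) {
  X |= X >> 1;  X |= X >> 2;  X |= X >> 4;
  X |= X >> 8;  X |= X >> 16;        // all bits below the MSB are now set
  return __builtin_popcount(~X);     // 32 when X == 0, as required here
}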
- case ISD::CTTZ_ZERO_UNDEF:
- // This trivially expands to CTTZ.
- return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
- case ISD::CTTZ: {
- EVT VT = Op.getValueType();
- unsigned Len = VT.getSizeInBits();
-
- if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
- EVT SetCCVT = getSetCCResultType(VT);
- SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(Len, dl, VT), CTTZ);
- }
-
- // for now, we use: { return popcount(~x & (x - 1)); }
- // unless the target has ctlz but not ctpop, in which case we use:
- // { return 32 - nlz(~x & (x-1)); }
- // Ref: "Hacker's Delight" by Henry Warren
- SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNOT(dl, Op, VT),
- DAG.getNode(ISD::SUB, dl, VT, Op,
- DAG.getConstant(1, dl, VT)));
- // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
- if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
- return DAG.getNode(ISD::SUB, dl, VT,
- DAG.getConstant(VT.getSizeInBits(), dl, VT),
- DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
- return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
- }
- }
-}
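// Likewise the removed CTTZ fallback: ~x & (x - 1) builds a mask of exactly
// the trailing zero bits, which is then population-counted (giving the bit
// width for x == 0). A 32-bit sketch using the compiler builtin:
#include <cstdint>
static unsigned CountTrailingZeros32(uint32_t X) {
  return __builtin_popcount(~X & (X - 1));
}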
-
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
@@ -2832,13 +2645,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
bool NeedInvert;
switch (Node->getOpcode()) {
+ case ISD::ABS:
+ if (TLI.expandABS(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTPOP:
+ if (TLI.expandCTPOP(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
- Results.push_back(Tmp1);
+ if (TLI.expandCTTZ(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
break;
case ISD::BITREVERSE:
Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
@@ -3033,8 +2856,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
+ Results.push_back(Tmp1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SINT_TO_FP:
Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
Node->getOperand(0), Node->getValueType(0), dl);
Results.push_back(Tmp1);
@@ -3043,29 +2871,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
Results.push_back(Tmp1);
break;
- case ISD::FP_TO_UINT: {
- SDValue True, False;
- EVT VT = Node->getOperand(0).getValueType();
- EVT NVT = Node->getValueType(0);
- APFloat apf(DAG.EVTToAPFloatSemantics(VT),
- APInt::getNullValue(VT.getSizeInBits()));
- APInt x = APInt::getSignMask(NVT.getSizeInBits());
- (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
- Tmp1 = DAG.getConstantFP(apf, dl, VT);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT),
- Node->getOperand(0),
- Tmp1, ISD::SETLT);
- True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
- // TODO: Should any fast-math-flags be set for the FSUB?
- False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
- DAG.getNode(ISD::FSUB, dl, VT,
- Node->getOperand(0), Tmp1));
- False = DAG.getNode(ISD::XOR, dl, NVT, False,
- DAG.getConstant(x, dl, NVT));
- Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False);
- Results.push_back(Tmp1);
+ case ISD::FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
break;
- }
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
@@ -3252,7 +3061,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
-
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
@@ -3460,6 +3274,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (TLI.expandFunnelShift(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (TLI.expandROT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ Results.push_back(TLI.expandAddSubSat(Node, DAG));
+ break;
+ case ISD::SMULFIX:
+ Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));
+ break;
case ISD::SADDO:
case ISD::SSUBO: {
SDValue LHS = Node->getOperand(0);
@@ -3852,10 +3685,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
- // If we expanded the SETCC by inverting the condition code, then wrap
- // the existing SETCC in a NOT to restore the intended condition.
- if (NeedInvert)
- Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0));
+ assert(!NeedInvert && "Don't know how to invert BR_CC!");
// If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
// node.
@@ -3899,46 +3729,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ReplaceNode(SDValue(Node, 0), Result);
break;
}
- case ISD::ROTL:
- case ISD::ROTR: {
- bool IsLeft = Node->getOpcode() == ISD::ROTL;
- SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1);
- EVT ResVT = Node->getValueType(0);
- EVT OpVT = Op0.getValueType();
- assert(OpVT == ResVT &&
- "The result and the operand types of rotate should match");
- EVT ShVT = Op1.getValueType();
- SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT);
-
- // If a rotate in the other direction is legal, use it.
- unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (TLI.isOperationLegal(RevRot, ResVT)) {
- SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
- Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub));
- break;
- }
-
- // Otherwise,
- // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
- // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
- //
- assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) &&
- "Expecting the type bitwidth to be a power of 2");
- unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
- unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
- SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT,
- Width, DAG.getConstant(1, dl, ShVT));
- SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
- SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1);
- SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1);
-
- SDValue Or = DAG.getNode(ISD::OR, dl, ResVT,
- DAG.getNode(ShOpc, dl, ResVT, Op0, And0),
- DAG.getNode(HsOpc, dl, ResVT, Op0, And1));
- Results.push_back(Or);
- break;
- }
-
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -3958,7 +3748,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
return false;
}
- LLVM_DEBUG(dbgs() << "Succesfully expanded node\n");
+ LLVM_DEBUG(dbgs() << "Successfully expanded node\n");
ReplaceNode(Node, Results.data());
return true;
}
@@ -4031,11 +3821,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::FMINNUM:
+ case ISD::STRICT_FMINNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128));
break;
case ISD::FMAXNUM:
+ case ISD::STRICT_FMAXNUM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
RTLIB::FMAX_F80, RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128));
@@ -4046,6 +3838,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128));
break;
+ case ISD::FCBRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128));
+ break;
case ISD::FSIN:
case ISD::STRICT_FSIN:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
@@ -4128,16 +3925,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::EXP2_PPCF128));
break;
case ISD::FTRUNC:
+ case ISD::STRICT_FTRUNC:
Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128));
break;
case ISD::FFLOOR:
+ case ISD::STRICT_FFLOOR:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128));
break;
case ISD::FCEIL:
+ case ISD::STRICT_FCEIL:
Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
RTLIB::CEIL_F80, RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128));
@@ -4157,6 +3957,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::NEARBYINT_PPCF128));
break;
case ISD::FROUND:
+ case ISD::STRICT_FROUND:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
RTLIB::ROUND_F64,
RTLIB::ROUND_F80,
@@ -4188,6 +3989,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::DIV_PPCF128));
break;
case ISD::FREM:
+ case ISD::STRICT_FREM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
RTLIB::REM_F80, RTLIB::REM_F128,
RTLIB::REM_PPCF128));
@@ -4260,6 +4062,21 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::MUL_I16, RTLIB::MUL_I32,
RTLIB::MUL_I64, RTLIB::MUL_I128));
break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("LibCall explicitly requested, but not available");
+ case MVT::i32:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false));
+ break;
+ case MVT::i64:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false));
+ break;
+ case MVT::i128:
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false));
+ break;
+ }
+ break;
}
// Replace the original node with the legalized result.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2e6f6edbce55..4644e9588e7b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1750,6 +1750,11 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
SDValue R = SDValue();
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
// Nodes that use a promotion-requiring floating point operand, but don't
// produce a promotion-requiring floating point result, need to be legalized
// to use the promoted float operand. Nodes that produce at least one
@@ -1905,8 +1910,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
// Binary FP Operations
case ISD::FADD:
case ISD::FDIV:
- case ISD::FMAXNAN:
- case ISD::FMINNAN:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMUL:
@@ -2138,9 +2143,9 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
SDValue TrueVal = GetPromotedFloat(N->getOperand(2));
SDValue FalseVal = GetPromotedFloat(N->getOperand(3));
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1), TrueVal, FalseVal,
- N->getOperand(4));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ TrueVal.getNode()->getValueType(0), N->getOperand(0),
+ N->getOperand(1), TrueVal, FalseVal, N->getOperand(4));
}
// Construct a SDNode that transforms the SINT or UINT operand to the promoted
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 133831fa76fb..5fbc70fce60d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -118,6 +118,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
+ case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
+
case ISD::AND:
case ISD::OR:
case ISD::XOR:
@@ -138,9 +140,17 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMULO:
case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+ case ISD::ADDE:
+ case ISD::SUBE:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+ case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break;
+
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -305,12 +315,45 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// make us bitcast between two vectors which are legalized in different ways.
if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ // If the output type is also a vector and widening it to the same size
+ // as the widened input type would be a legal type, we can widen the bitcast
+ // and handle the promotion after.
+ if (NOutVT.isVector()) {
+ unsigned WidenInSize = NInVT.getSizeInBits();
+ unsigned OutSize = OutVT.getSizeInBits();
+ if (WidenInSize % OutSize == 0) {
+ unsigned Scale = WidenInSize / OutSize;
+ EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
+ OutVT.getVectorElementType(),
+ OutVT.getVectorNumElements() * Scale);
+ if (isTypeLegal(WideOutVT)) {
+ InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
+ DAG.getConstant(0, dl, IdxTy));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
+ }
+ }
+ }
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
CreateStackStoreLoad(InOp, OutVT));
}
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
+static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ // If the value won't fit in the preferred type, just use something safe. It
+ // will be legalized when the shift is expanded.
+ if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+ ShiftVT = MVT::i32;
+ return ShiftVT;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
@@ -318,10 +361,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(
- ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl,
- TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+ EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl, ShiftVT));
}
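// Promoted BSWAP swaps in the wider type and then shifts the bytes of interest
// back down by DiffBits; e.g. an i16 byte swap performed in i32 (sketch using
// the GCC/Clang builtin for the wide BSWAP):
#include <cstdint>
static uint16_t BSwap16ViaI32(uint16_t V) {
  uint32_t Wide = V;                              // promoted operand
  return uint16_t(__builtin_bswap32(Wide) >> 16); // DiffBits == 32 - 16
}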
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -331,10 +373,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(
- ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl,
- TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+ EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ return DAG.getNode(ISD::SRL, dl, NVT,
+ DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl, ShiftVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -399,8 +441,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
- N->getOperand(1));
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // If the input also needs to be promoted, do that first so we can get a
+ // good idea for the output type.
+ if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue In = GetPromotedInteger(Op0);
+
+ // If the new type is larger than NVT, use it. We probably won't need to
+ // promote it again.
+ EVT SVT = In.getValueType().getScalarType();
+ if (SVT.bitsGE(NVT)) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
+ return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
+ }
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
@@ -438,6 +498,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ return DAG.getNode(N->getOpcode(), dl, NVT);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -483,11 +550,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- N->getMask(), ExtSrc0, N->getMemoryVT(),
+ N->getMask(), ExtPassThru, N->getMemoryVT(),
N->getMemOperand(), ISD::SEXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -497,12 +564,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue ExtSrc0 = GetPromotedInteger(N->getValue());
- assert(NVT == ExtSrc0.getValueType() &&
+ SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ assert(NVT == ExtPassThru.getValueType() &&
"Gather result type and the passThru agrument type should be the same");
SDLoc dl(N);
- SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
+ SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
@@ -534,6 +601,61 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
return SDValue(Res.getNode(), 1);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
+ // For promoting iN -> iM, this can be expanded by
+ // 1. ANY_EXTEND iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB]SAT
+ // 4. L/ASHR by M-N
+ SDLoc dl(N);
+ SDValue Op1 = N->getOperand(0);
+ SDValue Op2 = N->getOperand(1);
+ unsigned OldBits = Op1.getScalarValueSizeInBits();
+
+ unsigned Opcode = N->getOpcode();
+ unsigned ShiftOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ ShiftOp = ISD::SRA;
+ break;
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ ShiftOp = ISD::SRL;
+ break;
+ default:
+ llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+ "addition or subtraction");
+ }
+
+ SDValue Op1Promoted = GetPromotedInteger(Op1);
+ SDValue Op2Promoted = GetPromotedInteger(Op2);
+
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned NewBits = PromotedType.getScalarSizeInBits();
+ unsigned SHLAmount = NewBits - OldBits;
+ EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+ SDValue Result =
+ DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+}
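// The point of the SHL/SRA pair above is that saturating in the wide type at
// its own boundaries and then shifting back is equivalent to saturating at the
// narrow boundaries. A scalar sketch for i8 SADDSAT carried out in i32; both
// helpers are illustrative only, and the wide saturating add is written out by
// hand since C++ has no such primitive:
#include <cstdint>
#include <limits>
static int32_t SAddSat32(int32_t A, int32_t B) {
  int32_t R;
  if (__builtin_add_overflow(A, B, &R))
    return A < 0 ? std::numeric_limits<int32_t>::min()
                 : std::numeric_limits<int32_t>::max();
  return R;
}
static int8_t SAddSat8(int8_t A, int8_t B) {
  int32_t WA = int32_t(uint32_t(A) << 24);  // ANY_EXTEND + SHL by 24
  int32_t WB = int32_t(uint32_t(B) << 24);
  return int8_t(SAddSat32(WA, WB) >> 24);   // saturate wide, then SRA by 24
}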
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) {
+ // Can just promote the operands then continue with operation.
+ SDLoc dl(N);
+ SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ EVT PromotedType = Op1Promoted.getValueType();
+ return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
+ N->getOperand(2));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
@@ -763,6 +885,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
+// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
+// the third operand of ADDE/SUBE nodes is a carry flag, which differs from
+// the ADDCARRY/SUBCARRY nodes in that their third operand is a carry Boolean.
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
@@ -960,6 +1085,13 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
+
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
+
+ case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
+
+ case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -981,9 +1113,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
/// shared among BR_CC, SELECT_CC, and SETCC handlers.
void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
ISD::CondCode CCCode) {
- // We have to insert explicit sign or zero extends. Note that we could
- // insert sign extends for ALL conditions, but zero extend is cheaper on
- // many machines (an AND instead of two shifts), so prefer it.
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions. For those operations where either
+ // zero or sign extension would be valid, use SExtOrZExtPromotedInteger
+ // which will choose the cheapest for the target.
switch (CCCode) {
default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
@@ -994,7 +1127,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// We would prefer to promote the comparison operand with sign extension.
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
- // instruction, which is redudant eventually.
+ // instruction, which is redundant eventually.
unsigned OpLEffectiveBits =
OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
unsigned OpREffectiveBits =
@@ -1004,8 +1137,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
NewLHS = OpL;
NewRHS = OpR;
} else {
- NewLHS = ZExtPromotedInteger(NewLHS);
- NewRHS = ZExtPromotedInteger(NewRHS);
+ NewLHS = SExtOrZExtPromotedInteger(NewLHS);
+ NewRHS = SExtOrZExtPromotedInteger(NewRHS);
}
break;
}
@@ -1013,11 +1146,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
case ISD::SETUGT:
case ISD::SETULE:
case ISD::SETULT:
- // ALL of these operations will work if we either sign or zero extend
- // the operands (including the unsigned comparisons!). Zero extend is
- // usually a simpler/cheaper operation, so prefer it.
- NewLHS = ZExtPromotedInteger(NewLHS);
- NewRHS = ZExtPromotedInteger(NewRHS);
+ NewLHS = SExtOrZExtPromotedInteger(NewLHS);
+ NewRHS = SExtOrZExtPromotedInteger(NewRHS);
break;
case ISD::SETGE:
case ISD::SETGT:
@@ -1219,28 +1349,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
SDLoc dl(N);
bool TruncateStore = false;
- if (OpNo == 2) {
- // Mask comes before the data operand. If the data operand is legal, we just
- // promote the mask.
- // When the data operand has illegal type, we should legalize the data
- // operand first. The mask will be promoted/splitted/widened according to
- // the data operand type.
- if (TLI.isTypeLegal(DataVT)) {
- Mask = PromoteTargetBoolean(Mask, DataVT);
- // Update in place.
- SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
- NewOps[2] = Mask;
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- }
-
- if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
- return PromoteIntOp_MSTORE(N, 3);
- if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
- return WidenVecOp_MSTORE(N, 3);
- assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
- return SplitVecOp_MSTORE(N, 3);
+ if (OpNo == 3) {
+ Mask = PromoteTargetBoolean(Mask, DataVT);
+ // Update in place.
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[3] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
} else { // Data operand
- assert(OpNo == 3 && "Unexpected operand for promotion");
+ assert(OpNo == 1 && "Unexpected operand for promotion");
DataOp = GetPromotedInteger(DataOp);
TruncateStore = true;
}
@@ -1274,14 +1390,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- SDValue Res = SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- // updated in place.
- if (Res.getNode() == N)
- return Res;
-
- ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
- ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- return SDValue();
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
@@ -1342,6 +1451,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) {
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ return SDValue(
+ DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
+ // Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1 && "Don't know how to promote this operand!");
+ // Promote the rw, locality, and cache type arguments to a supported integer
+ // width.
+ SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
+ SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
+ SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ Op2, Op3, Op4),
+ 0);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -1475,6 +1608,12 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
case ISD::UMULO:
case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
+ case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1595,8 +1734,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
- KnownBits Known;
- DAG.computeKnownBits(N->getOperand(1), Known);
+ KnownBits Known = DAG.computeKnownBits(N->getOperand(1));
// If we don't know anything about the high bits, exit.
if (((Known.Zero|Known.One) & HighBitMask) == 0)
@@ -2437,6 +2575,101 @@ void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), R.getValue(2));
}
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Result = TLI.expandAddSubSat(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ uint64_t Scale = N->getConstantOperandVal(2);
+ if (!Scale) {
+ SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(LHS, LL, LH);
+ GetExpandedInteger(RHS, RL, RH);
+ SmallVector<SDValue, 4> Result;
+
+ if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH)) {
+ report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI.");
+ return;
+ }
+
+ unsigned VTSize = VT.getScalarSizeInBits();
+ unsigned NVTSize = NVT.getScalarSizeInBits();
+ EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+
+ // Shift whole amount by scale.
+ SDValue ResultLL = Result[0];
+ SDValue ResultLH = Result[1];
+ SDValue ResultHL = Result[2];
+ SDValue ResultHH = Result[3];
+
+ // After getting the multiplication result in 4 parts, we need to perform a
+ // shift right by the amount of the scale to get the result in that scale.
+ // Let's say we multiply two 64-bit numbers. The resulting value can be held in
+ // 128 bits that are cut into 4 32-bit parts:
+ //
+ // HH HL LH LL
+ // |---32---|---32---|---32---|---32---|
+ // 128 96 64 32 0
+ //
+ // |------VTSize-----|
+ //
+ // |NVTSize-|
+ //
+ // The resulting Lo and Hi will only need to be one of these 32-bit parts
+ // after shifting.
+ if (Scale < NVTSize) {
+ // If the scale is less than the size of the VT we expand to, the Hi and
+ // Lo of the result will be in the first 2 parts of the result after
+ // shifting right. This only requires shifting by the scale as far as the
+ // third part in the result (ResultHL).
+ SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy);
+ SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt);
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+ } else if (Scale == NVTSize) {
+ // If the scales are equal, Lo and Hi are ResultLH and ResultHL,
+ // respectively. Avoid shifting to prevent undefined behavior.
+ Lo = ResultLH;
+ Hi = ResultHL;
+ } else if (Scale < VTSize) {
+ // If the scale is instead less than the old VT size, but greater than or
+ // equal to the expanded VT size, the first part of the result (ResultLL) is
+ // no longer a part of Lo because it would be scaled out anyway. Instead we
+ // can start shifting right from the fourth part (ResultHH) to the second
+ // part (ResultLH), and ResultLH will be the new Lo.
+ SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy);
+ SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
+ Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+ Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
+ } else {
+ llvm_unreachable(
+ "Expected the scale to be less than the width of the operands");
+ }
+}
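// For reference, the value SMULFIX computes is simply the full-width signed
// product shifted right by Scale; in scalar C++ for 32-bit operands (the code
// above reconstructs the same thing from the four NVT-sized product pieces):
#include <cstdint>
static int32_t SMulFix32(int32_t A, int32_t B, unsigned Scale) {
  int64_t Product = int64_t(A) * int64_t(B); // full 64-bit product
  return int32_t(Product >> Scale);          // drop Scale fractional bits
}
// With Scale == 16 this is the usual Q16.16 fixed-point multiply.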
+
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = Node->getOperand(0);
@@ -2705,25 +2938,56 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc dl(N);
- // A divide for UMULO should be faster than a function call.
if (N->getOpcode() == ISD::UMULO) {
+ // This section expands the operation into the following sequence of
+ // instructions. `iNh` here refers to a type which has half the bit width of
+ // the type the original operation operated on.
+ //
+ // %0 = %LHS.HI != 0 && %RHS.HI != 0
+ // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
+ // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
+ // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
+ // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
+ // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
+ //
+ // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
-
- SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
- SplitInteger(MUL, Lo, Hi);
-
- // A divide for UMULO will be faster than a function call. Select to
- // make sure we aren't using 0.
- SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT),
- RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ);
- SDValue NotZero = DAG.getSelect(dl, VT, isZero,
- DAG.getConstant(1, dl, VT), RHS);
- SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
- SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
- ISD::SETNE);
- Overflow = DAG.getSelect(dl, N->getValueType(1), isZero,
- DAG.getConstant(0, dl, N->getValueType(1)),
- Overflow);
+ SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
+ SplitInteger(LHS, LHSLow, LHSHigh);
+ SplitInteger(RHS, RHSLow, RHSHigh);
+ EVT HalfVT = LHSLow.getValueType()
+ , BitVT = N->getValueType(1);
+ SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
+ SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
+
+ SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
+ SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
+ DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
+ DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
+
+ SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
+ SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+ One.getValue(0));
+
+ SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
+ SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+ Two.getValue(0));
+
+ // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
+ // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this
+ // operation recursively legalized?).
+ //
+ // Many backends understand this pattern and will convert into LOHI
+ // themselves, if applicable.
+ SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
+ SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
+ SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
+ Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
+ SplitInteger(Five, Lo, Hi);
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
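// The %0..%5 recipe above, transcribed for a 64-bit UMULO built out of 32-bit
// halves (a sketch only; the legalizer emits DAG nodes rather than calls, and
// __builtin_add_overflow stands in for the final UADDO):
#include <cstdint>
static bool UMulO64(uint64_t L, uint64_t R, uint64_t &Out) {
  uint32_t LL = uint32_t(L), LH = uint32_t(L >> 32);
  uint32_t RL = uint32_t(R), RH = uint32_t(R >> 32);
  bool Ovf = LH != 0 && RH != 0;                    // %0
  uint64_t One = uint64_t(LH) * RL;                 // %1
  Ovf |= (One >> 32) != 0;
  uint64_t Two = uint64_t(RH) * LL;                 // %2
  Ovf |= (Two >> 32) != 0;
  uint64_t Three = uint64_t(LL) * RL;               // %3
  uint64_t Four = (One << 32) + (Two << 32);        // %4; a wrap here implies %0
  Ovf |= __builtin_add_overflow(Three, Four, &Out); // %5
  return Ovf;
}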
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 135922d6f267..032000f6cb79 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -281,6 +281,20 @@ private:
return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
+ // Get a promoted operand and sign or zero extend it to the final size
+ // (depending on TargetLoweringInfo::isSExtCheaperThanZExt). For a given
+ // subtarget and type, the choice of sign or zero-extension will be
+ // consistent.
+ SDValue SExtOrZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc DL(Op);
+ Op = GetPromotedInteger(Op);
+ if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType());
+ }
+
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -330,6 +344,9 @@ private:
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
+ SDValue PromoteIntRes_SMULFIX(SDNode *N);
+ SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -360,6 +377,9 @@ private:
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
+ SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SMULFIX(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -414,6 +434,8 @@ private:
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -650,6 +672,7 @@ private:
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
@@ -668,6 +691,8 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_SMULFIX(SDNode *N);
+
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
@@ -703,6 +728,8 @@ private:
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -780,6 +807,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_StrictFP(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
@@ -796,6 +824,7 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
@@ -844,9 +873,6 @@ private:
/// MaskVT to ToMaskVT if needed with vector extension or truncation.
SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
- /// Get the target mask VT, and widen if needed.
- EVT getSETCCWidenedResultTy(SDValue SetCC);
-
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index df3134828af5..b9d370441c3e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -327,7 +327,7 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
NumElements >>= 1;
SplitInteger(Op, Parts[0], Parts[1]);
if (DAG.getDataLayout().isBigEndian())
- std::swap(Parts[0], Parts[1]);
+ std::swap(Parts[0], Parts[1]);
IntegerToVector(Parts[0], NumElements, Ops, EltVT);
IntegerToVector(Parts[1], NumElements, Ops, EltVT);
} else {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3a98a7a904cb..4923a529c21b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -86,9 +86,10 @@ class VectorLegalizer {
/// operations to legalize them.
SDValue Expand(SDValue Op);
- /// Implements expansion for FNEG; falls back to UnrollVectorOp if
- /// FSUB isn't legal.
- ///
+ /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
+ /// FP_TO_SINT isn't legal.
+ SDValue ExpandFP_TO_UINT(SDValue Op);
+
/// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
/// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
@@ -116,6 +117,12 @@ class VectorLegalizer {
/// the remaining lanes, finally bitcasting to the proper type.
SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+ /// Implement expand-based legalization of ABS vector operations.
+ /// If the following expansion is legal/custom then do it:
+ /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
+ /// else unroll the operation.
+ SDValue ExpandABS(SDValue Op);
+
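// The ABS expansion named above is the familiar branch-free absolute value;
// for a scalar int32_t it reads (sketch only, the vector path applies it
// lane-wise and falls back to unrolling when the expansion is not legal):
#include <cstdint>
static int32_t Abs32(int32_t X) {
  int32_t Sign = X >> 31;      // SRA x, sizeof(x)*8 - 1: 0 or -1
  return (X + Sign) ^ Sign;    // (XOR (ADD x, sign), sign)
}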
/// Expand bswap of vectors into a shuffle if legal.
SDValue ExpandBSWAP(SDValue Op);
@@ -128,8 +135,13 @@ class VectorLegalizer {
SDValue ExpandFNEG(SDValue Op);
SDValue ExpandFSUB(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
+ SDValue ExpandCTPOP(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
- SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
+ SDValue ExpandCTTZ(SDValue Op);
+ SDValue ExpandFunnelShift(SDValue Op);
+ SDValue ExpandROT(SDValue Op);
+ SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
+ SDValue ExpandAddSubSat(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
/// Implements vector promotion.
@@ -226,7 +238,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
Op.getResNo());
- bool HasVectorValue = false;
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -240,16 +251,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
- if (Lowered == Result)
- return TranslateLegalizeResults(Op, Lowered);
- Changed = true;
- if (Lowered->getNumValues() != Op->getNumValues()) {
- // This expanded to something other than the load. Assume the
- // lowering code took care of any chain values, and just handle the
- // returned value.
- assert(Result.getValue(1).use_empty() &&
- "There are still live users of the old chain!");
- return LegalizeOp(Lowered);
+ assert(Lowered->getNumValues() == Op->getNumValues() &&
+ "Unexpected number of results");
+ if (Lowered != Result) {
+ // Make sure the new code is also legal.
+ Lowered = LegalizeOp(Lowered);
+ Changed = true;
}
return TranslateLegalizeResults(Op, Lowered);
}
@@ -272,7 +279,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom: {
SDValue Lowered = TLI.LowerOperation(Result, DAG);
- Changed = Lowered != Result;
+ if (Lowered != Result) {
+ // Make sure the new code is also legal.
+ Lowered = LegalizeOp(Lowered);
+ Changed = true;
+ }
return TranslateLegalizeResults(Op, Lowered);
}
case TargetLowering::Expand:
@@ -280,9 +291,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return LegalizeOp(ExpandStore(Op));
}
}
- } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
- HasVectorValue = true;
+ }
+ bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
++J)
@@ -298,6 +309,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -311,6 +323,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -321,6 +339,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -338,8 +358,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ case ISD::FSHL:
+ case ISD::FSHR:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::ABS:
case ISD::BSWAP:
case ISD::BITREVERSE:
case ISD::CTLZ:
@@ -361,8 +384,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FABS:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -394,8 +419,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
case ISD::FCANONICALIZE:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
+ case ISD::SMULFIX: {
+ unsigned Scale = Node->getConstantOperandVal(2);
+ Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
+ Node->getValueType(0), Scale);
+ break;
+ }
case ISD::FP_ROUND_INREG:
Action = TLI.getOperationAction(Node->getOpcode(),
cast<VTSDNode>(Node->getOperand(1))->getVT());
@@ -405,14 +440,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
- case ISD::MSCATTER:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
- break;
- case ISD::MSTORE:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
- break;
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -720,6 +747,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandVSELECT(Op);
case ISD::SELECT:
return ExpandSELECT(Op);
+ case ISD::FP_TO_UINT:
+ return ExpandFP_TO_UINT(Op);
case ISD::UINT_TO_FP:
return ExpandUINT_TO_FLOAT(Op);
case ISD::FNEG:
@@ -728,17 +757,37 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandFSUB(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
+ case ISD::ABS:
+ return ExpandABS(Op);
case ISD::BITREVERSE:
return ExpandBITREVERSE(Op);
+ case ISD::CTPOP:
+ return ExpandCTPOP(Op);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
return ExpandCTLZ(Op);
+ case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTTZ_ZERO_UNDEF(Op);
+ return ExpandCTTZ(Op);
+ case ISD::FSHL:
+ case ISD::FSHR:
+ return ExpandFunnelShift(Op);
+ case ISD::ROTL:
+ case ISD::ROTR:
+ return ExpandROT(Op);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return ExpandFMINNUM_FMAXNUM(Op);
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ return ExpandAddSubSat(Op);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -752,6 +801,12 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
return ExpandStrictFPOp(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
@@ -866,7 +921,7 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
// First build an any-extend node which can be legalized above when we
// recurse through it.
- Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
+ Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
// Now we need sign extend. Do this by shifting the elements. Even if these
// aren't legal operations, they have a better chance of being legalized
@@ -1024,10 +1079,35 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
+SDValue VectorLegalizer::ExpandABS(SDValue Op) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandABS(Op.getNode(), Result, DAG))
+ return Result;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG))
+ return Result;
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
EVT VT = Op.getOperand(0).getValueType();
SDLoc DL(Op);
+ // Attempt to expand using TargetLowering.
+ SDValue Result;
+ if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
+ return Result;
+
// Make sure that the SINT_TO_FP and SRL instructions are available.
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
@@ -1086,56 +1166,55 @@ SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTPOP(Op.getNode(), Result, DAG))
+ return Result;
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
- EVT VT = Op.getValueType();
- unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+ SDValue Result;
+ if (TLI.expandCTLZ(Op.getNode(), Result, DAG))
+ return Result;
- // If the non-ZERO_UNDEF version is supported we can use that instead.
- if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- SDLoc DL(Op);
- return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
- }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
- // If CTPOP is available we can lower with a CTPOP based method:
- // u16 ctlz(u16 x) {
- // x |= (x >> 1);
- // x |= (x >> 2);
- // x |= (x >> 4);
- // x |= (x >> 8);
- // return ctpop(~x);
- // }
- // Ref: "Hacker's Delight" by Henry Warren
- if (isPowerOf2_32(NumBitsPerElt) &&
- TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
- SDLoc DL(Op);
- SDValue Res = Op.getOperand(0);
- EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandCTTZ(Op.getNode(), Result, DAG))
+ return Result;
- for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
- Res = DAG.getNode(
- ISD::OR, DL, VT, Res,
- DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
+ return DAG.UnrollVectorOp(Op.getNode());
+}
- Res = DAG.getNOT(DL, Res, VT);
- return DAG.getNode(ISD::CTPOP, DL, VT, Res);
- }
+SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandFunnelShift(Op.getNode(), Result, DAG))
+ return Result;
- // Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
}
-SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
- // If the non-ZERO_UNDEF version is supported we can use that instead.
- if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
- SDLoc DL(Op);
- return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
- }
+SDValue VectorLegalizer::ExpandROT(SDValue Op) {
+ SDValue Result;
+ if (TLI.expandROT(Op.getNode(), Result, DAG))
+ return Result;
- // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
+ if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
+ return Expanded;
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -1183,7 +1262,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
AddLegalizedOperand(Op.getValue(0), Result);
AddLegalizedOperand(Op.getValue(1), NewChain);
- return NewChain;
+ return Op.getResNo() ? NewChain : Result;
}
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index b21249d01ef9..f367e9358576 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -113,13 +113,20 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+
case ISD::FPOW:
case ISD::FREM:
case ISD::FSUB:
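The SADDSAT/UADDSAT/SSUBSAT/USUBSAT cases added above scalarize to the corresponding saturating integer operations. A rough scalar model, shown here for a signed 8-bit element (illustrative only; the nodes accept any integer element type):

    #include <cstdint>

    // Signed saturating add: compute in a wider type, then clamp to the
    // element's representable range instead of wrapping.
    int8_t saddsat8(int8_t a, int8_t b) {
      int sum = int(a) + int(b);
      if (sum > INT8_MAX) return INT8_MAX;
      if (sum < INT8_MIN) return INT8_MIN;
      return (int8_t)sum;
    }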
@@ -139,6 +146,35 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
R = ScalarizeVecRes_TernaryOp(N);
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ R = ScalarizeVecRes_StrictFPOp(N);
+ break;
+ case ISD::SMULFIX:
+ R = ScalarizeVecRes_SMULFIX(N);
+ break;
}
// If R is null, the sub-method took care of registering the result.
@@ -161,6 +197,44 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
Op0.getValueType(), Op0, Op1, Op2);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
+ EVT VT = N->getValueType(0).getVectorElementType();
+ unsigned NumOpers = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT ValueVTs[] = {VT, MVT::Other};
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 4> Opers;
+
+ // The Chain is the first operand.
+ Opers.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ if (Oper.getValueType().isVector())
+ Oper = GetScalarizedVector(Oper);
+
+ Opers.push_back(Oper);
+ }
+
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
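ScalarizeVecRes_SMULFIX above re-emits the node on the scalarized operands and passes the scale operand through unchanged. Per element, the fixed-point multiply it represents computes roughly the following (a scalar sketch for a 32-bit element; the real node carries the scale as a constant operand):

    #include <cstdint>

    // Fixed-point signed multiply: multiply in a double-width type, then
    // arithmetically shift right by the scale to drop the fractional bits.
    int32_t smulfix32(int32_t a, int32_t b, unsigned scale) {
      return (int32_t)((int64_t(a) * int64_t(b)) >> scale);
    }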
@@ -731,8 +805,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
@@ -750,6 +824,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
@@ -759,6 +837,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -772,8 +851,17 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+ case ISD::SMULFIX:
+ SplitVecRes_SMULFIX(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -811,6 +899,20 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
Op0Hi, Op1Hi, Op2Hi);
}
+void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ SDLoc dl(N);
+ SDValue Op2 = N->getOperand(2);
+
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2);
+}
+
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
@@ -1238,7 +1340,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
- SDValue Src0 = MLD->getSrc0();
+ SDValue PassThru = MLD->getPassThru();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
@@ -1259,18 +1361,18 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO,
ExtType, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
@@ -1282,7 +1384,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
ExtType, MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
@@ -1305,7 +1407,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Mask = MGT->getMask();
- SDValue Src0 = MGT->getValue();
+ SDValue PassThru = MGT->getPassThru();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
unsigned Alignment = MGT->getOriginalAlignment();
@@ -1322,11 +1424,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
// Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1339,11 +1441,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
MMO);
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
MMO);
@@ -1620,13 +1722,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
- Res = SplitVecOp_TruncateHelper(N);
- else
- Res = SplitVecOp_UnaryOp(N);
- break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
@@ -1634,6 +1729,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
else
Res = SplitVecOp_UnaryOp(N);
break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -1746,10 +1843,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
case ISD::VECREDUCE_FMAX:
- CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN;
+ CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
break;
case ISD::VECREDUCE_FMIN:
- CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN;
+ CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
break;
default:
llvm_unreachable("Unexpected reduce ISD node");
@@ -1860,6 +1957,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+ // FIXME: This is to handle i1 vectors with elements promoted to i8.
+ // i1 vector handling needs general improvement.
+ if (N->getValueType(0).bitsLT(EltVT)) {
+ SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0));
+ }
+
return DAG.getExtLoad(
ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
@@ -1886,7 +1992,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
SDValue Mask = MGT->getMask();
- SDValue Src0 = MGT->getValue();
+ SDValue PassThru = MGT->getPassThru();
unsigned Alignment = MGT->getOriginalAlignment();
SDValue MaskLo, MaskHi;
@@ -1900,11 +2006,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
- SDValue Src0Lo, Src0Hi;
- if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Src0, Src0Lo, Src0Hi);
+ SDValue PassThruLo, PassThruHi;
+ if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(PassThru, PassThruLo, PassThruHi);
else
- std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+ std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1917,7 +2023,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
+ SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO);
@@ -1927,7 +2033,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
Alignment, MGT->getAAInfo(),
MGT->getRanges());
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
+ SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO);
@@ -2164,16 +2270,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
unsigned InElementSize = InVT.getScalarSizeInBits();
unsigned OutElementSize = OutVT.getScalarSizeInBits();
+  // Determine the split output VT. If it's legal we can just split directly.
+ EVT LoOutVT, HiOutVT;
+ std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
+ assert(LoOutVT == HiOutVT && "Unequal split?");
+
// If the input elements are only 1/2 the width of the result elements,
// just use the normal splitting. Our trick only works if there's room
// to split more than once.
- if (InElementSize <= OutElementSize * 2)
+ if (isTypeLegal(LoOutVT) ||
+ InElementSize <= OutElementSize * 2)
return SplitVecOp_UnaryOp(N);
SDLoc DL(N);
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = InVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());
+
+ if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
+ return SplitVecOp_UnaryOp(N);
+
// Get the split input vector.
SDValue InLoVec, InHiVec;
GetSplitVector(InVec, InLoVec, InHiVec);
+
// Truncate them to 1/2 the element size.
EVT HalfElementVT = IsFloat ?
EVT::getFloatingPointVT(InElementSize/2) :
@@ -2298,12 +2419,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
Res = WidenVecRes_Binary(N);
break;
@@ -2320,6 +2445,33 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FMAXNUM:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FTRUNC:
+ Res = WidenVecRes_StrictFP(N);
+ break;
+
case ISD::FCOPYSIGN:
Res = WidenVecRes_FCOPYSIGN(N);
break;
@@ -2353,11 +2505,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
- case ISD::BITREVERSE:
- case ISD::BSWAP:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::CTTZ:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -2368,12 +2515,37 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
- case ISD::FNEG:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
- case ISD::FTRUNC:
+ case ISD::FTRUNC: {
+ // We're going to widen this vector op to a legal type by padding with undef
+ // elements. If the wide vector op is eventually going to be expanded to
+ // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+ // libcalls on the undef elements. We are assuming that if the scalar op
+ // requires expanding, then the vector op needs expanding too.
+ EVT VT = N->getValueType(0);
+ if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ "Target supports vector op, but scalar requires expansion?");
+ Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ break;
+ }
+ }
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those like
+ // any other unary ops.
+ LLVM_FALLTHROUGH;
+
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FNEG:
+ case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
break;
case ISD::FMA:
@@ -2405,6 +2577,88 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
+// Given a vector of operations that have been broken up to widen, see
+// if we can collect them together into the next widest legal VT. This
+// implementation is trap-safe.
+static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
+ SmallVectorImpl<SDValue> &ConcatOps,
+ unsigned ConcatEnd, EVT VT, EVT MaxVT,
+ EVT WidenVT) {
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ SDLoc dl(ConcatOps[0]);
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ int Idx = 0;
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+      // Scalar type, create an INSERT_VECTOR_ELT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, SubConcatOps);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd ) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(ConcatOps.data(), NumOps));
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Binary op widening for operations that can trap.
unsigned Opcode = N->getOpcode();
@@ -2477,75 +2731,119 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
}
}
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
+ // StrictFP op widening for operations that can trap.
+ unsigned NumOpers = N->getNumOperands();
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
- // while (Some element of ConcatOps is not of type MaxVT) {
- // From the end of ConcatOps, collect elements of the same type and put
- // them into an op of the next larger supported type
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SmallVector<SDValue, 4> InOps;
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ SmallVector<SDValue, 16> Chains;
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // The Chain is the first operand.
+ InOps.push_back(N->getOperand(0));
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOpers; ++i) {
+ SDValue Oper = N->getOperand(i);
+
+ if (Oper.getValueType().isVector()) {
+ assert(Oper.getValueType() == N->getValueType(0) &&
+ "Invalid operand type to widen!");
+ Oper = GetWidenedVector(Oper);
+ }
+
+ InOps.push_back(Oper);
+ }
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
// }
- while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
- VT = ConcatOps[Idx--].getValueType();
- while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
- Idx--;
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
- EVT NextVT;
+ EOps.push_back(Op);
+ }
+
+ EVT OperVT[] = {VT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
do {
- NextSize *= 2;
- NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeLegal(NextVT));
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
- if (!VT.isVector()) {
- // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
- SDValue VecOp = DAG.getUNDEF(NextVT);
- unsigned NumToInsert = ConcatEnd - Idx - 1;
- for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(
- ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SmallVector<SDValue, 4> EOps;
+
+ for (unsigned i = 0; i < NumOpers; ++i) {
+ SDValue Op = InOps[i];
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
+ DAG.getConstant(Idx, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EOps.push_back(Op);
+ }
+
+ EVT WidenVT[] = {WidenEltVT, MVT::Other};
+ SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps);
+ ConcatOps[ConcatEnd++] = Oper;
+ Chains.push_back(Oper.getValue(1));
}
- ConcatOps[Idx+1] = VecOp;
- ConcatEnd = Idx + 2;
- } else {
- // Vector type, create a CONCAT_VECTORS of type NextVT
- SDValue undefVec = DAG.getUNDEF(VT);
- unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
- SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
- unsigned RealVals = ConcatEnd - Idx - 1;
- unsigned SubConcatEnd = 0;
- unsigned SubConcatIdx = Idx + 1;
- while (SubConcatEnd < RealVals)
- SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
- while (SubConcatEnd < OpsToConcat)
- SubConcatOps[SubConcatEnd++] = undefVec;
- ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NextVT, SubConcatOps);
- ConcatEnd = SubConcatIdx + 1;
+ CurNumElts = 0;
}
}
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
+ // Build a factor node to remember all the Ops that have been created.
+ SDValue NewChain;
+ if (Chains.size() == 1)
+ NewChain = Chains[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
- // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
- unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
- if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(MaxVT);
- for (unsigned j = ConcatEnd; j < NumOps; ++j)
- ConcatOps[j] = UndefVal;
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- makeArrayRef(ConcatOps.data(), NumOps));
+ return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
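WidenVecRes_StrictFP above cannot simply pad the vector with undef lanes, because a trapping operation on the padding could raise spurious exceptions; instead it processes the original elements in the largest legal chunks first and only then falls back to smaller pieces. Stripped of the DAG plumbing, the control flow looks roughly like the sketch below, where process() is a hypothetical stand-in for emitting one sub-operation and 8 lanes is assumed to be the widest legal chunk:

    #include <cstdio>

    // Hypothetical stand-in for emitting one strict sub-operation on N lanes.
    static void process(const float *Elts, unsigned N) {
      (void)Elts;
      std::printf("emit sub-op on %u lane(s)\n", N);
    }

    // Take munches of the largest assumed-legal size from the front, then halve.
    void chunked(const float *In, unsigned NumElts) {
      unsigned Chunk = 8;              // assumed widest legal piece
      unsigned Idx = 0;
      while (NumElts != 0) {
        while (NumElts >= Chunk) {
          process(In + Idx, Chunk);
          Idx += Chunk;
          NumElts -= Chunk;
        }
        if (Chunk == 1)
          break;                       // single-lane tail already handled above
        Chunk /= 2;                    // fall back to the next smaller size
      }
    }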
@@ -2575,10 +2873,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// If both input and result vector types are of same width, extend
// operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
// accepts fewer elements in the result than in the input.
+ if (Opcode == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
if (Opcode == ISD::SIGN_EXTEND)
- return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
if (Opcode == ISD::ZERO_EXTEND)
- return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
}
}
@@ -2591,11 +2891,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (WidenNumElts % InVTNumElts == 0) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat = WidenNumElts/InVTNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
- SDValue UndefVal = DAG.getUNDEF(InVT);
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = UndefVal;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
@@ -2614,11 +2911,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
// Otherwise unroll into some nasty scalar code and rebuild the vector.
- SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = WidenVT.getVectorElementType();
- unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
- unsigned i;
- for (i=0; i < MinElts; ++i) {
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+  // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
@@ -2628,10 +2926,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
- SDValue UndefVal = DAG.getUNDEF(EltVT);
- for (; i < WidenNumElts; ++i)
- Ops[i] = UndefVal;
-
return DAG.getBuildVector(WidenVT, DL, Ops);
}
@@ -2654,11 +2948,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
- return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT);
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
case ISD::ZERO_EXTEND_VECTOR_INREG:
- return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
}
}
}
@@ -2810,22 +3102,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
}
if (TLI.isTypeLegal(NewInVT)) {
- // Because the result and the input are different vector types, widening
- // the result could create a legal type but widening the input might make
- // it an illegal type that might lead to repeatedly splitting the input
- // and then widening it. To avoid this, we widen the input only if
- // it results in a legal type.
- SmallVector<SDValue, 16> Ops(NewNumElts);
- SDValue UndefVal = DAG.getUNDEF(InVT);
- Ops[0] = InOp;
- for (unsigned i = 1; i < NewNumElts; ++i)
- Ops[i] = UndefVal;
-
SDValue NewVec;
- if (InVT.isVector())
+ if (InVT.isVector()) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT));
+ Ops[0] = InOp;
+
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
- else
- NewVec = DAG.getBuildVector(NewInVT, dl, Ops);
+ } else {
+ NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
+ }
return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -3003,7 +3293,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
- SDValue Src0 = GetWidenedVector(N->getSrc0());
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
@@ -3014,9 +3304,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
Mask = ModifyToType(Mask, WideMaskVT, true);
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
- Mask, Src0, N->getMemoryVT(),
+ Mask, PassThru, N->getMemoryVT(),
N->getMemOperand(), ExtType,
- N->isExpandingLoad());
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -3028,7 +3318,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
- SDValue Src0 = GetWidenedVector(N->getValue());
+ SDValue PassThru = GetWidenedVector(N->getPassThru());
SDValue Scale = N->getScale();
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
@@ -3045,7 +3335,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
- SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale };
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
+ Scale };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
@@ -3155,16 +3446,6 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
return Mask;
}
-// Get the target mask VT, and widen if needed.
-EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) {
- assert(SetCC->getOpcode() == ISD::SETCC);
- LLVMContext &Ctx = *DAG.getContext();
- EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType());
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT);
- return MaskVT;
-}
-
// This method tries to handle VSELECT and its mask by legalizing operands
// (which may require widening) and if needed adjusting the mask vector type
// to match that of the VSELECT. Without it, many cases end up with
@@ -3232,7 +3513,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
SDValue Mask;
if (Cond->getOpcode() == ISD::SETCC) {
- EVT MaskVT = getSETCCWidenedResultTy(Cond);
+ EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType());
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else if (isLogicalMaskOp(Cond->getOpcode()) &&
Cond->getOperand(0).getOpcode() == ISD::SETCC &&
@@ -3240,8 +3521,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
// Cond is (AND/OR/XOR (SETCC, SETCC))
SDValue SETCC0 = Cond->getOperand(0);
SDValue SETCC1 = Cond->getOperand(1);
- EVT VT0 = getSETCCWidenedResultTy(SETCC0);
- EVT VT1 = getSETCCWidenedResultTy(SETCC1);
+ EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType());
+ EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType());
unsigned ScalarBits0 = VT0.getScalarSizeInBits();
unsigned ScalarBits1 = VT1.getScalarSizeInBits();
unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
@@ -3414,6 +3695,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
@@ -3503,11 +3785,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
default:
llvm_unreachable("Extend legalization on extend operation!");
case ISD::ANY_EXTEND:
- return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp);
case ISD::SIGN_EXTEND:
- return DAG.getSignExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp);
case ISD::ZERO_EXTEND:
- return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp);
}
}
@@ -3537,8 +3819,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
InVT.getVectorNumElements());
if (TLI.isTypeLegal(WideVT)) {
SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
- DAG.getIntPtrConstant(0, dl));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
EVT InEltVT = InVT.getVectorElementType();
@@ -3580,20 +3863,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
- // If the input vector is not legal, it is likely that we will not find a
- // legal vector of the same size. Replace the concatenate vector with a
- // nasty build vector.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
+ EVT InVT = N->getOperand(0).getValueType();
SDLoc dl(N);
+
+ // If the widen width for this operand is the same as the width of the concat
+ // and all but the first operand is undef, just use the widened operand.
+ unsigned NumOperands = N->getNumOperands();
+ if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+ unsigned i;
+ for (i = 1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ return GetWidenedVector(N->getOperand(0));
+ }
+
+ // Otherwise, fall back to a nasty build vector.
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(NumElts);
- EVT InVT = N->getOperand(0).getValueType();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned Idx = 0;
- unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
assert(getTypeAction(InOp.getValueType()) ==
@@ -3641,7 +3935,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
- assert((OpNo == 2 || OpNo == 3) &&
+ assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
@@ -3649,8 +3943,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
SDValue StVal = MST->getValue();
SDLoc dl(N);
- if (OpNo == 3) {
- // Widen the value
+ if (OpNo == 1) {
+ // Widen the value.
StVal = GetWidenedVector(StVal);
// The mask should be widened as well.
@@ -3660,18 +3954,15 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
WideVT.getVectorNumElements());
Mask = ModifyToType(Mask, WideMaskVT, true);
} else {
+ // Widen the mask.
EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
Mask = ModifyToType(Mask, WideMaskVT, true);
EVT ValueVT = StVal.getValueType();
- if (getTypeAction(ValueVT) == TargetLowering::TypeWidenVector)
- StVal = GetWidenedVector(StVal);
- else {
- EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
- ValueVT.getVectorElementType(),
- WideMaskVT.getVectorNumElements());
- StVal = ModifyToType(StVal, WideVT);
- }
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(),
+ WideMaskVT.getVectorNumElements());
+ StVal = ModifyToType(StVal, WideVT);
}
assert(Mask.getValueType().getVectorNumElements() ==
@@ -3682,36 +3973,59 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
false, MST->isCompressingStore());
}
+SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 4 && "Can widen only the index of mgather");
+ auto *MG = cast<MaskedGatherSDNode>(N);
+ SDValue DataOp = MG->getPassThru();
+ SDValue Mask = MG->getMask();
+ SDValue Scale = MG->getScale();
+
+ // Just widen the index. It's allowed to have extra elements.
+ SDValue Index = GetWidenedVector(MG->getIndex());
+
+ SDLoc dl(N);
+ SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
+ Scale};
+ SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
+ MG->getMemOperand());
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
- assert(OpNo == 1 && "Can widen only data operand of mscatter");
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
- EVT MaskVT = Mask.getValueType();
+ SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
- // Widen the value.
- SDValue WideVal = GetWidenedVector(DataOp);
- EVT WideVT = WideVal.getValueType();
- unsigned NumElts = WideVT.getVectorNumElements();
- SDLoc dl(N);
+ unsigned NumElts;
+ if (OpNo == 1) {
+ DataOp = GetWidenedVector(DataOp);
+ NumElts = DataOp.getValueType().getVectorNumElements();
- // The mask should be widened as well.
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
- MaskVT.getVectorElementType(), NumElts);
- Mask = ModifyToType(Mask, WideMaskVT, true);
+ // Widen index.
+ EVT IndexVT = Index.getValueType();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ IndexVT.getVectorElementType(), NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
- // Widen index.
- SDValue Index = MSC->getIndex();
- EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
- Index.getValueType().getScalarType(),
- NumElts);
- Index = ModifyToType(Index, WideIndexVT);
+ // The mask should be widened as well.
+ EVT MaskVT = Mask.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(), NumElts);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
+ } else if (OpNo == 4) {
+ // Just widen the index. It's allowed to have extra elements.
+ Index = GetWidenedVector(Index);
+ } else
+ llvm_unreachable("Can't widen this operand of mscatter");
- SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index,
+ SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- MSC->getMemoryVT(), dl, Ops,
+ MSC->getMemoryVT(), SDLoc(N), Ops,
MSC->getMemOperand());
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 7e6b57426338..f7566b246f32 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -24,6 +24,7 @@ class DIVariable;
class DIExpression;
class SDNode;
class Value;
+class raw_ostream;
/// Holds the information from a dbg_value node through SDISel.
/// We do not use SDValue here to avoid including its header.
@@ -52,6 +53,7 @@ private:
enum DbgValueKind kind;
bool IsIndirect;
bool Invalid = false;
+ bool Emitted = false;
public:
/// Constructor for non-constants.
@@ -124,6 +126,17 @@ public:
/// deleted.
void setIsInvalidated() { Invalid = true; }
bool isInvalidated() const { return Invalid; }
+
+ /// setIsEmitted / isEmitted - Getter/Setter for flag indicating that this
+ /// SDDbgValue has been emitted to an MBB.
+ void setIsEmitted() { Emitted = true; }
+ bool isEmitted() const { return Emitted; }
+
+ /// clearIsEmitted - Reset Emitted flag, for certain special cases where
+ /// dbg.addr is emitted twice.
+ void clearIsEmitted() { Emitted = false; }
+
+ LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
};
/// Holds the information from a dbg_label node through SDISel.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 3944d7df286d..90e109b022fd 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -125,8 +125,7 @@ void ScheduleDAGFast::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
- .dumpAll(this));
+ LLVM_DEBUG(dump());
// Execute the actual scheduling loop.
ListScheduleBottomUp();
@@ -144,7 +143,7 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -182,7 +181,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
SU->setHeightToAtLeast(CurCycle);
@@ -777,11 +776,9 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (N->getHasDebugValue()) {
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
for (auto DV : DAG->GetDbgValues(N)) {
- if (DV->isInvalidated())
- continue;
- if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
- BB->insert(InsertPos, DbgMI);
- DV->setIsInvalidated();
+ if (!DV->isEmitted())
+ if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
+ BB->insert(InsertPos, DbgMI);
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 43e8ffd3839c..8d75b8133a30 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -365,7 +365,7 @@ void ScheduleDAGRRList::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
+ LLVM_DEBUG(dump());
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
@@ -396,7 +396,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -729,7 +729,7 @@ static void resetVRegCycle(SUnit *SU);
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
@@ -828,7 +828,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
for (SDep &Pred : SU->Preds) {
CapturePred(&Pred);
@@ -1130,7 +1130,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return nullptr;
LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
if (N->getGluedNode() &&
!TII->canCopyGluedNodeDuringSchedule(N)) {
@@ -1888,7 +1888,7 @@ public:
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
dbgs() << "Height " << SU->getHeight() << ": ";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 430d8fb34476..e258f0a218a5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -145,20 +145,18 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
Ops.push_back(ExtraOper);
SDVTList VTList = DAG->getVTList(VTs);
- MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr;
MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
// Store memory references.
- if (MN) {
- Begin = MN->memoperands_begin();
- End = MN->memoperands_end();
- }
+ SmallVector<MachineMemOperand *, 2> MMOs;
+ if (MN)
+ MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());
DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
// Reset the memory references
if (MN)
- MN->setMemRefs(Begin, End);
+ DAG->setNodeMemRefs(MN, MMOs);
}
static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
@@ -244,7 +242,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
return;
// Sort them in increasing order.
- llvm::sort(Offsets.begin(), Offsets.end());
+ llvm::sort(Offsets);
// Check if the loads are close enough.
SmallVector<SDNode*, 4> Loads;
@@ -650,18 +648,20 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
dep.setLatency(Latency);
}
-void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
- // Cannot completely remove virtual function even in release mode.
+void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- if (!SU->getNode()) {
+ dumpNodeName(SU);
+ dbgs() << ": ";
+
+ if (!SU.getNode()) {
dbgs() << "PHYS REG COPY\n";
return;
}
- SU->getNode()->dump(DAG);
+ SU.getNode()->dump(DAG);
dbgs() << "\n";
SmallVector<SDNode *, 4> GluedNodes;
- for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
dbgs() << " ";
@@ -672,11 +672,22 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
#endif
}
+void ScheduleDAGSDNodes::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getNode() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits)
+ dumpNodeAll(SU);
+ if (ExitSU.getNode() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
}
@@ -711,7 +722,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
for (auto DV : DAG->GetDbgValues(N)) {
- if (DV->isInvalidated())
+ if (DV->isEmitted())
continue;
unsigned DVOrder = DV->getOrder();
if (!Order || DVOrder == Order) {
@@ -720,7 +731,6 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
Orders.push_back({DVOrder, DbgMI});
BB->insert(InsertPos, DbgMI);
}
- DV->setIsInvalidated();
}
}
}
@@ -811,8 +821,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
for (; PDI != PDE; ++PDI) {
MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
- if (DbgMI)
+ if (DbgMI) {
BB->insert(InsertPos, DbgMI);
+ // We re-emit the dbg_value closer to its use, too, after instructions
+ // are emitted to the BB.
+ (*PDI)->clearIsEmitted();
+ }
}
}
@@ -878,7 +892,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
for (; DI != DE; ++DI) {
if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
break;
- if ((*DI)->isInvalidated())
+ if ((*DI)->isEmitted())
continue;
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
@@ -900,7 +914,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// some of them before one or more conditional branches?
SmallVector<MachineInstr*, 8> DbgMIs;
for (; DI != DE; ++DI) {
- if ((*DI)->isInvalidated())
+ if ((*DI)->isEmitted())
continue;
assert((*DI)->getOrder() >= LastOrder &&
"emitting DBG_VALUE out of order");
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 6417e16bd0fd..3fa7ad895725 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -122,8 +122,8 @@ class InstrItineraryData;
virtual MachineBasicBlock*
EmitSchedule(MachineBasicBlock::iterator &InsertPos);
- void dumpNode(const SUnit *SU) const override;
-
+ void dumpNode(const SUnit &SU) const override;
+ void dump() const override;
void dumpSchedule() const;
std::string getGraphNodeLabel(const SUnit *SU) const override;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 84055f8ecc1a..416061475b1a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -118,7 +118,7 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -152,7 +152,7 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
/// the Available queue.
void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- LLVM_DEBUG(SU->dump(this));
+ LLVM_DEBUG(dumpNode(*SU));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9e38e675d13a..647496c1afcb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -87,6 +87,8 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+void SelectionDAG::DAGNodeDeletedListener::anchor() {}
+
#define DEBUG_TYPE "selectiondag"
static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
@@ -269,15 +271,24 @@ bool ISD::allOperandsUndef(const SDNode *N) {
}
bool ISD::matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match) {
+ std::function<bool(ConstantSDNode *)> Match,
+ bool AllowUndefs) {
+ // FIXME: Add support for scalar UNDEF cases?
if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
return Match(Cst);
+ // FIXME: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != Op.getOpcode())
return false;
EVT SVT = Op.getValueType().getScalarType();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (AllowUndefs && Op.getOperand(i).isUndef()) {
+ if (!Match(nullptr))
+ return false;
+ continue;
+ }
+
auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
return false;
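With the new AllowUndefs flag above, undef elements of a BUILD_VECTOR are presented to the predicate as a null ConstantSDNode*, so callers that want to tolerate undef lanes must accept nullptr explicitly. A hypothetical caller (a sketch, not code from this change) might look like:

    // Accept a build_vector whose defined lanes are all-ones, treating undef
    // lanes as matching (the helper passes nullptr for them).
    bool AllOnesOrUndef = ISD::matchUnaryPredicate(
        Op,
        [](ConstantSDNode *C) { return !C || C->isAllOnesValue(); },
        /*AllowUndefs=*/true);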
@@ -287,26 +298,33 @@ bool ISD::matchUnaryPredicate(SDValue Op,
bool ISD::matchBinaryPredicate(
SDValue LHS, SDValue RHS,
- std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
+ bool AllowUndefs) {
if (LHS.getValueType() != RHS.getValueType())
return false;
+ // TODO: Add support for scalar UNDEF cases?
if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
return Match(LHSCst, RHSCst);
+ // TODO: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
ISD::BUILD_VECTOR != RHS.getOpcode())
return false;
EVT SVT = LHS.getValueType().getScalarType();
for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
- auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
- if (!LHSCst || !RHSCst)
+ SDValue LHSOp = LHS.getOperand(i);
+ SDValue RHSOp = RHS.getOperand(i);
+ bool LHSUndef = AllowUndefs && LHSOp.isUndef();
+ bool RHSUndef = AllowUndefs && RHSOp.isUndef();
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp);
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
+ if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
return false;
- if (LHSCst->getValueType(0) != SVT ||
- LHSCst->getValueType(0) != RHSCst->getValueType(0))
+ if (LHSOp.getValueType() != SVT ||
+ LHSOp.getValueType() != RHSOp.getValueType())
return false;
if (!Match(LHSCst, RHSCst))
return false;
@@ -984,7 +1002,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- DivergenceAnalysis * Divergence) {
+ LegacyDivergenceAnalysis * Divergence) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -1118,39 +1136,6 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
getConstant(Imm, DL, Op.getValueType()));
}
-SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
-SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
-SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
- EVT VT) {
- assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
- "The sizes of the input and result must match in order to perform the "
- "extend in-register.");
- assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
- "The destination vector type must have fewer lanes than the input.");
- return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
-}
-
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
@@ -1718,7 +1703,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
// SDNode doesn't have access to it. This memory will be "leaked" when
// the node is deallocated, but recovered when the NodeAllocator is released.
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
- std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc);
+ llvm::copy(MaskVec, MaskAlloc);
auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
dl.getDebugLoc(), MaskAlloc);
@@ -2135,6 +2120,15 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
+ case ISD::SIGN_EXTEND_INREG:
+ EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT();
+ unsigned ExVTBits = ExVT.getScalarSizeInBits();
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if (Mask.getActiveBits() <= ExVTBits)
+ return V.getOperand(0);
+
+ break;
}
return SDValue();
}
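The new SIGN_EXTEND_INREG case only fires when every demanded bit lies within the narrower source type, so the in-register extension is a no-op for the bits the caller cares about. A worked illustration with assumed types:

// Assumed example: V = sign_extend_inreg T, ValueType:i8, where T is i32.
//   GetDemandedBits(V, APInt(32, 0x7f))  -->  T
// Mask.getActiveBits() == 7 <= ExVTBits == 8, so the extended bits are
// never observed and the source can be used directly.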
@@ -2151,9 +2145,103 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
/// for bits that V cannot have.
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
- KnownBits Known;
- computeKnownBits(Op, Known, Depth);
- return Mask.isSubsetOf(Known.Zero);
+ return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero);
+}
+
+/// isSplatValue - Return true if the vector V has the same value
+/// across all DemandedElts.
+bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
+ APInt &UndefElts) {
+ if (!DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
+ UndefElts = APInt::getNullValue(NumElts);
+
+ switch (V.getOpcode()) {
+ case ISD::BUILD_VECTOR: {
+ SDValue Scl;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = V.getOperand(i);
+ if (Op.isUndef()) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (Scl && Scl != Op)
+ return false;
+ Scl = Op;
+ }
+ return true;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO: Do we need to handle shuffle(splat, undef, mask)?
+ int SplatIndex = -1;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (!DemandedElts[i])
+ continue;
+ if (0 <= SplatIndex && SplatIndex != M)
+ return false;
+ SplatIndex = M;
+ }
+ return true;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Src = V.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt UndefSrcElts;
+ APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) {
+ UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
+ return true;
+ }
+ }
+ break;
+ }
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND: {
+ APInt UndefLHS, UndefRHS;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS)) {
+ UndefElts = UndefLHS | UndefRHS;
+ return true;
+ }
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// Helper wrapper to main isSplatValue function.
+bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
+ EVT VT = V.getValueType();
+ assert(VT.isVector() && "Vector type expected");
+ unsigned NumElts = VT.getVectorNumElements();
+
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return isSplatValue(V, DemandedElts, UndefElts) &&
+ (AllowUndefs || !UndefElts);
}
/// Helper function that checks to see if a node is a constant or a
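isSplatValue gives combines a generic way to ask whether all demanded lanes of a vector carry the same value, optionally tolerating undef lanes. A sketch of two hypothetical wrappers over the new API (the helper names are assumptions):

// Hypothetical helpers illustrating both overloads added above.
static bool isBroadcastLike(SelectionDAG &DAG, SDValue V) {
  // All-lanes query; undef lanes are allowed to participate in the splat.
  return DAG.isSplatValue(V, /*AllowUndefs=*/true);
}

static bool isSplatOfDemandedLanes(SelectionDAG &DAG, SDValue V,
                                   const APInt &DemandedElts) {
  // Finer-grained form: only the demanded lanes must agree, and the caller
  // learns which of those lanes were undef.
  APInt UndefElts;
  return DAG.isSplatValue(V, DemandedElts, UndefElts);
}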
@@ -2195,60 +2283,59 @@ static const APInt *getValidShiftAmountConstant(SDValue V) {
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
-void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
- unsigned Depth) const {
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
- computeKnownBits(Op, Known, DemandedElts, Depth);
+ return computeKnownBits(Op, DemandedElts, Depth);
}
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. The DemandedElts argument allows us to only collect the known
/// bits that are shared by the requested vector elements.
-void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
- const APInt &DemandedElts,
- unsigned Depth) const {
+KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- Known = KnownBits(BitWidth); // Don't know anything.
+ KnownBits Known(BitWidth); // Don't know anything.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
Known.One = C->getAPIntValue();
Known.Zero = ~Known.One;
- return;
+ return Known;
}
if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
// We know all of the bits for a constant fp!
Known.One = C->getValueAPF().bitcastToAPInt();
Known.Zero = ~Known.One;
- return;
+ return Known;
}
if (Depth == 6)
- return; // Limit search depth.
+ return Known; // Limit search depth.
KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
+ assert((!Op.getValueType().isVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
if (!DemandedElts)
- return; // No demanded elts, better to assume we don't know anything.
+ return Known; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
- assert(NumElts == Op.getValueType().getVectorNumElements() &&
- "Unexpected vector size");
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
- computeKnownBits(SrcOp, Known2, Depth + 1);
+ Known2 = computeKnownBits(SrcOp, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
if (SrcOp.getValueSizeInBits() != BitWidth) {
@@ -2295,7 +2382,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Known bits are the values that are shared by every demanded element.
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
- computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1);
+ Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2304,7 +2391,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
- computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1);
+ Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2321,7 +2408,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
DemandedSub = DemandedSub.trunc(NumSubVectorElts);
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
- computeKnownBits(Sub, Known2, DemandedSub, Depth + 1);
+ Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2344,22 +2431,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
if (!!DemandedSubElts) {
- computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1);
+ Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
if (Known.isUnknown())
break; // early-out.
}
APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
APInt DemandedSrcElts = DemandedElts & ~SubMask;
if (!!DemandedSrcElts) {
- computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1);
+ Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
} else {
- computeKnownBits(Sub, Known, Depth + 1);
+ Known = computeKnownBits(Sub, Depth + 1);
if (Known.isUnknown())
break; // early-out.
- computeKnownBits(Src, Known2, Depth + 1);
+ Known2 = computeKnownBits(Src, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -2375,12 +2462,25 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
+ Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
} else {
- computeKnownBits(Src, Known, Depth + 1);
+ Known = computeKnownBits(Src, Depth + 1);
}
break;
}
+ case ISD::SCALAR_TO_VECTOR: {
+ // We know as much about scalar_to_vector as we know about its source,
+ // which becomes the first element of an otherwise unknown vector.
+ if (DemandedElts != 1)
+ break;
+
+ SDValue N0 = Op.getOperand(0);
+ Known = computeKnownBits(N0, Depth + 1);
+ if (N0.getValueSizeInBits() != BitWidth)
+ Known = Known.trunc(BitWidth);
+
+ break;
+ }
case ISD::BITCAST: {
SDValue N0 = Op.getOperand(0);
EVT SubVT = N0.getValueType();
@@ -2392,7 +2492,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Fast handling of 'identity' bitcasts.
if (BitWidth == SubBitWidth) {
- computeKnownBits(N0, Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(N0, DemandedElts, Depth + 1);
break;
}
@@ -2413,7 +2513,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
SubDemandedElts.setBit(i * SubScale);
for (unsigned i = 0; i != SubScale; ++i) {
- computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
+ Known2 = computeKnownBits(N0, SubDemandedElts.shl(i),
Depth + 1);
unsigned Shifts = IsLE ? i : SubScale - 1 - i;
Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
@@ -2434,7 +2534,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
if (DemandedElts[i])
SubDemandedElts.setBit(i / SubScale);
- computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1);
+ Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
@@ -2452,8 +2552,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-1 bits are only known if set in both the LHS & RHS.
Known.One &= Known2.One;
@@ -2461,8 +2561,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero |= Known2.Zero;
break;
case ISD::OR:
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
Known.Zero &= Known2.Zero;
@@ -2470,8 +2570,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.One |= Known2.One;
break;
case ISD::XOR: {
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
@@ -2481,8 +2581,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::MUL: {
- computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If low bits are zero in either operand, output low known-0 bits.
// Also compute a conservative estimate for high known-0 bits.
@@ -2503,10 +2603,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned LeadZ = Known2.countMinLeadingZeros();
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
if (RHSMaxLeadingZeros != BitWidth)
LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
@@ -2516,22 +2616,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::SELECT:
case ISD::VSELECT:
- computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1);
+ Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1);
+ Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1);
+ Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -2560,7 +2660,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
Known.Zero <<= Shift;
Known.One <<= Shift;
@@ -2570,7 +2670,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SRL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
Known.Zero.lshrInPlace(Shift);
Known.One.lshrInPlace(Shift);
@@ -2599,13 +2699,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
// Sign extend known zero/one bit (else is unknown).
Known.Zero.ashrInPlace(Shift);
Known.One.ashrInPlace(Shift);
}
break;
+ case ISD::FSHL:
+ case ISD::FSHR:
+ if (ConstantSDNode *C =
+ isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
+ unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
+ DemandedElts, Depth + 1);
+ break;
+ }
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Opcode == ISD::FSHL) {
+ Known.One <<= Amt;
+ Known.Zero <<= Amt;
+ Known2.One.lshrInPlace(BitWidth - Amt);
+ Known2.Zero.lshrInPlace(BitWidth - Amt);
+ } else {
+ Known.One <<= BitWidth - Amt;
+ Known.Zero <<= BitWidth - Amt;
+ Known2.One.lshrInPlace(Amt);
+ Known2.Zero.lshrInPlace(Amt);
+ }
+ Known.One |= Known2.One;
+ Known.Zero |= Known2.Zero;
+ }
+ break;
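// Worked illustration with assumed values: BitWidth = 8 and a constant
// shift amount Z = 11 give Amt = 11 % 8 = 3. For fshl the result is
// (X << 3) | (Y >> 5): known bits of X supply result bits [7:3] and known
// bits of Y supply result bits [2:0], which is exactly what the shifted
// Known/Known2 masks OR'd together above describe.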
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned EBits = EVT.getScalarSizeInBits();
@@ -2623,7 +2756,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignMask;
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.One &= InputDemandedBits;
Known.Zero &= InputDemandedBits;
@@ -2643,7 +2776,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleTZ = Known2.countMaxTrailingZeros();
unsigned LowBits = Log2_32(PossibleTZ) + 1;
@@ -2652,7 +2785,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleLZ = Known2.countMaxLeadingZeros();
unsigned LowBits = Log2_32(PossibleLZ) + 1;
@@ -2660,7 +2793,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::CTPOP: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
unsigned PossibleOnes = Known2.countMaxPopulation();
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
@@ -2681,41 +2814,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
- computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1);
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
- // TODO ISD::SIGN_EXTEND_VECTOR_INREG
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ Known = Known.sext(BitWidth);
+ break;
+ }
case ISD::SIGN_EXTEND: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known = Known.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
Known.Zero |= (~InMask);
Known.One &= (~Known.Zero);
break;
@@ -2745,7 +2886,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts,
Depth + 1);
// If all of the MaskV bits are known to be zero, then we know the
@@ -2762,12 +2903,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If low bits are known to be zero in both operands, then we know they are
// going to be 0 in the result. Both addition and complement operations
// preserve the low zero bits.
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
if (KnownZeroLow == 0)
break;
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
Known.Zero.setLowBits(KnownZeroLow);
break;
@@ -2794,12 +2935,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// known to be clear. For example, if one input has the top 10 bits clear
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
- Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
@@ -2823,7 +2963,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// The low bits of the first operand are unchanged by the srem.
Known.Zero = Known2.Zero & LowBits;
@@ -2847,7 +2987,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// The upper bits are all zero, the lower ones are unchanged.
Known.Zero = Known2.Zero | ~LowBits;
@@ -2858,8 +2998,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
uint32_t Leaders =
std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
@@ -2868,7 +3008,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::EXTRACT_ELEMENT: {
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = computeKnownBits(Op.getOperand(0), Depth+1);
const unsigned Index = Op.getConstantOperandVal(1);
const unsigned BitWidth = Op.getValueSizeInBits();
@@ -2896,10 +3036,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If we know the element index, just demand that vector element.
unsigned Idx = ConstEltNo->getZExtValue();
APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
- computeKnownBits(InVec, Known, DemandedElt, Depth + 1);
+ Known = computeKnownBits(InVec, DemandedElt, Depth + 1);
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, Known, Depth + 1);
+ Known = computeKnownBits(InVec, Depth + 1);
}
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth);
@@ -2919,7 +3059,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// If we demand the inserted element then add its common known bits.
if (DemandedElts[EltIdx]) {
- computeKnownBits(InVal, Known2, Depth + 1);
+ Known2 = computeKnownBits(InVal, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
@@ -2928,33 +3068,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// that we don't demand the inserted element.
APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
if (!!VectorElts) {
- computeKnownBits(InVec, Known2, VectorElts, Depth + 1);
+ Known2 = computeKnownBits(InVec, VectorElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, Known, Depth + 1);
- computeKnownBits(InVal, Known2, Depth + 1);
+ Known = computeKnownBits(InVec, Depth + 1);
+ Known2 = computeKnownBits(InVal, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
break;
}
case ISD::BITREVERSE: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.reverseBits();
Known.One = Known2.One.reverseBits();
break;
}
case ISD::BSWAP: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.byteSwap();
Known.One = Known2.One.byteSwap();
break;
}
case ISD::ABS: {
- computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the source's MSB is zero then we know the rest of the bits already.
if (Known2.isNonNegative()) {
@@ -2973,8 +3113,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::UMIN: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// UMIN - we know that the result will have the maximum of the
// known zero leading bits of the inputs.
@@ -2987,9 +3127,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::UMAX: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// UMAX - we know that the result will have the maximum of the
// known one leading bits of the inputs.
@@ -3033,9 +3172,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
// Fallback - just get the shared known bits of the operands.
- computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Known.isUnknown()) break; // Early-out
- computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
@@ -3058,6 +3197,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ return Known;
}
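computeKnownBits now returns the KnownBits value instead of filling an out-parameter, so call sites can compose the query directly. A sketch of the new call style (the helper name is an assumption):

// Old style:  KnownBits Known; DAG.computeKnownBits(Op, Known);
// New style:  use the returned value in place.
static bool signBitKnownZero(const SelectionDAG &DAG, SDValue Op) {
  return DAG.computeKnownBits(Op).isNonNegative();
}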
SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
@@ -3066,11 +3206,9 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
if (isNullConstant(N1))
return OFK_Never;
- KnownBits N1Known;
- computeKnownBits(N1, N1Known);
+ KnownBits N1Known = computeKnownBits(N1);
if (N1Known.Zero.getBoolValue()) {
- KnownBits N0Known;
- computeKnownBits(N0, N0Known);
+ KnownBits N0Known = computeKnownBits(N0);
bool overflow;
(void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
@@ -3084,8 +3222,7 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
return OFK_Never;
if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
- KnownBits N0Known;
- computeKnownBits(N0, N0Known);
+ KnownBits N0Known = computeKnownBits(N0);
if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
return OFK_Never;
@@ -3131,8 +3268,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// to handle some common cases.
// Fall back to computeKnownBits to catch other known cases.
- KnownBits Known;
- computeKnownBits(Val, Known);
+ KnownBits Known = computeKnownBits(Val);
return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
}
@@ -3240,14 +3376,35 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (VTBits == SrcBits)
return ComputeNumSignBits(N0, DemandedElts, Depth + 1);
+ bool IsLE = getDataLayout().isLittleEndian();
+
// Bitcast 'large element' scalar/vector to 'small element' vector.
- // TODO: Handle cases other than 'sign splat' when we have a use case.
- // Requires handling of DemandedElts and Endianness.
if ((SrcBits % VTBits) == 0) {
- assert(Op.getValueType().isVector() && "Expected bitcast to vector");
- Tmp = ComputeNumSignBits(N0, Depth + 1);
+ assert(VT.isVector() && "Expected bitcast to vector");
+
+ unsigned Scale = SrcBits / VTBits;
+ APInt SrcDemandedElts(NumElts / Scale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ // Fast case - sign splat can be simply split across the small elements.
+ Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
if (Tmp == SrcBits)
return VTBits;
+
+ // Slow case - determine how far the sign extends into each sub-element.
+ Tmp2 = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned SubOffset = i % Scale;
+ SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset);
+ SubOffset = SubOffset * VTBits;
+ if (Tmp <= SubOffset)
+ return 1;
+ Tmp2 = std::min(Tmp2, Tmp - SubOffset);
+ }
+ return Tmp2;
}
break;
}
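The slow path works out, per demanded lane, how much of the source element's sign run reaches that sub-element. A worked illustration with assumed numbers:

// Assumed: a v2i64 source with Tmp = 40 sign bits, bitcast to v4i32 on a
// little-endian target (Scale = 2, VTBits = 32).
//   high halves (elements 1, 3): SubOffset = 0  -> min(32, 40 - 0)  = 32
//   low halves  (elements 0, 2): SubOffset = 32 -> min(32, 40 - 32) = 8
// The result is the minimum over the demanded lanes: 8 whenever a low half
// is demanded, 32 if only high halves are.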
@@ -3264,7 +3421,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
- APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements());
Tmp = VTBits - SrcVT.getScalarSizeInBits();
return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
}
@@ -3361,7 +3518,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If setcc returns 0/-1, all bits are sign bits.
// We know that we have an integer-based boolean since these operations
// are only available for integer.
- if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ if (TLI->getBooleanContents(VT.isVector(), false) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
@@ -3396,8 +3553,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
- KnownBits Known;
- computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -3421,8 +3577,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Handle NEG.
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
if (CLHS->isNullValue()) {
- KnownBits Known;
- computeKnownBits(Op.getOperand(1), Known, Depth+1);
+ KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
@@ -3538,7 +3693,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
return ComputeNumSignBits(Src, Depth + 1);
}
- case ISD::CONCAT_VECTORS:
+ case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
Tmp = std::numeric_limits<unsigned>::max();
@@ -3556,6 +3711,40 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // If we know the element index, demand any elements from the subvector and
+ // the remainder from the src it is inserted into; otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+ Tmp = std::numeric_limits<unsigned>::max();
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ if (!!DemandedSubElts) {
+ Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+ APInt DemandedSrcElts = DemandedElts & ~SubMask;
+ if (!!DemandedSrcElts) {
+ Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
+ // Not able to determine the index so just assume worst case.
+ Tmp = ComputeNumSignBits(Sub, Depth + 1);
+ if (Tmp == 1) return 1; // early-out
+ Tmp2 = ComputeNumSignBits(Src, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+ }
// If we are looking at the loaded value of the SDNode.
if (Op.getResNo() == 0) {
@@ -3587,8 +3776,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
- KnownBits Known;
- computeKnownBits(Op, Known, DemandedElts, Depth);
+ KnownBits Known = computeKnownBits(Op, DemandedElts, Depth);
APInt Mask;
if (Known.isNonNegative()) { // sign bit is 0
@@ -3622,21 +3810,121 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
return true;
}
-bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
- if (getTarget().Options.NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
- if (Op->getFlags().hasNoNaNs())
- return true;
+ if (Depth == 6)
+ return false; // Limit search depth.
+ // TODO: Handle vectors.
// If the value is a constant, we can obviously see if it is a NaN or not.
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
- return !C->getValueAPF().isNaN();
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ return !C->getValueAPF().isNaN() ||
+ (SNaN && !C->getValueAPF().isSignaling());
+ }
- // TODO: Recognize more cases here.
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ if (SNaN)
+ return true;
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ }
+ case ISD::FCANONICALIZE:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FROUND:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::FABS:
+ case ISD::FNEG:
+ case ISD::FCOPYSIGN: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SELECT:
+ return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return true;
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (SNaN)
+ return true;
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case ISD::FSQRT: // Needs the operand to be known positive
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FPOWI:
+ case ISD::FPOW: {
+ if (SNaN)
+ return true;
+ // TODO: Refine on operand
+ return false;
+ }
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Only one needs to be known not-nan, since it will be returned if the
+ // other ends up being one.
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE: {
+ if (SNaN)
+ return true;
+ // This can return a NaN if either operand is an sNaN, or if both operands
+ // are NaN.
+ return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) ||
+ (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) &&
+ isKnownNeverSNaN(Op.getOperand(0), Depth + 1));
+ }
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ // TODO: Does this quiet or return the original NaN as-is?
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ default:
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
+ return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth);
+ }
- return false;
+ return false;
+ }
}
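The added SNaN flag asks the weaker question "can this ever be a signaling NaN?", which far more nodes can answer than full never-NaN. A sketch of the two query strengths, with DAG and Op assumed to be in scope:

// Sketch only: two strengths of the same query.
bool NeverAnyNaN = DAG.isKnownNeverNaN(Op);                 // never a NaN at all
bool NeverSNaN   = DAG.isKnownNeverNaN(Op, /*SNaN=*/true);  // never a signaling NaN
// The arithmetic nodes above (FADD, FMUL, ...) quiet their inputs, so the
// second query succeeds for them even when the first cannot be proven.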
bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
@@ -3690,10 +3978,39 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
- KnownBits AKnown, BKnown;
- computeKnownBits(A, AKnown);
- computeKnownBits(B, BKnown);
- return (AKnown.Zero | BKnown.Zero).isAllOnesValue();
+ return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
+}
+
+static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ SelectionDAG &DAG) {
+ int NumOps = Ops.size();
+ assert(NumOps != 0 && "Can't build an empty vector!");
+ assert(VT.getVectorNumElements() == (unsigned)NumOps &&
+ "Incorrect element count in BUILD_VECTOR!");
+
+ // BUILD_VECTOR of UNDEFs is UNDEF.
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+ // BUILD_VECTOR of sequential extracts from the same vector of matching type is an identity.
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (int i = 0; i != NumOps; ++i) {
+ if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Ops[i].getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) ||
+ !isa<ConstantSDNode>(Ops[i].getOperand(1)) ||
+ cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) {
+ IsIdentity = false;
+ break;
+ }
+ IdentitySrc = Ops[i].getOperand(0);
+ }
+ if (IsIdentity)
+ return IdentitySrc;
+
+ return SDValue();
}
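The identity check recognizes a BUILD_VECTOR that merely re-extracts every lane of one source vector in order. An assumed illustration of the shape it now folds:

// Assumed example of the fold above:
//   t0: v4i32 = ...
//   BUILD_VECTOR (extract_vector_elt t0, 0), (extract_vector_elt t0, 1),
//                (extract_vector_elt t0, 2), (extract_vector_elt t0, 3)
// collapses back to t0, since rebuilding every lane in order is a no-op;
// a BUILD_VECTOR whose operands are all undef likewise folds to UNDEF.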
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
@@ -3779,9 +4096,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SIGN_EXTEND:
return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
+ case ISD::TRUNCATE:
+ if (C->isOpaque())
+ break;
+ LLVM_FALLTHROUGH;
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::TRUNCATE:
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::UINT_TO_FP:
@@ -3947,6 +4267,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {Operand};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
case ISD::FP_EXTEND:
assert(VT.isFloatingPoint() &&
@@ -4045,6 +4372,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(Operand.getValueType().bitsLE(VT) &&
+ "The input must be the same size or smaller than the result.");
+ assert(VT.getVectorNumElements() <
+ Operand.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ break;
case ISD::ABS:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid ABS!");
@@ -4151,6 +4488,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
+ case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true);
+ case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true);
+ case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true);
+ case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
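The saturating opcodes constant-fold through APInt's saturating helpers, clamping to the type's extremes instead of wrapping. A small assumed example for i8:

// Assumed i8 example of the saturating folds added above.
APInt A(8, 100), B(8, 100);
APInt S = A.sadd_sat(B);              // signed 100 + 100 clamps to 127, not -56
APInt U = A.uadd_sat(B);              // unsigned 200 fits in 8 bits, unchanged
APInt D = A.usub_sat(APInt(8, 200));  // unsigned 100 - 200 clamps to 0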
@@ -4258,14 +4599,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
return FoldSymbolOffset(Opcode, VT, GA, Cst1);
- // For vectors extract each constant element into Inputs so we can constant
- // fold them individually.
- BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
- BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
- if (!BV1 || !BV2)
+ // For vectors, extract each constant element and fold them individually.
+ // Either input may be an undef value.
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ if (!BV1 && !Cst1->isUndef())
+ return SDValue();
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV2 && !Cst2->isUndef())
+ return SDValue();
+ // If both operands are undef, that's handled the same way as scalars.
+ if (!BV1 && !BV2)
return SDValue();
- assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+ assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) &&
+ "Vector binop with different number of elements in operands?");
EVT SVT = VT.getScalarType();
EVT LegalSVT = SVT;
@@ -4275,15 +4622,15 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
return SDValue();
}
SmallVector<SDValue, 4> Outputs;
- for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
- SDValue V1 = BV1->getOperand(I);
- SDValue V2 = BV2->getOperand(I);
-
+ unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands();
+ for (unsigned I = 0; I != NumOps; ++I) {
+ SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT);
+ SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT);
if (SVT.isInteger()) {
- if (V1->getValueType(0).bitsGT(SVT))
- V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
- if (V2->getValueType(0).bitsGT(SVT))
- V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ if (V1->getValueType(0).bitsGT(SVT))
+ V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
+ if (V2->getValueType(0).bitsGT(SVT))
+ V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
}
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
@@ -4436,6 +4783,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N2.getOpcode() == ISD::EntryToken) return N1;
if (N1 == N2) return N1;
break;
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
@@ -4477,6 +4831,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -4499,6 +4857,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ if (SDValue V = simplifyShift(N1, N2))
+ return V;
+ LLVM_FALLTHROUGH;
case ISD::ROTL:
case ISD::ROTR:
assert(VT == N1.getValueType() &&
@@ -4507,7 +4868,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Shifts only work on integers");
assert((!VT.isVector() || VT == N2.getValueType()) &&
"Vector shift amounts must be in the same as their first arg");
- // Verify that the shift amount VT is bit enough to hold valid shift
+ // Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
@@ -4555,8 +4916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(!EVT.isVector() &&
"AssertSExt/AssertZExt type should be the vector element type "
"rather than the vector type!");
- assert(EVT.bitsLE(VT) && "Not extending!");
- if (VT == EVT) return N1; // noop assertion.
+ assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
+ if (VT.getScalarType() == EVT) return N1; // noop assertion.
break;
}
case ISD::SIGN_EXTEND_INREG: {
@@ -4793,14 +5154,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
}
- // Any FP binop with an undef operand is folded to NaN. This matches the
- // behavior of the IR optimizer.
switch (Opcode) {
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
+ // If both operands are undef, the result is undef. If one operand is undef,
+ // the result is NaN. This should match the behavior of the IR optimizer.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
if (N1.isUndef() || N2.isUndef())
return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
}
@@ -4819,9 +5182,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
}
}
@@ -4837,21 +5199,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getConstant(0, DL, VT);
LLVM_FALLTHROUGH;
case ISD::ADD:
- case ISD::ADDC:
- case ISD::ADDE:
case ISD::SUB:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL:
return getUNDEF(VT); // fold op(arg1, undef) -> undef
case ISD::MUL:
case ISD::AND:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
case ISD::OR:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
return getAllOnesConstant(DL, VT);
}
}
@@ -4907,6 +5268,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
break;
}
+ case ISD::BUILD_VECTOR: {
+ // Attempt to simplify BUILD_VECTOR.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
@@ -4915,6 +5283,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::SETCC: {
+ assert(VT.isInteger() && "SETCC result type must be an integer!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ "SETCC operands must have the same type!");
+ assert(VT.isVector() == N1.getValueType().isVector() &&
+ "SETCC type should be vector iff the operand type is vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) &&
+ "SETCC vector element counts must match!");
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
return V;
@@ -4927,13 +5303,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::SELECT:
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
- if (N1C->getZExtValue())
- return N2; // select true, X, Y -> X
- return N3; // select false, X, Y -> Y
- }
-
- if (N2 == N3) return N2; // select C, X, X -> X
+ case ISD::VSELECT:
+ if (SDValue V = simplifySelect(N1, N2, N3))
+ return V;
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
@@ -5048,8 +5420,11 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
assert(C->getAPIntValue().getBitWidth() == 8);
APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
- if (VT.isInteger())
- return DAG.getConstant(Val, dl, VT);
+ if (VT.isInteger()) {
+ bool IsOpaque = VT.getSizeInBits() > 64 ||
+ !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue());
+ return DAG.getConstant(Val, dl, VT, false, IsOpaque);
+ }
return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
VT);
}
@@ -5229,12 +5604,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- // FIXME: Only does this for 64-bit or more since we don't have proper
- // cost model for unaligned load / store.
bool Fast;
- if (NumMemOps && AllowOverlap &&
- VTSize >= 8 && NewVTSize < Size &&
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)
+ if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
+ Fast)
VTSize = Size;
else {
VT = NewVT;
@@ -6495,11 +6868,11 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
- SDValue Ptr, SDValue Mask, SDValue Src0,
+ SDValue Ptr, SDValue Mask, SDValue PassThru,
EVT MemVT, MachineMemOperand *MMO,
ISD::LoadExtType ExtTy, bool isExpanding) {
SDVTList VTs = getVTList(VT, MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
+ SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
@@ -6530,7 +6903,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
"Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ SDValue Ops[] = { Chain, Val, Ptr, Mask };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
@@ -6574,12 +6947,12 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
VTs, VT, MMO);
createOperands(N, Ops);
- assert(N->getValue().getValueType() == N->getValueType(0) &&
+ assert(N->getPassThru().getValueType() == N->getValueType(0) &&
"Incompatible type of the PassThru value in MaskedGatherSDNode");
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() ==
+ assert(N->getIndex().getValueType().getVectorNumElements() >=
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
@@ -6616,7 +6989,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() ==
+ assert(N->getIndex().getValueType().getVectorNumElements() >=
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
@@ -6630,6 +7003,60 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
+ // select undef, T, F --> T (if T is a constant), otherwise F
+ // select ?, undef, F --> F
+ // select ?, T, undef --> T
+ if (Cond.isUndef())
+ return isConstantValueOfAnyType(T) ? T : F;
+ if (T.isUndef())
+ return F;
+ if (F.isUndef())
+ return T;
+
+ // select true, T, F --> T
+ // select false, T, F --> F
+ if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
+ return CondC->isNullValue() ? F : T;
+
+ // TODO: This should simplify VSELECT with constant condition using something
+ // like this (but check boolean contents to be complete?):
+ // if (ISD::isBuildVectorAllOnes(Cond.getNode()))
+ // return T;
+ // if (ISD::isBuildVectorAllZeros(Cond.getNode()))
+ // return F;
+
+ // select ?, T, T --> T
+ if (T == F)
+ return T;
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
+ // shift undef, Y --> 0 (can always assume that the undef value is 0)
+ if (X.isUndef())
+ return getConstant(0, SDLoc(X.getNode()), X.getValueType());
+ // shift X, undef --> undef (because it may shift by the bitwidth)
+ if (Y.isUndef())
+ return getUNDEF(X.getValueType());
+
+ // shift 0, Y --> 0
+ // shift X, 0 --> X
+ if (isNullOrNullSplat(X) || isNullOrNullSplat(Y))
+ return X;
+
+ // shift X, C >= bitwidth(X) --> undef
+ // All vector elements must be too big (or undef) to avoid partial undefs.
+ auto isShiftTooBig = [X](ConstantSDNode *Val) {
+ return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits());
+ };
+ if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true))
+ return getUNDEF(X.getValueType());
+
+ return SDValue();
+}
+
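These helpers centralize the undef and identity folds that getNode now applies before building a shift or select node. A couple of the resulting folds on assumed values (X is an i32 SDValue, DL a debug location):

// Sketch only: folds performed by getNode via simplifyShift.
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue Same = DAG.getNode(ISD::SRL, DL, MVT::i32, X, Zero); // shift by 0 -> X
SDValue Big  = DAG.getConstant(32, DL, MVT::i32);
SDValue Und  = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Big);  // amount >= bitwidth -> undef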
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue SV, unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
@@ -6659,12 +7086,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
- case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags);
default: break;
}
switch (Opcode) {
default: break;
+ case ISD::BUILD_VECTOR:
+ // Attempt to simplify BUILD_VECTOR.
+ if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
+ return V;
+ break;
case ISD::CONCAT_VECTORS:
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
@@ -6880,7 +7312,7 @@ SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(NumVTs);
- std::copy(VTs.begin(), VTs.end(), Array);
+ llvm::copy(VTs, Array);
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
VTListMap.InsertNode(Result, IP);
}
@@ -7010,6 +7442,27 @@ void SDNode::DropOperands() {
}
}
+void SelectionDAG::setNodeMemRefs(MachineSDNode *N,
+ ArrayRef<MachineMemOperand *> NewMemRefs) {
+ if (NewMemRefs.empty()) {
+ N->clearMemRefs();
+ return;
+ }
+
+ // Check if we can avoid allocating by storing a single reference directly.
+ if (NewMemRefs.size() == 1) {
+ N->MemRefs = NewMemRefs[0];
+ N->NumMemRefs = 1;
+ return;
+ }
+
+ MachineMemOperand **MemRefsBuffer =
+ Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size());
+ llvm::copy(NewMemRefs, MemRefsBuffer);
+ N->MemRefs = MemRefsBuffer;
+ N->NumMemRefs = static_cast<int>(NewMemRefs.size());
+}
+
/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
/// machine opcode.
///
@@ -7152,7 +7605,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
// For MachineNode, initialize the memory references information.
if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
- MN->setMemRefs(nullptr, nullptr);
+ MN->clearMemRefs();
// Swap for an appropriately sized array from the recycler.
removeOperands(N);
@@ -7202,6 +7655,12 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
NewOpc = ISD::FNEARBYINT;
IsUnary = true;
break;
+ case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
+ case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
+ case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
+ case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
+ case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
}
// We're taking this node out of the chain, so we need to re-link things.
@@ -7488,8 +7947,11 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
Dbg->getDebugLoc(), Dbg->getOrder());
ClonedDVs.push_back(Clone);
- if (InvalidateDbg)
+ if (InvalidateDbg) {
+ // Invalidate value and indicate the SDDbgValue should not be emitted.
Dbg->setIsInvalidated();
+ Dbg->setIsEmitted();
+ }
}
for (SDDbgValue *Dbg : ClonedDVs)
@@ -7526,6 +7988,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
+ DV->setIsEmitted();
LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
N0.getNode()->dumprFull(this);
dbgs() << " into " << *DIExpr << '\n');
@@ -7688,7 +8151,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
// Preserve Debug Info.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
- transferDbgValues(SDValue(From, i), *To);
+ transferDbgValues(SDValue(From, i), To[i]);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -7700,18 +8163,22 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
- // A user can appear in a use list multiple times, and when this
- // happens the uses are usually next to each other in the list.
- // To help reduce the number of CSE recomputations, process all
- // the uses of this user that we can find this way.
+ // A user can appear in a use list multiple times, and when this happens the
+ // uses are usually next to each other in the list. To help reduce the
+ // number of CSE and divergence recomputations, process all the uses of this
+ // user that we can find this way.
+ bool To_IsDivergent = false;
do {
SDUse &Use = UI.getUse();
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
- if (To->getNode()->isDivergent() != From->isDivergent())
- updateDivergence(User);
+ To_IsDivergent |= ToOp->isDivergent();
} while (UI != UE && *UI == User);
+
+ if (To_IsDivergent != From->isDivergent())
+ updateDivergence(User);
+
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7842,6 +8309,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
}
}
+#ifndef NDEBUG
void SelectionDAG::VerifyDAGDiverence()
{
std::vector<SDNode*> TopoOrder;
@@ -7868,6 +8336,7 @@ void SelectionDAG::VerifyDAGDiverence()
"Divergence bit inconsistency detected\n");
}
}
+#endif
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
@@ -7901,7 +8370,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
}
// Sort the uses, so that all the uses from a given User are together.
- llvm::sort(Uses.begin(), Uses.end());
+ llvm::sort(Uses);
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
@@ -8053,6 +8522,32 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
return TokenFactor;
}
+SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
+ Function **OutFunction) {
+ assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");
+
+ auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ auto *Module = MF->getFunction().getParent();
+ auto *Function = Module->getFunction(Symbol);
+
+ if (OutFunction != nullptr)
+ *OutFunction = Function;
+
+ if (Function != nullptr) {
+ auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace());
+ return getGlobalAddress(Function, SDLoc(Op), PtrTy);
+ }
+
+ std::string ErrorStr;
+ raw_string_ostream ErrorFormatter(ErrorStr);
+
+ ErrorFormatter << "Undefined external symbol ";
+ ErrorFormatter << '"' << Symbol << '"';
+ ErrorFormatter.flush();
+
+ report_fatal_error(ErrorStr);
+}
+
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
@@ -8077,11 +8572,26 @@ bool llvm::isOneConstant(SDValue V) {
return Const != nullptr && Const->isOne();
}
+SDValue llvm::peekThroughBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
+SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse())
+ V = V.getOperand(0);
+ return V;
+}
+
bool llvm::isBitwiseNot(SDValue V) {
- return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
+ if (V.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1)));
+ return C && C->isAllOnesValue();
}
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -8090,9 +8600,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- // FIXME: We blindly ignore splats which include undef which is overly
- // pessimistic.
- if (CN && UndefElements.none() &&
+ if (CN && (UndefElements.none() || AllowUndefs) &&
CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
@@ -8100,21 +8608,40 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
return nullptr;
}
-ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
-
- if (CN && UndefElements.none())
+ if (CN && (UndefElements.none() || AllowUndefs))
return CN;
}
return nullptr;
}
+bool llvm::isNullOrNullSplat(SDValue N) {
+ // TODO: may want to use peekThroughBitcasts() here.
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isNullValue();
+}
+
+bool llvm::isOneOrOneSplat(SDValue N) {
+ // TODO: may want to use peekThroughBitcasts() here.
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N) {
+ N = peekThroughBitcasts(N);
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ ConstantSDNode *C = isConstOrConstSplat(N);
+ return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+}
+
HandleSDNode::~HandleSDNode() {
DropOperands();
}
@@ -8318,6 +8845,64 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
this->Flags.intersectWith(Flags);
}
+SDValue
+SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
+ ArrayRef<ISD::NodeType> CandidateBinOps) {
+ // The pattern must end in an extract from index 0.
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isNullConstant(Extract->getOperand(1)))
+ return SDValue();
+
+ SDValue Op = Extract->getOperand(0);
+ unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
+
+ // Match against one of the candidate binary ops.
+ if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
+ return Op.getOpcode() == unsigned(BinOp);
+ }))
+ return SDValue();
+
+ // At each stage, we're looking for something that looks like:
+ // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
+ // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
+ // i32 undef, i32 undef, i32 undef, i32 undef>
+ // %a = binop <8 x i32> %op, %s
+ // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
+ // we expect something like:
+ // <4,5,6,7,u,u,u,u>
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ unsigned CandidateBinOp = Op.getOpcode();
+ for (unsigned i = 0; i < Stages; ++i) {
+ if (Op.getOpcode() != CandidateBinOp)
+ return SDValue();
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0);
+ if (Shuffle) {
+ Op = Op1;
+ } else {
+ Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1);
+ Op = Op0;
+ }
+
+ // The first operand of the shuffle should be the same as the other operand
+ // of the binop.
+ if (!Shuffle || Shuffle->getOperand(0) != Op)
+ return SDValue();
+
+ // Verify the shuffle has the expected (at this stage of the pyramid) mask.
+ for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
+ return SDValue();
+ }
+
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return Op;
+}
+
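The shuffle masks that matchBinOpReduction expects at each stage of the pyramid can be reproduced with a tiny standalone program (an assumed illustration only, using eight lanes as in the comment above):

#include <cstdio>

int main() {
  const unsigned NumElts = 8;                 // three reduction stages
  for (unsigned MaskEnd = NumElts / 2; MaskEnd >= 1; MaskEnd /= 2) {
    std::printf("<");
    for (unsigned Index = 0; Index < NumElts; ++Index) {
      if (Index < MaskEnd)
        std::printf("%u", MaskEnd + Index);   // required mask element
      else
        std::printf("u");                     // undef / don't-care lane
      if (Index + 1 != NumElts)
        std::printf(",");
    }
    std::printf(">\n");  // prints <4,5,6,7,u,u,u,u>, <2,3,u,...>, <1,u,...>
  }
  return 0;
}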
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -8681,8 +9266,11 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
assert(!Node->OperandList && "Node already has operands");
+ assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >=
+ Vals.size() &&
+ "too many operands to fit into SDNode");
SDUse *Ops = OperandRecycler.allocate(
- ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+ ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
bool IsDivergent = false;
for (unsigned I = 0; I != Vals.size(); ++I) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index c859f16e74fe..488bac1a9a80 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -19,8 +19,9 @@
using namespace llvm;
-bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
- const SelectionDAG &DAG, int64_t &Off) {
+bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
+ const SelectionDAG &DAG,
+ int64_t &Off) const {
// Conservatively fail if a match failed.
if (!Base.getNode() || !Other.Base.getNode())
return false;
@@ -75,7 +76,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
}
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
+BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
const SelectionDAG &DAG) {
SDValue Ptr = N->getBasePtr();
@@ -106,14 +107,14 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1)))
if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) {
Offset += C->getSExtValue();
- Base = Base->getOperand(0);
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
continue;
}
break;
case ISD::ADD:
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
Offset += C->getSExtValue();
- Base = Base->getOperand(0);
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0));
continue;
}
break;
@@ -129,7 +130,7 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
Offset -= Off;
else
Offset += Off;
- Base = LSBase->getBasePtr();
+ Base = DAG.getTargetLoweringInfo().unwrapAddress(LSBase->getBasePtr());
continue;
}
break;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c1c15514c09a..871ab9b29881 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -88,6 +88,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -121,6 +122,7 @@
#include <vector>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "isel"
@@ -614,6 +616,32 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
std::reverse(Parts, Parts + OrigNumParts);
}
+static SDValue widenVectorToPartType(SelectionDAG &DAG,
+ SDValue Val, const SDLoc &DL, EVT PartVT) {
+ if (!PartVT.isVector())
+ return SDValue();
+
+ EVT ValueVT = Val.getValueType();
+ unsigned PartNumElts = PartVT.getVectorNumElements();
+ unsigned ValueNumElts = ValueVT.getVectorNumElements();
+ if (PartNumElts > ValueNumElts &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ DAG.ExtractVectorElements(Val, Ops);
+ SDValue EltUndef = DAG.getUNDEF(ElementVT);
+ for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
+ Ops.push_back(EltUndef);
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+ return DAG.getBuildVector(PartVT, DL, Ops);
+ }
+
+ return SDValue();
+}
+
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
@@ -632,28 +660,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
- } else if (PartVT.isVector() &&
- PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
- PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
- EVT ElementVT = PartVT.getVectorElementType();
- // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
- // undef elements.
- SmallVector<SDValue, 16> Ops;
- for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
- Ops.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
-
- for (unsigned i = ValueVT.getVectorNumElements(),
- e = PartVT.getVectorNumElements(); i != e; ++i)
- Ops.push_back(DAG.getUNDEF(ElementVT));
-
- Val = DAG.getBuildVector(PartVT, DL, Ops);
-
- // FIXME: Use CONCAT for 2x -> 4x.
-
- //SDValue UndefElts = DAG.getUNDEF(VectorTy);
- //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
+ Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
@@ -695,33 +703,38 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
- unsigned NumElements = ValueVT.getVectorNumElements();
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ unsigned IntermediateNumElts = IntermediateVT.isVector() ?
+ IntermediateVT.getVectorNumElements() : 1;
+
// Convert the vector to the appropriate type if necessary.
- unsigned DestVectorNoElts =
- NumIntermediates *
- (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+ unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
+
EVT BuiltVectorTy = EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
- if (Val.getValueType() != BuiltVectorTy)
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ if (ValueVT != BuiltVectorTy) {
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
+ Val = Widened;
+
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+ }
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
- if (IntermediateVT.isVector())
- Ops[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
- DAG.getConstant(i * (NumElements / NumIntermediates), DL,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
- else
+ if (IntermediateVT.isVector()) {
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
+ } else {
Ops[i] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getConstant(i, DL, IdxVT));
+ }
}
// Split the intermediate operands into legal parts.
@@ -810,7 +823,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// If the source register was virtual and if we know something about it,
// add an assert node.
if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
- !RegisterVT.isInteger() || RegisterVT.isVector())
+ !RegisterVT.isInteger())
continue;
const FunctionLoweringInfo::LiveOutInfo *LOI =
@@ -818,7 +831,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
if (!LOI)
continue;
- unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned RegSize = RegisterVT.getScalarSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
@@ -1019,8 +1032,19 @@ SDValue SelectionDAGBuilder::getRoot() {
}
// Otherwise, we have to make a token factor node.
- SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
- PendingLoads);
+ // If we have >= 2^16 loads then split across multiple token factors as
+ // there's a 64k limit on the number of SDNode operands.
+ SDValue Root;
+ size_t Limit = (1 << 16) - 1;
+ while (PendingLoads.size() > Limit) {
+ unsigned SliceIdx = PendingLoads.size() - Limit;
+ auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
+ SDValue NewTF =
+ DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
+ PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
+ PendingLoads.emplace_back(NewTF);
+ }
+ Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
PendingLoads.clear();
DAG.setRoot(Root);
return Root;
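A minimal sketch of the chunking idea used above (standalone code under assumed names, not the DAG API): keep folding the trailing Limit entries into one combined node until the final node fits under the operand limit.

#include <numeric>
#include <vector>

// Stand-in for DAG.getNode(ISD::TokenFactor, ...): combine a slice of chains.
static int makeTokenFactor(const std::vector<int> &Ops) {
  return std::accumulate(Ops.begin(), Ops.end(), 0);
}

int buildRoot(std::vector<int> Pending) {
  const size_t Limit = (1 << 16) - 1;           // max operands per SDNode
  while (Pending.size() > Limit) {
    size_t SliceIdx = Pending.size() - Limit;
    std::vector<int> Slice(Pending.begin() + SliceIdx, Pending.end());
    Pending.erase(Pending.begin() + SliceIdx, Pending.end());
    Pending.push_back(makeTokenFactor(Slice));  // re-queue the combined node
  }
  return makeTokenFactor(Pending);              // final root token factor
}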
@@ -1054,7 +1078,7 @@ SDValue SelectionDAGBuilder::getControlRoot() {
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
- if (isa<TerminatorInst>(&I)) {
+ if (I.isTerminator()) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
@@ -1082,7 +1106,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
}
}
- if (!isa<TerminatorInst>(&I) && !HasTailCall &&
+ if (!I.isTerminator() && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
@@ -1178,7 +1202,8 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), InReg, Ty, getABIRegCopyCC(V));
+ DAG.getDataLayout(), InReg, Ty,
+ None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
@@ -1437,8 +1462,11 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
// the start of an EH scope/funclet.
FuncInfo.MBB->setIsEHScopeEntry();
- FuncInfo.MBB->setIsEHFuncletEntry();
- FuncInfo.MBB->setIsCleanupFuncletEntry();
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ if (Pers != EHPersonality::Wasm_CXX) {
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+ }
}
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
@@ -1458,6 +1486,7 @@ static void findUnwindDestinations(
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
while (EHPadBB) {
@@ -1472,7 +1501,8 @@ static void findUnwindDestinations(
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
- UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsWasmCXX)
+ UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
@@ -1807,7 +1837,6 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
SwitchCases.push_back(CB);
}
-/// FindMergedConditions - If Cond is an expression like
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -1819,13 +1848,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
bool InvertCond) {
// Skip over nodes that are not part of the tree and remember to invert op and
// operands at the next level.
- if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
- const Value *CondOp = BinaryOperator::getNotArgument(Cond);
- if (InBlock(CondOp, CurBB->getBasicBlock())) {
- FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
- !InvertCond);
- return;
- }
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ InBlock(NotCond, CurBB->getBasicBlock())) {
+ FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
@@ -2193,12 +2221,11 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
MachinePointerInfo MPInfo(Global);
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
- *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
- DAG.getEVTAlignment(PtrTy));
- Node->setMemRefs(MemRefs, MemRefs + 1);
+ MachineMemOperand *MemRef = MF.getMachineMemOperand(
+ MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
+ DAG.setNodeMemRefs(Node, {MemRef});
}
return SDValue(Node, 0);
}
@@ -2514,9 +2541,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
- MachineBasicBlock *MBB = FuncInfo.MBB;
- addLandingPadInfo(LP, *MBB);
-
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -2567,8 +2591,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
assert(CC.Low == CC.High && "Input clusters must be single-case");
#endif
- llvm::sort(Clusters.begin(), Clusters.end(),
- [](const CaseCluster &a, const CaseCluster &b) {
+ llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
return a.Low->getValue().slt(b.Low->getValue());
});
@@ -2789,6 +2812,15 @@ static bool isVectorReductionOp(const User *I) {
return ReduxExtracted;
}
+void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+
+ SDValue Op = getValue(I.getOperand(0));
+ SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
+ Op, Flags);
+ setValue(&I, UnNodeValue);
+}
+
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
@@ -2815,7 +2847,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op2 = getValue(I.getOperand(1));
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
- Op2.getValueType(), DAG.getDataLayout());
+ Op1.getValueType(), DAG.getDataLayout());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -2932,7 +2964,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
ISD::VSELECT : ISD::SELECT;
// Min/max matching is only viable if all output VTs are the same.
- if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
+ if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
@@ -2960,16 +2992,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY: {
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
Opc = ISD::FMINNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
- Opc = ISD::FMINNAN;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
+ Opc = ISD::FMINIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
- ISD::FMINNUM : ISD::FMINNAN;
+ ISD::FMINNUM : ISD::FMINIMUM;
break;
}
}
@@ -2977,17 +3009,17 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
Opc = ISD::FMAXNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
- Opc = ISD::FMAXNAN;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
+ Opc = ISD::FMAXIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
- ISD::FMAXNUM : ISD::FMAXNAN;
+ ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
@@ -3662,8 +3694,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA && AA->pointsToConstantMemory(MemoryLocation(
- SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
+ else if (AA &&
+ AA->pointsToConstantMemory(MemoryLocation(
+ SV,
+ LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3774,9 +3809,12 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
Type *Ty = I.getType();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
- SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
- "load_from_swift_error should not be constant memory");
+ assert(
+ (!AA ||
+ !AA->pointsToConstantMemory(MemoryLocation(
+ SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
+ AAInfo))) &&
+ "load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -4063,8 +4101,12 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
+ bool AddToChain =
+ !AA || !AA->pointsToConstantMemory(MemoryLocation(
+ PtrOperand,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo));
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO =
@@ -4105,10 +4147,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
const Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
bool ConstantMemory = false;
- if (UniformBase &&
- AA && AA->pointsToConstantMemory(MemoryLocation(
- BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
- AAInfo))) {
+ if (UniformBase && AA &&
+ AA->pointsToConstantMemory(
+ MemoryLocation(BasePtr,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -5038,6 +5082,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
return nullptr;
+ case Intrinsic::sponentry:
+ setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return nullptr;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
@@ -5176,7 +5224,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
- const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
+ const auto &DI = cast<DbgVariableIntrinsic>(I);
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
@@ -5276,7 +5324,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
SDDbgValue *SDV;
- if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+ isa<ConstantPointerNull>(V)) {
SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
return nullptr;
@@ -5553,8 +5602,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::minnum: {
auto VT = getValue(I.getArgOperand(0)).getValueType();
unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)
- ? ISD::FMINNAN
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)
+ ? ISD::FMINIMUM
: ISD::FMINNUM;
setValue(&I, DAG.getNode(Opc, sdl, VT,
getValue(I.getArgOperand(0)),
@@ -5564,14 +5613,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::maxnum: {
auto VT = getValue(I.getArgOperand(0)).getValueType();
unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)
- ? ISD::FMAXNAN
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)
+ ? ISD::FMAXIMUM
: ISD::FMAXNUM;
setValue(&I, DAG.getNode(Opc, sdl, VT,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return nullptr;
}
+ case Intrinsic::minimum:
+ setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::maximum:
+ setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5603,6 +5664,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return nullptr;
case Intrinsic::fmuladd: {
@@ -5693,43 +5760,94 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
+ SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
+ SDValue Zero = DAG.getConstant(0, sdl, VT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
- // When X == Y, this is rotate. Create the node directly if legal.
- // TODO: This should also be done if the operation is custom, but we have
- // to make sure targets are handling the modulo shift amount as expected.
- // TODO: If the rotate direction (left or right) corresponding to the shift
- // is not available, adjust the shift value and invert the direction.
- auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
- if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) {
- setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
+ if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
+ setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
return nullptr;
}
- // Get the shift amount and inverse shift amount, modulo the bit-width.
- SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
- SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
- SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z);
- SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+ // When X == Y, this is a rotate. If the data type has a power-of-2 size, we
+ // avoid the select that is necessary in the general case to filter out
+ // the 0-shift possibility that leads to UB.
+ if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) {
+ auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ return nullptr;
+ }
- // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW))
- // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW))
+ // Some targets only rotate one way. Try the opposite direction.
+ RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
+ if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
+ // Negate the shift amount because it is safe to ignore the high bits.
+ SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
+ return nullptr;
+ }
+
+ // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
+ // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
+ SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
+ SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+ SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
+ setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
+ return nullptr;
+ }
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
- SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
-
- // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would
- // be X | Y. If X == Y (rotate), that's fine. If not, we have to select.
- if (X != Y) {
- SDValue Zero = DAG.getConstant(0, sdl, VT);
- EVT CCVT = MVT::i1;
- if (VT.isVector())
- CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
- // For fshl, 0 shift returns the 1st arg (X).
- // For fshr, 0 shift returns the 2nd arg (Y).
- SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
- Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res);
- }
- setValue(&I, Res);
+ SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
+ // and that is undefined. We must compare and select to avoid UB.
+ EVT CCVT = MVT::i1;
+ if (VT.isVector())
+ CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
+
+ // For fshl, 0-shift returns the 1st arg (X).
+ // For fshr, 0-shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
+ setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
+ return nullptr;
+ }
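For reference, a hedged standalone sketch of the generic expansion above for 32-bit scalars (plain C++ rather than SDNodes; the zero-shift guard mirrors the compare-and-select emitted above):

#include <cstdint>

uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t ShAmt = Z % BW;
  if (ShAmt == 0)
    return X;                                 // 0-shift returns the 1st arg (X)
  return (X << ShAmt) | (Y >> (BW - ShAmt));  // fshl formula from the comment
}

uint32_t fshr32(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t ShAmt = Z % BW;
  if (ShAmt == 0)
    return Y;                                 // 0-shift returns the 2nd arg (Y)
  return (X << (BW - ShAmt)) | (Y >> ShAmt);  // fshr formula from the comment
}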
+ case Intrinsic::sadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::uadd_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::ssub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::usub_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::smul_fix: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I,
+ DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3));
return nullptr;
}
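The saturating intrinsics added above map directly onto the new ISD opcodes; as a quick semantic reminder, a hedged scalar sketch (illustrative helper names, not LLVM code):

#include <cstdint>
#include <limits>

int32_t sadd_sat32(int32_t A, int32_t B) {
  int64_t Sum = int64_t(A) + int64_t(B);           // widen to avoid overflow UB
  if (Sum > std::numeric_limits<int32_t>::max())
    return std::numeric_limits<int32_t>::max();    // clamp high
  if (Sum < std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::min();    // clamp low
  return int32_t(Sum);
}

uint32_t uadd_sat32(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;                            // unsigned wrap is defined
  return Sum < A ? std::numeric_limits<uint32_t>::max() : Sum;
}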
case Intrinsic::stacksave: {
@@ -5824,6 +5942,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return nullptr;
}
+
+ case Intrinsic::is_constant:
+ // If this wasn't constant-folded away by now, then it's not a
+ // constant.
+ setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
+ return nullptr;
+
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::launder_invariant_group:
@@ -6224,7 +6349,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
GA->getGlobal(), getCurSDLoc(),
Val.getValueType(), GA->getOffset())});
}
- llvm::sort(Targets.begin(), Targets.end(),
+ llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
return T1.Offset < T2.Offset;
});
@@ -6243,12 +6368,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
- case Intrinsic::wasm_landingpad_index: {
- // TODO store landing pad index in a map, which will be used when generating
- // LSDA information
+ case Intrinsic::wasm_landingpad_index:
+ // The information this intrinsic contained has been transferred to
+ // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
+ // delete it now.
return nullptr;
}
- }
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
@@ -6311,6 +6436,24 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_nearbyint:
Opcode = ISD::STRICT_FNEARBYINT;
break;
+ case Intrinsic::experimental_constrained_maxnum:
+ Opcode = ISD::STRICT_FMAXNUM;
+ break;
+ case Intrinsic::experimental_constrained_minnum:
+ Opcode = ISD::STRICT_FMINNUM;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ Opcode = ISD::STRICT_FCEIL;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ Opcode = ISD::STRICT_FFLOOR;
+ break;
+ case Intrinsic::experimental_constrained_round:
+ Opcode = ISD::STRICT_FROUND;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ Opcode = ISD::STRICT_FTRUNC;
+ break;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getRoot();
@@ -6405,7 +6548,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
BeginLabel, EndLabel);
- } else {
+ } else if (!isScopedEHPersonality(Pers)) {
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
}
@@ -7200,10 +7343,11 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
- const SDLoc &DL, SDISelAsmOperandInfo &OpInfo,
+static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
@@ -7211,13 +7355,21 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
- std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode,
- RefOpInfo.ConstraintVT);
+ unsigned AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
+ &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
+ // RC is unset only on failure. Return immediately.
+ if (!RC)
+ return;
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
- unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
- // If this is a FP operand in an integer register (or visa versa), or more
+ // If this is an FP operand in an integer register (or vice versa), or more
// generally if the operand value disagrees with the register class we plan
// to stick it in, fix the operand type.
//
@@ -7225,34 +7377,30 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// Bitcast for output value is done at the end of visitInlineAsm().
if ((OpInfo.Type == InlineAsm::isOutput ||
OpInfo.Type == InlineAsm::isInput) &&
- PhysReg.second &&
- !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
+ !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types). Note: output bitcast is done at the end of
// visitInlineAsm().
- MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
// Exclude indirect inputs while they are unsupported because the code
// to perform the load is missing and thus OpInfo.CallOperand still
- // refer to the input address rather than the pointed-to value.
+ // refers to the input address rather than the pointed-to value.
if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
- // If the operand is a FP value and we want it in integer registers,
+ // If the operand is an FP value and we want it in integer registers,
// use the corresponding integer type. This turns an f64 value into
// i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
- RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
if (OpInfo.Type == InlineAsm::isInput)
OpInfo.CallOperand =
- DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
- OpInfo.ConstraintVT = RegVT;
+ DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = VT;
}
}
-
- NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
}
// No need to allocate a matching input constraint since the constraint it's
@@ -7260,59 +7408,38 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
if (OpInfo.isMatchingInputConstraint())
return;
- MVT RegVT;
EVT ValueVT = OpInfo.ConstraintVT;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Initialize NumRegs.
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other)
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
// If this is a constraint for a specific physical register, like {r17},
// assign it now.
- if (unsigned AssignedReg = PhysReg.first) {
- const TargetRegisterClass *RC = PhysReg.second;
- if (OpInfo.ConstraintVT == MVT::Other)
- ValueVT = *TRI.legalclasstypes_begin(*RC);
-
- // Get the actual register value type. This is important, because the user
- // may have asked for (e.g.) the AX register in i32 type. We need to
- // remember that AX is actually i16 to get the right extension.
- RegVT = *TRI.legalclasstypes_begin(*RC);
-
- // This is a explicit reference to a physical register.
- Regs.push_back(AssignedReg);
-
- // If this is an expanded reference, add the rest of the regs to Regs.
- if (NumRegs != 1) {
- TargetRegisterClass::iterator I = RC->begin();
- for (; *I != AssignedReg; ++I)
- assert(I != RC->end() && "Didn't find reg!");
-
- // Already added the first reg.
- --NumRegs; ++I;
- for (; NumRegs; --NumRegs, ++I) {
- assert(I != RC->end() && "Ran out of registers to allocate!");
- Regs.push_back(*I);
- }
- }
- OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- return;
- }
+ // If this is associated with a specific register, initialize the iterator to
+ // the correct place. If virtual, make sure we have enough registers.
- // Otherwise, if this was a reference to an LLVM register class, create vregs
- // for this reference.
- if (const TargetRegisterClass *RC = PhysReg.second) {
- RegVT = *TRI.legalclasstypes_begin(*RC);
- if (OpInfo.ConstraintVT == MVT::Other)
- ValueVT = RegVT;
+ // Initialize iterator if necessary
+ TargetRegisterClass::iterator I = RC->begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
- // Create the appropriate number of virtual registers.
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (; NumRegs; --NumRegs)
- Regs.push_back(RegInfo.createVirtualRegister(RC));
+ // Do not check for single registers.
+ if (AssignedReg) {
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "AssignedReg should be member of RC");
+ }
- OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- return;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC);
+ Regs.push_back(R);
}
- // Otherwise, we couldn't allocate enough registers for this.
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
}
static unsigned
@@ -7333,21 +7460,6 @@ findMatchingInlineAsmOperand(unsigned OperandNo,
return CurOp;
}
-/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT
-/// \return true if it has succeeded, false otherwise
-static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
- MVT RegVT, SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0, e = NumRegs; i != e; ++i) {
- if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
- Regs.push_back(RegInfo.createVirtualRegister(RC));
- else
- return false;
- }
- return true;
-}
-
namespace {
class ExtraFlags {
@@ -7404,12 +7516,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ for (auto &T : TargetConstraints) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
- MVT OpVT = MVT::Other;
-
// Compute the value type for each operand.
if (OpInfo.Type == InlineAsm::isInput ||
(OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
@@ -7423,39 +7533,37 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT =
+ OpInfo.ConstraintVT =
OpInfo
.getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
.getSimpleVT();
- }
-
- if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
+ } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpVT = TLI.getSimpleValueType(DAG.getDataLayout(),
- STy->getElementType(ResNo));
+ OpInfo.ConstraintVT = TLI.getSimpleValueType(
+ DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
+ OpInfo.ConstraintVT =
+ TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
}
++ResNo;
+ } else {
+ OpInfo.ConstraintVT = MVT::Other;
}
- OpInfo.ConstraintVT = OpVT;
-
if (!hasMemory)
hasMemory = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
- // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]?
- auto TargetConstraint = TargetConstraints[i];
+ // FIXME: Could we compute this on OpInfo rather than T?
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(TargetConstraint, SDValue());
+ TLI.ComputeConstraintToUse(T, SDValue());
- ExtraInfo.update(TargetConstraint);
+ ExtraInfo.update(T);
}
SDValue Chain, Flag;
@@ -7469,9 +7577,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Second pass over the constraints: compute which constraint option to use
// and assign registers to constraints that want a specific physreg.
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
@@ -7511,24 +7617,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
- : ConstraintOperands[i];
+ : OpInfo;
if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
}
// Third pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
- : ConstraintOperands[i];
+ : OpInfo;
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -7555,9 +7660,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// IndirectStoresToEmit - The set of stores to emit after the inline asm node.
std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
- for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
- SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
@@ -7635,9 +7738,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
SmallVector<unsigned, 4> Regs;
- if (!createVirtualRegs(Regs,
- InlineAsm::getNumOperandRegisters(OpFlag),
- RegVT, DAG)) {
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) {
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
+ MachineRegisterInfo &RegInfo =
+ DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+ } else {
emitInlineAsmError(CS, "inline asm error: This value type register "
"class is not natively supported!");
return;
@@ -7772,19 +7879,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
unsigned numRet;
ArrayRef<Type *> ResultTypes;
SmallVector<SDValue, 1> ResultValues(1);
- if (CSResultType->isSingleValueType()) {
- numRet = 1;
- ResultValues[0] = Val;
- ResultTypes = makeArrayRef(CSResultType);
- } else {
- numRet = CSResultType->getNumContainedTypes();
+ if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) {
+ numRet = StructResult->getNumElements();
assert(Val->getNumOperands() == numRet &&
"Mismatch in number of output operands in asm result");
- ResultTypes = CSResultType->subtypes();
+ ResultTypes = StructResult->elements();
ArrayRef<SDUse> ValueUses = Val->ops();
ResultValues.resize(numRet);
std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
[](const SDUse &u) -> SDValue { return u.get(); });
+ } else {
+ numRet = 1;
+ ResultValues[0] = Val;
+ ResultTypes = makeArrayRef(CSResultType);
}
SmallVector<EVT, 1> ResultVTs(numRet);
for (unsigned i = 0; i < numRet; i++) {
@@ -7922,7 +8029,8 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return Op;
APInt Hi = CR.getUnsignedMax();
- unsigned Bits = Hi.getActiveBits();
+ unsigned Bits = std::max(Hi.getActiveBits(),
+ static_cast<unsigned>(IntegerType::MIN_INT_BITS));
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
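
The hunk above clamps the computed width to at least IntegerType::MIN_INT_BITS; the static_cast is what lets std::max compile, since std::max deduces a single type for both arguments. A stand-alone sketch of the same idiom (kMinIntBits is an illustrative placeholder, not the LLVM constant):

#include <algorithm>

enum { kMinIntBits = 1 };  // placeholder for IntegerType::MIN_INT_BITS

unsigned clampBitWidth(unsigned activeBits) {
  // Without the cast, std::max(unsigned, enum) fails to deduce a common type.
  return std::max(activeBits, static_cast<unsigned>(kMinIntBits));
}
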
@@ -8677,7 +8785,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
- getABIRegCopyCC(V));
+ None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -9210,7 +9318,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
- const TerminatorInst *TI = LLVMBB->getTerminator();
+ const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
@@ -9642,7 +9750,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
}
BitTestInfo BTI;
- llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
+ llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
// Sort by probability first, number of bits second, bit mask third.
if (a.ExtraProb != b.ExtraProb)
return a.ExtraProb > b.ExtraProb;
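
The llvm::sort call above takes a comparator that orders by several keys in priority order, falling through to the next key only on ties. A generic stand-alone version of that shape (the Case struct and its fields are illustrative, not LLVM's CaseBits):

#include <algorithm>
#include <vector>

struct Case { double prob; unsigned bits; unsigned mask; };

void sortCases(std::vector<Case> &cases) {
  std::sort(cases.begin(), cases.end(), [](const Case &a, const Case &b) {
    if (a.prob != b.prob)
      return a.prob > b.prob;  // higher probability first
    if (a.bits != b.bits)
      return a.bits > b.bits;  // then more covered bits
    return a.mask < b.mask;    // finally a deterministic tie-break
  });
}
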
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 4b5dda982f1b..5f9cdb69daf7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -854,6 +854,9 @@ private:
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
+ void visitUnary(const User &I, unsigned Opcode);
+ void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
+
void visitBinary(const User &I, unsigned Opcode);
void visitShift(const User &I, unsigned Opcode);
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index fa341e8b5fa5..43df2abb674b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "SDNodeDbgValue.h"
#include <cstdint>
#include <iterator>
@@ -123,6 +124,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::SPONENTRY: return "SPONENTRY";
case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
@@ -174,25 +176,34 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Unary operators
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
+ case ISD::STRICT_FMINNUM: return "strict_fminnum";
case ISD::FMAXNUM: return "fmaxnum";
- case ISD::FMINNAN: return "fminnan";
- case ISD::FMAXNAN: return "fmaxnan";
+ case ISD::STRICT_FMAXNUM: return "strict_fmaxnum";
+ case ISD::FMINNUM_IEEE: return "fminnum_ieee";
+ case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
+ case ISD::FMINIMUM: return "fminimum";
+ case ISD::FMAXIMUM: return "fmaximum";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::STRICT_FSQRT: return "strict_fsqrt";
+ case ISD::FCBRT: return "fcbrt";
case ISD::FSIN: return "fsin";
case ISD::STRICT_FSIN: return "strict_fsin";
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
+ case ISD::STRICT_FTRUNC: return "strict_ftrunc";
case ISD::FFLOOR: return "ffloor";
+ case ISD::STRICT_FFLOOR: return "strict_ffloor";
case ISD::FCEIL: return "fceil";
+ case ISD::STRICT_FCEIL: return "strict_fceil";
case ISD::FRINT: return "frint";
case ISD::STRICT_FRINT: return "strict_frint";
case ISD::FNEARBYINT: return "fnearbyint";
case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
+ case ISD::STRICT_FROUND: return "strict_fround";
case ISD::FEXP: return "fexp";
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
@@ -226,6 +237,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SRL: return "srl";
case ISD::ROTL: return "rotl";
case ISD::ROTR: return "rotr";
+ case ISD::FSHL: return "fshl";
+ case ISD::FSHR: return "fshr";
case ISD::FADD: return "fadd";
case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
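
FSHL and FSHR, whose printable names are added above, are the new funnel-shift nodes. A rough reference for their semantics on 32-bit values (a sketch, not the DAG legalization):

#include <cstdint>

uint32_t fshl32(uint32_t hi, uint32_t lo, uint32_t amt) {
  amt %= 32;                     // shift amount is taken modulo the width
  if (amt == 0) return hi;       // avoid an undefined shift by 32
  return (hi << amt) | (lo >> (32 - amt));
}

uint32_t fshr32(uint32_t hi, uint32_t lo, uint32_t amt) {
  amt %= 32;
  if (amt == 0) return lo;
  return (lo >> amt) | (hi << (32 - amt));
}
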
@@ -280,6 +293,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
+ case ISD::SADDSAT: return "saddsat";
+ case ISD::UADDSAT: return "uaddsat";
+ case ISD::SSUBSAT: return "ssubsat";
+ case ISD::USUBSAT: return "usubsat";
+ case ISD::SMULFIX: return "smulfix";
+
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
case ISD::ZERO_EXTEND: return "zero_extend";
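
The saturating arithmetic nodes listed above clamp to the edge of the value range instead of wrapping. Minimal reference versions of the unsigned pair (illustrative only):

#include <cstdint>
#include <limits>

uint32_t uaddsat32(uint32_t a, uint32_t b) {
  uint32_t s = a + b;
  return s < a ? std::numeric_limits<uint32_t>::max() : s;  // clamp on wrap
}

uint32_t usubsat32(uint32_t a, uint32_t b) {
  return a > b ? a - b : 0;  // clamp at zero instead of wrapping
}
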
@@ -681,9 +700,26 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ':' << L->getLine();
if (unsigned C = L->getColumn())
OS << ':' << C;
+
+ for (SDDbgValue *Dbg : G->GetDbgValues(this)) {
+ if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
+ continue;
+ Dbg->dump(OS);
+ }
}
}
+LLVM_DUMP_METHOD void SDDbgValue::dump(raw_ostream &OS) const {
+ OS << " DbgVal";
+ if (kind == SDNODE)
+ OS << '(' << u.s.ResNo << ')';
+ OS << ":\"" << Var->getName() << '"';
+#ifndef NDEBUG
+ if (Expr->getNumElements())
+ Expr->dump();
+#endif
+}
+
/// Return true if this node is so simple that we should just print it inline
/// if it appears as an operand.
static bool shouldPrintInline(const SDNode &Node) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index f7bd8847bee3..af5c2433fa2f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -176,7 +177,8 @@ static const bool ViewDAGCombine1 = false,
/// RegisterScheduler class - Track the registration of instruction schedulers.
///
//===---------------------------------------------------------------------===//
-MachinePassRegistry RegisterScheduler::Registry;
+MachinePassRegistry<RegisterScheduler::FunctionPassCtor>
+ RegisterScheduler::Registry;
//===---------------------------------------------------------------------===//
///
@@ -417,7 +419,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<DivergenceAnalysis>());
+ getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyzes if we want to.
@@ -451,7 +453,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (!succ_empty(&BB))
continue;
- const TerminatorInst *Term = BB.getTerminator();
+ const Instruction *Term = BB.getTerminator();
if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term))
continue;
@@ -695,14 +697,14 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
if (!TargetRegisterInfo::isVirtualRegister(DestReg))
continue;
- // Ignore non-scalar or non-integer values.
+ // Ignore non-integer values.
SDValue Src = N->getOperand(2);
EVT SrcVT = Src.getValueType();
- if (!SrcVT.isInteger() || SrcVT.isVector())
+ if (!SrcVT.isInteger())
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- CurDAG->computeKnownBits(Src, Known);
+ Known = CurDAG->computeKnownBits(Src);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
} while (!Worklist.empty());
}
@@ -714,8 +716,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
+#endif
// Pre-type legalization allow creation of any node types.
CurDAG->NewNodesMustHaveLegalTypes = false;
@@ -750,8 +754,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -770,8 +776,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -792,8 +800,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -839,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
}
if (ViewLegalizeDAGs && MatchFilterBB)
@@ -852,8 +864,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -870,8 +884,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
+#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDiverence();
+#endif
LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
@@ -1114,6 +1130,37 @@ static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
return false;
}
+// The wasm.landingpad.index intrinsic associates a landing pad index number
+// with a catchpad instruction. Retrieve the landing pad index from the
+// intrinsic and store the mapping in the function.
+static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
+ const CatchPadInst *CPI) {
+ MachineFunction *MF = MBB->getParent();
+ // In case of single catch (...), we don't emit LSDA, so we don't need
+ // this information.
+ bool IsSingleCatchAllClause =
+ CPI->getNumArgOperands() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue();
+ if (!IsSingleCatchAllClause) {
+ // Create a mapping from landing pad label to landing pad index.
+ bool IntrFound = false;
+ for (const User *U : CPI->users()) {
+ if (const auto *Call = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = Call->getIntrinsicID();
+ if (IID == Intrinsic::wasm_landingpad_index) {
+ Value *IndexArg = Call->getArgOperand(1);
+ int Index = cast<ConstantInt>(IndexArg)->getZExtValue();
+ MF->setWasmLandingPadIndex(MBB, Index);
+ IntrFound = true;
+ break;
+ }
+ }
+ }
+ assert(IntrFound && "wasm.landingpad.index intrinsic not found!");
+ (void)IntrFound;
+ }
+}
+
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
bool SelectionDAGISel::PrepareEHLandingPad() {
@@ -1123,44 +1170,48 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
const TargetRegisterClass *PtrRC =
TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+ auto Pers = classifyEHPersonality(PersonalityFn);
+
// Catchpads have one live-in register, which typically holds the exception
// pointer or code.
- if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
- if (hasExceptionPointerOrCodeUser(CPI)) {
- // Get or create the virtual register to hold the pointer or code. Mark
- // the live in physreg and copy into the vreg.
- MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
- assert(EHPhysReg && "target lacks exception pointer register");
- MBB->addLiveIn(EHPhysReg);
- unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
- BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
- TII->get(TargetOpcode::COPY), VReg)
- .addReg(EHPhysReg, RegState::Kill);
+ if (isFuncletEHPersonality(Pers)) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
}
return true;
}
- if (!LLVMBB->isLandingPad())
- return true;
-
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->addLandingPad(MBB);
- // Assign the call site to the landing pad's begin label.
- MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-
const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
- // Mark exception register as live in.
- if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
- FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
-
- // Mark exception selector register as live in.
- if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
- FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ if (Pers == EHPersonality::Wasm_CXX) {
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
+ mapWasmLandingPadIndex(MBB, CPI);
+ } else {
+ // Assign the call site to the landing pad's begin label.
+ MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+ // Mark exception register as live in.
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
+ FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ // Mark exception selector register as live in.
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
+ FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+ }
return true;
}
@@ -1171,7 +1222,7 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
static bool isFoldedOrDeadInstruction(const Instruction *I,
FunctionLoweringInfo *FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
- !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !I->isTerminator() && // Terminators aren't folded.
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
!I->isEHPad() && // EH pad instructions aren't folded.
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
@@ -1688,7 +1739,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
Inst->getDebugLoc(), LLVMBB);
bool ShouldAbort = EnableFastISelAbort;
- if (isa<TerminatorInst>(Inst)) {
+ if (Inst->isTerminator()) {
// Use a different message for terminator misses.
R << "FastISel missed terminator";
// Don't abort for terminator unless the level is really high
@@ -2160,9 +2211,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
// Otherwise, the DAG Combiner may have proven that the value coming in is
// either already zero or is not demanded. Check for known zero input bits.
APInt NeededMask = DesiredMask & ~ActualMask;
-
- KnownBits Known;
- CurDAG->computeKnownBits(LHS, Known);
+ KnownBits Known = CurDAG->computeKnownBits(LHS);
// If all the missing bits in the or are already known to be set, match!
if (NeededMask.isSubsetOf(Known.One))
@@ -3156,6 +3205,18 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
N.getNode()))
break;
continue;
+ case OPC_CheckPredicateWithOperands: {
+ unsigned OpNum = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Operands;
+
+ for (unsigned i = 0; i < OpNum; ++i)
+ Operands.push_back(RecordedNodes[MatcherTable[MatcherIndex++]].first);
+
+ unsigned PredNo = MatcherTable[MatcherIndex++];
+ if (!CheckNodePredicateWithOperands(N.getNode(), PredNo, Operands))
+ break;
+ continue;
+ }
case OPC_CheckComplexPat: {
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
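
The new OPC_CheckPredicateWithOperands case above consumes a variable-length record from the matcher byte table: an operand count, that many recorded-node indices, then a predicate number. A toy reader of the same shape (the single-byte encoding and all names here are assumptions for illustration):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

bool checkPredicateWithOperands(
    const std::vector<uint8_t> &table, size_t &idx,
    const std::vector<int> &recorded,
    const std::function<bool(unsigned, const std::vector<int> &)> &pred) {
  unsigned opNum = table[idx++];
  std::vector<int> operands;
  for (unsigned i = 0; i < opNum; ++i)
    operands.push_back(recorded[table[idx++]]);  // look up recorded operands
  unsigned predNo = table[idx++];
  return pred(predNo, operands);
}
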
@@ -3598,38 +3659,22 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
bool mayLoad = MCID.mayLoad();
bool mayStore = MCID.mayStore();
- unsigned NumMemRefs = 0;
- for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
- MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
- if ((*I)->isLoad()) {
- if (mayLoad)
- ++NumMemRefs;
- } else if ((*I)->isStore()) {
- if (mayStore)
- ++NumMemRefs;
- } else {
- ++NumMemRefs;
- }
- }
-
- MachineSDNode::mmo_iterator MemRefs =
- MF->allocateMemRefsArray(NumMemRefs);
-
- MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
- for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
- MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
- if ((*I)->isLoad()) {
+ // We expect to have relatively few of these so just filter them into a
+ // temporary buffer so that we can easily add them to the instruction.
+ SmallVector<MachineMemOperand *, 4> FilteredMemRefs;
+ for (MachineMemOperand *MMO : MatchedMemRefs) {
+ if (MMO->isLoad()) {
if (mayLoad)
- *MemRefsPos++ = *I;
- } else if ((*I)->isStore()) {
+ FilteredMemRefs.push_back(MMO);
+ } else if (MMO->isStore()) {
if (mayStore)
- *MemRefsPos++ = *I;
+ FilteredMemRefs.push_back(MMO);
} else {
- *MemRefsPos++ = *I;
+ FilteredMemRefs.push_back(MMO);
}
}
- Res->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ CurDAG->setNodeMemRefs(Res, FilteredMemRefs);
}
LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs()
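
The comment in the hunk above ("filter them into a temporary buffer") replaces the old count-then-fill logic with a single pass. The same shape in stand-alone form (MemRef is a plain illustrative struct, not MachineMemOperand):

#include <vector>

struct MemRef { bool isLoad; bool isStore; };

std::vector<MemRef> filterMemRefs(const std::vector<MemRef> &refs,
                                  bool mayLoad, bool mayStore) {
  std::vector<MemRef> filtered;
  for (const MemRef &r : refs) {
    if (r.isLoad) {
      if (mayLoad) filtered.push_back(r);    // keep loads only if allowed
    } else if (r.isStore) {
      if (mayStore) filtered.push_back(r);   // keep stores only if allowed
    } else {
      filtered.push_back(r);                 // everything else is kept
    }
  }
  return filtered;
}
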
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 54cbd6859f70..90a1b350fc94 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -522,7 +522,16 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// The vm state arguments are lowered in an opaque manner. We do not know
// what type of values are contained within.
for (const Value *V : SI.DeoptState) {
- SDValue Incoming = Builder.getValue(V);
+ SDValue Incoming;
+ // If this is a function argument at a static frame index, generate it as
+ // the frame index.
+ if (const Argument *Arg = dyn_cast<Argument>(V)) {
+ int FI = Builder.FuncInfo.getArgumentFrameIndex(Arg);
+ if (FI != INT_MAX)
+ Incoming = Builder.DAG.getFrameIndex(FI, Builder.getFrameIndexTy());
+ }
+ if (!Incoming.getNode())
+ Incoming = Builder.getValue(V);
const bool LiveInValue = LiveInDeopt && !isGCValue(V);
lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e317268fa5f4..a2f05c1e3cef 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -55,10 +55,12 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
const Function &F = DAG.getMachineFunction().getFunction();
// Conservatively require the attributes of the call to match those of
- // the return. Ignore noalias because it doesn't affect the call sequence.
+ // the return. Ignore NoAlias and NonNull because they don't affect the
+ // call sequence.
AttributeList CallerAttrs = F.getAttributes();
if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
.removeAttribute(Attribute::NoAlias)
+ .removeAttribute(Attribute::NonNull)
.hasAttributes())
return false;
@@ -429,87 +431,56 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
return false;
}
-bool
-TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
- const APInt &Demanded,
- DAGCombinerInfo &DCI,
- TargetLoweringOpt &TLO) const {
- SDValue Op = User->getOperand(OpIdx);
- KnownBits Known;
-
- if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true))
- return false;
-
-
- // Old will not always be the same as Op. For example:
- //
- // Demanded = 0xffffff
- // Op = i64 truncate (i32 and x, 0xffffff)
- // In this case simplify demand bits will want to replace the 'and' node
- // with the value 'x', which will give us:
- // Old = i32 and x, 0xffffff
- // New = x
- if (TLO.Old.hasOneUse()) {
- // For the one use case, we just commit the change.
- DCI.CommitTargetLoweringOpt(TLO);
- return true;
- }
-
- // If Old has more than one use then it must be Op, because the
- // AssumeSingleUse flag is not propogated to recursive calls of
- // SimplifyDemanded bits, so the only node with multiple use that
- // it will attempt to combine will be Op.
- assert(TLO.Old == Op);
-
- SmallVector <SDValue, 4> NewOps;
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
- if (i == OpIdx) {
- NewOps.push_back(TLO.New);
- continue;
- }
- NewOps.push_back(User->getOperand(i));
- }
- User = TLO.DAG.UpdateNodeOperands(User, NewOps);
- // Op has less users now, so we may be able to perform additional combines
- // with it.
- DCI.AddToWorklist(Op.getNode());
- // User's operands have been updated, so we may be able to do new combines
- // with it.
- DCI.AddToWorklist(User);
- return true;
-}
-
-bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const {
-
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
KnownBits Known;
- bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO);
- if (Simplified)
+ bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
+ }
return Simplified;
}
-/// Look at Op. At this point, we know that only the DemandedMask bits of the
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
+ AssumeSingleUse);
+}
+
+/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
-/// DemandedMask.
-bool TargetLowering::SimplifyDemandedBits(SDValue Op,
- const APInt &DemandedMask,
- KnownBits &Known,
- TargetLoweringOpt &TLO,
- unsigned Depth,
- bool AssumeSingleUse) const {
- unsigned BitWidth = DemandedMask.getBitWidth();
+/// OriginalDemandedBits and OriginalDemandedElts.
+bool TargetLowering::SimplifyDemandedBits(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth, bool AssumeSingleUse) const {
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
- APInt NewMask = DemandedMask;
+
+ unsigned NumElts = OriginalDemandedElts.getBitWidth();
+ assert((!Op.getValueType().isVector() ||
+ NumElts == Op.getValueType().getVectorNumElements()) &&
+ "Unexpected vector size");
+
+ APInt DemandedBits = OriginalDemandedBits;
+ APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
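
The rewritten documentation comment above states the demanded-bits contract: only OriginalDemandedBits of Op's result are ever observed downstream, so constants feeding into it may be shrunk. A toy model of that idea on plain integers (uint32_t stands in for APInt; not the DAG code):

#include <cstdint>

struct MaskedValue { uint32_t value; uint32_t mask; };  // models (value & mask)

MaskedValue shrinkDemandedConstant(MaskedValue m, uint32_t demandedBits) {
  // Bits of the mask outside demandedBits can never be observed, so clearing
  // them is safe; if every demanded bit is already set in the mask, the AND
  // is a no-op for that user and could be dropped entirely.
  m.mask &= demandedBits;
  return m;
}
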
@@ -529,18 +500,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Depth != 0) {
// If not at the root, Just compute the Known bits to
// simplify things downstream.
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
- // just set the NewMask to all bits.
- NewMask = APInt::getAllOnesValue(BitWidth);
- } else if (DemandedMask == 0) {
- // Not demanding any bits from Op.
+ // just set the DemandedBits/Elts to all bits.
+ DemandedBits = APInt::getAllOnesValue(BitWidth);
+ DemandedElts = APInt::getAllOnesValue(NumElts);
+ } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
+ // Not demanding any bits/elts from Op.
if (!Op.isUndef())
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
return false;
- } else if (Depth == 6) { // Limit search depth.
+ } else if (Depth == 6) { // Limit search depth.
return false;
}
@@ -570,24 +542,90 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
- return false; // Don't fall through, will infinitely loop.
- case ISD::AND:
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::CONCAT_VECTORS: {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ APInt DemandedSubElts =
+ DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ // Known bits are shared by every demanded subvector element.
+ if (!!DemandedSubElts) {
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ }
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // Collect demanded elements from shuffle operands.
+ APInt DemandedLHS(NumElts, 0);
+ APInt DemandedRHS(NumElts, 0);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ int M = ShuffleMask[i];
+ if (M < 0) {
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ DemandedLHS.clearAllBits();
+ DemandedRHS.clearAllBits();
+ break;
+ }
+ assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ if (!!DemandedLHS || !!DemandedRHS) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!DemandedLHS) {
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
+ Known2, TLO, Depth + 1))
+ return true;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ if (!!DemandedRHS) {
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
+ Known2, TLO, Depth + 1))
+ return true;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ }
+ break;
+ }
+ case ISD::AND: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
- if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue Op0 = Op.getOperand(0);
- KnownBits LHSKnown;
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
+ KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
- if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ if ((LHSKnown.Zero & DemandedBits) ==
+ (~RHSC->getAPIntValue() & DemandedBits))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
- if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
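
The VECTOR_SHUFFLE case added in the hunk above splits the demanded result elements between the two shuffle operands according to the mask, and gives up on both sides when it meets an undef entry. A plain-integer sketch of that split (bitmasks stand in for APInt and assume at most 32 elements):

#include <cstdint>
#include <vector>

void splitDemandedElts(const std::vector<int> &shuffleMask,
                       uint32_t demandedElts, unsigned numElts,
                       uint32_t &demandedLHS, uint32_t &demandedRHS) {
  demandedLHS = demandedRHS = 0;
  for (unsigned i = 0; i != numElts; ++i) {
    if (!(demandedElts & (1u << i)))
      continue;                           // this result element is never read
    int m = shuffleMask[i];
    if (m < 0) {                          // undef entry: source is unknown
      demandedLHS = demandedRHS = 0;
      return;
    }
    if (static_cast<unsigned>(m) < numElts)
      demandedLHS |= 1u << m;             // pulls from the first operand
    else
      demandedRHS |= 1u << (m - numElts); // pulls from the second operand
  }
}
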
@@ -597,34 +635,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
- SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0),
- Op.getOperand(1));
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
return TLO.CombineTo(Op, Xor);
}
}
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask,
- Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if (NewMask.isSubsetOf(Known2.Zero | Known.One))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known.Zero | Known2.One))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
+ return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
- if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -632,26 +669,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Output known-0 are known to be clear if zero in either the LHS | RHS.
Known.Zero |= Known2.Zero;
break;
- case ISD::OR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ }
+ case ISD::OR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask,
- Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if (NewMask.isSubsetOf(Known2.One | Known.Zero))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known.One | Known2.Zero))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -659,78 +700,81 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Output known-1 are known to be set if set in either the LHS | RHS.
Known.One |= Known2.One;
break;
+ }
case ISD::XOR: {
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
- if (NewMask.isSubsetOf(Known.Zero))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if (NewMask.isSubsetOf(Known2.Zero))
- return TLO.CombineTo(Op, Op.getOperand(1));
+ if (DemandedBits.isSubsetOf(Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
// If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other
// (but not both) turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT,
- Op.getOperand(0),
- Op.getOperand(1)));
+ if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
// Output known-0 bits are known if clear or set in both the LHS & RHS.
KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
- // If all of the demanded bits on one side are known, and all of the set
- // bits on that side are also known to be set on the other side, turn this
- // into an AND, as we know the bits will be cleared.
- // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- // NB: it is okay if more bits are known than are requested
- if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side
- if (Known.One == Known2.One) { // set bits are the same on both sides
- SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
- Op.getOperand(0), ANDC));
+ if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
+ // If one side is a constant, and all of the known set bits on the other
+ // side are also set in the constant, turn this into an AND, as we know
+ // the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ // NB: it is okay if more bits are known than are requested
+ if (C->getAPIntValue() == Known2.One) {
+ SDValue ANDC =
+ TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
}
- }
- // If the RHS is a constant, see if we can change it. Don't alter a -1
- // constant because that's a 'not' op, and that is better for combining and
- // codegen.
- ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1));
- if (C && !C->isAllOnesValue()) {
- if (NewMask.isSubsetOf(C->getAPIntValue())) {
- // We're flipping all demanded bits. Flip the undemanded bits too.
- SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT);
- return TLO.CombineTo(Op, New);
+ // If the RHS is a constant, see if we can change it. Don't alter a -1
+ // constant because that's a 'not' op, and that is better for combining
+ // and codegen.
+ if (!C->isAllOnesValue()) {
+ if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
+ return TLO.CombineTo(Op, New);
+ }
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ return true;
}
- // If we can't turn this into a 'not', try to shrink the constant.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
- return true;
}
Known = std::move(KnownOut);
break;
}
case ISD::SELECT:
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// Only known if known in both the LHS and RHS.
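
The XOR case above cites two rewrites in its comments; quick compile-time spot checks of both on concrete constants (illustrative values only):

#include <cstdint>

// (A & C1) ^ (B & C2) == (A & C1) | (B & C2) when C1 & C2 == 0, because the
// two masked values can never have a set bit in common.
constexpr uint32_t A = 0xA5, B = 0x5A, C1 = 0x0F, C2 = 0xF0;
static_assert(((A & C1) ^ (B & C2)) == ((A & C1) | (B & C2)),
              "xor of disjointly masked values is an inclusive or");

// (X | C1) ^ C2 == (X | C1) & ~C2 when (C1 & C2) == C2: every bit of C2 is
// already forced to one by the OR, so the xor simply clears those bits.
constexpr uint32_t X = 0xA5, D1 = 0x3C, D2 = 0x0C;
static_assert(((X | D1) ^ D2) == ((X | D1) & ~D2),
              "xor with bits known to be one acts as an and-not");
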
@@ -738,15 +782,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -760,7 +806,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
- if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(VT) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
@@ -780,10 +827,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero.setBitsFrom(1);
break;
}
- case ISD::SHL:
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue InOp = Op.getOperand(0);
+ case ISD::SHL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
@@ -793,90 +841,91 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
- if (InOp.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (Op0.getOpcode() == ISD::SRL) {
+ if (ShAmt &&
+ (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
- int Diff = ShAmt-C1;
+ int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
}
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0),
- NewSA));
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
}
- if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO,
+ Depth + 1))
return true;
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
- if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
- SDValue InnerOp = InOp.getOperand(0);
+ if (Op0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getScalarSizeInBits();
- if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
+ if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
- TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
- TLO.DAG.getConstant(ShAmt, dl, ShTy));
- return
- TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, dl, ShTy));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
- if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(InnerOp.getOperand(1))) {
unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
- if (InnerShAmt < ShAmt &&
- InnerShAmt < InnerBits &&
- NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
- NewMask.countTrailingZeros() >= ShAmt) {
- SDValue NewSA =
- TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
- Op.getOperand(1).getValueType());
+ if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
+ DemandedBits.getActiveBits() <=
+ (InnerBits - InnerShAmt + ShAmt) &&
+ DemandedBits.countTrailingZeros() >= ShAmt) {
+ SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+ Op1.getValueType());
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
- NewExt, NewSA));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
}
}
}
}
Known.Zero <<= ShAmt;
- Known.One <<= ShAmt;
+ Known.One <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
}
break;
- case ISD::SRL:
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
- SDValue InOp = Op.getOperand(0);
+ }
+ case ISD::SRL: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
- APInt InDemandedMask = (NewMask << ShAmt);
+ APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
@@ -886,56 +935,56 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
- if (InOp.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
if (ShAmt &&
- (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
- int Diff = ShAmt-C1;
+ int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
}
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0),
- NewSA));
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
}
// Compute the new bits that are at the top now.
- if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
- Known.Zero.setHighBits(ShAmt); // High bits known zero.
+ Known.Zero.setHighBits(ShAmt); // High bits known zero.
}
break;
- case ISD::SRA:
+ }
+ case ISD::SRA: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (NewMask.isOneValue())
- return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
- Op.getOperand(1)));
+ if (DemandedBits.isOneValue())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
- APInt InDemandedMask = (NewMask << ShAmt);
+ APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
@@ -944,11 +993,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- if (NewMask.countLeadingZeros() < ShAmt)
+ if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO,
- Depth+1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
@@ -957,22 +1005,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
- NewMask.countLeadingZeros() >= ShAmt) {
+ DemandedBits.countLeadingZeros() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
- return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
- Op.getOperand(1), Flags));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
}
- int Log2 = NewMask.exactLogBase2();
+ int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
SDValue NewSA =
- TLO.DAG.getConstant(BitWidth - 1 - Log2, dl,
- Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
- Op.getOperand(0), NewSA));
+ TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
@@ -980,15 +1025,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.One.setHighBits(ShAmt);
}
break;
+ }
case ISD::SIGN_EXTEND_INREG: {
+ SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If we only care about the highest bit, don't bother shifting right.
- if (NewMask.isSignMask()) {
- SDValue InOp = Op.getOperand(0);
+ if (DemandedBits.isSignMask()) {
bool AlreadySignExtended =
- TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1;
+ TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -998,25 +1044,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
- ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp,
- ShiftAmt));
+ SDValue ShiftAmt =
+ TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
}
// If none of the extended bits are demanded, eliminate the sextinreg.
- if (NewMask.getActiveBits() <= ExVTBits)
- return TLO.CombineTo(Op, Op.getOperand(0));
+ if (DemandedBits.getActiveBits() <= ExVTBits)
+ return TLO.CombineTo(Op, Op0);
- APInt InputDemandedBits = NewMask.getLoBits(ExVTBits);
+ APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
- if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
- Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1025,14 +1070,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
- return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
- Op.getOperand(0), dl, ExVT.getScalarType()));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
- if (Known.One[ExVTBits - 1]) { // Input sign bit known set
+ if (Known.One[ExVTBits - 1]) { // Input sign bit known set
Known.One.setBitsFrom(ExVTBits);
Known.Zero &= Mask;
- } else { // Input sign bit unknown
+ } else { // Input sign bit unknown
Known.Zero &= Mask;
Known.One &= Mask;
}
@@ -1042,8 +1087,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
EVT HalfVT = Op.getOperand(0).getValueType();
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
- APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
- APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
KnownBits KnownLo, KnownHi;
@@ -1061,36 +1106,35 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ZERO_EXTEND: {
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
// If none of the top bits are demanded, convert this into an any_extend.
- if (NewMask.getActiveBits() <= OperandBitWidth)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- Op.getOperand(0)));
+ if (DemandedBits.getActiveBits() <= InBits)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
- APInt InMask = NewMask.trunc(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(OperandBitWidth);
+ Known.Zero.setBitsFrom(InBits);
break;
}
case ISD::SIGN_EXTEND: {
- unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits();
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
// If none of the top bits are demanded, convert this into an any_extend.
- if (NewMask.getActiveBits() <= InBits)
- return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- Op.getOperand(0)));
+ if (DemandedBits.getActiveBits() <= InBits)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
- APInt InDemandedBits = NewMask.trunc(InBits);
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
InDemandedBits.setBit(InBits - 1);
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
- Depth+1))
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit is known one, the top bits match.
@@ -1098,34 +1142,55 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT,
- Op.getOperand(0)));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
+ break;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ // TODO - merge this with SIGN_EXTEND above?
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+
+ // If some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if (InBits < DemandedBits.getActiveBits())
+ InDemandedBits.setBit(InBits - 1);
+
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ // If the sign bit is known one, the top bits match.
+ Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
- APInt InMask = NewMask.trunc(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
+ SDValue Src = Op.getOperand(0);
+ unsigned InBits = Src.getScalarValueSizeInBits();
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
+ SDValue Src = Op.getOperand(0);
+
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
- unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
- APInt TruncMask = NewMask.zext(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1))
+ unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
+ APInt TruncMask = DemandedBits.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
return true;
Known = Known.trunc(BitWidth);
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
- if (Op.getOperand(0).getNode()->hasOneUse()) {
- SDValue In = Op.getOperand(0);
- switch (In.getOpcode()) {
- default: break;
+ if (Src.getNode()->hasOneUse()) {
+ switch (Src.getOpcode()) {
+ default:
+ break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
@@ -1133,10 +1198,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
- ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
if (!ShAmt)
break;
- SDValue Shift = In.getOperand(1);
+ SDValue Shift = Src.getOperand(1);
if (TLO.LegalTypes()) {
uint64_t ShVal = ShAmt->getZExtValue();
Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
@@ -1148,13 +1213,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
HighBits.lshrInPlace(ShAmt->getZExtValue());
HighBits = HighBits.trunc(BitWidth);
- if (!(HighBits & NewMask)) {
+ if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
- SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT,
- In.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc,
- Shift));
+ SDValue NewTrunc =
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
}
}
break;
@@ -1169,7 +1234,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// demanded by its users.
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
- if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits,
Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1177,50 +1242,111 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Known.Zero |= ~InMask;
break;
}
- case ISD::BITCAST:
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ unsigned EltBitWidth = Src.getScalarValueSizeInBits();
+
+ // Demand the bits from every vector element without a constant index.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
+ if (CIdx->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
+
+ // If BitWidth > EltBitWidth the value is any-extended, so we do not know
+ // anything about the extended bits.
+ APInt DemandedSrcBits = DemandedBits;
+ if (BitWidth > EltBitWidth)
+ DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
+
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ Known = Known2;
+ if (BitWidth > EltBitWidth)
+ Known = Known.zext(BitWidth);
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!TLO.LegalOperations() && !VT.isVector() &&
- !Op.getOperand(0).getValueType().isVector() &&
- NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
- Op.getOperand(0).getValueType().isFloatingPoint()) {
+ if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
+ DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
+ SrcVT.isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
- bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
- if ((OpVTLegal || i32Legal) && VT.isSimple() &&
- Op.getOperand(0).getValueType() != MVT::f16 &&
- Op.getOperand(0).getValueType() != MVT::f128) {
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
+ SrcVT != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
- SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
+ }
+ }
+ // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by
+ // demanding the element if any bits from it are demanded.
+ // TODO - bigendian once we have test coverage.
+ // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
+ if (SrcVT.isVector() && NumSrcEltBits > 1 &&
+ (BitWidth % NumSrcEltBits) == 0 &&
+ TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = BitWidth / NumSrcEltBits;
+ auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
+ DemandedSubElts = APInt::getNullValue(Scale);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue())
+ DemandedSubElts.setBit(i);
+ }
+ return true;
+ };
+
+ APInt DemandedSubElts;
+ if (GetDemandedSubMask(DemandedSubElts)) {
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);
+
+ APInt KnownUndef, KnownZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
}
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, Depth);
return false;
}
break;
+ }
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- unsigned NewMaskLZ = NewMask.countLeadingZeros();
- APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ);
- if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
+ unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
+ APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
- ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
SDNodeFlags Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
@@ -1240,7 +1366,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
// is probably not useful (and could be detrimental).
ConstantSDNode *C = isConstOrConstSplat(Op1);
- APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ);
+ APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
@@ -1257,24 +1383,34 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
LLVM_FALLTHROUGH;
}
default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+ if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
+ Known, TLO, Depth))
+ return true;
+ break;
+ }
+
// Just use computeKnownBits to compute output bits.
- TLO.DAG.computeKnownBits(Op, Known, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
// If we know the value of all of the demanded bits, return this as a
// constant.
- if (NewMask.isSubsetOf(Known.Zero|Known.One)) {
+ if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNodeIterator I = SDNodeIterator::begin(N),
- E = SDNodeIterator::end(N); I != E; ++I) {
+ E = SDNodeIterator::end(N);
+ I != E; ++I) {
SDNode *Op = *I;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
if (C->isOpaque())
return false;
}
- return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ // TODO: Handle float bits as well.
+ if (VT.isInteger())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
}
return false;
@@ -1291,8 +1427,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
bool Simplified =
SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
- if (Simplified)
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
+ }
return Simplified;
}
@@ -1371,6 +1509,23 @@ bool TargetLowering::SimplifyDemandedVectorElts(
TLO, Depth + 1))
return true;
+ // Try calling SimplifyDemandedBits, converting demanded elts to the bits
+ // of the large element.
+ // TODO - bigendian once we have test coverage.
+ if (TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
+ APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Ofs = (i % Scale) * EltSizeInBits;
+ SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
+ }
+
+ KnownBits Known;
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
+ return true;
+ }
+
// If the src element is zero/undef then all the output elements will be -
// only demanded elements are guaranteed to be correct.
for (unsigned i = 0; i != NumSrcElts; ++i) {
@@ -1463,7 +1618,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
EVT SubVT = Sub.getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
- if (Idx.uge(NumElts - NumSubElts))
+ if (Idx.ugt(NumElts - NumSubElts))
break;
unsigned SubIdx = Idx.getZExtValue();
APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
@@ -1481,22 +1636,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::EXTRACT_SUBVECTOR: {
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- break;
SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
- if (Idx.uge(NumSrcElts - NumElts))
- break;
- // Offset the demanded elts by the subvector index.
- uint64_t SubIdx = Idx.getZExtValue();
- APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx);
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
- Depth + 1))
- return true;
- KnownUndef = SrcUndef.extractBits(NumElts, SubIdx);
- KnownZero = SrcZero.extractBits(NumElts, SubIdx);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, Idx);
+ KnownZero = SrcZero.extractBits(NumElts, Idx);
+ }
break;
}
case ISD::INSERT_VECTOR_ELT: {
@@ -1510,9 +1663,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned Idx = CIdx->getZExtValue();
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
- DemandedElts.clearBit(Idx);
- if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
+ APInt DemandedVecElts(DemandedElts);
+ DemandedVecElts.clearBit(Idx);
+ if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
@@ -1534,12 +1688,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::VSELECT: {
- APInt DemandedLHS(DemandedElts);
- APInt DemandedRHS(DemandedElts);
-
- // TODO - add support for constant vselect masks.
+ // Try to transform the select condition based on the current demanded
+ // elements.
+ // TODO: If a condition element is undef, we can choose from one arm of the
+ // select (and if one arm is undef, then we can propagate that to the
+ // result).
+ // TODO - add support for constant vselect masks (see IR version of this).
+ APInt UnusedUndef, UnusedZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
+ UnusedZero, TLO, Depth + 1))
+ return true;
// See if we can simplify either vselect operand.
+ APInt DemandedLHS(DemandedElts);
+ APInt DemandedRHS(DemandedElts);
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
@@ -1624,8 +1786,35 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ APInt SrcUndef, SrcZero;
+ SDValue Src = Op.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ KnownZero = SrcZero.zextOrTrunc(NumElts);
+ KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
case ISD::ADD:
- case ISD::SUB: {
+ case ISD::SUB:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM: {
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
SrcZero, TLO, Depth + 1))
@@ -1637,21 +1826,58 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef &= SrcUndef;
break;
}
+ case ISD::AND: {
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ // If either side has a zero element, then the result element is zero, even
+ // if the other is an UNDEF.
+ KnownZero |= SrcZero;
+ KnownUndef &= SrcUndef;
+ KnownUndef &= ~KnownZero;
+ break;
+ }
case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
break;
default: {
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
KnownZero, TLO, Depth))
return true;
+ } else {
+ KnownBits Known;
+ APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO,
+ Depth, AssumeSingleUse))
+ return true;
+ }
break;
}
}
-
assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
+
+ // Constant fold all undef cases.
+ // TODO: Handle zero cases as well.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
return false;
}
@@ -1711,6 +1937,32 @@ bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return false;
}
+bool TargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedBits if you don't know whether Op"
+ " is a target node!");
+ computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
+ return false;
+}
+
+bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use isKnownNeverNaN if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
@@ -1901,10 +2153,24 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
} else
return SDValue();
- const APInt &I01 = C01->getAPIntValue();
- // Both of them must be power-of-two, and the constant from setcc is bigger.
- if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
- return SDValue();
+ APInt I01 = C01->getAPIntValue();
+
+ auto checkConstants = [&I1, &I01]() -> bool {
+ // Both of them must be power-of-two, and the constant from setcc is bigger.
+ return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
+ };
+
+ if (checkConstants()) {
+ // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
+ } else {
+ // What if we invert constants? (and the target predicate)
+ I1.negate();
+ I01.negate();
+ NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
+ if (!checkConstants())
+ return SDValue();
+ // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
+ }
// They are power-of-two, so which bit is set?
const unsigned KeptBits = I1.logBase2();
@@ -2141,7 +2407,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (bestWidth) {
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
- if (newVT.isRound()) {
+ if (newVT.isRound() &&
+ shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
@@ -2819,8 +3086,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
int64_t &Offset) const {
+
+ SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
+
if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
GA = GASD->getGlobal();
Offset += GASD->getOffset();
@@ -3419,34 +3689,63 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
-static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
+static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) {
- assert(d != 0 && "Division by zero!");
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ bool UseSRA = false;
+ SmallVector<SDValue, 16> Shifts, Factors;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+ APInt Divisor = C->getAPIntValue();
+ unsigned Shift = Divisor.countTrailingZeros();
+ if (Shift) {
+ Divisor.ashrInPlace(Shift);
+ UseSRA = true;
+ }
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t;
+ APInt Factor = Divisor;
+ while ((t = Divisor * Factor) != 1)
+ Factor *= APInt(Divisor.getBitWidth(), 2) - t;
+ Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ return true;
+ };
+
+ // Collect all magic values from the build vector.
+ if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
+ return SDValue();
+
+ SDValue Shift, Factor;
+ if (VT.isVector()) {
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ } else {
+ Shift = Shifts[0];
+ Factor = Factors[0];
+ }
+
+ SDValue Res = Op0;
// Shift the value upfront if it is even, so the LSB is one.
- unsigned ShAmt = d.countTrailingZeros();
- if (ShAmt) {
+ if (UseSRA) {
// TODO: For UDIV use SRL instead of SRA.
- SDValue Amt =
- DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(),
- DAG.getDataLayout()));
SDNodeFlags Flags;
Flags.setExact(true);
- Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags);
- Created.push_back(Op1.getNode());
- d.ashrInPlace(ShAmt);
+ Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
+ Created.push_back(Res.getNode());
}
- // Calculate the multiplicative inverse, using Newton's method.
- APInt t, xn = d;
- while ((t = d*xn) != 1)
- xn *= APInt(d.getBitWidth(), 2) - t;
-
- SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType());
- SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
- Created.push_back(Mul.getNode());
- return Mul;
+ return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
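As a hedged illustration of the loop above (plain uint8_t standing in for APInt, names invented here), the same Newton-Raphson iteration recovers the multiplicative inverse of an odd divisor modulo 2^8:

#include <cstdint>

// Sketch only: each x <- x * (2 - d*x) step doubles the number of correct
// low bits, so a few iterations reach d^-1 (mod 2^8) for any odd d.
constexpr uint8_t inverseMod256(uint8_t d) {
  uint8_t x = d;                                   // odd d: d*d == 1 (mod 8)
  while (static_cast<uint8_t>(d * x) != 1)
    x = static_cast<uint8_t>(x * (2 - d * x));
  return x;
}
static_assert(inverseMod256(3) == 171, "3 * 171 == 513 == 1 (mod 256)");
static_assert(static_cast<uint8_t>(21 * inverseMod256(3)) == 7,
              "an exact 21/3 becomes a single multiply (plus SRA for even divisors)");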
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
@@ -3463,11 +3762,15 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG, bool IsAfterLegalization,
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
- EVT VT = N->getValueType(0);
SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
@@ -3476,50 +3779,90 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
// If the sdiv has an 'exact' bit we can use a simpler lowering.
if (N->getFlags().hasExact())
- return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, Created);
+ return BuildExactSDIV(*this, N, dl, DAG, Created);
+
+ SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
+
+ auto BuildSDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+
+ const APInt &Divisor = C->getAPIntValue();
+ APInt::ms magics = Divisor.magic();
+ int NumeratorFactor = 0;
+ int ShiftMask = -1;
+
+ if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ // If d is +1/-1, we just multiply the numerator by +1/-1.
+ NumeratorFactor = Divisor.getSExtValue();
+ magics.m = 0;
+ magics.s = 0;
+ ShiftMask = 0;
+ } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ // If d > 0 and m < 0, add the numerator.
+ NumeratorFactor = 1;
+ } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ // If d < 0 and m > 0, subtract the numerator.
+ NumeratorFactor = -1;
+ }
+
+ MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
+ Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts / magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+ return SDValue();
- APInt::ms magics = Divisor.magic();
+ SDValue MagicFactor, Factor, Shift, ShiftMask;
+ if (VT.isVector()) {
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ Factor = DAG.getBuildVector(VT, dl, Factors);
+ Shift = DAG.getBuildVector(ShVT, dl, Shifts);
+ ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
+ } else {
+ MagicFactor = MagicFactors[0];
+ Factor = Factors[0];
+ Shift = Shifts[0];
+ ShiftMask = ShiftMasks[0];
+ }
- // Multiply the numerator (operand 0) by the magic value
- // FIXME: We should support doing a MUL in a wider type
+ // Multiply the numerator (operand 0) by the magic value.
+ // FIXME: We should support doing a MUL in a wider type.
SDValue Q;
- if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
- isOperationLegalOrCustom(ISD::MULHS, VT))
- Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
- DAG.getConstant(magics.m, dl, VT));
- else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
- isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
- Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
- N->getOperand(0),
- DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
- else
- return SDValue(); // No mulhs or equvialent
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
+ : isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
+ : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
+ Q = SDValue(LoHi.getNode(), 1);
+ } else
+ return SDValue(); // No mulhs or equivalent.
+ Created.push_back(Q.getNode());
+ // (Optionally) Add/subtract the numerator using Factor.
+ Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
+ Created.push_back(Factor.getNode());
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
Created.push_back(Q.getNode());
- // If d > 0 and m < 0, add the numerator
- if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
- Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
- Created.push_back(Q.getNode());
- }
- // If d < 0 and m > 0, subtract the numerator.
- if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
- Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
- Created.push_back(Q.getNode());
- }
- auto &DL = DAG.getDataLayout();
- // Shift right algebraic if shift value is nonzero
- if (magics.s > 0) {
- Q = DAG.getNode(
- ISD::SRA, dl, VT, Q,
- DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created.push_back(Q.getNode());
- }
- // Extract the sign bit and add it to the quotient
- SDValue T =
- DAG.getNode(ISD::SRL, dl, VT, Q,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
- getShiftAmountTy(Q.getValueType(), DL)));
+ // Shift right algebraic by shift value.
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
+ Created.push_back(Q.getNode());
+
+ // Extract the sign bit, mask it and add it to the quotient.
+ SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
+ SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
+ Created.push_back(T.getNode());
+ T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
Created.push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
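A minimal scalar sketch of the sequence now emitted, specialised to a signed divide by 3 on i32; the magic constant and shift are taken from the Hacker's Delight tables cited above, and the int64_t multiply stands in for ISD::MULHS:

#include <cstdint>

// Divisor 3: magic = 0x55555556, shift = 0, and the magic is positive, so no
// add/sub of the numerator (Factor == 0) and no post-shift are needed.
constexpr int32_t sdiv3(int32_t n) {
  int32_t q = static_cast<int32_t>((int64_t{n} * 0x55555556LL) >> 32); // MULHS (assumes arithmetic >>)
  // Add the sign bit of the quotient to round toward zero (ShiftMask == -1).
  return q + static_cast<int32_t>(static_cast<uint32_t>(q) >> 31);
}
static_assert(sdiv3(7) == 2 && sdiv3(-7) == -2, "matches truncating division");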
@@ -3528,72 +3871,133 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
-SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG, bool IsAfterLegalization,
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
- EVT VT = N->getValueType(0);
SDLoc dl(N);
- auto &DL = DAG.getDataLayout();
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+ unsigned EltBits = VT.getScalarSizeInBits();
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT))
return SDValue();
- // FIXME: We should use a narrower constant when the upper
- // bits are known to be zero.
- APInt::mu magics = Divisor.magicu();
+ bool UseNPQ = false;
+ SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
- SDValue Q = N->getOperand(0);
+ auto BuildUDIVPattern = [&](ConstantSDNode *C) {
+ if (C->isNullValue())
+ return false;
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ APInt Divisor = C->getAPIntValue();
+ APInt::mu magics = Divisor.magicu();
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.a != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics = Divisor.lshr(PreShift).magicu(PreShift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
- // If the divisor is even, we can avoid using the expensive fixup by shifting
- // the divided value upfront.
- if (magics.a != 0 && !Divisor[0]) {
- unsigned Shift = Divisor.countTrailingZeros();
- Q = DAG.getNode(
- ISD::SRL, dl, VT, Q,
- DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
- Created.push_back(Q.getNode());
+ APInt Magic = magics.m;
+
+ bool SelNPQ;
+ if (magics.a == 0 || Divisor.isOneValue()) {
+ assert(magics.s < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.s;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.s - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
+ MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
+ NPQFactors.push_back(
+ DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getNullValue(EltBits),
+ dl, SVT));
+ PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
- // Get magic number for the shifted divisor.
- magics = Divisor.lshr(Shift).magicu(Shift);
- assert(magics.a == 0 && "Should use cheap fixup now");
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Collect the shifts/magic values from each element.
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ return SDValue();
+
+ SDValue PreShift, PostShift, MagicFactor, NPQFactor;
+ if (VT.isVector()) {
+ PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
+ MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
+ NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
+ PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+ } else {
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
}
- // Multiply the numerator (operand 0) by the magic value
- // FIXME: We should support doing a MUL in a wider type
- if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
- isOperationLegalOrCustom(ISD::MULHU, VT))
- Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT));
- else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
- isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
- Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
- DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
- else
- return SDValue(); // No mulhu or equivalent
+ SDValue Q = N0;
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
+ Created.push_back(Q.getNode());
+
+ // FIXME: We should support doing a MUL in a wider type.
+ auto GetMULHU = [&](SDValue X, SDValue Y) {
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
+ : isOperationLegalOrCustom(ISD::MULHU, VT))
+ return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
+ if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
+ : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
+ SDValue LoHi =
+ DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
+ return SDValue(LoHi.getNode(), 1);
+ }
+ return SDValue(); // No mulhu or equivalent
+ };
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = GetMULHU(Q, MagicFactor);
+ if (!Q)
+ return SDValue();
Created.push_back(Q.getNode());
- if (magics.a == 0) {
- assert(magics.s < Divisor.getBitWidth() &&
- "We shouldn't generate an undefined shift!");
- return DAG.getNode(
- ISD::SRL, dl, VT, Q,
- DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
- } else {
- SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
- Created.push_back(NPQ.getNode());
- NPQ = DAG.getNode(
- ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
+ if (UseNPQ) {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
Created.push_back(NPQ.getNode());
- NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (VT.isVector())
+ NPQ = GetMULHU(NPQ, NPQFactor);
+ else
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
+
Created.push_back(NPQ.getNode());
- return DAG.getNode(
- ISD::SRL, dl, VT, NPQ,
- DAG.getConstant(magics.s - 1, dl,
- getShiftAmountTy(NPQ.getValueType(), DL)));
+
+ Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ Created.push_back(Q.getNode());
}
+
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
+ Created.push_back(Q.getNode());
+
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
+ return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
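Likewise, a hedged scalar sketch of the UseNPQ path for an unsigned divide by 7 on i32; magicu(7) gives m = 0x24924925 with the "add" fixup (a != 0), so the post-shift is s - 1 = 2, and the uint64_t multiply stands in for ISD::MULHU:

#include <cstdint>

constexpr uint32_t udiv7(uint32_t n) {
  uint32_t q   = static_cast<uint32_t>((uint64_t{n} * 0x24924925ULL) >> 32);
  uint32_t npq = (n - q) >> 1;      // scalar NPQ fixup: SRL-by-1 of (n - q)
  return (npq + q) >> 2;            // post-shift (s - 1)
}
static_assert(udiv7(14) == 2 && udiv7(0xFFFFFFFFu) == 613566756u,
              "matches n / 7 at the endpoints checked here");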
bool TargetLowering::
@@ -3750,8 +4154,17 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
return false;
- Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
- Merge(Lo, Hi));
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
+ isOperationLegalOrCustom(ISD::ADDE, VT));
+ if (UseGlue)
+ Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
+ Merge(Lo, Hi));
+ else
+ Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
+ Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
SDValue Carry = Next.getValue(1);
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
@@ -3760,9 +4173,13 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
return false;
- SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
- Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
- Carry);
+ if (UseGlue)
+ Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
+ Carry);
+ else
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
+ Zero, Carry);
+
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
if (Opcode == ISD::SMUL_LOHI) {
@@ -3797,66 +4214,525 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
return Ok;
}
+bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return false;
+
+ // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+ // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+ SDValue X = Node->getOperand(0);
+ SDValue Y = Node->getOperand(1);
+ SDValue Z = Node->getOperand(2);
+
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsFSHL = Node->getOpcode() == ISD::FSHL;
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Z.getValueType();
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+
+ SDValue ShAmt;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
+ } else {
+ ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
+ }
+
+ SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
+ SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
+ // and that is undefined. We must compare and select to avoid UB.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
+
+ // For fshl, 0-shift returns the 1st arg (X).
+ // For fshr, 0-shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
+ Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
+ return true;
+}
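A small constexpr sketch of the fshl expansion above for an assumed i8 scalar, including the compare+select that sidesteps the undefined shift when Z % BW == 0:

#include <cstdint>

constexpr uint8_t fshl8(uint8_t x, uint8_t y, uint8_t z) {
  uint8_t amt = z & 7;                                   // BW is a power of 2
  uint8_t orr = static_cast<uint8_t>((x << amt) | (y >> (8 - amt)));
  return amt == 0 ? x : orr;                             // 0-shift returns X
}
static_assert(fshl8(0xAB, 0xCD, 3) == 0x5E, "(0xAB << 3) | (0xCD >> 5)");
static_assert(fshl8(0xAB, 0xCD, 8) == 0xAB, "shift of BW wraps to 0 and returns X");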
+
+// TODO: Merge with expandFunnelShift.
+bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLeft = Node->getOpcode() == ISD::ROTL;
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDLoc DL(SDValue(Node, 0));
+
+ EVT ShVT = Op1.getValueType();
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+
+ // If a rotate in the other direction is legal, use it.
+ unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
+ if (isOperationLegal(RevRot, VT)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
+ return true;
+ }
+
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return false;
+
+ // Otherwise,
+ // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
+ // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
+ //
+ assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
+ "Expecting the type bitwidth to be a power of 2");
+ unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
+ unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
+ SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
+ SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
+ Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
+ DAG.getNode(HsOpc, DL, VT, Op0, And1));
+ return true;
+}
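The rotate-through-shifts pattern above, written out for an assumed i8 scalar so the masked shift amounts are easy to check; both stay in [0, 7], so no out-of-range shift is produced:

#include <cstdint>

constexpr uint8_t rotl8(uint8_t x, uint8_t c) {
  return static_cast<uint8_t>((x << (c & 7)) | (x >> ((8 - c) & 7)));
}
static_assert(rotl8(0x81, 1) == 0x03, "1000'0001 rotated left once");
static_assert(rotl8(0x81, 8) == 0x81, "rotating by the bit width is a no-op");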
+
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
- EVT VT = Node->getOperand(0).getValueType();
- EVT NVT = Node->getValueType(0);
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
SDLoc dl(SDValue(Node, 0));
// FIXME: Only f32 to i64 conversions are supported.
- if (VT != MVT::f32 || NVT != MVT::i64)
+ if (SrcVT != MVT::f32 || DstVT != MVT::i64)
return false;
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
// https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits());
+ unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
+ EVT IntVT = SrcVT.changeTypeToInteger();
+ EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
+
SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
SDValue Bias = DAG.getConstant(127, dl, IntVT);
- SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl,
- IntVT);
- SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
+ SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
- auto &DL = DAG.getDataLayout();
SDValue ExponentBits = DAG.getNode(
ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
- DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
- SDValue Sign = DAG.getNode(
- ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
- DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
- Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
- DAG.getConstant(0x00800000, dl, IntVT));
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, dl, IntVT));
- R = DAG.getZExtOrTrunc(R, dl, NVT);
+ R = DAG.getZExtOrTrunc(R, dl, DstVT);
R = DAG.getSelectCC(
dl, Exponent, ExponentLoBit,
- DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getNode(ISD::SHL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
- dl, getShiftAmountTy(IntVT, DL))),
- DAG.getNode(ISD::SRL, dl, NVT, R,
+ dl, IntShVT)),
+ DAG.getNode(ISD::SRL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
- dl, getShiftAmountTy(IntVT, DL))),
+ dl, IntShVT)),
ISD::SETGT);
- SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
- DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
- Sign);
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
+ DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
- DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
+ DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
+ return true;
+}
+
+bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(SDValue(Node, 0));
+ SDValue Src = Node->getOperand(0);
+
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
+ return false;
+
+ // If the maximum float value is smaller than the signed integer range,
+ // the destination signmask can't be represented by the float, so we can
+ // just use FP_TO_SINT directly.
+ const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
+ APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
+ if (APFloat::opOverflow &
+ APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
+ Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ return true;
+ }
+
+ SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+ SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+
+ bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+ if (Strict) {
+ // Expand based on the maximum range of FP_TO_SINT; if the value exceeds
+ // the signmask, offset it first (the result should then be fully
+ // representable).
+ // Sel = Src < 0x8000000000000000
+ // Val = select Sel, Src, Src - 0x8000000000000000
+ // Ofs = select Sel, 0, 0x8000000000000000
+ // Result = fp_to_sint(Val) ^ Ofs
+
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
+ DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
+ DAG.getConstant(SignMask, dl, DstVT));
+ Result = DAG.getNode(ISD::XOR, dl, DstVT,
+ DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
+ } else {
+ // Expand based on maximum range of FP_TO_SINT:
+ // True = fp_to_sint(Src)
+ // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
+ // Result = select (Src < 0x8000000000000000), True, False
+
+ SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
+ DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ False = DAG.getNode(ISD::XOR, dl, DstVT, False,
+ DAG.getConstant(SignMask, dl, DstVT));
+ Result = DAG.getSelect(dl, DstVT, Sel, True, False);
+ }
+ return true;
+}
+
+bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDValue Src = Node->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ if (SrcVT.getScalarType() != MVT::i64)
+ return false;
+
+ SDLoc dl(SDValue(Node, 0));
+ EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
+
+ if (DstVT.getScalarType() == MVT::f32) {
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
+ if (SrcVT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ SDValue SignBitTest = DAG.getSetCC(
+ dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+ Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
+ return true;
+ }
+
+ if (DstVT.getScalarType() == MVT::f64) {
+ // Only expand vector types if we have the appropriate vector bit
+ // operations.
+ if (SrcVT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
+ return false;
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
+ SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
+ BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
+ SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
+ SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
+ SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
+
+ SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
+ SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
+ SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ return true;
+ }
+
+ return false;
+}
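A hedged C++ rendering of the f64 path above, mirroring compiler-rt's __floatundidf; the memcpy calls play the role of the ISD::BITCAST nodes and the constants are the same ones built just above:

#include <cstdint>
#include <cstring>

double u64ToF64(uint64_t x) {
  uint64_t loBits = (x & 0xFFFFFFFFu) | 0x4330000000000000ull; // 2^52 + lo32
  uint64_t hiBits = (x >> 32)         | 0x4530000000000000ull; // 2^84 + hi32*2^32
  double lo, hi;
  std::memcpy(&lo, &loBits, sizeof(lo));
  std::memcpy(&hi, &hiBits, sizeof(hi));
  const double TwoP84PlusTwoP52 = 0x1.00000001p+84;            // 2^84 + 2^52
  // (hi - (2^84 + 2^52)) + lo == hi32*2^32 + lo32 == x, correctly rounded.
  return (hi - TwoP84PlusTwoP52) + lo;
}
// e.g. u64ToF64(~0ull) rounds to 18446744073709551616.0 (2^64), as expected.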
+
+SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
+ ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+ EVT VT = Node->getValueType(0);
+ if (isOperationLegalOrCustom(NewOp, VT)) {
+ SDValue Quiet0 = Node->getOperand(0);
+ SDValue Quiet1 = Node->getOperand(1);
+
+ if (!Node->getFlags().hasNoNaNs()) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+ if (!DAG.isKnownNeverSNaN(Quiet0)) {
+ Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
+ Node->getFlags());
+ }
+ if (!DAG.isKnownNeverSNaN(Quiet1)) {
+ Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
+ Node->getFlags());
+ }
+ }
+
+ return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
+ }
+
+ return SDValue();
+}
+
+bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned Len = VT.getScalarSizeInBits();
+ assert(VT.isInteger() && "CTPOP not implemented for this type.");
+
+ // TODO: Add support for irregular type lengths.
+ if (!(Len <= 128 && Len % 8 == 0))
+ return false;
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ return false;
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ SDValue Mask55 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+ SDValue Mask33 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+ SDValue Mask0F =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, dl, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ if (Len > 8)
+ Op =
+ DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, dl, ShVT));
+
+ Result = Op;
+ return true;
+}
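The same bit-counting steps as a scalar u32 constexpr sketch, which makes the role of the final 0x01010101 multiply and the Len - 8 shift easy to verify (each byte of the product holds a running sum; the top byte is the total):

#include <cstdint>

constexpr uint32_t popcount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                 // 2-bit partial sums
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit partial sums
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
  return (v * 0x01010101u) >> 24;                   // sum bytes, take top byte
}
static_assert(popcount32(0xF0F00001u) == 9, "eight bits in 0xF0F0 plus one");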
+
+bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTLZ, VT)) {
+ Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
+ return true;
+ }
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
+ return true;
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
+ return false;
+
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
+ for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
+ SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ return true;
+}
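A scalar u32 sketch of the smear-then-popcount fallback above; the loop-based popcount helper is only an illustrative stand-in for the ISD::CTPOP node:

#include <cstdint>

constexpr uint32_t popcount(uint32_t v) {
  uint32_t n = 0;
  for (; v != 0; v &= v - 1) ++n;   // clear the lowest set bit each round
  return n;
}
constexpr uint32_t ctlz32(uint32_t x) {
  x |= x >> 1;  x |= x >> 2;  x |= x >> 4;  x |= x >> 8;  x |= x >> 16;
  return popcount(~x);              // one bit set per leading zero of x
}
static_assert(ctlz32(0x00010000u) == 15, "bit 16 set leaves 15 leading zeros");
static_assert(ctlz32(0) == 32, "the all-zero input is well defined here");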
+
+bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Op = Node->getOperand(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
+ isOperationLegalOrCustom(ISD::CTTZ, VT)) {
+ Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
+ return true;
+ }
+
+ // If the ZERO_UNDEF version is supported use that and handle the zero case.
+ if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
+ return true;
+ }
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
+ SDValue Tmp = DAG.getNode(
+ ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
+
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
+ Result =
+ DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
+ return true;
+ }
+
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
+ return true;
+}
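And the cttz fallback sketched the same way: ~x & (x - 1) leaves a one in every trailing-zero position, so its popcount (or bitwidth minus its ctlz) is the count:

#include <cstdint>

constexpr uint32_t popcount(uint32_t v) {
  uint32_t n = 0;
  for (; v != 0; v &= v - 1) ++n;
  return n;
}
constexpr uint32_t cttz32(uint32_t x) { return popcount(~x & (x - 1)); }
static_assert(cttz32(0x00000100u) == 8, "bit 8 set has 8 trailing zeros");
static_assert(cttz32(0) == 32, "x == 0 yields an all-ones mask, i.e. 32");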
+
+bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
+ SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Op = N->getOperand(0);
+
+ // Only expand vector types if we have the appropriate vector operations.
+ if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+ !isOperationLegalOrCustom(ISD::ADD, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ return false;
+
+ SDValue Shift =
+ DAG.getNode(ISD::SRA, dl, VT, Op,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
return true;
}
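The SRA/ADD/XOR sequence is the branch-free absolute-value idiom: the arithmetic shift yields 0 for non-negative inputs and -1 for negative ones, so the add/xor pair is either a no-op or a two's-complement negation. A scalar sketch, assuming a 32-bit int and an arithmetic right shift (guaranteed for the ISD::SRA node; implementation-defined in C++ before C++20):

    #include <cstdint>

    static int32_t abs32(int32_t X) {
      int32_t Sign = X >> 31;     // 0 for X >= 0, -1 for X < 0
      return (X + Sign) ^ Sign;   // identity, or ~(X - 1) == -X
    }

As with the DAG expansion, INT32_MIN maps to itself, since its negation is not representable.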
@@ -3876,8 +4752,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
- EVT PtrVT = BasePTR.getValueType();
-
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
@@ -3888,8 +4762,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR,
- DAG.getConstant(Stride, SL, PtrVT));
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -3989,7 +4862,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
- if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
+ if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
+ LoadedVT.isVector()) {
// Scalarize the load and let the individual components be handled.
SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
@@ -4139,13 +5013,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
auto &MF = DAG.getMachineFunction();
+ EVT MemVT = ST->getMemoryVT();
SDLoc dl(ST);
- if (ST->getMemoryVT().isFloatingPoint() ||
- ST->getMemoryVT().isVector()) {
+ if (MemVT.isFloatingPoint() || MemVT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (isTypeLegal(intVT)) {
- if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
+ if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
+ MemVT.isVector()) {
// Scalarize the store and let the individual components be handled.
SDValue Result = scalarizeVectorStore(ST, DAG);
@@ -4399,3 +5274,134 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
}
return SDValue();
}
+
+SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ SDLoc dl(Node);
+
+ // usub.sat(a, b) -> umax(a, b) - b
+ if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
+ SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
+ return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
+ }
+
+ if (VT.isVector()) {
+ // TODO: Consider not scalarizing here.
+ return SDValue();
+ }
+
+ unsigned OverflowOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ OverflowOp = ISD::SADDO;
+ break;
+ case ISD::UADDSAT:
+ OverflowOp = ISD::UADDO;
+ break;
+ case ISD::SSUBSAT:
+ OverflowOp = ISD::SSUBO;
+ break;
+ case ISD::USUBSAT:
+ OverflowOp = ISD::USUBO;
+ break;
+ default:
+ llvm_unreachable("Expected method to receive signed or unsigned saturation "
+ "addition or subtraction node.");
+ }
+
+ assert(LHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(RHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
+
+ unsigned BitWidth = LHS.getValueSizeInBits();
+ EVT ResultType = LHS.getValueType();
+ EVT BoolVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType);
+ SDValue Result =
+ DAG.getNode(OverflowOp, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS);
+ SDValue SumDiff = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, ResultType);
+
+ if (Opcode == ISD::UADDSAT) {
+ // Just need to check overflow for SatMax.
+ APInt MaxVal = APInt::getMaxValue(BitWidth);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+ return DAG.getSelect(dl, ResultType, Overflow, SatMax, SumDiff);
+ } else if (Opcode == ISD::USUBSAT) {
+ // Just need to check overflow for SatMin.
+ APInt MinVal = APInt::getMinValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
+ return DAG.getSelect(dl, ResultType, Overflow, SatMin, SumDiff);
+ } else {
+ // SatMax -> Overflow && SumDiff < 0
+ // SatMin -> Overflow && SumDiff >= 0
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+ SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff);
+ }
+}
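The scalar path expresses saturation in terms of the matching overflow node: on overflow, the sign of the wrapped sum tells which bound was crossed. A sketch in plain C++, assuming a compiler that provides __builtin_add_overflow (GCC/Clang); the unsigned subtraction case mirrors the UMAX fast path at the top of the function:

    #include <cstdint>

    static int32_t saddsat32(int32_t A, int32_t B) {
      int32_t Sum;
      if (!__builtin_add_overflow(A, B, &Sum))
        return Sum;                              // no overflow: plain sum
      // A negative wrapped sum means the true sum exceeded INT32_MAX.
      return Sum < 0 ? INT32_MAX : INT32_MIN;
    }

    static uint32_t usubsat32(uint32_t A, uint32_t B) {
      return (A > B ? A : B) - B;                // usub.sat(a, b) = umax(a, b) - b
    }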
+
+SDValue
+TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node,
+ SelectionDAG &DAG) const {
+ assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
+ assert(Node->getNumOperands() == 3 &&
+ "Expected signed fixed point multiplication to have 3 operands.");
+
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ assert(LHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(RHS.getValueType().isScalarInteger() &&
+ "Expected operands to be integers. Vector of int arguments should "
+ "already be unrolled.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
+
+ unsigned Scale = Node->getConstantOperandVal(2);
+ EVT VT = LHS.getValueType();
+ assert(Scale < VT.getScalarSizeInBits() &&
+ "Expected scale to be less than the number of bits.");
+
+ if (!Scale)
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+
+ // Get the upper and lower bits of the result.
+ SDValue Lo, Hi;
+ if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ Lo = Result.getValue(0);
+ Hi = Result.getValue(1);
+ } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) {
+ Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
+ } else {
+ report_fatal_error("Unable to expand signed fixed point multiplication.");
+ }
+
+ // The result will need to be shifted right by the scale since both operands
+ // are scaled. The result is given to us in 2 halves, so we only want part of
+ // both in the result.
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy));
+ Hi = DAG.getNode(
+ ISD::SHL, dl, VT, Hi,
+ DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy));
+ return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+}
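Combining the right-shifted low half with the left-shifted high half simply selects the middle bits of the double-width product; for a 32-bit SMULFIX this is equivalent to a 64-bit multiply followed by a shift by the scale. A sketch with a worked Q16.16 example (helper name is illustrative):

    #include <cstdint>

    static int32_t smulfix32(int32_t A, int32_t B, unsigned Scale) {
      int64_t Prod = (int64_t)A * (int64_t)B;   // the Lo/Hi pair as one value
      return (int32_t)(Prod >> Scale);          // drop Scale fractional bits
    }

    // Q16.16: 1.5 (0x00018000) * 2.0 (0x00020000) with Scale = 16
    // yields 0x00030000, i.e. 3.0.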
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index ed74b3e4fa19..fccbb8ec91cb 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -95,7 +95,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
// Sort the Idx2MBBMap
- llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+ llvm::sort(idx2MBBMap, Idx2MBBCompare());
LLVM_DEBUG(mf->print(dbgs(), this));
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index 8fbe724045e6..bcc8f8cf18bc 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -76,6 +77,18 @@ public:
/// Returns the last insert point as an iterator for \p CurLI in \p MBB.
MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
MachineBasicBlock &MBB);
+
+ /// Return the base index of the first insert point in \p MBB.
+ SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) {
+ SlotIndex Res = LIS.getMBBStartIdx(&MBB);
+ if (!MBB.empty()) {
+ MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MII != MBB.end())
+ Res = LIS.getInstructionIndex(*MII);
+ }
+ return Res;
+ }
+
};
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
@@ -225,6 +238,10 @@ public:
MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
return IPA.getLastInsertPointIter(*CurLI, *BB);
}
+
+ SlotIndex getFirstSplitPoint(unsigned Num) {
+ return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num));
+ }
};
/// SplitEditor - Edit machine code and LiveIntervals for live range
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index 81a41970f9e2..eb8552915e2a 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -1022,9 +1022,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// We adjust AliasAnalysis information for merged stack slots.
- MachineSDNode::mmo_iterator NewMemOps =
- MF->allocateMemRefsArray(I.getNumMemOperands());
- unsigned MemOpIdx = 0;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
bool ReplaceMemOps = false;
for (MachineMemOperand *MMO : I.memoperands()) {
// If this memory location can be a slot remapped here,
@@ -1051,17 +1049,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
}
if (MayHaveConflictingAAMD) {
- NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
+ NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
ReplaceMemOps = true;
+ } else {
+ NewMMOs.push_back(MMO);
}
- else
- NewMemOps[MemOpIdx++] = MMO;
}
// If any memory operand is updated, set memory references of
// this instruction.
if (ReplaceMemOps)
- I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
+ I.setMemRefs(*MF, NewMMOs);
}
// Update the location of C++ catch objects for the MSVC personality routine.
@@ -1233,7 +1231,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
});
for (auto &s : LiveStarts)
- llvm::sort(s.begin(), s.end());
+ llvm::sort(s);
bool Changed = true;
while (Changed) {
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index 19a191c01db9..0676fa2421e8 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -268,11 +268,10 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
// in the list. Merge entries that refer to the same dwarf register and use
// the maximum size that needs to be spilled.
- llvm::sort(LiveOuts.begin(), LiveOuts.end(),
- [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
- // Only sort by the dwarf register number.
- return LHS.DwarfRegNum < RHS.DwarfRegNum;
- });
+ llvm::sort(LiveOuts, [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
+ // Only sort by the dwarf register number.
+ return LHS.DwarfRegNum < RHS.DwarfRegNum;
+ });
for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {
for (auto II = std::next(I); II != E; ++II) {
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index cb12c7ce6e82..3b578c7391da 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -157,14 +157,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
-static bool isLifetimeInst(const Instruction *I) {
- if (const auto Intrinsic = dyn_cast<IntrinsicInst>(I)) {
- const auto Id = Intrinsic->getIntrinsicID();
- return Id == Intrinsic::lifetime_start || Id == Intrinsic::lifetime_end;
- }
- return false;
-}
-
bool StackProtector::HasAddressTaken(const Instruction *AI) {
for (const User *U : AI->users()) {
if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
@@ -175,7 +167,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
return true;
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
// Ignore intrinsics that are not calls. TODO: Use isLoweredToCall().
- if (!isa<DbgInfoIntrinsic>(CI) && !isLifetimeInst(CI))
+ if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
return true;
} else if (isa<InvokeInst>(U)) {
return true;
@@ -199,6 +191,18 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
return false;
}
+/// Search for the first call to the llvm.stackprotector intrinsic and return it
+/// if present.
+static const CallInst *findStackProtectorIntrinsic(Function &F) {
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->getCalledFunction() ==
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::stackprotector))
+ return CI;
+ return nullptr;
+}
+
/// Check whether or not this function needs a stack protector based
/// upon the stack protector level.
///
@@ -215,13 +219,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
bool NeedsProtector = false;
- for (const BasicBlock &BB : *F)
- for (const Instruction &I : BB)
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->getCalledFunction() ==
- Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::stackprotector))
- HasPrologue = true;
+ HasPrologue = findStackProtectorIntrinsic(*F);
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
@@ -379,7 +377,8 @@ bool StackProtector::InsertStackProtectors() {
// protection in SDAG.
bool SupportsSelectionDAGSP =
TLI->useStackGuardXorFP() ||
- (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
+ (EnableSelectionDAGSP && !TM->Options.EnableFastISel &&
+ !TM->Options.EnableGlobalISel);
AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
@@ -399,6 +398,14 @@ bool StackProtector::InsertStackProtectors() {
if (SupportsSelectionDAGSP)
break;
+ // Find the stack guard slot if the prologue was not created by this pass
+ // itself via a previous call to CreatePrologue().
+ if (!AI) {
+ const CallInst *SPCall = findStackProtectorIntrinsic(*F);
+ assert(SPCall && "Call to llvm.stackprotector is missing");
+ AI = cast<AllocaInst>(SPCall->getArgOperand(1));
+ }
+
// Set HasIRCheck to true, so that SelectionDAG will not generate its own
// version. SelectionDAG called 'shouldEmitSDCheck' to check whether
// instrumentation has already been generated.
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index eb15b15a24a6..d8c6a249e4da 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -214,7 +214,7 @@ void StackSlotColoring::InitializeSlots() {
Intervals.reserve(LS->getNumIntervals());
for (auto &I : *LS)
Intervals.push_back(&I);
- llvm::sort(Intervals.begin(), Intervals.end(),
+ llvm::sort(Intervals,
[](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
// Gather all spill slots into a list.
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f0cfa2fbe4fd..cf78fb5a1f12 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -30,12 +30,6 @@ using namespace llvm;
TargetFrameLowering::~TargetFrameLowering() = default;
-/// The default implementation just looks at attribute "no-frame-pointer-elim".
-bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
- auto Attr = MF.getFunction().getFnAttribute("no-frame-pointer-elim");
- return Attr.getValueAsString() == "true";
-}
-
bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 963f8178b509..2a17af391105 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -339,42 +339,32 @@ bool TargetInstrInfo::PredicateInstruction(
return MadeChange;
}
-bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool TargetInstrInfo::hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
+ size_t StartSize = Accesses.size();
for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
oe = MI.memoperands_end();
o != oe; ++o) {
- if ((*o)->isLoad()) {
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast_or_null<FixedStackPseudoSourceValue>(
- (*o)->getPseudoValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
+ if ((*o)->isLoad() &&
+ dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ Accesses.push_back(*o);
}
- return false;
+ return Accesses.size() != StartSize;
}
-bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool TargetInstrInfo::hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
+ size_t StartSize = Accesses.size();
for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
oe = MI.memoperands_end();
o != oe; ++o) {
- if ((*o)->isStore()) {
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast_or_null<FixedStackPseudoSourceValue>(
- (*o)->getPseudoValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
+ if ((*o)->isStore() &&
+ dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ Accesses.push_back(*o);
}
- return false;
+ return Accesses.size() != StartSize;
}
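With the new signatures, callers receive every frame-index memory operand rather than a single MMO/frame-index pair. A sketch of caller-side usage (the wrapper function is illustrative, not part of the patch):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"

    static bool touchesFixedStackSlot(const llvm::MachineInstr &MI,
                                      const llvm::TargetInstrInfo &TII) {
      llvm::SmallVector<const llvm::MachineMemOperand *, 2> Accesses;
      return TII.hasLoadFromStackSlot(MI, Accesses) ||
             TII.hasStoreToStackSlot(MI, Accesses);
    }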
bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
@@ -388,8 +378,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
return true;
}
unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
- // Convert bit size to byte size to be consistent with
- // MCRegisterClass::getSize().
+ // Convert bit size to byte size.
if (BitSize % 8)
return false;
@@ -584,7 +573,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
}
if (NewMI) {
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ NewMI->setMemRefs(MF, MI.memoperands());
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
NewMI->mayStore()) &&
@@ -654,10 +643,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
// Copy the memoperands from the load to the folded instruction.
if (MI.memoperands_empty()) {
- NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
+ NewMI->setMemRefs(MF, LoadMI.memoperands());
} else {
// Handle the rare case of folding multiple loads.
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ NewMI->setMemRefs(MF, MI.memoperands());
for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
E = LoadMI.memoperands_end();
I != E; ++I) {
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 7b1b76821daa..e86190375642 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -161,7 +161,8 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
}
- if (TT.isGNUEnvironment() || TT.isOSFuchsia()) {
+ if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
+ (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
setLibcallName(RTLIB::SINCOS_F80, "sincosl");
@@ -599,14 +600,23 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
- setOperationAction(ISD::FMINNAN, VT, Expand);
- setOperationAction(ISD::FMAXNAN, VT, Expand);
+ setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
+ setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
+ setOperationAction(ISD::FMINIMUM, VT, Expand);
+ setOperationAction(ISD::FMAXIMUM, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
setOperationAction(ISD::ABS, VT, Expand);
+ setOperationAction(ISD::FSHL, VT, Expand);
+ setOperationAction(ISD::FSHR, VT, Expand);
+ setOperationAction(ISD::SADDSAT, VT, Expand);
+ setOperationAction(ISD::UADDSAT, VT, Expand);
+ setOperationAction(ISD::SSUBSAT, VT, Expand);
+ setOperationAction(ISD::USUBSAT, VT, Expand);
+ setOperationAction(ISD::SMULFIX, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -666,6 +676,7 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+ setOperationAction(ISD::FCBRT, VT, Expand);
setOperationAction(ISD::FLOG , VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
@@ -968,7 +979,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
MIB.add(MI->getOperand(i));
// Inherit previous memory operands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.cloneMemRefs(*MI);
assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
// Add a new memory operand for this FI.
@@ -1096,7 +1107,7 @@ void TargetLoweringBase::computeRegisterProperties(
LegalIntReg = IntReg;
} else {
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
- (const MVT::SimpleValueType)LegalIntReg;
+ (MVT::SimpleValueType)LegalIntReg;
ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
}
}
@@ -1443,6 +1454,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case CatchPad: return 0;
case CatchSwitch: return 0;
case CleanupPad: return 0;
+ case FNeg: return ISD::FNEG;
case Add: return ISD::ADD;
case FAdd: return ISD::FADD;
case Sub: return ISD::SUB;
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 16140f0b12be..cb2fe691d702 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -95,6 +95,161 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
const TargetMachine &TgtM) {
TargetLoweringObjectFile::Initialize(Ctx, TgtM);
TM = &TgtM;
+
+ CodeModel::Model CM = TgtM.getCodeModel();
+
+ switch (TgtM.getTargetTriple().getArch()) {
+ case Triple::arm:
+ case Triple::armeb:
+ case Triple::thumb:
+ case Triple::thumbeb:
+ if (Ctx.getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
+ break;
+ // Fallthrough if not using EHABI
+ LLVM_FALLTHROUGH;
+ case Triple::ppc:
+ case Triple::x86:
+ PersonalityEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_indirect |
+ dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = isPositionIndependent()
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
+ break;
+ case Triple::x86_64:
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (CM == CodeModel::Small
+ ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ ((CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
+ } else {
+ PersonalityEncoding =
+ (CM == CodeModel::Small || CM == CodeModel::Medium)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = (CM == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = (CM == CodeModel::Small)
+ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::hexagon:
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ if (isPositionIndependent()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
+ LSDAEncoding |= dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
+ }
+ break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ // The small model guarantees static code/data size < 4GB, but not where it
+ // will be in memory. Most of these could end up >2GB away so even a signed
+ // pc-relative 32-bit address is insufficient, theoretically.
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::lanai:
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ break;
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
+ // MIPS uses indirect pointers to refer to personality functions and types, so
+ // that the eh_frame section can be read-only. DW.ref.personality will be
+ // generated for relocation.
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect;
+ // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't
+ // identify N64 from just a triple.
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ // We don't support PC-relative LSDA references in GAS so we use the default
+ // DW_EH_PE_absptr for those.
+
+ // FreeBSD must be explicit about the data size and use pcrel since its
+ // assembler/linker won't do the automatic conversion that the Linux tools
+ // do.
+ if (TgtM.getTargetTriple().isOSFreeBSD()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ }
+ break;
+ case Triple::ppc64:
+ case Triple::ppc64le:
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ break;
+ case Triple::sparcel:
+ case Triple::sparc:
+ if (isPositionIndependent()) {
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::sparcv9:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ case Triple::systemz:
+ // All currently-defined code models guarantee that 4-byte PC-relative
+ // values will be in range.
+ if (isPositionIndependent()) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ break;
+ default:
+ break;
+ }
}
void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
@@ -351,6 +506,30 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr;
}
+static unsigned getEntrySizeForKind(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString())
+ return 1;
+ else if (Kind.isMergeable2ByteCString())
+ return 2;
+ else if (Kind.isMergeable4ByteCString())
+ return 4;
+ else if (Kind.isMergeableConst4())
+ return 4;
+ else if (Kind.isMergeableConst8())
+ return 8;
+ else if (Kind.isMergeableConst16())
+ return 16;
+ else if (Kind.isMergeableConst32())
+ return 32;
+ else {
+ // We shouldn't have mergeable C strings or mergeable constants that we
+ // didn't handle above.
+ assert(!Kind.isMergeableCString() && "unknown string width");
+ assert(!Kind.isMergeableConst() && "unknown data width");
+ return 0;
+ }
+}
+
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
StringRef SectionName = GO->getSection();
@@ -395,7 +574,7 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
MCSectionELF *Section = getContext().getELFSection(
SectionName, getELFSectionType(SectionName, Kind), Flags,
- /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol);
+ getEntrySizeForKind(Kind), Group, UniqueID, AssociatedSymbol);
// Make sure that we did not get some other section with incompatible sh_link.
// This should not be possible due to UniqueID code above.
assert(Section->getAssociatedSymbol() == AssociatedSymbol &&
@@ -422,30 +601,6 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-static unsigned getEntrySizeForKind(SectionKind Kind) {
- if (Kind.isMergeable1ByteCString())
- return 1;
- else if (Kind.isMergeable2ByteCString())
- return 2;
- else if (Kind.isMergeable4ByteCString())
- return 4;
- else if (Kind.isMergeableConst4())
- return 4;
- else if (Kind.isMergeableConst8())
- return 8;
- else if (Kind.isMergeableConst16())
- return 16;
- else if (Kind.isMergeableConst32())
- return 32;
- else {
- // We shouldn't have mergeable C strings or mergeable constants that we
- // didn't handle above.
- assert(!Kind.isMergeableCString() && "unknown string width");
- assert(!Kind.isMergeableConst() && "unknown data width");
- return 0;
- }
-}
-
static MCSectionELF *selectELFSectionForGlobal(
MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
@@ -640,6 +795,14 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
}
+MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const {
+ // Use ".GCC.command.line" since this feature is to support clang's
+ // -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the
+ // same name.
+ return getContext().getELFSection(".GCC.command.line", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+}
+
void
TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
UseInitArray = UseInitArray_;
@@ -684,6 +847,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MachO::S_MOD_TERM_FUNC_POINTERS,
SectionKind::getData());
}
+
+ PersonalityEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel;
+ TTypeEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
}
void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
@@ -939,6 +1108,22 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
// .indirect_symbol _extfoo
// .long 0
//
+ // The indirect symbol table (and sections of non_lazy_symbol_pointers type)
+ // may point to both local (same translation unit) and global (other
+ // translation units) symbols. Example:
+ //
+ // .section __DATA,__pointers,non_lazy_symbol_pointers
+ // L1:
+ // .indirect_symbol _myGlobal
+ // .long 0
+ // L2:
+ // .indirect_symbol _myLocal
+ // .long _myLocal
+ //
+ // If the symbol is local, instead of the symbol's index, the assembler
+ // places the constant INDIRECT_SYMBOL_LOCAL into the indirect symbol table.
+ // Then the linker will notice the constant in the table and will look at the
+ // content of the symbol.
MachineModuleInfoMachO &MachOMMI =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MCContext &Ctx = getContext();
@@ -958,9 +1143,12 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::
- StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+ if (!StubSym.getPointer()) {
+ bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal();
+ // With the assumption that IsIndirectLocal == GV->hasLocalLinkage().
+ StubSym = MachineModuleInfoImpl::StubValueTy(const_cast<MCSymbol *>(Sym),
+ !IsIndirectLocal);
+ }
const MCExpr *BSymExpr =
MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
@@ -1296,8 +1484,25 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
unsigned Priority,
const MCSymbol *KeySym,
MCSectionCOFF *Default) {
- if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment())
- return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+ if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ // If the priority is the default, use .CRT$XCU, possibly associative.
+ if (Priority == 65535)
+ return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+
+ // Otherwise, we need to compute a new section name. Low priorities should
+ // run earlier. The linker will sort sections ASCII-betically, and we need a
+ // string that sorts between .CRT$XCA and .CRT$XCU. In the general case, we
+ // make a name like ".CRT$XCT12345", since that runs before .CRT$XCU. Really
+ // low priorities need to sort before 'L', since the CRT uses that
+ // internally, so we use ".CRT$XCA00001" for them.
+ SmallString<24> Name;
+ raw_svector_ostream OS(Name);
+ OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
+ MCSectionCOFF *Sec = Ctx.getCOFFSection(
+ Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ return Ctx.getAssociativeCOFFSection(Sec, KeySym, 0);
+ }
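The encoded name places each priority into the MSVC linker's lexical sort between .CRT$XCA and .CRT$XCU. A small sketch of the naming rule described in the comment (helper name assumed):

    #include <cstdio>
    #include <string>

    static std::string crtSectionForPriority(unsigned Priority) {
      // Priorities below 200 use 'A' so they sort before the CRT-internal 'L'.
      char Buf[32];
      std::snprintf(Buf, sizeof(Buf), ".CRT$XC%c%05u",
                    Priority < 200 ? 'A' : 'T', Priority);
      return Buf;
    }

    // crtSectionForPriority(101) == ".CRT$XCA00101"
    // crtSectionForPriority(400) == ".CRT$XCT00400"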
std::string Name = IsCtor ? ".ctors" : ".dtors";
if (Priority != 65535)
@@ -1571,6 +1776,10 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
void TargetLoweringObjectFileWasm::InitializeWasm() {
StaticCtorSection =
getContext().getWasmSection(".init_array", SectionKind::getData());
+
+ // We don't use PersonalityEncoding and LSDAEncoding because we don't emit
+ // .cfi directives. We use TTypeEncoding to encode typeinfo global variables.
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 853e71d0efa5..3c133fb8594e 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -23,15 +23,34 @@ using namespace llvm;
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
- // Check to see if we should eliminate all frame pointers.
- if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF))
+ // Check to see if the target wants to forcibly keep the frame pointer.
+ if (MF.getSubtarget().getFrameLowering()->keepFramePointer(MF))
return true;
- // Check to see if we should eliminate non-leaf frame pointers.
- if (MF.getFunction().hasFnAttribute("no-frame-pointer-elim-non-leaf"))
- return MF.getFrameInfo().hasCalls();
+ const Function &F = MF.getFunction();
+
+ // TODO: Remove support for the old `fp elim` function attributes after fully
+ // migrating to "frame-pointer".
+ if (!F.hasFnAttribute("frame-pointer")) {
+ // Check to see if we should eliminate all frame pointers.
+ if (F.getFnAttribute("no-frame-pointer-elim").getValueAsString() == "true")
+ return true;
+
+ // Check to see if we should eliminate non-leaf frame pointers.
+ if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf"))
+ return MF.getFrameInfo().hasCalls();
- return false;
+ return false;
+ }
+
+ StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
+ if (FP == "all")
+ return true;
+ if (FP == "non-leaf")
+ return MF.getFrameInfo().hasCalls();
+ if (FP == "none")
+ return false;
+ llvm_unreachable("unknown frame pointer flag");
}
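The patch replaces the boolean "no-frame-pointer-elim*" string attributes with a single "frame-pointer" attribute taking "all", "non-leaf", or "none". A minimal sketch of how a front end might set it, assuming the attribute spelling shown above:

    #include "llvm/IR/Function.h"

    // Mode must be one of "all", "non-leaf", "none" to satisfy the check above.
    static void setFramePointerMode(llvm::Function &F, llvm::StringRef Mode) {
      F.addFnAttr("frame-pointer", Mode);
    }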
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
index 2db03288f2ac..28126fcf766d 100644
--- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -107,10 +108,10 @@ static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
cl::desc("Print LLVM IR input to isel pass"));
static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
-static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
- cl::desc("Verify generated machine code"),
- cl::init(false),
- cl::ZeroOrMore);
+static cl::opt<cl::boolOrDefault>
+ VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::ZeroOrMore);
enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
// Enable or disable the MachineOutliner.
static cl::opt<RunOutliner> EnableMachineOutliner(
@@ -136,13 +137,15 @@ static cl::opt<std::string> PrintMachineInstrs(
"print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden);
-static cl::opt<int> EnableGlobalISelAbort(
+static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
"global-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"global\" instruction selection "
- "fails to lower/select an instruction: 0 disable the abort, "
- "1 enable the abort, and "
- "2 disable the abort but emit a diagnostic on failure"),
- cl::init(1));
+ "fails to lower/select an instruction"),
+ cl::values(
+ clEnumValN(GlobalISelAbortMode::Disable, "0", "Disable the abort"),
+ clEnumValN(GlobalISelAbortMode::Enable, "1", "Enable the abort"),
+ clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
+ "Disable the abort but emit a diagnostic on failure")));
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
@@ -342,11 +345,39 @@ static AnalysisID getPassIDFromName(StringRef PassName) {
return PI ? PI->getTypeInfo() : nullptr;
}
+static std::pair<StringRef, unsigned>
+getPassNameAndInstanceNum(StringRef PassName) {
+ StringRef Name, InstanceNumStr;
+ std::tie(Name, InstanceNumStr) = PassName.split(',');
+
+ unsigned InstanceNum = 0;
+ if (!InstanceNumStr.empty() && InstanceNumStr.getAsInteger(10, InstanceNum))
+ report_fatal_error("invalid pass instance specifier " + PassName);
+
+ return std::make_pair(Name, InstanceNum);
+}
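The specifier now optionally carries an instance number, so repeated passes can be addressed individually; counting starts at zero, so "machine-cp,1" names the second run of machine-cp. A self-contained sketch that mirrors (but does not call) the file-local helper above:

    #include <cstdlib>
    #include <string>
    #include <utility>

    static std::pair<std::string, unsigned> parseSpec(const std::string &Spec) {
      std::string::size_type Comma = Spec.find(',');
      if (Comma == std::string::npos)
        return {Spec, 0};                    // no instance: first occurrence
      return {Spec.substr(0, Comma),
              (unsigned)std::strtoul(Spec.c_str() + Comma + 1, nullptr, 10)};
    }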
+
void TargetPassConfig::setStartStopPasses() {
- StartBefore = getPassIDFromName(StartBeforeOpt);
- StartAfter = getPassIDFromName(StartAfterOpt);
- StopBefore = getPassIDFromName(StopBeforeOpt);
- StopAfter = getPassIDFromName(StopAfterOpt);
+ StringRef StartBeforeName;
+ std::tie(StartBeforeName, StartBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StartBeforeOpt);
+
+ StringRef StartAfterName;
+ std::tie(StartAfterName, StartAfterInstanceNum) =
+ getPassNameAndInstanceNum(StartAfterOpt);
+
+ StringRef StopBeforeName;
+ std::tie(StopBeforeName, StopBeforeInstanceNum)
+ = getPassNameAndInstanceNum(StopBeforeOpt);
+
+ StringRef StopAfterName;
+ std::tie(StopAfterName, StopAfterInstanceNum)
+ = getPassNameAndInstanceNum(StopAfterOpt);
+
+ StartBefore = getPassIDFromName(StartBeforeName);
+ StartAfter = getPassIDFromName(StartAfterName);
+ StopBefore = getPassIDFromName(StopBeforeName);
+ StopAfter = getPassIDFromName(StopAfterName);
if (StartBefore && StartAfter)
report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
Twine(StartAfterOptName) + Twine(" specified!"));
@@ -383,6 +414,9 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
if (TM.Options.EnableIPRA)
setRequiresCodeGenSCCOrder();
+ if (EnableGlobalISelAbort.getNumOccurrences())
+ TM.Options.GlobalISelAbort = EnableGlobalISelAbort;
+
setStartStopPasses();
}
@@ -418,8 +452,13 @@ TargetPassConfig::TargetPassConfig()
"triple set?");
}
-bool TargetPassConfig::hasLimitedCodeGenPipeline() const {
- return StartBefore || StartAfter || StopBefore || StopAfter;
+bool TargetPassConfig::willCompleteCodeGenPipeline() {
+ return StopBeforeOpt.empty() && StopAfterOpt.empty();
+}
+
+bool TargetPassConfig::hasLimitedCodeGenPipeline() {
+ return !StartBeforeOpt.empty() || !StartAfterOpt.empty() ||
+ !willCompleteCodeGenPipeline();
}
std::string
@@ -482,9 +521,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
// and shouldn't reference it.
AnalysisID PassID = P->getPassID();
- if (StartBefore == PassID)
+ if (StartBefore == PassID && StartBeforeCount++ == StartBeforeInstanceNum)
Started = true;
- if (StopBefore == PassID)
+ if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum)
Stopped = true;
if (Started && !Stopped) {
std::string Banner;
@@ -507,9 +546,11 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
} else {
delete P;
}
- if (StopAfter == PassID)
+
+ if (StopAfter == PassID && StopAfterCount++ == StopAfterInstanceNum)
Stopped = true;
- if (StartAfter == PassID)
+
+ if (StartAfter == PassID && StartAfterCount++ == StartAfterInstanceNum)
Started = true;
if (Stopped && !Started)
report_fatal_error("Cannot stop compilation after pass that is not run");
@@ -552,7 +593,7 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) {
}
void TargetPassConfig::addVerifyPass(const std::string &Banner) {
- bool Verify = VerifyMachineCode;
+ bool Verify = VerifyMachineCode == cl::BOU_TRUE;
#ifdef EXPENSIVE_CHECKS
if (VerifyMachineCode == cl::BOU_UNSET)
Verify = TM->isMachineVerifierClean();
@@ -714,18 +755,34 @@ void TargetPassConfig::addISelPrepare() {
bool TargetPassConfig::addCoreISelPasses() {
// Enable FastISel with -fast-isel, but allow that to be overridden.
TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
- if (EnableFastISelOption == cl::BOU_TRUE ||
- (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel()))
- TM->setFastISel(true);
- // Ask the target for an instruction selector.
- // Explicitly enabling fast-isel should override implicitly enabled
- // global-isel.
- if (EnableGlobalISelOption == cl::BOU_TRUE ||
- (EnableGlobalISelOption == cl::BOU_UNSET &&
- TM->Options.EnableGlobalISel && EnableFastISelOption != cl::BOU_TRUE)) {
+ // Determine an instruction selector.
+ enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
+ SelectorType Selector;
+
+ if (EnableFastISelOption == cl::BOU_TRUE)
+ Selector = SelectorType::FastISel;
+ else if (EnableGlobalISelOption == cl::BOU_TRUE ||
+ (TM->Options.EnableGlobalISel &&
+ EnableGlobalISelOption != cl::BOU_FALSE))
+ Selector = SelectorType::GlobalISel;
+ else if (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())
+ Selector = SelectorType::FastISel;
+ else
+ Selector = SelectorType::SelectionDAG;
+
+ // Consistently set TM->Options.EnableFastISel and EnableGlobalISel.
+ if (Selector == SelectorType::FastISel) {
+ TM->setFastISel(true);
+ TM->setGlobalISel(false);
+ } else if (Selector == SelectorType::GlobalISel) {
TM->setFastISel(false);
+ TM->setGlobalISel(true);
+ }
+ // Add instruction selector passes.
+ if (Selector == SelectorType::GlobalISel) {
+ SaveAndRestore<bool> SavedAddingMachinePasses(AddingMachinePasses, true);
if (addIRTranslator())
return true;
@@ -804,15 +861,17 @@ void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
// Insert a machine instr printer pass after the specified pass.
- if (!StringRef(PrintMachineInstrs.getValue()).equals("") &&
- !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) {
- const PassRegistry *PR = PassRegistry::getPassRegistry();
- const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
- const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
- assert (TPI && IPI && "Pass ID not registered!");
- const char *TID = (const char *)(TPI->getTypeInfo());
- const char *IID = (const char *)(IPI->getTypeInfo());
- insertPass(TID, IID);
+ StringRef PrintMachineInstrsPassName = PrintMachineInstrs.getValue();
+ if (!PrintMachineInstrsPassName.equals("") &&
+ !PrintMachineInstrsPassName.equals("option-unspecified")) {
+ if (const PassInfo *TPI = getPassInfo(PrintMachineInstrsPassName)) {
+ const PassRegistry *PR = PassRegistry::getPassRegistry();
+ const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
+ assert(IPI && "failed to get \"machineinstr-printer\" PassInfo!");
+ const char *TID = (const char *)(TPI->getTypeInfo());
+ const char *IID = (const char *)(IPI->getTypeInfo());
+ insertPass(TID, IID);
+ }
}
// Print the instruction selected machine code...
@@ -981,7 +1040,8 @@ bool TargetPassConfig::getOptimizeRegAlloc() const {
}
/// RegisterRegAlloc's global Registry tracks allocator registration.
-MachinePassRegistry RegisterRegAlloc::Registry;
+MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
+ RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
@@ -1155,14 +1215,9 @@ void TargetPassConfig::addBlockPlacement() {
/// GlobalISel Configuration
//===---------------------------------------------------------------------===//
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
- if (EnableGlobalISelAbort.getNumOccurrences() > 0)
- return EnableGlobalISelAbort == 1;
-
- // When no abort behaviour is specified, we don't abort if the target says
- // that GISel is enabled.
- return !TM->Options.EnableGlobalISel;
+ return TM->Options.GlobalISelAbort == GlobalISelAbortMode::Enable;
}
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
- return EnableGlobalISelAbort == 2;
+ return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0ca435016ead..4b72f6a84ca1 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -592,17 +592,17 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// the two-address register.
// e.g.
// %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
- // %reg1029 = MOV8rr %reg1028
+ // %reg1029 = COPY %reg1028
// %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
- // insert => %reg1030 = MOV8rr %reg1028
+ // insert => %reg1030 = COPY %reg1028
// %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
- // In this case, it might not be possible to coalesce the second MOV8rr
+ // In this case, it might not be possible to coalesce the second COPY
// instruction if the first one is coalesced. So it would be profitable to
// commute it:
// %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
- // %reg1029 = MOV8rr %reg1028
+ // %reg1029 = COPY %reg1028
// %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
- // insert => %reg1030 = MOV8rr %reg1029
+ // insert => %reg1030 = COPY %reg1029
// %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
if (!isPlainlyKilled(MI, regC, LIS))
@@ -929,9 +929,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator Begin = MI;
MachineBasicBlock::iterator AfterMI = std::next(Begin);
MachineBasicBlock::iterator End = AfterMI;
- while (End->isCopy() &&
- regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
- Defs.push_back(End->getOperand(0).getReg());
+ while (End != MBB->end()) {
+ End = skipDebugInstructionsForward(End, MBB->end());
+ if (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI))
+ Defs.push_back(End->getOperand(0).getReg());
+ else
+ break;
++End;
}
@@ -1608,23 +1611,28 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
if (AllUsesCopied) {
+ bool ReplacedAllUntiedUses = true;
if (!IsEarlyClobber) {
// Replace other (un-tied) uses of regB with LastCopiedReg.
for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB &&
- MO.isUse()) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.getSubReg() == SubRegB) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
+ } else {
+ ReplacedAllUntiedUses = false;
}
- MO.setReg(LastCopiedReg);
- MO.setSubReg(MO.getSubReg());
}
}
}
// Update live variables for regB.
- if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(*MI)) {
+ if (RemovedKillFlag && ReplacedAllUntiedUses &&
+ LV && LV->getVarInfo(RegB).removeKill(*MI)) {
MachineBasicBlock::iterator PrevMI = MI;
--PrevMI;
LV->addVirtualRegisterKilled(RegB, *PrevMI);
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 0ead2b8340ab..ed7bef667e77 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -525,7 +525,7 @@ void VirtRegRewriter::rewrite() {
// Preserve semantics of sub-register operands.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- if (NoSubRegLiveness) {
+ if (NoSubRegLiveness || !MRI->shouldTrackSubRegLiveness(VirtReg)) {
// A virtual register kill refers to the whole register, so we may
// have to add implicit killed operands for the super-register. A
// partial redef always kills and redefines the super-register.
diff --git a/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 83d04da5dd0c..e5002eb95346 100644
--- a/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -137,6 +137,7 @@ class WasmEHPrepare : public FunctionPass {
Value *LSDAField = nullptr; // lsda field
Value *SelectorField = nullptr; // selector
+ Function *ThrowF = nullptr; // wasm.throw() intrinsic
Function *CatchF = nullptr; // wasm.catch.extract() intrinsic
Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
Function *LSDAF = nullptr; // wasm.lsda() intrinsic
@@ -145,6 +146,9 @@ class WasmEHPrepare : public FunctionPass {
Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
Function *ClangCallTermF = nullptr; // __clang_call_terminate() function
+ bool prepareEHPads(Function &F);
+ bool prepareThrows(Function &F);
+
void prepareEHPad(BasicBlock *BB, unsigned Index);
void prepareTerminateCleanupPad(BasicBlock *BB);
@@ -177,7 +181,62 @@ bool WasmEHPrepare::doInitialization(Module &M) {
return false;
}
+// Erase each of the specified BBs if it has no remaining predecessors, and
+// also erase all of its dead children.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &BBs) {
+ SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
+ while (!WL.empty()) {
+ auto *BB = WL.pop_back_val();
+ if (pred_begin(BB) != pred_end(BB))
+ continue;
+ WL.append(succ_begin(BB), succ_end(BB));
+ DeleteDeadBlock(BB);
+ }
+}
+
bool WasmEHPrepare::runOnFunction(Function &F) {
+ bool Changed = false;
+ Changed |= prepareThrows(F);
+ Changed |= prepareEHPads(F);
+ return Changed;
+}
+
+bool WasmEHPrepare::prepareThrows(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+ bool Changed = false;
+
+ // wasm.throw() intrinsic, which will be lowered to the wasm 'throw' instruction.
+ ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
+
+ // Insert an unreachable instruction after a call to @llvm.wasm.throw and
+ // delete all following instructions within the BB, and delete all the dead
+ // children of the BB as well.
+ for (User *U : ThrowF->users()) {
+ // A call to @llvm.wasm.throw() is only generated from a
+ // __builtin_wasm_throw() builtin call within libcxxabi, and cannot be an
+ // InvokeInst.
+ auto *ThrowI = cast<CallInst>(U);
+ if (ThrowI->getFunction() != &F)
+ continue;
+ Changed = true;
+ auto *BB = ThrowI->getParent();
+ SmallVector<BasicBlock *, 4> Succs(succ_begin(BB), succ_end(BB));
+ auto &InstList = BB->getInstList();
+ InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
+ IRB.SetInsertPoint(BB);
+ IRB.CreateUnreachable();
+ eraseDeadBBsAndChildren(Succs);
+ }
+
+ return Changed;
+}
+
+bool WasmEHPrepare::prepareEHPads(Function &F) {
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+
SmallVector<BasicBlock *, 16> CatchPads;
SmallVector<BasicBlock *, 16> CleanupPads;
for (BasicBlock &BB : F) {
@@ -194,9 +253,6 @@ bool WasmEHPrepare::runOnFunction(Function &F) {
return false;
assert(F.hasPersonalityFn() && "Personality function not found");
- Module &M = *F.getParent();
- IRBuilder<> IRB(F.getContext());
-
// __wasm_lpad_context global variable
LPadContextGV = cast<GlobalVariable>(
M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
@@ -300,7 +356,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
// This is to create a map of <landingpad EH label, landingpad index> in
// SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
// Pseudocode: wasm.landingpad.index(Index);
- IRB.CreateCall(LPadIndexF, IRB.getInt32(Index));
+ IRB.CreateCall(LPadIndexF, {FPI, IRB.getInt32(Index)});
// Pseudocode: __wasm_lpad_context.lpad_index = index;
IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 65d0a7a774fe..6a15240fa6e0 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -218,7 +218,7 @@ static void calculateStateNumbersForInvokes(const Function *Fn,
// to. If the unwind edge came from an invoke, return null.
static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
Value *ParentPad) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (isa<InvokeInst>(TI))
return nullptr;
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
@@ -977,7 +977,7 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
break;
}
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
// CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst.
bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
// The token consumed by a CatchReturnInst must match the funclet token.
@@ -1074,7 +1074,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
AllocaInst *SpillSlot = nullptr;
Instruction *EHPad = PHIBlock->getFirstNonPHI();
- if (!isa<TerminatorInst>(EHPad)) {
+ if (!EHPad->isTerminator()) {
// If the EHPad isn't a terminator, then we can insert a load in this block
// that will dominate all uses.
SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
@@ -1148,8 +1148,7 @@ void WinEHPrepare::insertPHIStore(
BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
- if (PredBlock->isEHPad() &&
- isa<TerminatorInst>(PredBlock->getFirstNonPHI())) {
+ if (PredBlock->isEHPad() && PredBlock->getFirstNonPHI()->isTerminator()) {
// Pred is unsplittable, so we need to queue it on the worklist.
Worklist.push_back({PredBlock, PredVal});
return;
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
index 44a67743169e..cbcaa5692828 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -75,7 +75,7 @@ Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) {
Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols,
uint32_t InitialOffset) {
for (auto I : Symbols) {
- if (auto EC = visitSymbolRecord(I, InitialOffset))
+ if (auto EC = visitSymbolRecord(I, InitialOffset + Symbols.skew()))
return EC;
InitialOffset += I.length();
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
index 8de266b836b4..2a9753add311 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
@@ -14,25 +14,23 @@
using namespace llvm;
using namespace llvm::codeview;
-namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class CodeViewErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.codeview"; }
-
std::string message(int Condition) const override {
switch (static_cast<cv_error_code>(Condition)) {
case cv_error_code::unspecified:
- return "An unknown error has occurred.";
+ return "An unknown CodeView error has occurred.";
case cv_error_code::insufficient_buffer:
return "The buffer is not large enough to read the requested number of "
"bytes.";
case cv_error_code::corrupt_record:
return "The CodeView record is corrupted.";
case cv_error_code::no_records:
- return "There are no records";
+ return "There are no records.";
case cv_error_code::operation_unsupported:
return "The requested operation is not supported.";
case cv_error_code::unknown_member_record:
@@ -41,31 +39,10 @@ public:
llvm_unreachable("Unrecognized cv_error_code");
}
};
-} // end anonymous namespace
-
-static ManagedStatic<CodeViewErrorCategory> Category;
-
-char CodeViewError::ID = 0;
-
-CodeViewError::CodeViewError(cv_error_code C) : CodeViewError(C, "") {}
-CodeViewError::CodeViewError(const std::string &Context)
- : CodeViewError(cv_error_code::unspecified, Context) {}
-
-CodeViewError::CodeViewError(cv_error_code C, const std::string &Context)
- : Code(C) {
- ErrMsg = "CodeView Error: ";
- std::error_code EC = convertToErrorCode();
- if (Code != cv_error_code::unspecified)
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
+static llvm::ManagedStatic<CodeViewErrorCategory> CodeViewErrCategory;
+const std::error_category &llvm::codeview::CVErrorCategory() {
+ return *CodeViewErrCategory;
}
-void CodeViewError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-
-const std::string &CodeViewError::getErrorMessage() const { return ErrMsg; }
-
-std::error_code CodeViewError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char CodeViewError::ID;
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
index bf9dd7c86862..4001741f560a 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
@@ -79,7 +79,7 @@ Error DebugCrossModuleImportsSubsection::commit(
for (const auto &M : Mappings)
Ids.push_back(&M);
- llvm::sort(Ids.begin(), Ids.end(), [this](const T &L1, const T &L2) {
+ llvm::sort(Ids, [this](const T &L1, const T &L2) {
return Strings.getIdForString(L1->getKey()) <
Strings.getIdForString(L2->getKey());
});
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
index fd558aa9cc8a..5881bf177a55 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
@@ -14,8 +14,11 @@ using namespace llvm;
using namespace llvm::codeview;
Error DebugFrameDataSubsectionRef::initialize(BinaryStreamReader Reader) {
- if (auto EC = Reader.readObject(RelocPtr))
- return EC;
+ if (Reader.bytesRemaining() % sizeof(FrameData) != 0) {
+ if (auto EC = Reader.readObject(RelocPtr))
+ return EC;
+ }
+
if (Reader.bytesRemaining() % sizeof(FrameData) != 0)
return make_error<CodeViewError>(cv_error_code::corrupt_record,
"Invalid frame data record format!");
@@ -26,15 +29,30 @@ Error DebugFrameDataSubsectionRef::initialize(BinaryStreamReader Reader) {
return Error::success();
}
+Error DebugFrameDataSubsectionRef::initialize(BinaryStreamRef Section) {
+ BinaryStreamReader Reader(Section);
+ return initialize(Reader);
+}
+
uint32_t DebugFrameDataSubsection::calculateSerializedSize() const {
- return 4 + sizeof(FrameData) * Frames.size();
+ uint32_t Size = sizeof(FrameData) * Frames.size();
+ if (IncludeRelocPtr)
+ Size += sizeof(uint32_t);
+ return Size;
}
Error DebugFrameDataSubsection::commit(BinaryStreamWriter &Writer) const {
- if (auto EC = Writer.writeInteger<uint32_t>(0))
- return EC;
-
- if (auto EC = Writer.writeArray(makeArrayRef(Frames)))
+ if (IncludeRelocPtr) {
+ if (auto EC = Writer.writeInteger<uint32_t>(0))
+ return EC;
+ }
+
+ std::vector<FrameData> SortedFrames(Frames.begin(), Frames.end());
+ std::sort(SortedFrames.begin(), SortedFrames.end(),
+ [](const FrameData &LHS, const FrameData &RHS) {
+ return LHS.RvaStart < RHS.RvaStart;
+ });
+ if (auto EC = Writer.writeArray(makeArrayRef(SortedFrames)))
return EC;
return Error::success();
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index d2acc9a21003..9b251f5931b3 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -91,7 +91,7 @@ std::vector<uint32_t> DebugStringTableSubsection::sortedIds() const {
Result.reserve(IdToString.size());
for (const auto &Entry : IdToString)
Result.push_back(Entry.first);
- llvm::sort(Result.begin(), Result.end());
+ llvm::sort(Result);
return Result;
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
index d8301cab1657..ef4e42f79ebc 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -200,6 +200,8 @@ static const EnumEntry<uint32_t> FrameProcSymFlagNames[] = {
CV_ENUM_CLASS_ENT(FrameProcedureOptions, Inlined),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, StrictSecurityChecks),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, SafeBuffers),
+ CV_ENUM_CLASS_ENT(FrameProcedureOptions, EncodedLocalBasePointerMask),
+ CV_ENUM_CLASS_ENT(FrameProcedureOptions, EncodedParamBasePointerMask),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, ProfileGuidedOptimization),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, ValidProfileCounts),
CV_ENUM_CLASS_ENT(FrameProcedureOptions, OptimizedForSpeed),
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index ca8007411cad..ddcad8c631d7 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -89,6 +89,8 @@ uint32_t LazyRandomTypeCollection::getOffsetOfType(TypeIndex Index) {
}
CVType LazyRandomTypeCollection::getType(TypeIndex Index) {
+ assert(!Index.isSimple());
+
auto EC = ensureTypeExists(Index);
error(std::move(EC));
assert(contains(Index));
@@ -97,6 +99,9 @@ CVType LazyRandomTypeCollection::getType(TypeIndex Index) {
}
Optional<CVType> LazyRandomTypeCollection::tryGetType(TypeIndex Index) {
+ if (Index.isSimple())
+ return None;
+
if (auto EC = ensureTypeExists(Index)) {
consumeError(std::move(EC));
return None;
@@ -151,6 +156,7 @@ Error LazyRandomTypeCollection::ensureTypeExists(TypeIndex TI) {
}
void LazyRandomTypeCollection::ensureCapacityFor(TypeIndex Index) {
+ assert(!Index.isSimple());
uint32_t MinSize = Index.toArrayIndex() + 1;
if (MinSize <= capacity())
@@ -163,6 +169,7 @@ void LazyRandomTypeCollection::ensureCapacityFor(TypeIndex Index) {
}
Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
+ assert(!TI.isSimple());
if (PartialOffsets.empty())
return fullScanForType(TI);
@@ -217,6 +224,7 @@ Optional<TypeIndex> LazyRandomTypeCollection::getNext(TypeIndex Prev) {
}
Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) {
+ assert(!TI.isSimple());
assert(PartialOffsets.empty());
TypeIndex CurrentTI = TypeIndex::fromArrayIndex(0);
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
index f8bf961f22a1..04e0bab745d3 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -32,8 +32,8 @@ namespace {
class CVSymbolDumperImpl : public SymbolVisitorCallbacks {
public:
CVSymbolDumperImpl(TypeCollection &Types, SymbolDumpDelegate *ObjDelegate,
- ScopedPrinter &W, bool PrintRecordBytes)
- : Types(Types), ObjDelegate(ObjDelegate), W(W),
+ ScopedPrinter &W, CPUType CPU, bool PrintRecordBytes)
+ : Types(Types), ObjDelegate(ObjDelegate), W(W), CompilationCPUType(CPU),
PrintRecordBytes(PrintRecordBytes), InFunctionScope(false) {}
/// CVSymbolVisitor overrides.
@@ -46,6 +46,8 @@ public:
Error visitSymbolEnd(CVSymbol &Record) override;
Error visitUnknownSymbol(CVSymbol &Record) override;
+ CPUType getCompilationCPUType() const { return CompilationCPUType; }
+
private:
void printLocalVariableAddrRange(const LocalVariableAddrRange &Range,
uint32_t RelocationOffset);
@@ -56,6 +58,9 @@ private:
SymbolDumpDelegate *ObjDelegate;
ScopedPrinter &W;
+ /// Save the machine or CPU type when dumping a compile symbol.
+ CPUType CompilationCPUType = CPUType::X64;
+
bool PrintRecordBytes;
bool InFunctionScope;
};
@@ -235,6 +240,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
W.printEnum("Language", Compile2.getLanguage(), getSourceLanguageNames());
W.printFlags("Flags", Compile2.getFlags(), getCompileSym2FlagNames());
W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames());
+ CompilationCPUType = Compile2.Machine;
std::string FrontendVersion;
{
raw_string_ostream Out(FrontendVersion);
@@ -255,9 +261,11 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Compile3Sym &Compile3) {
- W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames());
- W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames());
+ W.printEnum("Language", uint8_t(Compile3.getLanguage()), getSourceLanguageNames());
+ W.printFlags("Flags", uint32_t(Compile3.getFlags()),
+ getCompileSym3FlagNames());
W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames());
+ CompilationCPUType = Compile3.Machine;
std::string FrontendVersion;
{
raw_string_ostream Out(FrontendVersion);
@@ -415,6 +423,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
FrameProc.SectionIdOfExceptionHandler);
W.printFlags("Flags", static_cast<uint32_t>(FrameProc.Flags),
getFrameProcSymFlagNames());
+ W.printEnum("LocalFramePtrReg",
+ uint16_t(FrameProc.getLocalFramePtrReg(CompilationCPUType)),
+ getRegisterNames());
+ W.printEnum("ParamFramePtrReg",
+ uint16_t(FrameProc.getParamFramePtrReg(CompilationCPUType)),
+ getRegisterNames());
return Error::success();
}
@@ -625,21 +639,27 @@ Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
SymbolVisitorCallbackPipeline Pipeline;
SymbolDeserializer Deserializer(ObjDelegate.get(), Container);
- CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
+ CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, CompilationCPUType,
+ PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Dumper);
CVSymbolVisitor Visitor(Pipeline);
- return Visitor.visitSymbolRecord(Record);
+ auto Err = Visitor.visitSymbolRecord(Record);
+ CompilationCPUType = Dumper.getCompilationCPUType();
+ return Err;
}
Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) {
SymbolVisitorCallbackPipeline Pipeline;
SymbolDeserializer Deserializer(ObjDelegate.get(), Container);
- CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
+ CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, CompilationCPUType,
+ PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Dumper);
CVSymbolVisitor Visitor(Pipeline);
- return Visitor.visitSymbolStream(Symbols);
+ auto Err = Visitor.visitSymbolStream(Symbols);
+ CompilationCPUType = Dumper.getCompilationCPUType();
+ return Err;
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
new file mode 100644
index 000000000000..01746138ad1f
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
@@ -0,0 +1,94 @@
+//===- SymbolRecordHelpers.cpp ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+template <typename RecordT> RecordT createRecord(const CVSymbol &sym) {
+ RecordT record(static_cast<SymbolRecordKind>(sym.kind()));
+ cantFail(SymbolDeserializer::deserializeAs<RecordT>(sym, record));
+ return record;
+}
+
+uint32_t llvm::codeview::getScopeEndOffset(const CVSymbol &Sym) {
+ assert(symbolOpensScope(Sym.kind()));
+ switch (Sym.kind()) {
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32:
+ case SymbolKind::S_GPROC32_ID:
+ case SymbolKind::S_LPROC32_ID:
+ case SymbolKind::S_LPROC32_DPC:
+ case SymbolKind::S_LPROC32_DPC_ID: {
+ ProcSym Proc = createRecord<ProcSym>(Sym);
+ return Proc.End;
+ }
+ case SymbolKind::S_BLOCK32: {
+ BlockSym Block = createRecord<BlockSym>(Sym);
+ return Block.End;
+ }
+ case SymbolKind::S_THUNK32: {
+ Thunk32Sym Thunk = createRecord<Thunk32Sym>(Sym);
+ return Thunk.End;
+ }
+ case SymbolKind::S_INLINESITE: {
+ InlineSiteSym Site = createRecord<InlineSiteSym>(Sym);
+ return Site.End;
+ }
+ default:
+ assert(false && "Unknown record type");
+ return 0;
+ }
+}
+
+uint32_t
+llvm::codeview::getScopeParentOffset(const llvm::codeview::CVSymbol &Sym) {
+ assert(symbolOpensScope(Sym.kind()));
+ switch (Sym.kind()) {
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32:
+ case SymbolKind::S_GPROC32_ID:
+ case SymbolKind::S_LPROC32_ID:
+ case SymbolKind::S_LPROC32_DPC:
+ case SymbolKind::S_LPROC32_DPC_ID: {
+ ProcSym Proc = createRecord<ProcSym>(Sym);
+ return Proc.Parent;
+ }
+ case SymbolKind::S_BLOCK32: {
+ BlockSym Block = createRecord<BlockSym>(Sym);
+ return Block.Parent;
+ }
+ case SymbolKind::S_THUNK32: {
+ Thunk32Sym Thunk = createRecord<Thunk32Sym>(Sym);
+ return Thunk.Parent;
+ }
+ case SymbolKind::S_INLINESITE: {
+ InlineSiteSym Site = createRecord<InlineSiteSym>(Sym);
+ return Site.Parent;
+ }
+ default:
+ assert(false && "Unknown record type");
+ return 0;
+ }
+}
+
+CVSymbolArray
+llvm::codeview::limitSymbolArrayToScope(const CVSymbolArray &Symbols,
+ uint32_t ScopeBegin) {
+ CVSymbol Opener = *Symbols.at(ScopeBegin);
+ assert(symbolOpensScope(Opener.kind()));
+ uint32_t EndOffset = getScopeEndOffset(Opener);
+ CVSymbol Closer = *Symbols.at(EndOffset);
+ EndOffset += Closer.RecordData.size();
+ return Symbols.substream(ScopeBegin, EndOffset);
+}
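
A minimal usage sketch for the new helpers above; `Syms` and `ProcOffset` are placeholder names for a symbol stream and the offset of a scope-opening record (for example S_GPROC32), not part of the change:

#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"

using namespace llvm;
using namespace llvm::codeview;

// Visit only the records that belong to one function scope, with the opener
// and its matching end record included in the substream.
void visitProcScope(const CVSymbolArray &Syms, uint32_t ProcOffset) {
  CVSymbolArray Scope = limitSymbolArrayToScope(Syms, ProcOffset);
  for (const CVSymbol &Sym : Scope) {
    // getScopeEndOffset()/getScopeParentOffset() can be queried on any
    // scope-opening record met while walking this substream.
    (void)Sym;
  }
}
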
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index e77c8e8f02f5..2af8205cebc3 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -471,3 +471,77 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
return Error::success();
}
+
+RegisterId codeview::decodeFramePtrReg(EncodedFramePtrReg EncodedReg,
+ CPUType CPU) {
+ assert(unsigned(EncodedReg) < 4);
+ switch (CPU) {
+ // FIXME: Add ARM and AArch64 variants here.
+ default:
+ break;
+ case CPUType::Intel8080:
+ case CPUType::Intel8086:
+ case CPUType::Intel80286:
+ case CPUType::Intel80386:
+ case CPUType::Intel80486:
+ case CPUType::Pentium:
+ case CPUType::PentiumPro:
+ case CPUType::Pentium3:
+ switch (EncodedReg) {
+ case EncodedFramePtrReg::None: return RegisterId::NONE;
+ case EncodedFramePtrReg::StackPtr: return RegisterId::VFRAME;
+ case EncodedFramePtrReg::FramePtr: return RegisterId::EBP;
+ case EncodedFramePtrReg::BasePtr: return RegisterId::EBX;
+ }
+ llvm_unreachable("bad encoding");
+ case CPUType::X64:
+ switch (EncodedReg) {
+ case EncodedFramePtrReg::None: return RegisterId::NONE;
+ case EncodedFramePtrReg::StackPtr: return RegisterId::RSP;
+ case EncodedFramePtrReg::FramePtr: return RegisterId::RBP;
+ case EncodedFramePtrReg::BasePtr: return RegisterId::R13;
+ }
+ llvm_unreachable("bad encoding");
+ }
+ return RegisterId::NONE;
+}
+
+EncodedFramePtrReg codeview::encodeFramePtrReg(RegisterId Reg, CPUType CPU) {
+ switch (CPU) {
+ // FIXME: Add ARM and AArch64 variants here.
+ default:
+ break;
+ case CPUType::Intel8080:
+ case CPUType::Intel8086:
+ case CPUType::Intel80286:
+ case CPUType::Intel80386:
+ case CPUType::Intel80486:
+ case CPUType::Pentium:
+ case CPUType::PentiumPro:
+ case CPUType::Pentium3:
+ switch (Reg) {
+ case RegisterId::VFRAME:
+ return EncodedFramePtrReg::StackPtr;
+ case RegisterId::EBP:
+ return EncodedFramePtrReg::FramePtr;
+ case RegisterId::EBX:
+ return EncodedFramePtrReg::BasePtr;
+ default:
+ break;
+ }
+ break;
+ case CPUType::X64:
+ switch (Reg) {
+ case RegisterId::RSP:
+ return EncodedFramePtrReg::StackPtr;
+ case RegisterId::RBP:
+ return EncodedFramePtrReg::FramePtr;
+ case RegisterId::R13:
+ return EncodedFramePtrReg::BasePtr;
+ default:
+ break;
+ }
+ break;
+ }
+ return EncodedFramePtrReg::None;
+}
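
A minimal sketch of how the new encoding helpers round-trip; the header that declares decodeFramePtrReg()/encodeFramePtrReg() is not shown in this diff, so the include is assumed:

// Assumed: the CodeView header declaring decodeFramePtrReg/encodeFramePtrReg,
// CPUType, RegisterId and EncodedFramePtrReg is already included.
using namespace llvm::codeview;

// The two-bit frame-pointer encoding in S_FRAMEPROC is CPU-relative: the same
// encoded value names EBP on 32-bit x86 but RBP on x64.
void framePtrRegExample() {
  RegisterId R86 = decodeFramePtrReg(EncodedFramePtrReg::FramePtr,
                                     CPUType::Pentium3); // RegisterId::EBP
  RegisterId R64 = decodeFramePtrReg(EncodedFramePtrReg::FramePtr,
                                     CPUType::X64);      // RegisterId::RBP
  EncodedFramePtrReg E = encodeFramePtrReg(R64, CPUType::X64);
  (void)R86;
  (void)E; // E == EncodedFramePtrReg::FramePtr
}
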
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index 7c68c9167c98..f5d3bea43a14 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -361,7 +361,6 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) {
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) {
printTypeIndex("PointeeType", Ptr.getReferentType());
- W->printHex("PointerAttributes", uint32_t(Ptr.getOptions()));
W->printEnum("PtrType", unsigned(Ptr.getPointerKind()),
makeArrayRef(PtrKindNames));
W->printEnum("PtrMode", unsigned(Ptr.getMode()), makeArrayRef(PtrModeNames));
@@ -371,6 +370,8 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) {
W->printNumber("IsVolatile", Ptr.isVolatile());
W->printNumber("IsUnaligned", Ptr.isUnaligned());
W->printNumber("IsRestrict", Ptr.isRestrict());
+ W->printNumber("IsThisPtr&", Ptr.isLValueReferenceThisPtr());
+ W->printNumber("IsThisPtr&&", Ptr.isRValueReferenceThisPtr());
W->printNumber("SizeOf", Ptr.getSize());
if (Ptr.isPointerToMember()) {
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
index 24fe5fcb28d4..332d67470da5 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
@@ -74,6 +74,9 @@ StringRef TypeIndex::simpleTypeName(TypeIndex TI) {
if (TI.isNoneType())
return "<no type>";
+ if (TI == TypeIndex::NullptrT())
+ return "std::nullptr_t";
+
// This is a simple type.
for (const auto &SimpleTypeName : SimpleTypeNames) {
if (SimpleTypeName.Kind == TI.getSimpleKind()) {
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
new file mode 100644
index 000000000000..2a66474cf5b6
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
@@ -0,0 +1,53 @@
+//===- TypeRecordHelpers.cpp ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+template <typename RecordT> static ClassOptions getUdtOptions(CVType CVT) {
+ RecordT Record;
+ if (auto EC = TypeDeserializer::deserializeAs<RecordT>(CVT, Record)) {
+ consumeError(std::move(EC));
+ return ClassOptions::None;
+ }
+ return Record.getOptions();
+}
+
+bool llvm::codeview::isUdtForwardRef(CVType CVT) {
+ ClassOptions UdtOptions = ClassOptions::None;
+ switch (CVT.kind()) {
+ case LF_STRUCTURE:
+ case LF_CLASS:
+ case LF_INTERFACE:
+ UdtOptions = getUdtOptions<ClassRecord>(std::move(CVT));
+ break;
+ case LF_ENUM:
+ UdtOptions = getUdtOptions<EnumRecord>(std::move(CVT));
+ break;
+ case LF_UNION:
+ UdtOptions = getUdtOptions<UnionRecord>(std::move(CVT));
+ break;
+ default:
+ return false;
+ }
+ return (UdtOptions & ClassOptions::ForwardReference) != ClassOptions::None;
+}
+
+TypeIndex llvm::codeview::getModifiedType(const CVType &CVT) {
+ assert(CVT.kind() == LF_MODIFIER);
+ SmallVector<TypeIndex, 1> Refs;
+ discoverTypeIndices(CVT, Refs);
+ return Refs.front();
+}
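
A minimal caller-side sketch for the new type-record helpers above; `CVT` stands for any deserialized CVType record and is not part of the change:

#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"

using namespace llvm::codeview;

// Peel an LF_MODIFIER (const/volatile/unaligned) wrapper back to the type it
// modifies; for anything else return the "no type" sentinel.
TypeIndex underlyingType(const CVType &CVT) {
  if (CVT.kind() == LF_MODIFIER)
    return getModifiedType(CVT);
  return TypeIndex::None();
}

// Forward references carry no member list; the full definition appears later
// in the stream, so callers usually skip them here.
bool isCompleteUdt(const CVType &CVT) {
  return !isUdtForwardRef(CVT);
}
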
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index 2e29c9d7dfa0..bae11ce6a6a1 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
@@ -63,7 +64,12 @@ class TypeStreamMerger {
public:
explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
: IndexMap(SourceToDest) {
- SourceToDest.clear();
+ // When dealing with precompiled headers objects, all data in SourceToDest
+ // belongs to the precompiled headers object, and is assumed to be already
+ // remapped to the target PDB. Any forthcoming type that will be merged in
+ // might potentially back-reference this data. We also don't want to resolve
+ // the types in the precompiled object twice.
+ CurIndex += SourceToDest.size();
}
static const TypeIndex Untranslated;
@@ -71,7 +77,7 @@ public:
// Local hashing entry points
Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
MergingTypeTableBuilder &DestTypes,
- const CVTypeArray &IdsAndTypes);
+ const CVTypeArray &IdsAndTypes, Optional<uint32_t> &S);
Error mergeIdRecords(MergingTypeTableBuilder &Dest,
ArrayRef<TypeIndex> TypeSourceToDest,
const CVTypeArray &Ids);
@@ -82,13 +88,15 @@ public:
Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
GlobalTypeTableBuilder &DestTypes,
const CVTypeArray &IdsAndTypes,
- ArrayRef<GloballyHashedType> Hashes);
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &S);
Error mergeIdRecords(GlobalTypeTableBuilder &Dest,
ArrayRef<TypeIndex> TypeSourceToDest,
const CVTypeArray &Ids,
ArrayRef<GloballyHashedType> Hashes);
Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
- ArrayRef<GloballyHashedType> Hashes);
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &S);
private:
Error doit(const CVTypeArray &Types);
@@ -156,6 +164,8 @@ private:
return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
}
+ Expected<bool> shouldRemapType(const CVType &Type);
+
Optional<Error> LastError;
bool UseGlobalHashes = false;
@@ -185,6 +195,8 @@ private:
/// Temporary storage that we use to copy a record's data while re-writing
/// its type indices.
SmallVector<uint8_t, 256> RemapStorage;
+
+ Optional<uint32_t> PCHSignature;
};
} // end anonymous namespace
@@ -261,22 +273,27 @@ Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest,
Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
MergingTypeTableBuilder &DestTypes,
- const CVTypeArray &IdsAndTypes) {
+ const CVTypeArray &IdsAndTypes,
+ Optional<uint32_t> &S) {
DestIdStream = &DestIds;
DestTypeStream = &DestTypes;
UseGlobalHashes = false;
- return doit(IdsAndTypes);
+ auto Err = doit(IdsAndTypes);
+ S = PCHSignature;
+ return Err;
}
// Global hashing entry points
Error TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
const CVTypeArray &Types,
- ArrayRef<GloballyHashedType> Hashes) {
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &S) {
DestGlobalTypeStream = &Dest;
UseGlobalHashes = true;
GlobalHashes = Hashes;
-
- return doit(Types);
+ auto Err = doit(Types);
+ S = PCHSignature;
+ return Err;
}
Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
@@ -294,12 +311,15 @@ Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
GlobalTypeTableBuilder &DestTypes,
const CVTypeArray &IdsAndTypes,
- ArrayRef<GloballyHashedType> Hashes) {
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &S) {
DestGlobalIdStream = &DestIds;
DestGlobalTypeStream = &DestTypes;
UseGlobalHashes = true;
GlobalHashes = Hashes;
- return doit(IdsAndTypes);
+ auto Err = doit(IdsAndTypes);
+ S = PCHSignature;
+ return Err;
}
Error TypeStreamMerger::doit(const CVTypeArray &Types) {
@@ -326,7 +346,7 @@ Error TypeStreamMerger::doit(const CVTypeArray &Types) {
"second pass found more bad indices");
if (!LastError && NumBadIndices == BadIndicesRemaining) {
return llvm::make_error<CodeViewError>(
- cv_error_code::corrupt_record, "input type graph contains cycles");
+ cv_error_code::corrupt_record, "Input type graph contains cycles");
}
}
@@ -345,25 +365,30 @@ Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
}
Error TypeStreamMerger::remapType(const CVType &Type) {
- auto DoSerialize =
- [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
- return remapIndices(Type, Storage);
- };
+ auto R = shouldRemapType(Type);
+ if (!R)
+ return R.takeError();
TypeIndex DestIdx = Untranslated;
- if (LLVM_LIKELY(UseGlobalHashes)) {
- GlobalTypeTableBuilder &Dest =
- isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
- GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
- DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize);
- } else {
- MergingTypeTableBuilder &Dest =
- isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
-
- RemapStorage.resize(Type.RecordData.size());
- ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
- if (!Result.empty())
- DestIdx = Dest.insertRecordBytes(Result);
+ if (*R) {
+ auto DoSerialize =
+ [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
+ return remapIndices(Type, Storage);
+ };
+ if (LLVM_LIKELY(UseGlobalHashes)) {
+ GlobalTypeTableBuilder &Dest =
+ isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
+ GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
+ DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize);
+ } else {
+ MergingTypeTableBuilder &Dest =
+ isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
+
+ RemapStorage.resize(Type.RecordData.size());
+ ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
+ if (!Result.empty())
+ DestIdx = Dest.insertRecordBytes(Result);
+ }
}
addMapping(DestIdx);
@@ -418,25 +443,28 @@ Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest,
Error llvm::codeview::mergeTypeAndIdRecords(
MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
- SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes) {
+ SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
+ Optional<uint32_t> &PCHSignature) {
TypeStreamMerger M(SourceToDest);
- return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes);
+ return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHSignature);
}
Error llvm::codeview::mergeTypeAndIdRecords(
GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
- ArrayRef<GloballyHashedType> Hashes) {
+ ArrayRef<GloballyHashedType> Hashes, Optional<uint32_t> &PCHSignature) {
TypeStreamMerger M(SourceToDest);
- return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes);
+ return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes,
+ PCHSignature);
}
Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
SmallVectorImpl<TypeIndex> &SourceToDest,
const CVTypeArray &Types,
- ArrayRef<GloballyHashedType> Hashes) {
+ ArrayRef<GloballyHashedType> Hashes,
+ Optional<uint32_t> &PCHSignature) {
TypeStreamMerger M(SourceToDest);
- return M.mergeTypeRecords(Dest, Types, Hashes);
+ return M.mergeTypeRecords(Dest, Types, Hashes, PCHSignature);
}
Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
@@ -447,3 +475,20 @@ Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
TypeStreamMerger M(SourceToDest);
return M.mergeIdRecords(Dest, Types, Ids, Hashes);
}
+
+Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
+ // For object files containing precompiled types, we need to extract the
+ // signature, through EndPrecompRecord. This is done here for performance
+ // reasons, to avoid re-parsing the Types stream.
+ if (Type.kind() == LF_ENDPRECOMP) {
+ EndPrecompRecord EP;
+ if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
+ EP))
+ return joinErrors(std::move(EC), errorCorruptRecord());
+ if (PCHSignature.hasValue())
+ return errorCorruptRecord();
+ PCHSignature.emplace(EP.getSignature());
+ return false;
+ }
+ return true;
+}
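
A minimal caller-side sketch of the new Optional<uint32_t> out-parameter, assuming the destination builders, the SourceToDest map, and the IdsAndTypes array are set up elsewhere; registerPrecompSignature() is a hypothetical consumer, not part of the change:

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"

using namespace llvm;
using namespace llvm::codeview;

// Stand-in for whatever the consumer does with the signature.
static void registerPrecompSignature(uint32_t Sig) { (void)Sig; }

Error mergeOneObject(MergingTypeTableBuilder &DestIds,
                     MergingTypeTableBuilder &DestTypes,
                     SmallVectorImpl<TypeIndex> &SourceToDest,
                     const CVTypeArray &IdsAndTypes) {
  Optional<uint32_t> PCHSignature;
  if (auto Err = mergeTypeAndIdRecords(DestIds, DestTypes, SourceToDest,
                                       IdsAndTypes, PCHSignature))
    return Err; // propagate a merge failure to the caller
  // Set only when the input carried an LF_ENDPRECOMP record, i.e. the object
  // was built against a precompiled-types object.
  if (PCHSignature)
    registerPrecompSignature(*PCHSignature);
  return Error::success();
}
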
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 4582e036f9fc..54daf34ff253 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DJB.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -45,9 +46,9 @@ llvm::Error AppleAcceleratorTable::extract() {
uint32_t Offset = 0;
// Check that we can at least read the header.
- if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength)+4))
- return make_error<StringError>("Section too small: cannot read header.",
- inconvertibleErrorCode());
+ if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength) + 4))
+ return createStringError(errc::illegal_byte_sequence,
+ "Section too small: cannot read header.");
Hdr.Magic = AccelSection.getU32(&Offset);
Hdr.Version = AccelSection.getU16(&Offset);
@@ -62,9 +63,9 @@ llvm::Error AppleAcceleratorTable::extract() {
// equal to the size for an empty table and hence pointer after the section.
if (!AccelSection.isValidOffset(sizeof(Hdr) + Hdr.HeaderDataLength +
Hdr.BucketCount * 4 + Hdr.HashCount * 8 - 1))
- return make_error<StringError>(
- "Section too small: cannot read buckets and hashes.",
- inconvertibleErrorCode());
+ return createStringError(
+ errc::illegal_byte_sequence,
+ "Section too small: cannot read buckets and hashes.");
HdrData.DIEOffsetBase = AccelSection.getU32(&Offset);
uint32_t NumAtoms = AccelSection.getU32(&Offset);
@@ -380,8 +381,8 @@ llvm::Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
uint32_t *Offset) {
// Check that we can read the fixed-size part.
if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1))
- return make_error<StringError>("Section too small: cannot read header.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Section too small: cannot read header.");
UnitLength = AS.getU32(Offset);
Version = AS.getU16(Offset);
@@ -395,9 +396,9 @@ llvm::Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
AugmentationStringSize = alignTo(AS.getU32(Offset), 4);
if (!AS.isValidOffsetForDataOfSize(*Offset, AugmentationStringSize))
- return make_error<StringError>(
- "Section too small: cannot read header augmentation.",
- inconvertibleErrorCode());
+ return createStringError(
+ errc::illegal_byte_sequence,
+ "Section too small: cannot read header augmentation.");
AugmentationString.resize(AugmentationStringSize);
AS.getU8(Offset, reinterpret_cast<uint8_t *>(AugmentationString.data()),
AugmentationStringSize);
@@ -439,8 +440,8 @@ DWARFDebugNames::Abbrev DWARFDebugNames::AbbrevMapInfo::getTombstoneKey() {
Expected<DWARFDebugNames::AttributeEncoding>
DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) {
if (*Offset >= EntriesBase) {
- return make_error<StringError>("Incorrectly terminated abbreviation table.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Incorrectly terminated abbreviation table.");
}
uint32_t Index = Section.AccelSection.getULEB128(Offset);
@@ -465,8 +466,8 @@ DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) {
Expected<DWARFDebugNames::Abbrev>
DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) {
if (*Offset >= EntriesBase) {
- return make_error<StringError>("Incorrectly terminated abbreviation table.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Incorrectly terminated abbreviation table.");
}
uint32_t Code = Section.AccelSection.getULEB128(Offset);
@@ -501,9 +502,8 @@ Error DWARFDebugNames::NameIndex::extract() {
Offset += Hdr.NameCount * 4;
if (!AS.isValidOffsetForDataOfSize(Offset, Hdr.AbbrevTableSize))
- return make_error<StringError>(
- "Section too small: cannot read abbreviations.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Section too small: cannot read abbreviations.");
EntriesBase = Offset + Hdr.AbbrevTableSize;
@@ -514,10 +514,9 @@ Error DWARFDebugNames::NameIndex::extract() {
if (isSentinel(*AbbrevOr))
return Error::success();
- if (!Abbrevs.insert(std::move(*AbbrevOr)).second) {
- return make_error<StringError>("Duplicate abbreviation code.",
- inconvertibleErrorCode());
- }
+ if (!Abbrevs.insert(std::move(*AbbrevOr)).second)
+ return createStringError(errc::invalid_argument,
+ "Duplicate abbreviation code.");
}
}
DWARFDebugNames::Entry::Entry(const NameIndex &NameIdx, const Abbrev &Abbr)
@@ -600,8 +599,8 @@ Expected<DWARFDebugNames::Entry>
DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const {
const DWARFDataExtractor &AS = Section.AccelSection;
if (!AS.isValidOffset(*Offset))
- return make_error<StringError>("Incorrectly terminated entry list.",
- inconvertibleErrorCode());
+ return createStringError(errc::illegal_byte_sequence,
+ "Incorrectly terminated entry list.");
uint32_t AbbrevCode = AS.getULEB128(Offset);
if (AbbrevCode == 0)
@@ -609,16 +608,15 @@ DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const {
const auto AbbrevIt = Abbrevs.find_as(AbbrevCode);
if (AbbrevIt == Abbrevs.end())
- return make_error<StringError>("Invalid abbreviation.",
- inconvertibleErrorCode());
+ return createStringError(errc::invalid_argument, "Invalid abbreviation.");
Entry E(*this, *AbbrevIt);
dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32};
for (auto &Value : E.Values) {
if (!Value.extractValue(AS, Offset, FormParams))
- return make_error<StringError>("Error extracting index attribute values.",
- inconvertibleErrorCode());
+ return createStringError(errc::io_error,
+ "Error extracting index attribute values.");
}
return std::move(E);
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 9d2554ff9e2e..e6620ee3dd1d 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -99,22 +99,18 @@ using ContributionCollection =
// Collect all the contributions to the string offsets table from all units,
// sort them by their starting offsets and remove duplicates.
static ContributionCollection
-collectContributionData(DWARFContext::cu_iterator_range CUs,
- DWARFContext::tu_section_iterator_range TUSs) {
+collectContributionData(DWARFContext::unit_iterator_range Units) {
ContributionCollection Contributions;
- for (const auto &CU : CUs)
- Contributions.push_back(CU->getStringOffsetsTableContribution());
- for (const auto &TUS : TUSs)
- for (const auto &TU : TUS)
- Contributions.push_back(TU->getStringOffsetsTableContribution());
-
+ for (const auto &U : Units)
+ Contributions.push_back(U->getStringOffsetsTableContribution());
// Sort the contributions so that any invalid ones are placed at
// the start of the contributions vector. This way they are reported
// first.
- llvm::sort(Contributions.begin(), Contributions.end(),
+ llvm::sort(Contributions,
[](const Optional<StrOffsetsContributionDescriptor> &L,
const Optional<StrOffsetsContributionDescriptor> &R) {
- if (L && R) return L->Base < R->Base;
+ if (L && R)
+ return L->Base < R->Base;
return R.hasValue();
});
@@ -136,9 +132,8 @@ collectContributionData(DWARFContext::cu_iterator_range CUs,
static void dumpDWARFv5StringOffsetsSection(
raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj,
const DWARFSection &StringOffsetsSection, StringRef StringSection,
- DWARFContext::cu_iterator_range CUs,
- DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian) {
- auto Contributions = collectContributionData(CUs, TUSs);
+ DWARFContext::unit_iterator_range Units, bool LittleEndian) {
+ auto Contributions = collectContributionData(Units);
DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0);
DataExtractor StrData(StringSection, LittleEndian, 0);
uint64_t SectionSize = StringOffsetsSection.Data.size();
@@ -215,18 +210,18 @@ static void dumpDWARFv5StringOffsetsSection(
// a header containing size and version number. Alternatively, it may be a
// monolithic series of string offsets, as generated by the pre-DWARF v5
// implementation of split DWARF.
-static void dumpStringOffsetsSection(
- raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj,
- const DWARFSection &StringOffsetsSection, StringRef StringSection,
- DWARFContext::cu_iterator_range CUs,
- DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian,
- unsigned MaxVersion) {
+static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
+ const DWARFObject &Obj,
+ const DWARFSection &StringOffsetsSection,
+ StringRef StringSection,
+ DWARFContext::unit_iterator_range Units,
+ bool LittleEndian, unsigned MaxVersion) {
// If we have at least one (compile or type) unit with DWARF v5 or greater,
// we assume that the section is formatted like a DWARF v5 string offsets
// section.
if (MaxVersion >= 5)
dumpDWARFv5StringOffsetsSection(OS, SectionName, Obj, StringOffsetsSection,
- StringSection, CUs, TUSs, LittleEndian);
+ StringSection, Units, LittleEndian);
else {
DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
uint32_t offset = 0;
@@ -254,19 +249,12 @@ static void dumpStringOffsetsSection(
static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
DIDumpOptions DumpOpts, uint16_t Version,
uint8_t AddrSize) {
- // TODO: Make this more general: add callback types to Error.h, create
- // implementation and make all DWARF classes use them.
- static auto WarnCallback = [](Error Warn) {
- handleAllErrors(std::move(Warn), [](ErrorInfoBase &Info) {
- WithColor::warning() << Info.message() << '\n';
- });
- };
uint32_t Offset = 0;
while (AddrData.isValidOffset(Offset)) {
DWARFDebugAddrTable AddrTable;
uint32_t TableOffset = Offset;
- if (Error Err = AddrTable.extract(AddrData, &Offset, Version,
- AddrSize, WarnCallback)) {
+ if (Error Err = AddrTable.extract(AddrData, &Offset, Version, AddrSize,
+ DWARFContext::dumpWarning)) {
WithColor::error() << toString(std::move(Err)) << '\n';
// Keep going after an error, if we can, assuming that the length field
// could be read. If it couldn't, stop reading the section.
@@ -281,9 +269,11 @@ static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
}
// Dump the .debug_rnglists or .debug_rnglists.dwo section (DWARF v5).
-static void dumpRnglistsSection(raw_ostream &OS,
- DWARFDataExtractor &rnglistData,
- DIDumpOptions DumpOpts) {
+static void
+dumpRnglistsSection(raw_ostream &OS, DWARFDataExtractor &rnglistData,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress,
+ DIDumpOptions DumpOpts) {
uint32_t Offset = 0;
while (rnglistData.isValidOffset(Offset)) {
llvm::DWARFDebugRnglistTable Rnglists;
@@ -297,16 +287,36 @@ static void dumpRnglistsSection(raw_ostream &OS,
break;
Offset = TableOffset + Length;
} else {
- Rnglists.dump(OS, DumpOpts);
+ Rnglists.dump(OS, LookupPooledAddress, DumpOpts);
}
}
}
+static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts,
+ DWARFDataExtractor Data,
+ const MCRegisterInfo *MRI,
+ Optional<uint64_t> DumpOffset) {
+ uint32_t Offset = 0;
+ DWARFDebugLoclists Loclists;
+
+ DWARFListTableHeader Header(".debug_loclists", "locations");
+ if (Error E = Header.extract(Data, &Offset)) {
+ WithColor::error() << toString(std::move(E)) << '\n';
+ return;
+ }
+
+ Header.dump(OS, DumpOpts);
+ DataExtractor LocData(Data.getData().drop_front(Offset),
+ Data.isLittleEndian(), Header.getAddrSize());
+
+ Loclists.parse(LocData, Header.getVersion());
+ Loclists.dump(OS, 0, MRI, DumpOffset);
+}
+
void DWARFContext::dump(
raw_ostream &OS, DIDumpOptions DumpOpts,
std::array<Optional<uint64_t>, DIDT_ID_Count> DumpOffsets) {
- Optional<uint64_t> DumpOffset;
uint64_t DumpType = DumpOpts.DumpType;
StringRef Extension = sys::path::extension(DObj->getFileName());
@@ -323,13 +333,13 @@ void DWARFContext::dump(
bool Explicit = DumpType != DIDT_All && !IsDWO;
bool ExplicitDWO = Explicit && IsDWO;
auto shouldDump = [&](bool Explicit, const char *Name, unsigned ID,
- StringRef Section) {
- DumpOffset = DumpOffsets[ID];
+ StringRef Section) -> Optional<uint64_t> * {
unsigned Mask = 1U << ID;
bool Should = (DumpType & Mask) && (Explicit || !Section.empty());
- if (Should)
- OS << "\n" << Name << " contents:\n";
- return Should;
+ if (!Should)
+ return nullptr;
+ OS << "\n" << Name << " contents:\n";
+ return &DumpOffsets[ID];
};
// Dump individual sections.
@@ -340,57 +350,63 @@ void DWARFContext::dump(
DObj->getAbbrevDWOSection()))
getDebugAbbrevDWO()->dump(OS);
- auto dumpDebugInfo = [&](bool IsExplicit, const char *Name,
- DWARFSection Section, cu_iterator_range CUs) {
- if (shouldDump(IsExplicit, Name, DIDT_ID_DebugInfo, Section.Data)) {
- if (DumpOffset)
- getDIEForOffset(DumpOffset.getValue())
+ auto dumpDebugInfo = [&](const char *Name, unit_iterator_range Units) {
+ OS << '\n' << Name << " contents:\n";
+ if (auto DumpOffset = DumpOffsets[DIDT_ID_DebugInfo])
+ for (const auto &U : Units)
+ U->getDIEForOffset(DumpOffset.getValue())
.dump(OS, 0, DumpOpts.noImplicitRecursion());
- else
- for (const auto &CU : CUs)
- CU->dump(OS, DumpOpts);
- }
+ else
+ for (const auto &U : Units)
+ U->dump(OS, DumpOpts);
};
- dumpDebugInfo(Explicit, ".debug_info", DObj->getInfoSection(),
- compile_units());
- dumpDebugInfo(ExplicitDWO, ".debug_info.dwo", DObj->getInfoDWOSection(),
- dwo_compile_units());
+ if ((DumpType & DIDT_DebugInfo)) {
+ if (Explicit || getNumCompileUnits())
+ dumpDebugInfo(".debug_info", info_section_units());
+ if (ExplicitDWO || getNumDWOCompileUnits())
+ dumpDebugInfo(".debug_info.dwo", dwo_info_section_units());
+ }
- auto dumpDebugType = [&](const char *Name,
- tu_section_iterator_range TUSections) {
+ auto dumpDebugType = [&](const char *Name, unit_iterator_range Units) {
OS << '\n' << Name << " contents:\n";
- DumpOffset = DumpOffsets[DIDT_ID_DebugTypes];
- for (const auto &TUS : TUSections)
- for (const auto &TU : TUS)
- if (DumpOffset)
- TU->getDIEForOffset(*DumpOffset)
- .dump(OS, 0, DumpOpts.noImplicitRecursion());
- else
- TU->dump(OS, DumpOpts);
+ for (const auto &U : Units)
+ if (auto DumpOffset = DumpOffsets[DIDT_ID_DebugTypes])
+ U->getDIEForOffset(*DumpOffset)
+ .dump(OS, 0, DumpOpts.noImplicitRecursion());
+ else
+ U->dump(OS, DumpOpts);
};
if ((DumpType & DIDT_DebugTypes)) {
if (Explicit || getNumTypeUnits())
- dumpDebugType(".debug_types", type_unit_sections());
+ dumpDebugType(".debug_types", types_section_units());
if (ExplicitDWO || getNumDWOTypeUnits())
- dumpDebugType(".debug_types.dwo", dwo_type_unit_sections());
+ dumpDebugType(".debug_types.dwo", dwo_types_section_units());
}
- if (shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc,
- DObj->getLocSection().Data)) {
- getDebugLoc()->dump(OS, getRegisterInfo(), DumpOffset);
+ if (const auto *Off = shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc,
+ DObj->getLocSection().Data)) {
+ getDebugLoc()->dump(OS, getRegisterInfo(), *Off);
+ }
+ if (const auto *Off =
+ shouldDump(Explicit, ".debug_loclists", DIDT_ID_DebugLoclists,
+ DObj->getLoclistsSection().Data)) {
+ DWARFDataExtractor Data(*DObj, DObj->getLoclistsSection(), isLittleEndian(),
+ 0);
+ dumpLoclistsSection(OS, DumpOpts, Data, getRegisterInfo(), *Off);
}
- if (shouldDump(ExplicitDWO, ".debug_loc.dwo", DIDT_ID_DebugLoc,
- DObj->getLocDWOSection().Data)) {
- getDebugLocDWO()->dump(OS, getRegisterInfo(), DumpOffset);
+ if (const auto *Off =
+ shouldDump(ExplicitDWO, ".debug_loc.dwo", DIDT_ID_DebugLoc,
+ DObj->getLocDWOSection().Data)) {
+ getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), *Off);
}
- if (shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame,
- DObj->getDebugFrameSection()))
- getDebugFrame()->dump(OS, getRegisterInfo(), DumpOffset);
+ if (const auto *Off = shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame,
+ DObj->getDebugFrameSection()))
+ getDebugFrame()->dump(OS, getRegisterInfo(), *Off);
- if (shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame,
- DObj->getEHFrameSection()))
- getEHFrame()->dump(OS, getRegisterInfo(), DumpOffset);
+ if (const auto *Off = shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame,
+ DObj->getEHFrameSection()))
+ getEHFrame()->dump(OS, getRegisterInfo(), *Off);
if (DumpType & DIDT_DebugMacro) {
if (Explicit || !getDebugMacro()->empty()) {
@@ -409,38 +425,41 @@ void DWARFContext::dump(
}
auto DumpLineSection = [&](DWARFDebugLine::SectionParser Parser,
- DIDumpOptions DumpOpts) {
+ DIDumpOptions DumpOpts,
+ Optional<uint64_t> DumpOffset) {
while (!Parser.done()) {
if (DumpOffset && Parser.getOffset() != *DumpOffset) {
- Parser.skip();
+ Parser.skip(dumpWarning);
continue;
}
OS << "debug_line[" << format("0x%8.8x", Parser.getOffset()) << "]\n";
if (DumpOpts.Verbose) {
- Parser.parseNext(DWARFDebugLine::warn, DWARFDebugLine::warn, &OS);
+ Parser.parseNext(dumpWarning, dumpWarning, &OS);
} else {
- DWARFDebugLine::LineTable LineTable = Parser.parseNext();
+ DWARFDebugLine::LineTable LineTable =
+ Parser.parseNext(dumpWarning, dumpWarning);
LineTable.dump(OS, DumpOpts);
}
}
};
- if (shouldDump(Explicit, ".debug_line", DIDT_ID_DebugLine,
- DObj->getLineSection().Data)) {
+ if (const auto *Off = shouldDump(Explicit, ".debug_line", DIDT_ID_DebugLine,
+ DObj->getLineSection().Data)) {
DWARFDataExtractor LineData(*DObj, DObj->getLineSection(), isLittleEndian(),
0);
DWARFDebugLine::SectionParser Parser(LineData, *this, compile_units(),
- type_unit_sections());
- DumpLineSection(Parser, DumpOpts);
+ type_units());
+ DumpLineSection(Parser, DumpOpts, *Off);
}
- if (shouldDump(ExplicitDWO, ".debug_line.dwo", DIDT_ID_DebugLine,
- DObj->getLineDWOSection().Data)) {
+ if (const auto *Off =
+ shouldDump(ExplicitDWO, ".debug_line.dwo", DIDT_ID_DebugLine,
+ DObj->getLineDWOSection().Data)) {
DWARFDataExtractor LineData(*DObj, DObj->getLineDWOSection(),
isLittleEndian(), 0);
DWARFDebugLine::SectionParser Parser(LineData, *this, dwo_compile_units(),
- dwo_type_unit_sections());
- DumpLineSection(Parser, DumpOpts);
+ dwo_type_units());
+ DumpLineSection(Parser, DumpOpts, *Off);
}
if (shouldDump(Explicit, ".debug_cu_index", DIDT_ID_DebugCUIndex,
@@ -509,56 +528,64 @@ void DWARFContext::dump(
}
}
+ auto LookupPooledAddress = [&](uint32_t Index) -> Optional<SectionedAddress> {
+ const auto &CUs = compile_units();
+ auto I = CUs.begin();
+ if (I == CUs.end())
+ return None;
+ return (*I)->getAddrOffsetSectionItem(Index);
+ };
+
if (shouldDump(Explicit, ".debug_rnglists", DIDT_ID_DebugRnglists,
DObj->getRnglistsSection().Data)) {
DWARFDataExtractor RnglistData(*DObj, DObj->getRnglistsSection(),
isLittleEndian(), 0);
- dumpRnglistsSection(OS, RnglistData, DumpOpts);
+ dumpRnglistsSection(OS, RnglistData, LookupPooledAddress, DumpOpts);
}
if (shouldDump(ExplicitDWO, ".debug_rnglists.dwo", DIDT_ID_DebugRnglists,
DObj->getRnglistsDWOSection().Data)) {
DWARFDataExtractor RnglistData(*DObj, DObj->getRnglistsDWOSection(),
isLittleEndian(), 0);
- dumpRnglistsSection(OS, RnglistData, DumpOpts);
+ dumpRnglistsSection(OS, RnglistData, LookupPooledAddress, DumpOpts);
}
if (shouldDump(Explicit, ".debug_pubnames", DIDT_ID_DebugPubnames,
- DObj->getPubNamesSection()))
- DWARFDebugPubTable(DObj->getPubNamesSection(), isLittleEndian(), false)
+ DObj->getPubNamesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getPubNamesSection(), isLittleEndian(), false)
.dump(OS);
if (shouldDump(Explicit, ".debug_pubtypes", DIDT_ID_DebugPubtypes,
- DObj->getPubTypesSection()))
- DWARFDebugPubTable(DObj->getPubTypesSection(), isLittleEndian(), false)
+ DObj->getPubTypesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getPubTypesSection(), isLittleEndian(), false)
.dump(OS);
if (shouldDump(Explicit, ".debug_gnu_pubnames", DIDT_ID_DebugGnuPubnames,
- DObj->getGnuPubNamesSection()))
- DWARFDebugPubTable(DObj->getGnuPubNamesSection(), isLittleEndian(),
+ DObj->getGnuPubNamesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getGnuPubNamesSection(), isLittleEndian(),
true /* GnuStyle */)
.dump(OS);
if (shouldDump(Explicit, ".debug_gnu_pubtypes", DIDT_ID_DebugGnuPubtypes,
- DObj->getGnuPubTypesSection()))
- DWARFDebugPubTable(DObj->getGnuPubTypesSection(), isLittleEndian(),
+ DObj->getGnuPubTypesSection().Data))
+ DWARFDebugPubTable(*DObj, DObj->getGnuPubTypesSection(), isLittleEndian(),
true /* GnuStyle */)
.dump(OS);
if (shouldDump(Explicit, ".debug_str_offsets", DIDT_ID_DebugStrOffsets,
DObj->getStringOffsetSection().Data))
- dumpStringOffsetsSection(
- OS, "debug_str_offsets", *DObj, DObj->getStringOffsetSection(),
- DObj->getStringSection(), compile_units(), type_unit_sections(),
- isLittleEndian(), getMaxVersion());
+ dumpStringOffsetsSection(OS, "debug_str_offsets", *DObj,
+ DObj->getStringOffsetSection(),
+ DObj->getStringSection(), normal_units(),
+ isLittleEndian(), getMaxVersion());
if (shouldDump(ExplicitDWO, ".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets,
DObj->getStringOffsetDWOSection().Data))
- dumpStringOffsetsSection(
- OS, "debug_str_offsets.dwo", *DObj, DObj->getStringOffsetDWOSection(),
- DObj->getStringDWOSection(), dwo_compile_units(),
- dwo_type_unit_sections(), isLittleEndian(), getMaxVersion());
+ dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", *DObj,
+ DObj->getStringOffsetDWOSection(),
+ DObj->getStringDWOSection(), dwo_units(),
+ isLittleEndian(), getMaxDWOVersion());
- if (shouldDump(Explicit, ".gnu_index", DIDT_ID_GdbIndex,
+ if (shouldDump(Explicit, ".gdb_index", DIDT_ID_GdbIndex,
DObj->getGdbIndexSection())) {
getGdbIndex().dump(OS);
}
@@ -584,11 +611,12 @@ void DWARFContext::dump(
}
DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
- DWOCUs.parseDWO(*this, DObj->getInfoDWOSection(), true);
+ parseDWOUnits(LazyParse);
if (const auto &CUI = getCUIndex()) {
if (const auto *R = CUI.getFromHash(Hash))
- return DWOCUs.getUnitForIndexEntry(*R);
+ return dyn_cast_or_null<DWARFCompileUnit>(
+ DWOUnits.getUnitForIndexEntry(*R));
return nullptr;
}
@@ -607,14 +635,14 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
continue;
}
if (DWOCU->getDWOId() == Hash)
- return DWOCU.get();
+ return dyn_cast<DWARFCompileUnit>(DWOCU.get());
}
return nullptr;
}
DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) {
- parseCompileUnits();
- if (auto *CU = CUs.getUnitForOffset(Offset))
+ parseNormalUnits();
+ if (auto *CU = NormalUnits.getUnitForOffset(Offset))
return CU->getDIEForOffset(Offset);
return DWARFDie();
}
@@ -690,26 +718,28 @@ const DWARFDebugLoc *DWARFContext::getDebugLoc() {
return Loc.get();
Loc.reset(new DWARFDebugLoc);
- // Assume all compile units have the same address byte size.
+ // Assume all units have the same address byte size.
if (getNumCompileUnits()) {
DWARFDataExtractor LocData(*DObj, DObj->getLocSection(), isLittleEndian(),
- getCompileUnitAtIndex(0)->getAddressByteSize());
+ getUnitAtIndex(0)->getAddressByteSize());
Loc->parse(LocData);
}
return Loc.get();
}
-const DWARFDebugLocDWO *DWARFContext::getDebugLocDWO() {
+const DWARFDebugLoclists *DWARFContext::getDebugLocDWO() {
if (LocDWO)
return LocDWO.get();
- LocDWO.reset(new DWARFDebugLocDWO());
+ LocDWO.reset(new DWARFDebugLoclists());
// Assume all compile units have the same address byte size.
- if (getNumCompileUnits()) {
- DataExtractor LocData(DObj->getLocDWOSection().Data, isLittleEndian(),
- getCompileUnitAtIndex(0)->getAddressByteSize());
- LocDWO->parse(LocData);
- }
+ // FIXME: We don't need AddressSize for split DWARF since relocatable
+ // addresses cannot appear there. At the moment DWARFExpression requires it.
+ DataExtractor LocData(DObj->getLocDWOSection().Data, isLittleEndian(), 4);
+ // Use version 4. DWO does not support the DWARF v5 .debug_loclists yet and
+ // that means we are parsing the new style .debug_loc (pre-standatized version
+ // of the .debug_loclists).
+ LocDWO->parse(LocData, 4 /* Version */);
return LocDWO.get();
}
@@ -737,7 +767,7 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
// http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html
DWARFDataExtractor debugFrameData(DObj->getDebugFrameSection(),
isLittleEndian(), DObj->getAddressSize());
- DebugFrame.reset(new DWARFDebugFrame(false /* IsEH */));
+ DebugFrame.reset(new DWARFDebugFrame(getArch(), false /* IsEH */));
DebugFrame->parse(debugFrameData);
return DebugFrame.get();
}
@@ -748,7 +778,7 @@ const DWARFDebugFrame *DWARFContext::getEHFrame() {
DWARFDataExtractor debugFrameData(DObj->getEHFrameSection(), isLittleEndian(),
DObj->getAddressSize());
- DebugFrame.reset(new DWARFDebugFrame(true /* IsEH */));
+ DebugFrame.reset(new DWARFDebugFrame(getArch(), true /* IsEH */));
DebugFrame->parse(debugFrameData);
return DebugFrame.get();
}
@@ -806,9 +836,9 @@ const AppleAcceleratorTable &DWARFContext::getAppleObjC() {
const DWARFDebugLine::LineTable *
DWARFContext::getLineTableForUnit(DWARFUnit *U) {
Expected<const DWARFDebugLine::LineTable *> ExpectedLineTable =
- getLineTableForUnit(U, DWARFDebugLine::warn);
+ getLineTableForUnit(U, dumpWarning);
if (!ExpectedLineTable) {
- DWARFDebugLine::warn(ExpectedLineTable.takeError());
+ dumpWarning(ExpectedLineTable.takeError());
return nullptr;
}
return *ExpectedLineTable;
@@ -843,35 +873,34 @@ Expected<const DWARFDebugLine::LineTable *> DWARFContext::getLineTableForUnit(
RecoverableErrorCallback);
}
-void DWARFContext::parseCompileUnits() {
- CUs.parse(*this, DObj->getInfoSection());
-}
-
-void DWARFContext::parseTypeUnits() {
- if (!TUs.empty())
+void DWARFContext::parseNormalUnits() {
+ if (!NormalUnits.empty())
return;
+ DObj->forEachInfoSections([&](const DWARFSection &S) {
+ NormalUnits.addUnitsForSection(*this, S, DW_SECT_INFO);
+ });
+ NormalUnits.finishedInfoUnits();
DObj->forEachTypesSections([&](const DWARFSection &S) {
- TUs.emplace_back();
- TUs.back().parse(*this, S);
+ NormalUnits.addUnitsForSection(*this, S, DW_SECT_TYPES);
});
}
-void DWARFContext::parseDWOCompileUnits() {
- DWOCUs.parseDWO(*this, DObj->getInfoDWOSection());
-}
-
-void DWARFContext::parseDWOTypeUnits() {
- if (!DWOTUs.empty())
+void DWARFContext::parseDWOUnits(bool Lazy) {
+ if (!DWOUnits.empty())
return;
+ DObj->forEachInfoDWOSections([&](const DWARFSection &S) {
+ DWOUnits.addUnitsForDWOSection(*this, S, DW_SECT_INFO, Lazy);
+ });
+ DWOUnits.finishedInfoUnits();
DObj->forEachTypesDWOSections([&](const DWARFSection &S) {
- DWOTUs.emplace_back();
- DWOTUs.back().parseDWO(*this, S);
+ DWOUnits.addUnitsForDWOSection(*this, S, DW_SECT_TYPES, Lazy);
});
}
DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
- parseCompileUnits();
- return CUs.getUnitForOffset(Offset);
+ parseNormalUnits();
+ return dyn_cast_or_null<DWARFCompileUnit>(
+ NormalUnits.getUnitForOffset(Offset));
}
DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
@@ -1213,19 +1242,20 @@ class DWARFObjInMemory final : public DWARFObject {
const object::ObjectFile *Obj = nullptr;
std::vector<SectionName> SectionNames;
- using TypeSectionMap = MapVector<object::SectionRef, DWARFSectionMap,
+ using InfoSectionMap = MapVector<object::SectionRef, DWARFSectionMap,
std::map<object::SectionRef, unsigned>>;
- TypeSectionMap TypesSections;
- TypeSectionMap TypesDWOSections;
+ InfoSectionMap InfoSections;
+ InfoSectionMap TypesSections;
+ InfoSectionMap InfoDWOSections;
+ InfoSectionMap TypesDWOSections;
- DWARFSectionMap InfoSection;
DWARFSectionMap LocSection;
+ DWARFSectionMap LocListsSection;
DWARFSectionMap LineSection;
DWARFSectionMap RangeSection;
DWARFSectionMap RnglistsSection;
DWARFSectionMap StringOffsetSection;
- DWARFSectionMap InfoDWOSection;
DWARFSectionMap LineDWOSection;
DWARFSectionMap LocDWOSection;
DWARFSectionMap StringOffsetDWOSection;
@@ -1237,16 +1267,19 @@ class DWARFObjInMemory final : public DWARFObject {
DWARFSectionMap AppleNamespacesSection;
DWARFSectionMap AppleObjCSection;
DWARFSectionMap DebugNamesSection;
+ DWARFSectionMap PubNamesSection;
+ DWARFSectionMap PubTypesSection;
+ DWARFSectionMap GnuPubNamesSection;
+ DWARFSectionMap GnuPubTypesSection;
DWARFSectionMap *mapNameToDWARFSection(StringRef Name) {
return StringSwitch<DWARFSectionMap *>(Name)
- .Case("debug_info", &InfoSection)
.Case("debug_loc", &LocSection)
+ .Case("debug_loclists", &LocListsSection)
.Case("debug_line", &LineSection)
.Case("debug_str_offsets", &StringOffsetSection)
.Case("debug_ranges", &RangeSection)
.Case("debug_rnglists", &RnglistsSection)
- .Case("debug_info.dwo", &InfoDWOSection)
.Case("debug_loc.dwo", &LocDWOSection)
.Case("debug_line.dwo", &LineDWOSection)
.Case("debug_names", &DebugNamesSection)
@@ -1254,6 +1287,10 @@ class DWARFObjInMemory final : public DWARFObject {
.Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
.Case("debug_addr", &AddrSection)
.Case("apple_names", &AppleNamesSection)
+ .Case("debug_pubnames", &PubNamesSection)
+ .Case("debug_pubtypes", &PubTypesSection)
+ .Case("debug_gnu_pubnames", &GnuPubNamesSection)
+ .Case("debug_gnu_pubtypes", &GnuPubTypesSection)
.Case("apple_types", &AppleTypesSection)
.Case("apple_namespaces", &AppleNamespacesSection)
.Case("apple_namespac", &AppleNamespacesSection)
@@ -1267,12 +1304,8 @@ class DWARFObjInMemory final : public DWARFObject {
StringRef EHFrameSection;
StringRef StringSection;
StringRef MacinfoSection;
- StringRef PubNamesSection;
- StringRef PubTypesSection;
- StringRef GnuPubNamesSection;
StringRef AbbrevDWOSection;
StringRef StringDWOSection;
- StringRef GnuPubTypesSection;
StringRef CUIndexSection;
StringRef GdbIndexSection;
StringRef TUIndexSection;
@@ -1292,10 +1325,6 @@ class DWARFObjInMemory final : public DWARFObject {
.Case("eh_frame", &EHFrameSection)
.Case("debug_str", &StringSection)
.Case("debug_macinfo", &MacinfoSection)
- .Case("debug_pubnames", &PubNamesSection)
- .Case("debug_pubtypes", &PubTypesSection)
- .Case("debug_gnu_pubnames", &GnuPubNamesSection)
- .Case("debug_gnu_pubtypes", &GnuPubTypesSection)
.Case("debug_abbrev.dwo", &AbbrevDWOSection)
.Case("debug_str.dwo", &StringDWOSection)
.Case("debug_cu_index", &CUIndexSection)
@@ -1335,6 +1364,16 @@ public:
for (const auto &SecIt : Sections) {
if (StringRef *SectionData = mapSectionToMember(SecIt.first()))
*SectionData = SecIt.second->getBuffer();
+ else if (SecIt.first() == "debug_info")
+ // Find debug_info and debug_types data by section rather than name as
+ // there are multiple, comdat-grouped copies of these sections.
+ InfoSections[SectionRef()].Data = SecIt.second->getBuffer();
+ else if (SecIt.first() == "debug_info.dwo")
+ InfoDWOSections[SectionRef()].Data = SecIt.second->getBuffer();
+ else if (SecIt.first() == "debug_types")
+ TypesSections[SectionRef()].Data = SecIt.second->getBuffer();
+ else if (SecIt.first() == "debug_types.dwo")
+ TypesDWOSections[SectionRef()].Data = SecIt.second->getBuffer();
}
}
DWARFObjInMemory(const object::ObjectFile &Obj, const LoadedObjectInfo *L,
@@ -1389,9 +1428,13 @@ public:
// FIXME: Use the other dwo range section when we emit it.
RangeDWOSection.Data = Data;
}
+ } else if (Name == "debug_info") {
+ // Find debug_info and debug_types data by section rather than name as
+ // there are multiple, comdat-grouped copies of these sections.
+ InfoSections[Section].Data = Data;
+ } else if (Name == "debug_info.dwo") {
+ InfoDWOSections[Section].Data = Data;
} else if (Name == "debug_types") {
- // Find debug_types data by section rather than name as there are
- // multiple, comdat grouped, debug_types sections.
TypesSections[Section].Data = Data;
} else if (Name == "debug_types.dwo") {
TypesDWOSections[Section].Data = Data;
@@ -1426,9 +1469,16 @@ public:
DWARFSectionMap *Sec = mapNameToDWARFSection(RelSecName);
RelocAddrMap *Map = Sec ? &Sec->Relocs : nullptr;
if (!Map) {
- // Find debug_types relocs by section rather than name as there are
- // multiple, comdat grouped, debug_types sections.
- if (RelSecName == "debug_types")
+ // Find debug_info and debug_types relocs by section rather than name
+ // as there are multiple, comdat-grouped copies of these sections.
+ if (RelSecName == "debug_info")
+ Map = &static_cast<DWARFSectionMap &>(InfoSections[*RelocatedSection])
+ .Relocs;
+ else if (RelSecName == "debug_info.dwo")
+ Map = &static_cast<DWARFSectionMap &>(
+ InfoDWOSections[*RelocatedSection])
+ .Relocs;
+ else if (RelSecName == "debug_types")
Map =
&static_cast<DWARFSectionMap &>(TypesSections[*RelocatedSection])
.Relocs;
@@ -1526,8 +1576,10 @@ public:
StringRef getLineStringSection() const override { return LineStringSection; }
// Sections for DWARF5 split dwarf proposal.
- const DWARFSection &getInfoDWOSection() const override {
- return InfoDWOSection;
+ void forEachInfoDWOSections(
+ function_ref<void(const DWARFSection &)> F) const override {
+ for (auto &P : InfoDWOSections)
+ F(P.second);
}
void forEachTypesDWOSections(
function_ref<void(const DWARFSection &)> F) const override {
@@ -1537,6 +1589,7 @@ public:
StringRef getAbbrevSection() const override { return AbbrevSection; }
const DWARFSection &getLocSection() const override { return LocSection; }
+ const DWARFSection &getLoclistsSection() const override { return LocListsSection; }
StringRef getARangeSection() const override { return ARangeSection; }
StringRef getDebugFrameSection() const override { return DebugFrameSection; }
StringRef getEHFrameSection() const override { return EHFrameSection; }
@@ -1547,12 +1600,12 @@ public:
return RnglistsSection;
}
StringRef getMacinfoSection() const override { return MacinfoSection; }
- StringRef getPubNamesSection() const override { return PubNamesSection; }
- StringRef getPubTypesSection() const override { return PubTypesSection; }
- StringRef getGnuPubNamesSection() const override {
+ const DWARFSection &getPubNamesSection() const override { return PubNamesSection; }
+ const DWARFSection &getPubTypesSection() const override { return PubTypesSection; }
+ const DWARFSection &getGnuPubNamesSection() const override {
return GnuPubNamesSection;
}
- StringRef getGnuPubTypesSection() const override {
+ const DWARFSection &getGnuPubTypesSection() const override {
return GnuPubTypesSection;
}
const DWARFSection &getAppleNamesSection() const override {
@@ -1573,7 +1626,11 @@ public:
StringRef getFileName() const override { return FileName; }
uint8_t getAddressSize() const override { return AddressSize; }
- const DWARFSection &getInfoSection() const override { return InfoSection; }
+ void forEachInfoSections(
+ function_ref<void(const DWARFSection &)> F) const override {
+ for (auto &P : InfoSections)
+ F(P.second);
+ }
void forEachTypesSections(
function_ref<void(const DWARFSection &)> F) const override {
for (auto &P : TypesSections)
@@ -1609,7 +1666,8 @@ Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) {
const Target *TheTarget =
TargetRegistry::lookupTarget(TT.str(), TargetLookupError);
if (!TargetLookupError.empty())
- return make_error<StringError>(TargetLookupError, inconvertibleErrorCode());
+ return createStringError(errc::invalid_argument,
+ TargetLookupError.c_str());
RegInfo.reset(TheTarget->createMCRegInfo(TT.str()));
return Error::success();
}
@@ -1627,3 +1685,9 @@ uint8_t DWARFContext::getCUAddrSize() {
}
return Addr;
}
+
+void DWARFContext::dumpWarning(Error Warning) {
+ handleAllErrors(std::move(Warning), [](ErrorInfoBase &Info) {
+ WithColor::warning() << Info.message() << '\n';
+ });
+}
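
A recurring pattern in the DWARFContext hunks above is replacing
make_error<StringError>(Msg, inconvertibleErrorCode()) with the printf-style
createStringError helper and a concrete llvm::errc value. A minimal sketch of
that idiom, using only the llvm/Support/Error.h and llvm/Support/Errc.h APIs
already visible in the diff (the function and message below are hypothetical):

    #include "llvm/Support/Errc.h"
    #include "llvm/Support/Error.h"
    #include <cinttypes>
    #include <cstdint>

    using namespace llvm;

    // Return a formatted StringError that carries a meaningful error code
    // instead of inconvertibleErrorCode().
    static Error checkOffset(uint32_t Offset, uint32_t SectionSize) {
      if (Offset >= SectionSize)
        return createStringError(errc::invalid_argument,
                                 "offset 0x%8.8" PRIx32 " is past the end of"
                                 " the section (size 0x%8.8" PRIx32 ")",
                                 Offset, SectionSize);
      return Error::success();
    }

Errors produced this way can still be funneled through a warning sink such as
the new DWARFContext::dumpWarning above, which prints each message through
WithColor::warning().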
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
index 7085ca067ba6..22759bfac26c 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -148,7 +148,7 @@ void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
HeaderData.Length, HeaderData.Version, HeaderData.AddrSize,
HeaderData.SegSize);
- static const char *Fmt32 = "0x%8.8" PRIx32;
+ static const char *Fmt32 = "0x%8.8" PRIx64;
static const char *Fmt64 = "0x%16.16" PRIx64;
std::string AddrFmt = "\n";
std::string AddrFmtVerbose = " => ";
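
The one-character-looking Fmt32 change above matters because the address
values handed to format() in this dumper are uint64_t; pairing a 64-bit
argument with a PRIx32 conversion in a varargs call is undefined behaviour. A
hedged sketch of the corrected pairing (the helper and variable are made up):

    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cinttypes>
    #include <cstdint>

    void printAddr(llvm::raw_ostream &OS, uint64_t Addr) {
      // The conversion specifier must match the promoted argument type: a
      // uint64_t needs PRIx64 even when only 8 hex digits end up printed.
      OS << llvm::format("0x%8.8" PRIx64, Addr);
    }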
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index 19bfcaed2021..e8c5dec821b4 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -12,6 +12,7 @@
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/WithColor.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -53,10 +54,12 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) {
for (const auto &CU : CTX->compile_units()) {
uint32_t CUOffset = CU->getOffset();
if (ParsedCUOffsets.insert(CUOffset).second) {
- DWARFAddressRangesVector CURanges;
- CU->collectAddressRanges(CURanges);
- for (const auto &R : CURanges)
- appendRange(CUOffset, R.LowPC, R.HighPC);
+ Expected<DWARFAddressRangesVector> CURanges = CU->collectAddressRanges();
+ if (!CURanges)
+ WithColor::error() << toString(CURanges.takeError()) << '\n';
+ else
+ for (const auto &R : *CURanges)
+ appendRange(CUOffset, R.LowPC, R.HighPC);
}
}
@@ -80,7 +83,7 @@ void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC,
void DWARFDebugAranges::construct() {
std::multiset<uint32_t> ValidCUs; // Maintain the set of CUs describing
// a current address range.
- llvm::sort(Endpoints.begin(), Endpoints.end());
+ llvm::sort(Endpoints);
uint64_t PrevAddress = -1ULL;
for (const auto &E : Endpoints) {
if (PrevAddress < E.Address && !ValidCUs.empty()) {
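
collectAddressRanges() now reports malformed range data through Expected<>
instead of silently filling a caller-provided vector, so every caller has to
check the result before using it, as the .debug_aranges generator above now
does. A minimal sketch of that checking pattern (the helper is hypothetical;
only the Expected-returning shape shown in the hunk is assumed):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/WithColor.h"

    template <typename UnitT> void visitRanges(UnitT &CU) {
      auto Ranges = CU.collectAddressRanges();
      if (!Ranges) {
        // An Expected in the error state must be consumed exactly once.
        llvm::WithColor::error() << llvm::toString(Ranges.takeError()) << '\n';
        return;
      }
      for (const auto &R : *Ranges)
        (void)R; // use R.LowPC / R.HighPC here
    }

The llvm::sort(Endpoints) change in the same hunk is simply the range overload
of llvm::sort; it forwards to the iterator form.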
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 73333395f4c1..ba55ffc28174 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -47,9 +48,9 @@ Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset,
uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK;
switch (Primary) {
default:
- return make_error<StringError>(
- "Invalid primary CFI opcode",
- std::make_error_code(std::errc::illegal_byte_sequence));
+ return createStringError(errc::illegal_byte_sequence,
+ "Invalid primary CFI opcode 0x%" PRIx8,
+ Primary);
case DW_CFA_advance_loc:
case DW_CFA_restore:
addInstruction(Primary, Op1);
@@ -62,9 +63,9 @@ Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset,
// Extended opcode - its value is Opcode itself.
switch (Opcode) {
default:
- return make_error<StringError>(
- "Invalid extended CFI opcode",
- std::make_error_code(std::errc::illegal_byte_sequence));
+ return createStringError(errc::illegal_byte_sequence,
+ "Invalid extended CFI opcode 0x%" PRIx8,
+ Opcode);
case DW_CFA_nop:
case DW_CFA_remember_state:
case DW_CFA_restore_state:
@@ -224,7 +225,7 @@ void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI,
switch (Type) {
case OT_Unset: {
OS << " Unsupported " << (OperandIdx ? "second" : "first") << " operand to";
- auto OpcodeName = CallFrameString(Opcode);
+ auto OpcodeName = CallFrameString(Opcode, Arch);
if (!OpcodeName.empty())
OS << " " << OpcodeName;
else
@@ -278,7 +279,7 @@ void CFIProgram::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH,
if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
OS.indent(2 * IndentLevel);
- OS << CallFrameString(Opcode) << ":";
+ OS << CallFrameString(Opcode, Arch) << ":";
for (unsigned i = 0; i < Instr.Ops.size(); ++i)
printOperand(OS, MRI, IsEH, Instr, i, Instr.Ops[i]);
OS << '\n';
@@ -324,8 +325,9 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
OS << "\n";
}
-DWARFDebugFrame::DWARFDebugFrame(bool IsEH, uint64_t EHFrameAddress)
- : IsEH(IsEH), EHFrameAddress(EHFrameAddress) {}
+DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch,
+ bool IsEH, uint64_t EHFrameAddress)
+ : Arch(Arch), IsEH(IsEH), EHFrameAddress(EHFrameAddress) {}
DWARFDebugFrame::~DWARFDebugFrame() = default;
@@ -395,7 +397,8 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
uint8_t SegmentDescriptorSize = Version < 4 ? 0 : Data.getU8(&Offset);
uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset);
int64_t DataAlignmentFactor = Data.getSLEB128(&Offset);
- uint64_t ReturnAddressRegister = Data.getULEB128(&Offset);
+ uint64_t ReturnAddressRegister =
+ Version == 1 ? Data.getU8(&Offset) : Data.getULEB128(&Offset);
// Parse the augmentation data for EH CIEs
StringRef AugmentationData("");
@@ -443,6 +446,11 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
StartAugmentationOffset = Offset;
EndAugmentationOffset = Offset +
static_cast<uint32_t>(*AugmentationLength);
+ break;
+ case 'B':
+ // B-Key is used for signing functions associated with this
+ // augmentation string
+ break;
}
}
@@ -459,7 +467,7 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
StartOffset, Length, Version, AugmentationString, AddressSize,
SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor,
ReturnAddressRegister, AugmentationData, FDEPointerEncoding,
- LSDAPointerEncoding, Personality, PersonalityEncoding);
+ LSDAPointerEncoding, Personality, PersonalityEncoding, Arch);
CIEs[StartOffset] = Cie.get();
Entries.emplace_back(std::move(Cie));
} else {
@@ -511,7 +519,7 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer,
InitialLocation, AddressRange,
- Cie, LSDAAddress));
+ Cie, LSDAAddress, Arch));
}
if (Error E =
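
DWARFDebugFrame now carries the target architecture so that CallFrameString()
can resolve vendor-specific CFI opcode names, and CIE parsing reads the return
address register as a single byte for version 1 tables. A sketch of the
updated construction, assuming the constructor shown above (with
EHFrameAddress defaulted) and a caller-supplied extractor:

    #include "llvm/ADT/Triple.h"
    #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
    #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"

    // Hypothetical helper: parse a .debug_frame payload for a known target.
    void parseDebugFrame(const llvm::DWARFDataExtractor &Data,
                         llvm::Triple::ArchType Arch) {
      // IsEH = false selects .debug_frame (not .eh_frame) semantics; the
      // architecture lets dumping pick target-specific opcode names.
      llvm::DWARFDebugFrame DF(Arch, /*IsEH=*/false);
      DF.parse(Data);
    }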
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 53a8e193ef56..1d621ff244f3 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"
@@ -273,24 +274,6 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
return true;
}
-template <typename... Ts>
-static std::string formatErrorString(char const *Fmt, const Ts &... Vals) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(Fmt, Vals...);
- return Stream.str();
-}
-
-template <typename... Ts>
-static Error createError(char const *Fmt, const Ts &... Vals) {
- return make_error<StringError>(formatErrorString(Fmt, Vals...),
- inconvertibleErrorCode());
-}
-
-static Error createError(char const *Msg) {
- return make_error<StringError>(Msg, inconvertibleErrorCode());
-}
-
Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
uint32_t *OffsetPtr,
const DWARFContext &Ctx,
@@ -303,14 +286,15 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
FormParams.Format = dwarf::DWARF64;
TotalLength = DebugLineData.getU64(OffsetPtr);
} else if (TotalLength >= 0xffffff00) {
- return createError(
+ return createStringError(errc::invalid_argument,
"parsing line table prologue at offset 0x%8.8" PRIx64
" unsupported reserved unit length found of value 0x%8.8" PRIx64,
PrologueOffset, TotalLength);
}
FormParams.Version = DebugLineData.getU16(OffsetPtr);
if (getVersion() < 2)
- return createError("parsing line table prologue at offset 0x%8.8" PRIx64
+ return createStringError(errc::not_supported,
+ "parsing line table prologue at offset 0x%8.8" PRIx64
" found unsupported version 0x%2.2" PRIx16,
PrologueOffset, getVersion());
@@ -342,7 +326,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset,
FormParams, Ctx, U, ContentTypes,
IncludeDirectories, FileNames)) {
- return createError(
+ return createStringError(errc::invalid_argument,
"parsing line table prologue at 0x%8.8" PRIx64
" found an invalid directory or file table description at"
" 0x%8.8" PRIx64,
@@ -353,7 +337,8 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
ContentTypes, IncludeDirectories, FileNames);
if (*OffsetPtr != EndPrologueOffset)
- return createError("parsing line table prologue at 0x%8.8" PRIx64
+ return createStringError(errc::invalid_argument,
+ "parsing line table prologue at 0x%8.8" PRIx64
" should have ended at 0x%8.8" PRIx64
" but it ended at 0x%8.8" PRIx64,
PrologueOffset, EndPrologueOffset, (uint64_t)*OffsetPtr);
@@ -470,7 +455,7 @@ Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable(
DWARFDataExtractor &DebugLineData, uint32_t Offset, const DWARFContext &Ctx,
const DWARFUnit *U, std::function<void(Error)> RecoverableErrorCallback) {
if (!DebugLineData.isValidOffset(Offset))
- return createError("offset 0x%8.8" PRIx32
+ return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx32
" is not a valid debug line section offset",
Offset);
@@ -575,7 +560,8 @@ Error DWARFDebugLine::LineTable::parse(
if (DebugLineData.getAddressSize() == 0)
DebugLineData.setAddressSize(Len - 1);
else if (DebugLineData.getAddressSize() != Len - 1) {
- return createError("mismatching address size at offset 0x%8.8" PRIx32
+ return createStringError(errc::invalid_argument,
+ "mismatching address size at offset 0x%8.8" PRIx32
" expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64,
ExtOffset, DebugLineData.getAddressSize(),
Len - 1);
@@ -640,7 +626,8 @@ Error DWARFDebugLine::LineTable::parse(
// Make sure the stated and parsed lengths are the same.
// Otherwise we have an unparseable line-number program.
if (*OffsetPtr - ExtOffset != Len)
- return createError("unexpected line op length at offset 0x%8.8" PRIx32
+ return createStringError(errc::illegal_byte_sequence,
+ "unexpected line op length at offset 0x%8.8" PRIx32
" expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx32,
ExtOffset, Len, *OffsetPtr - ExtOffset);
} else if (Opcode < Prologue.OpcodeBase) {
@@ -847,11 +834,12 @@ Error DWARFDebugLine::LineTable::parse(
if (!State.Sequence.Empty)
RecoverableErrorCallback(
- createError("last sequence in debug line table is not terminated!"));
+ createStringError(errc::illegal_byte_sequence,
+ "last sequence in debug line table is not terminated!"));
// Sort all sequences so that address lookup will work faster.
if (!Sequences.empty()) {
- llvm::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC);
+ llvm::sort(Sequences, Sequence::orderByLowPC);
// Note: actually, instruction address ranges of sequences should not
// overlap (in shared objects and executables). If they do, the address
// lookup would still work, though, but result would be ambiguous.
@@ -1047,17 +1035,16 @@ bool DWARFDebugLine::LineTable::getFileLineInfoForAddress(
// line-table section.
static DWARFDebugLine::SectionParser::LineToUnitMap
buildLineToUnitMap(DWARFDebugLine::SectionParser::cu_range CUs,
- DWARFDebugLine::SectionParser::tu_range TUSections) {
+ DWARFDebugLine::SectionParser::tu_range TUs) {
DWARFDebugLine::SectionParser::LineToUnitMap LineToUnit;
for (const auto &CU : CUs)
if (auto CUDIE = CU->getUnitDIE())
if (auto StmtOffset = toSectionOffset(CUDIE.find(DW_AT_stmt_list)))
LineToUnit.insert(std::make_pair(*StmtOffset, &*CU));
- for (const auto &TUS : TUSections)
- for (const auto &TU : TUS)
- if (auto TUDIE = TU->getUnitDIE())
- if (auto StmtOffset = toSectionOffset(TUDIE.find(DW_AT_stmt_list)))
- LineToUnit.insert(std::make_pair(*StmtOffset, &*TU));
+ for (const auto &TU : TUs)
+ if (auto TUDIE = TU->getUnitDIE())
+ if (auto StmtOffset = toSectionOffset(TUDIE.find(DW_AT_stmt_list)))
+ LineToUnit.insert(std::make_pair(*StmtOffset, &*TU));
return LineToUnit;
}
@@ -1125,9 +1112,3 @@ void DWARFDebugLine::SectionParser::moveToNextTable(uint32_t OldOffset,
Done = true;
}
}
-
-void DWARFDebugLine::warn(Error Err) {
- handleAllErrors(std::move(Err), [](ErrorInfoBase &Info) {
- WithColor::warning() << Info.message() << '\n';
- });
-}
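
With DWARFDebugLine::warn gone (its role is taken by DWARFContext::dumpWarning
above), recoverable line-table problems reach whatever callback the caller
passes to the Expected-returning overload. A sketch of a client supplying its
own callback, assuming the getLineTableForUnit(U, RecoverableErrorCallback)
signature visible in the earlier DWARFContext hunk (names are illustrative):

    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpLineTable(llvm::DWARFContext &Ctx, llvm::DWARFUnit *U) {
      auto Table = Ctx.getLineTableForUnit(U, [](llvm::Error E) {
        // Recoverable problems (e.g. an unterminated final sequence) land
        // here instead of being printed unconditionally by the parser.
        llvm::errs() << "line table warning: "
                     << llvm::toString(std::move(E)) << '\n';
      });
      if (!Table) {
        llvm::consumeError(Table.takeError());
        return;
      }
      // (*Table)->dump(...) or address lookups can follow here.
    }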
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index 617b914ecce9..f8b5ff6ec8fb 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -124,7 +124,7 @@ DWARFDebugLoc::parseOneLocationList(DWARFDataExtractor Data, unsigned *Offset) {
StringRef str = Data.getData().substr(*Offset, Bytes);
*Offset += Bytes;
E.Loc.reserve(str.size());
- std::copy(str.begin(), str.end(), std::back_inserter(E.Loc));
+ llvm::copy(str, std::back_inserter(E.Loc));
LL.Entries.push_back(std::move(E));
}
}
@@ -144,51 +144,74 @@ void DWARFDebugLoc::parse(const DWARFDataExtractor &data) {
WithColor::error() << "failed to consume entire .debug_loc section\n";
}
-Optional<DWARFDebugLocDWO::LocationList>
-DWARFDebugLocDWO::parseOneLocationList(DataExtractor Data, unsigned *Offset) {
+Optional<DWARFDebugLoclists::LocationList>
+DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset,
+ unsigned Version) {
LocationList LL;
LL.Offset = *Offset;
// dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list.
while (auto Kind =
static_cast<dwarf::LocationListEntry>(Data.getU8(Offset))) {
- if (Kind != dwarf::DW_LLE_startx_length) {
+
+ Entry E;
+ E.Kind = Kind;
+ switch (Kind) {
+ case dwarf::DW_LLE_startx_length:
+ E.Value0 = Data.getULEB128(Offset);
+ // Pre-DWARF v5 has a different interpretation of the length field. We have
+ // to support both the pre-v5 and the standardized styles for compatibility.
+ if (Version < 5)
+ E.Value1 = Data.getU32(Offset);
+ else
+ E.Value1 = Data.getULEB128(Offset);
+ break;
+ case dwarf::DW_LLE_start_length:
+ E.Value0 = Data.getAddress(Offset);
+ E.Value1 = Data.getULEB128(Offset);
+ break;
+ case dwarf::DW_LLE_offset_pair:
+ E.Value0 = Data.getULEB128(Offset);
+ E.Value1 = Data.getULEB128(Offset);
+ break;
+ case dwarf::DW_LLE_base_address:
+ E.Value0 = Data.getAddress(Offset);
+ break;
+ default:
WithColor::error() << "dumping support for LLE of kind " << (int)Kind
<< " not implemented\n";
return None;
}
- Entry E;
- E.Start = Data.getULEB128(Offset);
- E.Length = Data.getU32(Offset);
-
- unsigned Bytes = Data.getU16(Offset);
- // A single location description describing the location of the object...
- StringRef str = Data.getData().substr(*Offset, Bytes);
- *Offset += Bytes;
- E.Loc.resize(str.size());
- std::copy(str.begin(), str.end(), E.Loc.begin());
+ if (Kind != dwarf::DW_LLE_base_address) {
+ unsigned Bytes = Data.getU16(Offset);
+ // A single location description describing the location of the object...
+ StringRef str = Data.getData().substr(*Offset, Bytes);
+ *Offset += Bytes;
+ E.Loc.resize(str.size());
+ llvm::copy(str, E.Loc.begin());
+ }
LL.Entries.push_back(std::move(E));
}
return LL;
}
-void DWARFDebugLocDWO::parse(DataExtractor data) {
+void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) {
IsLittleEndian = data.isLittleEndian();
AddressSize = data.getAddressSize();
uint32_t Offset = 0;
while (data.isValidOffset(Offset)) {
- if (auto LL = parseOneLocationList(data, &Offset))
+ if (auto LL = parseOneLocationList(data, &Offset, Version))
Locations.push_back(std::move(*LL));
else
return;
}
}
-DWARFDebugLocDWO::LocationList const *
-DWARFDebugLocDWO::getLocationListAtOffset(uint64_t Offset) const {
+DWARFDebugLoclists::LocationList const *
+DWARFDebugLoclists::getLocationListAtOffset(uint64_t Offset) const {
auto It = std::lower_bound(
Locations.begin(), Locations.end(), Offset,
[](const LocationList &L, uint64_t Offset) { return L.Offset < Offset; });
@@ -197,23 +220,49 @@ DWARFDebugLocDWO::getLocationListAtOffset(uint64_t Offset) const {
return nullptr;
}
-void DWARFDebugLocDWO::LocationList::dump(raw_ostream &OS, bool IsLittleEndian,
- unsigned AddressSize,
- const MCRegisterInfo *MRI,
- unsigned Indent) const {
+void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
+ bool IsLittleEndian,
+ unsigned AddressSize,
+ const MCRegisterInfo *MRI,
+ unsigned Indent) const {
for (const Entry &E : Entries) {
- OS << '\n';
- OS.indent(Indent);
- OS << "Addr idx " << E.Start << " (w/ length " << E.Length << "): ";
+ switch (E.Kind) {
+ case dwarf::DW_LLE_startx_length:
+ OS << '\n';
+ OS.indent(Indent);
+ OS << "Addr idx " << E.Value0 << " (w/ length " << E.Value1 << "): ";
+ break;
+ case dwarf::DW_LLE_start_length:
+ OS << '\n';
+ OS.indent(Indent);
+ OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2,
+ AddressSize * 2, E.Value0, AddressSize * 2, AddressSize * 2,
+ E.Value0 + E.Value1);
+ break;
+ case dwarf::DW_LLE_offset_pair:
+ OS << '\n';
+ OS.indent(Indent);
+ OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2,
+ AddressSize * 2, BaseAddr + E.Value0, AddressSize * 2,
+ AddressSize * 2, BaseAddr + E.Value1);
+ break;
+ case dwarf::DW_LLE_base_address:
+ BaseAddr = E.Value0;
+ break;
+ default:
+ llvm_unreachable("unreachable locations list kind");
+ }
+
dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI);
}
}
-void DWARFDebugLocDWO::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
- Optional<uint64_t> Offset) const {
+void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr,
+ const MCRegisterInfo *MRI,
+ Optional<uint64_t> Offset) const {
auto DumpLocationList = [&](const LocationList &L) {
OS << format("0x%8.8x: ", L.Offset);
- L.dump(OS, IsLittleEndian, AddressSize, MRI, /*Indent=*/12);
+ L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, /*Indent=*/12);
OS << "\n\n";
};
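
DWARFDebugLoclists has to be told which encoding generation it is reading
because DW_LLE_startx_length carries a fixed 32-bit length before DWARF v5 and
a ULEB128 length from v5 on. A sketch of driving the parser, assuming the
parse(DataExtractor, Version) signature above (section plumbing omitted, names
hypothetical):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
    #include "llvm/Support/DataExtractor.h"

    void parseLocLists(llvm::StringRef Contents, bool IsLittleEndian,
                       uint8_t AddrSize, unsigned DwarfVersion) {
      llvm::DataExtractor Data(Contents, IsLittleEndian, AddrSize);
      llvm::DWARFDebugLoclists Loclists;
      // Version < 5 selects the pre-standardized .debug_loc.dwo reading of
      // DW_LLE_startx_length; 5 and later select the DWARF v5 rules.
      Loclists.parse(Data, DwarfVersion < 5 ? 4 : DwarfVersion);
    }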
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
index de8b6e543fab..abd1ad59a9c1 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/DataExtractor.h"
@@ -18,10 +19,11 @@
using namespace llvm;
using namespace dwarf;
-DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian,
- bool GnuStyle)
+DWARFDebugPubTable::DWARFDebugPubTable(const DWARFObject &Obj,
+ const DWARFSection &Sec,
+ bool LittleEndian, bool GnuStyle)
: GnuStyle(GnuStyle) {
- DataExtractor PubNames(Data, LittleEndian, 0);
+ DWARFDataExtractor PubNames(Obj, Sec, LittleEndian, 0);
uint32_t Offset = 0;
while (PubNames.isValidOffset(Offset)) {
Sets.push_back({});
@@ -29,10 +31,10 @@ DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian,
SetData.Length = PubNames.getU32(&Offset);
SetData.Version = PubNames.getU16(&Offset);
- SetData.Offset = PubNames.getU32(&Offset);
+ SetData.Offset = PubNames.getRelocatedValue(4, &Offset);
SetData.Size = PubNames.getU32(&Offset);
- while (Offset < Data.size()) {
+ while (Offset < Sec.Data.size()) {
uint32_t DieRef = PubNames.getU32(&Offset);
if (DieRef == 0)
break;
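
Constructing the pub table from a DWARFSection rather than a raw StringRef
lets getRelocatedValue() apply relocations to the CU offset field when the
input is an unlinked object file. A sketch of the new constructor in use,
assuming the signature above and the DWARFObject accessors from the earlier
DWARFContext hunks (the helper itself is hypothetical):

    #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
    #include "llvm/DebugInfo/DWARF/DWARFObject.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpPubNames(const llvm::DWARFObject &Obj, llvm::raw_ostream &OS) {
      // GnuStyle = false selects plain .debug_pubnames semantics.
      llvm::DWARFDebugPubTable Table(Obj, Obj.getPubNamesSection(),
                                     Obj.isLittleEndian(), /*GnuStyle=*/false);
      Table.dump(OS);
    }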
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index a565718debd0..dfb913000a46 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cinttypes>
@@ -16,15 +17,6 @@
using namespace llvm;
-// FIXME: There are several versions of this. Consolidate them.
-template <typename... Ts>
-static Error createError(char const *Fmt, const Ts &... Vals) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(Fmt, Vals...);
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
-}
-
void DWARFDebugRangeList::clear() {
Offset = -1U;
AddressSize = 0;
@@ -35,11 +27,13 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
uint32_t *offset_ptr) {
clear();
if (!data.isValidOffset(*offset_ptr))
- return createError("invalid range list offset 0x%" PRIx32, *offset_ptr);
+ return createStringError(errc::invalid_argument,
+ "invalid range list offset 0x%" PRIx32, *offset_ptr);
AddressSize = data.getAddressSize();
if (AddressSize != 4 && AddressSize != 8)
- return createError("invalid address size: %d", AddressSize);
+ return createStringError(errc::invalid_argument,
+ "invalid address size: %" PRIu8, AddressSize);
Offset = *offset_ptr;
while (true) {
RangeListEntry Entry;
@@ -53,7 +47,8 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
// Check that both values were extracted correctly.
if (*offset_ptr != prev_offset + 2 * AddressSize) {
clear();
- return createError("invalid range list entry at offset 0x%" PRIx32,
+ return createStringError(errc::invalid_argument,
+ "invalid range list entry at offset 0x%" PRIx32,
prev_offset);
}
if (Entry.isEndOfListEntry())
@@ -74,7 +69,7 @@ void DWARFDebugRangeList::dump(raw_ostream &OS) const {
}
DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges(
- llvm::Optional<BaseAddress> BaseAddr) const {
+ llvm::Optional<SectionedAddress> BaseAddr) const {
DWARFAddressRangesVector Res;
for (const RangeListEntry &RLE : Entries) {
if (RLE.isBaseAddressSelectionEntry(AddressSize)) {
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
index b19c808a8fb3..60c6eb30857f 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
@@ -10,28 +10,13 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-template <typename... Ts>
-static Error createError(char const *Fmt, const Ts &... Vals) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(Fmt, Vals...);
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
-}
-
-namespace llvm { // workaround for gcc bug
-template <>
-Error DWARFListType<RangeListEntry>::createError(const char *Fmt, const char *s,
- uint32_t Val) {
- return ::createError(Fmt, s, Val);
-}
-}
-
Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
uint32_t *OffsetPtr) {
Offset = *OffsetPtr;
@@ -47,31 +32,49 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
Value0 = Value1 = 0;
break;
// TODO: Support other encodings.
- case dwarf::DW_RLE_base_addressx:
- return createError("unsupported rnglists encoding DW_RLE_base_addressx "
- "at offset 0x%" PRIx32,
- *OffsetPtr - 1);
+ case dwarf::DW_RLE_base_addressx: {
+ uint32_t PreviousOffset = *OffsetPtr - 1;
+ Value0 = Data.getULEB128(OffsetPtr);
+ if (End < *OffsetPtr)
+ return createStringError(
+ errc::invalid_argument,
+ "read past end of table when reading "
+ "DW_RLE_base_addressx encoding at offset 0x%" PRIx32,
+ PreviousOffset);
+ break;
+ }
case dwarf::DW_RLE_startx_endx:
- return createError("unsupported rnglists encoding DW_RLE_startx_endx at "
+ return createStringError(errc::not_supported,
+ "unsupported rnglists encoding DW_RLE_startx_endx at "
"offset 0x%" PRIx32,
*OffsetPtr - 1);
- case dwarf::DW_RLE_startx_length:
- return createError("unsupported rnglists encoding DW_RLE_startx_length "
- "at offset 0x%" PRIx32,
- *OffsetPtr - 1);
+ case dwarf::DW_RLE_startx_length: {
+ uint32_t PreviousOffset = *OffsetPtr - 1;
+ Value0 = Data.getULEB128(OffsetPtr);
+ Value1 = Data.getULEB128(OffsetPtr);
+ if (End < *OffsetPtr)
+ return createStringError(
+ errc::invalid_argument,
+ "read past end of table when reading "
+ "DW_RLE_startx_length encoding at offset 0x%" PRIx32,
+ PreviousOffset);
+ break;
+ }
case dwarf::DW_RLE_offset_pair: {
uint32_t PreviousOffset = *OffsetPtr - 1;
Value0 = Data.getULEB128(OffsetPtr);
Value1 = Data.getULEB128(OffsetPtr);
if (End < *OffsetPtr)
- return createError("read past end of table when reading "
+ return createStringError(errc::invalid_argument,
+ "read past end of table when reading "
"DW_RLE_offset_pair encoding at offset 0x%" PRIx32,
PreviousOffset);
break;
}
case dwarf::DW_RLE_base_address: {
if ((End - *OffsetPtr) < Data.getAddressSize())
- return createError("insufficient space remaining in table for "
+ return createStringError(errc::invalid_argument,
+ "insufficient space remaining in table for "
"DW_RLE_base_address encoding at offset 0x%" PRIx32,
*OffsetPtr - 1);
Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
@@ -79,7 +82,8 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
}
case dwarf::DW_RLE_start_end: {
if ((End - *OffsetPtr) < unsigned(Data.getAddressSize() * 2))
- return createError("insufficient space remaining in table for "
+ return createStringError(errc::invalid_argument,
+ "insufficient space remaining in table for "
"DW_RLE_start_end encoding "
"at offset 0x%" PRIx32,
*OffsetPtr - 1);
@@ -92,13 +96,15 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex);
Value1 = Data.getULEB128(OffsetPtr);
if (End < *OffsetPtr)
- return createError("read past end of table when reading "
+ return createStringError(errc::invalid_argument,
+ "read past end of table when reading "
"DW_RLE_start_length encoding at offset 0x%" PRIx32,
PreviousOffset);
break;
}
default:
- return createError("unknown rnglists encoding 0x%" PRIx32
+ return createStringError(errc::not_supported,
+ "unknown rnglists encoding 0x%" PRIx32
" at offset 0x%" PRIx32,
uint32_t(Encoding), *OffsetPtr - 1);
}
@@ -107,12 +113,19 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
return Error::success();
}
-DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
- llvm::Optional<BaseAddress> BaseAddr) const {
+DWARFAddressRangesVector
+DWARFDebugRnglist::getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
+ DWARFUnit &U) const {
DWARFAddressRangesVector Res;
for (const RangeListEntry &RLE : Entries) {
if (RLE.EntryKind == dwarf::DW_RLE_end_of_list)
break;
+ if (RLE.EntryKind == dwarf::DW_RLE_base_addressx) {
+ BaseAddr = U.getAddrOffsetSectionItem(RLE.Value0);
+ if (!BaseAddr)
+ BaseAddr = {RLE.Value0, -1ULL};
+ continue;
+ }
if (RLE.EntryKind == dwarf::DW_RLE_base_address) {
BaseAddr = {RLE.Value0, RLE.SectionIndex};
continue;
@@ -140,6 +153,15 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
E.LowPC = RLE.Value0;
E.HighPC = E.LowPC + RLE.Value1;
break;
+ case dwarf::DW_RLE_startx_length: {
+ auto Start = U.getAddrOffsetSectionItem(RLE.Value0);
+ if (!Start)
+ Start = {0, -1ULL};
+ E.SectionIndex = Start->SectionIndex;
+ E.LowPC = Start->Address;
+ E.HighPC = E.LowPC + RLE.Value1;
+ break;
+ }
default:
// Unsupported encodings should have been reported during extraction,
// so we should not run into any here.
@@ -150,9 +172,11 @@ DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
return Res;
}
-void RangeListEntry::dump(raw_ostream &OS, uint8_t AddrSize,
- uint8_t MaxEncodingStringLength,
- uint64_t &CurrentBase, DIDumpOptions DumpOpts) const {
+void RangeListEntry::dump(
+ raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
+ uint64_t &CurrentBase, DIDumpOptions DumpOpts,
+ llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ LookupPooledAddress) const {
auto PrintRawEntry = [](raw_ostream &OS, const RangeListEntry &Entry,
uint8_t AddrSize, DIDumpOptions DumpOpts) {
if (DumpOpts.Verbose) {
@@ -179,6 +203,17 @@ void RangeListEntry::dump(raw_ostream &OS, uint8_t AddrSize,
case dwarf::DW_RLE_end_of_list:
OS << (DumpOpts.Verbose ? "" : "<End of list>");
break;
+ // case dwarf::DW_RLE_base_addressx:
+ case dwarf::DW_RLE_base_addressx: {
+ if (auto SA = LookupPooledAddress(Value0))
+ CurrentBase = SA->Address;
+ else
+ CurrentBase = Value0;
+ if (!DumpOpts.Verbose)
+ return;
+ OS << format(" 0x%*.*" PRIx64, AddrSize * 2, AddrSize * 2, Value0);
+ break;
+ }
case dwarf::DW_RLE_base_address:
// In non-verbose mode we do not print anything for this entry.
CurrentBase = Value0;
@@ -198,6 +233,14 @@ void RangeListEntry::dump(raw_ostream &OS, uint8_t AddrSize,
case dwarf::DW_RLE_start_end:
DWARFAddressRange(Value0, Value1).dump(OS, AddrSize, DumpOpts);
break;
+ case dwarf::DW_RLE_startx_length: {
+ PrintRawEntry(OS, *this, AddrSize, DumpOpts);
+ uint64_t Start = 0;
+ if (auto SA = LookupPooledAddress(Value0))
+ Start = SA->Address;
+ DWARFAddressRange(Start, Start + Value1).dump(OS, AddrSize, DumpOpts);
+ break;
+ } break;
default:
llvm_unreachable("Unsupported range list encoding");
}
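
The indexed rnglist forms are resolved through the unit's address table,
which is why getAbsoluteRanges() now takes the owning DWARFUnit as well as an
optional base address. A sketch of the call, assuming only the signature in
the hunk above (the helper is hypothetical):

    #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
    #include "llvm/DebugInfo/DWARF/DWARFUnit.h"

    void collectRanges(const llvm::DWARFDebugRnglist &List, llvm::DWARFUnit &U) {
      // DW_RLE_base_addressx and DW_RLE_startx_length entries look their
      // operands up in U's .debug_addr slice via getAddrOffsetSectionItem().
      llvm::DWARFAddressRangesVector Ranges =
          List.getAbsoluteRanges(U.getBaseAddress(), U);
      for (const auto &R : Ranges)
        (void)R; // R.LowPC, R.HighPC and R.SectionIndex are absolute here.
    }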
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 904ceab7b286..81ef0c8c7aec 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -59,25 +59,19 @@ static void dumpRanges(const DWARFObject &Obj, raw_ostream &OS,
const DWARFAddressRangesVector &Ranges,
unsigned AddressSize, unsigned Indent,
const DIDumpOptions &DumpOpts) {
+ if (!DumpOpts.ShowAddresses)
+ return;
+
ArrayRef<SectionName> SectionNames;
if (DumpOpts.Verbose)
SectionNames = Obj.getSectionNames();
for (const DWARFAddressRange &R : Ranges) {
-
OS << '\n';
OS.indent(Indent);
R.dump(OS, AddressSize);
- if (SectionNames.empty() || R.SectionIndex == -1ULL)
- continue;
-
- StringRef Name = SectionNames[R.SectionIndex].Name;
- OS << " \"" << Name << '\"';
-
- // Print section index if name is not unique.
- if (!SectionNames[R.SectionIndex].IsNameUnique)
- OS << format(" [%" PRIu64 "]", R.SectionIndex);
+ DWARFFormValue::dumpAddressSection(Obj, OS, DumpOpts, R.SectionIndex);
}
}
@@ -99,27 +93,45 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
FormValue.dump(OS, DumpOpts);
if (FormValue.isFormClass(DWARFFormValue::FC_SectionOffset)) {
- const DWARFSection &LocSection = Obj.getLocSection();
- const DWARFSection &LocDWOSection = Obj.getLocDWOSection();
uint32_t Offset = *FormValue.getAsSectionOffset();
- if (!LocSection.Data.empty()) {
+ if (!U->isDWOUnit() && !U->getLocSection()->Data.empty()) {
DWARFDebugLoc DebugLoc;
- DWARFDataExtractor Data(Obj, LocSection, Ctx.isLittleEndian(),
+ DWARFDataExtractor Data(Obj, *U->getLocSection(), Ctx.isLittleEndian(),
Obj.getAddressSize());
auto LL = DebugLoc.parseOneLocationList(Data, &Offset);
if (LL) {
uint64_t BaseAddr = 0;
- if (Optional<BaseAddress> BA = U->getBaseAddress())
+ if (Optional<SectionedAddress> BA = U->getBaseAddress())
BaseAddr = BA->Address;
LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, BaseAddr,
Indent);
} else
OS << "error extracting location list.";
- } else if (!LocDWOSection.Data.empty()) {
- DataExtractor Data(LocDWOSection.Data, Ctx.isLittleEndian(), 0);
- auto LL = DWARFDebugLocDWO::parseOneLocationList(Data, &Offset);
+ return;
+ }
+
+ bool UseLocLists = !U->isDWOUnit();
+ StringRef LoclistsSectionData =
+ UseLocLists ? Obj.getLoclistsSection().Data : U->getLocSectionData();
+
+ if (!LoclistsSectionData.empty()) {
+ DataExtractor Data(LoclistsSectionData, Ctx.isLittleEndian(),
+ Obj.getAddressSize());
+
+ // Old-style location lists were used in DWARF v4 (.debug_loc.dwo section).
+ // Modern location lists (.debug_loclists) are used starting with v5.
+ // Ideally we should take the version from the .debug_loclists section
+ // header, but we use the CU's version for simplicity.
+ auto LL = DWARFDebugLoclists::parseOneLocationList(
+ Data, &Offset, UseLocLists ? U->getVersion() : 4);
+
+ uint64_t BaseAddr = 0;
+ if (Optional<SectionedAddress> BA = U->getBaseAddress())
+ BaseAddr = BA->Address;
+
if (LL)
- LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, Indent);
+ LL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI,
+ Indent);
else
OS << "error extracting location list.";
}
@@ -134,10 +146,54 @@ static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) {
OS << TagStr.substr(7, TagStr.size() - 12) << " ";
}
-/// Recursively dump the DIE type name when applicable.
-static void dumpTypeName(raw_ostream &OS, const DWARFDie &Die) {
- DWARFDie D = Die.getAttributeValueAsReferencedDie(DW_AT_type);
+static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) {
+ Optional<uint64_t> Bound;
+ for (const DWARFDie &C : D.children())
+ if (C.getTag() == DW_TAG_subrange_type) {
+ Optional<uint64_t> LB;
+ Optional<uint64_t> Count;
+ Optional<uint64_t> UB;
+ Optional<unsigned> DefaultLB;
+ if (Optional<DWARFFormValue> L = C.find(DW_AT_lower_bound))
+ LB = L->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> CountV = C.find(DW_AT_count))
+ Count = CountV->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> UpperV = C.find(DW_AT_upper_bound))
+ UB = UpperV->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> LV =
+ D.getDwarfUnit()->getUnitDIE().find(DW_AT_language))
+ if (Optional<uint64_t> LC = LV->getAsUnsignedConstant())
+ if ((DefaultLB =
+ LanguageLowerBound(static_cast<dwarf::SourceLanguage>(*LC))))
+ if (LB && *LB == *DefaultLB)
+ LB = None;
+ if (!LB && !Count && !UB)
+ OS << "[]";
+ else if (!LB && (Count || UB) && DefaultLB)
+ OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']';
+ else {
+ OS << "[[";
+ if (LB)
+ OS << *LB;
+ else
+ OS << '?';
+ OS << ", ";
+ if (Count)
+ if (LB)
+ OS << *LB + *Count;
+ else
+ OS << "? + " << *Count;
+ else if (UB)
+ OS << *UB + 1;
+ else
+ OS << '?';
+ OS << ")]";
+ }
+ }
+}
+/// Recursively dump the DIE type name when applicable.
+static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) {
if (!D.isValid())
return;
@@ -155,22 +211,46 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &Die) {
case DW_TAG_ptr_to_member_type:
case DW_TAG_reference_type:
case DW_TAG_rvalue_reference_type:
+ case DW_TAG_subroutine_type:
break;
default:
dumpTypeTagName(OS, T);
}
// Follow the DW_AT_type if possible.
- dumpTypeName(OS, D);
+ DWARFDie TypeDie = D.getAttributeValueAsReferencedDie(DW_AT_type);
+ dumpTypeName(OS, TypeDie);
switch (T) {
- case DW_TAG_array_type:
- OS << "[]";
+ case DW_TAG_subroutine_type: {
+ if (!TypeDie)
+ OS << "void";
+ OS << '(';
+ bool First = true;
+ for (const DWARFDie &C : D.children()) {
+ if (C.getTag() == DW_TAG_formal_parameter) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ dumpTypeName(OS, C.getAttributeValueAsReferencedDie(DW_AT_type));
+ }
+ }
+ OS << ')';
+ break;
+ }
+ case DW_TAG_array_type: {
+ dumpArrayType(OS, D);
break;
+ }
case DW_TAG_pointer_type:
OS << '*';
break;
case DW_TAG_ptr_to_member_type:
+ if (DWARFDie Cont =
+ D.getAttributeValueAsReferencedDie(DW_AT_containing_type)) {
+ dumpTypeName(OS << ' ', Cont);
+ OS << "::";
+ }
OS << '*';
break;
case DW_TAG_reference_type:
@@ -244,16 +324,19 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
else
formValue.dump(OS, DumpOpts);
+ std::string Space = DumpOpts.ShowAddresses ? " " : "";
+
// We have dumped the attribute raw value. For some attributes
// having both the raw value and the pretty-printed value is
// interesting. These attributes are handled below.
if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin) {
- if (const char *Name = Die.getAttributeValueAsReferencedDie(Attr).getName(
- DINameKind::LinkageName))
- OS << " \"" << Name << '\"';
+ if (const char *Name =
+ Die.getAttributeValueAsReferencedDie(formValue).getName(
+ DINameKind::LinkageName))
+ OS << Space << "\"" << Name << '\"';
} else if (Attr == DW_AT_type) {
- OS << " \"";
- dumpTypeName(OS, Die);
+ OS << Space << "\"";
+ dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(formValue));
OS << '"';
} else if (Attr == DW_AT_APPLE_property_attribute) {
if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant())
@@ -262,10 +345,9 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
const DWARFObject &Obj = Die.getDwarfUnit()->getContext().getDWARFObj();
// For DW_FORM_rnglistx we need to dump the offset separately, since
// we have only dumped the index so far.
- Optional<DWARFFormValue> Value = Die.find(DW_AT_ranges);
- if (Value && Value->getForm() == DW_FORM_rnglistx)
+ if (formValue.getForm() == DW_FORM_rnglistx)
if (auto RangeListOffset =
- U->getRnglistOffset(*Value->getAsSectionOffset())) {
+ U->getRnglistOffset(*formValue.getAsSectionOffset())) {
DWARFFormValue FV(dwarf::DW_FORM_sec_offset);
FV.setUValue(*RangeListOffset);
FV.dump(OS, DumpOpts);
@@ -349,8 +431,15 @@ DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const {
DWARFDie
DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
- if (auto SpecRef = toReference(find(Attr))) {
- if (auto SpecUnit = U->getUnitSection().getUnitForOffset(*SpecRef))
+ if (Optional<DWARFFormValue> F = find(Attr))
+ return getAttributeValueAsReferencedDie(*F);
+ return DWARFDie();
+}
+
+DWARFDie
+DWARFDie::getAttributeValueAsReferencedDie(const DWARFFormValue &V) const {
+ if (auto SpecRef = toReference(V)) {
+ if (auto SpecUnit = U->getUnitVector().getUnitForOffset(*SpecRef))
return SpecUnit->getDIEForOffset(*SpecRef);
}
return DWARFDie();
@@ -377,13 +466,13 @@ Optional<uint64_t> DWARFDie::getHighPC(uint64_t LowPC) const {
bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC,
uint64_t &SectionIndex) const {
auto F = find(DW_AT_low_pc);
- auto LowPcAddr = toAddress(F);
+ auto LowPcAddr = toSectionedAddress(F);
if (!LowPcAddr)
return false;
- if (auto HighPcAddr = getHighPC(*LowPcAddr)) {
- LowPC = *LowPcAddr;
+ if (auto HighPcAddr = getHighPC(LowPcAddr->Address)) {
+ LowPC = LowPcAddr->Address;
HighPC = *HighPcAddr;
- SectionIndex = F->getSectionIndex();
+ SectionIndex = LowPcAddr->SectionIndex;
return true;
}
return false;
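
Attribute dumping now resolves referenced DIEs from the form value that was
just extracted instead of re-searching the DIE, which keeps DW_AT_type and
DW_AT_specification output consistent with the raw value already printed. A
sketch of the new overload in use (function name and context are made up):

    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/DebugInfo/DWARF/DWARFDie.h"
    #include "llvm/Support/raw_ostream.h"

    // Print the linkage name behind an already-extracted reference-class
    // attribute value, if the referenced DIE has one.
    void printSpecName(const llvm::DWARFDie &Die,
                       const llvm::DWARFFormValue &FormValue,
                       llvm::raw_ostream &OS) {
      llvm::DWARFDie Ref = Die.getAttributeValueAsReferencedDie(FormValue);
      if (const char *Name = Ref.getName(llvm::DINameKind::LinkageName))
        OS << '"' << Name << '"';
    }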
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index a9ea26c476ca..2df4456053fb 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -94,6 +94,7 @@ static DescVector getDescriptions() {
Desc(Op::Dwarf3, Op::SizeLEB, Op::SizeBlock);
Descriptions[DW_OP_stack_value] = Desc(Op::Dwarf3);
Descriptions[DW_OP_GNU_push_tls_address] = Desc(Op::Dwarf3);
+ Descriptions[DW_OP_addrx] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_const_index] = Desc(Op::Dwarf4, Op::SizeLEB);
return Descriptions;
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 1aa43c6b6517..7719fea63120 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -308,6 +308,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
break;
case DW_FORM_GNU_addr_index:
case DW_FORM_GNU_str_index:
+ case DW_FORM_addrx:
case DW_FORM_strx:
Value.uval = Data.getULEB128(OffsetPtr);
break;
@@ -330,6 +331,29 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
return true;
}
+void DWARFFormValue::dumpSectionedAddress(raw_ostream &OS,
+ DIDumpOptions DumpOpts,
+ SectionedAddress SA) const {
+ OS << format("0x%016" PRIx64, SA.Address);
+ dumpAddressSection(U->getContext().getDWARFObj(), OS, DumpOpts,
+ SA.SectionIndex);
+}
+
+void DWARFFormValue::dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS,
+ DIDumpOptions DumpOpts,
+ uint64_t SectionIndex) {
+ if (!DumpOpts.Verbose || SectionIndex == -1ULL)
+ return;
+ ArrayRef<SectionName> SectionNames = Obj.getSectionNames();
+ const auto &SecRef = SectionNames[SectionIndex];
+
+ OS << " \"" << SecRef.Name << '\"';
+
+ // Print section index if name is not unique.
+ if (!SecRef.IsNameUnique)
+ OS << format(" [%" PRIu64 "]", SectionIndex);
+}
+
void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
uint64_t UValue = Value.uval;
bool CURelativeOffset = false;
@@ -338,15 +362,21 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
: nulls();
switch (Form) {
case DW_FORM_addr:
- AddrOS << format("0x%016" PRIx64, UValue);
+ dumpSectionedAddress(AddrOS, DumpOpts, {Value.uval, Value.SectionIndex});
break;
+ case DW_FORM_addrx:
+ case DW_FORM_addrx1:
+ case DW_FORM_addrx2:
+ case DW_FORM_addrx3:
+ case DW_FORM_addrx4:
case DW_FORM_GNU_addr_index: {
- AddrOS << format(" indexed (%8.8x) address = ", (uint32_t)UValue);
- uint64_t Address;
+ Optional<SectionedAddress> A = U->getAddrOffsetSectionItem(UValue);
+ if (!A || DumpOpts.Verbose)
+ AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue);
if (U == nullptr)
OS << "<invalid dwarf unit>";
- else if (U->getAddrOffsetSectionItem(UValue, Address))
- AddrOS << format("0x%016" PRIx64, Address);
+ else if (A)
+ dumpSectionedAddress(AddrOS, DumpOpts, *A);
else
OS << "<no .debug_addr section>";
break;
@@ -387,16 +417,16 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
switch (Form) {
case DW_FORM_exprloc:
case DW_FORM_block:
- OS << format("<0x%" PRIx64 "> ", UValue);
+ AddrOS << format("<0x%" PRIx64 "> ", UValue);
break;
case DW_FORM_block1:
- OS << format("<0x%2.2x> ", (uint8_t)UValue);
+ AddrOS << format("<0x%2.2x> ", (uint8_t)UValue);
break;
case DW_FORM_block2:
- OS << format("<0x%4.4x> ", (uint16_t)UValue);
+ AddrOS << format("<0x%4.4x> ", (uint16_t)UValue);
break;
case DW_FORM_block4:
- OS << format("<0x%8.8x> ", (uint32_t)UValue);
+ AddrOS << format("<0x%8.8x> ", (uint32_t)UValue);
break;
default:
break;
@@ -407,7 +437,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
// UValue contains size of block
const uint8_t *EndDataPtr = DataPtr + UValue;
while (DataPtr < EndDataPtr) {
- OS << format("%2.2x ", *DataPtr);
+ AddrOS << format("%2.2x ", *DataPtr);
++DataPtr;
}
} else
@@ -438,7 +468,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
case DW_FORM_strx4:
case DW_FORM_GNU_str_index:
if (DumpOpts.Verbose)
- OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue);
+ OS << format("indexed (%8.8x) string = ", (uint32_t)UValue);
dumpString(OS);
break;
case DW_FORM_GNU_strp_alt:
@@ -501,8 +531,9 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
if (CURelativeOffset) {
if (DumpOpts.Verbose)
OS << " => {";
- WithColor(OS, HighlightColor::Address).get()
- << format("0x%8.8" PRIx64, UValue + (U ? U->getOffset() : 0));
+ if (DumpOpts.ShowAddresses)
+ WithColor(OS, HighlightColor::Address).get()
+ << format("0x%8.8" PRIx64, UValue + (U ? U->getOffset() : 0));
if (DumpOpts.Verbose)
OS << "}";
}
@@ -536,10 +567,12 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx ||
Form == DW_FORM_strx1 || Form == DW_FORM_strx2 || Form == DW_FORM_strx3 ||
Form == DW_FORM_strx4) {
- uint64_t StrOffset;
- if (!U || !U->getStringOffsetSectionItem(Offset, StrOffset))
+ if (!U)
+ return None;
+ Optional<uint64_t> StrOffset = U->getStringOffsetSectionItem(Offset);
+ if (!StrOffset)
return None;
- Offset = StrOffset;
+ Offset = *StrOffset;
}
// Prefer the Unit's string extractor, because for .dwo it will point to
// .debug_str.dwo, while the Context's extractor always uses .debug_str.
@@ -554,16 +587,23 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
}
Optional<uint64_t> DWARFFormValue::getAsAddress() const {
+ if (auto SA = getAsSectionedAddress())
+ return SA->Address;
+ return None;
+}
+Optional<SectionedAddress> DWARFFormValue::getAsSectionedAddress() const {
if (!isFormClass(FC_Address))
return None;
- if (Form == DW_FORM_GNU_addr_index) {
+ if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx) {
uint32_t Index = Value.uval;
- uint64_t Result;
- if (!U || !U->getAddrOffsetSectionItem(Index, Result))
+ if (!U)
+ return None;
+ Optional<SectionedAddress> SA = U->getAddrOffsetSectionItem(Index);
+ if (!SA)
return None;
- return Result;
+ return SA;
}
- return Value.uval;
+ return {{Value.uval, Value.SectionIndex}};
}
Optional<uint64_t> DWARFFormValue::getAsReference() const {
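
Address-class form values now expose the section they were relocated against,
so consumers that need to disambiguate identically named sections no longer
have to consult the relocation map themselves. A sketch, assuming the
getAsSectionedAddress() accessor added above (the helper name is hypothetical):

    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/DebugInfo/DWARF/DWARFDie.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cinttypes>

    void printLowPC(const llvm::DWARFDie &Die, llvm::raw_ostream &OS) {
      if (auto FV = Die.find(llvm::dwarf::DW_AT_low_pc))
        if (auto SA = FV->getAsSectionedAddress()) {
          // SA->Address is the (possibly relocated) address; SA->SectionIndex
          // is -1ULL when no object section applies.
          OS << llvm::format("0x%016" PRIx64, SA->Address);
          if (SA->SectionIndex != -1ULL)
            OS << " [section " << SA->SectionIndex << "]";
          OS << '\n';
        }
    }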
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index ebd6104ab878..1abd931e3b8b 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -33,6 +34,16 @@ void DWARFGdbIndex::dumpCUList(raw_ostream &OS) const {
CU.Length);
}
+void DWARFGdbIndex::dumpTUList(raw_ostream &OS) const {
+ OS << formatv("\n Types CU list offset = {0:x}, has {1} entries:\n",
+ TuListOffset, TuList.size());
+ uint32_t I = 0;
+ for (const TypeUnitEntry &TU : TuList)
+ OS << formatv(" {0}: offset = {1:x8}, type_offset = {2:x8}, "
+ "type_signature = {3:x16}\n",
+ I++, TU.Offset, TU.TypeOffset, TU.TypeSignature);
+}
+
void DWARFGdbIndex::dumpAddressArea(raw_ostream &OS) const {
OS << format("\n Address area offset = 0x%x, has %" PRId64 " entries:",
AddressAreaOffset, (uint64_t)AddressArea.size())
@@ -94,6 +105,7 @@ void DWARFGdbIndex::dump(raw_ostream &OS) {
if (HasContent) {
OS << " Version = " << Version << '\n';
dumpCUList(OS);
+ dumpTUList(OS);
dumpAddressArea(OS);
dumpSymbolTable(OS);
dumpConstantPool(OS);
@@ -127,9 +139,14 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
// CU Types are no longer needed as DWARF skeleton type units never made it
// into the standard.
- uint32_t CuTypesListSize = (AddressAreaOffset - CuTypesOffset) / 24;
- if (CuTypesListSize != 0)
- return false;
+ uint32_t TuListSize = (AddressAreaOffset - CuTypesOffset) / 24;
+ TuList.resize(TuListSize);
+ for (uint32_t I = 0; I < TuListSize; ++I) {
+ uint64_t CuOffset = Data.getU64(&Offset);
+ uint64_t TypeOffset = Data.getU64(&Offset);
+ uint64_t Signature = Data.getU64(&Offset);
+ TuList[I] = {CuOffset, TypeOffset, Signature};
+ }
uint32_t AddressAreaSize = (SymbolTableOffset - AddressAreaOffset) / 20;
AddressArea.reserve(AddressAreaSize);
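
The new dumpTUList and the rewritten parseImpl above treat the .gdb_index type-unit list as a sequence of 24-byte records, three unsigned 64-bit values each (unit offset, type offset, type signature), where the old code simply rejected a non-empty list. A self-contained sketch of reading such fixed-size little-endian records from a raw buffer; the struct and function names are illustrative, not from the patch:

    #include <cstdint>
    #include <vector>

    struct TypeUnitEntry {
      uint64_t Offset;
      uint64_t TypeOffset;
      uint64_t TypeSignature;
    };

    // Decode a little-endian 64-bit value from 8 bytes.
    static uint64_t readU64LE(const uint8_t *P) {
      uint64_t V = 0;
      for (int I = 7; I >= 0; --I)
        V = (V << 8) | P[I];
      return V;
    }

    // Parse Count 24-byte records starting at Offset; returns an empty vector
    // if the buffer is too small to hold them all.
    std::vector<TypeUnitEntry> parseTuList(const std::vector<uint8_t> &Data,
                                           size_t Offset, size_t Count) {
      std::vector<TypeUnitEntry> TuList;
      if (Data.size() < Offset + Count * 24)
        return TuList;
      for (size_t I = 0; I < Count; ++I) {
        const uint8_t *P = Data.data() + Offset + I * 24;
        TuList.push_back({readU64LE(P), readU64LE(P + 8), readU64LE(P + 16)});
      }
      return TuList;
    }

    int main() { return parseTuList({}, 0, 0).empty() ? 0 : 1; }
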
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
index 559afc7559bd..462c036d73ad 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -9,42 +9,39 @@
#include "llvm/DebugInfo/DWARF/DWARFListTable.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-template <typename... Ts>
-static Error createError(char const *Fmt, const Ts &... Vals) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(Fmt, Vals...);
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
-}
-
Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
uint32_t *OffsetPtr) {
HeaderOffset = *OffsetPtr;
// Read and verify the length field.
if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t)))
- return createError("section is not large enough to contain a "
+ return createStringError(errc::invalid_argument,
+ "section is not large enough to contain a "
"%s table length at offset 0x%" PRIx32,
SectionName.data(), *OffsetPtr);
// TODO: Add support for DWARF64.
HeaderData.Length = Data.getU32(OffsetPtr);
if (HeaderData.Length == 0xffffffffu)
- return createError("DWARF64 is not supported in %s at offset 0x%" PRIx32,
+ return createStringError(errc::not_supported,
+ "DWARF64 is not supported in %s at offset 0x%" PRIx32,
SectionName.data(), HeaderOffset);
Format = dwarf::DwarfFormat::DWARF32;
if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header))
- return createError("%s table at offset 0x%" PRIx32
+ return createStringError(errc::invalid_argument,
+ "%s table at offset 0x%" PRIx32
" has too small length (0x%" PRIx32
") to contain a complete header",
SectionName.data(), HeaderOffset, length());
uint32_t End = HeaderOffset + length();
if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset))
- return createError("section is not large enough to contain a %s table "
+ return createStringError(errc::invalid_argument,
+ "section is not large enough to contain a %s table "
"of length 0x%" PRIx32 " at offset 0x%" PRIx32,
SectionName.data(), length(), HeaderOffset);
@@ -55,20 +52,23 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
// Perform basic validation of the remaining header fields.
if (HeaderData.Version != 5)
- return createError("unrecognised %s table version %" PRIu16
+ return createStringError(errc::invalid_argument,
+ "unrecognised %s table version %" PRIu16
" in table at offset 0x%" PRIx32,
SectionName.data(), HeaderData.Version, HeaderOffset);
if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
- return createError("%s table at offset 0x%" PRIx32
- " has unsupported address size %hhu",
+ return createStringError(errc::not_supported,
+ "%s table at offset 0x%" PRIx32
+ " has unsupported address size %" PRIu8,
SectionName.data(), HeaderOffset, HeaderData.AddrSize);
if (HeaderData.SegSize != 0)
- return createError("%s table at offset 0x%" PRIx32
+ return createStringError(errc::not_supported,
+ "%s table at offset 0x%" PRIx32
" has unsupported segment selector size %" PRIu8,
SectionName.data(), HeaderOffset, HeaderData.SegSize);
if (End < HeaderOffset + sizeof(HeaderData) +
HeaderData.OffsetEntryCount * sizeof(uint32_t))
- return createError(
+ return createStringError(errc::invalid_argument,
"%s table at offset 0x%" PRIx32 " has more offset entries (%" PRIu32
") than there is space for",
SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount);
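
The DWARFListTable hunks above retire the file-local createError helper in favor of createStringError, which pairs an errc value with a printf-style message. A rough standalone analogue of such a helper, shown only to illustrate the shape of the call, not the LLVM implementation:

    #include <cstdarg>
    #include <cstdio>
    #include <string>
    #include <system_error>

    struct StringError {
      std::error_code EC;
      std::string Msg;
    };

    // Format a message printf-style and attach an error code, roughly the role
    // createStringError plays for llvm::Error in the patch above.
    StringError createStringError(std::errc Code, const char *Fmt, ...) {
      char Buf[512];
      va_list Args;
      va_start(Args, Fmt);
      vsnprintf(Buf, sizeof(Buf), Fmt, Args);
      va_end(Args);
      return {std::make_error_code(Code), Buf};
    }

    int main() {
      StringError E = createStringError(std::errc::invalid_argument,
                                        "table at offset 0x%x is truncated", 16u);
      return E.Msg.empty() ? 1 : 0;
    }
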
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 3b408857d29f..80234665bdeb 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -11,13 +11,16 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"
#include <algorithm>
@@ -31,34 +34,161 @@
using namespace llvm;
using namespace dwarf;
-void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
+void DWARFUnitVector::addUnitsForSection(DWARFContext &C,
+ const DWARFSection &Section,
+ DWARFSectionKind SectionKind) {
const DWARFObject &D = C.getDWARFObj();
- parseImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangeSection(),
- D.getStringSection(), D.getStringOffsetSection(),
- &D.getAddrSection(), D.getLineSection(), D.isLittleEndian(), false,
- false);
+ addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangeSection(),
+ &D.getLocSection(), D.getStringSection(),
+ D.getStringOffsetSection(), &D.getAddrSection(),
+ D.getLineSection(), D.isLittleEndian(), false, false,
+ SectionKind);
}
-void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
- const DWARFSection &DWOSection, bool Lazy) {
+void DWARFUnitVector::addUnitsForDWOSection(DWARFContext &C,
+ const DWARFSection &DWOSection,
+ DWARFSectionKind SectionKind,
+ bool Lazy) {
const DWARFObject &D = C.getDWARFObj();
- parseImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangeDWOSection(),
- D.getStringDWOSection(), D.getStringOffsetDWOSection(),
- &D.getAddrSection(), D.getLineDWOSection(), C.isLittleEndian(),
- true, Lazy);
+ addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangeDWOSection(),
+ &D.getLocDWOSection(), D.getStringDWOSection(),
+ D.getStringOffsetDWOSection(), &D.getAddrSection(),
+ D.getLineDWOSection(), C.isLittleEndian(), true, Lazy,
+ SectionKind);
+}
+
+void DWARFUnitVector::addUnitsImpl(
+ DWARFContext &Context, const DWARFObject &Obj, const DWARFSection &Section,
+ const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ const DWARFSection *LocSection, StringRef SS, const DWARFSection &SOS,
+ const DWARFSection *AOS, const DWARFSection &LS, bool LE, bool IsDWO,
+ bool Lazy, DWARFSectionKind SectionKind) {
+ DWARFDataExtractor Data(Obj, Section, LE, 0);
+ // Lazy initialization of Parser, now that we have all section info.
+ if (!Parser) {
+ Parser = [=, &Context, &Obj, &Section, &SOS,
+ &LS](uint32_t Offset, DWARFSectionKind SectionKind,
+ const DWARFSection *CurSection,
+ const DWARFUnitIndex::Entry *IndexEntry)
+ -> std::unique_ptr<DWARFUnit> {
+ const DWARFSection &InfoSection = CurSection ? *CurSection : Section;
+ DWARFDataExtractor Data(Obj, InfoSection, LE, 0);
+ if (!Data.isValidOffset(Offset))
+ return nullptr;
+ const DWARFUnitIndex *Index = nullptr;
+ if (IsDWO)
+ Index = &getDWARFUnitIndex(Context, SectionKind);
+ DWARFUnitHeader Header;
+ if (!Header.extract(Context, Data, &Offset, SectionKind, Index,
+ IndexEntry))
+ return nullptr;
+ std::unique_ptr<DWARFUnit> U;
+ if (Header.isTypeUnit())
+ U = llvm::make_unique<DWARFTypeUnit>(Context, InfoSection, Header, DA,
+ RS, LocSection, SS, SOS, AOS, LS,
+ LE, IsDWO, *this);
+ else
+ U = llvm::make_unique<DWARFCompileUnit>(Context, InfoSection, Header,
+ DA, RS, LocSection, SS, SOS,
+ AOS, LS, LE, IsDWO, *this);
+ return U;
+ };
+ }
+ if (Lazy)
+ return;
+ // Find a reasonable insertion point within the vector. We skip over
+ // (a) units from a different section, (b) units from the same section
+ // but with lower offset-within-section. This keeps units in order
+ // within a section, although not necessarily within the object file,
+ // even if we do lazy parsing.
+ auto I = this->begin();
+ uint32_t Offset = 0;
+ while (Data.isValidOffset(Offset)) {
+ if (I != this->end() &&
+ (&(*I)->getInfoSection() != &Section || (*I)->getOffset() == Offset)) {
+ ++I;
+ continue;
+ }
+ auto U = Parser(Offset, SectionKind, &Section, nullptr);
+ // If parsing failed, we're done with this section.
+ if (!U)
+ break;
+ Offset = U->getNextUnitOffset();
+ I = std::next(this->insert(I, std::move(U)));
+ }
+}
+
+DWARFUnit *DWARFUnitVector::addUnit(std::unique_ptr<DWARFUnit> Unit) {
+ auto I = std::upper_bound(begin(), end(), Unit,
+ [](const std::unique_ptr<DWARFUnit> &LHS,
+ const std::unique_ptr<DWARFUnit> &RHS) {
+ return LHS->getOffset() < RHS->getOffset();
+ });
+ return this->insert(I, std::move(Unit))->get();
+}
+
+DWARFUnit *DWARFUnitVector::getUnitForOffset(uint32_t Offset) const {
+ auto end = begin() + getNumInfoUnits();
+ auto *CU =
+ std::upper_bound(begin(), end, Offset,
+ [](uint32_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
+ return LHS < RHS->getNextUnitOffset();
+ });
+ if (CU != end && (*CU)->getOffset() <= Offset)
+ return CU->get();
+ return nullptr;
+}
+
+DWARFUnit *
+DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
+ const auto *CUOff = E.getOffset(DW_SECT_INFO);
+ if (!CUOff)
+ return nullptr;
+
+ auto Offset = CUOff->Offset;
+ auto end = begin() + getNumInfoUnits();
+
+ auto *CU =
+ std::upper_bound(begin(), end, CUOff->Offset,
+ [](uint32_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
+ return LHS < RHS->getNextUnitOffset();
+ });
+ if (CU != end && (*CU)->getOffset() <= Offset)
+ return CU->get();
+
+ if (!Parser)
+ return nullptr;
+
+ auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E);
+ if (!U)
+ U = nullptr;
+
+ auto *NewCU = U.get();
+ this->insert(CU, std::move(U));
+ ++NumInfoUnits;
+ return NewCU;
}
DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
- const DWARFUnitHeader &Header,
- const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ const DWARFUnitHeader &Header, const DWARFDebugAbbrev *DA,
+ const DWARFSection *RS, const DWARFSection *LocSection,
StringRef SS, const DWARFSection &SOS,
const DWARFSection *AOS, const DWARFSection &LS, bool LE,
- bool IsDWO, const DWARFUnitSectionBase &UnitSection)
+ bool IsDWO, const DWARFUnitVector &UnitVector)
: Context(DC), InfoSection(Section), Header(Header), Abbrev(DA),
- RangeSection(RS), LineSection(LS), StringSection(SS),
- StringOffsetSection(SOS), AddrOffsetSection(AOS), isLittleEndian(LE),
- isDWO(IsDWO), UnitSection(UnitSection) {
+ RangeSection(RS), LocSection(LocSection), LineSection(LS),
+ StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
+ isLittleEndian(LE), IsDWO(IsDWO), UnitVector(UnitVector) {
clear();
+ // For split DWARF we only need to keep track of the location list section's
+ // data (no relocations), and if we are reading a package file, we need to
+ // adjust the location list data based on the index entries.
+ if (IsDWO) {
+ LocSectionData = LocSection->Data;
+ if (auto *IndexEntry = Header.getIndexEntry())
+ if (const auto *C = IndexEntry->getOffset(DW_SECT_LOC))
+ LocSectionData = LocSectionData.substr(C->Offset, C->Length);
+ }
}
DWARFUnit::~DWARFUnit() = default;
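
DWARFUnitVector keeps its units sorted by offset and resolves an offset to the owning unit with std::upper_bound against each unit's next-unit offset, as in getUnitForOffset above. The same idiom in a self-contained form; Unit here is a stand-in struct, not the LLVM class:

    #include <algorithm>
    #include <cstdint>
    #include <memory>
    #include <vector>

    struct Unit {
      uint32_t Offset;
      uint32_t NextUnitOffset; // one past the end of this unit
    };

    // Find the unit whose [Offset, NextUnitOffset) range contains Target in a
    // vector kept sorted by Offset.
    const Unit *getUnitForOffset(const std::vector<std::unique_ptr<Unit>> &Units,
                                 uint32_t Target) {
      auto It = std::upper_bound(
          Units.begin(), Units.end(), Target,
          [](uint32_t LHS, const std::unique_ptr<Unit> &RHS) {
            return LHS < RHS->NextUnitOffset;
          });
      if (It != Units.end() && (*It)->Offset <= Target)
        return It->get();
      return nullptr;
    }

    int main() {
      std::vector<std::unique_ptr<Unit>> Units;
      Units.push_back(std::make_unique<Unit>(Unit{0, 100}));
      Units.push_back(std::make_unique<Unit>(Unit{100, 180}));
      const Unit *U = getUnitForOffset(Units, 120);
      return (U && U->Offset == 100) ? 0 : 1;
    }
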
@@ -68,38 +198,50 @@ DWARFDataExtractor DWARFUnit::getDebugInfoExtractor() const {
getAddressByteSize());
}
-bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
- uint64_t &Result) const {
+Optional<SectionedAddress>
+DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const {
+ if (IsDWO) {
+ auto R = Context.info_section_units();
+ auto I = R.begin();
+ // Surprising if a DWO file has more than one skeleton unit in it - this
+ // probably shouldn't be valid, but if a use case is found, here's where to
+ // support it (probably have to linearly search for the matching skeleton CU
+ // here)
+ if (I != R.end() && std::next(I) == R.end())
+ return (*I)->getAddrOffsetSectionItem(Index);
+ }
uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize();
if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize())
- return false;
+ return None;
DWARFDataExtractor DA(Context.getDWARFObj(), *AddrOffsetSection,
isLittleEndian, getAddressByteSize());
- Result = DA.getRelocatedAddress(&Offset);
- return true;
+ uint64_t Section;
+ uint64_t Address = DA.getRelocatedAddress(&Offset, &Section);
+ return {{Address, Section}};
}
-bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
- uint64_t &Result) const {
+Optional<uint64_t> DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const {
if (!StringOffsetsTableContribution)
- return false;
+ return None;
unsigned ItemSize = getDwarfStringOffsetsByteSize();
uint32_t Offset = getStringOffsetsBase() + Index * ItemSize;
if (StringOffsetSection.Data.size() < Offset + ItemSize)
- return false;
+ return None;
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
isLittleEndian, 0);
- Result = DA.getRelocatedValue(ItemSize, &Offset);
- return true;
+ return DA.getRelocatedValue(ItemSize, &Offset);
}
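
getAddrOffsetSectionItem and getStringOffsetSectionItem above compute the entry's position as base plus index times item size and return None when the read would run past the section. A minimal standalone version of that bounds-checked table read; the function name and the little-endian decoding are illustrative:

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Read entry Index of a table of fixed-size little-endian values starting
    // at Base inside Data, or nullopt if the read would run off the end.
    std::optional<uint64_t> readTableEntry(const std::vector<uint8_t> &Data,
                                           uint64_t Base, uint32_t Index,
                                           unsigned ItemSize) {
      uint64_t Offset = Base + uint64_t(Index) * ItemSize;
      if (Data.size() < Offset + ItemSize)
        return std::nullopt;
      uint64_t Value = 0;
      for (unsigned I = 0; I < ItemSize; ++I)
        Value |= uint64_t(Data[Offset + I]) << (8 * I);
      return Value;
    }

    int main() {
      std::vector<uint8_t> Data = {0x78, 0x56, 0x34, 0x12};
      auto V = readTableEntry(Data, 0, 0, 4);
      return (V && *V == 0x12345678) ? 0 : 1;
    }
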
bool DWARFUnitHeader::extract(DWARFContext &Context,
const DWARFDataExtractor &debug_info,
uint32_t *offset_ptr,
DWARFSectionKind SectionKind,
- const DWARFUnitIndex *Index) {
+ const DWARFUnitIndex *Index,
+ const DWARFUnitIndex::Entry *Entry) {
Offset = *offset_ptr;
- IndexEntry = Index ? Index->getFromOffset(*offset_ptr) : nullptr;
+ IndexEntry = Entry;
+ if (!IndexEntry && Index)
+ IndexEntry = Index->getFromOffset(*offset_ptr);
Length = debug_info.getU32(offset_ptr);
// FIXME: Support DWARF64.
unsigned SizeOfLength = 4;
@@ -166,13 +308,10 @@ parseRngListTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
// We are expected to be called with Offset 0 or pointing just past the table
// header, which is 12 bytes long for DWARF32.
if (Offset > 0) {
- if (Offset < 12U) {
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
- Stream << format(
- "Did not detect a valid range list table with base = 0x%x", Offset);
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
- }
+ if (Offset < 12U)
+ return createStringError(errc::invalid_argument, "Did not detect a valid"
+ " range list table with base = 0x%" PRIu32,
+ Offset);
Offset -= 12U;
}
llvm::DWARFDebugRnglistTable Table;
@@ -274,11 +413,13 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
DWARFDie UnitDie = getUnitDIE();
if (Optional<uint64_t> DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
Header.setDWOId(*DWOId);
- if (!isDWO) {
+ if (!IsDWO) {
assert(AddrOffsetSectionBase == 0);
assert(RangeSectionBase == 0);
- AddrOffsetSectionBase =
- toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
+ AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0);
+ if (!AddrOffsetSectionBase)
+ AddrOffsetSectionBase =
+ toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
}
@@ -289,27 +430,19 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
// offsets table starting at offset 0 of the debug_str_offsets.dwo section.
// In both cases we need to determine the format of the contribution,
// which may differ from the unit's format.
- uint64_t StringOffsetsContributionBase =
- isDWO ? 0 : toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0);
- auto IndexEntry = Header.getIndexEntry();
- if (IndexEntry)
- if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
- StringOffsetsContributionBase += C->Offset;
-
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
isLittleEndian, 0);
- if (isDWO)
+ if (IsDWO)
StringOffsetsTableContribution =
- determineStringOffsetsTableContributionDWO(
- DA, StringOffsetsContributionBase);
+ determineStringOffsetsTableContributionDWO(DA);
else if (getVersion() >= 5)
- StringOffsetsTableContribution = determineStringOffsetsTableContribution(
- DA, StringOffsetsContributionBase);
+ StringOffsetsTableContribution =
+ determineStringOffsetsTableContribution(DA);
// DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
// describe address ranges.
if (getVersion() >= 5) {
- if (isDWO)
+ if (IsDWO)
setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0);
else
setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
@@ -329,20 +462,20 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
// In a split dwarf unit, there is no DW_AT_rnglists_base attribute.
// Adjust RangeSectionBase to point past the table header.
- if (isDWO && RngListTable)
+ if (IsDWO && RngListTable)
RangeSectionBase = RngListTable->getHeaderSize();
}
}
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
- }
+ }
return DieArray.size();
}
bool DWARFUnit::parseDWO() {
- if (isDWO)
+ if (IsDWO)
return false;
if (DWO.get())
return false;
@@ -412,12 +545,12 @@ DWARFUnit::findRnglistFromOffset(uint32_t Offset) {
isLittleEndian, RngListTable->getAddrSize());
auto RangeListOrError = RngListTable->findList(RangesData, Offset);
if (RangeListOrError)
- return RangeListOrError.get().getAbsoluteRanges(getBaseAddress());
+ return RangeListOrError.get().getAbsoluteRanges(getBaseAddress(), *this);
return RangeListOrError.takeError();
}
- return make_error<StringError>("missing or invalid range list table",
- inconvertibleErrorCode());
+ return createStringError(errc::invalid_argument,
+ "missing or invalid range list table");
}
Expected<DWARFAddressRangesVector>
@@ -425,51 +558,26 @@ DWARFUnit::findRnglistFromIndex(uint32_t Index) {
if (auto Offset = getRnglistOffset(Index))
return findRnglistFromOffset(*Offset + RangeSectionBase);
- std::string Buffer;
- raw_string_ostream Stream(Buffer);
if (RngListTable)
- Stream << format("invalid range list table index %d", Index);
+ return createStringError(errc::invalid_argument,
+ "invalid range list table index %d", Index);
else
- Stream << "missing or invalid range list table";
- return make_error<StringError>(Stream.str(), inconvertibleErrorCode());
+ return createStringError(errc::invalid_argument,
+ "missing or invalid range list table");
}
-void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
+Expected<DWARFAddressRangesVector> DWARFUnit::collectAddressRanges() {
DWARFDie UnitDie = getUnitDIE();
if (!UnitDie)
- return;
+ return createStringError(errc::invalid_argument, "No unit DIE");
+
// First, check if unit DIE describes address ranges for the whole unit.
auto CUDIERangesOrError = UnitDie.getAddressRanges();
- if (CUDIERangesOrError) {
- if (!CUDIERangesOrError.get().empty()) {
- CURanges.insert(CURanges.end(), CUDIERangesOrError.get().begin(),
- CUDIERangesOrError.get().end());
- return;
- }
- } else
- WithColor::error() << "decoding address ranges: "
- << toString(CUDIERangesOrError.takeError()) << '\n';
-
- // This function is usually called if there in no .debug_aranges section
- // in order to produce a compile unit level set of address ranges that
- // is accurate. If the DIEs weren't parsed, then we don't want all dies for
- // all compile units to stay loaded when they weren't needed. So we can end
- // up parsing the DWARF and then throwing them all away to keep memory usage
- // down.
- const bool ClearDIEs = extractDIEsIfNeeded(false) > 1;
- getUnitDIE().collectChildrenAddressRanges(CURanges);
-
- // Collect address ranges from DIEs in .dwo if necessary.
- bool DWOCreated = parseDWO();
- if (DWO)
- DWO->collectAddressRanges(CURanges);
- if (DWOCreated)
- DWO.reset();
-
- // Keep memory down by clearing DIEs if this generate function
- // caused them to be parsed.
- if (ClearDIEs)
- clearDIEs(true);
+ if (!CUDIERangesOrError)
+ return createStringError(errc::invalid_argument,
+ "decoding address ranges: %s",
+ toString(CUDIERangesOrError.takeError()).c_str());
+ return *CUDIERangesOrError;
}
void DWARFUnit::updateAddressDieMap(DWARFDie Die) {
@@ -637,15 +745,13 @@ const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
return Abbrevs;
}
-llvm::Optional<BaseAddress> DWARFUnit::getBaseAddress() {
+llvm::Optional<SectionedAddress> DWARFUnit::getBaseAddress() {
if (BaseAddr)
return BaseAddr;
DWARFDie UnitDie = getUnitDIE();
Optional<DWARFFormValue> PC = UnitDie.find({DW_AT_low_pc, DW_AT_entry_pc});
- if (Optional<uint64_t> Addr = toAddress(PC))
- BaseAddr = {*Addr, PC->getSectionIndex()};
-
+ BaseAddr = toSectionedAddress(PC);
return BaseAddr;
}
@@ -660,7 +766,7 @@ StrOffsetsContributionDescriptor::validateContributionSize(
if (ValidationSize >= Size)
if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize))
return *this;
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
}
// Look for a DWARF64-formatted contribution to the string offsets table
@@ -668,18 +774,17 @@ StrOffsetsContributionDescriptor::validateContributionSize(
static Optional<StrOffsetsContributionDescriptor>
parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
if (!DA.isValidOffsetForDataOfSize(Offset, 16))
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
if (DA.getU32(&Offset) != 0xffffffff)
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
uint64_t Size = DA.getU64(&Offset);
uint8_t Version = DA.getU16(&Offset);
(void)DA.getU16(&Offset); // padding
// The encoded length includes the 2-byte version field and the 2-byte
// padding, so we need to subtract them out when we populate the descriptor.
- return StrOffsetsContributionDescriptor(Offset, Size - 4, Version, DWARF64);
- //return Optional<StrOffsetsContributionDescriptor>(Descriptor);
+ return {{Offset, Size - 4, Version, DWARF64}};
}
// Look for a DWARF32-formatted contribution to the string offsets table
@@ -687,22 +792,20 @@ parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
static Optional<StrOffsetsContributionDescriptor>
parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
if (!DA.isValidOffsetForDataOfSize(Offset, 8))
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
uint32_t ContributionSize = DA.getU32(&Offset);
if (ContributionSize >= 0xfffffff0)
- return Optional<StrOffsetsContributionDescriptor>();
+ return None;
uint8_t Version = DA.getU16(&Offset);
(void)DA.getU16(&Offset); // padding
// The encoded length includes the 2-byte version field and the 2-byte
// padding, so we need to subtract them out when we populate the descriptor.
- return StrOffsetsContributionDescriptor(Offset, ContributionSize - 4, Version,
- DWARF32);
- //return Optional<StrOffsetsContributionDescriptor>(Descriptor);
+ return {{Offset, ContributionSize - 4, Version, DWARF32}};
}
Optional<StrOffsetsContributionDescriptor>
-DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
- uint64_t Offset) {
+DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) {
+ auto Offset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base), 0);
Optional<StrOffsetsContributionDescriptor> Descriptor;
// Attempt to find a DWARF64 contribution 16 bytes before the base.
if (Offset >= 16)
@@ -715,8 +818,13 @@ DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA,
}
Optional<StrOffsetsContributionDescriptor>
-DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
- uint64_t Offset) {
+DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
+ uint64_t Offset = 0;
+ auto IndexEntry = Header.getIndexEntry();
+ const auto *C =
+ IndexEntry ? IndexEntry->getOffset(DW_SECT_STR_OFFSETS) : nullptr;
+ if (C)
+ Offset = C->Offset;
if (getVersion() >= 5) {
// Look for a valid contribution at the given offset.
auto Descriptor =
@@ -728,15 +836,9 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA,
// Prior to DWARF v5, we derive the contribution size from the
// index table (in a package file). In a .dwo file it is simply
// the length of the string offsets section.
- uint64_t Size = 0;
- auto IndexEntry = Header.getIndexEntry();
if (!IndexEntry)
- Size = StringOffsetSection.Data.size();
- else if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
- Size = C->Length;
- // Return a descriptor with the given offset as base, version 4 and
- // DWARF32 format.
- //return Optional<StrOffsetsContributionDescriptor>(
- //StrOffsetsContributionDescriptor(Offset, Size, 4, DWARF32));
- return StrOffsetsContributionDescriptor(Offset, Size, 4, DWARF32);
+ return {{0, StringOffsetSection.Data.size(), 4, DWARF32}};
+ if (C)
+ return {{C->Offset, C->Length, 4, DWARF32}};
+ return None;
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 17f17572a309..84b6c4b81817 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -164,14 +164,27 @@ DWARFUnitIndex::Entry::getOffset() const {
const DWARFUnitIndex::Entry *
DWARFUnitIndex::getFromOffset(uint32_t Offset) const {
- for (uint32_t i = 0; i != Header.NumBuckets; ++i)
- if (const auto &Contribs = Rows[i].Contributions) {
- const auto &InfoContrib = Contribs[InfoColumn];
- if (InfoContrib.Offset <= Offset &&
- Offset < (InfoContrib.Offset + InfoContrib.Length))
- return &Rows[i];
- }
- return nullptr;
+ if (OffsetLookup.empty()) {
+ for (uint32_t i = 0; i != Header.NumBuckets; ++i)
+ if (Rows[i].Contributions)
+ OffsetLookup.push_back(&Rows[i]);
+ llvm::sort(OffsetLookup, [&](Entry *E1, Entry *E2) {
+ return E1->Contributions[InfoColumn].Offset <
+ E2->Contributions[InfoColumn].Offset;
+ });
+ }
+ auto I =
+ llvm::upper_bound(OffsetLookup, Offset, [&](uint32_t Offset, Entry *E2) {
+ return Offset < E2->Contributions[InfoColumn].Offset;
+ });
+ if (I == OffsetLookup.begin())
+ return nullptr;
+ --I;
+ const auto *E = *I;
+ const auto &InfoContrib = E->Contributions[InfoColumn];
+ if ((InfoContrib.Offset + InfoContrib.Length) <= Offset)
+ return nullptr;
+ return E;
}
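
getFromOffset now builds a lookup table of entries sorted by their DW_SECT_INFO contribution offset, then answers queries with upper_bound: step back one entry and check that the offset still falls inside that entry's contribution. The floor-search idiom in isolation; this sketch sorts eagerly for brevity, whereas the patch caches the sorted table lazily:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Entry {
      uint32_t Offset;
      uint32_t Length;
    };

    // Find the entry whose [Offset, Offset + Length) range contains Target.
    const Entry *findByOffset(std::vector<Entry> &Entries, uint32_t Target) {
      std::sort(Entries.begin(), Entries.end(),
                [](const Entry &A, const Entry &B) { return A.Offset < B.Offset; });
      auto It = std::upper_bound(
          Entries.begin(), Entries.end(), Target,
          [](uint32_t LHS, const Entry &E) { return LHS < E.Offset; });
      if (It == Entries.begin())
        return nullptr;
      --It; // greatest entry with Offset <= Target
      if (Target >= It->Offset + It->Length)
        return nullptr;
      return &*It;
    }

    int main() {
      std::vector<Entry> Entries = {{0, 16}, {16, 32}, {64, 8}};
      const Entry *E = findByOffset(Entries, 20);
      return (E && E->Offset == 16) ? 0 : 1;
    }
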
const DWARFUnitIndex::Entry *DWARFUnitIndex::getFromHash(uint64_t S) const {
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 82d52c467bc0..f8370178b627 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -6,7 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
@@ -93,7 +92,7 @@ bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
auto End = Ranges.end();
auto Iter = findRange(RHS.Ranges.front());
for (const auto &R : RHS.Ranges) {
- if(Iter == End)
+ if (Iter == End)
return false;
if (R.HighPC <= Iter->LowPC)
continue;
@@ -156,14 +155,14 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
OffsetStart);
if (!ValidLength)
note() << "The length for this unit is too "
- "large for the .debug_info provided.\n";
+ "large for the .debug_info provided.\n";
if (!ValidVersion)
note() << "The 16 bit unit header version is not valid.\n";
if (!ValidType)
note() << "The unit type encoding is not valid.\n";
if (!ValidAbbrevOffset)
note() << "The offset into the .debug_abbrev section is "
- "not valid.\n";
+ "not valid.\n";
if (!ValidAddrSize)
note() << "The address size is unsupported.\n";
}
@@ -171,24 +170,38 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
return Success;
}
-bool DWARFVerifier::verifyUnitContents(DWARFUnit &Unit, uint8_t UnitType) {
- uint32_t NumUnitErrors = 0;
+unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
+ unsigned NumUnitErrors = 0;
unsigned NumDies = Unit.getNumDIEs();
for (unsigned I = 0; I < NumDies; ++I) {
auto Die = Unit.getDIEAtIndex(I);
+
if (Die.getTag() == DW_TAG_null)
continue;
+
+ bool HasTypeAttr = false;
for (auto AttrValue : Die.attributes()) {
NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
+ HasTypeAttr |= (AttrValue.Attr == DW_AT_type);
+ }
+
+ if (!HasTypeAttr && (Die.getTag() == DW_TAG_formal_parameter ||
+ Die.getTag() == DW_TAG_variable ||
+ Die.getTag() == DW_TAG_array_type)) {
+ error() << "DIE with tag " << TagString(Die.getTag())
+ << " is missing type attribute:\n";
+ dump(Die) << '\n';
+ NumUnitErrors++;
}
+ NumUnitErrors += verifyDebugInfoCallSite(Die);
}
DWARFDie Die = Unit.getUnitDIE(/* ExtractUnitDIEOnly = */ false);
if (!Die) {
error() << "Compilation unit without DIE.\n";
NumUnitErrors++;
- return NumUnitErrors == 0;
+ return NumUnitErrors;
}
if (!dwarf::isUnitType(Die.getTag())) {
@@ -197,8 +210,8 @@ bool DWARFVerifier::verifyUnitContents(DWARFUnit &Unit, uint8_t UnitType) {
NumUnitErrors++;
}
- if (UnitType != 0 &&
- !DWARFUnit::isMatchingUnitTypeAndTag(UnitType, Die.getTag())) {
+ uint8_t UnitType = Unit.getUnitType();
+ if (!DWARFUnit::isMatchingUnitTypeAndTag(UnitType, Die.getTag())) {
error() << "Compilation unit type (" << dwarf::UnitTypeString(UnitType)
<< ") and root DIE (" << dwarf::TagString(Die.getTag())
<< ") do not match.\n";
@@ -208,7 +221,39 @@ bool DWARFVerifier::verifyUnitContents(DWARFUnit &Unit, uint8_t UnitType) {
DieRangeInfo RI;
NumUnitErrors += verifyDieRanges(Die, RI);
- return NumUnitErrors == 0;
+ return NumUnitErrors;
+}
+
+unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) {
+ if (Die.getTag() != DW_TAG_call_site)
+ return 0;
+
+ DWARFDie Curr = Die.getParent();
+ for (; Curr.isValid() && !Curr.isSubprogramDIE(); Curr = Die.getParent()) {
+ if (Curr.getTag() == DW_TAG_inlined_subroutine) {
+ error() << "Call site entry nested within inlined subroutine:";
+ Curr.dump(OS);
+ return 1;
+ }
+ }
+
+ if (!Curr.isValid()) {
+ error() << "Call site entry not nested within a valid subprogram:";
+ Die.dump(OS);
+ return 1;
+ }
+
+ Optional<DWARFFormValue> CallAttr =
+ Curr.find({DW_AT_call_all_calls, DW_AT_call_all_source_calls,
+ DW_AT_call_all_tail_calls});
+ if (!CallAttr) {
+ error() << "Subprogram with call site entry has no DW_AT_call attribute:";
+ Curr.dump(OS);
+ Die.dump(OS, /*indent*/ 1);
+ return 1;
+ }
+
+ return 0;
}
unsigned DWARFVerifier::verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev) {
@@ -252,20 +297,18 @@ bool DWARFVerifier::handleDebugAbbrev() {
return NumErrors == 0;
}
-bool DWARFVerifier::handleDebugInfo() {
- OS << "Verifying .debug_info Unit Header Chain...\n";
-
+unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
+ DWARFSectionKind SectionKind) {
const DWARFObject &DObj = DCtx.getDWARFObj();
- DWARFDataExtractor DebugInfoData(DObj, DObj.getInfoSection(),
- DCtx.isLittleEndian(), 0);
- uint32_t NumDebugInfoErrors = 0;
+ DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
+ unsigned NumDebugInfoErrors = 0;
uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
uint8_t UnitType = 0;
bool isUnitDWARF64 = false;
bool isHeaderChainValid = true;
bool hasDIE = DebugInfoData.isValidOffset(Offset);
- DWARFUnitSection<DWARFTypeUnit> TUSection{};
- DWARFUnitSection<DWARFCompileUnit> CUSection{};
+ DWARFUnitVector TypeUnitVector;
+ DWARFUnitVector CompileUnitVector;
while (hasDIE) {
OffsetStart = Offset;
if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
@@ -275,46 +318,64 @@ bool DWARFVerifier::handleDebugInfo() {
break;
} else {
DWARFUnitHeader Header;
- Header.extract(DCtx, DebugInfoData, &OffsetStart);
- std::unique_ptr<DWARFUnit> Unit;
+ Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
+ DWARFUnit *Unit;
switch (UnitType) {
case dwarf::DW_UT_type:
case dwarf::DW_UT_split_type: {
- Unit.reset(new DWARFTypeUnit(
- DCtx, DObj.getInfoSection(), Header, DCtx.getDebugAbbrev(),
- &DObj.getRangeSection(), DObj.getStringSection(),
+ Unit = TypeUnitVector.addUnit(llvm::make_unique<DWARFTypeUnit>(
+ DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(),
+ &DObj.getLocSection(), DObj.getStringSection(),
DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(),
- DObj.getLineSection(), DCtx.isLittleEndian(), false, TUSection));
+ DObj.getLineSection(), DCtx.isLittleEndian(), false,
+ TypeUnitVector));
break;
}
case dwarf::DW_UT_skeleton:
case dwarf::DW_UT_split_compile:
case dwarf::DW_UT_compile:
case dwarf::DW_UT_partial:
- // UnitType = 0 means that we are
- // verifying a compile unit in DWARF v4.
+ // UnitType = 0 means that we are verifying a compile unit in DWARF v4.
case 0: {
- Unit.reset(new DWARFCompileUnit(
- DCtx, DObj.getInfoSection(), Header, DCtx.getDebugAbbrev(),
- &DObj.getRangeSection(), DObj.getStringSection(),
+ Unit = CompileUnitVector.addUnit(llvm::make_unique<DWARFCompileUnit>(
+ DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(),
+ &DObj.getLocSection(), DObj.getStringSection(),
DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(),
- DObj.getLineSection(), DCtx.isLittleEndian(), false, CUSection));
+ DObj.getLineSection(), DCtx.isLittleEndian(), false,
+ CompileUnitVector));
break;
}
default: { llvm_unreachable("Invalid UnitType."); }
}
- if (!verifyUnitContents(*Unit, UnitType))
- ++NumDebugInfoErrors;
+ NumDebugInfoErrors += verifyUnitContents(*Unit);
}
hasDIE = DebugInfoData.isValidOffset(Offset);
++UnitIdx;
}
if (UnitIdx == 0 && !hasDIE) {
- warn() << ".debug_info is empty.\n";
+ warn() << "Section is empty.\n";
isHeaderChainValid = true;
}
+ if (!isHeaderChainValid)
+ ++NumDebugInfoErrors;
NumDebugInfoErrors += verifyDebugInfoReferences();
- return (isHeaderChainValid && NumDebugInfoErrors == 0);
+ return NumDebugInfoErrors;
+}
+
+bool DWARFVerifier::handleDebugInfo() {
+ const DWARFObject &DObj = DCtx.getDWARFObj();
+ unsigned NumErrors = 0;
+
+ OS << "Verifying .debug_info Unit Header Chain...\n";
+ DObj.forEachInfoSections([&](const DWARFSection &S) {
+ NumErrors += verifyUnitSection(S, DW_SECT_INFO);
+ });
+
+ OS << "Verifying .debug_types Unit Header Chain...\n";
+ DObj.forEachTypesSections([&](const DWARFSection &S) {
+ NumErrors += verifyUnitSection(S, DW_SECT_TYPES);
+ });
+ return NumErrors == 0;
}
unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
@@ -336,20 +397,42 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
// Build RI for this DIE and check that ranges within this DIE do not
// overlap.
DieRangeInfo RI(Die);
- for (auto Range : Ranges) {
- if (!Range.valid()) {
- ++NumErrors;
- error() << "Invalid address range " << Range << "\n";
- continue;
- }
- // Verify that ranges don't intersect.
- const auto IntersectingRange = RI.insert(Range);
- if (IntersectingRange != RI.Ranges.end()) {
- ++NumErrors;
- error() << "DIE has overlapping address ranges: " << Range << " and "
- << *IntersectingRange << "\n";
- break;
+ // TODO support object files better
+ //
+ // Some object file formats (i.e. non-MachO) support COMDAT. ELF in
+ // particular does so by placing each function into a section. The DWARF data
+ // for the function at that point uses a section relative DW_FORM_addrp for
+ // the DW_AT_low_pc and a DW_FORM_data4 for the offset as the DW_AT_high_pc.
+ // In such a case, when the Die is the CU, the ranges will overlap, and we
+ // will flag valid conflicting ranges as invalid.
+ //
+ // For such targets, we should read the ranges from the CU and partition them
+ // by the section id. The ranges within a particular section should be
+ // disjoint, although the ranges across sections may overlap. We would map
+ // the child die to the entity that it references and the section with which
+ // it is associated. The child would then be checked against the range
+ // information for the associated section.
+ //
+ // For now, simply elide the range verification for the CU DIEs if we are
+ // processing an object file.
+
+ if (!IsObjectFile || IsMachOObject || Die.getTag() != DW_TAG_compile_unit) {
+ for (auto Range : Ranges) {
+ if (!Range.valid()) {
+ ++NumErrors;
+ error() << "Invalid address range " << Range << "\n";
+ continue;
+ }
+
+ // Verify that ranges don't intersect.
+ const auto IntersectingRange = RI.insert(Range);
+ if (IntersectingRange != RI.Ranges.end()) {
+ ++NumErrors;
+ error() << "DIE has overlapping address ranges: " << Range << " and "
+ << *IntersectingRange << "\n";
+ break;
+ }
}
}
@@ -358,9 +441,8 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
if (IntersectingChild != ParentRI.Children.end()) {
++NumErrors;
error() << "DIEs have overlapping address ranges:";
- Die.dump(OS, 0);
- IntersectingChild->Die.dump(OS, 0);
- OS << "\n";
+ dump(Die);
+ dump(IntersectingChild->Die) << '\n';
}
// Verify that ranges are contained within their parent.
@@ -370,9 +452,8 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
if (ShouldBeContained && !ParentRI.contains(RI)) {
++NumErrors;
error() << "DIE address ranges are not contained in its parent's ranges:";
- ParentRI.Die.dump(OS, 0);
- Die.dump(OS, 2);
- OS << "\n";
+ dump(ParentRI.Die);
+ dump(Die, 2) << '\n';
}
// Recursively check children.
@@ -388,8 +469,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
auto ReportError = [&](const Twine &TitleMsg) {
++NumErrors;
error() << TitleMsg << '\n';
- Die.dump(OS, 0, DumpOpts);
- OS << "\n";
+ dump(Die) << '\n';
};
const DWARFObject &DObj = DCtx.getDWARFObj();
@@ -438,7 +518,33 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
}
break;
}
-
+ case DW_AT_specification:
+ case DW_AT_abstract_origin: {
+ if (auto ReferencedDie = Die.getAttributeValueAsReferencedDie(Attr)) {
+ auto DieTag = Die.getTag();
+ auto RefTag = ReferencedDie.getTag();
+ if (DieTag == RefTag)
+ break;
+ if (DieTag == DW_TAG_inlined_subroutine && RefTag == DW_TAG_subprogram)
+ break;
+ if (DieTag == DW_TAG_variable && RefTag == DW_TAG_member)
+ break;
+ ReportError("DIE with tag " + TagString(DieTag) + " has " +
+ AttributeString(Attr) +
+ " that points to DIE with "
+ "incompatible tag " +
+ TagString(RefTag));
+ }
+ break;
+ }
+ case DW_AT_type: {
+ DWARFDie TypeDie = Die.getAttributeValueAsReferencedDie(DW_AT_type);
+ if (TypeDie && !isType(TypeDie.getTag())) {
+ ReportError("DIE has " + AttributeString(Attr) +
+ " with incompatible tag " + TagString(TypeDie.getTag()));
+ }
+ break;
+ }
default:
break;
}
@@ -448,6 +554,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
DWARFAttribute &AttrValue) {
const DWARFObject &DObj = DCtx.getDWARFObj();
+ auto DieCU = Die.getDwarfUnit();
unsigned NumErrors = 0;
const auto Form = AttrValue.Value.getForm();
switch (Form) {
@@ -460,7 +567,6 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
assert(RefVal);
if (RefVal) {
- auto DieCU = Die.getDwarfUnit();
auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
auto CUOffset = AttrValue.Value.getRawUValue();
if (CUOffset >= CUSize) {
@@ -470,7 +576,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
<< " is invalid (must be less than CU size of "
<< format("0x%08" PRIx32, CUSize) << "):\n";
Die.dump(OS, 0, DumpOpts);
- OS << "\n";
+ dump(Die) << '\n';
} else {
// Valid reference, but we will verify it points to an actual
// DIE later.
@@ -485,12 +591,11 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
assert(RefVal);
if (RefVal) {
- if (*RefVal >= DObj.getInfoSection().Data.size()) {
+ if (*RefVal >= DieCU->getInfoSection().Data.size()) {
++NumErrors;
error() << "DW_FORM_ref_addr offset beyond .debug_info "
"bounds:\n";
- Die.dump(OS, 0, DumpOpts);
- OS << "\n";
+ dump(Die) << '\n';
} else {
// Valid reference, but we will verify it points to an actual
// DIE later.
@@ -505,8 +610,46 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
if (SecOffset && *SecOffset >= DObj.getStringSection().size()) {
++NumErrors;
error() << "DW_FORM_strp offset beyond .debug_str bounds:\n";
- Die.dump(OS, 0, DumpOpts);
- OS << "\n";
+ dump(Die) << '\n';
+ }
+ break;
+ }
+ case DW_FORM_strx:
+ case DW_FORM_strx1:
+ case DW_FORM_strx2:
+ case DW_FORM_strx3:
+ case DW_FORM_strx4: {
+ auto Index = AttrValue.Value.getRawUValue();
+ auto DieCU = Die.getDwarfUnit();
+ // Check that we have a valid DWARF v5 string offsets table.
+ if (!DieCU->getStringOffsetsTableContribution()) {
+ ++NumErrors;
+ error() << FormEncodingString(Form)
+ << " used without a valid string offsets table:\n";
+ dump(Die) << '\n';
+ break;
+ }
+ // Check that the index is within the bounds of the section.
+ unsigned ItemSize = DieCU->getDwarfStringOffsetsByteSize();
+ // Use a 64-bit type to calculate the offset to guard against overflow.
+ uint64_t Offset =
+ (uint64_t)DieCU->getStringOffsetsBase() + Index * ItemSize;
+ if (DObj.getStringOffsetSection().Data.size() < Offset + ItemSize) {
+ ++NumErrors;
+ error() << FormEncodingString(Form) << " uses index "
+ << format("%" PRIu64, Index) << ", which is too large:\n";
+ dump(Die) << '\n';
+ break;
+ }
+ // Check that the string offset is valid.
+ uint64_t StringOffset = *DieCU->getStringOffsetSectionItem(Index);
+ if (StringOffset >= DObj.getStringSection().size()) {
+ ++NumErrors;
+ error() << FormEncodingString(Form) << " uses index "
+ << format("%" PRIu64, Index)
+ << ", but the referenced string"
+ " offset is beyond .debug_str bounds:\n";
+ dump(Die) << '\n';
}
break;
}
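
The new DW_FORM_strx* verification computes the string-offsets entry position as base plus index times entry size, deliberately widening to 64 bits so a huge index cannot wrap the offset back into bounds. A small standalone illustration of why that widening matters; the values are chosen only to trigger 32-bit wrap-around:

    #include <cstdint>
    #include <cstdio>

    // base + index * item_size can overflow in 32-bit arithmetic, making an
    // out-of-range index look valid; widening to 64 bits first keeps the
    // bounds check meaningful.
    bool indexInBounds(uint32_t Base, uint32_t Index, unsigned ItemSize,
                       uint64_t SectionSize) {
      uint64_t Offset = uint64_t(Base) + uint64_t(Index) * ItemSize;
      return SectionSize >= Offset + ItemSize;
    }

    int main() {
      // With 32-bit arithmetic 0x40000000 * 4 wraps to 0, so this index would
      // incorrectly appear to be in bounds.
      bool InBounds = indexInBounds(/*Base=*/8, /*Index=*/0x40000000,
                                    /*ItemSize=*/4, /*SectionSize=*/1024);
      std::printf("%s\n", InBounds ? "in bounds" : "out of bounds");
      return InBounds ? 1 : 0;
    }
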
@@ -528,11 +671,8 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() {
++NumErrors;
error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
<< ". Offset is in between DIEs:\n";
- for (auto Offset : Pair.second) {
- auto ReferencingDie = DCtx.getDIEForOffset(Offset);
- ReferencingDie.dump(OS, 0, DumpOpts);
- OS << "\n";
- }
+ for (auto Offset : Pair.second)
+ dump(DCtx.getDIEForOffset(Offset)) << '\n';
OS << "\n";
}
return NumErrors;
@@ -555,8 +695,7 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() {
++NumDebugLineErrors;
error() << ".debug_line[" << format("0x%08" PRIx32, LineTableOffset)
<< "] was not able to be parsed for CU:\n";
- Die.dump(OS, 0, DumpOpts);
- OS << '\n';
+ dump(Die) << '\n';
continue;
}
} else {
@@ -573,9 +712,8 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() {
<< format("0x%08" PRIx32, Iter->second.getOffset()) << " and "
<< format("0x%08" PRIx32, Die.getOffset())
<< ", have the same DW_AT_stmt_list section offset:\n";
- Iter->second.dump(OS, 0, DumpOpts);
- Die.dump(OS, 0, DumpOpts);
- OS << '\n';
+ dump(Iter->second);
+ dump(Die) << '\n';
// Already verified this line table before, no need to do it again.
continue;
}
@@ -671,6 +809,16 @@ void DWARFVerifier::verifyDebugLineRows() {
}
}
+DWARFVerifier::DWARFVerifier(raw_ostream &S, DWARFContext &D,
+ DIDumpOptions DumpOpts)
+ : OS(S), DCtx(D), DumpOpts(std::move(DumpOpts)), IsObjectFile(false),
+ IsMachOObject(false) {
+ if (const auto *F = DCtx.getDWARFObj().getFile()) {
+ IsObjectFile = F->isRelocatableObject();
+ IsMachOObject = F->isMachO();
+ }
+}
+
bool DWARFVerifier::handleDebugLine() {
NumDebugLineErrors = 0;
OS << "Verifying .debug_line...\n";
@@ -816,8 +964,8 @@ DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) {
if (Iter->second != NotIndexed) {
error() << formatv("Name Index @ {0:x} references a CU @ {1:x}, but "
- "this CU is already indexed by Name Index @ {2:x}\n",
- NI.getUnitOffset(), Offset, Iter->second);
+ "this CU is already indexed by Name Index @ {2:x}\n",
+ NI.getUnitOffset(), Offset, Iter->second);
continue;
}
Iter->second = NI.getUnitOffset();
@@ -1048,16 +1196,19 @@ DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
return NumErrors;
}
-static SmallVector<StringRef, 2> getNames(const DWARFDie &DIE) {
+static SmallVector<StringRef, 2> getNames(const DWARFDie &DIE,
+ bool IncludeLinkageName = true) {
SmallVector<StringRef, 2> Result;
if (const char *Str = DIE.getName(DINameKind::ShortName))
Result.emplace_back(Str);
else if (DIE.getTag() == dwarf::DW_TAG_namespace)
Result.emplace_back("(anonymous namespace)");
- if (const char *Str = DIE.getName(DINameKind::LinkageName)) {
- if (Result.empty() || Result[0] != Str)
- Result.emplace_back(Str);
+ if (IncludeLinkageName) {
+ if (const char *Str = DIE.getName(DINameKind::LinkageName)) {
+ if (Result.empty() || Result[0] != Str)
+ Result.emplace_back(Str);
+ }
}
return Result;
@@ -1200,7 +1351,9 @@ unsigned DWARFVerifier::verifyNameIndexCompleteness(
// "If a subprogram or inlined subroutine is included, and has a
// DW_AT_linkage_name attribute, there will be an additional index entry for
// the linkage name."
- auto EntryNames = getNames(Die);
+ auto IncludeLinkageName = Die.getTag() == DW_TAG_subprogram ||
+ Die.getTag() == DW_TAG_inlined_subroutine;
+ auto EntryNames = getNames(Die, IncludeLinkageName);
if (EntryNames.empty())
return 0;
@@ -1212,8 +1365,9 @@ unsigned DWARFVerifier::verifyNameIndexCompleteness(
// make sure we catch any missing items, we instead blacklist all TAGs that we
// know shouldn't be indexed.
switch (Die.getTag()) {
- // Compile unit has a name but it shouldn't be indexed.
+ // Compile units and modules have names but shouldn't be indexed.
case DW_TAG_compile_unit:
+ case DW_TAG_module:
return 0;
// Function and template parameters are not globally visible, so we shouldn't
@@ -1315,11 +1469,12 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
if (NumErrors > 0)
return NumErrors;
- for (const std::unique_ptr<DWARFCompileUnit> &CU : DCtx.compile_units()) {
+ for (const std::unique_ptr<DWARFUnit> &U : DCtx.compile_units()) {
if (const DWARFDebugNames::NameIndex *NI =
- AccelTable.getCUNameIndex(CU->getOffset())) {
+ AccelTable.getCUNameIndex(U->getOffset())) {
+ auto *CU = cast<DWARFCompileUnit>(U.get());
for (const DWARFDebugInfoEntry &Die : CU->dies())
- NumErrors += verifyNameIndexCompleteness(DWARFDie(CU.get(), &Die), *NI);
+ NumErrors += verifyNameIndexCompleteness(DWARFDie(CU, &Die), *NI);
}
}
return NumErrors;
@@ -1330,17 +1485,17 @@ bool DWARFVerifier::handleAccelTables() {
DataExtractor StrData(D.getStringSection(), DCtx.isLittleEndian(), 0);
unsigned NumErrors = 0;
if (!D.getAppleNamesSection().Data.empty())
- NumErrors +=
- verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData, ".apple_names");
+ NumErrors += verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData,
+ ".apple_names");
if (!D.getAppleTypesSection().Data.empty())
- NumErrors +=
- verifyAppleAccelTable(&D.getAppleTypesSection(), &StrData, ".apple_types");
+ NumErrors += verifyAppleAccelTable(&D.getAppleTypesSection(), &StrData,
+ ".apple_types");
if (!D.getAppleNamespacesSection().Data.empty())
NumErrors += verifyAppleAccelTable(&D.getAppleNamespacesSection(), &StrData,
- ".apple_namespaces");
+ ".apple_namespaces");
if (!D.getAppleObjCSection().Data.empty())
- NumErrors +=
- verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData, ".apple_objc");
+ NumErrors += verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData,
+ ".apple_objc");
if (!D.getDebugNamesSection().Data.empty())
NumErrors += verifyDebugNames(D.getDebugNamesSection(), StrData);
@@ -1352,3 +1507,8 @@ raw_ostream &DWARFVerifier::error() const { return WithColor::error(OS); }
raw_ostream &DWARFVerifier::warn() const { return WithColor::warning(OS); }
raw_ostream &DWARFVerifier::note() const { return WithColor::note(OS); }
+
+raw_ostream &DWARFVerifier::dump(const DWARFDie &Die, unsigned indent) const {
+ Die.dump(OS, indent, DumpOpts);
+ return OS;
+}
diff --git a/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp b/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp
index 1b8294e47e75..bfac6bebba3f 100644
--- a/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp
+++ b/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp
@@ -14,14 +14,12 @@
using namespace llvm;
using namespace llvm::msf;
-namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class MSFErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.msf"; }
-
std::string message(int Condition) const override {
switch (static_cast<msf_error_code>(Condition)) {
case msf_error_code::unspecified:
@@ -41,30 +39,8 @@ public:
llvm_unreachable("Unrecognized msf_error_code");
}
};
-} // end anonymous namespace
-
-static ManagedStatic<MSFErrorCategory> Category;
-
-char MSFError::ID = 0;
-
-MSFError::MSFError(msf_error_code C) : MSFError(C, "") {}
-
-MSFError::MSFError(const std::string &Context)
- : MSFError(msf_error_code::unspecified, Context) {}
-
-MSFError::MSFError(msf_error_code C, const std::string &Context) : Code(C) {
- ErrMsg = "MSF Error: ";
- std::error_code EC = convertToErrorCode();
- if (Code != msf_error_code::unspecified)
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
-}
-
-void MSFError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-const std::string &MSFError::getErrorMessage() const { return ErrMsg; }
+static llvm::ManagedStatic<MSFErrorCategory> MSFCategory;
+const std::error_category &llvm::msf::MSFErrCategory() { return *MSFCategory; }
-std::error_code MSFError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char MSFError::ID;
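
The MSFError rewrite above (and the matching DIAError change further down) strips the hand-rolled constructors and message plumbing, leaving just a std::error_category subclass behind a ManagedStatic accessor. A minimal standalone example of defining such a category in plain standard C++; the enum and names are invented for the example:

    #include <iostream>
    #include <string>
    #include <system_error>

    enum class demo_error_code { unspecified = 1, insufficient_buffer };

    class DemoErrorCategory : public std::error_category {
    public:
      const char *name() const noexcept override { return "demo"; }
      std::string message(int Condition) const override {
        switch (static_cast<demo_error_code>(Condition)) {
        case demo_error_code::unspecified:
          return "An unknown error has occurred.";
        case demo_error_code::insufficient_buffer:
          return "The buffer is not large enough to read the requested data.";
        }
        return "Unrecognized error code.";
      }
    };

    // Single category instance, playing the role ManagedStatic plays in LLVM.
    const std::error_category &demoCategory() {
      static DemoErrorCategory Category;
      return Category;
    }

    std::error_code make_error_code(demo_error_code E) {
      return {static_cast<int>(E), demoCategory()};
    }

    int main() {
      std::error_code EC = make_error_code(demo_error_code::insufficient_buffer);
      std::cout << EC.category().name() << ": " << EC.message() << '\n';
      return 0;
    }
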
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
index 7eabed8cad48..6a10513fad97 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
@@ -8,8 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/ConvertUTF.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -23,16 +22,7 @@ uint32_t DIADataStream::getRecordCount() const {
}
std::string DIADataStream::getName() const {
- CComBSTR Name16;
- if (S_OK != StreamData->get_name(&Name16))
- return std::string();
-
- std::string Name8;
- llvm::ArrayRef<char> Name16Bytes(reinterpret_cast<char *>(Name16.m_str),
- Name16.ByteLength());
- if (!llvm::convertUTF16ToUTF8String(Name16Bytes, Name8))
- return std::string();
- return Name8;
+ return invokeBstrMethod(*StreamData, &IDiaEnumDebugStreamData::get_name);
}
llvm::Optional<DIADataStream::RecordType>
@@ -65,11 +55,3 @@ bool DIADataStream::getNext(RecordType &Record) {
}
void DIADataStream::reset() { StreamData->Reset(); }
-
-DIADataStream *DIADataStream::clone() const {
- CComPtr<IDiaEnumDebugStreamData> EnumeratorClone;
- if (S_OK != StreamData->Clone(&EnumeratorClone))
- return nullptr;
-
- return new DIADataStream(EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
index f62c4991fe33..d2451f13e6cb 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
@@ -45,10 +45,3 @@ std::unique_ptr<IPDBDataStream> DIAEnumDebugStreams::getNext() {
}
void DIAEnumDebugStreams::reset() { Enumerator->Reset(); }
-
-DIAEnumDebugStreams *DIAEnumDebugStreams::clone() const {
- CComPtr<IDiaEnumDebugStreams> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumDebugStreams(EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
new file mode 100644
index 000000000000..f873f3525df5
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
@@ -0,0 +1,42 @@
+//==- DIAEnumFrameData.cpp ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAFrameData.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+
+using namespace llvm::pdb;
+
+DIAEnumFrameData::DIAEnumFrameData(CComPtr<IDiaEnumFrameData> DiaEnumerator)
+ : Enumerator(DiaEnumerator) {}
+
+uint32_t DIAEnumFrameData::getChildCount() const {
+ LONG Count = 0;
+ return (S_OK == Enumerator->get_Count(&Count)) ? Count : 0;
+}
+
+std::unique_ptr<IPDBFrameData>
+DIAEnumFrameData::getChildAtIndex(uint32_t Index) const {
+ CComPtr<IDiaFrameData> Item;
+ if (S_OK != Enumerator->Item(Index, &Item))
+ return nullptr;
+
+ return std::unique_ptr<IPDBFrameData>(new DIAFrameData(Item));
+}
+
+std::unique_ptr<IPDBFrameData> DIAEnumFrameData::getNext() {
+ CComPtr<IDiaFrameData> Item;
+ ULONG NumFetched = 0;
+ if (S_OK != Enumerator->Next(1, &Item, &NumFetched))
+ return nullptr;
+
+ return std::unique_ptr<IPDBFrameData>(new DIAFrameData(Item));
+}
+
+void DIAEnumFrameData::reset() { Enumerator->Reset(); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
index d7c908e04593..6c361b81e33d 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
@@ -15,9 +15,8 @@ using namespace llvm;
using namespace llvm::pdb;
DIAEnumInjectedSources::DIAEnumInjectedSources(
- const DIASession &PDBSession,
CComPtr<IDiaEnumInjectedSources> DiaEnumerator)
- : Session(PDBSession), Enumerator(DiaEnumerator) {}
+ : Enumerator(DiaEnumerator) {}
uint32_t DIAEnumInjectedSources::getChildCount() const {
LONG Count = 0;
@@ -43,10 +42,3 @@ std::unique_ptr<IPDBInjectedSource> DIAEnumInjectedSources::getNext() {
}
void DIAEnumInjectedSources::reset() { Enumerator->Reset(); }
-
-DIAEnumInjectedSources *DIAEnumInjectedSources::clone() const {
- CComPtr<IDiaEnumInjectedSources> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumInjectedSources(Session, EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
index 796ce214b383..0820d9dc7c9f 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
@@ -42,10 +42,3 @@ std::unique_ptr<IPDBLineNumber> DIAEnumLineNumbers::getNext() {
}
void DIAEnumLineNumbers::reset() { Enumerator->Reset(); }
-
-DIAEnumLineNumbers *DIAEnumLineNumbers::clone() const {
- CComPtr<IDiaEnumLineNumbers> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumLineNumbers(EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
index 1f405f049198..90c857aa5713 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
@@ -45,10 +45,3 @@ std::unique_ptr<IPDBSectionContrib> DIAEnumSectionContribs::getNext() {
}
void DIAEnumSectionContribs::reset() { Enumerator->Reset(); }
-
-DIAEnumSectionContribs *DIAEnumSectionContribs::clone() const {
- CComPtr<IDiaEnumSectionContribs> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumSectionContribs(Session, EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
index b9311d060128..06595e7ec1c8 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
@@ -42,10 +42,3 @@ std::unique_ptr<IPDBSourceFile> DIAEnumSourceFiles::getNext() {
}
void DIAEnumSourceFiles::reset() { Enumerator->Reset(); }
-
-DIAEnumSourceFiles *DIAEnumSourceFiles::clone() const {
- CComPtr<IDiaEnumSourceFiles> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumSourceFiles(Session, EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
index 266638530c2f..48bc32767e6c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
@@ -46,10 +46,3 @@ std::unique_ptr<PDBSymbol> DIAEnumSymbols::getNext() {
}
void DIAEnumSymbols::reset() { Enumerator->Reset(); }
-
-DIAEnumSymbols *DIAEnumSymbols::clone() const {
- CComPtr<IDiaEnumSymbols> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumSymbols(Session, EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
index 511b55585ebd..6fa096156d48 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
@@ -13,9 +13,8 @@
using namespace llvm;
using namespace llvm::pdb;
-DIAEnumTables::DIAEnumTables(
- CComPtr<IDiaEnumTables> DiaEnumerator)
- : Enumerator(DiaEnumerator) {}
+DIAEnumTables::DIAEnumTables(CComPtr<IDiaEnumTables> DiaEnumerator)
+ : Enumerator(DiaEnumerator) {}
uint32_t DIAEnumTables::getChildCount() const {
LONG Count = 0;
@@ -44,10 +43,3 @@ std::unique_ptr<IPDBTable> DIAEnumTables::getNext() {
}
void DIAEnumTables::reset() { Enumerator->Reset(); }
-
-DIAEnumTables *DIAEnumTables::clone() const {
- CComPtr<IDiaEnumTables> EnumeratorClone;
- if (S_OK != Enumerator->Clone(&EnumeratorClone))
- return nullptr;
- return new DIAEnumTables(EnumeratorClone);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
index 0da877b0fbad..819651f77787 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
@@ -11,14 +11,13 @@ using namespace llvm::pdb;
class DIAErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.pdb.dia"; }
-
std::string message(int Condition) const override {
switch (static_cast<dia_error_code>(Condition)) {
case dia_error_code::could_not_create_impl:
- return "Failed to connect to DIA at runtime. Verify that Visual Studio "
+ return "Failed to connect to DIA at runtime. Verify that Visual Studio "
"is properly installed, or that msdiaXX.dll is in your PATH.";
case dia_error_code::invalid_file_format:
- return "Unable to load PDB. The file has an unrecognized format.";
+ return "Unable to load PDB. The file has an unrecognized format.";
case dia_error_code::invalid_parameter:
return "The parameter is incorrect.";
case dia_error_code::already_loaded:
@@ -32,27 +31,7 @@ public:
}
};
-static ManagedStatic<DIAErrorCategory> Category;
-
-char DIAError::ID = 0;
-
-DIAError::DIAError(dia_error_code C) : DIAError(C, "") {}
-
-DIAError::DIAError(StringRef Context)
- : DIAError(dia_error_code::unspecified, Context) {}
-
-DIAError::DIAError(dia_error_code C, StringRef Context) : Code(C) {
- ErrMsg = "DIA Error: ";
- std::error_code EC = convertToErrorCode();
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
-}
-
-void DIAError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-
-StringRef DIAError::getErrorMessage() const { return ErrMsg; }
+static llvm::ManagedStatic<DIAErrorCategory> DIACategory;
+const std::error_category &llvm::pdb::DIAErrCategory() { return *DIACategory; }
-std::error_code DIAError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char DIAError::ID;
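For reference, the reworked DIAError is consumed through the standard llvm::Error machinery; a minimal usage sketch mirroring the make_error call sites shown later in this diff (the context string and function name are illustrative):

#include "llvm/DebugInfo/PDB/DIA/DIAError.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error openSession() {
  // Same construction pattern DIASession uses for E_INVALIDARG and friends.
  return llvm::make_error<llvm::pdb::DIAError>(
      llvm::pdb::dia_error_code::invalid_parameter, "illustrative context");
}

void demo() {
  if (llvm::Error E = openSession())
    llvm::errs() << llvm::toString(std::move(E)) << "\n";
}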
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
new file mode 100644
index 000000000000..533cce7923c0
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
@@ -0,0 +1,53 @@
+//===- DIAFrameData.cpp - DIA impl. of IPDBFrameData -------------- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/DIA/DIAFrameData.h"
+#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
+
+using namespace llvm::pdb;
+
+DIAFrameData::DIAFrameData(CComPtr<IDiaFrameData> DiaFrameData)
+ : FrameData(DiaFrameData) {}
+
+template <typename ArgType>
+ArgType
+PrivateGetDIAValue(IDiaFrameData *FrameData,
+ HRESULT (__stdcall IDiaFrameData::*Method)(ArgType *)) {
+ ArgType Value;
+ if (S_OK == (FrameData->*Method)(&Value))
+ return static_cast<ArgType>(Value);
+
+ return ArgType();
+}
+
+uint32_t DIAFrameData::getAddressOffset() const {
+ return PrivateGetDIAValue(FrameData, &IDiaFrameData::get_addressOffset);
+}
+
+uint32_t DIAFrameData::getAddressSection() const {
+ return PrivateGetDIAValue(FrameData, &IDiaFrameData::get_addressSection);
+}
+
+uint32_t DIAFrameData::getLengthBlock() const {
+ return PrivateGetDIAValue(FrameData, &IDiaFrameData::get_lengthBlock);
+}
+
+std::string DIAFrameData::getProgram() const {
+ return invokeBstrMethod(*FrameData, &IDiaFrameData::get_program);
+}
+
+uint32_t DIAFrameData::getRelativeVirtualAddress() const {
+ return PrivateGetDIAValue(FrameData,
+ &IDiaFrameData::get_relativeVirtualAddress);
+}
+
+uint64_t DIAFrameData::getVirtualAddress() const {
+ return PrivateGetDIAValue(FrameData, &IDiaFrameData::get_virtualAddress);
+}
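The new file leans on a small member-pointer wrapper to call COM-style getters and fall back to a default on failure; a standalone sketch of that pattern, where Widget and get_size are hypothetical stand-ins:

// Call an accessor through a member-function pointer; return a
// default-constructed value when the call does not succeed.
#include <cstdint>

struct Widget {
  long get_size(uint32_t *Out) { *Out = 42; return 0; } // 0 plays the role of S_OK
};

template <typename ArgType>
ArgType getOrDefault(Widget *W, long (Widget::*Method)(ArgType *)) {
  ArgType Value;
  if ((W->*Method)(&Value) == 0)
    return Value;
  return ArgType();
}

// Usage: uint32_t Size = getOrDefault(&W, &Widget::get_size); // Size == 42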
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index 7d6cb254e1d1..cd4d00a13b18 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
#include "llvm/DebugInfo/PDB/DIA/DIALineNumber.h"
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
@@ -115,16 +116,7 @@ RetType PrivateGetDIAValue(IDiaSymbol *Symbol,
std::string
PrivateGetDIAValue(IDiaSymbol *Symbol,
HRESULT (__stdcall IDiaSymbol::*Method)(BSTR *)) {
- CComBSTR Result16;
- if (S_OK != (Symbol->*Method)(&Result16))
- return std::string();
-
- const char *SrcBytes = reinterpret_cast<const char *>(Result16.m_str);
- llvm::ArrayRef<char> SrcByteArray(SrcBytes, Result16.ByteLength());
- std::string Result8;
- if (!llvm::convertUTF16ToUTF8String(SrcByteArray, Result8))
- return std::string();
- return Result8;
+ return invokeBstrMethod(*Symbol, Method);
}
codeview::GUID
@@ -141,16 +133,33 @@ PrivateGetDIAValue(IDiaSymbol *Symbol,
return IdResult;
}
+template <typename PrintType, typename ArgType>
+void DumpDIAValueAs(llvm::raw_ostream &OS, int Indent, StringRef Name,
+ IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(ArgType *)) {
+ ArgType Value;
+ if (S_OK == (Symbol->*Method)(&Value))
+ dumpSymbolField(OS, Name, static_cast<PrintType>(Value), Indent);
+}
+
+void DumpDIAIdValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
+ IDiaSymbol *Symbol,
+ HRESULT (__stdcall IDiaSymbol::*Method)(DWORD *),
+ const IPDBSession &Session, PdbSymbolIdField FieldId,
+ PdbSymbolIdField ShowFlags, PdbSymbolIdField RecurseFlags) {
+ DWORD Value;
+ if (S_OK == (Symbol->*Method)(&Value))
+ dumpSymbolIdField(OS, Name, Value, Indent, Session, FieldId, ShowFlags,
+ RecurseFlags);
+}
+
template <typename ArgType>
void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
IDiaSymbol *Symbol,
HRESULT (__stdcall IDiaSymbol::*Method)(ArgType *)) {
ArgType Value;
- if (S_OK == (Symbol->*Method)(&Value)) {
- OS << "\n";
- OS.indent(Indent);
- OS << Name << ": " << Value;
- }
+ if (S_OK == (Symbol->*Method)(&Value))
+ dumpSymbolField(OS, Name, Value, Indent);
}
void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
@@ -162,11 +171,8 @@ void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
const char *Bytes = reinterpret_cast<const char *>(Value);
ArrayRef<char> ByteArray(Bytes, ::SysStringByteLen(Value));
std::string Result;
- if (llvm::convertUTF16ToUTF8String(ByteArray, Result)) {
- OS << "\n";
- OS.indent(Indent);
- OS << Name << ": " << Result;
- }
+ if (llvm::convertUTF16ToUTF8String(ByteArray, Result))
+ dumpSymbolField(OS, Name, Result, Indent);
::SysFreeString(Value);
}
@@ -177,12 +183,11 @@ void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
Value.vt = VT_EMPTY;
if (S_OK != (Symbol->*Method)(&Value))
return;
- OS << "\n";
- OS.indent(Indent);
Variant V = VariantFromVARIANT(Value);
- OS << Name << ": " << V;
-}
+
+ dumpSymbolField(OS, Name, V, Indent);
}
+} // namespace
namespace llvm {
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GUID &G) {
@@ -191,182 +196,203 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GUID &G) {
A.format(OS, "");
return OS;
}
-}
+} // namespace llvm
DIARawSymbol::DIARawSymbol(const DIASession &PDBSession,
CComPtr<IDiaSymbol> DiaSymbol)
: Session(PDBSession), Symbol(DiaSymbol) {}
+#define RAW_ID_METHOD_DUMP(Stream, Method, Session, FieldId, ShowFlags, \
+ RecurseFlags) \
+ DumpDIAIdValue(Stream, Indent, StringRef{#Method}, Symbol, \
+ &IDiaSymbol::get_##Method, Session, FieldId, ShowFlags, \
+ RecurseFlags);
+
#define RAW_METHOD_DUMP(Stream, Method) \
- DumpDIAValue(Stream, Indent, StringRef(#Method), Symbol, &IDiaSymbol::Method);
-
-void DIARawSymbol::dump(raw_ostream &OS, int Indent) const {
- RAW_METHOD_DUMP(OS, get_access)
- RAW_METHOD_DUMP(OS, get_addressOffset)
- RAW_METHOD_DUMP(OS, get_addressSection)
- RAW_METHOD_DUMP(OS, get_age)
- RAW_METHOD_DUMP(OS, get_arrayIndexTypeId)
- RAW_METHOD_DUMP(OS, get_backEndMajor)
- RAW_METHOD_DUMP(OS, get_backEndMinor)
- RAW_METHOD_DUMP(OS, get_backEndBuild)
- RAW_METHOD_DUMP(OS, get_backEndQFE)
- RAW_METHOD_DUMP(OS, get_baseDataOffset)
- RAW_METHOD_DUMP(OS, get_baseDataSlot)
- RAW_METHOD_DUMP(OS, get_baseSymbolId)
- RAW_METHOD_DUMP(OS, get_baseType)
- RAW_METHOD_DUMP(OS, get_bitPosition)
- RAW_METHOD_DUMP(OS, get_callingConvention)
- RAW_METHOD_DUMP(OS, get_classParentId)
- RAW_METHOD_DUMP(OS, get_compilerName)
- RAW_METHOD_DUMP(OS, get_count)
- RAW_METHOD_DUMP(OS, get_countLiveRanges)
- RAW_METHOD_DUMP(OS, get_frontEndMajor)
- RAW_METHOD_DUMP(OS, get_frontEndMinor)
- RAW_METHOD_DUMP(OS, get_frontEndBuild)
- RAW_METHOD_DUMP(OS, get_frontEndQFE)
- RAW_METHOD_DUMP(OS, get_lexicalParentId)
- RAW_METHOD_DUMP(OS, get_libraryName)
- RAW_METHOD_DUMP(OS, get_liveRangeStartAddressOffset)
- RAW_METHOD_DUMP(OS, get_liveRangeStartAddressSection)
- RAW_METHOD_DUMP(OS, get_liveRangeStartRelativeVirtualAddress)
- RAW_METHOD_DUMP(OS, get_localBasePointerRegisterId)
- RAW_METHOD_DUMP(OS, get_lowerBoundId)
- RAW_METHOD_DUMP(OS, get_memorySpaceKind)
- RAW_METHOD_DUMP(OS, get_name)
- RAW_METHOD_DUMP(OS, get_numberOfAcceleratorPointerTags)
- RAW_METHOD_DUMP(OS, get_numberOfColumns)
- RAW_METHOD_DUMP(OS, get_numberOfModifiers)
- RAW_METHOD_DUMP(OS, get_numberOfRegisterIndices)
- RAW_METHOD_DUMP(OS, get_numberOfRows)
- RAW_METHOD_DUMP(OS, get_objectFileName)
- RAW_METHOD_DUMP(OS, get_oemId)
- RAW_METHOD_DUMP(OS, get_oemSymbolId)
- RAW_METHOD_DUMP(OS, get_offsetInUdt)
- RAW_METHOD_DUMP(OS, get_platform)
- RAW_METHOD_DUMP(OS, get_rank)
- RAW_METHOD_DUMP(OS, get_registerId)
- RAW_METHOD_DUMP(OS, get_registerType)
- RAW_METHOD_DUMP(OS, get_relativeVirtualAddress)
- RAW_METHOD_DUMP(OS, get_samplerSlot)
- RAW_METHOD_DUMP(OS, get_signature)
- RAW_METHOD_DUMP(OS, get_sizeInUdt)
- RAW_METHOD_DUMP(OS, get_slot)
- RAW_METHOD_DUMP(OS, get_sourceFileName)
- RAW_METHOD_DUMP(OS, get_stride)
- RAW_METHOD_DUMP(OS, get_subTypeId)
- RAW_METHOD_DUMP(OS, get_symbolsFileName)
- RAW_METHOD_DUMP(OS, get_symIndexId)
- RAW_METHOD_DUMP(OS, get_targetOffset)
- RAW_METHOD_DUMP(OS, get_targetRelativeVirtualAddress)
- RAW_METHOD_DUMP(OS, get_targetVirtualAddress)
- RAW_METHOD_DUMP(OS, get_targetSection)
- RAW_METHOD_DUMP(OS, get_textureSlot)
- RAW_METHOD_DUMP(OS, get_timeStamp)
- RAW_METHOD_DUMP(OS, get_token)
- RAW_METHOD_DUMP(OS, get_typeId)
- RAW_METHOD_DUMP(OS, get_uavSlot)
- RAW_METHOD_DUMP(OS, get_undecoratedName)
- RAW_METHOD_DUMP(OS, get_unmodifiedTypeId)
- RAW_METHOD_DUMP(OS, get_upperBoundId)
- RAW_METHOD_DUMP(OS, get_virtualBaseDispIndex)
- RAW_METHOD_DUMP(OS, get_virtualBaseOffset)
- RAW_METHOD_DUMP(OS, get_virtualTableShapeId)
- RAW_METHOD_DUMP(OS, get_dataKind)
- RAW_METHOD_DUMP(OS, get_symTag)
- RAW_METHOD_DUMP(OS, get_guid)
- RAW_METHOD_DUMP(OS, get_offset)
- RAW_METHOD_DUMP(OS, get_thisAdjust)
- RAW_METHOD_DUMP(OS, get_virtualBasePointerOffset)
- RAW_METHOD_DUMP(OS, get_locationType)
- RAW_METHOD_DUMP(OS, get_machineType)
- RAW_METHOD_DUMP(OS, get_thunkOrdinal)
- RAW_METHOD_DUMP(OS, get_length)
- RAW_METHOD_DUMP(OS, get_liveRangeLength)
- RAW_METHOD_DUMP(OS, get_virtualAddress)
- RAW_METHOD_DUMP(OS, get_udtKind)
- RAW_METHOD_DUMP(OS, get_constructor)
- RAW_METHOD_DUMP(OS, get_customCallingConvention)
- RAW_METHOD_DUMP(OS, get_farReturn)
- RAW_METHOD_DUMP(OS, get_code)
- RAW_METHOD_DUMP(OS, get_compilerGenerated)
- RAW_METHOD_DUMP(OS, get_constType)
- RAW_METHOD_DUMP(OS, get_editAndContinueEnabled)
- RAW_METHOD_DUMP(OS, get_function)
- RAW_METHOD_DUMP(OS, get_stride)
- RAW_METHOD_DUMP(OS, get_noStackOrdering)
- RAW_METHOD_DUMP(OS, get_hasAlloca)
- RAW_METHOD_DUMP(OS, get_hasAssignmentOperator)
- RAW_METHOD_DUMP(OS, get_isCTypes)
- RAW_METHOD_DUMP(OS, get_hasCastOperator)
- RAW_METHOD_DUMP(OS, get_hasDebugInfo)
- RAW_METHOD_DUMP(OS, get_hasEH)
- RAW_METHOD_DUMP(OS, get_hasEHa)
- RAW_METHOD_DUMP(OS, get_hasInlAsm)
- RAW_METHOD_DUMP(OS, get_framePointerPresent)
- RAW_METHOD_DUMP(OS, get_inlSpec)
- RAW_METHOD_DUMP(OS, get_interruptReturn)
- RAW_METHOD_DUMP(OS, get_hasLongJump)
- RAW_METHOD_DUMP(OS, get_hasManagedCode)
- RAW_METHOD_DUMP(OS, get_hasNestedTypes)
- RAW_METHOD_DUMP(OS, get_noInline)
- RAW_METHOD_DUMP(OS, get_noReturn)
- RAW_METHOD_DUMP(OS, get_optimizedCodeDebugInfo)
- RAW_METHOD_DUMP(OS, get_overloadedOperator)
- RAW_METHOD_DUMP(OS, get_hasSEH)
- RAW_METHOD_DUMP(OS, get_hasSecurityChecks)
- RAW_METHOD_DUMP(OS, get_hasSetJump)
- RAW_METHOD_DUMP(OS, get_strictGSCheck)
- RAW_METHOD_DUMP(OS, get_isAcceleratorGroupSharedLocal)
- RAW_METHOD_DUMP(OS, get_isAcceleratorPointerTagLiveRange)
- RAW_METHOD_DUMP(OS, get_isAcceleratorStubFunction)
- RAW_METHOD_DUMP(OS, get_isAggregated)
- RAW_METHOD_DUMP(OS, get_intro)
- RAW_METHOD_DUMP(OS, get_isCVTCIL)
- RAW_METHOD_DUMP(OS, get_isConstructorVirtualBase)
- RAW_METHOD_DUMP(OS, get_isCxxReturnUdt)
- RAW_METHOD_DUMP(OS, get_isDataAligned)
- RAW_METHOD_DUMP(OS, get_isHLSLData)
- RAW_METHOD_DUMP(OS, get_isHotpatchable)
- RAW_METHOD_DUMP(OS, get_indirectVirtualBaseClass)
- RAW_METHOD_DUMP(OS, get_isInterfaceUdt)
- RAW_METHOD_DUMP(OS, get_intrinsic)
- RAW_METHOD_DUMP(OS, get_isLTCG)
- RAW_METHOD_DUMP(OS, get_isLocationControlFlowDependent)
- RAW_METHOD_DUMP(OS, get_isMSILNetmodule)
- RAW_METHOD_DUMP(OS, get_isMatrixRowMajor)
- RAW_METHOD_DUMP(OS, get_managed)
- RAW_METHOD_DUMP(OS, get_msil)
- RAW_METHOD_DUMP(OS, get_isMultipleInheritance)
- RAW_METHOD_DUMP(OS, get_isNaked)
- RAW_METHOD_DUMP(OS, get_nested)
- RAW_METHOD_DUMP(OS, get_isOptimizedAway)
- RAW_METHOD_DUMP(OS, get_packed)
- RAW_METHOD_DUMP(OS, get_isPointerBasedOnSymbolValue)
- RAW_METHOD_DUMP(OS, get_isPointerToDataMember)
- RAW_METHOD_DUMP(OS, get_isPointerToMemberFunction)
- RAW_METHOD_DUMP(OS, get_pure)
- RAW_METHOD_DUMP(OS, get_RValueReference)
- RAW_METHOD_DUMP(OS, get_isRefUdt)
- RAW_METHOD_DUMP(OS, get_reference)
- RAW_METHOD_DUMP(OS, get_restrictedType)
- RAW_METHOD_DUMP(OS, get_isReturnValue)
- RAW_METHOD_DUMP(OS, get_isSafeBuffers)
- RAW_METHOD_DUMP(OS, get_scoped)
- RAW_METHOD_DUMP(OS, get_isSdl)
- RAW_METHOD_DUMP(OS, get_isSingleInheritance)
- RAW_METHOD_DUMP(OS, get_isSplitted)
- RAW_METHOD_DUMP(OS, get_isStatic)
- RAW_METHOD_DUMP(OS, get_isStripped)
- RAW_METHOD_DUMP(OS, get_unalignedType)
- RAW_METHOD_DUMP(OS, get_notReached)
- RAW_METHOD_DUMP(OS, get_isValueUdt)
- RAW_METHOD_DUMP(OS, get_virtual)
- RAW_METHOD_DUMP(OS, get_virtualBaseClass)
- RAW_METHOD_DUMP(OS, get_isVirtualInheritance)
- RAW_METHOD_DUMP(OS, get_volatileType)
- RAW_METHOD_DUMP(OS, get_wasInlined)
- RAW_METHOD_DUMP(OS, get_unused)
- RAW_METHOD_DUMP(OS, get_value)
+ DumpDIAValue(Stream, Indent, StringRef{#Method}, Symbol, \
+ &IDiaSymbol::get_##Method);
+
+#define RAW_METHOD_DUMP_AS(Stream, Method, Type) \
+ DumpDIAValueAs<Type>(Stream, Indent, StringRef{#Method}, Symbol, \
+ &IDiaSymbol::get_##Method);
+
+void DIARawSymbol::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ RAW_ID_METHOD_DUMP(OS, symIndexId, Session, PdbSymbolIdField::SymIndexId,
+ ShowIdFields, RecurseIdFields);
+ RAW_METHOD_DUMP_AS(OS, symTag, PDB_SymType);
+
+ RAW_METHOD_DUMP(OS, access);
+ RAW_METHOD_DUMP(OS, addressOffset);
+ RAW_METHOD_DUMP(OS, addressSection);
+ RAW_METHOD_DUMP(OS, age);
+ RAW_METHOD_DUMP(OS, arrayIndexTypeId);
+ RAW_METHOD_DUMP(OS, backEndMajor);
+ RAW_METHOD_DUMP(OS, backEndMinor);
+ RAW_METHOD_DUMP(OS, backEndBuild);
+ RAW_METHOD_DUMP(OS, backEndQFE);
+ RAW_METHOD_DUMP(OS, baseDataOffset);
+ RAW_METHOD_DUMP(OS, baseDataSlot);
+ RAW_METHOD_DUMP(OS, baseSymbolId);
+ RAW_METHOD_DUMP_AS(OS, baseType, PDB_BuiltinType);
+ RAW_METHOD_DUMP(OS, bitPosition);
+ RAW_METHOD_DUMP_AS(OS, callingConvention, PDB_CallingConv);
+ RAW_ID_METHOD_DUMP(OS, classParentId, Session, PdbSymbolIdField::ClassParent,
+ ShowIdFields, RecurseIdFields);
+ RAW_METHOD_DUMP(OS, compilerName);
+ RAW_METHOD_DUMP(OS, count);
+ RAW_METHOD_DUMP(OS, countLiveRanges);
+ RAW_METHOD_DUMP(OS, frontEndMajor);
+ RAW_METHOD_DUMP(OS, frontEndMinor);
+ RAW_METHOD_DUMP(OS, frontEndBuild);
+ RAW_METHOD_DUMP(OS, frontEndQFE);
+ RAW_ID_METHOD_DUMP(OS, lexicalParentId, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ RAW_METHOD_DUMP(OS, libraryName);
+ RAW_METHOD_DUMP(OS, liveRangeStartAddressOffset);
+ RAW_METHOD_DUMP(OS, liveRangeStartAddressSection);
+ RAW_METHOD_DUMP(OS, liveRangeStartRelativeVirtualAddress);
+ RAW_METHOD_DUMP(OS, localBasePointerRegisterId);
+ RAW_METHOD_DUMP(OS, lowerBoundId);
+ RAW_METHOD_DUMP(OS, memorySpaceKind);
+ RAW_METHOD_DUMP(OS, name);
+ RAW_METHOD_DUMP(OS, numberOfAcceleratorPointerTags);
+ RAW_METHOD_DUMP(OS, numberOfColumns);
+ RAW_METHOD_DUMP(OS, numberOfModifiers);
+ RAW_METHOD_DUMP(OS, numberOfRegisterIndices);
+ RAW_METHOD_DUMP(OS, numberOfRows);
+ RAW_METHOD_DUMP(OS, objectFileName);
+ RAW_METHOD_DUMP(OS, oemId);
+ RAW_METHOD_DUMP(OS, oemSymbolId);
+ RAW_METHOD_DUMP(OS, offsetInUdt);
+ RAW_METHOD_DUMP(OS, platform);
+ RAW_METHOD_DUMP(OS, rank);
+ RAW_METHOD_DUMP(OS, registerId);
+ RAW_METHOD_DUMP(OS, registerType);
+ RAW_METHOD_DUMP(OS, relativeVirtualAddress);
+ RAW_METHOD_DUMP(OS, samplerSlot);
+ RAW_METHOD_DUMP(OS, signature);
+ RAW_METHOD_DUMP(OS, sizeInUdt);
+ RAW_METHOD_DUMP(OS, slot);
+ RAW_METHOD_DUMP(OS, sourceFileName);
+ RAW_METHOD_DUMP(OS, stride);
+ RAW_METHOD_DUMP(OS, subTypeId);
+ RAW_METHOD_DUMP(OS, symbolsFileName);
+ RAW_METHOD_DUMP(OS, targetOffset);
+ RAW_METHOD_DUMP(OS, targetRelativeVirtualAddress);
+ RAW_METHOD_DUMP(OS, targetVirtualAddress);
+ RAW_METHOD_DUMP(OS, targetSection);
+ RAW_METHOD_DUMP(OS, textureSlot);
+ RAW_METHOD_DUMP(OS, timeStamp);
+ RAW_METHOD_DUMP(OS, token);
+ RAW_ID_METHOD_DUMP(OS, typeId, Session, PdbSymbolIdField::Type, ShowIdFields,
+ RecurseIdFields);
+ RAW_METHOD_DUMP(OS, uavSlot);
+ RAW_METHOD_DUMP(OS, undecoratedName);
+ RAW_ID_METHOD_DUMP(OS, unmodifiedTypeId, Session,
+ PdbSymbolIdField::UnmodifiedType, ShowIdFields,
+ RecurseIdFields);
+ RAW_METHOD_DUMP(OS, upperBoundId);
+ RAW_METHOD_DUMP(OS, virtualBaseDispIndex);
+ RAW_METHOD_DUMP(OS, virtualBaseOffset);
+ RAW_METHOD_DUMP(OS, virtualTableShapeId);
+ RAW_METHOD_DUMP_AS(OS, dataKind, PDB_DataKind);
+ RAW_METHOD_DUMP(OS, guid);
+ RAW_METHOD_DUMP(OS, offset);
+ RAW_METHOD_DUMP(OS, thisAdjust);
+ RAW_METHOD_DUMP(OS, virtualBasePointerOffset);
+ RAW_METHOD_DUMP_AS(OS, locationType, PDB_LocType);
+ RAW_METHOD_DUMP(OS, machineType);
+ RAW_METHOD_DUMP(OS, thunkOrdinal);
+ RAW_METHOD_DUMP(OS, length);
+ RAW_METHOD_DUMP(OS, liveRangeLength);
+ RAW_METHOD_DUMP(OS, virtualAddress);
+ RAW_METHOD_DUMP_AS(OS, udtKind, PDB_UdtType);
+ RAW_METHOD_DUMP(OS, constructor);
+ RAW_METHOD_DUMP(OS, customCallingConvention);
+ RAW_METHOD_DUMP(OS, farReturn);
+ RAW_METHOD_DUMP(OS, code);
+ RAW_METHOD_DUMP(OS, compilerGenerated);
+ RAW_METHOD_DUMP(OS, constType);
+ RAW_METHOD_DUMP(OS, editAndContinueEnabled);
+ RAW_METHOD_DUMP(OS, function);
+ RAW_METHOD_DUMP(OS, stride);
+ RAW_METHOD_DUMP(OS, noStackOrdering);
+ RAW_METHOD_DUMP(OS, hasAlloca);
+ RAW_METHOD_DUMP(OS, hasAssignmentOperator);
+ RAW_METHOD_DUMP(OS, isCTypes);
+ RAW_METHOD_DUMP(OS, hasCastOperator);
+ RAW_METHOD_DUMP(OS, hasDebugInfo);
+ RAW_METHOD_DUMP(OS, hasEH);
+ RAW_METHOD_DUMP(OS, hasEHa);
+ RAW_METHOD_DUMP(OS, hasInlAsm);
+ RAW_METHOD_DUMP(OS, framePointerPresent);
+ RAW_METHOD_DUMP(OS, inlSpec);
+ RAW_METHOD_DUMP(OS, interruptReturn);
+ RAW_METHOD_DUMP(OS, hasLongJump);
+ RAW_METHOD_DUMP(OS, hasManagedCode);
+ RAW_METHOD_DUMP(OS, hasNestedTypes);
+ RAW_METHOD_DUMP(OS, noInline);
+ RAW_METHOD_DUMP(OS, noReturn);
+ RAW_METHOD_DUMP(OS, optimizedCodeDebugInfo);
+ RAW_METHOD_DUMP(OS, overloadedOperator);
+ RAW_METHOD_DUMP(OS, hasSEH);
+ RAW_METHOD_DUMP(OS, hasSecurityChecks);
+ RAW_METHOD_DUMP(OS, hasSetJump);
+ RAW_METHOD_DUMP(OS, strictGSCheck);
+ RAW_METHOD_DUMP(OS, isAcceleratorGroupSharedLocal);
+ RAW_METHOD_DUMP(OS, isAcceleratorPointerTagLiveRange);
+ RAW_METHOD_DUMP(OS, isAcceleratorStubFunction);
+ RAW_METHOD_DUMP(OS, isAggregated);
+ RAW_METHOD_DUMP(OS, intro);
+ RAW_METHOD_DUMP(OS, isCVTCIL);
+ RAW_METHOD_DUMP(OS, isConstructorVirtualBase);
+ RAW_METHOD_DUMP(OS, isCxxReturnUdt);
+ RAW_METHOD_DUMP(OS, isDataAligned);
+ RAW_METHOD_DUMP(OS, isHLSLData);
+ RAW_METHOD_DUMP(OS, isHotpatchable);
+ RAW_METHOD_DUMP(OS, indirectVirtualBaseClass);
+ RAW_METHOD_DUMP(OS, isInterfaceUdt);
+ RAW_METHOD_DUMP(OS, intrinsic);
+ RAW_METHOD_DUMP(OS, isLTCG);
+ RAW_METHOD_DUMP(OS, isLocationControlFlowDependent);
+ RAW_METHOD_DUMP(OS, isMSILNetmodule);
+ RAW_METHOD_DUMP(OS, isMatrixRowMajor);
+ RAW_METHOD_DUMP(OS, managed);
+ RAW_METHOD_DUMP(OS, msil);
+ RAW_METHOD_DUMP(OS, isMultipleInheritance);
+ RAW_METHOD_DUMP(OS, isNaked);
+ RAW_METHOD_DUMP(OS, nested);
+ RAW_METHOD_DUMP(OS, isOptimizedAway);
+ RAW_METHOD_DUMP(OS, packed);
+ RAW_METHOD_DUMP(OS, isPointerBasedOnSymbolValue);
+ RAW_METHOD_DUMP(OS, isPointerToDataMember);
+ RAW_METHOD_DUMP(OS, isPointerToMemberFunction);
+ RAW_METHOD_DUMP(OS, pure);
+ RAW_METHOD_DUMP(OS, RValueReference);
+ RAW_METHOD_DUMP(OS, isRefUdt);
+ RAW_METHOD_DUMP(OS, reference);
+ RAW_METHOD_DUMP(OS, restrictedType);
+ RAW_METHOD_DUMP(OS, isReturnValue);
+ RAW_METHOD_DUMP(OS, isSafeBuffers);
+ RAW_METHOD_DUMP(OS, scoped);
+ RAW_METHOD_DUMP(OS, isSdl);
+ RAW_METHOD_DUMP(OS, isSingleInheritance);
+ RAW_METHOD_DUMP(OS, isSplitted);
+ RAW_METHOD_DUMP(OS, isStatic);
+ RAW_METHOD_DUMP(OS, isStripped);
+ RAW_METHOD_DUMP(OS, unalignedType);
+ RAW_METHOD_DUMP(OS, notReached);
+ RAW_METHOD_DUMP(OS, isValueUdt);
+ RAW_METHOD_DUMP(OS, virtual);
+ RAW_METHOD_DUMP(OS, virtualBaseClass);
+ RAW_METHOD_DUMP(OS, isVirtualInheritance);
+ RAW_METHOD_DUMP(OS, volatileType);
+ RAW_METHOD_DUMP(OS, wasInlined);
+ RAW_METHOD_DUMP(OS, unused);
+ RAW_METHOD_DUMP(OS, value);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -414,9 +440,8 @@ DIARawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name,
wchar_t *Name16Str = reinterpret_cast<wchar_t *>(Name16.data());
CComPtr<IDiaEnumSymbols> DiaEnumerator;
- if (S_OK !=
- Symbol->findChildrenExByAddr(EnumVal, Name16Str, CompareFlags, Section,
- Offset, &DiaEnumerator))
+ if (S_OK != Symbol->findChildrenExByAddr(EnumVal, Name16Str, CompareFlags,
+ Section, Offset, &DiaEnumerator))
return nullptr;
return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
@@ -434,9 +459,8 @@ DIARawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name,
wchar_t *Name16Str = reinterpret_cast<wchar_t *>(Name16.data());
CComPtr<IDiaEnumSymbols> DiaEnumerator;
- if (S_OK !=
- Symbol->findChildrenExByVA(EnumVal, Name16Str, CompareFlags, VA,
- &DiaEnumerator))
+ if (S_OK != Symbol->findChildrenExByVA(EnumVal, Name16Str, CompareFlags, VA,
+ &DiaEnumerator))
return nullptr;
return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
@@ -453,9 +477,8 @@ DIARawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
wchar_t *Name16Str = reinterpret_cast<wchar_t *>(Name16.data());
CComPtr<IDiaEnumSymbols> DiaEnumerator;
- if (S_OK !=
- Symbol->findChildrenExByRVA(EnumVal, Name16Str, CompareFlags, RVA,
- &DiaEnumerator))
+ if (S_OK != Symbol->findChildrenExByRVA(EnumVal, Name16Str, CompareFlags, RVA,
+ &DiaEnumerator))
return nullptr;
return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
@@ -500,7 +523,8 @@ std::unique_ptr<IPDBEnumLineNumbers>
DIARawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset,
uint32_t Length) const {
CComPtr<IDiaEnumLineNumbers> DiaEnumerator;
- if (S_OK != Symbol->findInlineeLinesByAddr(Section, Offset, Length, &DiaEnumerator))
+ if (S_OK !=
+ Symbol->findInlineeLinesByAddr(Section, Offset, Length, &DiaEnumerator))
return nullptr;
return llvm::make_unique<DIAEnumLineNumbers>(DiaEnumerator);
@@ -536,8 +560,7 @@ void DIARawSymbol::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) const {
Symbol->get_dataBytes(DataSize, &DataSize, bytes.data());
}
-std::string
-DIARawSymbol::getUndecoratedNameEx(PDB_UndnameFlags Flags) const {
+std::string DIARawSymbol::getUndecoratedNameEx(PDB_UndnameFlags Flags) const {
CComBSTR Result16;
if (S_OK != Symbol->get_undecoratedNameEx((DWORD)Flags, &Result16))
return std::string();
@@ -567,7 +590,7 @@ uint32_t DIARawSymbol::getAge() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_age);
}
-uint32_t DIARawSymbol::getArrayIndexTypeId() const {
+SymIndexId DIARawSymbol::getArrayIndexTypeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_arrayIndexTypeId);
}
@@ -586,7 +609,7 @@ uint32_t DIARawSymbol::getBaseDataSlot() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_baseDataSlot);
}
-uint32_t DIARawSymbol::getBaseSymbolId() const {
+SymIndexId DIARawSymbol::getBaseSymbolId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_baseSymbolId);
}
@@ -604,7 +627,7 @@ PDB_CallingConv DIARawSymbol::getCallingConvention() const {
Symbol, &IDiaSymbol::get_callingConvention);
}
-uint32_t DIARawSymbol::getClassParentId() const {
+SymIndexId DIARawSymbol::getClassParentId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_classParentId);
}
@@ -631,7 +654,7 @@ PDB_Lang DIARawSymbol::getLanguage() const {
return PrivateGetDIAValue<DWORD, PDB_Lang>(Symbol, &IDiaSymbol::get_language);
}
-uint32_t DIARawSymbol::getLexicalParentId() const {
+SymIndexId DIARawSymbol::getLexicalParentId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_lexicalParentId);
}
@@ -659,7 +682,7 @@ codeview::RegisterId DIARawSymbol::getLocalBasePointerRegisterId() const {
Symbol, &IDiaSymbol::get_localBasePointerRegisterId);
}
-uint32_t DIARawSymbol::getLowerBoundId() const {
+SymIndexId DIARawSymbol::getLowerBoundId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_lowerBoundId);
}
@@ -700,7 +723,7 @@ uint32_t DIARawSymbol::getOemId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_oemId);
}
-uint32_t DIARawSymbol::getOemSymbolId() const {
+SymIndexId DIARawSymbol::getOemSymbolId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_oemSymbolId);
}
@@ -749,8 +772,7 @@ std::string DIARawSymbol::getSourceFileName() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_sourceFileName);
}
-std::unique_ptr<IPDBLineNumber>
-DIARawSymbol::getSrcLineOnTypeDefn() const {
+std::unique_ptr<IPDBLineNumber> DIARawSymbol::getSrcLineOnTypeDefn() const {
CComPtr<IDiaLineNumber> LineNumber;
if (FAILED(Symbol->getSrcLineOnTypeDefn(&LineNumber)) || !LineNumber)
return nullptr;
@@ -762,7 +784,7 @@ uint32_t DIARawSymbol::getStride() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_stride);
}
-uint32_t DIARawSymbol::getSubTypeId() const {
+SymIndexId DIARawSymbol::getSubTypeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_subTypeId);
}
@@ -770,7 +792,7 @@ std::string DIARawSymbol::getSymbolsFileName() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_symbolsFileName);
}
-uint32_t DIARawSymbol::getSymIndexId() const {
+SymIndexId DIARawSymbol::getSymIndexId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_symIndexId);
}
@@ -803,7 +825,7 @@ uint32_t DIARawSymbol::getToken() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_token);
}
-uint32_t DIARawSymbol::getTypeId() const {
+SymIndexId DIARawSymbol::getTypeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_typeId);
}
@@ -815,11 +837,11 @@ std::string DIARawSymbol::getUndecoratedName() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_undecoratedName);
}
-uint32_t DIARawSymbol::getUnmodifiedTypeId() const {
+SymIndexId DIARawSymbol::getUnmodifiedTypeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_unmodifiedTypeId);
}
-uint32_t DIARawSymbol::getUpperBoundId() const {
+SymIndexId DIARawSymbol::getUpperBoundId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_upperBoundId);
}
@@ -840,7 +862,7 @@ uint32_t DIARawSymbol::getVirtualBaseOffset() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualBaseOffset);
}
-uint32_t DIARawSymbol::getVirtualTableShapeId() const {
+SymIndexId DIARawSymbol::getVirtualTableShapeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualTableShapeId);
}
@@ -852,7 +874,7 @@ DIARawSymbol::getVirtualBaseTableType() const {
auto RawVT = llvm::make_unique<DIARawSymbol>(Session, TableType);
auto Pointer =
- llvm::make_unique<PDBSymbolTypePointer>(Session, std::move(RawVT));
+ PDBSymbol::createAs<PDBSymbolTypePointer>(Session, std::move(RawVT));
return unique_dyn_cast<PDBSymbolTypeBuiltin>(Pointer->getPointeeType());
}
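Given the macro definitions above, a single dump line such as RAW_METHOD_DUMP_AS(OS, udtKind, PDB_UdtType); expands, approximately, to:

DumpDIAValueAs<PDB_UdtType>(OS, Indent, StringRef{"udtKind"}, Symbol,
                            &IDiaSymbol::get_udtKind);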
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
index b7dc49f53e23..8e233ca15161 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
@@ -25,7 +25,7 @@ std::unique_ptr<PDBSymbolCompiland> DIASectionContrib::getCompiland() const {
return nullptr;
auto RawSymbol = llvm::make_unique<DIARawSymbol>(Session, Symbol);
- return llvm::make_unique<PDBSymbolCompiland>(Session, std::move(RawSymbol));
+ return PDBSymbol::createAs<PDBSymbolCompiland>(Session, std::move(RawSymbol));
}
template <typename ArgType>
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
index d81f59400eb3..bd375e172ac0 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h"
@@ -42,7 +43,7 @@ static Error ErrorFromHResult(HRESULT Result, const char *Str, Ts &&... Args) {
switch (Result) {
case E_PDB_NOT_FOUND:
- return make_error<GenericError>(generic_error_code::invalid_path, Context);
+ return errorCodeToError(std::error_code(ENOENT, std::generic_category()));
case E_PDB_FORMAT:
return make_error<DIAError>(dia_error_code::invalid_file_format, Context);
case E_INVALIDARG:
@@ -71,8 +72,7 @@ static Error LoadDIA(CComPtr<IDiaDataSource> &DiaDataSource) {
// If the CoCreateInstance call above failed, msdia*.dll is not registered.
// Try loading the DLL corresponding to the #included DIA SDK.
#if !defined(_MSC_VER)
- return llvm::make_error<GenericError>(
- "DIA is only supported when using MSVC.");
+ return llvm::make_error<PDBError>(pdb_error_code::dia_failed_loading);
#else
const wchar_t *msdia_dll = nullptr;
#if _MSC_VER >= 1900 && _MSC_VER < 2000
@@ -104,7 +104,7 @@ Error DIASession::createFromPdb(StringRef Path,
llvm::SmallVector<UTF16, 128> Path16;
if (!llvm::convertUTF8ToUTF16String(Path, Path16))
- return make_error<GenericError>(generic_error_code::invalid_path);
+ return make_error<PDBError>(pdb_error_code::invalid_utf8_path, Path);
const wchar_t *Path16Str = reinterpret_cast<const wchar_t *>(Path16.data());
HRESULT HR;
@@ -130,7 +130,7 @@ Error DIASession::createFromExe(StringRef Path,
llvm::SmallVector<UTF16, 128> Path16;
if (!llvm::convertUTF8ToUTF16String(Path, Path16))
- return make_error<GenericError>(generic_error_code::invalid_path, Path);
+ return make_error<PDBError>(pdb_error_code::invalid_utf8_path, Path);
const wchar_t *Path16Str = reinterpret_cast<const wchar_t *>(Path16.data());
HRESULT HR;
@@ -188,7 +188,8 @@ bool DIASession::addressForRVA(uint32_t RVA, uint32_t &Section,
return false;
}
-std::unique_ptr<PDBSymbol> DIASession::getSymbolById(uint32_t SymbolId) const {
+std::unique_ptr<PDBSymbol>
+DIASession::getSymbolById(SymIndexId SymbolId) const {
CComPtr<IDiaSymbol> LocatedSymbol;
if (S_OK != Session->symbolById(SymbolId, &LocatedSymbol))
return nullptr;
@@ -407,7 +408,7 @@ DIASession::getInjectedSources() const {
if (!Files)
return nullptr;
- return llvm::make_unique<DIAEnumInjectedSources>(*this, Files);
+ return llvm::make_unique<DIAEnumInjectedSources>(Files);
}
std::unique_ptr<IPDBEnumSectionContribs>
@@ -419,3 +420,13 @@ DIASession::getSectionContribs() const {
return llvm::make_unique<DIAEnumSectionContribs>(*this, Sections);
}
+
+std::unique_ptr<IPDBEnumFrameData>
+DIASession::getFrameData() const {
+ CComPtr<IDiaEnumFrameData> FD =
+ getTableEnumerator<IDiaEnumFrameData>(*Session);
+ if (!FD)
+ return nullptr;
+
+ return llvm::make_unique<DIAEnumFrameData>(FD);
+}
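A minimal consumer of the new frame-data enumerator, assuming the matching IPDBSession and IPDBFrameData declarations from this same import; getNext() returns null once the underlying DIA enumerator is exhausted (function name is illustrative):

#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/Support/raw_ostream.h"

void printFrameData(const llvm::pdb::IPDBSession &Session,
                    llvm::raw_ostream &OS) {
  auto Frames = Session.getFrameData();
  if (!Frames) // the FrameData table may be absent from the PDB
    return;
  while (auto FD = Frames->getNext())
    OS << FD->getRelativeVirtualAddress() << " " << FD->getLengthBlock() << "\n";
}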
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
index 8605f55b402c..d3e408166a87 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
@@ -8,12 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
-#include "llvm/Support/ConvertUTF.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -23,16 +22,7 @@ DIASourceFile::DIASourceFile(const DIASession &PDBSession,
: Session(PDBSession), SourceFile(DiaSourceFile) {}
std::string DIASourceFile::getFileName() const {
- CComBSTR FileName16;
- HRESULT Result = SourceFile->get_fileName(&FileName16);
- if (S_OK != Result)
- return std::string();
-
- std::string FileName8;
- llvm::ArrayRef<char> FileNameBytes(reinterpret_cast<char *>(FileName16.m_str),
- FileName16.ByteLength());
- llvm::convertUTF16ToUTF8String(FileNameBytes, FileName8);
- return FileName8;
+ return invokeBstrMethod(*SourceFile, &IDiaSourceFile::get_fileName);
}
uint32_t DIASourceFile::getUniqueId() const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp
index 5705c2370dc6..6017081b2cb6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp
@@ -8,14 +8,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/DIA/DIATable.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/ConvertUTF.h"
+#include "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
using namespace llvm;
using namespace llvm::pdb;
-DIATable::DIATable(CComPtr<IDiaTable> DiaTable)
- : Table(DiaTable) {}
+DIATable::DIATable(CComPtr<IDiaTable> DiaTable) : Table(DiaTable) {}
uint32_t DIATable::getItemCount() const {
LONG Count = 0;
@@ -23,16 +21,7 @@ uint32_t DIATable::getItemCount() const {
}
std::string DIATable::getName() const {
- CComBSTR Name16;
- if (S_OK != Table->get_name(&Name16))
- return std::string();
-
- std::string Name8;
- llvm::ArrayRef<char> Name16Bytes(reinterpret_cast<char *>(Name16.m_str),
- Name16.ByteLength());
- if (!llvm::convertUTF16ToUTF8String(Name16Bytes, Name8))
- return std::string();
- return Name8;
+ return invokeBstrMethod(*Table, &IDiaTable::get_name);
}
PDB_TableType DIATable::getTableType() const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp b/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
index 2a677b9abe2d..256952073e88 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
@@ -14,55 +14,34 @@
using namespace llvm;
using namespace llvm::pdb;
-namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
-class GenericErrorCategory : public std::error_category {
+class PDBErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.pdb"; }
-
std::string message(int Condition) const override {
- switch (static_cast<generic_error_code>(Condition)) {
- case generic_error_code::unspecified:
+ switch (static_cast<pdb_error_code>(Condition)) {
+ case pdb_error_code::unspecified:
return "An unknown error has occurred.";
- case generic_error_code::type_server_not_found:
- return "Type server PDB was not found.";
- case generic_error_code::dia_sdk_not_present:
- return "LLVM was not compiled with support for DIA. This usually means "
+ case pdb_error_code::dia_sdk_not_present:
+ return "LLVM was not compiled with support for DIA. This usually means "
"that you are not using MSVC, or your Visual Studio "
- "installation "
- "is corrupt.";
- case generic_error_code::invalid_path:
- return "Unable to load PDB. Make sure the file exists and is readable.";
+ "installation is corrupt.";
+ case pdb_error_code::dia_failed_loading:
+ return "DIA is only supported when using MSVC.";
+ case pdb_error_code::invalid_utf8_path:
+ return "The PDB file path is an invalid UTF8 sequence.";
+ case pdb_error_code::signature_out_of_date:
+ return "The signature does not match; the file(s) might be out of date.";
+ case pdb_error_code::external_cmdline_ref:
+ return "The path to this file must be provided on the command-line.";
}
llvm_unreachable("Unrecognized generic_error_code");
}
};
-} // end anonymous namespace
-
-static ManagedStatic<GenericErrorCategory> Category;
-
-char GenericError::ID = 0;
-
-GenericError::GenericError(generic_error_code C) : GenericError(C, "") {}
-
-GenericError::GenericError(StringRef Context)
- : GenericError(generic_error_code::unspecified, Context) {}
-
-GenericError::GenericError(generic_error_code C, StringRef Context) : Code(C) {
- ErrMsg = "PDB Error: ";
- std::error_code EC = convertToErrorCode();
- if (Code != generic_error_code::unspecified)
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
-}
-
-void GenericError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-StringRef GenericError::getErrorMessage() const { return ErrMsg; }
+static llvm::ManagedStatic<PDBErrorCategory> PDBCategory;
+const std::error_category &llvm::pdb::PDBErrCategory() { return *PDBCategory; }
-std::error_code GenericError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char PDBError::ID;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index b97f1e90bcf8..ab93efc839a9 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -19,7 +19,6 @@
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
using namespace llvm;
@@ -66,12 +65,22 @@ void DbiModuleDescriptorBuilder::setFirstSectionContrib(
}
void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) {
- Symbols.push_back(Symbol);
- // Symbols written to a PDB file are required to be 4 byte aligned. The same
+ // Defer to the bulk API. It does the same thing.
+ addSymbolsInBulk(Symbol.data());
+}
+
+void DbiModuleDescriptorBuilder::addSymbolsInBulk(
+ ArrayRef<uint8_t> BulkSymbols) {
+ // Do nothing for empty runs of symbols.
+ if (BulkSymbols.empty())
+ return;
+
+ Symbols.push_back(BulkSymbols);
+ // Symbols written to a PDB file are required to be 4 byte aligned. The same
// is not true of object files.
- assert(Symbol.length() % alignOf(CodeViewContainer::Pdb) == 0 &&
+ assert(BulkSymbols.size() % alignOf(CodeViewContainer::Pdb) == 0 &&
"Invalid Symbol alignment!");
- SymbolByteSize += Symbol.length();
+ SymbolByteSize += BulkSymbols.size();
}
void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) {
@@ -145,16 +154,13 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
if (auto EC =
SymbolWriter.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC))
return EC;
- BinaryItemStream<CVSymbol> Records(llvm::support::endianness::little);
- Records.setItems(Symbols);
- BinaryStreamRef RecordsRef(Records);
- if (auto EC = SymbolWriter.writeStreamRef(RecordsRef))
- return EC;
- if (auto EC = SymbolWriter.padToAlignment(4))
- return EC;
- // TODO: Write C11 Line data
+ for (ArrayRef<uint8_t> Syms : Symbols) {
+ if (auto EC = SymbolWriter.writeBytes(Syms))
+ return EC;
+ }
assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
"Invalid debug section alignment!");
+ // TODO: Write C11 Line data
for (const auto &Builder : C13Builders) {
assert(Builder && "Empty C13 Fragment Builder!");
if (auto EC = Builder->commit(SymbolWriter))
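With this change the per-record addSymbol() defers to the bulk path; a minimal sketch of a caller handing over one pre-serialized, 4-byte-aligned run of records (function name is illustrative):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"

void addModuleSymbols(llvm::pdb::DbiModuleDescriptorBuilder &Mod,
                      llvm::ArrayRef<uint8_t> SerializedRecords) {
  // Empty runs are ignored inside addSymbolsInBulk, so no guard is needed;
  // the run must already be padded to 4-byte alignment (asserted above).
  Mod.addSymbolsInBulk(SerializedRecords);
}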
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
index edaa783398ca..60ac17b655a7 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -197,7 +197,7 @@ PDB_Machine DbiStream::getMachineType() const {
return static_cast<PDB_Machine>(Machine);
}
-FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() {
+FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() const {
return SectionHeaders;
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index f6043bfd7cf9..094216ea800a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
@@ -74,10 +75,27 @@ void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) {
PublicsStreamIndex = Index;
}
+void DbiStreamBuilder::addNewFpoData(const codeview::FrameData &FD) {
+ if (!NewFpoData.hasValue())
+ NewFpoData.emplace(false);
+
+ NewFpoData->addFrameData(FD);
+}
+
+void DbiStreamBuilder::addOldFpoData(const object::FpoData &FD) {
+ OldFpoData.push_back(FD);
+}
+
Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type,
ArrayRef<uint8_t> Data) {
+ assert(Type != DbgHeaderType::NewFPO &&
+ "NewFPO data should be written via addFrameData()!");
+
DbgStreams[(int)Type].emplace();
- DbgStreams[(int)Type]->Data = Data;
+ DbgStreams[(int)Type]->Size = Data.size();
+ DbgStreams[(int)Type]->WriteFn = [Data](BinaryStreamWriter &Writer) {
+ return Writer.writeArray(Data);
+ };
return Error::success();
}
@@ -272,10 +290,30 @@ Error DbiStreamBuilder::finalize() {
}
Error DbiStreamBuilder::finalizeMsfLayout() {
+ if (NewFpoData.hasValue()) {
+ DbgStreams[(int)DbgHeaderType::NewFPO].emplace();
+ DbgStreams[(int)DbgHeaderType::NewFPO]->Size =
+ NewFpoData->calculateSerializedSize();
+ DbgStreams[(int)DbgHeaderType::NewFPO]->WriteFn =
+ [this](BinaryStreamWriter &Writer) {
+ return NewFpoData->commit(Writer);
+ };
+ }
+
+ if (!OldFpoData.empty()) {
+ DbgStreams[(int)DbgHeaderType::FPO].emplace();
+ DbgStreams[(int)DbgHeaderType::FPO]->Size =
+ sizeof(object::FpoData) * OldFpoData.size();
+ DbgStreams[(int)DbgHeaderType::FPO]->WriteFn =
+ [this](BinaryStreamWriter &Writer) {
+ return Writer.writeArray(makeArrayRef(OldFpoData));
+ };
+ }
+
for (auto &S : DbgStreams) {
if (!S.hasValue())
continue;
- auto ExpectedIndex = Msf.addStream(S->Data.size());
+ auto ExpectedIndex = Msf.addStream(S->Size);
if (!ExpectedIndex)
return ExpectedIndex.takeError();
S->StreamNumber = *ExpectedIndex;
@@ -406,7 +444,8 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
auto WritableStream = WritableMappedBlockStream::createIndexedStream(
Layout, MsfBuffer, Stream->StreamNumber, Allocator);
BinaryStreamWriter DbgStreamWriter(*WritableStream);
- if (auto EC = DbgStreamWriter.writeArray(Stream->Data))
+
+ if (auto EC = Stream->WriteFn(DbgStreamWriter))
return EC;
}
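The debug streams now record a size plus a write callback instead of raw bytes; a standalone sketch of that deferred-write pattern with simplified stand-in types:

#include <cstdint>
#include <functional>
#include <vector>

struct StreamWriter { std::vector<uint8_t> Bytes; };

struct DeferredStream {
  uint32_t Size = 0;                           // reserved when the layout is finalized
  std::function<bool(StreamWriter &)> WriteFn; // invoked when the stream is committed
};

bool commitAll(std::vector<DeferredStream> &Streams, StreamWriter &W) {
  for (auto &S : Streams)
    if (S.WriteFn && !S.WriteFn(W))
      return false;
  return true;
}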
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 58efc2256ae1..57da7003da2b 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -20,6 +21,7 @@
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <vector>
@@ -29,8 +31,27 @@ using namespace llvm::pdb;
using namespace llvm::codeview;
struct llvm::pdb::GSIHashStreamBuilder {
+ struct UdtDenseMapInfo {
+ static inline CVSymbol getEmptyKey() {
+ static CVSymbol Empty;
+ return Empty;
+ }
+ static inline CVSymbol getTombstoneKey() {
+ static CVSymbol Tombstone(static_cast<SymbolKind>(-1),
+ ArrayRef<uint8_t>());
+ return Tombstone;
+ }
+ static unsigned getHashValue(const CVSymbol &Val) {
+ return xxHash64(Val.RecordData);
+ }
+ static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
+ return LHS.RecordData == RHS.RecordData;
+ }
+ };
+
std::vector<CVSymbol> Records;
uint32_t StreamIndex;
+ llvm::DenseSet<CVSymbol, UdtDenseMapInfo> UdtHashes;
std::vector<PSHashRecord> HashRecords;
std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
std::vector<support::ulittle32_t> HashBuckets;
@@ -42,10 +63,18 @@ struct llvm::pdb::GSIHashStreamBuilder {
template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
T Copy(Symbol);
- Records.push_back(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
- CodeViewContainer::Pdb));
+ addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
+ CodeViewContainer::Pdb));
+ }
+ void addSymbol(const CVSymbol &Symbol) {
+ if (Symbol.kind() == S_UDT) {
+ auto Iter = UdtHashes.insert(Symbol);
+ if (!Iter.second)
+ return;
+ }
+
+ Records.push_back(Symbol);
}
- void addSymbol(const CVSymbol &Symbol) { Records.push_back(Symbol); }
};
uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
@@ -144,11 +173,10 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
// can properly early-out when it detects the record won't be found. The
// algorithm used here corresponds to the function
// caseInsensitiveComparePchPchCchCch in the reference implementation.
- llvm::sort(Bucket.begin(), Bucket.end(),
- [](const std::pair<StringRef, PSHashRecord> &Left,
- const std::pair<StringRef, PSHashRecord> &Right) {
- return gsiRecordLess(Left.first, Right.first);
- });
+ llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
+ const std::pair<StringRef, PSHashRecord> &Right) {
+ return gsiRecordLess(Left.first, Right.first);
+ });
for (const auto &Entry : Bucket)
HashRecords.push_back(Entry.second);
@@ -273,10 +301,6 @@ void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
GSH->addSymbol(Sym, Msf);
}
-void GSIStreamBuilder::addGlobalSymbol(const UDTSym &Sym) {
- GSH->addSymbol(Sym, Msf);
-}
-
void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) {
GSH->addSymbol(Sym);
}
@@ -310,13 +334,14 @@ Error GSIStreamBuilder::commitPublicsHashStream(
PublicsStreamHeader Header;
// FIXME: Fill these in. They are for incremental linking.
+ Header.SymHash = PSH->calculateSerializedLength();
+ Header.AddrMap = PSH->Records.size() * 4;
Header.NumThunks = 0;
Header.SizeOfThunk = 0;
Header.ISectThunkTable = 0;
+ memset(Header.Padding, 0, sizeof(Header.Padding));
Header.OffThunkTable = 0;
Header.NumSections = 0;
- Header.SymHash = PSH->calculateSerializedLength();
- Header.AddrMap = PSH->Records.size() * 4;
if (auto EC = Writer.writeObject(Header))
return EC;
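The S_UDT de-duplication above keys records by a hash of their raw bytes so identical UDT records are emitted only once; a standalone sketch of the same idea with simplified types (the S_UDT kind value here is an assumption for illustration):

#include <cstdint>
#include <string>
#include <unordered_set>
#include <vector>

struct Record {
  uint16_t Kind;
  std::vector<uint8_t> Data;
};

bool addRecord(std::vector<Record> &Out,
               std::unordered_set<std::string> &SeenUdts, const Record &R) {
  const uint16_t S_UDT_KIND = 0x1108; // assumed CodeView S_UDT value
  if (R.Kind == S_UDT_KIND) {
    std::string Key(R.Data.begin(), R.Data.end());
    if (!SeenUdts.insert(Key).second)
      return false; // duplicate S_UDT record, skip it
  }
  Out.push_back(R);
  return true;
}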
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
index 36076f436ad0..e36319566821 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
@@ -20,7 +20,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+
+#include "llvm/DebugInfo/CodeView/RecordName.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <algorithm>
@@ -41,6 +45,43 @@ Error GlobalsStream::reload() {
return Error::success();
}
+std::vector<std::pair<uint32_t, codeview::CVSymbol>>
+GlobalsStream::findRecordsByName(StringRef Name,
+ const SymbolStream &Symbols) const {
+ std::vector<std::pair<uint32_t, codeview::CVSymbol>> Result;
+
+ // Hash the name to figure out which bucket this goes into.
+ size_t ExpandedBucketIndex = hashStringV1(Name) % IPHR_HASH;
+ int32_t CompressedBucketIndex = GlobalsTable.BucketMap[ExpandedBucketIndex];
+ if (CompressedBucketIndex == -1)
+ return Result;
+
+ uint32_t LastBucketIndex = GlobalsTable.HashBuckets.size() - 1;
+ uint32_t StartRecordIndex =
+ GlobalsTable.HashBuckets[CompressedBucketIndex] / 12;
+ uint32_t EndRecordIndex = 0;
+ if (LLVM_LIKELY(uint32_t(CompressedBucketIndex) < LastBucketIndex)) {
+ EndRecordIndex = GlobalsTable.HashBuckets[CompressedBucketIndex + 1];
+ } else {
+ // If this is the last bucket, it consists of all hash records until the end
+ // of the HashRecords array.
+ EndRecordIndex = GlobalsTable.HashRecords.size() * 12;
+ }
+
+ EndRecordIndex /= 12;
+
+ assert(EndRecordIndex <= GlobalsTable.HashRecords.size());
+ while (StartRecordIndex < EndRecordIndex) {
+ PSHashRecord PSH = GlobalsTable.HashRecords[StartRecordIndex];
+ uint32_t Off = PSH.Off - 1;
+ codeview::CVSymbol Record = Symbols.readRecord(Off);
+ if (codeview::getSymbolName(Record) == Name)
+ Result.push_back(std::make_pair(Off, std::move(Record)));
+ ++StartRecordIndex;
+ }
+ return Result;
+}
+
static Error checkHashHdrVersion(const GSIHashHeader *HashHdr) {
if (HashHdr->VerHdr != GSIHashHeader::HdrVersion)
return make_error<RawError>(
@@ -86,7 +127,9 @@ static Error readGSIHashRecords(FixedStreamArray<PSHashRecord> &HashRecords,
static Error
readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
- ArrayRef<uint8_t> &HashBitmap, const GSIHashHeader *HashHdr,
+ FixedStreamArray<support::ulittle32_t> &HashBitmap,
+ const GSIHashHeader *HashHdr,
+ MutableArrayRef<int32_t> BucketMap,
BinaryStreamReader &Reader) {
if (auto EC = checkHashHdrVersion(HashHdr))
return EC;
@@ -94,13 +137,27 @@ readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
// Before the actual hash buckets, there is a bitmap of length determined by
// IPHR_HASH.
size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
- uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
- if (auto EC = Reader.readBytes(HashBitmap, NumBitmapEntries))
+ uint32_t NumBitmapEntries = BitmapSizeInBits / 32;
+ if (auto EC = Reader.readArray(HashBitmap, NumBitmapEntries))
return joinErrors(std::move(EC),
make_error<RawError>(raw_error_code::corrupt_file,
"Could not read a bitmap."));
+ uint32_t NumBuckets1 = 0;
+ uint32_t CompressedBucketIdx = 0;
+ for (uint32_t I = 0; I <= IPHR_HASH; ++I) {
+ uint8_t WordIdx = I / 32;
+ uint8_t BitIdx = I % 32;
+ bool IsSet = HashBitmap[WordIdx] & (1U << BitIdx);
+ if (IsSet) {
+ ++NumBuckets1;
+ BucketMap[I] = CompressedBucketIdx++;
+ } else {
+ BucketMap[I] = -1;
+ }
+ }
+
uint32_t NumBuckets = 0;
- for (uint8_t B : HashBitmap)
+ for (uint32_t B : HashBitmap)
NumBuckets += countPopulation(B);
// Hash buckets follow.
@@ -118,7 +175,8 @@ Error GSIHashTable::read(BinaryStreamReader &Reader) {
if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader))
return EC;
if (HashHdr->HrSize > 0)
- if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr, Reader))
+ if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr,
+ BucketMap, Reader))
return EC;
return Error::success();
}
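A standalone sketch of the bucket-compression step used by findRecordsByName: the on-disk bitmap carries one bit per possible hash bucket, only set bits occupy a slot in the serialized bucket array, and BucketMap records either the compressed index or -1 (types simplified):

#include <cstdint>
#include <vector>

std::vector<int32_t> expandBucketMap(const std::vector<uint32_t> &Bitmap,
                                     uint32_t NumExpandedBuckets) {
  std::vector<int32_t> BucketMap(NumExpandedBuckets, -1);
  uint32_t Compressed = 0;
  for (uint32_t I = 0; I < NumExpandedBuckets; ++I) {
    bool IsSet = Bitmap[I / 32] & (1U << (I % 32));
    if (IsSet)
      BucketMap[I] = Compressed++;
  }
  return BucketMap;
}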
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index 54d6835f1121..3b5a2accdba6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -32,15 +32,20 @@ InfoStreamBuilder::InfoStreamBuilder(msf::MSFBuilder &Msf,
void InfoStreamBuilder::setVersion(PdbRaw_ImplVer V) { Ver = V; }
+void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) {
+ Features.push_back(Sig);
+}
+
+void InfoStreamBuilder::setHashPDBContentsToGUID(bool B) {
+ HashPDBContentsToGUID = B;
+}
+
void InfoStreamBuilder::setAge(uint32_t A) { Age = A; }
void InfoStreamBuilder::setSignature(uint32_t S) { Signature = S; }
void InfoStreamBuilder::setGuid(GUID G) { Guid = G; }
-void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) {
- Features.push_back(Sig);
-}
Error InfoStreamBuilder::finalizeMsfLayout() {
uint32_t Length = sizeof(InfoStreamHeader) +
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 2e1f61c7a25d..8c97f4a012f0 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -11,7 +11,9 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -47,7 +49,8 @@ Error ModuleDebugStreamRef::reload() {
if (auto EC = Reader.readInteger(Signature))
return EC;
- if (auto EC = Reader.readSubstream(SymbolsSubstream, SymbolSize - 4))
+ Reader.setOffset(0);
+ if (auto EC = Reader.readSubstream(SymbolsSubstream, SymbolSize))
return EC;
if (auto EC = Reader.readSubstream(C11LinesSubstream, C11Size))
return EC;
@@ -55,8 +58,8 @@ Error ModuleDebugStreamRef::reload() {
return EC;
BinaryStreamReader SymbolReader(SymbolsSubstream.StreamData);
- if (auto EC =
- SymbolReader.readArray(SymbolArray, SymbolReader.bytesRemaining()))
+ if (auto EC = SymbolReader.readArray(
+ SymbolArray, SymbolReader.bytesRemaining(), sizeof(uint32_t)))
return EC;
BinaryStreamReader SubsectionsReader(C13LinesSubstream.StreamData);
@@ -76,6 +79,11 @@ Error ModuleDebugStreamRef::reload() {
return Error::success();
}
+const codeview::CVSymbolArray
+ModuleDebugStreamRef::getSymbolArrayForScope(uint32_t ScopeBegin) const {
+ return limitSymbolArrayToScope(SymbolArray, ScopeBegin);
+}
+
BinarySubstreamRef ModuleDebugStreamRef::getSymbolsSubstream() const {
return SymbolsSubstream;
}
@@ -97,6 +105,12 @@ ModuleDebugStreamRef::symbols(bool *HadError) const {
return make_range(SymbolArray.begin(HadError), SymbolArray.end());
}
+CVSymbol ModuleDebugStreamRef::readSymbolAtOffset(uint32_t Offset) const {
+ auto Iter = SymbolArray.at(Offset);
+ assert(Iter != SymbolArray.end());
+ return *Iter;
+}
+
iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
ModuleDebugStreamRef::subsections() const {
return make_range(Subsections.begin(), Subsections.end());
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp
deleted file mode 100644
index 4644ddcf24e3..000000000000
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===- NativeBuiltinSymbol.cpp ------------------------------------ C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
-
-
-namespace llvm {
-namespace pdb {
-
-NativeBuiltinSymbol::NativeBuiltinSymbol(NativeSession &PDBSession,
- SymIndexId Id, PDB_BuiltinType T,
- uint64_t L)
- : NativeRawSymbol(PDBSession, Id), Session(PDBSession), Type(T), Length(L) {
-}
-
-NativeBuiltinSymbol::~NativeBuiltinSymbol() {}
-
-std::unique_ptr<NativeRawSymbol> NativeBuiltinSymbol::clone() const {
- return llvm::make_unique<NativeBuiltinSymbol>(Session, SymbolId, Type, Length);
-}
-
-void NativeBuiltinSymbol::dump(raw_ostream &OS, int Indent) const {
- // TODO: Apparently nothing needs this yet.
-}
-
-PDB_SymType NativeBuiltinSymbol::getSymTag() const {
- return PDB_SymType::BuiltinType;
-}
-
-PDB_BuiltinType NativeBuiltinSymbol::getBuiltinType() const { return Type; }
-
-bool NativeBuiltinSymbol::isConstType() const { return false; }
-
-uint64_t NativeBuiltinSymbol::getLength() const { return Length; }
-
-bool NativeBuiltinSymbol::isUnalignedType() const { return false; }
-
-bool NativeBuiltinSymbol::isVolatileType() const { return false; }
-
-} // namespace pdb
-} // namespace llvm
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index 7132a99a9f16..efa70b0e7bd8 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/ADT/STLExtras.h"
@@ -17,21 +18,31 @@ namespace pdb {
NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session,
SymIndexId SymbolId,
DbiModuleDescriptor MI)
- : NativeRawSymbol(Session, SymbolId), Module(MI) {}
+ : NativeRawSymbol(Session, PDB_SymType::Compiland, SymbolId), Module(MI) {}
PDB_SymType NativeCompilandSymbol::getSymTag() const {
return PDB_SymType::Compiland;
}
-std::unique_ptr<NativeRawSymbol> NativeCompilandSymbol::clone() const {
- return llvm::make_unique<NativeCompilandSymbol>(Session, SymbolId, Module);
+void NativeCompilandSymbol::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "libraryName", getLibraryName(), Indent);
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolField(OS, "editAndContinueEnabled", isEditAndContinueEnabled(),
+ Indent);
}
bool NativeCompilandSymbol::isEditAndContinueEnabled() const {
return Module.hasECInfo();
}
-uint32_t NativeCompilandSymbol::getLexicalParentId() const { return 0; }
+SymIndexId NativeCompilandSymbol::getLexicalParentId() const { return 0; }
// The usage of getObjFileName for getLibraryName and getModuleName for getName
// may seem backwards, but it is consistent with DIA, which is what this API
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
new file mode 100644
index 000000000000..6eece3df2db3
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
@@ -0,0 +1,55 @@
+//==- NativeEnumGlobals.cpp - Native Global Enumerator impl ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
+
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeEnumGlobals::NativeEnumGlobals(NativeSession &PDBSession,
+ std::vector<codeview::SymbolKind> Kinds)
+ : Index(0), Session(PDBSession) {
+ GlobalsStream &GS = cantFail(Session.getPDBFile().getPDBGlobalsStream());
+ SymbolStream &SS = cantFail(Session.getPDBFile().getPDBSymbolStream());
+ for (uint32_t Off : GS.getGlobalsTable()) {
+ CVSymbol S = SS.readRecord(Off);
+ if (!llvm::is_contained(Kinds, S.kind()))
+ continue;
+ MatchOffsets.push_back(Off);
+ }
+}
+
+uint32_t NativeEnumGlobals::getChildCount() const {
+ return static_cast<uint32_t>(MatchOffsets.size());
+}
+
+std::unique_ptr<PDBSymbol>
+NativeEnumGlobals::getChildAtIndex(uint32_t N) const {
+ if (N >= MatchOffsets.size())
+ return nullptr;
+
+ SymIndexId Id =
+ Session.getSymbolCache().getOrCreateGlobalSymbolByOffset(MatchOffsets[N]);
+ return Session.getSymbolCache().getSymbolById(Id);
+}
+
+std::unique_ptr<PDBSymbol> NativeEnumGlobals::getNext() {
+ return getChildAtIndex(Index++);
+}
+
+void NativeEnumGlobals::reset() { Index = 0; }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index a65782e2d4fc..6e4d56443a07 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -10,42 +10,35 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
namespace llvm {
namespace pdb {
-NativeEnumModules::NativeEnumModules(NativeSession &PDBSession,
- const DbiModuleList &Modules,
- uint32_t Index)
- : Session(PDBSession), Modules(Modules), Index(Index) {}
+NativeEnumModules::NativeEnumModules(NativeSession &PDBSession, uint32_t Index)
+ : Session(PDBSession), Index(Index) {}
uint32_t NativeEnumModules::getChildCount() const {
- return static_cast<uint32_t>(Modules.getModuleCount());
+ return Session.getSymbolCache().getNumCompilands();
}
std::unique_ptr<PDBSymbol>
-NativeEnumModules::getChildAtIndex(uint32_t Index) const {
- if (Index >= Modules.getModuleCount())
- return nullptr;
- return Session.createCompilandSymbol(Modules.getModuleDescriptor(Index));
+NativeEnumModules::getChildAtIndex(uint32_t N) const {
+ return Session.getSymbolCache().getOrCreateCompiland(N);
}
std::unique_ptr<PDBSymbol> NativeEnumModules::getNext() {
- if (Index >= Modules.getModuleCount())
+ if (Index >= getChildCount())
return nullptr;
return getChildAtIndex(Index++);
}
void NativeEnumModules::reset() { Index = 0; }
-NativeEnumModules *NativeEnumModules::clone() const {
- return new NativeEnumModules(Session, Modules, Index);
-}
-
}
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbol.cpp
deleted file mode 100644
index 38d65917306a..000000000000
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbol.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- NativeEnumSymbol.cpp - info about enum type --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h"
-
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
-
-#include <cassert>
-
-using namespace llvm;
-using namespace llvm::pdb;
-
-NativeEnumSymbol::NativeEnumSymbol(NativeSession &Session, SymIndexId Id,
- const codeview::CVType &CVT)
- : NativeRawSymbol(Session, Id), CV(CVT),
- Record(codeview::TypeRecordKind::Enum) {
- assert(CV.kind() == codeview::TypeLeafKind::LF_ENUM);
- cantFail(visitTypeRecord(CV, *this));
-}
-
-NativeEnumSymbol::~NativeEnumSymbol() {}
-
-std::unique_ptr<NativeRawSymbol> NativeEnumSymbol::clone() const {
- return llvm::make_unique<NativeEnumSymbol>(Session, SymbolId, CV);
-}
-
-std::unique_ptr<IPDBEnumSymbols>
-NativeEnumSymbol::findChildren(PDB_SymType Type) const {
- switch (Type) {
- case PDB_SymType::Data: {
- // TODO(amccarth): Provide an actual implementation.
- return nullptr;
- }
- default:
- return nullptr;
- }
-}
-
-Error NativeEnumSymbol::visitKnownRecord(codeview::CVType &CVR,
- codeview::EnumRecord &ER) {
- Record = ER;
- return Error::success();
-}
-
-Error NativeEnumSymbol::visitKnownMember(codeview::CVMemberRecord &CVM,
- codeview::EnumeratorRecord &R) {
- return Error::success();
-}
-
-PDB_SymType NativeEnumSymbol::getSymTag() const { return PDB_SymType::Enum; }
-
-uint32_t NativeEnumSymbol::getClassParentId() const { return 0xFFFFFFFF; }
-
-uint32_t NativeEnumSymbol::getUnmodifiedTypeId() const { return 0; }
-
-bool NativeEnumSymbol::hasConstructor() const {
- return bool(Record.getOptions() &
- codeview::ClassOptions::HasConstructorOrDestructor);
-}
-
-bool NativeEnumSymbol::hasAssignmentOperator() const {
- return bool(Record.getOptions() &
- codeview::ClassOptions::HasOverloadedAssignmentOperator);
-}
-
-bool NativeEnumSymbol::hasCastOperator() const {
- return bool(Record.getOptions() &
- codeview::ClassOptions::HasConversionOperator);
-}
-
-uint64_t NativeEnumSymbol::getLength() const {
- const auto Id = Session.findSymbolByTypeIndex(Record.getUnderlyingType());
- const auto UnderlyingType =
- Session.getConcreteSymbolById<PDBSymbolTypeBuiltin>(Id);
- return UnderlyingType ? UnderlyingType->getLength() : 0;
-}
-
-std::string NativeEnumSymbol::getName() const { return Record.getName(); }
-
-bool NativeEnumSymbol::isNested() const {
- return bool(Record.getOptions() & codeview::ClassOptions::Nested);
-}
-
-bool NativeEnumSymbol::hasOverloadedOperator() const {
- return bool(Record.getOptions() &
- codeview::ClassOptions::HasOverloadedOperator);
-}
-
-bool NativeEnumSymbol::isPacked() const {
- return bool(Record.getOptions() & codeview::ClassOptions::Packed);
-}
-
-bool NativeEnumSymbol::isScoped() const {
- return bool(Record.getOptions() & codeview::ClassOptions::Scoped);
-}
-
-uint32_t NativeEnumSymbol::getTypeId() const {
- return Session.findSymbolByTypeIndex(Record.getUnderlyingType());
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
index 36a68a1c62de..288a9128147a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
@@ -9,39 +9,58 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
-namespace llvm {
-namespace pdb {
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
NativeEnumTypes::NativeEnumTypes(NativeSession &PDBSession,
- codeview::LazyRandomTypeCollection &Types,
- codeview::TypeLeafKind Kind)
- : Matches(), Index(0), Session(PDBSession), Kind(Kind) {
- for (auto Index = Types.getFirst(); Index;
- Index = Types.getNext(Index.getValue())) {
- if (Types.getType(Index.getValue()).kind() == Kind)
- Matches.push_back(Index.getValue());
+ LazyRandomTypeCollection &Types,
+ std::vector<codeview::TypeLeafKind> Kinds)
+ : Matches(), Index(0), Session(PDBSession) {
+ Optional<TypeIndex> TI = Types.getFirst();
+ while (TI) {
+ CVType CVT = Types.getType(*TI);
+ TypeLeafKind K = CVT.kind();
+ if (llvm::is_contained(Kinds, K)) {
+ // Don't add forward refs, we'll find those later while enumerating.
+ if (!isUdtForwardRef(CVT))
+ Matches.push_back(*TI);
+ } else if (K == TypeLeafKind::LF_MODIFIER) {
+ TypeIndex ModifiedTI = getModifiedType(CVT);
+ if (!ModifiedTI.isSimple()) {
+ CVType UnmodifiedCVT = Types.getType(ModifiedTI);
+ // LF_MODIFIERs point to forward refs, but don't worry about that
+ // here. We're pushing the TypeIndex of the LF_MODIFIER itself,
+ // so we'll worry about resolving forward refs later.
+ if (llvm::is_contained(Kinds, UnmodifiedCVT.kind()))
+ Matches.push_back(*TI);
+ }
+ }
+ TI = Types.getNext(*TI);
}
}
-NativeEnumTypes::NativeEnumTypes(
- NativeSession &PDBSession, const std::vector<codeview::TypeIndex> &Matches,
- codeview::TypeLeafKind Kind)
- : Matches(Matches), Index(0), Session(PDBSession), Kind(Kind) {}
+NativeEnumTypes::NativeEnumTypes(NativeSession &PDBSession,
+ std::vector<codeview::TypeIndex> Indices)
+ : Matches(std::move(Indices)), Index(0), Session(PDBSession) {}
uint32_t NativeEnumTypes::getChildCount() const {
return static_cast<uint32_t>(Matches.size());
}
-std::unique_ptr<PDBSymbol>
-NativeEnumTypes::getChildAtIndex(uint32_t Index) const {
- if (Index < Matches.size())
- return Session.createEnumSymbol(Matches[Index]);
+std::unique_ptr<PDBSymbol> NativeEnumTypes::getChildAtIndex(uint32_t N) const {
+ if (N < Matches.size()) {
+ SymIndexId Id = Session.getSymbolCache().findSymbolByTypeIndex(Matches[N]);
+ return Session.getSymbolCache().getSymbolById(Id);
+ }
return nullptr;
}
@@ -50,10 +69,3 @@ std::unique_ptr<PDBSymbol> NativeEnumTypes::getNext() {
}
void NativeEnumTypes::reset() { Index = 0; }
-
-NativeEnumTypes *NativeEnumTypes::clone() const {
- return new NativeEnumTypes(Session, Matches, Kind);
-}
-
-} // namespace pdb
-} // namespace llvm
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index e8b06065fc60..6dde5d08a500 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -12,34 +12,53 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
-namespace llvm {
-namespace pdb {
+using namespace llvm;
+using namespace llvm::pdb;
-NativeExeSymbol::NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId)
- : NativeRawSymbol(Session, SymbolId), File(Session.getPDBFile()) {}
+static DbiStream *getDbiStreamPtr(NativeSession &Session) {
+ Expected<DbiStream &> DbiS = Session.getPDBFile().getPDBDbiStream();
+ if (DbiS)
+ return &DbiS.get();
-std::unique_ptr<NativeRawSymbol> NativeExeSymbol::clone() const {
- return llvm::make_unique<NativeExeSymbol>(Session, SymbolId);
+ consumeError(DbiS.takeError());
+ return nullptr;
}
+NativeExeSymbol::NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId)
+ : NativeRawSymbol(Session, PDB_SymType::Exe, SymbolId),
+ Dbi(getDbiStreamPtr(Session)) {}
+
std::unique_ptr<IPDBEnumSymbols>
NativeExeSymbol::findChildren(PDB_SymType Type) const {
switch (Type) {
case PDB_SymType::Compiland: {
- auto Dbi = File.getPDBDbiStream();
- if (Dbi) {
- const DbiModuleList &Modules = Dbi->modules();
- return std::unique_ptr<IPDBEnumSymbols>(
- new NativeEnumModules(Session, Modules));
- }
- consumeError(Dbi.takeError());
+ return std::unique_ptr<IPDBEnumSymbols>(new NativeEnumModules(Session));
break;
}
+ case PDB_SymType::ArrayType:
+ return Session.getSymbolCache().createTypeEnumerator(codeview::LF_ARRAY);
case PDB_SymType::Enum:
- return Session.createTypeEnumerator(codeview::LF_ENUM);
+ return Session.getSymbolCache().createTypeEnumerator(codeview::LF_ENUM);
+ case PDB_SymType::PointerType:
+ return Session.getSymbolCache().createTypeEnumerator(codeview::LF_POINTER);
+ case PDB_SymType::UDT:
+ return Session.getSymbolCache().createTypeEnumerator(
+ {codeview::LF_STRUCTURE, codeview::LF_CLASS, codeview::LF_UNION,
+ codeview::LF_INTERFACE});
+ case PDB_SymType::VTableShape:
+ return Session.getSymbolCache().createTypeEnumerator(codeview::LF_VTSHAPE);
+ case PDB_SymType::FunctionSig:
+ return Session.getSymbolCache().createTypeEnumerator(
+ {codeview::LF_PROCEDURE, codeview::LF_MFUNCTION});
+ case PDB_SymType::Typedef:
+ return Session.getSymbolCache().createGlobalsEnumerator(codeview::S_UDT);
+
default:
break;
}
@@ -47,7 +66,7 @@ NativeExeSymbol::findChildren(PDB_SymType Type) const {
}
uint32_t NativeExeSymbol::getAge() const {
- auto IS = File.getPDBInfoStream();
+ auto IS = Session.getPDBFile().getPDBInfoStream();
if (IS)
return IS->getAge();
consumeError(IS.takeError());
@@ -55,11 +74,11 @@ uint32_t NativeExeSymbol::getAge() const {
}
std::string NativeExeSymbol::getSymbolsFileName() const {
- return File.getFilePath();
+ return Session.getPDBFile().getFilePath();
}
codeview::GUID NativeExeSymbol::getGuid() const {
- auto IS = File.getPDBInfoStream();
+ auto IS = Session.getPDBFile().getPDBInfoStream();
if (IS)
return IS->getGuid();
consumeError(IS.takeError());
@@ -67,7 +86,7 @@ codeview::GUID NativeExeSymbol::getGuid() const {
}
bool NativeExeSymbol::hasCTypes() const {
- auto Dbi = File.getPDBDbiStream();
+ auto Dbi = Session.getPDBFile().getPDBDbiStream();
if (Dbi)
return Dbi->hasCTypes();
consumeError(Dbi.takeError());
@@ -75,12 +94,9 @@ bool NativeExeSymbol::hasCTypes() const {
}
bool NativeExeSymbol::hasPrivateSymbols() const {
- auto Dbi = File.getPDBDbiStream();
+ auto Dbi = Session.getPDBFile().getPDBDbiStream();
if (Dbi)
return !Dbi->isStripped();
consumeError(Dbi.takeError());
return false;
}
-
-} // namespace pdb
-} // namespace llvm
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index a4b029596314..62950cb3e52a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -7,82 +7,92 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+#include "llvm/Support/FormatVariadic.h"
using namespace llvm;
using namespace llvm::pdb;
-NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId)
- : Session(PDBSession), SymbolId(SymbolId) {}
+NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, PDB_SymType Tag,
+ SymIndexId SymbolId)
+ : Session(PDBSession), Tag(Tag), SymbolId(SymbolId) {}
-void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {}
+void NativeRawSymbol::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ dumpSymbolIdField(OS, "symIndexId", SymbolId, Indent, Session,
+ PdbSymbolIdField::SymIndexId, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "symTag", Tag, Indent);
+}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildren(PDB_SymType Type) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildren(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint32_t Section, uint32_t Offset) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint64_t VA) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
PDB_NameSearchFlags Flags, uint32_t RVA) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByAddr(uint32_t Section,
uint32_t Offset) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumSymbols>
NativeRawSymbol::findInlineFramesByVA(uint64_t VA) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLines() const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset,
uint32_t Length) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
}
std::unique_ptr<IPDBEnumLineNumbers>
NativeRawSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const {
- return nullptr;
+ return llvm::make_unique<NullEnumerator<IPDBLineNumber>>();
}
void NativeRawSymbol::getDataBytes(SmallVector<uint8_t, 32> &bytes) const {
@@ -105,9 +115,7 @@ uint32_t NativeRawSymbol::getAge() const {
return 0;
}
-uint32_t NativeRawSymbol::getArrayIndexTypeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getArrayIndexTypeId() const { return 0; }
void NativeRawSymbol::getBackEndVersion(VersionInfo &Version) const {
Version.Major = 0;
@@ -124,9 +132,7 @@ uint32_t NativeRawSymbol::getBaseDataSlot() const {
return 0;
}
-uint32_t NativeRawSymbol::getBaseSymbolId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getBaseSymbolId() const { return 0; }
PDB_BuiltinType NativeRawSymbol::getBuiltinType() const {
return PDB_BuiltinType::None;
@@ -140,9 +146,7 @@ PDB_CallingConv NativeRawSymbol::getCallingConvention() const {
return PDB_CallingConv::FarStdCall;
}
-uint32_t NativeRawSymbol::getClassParentId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getClassParentId() const { return 0; }
std::string NativeRawSymbol::getCompilerName() const {
return {};
@@ -167,9 +171,7 @@ PDB_Lang NativeRawSymbol::getLanguage() const {
return PDB_Lang::Cobol;
}
-uint32_t NativeRawSymbol::getLexicalParentId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getLexicalParentId() const { return 0; }
std::string NativeRawSymbol::getLibraryName() const {
return {};
@@ -188,12 +190,10 @@ uint32_t NativeRawSymbol::getLiveRangeStartRelativeVirtualAddress() const {
}
codeview::RegisterId NativeRawSymbol::getLocalBasePointerRegisterId() const {
- return codeview::RegisterId::CVRegEAX;
+ return codeview::RegisterId::EAX;
}
-uint32_t NativeRawSymbol::getLowerBoundId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getLowerBoundId() const { return 0; }
uint32_t NativeRawSymbol::getMemorySpaceKind() const {
return 0;
@@ -231,9 +231,7 @@ uint32_t NativeRawSymbol::getOemId() const {
return 0;
}
-uint32_t NativeRawSymbol::getOemSymbolId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getOemSymbolId() const { return 0; }
uint32_t NativeRawSymbol::getOffsetInUdt() const {
return 0;
@@ -248,7 +246,7 @@ uint32_t NativeRawSymbol::getRank() const {
}
codeview::RegisterId NativeRawSymbol::getRegisterId() const {
- return codeview::RegisterId::CVRegEAX;
+ return codeview::RegisterId::EAX;
}
uint32_t NativeRawSymbol::getRegisterType() const {
@@ -288,13 +286,11 @@ uint32_t NativeRawSymbol::getStride() const {
return 0;
}
-uint32_t NativeRawSymbol::getSubTypeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getSubTypeId() const { return 0; }
std::string NativeRawSymbol::getSymbolsFileName() const { return {}; }
-uint32_t NativeRawSymbol::getSymIndexId() const { return SymbolId; }
+SymIndexId NativeRawSymbol::getSymIndexId() const { return SymbolId; }
uint32_t NativeRawSymbol::getTargetOffset() const {
return 0;
@@ -324,9 +320,7 @@ uint32_t NativeRawSymbol::getToken() const {
return 0;
}
-uint32_t NativeRawSymbol::getTypeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getTypeId() const { return 0; }
uint32_t NativeRawSymbol::getUavSlot() const {
return 0;
@@ -341,13 +335,9 @@ std::string NativeRawSymbol::getUndecoratedNameEx(
return {};
}
-uint32_t NativeRawSymbol::getUnmodifiedTypeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getUnmodifiedTypeId() const { return 0; }
-uint32_t NativeRawSymbol::getUpperBoundId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getUpperBoundId() const { return 0; }
Variant NativeRawSymbol::getValue() const {
return Variant();
@@ -361,9 +351,7 @@ uint32_t NativeRawSymbol::getVirtualBaseOffset() const {
return 0;
}
-uint32_t NativeRawSymbol::getVirtualTableShapeId() const {
- return 0;
-}
+SymIndexId NativeRawSymbol::getVirtualTableShapeId() const { return 0; }
std::unique_ptr<PDBSymbolTypeBuiltin>
NativeRawSymbol::getVirtualBaseTableType() const {
@@ -374,9 +362,7 @@ PDB_DataKind NativeRawSymbol::getDataKind() const {
return PDB_DataKind::Unknown;
}
-PDB_SymType NativeRawSymbol::getSymTag() const {
- return PDB_SymType::None;
-}
+PDB_SymType NativeRawSymbol::getSymTag() const { return Tag; }
codeview::GUID NativeRawSymbol::getGuid() const { return codeview::GUID{{0}}; }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 086da13135c5..7807e312365c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -11,16 +11,16 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
-#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
@@ -40,31 +40,19 @@ using namespace llvm;
using namespace llvm::msf;
using namespace llvm::pdb;
-namespace {
-// Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary
-// to instantiate a NativeBuiltinSymbol for that type.
-static const struct BuiltinTypeEntry {
- codeview::SimpleTypeKind Kind;
- PDB_BuiltinType Type;
- uint32_t Size;
-} BuiltinTypes[] = {
- {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4},
- {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4},
- {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4},
- {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8},
- {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1},
- {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1},
- {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1},
- {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2},
- {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1}
- // This table can be grown as necessary, but these are the only types we've
- // needed so far.
-};
-} // namespace
+static DbiStream *getDbiStreamPtr(PDBFile &File) {
+ Expected<DbiStream &> DbiS = File.getPDBDbiStream();
+ if (DbiS)
+ return &DbiS.get();
+
+ consumeError(DbiS.takeError());
+ return nullptr;
+}
NativeSession::NativeSession(std::unique_ptr<PDBFile> PdbFile,
std::unique_ptr<BumpPtrAllocator> Allocator)
- : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {}
+ : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)),
+ Cache(*this, getDbiStreamPtr(*Pdb)) {}
NativeSession::~NativeSession() = default;
@@ -92,97 +80,17 @@ Error NativeSession::createFromExe(StringRef Path,
return make_error<RawError>(raw_error_code::feature_unsupported);
}
-std::unique_ptr<PDBSymbolCompiland>
-NativeSession::createCompilandSymbol(DbiModuleDescriptor MI) {
- const auto Id = static_cast<SymIndexId>(SymbolCache.size());
- SymbolCache.push_back(
- llvm::make_unique<NativeCompilandSymbol>(*this, Id, MI));
- return llvm::make_unique<PDBSymbolCompiland>(
- *this, std::unique_ptr<IPDBRawSymbol>(SymbolCache[Id]->clone()));
-}
-
-std::unique_ptr<PDBSymbolTypeEnum>
-NativeSession::createEnumSymbol(codeview::TypeIndex Index) {
- const auto Id = findSymbolByTypeIndex(Index);
- return llvm::make_unique<PDBSymbolTypeEnum>(
- *this, std::unique_ptr<IPDBRawSymbol>(SymbolCache[Id]->clone()));
-}
-
-std::unique_ptr<IPDBEnumSymbols>
-NativeSession::createTypeEnumerator(codeview::TypeLeafKind Kind) {
- auto Tpi = Pdb->getPDBTpiStream();
- if (!Tpi) {
- consumeError(Tpi.takeError());
- return nullptr;
- }
- auto &Types = Tpi->typeCollection();
- return std::unique_ptr<IPDBEnumSymbols>(
- new NativeEnumTypes(*this, Types, codeview::LF_ENUM));
-}
-
-SymIndexId NativeSession::findSymbolByTypeIndex(codeview::TypeIndex Index) {
- // First see if it's already in our cache.
- const auto Entry = TypeIndexToSymbolId.find(Index);
- if (Entry != TypeIndexToSymbolId.end())
- return Entry->second;
-
- // Symbols for built-in types are created on the fly.
- if (Index.isSimple()) {
- // FIXME: We will eventually need to handle pointers to other simple types,
- // which are still simple types in the world of CodeView TypeIndexes.
- if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct)
- return 0;
- const auto Kind = Index.getSimpleKind();
- const auto It =
- std::find_if(std::begin(BuiltinTypes), std::end(BuiltinTypes),
- [Kind](const BuiltinTypeEntry &Builtin) {
- return Builtin.Kind == Kind;
- });
- if (It == std::end(BuiltinTypes))
- return 0;
- SymIndexId Id = SymbolCache.size();
- SymbolCache.emplace_back(
- llvm::make_unique<NativeBuiltinSymbol>(*this, Id, It->Type, It->Size));
- TypeIndexToSymbolId[Index] = Id;
- return Id;
- }
-
- // We need to instantiate and cache the desired type symbol.
- auto Tpi = Pdb->getPDBTpiStream();
- if (!Tpi) {
- consumeError(Tpi.takeError());
- return 0;
- }
- auto &Types = Tpi->typeCollection();
- const auto &I = Types.getType(Index);
- const auto Id = static_cast<SymIndexId>(SymbolCache.size());
- // TODO(amccarth): Make this handle all types, not just LF_ENUMs.
- assert(I.kind() == codeview::LF_ENUM);
- SymbolCache.emplace_back(llvm::make_unique<NativeEnumSymbol>(*this, Id, I));
- TypeIndexToSymbolId[Index] = Id;
- return Id;
-}
-
uint64_t NativeSession::getLoadAddress() const { return 0; }
bool NativeSession::setLoadAddress(uint64_t Address) { return false; }
std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() {
- const auto Id = static_cast<SymIndexId>(SymbolCache.size());
- SymbolCache.push_back(llvm::make_unique<NativeExeSymbol>(*this, Id));
- auto RawSymbol = SymbolCache[Id]->clone();
- auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
- std::unique_ptr<PDBSymbolExe> ExeSymbol(
- static_cast<PDBSymbolExe *>(PdbSymbol.release()));
- return ExeSymbol;
+ return PDBSymbol::createAs<PDBSymbolExe>(*this, getNativeGlobalScope());
}
std::unique_ptr<PDBSymbol>
-NativeSession::getSymbolById(uint32_t SymbolId) const {
- // If the caller has a SymbolId, it'd better be in our SymbolCache.
- return SymbolId < SymbolCache.size()
- ? PDBSymbol::create(*this, SymbolCache[SymbolId]->clone())
- : nullptr;
+NativeSession::getSymbolById(SymIndexId SymbolId) const {
+ return Cache.getSymbolById(SymbolId);
}
bool NativeSession::addressForVA(uint64_t VA, uint32_t &Section,
@@ -291,3 +199,19 @@ std::unique_ptr<IPDBEnumSectionContribs>
NativeSession::getSectionContribs() const {
return nullptr;
}
+
+std::unique_ptr<IPDBEnumFrameData>
+NativeSession::getFrameData() const {
+ return nullptr;
+}
+
+void NativeSession::initializeExeSymbol() {
+ if (ExeSymbol == 0)
+ ExeSymbol = Cache.createSymbol<NativeExeSymbol>();
+}
+
+NativeExeSymbol &NativeSession::getNativeGlobalScope() const {
+ const_cast<NativeSession &>(*this).initializeExeSymbol();
+
+ return Cache.getNativeSymbolById<NativeExeSymbol>(ExeSymbol);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
new file mode 100644
index 000000000000..6ebb8cae3a65
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
@@ -0,0 +1,123 @@
+//===- NativeSymbolEnumerator.cpp - info about enumerators ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h"
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeSymbolEnumerator::NativeSymbolEnumerator(
+ NativeSession &Session, SymIndexId Id, const NativeTypeEnum &Parent,
+ codeview::EnumeratorRecord Record)
+ : NativeRawSymbol(Session, PDB_SymType::Data, Id), Parent(Parent),
+ Record(std::move(Record)) {}
+
+NativeSymbolEnumerator::~NativeSymbolEnumerator() {}
+
+void NativeSymbolEnumerator::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+ dumpSymbolIdField(OS, "classParentId", getClassParentId(), Indent, Session,
+ PdbSymbolIdField::ClassParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolIdField(OS, "lexicalParentId", getLexicalParentId(), Indent,
+ Session, PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ dumpSymbolField(OS, "dataKind", getDataKind(), Indent);
+ dumpSymbolField(OS, "locationType", getLocationType(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+ dumpSymbolField(OS, "value", getValue(), Indent);
+}
+
+SymIndexId NativeSymbolEnumerator::getClassParentId() const {
+ return Parent.getSymIndexId();
+}
+
+SymIndexId NativeSymbolEnumerator::getLexicalParentId() const { return 0; }
+
+std::string NativeSymbolEnumerator::getName() const { return Record.Name; }
+
+SymIndexId NativeSymbolEnumerator::getTypeId() const {
+ return Parent.getTypeId();
+}
+
+PDB_DataKind NativeSymbolEnumerator::getDataKind() const {
+ return PDB_DataKind::Constant;
+}
+
+PDB_LocType NativeSymbolEnumerator::getLocationType() const {
+ return PDB_LocType::Constant;
+}
+
+bool NativeSymbolEnumerator::isConstType() const { return false; }
+
+bool NativeSymbolEnumerator::isVolatileType() const { return false; }
+
+bool NativeSymbolEnumerator::isUnalignedType() const { return false; }
+
+Variant NativeSymbolEnumerator::getValue() const {
+ const NativeTypeBuiltin &BT = Parent.getUnderlyingBuiltinType();
+
+ switch (BT.getBuiltinType()) {
+ case PDB_BuiltinType::Int:
+ case PDB_BuiltinType::Long:
+ case PDB_BuiltinType::Char: {
+ assert(Record.Value.isSignedIntN(BT.getLength() * 8));
+ int64_t N = Record.Value.getSExtValue();
+ switch (BT.getLength()) {
+ case 1:
+ return Variant{static_cast<int8_t>(N)};
+ case 2:
+ return Variant{static_cast<int16_t>(N)};
+ case 4:
+ return Variant{static_cast<int32_t>(N)};
+ case 8:
+ return Variant{static_cast<int64_t>(N)};
+ }
+ break;
+ }
+ case PDB_BuiltinType::UInt:
+ case PDB_BuiltinType::ULong: {
+ assert(Record.Value.isIntN(BT.getLength() * 8));
+ uint64_t U = Record.Value.getZExtValue();
+ switch (BT.getLength()) {
+ case 1:
+ return Variant{static_cast<uint8_t>(U)};
+ case 2:
+ return Variant{static_cast<uint16_t>(U)};
+ case 4:
+ return Variant{static_cast<uint32_t>(U)};
+ case 8:
+ return Variant{static_cast<uint64_t>(U)};
+ }
+ break;
+ }
+ case PDB_BuiltinType::Bool: {
+ assert(Record.Value.isIntN(BT.getLength() * 8));
+ uint64_t U = Record.Value.getZExtValue();
+ return Variant{static_cast<bool>(U)};
+ }
+ default:
+ assert(false && "Invalid enumeration type");
+ break;
+ }
+
+ return Variant{Record.Value.getSExtValue()};
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
new file mode 100644
index 000000000000..a52561728a98
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
@@ -0,0 +1,67 @@
+//===- NativeTypeArray.cpp - info about arrays ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h"
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypeArray::NativeTypeArray(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI,
+ codeview::ArrayRecord Record)
+ : NativeRawSymbol(Session, PDB_SymType::ArrayType, Id), Record(Record),
+ Index(TI) {}
+NativeTypeArray::~NativeTypeArray() {}
+
+void NativeTypeArray::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolField(OS, "arrayIndexTypeId", getArrayIndexTypeId(), Indent);
+ dumpSymbolIdField(OS, "elementTypeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "length", getLength(), Indent);
+ dumpSymbolField(OS, "count", getCount(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+SymIndexId NativeTypeArray::getArrayIndexTypeId() const {
+ return Session.getSymbolCache().findSymbolByTypeIndex(Record.getIndexType());
+}
+
+bool NativeTypeArray::isConstType() const { return false; }
+
+bool NativeTypeArray::isUnalignedType() const { return false; }
+
+bool NativeTypeArray::isVolatileType() const { return false; }
+
+uint32_t NativeTypeArray::getCount() const {
+ NativeRawSymbol &Element =
+ Session.getSymbolCache().getNativeSymbolById(getTypeId());
+ return getLength() / Element.getLength();
+}
+
+SymIndexId NativeTypeArray::getTypeId() const {
+ return Session.getSymbolCache().findSymbolByTypeIndex(
+ Record.getElementType());
+}
+
+uint64_t NativeTypeArray::getLength() const { return Record.Size; }
\ No newline at end of file
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
new file mode 100644
index 000000000000..7b0f13f3c075
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
@@ -0,0 +1,47 @@
+//===- NativeTypeBuiltin.cpp -------------------------------------- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypeBuiltin::NativeTypeBuiltin(NativeSession &PDBSession, SymIndexId Id,
+ ModifierOptions Mods, PDB_BuiltinType T,
+ uint64_t L)
+ : NativeRawSymbol(PDBSession, PDB_SymType::BuiltinType, Id),
+ Session(PDBSession), Mods(Mods), Type(T), Length(L) {}
+
+NativeTypeBuiltin::~NativeTypeBuiltin() {}
+
+void NativeTypeBuiltin::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {}
+
+PDB_SymType NativeTypeBuiltin::getSymTag() const {
+ return PDB_SymType::BuiltinType;
+}
+
+PDB_BuiltinType NativeTypeBuiltin::getBuiltinType() const { return Type; }
+
+bool NativeTypeBuiltin::isConstType() const {
+ return (Mods & ModifierOptions::Const) != ModifierOptions::None;
+}
+
+uint64_t NativeTypeBuiltin::getLength() const { return Length; }
+
+bool NativeTypeBuiltin::isUnalignedType() const {
+ return (Mods & ModifierOptions::Unaligned) != ModifierOptions::None;
+}
+
+bool NativeTypeBuiltin::isVolatileType() const {
+ return (Mods & ModifierOptions::Volatile) != ModifierOptions::None;
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
new file mode 100644
index 000000000000..37176fe083b9
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
@@ -0,0 +1,382 @@
+//===- NativeTypeEnum.cpp - info about enum type ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+
+#include "llvm/Support/FormatVariadic.h"
+
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+namespace {
+// Yea, this is a pretty terrible class name. But if we have an enum:
+//
+// enum Foo {
+// A,
+// B
+// };
+//
+// then A and B are the "enumerators" of the "enum" Foo. And we need
+// to enumerate them.
+class NativeEnumEnumEnumerators : public IPDBEnumSymbols, TypeVisitorCallbacks {
+public:
+ NativeEnumEnumEnumerators(NativeSession &Session,
+ const NativeTypeEnum &ClassParent);
+
+ uint32_t getChildCount() const override;
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override;
+ std::unique_ptr<PDBSymbol> getNext() override;
+ void reset() override;
+
+private:
+ Error visitKnownMember(CVMemberRecord &CVM,
+ EnumeratorRecord &Record) override;
+ Error visitKnownMember(CVMemberRecord &CVM,
+ ListContinuationRecord &Record) override;
+
+ NativeSession &Session;
+ const NativeTypeEnum &ClassParent;
+ std::vector<EnumeratorRecord> Enumerators;
+ Optional<TypeIndex> ContinuationIndex;
+ uint32_t Index = 0;
+};
+} // namespace
+
+NativeEnumEnumEnumerators::NativeEnumEnumEnumerators(
+ NativeSession &Session, const NativeTypeEnum &ClassParent)
+ : Session(Session), ClassParent(ClassParent) {
+ TpiStream &Tpi = cantFail(Session.getPDBFile().getPDBTpiStream());
+ LazyRandomTypeCollection &Types = Tpi.typeCollection();
+
+ ContinuationIndex = ClassParent.getEnumRecord().FieldList;
+ while (ContinuationIndex) {
+ CVType FieldList = Types.getType(*ContinuationIndex);
+ assert(FieldList.kind() == LF_FIELDLIST);
+ ContinuationIndex.reset();
+ cantFail(visitMemberRecordStream(FieldList.data(), *this));
+ }
+}
+
+Error NativeEnumEnumEnumerators::visitKnownMember(CVMemberRecord &CVM,
+ EnumeratorRecord &Record) {
+ Enumerators.push_back(Record);
+ return Error::success();
+}
+
+Error NativeEnumEnumEnumerators::visitKnownMember(
+ CVMemberRecord &CVM, ListContinuationRecord &Record) {
+ ContinuationIndex = Record.ContinuationIndex;
+ return Error::success();
+}
+
+uint32_t NativeEnumEnumEnumerators::getChildCount() const {
+ return Enumerators.size();
+}
+
+std::unique_ptr<PDBSymbol>
+NativeEnumEnumEnumerators::getChildAtIndex(uint32_t Index) const {
+ if (Index >= getChildCount())
+ return nullptr;
+
+ SymIndexId Id = Session.getSymbolCache()
+ .getOrCreateFieldListMember<NativeSymbolEnumerator>(
+ ClassParent.getEnumRecord().FieldList, Index,
+ ClassParent, Enumerators[Index]);
+ return Session.getSymbolCache().getSymbolById(Id);
+}
+
+std::unique_ptr<PDBSymbol> NativeEnumEnumEnumerators::getNext() {
+ if (Index >= getChildCount())
+ return nullptr;
+
+ return getChildAtIndex(Index++);
+}
+
+void NativeEnumEnumEnumerators::reset() { Index = 0; }
+
+NativeTypeEnum::NativeTypeEnum(NativeSession &Session, SymIndexId Id,
+ TypeIndex Index, EnumRecord Record)
+ : NativeRawSymbol(Session, PDB_SymType::Enum, Id), Index(Index),
+ Record(std::move(Record)) {}
+
+NativeTypeEnum::NativeTypeEnum(NativeSession &Session, SymIndexId Id,
+ NativeTypeEnum &UnmodifiedType,
+ codeview::ModifierRecord Modifier)
+ : NativeRawSymbol(Session, PDB_SymType::Enum, Id),
+ UnmodifiedType(&UnmodifiedType), Modifiers(std::move(Modifier)) {}
+
+NativeTypeEnum::~NativeTypeEnum() {}
+
+void NativeTypeEnum::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolField(OS, "baseType", static_cast<uint32_t>(getBuiltinType()),
+ Indent);
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ if (Modifiers.hasValue())
+ dumpSymbolIdField(OS, "unmodifiedTypeId", getUnmodifiedTypeId(), Indent,
+ Session, PdbSymbolIdField::UnmodifiedType, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "length", getLength(), Indent);
+ dumpSymbolField(OS, "constructor", hasConstructor(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "hasAssignmentOperator", hasAssignmentOperator(), Indent);
+ dumpSymbolField(OS, "hasCastOperator", hasCastOperator(), Indent);
+ dumpSymbolField(OS, "hasNestedTypes", hasNestedTypes(), Indent);
+ dumpSymbolField(OS, "overloadedOperator", hasOverloadedOperator(), Indent);
+ dumpSymbolField(OS, "isInterfaceUdt", isInterfaceUdt(), Indent);
+ dumpSymbolField(OS, "intrinsic", isIntrinsic(), Indent);
+ dumpSymbolField(OS, "nested", isNested(), Indent);
+ dumpSymbolField(OS, "packed", isPacked(), Indent);
+ dumpSymbolField(OS, "isRefUdt", isRefUdt(), Indent);
+ dumpSymbolField(OS, "scoped", isScoped(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "isValueUdt", isValueUdt(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeTypeEnum::findChildren(PDB_SymType Type) const {
+ if (Type != PDB_SymType::Data)
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+
+ const NativeTypeEnum *ClassParent = nullptr;
+ if (!Modifiers)
+ ClassParent = this;
+ else
+ ClassParent = UnmodifiedType;
+ return llvm::make_unique<NativeEnumEnumEnumerators>(Session, *ClassParent);
+}
+
+PDB_SymType NativeTypeEnum::getSymTag() const { return PDB_SymType::Enum; }
+
+PDB_BuiltinType NativeTypeEnum::getBuiltinType() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getBuiltinType();
+
+ Session.getSymbolCache().findSymbolByTypeIndex(Record->getUnderlyingType());
+
+ codeview::TypeIndex Underlying = Record->getUnderlyingType();
+
+ // This indicates a corrupt record.
+ if (!Underlying.isSimple() ||
+ Underlying.getSimpleMode() != SimpleTypeMode::Direct) {
+ return PDB_BuiltinType::None;
+ }
+
+ switch (Underlying.getSimpleKind()) {
+ case SimpleTypeKind::Boolean128:
+ case SimpleTypeKind::Boolean64:
+ case SimpleTypeKind::Boolean32:
+ case SimpleTypeKind::Boolean16:
+ case SimpleTypeKind::Boolean8:
+ return PDB_BuiltinType::Bool;
+ case SimpleTypeKind::NarrowCharacter:
+ case SimpleTypeKind::UnsignedCharacter:
+ case SimpleTypeKind::SignedCharacter:
+ return PDB_BuiltinType::Char;
+ case SimpleTypeKind::WideCharacter:
+ return PDB_BuiltinType::WCharT;
+ case SimpleTypeKind::Character16:
+ return PDB_BuiltinType::Char16;
+ case SimpleTypeKind::Character32:
+ return PDB_BuiltinType::Char32;
+ case SimpleTypeKind::Int128:
+ case SimpleTypeKind::Int128Oct:
+ case SimpleTypeKind::Int16:
+ case SimpleTypeKind::Int16Short:
+ case SimpleTypeKind::Int32:
+ case SimpleTypeKind::Int32Long:
+ case SimpleTypeKind::Int64:
+ case SimpleTypeKind::Int64Quad:
+ return PDB_BuiltinType::Int;
+ case SimpleTypeKind::UInt128:
+ case SimpleTypeKind::UInt128Oct:
+ case SimpleTypeKind::UInt16:
+ case SimpleTypeKind::UInt16Short:
+ case SimpleTypeKind::UInt32:
+ case SimpleTypeKind::UInt32Long:
+ case SimpleTypeKind::UInt64:
+ case SimpleTypeKind::UInt64Quad:
+ return PDB_BuiltinType::UInt;
+ case SimpleTypeKind::HResult:
+ return PDB_BuiltinType::HResult;
+ case SimpleTypeKind::Complex16:
+ case SimpleTypeKind::Complex32:
+ case SimpleTypeKind::Complex32PartialPrecision:
+ case SimpleTypeKind::Complex64:
+ case SimpleTypeKind::Complex80:
+ case SimpleTypeKind::Complex128:
+ return PDB_BuiltinType::Complex;
+ case SimpleTypeKind::Float16:
+ case SimpleTypeKind::Float32:
+ case SimpleTypeKind::Float32PartialPrecision:
+ case SimpleTypeKind::Float48:
+ case SimpleTypeKind::Float64:
+ case SimpleTypeKind::Float80:
+ case SimpleTypeKind::Float128:
+ return PDB_BuiltinType::Float;
+ default:
+ return PDB_BuiltinType::None;
+ }
+ llvm_unreachable("Unreachable");
+}
+
+SymIndexId NativeTypeEnum::getUnmodifiedTypeId() const {
+ return UnmodifiedType ? UnmodifiedType->getSymIndexId() : 0;
+}
+
+bool NativeTypeEnum::hasConstructor() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasConstructor();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::HasConstructorOrDestructor);
+}
+
+bool NativeTypeEnum::hasAssignmentOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasAssignmentOperator();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::HasOverloadedAssignmentOperator);
+}
+
+bool NativeTypeEnum::hasNestedTypes() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasNestedTypes();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::ContainsNestedClass);
+}
+
+bool NativeTypeEnum::isIntrinsic() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isIntrinsic();
+
+ return bool(Record->getOptions() & codeview::ClassOptions::Intrinsic);
+}
+
+bool NativeTypeEnum::hasCastOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasCastOperator();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::HasConversionOperator);
+}
+
+uint64_t NativeTypeEnum::getLength() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getLength();
+
+ const auto Id = Session.getSymbolCache().findSymbolByTypeIndex(
+ Record->getUnderlyingType());
+ const auto UnderlyingType =
+ Session.getConcreteSymbolById<PDBSymbolTypeBuiltin>(Id);
+ return UnderlyingType ? UnderlyingType->getLength() : 0;
+}
+
+std::string NativeTypeEnum::getName() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getName();
+
+ return Record->getName();
+}
+
+bool NativeTypeEnum::isNested() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isNested();
+
+ return bool(Record->getOptions() & codeview::ClassOptions::Nested);
+}
+
+bool NativeTypeEnum::hasOverloadedOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasOverloadedOperator();
+
+ return bool(Record->getOptions() &
+ codeview::ClassOptions::HasOverloadedOperator);
+}
+
+bool NativeTypeEnum::isPacked() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isPacked();
+
+ return bool(Record->getOptions() & codeview::ClassOptions::Packed);
+}
+
+bool NativeTypeEnum::isScoped() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isScoped();
+
+ return bool(Record->getOptions() & codeview::ClassOptions::Scoped);
+}
+
+SymIndexId NativeTypeEnum::getTypeId() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getTypeId();
+
+ return Session.getSymbolCache().findSymbolByTypeIndex(
+ Record->getUnderlyingType());
+}
+
+bool NativeTypeEnum::isRefUdt() const { return false; }
+
+bool NativeTypeEnum::isValueUdt() const { return false; }
+
+bool NativeTypeEnum::isInterfaceUdt() const { return false; }
+
+bool NativeTypeEnum::isConstType() const {
+ if (!Modifiers)
+ return false;
+ return ((Modifiers->getModifiers() & ModifierOptions::Const) !=
+ ModifierOptions::None);
+}
+
+bool NativeTypeEnum::isVolatileType() const {
+ if (!Modifiers)
+ return false;
+ return ((Modifiers->getModifiers() & ModifierOptions::Volatile) !=
+ ModifierOptions::None);
+}
+
+bool NativeTypeEnum::isUnalignedType() const {
+ if (!Modifiers)
+ return false;
+ return ((Modifiers->getModifiers() & ModifierOptions::Unaligned) !=
+ ModifierOptions::None);
+}
+
+const NativeTypeBuiltin &NativeTypeEnum::getUnderlyingBuiltinType() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getUnderlyingBuiltinType();
+
+ return Session.getSymbolCache().getNativeSymbolById<NativeTypeBuiltin>(
+ getTypeId());
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
new file mode 100644
index 000000000000..a9590fffdb87
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
@@ -0,0 +1,200 @@
+//===- NativeTypeFunctionSig.cpp - info about function signature -*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h"
+
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+namespace {
+// This is kind of a silly class, which is why we keep it private to the file.
+// Its only purpose is to wrap the real type record, so that the lexical
+// parent can point to the function instead of the global scope.
+class NativeTypeFunctionArg : public NativeRawSymbol {
+public:
+ NativeTypeFunctionArg(NativeSession &Session,
+ std::unique_ptr<PDBSymbol> RealType)
+ : NativeRawSymbol(Session, PDB_SymType::FunctionArg, 0),
+ RealType(std::move(RealType)) {}
+
+ void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const override {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ }
+
+ SymIndexId getTypeId() const override { return RealType->getSymIndexId(); }
+
+ std::unique_ptr<PDBSymbol> RealType;
+};
+
+class NativeEnumFunctionArgs : public IPDBEnumChildren<PDBSymbol> {
+public:
+ NativeEnumFunctionArgs(NativeSession &Session,
+ std::unique_ptr<NativeEnumTypes> TypeEnumerator)
+ : Session(Session), TypeEnumerator(std::move(TypeEnumerator)) {}
+
+ uint32_t getChildCount() const override {
+ return TypeEnumerator->getChildCount();
+ }
+ std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override {
+ return wrap(TypeEnumerator->getChildAtIndex(Index));
+ }
+ std::unique_ptr<PDBSymbol> getNext() override {
+ return wrap(TypeEnumerator->getNext());
+ }
+
+ void reset() override { TypeEnumerator->reset(); }
+
+private:
+ std::unique_ptr<PDBSymbol> wrap(std::unique_ptr<PDBSymbol> S) const {
+ if (!S)
+ return nullptr;
+ auto NTFA = llvm::make_unique<NativeTypeFunctionArg>(Session, std::move(S));
+ return PDBSymbol::create(Session, std::move(NTFA));
+ }
+ NativeSession &Session;
+ std::unique_ptr<NativeEnumTypes> TypeEnumerator;
+};
+} // namespace
+
+NativeTypeFunctionSig::NativeTypeFunctionSig(NativeSession &Session,
+ SymIndexId Id,
+ codeview::TypeIndex Index,
+ codeview::ProcedureRecord Proc)
+ : NativeRawSymbol(Session, PDB_SymType::FunctionSig, Id),
+ Proc(std::move(Proc)), Index(Index), IsMemberFunction(false) {}
+
+NativeTypeFunctionSig::NativeTypeFunctionSig(
+ NativeSession &Session, SymIndexId Id, codeview::TypeIndex Index,
+ codeview::MemberFunctionRecord MemberFunc)
+ : NativeRawSymbol(Session, PDB_SymType::FunctionSig, Id),
+ MemberFunc(std::move(MemberFunc)), Index(Index), IsMemberFunction(true) {}
+
+void NativeTypeFunctionSig::initialize() {
+ if (IsMemberFunction) {
+ ClassParentId =
+ Session.getSymbolCache().findSymbolByTypeIndex(MemberFunc.ClassType);
+ initializeArgList(MemberFunc.ArgumentList);
+ } else {
+ initializeArgList(Proc.ArgumentList);
+ }
+}
+
+NativeTypeFunctionSig::~NativeTypeFunctionSig() {}
+
+void NativeTypeFunctionSig::initializeArgList(codeview::TypeIndex ArgListTI) {
+ TpiStream &Tpi = cantFail(Session.getPDBFile().getPDBTpiStream());
+ CVType CVT = Tpi.typeCollection().getType(ArgListTI);
+
+ cantFail(TypeDeserializer::deserializeAs<ArgListRecord>(CVT, ArgList));
+}
+
+void NativeTypeFunctionSig::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+
+ dumpSymbolField(OS, "callingConvention", getCallingConvention(), Indent);
+ dumpSymbolField(OS, "count", getCount(), Indent);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ if (IsMemberFunction)
+ dumpSymbolField(OS, "thisAdjust", getThisAdjust(), Indent);
+ dumpSymbolField(OS, "constructor", hasConstructor(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "isConstructorVirtualBase", isConstructorVirtualBase(),
+ Indent);
+ dumpSymbolField(OS, "isCxxReturnUdt", isCxxReturnUdt(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeTypeFunctionSig::findChildren(PDB_SymType Type) const {
+ if (Type != PDB_SymType::FunctionArg)
+ return llvm::make_unique<NullEnumerator<PDBSymbol>>();
+
+ auto NET = llvm::make_unique<NativeEnumTypes>(Session,
+ /* copy */ ArgList.ArgIndices);
+ return std::unique_ptr<IPDBEnumSymbols>(
+ new NativeEnumFunctionArgs(Session, std::move(NET)));
+}
+
+SymIndexId NativeTypeFunctionSig::getClassParentId() const {
+ if (!IsMemberFunction)
+ return 0;
+
+ return ClassParentId;
+}
+
+PDB_CallingConv NativeTypeFunctionSig::getCallingConvention() const {
+ return IsMemberFunction ? MemberFunc.CallConv : Proc.CallConv;
+}
+
+uint32_t NativeTypeFunctionSig::getCount() const {
+ return IsMemberFunction ? (1 + MemberFunc.getParameterCount())
+ : Proc.getParameterCount();
+}
+
+SymIndexId NativeTypeFunctionSig::getTypeId() const {
+ TypeIndex ReturnTI =
+ IsMemberFunction ? MemberFunc.getReturnType() : Proc.getReturnType();
+
+ SymIndexId Result = Session.getSymbolCache().findSymbolByTypeIndex(ReturnTI);
+ return Result;
+}
+
+int32_t NativeTypeFunctionSig::getThisAdjust() const {
+ return IsMemberFunction ? MemberFunc.getThisPointerAdjustment() : 0;
+}
+
+bool NativeTypeFunctionSig::hasConstructor() const {
+ if (!IsMemberFunction)
+ return false;
+
+ return (MemberFunc.getOptions() & FunctionOptions::Constructor) !=
+ FunctionOptions::None;
+}
+
+bool NativeTypeFunctionSig::isConstType() const { return false; }
+
+bool NativeTypeFunctionSig::isConstructorVirtualBase() const {
+ if (!IsMemberFunction)
+ return false;
+
+ return (MemberFunc.getOptions() &
+ FunctionOptions::ConstructorWithVirtualBases) !=
+ FunctionOptions::None;
+}
+
+bool NativeTypeFunctionSig::isCxxReturnUdt() const {
+ FunctionOptions Options =
+ IsMemberFunction ? MemberFunc.getOptions() : Proc.getOptions();
+ return (Options & FunctionOptions::CxxReturnUdt) != FunctionOptions::None;
+}
+
+bool NativeTypeFunctionSig::isUnalignedType() const { return false; }
+
+bool NativeTypeFunctionSig::isVolatileType() const { return false; }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
new file mode 100644
index 000000000000..bd8ecb6c4007
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
@@ -0,0 +1,194 @@
+//===- NativeTypePointer.cpp - info about pointer type ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypePointer.h"
+
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypePointer::NativeTypePointer(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI)
+ : NativeRawSymbol(Session, PDB_SymType::PointerType, Id), TI(TI) {
+ assert(TI.isSimple());
+ assert(TI.getSimpleMode() != SimpleTypeMode::Direct);
+}
+
+NativeTypePointer::NativeTypePointer(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI,
+ codeview::PointerRecord Record)
+ : NativeRawSymbol(Session, PDB_SymType::PointerType, Id), TI(TI),
+ Record(std::move(Record)) {}
+
+NativeTypePointer::~NativeTypePointer() {}
+
+void NativeTypePointer::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ if (isMemberPointer()) {
+ dumpSymbolIdField(OS, "classParentId", getClassParentId(), Indent, Session,
+ PdbSymbolIdField::ClassParent, ShowIdFields,
+ RecurseIdFields);
+ }
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+ dumpSymbolField(OS, "length", getLength(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "isPointerToDataMember", isPointerToDataMember(), Indent);
+ dumpSymbolField(OS, "isPointerToMemberFunction", isPointerToMemberFunction(),
+ Indent);
+ dumpSymbolField(OS, "RValueReference", isRValueReference(), Indent);
+ dumpSymbolField(OS, "reference", isReference(), Indent);
+ dumpSymbolField(OS, "restrictedType", isRestrictedType(), Indent);
+ if (isMemberPointer()) {
+ if (isSingleInheritance())
+ dumpSymbolField(OS, "isSingleInheritance", 1, Indent);
+ else if (isMultipleInheritance())
+ dumpSymbolField(OS, "isMultipleInheritance", 1, Indent);
+ else if (isVirtualInheritance())
+ dumpSymbolField(OS, "isVirtualInheritance", 1, Indent);
+ }
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+SymIndexId NativeTypePointer::getClassParentId() const {
+ if (!isMemberPointer())
+ return 0;
+
+ assert(Record);
+ const MemberPointerInfo &MPI = Record->getMemberInfo();
+ return Session.getSymbolCache().findSymbolByTypeIndex(MPI.ContainingType);
+}
+
+uint64_t NativeTypePointer::getLength() const {
+ if (Record)
+ return Record->getSize();
+
+ switch (TI.getSimpleMode()) {
+ case SimpleTypeMode::NearPointer:
+ case SimpleTypeMode::FarPointer:
+ case SimpleTypeMode::HugePointer:
+ return 2;
+ case SimpleTypeMode::NearPointer32:
+ case SimpleTypeMode::FarPointer32:
+ return 4;
+ case SimpleTypeMode::NearPointer64:
+ return 8;
+ case SimpleTypeMode::NearPointer128:
+ return 16;
+ default:
+ assert(false && "invalid simple type mode!");
+ }
+ return 0;
+}
+
+SymIndexId NativeTypePointer::getTypeId() const {
+ // This is the pointee SymIndexId.
+ TypeIndex Referent = Record ? Record->ReferentType : TI.makeDirect();
+
+ return Session.getSymbolCache().findSymbolByTypeIndex(Referent);
+}
+
+bool NativeTypePointer::isReference() const {
+ if (!Record)
+ return false;
+ return Record->getMode() == PointerMode::LValueReference;
+}
+
+bool NativeTypePointer::isRValueReference() const {
+ if (!Record)
+ return false;
+ return Record->getMode() == PointerMode::RValueReference;
+}
+
+bool NativeTypePointer::isPointerToDataMember() const {
+ if (!Record)
+ return false;
+ return Record->getMode() == PointerMode::PointerToDataMember;
+}
+
+bool NativeTypePointer::isPointerToMemberFunction() const {
+ if (!Record)
+ return false;
+ return Record->getMode() == PointerMode::PointerToMemberFunction;
+}
+
+bool NativeTypePointer::isConstType() const {
+ if (!Record)
+ return false;
+ return (Record->getOptions() & PointerOptions::Const) != PointerOptions::None;
+}
+
+bool NativeTypePointer::isRestrictedType() const {
+ if (!Record)
+ return false;
+ return (Record->getOptions() & PointerOptions::Restrict) !=
+ PointerOptions::None;
+}
+
+bool NativeTypePointer::isVolatileType() const {
+ if (!Record)
+ return false;
+ return (Record->getOptions() & PointerOptions::Volatile) !=
+ PointerOptions::None;
+}
+
+bool NativeTypePointer::isUnalignedType() const {
+ if (!Record)
+ return false;
+ return (Record->getOptions() & PointerOptions::Unaligned) !=
+ PointerOptions::None;
+}
+
+static inline bool isInheritanceKind(const MemberPointerInfo &MPI,
+ PointerToMemberRepresentation P1,
+ PointerToMemberRepresentation P2) {
+ return (MPI.getRepresentation() == P1 || MPI.getRepresentation() == P2);
+}
+
+bool NativeTypePointer::isSingleInheritance() const {
+ if (!isMemberPointer())
+ return false;
+ return isInheritanceKind(
+ Record->getMemberInfo(),
+ PointerToMemberRepresentation::SingleInheritanceData,
+ PointerToMemberRepresentation::SingleInheritanceFunction);
+}
+
+bool NativeTypePointer::isMultipleInheritance() const {
+ if (!isMemberPointer())
+ return false;
+ return isInheritanceKind(
+ Record->getMemberInfo(),
+ PointerToMemberRepresentation::MultipleInheritanceData,
+ PointerToMemberRepresentation::MultipleInheritanceFunction);
+}
+
+bool NativeTypePointer::isVirtualInheritance() const {
+ if (!isMemberPointer())
+ return false;
+ return isInheritanceKind(
+ Record->getMemberInfo(),
+ PointerToMemberRepresentation::VirtualInheritanceData,
+ PointerToMemberRepresentation::VirtualInheritanceFunction);
+}
+
+bool NativeTypePointer::isMemberPointer() const {
+ return isPointerToDataMember() || isPointerToMemberFunction();
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp
new file mode 100644
index 000000000000..60b373282267
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp
@@ -0,0 +1,27 @@
+#include "llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypeTypedef::NativeTypeTypedef(NativeSession &Session, SymIndexId Id,
+ codeview::UDTSym Typedef)
+ : NativeRawSymbol(Session, PDB_SymType::Typedef, Id),
+ Record(std::move(Typedef)) {}
+
+NativeTypeTypedef::~NativeTypeTypedef() {}
+
+void NativeTypeTypedef::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
+ PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
+}
+
+std::string NativeTypeTypedef::getName() const { return Record.Name; }
+
+SymIndexId NativeTypeTypedef::getTypeId() const {
+ return Session.getSymbolCache().findSymbolByTypeIndex(Record.Type);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
new file mode 100644
index 000000000000..3abf91dcc6a3
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
@@ -0,0 +1,221 @@
+//===- NativeTypeUDT.cpp - info about class/struct type ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h"
+
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+NativeTypeUDT::NativeTypeUDT(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::ClassRecord CR)
+ : NativeRawSymbol(Session, PDB_SymType::UDT, Id), Index(TI),
+ Class(std::move(CR)), Tag(Class.getPointer()) {}
+
+NativeTypeUDT::NativeTypeUDT(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI, codeview::UnionRecord UR)
+ : NativeRawSymbol(Session, PDB_SymType::UDT, Id), Index(TI),
+ Union(std::move(UR)), Tag(Union.getPointer()) {}
+
+NativeTypeUDT::NativeTypeUDT(NativeSession &Session, SymIndexId Id,
+ NativeTypeUDT &UnmodifiedType,
+ codeview::ModifierRecord Modifier)
+ : NativeRawSymbol(Session, PDB_SymType::UDT, Id),
+ UnmodifiedType(&UnmodifiedType), Modifiers(std::move(Modifier)) {}
+
+NativeTypeUDT::~NativeTypeUDT() {}
+
+void NativeTypeUDT::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolField(OS, "name", getName(), Indent);
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ if (Modifiers.hasValue())
+ dumpSymbolIdField(OS, "unmodifiedTypeId", getUnmodifiedTypeId(), Indent,
+ Session, PdbSymbolIdField::UnmodifiedType, ShowIdFields,
+ RecurseIdFields);
+ if (getUdtKind() != PDB_UdtType::Union)
+ dumpSymbolField(OS, "virtualTableShapeId", getVirtualTableShapeId(),
+ Indent);
+ dumpSymbolField(OS, "length", getLength(), Indent);
+ dumpSymbolField(OS, "udtKind", getUdtKind(), Indent);
+ dumpSymbolField(OS, "constructor", hasConstructor(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "hasAssignmentOperator", hasAssignmentOperator(), Indent);
+ dumpSymbolField(OS, "hasCastOperator", hasCastOperator(), Indent);
+ dumpSymbolField(OS, "hasNestedTypes", hasNestedTypes(), Indent);
+ dumpSymbolField(OS, "overloadedOperator", hasOverloadedOperator(), Indent);
+ dumpSymbolField(OS, "isInterfaceUdt", isInterfaceUdt(), Indent);
+ dumpSymbolField(OS, "intrinsic", isIntrinsic(), Indent);
+ dumpSymbolField(OS, "nested", isNested(), Indent);
+ dumpSymbolField(OS, "packed", isPacked(), Indent);
+ dumpSymbolField(OS, "isRefUdt", isRefUdt(), Indent);
+ dumpSymbolField(OS, "scoped", isScoped(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "isValueUdt", isValueUdt(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+std::string NativeTypeUDT::getName() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getName();
+
+ return Tag->getName();
+}
+
+SymIndexId NativeTypeUDT::getLexicalParentId() const { return 0; }
+
+SymIndexId NativeTypeUDT::getUnmodifiedTypeId() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getSymIndexId();
+
+ return 0;
+}
+
+SymIndexId NativeTypeUDT::getVirtualTableShapeId() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getVirtualTableShapeId();
+
+ if (Class)
+ return Session.getSymbolCache().findSymbolByTypeIndex(Class->VTableShape);
+
+ return 0;
+}
+
+uint64_t NativeTypeUDT::getLength() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getLength();
+
+ if (Class)
+ return Class->getSize();
+
+ return Union->getSize();
+}
+
+PDB_UdtType NativeTypeUDT::getUdtKind() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->getUdtKind();
+
+ switch (Tag->Kind) {
+ case TypeRecordKind::Class:
+ return PDB_UdtType::Class;
+ case TypeRecordKind::Union:
+ return PDB_UdtType::Union;
+ case TypeRecordKind::Struct:
+ return PDB_UdtType::Struct;
+ case TypeRecordKind::Interface:
+ return PDB_UdtType::Interface;
+ default:
+ llvm_unreachable("Unexected udt kind");
+ }
+}
+
+bool NativeTypeUDT::hasConstructor() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasConstructor();
+
+ return (Tag->Options & ClassOptions::HasConstructorOrDestructor) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::isConstType() const {
+ if (!Modifiers)
+ return false;
+ return (Modifiers->Modifiers & ModifierOptions::Const) !=
+ ModifierOptions::None;
+}
+
+bool NativeTypeUDT::hasAssignmentOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasAssignmentOperator();
+
+ return (Tag->Options & ClassOptions::HasOverloadedAssignmentOperator) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::hasCastOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasCastOperator();
+
+ return (Tag->Options & ClassOptions::HasConversionOperator) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::hasNestedTypes() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasNestedTypes();
+
+ return (Tag->Options & ClassOptions::ContainsNestedClass) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::hasOverloadedOperator() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->hasOverloadedOperator();
+
+ return (Tag->Options & ClassOptions::HasOverloadedOperator) !=
+ ClassOptions::None;
+}
+
+bool NativeTypeUDT::isInterfaceUdt() const { return false; }
+
+bool NativeTypeUDT::isIntrinsic() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isIntrinsic();
+
+ return (Tag->Options & ClassOptions::Intrinsic) != ClassOptions::None;
+}
+
+bool NativeTypeUDT::isNested() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isNested();
+
+ return (Tag->Options & ClassOptions::Nested) != ClassOptions::None;
+}
+
+bool NativeTypeUDT::isPacked() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isPacked();
+
+ return (Tag->Options & ClassOptions::Packed) != ClassOptions::None;
+}
+
+bool NativeTypeUDT::isRefUdt() const { return false; }
+
+bool NativeTypeUDT::isScoped() const {
+ if (UnmodifiedType)
+ return UnmodifiedType->isScoped();
+
+ return (Tag->Options & ClassOptions::Scoped) != ClassOptions::None;
+}
+
+bool NativeTypeUDT::isValueUdt() const { return false; }
+
+bool NativeTypeUDT::isUnalignedType() const {
+ if (!Modifiers)
+ return false;
+ return (Modifiers->Modifiers & ModifierOptions::Unaligned) !=
+ ModifierOptions::None;
+}
+
+bool NativeTypeUDT::isVolatileType() const {
+ if (!Modifiers)
+ return false;
+ return (Modifiers->Modifiers & ModifierOptions::Volatile) !=
+ ModifierOptions::None;
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp
new file mode 100644
index 000000000000..837fe19ec88c
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp
@@ -0,0 +1,35 @@
+#include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+// Create a symbol for a vtable shape record.
+NativeTypeVTShape::NativeTypeVTShape(NativeSession &Session, SymIndexId Id,
+ codeview::TypeIndex TI,
+ codeview::VFTableShapeRecord SR)
+ : NativeRawSymbol(Session, PDB_SymType::VTableShape, Id), TI(TI),
+ Record(std::move(SR)) {}
+
+NativeTypeVTShape::~NativeTypeVTShape() {}
+
+void NativeTypeVTShape::dump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowIdFields,
+ PdbSymbolIdField RecurseIdFields) const {
+ NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields);
+
+ dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
+ PdbSymbolIdField::LexicalParent, ShowIdFields,
+ RecurseIdFields);
+ dumpSymbolField(OS, "count", getCount(), Indent);
+ dumpSymbolField(OS, "constType", isConstType(), Indent);
+ dumpSymbolField(OS, "unalignedType", isUnalignedType(), Indent);
+ dumpSymbolField(OS, "volatileType", isVolatileType(), Indent);
+}
+
+bool NativeTypeVTShape::isConstType() const { return false; }
+
+bool NativeTypeVTShape::isVolatileType() const { return false; }
+
+bool NativeTypeVTShape::isUnalignedType() const { return false; }
+
+uint32_t NativeTypeVTShape::getCount() const { return Record.Slots.size(); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 78b11937f051..a1f8786ff12f 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -125,7 +125,7 @@ Error PDBFile::parseFileHeaders() {
if (auto EC = Reader.readObject(SB)) {
consumeError(std::move(EC));
return make_error<RawError>(raw_error_code::corrupt_file,
- "Does not contain superblock");
+ "MSF superblock is missing");
}
if (auto EC = msf::validateSuperBlock(*SB))
@@ -401,7 +401,9 @@ uint32_t PDBFile::getPointerSize() {
return 4;
}
-bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
+bool PDBFile::hasPDBDbiStream() const {
+ return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
+}
bool PDBFile::hasPDBGlobalsStream() {
auto DbiS = getPDBDbiStream();
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index e164e7cf1c52..e0ceb7499ee5 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -12,7 +12,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
@@ -26,6 +25,7 @@
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/JamCRC.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/xxhash.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -262,13 +262,14 @@ void PDBFileBuilder::commitInjectedSources(WritableBinaryStream &MsfBuffer,
}
}
-Error PDBFileBuilder::commit(StringRef Filename) {
+Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) {
assert(!Filename.empty());
if (auto EC = finalizeMsfLayout())
return EC;
MSFLayout Layout;
- auto ExpectedMsfBuffer = Msf->commit(Filename, Layout);
+ Expected<FileBufferByteStream> ExpectedMsfBuffer =
+ Msf->commit(Filename, Layout);
if (!ExpectedMsfBuffer)
return ExpectedMsfBuffer.takeError();
FileBufferByteStream Buffer = std::move(*ExpectedMsfBuffer);
@@ -330,11 +331,28 @@ Error PDBFileBuilder::commit(StringRef Filename) {
// Set the build id at the very end, after every other byte of the PDB
// has been written.
- // FIXME: Use a hash of the PDB rather than time(nullptr) for the signature.
- H->Age = Info->getAge();
- H->Guid = Info->getGuid();
- Optional<uint32_t> Sig = Info->getSignature();
- H->Signature = Sig.hasValue() ? *Sig : time(nullptr);
+ if (Info->hashPDBContentsToGUID()) {
+ // Compute a hash of all sections of the output file.
+ uint64_t Digest =
+ xxHash64({Buffer.getBufferStart(), Buffer.getBufferEnd()});
+
+ H->Age = 1;
+
+ memcpy(H->Guid.Guid, &Digest, 8);
+ // xxhash only gives us 8 bytes, so put some fixed data in the other half.
+ memcpy(H->Guid.Guid + 8, "LLD PDB.", 8);
+
+ // Put the hash in the Signature field too.
+ H->Signature = static_cast<uint32_t>(Digest);
+
+ // Return GUID to caller.
+ memcpy(Guid, H->Guid.Guid, 16);
+ } else {
+ H->Age = Info->getAge();
+ H->Guid = Info->getGuid();
+ Optional<uint32_t> Sig = Info->getSignature();
+ H->Signature = Sig.hasValue() ? *Sig : time(nullptr);
+ }
return Buffer.commit();
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
index 548289fff3df..dec9797088f2 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/RawError.cpp
@@ -5,14 +5,12 @@
using namespace llvm;
using namespace llvm::pdb;
-namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class RawErrorCategory : public std::error_category {
public:
const char *name() const noexcept override { return "llvm.pdb.raw"; }
-
std::string message(int Condition) const override {
switch (static_cast<raw_error_code>(Condition)) {
case raw_error_code::unspecified:
@@ -46,30 +44,8 @@ public:
llvm_unreachable("Unrecognized raw_error_code");
}
};
-} // end anonymous namespace
-
-static ManagedStatic<RawErrorCategory> Category;
-
-char RawError::ID = 0;
-
-RawError::RawError(raw_error_code C) : RawError(C, "") {}
-
-RawError::RawError(const std::string &Context)
- : RawError(raw_error_code::unspecified, Context) {}
-
-RawError::RawError(raw_error_code C, const std::string &Context) : Code(C) {
- ErrMsg = "Native PDB Error: ";
- std::error_code EC = convertToErrorCode();
- if (Code != raw_error_code::unspecified)
- ErrMsg += EC.message() + " ";
- if (!Context.empty())
- ErrMsg += Context;
-}
-
-void RawError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-const std::string &RawError::getErrorMessage() const { return ErrMsg; }
+static llvm::ManagedStatic<RawErrorCategory> RawCategory;
+const std::error_category &llvm::pdb::RawErrCategory() { return *RawCategory; }
-std::error_code RawError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(Code), *Category);
-}
+char RawError::ID;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
new file mode 100644
index 000000000000..5cdd628312fe
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
@@ -0,0 +1,299 @@
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypePointer.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+// Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary
+// to instantiate a NativeBuiltinSymbol for that type.
+static const struct BuiltinTypeEntry {
+ codeview::SimpleTypeKind Kind;
+ PDB_BuiltinType Type;
+ uint32_t Size;
+} BuiltinTypes[] = {
+ {codeview::SimpleTypeKind::None, PDB_BuiltinType::None, 0},
+ {codeview::SimpleTypeKind::Void, PDB_BuiltinType::Void, 0},
+ {codeview::SimpleTypeKind::HResult, PDB_BuiltinType::HResult, 4},
+ {codeview::SimpleTypeKind::Int16Short, PDB_BuiltinType::Int, 2},
+ {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2},
+ {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4},
+ {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4},
+ {codeview::SimpleTypeKind::Int32Long, PDB_BuiltinType::Int, 4},
+ {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4},
+ {codeview::SimpleTypeKind::Int64Quad, PDB_BuiltinType::Int, 8},
+ {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8},
+ {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1},
+ {codeview::SimpleTypeKind::WideCharacter, PDB_BuiltinType::WCharT, 2},
+ {codeview::SimpleTypeKind::Character16, PDB_BuiltinType::Char16, 2},
+ {codeview::SimpleTypeKind::Character32, PDB_BuiltinType::Char32, 4},
+ {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1},
+ {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1},
+ {codeview::SimpleTypeKind::Float32, PDB_BuiltinType::Float, 4},
+ {codeview::SimpleTypeKind::Float64, PDB_BuiltinType::Float, 8},
+ {codeview::SimpleTypeKind::Float80, PDB_BuiltinType::Float, 10},
+ {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1},
+ // This table can be grown as necessary, but these are the only types we've
+ // needed so far.
+};
+
+SymbolCache::SymbolCache(NativeSession &Session, DbiStream *Dbi)
+ : Session(Session), Dbi(Dbi) {
+ // Id 0 is reserved for the invalid symbol.
+ Cache.push_back(nullptr);
+
+ if (Dbi)
+ Compilands.resize(Dbi->modules().getModuleCount());
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+SymbolCache::createTypeEnumerator(TypeLeafKind Kind) {
+ return createTypeEnumerator(std::vector<TypeLeafKind>{Kind});
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+SymbolCache::createTypeEnumerator(std::vector<TypeLeafKind> Kinds) {
+ auto Tpi = Session.getPDBFile().getPDBTpiStream();
+ if (!Tpi) {
+ consumeError(Tpi.takeError());
+ return nullptr;
+ }
+ auto &Types = Tpi->typeCollection();
+ return std::unique_ptr<IPDBEnumSymbols>(
+ new NativeEnumTypes(Session, Types, std::move(Kinds)));
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+SymbolCache::createGlobalsEnumerator(codeview::SymbolKind Kind) {
+ return std::unique_ptr<IPDBEnumSymbols>(
+ new NativeEnumGlobals(Session, {Kind}));
+}
+
+SymIndexId SymbolCache::createSimpleType(TypeIndex Index,
+ ModifierOptions Mods) {
+ if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct)
+ return createSymbol<NativeTypePointer>(Index);
+
+ const auto Kind = Index.getSimpleKind();
+ const auto It = std::find_if(
+ std::begin(BuiltinTypes), std::end(BuiltinTypes),
+ [Kind](const BuiltinTypeEntry &Builtin) { return Builtin.Kind == Kind; });
+ if (It == std::end(BuiltinTypes))
+ return 0;
+ return createSymbol<NativeTypeBuiltin>(Mods, It->Type, It->Size);
+}
+
+SymIndexId
+SymbolCache::createSymbolForModifiedType(codeview::TypeIndex ModifierTI,
+ codeview::CVType CVT) {
+ ModifierRecord Record;
+ if (auto EC = TypeDeserializer::deserializeAs<ModifierRecord>(CVT, Record)) {
+ consumeError(std::move(EC));
+ return 0;
+ }
+
+ if (Record.ModifiedType.isSimple())
+ return createSimpleType(Record.ModifiedType, Record.Modifiers);
+
+ // Make sure we create and cache a record for the unmodified type.
+ SymIndexId UnmodifiedId = findSymbolByTypeIndex(Record.ModifiedType);
+ NativeRawSymbol &UnmodifiedNRS = *Cache[UnmodifiedId];
+
+ switch (UnmodifiedNRS.getSymTag()) {
+ case PDB_SymType::Enum:
+ return createSymbol<NativeTypeEnum>(
+ static_cast<NativeTypeEnum &>(UnmodifiedNRS), std::move(Record));
+ case PDB_SymType::UDT:
+ return createSymbol<NativeTypeUDT>(
+ static_cast<NativeTypeUDT &>(UnmodifiedNRS), std::move(Record));
+ default:
+ // No other types can be modified. (LF_POINTER, for example, records
+ // its modifiers a different way.)
+ assert(false && "Invalid LF_MODIFIER record");
+ break;
+ }
+ return 0;
+}
+
+SymIndexId SymbolCache::findSymbolByTypeIndex(codeview::TypeIndex Index) {
+ // First see if it's already in our cache.
+ const auto Entry = TypeIndexToSymbolId.find(Index);
+ if (Entry != TypeIndexToSymbolId.end())
+ return Entry->second;
+
+ // Symbols for built-in types are created on the fly.
+ if (Index.isSimple()) {
+ SymIndexId Result = createSimpleType(Index, ModifierOptions::None);
+ assert(TypeIndexToSymbolId.count(Index) == 0);
+ TypeIndexToSymbolId[Index] = Result;
+ return Result;
+ }
+
+ // We need to instantiate and cache the desired type symbol.
+ auto Tpi = Session.getPDBFile().getPDBTpiStream();
+ if (!Tpi) {
+ consumeError(Tpi.takeError());
+ return 0;
+ }
+ codeview::LazyRandomTypeCollection &Types = Tpi->typeCollection();
+ codeview::CVType CVT = Types.getType(Index);
+
+ if (isUdtForwardRef(CVT)) {
+ Expected<TypeIndex> EFD = Tpi->findFullDeclForForwardRef(Index);
+
+ if (!EFD)
+ consumeError(EFD.takeError());
+ else if (*EFD != Index) {
+ assert(!isUdtForwardRef(Types.getType(*EFD)));
+ SymIndexId Result = findSymbolByTypeIndex(*EFD);
+ // Record a mapping from ForwardRef -> SymIndex of complete type so that
+ // we'll take the fast path next time.
+ assert(TypeIndexToSymbolId.count(Index) == 0);
+ TypeIndexToSymbolId[Index] = Result;
+ return Result;
+ }
+ }
+
+ // At this point, if we still have a forward ref UDT, it means the full decl
+ // was not in the PDB. We just have to deal with it and use the forward ref.
+ SymIndexId Id = 0;
+ switch (CVT.kind()) {
+ case codeview::LF_ENUM:
+ Id = createSymbolForType<NativeTypeEnum, EnumRecord>(Index, std::move(CVT));
+ break;
+ case codeview::LF_ARRAY:
+ Id = createSymbolForType<NativeTypeArray, ArrayRecord>(Index,
+ std::move(CVT));
+ break;
+ case codeview::LF_CLASS:
+ case codeview::LF_STRUCTURE:
+ case codeview::LF_INTERFACE:
+ Id = createSymbolForType<NativeTypeUDT, ClassRecord>(Index, std::move(CVT));
+ break;
+ case codeview::LF_UNION:
+ Id = createSymbolForType<NativeTypeUDT, UnionRecord>(Index, std::move(CVT));
+ break;
+ case codeview::LF_POINTER:
+ Id = createSymbolForType<NativeTypePointer, PointerRecord>(Index,
+ std::move(CVT));
+ break;
+ case codeview::LF_MODIFIER:
+ Id = createSymbolForModifiedType(Index, std::move(CVT));
+ break;
+ case codeview::LF_PROCEDURE:
+ Id = createSymbolForType<NativeTypeFunctionSig, ProcedureRecord>(
+ Index, std::move(CVT));
+ break;
+ case codeview::LF_MFUNCTION:
+ Id = createSymbolForType<NativeTypeFunctionSig, MemberFunctionRecord>(
+ Index, std::move(CVT));
+ break;
+ case codeview::LF_VTSHAPE:
+ Id = createSymbolForType<NativeTypeVTShape, VFTableShapeRecord>(
+ Index, std::move(CVT));
+ break;
+ default:
+ Id = createSymbolPlaceholder();
+ break;
+ }
+ if (Id != 0) {
+ assert(TypeIndexToSymbolId.count(Index) == 0);
+ TypeIndexToSymbolId[Index] = Id;
+ }
+ return Id;
+}
+
+std::unique_ptr<PDBSymbol>
+SymbolCache::getSymbolById(SymIndexId SymbolId) const {
+ assert(SymbolId < Cache.size());
+
+ // Id 0 is reserved.
+ if (SymbolId == 0 || SymbolId >= Cache.size())
+ return nullptr;
+
+ // Make sure to handle the case where we've inserted a placeholder symbol
+ // for types we don't yet support.
+ NativeRawSymbol *NRS = Cache[SymbolId].get();
+ if (!NRS)
+ return nullptr;
+
+ return PDBSymbol::create(Session, *NRS);
+}
+
+NativeRawSymbol &SymbolCache::getNativeSymbolById(SymIndexId SymbolId) const {
+ return *Cache[SymbolId];
+}
+
+uint32_t SymbolCache::getNumCompilands() const {
+ if (!Dbi)
+ return 0;
+
+ return Dbi->modules().getModuleCount();
+}
+
+SymIndexId SymbolCache::getOrCreateGlobalSymbolByOffset(uint32_t Offset) {
+ auto Iter = GlobalOffsetToSymbolId.find(Offset);
+ if (Iter != GlobalOffsetToSymbolId.end())
+ return Iter->second;
+
+ SymbolStream &SS = cantFail(Session.getPDBFile().getPDBSymbolStream());
+ CVSymbol CVS = SS.readRecord(Offset);
+ SymIndexId Id = 0;
+ switch (CVS.kind()) {
+ case SymbolKind::S_UDT: {
+ UDTSym US = cantFail(SymbolDeserializer::deserializeAs<UDTSym>(CVS));
+ Id = createSymbol<NativeTypeTypedef>(std::move(US));
+ break;
+ }
+ default:
+ Id = createSymbolPlaceholder();
+ break;
+ }
+ if (Id != 0) {
+ assert(GlobalOffsetToSymbolId.count(Offset) == 0);
+ GlobalOffsetToSymbolId[Offset] = Id;
+ }
+
+ return Id;
+}
+
+std::unique_ptr<PDBSymbolCompiland>
+SymbolCache::getOrCreateCompiland(uint32_t Index) {
+ if (!Dbi)
+ return nullptr;
+
+ if (Index >= Compilands.size())
+ return nullptr;
+
+ if (Compilands[Index] == 0) {
+ const DbiModuleList &Modules = Dbi->modules();
+ Compilands[Index] =
+ createSymbol<NativeCompilandSymbol>(Modules.getModuleDescriptor(Index));
+ }
+
+ return Session.getConcreteSymbolById<PDBSymbolCompiland>(Compilands[Index]);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index 77a2d57a8369..18708826ffc7 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -50,6 +50,32 @@ static Expected<uint32_t> getHashForUdt(const CVType &Rec) {
}
template <typename T>
+static Expected<TagRecordHash> getTagRecordHashForUdt(const CVType &Rec) {
+ T Deserialized;
+ if (auto E = TypeDeserializer::deserializeAs(const_cast<CVType &>(Rec),
+ Deserialized))
+ return std::move(E);
+
+ ClassOptions Opts = Deserialized.getOptions();
+
+ bool ForwardRef = bool(Opts & ClassOptions::ForwardReference);
+
+ uint32_t ThisRecordHash = getHashForUdt(Deserialized, Rec.data());
+
+ // If we don't have a forward ref we can't compute the hash of it from the
+ // full record because it requires hashing the entire buffer.
+ if (!ForwardRef)
+ return TagRecordHash{std::move(Deserialized), ThisRecordHash, 0};
+
+ bool Scoped = bool(Opts & ClassOptions::Scoped);
+
+ StringRef NameToHash =
+ Scoped ? Deserialized.getUniqueName() : Deserialized.getName();
+ uint32_t FullHash = hashStringV1(NameToHash);
+ return TagRecordHash{std::move(Deserialized), FullHash, ThisRecordHash};
+}
+
+template <typename T>
static Expected<uint32_t> getSourceLineHash(const CVType &Rec) {
T Deserialized;
if (auto E = TypeDeserializer::deserializeAs(const_cast<CVType &>(Rec),
@@ -60,6 +86,23 @@ static Expected<uint32_t> getSourceLineHash(const CVType &Rec) {
return hashStringV1(StringRef(Buf, 4));
}
+Expected<TagRecordHash> llvm::pdb::hashTagRecord(const codeview::CVType &Type) {
+ switch (Type.kind()) {
+ case LF_CLASS:
+ case LF_STRUCTURE:
+ case LF_INTERFACE:
+ return getTagRecordHashForUdt<ClassRecord>(Type);
+ case LF_UNION:
+ return getTagRecordHashForUdt<UnionRecord>(Type);
+ case LF_ENUM:
+ return getTagRecordHashForUdt<EnumRecord>(Type);
+ default:
+ assert(false && "Type is not a tag record!");
+ }
+ return make_error<StringError>("Invalid record type",
+ inconvertibleErrorCode());
+}
+
Expected<uint32_t> llvm::pdb::hashTypeRecord(const CVType &Rec) {
switch (Rec.kind()) {
case LF_CLASS:
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp
index 0680b673380a..f234d446e6a0 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -11,8 +11,11 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -140,6 +143,88 @@ uint16_t TpiStream::getTypeHashStreamAuxIndex() const {
uint32_t TpiStream::getNumHashBuckets() const { return Header->NumHashBuckets; }
uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; }
+void TpiStream::buildHashMap() {
+ if (!HashMap.empty())
+ return;
+ if (HashValues.empty())
+ return;
+
+ HashMap.resize(Header->NumHashBuckets);
+
+ TypeIndex TIB{Header->TypeIndexBegin};
+ TypeIndex TIE{Header->TypeIndexEnd};
+ while (TIB < TIE) {
+ uint32_t HV = HashValues[TIB.toArrayIndex()];
+ HashMap[HV].push_back(TIB++);
+ }
+}
+
+std::vector<TypeIndex> TpiStream::findRecordsByName(StringRef Name) const {
+ if (!supportsTypeLookup())
+ const_cast<TpiStream*>(this)->buildHashMap();
+
+ uint32_t Bucket = hashStringV1(Name) % Header->NumHashBuckets;
+ if (Bucket > HashMap.size())
+ return {};
+
+ std::vector<TypeIndex> Result;
+ for (TypeIndex TI : HashMap[Bucket]) {
+ std::string ThisName = computeTypeName(*Types, TI);
+ if (ThisName == Name)
+ Result.push_back(TI);
+ }
+ return Result;
+}
+
+bool TpiStream::supportsTypeLookup() const { return !HashMap.empty(); }
+
+Expected<TypeIndex>
+TpiStream::findFullDeclForForwardRef(TypeIndex ForwardRefTI) const {
+ if (!supportsTypeLookup())
+ const_cast<TpiStream*>(this)->buildHashMap();
+
+ CVType F = Types->getType(ForwardRefTI);
+ if (!isUdtForwardRef(F))
+ return ForwardRefTI;
+
+ Expected<TagRecordHash> ForwardTRH = hashTagRecord(F);
+ if (!ForwardTRH)
+ return ForwardTRH.takeError();
+
+ uint32_t BucketIdx = ForwardTRH->FullRecordHash % Header->NumHashBuckets;
+
+ for (TypeIndex TI : HashMap[BucketIdx]) {
+ CVType CVT = Types->getType(TI);
+ if (CVT.kind() != F.kind())
+ continue;
+
+ Expected<TagRecordHash> FullTRH = hashTagRecord(CVT);
+ if (!FullTRH)
+ return FullTRH.takeError();
+ if (ForwardTRH->FullRecordHash != FullTRH->FullRecordHash)
+ continue;
+ TagRecord &ForwardTR = ForwardTRH->getRecord();
+ TagRecord &FullTR = FullTRH->getRecord();
+
+ if (!ForwardTR.hasUniqueName()) {
+ if (ForwardTR.getName() == FullTR.getName())
+ return TI;
+ continue;
+ }
+
+ if (!FullTR.hasUniqueName())
+ continue;
+ if (ForwardTR.getUniqueName() == FullTR.getUniqueName())
+ return TI;
+ }
+ return ForwardRefTI;
+}
+
+codeview::CVType TpiStream::getType(codeview::TypeIndex Index) {
+ assert(!Index.isSimple());
+ return Types->getType(Index);
+}
+
BinarySubstreamRef TpiStream::getTypeRecordsSubstream() const {
return TypeRecordsSubstream;
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
index 40f5ae9ba845..fc1ad8bcd7cd 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
@@ -29,7 +29,7 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
/*RequiresNullTerminator=*/false);
if (!ErrorOrBuffer)
- return make_error<GenericError>(generic_error_code::invalid_path, Path);
+ return errorCodeToError(ErrorOrBuffer.getError());
return NativeSession::createFromPdb(std::move(*ErrorOrBuffer), Session);
}
@@ -37,7 +37,7 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromPdb(Path, Session);
#else
- return make_error<GenericError>("DIA is not installed on the system");
+ return make_error<PDBError>(pdb_error_code::dia_sdk_not_present);
#endif
}
@@ -50,6 +50,6 @@ Error llvm::pdb::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromExe(Path, Session);
#else
- return make_error<GenericError>("DIA is not installed on the system");
+ return make_error<PDBError>(pdb_error_code::dia_sdk_not_present);
#endif
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
index a4e316417f96..0d8af232cd92 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -43,6 +43,33 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
}
raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
+ const PDB_BuiltinType &Type) {
+ switch (Type) {
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, None, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Void, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, WCharT, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Int, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, UInt, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Float, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, BCD, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Bool, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Long, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, ULong, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Currency, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Date, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Variant, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Complex, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Bitfield, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, BSTR, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, HResult, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char16, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char32, OS)
+ }
+ return OS;
+}
+
+raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
const PDB_CallingConv &Conv) {
OS << "__";
switch (Conv) {
@@ -202,8 +229,20 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_SymType &Tag) {
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CustomType, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, ManagedType, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Dimension, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CallSite, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, InlineSite, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, BaseInterface, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, VectorType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, MatrixType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, HLSLType, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Caller, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Callee, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Export, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, HeapAllocationSite, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, CoffGroup, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SymType, Inlinee, OS)
default:
- OS << "Unknown";
+ OS << "Unknown SymTag " << uint32_t(Tag);
}
return OS;
}
@@ -293,7 +332,7 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const Variant &Value) {
OS << Value.Value.Single;
break;
case PDB_VariantType::UInt16:
- OS << Value.Value.Double;
+ OS << Value.Value.UInt16;
break;
case PDB_VariantType::UInt32:
OS << Value.Value.UInt32;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
index c62796507a01..951909295d13 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/IPDBDataStream.h"
+#include "llvm/DebugInfo/PDB/IPDBFrameData.h"
#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
@@ -35,3 +36,5 @@ IPDBTable::~IPDBTable() = default;
IPDBInjectedSource::~IPDBInjectedSource() = default;
IPDBSectionContrib::~IPDBSectionContrib() = default;
+
+IPDBFrameData::~IPDBFrameData() = default;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
index 74010c2dd7dd..d492edafdafe 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -50,23 +50,20 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbol::PDBSymbol(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : Session(PDBSession), RawSymbol(std::move(Symbol)) {}
+PDBSymbol::PDBSymbol(const IPDBSession &PDBSession) : Session(PDBSession) {}
-PDBSymbol::PDBSymbol(PDBSymbol &Symbol)
- : Session(Symbol.Session), RawSymbol(std::move(Symbol.RawSymbol)) {}
+PDBSymbol::PDBSymbol(PDBSymbol &&Other)
+ : Session(Other.Session), RawSymbol(std::move(Other.RawSymbol)) {}
PDBSymbol::~PDBSymbol() = default;
#define FACTORY_SYMTAG_CASE(Tag, Type) \
case PDB_SymType::Tag: \
- return std::unique_ptr<PDBSymbol>(new Type(PDBSession, std::move(Symbol)));
+ return std::unique_ptr<PDBSymbol>(new Type(PDBSession));
std::unique_ptr<PDBSymbol>
-PDBSymbol::create(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol) {
- switch (Symbol->getSymTag()) {
+PDBSymbol::createSymbol(const IPDBSession &PDBSession, PDB_SymType Tag) {
+ switch (Tag) {
FACTORY_SYMTAG_CASE(Exe, PDBSymbolExe)
FACTORY_SYMTAG_CASE(Compiland, PDBSymbolCompiland)
FACTORY_SYMTAG_CASE(CompilandDetails, PDBSymbolCompilandDetails)
@@ -98,18 +95,35 @@ PDBSymbol::create(const IPDBSession &PDBSession,
FACTORY_SYMTAG_CASE(ManagedType, PDBSymbolTypeManaged)
FACTORY_SYMTAG_CASE(Dimension, PDBSymbolTypeDimension)
default:
- return std::unique_ptr<PDBSymbol>(
- new PDBSymbolUnknown(PDBSession, std::move(Symbol)));
+ return std::unique_ptr<PDBSymbol>(new PDBSymbolUnknown(PDBSession));
}
}
-void PDBSymbol::defaultDump(raw_ostream &OS, int Indent) const {
- RawSymbol->dump(OS, Indent);
+std::unique_ptr<PDBSymbol>
+PDBSymbol::create(const IPDBSession &PDBSession,
+ std::unique_ptr<IPDBRawSymbol> RawSymbol) {
+ auto SymbolPtr = createSymbol(PDBSession, RawSymbol->getSymTag());
+ SymbolPtr->RawSymbol = RawSymbol.get();
+ SymbolPtr->OwnedRawSymbol = std::move(RawSymbol);
+ return SymbolPtr;
+}
+
+std::unique_ptr<PDBSymbol> PDBSymbol::create(const IPDBSession &PDBSession,
+ IPDBRawSymbol &RawSymbol) {
+ auto SymbolPtr = createSymbol(PDBSession, RawSymbol.getSymTag());
+ SymbolPtr->RawSymbol = &RawSymbol;
+ return SymbolPtr;
+}
+
+void PDBSymbol::defaultDump(raw_ostream &OS, int Indent,
+ PdbSymbolIdField ShowFlags,
+ PdbSymbolIdField RecurseFlags) const {
+ RawSymbol->dump(OS, Indent, ShowFlags, RecurseFlags);
}
void PDBSymbol::dumpProperties() const {
outs() << "\n";
- defaultDump(outs(), 0);
+ defaultDump(outs(), 0, PdbSymbolIdField::All, PdbSymbolIdField::None);
outs().flush();
}
@@ -123,10 +137,6 @@ void PDBSymbol::dumpChildStats() const {
outs().flush();
}
-std::unique_ptr<PDBSymbol> PDBSymbol::clone() const {
- return Session.getSymbolById(getSymIndexId());
-}
-
PDB_SymType PDBSymbol::getSymTag() const { return RawSymbol->getSymTag(); }
uint32_t PDBSymbol::getSymIndexId() const { return RawSymbol->getSymIndexId(); }
@@ -172,3 +182,34 @@ PDBSymbol::getChildStats(TagStats &Stats) const {
std::unique_ptr<PDBSymbol> PDBSymbol::getSymbolByIdHelper(uint32_t Id) const {
return Session.getSymbolById(Id);
}
+
+void llvm::pdb::dumpSymbolIdField(raw_ostream &OS, StringRef Name,
+ SymIndexId Value, int Indent,
+ const IPDBSession &Session,
+ PdbSymbolIdField FieldId,
+ PdbSymbolIdField ShowFlags,
+ PdbSymbolIdField RecurseFlags) {
+ if ((FieldId & ShowFlags) == PdbSymbolIdField::None)
+ return;
+
+ OS << "\n";
+ OS.indent(Indent);
+ OS << Name << ": " << Value;
+ // Don't recurse unless the user requested it.
+ if ((FieldId & RecurseFlags) == PdbSymbolIdField::None)
+ return;
+ // And obviously don't recurse on the symbol itself.
+ if (FieldId == PdbSymbolIdField::SymIndexId)
+ return;
+
+ auto Child = Session.getSymbolById(Value);
+
+ // It could have been a placeholder symbol for a type we don't yet support,
+ // so just exit in that case.
+ if (!Child)
+ return;
+
+ // Don't recurse more than once, so pass PdbSymbolIdField::None for the
+ // recurse flags.
+ Child->defaultDump(OS, Indent + 2, ShowFlags, PdbSymbolIdField::None);
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
index 3648272e1d0e..cb1a9bee8024 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolAnnotation::PDBSymbolAnnotation(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Annotation);
-}
-
void PDBSymbolAnnotation::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
index 7076b4aec347..13eec9734d02 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -17,10 +17,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolBlock::PDBSymbolBlock(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Block);
-}
-
void PDBSymbolBlock::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
index 8798c7b9db88..bbc5e6dd2a17 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -22,12 +22,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolCompiland::PDBSymbolCompiland(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Compiland);
-}
-
void PDBSymbolCompiland::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
index f73cd36d057a..bdd8535a3ef3 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolCompilandDetails::PDBSymbolCompilandDetails(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::CompilandDetails);
-}
-
void PDBSymbolCompilandDetails::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
index df696fa8c5f2..f88df2df6be4 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -18,12 +18,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolCompilandEnv::PDBSymbolCompilandEnv(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::CompilandEnv);
-}
-
std::string PDBSymbolCompilandEnv::getValue() const {
Variant Value = RawSymbol->getValue();
if (Value.Type != PDB_VariantType::String)
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
index a7b69a755941..10a21806adb6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -18,12 +18,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolCustom::PDBSymbolCustom(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> CustomSymbol)
- : PDBSymbol(PDBSession, std::move(CustomSymbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Custom);
-}
-
void PDBSymbolCustom::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) {
RawSymbol->getDataBytes(bytes);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
index ae4a8038ccd7..7de94670bcb3 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolData::PDBSymbolData(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> DataSymbol)
- : PDBSymbol(PDBSession, std::move(DataSymbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Data);
-}
-
void PDBSymbolData::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
std::unique_ptr<IPDBEnumLineNumbers> PDBSymbolData::getLineNumbers() const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
index 7417167b61ad..eb409412af59 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolExe::PDBSymbolExe(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Exe);
-}
-
void PDBSymbolExe::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
uint32_t PDBSymbolExe::getPointerByteSize() const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 37ca1abe86e9..75063cb3e7f8 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -69,10 +69,6 @@ public:
void reset() override { CurIter = Args.empty() ? Args.end() : Args.begin(); }
- FunctionArgEnumerator *clone() const override {
- return new FunctionArgEnumerator(Session, Func);
- }
-
private:
typedef std::vector<std::unique_ptr<PDBSymbolData>> ArgListType;
const IPDBSession &Session;
@@ -82,12 +78,6 @@ private:
};
}
-PDBSymbolFunc::PDBSymbolFunc(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Function);
-}
-
std::unique_ptr<IPDBEnumChildren<PDBSymbolData>>
PDBSymbolFunc::getArguments() const {
return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
index 4a4195beb4ea..af8aafa7be96 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolFuncDebugEnd::PDBSymbolFuncDebugEnd(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::FuncDebugEnd);
-}
-
void PDBSymbolFuncDebugEnd::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
index a448a404dc4a..77b510873bea 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolFuncDebugStart::PDBSymbolFuncDebugStart(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::FuncDebugStart);
-}
-
void PDBSymbolFuncDebugStart::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
index a67a20d8e352..c802b97925e6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
@@ -16,10 +16,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolLabel::PDBSymbolLabel(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Label);
-}
-
void PDBSymbolLabel::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
index dbec16fcbaac..a2dd2ab92dd9 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolPublicSymbol::PDBSymbolPublicSymbol(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::PublicSymbol);
-}
-
void PDBSymbolPublicSymbol::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
index b2648197f9cc..d227e3a7a60c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
@@ -16,10 +16,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolThunk::PDBSymbolThunk(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Thunk);
-}
-
void PDBSymbolThunk::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
index ba40f65ef40f..a2064d1ac1eb 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeArray::PDBSymbolTypeArray(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::ArrayType);
-}
-
void PDBSymbolTypeArray::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
index 0fdf8b6d0f77..f0376c05557f 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeBaseClass::PDBSymbolTypeBaseClass(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::BaseClass);
-}
-
void PDBSymbolTypeBaseClass::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
index 0bf563af7df5..a9f59e5f9d4d 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeBuiltin::PDBSymbolTypeBuiltin(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::BuiltinType);
-}
-
void PDBSymbolTypeBuiltin::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
index 726e7e1cdbb4..cfb347fbac55 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeCustom::PDBSymbolTypeCustom(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::CustomType);
-}
-
void PDBSymbolTypeCustom::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
index 6c84b984d210..4eb48997635a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -17,13 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-
-PDBSymbolTypeDimension::PDBSymbolTypeDimension(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Dimension);
-}
-
void PDBSymbolTypeDimension::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
index f9c3067c20bf..2e88d9eb284a 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
@@ -17,10 +17,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeEnum::PDBSymbolTypeEnum(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Enum);
-}
-
void PDBSymbolTypeEnum::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
index c01877287888..00d2d51aa8a7 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeFriend::PDBSymbolTypeFriend(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Friend);
-}
-
void PDBSymbolTypeFriend::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
index 4d5cd63f6857..0399e110d592 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeFunctionArg::PDBSymbolTypeFunctionArg(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::FunctionArg);
-}
-
void PDBSymbolTypeFunctionArg::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index 8fd3b49155c9..c0564d3941dd 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -56,23 +56,12 @@ public:
void reset() override { Enumerator->reset(); }
- MyType *clone() const override {
- std::unique_ptr<ArgEnumeratorType> Clone(Enumerator->clone());
- return new FunctionArgEnumerator(Session, std::move(Clone));
- }
-
private:
const IPDBSession &Session;
std::unique_ptr<ArgEnumeratorType> Enumerator;
};
}
-PDBSymbolTypeFunctionSig::PDBSymbolTypeFunctionSig(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::FunctionSig);
-}
-
std::unique_ptr<IPDBEnumSymbols>
PDBSymbolTypeFunctionSig::getArguments() const {
return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
index 7cfba823b4fa..1faaf9c67a2c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeManaged::PDBSymbolTypeManaged(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::ManagedType);
-}
-
void PDBSymbolTypeManaged::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
index 69819811d61f..cf5a369116a9 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypePointer::PDBSymbolTypePointer(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::PointerType);
-}
-
void PDBSymbolTypePointer::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
index 102b540e0fef..1838f1612b49 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeTypedef::PDBSymbolTypeTypedef(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::Typedef);
-}
-
void PDBSymbolTypeTypedef::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
index 715ae15e1a7a..2f5222f34fe4 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
@@ -23,10 +23,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeUDT::PDBSymbolTypeUDT(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::UDT);
-}
-
void PDBSymbolTypeUDT::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
index 9a21855f57f0..0262f91e8336 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
@@ -16,12 +16,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeVTable::PDBSymbolTypeVTable(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::VTable);
-}
-
void PDBSymbolTypeVTable::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
index ddc0574617c5..16c3a3606981 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolTypeVTableShape::PDBSymbolTypeVTableShape(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::VTableShape);
-}
-
void PDBSymbolTypeVTableShape::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
index fdbe845f455a..7bcf9457a2b6 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
@@ -17,8 +17,4 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolUnknown::PDBSymbolUnknown(const IPDBSession &PDBSession,
- std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
-
void PDBSymbolUnknown::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
index f40578f4372a..ecf2126f8802 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -17,12 +17,6 @@
using namespace llvm;
using namespace llvm::pdb;
-PDBSymbolUsingNamespace::PDBSymbolUsingNamespace(
- const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {
- assert(RawSymbol->getSymTag() == PDB_SymType::UsingNamespace);
-}
-
void PDBSymbolUsingNamespace::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 95a356d33eb4..c3e750a1b932 100644
--- a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -411,7 +411,8 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
Objects.first->getFileName(), Session)) {
Modules.insert(
std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
- return std::move(Err);
+    // Return along with the PDB filename to provide more context
+ return createFileError(PDBFileName, std::move(Err));
}
Context.reset(new PDBContext(*CoffObject, std::move(Session)));
#else
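
The hunk above stops discarding the PDB error's context: instead of returning Err bare, it wraps it with the PDB file name via createFileError so the caller's diagnostic identifies the offending file. A minimal sketch of that wrapping, assuming only llvm/Support/Error.h (the function name and message here are illustrative):

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static Error findPdb(StringRef PDBFileName) {
  Error Err = createStringError(inconvertibleErrorCode(),
                                "no matching PDB could be loaded");
  // Prepend the PDB path so the caller's diagnostic reads
  // "<PDBFileName>: no matching PDB could be loaded".
  return createFileError(PDBFileName, std::move(Err));
}

int main() {
  if (Error E = findPdb("example.pdb"))
    logAllUnhandledErrors(std::move(E), errs(), "symbolizer: ");
}
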
diff --git a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp
index 72e4b56c05e3..b2de0be2b70c 100644
--- a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -11,1761 +11,239 @@
// file does not yet support:
// - C++ modules TS
-#include "Compiler.h"
-#include "StringView.h"
-#include "Utility.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/ItaniumDemangle.h"
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <functional>
#include <numeric>
#include <utility>
#include <vector>
-namespace {
-// Base class of all AST nodes. The AST is built by the parser, then is
-// traversed by the printLeft/Right functions to produce a demangled string.
-class Node {
-public:
- enum Kind : unsigned char {
- KNodeArrayNode,
- KDotSuffix,
- KVendorExtQualType,
- KQualType,
- KConversionOperatorType,
- KPostfixQualifiedType,
- KElaboratedTypeSpefType,
- KNameType,
- KAbiTagAttr,
- KEnableIfAttr,
- KObjCProtoName,
- KPointerType,
- KReferenceType,
- KPointerToMemberType,
- KArrayType,
- KFunctionType,
- KNoexceptSpec,
- KDynamicExceptionSpec,
- KFunctionEncoding,
- KLiteralOperator,
- KSpecialName,
- KCtorVtableSpecialName,
- KQualifiedName,
- KNestedName,
- KLocalName,
- KVectorType,
- KParameterPack,
- KTemplateArgumentPack,
- KParameterPackExpansion,
- KTemplateArgs,
- KForwardTemplateReference,
- KNameWithTemplateArgs,
- KGlobalQualifiedName,
- KStdQualifiedName,
- KExpandedSpecialSubstitution,
- KSpecialSubstitution,
- KCtorDtorName,
- KDtorName,
- KUnnamedTypeName,
- KClosureTypeName,
- KStructuredBindingName,
- KExpr,
- KBracedExpr,
- KBracedRangeExpr,
- };
-
- Kind K;
-
- /// Three-way bool to track a cached value. Unknown is possible if this node
- /// has an unexpanded parameter pack below it that may affect this cache.
- enum class Cache : unsigned char { Yes, No, Unknown, };
-
- /// Tracks if this node has a component on its right side, in which case we
- /// need to call printRight.
- Cache RHSComponentCache;
-
- /// Track if this node is a (possibly qualified) array type. This can affect
- /// how we format the output string.
- Cache ArrayCache;
-
- /// Track if this node is a (possibly qualified) function type. This can
- /// affect how we format the output string.
- Cache FunctionCache;
-
- Node(Kind K_, Cache RHSComponentCache_ = Cache::No,
- Cache ArrayCache_ = Cache::No, Cache FunctionCache_ = Cache::No)
- : K(K_), RHSComponentCache(RHSComponentCache_), ArrayCache(ArrayCache_),
- FunctionCache(FunctionCache_) {}
-
- bool hasRHSComponent(OutputStream &S) const {
- if (RHSComponentCache != Cache::Unknown)
- return RHSComponentCache == Cache::Yes;
- return hasRHSComponentSlow(S);
- }
-
- bool hasArray(OutputStream &S) const {
- if (ArrayCache != Cache::Unknown)
- return ArrayCache == Cache::Yes;
- return hasArraySlow(S);
- }
-
- bool hasFunction(OutputStream &S) const {
- if (FunctionCache != Cache::Unknown)
- return FunctionCache == Cache::Yes;
- return hasFunctionSlow(S);
- }
-
- Kind getKind() const { return K; }
-
- virtual bool hasRHSComponentSlow(OutputStream &) const { return false; }
- virtual bool hasArraySlow(OutputStream &) const { return false; }
- virtual bool hasFunctionSlow(OutputStream &) const { return false; }
-
- // Dig through "glue" nodes like ParameterPack and ForwardTemplateReference to
- // get at a node that actually represents some concrete syntax.
- virtual const Node *getSyntaxNode(OutputStream &) const {
- return this;
- }
+using namespace llvm;
+using namespace llvm::itanium_demangle;
- void print(OutputStream &S) const {
- printLeft(S);
- if (RHSComponentCache != Cache::No)
- printRight(S);
- }
-
- // Print the "left" side of this Node into OutputStream.
- virtual void printLeft(OutputStream &) const = 0;
-
- // Print the "right". This distinction is necessary to represent C++ types
- // that appear on the RHS of their subtype, such as arrays or functions.
- // Since most types don't have such a component, provide a default
- // implementation.
- virtual void printRight(OutputStream &) const {}
-
- virtual StringView getBaseName() const { return StringView(); }
-
- // Silence compiler warnings, this dtor will never be called.
- virtual ~Node() = default;
+constexpr const char *itanium_demangle::FloatData<float>::spec;
+constexpr const char *itanium_demangle::FloatData<double>::spec;
+constexpr const char *itanium_demangle::FloatData<long double>::spec;
-#ifndef NDEBUG
- LLVM_DUMP_METHOD void dump() const {
- char *Buffer = static_cast<char*>(std::malloc(1024));
- OutputStream S(Buffer, 1024);
- print(S);
- S += '\0';
- printf("Symbol dump for %p: %s\n", (const void*)this, S.getBuffer());
- std::free(S.getBuffer());
- }
-#endif
-};
-
-class NodeArray {
- Node **Elements;
- size_t NumElements;
-
-public:
- NodeArray() : Elements(nullptr), NumElements(0) {}
- NodeArray(Node **Elements_, size_t NumElements_)
- : Elements(Elements_), NumElements(NumElements_) {}
-
- bool empty() const { return NumElements == 0; }
- size_t size() const { return NumElements; }
-
- Node **begin() const { return Elements; }
- Node **end() const { return Elements + NumElements; }
-
- Node *operator[](size_t Idx) const { return Elements[Idx]; }
-
- void printWithComma(OutputStream &S) const {
- bool FirstElement = true;
- for (size_t Idx = 0; Idx != NumElements; ++Idx) {
- size_t BeforeComma = S.getCurrentPosition();
- if (!FirstElement)
- S += ", ";
- size_t AfterComma = S.getCurrentPosition();
- Elements[Idx]->print(S);
-
- // Elements[Idx] is an empty parameter pack expansion, we should erase the
- // comma we just printed.
- if (AfterComma == S.getCurrentPosition()) {
- S.setCurrentPosition(BeforeComma);
- continue;
+// <discriminator> := _ <non-negative number> # when number < 10
+// := __ <non-negative number> _ # when number >= 10
+// extension := decimal-digit+ # at the end of string
+const char *itanium_demangle::parse_discriminator(const char *first,
+ const char *last) {
+ // parse but ignore discriminator
+ if (first != last) {
+ if (*first == '_') {
+ const char *t1 = first + 1;
+ if (t1 != last) {
+ if (std::isdigit(*t1))
+ first = t1 + 1;
+ else if (*t1 == '_') {
+ for (++t1; t1 != last && std::isdigit(*t1); ++t1)
+ ;
+ if (t1 != last && *t1 == '_')
+ first = t1 + 1;
+ }
}
-
- FirstElement = false;
+ } else if (std::isdigit(*first)) {
+ const char *t1 = first + 1;
+ for (; t1 != last && std::isdigit(*t1); ++t1)
+ ;
+ if (t1 == last)
+ first = last;
}
}
-};
-
-struct NodeArrayNode : Node {
- NodeArray Array;
- NodeArrayNode(NodeArray Array_) : Node(KNodeArrayNode), Array(Array_) {}
- void printLeft(OutputStream &S) const override {
- Array.printWithComma(S);
- }
-};
-
-class DotSuffix final : public Node {
- const Node *Prefix;
- const StringView Suffix;
-
-public:
- DotSuffix(Node *Prefix_, StringView Suffix_)
- : Node(KDotSuffix), Prefix(Prefix_), Suffix(Suffix_) {}
-
- void printLeft(OutputStream &s) const override {
- Prefix->print(s);
- s += " (";
- s += Suffix;
- s += ")";
- }
-};
-
-class VendorExtQualType final : public Node {
- const Node *Ty;
- StringView Ext;
-
-public:
- VendorExtQualType(Node *Ty_, StringView Ext_)
- : Node(KVendorExtQualType), Ty(Ty_), Ext(Ext_) {}
-
- void printLeft(OutputStream &S) const override {
- Ty->print(S);
- S += " ";
- S += Ext;
- }
-};
-
-enum FunctionRefQual : unsigned char {
- FrefQualNone,
- FrefQualLValue,
- FrefQualRValue,
-};
-
-enum Qualifiers {
- QualNone = 0,
- QualConst = 0x1,
- QualVolatile = 0x2,
- QualRestrict = 0x4,
-};
-
-void addQualifiers(Qualifiers &Q1, Qualifiers Q2) {
- Q1 = static_cast<Qualifiers>(Q1 | Q2);
+ return first;
}
-class QualType : public Node {
-protected:
- const Qualifiers Quals;
- const Node *Child;
-
- void printQuals(OutputStream &S) const {
- if (Quals & QualConst)
- S += " const";
- if (Quals & QualVolatile)
- S += " volatile";
- if (Quals & QualRestrict)
- S += " restrict";
- }
-
-public:
- QualType(Node *Child_, Qualifiers Quals_)
- : Node(KQualType, Child_->RHSComponentCache,
- Child_->ArrayCache, Child_->FunctionCache),
- Quals(Quals_), Child(Child_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Child->hasRHSComponent(S);
- }
- bool hasArraySlow(OutputStream &S) const override {
- return Child->hasArray(S);
- }
- bool hasFunctionSlow(OutputStream &S) const override {
- return Child->hasFunction(S);
- }
-
- void printLeft(OutputStream &S) const override {
- Child->printLeft(S);
- printQuals(S);
- }
-
- void printRight(OutputStream &S) const override { Child->printRight(S); }
-};
-
-class ConversionOperatorType final : public Node {
- const Node *Ty;
-
-public:
- ConversionOperatorType(Node *Ty_)
- : Node(KConversionOperatorType), Ty(Ty_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "operator ";
- Ty->print(S);
- }
-};
-
-class PostfixQualifiedType final : public Node {
- const Node *Ty;
- const StringView Postfix;
-
-public:
- PostfixQualifiedType(Node *Ty_, StringView Postfix_)
- : Node(KPostfixQualifiedType), Ty(Ty_), Postfix(Postfix_) {}
-
- void printLeft(OutputStream &s) const override {
- Ty->printLeft(s);
- s += Postfix;
- }
-};
-
-class NameType final : public Node {
- const StringView Name;
-
-public:
- NameType(StringView Name_) : Node(KNameType), Name(Name_) {}
-
- StringView getName() const { return Name; }
- StringView getBaseName() const override { return Name; }
-
- void printLeft(OutputStream &s) const override { s += Name; }
-};
-
-class ElaboratedTypeSpefType : public Node {
- StringView Kind;
- Node *Child;
-public:
- ElaboratedTypeSpefType(StringView Kind_, Node *Child_)
- : Node(KElaboratedTypeSpefType), Kind(Kind_), Child(Child_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Kind;
- S += ' ';
- Child->print(S);
- }
-};
-
-struct AbiTagAttr : Node {
- Node *Base;
- StringView Tag;
-
- AbiTagAttr(Node* Base_, StringView Tag_)
- : Node(KAbiTagAttr, Base_->RHSComponentCache,
- Base_->ArrayCache, Base_->FunctionCache),
- Base(Base_), Tag(Tag_) {}
-
- void printLeft(OutputStream &S) const override {
- Base->printLeft(S);
- S += "[abi:";
- S += Tag;
- S += "]";
- }
-};
-
-class EnableIfAttr : public Node {
- NodeArray Conditions;
-public:
- EnableIfAttr(NodeArray Conditions_)
- : Node(KEnableIfAttr), Conditions(Conditions_) {}
-
- void printLeft(OutputStream &S) const override {
- S += " [enable_if:";
- Conditions.printWithComma(S);
- S += ']';
- }
-};
-
-class ObjCProtoName : public Node {
- Node *Ty;
- StringView Protocol;
-
- friend class PointerType;
-
-public:
- ObjCProtoName(Node *Ty_, StringView Protocol_)
- : Node(KObjCProtoName), Ty(Ty_), Protocol(Protocol_) {}
-
- bool isObjCObject() const {
- return Ty->getKind() == KNameType &&
- static_cast<NameType *>(Ty)->getName() == "objc_object";
- }
-
- void printLeft(OutputStream &S) const override {
- Ty->print(S);
- S += "<";
- S += Protocol;
- S += ">";
- }
-};
-
-class PointerType final : public Node {
- const Node *Pointee;
-
-public:
- PointerType(Node *Pointee_)
- : Node(KPointerType, Pointee_->RHSComponentCache),
- Pointee(Pointee_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Pointee->hasRHSComponent(S);
- }
-
- void printLeft(OutputStream &s) const override {
- // We rewrite objc_object<SomeProtocol>* into id<SomeProtocol>.
- if (Pointee->getKind() != KObjCProtoName ||
- !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
- Pointee->printLeft(s);
- if (Pointee->hasArray(s))
- s += " ";
- if (Pointee->hasArray(s) || Pointee->hasFunction(s))
- s += "(";
- s += "*";
- } else {
- const auto *objcProto = static_cast<const ObjCProtoName *>(Pointee);
- s += "id<";
- s += objcProto->Protocol;
- s += ">";
- }
- }
-
- void printRight(OutputStream &s) const override {
- if (Pointee->getKind() != KObjCProtoName ||
- !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
- if (Pointee->hasArray(s) || Pointee->hasFunction(s))
- s += ")";
- Pointee->printRight(s);
- }
- }
-};
-
-enum class ReferenceKind {
- LValue,
- RValue,
-};
-
-// Represents either a LValue or an RValue reference type.
-class ReferenceType : public Node {
- const Node *Pointee;
- ReferenceKind RK;
-
- mutable bool Printing = false;
+#ifndef NDEBUG
+namespace {
+struct DumpVisitor {
+ unsigned Depth = 0;
+ bool PendingNewline = false;
- // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
- // rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any
- // other combination collapses to a lvalue ref.
- std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const {
- auto SoFar = std::make_pair(RK, Pointee);
- for (;;) {
- const Node *SN = SoFar.second->getSyntaxNode(S);
- if (SN->getKind() != KReferenceType)
- break;
- auto *RT = static_cast<const ReferenceType *>(SN);
- SoFar.second = RT->Pointee;
- SoFar.first = std::min(SoFar.first, RT->RK);
- }
- return SoFar;
+ template<typename NodeT> static constexpr bool wantsNewline(const NodeT *) {
+ return true;
}
+ static bool wantsNewline(NodeArray A) { return !A.empty(); }
+ static constexpr bool wantsNewline(...) { return false; }
-public:
- ReferenceType(Node *Pointee_, ReferenceKind RK_)
- : Node(KReferenceType, Pointee_->RHSComponentCache),
- Pointee(Pointee_), RK(RK_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Pointee->hasRHSComponent(S);
+ template<typename ...Ts> static bool anyWantNewline(Ts ...Vs) {
+ for (bool B : {wantsNewline(Vs)...})
+ if (B)
+ return true;
+ return false;
}
- void printLeft(OutputStream &s) const override {
- if (Printing)
- return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
- Collapsed.second->printLeft(s);
- if (Collapsed.second->hasArray(s))
- s += " ";
- if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
- s += "(";
-
- s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
+ void printStr(const char *S) { fprintf(stderr, "%s", S); }
+ void print(StringView SV) {
+ fprintf(stderr, "\"%.*s\"", (int)SV.size(), SV.begin());
}
- void printRight(OutputStream &s) const override {
- if (Printing)
- return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
- if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
- s += ")";
- Collapsed.second->printRight(s);
- }
-};
-
-class PointerToMemberType final : public Node {
- const Node *ClassType;
- const Node *MemberType;
-
-public:
- PointerToMemberType(Node *ClassType_, Node *MemberType_)
- : Node(KPointerToMemberType, MemberType_->RHSComponentCache),
- ClassType(ClassType_), MemberType(MemberType_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return MemberType->hasRHSComponent(S);
- }
-
- void printLeft(OutputStream &s) const override {
- MemberType->printLeft(s);
- if (MemberType->hasArray(s) || MemberType->hasFunction(s))
- s += "(";
+ void print(const Node *N) {
+ if (N)
+ N->visit(std::ref(*this));
else
- s += " ";
- ClassType->print(s);
- s += "::*";
- }
-
- void printRight(OutputStream &s) const override {
- if (MemberType->hasArray(s) || MemberType->hasFunction(s))
- s += ")";
- MemberType->printRight(s);
- }
-};
-
-class NodeOrString {
- const void *First;
- const void *Second;
-
-public:
- /* implicit */ NodeOrString(StringView Str) {
- const char *FirstChar = Str.begin();
- const char *SecondChar = Str.end();
- if (SecondChar == nullptr) {
- assert(FirstChar == SecondChar);
- ++FirstChar, ++SecondChar;
- }
- First = static_cast<const void *>(FirstChar);
- Second = static_cast<const void *>(SecondChar);
- }
-
- /* implicit */ NodeOrString(Node *N)
- : First(static_cast<const void *>(N)), Second(nullptr) {}
- NodeOrString() : First(nullptr), Second(nullptr) {}
-
- bool isString() const { return Second && First; }
- bool isNode() const { return First && !Second; }
- bool isEmpty() const { return !First && !Second; }
-
- StringView asString() const {
- assert(isString());
- return StringView(static_cast<const char *>(First),
- static_cast<const char *>(Second));
- }
-
- const Node *asNode() const {
- assert(isNode());
- return static_cast<const Node *>(First);
- }
-};
-
-class ArrayType final : public Node {
- Node *Base;
- NodeOrString Dimension;
-
-public:
- ArrayType(Node *Base_, NodeOrString Dimension_)
- : Node(KArrayType,
- /*RHSComponentCache=*/Cache::Yes,
- /*ArrayCache=*/Cache::Yes),
- Base(Base_), Dimension(Dimension_) {}
-
- // Incomplete array type.
- ArrayType(Node *Base_)
- : Node(KArrayType,
- /*RHSComponentCache=*/Cache::Yes,
- /*ArrayCache=*/Cache::Yes),
- Base(Base_) {}
-
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasArraySlow(OutputStream &) const override { return true; }
-
- void printLeft(OutputStream &S) const override { Base->printLeft(S); }
-
- void printRight(OutputStream &S) const override {
- if (S.back() != ']')
- S += " ";
- S += "[";
- if (Dimension.isString())
- S += Dimension.asString();
- else if (Dimension.isNode())
- Dimension.asNode()->print(S);
- S += "]";
- Base->printRight(S);
- }
-};
-
-class FunctionType final : public Node {
- Node *Ret;
- NodeArray Params;
- Qualifiers CVQuals;
- FunctionRefQual RefQual;
- Node *ExceptionSpec;
-
-public:
- FunctionType(Node *Ret_, NodeArray Params_, Qualifiers CVQuals_,
- FunctionRefQual RefQual_, Node *ExceptionSpec_)
- : Node(KFunctionType,
- /*RHSComponentCache=*/Cache::Yes, /*ArrayCache=*/Cache::No,
- /*FunctionCache=*/Cache::Yes),
- Ret(Ret_), Params(Params_), CVQuals(CVQuals_), RefQual(RefQual_),
- ExceptionSpec(ExceptionSpec_) {}
-
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasFunctionSlow(OutputStream &) const override { return true; }
-
- // Handle C++'s ... quirky decl grammar by using the left & right
- // distinction. Consider:
- // int (*f(float))(char) {}
- // f is a function that takes a float and returns a pointer to a function
- // that takes a char and returns an int. If we're trying to print f, start
- // by printing out the return types's left, then print our parameters, then
- // finally print right of the return type.
- void printLeft(OutputStream &S) const override {
- Ret->printLeft(S);
- S += " ";
- }
-
- void printRight(OutputStream &S) const override {
- S += "(";
- Params.printWithComma(S);
- S += ")";
- Ret->printRight(S);
-
- if (CVQuals & QualConst)
- S += " const";
- if (CVQuals & QualVolatile)
- S += " volatile";
- if (CVQuals & QualRestrict)
- S += " restrict";
-
- if (RefQual == FrefQualLValue)
- S += " &";
- else if (RefQual == FrefQualRValue)
- S += " &&";
-
- if (ExceptionSpec != nullptr) {
- S += ' ';
- ExceptionSpec->print(S);
- }
- }
-};
-
-class NoexceptSpec : public Node {
- Node *E;
-public:
- NoexceptSpec(Node *E_) : Node(KNoexceptSpec), E(E_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "noexcept(";
- E->print(S);
- S += ")";
+ printStr("<null>");
}
-};
-
-class DynamicExceptionSpec : public Node {
- NodeArray Types;
-public:
- DynamicExceptionSpec(NodeArray Types_)
- : Node(KDynamicExceptionSpec), Types(Types_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "throw(";
- Types.printWithComma(S);
- S += ')';
- }
-};
-
-class FunctionEncoding final : public Node {
- Node *Ret;
- Node *Name;
- NodeArray Params;
- Node *Attrs;
- Qualifiers CVQuals;
- FunctionRefQual RefQual;
-
-public:
- FunctionEncoding(Node *Ret_, Node *Name_, NodeArray Params_,
- Node *Attrs_, Qualifiers CVQuals_, FunctionRefQual RefQual_)
- : Node(KFunctionEncoding,
- /*RHSComponentCache=*/Cache::Yes, /*ArrayCache=*/Cache::No,
- /*FunctionCache=*/Cache::Yes),
- Ret(Ret_), Name(Name_), Params(Params_), Attrs(Attrs_),
- CVQuals(CVQuals_), RefQual(RefQual_) {}
-
- Qualifiers getCVQuals() const { return CVQuals; }
- FunctionRefQual getRefQual() const { return RefQual; }
- NodeArray getParams() const { return Params; }
- Node *getReturnType() const { return Ret; }
-
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasFunctionSlow(OutputStream &) const override { return true; }
-
- Node *getName() { return const_cast<Node *>(Name); }
-
- void printLeft(OutputStream &S) const override {
- if (Ret) {
- Ret->printLeft(S);
- if (!Ret->hasRHSComponent(S))
- S += " ";
+ void print(NodeOrString NS) {
+ if (NS.isNode())
+ print(NS.asNode());
+ else if (NS.isString())
+ print(NS.asString());
+ else
+ printStr("NodeOrString()");
+ }
+ void print(NodeArray A) {
+ ++Depth;
+ printStr("{");
+ bool First = true;
+ for (const Node *N : A) {
+ if (First)
+ print(N);
+ else
+ printWithComma(N);
+ First = false;
}
- Name->print(S);
- }
-
- void printRight(OutputStream &S) const override {
- S += "(";
- Params.printWithComma(S);
- S += ")";
- if (Ret)
- Ret->printRight(S);
-
- if (CVQuals & QualConst)
- S += " const";
- if (CVQuals & QualVolatile)
- S += " volatile";
- if (CVQuals & QualRestrict)
- S += " restrict";
-
- if (RefQual == FrefQualLValue)
- S += " &";
- else if (RefQual == FrefQualRValue)
- S += " &&";
-
- if (Attrs != nullptr)
- Attrs->print(S);
- }
-};
-
-class LiteralOperator : public Node {
- const Node *OpName;
-
-public:
- LiteralOperator(Node *OpName_) : Node(KLiteralOperator), OpName(OpName_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "operator\"\" ";
- OpName->print(S);
- }
-};
-
-class SpecialName final : public Node {
- const StringView Special;
- const Node *Child;
-
-public:
- SpecialName(StringView Special_, Node* Child_)
- : Node(KSpecialName), Special(Special_), Child(Child_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Special;
- Child->print(S);
+ printStr("}");
+ --Depth;
}
-};
-class CtorVtableSpecialName final : public Node {
- const Node *FirstType;
- const Node *SecondType;
+ // Overload used when T is exactly 'bool', not merely convertible to 'bool'.
+ void print(bool B) { printStr(B ? "true" : "false"); }
-public:
- CtorVtableSpecialName(Node *FirstType_, Node *SecondType_)
- : Node(KCtorVtableSpecialName),
- FirstType(FirstType_), SecondType(SecondType_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "construction vtable for ";
- FirstType->print(S);
- S += "-in-";
- SecondType->print(S);
+ template <class T>
+ typename std::enable_if<std::is_unsigned<T>::value>::type print(T N) {
+ fprintf(stderr, "%llu", (unsigned long long)N);
}
-};
-struct NestedName : Node {
- Node *Qual;
- Node *Name;
-
- NestedName(Node *Qual_, Node *Name_)
- : Node(KNestedName), Qual(Qual_), Name(Name_) {}
-
- StringView getBaseName() const override { return Name->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- Qual->print(S);
- S += "::";
- Name->print(S);
+ template <class T>
+ typename std::enable_if<std::is_signed<T>::value>::type print(T N) {
+ fprintf(stderr, "%lld", (long long)N);
}
-};
-
-struct LocalName : Node {
- Node *Encoding;
- Node *Entity;
-
- LocalName(Node *Encoding_, Node *Entity_)
- : Node(KLocalName), Encoding(Encoding_), Entity(Entity_) {}
-
- void printLeft(OutputStream &S) const override {
- Encoding->print(S);
- S += "::";
- Entity->print(S);
- }
-};
-
-class QualifiedName final : public Node {
- // qualifier::name
- const Node *Qualifier;
- const Node *Name;
-
-public:
- QualifiedName(Node* Qualifier_, Node* Name_)
- : Node(KQualifiedName), Qualifier(Qualifier_), Name(Name_) {}
- StringView getBaseName() const override { return Name->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- Qualifier->print(S);
- S += "::";
- Name->print(S);
- }
-};
-
-class VectorType final : public Node {
- const Node *BaseType;
- const NodeOrString Dimension;
- const bool IsPixel;
-
-public:
- VectorType(NodeOrString Dimension_)
- : Node(KVectorType), BaseType(nullptr), Dimension(Dimension_),
- IsPixel(true) {}
- VectorType(Node *BaseType_, NodeOrString Dimension_)
- : Node(KVectorType), BaseType(BaseType_),
- Dimension(Dimension_), IsPixel(false) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsPixel) {
- S += "pixel vector[";
- S += Dimension.asString();
- S += "]";
- } else {
- BaseType->print(S);
- S += " vector[";
- if (Dimension.isNode())
- Dimension.asNode()->print(S);
- else if (Dimension.isString())
- S += Dimension.asString();
- S += "]";
+ void print(ReferenceKind RK) {
+ switch (RK) {
+ case ReferenceKind::LValue:
+ return printStr("ReferenceKind::LValue");
+ case ReferenceKind::RValue:
+ return printStr("ReferenceKind::RValue");
}
}
-};
-
-/// An unexpanded parameter pack (either in the expression or type context). If
-/// this AST is correct, this node will have a ParameterPackExpansion node above
-/// it.
-///
-/// This node is created when some <template-args> are found that apply to an
-/// <encoding>, and is stored in the TemplateParams table. In order for this to
-/// appear in the final AST, it has to referenced via a <template-param> (ie,
-/// T_).
-class ParameterPack final : public Node {
- NodeArray Data;
-
- // Setup OutputStream for a pack expansion unless we're already expanding one.
- void initializePackExpansion(OutputStream &S) const {
- if (S.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
- S.CurrentPackMax = static_cast<unsigned>(Data.size());
- S.CurrentPackIndex = 0;
- }
- }
-
-public:
- ParameterPack(NodeArray Data_) : Node(KParameterPack), Data(Data_) {
- ArrayCache = FunctionCache = RHSComponentCache = Cache::Unknown;
- if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
- return P->ArrayCache == Cache::No;
- }))
- ArrayCache = Cache::No;
- if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
- return P->FunctionCache == Cache::No;
- }))
- FunctionCache = Cache::No;
- if (std::all_of(Data.begin(), Data.end(), [](Node* P) {
- return P->RHSComponentCache == Cache::No;
- }))
- RHSComponentCache = Cache::No;
- }
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasRHSComponent(S);
- }
- bool hasArraySlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasArray(S);
- }
- bool hasFunctionSlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasFunction(S);
- }
- const Node *getSyntaxNode(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() ? Data[Idx]->getSyntaxNode(S) : this;
- }
-
- void printLeft(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- if (Idx < Data.size())
- Data[Idx]->printLeft(S);
- }
- void printRight(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- if (Idx < Data.size())
- Data[Idx]->printRight(S);
- }
-};
-
-/// A variadic template argument. This node represents an occurrence of
-/// J<something>E in some <template-args>. It isn't itself unexpanded, unless
-/// one of it's Elements is. The parser inserts a ParameterPack into the
-/// TemplateParams table if the <template-args> this pack belongs to apply to an
-/// <encoding>.
-class TemplateArgumentPack final : public Node {
- NodeArray Elements;
-public:
- TemplateArgumentPack(NodeArray Elements_)
- : Node(KTemplateArgumentPack), Elements(Elements_) {}
-
- NodeArray getElements() const { return Elements; }
-
- void printLeft(OutputStream &S) const override {
- Elements.printWithComma(S);
- }
-};
-
-/// A pack expansion. Below this node, there are some unexpanded ParameterPacks
-/// which each have Child->ParameterPackSize elements.
-class ParameterPackExpansion final : public Node {
- const Node *Child;
-
-public:
- ParameterPackExpansion(Node* Child_)
- : Node(KParameterPackExpansion), Child(Child_) {}
-
- const Node *getChild() const { return Child; }
-
- void printLeft(OutputStream &S) const override {
- constexpr unsigned Max = std::numeric_limits<unsigned>::max();
- SwapAndRestore<unsigned> SavePackIdx(S.CurrentPackIndex, Max);
- SwapAndRestore<unsigned> SavePackMax(S.CurrentPackMax, Max);
- size_t StreamPos = S.getCurrentPosition();
-
- // Print the first element in the pack. If Child contains a ParameterPack,
- // it will set up S.CurrentPackMax and print the first element.
- Child->print(S);
-
- // No ParameterPack was found in Child. This can occur if we've found a pack
- // expansion on a <function-param>.
- if (S.CurrentPackMax == Max) {
- S += "...";
- return;
+ void print(FunctionRefQual RQ) {
+ switch (RQ) {
+ case FunctionRefQual::FrefQualNone:
+ return printStr("FunctionRefQual::FrefQualNone");
+ case FunctionRefQual::FrefQualLValue:
+ return printStr("FunctionRefQual::FrefQualLValue");
+ case FunctionRefQual::FrefQualRValue:
+ return printStr("FunctionRefQual::FrefQualRValue");
}
-
- // We found a ParameterPack, but it has no elements. Erase whatever we may
- // of printed.
- if (S.CurrentPackMax == 0) {
- S.setCurrentPosition(StreamPos);
- return;
- }
-
- // Else, iterate through the rest of the elements in the pack.
- for (unsigned I = 1, E = S.CurrentPackMax; I < E; ++I) {
- S += ", ";
- S.CurrentPackIndex = I;
- Child->print(S);
- }
- }
-};
-
-class TemplateArgs final : public Node {
- NodeArray Params;
-
-public:
- TemplateArgs(NodeArray Params_) : Node(KTemplateArgs), Params(Params_) {}
-
- NodeArray getParams() { return Params; }
-
- void printLeft(OutputStream &S) const override {
- S += "<";
- Params.printWithComma(S);
- if (S.back() == '>')
- S += " ";
- S += ">";
}
-};
-
-struct ForwardTemplateReference : Node {
- size_t Index;
- Node *Ref = nullptr;
-
- // If we're currently printing this node. It is possible (though invalid) for
- // a forward template reference to refer to itself via a substitution. This
- // creates a cyclic AST, which will stack overflow printing. To fix this, bail
- // out if more than one print* function is active.
- mutable bool Printing = false;
-
- ForwardTemplateReference(size_t Index_)
- : Node(KForwardTemplateReference, Cache::Unknown, Cache::Unknown,
- Cache::Unknown),
- Index(Index_) {}
-
- bool hasRHSComponentSlow(OutputStream &S) const override {
- if (Printing)
- return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasRHSComponent(S);
- }
- bool hasArraySlow(OutputStream &S) const override {
- if (Printing)
- return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasArray(S);
- }
- bool hasFunctionSlow(OutputStream &S) const override {
- if (Printing)
- return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasFunction(S);
- }
- const Node *getSyntaxNode(OutputStream &S) const override {
- if (Printing)
- return this;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->getSyntaxNode(S);
- }
-
- void printLeft(OutputStream &S) const override {
- if (Printing)
- return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- Ref->printLeft(S);
- }
- void printRight(OutputStream &S) const override {
- if (Printing)
- return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
- Ref->printRight(S);
- }
-};
-
-struct NameWithTemplateArgs : Node {
- // name<template_args>
- Node *Name;
- Node *TemplateArgs;
-
- NameWithTemplateArgs(Node *Name_, Node *TemplateArgs_)
- : Node(KNameWithTemplateArgs), Name(Name_), TemplateArgs(TemplateArgs_) {}
-
- StringView getBaseName() const override { return Name->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- Name->print(S);
- TemplateArgs->print(S);
- }
-};
-
-class GlobalQualifiedName final : public Node {
- Node *Child;
-
-public:
- GlobalQualifiedName(Node* Child_)
- : Node(KGlobalQualifiedName), Child(Child_) {}
-
- StringView getBaseName() const override { return Child->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- S += "::";
- Child->print(S);
- }
-};
-
-struct StdQualifiedName : Node {
- Node *Child;
-
- StdQualifiedName(Node *Child_) : Node(KStdQualifiedName), Child(Child_) {}
-
- StringView getBaseName() const override { return Child->getBaseName(); }
-
- void printLeft(OutputStream &S) const override {
- S += "std::";
- Child->print(S);
- }
-};
-
-enum class SpecialSubKind {
- allocator,
- basic_string,
- string,
- istream,
- ostream,
- iostream,
-};
-
-class ExpandedSpecialSubstitution final : public Node {
- SpecialSubKind SSK;
-
-public:
- ExpandedSpecialSubstitution(SpecialSubKind SSK_)
- : Node(KExpandedSpecialSubstitution), SSK(SSK_) {}
-
- StringView getBaseName() const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- return StringView("allocator");
- case SpecialSubKind::basic_string:
- return StringView("basic_string");
- case SpecialSubKind::string:
- return StringView("basic_string");
- case SpecialSubKind::istream:
- return StringView("basic_istream");
- case SpecialSubKind::ostream:
- return StringView("basic_ostream");
- case SpecialSubKind::iostream:
- return StringView("basic_iostream");
- }
- LLVM_BUILTIN_UNREACHABLE;
- }
-
- void printLeft(OutputStream &S) const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- S += "std::basic_string<char, std::char_traits<char>, "
- "std::allocator<char> >";
- break;
- case SpecialSubKind::basic_string:
- case SpecialSubKind::string:
- S += "std::basic_string<char, std::char_traits<char>, "
- "std::allocator<char> >";
- break;
- case SpecialSubKind::istream:
- S += "std::basic_istream<char, std::char_traits<char> >";
- break;
- case SpecialSubKind::ostream:
- S += "std::basic_ostream<char, std::char_traits<char> >";
- break;
- case SpecialSubKind::iostream:
- S += "std::basic_iostream<char, std::char_traits<char> >";
- break;
- }
- }
-};
-
-class SpecialSubstitution final : public Node {
-public:
- SpecialSubKind SSK;
-
- SpecialSubstitution(SpecialSubKind SSK_)
- : Node(KSpecialSubstitution), SSK(SSK_) {}
-
- StringView getBaseName() const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- return StringView("allocator");
- case SpecialSubKind::basic_string:
- return StringView("basic_string");
- case SpecialSubKind::string:
- return StringView("string");
- case SpecialSubKind::istream:
- return StringView("istream");
- case SpecialSubKind::ostream:
- return StringView("ostream");
- case SpecialSubKind::iostream:
- return StringView("iostream");
+ void print(Qualifiers Qs) {
+ if (!Qs) return printStr("QualNone");
+ struct QualName { Qualifiers Q; const char *Name; } Names[] = {
+ {QualConst, "QualConst"},
+ {QualVolatile, "QualVolatile"},
+ {QualRestrict, "QualRestrict"},
+ };
+ for (QualName Name : Names) {
+ if (Qs & Name.Q) {
+ printStr(Name.Name);
+ Qs = Qualifiers(Qs & ~Name.Q);
+ if (Qs) printStr(" | ");
+ }
}
- LLVM_BUILTIN_UNREACHABLE;
}
-
- void printLeft(OutputStream &S) const override {
+ void print(SpecialSubKind SSK) {
switch (SSK) {
case SpecialSubKind::allocator:
- S += "std::allocator";
- break;
+ return printStr("SpecialSubKind::allocator");
case SpecialSubKind::basic_string:
- S += "std::basic_string";
- break;
+ return printStr("SpecialSubKind::basic_string");
case SpecialSubKind::string:
- S += "std::string";
- break;
+ return printStr("SpecialSubKind::string");
case SpecialSubKind::istream:
- S += "std::istream";
- break;
+ return printStr("SpecialSubKind::istream");
case SpecialSubKind::ostream:
- S += "std::ostream";
- break;
+ return printStr("SpecialSubKind::ostream");
case SpecialSubKind::iostream:
- S += "std::iostream";
- break;
- }
- }
-};
-
-class CtorDtorName final : public Node {
- const Node *Basename;
- const bool IsDtor;
-
-public:
- CtorDtorName(Node *Basename_, bool IsDtor_)
- : Node(KCtorDtorName), Basename(Basename_), IsDtor(IsDtor_) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsDtor)
- S += "~";
- S += Basename->getBaseName();
- }
-};
-
-class DtorName : public Node {
- const Node *Base;
-
-public:
- DtorName(Node *Base_) : Node(KDtorName), Base(Base_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "~";
- Base->printLeft(S);
- }
-};
-
-class UnnamedTypeName : public Node {
- const StringView Count;
-
-public:
- UnnamedTypeName(StringView Count_) : Node(KUnnamedTypeName), Count(Count_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "'unnamed";
- S += Count;
- S += "\'";
- }
-};
-
-class ClosureTypeName : public Node {
- NodeArray Params;
- StringView Count;
-
-public:
- ClosureTypeName(NodeArray Params_, StringView Count_)
- : Node(KClosureTypeName), Params(Params_), Count(Count_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "\'lambda";
- S += Count;
- S += "\'(";
- Params.printWithComma(S);
- S += ")";
- }
-};
-
-class StructuredBindingName : public Node {
- NodeArray Bindings;
-public:
- StructuredBindingName(NodeArray Bindings_)
- : Node(KStructuredBindingName), Bindings(Bindings_) {}
-
- void printLeft(OutputStream &S) const override {
- S += '[';
- Bindings.printWithComma(S);
- S += ']';
- }
-};
-
-// -- Expression Nodes --
-
-struct Expr : public Node {
- Expr(Kind K = KExpr) : Node(K) {}
-};
-
-class BinaryExpr : public Expr {
- const Node *LHS;
- const StringView InfixOperator;
- const Node *RHS;
-
-public:
- BinaryExpr(Node *LHS_, StringView InfixOperator_, Node *RHS_)
- : LHS(LHS_), InfixOperator(InfixOperator_), RHS(RHS_) {}
-
- void printLeft(OutputStream &S) const override {
- // might be a template argument expression, then we need to disambiguate
- // with parens.
- if (InfixOperator == ">")
- S += "(";
-
- S += "(";
- LHS->print(S);
- S += ") ";
- S += InfixOperator;
- S += " (";
- RHS->print(S);
- S += ")";
-
- if (InfixOperator == ">")
- S += ")";
- }
-};
-
-class ArraySubscriptExpr : public Expr {
- const Node *Op1;
- const Node *Op2;
-
-public:
- ArraySubscriptExpr(Node *Op1_, Node *Op2_) : Op1(Op1_), Op2(Op2_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Op1->print(S);
- S += ")[";
- Op2->print(S);
- S += "]";
- }
-};
-
-class PostfixExpr : public Expr {
- const Node *Child;
- const StringView Operand;
-
-public:
- PostfixExpr(Node *Child_, StringView Operand_)
- : Child(Child_), Operand(Operand_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Child->print(S);
- S += ")";
- S += Operand;
- }
-};
-
-class ConditionalExpr : public Expr {
- const Node *Cond;
- const Node *Then;
- const Node *Else;
-
-public:
- ConditionalExpr(Node *Cond_, Node *Then_, Node *Else_)
- : Cond(Cond_), Then(Then_), Else(Else_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Cond->print(S);
- S += ") ? (";
- Then->print(S);
- S += ") : (";
- Else->print(S);
- S += ")";
- }
-};
-
-class MemberExpr : public Expr {
- const Node *LHS;
- const StringView Kind;
- const Node *RHS;
-
-public:
- MemberExpr(Node *LHS_, StringView Kind_, Node *RHS_)
- : LHS(LHS_), Kind(Kind_), RHS(RHS_) {}
-
- void printLeft(OutputStream &S) const override {
- LHS->print(S);
- S += Kind;
- RHS->print(S);
- }
-};
-
-class EnclosingExpr : public Expr {
- const StringView Prefix;
- const Node *Infix;
- const StringView Postfix;
-
-public:
- EnclosingExpr(StringView Prefix_, Node *Infix_, StringView Postfix_)
- : Prefix(Prefix_), Infix(Infix_), Postfix(Postfix_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Prefix;
- Infix->print(S);
- S += Postfix;
- }
-};
-
-class CastExpr : public Expr {
- // cast_kind<to>(from)
- const StringView CastKind;
- const Node *To;
- const Node *From;
-
-public:
- CastExpr(StringView CastKind_, Node *To_, Node *From_)
- : CastKind(CastKind_), To(To_), From(From_) {}
-
- void printLeft(OutputStream &S) const override {
- S += CastKind;
- S += "<";
- To->printLeft(S);
- S += ">(";
- From->printLeft(S);
- S += ")";
- }
-};
-
-class SizeofParamPackExpr : public Expr {
- Node *Pack;
-
-public:
- SizeofParamPackExpr(Node *Pack_) : Pack(Pack_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "sizeof...(";
- ParameterPackExpansion PPE(Pack);
- PPE.printLeft(S);
- S += ")";
- }
-};
-
-class CallExpr : public Expr {
- const Node *Callee;
- NodeArray Args;
-
-public:
- CallExpr(Node *Callee_, NodeArray Args_) : Callee(Callee_), Args(Args_) {}
-
- void printLeft(OutputStream &S) const override {
- Callee->print(S);
- S += "(";
- Args.printWithComma(S);
- S += ")";
- }
-};
-
-class NewExpr : public Expr {
- // new (expr_list) type(init_list)
- NodeArray ExprList;
- Node *Type;
- NodeArray InitList;
- bool IsGlobal; // ::operator new ?
- bool IsArray; // new[] ?
-public:
- NewExpr(NodeArray ExprList_, Node *Type_, NodeArray InitList_, bool IsGlobal_,
- bool IsArray_)
- : ExprList(ExprList_), Type(Type_), InitList(InitList_),
- IsGlobal(IsGlobal_), IsArray(IsArray_) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsGlobal)
- S += "::operator ";
- S += "new";
- if (IsArray)
- S += "[]";
- S += ' ';
- if (!ExprList.empty()) {
- S += "(";
- ExprList.printWithComma(S);
- S += ")";
+ return printStr("SpecialSubKind::iostream");
}
- Type->print(S);
- if (!InitList.empty()) {
- S += "(";
- InitList.printWithComma(S);
- S += ")";
- }
-
- }
-};
-
-class DeleteExpr : public Expr {
- Node *Op;
- bool IsGlobal;
- bool IsArray;
-
-public:
- DeleteExpr(Node *Op_, bool IsGlobal_, bool IsArray_)
- : Op(Op_), IsGlobal(IsGlobal_), IsArray(IsArray_) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsGlobal)
- S += "::";
- S += "delete";
- if (IsArray)
- S += "[] ";
- Op->print(S);
- }
-};
-
-class PrefixExpr : public Expr {
- StringView Prefix;
- Node *Child;
-
-public:
- PrefixExpr(StringView Prefix_, Node *Child_) : Prefix(Prefix_), Child(Child_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Prefix;
- S += "(";
- Child->print(S);
- S += ")";
}
-};
-
-class FunctionParam : public Expr {
- StringView Number;
-public:
- FunctionParam(StringView Number_) : Number(Number_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "fp";
- S += Number;
+ void newLine() {
+ printStr("\n");
+ for (unsigned I = 0; I != Depth; ++I)
+ printStr(" ");
+ PendingNewline = false;
}
-};
-class ConversionExpr : public Expr {
- const Node *Type;
- NodeArray Expressions;
-
-public:
- ConversionExpr(const Node *Type_, NodeArray Expressions_)
- : Type(Type_), Expressions(Expressions_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Type->print(S);
- S += ")(";
- Expressions.printWithComma(S);
- S += ")";
+ template<typename T> void printWithPendingNewline(T V) {
+ print(V);
+ if (wantsNewline(V))
+ PendingNewline = true;
}
-};
-
-class InitListExpr : public Expr {
- Node *Ty;
- NodeArray Inits;
-public:
- InitListExpr(Node *Ty_, NodeArray Inits_) : Ty(Ty_), Inits(Inits_) {}
-
- void printLeft(OutputStream &S) const override {
- if (Ty)
- Ty->print(S);
- S += '{';
- Inits.printWithComma(S);
- S += '}';
- }
-};
-class BracedExpr : public Expr {
- Node *Elem;
- Node *Init;
- bool IsArray;
-public:
- BracedExpr(Node *Elem_, Node *Init_, bool IsArray_)
- : Expr(KBracedExpr), Elem(Elem_), Init(Init_), IsArray(IsArray_) {}
-
- void printLeft(OutputStream &S) const override {
- if (IsArray) {
- S += '[';
- Elem->print(S);
- S += ']';
+ template<typename T> void printWithComma(T V) {
+ if (PendingNewline || wantsNewline(V)) {
+ printStr(",");
+ newLine();
} else {
- S += '.';
- Elem->print(S);
+ printStr(", ");
}
- if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
- S += " = ";
- Init->print(S);
- }
-};
-class BracedRangeExpr : public Expr {
- Node *First;
- Node *Last;
- Node *Init;
-public:
- BracedRangeExpr(Node *First_, Node *Last_, Node *Init_)
- : Expr(KBracedRangeExpr), First(First_), Last(Last_), Init(Init_) {}
-
- void printLeft(OutputStream &S) const override {
- S += '[';
- First->print(S);
- S += " ... ";
- Last->print(S);
- S += ']';
- if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
- S += " = ";
- Init->print(S);
+ printWithPendingNewline(V);
}
-};
-
-struct FoldExpr : Expr {
- Node *Pack, *Init;
- StringView OperatorName;
- bool IsLeftFold;
-
- FoldExpr(bool IsLeftFold_, StringView OperatorName_, Node *Pack_, Node *Init_)
- : Pack(Pack_), Init(Init_), OperatorName(OperatorName_),
- IsLeftFold(IsLeftFold_) {}
- void printLeft(OutputStream &S) const override {
- auto PrintPack = [&] {
- S += '(';
- ParameterPackExpansion(Pack).print(S);
- S += ')';
- };
-
- S += '(';
+ struct CtorArgPrinter {
+ DumpVisitor &Visitor;
- if (IsLeftFold) {
- // init op ... op pack
- if (Init != nullptr) {
- Init->print(S);
- S += ' ';
- S += OperatorName;
- S += ' ';
- }
- // ... op pack
- S += "... ";
- S += OperatorName;
- S += ' ';
- PrintPack();
- } else { // !IsLeftFold
- // pack op ...
- PrintPack();
- S += ' ';
- S += OperatorName;
- S += " ...";
- // pack op ... op init
- if (Init != nullptr) {
- S += ' ';
- S += OperatorName;
- S += ' ';
- Init->print(S);
- }
+ template<typename T, typename ...Rest> void operator()(T V, Rest ...Vs) {
+ if (Visitor.anyWantNewline(V, Vs...))
+ Visitor.newLine();
+ Visitor.printWithPendingNewline(V);
+ int PrintInOrder[] = { (Visitor.printWithComma(Vs), 0)..., 0 };
+ (void)PrintInOrder;
}
- S += ')';
- }
-};
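
A standalone sketch of the fold-expression layout produced by FoldExpr::printLeft above: for a left fold the optional init comes first, "(init op ... op (pack))"; for a right fold it comes last, "((pack) op ... op init)".

#include <cstdio>
#include <string>

static std::string printFold(bool IsLeftFold, const std::string &Op,
                             const std::string &Pack, const std::string &Init) {
  std::string Out = "(";
  if (IsLeftFold) {
    if (!Init.empty()) Out += Init + " " + Op + " ";
    Out += "... " + Op + " (" + Pack + ")";
  } else {
    Out += "(" + Pack + ") " + Op + " ...";
    if (!Init.empty()) Out += " " + Op + " " + Init;
  }
  return Out + ")";
}

int main() {
  std::printf("%s\n", printFold(true, "+", "args", "0").c_str()); // (0 + ... + (args))
  std::printf("%s\n", printFold(false, "+", "args", "").c_str()); // ((args) + ...)
}
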
-
-class ThrowExpr : public Expr {
- const Node *Op;
-
-public:
- ThrowExpr(Node *Op_) : Op(Op_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "throw ";
- Op->print(S);
- }
-};
-
-class BoolExpr : public Expr {
- bool Value;
-
-public:
- BoolExpr(bool Value_) : Value(Value_) {}
-
- void printLeft(OutputStream &S) const override {
- S += Value ? StringView("true") : StringView("false");
- }
-};
-
-class IntegerCastExpr : public Expr {
- // ty(integer)
- Node *Ty;
- StringView Integer;
-
-public:
- IntegerCastExpr(Node *Ty_, StringView Integer_)
- : Ty(Ty_), Integer(Integer_) {}
-
- void printLeft(OutputStream &S) const override {
- S += "(";
- Ty->print(S);
- S += ")";
- S += Integer;
- }
-};
-
-class IntegerExpr : public Expr {
- StringView Type;
- StringView Value;
-
-public:
- IntegerExpr(StringView Type_, StringView Value_) : Type(Type_), Value(Value_) {}
+ };
- void printLeft(OutputStream &S) const override {
- if (Type.size() > 3) {
- S += "(";
- S += Type;
- S += ")";
+ template<typename NodeT> void operator()(const NodeT *Node) {
+ Depth += 2;
+ fprintf(stderr, "%s(", itanium_demangle::NodeKind<NodeT>::name());
+ Node->match(CtorArgPrinter{*this});
+ fprintf(stderr, ")");
+ Depth -= 2;
+ }
+
+ void operator()(const ForwardTemplateReference *Node) {
+ Depth += 2;
+ fprintf(stderr, "ForwardTemplateReference(");
+ if (Node->Ref && !Node->Printing) {
+ Node->Printing = true;
+ CtorArgPrinter{*this}(Node->Ref);
+ Node->Printing = false;
+ } else {
+ CtorArgPrinter{*this}(Node->Index);
}
-
- if (Value[0] == 'n') {
- S += "-";
- S += Value.dropFront(1);
- } else
- S += Value;
-
- if (Type.size() <= 3)
- S += Type;
+ fprintf(stderr, ")");
+ Depth -= 2;
}
};
+}
-template <class Float> struct FloatData;
-
-template <class Float> class FloatExpr : public Expr {
- const StringView Contents;
-
-public:
- FloatExpr(StringView Contents_) : Contents(Contents_) {}
-
- void printLeft(OutputStream &s) const override {
- const char *first = Contents.begin();
- const char *last = Contents.end() + 1;
-
- const size_t N = FloatData<Float>::mangled_size;
- if (static_cast<std::size_t>(last - first) > N) {
- last = first + N;
- union {
- Float value;
- char buf[sizeof(Float)];
- };
- const char *t = first;
- char *e = buf;
- for (; t != last; ++t, ++e) {
- unsigned d1 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
- : static_cast<unsigned>(*t - 'a' + 10);
- ++t;
- unsigned d0 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
- : static_cast<unsigned>(*t - 'a' + 10);
- *e = static_cast<char>((d1 << 4) + d0);
- }
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- std::reverse(buf, e);
+void itanium_demangle::Node::dump() const {
+ DumpVisitor V;
+ visit(std::ref(V));
+ V.newLine();
+}
#endif
- char num[FloatData<Float>::max_demangled_size] = {0};
- int n = snprintf(num, sizeof(num), FloatData<Float>::spec, value);
- s += StringView(num, num + n);
- }
- }
-};
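
A simplified standalone sketch of the hex-nibble decoding done by FloatExpr::printLeft above (the real code sizes the buffer from FloatData and formats with its printf spec). The input "40490fdb" is an assumed example: it is the big-endian byte image of the float value pi.

#include <algorithm>
#include <cstdio>
#include <cstring>

static float decodeMangledFloat(const char *Hex /* exactly 8 lowercase hex digits */) {
  auto Nibble = [](char C) -> unsigned {
    return C <= '9' ? unsigned(C - '0') : unsigned(C - 'a' + 10);
  };
  unsigned char Bytes[sizeof(float)];
  for (size_t I = 0; I != sizeof(float); ++I)
    Bytes[I] = static_cast<unsigned char>((Nibble(Hex[2 * I]) << 4) |
                                          Nibble(Hex[2 * I + 1]));
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  std::reverse(Bytes, Bytes + sizeof(float)); // mangled form is big-endian
#endif
  float Value;
  std::memcpy(&Value, Bytes, sizeof(float));
  return Value;
}

int main() {
  std::printf("%g\n", decodeMangledFloat("40490fdb")); // prints 3.14159
}
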
+namespace {
class BumpPointerAllocator {
struct BlockMeta {
BlockMeta* Next;
@@ -1823,3134 +301,28 @@ public:
~BumpPointerAllocator() { reset(); }
};
-template <class T, size_t N>
-class PODSmallVector {
- static_assert(std::is_pod<T>::value,
- "T is required to be a plain old data type");
-
- T* First;
- T* Last;
- T* Cap;
- T Inline[N];
-
- bool isInline() const { return First == Inline; }
-
- void clearInline() {
- First = Inline;
- Last = Inline;
- Cap = Inline + N;
- }
-
- void reserve(size_t NewCap) {
- size_t S = size();
- if (isInline()) {
- auto* Tmp = static_cast<T*>(std::malloc(NewCap * sizeof(T)));
- if (Tmp == nullptr)
- std::terminate();
- std::copy(First, Last, Tmp);
- First = Tmp;
- } else {
- First = static_cast<T*>(std::realloc(First, NewCap * sizeof(T)));
- if (First == nullptr)
- std::terminate();
- }
- Last = First + S;
- Cap = First + NewCap;
- }
+class DefaultAllocator {
+ BumpPointerAllocator Alloc;
public:
- PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {}
-
- PODSmallVector(const PODSmallVector&) = delete;
- PODSmallVector& operator=(const PODSmallVector&) = delete;
-
- PODSmallVector(PODSmallVector&& Other) : PODSmallVector() {
- if (Other.isInline()) {
- std::copy(Other.begin(), Other.end(), First);
- Last = First + Other.size();
- Other.clear();
- return;
- }
-
- First = Other.First;
- Last = Other.Last;
- Cap = Other.Cap;
- Other.clearInline();
- }
-
- PODSmallVector& operator=(PODSmallVector&& Other) {
- if (Other.isInline()) {
- if (!isInline()) {
- std::free(First);
- clearInline();
- }
- std::copy(Other.begin(), Other.end(), First);
- Last = First + Other.size();
- Other.clear();
- return *this;
- }
-
- if (isInline()) {
- First = Other.First;
- Last = Other.Last;
- Cap = Other.Cap;
- Other.clearInline();
- return *this;
- }
-
- std::swap(First, Other.First);
- std::swap(Last, Other.Last);
- std::swap(Cap, Other.Cap);
- Other.clear();
- return *this;
- }
-
- void push_back(const T& Elem) {
- if (Last == Cap)
- reserve(size() * 2);
- *Last++ = Elem;
- }
-
- void pop_back() {
- assert(Last != First && "Popping empty vector!");
- --Last;
- }
-
- void dropBack(size_t Index) {
- assert(Index <= size() && "dropBack() can't expand!");
- Last = First + Index;
- }
-
- T* begin() { return First; }
- T* end() { return Last; }
-
- bool empty() const { return First == Last; }
- size_t size() const { return static_cast<size_t>(Last - First); }
- T& back() {
- assert(Last != First && "Calling back() on empty vector!");
- return *(Last - 1);
- }
- T& operator[](size_t Index) {
- assert(Index < size() && "Invalid access!");
- return *(begin() + Index);
- }
- void clear() { Last = First; }
-
- ~PODSmallVector() {
- if (!isInline())
- std::free(First);
- }
-};
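
A minimal usage sketch for the PODSmallVector removed above; it assumes the class template is in scope and is not a standalone program. The first N pushes stay in the inline buffer; pushing past N triggers reserve() and a copy to the heap.

#include <cassert>

void podSmallVectorSketch() {
  PODSmallVector<int, 4> V;
  for (int I = 0; I != 8; ++I)
    V.push_back(I);            // grows 4 -> 8, spilling to the heap
  assert(V.size() == 8 && V.back() == 7);
  V.dropBack(2);               // keep only the first two elements
  assert(V.size() == 2 && V[1] == 1);
}
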
-
-struct Db {
- const char *First;
- const char *Last;
-
- // Name stack, this is used by the parser to hold temporary names that were
- // parsed. The parser collapses multiple names into new nodes to construct
- // the AST. Once the parser is finished, names.size() == 1.
- PODSmallVector<Node *, 32> Names;
-
- // Substitution table. Itanium supports name substitutions as a means of
- // compression. The string "S42_" refers to entry 146 (base-36) in this
- // table.
- PODSmallVector<Node *, 32> Subs;
-
- // Template parameter table. Like the above, but referenced like "T42_".
- // This has a smaller size compared to Subs and Names because it can be
- // stored on the stack.
- PODSmallVector<Node *, 8> TemplateParams;
-
- // Set of unresolved forward <template-param> references. These can occur in a
- // conversion operator's type, and are resolved in the enclosing <encoding>.
- PODSmallVector<ForwardTemplateReference *, 4> ForwardTemplateRefs;
-
- bool TryToParseTemplateArgs = true;
- bool PermitForwardTemplateReferences = false;
- bool ParsingLambdaParams = false;
-
- BumpPointerAllocator ASTAllocator;
+ void reset() { Alloc.reset(); }
- Db(const char *First_, const char *Last_) : First(First_), Last(Last_) {}
-
- void reset(const char *First_, const char *Last_) {
- First = First_;
- Last = Last_;
- Names.clear();
- Subs.clear();
- TemplateParams.clear();
- ParsingLambdaParams = false;
- TryToParseTemplateArgs = true;
- PermitForwardTemplateReferences = false;
- ASTAllocator.reset();
- }
-
- template <class T, class... Args> T *make(Args &&... args) {
- return new (ASTAllocator.allocate(sizeof(T)))
+ template<typename T, typename ...Args> T *makeNode(Args &&...args) {
+ return new (Alloc.allocate(sizeof(T)))
T(std::forward<Args>(args)...);
}
- template <class It> NodeArray makeNodeArray(It begin, It end) {
- size_t sz = static_cast<size_t>(end - begin);
- void *mem = ASTAllocator.allocate(sizeof(Node *) * sz);
- Node **data = new (mem) Node *[sz];
- std::copy(begin, end, data);
- return NodeArray(data, sz);
- }
-
- NodeArray popTrailingNodeArray(size_t FromPosition) {
- assert(FromPosition <= Names.size());
- NodeArray res =
- makeNodeArray(Names.begin() + (long)FromPosition, Names.end());
- Names.dropBack(FromPosition);
- return res;
- }
-
- bool consumeIf(StringView S) {
- if (StringView(First, Last).startsWith(S)) {
- First += S.size();
- return true;
- }
- return false;
- }
-
- bool consumeIf(char C) {
- if (First != Last && *First == C) {
- ++First;
- return true;
- }
- return false;
- }
-
- char consume() { return First != Last ? *First++ : '\0'; }
-
- char look(unsigned Lookahead = 0) {
- if (static_cast<size_t>(Last - First) <= Lookahead)
- return '\0';
- return First[Lookahead];
- }
-
- size_t numLeft() const { return static_cast<size_t>(Last - First); }
-
- StringView parseNumber(bool AllowNegative = false);
- Qualifiers parseCVQualifiers();
- bool parsePositiveInteger(size_t *Out);
- StringView parseBareSourceName();
-
- bool parseSeqId(size_t *Out);
- Node *parseSubstitution();
- Node *parseTemplateParam();
- Node *parseTemplateArgs(bool TagTemplates = false);
- Node *parseTemplateArg();
-
- /// Parse the <expr> production.
- Node *parseExpr();
- Node *parsePrefixExpr(StringView Kind);
- Node *parseBinaryExpr(StringView Kind);
- Node *parseIntegerLiteral(StringView Lit);
- Node *parseExprPrimary();
- template <class Float> Node *parseFloatingLiteral();
- Node *parseFunctionParam();
- Node *parseNewExpr();
- Node *parseConversionExpr();
- Node *parseBracedExpr();
- Node *parseFoldExpr();
-
- /// Parse the <type> production.
- Node *parseType();
- Node *parseFunctionType();
- Node *parseVectorType();
- Node *parseDecltype();
- Node *parseArrayType();
- Node *parsePointerToMemberType();
- Node *parseClassEnumType();
- Node *parseQualifiedType();
-
- Node *parseEncoding();
- bool parseCallOffset();
- Node *parseSpecialName();
-
- /// Holds some extra information about a <name> that is being parsed. This
- /// information is only pertinent if the <name> refers to an <encoding>.
- struct NameState {
- bool CtorDtorConversion = false;
- bool EndsWithTemplateArgs = false;
- Qualifiers CVQualifiers = QualNone;
- FunctionRefQual ReferenceQualifier = FrefQualNone;
- size_t ForwardTemplateRefsBegin;
-
- NameState(Db *Enclosing)
- : ForwardTemplateRefsBegin(Enclosing->ForwardTemplateRefs.size()) {}
- };
-
- bool resolveForwardTemplateRefs(NameState &State) {
- size_t I = State.ForwardTemplateRefsBegin;
- size_t E = ForwardTemplateRefs.size();
- for (; I < E; ++I) {
- size_t Idx = ForwardTemplateRefs[I]->Index;
- if (Idx >= TemplateParams.size())
- return true;
- ForwardTemplateRefs[I]->Ref = TemplateParams[Idx];
- }
- ForwardTemplateRefs.dropBack(State.ForwardTemplateRefsBegin);
- return false;
+ void *allocateNodeArray(size_t sz) {
+ return Alloc.allocate(sizeof(Node *) * sz);
}
-
- /// Parse the <name> production.
- Node *parseName(NameState *State = nullptr);
- Node *parseLocalName(NameState *State);
- Node *parseOperatorName(NameState *State);
- Node *parseUnqualifiedName(NameState *State);
- Node *parseUnnamedTypeName(NameState *State);
- Node *parseSourceName(NameState *State);
- Node *parseUnscopedName(NameState *State);
- Node *parseNestedName(NameState *State);
- Node *parseCtorDtorName(Node *&SoFar, NameState *State);
-
- Node *parseAbiTags(Node *N);
-
- /// Parse the <unresolved-name> production.
- Node *parseUnresolvedName();
- Node *parseSimpleId();
- Node *parseBaseUnresolvedName();
- Node *parseUnresolvedType();
- Node *parseDestructorName();
-
- /// Top-level entry point into the parser.
- Node *parse();
};
+} // unnamed namespace
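
A standalone sketch of the base-36 <seq-id> decoding behind the "S42_" comment in the Subs table above: digits 0-9 then uppercase A-Z, so "42" decodes to 4*36 + 2 = 146 ("S_" itself, with no seq-id, names the very first substitution).

#include <cstddef>
#include <cstdio>

static bool decodeSeqId(const char *S, size_t &Out) {
  Out = 0;
  for (; *S && *S != '_'; ++S) {
    unsigned Digit;
    if (*S >= '0' && *S <= '9')      Digit = *S - '0';
    else if (*S >= 'A' && *S <= 'Z') Digit = *S - 'A' + 10;
    else                             return false;
    Out = Out * 36 + Digit;
  }
  return *S == '_';
}

int main() {
  size_t Id;
  if (decodeSeqId("42_", Id))
    std::printf("seq-id %zu\n", Id); // prints "seq-id 146"
}
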
-const char* parse_discriminator(const char* first, const char* last);
-
-// <name> ::= <nested-name> // N
-// ::= <local-name> # See Scope Encoding below // Z
-// ::= <unscoped-template-name> <template-args>
-// ::= <unscoped-name>
-//
-// <unscoped-template-name> ::= <unscoped-name>
-// ::= <substitution>
-Node *Db::parseName(NameState *State) {
- consumeIf('L'); // extension
-
- if (look() == 'N')
- return parseNestedName(State);
- if (look() == 'Z')
- return parseLocalName(State);
-
- // ::= <unscoped-template-name> <template-args>
- if (look() == 'S' && look(1) != 't') {
- Node *S = parseSubstitution();
- if (S == nullptr)
- return nullptr;
- if (look() != 'I')
- return nullptr;
- Node *TA = parseTemplateArgs(State != nullptr);
- if (TA == nullptr)
- return nullptr;
- if (State) State->EndsWithTemplateArgs = true;
- return make<NameWithTemplateArgs>(S, TA);
- }
-
- Node *N = parseUnscopedName(State);
- if (N == nullptr)
- return nullptr;
- // ::= <unscoped-template-name> <template-args>
- if (look() == 'I') {
- Subs.push_back(N);
- Node *TA = parseTemplateArgs(State != nullptr);
- if (TA == nullptr)
- return nullptr;
- if (State) State->EndsWithTemplateArgs = true;
- return make<NameWithTemplateArgs>(N, TA);
- }
- // ::= <unscoped-name>
- return N;
-}
-
-// <local-name> := Z <function encoding> E <entity name> [<discriminator>]
-// := Z <function encoding> E s [<discriminator>]
-// := Z <function encoding> Ed [ <parameter number> ] _ <entity name>
-Node *Db::parseLocalName(NameState *State) {
- if (!consumeIf('Z'))
- return nullptr;
- Node *Encoding = parseEncoding();
- if (Encoding == nullptr || !consumeIf('E'))
- return nullptr;
-
- if (consumeIf('s')) {
- First = parse_discriminator(First, Last);
- return make<LocalName>(Encoding, make<NameType>("string literal"));
- }
-
- if (consumeIf('d')) {
- parseNumber(true);
- if (!consumeIf('_'))
- return nullptr;
- Node *N = parseName(State);
- if (N == nullptr)
- return nullptr;
- return make<LocalName>(Encoding, N);
- }
-
- Node *Entity = parseName(State);
- if (Entity == nullptr)
- return nullptr;
- First = parse_discriminator(First, Last);
- return make<LocalName>(Encoding, Entity);
-}
-
-// <unscoped-name> ::= <unqualified-name>
-// ::= St <unqualified-name> # ::std::
-// extension ::= StL<unqualified-name>
-Node *Db::parseUnscopedName(NameState *State) {
- if (consumeIf("StL") || consumeIf("St")) {
- Node *R = parseUnqualifiedName(State);
- if (R == nullptr)
- return nullptr;
- return make<StdQualifiedName>(R);
- }
- return parseUnqualifiedName(State);
-}
-
-// <unqualified-name> ::= <operator-name> [abi-tags]
-// ::= <ctor-dtor-name>
-// ::= <source-name>
-// ::= <unnamed-type-name>
-// ::= DC <source-name>+ E # structured binding declaration
-Node *Db::parseUnqualifiedName(NameState *State) {
- // <ctor-dtor-name>s are special-cased in parseNestedName().
- Node *Result;
- if (look() == 'U')
- Result = parseUnnamedTypeName(State);
- else if (look() >= '1' && look() <= '9')
- Result = parseSourceName(State);
- else if (consumeIf("DC")) {
- size_t BindingsBegin = Names.size();
- do {
- Node *Binding = parseSourceName(State);
- if (Binding == nullptr)
- return nullptr;
- Names.push_back(Binding);
- } while (!consumeIf('E'));
- Result = make<StructuredBindingName>(popTrailingNodeArray(BindingsBegin));
- } else
- Result = parseOperatorName(State);
- if (Result != nullptr)
- Result = parseAbiTags(Result);
- return Result;
-}
-
-// <unnamed-type-name> ::= Ut [<nonnegative number>] _
-// ::= <closure-type-name>
-//
-// <closure-type-name> ::= Ul <lambda-sig> E [ <nonnegative number> ] _
-//
-// <lambda-sig> ::= <parameter type>+ # Parameter types or "v" if the lambda has no parameters
-Node *Db::parseUnnamedTypeName(NameState *) {
- if (consumeIf("Ut")) {
- StringView Count = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- return make<UnnamedTypeName>(Count);
- }
- if (consumeIf("Ul")) {
- NodeArray Params;
- SwapAndRestore<bool> SwapParams(ParsingLambdaParams, true);
- if (!consumeIf("vE")) {
- size_t ParamsBegin = Names.size();
- do {
- Node *P = parseType();
- if (P == nullptr)
- return nullptr;
- Names.push_back(P);
- } while (!consumeIf('E'));
- Params = popTrailingNodeArray(ParamsBegin);
- }
- StringView Count = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- return make<ClosureTypeName>(Params, Count);
- }
- return nullptr;
-}
-
-// <source-name> ::= <positive length number> <identifier>
-Node *Db::parseSourceName(NameState *) {
- size_t Length = 0;
- if (parsePositiveInteger(&Length))
- return nullptr;
- if (numLeft() < Length || Length == 0)
- return nullptr;
- StringView Name(First, First + Length);
- First += Length;
- if (Name.startsWith("_GLOBAL__N"))
- return make<NameType>("(anonymous namespace)");
- return make<NameType>(Name);
-}
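
A standalone toy illustrating the <source-name> rule above: a decimal length followed by that many identifier characters, with the "_GLOBAL__N" prefix rewritten to "(anonymous namespace)". Error handling is reduced to returning an empty string.

#include <cctype>
#include <cstdio>
#include <string>

static std::string toySourceName(const std::string &M, size_t &I) {
  size_t Len = 0;
  while (I < M.size() && std::isdigit((unsigned char)M[I]))
    Len = Len * 10 + (M[I++] - '0');
  if (Len == 0 || I + Len > M.size())
    return "";
  std::string Name = M.substr(I, Len);
  I += Len;
  if (Name.rfind("_GLOBAL__N", 0) == 0)   // prefix check
    return "(anonymous namespace)";
  return Name;
}

int main() {
  size_t I = 0;
  std::string M = "3foo";
  std::printf("%s\n", toySourceName(M, I).c_str()); // prints "foo"
}
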
-
-// <operator-name> ::= aa # &&
-// ::= ad # & (unary)
-// ::= an # &
-// ::= aN # &=
-// ::= aS # =
-// ::= cl # ()
-// ::= cm # ,
-// ::= co # ~
-// ::= cv <type> # (cast)
-// ::= da # delete[]
-// ::= de # * (unary)
-// ::= dl # delete
-// ::= dv # /
-// ::= dV # /=
-// ::= eo # ^
-// ::= eO # ^=
-// ::= eq # ==
-// ::= ge # >=
-// ::= gt # >
-// ::= ix # []
-// ::= le # <=
-// ::= li <source-name> # operator ""
-// ::= ls # <<
-// ::= lS # <<=
-// ::= lt # <
-// ::= mi # -
-// ::= mI # -=
-// ::= ml # *
-// ::= mL # *=
-// ::= mm # -- (postfix in <expression> context)
-// ::= na # new[]
-// ::= ne # !=
-// ::= ng # - (unary)
-// ::= nt # !
-// ::= nw # new
-// ::= oo # ||
-// ::= or # |
-// ::= oR # |=
-// ::= pm # ->*
-// ::= pl # +
-// ::= pL # +=
-// ::= pp # ++ (postfix in <expression> context)
-// ::= ps # + (unary)
-// ::= pt # ->
-// ::= qu # ?
-// ::= rm # %
-// ::= rM # %=
-// ::= rs # >>
-// ::= rS # >>=
-// ::= ss # <=> C++2a
-// ::= v <digit> <source-name> # vendor extended operator
-Node *Db::parseOperatorName(NameState *State) {
- switch (look()) {
- case 'a':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator&&");
- case 'd':
- case 'n':
- First += 2;
- return make<NameType>("operator&");
- case 'N':
- First += 2;
- return make<NameType>("operator&=");
- case 'S':
- First += 2;
- return make<NameType>("operator=");
- }
- return nullptr;
- case 'c':
- switch (look(1)) {
- case 'l':
- First += 2;
- return make<NameType>("operator()");
- case 'm':
- First += 2;
- return make<NameType>("operator,");
- case 'o':
- First += 2;
- return make<NameType>("operator~");
- // ::= cv <type> # (cast)
- case 'v': {
- First += 2;
- SwapAndRestore<bool> SaveTemplate(TryToParseTemplateArgs, false);
- // If we're parsing an encoding, State != nullptr and the conversion
- // operators' <type> could have a <template-param> that refers to some
- // <template-arg>s further ahead in the mangled name.
- SwapAndRestore<bool> SavePermit(PermitForwardTemplateReferences,
- PermitForwardTemplateReferences ||
- State != nullptr);
- Node* Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- if (State) State->CtorDtorConversion = true;
- return make<ConversionOperatorType>(Ty);
- }
- }
- return nullptr;
- case 'd':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator delete[]");
- case 'e':
- First += 2;
- return make<NameType>("operator*");
- case 'l':
- First += 2;
- return make<NameType>("operator delete");
- case 'v':
- First += 2;
- return make<NameType>("operator/");
- case 'V':
- First += 2;
- return make<NameType>("operator/=");
- }
- return nullptr;
- case 'e':
- switch (look(1)) {
- case 'o':
- First += 2;
- return make<NameType>("operator^");
- case 'O':
- First += 2;
- return make<NameType>("operator^=");
- case 'q':
- First += 2;
- return make<NameType>("operator==");
- }
- return nullptr;
- case 'g':
- switch (look(1)) {
- case 'e':
- First += 2;
- return make<NameType>("operator>=");
- case 't':
- First += 2;
- return make<NameType>("operator>");
- }
- return nullptr;
- case 'i':
- if (look(1) == 'x') {
- First += 2;
- return make<NameType>("operator[]");
- }
- return nullptr;
- case 'l':
- switch (look(1)) {
- case 'e':
- First += 2;
- return make<NameType>("operator<=");
- // ::= li <source-name> # operator ""
- case 'i': {
- First += 2;
- Node *SN = parseSourceName(State);
- if (SN == nullptr)
- return nullptr;
- return make<LiteralOperator>(SN);
- }
- case 's':
- First += 2;
- return make<NameType>("operator<<");
- case 'S':
- First += 2;
- return make<NameType>("operator<<=");
- case 't':
- First += 2;
- return make<NameType>("operator<");
- }
- return nullptr;
- case 'm':
- switch (look(1)) {
- case 'i':
- First += 2;
- return make<NameType>("operator-");
- case 'I':
- First += 2;
- return make<NameType>("operator-=");
- case 'l':
- First += 2;
- return make<NameType>("operator*");
- case 'L':
- First += 2;
- return make<NameType>("operator*=");
- case 'm':
- First += 2;
- return make<NameType>("operator--");
- }
- return nullptr;
- case 'n':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator new[]");
- case 'e':
- First += 2;
- return make<NameType>("operator!=");
- case 'g':
- First += 2;
- return make<NameType>("operator-");
- case 't':
- First += 2;
- return make<NameType>("operator!");
- case 'w':
- First += 2;
- return make<NameType>("operator new");
- }
- return nullptr;
- case 'o':
- switch (look(1)) {
- case 'o':
- First += 2;
- return make<NameType>("operator||");
- case 'r':
- First += 2;
- return make<NameType>("operator|");
- case 'R':
- First += 2;
- return make<NameType>("operator|=");
- }
- return nullptr;
- case 'p':
- switch (look(1)) {
- case 'm':
- First += 2;
- return make<NameType>("operator->*");
- case 'l':
- First += 2;
- return make<NameType>("operator+");
- case 'L':
- First += 2;
- return make<NameType>("operator+=");
- case 'p':
- First += 2;
- return make<NameType>("operator++");
- case 's':
- First += 2;
- return make<NameType>("operator+");
- case 't':
- First += 2;
- return make<NameType>("operator->");
- }
- return nullptr;
- case 'q':
- if (look(1) == 'u') {
- First += 2;
- return make<NameType>("operator?");
- }
- return nullptr;
- case 'r':
- switch (look(1)) {
- case 'm':
- First += 2;
- return make<NameType>("operator%");
- case 'M':
- First += 2;
- return make<NameType>("operator%=");
- case 's':
- First += 2;
- return make<NameType>("operator>>");
- case 'S':
- First += 2;
- return make<NameType>("operator>>=");
- }
- return nullptr;
- case 's':
- if (look(1) == 's') {
- First += 2;
- return make<NameType>("operator<=>");
- }
- return nullptr;
- // ::= v <digit> <source-name> # vendor extended operator
- case 'v':
- if (std::isdigit(look(1))) {
- First += 2;
- Node *SN = parseSourceName(State);
- if (SN == nullptr)
- return nullptr;
- return make<ConversionOperatorType>(SN);
- }
- return nullptr;
- }
- return nullptr;
-}
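
A few entries from the fixed two-character <operator-name> table handled above, as a standalone lookup; illustration only, far from the full list.

#include <cstdio>
#include <cstring>

static const char *toyOperatorName(const char *Code) {
  static const struct { const char *Code, *Name; } Table[] = {
      {"aa", "operator&&"}, {"pl", "operator+"},  {"mi", "operator-"},
      {"ml", "operator*"},  {"eq", "operator=="}, {"ix", "operator[]"},
      {"ss", "operator<=>"},
  };
  for (const auto &E : Table)
    if (std::strncmp(Code, E.Code, 2) == 0)
      return E.Name;
  return nullptr;
}

int main() {
  std::printf("%s\n", toyOperatorName("pl")); // prints "operator+"
}
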
-
-// <ctor-dtor-name> ::= C1 # complete object constructor
-// ::= C2 # base object constructor
-// ::= C3 # complete object allocating constructor
-// extension ::= C5 # ?
-// ::= D0 # deleting destructor
-// ::= D1 # complete object destructor
-// ::= D2 # base object destructor
-// extension ::= D5 # ?
-Node *Db::parseCtorDtorName(Node *&SoFar, NameState *State) {
- if (SoFar->K == Node::KSpecialSubstitution) {
- auto SSK = static_cast<SpecialSubstitution *>(SoFar)->SSK;
- switch (SSK) {
- case SpecialSubKind::string:
- case SpecialSubKind::istream:
- case SpecialSubKind::ostream:
- case SpecialSubKind::iostream:
- SoFar = make<ExpandedSpecialSubstitution>(SSK);
- default:
- break;
- }
- }
-
- if (consumeIf('C')) {
- bool IsInherited = consumeIf('I');
- if (look() != '1' && look() != '2' && look() != '3' && look() != '5')
- return nullptr;
- ++First;
- if (State) State->CtorDtorConversion = true;
- if (IsInherited) {
- if (parseName(State) == nullptr)
- return nullptr;
- }
- return make<CtorDtorName>(SoFar, false);
- }
-
- if (look() == 'D' &&
- (look(1) == '0' || look(1) == '1' || look(1) == '2' || look(1) == '5')) {
- First += 2;
- if (State) State->CtorDtorConversion = true;
- return make<CtorDtorName>(SoFar, true);
- }
-
- return nullptr;
-}
-
-// <nested-name> ::= N [<CV-Qualifiers>] [<ref-qualifier>] <prefix> <unqualified-name> E
-// ::= N [<CV-Qualifiers>] [<ref-qualifier>] <template-prefix> <template-args> E
-//
-// <prefix> ::= <prefix> <unqualified-name>
-// ::= <template-prefix> <template-args>
-// ::= <template-param>
-// ::= <decltype>
-// ::= # empty
-// ::= <substitution>
-// ::= <prefix> <data-member-prefix>
-// extension ::= L
-//
-// <data-member-prefix> := <member source-name> [<template-args>] M
-//
-// <template-prefix> ::= <prefix> <template unqualified-name>
-// ::= <template-param>
-// ::= <substitution>
-Node *Db::parseNestedName(NameState *State) {
- if (!consumeIf('N'))
- return nullptr;
-
- Qualifiers CVTmp = parseCVQualifiers();
- if (State) State->CVQualifiers = CVTmp;
-
- if (consumeIf('O')) {
- if (State) State->ReferenceQualifier = FrefQualRValue;
- } else if (consumeIf('R')) {
- if (State) State->ReferenceQualifier = FrefQualLValue;
- } else
- if (State) State->ReferenceQualifier = FrefQualNone;
-
- Node *SoFar = nullptr;
- auto PushComponent = [&](Node *Comp) {
- if (SoFar) SoFar = make<NestedName>(SoFar, Comp);
- else SoFar = Comp;
- if (State) State->EndsWithTemplateArgs = false;
- };
-
- if (consumeIf("St"))
- SoFar = make<NameType>("std");
-
- while (!consumeIf('E')) {
- consumeIf('L'); // extension
-
- // <data-member-prefix> := <member source-name> [<template-args>] M
- if (consumeIf('M')) {
- if (SoFar == nullptr)
- return nullptr;
- continue;
- }
-
- // ::= <template-param>
- if (look() == 'T') {
- Node *TP = parseTemplateParam();
- if (TP == nullptr)
- return nullptr;
- PushComponent(TP);
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <template-prefix> <template-args>
- if (look() == 'I') {
- Node *TA = parseTemplateArgs(State != nullptr);
- if (TA == nullptr || SoFar == nullptr)
- return nullptr;
- SoFar = make<NameWithTemplateArgs>(SoFar, TA);
- if (State) State->EndsWithTemplateArgs = true;
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <decltype>
- if (look() == 'D' && (look(1) == 't' || look(1) == 'T')) {
- Node *DT = parseDecltype();
- if (DT == nullptr)
- return nullptr;
- PushComponent(DT);
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <substitution>
- if (look() == 'S' && look(1) != 't') {
- Node *S = parseSubstitution();
- if (S == nullptr)
- return nullptr;
- PushComponent(S);
- if (SoFar != S)
- Subs.push_back(S);
- continue;
- }
-
- // Parse an <unqualified-name> that's actually a <ctor-dtor-name>.
- if (look() == 'C' || (look() == 'D' && look(1) != 'C')) {
- if (SoFar == nullptr)
- return nullptr;
- Node *CtorDtor = parseCtorDtorName(SoFar, State);
- if (CtorDtor == nullptr)
- return nullptr;
- PushComponent(CtorDtor);
- SoFar = parseAbiTags(SoFar);
- if (SoFar == nullptr)
- return nullptr;
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <prefix> <unqualified-name>
- Node *N = parseUnqualifiedName(State);
- if (N == nullptr)
- return nullptr;
- PushComponent(N);
- Subs.push_back(SoFar);
- }
-
- if (SoFar == nullptr || Subs.empty())
- return nullptr;
-
- Subs.pop_back();
- return SoFar;
-}
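
A standalone toy that mimics only the chain of <source-name>s inside a nested name, e.g. "N3foo3barE" -> "foo::bar". Templates, substitutions and ctor/dtor names are handled by the real parseNestedName above; this only shows the shape of the walk.

#include <cctype>
#include <cstdio>
#include <string>

static std::string toyNestedName(const std::string &M) {
  size_t I = 0;
  if (I == M.size() || M[I] != 'N')
    return "";
  ++I;
  std::string Out;
  while (I < M.size() && M[I] != 'E') {
    size_t Len = 0;
    while (I < M.size() && std::isdigit((unsigned char)M[I]))
      Len = Len * 10 + (M[I++] - '0');
    if (Len == 0 || I + Len > M.size())
      return "";
    if (!Out.empty()) Out += "::";
    Out += M.substr(I, Len);
    I += Len;
  }
  return Out;
}

int main() {
  std::printf("%s\n", toyNestedName("N3foo3barE").c_str()); // prints "foo::bar"
}
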
-
-// <simple-id> ::= <source-name> [ <template-args> ]
-Node *Db::parseSimpleId() {
- Node *SN = parseSourceName(/*NameState=*/nullptr);
- if (SN == nullptr)
- return nullptr;
- if (look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- return make<NameWithTemplateArgs>(SN, TA);
- }
- return SN;
-}
-
-// <destructor-name> ::= <unresolved-type> # e.g., ~T or ~decltype(f())
-// ::= <simple-id> # e.g., ~A<2*N>
-Node *Db::parseDestructorName() {
- Node *Result;
- if (std::isdigit(look()))
- Result = parseSimpleId();
- else
- Result = parseUnresolvedType();
- if (Result == nullptr)
- return nullptr;
- return make<DtorName>(Result);
-}
-
-// <unresolved-type> ::= <template-param>
-// ::= <decltype>
-// ::= <substitution>
-Node *Db::parseUnresolvedType() {
- if (look() == 'T') {
- Node *TP = parseTemplateParam();
- if (TP == nullptr)
- return nullptr;
- Subs.push_back(TP);
- return TP;
- }
- if (look() == 'D') {
- Node *DT = parseDecltype();
- if (DT == nullptr)
- return nullptr;
- Subs.push_back(DT);
- return DT;
- }
- return parseSubstitution();
-}
-
-// <base-unresolved-name> ::= <simple-id> # unresolved name
-// extension ::= <operator-name> # unresolved operator-function-id
-// extension ::= <operator-name> <template-args> # unresolved operator template-id
-// ::= on <operator-name> # unresolved operator-function-id
-// ::= on <operator-name> <template-args> # unresolved operator template-id
-// ::= dn <destructor-name> # destructor or pseudo-destructor;
-// # e.g. ~X or ~X<N-1>
-Node *Db::parseBaseUnresolvedName() {
- if (std::isdigit(look()))
- return parseSimpleId();
-
- if (consumeIf("dn"))
- return parseDestructorName();
-
- consumeIf("on");
-
- Node *Oper = parseOperatorName(/*NameState=*/nullptr);
- if (Oper == nullptr)
- return nullptr;
- if (look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- return make<NameWithTemplateArgs>(Oper, TA);
- }
- return Oper;
-}
-
-// <unresolved-name>
-// extension ::= srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
-// ::= [gs] <base-unresolved-name> # x or (with "gs") ::x
-// ::= [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
-// # A::x, N::y, A<T>::z; "gs" means leading "::"
-// ::= sr <unresolved-type> <base-unresolved-name> # T::x / decltype(p)::x
-// extension ::= sr <unresolved-type> <template-args> <base-unresolved-name>
-//                       # T::N::x / decltype(p)::N::x
-// (ignored) ::= srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
-//
-// <unresolved-qualifier-level> ::= <simple-id>
-Node *Db::parseUnresolvedName() {
- Node *SoFar = nullptr;
-
- // srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
- // srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
- if (consumeIf("srN")) {
- SoFar = parseUnresolvedType();
- if (SoFar == nullptr)
- return nullptr;
-
- if (look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- SoFar = make<NameWithTemplateArgs>(SoFar, TA);
- }
-
- while (!consumeIf('E')) {
- Node *Qual = parseSimpleId();
- if (Qual == nullptr)
- return nullptr;
- SoFar = make<QualifiedName>(SoFar, Qual);
- }
-
- Node *Base = parseBaseUnresolvedName();
- if (Base == nullptr)
- return nullptr;
- return make<QualifiedName>(SoFar, Base);
- }
-
- bool Global = consumeIf("gs");
-
- // [gs] <base-unresolved-name> # x or (with "gs") ::x
- if (!consumeIf("sr")) {
- SoFar = parseBaseUnresolvedName();
- if (SoFar == nullptr)
- return nullptr;
- if (Global)
- SoFar = make<GlobalQualifiedName>(SoFar);
- return SoFar;
- }
-
- // [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
- if (std::isdigit(look())) {
- do {
- Node *Qual = parseSimpleId();
- if (Qual == nullptr)
- return nullptr;
- if (SoFar)
- SoFar = make<QualifiedName>(SoFar, Qual);
- else if (Global)
- SoFar = make<GlobalQualifiedName>(Qual);
- else
- SoFar = Qual;
- } while (!consumeIf('E'));
- }
- // sr <unresolved-type> <base-unresolved-name>
- // sr <unresolved-type> <template-args> <base-unresolved-name>
- else {
- SoFar = parseUnresolvedType();
- if (SoFar == nullptr)
- return nullptr;
-
- if (look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- SoFar = make<NameWithTemplateArgs>(SoFar, TA);
- }
- }
-
- assert(SoFar != nullptr);
-
- Node *Base = parseBaseUnresolvedName();
- if (Base == nullptr)
- return nullptr;
- return make<QualifiedName>(SoFar, Base);
-}
-
-// <abi-tags> ::= <abi-tag> [<abi-tags>]
-// <abi-tag> ::= B <source-name>
-Node *Db::parseAbiTags(Node *N) {
- while (consumeIf('B')) {
- StringView SN = parseBareSourceName();
- if (SN.empty())
- return nullptr;
- N = make<AbiTagAttr>(N, SN);
- }
- return N;
-}
-
-// <number> ::= [n] <non-negative decimal integer>
-StringView Db::parseNumber(bool AllowNegative) {
- const char *Tmp = First;
- if (AllowNegative)
- consumeIf('n');
- if (numLeft() == 0 || !std::isdigit(*First))
- return StringView();
- while (numLeft() != 0 && std::isdigit(*First))
- ++First;
- return StringView(Tmp, First);
-}
-
-// <positive length number> ::= [0-9]*
-bool Db::parsePositiveInteger(size_t *Out) {
- *Out = 0;
- if (look() < '0' || look() > '9')
- return true;
- while (look() >= '0' && look() <= '9') {
- *Out *= 10;
- *Out += static_cast<size_t>(consume() - '0');
- }
- return false;
-}
-
-StringView Db::parseBareSourceName() {
- size_t Int = 0;
- if (parsePositiveInteger(&Int) || numLeft() < Int)
- return StringView();
- StringView R(First, First + Int);
- First += Int;
- return R;
-}
-
-// <function-type> ::= [<CV-qualifiers>] [<exception-spec>] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E
-//
-// <exception-spec> ::= Do # non-throwing exception-specification (e.g., noexcept, throw())
-// ::= DO <expression> E # computed (instantiation-dependent) noexcept
-// ::= Dw <type>+ E # dynamic exception specification with instantiation-dependent types
-//
-// <ref-qualifier> ::= R # & ref-qualifier
-// <ref-qualifier> ::= O # && ref-qualifier
-Node *Db::parseFunctionType() {
- Qualifiers CVQuals = parseCVQualifiers();
-
- Node *ExceptionSpec = nullptr;
- if (consumeIf("Do")) {
- ExceptionSpec = make<NameType>("noexcept");
- } else if (consumeIf("DO")) {
- Node *E = parseExpr();
- if (E == nullptr || !consumeIf('E'))
- return nullptr;
- ExceptionSpec = make<NoexceptSpec>(E);
- } else if (consumeIf("Dw")) {
- size_t SpecsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *T = parseType();
- if (T == nullptr)
- return nullptr;
- Names.push_back(T);
- }
- ExceptionSpec =
- make<DynamicExceptionSpec>(popTrailingNodeArray(SpecsBegin));
- }
-
- consumeIf("Dx"); // transaction safe
-
- if (!consumeIf('F'))
- return nullptr;
- consumeIf('Y'); // extern "C"
- Node *ReturnType = parseType();
- if (ReturnType == nullptr)
- return nullptr;
-
- FunctionRefQual ReferenceQualifier = FrefQualNone;
- size_t ParamsBegin = Names.size();
- while (true) {
- if (consumeIf('E'))
- break;
- if (consumeIf('v'))
- continue;
- if (consumeIf("RE")) {
- ReferenceQualifier = FrefQualLValue;
- break;
- }
- if (consumeIf("OE")) {
- ReferenceQualifier = FrefQualRValue;
- break;
- }
- Node *T = parseType();
- if (T == nullptr)
- return nullptr;
- Names.push_back(T);
- }
-
- NodeArray Params = popTrailingNodeArray(ParamsBegin);
- return make<FunctionType>(ReturnType, Params, CVQuals,
- ReferenceQualifier, ExceptionSpec);
-}
-
-// extension:
-// <vector-type> ::= Dv <positive dimension number> _ <extended element type>
-// ::= Dv [<dimension expression>] _ <element type>
-// <extended element type> ::= <element type>
-// ::= p # AltiVec vector pixel
-Node *Db::parseVectorType() {
- if (!consumeIf("Dv"))
- return nullptr;
- if (look() >= '1' && look() <= '9') {
- StringView DimensionNumber = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- if (consumeIf('p'))
- return make<VectorType>(DimensionNumber);
- Node *ElemType = parseType();
- if (ElemType == nullptr)
- return nullptr;
- return make<VectorType>(ElemType, DimensionNumber);
- }
-
- if (!consumeIf('_')) {
- Node *DimExpr = parseExpr();
- if (!DimExpr)
- return nullptr;
- if (!consumeIf('_'))
- return nullptr;
- Node *ElemType = parseType();
- if (!ElemType)
- return nullptr;
- return make<VectorType>(ElemType, DimExpr);
- }
- Node *ElemType = parseType();
- if (!ElemType)
- return nullptr;
- return make<VectorType>(ElemType, StringView());
-}
-
-// <decltype> ::= Dt <expression> E # decltype of an id-expression or class member access (C++0x)
-// ::= DT <expression> E # decltype of an expression (C++0x)
-Node *Db::parseDecltype() {
- if (!consumeIf('D'))
- return nullptr;
- if (!consumeIf('t') && !consumeIf('T'))
- return nullptr;
- Node *E = parseExpr();
- if (E == nullptr)
- return nullptr;
- if (!consumeIf('E'))
- return nullptr;
- return make<EnclosingExpr>("decltype(", E, ")");
-}
-
-// <array-type> ::= A <positive dimension number> _ <element type>
-// ::= A [<dimension expression>] _ <element type>
-Node *Db::parseArrayType() {
- if (!consumeIf('A'))
- return nullptr;
-
- if (std::isdigit(look())) {
- StringView Dimension = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<ArrayType>(Ty, Dimension);
- }
-
- if (!consumeIf('_')) {
- Node *DimExpr = parseExpr();
- if (DimExpr == nullptr)
- return nullptr;
- if (!consumeIf('_'))
- return nullptr;
- Node *ElementType = parseType();
- if (ElementType == nullptr)
- return nullptr;
- return make<ArrayType>(ElementType, DimExpr);
- }
-
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<ArrayType>(Ty);
-}
-
-// <pointer-to-member-type> ::= M <class type> <member type>
-Node *Db::parsePointerToMemberType() {
- if (!consumeIf('M'))
- return nullptr;
- Node *ClassType = parseType();
- if (ClassType == nullptr)
- return nullptr;
- Node *MemberType = parseType();
- if (MemberType == nullptr)
- return nullptr;
- return make<PointerToMemberType>(ClassType, MemberType);
-}
-
-// <class-enum-type> ::= <name> # non-dependent type name, dependent type name, or dependent typename-specifier
-// ::= Ts <name> # dependent elaborated type specifier using 'struct' or 'class'
-// ::= Tu <name> # dependent elaborated type specifier using 'union'
-// ::= Te <name> # dependent elaborated type specifier using 'enum'
-Node *Db::parseClassEnumType() {
- StringView ElabSpef;
- if (consumeIf("Ts"))
- ElabSpef = "struct";
- else if (consumeIf("Tu"))
- ElabSpef = "union";
- else if (consumeIf("Te"))
- ElabSpef = "enum";
-
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
-
- if (!ElabSpef.empty())
- return make<ElaboratedTypeSpefType>(ElabSpef, Name);
-
- return Name;
-}
-
-// <qualified-type> ::= <qualifiers> <type>
-// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers>
-// <extended-qualifier> ::= U <source-name> [<template-args>] # vendor extended type qualifier
-Node *Db::parseQualifiedType() {
- if (consumeIf('U')) {
- StringView Qual = parseBareSourceName();
- if (Qual.empty())
- return nullptr;
-
- // FIXME parse the optional <template-args> here!
-
- // extension ::= U <objc-name> <objc-type> # objc-type<identifier>
- if (Qual.startsWith("objcproto")) {
- StringView ProtoSourceName = Qual.dropFront(std::strlen("objcproto"));
- StringView Proto;
- {
- SwapAndRestore<const char *> SaveFirst(First, ProtoSourceName.begin()),
- SaveLast(Last, ProtoSourceName.end());
- Proto = parseBareSourceName();
- }
- if (Proto.empty())
- return nullptr;
- Node *Child = parseQualifiedType();
- if (Child == nullptr)
- return nullptr;
- return make<ObjCProtoName>(Child, Proto);
- }
-
- Node *Child = parseQualifiedType();
- if (Child == nullptr)
- return nullptr;
- return make<VendorExtQualType>(Child, Qual);
- }
-
- Qualifiers Quals = parseCVQualifiers();
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- if (Quals != QualNone)
- Ty = make<QualType>(Ty, Quals);
- return Ty;
-}
-
-// <type> ::= <builtin-type>
-// ::= <qualified-type>
-// ::= <function-type>
-// ::= <class-enum-type>
-// ::= <array-type>
-// ::= <pointer-to-member-type>
-// ::= <template-param>
-// ::= <template-template-param> <template-args>
-// ::= <decltype>
-// ::= P <type> # pointer
-// ::= R <type> # l-value reference
-// ::= O <type> # r-value reference (C++11)
-// ::= C <type> # complex pair (C99)
-// ::= G <type> # imaginary (C99)
-// ::= <substitution> # See Compression below
-// extension ::= U <objc-name> <objc-type> # objc-type<identifier>
-// extension ::= <vector-type> # <vector-type> starts with Dv
-//
-// <objc-name> ::= <k0 number> objcproto <k1 number> <identifier> # k0 = 9 + <number of digits in k1> + k1
-// <objc-type> ::= <source-name> # PU<11+>objcproto 11objc_object<source-name> 11objc_object -> id<source-name>
-Node *Db::parseType() {
- Node *Result = nullptr;
-
- switch (look()) {
- // ::= <qualified-type>
- case 'r':
- case 'V':
- case 'K': {
- unsigned AfterQuals = 0;
- if (look(AfterQuals) == 'r') ++AfterQuals;
- if (look(AfterQuals) == 'V') ++AfterQuals;
- if (look(AfterQuals) == 'K') ++AfterQuals;
-
- if (look(AfterQuals) == 'F' ||
- (look(AfterQuals) == 'D' &&
- (look(AfterQuals + 1) == 'o' || look(AfterQuals + 1) == 'O' ||
- look(AfterQuals + 1) == 'w' || look(AfterQuals + 1) == 'x'))) {
- Result = parseFunctionType();
- break;
- }
- LLVM_FALLTHROUGH;
- }
- case 'U': {
- Result = parseQualifiedType();
- break;
- }
- // <builtin-type> ::= v # void
- case 'v':
- ++First;
- return make<NameType>("void");
- // ::= w # wchar_t
- case 'w':
- ++First;
- return make<NameType>("wchar_t");
- // ::= b # bool
- case 'b':
- ++First;
- return make<NameType>("bool");
- // ::= c # char
- case 'c':
- ++First;
- return make<NameType>("char");
- // ::= a # signed char
- case 'a':
- ++First;
- return make<NameType>("signed char");
- // ::= h # unsigned char
- case 'h':
- ++First;
- return make<NameType>("unsigned char");
- // ::= s # short
- case 's':
- ++First;
- return make<NameType>("short");
- // ::= t # unsigned short
- case 't':
- ++First;
- return make<NameType>("unsigned short");
- // ::= i # int
- case 'i':
- ++First;
- return make<NameType>("int");
- // ::= j # unsigned int
- case 'j':
- ++First;
- return make<NameType>("unsigned int");
- // ::= l # long
- case 'l':
- ++First;
- return make<NameType>("long");
- // ::= m # unsigned long
- case 'm':
- ++First;
- return make<NameType>("unsigned long");
- // ::= x # long long, __int64
- case 'x':
- ++First;
- return make<NameType>("long long");
- // ::= y # unsigned long long, __int64
- case 'y':
- ++First;
- return make<NameType>("unsigned long long");
- // ::= n # __int128
- case 'n':
- ++First;
- return make<NameType>("__int128");
- // ::= o # unsigned __int128
- case 'o':
- ++First;
- return make<NameType>("unsigned __int128");
- // ::= f # float
- case 'f':
- ++First;
- return make<NameType>("float");
- // ::= d # double
- case 'd':
- ++First;
- return make<NameType>("double");
- // ::= e # long double, __float80
- case 'e':
- ++First;
- return make<NameType>("long double");
- // ::= g # __float128
- case 'g':
- ++First;
- return make<NameType>("__float128");
- // ::= z # ellipsis
- case 'z':
- ++First;
- return make<NameType>("...");
-
- // <builtin-type> ::= u <source-name> # vendor extended type
- case 'u': {
- ++First;
- StringView Res = parseBareSourceName();
- if (Res.empty())
- return nullptr;
- return make<NameType>(Res);
- }
- case 'D':
- switch (look(1)) {
- // ::= Dd # IEEE 754r decimal floating point (64 bits)
- case 'd':
- First += 2;
- return make<NameType>("decimal64");
- // ::= De # IEEE 754r decimal floating point (128 bits)
- case 'e':
- First += 2;
- return make<NameType>("decimal128");
- // ::= Df # IEEE 754r decimal floating point (32 bits)
- case 'f':
- First += 2;
- return make<NameType>("decimal32");
- // ::= Dh # IEEE 754r half-precision floating point (16 bits)
- case 'h':
- First += 2;
- return make<NameType>("decimal16");
- // ::= Di # char32_t
- case 'i':
- First += 2;
- return make<NameType>("char32_t");
- // ::= Ds # char16_t
- case 's':
- First += 2;
- return make<NameType>("char16_t");
- // ::= Da # auto (in dependent new-expressions)
- case 'a':
- First += 2;
- return make<NameType>("auto");
- // ::= Dc # decltype(auto)
- case 'c':
- First += 2;
- return make<NameType>("decltype(auto)");
- // ::= Dn # std::nullptr_t (i.e., decltype(nullptr))
- case 'n':
- First += 2;
- return make<NameType>("std::nullptr_t");
-
- // ::= <decltype>
- case 't':
- case 'T': {
- Result = parseDecltype();
- break;
- }
- // extension ::= <vector-type> # <vector-type> starts with Dv
- case 'v': {
- Result = parseVectorType();
- break;
- }
- // ::= Dp <type> # pack expansion (C++0x)
- case 'p': {
- First += 2;
- Node *Child = parseType();
- if (!Child)
- return nullptr;
- Result = make<ParameterPackExpansion>(Child);
- break;
- }
- // Exception specifier on a function type.
- case 'o':
- case 'O':
- case 'w':
- // Transaction safe function type.
- case 'x':
- Result = parseFunctionType();
- break;
- }
- break;
- // ::= <function-type>
- case 'F': {
- Result = parseFunctionType();
- break;
- }
- // ::= <array-type>
- case 'A': {
- Result = parseArrayType();
- break;
- }
- // ::= <pointer-to-member-type>
- case 'M': {
- Result = parsePointerToMemberType();
- break;
- }
- // ::= <template-param>
- case 'T': {
- // This could be an elaborated type specifier on a <class-enum-type>.
- if (look(1) == 's' || look(1) == 'u' || look(1) == 'e') {
- Result = parseClassEnumType();
- break;
- }
-
- Result = parseTemplateParam();
- if (Result == nullptr)
- return nullptr;
-
- // Result could be either of:
- // <type> ::= <template-param>
- // <type> ::= <template-template-param> <template-args>
- //
- // <template-template-param> ::= <template-param>
- // ::= <substitution>
- //
- // If this is followed by some <template-args>, and we're permitted to
- // parse them, take the second production.
-
- if (TryToParseTemplateArgs && look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- Result = make<NameWithTemplateArgs>(Result, TA);
- }
- break;
- }
- // ::= P <type> # pointer
- case 'P': {
- ++First;
- Node *Ptr = parseType();
- if (Ptr == nullptr)
- return nullptr;
- Result = make<PointerType>(Ptr);
- break;
- }
- // ::= R <type> # l-value reference
- case 'R': {
- ++First;
- Node *Ref = parseType();
- if (Ref == nullptr)
- return nullptr;
- Result = make<ReferenceType>(Ref, ReferenceKind::LValue);
- break;
- }
- // ::= O <type> # r-value reference (C++11)
- case 'O': {
- ++First;
- Node *Ref = parseType();
- if (Ref == nullptr)
- return nullptr;
- Result = make<ReferenceType>(Ref, ReferenceKind::RValue);
- break;
- }
- // ::= C <type> # complex pair (C99)
- case 'C': {
- ++First;
- Node *P = parseType();
- if (P == nullptr)
- return nullptr;
- Result = make<PostfixQualifiedType>(P, " complex");
- break;
- }
- // ::= G <type> # imaginary (C99)
- case 'G': {
- ++First;
- Node *P = parseType();
- if (P == nullptr)
- return P;
- Result = make<PostfixQualifiedType>(P, " imaginary");
- break;
- }
- // ::= <substitution> # See Compression below
- case 'S': {
- if (look(1) && look(1) != 't') {
- Node *Sub = parseSubstitution();
- if (Sub == nullptr)
- return nullptr;
-
- // Sub could be either of:
- // <type> ::= <substitution>
- // <type> ::= <template-template-param> <template-args>
- //
- // <template-template-param> ::= <template-param>
- // ::= <substitution>
- //
- // If this is followed by some <template-args>, and we're permitted to
- // parse them, take the second production.
-
- if (TryToParseTemplateArgs && look() == 'I') {
- Node *TA = parseTemplateArgs();
- if (TA == nullptr)
- return nullptr;
- Result = make<NameWithTemplateArgs>(Sub, TA);
- break;
- }
-
- // If all we parsed was a substitution, don't re-insert into the
- // substitution table.
- return Sub;
- }
- LLVM_FALLTHROUGH;
- }
- // ::= <class-enum-type>
- default: {
- Result = parseClassEnumType();
- break;
- }
- }
-
- // If we parsed a type, insert it into the substitution table. Note that all
- // <builtin-type>s and <substitution>s have already bailed out, because they
- // don't get substitutions.
- if (Result != nullptr)
- Subs.push_back(Result);
- return Result;
-}
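
A minimal standalone lookup mirroring a few of the single-character <builtin-type> codes handled in parseType above; illustration only, not the full set.

#include <cstdio>

static const char *builtinTypeName(char C) {
  switch (C) {
  case 'v': return "void";
  case 'b': return "bool";
  case 'c': return "char";
  case 'i': return "int";
  case 'j': return "unsigned int";
  case 'l': return "long";
  case 'x': return "long long";
  case 'f': return "float";
  case 'd': return "double";
  default:  return nullptr;
  }
}

int main() {
  std::printf("'%c' -> %s\n", 'j', builtinTypeName('j')); // 'j' -> unsigned int
}
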
-
-Node *Db::parsePrefixExpr(StringView Kind) {
- Node *E = parseExpr();
- if (E == nullptr)
- return nullptr;
- return make<PrefixExpr>(Kind, E);
-}
-
-Node *Db::parseBinaryExpr(StringView Kind) {
- Node *LHS = parseExpr();
- if (LHS == nullptr)
- return nullptr;
- Node *RHS = parseExpr();
- if (RHS == nullptr)
- return nullptr;
- return make<BinaryExpr>(LHS, Kind, RHS);
-}
-
-Node *Db::parseIntegerLiteral(StringView Lit) {
- StringView Tmp = parseNumber(true);
- if (!Tmp.empty() && consumeIf('E'))
- return make<IntegerExpr>(Lit, Tmp);
- return nullptr;
-}
-
-// <CV-Qualifiers> ::= [r] [V] [K]
-Qualifiers Db::parseCVQualifiers() {
- Qualifiers CVR = QualNone;
- if (consumeIf('r'))
- addQualifiers(CVR, QualRestrict);
- if (consumeIf('V'))
- addQualifiers(CVR, QualVolatile);
- if (consumeIf('K'))
- addQualifiers(CVR, QualConst);
- return CVR;
-}
-
-// <function-param> ::= fp <top-level CV-Qualifiers> _ # L == 0, first parameter
-// ::= fp <top-level CV-Qualifiers> <parameter-2 non-negative number> _ # L == 0, second and later parameters
-// ::= fL <L-1 non-negative number> p <top-level CV-Qualifiers> _ # L > 0, first parameter
-// ::= fL <L-1 non-negative number> p <top-level CV-Qualifiers> <parameter-2 non-negative number> _ # L > 0, second and later parameters
-Node *Db::parseFunctionParam() {
- if (consumeIf("fp")) {
- parseCVQualifiers();
- StringView Num = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- return make<FunctionParam>(Num);
- }
- if (consumeIf("fL")) {
- if (parseNumber().empty())
- return nullptr;
- if (!consumeIf('p'))
- return nullptr;
- parseCVQualifiers();
- StringView Num = parseNumber();
- if (!consumeIf('_'))
- return nullptr;
- return make<FunctionParam>(Num);
- }
- return nullptr;
-}
-
-// [gs] nw <expression>* _ <type> E # new (expr-list) type
-// [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
-// [gs] na <expression>* _ <type> E # new[] (expr-list) type
-// [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
-// <initializer> ::= pi <expression>* E # parenthesized initialization
-Node *Db::parseNewExpr() {
- bool Global = consumeIf("gs");
- bool IsArray = look(1) == 'a';
- if (!consumeIf("nw") && !consumeIf("na"))
- return nullptr;
- size_t Exprs = Names.size();
- while (!consumeIf('_')) {
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return nullptr;
- Names.push_back(Ex);
- }
- NodeArray ExprList = popTrailingNodeArray(Exprs);
- Node *Ty = parseType();
- if (Ty == nullptr)
- return Ty;
- if (consumeIf("pi")) {
- size_t InitsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *Init = parseExpr();
- if (Init == nullptr)
- return Init;
- Names.push_back(Init);
- }
- NodeArray Inits = popTrailingNodeArray(InitsBegin);
- return make<NewExpr>(ExprList, Ty, Inits, Global, IsArray);
- } else if (!consumeIf('E'))
- return nullptr;
- return make<NewExpr>(ExprList, Ty, NodeArray(), Global, IsArray);
-}
-
-// cv <type> <expression> # conversion with one argument
-// cv <type> _ <expression>* E # conversion with a different number of arguments
-Node *Db::parseConversionExpr() {
- if (!consumeIf("cv"))
- return nullptr;
- Node *Ty;
- {
- SwapAndRestore<bool> SaveTemp(TryToParseTemplateArgs, false);
- Ty = parseType();
- }
-
- if (Ty == nullptr)
- return nullptr;
-
- if (consumeIf('_')) {
- size_t ExprsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = parseExpr();
- if (E == nullptr)
- return E;
- Names.push_back(E);
- }
- NodeArray Exprs = popTrailingNodeArray(ExprsBegin);
- return make<ConversionExpr>(Ty, Exprs);
- }
-
- Node *E[1] = {parseExpr()};
- if (E[0] == nullptr)
- return nullptr;
- return make<ConversionExpr>(Ty, makeNodeArray(E, E + 1));
-}
-
-// <expr-primary> ::= L <type> <value number> E # integer literal
-// ::= L <type> <value float> E # floating literal
-// ::= L <string type> E # string literal
-// ::= L <nullptr type> E # nullptr literal (i.e., "LDnE")
-// FIXME: ::= L <type> <real-part float> _ <imag-part float> E # complex floating point literal (C 2000)
-// ::= L <mangled-name> E # external name
-Node *Db::parseExprPrimary() {
- if (!consumeIf('L'))
- return nullptr;
- switch (look()) {
- case 'w':
- ++First;
- return parseIntegerLiteral("wchar_t");
- case 'b':
- if (consumeIf("b0E"))
- return make<BoolExpr>(0);
- if (consumeIf("b1E"))
- return make<BoolExpr>(1);
- return nullptr;
- case 'c':
- ++First;
- return parseIntegerLiteral("char");
- case 'a':
- ++First;
- return parseIntegerLiteral("signed char");
- case 'h':
- ++First;
- return parseIntegerLiteral("unsigned char");
- case 's':
- ++First;
- return parseIntegerLiteral("short");
- case 't':
- ++First;
- return parseIntegerLiteral("unsigned short");
- case 'i':
- ++First;
- return parseIntegerLiteral("");
- case 'j':
- ++First;
- return parseIntegerLiteral("u");
- case 'l':
- ++First;
- return parseIntegerLiteral("l");
- case 'm':
- ++First;
- return parseIntegerLiteral("ul");
- case 'x':
- ++First;
- return parseIntegerLiteral("ll");
- case 'y':
- ++First;
- return parseIntegerLiteral("ull");
- case 'n':
- ++First;
- return parseIntegerLiteral("__int128");
- case 'o':
- ++First;
- return parseIntegerLiteral("unsigned __int128");
- case 'f':
- ++First;
- return parseFloatingLiteral<float>();
- case 'd':
- ++First;
- return parseFloatingLiteral<double>();
- case 'e':
- ++First;
- return parseFloatingLiteral<long double>();
- case '_':
- if (consumeIf("_Z")) {
- Node *R = parseEncoding();
- if (R != nullptr && consumeIf('E'))
- return R;
- }
- return nullptr;
- case 'T':
- // Invalid mangled name per
- // http://sourcerytools.com/pipermail/cxx-abi-dev/2011-August/002422.html
- return nullptr;
- default: {
- // might be named type
- Node *T = parseType();
- if (T == nullptr)
- return nullptr;
- StringView N = parseNumber();
- if (!N.empty()) {
- if (!consumeIf('E'))
- return nullptr;
- return make<IntegerCastExpr>(T, N);
- }
- if (consumeIf('E'))
- return T;
- return nullptr;
- }
- }
-}
-
-// <braced-expression> ::= <expression>
-// ::= di <field source-name> <braced-expression> # .name = expr
-// ::= dx <index expression> <braced-expression> # [expr] = expr
-// ::= dX <range begin expression> <range end expression> <braced-expression>
-Node *Db::parseBracedExpr() {
- if (look() == 'd') {
- switch (look(1)) {
- case 'i': {
- First += 2;
- Node *Field = parseSourceName(/*NameState=*/nullptr);
- if (Field == nullptr)
- return nullptr;
- Node *Init = parseBracedExpr();
- if (Init == nullptr)
- return nullptr;
- return make<BracedExpr>(Field, Init, /*isArray=*/false);
- }
- case 'x': {
- First += 2;
- Node *Index = parseExpr();
- if (Index == nullptr)
- return nullptr;
- Node *Init = parseBracedExpr();
- if (Init == nullptr)
- return nullptr;
- return make<BracedExpr>(Index, Init, /*isArray=*/true);
- }
- case 'X': {
- First += 2;
- Node *RangeBegin = parseExpr();
- if (RangeBegin == nullptr)
- return nullptr;
- Node *RangeEnd = parseExpr();
- if (RangeEnd == nullptr)
- return nullptr;
- Node *Init = parseBracedExpr();
- if (Init == nullptr)
- return nullptr;
- return make<BracedRangeExpr>(RangeBegin, RangeEnd, Init);
- }
- }
- }
- return parseExpr();
-}
-
-// (not yet in the spec)
-// <fold-expr> ::= fL <binary-operator-name> <expression> <expression>
-// ::= fR <binary-operator-name> <expression> <expression>
-// ::= fl <binary-operator-name> <expression>
-// ::= fr <binary-operator-name> <expression>
-Node *Db::parseFoldExpr() {
- if (!consumeIf('f'))
- return nullptr;
-
- char FoldKind = look();
- bool IsLeftFold, HasInitializer;
- HasInitializer = FoldKind == 'L' || FoldKind == 'R';
- if (FoldKind == 'l' || FoldKind == 'L')
- IsLeftFold = true;
- else if (FoldKind == 'r' || FoldKind == 'R')
- IsLeftFold = false;
- else
- return nullptr;
- ++First;
-
- // FIXME: This map is duplicated in parseOperatorName and parseExpr.
- StringView OperatorName;
- if (consumeIf("aa")) OperatorName = "&&";
- else if (consumeIf("an")) OperatorName = "&";
- else if (consumeIf("aN")) OperatorName = "&=";
- else if (consumeIf("aS")) OperatorName = "=";
- else if (consumeIf("cm")) OperatorName = ",";
- else if (consumeIf("ds")) OperatorName = ".*";
- else if (consumeIf("dv")) OperatorName = "/";
- else if (consumeIf("dV")) OperatorName = "/=";
- else if (consumeIf("eo")) OperatorName = "^";
- else if (consumeIf("eO")) OperatorName = "^=";
- else if (consumeIf("eq")) OperatorName = "==";
- else if (consumeIf("ge")) OperatorName = ">=";
- else if (consumeIf("gt")) OperatorName = ">";
- else if (consumeIf("le")) OperatorName = "<=";
- else if (consumeIf("ls")) OperatorName = "<<";
- else if (consumeIf("lS")) OperatorName = "<<=";
- else if (consumeIf("lt")) OperatorName = "<";
- else if (consumeIf("mi")) OperatorName = "-";
- else if (consumeIf("mI")) OperatorName = "-=";
- else if (consumeIf("ml")) OperatorName = "*";
- else if (consumeIf("mL")) OperatorName = "*=";
- else if (consumeIf("ne")) OperatorName = "!=";
- else if (consumeIf("oo")) OperatorName = "||";
- else if (consumeIf("or")) OperatorName = "|";
- else if (consumeIf("oR")) OperatorName = "|=";
- else if (consumeIf("pl")) OperatorName = "+";
- else if (consumeIf("pL")) OperatorName = "+=";
- else if (consumeIf("rm")) OperatorName = "%";
- else if (consumeIf("rM")) OperatorName = "%=";
- else if (consumeIf("rs")) OperatorName = ">>";
- else if (consumeIf("rS")) OperatorName = ">>=";
- else return nullptr;
-
- Node *Pack = parseExpr(), *Init = nullptr;
- if (Pack == nullptr)
- return nullptr;
- if (HasInitializer) {
- Init = parseExpr();
- if (Init == nullptr)
- return nullptr;
- }
-
- if (IsLeftFold && Init)
- std::swap(Pack, Init);
-
- return make<FoldExpr>(IsLeftFold, OperatorName, Pack, Init);
-}
-
-// <expression> ::= <unary operator-name> <expression>
-// ::= <binary operator-name> <expression> <expression>
-// ::= <ternary operator-name> <expression> <expression> <expression>
-// ::= cl <expression>+ E # call
-// ::= cv <type> <expression> # conversion with one argument
-// ::= cv <type> _ <expression>* E # conversion with a different number of arguments
-// ::= [gs] nw <expression>* _ <type> E # new (expr-list) type
-// ::= [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
-// ::= [gs] na <expression>* _ <type> E # new[] (expr-list) type
-// ::= [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
-// ::= [gs] dl <expression> # delete expression
-// ::= [gs] da <expression> # delete[] expression
-// ::= pp_ <expression> # prefix ++
-// ::= mm_ <expression> # prefix --
-// ::= ti <type> # typeid (type)
-// ::= te <expression> # typeid (expression)
-// ::= dc <type> <expression> # dynamic_cast<type> (expression)
-// ::= sc <type> <expression> # static_cast<type> (expression)
-// ::= cc <type> <expression> # const_cast<type> (expression)
-// ::= rc <type> <expression> # reinterpret_cast<type> (expression)
-// ::= st <type> # sizeof (a type)
-// ::= sz <expression> # sizeof (an expression)
-// ::= at <type> # alignof (a type)
-// ::= az <expression> # alignof (an expression)
-// ::= nx <expression> # noexcept (expression)
-// ::= <template-param>
-// ::= <function-param>
-// ::= dt <expression> <unresolved-name> # expr.name
-// ::= pt <expression> <unresolved-name> # expr->name
-// ::= ds <expression> <expression> # expr.*expr
-// ::= sZ <template-param> # size of a parameter pack
-// ::= sZ <function-param> # size of a function parameter pack
-// ::= sP <template-arg>* E # sizeof...(T), size of a captured template parameter pack from an alias template
-// ::= sp <expression> # pack expansion
-// ::= tw <expression> # throw expression
-// ::= tr # throw with no operand (rethrow)
-// ::= <unresolved-name> # f(p), N::f(p), ::f(p),
-// # freestanding dependent name (e.g., T::x),
-// # objectless nonstatic member reference
-// ::= fL <binary-operator-name> <expression> <expression>
-// ::= fR <binary-operator-name> <expression> <expression>
-// ::= fl <binary-operator-name> <expression>
-// ::= fr <binary-operator-name> <expression>
-// ::= <expr-primary>
-Node *Db::parseExpr() {
- bool Global = consumeIf("gs");
- if (numLeft() < 2)
- return nullptr;
-
- switch (*First) {
- case 'L':
- return parseExprPrimary();
- case 'T':
- return parseTemplateParam();
- case 'f': {
- // Disambiguate a fold expression from a <function-param>.
- if (look(1) == 'p' || (look(1) == 'L' && std::isdigit(look(2))))
- return parseFunctionParam();
- return parseFoldExpr();
- }
- case 'a':
- switch (First[1]) {
- case 'a':
- First += 2;
- return parseBinaryExpr("&&");
- case 'd':
- First += 2;
- return parsePrefixExpr("&");
- case 'n':
- First += 2;
- return parseBinaryExpr("&");
- case 'N':
- First += 2;
- return parseBinaryExpr("&=");
- case 'S':
- First += 2;
- return parseBinaryExpr("=");
- case 't': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<EnclosingExpr>("alignof (", Ty, ")");
- }
- case 'z': {
- First += 2;
- Node *Ty = parseExpr();
- if (Ty == nullptr)
- return nullptr;
- return make<EnclosingExpr>("alignof (", Ty, ")");
- }
- }
- return nullptr;
- case 'c':
- switch (First[1]) {
- // cc <type> <expression> # const_cast<type>(expression)
- case 'c': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return Ty;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("const_cast", Ty, Ex);
- }
- // cl <expression>+ E # call
- case 'l': {
- First += 2;
- Node *Callee = parseExpr();
- if (Callee == nullptr)
- return Callee;
- size_t ExprsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = parseExpr();
- if (E == nullptr)
- return E;
- Names.push_back(E);
- }
- return make<CallExpr>(Callee, popTrailingNodeArray(ExprsBegin));
- }
- case 'm':
- First += 2;
- return parseBinaryExpr(",");
- case 'o':
- First += 2;
- return parsePrefixExpr("~");
- case 'v':
- return parseConversionExpr();
- }
- return nullptr;
- case 'd':
- switch (First[1]) {
- case 'a': {
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<DeleteExpr>(Ex, Global, /*is_array=*/true);
- }
- case 'c': {
- First += 2;
- Node *T = parseType();
- if (T == nullptr)
- return T;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("dynamic_cast", T, Ex);
- }
- case 'e':
- First += 2;
- return parsePrefixExpr("*");
- case 'l': {
- First += 2;
- Node *E = parseExpr();
- if (E == nullptr)
- return E;
- return make<DeleteExpr>(E, Global, /*is_array=*/false);
- }
- case 'n':
- return parseUnresolvedName();
- case 's': {
- First += 2;
- Node *LHS = parseExpr();
- if (LHS == nullptr)
- return nullptr;
- Node *RHS = parseExpr();
- if (RHS == nullptr)
- return nullptr;
- return make<MemberExpr>(LHS, ".*", RHS);
- }
- case 't': {
- First += 2;
- Node *LHS = parseExpr();
- if (LHS == nullptr)
- return LHS;
- Node *RHS = parseExpr();
- if (RHS == nullptr)
- return nullptr;
- return make<MemberExpr>(LHS, ".", RHS);
- }
- case 'v':
- First += 2;
- return parseBinaryExpr("/");
- case 'V':
- First += 2;
- return parseBinaryExpr("/=");
- }
- return nullptr;
- case 'e':
- switch (First[1]) {
- case 'o':
- First += 2;
- return parseBinaryExpr("^");
- case 'O':
- First += 2;
- return parseBinaryExpr("^=");
- case 'q':
- First += 2;
- return parseBinaryExpr("==");
- }
- return nullptr;
- case 'g':
- switch (First[1]) {
- case 'e':
- First += 2;
- return parseBinaryExpr(">=");
- case 't':
- First += 2;
- return parseBinaryExpr(">");
- }
- return nullptr;
- case 'i':
- switch (First[1]) {
- case 'x': {
- First += 2;
- Node *Base = parseExpr();
- if (Base == nullptr)
- return nullptr;
- Node *Index = parseExpr();
- if (Index == nullptr)
- return Index;
- return make<ArraySubscriptExpr>(Base, Index);
- }
- case 'l': {
- First += 2;
- size_t InitsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = parseBracedExpr();
- if (E == nullptr)
- return nullptr;
- Names.push_back(E);
- }
- return make<InitListExpr>(nullptr, popTrailingNodeArray(InitsBegin));
- }
- }
- return nullptr;
- case 'l':
- switch (First[1]) {
- case 'e':
- First += 2;
- return parseBinaryExpr("<=");
- case 's':
- First += 2;
- return parseBinaryExpr("<<");
- case 'S':
- First += 2;
- return parseBinaryExpr("<<=");
- case 't':
- First += 2;
- return parseBinaryExpr("<");
- }
- return nullptr;
- case 'm':
- switch (First[1]) {
- case 'i':
- First += 2;
- return parseBinaryExpr("-");
- case 'I':
- First += 2;
- return parseBinaryExpr("-=");
- case 'l':
- First += 2;
- return parseBinaryExpr("*");
- case 'L':
- First += 2;
- return parseBinaryExpr("*=");
- case 'm':
- First += 2;
- if (consumeIf('_'))
- return parsePrefixExpr("--");
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return nullptr;
- return make<PostfixExpr>(Ex, "--");
- }
- return nullptr;
- case 'n':
- switch (First[1]) {
- case 'a':
- case 'w':
- return parseNewExpr();
- case 'e':
- First += 2;
- return parseBinaryExpr("!=");
- case 'g':
- First += 2;
- return parsePrefixExpr("-");
- case 't':
- First += 2;
- return parsePrefixExpr("!");
- case 'x':
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("noexcept (", Ex, ")");
- }
- return nullptr;
- case 'o':
- switch (First[1]) {
- case 'n':
- return parseUnresolvedName();
- case 'o':
- First += 2;
- return parseBinaryExpr("||");
- case 'r':
- First += 2;
- return parseBinaryExpr("|");
- case 'R':
- First += 2;
- return parseBinaryExpr("|=");
- }
- return nullptr;
- case 'p':
- switch (First[1]) {
- case 'm':
- First += 2;
- return parseBinaryExpr("->*");
- case 'l':
- First += 2;
- return parseBinaryExpr("+");
- case 'L':
- First += 2;
- return parseBinaryExpr("+=");
- case 'p': {
- First += 2;
- if (consumeIf('_'))
- return parsePrefixExpr("++");
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<PostfixExpr>(Ex, "++");
- }
- case 's':
- First += 2;
- return parsePrefixExpr("+");
- case 't': {
- First += 2;
- Node *L = parseExpr();
- if (L == nullptr)
- return nullptr;
- Node *R = parseExpr();
- if (R == nullptr)
- return nullptr;
- return make<MemberExpr>(L, "->", R);
- }
- }
- return nullptr;
- case 'q':
- if (First[1] == 'u') {
- First += 2;
- Node *Cond = parseExpr();
- if (Cond == nullptr)
- return nullptr;
- Node *LHS = parseExpr();
- if (LHS == nullptr)
- return nullptr;
- Node *RHS = parseExpr();
- if (RHS == nullptr)
- return nullptr;
- return make<ConditionalExpr>(Cond, LHS, RHS);
- }
- return nullptr;
- case 'r':
- switch (First[1]) {
- case 'c': {
- First += 2;
- Node *T = parseType();
- if (T == nullptr)
- return T;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("reinterpret_cast", T, Ex);
- }
- case 'm':
- First += 2;
- return parseBinaryExpr("%");
- case 'M':
- First += 2;
- return parseBinaryExpr("%=");
- case 's':
- First += 2;
- return parseBinaryExpr(">>");
- case 'S':
- First += 2;
- return parseBinaryExpr(">>=");
- }
- return nullptr;
- case 's':
- switch (First[1]) {
- case 'c': {
- First += 2;
- Node *T = parseType();
- if (T == nullptr)
- return T;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("static_cast", T, Ex);
- }
- case 'p': {
- First += 2;
- Node *Child = parseExpr();
- if (Child == nullptr)
- return nullptr;
- return make<ParameterPackExpansion>(Child);
- }
- case 'r':
- return parseUnresolvedName();
- case 't': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return Ty;
- return make<EnclosingExpr>("sizeof (", Ty, ")");
- }
- case 'z': {
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("sizeof (", Ex, ")");
- }
- case 'Z':
- First += 2;
- if (look() == 'T') {
- Node *R = parseTemplateParam();
- if (R == nullptr)
- return nullptr;
- return make<SizeofParamPackExpr>(R);
- } else if (look() == 'f') {
- Node *FP = parseFunctionParam();
- if (FP == nullptr)
- return nullptr;
- return make<EnclosingExpr>("sizeof... (", FP, ")");
- }
- return nullptr;
- case 'P': {
- First += 2;
- size_t ArgsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *Arg = parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- return make<EnclosingExpr>(
- "sizeof... (", make<NodeArrayNode>(popTrailingNodeArray(ArgsBegin)),
- ")");
- }
- }
- return nullptr;
- case 't':
- switch (First[1]) {
- case 'e': {
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("typeid (", Ex, ")");
- }
- case 'i': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return Ty;
- return make<EnclosingExpr>("typeid (", Ty, ")");
- }
- case 'l': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- size_t InitsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = parseBracedExpr();
- if (E == nullptr)
- return nullptr;
- Names.push_back(E);
- }
- return make<InitListExpr>(Ty, popTrailingNodeArray(InitsBegin));
- }
- case 'r':
- First += 2;
- return make<NameType>("throw");
- case 'w': {
- First += 2;
- Node *Ex = parseExpr();
- if (Ex == nullptr)
- return nullptr;
- return make<ThrowExpr>(Ex);
- }
- }
- return nullptr;
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- return parseUnresolvedName();
- }
- return nullptr;
-}
-
-// <call-offset> ::= h <nv-offset> _
-// ::= v <v-offset> _
-//
-// <nv-offset> ::= <offset number>
-// # non-virtual base override
-//
-// <v-offset> ::= <offset number> _ <virtual offset number>
-// # virtual base override, with vcall offset
-bool Db::parseCallOffset() {
- // Just scan through the call offset; we never add this information into the
- // output.
- if (consumeIf('h'))
- return parseNumber(true).empty() || !consumeIf('_');
- if (consumeIf('v'))
- return parseNumber(true).empty() || !consumeIf('_') ||
- parseNumber(true).empty() || !consumeIf('_');
- return true;
-}
-
-// <special-name> ::= TV <type> # virtual table
-// ::= TT <type> # VTT structure (construction vtable index)
-// ::= TI <type> # typeinfo structure
-// ::= TS <type> # typeinfo name (null-terminated byte string)
-// ::= Tc <call-offset> <call-offset> <base encoding>
-// # base is the nominal target function of thunk
-// # first call-offset is 'this' adjustment
-// # second call-offset is result adjustment
-// ::= T <call-offset> <base encoding>
-// # base is the nominal target function of thunk
-// ::= GV <object name> # Guard variable for one-time initialization
-// # No <type>
-// ::= TW <object name> # Thread-local wrapper
-// ::= TH <object name> # Thread-local initialization
-// ::= GR <object name> _ # First temporary
-// ::= GR <object name> <seq-id> _ # Subsequent temporaries
-// extension ::= TC <first type> <number> _ <second type> # construction vtable for second-in-first
-// extension ::= GR <object name> # reference temporary for object
-Node *Db::parseSpecialName() {
- switch (look()) {
- case 'T':
- switch (look(1)) {
- // TV <type> # virtual table
- case 'V': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<SpecialName>("vtable for ", Ty);
- }
- // TT <type> # VTT structure (construction vtable index)
- case 'T': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<SpecialName>("VTT for ", Ty);
- }
- // TI <type> # typeinfo structure
- case 'I': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<SpecialName>("typeinfo for ", Ty);
- }
- // TS <type> # typeinfo name (null-terminated byte string)
- case 'S': {
- First += 2;
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<SpecialName>("typeinfo name for ", Ty);
- }
- // Tc <call-offset> <call-offset> <base encoding>
- case 'c': {
- First += 2;
- if (parseCallOffset() || parseCallOffset())
- return nullptr;
- Node *Encoding = parseEncoding();
- if (Encoding == nullptr)
- return nullptr;
- return make<SpecialName>("covariant return thunk to ", Encoding);
- }
- // extension ::= TC <first type> <number> _ <second type>
- // # construction vtable for second-in-first
- case 'C': {
- First += 2;
- Node *FirstType = parseType();
- if (FirstType == nullptr)
- return nullptr;
- if (parseNumber(true).empty() || !consumeIf('_'))
- return nullptr;
- Node *SecondType = parseType();
- if (SecondType == nullptr)
- return nullptr;
- return make<CtorVtableSpecialName>(SecondType, FirstType);
- }
- // TW <object name> # Thread-local wrapper
- case 'W': {
- First += 2;
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
- return make<SpecialName>("thread-local wrapper routine for ", Name);
- }
- // TH <object name> # Thread-local initialization
- case 'H': {
- First += 2;
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
- return make<SpecialName>("thread-local initialization routine for ", Name);
- }
- // T <call-offset> <base encoding>
- default: {
- ++First;
- bool IsVirt = look() == 'v';
- if (parseCallOffset())
- return nullptr;
- Node *BaseEncoding = parseEncoding();
- if (BaseEncoding == nullptr)
- return nullptr;
- if (IsVirt)
- return make<SpecialName>("virtual thunk to ", BaseEncoding);
- else
- return make<SpecialName>("non-virtual thunk to ", BaseEncoding);
- }
- }
- case 'G':
- switch (look(1)) {
- // GV <object name> # Guard variable for one-time initialization
- case 'V': {
- First += 2;
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
- return make<SpecialName>("guard variable for ", Name);
- }
- // GR <object name> # reference temporary for object
- // GR <object name> _ # First temporary
- // GR <object name> <seq-id> _ # Subsequent temporaries
- case 'R': {
- First += 2;
- Node *Name = parseName();
- if (Name == nullptr)
- return nullptr;
- size_t Count;
- bool ParsedSeqId = !parseSeqId(&Count);
- if (!consumeIf('_') && ParsedSeqId)
- return nullptr;
- return make<SpecialName>("reference temporary for ", Name);
- }
- }
- }
- return nullptr;
-}
-
-// <encoding> ::= <function name> <bare-function-type>
-// ::= <data name>
-// ::= <special-name>
-Node *Db::parseEncoding() {
- if (look() == 'G' || look() == 'T')
- return parseSpecialName();
-
- auto IsEndOfEncoding = [&] {
- // The set of chars that can potentially follow an <encoding> (none of which
- // can start a <type>). Enumerating these allows us to avoid speculative
- // parsing.
- return numLeft() == 0 || look() == 'E' || look() == '.' || look() == '_';
- };
-
- NameState NameInfo(this);
- Node *Name = parseName(&NameInfo);
- if (Name == nullptr)
- return nullptr;
-
- if (resolveForwardTemplateRefs(NameInfo))
- return nullptr;
-
- if (IsEndOfEncoding())
- return Name;
-
- Node *Attrs = nullptr;
- if (consumeIf("Ua9enable_ifI")) {
- size_t BeforeArgs = Names.size();
- while (!consumeIf('E')) {
- Node *Arg = parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- Attrs = make<EnableIfAttr>(popTrailingNodeArray(BeforeArgs));
- }
-
- Node *ReturnType = nullptr;
- if (!NameInfo.CtorDtorConversion && NameInfo.EndsWithTemplateArgs) {
- ReturnType = parseType();
- if (ReturnType == nullptr)
- return nullptr;
- }
-
- if (consumeIf('v'))
- return make<FunctionEncoding>(ReturnType, Name, NodeArray(),
- Attrs, NameInfo.CVQualifiers,
- NameInfo.ReferenceQualifier);
-
- size_t ParamsBegin = Names.size();
- do {
- Node *Ty = parseType();
- if (Ty == nullptr)
- return nullptr;
- Names.push_back(Ty);
- } while (!IsEndOfEncoding());
-
- return make<FunctionEncoding>(ReturnType, Name,
- popTrailingNodeArray(ParamsBegin),
- Attrs, NameInfo.CVQualifiers,
- NameInfo.ReferenceQualifier);
-}
-
-template <class Float>
-struct FloatData;
-
-template <>
-struct FloatData<float>
-{
- static const size_t mangled_size = 8;
- static const size_t max_demangled_size = 24;
- static constexpr const char* spec = "%af";
-};
-
-constexpr const char* FloatData<float>::spec;
-
-template <>
-struct FloatData<double>
-{
- static const size_t mangled_size = 16;
- static const size_t max_demangled_size = 32;
- static constexpr const char* spec = "%a";
-};
-
-constexpr const char* FloatData<double>::spec;
-
-template <>
-struct FloatData<long double>
-{
-#if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \
- defined(__wasm__)
- static const size_t mangled_size = 32;
-#elif defined(__arm__) || defined(__mips__) || defined(__hexagon__)
- static const size_t mangled_size = 16;
-#else
- static const size_t mangled_size = 20; // May need to be adjusted to 16 or 24 on other platforms
-#endif
- static const size_t max_demangled_size = 40;
- static constexpr const char *spec = "%LaL";
-};
-
-constexpr const char *FloatData<long double>::spec;
-
-template <class Float> Node *Db::parseFloatingLiteral() {
- const size_t N = FloatData<Float>::mangled_size;
- if (numLeft() <= N)
- return nullptr;
- StringView Data(First, First + N);
- for (char C : Data)
- if (!std::isxdigit(C))
- return nullptr;
- First += N;
- if (!consumeIf('E'))
- return nullptr;
- return make<FloatExpr<Float>>(Data);
-}
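The mangled spelling accepted here is simply the value's bit pattern written as fixed-width hexadecimal, with the width given by FloatData<>::mangled_size. A minimal sketch of the float case (illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

// Illustrative only: a float literal is mangled as its IEEE-754 bit pattern
// written as FloatData<float>::mangled_size (8) hex digits, so the
// <expr-primary> "Lf3f800000E" encodes the literal 1.0f. The parser above
// accepts either letter case (std::isxdigit); lowercase is shown here.
static std::string floatBitsAsHex(float F) {
  std::uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bit-copy, avoids type-punning UB
  char Buf[9];
  std::snprintf(Buf, sizeof(Buf), "%08x", static_cast<unsigned>(Bits));
  return Buf;
}

int main() {
  assert(floatBitsAsHex(1.0f) == "3f800000");
  assert(floatBitsAsHex(0.0f) == "00000000");
  return 0;
}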
-
-// <seq-id> ::= <0-9A-Z>+
-bool Db::parseSeqId(size_t *Out) {
- if (!(look() >= '0' && look() <= '9') &&
- !(look() >= 'A' && look() <= 'Z'))
- return true;
-
- size_t Id = 0;
- while (true) {
- if (look() >= '0' && look() <= '9') {
- Id *= 36;
- Id += static_cast<size_t>(look() - '0');
- } else if (look() >= 'A' && look() <= 'Z') {
- Id *= 36;
- Id += static_cast<size_t>(look() - 'A') + 10;
- } else {
- *Out = Id;
- return false;
- }
- ++First;
- }
-}
-
-// <substitution> ::= S <seq-id> _
-// ::= S_
-// <substitution> ::= Sa # ::std::allocator
-// <substitution> ::= Sb # ::std::basic_string
-// <substitution> ::= Ss # ::std::basic_string < char,
-// ::std::char_traits<char>,
-// ::std::allocator<char> >
-// <substitution> ::= Si # ::std::basic_istream<char, std::char_traits<char> >
-// <substitution> ::= So # ::std::basic_ostream<char, std::char_traits<char> >
-// <substitution> ::= Sd # ::std::basic_iostream<char, std::char_traits<char> >
-Node *Db::parseSubstitution() {
- if (!consumeIf('S'))
- return nullptr;
-
- if (std::islower(look())) {
- Node *SpecialSub;
- switch (look()) {
- case 'a':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::allocator);
- break;
- case 'b':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::basic_string);
- break;
- case 's':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::string);
- break;
- case 'i':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::istream);
- break;
- case 'o':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::ostream);
- break;
- case 'd':
- ++First;
- SpecialSub = make<SpecialSubstitution>(SpecialSubKind::iostream);
- break;
- default:
- return nullptr;
- }
- // Itanium C++ ABI 5.1.2: If a name that would use a built-in <substitution>
- // has ABI tags, the tags are appended to the substitution; the result is a
- // substitutable component.
- Node *WithTags = parseAbiTags(SpecialSub);
- if (WithTags != SpecialSub) {
- Subs.push_back(WithTags);
- SpecialSub = WithTags;
- }
- return SpecialSub;
- }
-
- // ::= S_
- if (consumeIf('_')) {
- if (Subs.empty())
- return nullptr;
- return Subs[0];
- }
-
- // ::= S <seq-id> _
- size_t Index = 0;
- if (parseSeqId(&Index))
- return nullptr;
- ++Index;
- if (!consumeIf('_') || Index >= Subs.size())
- return nullptr;
- return Subs[Index];
-}
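Worked examples of the indexing implemented by parseSeqId and parseSubstitution above: "S_" refers to Subs[0]; a non-empty <seq-id> is decoded in base 36 and biased by one, so "S0_" is Subs[1], "SA_" is Subs[11], and "S10_" (1*36 + 0 = 36) is Subs[37]. A standalone sketch of just that arithmetic (not LLVM code):

#include <cassert>
#include <cstddef>
#include <string>

// Illustrative only: decodes "S<seq-id>_" the way parseSeqId and
// parseSubstitution do together: base-36 digits ('0'-'9', then 'A'-'Z'),
// plus one when a <seq-id> is present.
static bool substitutionIndex(const std::string &S, std::size_t &Index) {
  if (S.size() < 2 || S.front() != 'S' || S.back() != '_')
    return false;
  Index = 0;
  for (std::size_t I = 1; I + 1 < S.size(); ++I) {
    char C = S[I];
    if (C >= '0' && C <= '9')
      Index = Index * 36 + static_cast<std::size_t>(C - '0');
    else if (C >= 'A' && C <= 'Z')
      Index = Index * 36 + static_cast<std::size_t>(C - 'A') + 10;
    else
      return false;
  }
  if (S.size() > 2) // a non-empty <seq-id> means "index + 1"
    ++Index;
  return true;
}

int main() {
  std::size_t I = 0;
  assert(substitutionIndex("S_", I) && I == 0);
  assert(substitutionIndex("S0_", I) && I == 1);
  assert(substitutionIndex("SA_", I) && I == 11);
  assert(substitutionIndex("S10_", I) && I == 37);
  return 0;
}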
-
-// <template-param> ::= T_ # first template parameter
-// ::= T <parameter-2 non-negative number> _
-Node *Db::parseTemplateParam() {
- if (!consumeIf('T'))
- return nullptr;
-
- size_t Index = 0;
- if (!consumeIf('_')) {
- if (parsePositiveInteger(&Index))
- return nullptr;
- ++Index;
- if (!consumeIf('_'))
- return nullptr;
- }
-
- // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter list
- // are mangled as the corresponding artificial template type parameter.
- if (ParsingLambdaParams)
- return make<NameType>("auto");
-
- // If we're in a context where this <template-param> refers to a
- // <template-arg> further ahead in the mangled name (currently just conversion
- // operator types), then we should only look it up in the right context.
- if (PermitForwardTemplateReferences) {
- ForwardTemplateRefs.push_back(make<ForwardTemplateReference>(Index));
- return ForwardTemplateRefs.back();
- }
-
- if (Index >= TemplateParams.size())
- return nullptr;
- return TemplateParams[Index];
-}
-
-// <template-arg> ::= <type> # type or template
-// ::= X <expression> E # expression
-// ::= <expr-primary> # simple expressions
-// ::= J <template-arg>* E # argument pack
-// ::= LZ <encoding> E # extension
-Node *Db::parseTemplateArg() {
- switch (look()) {
- case 'X': {
- ++First;
- Node *Arg = parseExpr();
- if (Arg == nullptr || !consumeIf('E'))
- return nullptr;
- return Arg;
- }
- case 'J': {
- ++First;
- size_t ArgsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *Arg = parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- NodeArray Args = popTrailingNodeArray(ArgsBegin);
- return make<TemplateArgumentPack>(Args);
- }
- case 'L': {
- // ::= LZ <encoding> E # extension
- if (look(1) == 'Z') {
- First += 2;
- Node *Arg = parseEncoding();
- if (Arg == nullptr || !consumeIf('E'))
- return nullptr;
- return Arg;
- }
- // ::= <expr-primary> # simple expressions
- return parseExprPrimary();
- }
- default:
- return parseType();
- }
-}
-
-// <template-args> ::= I <template-arg>* E
-// extension: the ABI says <template-arg>+
-Node *Db::parseTemplateArgs(bool TagTemplates) {
- if (!consumeIf('I'))
- return nullptr;
-
- // <template-params> refer to the innermost <template-args>. Clear out any
- // outer args that we may have inserted into TemplateParams.
- if (TagTemplates)
- TemplateParams.clear();
-
- size_t ArgsBegin = Names.size();
- while (!consumeIf('E')) {
- if (TagTemplates) {
- auto OldParams = std::move(TemplateParams);
- Node *Arg = parseTemplateArg();
- TemplateParams = std::move(OldParams);
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- Node *TableEntry = Arg;
- if (Arg->getKind() == Node::KTemplateArgumentPack) {
- TableEntry = make<ParameterPack>(
- static_cast<TemplateArgumentPack*>(TableEntry)->getElements());
- }
- TemplateParams.push_back(TableEntry);
- } else {
- Node *Arg = parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- }
- return make<TemplateArgs>(popTrailingNodeArray(ArgsBegin));
-}
-
-// <discriminator> := _ <non-negative number> # when number < 10
-// := __ <non-negative number> _ # when number >= 10
-// extension := decimal-digit+ # at the end of string
-
-const char*
-parse_discriminator(const char* first, const char* last)
-{
- // parse but ignore discriminator
- if (first != last)
- {
- if (*first == '_')
- {
- const char* t1 = first+1;
- if (t1 != last)
- {
- if (std::isdigit(*t1))
- first = t1+1;
- else if (*t1 == '_')
- {
- for (++t1; t1 != last && std::isdigit(*t1); ++t1)
- ;
- if (t1 != last && *t1 == '_')
- first = t1 + 1;
- }
- }
- }
- else if (std::isdigit(*first))
- {
- const char* t1 = first+1;
- for (; t1 != last && std::isdigit(*t1); ++t1)
- ;
- if (t1 == last)
- first = last;
- }
- }
- return first;
-}
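A small worked example of the <discriminator> rule above (hypothetical helper, encoding direction only): a discriminator of 3 is spelled "_3", while 12 needs the long form "__12_".

#include <cassert>
#include <string>

// Hypothetical helper, not part of LLVM: emits the <discriminator> spelling
// described above. Values below 10 use "_N"; larger values use "__N_".
static std::string formatDiscriminator(unsigned N) {
  return N < 10 ? "_" + std::to_string(N)
                : "__" + std::to_string(N) + "_";
}

int main() {
  assert(formatDiscriminator(3) == "_3");
  assert(formatDiscriminator(12) == "__12_");
  return 0;
}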
-
-// <mangled-name> ::= _Z <encoding>
-// ::= <type>
-// extension ::= ___Z <encoding> _block_invoke
-// extension ::= ___Z <encoding> _block_invoke<decimal-digit>+
-// extension ::= ___Z <encoding> _block_invoke_<decimal-digit>+
-Node *Db::parse() {
- if (consumeIf("_Z")) {
- Node *Encoding = parseEncoding();
- if (Encoding == nullptr)
- return nullptr;
- if (look() == '.') {
- Encoding = make<DotSuffix>(Encoding, StringView(First, Last));
- First = Last;
- }
- if (numLeft() != 0)
- return nullptr;
- return Encoding;
- }
-
- if (consumeIf("___Z")) {
- Node *Encoding = parseEncoding();
- if (Encoding == nullptr || !consumeIf("_block_invoke"))
- return nullptr;
- bool RequireNumber = consumeIf('_');
- if (parseNumber().empty() && RequireNumber)
- return nullptr;
- if (numLeft() != 0)
- return nullptr;
- return make<SpecialName>("invocation function for block in ", Encoding);
- }
-
- Node *Ty = parseType();
- if (numLeft() != 0)
- return nullptr;
- return Ty;
-}
-
-bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
- size_t InitSize) {
- size_t BufferSize;
- if (Buf == nullptr) {
- Buf = static_cast<char *>(std::malloc(InitSize));
- if (Buf == nullptr)
- return true;
- BufferSize = InitSize;
- } else
- BufferSize = *N;
-
- S.reset(Buf, BufferSize);
- return false;
-}
+//===----------------------------------------------------------------------===//
+// Code beyond this point should not be synchronized with libc++abi.
+//===----------------------------------------------------------------------===//
-} // unnamed namespace
+using Demangler = itanium_demangle::ManglingParser<DefaultAllocator>;
char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
size_t *N, int *Status) {
@@ -4961,14 +333,14 @@ char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
}
int InternalStatus = demangle_success;
- Db Parser(MangledName, MangledName + std::strlen(MangledName));
+ Demangler Parser(MangledName, MangledName + std::strlen(MangledName));
OutputStream S;
Node *AST = Parser.parse();
if (AST == nullptr)
InternalStatus = demangle_invalid_mangled_name;
- else if (initializeOutputStream(Buf, N, S, 1024))
+ else if (!initializeOutputStream(Buf, N, S, 1024))
InternalStatus = demangle_memory_alloc_failure;
else {
assert(Parser.ForwardTemplateRefs.empty());
@@ -4984,13 +356,11 @@ char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
return InternalStatus == demangle_success ? Buf : nullptr;
}
-namespace llvm {
-
ItaniumPartialDemangler::ItaniumPartialDemangler()
- : RootNode(nullptr), Context(new Db{nullptr, nullptr}) {}
+ : RootNode(nullptr), Context(new Demangler{nullptr, nullptr}) {}
ItaniumPartialDemangler::~ItaniumPartialDemangler() {
- delete static_cast<Db *>(Context);
+ delete static_cast<Demangler *>(Context);
}
ItaniumPartialDemangler::ItaniumPartialDemangler(
@@ -5008,16 +378,16 @@ operator=(ItaniumPartialDemangler &&Other) {
// Demangle MangledName into an AST, storing it into this->RootNode.
bool ItaniumPartialDemangler::partialDemangle(const char *MangledName) {
- Db *Parser = static_cast<Db *>(Context);
+ Demangler *Parser = static_cast<Demangler *>(Context);
size_t Len = std::strlen(MangledName);
Parser->reset(MangledName, MangledName + Len);
RootNode = Parser->parse();
return RootNode == nullptr;
}
-static char *printNode(Node *RootNode, char *Buf, size_t *N) {
+static char *printNode(const Node *RootNode, char *Buf, size_t *N) {
OutputStream S;
- if (initializeOutputStream(Buf, N, S, 128))
+ if (!initializeOutputStream(Buf, N, S, 128))
return nullptr;
RootNode->print(S);
S += '\0';
@@ -5030,24 +400,24 @@ char *ItaniumPartialDemangler::getFunctionBaseName(char *Buf, size_t *N) const {
if (!isFunction())
return nullptr;
- Node *Name = static_cast<FunctionEncoding *>(RootNode)->getName();
+ const Node *Name = static_cast<const FunctionEncoding *>(RootNode)->getName();
while (true) {
switch (Name->getKind()) {
case Node::KAbiTagAttr:
- Name = static_cast<AbiTagAttr *>(Name)->Base;
+ Name = static_cast<const AbiTagAttr *>(Name)->Base;
continue;
case Node::KStdQualifiedName:
- Name = static_cast<StdQualifiedName *>(Name)->Child;
+ Name = static_cast<const StdQualifiedName *>(Name)->Child;
continue;
case Node::KNestedName:
- Name = static_cast<NestedName *>(Name)->Name;
+ Name = static_cast<const NestedName *>(Name)->Name;
continue;
case Node::KLocalName:
- Name = static_cast<LocalName *>(Name)->Entity;
+ Name = static_cast<const LocalName *>(Name)->Entity;
continue;
case Node::KNameWithTemplateArgs:
- Name = static_cast<NameWithTemplateArgs *>(Name)->Name;
+ Name = static_cast<const NameWithTemplateArgs *>(Name)->Name;
continue;
default:
return printNode(Name, Buf, N);
@@ -5059,20 +429,20 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
size_t *N) const {
if (!isFunction())
return nullptr;
- Node *Name = static_cast<FunctionEncoding *>(RootNode)->getName();
+ const Node *Name = static_cast<const FunctionEncoding *>(RootNode)->getName();
OutputStream S;
- if (initializeOutputStream(Buf, N, S, 128))
+ if (!initializeOutputStream(Buf, N, S, 128))
return nullptr;
KeepGoingLocalFunction:
while (true) {
if (Name->getKind() == Node::KAbiTagAttr) {
- Name = static_cast<AbiTagAttr *>(Name)->Base;
+ Name = static_cast<const AbiTagAttr *>(Name)->Base;
continue;
}
if (Name->getKind() == Node::KNameWithTemplateArgs) {
- Name = static_cast<NameWithTemplateArgs *>(Name)->Name;
+ Name = static_cast<const NameWithTemplateArgs *>(Name)->Name;
continue;
}
break;
@@ -5083,10 +453,10 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
S += "std";
break;
case Node::KNestedName:
- static_cast<NestedName *>(Name)->Qual->print(S);
+ static_cast<const NestedName *>(Name)->Qual->print(S);
break;
case Node::KLocalName: {
- auto *LN = static_cast<LocalName *>(Name);
+ auto *LN = static_cast<const LocalName *>(Name);
LN->Encoding->print(S);
S += "::";
Name = LN->Entity;
@@ -5115,7 +485,7 @@ char *ItaniumPartialDemangler::getFunctionParameters(char *Buf,
NodeArray Params = static_cast<FunctionEncoding *>(RootNode)->getParams();
OutputStream S;
- if (initializeOutputStream(Buf, N, S, 128))
+ if (!initializeOutputStream(Buf, N, S, 128))
return nullptr;
S += '(';
@@ -5133,10 +503,11 @@ char *ItaniumPartialDemangler::getFunctionReturnType(
return nullptr;
OutputStream S;
- if (initializeOutputStream(Buf, N, S, 128))
+ if (!initializeOutputStream(Buf, N, S, 128))
return nullptr;
- if (Node *Ret = static_cast<FunctionEncoding *>(RootNode)->getReturnType())
+ if (const Node *Ret =
+ static_cast<const FunctionEncoding *>(RootNode)->getReturnType())
Ret->print(S);
S += '\0';
@@ -5154,12 +525,12 @@ bool ItaniumPartialDemangler::hasFunctionQualifiers() const {
assert(RootNode != nullptr && "must call partialDemangle()");
if (!isFunction())
return false;
- auto *E = static_cast<FunctionEncoding *>(RootNode);
+ auto *E = static_cast<const FunctionEncoding *>(RootNode);
return E->getCVQuals() != QualNone || E->getRefQual() != FrefQualNone;
}
bool ItaniumPartialDemangler::isCtorOrDtor() const {
- Node *N = static_cast<Node *>(RootNode);
+ const Node *N = static_cast<const Node *>(RootNode);
while (N) {
switch (N->getKind()) {
default:
@@ -5168,22 +539,22 @@ bool ItaniumPartialDemangler::isCtorOrDtor() const {
return true;
case Node::KAbiTagAttr:
- N = static_cast<AbiTagAttr *>(N)->Base;
+ N = static_cast<const AbiTagAttr *>(N)->Base;
break;
case Node::KFunctionEncoding:
- N = static_cast<FunctionEncoding *>(N)->getName();
+ N = static_cast<const FunctionEncoding *>(N)->getName();
break;
case Node::KLocalName:
- N = static_cast<LocalName *>(N)->Entity;
+ N = static_cast<const LocalName *>(N)->Entity;
break;
case Node::KNameWithTemplateArgs:
- N = static_cast<NameWithTemplateArgs *>(N)->Name;
+ N = static_cast<const NameWithTemplateArgs *>(N)->Name;
break;
case Node::KNestedName:
- N = static_cast<NestedName *>(N)->Name;
+ N = static_cast<const NestedName *>(N)->Name;
break;
case Node::KStdQualifiedName:
- N = static_cast<StdQualifiedName *>(N)->Child;
+ N = static_cast<const StdQualifiedName *>(N)->Child;
break;
}
}
@@ -5192,17 +563,16 @@ bool ItaniumPartialDemangler::isCtorOrDtor() const {
bool ItaniumPartialDemangler::isFunction() const {
assert(RootNode != nullptr && "must call partialDemangle()");
- return static_cast<Node *>(RootNode)->getKind() == Node::KFunctionEncoding;
+ return static_cast<const Node *>(RootNode)->getKind() ==
+ Node::KFunctionEncoding;
}
bool ItaniumPartialDemangler::isSpecialName() const {
assert(RootNode != nullptr && "must call partialDemangle()");
- auto K = static_cast<Node *>(RootNode)->getKind();
+ auto K = static_cast<const Node *>(RootNode)->getKind();
return K == Node::KSpecialName || K == Node::KCtorVtableSpecialName;
}
bool ItaniumPartialDemangler::isData() const {
return !isFunction() && !isSpecialName();
}
-
-}
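For context on the public entry points this hunk touches, here is a minimal usage sketch of the API as it appears above (llvm::itaniumDemangle and ItaniumPartialDemangler, declared in llvm/Demangle/Demangle.h); error handling is abbreviated and "_Z3fooi" is just an example mangled name:

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  // One-shot interface: passing a null buffer asks the demangler to malloc()
  // one; Status reports demangle_success or one of the failure codes.
  int Status = 0;
  char *Demangled = llvm::itaniumDemangle("_Z3fooi", nullptr, nullptr, &Status);
  if (Demangled) {
    std::printf("%s\n", Demangled); // expected: foo(int)
    std::free(Demangled);
  }

  // Partial interface: parse once, then query pieces of the AST.
  // partialDemangle() returns true on error, as the diff above shows.
  llvm::ItaniumPartialDemangler D;
  if (!D.partialDemangle("_Z3fooi") && D.isFunction()) {
    char *Base = D.getFunctionBaseName(nullptr, nullptr);     // "foo"
    char *Params = D.getFunctionParameters(nullptr, nullptr); // "(int)"
    std::free(Base);
    std::free(Params);
  }
  return 0;
}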
diff --git a/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp b/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 3eac87d61011..51ffa0bff7f3 100644
--- a/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -14,334 +14,34 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Demangle/MicrosoftDemangle.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/MicrosoftDemangleNodes.h"
-#include "Compiler.h"
-#include "StringView.h"
-#include "Utility.h"
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+#include <array>
#include <cctype>
+#include <cstdio>
#include <tuple>
-// This memory allocator is extremely fast, but it doesn't call dtors
-// for allocated objects. That means you can't use STL containers
-// (such as std::vector) with this allocator. But it pays off --
-// the demangler is 3x faster with this allocator compared to one with
-// STL containers.
-namespace {
- constexpr size_t AllocUnit = 4096;
-
-class ArenaAllocator {
- struct AllocatorNode {
- uint8_t *Buf = nullptr;
- size_t Used = 0;
- size_t Capacity = 0;
- AllocatorNode *Next = nullptr;
- };
-
- void addNode(size_t Capacity) {
- AllocatorNode *NewHead = new AllocatorNode;
- NewHead->Buf = new uint8_t[Capacity];
- NewHead->Next = Head;
- NewHead->Capacity = Capacity;
- Head = NewHead;
- NewHead->Used = 0;
- }
-
-public:
- ArenaAllocator() { addNode(AllocUnit); }
-
- ~ArenaAllocator() {
- while (Head) {
- assert(Head->Buf);
- delete[] Head->Buf;
- AllocatorNode *Next = Head->Next;
- delete Head;
- Head = Next;
- }
- }
-
- char *allocUnalignedBuffer(size_t Length) {
- uint8_t *Buf = Head->Buf + Head->Used;
-
- Head->Used += Length;
- if (Head->Used > Head->Capacity) {
- // It's possible we need a buffer which is larger than our default unit
- // size, so we need to be careful to add a node with capacity that is at
- // least as large as what we need.
- addNode(std::max(AllocUnit, Length));
- Head->Used = Length;
- Buf = Head->Buf;
- }
-
- return reinterpret_cast<char *>(Buf);
- }
-
- template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
-
- size_t Size = sizeof(T);
- assert(Head && Head->Buf);
-
- size_t P = (size_t)Head->Buf + Head->Used;
- uintptr_t AlignedP =
- (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1));
- uint8_t *PP = (uint8_t *)AlignedP;
- size_t Adjustment = AlignedP - P;
-
- Head->Used += Size + Adjustment;
- if (Head->Used < Head->Capacity)
- return new (PP) T(std::forward<Args>(ConstructorArgs)...);
-
- addNode(AllocUnit);
- Head->Used = Size;
- return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
- }
-
-private:
- AllocatorNode *Head = nullptr;
-};
-} // namespace
+using namespace llvm;
+using namespace ms_demangle;
static bool startsWithDigit(StringView S) {
return !S.empty() && std::isdigit(S.front());
}
-// Writes a space if the last token does not end with a punctuation.
-static void outputSpaceIfNecessary(OutputStream &OS) {
- if (OS.empty())
- return;
-
- char C = OS.back();
- if (isalnum(C) || C == '>')
- OS << " ";
-}
-
-// Storage classes
-enum Qualifiers : uint8_t {
- Q_None = 0,
- Q_Const = 1 << 0,
- Q_Volatile = 1 << 1,
- Q_Far = 1 << 2,
- Q_Huge = 1 << 3,
- Q_Unaligned = 1 << 4,
- Q_Restrict = 1 << 5,
- Q_Pointer64 = 1 << 6
-};
-
-enum class StorageClass : uint8_t {
- None,
- PrivateStatic,
- ProtectedStatic,
- PublicStatic,
- Global,
- FunctionLocalStatic
-};
-
-enum class QualifierMangleMode { Drop, Mangle, Result };
-
-enum class PointerAffinity { Pointer, Reference, RValueReference };
-
-// Calling conventions
-enum class CallingConv : uint8_t {
- None,
- Cdecl,
- Pascal,
- Thiscall,
- Stdcall,
- Fastcall,
- Clrcall,
- Eabi,
- Vectorcall,
- Regcall,
-};
-
-enum class ReferenceKind : uint8_t { None, LValueRef, RValueRef };
-
-// Types
-enum class PrimTy : uint8_t {
- Unknown,
- None,
- Function,
- Ptr,
- MemberPtr,
- Array,
-
- Struct,
- Union,
- Class,
- Enum,
-
- Void,
- Bool,
- Char,
- Schar,
- Uchar,
- Char16,
- Char32,
- Short,
- Ushort,
- Int,
- Uint,
- Long,
- Ulong,
- Int64,
- Uint64,
- Wchar,
- Float,
- Double,
- Ldouble,
- Nullptr
-};
-
-// Function classes
-enum FuncClass : uint8_t {
- Public = 1 << 0,
- Protected = 1 << 1,
- Private = 1 << 2,
- Global = 1 << 3,
- Static = 1 << 4,
- Virtual = 1 << 5,
- Far = 1 << 6,
-};
-
-namespace {
-
-struct Type;
-struct Name;
-
-struct FunctionParams {
- bool IsVariadic = false;
- Type *Current = nullptr;
-
- FunctionParams *Next = nullptr;
+struct NodeList {
+ Node *N = nullptr;
+ NodeList *Next = nullptr;
};
-struct TemplateParams {
- bool IsTemplateTemplate = false;
- bool IsAliasTemplate = false;
-
- // Type can be null if this is a template template parameter. In that case
- // only Name will be valid.
- Type *ParamType = nullptr;
-
- // Name can be valid if this is a template template parameter (see above) or
- // this is a function declaration (e.g. foo<&SomeFunc>). In the latter case
- // Name contains the name of the function and Type contains the signature.
- Name *ParamName = nullptr;
-
- TemplateParams *Next = nullptr;
-};
-
-// The type class. Mangled symbols are first parsed and converted to
-// this type and then converted to string.
-struct Type {
- virtual ~Type() {}
-
- virtual Type *clone(ArenaAllocator &Arena) const;
-
- // Write the "first half" of a given type. This is a static functions to
- // give the code a chance to do processing that is common to a subset of
- // subclasses
- static void outputPre(OutputStream &OS, Type &Ty);
-
- // Write the "second half" of a given type. This is a static functions to
- // give the code a chance to do processing that is common to a subset of
- // subclasses
- static void outputPost(OutputStream &OS, Type &Ty);
-
- virtual void outputPre(OutputStream &OS);
- virtual void outputPost(OutputStream &OS);
-
- // Primitive type such as Int.
- PrimTy Prim = PrimTy::Unknown;
-
- Qualifiers Quals = Q_None;
- StorageClass Storage = StorageClass::None; // storage class
-};
-
-// Represents an identifier which may be a template.
-struct Name {
- // Name read from a MangledName string.
- StringView Str;
-
- // Overloaded operators are represented as special BackReferences in mangled
- // symbols. If this is an operator name, "op" has an operator name (e.g.
- // ">>"). Otherwise, empty.
- StringView Operator;
-
- // Template parameters. Null if not a template.
- TemplateParams *TParams = nullptr;
-
- // Nested BackReferences (e.g. "A::B::C") are represented as a linked list.
- Name *Next = nullptr;
-};
-
-struct PointerType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
- void outputPost(OutputStream &OS) override;
-
- PointerAffinity Affinity;
-
- // Represents a type X in "a pointer to X", "a reference to X",
- // "an array of X", or "a function returning X".
- Type *Pointee = nullptr;
-};
-
-struct MemberPointerType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
- void outputPost(OutputStream &OS) override;
-
- Name *MemberName = nullptr;
-
- // Represents a type X in "a pointer to X", "a reference to X",
- // "an array of X", or "a function returning X".
- Type *Pointee = nullptr;
-};
-
-struct FunctionType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
- void outputPost(OutputStream &OS) override;
-
- // True if this FunctionType instance is the Pointee of a PointerType or
- // MemberPointerType.
- bool IsFunctionPointer = false;
-
- Type *ReturnType = nullptr;
- // If this is a reference, the type of reference.
- ReferenceKind RefKind;
-
- CallingConv CallConvention;
- FuncClass FunctionClass;
-
- FunctionParams Params;
-};
-
-struct UdtType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
-
- Name *UdtName = nullptr;
-};
-
-struct ArrayType : public Type {
- Type *clone(ArenaAllocator &Arena) const override;
- void outputPre(OutputStream &OS) override;
- void outputPost(OutputStream &OS) override;
-
- // Either NextDimension or ElementType will be valid.
- ArrayType *NextDimension = nullptr;
- uint32_t ArrayDimension = 0;
-
- Type *ElementType = nullptr;
-};
-
-} // namespace
-
-static bool isMemberPointer(StringView MangledName) {
+static bool isMemberPointer(StringView MangledName, bool &Error) {
+ Error = false;
switch (MangledName.popFront()) {
case '$':
// This is probably an rvalue reference (e.g. $$Q), and you cannot have an
@@ -359,7 +59,8 @@ static bool isMemberPointer(StringView MangledName) {
// what.
break;
default:
- assert(false && "Ty is not a pointer type!");
+ Error = true;
+ return false;
}
// If it starts with a number, then 6 indicates a non-member function
@@ -390,45 +91,46 @@ static bool isMemberPointer(StringView MangledName) {
case 'T':
return true;
default:
- assert(false);
+ Error = true;
+ return false;
}
- return false;
}
-static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
- outputSpaceIfNecessary(OS);
-
- switch (CC) {
- case CallingConv::Cdecl:
- OS << "__cdecl";
- break;
- case CallingConv::Fastcall:
- OS << "__fastcall";
- break;
- case CallingConv::Pascal:
- OS << "__pascal";
- break;
- case CallingConv::Regcall:
- OS << "__regcall";
- break;
- case CallingConv::Stdcall:
- OS << "__stdcall";
- break;
- case CallingConv::Thiscall:
- OS << "__thiscall";
- break;
- case CallingConv::Eabi:
- OS << "__eabi";
- break;
- case CallingConv::Vectorcall:
- OS << "__vectorcall";
- break;
- case CallingConv::Clrcall:
- OS << "__clrcall";
- break;
- default:
- break;
- }
+static SpecialIntrinsicKind
+consumeSpecialIntrinsicKind(StringView &MangledName) {
+ if (MangledName.consumeFront("?_7"))
+ return SpecialIntrinsicKind::Vftable;
+ if (MangledName.consumeFront("?_8"))
+ return SpecialIntrinsicKind::Vbtable;
+ if (MangledName.consumeFront("?_9"))
+ return SpecialIntrinsicKind::VcallThunk;
+ if (MangledName.consumeFront("?_A"))
+ return SpecialIntrinsicKind::Typeof;
+ if (MangledName.consumeFront("?_B"))
+ return SpecialIntrinsicKind::LocalStaticGuard;
+ if (MangledName.consumeFront("?_C"))
+ return SpecialIntrinsicKind::StringLiteralSymbol;
+ if (MangledName.consumeFront("?_P"))
+ return SpecialIntrinsicKind::UdtReturning;
+ if (MangledName.consumeFront("?_R0"))
+ return SpecialIntrinsicKind::RttiTypeDescriptor;
+ if (MangledName.consumeFront("?_R1"))
+ return SpecialIntrinsicKind::RttiBaseClassDescriptor;
+ if (MangledName.consumeFront("?_R2"))
+ return SpecialIntrinsicKind::RttiBaseClassArray;
+ if (MangledName.consumeFront("?_R3"))
+ return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
+ if (MangledName.consumeFront("?_R4"))
+ return SpecialIntrinsicKind::RttiCompleteObjLocator;
+ if (MangledName.consumeFront("?_S"))
+ return SpecialIntrinsicKind::LocalVftable;
+ if (MangledName.consumeFront("?__E"))
+ return SpecialIntrinsicKind::DynamicInitializer;
+ if (MangledName.consumeFront("?__F"))
+ return SpecialIntrinsicKind::DynamicAtexitDestructor;
+ if (MangledName.consumeFront("?__J"))
+ return SpecialIntrinsicKind::LocalStaticThreadGuard;
+ return SpecialIntrinsicKind::None;
}
static bool startsWithLocalScopePattern(StringView S) {
@@ -472,519 +174,575 @@ static bool startsWithLocalScopePattern(StringView S) {
return true;
}
-static void outputName(OutputStream &OS, const Name *TheName);
-
-// Write a function or template parameter list.
-static void outputParameterList(OutputStream &OS,
- const FunctionParams &Params) {
- if (!Params.Current) {
- OS << "void";
- return;
- }
-
- const FunctionParams *Head = &Params;
- while (Head) {
- Type::outputPre(OS, *Head->Current);
- Type::outputPost(OS, *Head->Current);
-
- Head = Head->Next;
-
- if (Head)
- OS << ", ";
+static bool isTagType(StringView S) {
+ switch (S.front()) {
+ case 'T': // union
+ case 'U': // struct
+ case 'V': // class
+ case 'W': // enum
+ return true;
}
+ return false;
}
-static void outputParameterList(OutputStream &OS,
- const TemplateParams &Params) {
- if (!Params.ParamType && !Params.ParamName) {
- OS << "<>";
- return;
- }
-
- OS << "<";
- const TemplateParams *Head = &Params;
- while (Head) {
- // Type can be null if this is a template template parameter,
- // and Name can be null if this is a simple type.
-
- if (Head->ParamType && Head->ParamName) {
- // Function pointer.
- OS << "&";
- Type::outputPre(OS, *Head->ParamType);
- outputName(OS, Head->ParamName);
- Type::outputPost(OS, *Head->ParamType);
- } else if (Head->ParamType) {
- // simple type.
- Type::outputPre(OS, *Head->ParamType);
- Type::outputPost(OS, *Head->ParamType);
- } else {
- // Template alias.
- outputName(OS, Head->ParamName);
- }
+static bool isCustomType(StringView S) { return S[0] == '?'; }
- Head = Head->Next;
+static bool isPointerType(StringView S) {
+ if (S.startsWith("$$Q")) // foo &&
+ return true;
- if (Head)
- OS << ", ";
+ switch (S.front()) {
+ case 'A': // foo &
+ case 'P': // foo *
+ case 'Q': // foo *const
+ case 'R': // foo *volatile
+ case 'S': // foo *const volatile
+ return true;
}
- OS << ">";
+ return false;
}
-static void outputName(OutputStream &OS, const Name *TheName) {
- if (!TheName)
- return;
-
- outputSpaceIfNecessary(OS);
-
- const Name *Previous = nullptr;
- // Print out namespaces or outer class BackReferences.
- for (; TheName->Next; TheName = TheName->Next) {
- Previous = TheName;
- OS << TheName->Str;
- if (TheName->TParams)
- outputParameterList(OS, *TheName->TParams);
- OS << "::";
- }
-
- // Print out a regular name.
- if (TheName->Operator.empty()) {
- OS << TheName->Str;
- if (TheName->TParams)
- outputParameterList(OS, *TheName->TParams);
- return;
- }
-
- // Print out ctor or dtor.
- if (TheName->Operator == "dtor")
- OS << "~";
-
- if (TheName->Operator == "ctor" || TheName->Operator == "dtor") {
- OS << Previous->Str;
- if (Previous->TParams)
- outputParameterList(OS, *Previous->TParams);
- return;
- }
+static bool isArrayType(StringView S) { return S[0] == 'Y'; }
- // Print out an overloaded operator.
- if (!TheName->Str.empty())
- OS << TheName->Str << "::";
- OS << "operator" << TheName->Operator;
+static bool isFunctionType(StringView S) {
+ return S.startsWith("$$A8@@") || S.startsWith("$$A6");
}
-namespace {
-
-Type *Type::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<Type>(*this);
+static FunctionRefQualifier
+demangleFunctionRefQualifier(StringView &MangledName) {
+ if (MangledName.consumeFront('G'))
+ return FunctionRefQualifier::Reference;
+ else if (MangledName.consumeFront('H'))
+ return FunctionRefQualifier::RValueReference;
+ return FunctionRefQualifier::None;
}
-// Write the "first half" of a given type.
-void Type::outputPre(OutputStream &OS, Type &Ty) {
- // Function types require custom handling of const and static so we
- // handle them separately. All other types use the same decoration
- // for these modifiers, so handle them here in common code.
- if (Ty.Prim == PrimTy::Function) {
- Ty.outputPre(OS);
- return;
- }
+static std::pair<Qualifiers, PointerAffinity>
+demanglePointerCVQualifiers(StringView &MangledName) {
+ if (MangledName.consumeFront("$$Q"))
+ return std::make_pair(Q_None, PointerAffinity::RValueReference);
- switch (Ty.Storage) {
- case StorageClass::PrivateStatic:
- case StorageClass::PublicStatic:
- case StorageClass::ProtectedStatic:
- OS << "static ";
+ switch (MangledName.popFront()) {
+ case 'A':
+ return std::make_pair(Q_None, PointerAffinity::Reference);
+ case 'P':
+ return std::make_pair(Q_None, PointerAffinity::Pointer);
+ case 'Q':
+ return std::make_pair(Q_Const, PointerAffinity::Pointer);
+ case 'R':
+ return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
+ case 'S':
+ return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
+ PointerAffinity::Pointer);
default:
- break;
- }
- Ty.outputPre(OS);
-
- if (Ty.Quals & Q_Const) {
- outputSpaceIfNecessary(OS);
- OS << "const";
+ assert(false && "Ty is not a pointer type!");
}
+ return std::make_pair(Q_None, PointerAffinity::Pointer);
+}
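
An editor's illustration (not part of this change): the letter-to-qualifier table encoded by the switch above, reduced to a standalone sketch. The enum values are local stand-ins, not the real Qualifiers/PointerAffinity types.

#include <cassert>
#include <utility>

namespace sketch {
enum Qual { None = 0, Const = 1, Volatile = 2 };
enum Affinity { Pointer, Reference };

// Same letter-to-qualifier mapping as the switch in demanglePointerCVQualifiers.
std::pair<int, Affinity> decodePointerLetter(char C) {
  switch (C) {
  case 'A': return {None, Reference};           // foo &
  case 'P': return {None, Pointer};             // foo *
  case 'Q': return {Const, Pointer};            // foo *const
  case 'R': return {Volatile, Pointer};         // foo *volatile
  case 'S': return {Const | Volatile, Pointer}; // foo *const volatile
  }
  return {None, Pointer};
}
} // namespace sketch

int main() {
  assert(sketch::decodePointerLetter('Q').first == sketch::Const);
  assert(sketch::decodePointerLetter('S').first ==
         (sketch::Const | sketch::Volatile));
  return 0;
}
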
- if (Ty.Quals & Q_Volatile) {
- outputSpaceIfNecessary(OS);
- OS << "volatile";
- }
+StringView Demangler::copyString(StringView Borrowed) {
+ char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
+ std::strcpy(Stable, Borrowed.begin());
- if (Ty.Quals & Q_Restrict) {
- outputSpaceIfNecessary(OS);
- OS << "__restrict";
- }
+ return {Stable, Borrowed.size()};
}
-// Write the "second half" of a given type.
-void Type::outputPost(OutputStream &OS, Type &Ty) { Ty.outputPost(OS); }
-
-void Type::outputPre(OutputStream &OS) {
- switch (Prim) {
- case PrimTy::Void:
- OS << "void";
- break;
- case PrimTy::Bool:
- OS << "bool";
- break;
- case PrimTy::Char:
- OS << "char";
- break;
- case PrimTy::Schar:
- OS << "signed char";
+SpecialTableSymbolNode *
+Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
+ SpecialIntrinsicKind K) {
+ NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
+ switch (K) {
+ case SpecialIntrinsicKind::Vftable:
+ NI->Name = "`vftable'";
break;
- case PrimTy::Uchar:
- OS << "unsigned char";
+ case SpecialIntrinsicKind::Vbtable:
+ NI->Name = "`vbtable'";
break;
- case PrimTy::Char16:
- OS << "char16_t";
+ case SpecialIntrinsicKind::LocalVftable:
+ NI->Name = "`local vftable'";
break;
- case PrimTy::Char32:
- OS << "char32_t";
- break;
- case PrimTy::Short:
- OS << "short";
- break;
- case PrimTy::Ushort:
- OS << "unsigned short";
- break;
- case PrimTy::Int:
- OS << "int";
- break;
- case PrimTy::Uint:
- OS << "unsigned int";
- break;
- case PrimTy::Long:
- OS << "long";
- break;
- case PrimTy::Ulong:
- OS << "unsigned long";
- break;
- case PrimTy::Int64:
- OS << "__int64";
- break;
- case PrimTy::Uint64:
- OS << "unsigned __int64";
- break;
- case PrimTy::Wchar:
- OS << "wchar_t";
- break;
- case PrimTy::Float:
- OS << "float";
- break;
- case PrimTy::Double:
- OS << "double";
- break;
- case PrimTy::Ldouble:
- OS << "long double";
- break;
- case PrimTy::Nullptr:
- OS << "std::nullptr_t";
+ case SpecialIntrinsicKind::RttiCompleteObjLocator:
+ NI->Name = "`RTTI Complete Object Locator'";
break;
default:
- assert(false && "Invalid primitive type!");
+ LLVM_BUILTIN_UNREACHABLE;
}
-}
-void Type::outputPost(OutputStream &OS) {}
-
-Type *PointerType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<PointerType>(*this);
-}
-
-static void outputPointerIndicator(OutputStream &OS, PointerAffinity Affinity,
- const Name *MemberName,
- const Type *Pointee) {
- // "[]" and "()" (for function parameters) take precedence over "*",
- // so "int *x(int)" means "x is a function returning int *". We need
- // parentheses to supercede the default precedence. (e.g. we want to
- // emit something like "int (*x)(int)".)
- if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array) {
- OS << "(";
- if (Pointee->Prim == PrimTy::Function) {
- const FunctionType *FTy = static_cast<const FunctionType *>(Pointee);
- assert(FTy->IsFunctionPointer);
- outputCallingConvention(OS, FTy->CallConvention);
- OS << " ";
- }
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
+ SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
+ STSN->Name = QN;
+ bool IsMember = false;
+ char Front = MangledName.popFront();
+ if (Front != '6' && Front != '7') {
+ Error = true;
+ return nullptr;
}
- if (MemberName) {
- outputName(OS, MemberName);
- OS << "::";
+ std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
+ if (!MangledName.consumeFront('@'))
+ STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
+ return STSN;
+}
+
+LocalStaticGuardVariableNode *
+Demangler::demangleLocalStaticGuard(StringView &MangledName) {
+ LocalStaticGuardIdentifierNode *LSGI =
+ Arena.alloc<LocalStaticGuardIdentifierNode>();
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
+ LocalStaticGuardVariableNode *LSGVN =
+ Arena.alloc<LocalStaticGuardVariableNode>();
+ LSGVN->Name = QN;
+
+ if (MangledName.consumeFront("4IA"))
+ LSGVN->IsVisible = false;
+ else if (MangledName.consumeFront("5"))
+ LSGVN->IsVisible = true;
+ else {
+ Error = true;
+ return nullptr;
}
- if (Affinity == PointerAffinity::Pointer)
- OS << "*";
- else if (Affinity == PointerAffinity::Reference)
- OS << "&";
- else
- OS << "&&";
+ if (!MangledName.empty())
+ LSGI->ScopeIndex = demangleUnsigned(MangledName);
+ return LSGVN;
}
-void PointerType::outputPre(OutputStream &OS) {
- Type::outputPre(OS, *Pointee);
-
- outputSpaceIfNecessary(OS);
+static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
+ StringView Name) {
+ NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
+ Id->Name = Name;
+ return Id;
+}
- if (Quals & Q_Unaligned)
- OS << "__unaligned ";
+static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
+ IdentifierNode *Identifier) {
+ QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
+ QN->Components = Arena.alloc<NodeArrayNode>();
+ QN->Components->Count = 1;
+ QN->Components->Nodes = Arena.allocArray<Node *>(1);
+ QN->Components->Nodes[0] = Identifier;
+ return QN;
+}
- outputPointerIndicator(OS, Affinity, nullptr, Pointee);
+static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
+ StringView Name) {
+ NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
+ return synthesizeQualifiedName(Arena, Id);
+}
- // FIXME: We should output this, but it requires updating lots of tests.
- // if (Ty.Quals & Q_Pointer64)
- // OS << " __ptr64";
+static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
+ TypeNode *Type,
+ StringView VariableName) {
+ VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
+ VSN->Type = Type;
+ VSN->Name = synthesizeQualifiedName(Arena, VariableName);
+ return VSN;
}
-void PointerType::outputPost(OutputStream &OS) {
- if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array)
- OS << ")";
+VariableSymbolNode *Demangler::demangleUntypedVariable(
+ ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) {
+ NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
+ VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
+ VSN->Name = QN;
+ if (MangledName.consumeFront("8"))
+ return VSN;
- Type::outputPost(OS, *Pointee);
+ Error = true;
+ return nullptr;
}
-Type *MemberPointerType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<MemberPointerType>(*this);
-}
+VariableSymbolNode *
+Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
+ StringView &MangledName) {
+ RttiBaseClassDescriptorNode *RBCDN =
+ Arena.alloc<RttiBaseClassDescriptorNode>();
+ RBCDN->NVOffset = demangleUnsigned(MangledName);
+ RBCDN->VBPtrOffset = demangleSigned(MangledName);
+ RBCDN->VBTableOffset = demangleUnsigned(MangledName);
+ RBCDN->Flags = demangleUnsigned(MangledName);
+ if (Error)
+ return nullptr;
-void MemberPointerType::outputPre(OutputStream &OS) {
- Type::outputPre(OS, *Pointee);
+ VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
+ VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
+ MangledName.consumeFront('8');
+ return VSN;
+}
- outputSpaceIfNecessary(OS);
+FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
+ bool IsDestructor) {
+ DynamicStructorIdentifierNode *DSIN =
+ Arena.alloc<DynamicStructorIdentifierNode>();
+ DSIN->IsDestructor = IsDestructor;
- outputPointerIndicator(OS, PointerAffinity::Pointer, MemberName, Pointee);
+ bool IsKnownStaticDataMember = false;
+ if (MangledName.consumeFront('?'))
+ IsKnownStaticDataMember = true;
- // FIXME: We should output this, but it requires updating lots of tests.
- // if (Ty.Quals & Q_Pointer64)
- // OS << " __ptr64";
- if (Quals & Q_Restrict)
- OS << " __restrict";
-}
+ QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
-void MemberPointerType::outputPost(OutputStream &OS) {
- if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array)
- OS << ")";
+ SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
+ FunctionSymbolNode *FSN = nullptr;
+ Symbol->Name = QN;
- Type::outputPost(OS, *Pointee);
-}
+ if (Symbol->kind() == NodeKind::VariableSymbol) {
+ DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
-Type *FunctionType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<FunctionType>(*this);
-}
+ // Older versions of clang mangled this type of symbol incorrectly. They
+ // would omit the leading ? and they would only emit a single @ at the end.
+ // The correct mangling is a leading ? and 2 trailing @ signs. Handle
+ // both cases.
+ int AtCount = IsKnownStaticDataMember ? 2 : 1;
+ for (int I = 0; I < AtCount; ++I) {
+ if (MangledName.consumeFront('@'))
+ continue;
+ Error = true;
+ return nullptr;
+ }
-void FunctionType::outputPre(OutputStream &OS) {
- if (!(FunctionClass & Global)) {
- if (FunctionClass & Static)
- OS << "static ";
- }
+ FSN = demangleFunctionEncoding(MangledName);
+ FSN->Name = synthesizeQualifiedName(Arena, DSIN);
+ } else {
+ if (IsKnownStaticDataMember) {
+ // This was supposed to be a static data member, but we got a function.
+ Error = true;
+ return nullptr;
+ }
- if (ReturnType) {
- Type::outputPre(OS, *ReturnType);
- OS << " ";
+ FSN = static_cast<FunctionSymbolNode *>(Symbol);
+ DSIN->Name = Symbol->Name;
+ FSN->Name = synthesizeQualifiedName(Arena, DSIN);
}
- // Function pointers print the calling convention as void (__cdecl *)(params)
- // rather than void __cdecl (*)(params). So we need to let the PointerType
- // class handle this.
- if (!IsFunctionPointer)
- outputCallingConvention(OS, CallConvention);
+ return FSN;
}
-void FunctionType::outputPost(OutputStream &OS) {
- OS << "(";
- outputParameterList(OS, Params);
- OS << ")";
- if (Quals & Q_Const)
- OS << " const";
- if (Quals & Q_Volatile)
- OS << " volatile";
- if (Quals & Q_Restrict)
- OS << " __restrict";
- if (Quals & Q_Unaligned)
- OS << " __unaligned";
-
- if (RefKind == ReferenceKind::LValueRef)
- OS << " &";
- else if (RefKind == ReferenceKind::RValueRef)
- OS << " &&";
+SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
+ SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
+ if (SIK == SpecialIntrinsicKind::None)
+ return nullptr;
- if (ReturnType)
- Type::outputPost(OS, *ReturnType);
- return;
+ switch (SIK) {
+ case SpecialIntrinsicKind::StringLiteralSymbol:
+ return demangleStringLiteral(MangledName);
+ case SpecialIntrinsicKind::Vftable:
+ case SpecialIntrinsicKind::Vbtable:
+ case SpecialIntrinsicKind::LocalVftable:
+ case SpecialIntrinsicKind::RttiCompleteObjLocator:
+ return demangleSpecialTableSymbolNode(MangledName, SIK);
+ case SpecialIntrinsicKind::VcallThunk:
+ return demangleVcallThunkNode(MangledName);
+ case SpecialIntrinsicKind::LocalStaticGuard:
+ return demangleLocalStaticGuard(MangledName);
+ case SpecialIntrinsicKind::RttiTypeDescriptor: {
+ TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
+ if (Error)
+ break;
+ if (!MangledName.consumeFront("@8"))
+ break;
+ if (!MangledName.empty())
+ break;
+ return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
+ }
+ case SpecialIntrinsicKind::RttiBaseClassArray:
+ return demangleUntypedVariable(Arena, MangledName,
+ "`RTTI Base Class Array'");
+ case SpecialIntrinsicKind::RttiClassHierarchyDescriptor:
+ return demangleUntypedVariable(Arena, MangledName,
+ "`RTTI Class Hierarchy Descriptor'");
+ case SpecialIntrinsicKind::RttiBaseClassDescriptor:
+ return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
+ case SpecialIntrinsicKind::DynamicInitializer:
+ return demangleInitFiniStub(MangledName, false);
+ case SpecialIntrinsicKind::DynamicAtexitDestructor:
+ return demangleInitFiniStub(MangledName, true);
+ default:
+ break;
+ }
+ Error = true;
+ return nullptr;
}
-Type *UdtType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<UdtType>(*this);
+IdentifierNode *
+Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
+ assert(MangledName.startsWith('?'));
+ MangledName = MangledName.dropFront();
+
+ if (MangledName.consumeFront("__"))
+ return demangleFunctionIdentifierCode(
+ MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
+ else if (MangledName.consumeFront("_"))
+ return demangleFunctionIdentifierCode(MangledName,
+ FunctionIdentifierCodeGroup::Under);
+ return demangleFunctionIdentifierCode(MangledName,
+ FunctionIdentifierCodeGroup::Basic);
+}
+
+StructorIdentifierNode *
+Demangler::demangleStructorIdentifier(StringView &MangledName,
+ bool IsDestructor) {
+ StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
+ N->IsDestructor = IsDestructor;
+ return N;
+}
+
+ConversionOperatorIdentifierNode *
+Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) {
+ ConversionOperatorIdentifierNode *N =
+ Arena.alloc<ConversionOperatorIdentifierNode>();
+ return N;
+}
+
+LiteralOperatorIdentifierNode *
+Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
+ LiteralOperatorIdentifierNode *N =
+ Arena.alloc<LiteralOperatorIdentifierNode>();
+ N->Name = demangleSimpleString(MangledName, false);
+ return N;
+}
+
+static IntrinsicFunctionKind
+translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
+  // Not all ? identifiers are intrinsic *functions*. This function only maps
+  // operator codes for the special functions; all others are handled elsewhere,
+  // hence the IFK::None entries in the table.
+ using IFK = IntrinsicFunctionKind;
+ static IFK Basic[36] = {
+ IFK::None, // ?0 # Foo::Foo()
+ IFK::None, // ?1 # Foo::~Foo()
+ IFK::New, // ?2 # operator new
+ IFK::Delete, // ?3 # operator delete
+ IFK::Assign, // ?4 # operator=
+ IFK::RightShift, // ?5 # operator>>
+ IFK::LeftShift, // ?6 # operator<<
+ IFK::LogicalNot, // ?7 # operator!
+ IFK::Equals, // ?8 # operator==
+ IFK::NotEquals, // ?9 # operator!=
+ IFK::ArraySubscript, // ?A # operator[]
+ IFK::None, // ?B # Foo::operator <type>()
+ IFK::Pointer, // ?C # operator->
+ IFK::Dereference, // ?D # operator*
+ IFK::Increment, // ?E # operator++
+ IFK::Decrement, // ?F # operator--
+ IFK::Minus, // ?G # operator-
+ IFK::Plus, // ?H # operator+
+ IFK::BitwiseAnd, // ?I # operator&
+ IFK::MemberPointer, // ?J # operator->*
+ IFK::Divide, // ?K # operator/
+ IFK::Modulus, // ?L # operator%
+ IFK::LessThan, // ?M operator<
+ IFK::LessThanEqual, // ?N operator<=
+ IFK::GreaterThan, // ?O operator>
+ IFK::GreaterThanEqual, // ?P operator>=
+ IFK::Comma, // ?Q operator,
+ IFK::Parens, // ?R operator()
+ IFK::BitwiseNot, // ?S operator~
+ IFK::BitwiseXor, // ?T operator^
+ IFK::BitwiseOr, // ?U operator|
+ IFK::LogicalAnd, // ?V operator&&
+ IFK::LogicalOr, // ?W operator||
+ IFK::TimesEqual, // ?X operator*=
+ IFK::PlusEqual, // ?Y operator+=
+ IFK::MinusEqual, // ?Z operator-=
+ };
+ static IFK Under[36] = {
+ IFK::DivEqual, // ?_0 operator/=
+ IFK::ModEqual, // ?_1 operator%=
+ IFK::RshEqual, // ?_2 operator>>=
+ IFK::LshEqual, // ?_3 operator<<=
+ IFK::BitwiseAndEqual, // ?_4 operator&=
+ IFK::BitwiseOrEqual, // ?_5 operator|=
+ IFK::BitwiseXorEqual, // ?_6 operator^=
+ IFK::None, // ?_7 # vftable
+ IFK::None, // ?_8 # vbtable
+ IFK::None, // ?_9 # vcall
+ IFK::None, // ?_A # typeof
+ IFK::None, // ?_B # local static guard
+ IFK::None, // ?_C # string literal
+ IFK::VbaseDtor, // ?_D # vbase destructor
+ IFK::VecDelDtor, // ?_E # vector deleting destructor
+ IFK::DefaultCtorClosure, // ?_F # default constructor closure
+ IFK::ScalarDelDtor, // ?_G # scalar deleting destructor
+ IFK::VecCtorIter, // ?_H # vector constructor iterator
+ IFK::VecDtorIter, // ?_I # vector destructor iterator
+ IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator
+ IFK::VdispMap, // ?_K # virtual displacement map
+ IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator
+ IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator
+ IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
+ IFK::CopyCtorClosure, // ?_O # copy constructor closure
+ IFK::None, // ?_P<name> # udt returning <name>
+ IFK::None, // ?_Q # <unknown>
+ IFK::None, // ?_R0 - ?_R4 # RTTI Codes
+ IFK::None, // ?_S # local vftable
+ IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
+ IFK::ArrayNew, // ?_U operator new[]
+ IFK::ArrayDelete, // ?_V operator delete[]
+ IFK::None, // ?_W <unused>
+ IFK::None, // ?_X <unused>
+ IFK::None, // ?_Y <unused>
+ IFK::None, // ?_Z <unused>
+ };
+ static IFK DoubleUnder[36] = {
+ IFK::None, // ?__0 <unused>
+ IFK::None, // ?__1 <unused>
+ IFK::None, // ?__2 <unused>
+ IFK::None, // ?__3 <unused>
+ IFK::None, // ?__4 <unused>
+ IFK::None, // ?__5 <unused>
+ IFK::None, // ?__6 <unused>
+ IFK::None, // ?__7 <unused>
+ IFK::None, // ?__8 <unused>
+ IFK::None, // ?__9 <unused>
+ IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator
+ IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator
+ IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator
+ IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter
+ IFK::None, // ?__E dynamic initializer for `T'
+ IFK::None, // ?__F dynamic atexit destructor for `T'
+ IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter
+ IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter
+ IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
+ // iter
+ IFK::None, // ?__J local static thread guard
+ IFK::None, // ?__K operator ""_name
+ IFK::CoAwait, // ?__L co_await
+ IFK::None, // ?__M <unused>
+ IFK::None, // ?__N <unused>
+ IFK::None, // ?__O <unused>
+ IFK::None, // ?__P <unused>
+ IFK::None, // ?__Q <unused>
+ IFK::None, // ?__R <unused>
+ IFK::None, // ?__S <unused>
+ IFK::None, // ?__T <unused>
+ IFK::None, // ?__U <unused>
+ IFK::None, // ?__V <unused>
+ IFK::None, // ?__W <unused>
+ IFK::None, // ?__X <unused>
+ IFK::None, // ?__Y <unused>
+ IFK::None, // ?__Z <unused>
+ };
+
+ int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
+ switch (Group) {
+ case FunctionIdentifierCodeGroup::Basic:
+ return Basic[Index];
+ case FunctionIdentifierCodeGroup::Under:
+ return Under[Index];
+ case FunctionIdentifierCodeGroup::DoubleUnder:
+ return DoubleUnder[Index];
+ }
+ LLVM_BUILTIN_UNREACHABLE;
}
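
For illustration only (an editor's sketch, not part of the diff): the 36-way index computed at the end of translateIntrinsicFunctionCode. '0'..'9' map to 0..9 and 'A'..'Z' to 10..35, which is why each operator table above has exactly 36 entries; the expected values in the asserts are read straight from the Basic/Under arrays.

#include <cassert>

// Same index computation as translateIntrinsicFunctionCode above.
static int operatorTableIndex(char CH) {
  return (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
}

int main() {
  assert(operatorTableIndex('4') == 4);  // "?4"  -> Basic[4]  == IFK::Assign (operator=)
  assert(operatorTableIndex('U') == 30); // "?_U" -> Under[30] == IFK::ArrayNew (operator new[])
  assert(operatorTableIndex('Z') == 35); // last slot of each table
  return 0;
}
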
-void UdtType::outputPre(OutputStream &OS) {
- switch (Prim) {
- case PrimTy::Class:
- OS << "class ";
- break;
- case PrimTy::Struct:
- OS << "struct ";
- break;
- case PrimTy::Union:
- OS << "union ";
- break;
- case PrimTy::Enum:
- OS << "enum ";
+IdentifierNode *
+Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
+ FunctionIdentifierCodeGroup Group) {
+ switch (Group) {
+ case FunctionIdentifierCodeGroup::Basic:
+ switch (char CH = MangledName.popFront()) {
+ case '0':
+ case '1':
+ return demangleStructorIdentifier(MangledName, CH == '1');
+ case 'B':
+ return demangleConversionOperatorIdentifier(MangledName);
+ default:
+ return Arena.alloc<IntrinsicFunctionIdentifierNode>(
+ translateIntrinsicFunctionCode(CH, Group));
+ }
break;
- default:
- assert(false && "Not a udt type!");
+ case FunctionIdentifierCodeGroup::Under:
+ return Arena.alloc<IntrinsicFunctionIdentifierNode>(
+ translateIntrinsicFunctionCode(MangledName.popFront(), Group));
+ case FunctionIdentifierCodeGroup::DoubleUnder:
+ switch (char CH = MangledName.popFront()) {
+ case 'K':
+ return demangleLiteralOperatorIdentifier(MangledName);
+ default:
+ return Arena.alloc<IntrinsicFunctionIdentifierNode>(
+ translateIntrinsicFunctionCode(CH, Group));
+ }
}
+ // No Mangling Yet: Spaceship, // operator<=>
- outputName(OS, UdtName);
+ return nullptr;
}
-Type *ArrayType::clone(ArenaAllocator &Arena) const {
- return Arena.alloc<ArrayType>(*this);
-}
+SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
+ QualifiedNameNode *Name) {
+ // Read a variable.
+ switch (MangledName.front()) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4': {
+ StorageClass SC = demangleVariableStorageClass(MangledName);
+ return demangleVariableEncoding(MangledName, SC);
+ }
+ case '8':
+ return nullptr;
+ }
+ FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
-void ArrayType::outputPre(OutputStream &OS) {
- Type::outputPre(OS, *ElementType);
+ IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
+ if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
+ ConversionOperatorIdentifierNode *COIN =
+ static_cast<ConversionOperatorIdentifierNode *>(UQN);
+ COIN->TargetType = FSN->Signature->ReturnType;
+ }
+ return FSN;
}
-void ArrayType::outputPost(OutputStream &OS) {
- if (ArrayDimension > 0)
- OS << "[" << ArrayDimension << "]";
- if (NextDimension)
- Type::outputPost(OS, *NextDimension);
- else if (ElementType)
- Type::outputPost(OS, *ElementType);
-}
+// Parser entry point.
+SymbolNode *Demangler::parse(StringView &MangledName) {
+  // We can't demangle MD5 names, so just output them as-is.
+ // Also, MSVC-style mangled symbols must start with '?'.
+ if (MangledName.startsWith("??@")) {
+    // This is an MD5 mangled name. We can't demangle it, so just return the
+    // mangled name.
+ SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
+ S->Name = synthesizeQualifiedName(Arena, MangledName);
+ return S;
+ }
-struct Symbol {
- Name *SymbolName = nullptr;
- Type *SymbolType = nullptr;
-};
+ if (!MangledName.startsWith('?')) {
+ Error = true;
+ return nullptr;
+ }
-} // namespace
-
-namespace {
-
-// Demangler class takes the main role in demangling symbols.
-// It has a set of functions to parse mangled symbols into Type instances.
-// It also has a set of functions to cnovert Type instances to strings.
-class Demangler {
-public:
- Demangler() = default;
-
- // You are supposed to call parse() first and then check if error is true. If
- // it is false, call output() to write the formatted name to the given stream.
- Symbol *parse(StringView &MangledName);
- void output(const Symbol *S, OutputStream &OS);
-
- // True if an error occurred.
- bool Error = false;
-
-private:
- Type *demangleVariableEncoding(StringView &MangledName);
- Type *demangleFunctionEncoding(StringView &MangledName);
-
- Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
-
- // Parser functions. This is a recursive-descent parser.
- Type *demangleType(StringView &MangledName, QualifierMangleMode QMM);
- Type *demangleBasicType(StringView &MangledName);
- UdtType *demangleClassType(StringView &MangledName);
- PointerType *demanglePointerType(StringView &MangledName);
- MemberPointerType *demangleMemberPointerType(StringView &MangledName);
- FunctionType *demangleFunctionType(StringView &MangledName, bool HasThisQuals,
- bool IsFunctionPointer);
-
- ArrayType *demangleArrayType(StringView &MangledName);
-
- TemplateParams *demangleTemplateParameterList(StringView &MangledName);
- FunctionParams demangleFunctionParameterList(StringView &MangledName);
-
- int demangleNumber(StringView &MangledName);
-
- void memorizeString(StringView s);
-
- /// Allocate a copy of \p Borrowed into memory that we own.
- StringView copyString(StringView Borrowed);
-
- Name *demangleFullyQualifiedTypeName(StringView &MangledName);
- Name *demangleFullyQualifiedSymbolName(StringView &MangledName);
-
- Name *demangleUnqualifiedTypeName(StringView &MangledName);
- Name *demangleUnqualifiedSymbolName(StringView &MangledName);
-
- Name *demangleNameScopeChain(StringView &MangledName, Name *UnqualifiedName);
- Name *demangleNameScopePiece(StringView &MangledName);
-
- Name *demangleBackRefName(StringView &MangledName);
- Name *demangleClassTemplateName(StringView &MangledName);
- Name *demangleOperatorName(StringView &MangledName);
- Name *demangleSimpleName(StringView &MangledName, bool Memorize);
- Name *demangleAnonymousNamespaceName(StringView &MangledName);
- Name *demangleLocallyScopedNamePiece(StringView &MangledName);
-
- StringView demangleSimpleString(StringView &MangledName, bool Memorize);
-
- FuncClass demangleFunctionClass(StringView &MangledName);
- CallingConv demangleCallingConvention(StringView &MangledName);
- StorageClass demangleVariableStorageClass(StringView &MangledName);
- ReferenceKind demangleReferenceKind(StringView &MangledName);
- void demangleThrowSpecification(StringView &MangledName);
-
- std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
-
- // Memory allocator.
- ArenaAllocator Arena;
-
- // A single type uses one global back-ref table for all function params.
- // This means back-refs can even go "into" other types. Examples:
- //
- // // Second int* is a back-ref to first.
- // void foo(int *, int*);
- //
- // // Second int* is not a back-ref to first (first is not a function param).
- // int* foo(int*);
- //
- // // Second int* is a back-ref to first (ALL function types share the same
- // // back-ref map.
- // using F = void(*)(int*);
- // F G(int *);
- Type *FunctionParamBackRefs[10];
- size_t FunctionParamBackRefCount = 0;
-
- // The first 10 BackReferences in a mangled name can be back-referenced by
- // special name @[0-9]. This is a storage for the first 10 BackReferences.
- StringView BackReferences[10];
- size_t BackRefCount = 0;
-};
-} // namespace
+ MangledName.consumeFront('?');
-StringView Demangler::copyString(StringView Borrowed) {
- char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
- std::strcpy(Stable, Borrowed.begin());
+ // ?$ is a template instantiation, but all other names that start with ? are
+ // operators / special names.
+ if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
+ return SI;
- return {Stable, Borrowed.size()};
-}
+ // What follows is a main symbol name. This may include namespaces or class
+ // back references.
+ QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
+ if (Error)
+ return nullptr;
-// Parser entry point.
-Symbol *Demangler::parse(StringView &MangledName) {
- Symbol *S = Arena.alloc<Symbol>();
-
- // MSVC-style mangled symbols must start with '?'.
- if (!MangledName.consumeFront("?")) {
- S->SymbolName = Arena.alloc<Name>();
- S->SymbolName->Str = MangledName;
- S->SymbolType = Arena.alloc<Type>();
- S->SymbolType->Prim = PrimTy::Unknown;
- return S;
+ SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
+ if (Symbol) {
+ Symbol->Name = QN;
}
- // What follows is a main symbol name. This may include
- // namespaces or class BackReferences.
- S->SymbolName = demangleFullyQualifiedSymbolName(MangledName);
+ if (Error)
+ return nullptr;
- // Read a variable.
- S->SymbolType = startsWithDigit(MangledName)
- ? demangleVariableEncoding(MangledName)
- : demangleFunctionEncoding(MangledName);
+ return Symbol;
+}
- return S;
+TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
+ if (!MangledName.consumeFront(".?A"))
+ return nullptr;
+ MangledName.consumeFront(".?A");
+ if (MangledName.empty())
+ return nullptr;
+
+ return demangleClassType(MangledName);
}
// <type-encoding> ::= <storage-class> <variable-type>
@@ -994,44 +752,41 @@ Symbol *Demangler::parse(StringView &MangledName) {
// ::= 3 # global
// ::= 4 # static local
-Type *Demangler::demangleVariableEncoding(StringView &MangledName) {
- StorageClass SC = demangleVariableStorageClass(MangledName);
+VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
+ StorageClass SC) {
+ VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
- Type *Ty = demangleType(MangledName, QualifierMangleMode::Drop);
-
- Ty->Storage = SC;
+ VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
+ VSN->SC = SC;
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
- switch (Ty->Prim) {
- case PrimTy::Ptr:
- case PrimTy::MemberPtr: {
+ switch (VSN->Type->kind()) {
+ case NodeKind::PointerType: {
+ PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
+
Qualifiers ExtraChildQuals = Q_None;
- Ty->Quals =
- Qualifiers(Ty->Quals | demanglePointerExtQualifiers(MangledName));
+ PTN->Quals = Qualifiers(VSN->Type->Quals |
+ demanglePointerExtQualifiers(MangledName));
bool IsMember = false;
std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
- if (Ty->Prim == PrimTy::MemberPtr) {
- assert(IsMember);
- Name *BackRefName = demangleFullyQualifiedTypeName(MangledName);
+ if (PTN->ClassParent) {
+ QualifiedNameNode *BackRefName =
+ demangleFullyQualifiedTypeName(MangledName);
(void)BackRefName;
- MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty);
- MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals);
- } else {
- PointerType *PTy = static_cast<PointerType *>(Ty);
- PTy->Pointee->Quals = Qualifiers(PTy->Pointee->Quals | ExtraChildQuals);
}
+ PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
break;
}
default:
- Ty->Quals = demangleQualifiers(MangledName).first;
+ VSN->Type->Quals = demangleQualifiers(MangledName).first;
break;
}
- return Ty;
+ return VSN;
}
// Sometimes numbers are encoded in mangled symbols. For example,
@@ -1045,21 +800,21 @@ Type *Demangler::demangleVariableEncoding(StringView &MangledName) {
// ::= <hex digit>+ @ # when Number == 0 or >= 10
//
// <hex-digit> ::= [A-P] # A = 0, B = 1, ...
-int Demangler::demangleNumber(StringView &MangledName) {
- bool neg = MangledName.consumeFront("?");
+std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
+ bool IsNegative = MangledName.consumeFront('?');
if (startsWithDigit(MangledName)) {
- int32_t Ret = MangledName[0] - '0' + 1;
+ uint64_t Ret = MangledName[0] - '0' + 1;
MangledName = MangledName.dropFront(1);
- return neg ? -Ret : Ret;
+ return {Ret, IsNegative};
}
- int Ret = 0;
+ uint64_t Ret = 0;
for (size_t i = 0; i < MangledName.size(); ++i) {
char C = MangledName[i];
if (C == '@') {
MangledName = MangledName.dropFront(i + 1);
- return neg ? -Ret : Ret;
+ return {Ret, IsNegative};
}
if ('A' <= C && C <= 'P') {
Ret = (Ret << 4) + (C - 'A');
@@ -1069,191 +824,457 @@ int Demangler::demangleNumber(StringView &MangledName) {
}
Error = true;
- return 0;
+ return {0ULL, false};
+}
+
+uint64_t Demangler::demangleUnsigned(StringView &MangledName) {
+ bool IsNegative = false;
+ uint64_t Number = 0;
+ std::tie(Number, IsNegative) = demangleNumber(MangledName);
+ if (IsNegative)
+ Error = true;
+ return Number;
+}
+
+int64_t Demangler::demangleSigned(StringView &MangledName) {
+ bool IsNegative = false;
+ uint64_t Number = 0;
+ std::tie(Number, IsNegative) = demangleNumber(MangledName);
+ if (Number > INT64_MAX)
+ Error = true;
+ int64_t I = static_cast<int64_t>(Number);
+ return IsNegative ? -I : I;
}
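
A minimal standalone sketch (editor's illustration, not part of this change) of the number encoding parsed by demangleNumber and its demangleUnsigned/demangleSigned wrappers above, re-implemented over std::string so the arithmetic can be checked in isolation. A single decimal digit encodes value digit+1; otherwise 'A'..'P' are rebased hex nibbles (A=0 .. P=15) terminated by '@', and a leading '?' marks a negative value.

#include <cassert>
#include <string>

static long decodeMsvcNumber(const std::string &S) {
  size_t I = 0;
  bool Neg = !S.empty() && S[I] == '?';
  if (Neg)
    ++I;
  long Val = 0;
  if (S[I] >= '0' && S[I] <= '9') {
    Val = S[I] - '0' + 1;              // single decimal digit: values 1..10
  } else {
    for (; I < S.size() && S[I] != '@'; ++I)
      Val = (Val << 4) + (S[I] - 'A'); // rebased hex digits, '@'-terminated
  }
  return Neg ? -Val : Val;
}

int main() {
  assert(decodeMsvcNumber("4") == 5);    // '4' -> 4 + 1
  assert(decodeMsvcNumber("BK@") == 26); // B=1, K=10 -> 0x1A
  assert(decodeMsvcNumber("?4") == -5);  // leading '?' negates
  return 0;
}
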
// First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
// Memorize it.
void Demangler::memorizeString(StringView S) {
- if (BackRefCount >= sizeof(BackReferences) / sizeof(*BackReferences))
+ if (Backrefs.NamesCount >= BackrefContext::Max)
return;
- for (size_t i = 0; i < BackRefCount; ++i)
- if (S == BackReferences[i])
+ for (size_t i = 0; i < Backrefs.NamesCount; ++i)
+ if (S == Backrefs.Names[i]->Name)
return;
- BackReferences[BackRefCount++] = S;
+ NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
+ N->Name = S;
+ Backrefs.Names[Backrefs.NamesCount++] = N;
}
-Name *Demangler::demangleBackRefName(StringView &MangledName) {
+NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
assert(startsWithDigit(MangledName));
size_t I = MangledName[0] - '0';
- if (I >= BackRefCount) {
+ if (I >= Backrefs.NamesCount) {
Error = true;
return nullptr;
}
MangledName = MangledName.dropFront();
- Name *Node = Arena.alloc<Name>();
- Node->Str = BackReferences[I];
- return Node;
+ return Backrefs.Names[I];
}
-Name *Demangler::demangleClassTemplateName(StringView &MangledName) {
- assert(MangledName.startsWith("?$"));
- MangledName.consumeFront("?$");
-
- Name *Node = demangleSimpleName(MangledName, false);
- Node->TParams = demangleTemplateParameterList(MangledName);
-
+void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
// Render this class template name into a string buffer so that we can
// memorize it for the purpose of back-referencing.
- OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
- outputName(OS, Node);
+ OutputStream OS;
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ // FIXME: Propagate out-of-memory as an error?
+ std::terminate();
+ Identifier->output(OS, OF_Default);
OS << '\0';
char *Name = OS.getBuffer();
StringView Owned = copyString(Name);
memorizeString(Owned);
std::free(Name);
-
- return Node;
}
-Name *Demangler::demangleOperatorName(StringView &MangledName) {
- assert(MangledName.startsWith('?'));
- MangledName.consumeFront('?');
+IdentifierNode *
+Demangler::demangleTemplateInstantiationName(StringView &MangledName,
+ NameBackrefBehavior NBB) {
+ assert(MangledName.startsWith("?$"));
+ MangledName.consumeFront("?$");
- auto NameString = [this, &MangledName]() -> StringView {
- switch (MangledName.popFront()) {
- case '0':
- return "ctor";
- case '1':
- return "dtor";
- case '2':
- return " new";
- case '3':
- return " delete";
- case '4':
- return "=";
- case '5':
- return ">>";
- case '6':
- return "<<";
- case '7':
- return "!";
- case '8':
- return "==";
- case '9':
- return "!=";
- case 'A':
- return "[]";
- case 'C':
- return "->";
- case 'D':
- return "*";
- case 'E':
- return "++";
- case 'F':
- return "--";
- case 'G':
- return "-";
- case 'H':
- return "+";
- case 'I':
- return "&";
- case 'J':
- return "->*";
- case 'K':
- return "/";
- case 'L':
- return "%";
- case 'M':
- return "<";
- case 'N':
- return "<=";
- case 'O':
- return ">";
- case 'P':
- return ">=";
- case 'Q':
- return ",";
- case 'R':
- return "()";
- case 'S':
- return "~";
- case 'T':
- return "^";
- case 'U':
- return "|";
- case 'V':
- return "&&";
- case 'W':
- return "||";
- case 'X':
- return "*=";
- case 'Y':
- return "+=";
- case 'Z':
- return "-=";
- case '_': {
- if (MangledName.empty())
- break;
+ BackrefContext OuterContext;
+ std::swap(OuterContext, Backrefs);
- switch (MangledName.popFront()) {
- case '0':
- return "/=";
- case '1':
- return "%=";
- case '2':
- return ">>=";
- case '3':
- return "<<=";
- case '4':
- return "&=";
- case '5':
- return "|=";
- case '6':
- return "^=";
- case 'U':
- return " new[]";
- case 'V':
- return " delete[]";
- case '_':
- if (MangledName.consumeFront("L"))
- return " co_await";
- if (MangledName.consumeFront("K")) {
- size_t EndPos = MangledName.find('@');
- if (EndPos == StringView::npos)
- break;
- StringView OpName = demangleSimpleString(MangledName, false);
- size_t FullSize = OpName.size() + 3; // <space>""OpName
- char *Buffer = Arena.allocUnalignedBuffer(FullSize);
- Buffer[0] = ' ';
- Buffer[1] = '"';
- Buffer[2] = '"';
- std::memcpy(Buffer + 3, OpName.begin(), OpName.size());
- return {Buffer, FullSize};
- }
- }
- }
- }
- Error = true;
- return "";
- };
+ IdentifierNode *Identifier =
+ demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
+ if (!Error)
+ Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
- Name *Node = Arena.alloc<Name>();
- Node->Operator = NameString();
- return Node;
+ std::swap(OuterContext, Backrefs);
+ if (Error)
+ return nullptr;
+
+ if (NBB & NBB_Template)
+ memorizeIdentifier(Identifier);
+
+ return Identifier;
}
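
For reference, an editor's sketch (not part of the diff) of the swap-in/swap-out idiom used above: template arguments get a fresh back-reference table, and swapping twice restores the enclosing context even though parsing mutated the inner one. The Ctx struct is a hypothetical stand-in for BackrefContext.

#include <cassert>
#include <utility>

struct Ctx { int NamesCount = 0; };

int main() {
  Ctx Backrefs;                 // enclosing context
  Backrefs.NamesCount = 3;

  Ctx Outer;
  std::swap(Outer, Backrefs);   // enter template scope: Backrefs is now empty
  Backrefs.NamesCount = 7;      // back-refs recorded while parsing template args
  std::swap(Outer, Backrefs);   // leave template scope

  assert(Backrefs.NamesCount == 3); // enclosing table is intact
  return 0;
}
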
-Name *Demangler::demangleSimpleName(StringView &MangledName, bool Memorize) {
+NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName,
+ bool Memorize) {
StringView S = demangleSimpleString(MangledName, Memorize);
if (Error)
return nullptr;
- Name *Node = Arena.alloc<Name>();
- Node->Str = S;
- return Node;
+ NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
+ Name->Name = S;
+ return Name;
+}
+
+static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); }
+
+static uint8_t rebasedHexDigitToNumber(char C) {
+ assert(isRebasedHexDigit(C));
+ return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
+}
+
+uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
+ if (!MangledName.startsWith('?'))
+ return MangledName.popFront();
+
+ MangledName = MangledName.dropFront();
+ if (MangledName.empty())
+ goto CharLiteralError;
+
+ if (MangledName.consumeFront('$')) {
+ // Two hex digits
+ if (MangledName.size() < 2)
+ goto CharLiteralError;
+ StringView Nibbles = MangledName.substr(0, 2);
+ if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
+ goto CharLiteralError;
+ // Don't append the null terminator.
+ uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
+ uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
+ MangledName = MangledName.dropFront(2);
+ return (C1 << 4) | C2;
+ }
+
+ if (startsWithDigit(MangledName)) {
+ const char *Lookup = ",/\\:. \n\t'-";
+ char C = Lookup[MangledName[0] - '0'];
+ MangledName = MangledName.dropFront();
+ return C;
+ }
+
+ if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
+ char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
+ '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
+ '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
+ '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
+ char C = Lookup[MangledName[0] - 'a'];
+ MangledName = MangledName.dropFront();
+ return C;
+ }
+
+ if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
+ char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
+ '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
+ '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
+ '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
+ char C = Lookup[MangledName[0] - 'A'];
+ MangledName = MangledName.dropFront();
+ return C;
+ }
+
+CharLiteralError:
+ Error = true;
+ return '\0';
+}
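
For illustration (an editor's sketch, not part of this change): the two simplest escape forms handled by demangleCharLiteral above. "?$XY" packs two rebased hex nibbles (A=0 .. P=15) into one byte, and "?0".."?9" index the punctuation table ",/\\:. \n\t'-"; the byte values in the asserts follow directly from those tables.

#include <cassert>

static unsigned char decodeDollarEscape(char Hi, char Lo) {
  return static_cast<unsigned char>(((Hi - 'A') << 4) | (Lo - 'A'));
}

int main() {
  assert(decodeDollarEscape('A', 'B') == 0x01); // "?$AB" -> byte 0x01
  assert(decodeDollarEscape('K', 'P') == 0xAF); // "?$KP" -> byte 0xAF
  const char Punct[] = ",/\\:. \n\t'-";
  assert(Punct[3] == ':');                      // "?3"   -> ':'
  return 0;
}
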
+
+wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
+ uint8_t C1, C2;
+
+ C1 = demangleCharLiteral(MangledName);
+ if (Error)
+ goto WCharLiteralError;
+ C2 = demangleCharLiteral(MangledName);
+ if (Error)
+ goto WCharLiteralError;
+
+ return ((wchar_t)C1 << 8) | (wchar_t)C2;
+
+WCharLiteralError:
+ Error = true;
+ return L'\0';
+}
+
+static void writeHexDigit(char *Buffer, uint8_t Digit) {
+ assert(Digit <= 15);
+ *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10);
+}
+
+static void outputHex(OutputStream &OS, unsigned C) {
+ if (C == 0) {
+ OS << "\\x00";
+ return;
+ }
+ // It's easier to do the math if we can work from right to left, but we need
+ // to print the numbers from left to right. So render this into a temporary
+ // buffer first, then output the temporary buffer. Each byte is of the form
+ // \xAB, which means that each byte needs 4 characters. Since there are at
+ // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
+ char TempBuffer[17];
+
+ ::memset(TempBuffer, 0, sizeof(TempBuffer));
+ constexpr int MaxPos = 15;
+
+ int Pos = MaxPos - 1;
+ while (C != 0) {
+ for (int I = 0; I < 2; ++I) {
+ writeHexDigit(&TempBuffer[Pos--], C % 16);
+ C /= 16;
+ }
+ TempBuffer[Pos--] = 'x';
+ TempBuffer[Pos--] = '\\';
+ assert(Pos >= 0);
+ }
+ OS << StringView(&TempBuffer[Pos + 1]);
+}
+
+static void outputEscapedChar(OutputStream &OS, unsigned C) {
+ switch (C) {
+ case '\'': // single quote
+ OS << "\\\'";
+ return;
+ case '\"': // double quote
+ OS << "\\\"";
+ return;
+ case '\\': // backslash
+ OS << "\\\\";
+ return;
+ case '\a': // bell
+ OS << "\\a";
+ return;
+ case '\b': // backspace
+ OS << "\\b";
+ return;
+ case '\f': // form feed
+ OS << "\\f";
+ return;
+ case '\n': // new line
+ OS << "\\n";
+ return;
+ case '\r': // carriage return
+ OS << "\\r";
+ return;
+ case '\t': // tab
+ OS << "\\t";
+ return;
+ case '\v': // vertical tab
+ OS << "\\v";
+ return;
+ default:
+ break;
+ }
+
+ if (C > 0x1F && C < 0x7F) {
+ // Standard ascii char.
+ OS << (char)C;
+ return;
+ }
+
+ outputHex(OS, C);
+}
+
+static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
+ const uint8_t *End = StringBytes + Length - 1;
+ unsigned Count = 0;
+ while (Length > 0 && *End == 0) {
+ --Length;
+ --End;
+ ++Count;
+ }
+ return Count;
+}
+
+static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
+ unsigned Length) {
+ unsigned Result = 0;
+ for (unsigned I = 0; I < Length; ++I) {
+ if (*StringBytes++ == 0)
+ ++Result;
+ }
+ return Result;
+}
+
+static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
+ unsigned NumBytes) {
+ assert(NumBytes > 0);
+
+ // If the number of bytes is odd, this is guaranteed to be a char string.
+ if (NumBytes % 2 == 1)
+ return 1;
+
+ // All strings can encode at most 32 bytes of data. If it's less than that,
+ // then we encoded the entire string. In this case we check for a 1-byte,
+ // 2-byte, or 4-byte null terminator.
+ if (NumBytes < 32) {
+ unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
+ if (TrailingNulls >= 4)
+ return 4;
+ if (TrailingNulls >= 2)
+ return 2;
+ return 1;
+ }
+
+ // The whole string was not able to be encoded. Try to look at embedded null
+ // terminators to guess. The heuristic is that we count all embedded null
+ // terminators. If more than 2/3 are null, it's a char32. If more than 1/3
+ // are null, it's a char16. Otherwise it's a char8. This obviously isn't
+ // perfect and is biased towards languages that have ascii alphabets, but this
+ // was always going to be best effort since the encoding is lossy.
+ unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
+ if (Nulls >= 2 * NumChars / 3)
+ return 4;
+ if (Nulls >= NumChars / 3)
+ return 2;
+ return 1;
+}
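
An editor's illustration (not part of the diff) of the short-string branch of guessCharByteSize above, using a hypothetical little-endian 2-byte encoding of "hi". With fewer than 32 bytes the whole literal, including its terminator, was encoded, so the length of the trailing run of zero bytes is what drives the character-size guess.

#include <cassert>
#include <cstdint>

int main() {
  // "hi" as 2-byte chars plus terminator: 68 00 69 00 00 00.
  const uint8_t Wide[] = {0x68, 0x00, 0x69, 0x00, 0x00, 0x00};
  unsigned Trailing = 0;
  for (int I = static_cast<int>(sizeof(Wide)) - 1; I >= 0 && Wide[I] == 0; --I)
    ++Trailing;
  assert(Trailing == 3); // >= 2 but < 4, so the heuristic guesses 2-byte chars
  return 0;
}
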
+
+static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
+ unsigned CharIndex, unsigned CharBytes) {
+ assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
+ unsigned Offset = CharIndex * CharBytes;
+ unsigned Result = 0;
+ StringBytes = StringBytes + Offset;
+ for (unsigned I = 0; I < CharBytes; ++I) {
+ unsigned C = static_cast<unsigned>(StringBytes[I]);
+ Result |= C << (8 * I);
+ }
+ return Result;
+}
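
A small standalone sketch (editor's illustration, not part of this change) of the little-endian reassembly done by decodeMultiByteChar above: each character is rebuilt from CharBytes consecutive bytes starting at CharIndex * CharBytes.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Bytes[] = {0x68, 0x00, 0x69, 0x00}; // "hi" as 2-byte chars
  unsigned C0 = Bytes[0] | (Bytes[1] << 8);         // char 0 -> 0x0068 'h'
  unsigned C1 = Bytes[2] | (Bytes[3] << 8);         // char 1 -> 0x0069 'i'
  assert(C0 == 0x68 && C1 == 0x69);
  return 0;
}
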
+
+FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
+ FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
+ VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
+ FSN->Signature = Arena.alloc<ThunkSignatureNode>();
+ FSN->Signature->FunctionClass = FC_NoParameterList;
+
+ FSN->Name = demangleNameScopeChain(MangledName, VTIN);
+ if (!Error)
+ Error = !MangledName.consumeFront("$B");
+ if (!Error)
+ VTIN->OffsetInVTable = demangleUnsigned(MangledName);
+ if (!Error)
+ Error = !MangledName.consumeFront('A');
+ if (!Error)
+ FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
+ return (Error) ? nullptr : FSN;
+}
+
+EncodedStringLiteralNode *
+Demangler::demangleStringLiteral(StringView &MangledName) {
+ // This function uses goto, so declare all variables up front.
+ OutputStream OS;
+ StringView CRC;
+ uint64_t StringByteSize;
+ bool IsWcharT = false;
+ bool IsNegative = false;
+ size_t CrcEndPos = 0;
+ char *ResultBuffer = nullptr;
+
+ EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
+
+ // Prefix indicating the beginning of a string literal
+ if (!MangledName.consumeFront("@_"))
+ goto StringLiteralError;
+ if (MangledName.empty())
+ goto StringLiteralError;
+
+ // Char Type (regular or wchar_t)
+ switch (MangledName.popFront()) {
+ case '1':
+ IsWcharT = true;
+ LLVM_FALLTHROUGH;
+ case '0':
+ break;
+ default:
+ goto StringLiteralError;
+ }
+
+ // Encoded Length
+ std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
+ if (Error || IsNegative)
+ goto StringLiteralError;
+
+ // CRC 32 (always 8 characters plus a terminator)
+ CrcEndPos = MangledName.find('@');
+ if (CrcEndPos == StringView::npos)
+ goto StringLiteralError;
+ CRC = MangledName.substr(0, CrcEndPos);
+ MangledName = MangledName.dropFront(CrcEndPos + 1);
+ if (MangledName.empty())
+ goto StringLiteralError;
+
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ // FIXME: Propagate out-of-memory as an error?
+ std::terminate();
+ if (IsWcharT) {
+ Result->Char = CharKind::Wchar;
+ if (StringByteSize > 64)
+ Result->IsTruncated = true;
+
+ while (!MangledName.consumeFront('@')) {
+ assert(StringByteSize >= 2);
+ wchar_t W = demangleWcharLiteral(MangledName);
+ if (StringByteSize != 2 || Result->IsTruncated)
+ outputEscapedChar(OS, W);
+ StringByteSize -= 2;
+ if (Error)
+ goto StringLiteralError;
+ }
+ } else {
+ // The max byte length is actually 32, but some compilers mangled strings
+ // incorrectly, so we have to assume it can go higher.
+ constexpr unsigned MaxStringByteLength = 32 * 4;
+ uint8_t StringBytes[MaxStringByteLength];
+
+ unsigned BytesDecoded = 0;
+ while (!MangledName.consumeFront('@')) {
+ assert(StringByteSize >= 1);
+ StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
+ }
+
+ if (StringByteSize > BytesDecoded)
+ Result->IsTruncated = true;
+
+ unsigned CharBytes =
+ guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
+ assert(StringByteSize % CharBytes == 0);
+ switch (CharBytes) {
+ case 1:
+ Result->Char = CharKind::Char;
+ break;
+ case 2:
+ Result->Char = CharKind::Char16;
+ break;
+ case 4:
+ Result->Char = CharKind::Char32;
+ break;
+ default:
+ LLVM_BUILTIN_UNREACHABLE;
+ }
+ const unsigned NumChars = BytesDecoded / CharBytes;
+ for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
+ unsigned NextChar =
+ decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
+ if (CharIndex + 1 < NumChars || Result->IsTruncated)
+ outputEscapedChar(OS, NextChar);
+ }
+ }
+
+ OS << '\0';
+ ResultBuffer = OS.getBuffer();
+ Result->DecodedString = copyString(ResultBuffer);
+ std::free(ResultBuffer);
+ return Result;
+
+StringLiteralError:
+ Error = true;
+ return nullptr;
}
StringView Demangler::demangleSimpleString(StringView &MangledName,
@@ -1274,70 +1295,104 @@ StringView Demangler::demangleSimpleString(StringView &MangledName,
return {};
}
-Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
+NamedIdentifierNode *
+Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
assert(MangledName.startsWith("?A"));
MangledName.consumeFront("?A");
- Name *Node = Arena.alloc<Name>();
- Node->Str = "`anonymous namespace'";
- if (MangledName.consumeFront('@'))
- return Node;
-
- Error = true;
- return nullptr;
+ NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
+ Node->Name = "`anonymous namespace'";
+ size_t EndPos = MangledName.find('@');
+ if (EndPos == StringView::npos) {
+ Error = true;
+ return nullptr;
+ }
+ StringView NamespaceKey = MangledName.substr(0, EndPos);
+ memorizeString(NamespaceKey);
+ MangledName = MangledName.substr(EndPos + 1);
+ return Node;
}
-Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
+NamedIdentifierNode *
+Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
assert(startsWithLocalScopePattern(MangledName));
- Name *Node = Arena.alloc<Name>();
+ NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
MangledName.consumeFront('?');
- int ScopeIdentifier = demangleNumber(MangledName);
+ auto Number = demangleNumber(MangledName);
+ assert(!Number.second);
// One ? to terminate the number
MangledName.consumeFront('?');
assert(!Error);
- Symbol *Scope = parse(MangledName);
+ Node *Scope = parse(MangledName);
if (Error)
return nullptr;
// Render the parent symbol's name into a buffer.
- OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
+ OutputStream OS;
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ // FIXME: Propagate out-of-memory as an error?
+ std::terminate();
OS << '`';
- output(Scope, OS);
+ Scope->output(OS, OF_Default);
OS << '\'';
- OS << "::`" << ScopeIdentifier << "'";
+ OS << "::`" << Number.first << "'";
OS << '\0';
char *Result = OS.getBuffer();
- Node->Str = copyString(Result);
+ Identifier->Name = copyString(Result);
std::free(Result);
- return Node;
+ return Identifier;
}
// Parses a type name in the form of A@B@C@@ which represents C::B::A.
-Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
- Name *TypeName = demangleUnqualifiedTypeName(MangledName);
- assert(TypeName);
+QualifiedNameNode *
+Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
+ IdentifierNode *Identifier = demangleUnqualifiedTypeName(MangledName, true);
+ if (Error)
+ return nullptr;
+ assert(Identifier);
- Name *QualName = demangleNameScopeChain(MangledName, TypeName);
- assert(QualName);
- return QualName;
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
+ if (Error)
+ return nullptr;
+ assert(QN);
+ return QN;
}
// Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
// Symbol names have slightly different rules regarding what can appear
// so we separate out the implementations for flexibility.
-Name *Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
- Name *SymbolName = demangleUnqualifiedSymbolName(MangledName);
- assert(SymbolName);
+QualifiedNameNode *
+Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
+  // This is the final component of a symbol name (i.e. the leftmost component
+  // of a mangled name). Since the only possible template instantiation that
+ // can appear in this context is a function template, and since those are
+ // not saved for the purposes of name backreferences, only backref simple
+ // names.
+ IdentifierNode *Identifier =
+ demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
+ if (Error)
+ return nullptr;
- Name *QualName = demangleNameScopeChain(MangledName, SymbolName);
- assert(QualName);
- return QualName;
+ QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
+ if (Error)
+ return nullptr;
+
+ if (Identifier->kind() == NodeKind::StructorIdentifier) {
+ StructorIdentifierNode *SIN =
+ static_cast<StructorIdentifierNode *>(Identifier);
+ assert(QN->Components->Count >= 2);
+ Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
+ SIN->Class = static_cast<IdentifierNode *>(ClassNode);
+ }
+ assert(QN);
+ return QN;
}
-Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) {
+IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName,
+ bool Memorize) {
// An inner-most name can be a back-reference, because a fully-qualified name
// (e.g. Scope + Inner) can contain other fully qualified names inside of
// them (for example template parameters), and these nested parameters can
@@ -1346,27 +1401,29 @@ Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) {
return demangleBackRefName(MangledName);
if (MangledName.startsWith("?$"))
- return demangleClassTemplateName(MangledName);
+ return demangleTemplateInstantiationName(MangledName, NBB_Template);
- return demangleSimpleName(MangledName, true);
+ return demangleSimpleName(MangledName, Memorize);
}
-Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName) {
+IdentifierNode *
+Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
+ NameBackrefBehavior NBB) {
if (startsWithDigit(MangledName))
return demangleBackRefName(MangledName);
if (MangledName.startsWith("?$"))
- return demangleClassTemplateName(MangledName);
+ return demangleTemplateInstantiationName(MangledName, NBB);
if (MangledName.startsWith('?'))
- return demangleOperatorName(MangledName);
- return demangleSimpleName(MangledName, true);
+ return demangleFunctionIdentifierCode(MangledName);
+ return demangleSimpleName(MangledName, (NBB & NBB_Simple) != 0);
}
-Name *Demangler::demangleNameScopePiece(StringView &MangledName) {
+IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
if (startsWithDigit(MangledName))
return demangleBackRefName(MangledName);
if (MangledName.startsWith("?$"))
- return demangleClassTemplateName(MangledName);
+ return demangleTemplateInstantiationName(MangledName, NBB_Template);
if (MangledName.startsWith("?A"))
return demangleAnonymousNamespaceName(MangledName);
@@ -1377,77 +1434,130 @@ Name *Demangler::demangleNameScopePiece(StringView &MangledName) {
return demangleSimpleName(MangledName, true);
}
-Name *Demangler::demangleNameScopeChain(StringView &MangledName,
- Name *UnqualifiedName) {
- Name *Head = UnqualifiedName;
+static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
+ size_t Count) {
+ NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
+ N->Count = Count;
+ N->Nodes = Arena.allocArray<Node *>(Count);
+ for (size_t I = 0; I < Count; ++I) {
+ N->Nodes[I] = Head->N;
+ Head = Head->Next;
+ }
+ return N;
+}
+
+QualifiedNameNode *
+Demangler::demangleNameScopeChain(StringView &MangledName,
+ IdentifierNode *UnqualifiedName) {
+ NodeList *Head = Arena.alloc<NodeList>();
+ Head->N = UnqualifiedName;
+
+ size_t Count = 1;
while (!MangledName.consumeFront("@")) {
+ ++Count;
+ NodeList *NewHead = Arena.alloc<NodeList>();
+ NewHead->Next = Head;
+ Head = NewHead;
+
if (MangledName.empty()) {
Error = true;
return nullptr;
}
assert(!Error);
- Name *Elem = demangleNameScopePiece(MangledName);
+ IdentifierNode *Elem = demangleNameScopePiece(MangledName);
if (Error)
return nullptr;
- Elem->Next = Head;
- Head = Elem;
+ Head->N = Elem;
}
- return Head;
+
+ QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
+ QN->Components = nodeListToNodeArray(Arena, Head, Count);
+ return QN;
}
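
For reference, an editor's sketch (not part of the diff) of why the prepend-then-flatten scheme above renders "A@B@C@@" as C::B::A: components are parsed inner-to-outer (A, then B, then C), each new scope is pushed in front of the list, and the flattened array is then printed left to right.

#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Components;
  for (const char *Piece : {"A", "B", "C"})        // parse order of A@B@C@@
    Components.insert(Components.begin(), Piece);  // prepend, like NewHead above

  std::string Printed;
  for (size_t I = 0; I < Components.size(); ++I)
    Printed += (I ? "::" : "") + Components[I];
  assert(Printed == "C::B::A");
  return 0;
}
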
FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
- SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
- RestoreOnError.shouldRestore(false);
-
switch (MangledName.popFront()) {
+ case '9':
+ return FuncClass(FC_ExternC | FC_NoParameterList);
case 'A':
- return Private;
+ return FC_Private;
case 'B':
- return FuncClass(Private | Far);
+ return FuncClass(FC_Private | FC_Far);
case 'C':
- return FuncClass(Private | Static);
+ return FuncClass(FC_Private | FC_Static);
case 'D':
- return FuncClass(Private | Static);
+ return FuncClass(FC_Private | FC_Static);
case 'E':
- return FuncClass(Private | Virtual);
+ return FuncClass(FC_Private | FC_Virtual);
case 'F':
- return FuncClass(Private | Virtual);
+ return FuncClass(FC_Private | FC_Virtual);
+ case 'G':
+ return FuncClass(FC_Private | FC_StaticThisAdjust);
+ case 'H':
+ return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
case 'I':
- return Protected;
+ return FuncClass(FC_Protected);
case 'J':
- return FuncClass(Protected | Far);
+ return FuncClass(FC_Protected | FC_Far);
case 'K':
- return FuncClass(Protected | Static);
+ return FuncClass(FC_Protected | FC_Static);
case 'L':
- return FuncClass(Protected | Static | Far);
+ return FuncClass(FC_Protected | FC_Static | FC_Far);
case 'M':
- return FuncClass(Protected | Virtual);
+ return FuncClass(FC_Protected | FC_Virtual);
case 'N':
- return FuncClass(Protected | Virtual | Far);
+ return FuncClass(FC_Protected | FC_Virtual | FC_Far);
+ case 'O':
+ return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
+ case 'P':
+ return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
case 'Q':
- return Public;
+ return FuncClass(FC_Public);
case 'R':
- return FuncClass(Public | Far);
+ return FuncClass(FC_Public | FC_Far);
case 'S':
- return FuncClass(Public | Static);
+ return FuncClass(FC_Public | FC_Static);
case 'T':
- return FuncClass(Public | Static | Far);
+ return FuncClass(FC_Public | FC_Static | FC_Far);
case 'U':
- return FuncClass(Public | Virtual);
+ return FuncClass(FC_Public | FC_Virtual);
case 'V':
- return FuncClass(Public | Virtual | Far);
+ return FuncClass(FC_Public | FC_Virtual | FC_Far);
+ case 'W':
+ return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
+ case 'X':
+ return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
case 'Y':
- return Global;
+ return FuncClass(FC_Global);
case 'Z':
- return FuncClass(Global | Far);
+ return FuncClass(FC_Global | FC_Far);
+ case '$': {
+ FuncClass VFlag = FC_VirtualThisAdjust;
+ if (MangledName.consumeFront('R'))
+ VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
+
+ switch (MangledName.popFront()) {
+ case '0':
+ return FuncClass(FC_Private | FC_Virtual | VFlag);
+ case '1':
+ return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
+ case '2':
+ return FuncClass(FC_Protected | FC_Virtual | VFlag);
+ case '3':
+ return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
+ case '4':
+ return FuncClass(FC_Public | FC_Virtual | VFlag);
+ case '5':
+ return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
+ }
+ }
}
Error = true;
- RestoreOnError.shouldRestore(true);
- return Public;
+ return FC_Public;
}
CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
@@ -1526,112 +1636,68 @@ Demangler::demangleQualifiers(StringView &MangledName) {
return std::make_pair(Q_None, false);
}
-static bool isTagType(StringView S) {
- switch (S.front()) {
- case 'T': // union
- case 'U': // struct
- case 'V': // class
- case 'W': // enum
- return true;
- }
- return false;
-}
-
-static bool isPointerType(StringView S) {
- if (S.startsWith("$$Q")) // foo &&
- return true;
-
- switch (S.front()) {
- case 'A': // foo &
- case 'P': // foo *
- case 'Q': // foo *const
- case 'R': // foo *volatile
- case 'S': // foo *const volatile
- return true;
- }
- return false;
-}
-
-static bool isArrayType(StringView S) { return S[0] == 'Y'; }
-
-static bool isFunctionType(StringView S) {
- return S.startsWith("$$A8@@") || S.startsWith("$$A6");
-}
-
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
-Type *Demangler::demangleType(StringView &MangledName,
- QualifierMangleMode QMM) {
+TypeNode *Demangler::demangleType(StringView &MangledName,
+ QualifierMangleMode QMM) {
Qualifiers Quals = Q_None;
bool IsMember = false;
- bool IsMemberKnown = false;
if (QMM == QualifierMangleMode::Mangle) {
std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
- IsMemberKnown = true;
} else if (QMM == QualifierMangleMode::Result) {
- if (MangledName.consumeFront('?')) {
+ if (MangledName.consumeFront('?'))
std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
- IsMemberKnown = true;
- }
}
- Type *Ty = nullptr;
+ TypeNode *Ty = nullptr;
if (isTagType(MangledName))
Ty = demangleClassType(MangledName);
else if (isPointerType(MangledName)) {
- if (!IsMemberKnown)
- IsMember = isMemberPointer(MangledName);
-
- if (IsMember)
+ if (isMemberPointer(MangledName, Error))
Ty = demangleMemberPointerType(MangledName);
- else
+ else if (!Error)
Ty = demanglePointerType(MangledName);
+ else
+ return nullptr;
} else if (isArrayType(MangledName))
Ty = demangleArrayType(MangledName);
else if (isFunctionType(MangledName)) {
if (MangledName.consumeFront("$$A8@@"))
- Ty = demangleFunctionType(MangledName, true, false);
+ Ty = demangleFunctionType(MangledName, true);
else {
assert(MangledName.startsWith("$$A6"));
MangledName.consumeFront("$$A6");
- Ty = demangleFunctionType(MangledName, false, false);
+ Ty = demangleFunctionType(MangledName, false);
}
+ } else if (isCustomType(MangledName)) {
+ Ty = demangleCustomType(MangledName);
} else {
- Ty = demangleBasicType(MangledName);
- assert(Ty && !Error);
- if (!Ty || Error)
- return Ty;
+ Ty = demanglePrimitiveType(MangledName);
}
+ if (!Ty || Error)
+ return Ty;
Ty->Quals = Qualifiers(Ty->Quals | Quals);
return Ty;
}
-ReferenceKind Demangler::demangleReferenceKind(StringView &MangledName) {
- if (MangledName.consumeFront('G'))
- return ReferenceKind::LValueRef;
- else if (MangledName.consumeFront('H'))
- return ReferenceKind::RValueRef;
- return ReferenceKind::None;
-}
-
-void Demangler::demangleThrowSpecification(StringView &MangledName) {
+bool Demangler::demangleThrowSpecification(StringView &MangledName) {
+ if (MangledName.consumeFront("_E"))
+ return true;
if (MangledName.consumeFront('Z'))
- return;
+ return false;
Error = true;
+ return false;
}
-FunctionType *Demangler::demangleFunctionType(StringView &MangledName,
- bool HasThisQuals,
- bool IsFunctionPointer) {
- FunctionType *FTy = Arena.alloc<FunctionType>();
- FTy->Prim = PrimTy::Function;
- FTy->IsFunctionPointer = IsFunctionPointer;
+FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
+ bool HasThisQuals) {
+ FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
if (HasThisQuals) {
FTy->Quals = demanglePointerExtQualifiers(MangledName);
- FTy->RefKind = demangleReferenceKind(MangledName);
+ FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
}
@@ -1646,70 +1712,100 @@ FunctionType *Demangler::demangleFunctionType(StringView &MangledName,
FTy->Params = demangleFunctionParameterList(MangledName);
- demangleThrowSpecification(MangledName);
+ FTy->IsNoexcept = demangleThrowSpecification(MangledName);
return FTy;
}
-Type *Demangler::demangleFunctionEncoding(StringView &MangledName) {
+FunctionSymbolNode *
+Demangler::demangleFunctionEncoding(StringView &MangledName) {
+ FuncClass ExtraFlags = FC_None;
+ if (MangledName.consumeFront("$$J0"))
+ ExtraFlags = FC_ExternC;
+
FuncClass FC = demangleFunctionClass(MangledName);
+ FC = FuncClass(ExtraFlags | FC);
+
+ FunctionSignatureNode *FSN = nullptr;
+ ThunkSignatureNode *TTN = nullptr;
+ if (FC & FC_StaticThisAdjust) {
+ TTN = Arena.alloc<ThunkSignatureNode>();
+ TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
+ } else if (FC & FC_VirtualThisAdjust) {
+ TTN = Arena.alloc<ThunkSignatureNode>();
+ if (FC & FC_VirtualThisAdjustEx) {
+ TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
+ TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
+ }
+ TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
+ TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
+ }
- bool HasThisQuals = !(FC & (Global | Static));
- FunctionType *FTy = demangleFunctionType(MangledName, HasThisQuals, false);
- FTy->FunctionClass = FC;
+ if (FC & FC_NoParameterList) {
+ // This is an extern "C" function whose full signature hasn't been mangled.
+ // This happens when we need to mangle a local symbol inside of an extern
+ // "C" function.
+ FSN = Arena.alloc<FunctionSignatureNode>();
+ } else {
+ bool HasThisQuals = !(FC & (FC_Global | FC_Static));
+ FSN = demangleFunctionType(MangledName, HasThisQuals);
+ }
+ if (TTN) {
+ *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
+ FSN = TTN;
+ }
+ FSN->FunctionClass = FC;
- return FTy;
+ FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
+ Symbol->Signature = FSN;
+ return Symbol;
}
-// Reads a primitive type.
-Type *Demangler::demangleBasicType(StringView &MangledName) {
- Type *Ty = Arena.alloc<Type>();
+CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
+ assert(MangledName.startsWith('?'));
+ MangledName.popFront();
- if (MangledName.consumeFront("$$T")) {
- Ty->Prim = PrimTy::Nullptr;
- return Ty;
- }
+ CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
+ CTN->Identifier = demangleUnqualifiedTypeName(MangledName, true);
+ if (!MangledName.consumeFront('@'))
+ Error = true;
+ if (Error)
+ return nullptr;
+ return CTN;
+}
+
+// Reads a primitive type.
+PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
+ if (MangledName.consumeFront("$$T"))
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
switch (MangledName.popFront()) {
case 'X':
- Ty->Prim = PrimTy::Void;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
case 'D':
- Ty->Prim = PrimTy::Char;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
case 'C':
- Ty->Prim = PrimTy::Schar;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
case 'E':
- Ty->Prim = PrimTy::Uchar;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
case 'F':
- Ty->Prim = PrimTy::Short;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
case 'G':
- Ty->Prim = PrimTy::Ushort;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
case 'H':
- Ty->Prim = PrimTy::Int;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
case 'I':
- Ty->Prim = PrimTy::Uint;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
case 'J':
- Ty->Prim = PrimTy::Long;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
case 'K':
- Ty->Prim = PrimTy::Ulong;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
case 'M':
- Ty->Prim = PrimTy::Float;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
case 'N':
- Ty->Prim = PrimTy::Double;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
case 'O':
- Ty->Prim = PrimTy::Ldouble;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
case '_': {
if (MangledName.empty()) {
Error = true;
@@ -1717,98 +1813,63 @@ Type *Demangler::demangleBasicType(StringView &MangledName) {
}
switch (MangledName.popFront()) {
case 'N':
- Ty->Prim = PrimTy::Bool;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
case 'J':
- Ty->Prim = PrimTy::Int64;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
case 'K':
- Ty->Prim = PrimTy::Uint64;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
case 'W':
- Ty->Prim = PrimTy::Wchar;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
case 'S':
- Ty->Prim = PrimTy::Char16;
- break;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
case 'U':
- Ty->Prim = PrimTy::Char32;
- break;
- default:
- Error = true;
- return nullptr;
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
}
break;
}
- default:
- Error = true;
- return nullptr;
}
- return Ty;
+ Error = true;
+ return nullptr;
}
-UdtType *Demangler::demangleClassType(StringView &MangledName) {
- UdtType *UTy = Arena.alloc<UdtType>();
+TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
+ TagTypeNode *TT = nullptr;
switch (MangledName.popFront()) {
case 'T':
- UTy->Prim = PrimTy::Union;
+ TT = Arena.alloc<TagTypeNode>(TagKind::Union);
break;
case 'U':
- UTy->Prim = PrimTy::Struct;
+ TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
break;
case 'V':
- UTy->Prim = PrimTy::Class;
+ TT = Arena.alloc<TagTypeNode>(TagKind::Class);
break;
case 'W':
if (MangledName.popFront() != '4') {
Error = true;
return nullptr;
}
- UTy->Prim = PrimTy::Enum;
+ TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
break;
default:
assert(false);
}
- UTy->UdtName = demangleFullyQualifiedTypeName(MangledName);
- return UTy;
-}
-
-static std::pair<Qualifiers, PointerAffinity>
-demanglePointerCVQualifiers(StringView &MangledName) {
- if (MangledName.consumeFront("$$Q"))
- return std::make_pair(Q_None, PointerAffinity::RValueReference);
-
- switch (MangledName.popFront()) {
- case 'A':
- return std::make_pair(Q_None, PointerAffinity::Reference);
- case 'P':
- return std::make_pair(Q_None, PointerAffinity::Pointer);
- case 'Q':
- return std::make_pair(Q_Const, PointerAffinity::Pointer);
- case 'R':
- return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
- case 'S':
- return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
- PointerAffinity::Pointer);
- default:
- assert(false && "Ty is not a pointer type!");
- }
- return std::make_pair(Q_None, PointerAffinity::Pointer);
+ TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
+ return TT;
}
// <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
// # the E is required for 64-bit non-static pointers
-PointerType *Demangler::demanglePointerType(StringView &MangledName) {
- PointerType *Pointer = Arena.alloc<PointerType>();
+PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) {
+ PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
std::tie(Pointer->Quals, Pointer->Affinity) =
demanglePointerCVQualifiers(MangledName);
- Pointer->Prim = PrimTy::Ptr;
if (MangledName.consumeFront("6")) {
- Pointer->Pointee = demangleFunctionType(MangledName, false, true);
+ Pointer->Pointee = demangleFunctionType(MangledName, false);
return Pointer;
}
@@ -1819,27 +1880,25 @@ PointerType *Demangler::demanglePointerType(StringView &MangledName) {
return Pointer;
}
-MemberPointerType *
-Demangler::demangleMemberPointerType(StringView &MangledName) {
- MemberPointerType *Pointer = Arena.alloc<MemberPointerType>();
- Pointer->Prim = PrimTy::MemberPtr;
+PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
+ PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
- PointerAffinity Affinity;
- std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName);
- assert(Affinity == PointerAffinity::Pointer);
+ std::tie(Pointer->Quals, Pointer->Affinity) =
+ demanglePointerCVQualifiers(MangledName);
+ assert(Pointer->Affinity == PointerAffinity::Pointer);
Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
if (MangledName.consumeFront("8")) {
- Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
- Pointer->Pointee = demangleFunctionType(MangledName, true, true);
+ Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
+ Pointer->Pointee = demangleFunctionType(MangledName, true);
} else {
Qualifiers PointeeQuals = Q_None;
bool IsMember = false;
std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
assert(IsMember);
- Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
+ Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
Pointer->Pointee->Quals = PointeeQuals;
@@ -1860,77 +1919,94 @@ Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
return Quals;
}
-ArrayType *Demangler::demangleArrayType(StringView &MangledName) {
+ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
assert(MangledName.front() == 'Y');
MangledName.popFront();
- int Dimension = demangleNumber(MangledName);
- if (Dimension <= 0) {
+ uint64_t Rank = 0;
+ bool IsNegative = false;
+ std::tie(Rank, IsNegative) = demangleNumber(MangledName);
+ if (IsNegative || Rank == 0) {
Error = true;
return nullptr;
}
- ArrayType *ATy = Arena.alloc<ArrayType>();
- ArrayType *Dim = ATy;
- for (int I = 0; I < Dimension; ++I) {
- Dim->Prim = PrimTy::Array;
- Dim->ArrayDimension = demangleNumber(MangledName);
- Dim->NextDimension = Arena.alloc<ArrayType>();
- Dim = Dim->NextDimension;
+ ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
+ NodeList *Head = Arena.alloc<NodeList>();
+ NodeList *Tail = Head;
+
+ for (uint64_t I = 0; I < Rank; ++I) {
+ uint64_t D = 0;
+ std::tie(D, IsNegative) = demangleNumber(MangledName);
+ if (IsNegative) {
+ Error = true;
+ return nullptr;
+ }
+ Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
+ if (I + 1 < Rank) {
+ Tail->Next = Arena.alloc<NodeList>();
+ Tail = Tail->Next;
+ }
}
+ ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
if (MangledName.consumeFront("$$C")) {
- if (MangledName.consumeFront("B"))
- ATy->Quals = Q_Const;
- else if (MangledName.consumeFront("C") || MangledName.consumeFront("D"))
- ATy->Quals = Qualifiers(Q_Const | Q_Volatile);
- else if (!MangledName.consumeFront("A"))
+ bool IsMember = false;
+ std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
+ if (IsMember) {
Error = true;
+ return nullptr;
+ }
}
ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
- Dim->ElementType = ATy->ElementType;
return ATy;
}
// Reads a function or a template parameters.
-FunctionParams
+NodeArrayNode *
Demangler::demangleFunctionParameterList(StringView &MangledName) {
// Empty parameter list.
if (MangledName.consumeFront('X'))
return {};
- FunctionParams *Head;
- FunctionParams **Current = &Head;
+ NodeList *Head = Arena.alloc<NodeList>();
+ NodeList **Current = &Head;
+ size_t Count = 0;
while (!Error && !MangledName.startsWith('@') &&
!MangledName.startsWith('Z')) {
+ ++Count;
if (startsWithDigit(MangledName)) {
size_t N = MangledName[0] - '0';
- if (N >= FunctionParamBackRefCount) {
+ if (N >= Backrefs.FunctionParamCount) {
Error = true;
return {};
}
MangledName = MangledName.dropFront();
- *Current = Arena.alloc<FunctionParams>();
- (*Current)->Current = FunctionParamBackRefs[N]->clone(Arena);
+ *Current = Arena.alloc<NodeList>();
+ (*Current)->N = Backrefs.FunctionParams[N];
Current = &(*Current)->Next;
continue;
}
size_t OldSize = MangledName.size();
- *Current = Arena.alloc<FunctionParams>();
- (*Current)->Current = demangleType(MangledName, QualifierMangleMode::Drop);
+ *Current = Arena.alloc<NodeList>();
+ TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
+ if (!TN || Error)
+ return nullptr;
+
+ (*Current)->N = TN;
size_t CharsConsumed = OldSize - MangledName.size();
assert(CharsConsumed != 0);
// Single-letter types are ignored for backreferences because memorizing
// them doesn't save anything.
- if (FunctionParamBackRefCount <= 9 && CharsConsumed > 1)
- FunctionParamBackRefs[FunctionParamBackRefCount++] = (*Current)->Current;
+ if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
+ Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
Current = &(*Current)->Next;
}
@@ -1938,99 +2014,206 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
if (Error)
return {};
+ NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
// A non-empty parameter list is terminated by either 'Z' (variadic) parameter
// list or '@' (non variadic). Careful not to consume "@Z", as in that case
// the following Z could be a throw specifier.
if (MangledName.consumeFront('@'))
- return *Head;
+ return NA;
if (MangledName.consumeFront('Z')) {
- Head->IsVariadic = true;
- return *Head;
+ // This is a variadic parameter list. We probably need a variadic node to
+ // append to the end.
+ return NA;
}
Error = true;
return {};
}
-TemplateParams *
+NodeArrayNode *
Demangler::demangleTemplateParameterList(StringView &MangledName) {
- TemplateParams *Head;
- TemplateParams **Current = &Head;
- while (!Error && !MangledName.startsWith('@')) {
- // Template parameter lists don't participate in back-referencing.
- *Current = Arena.alloc<TemplateParams>();
+ NodeList *Head;
+ NodeList **Current = &Head;
+ size_t Count = 0;
- // Empty parameter pack.
+ while (!Error && !MangledName.startsWith('@')) {
if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
- MangledName.consumeFront("$$$V")) {
- if (!MangledName.startsWith('@'))
- Error = true;
+ MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
+ // parameter pack separator
continue;
}
+ ++Count;
+
+ // Template parameter lists don't participate in back-referencing.
+ *Current = Arena.alloc<NodeList>();
+
+ NodeList &TP = **Current;
+
+ TemplateParameterReferenceNode *TPRN = nullptr;
if (MangledName.consumeFront("$$Y")) {
- (*Current)->IsTemplateTemplate = true;
- (*Current)->IsAliasTemplate = true;
- (*Current)->ParamName = demangleFullyQualifiedTypeName(MangledName);
- } else if (MangledName.consumeFront("$1?")) {
- (*Current)->ParamName = demangleFullyQualifiedSymbolName(MangledName);
- (*Current)->ParamType = demangleFunctionEncoding(MangledName);
+ // Template alias
+ TP.N = demangleFullyQualifiedTypeName(MangledName);
+ } else if (MangledName.consumeFront("$$B")) {
+ // Array
+ TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
+ } else if (MangledName.consumeFront("$$C")) {
+ // Type has qualifiers.
+ TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
+ } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") ||
+ MangledName.startsWith("$I") || MangledName.startsWith("$J")) {
+ // Pointer to member
+ TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
+ TPRN->IsMemberPointer = true;
+
+ MangledName = MangledName.dropFront();
+ // 1 - single inheritance <name>
+ // H - multiple inheritance <name> <number>
+ // I - virtual inheritance <name> <number> <number> <number>
+ // J - unspecified inheritance <name> <number> <number> <number>
+ char InheritanceSpecifier = MangledName.popFront();
+ SymbolNode *S = nullptr;
+ if (MangledName.startsWith('?')) {
+ S = parse(MangledName);
+ memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
+ }
+
+ switch (InheritanceSpecifier) {
+ case 'J':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case 'I':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case 'H':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case '1':
+ break;
+ default:
+ Error = true;
+ break;
+ }
+ TPRN->Affinity = PointerAffinity::Pointer;
+ TPRN->Symbol = S;
+ } else if (MangledName.startsWith("$E?")) {
+ MangledName.consumeFront("$E");
+ // Reference to symbol
+ TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
+ TPRN->Symbol = parse(MangledName);
+ TPRN->Affinity = PointerAffinity::Reference;
+ } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) {
+ TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
+
+ // Data member pointer.
+ MangledName = MangledName.dropFront();
+ char InheritanceSpecifier = MangledName.popFront();
+
+ switch (InheritanceSpecifier) {
+ case 'G':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case 'F':
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
+ demangleSigned(MangledName);
+ LLVM_FALLTHROUGH;
+ case '0':
+ break;
+ default:
+ Error = true;
+ break;
+ }
+ TPRN->IsMemberPointer = true;
+
+ } else if (MangledName.consumeFront("$0")) {
+ // Integral non-type template parameter
+ bool IsNegative = false;
+ uint64_t Value = 0;
+ std::tie(Value, IsNegative) = demangleNumber(MangledName);
+
+ TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
} else {
- (*Current)->ParamType =
- demangleType(MangledName, QualifierMangleMode::Drop);
+ TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
}
+ if (Error)
+ return nullptr;
- Current = &(*Current)->Next;
+ Current = &TP.Next;
}
if (Error)
- return {};
+ return nullptr;
// Template parameter lists cannot be variadic, so it can only be terminated
// by @.
if (MangledName.consumeFront('@'))
- return Head;
+ return nodeListToNodeArray(Arena, Head, Count);
Error = true;
- return {};
+ return nullptr;
}
-void Demangler::output(const Symbol *S, OutputStream &OS) {
- // Converts an AST to a string.
- //
- // Converting an AST representing a C++ type to a string is tricky due
- // to the bad grammar of the C++ declaration inherited from C. You have
- // to construct a string from inside to outside. For example, if a type
- // X is a pointer to a function returning int, the order you create a
- // string becomes something like this:
- //
- // (1) X is a pointer: *X
- // (2) (1) is a function returning int: int (*X)()
- //
- // So you cannot construct a result just by appending strings to a result.
- //
- // To deal with this, we split the function into two. outputPre() writes
- // the "first half" of type declaration, and outputPost() writes the
- // "second half". For example, outputPre() writes a return type for a
- // function and outputPost() writes an parameter list.
- Type::outputPre(OS, *S->SymbolType);
- outputName(OS, S->SymbolName);
- Type::outputPost(OS, *S->SymbolType);
+void Demangler::dumpBackReferences() {
+ std::printf("%d function parameter backreferences\n",
+ (int)Backrefs.FunctionParamCount);
+
+ // Create an output stream so we can render each type.
+ OutputStream OS;
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ std::terminate();
+ for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
+ OS.setCurrentPosition(0);
+
+ TypeNode *T = Backrefs.FunctionParams[I];
+ T->output(OS, OF_Default);
+
+ std::printf(" [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(),
+ OS.getBuffer());
+ }
+ std::free(OS.getBuffer());
+
+ if (Backrefs.FunctionParamCount > 0)
+ std::printf("\n");
+ std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
+ for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
+ std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
+ Backrefs.Names[I]->Name.begin());
+ }
+ if (Backrefs.NamesCount > 0)
+ std::printf("\n");
}
char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N,
- int *Status) {
+ int *Status, MSDemangleFlags Flags) {
+ int InternalStatus = demangle_success;
Demangler D;
+ OutputStream S;
+
StringView Name{MangledName};
- Symbol *S = D.parse(Name);
+ SymbolNode *AST = D.parse(Name);
+
+ if (Flags & MSDF_DumpBackrefs)
+ D.dumpBackReferences();
if (D.Error)
- *Status = llvm::demangle_invalid_mangled_name;
- else
- *Status = llvm::demangle_success;
+ InternalStatus = demangle_invalid_mangled_name;
+ else if (!initializeOutputStream(Buf, N, S, 1024))
+ InternalStatus = demangle_memory_alloc_failure;
+ else {
+ AST->output(S, OF_Default);
+ S += '\0';
+ if (N != nullptr)
+ *N = S.getCurrentPosition();
+ Buf = S.getBuffer();
+ }
- OutputStream OS = OutputStream::create(Buf, N, 1024);
- D.output(S, OS);
- OS << '\0';
- return OS.getBuffer();
+ if (Status)
+ *Status = InternalStatus;
+ return InternalStatus == demangle_success ? Buf : nullptr;
}
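For reference, a minimal caller of the updated entry point could look like the
sketch below. It is only an illustration: it assumes the llvm/Demangle/Demangle.h
declarations that ship with this import (MSDemangleFlags, MSDF_None,
demangle_success) and passes null Buf/N so the demangler allocates the result
buffer itself; the mangled name is an arbitrary example.

// Hedged usage sketch, not part of the imported sources.
#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  int Status = 0;
  // Buf == nullptr and N == nullptr ask microsoftDemangle() to malloc() the
  // output buffer; the returned pointer is then owned by the caller.
  char *Demangled = llvm::microsoftDemangle("?func@@YAHD@Z", nullptr, nullptr,
                                            &Status, llvm::MSDF_None);
  if (Status == llvm::demangle_success && Demangled)
    std::printf("%s\n", Demangled); // e.g. "int __cdecl func(char)"
  std::free(Demangled);
  return 0;
}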
diff --git a/contrib/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/contrib/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
new file mode 100644
index 000000000000..622f8e75e351
--- /dev/null
+++ b/contrib/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -0,0 +1,635 @@
+//===- MicrosoftDemangleNodes.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a demangler for MSVC-style mangled symbols.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/MicrosoftDemangleNodes.h"
+#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/Utility.h"
+#include <cctype>
+#include <string>
+
+using namespace llvm;
+using namespace ms_demangle;
+
+#define OUTPUT_ENUM_CLASS_VALUE(Enum, Value, Desc) \
+ case Enum::Value: \
+ OS << Desc; \
+ break;
+
+// Writes a space if the last token does not end with a punctuation character.
+static void outputSpaceIfNecessary(OutputStream &OS) {
+ if (OS.empty())
+ return;
+
+ char C = OS.back();
+ if (std::isalnum(C) || C == '>')
+ OS << " ";
+}
+
+static bool outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
+ switch (Q) {
+ case Q_Const:
+ OS << "const";
+ return true;
+ case Q_Volatile:
+ OS << "volatile";
+ return true;
+ case Q_Restrict:
+ OS << "__restrict";
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+static bool outputQualifierIfPresent(OutputStream &OS, Qualifiers Q,
+ Qualifiers Mask, bool NeedSpace) {
+ if (!(Q & Mask))
+ return NeedSpace;
+
+ if (NeedSpace)
+ OS << " ";
+
+ outputSingleQualifier(OS, Mask);
+ return true;
+}
+
+static void outputQualifiers(OutputStream &OS, Qualifiers Q, bool SpaceBefore,
+ bool SpaceAfter) {
+ if (Q == Q_None)
+ return;
+
+ size_t Pos1 = OS.getCurrentPosition();
+ SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Const, SpaceBefore);
+ SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Volatile, SpaceBefore);
+ SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Restrict, SpaceBefore);
+ size_t Pos2 = OS.getCurrentPosition();
+ if (SpaceAfter && Pos2 > Pos1)
+ OS << " ";
+}
+
+static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
+ outputSpaceIfNecessary(OS);
+
+ switch (CC) {
+ case CallingConv::Cdecl:
+ OS << "__cdecl";
+ break;
+ case CallingConv::Fastcall:
+ OS << "__fastcall";
+ break;
+ case CallingConv::Pascal:
+ OS << "__pascal";
+ break;
+ case CallingConv::Regcall:
+ OS << "__regcall";
+ break;
+ case CallingConv::Stdcall:
+ OS << "__stdcall";
+ break;
+ case CallingConv::Thiscall:
+ OS << "__thiscall";
+ break;
+ case CallingConv::Eabi:
+ OS << "__eabi";
+ break;
+ case CallingConv::Vectorcall:
+ OS << "__vectorcall";
+ break;
+ case CallingConv::Clrcall:
+ OS << "__clrcall";
+ break;
+ default:
+ break;
+ }
+}
+
+std::string Node::toString(OutputFlags Flags) const {
+ OutputStream OS;
+ initializeOutputStream(nullptr, nullptr, OS, 1024);
+ this->output(OS, Flags);
+ OS << '\0';
+ return {OS.getBuffer()};
+}
+
+void TypeNode::outputQuals(bool SpaceBefore, bool SpaceAfter) const {}
+
+void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ switch (PrimKind) {
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Void, "void");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Bool, "bool");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char, "char");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Schar, "signed char");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uchar, "unsigned char");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char16, "char16_t");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char32, "char32_t");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Short, "short");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ushort, "unsigned short");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Int, "int");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uint, "unsigned int");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Long, "long");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ulong, "unsigned long");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Int64, "__int64");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uint64, "unsigned __int64");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Wchar, "wchar_t");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Float, "float");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Double, "double");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ldouble, "long double");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Nullptr, "std::nullptr_t");
+ }
+ outputQualifiers(OS, Quals, true, false);
+}
+
+void NodeArrayNode::output(OutputStream &OS, OutputFlags Flags) const {
+ output(OS, Flags, ", ");
+}
+
+void NodeArrayNode::output(OutputStream &OS, OutputFlags Flags,
+ StringView Separator) const {
+ if (Count == 0)
+ return;
+ if (Nodes[0])
+ Nodes[0]->output(OS, Flags);
+ for (size_t I = 1; I < Count; ++I) {
+ OS << Separator;
+ Nodes[I]->output(OS, Flags);
+ }
+}
+
+void EncodedStringLiteralNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ switch (Char) {
+ case CharKind::Wchar:
+ OS << "L\"";
+ break;
+ case CharKind::Char:
+ OS << "\"";
+ break;
+ case CharKind::Char16:
+ OS << "u\"";
+ break;
+ case CharKind::Char32:
+ OS << "U\"";
+ break;
+ }
+ OS << DecodedString << "\"";
+ if (IsTruncated)
+ OS << "...";
+}
+
+void IntegerLiteralNode::output(OutputStream &OS, OutputFlags Flags) const {
+ if (IsNegative)
+ OS << '-';
+ OS << Value;
+}
+
+void TemplateParameterReferenceNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (ThunkOffsetCount > 0)
+ OS << "{";
+ else if (Affinity == PointerAffinity::Pointer)
+ OS << "&";
+
+ if (Symbol) {
+ Symbol->output(OS, Flags);
+ if (ThunkOffsetCount > 0)
+ OS << ", ";
+ }
+
+ if (ThunkOffsetCount > 0)
+ OS << ThunkOffsets[0];
+ for (int I = 1; I < ThunkOffsetCount; ++I) {
+ OS << ", " << ThunkOffsets[I];
+ }
+ if (ThunkOffsetCount > 0)
+ OS << "}";
+}
+
+void IdentifierNode::outputTemplateParameters(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (!TemplateParams)
+ return;
+ OS << "<";
+ TemplateParams->output(OS, Flags);
+ OS << ">";
+}
+
+void DynamicStructorIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (IsDestructor)
+ OS << "`dynamic atexit destructor for ";
+ else
+ OS << "`dynamic initializer for ";
+
+ if (Variable) {
+ OS << "`";
+ Variable->output(OS, Flags);
+ OS << "''";
+ } else {
+ OS << "'";
+ Name->output(OS, Flags);
+ OS << "''";
+ }
+}
+
+void NamedIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
+ OS << Name;
+ outputTemplateParameters(OS, Flags);
+}
+
+void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ switch (Operator) {
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, New, "operator new");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Delete, "operator delete");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Assign, "operator=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, RightShift, "operator>>");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LeftShift, "operator<<");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LogicalNot, "operator!");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Equals, "operator==");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, NotEquals, "operator!=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ArraySubscript,
+ "operator[]");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Pointer, "operator->");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Increment, "operator++");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Decrement, "operator--");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Minus, "operator-");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Plus, "operator+");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Dereference, "operator*");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseAnd, "operator&");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, MemberPointer,
+ "operator->*");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Divide, "operator/");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Modulus, "operator%");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LessThan, "operator<");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LessThanEqual, "operator<=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, GreaterThan, "operator>");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, GreaterThanEqual,
+ "operator>=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Comma, "operator,");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Parens, "operator()");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseNot, "operator~");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseXor, "operator^");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseOr, "operator|");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LogicalAnd, "operator&&");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LogicalOr, "operator||");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, TimesEqual, "operator*=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, PlusEqual, "operator+=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, MinusEqual, "operator-=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, DivEqual, "operator/=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ModEqual, "operator%=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, RshEqual, "operator>>=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LshEqual, "operator<<=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseAndEqual,
+ "operator&=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseOrEqual,
+ "operator|=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, BitwiseXorEqual,
+ "operator^=");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VbaseDtor, "`vbase dtor'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VecDelDtor,
+ "`vector deleting dtor'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, DefaultCtorClosure,
+ "`default ctor closure'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ScalarDelDtor,
+ "`scalar deleting dtor'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VecCtorIter,
+ "`vector ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VecDtorIter,
+ "`vector dtor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VecVbaseCtorIter,
+ "`vector vbase ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VdispMap,
+ "`virtual displacement map'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVecCtorIter,
+ "`eh vector ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVecDtorIter,
+ "`eh vector dtor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVecVbaseCtorIter,
+ "`eh vector vbase ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, CopyCtorClosure,
+ "`copy ctor closure'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, LocalVftableCtorClosure,
+ "`local vftable ctor closure'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ArrayNew, "operator new[]");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ArrayDelete,
+ "operator delete[]");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ManVectorCtorIter,
+ "`managed vector ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ManVectorDtorIter,
+ "`managed vector dtor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVectorCopyCtorIter,
+ "`EH vector copy ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, EHVectorVbaseCopyCtorIter,
+ "`EH vector vbase copy ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VectorCopyCtorIter,
+ "`vector copy ctor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, VectorVbaseCopyCtorIter,
+ "`vector vbase copy constructor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ManVectorVbaseCopyCtorIter,
+ "`managed vector vbase copy constructor iterator'");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, CoAwait, "co_await");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Spaceship, "operator <=>");
+ case IntrinsicFunctionKind::MaxIntrinsic:
+ case IntrinsicFunctionKind::None:
+ break;
+ }
+ outputTemplateParameters(OS, Flags);
+}
+
+void LocalStaticGuardIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "`local static guard'";
+ if (ScopeIndex > 0)
+ OS << "{" << ScopeIndex << "}";
+}
+
+void ConversionOperatorIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "operator";
+ outputTemplateParameters(OS, Flags);
+ OS << " ";
+ TargetType->output(OS, Flags);
+}
+
+void StructorIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
+ if (IsDestructor)
+ OS << "~";
+ Class->output(OS, Flags);
+ outputTemplateParameters(OS, Flags);
+}
+
+void LiteralOperatorIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "operator \"\"" << Name;
+ outputTemplateParameters(OS, Flags);
+}
+
+void FunctionSignatureNode::outputPre(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (FunctionClass & FC_Public)
+ OS << "public: ";
+ if (FunctionClass & FC_Protected)
+ OS << "protected: ";
+ if (FunctionClass & FC_Private)
+ OS << "private: ";
+
+ if (!(FunctionClass & FC_Global)) {
+ if (FunctionClass & FC_Static)
+ OS << "static ";
+ }
+ if (FunctionClass & FC_Virtual)
+ OS << "virtual ";
+
+ if (FunctionClass & FC_ExternC)
+ OS << "extern \"C\" ";
+
+ if (ReturnType) {
+ ReturnType->outputPre(OS, Flags);
+ OS << " ";
+ }
+
+ if (!(Flags & OF_NoCallingConvention))
+ outputCallingConvention(OS, CallConvention);
+}
+
+void FunctionSignatureNode::outputPost(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (!(FunctionClass & FC_NoParameterList)) {
+ OS << "(";
+ if (Params)
+ Params->output(OS, Flags);
+ else
+ OS << "void";
+ OS << ")";
+ }
+
+ if (Quals & Q_Const)
+ OS << " const";
+ if (Quals & Q_Volatile)
+ OS << " volatile";
+ if (Quals & Q_Restrict)
+ OS << " __restrict";
+ if (Quals & Q_Unaligned)
+ OS << " __unaligned";
+
+ if (IsNoexcept)
+ OS << " noexcept";
+
+ if (RefQualifier == FunctionRefQualifier::Reference)
+ OS << " &";
+ else if (RefQualifier == FunctionRefQualifier::RValueReference)
+ OS << " &&";
+
+ if (ReturnType)
+ ReturnType->outputPost(OS, Flags);
+}
+
+void ThunkSignatureNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ OS << "[thunk]: ";
+
+ FunctionSignatureNode::outputPre(OS, Flags);
+}
+
+void ThunkSignatureNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+ if (FunctionClass & FC_StaticThisAdjust) {
+ OS << "`adjustor{" << ThisAdjust.StaticOffset << "}'";
+ } else if (FunctionClass & FC_VirtualThisAdjust) {
+ if (FunctionClass & FC_VirtualThisAdjustEx) {
+ OS << "`vtordispex{" << ThisAdjust.VBPtrOffset << ", "
+ << ThisAdjust.VBOffsetOffset << ", " << ThisAdjust.VtordispOffset
+ << ", " << ThisAdjust.StaticOffset << "}'";
+ } else {
+ OS << "`vtordisp{" << ThisAdjust.VtordispOffset << ", "
+ << ThisAdjust.StaticOffset << "}'";
+ }
+ }
+
+ FunctionSignatureNode::outputPost(OS, Flags);
+}
+
+void PointerTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ if (Pointee->kind() == NodeKind::FunctionSignature) {
+ // If this is a pointer to a function, don't output the calling convention.
+ // It needs to go inside the parentheses.
+ const FunctionSignatureNode *Sig =
+ static_cast<const FunctionSignatureNode *>(Pointee);
+ Sig->outputPre(OS, OF_NoCallingConvention);
+ } else
+ Pointee->outputPre(OS, Flags);
+
+ outputSpaceIfNecessary(OS);
+
+ if (Quals & Q_Unaligned)
+ OS << "__unaligned ";
+
+ if (Pointee->kind() == NodeKind::ArrayType) {
+ OS << "(";
+ } else if (Pointee->kind() == NodeKind::FunctionSignature) {
+ OS << "(";
+ const FunctionSignatureNode *Sig =
+ static_cast<const FunctionSignatureNode *>(Pointee);
+ outputCallingConvention(OS, Sig->CallConvention);
+ OS << " ";
+ }
+
+ if (ClassParent) {
+ ClassParent->output(OS, Flags);
+ OS << "::";
+ }
+
+ switch (Affinity) {
+ case PointerAffinity::Pointer:
+ OS << "*";
+ break;
+ case PointerAffinity::Reference:
+ OS << "&";
+ break;
+ case PointerAffinity::RValueReference:
+ OS << "&&";
+ break;
+ default:
+ assert(false);
+ }
+ outputQualifiers(OS, Quals, false, false);
+}
+
+void PointerTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+ if (Pointee->kind() == NodeKind::ArrayType ||
+ Pointee->kind() == NodeKind::FunctionSignature)
+ OS << ")";
+
+ Pointee->outputPost(OS, Flags);
+}
+
+void TagTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ if (!(Flags & OF_NoTagSpecifier)) {
+ switch (Tag) {
+ OUTPUT_ENUM_CLASS_VALUE(TagKind, Class, "class");
+ OUTPUT_ENUM_CLASS_VALUE(TagKind, Struct, "struct");
+ OUTPUT_ENUM_CLASS_VALUE(TagKind, Union, "union");
+ OUTPUT_ENUM_CLASS_VALUE(TagKind, Enum, "enum");
+ }
+ OS << " ";
+ }
+ QualifiedName->output(OS, Flags);
+ outputQualifiers(OS, Quals, true, false);
+}
+
+void TagTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {}
+
+void ArrayTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ ElementType->outputPre(OS, Flags);
+ outputQualifiers(OS, Quals, true, false);
+}
+
+void ArrayTypeNode::outputOneDimension(OutputStream &OS, OutputFlags Flags,
+ Node *N) const {
+ assert(N->kind() == NodeKind::IntegerLiteral);
+ IntegerLiteralNode *ILN = static_cast<IntegerLiteralNode *>(N);
+ if (ILN->Value != 0)
+ ILN->output(OS, Flags);
+}
+
+void ArrayTypeNode::outputDimensionsImpl(OutputStream &OS,
+ OutputFlags Flags) const {
+ if (Dimensions->Count == 0)
+ return;
+
+ outputOneDimension(OS, Flags, Dimensions->Nodes[0]);
+ for (size_t I = 1; I < Dimensions->Count; ++I) {
+ OS << "][";
+ outputOneDimension(OS, Flags, Dimensions->Nodes[I]);
+ }
+}
+
+void ArrayTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+ OS << "[";
+ outputDimensionsImpl(OS, Flags);
+ OS << "]";
+
+ ElementType->outputPost(OS, Flags);
+}
+
+void SymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ Name->output(OS, Flags);
+}
+
+void FunctionSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ Signature->outputPre(OS, Flags);
+ outputSpaceIfNecessary(OS);
+ Name->output(OS, Flags);
+ Signature->outputPost(OS, Flags);
+}
+
+void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ switch (SC) {
+ case StorageClass::PrivateStatic:
+ OS << "private: static ";
+ break;
+ case StorageClass::PublicStatic:
+ OS << "public: static ";
+ break;
+ case StorageClass::ProtectedStatic:
+ OS << "protected: static ";
+ break;
+ default:
+ break;
+ }
+
+ if (Type) {
+ Type->outputPre(OS, Flags);
+ outputSpaceIfNecessary(OS);
+ }
+ Name->output(OS, Flags);
+ if (Type)
+ Type->outputPost(OS, Flags);
+}
+
+void CustomTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+ Identifier->output(OS, Flags);
+}
+void CustomTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {}
+
+void QualifiedNameNode::output(OutputStream &OS, OutputFlags Flags) const {
+ Components->output(OS, Flags, "::");
+}
+
+void RttiBaseClassDescriptorNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "`RTTI Base Class Descriptor at (";
+ OS << NVOffset << ", " << VBPtrOffset << ", " << VBTableOffset << ", "
+ << this->Flags;
+ OS << ")'";
+}
+
+void LocalStaticGuardVariableNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ Name->output(OS, Flags);
+}
+
+void VcallThunkIdentifierNode::output(OutputStream &OS,
+ OutputFlags Flags) const {
+ OS << "`vcall'{" << OffsetInVTable << ", {flat}}";
+}
+
+void SpecialTableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+ outputQualifiers(OS, Quals, false, true);
+ Name->output(OS, Flags);
+ if (TargetName) {
+ OS << "{for `";
+ TargetName->output(OS, Flags);
+ OS << "'}";
+ }
+ return;
+}
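The outputPre/outputPost pairs above exist because C declarator syntax wraps
around the name, so a type string cannot be built by simple appending. A
standalone toy example (not LLVM code) of how FunctionSymbolNode::output
stitches the two halves around the symbol name:

// Illustration only: render "int (__cdecl *fp)(int)" the way the node
// classes do it -- outputPre(), then the name, then outputPost().
#include <iostream>
#include <string>

int main() {
  std::string Pre = "int (__cdecl *"; // return type + pointer introducer
  std::string Name = "fp";            // the symbol name goes in the middle
  std::string Post = ")(int)";        // parameter list closes the declarator
  std::cout << Pre << Name << Post << '\n';
  return 0;
}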
diff --git a/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index fd4f0746f7f9..8204f5a90268 100644
--- a/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -76,8 +76,8 @@ struct RegisteredObjectInfo {
};
// Buffer for an in-memory object file in executable memory
-typedef llvm::DenseMap< const char*, RegisteredObjectInfo>
- RegisteredObjectBufferMap;
+typedef llvm::DenseMap<JITEventListener::ObjectKey, RegisteredObjectInfo>
+ RegisteredObjectBufferMap;
/// Global access point for the JIT debugging interface designed for use with a
/// singleton toolbox. Handles thread-safe registration and deregistration of
@@ -99,13 +99,13 @@ public:
/// Creates an entry in the JIT registry for the buffer @p Object,
/// which must contain an object file in executable memory with any
/// debug information for the debugger.
- void NotifyObjectEmitted(const ObjectFile &Object,
- const RuntimeDyld::LoadedObjectInfo &L) override;
+ void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
/// Removes the internal registration of @p Object, and
/// frees associated resources.
/// Returns true if @p Object was found in ObjectBufferMap.
- void NotifyFreeingObject(const ObjectFile &Object) override;
+ void notifyFreeingObject(ObjectKey K) override;
private:
/// Deregister the debug info for the given object file from the debugger
@@ -147,11 +147,11 @@ GDBJITRegistrationListener::~GDBJITRegistrationListener() {
ObjectBufferMap.clear();
}
-void GDBJITRegistrationListener::NotifyObjectEmitted(
- const ObjectFile &Object,
- const RuntimeDyld::LoadedObjectInfo &L) {
+void GDBJITRegistrationListener::notifyObjectLoaded(
+ ObjectKey K, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
- OwningBinary<ObjectFile> DebugObj = L.getObjectForDebug(Object);
+ OwningBinary<ObjectFile> DebugObj = L.getObjectForDebug(Obj);
// Bail out if debug objects aren't supported.
if (!DebugObj.getBinary())
@@ -160,11 +160,8 @@ void GDBJITRegistrationListener::NotifyObjectEmitted(
const char *Buffer = DebugObj.getBinary()->getMemoryBufferRef().getBufferStart();
size_t Size = DebugObj.getBinary()->getMemoryBufferRef().getBufferSize();
- const char *Key = Object.getMemoryBufferRef().getBufferStart();
-
- assert(Key && "Attempt to register a null object with a debugger.");
llvm::MutexGuard locked(*JITDebugLock);
- assert(ObjectBufferMap.find(Key) == ObjectBufferMap.end() &&
+ assert(ObjectBufferMap.find(K) == ObjectBufferMap.end() &&
"Second attempt to perform debug registration.");
jit_code_entry* JITCodeEntry = new jit_code_entry();
@@ -175,16 +172,15 @@ void GDBJITRegistrationListener::NotifyObjectEmitted(
JITCodeEntry->symfile_addr = Buffer;
JITCodeEntry->symfile_size = Size;
- ObjectBufferMap[Key] = RegisteredObjectInfo(Size, JITCodeEntry,
- std::move(DebugObj));
+ ObjectBufferMap[K] =
+ RegisteredObjectInfo(Size, JITCodeEntry, std::move(DebugObj));
NotifyDebugger(JITCodeEntry);
}
}
-void GDBJITRegistrationListener::NotifyFreeingObject(const ObjectFile& Object) {
- const char *Key = Object.getMemoryBufferRef().getBufferStart();
+void GDBJITRegistrationListener::notifyFreeingObject(ObjectKey K) {
llvm::MutexGuard locked(*JITDebugLock);
- RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(Key);
+ RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(K);
if (I != ObjectBufferMap.end()) {
deregisterObjectInternal(I);
diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 211f5216811f..e9051c198506 100644
--- a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -47,7 +47,7 @@ class IntelJITEventListener : public JITEventListener {
typedef DenseMap<const void *, MethodAddressVector> ObjectMap;
ObjectMap LoadedObjectMap;
- std::map<const char*, OwningBinary<ObjectFile>> DebugObjects;
+ std::map<ObjectKey, OwningBinary<ObjectFile>> DebugObjects;
public:
IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
@@ -57,10 +57,10 @@ public:
~IntelJITEventListener() {
}
- void NotifyObjectEmitted(const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) override;
+ void notifyObjectLoaded(ObjectKey Key, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
- void NotifyFreeingObject(const ObjectFile &Obj) override;
+ void notifyFreeingObject(ObjectKey Key) override;
};
static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress,
@@ -96,9 +96,9 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat(
return Result;
}
-void IntelJITEventListener::NotifyObjectEmitted(
- const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) {
+void IntelJITEventListener::notifyObjectLoaded(
+ ObjectKey Key, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
const ObjectFile *DebugObj = DebugObjOwner.getBinary();
@@ -188,17 +188,17 @@ void IntelJITEventListener::NotifyObjectEmitted(
// registered function addresses for each loaded object. We will
// use the MethodIDs map to get the registered ID for each function.
LoadedObjectMap[ObjData] = Functions;
- DebugObjects[Obj.getData().data()] = std::move(DebugObjOwner);
+ DebugObjects[Key] = std::move(DebugObjOwner);
}
-void IntelJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+void IntelJITEventListener::notifyFreeingObject(ObjectKey Key) {
// This object may not have been registered with the listener. If it wasn't,
// bail out.
- if (DebugObjects.find(Obj.getData().data()) == DebugObjects.end())
+ if (DebugObjects.find(Key) == DebugObjects.end())
return;
// Get the address of the object image for use as a unique identifier
- const ObjectFile &DebugObj = *DebugObjects[Obj.getData().data()].getBinary();
+ const ObjectFile &DebugObj = *DebugObjects[Key].getBinary();
const void* ObjData = DebugObj.getData().data();
// Get the object's function list from LoadedObjectMap
@@ -223,7 +223,7 @@ void IntelJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
// Erase the object from LoadedObjectMap
LoadedObjectMap.erase(OI);
- DebugObjects.erase(Obj.getData().data());
+ DebugObjects.erase(Key);
}
} // anonymous namespace.
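The listener changes in this and the previous file follow the renamed
JITEventListener interface, which identifies objects by an integral
JITEventListener::ObjectKey instead of a buffer pointer. A hedged sketch of a
listener written against the new overrides (DebugLoggingListener is a made-up
name used only for illustration):

#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/Object/ObjectFile.h"
#include <cstdio>

namespace {
class DebugLoggingListener : public llvm::JITEventListener {
public:
  void notifyObjectLoaded(ObjectKey K, const llvm::object::ObjectFile &Obj,
                          const llvm::RuntimeDyld::LoadedObjectInfo &L) override {
    // Objects are keyed by an integer, so the key stays valid even after the
    // ObjectFile it was derived from has been destroyed.
    std::printf("loaded object %llu (%zu bytes)\n", (unsigned long long)K,
                (size_t)Obj.getData().size());
  }
  void notifyFreeingObject(ObjectKey K) override {
    std::printf("freeing object %llu\n", (unsigned long long)K);
  }
};
} // end anonymous namespace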
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 39cf6d4a32a3..98dca1102759 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1778,17 +1778,14 @@ void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
void Interpreter::visitInsertElementInst(InsertElementInst &I) {
ExecutionContext &SF = ECStack.back();
- Type *Ty = I.getType();
-
- if(!(Ty->isVectorTy()) )
- llvm_unreachable("Unhandled dest type for insertelement instruction");
+ VectorType *Ty = cast<VectorType>(I.getType());
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
GenericValue Dest;
- Type *TyContained = Ty->getContainedType(0);
+ Type *TyContained = Ty->getElementType();
const unsigned indx = unsigned(Src3.IntVal.getZExtValue());
Dest.AggregateVal = Src1.AggregateVal;
@@ -1814,9 +1811,7 @@ void Interpreter::visitInsertElementInst(InsertElementInst &I) {
void Interpreter::visitShuffleVectorInst(ShuffleVectorInst &I){
ExecutionContext &SF = ECStack.back();
- Type *Ty = I.getType();
- if(!(Ty->isVectorTy()))
- llvm_unreachable("Unhandled dest type for shufflevector instruction");
+ VectorType *Ty = cast<VectorType>(I.getType());
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
@@ -1827,7 +1822,7 @@ void Interpreter::visitShuffleVectorInst(ShuffleVectorInst &I){
// bytecode can't contain different types for src1 and src2 for a
// shufflevector instruction.
- Type *TyContained = Ty->getContainedType(0);
+ Type *TyContained = Ty->getElementType();
unsigned src1Size = (unsigned)Src1.AggregateVal.size();
unsigned src2Size = (unsigned)Src2.AggregateVal.size();
unsigned src3Size = (unsigned)Src3.AggregateVal.size();
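The Interpreter hunks above replace the manual isVectorTy() check plus
getContainedType(0) with cast<VectorType>, which asserts the dynamic type and
gives typed access in one step. A minimal illustration of the idiom, not taken
from the patch:

// Precondition (asserted by cast<>): T is a vector type.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"

static llvm::Type *elementTypeOf(llvm::Type *T) {
  auto *VT = llvm::cast<llvm::VectorType>(T);
  return VT->getElementType();
}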
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 64dca930722e..334fcacf8078 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -103,8 +103,9 @@ static ExFunc lookupFunction(const Function *F) {
// composite function name should be.
std::string ExtName = "lle_";
FunctionType *FT = F->getFunctionType();
- for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
- ExtName += getTypeID(FT->getContainedType(i));
+ ExtName += getTypeID(FT->getReturnType());
+ for (Type *T : FT->params())
+ ExtName += getTypeID(T);
ExtName += ("_" + F->getName()).str();
sys::ScopedLock Writer(*FunctionsLock);
@@ -227,7 +228,8 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef<GenericValue> ArgVals,
Type *RetTy = FTy->getReturnType();
ffi_type *rtype = ffiTypeFor(RetTy);
- if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) {
+ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, args.data()) ==
+ FFI_OK) {
SmallVector<uint8_t, 128> ret;
if (RetTy->getTypeID() != Type::VoidTyID)
ret.resize(TD.getTypeStoreSize(RetTy));
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 2c663c2e1edf..ffc6707e1488 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -95,7 +95,7 @@ MCJIT::~MCJIT() {
for (auto &Obj : LoadedObjects)
if (Obj)
- NotifyFreeingObject(*Obj);
+ notifyFreeingObject(*Obj);
Archives.clear();
}
@@ -119,7 +119,7 @@ void MCJIT::addObjectFile(std::unique_ptr<object::ObjectFile> Obj) {
if (Dyld.hasError())
report_fatal_error(Dyld.getErrorString());
- NotifyObjectEmitted(*Obj, *L);
+ notifyObjectLoaded(*Obj, *L);
LoadedObjects.push_back(std::move(Obj));
}
@@ -216,7 +216,7 @@ void MCJIT::generateCodeForModule(Module *M) {
if (!LoadedObject) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(LoadedObject.takeError(), OS, "");
+ logAllUnhandledErrors(LoadedObject.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -226,7 +226,7 @@ void MCJIT::generateCodeForModule(Module *M) {
if (Dyld.hasError())
report_fatal_error(Dyld.getErrorString());
- NotifyObjectEmitted(*LoadedObject.get(), *L);
+ notifyObjectLoaded(*LoadedObject.get(), *L);
Buffers.push_back(std::move(ObjectToLoad));
LoadedObjects.push_back(std::move(*LoadedObject));
@@ -326,8 +326,9 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
return *AddrOrErr;
else
report_fatal_error(AddrOrErr.takeError());
- } else
+ } else if (auto Err = Sym.takeError())
report_fatal_error(Sym.takeError());
+ return 0;
}
JITSymbol MCJIT::findSymbol(const std::string &Name,
@@ -647,19 +648,23 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
}
}
-void MCJIT::NotifyObjectEmitted(const object::ObjectFile& Obj,
- const RuntimeDyld::LoadedObjectInfo &L) {
+void MCJIT::notifyObjectLoaded(const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
+ uint64_t Key =
+ static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Obj.getData().data()));
MutexGuard locked(lock);
MemMgr->notifyObjectLoaded(this, Obj);
for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
- EventListeners[I]->NotifyObjectEmitted(Obj, L);
+ EventListeners[I]->notifyObjectLoaded(Key, Obj, L);
}
}
-void MCJIT::NotifyFreeingObject(const object::ObjectFile& Obj) {
+void MCJIT::notifyFreeingObject(const object::ObjectFile &Obj) {
+ uint64_t Key =
+ static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Obj.getData().data()));
MutexGuard locked(lock);
for (JITEventListener *L : EventListeners)
- L->NotifyFreeingObject(Obj);
+ L->notifyFreeingObject(Key);
}
JITSymbol
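
With the renamed notifyObjectLoaded/notifyFreeingObject hooks, MCJIT identifies each object to its listeners by an integer ObjectKey derived from the object buffer's address, so listeners no longer need the ObjectFile when it is freed. A minimal listener sketch using that key (the class name and the map it keeps are illustrative, not part of this patch):

#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/Object/ObjectFile.h"
#include <map>

using namespace llvm;

class CountingListener : public JITEventListener {
  std::map<ObjectKey, uint64_t> LoadedSizes;

public:
  void notifyObjectLoaded(ObjectKey Key, const object::ObjectFile &Obj,
                          const RuntimeDyld::LoadedObjectInfo &L) override {
    // Record something about the object under its key...
    LoadedSizes[Key] = Obj.getData().size();
  }

  void notifyFreeingObject(ObjectKey Key) override {
    // ...and find it again at free time without needing the ObjectFile.
    LoadedSizes.erase(Key);
  }
};
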
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index 943b14942a0f..1119e138720f 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -331,9 +331,9 @@ protected:
/// the future.
std::unique_ptr<MemoryBuffer> emitObject(Module *M);
- void NotifyObjectEmitted(const object::ObjectFile& Obj,
- const RuntimeDyld::LoadedObjectInfo &L);
- void NotifyFreeingObject(const object::ObjectFile& Obj);
+ void notifyObjectLoaded(const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L);
+ void notifyFreeingObject(const object::ObjectFile &Obj);
JITSymbol findExistingSymbol(const std::string &Name);
Module *findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly);
diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 6f0825fb38da..21af6b585c41 100644
--- a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -40,7 +40,7 @@ class OProfileJITEventListener : public JITEventListener {
std::unique_ptr<OProfileWrapper> Wrapper;
void initialize();
- std::map<const char*, OwningBinary<ObjectFile>> DebugObjects;
+ std::map<ObjectKey, OwningBinary<ObjectFile>> DebugObjects;
public:
OProfileJITEventListener(std::unique_ptr<OProfileWrapper> LibraryWrapper)
@@ -50,10 +50,10 @@ public:
~OProfileJITEventListener();
- void NotifyObjectEmitted(const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) override;
+ void notifyObjectLoaded(ObjectKey Key, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
- void NotifyFreeingObject(const ObjectFile &Obj) override;
+ void notifyFreeingObject(ObjectKey Key) override;
};
void OProfileJITEventListener::initialize() {
@@ -78,9 +78,9 @@ OProfileJITEventListener::~OProfileJITEventListener() {
}
}
-void OProfileJITEventListener::NotifyObjectEmitted(
- const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) {
+void OProfileJITEventListener::notifyObjectLoaded(
+ ObjectKey Key, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
if (!Wrapper->isAgentAvailable()) {
return;
}
@@ -137,18 +137,18 @@ void OProfileJITEventListener::NotifyObjectEmitted(
}
}
- DebugObjects[Obj.getData().data()] = std::move(DebugObjOwner);
+ DebugObjects[Key] = std::move(DebugObjOwner);
}
-void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+void OProfileJITEventListener::notifyFreeingObject(ObjectKey Key) {
if (Wrapper->isAgentAvailable()) {
// If there was no agent registered when the original object was loaded then
// we won't have created a debug object for it, so bail out.
- if (DebugObjects.find(Obj.getData().data()) == DebugObjects.end())
+ if (DebugObjects.find(Key) == DebugObjects.end())
return;
- const ObjectFile &DebugObj = *DebugObjects[Obj.getData().data()].getBinary();
+ const ObjectFile &DebugObj = *DebugObjects[Key].getBinary();
// Use symbol info to iterate functions in the object.
for (symbol_iterator I = DebugObj.symbol_begin(),
@@ -171,7 +171,7 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
}
}
- DebugObjects.erase(Obj.getData().data());
+ DebugObjects.erase(Key);
}
} // anonymous namespace.
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index d42e7b05ba67..241eb3600da7 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -8,201 +8,86 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
using namespace llvm::orc;
-namespace {
+static ThreadSafeModule extractSubModule(ThreadSafeModule &TSM,
+ StringRef Suffix,
+ GVPredicate ShouldExtract) {
-template <typename MaterializerFtor>
-class LambdaValueMaterializer final : public ValueMaterializer {
-public:
- LambdaValueMaterializer(MaterializerFtor M) : M(std::move(M)) {}
-
- Value *materialize(Value *V) final { return M(V); }
-
-private:
- MaterializerFtor M;
-};
+ auto DeleteExtractedDefs = [](GlobalValue &GV) {
+ // Bump the linkage: this global will be provided by the external module.
+ GV.setLinkage(GlobalValue::ExternalLinkage);
-template <typename MaterializerFtor>
-LambdaValueMaterializer<MaterializerFtor>
-createLambdaValueMaterializer(MaterializerFtor M) {
- return LambdaValueMaterializer<MaterializerFtor>(std::move(M));
-}
-} // namespace
-
-static void extractAliases(MaterializationResponsibility &R, Module &M,
- MangleAndInterner &Mangle) {
- SymbolAliasMap Aliases;
-
- std::vector<GlobalAlias *> ModAliases;
- for (auto &A : M.aliases())
- ModAliases.push_back(&A);
-
- for (auto *A : ModAliases) {
- Constant *Aliasee = A->getAliasee();
- assert(A->hasName() && "Anonymous alias?");
- assert(Aliasee->hasName() && "Anonymous aliasee");
- std::string AliasName = A->getName();
-
- Aliases[Mangle(AliasName)] = SymbolAliasMapEntry(
- {Mangle(Aliasee->getName()), JITSymbolFlags::fromGlobalValue(*A)});
-
- if (isa<Function>(Aliasee)) {
- auto *F = cloneFunctionDecl(M, *cast<Function>(Aliasee));
- A->replaceAllUsesWith(F);
- A->eraseFromParent();
- F->setName(AliasName);
- } else if (isa<GlobalValue>(Aliasee)) {
- auto *G = cloneGlobalVariableDecl(M, *cast<GlobalVariable>(Aliasee));
- A->replaceAllUsesWith(G);
- A->eraseFromParent();
- G->setName(AliasName);
- }
- }
-
- R.replace(symbolAliases(std::move(Aliases)));
-}
-
-static std::unique_ptr<Module>
-extractAndClone(Module &M, LLVMContext &NewContext, StringRef Suffix,
- function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {
- SmallVector<char, 1> ClonedModuleBuffer;
-
- {
- std::set<GlobalValue *> ClonedDefsInSrc;
- ValueToValueMapTy VMap;
- auto Tmp = CloneModule(M, VMap, [&](const GlobalValue *GV) {
- if (ShouldCloneDefinition(GV)) {
- ClonedDefsInSrc.insert(const_cast<GlobalValue *>(GV));
- return true;
- }
- return false;
- });
-
- for (auto *GV : ClonedDefsInSrc) {
- // Delete the definition and bump the linkage in the source module.
- if (isa<Function>(GV)) {
- auto &F = *cast<Function>(GV);
- F.deleteBody();
- F.setPersonalityFn(nullptr);
- } else if (isa<GlobalVariable>(GV)) {
- cast<GlobalVariable>(GV)->setInitializer(nullptr);
+ // Delete the definition in the source module.
+ if (isa<Function>(GV)) {
+ auto &F = cast<Function>(GV);
+ F.deleteBody();
+ F.setPersonalityFn(nullptr);
+ } else if (isa<GlobalVariable>(GV)) {
+ cast<GlobalVariable>(GV).setInitializer(nullptr);
+ } else if (isa<GlobalAlias>(GV)) {
+ // We need to turn deleted aliases into function or variable decls based
+ // on the type of their aliasee.
+ auto &A = cast<GlobalAlias>(GV);
+ Constant *Aliasee = A.getAliasee();
+ assert(A.hasName() && "Anonymous alias?");
+ assert(Aliasee->hasName() && "Anonymous aliasee");
+ std::string AliasName = A.getName();
+
+ if (isa<Function>(Aliasee)) {
+ auto *F = cloneFunctionDecl(*A.getParent(), *cast<Function>(Aliasee));
+ A.replaceAllUsesWith(F);
+ A.eraseFromParent();
+ F->setName(AliasName);
+ } else if (isa<GlobalVariable>(Aliasee)) {
+ auto *G = cloneGlobalVariableDecl(*A.getParent(),
+ *cast<GlobalVariable>(Aliasee));
+ A.replaceAllUsesWith(G);
+ A.eraseFromParent();
+ G->setName(AliasName);
} else
- llvm_unreachable("Unsupported global type");
+ llvm_unreachable("Alias to unsupported type");
+ } else
+ llvm_unreachable("Unsupported global type");
+ };
- GV->setLinkage(GlobalValue::ExternalLinkage);
- }
-
- BitcodeWriter BCWriter(ClonedModuleBuffer);
-
- BCWriter.writeModule(*Tmp);
- BCWriter.writeSymtab();
- BCWriter.writeStrtab();
- }
-
- MemoryBufferRef ClonedModuleBufferRef(
- StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()),
- "cloned module buffer");
+ auto NewTSMod = cloneToNewContext(TSM, ShouldExtract, DeleteExtractedDefs);
+ auto &M = *NewTSMod.getModule();
+ M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str());
- auto ClonedModule =
- cantFail(parseBitcodeFile(ClonedModuleBufferRef, NewContext));
- ClonedModule->setModuleIdentifier((M.getName() + Suffix).str());
- return ClonedModule;
-}
-
-static std::unique_ptr<Module> extractGlobals(Module &M,
- LLVMContext &NewContext) {
- return extractAndClone(M, NewContext, ".globals", [](const GlobalValue *GV) {
- return isa<GlobalVariable>(GV);
- });
+ return NewTSMod;
}
namespace llvm {
namespace orc {
-class ExtractingIRMaterializationUnit : public IRMaterializationUnit {
+class PartitioningIRMaterializationUnit : public IRMaterializationUnit {
public:
- ExtractingIRMaterializationUnit(ExecutionSession &ES,
- CompileOnDemandLayer2 &Parent,
- std::unique_ptr<Module> M)
- : IRMaterializationUnit(ES, std::move(M)), Parent(Parent) {}
-
- ExtractingIRMaterializationUnit(std::unique_ptr<Module> M,
- SymbolFlagsMap SymbolFlags,
- SymbolNameToDefinitionMap SymbolToDefinition,
- CompileOnDemandLayer2 &Parent)
- : IRMaterializationUnit(std::move(M), std::move(SymbolFlags),
+ PartitioningIRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM,
+ VModuleKey K, CompileOnDemandLayer &Parent)
+ : IRMaterializationUnit(ES, std::move(TSM), std::move(K)),
+ Parent(Parent) {}
+
+ PartitioningIRMaterializationUnit(
+ ThreadSafeModule TSM, SymbolFlagsMap SymbolFlags,
+ SymbolNameToDefinitionMap SymbolToDefinition,
+ CompileOnDemandLayer &Parent)
+ : IRMaterializationUnit(std::move(TSM), std::move(K),
+ std::move(SymbolFlags),
std::move(SymbolToDefinition)),
Parent(Parent) {}
private:
void materialize(MaterializationResponsibility R) override {
- // FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the
- // extracted module key, extracted module, and source module key
- // together. This could be used, for example, to provide a specific
- // memory manager instance to the linking layer.
-
- auto RequestedSymbols = R.getRequestedSymbols();
-
- // Extract the requested functions into a new module.
- std::unique_ptr<Module> ExtractedFunctionsModule;
- if (!RequestedSymbols.empty()) {
- std::string Suffix;
- std::set<const GlobalValue *> FunctionsToClone;
- for (auto &Name : RequestedSymbols) {
- auto I = SymbolToDefinition.find(Name);
- assert(I != SymbolToDefinition.end() && I->second != nullptr &&
- "Should have a non-null definition");
- FunctionsToClone.insert(I->second);
- Suffix += ".";
- Suffix += *Name;
- }
-
- std::lock_guard<std::mutex> Lock(SourceModuleMutex);
- ExtractedFunctionsModule =
- extractAndClone(*M, Parent.GetAvailableContext(), Suffix,
- [&](const GlobalValue *GV) -> bool {
- return FunctionsToClone.count(GV);
- });
- }
-
- // Build a new ExtractingIRMaterializationUnit to delegate the unrequested
- // symbols to.
- SymbolFlagsMap DelegatedSymbolFlags;
- IRMaterializationUnit::SymbolNameToDefinitionMap
- DelegatedSymbolToDefinition;
- for (auto &KV : SymbolToDefinition) {
- if (RequestedSymbols.count(KV.first))
- continue;
- DelegatedSymbolFlags[KV.first] =
- JITSymbolFlags::fromGlobalValue(*KV.second);
- DelegatedSymbolToDefinition[KV.first] = KV.second;
- }
-
- if (!DelegatedSymbolFlags.empty()) {
- assert(DelegatedSymbolFlags.size() ==
- DelegatedSymbolToDefinition.size() &&
- "SymbolFlags and SymbolToDefinition should have the same number "
- "of entries");
- R.replace(llvm::make_unique<ExtractingIRMaterializationUnit>(
- std::move(M), std::move(DelegatedSymbolFlags),
- std::move(DelegatedSymbolToDefinition), Parent));
- }
-
- if (ExtractedFunctionsModule)
- Parent.emitExtractedFunctionsModule(std::move(R),
- std::move(ExtractedFunctionsModule));
+ Parent.emitPartition(std::move(R), std::move(TSM),
+ std::move(SymbolToDefinition));
}
- void discard(const VSO &V, SymbolStringPtr Name) override {
+ void discard(const JITDylib &V, const SymbolStringPtr &Name) override {
// All original symbols were materialized by the CODLayer and should be
// final. The function bodies provided by M should never be overridden.
llvm_unreachable("Discard should never be called on an "
@@ -210,44 +95,98 @@ private:
}
mutable std::mutex SourceModuleMutex;
- CompileOnDemandLayer2 &Parent;
+ CompileOnDemandLayer &Parent;
};
-CompileOnDemandLayer2::CompileOnDemandLayer2(
- ExecutionSession &ES, IRLayer &BaseLayer, JITCompileCallbackManager &CCMgr,
- IndirectStubsManagerBuilder BuildIndirectStubsManager,
- GetAvailableContextFunction GetAvailableContext)
- : IRLayer(ES), BaseLayer(BaseLayer), CCMgr(CCMgr),
- BuildIndirectStubsManager(std::move(BuildIndirectStubsManager)),
- GetAvailableContext(std::move(GetAvailableContext)) {}
-
-Error CompileOnDemandLayer2::add(VSO &V, VModuleKey K,
- std::unique_ptr<Module> M) {
- return IRLayer::add(V, K, std::move(M));
+Optional<CompileOnDemandLayer::GlobalValueSet>
+CompileOnDemandLayer::compileRequested(GlobalValueSet Requested) {
+ return std::move(Requested);
}
-void CompileOnDemandLayer2::emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) {
+Optional<CompileOnDemandLayer::GlobalValueSet>
+CompileOnDemandLayer::compileWholeModule(GlobalValueSet Requested) {
+ return None;
+}
+
+CompileOnDemandLayer::CompileOnDemandLayer(
+ ExecutionSession &ES, IRLayer &BaseLayer, LazyCallThroughManager &LCTMgr,
+ IndirectStubsManagerBuilder BuildIndirectStubsManager)
+ : IRLayer(ES), BaseLayer(BaseLayer), LCTMgr(LCTMgr),
+ BuildIndirectStubsManager(std::move(BuildIndirectStubsManager)) {}
+
+void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) {
+ this->Partition = std::move(Partition);
+}
+
+void CompileOnDemandLayer::emit(MaterializationResponsibility R,
+ ThreadSafeModule TSM) {
+ assert(TSM.getModule() && "Null module");
+
auto &ES = getExecutionSession();
- assert(M && "M should not be null");
+ auto &M = *TSM.getModule();
+
+ // First, do some cleanup on the module:
+ cleanUpModule(M);
+
+ // Now sort the callables and non-callables, build re-exports and lodge the
+ // actual module with the implementation dylib.
+ auto &PDR = getPerDylibResources(R.getTargetJITDylib());
- for (auto &GV : M->global_values())
- if (GV.hasWeakLinkage())
- GV.setLinkage(GlobalValue::ExternalLinkage);
+ MangleAndInterner Mangle(ES, M.getDataLayout());
+ SymbolAliasMap NonCallables;
+ SymbolAliasMap Callables;
+ for (auto &GV : M.global_values()) {
+ if (GV.isDeclaration() || GV.hasLocalLinkage() || GV.hasAppendingLinkage())
+ continue;
- MangleAndInterner Mangle(ES, M->getDataLayout());
+ auto Name = Mangle(GV.getName());
+ auto Flags = JITSymbolFlags::fromGlobalValue(GV);
+ if (Flags.isCallable())
+ Callables[Name] = SymbolAliasMapEntry(Name, Flags);
+ else
+ NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
+ }
+
+ // Create a partitioning materialization unit and lodge it with the
+ // implementation dylib.
+ if (auto Err = PDR.getImplDylib().define(
+ llvm::make_unique<PartitioningIRMaterializationUnit>(
+ ES, std::move(TSM), R.getVModuleKey(), *this))) {
+ ES.reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
- extractAliases(R, *M, Mangle);
+ R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), true));
+ R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(),
+ std::move(Callables)));
+}
- auto GlobalsModule = extractGlobals(*M, GetAvailableContext());
+CompileOnDemandLayer::PerDylibResources &
+CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) {
+ auto I = DylibResources.find(&TargetD);
+ if (I == DylibResources.end()) {
+ auto &ImplD = getExecutionSession().createJITDylib(
+ TargetD.getName() + ".impl", false);
+ TargetD.withSearchOrderDo([&](const JITDylibSearchList &TargetSearchOrder) {
+ auto NewSearchOrder = TargetSearchOrder;
+ assert(!NewSearchOrder.empty() &&
+ NewSearchOrder.front().first == &TargetD &&
+ NewSearchOrder.front().second == true &&
+ "TargetD must be at the front of its own search order and match "
+ "non-exported symbol");
+ NewSearchOrder.insert(std::next(NewSearchOrder.begin()), {&ImplD, true});
+ ImplD.setSearchOrder(std::move(NewSearchOrder), false);
+ });
+ PerDylibResources PDR(ImplD, BuildIndirectStubsManager());
+ I = DylibResources.insert(std::make_pair(&TargetD, std::move(PDR))).first;
+ }
- // Delete the bodies of any available externally functions, rename the
- // rest, and build the compile callbacks.
- std::map<SymbolStringPtr, std::pair<JITTargetAddress, JITSymbolFlags>>
- StubCallbacksAndLinkages;
- auto &TargetVSO = R.getTargetVSO();
+ return I->second;
+}
- for (auto &F : M->functions()) {
+void CompileOnDemandLayer::cleanUpModule(Module &M) {
+ for (auto &F : M.functions()) {
if (F.isDeclaration())
continue;
@@ -256,87 +195,108 @@ void CompileOnDemandLayer2::emit(MaterializationResponsibility R, VModuleKey K,
F.setPersonalityFn(nullptr);
continue;
}
+ }
+}
- assert(F.hasName() && "Function should have a name");
- std::string StubUnmangledName = F.getName();
- F.setName(F.getName() + "$body");
- auto StubDecl = cloneFunctionDecl(*M, F);
- StubDecl->setName(StubUnmangledName);
- StubDecl->setPersonalityFn(nullptr);
- StubDecl->setLinkage(GlobalValue::ExternalLinkage);
- F.replaceAllUsesWith(StubDecl);
-
- auto StubName = Mangle(StubUnmangledName);
- auto BodyName = Mangle(F.getName());
- if (auto CallbackAddr = CCMgr.getCompileCallback(
- [BodyName, &TargetVSO, &ES]() -> JITTargetAddress {
- if (auto Sym = lookup({&TargetVSO}, BodyName))
- return Sym->getAddress();
- else {
- ES.reportError(Sym.takeError());
- return 0;
- }
- })) {
- auto Flags = JITSymbolFlags::fromGlobalValue(F);
- Flags &= ~JITSymbolFlags::Weak;
- StubCallbacksAndLinkages[std::move(StubName)] =
- std::make_pair(*CallbackAddr, Flags);
- } else {
- ES.reportError(CallbackAddr.takeError());
- R.failMaterialization();
- return;
- }
+void CompileOnDemandLayer::expandPartition(GlobalValueSet &Partition) {
+ // Expands the partition to ensure the following rules hold:
+ // (1) If any alias is in the partition, its aliasee is also in the partition.
+ // (2) If any aliasee is in the partition, its aliases are also in the
+ //     partition.
+ // (3) If any global variable is in the partition then all global variables
+ // are in the partition.
+ assert(!Partition.empty() && "Unexpected empty partition");
+
+ const Module &M = *(*Partition.begin())->getParent();
+ bool ContainsGlobalVariables = false;
+ std::vector<const GlobalValue *> GVsToAdd;
+
+ for (auto *GV : Partition)
+ if (isa<GlobalAlias>(GV))
+ GVsToAdd.push_back(
+ cast<GlobalValue>(cast<GlobalAlias>(GV)->getAliasee()));
+ else if (isa<GlobalVariable>(GV))
+ ContainsGlobalVariables = true;
+
+ for (auto &A : M.aliases())
+ if (Partition.count(cast<GlobalValue>(A.getAliasee())))
+ GVsToAdd.push_back(&A);
+
+ if (ContainsGlobalVariables)
+ for (auto &G : M.globals())
+ GVsToAdd.push_back(&G);
+
+ for (auto *GV : GVsToAdd)
+ Partition.insert(GV);
+}
+
+void CompileOnDemandLayer::emitPartition(
+ MaterializationResponsibility R, ThreadSafeModule TSM,
+ IRMaterializationUnit::SymbolNameToDefinitionMap Defs) {
+
+ // FIXME: Need a 'notify lazy-extracting/emitting' callback to tie the
+ // extracted module key, extracted module, and source module key
+ // together. This could be used, for example, to provide a specific
+ // memory manager instance to the linking layer.
+
+ auto &ES = getExecutionSession();
+
+ GlobalValueSet RequestedGVs;
+ for (auto &Name : R.getRequestedSymbols()) {
+ assert(Defs.count(Name) && "No definition for symbol");
+ RequestedGVs.insert(Defs[Name]);
}
- // Build the stub inits map.
- IndirectStubsManager::StubInitsMap StubInits;
- for (auto &KV : StubCallbacksAndLinkages)
- StubInits[*KV.first] = KV.second;
+ auto GVsToExtract = Partition(RequestedGVs);
- // Build the function-body-extracting materialization unit.
- if (auto Err = R.getTargetVSO().define(
- llvm::make_unique<ExtractingIRMaterializationUnit>(ES, *this,
- std::move(M)))) {
- ES.reportError(std::move(Err));
- R.failMaterialization();
+ // Take a 'None' partition to mean the whole module (as opposed to an empty
+ // partition, which means "materialize nothing"). Emit the whole module
+ // unmodified to the base layer.
+ if (GVsToExtract == None) {
+ Defs.clear();
+ BaseLayer.emit(std::move(R), std::move(TSM));
return;
}
- // Build the stubs.
- // FIXME: Remove function bodies materialization unit if stub creation fails.
- auto &StubsMgr = getStubsManager(TargetVSO);
- if (auto Err = StubsMgr.createStubs(StubInits)) {
- ES.reportError(std::move(Err));
- R.failMaterialization();
+ // If the partition is empty, return the whole module to the symbol table.
+ if (GVsToExtract->empty()) {
+ R.replace(llvm::make_unique<PartitioningIRMaterializationUnit>(
+ std::move(TSM), R.getSymbols(), std::move(Defs), *this));
return;
}
- // Resolve and finalize stubs.
- SymbolMap ResolvedStubs;
- for (auto &KV : StubCallbacksAndLinkages) {
- if (auto Sym = StubsMgr.findStub(*KV.first, false))
- ResolvedStubs[KV.first] = Sym;
- else
- llvm_unreachable("Stub went missing");
+ // Ok -- we actually need to partition the symbols. Promote the symbol
+ // linkages/names.
+ // FIXME: We apply this once per partitioning. It's safe, but overkill.
+ {
+ auto PromotedGlobals = PromoteSymbols(*TSM.getModule());
+ if (!PromotedGlobals.empty()) {
+ MangleAndInterner Mangle(ES, TSM.getModule()->getDataLayout());
+ SymbolFlagsMap SymbolFlags;
+ for (auto &GV : PromotedGlobals)
+ SymbolFlags[Mangle(GV->getName())] =
+ JITSymbolFlags::fromGlobalValue(*GV);
+ if (auto Err = R.defineMaterializing(SymbolFlags)) {
+ ES.reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
+ }
}
- R.resolve(ResolvedStubs);
+ expandPartition(*GVsToExtract);
- BaseLayer.emit(std::move(R), std::move(K), std::move(GlobalsModule));
-}
+ // Extract the requested partition (plus any necessary aliases) and
+ // put the rest back into the impl dylib.
+ auto ShouldExtract = [&](const GlobalValue &GV) -> bool {
+ return GVsToExtract->count(&GV);
+ };
-IndirectStubsManager &CompileOnDemandLayer2::getStubsManager(const VSO &V) {
- std::lock_guard<std::mutex> Lock(CODLayerMutex);
- StubManagersMap::iterator I = StubsMgrs.find(&V);
- if (I == StubsMgrs.end())
- I = StubsMgrs.insert(std::make_pair(&V, BuildIndirectStubsManager())).first;
- return *I->second;
-}
+ auto ExtractedTSM = extractSubModule(TSM, ".submodule", ShouldExtract);
+ R.replace(llvm::make_unique<PartitioningIRMaterializationUnit>(
+ ES, std::move(TSM), R.getVModuleKey(), *this));
-void CompileOnDemandLayer2::emitExtractedFunctionsModule(
- MaterializationResponsibility R, std::unique_ptr<Module> M) {
- auto K = getExecutionSession().allocateVModule();
- BaseLayer.emit(std::move(R), std::move(K), std::move(M));
+ BaseLayer.emit(std::move(R), std::move(ExtractedTSM));
}
} // end namespace orc
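
Taken together, the rewritten layer asks a user-configurable partition function which of the requested globals to extract on each materialization; the rest stays in the implementation dylib behind lazy reexports. A minimal setup sketch, assuming compileRequested and compileWholeModule are exposed as static helpers in the header as their definitions above suggest (ES, BaseLayer, LCTMgr and BuildISM are pre-existing objects, not defined here):

using namespace llvm;
using namespace llvm::orc;

// Assumes ES, BaseLayer, LCTMgr and BuildISM were created elsewhere.
CompileOnDemandLayer COD(ES, BaseLayer, LCTMgr, std::move(BuildISM));

// Default behaviour: extract exactly the symbols that were requested.
COD.setPartitionFunction(CompileOnDemandLayer::compileRequested);

// Alternative: hand whole modules to the base layer on first use.
COD.setPartitionFunction(CompileOnDemandLayer::compileWholeModule);
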
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 4325d57f73d0..73c0bcdf7d28 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -1,4 +1,4 @@
-//===----- Core.cpp - Core ORC APIs (MaterializationUnit, VSO, etc.) ------===//
+//===--- Core.cpp - Core ORC APIs (MaterializationUnit, JITDylib, etc.) ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,6 +11,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
@@ -18,98 +19,203 @@
#include <future>
#endif
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+
+namespace {
+
+#ifndef NDEBUG
+
+cl::opt<bool> PrintHidden("debug-orc-print-hidden", cl::init(false),
+ cl::desc("debug print hidden symbols defined by "
+ "materialization units"),
+ cl::Hidden);
+
+cl::opt<bool> PrintCallable("debug-orc-print-callable", cl::init(false),
+ cl::desc("debug print callable symbols defined by "
+ "materialization units"),
+ cl::Hidden);
+
+cl::opt<bool> PrintData("debug-orc-print-data", cl::init(false),
+ cl::desc("debug print data symbols defined by "
+ "materialization units"),
+ cl::Hidden);
+
+#endif // NDEBUG
+
+// SetPrinter predicate that prints every element.
+template <typename T> struct PrintAll {
+ bool operator()(const T &E) { return true; }
+};
+
+bool anyPrintSymbolOptionSet() {
+#ifndef NDEBUG
+ return PrintHidden || PrintCallable || PrintData;
+#else
+ return false;
+#endif // NDEBUG
+}
+
+bool flagsMatchCLOpts(const JITSymbolFlags &Flags) {
+#ifndef NDEBUG
+ // Bail out early if this is a hidden symbol and we're not printing hiddens.
+ if (!PrintHidden && !Flags.isExported())
+ return false;
+
+ // Return true if this is callable and we're printing callables.
+ if (PrintCallable && Flags.isCallable())
+ return true;
+
+ // Return true if this is data and we're printing data.
+ if (PrintData && !Flags.isCallable())
+ return true;
+
+ // otherwise return false.
+ return false;
+#else
+ return false;
+#endif // NDEBUG
+}
+
+// Prints a set of items, filtered by a user-supplied predicate.
+template <typename Set, typename Pred = PrintAll<typename Set::value_type>>
+class SetPrinter {
+public:
+ SetPrinter(const Set &S, Pred ShouldPrint = Pred())
+ : S(S), ShouldPrint(std::move(ShouldPrint)) {}
+
+ void printTo(llvm::raw_ostream &OS) const {
+ bool PrintComma = false;
+ OS << "{";
+ for (auto &E : S) {
+ if (ShouldPrint(E)) {
+ if (PrintComma)
+ OS << ',';
+ OS << ' ' << E;
+ PrintComma = true;
+ }
+ }
+ OS << " }";
+ }
+
+private:
+ const Set &S;
+ mutable Pred ShouldPrint;
+};
+
+template <typename Set, typename Pred>
+SetPrinter<Set, Pred> printSet(const Set &S, Pred P = Pred()) {
+ return SetPrinter<Set, Pred>(S, std::move(P));
+}
+
+// Render a SetPrinter by delegating to its printTo method.
+template <typename Set, typename Pred>
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const SetPrinter<Set, Pred> &Printer) {
+ Printer.printTo(OS);
+ return OS;
+}
+
+struct PrintSymbolFlagsMapElemsMatchingCLOpts {
+ bool operator()(const orc::SymbolFlagsMap::value_type &KV) {
+ return flagsMatchCLOpts(KV.second);
+ }
+};
+
+struct PrintSymbolMapElemsMatchingCLOpts {
+ bool operator()(const orc::SymbolMap::value_type &KV) {
+ return flagsMatchCLOpts(KV.second.getFlags());
+ }
+};
+
+} // end anonymous namespace
+
namespace llvm {
namespace orc {
+ SymbolStringPool::PoolMapEntry SymbolStringPtr::Tombstone(0);
+
char FailedToMaterialize::ID = 0;
char SymbolsNotFound::ID = 0;
+char SymbolsCouldNotBeRemoved::ID = 0;
RegisterDependenciesFunction NoDependenciesToRegister =
RegisterDependenciesFunction();
void MaterializationUnit::anchor() {}
+raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym) {
+ return OS << *Sym;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) {
+ return OS << printSet(Symbols, PrintAll<SymbolStringPtr>());
+}
+
raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) {
+ if (Flags.isCallable())
+ OS << "[Callable]";
+ else
+ OS << "[Data]";
if (Flags.isWeak())
- OS << 'W';
+ OS << "[Weak]";
else if (Flags.isCommon())
- OS << 'C';
- else
- OS << 'S';
+ OS << "[Common]";
- if (Flags.isExported())
- OS << 'E';
- else
- OS << 'H';
+ if (!Flags.isExported())
+ OS << "[Hidden]";
return OS;
}
raw_ostream &operator<<(raw_ostream &OS, const JITEvaluatedSymbol &Sym) {
- OS << format("0x%016x", Sym.getAddress()) << " " << Sym.getFlags();
- return OS;
+ return OS << format("0x%016" PRIx64, Sym.getAddress()) << " "
+ << Sym.getFlags();
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap::value_type &KV) {
+ return OS << "(\"" << KV.first << "\", " << KV.second << ")";
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolMap::value_type &KV) {
- OS << "\"" << *KV.first << "\": " << KV.second;
- return OS;
+ return OS << "(\"" << KV.first << "\": " << KV.second << ")";
}
-raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) {
- OS << "{";
- if (!Symbols.empty()) {
- OS << " \"" << **Symbols.begin() << "\"";
- for (auto &Sym : make_range(std::next(Symbols.begin()), Symbols.end()))
- OS << ", \"" << *Sym << "\"";
- }
- OS << " }";
- return OS;
+raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags) {
+ return OS << printSet(SymbolFlags, PrintSymbolFlagsMapElemsMatchingCLOpts());
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols) {
- OS << "{";
- if (!Symbols.empty()) {
- OS << " {" << *Symbols.begin() << "}";
- for (auto &Sym : make_range(std::next(Symbols.begin()), Symbols.end()))
- OS << ", {" << Sym << "}";
- }
- OS << " }";
- return OS;
+ return OS << printSet(Symbols, PrintSymbolMapElemsMatchingCLOpts());
}
-raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags) {
- OS << "{";
- if (!SymbolFlags.empty()) {
- OS << " {\"" << *SymbolFlags.begin()->first
- << "\": " << SymbolFlags.begin()->second << "}";
- for (auto &KV :
- make_range(std::next(SymbolFlags.begin()), SymbolFlags.end()))
- OS << ", {\"" << *KV.first << "\": " << KV.second << "}";
- }
- OS << " }";
- return OS;
+raw_ostream &operator<<(raw_ostream &OS,
+ const SymbolDependenceMap::value_type &KV) {
+ return OS << "(" << KV.first << ", " << KV.second << ")";
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps) {
- OS << "{";
- if (!Deps.empty()) {
- OS << " { " << Deps.begin()->first->getName() << ": "
- << Deps.begin()->second << " }";
- for (auto &KV : make_range(std::next(Deps.begin()), Deps.end()))
- OS << ", { " << KV.first->getName() << ": " << KV.second << " }";
- }
- OS << " }";
- return OS;
+ return OS << printSet(Deps, PrintAll<SymbolDependenceMap::value_type>());
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU) {
+ OS << "MU@" << &MU << " (\"" << MU.getName() << "\"";
+ if (anyPrintSymbolOptionSet())
+ OS << ", " << MU.getSymbols();
+ return OS << ")";
}
-raw_ostream &operator<<(raw_ostream &OS, const VSOList &VSOs) {
+raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs) {
OS << "[";
- if (!VSOs.empty()) {
- assert(VSOs.front() && "VSOList entries must not be null");
- OS << " " << VSOs.front()->getName();
- for (auto *V : make_range(std::next(VSOs.begin()), VSOs.end())) {
- assert(V && "VSOList entries must not be null");
- OS << ", " << V->getName();
+ if (!JDs.empty()) {
+ assert(JDs.front().first && "JITDylibList entries must not be null");
+ OS << " (\"" << JDs.front().first->getName() << "\", "
+ << (JDs.front().second ? "true" : "false") << ")";
+ for (auto &KV : make_range(std::next(JDs.begin()), JDs.end())) {
+ assert(KV.first && "JITDylibList entries must not be null");
+ OS << ", (\"" << KV.first->getName() << "\", "
+ << (KV.second ? "true" : "false") << ")";
}
}
OS << " ]";
@@ -142,359 +248,17 @@ void SymbolsNotFound::log(raw_ostream &OS) const {
OS << "Symbols not found: " << Symbols;
}
-void ExecutionSessionBase::legacyFailQuery(AsynchronousSymbolQuery &Q,
- Error Err) {
- assert(!!Err && "Error should be in failure state");
-
- bool SendErrorToQuery;
- runSessionLocked([&]() {
- Q.detach();
- SendErrorToQuery = Q.canStillFail();
- });
-
- if (SendErrorToQuery)
- Q.handleFailed(std::move(Err));
- else
- reportError(std::move(Err));
-}
-
-Expected<SymbolMap> ExecutionSessionBase::legacyLookup(
- ExecutionSessionBase &ES, LegacyAsyncLookupFunction AsyncLookup,
- SymbolNameSet Names, bool WaitUntilReady,
- RegisterDependenciesFunction RegisterDependencies) {
-#if LLVM_ENABLE_THREADS
- // In the threaded case we use promises to return the results.
- std::promise<SymbolMap> PromisedResult;
- std::mutex ErrMutex;
- Error ResolutionError = Error::success();
- std::promise<void> PromisedReady;
- Error ReadyError = Error::success();
- auto OnResolve = [&](Expected<SymbolMap> R) {
- if (R)
- PromisedResult.set_value(std::move(*R));
- else {
- {
- ErrorAsOutParameter _(&ResolutionError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ResolutionError = R.takeError();
- }
- PromisedResult.set_value(SymbolMap());
- }
- };
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- if (Err) {
- ErrorAsOutParameter _(&ReadyError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ReadyError = std::move(Err);
- }
- PromisedReady.set_value();
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- ES.reportError(std::move(Err));
- };
- }
-
-#else
- SymbolMap Result;
- Error ResolutionError = Error::success();
- Error ReadyError = Error::success();
-
- auto OnResolve = [&](Expected<SymbolMap> R) {
- ErrorAsOutParameter _(&ResolutionError);
- if (R)
- Result = std::move(*R);
- else
- ResolutionError = R.takeError();
- };
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- ErrorAsOutParameter _(&ReadyError);
- if (Err)
- ReadyError = std::move(Err);
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- ES.reportError(std::move(Err));
- };
- }
-#endif
-
- auto Query = std::make_shared<AsynchronousSymbolQuery>(
- Names, std::move(OnResolve), std::move(OnReady));
- // FIXME: This should be run session locked along with the registration code
- // and error reporting below.
- SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names));
-
- // If the query was lodged successfully then register the dependencies,
- // otherwise fail it with an error.
- if (UnresolvedSymbols.empty())
- RegisterDependencies(Query->QueryRegistrations);
- else {
- bool DeliverError = runSessionLocked([&]() {
- Query->detach();
- return Query->canStillFail();
- });
- auto Err = make_error<SymbolsNotFound>(std::move(UnresolvedSymbols));
- if (DeliverError)
- Query->handleFailed(std::move(Err));
- else
- ES.reportError(std::move(Err));
- }
-
-#if LLVM_ENABLE_THREADS
- auto ResultFuture = PromisedResult.get_future();
- auto Result = ResultFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
- }
-
- if (WaitUntilReady) {
- auto ReadyFuture = PromisedReady.get_future();
- ReadyFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ReadyError)
- return std::move(ReadyError);
- }
- } else
- cantFail(std::move(ReadyError));
-
- return std::move(Result);
-
-#else
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
-
- if (ReadyError)
- return std::move(ReadyError);
-
- return Result;
-#endif
-}
-
-void ExecutionSessionBase::lookup(
- const VSOList &VSOs, const SymbolNameSet &Symbols,
- SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
- RegisterDependenciesFunction RegisterDependencies) {
-
- // lookup can be re-entered recursively if running on a single thread. Run any
- // outstanding MUs in case this query depends on them, otherwise the main
- // thread will starve waiting for a result from an MU that it failed to run.
- runOutstandingMUs();
-
- auto Unresolved = std::move(Symbols);
- std::map<VSO *, MaterializationUnitList> MUsMap;
- auto Q = std::make_shared<AsynchronousSymbolQuery>(
- Symbols, std::move(OnResolve), std::move(OnReady));
- bool QueryIsFullyResolved = false;
- bool QueryIsFullyReady = false;
- bool QueryFailed = false;
-
- runSessionLocked([&]() {
- for (auto *V : VSOs) {
- assert(V && "VSOList entries must not be null");
- assert(!MUsMap.count(V) &&
- "VSOList should not contain duplicate entries");
- V->lodgeQuery(Q, Unresolved, MUsMap[V]);
- }
-
- if (Unresolved.empty()) {
- // Query lodged successfully.
-
- // Record whether this query is fully ready / resolved. We will use
- // this to call handleFullyResolved/handleFullyReady outside the session
- // lock.
- QueryIsFullyResolved = Q->isFullyResolved();
- QueryIsFullyReady = Q->isFullyReady();
-
- // Call the register dependencies function.
- if (RegisterDependencies && !Q->QueryRegistrations.empty())
- RegisterDependencies(Q->QueryRegistrations);
- } else {
- // Query failed due to unresolved symbols.
- QueryFailed = true;
-
- // Disconnect the query from its dependencies.
- Q->detach();
-
- // Replace the MUs.
- for (auto &KV : MUsMap)
- for (auto &MU : KV.second)
- KV.first->replace(std::move(MU));
- }
- });
-
- if (QueryFailed) {
- Q->handleFailed(make_error<SymbolsNotFound>(std::move(Unresolved)));
- return;
- } else {
- if (QueryIsFullyResolved)
- Q->handleFullyResolved();
- if (QueryIsFullyReady)
- Q->handleFullyReady();
- }
-
- // Move the MUs to the OutstandingMUs list, then materialize.
- {
- std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
-
- for (auto &KV : MUsMap)
- for (auto &MU : KV.second)
- OutstandingMUs.push_back(std::make_pair(KV.first, std::move(MU)));
- }
-
- runOutstandingMUs();
+SymbolsCouldNotBeRemoved::SymbolsCouldNotBeRemoved(SymbolNameSet Symbols)
+ : Symbols(std::move(Symbols)) {
+ assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
-Expected<SymbolMap>
-ExecutionSessionBase::lookup(const VSOList &VSOs, const SymbolNameSet &Symbols,
- RegisterDependenciesFunction RegisterDependencies,
- bool WaitUntilReady) {
-#if LLVM_ENABLE_THREADS
- // In the threaded case we use promises to return the results.
- std::promise<SymbolMap> PromisedResult;
- std::mutex ErrMutex;
- Error ResolutionError = Error::success();
- std::promise<void> PromisedReady;
- Error ReadyError = Error::success();
- auto OnResolve = [&](Expected<SymbolMap> R) {
- if (R)
- PromisedResult.set_value(std::move(*R));
- else {
- {
- ErrorAsOutParameter _(&ResolutionError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ResolutionError = R.takeError();
- }
- PromisedResult.set_value(SymbolMap());
- }
- };
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- if (Err) {
- ErrorAsOutParameter _(&ReadyError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ReadyError = std::move(Err);
- }
- PromisedReady.set_value();
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- reportError(std::move(Err));
- };
- }
-
-#else
- SymbolMap Result;
- Error ResolutionError = Error::success();
- Error ReadyError = Error::success();
-
- auto OnResolve = [&](Expected<SymbolMap> R) {
- ErrorAsOutParameter _(&ResolutionError);
- if (R)
- Result = std::move(*R);
- else
- ResolutionError = R.takeError();
- };
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- ErrorAsOutParameter _(&ReadyError);
- if (Err)
- ReadyError = std::move(Err);
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- reportError(std::move(Err));
- };
- }
-#endif
-
- // Perform the asynchronous lookup.
- lookup(VSOs, Symbols, OnResolve, OnReady, RegisterDependencies);
-
-#if LLVM_ENABLE_THREADS
- auto ResultFuture = PromisedResult.get_future();
- auto Result = ResultFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
- }
-
- if (WaitUntilReady) {
- auto ReadyFuture = PromisedReady.get_future();
- ReadyFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ReadyError)
- return std::move(ReadyError);
- }
- } else
- cantFail(std::move(ReadyError));
-
- return std::move(Result);
-
-#else
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
-
- if (ReadyError)
- return std::move(ReadyError);
-
- return Result;
-#endif
+std::error_code SymbolsCouldNotBeRemoved::convertToErrorCode() const {
+ return orcError(OrcErrorCode::UnknownORCError);
}
-void ExecutionSessionBase::runOutstandingMUs() {
- while (1) {
- std::pair<VSO *, std::unique_ptr<MaterializationUnit>> VSOAndMU;
-
- {
- std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
- if (!OutstandingMUs.empty()) {
- VSOAndMU = std::move(OutstandingMUs.back());
- OutstandingMUs.pop_back();
- }
- }
-
- if (VSOAndMU.first) {
- assert(VSOAndMU.second && "VSO, but no MU?");
- dispatchMaterialization(*VSOAndMU.first, std::move(VSOAndMU.second));
- } else
- break;
- }
+void SymbolsCouldNotBeRemoved::log(raw_ostream &OS) const {
+ OS << "Symbols could not be removed: " << Symbols;
}
AsynchronousSymbolQuery::AsynchronousSymbolQuery(
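
The new SymbolsCouldNotBeRemoved error mirrors SymbolsNotFound and carries the offending symbol set. A hedged sketch of how a caller might consume it with the usual llvm::Error handlers (the call that produces the error is out of scope of this hunk, so only the handling side is shown):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void reportRemovalFailure(Error Err) {
  handleAllErrors(std::move(Err),
                  [](orc::SymbolsCouldNotBeRemoved &E) {
                    // Dedicated handling: log() prints the symbol set.
                    E.log(errs());
                    errs() << "\n";
                  },
                  [](ErrorInfoBase &E) { errs() << E.message() << "\n"; });
}
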
@@ -520,23 +284,45 @@ void AsynchronousSymbolQuery::resolve(const SymbolStringPtr &Name,
void AsynchronousSymbolQuery::handleFullyResolved() {
assert(NotYetResolvedCount == 0 && "Not fully resolved?");
- assert(NotifySymbolsResolved &&
- "NotifySymbolsResolved already called or error occurred");
- NotifySymbolsResolved(std::move(ResolvedSymbols));
+
+ if (!NotifySymbolsResolved) {
+ // handleFullyResolved may be called by handleFullyReady (see comments in
+ // that method), in which case this is a no-op, so bail out.
+ assert(!NotifySymbolsReady &&
+ "NotifySymbolsResolved already called or an error occurred");
+ return;
+ }
+
+ auto TmpNotifySymbolsResolved = std::move(NotifySymbolsResolved);
NotifySymbolsResolved = SymbolsResolvedCallback();
+ TmpNotifySymbolsResolved(std::move(ResolvedSymbols));
}
void AsynchronousSymbolQuery::notifySymbolReady() {
- assert(NotYetReadyCount != 0 && "All symbols already finalized");
+ assert(NotYetReadyCount != 0 && "All symbols already emitted");
--NotYetReadyCount;
}
void AsynchronousSymbolQuery::handleFullyReady() {
+ assert(NotifySymbolsReady &&
+ "NotifySymbolsReady already called or an error occurred");
+
+ auto TmpNotifySymbolsReady = std::move(NotifySymbolsReady);
+ NotifySymbolsReady = SymbolsReadyCallback();
+
+ if (NotYetResolvedCount == 0 && NotifySymbolsResolved) {
+ // The NotifyResolved callback of one query must have caused this query to
+ // become ready (i.e. there is still a handleFullyResolved callback waiting
+ // to be made back up the stack). Fold the handleFullyResolved call into
+ // this one before proceeding. This will cause the call further up the
+ // stack to become a no-op.
+ handleFullyResolved();
+ }
+
assert(QueryRegistrations.empty() &&
"Query is still registered with some symbols");
assert(!NotifySymbolsResolved && "Resolution not applied yet");
- NotifySymbolsReady(Error::success());
- NotifySymbolsReady = SymbolsReadyCallback();
+ TmpNotifySymbolsReady(Error::success());
}
bool AsynchronousSymbolQuery::canStillFail() {
@@ -557,17 +343,19 @@ void AsynchronousSymbolQuery::handleFailed(Error Err) {
NotifySymbolsReady = SymbolsReadyCallback();
}
-void AsynchronousSymbolQuery::addQueryDependence(VSO &V, SymbolStringPtr Name) {
- bool Added = QueryRegistrations[&V].insert(std::move(Name)).second;
+void AsynchronousSymbolQuery::addQueryDependence(JITDylib &JD,
+ SymbolStringPtr Name) {
+ bool Added = QueryRegistrations[&JD].insert(std::move(Name)).second;
(void)Added;
assert(Added && "Duplicate dependence notification?");
}
void AsynchronousSymbolQuery::removeQueryDependence(
- VSO &V, const SymbolStringPtr &Name) {
- auto QRI = QueryRegistrations.find(&V);
- assert(QRI != QueryRegistrations.end() && "No dependencies registered for V");
- assert(QRI->second.count(Name) && "No dependency on Name in V");
+ JITDylib &JD, const SymbolStringPtr &Name) {
+ auto QRI = QueryRegistrations.find(&JD);
+ assert(QRI != QueryRegistrations.end() &&
+ "No dependencies registered for JD");
+ assert(QRI->second.count(Name) && "No dependency on Name in JD");
QRI->second.erase(Name);
if (QRI->second.empty())
QueryRegistrations.erase(QRI);
@@ -583,8 +371,8 @@ void AsynchronousSymbolQuery::detach() {
}
MaterializationResponsibility::MaterializationResponsibility(
- VSO &V, SymbolFlagsMap SymbolFlags)
- : V(V), SymbolFlags(std::move(SymbolFlags)) {
+ JITDylib &JD, SymbolFlagsMap SymbolFlags, VModuleKey K)
+ : JD(JD), SymbolFlags(std::move(SymbolFlags)), K(std::move(K)) {
assert(!this->SymbolFlags.empty() && "Materializing nothing?");
#ifndef NDEBUG
@@ -598,11 +386,13 @@ MaterializationResponsibility::~MaterializationResponsibility() {
"All symbols should have been explicitly materialized or failed");
}
-SymbolNameSet MaterializationResponsibility::getRequestedSymbols() {
- return V.getRequestedSymbols(SymbolFlags);
+SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
+ return JD.getRequestedSymbols(SymbolFlags);
}
void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
+ LLVM_DEBUG(dbgs() << "In " << JD.getName() << " resolving " << Symbols
+ << "\n");
#ifndef NDEBUG
for (auto &KV : Symbols) {
auto I = SymbolFlags.find(KV.first);
@@ -619,17 +409,17 @@ void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
}
#endif
- V.resolve(Symbols);
+ JD.resolve(Symbols);
}
-void MaterializationResponsibility::finalize() {
+void MaterializationResponsibility::emit() {
#ifndef NDEBUG
for (auto &KV : SymbolFlags)
assert(!KV.second.isMaterializing() &&
- "Failed to resolve symbol before finalization");
+ "Failed to resolve symbol before emission");
#endif // NDEBUG
- V.finalize(SymbolFlags);
+ JD.emit(SymbolFlags);
SymbolFlags.clear();
}
@@ -637,8 +427,8 @@ Error MaterializationResponsibility::defineMaterializing(
const SymbolFlagsMap &NewSymbolFlags) {
// Add the given symbols to this responsibility object.
// It's ok if we hit a duplicate here: In that case the new version will be
- // discarded, and the VSO::defineMaterializing method will return a duplicate
- // symbol error.
+ // discarded, and the JITDylib::defineMaterializing method will return a
+ // duplicate symbol error.
for (auto &KV : NewSymbolFlags) {
auto I = SymbolFlags.insert(KV).first;
(void)I;
@@ -647,7 +437,7 @@ Error MaterializationResponsibility::defineMaterializing(
#endif
}
- return V.defineMaterializing(NewSymbolFlags);
+ return JD.defineMaterializing(NewSymbolFlags);
}
void MaterializationResponsibility::failMaterialization() {
@@ -656,7 +446,7 @@ void MaterializationResponsibility::failMaterialization() {
for (auto &KV : SymbolFlags)
FailedSymbols.insert(KV.first);
- V.notifyFailed(FailedSymbols);
+ JD.notifyFailed(FailedSymbols);
SymbolFlags.clear();
}
@@ -665,11 +455,21 @@ void MaterializationResponsibility::replace(
for (auto &KV : MU->getSymbols())
SymbolFlags.erase(KV.first);
- V.replace(std::move(MU));
+ LLVM_DEBUG(JD.getExecutionSession().runSessionLocked([&]() {
+ dbgs() << "In " << JD.getName() << " replacing symbols with " << *MU
+ << "\n";
+ }););
+
+ JD.replace(std::move(MU));
}
MaterializationResponsibility
-MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) {
+MaterializationResponsibility::delegate(const SymbolNameSet &Symbols,
+ VModuleKey NewKey) {
+
+ if (NewKey == VModuleKey())
+ NewKey = K;
+
SymbolFlagsMap DelegatedFlags;
for (auto &Name : Symbols) {
@@ -682,34 +482,40 @@ MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) {
SymbolFlags.erase(I);
}
- return MaterializationResponsibility(V, std::move(DelegatedFlags));
+ return MaterializationResponsibility(JD, std::move(DelegatedFlags),
+ std::move(NewKey));
}
void MaterializationResponsibility::addDependencies(
const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) {
assert(SymbolFlags.count(Name) &&
"Symbol not covered by this MaterializationResponsibility instance");
- V.addDependencies(Name, Dependencies);
+ JD.addDependencies(Name, Dependencies);
}
void MaterializationResponsibility::addDependenciesForAll(
const SymbolDependenceMap &Dependencies) {
for (auto &KV : SymbolFlags)
- V.addDependencies(KV.first, Dependencies);
+ JD.addDependencies(KV.first, Dependencies);
}
AbsoluteSymbolsMaterializationUnit::AbsoluteSymbolsMaterializationUnit(
- SymbolMap Symbols)
- : MaterializationUnit(extractFlags(Symbols)), Symbols(std::move(Symbols)) {}
+ SymbolMap Symbols, VModuleKey K)
+ : MaterializationUnit(extractFlags(Symbols), std::move(K)),
+ Symbols(std::move(Symbols)) {}
+
+StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
+ return "<Absolute Symbols>";
+}
void AbsoluteSymbolsMaterializationUnit::materialize(
MaterializationResponsibility R) {
R.resolve(Symbols);
- R.finalize();
+ R.emit();
}
-void AbsoluteSymbolsMaterializationUnit::discard(const VSO &V,
- SymbolStringPtr Name) {
+void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
assert(Symbols.count(Name) && "Symbol is not part of this MU");
Symbols.erase(Name);
}
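
AbsoluteSymbolsMaterializationUnit now carries a VModuleKey and a name, and "emits" rather than "finalizes" its symbols; its materialize() simply calls R.resolve() followed by R.emit(). The usual way to create one is the absoluteSymbols() helper; a short usage sketch (JD, Mangle and FooAddr are assumed to exist already):

#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

// Define the symbol "foo" in JITDylib JD as an absolute address.
Error defineFoo(JITDylib &JD, MangleAndInterner &Mangle,
                JITTargetAddress FooAddr) {
  return JD.define(absoluteSymbols(
      {{Mangle("foo"), JITEvaluatedSymbol(FooAddr, JITSymbolFlags::Exported)}}));
}
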
@@ -723,19 +529,26 @@ AbsoluteSymbolsMaterializationUnit::extractFlags(const SymbolMap &Symbols) {
}
ReExportsMaterializationUnit::ReExportsMaterializationUnit(
- VSO *SourceVSO, SymbolAliasMap Aliases)
- : MaterializationUnit(extractFlags(Aliases)), SourceVSO(SourceVSO),
+ JITDylib *SourceJD, bool MatchNonExported, SymbolAliasMap Aliases,
+ VModuleKey K)
+ : MaterializationUnit(extractFlags(Aliases), std::move(K)),
+ SourceJD(SourceJD), MatchNonExported(MatchNonExported),
Aliases(std::move(Aliases)) {}
+StringRef ReExportsMaterializationUnit::getName() const {
+ return "<Reexports>";
+}
+
void ReExportsMaterializationUnit::materialize(
MaterializationResponsibility R) {
- auto &ES = R.getTargetVSO().getExecutionSession();
- VSO &TgtV = R.getTargetVSO();
- VSO &SrcV = SourceVSO ? *SourceVSO : TgtV;
+ auto &ES = R.getTargetJITDylib().getExecutionSession();
+ JITDylib &TgtJD = R.getTargetJITDylib();
+ JITDylib &SrcJD = SourceJD ? *SourceJD : TgtJD;
// Find the set of requested aliases and aliasees. Return any unrequested
- // aliases back to the VSO so as to not prematurely materialize any aliasees.
+ // aliases back to the JITDylib so as to not prematurely materialize any
+ // aliasees.
auto RequestedSymbols = R.getRequestedSymbols();
SymbolAliasMap RequestedAliases;
@@ -747,8 +560,8 @@ void ReExportsMaterializationUnit::materialize(
}
if (!Aliases.empty()) {
- if (SourceVSO)
- R.replace(reexports(*SourceVSO, std::move(Aliases)));
+ if (SourceJD)
+ R.replace(reexports(*SourceJD, std::move(Aliases), MatchNonExported));
else
R.replace(symbolAliases(std::move(Aliases)));
}
@@ -776,20 +589,22 @@ void ReExportsMaterializationUnit::materialize(
SymbolNameSet QuerySymbols;
SymbolAliasMap QueryAliases;
- for (auto I = RequestedAliases.begin(), E = RequestedAliases.end();
- I != E;) {
- auto Tmp = I++;
-
+ // Collect as many aliases as we can without including a chain.
+ for (auto &KV : RequestedAliases) {
// Chain detected. Skip this symbol for this round.
- if (&SrcV == &TgtV && (QueryAliases.count(Tmp->second.Aliasee) ||
- RequestedAliases.count(Tmp->second.Aliasee)))
+ if (&SrcJD == &TgtJD && (QueryAliases.count(KV.second.Aliasee) ||
+ RequestedAliases.count(KV.second.Aliasee)))
continue;
- ResponsibilitySymbols.insert(Tmp->first);
- QuerySymbols.insert(Tmp->second.Aliasee);
- QueryAliases[Tmp->first] = std::move(Tmp->second);
- RequestedAliases.erase(Tmp);
+ ResponsibilitySymbols.insert(KV.first);
+ QuerySymbols.insert(KV.second.Aliasee);
+ QueryAliases[KV.first] = std::move(KV.second);
}
+
+ // Remove the aliases collected this round from the RequestedAliases map.
+ for (auto &KV : QueryAliases)
+ RequestedAliases.erase(KV.first);
+
assert(!QuerySymbols.empty() && "Alias cycle detected!");
auto QueryInfo = std::make_shared<OnResolveInfo>(
@@ -806,21 +621,21 @@ void ReExportsMaterializationUnit::materialize(
QueryInfos.pop_back();
auto RegisterDependencies = [QueryInfo,
- &SrcV](const SymbolDependenceMap &Deps) {
+ &SrcJD](const SymbolDependenceMap &Deps) {
// If there were no materializing symbols, just bail out.
if (Deps.empty())
return;
- // Otherwise the only deps should be on SrcV.
- assert(Deps.size() == 1 && Deps.count(&SrcV) &&
+ // Otherwise the only deps should be on SrcJD.
+ assert(Deps.size() == 1 && Deps.count(&SrcJD) &&
"Unexpected dependencies for reexports");
- auto &SrcVDeps = Deps.find(&SrcV)->second;
+ auto &SrcJDDeps = Deps.find(&SrcJD)->second;
SymbolDependenceMap PerAliasDepsMap;
- auto &PerAliasDeps = PerAliasDepsMap[&SrcV];
+ auto &PerAliasDeps = PerAliasDepsMap[&SrcJD];
for (auto &KV : QueryInfo->Aliases)
- if (SrcVDeps.count(KV.second.Aliasee)) {
+ if (SrcJDDeps.count(KV.second.Aliasee)) {
PerAliasDeps = {KV.second.Aliasee};
QueryInfo->R.addDependencies(KV.first, PerAliasDepsMap);
}
@@ -836,9 +651,9 @@ void ReExportsMaterializationUnit::materialize(
(*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags);
}
QueryInfo->R.resolve(ResolutionMap);
- QueryInfo->R.finalize();
+ QueryInfo->R.emit();
} else {
- auto &ES = QueryInfo->R.getTargetVSO().getExecutionSession();
+ auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession();
ES.reportError(Result.takeError());
QueryInfo->R.failMaterialization();
}
@@ -846,12 +661,14 @@ void ReExportsMaterializationUnit::materialize(
auto OnReady = [&ES](Error Err) { ES.reportError(std::move(Err)); };
- ES.lookup({&SrcV}, QuerySymbols, std::move(OnResolve), std::move(OnReady),
+ ES.lookup(JITDylibSearchList({{&SrcJD, MatchNonExported}}), QuerySymbols,
+ std::move(OnResolve), std::move(OnReady),
std::move(RegisterDependencies));
}
}
-void ReExportsMaterializationUnit::discard(const VSO &V, SymbolStringPtr Name) {
+void ReExportsMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
assert(Aliases.count(Name) &&
"Symbol not covered by this MaterializationUnit");
Aliases.erase(Name);
@@ -867,8 +684,8 @@ ReExportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
}
Expected<SymbolAliasMap>
-buildSimpleReexportsAliasMap(VSO &SourceV, const SymbolNameSet &Symbols) {
- auto Flags = SourceV.lookupFlags(Symbols);
+buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) {
+ auto Flags = SourceJD.lookupFlags(Symbols);
if (Flags.size() != Symbols.size()) {
SymbolNameSet Unresolved = Symbols;
@@ -886,7 +703,33 @@ buildSimpleReexportsAliasMap(VSO &SourceV, const SymbolNameSet &Symbols) {
return Result;
}
-Error VSO::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
+ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD,
+ bool MatchNonExported,
+ SymbolPredicate Allow)
+ : SourceJD(SourceJD), MatchNonExported(MatchNonExported),
+ Allow(std::move(Allow)) {}
+
+SymbolNameSet ReexportsGenerator::operator()(JITDylib &JD,
+ const SymbolNameSet &Names) {
+ orc::SymbolNameSet Added;
+ orc::SymbolAliasMap AliasMap;
+
+ auto Flags = SourceJD.lookupFlags(Names);
+
+ for (auto &KV : Flags) {
+ if (Allow && !Allow(KV.first))
+ continue;
+ AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second);
+ Added.insert(KV.first);
+ }
+
+ if (!Added.empty())
+ cantFail(JD.define(reexports(SourceJD, AliasMap, MatchNonExported)));
+
+ return Added;
+}
+
+Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
return ES.runSessionLocked([&]() -> Error {
std::vector<SymbolMap::iterator> AddedSyms;
@@ -916,7 +759,7 @@ Error VSO::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
});
}
-void VSO::replace(std::unique_ptr<MaterializationUnit> MU) {
+void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
assert(MU != nullptr && "Can not replace with a null MaterializationUnit");
auto MustRunMU =
@@ -967,13 +810,14 @@ void VSO::replace(std::unique_ptr<MaterializationUnit> MU) {
ES.dispatchMaterialization(*this, std::move(MustRunMU));
}
-SymbolNameSet VSO::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) {
+SymbolNameSet
+JITDylib::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const {
return ES.runSessionLocked([&]() {
SymbolNameSet RequestedSymbols;
for (auto &KV : SymbolFlags) {
- assert(Symbols.count(KV.first) && "VSO does not cover this symbol?");
- assert(Symbols[KV.first].getFlags().isMaterializing() &&
+ assert(Symbols.count(KV.first) && "JITDylib does not cover this symbol?");
+ assert(Symbols.find(KV.first)->second.getFlags().isMaterializing() &&
"getRequestedSymbols can only be called for materializing "
"symbols");
auto I = MaterializingInfos.find(KV.first);
@@ -988,47 +832,47 @@ SymbolNameSet VSO::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) {
});
}
-void VSO::addDependencies(const SymbolStringPtr &Name,
- const SymbolDependenceMap &Dependencies) {
+void JITDylib::addDependencies(const SymbolStringPtr &Name,
+ const SymbolDependenceMap &Dependencies) {
assert(Symbols.count(Name) && "Name not in symbol table");
assert((Symbols[Name].getFlags().isLazy() ||
Symbols[Name].getFlags().isMaterializing()) &&
"Symbol is not lazy or materializing");
auto &MI = MaterializingInfos[Name];
- assert(!MI.IsFinalized && "Can not add dependencies to finalized symbol");
+ assert(!MI.IsEmitted && "Can not add dependencies to an emitted symbol");
for (auto &KV : Dependencies) {
- assert(KV.first && "Null VSO in dependency?");
- auto &OtherVSO = *KV.first;
- auto &DepsOnOtherVSO = MI.UnfinalizedDependencies[&OtherVSO];
+ assert(KV.first && "Null JITDylib in dependency?");
+ auto &OtherJITDylib = *KV.first;
+ auto &DepsOnOtherJITDylib = MI.UnemittedDependencies[&OtherJITDylib];
for (auto &OtherSymbol : KV.second) {
#ifndef NDEBUG
- // Assert that this symbol exists and has not been finalized already.
- auto SymI = OtherVSO.Symbols.find(OtherSymbol);
- assert(SymI != OtherVSO.Symbols.end() &&
+ // Assert that this symbol exists and has not been emitted already.
+ auto SymI = OtherJITDylib.Symbols.find(OtherSymbol);
+ assert(SymI != OtherJITDylib.Symbols.end() &&
(SymI->second.getFlags().isLazy() ||
SymI->second.getFlags().isMaterializing()) &&
- "Dependency on finalized symbol");
+ "Dependency on emitted symbol");
#endif
- auto &OtherMI = OtherVSO.MaterializingInfos[OtherSymbol];
+ auto &OtherMI = OtherJITDylib.MaterializingInfos[OtherSymbol];
- if (OtherMI.IsFinalized)
- transferFinalizedNodeDependencies(MI, Name, OtherMI);
- else if (&OtherVSO != this || OtherSymbol != Name) {
+ if (OtherMI.IsEmitted)
+ transferEmittedNodeDependencies(MI, Name, OtherMI);
+ else if (&OtherJITDylib != this || OtherSymbol != Name) {
OtherMI.Dependants[this].insert(Name);
- DepsOnOtherVSO.insert(OtherSymbol);
+ DepsOnOtherJITDylib.insert(OtherSymbol);
}
}
- if (DepsOnOtherVSO.empty())
- MI.UnfinalizedDependencies.erase(&OtherVSO);
+ if (DepsOnOtherJITDylib.empty())
+ MI.UnemittedDependencies.erase(&OtherJITDylib);
}
}
-void VSO::resolve(const SymbolMap &Resolved) {
+void JITDylib::resolve(const SymbolMap &Resolved) {
auto FullyResolvedQueries = ES.runSessionLocked([&, this]() {
AsynchronousSymbolQuerySet FullyResolvedQueries;
for (const auto &KV : Resolved) {
@@ -1074,11 +918,11 @@ void VSO::resolve(const SymbolMap &Resolved) {
}
}
-void VSO::finalize(const SymbolFlagsMap &Finalized) {
+void JITDylib::emit(const SymbolFlagsMap &Emitted) {
auto FullyReadyQueries = ES.runSessionLocked([&, this]() {
AsynchronousSymbolQuerySet ReadyQueries;
- for (const auto &KV : Finalized) {
+ for (const auto &KV : Emitted) {
const auto &Name = KV.first;
auto MII = MaterializingInfos.find(Name);
@@ -1087,59 +931,59 @@ void VSO::finalize(const SymbolFlagsMap &Finalized) {
auto &MI = MII->second;
- // For each dependant, transfer this node's unfinalized dependencies to
- // it. If the dependant node is fully finalized then notify any pending
- // queries.
+ // For each dependant, transfer this node's emitted dependencies to
+ // it. If the dependant node is ready (i.e. has no unemitted
+ // dependencies) then notify any pending queries.
for (auto &KV : MI.Dependants) {
- auto &DependantVSO = *KV.first;
+ auto &DependantJD = *KV.first;
for (auto &DependantName : KV.second) {
auto DependantMII =
- DependantVSO.MaterializingInfos.find(DependantName);
- assert(DependantMII != DependantVSO.MaterializingInfos.end() &&
+ DependantJD.MaterializingInfos.find(DependantName);
+ assert(DependantMII != DependantJD.MaterializingInfos.end() &&
"Dependant should have MaterializingInfo");
auto &DependantMI = DependantMII->second;
// Remove the dependant's dependency on this node.
- assert(DependantMI.UnfinalizedDependencies[this].count(Name) &&
+ assert(DependantMI.UnemittedDependencies[this].count(Name) &&
"Dependant does not count this symbol as a dependency?");
- DependantMI.UnfinalizedDependencies[this].erase(Name);
- if (DependantMI.UnfinalizedDependencies[this].empty())
- DependantMI.UnfinalizedDependencies.erase(this);
-
- // Transfer unfinalized dependencies from this node to the dependant.
- DependantVSO.transferFinalizedNodeDependencies(DependantMI,
- DependantName, MI);
-
- // If the dependant is finalized and this node was the last of its
- // unfinalized dependencies then notify any pending queries on the
- // dependant node.
- if (DependantMI.IsFinalized &&
- DependantMI.UnfinalizedDependencies.empty()) {
+ DependantMI.UnemittedDependencies[this].erase(Name);
+ if (DependantMI.UnemittedDependencies[this].empty())
+ DependantMI.UnemittedDependencies.erase(this);
+
+ // Transfer unemitted dependencies from this node to the dependant.
+ DependantJD.transferEmittedNodeDependencies(DependantMI,
+ DependantName, MI);
+
+ // If the dependant is emitted and this node was the last of its
+ // unemitted dependencies then the dependant node is now ready, so
+ // notify any pending queries on the dependant node.
+ if (DependantMI.IsEmitted &&
+ DependantMI.UnemittedDependencies.empty()) {
assert(DependantMI.Dependants.empty() &&
"Dependants should be empty by now");
for (auto &Q : DependantMI.PendingQueries) {
Q->notifySymbolReady();
if (Q->isFullyReady())
ReadyQueries.insert(Q);
- Q->removeQueryDependence(DependantVSO, DependantName);
+ Q->removeQueryDependence(DependantJD, DependantName);
}
- // If this dependant node was fully finalized we can erase its
- // MaterializingInfo and update its materializing state.
- assert(DependantVSO.Symbols.count(DependantName) &&
+ // Since this dependant is now ready, we erase its MaterializingInfo
+ // and update its materializing state.
+ assert(DependantJD.Symbols.count(DependantName) &&
"Dependant has no entry in the Symbols table");
- auto &DependantSym = DependantVSO.Symbols[DependantName];
- DependantSym.setFlags(static_cast<JITSymbolFlags::FlagNames>(
- DependantSym.getFlags() & ~JITSymbolFlags::Materializing));
- DependantVSO.MaterializingInfos.erase(DependantMII);
+ auto &DependantSym = DependantJD.Symbols[DependantName];
+ DependantSym.setFlags(DependantSym.getFlags() &
+ ~JITSymbolFlags::Materializing);
+ DependantJD.MaterializingInfos.erase(DependantMII);
}
}
}
MI.Dependants.clear();
- MI.IsFinalized = true;
+ MI.IsEmitted = true;
- if (MI.UnfinalizedDependencies.empty()) {
+ if (MI.UnemittedDependencies.empty()) {
for (auto &Q : MI.PendingQueries) {
Q->notifySymbolReady();
if (Q->isFullyReady())
@@ -1149,8 +993,7 @@ void VSO::finalize(const SymbolFlagsMap &Finalized) {
assert(Symbols.count(Name) &&
"Symbol has no entry in the Symbols table");
auto &Sym = Symbols[Name];
- Sym.setFlags(static_cast<JITSymbolFlags::FlagNames>(
- Sym.getFlags() & ~JITSymbolFlags::Materializing));
+ Sym.setFlags(Sym.getFlags() & ~JITSymbolFlags::Materializing);
MaterializingInfos.erase(MII);
}
}
@@ -1164,7 +1007,7 @@ void VSO::finalize(const SymbolFlagsMap &Finalized) {
}
}
-void VSO::notifyFailed(const SymbolNameSet &FailedSymbols) {
+void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
// FIXME: This should fail any transitively dependant symbols too.
@@ -1173,7 +1016,7 @@ void VSO::notifyFailed(const SymbolNameSet &FailedSymbols) {
for (auto &Name : FailedSymbols) {
auto I = Symbols.find(Name);
- assert(I != Symbols.end() && "Symbol not present in this VSO");
+ assert(I != Symbols.end() && "Symbol not present in this JITDylib");
Symbols.erase(I);
auto MII = MaterializingInfos.find(Name);
@@ -1206,42 +1049,108 @@ void VSO::notifyFailed(const SymbolNameSet &FailedSymbols) {
Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbols));
}
-void VSO::setSearchOrder(VSOList NewSearchOrder, bool SearchThisVSOFirst) {
- if (SearchThisVSOFirst && NewSearchOrder.front() != this)
- NewSearchOrder.insert(NewSearchOrder.begin(), this);
+void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder,
+ bool SearchThisJITDylibFirst,
+ bool MatchNonExportedInThisDylib) {
+ if (SearchThisJITDylibFirst && NewSearchOrder.front().first != this)
+ NewSearchOrder.insert(NewSearchOrder.begin(),
+ {this, MatchNonExportedInThisDylib});
ES.runSessionLocked([&]() { SearchOrder = std::move(NewSearchOrder); });
}
-void VSO::addToSearchOrder(VSO &V) {
- ES.runSessionLocked([&]() { SearchOrder.push_back(&V); });
+void JITDylib::addToSearchOrder(JITDylib &JD, bool MatchNonExported) {
+ ES.runSessionLocked([&]() {
+ SearchOrder.push_back({&JD, MatchNonExported});
+ });
}
-void VSO::replaceInSearchOrder(VSO &OldV, VSO &NewV) {
+void JITDylib::replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD,
+ bool MatchNonExported) {
ES.runSessionLocked([&]() {
- auto I = std::find(SearchOrder.begin(), SearchOrder.end(), &OldV);
+ auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(),
+ [&](const JITDylibSearchList::value_type &KV) {
+ return KV.first == &OldJD;
+ });
if (I != SearchOrder.end())
- *I = &NewV;
+ *I = {&NewJD, MatchNonExported};
});
}
-void VSO::removeFromSearchOrder(VSO &V) {
+void JITDylib::removeFromSearchOrder(JITDylib &JD) {
ES.runSessionLocked([&]() {
- auto I = std::find(SearchOrder.begin(), SearchOrder.end(), &V);
+ auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(),
+ [&](const JITDylibSearchList::value_type &KV) {
+ return KV.first == &JD;
+ });
if (I != SearchOrder.end())
SearchOrder.erase(I);
});
}
-SymbolFlagsMap VSO::lookupFlags(const SymbolNameSet &Names) {
+Error JITDylib::remove(const SymbolNameSet &Names) {
+ return ES.runSessionLocked([&]() -> Error {
+ using SymbolMaterializerItrPair =
+ std::pair<SymbolMap::iterator, UnmaterializedInfosMap::iterator>;
+ std::vector<SymbolMaterializerItrPair> SymbolsToRemove;
+ SymbolNameSet Missing;
+ SymbolNameSet Materializing;
+
+ for (auto &Name : Names) {
+ auto I = Symbols.find(Name);
+
+ // Note symbol missing.
+ if (I == Symbols.end()) {
+ Missing.insert(Name);
+ continue;
+ }
+
+ // Note symbol materializing.
+ if (I->second.getFlags().isMaterializing()) {
+ Materializing.insert(Name);
+ continue;
+ }
+
+ auto UMII = I->second.getFlags().isLazy() ? UnmaterializedInfos.find(Name)
+ : UnmaterializedInfos.end();
+ SymbolsToRemove.push_back(std::make_pair(I, UMII));
+ }
+
+ // If any of the symbols are not defined, return an error.
+ if (!Missing.empty())
+ return make_error<SymbolsNotFound>(std::move(Missing));
+
+ // If any of the symbols are currently materializing, return an error.
+ if (!Materializing.empty())
+ return make_error<SymbolsCouldNotBeRemoved>(std::move(Materializing));
+
+ // Remove the symbols.
+ for (auto &SymbolMaterializerItrPair : SymbolsToRemove) {
+ auto UMII = SymbolMaterializerItrPair.second;
+
+ // If there is a materializer attached, call discard.
+ if (UMII != UnmaterializedInfos.end()) {
+ UMII->second->MU->doDiscard(*this, UMII->first);
+ UnmaterializedInfos.erase(UMII);
+ }
+
+ auto SymI = SymbolMaterializerItrPair.first;
+ Symbols.erase(SymI);
+ }
+
+ return Error::success();
+ });
+}
+
+SymbolFlagsMap JITDylib::lookupFlags(const SymbolNameSet &Names) {
return ES.runSessionLocked([&, this]() {
SymbolFlagsMap Result;
auto Unresolved = lookupFlagsImpl(Result, Names);
- if (FallbackDefinitionGenerator && !Unresolved.empty()) {
- auto FallbackDefs = FallbackDefinitionGenerator(*this, Unresolved);
- if (!FallbackDefs.empty()) {
- auto Unresolved2 = lookupFlagsImpl(Result, FallbackDefs);
+ if (DefGenerator && !Unresolved.empty()) {
+ auto NewDefs = DefGenerator(*this, Unresolved);
+ if (!NewDefs.empty()) {
+ auto Unresolved2 = lookupFlagsImpl(Result, NewDefs);
(void)Unresolved2;
assert(Unresolved2.empty() &&
"All fallback defs should have been found by lookupFlagsImpl");
@@ -1251,8 +1160,8 @@ SymbolFlagsMap VSO::lookupFlags(const SymbolNameSet &Names) {
});
}
-SymbolNameSet VSO::lookupFlagsImpl(SymbolFlagsMap &Flags,
- const SymbolNameSet &Names) {
+SymbolNameSet JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags,
+ const SymbolNameSet &Names) {
SymbolNameSet Unresolved;
for (auto &Name : Names) {
@@ -1270,38 +1179,43 @@ SymbolNameSet VSO::lookupFlagsImpl(SymbolFlagsMap &Flags,
return Unresolved;
}
-void VSO::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, MaterializationUnitList &MUs) {
+void JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs) {
assert(Q && "Query can not be null");
- lodgeQueryImpl(Q, Unresolved, MUs);
- if (FallbackDefinitionGenerator && !Unresolved.empty()) {
- auto FallbackDefs = FallbackDefinitionGenerator(*this, Unresolved);
- if (!FallbackDefs.empty()) {
- for (auto &D : FallbackDefs)
+ lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs);
+ if (DefGenerator && !Unresolved.empty()) {
+ auto NewDefs = DefGenerator(*this, Unresolved);
+ if (!NewDefs.empty()) {
+ for (auto &D : NewDefs)
Unresolved.erase(D);
- lodgeQueryImpl(Q, FallbackDefs, MUs);
- assert(FallbackDefs.empty() &&
+ lodgeQueryImpl(Q, NewDefs, MatchNonExported, MUs);
+ assert(NewDefs.empty() &&
"All fallback defs should have been found by lookupImpl");
}
}
}
-void VSO::lodgeQueryImpl(
+void JITDylib::lodgeQueryImpl(
std::shared_ptr<AsynchronousSymbolQuery> &Q, SymbolNameSet &Unresolved,
+ bool MatchNonExported,
std::vector<std::unique_ptr<MaterializationUnit>> &MUs) {
- for (auto I = Unresolved.begin(), E = Unresolved.end(); I != E;) {
- auto TmpI = I++;
- auto Name = *TmpI;
+ std::vector<SymbolStringPtr> ToRemove;
+ for (auto Name : Unresolved) {
// Search for the name in Symbols. Skip it if not found.
auto SymI = Symbols.find(Name);
if (SymI == Symbols.end())
continue;
- // If we found Name in V, remove it frome the Unresolved set and add it
- // to the added set.
- Unresolved.erase(TmpI);
+ // If this is a non exported symbol and we're skipping those then skip it.
+ if (!SymI->second.getFlags().isExported() && !MatchNonExported)
+ continue;
+
+ // If we matched against Name in JD, mark it to be removed from the Unresolved
+ // set.
+ ToRemove.push_back(Name);
// If the symbol has an address then resolve it.
if (SymI->second.getAddress() != 0)
@@ -1333,8 +1247,8 @@ void VSO::lodgeQueryImpl(
// Add MU to the list of MaterializationUnits to be materialized.
MUs.push_back(std::move(MU));
} else if (!SymI->second.getFlags().isMaterializing()) {
- // The symbol is neither lazy nor materializing. Finalize it and
- // continue.
+ // The symbol is neither lazy nor materializing, so it must be
+ // ready. Notify the query and continue.
Q->notifySymbolReady();
continue;
}
@@ -1346,10 +1260,14 @@ void VSO::lodgeQueryImpl(
MI.PendingQueries.push_back(Q);
Q->addQueryDependence(*this, Name);
}
+
+ // Remove any symbols that we found.
+ for (auto &Name : ToRemove)
+ Unresolved.erase(Name);
}
-SymbolNameSet VSO::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
- SymbolNameSet Names) {
+SymbolNameSet JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
+ SymbolNameSet Names) {
assert(Q && "Query can not be null");
ES.runOutstandingMUs();
@@ -1360,15 +1278,15 @@ SymbolNameSet VSO::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
SymbolNameSet Unresolved = std::move(Names);
ES.runSessionLocked([&, this]() {
ActionFlags = lookupImpl(Q, MUs, Unresolved);
- if (FallbackDefinitionGenerator && !Unresolved.empty()) {
+ if (DefGenerator && !Unresolved.empty()) {
assert(ActionFlags == None &&
"ActionFlags set but unresolved symbols remain?");
- auto FallbackDefs = FallbackDefinitionGenerator(*this, Unresolved);
- if (!FallbackDefs.empty()) {
- for (auto &D : FallbackDefs)
+ auto NewDefs = DefGenerator(*this, Unresolved);
+ if (!NewDefs.empty()) {
+ for (auto &D : NewDefs)
Unresolved.erase(D);
- ActionFlags = lookupImpl(Q, MUs, FallbackDefs);
- assert(FallbackDefs.empty() &&
+ ActionFlags = lookupImpl(Q, MUs, NewDefs);
+ assert(NewDefs.empty() &&
"All fallback defs should have been found by lookupImpl");
}
}
@@ -1400,24 +1318,22 @@ SymbolNameSet VSO::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
return Unresolved;
}
-VSO::LookupImplActionFlags
-VSO::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
- SymbolNameSet &Unresolved) {
+JITDylib::LookupImplActionFlags
+JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
+ SymbolNameSet &Unresolved) {
LookupImplActionFlags ActionFlags = None;
+ std::vector<SymbolStringPtr> ToRemove;
- for (auto I = Unresolved.begin(), E = Unresolved.end(); I != E;) {
- auto TmpI = I++;
- auto Name = *TmpI;
+ for (auto Name : Unresolved) {
// Search for the name in Symbols. Skip it if not found.
auto SymI = Symbols.find(Name);
if (SymI == Symbols.end())
continue;
- // If we found Name in V, remove it frome the Unresolved set and add it
- // to the dependencies set.
- Unresolved.erase(TmpI);
+ // If we found Name, mark it to be removed from the Unresolved set.
+ ToRemove.push_back(Name);
// If the symbol has an address then resolve it.
if (SymI->second.getAddress() != 0) {
@@ -1452,8 +1368,8 @@ VSO::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
// Add MU to the list of MaterializationUnits to be materialized.
MUs.push_back(std::move(MU));
} else if (!SymI->second.getFlags().isMaterializing()) {
- // The symbol is neither lazy nor materializing. Finalize it and
- // continue.
+ // The symbol is neither lazy nor materializing, so it must be ready.
+ // Notify the query and continue.
Q->notifySymbolReady();
if (Q->isFullyReady())
ActionFlags |= NotifyFullyReady;
@@ -1468,19 +1384,30 @@ VSO::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
Q->addQueryDependence(*this, Name);
}
+ // Remove any marked symbols from the Unresolved set.
+ for (auto &Name : ToRemove)
+ Unresolved.erase(Name);
+
return ActionFlags;
}
-void VSO::dump(raw_ostream &OS) {
+void JITDylib::dump(raw_ostream &OS) {
ES.runSessionLocked([&, this]() {
- OS << "VSO \"" << VSOName
- << "\" (ES: " << format("0x%016x", reinterpret_cast<uintptr_t>(&ES))
- << "):\n"
+ OS << "JITDylib \"" << JITDylibName << "\" (ES: "
+ << format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES)) << "):\n"
+ << "Search order: [";
+ for (auto &KV : SearchOrder)
+ OS << " (\"" << KV.first->getName() << "\", "
+ << (KV.second ? "all" : "exported only") << ")";
+ OS << " ]\n"
<< "Symbol table:\n";
for (auto &KV : Symbols) {
- OS << " \"" << *KV.first
- << "\": " << format("0x%016x", KV.second.getAddress());
+ OS << " \"" << *KV.first << "\": ";
+ if (auto Addr = KV.second.getAddress())
+ OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags();
+ else
+ OS << "<not resolved>";
if (KV.second.getFlags().isLazy() ||
KV.second.getFlags().isMaterializing()) {
OS << " (";
@@ -1492,7 +1419,7 @@ void VSO::dump(raw_ostream &OS) {
}
if (KV.second.getFlags().isMaterializing())
OS << " Materializing";
- OS << " )\n";
+ OS << ", " << KV.second.getFlags() << " )\n";
} else
OS << "\n";
}
@@ -1501,7 +1428,7 @@ void VSO::dump(raw_ostream &OS) {
OS << " MaterializingInfos entries:\n";
for (auto &KV : MaterializingInfos) {
OS << " \"" << *KV.first << "\":\n"
- << " IsFinalized = " << (KV.second.IsFinalized ? "true" : "false")
+ << " IsEmitted = " << (KV.second.IsEmitted ? "true" : "false")
<< "\n"
<< " " << KV.second.PendingQueries.size()
<< " pending queries: { ";
@@ -1510,19 +1437,19 @@ void VSO::dump(raw_ostream &OS) {
OS << "}\n Dependants:\n";
for (auto &KV2 : KV.second.Dependants)
OS << " " << KV2.first->getName() << ": " << KV2.second << "\n";
- OS << " Unfinalized Dependencies:\n";
- for (auto &KV2 : KV.second.UnfinalizedDependencies)
+ OS << " Unemitted Dependencies:\n";
+ for (auto &KV2 : KV.second.UnemittedDependencies)
OS << " " << KV2.first->getName() << ": " << KV2.second << "\n";
}
});
}
-VSO::VSO(ExecutionSessionBase &ES, std::string Name)
- : ES(ES), VSOName(std::move(Name)) {
- SearchOrder.push_back(this);
+JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
+ : ES(ES), JITDylibName(std::move(Name)) {
+ SearchOrder.push_back({this, true});
}
-Error VSO::defineImpl(MaterializationUnit &MU) {
+Error JITDylib::defineImpl(MaterializationUnit &MU) {
SymbolNameSet Duplicates;
SymbolNameSet MUDefsOverridden;
@@ -1599,8 +1526,8 @@ Error VSO::defineImpl(MaterializationUnit &MU) {
return Error::success();
}
-void VSO::detachQueryHelper(AsynchronousSymbolQuery &Q,
- const SymbolNameSet &QuerySymbols) {
+void JITDylib::detachQueryHelper(AsynchronousSymbolQuery &Q,
+ const SymbolNameSet &QuerySymbols) {
for (auto &QuerySymbol : QuerySymbols) {
assert(MaterializingInfos.count(QuerySymbol) &&
"QuerySymbol does not have MaterializingInfo");
@@ -1619,53 +1546,395 @@ void VSO::detachQueryHelper(AsynchronousSymbolQuery &Q,
}
}
-void VSO::transferFinalizedNodeDependencies(
+void JITDylib::transferEmittedNodeDependencies(
MaterializingInfo &DependantMI, const SymbolStringPtr &DependantName,
- MaterializingInfo &FinalizedMI) {
- for (auto &KV : FinalizedMI.UnfinalizedDependencies) {
- auto &DependencyVSO = *KV.first;
- SymbolNameSet *UnfinalizedDependenciesOnDependencyVSO = nullptr;
+ MaterializingInfo &EmittedMI) {
+ for (auto &KV : EmittedMI.UnemittedDependencies) {
+ auto &DependencyJD = *KV.first;
+ SymbolNameSet *UnemittedDependenciesOnDependencyJD = nullptr;
for (auto &DependencyName : KV.second) {
- auto &DependencyMI = DependencyVSO.MaterializingInfos[DependencyName];
+ auto &DependencyMI = DependencyJD.MaterializingInfos[DependencyName];
// Do not add self dependencies.
if (&DependencyMI == &DependantMI)
continue;
- // If we haven't looked up the dependencies for DependencyVSO yet, do it
+ // If we haven't looked up the dependencies for DependencyJD yet, do it
// now and cache the result.
- if (!UnfinalizedDependenciesOnDependencyVSO)
- UnfinalizedDependenciesOnDependencyVSO =
- &DependantMI.UnfinalizedDependencies[&DependencyVSO];
+ if (!UnemittedDependenciesOnDependencyJD)
+ UnemittedDependenciesOnDependencyJD =
+ &DependantMI.UnemittedDependencies[&DependencyJD];
DependencyMI.Dependants[this].insert(DependantName);
- UnfinalizedDependenciesOnDependencyVSO->insert(DependencyName);
+ UnemittedDependenciesOnDependencyJD->insert(DependencyName);
}
}
}
-VSO &ExecutionSession::createVSO(std::string Name) {
- return runSessionLocked([&, this]() -> VSO & {
- VSOs.push_back(std::unique_ptr<VSO>(new VSO(*this, std::move(Name))));
- return *VSOs.back();
+ExecutionSession::ExecutionSession(std::shared_ptr<SymbolStringPool> SSP)
+ : SSP(SSP ? std::move(SSP) : std::make_shared<SymbolStringPool>()) {
+ // Construct the main dylib.
+ JDs.push_back(std::unique_ptr<JITDylib>(new JITDylib(*this, "<main>")));
+}
+
+JITDylib &ExecutionSession::getMainJITDylib() {
+ return runSessionLocked([this]() -> JITDylib & { return *JDs.front(); });
+}
+
+JITDylib &ExecutionSession::createJITDylib(std::string Name,
+ bool AddToMainDylibSearchOrder) {
+ return runSessionLocked([&, this]() -> JITDylib & {
+ JDs.push_back(
+ std::unique_ptr<JITDylib>(new JITDylib(*this, std::move(Name))));
+ if (AddToMainDylibSearchOrder)
+ JDs.front()->addToSearchOrder(*JDs.back());
+ return *JDs.back();
});
}
-Expected<SymbolMap> lookup(const VSOList &VSOs, SymbolNameSet Names) {
+void ExecutionSession::legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err) {
+ assert(!!Err && "Error should be in failure state");
- if (VSOs.empty())
- return SymbolMap();
+ bool SendErrorToQuery;
+ runSessionLocked([&]() {
+ Q.detach();
+ SendErrorToQuery = Q.canStillFail();
+ });
- auto &ES = (*VSOs.begin())->getExecutionSession();
+ if (SendErrorToQuery)
+ Q.handleFailed(std::move(Err));
+ else
+ reportError(std::move(Err));
+}
- return ES.lookup(VSOs, Names, NoDependenciesToRegister, true);
+Expected<SymbolMap> ExecutionSession::legacyLookup(
+ LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
+ bool WaitUntilReady, RegisterDependenciesFunction RegisterDependencies) {
+#if LLVM_ENABLE_THREADS
+ // In the threaded case we use promises to return the results.
+ std::promise<SymbolMap> PromisedResult;
+ std::mutex ErrMutex;
+ Error ResolutionError = Error::success();
+ std::promise<void> PromisedReady;
+ Error ReadyError = Error::success();
+ auto OnResolve = [&](Expected<SymbolMap> R) {
+ if (R)
+ PromisedResult.set_value(std::move(*R));
+ else {
+ {
+ ErrorAsOutParameter _(&ResolutionError);
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ ResolutionError = R.takeError();
+ }
+ PromisedResult.set_value(SymbolMap());
+ }
+ };
+
+ std::function<void(Error)> OnReady;
+ if (WaitUntilReady) {
+ OnReady = [&](Error Err) {
+ if (Err) {
+ ErrorAsOutParameter _(&ReadyError);
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ ReadyError = std::move(Err);
+ }
+ PromisedReady.set_value();
+ };
+ } else {
+ OnReady = [&](Error Err) {
+ if (Err)
+ reportError(std::move(Err));
+ };
+ }
+
+#else
+ SymbolMap Result;
+ Error ResolutionError = Error::success();
+ Error ReadyError = Error::success();
+
+ auto OnResolve = [&](Expected<SymbolMap> R) {
+ ErrorAsOutParameter _(&ResolutionError);
+ if (R)
+ Result = std::move(*R);
+ else
+ ResolutionError = R.takeError();
+ };
+
+ std::function<void(Error)> OnReady;
+ if (WaitUntilReady) {
+ OnReady = [&](Error Err) {
+ ErrorAsOutParameter _(&ReadyError);
+ if (Err)
+ ReadyError = std::move(Err);
+ };
+ } else {
+ OnReady = [&](Error Err) {
+ if (Err)
+ reportError(std::move(Err));
+ };
+ }
+#endif
+
+ auto Query = std::make_shared<AsynchronousSymbolQuery>(
+ Names, std::move(OnResolve), std::move(OnReady));
+ // FIXME: This should be run session locked along with the registration code
+ // and error reporting below.
+ SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names));
+
+ // If the query was lodged successfully then register the dependencies,
+ // otherwise fail it with an error.
+ if (UnresolvedSymbols.empty())
+ RegisterDependencies(Query->QueryRegistrations);
+ else {
+ bool DeliverError = runSessionLocked([&]() {
+ Query->detach();
+ return Query->canStillFail();
+ });
+ auto Err = make_error<SymbolsNotFound>(std::move(UnresolvedSymbols));
+ if (DeliverError)
+ Query->handleFailed(std::move(Err));
+ else
+ reportError(std::move(Err));
+ }
+
+#if LLVM_ENABLE_THREADS
+ auto ResultFuture = PromisedResult.get_future();
+ auto Result = ResultFuture.get();
+
+ {
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ if (ResolutionError) {
+ // ReadyError will never be assigned. Consume the success value.
+ cantFail(std::move(ReadyError));
+ return std::move(ResolutionError);
+ }
+ }
+
+ if (WaitUntilReady) {
+ auto ReadyFuture = PromisedReady.get_future();
+ ReadyFuture.get();
+
+ {
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ if (ReadyError)
+ return std::move(ReadyError);
+ }
+ } else
+ cantFail(std::move(ReadyError));
+
+ return std::move(Result);
+
+#else
+ if (ResolutionError) {
+ // ReadyError will never be assigned. Consume the success value.
+ cantFail(std::move(ReadyError));
+ return std::move(ResolutionError);
+ }
+
+ if (ReadyError)
+ return std::move(ReadyError);
+
+ return Result;
+#endif
}
-/// Look up a symbol by searching a list of VSOs.
-Expected<JITEvaluatedSymbol> lookup(const VSOList &VSOs, SymbolStringPtr Name) {
+void ExecutionSession::lookup(
+ const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
+ SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
+ RegisterDependenciesFunction RegisterDependencies) {
+
+ // lookup can be re-entered recursively if running on a single thread. Run any
+ // outstanding MUs in case this query depends on them, otherwise this lookup
+ // will starve waiting for a result from an MU that is stuck in the queue.
+ runOutstandingMUs();
+
+ auto Unresolved = std::move(Symbols);
+ std::map<JITDylib *, MaterializationUnitList> CollectedMUsMap;
+ auto Q = std::make_shared<AsynchronousSymbolQuery>(
+ Unresolved, std::move(OnResolve), std::move(OnReady));
+ bool QueryIsFullyResolved = false;
+ bool QueryIsFullyReady = false;
+ bool QueryFailed = false;
+
+ runSessionLocked([&]() {
+ for (auto &KV : SearchOrder) {
+ assert(KV.first && "JITDylibList entries must not be null");
+ assert(!CollectedMUsMap.count(KV.first) &&
+ "JITDylibList should not contain duplicate entries");
+
+ auto &JD = *KV.first;
+ auto MatchNonExported = KV.second;
+ JD.lodgeQuery(Q, Unresolved, MatchNonExported, CollectedMUsMap[&JD]);
+ }
+
+ if (Unresolved.empty()) {
+ // Query lodged successfully.
+
+ // Record whether this query is fully ready / resolved. We will use
+ // this to call handleFullyResolved/handleFullyReady outside the session
+ // lock.
+ QueryIsFullyResolved = Q->isFullyResolved();
+ QueryIsFullyReady = Q->isFullyReady();
+
+ // Call the register dependencies function.
+ if (RegisterDependencies && !Q->QueryRegistrations.empty())
+ RegisterDependencies(Q->QueryRegistrations);
+ } else {
+ // Query failed due to unresolved symbols.
+ QueryFailed = true;
+
+ // Disconnect the query from its dependencies.
+ Q->detach();
+
+ // Replace the MUs.
+ for (auto &KV : CollectedMUsMap)
+ for (auto &MU : KV.second)
+ KV.first->replace(std::move(MU));
+ }
+ });
+
+ if (QueryFailed) {
+ Q->handleFailed(make_error<SymbolsNotFound>(std::move(Unresolved)));
+ return;
+ } else {
+ if (QueryIsFullyResolved)
+ Q->handleFullyResolved();
+ if (QueryIsFullyReady)
+ Q->handleFullyReady();
+ }
+
+ // Move the MUs to the OutstandingMUs list, then materialize.
+ {
+ std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
+
+ for (auto &KV : CollectedMUsMap)
+ for (auto &MU : KV.second)
+ OutstandingMUs.push_back(std::make_pair(KV.first, std::move(MU)));
+ }
+
+ runOutstandingMUs();
+}
+
+Expected<SymbolMap> ExecutionSession::lookup(
+ const JITDylibSearchList &SearchOrder, const SymbolNameSet &Symbols,
+ RegisterDependenciesFunction RegisterDependencies, bool WaitUntilReady) {
+#if LLVM_ENABLE_THREADS
+ // In the threaded case we use promises to return the results.
+ std::promise<SymbolMap> PromisedResult;
+ std::mutex ErrMutex;
+ Error ResolutionError = Error::success();
+ std::promise<void> PromisedReady;
+ Error ReadyError = Error::success();
+ auto OnResolve = [&](Expected<SymbolMap> R) {
+ if (R)
+ PromisedResult.set_value(std::move(*R));
+ else {
+ {
+ ErrorAsOutParameter _(&ResolutionError);
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ ResolutionError = R.takeError();
+ }
+ PromisedResult.set_value(SymbolMap());
+ }
+ };
+
+ std::function<void(Error)> OnReady;
+ if (WaitUntilReady) {
+ OnReady = [&](Error Err) {
+ if (Err) {
+ ErrorAsOutParameter _(&ReadyError);
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ ReadyError = std::move(Err);
+ }
+ PromisedReady.set_value();
+ };
+ } else {
+ OnReady = [&](Error Err) {
+ if (Err)
+ reportError(std::move(Err));
+ };
+ }
+
+#else
+ SymbolMap Result;
+ Error ResolutionError = Error::success();
+ Error ReadyError = Error::success();
+
+ auto OnResolve = [&](Expected<SymbolMap> R) {
+ ErrorAsOutParameter _(&ResolutionError);
+ if (R)
+ Result = std::move(*R);
+ else
+ ResolutionError = R.takeError();
+ };
+
+ std::function<void(Error)> OnReady;
+ if (WaitUntilReady) {
+ OnReady = [&](Error Err) {
+ ErrorAsOutParameter _(&ReadyError);
+ if (Err)
+ ReadyError = std::move(Err);
+ };
+ } else {
+ OnReady = [&](Error Err) {
+ if (Err)
+ reportError(std::move(Err));
+ };
+ }
+#endif
+
+ // Perform the asynchronous lookup.
+ lookup(SearchOrder, Symbols, OnResolve, OnReady, RegisterDependencies);
+
+#if LLVM_ENABLE_THREADS
+ auto ResultFuture = PromisedResult.get_future();
+ auto Result = ResultFuture.get();
+
+ {
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ if (ResolutionError) {
+ // ReadyError will never be assigned. Consume the success value.
+ cantFail(std::move(ReadyError));
+ return std::move(ResolutionError);
+ }
+ }
+
+ if (WaitUntilReady) {
+ auto ReadyFuture = PromisedReady.get_future();
+ ReadyFuture.get();
+
+ {
+ std::lock_guard<std::mutex> Lock(ErrMutex);
+ if (ReadyError)
+ return std::move(ReadyError);
+ }
+ } else
+ cantFail(std::move(ReadyError));
+
+ return std::move(Result);
+
+#else
+ if (ResolutionError) {
+ // ReadyError will never be assigned. Consume the success value.
+ cantFail(std::move(ReadyError));
+ return std::move(ResolutionError);
+ }
+
+ if (ReadyError)
+ return std::move(ReadyError);
+
+ return Result;
+#endif
+}
+
+Expected<JITEvaluatedSymbol>
+ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
+ SymbolStringPtr Name) {
SymbolNameSet Names({Name});
- if (auto ResultMap = lookup(VSOs, std::move(Names))) {
+
+ if (auto ResultMap = lookup(SearchOrder, std::move(Names),
+ NoDependenciesToRegister, true)) {
assert(ResultMap->size() == 1 && "Unexpected number of results");
assert(ResultMap->count(Name) && "Missing result for symbol");
return std::move(ResultMap->begin()->second);
@@ -1673,8 +1942,53 @@ Expected<JITEvaluatedSymbol> lookup(const VSOList &VSOs, SymbolStringPtr Name) {
return ResultMap.takeError();
}
-MangleAndInterner::MangleAndInterner(ExecutionSessionBase &ES,
- const DataLayout &DL)
+Expected<JITEvaluatedSymbol>
+ExecutionSession::lookup(ArrayRef<JITDylib *> SearchOrder,
+ SymbolStringPtr Name) {
+ SymbolNameSet Names({Name});
+
+ JITDylibSearchList FullSearchOrder;
+ FullSearchOrder.reserve(SearchOrder.size());
+ for (auto *JD : SearchOrder)
+ FullSearchOrder.push_back({JD, false});
+
+ return lookup(FullSearchOrder, Name);
+}
+
+Expected<JITEvaluatedSymbol>
+ExecutionSession::lookup(ArrayRef<JITDylib *> SearchOrder, StringRef Name) {
+ return lookup(SearchOrder, intern(Name));
+}
+
+void ExecutionSession::dump(raw_ostream &OS) {
+ runSessionLocked([this, &OS]() {
+ for (auto &JD : JDs)
+ JD->dump(OS);
+ });
+}
+
+void ExecutionSession::runOutstandingMUs() {
+ while (1) {
+ std::pair<JITDylib *, std::unique_ptr<MaterializationUnit>> JITDylibAndMU;
+
+ {
+ std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
+ if (!OutstandingMUs.empty()) {
+ JITDylibAndMU = std::move(OutstandingMUs.back());
+ OutstandingMUs.pop_back();
+ }
+ }
+
+ if (JITDylibAndMU.first) {
+ assert(JITDylibAndMU.second && "JITDylib, but no MU?");
+ dispatchMaterialization(*JITDylibAndMU.first,
+ std::move(JITDylibAndMU.second));
+ } else
+ break;
+ }
+}
+
+MangleAndInterner::MangleAndInterner(ExecutionSession &ES, const DataLayout &DL)
: ES(ES), DL(DL) {}
SymbolStringPtr MangleAndInterner::operator()(StringRef Name) {
@@ -1683,7 +1997,7 @@ SymbolStringPtr MangleAndInterner::operator()(StringRef Name) {
raw_string_ostream MangledNameStream(MangledName);
Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
}
- return ES.getSymbolStringPool().intern(MangledName);
+ return ES.intern(MangledName);
}
} // End namespace orc.
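
The Core.cpp hunks above rename VSO to JITDylib and funnel symbol resolution through ExecutionSession::lookup, whose JITDylibSearchList entries pair a JITDylib with a MatchNonExported flag. A minimal caller-side sketch of the renamed API, assuming the post-patch Orc headers and an already populated JITDylib (illustration only, not part of the patch):

#include "llvm/ExecutionEngine/Orc/Core.h"

// Look up "main" in a single JITDylib, matching non-exported symbols as well.
llvm::Expected<llvm::JITEvaluatedSymbol>
lookupMain(llvm::orc::ExecutionSession &ES, llvm::orc::JITDylib &JD) {
  using namespace llvm::orc;
  return ES.lookup(JITDylibSearchList({{&JD, true}}), ES.intern("main"));
}

The blocking ExecutionSession::lookup overloads shown in the hunks wrap the asynchronous form in promise/future plumbing when LLVM_ENABLE_THREADS is set, so a call like the one above returns only once the symbol is ready or the query has failed.
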
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 6157677ce355..7c3c50b4d6e5 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -19,45 +19,6 @@
namespace llvm {
namespace orc {
-JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT)
- : TT(std::move(TT)) {}
-
-Expected<JITTargetMachineBuilder> JITTargetMachineBuilder::detectHost() {
- return JITTargetMachineBuilder(Triple(sys::getProcessTriple()));
-}
-
-Expected<std::unique_ptr<TargetMachine>>
-JITTargetMachineBuilder::createTargetMachine() {
- if (!Arch.empty()) {
- Triple::ArchType Type = Triple::getArchTypeForLLVMName(Arch);
-
- if (Type == Triple::UnknownArch)
- return make_error<StringError>(std::string("Unknown arch: ") + Arch,
- inconvertibleErrorCode());
- }
-
- std::string ErrMsg;
- auto *TheTarget = TargetRegistry::lookupTarget(TT.getTriple(), ErrMsg);
- if (!TheTarget)
- return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
-
- auto *TM =
- TheTarget->createTargetMachine(TT.getTriple(), CPU, Features.getString(),
- Options, RM, CM, OptLevel, /*JIT*/ true);
- if (!TM)
- return make_error<StringError>("Could not allocate target machine",
- inconvertibleErrorCode());
-
- return std::unique_ptr<TargetMachine>(TM);
-}
-
-JITTargetMachineBuilder &JITTargetMachineBuilder::addFeatures(
- const std::vector<std::string> &FeatureVec) {
- for (const auto &F : FeatureVec)
- Features.AddFeature(F);
- return *this;
-}
-
CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End)
: InitList(
GV ? dyn_cast_or_null<ConstantArray>(GV->getInitializer()) : nullptr),
@@ -126,18 +87,24 @@ iterator_range<CtorDtorIterator> getDestructors(const Module &M) {
CtorDtorIterator(DtorsList, true));
}
-void CtorDtorRunner2::add(iterator_range<CtorDtorIterator> CtorDtors) {
- if (CtorDtors.begin() == CtorDtors.end())
+void CtorDtorRunner::add(iterator_range<CtorDtorIterator> CtorDtors) {
+ if (empty(CtorDtors))
return;
MangleAndInterner Mangle(
- V.getExecutionSession(),
+ JD.getExecutionSession(),
(*CtorDtors.begin()).Func->getParent()->getDataLayout());
for (const auto &CtorDtor : CtorDtors) {
assert(CtorDtor.Func && CtorDtor.Func->hasName() &&
"Ctor/Dtor function must be named to be runnable under the JIT");
+ // FIXME: Maybe use a symbol promoter here instead.
+ if (CtorDtor.Func->hasLocalLinkage()) {
+ CtorDtor.Func->setLinkage(GlobalValue::ExternalLinkage);
+ CtorDtor.Func->setVisibility(GlobalValue::HiddenVisibility);
+ }
+
if (CtorDtor.Data && cast<GlobalValue>(CtorDtor.Data)->isDeclaration()) {
dbgs() << " Skipping because why now?\n";
continue;
@@ -148,7 +115,7 @@ void CtorDtorRunner2::add(iterator_range<CtorDtorIterator> CtorDtors) {
}
}
-Error CtorDtorRunner2::run() {
+Error CtorDtorRunner::run() {
using CtorDtorTy = void (*)();
SymbolNameSet Names;
@@ -161,7 +128,10 @@ Error CtorDtorRunner2::run() {
}
}
- if (auto CtorDtorMap = lookup({&V}, std::move(Names))) {
+ auto &ES = JD.getExecutionSession();
+ if (auto CtorDtorMap =
+ ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names),
+ NoDependenciesToRegister, true)) {
for (auto &KV : CtorDtorsByPriority) {
for (auto &Name : KV.second) {
assert(CtorDtorMap->count(Name) && "No entry for Name");
@@ -195,32 +165,46 @@ int LocalCXXRuntimeOverridesBase::CXAAtExitOverride(DestructorPtr Destructor,
return 0;
}
-Error LocalCXXRuntimeOverrides2::enable(VSO &V, MangleAndInterner &Mangle) {
- SymbolMap RuntimeInterposes(
- {{Mangle("__dso_handle"),
- JITEvaluatedSymbol(toTargetAddress(&DSOHandleOverride),
- JITSymbolFlags::Exported)},
- {Mangle("__cxa_atexit"),
- JITEvaluatedSymbol(toTargetAddress(&CXAAtExitOverride),
- JITSymbolFlags::Exported)}});
+Error LocalCXXRuntimeOverrides::enable(JITDylib &JD,
+ MangleAndInterner &Mangle) {
+ SymbolMap RuntimeInterposes;
+ RuntimeInterposes[Mangle("__dso_handle")] =
+ JITEvaluatedSymbol(toTargetAddress(&DSOHandleOverride),
+ JITSymbolFlags::Exported);
+ RuntimeInterposes[Mangle("__cxa_atexit")] =
+ JITEvaluatedSymbol(toTargetAddress(&CXAAtExitOverride),
+ JITSymbolFlags::Exported);
- return V.define(absoluteSymbols(std::move(RuntimeInterposes)));
+ return JD.define(absoluteSymbols(std::move(RuntimeInterposes)));
}
-DynamicLibraryFallbackGenerator::DynamicLibraryFallbackGenerator(
+DynamicLibrarySearchGenerator::DynamicLibrarySearchGenerator(
sys::DynamicLibrary Dylib, const DataLayout &DL, SymbolPredicate Allow)
: Dylib(std::move(Dylib)), Allow(std::move(Allow)),
GlobalPrefix(DL.getGlobalPrefix()) {}
-SymbolNameSet DynamicLibraryFallbackGenerator::
-operator()(VSO &V, const SymbolNameSet &Names) {
+Expected<DynamicLibrarySearchGenerator>
+DynamicLibrarySearchGenerator::Load(const char *FileName, const DataLayout &DL,
+ SymbolPredicate Allow) {
+ std::string ErrMsg;
+ auto Lib = sys::DynamicLibrary::getPermanentLibrary(FileName, &ErrMsg);
+ if (!Lib.isValid())
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+ return DynamicLibrarySearchGenerator(std::move(Lib), DL, std::move(Allow));
+}
+
+SymbolNameSet DynamicLibrarySearchGenerator::
+operator()(JITDylib &JD, const SymbolNameSet &Names) {
orc::SymbolNameSet Added;
orc::SymbolMap NewSymbols;
bool HasGlobalPrefix = (GlobalPrefix != '\0');
for (auto &Name : Names) {
- if (!Allow(Name) || (*Name).empty())
+ if ((*Name).empty())
+ continue;
+
+ if (Allow && !Allow(Name))
continue;
if (HasGlobalPrefix && (*Name).front() != GlobalPrefix)
@@ -235,11 +219,11 @@ operator()(VSO &V, const SymbolNameSet &Names) {
}
}
- // Add any new symbols to V. Since the fallback generator is only called for
- // symbols that are not already defined, this will never trigger a duplicate
+ // Add any new symbols to JD. Since the generator is only called for symbols
+ // that are not already defined, this will never trigger a duplicate
// definition error, so we can wrap this call in a 'cantFail'.
if (!NewSymbols.empty())
- cantFail(V.define(absoluteSymbols(std::move(NewSymbols))));
+ cantFail(JD.define(absoluteSymbols(std::move(NewSymbols))));
return Added;
}
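
The ExecutionUtils.cpp hunks above turn the old fallback generator into DynamicLibrarySearchGenerator, adding a Load helper that opens a library by path. A sketch of wiring it up; the library path is a placeholder and the setGenerator call on JITDylib is assumed, since this diff only shows the DefGenerator member being consulted:

#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"

// Let unresolved names in JD fall back to symbols exported by a shared library.
llvm::Error addLibSymbols(llvm::orc::JITDylib &JD, const llvm::DataLayout &DL) {
  using namespace llvm::orc;
  auto Gen = DynamicLibrarySearchGenerator::Load(
      "/usr/lib/libm.so", DL,                        // placeholder path
      [](const SymbolStringPtr &) { return true; }); // allow every name
  if (!Gen)
    return Gen.takeError();
  JD.setGenerator(std::move(*Gen)); // assumed setter for the DefGenerator member
  return llvm::Error::success();
}
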
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index 0c17f9b7ad49..d952d1be70da 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -12,28 +12,28 @@
namespace llvm {
namespace orc {
-IRCompileLayer2::IRCompileLayer2(ExecutionSession &ES, ObjectLayer &BaseLayer,
+IRCompileLayer::IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
CompileFunction Compile)
: IRLayer(ES), BaseLayer(BaseLayer), Compile(std::move(Compile)) {}
-void IRCompileLayer2::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
+void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
std::lock_guard<std::mutex> Lock(IRLayerMutex);
this->NotifyCompiled = std::move(NotifyCompiled);
}
-void IRCompileLayer2::emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) {
- assert(M && "Module must not be null");
+void IRCompileLayer::emit(MaterializationResponsibility R,
+ ThreadSafeModule TSM) {
+ assert(TSM.getModule() && "Module must not be null");
- if (auto Obj = Compile(*M)) {
+ if (auto Obj = Compile(*TSM.getModule())) {
{
std::lock_guard<std::mutex> Lock(IRLayerMutex);
if (NotifyCompiled)
- NotifyCompiled(K, std::move(M));
+ NotifyCompiled(R.getVModuleKey(), std::move(TSM));
else
- M = nullptr;
+ TSM = ThreadSafeModule();
}
- BaseLayer.emit(std::move(R), std::move(K), std::move(*Obj));
+ BaseLayer.emit(std::move(R), std::move(*Obj));
} else {
R.failMaterialization();
getExecutionSession().reportError(Obj.takeError());
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index 4dd3cfdfe387..7bc0d696e3ac 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -13,20 +13,20 @@
namespace llvm {
namespace orc {
-IRTransformLayer2::IRTransformLayer2(ExecutionSession &ES,
+IRTransformLayer::IRTransformLayer(ExecutionSession &ES,
IRLayer &BaseLayer,
TransformFunction Transform)
: IRLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-void IRTransformLayer2::emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<Module> M) {
- assert(M && "Module must not be null");
+void IRTransformLayer::emit(MaterializationResponsibility R,
+ ThreadSafeModule TSM) {
+ assert(TSM.getModule() && "Module must not be null");
- if (auto TransformedMod = Transform(std::move(M)))
- BaseLayer.emit(std::move(R), std::move(K), std::move(*TransformedMod));
+ if (auto TransformedTSM = Transform(std::move(TSM), R))
+ BaseLayer.emit(std::move(R), std::move(*TransformedTSM));
else {
R.failMaterialization();
- getExecutionSession().reportError(TransformedMod.takeError());
+ getExecutionSession().reportError(TransformedTSM.takeError());
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 9ca2c5cb4a55..82000ec5b32b 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -27,19 +27,22 @@ public:
using CompileFunction = JITCompileCallbackManager::CompileFunction;
CompileCallbackMaterializationUnit(SymbolStringPtr Name,
- CompileFunction Compile)
- : MaterializationUnit(SymbolFlagsMap({{Name, JITSymbolFlags::Exported}})),
+ CompileFunction Compile, VModuleKey K)
+ : MaterializationUnit(SymbolFlagsMap({{Name, JITSymbolFlags::Exported}}),
+ std::move(K)),
Name(std::move(Name)), Compile(std::move(Compile)) {}
+ StringRef getName() const override { return "<Compile Callbacks>"; }
+
private:
- void materialize(MaterializationResponsibility R) {
+ void materialize(MaterializationResponsibility R) override {
SymbolMap Result;
Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
R.resolve(Result);
- R.finalize();
+ R.emit();
}
- void discard(const VSO &V, SymbolStringPtr Name) {
+ void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
llvm_unreachable("Discard should never occur on a LMU?");
}
@@ -52,20 +55,21 @@ private:
namespace llvm {
namespace orc {
-void JITCompileCallbackManager::anchor() {}
void IndirectStubsManager::anchor() {}
+void TrampolinePool::anchor() {}
Expected<JITTargetAddress>
JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) {
- if (auto TrampolineAddr = getAvailableTrampolineAddr()) {
- auto CallbackName = ES.getSymbolStringPool().intern(
- std::string("cc") + std::to_string(++NextCallbackId));
+ if (auto TrampolineAddr = TP->getTrampoline()) {
+ auto CallbackName =
+ ES.intern(std::string("cc") + std::to_string(++NextCallbackId));
std::lock_guard<std::mutex> Lock(CCMgrMutex);
AddrToSymbol[*TrampolineAddr] = CallbackName;
- cantFail(CallbacksVSO.define(
+ cantFail(CallbacksJD.define(
llvm::make_unique<CompileCallbackMaterializationUnit>(
- std::move(CallbackName), std::move(Compile))));
+ std::move(CallbackName), std::move(Compile),
+ ES.allocateVModule())));
return *TrampolineAddr;
} else
return TrampolineAddr.takeError();
@@ -88,7 +92,7 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
{
raw_string_ostream ErrMsgStream(ErrMsg);
ErrMsgStream << "No compile callback for trampoline at "
- << format("0x%016x", TrampolineAddr);
+ << format("0x%016" PRIx64, TrampolineAddr);
}
ES.reportError(
make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode()));
@@ -97,9 +101,10 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
Name = I->second;
}
- if (auto Sym = lookup({&CallbacksVSO}, Name))
+ if (auto Sym = ES.lookup(JITDylibSearchList({{&CallbacksJD, true}}), Name))
return Sym->getAddress();
else {
+ llvm::dbgs() << "Didn't find callback.\n";
// If anything goes wrong materializing Sym then report it to the session
// and return the ErrorHandlerAddress;
ES.reportError(Sym.takeError());
@@ -107,29 +112,46 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
}
}
-std::unique_ptr<JITCompileCallbackManager>
+Expected<std::unique_ptr<JITCompileCallbackManager>>
createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
JITTargetAddress ErrorHandlerAddress) {
switch (T.getArch()) {
- default: return nullptr;
-
- case Triple::aarch64: {
- typedef orc::LocalJITCompileCallbackManager<orc::OrcAArch64> CCMgrT;
- return llvm::make_unique<CCMgrT>(ES, ErrorHandlerAddress);
+ default:
+ return make_error<StringError>(
+ std::string("No callback manager available for ") + T.str(),
+ inconvertibleErrorCode());
+ case Triple::aarch64: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcAArch64> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
}
case Triple::x86: {
typedef orc::LocalJITCompileCallbackManager<orc::OrcI386> CCMgrT;
- return llvm::make_unique<CCMgrT>(ES, ErrorHandlerAddress);
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
+ }
+
+ case Triple::mips: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcMips32Be> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
+ }
+ case Triple::mipsel: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcMips32Le> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
+ }
+
+ case Triple::mips64:
+ case Triple::mips64el: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcMips64> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
}
case Triple::x86_64: {
if ( T.getOS() == Triple::OSType::Win32 ) {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
- return llvm::make_unique<CCMgrT>(ES, ErrorHandlerAddress);
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
} else {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_SysV> CCMgrT;
- return llvm::make_unique<CCMgrT>(ES, ErrorHandlerAddress);
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
}
}
@@ -157,6 +179,25 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
orc::LocalIndirectStubsManager<orc::OrcI386>>();
};
+ case Triple::mips:
+ return [](){
+ return llvm::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcMips32Be>>();
+ };
+
+ case Triple::mipsel:
+ return [](){
+ return llvm::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcMips32Le>>();
+ };
+
+ case Triple::mips64:
+ case Triple::mips64el:
+ return [](){
+ return llvm::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcMips64>>();
+ };
+
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32) {
return [](){
@@ -210,57 +251,34 @@ void makeStub(Function &F, Value &ImplPointer) {
Builder.CreateRet(Call);
}
-// Utility class for renaming global values and functions during partitioning.
-class GlobalRenamer {
-public:
-
- static bool needsRenaming(const Value &New) {
- return !New.hasName() || New.getName().startswith("\01L");
- }
-
- const std::string& getRename(const Value &Orig) {
- // See if we have a name for this global.
- {
- auto I = Names.find(&Orig);
- if (I != Names.end())
- return I->second;
+std::vector<GlobalValue *> SymbolLinkagePromoter::operator()(Module &M) {
+ std::vector<GlobalValue *> PromotedGlobals;
+
+ for (auto &GV : M.global_values()) {
+ bool Promoted = true;
+
+ // Rename if necessary.
+ if (!GV.hasName())
+ GV.setName("__orc_anon." + Twine(NextId++));
+ else if (GV.getName().startswith("\01L"))
+ GV.setName("__" + GV.getName().substr(1) + "." + Twine(NextId++));
+ else if (GV.hasLocalLinkage())
+ GV.setName("__orc_lcl." + GV.getName() + "." + Twine(NextId++));
+ else
+ Promoted = false;
+
+ if (GV.hasLocalLinkage()) {
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+ Promoted = true;
}
+ GV.setUnnamedAddr(GlobalValue::UnnamedAddr::None);
- // Nope. Create a new one.
- // FIXME: Use a more robust uniquing scheme. (This may blow up if the user
- // writes a "__orc_anon[[:digit:]]* method).
- unsigned ID = Names.size();
- std::ostringstream NameStream;
- NameStream << "__orc_anon" << ID++;
- auto I = Names.insert(std::make_pair(&Orig, NameStream.str()));
- return I.first->second;
+ if (Promoted)
+ PromotedGlobals.push_back(&GV);
}
-private:
- DenseMap<const Value*, std::string> Names;
-};
-
-static void raiseVisibilityOnValue(GlobalValue &V, GlobalRenamer &R) {
- if (V.hasLocalLinkage()) {
- if (R.needsRenaming(V))
- V.setName(R.getRename(V));
- V.setLinkage(GlobalValue::ExternalLinkage);
- V.setVisibility(GlobalValue::HiddenVisibility);
- }
- V.setUnnamedAddr(GlobalValue::UnnamedAddr::None);
- assert(!R.needsRenaming(V) && "Invalid global name.");
-}
-
-void makeAllSymbolsExternallyAccessible(Module &M) {
- GlobalRenamer Renamer;
-
- for (auto &F : M)
- raiseVisibilityOnValue(F, Renamer);
-
- for (auto &GV : M.globals())
- raiseVisibilityOnValue(GV, Renamer);
- for (auto &A : M.aliases())
- raiseVisibilityOnValue(A, Renamer);
+ return PromotedGlobals;
}
Function* cloneFunctionDecl(Module &Dst, const Function &F,
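
The IndirectionUtils.cpp hunks above replace the GlobalRenamer/makeAllSymbolsExternallyAccessible pair with SymbolLinkagePromoter, which renames and externalizes local globals and reports what it changed. A short usage sketch, with the calling context assumed:

#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/IR/Module.h"

// Promote local definitions so they remain referencable across JITDylib
// boundaries once the module is split up for the JIT.
std::vector<llvm::GlobalValue *> promoteForJIT(llvm::Module &M) {
  llvm::orc::SymbolLinkagePromoter PromoteSymbols;
  return PromoteSymbols(M); // returns the globals whose linkage or names changed
}
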
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
new file mode 100644
index 000000000000..4af09d196ff9
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -0,0 +1,55 @@
+//===----- JITTargetMachineBuilder.cpp - Build TargetMachines for JIT -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+
+#include "llvm/Support/TargetRegistry.h"
+
+namespace llvm {
+namespace orc {
+
+JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT)
+ : TT(std::move(TT)) {
+ Options.EmulatedTLS = true;
+ Options.ExplicitEmulatedTLS = true;
+}
+
+Expected<JITTargetMachineBuilder> JITTargetMachineBuilder::detectHost() {
+ // FIXME: getProcessTriple is bogus. It returns the host LLVM was compiled on,
+ // rather than a valid triple for the current process.
+ return JITTargetMachineBuilder(Triple(sys::getProcessTriple()));
+}
+
+Expected<std::unique_ptr<TargetMachine>>
+JITTargetMachineBuilder::createTargetMachine() {
+
+ std::string ErrMsg;
+ auto *TheTarget = TargetRegistry::lookupTarget(TT.getTriple(), ErrMsg);
+ if (!TheTarget)
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+
+ auto *TM =
+ TheTarget->createTargetMachine(TT.getTriple(), CPU, Features.getString(),
+ Options, RM, CM, OptLevel, /*JIT*/ true);
+ if (!TM)
+ return make_error<StringError>("Could not allocate target machine",
+ inconvertibleErrorCode());
+
+ return std::unique_ptr<TargetMachine>(TM);
+}
+
+JITTargetMachineBuilder &JITTargetMachineBuilder::addFeatures(
+ const std::vector<std::string> &FeatureVec) {
+ for (const auto &F : FeatureVec)
+ Features.AddFeature(F);
+ return *this;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
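
JITTargetMachineBuilder now lives in its own file and enables emulated TLS by default. A sketch of the usual detect-then-build flow, with error handling abbreviated and the follow-on use of the TargetMachine left as an assumption:

#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/Target/TargetMachine.h"

// Detect the host, create a TargetMachine, and derive a matching DataLayout.
llvm::Error buildHostTargetMachine() {
  auto JTMB = llvm::orc::JITTargetMachineBuilder::detectHost();
  if (!JTMB)
    return JTMB.takeError();
  auto TM = JTMB->createTargetMachine();
  if (!TM)
    return TM.takeError();
  llvm::DataLayout DL = (*TM)->createDataLayout();
  (void)DL; // e.g. hand TM/DL to LLJIT::Create, as the LLJIT.cpp hunks below do
  return llvm::Error::success();
}
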
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 52ff4efe56b2..e2089f9106bd 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -12,49 +12,109 @@
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Mangler.h"
+namespace {
+
+ // A SimpleCompiler that owns its TargetMachine.
+ class TMOwningSimpleCompiler : public llvm::orc::SimpleCompiler {
+ public:
+ TMOwningSimpleCompiler(std::unique_ptr<llvm::TargetMachine> TM)
+ : llvm::orc::SimpleCompiler(*TM), TM(std::move(TM)) {}
+ private:
+ // FIXME: shared because std::functions (and thus
+ // IRCompileLayer::CompileFunction) are not moveable.
+ std::shared_ptr<llvm::TargetMachine> TM;
+ };
+
+} // end anonymous namespace
+
namespace llvm {
namespace orc {
+LLJIT::~LLJIT() {
+ if (CompileThreads)
+ CompileThreads->wait();
+}
+
Expected<std::unique_ptr<LLJIT>>
-LLJIT::Create(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL) {
- return std::unique_ptr<LLJIT>(
- new LLJIT(std::move(ES), std::move(TM), std::move(DL)));
+LLJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+ unsigned NumCompileThreads) {
+
+ if (NumCompileThreads == 0) {
+ // If NumCompileThreads == 0 then create a single-threaded LLJIT instance.
+ auto TM = JTMB.createTargetMachine();
+ if (!TM)
+ return TM.takeError();
+ return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
+ std::move(*TM), std::move(DL)));
+ }
+
+ return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
+ std::move(JTMB), std::move(DL),
+ NumCompileThreads));
}
Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) {
- auto InternedName = ES->getSymbolStringPool().intern(Name);
+ auto InternedName = ES->intern(Name);
SymbolMap Symbols({{InternedName, Sym}});
return Main.define(absoluteSymbols(std::move(Symbols)));
}
-Error LLJIT::addIRModule(VSO &V, std::unique_ptr<Module> M) {
- assert(M && "Can not add null module");
+Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) {
+ assert(TSM && "Can not add null module");
- if (auto Err = applyDataLayout(*M))
+ if (auto Err = applyDataLayout(*TSM.getModule()))
return Err;
- auto K = ES->allocateVModule();
- return CompileLayer.add(V, K, std::move(M));
+ return CompileLayer.add(JD, std::move(TSM), ES->allocateVModule());
+}
+
+Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
+ assert(Obj && "Can not add null object");
+
+ return ObjLinkingLayer.add(JD, std::move(Obj), ES->allocateVModule());
}
-Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(VSO &V,
+Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
StringRef Name) {
- return llvm::orc::lookup({&V}, ES->getSymbolStringPool().intern(Name));
+ return ES->lookup(JITDylibSearchList({{&JD, true}}), ES->intern(Name));
}
LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES,
std::unique_ptr<TargetMachine> TM, DataLayout DL)
- : ES(std::move(ES)), Main(this->ES->createVSO("main")), TM(std::move(TM)),
- DL(std::move(DL)),
- ObjLinkingLayer(*this->ES,
- [this](VModuleKey K) { return getMemoryManager(K); }),
- CompileLayer(*this->ES, ObjLinkingLayer, SimpleCompiler(*this->TM)),
+ : ES(std::move(ES)), Main(this->ES->getMainJITDylib()), DL(std::move(DL)),
+ ObjLinkingLayer(
+ *this->ES,
+ []() { return llvm::make_unique<SectionMemoryManager>(); }),
+ CompileLayer(*this->ES, ObjLinkingLayer,
+ TMOwningSimpleCompiler(std::move(TM))),
CtorRunner(Main), DtorRunner(Main) {}
-std::shared_ptr<RuntimeDyld::MemoryManager>
-LLJIT::getMemoryManager(VModuleKey K) {
- return llvm::make_unique<SectionMemoryManager>();
+LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+ DataLayout DL, unsigned NumCompileThreads)
+ : ES(std::move(ES)), Main(this->ES->getMainJITDylib()), DL(std::move(DL)),
+ ObjLinkingLayer(
+ *this->ES,
+ []() { return llvm::make_unique<SectionMemoryManager>(); }),
+ CompileLayer(*this->ES, ObjLinkingLayer,
+ ConcurrentIRCompiler(std::move(JTMB))),
+ CtorRunner(Main), DtorRunner(Main) {
+ assert(NumCompileThreads != 0 &&
+ "Multithreaded LLJIT instance can not be created with 0 threads");
+
+ // Move modules to new contexts when they're emitted so that we can compile
+ // them in parallel.
+ CompileLayer.setCloneToNewContextOnEmit(true);
+
+ // Create a thread pool to compile on and set the execution session
+ // dispatcher to use the thread pool.
+ CompileThreads = llvm::make_unique<ThreadPool>(NumCompileThreads);
+ this->ES->setDispatchMaterialization(
+ [this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
+ // FIXME: Switch to move capture once we have c++14.
+ auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
+ auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); };
+ CompileThreads->async(std::move(Work));
+ });
}
std::string LLJIT::mangle(StringRef UnmangledName) {
@@ -84,16 +144,15 @@ void LLJIT::recordCtorDtors(Module &M) {
}
Expected<std::unique_ptr<LLLazyJIT>>
-LLLazyJIT::Create(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL,
- LLVMContext &Ctx) {
- const Triple &TT = TM->getTargetTriple();
+LLLazyJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
+ JITTargetAddress ErrorAddr, unsigned NumCompileThreads) {
+ auto ES = llvm::make_unique<ExecutionSession>();
- auto CCMgr = createLocalCompileCallbackManager(TT, *ES, 0);
- if (!CCMgr)
- return make_error<StringError>(
- std::string("No callback manager available for ") + TT.str(),
- inconvertibleErrorCode());
+ const Triple &TT = JTMB.getTargetTriple();
+
+ auto LCTMgr = createLocalLazyCallThroughManager(TT, *ES, ErrorAddr);
+ if (!LCTMgr)
+ return LCTMgr.takeError();
auto ISMBuilder = createLocalIndirectStubsManagerBuilder(TT);
if (!ISMBuilder)
@@ -101,34 +160,51 @@ LLLazyJIT::Create(std::unique_ptr<ExecutionSession> ES,
std::string("No indirect stubs manager builder for ") + TT.str(),
inconvertibleErrorCode());
- return std::unique_ptr<LLLazyJIT>(
- new LLLazyJIT(std::move(ES), std::move(TM), std::move(DL), Ctx,
- std::move(CCMgr), std::move(ISMBuilder)));
+ if (NumCompileThreads == 0) {
+ auto TM = JTMB.createTargetMachine();
+ if (!TM)
+ return TM.takeError();
+ return std::unique_ptr<LLLazyJIT>(
+ new LLLazyJIT(std::move(ES), std::move(*TM), std::move(DL),
+ std::move(*LCTMgr), std::move(ISMBuilder)));
+ }
+
+ return std::unique_ptr<LLLazyJIT>(new LLLazyJIT(
+ std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads,
+ std::move(*LCTMgr), std::move(ISMBuilder)));
}
-Error LLLazyJIT::addLazyIRModule(VSO &V, std::unique_ptr<Module> M) {
- assert(M && "Can not add null module");
+Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
+ assert(TSM && "Can not add null module");
- if (auto Err = applyDataLayout(*M))
+ if (auto Err = applyDataLayout(*TSM.getModule()))
return Err;
- makeAllSymbolsExternallyAccessible(*M);
+ recordCtorDtors(*TSM.getModule());
- recordCtorDtors(*M);
-
- auto K = ES->allocateVModule();
- return CODLayer.add(V, K, std::move(M));
+ return CODLayer.add(JD, std::move(TSM), ES->allocateVModule());
}
LLLazyJIT::LLLazyJIT(
std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
- DataLayout DL, LLVMContext &Ctx,
- std::unique_ptr<JITCompileCallbackManager> CCMgr,
+ DataLayout DL, std::unique_ptr<LazyCallThroughManager> LCTMgr,
std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
: LLJIT(std::move(ES), std::move(TM), std::move(DL)),
- CCMgr(std::move(CCMgr)), TransformLayer(*this->ES, CompileLayer),
- CODLayer(*this->ES, TransformLayer, *this->CCMgr, std::move(ISMBuilder),
- [&]() -> LLVMContext & { return Ctx; }) {}
+ LCTMgr(std::move(LCTMgr)), TransformLayer(*this->ES, CompileLayer),
+ CODLayer(*this->ES, TransformLayer, *this->LCTMgr,
+ std::move(ISMBuilder)) {}
+
+LLLazyJIT::LLLazyJIT(
+ std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
+ DataLayout DL, unsigned NumCompileThreads,
+ std::unique_ptr<LazyCallThroughManager> LCTMgr,
+ std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
+ : LLJIT(std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads),
+ LCTMgr(std::move(LCTMgr)), TransformLayer(*this->ES, CompileLayer),
+ CODLayer(*this->ES, TransformLayer, *this->LCTMgr,
+ std::move(ISMBuilder)) {
+ CODLayer.setCloneToNewContextOnEmit(true);
+}
} // End namespace orc.
} // End namespace llvm.
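The multi-threaded constructor above wraps each move-only MaterializationUnit in a shared_ptr before queueing it, because pre-C++14 lambdas cannot move-capture. A standalone sketch of that dispatch pattern; WorkItem and dispatchOnPool are illustrative stand-ins, not part of the patch:

    #include "llvm/Support/ThreadPool.h"
    #include <memory>

    struct WorkItem {
      void run() { /* e.g. materialize one module */ }
    };

    // Queue a move-only work item on the pool via a copyable shared_ptr capture.
    void dispatchOnPool(llvm::ThreadPool &Pool, std::unique_ptr<WorkItem> WI) {
      auto SharedWI = std::shared_ptr<WorkItem>(std::move(WI));
      Pool.async([SharedWI]() { SharedWI->run(); });
    }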
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index b9da3b7fb8d5..11af76825e9f 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -9,7 +9,9 @@
#include "llvm/ExecutionEngine/Orc/Layer.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "orc"
namespace llvm {
namespace orc {
@@ -17,17 +19,19 @@ namespace orc {
IRLayer::IRLayer(ExecutionSession &ES) : ES(ES) {}
IRLayer::~IRLayer() {}
-Error IRLayer::add(VSO &V, VModuleKey K, std::unique_ptr<Module> M) {
- return V.define(llvm::make_unique<BasicIRLayerMaterializationUnit>(
- *this, std::move(K), std::move(M)));
+Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) {
+ return JD.define(llvm::make_unique<BasicIRLayerMaterializationUnit>(
+ *this, std::move(K), std::move(TSM)));
}
IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
- std::unique_ptr<Module> M)
- : MaterializationUnit(SymbolFlagsMap()), M(std::move(M)) {
+ ThreadSafeModule TSM, VModuleKey K)
+ : MaterializationUnit(SymbolFlagsMap(), std::move(K)), TSM(std::move(TSM)) {
+
+ assert(this->TSM && "Module must not be null");
- MangleAndInterner Mangle(ES, this->M->getDataLayout());
- for (auto &G : this->M->global_values()) {
+ MangleAndInterner Mangle(ES, this->TSM.getModule()->getDataLayout());
+ for (auto &G : this->TSM.getModule()->global_values()) {
if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() &&
!G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) {
auto MangledName = Mangle(G.getName());
@@ -38,12 +42,24 @@ IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
}
IRMaterializationUnit::IRMaterializationUnit(
- std::unique_ptr<Module> M, SymbolFlagsMap SymbolFlags,
+ ThreadSafeModule TSM, VModuleKey K, SymbolFlagsMap SymbolFlags,
SymbolNameToDefinitionMap SymbolToDefinition)
- : MaterializationUnit(std::move(SymbolFlags)), M(std::move(M)),
- SymbolToDefinition(std::move(SymbolToDefinition)) {}
+ : MaterializationUnit(std::move(SymbolFlags), std::move(K)),
+ TSM(std::move(TSM)), SymbolToDefinition(std::move(SymbolToDefinition)) {}
+
+StringRef IRMaterializationUnit::getName() const {
+ if (TSM.getModule())
+ return TSM.getModule()->getModuleIdentifier();
+ return "<null module>";
+}
+
+void IRMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
+ LLVM_DEBUG(JD.getExecutionSession().runSessionLocked([&]() {
+ dbgs() << "In " << JD.getName() << " discarding " << *Name << " from MU@"
+ << this << " (" << getName() << ")\n";
+ }););
-void IRMaterializationUnit::discard(const VSO &V, SymbolStringPtr Name) {
auto I = SymbolToDefinition.find(Name);
assert(I != SymbolToDefinition.end() &&
"Symbol not provided by this MU, or previously discarded");
@@ -54,53 +70,117 @@ void IRMaterializationUnit::discard(const VSO &V, SymbolStringPtr Name) {
}
BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit(
- IRLayer &L, VModuleKey K, std::unique_ptr<Module> M)
- : IRMaterializationUnit(L.getExecutionSession(), std::move(M)),
+ IRLayer &L, VModuleKey K, ThreadSafeModule TSM)
+ : IRMaterializationUnit(L.getExecutionSession(), std::move(TSM),
+ std::move(K)),
L(L), K(std::move(K)) {}
void BasicIRLayerMaterializationUnit::materialize(
MaterializationResponsibility R) {
- L.emit(std::move(R), std::move(K), std::move(M));
+
+ // Throw away the SymbolToDefinition map: it's not usable after we hand
+ // off the module.
+ SymbolToDefinition.clear();
+
+ // If cloneToNewContextOnEmit is set, clone the module now.
+ if (L.getCloneToNewContextOnEmit())
+ TSM = cloneToNewContext(TSM);
+
+#ifndef NDEBUG
+ auto &ES = R.getTargetJITDylib().getExecutionSession();
+#endif // NDEBUG
+
+ auto Lock = TSM.getContextLock();
+ LLVM_DEBUG(ES.runSessionLocked([&]() {
+ dbgs() << "Emitting, for " << R.getTargetJITDylib().getName() << ", "
+ << *this << "\n";
+ }););
+ L.emit(std::move(R), std::move(TSM));
+ LLVM_DEBUG(ES.runSessionLocked([&]() {
+ dbgs() << "Finished emitting, for " << R.getTargetJITDylib().getName()
+ << ", " << *this << "\n";
+ }););
}
ObjectLayer::ObjectLayer(ExecutionSession &ES) : ES(ES) {}
ObjectLayer::~ObjectLayer() {}
-Error ObjectLayer::add(VSO &V, VModuleKey K, std::unique_ptr<MemoryBuffer> O) {
- return V.define(llvm::make_unique<BasicObjectLayerMaterializationUnit>(
- *this, std::move(K), std::move(O)));
+Error ObjectLayer::add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O,
+ VModuleKey K) {
+ auto ObjMU = BasicObjectLayerMaterializationUnit::Create(*this, std::move(K),
+ std::move(O));
+ if (!ObjMU)
+ return ObjMU.takeError();
+ return JD.define(std::move(*ObjMU));
+}
+
+Expected<std::unique_ptr<BasicObjectLayerMaterializationUnit>>
+BasicObjectLayerMaterializationUnit::Create(ObjectLayer &L, VModuleKey K,
+ std::unique_ptr<MemoryBuffer> O) {
+ auto SymbolFlags =
+ getObjectSymbolFlags(L.getExecutionSession(), O->getMemBufferRef());
+
+ if (!SymbolFlags)
+ return SymbolFlags.takeError();
+
+ return std::unique_ptr<BasicObjectLayerMaterializationUnit>(
+ new BasicObjectLayerMaterializationUnit(L, K, std::move(O),
+ std::move(*SymbolFlags)));
}
BasicObjectLayerMaterializationUnit::BasicObjectLayerMaterializationUnit(
- ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O)
- : MaterializationUnit(SymbolFlagsMap()), L(L), K(std::move(K)),
- O(std::move(O)) {
-
- auto &ES = L.getExecutionSession();
- auto Obj = cantFail(
- object::ObjectFile::createObjectFile(this->O->getMemBufferRef()));
-
- for (auto &Sym : Obj->symbols()) {
- if (!(Sym.getFlags() & object::BasicSymbolRef::SF_Undefined) &&
- (Sym.getFlags() & object::BasicSymbolRef::SF_Exported)) {
- auto InternedName =
- ES.getSymbolStringPool().intern(cantFail(Sym.getName()));
- SymbolFlags[InternedName] = JITSymbolFlags::fromObjectSymbol(Sym);
- }
- }
+ ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O,
+ SymbolFlagsMap SymbolFlags)
+ : MaterializationUnit(std::move(SymbolFlags), std::move(K)), L(L),
+ O(std::move(O)) {}
+
+StringRef BasicObjectLayerMaterializationUnit::getName() const {
+ if (O)
+ return O->getBufferIdentifier();
+ return "<null object>";
}
void BasicObjectLayerMaterializationUnit::materialize(
MaterializationResponsibility R) {
- L.emit(std::move(R), std::move(K), std::move(O));
+ L.emit(std::move(R), std::move(O));
}
-void BasicObjectLayerMaterializationUnit::discard(const VSO &V,
- SymbolStringPtr Name) {
+void BasicObjectLayerMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
// FIXME: Support object file level discard. This could be done by building a
// filter to pass to the object layer along with the object itself.
}
+Expected<SymbolFlagsMap> getObjectSymbolFlags(ExecutionSession &ES,
+ MemoryBufferRef ObjBuffer) {
+ auto Obj = object::ObjectFile::createObjectFile(ObjBuffer);
+
+ if (!Obj)
+ return Obj.takeError();
+
+ SymbolFlagsMap SymbolFlags;
+ for (auto &Sym : (*Obj)->symbols()) {
+ // Skip symbols not defined in this object file.
+ if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(Sym.getFlags() & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+ SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ return SymbolFlags;
+}
+
} // End namespace orc.
} // End namespace llvm.
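A short sketch of feeding the new ThreadSafeModule-based IRLayer::add() above from client code. The ThreadSafeContext/ThreadSafeModule constructor forms, and getContext() returning a pointer, are assumptions based on the ThreadSafeModule.h header in this import; "demo" and addDemoModule are illustrative:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/Layer.h"
    #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    // Wrap a freshly built Module together with its context before adding it.
    llvm::Error addDemoModule(llvm::orc::IRLayer &L, llvm::orc::JITDylib &JD,
                              llvm::orc::ExecutionSession &ES) {
      llvm::orc::ThreadSafeContext TSCtx(llvm::make_unique<llvm::LLVMContext>());
      auto M = llvm::make_unique<llvm::Module>("demo", *TSCtx.getContext());
      llvm::orc::ThreadSafeModule TSM(std::move(M), std::move(TSCtx));
      return L.add(JD, std::move(TSM), ES.allocateVModule());
    }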
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
new file mode 100644
index 000000000000..55f4a7c5afce
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -0,0 +1,208 @@
+//===---------- LazyReexports.cpp - Utilities for lazy reexports ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+void LazyCallThroughManager::NotifyResolvedFunction::anchor() {}
+
+LazyCallThroughManager::LazyCallThroughManager(
+ ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr,
+ std::unique_ptr<TrampolinePool> TP)
+ : ES(ES), ErrorHandlerAddr(ErrorHandlerAddr), TP(std::move(TP)) {}
+
+Expected<JITTargetAddress> LazyCallThroughManager::getCallThroughTrampoline(
+ JITDylib &SourceJD, SymbolStringPtr SymbolName,
+ std::shared_ptr<NotifyResolvedFunction> NotifyResolved) {
+ std::lock_guard<std::mutex> Lock(LCTMMutex);
+ auto Trampoline = TP->getTrampoline();
+
+ if (!Trampoline)
+ return Trampoline.takeError();
+
+ Reexports[*Trampoline] = std::make_pair(&SourceJD, std::move(SymbolName));
+ Notifiers[*Trampoline] = std::move(NotifyResolved);
+ return *Trampoline;
+}
+
+JITTargetAddress
+LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) {
+ JITDylib *SourceJD = nullptr;
+ SymbolStringPtr SymbolName;
+
+ {
+ std::lock_guard<std::mutex> Lock(LCTMMutex);
+ auto I = Reexports.find(TrampolineAddr);
+ if (I == Reexports.end())
+ return ErrorHandlerAddr;
+ SourceJD = I->second.first;
+ SymbolName = I->second.second;
+ }
+
+ auto LookupResult = ES.lookup(JITDylibSearchList({{SourceJD, true}}),
+ {SymbolName}, NoDependenciesToRegister, true);
+
+ if (!LookupResult) {
+ ES.reportError(LookupResult.takeError());
+ return ErrorHandlerAddr;
+ }
+
+ assert(LookupResult->size() == 1 && "Unexpected number of results");
+ assert(LookupResult->count(SymbolName) && "Unexpected result");
+
+ auto ResolvedAddr = LookupResult->begin()->second.getAddress();
+
+ std::shared_ptr<NotifyResolvedFunction> NotifyResolved = nullptr;
+ {
+ std::lock_guard<std::mutex> Lock(LCTMMutex);
+ auto I = Notifiers.find(TrampolineAddr);
+ if (I != Notifiers.end()) {
+ NotifyResolved = I->second;
+ Notifiers.erase(I);
+ }
+ }
+
+ if (NotifyResolved) {
+ if (auto Err = (*NotifyResolved)(*SourceJD, SymbolName, ResolvedAddr)) {
+ ES.reportError(std::move(Err));
+ return ErrorHandlerAddr;
+ }
+ }
+
+ return ResolvedAddr;
+}
+
+Expected<std::unique_ptr<LazyCallThroughManager>>
+createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
+ JITTargetAddress ErrorHandlerAddr) {
+ switch (T.getArch()) {
+ default:
+ return make_error<StringError>(
+ std::string("No callback manager available for ") + T.str(),
+ inconvertibleErrorCode());
+
+ case Triple::aarch64:
+ return LocalLazyCallThroughManager::Create<OrcAArch64>(ES,
+ ErrorHandlerAddr);
+
+ case Triple::x86:
+ return LocalLazyCallThroughManager::Create<OrcI386>(ES, ErrorHandlerAddr);
+
+ case Triple::mips:
+ return LocalLazyCallThroughManager::Create<OrcMips32Be>(ES,
+ ErrorHandlerAddr);
+
+ case Triple::mipsel:
+ return LocalLazyCallThroughManager::Create<OrcMips32Le>(ES,
+ ErrorHandlerAddr);
+
+ case Triple::mips64:
+ case Triple::mips64el:
+ return LocalLazyCallThroughManager::Create<OrcMips64>(ES, ErrorHandlerAddr);
+
+ case Triple::x86_64:
+ if (T.getOS() == Triple::OSType::Win32)
+ return LocalLazyCallThroughManager::Create<OrcX86_64_Win32>(
+ ES, ErrorHandlerAddr);
+ else
+ return LocalLazyCallThroughManager::Create<OrcX86_64_SysV>(
+ ES, ErrorHandlerAddr);
+ }
+}
+
+LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit(
+ LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager,
+ JITDylib &SourceJD, SymbolAliasMap CallableAliases, VModuleKey K)
+ : MaterializationUnit(extractFlags(CallableAliases), std::move(K)),
+ LCTManager(LCTManager), ISManager(ISManager), SourceJD(SourceJD),
+ CallableAliases(std::move(CallableAliases)),
+ NotifyResolved(LazyCallThroughManager::createNotifyResolvedFunction(
+ [&ISManager](JITDylib &JD, const SymbolStringPtr &SymbolName,
+ JITTargetAddress ResolvedAddr) {
+ return ISManager.updatePointer(*SymbolName, ResolvedAddr);
+ })) {}
+
+StringRef LazyReexportsMaterializationUnit::getName() const {
+ return "<Lazy Reexports>";
+}
+
+void LazyReexportsMaterializationUnit::materialize(
+ MaterializationResponsibility R) {
+ auto RequestedSymbols = R.getRequestedSymbols();
+
+ SymbolAliasMap RequestedAliases;
+ for (auto &RequestedSymbol : RequestedSymbols) {
+ auto I = CallableAliases.find(RequestedSymbol);
+ assert(I != CallableAliases.end() && "Symbol not found in alias map?");
+ RequestedAliases[I->first] = std::move(I->second);
+ CallableAliases.erase(I);
+ }
+
+ if (!CallableAliases.empty())
+ R.replace(lazyReexports(LCTManager, ISManager, SourceJD,
+ std::move(CallableAliases)));
+
+ IndirectStubsManager::StubInitsMap StubInits;
+ for (auto &Alias : RequestedAliases) {
+
+ auto CallThroughTrampoline = LCTManager.getCallThroughTrampoline(
+ SourceJD, Alias.second.Aliasee, NotifyResolved);
+
+ if (!CallThroughTrampoline) {
+ SourceJD.getExecutionSession().reportError(
+ CallThroughTrampoline.takeError());
+ R.failMaterialization();
+ return;
+ }
+
+ StubInits[*Alias.first] =
+ std::make_pair(*CallThroughTrampoline, Alias.second.AliasFlags);
+ }
+
+ if (auto Err = ISManager.createStubs(StubInits)) {
+ SourceJD.getExecutionSession().reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
+
+ SymbolMap Stubs;
+ for (auto &Alias : RequestedAliases)
+ Stubs[Alias.first] = ISManager.findStub(*Alias.first, false);
+
+ R.resolve(Stubs);
+ R.emit();
+}
+
+void LazyReexportsMaterializationUnit::discard(const JITDylib &JD,
+ const SymbolStringPtr &Name) {
+ assert(CallableAliases.count(Name) &&
+ "Symbol not covered by this MaterializationUnit");
+ CallableAliases.erase(Name);
+}
+
+SymbolFlagsMap
+LazyReexportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) {
+ SymbolFlagsMap SymbolFlags;
+ for (auto &KV : Aliases) {
+ assert(KV.second.AliasFlags.isCallable() &&
+ "Lazy re-exports must be callable symbols");
+ SymbolFlags[KV.first] = KV.second.AliasFlags;
+ }
+ return SymbolFlags;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
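A hedged usage sketch for the lazyReexports() utility this new file provides (the same helper the replace() call above uses): make "foo" in TargetJD lazily forward to "foo_impl" in SourceJD. The SymbolAliasMapEntry constructor form and the exact lazyReexports() signature are assumptions taken from the accompanying headers; the symbol names are illustrative:

    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
    #include "llvm/ExecutionEngine/Orc/LazyReexports.h"

    llvm::Error installLazyAlias(llvm::orc::ExecutionSession &ES,
                                 llvm::orc::LazyCallThroughManager &LCTM,
                                 llvm::orc::IndirectStubsManager &ISM,
                                 llvm::orc::JITDylib &SourceJD,
                                 llvm::orc::JITDylib &TargetJD) {
      // extractFlags() above insists that lazy re-exports are callable symbols.
      llvm::JITSymbolFlags Flags(llvm::JITSymbolFlags::Exported);
      Flags |= llvm::JITSymbolFlags::Callable;
      llvm::orc::SymbolAliasMap Aliases;
      Aliases[ES.intern("foo")] =
          llvm::orc::SymbolAliasMapEntry(ES.intern("foo_impl"), Flags);
      // Calls to "foo" hit a stub; the first call takes the trampoline, resolves
      // "foo_impl", patches the stub pointer, and then jumps to the real body.
      return TargetJD.define(
          llvm::orc::lazyReexports(LCTM, ISM, SourceJD, std::move(Aliases)));
    }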
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
index 18be9a042f7f..ddb72544b770 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/Legacy.cpp
@@ -18,47 +18,47 @@ JITSymbolResolverAdapter::JITSymbolResolverAdapter(
ExecutionSession &ES, SymbolResolver &R, MaterializationResponsibility *MR)
: ES(ES), R(R), MR(MR) {}
-Expected<JITSymbolResolverAdapter::LookupResult>
-JITSymbolResolverAdapter::lookup(const LookupSet &Symbols) {
+void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols,
+ OnResolvedFunction OnResolved) {
SymbolNameSet InternedSymbols;
for (auto &S : Symbols)
- InternedSymbols.insert(ES.getSymbolStringPool().intern(S));
+ InternedSymbols.insert(ES.intern(S));
- auto LookupFn = [&, this](std::shared_ptr<AsynchronousSymbolQuery> Q,
- SymbolNameSet Unresolved) {
- return R.lookup(std::move(Q), std::move(Unresolved));
- };
+ auto OnResolvedWithUnwrap = [OnResolved](Expected<SymbolMap> InternedResult) {
+ if (!InternedResult) {
+ OnResolved(InternedResult.takeError());
+ return;
+ }
- auto RegisterDependencies = [&](const SymbolDependenceMap &Deps) {
- if (MR)
- MR->addDependenciesForAll(Deps);
+ LookupResult Result;
+ for (auto &KV : *InternedResult)
+ Result[*KV.first] = std::move(KV.second);
+ OnResolved(Result);
};
- auto InternedResult =
- ES.legacyLookup(ES, std::move(LookupFn), std::move(InternedSymbols),
- false, RegisterDependencies);
-
- if (!InternedResult)
- return InternedResult.takeError();
+ auto Q = std::make_shared<AsynchronousSymbolQuery>(
+ InternedSymbols, OnResolvedWithUnwrap,
+ [this](Error Err) { ES.reportError(std::move(Err)); });
- JITSymbolResolver::LookupResult Result;
- for (auto &KV : *InternedResult)
- Result[*KV.first] = KV.second;
-
- return Result;
+ auto Unresolved = R.lookup(Q, InternedSymbols);
+ if (Unresolved.empty()) {
+ if (MR)
+ MR->addDependenciesForAll(Q->QueryRegistrations);
+ } else
+ ES.legacyFailQuery(*Q, make_error<SymbolsNotFound>(std::move(Unresolved)));
}
-Expected<JITSymbolResolverAdapter::LookupFlagsResult>
-JITSymbolResolverAdapter::lookupFlags(const LookupSet &Symbols) {
+Expected<JITSymbolResolverAdapter::LookupSet>
+JITSymbolResolverAdapter::getResponsibilitySet(const LookupSet &Symbols) {
SymbolNameSet InternedSymbols;
for (auto &S : Symbols)
- InternedSymbols.insert(ES.getSymbolStringPool().intern(S));
+ InternedSymbols.insert(ES.intern(S));
- SymbolFlagsMap SymbolFlags = R.lookupFlags(InternedSymbols);
- LookupFlagsResult Result;
- for (auto &KV : SymbolFlags) {
- ResolvedStrings.insert(KV.first);
- Result[*KV.first] = KV.second;
+ auto InternedResult = R.getResponsibilitySet(InternedSymbols);
+ LookupSet Result;
+ for (auto &S : InternedResult) {
+ ResolvedStrings.insert(S);
+ Result.insert(*S);
}
return Result;
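The adapter above turns the lookup interface inside-out: callers now pass an OnResolved callback instead of blocking on an Expected result. A purely illustrative client-side sketch of turning the callback form back into a blocking call, assuming the callback fires exactly once:

    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include <future>
    #include <utility>

    llvm::Expected<llvm::JITSymbolResolver::LookupResult>
    blockingLookup(llvm::JITSymbolResolver &Resolver,
                   const llvm::JITSymbolResolver::LookupSet &Symbols) {
      std::promise<llvm::Expected<llvm::JITSymbolResolver::LookupResult>> P;
      auto F = P.get_future();
      Resolver.lookup(
          Symbols,
          [&P](llvm::Expected<llvm::JITSymbolResolver::LookupResult> R) {
            P.set_value(std::move(R));
          });
      return F.get(); // blocks until the resolver invokes the callback
    }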
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
index 3796e3d37bc2..922fc6f021ce 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
@@ -14,8 +14,8 @@
namespace llvm {
namespace orc {
-SymbolFlagsMap NullResolver::lookupFlags(const SymbolNameSet &Symbols) {
- return SymbolFlagsMap();
+SymbolNameSet NullResolver::getResponsibilitySet(const SymbolNameSet &Symbols) {
+ return Symbols;
}
SymbolNameSet
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
index 6980c8140fd0..825f53204736 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
@@ -13,17 +13,17 @@
namespace llvm {
namespace orc {
-ObjectTransformLayer2::ObjectTransformLayer2(ExecutionSession &ES,
- ObjectLayer &BaseLayer,
- TransformFunction Transform)
+ObjectTransformLayer::ObjectTransformLayer(ExecutionSession &ES,
+ ObjectLayer &BaseLayer,
+ TransformFunction Transform)
: ObjectLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
-void ObjectTransformLayer2::emit(MaterializationResponsibility R, VModuleKey K,
- std::unique_ptr<MemoryBuffer> O) {
+void ObjectTransformLayer::emit(MaterializationResponsibility R,
+ std::unique_ptr<MemoryBuffer> O) {
assert(O && "Module must not be null");
if (auto TransformedObj = Transform(std::move(O)))
- BaseLayer.emit(std::move(R), std::move(K), std::move(*TransformedObj));
+ BaseLayer.emit(std::move(R), std::move(*TransformedObj));
else {
R.failMaterialization();
getExecutionSession().reportError(TransformedObj.takeError());
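For reference, a minimal TransformFunction matching the signature the renamed ObjectTransformLayer::emit() above expects; a real transform might rewrite or instrument the object buffer before handing it on. passThroughTransform is an illustrative name, not part of the patch, and would be installed as the layer's Transform argument, e.g. ObjectTransformLayer(ES, BaseLayer, passThroughTransform):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>

    static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
    passThroughTransform(std::unique_ptr<llvm::MemoryBuffer> Obj) {
      // Inspect or rewrite Obj here; returning an Error aborts materialization.
      return std::move(Obj);
    }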
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index e3c968157976..aa4055542426 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -537,5 +537,448 @@ Error OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
return Error::success();
}
+void OrcMips32_Base::writeResolverCode(uint8_t *ResolverMem,
+ JITReentryFn ReentryFn,
+ void *CallbackMgr, bool isBigEndian) {
+
+ const uint32_t ResolverCode[] = {
+ // resolver_entry:
+ 0x27bdff98, // 0x00: addiu $sp,$sp,-104
+ 0xafa20000, // 0x04: sw $v0,0($sp)
+ 0xafa30004, // 0x08: sw $v1,4($sp)
+ 0xafa40008, // 0x0c: sw $a0,8($sp)
+ 0xafa5000c, // 0x10: sw $a1,12($sp)
+ 0xafa60010, // 0x14: sw $a2,16($sp)
+ 0xafa70014, // 0x18: sw $a3,20($sp)
+ 0xafb00018, // 0x1c: sw $s0,24($sp)
+ 0xafb1001c, // 0x20: sw $s1,28($sp)
+ 0xafb20020, // 0x24: sw $s2,32($sp)
+ 0xafb30024, // 0x28: sw $s3,36($sp)
+ 0xafb40028, // 0x2c: sw $s4,40($sp)
+ 0xafb5002c, // 0x30: sw $s5,44($sp)
+ 0xafb60030, // 0x34: sw $s6,48($sp)
+ 0xafb70034, // 0x38: sw $s7,52($sp)
+ 0xafa80038, // 0x3c: sw $t0,56($sp)
+ 0xafa9003c, // 0x40: sw $t1,60($sp)
+ 0xafaa0040, // 0x44: sw $t2,64($sp)
+ 0xafab0044, // 0x48: sw $t3,68($sp)
+ 0xafac0048, // 0x4c: sw $t4,72($sp)
+ 0xafad004c, // 0x50: sw $t5,76($sp)
+ 0xafae0050, // 0x54: sw $t6,80($sp)
+ 0xafaf0054, // 0x58: sw $t7,84($sp)
+ 0xafb80058, // 0x5c: sw $t8,88($sp)
+ 0xafb9005c, // 0x60: sw $t9,92($sp)
+ 0xafbe0060, // 0x64: sw $fp,96($sp)
+ 0xafbf0064, // 0x68: sw $ra,100($sp)
+
+ // Callback manager addr.
+ 0x00000000, // 0x6c: lui $a0,callbackmgr
+ 0x00000000, // 0x70: addiu $a0,$a0,callbackmgr
+
+ 0x03e02825, // 0x74: move $a1, $ra
+ 0x24a5ffec, // 0x78: addiu $a1,$a1,-20
+
+ // JIT re-entry fn addr:
+ 0x00000000, // 0x7c: lui $t9,reentry
+ 0x00000000, // 0x80: addiu $t9,$t9,reentry
+
+ 0x0320f809, // 0x84: jalr $t9
+ 0x00000000, // 0x88: nop
+ 0x8fbf0064, // 0x8c: lw $ra,100($sp)
+ 0x8fbe0060, // 0x90: lw $fp,96($sp)
+ 0x8fb9005c, // 0x94: lw $t9,92($sp)
+ 0x8fb80058, // 0x98: lw $t8,88($sp)
+ 0x8faf0054, // 0x9c: lw $t7,84($sp)
+ 0x8fae0050, // 0xa0: lw $t6,80($sp)
+ 0x8fad004c, // 0xa4: lw $t5,76($sp)
+ 0x8fac0048, // 0xa8: lw $t4,72($sp)
+ 0x8fab0044, // 0xac: lw $t3,68($sp)
+ 0x8faa0040, // 0xb0: lw $t2,64($sp)
+ 0x8fa9003c, // 0xb4: lw $t1,60($sp)
+ 0x8fa80038, // 0xb8: lw $t0,56($sp)
+ 0x8fb70034, // 0xbc: lw $s7,52($sp)
+ 0x8fb60030, // 0xc0: lw $s6,48($sp)
+ 0x8fb5002c, // 0xc4: lw $s5,44($sp)
+ 0x8fb40028, // 0xc8: lw $s4,40($sp)
+ 0x8fb30024, // 0xcc: lw $s3,36($sp)
+ 0x8fb20020, // 0xd0: lw $s2,32($sp)
+ 0x8fb1001c, // 0xd4: lw $s1,28($sp)
+ 0x8fb00018, // 0xd8: lw $s0,24($sp)
+ 0x8fa70014, // 0xdc: lw $a3,20($sp)
+ 0x8fa60010, // 0xe0: lw $a2,16($sp)
+ 0x8fa5000c, // 0xe4: lw $a1,12($sp)
+ 0x8fa40008, // 0xe8: lw $a0,8($sp)
+ 0x27bd0068, // 0xec: addiu $sp,$sp,104
+ 0x0300f825, // 0xf0: move $ra, $t8
+ 0x03200008, // 0xf4: jr $t9
+ 0x00000000, // 0xf8: move $t9, $v0/v1
+ };
+
+ const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui
+ const unsigned CallbackMgrAddrOffset = 0x6c; // Callback manager addr lui
+  const unsigned Offsett = 0xf8; // Offset of the "move $t9,$v0/$v1" placeholder patched below.
+
+ memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+
+  // Depending on endianness, the return value will be in $v0 or $v1.
+ uint32_t MoveVxT9 = isBigEndian ? 0x0060c825 : 0x0040c825;
+ memcpy(ResolverMem + Offsett, &MoveVxT9, sizeof(MoveVxT9));
+
+ uint64_t CallMgrAddr = reinterpret_cast<uint64_t>(CallbackMgr);
+ uint32_t CallMgrLUi = 0x3c040000 | (((CallMgrAddr + 0x8000) >> 16) & 0xFFFF);
+ uint32_t CallMgrADDiu = 0x24840000 | ((CallMgrAddr) & 0xFFFF);
+ memcpy(ResolverMem + CallbackMgrAddrOffset, &CallMgrLUi, sizeof(CallMgrLUi));
+ memcpy(ResolverMem + CallbackMgrAddrOffset + 4, &CallMgrADDiu,
+ sizeof(CallMgrADDiu));
+
+ uint64_t ReentryAddr = reinterpret_cast<uint64_t>(ReentryFn);
+ uint32_t ReentryLUi = 0x3c190000 | (((ReentryAddr + 0x8000) >> 16) & 0xFFFF);
+ uint32_t ReentryADDiu = 0x27390000 | ((ReentryAddr) & 0xFFFF);
+ memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryLUi, sizeof(ReentryLUi));
+ memcpy(ResolverMem + ReentryFnAddrOffset + 4, &ReentryADDiu,
+ sizeof(ReentryADDiu));
+}
+
+void OrcMips32_Base::writeTrampolines(uint8_t *TrampolineMem,
+ void *ResolverAddr,
+ unsigned NumTrampolines) {
+
+ uint32_t *Trampolines = reinterpret_cast<uint32_t *>(TrampolineMem);
+ uint64_t ResolveAddr = reinterpret_cast<uint64_t>(ResolverAddr);
+ uint32_t RHiAddr = ((ResolveAddr + 0x8000) >> 16);
+
+ for (unsigned I = 0; I < NumTrampolines; ++I) {
+ Trampolines[5 * I + 0] = 0x03e0c025; // move $t8,$ra
+ Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); // lui $t9,resolveAddr
+ Trampolines[5 * I + 2] = 0x27390000 | (ResolveAddr & 0xFFFF); // addiu $t9,$t9,resolveAddr
+ Trampolines[5 * I + 3] = 0x0320f809; // jalr $t9
+ Trampolines[5 * I + 4] = 0x00000000; // nop
+ }
+}
+
+Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ // Stub format is:
+ //
+ // .section __orc_stubs
+ // stub1:
+ // lui $t9, ptr1
+ // lw $t9, %lo(ptr1)($t9)
+ // jr $t9
+ // stub2:
+ // lui $t9, ptr2
+ // lw $t9,%lo(ptr1)($t9)
+ // jr $t9
+ //
+ // ...
+ //
+ // .section __orc_ptrs
+ // ptr1:
+ // .word 0x0
+ // ptr2:
+ // .word 0x0
+ //
+ // ...
+
+ const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+ // Emit at least MinStubs, rounded up to fill the pages allocated.
+ unsigned PageSize = sys::Process::getPageSize();
+ unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+ unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+ // Allocate memory for stubs and pointers in one call.
+ std::error_code EC;
+ auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+ 2 * NumPages * PageSize, nullptr,
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+
+ if (EC)
+ return errorCodeToError(EC);
+
+ // Create separate MemoryBlocks representing the stubs and pointers.
+ sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+ sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
+ NumPages * PageSize,
+ NumPages * PageSize);
+
+  // Populate the stubs page and mark it executable.
+ uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlock.base());
+ uint64_t PtrAddr = reinterpret_cast<uint64_t>(Stub) + NumPages * PageSize;
+
+ for (unsigned I = 0; I < NumStubs; ++I) {
+ uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16);
+ Stub[4 * I + 0] = 0x3c190000 | (HiAddr & 0xFFFF); // lui $t9,ptr1
+ Stub[4 * I + 1] = 0x8f390000 | (PtrAddr & 0xFFFF); // lw $t9,%lo(ptr1)($t9)
+ Stub[4 * I + 2] = 0x03200008; // jr $t9
+ Stub[4 * I + 3] = 0x00000000; // nop
+ PtrAddr += 4;
+ }
+
+ if (auto EC = sys::Memory::protectMappedMemory(
+ StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
+ return errorCodeToError(EC);
+
+  // Initialize all pointers to point at InitialPtrVal.
+ void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Ptr[I] = InitialPtrVal;
+
+ StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
+
+ return Error::success();
+}
+
+void OrcMips64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
+ void *CallbackMgr) {
+
+ const uint32_t ResolverCode[] = {
+ //resolver_entry:
+ 0x67bdff30, // 0x00: daddiu $sp,$sp,-208
+ 0xffa20000, // 0x04: sd v0,0(sp)
+ 0xffa30008, // 0x08: sd v1,8(sp)
+ 0xffa40010, // 0x0c: sd a0,16(sp)
+ 0xffa50018, // 0x10: sd a1,24(sp)
+ 0xffa60020, // 0x14: sd a2,32(sp)
+ 0xffa70028, // 0x18: sd a3,40(sp)
+ 0xffa80030, // 0x1c: sd a4,48(sp)
+ 0xffa90038, // 0x20: sd a5,56(sp)
+ 0xffaa0040, // 0x24: sd a6,64(sp)
+ 0xffab0048, // 0x28: sd a7,72(sp)
+ 0xffac0050, // 0x2c: sd t0,80(sp)
+ 0xffad0058, // 0x30: sd t1,88(sp)
+ 0xffae0060, // 0x34: sd t2,96(sp)
+ 0xffaf0068, // 0x38: sd t3,104(sp)
+ 0xffb00070, // 0x3c: sd s0,112(sp)
+ 0xffb10078, // 0x40: sd s1,120(sp)
+ 0xffb20080, // 0x44: sd s2,128(sp)
+ 0xffb30088, // 0x48: sd s3,136(sp)
+ 0xffb40090, // 0x4c: sd s4,144(sp)
+ 0xffb50098, // 0x50: sd s5,152(sp)
+ 0xffb600a0, // 0x54: sd s6,160(sp)
+ 0xffb700a8, // 0x58: sd s7,168(sp)
+ 0xffb800b0, // 0x5c: sd t8,176(sp)
+ 0xffb900b8, // 0x60: sd t9,184(sp)
+ 0xffbe00c0, // 0x64: sd fp,192(sp)
+ 0xffbf00c8, // 0x68: sd ra,200(sp)
+
+ // Callback manager addr.
+      0x00000000, // 0x6c: lui $a0,highest(callbackmgr)
+      0x00000000, // 0x70: daddiu $a0,$a0,higher(callbackmgr)
+ 0x00000000, // 0x74: dsll $a0,$a0,16
+ 0x00000000, // 0x78: daddiu $a0,$a0,hi(callbackmgr)
+ 0x00000000, // 0x7c: dsll $a0,$a0,16
+ 0x00000000, // 0x80: daddiu $a0,$a0,lo(callbackmgr)
+
+ 0x03e02825, // 0x84: move $a1, $ra
+ 0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36
+
+ // JIT re-entry fn addr:
+      0x00000000, // 0x8c: lui $t9,highest(reentry)
+      0x00000000, // 0x90: daddiu $t9,$t9,higher(reentry)
+      0x00000000, // 0x94: dsll $t9,$t9,16
+      0x00000000, // 0x98: daddiu $t9,$t9,hi(reentry)
+      0x00000000, // 0x9c: dsll $t9,$t9,16
+      0x00000000, // 0xa0: daddiu $t9,$t9,lo(reentry)
+ 0x0320f809, // 0xa4: jalr $t9
+ 0x00000000, // 0xa8: nop
+ 0xdfbf00c8, // 0xac: ld ra, 200(sp)
+ 0xdfbe00c0, // 0xb0: ld fp, 192(sp)
+ 0xdfb900b8, // 0xb4: ld t9, 184(sp)
+ 0xdfb800b0, // 0xb8: ld t8, 176(sp)
+ 0xdfb700a8, // 0xbc: ld s7, 168(sp)
+ 0xdfb600a0, // 0xc0: ld s6, 160(sp)
+ 0xdfb50098, // 0xc4: ld s5, 152(sp)
+ 0xdfb40090, // 0xc8: ld s4, 144(sp)
+ 0xdfb30088, // 0xcc: ld s3, 136(sp)
+ 0xdfb20080, // 0xd0: ld s2, 128(sp)
+ 0xdfb10078, // 0xd4: ld s1, 120(sp)
+ 0xdfb00070, // 0xd8: ld s0, 112(sp)
+ 0xdfaf0068, // 0xdc: ld t3, 104(sp)
+ 0xdfae0060, // 0xe0: ld t2, 96(sp)
+ 0xdfad0058, // 0xe4: ld t1, 88(sp)
+ 0xdfac0050, // 0xe8: ld t0, 80(sp)
+ 0xdfab0048, // 0xec: ld a7, 72(sp)
+ 0xdfaa0040, // 0xf0: ld a6, 64(sp)
+ 0xdfa90038, // 0xf4: ld a5, 56(sp)
+ 0xdfa80030, // 0xf8: ld a4, 48(sp)
+ 0xdfa70028, // 0xfc: ld a3, 40(sp)
+ 0xdfa60020, // 0x100: ld a2, 32(sp)
+ 0xdfa50018, // 0x104: ld a1, 24(sp)
+ 0xdfa40010, // 0x108: ld a0, 16(sp)
+ 0xdfa30008, // 0x10c: ld v1, 8(sp)
+ 0x67bd00d0, // 0x110: daddiu $sp,$sp,208
+ 0x0300f825, // 0x114: move $ra, $t8
+ 0x03200008, // 0x118: jr $t9
+ 0x0040c825, // 0x11c: move $t9, $v0
+ };
+
+ const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui
+ const unsigned CallbackMgrAddrOffset = 0x6c; // Callback manager addr lui
+
+ memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+
+ uint64_t CallMgrAddr = reinterpret_cast<uint64_t>(CallbackMgr);
+
+ uint32_t CallMgrLUi =
+ 0x3c040000 | (((CallMgrAddr + 0x800080008000) >> 48) & 0xFFFF);
+ uint32_t CallMgrDADDiu =
+ 0x64840000 | (((CallMgrAddr + 0x80008000) >> 32) & 0xFFFF);
+ uint32_t CallMgrDSLL = 0x00042438;
+ uint32_t CallMgrDADDiu2 =
+ 0x64840000 | ((((CallMgrAddr + 0x8000) >> 16) & 0xFFFF));
+ uint32_t CallMgrDSLL2 = 0x00042438;
+ uint32_t CallMgrDADDiu3 = 0x64840000 | ((CallMgrAddr)&0xFFFF);
+
+ memcpy(ResolverMem + CallbackMgrAddrOffset, &CallMgrLUi, sizeof(CallMgrLUi));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 4), &CallMgrDADDiu,
+ sizeof(CallMgrDADDiu));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 8), &CallMgrDSLL,
+ sizeof(CallMgrDSLL));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 12), &CallMgrDADDiu2,
+ sizeof(CallMgrDADDiu2));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 16), &CallMgrDSLL2,
+ sizeof(CallMgrDSLL2));
+ memcpy(ResolverMem + (CallbackMgrAddrOffset + 20), &CallMgrDADDiu3,
+ sizeof(CallMgrDADDiu3));
+
+ uint64_t ReentryAddr = reinterpret_cast<uint64_t>(ReentryFn);
+
+ uint32_t ReentryLUi =
+ 0x3c190000 | (((ReentryAddr + 0x800080008000) >> 48) & 0xFFFF);
+
+ uint32_t ReentryDADDiu =
+ 0x67390000 | (((ReentryAddr + 0x80008000) >> 32) & 0xFFFF);
+
+ uint32_t ReentryDSLL = 0x0019cc38;
+
+ uint32_t ReentryDADDiu2 =
+ 0x67390000 | (((ReentryAddr + 0x8000) >> 16) & 0xFFFF);
+
+ uint32_t ReentryDSLL2 = 0x0019cc38;
+
+ uint32_t ReentryDADDiu3 = 0x67390000 | ((ReentryAddr)&0xFFFF);
+
+ memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryLUi, sizeof(ReentryLUi));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 4), &ReentryDADDiu,
+ sizeof(ReentryDADDiu));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 8), &ReentryDSLL,
+ sizeof(ReentryDSLL));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 12), &ReentryDADDiu2,
+ sizeof(ReentryDADDiu2));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 16), &ReentryDSLL2,
+ sizeof(ReentryDSLL2));
+ memcpy(ResolverMem + (ReentryFnAddrOffset + 20), &ReentryDADDiu3,
+ sizeof(ReentryDADDiu3));
+}
+
+void OrcMips64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines) {
+
+ uint32_t *Trampolines = reinterpret_cast<uint32_t *>(TrampolineMem);
+ uint64_t ResolveAddr = reinterpret_cast<uint64_t>(ResolverAddr);
+
+ uint64_t HeighestAddr = ((ResolveAddr + 0x800080008000) >> 48);
+ uint64_t HeigherAddr = ((ResolveAddr + 0x80008000) >> 32);
+ uint64_t HiAddr = ((ResolveAddr + 0x8000) >> 16);
+
+ for (unsigned I = 0; I < NumTrampolines; ++I) {
+ Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra
+ Trampolines[10 * I + 1] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,resolveAddr
+ Trampolines[10 * I + 2] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(resolveAddr)
+ Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16
+ Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
+ Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16
+ Trampolines[10 * I + 6] = 0x67390000 | (ResolveAddr & 0xFFFF); // daddiu $t9,$t9,%lo(ptr)
+ Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9
+ Trampolines[10 * I + 8] = 0x00000000; // nop
+ Trampolines[10 * I + 9] = 0x00000000; // nop
+ }
+}
+
+Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ // Stub format is:
+ //
+ // .section __orc_stubs
+ // stub1:
+ // lui $t9,ptr1
+ // dsll $t9,$t9,16
+ // daddiu $t9,$t9,%hi(ptr)
+ // dsll $t9,$t9,16
+ // ld $t9,%lo(ptr)
+ // jr $t9
+ // stub2:
+ // lui $t9,ptr1
+ // dsll $t9,$t9,16
+ // daddiu $t9,$t9,%hi(ptr)
+ // dsll $t9,$t9,16
+ // ld $t9,%lo(ptr)
+ // jr $t9
+ //
+ // ...
+ //
+ // .section __orc_ptrs
+ // ptr1:
+ // .dword 0x0
+ // ptr2:
+ // .dword 0x0
+ //
+ // ...
+ const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+ // Emit at least MinStubs, rounded up to fill the pages allocated.
+ unsigned PageSize = sys::Process::getPageSize();
+ unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+ unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+ // Allocate memory for stubs and pointers in one call.
+ std::error_code EC;
+ auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+ 2 * NumPages * PageSize, nullptr,
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+
+ if (EC)
+ return errorCodeToError(EC);
+
+ // Create separate MemoryBlocks representing the stubs and pointers.
+ sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+ sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
+ NumPages * PageSize,
+ NumPages * PageSize);
+
+  // Populate the stubs page and mark it executable.
+ uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlock.base());
+ uint64_t PtrAddr = reinterpret_cast<uint64_t>(PtrsBlock.base());
+
+ for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) {
+ uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48);
+ uint64_t HeigherAddr = ((PtrAddr + 0x80008000) >> 32);
+ uint64_t HiAddr = ((PtrAddr + 0x8000) >> 16);
+ Stub[8 * I + 0] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,ptr1
+ Stub[8 * I + 1] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(ptr)
+ Stub[8 * I + 2] = 0x0019cc38; // dsll $t9,$t9,16
+ Stub[8 * I + 3] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
+ Stub[8 * I + 4] = 0x0019cc38; // dsll $t9,$t9,16
+ Stub[8 * I + 5] = 0xdf390000 | (PtrAddr & 0xFFFF); // ld $t9,%lo(ptr)
+ Stub[8 * I + 6] = 0x03200008; // jr $t9
+ Stub[8 * I + 7] = 0x00000000; // nop
+ }
+
+ if (auto EC = sys::Memory::protectMappedMemory(
+ StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
+ return errorCodeToError(EC);
+
+  // Initialize all pointers to point at InitialPtrVal.
+ void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Ptr[I] = InitialPtrVal;
+
+ StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
+
+ return Error::success();
+}
} // End namespace orc.
} // End namespace llvm.
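The MIPS64 immediates computed above follow the usual %highest/%higher/%hi/%lo split: daddiu sign-extends its 16-bit immediate, so each higher chunk is taken from the address plus a rounding constant. A small self-contained check (splitAndCheck and sext16 are illustrative names) that the emitted lui/daddiu/dsll sequence reconstructs the original address:

    #include <cassert>
    #include <cstdint>

    static uint64_t sext16(uint16_t V) { return (uint64_t)(int64_t)(int16_t)V; }

    void splitAndCheck(uint64_t Addr) {
      uint16_t Highest = (Addr + 0x800080008000ULL) >> 48; // %highest
      uint16_t Higher  = (Addr + 0x80008000ULL) >> 32;     // %higher
      uint16_t Hi      = (Addr + 0x8000ULL) >> 16;         // %hi
      uint16_t Lo      = Addr;                             // %lo

      // Mirror the emitted sequence: lui; daddiu; dsll 16; daddiu; dsll 16; daddiu.
      uint64_t R = sext16(Highest) << 16; // lui sign-extends its 32-bit result
      R += sext16(Higher);                // daddiu
      R <<= 16;                           // dsll $t9,$t9,16
      R += sext16(Hi);                    // daddiu
      R <<= 16;                           // dsll $t9,$t9,16
      R += sext16(Lo);                    // daddiu
      assert(R == Addr && "chunked address must round-trip");
    }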
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
index d6005d24a648..6dea64a6e78f 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -42,89 +42,110 @@ void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName,
void LLVMOrcDisposeMangledSymbol(char *MangledName) { delete[] MangledName; }
-LLVMOrcErrorCode
-LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcLazyCompileCallbackFn Callback,
- void *CallbackCtx) {
+LLVMErrorRef LLVMOrcCreateLazyCompileCallback(
+ LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *RetAddr,
+ LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.createLazyCompileCallback(*RetAddr, Callback, CallbackCtx);
+ if (auto Addr = J.createLazyCompileCallback(Callback, CallbackCtx)) {
+ *RetAddr = *Addr;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Addr.takeError());
}
-LLVMOrcErrorCode LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress InitAddr) {
+LLVMErrorRef LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress InitAddr) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.createIndirectStub(StubName, InitAddr);
+ return wrap(J.createIndirectStub(StubName, InitAddr));
}
-LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
- const char *StubName,
- LLVMOrcTargetAddress NewAddr) {
+LLVMErrorRef LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack,
+ const char *StubName,
+ LLVMOrcTargetAddress NewAddr) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.setIndirectStubPointer(StubName, NewAddr);
+ return wrap(J.setIndirectStubPointer(StubName, NewAddr));
}
-LLVMOrcErrorCode
-LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle, LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
+LLVMErrorRef LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
std::unique_ptr<Module> M(unwrap(Mod));
- return J.addIRModuleEager(*RetHandle, std::move(M), SymbolResolver,
- SymbolResolverCtx);
+ if (auto Handle =
+ J.addIRModuleEager(std::move(M), SymbolResolver, SymbolResolverCtx)) {
+ *RetHandle = *Handle;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Handle.takeError());
}
-LLVMOrcErrorCode
-LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle, LLVMModuleRef Mod,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
+LLVMErrorRef LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMModuleRef Mod,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
std::unique_ptr<Module> M(unwrap(Mod));
- return J.addIRModuleLazy(*RetHandle, std::move(M), SymbolResolver,
- SymbolResolverCtx);
+ if (auto Handle =
+ J.addIRModuleLazy(std::move(M), SymbolResolver, SymbolResolverCtx)) {
+ *RetHandle = *Handle;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Handle.takeError());
}
-LLVMOrcErrorCode
-LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle *RetHandle,
- LLVMMemoryBufferRef Obj,
- LLVMOrcSymbolResolverFn SymbolResolver,
- void *SymbolResolverCtx) {
+LLVMErrorRef LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle *RetHandle,
+ LLVMMemoryBufferRef Obj,
+ LLVMOrcSymbolResolverFn SymbolResolver,
+ void *SymbolResolverCtx) {
OrcCBindingsStack &J = *unwrap(JITStack);
std::unique_ptr<MemoryBuffer> O(unwrap(Obj));
- return J.addObject(*RetHandle, std::move(O), SymbolResolver,
- SymbolResolverCtx);
+ if (auto Handle =
+ J.addObject(std::move(O), SymbolResolver, SymbolResolverCtx)) {
+ *RetHandle = *Handle;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Handle.takeError());
}
-LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
- LLVMOrcModuleHandle H) {
+LLVMErrorRef LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack,
+ LLVMOrcModuleHandle H) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.removeModule(H);
+ return wrap(J.removeModule(H));
}
-LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- const char *SymbolName) {
+LLVMErrorRef LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ const char *SymbolName) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.findSymbolAddress(*RetAddr, SymbolName, true);
+ if (auto Addr = J.findSymbolAddress(SymbolName, true)) {
+ *RetAddr = *Addr;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Addr.takeError());
}
-LLVMOrcErrorCode LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
- LLVMOrcTargetAddress *RetAddr,
- LLVMOrcModuleHandle H,
- const char *SymbolName) {
+LLVMErrorRef LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack,
+ LLVMOrcTargetAddress *RetAddr,
+ LLVMOrcModuleHandle H,
+ const char *SymbolName) {
OrcCBindingsStack &J = *unwrap(JITStack);
- return J.findSymbolAddressIn(*RetAddr, H, SymbolName, true);
+ if (auto Addr = J.findSymbolAddressIn(H, SymbolName, true)) {
+ *RetAddr = *Addr;
+ return LLVMErrorSuccess;
+ } else
+ return wrap(Addr.takeError());
}
-LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) {
+LLVMErrorRef LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) {
auto *J = unwrap(JITStack);
auto Err = J->shutdown();
delete J;
- return Err;
+ return wrap(std::move(Err));
}
void LLVMOrcRegisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L)
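A short sketch of consuming the LLVMErrorRef values the reworked C bindings above now return, assuming LLVMGetErrorMessage()/LLVMDisposeErrorMessage() are the helpers exposed by the llvm-c/Error.h header added in this import; lookupMain is an illustrative caller, not part of the patch:

    #include "llvm-c/Error.h"
    #include "llvm-c/OrcBindings.h"
    #include <cstdio>

    bool lookupMain(LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *Addr) {
      if (LLVMErrorRef Err = LLVMOrcGetSymbolAddress(JITStack, Addr, "main")) {
        char *Msg = LLVMGetErrorMessage(Err); // takes ownership of Err
        std::fprintf(stderr, "ORC error: %s\n", Msg);
        LLVMDisposeErrorMessage(Msg);
        return false;
      }
      return true; // *Addr is 0 if "main" was not found, per the lookup code above
    }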
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index b9f8a370d2f0..817a4b89bfb0 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -77,9 +77,9 @@ public:
};
template <>
- class GenericLayerImpl<orc::RTDyldObjectLinkingLayer> : public GenericLayer {
+ class GenericLayerImpl<orc::LegacyRTDyldObjectLinkingLayer> : public GenericLayer {
private:
- using LayerT = orc::RTDyldObjectLinkingLayer;
+ using LayerT = orc::LegacyRTDyldObjectLinkingLayer;
public:
GenericLayerImpl(LayerT &Layer) : Layer(Layer) {}
@@ -107,10 +107,10 @@ class OrcCBindingsStack {
public:
using CompileCallbackMgr = orc::JITCompileCallbackManager;
- using ObjLayerT = orc::RTDyldObjectLinkingLayer;
- using CompileLayerT = orc::IRCompileLayer<ObjLayerT, orc::SimpleCompiler>;
+ using ObjLayerT = orc::LegacyRTDyldObjectLinkingLayer;
+ using CompileLayerT = orc::LegacyIRCompileLayer<ObjLayerT, orc::SimpleCompiler>;
using CODLayerT =
- orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>;
+ orc::LegacyCompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>;
using CallbackManagerBuilder =
std::function<std::unique_ptr<CompileCallbackMgr>()>;
@@ -129,20 +129,21 @@ private:
: Stack(Stack), ExternalResolver(std::move(ExternalResolver)),
ExternalResolverCtx(std::move(ExternalResolverCtx)) {}
- orc::SymbolFlagsMap
- lookupFlags(const orc::SymbolNameSet &Symbols) override {
- orc::SymbolFlagsMap SymbolFlags;
+ orc::SymbolNameSet
+ getResponsibilitySet(const orc::SymbolNameSet &Symbols) override {
+ orc::SymbolNameSet Result;
for (auto &S : Symbols) {
- if (auto Sym = findSymbol(*S))
- SymbolFlags[S] = Sym.getFlags();
- else if (auto Err = Sym.takeError()) {
+ if (auto Sym = findSymbol(*S)) {
+ if (!Sym.getFlags().isStrong())
+ Result.insert(S);
+ } else if (auto Err = Sym.takeError()) {
Stack.reportError(std::move(Err));
- return orc::SymbolFlagsMap();
+ return orc::SymbolNameSet();
}
}
- return SymbolFlags;
+ return Result;
}
orc::SymbolNameSet
@@ -182,10 +183,17 @@ private:
// 2. Runtime overrides.
// 3. External resolver (if present).
- if (auto Sym = Stack.CODLayer.findSymbol(Name, true))
- return Sym;
- else if (auto Err = Sym.takeError())
- return Sym.takeError();
+ if (Stack.CODLayer) {
+ if (auto Sym = Stack.CODLayer->findSymbol(Name, true))
+ return Sym;
+ else if (auto Err = Sym.takeError())
+ return Sym.takeError();
+ } else {
+ if (auto Sym = Stack.CompileLayer.findSymbol(Name, true))
+ return Sym;
+ else if (auto Err = Sym.takeError())
+ return Sym.takeError();
+ }
if (auto Sym = Stack.CXXRuntimeOverrides.searchOverrides(Name))
return Sym;
@@ -205,8 +213,8 @@ private:
public:
OrcCBindingsStack(TargetMachine &TM,
IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
- : CCMgr(createLocalCompileCallbackManager(TM.getTargetTriple(), ES, 0)),
- DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()),
+ : CCMgr(createCompileCallbackManager(TM, ES)), DL(TM.createDataLayout()),
+ IndirectStubsMgr(IndirectStubsMgrBuilder()),
ObjectLayer(ES,
[this](orc::VModuleKey K) {
auto ResolverI = Resolvers.find(K);
@@ -226,31 +234,19 @@ public:
this->notifyFreed(K, Obj);
}),
CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
- CODLayer(ES, CompileLayer,
- [this](orc::VModuleKey K) {
- auto ResolverI = Resolvers.find(K);
- assert(ResolverI != Resolvers.end() &&
- "No resolver for module K");
- return ResolverI->second;
- },
- [this](orc::VModuleKey K,
- std::shared_ptr<orc::SymbolResolver> Resolver) {
- assert(!Resolvers.count(K) && "Resolver already present");
- Resolvers[K] = std::move(Resolver);
- },
- [](Function &F) { return std::set<Function *>({&F}); },
- *this->CCMgr, std::move(IndirectStubsMgrBuilder), false),
+ CODLayer(createCODLayer(ES, CompileLayer, CCMgr.get(),
+ std::move(IndirectStubsMgrBuilder), Resolvers)),
CXXRuntimeOverrides(
[this](const std::string &S) { return mangle(S); }) {}
- LLVMOrcErrorCode shutdown() {
+ Error shutdown() {
// Run any destructors registered with __cxa_atexit.
CXXRuntimeOverrides.runDestructors();
// Run any IR destructors.
for (auto &DtorRunner : IRStaticDestructorRunners)
if (auto Err = DtorRunner.runViaLayer(*this))
- return mapError(std::move(Err));
- return LLVMOrcErrSuccess;
+ return Err;
+ return Error::success();
}
std::string mangle(StringRef Name) {
@@ -267,35 +263,28 @@ public:
return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
}
-
- LLVMOrcErrorCode
- createLazyCompileCallback(JITTargetAddress &RetAddr,
- LLVMOrcLazyCompileCallbackFn Callback,
+ Expected<JITTargetAddress>
+ createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
void *CallbackCtx) {
auto WrappedCallback = [=]() -> JITTargetAddress {
return Callback(wrap(this), CallbackCtx);
};
- if (auto CCAddr = CCMgr->getCompileCallback(std::move(WrappedCallback))) {
- RetAddr = *CCAddr;
- return LLVMOrcErrSuccess;
- } else
- return mapError(CCAddr.takeError());
+ return CCMgr->getCompileCallback(std::move(WrappedCallback));
}
- LLVMOrcErrorCode createIndirectStub(StringRef StubName,
- JITTargetAddress Addr) {
- return mapError(
- IndirectStubsMgr->createStub(StubName, Addr, JITSymbolFlags::Exported));
+ Error createIndirectStub(StringRef StubName, JITTargetAddress Addr) {
+ return IndirectStubsMgr->createStub(StubName, Addr,
+ JITSymbolFlags::Exported);
}
- LLVMOrcErrorCode setIndirectStubPointer(StringRef Name,
- JITTargetAddress Addr) {
- return mapError(IndirectStubsMgr->updatePointer(Name, Addr));
+ Error setIndirectStubPointer(StringRef Name, JITTargetAddress Addr) {
+ return IndirectStubsMgr->updatePointer(Name, Addr);
}
+
template <typename LayerT>
- LLVMOrcErrorCode
- addIRModule(orc::VModuleKey &RetKey, LayerT &Layer, std::unique_ptr<Module> M,
+ Expected<orc::VModuleKey>
+ addIRModule(LayerT &Layer, std::unique_ptr<Module> M,
std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
@@ -313,79 +302,84 @@ public:
DtorNames.push_back(mangle(Dtor.Func->getName()));
// Add the module to the JIT.
- RetKey = ES.allocateVModule();
- Resolvers[RetKey] = std::make_shared<CBindingsResolver>(
- *this, ExternalResolver, ExternalResolverCtx);
- if (auto Err = Layer.addModule(RetKey, std::move(M)))
- return mapError(std::move(Err));
+ auto K = ES.allocateVModule();
+ Resolvers[K] = std::make_shared<CBindingsResolver>(*this, ExternalResolver,
+ ExternalResolverCtx);
+ if (auto Err = Layer.addModule(K, std::move(M)))
+ return std::move(Err);
- KeyLayers[RetKey] = detail::createGenericLayer(Layer);
+ KeyLayers[K] = detail::createGenericLayer(Layer);
// Run the static constructors, and save the static destructor runner for
// execution when the JIT is torn down.
- orc::CtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames),
- RetKey);
+ orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), K);
if (auto Err = CtorRunner.runViaLayer(*this))
- return mapError(std::move(Err));
+ return std::move(Err);
- IRStaticDestructorRunners.emplace_back(std::move(DtorNames), RetKey);
+ IRStaticDestructorRunners.emplace_back(std::move(DtorNames), K);
- return LLVMOrcErrSuccess;
+ return K;
}
- LLVMOrcErrorCode addIRModuleEager(orc::VModuleKey &RetKey,
- std::unique_ptr<Module> M,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- return addIRModule(RetKey, CompileLayer, std::move(M),
+ Expected<orc::VModuleKey>
+ addIRModuleEager(std::unique_ptr<Module> M,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ return addIRModule(CompileLayer, std::move(M),
llvm::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
- LLVMOrcErrorCode addIRModuleLazy(orc::VModuleKey &RetKey,
- std::unique_ptr<Module> M,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
- return addIRModule(RetKey, CODLayer, std::move(M),
+ Expected<orc::VModuleKey>
+ addIRModuleLazy(std::unique_ptr<Module> M,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
+ if (!CODLayer)
+ return make_error<StringError>("Can not add lazy module: No compile "
+ "callback manager available",
+ inconvertibleErrorCode());
+
+ return addIRModule(*CODLayer, std::move(M),
llvm::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
- LLVMOrcErrorCode removeModule(orc::VModuleKey K) {
+ Error removeModule(orc::VModuleKey K) {
// FIXME: Should error release the module key?
if (auto Err = KeyLayers[K]->removeModule(K))
- return mapError(std::move(Err));
+ return Err;
ES.releaseVModule(K);
KeyLayers.erase(K);
- return LLVMOrcErrSuccess;
+ return Error::success();
}
- LLVMOrcErrorCode addObject(orc::VModuleKey &RetKey,
- std::unique_ptr<MemoryBuffer> ObjBuffer,
- LLVMOrcSymbolResolverFn ExternalResolver,
- void *ExternalResolverCtx) {
+ Expected<orc::VModuleKey> addObject(std::unique_ptr<MemoryBuffer> ObjBuffer,
+ LLVMOrcSymbolResolverFn ExternalResolver,
+ void *ExternalResolverCtx) {
if (auto Obj = object::ObjectFile::createObjectFile(
ObjBuffer->getMemBufferRef())) {
- RetKey = ES.allocateVModule();
- Resolvers[RetKey] = std::make_shared<CBindingsResolver>(
+ auto K = ES.allocateVModule();
+ Resolvers[K] = std::make_shared<CBindingsResolver>(
*this, ExternalResolver, ExternalResolverCtx);
- if (auto Err = ObjectLayer.addObject(RetKey, std::move(ObjBuffer)))
- return mapError(std::move(Err));
+ if (auto Err = ObjectLayer.addObject(K, std::move(ObjBuffer)))
+ return std::move(Err);
- KeyLayers[RetKey] = detail::createGenericLayer(ObjectLayer);
+ KeyLayers[K] = detail::createGenericLayer(ObjectLayer);
- return LLVMOrcErrSuccess;
+ return K;
} else
- return mapError(Obj.takeError());
+ return Obj.takeError();
}
JITSymbol findSymbol(const std::string &Name,
bool ExportedSymbolsOnly) {
if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly))
return Sym;
- return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
+ if (CODLayer)
+ return CODLayer->findSymbol(mangle(Name), ExportedSymbolsOnly);
+ return CompileLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
}
JITSymbol findSymbolIn(orc::VModuleKey K, const std::string &Name,
@@ -394,45 +388,39 @@ public:
return KeyLayers[K]->findSymbolIn(K, mangle(Name), ExportedSymbolsOnly);
}
- LLVMOrcErrorCode findSymbolAddress(JITTargetAddress &RetAddr,
- const std::string &Name,
- bool ExportedSymbolsOnly) {
- RetAddr = 0;
+ Expected<JITTargetAddress> findSymbolAddress(const std::string &Name,
+ bool ExportedSymbolsOnly) {
if (auto Sym = findSymbol(Name, ExportedSymbolsOnly)) {
// Successful lookup, non-null symbol:
- if (auto AddrOrErr = Sym.getAddress()) {
- RetAddr = *AddrOrErr;
- return LLVMOrcErrSuccess;
- } else
- return mapError(AddrOrErr.takeError());
+ if (auto AddrOrErr = Sym.getAddress())
+ return *AddrOrErr;
+ else
+ return AddrOrErr.takeError();
} else if (auto Err = Sym.takeError()) {
// Lookup failure - report error.
- return mapError(std::move(Err));
+ return std::move(Err);
}
- // Otherwise we had a successful lookup but got a null result. We already
- // set RetAddr to '0' above, so just return success.
- return LLVMOrcErrSuccess;
+
+ // Symbol not found. Return 0.
+ return 0;
}
- LLVMOrcErrorCode findSymbolAddressIn(JITTargetAddress &RetAddr,
- orc::VModuleKey K,
- const std::string &Name,
- bool ExportedSymbolsOnly) {
- RetAddr = 0;
+ Expected<JITTargetAddress> findSymbolAddressIn(orc::VModuleKey K,
+ const std::string &Name,
+ bool ExportedSymbolsOnly) {
if (auto Sym = findSymbolIn(K, Name, ExportedSymbolsOnly)) {
// Successful lookup, non-null symbol:
- if (auto AddrOrErr = Sym.getAddress()) {
- RetAddr = *AddrOrErr;
- return LLVMOrcErrSuccess;
- } else
- return mapError(AddrOrErr.takeError());
+ if (auto AddrOrErr = Sym.getAddress())
+ return *AddrOrErr;
+ else
+ return AddrOrErr.takeError();
} else if (auto Err = Sym.takeError()) {
// Lookup failure - report error.
- return mapError(std::move(Err));
+ return std::move(Err);
}
- // Otherwise we had a successful lookup but got a null result. We already
- // set RetAddr to '0' above, so just return success.
- return LLVMOrcErrSuccess;
+
+ // Symbol not found. Return 0.
+ return 0;
}
const std::string &getErrorMessage() const { return ErrMsg; }
@@ -455,17 +443,45 @@ public:
}
private:
+ using ResolverMap =
+ std::map<orc::VModuleKey, std::shared_ptr<orc::SymbolResolver>>;
+
+ static std::unique_ptr<CompileCallbackMgr>
+ createCompileCallbackManager(TargetMachine &TM, orc::ExecutionSession &ES) {
+ auto CCMgr = createLocalCompileCallbackManager(TM.getTargetTriple(), ES, 0);
+ if (!CCMgr) {
+ // FIXME: It would be good if we could report this somewhere, but we don't
+ // have an instance yet.
+ logAllUnhandledErrors(CCMgr.takeError(), errs(), "ORC error: ");
+ return nullptr;
+ }
+ return std::move(*CCMgr);
+ }
- LLVMOrcErrorCode mapError(Error Err) {
- LLVMOrcErrorCode Result = LLVMOrcErrSuccess;
- handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
- // Handler of last resort.
- Result = LLVMOrcErrGeneric;
- ErrMsg = "";
- raw_string_ostream ErrStream(ErrMsg);
- EIB.log(ErrStream);
- });
- return Result;
+ static std::unique_ptr<CODLayerT>
+ createCODLayer(orc::ExecutionSession &ES, CompileLayerT &CompileLayer,
+ CompileCallbackMgr *CCMgr,
+ IndirectStubsManagerBuilder IndirectStubsMgrBuilder,
+ ResolverMap &Resolvers) {
+ // If there is no compile callback manager available we cannot create a
+ // compile-on-demand layer.
+ if (!CCMgr)
+ return nullptr;
+
+ return llvm::make_unique<CODLayerT>(
+ ES, CompileLayer,
+ [&Resolvers](orc::VModuleKey K) {
+ auto ResolverI = Resolvers.find(K);
+ assert(ResolverI != Resolvers.end() && "No resolver for module K");
+ return ResolverI->second;
+ },
+ [&Resolvers](orc::VModuleKey K,
+ std::shared_ptr<orc::SymbolResolver> Resolver) {
+ assert(!Resolvers.count(K) && "Resolver already present");
+ Resolvers[K] = std::move(Resolver);
+ },
+ [](Function &F) { return std::set<Function *>({&F}); }, *CCMgr,
+ std::move(IndirectStubsMgrBuilder), false);
}
void reportError(Error Err) {
@@ -476,13 +492,17 @@ private:
void notifyFinalized(orc::VModuleKey K,
const object::ObjectFile &Obj,
const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
+ uint64_t Key = static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(Obj.getData().data()));
for (auto &Listener : EventListeners)
- Listener->NotifyObjectEmitted(Obj, LoadedObjInfo);
+ Listener->notifyObjectLoaded(Key, Obj, LoadedObjInfo);
}
void notifyFreed(orc::VModuleKey K, const object::ObjectFile &Obj) {
+ uint64_t Key = static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(Obj.getData().data()));
for (auto &Listener : EventListeners)
- Listener->NotifyFreeingObject(Obj);
+ Listener->notifyFreeingObject(Key);
}
orc::ExecutionSession ES;
@@ -497,15 +517,15 @@ private:
ObjLayerT ObjectLayer;
CompileLayerT CompileLayer;
- CODLayerT CODLayer;
+ std::unique_ptr<CODLayerT> CODLayer;
std::map<orc::VModuleKey, std::unique_ptr<detail::GenericLayer>> KeyLayers;
- orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
- std::vector<orc::CtorDtorRunner<OrcCBindingsStack>> IRStaticDestructorRunners;
+ orc::LegacyLocalCXXRuntimeOverrides CXXRuntimeOverrides;
+ std::vector<orc::LegacyCtorDtorRunner<OrcCBindingsStack>> IRStaticDestructorRunners;
std::string ErrMsg;
- std::map<orc::VModuleKey, std::shared_ptr<orc::SymbolResolver>> Resolvers;
+ ResolverMap Resolvers;
};
} // end namespace llvm
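
The OrcCBindingsStack changes above replace the out-parameter-plus-LLVMOrcErrorCode convention with Expected<...> return values, so mapping errors to the C API now happens in the bindings rather than in the stack itself. The sketch below shows one way a C-API shim could consume the new interface; it is illustrative only (the actual OrcBindings.cpp changes are not part of this hunk), and wrapOrcError() is a hypothetical helper, not something added by the patch.

    // Hedged sketch: unwrap an Expected<orc::VModuleKey> back into the C-style
    // convention of llvm-c/OrcBindings.h. wrapOrcError() is assumed to stash the
    // error text and return LLVMOrcErrGeneric; it does not exist in this patch.
    LLVMOrcErrorCode exampleAddEagerly(OrcCBindingsStack &Stack,
                                       std::unique_ptr<Module> M,
                                       LLVMOrcSymbolResolverFn Resolver, void *Ctx,
                                       LLVMOrcModuleHandle *RetHandle) {
      auto KeyOrErr = Stack.addIRModuleEager(std::move(M), Resolver, Ctx);
      if (!KeyOrErr)
        return wrapOrcError(KeyOrErr.takeError()); // hypothetical error mapping
      *RetHandle = *KeyOrErr;                      // success: hand the key back to C
      return LLVMOrcErrSuccess;
    }
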
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
index 4def579e7097..617bc2fc64b5 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -128,7 +128,7 @@ void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) {
auto &CtorDtorsMap = isDtors ? UnexecutedDestructors : UnexecutedConstructors;
for (auto &KV : CtorDtorsMap)
- cantFail(CtorDtorRunner<LazyEmitLayerT>(std::move(KV.second), KV.first)
+ cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(std::move(KV.second), KV.first)
.runViaLayer(LazyEmitLayer));
CtorDtorsMap.clear();
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index abe89ce70af9..36e7e83a8bab 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -144,26 +144,29 @@ class OrcMCJITReplacement : public ExecutionEngine {
public:
LinkingORCResolver(OrcMCJITReplacement &M) : M(M) {}
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Symbols) override {
- SymbolFlagsMap SymbolFlags;
+ SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) override {
+ SymbolNameSet Result;
for (auto &S : Symbols) {
if (auto Sym = M.findMangledSymbol(*S)) {
- SymbolFlags[S] = Sym.getFlags();
+ if (!Sym.getFlags().isStrong())
+ Result.insert(S);
} else if (auto Err = Sym.takeError()) {
M.reportError(std::move(Err));
- return SymbolFlagsMap();
+ return SymbolNameSet();
} else {
if (auto Sym2 = M.ClientResolver->findSymbolInLogicalDylib(*S)) {
- SymbolFlags[S] = Sym2.getFlags();
+ if (!Sym2.getFlags().isStrong())
+ Result.insert(S);
} else if (auto Err = Sym2.takeError()) {
M.reportError(std::move(Err));
- return SymbolFlagsMap();
- }
+ return SymbolNameSet();
+ } else
+ Result.insert(S);
}
}
- return SymbolFlags;
+ return Result;
}
SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query,
@@ -272,14 +275,14 @@ public:
{
unsigned CtorId = 0, DtorId = 0;
for (auto Ctor : orc::getConstructors(*M)) {
- std::string NewCtorName = ("$static_ctor." + Twine(CtorId++)).str();
+ std::string NewCtorName = ("__ORCstatic_ctor." + Twine(CtorId++)).str();
Ctor.Func->setName(NewCtorName);
Ctor.Func->setLinkage(GlobalValue::ExternalLinkage);
Ctor.Func->setVisibility(GlobalValue::HiddenVisibility);
CtorNames.push_back(mangle(NewCtorName));
}
for (auto Dtor : orc::getDestructors(*M)) {
- std::string NewDtorName = ("$static_dtor." + Twine(DtorId++)).str();
+ std::string NewDtorName = ("__ORCstatic_dtor." + Twine(DtorId++)).str();
dbgs() << "Found dtor: " << NewDtorName << "\n";
Dtor.Func->setName(NewDtorName);
Dtor.Func->setLinkage(GlobalValue::ExternalLinkage);
@@ -458,8 +461,8 @@ private:
return MangledName;
}
- using ObjectLayerT = RTDyldObjectLinkingLayer;
- using CompileLayerT = IRCompileLayer<ObjectLayerT, orc::SimpleCompiler>;
+ using ObjectLayerT = LegacyRTDyldObjectLinkingLayer;
+ using CompileLayerT = LegacyIRCompileLayer<ObjectLayerT, orc::SimpleCompiler>;
using LazyEmitLayerT = LazyEmittingLayer<CompileLayerT>;
ExecutionSession ES;
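
The resolvers above are ported from lookupFlags(), which returned flags for every queried symbol, to getResponsibilitySet(), which returns only the subset of queried symbols the caller must provide itself, i.e. those with no pre-existing strong definition. A stand-alone sketch of that contract, using a toy symbol table rather than LLVM types:

    #include <map>
    #include <set>
    #include <string>

    enum class Strength { Weak, Strong };

    // A symbol lands in the result ("caller is responsible for it") when there is
    // no existing definition at all, or only a weak one that may be overridden.
    std::set<std::string>
    toyResponsibilitySet(const std::map<std::string, Strength> &ExistingDefs,
                         const std::set<std::string> &Queried) {
      std::set<std::string> Result;
      for (const auto &Name : Queried) {
        auto I = ExistingDefs.find(Name);
        if (I == ExistingDefs.end() || I->second == Strength::Weak)
          Result.insert(Name);
      }
      return Result;
    }
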
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 71b4b73ca6d3..299d76183cd4 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -14,57 +14,56 @@ namespace {
using namespace llvm;
using namespace llvm::orc;
-class VSOSearchOrderResolver : public JITSymbolResolver {
+class JITDylibSearchOrderResolver : public JITSymbolResolver {
public:
- VSOSearchOrderResolver(MaterializationResponsibility &MR) : MR(MR) {}
+ JITDylibSearchOrderResolver(MaterializationResponsibility &MR) : MR(MR) {}
- Expected<LookupResult> lookup(const LookupSet &Symbols) {
- auto &ES = MR.getTargetVSO().getExecutionSession();
+ void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) {
+ auto &ES = MR.getTargetJITDylib().getExecutionSession();
SymbolNameSet InternedSymbols;
+ // Intern the requested symbols: lookup takes interned strings.
for (auto &S : Symbols)
- InternedSymbols.insert(ES.getSymbolStringPool().intern(S));
-
+ InternedSymbols.insert(ES.intern(S));
+
+ // Build an OnResolve callback to unwrap the interned strings and pass them
+ // to the OnResolved callback.
+ // FIXME: Switch to move capture of OnResolved once we have c++14.
+ auto OnResolvedWithUnwrap =
+ [OnResolved](Expected<SymbolMap> InternedResult) {
+ if (!InternedResult) {
+ OnResolved(InternedResult.takeError());
+ return;
+ }
+
+ LookupResult Result;
+ for (auto &KV : *InternedResult)
+ Result[*KV.first] = std::move(KV.second);
+ OnResolved(Result);
+ };
+
+ // We're not waiting for symbols to be ready. Just log any errors.
+ auto OnReady = [&ES](Error Err) { ES.reportError(std::move(Err)); };
+
+ // Register dependencies for all symbols contained in this set.
auto RegisterDependencies = [&](const SymbolDependenceMap &Deps) {
MR.addDependenciesForAll(Deps);
};
- auto InternedResult =
- MR.getTargetVSO().withSearchOrderDo([&](const VSOList &VSOs) {
- return ES.lookup(VSOs, InternedSymbols, RegisterDependencies, false);
- });
-
- if (!InternedResult)
- return InternedResult.takeError();
-
- LookupResult Result;
- for (auto &KV : *InternedResult)
- Result[*KV.first] = std::move(KV.second);
-
- return Result;
+ JITDylibSearchList SearchOrder;
+ MR.getTargetJITDylib().withSearchOrderDo(
+ [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
+ ES.lookup(SearchOrder, InternedSymbols, OnResolvedWithUnwrap, OnReady,
+ RegisterDependencies);
}
- Expected<LookupFlagsResult> lookupFlags(const LookupSet &Symbols) {
- auto &ES = MR.getTargetVSO().getExecutionSession();
-
- SymbolNameSet InternedSymbols;
-
- for (auto &S : Symbols)
- InternedSymbols.insert(ES.getSymbolStringPool().intern(S));
-
- SymbolFlagsMap InternedResult;
- MR.getTargetVSO().withSearchOrderDo([&](const VSOList &VSOs) {
- // An empty search order is pathalogical, but allowed.
- if (VSOs.empty())
- return;
+ Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) {
+ LookupSet Result;
- assert(VSOs.front() && "VSOList entry can not be null");
- InternedResult = VSOs.front()->lookupFlags(InternedSymbols);
- });
-
- LookupFlagsResult Result;
- for (auto &KV : InternedResult)
- Result[*KV.first] = std::move(KV.second);
+ for (auto &KV : MR.getSymbols()) {
+ if (Symbols.count(*KV.first))
+ Result.insert(*KV.first);
+ }
return Result;
}
@@ -78,52 +77,41 @@ private:
namespace llvm {
namespace orc {
-RTDyldObjectLinkingLayer2::RTDyldObjectLinkingLayer2(
+RTDyldObjectLinkingLayer::RTDyldObjectLinkingLayer(
ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager,
- NotifyLoadedFunction NotifyLoaded, NotifyFinalizedFunction NotifyFinalized)
+ NotifyLoadedFunction NotifyLoaded, NotifyEmittedFunction NotifyEmitted)
: ObjectLayer(ES), GetMemoryManager(GetMemoryManager),
NotifyLoaded(std::move(NotifyLoaded)),
- NotifyFinalized(std::move(NotifyFinalized)), ProcessAllSections(false) {}
+ NotifyEmitted(std::move(NotifyEmitted)) {}
-void RTDyldObjectLinkingLayer2::emit(MaterializationResponsibility R,
- VModuleKey K,
- std::unique_ptr<MemoryBuffer> O) {
+void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
+ std::unique_ptr<MemoryBuffer> O) {
assert(O && "Object must not be null");
- auto &ES = getExecutionSession();
-
- auto ObjFile = object::ObjectFile::createObjectFile(*O);
- if (!ObjFile) {
- getExecutionSession().reportError(ObjFile.takeError());
- R.failMaterialization();
- }
-
- auto MemoryManager = GetMemoryManager(K);
-
- VSOSearchOrderResolver Resolver(R);
- auto RTDyld = llvm::make_unique<RuntimeDyld>(*MemoryManager, Resolver);
- RTDyld->setProcessAllSections(ProcessAllSections);
+ // This method launches an asynchronous link step that will fulfill our
+ // materialization responsibility. We need to switch R to be heap
+ // allocated before that happens so it can live as long as the asynchronous
+ // link needs it to (i.e. it must be able to outlive this method).
+ auto SharedR = std::make_shared<MaterializationResponsibility>(std::move(R));
- {
- std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
+ auto &ES = getExecutionSession();
- assert(!ActiveRTDylds.count(K) &&
- "An active RTDyld already exists for this key?");
- ActiveRTDylds[K] = RTDyld.get();
+ auto Obj = object::ObjectFile::createObjectFile(*O);
- assert(!MemMgrs.count(K) &&
- "A memory manager already exists for this key?");
- MemMgrs[K] = std::move(MemoryManager);
+ if (!Obj) {
+ getExecutionSession().reportError(Obj.takeError());
+ SharedR->failMaterialization();
+ return;
}
- auto Info = RTDyld->loadObject(**ObjFile);
-
+ // Collect the internal symbols from the object file: We will need to
+ // filter these later.
+ auto InternalSymbols = std::make_shared<std::set<StringRef>>();
{
- std::set<StringRef> InternalSymbols;
- for (auto &Sym : (*ObjFile)->symbols()) {
+ for (auto &Sym : (*Obj)->symbols()) {
if (!(Sym.getFlags() & object::BasicSymbolRef::SF_Global)) {
if (auto SymName = Sym.getName())
- InternalSymbols.insert(*SymName);
+ InternalSymbols->insert(*SymName);
else {
ES.reportError(SymName.takeError());
R.failMaterialization();
@@ -131,46 +119,97 @@ void RTDyldObjectLinkingLayer2::emit(MaterializationResponsibility R,
}
}
}
-
- SymbolMap Symbols;
- for (auto &KV : RTDyld->getSymbolTable())
- if (!InternalSymbols.count(KV.first))
- Symbols[ES.getSymbolStringPool().intern(KV.first)] = KV.second;
-
- R.resolve(Symbols);
}
- if (NotifyLoaded)
- NotifyLoaded(K, **ObjFile, *Info);
-
- RTDyld->finalizeWithMemoryManagerLocking();
+ auto K = R.getVModuleKey();
+ RuntimeDyld::MemoryManager *MemMgr = nullptr;
+ // Create and record a memory manager for this object.
{
+ auto Tmp = GetMemoryManager();
std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- ActiveRTDylds.erase(K);
+ MemMgrs.push_back(std::move(Tmp));
+ MemMgr = MemMgrs.back().get();
}
- if (RTDyld->hasError()) {
- ES.reportError(make_error<StringError>(RTDyld->getErrorString(),
- inconvertibleErrorCode()));
- R.failMaterialization();
- return;
+ JITDylibSearchOrderResolver Resolver(*SharedR);
+
+ /* Thoughts on proper cross-dylib weak symbol handling:
+ *
+ * Change selection of canonical defs to be a manually triggered process, and
+ * add a 'canonical' bit to symbol definitions. When canonical def selection
+ * is triggered, sweep the JITDylibs to mark defs as canonical, discard
+ * duplicate defs.
+ */
+ jitLinkForORC(
+ **Obj, std::move(O), *MemMgr, Resolver, ProcessAllSections,
+ [this, K, SharedR, &Obj, InternalSymbols](
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ std::map<StringRef, JITEvaluatedSymbol> ResolvedSymbols) {
+ return onObjLoad(K, *SharedR, **Obj, std::move(LoadedObjInfo),
+ ResolvedSymbols, *InternalSymbols);
+ },
+ [this, K, SharedR](Error Err) {
+ onObjEmit(K, *SharedR, std::move(Err));
+ });
+}
+
+Error RTDyldObjectLinkingLayer::onObjLoad(
+ VModuleKey K, MaterializationResponsibility &R, object::ObjectFile &Obj,
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo,
+ std::map<StringRef, JITEvaluatedSymbol> Resolved,
+ std::set<StringRef> &InternalSymbols) {
+ SymbolFlagsMap ExtraSymbolsToClaim;
+ SymbolMap Symbols;
+ for (auto &KV : Resolved) {
+ // Scan the symbols and add them to the Symbols map for resolution.
+
+ // We never claim internal symbols.
+ if (InternalSymbols.count(KV.first))
+ continue;
+
+ auto InternedName = getExecutionSession().intern(KV.first);
+ auto Flags = KV.second.getFlags();
+
+ // Override object flags and claim responsibility for symbols if
+ // requested.
+ if (OverrideObjectFlags || AutoClaimObjectSymbols) {
+ auto I = R.getSymbols().find(InternedName);
+
+ if (OverrideObjectFlags && I != R.getSymbols().end())
+ Flags = JITSymbolFlags::stripTransientFlags(I->second);
+ else if (AutoClaimObjectSymbols && I == R.getSymbols().end())
+ ExtraSymbolsToClaim[InternedName] = Flags;
+ }
+
+ Symbols[InternedName] = JITEvaluatedSymbol(KV.second.getAddress(), Flags);
}
- R.finalize();
+ if (!ExtraSymbolsToClaim.empty())
+ if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim))
+ return Err;
+
+ R.resolve(Symbols);
+
+ if (NotifyLoaded)
+ NotifyLoaded(K, Obj, *LoadedObjInfo);
- if (NotifyFinalized)
- NotifyFinalized(K);
+ return Error::success();
}
-void RTDyldObjectLinkingLayer2::mapSectionAddress(
- VModuleKey K, const void *LocalAddress, JITTargetAddress TargetAddr) const {
- std::lock_guard<std::mutex> Lock(RTDyldLayerMutex);
- auto ActiveRTDyldItr = ActiveRTDylds.find(K);
+void RTDyldObjectLinkingLayer::onObjEmit(VModuleKey K,
+ MaterializationResponsibility &R,
+ Error Err) {
+ if (Err) {
+ getExecutionSession().reportError(std::move(Err));
+ R.failMaterialization();
+ return;
+ }
+
+ R.emit();
- assert(ActiveRTDyldItr != ActiveRTDylds.end() &&
- "No active RTDyld instance found for key");
- ActiveRTDyldItr->second->mapSectionAddress(LocalAddress, TargetAddr);
+ if (NotifyEmitted)
+ NotifyEmitted(K);
}
} // End namespace orc.
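
The rewritten emit() above hands a MaterializationResponsibility, which is move-only, to an asynchronous link step by first promoting it to a std::shared_ptr; the FIXME notes that C++14 init-captures would make this unnecessary. A generic sketch of the pattern, with no LLVM types involved:

    #include <functional>
    #include <memory>

    struct Responsibility {
      Responsibility() = default;
      Responsibility(Responsibility &&) = default;     // movable...
      Responsibility(const Responsibility &) = delete; // ...but not copyable
      void emit() {}
    };

    void runAsync(std::function<void()> Continuation) { Continuation(); }

    // Promote the move-only object to a shared_ptr so a C++11 lambda, which can
    // only copy-capture, keeps it alive until the asynchronous step completes.
    void emitLater(Responsibility R) {
      auto SharedR = std::make_shared<Responsibility>(std::move(R));
      runAsync([SharedR]() { SharedR->emit(); });
    }
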
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
new file mode 100644
index 000000000000..9525b168fbd3
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
@@ -0,0 +1,65 @@
+//===-- ThreadSafeModule.cpp - Thread safe Module, Context, and Utilities -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+namespace orc {
+
+ThreadSafeModule cloneToNewContext(ThreadSafeModule &TSM,
+ GVPredicate ShouldCloneDef,
+ GVModifier UpdateClonedDefSource) {
+ assert(TSM && "Can not clone null module");
+
+ if (!ShouldCloneDef)
+ ShouldCloneDef = [](const GlobalValue &) { return true; };
+
+ auto Lock = TSM.getContextLock();
+
+ SmallVector<char, 1> ClonedModuleBuffer;
+
+ {
+ std::set<GlobalValue *> ClonedDefsInSrc;
+ ValueToValueMapTy VMap;
+ auto Tmp = CloneModule(*TSM.getModule(), VMap, [&](const GlobalValue *GV) {
+ if (ShouldCloneDef(*GV)) {
+ ClonedDefsInSrc.insert(const_cast<GlobalValue *>(GV));
+ return true;
+ }
+ return false;
+ });
+
+ if (UpdateClonedDefSource)
+ for (auto *GV : ClonedDefsInSrc)
+ UpdateClonedDefSource(*GV);
+
+ BitcodeWriter BCWriter(ClonedModuleBuffer);
+
+ BCWriter.writeModule(*Tmp);
+ BCWriter.writeSymtab();
+ BCWriter.writeStrtab();
+ }
+
+ MemoryBufferRef ClonedModuleBufferRef(
+ StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()),
+ "cloned module buffer");
+ ThreadSafeContext NewTSCtx(llvm::make_unique<LLVMContext>());
+
+ auto ClonedModule =
+ cantFail(parseBitcodeFile(ClonedModuleBufferRef, *NewTSCtx.getContext()));
+ ClonedModule->setModuleIdentifier(TSM.getModule()->getName());
+ return ThreadSafeModule(std::move(ClonedModule), std::move(NewTSCtx));
+}
+
+} // end namespace orc
+} // end namespace llvm
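
cloneToNewContext() above copies selected definitions into a module that owns a fresh LLVMContext by round-tripping through bitcode, so IR is never shared between contexts. A hedged usage sketch, assuming the usual ThreadSafeModule.h include; the predicate and modifier shown are illustrative, not taken from this patch:

    using namespace llvm;
    using namespace llvm::orc;

    // Move all function definitions of TSM into a module with its own context,
    // demoting the originals to declarations in the source module.
    ThreadSafeModule extractDefinitions(ThreadSafeModule &TSM) {
      return cloneToNewContext(
          TSM,
          /*ShouldCloneDef=*/[](const GlobalValue &GV) {
            return isa<Function>(GV) && !GV.isDeclaration();
          },
          /*UpdateClonedDefSource=*/[](GlobalValue &GV) {
            cast<Function>(GV).deleteBody();
          });
    }
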
diff --git a/contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index 7bf8120d23df..f195d0282998 100644
--- a/contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -66,9 +66,9 @@ public:
CloseMarker();
}
- void NotifyObjectEmitted(const ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &L) override;
- void NotifyFreeingObject(const ObjectFile &Obj) override;
+ void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
+ void notifyFreeingObject(ObjectKey K) override;
private:
bool InitDebuggingDir();
@@ -227,8 +227,9 @@ PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
SuccessfullyInitialized = true;
}
-void PerfJITEventListener::NotifyObjectEmitted(
- const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
+void PerfJITEventListener::notifyObjectLoaded(
+ ObjectKey K, const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) {
if (!SuccessfullyInitialized)
return;
@@ -280,7 +281,7 @@ void PerfJITEventListener::NotifyObjectEmitted(
Dumpstream->flush();
}
-void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+void PerfJITEventListener::notifyFreeingObject(ObjectKey K) {
// perf currently doesn't have an interface for unloading. But munmap()ing the
// code section does, so that's ok.
}
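
The listener interface is renamed from NotifyObjectEmitted/NotifyFreeingObject to notifyObjectLoaded/notifyFreeingObject and is now keyed by an ObjectKey, the uint64_t that the ORC layers above derive from the object buffer's address. A minimal listener against the new hooks might look like this sketch (includes omitted; purely illustrative):

    class LoggingJITEventListener : public JITEventListener {
    public:
      void notifyObjectLoaded(ObjectKey K, const object::ObjectFile &Obj,
                              const RuntimeDyld::LoadedObjectInfo &L) override {
        errs() << "loaded object " << K << ": " << Obj.getFileName() << "\n";
      }
      void notifyFreeingObject(ObjectKey K) override {
        errs() << "freeing object " << K << "\n";
      }
    };
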
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
index 18eb0e461921..0553c217c2a2 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Object/ObjectFile.h"
using namespace llvm;
@@ -25,11 +27,18 @@ JITSymbolFlags llvm::JITSymbolFlags::fromGlobalValue(const GlobalValue &GV) {
Flags |= JITSymbolFlags::Common;
if (!GV.hasLocalLinkage() && !GV.hasHiddenVisibility())
Flags |= JITSymbolFlags::Exported;
+
+ if (isa<Function>(GV))
+ Flags |= JITSymbolFlags::Callable;
+ else if (isa<GlobalAlias>(GV) &&
+ isa<Function>(cast<GlobalAlias>(GV).getAliasee()))
+ Flags |= JITSymbolFlags::Callable;
+
return Flags;
}
-JITSymbolFlags
-llvm::JITSymbolFlags::fromObjectSymbol(const object::BasicSymbolRef &Symbol) {
+Expected<JITSymbolFlags>
+llvm::JITSymbolFlags::fromObjectSymbol(const object::SymbolRef &Symbol) {
JITSymbolFlags Flags = JITSymbolFlags::None;
if (Symbol.getFlags() & object::BasicSymbolRef::SF_Weak)
Flags |= JITSymbolFlags::Weak;
@@ -37,11 +46,19 @@ llvm::JITSymbolFlags::fromObjectSymbol(const object::BasicSymbolRef &Symbol) {
Flags |= JITSymbolFlags::Common;
if (Symbol.getFlags() & object::BasicSymbolRef::SF_Exported)
Flags |= JITSymbolFlags::Exported;
+
+ auto SymbolType = Symbol.getType();
+ if (!SymbolType)
+ return SymbolType.takeError();
+
+ if (*SymbolType & object::SymbolRef::ST_Function)
+ Flags |= JITSymbolFlags::Callable;
+
return Flags;
}
-ARMJITSymbolFlags llvm::ARMJITSymbolFlags::fromObjectSymbol(
- const object::BasicSymbolRef &Symbol) {
+ARMJITSymbolFlags
+llvm::ARMJITSymbolFlags::fromObjectSymbol(const object::SymbolRef &Symbol) {
ARMJITSymbolFlags Flags;
if (Symbol.getFlags() & object::BasicSymbolRef::SF_Thumb)
Flags |= ARMJITSymbolFlags::Thumb;
@@ -51,48 +68,64 @@ ARMJITSymbolFlags llvm::ARMJITSymbolFlags::fromObjectSymbol(
/// Performs lookup by, for each symbol, first calling
/// findSymbolInLogicalDylib and if that fails calling
/// findSymbol.
-Expected<JITSymbolResolver::LookupResult>
-LegacyJITSymbolResolver::lookup(const LookupSet &Symbols) {
+void LegacyJITSymbolResolver::lookup(const LookupSet &Symbols,
+ OnResolvedFunction OnResolved) {
JITSymbolResolver::LookupResult Result;
for (auto &Symbol : Symbols) {
std::string SymName = Symbol.str();
if (auto Sym = findSymbolInLogicalDylib(SymName)) {
if (auto AddrOrErr = Sym.getAddress())
Result[Symbol] = JITEvaluatedSymbol(*AddrOrErr, Sym.getFlags());
- else
- return AddrOrErr.takeError();
- } else if (auto Err = Sym.takeError())
- return std::move(Err);
- else {
+ else {
+ OnResolved(AddrOrErr.takeError());
+ return;
+ }
+ } else if (auto Err = Sym.takeError()) {
+ OnResolved(std::move(Err));
+ return;
+ } else {
// findSymbolInLogicalDylib failed. Lets try findSymbol.
if (auto Sym = findSymbol(SymName)) {
if (auto AddrOrErr = Sym.getAddress())
Result[Symbol] = JITEvaluatedSymbol(*AddrOrErr, Sym.getFlags());
- else
- return AddrOrErr.takeError();
- } else if (auto Err = Sym.takeError())
- return std::move(Err);
- else
- return make_error<StringError>("Symbol not found: " + Symbol,
- inconvertibleErrorCode());
+ else {
+ OnResolved(AddrOrErr.takeError());
+ return;
+ }
+ } else if (auto Err = Sym.takeError()) {
+ OnResolved(std::move(Err));
+ return;
+ } else {
+ OnResolved(make_error<StringError>("Symbol not found: " + Symbol,
+ inconvertibleErrorCode()));
+ return;
+ }
}
}
- return std::move(Result);
+ OnResolved(std::move(Result));
}
/// Performs flags lookup by calling findSymbolInLogicalDylib and
/// returning the flags value for that symbol.
-Expected<JITSymbolResolver::LookupFlagsResult>
-LegacyJITSymbolResolver::lookupFlags(const LookupSet &Symbols) {
- JITSymbolResolver::LookupFlagsResult Result;
+Expected<JITSymbolResolver::LookupSet>
+LegacyJITSymbolResolver::getResponsibilitySet(const LookupSet &Symbols) {
+ JITSymbolResolver::LookupSet Result;
for (auto &Symbol : Symbols) {
std::string SymName = Symbol.str();
- if (auto Sym = findSymbolInLogicalDylib(SymName))
- Result[Symbol] = Sym.getFlags();
- else if (auto Err = Sym.takeError())
+ if (auto Sym = findSymbolInLogicalDylib(SymName)) {
+ // If there's an existing def but it is not strong, then the caller is
+ // responsible for it.
+ if (!Sym.getFlags().isStrong())
+ Result.insert(Symbol);
+ } else if (auto Err = Sym.takeError())
return std::move(Err);
+ else {
+ // If there is no existing definition then the caller is responsible for
+ // it.
+ Result.insert(Symbol);
+ }
}
return std::move(Result);
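
LegacyJITSymbolResolver keeps the old blocking hooks and adapts them to the new asynchronous lookup() and getResponsibilitySet() shown above, so existing resolvers only need to implement findSymbolInLogicalDylib() and findSymbol(). A hedged sketch of such a resolver (the in-process fallback is illustrative, not part of this patch):

    class InProcessResolver : public LegacyJITSymbolResolver {
    public:
      JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
        return nullptr; // toy example: nothing defined in the logical dylib
      }
      JITSymbol findSymbol(const std::string &Name) override {
        if (auto Addr = RTDyldMemoryManager::getSymbolAddressInProcess(Name))
          return JITSymbol(Addr, JITSymbolFlags::Exported);
        return nullptr;
      }
    };
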
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 76f5e5ead504..53cb782c55c4 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -19,10 +19,13 @@
#include "RuntimeDyldMachO.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MutexGuard.h"
+#include <future>
+
using namespace llvm;
using namespace llvm::object;
@@ -131,6 +134,14 @@ void RuntimeDyldImpl::resolveRelocations() {
ErrorStr = toString(std::move(Err));
}
+ resolveLocalRelocations();
+
+ // Print out sections after relocation.
+ LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i)
+ dumpSectionMemory(Sections[i], "after relocations"););
+}
+
+void RuntimeDyldImpl::resolveLocalRelocations() {
// Iterate over all outstanding relocations
for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) {
// The Section here (Sections[i]) refers to the section in which the
@@ -143,10 +154,6 @@ void RuntimeDyldImpl::resolveRelocations() {
resolveRelocationList(it->second, Addr);
}
Relocations.clear();
-
- // Print out sections after relocation.
- LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i)
- dumpSectionMemory(Sections[i], "after relocations"););
}
void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
@@ -204,7 +211,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
// First, collect all weak and common symbols. We need to know if stronger
// definitions occur elsewhere.
- JITSymbolResolver::LookupFlagsResult SymbolFlags;
+ JITSymbolResolver::LookupSet ResponsibilitySet;
{
JITSymbolResolver::LookupSet Symbols;
for (auto &Sym : Obj.symbols()) {
@@ -218,10 +225,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
}
}
- if (auto FlagsResultOrErr = Resolver.lookupFlags(Symbols))
- SymbolFlags = std::move(*FlagsResultOrErr);
+ if (auto ResultOrErr = Resolver.getResponsibilitySet(Symbols))
+ ResponsibilitySet = std::move(*ResultOrErr);
else
- return FlagsResultOrErr.takeError();
+ return ResultOrErr.takeError();
}
// Parse symbols
@@ -249,37 +256,36 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
return NameOrErr.takeError();
// Compute JIT symbol flags.
- JITSymbolFlags JITSymFlags = getJITSymbolFlags(*I);
+ auto JITSymFlags = getJITSymbolFlags(*I);
+ if (!JITSymFlags)
+ return JITSymFlags.takeError();
// If this is a weak definition, check to see if there's a strong one.
// If there is, skip this symbol (we won't be providing it: the strong
// definition will). If there's no strong definition, make this definition
// strong.
- if (JITSymFlags.isWeak() || JITSymFlags.isCommon()) {
+ if (JITSymFlags->isWeak() || JITSymFlags->isCommon()) {
// First check whether there's already a definition in this instance.
- // FIXME: Override existing weak definitions with strong ones.
if (GlobalSymbolTable.count(Name))
continue;
- // Then check whether we found flags for an existing symbol during the
- // flags lookup earlier.
- auto FlagsI = SymbolFlags.find(Name);
- if (FlagsI == SymbolFlags.end() ||
- (JITSymFlags.isWeak() && !FlagsI->second.isStrong()) ||
- (JITSymFlags.isCommon() && FlagsI->second.isCommon())) {
- if (JITSymFlags.isWeak())
- JITSymFlags &= ~JITSymbolFlags::Weak;
- if (JITSymFlags.isCommon()) {
- JITSymFlags &= ~JITSymbolFlags::Common;
- uint32_t Align = I->getAlignment();
- uint64_t Size = I->getCommonSize();
- if (!CommonAlign)
- CommonAlign = Align;
- CommonSize = alignTo(CommonSize, Align) + Size;
- CommonSymbolsToAllocate.push_back(*I);
- }
- } else
+ // If we're not responsible for this symbol, skip it.
+ if (!ResponsibilitySet.count(Name))
continue;
+
+ // Otherwise update the flags on the symbol to make this definition
+ // strong.
+ if (JITSymFlags->isWeak())
+ *JITSymFlags &= ~JITSymbolFlags::Weak;
+ if (JITSymFlags->isCommon()) {
+ *JITSymFlags &= ~JITSymbolFlags::Common;
+ uint32_t Align = I->getAlignment();
+ uint64_t Size = I->getCommonSize();
+ if (!CommonAlign)
+ CommonAlign = Align;
+ CommonSize = alignTo(CommonSize, Align) + Size;
+ CommonSymbolsToAllocate.push_back(*I);
+ }
}
if (Flags & SymbolRef::SF_Absolute &&
@@ -296,7 +302,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< " SID: " << SectionID
<< " Offset: " << format("%p", (uintptr_t)Addr)
<< " flags: " << Flags << "\n");
- GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, Addr, JITSymFlags);
+ GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, Addr, *JITSymFlags);
} else if (SymType == object::SymbolRef::ST_Function ||
SymType == object::SymbolRef::ST_Data ||
SymType == object::SymbolRef::ST_Unknown ||
@@ -329,7 +335,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< " Offset: " << format("%p", (uintptr_t)SectOffset)
<< " flags: " << Flags << "\n");
GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, SectOffset, JITSymFlags);
+ SymbolTableEntry(SectionID, SectOffset, *JITSymFlags);
}
}
@@ -642,7 +648,8 @@ void RuntimeDyldImpl::writeBytesUnaligned(uint64_t Value, uint8_t *Dst,
}
}
-JITSymbolFlags RuntimeDyldImpl::getJITSymbolFlags(const BasicSymbolRef &SR) {
+Expected<JITSymbolFlags>
+RuntimeDyldImpl::getJITSymbolFlags(const SymbolRef &SR) {
return JITSymbolFlags::fromObjectSymbol(SR);
}
@@ -683,11 +690,15 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
Addr += AlignOffset;
Offset += AlignOffset;
}
- JITSymbolFlags JITSymFlags = getJITSymbolFlags(Sym);
+ auto JITSymFlags = getJITSymbolFlags(Sym);
+
+ if (!JITSymFlags)
+ return JITSymFlags.takeError();
+
LLVM_DEBUG(dbgs() << "Allocating common symbol " << Name << " address "
<< format("%p", Addr) << "\n");
GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, Offset, JITSymFlags);
+ SymbolTableEntry(SectionID, Offset, std::move(*JITSymFlags));
Offset += Size;
Addr += Size;
}
@@ -992,42 +1003,8 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
}
}
-Error RuntimeDyldImpl::resolveExternalSymbols() {
- StringMap<JITEvaluatedSymbol> ExternalSymbolMap;
-
- // Resolution can trigger emission of more symbols, so iterate until
- // we've resolved *everything*.
- {
- JITSymbolResolver::LookupSet ResolvedSymbols;
-
- while (true) {
- JITSymbolResolver::LookupSet NewSymbols;
-
- for (auto &RelocKV : ExternalSymbolRelocations) {
- StringRef Name = RelocKV.first();
- if (!Name.empty() && !GlobalSymbolTable.count(Name) &&
- !ResolvedSymbols.count(Name))
- NewSymbols.insert(Name);
- }
-
- if (NewSymbols.empty())
- break;
-
- auto NewResolverResults = Resolver.lookup(NewSymbols);
- if (!NewResolverResults)
- return NewResolverResults.takeError();
-
- assert(NewResolverResults->size() == NewSymbols.size() &&
- "Should have errored on unresolved symbols");
-
- for (auto &RRKV : *NewResolverResults) {
- assert(!ResolvedSymbols.count(RRKV.first) && "Redundant resolution?");
- ExternalSymbolMap.insert(RRKV);
- ResolvedSymbols.insert(RRKV.first);
- }
- }
- }
-
+void RuntimeDyldImpl::applyExternalSymbolRelocations(
+ const StringMap<JITEvaluatedSymbol> ExternalSymbolMap) {
while (!ExternalSymbolRelocations.empty()) {
StringMap<RelocationList>::iterator i = ExternalSymbolRelocations.begin();
@@ -1089,10 +1066,114 @@ Error RuntimeDyldImpl::resolveExternalSymbols() {
ExternalSymbolRelocations.erase(i);
}
+}
+
+Error RuntimeDyldImpl::resolveExternalSymbols() {
+ StringMap<JITEvaluatedSymbol> ExternalSymbolMap;
+
+ // Resolution can trigger emission of more symbols, so iterate until
+ // we've resolved *everything*.
+ {
+ JITSymbolResolver::LookupSet ResolvedSymbols;
+
+ while (true) {
+ JITSymbolResolver::LookupSet NewSymbols;
+
+ for (auto &RelocKV : ExternalSymbolRelocations) {
+ StringRef Name = RelocKV.first();
+ if (!Name.empty() && !GlobalSymbolTable.count(Name) &&
+ !ResolvedSymbols.count(Name))
+ NewSymbols.insert(Name);
+ }
+
+ if (NewSymbols.empty())
+ break;
+
+#ifdef _MSC_VER
+ using ExpectedLookupResult =
+ MSVCPExpected<JITSymbolResolver::LookupResult>;
+#else
+ using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
+#endif
+
+ auto NewSymbolsP = std::make_shared<std::promise<ExpectedLookupResult>>();
+ auto NewSymbolsF = NewSymbolsP->get_future();
+ Resolver.lookup(NewSymbols,
+ [=](Expected<JITSymbolResolver::LookupResult> Result) {
+ NewSymbolsP->set_value(std::move(Result));
+ });
+
+ auto NewResolverResults = NewSymbolsF.get();
+
+ if (!NewResolverResults)
+ return NewResolverResults.takeError();
+
+ assert(NewResolverResults->size() == NewSymbols.size() &&
+ "Should have errored on unresolved symbols");
+
+ for (auto &RRKV : *NewResolverResults) {
+ assert(!ResolvedSymbols.count(RRKV.first) && "Redundant resolution?");
+ ExternalSymbolMap.insert(RRKV);
+ ResolvedSymbols.insert(RRKV.first);
+ }
+ }
+ }
+
+ applyExternalSymbolRelocations(ExternalSymbolMap);
return Error::success();
}
+void RuntimeDyldImpl::finalizeAsync(
+ std::unique_ptr<RuntimeDyldImpl> This, std::function<void(Error)> OnEmitted,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer) {
+
+ // FIXME: Move-capture OnEmitted and UnderlyingBuffer once we have
+ // c++14.
+ auto SharedUnderlyingBuffer =
+ std::shared_ptr<MemoryBuffer>(std::move(UnderlyingBuffer));
+ auto SharedThis = std::shared_ptr<RuntimeDyldImpl>(std::move(This));
+ auto PostResolveContinuation =
+ [SharedThis, OnEmitted, SharedUnderlyingBuffer](
+ Expected<JITSymbolResolver::LookupResult> Result) {
+ if (!Result) {
+ OnEmitted(Result.takeError());
+ return;
+ }
+
+ /// Copy the result into a StringMap, where the keys are held by value.
+ StringMap<JITEvaluatedSymbol> Resolved;
+ for (auto &KV : *Result)
+ Resolved[KV.first] = KV.second;
+
+ SharedThis->applyExternalSymbolRelocations(Resolved);
+ SharedThis->resolveLocalRelocations();
+ SharedThis->registerEHFrames();
+ std::string ErrMsg;
+ if (SharedThis->MemMgr.finalizeMemory(&ErrMsg))
+ OnEmitted(make_error<StringError>(std::move(ErrMsg),
+ inconvertibleErrorCode()));
+ else
+ OnEmitted(Error::success());
+ };
+
+ JITSymbolResolver::LookupSet Symbols;
+
+ for (auto &RelocKV : SharedThis->ExternalSymbolRelocations) {
+ StringRef Name = RelocKV.first();
+ assert(!Name.empty() && "Symbol has no name?");
+ assert(!SharedThis->GlobalSymbolTable.count(Name) &&
+ "Name already processed. RuntimeDyld instances can not be re-used "
+ "when finalizing with finalizeAsync.");
+ Symbols.insert(Name);
+ }
+
+ if (!Symbols.empty()) {
+ SharedThis->Resolver.lookup(Symbols, PostResolveContinuation);
+ } else
+ PostResolveContinuation(std::map<StringRef, JITEvaluatedSymbol>());
+}
+
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
@@ -1240,5 +1321,35 @@ void RuntimeDyld::deregisterEHFrames() {
if (Dyld)
Dyld->deregisterEHFrames();
}
+// FIXME: Kill this with fire once we have a new JIT linker: this is only here
+// so that we can re-use RuntimeDyld's implementation without twisting the
+// interface any further for ORC's purposes.
+void jitLinkForORC(object::ObjectFile &Obj,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer,
+ RuntimeDyld::MemoryManager &MemMgr,
+ JITSymbolResolver &Resolver, bool ProcessAllSections,
+ std::function<Error(
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObj,
+ std::map<StringRef, JITEvaluatedSymbol>)>
+ OnLoaded,
+ std::function<void(Error)> OnEmitted) {
+
+ RuntimeDyld RTDyld(MemMgr, Resolver);
+ RTDyld.setProcessAllSections(ProcessAllSections);
+
+ auto Info = RTDyld.loadObject(Obj);
+
+ if (RTDyld.hasError()) {
+ OnEmitted(make_error<StringError>(RTDyld.getErrorString(),
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ if (auto Err = OnLoaded(std::move(Info), RTDyld.getSymbolTable()))
+ OnEmitted(std::move(Err));
+
+ RuntimeDyldImpl::finalizeAsync(std::move(RTDyld.Dyld), std::move(OnEmitted),
+ std::move(UnderlyingBuffer));
+}
} // end namespace llvm
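
Because JITSymbolResolver::lookup() is now callback-based, the synchronous callers (resolveExternalSymbols() above, and RuntimeDyldChecker below) bridge back to blocking behaviour with a std::promise/std::future pair; MSVCPExpected exists only to satisfy MSVC's std::promise requirements. The same shape, reduced to a self-contained toy:

    #include <cstdint>
    #include <functional>
    #include <future>
    #include <memory>
    #include <string>

    // Toy stand-in for the now asynchronous, callback-based lookup.
    void asyncLookup(const std::string &Name,
                     std::function<void(uint64_t)> OnResolved) {
      OnResolved(0x1000); // pretend Name resolved to this address
    }

    // Block until the callback delivers its result via the promise/future pair.
    uint64_t blockingLookup(const std::string &Name) {
      auto ResultP = std::make_shared<std::promise<uint64_t>>();
      auto ResultF = ResultP->get_future();
      asyncLookup(Name, [ResultP](uint64_t Addr) { ResultP->set_value(Addr); });
      return ResultF.get();
    }
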
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 1c54ad6fb03f..340ddaab186d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -66,7 +66,7 @@ RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) {
} else {
HasError = true;
raw_string_ostream ErrStream(ErrorStr);
- logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream, "");
+ logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream);
return nullptr;
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index fa8906869b3a..6eb6256080ff 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -14,8 +14,10 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Path.h"
#include <cctype>
+#include <future>
#include <memory>
#include <utility>
@@ -729,15 +731,35 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
return DidAllTestsPass && (NumRules != 0);
}
+Expected<JITSymbolResolver::LookupResult> RuntimeDyldCheckerImpl::lookup(
+ const JITSymbolResolver::LookupSet &Symbols) const {
+
+#ifdef _MSC_VER
+ using ExpectedLookupResult = MSVCPExpected<JITSymbolResolver::LookupResult>;
+#else
+ using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
+#endif
+
+ auto ResultP = std::make_shared<std::promise<ExpectedLookupResult>>();
+ auto ResultF = ResultP->get_future();
+
+ getRTDyld().Resolver.lookup(
+ Symbols, [=](Expected<JITSymbolResolver::LookupResult> Result) {
+ ResultP->set_value(std::move(Result));
+ });
+ return ResultF.get();
+}
+
bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
if (getRTDyld().getSymbol(Symbol))
return true;
- JITSymbolResolver::LookupSet Symbols({Symbol});
- auto Result = getRTDyld().Resolver.lookup(Symbols);
+ auto Result = lookup({Symbol});
+
if (!Result) {
logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: ");
return false;
}
+
assert(Result->count(Symbol) && "Missing symbol result");
return true;
}
@@ -751,8 +773,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
if (auto InternalSymbol = getRTDyld().getSymbol(Symbol))
return InternalSymbol.getAddress();
- JITSymbolResolver::LookupSet Symbols({Symbol});
- auto Result = getRTDyld().Resolver.lookup(Symbols);
+ auto Result = lookup({Symbol});
if (!Result) {
logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: ");
return 0;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index b462ef2c00ce..6da1a68d06d6 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -41,6 +41,9 @@ private:
RuntimeDyldImpl &getRTDyld() const { return *RTDyld.Dyld; }
+ Expected<JITSymbolResolver::LookupResult>
+ lookup(const JITSymbolResolver::LookupSet &Symbols) const;
+
bool isSymbolValid(StringRef Symbol) const;
uint64_t getSymbolLocalAddr(StringRef Symbol) const;
uint64_t getSymbolRemoteAddr(StringRef Symbol) const;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index f9a81c7bd1b0..226ee715e18b 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -255,7 +255,7 @@ RuntimeDyldELF::loadObject(const object::ObjectFile &O) {
else {
HasError = true;
raw_string_ostream ErrStream(ErrorStr);
- logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream, "");
+ logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream);
return nullptr;
}
}
@@ -1130,7 +1130,7 @@ RuntimeDyldELF::processRelocationRef(
if (!SymTypeOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymTypeOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(SymTypeOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -1151,7 +1151,7 @@ RuntimeDyldELF::processRelocationRef(
if (!SectionOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SectionOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(SectionOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 4d7cc36d0666..4c650e09ac1f 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -370,7 +370,7 @@ protected:
void writeBytesUnaligned(uint64_t Value, uint8_t *Dst, unsigned Size) const;
/// Generate JITSymbolFlags from a libObject symbol.
- virtual JITSymbolFlags getJITSymbolFlags(const BasicSymbolRef &Sym);
+ virtual Expected<JITSymbolFlags> getJITSymbolFlags(const SymbolRef &Sym);
/// Modify the given target address based on the given symbol flags.
/// This can be used by subclasses to tweak addresses based on symbol flags,
@@ -433,6 +433,9 @@ protected:
const ObjectFile &Obj, ObjSectionToIDMap &ObjSectionToID,
StubMap &Stubs) = 0;
+ void applyExternalSymbolRelocations(
+ const StringMap<JITEvaluatedSymbol> ExternalSymbolMap);
+
/// Resolve relocations to external symbols.
Error resolveExternalSymbols();
@@ -536,6 +539,12 @@ public:
void resolveRelocations();
+ void resolveLocalRelocations();
+
+ static void finalizeAsync(std::unique_ptr<RuntimeDyldImpl> This,
+ std::function<void(Error)> OnEmitted,
+ std::unique_ptr<MemoryBuffer> UnderlyingBuffer);
+
void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
void mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress);
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index c5a215c83331..d47fcd45be88 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -370,7 +370,7 @@ RuntimeDyldMachO::loadObject(const object::ObjectFile &O) {
else {
HasError = true;
raw_string_ostream ErrStream(ErrorStr);
- logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream, "");
+ logAllUnhandledErrors(ObjSectionToIDOrErr.takeError(), ErrStream);
return nullptr;
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 729ea1ec48a4..8723dd0fd0ea 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -28,7 +28,7 @@ static bool isThumbFunc(symbol_iterator Symbol, const ObjectFile &Obj,
if (!SymTypeOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymTypeOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(SymTypeOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index 2d6e5c4aea67..aee5f6dc3746 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -37,7 +37,13 @@ private:
if (!ImageBase) {
ImageBase = std::numeric_limits<uint64_t>::max();
for (const SectionEntry &Section : Sections)
- ImageBase = std::min(ImageBase, Section.getLoadAddress());
+ // The Sections list may contain sections that weren't loaded for
+ // whatever reason: they may be debug sections, and ProcessAllSections
+ // is false, or they may be sections that contain 0 bytes. If the
+ // section isn't loaded, the load address will be 0, and it should not
+ // be included in the ImageBase calculation.
+ if (Section.getLoadAddress() != 0)
+ ImageBase = std::min(ImageBase, Section.getLoadAddress());
}
return ImageBase;
}
@@ -122,6 +128,13 @@ public:
break;
}
+ case COFF::IMAGE_REL_AMD64_SECREL: {
+ assert(static_cast<int64_t>(RE.Addend) <= INT32_MAX && "Relocation overflow");
+ assert(static_cast<int64_t>(RE.Addend) >= INT32_MIN && "Relocation underflow");
+ writeBytesUnaligned(RE.Addend, Target, 4);
+ break;
+ }
+
default:
llvm_unreachable("Relocation type not implemented yet!");
break;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 64a6b2901819..ab7cd2bdae15 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -34,9 +34,11 @@ public:
unsigned getStubAlignment() override { return 4; }
- JITSymbolFlags getJITSymbolFlags(const BasicSymbolRef &SR) override {
+ Expected<JITSymbolFlags> getJITSymbolFlags(const SymbolRef &SR) override {
auto Flags = RuntimeDyldImpl::getJITSymbolFlags(SR);
- Flags.getTargetFlags() = ARMJITSymbolFlags::fromObjectSymbol(SR);
+ if (!Flags)
+ return Flags.takeError();
+ Flags->getTargetFlags() = ARMJITSymbolFlags::fromObjectSymbol(SR);
return Flags;
}
diff --git a/contrib/llvm/lib/FuzzMutate/IRMutator.cpp b/contrib/llvm/lib/FuzzMutate/IRMutator.cpp
index 2dc7dfb880a2..40e402cdadef 100644
--- a/contrib/llvm/lib/FuzzMutate/IRMutator.cpp
+++ b/contrib/llvm/lib/FuzzMutate/IRMutator.cpp
@@ -73,6 +73,7 @@ static void eliminateDeadCode(Function &F) {
FPM.addPass(DCEPass());
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
+ FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
FPM.run(F, FAM);
}
diff --git a/contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index 9f5b7d608a1d..337184535558 100644
--- a/contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/contrib/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -136,7 +136,7 @@ Value *RandomIRBuilder::findPointer(BasicBlock &BB,
auto IsMatchingPtr = [&Srcs, &Pred](Instruction *Inst) {
// Invoke instructions sometimes produce valid pointers but currently
// we can't insert loads or stores from them
- if (isa<TerminatorInst>(Inst))
+ if (Inst->isTerminator())
return false;
if (auto PtrTy = dyn_cast<PointerType>(Inst->getType())) {
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index 99a25a723b4a..a5dc623e1a30 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -36,7 +36,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
@@ -199,7 +198,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
!isa<GlobalVariable>(V) && !isa<Function>(V) && !isa<BasicBlock>(V);
if (auto *BA = dyn_cast<BlockAddress>(V))
ID = OM.lookup(BA->getBasicBlock()).first;
- llvm::sort(List.begin(), List.end(), [&](const Entry &L, const Entry &R) {
+ llvm::sort(List, [&](const Entry &L, const Entry &R) {
const Use *LU = L.first;
const Use *RU = R.first;
if (LU == RU)
@@ -363,6 +362,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
+ case CallingConv::AArch64_VectorCall: Out << "aarch64_vector_pcs"; break;
case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
case CallingConv::AVR_INTR: Out << "avr_intrcc "; break;
case CallingConv::AVR_SIGNAL: Out << "avr_signalcc "; break;
@@ -704,6 +704,10 @@ private:
DenseMap<GlobalValue::GUID, unsigned> GUIDMap;
unsigned GUIDNext = 0;
+ /// TypeIdMap - The slot map for type ids used in the summary index.
+ StringMap<unsigned> TypeIdMap;
+ unsigned TypeIdNext = 0;
+
public:
/// Construct from a module.
///
@@ -735,6 +739,7 @@ public:
int getAttributeGroupSlot(AttributeSet AS);
int getModulePathSlot(StringRef Path);
int getGUIDSlot(GlobalValue::GUID GUID);
+ int getTypeIdSlot(StringRef Id);
/// If you'd like to deal with a function instead of just a module, use
/// this method to get its data into the SlotTracker.
@@ -789,6 +794,7 @@ private:
inline void CreateModulePathSlot(StringRef Path);
void CreateGUIDSlot(GlobalValue::GUID GUID);
+ void CreateTypeIdSlot(StringRef Id);
/// Add all of the module level global variables (and their initializers)
/// and function declarations, but not the contents of those functions.
@@ -991,9 +997,9 @@ void SlotTracker::processFunction() {
// We allow direct calls to any llvm.foo function here, because the
// target may not be linked into the optimizer.
- if (auto CS = ImmutableCallSite(&I)) {
+ if (const auto *Call = dyn_cast<CallBase>(&I)) {
// Add all the call attributes to the table.
- AttributeSet Attrs = CS.getAttributes().getFnAttributes();
+ AttributeSet Attrs = Call->getAttributes().getFnAttributes();
if (Attrs.hasAttributes())
CreateAttributeSetSlot(Attrs);
}
@@ -1025,8 +1031,12 @@ void SlotTracker::processIndex() {
for (auto &GlobalList : *TheIndex)
CreateGUIDSlot(GlobalList.first);
- for (auto &TId : TheIndex->typeIds())
- CreateGUIDSlot(GlobalValue::getGUID(TId.first));
+ // Start numbering the TypeIds after the GUIDs.
+ TypeIdNext = GUIDNext;
+
+ for (auto TidIter = TheIndex->typeIds().begin();
+ TidIter != TheIndex->typeIds().end(); TidIter++)
+ CreateTypeIdSlot(TidIter->second.first);
ST_DEBUG("end processIndex!\n");
}
@@ -1132,6 +1142,15 @@ int SlotTracker::getGUIDSlot(GlobalValue::GUID GUID) {
return I == GUIDMap.end() ? -1 : (int)I->second;
}
+int SlotTracker::getTypeIdSlot(StringRef Id) {
+ // Check for uninitialized state and do lazy initialization.
+ initializeIndexIfNeeded();
+
+ // Find the TypeId string in the map
+ auto I = TypeIdMap.find(Id);
+ return I == TypeIdMap.end() ? -1 : (int)I->second;
+}
+
/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
assert(V && "Can't insert a null Value into SlotTracker!");
@@ -1202,6 +1221,11 @@ void SlotTracker::CreateGUIDSlot(GlobalValue::GUID GUID) {
GUIDMap[GUID] = GUIDNext++;
}
+/// Create a new slot for the specified Id
+void SlotTracker::CreateTypeIdSlot(StringRef Id) {
+ TypeIdMap[Id] = TypeIdNext++;
+}
+
//===----------------------------------------------------------------------===//
// AsmWriter Implementation
//===----------------------------------------------------------------------===//
@@ -1216,24 +1240,6 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
SlotTracker *Machine, const Module *Context,
bool FromValue = false);
-static void writeAtomicRMWOperation(raw_ostream &Out,
- AtomicRMWInst::BinOp Op) {
- switch (Op) {
- default: Out << " <unknown operation " << Op << ">"; break;
- case AtomicRMWInst::Xchg: Out << " xchg"; break;
- case AtomicRMWInst::Add: Out << " add"; break;
- case AtomicRMWInst::Sub: Out << " sub"; break;
- case AtomicRMWInst::And: Out << " and"; break;
- case AtomicRMWInst::Nand: Out << " nand"; break;
- case AtomicRMWInst::Or: Out << " or"; break;
- case AtomicRMWInst::Xor: Out << " xor"; break;
- case AtomicRMWInst::Max: Out << " max"; break;
- case AtomicRMWInst::Min: Out << " min"; break;
- case AtomicRMWInst::UMax: Out << " umax"; break;
- case AtomicRMWInst::UMin: Out << " umin"; break;
- }
-}
-
static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) {
// 'Fast' is an abbreviation for all fast-math-flags.
@@ -1600,10 +1606,13 @@ struct MDFieldPrinter {
void printInt(StringRef Name, IntTy Int, bool ShouldSkipZero = true);
void printBool(StringRef Name, bool Value, Optional<bool> Default = None);
void printDIFlags(StringRef Name, DINode::DIFlags Flags);
+ void printDISPFlags(StringRef Name, DISubprogram::DISPFlags Flags);
template <class IntTy, class Stringifier>
void printDwarfEnum(StringRef Name, IntTy Value, Stringifier toString,
bool ShouldSkipZero = true);
void printEmissionKind(StringRef Name, DICompileUnit::DebugEmissionKind EK);
+ void printNameTableKind(StringRef Name,
+ DICompileUnit::DebugNameTableKind NTK);
};
} // end anonymous namespace
@@ -1696,11 +1705,42 @@ void MDFieldPrinter::printDIFlags(StringRef Name, DINode::DIFlags Flags) {
Out << FlagsFS << Extra;
}
+void MDFieldPrinter::printDISPFlags(StringRef Name,
+ DISubprogram::DISPFlags Flags) {
+ // Always print this field, because no flags in the IR at all will be
+ // interpreted as old-style isDefinition: true.
+ Out << FS << Name << ": ";
+
+ if (!Flags) {
+ Out << 0;
+ return;
+ }
+
+ SmallVector<DISubprogram::DISPFlags, 8> SplitFlags;
+ auto Extra = DISubprogram::splitFlags(Flags, SplitFlags);
+
+ FieldSeparator FlagsFS(" | ");
+ for (auto F : SplitFlags) {
+ auto StringF = DISubprogram::getFlagString(F);
+ assert(!StringF.empty() && "Expected valid flag");
+ Out << FlagsFS << StringF;
+ }
+ if (Extra || SplitFlags.empty())
+ Out << FlagsFS << Extra;
+}
+
void MDFieldPrinter::printEmissionKind(StringRef Name,
DICompileUnit::DebugEmissionKind EK) {
Out << FS << Name << ": " << DICompileUnit::emissionKindString(EK);
}
+void MDFieldPrinter::printNameTableKind(StringRef Name,
+ DICompileUnit::DebugNameTableKind NTK) {
+ if (NTK == DICompileUnit::DebugNameTableKind::Default)
+ return;
+ Out << FS << Name << ": " << DICompileUnit::nameTableKindString(NTK);
+}
+
template <class IntTy, class Stringifier>
void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
Stringifier toString, bool ShouldSkipZero) {
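printDISPFlags above always emits the field (an absent field would be read as the old-style isDefinition: true), prints 0 when no flags are set, and otherwise joins the known flag names with " | ", appending any leftover bits as a number. A minimal sketch of that splitting-and-joining pattern under a hypothetical SPFlags enum; the real DISPFlags enum and splitFlags helper live in LLVM:

#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical subprogram flags; only the printing idiom mirrors the hunk.
enum SPFlags : uint32_t {
  SPFlagDefinition  = 1u << 0,
  SPFlagLocalToUnit = 1u << 1,
  SPFlagOptimized   = 1u << 2,
};

std::string spFlagName(uint32_t F) {
  switch (F) {
  case SPFlagDefinition:  return "DISPFlagDefinition";
  case SPFlagLocalToUnit: return "DISPFlagLocalToUnit";
  case SPFlagOptimized:   return "DISPFlagOptimized";
  default:                return "";
  }
}

// Split known bits, join them with " | ", then dump any unnamed leftovers.
void printSPFlags(std::ostream &Out, uint32_t Flags) {
  Out << "spFlags: ";
  if (!Flags) {
    Out << 0;
    return;
  }
  const uint32_t Known[] = {SPFlagDefinition, SPFlagLocalToUnit, SPFlagOptimized};
  bool First = true;
  uint32_t Extra = Flags;
  for (uint32_t F : Known) {
    if (!(Flags & F))
      continue;
    Out << (First ? "" : " | ") << spFlagName(F);
    First = false;
    Extra &= ~F;
  }
  if (Extra || First)
    Out << (First ? "" : " | ") << Extra;
}

int main() {
  printSPFlags(std::cout, SPFlagDefinition | SPFlagOptimized);
  std::cout << '\n'; // spFlags: DISPFlagDefinition | DISPFlagOptimized
}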
@@ -1744,6 +1784,8 @@ static void writeDILocation(raw_ostream &Out, const DILocation *DL,
Printer.printInt("column", DL->getColumn());
Printer.printMetadata("scope", DL->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("inlinedAt", DL->getRawInlinedAt());
+ Printer.printBool("isImplicitCode", DL->isImplicitCode(),
+ /* Default */ false);
Out << ")";
}
@@ -1787,6 +1829,7 @@ static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N,
Printer.printInt("align", N->getAlignInBits());
Printer.printDwarfEnum("encoding", N->getEncoding(),
dwarf::AttributeEncodingString);
+ Printer.printDIFlags("flags", N->getFlags());
Out << ")";
}
@@ -1890,7 +1933,8 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Printer.printBool("splitDebugInlining", N->getSplitDebugInlining(), true);
Printer.printBool("debugInfoForProfiling", N->getDebugInfoForProfiling(),
false);
- Printer.printBool("gnuPubnames", N->getGnuPubnames(), false);
+ Printer.printNameTableKind("nameTableKind", N->getNameTableKind());
+ Printer.printBool("rangesBaseAddress", N->getRangesBaseAddress(), false);
Out << ")";
}
@@ -1905,18 +1949,14 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
Printer.printMetadata("type", N->getRawType());
- Printer.printBool("isLocal", N->isLocalToUnit());
- Printer.printBool("isDefinition", N->isDefinition());
Printer.printInt("scopeLine", N->getScopeLine());
Printer.printMetadata("containingType", N->getRawContainingType());
- Printer.printDwarfEnum("virtuality", N->getVirtuality(),
- dwarf::VirtualityString);
if (N->getVirtuality() != dwarf::DW_VIRTUALITY_none ||
N->getVirtualIndex() != 0)
Printer.printInt("virtualIndex", N->getVirtualIndex(), false);
Printer.printInt("thisAdjustment", N->getThisAdjustment());
Printer.printDIFlags("flags", N->getFlags());
- Printer.printBool("isOptimized", N->isOptimized());
+ Printer.printDISPFlags("spFlags", N->getSPFlags());
Printer.printMetadata("unit", N->getRawUnit());
Printer.printMetadata("templateParams", N->getRawTemplateParams());
Printer.printMetadata("declaration", N->getRawDeclaration());
@@ -2040,6 +2080,7 @@ static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N,
Printer.printBool("isLocal", N->isLocalToUnit());
Printer.printBool("isDefinition", N->isDefinition());
Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration());
+ Printer.printMetadata("templateParams", N->getRawTemplateParams());
Printer.printInt("align", N->getAlignInBits());
Out << ")";
}
@@ -2252,11 +2293,15 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
Machine = MachineStorage.get();
}
int Slot = Machine->getMetadataSlot(N);
- if (Slot == -1)
+ if (Slot == -1) {
+ if (const DILocation *Loc = dyn_cast<DILocation>(N)) {
+ writeDILocation(Out, Loc, TypePrinter, Machine, Context);
+ return;
+ }
// Give the pointer value instead of "badref", since this comes up all
// the time when debugging.
Out << "<" << N << ">";
- else
+ } else
Out << '!' << Slot;
return;
}
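With this hunk, a metadata node that never received a slot is no longer always printed as a bare pointer: a DILocation is written out inline instead, which keeps debug-location attachments readable when printing isolated instructions. A small sketch of that fallback order with hypothetical types (Node and printRef are not LLVM names):

#include <iostream>
#include <map>
#include <string>

// Numbered nodes print as "!N"; unnumbered ones print inline when we know
// how, and otherwise fall back to their address.
struct Node { std::string Inline; };

void printRef(std::ostream &Out, const Node &N,
              const std::map<const Node *, int> &Slots) {
  auto It = Slots.find(&N);
  if (It == Slots.end()) {
    if (!N.Inline.empty()) {   // analogous to the DILocation special case
      Out << N.Inline;
      return;
    }
    Out << "<" << &N << ">";   // the address is more useful than "badref"
    return;
  }
  Out << '!' << It->second;
}

int main() {
  Node A{"!DILocation(line: 3, column: 7)"}, B{""};
  std::map<const Node *, int> Slots{{&B, 0}};
  printRef(std::cout, A, Slots); std::cout << '\n'; // inline form
  printRef(std::cout, B, Slots); std::cout << '\n'; // !0
}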
@@ -2313,7 +2358,7 @@ public:
void writeOperand(const Value *Op, bool PrintType);
void writeParamOperand(const Value *Operand, AttributeSet Attrs);
- void writeOperandBundles(ImmutableCallSite CS);
+ void writeOperandBundles(const CallBase *Call);
void writeSyncScope(const LLVMContext &Context,
SyncScope::ID SSID);
void writeAtomic(const LLVMContext &Context,
@@ -2464,15 +2509,15 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
-void AssemblyWriter::writeOperandBundles(ImmutableCallSite CS) {
- if (!CS.hasOperandBundles())
+void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
+ if (!Call->hasOperandBundles())
return;
Out << " [ ";
bool FirstBundle = true;
- for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
- OperandBundleUse BU = CS.getOperandBundleAt(i);
+ for (unsigned i = 0, e = Call->getNumOperandBundles(); i != e; ++i) {
+ OperandBundleUse BU = Call->getOperandBundleAt(i);
if (!FirstBundle)
Out << ", ";
@@ -2643,12 +2688,12 @@ void AssemblyWriter::printModuleSummaryIndex() {
}
// Print the TypeIdMap entries.
- for (auto &TId : TheIndex->typeIds()) {
- auto GUID = GlobalValue::getGUID(TId.first);
- Out << "^" << Machine.getGUIDSlot(GUID) << " = typeid: (name: \""
- << TId.first << "\"";
- printTypeIdSummary(TId.second);
- Out << ") ; guid = " << GUID << "\n";
+ for (auto TidIter = TheIndex->typeIds().begin();
+ TidIter != TheIndex->typeIds().end(); TidIter++) {
+ Out << "^" << Machine.getTypeIdSlot(TidIter->second.first)
+ << " = typeid: (name: \"" << TidIter->second.first << "\"";
+ printTypeIdSummary(TidIter->second.second);
+ Out << ") ; guid = " << TidIter->first << "\n";
}
}
@@ -2800,7 +2845,7 @@ void AssemblyWriter::printAliasSummary(const AliasSummary *AS) {
}
void AssemblyWriter::printGlobalVarSummary(const GlobalVarSummary *GS) {
- // Nothing for now
+ Out << ", varFlags: (readonly: " << GS->VarFlags.ReadOnly << ")";
}
static std::string getLinkageName(GlobalValue::LinkageTypes LT) {
@@ -2840,22 +2885,6 @@ static std::string getLinkageNameWithSpace(GlobalValue::LinkageTypes LT) {
return getLinkageName(LT) + " ";
}
-static const char *getHotnessName(CalleeInfo::HotnessType HT) {
- switch (HT) {
- case CalleeInfo::HotnessType::Unknown:
- return "unknown";
- case CalleeInfo::HotnessType::Cold:
- return "cold";
- case CalleeInfo::HotnessType::None:
- return "none";
- case CalleeInfo::HotnessType::Hot:
- return "hot";
- case CalleeInfo::HotnessType::Critical:
- return "critical";
- }
- llvm_unreachable("invalid hotness");
-}
-
void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
Out << ", insts: " << FS->instCount();
@@ -2867,6 +2896,7 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
Out << ", readOnly: " << FFlags.ReadOnly;
Out << ", noRecurse: " << FFlags.NoRecurse;
Out << ", returnDoesNotAlias: " << FFlags.ReturnDoesNotAlias;
+ Out << ", noInline: " << FFlags.NoInline;
Out << ")";
}
if (!FS->calls().empty()) {
@@ -2897,12 +2927,19 @@ void AssemblyWriter::printTypeIdInfo(
Out << "typeTests: (";
FieldSeparator FS;
for (auto &GUID : TIDInfo.TypeTests) {
- Out << FS;
- auto Slot = Machine.getGUIDSlot(GUID);
- if (Slot != -1)
- Out << "^" << Slot;
- else
+ auto TidIter = TheIndex->typeIds().equal_range(GUID);
+ if (TidIter.first == TidIter.second) {
+ Out << FS;
Out << GUID;
+ continue;
+ }
+    // Print all type ids that correspond to this GUID.
+ for (auto It = TidIter.first; It != TidIter.second; ++It) {
+ Out << FS;
+ auto Slot = Machine.getTypeIdSlot(It->second.first);
+ assert(Slot != -1);
+ Out << "^" << Slot;
+ }
}
Out << ")";
}
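This hunk replaces the GUID-keyed slot lookup with a search over the new multimap-style typeIds() container: every type-id name associated with the GUID is printed with its own slot, and only a GUID with no named entry falls back to the raw number. A standalone sketch of that equal_range pattern, with hypothetical names and a made-up slot function:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Hypothetical index: one GUID can map to several type-id names.
using TypeIdMap = std::multimap<uint64_t, std::string>;

int getTypeIdSlot(const std::string &Name) {
  // Stand-in for Machine.getTypeIdSlot(); pretend slots track name length.
  return static_cast<int>(Name.size());
}

void printTypeTest(std::ostream &Out, const TypeIdMap &Index, uint64_t GUID) {
  auto Range = Index.equal_range(GUID);
  if (Range.first == Range.second) {
    Out << GUID;               // no named type id: fall back to the raw GUID
    return;
  }
  bool First = true;
  for (auto It = Range.first; It != Range.second; ++It) {
    Out << (First ? "" : ", ") << '^' << getTypeIdSlot(It->second);
    First = false;
  }
}

int main() {
  TypeIdMap Index{{42, "_ZTS1A"}, {42, "_ZTS1B"}};
  printTypeTest(std::cout, Index, 42); std::cout << '\n'; // one ^slot per name
  printTypeTest(std::cout, Index, 7);  std::cout << '\n'; // 7
}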
@@ -2928,14 +2965,25 @@ void AssemblyWriter::printTypeIdInfo(
}
void AssemblyWriter::printVFuncId(const FunctionSummary::VFuncId VFId) {
- Out << "vFuncId: (";
- auto Slot = Machine.getGUIDSlot(VFId.GUID);
- if (Slot != -1)
- Out << "^" << Slot;
- else
+ auto TidIter = TheIndex->typeIds().equal_range(VFId.GUID);
+ if (TidIter.first == TidIter.second) {
+ Out << "vFuncId: (";
Out << "guid: " << VFId.GUID;
- Out << ", offset: " << VFId.Offset;
- Out << ")";
+ Out << ", offset: " << VFId.Offset;
+ Out << ")";
+ return;
+ }
+  // Print all type ids that correspond to this GUID.
+ FieldSeparator FS;
+ for (auto It = TidIter.first; It != TidIter.second; ++It) {
+ Out << FS;
+ Out << "vFuncId: (";
+ auto Slot = Machine.getTypeIdSlot(It->second.first);
+ assert(Slot != -1);
+ Out << "^" << Slot;
+ Out << ", offset: " << VFId.Offset;
+ Out << ")";
+ }
}
void AssemblyWriter::printNonConstVCalls(
@@ -2955,11 +3003,13 @@ void AssemblyWriter::printConstVCalls(
FieldSeparator FS;
for (auto &ConstVCall : VCallList) {
Out << FS;
+ Out << "(";
printVFuncId(ConstVCall.VFunc);
if (!ConstVCall.Args.empty()) {
Out << ", ";
printArgs(ConstVCall.Args);
}
+ Out << ")";
}
Out << ")";
}
@@ -2989,6 +3039,8 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {
FieldSeparator FS;
for (auto &Ref : RefList) {
Out << FS;
+ if (Ref.isReadOnly())
+ Out << "readonly ";
Out << "^" << Machine.getGUIDSlot(Ref.getGUID());
}
Out << ")";
@@ -3354,6 +3406,13 @@ void AssemblyWriter::printFunction(const Function *F) {
StringRef UA = getUnnamedAddrEncoding(F->getUnnamedAddr());
if (!UA.empty())
Out << ' ' << UA;
+ // We print the function address space if it is non-zero or if we are writing
+ // a module with a non-zero program address space or if there is no valid
+ // Module* so that the file can be parsed without the datalayout string.
+ const Module *Mod = F->getParent();
+ if (F->getAddressSpace() != 0 || !Mod ||
+ Mod->getDataLayout().getProgramAddressSpace() != 0)
+ Out << " addrspace(" << F->getAddressSpace() << ")";
if (Attrs.hasAttributes(AttributeList::FunctionIndex))
Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
if (F->hasSection()) {
@@ -3491,6 +3550,23 @@ void AssemblyWriter::printInfoComment(const Value &V) {
AnnotationWriter->printInfoComment(V, Out);
}
+static void maybePrintCallAddrSpace(const Value *Operand, const Instruction *I,
+ raw_ostream &Out) {
+ // We print the address space of the call if it is non-zero.
+ unsigned CallAddrSpace = Operand->getType()->getPointerAddressSpace();
+ bool PrintAddrSpace = CallAddrSpace != 0;
+ if (!PrintAddrSpace) {
+ const Module *Mod = getModuleFromVal(I);
+ // We also print it if it is zero but not equal to the program address space
+  // We also print it when it is zero but the program address space is
+  // non-zero, or when we can't find a valid Module*, so that the resulting
+  // file can still be parsed without a datalayout string.
+ PrintAddrSpace = true;
+ }
+ if (PrintAddrSpace)
+ Out << " addrspace(" << CallAddrSpace << ")";
+}
+
// This member is called for each Instruction in a function..
void AssemblyWriter::printInstruction(const Instruction &I) {
if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
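Both printFunction above and the new maybePrintCallAddrSpace helper print addrspace(N) whenever leaving it out could make the output ambiguous or unparsable without a datalayout string. A hedged sketch of just that condition; the function name and parameters below are hypothetical:

#include <iostream>

// Print "addrspace(N)" unless omitting it is unambiguous: the space is the
// default 0, a module is available, and the program address space is also 0.
bool shouldPrintAddrSpace(unsigned AddrSpace, bool HaveModule,
                          unsigned ProgramAddrSpace) {
  if (AddrSpace != 0)
    return true;                  // non-default is always printed
  if (!HaveModule)
    return true;                  // no datalayout to fall back on
  return ProgramAddrSpace != 0;   // 0 differs from the module's default
}

int main() {
  std::cout << shouldPrintAddrSpace(0, true, 0) << '\n';   // 0: omitted
  std::cout << shouldPrintAddrSpace(0, true, 200) << '\n'; // 1: printed
  std::cout << shouldPrintAddrSpace(1, true, 0) << '\n';   // 1: printed
}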
@@ -3547,7 +3623,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// Print out the atomicrmw operation
if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
- writeAtomicRMWOperation(Out, RMWI->getOperation());
+ Out << ' ' << AtomicRMWInst::getOperationName(RMWI->getOperation());
// Print out the type of the operands...
const Value *Operand = I.getNumOperands() ? I.getOperand(0) : nullptr;
@@ -3688,6 +3764,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (PAL.hasAttributes(AttributeList::ReturnIndex))
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
+ // Only print addrspace(N) if necessary:
+ maybePrintCallAddrSpace(Operand, &I, Out);
+
// If possible, print out the short form of the call instruction. We can
// only do this if the first argument is a pointer to a nonvararg function,
// and if the return type is not a pointer to a function.
@@ -3730,6 +3809,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (PAL.hasAttributes(AttributeList::ReturnIndex))
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
+ // Only print addrspace(N) if necessary:
+ maybePrintCallAddrSpace(Operand, &I, Out);
+
// If possible, print out the short form of the invoke instruction. We can
// only do this if the first argument is a pointer to a nonvararg function,
// and if the return type is not a pointer to a function.
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index d87187481be0..ff46debb7a9e 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -323,6 +323,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "returns_twice";
if (hasAttribute(Attribute::SExt))
return "signext";
+ if (hasAttribute(Attribute::SpeculativeLoadHardening))
+ return "speculative_load_hardening";
if (hasAttribute(Attribute::Speculatable))
return "speculatable";
if (hasAttribute(Attribute::StackProtect))
@@ -637,7 +639,7 @@ LLVM_DUMP_METHOD void AttributeSet::dump() const {
AttributeSetNode::AttributeSetNode(ArrayRef<Attribute> Attrs)
: AvailableAttrs(0), NumAttrs(Attrs.size()) {
// There's memory after the node where we can store the entries in.
- std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
+ llvm::copy(Attrs, getTrailingObjects<Attribute>());
for (const auto I : *this) {
if (!I.isStringAttribute()) {
@@ -656,7 +658,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
FoldingSetNodeID ID;
SmallVector<Attribute, 8> SortedAttrs(Attrs.begin(), Attrs.end());
- llvm::sort(SortedAttrs.begin(), SortedAttrs.end());
+ llvm::sort(SortedAttrs);
for (const auto Attr : SortedAttrs)
Attr.Profile(ID);
@@ -807,7 +809,7 @@ AttributeListImpl::AttributeListImpl(LLVMContext &C,
assert(!Sets.empty() && "pointless AttributeListImpl");
// There's memory after the node where we can store the entries in.
- std::copy(Sets.begin(), Sets.end(), getTrailingObjects<AttributeSet>());
+ llvm::copy(Sets, getTrailingObjects<AttributeSet>());
// Initialize AvailableFunctionAttrs summary bitset.
static_assert(Attribute::EndAttrKinds <=
@@ -1683,28 +1685,32 @@ adjustCallerStackProbeSize(Function &Caller, const Function &Callee) {
}
/// If the inlined function defines a min legal vector width, then ensure
-/// the calling function has the same or larger min legal vector width. This
-/// function is called after the inlining decision has been made so we have to
-/// merge the attribute this way. Heuristics that would use
+/// the calling function has the same or larger min legal vector width. If the
+/// caller has the attribute, but the callee doesn't, we need to remove the
+/// attribute from the caller since we can't make any guarantees about the
+/// caller's requirements.
+/// This function is called after the inlining decision has been made so we have
+/// to merge the attribute this way. Heuristics that would use
/// min-legal-vector-width to determine inline compatibility would need to be
/// handled as part of inline cost analysis.
static void
adjustMinLegalVectorWidth(Function &Caller, const Function &Callee) {
- if (Callee.hasFnAttribute("min-legal-vector-width")) {
- uint64_t CalleeVectorWidth;
- Callee.getFnAttribute("min-legal-vector-width")
- .getValueAsString()
- .getAsInteger(0, CalleeVectorWidth);
- if (Caller.hasFnAttribute("min-legal-vector-width")) {
+ if (Caller.hasFnAttribute("min-legal-vector-width")) {
+ if (Callee.hasFnAttribute("min-legal-vector-width")) {
uint64_t CallerVectorWidth;
Caller.getFnAttribute("min-legal-vector-width")
.getValueAsString()
.getAsInteger(0, CallerVectorWidth);
- if (CallerVectorWidth < CalleeVectorWidth) {
+ uint64_t CalleeVectorWidth;
+ Callee.getFnAttribute("min-legal-vector-width")
+ .getValueAsString()
+ .getAsInteger(0, CalleeVectorWidth);
+ if (CallerVectorWidth < CalleeVectorWidth)
Caller.addFnAttr(Callee.getFnAttribute("min-legal-vector-width"));
- }
} else {
- Caller.addFnAttr(Callee.getFnAttribute("min-legal-vector-width"));
+ // If the callee doesn't have the attribute then we don't know anything
+ // and must drop the attribute from the caller.
+ Caller.removeFnAttr("min-legal-vector-width");
}
}
}
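The rewritten adjustMinLegalVectorWidth only acts when the caller carries "min-legal-vector-width": it widens the caller's value to cover the callee, or drops the attribute entirely when the callee makes no claim. A minimal sketch of that merge rule using plain string-to-integer maps instead of LLVM attributes (all names hypothetical):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

using Attrs = std::map<std::string, uint64_t>;

void mergeMinLegalVectorWidth(Attrs &Caller, const Attrs &Callee) {
  auto C = Caller.find("min-legal-vector-width");
  if (C == Caller.end())
    return;                 // caller makes no claim: nothing to fix up
  auto E = Callee.find("min-legal-vector-width");
  if (E == Callee.end()) {
    Caller.erase(C);        // callee unknown: drop the caller's claim
    return;
  }
  if (C->second < E->second)
    C->second = E->second;  // widen the caller to cover the inlined callee
}

int main() {
  Attrs Caller{{"min-legal-vector-width", 256}};
  Attrs Callee{{"min-legal-vector-width", 512}};
  mergeMinLegalVectorWidth(Caller, Callee);
  std::cout << Caller["min-legal-vector-width"] << '\n'; // 512
}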
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp
index f098ad9725b6..b2eb8b09982e 100644
--- a/contrib/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp
@@ -71,7 +71,27 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// like to use this information to remove upgrade code for some older
// intrinsics. It is currently undecided how we will determine that future
// point.
- if (Name=="ssse3.pabs.b.128" || // Added in 6.0
+ if (Name == "addcarryx.u32" || // Added in 8.0
+ Name == "addcarryx.u64" || // Added in 8.0
+ Name == "addcarry.u32" || // Added in 8.0
+ Name == "addcarry.u64" || // Added in 8.0
+ Name == "subborrow.u32" || // Added in 8.0
+ Name == "subborrow.u64" || // Added in 8.0
+ Name.startswith("sse2.padds.") || // Added in 8.0
+ Name.startswith("sse2.psubs.") || // Added in 8.0
+ Name.startswith("sse2.paddus.") || // Added in 8.0
+ Name.startswith("sse2.psubus.") || // Added in 8.0
+ Name.startswith("avx2.padds.") || // Added in 8.0
+ Name.startswith("avx2.psubs.") || // Added in 8.0
+ Name.startswith("avx2.paddus.") || // Added in 8.0
+ Name.startswith("avx2.psubus.") || // Added in 8.0
+ Name.startswith("avx512.padds.") || // Added in 8.0
+ Name.startswith("avx512.psubs.") || // Added in 8.0
+ Name.startswith("avx512.mask.padds.") || // Added in 8.0
+ Name.startswith("avx512.mask.psubs.") || // Added in 8.0
+ Name.startswith("avx512.mask.paddus.") || // Added in 8.0
+ Name.startswith("avx512.mask.psubus.") || // Added in 8.0
+ Name=="ssse3.pabs.b.128" || // Added in 6.0
Name=="ssse3.pabs.w.128" || // Added in 6.0
Name=="ssse3.pabs.d.128" || // Added in 6.0
Name.startswith("fma4.vfmadd.s") || // Added in 7.0
@@ -265,6 +285,12 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
+ Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
+ Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
+ Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
+ Name.startswith("avx512.vpshld.") || // Added in 8.0
+ Name.startswith("avx512.vpshrd.") || // Added in 8.0
Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
@@ -272,10 +298,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
- Name.startswith("avx512.mask.prorv.") || // Added in 7.0
- Name.startswith("avx512.mask.pror.") || // Added in 7.0
- Name.startswith("avx512.mask.prolv.") || // Added in 7.0
- Name.startswith("avx512.mask.prol.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
+ Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -340,6 +364,13 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.cvtmask2") || // Added in 5.0
(Name.startswith("xop.vpcom") && // Added in 3.2
F->arg_size() == 2) ||
+ Name.startswith("xop.vprot") || // Added in 8.0
+ Name.startswith("avx512.prol") || // Added in 8.0
+ Name.startswith("avx512.pror") || // Added in 8.0
+ Name.startswith("avx512.mask.prorv.") || // Added in 8.0
+ Name.startswith("avx512.mask.pror.") || // Added in 8.0
+ Name.startswith("avx512.mask.prolv.") || // Added in 8.0
+ Name.startswith("avx512.mask.prol.") || // Added in 8.0
Name.startswith("avx512.ptestm") || //Added in 6.0
Name.startswith("avx512.ptestnm") || //Added in 6.0
Name.startswith("sse2.pavg") || // Added in 6.0
@@ -363,6 +394,17 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
return true;
}
+ if (Name == "rdtscp") { // Added in 8.0
+ // If this intrinsic has 0 operands, it's the new version.
+ if (F->getFunctionType()->getNumParams() == 0)
+ return false;
+
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_rdtscp);
+ return true;
+ }
+
// SSE4.1 ptest functions may have an old signature.
if (Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
@@ -456,7 +498,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.*
FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
- NewFn = Function::Create(fType, F->getLinkage(),
+ NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
"llvm.ctlz." + Name.substr(14), F->getParent());
return true;
}
@@ -472,7 +514,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Can't use Intrinsic::getDeclaration here as the return types might
// then only be structurally equal.
FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
- NewFn = Function::Create(fType, F->getLinkage(),
+ NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
"llvm." + Name + ".p0i8", F->getParent());
return true;
}
@@ -502,6 +544,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
+ if (Name == "x86.seh.recoverfp") {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
+ return true;
+ }
break;
}
@@ -899,6 +945,148 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
return EmitX86Select(Builder, Mask, Align, Passthru);
}
+static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
+ bool ZeroMask, bool IndexForm) {
+ Type *Ty = CI.getType();
+ unsigned VecWidth = Ty->getPrimitiveSizeInBits();
+ unsigned EltWidth = Ty->getScalarSizeInBits();
+ bool IsFloat = Ty->isFPOrFPVectorTy();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
+ else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_128;
+ else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
+ else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_128;
+ else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
+ else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_256;
+ else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
+ else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_256;
+ else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
+ else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_512;
+ else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
+ else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_512;
+ else if (VecWidth == 128 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
+ else if (VecWidth == 256 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
+ else if (VecWidth == 512 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
+ else if (VecWidth == 128 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
+ else if (VecWidth == 256 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
+ else if (VecWidth == 512 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
+ CI.getArgOperand(2) };
+
+  // If this isn't index form, we need to swap operands 0 and 1.
+ if (!IndexForm)
+ std::swap(Args[0], Args[1]);
+
+ Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
+ Args);
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
+ : Builder.CreateBitCast(CI.getArgOperand(1),
+ Ty);
+ return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
+}
+
+static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
+ bool IsSigned, bool IsAddition) {
+ Type *Ty = CI.getType();
+ Value *Op0 = CI.getOperand(0);
+ Value *Op1 = CI.getOperand(1);
+
+ Intrinsic::ID IID =
+ IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
+ : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
+ Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
+ Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
+
+ if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+ Value *VecSrc = CI.getOperand(2);
+ Value *Mask = CI.getOperand(3);
+ Res = EmitX86Select(Builder, Mask, Res, VecSrc);
+ }
+ return Res;
+}
+
+static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
+ bool IsRotateRight) {
+ Type *Ty = CI.getType();
+ Value *Src = CI.getArgOperand(0);
+ Value *Amt = CI.getArgOperand(1);
+
+  // Amount may be a scalar immediate, in which case create a splat vector.
+  // Funnel shift amounts are treated as modulo and the types are all
+  // power-of-2, so we only care about the lowest log2 bits anyway.
+ if (Amt->getType() != Ty) {
+ unsigned NumElts = Ty->getVectorNumElements();
+ Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
+ Amt = Builder.CreateVectorSplat(NumElts, Amt);
+ }
+
+ Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
+ Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
+ Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
+
+ if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+ Value *VecSrc = CI.getOperand(2);
+ Value *Mask = CI.getOperand(3);
+ Res = EmitX86Select(Builder, Mask, Res, VecSrc);
+ }
+ return Res;
+}
+
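upgradeX86Rotate above lowers the legacy rotate intrinsics onto the generic funnel-shift intrinsics: fshl(x, x, n) is a rotate left and fshr(x, x, n) a rotate right, with the amount taken modulo the element width. A scalar sketch of why that identity holds (helper names are hypothetical):

#include <cstdint>
#include <iostream>

// Funnel shift left of (Hi, Lo) by N takes the top bits of the 64-bit
// concatenation; feeding the same value as both halves yields a rotate.
uint32_t funnelShiftLeft(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt &= 31;                           // amounts are modulo the bit width
  if (Amt == 0)
    return Hi;
  return (Hi << Amt) | (Lo >> (32 - Amt));
}

uint32_t rotateLeft(uint32_t X, unsigned Amt) {
  return funnelShiftLeft(X, X, Amt);   // fshl(x, x, n) == rotl(x, n)
}

int main() {
  std::cout << std::hex << rotateLeft(0x80000001u, 4) << '\n'; // 18
}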
+static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
+ bool IsShiftRight, bool ZeroMask) {
+ Type *Ty = CI.getType();
+ Value *Op0 = CI.getArgOperand(0);
+ Value *Op1 = CI.getArgOperand(1);
+ Value *Amt = CI.getArgOperand(2);
+
+ if (IsShiftRight)
+ std::swap(Op0, Op1);
+
+  // Amount may be a scalar immediate, in which case create a splat vector.
+  // Funnel shift amounts are treated as modulo and the types are all
+  // power-of-2, so we only care about the lowest log2 bits anyway.
+ if (Amt->getType() != Ty) {
+ unsigned NumElts = Ty->getVectorNumElements();
+ Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
+ Amt = Builder.CreateVectorSplat(NumElts, Amt);
+ }
+
+ Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
+ Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
+ Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
+
+ unsigned NumArgs = CI.getNumArgOperands();
+ if (NumArgs >= 4) { // For masked intrinsics.
+ Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
+ ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
+ CI.getArgOperand(0);
+ Value *Mask = CI.getOperand(NumArgs - 1);
+ Res = EmitX86Select(Builder, Mask, Res, VecSrc);
+ }
+ return Res;
+}
+
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
@@ -1265,106 +1453,13 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_dbpsadbw_512;
else
llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("vpshld.")) {
- if (VecWidth == 128 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_128;
- else if (VecWidth == 128 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_128;
- else if (VecWidth == 128 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_128;
- else if (VecWidth == 256 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_256;
- else if (VecWidth == 256 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_256;
- else if (VecWidth == 256 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_256;
- else if (VecWidth == 512 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshld_q_512;
- else if (VecWidth == 512 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshld_d_512;
- else if (VecWidth == 512 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshld_w_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("vpshrd.")) {
- if (VecWidth == 128 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_128;
- else if (VecWidth == 128 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_128;
- else if (VecWidth == 128 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_128;
- else if (VecWidth == 256 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_256;
- else if (VecWidth == 256 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_256;
- else if (VecWidth == 256 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_256;
- else if (VecWidth == 512 && Name[7] == 'q')
- IID = Intrinsic::x86_avx512_vpshrd_q_512;
- else if (VecWidth == 512 && Name[7] == 'd')
- IID = Intrinsic::x86_avx512_vpshrd_d_512;
- else if (VecWidth == 512 && Name[7] == 'w')
- IID = Intrinsic::x86_avx512_vpshrd_w_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("prorv.")) {
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prorv_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prorv_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prorv_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prorv_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prorv_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prorv_q_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("prolv.")) {
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prolv_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prolv_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prolv_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prolv_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prolv_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prolv_q_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("pror.")) {
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pror_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pror_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_pror_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pror_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pror_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_pror_q_512;
- else
- llvm_unreachable("Unexpected intrinsic");
- } else if (Name.startswith("prol.")) {
- if (VecWidth == 128 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prol_d_128;
- else if (VecWidth == 256 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prol_d_256;
- else if (VecWidth == 512 && EltWidth == 32)
- IID = Intrinsic::x86_avx512_prol_d_512;
- else if (VecWidth == 128 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prol_q_128;
- else if (VecWidth == 256 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prol_q_256;
- else if (VecWidth == 512 && EltWidth == 64)
- IID = Intrinsic::x86_avx512_prol_q_512;
+ } else if (Name.startswith("pmultishift.qb.")) {
+ if (VecWidth == 128)
+ IID = Intrinsic::x86_avx512_pmultishift_qb_128;
+ else if (VecWidth == 256)
+ IID = Intrinsic::x86_avx512_pmultishift_qb_256;
+ else if (VecWidth == 512)
+ IID = Intrinsic::x86_avx512_pmultishift_qb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else
@@ -1654,46 +1749,44 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
C = ConstantInt::getNullValue(Builder.getInt16Ty());
Rep = Builder.CreateICmpEQ(Rep, C);
Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
- } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
- Type *I32Ty = Type::getInt32Ty(C);
- Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
- ConstantInt::get(I32Ty, 0));
- Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
- ConstantInt::get(I32Ty, 0));
- Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
- Builder.CreateFAdd(Elt0, Elt1),
- ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
+ } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
+ Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
+ Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
+ Name == "sse.div.ss" || Name == "sse2.div.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
- Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
- Builder.CreateFSub(Elt0, Elt1),
- ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
- Type *I32Ty = Type::getInt32Ty(C);
- Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
- ConstantInt::get(I32Ty, 0));
- Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
- ConstantInt::get(I32Ty, 0));
- Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
- Builder.CreateFMul(Elt0, Elt1),
- ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
- Type *I32Ty = Type::getInt32Ty(C);
- Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
- ConstantInt::get(I32Ty, 0));
- Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
- ConstantInt::get(I32Ty, 0));
- Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
- Builder.CreateFDiv(Elt0, Elt1),
+ Value *EltOp;
+ if (Name.contains(".add."))
+ EltOp = Builder.CreateFAdd(Elt0, Elt1);
+ else if (Name.contains(".sub."))
+ EltOp = Builder.CreateFSub(Elt0, Elt1);
+ else if (Name.contains(".mul."))
+ EltOp = Builder.CreateFMul(Elt0, Elt1);
+ else
+ EltOp = Builder.CreateFDiv(Elt0, Elt1);
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
+ } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
+ Type *OpTy = CI->getArgOperand(0)->getType();
+ unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+ Intrinsic::ID IID;
+ switch (VecWidth) {
+ default: llvm_unreachable("Unexpected intrinsic");
+ case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
+ case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
+ case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
+ }
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getOperand(0), CI->getArgOperand(1) });
+ Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
Type *OpTy = CI->getArgOperand(0)->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
@@ -1948,6 +2041,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
+ } else if (IsX86 && (Name.startswith("xop.vprot") ||
+ Name.startswith("avx512.prol") ||
+ Name.startswith("avx512.mask.prol"))) {
+ Rep = upgradeX86Rotate(Builder, *CI, false);
+ } else if (IsX86 && (Name.startswith("avx512.pror") ||
+ Name.startswith("avx512.mask.pror"))) {
+ Rep = upgradeX86Rotate(Builder, *CI, true);
+ } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
+ Name.startswith("avx512.mask.vpshld") ||
+ Name.startswith("avx512.maskz.vpshld"))) {
+ bool ZeroMask = Name[11] == 'z';
+ Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
+ } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
+ Name.startswith("avx512.mask.vpshrd") ||
+ Name.startswith("avx512.maskz.vpshrd"))) {
+ bool ZeroMask = Name[11] == 'z';
+ Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
@@ -2059,6 +2169,24 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("sse2.padds.") ||
+ Name.startswith("sse2.psubs.") ||
+ Name.startswith("avx2.padds.") ||
+ Name.startswith("avx2.psubs.") ||
+ Name.startswith("avx512.padds.") ||
+ Name.startswith("avx512.psubs.") ||
+ Name.startswith("avx512.mask.padds.") ||
+ Name.startswith("avx512.mask.psubs."))) {
+ bool IsAdd = Name.contains(".padds");
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
+ } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
+ Name.startswith("sse2.psubus.") ||
+ Name.startswith("avx2.paddus.") ||
+ Name.startswith("avx2.psubus.") ||
+ Name.startswith("avx512.mask.paddus.") ||
+ Name.startswith("avx512.mask.psubus."))) {
+ bool IsAdd = Name.contains(".paddus");
+ Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
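The branches above rewrite the legacy x86 saturating add/subtract intrinsics onto the generic sadd_sat/ssub_sat/uadd_sat/usub_sat intrinsics, optionally followed by a masked select. Scalar sketches of what signed and unsigned saturating addition compute (16-bit lanes chosen arbitrarily; helper names hypothetical):

#include <cstdint>
#include <iostream>
#include <limits>

int16_t saddSat(int16_t A, int16_t B) {
  int32_t R = int32_t(A) + int32_t(B);          // widen, then clamp
  if (R > std::numeric_limits<int16_t>::max())
    return std::numeric_limits<int16_t>::max();
  if (R < std::numeric_limits<int16_t>::min())
    return std::numeric_limits<int16_t>::min();
  return static_cast<int16_t>(R);
}

uint16_t uaddSat(uint16_t A, uint16_t B) {
  uint32_t R = uint32_t(A) + uint32_t(B);
  return R > 0xFFFFu ? uint16_t(0xFFFFu) : static_cast<uint16_t>(R);
}

int main() {
  std::cout << saddSat(32000, 1000) << '\n'; // 32767, clamped
  std::cout << uaddSat(65000, 1000) << '\n'; // 65535, clamped
}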
@@ -2376,24 +2504,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
- Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
- Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
- CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
- Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
- Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
+ Name.startswith("avx512.mask.pand."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2401,7 +2513,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
+ Name.startswith("avx512.mask.pandn."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
@@ -2410,7 +2523,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
+ Name.startswith("avx512.mask.por."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2418,7 +2532,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
+ Name.startswith("avx512.mask.pxor."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2502,26 +2617,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
+ } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
+ Name.startswith("avx512.mask.min.p")) &&
Name.drop_front(18) == ".512") {
- Intrinsic::ID IID;
- if (Name[17] == 's')
- IID = Intrinsic::x86_avx512_max_ps_512;
- else
- IID = Intrinsic::x86_avx512_max_pd_512;
-
- Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
- { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(4) });
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
- Name.drop_front(18) == ".512") {
- Intrinsic::ID IID;
- if (Name[17] == 's')
- IID = Intrinsic::x86_avx512_min_ps_512;
- else
- IID = Intrinsic::x86_avx512_min_pd_512;
+ bool IsDouble = Name[17] == 'd';
+ bool IsMin = Name[13] == 'i';
+ static const Intrinsic::ID MinMaxTbl[2][2] = {
+ { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
+ { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
+ };
+ Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
@@ -3065,62 +3170,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name.startswith("avx512.maskz.vpermt2var."))) {
bool ZeroMask = Name[11] == 'z';
bool IndexForm = Name[17] == 'i';
- unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
- unsigned EltWidth = CI->getType()->getScalarSizeInBits();
- bool IsFloat = CI->getType()->isFPOrFPVectorTy();
- Intrinsic::ID IID;
- if (VecWidth == 128 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
- else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_128;
- else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
- else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_128;
- else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
- else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_256;
- else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
- else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_256;
- else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
- else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_512;
- else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
- else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_512;
- else if (VecWidth == 128 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
- else if (VecWidth == 256 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
- else if (VecWidth == 512 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
- else if (VecWidth == 128 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
- else if (VecWidth == 256 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
- else if (VecWidth == 512 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
- else
- llvm_unreachable("Unexpected intrinsic");
-
- Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
- CI->getArgOperand(2) };
-
- // If this isn't index form we need to swap operand 0 and 1.
- if (!IndexForm)
- std::swap(Args[0], Args[1]);
-
- Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
- Args);
- Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
- : Builder.CreateBitCast(CI->getArgOperand(1),
- CI->getType());
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
+ Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
Name.startswith("avx512.maskz.vpdpbusd.") ||
Name.startswith("avx512.mask.vpdpbusds.") ||
@@ -3181,6 +3231,39 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
+ } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
+ Name == "addcarry.u32" || Name == "addcarry.u64" ||
+ Name == "subborrow.u32" || Name == "subborrow.u64")) {
+ Intrinsic::ID IID;
+ if (Name[0] == 'a' && Name.back() == '2')
+ IID = Intrinsic::x86_addcarry_32;
+ else if (Name[0] == 'a' && Name.back() == '4')
+ IID = Intrinsic::x86_addcarry_64;
+ else if (Name[0] == 's' && Name.back() == '2')
+ IID = Intrinsic::x86_subborrow_32;
+ else if (Name[0] == 's' && Name.back() == '4')
+ IID = Intrinsic::x86_subborrow_64;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ // Make a call with 3 operands.
+ Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2)};
+ Value *NewCall = Builder.CreateCall(
+ Intrinsic::getDeclaration(CI->getModule(), IID),
+ Args);
+
+ // Extract the second result and store it.
+ Value *Data = Builder.CreateExtractValue(NewCall, 1);
+ // Cast the pointer to the right type.
+ Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
+ llvm::PointerType::getUnqual(Data->getType()));
+ Builder.CreateAlignedStore(Data, Ptr, 1);
+ // Replace the original call result with the first result of the new call.
+ Value *CF = Builder.CreateExtractValue(NewCall, 0);
+
+ CI->replaceAllUsesWith(CF);
+ Rep = nullptr;
} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.
@@ -3356,6 +3439,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
break;
}
+ case Intrinsic::x86_rdtscp: {
+    // This used to take 1 argument. If we have no arguments, it is already
+ // upgraded.
+ if (CI->getNumOperands() == 0)
+ return;
+
+ NewCall = Builder.CreateCall(NewFn);
+ // Extract the second result and store it.
+ Value *Data = Builder.CreateExtractValue(NewCall, 1);
+ // Cast the pointer to the right type.
+ Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
+ llvm::PointerType::getUnqual(Data->getType()));
+ Builder.CreateAlignedStore(Data, Ptr, 1);
+ // Replace the original call result with the first result of the new call.
+ Value *TSC = Builder.CreateExtractValue(NewCall, 0);
+
+ std::string Name = CI->getName();
+ if (!Name.empty()) {
+ CI->setName(Name + ".old");
+ NewCall->setName(Name);
+ }
+ CI->replaceAllUsesWith(TSC);
+ CI->eraseFromParent();
+ return;
+ }
+
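Both the addcarry/subborrow branch earlier and this rdtscp case follow the same upgrade shape: the new intrinsic returns a two-element struct, so the upgrade extracts element 1 and stores it through the old pointer argument, then lets element 0 replace the old call's result. A plain C++ sketch of that shape with hypothetical function names:

#include <cstdint>
#include <iostream>
#include <utility>

// New-style intrinsic: returns {value, extra} instead of writing through a
// pointer. Values here are made up for illustration.
std::pair<uint64_t, uint32_t> newStyleRdtscp() {
  return {123456789ull, 7u}; // {time-stamp counter, processor id}
}

// Old-style wrapper built on top of it, mirroring the upgrade code path.
uint64_t oldStyleRdtscp(uint32_t *Aux) {
  auto Result = newStyleRdtscp();
  *Aux = Result.second;      // store the second result through the pointer
  return Result.first;       // the first result replaces the old return value
}

int main() {
  uint32_t Aux = 0;
  std::cout << oldStyleRdtscp(&Aux) << ' ' << Aux << '\n'; // 123456789 7
}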
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
diff --git a/contrib/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm/lib/IR/BasicBlock.cpp
index 7c3e5862d1cd..375924360dda 100644
--- a/contrib/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm/lib/IR/BasicBlock.cpp
@@ -135,9 +135,10 @@ const Module *BasicBlock::getModule() const {
return getParent()->getParent();
}
-const TerminatorInst *BasicBlock::getTerminator() const {
- if (InstList.empty()) return nullptr;
- return dyn_cast<TerminatorInst>(&InstList.back());
+const Instruction *BasicBlock::getTerminator() const {
+ if (InstList.empty() || !InstList.back().isTerminator())
+ return nullptr;
+ return &InstList.back();
}
const CallInst *BasicBlock::getTerminatingMustTailCall() const {
@@ -205,10 +206,8 @@ const Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() const {
if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
continue;
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)
- continue;
+ if (I.isLifetimeStartOrEnd())
+ continue;
return &I;
}
@@ -259,6 +258,14 @@ const BasicBlock *BasicBlock::getUniquePredecessor() const {
return PredBB;
}
+bool BasicBlock::hasNPredecessors(unsigned N) const {
+ return hasNItems(pred_begin(this), pred_end(this), N);
+}
+
+bool BasicBlock::hasNPredecessorsOrMore(unsigned N) const {
+ return hasNItemsOrMore(pred_begin(this), pred_end(this), N);
+}
+
const BasicBlock *BasicBlock::getSingleSuccessor() const {
succ_const_iterator SI = succ_begin(this), E = succ_end(this);
if (SI == E) return nullptr; // no successors
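hasNPredecessors and hasNPredecessorsOrMore above lean on the hasNItems/hasNItemsOrMore helpers, which advance an iterator at most N steps instead of counting the whole predecessor list. A generic sketch assuming the same exact-N / at-least-N semantics:

#include <iostream>
#include <list>

template <typename It>
bool hasNItems(It First, It Last, unsigned N) {
  for (; N; --N, ++First)
    if (First == Last)
      return false;          // fewer than N items
  return First == Last;      // exactly N when nothing is left over
}

template <typename It>
bool hasNItemsOrMore(It First, It Last, unsigned N) {
  for (; N; --N, ++First)
    if (First == Last)
      return false;
  return true;               // at least N items
}

int main() {
  std::list<int> Preds{1, 2, 3};
  std::cout << hasNItems(Preds.begin(), Preds.end(), 3) << '\n';       // 1
  std::cout << hasNItemsOrMore(Preds.begin(), Preds.end(), 2) << '\n'; // 1
  std::cout << hasNItems(Preds.begin(), Preds.end(), 2) << '\n';       // 0
}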
@@ -384,7 +391,7 @@ bool BasicBlock::isLegalToHoistInto() const {
assert(Term->getNumSuccessors() > 0);
// Instructions should not be hoisted across exception handling boundaries.
- return !Term->isExceptional();
+ return !Term->isExceptionalTerminator();
}
/// This splits a basic block into two at the specified
@@ -437,12 +444,12 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
}
void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
- TerminatorInst *TI = getTerminator();
+ Instruction *TI = getTerminator();
if (!TI)
// Cope with being called on a BasicBlock that doesn't have a terminator
// yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
return;
- for (BasicBlock *Succ : TI->successors()) {
+ for (BasicBlock *Succ : successors(TI)) {
// N.B. Succ might not be a complete BasicBlock, so don't assume
// that it ends with a non-phi instruction.
for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
@@ -468,7 +475,7 @@ const LandingPadInst *BasicBlock::getLandingPadInst() const {
}
Optional<uint64_t> BasicBlock::getIrrLoopHeaderWeight() const {
- const TerminatorInst *TI = getTerminator();
+ const Instruction *TI = getTerminator();
if (MDNode *MDIrrLoopHeader =
TI->getMetadata(LLVMContext::MD_irr_loop)) {
MDString *MDName = cast<MDString>(MDIrrLoopHeader->getOperand(0));
diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp
index 90a8366d1696..57de6b042303 100644
--- a/contrib/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm/lib/IR/ConstantFold.cpp
@@ -916,13 +916,14 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
return ConstantVector::get(Result);
}
-
-Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
- Constant *C1, Constant *C2) {
+Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
+ Constant *C2) {
assert(Instruction::isBinaryOp(Opcode) && "Non-binary instruction detected");
- // Handle UndefValue up front.
- if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // Handle scalar UndefValue. Vectors are always evaluated per element.
+ bool HasScalarUndef = !C1->getType()->isVectorTy() &&
+ (isa<UndefValue>(C1) || isa<UndefValue>(C2));
+ if (HasScalarUndef) {
switch (static_cast<Instruction::BinaryOps>(Opcode)) {
case Instruction::Xor:
if (isa<UndefValue>(C1) && isa<UndefValue>(C2))
@@ -1024,9 +1025,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
- // At this point neither constant should be an UndefValue.
- assert(!isa<UndefValue>(C1) && !isa<UndefValue>(C2) &&
- "Unexpected UndefValue");
+ // Neither constant should be UndefValue, unless these are vector constants.
+ assert(!HasScalarUndef && "Unexpected UndefValue");
// Handle simplifications when the RHS is a constant int.
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
@@ -1218,7 +1218,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
} else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
- // Perform elementwise folding.
+ // Fold each element and create a vector constant from those constants.
SmallVector<Constant*, 16> Result;
Type *Ty = IntegerType::get(VTy->getContext(), 32);
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
@@ -2052,7 +2052,7 @@ static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
static bool isIndexInRangeOfArrayType(uint64_t NumElements,
const ConstantInt *CI) {
// We cannot bounds check the index if it doesn't fit in an int64_t.
- if (CI->getValue().getActiveBits() > 64)
+ if (CI->getValue().getMinSignedBits() > 64)
return false;
// A negative index or an index past the end of our sequential type is
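Switching the bounds check from getActiveBits() to getMinSignedBits() matters for negative indices: a value like -1 stored in a wide APInt has every bit active, yet it needs only one signed bit and fits in an int64_t. A small sketch of the two bit-width notions on plain 64-bit integers (helper names are hypothetical):

#include <cstdint>
#include <iostream>

unsigned activeBits(uint64_t V) {     // bits needed for the unsigned value
  unsigned N = 0;
  while (V) { ++N; V >>= 1; }
  return N;
}

unsigned minSignedBits(int64_t V) {   // two's-complement width incl. sign bit
  uint64_t U = V < 0 ? ~static_cast<uint64_t>(V) : static_cast<uint64_t>(V);
  return activeBits(U) + 1;
}

int main() {
  std::cout << activeBits(static_cast<uint64_t>(int64_t(-1))) << '\n'; // 64
  std::cout << minSignedBits(-1) << '\n';                              // 1
}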
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index 2351e7e4a389..d36967fdcfe1 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -184,18 +184,15 @@ bool Constant::isNotMinSignedValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return !CFP->getValueAPF().bitcastToAPInt().isMinSignedValue();
- // Check for constant vectors which are splats of INT_MIN values.
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
- if (Constant *Splat = CV->getSplatValue())
- return Splat->isNotMinSignedValue();
-
- // Check for constant vectors which are splats of INT_MIN values.
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) {
- if (CV->isSplat()) {
- if (CV->getElementType()->isFloatingPointTy())
- return !CV->getElementAsAPFloat(0).bitcastToAPInt().isMinSignedValue();
- return !CV->getElementAsAPInt(0).isMinSignedValue();
+ // Check that vectors don't contain INT_MIN
+ if (this->getType()->isVectorTy()) {
+ unsigned NumElts = this->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = this->getAggregateElement(i);
+ if (!Elt || !Elt->isNotMinSignedValue())
+ return false;
}
+ return true;
}
// It *may* contain INT_MIN, we can't tell.
@@ -353,8 +350,12 @@ Constant *Constant::getAggregateElement(unsigned Elt) const {
Constant *Constant::getAggregateElement(Constant *Elt) const {
assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt)) {
+    // Check if the constant fits into a uint64_t.
+ if (CI->getValue().getActiveBits() > 64)
+ return nullptr;
return getAggregateElement(CI->getZExtValue());
+ }
return nullptr;
}
@@ -722,9 +723,9 @@ Constant *ConstantFP::get(Type *Ty, StringRef Str) {
return C;
}
-Constant *ConstantFP::getNaN(Type *Ty, bool Negative, unsigned Type) {
+Constant *ConstantFP::getNaN(Type *Ty, bool Negative, uint64_t Payload) {
const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
- APFloat NaN = APFloat::getNaN(Semantics, Negative, Type);
+ APFloat NaN = APFloat::getNaN(Semantics, Negative, Payload);
Constant *C = get(Ty->getContext(), NaN);
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
@@ -733,6 +734,28 @@ Constant *ConstantFP::getNaN(Type *Ty, bool Negative, unsigned Type) {
return C;
}
+Constant *ConstantFP::getQNaN(Type *Ty, bool Negative, APInt *Payload) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ APFloat NaN = APFloat::getQNaN(Semantics, Negative, Payload);
+ Constant *C = get(Ty->getContext(), NaN);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
+ APFloat NaN = APFloat::getSNaN(Semantics, Negative, Payload);
+ Constant *C = get(Ty->getContext(), NaN);
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
Constant *ConstantFP::getNegativeZero(Type *Ty) {
const fltSemantics &Semantics = *TypeToFloatSemantics(Ty->getScalarType());
APFloat NegZero = APFloat::getZero(Semantics, /*Negative=*/true);
@@ -940,7 +963,7 @@ ConstantAggregate::ConstantAggregate(CompositeType *T, ValueTy VT,
ArrayRef<Constant *> V)
: Constant(T, VT, OperandTraits<ConstantAggregate>::op_end(this) - V.size(),
V.size()) {
- std::copy(V.begin(), V.end(), op_begin());
+ llvm::copy(V, op_begin());
// Check that types match, unless this is an opaque struct.
if (auto *ST = dyn_cast<StructType>(T))
@@ -1780,6 +1803,36 @@ Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced);
}
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C, unsigned Flags,
+ Type *OnlyIfReducedTy) {
+ // Check the operands for consistency first.
+ assert(Instruction::isUnaryOp(Opcode) &&
+ "Invalid opcode in unary constant expression");
+
+#ifndef NDEBUG
+ switch (Opcode) {
+ case Instruction::FNeg:
+ assert(C->getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ default:
+ break;
+ }
+#endif
+
+ // TODO: Try to constant fold operation.
+
+ if (OnlyIfReducedTy == C->getType())
+ return nullptr;
+
+ Constant *ArgVec[] = { C };
+ ConstantExprKeyType Key(Opcode, ArgVec, 0, Flags);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(C->getType(), Key);
+}
+
Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
unsigned Flags, Type *OnlyIfReducedTy) {
// Check the operands for consistency first.
@@ -1946,9 +1999,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
if (!Ty)
Ty = cast<PointerType>(C->getType()->getScalarType())->getElementType();
else
- assert(
- Ty ==
- cast<PointerType>(C->getType()->getScalarType())->getContainedType(0u));
+ assert(Ty ==
+ cast<PointerType>(C->getType()->getScalarType())->getElementType());
if (Constant *FC =
ConstantFoldGetElementPtr(Ty, C, InBounds, InRangeIndex, Idxs))
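For reference, a minimal C++ sketch of the ConstantFP::getQNaN/getSNaN entry points added above, assuming only an existing LLVMContext; a null payload requests the default NaN bit pattern, and vector types receive a splat as in the implementation:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Quiet and signaling NaN constants for float, plus a <4 x float> splat.
    Constant *makeNaNs(LLVMContext &Ctx) {
      Type *FloatTy = Type::getFloatTy(Ctx);
      Constant *QNaN = ConstantFP::getQNaN(FloatTy, /*Negative=*/false,
                                           /*Payload=*/nullptr);
      Constant *SNaN = ConstantFP::getSNaN(FloatTy, /*Negative=*/true,
                                           /*Payload=*/nullptr);
      Constant *VecQNaN =
          ConstantFP::getQNaN(VectorType::get(FloatTy, 4), false, nullptr);
      (void)SNaN;
      (void)VecQNaN;
      return QNaN;
    }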
diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h
index e9f31e4ded68..eac171397084 100644
--- a/contrib/llvm/lib/IR/ConstantsContext.h
+++ b/contrib/llvm/lib/IR/ConstantsContext.h
@@ -529,7 +529,9 @@ struct ConstantExprKeyType {
ConstantExpr *create(TypeClass *Ty) const {
switch (Opcode) {
default:
- if (Instruction::isCast(Opcode))
+ if (Instruction::isCast(Opcode) ||
+ (Opcode >= Instruction::UnaryOpsBegin &&
+ Opcode < Instruction::UnaryOpsEnd))
return new UnaryConstantExpr(Opcode, Ops[0], Ty);
if ((Opcode >= Instruction::BinaryOpsBegin &&
Opcode < Instruction::BinaryOpsEnd))
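The factory change above routes unary opcodes (currently only FNeg) to UnaryConstantExpr. A hedged sketch of the matching unary ConstantExpr::get overload defined in Constants.cpp, passing the neutral Flags/OnlyIfReducedTy values explicitly:

    #include <cassert>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Create a constant fneg expression without attempting any folding.
    Constant *makeFNegExpr(Constant *X) {
      assert(X->getType()->isFPOrFPVectorTy() && "fneg needs an FP operand");
      return ConstantExpr::get(Instruction::FNeg, X, /*Flags=*/0,
                               /*OnlyIfReducedTy=*/nullptr);
    }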
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index bea4dee15c13..815797f4b7ea 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -15,8 +15,8 @@
#include "llvm-c/Core.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
@@ -107,6 +107,14 @@ void LLVMContextSetYieldCallback(LLVMContextRef C, LLVMYieldCallback Callback,
unwrap(C)->setYieldCallback(YieldCallback, OpaqueHandle);
}
+LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C) {
+ return unwrap(C)->shouldDiscardValueNames();
+}
+
+void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard) {
+ unwrap(C)->setDiscardValueNames(Discard);
+}
+
void LLVMContextDispose(LLVMContextRef C) {
delete unwrap(C);
}
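A small sketch of the new value-name toggle in the C API, assuming nothing beyond llvm-c/Core.h; discarding local value names trims memory when textual IR names are not needed:

    #include <llvm-c/Core.h>
    #include <stdio.h>

    void demoDiscardNames(void) {
      LLVMContextRef Ctx = LLVMContextCreate();
      printf("discarding names by default? %d\n",
             LLVMContextShouldDiscardValueNames(Ctx));
      LLVMContextSetDiscardValueNames(Ctx, 1); /* drop local value names */
      LLVMContextDispose(Ctx);
    }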
@@ -706,6 +714,10 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->isOpaque();
}
+LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isLiteral();
+}
+
LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
return wrap(unwrap(M)->getTypeByName(Name));
}
@@ -868,6 +880,38 @@ void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef Val) {
unwrap<Instruction>(Inst)->setMetadata(KindID, N);
}
+struct LLVMOpaqueValueMetadataEntry {
+ unsigned Kind;
+ LLVMMetadataRef Metadata;
+};
+
+using MetadataEntries = SmallVectorImpl<std::pair<unsigned, MDNode *>>;
+static LLVMValueMetadataEntry *
+llvm_getMetadata(size_t *NumEntries,
+ llvm::function_ref<void(MetadataEntries &)> AccessMD) {
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MVEs;
+ AccessMD(MVEs);
+
+ LLVMOpaqueValueMetadataEntry *Result =
+ static_cast<LLVMOpaqueValueMetadataEntry *>(
+ safe_malloc(MVEs.size() * sizeof(LLVMOpaqueValueMetadataEntry)));
+ for (unsigned i = 0; i < MVEs.size(); ++i) {
+ const auto &ModuleFlag = MVEs[i];
+ Result[i].Kind = ModuleFlag.first;
+ Result[i].Metadata = wrap(ModuleFlag.second);
+ }
+ *NumEntries = MVEs.size();
+ return Result;
+}
+
+LLVMValueMetadataEntry *
+LLVMInstructionGetAllMetadataOtherThanDebugLoc(LLVMValueRef Value,
+ size_t *NumEntries) {
+ return llvm_getMetadata(NumEntries, [&Value](MetadataEntries &Entries) {
+ unwrap<Instruction>(Value)->getAllMetadata(Entries);
+ });
+}
+
/*--.. Conversion functions ................................................--*/
#define LLVM_DEFINE_VALUE_CAST(name) \
@@ -1065,6 +1109,54 @@ unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V) {
return cast<MDNode>(MD->getMetadata())->getNumOperands();
}
+LLVMNamedMDNodeRef LLVMGetFirstNamedMetadata(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::named_metadata_iterator I = Mod->named_metadata_begin();
+ if (I == Mod->named_metadata_end())
+ return nullptr;
+ return wrap(&*I);
+}
+
+LLVMNamedMDNodeRef LLVMGetLastNamedMetadata(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::named_metadata_iterator I = Mod->named_metadata_end();
+ if (I == Mod->named_metadata_begin())
+ return nullptr;
+ return wrap(&*--I);
+}
+
+LLVMNamedMDNodeRef LLVMGetNextNamedMetadata(LLVMNamedMDNodeRef NMD) {
+ NamedMDNode *NamedNode = unwrap<NamedMDNode>(NMD);
+ Module::named_metadata_iterator I(NamedNode);
+ if (++I == NamedNode->getParent()->named_metadata_end())
+ return nullptr;
+ return wrap(&*I);
+}
+
+LLVMNamedMDNodeRef LLVMGetPreviousNamedMetadata(LLVMNamedMDNodeRef NMD) {
+ NamedMDNode *NamedNode = unwrap<NamedMDNode>(NMD);
+ Module::named_metadata_iterator I(NamedNode);
+ if (I == NamedNode->getParent()->named_metadata_begin())
+ return nullptr;
+ return wrap(&*--I);
+}
+
+LLVMNamedMDNodeRef LLVMGetNamedMetadata(LLVMModuleRef M,
+ const char *Name, size_t NameLen) {
+ return wrap(unwrap(M)->getNamedMetadata(StringRef(Name, NameLen)));
+}
+
+LLVMNamedMDNodeRef LLVMGetOrInsertNamedMetadata(LLVMModuleRef M,
+ const char *Name, size_t NameLen) {
+ return wrap(unwrap(M)->getOrInsertNamedMetadata({Name, NameLen}));
+}
+
+const char *LLVMGetNamedMetadataName(LLVMNamedMDNodeRef NMD, size_t *NameLen) {
+ NamedMDNode *NamedNode = unwrap<NamedMDNode>(NMD);
+ *NameLen = NamedNode->getName().size();
+ return NamedNode->getName().data();
+}
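A sketch of iterating named metadata with the accessors above, assuming a populated LLVMModuleRef; names come back as length-delimited (pointer, length) pairs rather than NUL-terminated strings:

    #include <llvm-c/Core.h>
    #include <stdio.h>

    void listNamedMetadata(LLVMModuleRef M) {
      for (LLVMNamedMDNodeRef NMD = LLVMGetFirstNamedMetadata(M); NMD;
           NMD = LLVMGetNextNamedMetadata(NMD)) {
        size_t Len = 0;
        const char *Name = LLVMGetNamedMetadataName(NMD, &Len);
        printf("!%.*s\n", (int)Len, Name);
      }
      /* Looks the node up by name, creating it if it does not exist yet. */
      LLVMGetOrInsertNamedMetadata(M, "llvm.module.flags", 17);
    }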
+
void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest) {
auto *MD = cast<MetadataAsValue>(unwrap(V));
if (auto *MDV = dyn_cast<ValueAsMetadata>(MD->getMetadata())) {
@@ -1105,6 +1197,78 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name,
N->addOperand(extractMDNode(unwrap<MetadataAsValue>(Val)));
}
+const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length) {
+ if (!Length) return nullptr;
+ StringRef S;
+ if (const auto *I = unwrap<Instruction>(Val)) {
+ S = I->getDebugLoc()->getDirectory();
+ } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+  // If the two locations are irreconcilable, just pick one. This is misleading,
+ GV->getDebugInfo(GVEs);
+ if (GVEs.size())
+ if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
+ S = DGV->getDirectory();
+ } else if (const auto *F = unwrap<Function>(Val)) {
+ if (const DISubprogram *DSP = F->getSubprogram())
+ S = DSP->getDirectory();
+ } else {
+ assert(0 && "Expected Instruction, GlobalVariable or Function");
+ return nullptr;
+ }
+ *Length = S.size();
+ return S.data();
+}
+
+const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length) {
+ if (!Length) return nullptr;
+ StringRef S;
+ if (const auto *I = unwrap<Instruction>(Val)) {
+ S = I->getDebugLoc()->getFilename();
+ } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV->getDebugInfo(GVEs);
+ if (GVEs.size())
+ if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
+ S = DGV->getFilename();
+ } else if (const auto *F = unwrap<Function>(Val)) {
+ if (const DISubprogram *DSP = F->getSubprogram())
+ S = DSP->getFilename();
+ } else {
+ assert(0 && "Expected Instruction, GlobalVariable or Function");
+ return nullptr;
+ }
+ *Length = S.size();
+ return S.data();
+}
+
+unsigned LLVMGetDebugLocLine(LLVMValueRef Val) {
+ unsigned L = 0;
+ if (const auto *I = unwrap<Instruction>(Val)) {
+ L = I->getDebugLoc()->getLine();
+ } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV->getDebugInfo(GVEs);
+ if (GVEs.size())
+ if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
+ L = DGV->getLine();
+ } else if (const auto *F = unwrap<Function>(Val)) {
+ if (const DISubprogram *DSP = F->getSubprogram())
+ L = DSP->getLine();
+ } else {
+ assert(0 && "Expected Instruction, GlobalVariable or Function");
+ return -1;
+ }
+ return L;
+}
+
+unsigned LLVMGetDebugLocColumn(LLVMValueRef Val) {
+ unsigned C = 0;
+ if (const auto *I = unwrap<Instruction>(Val))
+ if (const auto &L = I->getDebugLoc())
+ C = L->getColumn();
+ return C;
+}
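A sketch of the debug-location accessors above, assuming Val is an instruction, global variable, or function that carries debug info; the returned strings are length-delimited, and the column is only meaningful for instructions:

    #include <llvm-c/Core.h>
    #include <stdio.h>

    void printDebugLoc(LLVMValueRef Val) {
      unsigned Len = 0;
      const char *File = LLVMGetDebugLocFilename(Val, &Len);
      unsigned Line = LLVMGetDebugLocLine(Val);
      unsigned Col = LLVMGetDebugLocColumn(Val); /* instructions only */
      printf("%.*s:%u:%u\n", (int)Len, File, Line, Col);
    }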
+
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
@@ -1453,17 +1617,21 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices) {
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
NumIndices);
- return wrap(ConstantExpr::getGetElementPtr(
- nullptr, unwrap<Constant>(ConstantVal), IdxList));
+ Constant *Val = unwrap<Constant>(ConstantVal);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(ConstantExpr::getGetElementPtr(Ty, Val, IdxList));
}
LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices,
unsigned NumIndices) {
- Constant* Val = unwrap<Constant>(ConstantVal);
ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
NumIndices);
- return wrap(ConstantExpr::getInBoundsGetElementPtr(nullptr, Val, IdxList));
+ Constant *Val = unwrap<Constant>(ConstantVal);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(Ty, Val, IdxList));
}
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
@@ -1792,6 +1960,10 @@ void LLVMSetUnnamedAddr(LLVMValueRef Global, LLVMBool HasUnnamedAddr) {
: GlobalValue::UnnamedAddr::None);
}
+LLVMTypeRef LLVMGlobalGetValueType(LLVMValueRef Global) {
+ return wrap(unwrap<GlobalValue>(Global)->getValueType());
+}
+
/*--.. Operations on global variables, load and store instructions .........--*/
unsigned LLVMGetAlignment(LLVMValueRef V) {
@@ -1824,6 +1996,49 @@ void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) {
"only GlobalValue, AllocaInst, LoadInst and StoreInst have alignment");
}
+LLVMValueMetadataEntry *LLVMGlobalCopyAllMetadata(LLVMValueRef Value,
+ size_t *NumEntries) {
+ return llvm_getMetadata(NumEntries, [&Value](MetadataEntries &Entries) {
+ if (Instruction *Instr = dyn_cast<Instruction>(unwrap(Value))) {
+ Instr->getAllMetadata(Entries);
+ } else {
+ unwrap<GlobalObject>(Value)->getAllMetadata(Entries);
+ }
+ });
+}
+
+unsigned LLVMValueMetadataEntriesGetKind(LLVMValueMetadataEntry *Entries,
+ unsigned Index) {
+ LLVMOpaqueValueMetadataEntry MVE =
+ static_cast<LLVMOpaqueValueMetadataEntry>(Entries[Index]);
+ return MVE.Kind;
+}
+
+LLVMMetadataRef
+LLVMValueMetadataEntriesGetMetadata(LLVMValueMetadataEntry *Entries,
+ unsigned Index) {
+ LLVMOpaqueValueMetadataEntry MVE =
+ static_cast<LLVMOpaqueValueMetadataEntry>(Entries[Index]);
+ return MVE.Metadata;
+}
+
+void LLVMDisposeValueMetadataEntries(LLVMValueMetadataEntry *Entries) {
+ free(Entries);
+}
+
+void LLVMGlobalSetMetadata(LLVMValueRef Global, unsigned Kind,
+ LLVMMetadataRef MD) {
+ unwrap<GlobalObject>(Global)->setMetadata(Kind, unwrap<MDNode>(MD));
+}
+
+void LLVMGlobalEraseMetadata(LLVMValueRef Global, unsigned Kind) {
+ unwrap<GlobalObject>(Global)->eraseMetadata(Kind);
+}
+
+void LLVMGlobalClearMetadata(LLVMValueRef Global) {
+ unwrap<GlobalObject>(Global)->clearMetadata();
+}
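A sketch of the copy-and-dispose pattern for the metadata-entry helpers above, assuming Global wraps a GlobalObject; the returned array is heap-allocated and must be released with LLVMDisposeValueMetadataEntries:

    #include <llvm-c/Core.h>
    #include <stddef.h>

    void walkGlobalMetadata(LLVMValueRef Global) {
      size_t Count = 0;
      LLVMValueMetadataEntry *Entries = LLVMGlobalCopyAllMetadata(Global, &Count);
      for (size_t I = 0; I < Count; ++I) {
        unsigned Kind = LLVMValueMetadataEntriesGetKind(Entries, (unsigned)I);
        LLVMMetadataRef MD =
            LLVMValueMetadataEntriesGetMetadata(Entries, (unsigned)I);
        LLVMGlobalSetMetadata(Global, Kind, MD); /* no-op round trip */
      }
      LLVMDisposeValueMetadataEntries(Entries);
    }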
+
/*--.. Operations on global variables ......................................--*/
LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
@@ -2076,6 +2291,50 @@ unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
return 0;
}
+static Intrinsic::ID llvm_map_to_intrinsic_id(unsigned ID) {
+ assert(ID < llvm::Intrinsic::num_intrinsics && "Intrinsic ID out of range");
+ return llvm::Intrinsic::ID(ID);
+}
+
+LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod,
+ unsigned ID,
+ LLVMTypeRef *ParamTypes,
+ size_t ParamCount) {
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ return wrap(llvm::Intrinsic::getDeclaration(unwrap(Mod), IID, Tys));
+}
+
+const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength) {
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ auto Str = llvm::Intrinsic::getName(IID);
+ *NameLength = Str.size();
+ return Str.data();
+}
+
+LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID,
+ LLVMTypeRef *ParamTypes, size_t ParamCount) {
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ return wrap(llvm::Intrinsic::getType(*unwrap(Ctx), IID, Tys));
+}
+
+const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
+ LLVMTypeRef *ParamTypes,
+ size_t ParamCount,
+ size_t *NameLength) {
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ auto Str = llvm::Intrinsic::getName(IID, Tys);
+ *NameLength = Str.length();
+ return strdup(Str.c_str());
+}
+
+LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID) {
+ auto IID = llvm_map_to_intrinsic_id(ID);
+ return llvm::Intrinsic::isOverloaded(IID);
+}
+
unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
return unwrap<Function>(Fn)->getCallingConv();
}
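A sketch of the intrinsic helpers above, assuming ID already holds a valid intrinsic identifier; note that LLVMIntrinsicCopyOverloadedName hands back a strdup'ed string the caller must free:

    #include <llvm-c/Core.h>
    #include <stdio.h>
    #include <stdlib.h>

    void describeIntrinsic(LLVMModuleRef M, unsigned ID, LLVMTypeRef OverloadTy) {
      size_t Len = 0;
      const char *Name = LLVMIntrinsicGetName(ID, &Len);
      printf("%.*s overloaded=%d\n", (int)Len, Name,
             LLVMIntrinsicIsOverloaded(ID));
      if (LLVMIntrinsicIsOverloaded(ID)) {
        const char *Full =
            LLVMIntrinsicCopyOverloadedName(ID, &OverloadTy, 1, &Len);
        printf("mangled name: %s\n", Full);
        free((void *)Full); /* result comes from strdup() above */
      }
      /* Finds or materialises the declaration in M. */
      LLVMGetIntrinsicDeclaration(M, ID, &OverloadTy, 1);
    }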
@@ -2277,6 +2536,11 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
return wrap(&*--I);
}
+LLVMBasicBlockRef LLVMCreateBasicBlockInContext(LLVMContextRef C,
+ const char *Name) {
+ return wrap(llvm::BasicBlock::Create(*unwrap(C), Name));
+}
+
LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
LLVMValueRef FnRef,
const char *Name) {
@@ -2391,47 +2655,52 @@ LLVMValueRef LLVMInstructionClone(LLVMValueRef Inst) {
return nullptr;
}
+LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst) {
+ Instruction *I = dyn_cast<Instruction>(unwrap(Inst));
+ return (I && I->isTerminator()) ? wrap(I) : nullptr;
+}
+
unsigned LLVMGetNumArgOperands(LLVMValueRef Instr) {
if (FuncletPadInst *FPI = dyn_cast<FuncletPadInst>(unwrap(Instr))) {
return FPI->getNumArgOperands();
}
- return CallSite(unwrap<Instruction>(Instr)).getNumArgOperands();
+ return unwrap<CallBase>(Instr)->getNumArgOperands();
}
/*--.. Call and invoke instructions ........................................--*/
unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
- return CallSite(unwrap<Instruction>(Instr)).getCallingConv();
+ return unwrap<CallBase>(Instr)->getCallingConv();
}
void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
- return CallSite(unwrap<Instruction>(Instr))
- .setCallingConv(static_cast<CallingConv::ID>(CC));
+ return unwrap<CallBase>(Instr)->setCallingConv(
+ static_cast<CallingConv::ID>(CC));
}
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
- CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ auto *Call = unwrap<CallBase>(Instr);
Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align);
- Call.addAttribute(index, AlignAttr);
+ Call->addAttribute(index, AlignAttr);
}
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef A) {
- CallSite(unwrap<Instruction>(C)).addAttribute(Idx, unwrap(A));
+ unwrap<CallBase>(C)->addAttribute(Idx, unwrap(A));
}
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
LLVMAttributeIndex Idx) {
- auto CS = CallSite(unwrap<Instruction>(C));
- auto AS = CS.getAttributes().getAttributes(Idx);
+ auto *Call = unwrap<CallBase>(C);
+ auto AS = Call->getAttributes().getAttributes(Idx);
return AS.getNumAttributes();
}
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
- auto CS = CallSite(unwrap<Instruction>(C));
- auto AS = CS.getAttributes().getAttributes(Idx);
+ auto *Call = unwrap<CallBase>(C);
+ auto AS = Call->getAttributes().getAttributes(Idx);
for (auto A : AS)
*Attrs++ = wrap(A);
}
@@ -2439,30 +2708,32 @@ void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID) {
- return wrap(CallSite(unwrap<Instruction>(C))
- .getAttribute(Idx, (Attribute::AttrKind)KindID));
+ return wrap(
+ unwrap<CallBase>(C)->getAttribute(Idx, (Attribute::AttrKind)KindID));
}
LLVMAttributeRef LLVMGetCallSiteStringAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- return wrap(CallSite(unwrap<Instruction>(C))
- .getAttribute(Idx, StringRef(K, KLen)));
+ return wrap(unwrap<CallBase>(C)->getAttribute(Idx, StringRef(K, KLen)));
}
void LLVMRemoveCallSiteEnumAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
unsigned KindID) {
- CallSite(unwrap<Instruction>(C))
- .removeAttribute(Idx, (Attribute::AttrKind)KindID);
+ unwrap<CallBase>(C)->removeAttribute(Idx, (Attribute::AttrKind)KindID);
}
void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- CallSite(unwrap<Instruction>(C)).removeAttribute(Idx, StringRef(K, KLen));
+ unwrap<CallBase>(C)->removeAttribute(Idx, StringRef(K, KLen));
}
LLVMValueRef LLVMGetCalledValue(LLVMValueRef Instr) {
- return wrap(CallSite(unwrap<Instruction>(Instr)).getCalledValue());
+ return wrap(unwrap<CallBase>(Instr)->getCalledValue());
+}
+
+LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef Instr) {
+ return wrap(unwrap<CallBase>(Instr)->getFunctionType());
}
/*--.. Operations on call instructions (only) ..............................--*/
@@ -2506,15 +2777,15 @@ void LLVMSetUnwindDest(LLVMValueRef Invoke, LLVMBasicBlockRef B) {
/*--.. Operations on terminators ...........................................--*/
unsigned LLVMGetNumSuccessors(LLVMValueRef Term) {
- return unwrap<TerminatorInst>(Term)->getNumSuccessors();
+ return unwrap<Instruction>(Term)->getNumSuccessors();
}
LLVMBasicBlockRef LLVMGetSuccessor(LLVMValueRef Term, unsigned i) {
- return wrap(unwrap<TerminatorInst>(Term)->getSuccessor(i));
+ return wrap(unwrap<Instruction>(Term)->getSuccessor(i));
}
void LLVMSetSuccessor(LLVMValueRef Term, unsigned i, LLVMBasicBlockRef block) {
- return unwrap<TerminatorInst>(Term)->setSuccessor(i,unwrap(block));
+ return unwrap<Instruction>(Term)->setSuccessor(i, unwrap(block));
}
/*--.. Operations on branch instructions (only) ............................--*/
@@ -2584,6 +2855,8 @@ unsigned LLVMGetNumIndices(LLVMValueRef Inst) {
return EV->getNumIndices();
if (auto *IV = dyn_cast<InsertValueInst>(I))
return IV->getNumIndices();
+ if (auto *CE = dyn_cast<ConstantExpr>(I))
+ return CE->getIndices().size();
llvm_unreachable(
"LLVMGetNumIndices applies only to extractvalue and insertvalue!");
}
@@ -2594,6 +2867,8 @@ const unsigned *LLVMGetIndices(LLVMValueRef Inst) {
return EV->getIndices().data();
if (auto *IV = dyn_cast<InsertValueInst>(I))
return IV->getIndices().data();
+ if (auto *CE = dyn_cast<ConstantExpr>(I))
+ return CE->getIndices().data();
llvm_unreachable(
"LLVMGetIndices applies only to extractvalue and insertvalue!");
}
@@ -2704,9 +2979,22 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
const char *Name) {
- return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
- makeArrayRef(unwrap(Args), NumArgs),
- Name));
+ Value *V = unwrap(Fn);
+ FunctionType *FnT =
+ cast<FunctionType>(cast<PointerType>(V->getType())->getElementType());
+
+ return wrap(
+ unwrap(B)->CreateInvoke(FnT, unwrap(Fn), unwrap(Then), unwrap(Catch),
+ makeArrayRef(unwrap(Args), NumArgs), Name));
+}
+
+LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInvoke(
+ unwrap<FunctionType>(Ty), unwrap(Fn), unwrap(Then), unwrap(Catch),
+ makeArrayRef(unwrap(Args), NumArgs), Name));
}
LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
@@ -3021,6 +3309,30 @@ LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
}
+LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr,
+ LLVMValueRef Val, LLVMValueRef Len,
+ unsigned Align) {
+ return wrap(unwrap(B)->CreateMemSet(unwrap(Ptr), unwrap(Val), unwrap(Len), Align));
+}
+
+LLVMValueRef LLVMBuildMemCpy(LLVMBuilderRef B,
+ LLVMValueRef Dst, unsigned DstAlign,
+ LLVMValueRef Src, unsigned SrcAlign,
+ LLVMValueRef Size) {
+ return wrap(unwrap(B)->CreateMemCpy(unwrap(Dst), DstAlign,
+ unwrap(Src), SrcAlign,
+ unwrap(Size)));
+}
+
+LLVMValueRef LLVMBuildMemMove(LLVMBuilderRef B,
+ LLVMValueRef Dst, unsigned DstAlign,
+ LLVMValueRef Src, unsigned SrcAlign,
+ LLVMValueRef Size) {
+ return wrap(unwrap(B)->CreateMemMove(unwrap(Dst), DstAlign,
+ unwrap(Src), SrcAlign,
+ unwrap(Size)));
+}
+
LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
const char *Name) {
return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), nullptr, Name));
@@ -3038,7 +3350,15 @@ LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
const char *Name) {
- return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
+ Value *V = unwrap(PointerVal);
+ PointerType *Ty = cast<PointerType>(V->getType());
+
+ return wrap(unwrap(B)->CreateLoad(Ty->getElementType(), V, Name));
+}
+
+LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef PointerVal, const char *Name) {
+ return wrap(unwrap(B)->CreateLoad(unwrap(Ty), unwrap(PointerVal), Name));
}
LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
@@ -3093,20 +3413,50 @@ LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
- return wrap(unwrap(B)->CreateGEP(nullptr, unwrap(Pointer), IdxList, Name));
+ Value *Val = unwrap(Pointer);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(unwrap(B)->CreateGEP(Ty, Val, IdxList, Name));
+}
+
+LLVMValueRef LLVMBuildGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, LLVMValueRef *Indices,
+ unsigned NumIndices, const char *Name) {
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ return wrap(unwrap(B)->CreateGEP(unwrap(Ty), unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ Value *Val = unwrap(Pointer);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(unwrap(B)->CreateInBoundsGEP(Ty, Val, IdxList, Name));
+}
+
+LLVMValueRef LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, LLVMValueRef *Indices,
+ unsigned NumIndices, const char *Name) {
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
return wrap(
- unwrap(B)->CreateInBoundsGEP(nullptr, unwrap(Pointer), IdxList, Name));
+ unwrap(B)->CreateInBoundsGEP(unwrap(Ty), unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
unsigned Idx, const char *Name) {
- return wrap(unwrap(B)->CreateStructGEP(nullptr, unwrap(Pointer), Idx, Name));
+ Value *Val = unwrap(Pointer);
+ Type *Ty =
+ cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+ return wrap(unwrap(B)->CreateStructGEP(Ty, Val, Idx, Name));
+}
+
+LLVMValueRef LLVMBuildStructGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Pointer, unsigned Idx,
+ const char *Name) {
+ return wrap(
+ unwrap(B)->CreateStructGEP(unwrap(Ty), unwrap(Pointer), Idx, Name));
}
LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
@@ -3248,6 +3598,13 @@ LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
}
+LLVMValueRef LLVMBuildIntCast2(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, LLVMBool IsSigned,
+ const char *Name) {
+ return wrap(
+ unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), IsSigned, Name));
+}
+
LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name) {
return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy),
@@ -3284,9 +3641,20 @@ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
const char *Name) {
- return wrap(unwrap(B)->CreateCall(unwrap(Fn),
- makeArrayRef(unwrap(Args), NumArgs),
- Name));
+ Value *V = unwrap(Fn);
+ FunctionType *FnT =
+ cast<FunctionType>(cast<PointerType>(V->getType())->getElementType());
+
+ return wrap(unwrap(B)->CreateCall(FnT, unwrap(Fn),
+ makeArrayRef(unwrap(Args), NumArgs), Name));
+}
+
+LLVMValueRef LLVMBuildCall2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name) {
+ FunctionType *FTy = unwrap<FunctionType>(Ty);
+ return wrap(unwrap(B)->CreateCall(FTy, unwrap(Fn),
+ makeArrayRef(unwrap(Args), NumArgs), Name));
}
LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
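The builder changes above recover pointee and callee types from the pointer operand for the legacy entry points and add explicitly typed *2 variants. A self-contained sketch using the new variants, assuming only what llvm-c/Core.h provides:

    #include <llvm-c/Core.h>

    /* Builds: define i32 @caller(i32* %p) {
         %v = load i32, i32* %p
         %r = call i32 @callee(i32 %v)
         ret i32 %r } */
    LLVMModuleRef buildExample(LLVMContextRef Ctx) {
      LLVMModuleRef M = LLVMModuleCreateWithNameInContext("demo", Ctx);
      LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
      LLVMTypeRef I32Ptr = LLVMPointerType(I32, 0);

      LLVMTypeRef CalleeTy = LLVMFunctionType(I32, &I32, 1, 0);
      LLVMValueRef Callee = LLVMAddFunction(M, "callee", CalleeTy);

      LLVMTypeRef CallerTy = LLVMFunctionType(I32, &I32Ptr, 1, 0);
      LLVMValueRef Caller = LLVMAddFunction(M, "caller", CallerTy);
      LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Caller, "entry");

      LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
      LLVMPositionBuilderAtEnd(B, Entry);
      /* The *2 variants take the pointee / callee type explicitly. */
      LLVMValueRef V = LLVMBuildLoad2(B, I32, LLVMGetParam(Caller, 0), "v");
      LLVMValueRef R = LLVMBuildCall2(B, CalleeTy, Callee, &V, 1, "r");
      LLVMBuildRet(B, R);
      LLVMDisposeBuilder(B);
      return M;
    }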
diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp
index 5c5477f4f40f..fb81634a2868 100644
--- a/contrib/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm/lib/IR/DIBuilder.cpp
@@ -139,7 +139,8 @@ DICompileUnit *DIBuilder::createCompileUnit(
unsigned Lang, DIFile *File, StringRef Producer, bool isOptimized,
StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
DICompileUnit::DebugEmissionKind Kind, uint64_t DWOId,
- bool SplitDebugInlining, bool DebugInfoForProfiling, bool GnuPubnames) {
+ bool SplitDebugInlining, bool DebugInfoForProfiling,
+ DICompileUnit::DebugNameTableKind NameTableKind, bool RangesBaseAddress) {
assert(((Lang <= dwarf::DW_LANG_Fortran08 && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
@@ -149,7 +150,8 @@ DICompileUnit *DIBuilder::createCompileUnit(
CUNode = DICompileUnit::getDistinct(
VMContext, Lang, File, Producer, isOptimized, Flags, RunTimeVer,
SplitName, Kind, nullptr, nullptr, nullptr, nullptr, nullptr, DWOId,
- SplitDebugInlining, DebugInfoForProfiling, GnuPubnames);
+ SplitDebugInlining, DebugInfoForProfiling, NameTableKind,
+ RangesBaseAddress);
// Create a named metadata so that it is easier to find cu in a module.
NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
@@ -256,10 +258,11 @@ DIBasicType *DIBuilder::createNullPtrType() {
}
DIBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
- unsigned Encoding) {
+ unsigned Encoding,
+ DINode::DIFlags Flags) {
assert(!Name.empty() && "Unable to create type without name");
return DIBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits,
- 0, Encoding);
+ 0, Encoding, Flags);
}
DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
@@ -345,13 +348,10 @@ static ConstantAsMetadata *getConstantOrNull(Constant *C) {
return nullptr;
}
-DIDerivedType *DIBuilder::createVariantMemberType(DIScope *Scope, StringRef Name,
- DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- uint64_t OffsetInBits,
- Constant *Discriminant,
- DINode::DIFlags Flags, DIType *Ty) {
+DIDerivedType *DIBuilder::createVariantMemberType(
+ DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ Constant *Discriminant, DINode::DIFlags Flags, DIType *Ty) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty,
SizeInBits, AlignInBits, OffsetInBits, None, Flags,
@@ -504,11 +504,11 @@ DISubroutineType *DIBuilder::createSubroutineType(DITypeRefArray ParameterTypes,
DICompositeType *DIBuilder::createEnumerationType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
- DIType *UnderlyingType, StringRef UniqueIdentifier, bool IsFixed) {
+ DIType *UnderlyingType, StringRef UniqueIdentifier, bool IsScoped) {
auto *CTy = DICompositeType::get(
VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber,
getNonCompileUnitScope(Scope), UnderlyingType, SizeInBits, AlignInBits, 0,
- IsFixed ? DINode::FlagFixedEnum : DINode::FlagZero, Elements, 0, nullptr,
+ IsScoped ? DINode::FlagEnumClass : DINode::FlagZero, Elements, 0, nullptr,
nullptr, UniqueIdentifier);
AllEnumTypes.push_back(CTy);
trackIfUnresolved(CTy);
@@ -640,13 +640,13 @@ static void checkGlobalVariableScope(DIScope *Context) {
DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
unsigned LineNumber, DIType *Ty, bool isLocalToUnit, DIExpression *Expr,
- MDNode *Decl, uint32_t AlignInBits) {
+ MDNode *Decl, MDTuple *templateParams, uint32_t AlignInBits) {
checkGlobalVariableScope(Context);
auto *GV = DIGlobalVariable::getDistinct(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
LineNumber, Ty, isLocalToUnit, true, cast_or_null<DIDerivedType>(Decl),
- AlignInBits);
+ templateParams, AlignInBits);
if (!Expr)
Expr = createExpression();
auto *N = DIGlobalVariableExpression::get(VMContext, GV, Expr);
@@ -657,13 +657,13 @@ DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
unsigned LineNumber, DIType *Ty, bool isLocalToUnit, MDNode *Decl,
- uint32_t AlignInBits) {
+ MDTuple *templateParams, uint32_t AlignInBits) {
checkGlobalVariableScope(Context);
return DIGlobalVariable::getTemporary(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
LineNumber, Ty, isLocalToUnit, false,
- cast_or_null<DIDerivedType>(Decl), AlignInBits)
+ cast_or_null<DIDerivedType>(Decl), templateParams, AlignInBits)
.release();
}
@@ -751,18 +751,18 @@ static DISubprogram *getSubprogram(bool IsDistinct, Ts &&... Args) {
DISubprogram *DIBuilder::createFunction(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags,
- bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl,
+ unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
+ DINode::DIFlags Flags, DISubprogram::DISPFlags SPFlags,
+ DITemplateParameterArray TParams, DISubprogram *Decl,
DITypeArray ThrownTypes) {
+ bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
auto *Node = getSubprogram(
- /* IsDistinct = */ isDefinition, VMContext,
- getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty,
- isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, 0, Flags,
- isOptimized, isDefinition ? CUNode : nullptr, TParams, Decl,
+ /*IsDistinct=*/IsDefinition, VMContext, getNonCompileUnitScope(Context),
+ Name, LinkageName, File, LineNo, Ty, ScopeLine, nullptr, 0, 0, Flags,
+ SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl,
MDTuple::getTemporary(VMContext, None).release(), ThrownTypes);
- if (isDefinition)
+ if (IsDefinition)
AllSubprograms.push_back(Node);
trackIfUnresolved(Node);
return Node;
@@ -770,35 +770,37 @@ DISubprogram *DIBuilder::createFunction(
DISubprogram *DIBuilder::createTempFunctionFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags,
- bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl,
+ unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
+ DINode::DIFlags Flags, DISubprogram::DISPFlags SPFlags,
+ DITemplateParameterArray TParams, DISubprogram *Decl,
DITypeArray ThrownTypes) {
- return DISubprogram::getTemporary(
- VMContext, getNonCompileUnitScope(Context), Name, LinkageName,
- File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, nullptr,
- 0, 0, 0, Flags, isOptimized, isDefinition ? CUNode : nullptr,
- TParams, Decl, nullptr, ThrownTypes)
+ bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
+ return DISubprogram::getTemporary(VMContext, getNonCompileUnitScope(Context),
+ Name, LinkageName, File, LineNo, Ty,
+ ScopeLine, nullptr, 0, 0, Flags, SPFlags,
+ IsDefinition ? CUNode : nullptr, TParams,
+ Decl, nullptr, ThrownTypes)
.release();
}
DISubprogram *DIBuilder::createMethod(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
- unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned VK, unsigned VIndex, int ThisAdjustment,
- DIType *VTableHolder, DINode::DIFlags Flags, bool isOptimized,
- DITemplateParameterArray TParams, DITypeArray ThrownTypes) {
+ unsigned LineNo, DISubroutineType *Ty, unsigned VIndex, int ThisAdjustment,
+ DIType *VTableHolder, DINode::DIFlags Flags,
+ DISubprogram::DISPFlags SPFlags, DITemplateParameterArray TParams,
+ DITypeArray ThrownTypes) {
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
// FIXME: Do we want to use different scope/lines?
+ bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
auto *SP = getSubprogram(
- /* IsDistinct = */ isDefinition, VMContext, cast<DIScope>(Context), Name,
- LinkageName, F, LineNo, Ty, isLocalToUnit, isDefinition, LineNo,
- VTableHolder, VK, VIndex, ThisAdjustment, Flags, isOptimized,
- isDefinition ? CUNode : nullptr, TParams, nullptr, nullptr, ThrownTypes);
+ /*IsDistinct=*/IsDefinition, VMContext, cast<DIScope>(Context), Name,
+ LinkageName, F, LineNo, Ty, LineNo, VTableHolder, VIndex, ThisAdjustment,
+ Flags, SPFlags, IsDefinition ? CUNode : nullptr, TParams, nullptr,
+ nullptr, ThrownTypes);
- if (isDefinition)
+ if (IsDefinition)
AllSubprograms.push_back(SP);
trackIfUnresolved(SP);
return SP;
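A hedged sketch of the reworked DIBuilder::createFunction signature, assuming an existing DIBuilder DIB, DIFile *File, and DISubroutineType *FnTy; the former isLocalToUnit/isDefinition/isOptimized booleans are now packed into DISPFlags via DISubprogram::toSPFlags:

    #include "llvm/IR/DIBuilder.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    DISubprogram *emitFunctionDI(DIBuilder &DIB, DIFile *File,
                                 DISubroutineType *FnTy, unsigned Line) {
      DISubprogram::DISPFlags SPFlags = DISubprogram::toSPFlags(
          /*IsLocalToUnit=*/false, /*IsDefinition=*/true, /*IsOptimized=*/false);
      return DIB.createFunction(File, "main", /*LinkageName=*/"", File, Line,
                                FnTy, /*ScopeLine=*/Line, DINode::FlagPrototyped,
                                SPFlags);
    }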
diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp
index 62c67127276e..63c24b5ee7af 100644
--- a/contrib/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm/lib/IR/DataLayout.cpp
@@ -635,6 +635,14 @@ unsigned DataLayout::getPointerSize(unsigned AS) const {
return I->TypeByteWidth;
}
+unsigned DataLayout::getMaxPointerSize() const {
+ unsigned MaxPointerSize = 0;
+ for (auto &P : Pointers)
+ MaxPointerSize = std::max(MaxPointerSize, P.TypeByteWidth);
+
+ return MaxPointerSize;
+}
+
unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
assert(Ty->isPtrOrPtrVectorTy() &&
"This should only be called with a pointer or pointer vector type");
@@ -808,15 +816,29 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
/// global. This includes an explicitly requested alignment (if the global
/// has one).
unsigned DataLayout::getPreferredAlignment(const GlobalVariable *GV) const {
+ unsigned GVAlignment = GV->getAlignment();
+ // If a section is specified, always precisely honor explicit alignment,
+ // so we don't insert padding into a section we don't control.
+ if (GVAlignment && GV->hasSection())
+ return GVAlignment;
+
+ // If no explicit alignment is specified, compute the alignment based on
+ // the IR type. If an alignment is specified, increase it to match the ABI
+ // alignment of the IR type.
+ //
+ // FIXME: Not sure it makes sense to use the alignment of the type if
+ // there's already an explicit alignment specification.
Type *ElemType = GV->getValueType();
unsigned Alignment = getPrefTypeAlignment(ElemType);
- unsigned GVAlignment = GV->getAlignment();
if (GVAlignment >= Alignment) {
Alignment = GVAlignment;
} else if (GVAlignment != 0) {
Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
}
+ // If no explicit alignment is specified, and the global is large, increase
+ // the alignment to 16.
+ // FIXME: Why 16, specifically?
if (GV->hasInitializer() && GVAlignment == 0) {
if (Alignment < 16) {
// If the global is not external, see if it is large. If so, give it a
diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp
index 165c881c13e7..9fa31773b598 100644
--- a/contrib/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm/lib/IR/DebugInfo.cpp
@@ -280,7 +280,7 @@ bool DebugInfoFinder::addScope(DIScope *Scope) {
}
static MDNode *stripDebugLocFromLoopID(MDNode *N) {
- assert(N->op_begin() != N->op_end() && "Missing self reference?");
+ assert(!empty(N->operands()) && "Missing self reference?");
// if there is no debug location, we do not have to rewrite this MDNode.
if (std::none_of(N->op_begin() + 1, N->op_end(), [](const MDOperand &Op) {
@@ -438,11 +438,10 @@ private:
auto distinctMDSubprogram = [&]() {
return DISubprogram::getDistinct(
MDS->getContext(), FileAndScope, MDS->getName(), LinkageName,
- FileAndScope, MDS->getLine(), Type, MDS->isLocalToUnit(),
- MDS->isDefinition(), MDS->getScopeLine(), ContainingType,
- MDS->getVirtuality(), MDS->getVirtualIndex(),
- MDS->getThisAdjustment(), MDS->getFlags(), MDS->isOptimized(), Unit,
- TemplateParams, Declaration, Variables);
+ FileAndScope, MDS->getLine(), Type, MDS->getScopeLine(),
+ ContainingType, MDS->getVirtualIndex(), MDS->getThisAdjustment(),
+ MDS->getFlags(), MDS->getSPFlags(), Unit, TemplateParams, Declaration,
+ Variables);
};
if (MDS->isDistinct())
@@ -450,11 +449,9 @@ private:
auto *NewMDS = DISubprogram::get(
MDS->getContext(), FileAndScope, MDS->getName(), LinkageName,
- FileAndScope, MDS->getLine(), Type, MDS->isLocalToUnit(),
- MDS->isDefinition(), MDS->getScopeLine(), ContainingType,
- MDS->getVirtuality(), MDS->getVirtualIndex(), MDS->getThisAdjustment(),
- MDS->getFlags(), MDS->isOptimized(), Unit, TemplateParams, Declaration,
- Variables);
+ FileAndScope, MDS->getLine(), Type, MDS->getScopeLine(), ContainingType,
+ MDS->getVirtualIndex(), MDS->getThisAdjustment(), MDS->getFlags(),
+ MDS->getSPFlags(), Unit, TemplateParams, Declaration, Variables);
StringRef OldLinkageName = MDS->getLinkageName();
@@ -491,7 +488,8 @@ private:
CU->getSplitDebugFilename(), DICompileUnit::LineTablesOnly, EnumTypes,
RetainedTypes, GlobalVariables, ImportedEntities, CU->getMacros(),
CU->getDWOId(), CU->getSplitDebugInlining(),
- CU->getDebugInfoForProfiling(), CU->getGnuPubnames());
+ CU->getDebugInfoForProfiling(), CU->getNameTableKind(),
+ CU->getRangesBaseAddress());
}
DILocation *getReplacementMDLocation(DILocation *MLD) {
@@ -690,8 +688,7 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
void Instruction::applyMergedLocation(const DILocation *LocA,
const DILocation *LocB) {
- setDebugLoc(DILocation::getMergedLocation(LocA, LocB,
- DILocation::WithGeneratedLocation));
+ setDebugLoc(DILocation::getMergedLocation(LocA, LocB));
}
//===----------------------------------------------------------------------===//
@@ -700,8 +697,9 @@ void Instruction::applyMergedLocation(const DILocation *LocA,
static unsigned map_from_llvmDWARFsourcelanguage(LLVMDWARFSourceLanguage lang) {
switch (lang) {
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
-case LLVMDWARFSourceLanguage##NAME: return ID;
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \
+ case LLVMDWARFSourceLanguage##NAME: \
+ return ID;
#include "llvm/BinaryFormat/Dwarf.def"
#undef HANDLE_DW_LANG
}
@@ -720,6 +718,11 @@ static LLVMDIFlags map_to_llvmDIFlags(DINode::DIFlags Flags) {
return static_cast<LLVMDIFlags>(Flags);
}
+static DISubprogram::DISPFlags
+pack_into_DISPFlags(bool IsLocalToUnit, bool IsDefinition, bool IsOptimized) {
+ return DISubprogram::toSPFlags(IsLocalToUnit, IsDefinition, IsOptimized);
+}
+
unsigned LLVMDebugMetadataVersion() {
return DEBUG_METADATA_VERSION;
}
@@ -803,9 +806,10 @@ LLVMMetadataRef LLVMDIBuilderCreateFunction(
unsigned ScopeLine, LLVMDIFlags Flags, LLVMBool IsOptimized) {
return wrap(unwrap(Builder)->createFunction(
unwrapDI<DIScope>(Scope), {Name, NameLen}, {LinkageName, LinkageNameLen},
- unwrapDI<DIFile>(File), LineNo, unwrapDI<DISubroutineType>(Ty),
- IsLocalToUnit, IsDefinition, ScopeLine, map_from_llvmDIFlags(Flags),
- IsOptimized, nullptr, nullptr, nullptr));
+ unwrapDI<DIFile>(File), LineNo, unwrapDI<DISubroutineType>(Ty), ScopeLine,
+ map_from_llvmDIFlags(Flags),
+ pack_into_DISPFlags(IsLocalToUnit, IsDefinition, IsOptimized), nullptr,
+ nullptr, nullptr));
}
@@ -948,9 +952,11 @@ LLVMDIBuilderCreateVectorType(LLVMDIBuilderRef Builder, uint64_t Size,
LLVMMetadataRef
LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef Builder, const char *Name,
size_t NameLen, uint64_t SizeInBits,
- LLVMDWARFTypeEncoding Encoding) {
+ LLVMDWARFTypeEncoding Encoding,
+ LLVMDIFlags Flags) {
return wrap(unwrap(Builder)->createBasicType({Name, NameLen},
- SizeInBits, Encoding));
+ SizeInBits, Encoding,
+ map_from_llvmDIFlags(Flags)));
}
LLVMMetadataRef LLVMDIBuilderCreatePointerType(
@@ -1219,23 +1225,16 @@ LLVMDIBuilderCreateConstantValueExpression(LLVMDIBuilderRef Builder,
return wrap(unwrap(Builder)->createConstantValueExpression(Value));
}
-LLVMMetadataRef
-LLVMDIBuilderCreateGlobalVariableExpression(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- const char *Name, size_t NameLen,
- const char *Linkage, size_t LinkLen,
- LLVMMetadataRef File,
- unsigned LineNo,
- LLVMMetadataRef Ty,
- LLVMBool LocalToUnit,
- LLVMMetadataRef Expr,
- LLVMMetadataRef Decl,
- uint32_t AlignInBits) {
+LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
+ size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File,
+ unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
+ LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits) {
return wrap(unwrap(Builder)->createGlobalVariableExpression(
- unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LinkLen},
- unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty),
- LocalToUnit, unwrap<DIExpression>(Expr),
- unwrapDI<MDNode>(Decl), AlignInBits));
+ unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LinkLen},
+ unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty), LocalToUnit,
+ unwrap<DIExpression>(Expr), unwrapDI<MDNode>(Decl),
+ nullptr, AlignInBits));
}
LLVMMetadataRef LLVMTemporaryMDNode(LLVMContextRef Ctx, LLVMMetadataRef *Data,
@@ -1255,26 +1254,21 @@ void LLVMMetadataReplaceAllUsesWith(LLVMMetadataRef TargetMetadata,
MDNode::deleteTemporary(Node);
}
-LLVMMetadataRef
-LLVMDIBuilderCreateTempGlobalVariableFwdDecl(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- const char *Name, size_t NameLen,
- const char *Linkage, size_t LnkLen,
- LLVMMetadataRef File,
- unsigned LineNo,
- LLVMMetadataRef Ty,
- LLVMBool LocalToUnit,
- LLVMMetadataRef Decl,
- uint32_t AlignInBits) {
+LLVMMetadataRef LLVMDIBuilderCreateTempGlobalVariableFwdDecl(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
+ size_t NameLen, const char *Linkage, size_t LnkLen, LLVMMetadataRef File,
+ unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
+ LLVMMetadataRef Decl, uint32_t AlignInBits) {
return wrap(unwrap(Builder)->createTempGlobalVariableFwdDecl(
- unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LnkLen},
- unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty),
- LocalToUnit, unwrapDI<MDNode>(Decl), AlignInBits));
+ unwrapDI<DIScope>(Scope), {Name, NameLen}, {Linkage, LnkLen},
+ unwrapDI<DIFile>(File), LineNo, unwrapDI<DIType>(Ty), LocalToUnit,
+ unwrapDI<MDNode>(Decl), nullptr, AlignInBits));
}
-LLVMValueRef LLVMDIBuilderInsertDeclareBefore(
- LLVMDIBuilderRef Builder, LLVMValueRef Storage, LLVMMetadataRef VarInfo,
- LLVMMetadataRef Expr, LLVMMetadataRef DL, LLVMValueRef Instr) {
+LLVMValueRef
+LLVMDIBuilderInsertDeclareBefore(LLVMDIBuilderRef Builder, LLVMValueRef Storage,
+ LLVMMetadataRef VarInfo, LLVMMetadataRef Expr,
+ LLVMMetadataRef DL, LLVMValueRef Instr) {
return wrap(unwrap(Builder)->insertDeclare(
unwrap(Storage), unwrap<DILocalVariable>(VarInfo),
unwrap<DIExpression>(Expr), unwrap<DILocation>(DL),
@@ -1353,3 +1347,14 @@ LLVMMetadataRef LLVMGetSubprogram(LLVMValueRef Func) {
void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) {
unwrap<Function>(Func)->setSubprogram(unwrap<DISubprogram>(SP));
}
+
+LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata) {
+ switch(unwrap(Metadata)->getMetadataID()) {
+#define HANDLE_METADATA_LEAF(CLASS) \
+ case Metadata::CLASS##Kind: \
+ return (LLVMMetadataKind)LLVM##CLASS##MetadataKind;
+#include "llvm/IR/Metadata.def"
+ default:
+ return (LLVMMetadataKind)LLVMGenericDINodeMetadataKind;
+ }
+}
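A sketch of the widened C binding for basic types, assuming a builder obtained from LLVMCreateDIBuilder; the encoding is a raw DW_ATE_* value and the new trailing argument carries the DIFlags:

    #include <llvm-c/DebugInfo.h>
    #include <string.h>

    LLVMMetadataRef makeIntDIType(LLVMDIBuilderRef DIB) {
      /* 0x05 is DW_ATE_signed; flags are the new final parameter. */
      return LLVMDIBuilderCreateBasicType(DIB, "int", strlen("int"),
                                          /*SizeInBits=*/32, /*Encoding=*/0x05,
                                          LLVMDIFlagZero);
    }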
diff --git a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
index 910e8c2fb74f..92f3f21f754c 100644
--- a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -14,16 +14,19 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include <numeric>
+
using namespace llvm;
DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned Column, ArrayRef<Metadata *> MDs)
+ unsigned Column, ArrayRef<Metadata *> MDs,
+ bool ImplicitCode)
: MDNode(C, DILocationKind, Storage, MDs) {
assert((MDs.size() == 1 || MDs.size() == 2) &&
"Expected a scope and optional inlined-at");
@@ -33,6 +36,8 @@ DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
SubclassData32 = Line;
SubclassData16 = Column;
+
+ setImplicitCode(ImplicitCode);
}
static void adjustColumn(unsigned &Column) {
@@ -43,15 +48,15 @@ static void adjustColumn(unsigned &Column) {
DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
unsigned Column, Metadata *Scope,
- Metadata *InlinedAt, StorageType Storage,
- bool ShouldCreate) {
+ Metadata *InlinedAt, bool ImplicitCode,
+ StorageType Storage, bool ShouldCreate) {
// Fixup column.
adjustColumn(Column);
if (Storage == Uniqued) {
- if (auto *N =
- getUniqued(Context.pImpl->DILocations,
- DILocationInfo::KeyTy(Line, Column, Scope, InlinedAt)))
+ if (auto *N = getUniqued(Context.pImpl->DILocations,
+ DILocationInfo::KeyTy(Line, Column, Scope,
+ InlinedAt, ImplicitCode)))
return N;
if (!ShouldCreate)
return nullptr;
@@ -63,36 +68,94 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
Ops.push_back(Scope);
if (InlinedAt)
Ops.push_back(InlinedAt);
- return storeImpl(new (Ops.size())
- DILocation(Context, Storage, Line, Column, Ops),
+ return storeImpl(new (Ops.size()) DILocation(Context, Storage, Line, Column,
+ Ops, ImplicitCode),
Storage, Context.pImpl->DILocations);
}
const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
- const DILocation *LocB,
- bool GenerateLocation) {
+ const DILocation *LocB) {
if (!LocA || !LocB)
return nullptr;
- if (LocA == LocB || !LocA->canDiscriminate(*LocB))
+ if (LocA == LocB)
return LocA;
- if (!GenerateLocation)
- return nullptr;
-
SmallPtrSet<DILocation *, 5> InlinedLocationsA;
for (DILocation *L = LocA->getInlinedAt(); L; L = L->getInlinedAt())
InlinedLocationsA.insert(L);
+ SmallSet<std::pair<DIScope *, DILocation *>, 5> Locations;
+ DIScope *S = LocA->getScope();
+ DILocation *L = LocA->getInlinedAt();
+ while (S) {
+ Locations.insert(std::make_pair(S, L));
+ S = S->getScope().resolve();
+ if (!S && L) {
+ S = L->getScope();
+ L = L->getInlinedAt();
+ }
+ }
const DILocation *Result = LocB;
- for (DILocation *L = LocB->getInlinedAt(); L; L = L->getInlinedAt()) {
- Result = L;
- if (InlinedLocationsA.count(L))
+ S = LocB->getScope();
+ L = LocB->getInlinedAt();
+ while (S) {
+ if (Locations.count(std::make_pair(S, L)))
break;
+ S = S->getScope().resolve();
+ if (!S && L) {
+ S = L->getScope();
+ L = L->getInlinedAt();
+ }
}
- return DILocation::get(Result->getContext(), 0, 0, Result->getScope(),
- Result->getInlinedAt());
+
+ // If the two locations are irreconsilable, just pick one. This is misleading,
+ // but on the other hand, it's a "line 0" location.
+ if (!S || !isa<DILocalScope>(S))
+ S = LocA->getScope();
+ return DILocation::get(Result->getContext(), 0, 0, S, L);
}
+Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI) {
+ SmallVector<unsigned, 3> Components = {BD, DF, CI};
+ uint64_t RemainingWork = 0U;
+ // We use RemainingWork to figure out if we have no remaining components to
+ // encode. For example: if BD != 0 but DF == 0 && CI == 0, we don't need to
+ // encode anything for the latter 2.
+ // Since any of the input components is at most 32 bits, their sum will be
+ // less than 34 bits, and thus RemainingWork won't overflow.
+ RemainingWork = std::accumulate(Components.begin(), Components.end(), RemainingWork);
+
+ int I = 0;
+ unsigned Ret = 0;
+ unsigned NextBitInsertionIndex = 0;
+ while (RemainingWork > 0) {
+ unsigned C = Components[I++];
+ RemainingWork -= C;
+ unsigned EC = encodeComponent(C);
+ Ret |= (EC << NextBitInsertionIndex);
+ NextBitInsertionIndex += encodingBits(C);
+ }
+
+ // Encoding may be unsuccessful because of overflow. We determine success by
+ // checking equivalence of components before & after encoding. Alternatively,
+ // we could determine Success during encoding, but the current alternative is
+ // simpler.
+ unsigned TBD, TDF, TCI = 0;
+ decodeDiscriminator(Ret, TBD, TDF, TCI);
+ if (TBD == BD && TDF == DF && TCI == CI)
+ return Ret;
+ return None;
+}
+
+void DILocation::decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
+ unsigned &CI) {
+ BD = getUnsignedFromPrefixEncoding(D);
+ DF = getUnsignedFromPrefixEncoding(getNextComponentInDiscriminator(D));
+ CI = getUnsignedFromPrefixEncoding(
+ getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
+}
+
+
DINode::DIFlags DINode::getFlag(StringRef Flag) {
return StringSwitch<DIFlags>(Flag)
#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME)
@@ -274,13 +337,14 @@ DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, int64_t Value,
DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag,
MDString *Name, uint64_t SizeInBits,
uint32_t AlignInBits, unsigned Encoding,
- StorageType Storage, bool ShouldCreate) {
+ DIFlags Flags, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIBasicType,
- (Tag, Name, SizeInBits, AlignInBits, Encoding));
+ (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags));
Metadata *Ops[] = {nullptr, nullptr, Name};
- DEFINE_GETIMPL_STORE(DIBasicType, (Tag, SizeInBits, AlignInBits, Encoding),
- Ops);
+ DEFINE_GETIMPL_STORE(DIBasicType, (Tag, SizeInBits, AlignInBits, Encoding,
+ Flags), Ops);
}
Optional<DIBasicType::Signedness> DIBasicType::getSignedness() const {
@@ -449,7 +513,8 @@ DICompileUnit *DICompileUnit::getImpl(
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros,
uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling,
- bool GnuPubnames, StorageType Storage, bool ShouldCreate) {
+ unsigned NameTableKind, bool RangesBaseAddress, StorageType Storage,
+ bool ShouldCreate) {
assert(Storage != Uniqued && "Cannot unique DICompileUnit");
assert(isCanonical(Producer) && "Expected canonical MDString");
assert(isCanonical(Flags) && "Expected canonical MDString");
@@ -462,7 +527,8 @@ DICompileUnit *DICompileUnit::getImpl(
return storeImpl(new (array_lengthof(Ops)) DICompileUnit(
Context, Storage, SourceLanguage, IsOptimized,
RuntimeVersion, EmissionKind, DWOId, SplitDebugInlining,
- DebugInfoForProfiling, GnuPubnames, Ops),
+ DebugInfoForProfiling, NameTableKind, RangesBaseAddress,
+ Ops),
Storage);
}
@@ -472,6 +538,16 @@ DICompileUnit::getEmissionKind(StringRef Str) {
.Case("NoDebug", NoDebug)
.Case("FullDebug", FullDebug)
.Case("LineTablesOnly", LineTablesOnly)
+ .Case("DebugDirectivesOnly", DebugDirectivesOnly)
+ .Default(None);
+}
+
+Optional<DICompileUnit::DebugNameTableKind>
+DICompileUnit::getNameTableKind(StringRef Str) {
+ return StringSwitch<Optional<DebugNameTableKind>>(Str)
+ .Case("Default", DebugNameTableKind::Default)
+ .Case("GNU", DebugNameTableKind::GNU)
+ .Case("None", DebugNameTableKind::None)
.Default(None);
}
@@ -480,6 +556,19 @@ const char *DICompileUnit::emissionKindString(DebugEmissionKind EK) {
case NoDebug: return "NoDebug";
case FullDebug: return "FullDebug";
case LineTablesOnly: return "LineTablesOnly";
+ case DebugDirectivesOnly: return "DebugDirectivesOnly";
+ }
+ return nullptr;
+}
+
+const char *DICompileUnit::nameTableKindString(DebugNameTableKind NTK) {
+ switch (NTK) {
+ case DebugNameTableKind::Default:
+ return nullptr;
+ case DebugNameTableKind::GNU:
+ return "GNU";
+ case DebugNameTableKind::None:
+ return "None";
}
return nullptr;
}
@@ -496,21 +585,55 @@ DILocalScope *DILocalScope::getNonLexicalBlockFileScope() const {
return const_cast<DILocalScope *>(this);
}
+DISubprogram::DISPFlags DISubprogram::getFlag(StringRef Flag) {
+ return StringSwitch<DISPFlags>(Flag)
+#define HANDLE_DISP_FLAG(ID, NAME) .Case("DISPFlag" #NAME, SPFlag##NAME)
+#include "llvm/IR/DebugInfoFlags.def"
+ .Default(SPFlagZero);
+}
+
+StringRef DISubprogram::getFlagString(DISPFlags Flag) {
+ switch (Flag) {
+ // Appease a warning.
+ case SPFlagVirtuality:
+ return "";
+#define HANDLE_DISP_FLAG(ID, NAME) \
+ case SPFlag##NAME: \
+ return "DISPFlag" #NAME;
+#include "llvm/IR/DebugInfoFlags.def"
+ }
+ return "";
+}
+
+DISubprogram::DISPFlags
+DISubprogram::splitFlags(DISPFlags Flags,
+ SmallVectorImpl<DISPFlags> &SplitFlags) {
+ // Multi-bit fields can require special handling. In our case, however, the
+ // only multi-bit field is virtuality, and all its values happen to be
+ // single-bit values, so the right behavior just falls out.
+#define HANDLE_DISP_FLAG(ID, NAME) \
+ if (DISPFlags Bit = Flags & SPFlag##NAME) { \
+ SplitFlags.push_back(Bit); \
+ Flags &= ~Bit; \
+ }
+#include "llvm/IR/DebugInfoFlags.def"
+ return Flags;
+}
+
DISubprogram *DISubprogram::getImpl(
LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
+ unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex,
+ int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes,
Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(
- DISubprogram, (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, ScopeLine, ContainingType, Virtuality,
- VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit,
- TemplateParams, Declaration, RetainedNodes, ThrownTypes));
+ DEFINE_GETIMPL_LOOKUP(DISubprogram,
+ (Scope, Name, LinkageName, File, Line, Type, ScopeLine,
+ ContainingType, VirtualIndex, ThisAdjustment, Flags,
+ SPFlags, Unit, TemplateParams, Declaration,
+ RetainedNodes, ThrownTypes));
SmallVector<Metadata *, 11> Ops = {
File, Scope, Name, LinkageName, Type, Unit,
Declaration, RetainedNodes, ContainingType, TemplateParams, ThrownTypes};
@@ -522,11 +645,10 @@ DISubprogram *DISubprogram::getImpl(
Ops.pop_back();
}
}
- DEFINE_GETIMPL_STORE_N(DISubprogram,
- (Line, ScopeLine, Virtuality, VirtualIndex,
- ThisAdjustment, Flags, IsLocalToUnit, IsDefinition,
- IsOptimized),
- Ops, Ops.size());
+ DEFINE_GETIMPL_STORE_N(
+ DISubprogram,
+ (Line, ScopeLine, VirtualIndex, ThisAdjustment, Flags, SPFlags), Ops,
+ Ops.size());
}
bool DISubprogram::describes(const Function *F) const {
@@ -609,19 +731,24 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line,
Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration,
- uint32_t AlignInBits, StorageType Storage,
- bool ShouldCreate) {
+ Metadata *TemplateParams, uint32_t AlignInBits,
+ StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DIGlobalVariable,
- (Scope, Name, LinkageName, File, Line, Type,
- IsLocalToUnit, IsDefinition,
- StaticDataMemberDeclaration, AlignInBits));
- Metadata *Ops[] = {
- Scope, Name, File, Type, Name, LinkageName, StaticDataMemberDeclaration};
+ DEFINE_GETIMPL_LOOKUP(DIGlobalVariable, (Scope, Name, LinkageName, File, Line,
+ Type, IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration,
+ TemplateParams, AlignInBits));
+ Metadata *Ops[] = {Scope,
+ Name,
+ File,
+ Type,
+ Name,
+ LinkageName,
+ StaticDataMemberDeclaration,
+ TemplateParams};
DEFINE_GETIMPL_STORE(DIGlobalVariable,
- (Line, IsLocalToUnit, IsDefinition, AlignInBits),
- Ops);
+ (Line, IsLocalToUnit, IsDefinition, AlignInBits), Ops);
}
DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope,
diff --git a/contrib/llvm/lib/IR/DebugLoc.cpp b/contrib/llvm/lib/IR/DebugLoc.cpp
index 36f3e179a2c0..10ec98ac7e6c 100644
--- a/contrib/llvm/lib/IR/DebugLoc.cpp
+++ b/contrib/llvm/lib/IR/DebugLoc.cpp
@@ -56,15 +56,28 @@ DebugLoc DebugLoc::getFnDebugLoc() const {
return DebugLoc();
}
+bool DebugLoc::isImplicitCode() const {
+ if (DILocation *Loc = get()) {
+ return Loc->isImplicitCode();
+ }
+ return true;
+}
+
+void DebugLoc::setImplicitCode(bool ImplicitCode) {
+ if (DILocation *Loc = get()) {
+ Loc->setImplicitCode(ImplicitCode);
+ }
+}
+
DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope,
- const MDNode *InlinedAt) {
+ const MDNode *InlinedAt, bool ImplicitCode) {
// If no scope is available, this is an unknown location.
if (!Scope)
return DebugLoc();
return DILocation::get(Scope->getContext(), Line, Col,
const_cast<MDNode *>(Scope),
- const_cast<MDNode *>(InlinedAt));
+ const_cast<MDNode *>(InlinedAt), ImplicitCode);
}
DebugLoc DebugLoc::appendInlinedAt(DebugLoc DL, DILocation *InlinedAt,
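A minimal sketch (not part of the patch) of how the new ImplicitCode parameter threads through DebugLoc::get; the helper name and the assumption that Scope comes from existing debug info are mine:

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
using namespace llvm;

// Build a line-0 location tagged as compiler-generated ("implicit") code.
// Scope is assumed to be a DIScope taken from existing debug info.
static DebugLoc makeImplicitLoc(DIScope *Scope) {
  DebugLoc DL = DebugLoc::get(/*Line=*/0, /*Col=*/0, Scope,
                              /*InlinedAt=*/nullptr, /*ImplicitCode=*/true);
  assert(DL.isImplicitCode() && "flag is stored on the underlying DILocation");
  return DL;
}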
diff --git a/contrib/llvm/lib/IR/DiagnosticInfo.cpp b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
index 5ddb1196b072..dc957ab7dad9 100644
--- a/contrib/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
@@ -33,9 +33,10 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
#include <atomic>
#include <cassert>
#include <memory>
@@ -103,10 +104,15 @@ void DiagnosticInfoPGOProfile::print(DiagnosticPrinter &DP) const {
DP << getMsg();
}
+void DiagnosticInfo::anchor() {}
+void DiagnosticInfoStackSize::anchor() {}
+void DiagnosticInfoWithLocationBase::anchor() {}
+void DiagnosticInfoIROptimization::anchor() {}
+
DiagnosticLocation::DiagnosticLocation(const DebugLoc &DL) {
if (!DL)
return;
- Filename = DL->getFilename();
+ File = DL->getFile();
Line = DL->getLine();
Column = DL->getColumn();
}
@@ -114,17 +120,36 @@ DiagnosticLocation::DiagnosticLocation(const DebugLoc &DL) {
DiagnosticLocation::DiagnosticLocation(const DISubprogram *SP) {
if (!SP)
return;
- Filename = SP->getFilename();
+
+ File = SP->getFile();
Line = SP->getScopeLine();
Column = 0;
}
-void DiagnosticInfoWithLocationBase::getLocation(StringRef *Filename,
- unsigned *Line,
- unsigned *Column) const {
- *Filename = Loc.getFilename();
- *Line = Loc.getLine();
- *Column = Loc.getColumn();
+StringRef DiagnosticLocation::getRelativePath() const {
+ return File->getFilename();
+}
+
+std::string DiagnosticLocation::getAbsolutePath() const {
+ StringRef Name = File->getFilename();
+ if (sys::path::is_absolute(Name))
+ return Name;
+
+ SmallString<128> Path;
+ sys::path::append(Path, File->getDirectory(), Name);
+ return sys::path::remove_leading_dotslash(Path).str();
+}
+
+std::string DiagnosticInfoWithLocationBase::getAbsolutePath() const {
+ return Loc.getAbsolutePath();
+}
+
+void DiagnosticInfoWithLocationBase::getLocation(StringRef &RelativePath,
+ unsigned &Line,
+ unsigned &Column) const {
+ RelativePath = Loc.getRelativePath();
+ Line = Loc.getLine();
+ Column = Loc.getColumn();
}
const std::string DiagnosticInfoWithLocationBase::getLocationStr() const {
@@ -132,7 +157,7 @@ const std::string DiagnosticInfoWithLocationBase::getLocationStr() const {
unsigned Line = 0;
unsigned Column = 0;
if (isLocationAvailable())
- getLocation(&Filename, &Line, &Column);
+ getLocation(Filename, Line, Column);
return (Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
}
@@ -346,6 +371,9 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
return OS.str();
}
+void OptimizationRemarkAnalysisFPCommute::anchor() {}
+void OptimizationRemarkAnalysisAliasing::anchor() {}
+
namespace llvm {
namespace yaml {
@@ -399,7 +427,7 @@ template <> struct MappingTraits<DiagnosticLocation> {
static void mapping(IO &io, DiagnosticLocation &DL) {
assert(io.outputting() && "input not yet implemented");
- StringRef File = DL.getFilename();
+ StringRef File = DL.getRelativePath();
unsigned Line = DL.getLine();
unsigned Col = DL.getColumn();
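A sketch of the reworked location accessors (printDiagLoc is illustrative, not an LLVM API):

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// getLocation() now fills references instead of pointers, and the absolute
// path (DIFile directory + filename) is available directly.
static void printDiagLoc(const DiagnosticInfoWithLocationBase &DI,
                         raw_ostream &OS) {
  if (!DI.isLocationAvailable())
    return;
  StringRef RelPath;
  unsigned Line = 0, Col = 0;
  DI.getLocation(RelPath, Line, Col);
  OS << DI.getAbsolutePath() << " (" << RelPath << ":" << Line << ":" << Col
     << ")\n";
}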
diff --git a/contrib/llvm/lib/IR/DomTreeUpdater.cpp b/contrib/llvm/lib/IR/DomTreeUpdater.cpp
index f035a86eddae..b72c1b77c2ce 100644
--- a/contrib/llvm/lib/IR/DomTreeUpdater.cpp
+++ b/contrib/llvm/lib/IR/DomTreeUpdater.cpp
@@ -152,39 +152,34 @@ bool DomTreeUpdater::forceFlushDeletedBB() {
return true;
}
-bool DomTreeUpdater::recalculate(Function &F) {
- if (!DT && !PDT)
- return false;
+void DomTreeUpdater::recalculate(Function &F) {
if (Strategy == UpdateStrategy::Eager) {
if (DT)
DT->recalculate(F);
if (PDT)
PDT->recalculate(F);
- return true;
+ return;
}
+ // There is little performance gain in deferring the recalculation under the
+ // Lazy UpdateStrategy, so we recalculate the available trees immediately.
+
// Prevent forceFlushDeletedBB() from erasing DomTree or PostDomTree nodes.
IsRecalculatingDomTree = IsRecalculatingPostDomTree = true;
// Because all trees are going to be up-to-date after recalculation,
// flush awaiting deleted BasicBlocks.
- if (forceFlushDeletedBB() || hasPendingUpdates()) {
- if (DT)
- DT->recalculate(F);
- if (PDT)
- PDT->recalculate(F);
-
- // Resume forceFlushDeletedBB() to erase DomTree or PostDomTree nodes.
- IsRecalculatingDomTree = IsRecalculatingPostDomTree = false;
- PendDTUpdateIndex = PendPDTUpdateIndex = PendUpdates.size();
- dropOutOfDateUpdates();
- return true;
- }
+ forceFlushDeletedBB();
+ if (DT)
+ DT->recalculate(F);
+ if (PDT)
+ PDT->recalculate(F);
// Resume forceFlushDeletedBB() to erase DomTree or PostDomTree nodes.
IsRecalculatingDomTree = IsRecalculatingPostDomTree = false;
- return false;
+ PendDTUpdateIndex = PendPDTUpdateIndex = PendUpdates.size();
+ dropOutOfDateUpdates();
}
bool DomTreeUpdater::hasPendingUpdates() const {
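A rough usage sketch of the now-void recalculate() (the helper name is hypothetical):

#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// recalculate() no longer reports whether anything changed: it flushes
// pending block deletions and unconditionally rebuilds every tree it holds.
static void refreshTrees(DomTreeUpdater &DTU, Function &F) {
  DTU.recalculate(F);
  assert(!DTU.hasPendingUpdates() && "queued updates are dropped as stale");
}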
diff --git a/contrib/llvm/lib/IR/Dominators.cpp b/contrib/llvm/lib/IR/Dominators.cpp
index d8971e05f476..cf9f5759ba53 100644
--- a/contrib/llvm/lib/IR/Dominators.cpp
+++ b/contrib/llvm/lib/IR/Dominators.cpp
@@ -41,7 +41,7 @@ static constexpr bool ExpensiveChecksEnabled = false;
#endif
bool BasicBlockEdge::isSingleEdge() const {
- const TerminatorInst *TI = Start->getTerminator();
+ const Instruction *TI = Start->getTerminator();
unsigned NumEdgesToEnd = 0;
for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) {
if (TI->getSuccessor(i) == End)
@@ -67,12 +67,17 @@ template class llvm::DomTreeNodeBase<BasicBlock>;
template class llvm::DominatorTreeBase<BasicBlock, false>; // DomTreeBase
template class llvm::DominatorTreeBase<BasicBlock, true>; // PostDomTreeBase
-template struct llvm::DomTreeBuilder::Update<BasicBlock *>;
+template class llvm::cfg::Update<BasicBlock *>;
template void llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBDomTree>(
DomTreeBuilder::BBDomTree &DT);
+template void
+llvm::DomTreeBuilder::CalculateWithUpdates<DomTreeBuilder::BBDomTree>(
+ DomTreeBuilder::BBDomTree &DT, BBUpdates U);
+
template void llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBPostDomTree>(
DomTreeBuilder::BBPostDomTree &DT);
+// No CalculateWithUpdates<PostDomTree> instantiation, unless a use case arises.
template void llvm::DomTreeBuilder::InsertEdge<DomTreeBuilder::BBDomTree>(
DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To);
@@ -372,193 +377,3 @@ void DominatorTreeWrapperPass::print(raw_ostream &OS, const Module *) const {
DT.print(OS);
}
-//===----------------------------------------------------------------------===//
-// DeferredDominance Implementation
-//===----------------------------------------------------------------------===//
-//
-// The implementation details of the DeferredDominance class which allows
-// one to queue updates to a DominatorTree.
-//
-//===----------------------------------------------------------------------===//
-
-/// Queues multiple updates and discards duplicates.
-void DeferredDominance::applyUpdates(
- ArrayRef<DominatorTree::UpdateType> Updates) {
- SmallVector<DominatorTree::UpdateType, 8> Seen;
- for (auto U : Updates)
- // Avoid duplicates to applyUpdate() to save on analysis.
- if (std::none_of(Seen.begin(), Seen.end(),
- [U](DominatorTree::UpdateType S) { return S == U; })) {
- Seen.push_back(U);
- applyUpdate(U.getKind(), U.getFrom(), U.getTo());
- }
-}
-
-/// Helper method for a single edge insertion. It's almost always better
-/// to batch updates and call applyUpdates to quickly remove duplicate edges.
-/// This is best used when there is only a single insertion needed to update
-/// Dominators.
-void DeferredDominance::insertEdge(BasicBlock *From, BasicBlock *To) {
- applyUpdate(DominatorTree::Insert, From, To);
-}
-
-/// Helper method for a single edge deletion. It's almost always better
-/// to batch updates and call applyUpdates to quickly remove duplicate edges.
-/// This is best used when there is only a single deletion needed to update
-/// Dominators.
-void DeferredDominance::deleteEdge(BasicBlock *From, BasicBlock *To) {
- applyUpdate(DominatorTree::Delete, From, To);
-}
-
-/// Delays the deletion of a basic block until a flush() event.
-void DeferredDominance::deleteBB(BasicBlock *DelBB) {
- assert(DelBB && "Invalid push_back of nullptr DelBB.");
- assert(pred_empty(DelBB) && "DelBB has one or more predecessors.");
- // DelBB is unreachable and all its instructions are dead.
- while (!DelBB->empty()) {
- Instruction &I = DelBB->back();
- // Replace used instructions with an arbitrary value (undef).
- if (!I.use_empty())
- I.replaceAllUsesWith(llvm::UndefValue::get(I.getType()));
- DelBB->getInstList().pop_back();
- }
- // Make sure DelBB has a valid terminator instruction. As long as DelBB is a
- // Child of Function F it must contain valid IR.
- new UnreachableInst(DelBB->getContext(), DelBB);
- DeletedBBs.insert(DelBB);
-}
-
-/// Returns true if DelBB is awaiting deletion at a flush() event.
-bool DeferredDominance::pendingDeletedBB(BasicBlock *DelBB) {
- if (DeletedBBs.empty())
- return false;
- return DeletedBBs.count(DelBB) != 0;
-}
-
-/// Returns true if pending DT updates are queued for a flush() event.
-bool DeferredDominance::pending() { return !PendUpdates.empty(); }
-
-/// Flushes all pending updates and block deletions. Returns a
-/// correct DominatorTree reference to be used by the caller for analysis.
-DominatorTree &DeferredDominance::flush() {
- // Updates to DT must happen before blocks are deleted below. Otherwise the
- // DT traversal will encounter badref blocks and assert.
- if (!PendUpdates.empty()) {
- DT.applyUpdates(PendUpdates);
- PendUpdates.clear();
- }
- flushDelBB();
- return DT;
-}
-
-/// Drops all internal state and forces a (slow) recalculation of the
-/// DominatorTree based on the current state of the LLVM IR in F. This should
-/// only be used in corner cases such as the Entry block of F being deleted.
-void DeferredDominance::recalculate(Function &F) {
- // flushDelBB must be flushed before the recalculation. The state of the IR
- // must be consistent before the DT traversal algorithm determines the
- // actual DT.
- if (flushDelBB() || !PendUpdates.empty()) {
- DT.recalculate(F);
- PendUpdates.clear();
- }
-}
-
-/// Debug method to help view the state of pending updates.
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DeferredDominance::dump() const {
- raw_ostream &OS = llvm::dbgs();
- OS << "PendUpdates:\n";
- int I = 0;
- for (auto U : PendUpdates) {
- OS << " " << I << " : ";
- ++I;
- if (U.getKind() == DominatorTree::Insert)
- OS << "Insert, ";
- else
- OS << "Delete, ";
- BasicBlock *From = U.getFrom();
- if (From) {
- auto S = From->getName();
- if (!From->hasName())
- S = "(no name)";
- OS << S << "(" << From << "), ";
- } else {
- OS << "(badref), ";
- }
- BasicBlock *To = U.getTo();
- if (To) {
- auto S = To->getName();
- if (!To->hasName())
- S = "(no_name)";
- OS << S << "(" << To << ")\n";
- } else {
- OS << "(badref)\n";
- }
- }
- OS << "DeletedBBs:\n";
- I = 0;
- for (auto BB : DeletedBBs) {
- OS << " " << I << " : ";
- ++I;
- if (BB->hasName())
- OS << BB->getName() << "(";
- else
- OS << "(no_name)(";
- OS << BB << ")\n";
- }
-}
-#endif
-
-/// Apply an update (Kind, From, To) to the internal queued updates. The
-/// update is only added when determined to be necessary. Checks for
-/// self-domination, unnecessary updates, duplicate requests, and balanced
-/// pairs of requests are all performed. Returns true if the update is
-/// queued and false if it is discarded.
-bool DeferredDominance::applyUpdate(DominatorTree::UpdateKind Kind,
- BasicBlock *From, BasicBlock *To) {
- if (From == To)
- return false; // Cannot dominate self; discard update.
-
- // Discard updates by inspecting the current state of successors of From.
- // Since applyUpdate() must be called *after* the Terminator of From is
- // altered we can determine if the update is unnecessary.
- bool HasEdge = std::any_of(succ_begin(From), succ_end(From),
- [To](BasicBlock *B) { return B == To; });
- if (Kind == DominatorTree::Insert && !HasEdge)
- return false; // Unnecessary Insert: edge does not exist in IR.
- if (Kind == DominatorTree::Delete && HasEdge)
- return false; // Unnecessary Delete: edge still exists in IR.
-
- // Analyze pending updates to determine if the update is unnecessary.
- DominatorTree::UpdateType Update = {Kind, From, To};
- DominatorTree::UpdateType Invert = {Kind != DominatorTree::Insert
- ? DominatorTree::Insert
- : DominatorTree::Delete,
- From, To};
- for (auto I = PendUpdates.begin(), E = PendUpdates.end(); I != E; ++I) {
- if (Update == *I)
- return false; // Discard duplicate updates.
- if (Invert == *I) {
- // Update and Invert are both valid (equivalent to a no-op). Remove
- // Invert from PendUpdates and discard the Update.
- PendUpdates.erase(I);
- return false;
- }
- }
- PendUpdates.push_back(Update); // Save the valid update.
- return true;
-}
-
-/// Performs all pending basic block deletions. We have to defer the deletion
-/// of these blocks until after the DominatorTree updates are applied. The
-/// internal workings of the DominatorTree code expect every update's From
-/// and To blocks to exist and to be a member of the same Function.
-bool DeferredDominance::flushDelBB() {
- if (DeletedBBs.empty())
- return false;
- for (auto *BB : DeletedBBs)
- BB->eraseFromParent();
- DeletedBBs.clear();
- return true;
-}
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index 72090f5bac3e..a88478b89bfc 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -195,14 +194,19 @@ LLVMContext &Function::getContext() const {
return getType()->getContext();
}
-unsigned Function::getInstructionCount() {
+unsigned Function::getInstructionCount() const {
unsigned NumInstrs = 0;
- for (BasicBlock &BB : BasicBlocks)
+ for (const BasicBlock &BB : BasicBlocks)
NumInstrs += std::distance(BB.instructionsWithoutDebug().begin(),
BB.instructionsWithoutDebug().end());
return NumInstrs;
}
+Function *Function::Create(FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &N, Module &M) {
+ return Create(Ty, Linkage, M.getDataLayout().getProgramAddressSpace(), N, &M);
+}
+
void Function::removeFromParent() {
getParent()->getFunctionList().remove(getIterator());
}
@@ -215,10 +219,19 @@ void Function::eraseFromParent() {
// Function Implementation
//===----------------------------------------------------------------------===//
-Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
- Module *ParentModule)
+static unsigned computeAddrSpace(unsigned AddrSpace, Module *M) {
+ // If AS == -1 and we are passed a valid module pointer, we place the function
+ // in the program address space. Otherwise we default to AS0.
+ if (AddrSpace == static_cast<unsigned>(-1))
+ return M ? M->getDataLayout().getProgramAddressSpace() : 0;
+ return AddrSpace;
+}
+
+Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
+ const Twine &name, Module *ParentModule)
: GlobalObject(Ty, Value::FunctionVal,
- OperandTraits<Function>::op_begin(this), 0, Linkage, name),
+ OperandTraits<Function>::op_begin(this), 0, Linkage, name,
+ computeAddrSpace(AddrSpace, ParentModule)),
NumArgs(Ty->getNumParams()) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
@@ -1243,13 +1256,13 @@ bool Function::hasAddressTaken(const User* *PutOffender) const {
const User *FU = U.getUser();
if (isa<BlockAddress>(FU))
continue;
- if (!isa<CallInst>(FU) && !isa<InvokeInst>(FU)) {
+ const auto *Call = dyn_cast<CallBase>(FU);
+ if (!Call) {
if (PutOffender)
*PutOffender = FU;
return true;
}
- ImmutableCallSite CS(cast<Instruction>(FU));
- if (!CS.isCallee(&U)) {
+ if (!Call->isCallee(&U)) {
if (PutOffender)
*PutOffender = FU;
return true;
@@ -1275,12 +1288,10 @@ bool Function::isDefTriviallyDead() const {
/// callsFunctionThatReturnsTwice - Return true if the function has a call to
/// setjmp or other function that gcc recognizes as "returning twice".
bool Function::callsFunctionThatReturnsTwice() const {
- for (const_inst_iterator
- I = inst_begin(this), E = inst_end(this); I != E; ++I) {
- ImmutableCallSite CS(&*I);
- if (CS && CS.hasFnAttr(Attribute::ReturnsTwice))
- return true;
- }
+ for (const Instruction &I : instructions(this))
+ if (const auto *Call = dyn_cast<CallBase>(&I))
+ if (Call->hasFnAttr(Attribute::ReturnsTwice))
+ return true;
return false;
}
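A sketch of the added Module-reference overload of Function::Create (the function name "helper" is purely illustrative):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// The new overload inserts the function into M and places it in the module's
// program address space (the "P<n>" component of the data layout).
static Function *declareHelper(Module &M, FunctionType *FTy) {
  return Function::Create(FTy, GlobalValue::ExternalLinkage, "helper", M);
}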
diff --git a/contrib/llvm/lib/IR/Globals.cpp b/contrib/llvm/lib/IR/Globals.cpp
index 20b2334a626f..cbd6450a20c9 100644
--- a/contrib/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm/lib/IR/Globals.cpp
@@ -108,6 +108,11 @@ unsigned GlobalValue::getAlignment() const {
return cast<GlobalObject>(this)->getAlignment();
}
+unsigned GlobalValue::getAddressSpace() const {
+ PointerType *PtrTy = getType();
+ return PtrTy->getAddressSpace();
+}
+
void GlobalObject::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
@@ -247,7 +252,7 @@ bool GlobalValue::canIncreaseAlignment() const {
// Conservatively assume ELF if there's no parent pointer.
bool isELF =
(!Parent || Triple(Parent->getTargetTriple()).isOSBinFormatELF());
- if (isELF && hasDefaultVisibility() && !hasLocalLinkage())
+ if (isELF && !isDSOLocal())
return false;
return true;
diff --git a/contrib/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm/lib/IR/IRBuilder.cpp
index 405a56bfb31d..a98189956770 100644
--- a/contrib/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm/lib/IR/IRBuilder.cpp
@@ -50,6 +50,7 @@ GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str,
nullptr, GlobalVariable::NotThreadLocal,
AddressSpace);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ GV->setAlignment(1);
return GV;
}
@@ -730,28 +731,29 @@ CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
return createCallHelper(FnGCRelocate, Args, this, Name);
}
-CallInst *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID,
- Value *LHS, Value *RHS,
- const Twine &Name) {
+CallInst *IRBuilderBase::CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
+ Instruction *FMFSource,
+ const Twine &Name) {
Module *M = BB->getModule();
- Function *Fn = Intrinsic::getDeclaration(M, ID, { LHS->getType() });
- return createCallHelper(Fn, { LHS, RHS }, this, Name);
+ Function *Fn = Intrinsic::getDeclaration(M, ID, {V->getType()});
+ return createCallHelper(Fn, {V}, this, Name, FMFSource);
}
-CallInst *IRBuilderBase::CreateIntrinsic(Intrinsic::ID ID,
- Instruction *FMFSource,
- const Twine &Name) {
+CallInst *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS,
+ Value *RHS,
+ Instruction *FMFSource,
+ const Twine &Name) {
Module *M = BB->getModule();
- Function *Fn = Intrinsic::getDeclaration(M, ID);
- return createCallHelper(Fn, {}, this, Name);
+ Function *Fn = Intrinsic::getDeclaration(M, ID, { LHS->getType() });
+ return createCallHelper(Fn, {LHS, RHS}, this, Name, FMFSource);
}
CallInst *IRBuilderBase::CreateIntrinsic(Intrinsic::ID ID,
+ ArrayRef<Type *> Types,
ArrayRef<Value *> Args,
Instruction *FMFSource,
const Twine &Name) {
- assert(!Args.empty() && "Expected at least one argument to intrinsic");
Module *M = BB->getModule();
- Function *Fn = Intrinsic::getDeclaration(M, ID, { Args.front()->getType() });
+ Function *Fn = Intrinsic::getDeclaration(M, ID, Types);
return createCallHelper(Fn, Args, this, Name, FMFSource);
}
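The reshuffled intrinsic-creation helpers can be exercised like this (a sketch; emitSqrt is not an LLVM API):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// CreateIntrinsic now takes the overloaded types explicitly instead of
// inferring them from the first argument; FMFSource optionally copies
// fast-math flags from an existing instruction onto the new call.
static CallInst *emitSqrt(IRBuilder<> &B, Value *X, Instruction *FMFSource) {
  return B.CreateIntrinsic(Intrinsic::sqrt, {X->getType()}, {X}, FMFSource);
  // Shorthand for single-operand intrinsics:
  //   B.CreateUnaryIntrinsic(Intrinsic::sqrt, X, FMFSource);
}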
diff --git a/contrib/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
index befe1d9ffb1c..43010220b9f3 100644
--- a/contrib/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
@@ -27,7 +27,8 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &) {
- OS << Banner;
+ if (!Banner.empty())
+ OS << Banner << "\n";
if (llvm::isFunctionInPrintList("*"))
M.print(OS, nullptr, ShouldPreserveUseListOrder);
else {
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
index 508db9bcaf19..d861b5288592 100644
--- a/contrib/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -303,6 +303,9 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case CatchPad: return "catchpad";
case CatchSwitch: return "catchswitch";
+ // Standard unary operators...
+ case FNeg: return "fneg";
+
// Standard binary operators...
case Add: return "add";
case FAdd: return "fadd";
@@ -592,7 +595,15 @@ bool Instruction::mayThrow() const {
bool Instruction::isSafeToRemove() const {
return (!isa<CallInst>(this) || !this->mayHaveSideEffects()) &&
- !isa<TerminatorInst>(this);
+ !this->isTerminator();
+}
+
+bool Instruction::isLifetimeStartOrEnd() const {
+ auto II = dyn_cast<IntrinsicInst>(this);
+ if (!II)
+ return false;
+ Intrinsic::ID ID = II->getIntrinsicID();
+ return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
}
const Instruction *Instruction::getNextNonDebugInstruction() const {
@@ -602,6 +613,13 @@ const Instruction *Instruction::getNextNonDebugInstruction() const {
return nullptr;
}
+const Instruction *Instruction::getPrevNonDebugInstruction() const {
+ for (const Instruction *I = getPrevNode(); I; I = I->getPrevNode())
+ if (!isa<DbgInfoIntrinsic>(I))
+ return I;
+ return nullptr;
+}
+
bool Instruction::isAssociative() const {
unsigned Opcode = getOpcode();
if (isAssociative(Opcode))
@@ -617,6 +635,42 @@ bool Instruction::isAssociative() const {
}
}
+unsigned Instruction::getNumSuccessors() const {
+ switch (getOpcode()) {
+#define HANDLE_TERM_INST(N, OPC, CLASS) \
+ case Instruction::OPC: \
+ return static_cast<const CLASS *>(this)->getNumSuccessors();
+#include "llvm/IR/Instruction.def"
+ default:
+ break;
+ }
+ llvm_unreachable("not a terminator");
+}
+
+BasicBlock *Instruction::getSuccessor(unsigned idx) const {
+ switch (getOpcode()) {
+#define HANDLE_TERM_INST(N, OPC, CLASS) \
+ case Instruction::OPC: \
+ return static_cast<const CLASS *>(this)->getSuccessor(idx);
+#include "llvm/IR/Instruction.def"
+ default:
+ break;
+ }
+ llvm_unreachable("not a terminator");
+}
+
+void Instruction::setSuccessor(unsigned idx, BasicBlock *B) {
+ switch (getOpcode()) {
+#define HANDLE_TERM_INST(N, OPC, CLASS) \
+ case Instruction::OPC: \
+ return static_cast<CLASS *>(this)->setSuccessor(idx, B);
+#include "llvm/IR/Instruction.def"
+ default:
+ break;
+ }
+ llvm_unreachable("not a terminator");
+}
+
Instruction *Instruction::cloneImpl() const {
llvm_unreachable("Subclass of Instruction failed to implement cloneImpl");
}
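A small sketch of the successor API now that it lives on Instruction (branchesTo is an illustrative helper, not an LLVM function):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// With TerminatorInst gone, successor queries are answered by Instruction
// itself; non-terminators must be filtered out first.
static bool branchesTo(const Instruction &I, const BasicBlock *Target) {
  if (!I.isTerminator())
    return false;
  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
    if (I.getSuccessor(i) == Target)
      return true;
  return false;
}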
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index 32db918dab97..06b46724a87f 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -65,50 +66,7 @@ AllocaInst::getAllocationSizeInBits(const DataLayout &DL) const {
//===----------------------------------------------------------------------===//
User::op_iterator CallSite::getCallee() const {
- Instruction *II(getInstruction());
- return isCall()
- ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
- : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
-}
-
-//===----------------------------------------------------------------------===//
-// TerminatorInst Class
-//===----------------------------------------------------------------------===//
-
-unsigned TerminatorInst::getNumSuccessors() const {
- switch (getOpcode()) {
-#define HANDLE_TERM_INST(N, OPC, CLASS) \
- case Instruction::OPC: \
- return static_cast<const CLASS *>(this)->getNumSuccessors();
-#include "llvm/IR/Instruction.def"
- default:
- break;
- }
- llvm_unreachable("not a terminator");
-}
-
-BasicBlock *TerminatorInst::getSuccessor(unsigned idx) const {
- switch (getOpcode()) {
-#define HANDLE_TERM_INST(N, OPC, CLASS) \
- case Instruction::OPC: \
- return static_cast<const CLASS *>(this)->getSuccessor(idx);
-#include "llvm/IR/Instruction.def"
- default:
- break;
- }
- llvm_unreachable("not a terminator");
-}
-
-void TerminatorInst::setSuccessor(unsigned idx, BasicBlock *B) {
- switch (getOpcode()) {
-#define HANDLE_TERM_INST(N, OPC, CLASS) \
- case Instruction::OPC: \
- return static_cast<CLASS *>(this)->setSuccessor(idx, B);
-#include "llvm/IR/Instruction.def"
- default:
- break;
- }
- llvm_unreachable("not a terminator");
+ return cast<CallBase>(getInstruction())->op_end() - 1;
}
//===----------------------------------------------------------------------===//
@@ -294,6 +252,112 @@ void LandingPadInst::addClause(Constant *Val) {
}
//===----------------------------------------------------------------------===//
+// CallBase Implementation
+//===----------------------------------------------------------------------===//
+
+Function *CallBase::getCaller() { return getParent()->getParent(); }
+
+bool CallBase::isIndirectCall() const {
+ const Value *V = getCalledValue();
+ if (isa<Function>(V) || isa<Constant>(V))
+ return false;
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ if (CI->isInlineAsm())
+ return false;
+ return true;
+}
+
+Intrinsic::ID CallBase::getIntrinsicID() const {
+ if (auto *F = getCalledFunction())
+ return F->getIntrinsicID();
+ return Intrinsic::not_intrinsic;
+}
+
+bool CallBase::isReturnNonNull() const {
+ if (hasRetAttr(Attribute::NonNull))
+ return true;
+
+ if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
+ !NullPointerIsDefined(getCaller(),
+ getType()->getPointerAddressSpace()))
+ return true;
+
+ return false;
+}
+
+Value *CallBase::getReturnedArgOperand() const {
+ unsigned Index;
+
+ if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
+ return getArgOperand(Index - AttributeList::FirstArgIndex);
+ if (const Function *F = getCalledFunction())
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
+ Index)
+ return getArgOperand(Index - AttributeList::FirstArgIndex);
+
+ return nullptr;
+}
+
+bool CallBase::hasRetAttr(Attribute::AttrKind Kind) const {
+ if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
+ return true;
+
+ // Look at the callee, if available.
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
+ return false;
+}
+
+/// Determine whether the argument or parameter has the given attribute.
+bool CallBase::paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+ assert(ArgNo < getNumArgOperands() && "Param index out of bounds!");
+
+ if (Attrs.hasParamAttribute(ArgNo, Kind))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasParamAttribute(ArgNo, Kind);
+ return false;
+}
+
+bool CallBase::hasFnAttrOnCalledFunction(Attribute::AttrKind Kind) const {
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::FunctionIndex, Kind);
+ return false;
+}
+
+bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const {
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::FunctionIndex, Kind);
+ return false;
+}
+
+CallBase::op_iterator
+CallBase::populateBundleOperandInfos(ArrayRef<OperandBundleDef> Bundles,
+ const unsigned BeginIndex) {
+ auto It = op_begin() + BeginIndex;
+ for (auto &B : Bundles)
+ It = std::copy(B.input_begin(), B.input_end(), It);
+
+ auto *ContextImpl = getContext().pImpl;
+ auto BI = Bundles.begin();
+ unsigned CurrentIndex = BeginIndex;
+
+ for (auto &BOI : bundle_op_infos()) {
+ assert(BI != Bundles.end() && "Incorrect allocation?");
+
+ BOI.Tag = ContextImpl->getOrInsertBundleTag(BI->getTag());
+ BOI.Begin = CurrentIndex;
+ BOI.End = CurrentIndex + BI->input_size();
+ CurrentIndex = BOI.End;
+ BI++;
+ }
+
+ assert(BI == Bundles.end() && "Incorrect allocation?");
+
+ return It;
+}
+
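A sketch exercising the unified CallBase queries above (the helper name and the assumption that ArgNo is a valid argument index are mine):

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// One predicate now covers both call and invoke instructions; ArgNo is
// assumed to be in range for the call site.
static bool passesKnownNonNull(const CallBase &Call, unsigned ArgNo) {
  return !Call.isIndirectCall() &&
         Call.paramHasAttr(ArgNo, Attribute::NonNull);
}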
+//===----------------------------------------------------------------------===//
// CallInst Implementation
//===----------------------------------------------------------------------===//
@@ -302,7 +366,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
this->FTy = FTy;
assert(getNumOperands() == Args.size() + CountBundleInputs(Bundles) + 1 &&
"NumOperands not set up?");
- Op<-1>() = Func;
+ setCalledOperand(Func);
#ifndef NDEBUG
assert((Args.size() == FTy->getNumParams() ||
@@ -315,7 +379,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
"Calling a function with a bad signature!");
#endif
- std::copy(Args.begin(), Args.end(), op_begin());
+ llvm::copy(Args, op_begin());
auto It = populateBundleOperandInfos(Bundles, Args.size());
(void)It;
@@ -324,43 +388,34 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
setName(NameStr);
}
-void CallInst::init(Value *Func, const Twine &NameStr) {
- FTy =
- cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+void CallInst::init(FunctionType *FTy, Value *Func, const Twine &NameStr) {
+ this->FTy = FTy;
assert(getNumOperands() == 1 && "NumOperands not set up?");
- Op<-1>() = Func;
+ setCalledOperand(Func);
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
setName(NameStr);
}
-CallInst::CallInst(Value *Func, const Twine &Name, Instruction *InsertBefore)
- : CallBase<CallInst>(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) - 1, 1,
- InsertBefore) {
- init(Func, Name);
+CallInst::CallInst(FunctionType *Ty, Value *Func, const Twine &Name,
+ Instruction *InsertBefore)
+ : CallBase(Ty->getReturnType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) - 1, 1, InsertBefore) {
+ init(Ty, Func, Name);
}
-CallInst::CallInst(Value *Func, const Twine &Name, BasicBlock *InsertAtEnd)
- : CallBase<CallInst>(
- cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType())
- ->getReturnType(),
- Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) - 1, 1, InsertAtEnd) {
- init(Func, Name);
+CallInst::CallInst(FunctionType *Ty, Value *Func, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : CallBase(Ty->getReturnType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) - 1, 1, InsertAtEnd) {
+ init(Ty, Func, Name);
}
CallInst::CallInst(const CallInst &CI)
- : CallBase<CallInst>(CI.Attrs, CI.FTy, CI.getType(), Instruction::Call,
- OperandTraits<CallBase<CallInst>>::op_end(this) -
- CI.getNumOperands(),
- CI.getNumOperands()) {
+ : CallBase(CI.Attrs, CI.FTy, CI.getType(), Instruction::Call,
+ OperandTraits<CallBase>::op_end(this) - CI.getNumOperands(),
+ CI.getNumOperands()) {
setTailCallKind(CI.getTailCallKind());
setCallingConv(CI.getCallingConv());
@@ -600,11 +655,12 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
const Twine &NameStr) {
this->FTy = FTy;
- assert(getNumOperands() == 3 + Args.size() + CountBundleInputs(Bundles) &&
+ assert((int)getNumOperands() ==
+ ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)) &&
"NumOperands not set up?");
- Op<-3>() = Fn;
- Op<-2>() = IfNormal;
- Op<-1>() = IfException;
+ setNormalDest(IfNormal);
+ setUnwindDest(IfException);
+ setCalledOperand(Fn);
#ifndef NDEBUG
assert(((Args.size() == FTy->getNumParams()) ||
@@ -617,7 +673,7 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
"Invoking a function with a bad signature!");
#endif
- std::copy(Args.begin(), Args.end(), op_begin());
+ llvm::copy(Args, op_begin());
auto It = populateBundleOperandInfos(Bundles, Args.size());
(void)It;
@@ -627,10 +683,9 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal,
}
InvokeInst::InvokeInst(const InvokeInst &II)
- : CallBase<InvokeInst>(II.Attrs, II.FTy, II.getType(), Instruction::Invoke,
- OperandTraits<CallBase<InvokeInst>>::op_end(this) -
- II.getNumOperands(),
- II.getNumOperands()) {
+ : CallBase(II.Attrs, II.FTy, II.getType(), Instruction::Invoke,
+ OperandTraits<CallBase>::op_end(this) - II.getNumOperands(),
+ II.getNumOperands()) {
setCallingConv(II.getCallingConv());
std::copy(II.op_begin(), II.op_end(), op_begin());
std::copy(II.bundle_op_info_begin(), II.bundle_op_info_end(),
@@ -662,55 +717,53 @@ LandingPadInst *InvokeInst::getLandingPadInst() const {
//===----------------------------------------------------------------------===//
ReturnInst::ReturnInst(const ReturnInst &RI)
- : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) -
- RI.getNumOperands(),
- RI.getNumOperands()) {
+ : Instruction(Type::getVoidTy(RI.getContext()), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - RI.getNumOperands(),
+ RI.getNumOperands()) {
if (RI.getNumOperands())
Op<0>() = RI.Op<0>();
SubclassOptionalData = RI.SubclassOptionalData;
}
ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
- InsertBefore) {
+ : Instruction(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertBefore) {
if (retVal)
Op<0>() = retVal;
}
ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
- InsertAtEnd) {
+ : Instruction(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertAtEnd) {
if (retVal)
Op<0>() = retVal;
}
ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
-}
+ : Instruction(Type::getVoidTy(Context), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {}
//===----------------------------------------------------------------------===//
// ResumeInst Implementation
//===----------------------------------------------------------------------===//
ResumeInst::ResumeInst(const ResumeInst &RI)
- : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1) {
+ : Instruction(Type::getVoidTy(RI.getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1) {
Op<0>() = RI.Op<0>();
}
ResumeInst::ResumeInst(Value *Exn, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) {
+ : Instruction(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) {
Op<0>() = Exn;
}
ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) {
Op<0>() = Exn;
}
@@ -719,10 +772,10 @@ ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
//===----------------------------------------------------------------------===//
CleanupReturnInst::CleanupReturnInst(const CleanupReturnInst &CRI)
- : TerminatorInst(CRI.getType(), Instruction::CleanupRet,
- OperandTraits<CleanupReturnInst>::op_end(this) -
- CRI.getNumOperands(),
- CRI.getNumOperands()) {
+ : Instruction(CRI.getType(), Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) -
+ CRI.getNumOperands(),
+ CRI.getNumOperands()) {
setInstructionSubclassData(CRI.getSubclassDataFromInstruction());
Op<0>() = CRI.Op<0>();
if (CRI.hasUnwindDest())
@@ -740,19 +793,19 @@ void CleanupReturnInst::init(Value *CleanupPad, BasicBlock *UnwindBB) {
CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB,
unsigned Values, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()),
- Instruction::CleanupRet,
- OperandTraits<CleanupReturnInst>::op_end(this) - Values,
- Values, InsertBefore) {
+ : Instruction(Type::getVoidTy(CleanupPad->getContext()),
+ Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) - Values,
+ Values, InsertBefore) {
init(CleanupPad, UnwindBB);
}
CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB,
unsigned Values, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()),
- Instruction::CleanupRet,
- OperandTraits<CleanupReturnInst>::op_end(this) - Values,
- Values, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(CleanupPad->getContext()),
+ Instruction::CleanupRet,
+ OperandTraits<CleanupReturnInst>::op_end(this) - Values,
+ Values, InsertAtEnd) {
init(CleanupPad, UnwindBB);
}
@@ -765,25 +818,25 @@ void CatchReturnInst::init(Value *CatchPad, BasicBlock *BB) {
}
CatchReturnInst::CatchReturnInst(const CatchReturnInst &CRI)
- : TerminatorInst(Type::getVoidTy(CRI.getContext()), Instruction::CatchRet,
- OperandTraits<CatchReturnInst>::op_begin(this), 2) {
+ : Instruction(Type::getVoidTy(CRI.getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2) {
Op<0>() = CRI.Op<0>();
Op<1>() = CRI.Op<1>();
}
CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB,
Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
- OperandTraits<CatchReturnInst>::op_begin(this), 2,
- InsertBefore) {
+ : Instruction(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2,
+ InsertBefore) {
init(CatchPad, BB);
}
CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB,
BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
- OperandTraits<CatchReturnInst>::op_begin(this), 2,
- InsertAtEnd) {
+ : Instruction(Type::getVoidTy(BB->getContext()), Instruction::CatchRet,
+ OperandTraits<CatchReturnInst>::op_begin(this), 2,
+ InsertAtEnd) {
init(CatchPad, BB);
}
@@ -795,8 +848,8 @@ CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
unsigned NumReservedValues,
const Twine &NameStr,
Instruction *InsertBefore)
- : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
- InsertBefore) {
+ : Instruction(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+ InsertBefore) {
if (UnwindDest)
++NumReservedValues;
init(ParentPad, UnwindDest, NumReservedValues + 1);
@@ -806,8 +859,8 @@ CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
unsigned NumReservedValues,
const Twine &NameStr, BasicBlock *InsertAtEnd)
- : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
- InsertAtEnd) {
+ : Instruction(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+ InsertAtEnd) {
if (UnwindDest)
++NumReservedValues;
init(ParentPad, UnwindDest, NumReservedValues + 1);
@@ -815,8 +868,8 @@ CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
}
CatchSwitchInst::CatchSwitchInst(const CatchSwitchInst &CSI)
- : TerminatorInst(CSI.getType(), Instruction::CatchSwitch, nullptr,
- CSI.getNumOperands()) {
+ : Instruction(CSI.getType(), Instruction::CatchSwitch, nullptr,
+ CSI.getNumOperands()) {
init(CSI.getParentPad(), CSI.getUnwindDest(), CSI.getNumOperands());
setNumHungOffUseOperands(ReservedSpace);
Use *OL = getOperandList();
@@ -876,7 +929,7 @@ void CatchSwitchInst::removeHandler(handler_iterator HI) {
void FuncletPadInst::init(Value *ParentPad, ArrayRef<Value *> Args,
const Twine &NameStr) {
assert(getNumOperands() == 1 + Args.size() && "NumOperands not set up?");
- std::copy(Args.begin(), Args.end(), op_begin());
+ llvm::copy(Args, op_begin());
setParentPad(ParentPad);
setName(NameStr);
}
@@ -914,13 +967,11 @@ FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad,
UnreachableInst::UnreachableInst(LLVMContext &Context,
Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
- nullptr, 0, InsertBefore) {
-}
+ : Instruction(Type::getVoidTy(Context), Instruction::Unreachable, nullptr,
+ 0, InsertBefore) {}
UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
- nullptr, 0, InsertAtEnd) {
-}
+ : Instruction(Type::getVoidTy(Context), Instruction::Unreachable, nullptr,
+ 0, InsertAtEnd) {}
//===----------------------------------------------------------------------===//
// BranchInst Implementation
@@ -933,18 +984,18 @@ void BranchInst::AssertOK() {
}
BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 1,
- 1, InsertBefore) {
+ : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1, 1,
+ InsertBefore) {
assert(IfTrue && "Branch destination may not be null!");
Op<-1>() = IfTrue;
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 3,
- 3, InsertBefore) {
+ : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3, 3,
+ InsertBefore) {
Op<-1>() = IfTrue;
Op<-2>() = IfFalse;
Op<-3>() = Cond;
@@ -954,18 +1005,16 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 1,
- 1, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1, 1, InsertAtEnd) {
assert(IfTrue && "Branch destination may not be null!");
Op<-1>() = IfTrue;
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
- BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 3,
- 3, InsertAtEnd) {
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3, 3, InsertAtEnd) {
Op<-1>() = IfTrue;
Op<-2>() = IfFalse;
Op<-3>() = Cond;
@@ -974,10 +1023,10 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
#endif
}
-BranchInst::BranchInst(const BranchInst &BI) :
- TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
- BI.getNumOperands()) {
+BranchInst::BranchInst(const BranchInst &BI)
+ : Instruction(Type::getVoidTy(BI.getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
+ BI.getNumOperands()) {
Op<-1>() = BI.Op<-1>();
if (BI.getNumOperands() != 1) {
assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!");
@@ -1089,28 +1138,30 @@ void LoadInst::AssertOK() {
"Alignment required for atomic load");
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
- : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertBef) {}
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name,
+ Instruction *InsertBef)
+ : LoadInst(Ty, Ptr, Name, /*isVolatile=*/false, InsertBef) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
- : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertAE) {}
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name,
+ BasicBlock *InsertAE)
+ : LoadInst(Ty, Ptr, Name, /*isVolatile=*/false, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
Instruction *InsertBef)
: LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertBef) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
BasicBlock *InsertAE)
- : LoadInst(Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {}
+ : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, Instruction *InsertBef)
: LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
SyncScope::System, InsertBef) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, BasicBlock *InsertAE)
- : LoadInst(Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
+ : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic,
SyncScope::System, InsertAE) {}
LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
@@ -1125,12 +1176,11 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SyncScope::ID SSID,
+LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, AtomicOrdering Order, SyncScope::ID SSID,
BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
+ : UnaryInstruction(Ty, Load, Ptr, InsertAE) {
+ assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
setVolatile(isVolatile);
setAlignment(Align);
setAtomic(Order, SSID);
@@ -1138,48 +1188,6 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
setName(Name);
}
-LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(AtomicOrdering::NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(AtomicOrdering::NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Type *Ty, Value *Ptr, const char *Name, bool isVolatile,
- Instruction *InsertBef)
- : UnaryInstruction(Ty, Load, Ptr, InsertBef) {
- assert(Ty == cast<PointerType>(Ptr->getType())->getElementType());
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(AtomicOrdering::NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
- BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(AtomicOrdering::NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
void LoadInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
@@ -1376,6 +1384,37 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
Init(Operation, Ptr, Val, Ordering, SSID);
}
+StringRef AtomicRMWInst::getOperationName(BinOp Op) {
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return "xchg";
+ case AtomicRMWInst::Add:
+ return "add";
+ case AtomicRMWInst::Sub:
+ return "sub";
+ case AtomicRMWInst::And:
+ return "and";
+ case AtomicRMWInst::Nand:
+ return "nand";
+ case AtomicRMWInst::Or:
+ return "or";
+ case AtomicRMWInst::Xor:
+ return "xor";
+ case AtomicRMWInst::Max:
+ return "max";
+ case AtomicRMWInst::Min:
+ return "min";
+ case AtomicRMWInst::UMax:
+ return "umax";
+ case AtomicRMWInst::UMin:
+ return "umin";
+ case AtomicRMWInst::BAD_BINOP:
+ return "<invalid operation>";
+ }
+
+ llvm_unreachable("invalid atomicrmw operation");
+}
+
//===----------------------------------------------------------------------===//
// FenceInst Implementation
//===----------------------------------------------------------------------===//
@@ -1405,7 +1444,7 @@ void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
assert(getNumOperands() == 1 + IdxList.size() &&
"NumOperands not initialized?");
Op<0>() = Ptr;
- std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1);
+ llvm::copy(IdxList, op_begin() + 1);
setName(Name);
}
@@ -1700,17 +1739,17 @@ void ShuffleVectorInst::getShuffleMask(const Constant *Mask,
}
}
-bool ShuffleVectorInst::isSingleSourceMask(ArrayRef<int> Mask) {
+static bool isSingleSourceMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
assert(!Mask.empty() && "Shuffle mask must contain elements");
bool UsesLHS = false;
bool UsesRHS = false;
- for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) {
+ for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
if (Mask[i] == -1)
continue;
- assert(Mask[i] >= 0 && Mask[i] < (NumElts * 2) &&
+ assert(Mask[i] >= 0 && Mask[i] < (NumOpElts * 2) &&
"Out-of-bounds shuffle mask element");
- UsesLHS |= (Mask[i] < NumElts);
- UsesRHS |= (Mask[i] >= NumElts);
+ UsesLHS |= (Mask[i] < NumOpElts);
+ UsesRHS |= (Mask[i] >= NumOpElts);
if (UsesLHS && UsesRHS)
return false;
}
@@ -1718,18 +1757,30 @@ bool ShuffleVectorInst::isSingleSourceMask(ArrayRef<int> Mask) {
return true;
}
-bool ShuffleVectorInst::isIdentityMask(ArrayRef<int> Mask) {
- if (!isSingleSourceMask(Mask))
+bool ShuffleVectorInst::isSingleSourceMask(ArrayRef<int> Mask) {
+ // We don't have vector operand size information, so assume operands are the
+ // same size as the mask.
+ return isSingleSourceMaskImpl(Mask, Mask.size());
+}
+
+static bool isIdentityMaskImpl(ArrayRef<int> Mask, int NumOpElts) {
+ if (!isSingleSourceMaskImpl(Mask, NumOpElts))
return false;
- for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) {
+ for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
if (Mask[i] == -1)
continue;
- if (Mask[i] != i && Mask[i] != (NumElts + i))
+ if (Mask[i] != i && Mask[i] != (NumOpElts + i))
return false;
}
return true;
}
+bool ShuffleVectorInst::isIdentityMask(ArrayRef<int> Mask) {
+ // We don't have vector operand size information, so assume operands are the
+ // same size as the mask.
+ return isIdentityMaskImpl(Mask, Mask.size());
+}
+
bool ShuffleVectorInst::isReverseMask(ArrayRef<int> Mask) {
if (!isSingleSourceMask(Mask))
return false;
@@ -1801,6 +1852,79 @@ bool ShuffleVectorInst::isTransposeMask(ArrayRef<int> Mask) {
return true;
}
+bool ShuffleVectorInst::isExtractSubvectorMask(ArrayRef<int> Mask,
+ int NumSrcElts, int &Index) {
+ // Must extract from a single source.
+ if (!isSingleSourceMaskImpl(Mask, NumSrcElts))
+ return false;
+
+ // Must be smaller (else this is an Identity shuffle).
+ if (NumSrcElts <= (int)Mask.size())
+ return false;
+
+ // Find the start of the extraction, allowing that we may start with an UNDEF.
+ int SubIndex = -1;
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ int Offset = (M % NumSrcElts) - i;
+ if (0 <= SubIndex && SubIndex != Offset)
+ return false;
+ SubIndex = Offset;
+ }
+
+ if (0 <= SubIndex) {
+ Index = SubIndex;
+ return true;
+ }
+ return false;
+}
+
+bool ShuffleVectorInst::isIdentityWithPadding() const {
+ int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+ int NumMaskElts = getType()->getVectorNumElements();
+ if (NumMaskElts <= NumOpElts)
+ return false;
+
+ // The first part of the mask must choose elements from exactly 1 source op.
+ SmallVector<int, 16> Mask = getShuffleMask();
+ if (!isIdentityMaskImpl(Mask, NumOpElts))
+ return false;
+
+ // All extending must be with undef elements.
+ for (int i = NumOpElts; i < NumMaskElts; ++i)
+ if (Mask[i] != -1)
+ return false;
+
+ return true;
+}
+
+bool ShuffleVectorInst::isIdentityWithExtract() const {
+ int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+ int NumMaskElts = getType()->getVectorNumElements();
+ if (NumMaskElts >= NumOpElts)
+ return false;
+
+ return isIdentityMaskImpl(getShuffleMask(), NumOpElts);
+}
+
+bool ShuffleVectorInst::isConcat() const {
+ // Vector concatenation is differentiated from identity with padding.
+ if (isa<UndefValue>(Op<0>()) || isa<UndefValue>(Op<1>()))
+ return false;
+
+ int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+ int NumMaskElts = getType()->getVectorNumElements();
+ if (NumMaskElts != NumOpElts * 2)
+ return false;
+
+ // Use the mask length rather than the operands' vector lengths here. We
+ // already know that the shuffle returns a vector twice as long as the inputs,
+ // and neither of the inputs is an undef vector. If the mask picks consecutive
+ // elements from both inputs, then this is a concatenation of the inputs.
+ return isIdentityMaskImpl(getShuffleMask(), NumMaskElts);
+}
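A sketch showing how the new mask-classification helpers might be queried (isLowHalfExtract is illustrative, not part of the patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Recognize a shuffle that extracts the low half of its (wider) first operand.
static bool isLowHalfExtract(const ShuffleVectorInst &Shuf) {
  int NumSrcElts = Shuf.getOperand(0)->getType()->getVectorNumElements();
  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
  int Index;
  return ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
         Index == 0;
}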
//===----------------------------------------------------------------------===//
// InsertValueInst Class
@@ -1887,6 +2011,59 @@ Type *ExtractValueInst::getIndexedType(Type *Agg,
}
//===----------------------------------------------------------------------===//
+// UnaryOperator Class
+//===----------------------------------------------------------------------===//
+
+UnaryOperator::UnaryOperator(UnaryOps iType, Value *S,
+ Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(Ty, iType, S, InsertBefore) {
+ Op<0>() = S;
+ setName(Name);
+ AssertOK();
+}
+
+UnaryOperator::UnaryOperator(UnaryOps iType, Value *S,
+ Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(Ty, iType, S, InsertAtEnd) {
+ Op<0>() = S;
+ setName(Name);
+ AssertOK();
+}
+
+UnaryOperator *UnaryOperator::Create(UnaryOps Op, Value *S,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ return new UnaryOperator(Op, S, S->getType(), Name, InsertBefore);
+}
+
+UnaryOperator *UnaryOperator::Create(UnaryOps Op, Value *S,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ UnaryOperator *Res = Create(Op, S, Name);
+ InsertAtEnd->getInstList().push_back(Res);
+ return Res;
+}
+
+void UnaryOperator::AssertOK() {
+ Value *LHS = getOperand(0);
+ (void)LHS; // Silence warnings.
+#ifndef NDEBUG
+ switch (getOpcode()) {
+ case FNeg:
+ assert(getType() == LHS->getType() &&
+ "Unary operation should return same type as operand!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ default: llvm_unreachable("Invalid opcode provided");
+ }
+#endif
+}
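
A minimal sketch (not from this patch; emitFNeg is an illustrative name) of creating the new unary fneg through the Create() overloads defined above:

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Builds a unary fneg of V and inserts it before InsertPt; AssertOK() above
// rejects non-floating-point operands.
static Value *emitFNeg(Value *V, Instruction *InsertPt) {
  return UnaryOperator::Create(Instruction::FNeg, V, V->getName() + ".neg",
                               InsertPt);
}
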
+
+//===----------------------------------------------------------------------===//
// BinaryOperator Class
//===----------------------------------------------------------------------===//
@@ -2068,71 +2245,6 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
Op->getType(), Name, InsertAtEnd);
}
-// isConstantAllOnes - Helper function for several functions below
-static inline bool isConstantAllOnes(const Value *V) {
- if (const Constant *C = dyn_cast<Constant>(V))
- return C->isAllOnesValue();
- return false;
-}
-
-bool BinaryOperator::isNeg(const Value *V) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- if (Bop->getOpcode() == Instruction::Sub)
- if (Constant *C = dyn_cast<Constant>(Bop->getOperand(0)))
- return C->isNegativeZeroValue();
- return false;
-}
-
-bool BinaryOperator::isFNeg(const Value *V, bool IgnoreZeroSign) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- if (Bop->getOpcode() == Instruction::FSub)
- if (Constant *C = dyn_cast<Constant>(Bop->getOperand(0))) {
- if (!IgnoreZeroSign)
- IgnoreZeroSign = cast<Instruction>(V)->hasNoSignedZeros();
- return !IgnoreZeroSign ? C->isNegativeZeroValue() : C->isZeroValue();
- }
- return false;
-}
-
-bool BinaryOperator::isNot(const Value *V) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- return (Bop->getOpcode() == Instruction::Xor &&
- (isConstantAllOnes(Bop->getOperand(1)) ||
- isConstantAllOnes(Bop->getOperand(0))));
- return false;
-}
-
-Value *BinaryOperator::getNegArgument(Value *BinOp) {
- return cast<BinaryOperator>(BinOp)->getOperand(1);
-}
-
-const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
- return getNegArgument(const_cast<Value*>(BinOp));
-}
-
-Value *BinaryOperator::getFNegArgument(Value *BinOp) {
- return cast<BinaryOperator>(BinOp)->getOperand(1);
-}
-
-const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
- return getFNegArgument(const_cast<Value*>(BinOp));
-}
-
-Value *BinaryOperator::getNotArgument(Value *BinOp) {
- assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
- BinaryOperator *BO = cast<BinaryOperator>(BinOp);
- Value *Op0 = BO->getOperand(0);
- Value *Op1 = BO->getOperand(1);
- if (isConstantAllOnes(Op0)) return Op1;
-
- assert(isConstantAllOnes(Op1));
- return Op0;
-}
-
-const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
- return getNotArgument(const_cast<Value*>(BinOp));
-}
-
// Exchange the two operands to this instruction. This instruction is safe to
// use on any binary instruction and does not modify the semantics of the
// instruction. If the instruction is order-dependent (SetLT f.e.), the opcode
@@ -2978,12 +3090,14 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
return false;
// A vector of pointers must have the same number of elements.
- if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy)) {
- if (VectorType *DstVecTy = dyn_cast<VectorType>(DstTy))
- return (SrcVecTy->getNumElements() == DstVecTy->getNumElements());
-
- return false;
- }
+ VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy);
+ VectorType *DstVecTy = dyn_cast<VectorType>(DstTy);
+ if (SrcVecTy && DstVecTy)
+ return (SrcVecTy->getNumElements() == DstVecTy->getNumElements());
+ if (SrcVecTy)
+ return SrcVecTy->getNumElements() == 1;
+ if (DstVecTy)
+ return DstVecTy->getNumElements() == 1;
return true;
}
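
A hedged sketch of querying the relaxed rule: the new code appears to accept casts between a one-element vector and a lone value of matching size, which a caller can probe through the public castIsValid helper (canBitCast is an illustrative name):

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Asks whether a bitcast of V to DstTy would be accepted; DstTy is assumed to
// have the same bit width as V's type.
static bool canBitCast(Value *V, Type *DstTy) {
  return CastInst::castIsValid(Instruction::BitCast, V, DstTy);
}
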
@@ -3171,15 +3285,18 @@ AddrSpaceCastInst::AddrSpaceCastInst(
//===----------------------------------------------------------------------===//
CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS,
- Value *RHS, const Twine &Name, Instruction *InsertBefore)
+ Value *RHS, const Twine &Name, Instruction *InsertBefore,
+ Instruction *FlagsSource)
: Instruction(ty, op,
OperandTraits<CmpInst>::op_begin(this),
OperandTraits<CmpInst>::operands(this),
InsertBefore) {
- Op<0>() = LHS;
- Op<1>() = RHS;
+ Op<0>() = LHS;
+ Op<1>() = RHS;
setPredicate((Predicate)predicate);
setName(Name);
+ if (FlagsSource)
+ copyIRFlags(FlagsSource);
}
CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS,
@@ -3518,8 +3635,8 @@ void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
/// constructor can also autoinsert before another instruction.
SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
- nullptr, 0, InsertBefore) {
+ : Instruction(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ nullptr, 0, InsertBefore) {
init(Value, Default, 2+NumCases*2);
}
@@ -3529,13 +3646,13 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
/// constructor also autoinserts at the end of the specified BasicBlock.
SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
- nullptr, 0, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ nullptr, 0, InsertAtEnd) {
init(Value, Default, 2+NumCases*2);
}
SwitchInst::SwitchInst(const SwitchInst &SI)
- : TerminatorInst(SI.getType(), Instruction::Switch, nullptr, 0) {
+ : Instruction(SI.getType(), Instruction::Switch, nullptr, 0) {
init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
setNumHungOffUseOperands(SI.getNumOperands());
Use *OL = getOperandList();
@@ -3547,7 +3664,6 @@ SwitchInst::SwitchInst(const SwitchInst &SI)
SubclassOptionalData = SI.SubclassOptionalData;
}
-
/// addCase - Add an entry to the switch instruction...
///
void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
@@ -3626,21 +3742,21 @@ void IndirectBrInst::growOperands() {
IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
Instruction *InsertBefore)
-: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
- nullptr, 0, InsertBefore) {
+ : Instruction(Type::getVoidTy(Address->getContext()),
+ Instruction::IndirectBr, nullptr, 0, InsertBefore) {
init(Address, NumCases);
}
IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
BasicBlock *InsertAtEnd)
-: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
- nullptr, 0, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(Address->getContext()),
+ Instruction::IndirectBr, nullptr, 0, InsertAtEnd) {
init(Address, NumCases);
}
IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
- : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
- nullptr, IBI.getNumOperands()) {
+ : Instruction(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
+ nullptr, IBI.getNumOperands()) {
allocHungoffUses(IBI.getNumOperands());
Use *OL = getOperandList();
const Use *InOL = IBI.getOperandList();
@@ -3688,6 +3804,10 @@ GetElementPtrInst *GetElementPtrInst::cloneImpl() const {
return new (getNumOperands()) GetElementPtrInst(*this);
}
+UnaryOperator *UnaryOperator::cloneImpl() const {
+ return Create(getOpcode(), Op<0>());
+}
+
BinaryOperator *BinaryOperator::cloneImpl() const {
return Create(getOpcode(), Op<0>(), Op<1>());
}
@@ -3718,7 +3838,7 @@ AllocaInst *AllocaInst::cloneImpl() const {
}
LoadInst *LoadInst::cloneImpl() const {
- return new LoadInst(getOperand(0), Twine(), isVolatile(),
+ return new LoadInst(getType(), getOperand(0), Twine(), isVolatile(),
getAlignment(), getOrdering(), getSyncScopeID());
}
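
A minimal sketch (helper name illustrative) of the type-carrying LoadInst constructor that cloneImpl now relies on, where the result type is spelled explicitly instead of being derived from the pointer operand:

#include "llvm/IR/Instructions.h"
using namespace llvm;

static LoadInst *emitLoad(Type *Ty, Value *Ptr, Instruction *InsertPt) {
  // The explicit Ty mirrors the constructor used by LoadInst::cloneImpl above.
  return new LoadInst(Ty, Ptr, "loaded", InsertPt);
}
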
diff --git a/contrib/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm/lib/IR/IntrinsicInst.cpp
index 787889934d82..df3a38ac147f 100644
--- a/contrib/llvm/lib/IR/IntrinsicInst.cpp
+++ b/contrib/llvm/lib/IR/IntrinsicInst.cpp
@@ -32,10 +32,11 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
-/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
+/// DbgVariableIntrinsic - This is the common base class for debug info
+/// intrinsics for variables.
///
-Value *DbgInfoIntrinsic::getVariableLocation(bool AllowNullOp) const {
+Value *DbgVariableIntrinsic::getVariableLocation(bool AllowNullOp) const {
Value *Op = getArgOperand(0);
if (AllowNullOp && !Op)
return nullptr;
@@ -45,14 +46,11 @@ Value *DbgInfoIntrinsic::getVariableLocation(bool AllowNullOp) const {
return V->getValue();
// When the value goes to null, it gets replaced by an empty MDNode.
- assert((isa<DbgLabelInst>(this)
- || !cast<MDNode>(MD)->getNumOperands())
- && "DbgValueInst Expected an empty MDNode");
-
+ assert(!cast<MDNode>(MD)->getNumOperands() && "Expected an empty MDNode");
return nullptr;
}
-Optional<uint64_t> DbgInfoIntrinsic::getFragmentSizeInBits() const {
+Optional<uint64_t> DbgVariableIntrinsic::getFragmentSizeInBits() const {
if (auto Fragment = getExpression()->getFragmentInfo())
return Fragment->SizeInBits;
return getVariable()->getSizeInBits();
@@ -154,6 +152,10 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
return true;
}
}
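
A small sketch (not from this patch) of how a client would consult isUnaryOp(), which now also reports true for the constrained ceil/floor/round/trunc intrinsics listed above:

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// True if I is a constrained floating-point intrinsic taking a single operand.
static bool isUnaryConstrainedFPOp(const Instruction &I) {
  if (const auto *CFPI = dyn_cast<ConstrainedFPIntrinsic>(&I))
    return CFPI->isUnaryOp();
  return false;
}
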
diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp
index 62d9e387162e..944d8265151d 100644
--- a/contrib/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm/lib/IR/LLVMContext.cpp
@@ -61,6 +61,7 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
{MD_associated, "associated"},
{MD_callees, "callees"},
{MD_irr_loop, "irr_loop"},
+ {MD_access_group, "llvm.access.group"},
};
for (auto &MDKind : MDKinds) {
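
A hedged sketch assuming the matching MD_access_group enumerator was added to LLVMContext.h (not shown in this hunk): because the kind is now registered by the context constructor, the string lookup should resolve to the fixed ID.

#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

static void checkAccessGroupKind(LLVMContext &Ctx) {
  assert(Ctx.getMDKindID("llvm.access.group") ==
             LLVMContext::MD_access_group &&
         "fixed metadata kinds are registered up front");
}
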
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h
index 3b2e1e81b1c1..2d120869860a 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.h
@@ -280,21 +280,24 @@ template <> struct MDNodeKeyImpl<DILocation> {
unsigned Column;
Metadata *Scope;
Metadata *InlinedAt;
+ bool ImplicitCode;
MDNodeKeyImpl(unsigned Line, unsigned Column, Metadata *Scope,
- Metadata *InlinedAt)
- : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {}
+ Metadata *InlinedAt, bool ImplicitCode)
+ : Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt),
+ ImplicitCode(ImplicitCode) {}
MDNodeKeyImpl(const DILocation *L)
: Line(L->getLine()), Column(L->getColumn()), Scope(L->getRawScope()),
- InlinedAt(L->getRawInlinedAt()) {}
+ InlinedAt(L->getRawInlinedAt()), ImplicitCode(L->isImplicitCode()) {}
bool isKeyOf(const DILocation *RHS) const {
return Line == RHS->getLine() && Column == RHS->getColumn() &&
- Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt();
+ Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt() &&
+ ImplicitCode == RHS->isImplicitCode();
}
unsigned getHashValue() const {
- return hash_combine(Line, Column, Scope, InlinedAt);
+ return hash_combine(Line, Column, Scope, InlinedAt, ImplicitCode);
}
};
@@ -376,20 +379,22 @@ template <> struct MDNodeKeyImpl<DIBasicType> {
uint64_t SizeInBits;
uint32_t AlignInBits;
unsigned Encoding;
+ unsigned Flags;
MDNodeKeyImpl(unsigned Tag, MDString *Name, uint64_t SizeInBits,
- uint32_t AlignInBits, unsigned Encoding)
+ uint32_t AlignInBits, unsigned Encoding, unsigned Flags)
: Tag(Tag), Name(Name), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
- Encoding(Encoding) {}
+ Encoding(Encoding), Flags(Flags) {}
MDNodeKeyImpl(const DIBasicType *N)
: Tag(N->getTag()), Name(N->getRawName()), SizeInBits(N->getSizeInBits()),
- AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()) {}
+ AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()), Flags(N->getFlags()) {}
bool isKeyOf(const DIBasicType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
SizeInBits == RHS->getSizeInBits() &&
AlignInBits == RHS->getAlignInBits() &&
- Encoding == RHS->getEncoding();
+ Encoding == RHS->getEncoding() &&
+ Flags == RHS->getFlags();
}
unsigned getHashValue() const {
@@ -607,15 +612,12 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Metadata *File;
unsigned Line;
Metadata *Type;
- bool IsLocalToUnit;
- bool IsDefinition;
unsigned ScopeLine;
Metadata *ContainingType;
- unsigned Virtuality;
unsigned VirtualIndex;
int ThisAdjustment;
unsigned Flags;
- bool IsOptimized;
+ unsigned SPFlags;
Metadata *Unit;
Metadata *TemplateParams;
Metadata *Declaration;
@@ -624,45 +626,39 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
- Metadata *ContainingType, unsigned Virtuality,
+ unsigned ScopeLine, Metadata *ContainingType,
unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
- bool IsOptimized, Metadata *Unit, Metadata *TemplateParams,
+ unsigned SPFlags, Metadata *Unit, Metadata *TemplateParams,
Metadata *Declaration, Metadata *RetainedNodes,
Metadata *ThrownTypes)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
- Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
- IsDefinition(IsDefinition), ScopeLine(ScopeLine),
- ContainingType(ContainingType), Virtuality(Virtuality),
- VirtualIndex(VirtualIndex), ThisAdjustment(ThisAdjustment),
- Flags(Flags), IsOptimized(IsOptimized), Unit(Unit),
- TemplateParams(TemplateParams), Declaration(Declaration),
+ Line(Line), Type(Type), ScopeLine(ScopeLine),
+ ContainingType(ContainingType), VirtualIndex(VirtualIndex),
+ ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags),
+ Unit(Unit), TemplateParams(TemplateParams), Declaration(Declaration),
RetainedNodes(RetainedNodes), ThrownTypes(ThrownTypes) {}
MDNodeKeyImpl(const DISubprogram *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
- Line(N->getLine()), Type(N->getRawType()),
- IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
- ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()),
- Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()),
+ Line(N->getLine()), Type(N->getRawType()), ScopeLine(N->getScopeLine()),
+ ContainingType(N->getRawContainingType()),
+ VirtualIndex(N->getVirtualIndex()),
ThisAdjustment(N->getThisAdjustment()), Flags(N->getFlags()),
- IsOptimized(N->isOptimized()), Unit(N->getRawUnit()),
+ SPFlags(N->getSPFlags()), Unit(N->getRawUnit()),
TemplateParams(N->getRawTemplateParams()),
- Declaration(N->getRawDeclaration()), RetainedNodes(N->getRawRetainedNodes()),
+ Declaration(N->getRawDeclaration()),
+ RetainedNodes(N->getRawRetainedNodes()),
ThrownTypes(N->getRawThrownTypes()) {}
bool isKeyOf(const DISubprogram *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
LinkageName == RHS->getRawLinkageName() &&
File == RHS->getRawFile() && Line == RHS->getLine() &&
- Type == RHS->getRawType() && IsLocalToUnit == RHS->isLocalToUnit() &&
- IsDefinition == RHS->isDefinition() &&
- ScopeLine == RHS->getScopeLine() &&
+ Type == RHS->getRawType() && ScopeLine == RHS->getScopeLine() &&
ContainingType == RHS->getRawContainingType() &&
- Virtuality == RHS->getVirtuality() &&
VirtualIndex == RHS->getVirtualIndex() &&
ThisAdjustment == RHS->getThisAdjustment() &&
- Flags == RHS->getFlags() && IsOptimized == RHS->isOptimized() &&
+ Flags == RHS->getFlags() && SPFlags == RHS->getSPFlags() &&
Unit == RHS->getUnit() &&
TemplateParams == RHS->getRawTemplateParams() &&
Declaration == RHS->getRawDeclaration() &&
@@ -670,11 +666,13 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
ThrownTypes == RHS->getRawThrownTypes();
}
+ bool isDefinition() const { return SPFlags & DISubprogram::SPFlagDefinition; }
+
unsigned getHashValue() const {
// If this is a declaration inside an ODR type, only hash the type and the
// name. Otherwise the hash will be stronger than
// MDNodeSubsetEqualImpl::isDeclarationOfODRMember().
- if (!IsDefinition && LinkageName)
+ if (!isDefinition() && LinkageName)
if (auto *CT = dyn_cast_or_null<DICompositeType>(Scope))
if (CT->getRawIdentifier())
return hash_combine(LinkageName, Scope);
@@ -691,7 +689,7 @@ template <> struct MDNodeSubsetEqualImpl<DISubprogram> {
using KeyTy = MDNodeKeyImpl<DISubprogram>;
static bool isSubsetEqual(const KeyTy &LHS, const DISubprogram *RHS) {
- return isDeclarationOfODRMember(LHS.IsDefinition, LHS.Scope,
+ return isDeclarationOfODRMember(LHS.isDefinition(), LHS.Scope,
LHS.LinkageName, LHS.TemplateParams, RHS);
}
@@ -865,23 +863,26 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
bool IsLocalToUnit;
bool IsDefinition;
Metadata *StaticDataMemberDeclaration;
+ Metadata *TemplateParams;
uint32_t AlignInBits;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
- Metadata *StaticDataMemberDeclaration, uint32_t AlignInBits)
+ Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
+ uint32_t AlignInBits)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
IsDefinition(IsDefinition),
StaticDataMemberDeclaration(StaticDataMemberDeclaration),
- AlignInBits(AlignInBits) {}
+ TemplateParams(TemplateParams), AlignInBits(AlignInBits) {}
MDNodeKeyImpl(const DIGlobalVariable *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
Line(N->getLine()), Type(N->getRawType()),
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()),
+ TemplateParams(N->getRawTemplateParams()),
AlignInBits(N->getAlignInBits()) {}
bool isKeyOf(const DIGlobalVariable *RHS) const {
@@ -892,6 +893,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
IsDefinition == RHS->isDefinition() &&
StaticDataMemberDeclaration ==
RHS->getRawStaticDataMemberDeclaration() &&
+ TemplateParams == RHS->getRawTemplateParams() &&
AlignInBits == RHS->getAlignInBits();
}
diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp
index 54d602d926e5..01d14f17bba5 100644
--- a/contrib/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -99,27 +100,31 @@ static cl::list<std::string>
/// This is a helper to determine whether to print IR before or
/// after a pass.
-static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
+bool llvm::shouldPrintBeforePass() {
+ return PrintBeforeAll || !PrintBefore.empty();
+}
+
+bool llvm::shouldPrintAfterPass() {
+ return PrintAfterAll || !PrintAfter.empty();
+}
+
+static bool ShouldPrintBeforeOrAfterPass(StringRef PassID,
PassOptionList &PassesToPrint) {
for (auto *PassInf : PassesToPrint) {
if (PassInf)
- if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ if (PassInf->getPassArgument() == PassID) {
return true;
}
}
return false;
}
-/// This is a utility to check whether a pass should have IR dumped
-/// before it.
-static bool ShouldPrintBeforePass(const PassInfo *PI) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
+bool llvm::shouldPrintBeforePass(StringRef PassID) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
}
-/// This is a utility to check whether a pass should have IR dumped
-/// after it.
-static bool ShouldPrintAfterPass(const PassInfo *PI) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
+bool llvm::shouldPrintAfterPass(StringRef PassID) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
}
bool llvm::forcePrintModuleIR() { return PrintModuleScope; }
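
A minimal sketch of calling the now-exported helpers from outside this file; the header is assumed to be the one that declares the IR-printing passes, and the helper name is illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/IRPrintingPasses.h"
using namespace llvm;

// Queries with a pass's command-line argument (e.g. "instcombine") to decide
// whether to schedule IR-printer passes around it.
static bool wantIRDumpAround(StringRef PassArg) {
  return shouldPrintBeforePass(PassArg) || shouldPrintAfterPass(PassArg);
}
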
@@ -135,34 +140,32 @@ bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
return PassDebugging >= Executions;
}
-unsigned PMDataManager::initSizeRemarkInfo(Module &M) {
+unsigned PMDataManager::initSizeRemarkInfo(
+ Module &M, StringMap<std::pair<unsigned, unsigned>> &FunctionToInstrCount) {
// Only calculate getInstructionCount if the size-info remark is requested.
- return M.getInstructionCount();
-}
-
-void PMDataManager::emitInstrCountChangedRemark(Pass *P, Module &M,
- unsigned CountBefore) {
- // We need a function containing at least one basic block in order to output
- // remarks. Since it's possible that the first function in the module doesn't
- // actually contain a basic block, we have to go and find one that's suitable
- // for emitting remarks.
- auto It = std::find_if(M.begin(), M.end(),
- [](const Function &Fn) { return !Fn.empty(); });
-
- // Didn't find a function. Quit.
- if (It == M.end())
- return;
-
- // We found a function containing at least one basic block.
- Function *F = &*It;
+ unsigned InstrCount = 0;
- // How many instructions are in the module now?
- unsigned CountAfter = M.getInstructionCount();
+ // Collect instruction counts for every function. We'll use this to emit
+ // per-function size remarks later.
+ for (Function &F : M) {
+ unsigned FCount = F.getInstructionCount();
- // If there was no change, don't emit a remark.
- if (CountBefore == CountAfter)
- return;
+ // Insert a record into FunctionToInstrCount keeping track of the current
+ // size of the function as the first member of a pair. Set the second
+ // member to 0; if the function is deleted by the pass, then when we get
+ // here, we'll be able to let the user know that F no longer contributes to
+ // the module.
+ FunctionToInstrCount[F.getName().str()] =
+ std::pair<unsigned, unsigned>(FCount, 0);
+ InstrCount += FCount;
+ }
+ return InstrCount;
+}
+void PMDataManager::emitInstrCountChangedRemark(
+ Pass *P, Module &M, int64_t Delta, unsigned CountBefore,
+ StringMap<std::pair<unsigned, unsigned>> &FunctionToInstrCount,
+ Function *F) {
// If it's a pass manager, don't emit a remark. (This hinges on the assumption
// that the only passes that return non-null with getAsPMDataManager are pass
// managers.) The reason we have to do this is to avoid emitting remarks for
@@ -170,11 +173,53 @@ void PMDataManager::emitInstrCountChangedRemark(Pass *P, Module &M,
if (P->getAsPMDataManager())
return;
- // Compute a possibly negative delta between the instruction count before
- // running P, and after running P.
- int64_t Delta =
- static_cast<int64_t>(CountAfter) - static_cast<int64_t>(CountBefore);
+ // Set to true if this isn't a module pass or CGSCC pass.
+ bool CouldOnlyImpactOneFunction = (F != nullptr);
+
+ // Helper lambda that updates the changes to the size of some function.
+ auto UpdateFunctionChanges =
+ [&FunctionToInstrCount](Function &MaybeChangedFn) {
+ // Update the total module count.
+ unsigned FnSize = MaybeChangedFn.getInstructionCount();
+ auto It = FunctionToInstrCount.find(MaybeChangedFn.getName());
+
+ // If we created a new function, then we need to add it to the map and
+ // say that it changed from 0 instructions to FnSize.
+ if (It == FunctionToInstrCount.end()) {
+ FunctionToInstrCount[MaybeChangedFn.getName()] =
+ std::pair<unsigned, unsigned>(0, FnSize);
+ return;
+ }
+ // Insert the new function size into the second member of the pair. This
+ // tells us whether or not this function changed in size.
+ It->second.second = FnSize;
+ };
+
+ // We need to initially update all of the function sizes.
+ // If no function was passed in, then we're either a module pass or an
+ // CGSCC pass.
+ if (!CouldOnlyImpactOneFunction)
+ std::for_each(M.begin(), M.end(), UpdateFunctionChanges);
+ else
+ UpdateFunctionChanges(*F);
+
+ // Do we have a function we can use to emit a remark?
+ if (!CouldOnlyImpactOneFunction) {
+ // We need a function containing at least one basic block in order to output
+ // remarks. Since it's possible that the first function in the module
+ // doesn't actually contain a basic block, we have to go and find one that's
+ // suitable for emitting remarks.
+ auto It = std::find_if(M.begin(), M.end(),
+ [](const Function &Fn) { return !Fn.empty(); });
+ // Didn't find a function. Quit.
+ if (It == M.end())
+ return;
+
+ // We found a function containing at least one basic block.
+ F = &*It;
+ }
+ int64_t CountAfter = static_cast<int64_t>(CountBefore) + Delta;
BasicBlock &BB = *F->begin();
OptimizationRemarkAnalysis R("size-info", "IRSizeChange",
DiagnosticLocation(), &BB);
@@ -188,6 +233,55 @@ void PMDataManager::emitInstrCountChangedRemark(Pass *P, Module &M,
<< "; Delta: "
<< DiagnosticInfoOptimizationBase::Argument("DeltaInstrCount", Delta);
F->getContext().diagnose(R); // Not using ORE for layering reasons.
+
+ // Emit per-function size change remarks separately.
+ std::string PassName = P->getPassName().str();
+
+ // Helper lambda that emits a remark when the size of a function has changed.
+ auto EmitFunctionSizeChangedRemark = [&FunctionToInstrCount, &F, &BB,
+ &PassName](const std::string &Fname) {
+ unsigned FnCountBefore, FnCountAfter;
+ std::pair<unsigned, unsigned> &Change = FunctionToInstrCount[Fname];
+ std::tie(FnCountBefore, FnCountAfter) = Change;
+ int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
+ static_cast<int64_t>(FnCountBefore);
+
+ if (FnDelta == 0)
+ return;
+
+ // FIXME: We shouldn't use BB for the location here. Unfortunately, because
+ // the function that we're looking at could have been deleted, we can't use
+ // it for the source location. We *want* remarks when a function is deleted
+ // though, so we're kind of stuck here as is. (This remark, along with the
+ // whole-module size change remarks really ought not to have source
+ // locations at all.)
+ OptimizationRemarkAnalysis FR("size-info", "FunctionIRSizeChange",
+ DiagnosticLocation(), &BB);
+ FR << DiagnosticInfoOptimizationBase::Argument("Pass", PassName)
+ << ": Function: "
+ << DiagnosticInfoOptimizationBase::Argument("Function", Fname)
+ << ": IR instruction count changed from "
+ << DiagnosticInfoOptimizationBase::Argument("IRInstrsBefore",
+ FnCountBefore)
+ << " to "
+ << DiagnosticInfoOptimizationBase::Argument("IRInstrsAfter",
+ FnCountAfter)
+ << "; Delta: "
+ << DiagnosticInfoOptimizationBase::Argument("DeltaInstrCount", FnDelta);
+ F->getContext().diagnose(FR);
+
+ // Update the function size.
+ Change.first = FnCountAfter;
+ };
+
+ // Are we looking at more than one function? If so, emit remarks for all of
+ // the functions in the module. Otherwise, only emit one remark.
+ if (!CouldOnlyImpactOneFunction)
+ std::for_each(FunctionToInstrCount.keys().begin(),
+ FunctionToInstrCount.keys().end(),
+ EmitFunctionSizeChangedRemark);
+ else
+ EmitFunctionSizeChangedRemark(F->getName().str());
}
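
The per-function bookkeeping above, in miniature (a standalone sketch, not LLVM API): the map holds (size before pass, size after pass), and a function first seen after the pass starts from zero so deletions and creations both show up as deltas.

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <utility>
using namespace llvm;

using FnSizeMap = StringMap<std::pair<unsigned, unsigned>>;

static int64_t recordAndDelta(FnSizeMap &Sizes, StringRef Fn,
                              unsigned SizeAfter) {
  auto &Entry = Sizes[Fn]; // value-initialized to {0, 0} for new functions
  Entry.second = SizeAfter;
  return static_cast<int64_t>(Entry.second) -
         static_cast<int64_t>(Entry.first);
}
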
void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
@@ -494,65 +588,6 @@ char PassManagerImpl::ID = 0;
} // End of legacy namespace
} // End of llvm namespace
-namespace {
-
-//===----------------------------------------------------------------------===//
-/// TimingInfo Class - This class is used to calculate information about the
-/// amount of time each pass takes to execute. This only happens when
-/// -time-passes is enabled on the command line.
-///
-
-static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
-
-class TimingInfo {
- DenseMap<Pass*, Timer*> TimingData;
- TimerGroup TG;
-public:
- // Use 'create' member to get this.
- TimingInfo() : TG("pass", "... Pass execution timing report ...") {}
-
- // TimingDtor - Print out information about timing information
- ~TimingInfo() {
- // Delete all of the timers, which accumulate their info into the
- // TimerGroup.
- for (auto &I : TimingData)
- delete I.second;
- // TimerGroup is deleted next, printing the report.
- }
-
- // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
- // to a non-null value (if the -time-passes option is enabled) or it leaves it
- // null. It may be called multiple times.
- static void createTheTimeInfo();
-
- // print - Prints out timing information and then resets the timers.
- void print() {
- TG.print(*CreateInfoOutputFile());
- }
-
- /// getPassTimer - Return the timer for the specified pass if it exists.
- Timer *getPassTimer(Pass *P) {
- if (P->getAsPMDataManager())
- return nullptr;
-
- sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
- Timer *&T = TimingData[P];
- if (!T) {
- StringRef PassName = P->getPassName();
- StringRef PassArgument;
- if (const PassInfo *PI = Pass::lookupPassInfo(P->getPassID()))
- PassArgument = PI->getPassArgument();
- T = new Timer(PassArgument.empty() ? PassName : PassArgument, PassName,
- TG);
- }
- return T;
- }
-};
-
-} // End of anon namespace
-
-static TimingInfo *TheTimeInfo;
-
//===----------------------------------------------------------------------===//
// PMTopLevelManager implementation
@@ -677,6 +712,8 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// available at this point.
const PassInfo *PI = findAnalysisPassInfo(P->getPassID());
if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
+ // Remove any cached AnalysisUsage information.
+ AnUsageMap.erase(P);
delete P;
return;
}
@@ -747,7 +784,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
return;
}
- if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
+ if (PI && !PI->isAnalysis() && shouldPrintBeforePass(PI->getPassArgument())) {
Pass *PP = P->createPrinterPass(
dbgs(), ("*** IR Dump Before " + P->getPassName() + " ***").str());
PP->assignPassManager(activeStack, getTopLevelPassManagerType());
@@ -756,7 +793,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// Add the requested pass to the best available pass manager.
P->assignPassManager(activeStack, getTopLevelPassManagerType());
- if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
+ if (PI && !PI->isAnalysis() && shouldPrintAfterPass(PI->getPassArgument())) {
Pass *PP = P->createPrinterPass(
dbgs(), ("*** IR Dump After " + P->getPassName() + " ***").str());
PP->assignPassManager(activeStack, getTopLevelPassManagerType());
@@ -1343,9 +1380,16 @@ bool BBPassManager::runOnFunction(Function &F) {
bool Changed = doInitialization(F);
Module &M = *F.getParent();
- unsigned InstrCount = 0;
+ unsigned InstrCount, BBSize = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
- for (BasicBlock &BB : F)
+ if (EmitICRemark)
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+
+ for (BasicBlock &BB : F) {
+ // Collect the initial size of the basic block.
+ if (EmitICRemark)
+ BBSize = BB.size();
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
bool LocalChanged = false;
@@ -1359,11 +1403,20 @@ bool BBPassManager::runOnFunction(Function &F) {
// If the pass crashes, remember this.
PassManagerPrettyStackEntry X(BP, BB);
TimeRegion PassTimer(getPassTimer(BP));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
LocalChanged |= BP->runOnBasicBlock(BB);
- if (EmitICRemark)
- emitInstrCountChangedRemark(BP, M, InstrCount);
+ if (EmitICRemark) {
+ unsigned NewSize = BB.size();
+ // Update the size of the basic block, emit a remark, and update the
+ // size of the module.
+ if (NewSize != BBSize) {
+ int64_t Delta =
+ static_cast<int64_t>(NewSize) - static_cast<int64_t>(BBSize);
+ emitInstrCountChangedRemark(BP, M, Delta, InstrCount,
+ FunctionToInstrCount, &F);
+ InstrCount = static_cast<int64_t>(InstrCount) + Delta;
+ BBSize = NewSize;
+ }
+ }
}
Changed |= LocalChanged;
@@ -1378,6 +1431,7 @@ bool BBPassManager::runOnFunction(Function &F) {
recordAvailableAnalysis(BP);
removeDeadPasses(BP, BB.getName(), ON_BASICBLOCK_MSG);
}
+ }
return doFinalization(F) || Changed;
}
@@ -1525,7 +1579,6 @@ void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
// Return true if any function is modified by a pass.
bool FunctionPassManagerImpl::run(Function &F) {
bool Changed = false;
- TimingInfo::createTheTimeInfo();
initializeAllAnalysisInfo();
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
@@ -1567,8 +1620,15 @@ bool FPPassManager::runOnFunction(Function &F) {
// Collect inherited analysis from Module level pass manager.
populateInheritedAnalysis(TPM->activeStack);
- unsigned InstrCount = 0;
+ unsigned InstrCount, FunctionSize = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
+ // Collect the initial size of the module.
+ if (EmitICRemark) {
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+ FunctionSize = F.getInstructionCount();
+ }
+
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
bool LocalChanged = false;
@@ -1581,11 +1641,21 @@ bool FPPassManager::runOnFunction(Function &F) {
{
PassManagerPrettyStackEntry X(FP, F);
TimeRegion PassTimer(getPassTimer(FP));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
LocalChanged |= FP->runOnFunction(F);
- if (EmitICRemark)
- emitInstrCountChangedRemark(FP, M, InstrCount);
+ if (EmitICRemark) {
+ unsigned NewSize = F.getInstructionCount();
+
+ // Update the size of the function, emit a remark, and update the size
+ // of the module.
+ if (NewSize != FunctionSize) {
+ int64_t Delta = static_cast<int64_t>(NewSize) -
+ static_cast<int64_t>(FunctionSize);
+ emitInstrCountChangedRemark(FP, M, Delta, InstrCount,
+ FunctionToInstrCount, &F);
+ InstrCount = static_cast<int64_t>(InstrCount) + Delta;
+ FunctionSize = NewSize;
+ }
+ }
}
Changed |= LocalChanged;
@@ -1649,8 +1719,15 @@ MPPassManager::runOnModule(Module &M) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
Changed |= getContainedPass(Index)->doInitialization(M);
- unsigned InstrCount = 0;
+ unsigned InstrCount, ModuleCount = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
+ // Collect the initial size of the module.
+ if (EmitICRemark) {
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+ ModuleCount = InstrCount;
+ }
+
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
bool LocalChanged = false;
@@ -1664,11 +1741,18 @@ MPPassManager::runOnModule(Module &M) {
PassManagerPrettyStackEntry X(MP, M);
TimeRegion PassTimer(getPassTimer(MP));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
LocalChanged |= MP->runOnModule(M);
- if (EmitICRemark)
- emitInstrCountChangedRemark(MP, M, InstrCount);
+ if (EmitICRemark) {
+ // Update the size of the module.
+ ModuleCount = M.getInstructionCount();
+ if (ModuleCount != InstrCount) {
+ int64_t Delta = static_cast<int64_t>(ModuleCount) -
+ static_cast<int64_t>(InstrCount);
+ emitInstrCountChangedRemark(MP, M, Delta, InstrCount,
+ FunctionToInstrCount);
+ InstrCount = ModuleCount;
+ }
+ }
}
Changed |= LocalChanged;
@@ -1761,7 +1845,6 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
/// whether any of the passes modifies the module, and if so, return true.
bool PassManagerImpl::run(Module &M) {
bool Changed = false;
- TimingInfo::createTheTimeInfo();
dumpArguments();
dumpPasses();
@@ -1806,41 +1889,6 @@ bool PassManager::run(Module &M) {
}
//===----------------------------------------------------------------------===//
-// TimingInfo implementation
-
-bool llvm::TimePassesIsEnabled = false;
-static cl::opt<bool, true> EnableTiming(
- "time-passes", cl::location(TimePassesIsEnabled), cl::Hidden,
- cl::desc("Time each pass, printing elapsed time for each on exit"));
-
-// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
-// a non-null value (if the -time-passes option is enabled) or it leaves it
-// null. It may be called multiple times.
-void TimingInfo::createTheTimeInfo() {
- if (!TimePassesIsEnabled || TheTimeInfo) return;
-
- // Constructed the first time this is called, iff -time-passes is enabled.
- // This guarantees that the object will be constructed before static globals,
- // thus it will be destroyed before them.
- static ManagedStatic<TimingInfo> TTI;
- TheTimeInfo = &*TTI;
-}
-
-/// If TimingInfo is enabled then start pass timer.
-Timer *llvm::getPassTimer(Pass *P) {
- if (TheTimeInfo)
- return TheTimeInfo->getPassTimer(P);
- return nullptr;
-}
-
-/// If timing is enabled, report the times collected up to now and then reset
-/// them.
-void llvm::reportAndResetTimings() {
- if (TheTimeInfo)
- TheTimeInfo->print();
-}
-
-//===----------------------------------------------------------------------===//
// PMStack implementation
//
diff --git a/contrib/llvm/lib/IR/MDBuilder.cpp b/contrib/llvm/lib/IR/MDBuilder.cpp
index 1bb23c0330f3..3fa541f1b535 100644
--- a/contrib/llvm/lib/IR/MDBuilder.cpp
+++ b/contrib/llvm/lib/IR/MDBuilder.cpp
@@ -260,8 +260,9 @@ MDNode *MDBuilder::createMutableTBAAAccessTag(MDNode *Tag) {
}
MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) {
- SmallVector<Metadata *, 2> Vals(2);
- Vals[0] = createString("loop_header_weight");
- Vals[1] = createConstant(ConstantInt::get(Type::getInt64Ty(Context), Weight));
+ Metadata *Vals[] = {
+ createString("loop_header_weight"),
+ createConstant(ConstantInt::get(Type::getInt64Ty(Context), Weight)),
+ };
return MDNode::get(Context, Vals);
}
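
A minimal sketch (helper name illustrative) of using the rewritten builder to attach an irreducible-loop-header weight:

#include <cstdint>
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

static void tagIrrLoopHeader(Instruction *HeaderTerm, uint64_t Weight) {
  MDBuilder MDB(HeaderTerm->getContext());
  HeaderTerm->setMetadata(LLVMContext::MD_irr_loop,
                          MDB.createIrrLoopHeaderWeight(Weight));
}
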
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index 83a22d95bd81..5536c2497f1e 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -237,7 +237,7 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
// Copy out uses since UseMap will get touched below.
using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>;
SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end());
- llvm::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) {
+ llvm::sort(Uses, [](const UseTy &L, const UseTy &R) {
return L.second.second < R.second.second;
});
for (const auto &Pair : Uses) {
@@ -290,7 +290,7 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) {
// Copy out uses since UseMap could get touched below.
using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>;
SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end());
- llvm::sort(Uses.begin(), Uses.end(), [](const UseTy &L, const UseTy &R) {
+ llvm::sort(Uses, [](const UseTy &L, const UseTy &R) {
return L.second.second < R.second.second;
});
UseMap.clear();
@@ -1484,7 +1484,7 @@ void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) {
std::vector<uint64_t> Elements(OrigElements.size() + 2);
Elements[0] = dwarf::DW_OP_plus_uconst;
Elements[1] = Offset;
- std::copy(OrigElements.begin(), OrigElements.end(), Elements.begin() + 2);
+ llvm::copy(OrigElements, Elements.begin() + 2);
E = DIExpression::get(getContext(), Elements);
Attachment = DIGlobalVariableExpression::get(getContext(), GV, E);
}
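
A short sketch of the range helpers these hunks switch to: llvm::sort and llvm::copy take the whole container, so the explicit begin()/end() pairs disappear.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static void sortAndCopy(SmallVectorImpl<int> &In, SmallVectorImpl<int> &Out) {
  llvm::sort(In, [](int L, int R) { return L < R; });
  Out.resize(In.size());
  llvm::copy(In, Out.begin());
}
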
diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp
index f18024063533..93f27304424f 100644
--- a/contrib/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm/lib/IR/Module.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/Module.h"
#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -45,6 +46,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/VersionTuple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -145,7 +147,8 @@ Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
GlobalValue *F = getNamedValue(Name);
if (!F) {
// Nope, add it
- Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage,
+ DL.getProgramAddressSpace(), Name);
if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
New->setAttributes(AttributeList);
FunctionList.push_back(New);
@@ -154,8 +157,9 @@ Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
// If the function exists but has the wrong type, return a bitcast to the
// right type.
- if (F->getType() != PointerType::getUnqual(Ty))
- return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty));
+ auto *PTy = PointerType::get(Ty, F->getAddressSpace());
+ if (F->getType() != PTy)
+ return ConstantExpr::getBitCast(F, PTy);
// Otherwise, we just found the existing function or a prototype.
return F;
@@ -199,16 +203,14 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name,
/// with a constantexpr cast to the right type.
/// 3. Finally, if the existing global is the correct declaration, return the
/// existing global.
-Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
+Constant *Module::getOrInsertGlobal(
+ StringRef Name, Type *Ty,
+ function_ref<GlobalVariable *()> CreateGlobalCallback) {
// See if we have a definition for the specified global already.
GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
- if (!GV) {
- // Nope, add it
- GlobalVariable *New =
- new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
- nullptr, Name);
- return New; // Return the new declaration.
- }
+ if (!GV)
+ GV = CreateGlobalCallback();
+ assert(GV && "The CreateGlobalCallback is expected to create a global");
// If the variable exists but has the wrong type, return a bitcast to the
// right type.
@@ -221,6 +223,14 @@ Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
return GV;
}
+// Overload to construct a global variable using its constructor's defaults.
+Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
+ return getOrInsertGlobal(Name, Ty, [&] {
+ return new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, Name);
+ });
+}
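
A minimal sketch (helper name illustrative) of the callback overload: the callback runs only when the global does not exist yet, so the caller controls linkage and initializer on first creation.

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static Constant *getOrCreateCounter(Module &M, StringRef Name, Type *Ty) {
  return M.getOrInsertGlobal(Name, Ty, [&] {
    return new GlobalVariable(M, Ty, /*isConstant=*/false,
                              GlobalValue::InternalLinkage,
                              Constant::getNullValue(Ty), Name);
  });
}
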
+
//===----------------------------------------------------------------------===//
// Methods for easy access to the global variables in the module.
//
@@ -505,6 +515,24 @@ void Module::setPIELevel(PIELevel::Level PL) {
addModuleFlag(ModFlagBehavior::Max, "PIE Level", PL);
}
+Optional<CodeModel::Model> Module::getCodeModel() const {
+ auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("Code Model"));
+
+ if (!Val)
+ return None;
+
+ return static_cast<CodeModel::Model>(
+ cast<ConstantInt>(Val->getValue())->getZExtValue());
+}
+
+void Module::setCodeModel(CodeModel::Model CL) {
+ // Linking object files with different code models is undefined behavior
+ // because the compiler would have to generate additional code (to span
+ // longer jumps) if a larger code model is used with a smaller one.
+ // Therefore we will treat attempts to mix code models as an error.
+ addModuleFlag(ModFlagBehavior::Error, "Code Model", CL);
+}
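
A minimal sketch of the new accessors (helper name illustrative): the code model is stored as an Error-behavior module flag, so linking modules with different models fails rather than miscompiling.

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CodeGen.h"
using namespace llvm;

static bool usesSmallCodeModel(Module &M) {
  M.setCodeModel(CodeModel::Small);
  Optional<CodeModel::Model> CM = M.getCodeModel();
  return CM && *CM == CodeModel::Small;
}
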
+
void Module::setProfileSummary(Metadata *M) {
addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M);
}
@@ -526,6 +554,45 @@ void Module::setRtLibUseGOT() {
addModuleFlag(ModFlagBehavior::Max, "RtLibUseGOT", 1);
}
+void Module::setSDKVersion(const VersionTuple &V) {
+ SmallVector<unsigned, 3> Entries;
+ Entries.push_back(V.getMajor());
+ if (auto Minor = V.getMinor()) {
+ Entries.push_back(*Minor);
+ if (auto Subminor = V.getSubminor())
+ Entries.push_back(*Subminor);
+ // Ignore the 'build' component as it can't be represented in the object
+ // file.
+ }
+ addModuleFlag(ModFlagBehavior::Warning, "SDK Version",
+ ConstantDataArray::get(Context, Entries));
+}
+
+VersionTuple Module::getSDKVersion() const {
+ auto *CM = dyn_cast_or_null<ConstantAsMetadata>(getModuleFlag("SDK Version"));
+ if (!CM)
+ return {};
+ auto *Arr = dyn_cast_or_null<ConstantDataArray>(CM->getValue());
+ if (!Arr)
+ return {};
+ auto getVersionComponent = [&](unsigned Index) -> Optional<unsigned> {
+ if (Index >= Arr->getNumElements())
+ return None;
+ return (unsigned)Arr->getElementAsInteger(Index);
+ };
+ auto Major = getVersionComponent(0);
+ if (!Major)
+ return {};
+ VersionTuple Result = VersionTuple(*Major);
+ if (auto Minor = getVersionComponent(1)) {
+ Result = VersionTuple(*Major, *Minor);
+ if (auto Subminor = getVersionComponent(2)) {
+ Result = VersionTuple(*Major, *Minor, *Subminor);
+ }
+ }
+ return Result;
+}
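
A minimal sketch (helper name illustrative) round-tripping an SDK version through the new "SDK Version" module flag; the build component, if present, is intentionally dropped.

#include "llvm/IR/Module.h"
#include "llvm/Support/VersionTuple.h"
using namespace llvm;

static VersionTuple recordSDKVersion(Module &M) {
  M.setSDKVersion(VersionTuple(10, 14, 1));
  return M.getSDKVersion(); // VersionTuple(10, 14, 1)
}
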
+
GlobalVariable *llvm::collectUsedGlobalVariables(
const Module &M, SmallPtrSetImpl<GlobalValue *> &Set, bool CompilerUsed) {
const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
diff --git a/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp b/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
index 4c4466f9a902..46b88cd31779 100644
--- a/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -14,11 +14,17 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "module-summary-index"
+
+STATISTIC(ReadOnlyLiveGVars,
+ "Number of live global variables marked read only");
+
FunctionSummary FunctionSummary::ExternalNode =
FunctionSummary::makeDummyFunctionSummary({});
bool ValueInfo::isDSOLocal() const {
@@ -30,6 +36,17 @@ bool ValueInfo::isDSOLocal() const {
});
}
+// Gets the number of immutable refs in RefEdgeList
+unsigned FunctionSummary::immutableRefCount() const {
+ // Here we take advantage of having all readonly references
+ // located in the end of the RefEdgeList.
+ auto Refs = refs();
+ unsigned ImmutableRefCnt = 0;
+ for (int I = Refs.size() - 1; I >= 0 && Refs[I].isReadOnly(); --I)
+ ImmutableRefCnt++;
+ return ImmutableRefCnt;
+}
+
// Collect for the given module the list of function it defines
// (GUID -> Summary).
void ModuleSummaryIndex::collectDefinedFunctionsForModule(
@@ -84,6 +101,80 @@ bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const {
return false;
}
+static void propagateConstantsToRefs(GlobalValueSummary *S) {
+ // If reference is not readonly then referenced summary is not
+ // readonly either. Note that:
+ // - All references from GlobalVarSummary are conservatively considered as
+ // not readonly. Tracking them properly requires more complex analysis
+ // then we have now.
+ //
+ // - AliasSummary objects have no refs at all so this function is a no-op
+ // for them.
+ for (auto &VI : S->refs()) {
+ if (VI.isReadOnly()) {
+ // We only mark refs as readonly when computing function summaries on
+ // analysis phase.
+ assert(isa<FunctionSummary>(S));
+ continue;
+ }
+ for (auto &Ref : VI.getSummaryList())
+      // If a reference to an alias is not readonly then the aliasee is not readonly
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(Ref->getBaseObject()))
+ GVS->setReadOnly(false);
+ }
+}
+
+// Do the constant propagation in combined index.
+// The goal of constant propagation is internalization of readonly
+// variables. To determine which variables are readonly and which
+// are not we take following steps:
+// - During analysis we speculatively assign readonly attribute to
+// all variables which can be internalized. When computing function
+// summary we also assign readonly attribute to a reference if
+// function doesn't modify referenced variable.
+//
+// - After computing dead symbols in combined index we do the constant
+// propagation. During this step we clear readonly attribute from
+// all variables which:
+// a. are preserved or can't be imported
+// b. referenced by any global variable initializer
+// c. referenced by a function and reference is not readonly
+//
+// Internalization itself happens in the backend after import is finished
+// See internalizeImmutableGVs.
+void ModuleSummaryIndex::propagateConstants(
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ for (auto &P : *this)
+ for (auto &S : P.second.SummaryList) {
+ if (!isGlobalValueLive(S.get()))
+ // We don't examine references from dead objects
+ continue;
+
+ // Global variable can't be marked read only if it is not eligible
+ // to import since we need to ensure that all external references
+ // get a local (imported) copy. It also can't be marked read only
+ // if it or any alias (since alias points to the same memory) are
+ // preserved or notEligibleToImport, since either of those means
+ // there could be writes that are not visible (because preserved
+ // means it could have external to DSO writes, and notEligibleToImport
+ // means it could have writes via inline assembly leading it to be
+ // in the @llvm.*used).
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S->getBaseObject()))
+ // Here we intentionally pass S.get() not GVS, because S could be
+ // an alias.
+ if (!canImportGlobalVar(S.get()) || GUIDPreservedSymbols.count(P.first))
+ GVS->setReadOnly(false);
+ propagateConstantsToRefs(S.get());
+ }
+ if (llvm::AreStatisticsEnabled())
+ for (auto &P : *this)
+ if (P.second.SummaryList.size())
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(
+ P.second.SummaryList[0]->getBaseObject()))
+ if (isGlobalValueLive(GVS) && GVS->isReadOnly())
+ ReadOnlyLiveGVars++;
+}
+
// TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot)
// then delete this function and update its tests
LLVM_DUMP_METHOD
@@ -108,6 +199,7 @@ namespace {
struct Attributes {
void add(const Twine &Name, const Twine &Value,
const Twine &Comment = Twine());
+ void addComment(const Twine &Comment);
std::string getAsString() const;
std::vector<std::string> Attrs;
@@ -129,6 +221,10 @@ void Attributes::add(const Twine &Name, const Twine &Value,
A += Value.str();
A += "\"";
Attrs.push_back(A);
+ addComment(Comment);
+}
+
+void Attributes::addComment(const Twine &Comment) {
if (!Comment.isTriviallyEmpty()) {
if (Comments.empty())
Comments = " // ";
@@ -182,8 +278,9 @@ static std::string linkageToString(GlobalValue::LinkageTypes LT) {
static std::string fflagsToString(FunctionSummary::FFlags F) {
auto FlagValue = [](unsigned V) { return V ? '1' : '0'; };
- char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
- FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias), 0};
+ char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
+ FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias),
+ FlagValue(F.NoInline), 0};
return FlagRep;
}
@@ -198,9 +295,12 @@ static std::string getSummaryAttributes(GlobalValueSummary* GVS) {
", ffl: " + fflagsToString(FS->fflags());
}
+static std::string getNodeVisualName(GlobalValue::GUID Id) {
+ return std::string("@") + std::to_string(Id);
+}
+
static std::string getNodeVisualName(const ValueInfo &VI) {
- return VI.name().empty() ? std::string("@") + std::to_string(VI.getGUID())
- : VI.name().str();
+ return VI.name().empty() ? getNodeVisualName(VI.getGUID()) : VI.name().str();
}
static std::string getNodeLabel(const ValueInfo &VI, GlobalValueSummary *GVS) {
@@ -221,13 +321,25 @@ static std::string getNodeLabel(const ValueInfo &VI, GlobalValueSummary *GVS) {
// specific module associated with it. Typically this is function
// or variable defined in native object or library.
static void defineExternalNode(raw_ostream &OS, const char *Pfx,
- const ValueInfo &VI) {
- auto StrId = std::to_string(VI.getGUID());
- OS << " " << StrId << " [label=\"" << getNodeVisualName(VI)
- << "\"]; // defined externally\n";
+ const ValueInfo &VI, GlobalValue::GUID Id) {
+ auto StrId = std::to_string(Id);
+ OS << " " << StrId << " [label=\"";
+
+ if (VI) {
+ OS << getNodeVisualName(VI);
+ } else {
+ OS << getNodeVisualName(Id);
+ }
+ OS << "\"]; // defined externally\n";
+}
+
+static bool hasReadOnlyFlag(const GlobalValueSummary *S) {
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S))
+ return GVS->isReadOnly();
+ return false;
}
-void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
+void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
std::vector<Edge> CrossModuleEdges;
DenseMap<GlobalValue::GUID, std::vector<uint64_t>> NodeMap;
StringMap<GVSummaryMapTy> ModuleToDefinedGVS;
@@ -241,14 +353,18 @@ void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
"_" + std::to_string(Id);
};
- auto DrawEdge = [&](const char *Pfx, int SrcMod, GlobalValue::GUID SrcId,
- int DstMod, GlobalValue::GUID DstId, int TypeOrHotness) {
- // 0 corresponds to alias edge, 1 to ref edge, 2 to call with unknown
- // hotness, ...
- TypeOrHotness += 2;
+ auto DrawEdge = [&](const char *Pfx, uint64_t SrcMod, GlobalValue::GUID SrcId,
+ uint64_t DstMod, GlobalValue::GUID DstId,
+ int TypeOrHotness) {
+ // 0 - alias
+ // 1 - reference
+ // 2 - constant reference
+ // Other value: (hotness - 3).
+ TypeOrHotness += 3;
static const char *EdgeAttrs[] = {
" [style=dotted]; // alias",
" [style=dashed]; // ref",
+ " [style=dashed,color=forestgreen]; // const-ref",
" // call (hotness : Unknown)",
" [color=blue]; // call (hotness : Cold)",
" // call (hotness : None)",
@@ -291,6 +407,8 @@ void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
A.add("shape", "box");
} else {
A.add("shape", "Mrecord", "variable");
+ if (Flags.Live && hasReadOnlyFlag(SummaryIt.second))
+ A.addComment("immutable");
}
auto VI = getValueInfo(SummaryIt.first);
@@ -308,13 +426,20 @@ void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
for (auto &SummaryIt : GVSMap) {
auto *GVS = SummaryIt.second;
for (auto &R : GVS->refs())
- Draw(SummaryIt.first, R.getGUID(), -1);
+ Draw(SummaryIt.first, R.getGUID(), R.isReadOnly() ? -1 : -2);
if (auto *AS = dyn_cast_or_null<AliasSummary>(SummaryIt.second)) {
- auto AliaseeOrigId = AS->getAliasee().getOriginalName();
- auto AliaseeId = getGUIDFromOriginalID(AliaseeOrigId);
-
- Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -2);
+ GlobalValue::GUID AliaseeId;
+ if (AS->hasAliaseeGUID())
+ AliaseeId = AS->getAliaseeGUID();
+ else {
+ auto AliaseeOrigId = AS->getAliasee().getOriginalName();
+ AliaseeId = getGUIDFromOriginalID(AliaseeOrigId);
+ if (!AliaseeId)
+ AliaseeId = AliaseeOrigId;
+ }
+
+ Draw(SummaryIt.first, AliaseeId, -3);
continue;
}
@@ -330,7 +455,7 @@ void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
for (auto &E : CrossModuleEdges) {
auto &ModList = NodeMap[E.Dst];
if (ModList.empty()) {
- defineExternalNode(OS, " ", getValueInfo(E.Dst));
+ defineExternalNode(OS, " ", getValueInfo(E.Dst), E.Dst);
// Add fake module to the list to draw an edge to an external node
// in the loop below.
ModList.push_back(-1);
diff --git a/contrib/llvm/lib/IR/PassInstrumentation.cpp b/contrib/llvm/lib/IR/PassInstrumentation.cpp
new file mode 100644
index 000000000000..5aa2bc6d895e
--- /dev/null
+++ b/contrib/llvm/lib/IR/PassInstrumentation.cpp
@@ -0,0 +1,22 @@
+//===- PassInstrumentation.cpp - Pass Instrumentation interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of PassInstrumentation class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+AnalysisKey PassInstrumentationAnalysis::Key;
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/IR/PassTimingInfo.cpp b/contrib/llvm/lib/IR/PassTimingInfo.cpp
new file mode 100644
index 000000000000..40b3977ecbd9
--- /dev/null
+++ b/contrib/llvm/lib/IR/PassTimingInfo.cpp
@@ -0,0 +1,268 @@
+//===- PassTimingInfo.cpp - LLVM Pass Timing Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass Timing infrastructure for both
+// new and legacy pass managers.
+//
+// PassTimingInfo Class - This class is used to calculate information about the
+// amount of time each pass takes to execute. This only happens when
+// -time-passes is enabled on the command line.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "time-passes"
+
+namespace llvm {
+
+bool TimePassesIsEnabled = false;
+
+static cl::opt<bool, true> EnableTiming(
+ "time-passes", cl::location(TimePassesIsEnabled), cl::Hidden,
+ cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+namespace {
+namespace legacy {
+
+//===----------------------------------------------------------------------===//
+// Legacy pass manager's PassTimingInfo implementation
+
+/// Provides an interface for collecting pass timing information.
+///
+/// It was originally intended to be generic, but the interfaces have since
+/// been split; this class is now exclusively for legacy-pass-manager use.
+class PassTimingInfo {
+public:
+ using PassInstanceID = void *;
+
+private:
+ StringMap<unsigned> PassIDCountMap; ///< Map that counts instances of passes
+ DenseMap<PassInstanceID, std::unique_ptr<Timer>> TimingData; ///< timers for pass instances
+ TimerGroup TG;
+
+public:
+ /// Default constructor for a not-yet-active timing info object.
+ /// Use \p init() to activate it.
+ PassTimingInfo();
+
+ /// Print out timing information and release timers.
+ ~PassTimingInfo();
+
+ /// Initializes the static \p TheTimeInfo member to a non-null value when
+ /// -time-passes is enabled. Leaves it null otherwise.
+ ///
+ /// This method may be called multiple times.
+ static void init();
+
+ /// Prints out timing information and then resets the timers.
+ void print();
+
+ /// Returns the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *, PassInstanceID);
+
+ static PassTimingInfo *TheTimeInfo;
+
+private:
+ Timer *newPassTimer(StringRef PassID, StringRef PassDesc);
+};
+
+static ManagedStatic<sys::SmartMutex<true>> TimingInfoMutex;
+
+PassTimingInfo::PassTimingInfo()
+ : TG("pass", "... Pass execution timing report ...") {}
+
+PassTimingInfo::~PassTimingInfo() {
+ // Deleting the timers accumulates their info into the TG member.
+ // The TG member is then (implicitly) destroyed, which actually prints the report.
+ TimingData.clear();
+}
+
+void PassTimingInfo::init() {
+ if (!TimePassesIsEnabled || TheTimeInfo)
+ return;
+
+ // Constructed the first time this is called, iff -time-passes is enabled.
+ // This guarantees that the object will be constructed after static globals,
+ // thus it will be destroyed before them.
+ static ManagedStatic<PassTimingInfo> TTI;
+ TheTimeInfo = &*TTI;
+}
+
+/// Prints out timing information and then resets the timers.
+void PassTimingInfo::print() { TG.print(*CreateInfoOutputFile()); }
+
+Timer *PassTimingInfo::newPassTimer(StringRef PassID, StringRef PassDesc) {
+ unsigned &num = PassIDCountMap[PassID];
+ num++;
+ // Append the description with a pass-instance number for all but the first instance.
+ std::string PassDescNumbered =
+ num <= 1 ? PassDesc.str() : formatv("{0} #{1}", PassDesc, num).str();
+ return new Timer(PassID, PassDescNumbered, TG);
+}
+
+Timer *PassTimingInfo::getPassTimer(Pass *P, PassInstanceID Pass) {
+ if (P->getAsPMDataManager())
+ return nullptr;
+
+ init();
+ sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
+ std::unique_ptr<Timer> &T = TimingData[Pass];
+
+ if (!T) {
+ StringRef PassName = P->getPassName();
+ StringRef PassArgument;
+ if (const PassInfo *PI = Pass::lookupPassInfo(P->getPassID()))
+ PassArgument = PI->getPassArgument();
+ T.reset(newPassTimer(PassArgument.empty() ? PassName : PassArgument, PassName));
+ }
+ return T.get();
+}
+
+PassTimingInfo *PassTimingInfo::TheTimeInfo;
+} // namespace legacy
+} // namespace
+
+Timer *getPassTimer(Pass *P) {
+ legacy::PassTimingInfo::init();
+ if (legacy::PassTimingInfo::TheTimeInfo)
+ return legacy::PassTimingInfo::TheTimeInfo->getPassTimer(P, P);
+ return nullptr;
+}
+
+/// If timing is enabled, report the times collected up to now and then reset
+/// them.
+void reportAndResetTimings() {
+ if (legacy::PassTimingInfo::TheTimeInfo)
+ legacy::PassTimingInfo::TheTimeInfo->print();
+}
+
+//===----------------------------------------------------------------------===//
+// Pass timing handling for the New Pass Manager
+//===----------------------------------------------------------------------===//
+
+/// Returns the timer for the specified pass invocation of \p PassID.
+/// Each call creates a new timer.
+Timer &TimePassesHandler::getPassTimer(StringRef PassID) {
+ // Bump counts for each request of the timer.
+ unsigned Count = nextPassID(PassID);
+
+ // Unconditionally append the description with a pass-invocation number.
+ std::string FullDesc = formatv("{0} #{1}", PassID, Count).str();
+
+ PassInvocationID UID{PassID, Count};
+ Timer *T = new Timer(PassID, FullDesc, TG);
+ auto Pair = TimingData.try_emplace(UID, T);
+ assert(Pair.second && "should always create a new timer");
+ return *(Pair.first->second.get());
+}
+
+TimePassesHandler::TimePassesHandler(bool Enabled)
+ : TG("pass", "... Pass execution timing report ..."), Enabled(Enabled) {}
+
+void TimePassesHandler::print() { TG.print(*CreateInfoOutputFile()); }
+
+LLVM_DUMP_METHOD void TimePassesHandler::dump() const {
+ dbgs() << "Dumping timers for " << getTypeName<TimePassesHandler>()
+ << ":\n\tRunning:\n";
+ for (auto &I : TimingData) {
+ const Timer *MyTimer = I.second.get();
+ if (!MyTimer || MyTimer->isRunning())
+ dbgs() << "\tTimer " << MyTimer << " for pass " << I.first.first << "("
+ << I.first.second << ")\n";
+ }
+ dbgs() << "\tTriggered:\n";
+ for (auto &I : TimingData) {
+ const Timer *MyTimer = I.second.get();
+ if (!MyTimer || (MyTimer->hasTriggered() && !MyTimer->isRunning()))
+ dbgs() << "\tTimer " << MyTimer << " for pass " << I.first.first << "("
+ << I.first.second << ")\n";
+ }
+}
+
+void TimePassesHandler::startTimer(StringRef PassID) {
+ Timer &MyTimer = getPassTimer(PassID);
+ TimerStack.push_back(&MyTimer);
+ if (!MyTimer.isRunning())
+ MyTimer.startTimer();
+}
+
+void TimePassesHandler::stopTimer(StringRef PassID) {
+ assert(TimerStack.size() > 0 && "empty stack in popTimer");
+ Timer *MyTimer = TimerStack.pop_back_val();
+ assert(MyTimer && "timer should be present");
+ if (MyTimer->isRunning())
+ MyTimer->stopTimer();
+}
+
+static bool matchPassManager(StringRef PassID) {
+ size_t prefix_pos = PassID.find('<');
+ if (prefix_pos == StringRef::npos)
+ return false;
+ StringRef Prefix = PassID.substr(0, prefix_pos);
+ return Prefix.endswith("PassManager") || Prefix.endswith("PassAdaptor") ||
+ Prefix.endswith("AnalysisManagerProxy");
+}
+
+bool TimePassesHandler::runBeforePass(StringRef PassID) {
+ if (matchPassManager(PassID))
+ return true;
+
+ startTimer(PassID);
+
+ LLVM_DEBUG(dbgs() << "after runBeforePass(" << PassID << ")\n");
+ LLVM_DEBUG(dump());
+
+ // We are not going to skip this pass, so return true.
+ return true;
+}
+
+void TimePassesHandler::runAfterPass(StringRef PassID) {
+ if (matchPassManager(PassID))
+ return;
+
+ stopTimer(PassID);
+
+ LLVM_DEBUG(dbgs() << "after runAfterPass(" << PassID << ")\n");
+ LLVM_DEBUG(dump());
+}
+
+void TimePassesHandler::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+ if (!Enabled)
+ return;
+
+ PIC.registerBeforePassCallback(
+ [this](StringRef P, Any) { return this->runBeforePass(P); });
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any) { this->runAfterPass(P); });
+ PIC.registerAfterPassInvalidatedCallback(
+ [this](StringRef P) { this->runAfterPass(P); });
+ PIC.registerBeforeAnalysisCallback(
+ [this](StringRef P, Any) { this->runBeforePass(P); });
+ PIC.registerAfterAnalysisCallback(
+ [this](StringRef P, Any) { this->runAfterPass(P); });
+}
+
+} // namespace llvm
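The new-pass-manager half of this file keys timers by (pass name, invocation count), so every invocation gets its own timer, and it attaches itself to the pipeline through the instrumentation callbacks defined above. A sketch of the intended wiring, using only the APIs visible in this file (handing PIC to the PassBuilder and analysis managers is elided):

    #include "llvm/IR/PassInstrumentation.h"
    #include "llvm/IR/PassTimingInfo.h"

    void setUpPassTiming(llvm::PassInstrumentationCallbacks &PIC) {
      // The handler must outlive the pipeline because the registered lambdas
      // capture it; a static local is one simple way to guarantee that.
      static llvm::TimePassesHandler TimePasses(/*Enabled=*/true);
      TimePasses.registerCallbacks(PIC);
      // At shutdown: TimePasses.print(); // "... Pass execution timing report ..."
    }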
diff --git a/contrib/llvm/lib/IR/SafepointIRVerifier.cpp b/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
index 6f73126be738..12ada1320225 100644
--- a/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -92,6 +92,7 @@ public:
Listed = true;
}
}
+ (void)Listed;
assert(Listed && "basic block is not found among incoming blocks");
return false;
}
@@ -133,7 +134,7 @@ public:
// Top-down walk of the dominator tree
ReversePostOrderTraversal<const Function *> RPOT(&F);
for (const BasicBlock *BB : RPOT) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
assert(TI && "blocks must be well formed");
// For conditional branches, we can perform simple conditional propagation on
@@ -256,8 +257,7 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
- containsGCPtrType);
+ return llvm::any_of(ST->elements(), containsGCPtrType);
return false;
}
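The SafepointIRVerifier change swaps the iterator-pair std::any_of for the range-based llvm::any_of from STLExtras.h, which takes the container (or any range) directly. A self-contained illustration, not LLVM code:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    // llvm::any_of takes the whole range; no begin()/end() spelling needed.
    static bool hasNegative(const std::vector<int> &V) {
      return llvm::any_of(V, [](int X) { return X < 0; });
    }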
diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp
index 83016496ff7e..0fb079c5ab73 100644
--- a/contrib/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm/lib/IR/Type.cpp
@@ -297,20 +297,26 @@ FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
FunctionType *FunctionType::get(Type *ReturnType,
ArrayRef<Type*> Params, bool isVarArg) {
LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
- FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
- auto I = pImpl->FunctionTypes.find_as(Key);
+ const FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
FunctionType *FT;
-
- if (I == pImpl->FunctionTypes.end()) {
+ // We only want to allocate a fresh function type when none exists, and we
+ // don't want to perform two lookups (one to check for existence and one to
+ // insert the newly allocated type), so we look up by Key and, if nothing is
+ // found, update the returned slot in place with a newly allocated function
+ // type.
+ auto Insertion = pImpl->FunctionTypes.insert_as(nullptr, Key);
+ if (Insertion.second) {
+ // The function type was not found. Allocate one and update FunctionTypes
+ // in-place.
FT = (FunctionType *)pImpl->TypeAllocator.Allocate(
sizeof(FunctionType) + sizeof(Type *) * (Params.size() + 1),
alignof(FunctionType));
new (FT) FunctionType(ReturnType, Params, isVarArg);
- pImpl->FunctionTypes.insert(FT);
+ *Insertion.first = FT;
} else {
- FT = *I;
+ // The function type was found. Just return it.
+ FT = *Insertion.first;
}
-
return FT;
}
@@ -336,18 +342,25 @@ bool FunctionType::isValidArgumentType(Type *ArgTy) {
StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
bool isPacked) {
LLVMContextImpl *pImpl = Context.pImpl;
- AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
- auto I = pImpl->AnonStructTypes.find_as(Key);
- StructType *ST;
+ const AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
- if (I == pImpl->AnonStructTypes.end()) {
- // Value not found. Create a new type!
+ StructType *ST;
+ // We only want to allocate a fresh struct type when none exists, and we
+ // don't want to perform two lookups (one to check for existence and one to
+ // insert the newly allocated type), so we look up by Key and, if nothing is
+ // found, update the returned slot in place with a newly allocated struct
+ // type.
+ auto Insertion = pImpl->AnonStructTypes.insert_as(nullptr, Key);
+ if (Insertion.second) {
+ // The struct type was not found. Allocate one and update AnonStructTypes
+ // in-place.
ST = new (Context.pImpl->TypeAllocator) StructType(Context);
ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
ST->setBody(ETypes, isPacked);
- Context.pImpl->AnonStructTypes.insert(ST);
+ *Insertion.first = ST;
} else {
- ST = *I;
+ // The struct type was found. Just return it.
+ ST = *Insertion.first;
}
return ST;
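Both Type.cpp hunks replace a find_as() followed by insert() with a single insert_as(nullptr, Key) call: the table is probed once, and the slot is filled with a freshly allocated type only when the key was absent. The same pattern with a plain standard-library map, as a generic illustration (Entry, Cache and getOrCreate are made-up names, not LLVM code):

    #include <memory>
    #include <string>
    #include <unordered_map>

    struct Entry { std::string Name; };
    std::unordered_map<std::string, std::unique_ptr<Entry>> Cache;

    Entry *getOrCreate(const std::string &Key) {
      auto Insertion = Cache.try_emplace(Key, nullptr);
      if (Insertion.second) // key was absent: allocate and fill the slot
        Insertion.first->second = std::make_unique<Entry>(Entry{Key});
      return Insertion.first->second.get();
    }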
diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp
index 295d6ecf0db0..80b993c89f7f 100644
--- a/contrib/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm/lib/IR/Value.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -130,20 +129,11 @@ void Value::destroyValueName() {
}
bool Value::hasNUses(unsigned N) const {
- const_use_iterator UI = use_begin(), E = use_end();
-
- for (; N; --N, ++UI)
- if (UI == E) return false; // Too few.
- return UI == E;
+ return hasNItems(use_begin(), use_end(), N);
}
bool Value::hasNUsesOrMore(unsigned N) const {
- const_use_iterator UI = use_begin(), E = use_end();
-
- for (; N; --N, ++UI)
- if (UI == E) return false; // Too few.
-
- return true;
+ return hasNItemsOrMore(use_begin(), use_end(), N);
}
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
@@ -405,7 +395,7 @@ static bool contains(Value *Expr, Value *V) {
}
#endif // NDEBUG
-void Value::doRAUW(Value *New, bool NoMetadata) {
+void Value::doRAUW(Value *New, ReplaceMetadataUses ReplaceMetaUses) {
assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
assert(!contains(New, this) &&
"this->replaceAllUsesWith(expr(this)) is NOT valid!");
@@ -415,7 +405,7 @@ void Value::doRAUW(Value *New, bool NoMetadata) {
// Notify all ValueHandles (if present) that this value is going away.
if (HasValueHandle)
ValueHandleBase::ValueIsRAUWd(this, New);
- if (!NoMetadata && isUsedByMetadata())
+ if (ReplaceMetaUses == ReplaceMetadataUses::Yes && isUsedByMetadata())
ValueAsMetadata::handleRAUW(this, New);
while (!materialized_use_empty()) {
@@ -437,11 +427,11 @@ void Value::doRAUW(Value *New, bool NoMetadata) {
}
void Value::replaceAllUsesWith(Value *New) {
- doRAUW(New, false /* NoMetadata */);
+ doRAUW(New, ReplaceMetadataUses::Yes);
}
void Value::replaceNonMetadataUsesWith(Value *New) {
- doRAUW(New, true /* NoMetadata */);
+ doRAUW(New, ReplaceMetadataUses::No);
}
// Like replaceAllUsesWith except it does not handle constants or basic blocks.
@@ -512,8 +502,8 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
return V;
V = GA->getAliasee();
} else {
- if (auto CS = ImmutableCallSite(V)) {
- if (const Value *RV = CS.getReturnedArgOperand()) {
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
+ if (const Value *RV = Call->getReturnedArgOperand()) {
V = RV;
continue;
}
@@ -521,9 +511,9 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
// but it can't be marked with returned attribute, that's why it needs
// special case.
if (StripKind == PSK_ZeroIndicesAndAliasesAndInvariantGroups &&
- (CS.getIntrinsicID() == Intrinsic::launder_invariant_group ||
- CS.getIntrinsicID() == Intrinsic::strip_invariant_group)) {
- V = CS.getArgOperand(0);
+ (Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
+ Call->getIntrinsicID() == Intrinsic::strip_invariant_group)) {
+ V = Call->getArgOperand(0);
continue;
}
}
@@ -582,8 +572,8 @@ Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
} else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
V = GA->getAliasee();
} else {
- if (auto CS = ImmutableCallSite(V))
- if (const Value *RV = CS.getReturnedArgOperand()) {
+ if (const auto *Call = dyn_cast<CallBase>(V))
+ if (const Value *RV = Call->getReturnedArgOperand()) {
V = RV;
continue;
}
@@ -617,10 +607,11 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
DerefBytes = A->getDereferenceableOrNullBytes();
CanBeNull = true;
}
- } else if (auto CS = ImmutableCallSite(this)) {
- DerefBytes = CS.getDereferenceableBytes(AttributeList::ReturnIndex);
+ } else if (const auto *Call = dyn_cast<CallBase>(this)) {
+ DerefBytes = Call->getDereferenceableBytes(AttributeList::ReturnIndex);
if (DerefBytes == 0) {
- DerefBytes = CS.getDereferenceableOrNullBytes(AttributeList::ReturnIndex);
+ DerefBytes =
+ Call->getDereferenceableOrNullBytes(AttributeList::ReturnIndex);
CanBeNull = true;
}
} else if (const LoadInst *LI = dyn_cast<LoadInst>(this)) {
@@ -692,8 +683,8 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const {
if (AllocatedType->isSized())
Align = DL.getPrefTypeAlignment(AllocatedType);
}
- } else if (auto CS = ImmutableCallSite(this))
- Align = CS.getAttributes().getRetAlignment();
+ } else if (const auto *Call = dyn_cast<CallBase>(this))
+ Align = Call->getAttributes().getRetAlignment();
else if (const LoadInst *LI = dyn_cast<LoadInst>(this))
if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
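The Value.cpp changes retire ImmutableCallSite in favour of casting to CallBase, the common base class of CallInst and InvokeInst, and replace the hand-rolled use-counting loops with the hasNItems/hasNItemsOrMore helpers. The cast idiom in isolation, as a sketch that mirrors the stripping loops above (the function name is made up):

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Value.h"

    // Follow a call's 'returned' argument, if any, the way the pointer
    // stripping loops above do.
    static const llvm::Value *followReturnedArg(const llvm::Value *V) {
      if (const auto *Call = llvm::dyn_cast<llvm::CallBase>(V))
        if (const llvm::Value *RV = Call->getReturnedArgOperand())
          return RV;
      return V;
    }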
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index e5231bb78a36..30e77b92009f 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -65,7 +65,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
@@ -140,21 +139,20 @@ private:
}
void Write(const Value *V) {
- if (!V)
- return;
+ if (V)
+ Write(*V);
+ }
+
+ void Write(const Value &V) {
if (isa<Instruction>(V)) {
- V->print(*OS, MST);
+ V.print(*OS, MST);
*OS << '\n';
} else {
- V->printAsOperand(*OS, true, MST);
+ V.printAsOperand(*OS, true, MST);
*OS << '\n';
}
}
- void Write(ImmutableCallSite CS) {
- Write(CS.getInstruction());
- }
-
void Write(const Metadata *MD) {
if (!MD)
return;
@@ -281,13 +279,16 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// Whether the current function has a DISubprogram attached to it.
bool HasDebugInfo = false;
+ /// Whether source was present on the first DIFile encountered in each CU.
+ DenseMap<const DICompileUnit *, bool> HasSourceDebugInfo;
+
/// Stores the count of how many objects were passed to llvm.localescape for a
/// given function and the largest index passed to llvm.localrecover.
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
// Maps catchswitches and cleanuppads that unwind to siblings to the
// terminators that indicate the unwind, used to detect cycles therein.
- MapVector<Instruction *, TerminatorInst *> SiblingFuncletInfo;
+ MapVector<Instruction *, Instruction *> SiblingFuncletInfo;
/// Cache of constants visited in search of ConstantExprs.
SmallPtrSet<const Constant *, 32> ConstantExprVisited;
@@ -383,6 +384,7 @@ public:
visitModuleFlags(M);
visitModuleIdents(M);
+ visitModuleCommandLines(M);
verifyCompileUnits();
@@ -405,6 +407,7 @@ private:
void visitValueAsMetadata(const ValueAsMetadata &MD, Function *F);
void visitComdat(const Comdat &C);
void visitModuleIdents(const Module &M);
+ void visitModuleCommandLines(const Module &M);
void visitModuleFlags(const Module &M);
void visitModuleFlag(const MDNode *Op,
DenseMap<const MDString *, const MDNode *> &SeenIDs,
@@ -443,6 +446,8 @@ private:
void visitBitCastInst(BitCastInst &I);
void visitAddrSpaceCastInst(AddrSpaceCastInst &I);
void visitPHINode(PHINode &PN);
+ void visitCallBase(CallBase &Call);
+ void visitUnaryOperator(UnaryOperator &U);
void visitBinaryOperator(BinaryOperator &B);
void visitICmpInst(ICmpInst &IC);
void visitFCmpInst(FCmpInst &FC);
@@ -457,7 +462,7 @@ private:
void visitStoreInst(StoreInst &SI);
void verifyDominatesUse(Instruction &I, unsigned i);
void visitInstruction(Instruction &I);
- void visitTerminatorInst(TerminatorInst &I);
+ void visitTerminator(Instruction &I);
void visitBranchInst(BranchInst &BI);
void visitReturnInst(ReturnInst &RI);
void visitSwitchInst(SwitchInst &SI);
@@ -465,9 +470,9 @@ private:
void visitSelectInst(SelectInst &SI);
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
- void visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS);
+ void visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call);
void visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI);
- void visitDbgIntrinsic(StringRef Kind, DbgInfoIntrinsic &DII);
+ void visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII);
void visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI);
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
void visitAtomicRMWInst(AtomicRMWInst &RMWI);
@@ -485,8 +490,7 @@ private:
void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch);
void visitCleanupReturnInst(CleanupReturnInst &CRI);
- void verifyCallSite(CallSite CS);
- void verifySwiftErrorCallSite(CallSite CS, const Value *SwiftErrorVal);
+ void verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal);
void verifySwiftErrorValue(const Value *SwiftErrorVal);
void verifyMustTailCall(CallInst &CI);
bool performTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT,
@@ -501,16 +505,16 @@ private:
void visitConstantExprsRecursively(const Constant *EntryC);
void visitConstantExpr(const ConstantExpr *CE);
- void verifyStatepoint(ImmutableCallSite CS);
+ void verifyStatepoint(const CallBase &Call);
void verifyFrameRecoverIndices();
void verifySiblingFuncletUnwinds();
- void verifyFragmentExpression(const DbgInfoIntrinsic &I);
+ void verifyFragmentExpression(const DbgVariableIntrinsic &I);
template <typename ValueOrMetadata>
void verifyFragmentExpression(const DIVariable &V,
DIExpression::FragmentInfo Fragment,
ValueOrMetadata *Desc);
- void verifyFnArgs(const DbgInfoIntrinsic &I);
+ void verifyFnArgs(const DbgVariableIntrinsic &I);
/// Module-level debug info verification...
void verifyCompileUnits();
@@ -518,6 +522,9 @@ private:
/// Module-level verification that all @llvm.experimental.deoptimize
/// declarations share the same calling convention.
void verifyDeoptimizeCallingConvs();
+
+ /// Verify all-or-nothing property of DIFile source attribute within a CU.
+ void verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F);
};
} // end anonymous namespace
@@ -632,7 +639,8 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getValueType())) {
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
PointerType *FuncPtrTy =
- FunctionType::get(Type::getVoidTy(Context), false)->getPointerTo();
+ FunctionType::get(Type::getVoidTy(Context), false)->
+ getPointerTo(DL.getProgramAddressSpace());
// FIXME: Reject the 2-field form in LLVM 4.0.
Assert(STy &&
(STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
@@ -886,6 +894,8 @@ void Verifier::visitDIBasicType(const DIBasicType &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_base_type ||
N.getTag() == dwarf::DW_TAG_unspecified_type,
"invalid tag", &N);
+ AssertDI(!(N.isBigEndian() && N.isLittleEndian()),
+ "has conflicting flags", &N);
}
void Verifier::visitDIDerivedType(const DIDerivedType &N) {
@@ -1028,6 +1038,8 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) {
AssertDI(!N.getFile()->getFilename().empty(), "invalid filename", &N,
N.getFile());
+ verifySourceDebugInfo(N, *N.getFile());
+
AssertDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind),
"invalid emission kind", &N);
@@ -1105,6 +1117,8 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
AssertDI(N.isDistinct(), "subprogram definitions must be distinct", &N);
AssertDI(Unit, "subprogram definitions must have a compile unit", &N);
AssertDI(isa<DICompileUnit>(Unit), "invalid unit type", &N, Unit);
+ if (N.getFile())
+ verifySourceDebugInfo(*N.getUnit(), *N.getFile());
} else {
// Subprogram declarations (part of the type hierarchy).
AssertDI(!Unit, "subprogram declarations must not have a compile unit", &N);
@@ -1117,6 +1131,10 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
AssertDI(Op && isa<DIType>(Op), "invalid thrown type", &N, ThrownTypes,
Op);
}
+
+ if (N.areAllCallsDescribed())
+ AssertDI(N.isDefinition(),
+ "DIFlagAllCallsDescribed must be attached to a definition");
}
void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) {
@@ -1223,6 +1241,8 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
"local variable requires a valid scope", &N, N.getRawScope());
+ if (auto Ty = N.getType())
+ AssertDI(!isa<DISubroutineType>(Ty), "invalid type", &N, N.getType());
}
void Verifier::visitDILabel(const DILabel &N) {
@@ -1295,6 +1315,24 @@ void Verifier::visitModuleIdents(const Module &M) {
}
}
+void Verifier::visitModuleCommandLines(const Module &M) {
+ const NamedMDNode *CommandLines = M.getNamedMetadata("llvm.commandline");
+ if (!CommandLines)
+ return;
+
+ // llvm.commandline takes a list of metadata entries. Each entry has exactly
+ // one string operand. Scan each llvm.commandline entry and make sure that
+ // this requirement is met.
+ for (const MDNode *N : CommandLines->operands()) {
+ Assert(N->getNumOperands() == 1,
+ "incorrect number of operands in llvm.commandline metadata", N);
+ Assert(dyn_cast_or_null<MDString>(N->getOperand(0)),
+ ("invalid value for llvm.commandline metadata entry operand"
+ "(the operand should be a string)"),
+ N->getOperand(0));
+ }
+}
+
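visitModuleCommandLines() accepts only named metadata whose operands are single-string nodes. A sketch of building metadata that passes the check (the helper name and the command-line text are made up):

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"

    void addCommandLine(llvm::Module &M) {
      llvm::LLVMContext &Ctx = M.getContext();
      llvm::NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.commandline");
      // Each operand is an MDNode holding exactly one MDString.
      NMD->addOperand(llvm::MDNode::get(
          Ctx, {llvm::MDString::get(Ctx, "clang -O2 -g example.c")}));
    }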
void Verifier::visitModuleFlags(const Module &M) {
const NamedMDNode *Flags = M.getModuleFlagsMetadata();
if (!Flags) return;
@@ -1476,6 +1514,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
case Attribute::InaccessibleMemOnly:
case Attribute::InaccessibleMemOrArgMemOnly:
case Attribute::AllocSize:
+ case Attribute::SpeculativeLoadHardening:
case Attribute::Speculatable:
case Attribute::StrictFP:
return true;
@@ -1854,127 +1893,136 @@ bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) {
}
/// Verify that statepoint intrinsic is well formed.
-void Verifier::verifyStatepoint(ImmutableCallSite CS) {
- assert(CS.getCalledFunction() &&
- CS.getCalledFunction()->getIntrinsicID() ==
- Intrinsic::experimental_gc_statepoint);
-
- const Instruction &CI = *CS.getInstruction();
+void Verifier::verifyStatepoint(const CallBase &Call) {
+ assert(Call.getCalledFunction() &&
+ Call.getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::experimental_gc_statepoint);
- Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory() &&
- !CS.onlyAccessesArgMemory(),
+ Assert(!Call.doesNotAccessMemory() && !Call.onlyReadsMemory() &&
+ !Call.onlyAccessesArgMemory(),
"gc.statepoint must read and write all memory to preserve "
"reordering restrictions required by safepoint semantics",
- &CI);
+ Call);
- const Value *IDV = CS.getArgument(0);
+ const Value *IDV = Call.getArgOperand(0);
Assert(isa<ConstantInt>(IDV), "gc.statepoint ID must be a constant integer",
- &CI);
+ Call);
- const Value *NumPatchBytesV = CS.getArgument(1);
+ const Value *NumPatchBytesV = Call.getArgOperand(1);
Assert(isa<ConstantInt>(NumPatchBytesV),
"gc.statepoint number of patchable bytes must be a constant integer",
- &CI);
+ Call);
const int64_t NumPatchBytes =
cast<ConstantInt>(NumPatchBytesV)->getSExtValue();
assert(isInt<32>(NumPatchBytes) && "NumPatchBytesV is an i32!");
- Assert(NumPatchBytes >= 0, "gc.statepoint number of patchable bytes must be "
- "positive",
- &CI);
+ Assert(NumPatchBytes >= 0,
+ "gc.statepoint number of patchable bytes must be "
+ "positive",
+ Call);
- const Value *Target = CS.getArgument(2);
+ const Value *Target = Call.getArgOperand(2);
auto *PT = dyn_cast<PointerType>(Target->getType());
Assert(PT && PT->getElementType()->isFunctionTy(),
- "gc.statepoint callee must be of function pointer type", &CI, Target);
+ "gc.statepoint callee must be of function pointer type", Call, Target);
FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
- const Value *NumCallArgsV = CS.getArgument(3);
+ const Value *NumCallArgsV = Call.getArgOperand(3);
Assert(isa<ConstantInt>(NumCallArgsV),
"gc.statepoint number of arguments to underlying call "
"must be constant integer",
- &CI);
+ Call);
const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue();
Assert(NumCallArgs >= 0,
"gc.statepoint number of arguments to underlying call "
"must be positive",
- &CI);
+ Call);
const int NumParams = (int)TargetFuncType->getNumParams();
if (TargetFuncType->isVarArg()) {
Assert(NumCallArgs >= NumParams,
- "gc.statepoint mismatch in number of vararg call args", &CI);
+ "gc.statepoint mismatch in number of vararg call args", Call);
// TODO: Remove this limitation
Assert(TargetFuncType->getReturnType()->isVoidTy(),
"gc.statepoint doesn't support wrapping non-void "
"vararg functions yet",
- &CI);
+ Call);
} else
Assert(NumCallArgs == NumParams,
- "gc.statepoint mismatch in number of call args", &CI);
+ "gc.statepoint mismatch in number of call args", Call);
- const Value *FlagsV = CS.getArgument(4);
+ const Value *FlagsV = Call.getArgOperand(4);
Assert(isa<ConstantInt>(FlagsV),
- "gc.statepoint flags must be constant integer", &CI);
+ "gc.statepoint flags must be constant integer", Call);
const uint64_t Flags = cast<ConstantInt>(FlagsV)->getZExtValue();
Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
- "unknown flag used in gc.statepoint flags argument", &CI);
+ "unknown flag used in gc.statepoint flags argument", Call);
// Verify that the types of the call parameter arguments match
// the type of the wrapped callee.
+ AttributeList Attrs = Call.getAttributes();
for (int i = 0; i < NumParams; i++) {
Type *ParamType = TargetFuncType->getParamType(i);
- Type *ArgType = CS.getArgument(5 + i)->getType();
+ Type *ArgType = Call.getArgOperand(5 + i)->getType();
Assert(ArgType == ParamType,
"gc.statepoint call argument does not match wrapped "
"function type",
- &CI);
+ Call);
+
+ if (TargetFuncType->isVarArg()) {
+ AttributeSet ArgAttrs = Attrs.getParamAttributes(5 + i);
+ Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!",
+ Call);
+ }
}
const int EndCallArgsInx = 4 + NumCallArgs;
- const Value *NumTransitionArgsV = CS.getArgument(EndCallArgsInx+1);
+ const Value *NumTransitionArgsV = Call.getArgOperand(EndCallArgsInx + 1);
Assert(isa<ConstantInt>(NumTransitionArgsV),
"gc.statepoint number of transition arguments "
"must be constant integer",
- &CI);
+ Call);
const int NumTransitionArgs =
cast<ConstantInt>(NumTransitionArgsV)->getZExtValue();
Assert(NumTransitionArgs >= 0,
- "gc.statepoint number of transition arguments must be positive", &CI);
+ "gc.statepoint number of transition arguments must be positive", Call);
const int EndTransitionArgsInx = EndCallArgsInx + 1 + NumTransitionArgs;
- const Value *NumDeoptArgsV = CS.getArgument(EndTransitionArgsInx+1);
+ const Value *NumDeoptArgsV = Call.getArgOperand(EndTransitionArgsInx + 1);
Assert(isa<ConstantInt>(NumDeoptArgsV),
"gc.statepoint number of deoptimization arguments "
"must be constant integer",
- &CI);
+ Call);
const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue();
- Assert(NumDeoptArgs >= 0, "gc.statepoint number of deoptimization arguments "
- "must be positive",
- &CI);
+ Assert(NumDeoptArgs >= 0,
+ "gc.statepoint number of deoptimization arguments "
+ "must be positive",
+ Call);
const int ExpectedNumArgs =
7 + NumCallArgs + NumTransitionArgs + NumDeoptArgs;
- Assert(ExpectedNumArgs <= (int)CS.arg_size(),
- "gc.statepoint too few arguments according to length fields", &CI);
+ Assert(ExpectedNumArgs <= (int)Call.arg_size(),
+ "gc.statepoint too few arguments according to length fields", Call);
// Check that the only uses of this gc.statepoint are gc.result or
// gc.relocate calls which are tied to this statepoint and thus part
// of the same statepoint sequence
- for (const User *U : CI.users()) {
- const CallInst *Call = dyn_cast<const CallInst>(U);
- Assert(Call, "illegal use of statepoint token", &CI, U);
- if (!Call) continue;
- Assert(isa<GCRelocateInst>(Call) || isa<GCResultInst>(Call),
+ for (const User *U : Call.users()) {
+ const CallInst *UserCall = dyn_cast<const CallInst>(U);
+ Assert(UserCall, "illegal use of statepoint token", Call, U);
+ if (!UserCall)
+ continue;
+ Assert(isa<GCRelocateInst>(UserCall) || isa<GCResultInst>(UserCall),
"gc.result or gc.relocate are the only value uses "
"of a gc.statepoint",
- &CI, U);
- if (isa<GCResultInst>(Call)) {
- Assert(Call->getArgOperand(0) == &CI,
- "gc.result connected to wrong gc.statepoint", &CI, Call);
+ Call, U);
+ if (isa<GCResultInst>(UserCall)) {
+ Assert(UserCall->getArgOperand(0) == &Call,
+ "gc.result connected to wrong gc.statepoint", Call, UserCall);
} else if (isa<GCRelocateInst>(Call)) {
- Assert(Call->getArgOperand(0) == &CI,
- "gc.relocate connected to wrong gc.statepoint", &CI, Call);
+ Assert(UserCall->getArgOperand(0) == &Call,
+ "gc.relocate connected to wrong gc.statepoint", Call, UserCall);
}
}
@@ -2001,7 +2049,7 @@ void Verifier::verifyFrameRecoverIndices() {
}
}
-static Instruction *getSuccPad(TerminatorInst *Terminator) {
+static Instruction *getSuccPad(Instruction *Terminator) {
BasicBlock *UnwindDest;
if (auto *II = dyn_cast<InvokeInst>(Terminator))
UnwindDest = II->getUnwindDest();
@@ -2020,7 +2068,7 @@ void Verifier::verifySiblingFuncletUnwinds() {
if (Visited.count(PredPad))
continue;
Active.insert(PredPad);
- TerminatorInst *Terminator = Pair.second;
+ Instruction *Terminator = Pair.second;
do {
Instruction *SuccPad = getSuccPad(Terminator);
if (Active.count(SuccPad)) {
@@ -2029,7 +2077,7 @@ void Verifier::verifySiblingFuncletUnwinds() {
SmallVector<Instruction *, 8> CycleNodes;
do {
CycleNodes.push_back(CyclePad);
- TerminatorInst *CycleTerminator = SiblingFuncletInfo[CyclePad];
+ Instruction *CycleTerminator = SiblingFuncletInfo[CyclePad];
if (CycleTerminator != CyclePad)
CycleNodes.push_back(CycleTerminator);
CyclePad = getSuccPad(CycleTerminator);
@@ -2262,6 +2310,10 @@ void Verifier::visitFunction(const Function &F) {
if (!Seen.insert(DL).second)
continue;
+ Metadata *Parent = DL->getRawScope();
+ AssertDI(Parent && isa<DILocalScope>(Parent),
+ "DILocation's scope must be a DILocalScope", N, &F, &I, DL,
+ Parent);
DILocalScope *Scope = DL->getInlinedAtScope();
if (Scope && !Seen.insert(Scope).second)
continue;
@@ -2293,7 +2345,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
if (isa<PHINode>(BB.front())) {
SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
- llvm::sort(Preds.begin(), Preds.end());
+ llvm::sort(Preds);
for (const PHINode &PN : BB.phis()) {
// Ensure that PHI nodes have at least one entry!
Assert(PN.getNumIncomingValues() != 0,
@@ -2311,7 +2363,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
Values.push_back(
std::make_pair(PN.getIncomingBlock(i), PN.getIncomingValue(i)));
- llvm::sort(Values.begin(), Values.end());
+ llvm::sort(Values);
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
// Check to make sure that if there is more than one entry for a
@@ -2340,7 +2392,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
}
}
-void Verifier::visitTerminatorInst(TerminatorInst &I) {
+void Verifier::visitTerminator(Instruction &I) {
// Ensure that terminators only exist at the end of the basic block.
Assert(&I == I.getParent()->getTerminator(),
"Terminator found in the middle of a basic block!", I.getParent());
@@ -2352,7 +2404,7 @@ void Verifier::visitBranchInst(BranchInst &BI) {
Assert(BI.getCondition()->getType()->isIntegerTy(1),
"Branch condition is not 'i1' type!", &BI, BI.getCondition());
}
- visitTerminatorInst(BI);
+ visitTerminator(BI);
}
void Verifier::visitReturnInst(ReturnInst &RI) {
@@ -2371,7 +2423,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
// Check to make sure that the return value has necessary properties for
// terminators...
- visitTerminatorInst(RI);
+ visitTerminator(RI);
}
void Verifier::visitSwitchInst(SwitchInst &SI) {
@@ -2386,7 +2438,7 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
"Duplicate integer as switch case", &SI, Case.getCaseValue());
}
- visitTerminatorInst(SI);
+ visitTerminator(SI);
}
void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
@@ -2396,7 +2448,7 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
Assert(BI.getDestination(i)->getType()->isLabelTy(),
"Indirectbr destinations must all have pointer type!", &BI);
- visitTerminatorInst(BI);
+ visitTerminator(BI);
}
void Verifier::visitSelectInst(SelectInst &SI) {
@@ -2695,77 +2747,79 @@ void Verifier::visitPHINode(PHINode &PN) {
visitInstruction(PN);
}
-void Verifier::verifyCallSite(CallSite CS) {
- Instruction *I = CS.getInstruction();
-
- Assert(CS.getCalledValue()->getType()->isPointerTy(),
- "Called function must be a pointer!", I);
- PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+void Verifier::visitCallBase(CallBase &Call) {
+ Assert(Call.getCalledValue()->getType()->isPointerTy(),
+ "Called function must be a pointer!", Call);
+ PointerType *FPTy = cast<PointerType>(Call.getCalledValue()->getType());
Assert(FPTy->getElementType()->isFunctionTy(),
- "Called function is not pointer to function type!", I);
+ "Called function is not pointer to function type!", Call);
- Assert(FPTy->getElementType() == CS.getFunctionType(),
- "Called function is not the same type as the call!", I);
+ Assert(FPTy->getElementType() == Call.getFunctionType(),
+ "Called function is not the same type as the call!", Call);
- FunctionType *FTy = CS.getFunctionType();
+ FunctionType *FTy = Call.getFunctionType();
// Verify that the correct number of arguments are being passed
if (FTy->isVarArg())
- Assert(CS.arg_size() >= FTy->getNumParams(),
- "Called function requires more parameters than were provided!", I);
+ Assert(Call.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!",
+ Call);
else
- Assert(CS.arg_size() == FTy->getNumParams(),
- "Incorrect number of arguments passed to called function!", I);
+ Assert(Call.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", Call);
// Verify that all arguments to the call match the function type.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Assert(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ Assert(Call.getArgOperand(i)->getType() == FTy->getParamType(i),
"Call parameter type does not match function signature!",
- CS.getArgument(i), FTy->getParamType(i), I);
+ Call.getArgOperand(i), FTy->getParamType(i), Call);
- AttributeList Attrs = CS.getAttributes();
+ AttributeList Attrs = Call.getAttributes();
- Assert(verifyAttributeCount(Attrs, CS.arg_size()),
- "Attribute after last parameter!", I);
+ Assert(verifyAttributeCount(Attrs, Call.arg_size()),
+ "Attribute after last parameter!", Call);
if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) {
// Don't allow speculatable on call sites, unless the underlying function
// declaration is also speculatable.
- Function *Callee
- = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ Function *Callee =
+ dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
Assert(Callee && Callee->isSpeculatable(),
- "speculatable attribute may not apply to call sites", I);
+ "speculatable attribute may not apply to call sites", Call);
}
// Verify call attributes.
- verifyFunctionAttrs(FTy, Attrs, I);
+ verifyFunctionAttrs(FTy, Attrs, &Call);
// Conservatively check the inalloca argument.
// We have a bug if we can find that there is an underlying alloca without
// inalloca.
- if (CS.hasInAllocaArgument()) {
- Value *InAllocaArg = CS.getArgument(FTy->getNumParams() - 1);
+ if (Call.hasInAllocaArgument()) {
+ Value *InAllocaArg = Call.getArgOperand(FTy->getNumParams() - 1);
if (auto AI = dyn_cast<AllocaInst>(InAllocaArg->stripInBoundsOffsets()))
Assert(AI->isUsedWithInAlloca(),
- "inalloca argument for call has mismatched alloca", AI, I);
+ "inalloca argument for call has mismatched alloca", AI, Call);
}
// For each argument of the callsite, if it has the swifterror argument,
// make sure the underlying alloca/parameter it comes from has a swifterror as
// well.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- if (CS.paramHasAttr(i, Attribute::SwiftError)) {
- Value *SwiftErrorArg = CS.getArgument(i);
+ if (Call.paramHasAttr(i, Attribute::SwiftError)) {
+ Value *SwiftErrorArg = Call.getArgOperand(i);
if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
Assert(AI->isSwiftError(),
- "swifterror argument for call has mismatched alloca", AI, I);
+ "swifterror argument for call has mismatched alloca", AI, Call);
continue;
}
auto ArgI = dyn_cast<Argument>(SwiftErrorArg);
- Assert(ArgI, "swifterror argument should come from an alloca or parameter", SwiftErrorArg, I);
+ Assert(ArgI,
+ "swifterror argument should come from an alloca or parameter",
+ SwiftErrorArg, Call);
Assert(ArgI->hasSwiftErrorAttr(),
- "swifterror argument for call has mismatched parameter", ArgI, I);
+ "swifterror argument for call has mismatched parameter", ArgI,
+ Call);
}
if (FTy->isVarArg()) {
@@ -2781,90 +2835,97 @@ void Verifier::verifyCallSite(CallSite CS) {
}
// Check attributes on the varargs part.
- for (unsigned Idx = FTy->getNumParams(); Idx < CS.arg_size(); ++Idx) {
- Type *Ty = CS.getArgument(Idx)->getType();
+ for (unsigned Idx = FTy->getNumParams(); Idx < Call.arg_size(); ++Idx) {
+ Type *Ty = Call.getArgOperand(Idx)->getType();
AttributeSet ArgAttrs = Attrs.getParamAttributes(Idx);
- verifyParameterAttrs(ArgAttrs, Ty, I);
+ verifyParameterAttrs(ArgAttrs, Ty, &Call);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
- Assert(!SawNest, "More than one parameter has attribute nest!", I);
+ Assert(!SawNest, "More than one parameter has attribute nest!", Call);
SawNest = true;
}
if (ArgAttrs.hasAttribute(Attribute::Returned)) {
Assert(!SawReturned, "More than one parameter has attribute returned!",
- I);
+ Call);
Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
"Incompatible argument and return types for 'returned' "
"attribute",
- I);
+ Call);
SawReturned = true;
}
- Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
- "Attribute 'sret' cannot be used for vararg call arguments!", I);
+ // Statepoint intrinsic is vararg but the wrapped function may be not.
+ // Allow sret here and check the wrapped function in verifyStatepoint.
+ if (!Call.getCalledFunction() ||
+ Call.getCalledFunction()->getIntrinsicID() !=
+ Intrinsic::experimental_gc_statepoint)
+ Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!",
+ Call);
if (ArgAttrs.hasAttribute(Attribute::InAlloca))
- Assert(Idx == CS.arg_size() - 1, "inalloca isn't on the last argument!",
- I);
+ Assert(Idx == Call.arg_size() - 1,
+ "inalloca isn't on the last argument!", Call);
}
}
// Verify that there's no metadata unless it's a direct call to an intrinsic.
- if (CS.getCalledFunction() == nullptr ||
- !CS.getCalledFunction()->getName().startswith("llvm.")) {
+ if (!Call.getCalledFunction() ||
+ !Call.getCalledFunction()->getName().startswith("llvm.")) {
for (Type *ParamTy : FTy->params()) {
Assert(!ParamTy->isMetadataTy(),
- "Function has metadata parameter but isn't an intrinsic", I);
+ "Function has metadata parameter but isn't an intrinsic", Call);
Assert(!ParamTy->isTokenTy(),
- "Function has token parameter but isn't an intrinsic", I);
+ "Function has token parameter but isn't an intrinsic", Call);
}
}
// Verify that indirect calls don't return tokens.
- if (CS.getCalledFunction() == nullptr)
+ if (!Call.getCalledFunction())
Assert(!FTy->getReturnType()->isTokenTy(),
"Return type cannot be token for indirect call!");
- if (Function *F = CS.getCalledFunction())
+ if (Function *F = Call.getCalledFunction())
if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
- visitIntrinsicCallSite(ID, CS);
+ visitIntrinsicCall(ID, Call);
// Verify that a callsite has at most one "deopt", at most one "funclet" and
// at most one "gc-transition" operand bundle.
bool FoundDeoptBundle = false, FoundFuncletBundle = false,
FoundGCTransitionBundle = false;
- for (unsigned i = 0, e = CS.getNumOperandBundles(); i < e; ++i) {
- OperandBundleUse BU = CS.getOperandBundleAt(i);
+ for (unsigned i = 0, e = Call.getNumOperandBundles(); i < e; ++i) {
+ OperandBundleUse BU = Call.getOperandBundleAt(i);
uint32_t Tag = BU.getTagID();
if (Tag == LLVMContext::OB_deopt) {
- Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", I);
+ Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", Call);
FoundDeoptBundle = true;
} else if (Tag == LLVMContext::OB_gc_transition) {
Assert(!FoundGCTransitionBundle, "Multiple gc-transition operand bundles",
- I);
+ Call);
FoundGCTransitionBundle = true;
} else if (Tag == LLVMContext::OB_funclet) {
- Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", I);
+ Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", Call);
FoundFuncletBundle = true;
Assert(BU.Inputs.size() == 1,
- "Expected exactly one funclet bundle operand", I);
+ "Expected exactly one funclet bundle operand", Call);
Assert(isa<FuncletPadInst>(BU.Inputs.front()),
"Funclet bundle operands should correspond to a FuncletPadInst",
- I);
+ Call);
}
}
// Verify that each inlinable callsite of a debug-info-bearing function in a
// debug-info-bearing function has a debug location attached to it. Failure to
// do so causes assertion failures when the inliner sets up inline scope info.
- if (I->getFunction()->getSubprogram() && CS.getCalledFunction() &&
- CS.getCalledFunction()->getSubprogram())
- AssertDI(I->getDebugLoc(), "inlinable function call in a function with "
- "debug info must have a !dbg location",
- I);
+ if (Call.getFunction()->getSubprogram() && Call.getCalledFunction() &&
+ Call.getCalledFunction()->getSubprogram())
+ AssertDI(Call.getDebugLoc(),
+ "inlinable function call in a function with "
+ "debug info must have a !dbg location",
+ Call);
- visitInstruction(*I);
+ visitInstruction(Call);
}
/// Two types are "congruent" if they are identical, or if they are both pointer
@@ -2959,14 +3020,14 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
}
void Verifier::visitCallInst(CallInst &CI) {
- verifyCallSite(&CI);
+ visitCallBase(CI);
if (CI.isMustTailCall())
verifyMustTailCall(CI);
}
void Verifier::visitInvokeInst(InvokeInst &II) {
- verifyCallSite(&II);
+ visitCallBase(II);
// Verify that the first non-PHI instruction of the unwind destination is an
// exception handling instruction.
@@ -2975,7 +3036,29 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
"The unwind destination does not have an exception handling instruction!",
&II);
- visitTerminatorInst(II);
+ visitTerminator(II);
+}
+
+/// visitUnaryOperator - Check the argument to the unary operator.
+///
+void Verifier::visitUnaryOperator(UnaryOperator &U) {
+ Assert(U.getType() == U.getOperand(0)->getType(),
+ "Unary operators must have same type for"
+ "operands and result!",
+ &U);
+
+ switch (U.getOpcode()) {
+ // Check that floating-point arithmetic operators are only used with
+ // floating-point operands.
+ case Instruction::FNeg:
+ Assert(U.getType()->isFPOrFPVectorTy(),
+ "FNeg operator only works with float types!", &U);
+ break;
+ default:
+ llvm_unreachable("Unknown UnaryOperator opcode!");
+ }
+
+ visitInstruction(U);
}
/// visitBinaryOperator - Check that both arguments to the binary operator are
@@ -3131,6 +3214,12 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
"All GEP indices should be of integer type");
}
}
+
+ if (auto *PTy = dyn_cast<PointerType>(GEP.getType())) {
+ Assert(GEP.getAddressSpace() == PTy->getAddressSpace(),
+ "GEP address space doesn't match type", &GEP);
+ }
+
visitInstruction(GEP);
}
@@ -3247,16 +3336,15 @@ void Verifier::visitStoreInst(StoreInst &SI) {
}
/// Check that SwiftErrorVal is used as a swifterror argument in CS.
-void Verifier::verifySwiftErrorCallSite(CallSite CS,
- const Value *SwiftErrorVal) {
+void Verifier::verifySwiftErrorCall(CallBase &Call,
+ const Value *SwiftErrorVal) {
unsigned Idx = 0;
- for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I, ++Idx) {
+ for (auto I = Call.arg_begin(), E = Call.arg_end(); I != E; ++I, ++Idx) {
if (*I == SwiftErrorVal) {
- Assert(CS.paramHasAttr(Idx, Attribute::SwiftError),
+ Assert(Call.paramHasAttr(Idx, Attribute::SwiftError),
"swifterror value when used in a callsite should be marked "
"with swifterror attribute",
- SwiftErrorVal, CS);
+ SwiftErrorVal, Call);
}
}
}
@@ -3275,10 +3363,8 @@ void Verifier::verifySwiftErrorValue(const Value *SwiftErrorVal) {
Assert(StoreI->getOperand(1) == SwiftErrorVal,
"swifterror value should be the second operand when used "
"by stores", SwiftErrorVal, U);
- if (auto CallI = dyn_cast<CallInst>(U))
- verifySwiftErrorCallSite(const_cast<CallInst*>(CallI), SwiftErrorVal);
- if (auto II = dyn_cast<InvokeInst>(U))
- verifySwiftErrorCallSite(const_cast<InvokeInst*>(II), SwiftErrorVal);
+ if (auto *Call = dyn_cast<CallBase>(U))
+ verifySwiftErrorCall(*const_cast<CallBase *>(Call), SwiftErrorVal);
}
}
@@ -3341,17 +3427,19 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
"atomicrmw instructions must be atomic.", &RMWI);
Assert(RMWI.getOrdering() != AtomicOrdering::Unordered,
"atomicrmw instructions cannot be unordered.", &RMWI);
+ auto Op = RMWI.getOperation();
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
Type *ElTy = PTy->getElementType();
- Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!",
+ Assert(ElTy->isIntegerTy(), "atomicrmw " +
+ AtomicRMWInst::getOperationName(Op) +
+ " operand must have integer type!",
&RMWI, ElTy);
checkAtomicMemAccessSize(ElTy, &RMWI);
Assert(ElTy == RMWI.getOperand(1)->getType(),
"Argument value type does not match pointer operand type!", &RMWI,
ElTy);
- Assert(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
- RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
+ Assert(AtomicRMWInst::FIRST_BINOP <= Op && Op <= AtomicRMWInst::LAST_BINOP,
"Invalid binary operation!", &RMWI);
visitInstruction(RMWI);
}
@@ -3430,7 +3518,7 @@ void Verifier::visitEHPadPredecessors(Instruction &I) {
Instruction *ToPad = &I;
Value *ToPadParent = getParentPad(ToPad);
for (BasicBlock *PredBB : predecessors(BB)) {
- TerminatorInst *TI = PredBB->getTerminator();
+ Instruction *TI = PredBB->getTerminator();
Value *FromPad;
if (auto *II = dyn_cast<InvokeInst>(TI)) {
Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB,
@@ -3518,7 +3606,7 @@ void Verifier::visitResumeInst(ResumeInst &RI) {
"inside a function.",
&RI);
- visitTerminatorInst(RI);
+ visitTerminator(RI);
}
void Verifier::visitCatchPadInst(CatchPadInst &CPI) {
@@ -3546,7 +3634,7 @@ void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) {
"CatchReturnInst needs to be provided a CatchPad", &CatchReturn,
CatchReturn.getOperand(0));
- visitTerminatorInst(CatchReturn);
+ visitTerminator(CatchReturn);
}
void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
@@ -3667,7 +3755,7 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) {
// Record cleanup sibling unwinds for verifySiblingFuncletUnwinds
if (isa<CleanupPadInst>(&FPI) && !isa<ConstantTokenNone>(UnwindPad) &&
getParentPad(UnwindPad) == getParentPad(&FPI))
- SiblingFuncletInfo[&FPI] = cast<TerminatorInst>(U);
+ SiblingFuncletInfo[&FPI] = cast<Instruction>(U);
}
}
// Make sure we visit all uses of FPI, but for nested pads stop as
@@ -3768,7 +3856,7 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
}
visitEHPadPredecessors(CatchSwitch);
- visitTerminatorInst(CatchSwitch);
+ visitTerminator(CatchSwitch);
}
void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) {
@@ -3784,7 +3872,7 @@ void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) {
&CRI);
}
- visitTerminatorInst(CRI);
+ visitTerminator(CRI);
}
void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
@@ -3867,6 +3955,10 @@ void Verifier::visitInstruction(Instruction &I) {
}
}
+ // Get a pointer to the call base of the instruction if it is some form of
+ // call.
+ const CallBase *CBI = dyn_cast<CallBase>(&I);
+
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
Assert(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
@@ -3879,10 +3971,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
// Check to make sure that the "address of" an intrinsic function is never
// taken.
- Assert(
- !F->isIntrinsic() ||
- i == (isa<CallInst>(I) ? e - 1 : isa<InvokeInst>(I) ? e - 3 : 0),
- "Cannot take the address of an intrinsic!", &I);
+ Assert(!F->isIntrinsic() ||
+ (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)),
+ "Cannot take the address of an intrinsic!", &I);
Assert(
!F->isIntrinsic() || isa<CallInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
@@ -3908,8 +3999,7 @@ void Verifier::visitInstruction(Instruction &I) {
} else if (isa<Instruction>(I.getOperand(i))) {
verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert((i + 1 == e && isa<CallInst>(I)) ||
- (i + 3 == e && isa<InvokeInst>(I)),
+ Assert(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i),
"Cannot take the address of an inline asm!", &I);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I.getOperand(i))) {
if (CE->getType()->isPtrOrPtrVectorTy() ||
@@ -3984,15 +4074,15 @@ void Verifier::visitInstruction(Instruction &I) {
visitMDNode(*N);
}
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
verifyFragmentExpression(*DII);
InstsInThisBlock.insert(&I);
}
/// Allow intrinsics to be verified in different ways.
-void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
- Function *IF = CS.getCalledFunction();
+void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
+ Function *IF = Call.getCalledFunction();
Assert(IF->isDeclaration(), "Intrinsic functions should never be defined!",
IF);
@@ -4038,15 +4128,15 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// If the intrinsic takes MDNode arguments, verify that they are either global
// or are local to *this* function.
- for (Value *V : CS.args())
+ for (Value *V : Call.args())
if (auto *MD = dyn_cast<MetadataAsValue>(V))
- visitMetadataAsValue(*MD, CS.getCaller());
+ visitMetadataAsValue(*MD, Call.getCaller());
switch (ID) {
default:
break;
case Intrinsic::coro_id: {
- auto *InfoArg = CS.getArgOperand(3)->stripPointerCasts();
+ auto *InfoArg = Call.getArgOperand(3)->stripPointerCasts();
if (isa<ConstantPointerNull>(InfoArg))
break;
auto *GV = dyn_cast<GlobalVariable>(InfoArg);
@@ -4061,10 +4151,10 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
}
case Intrinsic::ctlz: // llvm.ctlz
case Intrinsic::cttz: // llvm.cttz
- Assert(isa<ConstantInt>(CS.getArgOperand(1)),
+ Assert(isa<ConstantInt>(Call.getArgOperand(1)),
"is_zero_undef argument of bit counting intrinsics must be a "
"constant int",
- CS);
+ Call);
break;
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
@@ -4084,59 +4174,64 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::experimental_constrained_log2:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
- visitConstrainedFPIntrinsic(
- cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
+ visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(Call));
break;
case Intrinsic::dbg_declare: // llvm.dbg.declare
- Assert(isa<MetadataAsValue>(CS.getArgOperand(0)),
- "invalid llvm.dbg.declare intrinsic call 1", CS);
- visitDbgIntrinsic("declare", cast<DbgInfoIntrinsic>(*CS.getInstruction()));
+ Assert(isa<MetadataAsValue>(Call.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", Call);
+ visitDbgIntrinsic("declare", cast<DbgVariableIntrinsic>(Call));
break;
case Intrinsic::dbg_addr: // llvm.dbg.addr
- visitDbgIntrinsic("addr", cast<DbgInfoIntrinsic>(*CS.getInstruction()));
+ visitDbgIntrinsic("addr", cast<DbgVariableIntrinsic>(Call));
break;
case Intrinsic::dbg_value: // llvm.dbg.value
- visitDbgIntrinsic("value", cast<DbgInfoIntrinsic>(*CS.getInstruction()));
+ visitDbgIntrinsic("value", cast<DbgVariableIntrinsic>(Call));
break;
case Intrinsic::dbg_label: // llvm.dbg.label
- visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(*CS.getInstruction()));
+ visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(Call));
break;
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset: {
- const auto *MI = cast<MemIntrinsic>(CS.getInstruction());
+ const auto *MI = cast<MemIntrinsic>(&Call);
auto IsValidAlignment = [&](unsigned Alignment) -> bool {
return Alignment == 0 || isPowerOf2_32(Alignment);
};
Assert(IsValidAlignment(MI->getDestAlignment()),
"alignment of arg 0 of memory intrinsic must be 0 or a power of 2",
- CS);
+ Call);
if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) {
Assert(IsValidAlignment(MTI->getSourceAlignment()),
"alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
- CS);
+ Call);
}
- Assert(isa<ConstantInt>(CS.getArgOperand(3)),
+ Assert(isa<ConstantInt>(Call.getArgOperand(3)),
"isvolatile argument of memory intrinsics must be a constant int",
- CS);
+ Call);
break;
}
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic: {
- const auto *AMI = cast<AtomicMemIntrinsic>(CS.getInstruction());
+ const auto *AMI = cast<AtomicMemIntrinsic>(&Call);
ConstantInt *ElementSizeCI =
dyn_cast<ConstantInt>(AMI->getRawElementSizeInBytes());
Assert(ElementSizeCI,
"element size of the element-wise unordered atomic memory "
"intrinsic must be a constant int",
- CS);
+ Call);
const APInt &ElementSizeVal = ElementSizeCI->getValue();
Assert(ElementSizeVal.isPowerOf2(),
"element size of the element-wise atomic memory intrinsic "
"must be a power of 2",
- CS);
+ Call);
if (auto *LengthCI = dyn_cast<ConstantInt>(AMI->getLength())) {
uint64_t Length = LengthCI->getZExtValue();
@@ -4144,7 +4239,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
Assert((Length % ElementSize) == 0,
"constant length must be a multiple of the element size in the "
"element-wise atomic memory intrinsic",
- CS);
+ Call);
}
auto IsValidAlignment = [&](uint64_t Alignment) {
@@ -4152,11 +4247,11 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
};
uint64_t DstAlignment = AMI->getDestAlignment();
Assert(IsValidAlignment(DstAlignment),
- "incorrect alignment of the destination argument", CS);
+ "incorrect alignment of the destination argument", Call);
if (const auto *AMT = dyn_cast<AtomicMemTransferInst>(AMI)) {
uint64_t SrcAlignment = AMT->getSourceAlignment();
Assert(IsValidAlignment(SrcAlignment),
- "incorrect alignment of the source argument", CS);
+ "incorrect alignment of the source argument", Call);
}
break;
}
@@ -4165,76 +4260,76 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::gcread:
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
- dyn_cast<AllocaInst>(CS.getArgOperand(0)->stripPointerCasts());
- Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", CS);
- Assert(isa<Constant>(CS.getArgOperand(1)),
- "llvm.gcroot parameter #2 must be a constant.", CS);
+ dyn_cast<AllocaInst>(Call.getArgOperand(0)->stripPointerCasts());
+ Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", Call);
+ Assert(isa<Constant>(Call.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", Call);
if (!AI->getAllocatedType()->isPointerTy()) {
- Assert(!isa<ConstantPointerNull>(CS.getArgOperand(1)),
+ Assert(!isa<ConstantPointerNull>(Call.getArgOperand(1)),
"llvm.gcroot parameter #1 must either be a pointer alloca, "
"or argument #2 must be a non-null constant.",
- CS);
+ Call);
}
}
- Assert(CS.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", CS);
+ Assert(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
break;
case Intrinsic::init_trampoline:
- Assert(isa<Function>(CS.getArgOperand(1)->stripPointerCasts()),
+ Assert(isa<Function>(Call.getArgOperand(1)->stripPointerCasts()),
"llvm.init_trampoline parameter #2 must resolve to a function.",
- CS);
+ Call);
break;
case Intrinsic::prefetch:
- Assert(isa<ConstantInt>(CS.getArgOperand(1)) &&
- isa<ConstantInt>(CS.getArgOperand(2)) &&
- cast<ConstantInt>(CS.getArgOperand(1))->getZExtValue() < 2 &&
- cast<ConstantInt>(CS.getArgOperand(2))->getZExtValue() < 4,
- "invalid arguments to llvm.prefetch", CS);
+ Assert(isa<ConstantInt>(Call.getArgOperand(1)) &&
+ isa<ConstantInt>(Call.getArgOperand(2)) &&
+ cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch", Call);
break;
case Intrinsic::stackprotector:
- Assert(isa<AllocaInst>(CS.getArgOperand(1)->stripPointerCasts()),
- "llvm.stackprotector parameter #2 must resolve to an alloca.", CS);
+ Assert(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
- Assert(isa<ConstantInt>(CS.getArgOperand(0)),
+ Assert(isa<ConstantInt>(Call.getArgOperand(0)),
"size argument of memory use markers must be a constant integer",
- CS);
+ Call);
break;
case Intrinsic::invariant_end:
- Assert(isa<ConstantInt>(CS.getArgOperand(1)),
- "llvm.invariant.end parameter #2 must be a constant integer", CS);
+ Assert(isa<ConstantInt>(Call.getArgOperand(1)),
+ "llvm.invariant.end parameter #2 must be a constant integer", Call);
break;
case Intrinsic::localescape: {
- BasicBlock *BB = CS.getParent();
+ BasicBlock *BB = Call.getParent();
Assert(BB == &BB->getParent()->front(),
- "llvm.localescape used outside of entry block", CS);
+ "llvm.localescape used outside of entry block", Call);
Assert(!SawFrameEscape,
- "multiple calls to llvm.localescape in one function", CS);
- for (Value *Arg : CS.args()) {
+ "multiple calls to llvm.localescape in one function", Call);
+ for (Value *Arg : Call.args()) {
if (isa<ConstantPointerNull>(Arg))
continue; // Null values are allowed as placeholders.
auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
Assert(AI && AI->isStaticAlloca(),
- "llvm.localescape only accepts static allocas", CS);
+ "llvm.localescape only accepts static allocas", Call);
}
- FrameEscapeInfo[BB->getParent()].first = CS.getNumArgOperands();
+ FrameEscapeInfo[BB->getParent()].first = Call.getNumArgOperands();
SawFrameEscape = true;
break;
}
case Intrinsic::localrecover: {
- Value *FnArg = CS.getArgOperand(0)->stripPointerCasts();
+ Value *FnArg = Call.getArgOperand(0)->stripPointerCasts();
Function *Fn = dyn_cast<Function>(FnArg);
Assert(Fn && !Fn->isDeclaration(),
"llvm.localrecover first "
"argument must be function defined in this module",
- CS);
- auto *IdxArg = dyn_cast<ConstantInt>(CS.getArgOperand(2));
+ Call);
+ auto *IdxArg = dyn_cast<ConstantInt>(Call.getArgOperand(2));
Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int",
- CS);
+ Call);
auto &Entry = FrameEscapeInfo[Fn];
Entry.second = unsigned(
std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
@@ -4242,45 +4337,46 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
}
case Intrinsic::experimental_gc_statepoint:
- Assert(!CS.isInlineAsm(),
- "gc.statepoint support for inline assembly unimplemented", CS);
- Assert(CS.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", CS);
+ if (auto *CI = dyn_cast<CallInst>(&Call))
+ Assert(!CI->isInlineAsm(),
+ "gc.statepoint support for inline assembly unimplemented", CI);
+ Assert(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
- verifyStatepoint(CS);
+ verifyStatepoint(Call);
break;
case Intrinsic::experimental_gc_result: {
- Assert(CS.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", CS);
+ Assert(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
// Are we tied to a statepoint properly?
- CallSite StatepointCS(CS.getArgOperand(0));
+ const auto *StatepointCall = dyn_cast<CallBase>(Call.getArgOperand(0));
const Function *StatepointFn =
- StatepointCS.getInstruction() ? StatepointCS.getCalledFunction() : nullptr;
+ StatepointCall ? StatepointCall->getCalledFunction() : nullptr;
Assert(StatepointFn && StatepointFn->isDeclaration() &&
StatepointFn->getIntrinsicID() ==
Intrinsic::experimental_gc_statepoint,
- "gc.result operand #1 must be from a statepoint", CS,
- CS.getArgOperand(0));
+ "gc.result operand #1 must be from a statepoint", Call,
+ Call.getArgOperand(0));
// Assert that result type matches wrapped callee.
- const Value *Target = StatepointCS.getArgument(2);
+ const Value *Target = StatepointCall->getArgOperand(2);
auto *PT = cast<PointerType>(Target->getType());
auto *TargetFuncType = cast<FunctionType>(PT->getElementType());
- Assert(CS.getType() == TargetFuncType->getReturnType(),
- "gc.result result type does not match wrapped callee", CS);
+ Assert(Call.getType() == TargetFuncType->getReturnType(),
+ "gc.result result type does not match wrapped callee", Call);
break;
}
case Intrinsic::experimental_gc_relocate: {
- Assert(CS.getNumArgOperands() == 3, "wrong number of arguments", CS);
+ Assert(Call.getNumArgOperands() == 3, "wrong number of arguments", Call);
- Assert(isa<PointerType>(CS.getType()->getScalarType()),
- "gc.relocate must return a pointer or a vector of pointers", CS);
+ Assert(isa<PointerType>(Call.getType()->getScalarType()),
+ "gc.relocate must return a pointer or a vector of pointers", Call);
// Check that this relocate is correctly tied to the statepoint
// This is case for relocate on the unwinding path of an invoke statepoint
if (LandingPadInst *LandingPad =
- dyn_cast<LandingPadInst>(CS.getArgOperand(0))) {
+ dyn_cast<LandingPadInst>(Call.getArgOperand(0))) {
const BasicBlock *InvokeBB =
LandingPad->getParent()->getUniquePredecessor();
@@ -4293,167 +4389,198 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
InvokeBB);
Assert(isStatepoint(InvokeBB->getTerminator()),
"gc relocate should be linked to a statepoint", InvokeBB);
- }
- else {
+ } else {
// In all other cases relocate should be tied to the statepoint directly.
// This covers relocates on a normal return path of invoke statepoint and
// relocates of a call statepoint.
- auto Token = CS.getArgOperand(0);
+ auto Token = Call.getArgOperand(0);
Assert(isa<Instruction>(Token) && isStatepoint(cast<Instruction>(Token)),
- "gc relocate is incorrectly tied to the statepoint", CS, Token);
+ "gc relocate is incorrectly tied to the statepoint", Call, Token);
}
// Verify rest of the relocate arguments.
-
- ImmutableCallSite StatepointCS(
- cast<GCRelocateInst>(*CS.getInstruction()).getStatepoint());
+ const CallBase &StatepointCall =
+ *cast<CallBase>(cast<GCRelocateInst>(Call).getStatepoint());
// Both the base and derived must be piped through the safepoint.
- Value* Base = CS.getArgOperand(1);
+ Value *Base = Call.getArgOperand(1);
Assert(isa<ConstantInt>(Base),
- "gc.relocate operand #2 must be integer offset", CS);
+ "gc.relocate operand #2 must be integer offset", Call);
- Value* Derived = CS.getArgOperand(2);
+ Value *Derived = Call.getArgOperand(2);
Assert(isa<ConstantInt>(Derived),
- "gc.relocate operand #3 must be integer offset", CS);
+ "gc.relocate operand #3 must be integer offset", Call);
const int BaseIndex = cast<ConstantInt>(Base)->getZExtValue();
const int DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue();
// Check the bounds
- Assert(0 <= BaseIndex && BaseIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint base index out of bounds", CS);
- Assert(0 <= DerivedIndex && DerivedIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint derived index out of bounds", CS);
+ Assert(0 <= BaseIndex && BaseIndex < (int)StatepointCall.arg_size(),
+ "gc.relocate: statepoint base index out of bounds", Call);
+ Assert(0 <= DerivedIndex && DerivedIndex < (int)StatepointCall.arg_size(),
+ "gc.relocate: statepoint derived index out of bounds", Call);
// Check that BaseIndex and DerivedIndex fall within the 'gc parameters'
// section of the statepoint's argument.
- Assert(StatepointCS.arg_size() > 0,
+ Assert(StatepointCall.arg_size() > 0,
"gc.statepoint: insufficient arguments");
- Assert(isa<ConstantInt>(StatepointCS.getArgument(3)),
+ Assert(isa<ConstantInt>(StatepointCall.getArgOperand(3)),
"gc.statement: number of call arguments must be constant integer");
const unsigned NumCallArgs =
- cast<ConstantInt>(StatepointCS.getArgument(3))->getZExtValue();
- Assert(StatepointCS.arg_size() > NumCallArgs + 5,
+ cast<ConstantInt>(StatepointCall.getArgOperand(3))->getZExtValue();
+ Assert(StatepointCall.arg_size() > NumCallArgs + 5,
"gc.statepoint: mismatch in number of call arguments");
- Assert(isa<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 5)),
+ Assert(isa<ConstantInt>(StatepointCall.getArgOperand(NumCallArgs + 5)),
"gc.statepoint: number of transition arguments must be "
"a constant integer");
const int NumTransitionArgs =
- cast<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 5))
+ cast<ConstantInt>(StatepointCall.getArgOperand(NumCallArgs + 5))
->getZExtValue();
const int DeoptArgsStart = 4 + NumCallArgs + 1 + NumTransitionArgs + 1;
- Assert(isa<ConstantInt>(StatepointCS.getArgument(DeoptArgsStart)),
+ Assert(isa<ConstantInt>(StatepointCall.getArgOperand(DeoptArgsStart)),
"gc.statepoint: number of deoptimization arguments must be "
"a constant integer");
const int NumDeoptArgs =
- cast<ConstantInt>(StatepointCS.getArgument(DeoptArgsStart))
+ cast<ConstantInt>(StatepointCall.getArgOperand(DeoptArgsStart))
->getZExtValue();
const int GCParamArgsStart = DeoptArgsStart + 1 + NumDeoptArgs;
- const int GCParamArgsEnd = StatepointCS.arg_size();
+ const int GCParamArgsEnd = StatepointCall.arg_size();
Assert(GCParamArgsStart <= BaseIndex && BaseIndex < GCParamArgsEnd,
"gc.relocate: statepoint base index doesn't fall within the "
"'gc parameters' section of the statepoint call",
- CS);
+ Call);
Assert(GCParamArgsStart <= DerivedIndex && DerivedIndex < GCParamArgsEnd,
"gc.relocate: statepoint derived index doesn't fall within the "
"'gc parameters' section of the statepoint call",
- CS);
+ Call);
// Relocated value must be either a pointer type or vector-of-pointer type,
// but gc_relocate does not need to return the same pointer type as the
// relocated pointer. It can be casted to the correct type later if it's
// desired. However, they must have the same address space and 'vectorness'
- GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
+ GCRelocateInst &Relocate = cast<GCRelocateInst>(Call);
Assert(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(),
- "gc.relocate: relocated value must be a gc pointer", CS);
+ "gc.relocate: relocated value must be a gc pointer", Call);
- auto ResultType = CS.getType();
+ auto ResultType = Call.getType();
auto DerivedType = Relocate.getDerivedPtr()->getType();
Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(),
"gc.relocate: vector relocates to vector and pointer to pointer",
- CS);
+ Call);
Assert(
ResultType->getPointerAddressSpace() ==
DerivedType->getPointerAddressSpace(),
"gc.relocate: relocating a pointer shouldn't change its address space",
- CS);
+ Call);
break;
}
case Intrinsic::eh_exceptioncode:
case Intrinsic::eh_exceptionpointer: {
- Assert(isa<CatchPadInst>(CS.getArgOperand(0)),
- "eh.exceptionpointer argument must be a catchpad", CS);
+ Assert(isa<CatchPadInst>(Call.getArgOperand(0)),
+ "eh.exceptionpointer argument must be a catchpad", Call);
break;
}
case Intrinsic::masked_load: {
- Assert(CS.getType()->isVectorTy(), "masked_load: must return a vector", CS);
+ Assert(Call.getType()->isVectorTy(), "masked_load: must return a vector",
+ Call);
- Value *Ptr = CS.getArgOperand(0);
- //Value *Alignment = CS.getArgOperand(1);
- Value *Mask = CS.getArgOperand(2);
- Value *PassThru = CS.getArgOperand(3);
- Assert(Mask->getType()->isVectorTy(),
- "masked_load: mask must be vector", CS);
+ Value *Ptr = Call.getArgOperand(0);
+ // Value *Alignment = Call.getArgOperand(1);
+ Value *Mask = Call.getArgOperand(2);
+ Value *PassThru = Call.getArgOperand(3);
+ Assert(Mask->getType()->isVectorTy(), "masked_load: mask must be vector",
+ Call);
// DataTy is the overloaded type
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
- Assert(DataTy == CS.getType(),
- "masked_load: return must match pointer type", CS);
+ Assert(DataTy == Call.getType(),
+ "masked_load: return must match pointer type", Call);
Assert(PassThru->getType() == DataTy,
- "masked_load: pass through and data type must match", CS);
+ "masked_load: pass through and data type must match", Call);
Assert(Mask->getType()->getVectorNumElements() ==
- DataTy->getVectorNumElements(),
- "masked_load: vector mask must be same length as data", CS);
+ DataTy->getVectorNumElements(),
+ "masked_load: vector mask must be same length as data", Call);
break;
}
case Intrinsic::masked_store: {
- Value *Val = CS.getArgOperand(0);
- Value *Ptr = CS.getArgOperand(1);
- //Value *Alignment = CS.getArgOperand(2);
- Value *Mask = CS.getArgOperand(3);
- Assert(Mask->getType()->isVectorTy(),
- "masked_store: mask must be vector", CS);
+ Value *Val = Call.getArgOperand(0);
+ Value *Ptr = Call.getArgOperand(1);
+ // Value *Alignment = Call.getArgOperand(2);
+ Value *Mask = Call.getArgOperand(3);
+ Assert(Mask->getType()->isVectorTy(), "masked_store: mask must be vector",
+ Call);
// DataTy is the overloaded type
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
Assert(DataTy == Val->getType(),
- "masked_store: storee must match pointer type", CS);
+ "masked_store: storee must match pointer type", Call);
Assert(Mask->getType()->getVectorNumElements() ==
- DataTy->getVectorNumElements(),
- "masked_store: vector mask must be same length as data", CS);
+ DataTy->getVectorNumElements(),
+ "masked_store: vector mask must be same length as data", Call);
break;
}
case Intrinsic::experimental_guard: {
- Assert(CS.isCall(), "experimental_guard cannot be invoked", CS);
- Assert(CS.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+ Assert(isa<CallInst>(Call), "experimental_guard cannot be invoked", Call);
+ Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
"experimental_guard must have exactly one "
"\"deopt\" operand bundle");
break;
}
case Intrinsic::experimental_deoptimize: {
- Assert(CS.isCall(), "experimental_deoptimize cannot be invoked", CS);
- Assert(CS.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+ Assert(isa<CallInst>(Call), "experimental_deoptimize cannot be invoked",
+ Call);
+ Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
"experimental_deoptimize must have exactly one "
"\"deopt\" operand bundle");
- Assert(CS.getType() == CS.getInstruction()->getFunction()->getReturnType(),
+ Assert(Call.getType() == Call.getFunction()->getReturnType(),
"experimental_deoptimize return type must match caller return type");
- if (CS.isCall()) {
- auto *DeoptCI = CS.getInstruction();
- auto *RI = dyn_cast<ReturnInst>(DeoptCI->getNextNode());
+ if (isa<CallInst>(Call)) {
+ auto *RI = dyn_cast<ReturnInst>(Call.getNextNode());
Assert(RI,
"calls to experimental_deoptimize must be followed by a return");
- if (!CS.getType()->isVoidTy() && RI)
- Assert(RI->getReturnValue() == DeoptCI,
+ if (!Call.getType()->isVoidTy() && RI)
+ Assert(RI->getReturnValue() == &Call,
"calls to experimental_deoptimize must be followed by a return "
"of the value computed by experimental_deoptimize");
}
break;
}
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat: {
+ Value *Op1 = Call.getArgOperand(0);
+ Value *Op2 = Call.getArgOperand(1);
+ Assert(Op1->getType()->isIntOrIntVectorTy(),
+ "first operand of [us][add|sub]_sat must be an int type or vector "
+ "of ints");
+ Assert(Op2->getType()->isIntOrIntVectorTy(),
+ "second operand of [us][add|sub]_sat must be an int type or vector "
+ "of ints");
+ break;
+ }
+ case Intrinsic::smul_fix: {
+ Value *Op1 = Call.getArgOperand(0);
+ Value *Op2 = Call.getArgOperand(1);
+ Assert(Op1->getType()->isIntOrIntVectorTy(),
+ "first operand of smul_fix must be an int type or vector "
+ "of ints");
+ Assert(Op2->getType()->isIntOrIntVectorTy(),
+ "second operand of smul_fix must be an int type or vector "
+ "of ints");
+
+ auto *Op3 = dyn_cast<ConstantInt>(Call.getArgOperand(2));
+ Assert(Op3, "third argument of smul_fix must be a constant integer");
+ Assert(Op3->getType()->getBitWidth() <= 32,
+ "third argument of smul_fix must fit within 32 bits");
+ Assert(Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
+ "the scale of smul_fix must be less than the width of the operands");
+ break;
+ }
};
}
@@ -4491,7 +4618,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
"invalid exception behavior argument", &FPI);
}
-void Verifier::visitDbgIntrinsic(StringRef Kind, DbgInfoIntrinsic &DII) {
+void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) {
auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata();
AssertDI(isa<ValueAsMetadata>(MD) ||
(isa<MDNode>(MD) && !cast<MDNode>(MD)->getNumOperands()),
@@ -4527,13 +4654,21 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgInfoIntrinsic &DII) {
&DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
Loc->getScope()->getSubprogram());
+ // This check is redundant with one in visitLocalVariable().
+ AssertDI(isType(Var->getRawType()), "invalid type ref", Var,
+ Var->getRawType());
+ if (auto *Type = dyn_cast_or_null<DIType>(Var->getRawType()))
+ if (Type->isBlockByrefStruct())
+ AssertDI(DII.getExpression() && DII.getExpression()->getNumElements(),
+ "BlockByRef variable without complex expression", Var, &DII);
+
verifyFnArgs(DII);
}
void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) {
- AssertDI(isa<DILabel>(DLI.getRawVariable()),
+ AssertDI(isa<DILabel>(DLI.getRawLabel()),
"invalid llvm.dbg." + Kind + " intrinsic variable", &DLI,
- DLI.getRawVariable());
+ DLI.getRawLabel());
// Ignore broken !dbg attachments; they're checked elsewhere.
if (MDNode *N = DLI.getDebugLoc().getAsMDNode())
@@ -4560,10 +4695,7 @@ void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) {
Loc->getScope()->getSubprogram());
}
-void Verifier::verifyFragmentExpression(const DbgInfoIntrinsic &I) {
- if (dyn_cast<DbgLabelInst>(&I))
- return;
-
+void Verifier::verifyFragmentExpression(const DbgVariableIntrinsic &I) {
DILocalVariable *V = dyn_cast_or_null<DILocalVariable>(I.getRawVariable());
DIExpression *E = dyn_cast_or_null<DIExpression>(I.getRawExpression());
@@ -4605,7 +4737,7 @@ void Verifier::verifyFragmentExpression(const DIVariable &V,
AssertDI(FragSize != *VarSize, "fragment covers entire variable", Desc, &V);
}
-void Verifier::verifyFnArgs(const DbgInfoIntrinsic &I) {
+void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) {
// This function does not take the scope of noninlined function arguments into
// account. Don't run it if current function is nodebug, because it may
// contain inlined debug intrinsics.
@@ -4662,6 +4794,14 @@ void Verifier::verifyDeoptimizeCallingConvs() {
}
}
+void Verifier::verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F) {
+ bool HasSource = F.getSource().hasValue();
+ if (!HasSourceDebugInfo.count(&U))
+ HasSourceDebugInfo[&U] = HasSource;
+ AssertDI(HasSource == HasSourceDebugInfo[&U],
+ "inconsistent use of embedded source");
+}
+
//===----------------------------------------------------------------------===//
// Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//
@@ -4718,9 +4858,10 @@ struct VerifierLegacyPass : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- if (!V->verify(F) && FatalErrors)
+ if (!V->verify(F) && FatalErrors) {
+ errs() << "in function " << F.getName() << '\n';
report_fatal_error("Broken function found, compilation aborted!");
-
+ }
return false;
}
diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp
index 68d210cb7d73..3a955060deaa 100644
--- a/contrib/llvm/lib/LTO/LTO.cpp
+++ b/contrib/llvm/lib/LTO/LTO.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/LTO/LTOBackend.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/Error.h"
@@ -42,6 +43,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
#include <set>
@@ -56,22 +58,20 @@ static cl::opt<bool>
DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
-// The values are (type identifier, summary) pairs.
-typedef DenseMap<
- GlobalValue::GUID,
- TinyPtrVector<const std::pair<const std::string, TypeIdSummary> *>>
- TypeIdSummariesByGuidTy;
+/// Enable global value internalization in LTO.
+cl::opt<bool> EnableLTOInternalization(
+ "enable-lto-internalization", cl::init(true), cl::Hidden,
+ cl::desc("Enable global value internalization in LTO"));
-// Returns a unique hash for the Module considering the current list of
+// Computes a unique hash for the Module considering the current list of
// export/import and other global analysis results.
// The hash is produced in \p Key.
-static void computeCacheKey(
+void llvm::computeLTOCacheKey(
SmallString<40> &Key, const Config &Conf, const ModuleSummaryIndex &Index,
StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
- const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid,
const std::set<GlobalValue::GUID> &CfiFunctionDefs,
const std::set<GlobalValue::GUID> &CfiFunctionDecls) {
// Compute the unique hash for this entry.
@@ -134,6 +134,7 @@ static void computeCacheKey(
AddUnsigned(Conf.CGFileType);
AddUnsigned(Conf.OptLevel);
AddUnsigned(Conf.UseNewPM);
+ AddUnsigned(Conf.Freestanding);
AddString(Conf.OptPipeline);
AddString(Conf.AAPipeline);
AddString(Conf.OverrideTriple);
@@ -189,6 +190,8 @@ static void computeCacheKey(
AddUnsigned(VI.isDSOLocal());
AddUsedCfiGlobal(VI.getGUID());
}
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(GS))
+ AddUnsigned(GVS->isReadOnly());
if (auto *FS = dyn_cast<FunctionSummary>(GS)) {
for (auto &TT : FS->type_tests())
UsedTypeIds.insert(TT);
@@ -220,8 +223,14 @@ static void computeCacheKey(
// Imported functions may introduce new uses of type identifier resolutions,
// so we need to collect their used resolutions as well.
for (auto &ImpM : ImportList)
- for (auto &ImpF : ImpM.second)
- AddUsedThings(Index.findSummaryInModule(ImpF, ImpM.first()));
+ for (auto &ImpF : ImpM.second) {
+ GlobalValueSummary *S = Index.findSummaryInModule(ImpF, ImpM.first());
+ AddUsedThings(S);
+ // If this is an alias, we also care about any types/etc. that the aliasee
+ // may reference.
+ if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
+ AddUsedThings(AS->getBaseObject());
+ }
auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
AddString(TId);
@@ -255,10 +264,9 @@ static void computeCacheKey(
// Include the hash for all type identifiers used by this module.
for (GlobalValue::GUID TId : UsedTypeIds) {
- auto SummariesI = TypeIdSummariesByGuid.find(TId);
- if (SummariesI != TypeIdSummariesByGuid.end())
- for (auto *Summary : SummariesI->second)
- AddTypeIdSummary(Summary->first, Summary->second);
+ auto TidIter = Index.typeIds().equal_range(TId);
+ for (auto It = TidIter.first; It != TidIter.second; ++It)
+ AddTypeIdSummary(It->second.first, It->second.second);
}
AddUnsigned(UsedCfiDefs.size());
@@ -271,14 +279,21 @@ static void computeCacheKey(
if (!Conf.SampleProfile.empty()) {
auto FileOrErr = MemoryBuffer::getFile(Conf.SampleProfile);
- if (FileOrErr)
+ if (FileOrErr) {
Hasher.update(FileOrErr.get()->getBuffer());
+
+ if (!Conf.ProfileRemapping.empty()) {
+ FileOrErr = MemoryBuffer::getFile(Conf.ProfileRemapping);
+ if (FileOrErr)
+ Hasher.update(FileOrErr.get()->getBuffer());
+ }
+ }
}
Key = toHex(Hasher.result());
}
-static void thinLTOResolveWeakForLinkerGUID(
+static void thinLTOResolvePrevailingGUID(
GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID,
DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
@@ -287,7 +302,10 @@ static void thinLTOResolveWeakForLinkerGUID(
recordNewLinkage) {
for (auto &S : GVSummaryList) {
GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
- if (!GlobalValue::isWeakForLinker(OriginalLinkage))
+ // Ignore local and appending linkage values since the linker
+ // doesn't resolve them.
+ if (GlobalValue::isLocalLinkage(OriginalLinkage) ||
+ GlobalValue::isAppendingLinkage(S->linkage()))
continue;
// We need to emit only one of these. The prevailing module will keep it,
// but turned into a weak, while the others will drop it when possible.
@@ -311,13 +329,13 @@ static void thinLTOResolveWeakForLinkerGUID(
}
}
-// Resolve Weak and LinkOnce values in the \p Index.
+/// Resolve linkage for prevailing symbols in the \p Index.
//
// We'd like to drop these functions if they are no longer referenced in the
// current module. However there is a chance that another module is still
// referencing them because of the import. We make sure we always emit at least
// one copy.
-void llvm::thinLTOResolveWeakForLinkerInIndex(
+void llvm::thinLTOResolvePrevailingInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
@@ -333,9 +351,9 @@ void llvm::thinLTOResolveWeakForLinkerInIndex(
GlobalInvolvedWithAlias.insert(&AS->getAliasee());
for (auto &I : Index)
- thinLTOResolveWeakForLinkerGUID(I.second.SummaryList, I.first,
- GlobalInvolvedWithAlias, isPrevailing,
- recordNewLinkage);
+ thinLTOResolvePrevailingGUID(I.second.SummaryList, I.first,
+ GlobalInvolvedWithAlias, isPrevailing,
+ recordNewLinkage);
}
static void thinLTOInternalizeAndPromoteGUID(
@@ -345,7 +363,14 @@ static void thinLTOInternalizeAndPromoteGUID(
if (isExported(S->modulePath(), GUID)) {
if (GlobalValue::isLocalLinkage(S->linkage()))
S->setLinkage(GlobalValue::ExternalLinkage);
- } else if (!GlobalValue::isLocalLinkage(S->linkage()))
+ } else if (EnableLTOInternalization &&
+ // Ignore local and appending linkage values since the linker
+ // doesn't resolve them.
+ !GlobalValue::isLocalLinkage(S->linkage()) &&
+ S->linkage() != GlobalValue::AppendingLinkage &&
+ // We can't internalize available_externally globals because this
+ // can break function pointer equality.
+ S->linkage() != GlobalValue::AvailableExternallyLinkage)
S->setLinkage(GlobalValue::InternalLinkage);
}
}
@@ -521,6 +546,15 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
if (!LTOInfo)
return LTOInfo.takeError();
+ if (EnableSplitLTOUnit.hasValue()) {
+ // If only some modules were split, flag this in the index so that
+ // we can skip or error on optimizations that need consistently split
+ // modules (whole program devirt and lower type tests).
+ if (EnableSplitLTOUnit.getValue() != LTOInfo->EnableSplitLTOUnit)
+ ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
+ } else
+ EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
+
BitcodeModule BM = Input.Mods[ModI];
auto ModSyms = Input.module_symbols(ModI);
addModuleToGlobalRes(ModSyms, {ResI, ResE},
@@ -668,8 +702,12 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
}
// Set the 'local' flag based on the linker resolution for this symbol.
- if (Res.FinalDefinitionInLinkageUnit)
+ if (Res.FinalDefinitionInLinkageUnit) {
GV->setDSOLocal(true);
+ if (GV->hasDLLImportStorageClass())
+ GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
+ DefaultStorageClass);
+ }
}
// Common resolution: collect the maximum size/alignment over all commons.
// We also record if we see an instance of a common as prevailing, so that
@@ -798,7 +836,8 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
return PrevailingType::Unknown;
return It->second;
};
- computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols, isPrevailing);
+ computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols,
+ isPrevailing, Conf.OptLevel > 0);
// Setup output file to emit statistics.
std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
@@ -877,7 +916,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
continue;
GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
: GlobalValue::UnnamedAddr::None);
- if (R.second.Partition == 0)
+ if (EnableLTOInternalization && R.second.Partition == 0)
GV->setLinkage(GlobalValue::InternalLinkage);
}
@@ -917,7 +956,6 @@ class InProcessThinBackend : public ThinBackendProc {
ThreadPool BackendThreadPool;
AddStreamFn AddStream;
NativeObjectCache Cache;
- TypeIdSummariesByGuidTy TypeIdSummariesByGuid;
std::set<GlobalValue::GUID> CfiFunctionDefs;
std::set<GlobalValue::GUID> CfiFunctionDecls;
@@ -933,12 +971,6 @@ public:
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
BackendThreadPool(ThinLTOParallelismLevel),
AddStream(std::move(AddStream)), Cache(std::move(Cache)) {
- // Create a mapping from type identifier GUIDs to type identifier summaries.
- // This allows backends to use the type identifier GUIDs stored in the
- // function summaries to determine which type identifier summaries affect
- // each function without needing to compute GUIDs in each backend.
- for (auto &TId : CombinedIndex.typeIds())
- TypeIdSummariesByGuid[GlobalValue::getGUID(TId.first)].push_back(&TId);
for (auto &Name : CombinedIndex.cfiFunctionDefs())
CfiFunctionDefs.insert(
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
@@ -954,8 +986,7 @@ public:
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap,
- const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid) {
+ MapVector<StringRef, BitcodeModule> &ModuleMap) {
auto RunThinBackend = [&](AddStreamFn AddStream) {
LTOLLVMContext BackendContext(Conf);
Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext);
@@ -977,9 +1008,9 @@ public:
SmallString<40> Key;
// The module may be cached, this helps handling it.
- computeCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList, ExportList,
- ResolvedODR, DefinedGlobals, TypeIdSummariesByGuid,
- CfiFunctionDefs, CfiFunctionDecls);
+ computeLTOCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList,
+ ExportList, ResolvedODR, DefinedGlobals, CfiFunctionDefs,
+ CfiFunctionDecls);
if (AddStreamFn CacheAddStream = Cache(Task, Key))
return RunThinBackend(CacheAddStream);
@@ -1003,11 +1034,10 @@ public:
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
&ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap,
- const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid) {
+ MapVector<StringRef, BitcodeModule> &ModuleMap) {
Error E = runThinLTOBackendThread(
AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
- ResolvedODR, DefinedGlobals, ModuleMap, TypeIdSummariesByGuid);
+ ResolvedODR, DefinedGlobals, ModuleMap);
if (E) {
std::unique_lock<std::mutex> L(ErrMu);
if (Err)
@@ -1017,8 +1047,7 @@ public:
}
},
BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList),
- std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap),
- std::ref(TypeIdSummariesByGuid));
+ std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap));
return Error::success();
}
@@ -1156,6 +1185,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
if (!ModuleToDefinedGVSummaries.count(Mod.first))
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
+ // Synthesize entry counts for functions in the CombinedIndex.
+ computeSyntheticCounts(ThinLTO.CombinedIndex);
+
StringMap<FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
StringMap<FunctionImporter::ExportSetTy> ExportLists(
@@ -1210,8 +1242,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
GlobalValue::LinkageTypes NewLinkage) {
ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
};
- thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing,
- recordNewLinkage);
+ thinLTOResolvePrevailingInIndex(ThinLTO.CombinedIndex, isPrevailing,
+ recordNewLinkage);
std::unique_ptr<ThinBackendProc> BackendProc =
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
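
The computeLTOCacheKey change above folds the per-backend cache key computation into one exported helper that hashes the LTO configuration, the module hash, import/export lists, resolved ODR linkages and CFI symbols. A toy sketch of the AddString/AddUnsigned accumulation pattern visible in these hunks, assuming a throwaway FNV-1a hash in place of the SHA-1 the real code uses (the inputs fed to it here are illustrative only):

#include <cstdint>
#include <cstdio>
#include <string>

struct KeyHasher {
  uint64_t H = 0xcbf29ce484222325ull;           // FNV-1a offset basis
  void addByte(uint8_t B) { H ^= B; H *= 0x100000001b3ull; }
  void addString(const std::string &S) {
    for (unsigned char C : S) addByte(C);
    addByte(0);                                 // terminator: ("a","bc") != ("ab","c")
  }
  void addUnsigned(unsigned I) {                // 4 bytes, little-endian
    for (int Shift = 0; Shift < 32; Shift += 8) addByte(uint8_t(I >> Shift));
  }
};

int main() {
  KeyHasher Hasher;
  Hasher.addString("x86-64");                   // e.g. target CPU
  Hasher.addString("+sse4.2");                  // e.g. feature string
  Hasher.addUnsigned(2);                        // e.g. optimization level
  std::printf("llvmcache-%016llx\n", (unsigned long long)Hasher.H);
}

The NUL separator after each string matters for the same reason it does in the real helper: without it, different field boundaries could hash to the same key.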
diff --git a/contrib/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm/lib/LTO/LTOBackend.cpp
index eadbb410bd5a..926c419e34a8 100644
--- a/contrib/llvm/lib/LTO/LTOBackend.cpp
+++ b/contrib/llvm/lib/LTO/LTOBackend.cpp
@@ -138,9 +138,15 @@ createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
RelocModel =
M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
+ Optional<CodeModel::Model> CodeModel;
+ if (Conf.CodeModel)
+ CodeModel = *Conf.CodeModel;
+ else
+ CodeModel = M.getCodeModel();
+
return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
- Conf.CodeModel, Conf.CGOptLevel));
+ CodeModel, Conf.CGOptLevel));
}
static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
@@ -149,13 +155,14 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
const ModuleSummaryIndex *ImportSummary) {
Optional<PGOOptions> PGOOpt;
if (!Conf.SampleProfile.empty())
- PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true);
+ PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping,
+ false, true);
PassBuilder PB(TM, PGOOpt);
AAManager AA;
// Parse a custom AA pipeline if asked to.
- if (!PB.parseAAPipeline(AA, "default"))
+ if (auto Err = PB.parseAAPipeline(AA, "default"))
report_fatal_error("Error parsing default AA pipeline");
LoopAnalysisManager LAM(Conf.DebugPassManager);
@@ -214,9 +221,9 @@ static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
// Parse a custom AA pipeline if asked to.
if (!AAPipelineDesc.empty())
- if (!PB.parseAAPipeline(AA, AAPipelineDesc))
- report_fatal_error("unable to parse AA pipeline description: " +
- AAPipelineDesc);
+ if (auto Err = PB.parseAAPipeline(AA, AAPipelineDesc))
+ report_fatal_error("unable to parse AA pipeline description '" +
+ AAPipelineDesc + "': " + toString(std::move(Err)));
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
@@ -239,9 +246,9 @@ static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
MPM.addPass(VerifierPass());
// Now, add all the passes we've been requested to.
- if (!PB.parsePassPipeline(MPM, PipelineDesc))
- report_fatal_error("unable to parse pass pipeline description: " +
- PipelineDesc);
+ if (auto Err = PB.parsePassPipeline(MPM, PipelineDesc))
+ report_fatal_error("unable to parse pass pipeline description '" +
+ PipelineDesc + "': " + toString(std::move(Err)));
if (!DisableVerify)
MPM.addPass(VerifierPass());
@@ -483,7 +490,7 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex);
- thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
+ thinLTOResolvePrevailingInModule(Mod, DefinedGlobals);
if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
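
createTargetMachine above now lets an explicit Conf.CodeModel override the module's own code model and otherwise falls back to it. A minimal sketch of that fallback pattern with std::optional (the enum and function names here are illustrative, not LLVM's):

#include <cassert>
#include <optional>

enum class CodeModel { Small, Kernel, Medium, Large };

// Config-level override wins; otherwise use whatever the module requests,
// which may itself be empty and leave the target's default in effect.
std::optional<CodeModel> pickCodeModel(std::optional<CodeModel> FromConfig,
                                       std::optional<CodeModel> FromModule) {
  return FromConfig ? FromConfig : FromModule;
}

int main() {
  assert(pickCodeModel(CodeModel::Large, CodeModel::Small) == CodeModel::Large);
  assert(pickCodeModel(std::nullopt, CodeModel::Small) == CodeModel::Small);
  assert(!pickCodeModel(std::nullopt, std::nullopt));
}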
diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
index ffe9af74cdca..3b63bbc7e256 100644
--- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/LTO/LTO.h"
diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp
index 20fc0943539f..0d40d49dbe39 100644
--- a/contrib/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm/lib/LTO/LTOModule.cpp
@@ -73,7 +73,7 @@ bool LTOModule::isBitcodeFile(StringRef Path) {
bool LTOModule::isThinLTO() {
Expected<BitcodeLTOInfo> Result = getBitcodeLTOInfo(MBRef);
if (!Result) {
- logAllUnhandledErrors(Result.takeError(), errs(), "");
+ logAllUnhandledErrors(Result.takeError(), errs());
return false;
}
return Result->IsThinLTO;
diff --git a/contrib/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/contrib/llvm/lib/LTO/SummaryBasedOptimizations.cpp
new file mode 100644
index 000000000000..bcdd984daa58
--- /dev/null
+++ b/contrib/llvm/lib/LTO/SummaryBasedOptimizations.cpp
@@ -0,0 +1,86 @@
+//==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements optimizations that are based on the module summaries.
+// These optimizations are performed during the thinlink phase of the
+// compilation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/SummaryBasedOptimizations.h"
+#include "llvm/Analysis/SyntheticCountsUtils.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+
+using namespace llvm;
+
+cl::opt<bool> ThinLTOSynthesizeEntryCounts(
+ "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden,
+ cl::desc("Synthesize entry counts based on the summary"));
+
+extern cl::opt<int> InitialSyntheticCount;
+
+static void initializeCounts(ModuleSummaryIndex &Index) {
+ auto Root = Index.calculateCallGraphRoot();
+ // Root is a fake node. All its successors are the actual roots of the
+ // callgraph.
+ // FIXME: This initializes the entry counts of only the root nodes. This makes
+ // sense when compiling a binary with ThinLTO, but for libraries any of the
+ // non-root nodes could be called from outside.
+ for (auto &C : Root.calls()) {
+ auto &V = C.first;
+ for (auto &GVS : V.getSummaryList()) {
+ auto S = GVS.get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ F->setEntryCount(InitialSyntheticCount);
+ }
+ }
+}
+
+void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) {
+ if (!ThinLTOSynthesizeEntryCounts)
+ return;
+
+ using Scaled64 = ScaledNumber<uint64_t>;
+ initializeCounts(Index);
+ auto GetCallSiteRelFreq = [](FunctionSummary::EdgeTy &Edge) {
+ return Scaled64(Edge.second.RelBlockFreq, -CalleeInfo::ScaleShift);
+ };
+ auto GetEntryCount = [](ValueInfo V) {
+ if (V.getSummaryList().size()) {
+ auto S = V.getSummaryList().front().get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ return F->entryCount();
+ } else {
+ return UINT64_C(0);
+ }
+ };
+ auto AddToEntryCount = [](ValueInfo V, Scaled64 New) {
+ if (!V.getSummaryList().size())
+ return;
+ for (auto &GVS : V.getSummaryList()) {
+ auto S = GVS.get()->getBaseObject();
+ auto *F = cast<FunctionSummary>(S);
+ F->setEntryCount(
+ SaturatingAdd(F->entryCount(), New.template toInt<uint64_t>()));
+ }
+ };
+
+ auto GetProfileCount = [&](ValueInfo V, FunctionSummary::EdgeTy &Edge) {
+ auto RelFreq = GetCallSiteRelFreq(Edge);
+ Scaled64 EC(GetEntryCount(V), 0);
+ return RelFreq * EC;
+ };
+ // After initializing the counts in initializeCounts above, the counts have to
+ // be propagated across the combined callgraph.
+ // SyntheticCountsUtils::propagate takes care of this propagation on any
+ // callgraph that specialized GraphTraits.
+ SyntheticCountsUtils<ModuleSummaryIndex *>::propagate(&Index, GetProfileCount,
+ AddToEntryCount);
+ Index.setHasSyntheticEntryCounts();
+}
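
The new file above seeds synthetic entry counts at the successors of the index's fake call-graph root and then propagates them across the combined call graph, scaling each edge by its relative block frequency. A hedged, self-contained sketch of that propagation on a toy graph (plain doubles and a hand-ordered edge list stand in for ScaledNumber and SyntheticCountsUtils; all names and numbers are made up):

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Root seed: only entry points get a nonzero starting count.
  std::map<std::string, uint64_t> EntryCount = {{"main", 10}};
  struct Edge { std::string Caller, Callee; double RelFreq; };
  std::vector<Edge> Edges = {
      {"main", "foo", 2.0},   // foo runs about twice per entry to main
      {"main", "bar", 0.5},
      {"foo", "bar", 1.0},
  };
  // Propagate caller count * edge frequency into each callee. The edge list
  // is assumed to be topologically ordered so callers are finalized first.
  for (const Edge &E : Edges)
    EntryCount[E.Callee] += uint64_t(EntryCount[E.Caller] * E.RelFreq);
  for (const auto &KV : EntryCount)
    std::printf("%s: %llu\n", KV.first.c_str(), (unsigned long long)KV.second);
}

Seeding only the root nodes is the limitation the file's own FIXME calls out: for a library, non-root functions can be entered from outside, yet their synthetic counts start at zero.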
diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 642e538ecf92..d9ec68fe3eb5 100644
--- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -29,9 +29,11 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTO.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/CachePruning.h"
@@ -297,8 +299,7 @@ public:
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
- const GVSummaryMapTy &DefinedFunctions,
- const DenseSet<GlobalValue::GUID> &PreservedSymbols, unsigned OptLevel,
+ const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel,
bool Freestanding, const TargetMachineBuilder &TMBuilder) {
if (CachePath.empty())
return;
@@ -307,87 +308,26 @@ public:
// The module does not have an entry, it can't have a hash at all
return;
- // Compute the unique hash for this entry
- // This is based on the current compiler version, the module itself, the
- // export list, the hash for every single module in the import list, the
- // list of ResolvedODR for the module, and the list of preserved symbols.
-
- // Include the hash for the current module
- auto ModHash = Index.getModuleHash(ModuleID);
-
- if (all_of(ModHash, [](uint32_t V) { return V == 0; }))
+ if (all_of(Index.getModuleHash(ModuleID),
+ [](uint32_t V) { return V == 0; }))
// No hash entry, no caching!
return;
- SHA1 Hasher;
-
- // Include the parts of the LTO configuration that affect code generation.
- auto AddString = [&](StringRef Str) {
- Hasher.update(Str);
- Hasher.update(ArrayRef<uint8_t>{0});
- };
- auto AddUnsigned = [&](unsigned I) {
- uint8_t Data[4];
- Data[0] = I;
- Data[1] = I >> 8;
- Data[2] = I >> 16;
- Data[3] = I >> 24;
- Hasher.update(ArrayRef<uint8_t>{Data, 4});
- };
-
- // Start with the compiler revision
- Hasher.update(LLVM_VERSION_STRING);
-#ifdef LLVM_REVISION
- Hasher.update(LLVM_REVISION);
-#endif
-
- // Hash the optimization level and the target machine settings.
- AddString(TMBuilder.MCpu);
- // FIXME: Hash more of Options. For now all clients initialize Options from
- // command-line flags (which is unsupported in production), but may set
- // RelaxELFRelocations. The clang driver can also pass FunctionSections,
- // DataSections and DebuggerTuning via command line flags.
- AddUnsigned(TMBuilder.Options.RelaxELFRelocations);
- AddUnsigned(TMBuilder.Options.FunctionSections);
- AddUnsigned(TMBuilder.Options.DataSections);
- AddUnsigned((unsigned)TMBuilder.Options.DebuggerTuning);
- AddString(TMBuilder.MAttr);
- if (TMBuilder.RelocModel)
- AddUnsigned(*TMBuilder.RelocModel);
- AddUnsigned(TMBuilder.CGOptLevel);
- AddUnsigned(OptLevel);
- AddUnsigned(Freestanding);
-
- Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- for (auto F : ExportList)
- // The export list can impact the internalization, be conservative here
- Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
-
- // Include the hash for every module we import functions from
- for (auto &Entry : ImportList) {
- auto ModHash = Index.getModuleHash(Entry.first());
- Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- }
-
- // Include the hash for the resolved ODR.
- for (auto &Entry : ResolvedODR) {
- Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
- sizeof(GlobalValue::GUID)));
- Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
- sizeof(GlobalValue::LinkageTypes)));
- }
-
- // Include the hash for the preserved symbols.
- for (auto &Entry : PreservedSymbols) {
- if (DefinedFunctions.count(Entry))
- Hasher.update(
- ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
- }
+ llvm::lto::Config Conf;
+ Conf.OptLevel = OptLevel;
+ Conf.Options = TMBuilder.Options;
+ Conf.CPU = TMBuilder.MCpu;
+ Conf.MAttrs.push_back(TMBuilder.MAttr);
+ Conf.RelocModel = TMBuilder.RelocModel;
+ Conf.CGOptLevel = TMBuilder.CGOptLevel;
+ Conf.Freestanding = Freestanding;
+ SmallString<40> Key;
+ computeLTOCacheKey(Key, Conf, Index, ModuleID, ImportList, ExportList,
+ ResolvedODR, DefinedGVSummaries);
// This choice of file name allows the cache to be pruned (see pruneCache()
// in include/llvm/Support/CachePruning.h).
- sys::path::append(EntryPath, CachePath,
- "llvmcache-" + toHex(Hasher.result()));
+ sys::path::append(EntryPath, CachePath, "llvmcache-" + Key);
}
// Access the path to this entry in the cache.
@@ -456,8 +396,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
if (!SingleModule) {
promoteModule(TheModule, Index);
- // Apply summary-based LinkOnce/Weak resolution decisions.
- thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals);
+ // Apply summary-based prevailing-symbol resolution decisions.
+ thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals);
// Save temps: after promotion.
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
@@ -499,12 +439,12 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
return codegenModule(TheModule, TM);
}
-/// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map
+/// Resolve prevailing symbols. Record resolutions in the \p ResolvedODR map
/// for caching, and in the \p Index for application during the ThinLTO
/// backends. This is needed for correctness for exported symbols (ensure
/// at least one copy kept) and a compile-time optimization (to drop duplicate
/// copies when possible).
-static void resolveWeakForLinkerInIndex(
+static void resolvePrevailingInIndex(
ModuleSummaryIndex &Index,
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
&ResolvedODR) {
@@ -526,7 +466,7 @@ static void resolveWeakForLinkerInIndex(
ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
};
- thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage);
+ thinLTOResolvePrevailingInIndex(Index, isPrevailing, recordNewLinkage);
}
// Initialize the TargetMachine builder for a given Triple
@@ -645,7 +585,8 @@ static void computeDeadSymbolsInIndex(
auto isPrevailing = [&](GlobalValue::GUID G) {
return PrevailingType::Unknown;
};
- computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+ computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing,
+ /* ImportEnabled = */ true);
}
/**
@@ -674,11 +615,11 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
- // Resolve LinkOnce/Weak symbols.
+ // Resolve prevailing symbols
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
- resolveWeakForLinkerInIndex(Index, ResolvedODR);
+ resolvePrevailingInIndex(Index, ResolvedODR);
- thinLTOResolveWeakForLinkerModule(
+ thinLTOResolvePrevailingInModule(
TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
// Promote the exported values in the index, so that they are promoted
@@ -721,37 +662,52 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
* Compute the list of summaries needed for importing into module.
*/
void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
- StringRef ModulePath, ModuleSummaryIndex &Index,
+ Module &TheModule, ModuleSummaryIndex &Index,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
auto ModuleCount = Index.modulePaths().size();
+ auto ModuleIdentifier = TheModule.getModuleIdentifier();
// Collect for each module the list of function it defines (GUID -> Summary).
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
ExportLists);
- llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
- ImportLists[ModulePath],
- ModuleToSummariesForIndex);
+ llvm::gatherImportedSummariesForModule(
+ ModuleIdentifier, ModuleToDefinedGVSummaries,
+ ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
}
/**
* Emit the list of files needed for importing into module.
*/
-void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
- StringRef OutputName,
+void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
ModuleSummaryIndex &Index) {
auto ModuleCount = Index.modulePaths().size();
+ auto ModuleIdentifier = TheModule.getModuleIdentifier();
// Collect for each module the list of function it defines (GUID -> Summary).
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Compute "dead" symbols, we don't want to import/export these!
+ computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
@@ -759,13 +715,13 @@ void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
ExportLists);
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
- llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
- ImportLists[ModulePath],
- ModuleToSummariesForIndex);
+ llvm::gatherImportedSummariesForModule(
+ ModuleIdentifier, ModuleToDefinedGVSummaries,
+ ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
std::error_code EC;
- if ((EC =
- EmitImportsFiles(ModulePath, OutputName, ModuleToSummariesForIndex)))
+ if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName,
+ ModuleToSummariesForIndex)))
report_fatal_error(Twine("Failed to open ") + OutputName +
" to save imports lists\n");
}
@@ -818,14 +774,6 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
}
-/**
- * Perform ThinLTO CodeGen.
- */
-std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
- initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
- return codegenModule(TheModule, *TMBuilder.create());
-}
-
/// Write out the generated object file, either from CacheEntryPath or from
/// OutputBuffer, preferring hard-link when possible.
/// Returns the path to the generated file in SavedObjectsDirectoryPath.
@@ -893,7 +841,7 @@ void ThinLTOCodeGenerator::run() {
/*IsImporting*/ false);
// CodeGen
- auto OutputBuffer = codegen(*TheModule);
+ auto OutputBuffer = codegenModule(*TheModule, *TMBuilder.create());
if (SavedObjectsDirectoryPath.empty())
ProducedBinaries[count] = std::move(OutputBuffer);
else
@@ -936,6 +884,9 @@ void ThinLTOCodeGenerator::run() {
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
+ // Synthesize entry counts for functions in the combined index.
+ computeSyntheticCounts(*Index);
+
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
@@ -949,20 +900,24 @@ void ThinLTOCodeGenerator::run() {
// on the index, and nuke this map.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
- // Resolve LinkOnce/Weak symbols, this has to be computed early because it
+ // Resolve prevailing symbols, this has to be computed early because it
// impacts the caching.
- resolveWeakForLinkerInIndex(*Index, ResolvedODR);
+ resolvePrevailingInIndex(*Index, ResolvedODR);
// Use global summary-based analysis to identify symbols that can be
// internalized (because they aren't exported or preserved as per callback).
// Changes are made in the index, consumed in the ThinLTO backends.
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, *Index);
- // Make sure that every module has an entry in the ExportLists and
- // ResolvedODR maps to enable threaded access to these maps below.
- for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
- ExportLists[DefinedGVSummaries.first()];
- ResolvedODR[DefinedGVSummaries.first()];
+ // Make sure that every module has an entry in the ExportLists, ImportList,
+ // GVSummary and ResolvedODR maps to enable threaded access to these maps
+ // below.
+ for (auto &Module : Modules) {
+ auto ModuleIdentifier = Module.getBufferIdentifier();
+ ExportLists[ModuleIdentifier];
+ ImportLists[ModuleIdentifier];
+ ResolvedODR[ModuleIdentifier];
+ ModuleToDefinedGVSummaries[ModuleIdentifier];
}
// Compute the ordering we will process the inputs: the rough heuristic here
@@ -971,12 +926,11 @@ void ThinLTOCodeGenerator::run() {
std::vector<int> ModulesOrdering;
ModulesOrdering.resize(Modules.size());
std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
- llvm::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
- [&](int LeftIndex, int RightIndex) {
- auto LSize = Modules[LeftIndex].getBuffer().size();
- auto RSize = Modules[RightIndex].getBuffer().size();
- return LSize > RSize;
- });
+ llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+ auto LSize = Modules[LeftIndex].getBuffer().size();
+ auto RSize = Modules[RightIndex].getBuffer().size();
+ return LSize > RSize;
+ });
// Parallel optimizer + codegen
{
@@ -987,14 +941,14 @@ void ThinLTOCodeGenerator::run() {
auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
auto &ExportList = ExportLists[ModuleIdentifier];
- auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
+ auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier];
// The module may be cached, this helps handling it.
ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
ImportLists[ModuleIdentifier], ExportList,
ResolvedODR[ModuleIdentifier],
- DefinedFunctions, GUIDPreservedSymbols,
- OptLevel, Freestanding, TMBuilder);
+ DefinedGVSummaries, OptLevel, Freestanding,
+ TMBuilder);
auto CacheEntryPath = CacheEntry.getEntryPath();
{
diff --git a/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp b/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
index c982a5b0e5aa..00482dee6e10 100644
--- a/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
+++ b/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
@@ -95,12 +95,18 @@ private:
if (GV.hasPrivateLinkage())
return;
- // Conservatively append user-supplied runtime library functions to
- // llvm.compiler.used. These could be internalized and deleted by
- // optimizations like -globalopt, causing problems when later optimizations
- // add new library calls (e.g., llvm.memset => memset and printf => puts).
+ // Conservatively append user-supplied runtime library functions (supplied
+ // either directly, or via a function alias) to llvm.compiler.used. These
+ // could be internalized and deleted by optimizations like -globalopt,
+ // causing problems when later optimizations add new library calls (e.g.,
+ // llvm.memset => memset and printf => puts).
// Leave it to the linker to remove any dead code (e.g. with -dead_strip).
- if (isa<Function>(GV) && Libcalls.count(GV.getName())) {
+ GlobalValue *FuncAliasee = nullptr;
+ if (isa<GlobalAlias>(GV)) {
+ auto *A = cast<GlobalAlias>(&GV);
+ FuncAliasee = dyn_cast<Function>(A->getAliasee());
+ }
+ if ((isa<Function>(GV) || FuncAliasee) && Libcalls.count(GV.getName())) {
LLVMUsed.push_back(&GV);
return;
}
diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp
index 738dec8e1f29..afbc57abfcc0 100644
--- a/contrib/llvm/lib/Linker/IRMover.cpp
+++ b/contrib/llvm/lib/Linker/IRMover.cpp
@@ -978,11 +978,14 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
// containing a GV from the source module, in which case SGV will be
// the same as DGV and NewGV, and TypeMap.get() will assert since it
// assumes it is being invoked on a type in the source module.
- if (DGV && NewGV != SGV)
- C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
+ if (DGV && NewGV != SGV) {
+ C = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ NewGV, TypeMap.get(SGV->getType()));
+ }
if (DGV && NewGV != DGV) {
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType()));
+ DGV->replaceAllUsesWith(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewGV, DGV->getType()));
DGV->eraseFromParent();
}
@@ -1059,10 +1062,15 @@ void IRLinker::prepareCompileUnitsForImport() {
ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr);
ValueMap.MD()[CU->getRawMacros()].reset(nullptr);
ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr);
- // We import global variables only temporarily in order for instcombine
- // and globalopt to perform constant folding and static constructor
- // evaluation. After that elim-avail-extern will covert imported globals
- // back to declarations, so we don't need debug info for them.
+ // The original definition (or at least its debug info - if the variable is
+ // internalized and optimized away) will remain in the source module, so
+ // there's no need to import them.
+ // If LLVM ever does more advanced optimizations on global variables
+ // (removing/localizing write operations, for instance) that can track
+ // through debug info, this decision may need to be revisited - but do so
+ // with care when it comes to debug info size. Emitting small CUs containing
+ // only a few imported entities into every destination module may be very
+ // size inefficient.
ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr);
// Imported entities only need to be mapped in if they have local
@@ -1227,8 +1235,14 @@ Error IRLinker::linkModuleFlagsMetadata() {
case Module::Warning: {
// Emit a warning if the values differ.
if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
- emitWarning("linking module flags '" + ID->getString() +
- "': IDs have conflicting values");
+ std::string str;
+ raw_string_ostream(str)
+ << "linking module flags '" << ID->getString()
+ << "': IDs have conflicting values ('" << *SrcOp->getOperand(2)
+ << "' from " << SrcM->getModuleIdentifier() << " with '"
+ << *DstOp->getOperand(2) << "' from " << DstM.getModuleIdentifier()
+ << ')';
+ emitWarning(str);
}
continue;
}
diff --git a/contrib/llvm/lib/MC/ConstantPools.cpp b/contrib/llvm/lib/MC/ConstantPools.cpp
index ca5440237e49..18277a225640 100644
--- a/contrib/llvm/lib/MC/ConstantPools.cpp
+++ b/contrib/llvm/lib/MC/ConstantPools.cpp
@@ -97,16 +97,14 @@ void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
MCSection *Section = Streamer.getCurrentSectionOnly();
- if (ConstantPool *CP = getConstantPool(Section)) {
+ if (ConstantPool *CP = getConstantPool(Section))
emitConstantPool(Streamer, Section, *CP);
- }
}
void AssemblerConstantPools::clearCacheForCurrentSection(MCStreamer &Streamer) {
MCSection *Section = Streamer.getCurrentSectionOnly();
- if (ConstantPool *CP = getConstantPool(Section)) {
+ if (ConstantPool *CP = getConstantPool(Section))
CP->clearCache();
- }
}
const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index db531f75c87c..89f3b30cddd6 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -29,6 +29,7 @@
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
@@ -668,6 +669,20 @@ void ELFWriter::computeSymbolTable(
} else {
const MCSectionELF &Section =
static_cast<const MCSectionELF &>(Symbol.getSection());
+
+ // We may end up with a situation where a section symbol is technically
+ // defined, but should not be. That happens because we explicitly
+ // pre-create a few .debug_* sections to have accessors.
+ // And if these sections were not really defined in the code, but were
+ // referenced, we simply error out.
+ if (!Section.isRegistered()) {
+ assert(static_cast<const MCSymbolELF &>(Symbol).getType() ==
+ ELF::STT_SECTION);
+ Ctx.reportError(SMLoc(),
+ "Undefined section reference: " + Symbol.getName());
+ continue;
+ }
+
if (Mode == NonDwoOnly && isDwoSection(Section))
continue;
MSD.SectionIndex = SectionIndexMap.lookup(&Section);
@@ -1107,6 +1122,8 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
SectionIndexMap[RelSection] = addToSectionTable(RelSection);
Relocations.push_back(RelSection);
}
+
+ OWriter.TargetObjectWriter->addTargetSectionFlags(Ctx, Section);
}
MCSectionELF *CGProfileSection = nullptr;
@@ -1273,6 +1290,8 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
for (const MCSymbol *&Sym : AddrsigSyms) {
if (const MCSymbol *R = Renames.lookup(cast<MCSymbolELF>(Sym)))
Sym = R;
+ if (Sym->isInSection() && Sym->getName().startswith(".L"))
+ Sym = Sym->getSection().getBeginSymbol();
Sym->setUsedInReloc();
}
}
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
index d8fb875b67c6..15886eb619b9 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -25,7 +25,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
COMMDirectiveAlignmentIsInBytes = false;
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
+ HasSingleParameterDotFile = true;
WeakRefDirective = "\t.weak\t";
HasLinkOnceDirective = true;
diff --git a/contrib/llvm/lib/MC/MCAsmInfoWasm.cpp b/contrib/llvm/lib/MC/MCAsmInfoWasm.cpp
index fc55059ff75d..d448664baa14 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoWasm.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoWasm.cpp
@@ -15,7 +15,7 @@
#include "llvm/MC/MCAsmInfoWasm.h"
using namespace llvm;
-void MCAsmInfoWasm::anchor() { }
+void MCAsmInfoWasm::anchor() {}
MCAsmInfoWasm::MCAsmInfoWasm() {
HasIdentDirective = true;
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index ae02f50bf8bd..e017103070bf 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -147,9 +147,9 @@ public:
void EmitLinkerOptions(ArrayRef<std::string> Options) override;
void EmitDataRegion(MCDataRegionType Kind) override;
void EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor,
- unsigned Update) override;
+ unsigned Update, VersionTuple SDKVersion) override;
void EmitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor,
- unsigned Update) override;
+ unsigned Update, VersionTuple SDKVersion) override;
void EmitThumbFunc(MCSymbol *Func) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
@@ -266,6 +266,7 @@ public:
void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override;
void EmitIdent(StringRef IdentString) override;
+ void EmitCFIBKeyFrame() override;
void EmitCFISections(bool EH, bool Debug) override;
void EmitCFIDefCfa(int64_t Register, int64_t Offset) override;
void EmitCFIDefCfaOffset(int64_t Offset) override;
@@ -285,10 +286,12 @@ public:
void EmitCFIUndefined(int64_t Register) override;
void EmitCFIRegister(int64_t Register1, int64_t Register2) override;
void EmitCFIWindowSave() override;
+ void EmitCFINegateRAState() override;
void EmitCFIReturnColumn(int64_t Register) override;
void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override;
void EmitWinCFIEndProc(SMLoc Loc) override;
+ void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) override;
void EmitWinCFIStartChained(SMLoc Loc) override;
void EmitWinCFIEndChained(SMLoc Loc) override;
void EmitWinCFIPushReg(unsigned Register, SMLoc Loc) override;
@@ -513,31 +516,51 @@ static const char *getVersionMinDirective(MCVersionMinType Type) {
llvm_unreachable("Invalid MC version min type");
}
+static void EmitSDKVersionSuffix(raw_ostream &OS,
+ const VersionTuple &SDKVersion) {
+ if (SDKVersion.empty())
+ return;
+ OS << '\t' << "sdk_version " << SDKVersion.getMajor();
+ if (auto Minor = SDKVersion.getMinor()) {
+ OS << ", " << *Minor;
+ if (auto Subminor = SDKVersion.getSubminor()) {
+ OS << ", " << *Subminor;
+ }
+ }
+}
+
void MCAsmStreamer::EmitVersionMin(MCVersionMinType Type, unsigned Major,
- unsigned Minor, unsigned Update) {
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
OS << '\t' << getVersionMinDirective(Type) << ' ' << Major << ", " << Minor;
if (Update)
OS << ", " << Update;
+ EmitSDKVersionSuffix(OS, SDKVersion);
EmitEOL();
}
static const char *getPlatformName(MachO::PlatformType Type) {
switch (Type) {
- case MachO::PLATFORM_MACOS: return "macos";
- case MachO::PLATFORM_IOS: return "ios";
- case MachO::PLATFORM_TVOS: return "tvos";
- case MachO::PLATFORM_WATCHOS: return "watchos";
- case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+ case MachO::PLATFORM_MACOS: return "macos";
+ case MachO::PLATFORM_IOS: return "ios";
+ case MachO::PLATFORM_TVOS: return "tvos";
+ case MachO::PLATFORM_WATCHOS: return "watchos";
+ case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+ case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
+ case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
+ case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
}
llvm_unreachable("Invalid Mach-O platform type");
}
void MCAsmStreamer::EmitBuildVersion(unsigned Platform, unsigned Major,
- unsigned Minor, unsigned Update) {
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
const char *PlatformName = getPlatformName((MachO::PlatformType)Platform);
OS << "\t.build_version " << PlatformName << ", " << Major << ", " << Minor;
if (Update)
OS << ", " << Update;
+ EmitSDKVersionSuffix(OS, SDKVersion);
EmitEOL();
}
@@ -858,10 +881,14 @@ void MCAsmStreamer::EmitBytes(StringRef Data) {
// supported, emit as vector of 8bits data.
if (Data.size() == 1 ||
!(MAI->getAscizDirective() || MAI->getAsciiDirective())) {
- const char *Directive = MAI->getData8bitsDirective();
- for (const unsigned char C : Data.bytes()) {
- OS << Directive << (unsigned)C;
- EmitEOL();
+ if (MCTargetStreamer *TS = getTargetStreamer()) {
+ TS->emitRawBytes(Data);
+ } else {
+ const char *Directive = MAI->getData8bitsDirective();
+ for (const unsigned char C : Data.bytes()) {
+ OS << Directive << (unsigned)C;
+ EmitEOL();
+ }
}
return;
}
@@ -1298,20 +1325,17 @@ void MCAsmStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
StringRef FileName, SMLoc Loc) {
+ // Validate the directive.
+ if (!checkCVLocSection(FunctionId, FileNo, Loc))
+ return;
+
OS << "\t.cv_loc\t" << FunctionId << " " << FileNo << " " << Line << " "
<< Column;
if (PrologueEnd)
OS << " prologue_end";
- unsigned OldIsStmt = getContext().getCVContext().getCurrentCVLoc().isStmt();
- if (IsStmt != OldIsStmt) {
- OS << " is_stmt ";
-
- if (IsStmt)
- OS << "1";
- else
- OS << "0";
- }
+ if (IsStmt)
+ OS << " is_stmt 1";
if (IsVerboseAsm) {
OS.PadToColumn(MAI->getCommentColumn());
@@ -1319,8 +1343,6 @@ void MCAsmStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
<< Column;
}
EmitEOL();
- this->MCStreamer::EmitCVLocDirective(FunctionId, FileNo, Line, Column,
- PrologueEnd, IsStmt, FileName, Loc);
}
void MCAsmStreamer::EmitCVLinetableDirective(unsigned FunctionId,
@@ -1569,12 +1591,24 @@ void MCAsmStreamer::EmitCFIWindowSave() {
EmitEOL();
}
+void MCAsmStreamer::EmitCFINegateRAState() {
+ MCStreamer::EmitCFINegateRAState();
+ OS << "\t.cfi_negate_ra_state";
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitCFIReturnColumn(int64_t Register) {
MCStreamer::EmitCFIReturnColumn(Register);
OS << "\t.cfi_return_column " << Register;
EmitEOL();
}
+void MCAsmStreamer::EmitCFIBKeyFrame() {
+ MCStreamer::EmitCFIBKeyFrame();
+ OS << "\t.cfi_b_key_frame";
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
MCStreamer::EmitWinCFIStartProc(Symbol, Loc);
@@ -1590,6 +1624,10 @@ void MCAsmStreamer::EmitWinCFIEndProc(SMLoc Loc) {
EmitEOL();
}
+// TODO: Implement
+void MCAsmStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
+}
+
void MCAsmStreamer::EmitWinCFIStartChained(SMLoc Loc) {
MCStreamer::EmitWinCFIStartChained(Loc);
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index 1e23b6d816e8..cde6a93a1647 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -111,6 +111,7 @@ void MCAssembler::reset() {
ELFHeaderEFlags = 0;
LOHContainer.reset();
VersionInfo.Major = 0;
+ VersionInfo.SDKVersion = VersionTuple();
// reset objects owned by us
if (getBackendPtr())
diff --git a/contrib/llvm/lib/MC/MCCodeView.cpp b/contrib/llvm/lib/MC/MCCodeView.cpp
index 155fd7eeb576..978ac789c31e 100644
--- a/contrib/llvm/lib/MC/MCCodeView.cpp
+++ b/contrib/llvm/lib/MC/MCCodeView.cpp
@@ -128,6 +128,14 @@ bool CodeViewContext::recordInlinedCallSiteId(unsigned FuncId, unsigned IAFunc,
return true;
}
+void CodeViewContext::recordCVLoc(MCContext &Ctx, const MCSymbol *Label,
+ unsigned FunctionId, unsigned FileNo,
+ unsigned Line, unsigned Column,
+ bool PrologueEnd, bool IsStmt) {
+ addLineEntry(MCCVLoc{
+ Label, FunctionId, FileNo, Line, Column, PrologueEnd, IsStmt});
+}
+
MCDataFragment *CodeViewContext::getStringTableFragment() {
if (!StrTabFragment) {
StrTabFragment = new MCDataFragment();
@@ -255,7 +263,7 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS,
OS.EmitValueImpl(SRE, 4);
}
-void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
+void CodeViewContext::addLineEntry(const MCCVLoc &LineEntry) {
size_t Offset = MCCVLines.size();
auto I = MCCVLineStartStop.insert(
{LineEntry.getFunctionId(), {Offset, Offset + 1}});
@@ -264,9 +272,9 @@ void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
MCCVLines.push_back(LineEntry);
}
-std::vector<MCCVLineEntry>
+std::vector<MCCVLoc>
CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
- std::vector<MCCVLineEntry> FilteredLines;
+ std::vector<MCCVLoc> FilteredLines;
auto I = MCCVLineStartStop.find(FuncId);
if (I != MCCVLineStartStop.end()) {
MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
@@ -289,9 +297,9 @@ CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
FilteredLines.back().getFileNum() != IA.File ||
FilteredLines.back().getLine() != IA.Line ||
FilteredLines.back().getColumn() != IA.Col) {
- FilteredLines.push_back(MCCVLineEntry(
+ FilteredLines.push_back(MCCVLoc(
MCCVLines[Idx].getLabel(),
- MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false)));
+ FuncId, IA.File, IA.Line, IA.Col, false, false));
}
}
}
@@ -308,7 +316,7 @@ std::pair<size_t, size_t> CodeViewContext::getLineExtent(unsigned FuncId) {
return I->second;
}
-ArrayRef<MCCVLineEntry> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
+ArrayRef<MCCVLoc> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
if (R <= L)
return None;
if (L >= MCCVLines.size())
@@ -331,8 +339,8 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
OS.EmitCOFFSectionIndex(FuncBegin);
// Actual line info.
- std::vector<MCCVLineEntry> Locs = getFunctionLineEntries(FuncId);
- bool HaveColumns = any_of(Locs, [](const MCCVLineEntry &LineEntry) {
+ std::vector<MCCVLoc> Locs = getFunctionLineEntries(FuncId);
+ bool HaveColumns = any_of(Locs, [](const MCCVLoc &LineEntry) {
return LineEntry.getColumn() != 0;
});
OS.EmitIntValue(HaveColumns ? int(LF_HaveColumns) : 0, 2);
@@ -342,7 +350,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
// Emit a file segment for the run of locations that share a file id.
unsigned CurFileNum = I->getFileNum();
auto FileSegEnd =
- std::find_if(I, E, [CurFileNum](const MCCVLineEntry &Loc) {
+ std::find_if(I, E, [CurFileNum](const MCCVLoc &Loc) {
return Loc.getFileNum() != CurFileNum;
});
unsigned EntryCount = FileSegEnd - I;
@@ -424,13 +432,13 @@ void CodeViewContext::emitInlineLineTableForFunction(MCObjectStreamer &OS,
OS.getCurrentSectionOnly());
}
-void CodeViewContext::emitDefRange(
+MCFragment *CodeViewContext::emitDefRange(
MCObjectStreamer &OS,
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion) {
// Create and insert a fragment into the current section that will be encoded
// later.
- new MCCVDefRangeFragment(Ranges, FixedSizePortion,
+ return new MCCVDefRangeFragment(Ranges, FixedSizePortion,
OS.getCurrentSectionOnly());
}
@@ -468,14 +476,14 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
if (LocBegin >= LocEnd)
return;
- ArrayRef<MCCVLineEntry> Locs = getLinesForExtent(LocBegin, LocEnd);
+ ArrayRef<MCCVLoc> Locs = getLinesForExtent(LocBegin, LocEnd);
if (Locs.empty())
return;
// Check that the locations are all in the same section.
#ifndef NDEBUG
const MCSection *FirstSec = &Locs.front().getLabel()->getSection();
- for (const MCCVLineEntry &Loc : Locs) {
+ for (const MCCVLoc &Loc : Locs) {
if (&Loc.getLabel()->getSection() != FirstSec) {
errs() << ".cv_loc " << Loc.getFunctionId() << ' ' << Loc.getFileNum()
<< ' ' << Loc.getLine() << ' ' << Loc.getColumn()
@@ -488,7 +496,8 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
// Make an artificial start location using the function start and the inlinee
// lines start location information. All deltas start relative to this
// location.
- MCCVLineEntry StartLoc(Frag.getFnStartSym(), MCCVLoc(Locs.front()));
+ MCCVLoc StartLoc = Locs.front();
+ StartLoc.setLabel(Frag.getFnStartSym());
StartLoc.setFileNum(Frag.StartFileId);
StartLoc.setLine(Frag.StartLineNum);
bool HaveOpenRange = false;
@@ -500,7 +509,7 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
SmallVectorImpl<char> &Buffer = Frag.getContents();
Buffer.clear(); // Clear old contents if we went through relaxation.
- for (const MCCVLineEntry &Loc : Locs) {
+ for (const MCCVLoc &Loc : Locs) {
// Exit early if our line table would produce an oversized InlineSiteSym
// record. Account for the ChangeCodeLength annotation emitted after the
// loop ends.
@@ -585,10 +594,10 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
unsigned EndSymLength =
computeLabelDiff(Layout, LastLabel, Frag.getFnEndSym());
unsigned LocAfterLength = ~0U;
- ArrayRef<MCCVLineEntry> LocAfter = getLinesForExtent(LocEnd, LocEnd + 1);
+ ArrayRef<MCCVLoc> LocAfter = getLinesForExtent(LocEnd, LocEnd + 1);
if (!LocAfter.empty()) {
// Only try to compute this difference if we're in the same section.
- const MCCVLineEntry &Loc = LocAfter[0];
+ const MCCVLoc &Loc = LocAfter[0];
if (&Loc.getLabel()->getSection() == &LastLabel->getSection())
LocAfterLength = computeLabelDiff(Layout, LastLabel, Loc.getLabel());
}
@@ -686,31 +695,3 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
}
}
}
-
-//
-// This is called when an instruction is assembled into the specified section
-// and if there is information from the last .cv_loc directive that has yet to have
-// a line entry made for it is made.
-//
-void MCCVLineEntry::Make(MCObjectStreamer *MCOS) {
- CodeViewContext &CVC = MCOS->getContext().getCVContext();
- if (!CVC.getCVLocSeen())
- return;
-
- // Create a symbol at in the current section for use in the line entry.
- MCSymbol *LineSym = MCOS->getContext().createTempSymbol();
- // Set the value of the symbol to use for the MCCVLineEntry.
- MCOS->EmitLabel(LineSym);
-
- // Get the current .loc info saved in the context.
- const MCCVLoc &CVLoc = CVC.getCurrentCVLoc();
-
- // Create a (local) line entry with the symbol and the current .loc info.
- MCCVLineEntry LineEntry(LineSym, CVLoc);
-
- // clear CVLocSeen saying the current .loc info is now used.
- CVC.clearCVLocSeen();
-
- // Add the line entry to this section's entries.
- CVC.addLineEntry(LineEntry);
-}
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
index 606da2526890..fab517075c5a 100644
--- a/contrib/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -592,7 +592,7 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
return !LineTable.getMCDwarfFiles()[FileNumber].Name.empty();
}
-/// Remove empty sections from SectionStartEndSyms, to avoid generating
+/// Remove empty sections from SectionsForRanges, to avoid generating
/// useless debug info for them.
void MCContext::finalizeDwarfSections(MCStreamer &MCOS) {
SectionsForRanges.remove_if(
@@ -605,11 +605,6 @@ CodeViewContext &MCContext::getCVContext() {
return *CVContext.get();
}
-void MCContext::clearCVLocSeen() {
- if (CVContext)
- CVContext->clearCVLocSeen();
-}
-
//===----------------------------------------------------------------------===//
// Error Reporting
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index 0461c2564ccf..38b02694d81d 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -463,10 +463,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym, 4), 4);
// Next 2 bytes is the Version.
- // FIXME: On Darwin we still default to V2.
unsigned LineTableVersion = context.getDwarfVersion();
- if (context.getObjectFileInfo()->getTargetTriple().isOSDarwin())
- LineTableVersion = 2;
MCOS->EmitIntValue(LineTableVersion, 2);
// Keep track of the bytes between the very start and where the header length
@@ -1335,6 +1332,10 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
Streamer.EmitIntValue(dwarf::DW_CFA_GNU_window_save, 1);
return;
+ case MCCFIInstruction::OpNegateRAState:
+ Streamer.EmitIntValue(dwarf::DW_CFA_AARCH64_negate_ra_state, 1);
+ return;
+
case MCCFIInstruction::OpUndefined: {
unsigned Reg = Instr.getRegister();
Streamer.EmitIntValue(dwarf::DW_CFA_undefined, 1);
@@ -1421,7 +1422,12 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
unsigned Reg = Instr.getRegister();
if (!IsEH)
Reg = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg);
- Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
+ if (Reg < 64) {
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
+ } else {
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore_extended, 1);
+ Streamer.EmitULEB128IntValue(Reg);
+ }
return;
}
case MCCFIInstruction::OpGnuArgsSize:
@@ -1559,9 +1565,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) {
uint8_t CIEVersion = getCIEVersion(IsEH, context.getDwarfVersion());
Streamer.EmitIntValue(CIEVersion, 1);
- // Augmentation String
- SmallString<8> Augmentation;
if (IsEH) {
+ SmallString<8> Augmentation;
Augmentation += "z";
if (Frame.Personality)
Augmentation += "P";
@@ -1570,6 +1575,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) {
Augmentation += "R";
if (Frame.IsSignalFrame)
Augmentation += "S";
+ if (Frame.IsBKeyFrame)
+ Augmentation += "B";
Streamer.EmitBytes(Augmentation);
}
Streamer.EmitIntValue(0, 1);
@@ -1724,25 +1731,28 @@ namespace {
struct CIEKey {
static const CIEKey getEmptyKey() {
- return CIEKey(nullptr, 0, -1, false, false, static_cast<unsigned>(INT_MAX));
+ return CIEKey(nullptr, 0, -1, false, false, static_cast<unsigned>(INT_MAX),
+ false);
}
static const CIEKey getTombstoneKey() {
- return CIEKey(nullptr, -1, 0, false, false, static_cast<unsigned>(INT_MAX));
+ return CIEKey(nullptr, -1, 0, false, false, static_cast<unsigned>(INT_MAX),
+ false);
}
CIEKey(const MCSymbol *Personality, unsigned PersonalityEncoding,
unsigned LSDAEncoding, bool IsSignalFrame, bool IsSimple,
- unsigned RAReg)
+ unsigned RAReg, bool IsBKeyFrame)
: Personality(Personality), PersonalityEncoding(PersonalityEncoding),
LsdaEncoding(LSDAEncoding), IsSignalFrame(IsSignalFrame),
- IsSimple(IsSimple), RAReg(RAReg) {}
+ IsSimple(IsSimple), RAReg(RAReg), IsBKeyFrame(IsBKeyFrame) {}
explicit CIEKey(const MCDwarfFrameInfo &Frame)
: Personality(Frame.Personality),
PersonalityEncoding(Frame.PersonalityEncoding),
LsdaEncoding(Frame.LsdaEncoding), IsSignalFrame(Frame.IsSignalFrame),
- IsSimple(Frame.IsSimple), RAReg(Frame.RAReg) {}
+ IsSimple(Frame.IsSimple), RAReg(Frame.RAReg),
+ IsBKeyFrame(Frame.IsBKeyFrame) {}
const MCSymbol *Personality;
unsigned PersonalityEncoding;
@@ -1750,6 +1760,7 @@ struct CIEKey {
bool IsSignalFrame;
bool IsSimple;
unsigned RAReg;
+ bool IsBKeyFrame;
};
} // end anonymous namespace
@@ -1761,9 +1772,9 @@ template <> struct DenseMapInfo<CIEKey> {
static CIEKey getTombstoneKey() { return CIEKey::getTombstoneKey(); }
static unsigned getHashValue(const CIEKey &Key) {
- return static_cast<unsigned>(
- hash_combine(Key.Personality, Key.PersonalityEncoding, Key.LsdaEncoding,
- Key.IsSignalFrame, Key.IsSimple, Key.RAReg));
+ return static_cast<unsigned>(hash_combine(
+ Key.Personality, Key.PersonalityEncoding, Key.LsdaEncoding,
+ Key.IsSignalFrame, Key.IsSimple, Key.RAReg, Key.IsBKeyFrame));
}
static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) {
@@ -1771,8 +1782,8 @@ template <> struct DenseMapInfo<CIEKey> {
LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
LHS.LsdaEncoding == RHS.LsdaEncoding &&
LHS.IsSignalFrame == RHS.IsSignalFrame &&
- LHS.IsSimple == RHS.IsSimple &&
- LHS.RAReg == RHS.RAReg;
+ LHS.IsSimple == RHS.IsSimple && LHS.RAReg == RHS.RAReg &&
+ LHS.IsBKeyFrame == RHS.IsBKeyFrame;
}
};
diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
index 483ee94c0db1..ff53dd7299c1 100644
--- a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -26,3 +26,6 @@ void
MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
}
+
+void MCELFObjectTargetWriter::addTargetSectionFlags(MCContext &Ctx,
+ MCSectionELF &Sec) {}
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
index ef6f0041e0c8..3c022199145f 100644
--- a/contrib/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -304,7 +304,9 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Hexagon_IE: return "IE";
case VK_Hexagon_IE_GOT: return "IEGOT";
case VK_WebAssembly_FUNCTION: return "FUNCTION";
+ case VK_WebAssembly_GLOBAL: return "GLOBAL";
case VK_WebAssembly_TYPEINDEX: return "TYPEINDEX";
+ case VK_WebAssembly_EVENT: return "EVENT";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
case VK_AMDGPU_REL32_LO: return "rel32@lo";
@@ -418,7 +420,9 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("hi8", VK_AVR_HI8)
.Case("hlo8", VK_AVR_HLO8)
.Case("function", VK_WebAssembly_FUNCTION)
+ .Case("global", VK_WebAssembly_GLOBAL)
.Case("typeindex", VK_WebAssembly_TYPEINDEX)
+ .Case("event", VK_WebAssembly_EVENT)
.Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
.Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
.Case("rel32@lo", VK_AMDGPU_REL32_LO)
@@ -766,7 +770,7 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case MCBinaryExpr::NE:
Res = MCValue::get((R->isEqualTo(R)) ? 0 : -1);
return true;
- default: {}
+ default: break;
}
}
return false;
diff --git a/contrib/llvm/lib/MC/MCFragment.cpp b/contrib/llvm/lib/MC/MCFragment.cpp
index 0ebcf21a422e..d22b117972bf 100644
--- a/contrib/llvm/lib/MC/MCFragment.cpp
+++ b/contrib/llvm/lib/MC/MCFragment.cpp
@@ -237,8 +237,8 @@ MCFragment::~MCFragment() = default;
MCFragment::MCFragment(FragmentType Kind, bool HasInstructions,
MCSection *Parent)
- : Kind(Kind), HasInstructions(HasInstructions), Parent(Parent),
- Atom(nullptr), Offset(~UINT64_C(0)) {
+ : Kind(Kind), HasInstructions(HasInstructions), LayoutOrder(0),
+ Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)) {
if (Parent && !isDummy())
Parent->getFragmentList().push_back(this);
}
diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp
index f9b71caaf91c..64f111fc7114 100644
--- a/contrib/llvm/lib/MC/MCInst.cpp
+++ b/contrib/llvm/lib/MC/MCInst.cpp
@@ -72,11 +72,17 @@ void MCInst::print(raw_ostream &OS) const {
void MCInst::dump_pretty(raw_ostream &OS, const MCInstPrinter *Printer,
StringRef Separator) const {
+ StringRef InstName = Printer ? Printer->getOpcodeName(getOpcode()) : "";
+ dump_pretty(OS, InstName, Separator);
+}
+
+void MCInst::dump_pretty(raw_ostream &OS, StringRef Name,
+ StringRef Separator) const {
OS << "<MCInst #" << getOpcode();
- // Show the instruction opcode name if we have access to a printer.
- if (Printer)
- OS << ' ' << Printer->getOpcodeName(getOpcode());
+ // Show the instruction opcode name if we have it.
+ if (!Name.empty())
+ OS << ' ' << Name;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
OS << Separator;
diff --git a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp
index 4d7c89116893..8223f3a5c66f 100644
--- a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -24,11 +24,6 @@ bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
return false;
}
-bool MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const {
- return false;
-}
-
bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target) const {
if (Inst.getNumOperands() == 0 ||
diff --git a/contrib/llvm/lib/MC/MCInstrDesc.cpp b/contrib/llvm/lib/MC/MCInstrDesc.cpp
index ee55f3eff3ac..53cba864a85d 100644
--- a/contrib/llvm/lib/MC/MCInstrDesc.cpp
+++ b/contrib/llvm/lib/MC/MCInstrDesc.cpp
@@ -39,15 +39,6 @@ bool MCInstrDesc::mayAffectControlFlow(const MCInst &MI,
return false;
if (hasDefOfPhysReg(MI, PC, RI))
return true;
- // A variadic instruction may define PC in the variable operand list.
- // There's currently no indication of which entries in a variable
- // list are defs and which are uses. While that's the case, this function
- // needs to assume they're defs in order to be conservatively correct.
- for (int i = NumOperands, e = MI.getNumOperands(); i != e; ++i) {
- if (MI.getOperand(i).isReg() &&
- RI.isSubRegisterEq(PC, MI.getOperand(i).getReg()))
- return true;
- }
return false;
}
@@ -66,5 +57,10 @@ bool MCInstrDesc::hasDefOfPhysReg(const MCInst &MI, unsigned Reg,
if (MI.getOperand(i).isReg() &&
RI.isSubRegisterEq(Reg, MI.getOperand(i).getReg()))
return true;
+ if (variadicOpsAreDefs())
+ for (int i = NumOperands - 1, e = MI.getNumOperands(); i != e; ++i)
+ if (MI.getOperand(i).isReg() &&
+ RI.isSubRegisterEq(Reg, MI.getOperand(i).getReg()))
+ return true;
return hasImplicitDefOfPhysReg(Reg, &RI);
}
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
index 43e69605787c..b30317e74672 100644
--- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -89,10 +89,10 @@ public:
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitLinkerOptions(ArrayRef<std::string> Options) override;
void EmitDataRegion(MCDataRegionType Kind) override;
- void EmitVersionMin(MCVersionMinType Kind, unsigned Major,
- unsigned Minor, unsigned Update) override;
- void EmitBuildVersion(unsigned Platform, unsigned Major,
- unsigned Minor, unsigned Update) override;
+ void EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor,
+ unsigned Update, VersionTuple SDKVersion) override;
+ void EmitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor,
+ unsigned Update, VersionTuple SDKVersion) override;
void EmitThumbFunc(MCSymbol *Func) override;
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
@@ -270,14 +270,16 @@ void MCMachOStreamer::EmitDataRegion(MCDataRegionType Kind) {
}
void MCMachOStreamer::EmitVersionMin(MCVersionMinType Kind, unsigned Major,
- unsigned Minor, unsigned Update) {
- getAssembler().setVersionMin(Kind, Major, Minor, Update);
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
+ getAssembler().setVersionMin(Kind, Major, Minor, Update, SDKVersion);
}
void MCMachOStreamer::EmitBuildVersion(unsigned Platform, unsigned Major,
- unsigned Minor, unsigned Update) {
+ unsigned Minor, unsigned Update,
+ VersionTuple SDKVersion) {
getAssembler().setBuildVersion((MachO::PlatformType)Platform, Major, Minor,
- Update);
+ Update, SDKVersion);
}
void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
@@ -507,7 +509,7 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context,
new MCMachOStreamer(Context, std::move(MAB), std::move(OW), std::move(CE),
DWARFMustBeAtTheEnd, LabelSections);
const Triple &Target = Context.getObjectFileInfo()->getTargetTriple();
- S->EmitVersionForTarget(Target);
+ S->EmitVersionForTarget(Target, Context.getObjectFileInfo()->getSDKVersion());
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
index a96dec184441..4e97e7550bcb 100644
--- a/contrib/llvm/lib/MC/MCNullStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -22,6 +23,9 @@ namespace {
/// @name MCStreamer Interface
/// @{
+ bool hasRawTextSupport() const override { return true; }
+ void EmitRawTextImpl(StringRef String) override {}
+
bool EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) override {
return true;
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index b88d2d801822..9e35355d06e0 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -63,11 +63,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
if (T.isWatchABI())
OmitDwarfIfHaveCompactUnwind = true;
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
- | dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
// .comm doesn't support alignment before Leopard.
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
@@ -258,9 +254,16 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
DwarfStrOffSection =
Ctx->getMachOSection("__DWARF", "__debug_str_offs", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata(), "section_str_off");
+ DwarfAddrSection =
+ Ctx->getMachOSection("__DWARF", "__debug_addr", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_info");
DwarfLocSection =
Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata(), "section_debug_loc");
+ DwarfLoclistsSection =
+ Ctx->getMachOSection("__DWARF", "__debug_loclists", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_debug_loc");
+
DwarfARangesSection =
Ctx->getMachOSection("__DWARF", "__debug_aranges", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
@@ -295,11 +298,11 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
switch (T.getArch()) {
case Triple::mips:
case Triple::mipsel:
- FDECFIEncoding = dwarf::DW_EH_PE_sdata4;
- break;
case Triple::mips64:
case Triple::mips64el:
- FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
+ FDECFIEncoding = Ctx->getAsmInfo()->getCodePointerSize() == 4
+ ? dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_sdata8;
break;
case Triple::ppc64:
case Triple::ppc64le:
@@ -311,158 +314,12 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
case Triple::bpfeb:
FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
break;
- default:
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- break;
- }
-
- switch (T.getArch()) {
- case Triple::arm:
- case Triple::armeb:
- case Triple::thumb:
- case Triple::thumbeb:
- if (Ctx->getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
- break;
- // Fallthrough if not using EHABI
- LLVM_FALLTHROUGH;
- case Triple::ppc:
- case Triple::x86:
- PersonalityEncoding = PositionIndependent
- ? dwarf::DW_EH_PE_indirect |
- dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
- LSDAEncoding = PositionIndependent
- ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
- TTypeEncoding = PositionIndependent
- ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
- break;
- case Triple::x86_64:
- if (PositionIndependent) {
- PersonalityEncoding =
- dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
- LSDAEncoding = dwarf::DW_EH_PE_pcrel |
- (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
- } else {
- PersonalityEncoding =
- Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
- LSDAEncoding = Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
- TTypeEncoding = Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
- }
- break;
case Triple::hexagon:
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- FDECFIEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- if (PositionIndependent) {
- PersonalityEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
- LSDAEncoding |= dwarf::DW_EH_PE_pcrel;
- FDECFIEncoding |= dwarf::DW_EH_PE_pcrel;
- TTypeEncoding |= dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel;
- }
- break;
- case Triple::aarch64:
- case Triple::aarch64_be:
- // The small model guarantees static code/data size < 4GB, but not where it
- // will be in memory. Most of these could end up >2GB away so even a signed
- // pc-relative 32-bit address is insufficient, theoretically.
- if (PositionIndependent) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
- break;
- case Triple::lanai:
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- break;
- case Triple::mips:
- case Triple::mipsel:
- case Triple::mips64:
- case Triple::mips64el:
- // MIPS uses indirect pointer to refer personality functions and types, so
- // that the eh_frame section can be read-only. DW.ref.personality will be
- // generated for relocation.
- PersonalityEncoding = dwarf::DW_EH_PE_indirect;
- // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't
- // identify N64 from just a triple.
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- // We don't support PC-relative LSDA references in GAS so we use the default
- // DW_EH_PE_absptr for those.
-
- // FreeBSD must be explicit about the data size and using pcrel since it's
- // assembler/linker won't do the automatic conversion that the Linux tools
- // do.
- if (T.isOSFreeBSD()) {
- PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- }
- break;
- case Triple::ppc64:
- case Triple::ppc64le:
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_udata8;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_udata8;
- break;
- case Triple::sparcel:
- case Triple::sparc:
- if (PositionIndependent) {
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- } else {
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
- break;
- case Triple::sparcv9:
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- if (PositionIndependent) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
- break;
- case Triple::systemz:
- // All currently-defined code models guarantee that 4-byte PC-relative
- // values will be in range.
- if (PositionIndependent) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
+ FDECFIEncoding =
+ PositionIndependent ? dwarf::DW_EH_PE_pcrel : dwarf::DW_EH_PE_absptr;
break;
default:
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
break;
}
@@ -582,25 +439,26 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
Ctx->getELFSection(".debug_str_offsets", DebugSecType, 0);
DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0);
DwarfRnglistsSection = Ctx->getELFSection(".debug_rnglists", DebugSecType, 0);
+ DwarfLoclistsSection = Ctx->getELFSection(".debug_loclists", DebugSecType, 0);
// Fission Sections
DwarfInfoDWOSection =
- Ctx->getELFSection(".debug_info.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_info.dwo", DebugSecType, ELF::SHF_EXCLUDE);
DwarfTypesDWOSection =
- Ctx->getELFSection(".debug_types.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_types.dwo", DebugSecType, ELF::SHF_EXCLUDE);
DwarfAbbrevDWOSection =
- Ctx->getELFSection(".debug_abbrev.dwo", DebugSecType, 0);
- DwarfStrDWOSection =
- Ctx->getELFSection(".debug_str.dwo", DebugSecType,
- ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+ Ctx->getELFSection(".debug_abbrev.dwo", DebugSecType, ELF::SHF_EXCLUDE);
+ DwarfStrDWOSection = Ctx->getELFSection(
+ ".debug_str.dwo", DebugSecType,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS | ELF::SHF_EXCLUDE, 1, "");
DwarfLineDWOSection =
- Ctx->getELFSection(".debug_line.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_line.dwo", DebugSecType, ELF::SHF_EXCLUDE);
DwarfLocDWOSection =
- Ctx->getELFSection(".debug_loc.dwo", DebugSecType, 0);
- DwarfStrOffDWOSection =
- Ctx->getELFSection(".debug_str_offsets.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_loc.dwo", DebugSecType, ELF::SHF_EXCLUDE);
+ DwarfStrOffDWOSection = Ctx->getELFSection(".debug_str_offsets.dwo",
+ DebugSecType, ELF::SHF_EXCLUDE);
DwarfRnglistsDWOSection =
- Ctx->getELFSection(".debug_rnglists.dwo", DebugSecType, 0);
+ Ctx->getELFSection(".debug_rnglists.dwo", DebugSecType, ELF::SHF_EXCLUDE);
// DWP Sections
DwarfCUIndexSection =
@@ -621,10 +479,10 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
- EHFrameSection = Ctx->getCOFFSection(
- ".eh_frame", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getData());
+ EHFrameSection =
+ Ctx->getCOFFSection(".eh_frame", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getData());
// Set the `IMAGE_SCN_MEM_16BIT` flag when compiling for thumb mode. This is
// used to indicate to the linker that the text segment contains thumb instructions
@@ -652,11 +510,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- // FIXME: We're emitting LSDA info into a readonly section on COFF, even
- // though it contains relocatable pointers. In PIC mode, this is probably a
- // big runtime hit for C++ apps. Either the contents of the LSDA need to be
- // adjusted or this should be a data section.
- if (T.getArch() == Triple::x86_64) {
+ if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::aarch64) {
// On Windows 64 with SEH, the LSDA is emitted into the .xdata section
LSDASection = nullptr;
} else {
@@ -893,6 +747,12 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
DwarfPubNamesSection = Ctx->getWasmSection(".debug_pubnames", SectionKind::getMetadata());
DwarfPubTypesSection = Ctx->getWasmSection(".debug_pubtypes", SectionKind::getMetadata());
+ // Wasm uses the data section for LSDA.
+ // TODO: Consider putting each function's exception table in a separate
+ // section, as in -function-sections, to facilitate lld's --gc-sections.
+ LSDASection = Ctx->getWasmSection(".rodata.gcc_except_table",
+ SectionKind::getReadOnlyWithRel());
+
// TODO: Define more sections.
}
@@ -908,8 +768,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
SupportsCompactUnwindWithoutEHFrame = false;
OmitDwarfIfHaveCompactUnwind = false;
- PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding =
- dwarf::DW_EH_PE_absptr;
+ FDECFIEncoding = dwarf::DW_EH_PE_absptr;
CompactUnwindDwarfEHFrameOnly = 0;
@@ -949,16 +808,17 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
}
}
-MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
+MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
+ uint64_t Hash) const {
switch (TT.getObjectFormat()) {
case Triple::ELF:
- return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
- 0, utostr(Hash));
+ return Ctx->getELFSection(Name, ELF::SHT_PROGBITS, ELF::SHF_GROUP, 0,
+ utostr(Hash));
case Triple::MachO:
case Triple::COFF:
case Triple::Wasm:
case Triple::UnknownObjectFormat:
- report_fatal_error("Cannot get DWARF types section for this object file "
+ report_fatal_error("Cannot get DWARF comdat section for this object file "
"format: not implemented.");
break;
}
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index 4b6dad5ce8f3..6ec705bdddb7 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -59,11 +59,35 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
PendingLabels.clear();
}
+// When a fixup's offset is a forward-declared label, e.g.:
+//
+// .reloc 1f, R_MIPS_JALR, foo
+// 1: nop
+//
+// postpone adding it to the Fixups vector until the label is defined and its offset
+// is known.
+void MCObjectStreamer::resolvePendingFixups() {
+ for (PendingMCFixup &PendingFixup : PendingFixups) {
+ if (!PendingFixup.Sym || PendingFixup.Sym->isUndefined ()) {
+ getContext().reportError(PendingFixup.Fixup.getLoc(),
+ "unresolved relocation offset");
+ continue;
+ }
+ flushPendingLabels(PendingFixup.DF, PendingFixup.DF->getContents().size());
+ PendingFixup.Fixup.setOffset(PendingFixup.Sym->getOffset());
+ PendingFixup.DF->getFixups().push_back(PendingFixup.Fixup);
+ }
+ PendingFixups.clear();
+}
+
// As a compile-time optimization, avoid allocating and evaluating an MCExpr
// tree for (Hi - Lo) when Hi and Lo are offsets into the same fragment.
-static Optional<uint64_t> absoluteSymbolDiff(const MCSymbol *Hi,
- const MCSymbol *Lo) {
+static Optional<uint64_t>
+absoluteSymbolDiff(MCAssembler &Asm, const MCSymbol *Hi, const MCSymbol *Lo) {
assert(Hi && Lo);
+ if (Asm.getBackendPtr()->requiresDiffExpressionRelocations())
+ return None;
+
if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment() ||
Hi->isVariable() || Lo->isVariable())
return None;
@@ -74,7 +98,7 @@ static Optional<uint64_t> absoluteSymbolDiff(const MCSymbol *Hi,
void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
const MCSymbol *Lo,
unsigned Size) {
- if (Optional<uint64_t> Diff = absoluteSymbolDiff(Hi, Lo)) {
+ if (Optional<uint64_t> Diff = absoluteSymbolDiff(getAssembler(), Hi, Lo)) {
EmitIntValue(*Diff, Size);
return;
}
@@ -83,7 +107,7 @@ void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
void MCObjectStreamer::emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi,
const MCSymbol *Lo) {
- if (Optional<uint64_t> Diff = absoluteSymbolDiff(Hi, Lo)) {
+ if (Optional<uint64_t> Diff = absoluteSymbolDiff(getAssembler(), Hi, Lo)) {
EmitULEB128IntValue(*Diff);
return;
}
@@ -170,7 +194,6 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
- MCCVLineEntry::Make(this);
MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
// Avoid fixups when possible.
@@ -267,7 +290,6 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
flushPendingLabels(nullptr);
- getContext().clearCVLocSeen();
getContext().clearDwarfLocSeen();
bool Created = getAssembler().registerSection(*Section);
@@ -308,7 +330,6 @@ void MCObjectStreamer::EmitInstructionImpl(const MCInst &Inst,
// Now that a machine instruction has been assembled into this section, make
// a line entry for any .loc directive that has been seen.
- MCCVLineEntry::Make(this);
MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
// If this instruction doesn't need relaxation, just emit it as data.
@@ -443,12 +464,16 @@ void MCObjectStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
StringRef FileName, SMLoc Loc) {
- // In case we see two .cv_loc directives in a row, make sure the
- // first one gets a line entry.
- MCCVLineEntry::Make(this);
+ // Validate the directive.
+ if (!checkCVLocSection(FunctionId, FileNo, Loc))
+ return;
- this->MCStreamer::EmitCVLocDirective(FunctionId, FileNo, Line, Column,
- PrologueEnd, IsStmt, FileName, Loc);
+ // Emit a label at the current position and record it in the CodeViewContext.
+ MCSymbol *LineSym = getContext().createTempSymbol();
+ EmitLabel(LineSym);
+ getContext().getCVContext().recordCVLoc(getContext(), LineSym, FunctionId,
+ FileNo, Line, Column, PrologueEnd,
+ IsStmt);
}
void MCObjectStreamer::EmitCVLinetableDirective(unsigned FunctionId,
@@ -472,7 +497,11 @@ void MCObjectStreamer::EmitCVInlineLinetableDirective(
void MCObjectStreamer::EmitCVDefRangeDirective(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion) {
- getContext().getCVContext().emitDefRange(*this, Ranges, FixedSizePortion);
+ MCFragment *Frag =
+ getContext().getCVContext().emitDefRange(*this, Ranges, FixedSizePortion);
+ // Attach labels that were pending before we created the defrange fragment to
+ // the beginning of the new fragment.
+ flushPendingLabels(Frag, 0);
this->MCStreamer::EmitCVDefRangeDirective(Ranges, FixedSizePortion);
}
@@ -488,11 +517,16 @@ void MCObjectStreamer::EmitCVFileChecksumOffsetDirective(unsigned FileNo) {
}
void MCObjectStreamer::EmitBytes(StringRef Data) {
- MCCVLineEntry::Make(this);
MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
DF->getContents().append(Data.begin(), Data.end());
+
+ // EmitBytes might not cover all possible ways we emit data (or could be used
+ // to emit executable code in some cases), but is the best method we have
+ // right now for checking this.
+ MCSection *Sec = getCurrentSectionOnly();
+ Sec->setHasData(true);
}
void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
@@ -594,16 +628,6 @@ void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name,
const MCExpr *Expr, SMLoc Loc,
const MCSubtargetInfo &STI) {
- int64_t OffsetValue;
- if (!Offset.evaluateAsAbsolute(OffsetValue))
- llvm_unreachable("Offset is not absolute");
-
- if (OffsetValue < 0)
- llvm_unreachable("Offset is negative");
-
- MCDataFragment *DF = getOrCreateDataFragment(&STI);
- flushPendingLabels(DF, DF->getContents().size());
-
Optional<MCFixupKind> MaybeKind = Assembler->getBackend().getFixupKind(Name);
if (!MaybeKind.hasValue())
return true;
@@ -613,7 +637,30 @@ bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name,
if (Expr == nullptr)
Expr =
MCSymbolRefExpr::create(getContext().createTempSymbol(), getContext());
- DF->getFixups().push_back(MCFixup::create(OffsetValue, Expr, Kind, Loc));
+
+ MCDataFragment *DF = getOrCreateDataFragment(&STI);
+ flushPendingLabels(DF, DF->getContents().size());
+
+ int64_t OffsetValue;
+ if (Offset.evaluateAsAbsolute(OffsetValue)) {
+ if (OffsetValue < 0)
+ llvm_unreachable(".reloc offset is negative");
+ DF->getFixups().push_back(MCFixup::create(OffsetValue, Expr, Kind, Loc));
+ return false;
+ }
+
+ if (Offset.getKind() != llvm::MCExpr::SymbolRef)
+ llvm_unreachable(".reloc offset is not absolute nor a label");
+
+ const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Offset);
+ if (SRE.getSymbol().isDefined()) {
+ DF->getFixups().push_back(MCFixup::create(SRE.getSymbol().getOffset(),
+ Expr, Kind, Loc));
+ return false;
+ }
+
+ PendingFixups.emplace_back(&SRE.getSymbol(), DF,
+ MCFixup::create(-1, Expr, Kind, Loc));
return false;
}
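The reworked EmitRelocDirective above accepts a label as the .reloc offset: a symbol that is already defined contributes its offset immediately, while an as-yet-undefined one is queued on PendingFixups and patched later in FinishImpl via resolvePendingFixups. A hedged sketch of that defer-then-resolve pattern in plain C++ (the types and names below are invented for illustration; they are not the MC classes):

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct Fixup { std::string Symbol; int64_t Offset; }; // Offset == -1: unknown

    struct Stream {
      std::unordered_map<std::string, int64_t> SymbolOffsets; // defined symbols
      std::vector<Fixup> Fixups;        // fixups whose offsets are known
      std::vector<Fixup> PendingFixups; // offsets filled in at finish time

      void reloc(const std::string &Sym) {
        auto It = SymbolOffsets.find(Sym);
        if (It != SymbolOffsets.end())
          Fixups.push_back({Sym, It->second}); // label already defined
        else
          PendingFixups.push_back({Sym, -1});  // defer until finish
      }

      void finish() {
        for (Fixup &F : PendingFixups)
          F.Offset = SymbolOffsets.at(F.Symbol); // must be defined by now
        Fixups.insert(Fixups.end(), PendingFixups.begin(), PendingFixups.end());
        PendingFixups.clear();
      }
    };

    int main() {
      Stream S;
      S.reloc("later");             // forward reference, deferred
      S.SymbolOffsets["later"] = 16;
      S.finish();
      std::cout << S.Fixups[0].Offset << "\n"; // 16
    }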
@@ -680,5 +727,6 @@ void MCObjectStreamer::FinishImpl() {
MCDwarfLineTable::Emit(this, getAssembler().getDWARFLinetableParams());
flushPendingLabels();
+ resolvePendingFixups();
getAssembler().Finish();
}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
index 74835fd70c04..2b0d20f9b8e2 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -243,22 +243,26 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
// Look ahead to find the first character that is not a hex digit; if it is
// [hH], treat the integer as hexadecimal, possibly with leading zeroes.
-static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
- const char *FirstHex = nullptr;
+static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
+ bool LexHex) {
+ const char *FirstNonDec = nullptr;
const char *LookAhead = CurPtr;
while (true) {
if (isDigit(*LookAhead)) {
++LookAhead;
- } else if (isHexDigit(*LookAhead)) {
- if (!FirstHex)
- FirstHex = LookAhead;
- ++LookAhead;
} else {
- break;
+ if (!FirstNonDec)
+ FirstNonDec = LookAhead;
+
+      // Keep going if we are looking for an 'h' suffix.
+ if (LexHex && isHexDigit(*LookAhead))
+ ++LookAhead;
+ else
+ break;
}
}
- bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
- CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
+ bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
+ CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
if (isHex)
return 16;
return DefaultRadix;
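After the rename, doHexLookAhead only scans past extra hex digits and honours an 'h'/'H' radix suffix when MASM-style integers are enabled (LexMasmIntegers); otherwise it stops at the first non-decimal character. A standalone approximation of that scan, for illustration only:

    #include <cctype>
    #include <cstdio>

    // Mimics the lookahead: scan decimal digits, optionally scan hex digits
    // when MASM integers are enabled, and report radix 16 only for a trailing
    // 'h'/'H' suffix.
    static unsigned hexLookAhead(const char *&Cur, unsigned DefaultRadix,
                                 bool LexHex) {
      const char *FirstNonDec = nullptr;
      const char *Look = Cur;
      while (true) {
        if (isdigit((unsigned char)*Look)) {
          ++Look;
        } else {
          if (!FirstNonDec)
            FirstNonDec = Look;
          if (LexHex && isxdigit((unsigned char)*Look))
            ++Look;
          else
            break;
        }
      }
      bool IsHex = LexHex && (*Look == 'h' || *Look == 'H');
      Cur = (IsHex || !FirstNonDec) ? Look : FirstNonDec;
      return IsHex ? 16 : DefaultRadix;
    }

    int main() {
      const char *P = "0FFh ;";
      unsigned R = hexLookAhead(P, 10, /*LexHex=*/true);  // MASM mode
      printf("%u %c\n", R, *P);                           // prints: 16 h
      P = "0FFh ;";
      R = hexLookAhead(P, 10, /*LexHex=*/false);          // GAS mode
      printf("%u %c\n", R, *P);                           // prints: 10 F
    }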
@@ -281,7 +285,7 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
AsmToken AsmLexer::LexDigit() {
// MASM-flavor binary integer: [01]+[bB]
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
- if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
+ if (LexMasmIntegers && isdigit(CurPtr[-1])) {
const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
CurPtr - 1 : nullptr;
const char *OldCurPtr = CurPtr;
@@ -320,7 +324,7 @@ AsmToken AsmLexer::LexDigit() {
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
- unsigned Radix = doLookAhead(CurPtr, 10);
+ unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
bool isHex = Radix == 16;
// Check for floating point literals.
if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
@@ -335,8 +339,8 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, !isHex ? "invalid decimal number" :
"invalid hexadecimal number");
- // Consume the [bB][hH].
- if (Radix == 2 || Radix == 16)
+ // Consume the [hH].
+ if (LexMasmIntegers && Radix == 16)
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores type
@@ -346,7 +350,7 @@ AsmToken AsmLexer::LexDigit() {
return intToken(Result, Value);
}
- if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
+ if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
++CurPtr;
// See if we actually have "0b" as part of something like "jmp 0b\n"
if (!isDigit(CurPtr[0])) {
@@ -395,7 +399,7 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, "invalid hexadecimal number");
// Consume the optional [hH].
- if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
+ if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
@@ -407,7 +411,7 @@ AsmToken AsmLexer::LexDigit() {
// Either octal or hexadecimal.
APInt Value(128, 0, true);
- unsigned Radix = doLookAhead(CurPtr, 8);
+ unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
bool isHex = Radix == 16;
StringRef Result(TokStart, CurPtr - TokStart);
if (Result.getAsInteger(Radix, Value))
@@ -623,7 +627,6 @@ AsmToken AsmLexer::LexToken() {
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
- case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
@@ -642,6 +645,12 @@ AsmToken AsmLexer::LexToken() {
return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
}
return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
+ case '-':
+ if (*CurPtr == '>') {
+ ++CurPtr;
+ return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2));
+ }
+ return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
case '|':
if (*CurPtr == '|') {
++CurPtr;
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index d88c6f76826f..cf42a6f7075b 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -180,6 +180,9 @@ private:
/// Did we already inform the user about inconsistent MD5 usage?
bool ReportedInconsistentMD5 = false;
+ // Is alt macro mode enabled.
+ bool AltMacroMode = false;
+
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI, unsigned CB);
@@ -226,7 +229,9 @@ public:
void setParsingInlineAsm(bool V) override {
ParsingInlineAsm = V;
- Lexer.setParsingMSInlineAsm(V);
+ // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
+ // hex integer literals.
+ Lexer.setLexMasmIntegers(V);
}
bool isParsingInlineAsm() override { return ParsingInlineAsm; }
@@ -260,8 +265,6 @@ public:
/// }
private:
- bool isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc);
- void altMacroString(StringRef AltMacroStr, std::string &Res);
bool parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI);
bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
@@ -467,6 +470,7 @@ private:
DK_CV_INLINE_LINETABLE,
DK_CV_DEF_RANGE,
DK_CV_STRINGTABLE,
+ DK_CV_STRING,
DK_CV_FILECHECKSUMS,
DK_CV_FILECHECKSUM_OFFSET,
DK_CV_FPO_DATA,
@@ -491,6 +495,7 @@ private:
DK_CFI_UNDEFINED,
DK_CFI_REGISTER,
DK_CFI_WINDOW_SAVE,
+ DK_CFI_B_KEY_FRAME,
DK_MACROS_ON,
DK_MACROS_OFF,
DK_ALTMACRO,
@@ -538,7 +543,7 @@ private:
bool parseDirectiveStabs();
// ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
- // ".cv_inline_linetable", ".cv_def_range"
+ // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
bool parseDirectiveCVFile();
bool parseDirectiveCVFuncId();
bool parseDirectiveCVInlineSiteId();
@@ -546,6 +551,7 @@ private:
bool parseDirectiveCVLinetable();
bool parseDirectiveCVInlineLinetable();
bool parseDirectiveCVDefRange();
+ bool parseDirectiveCVString();
bool parseDirectiveCVStringTable();
bool parseDirectiveCVFileChecksums();
bool parseDirectiveCVFileChecksumOffset();
@@ -670,6 +676,7 @@ namespace llvm {
extern MCAsmParserExtension *createDarwinAsmParser();
extern MCAsmParserExtension *createELFAsmParser();
extern MCAsmParserExtension *createCOFFAsmParser();
+extern MCAsmParserExtension *createWasmAsmParser();
} // end namespace llvm
@@ -700,10 +707,7 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
PlatformParser.reset(createELFAsmParser());
break;
case MCObjectFileInfo::IsWasm:
- // TODO: WASM will need its own MCAsmParserExtension implementation, but
- // for now we can re-use the ELF one, since the directives can be the
- // same for now.
- PlatformParser.reset(createELFAsmParser());
+ PlatformParser.reset(createWasmAsmParser());
break;
}
@@ -895,6 +899,9 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
eatToEndOfStatement();
}
+ getTargetParser().onEndOfFile();
+ printPendingErrors();
+
// All errors should have been emitted.
assert(!hasPendingError() && "unexpected error from parseStatement");
@@ -1100,7 +1107,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// This is a symbol reference.
StringRef SymbolName = Identifier;
if (SymbolName.empty())
- return true;
+ return Error(getLexer().getLoc(), "expected a symbol reference");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
@@ -1123,7 +1130,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// semantics in the face of reassignment.
if (Sym->isVariable()) {
auto V = Sym->getVariableValue(/*SetUsed*/ false);
- bool DoInline = isa<MCConstantExpr>(V);
+ bool DoInline = isa<MCConstantExpr>(V) && !Variant;
if (auto TV = dyn_cast<MCTargetExpr>(V))
DoInline = TV->inlineAssignedExpr();
if (DoInline) {
@@ -1321,11 +1328,12 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
/// the End argument will be filled with the last location pointed to the '>'
/// character.
-/// There is a gap between the AltMacro's documentation and the single quote implementation.
-/// GCC does not fully support this feature and so we will not support it.
+/// There is a gap between the AltMacro's documentation and the single quote
+/// implementation. GCC does not fully support this feature and so we will not
+/// support it.
/// TODO: Adding single quote as a string.
-bool AsmParser::isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc) {
- assert((StrLoc.getPointer() != NULL) &&
+static bool isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc) {
+ assert((StrLoc.getPointer() != nullptr) &&
"Argument to the function cannot be a NULL value");
const char *CharPtr = StrLoc.getPointer();
while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
@@ -1342,12 +1350,14 @@ bool AsmParser::isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc) {
}
/// Creates a string without the '!' escape characters.
-void AsmParser::altMacroString(StringRef AltMacroStr,std::string &Res) {
+static std::string altMacroString(StringRef AltMacroStr) {
+ std::string Res;
for (size_t Pos = 0; Pos < AltMacroStr.size(); Pos++) {
if (AltMacroStr[Pos] == '!')
Pos++;
Res += AltMacroStr[Pos];
}
+ return Res;
}
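altMacroString strips the '!' escape character used inside altmacro <...> string arguments; the change above only turns the out-parameter into a return value. A small standalone equivalent (with an explicit bounds check that the original leaves to its caller):

    #include <cassert>
    #include <string>

    // "a!>b" -> "a>b": a '!' escapes the character that follows it.
    static std::string unescapeAltMacro(const std::string &In) {
      std::string Res;
      for (size_t Pos = 0; Pos < In.size(); ++Pos) {
        if (In[Pos] == '!' && Pos + 1 < In.size())
          ++Pos;            // skip the escape, keep the escaped character
        Res += In[Pos];
      }
      return Res;
    }

    int main() {
      assert(unescapeAltMacro("a!>b") == "a>b");
      assert(unescapeAltMacro("no escapes") == "no escapes");
    }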
/// Parse an expression and return it.
@@ -1806,6 +1816,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex();
}
+ getTargetParser().doBeforeLabelEmit(Sym);
+
// Emit the label.
if (!getTargetParser().isParsingInlineAsm())
Out.EmitLabel(Sym, IDLoc);
@@ -1842,7 +1854,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// Otherwise, we have a normal instruction or directive.
// Directives start with "."
- if (IDVal[0] == '.' && IDVal != ".") {
+ if (IDVal.startswith(".") && IDVal != ".") {
// There are several entities interested in parsing directives:
//
// 1. The target-specific assembly parser. Some directives are target
@@ -2029,6 +2041,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveCVInlineLinetable();
case DK_CV_DEF_RANGE:
return parseDirectiveCVDefRange();
+ case DK_CV_STRING:
+ return parseDirectiveCVString();
case DK_CV_STRINGTABLE:
return parseDirectiveCVStringTable();
case DK_CV_FILECHECKSUMS:
@@ -2431,21 +2445,20 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
for (const AsmToken &Token : A[Index])
// For altmacro mode, you can write '%expr'.
// The prefix '%' evaluates the expression 'expr'
- // and uses the result as a string (e.g. replace %(1+2) with the string "3").
+ // and uses the result as a string (e.g. replace %(1+2) with the
+ // string "3").
// Here, we identify the integer token which is the result of the
- // absolute expression evaluation and replace it with its string representation.
- if ((Lexer.IsaAltMacroMode()) &&
- (*(Token.getString().begin()) == '%') && Token.is(AsmToken::Integer))
+ // absolute expression evaluation and replace it with its string
+ // representation.
+ if (AltMacroMode && Token.getString().front() == '%' &&
+ Token.is(AsmToken::Integer))
// Emit an integer value to the buffer.
OS << Token.getIntVal();
// Only a token that was validated as a string and begins with '<'
// is considered an altmacro string.
- else if ((Lexer.IsaAltMacroMode()) &&
- (*(Token.getString().begin()) == '<') &&
+ else if (AltMacroMode && Token.getString().front() == '<' &&
Token.is(AsmToken::String)) {
- std::string Res;
- altMacroString(Token.getStringContents(), Res);
- OS << Res;
+ OS << altMacroString(Token.getStringContents());
}
// We expect no quotes around the string's contents when
// parsing for varargs.
@@ -2627,31 +2640,33 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
SMLoc StrLoc = Lexer.getLoc();
SMLoc EndLoc;
- if (Lexer.IsaAltMacroMode() && Lexer.is(AsmToken::Percent)) {
- const MCExpr *AbsoluteExp;
- int64_t Value;
- /// Eat '%'
- Lex();
- if (parseExpression(AbsoluteExp, EndLoc))
- return false;
- if (!AbsoluteExp->evaluateAsAbsolute(Value,
- getStreamer().getAssemblerPtr()))
- return Error(StrLoc, "expected absolute expression");
- const char *StrChar = StrLoc.getPointer();
- const char *EndChar = EndLoc.getPointer();
- AsmToken newToken(AsmToken::Integer, StringRef(StrChar , EndChar - StrChar), Value);
- FA.Value.push_back(newToken);
- } else if (Lexer.IsaAltMacroMode() && Lexer.is(AsmToken::Less) &&
+ if (AltMacroMode && Lexer.is(AsmToken::Percent)) {
+ const MCExpr *AbsoluteExp;
+ int64_t Value;
+ /// Eat '%'
+ Lex();
+ if (parseExpression(AbsoluteExp, EndLoc))
+ return false;
+ if (!AbsoluteExp->evaluateAsAbsolute(Value,
+ getStreamer().getAssemblerPtr()))
+ return Error(StrLoc, "expected absolute expression");
+ const char *StrChar = StrLoc.getPointer();
+ const char *EndChar = EndLoc.getPointer();
+ AsmToken newToken(AsmToken::Integer,
+ StringRef(StrChar, EndChar - StrChar), Value);
+ FA.Value.push_back(newToken);
+ } else if (AltMacroMode && Lexer.is(AsmToken::Less) &&
isAltmacroString(StrLoc, EndLoc)) {
- const char *StrChar = StrLoc.getPointer();
- const char *EndChar = EndLoc.getPointer();
- jumpToLoc(EndLoc, CurBuffer);
- /// Eat from '<' to '>'
- Lex();
- AsmToken newToken(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
- FA.Value.push_back(newToken);
+ const char *StrChar = StrLoc.getPointer();
+ const char *EndChar = EndLoc.getPointer();
+ jumpToLoc(EndLoc, CurBuffer);
+ /// Eat from '<' to '>'
+ Lex();
+ AsmToken newToken(AsmToken::String,
+ StringRef(StrChar, EndChar - StrChar));
+ FA.Value.push_back(newToken);
} else if(parseMacroArgument(FA.Value, Vararg))
- return true;
+ return true;
unsigned PI = Parameter;
if (!FA.Name.empty()) {
@@ -2927,20 +2942,20 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) {
const MCExpr *Offset;
const MCExpr *Expr = nullptr;
-
- SMLoc OffsetLoc = Lexer.getTok().getLoc();
int64_t OffsetValue;
- // We can only deal with constant expressions at the moment.
+ SMLoc OffsetLoc = Lexer.getTok().getLoc();
if (parseExpression(Offset))
return true;
- if (check(!Offset->evaluateAsAbsolute(OffsetValue,
- getStreamer().getAssemblerPtr()),
- OffsetLoc, "expression is not a constant value") ||
- check(OffsetValue < 0, OffsetLoc, "expression is negative") ||
- parseToken(AsmToken::Comma, "expected comma") ||
- check(getTok().isNot(AsmToken::Identifier), "expected relocation name"))
+ if ((Offset->evaluateAsAbsolute(OffsetValue,
+ getStreamer().getAssemblerPtr()) &&
+ check(OffsetValue < 0, OffsetLoc, "expression is negative")) ||
+ (check(Offset->getKind() != llvm::MCExpr::Constant &&
+ Offset->getKind() != llvm::MCExpr::SymbolRef,
+ OffsetLoc, "expected non-negative number or a label")) ||
+ (parseToken(AsmToken::Comma, "expected comma") ||
+ check(getTok().isNot(AsmToken::Identifier), "expected relocation name")))
return true;
SMLoc NameLoc = Lexer.getTok().getLoc();
@@ -3348,9 +3363,12 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
}
}
- if (FileNumber == -1)
+ if (FileNumber == -1) {
+ if (!getContext().getAsmInfo()->hasSingleParameterDotFile())
+ return Error(DirectiveLoc,
+ "target does not support '.file' without a number");
getStreamer().EmitFileDirective(Filename);
- else {
+ } else {
// In case there is a -g option as well as debug info from directive .file,
// we turn off the -g option, directly use the existing debug info instead.
// Also reset any implicit ".file 0" for the assembler source.
@@ -3813,6 +3831,20 @@ bool AsmParser::parseDirectiveCVDefRange() {
return false;
}
+/// parseDirectiveCVString
+/// ::= .cv_string "string"
+bool AsmParser::parseDirectiveCVString() {
+ std::string Data;
+ if (checkForValidSection() || parseEscapedString(Data))
+ return addErrorSuffix(" in '.cv_string' directive");
+
+ // Put the string in the table and emit the offset.
+ std::pair<StringRef, unsigned> Insertion =
+ getCVContext().addToStringTable(Data);
+ getStreamer().EmitIntValue(Insertion.second, 4);
+ return false;
+}
+
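The new .cv_string directive escapes the operand, interns it in the CodeView string table, and emits the returned 32-bit offset as data. A hedged sketch of the interning idea only (a deduplicating table handing back stable offsets, with offset 0 conventionally reserved for the empty string; this is not the CodeViewContext API):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    struct StringTable {
      std::string Data{std::string(1, '\0')};       // offset 0 holds ""
      std::map<std::string, uint32_t> Offsets{{"", 0}};

      uint32_t add(const std::string &S) {
        auto It = Offsets.find(S);
        if (It != Offsets.end())
          return It->second;                        // already interned
        uint32_t Off = (uint32_t)Data.size();
        Data += S;
        Data += '\0';
        Offsets.emplace(S, Off);
        return Off;
      }
    };

    int main() {
      StringTable T;
      std::cout << T.add("hello") << "\n"; // 1
      std::cout << T.add("world") << "\n"; // 7
      std::cout << T.add("hello") << "\n"; // 1 (deduplicated)
    }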
/// parseDirectiveCVStringTable
/// ::= .cv_stringtable
bool AsmParser::parseDirectiveCVStringTable() {
@@ -3895,7 +3927,12 @@ bool AsmParser::parseDirectiveCFIStartProc() {
return addErrorSuffix(" in '.cfi_startproc' directive");
}
- getStreamer().EmitCFIStartProc(!Simple.empty());
+ // TODO(kristina): Deal with a corner case of incorrect diagnostic context
+ // being produced if this directive is emitted as part of preprocessor macro
+ // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
+ // Tools like llvm-mc on the other hand are not affected by it, and report
+ // correct context information.
+ getStreamer().EmitCFIStartProc(!Simple.empty(), Lexer.getLoc());
return false;
}
@@ -4163,10 +4200,7 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
bool AsmParser::parseDirectiveAltmacro(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + Directive + "' directive");
- if (Directive == ".altmacro")
- getLexer().SetAltMacroMode(true);
- else
- getLexer().SetAltMacroMode(false);
+ AltMacroMode = (Directive == ".altmacro");
return false;
}
@@ -5238,6 +5272,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
+ DirectiveKindMap[".cv_string"] = DK_CV_STRING;
DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
@@ -5265,6 +5300,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
+ DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
DirectiveKindMap[".macro"] = DK_MACRO;
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index e6fc1fac81ba..cd99112292a9 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -459,7 +459,12 @@ public:
bool parseBuildVersion(StringRef Directive, SMLoc Loc);
bool parseVersionMin(StringRef Directive, SMLoc Loc, MCVersionMinType Type);
+ bool parseMajorMinorVersionComponent(unsigned *Major, unsigned *Minor,
+ const char *VersionName);
+ bool parseOptionalTrailingVersionComponent(unsigned *Component,
+ const char *ComponentName);
bool parseVersion(unsigned *Major, unsigned *Minor, unsigned *Update);
+ bool parseSDKVersion(VersionTuple &SDKVersion);
void checkVersion(StringRef Directive, StringRef Arg, SMLoc Loc,
Triple::OSType ExpectedOS);
};
@@ -1000,43 +1005,89 @@ bool DarwinAsmParser::parseDirectiveDataRegionEnd(StringRef, SMLoc) {
return false;
}
-/// parseVersion ::= major, minor [, update]
-bool DarwinAsmParser::parseVersion(unsigned *Major, unsigned *Minor,
- unsigned *Update) {
+static bool isSDKVersionToken(const AsmToken &Tok) {
+ return Tok.is(AsmToken::Identifier) && Tok.getIdentifier() == "sdk_version";
+}
+
+/// parseMajorMinorVersionComponent ::= major, minor
+bool DarwinAsmParser::parseMajorMinorVersionComponent(unsigned *Major,
+ unsigned *Minor,
+ const char *VersionName) {
// Get the major version number.
if (getLexer().isNot(AsmToken::Integer))
- return TokError("invalid OS major version number, integer expected");
+ return TokError(Twine("invalid ") + VersionName +
+ " major version number, integer expected");
int64_t MajorVal = getLexer().getTok().getIntVal();
if (MajorVal > 65535 || MajorVal <= 0)
- return TokError("invalid OS major version number");
+ return TokError(Twine("invalid ") + VersionName + " major version number");
*Major = (unsigned)MajorVal;
Lex();
if (getLexer().isNot(AsmToken::Comma))
- return TokError("OS minor version number required, comma expected");
+ return TokError(Twine(VersionName) +
+ " minor version number required, comma expected");
Lex();
// Get the minor version number.
if (getLexer().isNot(AsmToken::Integer))
- return TokError("invalid OS minor version number, integer expected");
+ return TokError(Twine("invalid ") + VersionName +
+ " minor version number, integer expected");
int64_t MinorVal = getLexer().getTok().getIntVal();
if (MinorVal > 255 || MinorVal < 0)
- return TokError("invalid OS minor version number");
+ return TokError(Twine("invalid ") + VersionName + " minor version number");
*Minor = MinorVal;
Lex();
+ return false;
+}
+
+/// parseOptionalTrailingVersionComponent ::= , version_number
+bool DarwinAsmParser::parseOptionalTrailingVersionComponent(
+ unsigned *Component, const char *ComponentName) {
+ assert(getLexer().is(AsmToken::Comma) && "comma expected");
+ Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return TokError(Twine("invalid ") + ComponentName +
+ " version number, integer expected");
+ int64_t Val = getLexer().getTok().getIntVal();
+ if (Val > 255 || Val < 0)
+ return TokError(Twine("invalid ") + ComponentName + " version number");
+ *Component = Val;
+ Lex();
+ return false;
+}
+
+/// parseVersion ::= parseMajorMinorVersionComponent
+/// parseOptionalTrailingVersionComponent
+bool DarwinAsmParser::parseVersion(unsigned *Major, unsigned *Minor,
+ unsigned *Update) {
+ if (parseMajorMinorVersionComponent(Major, Minor, "OS"))
+ return true;
// Get the update level, if specified
*Update = 0;
- if (getLexer().is(AsmToken::EndOfStatement))
+ if (getLexer().is(AsmToken::EndOfStatement) ||
+ isSDKVersionToken(getLexer().getTok()))
return false;
if (getLexer().isNot(AsmToken::Comma))
return TokError("invalid OS update specifier, comma expected");
+ if (parseOptionalTrailingVersionComponent(Update, "OS update"))
+ return true;
+ return false;
+}
+
+bool DarwinAsmParser::parseSDKVersion(VersionTuple &SDKVersion) {
+ assert(isSDKVersionToken(getLexer().getTok()) && "expected sdk_version");
Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return TokError("invalid OS update version number, integer expected");
- int64_t UpdateVal = getLexer().getTok().getIntVal();
- if (UpdateVal > 255 || UpdateVal < 0)
- return TokError("invalid OS update version number");
- *Update = UpdateVal;
- Lex();
+ unsigned Major, Minor;
+ if (parseMajorMinorVersionComponent(&Major, &Minor, "SDK"))
+ return true;
+ SDKVersion = VersionTuple(Major, Minor);
+
+ // Get the subminor version, if specified.
+ if (getLexer().is(AsmToken::Comma)) {
+ unsigned Subminor;
+ if (parseOptionalTrailingVersionComponent(&Subminor, "SDK subminor"))
+ return true;
+ SDKVersion = VersionTuple(Major, Minor, Subminor);
+ }
return false;
}
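With the refactor above, the version-min and build_version directives share parseMajorMinorVersionComponent and accept an optional trailing 'sdk_version major, minor [, subminor]' clause. A rough standalone parse of that operand shape (illustration only; the real parser works on assembler tokens and reports TokError diagnostics):

    #include <iostream>
    #include <sstream>
    #include <string>

    struct Version { unsigned Major = 0, Minor = 0, Update = 0; };

    // Parse "major, minor [, update]" from the stream.
    static bool parseVersion(std::istringstream &In, Version &V) {
      char Comma;
      if (!(In >> V.Major >> Comma >> V.Minor) || Comma != ',')
        return false;
      if (In >> std::ws && In.peek() == ',')
        In >> Comma >> V.Update;
      return true;
    }

    int main() {
      // Operands of e.g. ".build_version macos, 10, 14, 1 sdk_version 10, 15"
      std::istringstream In("10, 14, 1 sdk_version 10, 15");
      Version OS, SDK;
      bool HasSDK = false;
      if (!parseVersion(In, OS))
        return 1;
      std::string Tok;
      if (In >> Tok && Tok == "sdk_version")
        HasSDK = parseVersion(In, SDK);
      std::cout << OS.Major << "." << OS.Minor << "." << OS.Update << "\n"; // 10.14.1
      if (HasSDK)
        std::cout << SDK.Major << "." << SDK.Minor << "\n";                 // 10.15
    }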
@@ -1066,10 +1117,10 @@ static Triple::OSType getOSTypeFromMCVM(MCVersionMinType Type) {
}
/// parseVersionMin
-/// ::= .ios_version_min parseVersion
-/// | .macosx_version_min parseVersion
-/// | .tvos_version_min parseVersion
-/// | .watchos_version_min parseVersion
+/// ::= .ios_version_min parseVersion parseSDKVersion
+/// | .macosx_version_min parseVersion parseSDKVersion
+/// | .tvos_version_min parseVersion parseSDKVersion
+/// | .watchos_version_min parseVersion parseSDKVersion
bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
MCVersionMinType Type) {
unsigned Major;
@@ -1078,13 +1129,16 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
if (parseVersion(&Major, &Minor, &Update))
return true;
+ VersionTuple SDKVersion;
+ if (isSDKVersionToken(getLexer().getTok()) && parseSDKVersion(SDKVersion))
+ return true;
+
if (parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(Twine(" in '") + Directive + "' directive");
Triple::OSType ExpectedOS = getOSTypeFromMCVM(Type);
checkVersion(Directive, StringRef(), Loc, ExpectedOS);
-
- getStreamer().EmitVersionMin(Type, Major, Minor, Update);
+ getStreamer().EmitVersionMin(Type, Major, Minor, Update, SDKVersion);
return false;
}
@@ -1094,13 +1148,16 @@ static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
case MachO::PLATFORM_IOS: return Triple::IOS;
case MachO::PLATFORM_TVOS: return Triple::TvOS;
case MachO::PLATFORM_WATCHOS: return Triple::WatchOS;
- case MachO::PLATFORM_BRIDGEOS: /* silence warning */break;
+ case MachO::PLATFORM_BRIDGEOS: /* silence warning */ break;
+ case MachO::PLATFORM_IOSSIMULATOR: /* silence warning */ break;
+ case MachO::PLATFORM_TVOSSIMULATOR: /* silence warning */ break;
+ case MachO::PLATFORM_WATCHOSSIMULATOR: /* silence warning */ break;
}
llvm_unreachable("Invalid mach-o platform type");
}
/// parseBuildVersion
-/// ::= .build_version (macos|ios|tvos|watchos), parseVersion
+/// ::= .build_version (macos|ios|tvos|watchos), parseVersion parseSDKVersion
bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
StringRef PlatformName;
SMLoc PlatformLoc = getTok().getLoc();
@@ -1126,14 +1183,17 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
if (parseVersion(&Major, &Minor, &Update))
return true;
+ VersionTuple SDKVersion;
+ if (isSDKVersionToken(getLexer().getTok()) && parseSDKVersion(SDKVersion))
+ return true;
+
if (parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '.build_version' directive");
Triple::OSType ExpectedOS
= getOSTypeFromPlatform((MachO::PlatformType)Platform);
checkVersion(Directive, PlatformName, Loc, ExpectedOS);
-
- getStreamer().EmitBuildVersion(Platform, Major, Minor, Update);
+ getStreamer().EmitBuildVersion(Platform, Major, Minor, Update, SDKVersion);
return false;
}
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 3d9590e1f9f5..d568f7a71eeb 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -321,6 +321,9 @@ static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
case 'y':
flags |= ELF::SHF_ARM_PURECODE;
break;
+ case 's':
+ flags |= ELF::SHF_HEX_GPREL;
+ break;
case 'G':
flags |= ELF::SHF_GROUP;
break;
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index 75cd318e4fa3..10960fc69633 100644
--- a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
-MCAsmLexer::MCAsmLexer() : AltMacroMode(false) {
+MCAsmLexer::MCAsmLexer() {
CurTok.emplace_back(AsmToken::Space, StringRef());
}
@@ -85,6 +85,7 @@ void AsmToken::dump(raw_ostream &OS) const {
case AsmToken::LessGreater: OS << "LessGreater"; break;
case AsmToken::LessLess: OS << "LessLess"; break;
case AsmToken::Minus: OS << "Minus"; break;
+ case AsmToken::MinusGreater: OS << "MinusGreater"; break;
case AsmToken::Percent: OS << "Percent"; break;
case AsmToken::Pipe: OS << "Pipe"; break;
case AsmToken::PipePipe: OS << "PipePipe"; break;
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
index d439734e76fc..efedcdc5a314 100644
--- a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
-MCAsmParser::MCAsmParser() : ShowParsedOperands(0) {}
+MCAsmParser::MCAsmParser() {}
MCAsmParser::~MCAsmParser() = default;
diff --git a/contrib/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/WasmAsmParser.cpp
new file mode 100644
index 000000000000..93bb0cb3c72e
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -0,0 +1,145 @@
+//===- WasmAsmParser.cpp - Wasm Assembly Parser ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// --
+//
+// Note, this is for wasm, the binary format (analogous to ELF), not wasm,
+// the instruction set (analogous to x86), for which parsing code lives in
+// WebAssemblyAsmParser.
+//
+// This file contains processing for generic directives implemented using
+// MCTargetStreamer, the ones that depend on WebAssemblyTargetStreamer are in
+// WebAssemblyAsmParser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/Support/MachineValueType.h"
+
+using namespace llvm;
+
+namespace {
+
+class WasmAsmParser : public MCAsmParserExtension {
+ MCAsmParser *Parser;
+ MCAsmLexer *Lexer;
+
+ template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
+ this, HandleDirective<WasmAsmParser, HandlerMethod>);
+
+ getParser().addDirectiveHandler(Directive, Handler);
+ }
+
+public:
+ WasmAsmParser() : Parser(nullptr), Lexer(nullptr) {
+ BracketExpressionsSupported = true;
+ }
+
+ void Initialize(MCAsmParser &P) override {
+ Parser = &P;
+ Lexer = &Parser->getLexer();
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(*Parser);
+
+ addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text");
+ addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
+ addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
+ addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
+ }
+
+ bool Error(const StringRef &msg, const AsmToken &tok) {
+ return Parser->Error(tok.getLoc(), msg + tok.getString());
+ }
+
+ bool IsNext(AsmToken::TokenKind Kind) {
+ auto ok = Lexer->is(Kind);
+ if (ok) Lex();
+ return ok;
+ }
+
+ bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
+ if (!IsNext(Kind))
+ return Error(std::string("Expected ") + KindName + ", instead got: ",
+ Lexer->getTok());
+ return false;
+ }
+
+ bool parseSectionDirectiveText(StringRef, SMLoc) {
+ // FIXME: .text currently no-op.
+ return false;
+ }
+
+ bool parseSectionDirective(StringRef, SMLoc) {
+ // FIXME: .section currently no-op.
+ while (Lexer->isNot(AsmToken::EndOfStatement)) Parser->Lex();
+ return false;
+ }
+
+ // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize
+ // so maybe could be shared somehow.
+ bool parseDirectiveSize(StringRef, SMLoc) {
+ StringRef Name;
+ if (Parser->parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ auto Sym = getContext().getOrCreateSymbol(Name);
+ if (Lexer->isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ const MCExpr *Expr;
+ if (Parser->parseExpression(Expr))
+ return true;
+ if (Lexer->isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+ Lex();
+ // MCWasmStreamer implements this.
+ getStreamer().emitELFSize(Sym, Expr);
+ return false;
+ }
+
+ bool parseDirectiveType(StringRef, SMLoc) {
+    // This could be the start of a function; check whether it is followed by
+    // "label,@function".
+ if (!Lexer->is(AsmToken::Identifier))
+ return Error("Expected label after .type directive, got: ",
+ Lexer->getTok());
+ auto WasmSym = cast<MCSymbolWasm>(
+ getStreamer().getContext().getOrCreateSymbol(
+ Lexer->getTok().getString()));
+ Lex();
+ if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) &&
+ Lexer->is(AsmToken::Identifier)))
+ return Error("Expected label,@type declaration, got: ", Lexer->getTok());
+ auto TypeName = Lexer->getTok().getString();
+ if (TypeName == "function")
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ else if (TypeName == "global")
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ else
+ return Error("Unknown WASM symbol type: ", Lexer->getTok());
+ Lex();
+ return Expect(AsmToken::EndOfStatement, "EOL");
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+MCAsmParserExtension *createWasmAsmParser() {
+ return new WasmAsmParser;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/MC/MCRegisterInfo.cpp b/contrib/llvm/lib/MC/MCRegisterInfo.cpp
index 8e47963b4418..5abae5379867 100644
--- a/contrib/llvm/lib/MC/MCRegisterInfo.cpp
+++ b/contrib/llvm/lib/MC/MCRegisterInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
@@ -127,6 +128,8 @@ int MCRegisterInfo::getCodeViewRegNum(unsigned RegNum) const {
report_fatal_error("target does not implement codeview register mapping");
const DenseMap<unsigned, int>::const_iterator I = L2CVRegs.find(RegNum);
if (I == L2CVRegs.end())
- report_fatal_error("unknown codeview register");
+ report_fatal_error("unknown codeview register " + (RegNum < getNumRegs()
+ ? getName(RegNum)
+ : Twine(RegNum)));
return I->second;
}
diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp
index 97bc65387dd5..d4f11d10136a 100644
--- a/contrib/llvm/lib/MC/MCSection.cpp
+++ b/contrib/llvm/lib/MC/MCSection.cpp
@@ -23,7 +23,8 @@ using namespace llvm;
MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin)
: Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false),
- IsRegistered(false), DummyFragment(this), Variant(V), Kind(K) {}
+ HasData(false), IsRegistered(false), DummyFragment(this), Variant(V),
+ Kind(K) {}
MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
if (!End)
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
index 4d77d05cc505..7ee1694ebbf7 100644
--- a/contrib/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -116,6 +116,9 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
} else if (T.isARM() || T.isThumb()) {
if (Flags & ELF::SHF_ARM_PURECODE)
OS << 'y';
+ } else if (Arch == Triple::hexagon) {
+ if (Flags & ELF::SHF_HEX_GPREL)
+ OS << 's';
}
OS << '"';
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
index 21a9c3604cfc..6a8471bc61b4 100644
--- a/contrib/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -72,6 +72,18 @@ void MCTargetStreamer::emitValue(const MCExpr *Value) {
Streamer.EmitRawText(OS.str());
}
+void MCTargetStreamer::emitRawBytes(StringRef Data) {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ const char *Directive = MAI->getData8bitsDirective();
+ for (const unsigned char C : Data.bytes()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ OS << Directive << (unsigned)C;
+ Streamer.EmitRawText(OS.str());
+ }
+}
+
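emitRawBytes above is the textual fallback: it prints one 8-bit data directive per byte through EmitRawText. Assuming the target's data directive is "\t.byte\t", the formatting amounts to the following sketch:

    #include <iostream>
    #include <string>

    // Print Data as one ".byte N" line per byte, mirroring the textual fallback.
    static void emitRawBytes(const std::string &Data, const char *Directive) {
      for (unsigned char C : Data)
        std::cout << Directive << (unsigned)C << "\n";
    }

    int main() {
      emitRawBytes("\xde\xad", "\t.byte\t");
      // Output:
      //     .byte   222
      //     .byte   173
    }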
void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
MCStreamer::MCStreamer(MCContext &Ctx)
@@ -209,6 +221,13 @@ void MCStreamer::emitDwarfFile0Directive(StringRef Directory,
Source);
}
+void MCStreamer::EmitCFIBKeyFrame() {
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+ if (!CurFrame)
+ return;
+ CurFrame->IsBKeyFrame = true;
+}
+
void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa,
@@ -270,22 +289,28 @@ bool MCStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId,
void MCStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
- StringRef FileName, SMLoc Loc) {
+ StringRef FileName, SMLoc Loc) {}
+
+bool MCStreamer::checkCVLocSection(unsigned FuncId, unsigned FileNo,
+ SMLoc Loc) {
CodeViewContext &CVC = getContext().getCVContext();
- MCCVFunctionInfo *FI = CVC.getCVFunctionInfo(FunctionId);
- if (!FI)
- return getContext().reportError(
+ MCCVFunctionInfo *FI = CVC.getCVFunctionInfo(FuncId);
+ if (!FI) {
+ getContext().reportError(
Loc, "function id not introduced by .cv_func_id or .cv_inline_site_id");
+ return false;
+ }
// Track the section
if (FI->Section == nullptr)
FI->Section = getCurrentSectionOnly();
- else if (FI->Section != getCurrentSectionOnly())
- return getContext().reportError(
+ else if (FI->Section != getCurrentSectionOnly()) {
+ getContext().reportError(
Loc,
"all .cv_loc directives for a function must be in the same section");
-
- CVC.setCurrentCVLoc(FunctionId, FileNo, Line, Column, PrologueEnd, IsStmt);
+ return false;
+ }
+ return true;
}
void MCStreamer::EmitCVLinetableDirective(unsigned FunctionId,
@@ -341,10 +366,10 @@ void MCStreamer::EmitCFISections(bool EH, bool Debug) {
assert(EH || Debug);
}
-void MCStreamer::EmitCFIStartProc(bool IsSimple) {
+void MCStreamer::EmitCFIStartProc(bool IsSimple, SMLoc Loc) {
if (hasUnfinishedDwarfFrameInfo())
- getContext().reportError(
- SMLoc(), "starting new .cfi frame before finishing the previous one");
+ return getContext().reportError(
+ Loc, "starting new .cfi frame before finishing the previous one");
MCDwarfFrameInfo Frame;
Frame.IsSimple = IsSimple;
@@ -559,6 +584,15 @@ void MCStreamer::EmitCFIWindowSave() {
CurFrame->Instructions.push_back(Instruction);
}
+void MCStreamer::EmitCFINegateRAState() {
+ MCSymbol *Label = EmitCFILabel();
+ MCCFIInstruction Instruction = MCCFIInstruction::createNegateRAState(Label);
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+ if (!CurFrame)
+ return;
+ CurFrame->Instructions.push_back(Instruction);
+}
+
void MCStreamer::EmitCFIReturnColumn(int64_t Register) {
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
@@ -609,6 +643,17 @@ void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) {
CurFrame->End = Label;
}
+void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
+ WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
+ if (!CurFrame)
+ return;
+ if (CurFrame->ChainedParent)
+ getContext().reportError(Loc, "Not all chained regions terminated!");
+
+ MCSymbol *Label = EmitCFILabel();
+ CurFrame->FuncletOrFuncEnd = Label;
+}
+
void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -820,13 +865,11 @@ void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) {
CurFrame->PrologEnd = Label;
}
-void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
-}
+void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {}
void MCStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {}
-void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
-}
+void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {}
void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {}
@@ -836,9 +879,12 @@ void MCStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {}
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
void MCStreamer::EmitRawTextImpl(StringRef String) {
- errs() << "EmitRawText called on an MCStreamer that doesn't support it, "
- " something must not be fully mc'ized\n";
- abort();
+  // This is not llvm_unreachable for the sake of out-of-tree backend
+  // developers who may not have an assembly streamer; it serves as a reminder
+  // not to call EmitRawText accidentally when no such streamer exists.
+ report_fatal_error("EmitRawText called on an MCStreamer that doesn't support "
+ "it (target backend is likely missing an AsmStreamer "
+ "implementation)");
}
void MCStreamer::EmitRawText(const Twine &T) {
@@ -872,8 +918,9 @@ void MCStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
TS->emitAssignment(Symbol, Value);
}
-void MCTargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
- const MCInst &Inst, const MCSubtargetInfo &STI) {
+void MCTargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter,
+ raw_ostream &OS, const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
InstPrinter.printInst(&Inst, OS, "", STI);
}
@@ -1016,7 +1063,8 @@ MCSymbol *MCStreamer::endSection(MCSection *Section) {
return Sym;
}
-void MCStreamer::EmitVersionForTarget(const Triple &Target) {
+void MCStreamer::EmitVersionForTarget(const Triple &Target,
+ const VersionTuple &SDKVersion) {
if (!Target.isOSBinFormatMachO() || !Target.isOSDarwin())
return;
// Do we even know the version?
@@ -1042,5 +1090,5 @@ void MCStreamer::EmitVersionForTarget(const Triple &Target) {
Target.getiOSVersion(Major, Minor, Update);
}
if (Major != 0)
- EmitVersionMin(VersionType, Major, Minor, Update);
+ EmitVersionMin(VersionType, Major, Minor, Update, SDKVersion);
}
diff --git a/contrib/llvm/lib/MC/MCWasmStreamer.cpp b/contrib/llvm/lib/MC/MCWasmStreamer.cpp
index 0e5932214047..d2a152058b90 100644
--- a/contrib/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCWasmStreamer.cpp
@@ -61,7 +61,7 @@ void MCWasmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
void MCWasmStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
MCAssembler &Asm = getAssembler();
- auto *SectionWasm = static_cast<const MCSectionWasm *>(Section);
+ auto *SectionWasm = cast<MCSectionWasm>(Section);
const MCSymbol *Grp = SectionWasm->getGroup();
if (Grp)
Asm.registerSymbol(*Grp);
@@ -119,7 +119,6 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
break;
case MCSA_ELF_TypeObject:
- Symbol->setType(wasm::WASM_SYMBOL_TYPE_DATA);
break;
default:
diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp
index 1407f25e6f2a..0724b109e1a1 100644
--- a/contrib/llvm/lib/MC/MCWin64EH.cpp
+++ b/contrib/llvm/lib/MC/MCWin64EH.cpp
@@ -11,6 +11,9 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Win64EH.h"
@@ -23,6 +26,8 @@ static uint8_t CountOfUnwindCodes(std::vector<WinEH::Instruction> &Insns) {
uint8_t Count = 0;
for (const auto &I : Insns) {
switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
+ default:
+ llvm_unreachable("Unsupported unwind code");
case Win64EH::UOP_PushNonVol:
case Win64EH::UOP_AllocSmall:
case Win64EH::UOP_SetFPReg:
@@ -60,6 +65,8 @@ static void EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
uint16_t w;
b2 = (inst.Operation & 0x0F);
switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
+ default:
+ llvm_unreachable("Unsupported unwind code");
case Win64EH::UOP_PushNonVol:
EmitAbsDifference(streamer, inst.Label, begin);
b2 |= (inst.Register & 0x0F) << 4;
@@ -242,3 +249,348 @@ void llvm::Win64EH::UnwindEmitter::EmitUnwindInfo(
::EmitUnwindInfo(Streamer, info);
}
+static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
+ const MCSymbol *RHS) {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Diff =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context),
+ MCSymbolRefExpr::create(RHS, Context), Context);
+ MCObjectStreamer *OS = (MCObjectStreamer *)(&Streamer);
+ int64_t value;
+ Diff->evaluateAsAbsolute(value, OS->getAssembler());
+ return value;
+}
+
+static uint32_t
+ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
+ uint32_t Count = 0;
+ for (const auto &I : Insns) {
+ switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
+ default:
+ llvm_unreachable("Unsupported ARM64 unwind code");
+ case Win64EH::UOP_AllocSmall:
+ Count += 1;
+ break;
+ case Win64EH::UOP_AllocMedium:
+ Count += 2;
+ break;
+ case Win64EH::UOP_AllocLarge:
+ Count += 4;
+ break;
+ case Win64EH::UOP_SaveFPLRX:
+ Count += 1;
+ break;
+ case Win64EH::UOP_SaveFPLR:
+ Count += 1;
+ break;
+ case Win64EH::UOP_SaveReg:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveRegP:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveRegPX:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveRegX:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFReg:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegP:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegX:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegPX:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SetFP:
+ Count += 1;
+ break;
+ case Win64EH::UOP_AddFP:
+ Count += 2;
+ break;
+ case Win64EH::UOP_Nop:
+ Count += 1;
+ break;
+ case Win64EH::UOP_End:
+ Count += 1;
+ break;
+ }
+ }
+ return Count;
+}
+
+// Unwind opcode encodings and restrictions are documented at
+// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
+ WinEH::Instruction &inst) {
+ uint8_t b, reg;
+ switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
+ default:
+ llvm_unreachable("Unsupported ARM64 unwind code");
+ case Win64EH::UOP_AllocSmall:
+ b = (inst.Offset >> 4) & 0x1F;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_AllocMedium: {
+ uint16_t hw = (inst.Offset >> 4) & 0x7FF;
+ b = 0xC0;
+ b |= (hw >> 8);
+ streamer.EmitIntValue(b, 1);
+ b = hw & 0xFF;
+ streamer.EmitIntValue(b, 1);
+ break;
+ }
+ case Win64EH::UOP_AllocLarge: {
+ uint32_t w;
+ b = 0xE0;
+ streamer.EmitIntValue(b, 1);
+ w = inst.Offset >> 4;
+ b = (w & 0x00FF0000) >> 16;
+ streamer.EmitIntValue(b, 1);
+ b = (w & 0x0000FF00) >> 8;
+ streamer.EmitIntValue(b, 1);
+ b = w & 0x000000FF;
+ streamer.EmitIntValue(b, 1);
+ break;
+ }
+ case Win64EH::UOP_SetFP:
+ b = 0xE1;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_AddFP:
+ b = 0xE2;
+ streamer.EmitIntValue(b, 1);
+ b = (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_Nop:
+ b = 0xE3;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFPLRX:
+ b = 0x80;
+ b |= ((inst.Offset - 1) >> 3) & 0x3F;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFPLR:
+ b = 0x40;
+ b |= (inst.Offset >> 3) & 0x3F;
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveReg:
+ assert(inst.Register >= 19 && "Saved reg must be >= 19");
+ reg = inst.Register - 19;
+ b = 0xD0 | ((reg & 0xC) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveRegX:
+ assert(inst.Register >= 19 && "Saved reg must be >= 19");
+ reg = inst.Register - 19;
+ b = 0xD4 | ((reg & 0x8) >> 3);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x7) << 5) | ((inst.Offset >> 3) - 1);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveRegP:
+ assert(inst.Register >= 19 && "Saved registers must be >= 19");
+ reg = inst.Register - 19;
+ b = 0xC8 | ((reg & 0xC) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveRegPX:
+ assert(inst.Register >= 19 && "Saved registers must be >= 19");
+ reg = inst.Register - 19;
+ b = 0xCC | ((reg & 0xC) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFReg:
+ assert(inst.Register >= 8 && "Saved dreg must be >= 8");
+ reg = inst.Register - 8;
+ b = 0xDC | ((reg & 0x4) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFRegX:
+ assert(inst.Register >= 8 && "Saved dreg must be >= 8");
+ reg = inst.Register - 8;
+ b = 0xDE;
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x7) << 5) | ((inst.Offset >> 3) - 1);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFRegP:
+ assert(inst.Register >= 8 && "Saved dregs must be >= 8");
+ reg = inst.Register - 8;
+ b = 0xD8 | ((reg & 0x4) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | (inst.Offset >> 3);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_SaveFRegPX:
+ assert(inst.Register >= 8 && "Saved dregs must be >= 8");
+ reg = inst.Register - 8;
+ b = 0xDA | ((reg & 0x4) >> 2);
+ streamer.EmitIntValue(b, 1);
+ b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1);
+ streamer.EmitIntValue(b, 1);
+ break;
+ case Win64EH::UOP_End:
+ b = 0xE4;
+ streamer.EmitIntValue(b, 1);
+ break;
+ }
+}
+
+// Populate the .xdata section. The format of .xdata on ARM64 is documented at
+// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
+ // If this UNWIND_INFO already has a symbol, it's already been emitted.
+ if (info->Symbol)
+ return;
+
+ MCContext &context = streamer.getContext();
+ MCSymbol *Label = context.createTempSymbol();
+
+ streamer.EmitValueToAlignment(4);
+ streamer.EmitLabel(Label);
+ info->Symbol = Label;
+
+ uint32_t FuncLength = 0x0;
+ if (info->FuncletOrFuncEnd)
+ FuncLength = (uint32_t)GetAbsDifference(streamer, info->FuncletOrFuncEnd,
+ info->Begin);
+ FuncLength /= 4;
+ uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
+ uint32_t TotalCodeBytes = PrologCodeBytes;
+
+ // Process epilogs.
+ MapVector<MCSymbol *, uint32_t> EpilogInfo;
+ for (auto &I : info->EpilogMap) {
+ MCSymbol *EpilogStart = I.first;
+ auto &EpilogInstrs = I.second;
+ uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs);
+ EpilogInfo[EpilogStart] = TotalCodeBytes;
+ TotalCodeBytes += CodeBytes;
+ }
+
+ // Code Words, Epilog count, E, X, Vers, Function Length
+ uint32_t row1 = 0x0;
+ uint32_t CodeWords = TotalCodeBytes / 4;
+ uint32_t CodeWordsMod = TotalCodeBytes % 4;
+ if (CodeWordsMod)
+ CodeWords++;
+ uint32_t EpilogCount = info->EpilogMap.size();
+ bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124;
+ if (!ExtensionWord) {
+ row1 |= (EpilogCount & 0x1F) << 22;
+ row1 |= (CodeWords & 0x1F) << 27;
+ }
+ // E is always 0 right now, TODO: packed epilog setup
+ if (info->HandlesExceptions) // X
+ row1 |= 1 << 20;
+ row1 |= FuncLength & 0x3FFFF;
+ streamer.EmitIntValue(row1, 4);
+
+ // Extended Code Words, Extended Epilog Count
+ if (ExtensionWord) {
+ // FIXME: We should be able to split unwind info into multiple sections.
+ // FIXME: We should share epilog codes across epilogs, where possible,
+ // which would make this issue show up less frequently.
+ if (CodeWords > 0xFF || EpilogCount > 0xFFFF)
+ report_fatal_error("SEH unwind data splitting not yet implemented");
+ uint32_t row2 = 0x0;
+ row2 |= (CodeWords & 0xFF) << 16;
+ row2 |= (EpilogCount & 0xFFFF);
+ streamer.EmitIntValue(row2, 4);
+ }
+
+ // Epilog Start Index, Epilog Start Offset
+ for (auto &I : EpilogInfo) {
+ MCSymbol *EpilogStart = I.first;
+ uint32_t EpilogIndex = I.second;
+ uint32_t EpilogOffset =
+ (uint32_t)GetAbsDifference(streamer, EpilogStart, info->Begin);
+ if (EpilogOffset)
+ EpilogOffset /= 4;
+ uint32_t row3 = EpilogOffset;
+ row3 |= (EpilogIndex & 0x3FF) << 22;
+ streamer.EmitIntValue(row3, 4);
+ }
+
+ // Emit prolog unwind instructions (in reverse order).
+ uint8_t numInst = info->Instructions.size();
+ for (uint8_t c = 0; c < numInst; ++c) {
+ WinEH::Instruction inst = info->Instructions.back();
+ info->Instructions.pop_back();
+ ARM64EmitUnwindCode(streamer, info->Begin, inst);
+ }
+
+ // Emit epilog unwind instructions
+ for (auto &I : info->EpilogMap) {
+ auto &EpilogInstrs = I.second;
+ for (uint32_t i = 0; i < EpilogInstrs.size(); i++) {
+ WinEH::Instruction inst = EpilogInstrs[i];
+ ARM64EmitUnwindCode(streamer, info->Begin, inst);
+ }
+ }
+
+ int32_t BytesMod = CodeWords * 4 - TotalCodeBytes;
+ assert(BytesMod >= 0);
+ for (int i = 0; i < BytesMod; i++)
+ streamer.EmitIntValue(0xE3, 1);
+
+ if (info->HandlesExceptions)
+ streamer.EmitValue(
+ MCSymbolRefExpr::create(info->ExceptionHandler,
+ MCSymbolRefExpr::VK_COFF_IMGREL32, context),
+ 4);
+}
+
+static void ARM64EmitRuntimeFunction(MCStreamer &streamer,
+ const WinEH::FrameInfo *info) {
+ MCContext &context = streamer.getContext();
+
+ streamer.EmitValueToAlignment(4);
+ EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
+ streamer.EmitValue(MCSymbolRefExpr::create(info->Symbol,
+ MCSymbolRefExpr::VK_COFF_IMGREL32,
+ context),
+ 4);
+}
+
+void llvm::Win64EH::ARM64UnwindEmitter::Emit(MCStreamer &Streamer) const {
+ // Emit the unwind info structs first.
+ for (const auto &CFI : Streamer.getWinFrameInfos()) {
+ MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection);
+ Streamer.SwitchSection(XData);
+ ARM64EmitUnwindInfo(Streamer, CFI.get());
+ }
+
+ // Now emit RUNTIME_FUNCTION entries.
+ for (const auto &CFI : Streamer.getWinFrameInfos()) {
+ MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection);
+ Streamer.SwitchSection(PData);
+ ARM64EmitRuntimeFunction(Streamer, CFI.get());
+ }
+}
+
+void llvm::Win64EH::ARM64UnwindEmitter::EmitUnwindInfo(
+ MCStreamer &Streamer, WinEH::FrameInfo *info) const {
+ // Switch sections (the static function above is meant to be called from
+  // here and from Emit()).
+ MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection);
+ Streamer.SwitchSection(XData);
+ ARM64EmitUnwindInfo(Streamer, info);
+}
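ARM64EmitUnwindInfo above packs the first .xdata word as FunctionLength in 4-byte units (18 bits), a 2-bit version, the X and E flags, and then EpilogCount and CodeWords in the high bits whenever no extension word is required; the field layout follows the Microsoft ARM64 exception-handling document linked above. A small arithmetic sketch of that packing under the same assumptions:

    #include <cassert>
    #include <cstdint>

    // First .xdata word for the non-extended case (EpilogCount <= 31,
    // CodeWords <= 31): |CodeWords:5|EpilogCount:5|E|X|Vers:2|FuncLength:18|
    static uint32_t makeRow1(uint32_t FuncLengthBytes, uint32_t EpilogCount,
                             uint32_t CodeWords, bool HandlesExceptions) {
      uint32_t Row1 = 0;
      Row1 |= (EpilogCount & 0x1F) << 22;
      Row1 |= (CodeWords & 0x1F) << 27;
      if (HandlesExceptions)            // X bit
        Row1 |= 1u << 20;
      Row1 |= (FuncLengthBytes / 4) & 0x3FFFF;
      return Row1;
    }

    int main() {
      // 256-byte function, one epilog, one word of unwind codes, no handler.
      assert(makeRow1(256, 1, 1, false) == 0x08400040u);
    }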
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
index 2664528909af..2fa65658ccfa 100644
--- a/contrib/llvm/lib/MC/MachObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -597,8 +597,8 @@ void MachObjectWriter::computeSymbolTable(
}
// External and undefined symbols are required to be in lexicographic order.
- llvm::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
- llvm::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+ llvm::sort(ExternalSymbolData);
+ llvm::sort(UndefinedSymbolData);
// Set the symbol indices.
Index = 0;
@@ -846,18 +846,27 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
// Write out the deployment target information, if it's available.
if (VersionInfo.Major != 0) {
- assert(VersionInfo.Update < 256 && "unencodable update target version");
- assert(VersionInfo.Minor < 256 && "unencodable minor target version");
- assert(VersionInfo.Major < 65536 && "unencodable major target version");
- uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) |
- (VersionInfo.Major << 16);
+ auto EncodeVersion = [](VersionTuple V) -> uint32_t {
+ assert(!V.empty() && "empty version");
+ unsigned Update = V.getSubminor() ? *V.getSubminor() : 0;
+ unsigned Minor = V.getMinor() ? *V.getMinor() : 0;
+ assert(Update < 256 && "unencodable update target version");
+ assert(Minor < 256 && "unencodable minor target version");
+ assert(V.getMajor() < 65536 && "unencodable major target version");
+ return Update | (Minor << 8) | (V.getMajor() << 16);
+ };
+ uint32_t EncodedVersion = EncodeVersion(
+ VersionTuple(VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
+ uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
+ ? EncodeVersion(VersionInfo.SDKVersion)
+ : 0;
if (VersionInfo.EmitBuildVersion) {
// FIXME: Currently empty tools. Add clang version in the future.
W.write<uint32_t>(MachO::LC_BUILD_VERSION);
W.write<uint32_t>(sizeof(MachO::build_version_command));
W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
W.write<uint32_t>(EncodedVersion);
- W.write<uint32_t>(0); // SDK version.
+ W.write<uint32_t>(SDKVersion);
W.write<uint32_t>(0); // Empty tools list.
} else {
MachO::LoadCommandType LCType
@@ -865,7 +874,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
W.write<uint32_t>(LCType);
W.write<uint32_t>(sizeof(MachO::version_min_command));
W.write<uint32_t>(EncodedVersion);
- W.write<uint32_t>(0); // reserved.
+ W.write<uint32_t>(SDKVersion);
}
}
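The EncodeVersion lambda introduced in this hunk packs a major.minor.update triple into the single 32-bit field shared by LC_BUILD_VERSION and LC_VERSION_MIN_* load commands: update in bits 0-7, minor in bits 8-15, major in bits 16-31. A minimal standalone sketch of that packing (not the MachObjectWriter code itself):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Pack major.minor.update into the 32-bit Mach-O version encoding.
    static uint32_t encodeVersion(unsigned Major, unsigned Minor, unsigned Update) {
      assert(Update < 256 && Minor < 256 && Major < 65536);
      return Update | (Minor << 8) | (Major << 16);
    }

    int main() {
      // Version 10.14.1 encodes as 0x000A0E01.
      printf("0x%08X\n", encodeVersion(10, 14, 1));
      return 0;
    }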
diff --git a/contrib/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm/lib/MC/WasmObjectWriter.cpp
index 5a979d36e81b..0cca3757be90 100644
--- a/contrib/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WasmObjectWriter.cpp
@@ -56,9 +56,10 @@ struct SectionBookkeeping {
uint32_t Index;
};
-// The signature of a wasm function, in a struct capable of being used as a
-// DenseMap key.
-struct WasmFunctionType {
+// The signature of a wasm function or event, in a struct capable of being used
+// as a DenseMap key.
+// TODO: Consider using wasm::WasmSignature directly instead.
+struct WasmSignature {
// Support empty and tombstone instances, needed by DenseMap.
enum { Plain, Empty, Tombstone } State;
@@ -68,36 +69,35 @@ struct WasmFunctionType {
// The parameter types of the function.
SmallVector<wasm::ValType, 4> Params;
- WasmFunctionType() : State(Plain) {}
+ WasmSignature() : State(Plain) {}
- bool operator==(const WasmFunctionType &Other) const {
+ bool operator==(const WasmSignature &Other) const {
return State == Other.State && Returns == Other.Returns &&
Params == Other.Params;
}
};
-// Traits for using WasmFunctionType in a DenseMap.
-struct WasmFunctionTypeDenseMapInfo {
- static WasmFunctionType getEmptyKey() {
- WasmFunctionType FuncTy;
- FuncTy.State = WasmFunctionType::Empty;
- return FuncTy;
+// Traits for using WasmSignature in a DenseMap.
+struct WasmSignatureDenseMapInfo {
+ static WasmSignature getEmptyKey() {
+ WasmSignature Sig;
+ Sig.State = WasmSignature::Empty;
+ return Sig;
}
- static WasmFunctionType getTombstoneKey() {
- WasmFunctionType FuncTy;
- FuncTy.State = WasmFunctionType::Tombstone;
- return FuncTy;
+ static WasmSignature getTombstoneKey() {
+ WasmSignature Sig;
+ Sig.State = WasmSignature::Tombstone;
+ return Sig;
}
- static unsigned getHashValue(const WasmFunctionType &FuncTy) {
- uintptr_t Value = FuncTy.State;
- for (wasm::ValType Ret : FuncTy.Returns)
- Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret));
- for (wasm::ValType Param : FuncTy.Params)
- Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param));
+ static unsigned getHashValue(const WasmSignature &Sig) {
+ uintptr_t Value = Sig.State;
+ for (wasm::ValType Ret : Sig.Returns)
+ Value += DenseMapInfo<uint32_t>::getHashValue(uint32_t(Ret));
+ for (wasm::ValType Param : Sig.Params)
+ Value += DenseMapInfo<uint32_t>::getHashValue(uint32_t(Param));
return Value;
}
- static bool isEqual(const WasmFunctionType &LHS,
- const WasmFunctionType &RHS) {
+ static bool isEqual(const WasmSignature &LHS, const WasmSignature &RHS) {
return LHS == RHS;
}
};
@@ -117,7 +117,7 @@ struct WasmDataSegment {
// A wasm function to be written into the function section.
struct WasmFunction {
- int32_t Type;
+ uint32_t SigIndex;
const MCSymbolWasm *Sym;
};
@@ -137,11 +137,11 @@ struct WasmComdatEntry {
// Information about a single relocation.
struct WasmRelocationEntry {
- uint64_t Offset; // Where is the relocation.
- const MCSymbolWasm *Symbol; // The symbol to relocate with.
- int64_t Addend; // A value to add to the symbol.
- unsigned Type; // The type of the relocation.
- const MCSectionWasm *FixupSection;// The section the relocation is targeting.
+ uint64_t Offset; // Where is the relocation.
+ const MCSymbolWasm *Symbol; // The symbol to relocate with.
+ int64_t Addend; // A value to add to the symbol.
+ unsigned Type; // The type of the relocation.
+ const MCSectionWasm *FixupSection; // The section the relocation is targeting.
WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol,
int64_t Addend, unsigned Type,
@@ -163,8 +163,8 @@ struct WasmRelocationEntry {
}
void print(raw_ostream &Out) const {
- Out << wasm::relocTypetoString(Type)
- << " Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend
+ Out << wasm::relocTypetoString(Type) << " Off=" << Offset
+ << ", Sym=" << *Symbol << ", Addend=" << Addend
<< ", FixupSection=" << FixupSection->getSectionName();
}
@@ -215,7 +215,8 @@ class WasmObjectWriter : public MCObjectWriter {
// Maps function symbols to the table element index space. Used
// for TABLE_INDEX relocation types (i.e. address taken functions).
DenseMap<const MCSymbolWasm *, uint32_t> TableIndices;
- // Maps function/global symbols to the function/global/section index space.
+ // Maps function/global symbols to the function/global/event/section index
+ // space.
DenseMap<const MCSymbolWasm *, uint32_t> WasmIndices;
// Maps data symbols to the Wasm segment and offset/size with the segment.
DenseMap<const MCSymbolWasm *, wasm::WasmDataReference> DataLocations;
@@ -230,13 +231,13 @@ class WasmObjectWriter : public MCObjectWriter {
// Map from section to defining function symbol.
DenseMap<const MCSection *, const MCSymbol *> SectionFunctions;
- DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
- FunctionTypeIndices;
- SmallVector<WasmFunctionType, 4> FunctionTypes;
+ DenseMap<WasmSignature, uint32_t, WasmSignatureDenseMapInfo> SignatureIndices;
+ SmallVector<WasmSignature, 4> Signatures;
SmallVector<WasmGlobal, 4> Globals;
SmallVector<WasmDataSegment, 4> DataSegments;
unsigned NumFunctionImports = 0;
unsigned NumGlobalImports = 0;
+ unsigned NumEventImports = 0;
uint32_t SectionCount = 0;
// TargetObjectWriter wrappers.
@@ -265,8 +266,8 @@ private:
TableIndices.clear();
DataLocations.clear();
CustomSectionsRelocations.clear();
- FunctionTypeIndices.clear();
- FunctionTypes.clear();
+ SignatureIndices.clear();
+ Signatures.clear();
Globals.clear();
DataSegments.clear();
SectionFunctions.clear();
@@ -291,11 +292,9 @@ private:
W.OS << Str;
}
- void writeValueType(wasm::ValType Ty) {
- W.OS << static_cast<char>(Ty);
- }
+ void writeValueType(wasm::ValType Ty) { W.OS << static_cast<char>(Ty); }
- void writeTypeSection(ArrayRef<WasmFunctionType> FunctionTypes);
+ void writeTypeSection(ArrayRef<WasmSignature> Signatures);
void writeImportSection(ArrayRef<wasm::WasmImport> Imports, uint32_t DataSize,
uint32_t NumElements);
void writeFunctionSection(ArrayRef<WasmFunction> Functions);
@@ -305,8 +304,9 @@ private:
void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
ArrayRef<WasmFunction> Functions);
void writeDataSection();
+ void writeEventSection(ArrayRef<wasm::WasmEventType> Events);
void writeRelocSection(uint32_t SectionIndex, StringRef Name,
- ArrayRef<WasmRelocationEntry> Relocations);
+ std::vector<WasmRelocationEntry> &Relocations);
void writeLinkingMetaDataSection(
ArrayRef<wasm::WasmSymbolInfo> SymbolInfos,
ArrayRef<std::pair<uint16_t, uint32_t>> InitFuncs,
@@ -323,7 +323,9 @@ private:
uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry);
uint32_t getFunctionType(const MCSymbolWasm &Symbol);
- uint32_t registerFunctionType(const MCSymbolWasm &Symbol);
+ uint32_t getEventType(const MCSymbolWasm &Symbol);
+ void registerFunctionType(const MCSymbolWasm &Symbol);
+ void registerEventType(const MCSymbolWasm &Symbol);
};
} // end anonymous namespace
@@ -529,8 +531,8 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
// to allow patching.
-static void
-WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+static void WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X,
+ uint64_t Offset) {
uint8_t Buffer[5];
unsigned SizeLen = encodeULEB128(X, Buffer, 5);
assert(SizeLen == 5);
@@ -539,8 +541,8 @@ WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
// Write X as an signed LEB value at offset Offset in Stream, padded
// to allow patching.
-static void
-WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X, uint64_t Offset) {
+static void WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X,
+ uint64_t Offset) {
uint8_t Buffer[5];
unsigned SizeLen = encodeSLEB128(X, Buffer, 5);
assert(SizeLen == 5);
@@ -554,7 +556,7 @@ static void WriteI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
}
-static const MCSymbolWasm* ResolveSymbol(const MCSymbolWasm& Symbol) {
+static const MCSymbolWasm *ResolveSymbol(const MCSymbolWasm &Symbol) {
if (Symbol.isVariable()) {
const MCExpr *Expr = Symbol.getVariableValue();
auto *Inner = cast<MCSymbolRefExpr>(Expr);
@@ -582,7 +584,8 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) {
return getRelocationIndexValue(RelEntry);
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
- // Provisional value is function/global Wasm index
+ case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
+ // Provisional value is function/global/event Wasm index
if (!WasmIndices.count(RelEntry.Symbol))
report_fatal_error("symbol not found in wasm index space: " +
RelEntry.Symbol->getName());
@@ -626,10 +629,9 @@ static void addData(SmallVectorImpl<char> &DataBytes,
report_fatal_error("only byte values supported for alignment");
// If nops are requested, use zeros, as this is the data section.
uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue();
- uint64_t Size = std::min<uint64_t>(alignTo(DataBytes.size(),
- Align->getAlignment()),
- DataBytes.size() +
- Align->getMaxBytesToEmit());
+ uint64_t Size =
+ std::min<uint64_t>(alignTo(DataBytes.size(), Align->getAlignment()),
+ DataBytes.size() + Align->getMaxBytesToEmit());
DataBytes.resize(Size, Value);
} else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) {
int64_t NumValues;
@@ -637,10 +639,12 @@ static void addData(SmallVectorImpl<char> &DataBytes,
llvm_unreachable("The fill should be an assembler constant");
DataBytes.insert(DataBytes.end(), Fill->getValueSize() * NumValues,
Fill->getValue());
+ } else if (auto *LEB = dyn_cast<MCLEBFragment>(&Frag)) {
+ const SmallVectorImpl<char> &Contents = LEB->getContents();
+ DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
} else {
const auto &DataFrag = cast<MCDataFragment>(Frag);
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
-
DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
}
}
@@ -678,6 +682,7 @@ void WasmObjectWriter::applyRelocations(
case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+ case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
WritePatchableLEB(Stream, Value, Offset);
break;
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
@@ -696,23 +701,22 @@ void WasmObjectWriter::applyRelocations(
}
}
-void WasmObjectWriter::writeTypeSection(
- ArrayRef<WasmFunctionType> FunctionTypes) {
- if (FunctionTypes.empty())
+void WasmObjectWriter::writeTypeSection(ArrayRef<WasmSignature> Signatures) {
+ if (Signatures.empty())
return;
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_TYPE);
- encodeULEB128(FunctionTypes.size(), W.OS);
+ encodeULEB128(Signatures.size(), W.OS);
- for (const WasmFunctionType &FuncTy : FunctionTypes) {
+ for (const WasmSignature &Sig : Signatures) {
W.OS << char(wasm::WASM_TYPE_FUNC);
- encodeULEB128(FuncTy.Params.size(), W.OS);
- for (wasm::ValType Ty : FuncTy.Params)
+ encodeULEB128(Sig.Params.size(), W.OS);
+ for (wasm::ValType Ty : Sig.Params)
writeValueType(Ty);
- encodeULEB128(FuncTy.Returns.size(), W.OS);
- for (wasm::ValType Ty : FuncTy.Returns)
+ encodeULEB128(Sig.Returns.size(), W.OS);
+ for (wasm::ValType Ty : Sig.Returns)
writeValueType(Ty);
}
@@ -745,14 +749,18 @@ void WasmObjectWriter::writeImportSection(ArrayRef<wasm::WasmImport> Imports,
W.OS << char(Import.Global.Mutable ? 1 : 0);
break;
case wasm::WASM_EXTERNAL_MEMORY:
- encodeULEB128(0, W.OS); // flags
+ encodeULEB128(0, W.OS); // flags
encodeULEB128(NumPages, W.OS); // initial
break;
case wasm::WASM_EXTERNAL_TABLE:
W.OS << char(Import.Table.ElemType);
- encodeULEB128(0, W.OS); // flags
+ encodeULEB128(0, W.OS); // flags
encodeULEB128(NumElements, W.OS); // initial
break;
+ case wasm::WASM_EXTERNAL_EVENT:
+ encodeULEB128(Import.Event.Attribute, W.OS);
+ encodeULEB128(Import.Event.SigIndex, W.OS);
+ break;
default:
llvm_unreachable("unsupported import kind");
}
@@ -770,7 +778,7 @@ void WasmObjectWriter::writeFunctionSection(ArrayRef<WasmFunction> Functions) {
encodeULEB128(Functions.size(), W.OS);
for (const WasmFunction &Func : Functions)
- encodeULEB128(Func.Type, W.OS);
+ encodeULEB128(Func.SigIndex, W.OS);
endSection(Section);
}
@@ -795,6 +803,22 @@ void WasmObjectWriter::writeGlobalSection() {
endSection(Section);
}
+void WasmObjectWriter::writeEventSection(ArrayRef<wasm::WasmEventType> Events) {
+ if (Events.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_EVENT);
+
+ encodeULEB128(Events.size(), W.OS);
+ for (const wasm::WasmEventType &Event : Events) {
+ encodeULEB128(Event.Attribute, W.OS);
+ encodeULEB128(Event.SigIndex, W.OS);
+ }
+
+ endSection(Section);
+}
+
void WasmObjectWriter::writeExportSection(ArrayRef<wasm::WasmExport> Exports) {
if (Exports.empty())
return;
@@ -892,21 +916,33 @@ void WasmObjectWriter::writeDataSection() {
void WasmObjectWriter::writeRelocSection(
uint32_t SectionIndex, StringRef Name,
- ArrayRef<WasmRelocationEntry> Relocations) {
+ std::vector<WasmRelocationEntry> &Relocs) {
// See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
// for descriptions of the reloc sections.
- if (Relocations.empty())
+ if (Relocs.empty())
return;
+ // First, ensure the relocations are sorted in offset order. In general they
+ // should already be sorted since `recordRelocation` is called in offset
+ // order, but for the code section we combine many MC sections into a single
+ // wasm section, and this order is determined by the order of Asm.Symbols(),
+ // not by the order of the sections.
+ std::stable_sort(
+ Relocs.begin(), Relocs.end(),
+ [](const WasmRelocationEntry &A, const WasmRelocationEntry &B) {
+ return (A.Offset + A.FixupSection->getSectionOffset()) <
+ (B.Offset + B.FixupSection->getSectionOffset());
+ });
+
SectionBookkeeping Section;
startCustomSection(Section, std::string("reloc.") + Name.str());
encodeULEB128(SectionIndex, W.OS);
- encodeULEB128(Relocations.size(), W.OS);
- for (const WasmRelocationEntry& RelEntry : Relocations) {
- uint64_t Offset = RelEntry.Offset +
- RelEntry.FixupSection->getSectionOffset();
+ encodeULEB128(Relocs.size(), W.OS);
+ for (const WasmRelocationEntry &RelEntry : Relocs) {
+ uint64_t Offset =
+ RelEntry.Offset + RelEntry.FixupSection->getSectionOffset();
uint32_t Index = getRelocationIndexValue(RelEntry);
W.OS << char(RelEntry.Type);
@@ -944,6 +980,7 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
switch (Sym.Kind) {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
encodeULEB128(Sym.ElementIndex, W.OS);
if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0)
writeString(Sym.Name);
@@ -984,7 +1021,7 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
startSection(SubSection, wasm::WASM_INIT_FUNCS);
encodeULEB128(InitFuncs.size(), W.OS);
for (auto &StartFunc : InitFuncs) {
- encodeULEB128(StartFunc.first, W.OS); // priority
+ encodeULEB128(StartFunc.first, W.OS); // priority
encodeULEB128(StartFunc.second, W.OS); // function index
}
endSection(SubSection);
@@ -1029,30 +1066,57 @@ void WasmObjectWriter::writeCustomSections(const MCAssembler &Asm,
}
}
-uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm& Symbol) {
+uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm &Symbol) {
assert(Symbol.isFunction());
assert(TypeIndices.count(&Symbol));
return TypeIndices[&Symbol];
}
-uint32_t WasmObjectWriter::registerFunctionType(const MCSymbolWasm& Symbol) {
+uint32_t WasmObjectWriter::getEventType(const MCSymbolWasm &Symbol) {
+ assert(Symbol.isEvent());
+ assert(TypeIndices.count(&Symbol));
+ return TypeIndices[&Symbol];
+}
+
+void WasmObjectWriter::registerFunctionType(const MCSymbolWasm &Symbol) {
assert(Symbol.isFunction());
- WasmFunctionType F;
- const MCSymbolWasm* ResolvedSym = ResolveSymbol(Symbol);
- F.Returns = ResolvedSym->getReturns();
- F.Params = ResolvedSym->getParams();
+ WasmSignature S;
+ const MCSymbolWasm *ResolvedSym = ResolveSymbol(Symbol);
+ if (auto *Sig = ResolvedSym->getSignature()) {
+ S.Returns = Sig->Returns;
+ S.Params = Sig->Params;
+ }
- auto Pair =
- FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
+ auto Pair = SignatureIndices.insert(std::make_pair(S, Signatures.size()));
if (Pair.second)
- FunctionTypes.push_back(F);
+ Signatures.push_back(S);
TypeIndices[&Symbol] = Pair.first->second;
LLVM_DEBUG(dbgs() << "registerFunctionType: " << Symbol
<< " new:" << Pair.second << "\n");
LLVM_DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n");
- return Pair.first->second;
+}
+
+void WasmObjectWriter::registerEventType(const MCSymbolWasm &Symbol) {
+ assert(Symbol.isEvent());
+
+ // TODO: Currently we don't generate imported exceptions, but if we do, we
+ // should have a way of inferring types of imported exceptions.
+ WasmSignature S;
+ if (auto *Sig = Symbol.getSignature()) {
+ S.Returns = Sig->Returns;
+ S.Params = Sig->Params;
+ }
+
+ auto Pair = SignatureIndices.insert(std::make_pair(S, Signatures.size()));
+ if (Pair.second)
+ Signatures.push_back(S);
+ TypeIndices[&Symbol] = Pair.first->second;
+
+ LLVM_DEBUG(dbgs() << "registerEventType: " << Symbol << " new:" << Pair.second
+ << "\n");
+ LLVM_DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n");
}
static bool isInSymtab(const MCSymbolWasm &Sym) {
@@ -1086,6 +1150,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
SmallVector<uint32_t, 4> TableElems;
SmallVector<wasm::WasmImport, 4> Imports;
SmallVector<wasm::WasmExport, 4> Exports;
+ SmallVector<wasm::WasmEventType, 1> Events;
SmallVector<wasm::WasmSymbolInfo, 4> SymbolInfos;
SmallVector<std::pair<uint16_t, uint32_t>, 2> InitFuncs;
std::map<StringRef, std::vector<WasmComdatEntry>> Comdats;
@@ -1111,10 +1176,10 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
TableImport.Module = TableSym->getModuleName();
TableImport.Field = TableSym->getName();
TableImport.Kind = wasm::WASM_EXTERNAL_TABLE;
- TableImport.Table.ElemType = wasm::WASM_TYPE_ANYFUNC;
+ TableImport.Table.ElemType = wasm::WASM_TYPE_FUNCREF;
Imports.push_back(TableImport);
- // Populate FunctionTypeIndices, and Imports and WasmIndices for undefined
+ // Populate SignatureIndices, and Imports and WasmIndices for undefined
// symbols. This must be done before populating WasmIndices for defined
// symbols.
for (const MCSymbol &S : Asm.symbols()) {
@@ -1125,6 +1190,9 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
if (WS.isFunction())
registerFunctionType(WS);
+ if (WS.isEvent())
+ registerEventType(WS);
+
if (WS.isTemporary())
continue;
@@ -1149,6 +1217,18 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Import.Global = WS.getGlobalType();
Imports.push_back(Import);
WasmIndices[&WS] = NumGlobalImports++;
+ } else if (WS.isEvent()) {
+ if (WS.isWeak())
+ report_fatal_error("undefined event symbol cannot be weak");
+
+ wasm::WasmImport Import;
+ Import.Module = WS.getModuleName();
+ Import.Field = WS.getName();
+ Import.Kind = wasm::WASM_EXTERNAL_EVENT;
+ Import.Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
+ Import.Event.SigIndex = getEventType(WS);
+ Imports.push_back(Import);
+ WasmIndices[&WS] = NumEventImports++;
}
}
}
@@ -1176,7 +1256,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Segment.Offset = DataSize;
Segment.Section = &Section;
addData(Segment.Data, Section);
- Segment.Alignment = Section.getAlignment();
+ Segment.Alignment = Log2_32(Section.getAlignment());
Segment.Flags = 0;
DataSize += Segment.Data.size();
Section.setSegmentIndex(SegmentIndex);
@@ -1195,7 +1275,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
if (Name.startswith(".custom_section."))
Name = Name.substr(strlen(".custom_section."));
- MCSymbol* Begin = Sec.getBeginSymbol();
+ MCSymbol *Begin = Sec.getBeginSymbol();
if (Begin) {
WasmIndices[cast<MCSymbolWasm>(Begin)] = CustomSections.size();
if (SectionName != Begin->getName())
@@ -1240,7 +1320,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// A definition. Write out the function body.
Index = NumFunctionImports + Functions.size();
WasmFunction Func;
- Func.Type = getFunctionType(WS);
+ Func.SigIndex = getFunctionType(WS);
Func.Sym = &WS;
WasmIndices[&WS] = Index;
Functions.push_back(Func);
@@ -1256,6 +1336,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
}
LLVM_DEBUG(dbgs() << " -> function index: " << Index << "\n");
+
} else if (WS.isData()) {
if (WS.isTemporary() && !WS.getSize())
continue;
@@ -1285,6 +1366,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
static_cast<uint32_t>(Size)};
DataLocations[&WS] = Ref;
LLVM_DEBUG(dbgs() << " -> segment index: " << Ref.Segment << "\n");
+
} else if (WS.isGlobal()) {
// A "true" Wasm global (currently just __stack_pointer)
if (WS.isDefined())
@@ -1293,6 +1375,24 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// An import; the index was assigned above
LLVM_DEBUG(dbgs() << " -> global index: "
<< WasmIndices.find(&WS)->second << "\n");
+
+ } else if (WS.isEvent()) {
+ // C++ exception symbol (__cpp_exception)
+ unsigned Index;
+ if (WS.isDefined()) {
+ Index = NumEventImports + Events.size();
+ wasm::WasmEventType Event;
+ Event.SigIndex = getEventType(WS);
+ Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
+ WasmIndices[&WS] = Index;
+ Events.push_back(Event);
+ } else {
+ // An import; the index was assigned above.
+ Index = WasmIndices.find(&WS)->second;
+ }
+ LLVM_DEBUG(dbgs() << " -> event index: " << WasmIndices.find(&WS)->second
+ << "\n");
+
} else {
assert(WS.isSection());
}
@@ -1326,7 +1426,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
DataLocations[&WS] = Ref;
LLVM_DEBUG(dbgs() << " -> index:" << Ref.Segment << "\n");
} else {
- report_fatal_error("don't yet support global aliases");
+ report_fatal_error("don't yet support global/event aliases");
}
}
@@ -1424,7 +1524,8 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
unsigned PrefixLength = strlen(".init_array");
if (WS.getSectionName().size() > PrefixLength) {
if (WS.getSectionName()[PrefixLength] != '.')
- report_fatal_error(".init_array section priority should start with '.'");
+ report_fatal_error(
+ ".init_array section priority should start with '.'");
if (WS.getSectionName()
.substr(PrefixLength + 1)
.getAsInteger(10, Priority))
@@ -1432,14 +1533,16 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
}
const auto &DataFrag = cast<MCDataFragment>(Frag);
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
- for (const uint8_t *p = (const uint8_t *)Contents.data(),
- *end = (const uint8_t *)Contents.data() + Contents.size();
+ for (const uint8_t *
+ p = (const uint8_t *)Contents.data(),
+ *end = (const uint8_t *)Contents.data() + Contents.size();
p != end; ++p) {
if (*p != 0)
report_fatal_error("non-symbolic data in .init_array section");
}
for (const MCFixup &Fixup : DataFrag.getFixups()) {
- assert(Fixup.getKind() == MCFixup::getKindForSize(is64Bit() ? 8 : 4, false));
+ assert(Fixup.getKind() ==
+ MCFixup::getKindForSize(is64Bit() ? 8 : 4, false));
const MCExpr *Expr = Fixup.getValue();
auto *Sym = dyn_cast<MCSymbolRefExpr>(Expr);
if (!Sym)
@@ -1456,12 +1559,13 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// Write out the Wasm header.
writeHeader(Asm);
- writeTypeSection(FunctionTypes);
+ writeTypeSection(Signatures);
writeImportSection(Imports, DataSize, TableElems.size());
writeFunctionSection(Functions);
// Skip the "table" section; we import the table instead.
// Skip the "memory" section; we import the memory instead.
writeGlobalSection();
+ writeEventSection(Events);
writeExportSection(Exports);
writeElemSection(TableElems);
writeCodeSection(Asm, Layout, Functions);
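Several hunks in this file revolve around WritePatchableLEB/WritePatchableSLEB, which emit relocation targets as fixed-width five-byte LEB values so they can be patched in place once final indices are known. The sketch below reproduces that padded ULEB128 encoding as standalone C++ under that assumption; it is not the llvm/Support/LEB128.h implementation, just an illustration of the byte layout.

    #include <cstdint>
    #include <cstdio>

    // Emit X as ULEB128, padded with continuation bytes up to exactly PadTo bytes.
    static unsigned encodeULEB128Padded(uint32_t X, uint8_t *Buf, unsigned PadTo) {
      unsigned N = 0;
      do {
        uint8_t Byte = X & 0x7f;
        X >>= 7;
        if (X != 0 || N + 1 < PadTo)
          Byte |= 0x80; // more bytes follow
        Buf[N++] = Byte;
      } while (X != 0 || N < PadTo);
      return N;
    }

    int main() {
      uint8_t Buf[5];
      unsigned Len = encodeULEB128Padded(624485, Buf, 5);
      for (unsigned I = 0; I < Len; ++I)
        printf("%02X ", Buf[I]); // prints: E5 8E A6 80 00
      printf("\n");
      return 0;
    }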
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 9ffecd99df68..b774852eabe6 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JamCRC.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -58,8 +59,6 @@ namespace {
using name = SmallString<COFF::NameSize>;
enum AuxiliaryType {
- ATFunctionDefinition,
- ATbfAndefSymbol,
ATWeakExternal,
ATFile,
ATSectionDefinition
@@ -147,6 +146,10 @@ public:
bool UseBigObj;
+ bool EmitAddrsigSection = false;
+ MCSectionCOFF *AddrsigSection;
+ std::vector<const MCSymbol *> AddrsigSyms;
+
WinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS);
@@ -206,6 +209,11 @@ public:
void assignSectionNumbers();
void assignFileOffsets(MCAssembler &Asm, const MCAsmLayout &Layout);
+ void emitAddrsigSection() override { EmitAddrsigSection = true; }
+ void addAddrsigSymbol(const MCSymbol *Sym) override {
+ AddrsigSyms.push_back(Sym);
+ }
+
uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
@@ -515,24 +523,6 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
const COFFSymbol::AuxiliarySymbols &S) {
for (const AuxSymbol &i : S) {
switch (i.AuxType) {
- case ATFunctionDefinition:
- W.write<uint32_t>(i.Aux.FunctionDefinition.TagIndex);
- W.write<uint32_t>(i.Aux.FunctionDefinition.TotalSize);
- W.write<uint32_t>(i.Aux.FunctionDefinition.PointerToLinenumber);
- W.write<uint32_t>(i.Aux.FunctionDefinition.PointerToNextFunction);
- W.OS.write_zeros(sizeof(i.Aux.FunctionDefinition.unused));
- if (UseBigObj)
- W.OS.write_zeros(COFF::Symbol32Size - COFF::Symbol16Size);
- break;
- case ATbfAndefSymbol:
- W.OS.write_zeros(sizeof(i.Aux.bfAndefSymbol.unused1));
- W.write<uint16_t>(i.Aux.bfAndefSymbol.Linenumber);
- W.OS.write_zeros(sizeof(i.Aux.bfAndefSymbol.unused2));
- W.write<uint32_t>(i.Aux.bfAndefSymbol.PointerToNextFunction);
- W.OS.write_zeros(sizeof(i.Aux.bfAndefSymbol.unused3));
- if (UseBigObj)
- W.OS.write_zeros(COFF::Symbol32Size - COFF::Symbol16Size);
- break;
case ATWeakExternal:
W.write<uint32_t>(i.Aux.WeakExternal.TagIndex);
W.write<uint32_t>(i.Aux.WeakExternal.Characteristics);
@@ -568,10 +558,9 @@ void WinCOFFObjectWriter::writeSectionHeaders() {
std::vector<COFFSection *> Arr;
for (auto &Section : Sections)
Arr.push_back(Section.get());
- llvm::sort(Arr.begin(), Arr.end(),
- [](const COFFSection *A, const COFFSection *B) {
- return A->Number < B->Number;
- });
+ llvm::sort(Arr, [](const COFFSection *A, const COFFSection *B) {
+ return A->Number < B->Number;
+ });
for (auto &Section : Arr) {
if (Section->Number == -1)
@@ -630,14 +619,9 @@ void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
// Write the section contents.
if (Sec.Header.PointerToRawData != 0) {
- assert(W.OS.tell() <= Sec.Header.PointerToRawData &&
+ assert(W.OS.tell() == Sec.Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- unsigned PaddingSize = Sec.Header.PointerToRawData - W.OS.tell();
- assert(PaddingSize < 4 &&
- "Should only need at most three bytes of padding!");
- W.OS.write_zeros(PaddingSize);
-
uint32_t CRC = writeSectionContents(Asm, Layout, MCSec);
// Update the section definition auxiliary symbol to record the CRC.
@@ -677,6 +661,13 @@ void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
+ if (EmitAddrsigSection) {
+ AddrsigSection = Asm.getContext().getCOFFSection(
+ ".llvm_addrsig", COFF::IMAGE_SCN_LNK_REMOVE,
+ SectionKind::getMetadata());
+ Asm.registerSection(*AddrsigSection);
+ }
+
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
for (const auto &Section : Asm)
@@ -915,10 +906,7 @@ void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm,
Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);
if (IsPhysicalSection(Sec)) {
- // Align the section data to a four byte boundary.
- Offset = alignTo(Offset, 4);
Sec->Header.PointerToRawData = Offset;
-
Offset += Sec->Header.SizeOfRawData;
}
@@ -1020,22 +1008,47 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
continue;
const MCSectionCOFF &MCSec = *Section->MCSection;
+ const MCSymbol *AssocMCSym = MCSec.getCOMDATSymbol();
+ assert(AssocMCSym);
+
+ // It's an error to try to associate with an undefined symbol or a symbol
+ // without a section.
+ if (!AssocMCSym->isInSection()) {
+ Asm.getContext().reportError(
+ SMLoc(), Twine("cannot make section ") + MCSec.getSectionName() +
+ Twine(" associative with sectionless symbol ") +
+ AssocMCSym->getName());
+ continue;
+ }
- const MCSymbol *COMDAT = MCSec.getCOMDATSymbol();
- assert(COMDAT);
- COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(COMDAT);
- assert(COMDATSymbol);
- COFFSection *Assoc = COMDATSymbol->Section;
- if (!Assoc)
- report_fatal_error(
- Twine("Missing associated COMDAT section for section ") +
- MCSec.getSectionName());
+ const auto *AssocMCSec = cast<MCSectionCOFF>(&AssocMCSym->getSection());
+ assert(SectionMap.count(AssocMCSec));
+ COFFSection *AssocSec = SectionMap[AssocMCSec];
// Skip this section if the associated section is unused.
- if (Assoc->Number == -1)
+ if (AssocSec->Number == -1)
continue;
- Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Assoc->Number;
+ Section->Symbol->Aux[0].Aux.SectionDefinition.Number = AssocSec->Number;
+ }
+
+ // Create the contents of the .llvm_addrsig section.
+ if (EmitAddrsigSection) {
+ auto Frag = new MCDataFragment(AddrsigSection);
+ Frag->setLayoutOrder(0);
+ raw_svector_ostream OS(Frag->getContents());
+ for (const MCSymbol *S : AddrsigSyms) {
+ if (!S->isTemporary()) {
+ encodeULEB128(S->getIndex(), OS);
+ continue;
+ }
+
+ MCSection *TargetSection = &S->getSection();
+ assert(SectionMap.find(TargetSection) != SectionMap.end() &&
+ "Section must already have been defined in "
+ "executePostLayoutBinding!");
+ encodeULEB128(SectionMap[TargetSection]->Symbol->getIndex(), OS);
+ }
}
assignFileOffsets(Asm, Layout);
diff --git a/contrib/llvm/lib/MCA/Context.cpp b/contrib/llvm/lib/MCA/Context.cpp
new file mode 100644
index 000000000000..c1b197dfe2e6
--- /dev/null
+++ b/contrib/llvm/lib/MCA/Context.cpp
@@ -0,0 +1,65 @@
+//===---------------------------- Context.cpp -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a class for holding ownership of various simulated
+/// hardware units. A Context also provides a utility routine for constructing
+/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
+/// stages.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Context.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/MCA/Stages/DispatchStage.h"
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/MCA/Stages/RetireStage.h"
+
+namespace llvm {
+namespace mca {
+
+std::unique_ptr<Pipeline>
+Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
+ SourceMgr &SrcMgr) {
+ const MCSchedModel &SM = STI.getSchedModel();
+
+ // Create the hardware units defining the backend.
+ auto RCU = llvm::make_unique<RetireControlUnit>(SM);
+ auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
+ auto LSU = llvm::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
+ Opts.StoreQueueSize, Opts.AssumeNoAlias);
+ auto HWS = llvm::make_unique<Scheduler>(SM, *LSU);
+
+ // Create the pipeline stages.
+ auto Fetch = llvm::make_unique<EntryStage>(SrcMgr);
+ auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth,
+ *RCU, *PRF);
+ auto Execute = llvm::make_unique<ExecuteStage>(*HWS);
+ auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF);
+
+ // Pass the ownership of all the hardware units to this Context.
+ addHardwareUnit(std::move(RCU));
+ addHardwareUnit(std::move(PRF));
+ addHardwareUnit(std::move(LSU));
+ addHardwareUnit(std::move(HWS));
+
+ // Build the pipeline.
+ auto StagePipeline = llvm::make_unique<Pipeline>();
+ StagePipeline->appendStage(std::move(Fetch));
+ StagePipeline->appendStage(std::move(Dispatch));
+ StagePipeline->appendStage(std::move(Execute));
+ StagePipeline->appendStage(std::move(Retire));
+ return StagePipeline;
+}
+
+} // namespace mca
+} // namespace llvm
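createDefaultPipeline above follows a consistent ownership pattern: hardware units are allocated first, the pipeline stages capture plain references to them, and the Context then takes ownership of the unique_ptrs so the units outlive the stages. A hedged, generic sketch of that pattern (placeholder class names, not the llvm::mca API):

    #include <memory>
    #include <vector>

    struct Unit { virtual ~Unit() = default; };
    struct RetireUnit : Unit {};
    struct Stage {
      explicit Stage(Unit &U) : Owned(U) {}
      Unit &Owned; // non-owning reference into the context
    };

    struct Context {
      std::vector<std::unique_ptr<Unit>> Units;
      std::vector<std::unique_ptr<Stage>> Stages;
      void build() {
        auto RCU = std::make_unique<RetireUnit>();
        Stages.push_back(std::make_unique<Stage>(*RCU)); // stage built against the unit...
        Units.push_back(std::move(RCU));                 // ...then ownership moves to the context
      }
    };

    int main() { Context C; C.build(); return 0; }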
diff --git a/contrib/llvm/tools/llvm-mca/HWEventListener.cpp b/contrib/llvm/lib/MCA/HWEventListener.cpp
index f27a04a9a980..4a0e5b1754dd 100644
--- a/contrib/llvm/tools/llvm-mca/HWEventListener.cpp
+++ b/contrib/llvm/lib/MCA/HWEventListener.cpp
@@ -12,10 +12,12 @@
///
//===----------------------------------------------------------------------===//
-#include "HWEventListener.h"
+#include "llvm/MCA/HWEventListener.h"
+namespace llvm {
namespace mca {
// Anchor the vtable here.
void HWEventListener::anchor() {}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/HardwareUnit.cpp b/contrib/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp
index 103cde9afcc8..edd32b9c0c1a 100644
--- a/contrib/llvm/tools/llvm-mca/HardwareUnit.cpp
+++ b/contrib/llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp
@@ -13,11 +13,13 @@
///
//===----------------------------------------------------------------------===//
-#include "HardwareUnit.h"
+#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+namespace llvm {
namespace mca {
// Pin the vtable with this method.
HardwareUnit::~HardwareUnit() = default;
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/contrib/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
new file mode 100644
index 000000000000..8895eb392b60
--- /dev/null
+++ b/contrib/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -0,0 +1,190 @@
+//===----------------------- LSUnit.cpp --------------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// A Load-Store Unit for the llvm-mca tool.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
+#include "llvm/MCA/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
+ bool AssumeNoAlias)
+ : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (!LQ_Size && EPI.LoadQueueID) {
+ const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
+ LQ_Size = LdQDesc.BufferSize;
+ }
+
+ if (!SQ_Size && EPI.StoreQueueID) {
+ const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
+ SQ_Size = StQDesc.BufferSize;
+ }
+ }
+}
+
+#ifndef NDEBUG
+void LSUnit::dump() const {
+ dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
+ dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
+ dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
+ dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
+}
+#endif
+
+void LSUnit::assignLQSlot(unsigned Index) {
+ assert(!isLQFull());
+ assert(LoadQueue.count(Index) == 0);
+
+ LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index
+ << ",slot=" << LoadQueue.size() << ">\n");
+ LoadQueue.insert(Index);
+}
+
+void LSUnit::assignSQSlot(unsigned Index) {
+ assert(!isSQFull());
+ assert(StoreQueue.count(Index) == 0);
+
+ LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index
+ << ",slot=" << StoreQueue.size() << ">\n");
+ StoreQueue.insert(Index);
+}
+
+void LSUnit::dispatch(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ unsigned IsMemBarrier = Desc.HasSideEffects;
+ assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
+
+ const unsigned Index = IR.getSourceIndex();
+ if (Desc.MayLoad) {
+ if (IsMemBarrier)
+ LoadBarriers.insert(Index);
+ assignLQSlot(Index);
+ }
+
+ if (Desc.MayStore) {
+ if (IsMemBarrier)
+ StoreBarriers.insert(Index);
+ assignSQSlot(Index);
+ }
+}
+
+LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.MayLoad && isLQFull())
+ return LSUnit::LSU_LQUEUE_FULL;
+ if (Desc.MayStore && isSQFull())
+ return LSUnit::LSU_SQUEUE_FULL;
+ return LSUnit::LSU_AVAILABLE;
+}
+
+bool LSUnit::isReady(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ const unsigned Index = IR.getSourceIndex();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+ assert((IsALoad || IsAStore) && "Not a memory operation!");
+ assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
+ assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
+
+ if (IsALoad && !LoadBarriers.empty()) {
+ unsigned LoadBarrierIndex = *LoadBarriers.begin();
+ // A younger load cannot pass an older load barrier.
+ if (Index > LoadBarrierIndex)
+ return false;
+ // A load barrier cannot pass an older load.
+ if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
+ return false;
+ }
+
+ if (IsAStore && !StoreBarriers.empty()) {
+ unsigned StoreBarrierIndex = *StoreBarriers.begin();
+ // A younger store cannot pass an older store barrier.
+ if (Index > StoreBarrierIndex)
+ return false;
+ // A store barrier cannot pass an older store.
+ if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
+ return false;
+ }
+
+ // A load may not pass a previous store unless the 'NoAlias' flag is set.
+ // A load may pass a previous load.
+ if (NoAlias && IsALoad)
+ return true;
+
+ if (StoreQueue.size()) {
+ // A load may not pass a previous store.
+ // A store may not pass a previous store.
+ if (Index > *StoreQueue.begin())
+ return false;
+ }
+
+ // Okay, we are older than the oldest store in the queue.
+ // If there are no pending loads, then we can say for sure that this
+ // instruction is ready.
+ if (isLQEmpty())
+ return true;
+
+ // Check if there are no older loads.
+ if (Index <= *LoadQueue.begin())
+ return true;
+
+ // There is at least one younger load.
+ //
+ // A store may not pass a previous load.
+ // A load may pass a previous load.
+ return !IsAStore;
+}
+
+void LSUnit::onInstructionExecuted(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ const unsigned Index = IR.getSourceIndex();
+ bool IsALoad = Desc.MayLoad;
+ bool IsAStore = Desc.MayStore;
+
+ if (IsALoad) {
+ if (LoadQueue.erase(Index)) {
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the load queue.\n");
+ }
+ if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
+ LLVM_DEBUG(
+ dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the set of load barriers.\n");
+ LoadBarriers.erase(Index);
+ }
+ }
+
+ if (IsAStore) {
+ if (StoreQueue.erase(Index)) {
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the store queue.\n");
+ }
+
+ if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
+ LLVM_DEBUG(
+ dbgs() << "[LSUnit]: Instruction idx=" << Index
+ << " has been removed from the set of store barriers.\n");
+ StoreBarriers.erase(Index);
+ }
+ }
+}
+
+} // namespace mca
+} // namespace llvm
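The ordering rules in LSUnit::isReady lean on the fact that source indices are program order and the queues are ordered sets, so *Queue.begin() is always the oldest in-flight entry. The following simplified sketch applies just the load-barrier rule (ignoring stores and the NoAlias case); the function and variable names are illustrative, not the LSUnit API.

    #include <cstdio>
    #include <set>

    static bool loadIsReady(unsigned Index, const std::set<unsigned> &LoadQueue,
                            const std::set<unsigned> &LoadBarriers) {
      if (!LoadBarriers.empty()) {
        unsigned Barrier = *LoadBarriers.begin();
        if (Index > Barrier)
          return false; // a younger load cannot pass an older load barrier
        if (Index == Barrier && Index != *LoadQueue.begin())
          return false; // a load barrier cannot pass an older load
      }
      return true;
    }

    int main() {
      std::set<unsigned> LoadQueue = {3, 5, 7};
      std::set<unsigned> LoadBarriers = {5};
      printf("%d %d %d\n",
             loadIsReady(3, LoadQueue, LoadBarriers),  // 1: older than the barrier
             loadIsReady(5, LoadQueue, LoadBarriers),  // 0: is the barrier, but load #3 is older
             loadIsReady(7, LoadQueue, LoadBarriers)); // 0: younger than the barrier
      return 0;
    }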
diff --git a/contrib/llvm/tools/llvm-mca/RegisterFile.cpp b/contrib/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
index 44de105b8996..22977e5ded65 100644
--- a/contrib/llvm/tools/llvm-mca/RegisterFile.cpp
+++ b/contrib/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -14,20 +14,20 @@
///
//===----------------------------------------------------------------------===//
-#include "RegisterFile.h"
-#include "Instruction.h"
+#include "llvm/MCA/HardwareUnits/RegisterFile.h"
+#include "llvm/MCA/Instruction.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
-
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
-RegisterFile::RegisterFile(const llvm::MCSchedModel &SM,
- const llvm::MCRegisterInfo &mri, unsigned NumRegs)
- : MRI(mri), RegisterMappings(mri.getNumRegs(),
- {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) {
+RegisterFile::RegisterFile(const MCSchedModel &SM, const MCRegisterInfo &mri,
+ unsigned NumRegs)
+ : MRI(mri),
+ RegisterMappings(mri.getNumRegs(), {WriteRef(), RegisterRenamingInfo()}),
+ ZeroRegisters(mri.getNumRegs(), false) {
initialize(SM, NumRegs);
}
@@ -36,7 +36,7 @@ void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
// declared by the target. The number of physical registers in the default
// register file is set equal to `NumRegs`. A value of zero for `NumRegs`
// means: this register file has an unbounded number of physical registers.
- addRegisterFile({} /* all registers */, NumRegs);
+ RegisterFiles.emplace_back(NumRegs);
if (!SM.hasExtraProcessorInfo())
return;
@@ -44,30 +44,36 @@ void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
// object. The size of every register file, as well as the mapping between
// register files and register classes is specified via tablegen.
const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo();
- for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) {
+
+ // Skip invalid register file at index 0.
+ for (unsigned I = 1, E = Info.NumRegisterFiles; I < E; ++I) {
const MCRegisterFileDesc &RF = Info.RegisterFiles[I];
- // Skip invalid register files with zero physical registers.
- unsigned Length = RF.NumRegisterCostEntries;
- if (!RF.NumPhysRegs)
- continue;
+ assert(RF.NumPhysRegs && "Invalid PRF with zero physical registers!");
+
// The cost of a register definition is equivalent to the number of
// physical registers that are allocated at register renaming stage.
+ unsigned Length = RF.NumRegisterCostEntries;
const MCRegisterCostEntry *FirstElt =
&Info.RegisterCostTable[RF.RegisterCostEntryIdx];
- addRegisterFile(ArrayRef<MCRegisterCostEntry>(FirstElt, Length),
- RF.NumPhysRegs);
+ addRegisterFile(RF, ArrayRef<MCRegisterCostEntry>(FirstElt, Length));
}
}
-void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
- unsigned NumPhysRegs) {
+void RegisterFile::cycleStart() {
+ for (RegisterMappingTracker &RMT : RegisterFiles)
+ RMT.NumMoveEliminated = 0;
+}
+
+void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF,
+ ArrayRef<MCRegisterCostEntry> Entries) {
// A default register file is always allocated at index #0. That register file
// is mainly used to count the total number of mappings created by all
// register files at runtime. Users can limit the number of available physical
// registers in register file #0 through the command line flag
// `-register-file-size`.
unsigned RegisterFileIndex = RegisterFiles.size();
- RegisterFiles.emplace_back(NumPhysRegs);
+ RegisterFiles.emplace_back(RF.NumPhysRegs, RF.MaxMovesEliminatedPerCycle,
+ RF.AllowZeroMoveEliminationOnly);
// Special case where there is no register class identifier in the set.
// An empty set of register classes means: this register file contains all
@@ -93,6 +99,7 @@ void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
}
IPC = std::make_pair(RegisterFileIndex, RCE.Cost);
Entry.RenameAs = Reg;
+ Entry.AllowMoveElimination = RCE.AllowMoveElimination;
// Assume the same cost for each sub-register.
for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
@@ -139,8 +146,7 @@ void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry,
}
void RegisterFile::addRegisterWrite(WriteRef Write,
- MutableArrayRef<unsigned> UsedPhysRegs,
- bool ShouldAllocatePhysRegs) {
+ MutableArrayRef<unsigned> UsedPhysRegs) {
WriteState &WS = *Write.getWriteState();
unsigned RegID = WS.getRegisterID();
assert(RegID && "Adding an invalid register definition?");
@@ -163,10 +169,15 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// a false dependency on RenameAs. The only exception is for when the write
// implicitly clears the upper portion of the underlying register.
// If a write clears its super-registers, then it is renamed as `RenameAs`.
+ bool IsWriteZero = WS.isWriteZero();
+ bool IsEliminated = WS.isEliminated();
+ bool ShouldAllocatePhysRegs = !IsWriteZero && !IsEliminated;
const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ WS.setPRF(RRI.IndexPlusCost.first);
+
if (RRI.RenameAs && RRI.RenameAs != RegID) {
RegID = RRI.RenameAs;
- const WriteRef &OtherWrite = RegisterMappings[RegID].first;
+ WriteRef &OtherWrite = RegisterMappings[RegID].first;
if (!WS.clearsSuperRegisters()) {
// The processor keeps the definition of `RegID` together with register
@@ -174,34 +185,69 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// register is allocated.
ShouldAllocatePhysRegs = false;
- if (OtherWrite.getSourceIndex() != Write.getSourceIndex()) {
+ WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
// This partial write has a false dependency on RenameAs.
- WS.setDependentWrite(OtherWrite.getWriteState());
+ assert(!IsEliminated && "Unexpected partial update!");
+ OtherWS->addUser(&WS);
}
}
}
- // Update the mapping for register RegID including its sub-registers.
- RegisterMappings[RegID].first = Write;
- for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
- RegisterMappings[*I].first = Write;
+ // Update zero registers.
+ unsigned ZeroRegisterID =
+ WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
+ if (IsWriteZero) {
+ ZeroRegisters.setBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.setBit(*I);
+ } else {
+ ZeroRegisters.clearBit(ZeroRegisterID);
+ for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
+ ZeroRegisters.clearBit(*I);
+ }
+
+ // If this move has been eliminated, then the call to tryEliminateMove
+ // should have already updated all the register mappings.
+ if (!IsEliminated) {
+ // Update the mapping for register RegID including its sub-registers.
+ RegisterMappings[RegID].first = Write;
+ RegisterMappings[RegID].second.AliasRegID = 0U;
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
- // No physical registers are allocated for instructions that are optimized in
- // hardware. For example, zero-latency data-dependency breaking instructions
- // don't consume physical registers.
- if (ShouldAllocatePhysRegs)
- allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ // No physical registers are allocated for instructions that are optimized
+ // in hardware. For example, zero-latency data-dependency breaking
+ // instructions don't consume physical registers.
+ if (ShouldAllocatePhysRegs)
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ }
if (!WS.clearsSuperRegisters())
return;
- for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I)
- RegisterMappings[*I].first = Write;
+ for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ if (!IsEliminated) {
+ RegisterMappings[*I].first = Write;
+ RegisterMappings[*I].second.AliasRegID = 0U;
+ }
+
+ if (IsWriteZero)
+ ZeroRegisters.setBit(*I);
+ else
+ ZeroRegisters.clearBit(*I);
+ }
}
-void RegisterFile::removeRegisterWrite(const WriteState &WS,
- MutableArrayRef<unsigned> FreedPhysRegs,
- bool ShouldFreePhysRegs) {
+void RegisterFile::removeRegisterWrite(
+ const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
+ // Early exit if this write was eliminated. A write eliminated at register
+ // renaming stage generates an alias, and it is not added to the PRF.
+ if (WS.isEliminated())
+ return;
+
unsigned RegID = WS.getRegisterID();
assert(RegID != 0 && "Invalidating an already invalid register?");
@@ -209,6 +255,7 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS,
"Invalidating a write of unknown cycles!");
assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
+ bool ShouldFreePhysRegs = !WS.isWriteZero();
unsigned RenameAs = RegisterMappings[RegID].second.RenameAs;
if (RenameAs && RenameAs != RegID) {
RegID = RenameAs;
@@ -242,11 +289,88 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS,
}
}
-void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes,
- unsigned RegID) const {
+bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
+ const RegisterMapping &RMFrom = RegisterMappings[RS.getRegisterID()];
+ const RegisterMapping &RMTo = RegisterMappings[WS.getRegisterID()];
+
+ // From and To must be owned by the same PRF.
+ const RegisterRenamingInfo &RRIFrom = RMFrom.second;
+ const RegisterRenamingInfo &RRITo = RMTo.second;
+ unsigned RegisterFileIndex = RRIFrom.IndexPlusCost.first;
+ if (RegisterFileIndex != RRITo.IndexPlusCost.first)
+ return false;
+
+ // We only allow move elimination for writes that update a full physical
+ // register. On X86, move elimination is possible with 32-bit general purpose
+ // registers because writes to those registers are not partial writes. If a
+ // register move is a partial write, then we conservatively assume that move
+ // elimination fails, since it would either trigger a partial update, or the
+ // issue of a merge opcode.
+ //
+ // Note that this constraint may be lifted in the future. For example, we could
+ // make this model more flexible, and let users customize the set of registers
+ // (i.e. register classes) that allow move elimination.
+ //
+ // For now, we assume that there is a strong correlation between registers
+ // that allow move elimination, and how those same registers are renamed in
+ // hardware.
+ if (RRITo.RenameAs && RRITo.RenameAs != WS.getRegisterID()) {
+ // Early exit if the PRF doesn't support move elimination for this register.
+ if (!RegisterMappings[RRITo.RenameAs].second.AllowMoveElimination)
+ return false;
+ if (!WS.clearsSuperRegisters())
+ return false;
+ }
+
+ RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
+ if (RMT.MaxMoveEliminatedPerCycle &&
+ RMT.NumMoveEliminated == RMT.MaxMoveEliminatedPerCycle)
+ return false;
+
+ bool IsZeroMove = ZeroRegisters[RS.getRegisterID()];
+ if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove)
+ return false;
+
+ MCPhysReg FromReg = RS.getRegisterID();
+ MCPhysReg ToReg = WS.getRegisterID();
+
+ // Construct an alias.
+ MCPhysReg AliasReg = FromReg;
+ if (RRIFrom.RenameAs)
+ AliasReg = RRIFrom.RenameAs;
+
+ const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second;
+ if (RMAlias.AliasRegID)
+ AliasReg = RMAlias.AliasRegID;
+
+ if (AliasReg != ToReg) {
+ RegisterMappings[ToReg].second.AliasRegID = AliasReg;
+ for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I)
+ RegisterMappings[*I].second.AliasRegID = AliasReg;
+ }
+
+ RMT.NumMoveEliminated++;
+ if (IsZeroMove) {
+ WS.setWriteZero();
+ RS.setReadZero();
+ }
+ WS.setEliminated();
+
+ return true;
+}
+
+void RegisterFile::collectWrites(const ReadState &RS,
+ SmallVectorImpl<WriteRef> &Writes) const {
+ unsigned RegID = RS.getRegisterID();
assert(RegID && RegID < RegisterMappings.size());
LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
<< MRI.getName(RegID) << '\n');
+
+ // Check if this is an alias.
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ if (RRI.AliasRegID)
+ RegID = RRI.AliasRegID;
+
const WriteRef &WR = RegisterMappings[RegID].first;
if (WR.isValid())
Writes.push_back(WR);
@@ -259,23 +383,38 @@ void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes,
}
// Remove duplicate entries and resize the input vector.
- llvm::sort(Writes.begin(), Writes.end(),
- [](const WriteRef &Lhs, const WriteRef &Rhs) {
- return Lhs.getWriteState() < Rhs.getWriteState();
- });
- auto It = std::unique(Writes.begin(), Writes.end());
- Writes.resize(std::distance(Writes.begin(), It));
+ if (Writes.size() > 1) {
+ sort(Writes, [](const WriteRef &Lhs, const WriteRef &Rhs) {
+ return Lhs.getWriteState() < Rhs.getWriteState();
+ });
+ auto It = std::unique(Writes.begin(), Writes.end());
+ Writes.resize(std::distance(Writes.begin(), It));
+ }
LLVM_DEBUG({
for (const WriteRef &WR : Writes) {
const WriteState &WS = *WR.getWriteState();
dbgs() << "[PRF] Found a dependent use of Register "
- << MRI.getName(WS.getRegisterID()) << " (defined by intruction #"
+ << MRI.getName(WS.getRegisterID()) << " (defined by instruction #"
<< WR.getSourceIndex() << ")\n";
}
});
}
+void RegisterFile::addRegisterRead(ReadState &RS,
+ SmallVectorImpl<WriteRef> &Defs) const {
+ unsigned RegID = RS.getRegisterID();
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ RS.setPRF(RRI.IndexPlusCost.first);
+ if (RS.isIndependentFromDef())
+ return;
+
+ if (ZeroRegisters[RS.getRegisterID()])
+ RS.setReadZero();
+ collectWrites(RS, Defs);
+ RS.setDependentWrites(Defs.size());
+}
+
unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());
@@ -306,8 +445,14 @@ unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
// microarchitectural registers in register file #0 was changed by the
// users via flag -reg-file-size. Alternatively, the scheduling model
// specified a too small number of registers for this register file.
- report_fatal_error(
- "Not enough microarchitectural registers in the register file");
+ LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n");
+
+ // FIXME: Normalize the instruction register count to match the
+ // NumPhysRegs value. This is a highly unusual case, and is not expected
+ // to occur. This normalization is hiding an inconsistency in either the
+ // scheduling model or in the value that the user might have specified
+ // for NumPhysRegs.
+ NumRegs = RMT.NumPhysRegs;
}
if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs))
@@ -321,14 +466,16 @@ unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
void RegisterFile::dump() const {
for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) {
const RegisterMapping &RM = RegisterMappings[I];
- if (!RM.first.getWriteState())
- continue;
const RegisterRenamingInfo &RRI = RM.second;
- dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first
- << ", Cost=" << RRI.IndexPlusCost.second
- << ", RenameAs=" << RRI.RenameAs << ", ";
- RM.first.dump();
- dbgs() << '\n';
+ if (ZeroRegisters[I]) {
+ dbgs() << MRI.getName(I) << ", " << I
+ << ", PRF=" << RRI.IndexPlusCost.first
+ << ", Cost=" << RRI.IndexPlusCost.second
+ << ", RenameAs=" << RRI.RenameAs << ", IsZero=" << ZeroRegisters[I]
+ << ",";
+ RM.first.dump();
+ dbgs() << '\n';
+ }
}
for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
@@ -341,3 +488,4 @@ void RegisterFile::dump() const {
#endif
} // namespace mca
+} // namespace llvm
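The move-elimination changes in this file hinge on aliasing rather than allocation: an eliminated move consumes no physical register, the destination's mapping records an AliasRegID, and later reads of the destination are redirected through that alias when dependent writes are collected. A hedged sketch of the idea, with plain arrays standing in for the RegisterMappings table (not the mca::RegisterFile API):

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumRegs = 8;
      std::vector<unsigned> AliasRegID(NumRegs, 0); // 0 means "no alias"
      std::vector<int> LastWriter(NumRegs, -1);     // instruction index of the last definition

      LastWriter[3] = 7;  // instruction #7 defines r3
      AliasRegID[5] = 3;  // "mov r5, r3" is eliminated: r5 now aliases r3

      unsigned ReadReg = 5;
      if (AliasRegID[ReadReg])
        ReadReg = AliasRegID[ReadReg]; // redirect the read through the alias
      printf("read of r5 depends on instruction #%d via r%u\n",
             LastWriter[ReadReg], ReadReg); // instruction #7 via r3
      return 0;
    }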
diff --git a/contrib/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/contrib/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
new file mode 100644
index 000000000000..2039b58e8ee5
--- /dev/null
+++ b/contrib/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -0,0 +1,331 @@
+//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The classes here represent processor resource units and their management
+/// strategy. These classes are managed by the Scheduler.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/ResourceManager.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+ResourceStrategy::~ResourceStrategy() = default;
+
+// Returns the index of the highest bit set. For resource masks, the position of
+// the highest bit set can be used to construct a resource mask identifier.
+static unsigned getResourceStateIndex(uint64_t Mask) {
+ return std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask);
+}
+
+static uint64_t selectImpl(uint64_t CandidateMask,
+ uint64_t &NextInSequenceMask) {
+ // The upper bit set in CandidateMask identifies our next candidate resource.
+ CandidateMask = 1ULL << (getResourceStateIndex(CandidateMask) - 1);
+ NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
+ return CandidateMask;
+}
+
+uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) {
+ // This method assumes that ReadyMask cannot be zero.
+ uint64_t CandidateMask = ReadyMask & NextInSequenceMask;
+ if (CandidateMask)
+ return selectImpl(CandidateMask, NextInSequenceMask);
+
+ NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence;
+ RemovedFromNextInSequence = 0;
+ CandidateMask = ReadyMask & NextInSequenceMask;
+ if (CandidateMask)
+ return selectImpl(CandidateMask, NextInSequenceMask);
+
+ NextInSequenceMask = ResourceUnitMask;
+ CandidateMask = ReadyMask & NextInSequenceMask;
+ return selectImpl(CandidateMask, NextInSequenceMask);
+}
+
+void DefaultResourceStrategy::used(uint64_t Mask) {
+ if (Mask > NextInSequenceMask) {
+ RemovedFromNextInSequence |= Mask;
+ return;
+ }
+
+ NextInSequenceMask &= (~Mask);
+ if (NextInSequenceMask)
+ return;
+
+ NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence;
+ RemovedFromNextInSequence = 0;
+}
+
+ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index,
+ uint64_t Mask)
+ : ProcResourceDescIndex(Index), ResourceMask(Mask),
+ BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask) > 1) {
+ if (IsAGroup) {
+ ResourceSizeMask =
+ ResourceMask ^ 1ULL << (getResourceStateIndex(ResourceMask) - 1);
+ } else {
+ ResourceSizeMask = (1ULL << Desc.NumUnits) - 1;
+ }
+ ReadyMask = ResourceSizeMask;
+ AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
+ Unavailable = false;
+}
+
+bool ResourceState::isReady(unsigned NumUnits) const {
+ return (!isReserved() || isADispatchHazard()) &&
+ countPopulation(ReadyMask) >= NumUnits;
+}
+
+ResourceStateEvent ResourceState::isBufferAvailable() const {
+ if (isADispatchHazard() && isReserved())
+ return RS_RESERVED;
+ if (!isBuffered() || AvailableSlots)
+ return RS_BUFFER_AVAILABLE;
+ return RS_BUFFER_UNAVAILABLE;
+}
+
+#ifndef NDEBUG
+void ResourceState::dump() const {
+ dbgs() << "MASK=" << format_hex(ResourceMask, 16)
+ << ", SZMASK=" << format_hex(ResourceSizeMask, 16)
+ << ", RDYMASK=" << format_hex(ReadyMask, 16)
+ << ", BufferSize=" << BufferSize
+ << ", AvailableSlots=" << AvailableSlots
+ << ", Reserved=" << Unavailable << '\n';
+}
+#endif
+
+static std::unique_ptr<ResourceStrategy>
+getStrategyFor(const ResourceState &RS) {
+ if (RS.isAResourceGroup() || RS.getNumUnits() > 1)
+ return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask());
+ return std::unique_ptr<ResourceStrategy>(nullptr);
+}
+
+ResourceManager::ResourceManager(const MCSchedModel &SM)
+ : Resources(SM.getNumProcResourceKinds()),
+ Strategies(SM.getNumProcResourceKinds()),
+ Resource2Groups(SM.getNumProcResourceKinds(), 0),
+ ProcResID2Mask(SM.getNumProcResourceKinds()) {
+ computeProcResourceMasks(SM, ProcResID2Mask);
+
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = ProcResID2Mask[I];
+ unsigned Index = getResourceStateIndex(Mask);
+ Resources[Index] =
+ llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask);
+ Strategies[Index] = getStrategyFor(*Resources[Index]);
+ }
+
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ uint64_t Mask = ProcResID2Mask[I];
+ unsigned Index = getResourceStateIndex(Mask);
+ const ResourceState &RS = *Resources[Index];
+ if (!RS.isAResourceGroup())
+ continue;
+
+ uint64_t GroupMaskIdx = 1ULL << (Index - 1);
+ Mask -= GroupMaskIdx;
+ while (Mask) {
+ // Extract lowest set isolated bit.
+ uint64_t Unit = Mask & (-Mask);
+ unsigned IndexUnit = getResourceStateIndex(Unit);
+ Resource2Groups[IndexUnit] |= GroupMaskIdx;
+ Mask ^= Unit;
+ }
+ }
+}
+
+void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+ uint64_t ResourceMask) {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ assert(Index < Resources.size() && "Invalid processor resource index!");
+ assert(S && "Unexpected null strategy in input!");
+ Strategies[Index] = std::move(S);
+}
+
+unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const {
+ return Resources[getResourceStateIndex(Mask)]->getProcResourceID();
+}
+
+unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const {
+ return Resources[getResourceStateIndex(ResourceID)]->getNumUnits();
+}
+
+// Returns the actual resource consumed by this Use.
+// First is the primary resource ID.
+// Second is the specific sub-resource ID.
+ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) {
+ unsigned Index = getResourceStateIndex(ResourceID);
+ assert(Index < Resources.size() && "Invalid resource use!");
+ ResourceState &RS = *Resources[Index];
+ assert(RS.isReady() && "No available units to select!");
+
+ // Special case where RS is not a group, and it only declares a single
+ // resource unit.
+ if (!RS.isAResourceGroup() && RS.getNumUnits() == 1)
+ return std::make_pair(ResourceID, RS.getReadyMask());
+
+ uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask());
+ if (RS.isAResourceGroup())
+ return selectPipe(SubResourceID);
+ return std::make_pair(ResourceID, SubResourceID);
+}
+
+void ResourceManager::use(const ResourceRef &RR) {
+ // Mark the sub-resource referenced by RR as used.
+ unsigned RSID = getResourceStateIndex(RR.first);
+ ResourceState &RS = *Resources[RSID];
+ RS.markSubResourceAsUsed(RR.second);
+ // Remember to update the resource strategy for non-group resources with
+ // multiple units.
+ if (RS.getNumUnits() > 1)
+ Strategies[RSID]->used(RR.second);
+
+ // If there are still available units in RR.first,
+ // then we are done.
+ if (RS.isReady())
+ return;
+
+ // Notify groups that RR.first is no longer available.
+ uint64_t Users = Resource2Groups[RSID];
+ while (Users) {
+ // Extract lowest set isolated bit.
+ unsigned GroupIndex = getResourceStateIndex(Users & (-Users));
+ ResourceState &CurrentUser = *Resources[GroupIndex];
+ CurrentUser.markSubResourceAsUsed(RR.first);
+ Strategies[GroupIndex]->used(RR.first);
+ // Reset lowest set bit.
+ Users &= Users - 1;
+ }
+}
+
+void ResourceManager::release(const ResourceRef &RR) {
+ ResourceState &RS = *Resources[getResourceStateIndex(RR.first)];
+ bool WasFullyUsed = !RS.isReady();
+ RS.releaseSubResource(RR.second);
+ if (!WasFullyUsed)
+ return;
+
+ for (std::unique_ptr<ResourceState> &Res : Resources) {
+ ResourceState &Current = *Res;
+ if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
+ continue;
+
+ if (Current.containsResource(RR.first))
+ Current.releaseSubResource(RR.first);
+ }
+}
+
+ResourceStateEvent
+ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const {
+ ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
+ for (uint64_t Buffer : Buffers) {
+ ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+ Result = RS.isBufferAvailable();
+ if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
+ break;
+ }
+ return Result;
+}
+
+void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) {
+ for (const uint64_t Buffer : Buffers) {
+ ResourceState &RS = *Resources[getResourceStateIndex(Buffer)];
+ assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE);
+ RS.reserveBuffer();
+
+ if (RS.isADispatchHazard()) {
+ assert(!RS.isReserved());
+ RS.setReserved();
+ }
+ }
+}
+
+void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
+ for (const uint64_t R : Buffers)
+ Resources[getResourceStateIndex(R)]->releaseBuffer();
+}
+
+bool ResourceManager::canBeIssued(const InstrDesc &Desc) const {
+ return all_of(
+ Desc.Resources, [&](const std::pair<uint64_t, const ResourceUsage> &E) {
+ unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
+ unsigned Index = getResourceStateIndex(E.first);
+ return Resources[Index]->isReady(NumUnits);
+ });
+}
+
+void ResourceManager::issueInstruction(
+ const InstrDesc &Desc,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) {
+ for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
+ const CycleSegment &CS = R.second.CS;
+ if (!CS.size()) {
+ releaseResource(R.first);
+ continue;
+ }
+
+ assert(CS.begin() == 0 && "Invalid {Start, End} cycles!");
+ if (!R.second.isReserved()) {
+ ResourceRef Pipe = selectPipe(R.first);
+ use(Pipe);
+ BusyResources[Pipe] += CS.size();
+ Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
+ Pipe, ResourceCycles(CS.size())));
+ } else {
+ assert((countPopulation(R.first) > 1) && "Expected a group!");
+ // Mark this group as reserved.
+ assert(R.second.isReserved());
+ reserveResource(R.first);
+ BusyResources[ResourceRef(R.first, R.first)] += CS.size();
+ }
+ }
+}
+
+void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
+ for (std::pair<ResourceRef, unsigned> &BR : BusyResources) {
+ if (BR.second)
+ BR.second--;
+ if (!BR.second) {
+ // Release this resource.
+ const ResourceRef &RR = BR.first;
+
+ if (countPopulation(RR.first) == 1)
+ release(RR);
+
+ releaseResource(RR.first);
+ ResourcesFreed.push_back(RR);
+ }
+ }
+
+ for (const ResourceRef &RF : ResourcesFreed)
+ BusyResources.erase(RF);
+}
+
+void ResourceManager::reserveResource(uint64_t ResourceID) {
+ ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
+ assert(!Resource.isReserved());
+ Resource.setReserved();
+}
+
+void ResourceManager::releaseResource(uint64_t ResourceID) {
+ ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
+ Resource.clearReserved();
+}
+
+} // namespace mca
+} // namespace llvm
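DefaultResourceStrategy::select above walks a bit-mask of ready units in round-robin order: it picks the highest set bit inside the current "next in sequence" window, then shrinks the window so lower units get their turn before the sequence wraps. A simplified, self-contained sketch of that selection loop follows; it is not part of the patch, the helper names are invented, and the window update is slightly simplified with respect to selectImpl.

// Illustrative sketch only (not part of the patch above).
#include <cstdint>
#include <cstdio>

// Index of the highest set bit, 1-based (mirrors getResourceStateIndex above).
static unsigned highestBitIndex(uint64_t Mask) {
  unsigned Idx = 0;
  while (Mask) { Mask >>= 1; ++Idx; }
  return Idx;
}

// Pick the highest ready unit still inside the current round, then shrink the
// "next in sequence" window so the remaining units are consumed in order.
static uint64_t selectRoundRobin(uint64_t ReadyMask, uint64_t &NextInSequence,
                                 uint64_t AllUnits) {
  uint64_t Candidates = ReadyMask & NextInSequence;
  if (!Candidates) {                  // every unit had its turn; start a new round
    NextInSequence = AllUnits;
    Candidates = ReadyMask & NextInSequence;
  }
  uint64_t Picked = 1ULL << (highestBitIndex(Candidates) - 1);
  NextInSequence &= Picked - 1;       // only lower units remain in this round
  return Picked;
}

int main() {
  uint64_t AllUnits = 0b111, Next = AllUnits;
  for (int I = 0; I < 4; ++I)
    std::printf("picked unit mask: %llu\n",
                (unsigned long long)selectRoundRobin(AllUnits, Next, AllUnits));
  // Picks 4, 2, 1, then wraps around to 4 again.
}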
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnit.cpp b/contrib/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
index 123058541f28..de9f24552c38 100644
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnit.cpp
+++ b/contrib/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -12,16 +12,15 @@
///
//===----------------------------------------------------------------------===//
-#include "RetireControlUnit.h"
+#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
-
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
-RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM)
+RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
: NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) {
// Check if the scheduling model provides extra information about the machine
@@ -41,10 +40,10 @@ RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM)
// Reserves a number of slots, and returns a new token.
unsigned RetireControlUnit::reserveSlot(const InstRef &IR,
unsigned NumMicroOps) {
- assert(isAvailable(NumMicroOps));
+ assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!");
unsigned NormalizedQuantity =
std::min(NumMicroOps, static_cast<unsigned>(Queue.size()));
- // Zero latency instructions may have zero mOps. Artificially bump this
+ // Zero latency instructions may have zero uOps. Artificially bump this
// value to 1. Although zero latency instructions don't consume scheduler
// resources, they still consume one slot in the retire queue.
NormalizedQuantity = std::max(NormalizedQuantity, 1U);
@@ -61,9 +60,10 @@ const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const {
}
void RetireControlUnit::consumeCurrentToken() {
- const RetireControlUnit::RUToken &Current = peekCurrentToken();
+ RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx];
assert(Current.NumSlots && "Reserved zero slots?");
- assert(Current.IR.isValid() && "Invalid RUToken in the RCU queue.");
+ assert(Current.IR && "Invalid RUToken in the RCU queue.");
+ Current.IR.getInstruction()->retire();
// Update the slot index to be the next item in the circular queue.
CurrentInstructionSlotIdx += Current.NumSlots;
@@ -73,7 +73,7 @@ void RetireControlUnit::consumeCurrentToken() {
void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
assert(Queue.size() > TokenID);
- assert(Queue[TokenID].Executed == false && Queue[TokenID].IR.isValid());
+ assert(Queue[TokenID].Executed == false && Queue[TokenID].IR);
Queue[TokenID].Executed = true;
}
@@ -85,3 +85,4 @@ void RetireControlUnit::dump() const {
#endif
} // namespace mca
+} // namespace llvm
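RetireControlUnit::reserveSlot above hands out a token into a circular retire queue, bumping zero-uOp instructions to one slot so they still occupy an entry. Below is a minimal sketch of that token/slot bookkeeping; it is not part of the patch, the MiniRCU type is invented for illustration, and the quantity normalization is simplified.

// Illustrative sketch only (not part of the patch above).
#include <cassert>
#include <cstdio>

struct MiniRCU {
  unsigned Size;
  unsigned Next = 0;       // next free slot; doubles as the returned token id
  unsigned Available;

  explicit MiniRCU(unsigned S) : Size(S), Available(S) {}

  unsigned reserveSlot(unsigned NumMicroOps) {
    // Zero-latency instructions may have zero uOps but still take one slot.
    unsigned Quantity = NumMicroOps ? NumMicroOps : 1;
    assert(Quantity <= Available && "Reorder buffer unavailable!");
    unsigned Token = Next;
    Next = (Next + Quantity) % Size;   // the retire queue is circular
    Available -= Quantity;
    return Token;
  }
};

int main() {
  MiniRCU RCU(8);
  unsigned A = RCU.reserveSlot(2);
  unsigned B = RCU.reserveSlot(0);
  std::printf("tokens: %u %u\n", A, B); // prints "tokens: 0 2"
}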
diff --git a/contrib/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/contrib/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
new file mode 100644
index 000000000000..355ef79d06a6
--- /dev/null
+++ b/contrib/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -0,0 +1,247 @@
+//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A scheduler for processor resource units and processor resource groups.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) {
+ // Ensure we have a valid (non-null) strategy object.
+ Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>();
+}
+
+// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy.
+SchedulerStrategy::~SchedulerStrategy() = default;
+DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default;
+
+#ifndef NDEBUG
+void Scheduler::dump() const {
+ dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n';
+ dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n';
+ dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n';
+ Resources->dump();
+}
+#endif
+
+Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+
+ switch (Resources->canBeDispatched(Desc.Buffers)) {
+ case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
+ return Scheduler::SC_BUFFERS_FULL;
+ case ResourceStateEvent::RS_RESERVED:
+ return Scheduler::SC_DISPATCH_GROUP_STALL;
+ case ResourceStateEvent::RS_BUFFER_AVAILABLE:
+ break;
+ }
+
+ // Give lower priority to LSUnit stall events.
+ switch (LSU.isAvailable(IR)) {
+ case LSUnit::LSU_LQUEUE_FULL:
+ return Scheduler::SC_LOAD_QUEUE_FULL;
+ case LSUnit::LSU_SQUEUE_FULL:
+ return Scheduler::SC_STORE_QUEUE_FULL;
+ case LSUnit::LSU_AVAILABLE:
+ return Scheduler::SC_AVAILABLE;
+ }
+
+ llvm_unreachable("Don't know how to process this LSU state result!");
+}
+
+void Scheduler::issueInstructionImpl(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) {
+ Instruction *IS = IR.getInstruction();
+ const InstrDesc &D = IS->getDesc();
+
+ // Issue the instruction and collect all the consumed resources
+ // into a vector. That vector is then used to notify the listener.
+ Resources->issueInstruction(D, UsedResources);
+
+ // Notify the instruction that it started executing.
+ // This updates the internal state of each write.
+ IS->execute();
+
+ if (IS->isExecuting())
+ IssuedSet.emplace_back(IR);
+ else if (IS->isExecuted())
+ LSU.onInstructionExecuted(IR);
+}
+
+// Release the buffered resources and issue the instruction.
+void Scheduler::issueInstruction(
+ InstRef &IR,
+ SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
+ SmallVectorImpl<InstRef> &ReadyInstructions) {
+ const Instruction &Inst = *IR.getInstruction();
+ bool HasDependentUsers = Inst.hasDependentUsers();
+
+ Resources->releaseBuffers(Inst.getDesc().Buffers);
+ issueInstructionImpl(IR, UsedResources);
+ // Instructions that have been issued during this cycle might have unblocked
+ // other dependent instructions. Dependent instructions may be issued during
+ // this same cycle if operands have ReadAdvance entries. Promote those
+ // instructions to the ReadySet and notify the caller that those are ready.
+ if (HasDependentUsers)
+ promoteToReadySet(ReadyInstructions);
+}
+
+void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
+ // Scan the set of waiting instructions and promote them to the
+ // ready queue if operands are all ready.
+ unsigned RemovedElements = 0;
+ for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+
+ // Check if this instruction is now ready. If so, force
+ // a transition in state using method 'update()'.
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isReady())
+ IS.update();
+
+ // Check if there are still unsolved data dependencies.
+ if (!isReady(IR)) {
+ ++I;
+ continue;
+ }
+
+ Ready.emplace_back(IR);
+ ReadySet.emplace_back(IR);
+
+ IR.invalidate();
+ ++RemovedElements;
+ std::iter_swap(I, E - RemovedElements);
+ }
+
+ WaitSet.resize(WaitSet.size() - RemovedElements);
+}
+
+InstRef Scheduler::select() {
+ unsigned QueueIndex = ReadySet.size();
+ for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
+ const InstRef &IR = ReadySet[I];
+ if (QueueIndex == ReadySet.size() ||
+ Strategy->compare(IR, ReadySet[QueueIndex])) {
+ const InstrDesc &D = IR.getInstruction()->getDesc();
+ if (Resources->canBeIssued(D))
+ QueueIndex = I;
+ }
+ }
+
+ if (QueueIndex == ReadySet.size())
+ return InstRef();
+
+ // We found an instruction to issue.
+ InstRef IR = ReadySet[QueueIndex];
+ std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]);
+ ReadySet.pop_back();
+ return IR;
+}
+
+void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
+ unsigned RemovedElements = 0;
+ for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isExecuted()) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+ << " is still executing.\n");
+ ++I;
+ continue;
+ }
+
+ // Instruction IR has completed execution.
+ LSU.onInstructionExecuted(IR);
+ Executed.emplace_back(IR);
+ ++RemovedElements;
+ IR.invalidate();
+ std::iter_swap(I, E - RemovedElements);
+ }
+
+ IssuedSet.resize(IssuedSet.size() - RemovedElements);
+}
+
+void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
+ SmallVectorImpl<InstRef> &Executed,
+ SmallVectorImpl<InstRef> &Ready) {
+ // Release consumed resources.
+ Resources->cycleEvent(Freed);
+
+ // Propagate the cycle event to the 'Issued' and 'Wait' sets.
+ for (InstRef &IR : IssuedSet)
+ IR.getInstruction()->cycleEvent();
+
+ updateIssuedSet(Executed);
+
+ for (InstRef &IR : WaitSet)
+ IR.getInstruction()->cycleEvent();
+
+ promoteToReadySet(Ready);
+}
+
+bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.isZeroLatency())
+ return true;
+ // Instructions that use an in-order dispatch/issue processor resource must be
+ // issued immediately to the pipeline(s). Any other in-order buffered
+ // resources (i.e. BufferSize=1) are consumed.
+ return Desc.MustIssueImmediately;
+}
+
+void Scheduler::dispatch(const InstRef &IR) {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ Resources->reserveBuffers(Desc.Buffers);
+
+ // If necessary, reserve queue entries in the load-store unit (LSU).
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore;
+ if (IsMemOp)
+ LSU.dispatch(IR);
+
+ if (!isReady(IR)) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
+ WaitSet.push_back(IR);
+ return;
+ }
+
+ // Don't add a zero-latency instruction to the Ready queue.
+ // A zero-latency instruction doesn't consume any scheduler resources. That is
+ // because it doesn't need to be executed, and it is often removed at register
+ // renaming stage. For example, register-register moves are often optimized at
+ // register renaming stage by simply updating register aliases. On some
+ // targets, zero-idiom instructions (for example: a xor that clears the value
+ // of a register) are treated specially, and are often eliminated at register
+ // renaming stage.
+ if (!mustIssueImmediately(IR)) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
+ ReadySet.push_back(IR);
+ }
+}
+
+bool Scheduler::isReady(const InstRef &IR) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore;
+ return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR));
+}
+
+} // namespace mca
+} // namespace llvm
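Scheduler::select above scans the ReadySet, keeps the best candidate according to the scheduler strategy among the instructions whose pipelines are free, then swaps the winner with the last element and pops it. A self-contained sketch of that scan follows; it is not part of the patch, and the "older is better" comparison merely stands in for DefaultSchedulerStrategy.

// Illustrative sketch only (not part of the patch above).
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Pair: {source index (age), resources available?}
  std::vector<std::pair<unsigned, bool>> ReadySet = {
      {3, true}, {1, false}, {2, true}};

  unsigned Best = ReadySet.size();
  for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
    if (!ReadySet[I].second)              // skip it if its pipelines are busy
      continue;
    if (Best == ReadySet.size() || ReadySet[I].first < ReadySet[Best].first)
      Best = I;                           // prefer the older instruction
  }

  if (Best != ReadySet.size()) {
    auto Picked = ReadySet[Best];
    std::swap(ReadySet[Best], ReadySet.back());
    ReadySet.pop_back();                  // remove the issued entry in O(1)
    std::printf("issued instruction #%u\n", Picked.first); // issues #2
  }
}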
diff --git a/contrib/llvm/tools/llvm-mca/InstrBuilder.cpp b/contrib/llvm/lib/MCA/InstrBuilder.cpp
index 053b7b4e8175..d2d65e55537c 100644
--- a/contrib/llvm/tools/llvm-mca/InstrBuilder.cpp
+++ b/contrib/llvm/lib/MCA/InstrBuilder.cpp
@@ -12,7 +12,7 @@
///
//===----------------------------------------------------------------------===//
-#include "InstrBuilder.h"
+#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
@@ -22,9 +22,19 @@
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
-using namespace llvm;
+InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
+ const llvm::MCInstrInfo &mcii,
+ const llvm::MCRegisterInfo &mri,
+ const llvm::MCInstrAnalysis *mcia)
+ : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
+ FirstReturnInst(true) {
+ const MCSchedModel &SM = STI.getSchedModel();
+ ProcResourceMasks.resize(SM.getNumProcResourceKinds());
+ computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
+}
static void initializeUsedResources(InstrDesc &ID,
const MCSchedClassDesc &SCDesc,
@@ -48,12 +58,23 @@ static void initializeUsedResources(InstrDesc &ID,
// part of a "Super" resource. The key value is the "Super" resource mask ID.
DenseMap<uint64_t, unsigned> SuperResources;
+ unsigned NumProcResources = SM.getNumProcResourceKinds();
+ APInt Buffers(NumProcResources, 0);
+
+ bool AllInOrderResources = true;
+ bool AnyDispatchHazards = false;
for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
- if (PR.BufferSize != -1)
- ID.Buffers.push_back(Mask);
+ if (PR.BufferSize < 0) {
+ AllInOrderResources = false;
+ } else {
+ Buffers.setBit(PRE->ProcResourceIdx);
+ AnyDispatchHazards |= (PR.BufferSize == 0);
+ AllInOrderResources &= (PR.BufferSize <= 1);
+ }
+
CycleSegment RCy(0, PRE->Cycles, false);
Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
if (PR.SuperIdx) {
@@ -62,18 +83,19 @@ static void initializeUsedResources(InstrDesc &ID,
}
}
+ ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
+
// Sort elements by mask popcount, so that we prioritize resource units over
// resource groups, and smaller groups over larger groups.
- llvm::sort(Worklist.begin(), Worklist.end(),
- [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
- unsigned popcntA = countPopulation(A.first);
- unsigned popcntB = countPopulation(B.first);
- if (popcntA < popcntB)
- return true;
- if (popcntA > popcntB)
- return false;
- return A.first < B.first;
- });
+ sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
+ unsigned popcntA = countPopulation(A.first);
+ unsigned popcntB = countPopulation(B.first);
+ if (popcntA < popcntB)
+ return true;
+ if (popcntA > popcntB)
+ return false;
+ return A.first < B.first;
+ });
uint64_t UsedResourceUnits = 0;
@@ -99,7 +121,7 @@ static void initializeUsedResources(InstrDesc &ID,
for (unsigned J = I + 1; J < E; ++J) {
ResourcePlusCycles &B = Worklist[J];
if ((NormalizedMask & B.first) == NormalizedMask) {
- B.second.CS.Subtract(A.second.size() - SuperResources[A.first]);
+ B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
if (countPopulation(B.first) > 1)
B.second.NumUnits++;
}
@@ -132,11 +154,36 @@ static void initializeUsedResources(InstrDesc &ID,
}
}
+ // Identify extra buffers that are consumed through super resources.
+ for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
+ for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
+ const MCProcResourceDesc &PR = *SM.getProcResource(I);
+ if (PR.BufferSize == -1)
+ continue;
+
+ uint64_t Mask = ProcResourceMasks[I];
+ if (Mask != SR.first && ((Mask & SR.first) == SR.first))
+ Buffers.setBit(I);
+ }
+ }
+
+ // Now set the buffers.
+ if (unsigned NumBuffers = Buffers.countPopulation()) {
+ ID.Buffers.resize(NumBuffers);
+ for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
+ if (Buffers[I]) {
+ --NumBuffers;
+ ID.Buffers[NumBuffers] = ProcResourceMasks[I];
+ }
+ }
+ }
+
LLVM_DEBUG({
for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
- dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n';
+ dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", "
+ << "cy=" << R.second.size() << '\n';
for (const uint64_t R : ID.Buffers)
- dbgs() << "\t\tBuffer Mask=" << R << '\n';
+ dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
});
}
@@ -155,33 +202,92 @@ static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}
+static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
+ // Count register definitions, and skip non register operands in the process.
+ unsigned I, E;
+ unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
+ const MCOperand &Op = MCI.getOperand(I);
+ if (Op.isReg())
+ --NumExplicitDefs;
+ }
+
+ if (NumExplicitDefs) {
+ return make_error<InstructionError<MCInst>>(
+ "Expected more register operand definitions.", MCI);
+ }
+
+ if (MCDesc.hasOptionalDef()) {
+ // Always assume that the optional definition is the last operand.
+ const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
+ if (I == MCI.getNumOperands() || !Op.isReg()) {
+ std::string Message =
+ "expected a register operand for an optional definition. Instruction "
+ "has not been correctly analyzed.";
+ return make_error<InstructionError<MCInst>>(Message, MCI);
+ }
+ }
+
+ return ErrorSuccess();
+}
+
void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
unsigned SchedClassID) {
const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
const MCSchedModel &SM = STI.getSchedModel();
const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
- // These are for now the (strong) assumptions made by this algorithm:
- // * The number of explicit and implicit register definitions in a MCInst
- // matches the number of explicit and implicit definitions according to
- // the opcode descriptor (MCInstrDesc).
- // * Register definitions take precedence over register uses in the operands
- // list.
- // * If an opcode specifies an optional definition, then the optional
- // definition is always the last operand in the sequence, and it can be
- // set to zero (i.e. "no register").
+ // Assumptions made by this algorithm:
+ // 1. The number of explicit and implicit register definitions in a MCInst
+ // matches the number of explicit and implicit definitions according to
+ // the opcode descriptor (MCInstrDesc).
+ // 2. Uses start at index #(MCDesc.getNumDefs()).
+ // 3. There can only be a single optional register definition, and it is
+ // always the last operand of the sequence (excluding extra operands
+ // contributed by variadic opcodes).
//
// These assumptions work quite well for most out-of-order in-tree targets
// like x86. This is mainly because the vast majority of instructions is
// expanded to MCInst using a straightforward lowering logic that preserves
// the ordering of the operands.
+ //
+ // About assumption 1.
+ // The algorithm allows non-register operands between register operand
+ // definitions. This helps to handle some special ARM instructions with
+ // implicit operand increment (-mtriple=armv7):
+ //
+ // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
+ // @ <MCOperand Reg:59>
+ // @ <MCOperand Imm:0> (!!)
+ // @ <MCOperand Reg:67>
+ // @ <MCOperand Imm:0>
+ // @ <MCOperand Imm:14>
+ // @ <MCOperand Reg:0>>
+ //
+ // MCDesc reports:
+ // 6 explicit operands.
+ // 1 optional definition
+ // 2 explicit definitions (!!)
+ //
+ // The presence of an 'Imm' operand between the two register definitions
+ // breaks the assumption that "register definitions are always at the
+ // beginning of the operand sequence".
+ //
+ // To work around this issue, this algorithm ignores (i.e. skips) any
+ // non-register operands between register definitions. The optional
+ // definition is still at index #(NumOperands-1).
+ //
+ // According to assumption 2, register reads start at #(NumExplicitDefs-1).
+ // That means, register R1 from the example is both read and written.
unsigned NumExplicitDefs = MCDesc.getNumDefs();
unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
if (MCDesc.hasOptionalDef())
TotalDefs++;
- ID.Writes.resize(TotalDefs);
+
+ unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+ ID.Writes.resize(TotalDefs + NumVariadicOps);
// Iterate over the operands list, and skip non-register operands.
// The first NumExplictDefs register operands are expected to be register
// definitions.
@@ -208,18 +314,15 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
}
Write.IsOptionalDef = false;
LLVM_DEBUG({
- dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+ dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
<< ", Latency=" << Write.Latency
<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
});
CurrentDef++;
}
- if (CurrentDef != NumExplicitDefs)
- llvm::report_fatal_error(
- "error: Expected more register operand definitions. ");
-
- CurrentDef = 0;
+ assert(CurrentDef == NumExplicitDefs &&
+ "Expected more register operand definitions.");
for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
unsigned Index = NumExplicitDefs + CurrentDef;
WriteDescriptor &Write = ID.Writes[Index];
@@ -241,7 +344,7 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
Write.IsOptionalDef = false;
assert(Write.RegisterID != 0 && "Expected a valid phys register!");
LLVM_DEBUG({
- dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+ dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
<< ", PhysReg=" << MRI.getName(Write.RegisterID)
<< ", Latency=" << Write.Latency
<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
@@ -249,73 +352,149 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
}
if (MCDesc.hasOptionalDef()) {
- // Always assume that the optional definition is the last operand of the
- // MCInst sequence.
- const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
- if (i == MCI.getNumOperands() || !Op.isReg())
- llvm::report_fatal_error(
- "error: expected a register operand for an optional "
- "definition. Instruction has not be correctly analyzed.\n",
- false);
-
- WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
- Write.OpIndex = MCI.getNumOperands() - 1;
+ WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
+ Write.OpIndex = MCDesc.getNumOperands() - 1;
// Assign a default latency for this write.
Write.Latency = ID.MaxLatency;
Write.SClassOrWriteResourceID = 0;
Write.IsOptionalDef = true;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
}
+
+ if (!NumVariadicOps)
+ return;
+
+ // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
+ // "unmodeledSideEffects', then this logic optimistically assumes that any
+ // extra register operands in the variadic sequence is not a register
+ // definition.
+ //
+ // Otherwise, we conservatively assume that any register operand from the
+ // variadic sequence is both a register read and a register write.
+ bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
+ !MCDesc.hasUnmodeledSideEffects();
+ CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
+ for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+ I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
+ WriteDescriptor &Write = ID.Writes[CurrentDef];
+ Write.OpIndex = OpIndex;
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ Write.IsOptionalDef = false;
+ ++CurrentDef;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ }
+
+ ID.Writes.resize(CurrentDef);
}
void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
unsigned SchedClassID) {
const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
- unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
+ unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
+ // Remove the optional definition.
+ if (MCDesc.hasOptionalDef())
+ --NumExplicitUses;
+ unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+ unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
+ ID.Reads.resize(TotalUses);
+ unsigned CurrentUse = 0;
+ for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
+ ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
- // Skip explicit definitions.
- unsigned i = 0;
- for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
- const MCOperand &Op = MCI.getOperand(i);
- if (Op.isReg())
- NumExplicitDefs--;
+ ReadDescriptor &Read = ID.Reads[CurrentUse];
+ Read.OpIndex = OpIndex;
+ Read.UseIndex = I;
+ Read.SchedClassID = SchedClassID;
+ ++CurrentUse;
+ LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << '\n');
}
- if (NumExplicitDefs)
- llvm::report_fatal_error(
- "error: Expected more register operand definitions. ", false);
-
- unsigned NumExplicitUses = MCI.getNumOperands() - i;
- unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
- if (MCDesc.hasOptionalDef()) {
- assert(NumExplicitUses);
- NumExplicitUses--;
+ // For the purpose of ReadAdvance, implicit uses come directly after explicit
+ // uses. The "UseIndex" must be updated according to that implicit layout.
+ for (unsigned I = 0; I < NumImplicitUses; ++I) {
+ ReadDescriptor &Read = ID.Reads[CurrentUse + I];
+ Read.OpIndex = ~I;
+ Read.UseIndex = NumExplicitUses + I;
+ Read.RegisterID = MCDesc.getImplicitUses()[I];
+ Read.SchedClassID = SchedClassID;
+ LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << ", RegisterID="
+ << MRI.getName(Read.RegisterID) << '\n');
}
- unsigned TotalUses = NumExplicitUses + NumImplicitUses;
- if (!TotalUses)
- return;
- ID.Reads.resize(TotalUses);
- for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
+ CurrentUse += NumImplicitUses;
+
+ // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
+ // "unmodeledSideEffects", then this logic optimistically assumes that any
+ // extra register operands in the variadic sequence are not register
+ // definitions.
+
+ bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
+ !MCDesc.hasUnmodeledSideEffects();
+ for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+ I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
ReadDescriptor &Read = ID.Reads[CurrentUse];
- Read.OpIndex = i + CurrentUse;
- Read.UseIndex = CurrentUse;
+ Read.OpIndex = OpIndex;
+ Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
Read.SchedClassID = SchedClassID;
- LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
+ ++CurrentUse;
+ LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
<< ", UseIndex=" << Read.UseIndex << '\n');
}
- for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
- ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
- Read.OpIndex = ~CurrentUse;
- Read.UseIndex = NumExplicitUses + CurrentUse;
- Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
- Read.SchedClassID = SchedClassID;
- LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID="
- << MRI.getName(Read.RegisterID) << '\n');
+ ID.Reads.resize(CurrentUse);
+}
+
+Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
+ const MCInst &MCI) const {
+ if (ID.NumMicroOps != 0)
+ return ErrorSuccess();
+
+ bool UsesMemory = ID.MayLoad || ID.MayStore;
+ bool UsesBuffers = !ID.Buffers.empty();
+ bool UsesResources = !ID.Resources.empty();
+ if (!UsesMemory && !UsesBuffers && !UsesResources)
+ return ErrorSuccess();
+
+ StringRef Message;
+ if (UsesMemory) {
+ Message = "found an inconsistent instruction that decodes "
+ "into zero opcodes and that consumes load/store "
+ "unit resources.";
+ } else {
+ Message = "found an inconsistent instruction that decodes "
+ "to zero opcodes and that consumes scheduler "
+ "resources.";
}
+
+ return make_error<InstructionError<MCInst>>(Message, MCI);
}
-const InstrDesc &InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
+Expected<const InstrDesc &>
+InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
assert(STI.getSchedModel().hasInstrSchedModel() &&
"Itineraries are not yet supported!");
@@ -326,64 +505,76 @@ const InstrDesc &InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
// Then obtain the scheduling class information from the instruction.
unsigned SchedClassID = MCDesc.getSchedClass();
- unsigned CPUID = SM.getProcessorID();
+ bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
// Try to solve variant scheduling classes.
- if (SchedClassID) {
+ if (IsVariant) {
+ unsigned CPUID = SM.getProcessorID();
while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
- if (!SchedClassID)
- llvm::report_fatal_error("unable to resolve this variant class.");
+ if (!SchedClassID) {
+ return make_error<InstructionError<MCInst>>(
+ "unable to resolve scheduling class for write variant.", MCI);
+ }
}
- // Check if this instruction is supported. Otherwise, report a fatal error.
+ // Check if this instruction is supported. Otherwise, report an error.
const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
- std::string ToString;
- llvm::raw_string_ostream OS(ToString);
- WithColor::error() << "found an unsupported instruction in the input"
- << " assembly sequence.\n";
- MCIP.printInst(&MCI, OS, "", STI);
- OS.flush();
-
- WithColor::note() << "instruction: " << ToString << '\n';
- llvm::report_fatal_error(
- "Don't know how to analyze unsupported instructions.");
+ return make_error<InstructionError<MCInst>>(
+ "found an unsupported instruction in the input assembly sequence.",
+ MCI);
}
+ LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
+
// Create a new empty descriptor.
std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
ID->NumMicroOps = SCDesc.NumMicroOps;
- if (MCDesc.isCall()) {
+ if (MCDesc.isCall() && FirstCallInst) {
// We don't correctly model calls.
WithColor::warning() << "found a call in the input assembly sequence.\n";
WithColor::note() << "call instructions are not correctly modeled. "
<< "Assume a latency of 100cy.\n";
+ FirstCallInst = false;
}
- if (MCDesc.isReturn()) {
+ if (MCDesc.isReturn() && FirstReturnInst) {
WithColor::warning() << "found a return instruction in the input"
<< " assembly sequence.\n";
WithColor::note() << "program counter updates are ignored.\n";
+ FirstReturnInst = false;
}
ID->MayLoad = MCDesc.mayLoad();
ID->MayStore = MCDesc.mayStore();
ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
+ ID->BeginGroup = SCDesc.BeginGroup;
+ ID->EndGroup = SCDesc.EndGroup;
initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
computeMaxLatency(*ID, MCDesc, SCDesc, STI);
+
+ if (Error Err = verifyOperands(MCDesc, MCI))
+ return std::move(Err);
+
populateWrites(*ID, MCI, SchedClassID);
populateReads(*ID, MCI, SchedClassID);
LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
+ // Sanity check on the instruction descriptor.
+ if (Error Err = verifyInstrDesc(*ID, MCI))
+ return std::move(Err);
+
// Now add the new descriptor.
SchedClassID = MCDesc.getSchedClass();
- if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) {
+ bool IsVariadic = MCDesc.isVariadic();
+ if (!IsVariadic && !IsVariant) {
Descriptors[MCI.getOpcode()] = std::move(ID);
return *Descriptors[MCI.getOpcode()];
}
@@ -392,7 +583,8 @@ const InstrDesc &InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
return *VariantDescriptors[&MCI];
}
-const InstrDesc &InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
+Expected<const InstrDesc &>
+InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
return *Descriptors[MCI.getOpcode()];
@@ -402,11 +594,28 @@ const InstrDesc &InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
return createInstrDescImpl(MCI);
}
-std::unique_ptr<Instruction>
+Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
- const InstrDesc &D = getOrCreateInstrDesc(MCI);
+ Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
+ if (!DescOrErr)
+ return DescOrErr.takeError();
+ const InstrDesc &D = *DescOrErr;
std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
+ // Check if this is a dependency breaking instruction.
+ APInt Mask;
+
+ bool IsZeroIdiom = false;
+ bool IsDepBreaking = false;
+ if (MCIA) {
+ unsigned ProcID = STI.getSchedModel().getProcessorID();
+ IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
+ IsDepBreaking =
+ IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
+ if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
+ NewIS->setOptimizableMove();
+ }
+
// Initialize Reads first.
for (const ReadDescriptor &RD : D.Reads) {
int RegID = -1;
@@ -428,12 +637,33 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
// Okay, this is a register operand. Create a ReadState for it.
assert(RegID > 0 && "Invalid register ID found!");
- NewIS->getUses().emplace_back(llvm::make_unique<ReadState>(RD, RegID));
+ NewIS->getUses().emplace_back(RD, RegID);
+ ReadState &RS = NewIS->getUses().back();
+
+ if (IsDepBreaking) {
+ // A mask of all zeroes means: explicit input operands are not
+ // independent.
+ if (Mask.isNullValue()) {
+ if (!RD.isImplicitRead())
+ RS.setIndependentFromDef();
+ } else {
+ // Check if this register operand is independent according to `Mask`.
+ // Note that Mask may not have enough bits to describe all explicit and
+ // implicit input operands. If this register operand doesn't have a
+ // corresponding bit in Mask, then conservatively assume that it is
+ // dependent.
+ if (Mask.getBitWidth() > RD.UseIndex) {
+ // Okay. This mask describes register use `RD.UseIndex`.
+ if (Mask[RD.UseIndex])
+ RS.setIndependentFromDef();
+ }
+ }
+ }
}
// Early exit if there are no writes.
if (D.Writes.empty())
- return NewIS;
+ return std::move(NewIS);
// Track register writes that implicitly clear the upper portion of the
// underlying super-registers using an APInt.
@@ -441,11 +671,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
// Now query the MCInstrAnalysis object to obtain information about which
// register writes implicitly clear the upper portion of a super-register.
- MCIA.clearsSuperRegisters(MRI, MCI, WriteMask);
-
- // Check if this is a dependency breaking instruction.
- if (MCIA.isDependencyBreaking(STI, MCI))
- NewIS->setDependencyBreaking();
+ if (MCIA)
+ MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
// Initialize writes.
unsigned WriteIndex = 0;
@@ -459,11 +686,13 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
}
assert(RegID && "Expected a valid register ID!");
- NewIS->getDefs().emplace_back(llvm::make_unique<WriteState>(
- WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex]));
+ NewIS->getDefs().emplace_back(WD, RegID,
+ /* ClearsSuperRegs */ WriteMask[WriteIndex],
+ /* WritesZero */ IsZeroIdiom);
++WriteIndex;
}
- return NewIS;
+ return std::move(NewIS);
}
} // namespace mca
+} // namespace llvm
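The sort call in initializeUsedResources above orders the resource worklist by mask popcount, so single resource units are processed before groups and smaller groups before larger ones, with the raw mask value as a tie breaker. A self-contained sketch of that comparator follows; it is not part of the patch and uses std::bitset in place of countPopulation.

// Illustrative sketch only (not part of the patch above).
#include <algorithm>
#include <bitset>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint64_t> Masks = {0b0110, 0b0001, 0b0111, 0b0100};
  std::sort(Masks.begin(), Masks.end(), [](uint64_t A, uint64_t B) {
    unsigned PopA = std::bitset<64>(A).count();
    unsigned PopB = std::bitset<64>(B).count();
    if (PopA != PopB)
      return PopA < PopB;   // units first, then small groups, then big groups
    return A < B;           // deterministic tie break
  });
  for (uint64_t M : Masks)
    std::printf("%#llx ", (unsigned long long)M); // prints 0x1 0x4 0x6 0x7
  std::printf("\n");
}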
diff --git a/contrib/llvm/tools/llvm-mca/Instruction.cpp b/contrib/llvm/lib/MCA/Instruction.cpp
index 0c8476705572..057e95ca9990 100644
--- a/contrib/llvm/tools/llvm-mca/Instruction.cpp
+++ b/contrib/llvm/lib/MCA/Instruction.cpp
@@ -12,14 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#include "Instruction.h"
+#include "llvm/MCA/Instruction.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
void ReadState::writeStartEvent(unsigned Cycles) {
assert(DependentWrites);
assert(CyclesLeft == UNKNOWN_CYCLES);
@@ -50,6 +49,10 @@ void WriteState::onInstructionIssued() {
unsigned ReadCycles = std::max(0, CyclesLeft - User.second);
RS->writeStartEvent(ReadCycles);
}
+
+ // Notify any writes that are in a false dependency with this write.
+ if (PartialWrite)
+ PartialWrite->writeStartEvent(CyclesLeft);
}
void WriteState::addUser(ReadState *User, int ReadAdvance) {
@@ -62,8 +65,22 @@ void WriteState::addUser(ReadState *User, int ReadAdvance) {
return;
}
- std::pair<ReadState *, int> NewPair(User, ReadAdvance);
- Users.insert(NewPair);
+ if (llvm::find_if(Users, [&User](const std::pair<ReadState *, int> &Use) {
+ return Use.first == User;
+ }) == Users.end()) {
+ Users.emplace_back(User, ReadAdvance);
+ }
+}
+
+void WriteState::addUser(WriteState *User) {
+ if (CyclesLeft != UNKNOWN_CYCLES) {
+ User->writeStartEvent(std::max(0, CyclesLeft));
+ return;
+ }
+
+ assert(!PartialWrite && "PartialWrite already set!");
+ PartialWrite = User;
+ User->setDependentWrite(this);
}
void WriteState::cycleEvent() {
@@ -72,6 +89,9 @@ void WriteState::cycleEvent() {
// specify a negative ReadAdvance.
if (CyclesLeft != UNKNOWN_CYCLES)
CyclesLeft--;
+
+ if (DependentWriteCyclesLeft)
+ DependentWriteCyclesLeft--;
}
void ReadState::cycleEvent() {
@@ -93,7 +113,7 @@ void ReadState::cycleEvent() {
#ifndef NDEBUG
void WriteState::dump() const {
- dbgs() << "{ OpIdx=" << WD.OpIndex << ", Lat=" << getLatency() << ", RegID "
+ dbgs() << "{ OpIdx=" << WD->OpIndex << ", Lat=" << getLatency() << ", RegID "
<< getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }";
}
@@ -120,34 +140,38 @@ void Instruction::execute() {
Stage = IS_EXECUTING;
// Set the cycles left before the write-back stage.
- CyclesLeft = Desc.MaxLatency;
+ CyclesLeft = getLatency();
- for (UniqueDef &Def : Defs)
- Def->onInstructionIssued();
+ for (WriteState &WS : getDefs())
+ WS.onInstructionIssued();
// Transition to the "executed" stage if this is a zero-latency instruction.
if (!CyclesLeft)
Stage = IS_EXECUTED;
}
+void Instruction::forceExecuted() {
+ assert(Stage == IS_READY && "Invalid internal state!");
+ CyclesLeft = 0;
+ Stage = IS_EXECUTED;
+}
+
void Instruction::update() {
assert(isDispatched() && "Unexpected instruction stage found!");
- if (!llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); }))
+ if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); }))
return;
// A partial register write cannot complete before a dependent write.
- auto IsDefReady = [&](const UniqueDef &Def) {
- if (const WriteState *Write = Def->getDependentWrite()) {
- int WriteLatency = Write->getCyclesLeft();
- if (WriteLatency == UNKNOWN_CYCLES)
- return false;
- return static_cast<unsigned>(WriteLatency) < Desc.MaxLatency;
+ auto IsDefReady = [&](const WriteState &Def) {
+ if (!Def.getDependentWrite()) {
+ unsigned CyclesLeft = Def.getDependentWriteCyclesLeft();
+ return !CyclesLeft || CyclesLeft < getLatency();
}
- return true;
+ return false;
};
- if (llvm::all_of(Defs, IsDefReady))
+ if (all_of(getDefs(), IsDefReady))
Stage = IS_READY;
}
@@ -156,8 +180,11 @@ void Instruction::cycleEvent() {
return;
if (isDispatched()) {
- for (UniqueUse &Use : Uses)
- Use->cycleEvent();
+ for (ReadState &Use : getUses())
+ Use.cycleEvent();
+
+ for (WriteState &Def : getDefs())
+ Def.cycleEvent();
update();
return;
@@ -165,8 +192,8 @@ void Instruction::cycleEvent() {
assert(isExecuting() && "Instruction not in-flight?");
assert(CyclesLeft && "Instruction already executed?");
- for (UniqueDef &Def : Defs)
- Def->cycleEvent();
+ for (WriteState &Def : getDefs())
+ Def.cycleEvent();
CyclesLeft--;
if (!CyclesLeft)
Stage = IS_EXECUTED;
@@ -175,3 +202,4 @@ void Instruction::cycleEvent() {
const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max();
} // namespace mca
+} // namespace llvm
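WriteState::onInstructionIssued above computes the cycles a dependent read still has to wait as max(0, CyclesLeft - ReadAdvance), so a sufficiently large ReadAdvance hides the producer's latency entirely. A small worked sketch of that clamp follows; it is not part of the patch.

// Illustrative sketch only (not part of the patch above).
#include <algorithm>
#include <cstdio>

int main() {
  int WriteLatency = 5;                 // cycles until the producer writes back
  for (int ReadAdvance : {0, 2, 7}) {
    int ReadCycles = std::max(0, WriteLatency - ReadAdvance);
    std::printf("ReadAdvance=%d -> consumer waits %d cycle(s)\n", ReadAdvance,
                ReadCycles);
  }
  // A ReadAdvance larger than the remaining latency clamps the wait to zero.
}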
diff --git a/contrib/llvm/lib/MCA/Pipeline.cpp b/contrib/llvm/lib/MCA/Pipeline.cpp
new file mode 100644
index 000000000000..4c0e37c9ba7e
--- /dev/null
+++ b/contrib/llvm/lib/MCA/Pipeline.cpp
@@ -0,0 +1,97 @@
+//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements an ordered container of stages that simulate the
+/// pipeline of a hardware backend.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+void Pipeline::addEventListener(HWEventListener *Listener) {
+ if (Listener)
+ Listeners.insert(Listener);
+ for (auto &S : Stages)
+ S->addListener(Listener);
+}
+
+bool Pipeline::hasWorkToProcess() {
+ return any_of(Stages, [](const std::unique_ptr<Stage> &S) {
+ return S->hasWorkToComplete();
+ });
+}
+
+Expected<unsigned> Pipeline::run() {
+ assert(!Stages.empty() && "Unexpected empty pipeline found!");
+
+ do {
+ notifyCycleBegin();
+ if (Error Err = runCycle())
+ return std::move(Err);
+ notifyCycleEnd();
+ ++Cycles;
+ } while (hasWorkToProcess());
+
+ return Cycles;
+}
+
+Error Pipeline::runCycle() {
+ Error Err = ErrorSuccess();
+ // Update stages before we start processing new instructions.
+ for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) {
+ const std::unique_ptr<Stage> &S = *I;
+ Err = S->cycleStart();
+ }
+
+ // Now fetch and execute new instructions.
+ InstRef IR;
+ Stage &FirstStage = *Stages[0];
+ while (!Err && FirstStage.isAvailable(IR))
+ Err = FirstStage.execute(IR);
+
+ // Update stages in preparation for a new cycle.
+ for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) {
+ const std::unique_ptr<Stage> &S = *I;
+ Err = S->cycleEnd();
+ }
+
+ return Err;
+}
+
+void Pipeline::appendStage(std::unique_ptr<Stage> S) {
+ assert(S && "Invalid null stage in input!");
+ if (!Stages.empty()) {
+ Stage *Last = Stages.back().get();
+ Last->setNextInSequence(S.get());
+ }
+
+ Stages.push_back(std::move(S));
+}
+
+void Pipeline::notifyCycleBegin() {
+ LLVM_DEBUG(dbgs() << "\n[E] Cycle begin: " << Cycles << '\n');
+ for (HWEventListener *Listener : Listeners)
+ Listener->onCycleBegin();
+}
+
+void Pipeline::notifyCycleEnd() {
+ LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n");
+ for (HWEventListener *Listener : Listeners)
+ Listener->onCycleEnd();
+}
+} // namespace mca
+} // namespace llvm
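Pipeline::runCycle above drives one simulated cycle: every stage gets cycleStart in reverse order (so later stages drain before earlier ones feed them), new instructions are then pushed into the first stage, and every stage finally gets cycleEnd. A minimal sketch of that ordering follows; it is not part of the patch and the stage names are made up.

// Illustrative sketch only (not part of the patch above).
#include <cstdio>
#include <memory>
#include <vector>

struct MiniStage {
  const char *Name;
  void cycleStart() { std::printf("start %s\n", Name); }
  void cycleEnd() { std::printf("end %s\n", Name); }
};

int main() {
  std::vector<std::unique_ptr<MiniStage>> Stages;
  Stages.push_back(std::make_unique<MiniStage>(MiniStage{"fetch"}));
  Stages.push_back(std::make_unique<MiniStage>(MiniStage{"dispatch"}));
  Stages.push_back(std::make_unique<MiniStage>(MiniStage{"execute"}));

  // One simulated cycle.
  for (auto I = Stages.rbegin(), E = Stages.rend(); I != E; ++I)
    (*I)->cycleStart();                       // later stages drain first
  std::printf("feed instructions into %s\n", Stages.front()->Name);
  for (auto I = Stages.rbegin(), E = Stages.rend(); I != E; ++I)
    (*I)->cycleEnd();
}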
diff --git a/contrib/llvm/tools/llvm-mca/DispatchStage.cpp b/contrib/llvm/lib/MCA/Stages/DispatchStage.cpp
index 1f508886c298..7fb4eb6a1c0e 100644
--- a/contrib/llvm/tools/llvm-mca/DispatchStage.cpp
+++ b/contrib/llvm/lib/MCA/Stages/DispatchStage.cpp
@@ -16,28 +16,28 @@
///
//===----------------------------------------------------------------------===//
-#include "DispatchStage.h"
-#include "HWEventListener.h"
-#include "Scheduler.h"
+#include "llvm/MCA/Stages/DispatchStage.h"
+#include "llvm/MCA/HWEventListener.h"
+#include "llvm/MCA/HardwareUnits/Scheduler.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
-
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
void DispatchStage::notifyInstructionDispatched(const InstRef &IR,
- ArrayRef<unsigned> UsedRegs) {
+ ArrayRef<unsigned> UsedRegs,
+ unsigned UOps) const {
LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n');
- notifyEvent<HWInstructionEvent>(HWInstructionDispatchedEvent(IR, UsedRegs));
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionDispatchedEvent(IR, UsedRegs, UOps));
}
-bool DispatchStage::checkPRF(const InstRef &IR) {
+bool DispatchStage::checkPRF(const InstRef &IR) const {
SmallVector<unsigned, 4> RegDefs;
- for (const std::unique_ptr<WriteState> &RegDef :
- IR.getInstruction()->getDefs())
- RegDefs.emplace_back(RegDef->getRegisterID());
+ for (const WriteState &RegDef : IR.getInstruction()->getDefs())
+ RegDefs.emplace_back(RegDef.getRegisterID());
const unsigned RegisterMask = PRF.isAvailable(RegDefs);
// A mask with all zeroes means: register files are available.
@@ -50,7 +50,7 @@ bool DispatchStage::checkPRF(const InstRef &IR) {
return true;
}
-bool DispatchStage::checkRCU(const InstRef &IR) {
+bool DispatchStage::checkRCU(const InstRef &IR) const {
const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps;
if (RCU.isAvailable(NumMicroOps))
return true;
@@ -59,20 +59,17 @@ bool DispatchStage::checkRCU(const InstRef &IR) {
return false;
}
-bool DispatchStage::checkScheduler(const InstRef &IR) {
- HWStallEvent::GenericEventType Event;
- const bool Ready = SC.canBeDispatched(IR, Event);
- if (!Ready)
- notifyEvent<HWStallEvent>(HWStallEvent(Event, IR));
- return Ready;
+bool DispatchStage::canDispatch(const InstRef &IR) const {
+ return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR);
}
void DispatchStage::updateRAWDependencies(ReadState &RS,
const MCSubtargetInfo &STI) {
SmallVector<WriteRef, 4> DependentWrites;
- collectWrites(DependentWrites, RS.getRegisterID());
- RS.setDependentWrites(DependentWrites.size());
+ // Collect all the dependent writes, and update RS internal state.
+ PRF.addRegisterRead(RS, DependentWrites);
+
// We know that this read depends on all the writes in DependentWrites.
// For each write, check if we have ReadAdvance information, and use it
// to figure out in how many cycles this read becomes available.
@@ -87,7 +84,7 @@ void DispatchStage::updateRAWDependencies(ReadState &RS,
}
}
-void DispatchStage::dispatch(InstRef IR) {
+Error DispatchStage::dispatch(InstRef IR) {
assert(!CarryOver && "Cannot dispatch another instruction!");
Instruction &IS = *IR.getInstruction();
const InstrDesc &Desc = IS.getDesc();
@@ -96,52 +93,94 @@ void DispatchStage::dispatch(InstRef IR) {
assert(AvailableEntries == DispatchWidth);
AvailableEntries = 0;
CarryOver = NumMicroOps - DispatchWidth;
+ CarriedOver = IR;
} else {
assert(AvailableEntries >= NumMicroOps);
AvailableEntries -= NumMicroOps;
}
+ // Check if this instruction ends the dispatch group.
+ if (Desc.EndGroup)
+ AvailableEntries = 0;
+
+ // Check if this is an optimizable reg-reg move.
+ bool IsEliminated = false;
+ if (IS.isOptimizableMove()) {
+ assert(IS.getDefs().size() == 1 && "Expected a single output!");
+ assert(IS.getUses().size() == 1 && "Expected a single input!");
+ IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]);
+ }
+
// A dependency-breaking instruction doesn't have to wait on the register
// input operands, and it is often optimized at register renaming stage.
// Update RAW dependencies if this instruction is not a dependency-breaking
// instruction. A dependency-breaking instruction is a zero-latency
// instruction that doesn't consume hardware resources.
// An example of dependency-breaking instruction on X86 is a zero-idiom XOR.
- bool IsDependencyBreaking = IS.isDependencyBreaking();
- for (std::unique_ptr<ReadState> &RS : IS.getUses())
- if (RS->isImplicitRead() || !IsDependencyBreaking)
- updateRAWDependencies(*RS, STI);
-
- // By default, a dependency-breaking zero-latency instruction is expected to
- // be optimized at register renaming stage. That means, no physical register
- // is allocated to the instruction.
- bool ShouldAllocateRegisters =
- !(Desc.isZeroLatency() && IsDependencyBreaking);
- SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
- for (std::unique_ptr<WriteState> &WS : IS.getDefs()) {
- PRF.addRegisterWrite(WriteRef(IR.first, WS.get()), RegisterFiles,
- ShouldAllocateRegisters);
+ //
+ // We also don't update data dependencies for instructions that have been
+ // eliminated at register renaming stage.
+ if (!IsEliminated) {
+ for (ReadState &RS : IS.getUses())
+ updateRAWDependencies(RS, STI);
}
+ // By default, a dependency-breaking zero-idiom is expected to be optimized
+ // at register renaming stage. That means, no physical register is allocated
+ // to the instruction.
+ SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
+ for (WriteState &WS : IS.getDefs())
+ PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles);
+
// Reserve slots in the RCU, and notify the instruction that it has been
// dispatched to the schedulers for execution.
IS.dispatch(RCU.reserveSlot(IR, NumMicroOps));
- // Notify listeners of the "instruction dispatched" event.
- notifyInstructionDispatched(IR, RegisterFiles);
+ // Notify listeners of the "instruction dispatched" event,
+ // and move IR to the next stage.
+ notifyInstructionDispatched(IR, RegisterFiles,
+ std::min(DispatchWidth, NumMicroOps));
+ return moveToTheNextStage(IR);
}
-void DispatchStage::cycleStart() {
+Error DispatchStage::cycleStart() {
+ PRF.cycleStart();
+
+ if (!CarryOver) {
+ AvailableEntries = DispatchWidth;
+ return ErrorSuccess();
+ }
+
AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
- CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U;
+ unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries;
+ CarryOver -= DispatchedOpcodes;
+ assert(CarriedOver && "Invalid dispatched instruction");
+
+ SmallVector<unsigned, 8> RegisterFiles(PRF.getNumRegisterFiles(), 0U);
+ notifyInstructionDispatched(CarriedOver, RegisterFiles, DispatchedOpcodes);
+ if (!CarryOver)
+ CarriedOver = InstRef();
+ return ErrorSuccess();
}
-bool DispatchStage::execute(InstRef &IR) {
+bool DispatchStage::isAvailable(const InstRef &IR) const {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
- if (!isAvailable(Desc.NumMicroOps) || !canDispatch(IR))
+ unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth);
+ if (Required > AvailableEntries)
return false;
- dispatch(IR);
- return true;
+
+ if (Desc.BeginGroup && AvailableEntries != DispatchWidth)
+ return false;
+
+ // The dispatch logic doesn't internally buffer instructions. It only accepts
+ // instructions that can be successfully moved to the next stage during this
+ // same cycle.
+ return canDispatch(IR);
+}
+
+Error DispatchStage::execute(InstRef &IR) {
+ assert(canDispatch(IR) && "Cannot dispatch another instruction!");
+ return dispatch(IR);
}
#ifndef NDEBUG
@@ -151,3 +190,4 @@ void DispatchStage::dump() const {
}
#endif
} // namespace mca
+} // namespace llvm
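
A minimal, self-contained sketch of the carry-over arithmetic that the reworked DispatchStage::dispatch/cycleStart pair performs above, assuming a hypothetical DispatchWidth of 4 and a 10-micro-op instruction; only the variable names are taken from the patch, the rest is illustrative.

#include <cstdio>

int main() {
  const unsigned DispatchWidth = 4; // assumed width
  unsigned NumMicroOps = 10;        // assumed instruction size
  // dispatch(): the instruction consumes the whole group and carries over.
  unsigned CarryOver = NumMicroOps - DispatchWidth; // 6 micro-ops left
  unsigned Cycle = 0;
  while (CarryOver) {
    // cycleStart(): drain up to DispatchWidth micro-ops per cycle.
    unsigned AvailableEntries =
        CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver;
    unsigned DispatchedOpcodes = DispatchWidth - AvailableEntries;
    CarryOver -= DispatchedOpcodes;
    std::printf("cycle %u: drained %u uOps, %u left, %u entries free\n",
                ++Cycle, DispatchedOpcodes, CarryOver, AvailableEntries);
  }
  return 0;
}

With these assumed numbers the carried-over instruction drains 4 micro-ops in the first extra cycle and 2 in the second, at which point CarriedOver is reset to an empty InstRef.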
diff --git a/contrib/llvm/lib/MCA/Stages/EntryStage.cpp b/contrib/llvm/lib/MCA/Stages/EntryStage.cpp
new file mode 100644
index 000000000000..3325bb36f5af
--- /dev/null
+++ b/contrib/llvm/lib/MCA/Stages/EntryStage.cpp
@@ -0,0 +1,76 @@
+//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the Fetch stage of an instruction pipeline. Its sole
+/// purpose in life is to produce instructions for the rest of the pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Instruction.h"
+
+namespace llvm {
+namespace mca {
+
+bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; }
+
+bool EntryStage::isAvailable(const InstRef & /* unused */) const {
+ if (CurrentInstruction)
+ return checkNextStage(CurrentInstruction);
+ return false;
+}
+
+void EntryStage::getNextInstruction() {
+ assert(!CurrentInstruction && "There is already an instruction to process!");
+ if (!SM.hasNext())
+ return;
+ SourceRef SR = SM.peekNext();
+ std::unique_ptr<Instruction> Inst = llvm::make_unique<Instruction>(SR.second);
+ CurrentInstruction = InstRef(SR.first, Inst.get());
+ Instructions.emplace_back(std::move(Inst));
+ SM.updateNext();
+}
+
+llvm::Error EntryStage::execute(InstRef & /* unused */) {
+ assert(CurrentInstruction && "There is no instruction to process!");
+ if (llvm::Error Val = moveToTheNextStage(CurrentInstruction))
+ return Val;
+
+ // Move the program counter.
+ CurrentInstruction.invalidate();
+ getNextInstruction();
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error EntryStage::cycleStart() {
+ if (!CurrentInstruction)
+ getNextInstruction();
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error EntryStage::cycleEnd() {
+ // Find the first instruction which hasn't been retired.
+ auto Range = make_range(&Instructions[NumRetired], Instructions.end());
+ auto It = find_if(Range, [](const std::unique_ptr<Instruction> &I) {
+ return !I->isRetired();
+ });
+
+ NumRetired = std::distance(Instructions.begin(), It);
+ // Erase instructions up to the first that hasn't been retired.
+ if ((NumRetired * 2) >= Instructions.size()) {
+ Instructions.erase(Instructions.begin(), It);
+ NumRetired = 0;
+ }
+
+ return llvm::ErrorSuccess();
+}
+
+} // namespace mca
+} // namespace llvm
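
A small sketch of the compaction policy used by EntryStage::cycleEnd above: retired entries are erased from the front of the owning buffer only once they make up at least half of it, which keeps the cost of erasure amortized. The Instr struct is a stand-in for the real Instruction class; everything here is illustrative.

#include <cstddef>
#include <vector>

struct Instr {
  bool Retired = false;
};

// Erase retired entries from the front only once they make up at least half
// of the buffer, mirroring EntryStage::cycleEnd.
void compact(std::vector<Instr> &Instructions, std::size_t &NumRetired) {
  auto It = Instructions.begin() + NumRetired;
  while (It != Instructions.end() && It->Retired)
    ++It;
  NumRetired = static_cast<std::size_t>(It - Instructions.begin());
  if (NumRetired * 2 >= Instructions.size()) {
    Instructions.erase(Instructions.begin(), It);
    NumRetired = 0;
  }
}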
diff --git a/contrib/llvm/lib/MCA/Stages/ExecuteStage.cpp b/contrib/llvm/lib/MCA/Stages/ExecuteStage.cpp
new file mode 100644
index 000000000000..e78327763fa1
--- /dev/null
+++ b/contrib/llvm/lib/MCA/Stages/ExecuteStage.cpp
@@ -0,0 +1,225 @@
+//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the execution stage of an instruction pipeline.
+///
+/// The ExecuteStage is responsible for managing the hardware scheduler
+/// and issuing notifications that an instruction has been executed.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) {
+ switch (Status) {
+ case Scheduler::SC_LOAD_QUEUE_FULL:
+ return HWStallEvent::LoadQueueFull;
+ case Scheduler::SC_STORE_QUEUE_FULL:
+ return HWStallEvent::StoreQueueFull;
+ case Scheduler::SC_BUFFERS_FULL:
+ return HWStallEvent::SchedulerQueueFull;
+ case Scheduler::SC_DISPATCH_GROUP_STALL:
+ return HWStallEvent::DispatchGroupStall;
+ case Scheduler::SC_AVAILABLE:
+ return HWStallEvent::Invalid;
+ }
+
+ llvm_unreachable("Don't know how to process this StallKind!");
+}
+
+bool ExecuteStage::isAvailable(const InstRef &IR) const {
+ if (Scheduler::Status S = HWS.isAvailable(IR)) {
+ HWStallEvent::GenericEventType ET = toHWStallEventType(S);
+ notifyEvent<HWStallEvent>(HWStallEvent(ET, IR));
+ return false;
+ }
+
+ return true;
+}
+
+Error ExecuteStage::issueInstruction(InstRef &IR) {
+ SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used;
+ SmallVector<InstRef, 4> Ready;
+ HWS.issueInstruction(IR, Used, Ready);
+
+ notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
+
+ notifyInstructionIssued(IR, Used);
+ if (IR.getInstruction()->isExecuted()) {
+ notifyInstructionExecuted(IR);
+ // FIXME: add a buffer of executed instructions.
+ if (Error S = moveToTheNextStage(IR))
+ return S;
+ }
+
+ for (const InstRef &I : Ready)
+ notifyInstructionReady(I);
+ return ErrorSuccess();
+}
+
+Error ExecuteStage::issueReadyInstructions() {
+ InstRef IR = HWS.select();
+ while (IR) {
+ if (Error Err = issueInstruction(IR))
+ return Err;
+
+ // Select the next instruction to issue.
+ IR = HWS.select();
+ }
+
+ return ErrorSuccess();
+}
+
+Error ExecuteStage::cycleStart() {
+ SmallVector<ResourceRef, 8> Freed;
+ SmallVector<InstRef, 4> Executed;
+ SmallVector<InstRef, 4> Ready;
+
+ HWS.cycleEvent(Freed, Executed, Ready);
+
+ for (const ResourceRef &RR : Freed)
+ notifyResourceAvailable(RR);
+
+ for (InstRef &IR : Executed) {
+ notifyInstructionExecuted(IR);
+ // FIXME: add a buffer of executed instructions.
+ if (Error S = moveToTheNextStage(IR))
+ return S;
+ }
+
+ for (const InstRef &IR : Ready)
+ notifyInstructionReady(IR);
+
+ return issueReadyInstructions();
+}
+
+#ifndef NDEBUG
+static void verifyInstructionEliminated(const InstRef &IR) {
+ const Instruction &Inst = *IR.getInstruction();
+ assert(Inst.isEliminated() && "Instruction was not eliminated!");
+ assert(Inst.isReady() && "Instruction in an inconsistent state!");
+
+ // Ensure that instructions eliminated at register renaming stage are in a
+ // consistent state.
+ const InstrDesc &Desc = Inst.getDesc();
+ assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!");
+}
+#endif
+
+Error ExecuteStage::handleInstructionEliminated(InstRef &IR) {
+#ifndef NDEBUG
+ verifyInstructionEliminated(IR);
+#endif
+ notifyInstructionReady(IR);
+ notifyInstructionIssued(IR, {});
+ IR.getInstruction()->forceExecuted();
+ notifyInstructionExecuted(IR);
+ return moveToTheNextStage(IR);
+}
+
+// Schedule the instruction for execution on the hardware.
+Error ExecuteStage::execute(InstRef &IR) {
+ assert(isAvailable(IR) && "Scheduler is not available!");
+
+#ifndef NDEBUG
+ // Ensure that the HWS has not stored this instruction in its queues.
+ HWS.sanityCheck(IR);
+#endif
+
+ if (IR.getInstruction()->isEliminated())
+ return handleInstructionEliminated(IR);
+
+ // Reserve a slot in each buffered resource. Also, mark units with
+ // BufferSize=0 as reserved. Resources with a buffer size of zero will only
+ // be released after MCIS is issued, and all the ResourceCycles for those
+ // units have been consumed.
+ HWS.dispatch(IR);
+ notifyReservedOrReleasedBuffers(IR, /* Reserved */ true);
+ if (!HWS.isReady(IR))
+ return ErrorSuccess();
+
+ // If we did not return early, then the scheduler is ready for execution.
+ notifyInstructionReady(IR);
+
+ // If we cannot issue immediately, the HWS will add IR to its ready queue for
+ // execution later, so we must return early here.
+ if (!HWS.mustIssueImmediately(IR))
+ return ErrorSuccess();
+
+ // Issue IR to the underlying pipelines.
+ return issueInstruction(IR);
+}
+
+void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionEvent(HWInstructionEvent::Executed, IR));
+}
+
+void ExecuteStage::notifyInstructionReady(const InstRef &IR) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionEvent(HWInstructionEvent::Ready, IR));
+}
+
+void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) const {
+ LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.'
+ << RR.second << "]\n");
+ for (HWEventListener *Listener : getListeners())
+ Listener->onResourceAvailable(RR);
+}
+
+void ExecuteStage::notifyInstructionIssued(
+ const InstRef &IR,
+ MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const {
+ LLVM_DEBUG({
+ dbgs() << "[E] Instruction Issued: #" << IR << '\n';
+ for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
+ dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
+ << Resource.first.second << "], ";
+ dbgs() << "cycles: " << Resource.second << '\n';
+ }
+ });
+
+ // Replace resource masks with valid resource processor IDs.
+ for (std::pair<ResourceRef, ResourceCycles> &Use : Used)
+ Use.first.first = HWS.getResourceID(Use.first.first);
+
+ notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used));
+}
+
+void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR,
+ bool Reserved) const {
+ const InstrDesc &Desc = IR.getInstruction()->getDesc();
+ if (Desc.Buffers.empty())
+ return;
+
+ SmallVector<unsigned, 4> BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end());
+ std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(),
+ [&](uint64_t Op) { return HWS.getResourceID(Op); });
+ if (Reserved) {
+ for (HWEventListener *Listener : getListeners())
+ Listener->onReservedBuffers(IR, BufferIDs);
+ return;
+ }
+
+ for (HWEventListener *Listener : getListeners())
+ Listener->onReleasedBuffers(IR, BufferIDs);
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/InstructionTables.cpp b/contrib/llvm/lib/MCA/Stages/InstructionTables.cpp
index 9b9dbc37fbdb..f918c183aa5a 100644
--- a/contrib/llvm/tools/llvm-mca/InstructionTables.cpp
+++ b/contrib/llvm/lib/MCA/Stages/InstructionTables.cpp
@@ -15,14 +15,12 @@
///
//===----------------------------------------------------------------------===//
-#include "InstructionTables.h"
+#include "llvm/MCA/Stages/InstructionTables.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
-bool InstructionTables::execute(InstRef &IR) {
- ArrayRef<uint64_t> Masks = IB.getProcResourceMasks();
+Error InstructionTables::execute(InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
UsedResources.clear();
@@ -31,17 +29,17 @@ bool InstructionTables::execute(InstRef &IR) {
// Skip zero-cycle resources (i.e., unused resources).
if (!Resource.second.size())
continue;
- double Cycles = static_cast<double>(Resource.second.size());
+ unsigned Cycles = Resource.second.size();
unsigned Index = std::distance(
Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first));
const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index);
unsigned NumUnits = ProcResource.NumUnits;
if (!ProcResource.SubUnitsIdxBegin) {
// The number of cycles consumed by each unit.
- Cycles /= NumUnits;
for (unsigned I = 0, E = NumUnits; I < E; ++I) {
ResourceRef ResourceUnit = std::make_pair(Index, 1U << I);
- UsedResources.emplace_back(std::make_pair(ResourceUnit, Cycles));
+ UsedResources.emplace_back(
+ std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits)));
}
continue;
}
@@ -53,10 +51,10 @@ bool InstructionTables::execute(InstRef &IR) {
unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1];
const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx);
// Compute the number of cycles consumed by each resource unit.
- double RUCycles = Cycles / (NumUnits * SubUnit.NumUnits);
for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) {
ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2);
- UsedResources.emplace_back(std::make_pair(ResourceUnit, RUCycles));
+ UsedResources.emplace_back(std::make_pair(
+ ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits)));
}
}
}
@@ -64,7 +62,8 @@ bool InstructionTables::execute(InstRef &IR) {
// Send a fake instruction issued event to all the views.
HWInstructionIssuedEvent Event(IR, UsedResources);
notifyEvent<HWInstructionIssuedEvent>(Event);
- return true;
+ return ErrorSuccess();
}
} // namespace mca
+} // namespace llvm
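
The InstructionTables change above replaces the pre-divided double "Cycles / NumUnits" with a ResourceCycles(Cycles, NumUnits) value; judging from the constructor arguments, the per-unit usage is now carried as a cycles/units pair rather than a rounded floating-point number. A stand-in Ratio type sketches the idea; the real ResourceCycles class lives in the MCA headers and is not reproduced here.

#include <cstdio>

// Hypothetical stand-in for the ResourceCycles pair used above.
struct Ratio {
  unsigned Num, Den; // cycles consumed over the number of units sharing them
  double toDouble() const { return static_cast<double>(Num) / Den; }
};

int main() {
  unsigned Cycles = 1, NumUnits = 3;
  double Approx = static_cast<double>(Cycles) / NumUnits; // old representation
  Ratio Exact{Cycles, NumUnits};                          // new representation
  std::printf("approx=%.17g exact=%u/%u (%.17g)\n", Approx, Exact.Num,
              Exact.Den, Exact.toDouble());
  return 0;
}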
diff --git a/contrib/llvm/tools/llvm-mca/RetireStage.cpp b/contrib/llvm/lib/MCA/Stages/RetireStage.cpp
index 55c3b887e478..d6bcc518662f 100644
--- a/contrib/llvm/tools/llvm-mca/RetireStage.cpp
+++ b/contrib/llvm/lib/MCA/Stages/RetireStage.cpp
@@ -14,19 +14,18 @@
///
//===----------------------------------------------------------------------===//
-#include "RetireStage.h"
-#include "HWEventListener.h"
+#include "llvm/MCA/Stages/RetireStage.h"
+#include "llvm/MCA/HWEventListener.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
-
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
-void RetireStage::cycleStart() {
+llvm::Error RetireStage::cycleStart() {
if (RCU.isEmpty())
- return;
+ return llvm::ErrorSuccess();
const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle();
unsigned NumRetired = 0;
@@ -40,18 +39,24 @@ void RetireStage::cycleStart() {
notifyInstructionRetired(Current.IR);
NumRetired++;
}
+
+ return llvm::ErrorSuccess();
+}
+
+llvm::Error RetireStage::execute(InstRef &IR) {
+ RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
+ return llvm::ErrorSuccess();
}
-void RetireStage::notifyInstructionRetired(const InstRef &IR) {
- LLVM_DEBUG(dbgs() << "[E] Instruction Retired: #" << IR << '\n');
- SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
+void RetireStage::notifyInstructionRetired(const InstRef &IR) const {
+ LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n');
+ llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
const Instruction &Inst = *IR.getInstruction();
- const InstrDesc &Desc = Inst.getDesc();
- bool ShouldFreeRegs = !(Desc.isZeroLatency() && Inst.isDependencyBreaking());
- for (const std::unique_ptr<WriteState> &WS : Inst.getDefs())
- PRF.removeRegisterWrite(*WS.get(), FreedRegs, ShouldFreeRegs);
+ for (const WriteState &WS : Inst.getDefs())
+ PRF.removeRegisterWrite(WS, FreedRegs);
notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/Stage.cpp b/contrib/llvm/lib/MCA/Stages/Stage.cpp
index 7ead940e63c1..38191645e736 100644
--- a/contrib/llvm/tools/llvm-mca/Stage.cpp
+++ b/contrib/llvm/lib/MCA/Stages/Stage.cpp
@@ -13,15 +13,17 @@
///
//===----------------------------------------------------------------------===//
-#include "Stage.h"
+#include "llvm/MCA/Stages/Stage.h"
+namespace llvm {
namespace mca {
// Pin the vtable here in the implementation file.
-Stage::Stage() {}
+Stage::~Stage() = default;
void Stage::addListener(HWEventListener *Listener) {
Listeners.insert(Listener);
}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/Support.cpp b/contrib/llvm/lib/MCA/Support.cpp
index 8f6b8a91f38f..335953e10481 100644
--- a/contrib/llvm/tools/llvm-mca/Support.cpp
+++ b/contrib/llvm/lib/MCA/Support.cpp
@@ -13,20 +13,24 @@
///
//===----------------------------------------------------------------------===//
-#include "Support.h"
+#include "llvm/MCA/Support.h"
#include "llvm/MC/MCSchedule.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
+#define DEBUG_TYPE "llvm-mca"
void computeProcResourceMasks(const MCSchedModel &SM,
- SmallVectorImpl<uint64_t> &Masks) {
+ MutableArrayRef<uint64_t> Masks) {
unsigned ProcResourceID = 0;
+ assert(Masks.size() == SM.getNumProcResourceKinds() &&
+ "Invalid number of elements");
+ // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it.
+ Masks[0] = 0;
+
// Create a unique bitmask for every processor resource unit.
- // Skip resource at index 0, since it always references 'InvalidUnit'.
- Masks.resize(SM.getNumProcResourceKinds());
for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
const MCProcResourceDesc &Desc = *SM.getProcResource(I);
if (Desc.SubUnitsIdxBegin)
@@ -47,6 +51,16 @@ void computeProcResourceMasks(const MCSchedModel &SM,
}
ProcResourceID++;
}
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "\nProcessor resource masks:"
+ << "\n");
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ LLVM_DEBUG(dbgs() << '[' << I << "] " << Desc.Name << " - " << Masks[I]
+ << '\n');
+ }
+#endif
}
double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
@@ -77,3 +91,4 @@ double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
}
} // namespace mca
+} // namespace llvm
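
A caller-side sketch for the new computeProcResourceMasks signature above: since the function now takes a MutableArrayRef instead of resizing a SmallVectorImpl itself, the output buffer must be pre-sized to SM.getNumProcResourceKinds(), which the added assert enforces. This assumes an LLVM tree at the corresponding revision.

#include <cstdint>
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/Support.h"

// Pre-size the buffer to the number of processor resource kinds, then let
// computeProcResourceMasks fill in one bitmask per resource.
llvm::SmallVector<uint64_t, 32> getMasks(const llvm::MCSchedModel &SM) {
  llvm::SmallVector<uint64_t, 32> Masks(SM.getNumProcResourceKinds());
  llvm::mca::computeProcResourceMasks(SM, Masks);
  return Masks;
}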
diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp
index ea17b2220a0b..da93602cbb28 100644
--- a/contrib/llvm/lib/Object/ArchiveWriter.cpp
+++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp
@@ -27,6 +27,8 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
+#include <map>
+
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
@@ -119,6 +121,11 @@ static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) {
OS.indent(Size - SizeSoFar);
}
+static bool isDarwin(object::Archive::Kind Kind) {
+ return Kind == object::Archive::K_DARWIN ||
+ Kind == object::Archive::K_DARWIN64;
+}
+
static bool isBSDLike(object::Archive::Kind Kind) {
switch (Kind) {
case object::Archive::K_GNU:
@@ -126,8 +133,8 @@ static bool isBSDLike(object::Archive::Kind Kind) {
return false;
case object::Archive::K_BSD:
case object::Archive::K_DARWIN:
- return true;
case object::Archive::K_DARWIN64:
+ return true;
case object::Archive::K_COFF:
break;
}
@@ -243,20 +250,33 @@ static void addToStringTable(raw_ostream &Out, StringRef ArcName,
static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
raw_ostream &StringTable,
+ StringMap<uint64_t> &MemberNames,
object::Archive::Kind Kind, bool Thin,
StringRef ArcName, const NewArchiveMember &M,
+ sys::TimePoint<std::chrono::seconds> ModTime,
unsigned Size) {
+
if (isBSDLike(Kind))
- return printBSDMemberHeader(Out, Pos, M.MemberName, M.ModTime, M.UID, M.GID,
+ return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
if (!useStringTable(Thin, M.MemberName))
- return printGNUSmallMemberHeader(Out, M.MemberName, M.ModTime, M.UID, M.GID,
+ return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
Out << '/';
- uint64_t NamePos = StringTable.tell();
- addToStringTable(StringTable, ArcName, M, Thin);
+ uint64_t NamePos;
+ if (Thin) {
+ NamePos = StringTable.tell();
+ addToStringTable(StringTable, ArcName, M, Thin);
+ } else {
+ auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
+ if (Insertion.second) {
+ Insertion.first->second = StringTable.tell();
+ addToStringTable(StringTable, ArcName, M, Thin);
+ }
+ NamePos = Insertion.first->second;
+ }
printWithSpacePadding(Out, NamePos, 15);
- printRestOfMemberHeader(Out, M.ModTime, M.UID, M.GID, M.Perms, Size);
+ printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size);
}
namespace {
@@ -310,7 +330,9 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind,
static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, ArrayRef<MemberData> Members,
StringRef StringTable) {
- if (StringTable.empty())
+ // We don't write a symbol table on an archive with no members -- except on
+ // Darwin, where the linker will abort unless the archive has a symbol table.
+ if (StringTable.empty() && !isDarwin(Kind))
return;
unsigned NumSyms = 0;
@@ -318,15 +340,15 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
NumSyms += M.Symbols.size();
unsigned Size = 0;
- Size += is64BitKind(Kind) ? 8 : 4; // Number of entries
+ unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t);
+
+ Size += OffsetSize; // Number of entries
if (isBSDLike(Kind))
- Size += NumSyms * 8; // Table
- else if (is64BitKind(Kind))
- Size += NumSyms * 8; // Table
+ Size += NumSyms * OffsetSize * 2; // Table
else
- Size += NumSyms * 4; // Table
+ Size += NumSyms * OffsetSize; // Table
if (isBSDLike(Kind))
- Size += 4; // byte count
+ Size += OffsetSize; // byte count
Size += StringTable.size();
// ld64 expects the members to be 8-byte aligned for 64-bit content and at
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
@@ -336,25 +358,26 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
unsigned Pad = OffsetToAlignment(Size, Alignment);
Size += Pad;
- if (isBSDLike(Kind))
- printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0,
- 0, Size);
- else if (is64BitKind(Kind))
- printGNUSmallMemberHeader(Out, "/SYM64", now(Deterministic), 0, 0, 0, Size);
- else
- printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size);
+ if (isBSDLike(Kind)) {
+ const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
+ printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
+ Size);
+ } else {
+ const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
+ printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
+ }
uint64_t Pos = Out.tell() + Size;
if (isBSDLike(Kind))
- print<uint32_t>(Out, Kind, NumSyms * 8);
+ printNBits(Out, Kind, NumSyms * 2 * OffsetSize);
else
printNBits(Out, Kind, NumSyms);
for (const MemberData &M : Members) {
for (unsigned StringOffset : M.Symbols) {
if (isBSDLike(Kind))
- print<uint32_t>(Out, Kind, StringOffset);
+ printNBits(Out, Kind, StringOffset);
printNBits(Out, Kind, Pos); // member offset
}
Pos += M.Header.size() + M.Data.size() + M.Padding.size();
@@ -362,7 +385,7 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
if (isBSDLike(Kind))
// byte count of the string table
- print<uint32_t>(Out, Kind, StringTable.size());
+ printNBits(Out, Kind, StringTable.size());
Out << StringTable;
while (Pad--)
@@ -372,20 +395,32 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
static Expected<std::vector<unsigned>>
getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
std::vector<unsigned> Ret;
- LLVMContext Context;
- Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
- object::SymbolicFile::createSymbolicFile(Buf, llvm::file_magic::unknown,
- &Context);
- if (!ObjOrErr) {
- // FIXME: check only for "not an object file" errors.
- consumeError(ObjOrErr.takeError());
- return Ret;
+  // When the LLVMContext is populated, SymbolicFile will hold a reference to
+  // it, so SymbolicFile must be destroyed first.
+ LLVMContext Context;
+ std::unique_ptr<object::SymbolicFile> Obj;
+ if (identify_magic(Buf.getBuffer()) == file_magic::bitcode) {
+ auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
+ Buf, file_magic::bitcode, &Context);
+ if (!ObjOrErr) {
+ // FIXME: check only for "not an object file" errors.
+ consumeError(ObjOrErr.takeError());
+ return Ret;
+ }
+ Obj = std::move(*ObjOrErr);
+ } else {
+ auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
+ if (!ObjOrErr) {
+ // FIXME: check only for "not an object file" errors.
+ consumeError(ObjOrErr.takeError());
+ return Ret;
+ }
+ Obj = std::move(*ObjOrErr);
}
HasObject = true;
- object::SymbolicFile &Obj = *ObjOrErr.get();
- for (const object::BasicSymbolRef &S : Obj.symbols()) {
+ for (const object::BasicSymbolRef &S : Obj->symbols()) {
if (!isArchiveSymbol(S))
continue;
Ret.push_back(SymNames.tell());
@@ -399,7 +434,7 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, StringRef ArcName,
- ArrayRef<NewArchiveMember> NewMembers) {
+ bool Deterministic, ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
// This ignores the symbol table, but we only need the value mod 8 and the
@@ -408,6 +443,62 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
std::vector<MemberData> Ret;
bool HasObject = false;
+
+ // Deduplicate long member names in the string table and reuse earlier name
+ // offsets. This especially saves space for COFF Import libraries where all
+ // members have the same name.
+ StringMap<uint64_t> MemberNames;
+
+ // UniqueTimestamps is a special case to improve debugging on Darwin:
+ //
+ // The Darwin linker does not link debug info into the final
+  // binary. Instead, it emits entries of type N_OSO in the output
+ // binary's symbol table, containing references to the linked-in
+ // object files. Using that reference, the debugger can read the
+ // debug data directly from the object files. Alternatively, an
+ // invocation of 'dsymutil' will link the debug data from the object
+ // files into a dSYM bundle, which can be loaded by the debugger,
+ // instead of the object files.
+ //
+ // For an object file, the N_OSO entries contain the absolute path
+  // to the file, and the file's timestamp. For an object
+ // included in an archive, the path is formatted like
+ // "/absolute/path/to/archive.a(member.o)", and the timestamp is the
+ // archive member's timestamp, rather than the archive's timestamp.
+ //
+ // However, this doesn't always uniquely identify an object within
+ // an archive -- an archive file can have multiple entries with the
+ // same filename. (This will happen commonly if the original object
+ // files started in different directories.) The only way they get
+ // distinguished, then, is via the timestamp. But this process is
+ // unable to find the correct object file in the archive when there
+ // are two files of the same name and timestamp.
+ //
+ // Additionally, timestamp==0 is treated specially, and causes the
+  // timestamp to be ignored as a match criterion.
+ //
+ // That will "usually" work out okay when creating an archive not in
+ // deterministic timestamp mode, because the objects will probably
+ // have been created at different timestamps.
+ //
+ // To ameliorate this problem, in deterministic archive mode (which
+ // is the default), on Darwin we will emit a unique non-zero
+ // timestamp for each entry with a duplicated name. This is still
+ // deterministic: the only thing affecting that timestamp is the
+ // order of the files in the resultant archive.
+ //
+ // See also the functions that handle the lookup:
+ // in lldb: ObjectContainerBSDArchive::Archive::FindObject()
+ // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers().
+ bool UniqueTimestamps = Deterministic && isDarwin(Kind);
+ std::map<StringRef, unsigned> FilenameCount;
+ if (UniqueTimestamps) {
+ for (const NewArchiveMember &M : NewMembers)
+ FilenameCount[M.MemberName]++;
+ for (auto &Entry : FilenameCount)
+ Entry.second = Entry.second > 1 ? 1 : 0;
+ }
+
for (const NewArchiveMember &M : NewMembers) {
std::string Header;
raw_string_ostream Out(Header);
@@ -419,14 +510,19 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly. This matches the behaviour with cctools and ensures that ld64
// is happy with archives that we generate.
- unsigned MemberPadding = Kind == object::Archive::K_DARWIN
- ? OffsetToAlignment(Data.size(), 8)
- : 0;
+ unsigned MemberPadding =
+ isDarwin(Kind) ? OffsetToAlignment(Data.size(), 8) : 0;
unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2);
StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding);
- printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M,
- Buf.getBufferSize() + MemberPadding);
+ sys::TimePoint<std::chrono::seconds> ModTime;
+ if (UniqueTimestamps)
+ // Increment timestamp for each file of a given name.
+ ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
+ else
+ ModTime = M.ModTime;
+ printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, ArcName,
+ M, ModTime, Buf.getBufferSize() + MemberPadding);
Out.flush();
Expected<std::vector<unsigned>> Symbols =
@@ -457,8 +553,8 @@ Error llvm::writeArchive(StringRef ArcName,
SmallString<0> StringTableBuf;
raw_svector_ostream StringTable(StringTableBuf);
- Expected<std::vector<MemberData>> DataOrErr =
- computeMemberData(StringTable, SymNames, Kind, Thin, ArcName, NewMembers);
+ Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
+ StringTable, SymNames, Kind, Thin, ArcName, Deterministic, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
@@ -470,7 +566,7 @@ Error llvm::writeArchive(StringRef ArcName,
if (WriteSymtab) {
uint64_t MaxOffset = 0;
uint64_t LastOffset = MaxOffset;
- for (const auto& M : Data) {
+ for (const auto &M : Data) {
// Record the start of the member's offset
LastOffset = MaxOffset;
// Account for the size of each part associated with the member.
@@ -494,8 +590,12 @@ Error llvm::writeArchive(StringRef ArcName,
// If LastOffset isn't going to fit in a 32-bit variable we need to switch
// to 64-bit. Note that the file can be larger than 4GB as long as the last
// member starts before the 4GB offset.
- if (LastOffset >= (1ULL << Sym64Threshold))
- Kind = object::Archive::K_GNU64;
+ if (LastOffset >= (1ULL << Sym64Threshold)) {
+ if (Kind == object::Archive::K_DARWIN)
+ Kind = object::Archive::K_DARWIN64;
+ else
+ Kind = object::Archive::K_GNU64;
+ }
}
Expected<sys::fs::TempFile> Temp =
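
A small sketch of the unique-timestamp scheme described in the ArchiveWriter comment above: member names that appear more than once are seeded with 1 in FilenameCount and then post-incremented per member, so duplicates receive timestamps 1, 2, 3, ... while unique names keep timestamp 0. The member list and std::string keys here are illustrative only.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Members = {"a.o", "b.o", "a.o", "a.o"};
  std::map<std::string, unsigned> FilenameCount;
  for (const auto &M : Members)
    FilenameCount[M]++;
  // Names seen once start at 0 (timestamp ignored); duplicates start at 1.
  for (auto &Entry : FilenameCount)
    Entry.second = Entry.second > 1 ? 1 : 0;
  for (const auto &M : Members)
    std::printf("%s -> timestamp %u\n", M.c_str(), FilenameCount[M]++);
  return 0;
}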
diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp
index d7c25921ec36..fe41987f5c27 100644
--- a/contrib/llvm/lib/Object/Binary.cpp
+++ b/contrib/llvm/lib/Object/Binary.cpp
@@ -88,7 +88,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
Expected<OwningBinary<Binary>> object::createBinary(StringRef Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
- MemoryBuffer::getFileOrSTDIN(Path);
+ MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
+ /*RequiresNullTerminator=*/false);
if (std::error_code EC = FileOrErr.getError())
return errorCodeToError(EC);
std::unique_ptr<MemoryBuffer> &Buffer = FileOrErr.get();
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index 85b1913cb23b..fc1deeba339a 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -616,6 +616,8 @@ std::error_code COFFObjectFile::initBaseRelocPtr() {
IntPtr);
BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>(
IntPtr + DataEntry->Size);
+ // FIXME: Verify the section containing BaseRelocHeader has at least
+ // DataEntry->Size bytes after DataEntry->RelativeVirtualAddress.
return std::error_code();
}
@@ -637,10 +639,10 @@ std::error_code COFFObjectFile::initDebugDirectoryPtr() {
if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
return EC;
DebugDirectoryBegin = reinterpret_cast<const debug_directory *>(IntPtr);
- if (std::error_code EC = getRvaPtr(
- DataEntry->RelativeVirtualAddress + DataEntry->Size, IntPtr))
- return EC;
- DebugDirectoryEnd = reinterpret_cast<const debug_directory *>(IntPtr);
+ DebugDirectoryEnd = reinterpret_cast<const debug_directory *>(
+ IntPtr + DataEntry->Size);
+ // FIXME: Verify the section containing DebugDirectoryBegin has at least
+ // DataEntry->Size bytes after DataEntry->RelativeVirtualAddress.
return std::error_code();
}
@@ -936,6 +938,18 @@ iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const {
return make_range(base_reloc_begin(), base_reloc_end());
}
+std::error_code
+COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const {
+ Res = COFFHeader;
+ return std::error_code();
+}
+
+std::error_code
+COFFObjectFile::getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const {
+ Res = COFFBigObjHeader;
+ return std::error_code();
+}
+
std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const {
Res = PE32Header;
return std::error_code();
@@ -1051,6 +1065,16 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const {
return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize);
}
+uint32_t COFFObjectFile::getSymbolIndex(COFFSymbolRef Symbol) const {
+ uintptr_t Offset =
+ reinterpret_cast<uintptr_t>(Symbol.getRawPtr()) - getSymbolTable();
+ assert(Offset % getSymbolTableEntrySize() == 0 &&
+ "Symbol did not point to the beginning of a symbol");
+ size_t Index = Offset / getSymbolTableEntrySize();
+ assert(Index < getNumberOfSymbols());
+ return Index;
+}
+
std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
StringRef &Res) const {
StringRef Name;
@@ -1176,16 +1200,12 @@ COFFObjectFile::getRelocations(const coff_section *Sec) const {
#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(reloc_type) \
case COFF::reloc_type: \
- Res = #reloc_type; \
- break;
+ return #reloc_type;
-void COFFObjectFile::getRelocationTypeName(
- DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
- const coff_relocation *Reloc = toRel(Rel);
- StringRef Res;
+StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
switch (getMachine()) {
case COFF::IMAGE_FILE_MACHINE_AMD64:
- switch (Reloc->Type) {
+ switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR32);
@@ -1204,11 +1224,11 @@ void COFFObjectFile::getRelocationTypeName(
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_PAIR);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_SSPAN32);
default:
- Res = "Unknown";
+ return "Unknown";
}
break;
case COFF::IMAGE_FILE_MACHINE_ARMNT:
- switch (Reloc->Type) {
+ switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32NB);
@@ -1225,11 +1245,11 @@ void COFFObjectFile::getRelocationTypeName(
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T);
default:
- Res = "Unknown";
+ return "Unknown";
}
break;
case COFF::IMAGE_FILE_MACHINE_ARM64:
- switch (Reloc->Type) {
+ switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32NB);
@@ -1248,11 +1268,11 @@ void COFFObjectFile::getRelocationTypeName(
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14);
default:
- Res = "Unknown";
+ return "Unknown";
}
break;
case COFF::IMAGE_FILE_MACHINE_I386:
- switch (Reloc->Type) {
+ switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL16);
@@ -1265,21 +1285,33 @@ void COFFObjectFile::getRelocationTypeName(
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_SECREL7);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_REL32);
default:
- Res = "Unknown";
+ return "Unknown";
}
break;
default:
- Res = "Unknown";
+ return "Unknown";
}
- Result.append(Res.begin(), Res.end());
}
#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME
+void COFFObjectFile::getRelocationTypeName(
+ DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
+ const coff_relocation *Reloc = toRel(Rel);
+ StringRef Res = getRelocationTypeName(Reloc->Type);
+ Result.append(Res.begin(), Res.end());
+}
+
bool COFFObjectFile::isRelocatableObject() const {
return !DataDirectory;
}
+StringRef COFFObjectFile::mapDebugSectionName(StringRef Name) const {
+ return StringSwitch<StringRef>(Name)
+ .Case("eh_fram", "eh_frame")
+ .Default(Name);
+}
+
bool ImportDirectoryEntryRef::
operator==(const ImportDirectoryEntryRef &Other) const {
return ImportTable == Other.ImportTable && Index == Other.Index;
diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp
index 2eefb7ef13a3..cf8313f88f93 100644
--- a/contrib/llvm/lib/Object/ELF.cpp
+++ b/contrib/llvm/lib/Object/ELF.cpp
@@ -139,6 +139,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
break;
}
break;
+ case ELF::EM_MSP430:
+ switch (Type) {
+#include "llvm/BinaryFormat/ELFRelocs/MSP430.def"
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -147,7 +154,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
#undef ELF_RELOC
-uint32_t llvm::object::getELFRelrRelocationType(uint32_t Machine) {
+uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) {
switch (Machine) {
case ELF::EM_X86_64:
return ELF::R_X86_64_RELATIVE;
@@ -293,7 +300,7 @@ ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
Elf_Rela Rela;
Rela.r_info = 0;
Rela.r_addend = 0;
- Rela.setType(getRelrRelocationType(), false);
+ Rela.setType(getRelativeRelocationType(), false);
std::vector<Elf_Rela> Relocs;
// Word type: uint32_t for Elf32, and uint64_t for Elf64.
@@ -393,20 +400,17 @@ ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const {
if (GroupedByAddend && GroupHasAddend)
Addend += ReadSLEB();
+ if (!GroupHasAddend)
+ Addend = 0;
+
for (uint64_t I = 0; I != NumRelocsInGroup; ++I) {
Elf_Rela R;
Offset += GroupedByOffsetDelta ? GroupOffsetDelta : ReadSLEB();
R.r_offset = Offset;
R.r_info = GroupedByInfo ? GroupRInfo : ReadSLEB();
-
- if (GroupHasAddend) {
- if (!GroupedByAddend)
- Addend += ReadSLEB();
- R.r_addend = Addend;
- } else {
- R.r_addend = 0;
- }
-
+ if (GroupHasAddend && !GroupedByAddend)
+ Addend += ReadSLEB();
+ R.r_addend = Addend;
Relocs.push_back(R);
if (ErrStr)
diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp
index e806c8f28b15..9fb3a55ac7b1 100644
--- a/contrib/llvm/lib/Object/ELFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp
@@ -14,6 +14,7 @@
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
@@ -23,6 +24,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -327,3 +329,68 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
TheTriple.setArchName(Triple);
}
+
+std::vector<std::pair<DataRefImpl, uint64_t>>
+ELFObjectFileBase::getPltAddresses() const {
+ std::string Err;
+ const auto Triple = makeTriple();
+ const auto *T = TargetRegistry::lookupTarget(Triple.str(), Err);
+ if (!T)
+ return {};
+ uint64_t JumpSlotReloc = 0;
+ switch (Triple.getArch()) {
+ case Triple::x86:
+ JumpSlotReloc = ELF::R_386_JUMP_SLOT;
+ break;
+ case Triple::x86_64:
+ JumpSlotReloc = ELF::R_X86_64_JUMP_SLOT;
+ break;
+ case Triple::aarch64:
+ JumpSlotReloc = ELF::R_AARCH64_JUMP_SLOT;
+ break;
+ default:
+ return {};
+ }
+ std::unique_ptr<const MCInstrInfo> MII(T->createMCInstrInfo());
+ std::unique_ptr<const MCInstrAnalysis> MIA(
+ T->createMCInstrAnalysis(MII.get()));
+ if (!MIA)
+ return {};
+ Optional<SectionRef> Plt = None, RelaPlt = None, GotPlt = None;
+ for (const SectionRef &Section : sections()) {
+ StringRef Name;
+ if (Section.getName(Name))
+ continue;
+ if (Name == ".plt")
+ Plt = Section;
+ else if (Name == ".rela.plt" || Name == ".rel.plt")
+ RelaPlt = Section;
+ else if (Name == ".got.plt")
+ GotPlt = Section;
+ }
+ if (!Plt || !RelaPlt || !GotPlt)
+ return {};
+ StringRef PltContents;
+ if (Plt->getContents(PltContents))
+ return {};
+ ArrayRef<uint8_t> PltBytes((const uint8_t *)PltContents.data(),
+ Plt->getSize());
+ auto PltEntries = MIA->findPltEntries(Plt->getAddress(), PltBytes,
+ GotPlt->getAddress(), Triple);
+ // Build a map from GOT entry virtual address to PLT entry virtual address.
+ DenseMap<uint64_t, uint64_t> GotToPlt;
+ for (const auto &Entry : PltEntries)
+ GotToPlt.insert(std::make_pair(Entry.second, Entry.first));
+ // Find the relocations in the dynamic relocation table that point to
+ // locations in the GOT for which we know the corresponding PLT entry.
+ std::vector<std::pair<DataRefImpl, uint64_t>> Result;
+ for (const auto &Relocation : RelaPlt->relocations()) {
+ if (Relocation.getType() != JumpSlotReloc)
+ continue;
+ auto PltEntryIter = GotToPlt.find(Relocation.getOffset());
+ if (PltEntryIter != GotToPlt.end())
+ Result.push_back(std::make_pair(
+ Relocation.getSymbol()->getRawDataRefImpl(), PltEntryIter->second));
+ }
+ return Result;
+}
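
A hypothetical caller sketch for the new ELFObjectFileBase::getPltAddresses() above: it yields pairs whose first element is the DataRefImpl of the relocated dynamic symbol and whose second element is the virtual address of the matching PLT stub. This assumes an LLVM tree at the corresponding revision with the relevant targets registered; the printPltEntries helper is invented for illustration.

#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

// Print the dynamic symbol behind each PLT stub together with the stub's
// virtual address.
void printPltEntries(const ELFObjectFileBase &Obj) {
  for (const auto &Entry : Obj.getPltAddresses()) {
    SymbolRef Sym(Entry.first, &Obj);
    if (Expected<StringRef> Name = Sym.getName())
      outs() << *Name << " @ " << format_hex(Entry.second, 10) << '\n';
    else
      consumeError(Name.takeError());
  }
}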
diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp
index 7d43a84f3e0e..6fa23e06c409 100644
--- a/contrib/llvm/lib/Object/Error.cpp
+++ b/contrib/llvm/lib/Object/Error.cpp
@@ -57,6 +57,7 @@ std::string _object_error_category::message(int EV) const {
"defined.");
}
+void BinaryError::anchor() {}
char BinaryError::ID = 0;
char GenericBinaryError::ID = 0;
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
index e422903f2805..ce4d1cf92e20 100644
--- a/contrib/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -1592,8 +1592,8 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
"command extends past the end of the symbol table");
return;
}
- if (Dysymtab.nextdefsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) {
- Err = malformedError("nextdefsym in LC_DYSYMTAB load command "
+ if (Dysymtab.nextdefsym != 0 && Dysymtab.iextdefsym > Symtab.nsyms) {
+ Err = malformedError("iextdefsym in LC_DYSYMTAB load command "
"extends past the end of the symbol table");
return;
}
@@ -1606,7 +1606,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
return;
}
if (Dysymtab.nundefsym != 0 && Dysymtab.iundefsym > Symtab.nsyms) {
- Err = malformedError("nundefsym in LC_DYSYMTAB load command "
+ Err = malformedError("iundefsym in LC_DYSYMTAB load command "
"extends past the end of the symbol table");
return;
}
@@ -2438,7 +2438,7 @@ basic_symbol_iterator MachOObjectFile::symbol_end() const {
return basic_symbol_iterator(SymbolRef(DRI, this));
}
-basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
+symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
MachO::symtab_command Symtab = getSymtabLoadCommand();
if (!SymtabLoadCmd || Index >= Symtab.nsyms)
report_fatal_error("Requested symbol index is out of range.");
diff --git a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp
index b353ef3c835b..33ce7d8109fb 100644
--- a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -100,6 +100,7 @@ initializeRecordStreamer(const Module &M,
MCObjectFileInfo MOFI;
MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, MCCtx);
+ MOFI.setSDKVersion(M.getSDKVersion());
RecordStreamer Streamer(MCCtx, M);
T->createNullTargetStreamer(Streamer);
diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp
index 5fd823e0117e..f5de2e1d5ce2 100644
--- a/contrib/llvm/lib/Object/Object.cpp
+++ b/contrib/llvm/lib/Object/Object.cpp
@@ -105,7 +105,7 @@ void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
if (!SecOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SecOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(SecOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -187,7 +187,7 @@ const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
if (!Ret) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Ret.takeError(), OS, "");
+ logAllUnhandledErrors(Ret.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -199,7 +199,7 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
if (!Ret) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Ret.takeError(), OS, "");
+ logAllUnhandledErrors(Ret.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -229,7 +229,7 @@ const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
SmallVector<char, 0> ret;
(*unwrap(RI))->getTypeName(ret);
char *str = static_cast<char*>(safe_malloc(ret.size()));
- std::copy(ret.begin(), ret.end(), str);
+ llvm::copy(ret, str);
return str;
}
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
index db0ff220c4d8..cf63b89adc12 100644
--- a/contrib/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -77,6 +77,14 @@ bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const {
bool ObjectFile::isSectionStripped(DataRefImpl Sec) const { return false; }
+bool ObjectFile::isBerkeleyText(DataRefImpl Sec) const {
+ return isSectionText(Sec);
+}
+
+bool ObjectFile::isBerkeleyData(DataRefImpl Sec) const {
+ return isSectionData(Sec);
+}
+
section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const {
return section_iterator(SectionRef(Sec, this));
}
diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp
index 4d4c887b2d97..d84cb48c9fbd 100644
--- a/contrib/llvm/lib/Object/WasmObjectFile.cpp
+++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/ScopedPrinter.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -37,8 +38,8 @@ using namespace object;
void WasmSymbol::print(raw_ostream &Out) const {
Out << "Name=" << Info.Name
- << ", Kind=" << toString(wasm::WasmSymbolType(Info.Kind))
- << ", Flags=" << Info.Flags;
+ << ", Kind=" << toString(wasm::WasmSymbolType(Info.Kind))
+ << ", Flags=" << Info.Flags;
if (!isTypeData()) {
Out << ", ElemIndex=" << Info.ElementIndex;
} else if (isDefined()) {
@@ -62,9 +63,9 @@ ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) {
return std::move(ObjectFile);
}
-#define VARINT7_MAX ((1<<7)-1)
-#define VARINT7_MIN (-(1<<7))
-#define VARUINT7_MAX (1<<7)
+#define VARINT7_MAX ((1 << 7) - 1)
+#define VARINT7_MIN (-(1 << 7))
+#define VARUINT7_MAX (1 << 7)
#define VARUINT1_MAX (1)
static uint8_t readUint8(WasmObjectFile::ReadContext &Ctx) {
@@ -82,6 +83,8 @@ static uint32_t readUint32(WasmObjectFile::ReadContext &Ctx) {
}
static int32_t readFloat32(WasmObjectFile::ReadContext &Ctx) {
+ if (Ctx.Ptr + 4 > Ctx.End)
+ report_fatal_error("EOF while reading float64");
int32_t Result = 0;
memcpy(&Result, Ctx.Ptr, sizeof(Result));
Ctx.Ptr += sizeof(Result);
@@ -89,6 +92,8 @@ static int32_t readFloat32(WasmObjectFile::ReadContext &Ctx) {
}
static int64_t readFloat64(WasmObjectFile::ReadContext &Ctx) {
+ if (Ctx.Ptr + 8 > Ctx.End)
+ report_fatal_error("EOF while reading float64");
int64_t Result = 0;
memcpy(&Result, Ctx.Ptr, sizeof(Result));
Ctx.Ptr += sizeof(Result);
@@ -97,7 +102,7 @@ static int64_t readFloat64(WasmObjectFile::ReadContext &Ctx) {
static uint64_t readULEB128(WasmObjectFile::ReadContext &Ctx) {
unsigned Count;
- const char* Error = nullptr;
+ const char *Error = nullptr;
uint64_t Result = decodeULEB128(Ctx.Ptr, &Count, Ctx.End, &Error);
if (Error)
report_fatal_error(Error);
@@ -117,7 +122,7 @@ static StringRef readString(WasmObjectFile::ReadContext &Ctx) {
static int64_t readLEB128(WasmObjectFile::ReadContext &Ctx) {
unsigned Count;
- const char* Error = nullptr;
+ const char *Error = nullptr;
uint64_t Result = decodeSLEB128(Ctx.Ptr, &Count, Ctx.End, &Error);
if (Error)
report_fatal_error(Error);
@@ -171,7 +176,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
case wasm::WASM_OPCODE_F64_CONST:
Expr.Value.Float64 = readFloat64(Ctx);
break;
- case wasm::WASM_OPCODE_GET_GLOBAL:
+ case wasm::WASM_OPCODE_GLOBAL_GET:
Expr.Value.Global = readULEB128(Ctx);
break;
default:
@@ -189,7 +194,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
static wasm::WasmLimits readLimits(WasmObjectFile::ReadContext &Ctx) {
wasm::WasmLimits Result;
- Result.Flags = readVaruint1(Ctx);
+ Result.Flags = readVaruint32(Ctx);
Result.Initial = readVaruint32(Ctx);
if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
Result.Maximum = readVaruint32(Ctx);
@@ -203,8 +208,8 @@ static wasm::WasmTable readTable(WasmObjectFile::ReadContext &Ctx) {
return Table;
}
-static Error readSection(WasmSection &Section,
- WasmObjectFile::ReadContext &Ctx) {
+static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
+ WasmSectionOrderChecker &Checker) {
Section.Offset = Ctx.Ptr - Ctx.Start;
Section.Type = readUint8(Ctx);
LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
@@ -216,10 +221,24 @@ static Error readSection(WasmSection &Section,
return make_error<StringError>("Section too large",
object_error::parse_failed);
if (Section.Type == wasm::WASM_SEC_CUSTOM) {
- const uint8_t *NameStart = Ctx.Ptr;
- Section.Name = readString(Ctx);
- Size -= Ctx.Ptr - NameStart;
+ WasmObjectFile::ReadContext SectionCtx;
+ SectionCtx.Start = Ctx.Ptr;
+ SectionCtx.Ptr = Ctx.Ptr;
+ SectionCtx.End = Ctx.Ptr + Size;
+
+ Section.Name = readString(SectionCtx);
+
+ uint32_t SectionNameSize = SectionCtx.Ptr - SectionCtx.Start;
+ Ctx.Ptr += SectionNameSize;
+ Size -= SectionNameSize;
+ }
+
+ if (!Checker.isValidSectionOrder(Section.Type, Section.Name)) {
+ return make_error<StringError>("Out of order section type: " +
+ llvm::to_string(Section.Type),
+ object_error::parse_failed);
}
+
Section.Content = ArrayRef<uint8_t>(Ctx.Ptr, Size);
Ctx.Ptr += Size;
return Error::success();
@@ -230,8 +249,8 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
ErrorAsOutParameter ErrAsOutParam(&Err);
Header.Magic = getData().substr(0, 4);
if (Header.Magic != StringRef("\0asm", 4)) {
- Err = make_error<StringError>("Bad magic number",
- object_error::parse_failed);
+ Err =
+ make_error<StringError>("Bad magic number", object_error::parse_failed);
return;
}
@@ -254,8 +273,9 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
}
WasmSection Sec;
+ WasmSectionOrderChecker Checker;
while (Ctx.Ptr < Ctx.End) {
- if ((Err = readSection(Sec, Ctx)))
+ if ((Err = readSection(Sec, Ctx, Checker)))
return;
if ((Err = parseSection(Sec)))
return;
@@ -284,6 +304,8 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
return parseMemorySection(Ctx);
case wasm::WASM_SEC_GLOBAL:
return parseGlobalSection(Ctx);
+ case wasm::WASM_SEC_EVENT:
+ return parseEventSection(Ctx);
case wasm::WASM_SEC_EXPORT:
return parseExportSection(Ctx);
case wasm::WASM_SEC_START:
@@ -300,6 +322,22 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
}
}
+Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
+ // See https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
+ DylinkInfo.MemorySize = readVaruint32(Ctx);
+ DylinkInfo.MemoryAlignment = readVaruint32(Ctx);
+ DylinkInfo.TableSize = readVaruint32(Ctx);
+ DylinkInfo.TableAlignment = readVaruint32(Ctx);
+ uint32_t Count = readVaruint32(Ctx);
+ while (Count--) {
+ DylinkInfo.Needed.push_back(readString(Ctx));
+ }
+ if (Ctx.Ptr != Ctx.End)
+ return make_error<GenericBinaryError>("dylink section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
llvm::DenseSet<uint64_t> Seen;
if (Functions.size() != FunctionTypes.size()) {
@@ -336,8 +374,8 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
break;
}
if (Ctx.Ptr != SubSectionEnd)
- return make_error<GenericBinaryError>("Name sub-section ended prematurely",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "Name sub-section ended prematurely", object_error::parse_failed);
}
if (Ctx.Ptr != Ctx.End)
@@ -350,7 +388,8 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
HasLinkingSection = true;
if (Functions.size() != FunctionTypes.size()) {
return make_error<GenericBinaryError>(
- "Linking data must come after code section", object_error::parse_failed);
+ "Linking data must come after code section",
+ object_error::parse_failed);
}
LinkingData.Version = readVaruint32(Ctx);
@@ -427,19 +466,24 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
std::vector<wasm::WasmImport *> ImportedGlobals;
std::vector<wasm::WasmImport *> ImportedFunctions;
+ std::vector<wasm::WasmImport *> ImportedEvents;
ImportedGlobals.reserve(Imports.size());
ImportedFunctions.reserve(Imports.size());
+ ImportedEvents.reserve(Imports.size());
for (auto &I : Imports) {
if (I.Kind == wasm::WASM_EXTERNAL_FUNCTION)
ImportedFunctions.emplace_back(&I);
else if (I.Kind == wasm::WASM_EXTERNAL_GLOBAL)
ImportedGlobals.emplace_back(&I);
+ else if (I.Kind == wasm::WASM_EXTERNAL_EVENT)
+ ImportedEvents.emplace_back(&I);
}
while (Count--) {
wasm::WasmSymbolInfo Info;
- const wasm::WasmSignature *FunctionType = nullptr;
+ const wasm::WasmSignature *Signature = nullptr;
const wasm::WasmGlobalType *GlobalType = nullptr;
+ const wasm::WasmEventType *EventType = nullptr;
Info.Kind = readUint8(Ctx);
Info.Flags = readVaruint32(Ctx);
@@ -455,13 +499,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
if (IsDefined) {
Info.Name = readString(Ctx);
unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions;
- FunctionType = &Signatures[FunctionTypes[FuncIndex]];
+ Signature = &Signatures[FunctionTypes[FuncIndex]];
wasm::WasmFunction &Function = Functions[FuncIndex];
if (Function.SymbolName.empty())
Function.SymbolName = Info.Name;
} else {
wasm::WasmImport &Import = *ImportedFunctions[Info.ElementIndex];
- FunctionType = &Signatures[Import.SigIndex];
+ Signature = &Signatures[Import.SigIndex];
Info.Name = Import.Field;
Info.Module = Import.Module;
}
@@ -473,9 +517,8 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
IsDefined != isDefinedGlobalIndex(Info.ElementIndex))
return make_error<GenericBinaryError>("invalid global symbol index",
object_error::parse_failed);
- if (!IsDefined &&
- (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) ==
- wasm::WASM_SYMBOL_BINDING_WEAK)
+ if (!IsDefined && (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) ==
+ wasm::WASM_SYMBOL_BINDING_WEAK)
return make_error<GenericBinaryError>("undefined weak global symbol",
object_error::parse_failed);
if (IsDefined) {
@@ -521,6 +564,34 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
break;
}
+ case wasm::WASM_SYMBOL_TYPE_EVENT: {
+ Info.ElementIndex = readVaruint32(Ctx);
+ if (!isValidEventIndex(Info.ElementIndex) ||
+ IsDefined != isDefinedEventIndex(Info.ElementIndex))
+ return make_error<GenericBinaryError>("invalid event symbol index",
+ object_error::parse_failed);
+ if (!IsDefined && (Info.Flags & wasm::WASM_SYMBOL_BINDING_MASK) ==
+ wasm::WASM_SYMBOL_BINDING_WEAK)
+ return make_error<GenericBinaryError>("undefined weak global symbol",
+ object_error::parse_failed);
+ if (IsDefined) {
+ Info.Name = readString(Ctx);
+ unsigned EventIndex = Info.ElementIndex - NumImportedEvents;
+ wasm::WasmEvent &Event = Events[EventIndex];
+ Signature = &Signatures[Event.Type.SigIndex];
+ EventType = &Event.Type;
+ if (Event.SymbolName.empty())
+ Event.SymbolName = Info.Name;
+
+ } else {
+ wasm::WasmImport &Import = *ImportedEvents[Info.ElementIndex];
+ EventType = &Import.Event;
+ Signature = &Signatures[EventType->SigIndex];
+ Info.Name = Import.Field;
+ }
+ break;
+ }
+
default:
return make_error<GenericBinaryError>("Invalid symbol type",
object_error::parse_failed);
@@ -533,8 +604,8 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
Twine(Info.Name),
object_error::parse_failed);
LinkingData.SymbolTable.emplace_back(Info);
- Symbols.emplace_back(LinkingData.SymbolTable.back(), FunctionType,
- GlobalType);
+ Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType, EventType,
+ Signature);
LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n");
}
@@ -547,7 +618,8 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
for (unsigned ComdatIndex = 0; ComdatIndex < ComdatCount; ++ComdatIndex) {
StringRef Name = readString(Ctx);
if (Name.empty() || !ComdatSet.insert(Name).second)
- return make_error<GenericBinaryError>("Bad/duplicate COMDAT name " + Twine(Name),
+ return make_error<GenericBinaryError>("Bad/duplicate COMDAT name " +
+ Twine(Name),
object_error::parse_failed);
LinkingData.Comdats.emplace_back(Name);
uint32_t Flags = readVaruint32(Ctx);
@@ -565,8 +637,8 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
object_error::parse_failed);
case wasm::WASM_COMDAT_DATA:
if (Index >= DataSegments.size())
- return make_error<GenericBinaryError>("COMDAT data index out of range",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "COMDAT data index out of range", object_error::parse_failed);
if (DataSegments[Index].Data.Comdat != UINT32_MAX)
return make_error<GenericBinaryError>("Data segment in two COMDATs",
object_error::parse_failed);
@@ -574,8 +646,8 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
break;
case wasm::WASM_COMDAT_FUNCTION:
if (!isDefinedFunctionIndex(Index))
- return make_error<GenericBinaryError>("COMDAT function index out of range",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "COMDAT function index out of range", object_error::parse_failed);
if (getDefinedFunction(Index).Comdat != UINT32_MAX)
return make_error<GenericBinaryError>("Function in two COMDATs",
object_error::parse_failed);
@@ -592,13 +664,18 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
if (SectionIndex >= Sections.size())
return make_error<GenericBinaryError>("Invalid section index",
object_error::parse_failed);
- WasmSection& Section = Sections[SectionIndex];
+ WasmSection &Section = Sections[SectionIndex];
uint32_t RelocCount = readVaruint32(Ctx);
uint32_t EndOffset = Section.Content.size();
+ uint32_t PreviousOffset = 0;
while (RelocCount--) {
wasm::WasmRelocation Reloc = {};
Reloc.Type = readVaruint32(Ctx);
Reloc.Offset = readVaruint32(Ctx);
+ if (Reloc.Offset < PreviousOffset)
+ return make_error<GenericBinaryError>("Relocations not in offset order",
+ object_error::parse_failed);
+ PreviousOffset = Reloc.Offset;
Reloc.Index = readVaruint32(Ctx);
switch (Reloc.Type) {
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
@@ -618,6 +695,11 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
return make_error<GenericBinaryError>("Bad relocation global index",
object_error::parse_failed);
break;
+ case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
+ if (!isValidEventSymbol(Reloc.Index))
+ return make_error<GenericBinaryError>("Bad relocation event index",
+ object_error::parse_failed);
+ break;
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
@@ -666,7 +748,10 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
}
Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
- if (Sec.Name == "name") {
+ if (Sec.Name == "dylink") {
+ if (Error Err = parseDylinkSection(Ctx))
+ return Err;
+ } else if (Sec.Name == "name") {
if (Error Err = parseNameSection(Ctx))
return Err;
} else if (Sec.Name == "linking") {
@@ -684,17 +769,16 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
Signatures.reserve(Count);
while (Count--) {
wasm::WasmSignature Sig;
- Sig.ReturnType = wasm::WASM_TYPE_NORESULT;
uint8_t Form = readUint8(Ctx);
if (Form != wasm::WASM_TYPE_FUNC) {
return make_error<GenericBinaryError>("Invalid signature type",
object_error::parse_failed);
}
uint32_t ParamCount = readVaruint32(Ctx);
- Sig.ParamTypes.reserve(ParamCount);
+ Sig.Params.reserve(ParamCount);
while (ParamCount--) {
uint32_t ParamType = readUint8(Ctx);
- Sig.ParamTypes.push_back(ParamType);
+ Sig.Params.push_back(wasm::ValType(ParamType));
}
uint32_t ReturnCount = readVaruint32(Ctx);
if (ReturnCount) {
@@ -702,9 +786,9 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
return make_error<GenericBinaryError>(
"Multiple return types not supported", object_error::parse_failed);
}
- Sig.ReturnType = readUint8(Ctx);
+ Sig.Returns.push_back(wasm::ValType(readUint8(Ctx)));
}
- Signatures.push_back(Sig);
+ Signatures.push_back(std::move(Sig));
}
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>("Type section ended prematurely",
@@ -735,13 +819,18 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
break;
case wasm::WASM_EXTERNAL_TABLE:
Im.Table = readTable(Ctx);
- if (Im.Table.ElemType != wasm::WASM_TYPE_ANYFUNC)
+ if (Im.Table.ElemType != wasm::WASM_TYPE_FUNCREF)
return make_error<GenericBinaryError>("Invalid table element type",
object_error::parse_failed);
break;
+ case wasm::WASM_EXTERNAL_EVENT:
+ NumImportedEvents++;
+ Im.Event.Attribute = readVarint32(Ctx);
+ Im.Event.SigIndex = readVarint32(Ctx);
+ break;
default:
- return make_error<GenericBinaryError>(
- "Unexpected import kind", object_error::parse_failed);
+ return make_error<GenericBinaryError>("Unexpected import kind",
+ object_error::parse_failed);
}
Imports.push_back(Im);
}
@@ -773,7 +862,7 @@ Error WasmObjectFile::parseTableSection(ReadContext &Ctx) {
Tables.reserve(Count);
while (Count--) {
Tables.push_back(readTable(Ctx));
- if (Tables.back().ElemType != wasm::WASM_TYPE_ANYFUNC) {
+ if (Tables.back().ElemType != wasm::WASM_TYPE_FUNCREF) {
return make_error<GenericBinaryError>("Invalid table element type",
object_error::parse_failed);
}
@@ -815,6 +904,24 @@ Error WasmObjectFile::parseGlobalSection(ReadContext &Ctx) {
return Error::success();
}
+Error WasmObjectFile::parseEventSection(ReadContext &Ctx) {
+ EventSection = Sections.size();
+ uint32_t Count = readVarint32(Ctx);
+ Events.reserve(Count);
+ while (Count--) {
+ wasm::WasmEvent Event;
+ Event.Index = NumImportedEvents + Events.size();
+ Event.Type.Attribute = readVaruint32(Ctx);
+ Event.Type.SigIndex = readVarint32(Ctx);
+ Events.push_back(Event);
+ }
+
+ if (Ctx.Ptr != Ctx.End)
+ return make_error<GenericBinaryError>("Event section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
Exports.reserve(Count);
@@ -834,12 +941,17 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
return make_error<GenericBinaryError>("Invalid global export",
object_error::parse_failed);
break;
+ case wasm::WASM_EXTERNAL_EVENT:
+ if (!isValidEventIndex(Ex.Index))
+ return make_error<GenericBinaryError>("Invalid event export",
+ object_error::parse_failed);
+ break;
case wasm::WASM_EXTERNAL_MEMORY:
case wasm::WASM_EXTERNAL_TABLE:
break;
default:
- return make_error<GenericBinaryError>(
- "Unexpected export kind", object_error::parse_failed);
+ return make_error<GenericBinaryError>("Unexpected export kind",
+ object_error::parse_failed);
}
Exports.push_back(Ex);
}
@@ -865,6 +977,14 @@ bool WasmObjectFile::isDefinedGlobalIndex(uint32_t Index) const {
return Index >= NumImportedGlobals && isValidGlobalIndex(Index);
}
+bool WasmObjectFile::isValidEventIndex(uint32_t Index) const {
+ return Index < NumImportedEvents + Events.size();
+}
+
+bool WasmObjectFile::isDefinedEventIndex(uint32_t Index) const {
+ return Index >= NumImportedEvents && isValidEventIndex(Index);
+}
+
bool WasmObjectFile::isValidFunctionSymbol(uint32_t Index) const {
return Index < Symbols.size() && Symbols[Index].isTypeFunction();
}
@@ -873,6 +993,10 @@ bool WasmObjectFile::isValidGlobalSymbol(uint32_t Index) const {
return Index < Symbols.size() && Symbols[Index].isTypeGlobal();
}
+bool WasmObjectFile::isValidEventSymbol(uint32_t Index) const {
+ return Index < Symbols.size() && Symbols[Index].isTypeEvent();
+}
+
bool WasmObjectFile::isValidDataSymbol(uint32_t Index) const {
return Index < Symbols.size() && Symbols[Index].isTypeData();
}
@@ -891,6 +1015,11 @@ wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) {
return Globals[Index - NumImportedGlobals];
}
+wasm::WasmEvent &WasmObjectFile::getDefinedEvent(uint32_t Index) {
+ assert(isDefinedEventIndex(Index));
+ return Events[Index - NumImportedEvents];
+}
+
Error WasmObjectFile::parseStartSection(ReadContext &Ctx) {
StartFunction = readVaruint32(Ctx);
if (!isValidFunctionIndex(StartFunction))
@@ -1050,10 +1179,11 @@ Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
return getSymbolValue(Symb);
}
-uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol& Sym) const {
+uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
switch (Sym.Info.Kind) {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
return Sym.Info.ElementIndex;
case wasm::WASM_SYMBOL_TYPE_DATA: {
// The value of a data symbol is the segment offset, plus the symbol
@@ -1096,6 +1226,8 @@ WasmObjectFile::getSymbolType(DataRefImpl Symb) const {
return SymbolRef::ST_Data;
case wasm::WASM_SYMBOL_TYPE_SECTION:
return SymbolRef::ST_Debug;
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
+ return SymbolRef::ST_Other;
}
llvm_unreachable("Unknown WasmSymbol::SymbolType");
@@ -1104,7 +1236,7 @@ WasmObjectFile::getSymbolType(DataRefImpl Symb) const {
Expected<section_iterator>
WasmObjectFile::getSymbolSection(DataRefImpl Symb) const {
- const WasmSymbol& Sym = getWasmSymbol(Symb);
+ const WasmSymbol &Sym = getWasmSymbol(Symb);
if (Sym.isUndefined())
return section_end();
@@ -1119,10 +1251,12 @@ WasmObjectFile::getSymbolSection(DataRefImpl Symb) const {
case wasm::WASM_SYMBOL_TYPE_DATA:
Ref.d.a = DataSection;
break;
- case wasm::WASM_SYMBOL_TYPE_SECTION: {
+ case wasm::WASM_SYMBOL_TYPE_SECTION:
Ref.d.a = Sym.Info.ElementIndex;
break;
- }
+ case wasm::WASM_SYMBOL_TYPE_EVENT:
+ Ref.d.a = EventSection;
+ break;
default:
llvm_unreachable("Unknown WasmSymbol::SymbolType");
}
@@ -1145,6 +1279,7 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
ECase(TABLE);
ECase(MEMORY);
ECase(GLOBAL);
+ ECase(EVENT);
ECase(EXPORT);
ECase(START);
ECase(ELEM);
@@ -1218,9 +1353,7 @@ relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Ref) const {
return relocation_iterator(RelocationRef(RelocRef, this));
}
-void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
- Rel.d.b++;
-}
+void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const { Rel.d.b++; }
uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const {
const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
@@ -1244,12 +1377,12 @@ uint64_t WasmObjectFile::getRelocationType(DataRefImpl Ref) const {
void WasmObjectFile::getRelocationTypeName(
DataRefImpl Ref, SmallVectorImpl<char> &Result) const {
- const wasm::WasmRelocation& Rel = getWasmRelocation(Ref);
+ const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
StringRef Res = "Unknown";
-#define WASM_RELOC(name, value) \
- case wasm::name: \
- Res = #name; \
+#define WASM_RELOC(name, value) \
+ case wasm::name: \
+ Res = #name; \
break;
switch (Rel.Type) {
@@ -1283,9 +1416,9 @@ SubtargetFeatures WasmObjectFile::getFeatures() const {
return SubtargetFeatures();
}
-bool WasmObjectFile::isRelocatableObject() const {
- return HasLinkingSection;
-}
+bool WasmObjectFile::isRelocatableObject() const { return HasLinkingSection; }
+
+bool WasmObjectFile::isSharedObject() const { return HasDylinkSection; }
const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const {
assert(Ref.d.a < Sections.size());
@@ -1305,7 +1438,62 @@ WasmObjectFile::getWasmRelocation(const RelocationRef &Ref) const {
const wasm::WasmRelocation &
WasmObjectFile::getWasmRelocation(DataRefImpl Ref) const {
assert(Ref.d.a < Sections.size());
- const WasmSection& Sec = Sections[Ref.d.a];
+ const WasmSection &Sec = Sections[Ref.d.a];
assert(Ref.d.b < Sec.Relocations.size());
return Sec.Relocations[Ref.d.b];
}
+
+int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
+ StringRef CustomSectionName) {
+ switch (ID) {
+ case wasm::WASM_SEC_CUSTOM:
+ return StringSwitch<unsigned>(CustomSectionName)
+ .Case("dylink", WASM_SEC_ORDER_DYLINK)
+ .Case("linking", WASM_SEC_ORDER_LINKING)
+ .StartsWith("reloc.", WASM_SEC_ORDER_RELOC)
+ .Case("name", WASM_SEC_ORDER_NAME)
+ .Case("producers", WASM_SEC_ORDER_PRODUCERS)
+ .Default(-1);
+ case wasm::WASM_SEC_TYPE:
+ return WASM_SEC_ORDER_TYPE;
+ case wasm::WASM_SEC_IMPORT:
+ return WASM_SEC_ORDER_IMPORT;
+ case wasm::WASM_SEC_FUNCTION:
+ return WASM_SEC_ORDER_FUNCTION;
+ case wasm::WASM_SEC_TABLE:
+ return WASM_SEC_ORDER_TABLE;
+ case wasm::WASM_SEC_MEMORY:
+ return WASM_SEC_ORDER_MEMORY;
+ case wasm::WASM_SEC_GLOBAL:
+ return WASM_SEC_ORDER_GLOBAL;
+ case wasm::WASM_SEC_EXPORT:
+ return WASM_SEC_ORDER_EXPORT;
+ case wasm::WASM_SEC_START:
+ return WASM_SEC_ORDER_START;
+ case wasm::WASM_SEC_ELEM:
+ return WASM_SEC_ORDER_ELEM;
+ case wasm::WASM_SEC_CODE:
+ return WASM_SEC_ORDER_CODE;
+ case wasm::WASM_SEC_DATA:
+ return WASM_SEC_ORDER_DATA;
+ case wasm::WASM_SEC_DATACOUNT:
+ return WASM_SEC_ORDER_DATACOUNT;
+ case wasm::WASM_SEC_EVENT:
+ return WASM_SEC_ORDER_EVENT;
+ default:
+ llvm_unreachable("invalid section");
+ }
+}
+
+bool WasmSectionOrderChecker::isValidSectionOrder(unsigned ID,
+ StringRef CustomSectionName) {
+ int Order = getSectionOrder(ID, CustomSectionName);
+ if (Order == -1) // Skip unknown sections
+ return true;
+ // There can be multiple "reloc." sections. Otherwise there shouldn't be any
+ // duplicate section orders.
+ bool IsValid = (LastOrder == Order && Order == WASM_SEC_ORDER_RELOC) ||
+ LastOrder < Order;
+ LastOrder = Order;
+ return IsValid;
+}
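
Note: the WasmSectionOrderChecker added above maps each section, including the known custom sections ("dylink", "linking", "reloc.*", "name", "producers"), to an ordinal and rejects any section whose ordinal does not increase, with two exceptions visible in isValidSectionOrder: unknown custom sections are skipped, and consecutive "reloc.*" sections may share the same slot. A minimal standalone sketch of that rule (the ordinal table below is hypothetical, standing in for the WASM_SEC_ORDER_* values):

#include <string>

// Sketch only: orderOf() stands in for the real mapping from wasm section IDs
// and custom-section names to WASM_SEC_ORDER_* ordinals.
struct SectionOrderSketch {
  int LastOrder = -1;

  static int orderOf(const std::string &Name) {
    if (Name == "dylink")
      return 0;
    if (Name == "type")
      return 1;
    if (Name == "import")
      return 2;
    if (Name.rfind("reloc.", 0) == 0)
      return 10; // every reloc.* section shares one ordinal
    if (Name == "name")
      return 11;
    return -1; // unknown sections are not order-checked
  }

  bool isValidOrder(const std::string &Name) {
    int Order = orderOf(Name);
    if (Order == -1) // skip unknown sections
      return true;
    // Repeated ordinals are tolerated only for reloc.* sections.
    bool IsValid = (LastOrder == Order && Order == 10) || LastOrder < Order;
    LastOrder = Order;
    return IsValid;
  }
};

With this ordering, checking "type" and then "import" succeeds, while feeding "import" before "type" fails; that is the behaviour the new readSection() check relies on.
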
diff --git a/contrib/llvm/lib/Object/WindowsResource.cpp b/contrib/llvm/lib/Object/WindowsResource.cpp
index 1b7282f13db0..65413dd8bea1 100644
--- a/contrib/llvm/lib/Object/WindowsResource.cpp
+++ b/contrib/llvm/lib/Object/WindowsResource.cpp
@@ -259,7 +259,7 @@ WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef,
std::vector<UTF16> EndianCorrectedName;
if (sys::IsBigEndianHost) {
EndianCorrectedName.resize(NameRef.size() + 1);
- std::copy(NameRef.begin(), NameRef.end(), EndianCorrectedName.begin() + 1);
+ llvm::copy(NameRef, EndianCorrectedName.begin() + 1);
EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED;
CorrectedName = makeArrayRef(EndianCorrectedName);
} else
@@ -501,8 +501,7 @@ void WindowsResourceCOFFWriter::writeFirstSection() {
void WindowsResourceCOFFWriter::writeSecondSection() {
// Now write the .rsrc$02 section.
for (auto const &RawDataEntry : Data) {
- std::copy(RawDataEntry.begin(), RawDataEntry.end(),
- BufferStart + CurrentOffset);
+ llvm::copy(RawDataEntry, BufferStart + CurrentOffset);
CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t));
}
@@ -672,7 +671,7 @@ void WindowsResourceCOFFWriter::writeDirectoryStringTable() {
support::endian::write16le(BufferStart + CurrentOffset, Length);
CurrentOffset += sizeof(uint16_t);
auto *Start = reinterpret_cast<UTF16 *>(BufferStart + CurrentOffset);
- std::copy(String.begin(), String.end(), Start);
+ llvm::copy(String, Start);
CurrentOffset += Length * sizeof(UTF16);
TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t);
}
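
Note: the WindowsResource hunks above only swap iterator-pair std::copy calls for the range-based llvm::copy helper from llvm/ADT/STLExtras.h; behaviour is unchanged. Roughly, the helper is a thin wrapper along these lines (simplified sketch, not the exact LLVM definition, which goes through ADL begin/end):

#include <algorithm>
#include <iterator>

// copy_range is a stand-in name; llvm::copy plays the same role.
template <typename R, typename OutputIt>
OutputIt copy_range(R &&Range, OutputIt Out) {
  return std::copy(std::begin(Range), std::end(Range), Out);
}

So llvm::copy(NameRef, EndianCorrectedName.begin() + 1) writes the same elements as the replaced std::copy(NameRef.begin(), NameRef.end(), ...) call, just without spelling out the iterator pair.
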
diff --git a/contrib/llvm/lib/ObjectYAML/COFFYAML.cpp b/contrib/llvm/lib/ObjectYAML/COFFYAML.cpp
index 9351ef96beb2..fdd94f4054e1 100644
--- a/contrib/llvm/lib/ObjectYAML/COFFYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/COFFYAML.cpp
@@ -407,7 +407,8 @@ struct NDLLCharacteristics {
void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
COFFYAML::Relocation &Rel) {
IO.mapRequired("VirtualAddress", Rel.VirtualAddress);
- IO.mapRequired("SymbolName", Rel.SymbolName);
+ IO.mapOptional("SymbolName", Rel.SymbolName, StringRef());
+ IO.mapOptional("SymbolTableIndex", Rel.SymbolTableIndex);
COFF::header &H = *static_cast<COFF::header *>(IO.getContext());
if (H.Machine == COFF::IMAGE_FILE_MACHINE_I386) {
diff --git a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index 6debd8ab0c6e..4deeae878013 100644
--- a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -511,7 +511,7 @@ std::shared_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection(
const codeview::StringsAndChecksums &SC) const {
assert(SC.hasStrings());
- auto Result = std::make_shared<DebugFrameDataSubsection>();
+ auto Result = std::make_shared<DebugFrameDataSubsection>(true);
for (const auto &YF : Frames) {
codeview::FrameData F;
F.CodeSize = YF.CodeSize;
diff --git a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 745f79cd77f3..713e9a710e94 100644
--- a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -108,7 +108,7 @@ void ScalarBitSetTraits<ExportFlags>::bitset(IO &io, ExportFlags &Flags) {
}
void ScalarBitSetTraits<PublicSymFlags>::bitset(IO &io, PublicSymFlags &Flags) {
- auto FlagNames = getProcSymFlagNames();
+ auto FlagNames = getPublicSymFlagNames();
for (const auto &E : FlagNames) {
io.bitSetCase(Flags, E.Name.str().c_str(),
static_cast<PublicSymFlags>(E.Value));
diff --git a/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp b/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
index f916b5d5f392..215d6bdd091e 100644
--- a/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -337,10 +337,18 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_HEXAGON_MACH_V3);
BCase(EF_HEXAGON_MACH_V4);
BCase(EF_HEXAGON_MACH_V5);
+ BCase(EF_HEXAGON_MACH_V55);
+ BCase(EF_HEXAGON_MACH_V60);
+ BCase(EF_HEXAGON_MACH_V62);
+ BCase(EF_HEXAGON_MACH_V65);
BCase(EF_HEXAGON_ISA_V2);
BCase(EF_HEXAGON_ISA_V3);
BCase(EF_HEXAGON_ISA_V4);
BCase(EF_HEXAGON_ISA_V5);
+ BCase(EF_HEXAGON_ISA_V55);
+ BCase(EF_HEXAGON_ISA_V60);
+ BCase(EF_HEXAGON_ISA_V62);
+ BCase(EF_HEXAGON_ISA_V65);
break;
case ELF::EM_AVR:
BCase(EF_AVR_ARCH_AVR1);
@@ -402,7 +410,9 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX902, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
+ BCase(EF_AMDGPU_SRAM_ECC);
break;
case ELF::EM_X86_64:
break;
@@ -743,6 +753,7 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
IO.mapRequired("Class", FileHdr.Class);
IO.mapRequired("Data", FileHdr.Data);
IO.mapOptional("OSABI", FileHdr.OSABI, ELFYAML::ELF_ELFOSABI(0));
+ IO.mapOptional("ABIVersion", FileHdr.ABIVersion, Hex8(0));
IO.mapRequired("Type", FileHdr.Type);
IO.mapRequired("Machine", FileHdr.Machine);
IO.mapOptional("Flags", FileHdr.Flags, ELFYAML::ELF_EF(0));
@@ -816,6 +827,7 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
IO.mapOptional("Address", Section.Address, Hex64(0));
IO.mapOptional("Link", Section.Link, StringRef());
IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
+ IO.mapOptional("EntSize", Section.EntSize);
IO.mapOptional("Info", Section.Info, StringRef());
}
diff --git a/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp b/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp
index 3c20bb74d501..47bf853e0d3e 100644
--- a/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -48,6 +48,16 @@ static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) {
IO.mapOptional("Relocations", Section.Relocations);
}
+static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Name", Section.Name);
+ IO.mapRequired("MemorySize", Section.MemorySize);
+ IO.mapRequired("MemoryAlignment", Section.MemoryAlignment);
+ IO.mapRequired("TableSize", Section.TableSize);
+ IO.mapRequired("TableAlignment", Section.TableAlignment);
+ IO.mapRequired("Needed", Section.Needed);
+}
+
static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Name", Section.Name);
@@ -100,6 +110,11 @@ static void sectionMapping(IO &IO, WasmYAML::GlobalSection &Section) {
IO.mapOptional("Globals", Section.Globals);
}
+static void sectionMapping(IO &IO, WasmYAML::EventSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Events", Section.Events);
+}
+
static void sectionMapping(IO &IO, WasmYAML::ExportSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Exports", Section.Exports);
@@ -142,7 +157,11 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
} else {
IO.mapRequired("Name", SectionName);
}
- if (SectionName == "linking") {
+ if (SectionName == "dylink") {
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::DylinkSection());
+ sectionMapping(IO, *cast<WasmYAML::DylinkSection>(Section.get()));
+ } else if (SectionName == "linking") {
if (!IO.outputting())
Section.reset(new WasmYAML::LinkingSection());
sectionMapping(IO, *cast<WasmYAML::LinkingSection>(Section.get()));
@@ -187,6 +206,11 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
Section.reset(new WasmYAML::GlobalSection());
sectionMapping(IO, *cast<WasmYAML::GlobalSection>(Section.get()));
break;
+ case wasm::WASM_SEC_EVENT:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::EventSection());
+ sectionMapping(IO, *cast<WasmYAML::EventSection>(Section.get()));
+ break;
case wasm::WASM_SEC_EXPORT:
if (!IO.outputting())
Section.reset(new WasmYAML::ExportSection());
@@ -227,6 +251,7 @@ void ScalarEnumerationTraits<WasmYAML::SectionType>::enumeration(
ECase(TABLE);
ECase(MEMORY);
ECase(GLOBAL);
+ ECase(EVENT);
ECase(EXPORT);
ECase(START);
ECase(ELEM);
@@ -307,9 +332,12 @@ void MappingTraits<WasmYAML::Import>::mapping(IO &IO,
} else if (Import.Kind == wasm::WASM_EXTERNAL_GLOBAL) {
IO.mapRequired("GlobalType", Import.GlobalImport.Type);
IO.mapRequired("GlobalMutable", Import.GlobalImport.Mutable);
+ } else if (Import.Kind == wasm::WASM_EXTERNAL_EVENT) {
+ IO.mapRequired("EventAttribute", Import.EventImport.Attribute);
+ IO.mapRequired("EventSigIndex", Import.EventImport.SigIndex);
} else if (Import.Kind == wasm::WASM_EXTERNAL_TABLE) {
IO.mapRequired("Table", Import.TableImport);
- } else if (Import.Kind == wasm::WASM_EXTERNAL_MEMORY ) {
+ } else if (Import.Kind == wasm::WASM_EXTERNAL_MEMORY) {
IO.mapRequired("Memory", Import.Memory);
} else {
llvm_unreachable("unhandled import type");
@@ -349,7 +377,7 @@ void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO,
case wasm::WASM_OPCODE_F64_CONST:
IO.mapRequired("Value", Expr.Value.Float64);
break;
- case wasm::WASM_OPCODE_GET_GLOBAL:
+ case wasm::WASM_OPCODE_GLOBAL_GET:
IO.mapRequired("Index", Expr.Value.Global);
break;
}
@@ -383,8 +411,8 @@ void MappingTraits<WasmYAML::ComdatEntry>::mapping(
IO.mapRequired("Index", ComdatEntry.Index);
}
-void MappingTraits<WasmYAML::Comdat>::mapping(
- IO &IO, WasmYAML::Comdat &Comdat) {
+void MappingTraits<WasmYAML::Comdat>::mapping(IO &IO,
+ WasmYAML::Comdat &Comdat) {
IO.mapRequired("Name", Comdat.Name);
IO.mapRequired("Entries", Comdat.Entries);
}
@@ -399,6 +427,8 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
IO.mapRequired("Function", Info.ElementIndex);
} else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_GLOBAL) {
IO.mapRequired("Global", Info.ElementIndex);
+ } else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_EVENT) {
+ IO.mapRequired("Event", Info.ElementIndex);
} else if (Info.Kind == wasm::WASM_SYMBOL_TYPE_DATA) {
if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) {
IO.mapRequired("Segment", Info.DataRef.Segment);
@@ -412,24 +442,31 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
}
}
+void MappingTraits<WasmYAML::Event>::mapping(IO &IO, WasmYAML::Event &Event) {
+ IO.mapRequired("Index", Event.Index);
+ IO.mapRequired("Attribute", Event.Attribute);
+ IO.mapRequired("SigIndex", Event.SigIndex);
+}
+
void ScalarBitSetTraits<WasmYAML::LimitFlags>::bitset(
IO &IO, WasmYAML::LimitFlags &Value) {
#define BCase(X) IO.bitSetCase(Value, #X, wasm::WASM_LIMITS_FLAG_##X)
BCase(HAS_MAX);
+ BCase(IS_SHARED);
#undef BCase
}
void ScalarBitSetTraits<WasmYAML::SegmentFlags>::bitset(
- IO &IO, WasmYAML::SegmentFlags &Value) {
-}
+ IO &IO, WasmYAML::SegmentFlags &Value) {}
void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
IO &IO, WasmYAML::SymbolFlags &Value) {
-#define BCaseMask(M, X) IO.maskedBitSetCase(Value, #X, wasm::WASM_SYMBOL_##X, wasm::WASM_SYMBOL_##M)
- //BCaseMask(BINDING_MASK, BINDING_GLOBAL);
+#define BCaseMask(M, X) \
+ IO.maskedBitSetCase(Value, #X, wasm::WASM_SYMBOL_##X, wasm::WASM_SYMBOL_##M)
+ // BCaseMask(BINDING_MASK, BINDING_GLOBAL);
BCaseMask(BINDING_MASK, BINDING_WEAK);
BCaseMask(BINDING_MASK, BINDING_LOCAL);
- //BCaseMask(VISIBILITY_MASK, VISIBILITY_DEFAULT);
+ // BCaseMask(VISIBILITY_MASK, VISIBILITY_DEFAULT);
BCaseMask(VISIBILITY_MASK, VISIBILITY_HIDDEN);
BCaseMask(UNDEFINED, UNDEFINED);
#undef BCaseMask
@@ -442,6 +479,7 @@ void ScalarEnumerationTraits<WasmYAML::SymbolKind>::enumeration(
ECase(DATA);
ECase(GLOBAL);
ECase(SECTION);
+ ECase(EVENT);
#undef ECase
}
@@ -452,7 +490,8 @@ void ScalarEnumerationTraits<WasmYAML::ValueType>::enumeration(
ECase(I64);
ECase(F32);
ECase(F64);
- ECase(ANYFUNC);
+ ECase(V128);
+ ECase(FUNCREF);
ECase(FUNC);
ECase(NORESULT);
#undef ECase
@@ -465,6 +504,7 @@ void ScalarEnumerationTraits<WasmYAML::ExportKind>::enumeration(
ECase(TABLE);
ECase(MEMORY);
ECase(GLOBAL);
+ ECase(EVENT);
#undef ECase
}
@@ -476,14 +516,14 @@ void ScalarEnumerationTraits<WasmYAML::Opcode>::enumeration(
ECase(I64_CONST);
ECase(F64_CONST);
ECase(F32_CONST);
- ECase(GET_GLOBAL);
+ ECase(GLOBAL_GET);
#undef ECase
}
void ScalarEnumerationTraits<WasmYAML::TableType>::enumeration(
IO &IO, WasmYAML::TableType &Type) {
#define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X);
- ECase(ANYFUNC);
+ ECase(FUNCREF);
#undef ECase
}
diff --git a/contrib/llvm/lib/OptRemarks/OptRemarksParser.cpp b/contrib/llvm/lib/OptRemarks/OptRemarksParser.cpp
new file mode 100644
index 000000000000..0478d2bfbfa6
--- /dev/null
+++ b/contrib/llvm/lib/OptRemarks/OptRemarksParser.cpp
@@ -0,0 +1,368 @@
+//===- OptRemarksParser.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility methods used by clients that want to use the
+// parser for optimization remarks in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/OptRemarks.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm;
+
+namespace {
+struct RemarkParser {
+ /// Source manager for better error messages.
+ SourceMgr SM;
+ /// Stream for yaml parsing.
+ yaml::Stream Stream;
+ /// Storage for the error stream.
+ std::string ErrorString;
+ /// The error stream.
+ raw_string_ostream ErrorStream;
+ /// Iterator in the YAML stream.
+ yaml::document_iterator DI;
+ /// The parsed remark (if any).
+ Optional<LLVMOptRemarkEntry> LastRemark;
+ /// Temporary parsing buffer for the arguments.
+ SmallVector<LLVMOptRemarkArg, 8> TmpArgs;
+ /// The state used by the parser to parse a remark entry. Invalidated with
+ /// every call to `parseYAMLElement`.
+ struct ParseState {
+ /// Temporary parsing buffer for the arguments.
+ SmallVectorImpl<LLVMOptRemarkArg> *Args;
+ StringRef Type;
+ StringRef Pass;
+ StringRef Name;
+ StringRef Function;
+ /// Optional.
+ Optional<StringRef> File;
+ Optional<unsigned> Line;
+ Optional<unsigned> Column;
+ Optional<unsigned> Hotness;
+
+ ParseState(SmallVectorImpl<LLVMOptRemarkArg> &Args) : Args(&Args) {}
+ /// Use Args only as a **temporary** buffer.
+ ~ParseState() { Args->clear(); }
+ };
+
+ ParseState State;
+
+ /// Set to `true` if we had any errors during parsing.
+ bool HadAnyErrors = false;
+
+ RemarkParser(StringRef Buf)
+ : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
+ DI(Stream.begin()), LastRemark(), TmpArgs(), State(TmpArgs) {
+ SM.setDiagHandler(RemarkParser::HandleDiagnostic, this);
+ }
+
+ /// Parse a YAML element.
+ Error parseYAMLElement(yaml::Document &Remark);
+
+private:
+ /// Parse one key to a string.
+  /// Returns an error if the key is not a string.
+ Error parseKey(StringRef &Result, yaml::KeyValueNode &Node);
+ /// Parse one value to a string.
+ Error parseValue(StringRef &Result, yaml::KeyValueNode &Node);
+ /// Parse one value to an unsigned.
+ Error parseValue(Optional<unsigned> &Result, yaml::KeyValueNode &Node);
+ /// Parse a debug location.
+ Error parseDebugLoc(Optional<StringRef> &File, Optional<unsigned> &Line,
+ Optional<unsigned> &Column, yaml::KeyValueNode &Node);
+ /// Parse an argument.
+ Error parseArg(SmallVectorImpl<LLVMOptRemarkArg> &TmpArgs, yaml::Node &Node);
+
+ /// Handle a diagnostic from the YAML stream. Records the error in the
+ /// RemarkParser class.
+ static void HandleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
+ assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
+ auto *Parser = static_cast<RemarkParser *>(Ctx);
+ Diag.print(/*ProgName=*/nullptr, Parser->ErrorStream, /*ShowColors*/ false,
+ /*ShowKindLabels*/ true);
+ }
+};
+
+class ParseError : public ErrorInfo<ParseError> {
+public:
+ static char ID;
+
+ ParseError(StringRef Message, yaml::Node &Node)
+ : Message(Message), Node(Node) {}
+
+ void log(raw_ostream &OS) const override { OS << Message; }
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+ StringRef getMessage() const { return Message; }
+ yaml::Node &getNode() const { return Node; }
+
+private:
+ StringRef Message; // No need to hold a full copy of the buffer.
+ yaml::Node &Node;
+};
+
+char ParseError::ID = 0;
+
+static LLVMOptRemarkStringRef toOptRemarkStr(StringRef Str) {
+ return {Str.data(), static_cast<uint32_t>(Str.size())};
+}
+
+Error RemarkParser::parseKey(StringRef &Result, yaml::KeyValueNode &Node) {
+ auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey());
+ if (!Key)
+ return make_error<ParseError>("key is not a string.", Node);
+
+ Result = Key->getRawValue();
+ return Error::success();
+}
+
+Error RemarkParser::parseValue(StringRef &Result, yaml::KeyValueNode &Node) {
+ auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+ if (!Value)
+ return make_error<ParseError>("expected a value of scalar type.", Node);
+ Result = Value->getRawValue();
+
+ if (Result.front() == '\'')
+ Result = Result.drop_front();
+
+ if (Result.back() == '\'')
+ Result = Result.drop_back();
+
+ return Error::success();
+}
+
+Error RemarkParser::parseValue(Optional<unsigned> &Result,
+ yaml::KeyValueNode &Node) {
+ SmallVector<char, 4> Tmp;
+ auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+ if (!Value)
+ return make_error<ParseError>("expected a value of scalar type.", Node);
+ unsigned UnsignedValue = 0;
+ if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
+ return make_error<ParseError>("expected a value of integer type.", *Value);
+ Result = UnsignedValue;
+ return Error::success();
+}
+
+Error RemarkParser::parseDebugLoc(Optional<StringRef> &File,
+ Optional<unsigned> &Line,
+ Optional<unsigned> &Column,
+ yaml::KeyValueNode &Node) {
+ auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
+ if (!DebugLoc)
+ return make_error<ParseError>("expected a value of mapping type.", Node);
+
+ for (yaml::KeyValueNode &DLNode : *DebugLoc) {
+ StringRef KeyName;
+ if (Error E = parseKey(KeyName, DLNode))
+ return E;
+ if (KeyName == "File") {
+ File = StringRef(); // Set the optional to contain a default constructed
+ // value, to be passed to the parsing function.
+ if (Error E = parseValue(*File, DLNode))
+ return E;
+ } else if (KeyName == "Column") {
+ if (Error E = parseValue(Column, DLNode))
+ return E;
+ } else if (KeyName == "Line") {
+ if (Error E = parseValue(Line, DLNode))
+ return E;
+ } else {
+ return make_error<ParseError>("unknown entry in DebugLoc map.", DLNode);
+ }
+ }
+
+ // If any of the debug loc fields is missing, return an error.
+ if (!File || !Line || !Column)
+ return make_error<ParseError>("DebugLoc node incomplete.", Node);
+
+ return Error::success();
+}
+
+Error RemarkParser::parseArg(SmallVectorImpl<LLVMOptRemarkArg> &Args,
+ yaml::Node &Node) {
+ auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
+ if (!ArgMap)
+ return make_error<ParseError>("expected a value of mapping type.", Node);
+
+ StringRef ValueStr;
+ StringRef KeyStr;
+ Optional<StringRef> File;
+ Optional<unsigned> Line;
+ Optional<unsigned> Column;
+
+ for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
+ StringRef KeyName;
+ if (Error E = parseKey(KeyName, ArgEntry))
+ return E;
+
+ // Try to parse debug locs.
+ if (KeyName == "DebugLoc") {
+ // Can't have multiple DebugLoc entries per argument.
+ if (File || Line || Column)
+ return make_error<ParseError>(
+ "only one DebugLoc entry is allowed per argument.", ArgEntry);
+
+ if (Error E = parseDebugLoc(File, Line, Column, ArgEntry))
+ return E;
+ continue;
+ }
+
+ // If we already have a string, error out.
+ if (!ValueStr.empty())
+ return make_error<ParseError>(
+ "only one string entry is allowed per argument.", ArgEntry);
+
+ // Try to parse a string.
+ if (Error E = parseValue(ValueStr, ArgEntry))
+ return E;
+
+ // Keep the key from the string.
+ KeyStr = KeyName;
+ }
+
+ if (KeyStr.empty())
+ return make_error<ParseError>("argument key is missing.", *ArgMap);
+ if (ValueStr.empty())
+ return make_error<ParseError>("argument value is missing.", *ArgMap);
+
+ Args.push_back(LLVMOptRemarkArg{
+ toOptRemarkStr(KeyStr), toOptRemarkStr(ValueStr),
+ LLVMOptRemarkDebugLoc{toOptRemarkStr(File.getValueOr(StringRef())),
+ Line.getValueOr(0), Column.getValueOr(0)}});
+
+ return Error::success();
+}
+
+Error RemarkParser::parseYAMLElement(yaml::Document &Remark) {
+ // Parsing a new remark, clear the previous one.
+ LastRemark = None;
+ State = ParseState(TmpArgs);
+
+ auto *Root = dyn_cast<yaml::MappingNode>(Remark.getRoot());
+ if (!Root)
+ return make_error<ParseError>("document root is not of mapping type.",
+ *Remark.getRoot());
+
+ State.Type = Root->getRawTag();
+
+ for (yaml::KeyValueNode &RemarkField : *Root) {
+ StringRef KeyName;
+ if (Error E = parseKey(KeyName, RemarkField))
+ return E;
+
+ if (KeyName == "Pass") {
+ if (Error E = parseValue(State.Pass, RemarkField))
+ return E;
+ } else if (KeyName == "Name") {
+ if (Error E = parseValue(State.Name, RemarkField))
+ return E;
+ } else if (KeyName == "Function") {
+ if (Error E = parseValue(State.Function, RemarkField))
+ return E;
+ } else if (KeyName == "Hotness") {
+ if (Error E = parseValue(State.Hotness, RemarkField))
+ return E;
+ } else if (KeyName == "DebugLoc") {
+ if (Error E =
+ parseDebugLoc(State.File, State.Line, State.Column, RemarkField))
+ return E;
+ } else if (KeyName == "Args") {
+ auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
+ if (!Args)
+ return make_error<ParseError>("wrong value type for key.", RemarkField);
+
+ for (yaml::Node &Arg : *Args)
+ if (Error E = parseArg(*State.Args, Arg))
+ return E;
+ } else {
+ return make_error<ParseError>("unknown key.", RemarkField);
+ }
+ }
+
+ // If the YAML parsing failed, don't even continue parsing. We might
+ // encounter malformed YAML.
+ if (Stream.failed())
+ return make_error<ParseError>("YAML parsing failed.", *Remark.getRoot());
+
+ // Check if any of the mandatory fields are missing.
+ if (State.Type.empty() || State.Pass.empty() || State.Name.empty() ||
+ State.Function.empty())
+ return make_error<ParseError>("Type, Pass, Name or Function missing.",
+ *Remark.getRoot());
+
+ LastRemark = LLVMOptRemarkEntry{
+ toOptRemarkStr(State.Type),
+ toOptRemarkStr(State.Pass),
+ toOptRemarkStr(State.Name),
+ toOptRemarkStr(State.Function),
+ LLVMOptRemarkDebugLoc{toOptRemarkStr(State.File.getValueOr(StringRef())),
+ State.Line.getValueOr(0),
+ State.Column.getValueOr(0)},
+ State.Hotness.getValueOr(0),
+ static_cast<uint32_t>(State.Args->size()),
+ State.Args->data()};
+
+ return Error::success();
+}
+} // namespace
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RemarkParser, LLVMOptRemarkParserRef)
+
+extern "C" LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf,
+ uint64_t Size) {
+ return wrap(
+ new RemarkParser(StringRef(static_cast<const char *>(Buf), Size)));
+}
+
+extern "C" LLVMOptRemarkEntry *
+LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser) {
+ RemarkParser &TheParser = *unwrap(Parser);
+ // Check for EOF.
+ if (TheParser.HadAnyErrors || TheParser.DI == TheParser.Stream.end())
+ return nullptr;
+
+ // Try to parse an entry.
+ if (Error E = TheParser.parseYAMLElement(*TheParser.DI)) {
+ handleAllErrors(std::move(E), [&](const ParseError &PE) {
+ TheParser.Stream.printError(&PE.getNode(),
+ Twine(PE.getMessage()) + Twine('\n'));
+ TheParser.HadAnyErrors = true;
+ });
+ return nullptr;
+ }
+
+ // Move on.
+ ++TheParser.DI;
+
+ // Return the just-parsed remark.
+ if (Optional<LLVMOptRemarkEntry> &Entry = TheParser.LastRemark)
+ return &*Entry;
+ return nullptr;
+}
+
+extern "C" LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser) {
+ return unwrap(Parser)->HadAnyErrors;
+}
+
+extern "C" const char *
+LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser) {
+ return unwrap(Parser)->ErrorStream.str().c_str();
+}
+
+extern "C" void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser) {
+ delete unwrap(Parser);
+}
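
Note: the new file implements the parser behind the C API declared in llvm-c/OptRemarks.h, which is also added in this import. A hedged usage sketch using only the five entry points defined above; the buffer contents and error-handling policy are illustrative:

#include "llvm-c/OptRemarks.h"
#include <stdint.h>
#include <stdio.h>

// Counts the remarks in a YAML buffer such as one produced by clang's
// -fsave-optimization-record. LLVMOptRemarkParserGetNext returns NULL both at
// end-of-stream and on a parse error, so the error flag is checked afterwards.
static unsigned countRemarks(const char *Buf, uint64_t Size) {
  LLVMOptRemarkParserRef Parser = LLVMOptRemarkParserCreate(Buf, Size);
  unsigned N = 0;
  while (LLVMOptRemarkParserGetNext(Parser))
    ++N;
  if (LLVMOptRemarkParserHasError(Parser))
    fprintf(stderr, "remark parse error: %s\n",
            LLVMOptRemarkParserGetErrorMessage(Parser));
  LLVMOptRemarkParserDispose(Parser);
  return N;
}
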
diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp
index 022b9d5d933e..312ff7808759 100644
--- a/contrib/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm/lib/Option/OptTable.cpp
@@ -521,19 +521,17 @@ static const char *getOptionHelpGroup(const OptTable &Opts, OptSpecifier Id) {
return getOptionHelpGroup(Opts, GroupID);
}
-void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
+void OptTable::PrintHelp(raw_ostream &OS, const char *Usage, const char *Title,
bool ShowHidden, bool ShowAllAliases) const {
- PrintHelp(OS, Name, Title, /*Include*/ 0, /*Exclude*/
+ PrintHelp(OS, Usage, Title, /*Include*/ 0, /*Exclude*/
(ShowHidden ? 0 : HelpHidden), ShowAllAliases);
}
-void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
+void OptTable::PrintHelp(raw_ostream &OS, const char *Usage, const char *Title,
unsigned FlagsToInclude, unsigned FlagsToExclude,
bool ShowAllAliases) const {
- OS << "OVERVIEW: " << Title << "\n";
- OS << '\n';
- OS << "USAGE: " << Name << " [options] <inputs>\n";
- OS << '\n';
+ OS << "OVERVIEW: " << Title << "\n\n";
+ OS << "USAGE: " << Usage << "\n\n";
// Render help text into a map of group-name to a list of (option, help)
// pairs.
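
Note: with this change PrintHelp no longer appends " [options] <inputs>" itself; callers pass a complete usage string. A hypothetical caller updated for the new signature (the tool name and the option table are placeholders, not part of the patch):

#include "llvm/Option/OptTable.h"
#include "llvm/Support/raw_ostream.h"

void printToolHelp(const llvm::opt::OptTable &Opts) {
  // Before: Opts.PrintHelp(llvm::outs(), "mytool", "My Tool Overview");
  // printed "USAGE: mytool [options] <inputs>". Now the caller spells out the
  // whole usage line.
  Opts.PrintHelp(llvm::outs(), "mytool [options] <inputs>", "My Tool Overview",
                 /*ShowHidden=*/false, /*ShowAllAliases=*/false);
}
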
diff --git a/contrib/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm/lib/Passes/PassBuilder.cpp
index eb04dcc8b6ef..5ec94ea6f40a 100644
--- a/contrib/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm/lib/Passes/PassBuilder.cpp
@@ -48,6 +48,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
@@ -58,10 +59,10 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
-#include "llvm/Transforms/Instrumentation/CGProfile.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
@@ -75,6 +76,7 @@
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/Transforms/IPO/GlobalSplit.h"
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
@@ -86,9 +88,14 @@
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
@@ -127,6 +134,7 @@
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
@@ -136,14 +144,17 @@
#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
@@ -152,6 +163,7 @@
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
@@ -193,6 +205,12 @@ static cl::opt<bool> EnableSyntheticCounts(
static Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
+static cl::opt<bool>
+ EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
+ cl::desc("Enable control height reduction optimization (CHR)"));
+
+extern cl::opt<bool> EnableHotColdSplit;
+
static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
switch (Level) {
case PassBuilder::O0:
@@ -486,6 +504,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
+ if (EnableCHR && Level == O3 && PGOOpt &&
+ (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty()))
+ FPM.addPass(ControlHeightReductionPass());
+
return FPM;
}
@@ -493,7 +515,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
PassBuilder::OptimizationLevel Level,
bool RunProfileGen,
std::string ProfileGenFile,
- std::string ProfileUseFile) {
+ std::string ProfileUseFile,
+ std::string ProfileRemappingFile) {
  // Generally running simplification passes and the inliner with a high
// threshold results in smaller executables, but there may be cases where
// the size grows, so let's be conservative here and skip this simplification
@@ -547,7 +570,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
}
if (!ProfileUseFile.empty())
- MPM.addPass(PGOInstrumentationUse(ProfileUseFile));
+ MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile));
}
static InlineParams
@@ -593,6 +616,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Annotate sample profile right after early FPM to ensure freshness of
// the debug info.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+ PGOOpt->ProfileRemappingFile,
Phase == ThinLTOPhase::PreLink));
// Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
// for the profile annotation to be accurate in the ThinLTO backend.
@@ -642,7 +666,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
(!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) {
addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
- PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile);
+ PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile,
+ PGOOpt->ProfileRemappingFile);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
@@ -693,6 +718,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
+ // We only want to do hot cold splitting once for ThinLTO, during the
+ // post-link ThinLTO.
+ if (EnableHotColdSplit && Phase != ThinLTOPhase::PreLink)
+ MPM.addPass(HotColdSplittingPass());
+
for (auto &C : CGSCCOptimizerLateEPCallbacks)
C(MainCGPipeline, Level);
@@ -809,7 +839,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
}
- OptimizePM.addPass(LoopUnrollPass(Level));
+ OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
+ OptimizePM.addPass(WarnMissedTransformationsPass());
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
@@ -841,6 +872,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
  // inserting redundancies into the program. This even includes SimplifyCFG.
OptimizePM.addPass(SpeculateAroundPHIsPass());
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(OptimizePM, Level);
+
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
@@ -980,6 +1014,13 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
assert(Level != O0 && "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
+ if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) {
+ // Load sample profile before running the LTO optimization pipeline.
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+ PGOOpt->ProfileRemappingFile,
+ false /* ThinLTOPhase::PreLink */));
+ }
+
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
MPM.addPass(GlobalDCEPass());
@@ -1202,6 +1243,91 @@ static Optional<int> parseDevirtPassName(StringRef Name) {
return Count;
}
+static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
+ if (!Name.consume_front(PassName))
+ return false;
+ // normal pass name w/o parameters == default parameters
+ if (Name.empty())
+ return true;
+ return Name.startswith("<") && Name.endswith(">");
+}
+
+namespace {
+
+/// This performs customized parsing of a pass name with parameters.
+///
+/// We do not need parametrization of passes in a textual pipeline very often,
+/// yet on rare occasions the ability to specify parameters right there can be
+/// useful.
+///
+/// \p Name - a parameterized specification of a pass from a textual pipeline
+/// is a string of the form:
+/// PassName '<' parameter-list '>'
+///
+/// The parameter list is parsed by the callable argument \p Parser. It takes
+/// a string-ref of parameters and returns either a StringError or a parameter
+/// list in the form of a custom parameters type, all wrapped into the
+/// Expected<> template class.
+///
+template <typename ParametersParseCallableT>
+auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
+ StringRef PassName) -> decltype(Parser(StringRef{})) {
+ using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
+
+ StringRef Params = Name;
+ if (!Params.consume_front(PassName)) {
+ assert(false &&
+ "unable to strip pass name from parametrized pass specification");
+ }
+ if (Params.empty())
+ return ParametersT{};
+ if (!Params.consume_front("<") || !Params.consume_back(">")) {
+ assert(false && "invalid format for parametrized pass name");
+ }
+
+ Expected<ParametersT> Result = Parser(Params);
+ assert((Result || Result.template errorIsA<StringError>()) &&
+ "Pass parameter parser can only return StringErrors.");
+ return std::move(Result);
+}
+
+/// Parser of parameters for LoopUnroll pass.
+Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
+ LoopUnrollOptions UnrollOpts;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+ int OptLevel = StringSwitch<int>(ParamName)
+ .Case("O0", 0)
+ .Case("O1", 1)
+ .Case("O2", 2)
+ .Case("O3", 3)
+ .Default(-1);
+ if (OptLevel >= 0) {
+ UnrollOpts.setOptLevel(OptLevel);
+ continue;
+ }
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "partial") {
+ UnrollOpts.setPartial(Enable);
+ } else if (ParamName == "peeling") {
+ UnrollOpts.setPeeling(Enable);
+ } else if (ParamName == "runtime") {
+ UnrollOpts.setRuntime(Enable);
+ } else if (ParamName == "upperbound") {
+ UnrollOpts.setUpperBound(Enable);
+ } else {
+ return make_error<StringError>(
+ formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return UnrollOpts;
+}
+
+} // namespace
+
/// Tests whether a pass name starts with a valid prefix for a default pipeline
/// alias.
static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
@@ -1297,6 +1423,9 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) \
+ return true;
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
@@ -1383,9 +1512,9 @@ PassBuilder::parsePipelineText(StringRef Text) {
return {std::move(ResultPipeline)};
}
-bool PassBuilder::parseModulePass(ModulePassManager &MPM,
- const PipelineElement &E, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseModulePass(ModulePassManager &MPM,
+ const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -1393,50 +1522,56 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
if (!InnerPipeline.empty()) {
if (Name == "module") {
ModulePassManager NestedMPM(DebugLogging);
- if (!parseModulePassPipeline(NestedMPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
MPM.addPass(std::move(NestedMPM));
- return true;
+ return Error::success();
}
if (Name == "cgscc") {
CGSCCPassManager CGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return Err;
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
- return true;
+ return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM(DebugLogging);
- if (!parseFunctionPassPipeline(FPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- return true;
+ return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
ModulePassManager NestedMPM(DebugLogging);
- if (!parseModulePassPipeline(NestedMPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
- return true;
+ return Error::success();
}
for (auto &C : ModulePipelineParsingCallbacks)
if (C(Name, MPM, InnerPipeline))
- return true;
+ return Error::success();
// Normal passes can't have pipelines.
- return false;
+ return make_error<StringError>(
+ formatv("invalid use of '{0}' pass as module pipeline", Name).str(),
+ inconvertibleErrorCode());
}
// Manually handle aliases for pre-configured pipeline fragments.
if (startsWithDefaultPipelineAliasPrefix(Name)) {
SmallVector<StringRef, 3> Matches;
if (!DefaultAliasRegex.match(Name, &Matches))
- return false;
+ return make_error<StringError>(
+ formatv("unknown default pipeline alias '{0}'", Name).str(),
+ inconvertibleErrorCode());
+
assert(Matches.size() == 3 && "Must capture two matched strings!");
OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
@@ -1448,7 +1583,7 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
.Case("Oz", Oz);
if (L == O0)
// At O0 we do nothing at all!
- return true;
+ return Error::success();
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
@@ -1462,38 +1597,40 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
assert(Matches[1] == "lto" && "Not one of the matched options!");
MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr));
}
- return true;
+ return Error::success();
}
// Finally expand the basic registered passes from the .inc file.
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(CREATE_PASS); \
- return true; \
+ return Error::success(); \
}
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
MPM.addPass( \
RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Module>()); \
- return true; \
+ return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
MPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
- return true; \
+ return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : ModulePipelineParsingCallbacks)
if (C(Name, MPM, InnerPipeline))
- return true;
- return false;
+ return Error::success();
+ return make_error<StringError>(
+ formatv("unknown module pass '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
-bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
- const PipelineElement &E, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
+ const PipelineElement &E, bool VerifyEachPass,
+ bool DebugLogging) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -1501,53 +1638,55 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
if (!InnerPipeline.empty()) {
if (Name == "cgscc") {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(std::move(NestedCGPM));
- return true;
+ return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM(DebugLogging);
- if (!parseFunctionPassPipeline(FPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
- return true;
+ return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
- return true;
+ return Error::success();
}
if (auto MaxRepetitions = parseDevirtPassName(Name)) {
CGSCCPassManager NestedCGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
CGPM.addPass(
createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions));
- return true;
+ return Error::success();
}
for (auto &C : CGSCCPipelineParsingCallbacks)
if (C(Name, CGPM, InnerPipeline))
- return true;
+ return Error::success();
// Normal passes can't have pipelines.
- return false;
+ return make_error<StringError>(
+ formatv("invalid use of '{0}' pass as cgscc pipeline", Name).str(),
+ inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(CREATE_PASS); \
- return true; \
+ return Error::success(); \
}
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
@@ -1555,24 +1694,26 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
std::remove_reference<decltype(CREATE_PASS)>::type, \
LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, \
CGSCCUpdateResult &>()); \
- return true; \
+ return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
CGPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
- return true; \
+ return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : CGSCCPipelineParsingCallbacks)
if (C(Name, CGPM, InnerPipeline))
- return true;
- return false;
+ return Error::success();
+ return make_error<StringError>(
+ formatv("unknown cgscc pass '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
-bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
- const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging) {
+Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
+ const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -1580,68 +1721,80 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
if (!InnerPipeline.empty()) {
if (Name == "function") {
FunctionPassManager NestedFPM(DebugLogging);
- if (!parseFunctionPassPipeline(NestedFPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
FPM.addPass(std::move(NestedFPM));
- return true;
+ return Error::success();
}
if (Name == "loop") {
LoopPassManager LPM(DebugLogging);
- if (!parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
FPM.addPass(
createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
- return true;
+ return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
FunctionPassManager NestedFPM(DebugLogging);
- if (!parseFunctionPassPipeline(NestedFPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
- return true;
+ return Error::success();
}
for (auto &C : FunctionPipelineParsingCallbacks)
if (C(Name, FPM, InnerPipeline))
- return true;
+ return Error::success();
// Normal passes can't have pipelines.
- return false;
+ return make_error<StringError>(
+ formatv("invalid use of '{0}' pass as function pipeline", Name).str(),
+ inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(CREATE_PASS); \
- return true; \
+ return Error::success(); \
+ }
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ FPM.addPass(CREATE_PASS(Params.get())); \
+ return Error::success(); \
}
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
FPM.addPass( \
RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Function>()); \
- return true; \
+ return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
FPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
- return true; \
+ return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : FunctionPipelineParsingCallbacks)
if (C(Name, FPM, InnerPipeline))
- return true;
- return false;
+ return Error::success();
+ return make_error<StringError>(
+ formatv("unknown function pass '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
-bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
- bool VerifyEachPass, bool DebugLogging) {
+Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging) {
StringRef Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
@@ -1649,35 +1802,37 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
if (!InnerPipeline.empty()) {
if (Name == "loop") {
LoopPassManager NestedLPM(DebugLogging);
- if (!parseLoopPassPipeline(NestedLPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
// Add the nested pass manager with the appropriate adaptor.
LPM.addPass(std::move(NestedLPM));
- return true;
+ return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
LoopPassManager NestedLPM(DebugLogging);
- if (!parseLoopPassPipeline(NestedLPM, InnerPipeline, VerifyEachPass,
- DebugLogging))
- return false;
+ if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline,
+ VerifyEachPass, DebugLogging))
+ return Err;
LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM)));
- return true;
+ return Error::success();
}
for (auto &C : LoopPipelineParsingCallbacks)
if (C(Name, LPM, InnerPipeline))
- return true;
+ return Error::success();
// Normal passes can't have pipelines.
- return false;
+ return make_error<StringError>(
+ formatv("invalid use of '{0}' pass as loop pipeline", Name).str(),
+ inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
LPM.addPass(CREATE_PASS); \
- return true; \
+ return Error::success(); \
}
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
@@ -1685,19 +1840,20 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
std::remove_reference<decltype(CREATE_PASS)>::type, Loop, \
LoopAnalysisManager, LoopStandardAnalysisResults &, \
LPMUpdater &>()); \
- return true; \
+ return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
LPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
- return true; \
+ return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : LoopPipelineParsingCallbacks)
if (C(Name, LPM, InnerPipeline))
- return true;
- return false;
+ return Error::success();
+ return make_error<StringError>(formatv("unknown loop pass '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
@@ -1721,41 +1877,42 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
return false;
}
-bool PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass,
+ bool DebugLogging) {
for (const auto &Element : Pipeline) {
- if (!parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging))
- return false;
+ if (auto Err = parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging))
+ return Err;
// FIXME: No verifier support for Loop passes!
}
- return true;
+ return Error::success();
}
-bool PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass,
+ bool DebugLogging) {
for (const auto &Element : Pipeline) {
- if (!parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging))
- return false;
+ if (auto Err =
+ parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging))
+ return Err;
if (VerifyEachPass)
FPM.addPass(VerifierPass());
}
- return true;
+ return Error::success();
}
-bool PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass,
+ bool DebugLogging) {
for (const auto &Element : Pipeline) {
- if (!parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging))
- return false;
+ if (auto Err = parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging))
+ return Err;
// FIXME: No verifier support for CGSCC passes!
}
- return true;
+ return Error::success();
}
void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
@@ -1771,28 +1928,30 @@ void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); });
}
-bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
- ArrayRef<PipelineElement> Pipeline,
- bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass,
+ bool DebugLogging) {
for (const auto &Element : Pipeline) {
- if (!parseModulePass(MPM, Element, VerifyEachPass, DebugLogging))
- return false;
+ if (auto Err = parseModulePass(MPM, Element, VerifyEachPass, DebugLogging))
+ return Err;
if (VerifyEachPass)
MPM.addPass(VerifierPass());
}
- return true;
+ return Error::success();
}
// Primary pass pipeline description parsing routine for a \c ModulePassManager
// FIXME: Should this routine accept a TargetMachine or require the caller to
// pre-populate the analysis managers with target-specific stuff?
-bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
- StringRef PipelineText, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
+ StringRef PipelineText,
+ bool VerifyEachPass, bool DebugLogging) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
- return false;
+ return make_error<StringError>(
+ formatv("invalid pipeline '{0}'", PipelineText).str(),
+ inconvertibleErrorCode());
// If the first name isn't at the module layer, wrap the pipeline up
// automatically.
@@ -1809,73 +1968,106 @@ bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
} else {
for (auto &C : TopLevelPipelineParsingCallbacks)
if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging))
- return true;
-
- // Unknown pass name!
- return false;
+ return Error::success();
+
+ // Unknown pass or pipeline name!
+ auto &InnerPipeline = Pipeline->front().InnerPipeline;
+ return make_error<StringError>(
+ formatv("unknown {0} name '{1}'",
+ (InnerPipeline.empty() ? "pass" : "pipeline"), FirstName)
+ .str(),
+ inconvertibleErrorCode());
}
}
- return parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging);
+ if (auto Err =
+ parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging))
+ return Err;
+ return Error::success();
}
// Primary pass pipeline description parsing routine for a \c CGSCCPassManager
-bool PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
- StringRef PipelineText, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
+ StringRef PipelineText,
+ bool VerifyEachPass, bool DebugLogging) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
- return false;
+ return make_error<StringError>(
+ formatv("invalid pipeline '{0}'", PipelineText).str(),
+ inconvertibleErrorCode());
StringRef FirstName = Pipeline->front().Name;
if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks))
- return false;
-
- return parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging);
+ return make_error<StringError>(
+ formatv("unknown cgscc pass '{0}' in pipeline '{1}'", FirstName,
+ PipelineText)
+ .str(),
+ inconvertibleErrorCode());
+
+ if (auto Err =
+ parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging))
+ return Err;
+ return Error::success();
}
// Primary pass pipeline description parsing routine for a \c
// FunctionPassManager
-bool PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
- StringRef PipelineText, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
+ StringRef PipelineText,
+ bool VerifyEachPass, bool DebugLogging) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
- return false;
+ return make_error<StringError>(
+ formatv("invalid pipeline '{0}'", PipelineText).str(),
+ inconvertibleErrorCode());
StringRef FirstName = Pipeline->front().Name;
if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks))
- return false;
-
- return parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass,
- DebugLogging);
+ return make_error<StringError>(
+ formatv("unknown function pass '{0}' in pipeline '{1}'", FirstName,
+ PipelineText)
+ .str(),
+ inconvertibleErrorCode());
+
+ if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass,
+ DebugLogging))
+ return Err;
+ return Error::success();
}
// Primary pass pipeline description parsing routine for a \c LoopPassManager
-bool PassBuilder::parsePassPipeline(LoopPassManager &CGPM,
- StringRef PipelineText, bool VerifyEachPass,
- bool DebugLogging) {
+Error PassBuilder::parsePassPipeline(LoopPassManager &CGPM,
+ StringRef PipelineText,
+ bool VerifyEachPass, bool DebugLogging) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
- return false;
+ return make_error<StringError>(
+ formatv("invalid pipeline '{0}'", PipelineText).str(),
+ inconvertibleErrorCode());
+
+ if (auto Err =
+ parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging))
+ return Err;
- return parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging);
+ return Error::success();
}
-bool PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
+Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
// If the pipeline just consists of the word 'default' just replace the AA
// manager with our default one.
if (PipelineText == "default") {
AA = buildDefaultAAPipeline();
- return true;
+ return Error::success();
}
while (!PipelineText.empty()) {
StringRef Name;
std::tie(Name, PipelineText) = PipelineText.split(',');
if (!parseAAPassName(AA, Name))
- return false;
+ return make_error<StringError>(
+ formatv("unknown alias analysis name '{0}'", Name).str(),
+ inconvertibleErrorCode());
}
- return true;
+ return Error::success();
}
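
With the parsing entry points now returning llvm::Error instead of bool, a caller gets a descriptive message rather than a bare failure flag. A plausible call site, shown only as a sketch (PB, MPM and PipelineText are assumed to exist in the surrounding code):

if (auto Err = PB.parsePassPipeline(MPM, PipelineText,
                                    /*VerifyEachPass=*/false,
                                    /*DebugLogging=*/false)) {
  errs() << "could not parse pipeline '" << PipelineText
         << "': " << toString(std::move(Err)) << "\n";
  return 1;
}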
diff --git a/contrib/llvm/lib/Passes/PassRegistry.def b/contrib/llvm/lib/Passes/PassRegistry.def
index 6ae93a476968..771d2f5b212a 100644
--- a/contrib/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm/lib/Passes/PassRegistry.def
@@ -24,8 +24,10 @@ MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis())
MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis())
+MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
MODULE_ANALYSIS("verify", VerifierAnalysis())
+MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#ifndef MODULE_ALIAS_ANALYSIS
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -40,6 +42,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
#endif
MODULE_PASS("always-inline", AlwaysInlinerPass())
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
+MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
MODULE_PASS("cg-profile", CGProfilePass())
MODULE_PASS("constmerge", ConstantMergePass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
@@ -50,6 +53,7 @@ MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globaldce", GlobalDCEPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
+MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
MODULE_PASS("instrprof", InstrProfiling())
@@ -69,6 +73,7 @@ MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs()))
MODULE_PASS("print", PrintModulePass(dbgs()))
MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs()))
MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs()))
+MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs()))
MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC())
MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
@@ -84,6 +89,7 @@ MODULE_PASS("verify", VerifierPass())
#endif
CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis())
CGSCC_ANALYSIS("fam-proxy", FunctionAnalysisManagerCGSCCProxy())
+CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#undef CGSCC_ANALYSIS
#ifndef CGSCC_PASS
@@ -117,10 +123,12 @@ FUNCTION_ANALYSIS("regions", RegionInfoAnalysis())
FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
+FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
FUNCTION_ANALYSIS("targetir",
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
FUNCTION_ANALYSIS("verify", VerifierAnalysis())
+FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#ifndef FUNCTION_ALIAS_ANALYSIS
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -148,6 +156,7 @@ FUNCTION_PASS("bounds-checking", BoundsCheckingPass())
FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
+FUNCTION_PASS("chr", ControlHeightReductionPass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("dce", DCEPass())
FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
@@ -157,6 +166,7 @@ FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false))
+FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("instcombine", InstCombinePass())
@@ -170,6 +180,7 @@ FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn", GVN())
+FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
FUNCTION_PASS("loop-sink", LoopSinkPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass())
@@ -190,6 +201,7 @@ FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
+FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs()))
@@ -199,7 +211,9 @@ FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs()))
FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
+FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs()))
FUNCTION_PASS("reassociate", ReassociatePass())
+FUNCTION_PASS("scalarizer", ScalarizerPass())
FUNCTION_PASS("sccp", SCCPPass())
FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
FUNCTION_PASS("sink", SinkingPass())
@@ -209,7 +223,6 @@ FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
-FUNCTION_PASS("unroll", LoopUnrollPass())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
@@ -217,14 +230,26 @@ FUNCTION_PASS("verify<memoryssa>", MemorySSAVerifierPass())
FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
FUNCTION_PASS("view-cfg", CFGViewerPass())
FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
+FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
+FUNCTION_PASS("msan", MemorySanitizerPass())
+FUNCTION_PASS("tsan", ThreadSanitizerPass())
#undef FUNCTION_PASS
+#ifndef FUNCTION_PASS_WITH_PARAMS
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#endif
+FUNCTION_PASS_WITH_PARAMS("unroll",
+ [](LoopUnrollOptions Opts) { return LoopUnrollPass(Opts); },
+ parseLoopUnrollOptions)
+#undef FUNCTION_PASS_WITH_PARAMS
+
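For reference, a sketch of what the FUNCTION_PASS_WITH_PARAMS expansion in PassBuilder::parseFunctionPass reduces to for this entry once the lambda is inlined; a pipeline string such as unroll<partial;no-runtime> therefore builds a LoopUnrollPass with partial unrolling on and runtime unrolling off:

if (checkParametrizedPassName(Name, "unroll")) {
  auto Params = parsePassParameters(parseLoopUnrollOptions, Name, "unroll");
  if (!Params)
    return Params.takeError();
  FPM.addPass(LoopUnrollPass(Params.get()));
  return Error::success();
}
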
#ifndef LOOP_ANALYSIS
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#endif
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("access-info", LoopAccessAnalysis())
LOOP_ANALYSIS("ivusers", IVUsersAnalysis())
+LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#undef LOOP_ANALYSIS
#ifndef LOOP_PASS
diff --git a/contrib/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm/lib/Passes/StandardInstrumentations.cpp
new file mode 100644
index 000000000000..a1dfc39d472c
--- /dev/null
+++ b/contrib/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -0,0 +1,243 @@
+//===- Standard pass instrumentations handling ----------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines IR-printing pass instrumentation callbacks as well as the
+/// StandardInstrumentations class that manages standard pass instrumentations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Passes/StandardInstrumentations.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extract the Module out of the \p IR unit. Also fills a textual description
+/// of \p IR for use in the header when printing.
+Optional<std::pair<const Module *, std::string>> unwrapModule(Any IR) {
+ if (any_isa<const Module *>(IR))
+ return std::make_pair(any_cast<const Module *>(IR), std::string());
+
+ if (any_isa<const Function *>(IR)) {
+ const Function *F = any_cast<const Function *>(IR);
+ if (!llvm::isFunctionInPrintList(F->getName()))
+ return None;
+ const Module *M = F->getParent();
+ return std::make_pair(M, formatv(" (function: {0})", F->getName()).str());
+ }
+
+ if (any_isa<const LazyCallGraph::SCC *>(IR)) {
+ const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR);
+ for (const LazyCallGraph::Node &N : *C) {
+ const Function &F = N.getFunction();
+ if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) {
+ const Module *M = F.getParent();
+ return std::make_pair(M, formatv(" (scc: {0})", C->getName()).str());
+ }
+ }
+ return None;
+ }
+
+ if (any_isa<const Loop *>(IR)) {
+ const Loop *L = any_cast<const Loop *>(IR);
+ const Function *F = L->getHeader()->getParent();
+ if (!isFunctionInPrintList(F->getName()))
+ return None;
+ const Module *M = F->getParent();
+ std::string LoopName;
+ raw_string_ostream ss(LoopName);
+ L->getHeader()->printAsOperand(ss, false);
+ return std::make_pair(M, formatv(" (loop: {0})", ss.str()).str());
+ }
+
+ llvm_unreachable("Unknown IR unit");
+}
+
+void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef()) {
+ dbgs() << Banner << Extra << "\n";
+ M->print(dbgs(), nullptr, false);
+}
+void printIR(const Function *F, StringRef Banner,
+ StringRef Extra = StringRef()) {
+ if (!llvm::isFunctionInPrintList(F->getName()))
+ return;
+ dbgs() << Banner << Extra << "\n" << static_cast<const Value &>(*F);
+}
+void printIR(const LazyCallGraph::SCC *C, StringRef Banner,
+ StringRef Extra = StringRef()) {
+ bool BannerPrinted = false;
+ for (const LazyCallGraph::Node &N : *C) {
+ const Function &F = N.getFunction();
+ if (!F.isDeclaration() && llvm::isFunctionInPrintList(F.getName())) {
+ if (!BannerPrinted) {
+ dbgs() << Banner << Extra << "\n";
+ BannerPrinted = true;
+ }
+ F.print(dbgs());
+ }
+ }
+}
+void printIR(const Loop *L, StringRef Banner) {
+ const Function *F = L->getHeader()->getParent();
+ if (!llvm::isFunctionInPrintList(F->getName()))
+ return;
+ llvm::printLoop(const_cast<Loop &>(*L), dbgs(), Banner);
+}
+
+/// Generic IR-printing helper that unpacks a pointer to an IR unit wrapped in
+/// llvm::Any and does the actual printing job.
+void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false) {
+ if (ForceModule) {
+ if (auto UnwrappedModule = unwrapModule(IR))
+ printIR(UnwrappedModule->first, Banner, UnwrappedModule->second);
+ return;
+ }
+
+ if (any_isa<const Module *>(IR)) {
+ const Module *M = any_cast<const Module *>(IR);
+ assert(M && "module should be valid for printing");
+ printIR(M, Banner);
+ return;
+ }
+
+ if (any_isa<const Function *>(IR)) {
+ const Function *F = any_cast<const Function *>(IR);
+ assert(F && "function should be valid for printing");
+ printIR(F, Banner);
+ return;
+ }
+
+ if (any_isa<const LazyCallGraph::SCC *>(IR)) {
+ const LazyCallGraph::SCC *C = any_cast<const LazyCallGraph::SCC *>(IR);
+ assert(C && "scc should be valid for printing");
+ std::string Extra = formatv(" (scc: {0})", C->getName());
+ printIR(C, Banner, Extra);
+ return;
+ }
+
+ if (any_isa<const Loop *>(IR)) {
+ const Loop *L = any_cast<const Loop *>(IR);
+ assert(L && "Loop should be valid for printing");
+ printIR(L, Banner);
+ return;
+ }
+ llvm_unreachable("Unknown wrapped IR type");
+}
+
+} // namespace
+
+PrintIRInstrumentation::~PrintIRInstrumentation() {
+ assert(ModuleDescStack.empty() && "ModuleDescStack is not empty at exit");
+}
+
+void PrintIRInstrumentation::pushModuleDesc(StringRef PassID, Any IR) {
+ assert(StoreModuleDesc);
+ const Module *M = nullptr;
+ std::string Extra;
+ if (auto UnwrappedModule = unwrapModule(IR))
+ std::tie(M, Extra) = UnwrappedModule.getValue();
+ ModuleDescStack.emplace_back(M, Extra, PassID);
+}
+
+PrintIRInstrumentation::PrintModuleDesc
+PrintIRInstrumentation::popModuleDesc(StringRef PassID) {
+ assert(!ModuleDescStack.empty() && "empty ModuleDescStack");
+ PrintModuleDesc ModuleDesc = ModuleDescStack.pop_back_val();
+ assert(std::get<2>(ModuleDesc).equals(PassID) && "malformed ModuleDescStack");
+ return ModuleDesc;
+}
+
+bool PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
+ if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
+ return true;
+
+ // Save the Module for AfterPassInvalidated operations.
+ // Note: here we rely on the fact that we do not change modules while
+ // traversing the pipeline, so the latest captured module is good
+ // for all print operations that have not happened yet.
+ if (StoreModuleDesc && llvm::shouldPrintAfterPass(PassID))
+ pushModuleDesc(PassID, IR);
+
+ if (!llvm::shouldPrintBeforePass(PassID))
+ return true;
+
+ SmallString<20> Banner = formatv("*** IR Dump Before {0} ***", PassID);
+ unwrapAndPrint(IR, Banner, llvm::forcePrintModuleIR());
+ return true;
+}
+
+void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
+ if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
+ return;
+
+ if (!llvm::shouldPrintAfterPass(PassID))
+ return;
+
+ if (StoreModuleDesc)
+ popModuleDesc(PassID);
+
+ SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID);
+ unwrapAndPrint(IR, Banner, llvm::forcePrintModuleIR());
+}
+
+void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
+ if (!StoreModuleDesc || !llvm::shouldPrintAfterPass(PassID))
+ return;
+
+ if (PassID.startswith("PassManager<") || PassID.contains("PassAdaptor<"))
+ return;
+
+ const Module *M;
+ std::string Extra;
+ StringRef StoredPassID;
+ std::tie(M, Extra, StoredPassID) = popModuleDesc(PassID);
+ // Additional filtering (e.g. -filter-print-func) can lead to module
+ // printing being skipped.
+ if (!M)
+ return;
+
+ SmallString<20> Banner =
+ formatv("*** IR Dump After {0} *** invalidated: ", PassID);
+ printIR(M, Banner, Extra);
+}
+
+void PrintIRInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ // The BeforePass callback is not just for printing; it also saves a Module
+ // for later use in AfterPassInvalidated.
+ StoreModuleDesc = llvm::forcePrintModuleIR() && llvm::shouldPrintAfterPass();
+ if (llvm::shouldPrintBeforePass() || StoreModuleDesc)
+ PIC.registerBeforePassCallback(
+ [this](StringRef P, Any IR) { return this->printBeforePass(P, IR); });
+
+ if (llvm::shouldPrintAfterPass()) {
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any IR) { this->printAfterPass(P, IR); });
+ PIC.registerAfterPassInvalidatedCallback(
+ [this](StringRef P) { this->printAfterPassInvalidated(P); });
+ }
+}
+
+void StandardInstrumentations::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ PrintIR.registerCallbacks(PIC);
+ TimePasses.registerCallbacks(PIC);
+}
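
Typical wiring for this class, shown only as a sketch (variable names are assumptions): the PassInstrumentationCallbacks object is the same PIC that the pass-instrumentation analysis entries in PassRegistry.def hand to each analysis manager.

PassInstrumentationCallbacks PIC;
StandardInstrumentations SI;
SI.registerCallbacks(PIC);   // installs the print-IR and time-passes hooks
// A pointer to PIC is then passed to the PassBuilder so that every analysis
// manager can expose it through PassInstrumentationAnalysis.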
diff --git a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index b3c2b182e76c..b2dde3406a63 100644
--- a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -83,7 +83,7 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
return Counter::getZero();
// Group the terms by counter ID.
- llvm::sort(Terms.begin(), Terms.end(), [](const Term &LHS, const Term &RHS) {
+ llvm::sort(Terms, [](const Term &LHS, const Term &RHS) {
return LHS.CounterID < RHS.CounterID;
});
@@ -207,12 +207,6 @@ Error CoverageMapping::loadFunctionRecord(
else
OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]);
- // Don't load records for (filenames, function) pairs we've already seen.
- auto FilenamesHash = hash_combine_range(Record.Filenames.begin(),
- Record.Filenames.end());
- if (!RecordProvenance[FilenamesHash].insert(hash_value(OrigFuncName)).second)
- return Error::success();
-
CounterMappingContext Ctx(Record.Expressions);
std::vector<uint64_t> Counts;
@@ -230,6 +224,15 @@ Error CoverageMapping::loadFunctionRecord(
assert(!Record.MappingRegions.empty() && "Function has no regions");
+ // This coverage record is a zero region for a function that's unused in
+ // some TU, but used in a different TU. Ignore it. The coverage maps from
+ // the other TU will either be loaded (providing full region counts) or they
+ // won't (in which case, by ignoring this record, we avoid reporting the
+ // function as uncovered when it has non-zero counts in the profile).
+ if (Record.MappingRegions.size() == 1 &&
+ Record.MappingRegions[0].Count.isZero() && Counts[0] > 0)
+ return Error::success();
+
FunctionRecord Function(OrigFuncName, Record.Filenames);
for (const auto &Region : Record.MappingRegions) {
Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
@@ -239,11 +242,12 @@ Error CoverageMapping::loadFunctionRecord(
}
Function.pushRegion(Region, *ExecutionCount);
}
- if (Function.CountedRegions.size() != Record.MappingRegions.size()) {
- FuncCounterMismatches.emplace_back(Record.FunctionName,
- Function.CountedRegions.size());
+
+ // Don't create records for (filenames, function) pairs we've already seen.
+ auto FilenamesHash = hash_combine_range(Record.Filenames.begin(),
+ Record.Filenames.end());
+ if (!RecordProvenance[FilenamesHash].insert(hash_value(OrigFuncName)).second)
return Error::success();
- }
Functions.push_back(std::move(Function));
return Error::success();
@@ -459,8 +463,7 @@ class SegmentBuilder {
/// Sort a nested sequence of regions from a single file.
static void sortNestedRegions(MutableArrayRef<CountedRegion> Regions) {
- llvm::sort(Regions.begin(), Regions.end(), [](const CountedRegion &LHS,
- const CountedRegion &RHS) {
+ llvm::sort(Regions, [](const CountedRegion &LHS, const CountedRegion &RHS) {
if (LHS.startLoc() != RHS.startLoc())
return LHS.startLoc() < RHS.startLoc();
if (LHS.endLoc() != RHS.endLoc())
@@ -557,7 +560,7 @@ std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
for (const auto &Function : getCoveredFunctions())
Filenames.insert(Filenames.end(), Function.Filenames.begin(),
Function.Filenames.end());
- llvm::sort(Filenames.begin(), Filenames.end());
+ llvm::sort(Filenames);
auto Last = std::unique(Filenames.begin(), Filenames.end());
Filenames.erase(Last, Filenames.end());
return Filenames;
diff --git a/contrib/llvm/lib/ProfileData/GCOV.cpp b/contrib/llvm/lib/ProfileData/GCOV.cpp
index c9155439ec46..b687346a2c05 100644
--- a/contrib/llvm/lib/ProfileData/GCOV.cpp
+++ b/contrib/llvm/lib/ProfileData/GCOV.cpp
@@ -111,9 +111,7 @@ void GCOVFile::print(raw_ostream &OS) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump GCOVFile content to dbgs() for debugging purposes.
-LLVM_DUMP_METHOD void GCOVFile::dump() const {
- print(dbgs());
-}
+LLVM_DUMP_METHOD void GCOVFile::dump() const { print(dbgs()); }
#endif
/// collectLineCounts - Collect line counts. This must be used after
@@ -359,9 +357,7 @@ void GCOVFunction::print(raw_ostream &OS) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump GCOVFunction content to dbgs() for debugging purposes.
-LLVM_DUMP_METHOD void GCOVFunction::dump() const {
- print(dbgs());
-}
+LLVM_DUMP_METHOD void GCOVFunction::dump() const { print(dbgs()); }
#endif
/// collectLineCounts - Collect line counts. This must be used after
@@ -437,12 +433,135 @@ void GCOVBlock::print(raw_ostream &OS) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump GCOVBlock content to dbgs() for debugging purposes.
-LLVM_DUMP_METHOD void GCOVBlock::dump() const {
- print(dbgs());
-}
+LLVM_DUMP_METHOD void GCOVBlock::dump() const { print(dbgs()); }
#endif
//===----------------------------------------------------------------------===//
+// Cycles detection
+//
+// The algorithm in GCC is based on the algorithm by Hawick & James:
+// "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs"
+// http://complexity.massey.ac.nz/cstn/013/cstn-013.pdf.
+
+/// Get the count for the detected cycle.
+uint64_t GCOVBlock::getCycleCount(const Edges &Path) {
+ uint64_t CycleCount = std::numeric_limits<uint64_t>::max();
+ for (auto E : Path) {
+ CycleCount = std::min(E->CyclesCount, CycleCount);
+ }
+ for (auto E : Path) {
+ E->CyclesCount -= CycleCount;
+ }
+ return CycleCount;
+}
+
+/// Unblock a vertex previously marked as blocked.
+void GCOVBlock::unblock(const GCOVBlock *U, BlockVector &Blocked,
+ BlockVectorLists &BlockLists) {
+ auto it = find(Blocked, U);
+ if (it == Blocked.end()) {
+ return;
+ }
+
+ const size_t index = it - Blocked.begin();
+ Blocked.erase(it);
+
+ const BlockVector ToUnblock(BlockLists[index]);
+ BlockLists.erase(BlockLists.begin() + index);
+ for (auto GB : ToUnblock) {
+ GCOVBlock::unblock(GB, Blocked, BlockLists);
+ }
+}
+
+bool GCOVBlock::lookForCircuit(const GCOVBlock *V, const GCOVBlock *Start,
+ Edges &Path, BlockVector &Blocked,
+ BlockVectorLists &BlockLists,
+ const BlockVector &Blocks, uint64_t &Count) {
+ Blocked.push_back(V);
+ BlockLists.emplace_back(BlockVector());
+ bool FoundCircuit = false;
+
+ for (auto E : V->dsts()) {
+ const GCOVBlock *W = &E->Dst;
+ if (W < Start || find(Blocks, W) == Blocks.end()) {
+ continue;
+ }
+
+ Path.push_back(E);
+
+ if (W == Start) {
+ // We've found a cycle.
+ Count += GCOVBlock::getCycleCount(Path);
+ FoundCircuit = true;
+ } else if (find(Blocked, W) == Blocked.end() && // W is not blocked.
+ GCOVBlock::lookForCircuit(W, Start, Path, Blocked, BlockLists,
+ Blocks, Count)) {
+ FoundCircuit = true;
+ }
+
+ Path.pop_back();
+ }
+
+ if (FoundCircuit) {
+ GCOVBlock::unblock(V, Blocked, BlockLists);
+ } else {
+ for (auto E : V->dsts()) {
+ const GCOVBlock *W = &E->Dst;
+ if (W < Start || find(Blocks, W) == Blocks.end()) {
+ continue;
+ }
+ const size_t index = find(Blocked, W) - Blocked.begin();
+ BlockVector &List = BlockLists[index];
+ if (find(List, V) == List.end()) {
+ List.push_back(V);
+ }
+ }
+ }
+
+ return FoundCircuit;
+}
+
+/// Accumulate the cycle counts for the list of blocks which lie on the same line.
+void GCOVBlock::getCyclesCount(const BlockVector &Blocks, uint64_t &Count) {
+ for (auto Block : Blocks) {
+ Edges Path;
+ BlockVector Blocked;
+ BlockVectorLists BlockLists;
+
+ GCOVBlock::lookForCircuit(Block, Block, Path, Blocked, BlockLists, Blocks,
+ Count);
+ }
+}
+
+/// Get the count for the list of blocks which lie on the same line.
+uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) {
+ uint64_t Count = 0;
+
+ for (auto Block : Blocks) {
+ if (Block->getNumSrcEdges() == 0) {
+ // The block has no predecessors and a non-null counter (this can be
+ // the case for the entry block of a function).
+ Count += Block->getCount();
+ } else {
+ // Add counts from predecessors that are not on the same line.
+ for (auto E : Block->srcs()) {
+ const GCOVBlock *W = &E->Src;
+ if (find(Blocks, W) == Blocks.end()) {
+ Count += E->Count;
+ }
+ }
+ }
+ for (auto E : Block->dsts()) {
+ E->CyclesCount = E->Count;
+ }
+ }
+
+ GCOVBlock::getCyclesCount(Blocks, Count);
+
+ return Count;
+}
+
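A worked example under assumed counts: for a line consisting of a single block whose entry edge from outside the line has count 2 and whose self-edge has count 5, getLineCount first adds 2 for the external predecessor, then getCyclesCount finds the one circuit (the self-edge) with a minimum remaining count of 5 and adds it, giving a line count of 7. The subtraction in getCycleCount is what prevents circuits that share edges from being counted more than once.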
+//===----------------------------------------------------------------------===//
// FileInfo implementation.
// Safe integer division, returns 0 if numerator is 0.
@@ -578,8 +697,8 @@ FileInfo::openCoveragePath(StringRef CoveragePath) {
return llvm::make_unique<raw_null_ostream>();
std::error_code EC;
- auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath, EC,
- sys::fs::F_Text);
+ auto OS =
+ llvm::make_unique<raw_fd_ostream>(CoveragePath, EC, sys::fs::F_Text);
if (EC) {
errs() << EC.message() << "\n";
return llvm::make_unique<raw_null_ostream>();
@@ -593,7 +712,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
SmallVector<StringRef, 4> Filenames;
for (const auto &LI : LineInfo)
Filenames.push_back(LI.first());
- llvm::sort(Filenames.begin(), Filenames.end());
+ llvm::sort(Filenames);
for (StringRef Filename : Filenames) {
auto AllLines = LineConsumer(Filename);
@@ -628,17 +747,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
// Add up the block counts to form line counts.
DenseMap<const GCOVFunction *, bool> LineExecs;
- uint64_t LineCount = 0;
for (const GCOVBlock *Block : Blocks) {
- if (Options.AllBlocks) {
- // Only take the highest block count for that line.
- uint64_t BlockCount = Block->getCount();
- LineCount = LineCount > BlockCount ? LineCount : BlockCount;
- } else {
- // Sum up all of the block counts.
- LineCount += Block->getCount();
- }
-
if (Options.FuncCoverage) {
// This is a slightly convoluted way to most accurately gather line
// statistics for functions. Basically what is happening is that we
@@ -674,6 +783,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
}
}
+ const uint64_t LineCount = GCOVBlock::getLineCount(Blocks);
if (LineCount == 0)
CovOS << " #####:";
else {
diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp
index 544a77ec20a5..aaa8000ff2f9 100644
--- a/contrib/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp
@@ -252,11 +252,12 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
// data, its original linkage must be non-internal.
std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
if (!InLTO) {
- StringRef FileName = (StaticFuncFullModulePrefix
- ? F.getParent()->getName()
- : sys::path::filename(F.getParent()->getName()));
- if (StaticFuncFullModulePrefix && StaticFuncStripDirNamePrefix != 0)
- FileName = stripDirPrefix(FileName, StaticFuncStripDirNamePrefix);
+ StringRef FileName(F.getParent()->getSourceFileName());
+ uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
+ if (StripLevel < StaticFuncStripDirNamePrefix)
+ StripLevel = StaticFuncStripDirNamePrefix;
+ if (StripLevel)
+ FileName = stripDirPrefix(FileName, StripLevel);
return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
}
diff --git a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
index 3b704158a5c5..eaf0eb04bfbf 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -14,6 +14,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
@@ -23,6 +24,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SymbolRemappingReader.h"
#include "llvm/Support/SwapByteOrder.h"
#include <algorithm>
#include <cctype>
@@ -88,16 +90,29 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
}
Expected<std::unique_ptr<IndexedInstrProfReader>>
-IndexedInstrProfReader::create(const Twine &Path) {
+IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
// Set up the buffer to read.
auto BufferOrError = setupMemoryBuffer(Path);
if (Error E = BufferOrError.takeError())
return std::move(E);
- return IndexedInstrProfReader::create(std::move(BufferOrError.get()));
+
+ // Set up the remapping buffer if requested.
+ std::unique_ptr<MemoryBuffer> RemappingBuffer;
+ std::string RemappingPathStr = RemappingPath.str();
+ if (!RemappingPathStr.empty()) {
+ auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
+ if (Error E = RemappingBufferOrError.takeError())
+ return std::move(E);
+ RemappingBuffer = std::move(RemappingBufferOrError.get());
+ }
+
+ return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
+ std::move(RemappingBuffer));
}
Expected<std::unique_ptr<IndexedInstrProfReader>>
-IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
+ std::unique_ptr<MemoryBuffer> RemappingBuffer) {
// Sanity check the buffer.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
@@ -105,7 +120,8 @@ IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
// Create the reader.
if (!IndexedInstrProfReader::hasFormat(*Buffer))
return make_error<InstrProfError>(instrprof_error::bad_magic);
- auto Result = llvm::make_unique<IndexedInstrProfReader>(std::move(Buffer));
+ auto Result = llvm::make_unique<IndexedInstrProfReader>(
+ std::move(Buffer), std::move(RemappingBuffer));
// Initialize the reader and return the result.
if (Error E = initializeReader(*Result))
@@ -587,6 +603,124 @@ InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
RecordIterator = HashTable->data_begin();
}
+namespace {
+/// A remapper that does not apply any remappings.
+class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
+ InstrProfReaderIndexBase &Underlying;
+
+public:
+ InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
+ : Underlying(Underlying) {}
+
+ Error getRecords(StringRef FuncName,
+ ArrayRef<NamedInstrProfRecord> &Data) override {
+ return Underlying.getRecords(FuncName, Data);
+ }
+};
+}
+
+/// A remapper that applies remappings based on a symbol remapping file.
+template <typename HashTableImpl>
+class llvm::InstrProfReaderItaniumRemapper
+ : public InstrProfReaderRemapper {
+public:
+ InstrProfReaderItaniumRemapper(
+ std::unique_ptr<MemoryBuffer> RemapBuffer,
+ InstrProfReaderIndex<HashTableImpl> &Underlying)
+ : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
+ }
+
+ /// Extract the original function name from a PGO function name.
+ static StringRef extractName(StringRef Name) {
+ // We can have multiple :-separated pieces; there can be pieces both
+ // before and after the mangled name. Find the first part that starts
+ // with '_Z'; we'll assume that's the mangled name we want.
+ std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
+ while (true) {
+ Parts = Parts.second.split(':');
+ if (Parts.first.startswith("_Z"))
+ return Parts.first;
+ if (Parts.second.empty())
+ return Name;
+ }
+ }
+
+ /// Given a mangled name extracted from a PGO function name, and a new
+ /// form for that mangled name, reconstitute the name.
+ static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
+ StringRef Replacement,
+ SmallVectorImpl<char> &Out) {
+ Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
+ Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
+ Out.insert(Out.end(), Replacement.begin(), Replacement.end());
+ Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
+ }
+
+ Error populateRemappings() override {
+ if (Error E = Remappings.read(*RemapBuffer))
+ return E;
+ for (StringRef Name : Underlying.HashTable->keys()) {
+ StringRef RealName = extractName(Name);
+ if (auto Key = Remappings.insert(RealName)) {
+ // FIXME: We could theoretically map the same equivalence class to
+ // multiple names in the profile data. If that happens, we should
+ // return NamedInstrProfRecords from all of them.
+ MappedNames.insert({Key, RealName});
+ }
+ }
+ return Error::success();
+ }
+
+ Error getRecords(StringRef FuncName,
+ ArrayRef<NamedInstrProfRecord> &Data) override {
+ StringRef RealName = extractName(FuncName);
+ if (auto Key = Remappings.lookup(RealName)) {
+ StringRef Remapped = MappedNames.lookup(Key);
+ if (!Remapped.empty()) {
+ if (RealName.begin() == FuncName.begin() &&
+ RealName.end() == FuncName.end())
+ FuncName = Remapped;
+ else {
+ // Try rebuilding the name from the given remapping.
+ SmallString<256> Reconstituted;
+ reconstituteName(FuncName, RealName, Remapped, Reconstituted);
+ Error E = Underlying.getRecords(Reconstituted, Data);
+ if (!E)
+ return E;
+
+ // If we failed because the name doesn't exist, fall back to asking
+ // about the original name.
+ if (Error Unhandled = handleErrors(
+ std::move(E), [](std::unique_ptr<InstrProfError> Err) {
+ return Err->get() == instrprof_error::unknown_function
+ ? Error::success()
+ : Error(std::move(Err));
+ }))
+ return Unhandled;
+ }
+ }
+ }
+ return Underlying.getRecords(FuncName, Data);
+ }
+
+private:
+ /// The memory buffer containing the remapping configuration. Remappings
+ /// holds pointers into this buffer.
+ std::unique_ptr<MemoryBuffer> RemapBuffer;
+
+ /// The mangling remapper.
+ SymbolRemappingReader Remappings;
+
+ /// Mapping from mangled name keys to the name used for the key in the
+ /// profile data.
+ /// FIXME: Can we store a location within the on-disk hash table instead of
+ /// redoing lookup?
+ DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
+
+ /// The real profile data reader.
+ InstrProfReaderIndex<HashTableImpl> &Underlying;
+};
+
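As a concrete illustration of the name handling (the strings are made up): a profile key such as "main.cpp:_Z3foov" is split on ':' until a piece starting with "_Z" is found, so extractName returns "_Z3foov". If the remapping file maps that mangled name to, say, "_Z3barv", reconstituteName splices the replacement back between the untouched prefix and suffix, yielding "main.cpp:_Z3barv", which is then looked up in the underlying index.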
bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
using namespace support;
@@ -683,10 +817,22 @@ Error IndexedInstrProfReader::readHeader() {
uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
// The rest of the file is an on disk hash table.
- InstrProfReaderIndexBase *IndexPtr = nullptr;
- IndexPtr = new InstrProfReaderIndex<OnDiskHashTableImplV3>(
- Start + HashOffset, Cur, Start, HashType, FormatVersion);
- Index.reset(IndexPtr);
+ auto IndexPtr =
+ llvm::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
+ Start + HashOffset, Cur, Start, HashType, FormatVersion);
+
+ // Load the remapping table now if requested.
+ if (RemappingBuffer) {
+ Remapper = llvm::make_unique<
+ InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
+ std::move(RemappingBuffer), *IndexPtr);
+ if (Error E = Remapper->populateRemappings())
+ return E;
+ } else {
+ Remapper = llvm::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
+ }
+ Index = std::move(IndexPtr);
+
return success();
}
@@ -707,7 +853,7 @@ Expected<InstrProfRecord>
IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash) {
ArrayRef<NamedInstrProfRecord> Data;
- Error Err = Index->getRecords(FuncName, Data);
+ Error Err = Remapper->getRecords(FuncName, Data);
if (Err)
return std::move(Err);
// Found it. Look for counters with the right hash.
diff --git a/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index 62f00d693c68..3a8462fd9b0d 100644
--- a/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -58,7 +58,7 @@ void SampleProfileSummaryBuilder::addRecord(
void ProfileSummaryBuilder::computeDetailedSummary() {
if (DetailedSummaryCutoffs.empty())
return;
- llvm::sort(DetailedSummaryCutoffs.begin(), DetailedSummaryCutoffs.end());
+ llvm::sort(DetailedSummaryCutoffs);
auto Iter = CountFrequencies.begin();
const auto End = CountFrequencies.end();
diff --git a/contrib/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm/lib/ProfileData/SampleProf.cpp
index 30438ba7962a..1a124415f179 100644
--- a/contrib/llvm/lib/ProfileData/SampleProf.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProf.cpp
@@ -26,6 +26,14 @@
using namespace llvm;
using namespace sampleprof;
+namespace llvm {
+namespace sampleprof {
+SampleProfileFormat FunctionSamples::Format;
+DenseMap<uint64_t, StringRef> FunctionSamples::GUIDToFuncNameMap;
+Module *FunctionSamples::CurrentModule;
+} // namespace sampleprof
+} // namespace llvm
+
namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
@@ -59,6 +67,8 @@ class SampleProfErrorCategoryType : public std::error_category {
return "Unimplemented feature";
case sampleprof_error::counter_overflow:
return "Counter overflow";
+ case sampleprof_error::ostream_seek_unsupported:
+ return "Ostream does not support seek";
}
llvm_unreachable("A value of sampleprof_error has no message.");
}
diff --git a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
index 79335e67cd98..a68d1e9d3ab0 100644
--- a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -320,6 +321,21 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
}
template <typename T>
+ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
+ std::error_code EC;
+
+ if (Data + sizeof(T) > End) {
+ EC = sampleprof_error::truncated;
+ reportError(0, EC.message());
+ return EC;
+ }
+
+ using namespace support;
+ T Val = endian::readNext<T, little, unaligned>(Data);
+ return Val;
+}
+
+template <typename T>
inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
std::error_code EC;
auto Idx = readNumber<uint32_t>();
@@ -423,29 +439,51 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderBinary::read() {
- while (!at_eof()) {
- auto NumHeadSamples = readNumber<uint64_t>();
- if (std::error_code EC = NumHeadSamples.getError())
- return EC;
+std::error_code SampleProfileReaderBinary::readFuncProfile() {
+ auto NumHeadSamples = readNumber<uint64_t>();
+ if (std::error_code EC = NumHeadSamples.getError())
+ return EC;
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
- return EC;
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
- Profiles[*FName] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[*FName];
- FProfile.setName(*FName);
+ Profiles[*FName] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[*FName];
+ FProfile.setName(*FName);
- FProfile.addHeadSamples(*NumHeadSamples);
+ FProfile.addHeadSamples(*NumHeadSamples);
+
+ if (std::error_code EC = readProfile(FProfile))
+ return EC;
+ return sampleprof_error::success;
+}
- if (std::error_code EC = readProfile(FProfile))
+std::error_code SampleProfileReaderBinary::read() {
+ while (!at_eof()) {
+ if (std::error_code EC = readFuncProfile())
return EC;
}
return sampleprof_error::success;
}
+std::error_code SampleProfileReaderCompactBinary::read() {
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *SavedData = Data;
+ Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
+ iter->second;
+ if (std::error_code EC = readFuncProfile())
+ return EC;
+ Data = SavedData;
+ }
+ return sampleprof_error::success;
+}
+
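The compact-binary reader above keys its function offset table by the decimal MD5 GUID of each function name and deserializes only the profiles that the current module references, saving and restoring its Data cursor around each jump. A minimal standalone sketch of that save-seek-parse loop over an in-memory buffer (hypothetical ParseOne callback and a std::hash stand-in for MD5Hash, not the LLVM API):

#include <cstdint>
#include <functional>
#include <map>
#include <string>
#include <vector>

// Table maps a key to a byte offset inside Buf; ParseOne decodes one record
// starting at the supplied cursor position.
void readSelected(const std::vector<uint8_t> &Buf,
                  const std::map<std::string, uint64_t> &Table,
                  const std::vector<std::string> &Wanted,
                  const std::function<void(const uint8_t *)> &ParseOne) {
  for (const std::string &Name : Wanted) {
    std::string Key = std::to_string(std::hash<std::string>{}(Name)); // stand-in for MD5Hash
    auto It = Table.find(Key);
    if (It == Table.end())
      continue;                                       // not present in the profile
    const uint8_t *Cursor = Buf.data() + It->second;  // jump straight to the record
    ParseOne(Cursor);                                 // decode just this function's profile
    // Cursor is local here; the LLVM reader instead saves and restores its
    // member Data pointer around the jump.
  }
}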
std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
if (Magic == SPMagic())
return sampleprof_error::success;
@@ -514,6 +552,53 @@ std::error_code SampleProfileReaderBinary::readHeader() {
return sampleprof_error::success;
}
+std::error_code SampleProfileReaderCompactBinary::readHeader() {
+ SampleProfileReaderBinary::readHeader();
+ if (std::error_code EC = readFuncOffsetTable())
+ return EC;
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
+ auto TableOffset = readUnencodedNumber<uint64_t>();
+ if (std::error_code EC = TableOffset.getError())
+ return EC;
+
+ const uint8_t *SavedData = Data;
+ const uint8_t *TableStart =
+ reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
+ *TableOffset;
+ Data = TableStart;
+
+ auto Size = readNumber<uint64_t>();
+ if (std::error_code EC = Size.getError())
+ return EC;
+
+ FuncOffsetTable.reserve(*Size);
+ for (uint32_t I = 0; I < *Size; ++I) {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+
+ auto Offset = readNumber<uint64_t>();
+ if (std::error_code EC = Offset.getError())
+ return EC;
+
+ FuncOffsetTable[*FName] = *Offset;
+ }
+ End = TableStart;
+ Data = SavedData;
+ return sampleprof_error::success;
+}
+
+void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
+ FuncsToUse.clear();
+ for (auto &F : M) {
+ StringRef Fname = F.getName().split('.').first;
+ FuncsToUse.insert(Fname);
+ }
+}
+
std::error_code SampleProfileReaderBinary::readSummaryEntry(
std::vector<ProfileSummaryEntry> &Entries) {
auto Cutoff = readNumber<uint64_t>();
@@ -827,6 +912,40 @@ bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
return Magic == "adcg*704";
}
+std::error_code SampleProfileReaderItaniumRemapper::read() {
+ // If the underlying data is in compact format, we can't remap it because
+ // we don't know what the original function names were.
+ if (getFormat() == SPF_Compact_Binary) {
+ Ctx.diagnose(DiagnosticInfoSampleProfile(
+ Buffer->getBufferIdentifier(),
+ "Profile data remapping cannot be applied to profile data "
+ "in compact format (original mangled names are not available).",
+ DS_Warning));
+ return sampleprof_error::success;
+ }
+
+ if (Error E = Remappings.read(*Buffer)) {
+ handleAllErrors(
+ std::move(E), [&](const SymbolRemappingParseError &ParseError) {
+ reportError(ParseError.getLineNum(), ParseError.getMessage());
+ });
+ return sampleprof_error::malformed;
+ }
+
+ for (auto &Sample : getProfiles())
+ if (auto Key = Remappings.insert(Sample.first()))
+ SampleMap.insert({Key, &Sample.second});
+
+ return sampleprof_error::success;
+}
+
+FunctionSamples *
+SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname) {
+ if (auto Key = Remappings.lookup(Fname))
+ return SampleMap.lookup(Key);
+ return SampleProfileReader::getSamplesFor(Fname);
+}
+
/// Prepare a memory buffer for the contents of \p Filename.
///
/// \returns an error code indicating the status of the buffer.
@@ -859,6 +978,27 @@ SampleProfileReader::create(const Twine &Filename, LLVMContext &C) {
return create(BufferOrError.get(), C);
}
+/// Create a sample profile remapper from the given input, to remap the
+/// function names in the given profile data.
+///
+/// \param Filename The file to open.
+///
+/// \param C The LLVM context to use to emit diagnostics.
+///
+/// \param Underlying The underlying profile data reader to remap.
+///
+/// \returns an error code indicating the status of the created reader.
+ErrorOr<std::unique_ptr<SampleProfileReader>>
+SampleProfileReaderItaniumRemapper::create(
+ const Twine &Filename, LLVMContext &C,
+ std::unique_ptr<SampleProfileReader> Underlying) {
+ auto BufferOrError = setupMemoryBuffer(Filename);
+ if (std::error_code EC = BufferOrError.getError())
+ return EC;
+ return llvm::make_unique<SampleProfileReaderItaniumRemapper>(
+ std::move(BufferOrError.get()), C, std::move(Underlying));
+}
+
/// Create a sample profile reader based on the format of the input data.
///
/// \param B The memory buffer to create the reader from (assumes ownership).
@@ -880,6 +1020,7 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) {
else
return sampleprof_error::unrecognized_format;
+ FunctionSamples::Format = Reader->getFormat();
if (std::error_code EC = Reader->readHeader())
return EC;
diff --git a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
index b4de30118b8b..b1c669ec31c4 100644
--- a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -22,6 +22,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
@@ -64,6 +66,15 @@ SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterCompactBinary::write(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
+ return EC;
+ if (std::error_code EC = writeFuncOffsetTable())
+ return EC;
+ return sampleprof_error::success;
+}
+
/// Write samples to a text file.
///
/// Note: it may be tempting to implement this in terms of
@@ -168,6 +179,30 @@ std::error_code SampleProfileWriterRawBinary::writeNameTable() {
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
+ auto &OS = *OutputStream;
+
+ // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable.
+ auto &OFS = static_cast<raw_fd_ostream &>(OS);
+ uint64_t FuncOffsetTableStart = OS.tell();
+ if (OFS.seek(TableOffset) == (uint64_t)-1)
+ return sampleprof_error::ostream_seek_unsupported;
+ support::endian::Writer Writer(*OutputStream, support::little);
+ Writer.write(FuncOffsetTableStart);
+ if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1)
+ return sampleprof_error::ostream_seek_unsupported;
+
+ // Write out the table size.
+ encodeULEB128(FuncOffsetTable.size(), OS);
+
+ // Write out FuncOffsetTable.
+ for (auto entry : FuncOffsetTable) {
+ writeNameIdx(entry.first);
+ encodeULEB128(entry.second, OS);
+ }
+ return sampleprof_error::success;
+}
+
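The writer above reserves a fixed-width slot in the header, then seeks back and patches it with the real start of the function offset table once that position is known; this only works on seekable streams, which is why the sampleprof_error::ostream_seek_unsupported code was added. A standalone sketch of the same reserve-and-patch idiom using std::fstream (not the raw_fd_ostream code in the diff):

#include <cstddef>
#include <cstdint>
#include <fstream>

// Write a placeholder, emit the payload, then patch the placeholder with the
// payload's start offset. Assumes the stream is seekable.
void writeWithPatchedOffset(std::fstream &OS, const char *Payload, std::size_t N) {
  std::streampos Slot = OS.tellp();
  uint64_t Zero = 0;
  OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero));     // reserve the slot

  std::streampos Start = OS.tellp();
  OS.write(Payload, static_cast<std::streamsize>(N));                // emit the payload

  uint64_t Offset = static_cast<uint64_t>(static_cast<std::streamoff>(Start));
  OS.seekp(Slot);                                                    // go back
  OS.write(reinterpret_cast<const char *>(&Offset), sizeof(Offset)); // patch the slot
  OS.seekp(0, std::ios::end);                                        // resume at the end
}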
std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
@@ -215,6 +250,19 @@ std::error_code SampleProfileWriterBinary::writeHeader(
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterCompactBinary::writeHeader(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ support::endian::Writer Writer(*OutputStream, support::little);
+ if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap))
+ return EC;
+
+ // Reserve a slot for the offset of function offset table. The slot will
+ // be populated with the offset of FuncOffsetTable later.
+ TableOffset = OutputStream->tell();
+ Writer.write(static_cast<uint64_t>(-2));
+ return sampleprof_error::success;
+}
+
std::error_code SampleProfileWriterBinary::writeSummary() {
auto &OS = *OutputStream;
encodeULEB128(Summary->getTotalCount(), OS);
@@ -283,6 +331,15 @@ std::error_code SampleProfileWriterBinary::write(const FunctionSamples &S) {
return writeBody(S);
}
+std::error_code
+SampleProfileWriterCompactBinary::write(const FunctionSamples &S) {
+ uint64_t Offset = OutputStream->tell();
+ StringRef Name = S.getName();
+ FuncOffsetTable[Name] = Offset;
+ encodeULEB128(S.getHeadSamples(), *OutputStream);
+ return writeBody(S);
+}
+
/// Create a sample profile file writer based on the specified format.
///
/// \param Filename The file to create.
diff --git a/contrib/llvm/lib/Support/AArch64TargetParser.cpp b/contrib/llvm/lib/Support/AArch64TargetParser.cpp
new file mode 100644
index 000000000000..e897137df680
--- /dev/null
+++ b/contrib/llvm/lib/Support/AArch64TargetParser.cpp
@@ -0,0 +1,206 @@
+//===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise AArch64 hardware features
+// such as FPU/CPU/ARCH and extension names.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/AArch64TargetParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include <cctype>
+
+using namespace llvm;
+
+static unsigned checkArchVersion(llvm::StringRef Arch) {
+ if (Arch.size() >= 2 && Arch[0] == 'v' && std::isdigit(Arch[1]))
+ return (Arch[1] - 48);
+ return 0;
+}
+
+unsigned AArch64::getDefaultFPU(StringRef CPU, AArch64::ArchKind AK) {
+ if (CPU == "generic")
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].DefaultFPU;
+
+ return StringSwitch<unsigned>(CPU)
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, ARM::DEFAULT_FPU)
+#include "../../include/llvm/Support/AArch64TargetParser.def"
+ .Default(ARM::FK_INVALID);
+}
+
+unsigned AArch64::getDefaultExtensions(StringRef CPU, AArch64::ArchKind AK) {
+ if (CPU == "generic")
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
+
+ return StringSwitch<unsigned>(CPU)
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, AArch64ARCHNames[static_cast<unsigned>(ArchKind::ID)] \
+ .ArchBaseExtensions | \
+ DEFAULT_EXT)
+#include "../../include/llvm/Support/AArch64TargetParser.def"
+ .Default(AArch64::AEK_INVALID);
+}
+
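The StringSwitch chains above are populated by including a .def file that expands an AARCH64_CPU_NAME entry per CPU, the usual LLVM X-macro pattern. A minimal self-contained illustration of the technique (toy table and names, not the real .def contents):

#include <string>

// Toy X-macro table: each entry carries a name and a default value.
#define CPU_TABLE \
  CPU_ENTRY("alpha", 1) \
  CPU_ENTRY("beta", 2)

int defaultFor(const std::string &CPU) {
  // Expand the table once into a chain of comparisons.
#define CPU_ENTRY(NAME, VALUE) if (CPU == NAME) return VALUE;
  CPU_TABLE
#undef CPU_ENTRY
  return 0; // unknown CPU
}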
+AArch64::ArchKind AArch64::getCPUArchKind(StringRef CPU) {
+ if (CPU == "generic")
+ return ArchKind::ARMV8A;
+
+ return StringSwitch<AArch64::ArchKind>(CPU)
+#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, ArchKind::ID)
+#include "../../include/llvm/Support/AArch64TargetParser.def"
+ .Default(ArchKind::INVALID);
+}
+
+bool AArch64::getExtensionFeatures(unsigned Extensions,
+ std::vector<StringRef> &Features) {
+ if (Extensions == AArch64::AEK_INVALID)
+ return false;
+
+ if (Extensions & AEK_FP)
+ Features.push_back("+fp-armv8");
+ if (Extensions & AEK_SIMD)
+ Features.push_back("+neon");
+ if (Extensions & AEK_CRC)
+ Features.push_back("+crc");
+ if (Extensions & AEK_CRYPTO)
+ Features.push_back("+crypto");
+ if (Extensions & AEK_DOTPROD)
+ Features.push_back("+dotprod");
+ if (Extensions & AEK_FP16FML)
+ Features.push_back("+fp16fml");
+ if (Extensions & AEK_FP16)
+ Features.push_back("+fullfp16");
+ if (Extensions & AEK_PROFILE)
+ Features.push_back("+spe");
+ if (Extensions & AEK_RAS)
+ Features.push_back("+ras");
+ if (Extensions & AEK_LSE)
+ Features.push_back("+lse");
+ if (Extensions & AEK_RDM)
+ Features.push_back("+rdm");
+ if (Extensions & AEK_SVE)
+ Features.push_back("+sve");
+ if (Extensions & AEK_RCPC)
+ Features.push_back("+rcpc");
+
+ return true;
+}
+
+bool AArch64::getArchFeatures(AArch64::ArchKind AK,
+ std::vector<StringRef> &Features) {
+ if (AK == ArchKind::ARMV8_1A)
+ Features.push_back("+v8.1a");
+ if (AK == ArchKind::ARMV8_2A)
+ Features.push_back("+v8.2a");
+ if (AK == ArchKind::ARMV8_3A)
+ Features.push_back("+v8.3a");
+ if (AK == ArchKind::ARMV8_4A)
+ Features.push_back("+v8.4a");
+ if (AK == ArchKind::ARMV8_5A)
+ Features.push_back("+v8.5a");
+
+ return AK != ArchKind::INVALID;
+}
+
+StringRef AArch64::getArchName(AArch64::ArchKind AK) {
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].getName();
+}
+
+StringRef AArch64::getCPUAttr(AArch64::ArchKind AK) {
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].getCPUAttr();
+}
+
+StringRef AArch64::getSubArch(AArch64::ArchKind AK) {
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].getSubArch();
+}
+
+unsigned AArch64::getArchAttr(AArch64::ArchKind AK) {
+ return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchAttr;
+}
+
+StringRef AArch64::getArchExtName(unsigned ArchExtKind) {
+ for (const auto &AE : AArch64ARCHExtNames)
+ if (ArchExtKind == AE.ID)
+ return AE.getName();
+ return StringRef();
+}
+
+StringRef AArch64::getArchExtFeature(StringRef ArchExt) {
+ if (ArchExt.startswith("no")) {
+ StringRef ArchExtBase(ArchExt.substr(2));
+ for (const auto &AE : AArch64ARCHExtNames) {
+ if (AE.NegFeature && ArchExtBase == AE.getName())
+ return StringRef(AE.NegFeature);
+ }
+ }
+
+ for (const auto &AE : AArch64ARCHExtNames)
+ if (AE.Feature && ArchExt == AE.getName())
+ return StringRef(AE.Feature);
+ return StringRef();
+}
+
+StringRef AArch64::getDefaultCPU(StringRef Arch) {
+ ArchKind AK = parseArch(Arch);
+ if (AK == ArchKind::INVALID)
+ return StringRef();
+
+ // Look for multiple AKs to find the default for pair AK+Name.
+ for (const auto &CPU : AArch64CPUNames)
+ if (CPU.ArchID == AK && CPU.Default)
+ return CPU.getName();
+
+ // If we can't find a default then target the architecture instead
+ return "generic";
+}
+
+void AArch64::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
+ for (const auto &Arch : AArch64CPUNames) {
+ if (Arch.ArchID != ArchKind::INVALID)
+ Values.push_back(Arch.getName());
+ }
+}
+
+bool AArch64::isX18ReservedByDefault(const Triple &TT) {
+ return TT.isAndroid() || TT.isOSDarwin() || TT.isOSFuchsia() ||
+ TT.isOSWindows();
+}
+
+// Allows partial match, ex. "v8a" matches "armv8a".
+AArch64::ArchKind AArch64::parseArch(StringRef Arch) {
+ Arch = ARM::getCanonicalArchName(Arch);
+ if (checkArchVersion(Arch) < 8)
+ return ArchKind::INVALID;
+
+ StringRef Syn = ARM::getArchSynonym(Arch);
+ for (const auto A : AArch64ARCHNames) {
+ if (A.getName().endswith(Syn))
+ return A.ID;
+ }
+ return ArchKind::INVALID;
+}
+
+AArch64::ArchExtKind AArch64::parseArchExt(StringRef ArchExt) {
+ for (const auto A : AArch64ARCHExtNames) {
+ if (ArchExt == A.getName())
+ return static_cast<ArchExtKind>(A.ID);
+ }
+ return AArch64::AEK_INVALID;
+}
+
+AArch64::ArchKind AArch64::parseCPUArch(StringRef CPU) {
+ for (const auto C : AArch64CPUNames) {
+ if (CPU == C.getName())
+ return C.ArchID;
+ }
+ return ArchKind::INVALID;
+}
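Taken together, the new file exposes a small lookup API: parse a CPU or arch name, then ask for its default FPU, extensions, and feature strings. A brief usage sketch, assuming the declarations in llvm/Support/AArch64TargetParser.h match the definitions above:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/AArch64TargetParser.h"
#include <vector>

// Resolve a CPU name to its architecture, then collect the subtarget feature
// strings implied by that CPU's default extensions.
std::vector<llvm::StringRef> featuresForCPU(llvm::StringRef CPU) {
  using namespace llvm;
  AArch64::ArchKind AK = AArch64::parseCPUArch(CPU);
  if (AK == AArch64::ArchKind::INVALID)
    return {};
  unsigned Ext = AArch64::getDefaultExtensions(CPU, AK);
  std::vector<StringRef> Features;
  AArch64::getExtensionFeatures(Ext, Features);
  return Features;
}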
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
index 1fae0e9b8d6d..a5f4f98c489a 100644
--- a/contrib/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -16,8 +16,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -78,7 +80,7 @@ void APInt::initSlowCase(uint64_t val, bool isSigned) {
U.pVal[0] = val;
if (isSigned && int64_t(val) < 0)
for (unsigned i = 1; i < getNumWords(); ++i)
- U.pVal[i] = WORD_MAX;
+ U.pVal[i] = WORDTYPE_MAX;
clearUnusedBits();
}
@@ -304,13 +306,13 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
unsigned hiWord = whichWord(hiBit);
// Create an initial mask for the low word with zeros below loBit.
- uint64_t loMask = WORD_MAX << whichBit(loBit);
+ uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
// If hiBit is not aligned, we need a high mask.
unsigned hiShiftAmt = whichBit(hiBit);
if (hiShiftAmt != 0) {
// Create a high mask with zeros above hiBit.
- uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
+ uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
// If loWord and hiWord are equal, then we combine the masks. Otherwise,
// set the bits in hiWord.
if (hiWord == loWord)
@@ -323,7 +325,7 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
// Fill any words between loWord and hiWord with all ones.
for (unsigned word = loWord + 1; word < hiWord; ++word)
- U.pVal[word] = WORD_MAX;
+ U.pVal[word] = WORDTYPE_MAX;
}
/// Toggle every bit to its opposite value.
@@ -354,7 +356,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Single word result can be done as a direct bitmask.
if (isSingleWord()) {
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
U.VAL &= ~(mask << bitPosition);
U.VAL |= (subBits.U.VAL << bitPosition);
return;
@@ -366,7 +368,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Insertion within a single word can be done as a direct bitmask.
if (loWord == hi1Word) {
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
U.pVal[loWord] &= ~(mask << loBit);
U.pVal[loWord] |= (subBits.U.VAL << loBit);
return;
@@ -382,7 +384,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Mask+insert remaining bits.
unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
if (remainingBits != 0) {
- uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
U.pVal[hi1Word] &= ~mask;
U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
}
@@ -558,7 +560,7 @@ unsigned APInt::countLeadingOnesSlowCase() const {
unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
if (Count == highWordBits) {
for (i--; i >= 0; --i) {
- if (U.pVal[i] == WORD_MAX)
+ if (U.pVal[i] == WORDTYPE_MAX)
Count += APINT_BITS_PER_WORD;
else {
Count += llvm::countLeadingOnes(U.pVal[i]);
@@ -582,7 +584,7 @@ unsigned APInt::countTrailingZerosSlowCase() const {
unsigned APInt::countTrailingOnesSlowCase() const {
unsigned Count = 0;
unsigned i = 0;
- for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i)
+ for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
Count += APINT_BITS_PER_WORD;
if (i < getNumWords())
Count += llvm::countTrailingOnes(U.pVal[i]);
@@ -711,24 +713,20 @@ APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
}
APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
- union {
- double D;
- uint64_t I;
- } T;
- T.D = Double;
+ uint64_t I = bit_cast<uint64_t>(Double);
// Get the sign bit from the highest order bit
- bool isNeg = T.I >> 63;
+ bool isNeg = I >> 63;
// Get the 11-bit exponent and adjust for the 1023 bit bias
- int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
+ int64_t exp = ((I >> 52) & 0x7ff) - 1023;
// If the exponent is negative, the value is < 0 so just return 0.
if (exp < 0)
return APInt(width, 0u);
// Extract the mantissa by clearing the top 12 bits (sign + exponent).
- uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
+ uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
// If the exponent doesn't shift all bits out of the mantissa
if (exp < 52)
@@ -805,12 +803,8 @@ double APInt::roundToDouble(bool isSigned) const {
// The leading bit of mantissa is implicit, so get rid of it.
uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
- union {
- double D;
- uint64_t I;
- } T;
- T.I = sign | (exp << 52) | mantissa;
- return T.D;
+ uint64_t I = sign | (exp << 52) | mantissa;
+ return bit_cast<double>(I);
}
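The two hunks above replace union-based type punning with llvm::bit_cast, which copies the object representation instead of reading an inactive union member. Outside LLVM the same effect is available through std::memcpy (or std::bit_cast in C++20); a minimal sketch:

#include <cstdint>
#include <cstring>

// Reinterpret the bits of a double as a 64-bit integer without undefined
// behaviour: memcpy the object representation rather than punning a union.
uint64_t doubleBits(double D) {
  uint64_t I;
  static_assert(sizeof(I) == sizeof(D), "size mismatch");
  std::memcpy(&I, &D, sizeof(I));
  return I;
}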
// Truncate to new width.
@@ -1253,20 +1247,18 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
// The DEBUG macros here tend to be spam in the debug output if you're not
// debugging this code. Disable them unless KNUTH_DEBUG is defined.
-#pragma push_macro("LLVM_DEBUG")
-#ifndef KNUTH_DEBUG
-#undef LLVM_DEBUG
-#define LLVM_DEBUG(X) \
- do { \
- } while (false)
+#ifdef KNUTH_DEBUG
+#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
+#else
+#define DEBUG_KNUTH(X) do {} while(false)
#endif
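Instead of pushing and popping LLVM_DEBUG, the code now defines a dedicated DEBUG_KNUTH macro that forwards to LLVM_DEBUG only when KNUTH_DEBUG is defined and otherwise expands to nothing. The same pattern in isolation, with a hypothetical VERBOSE_TRACE switch:

#include <cstdio>

// Compile-time switch: define VERBOSE_TRACE to keep the statements, leave it
// undefined to have them compiled out entirely.
#ifdef VERBOSE_TRACE
#define TRACE(X) do { X; } while (false)
#else
#define TRACE(X) do { } while (false)
#endif

int sum(int A, int B) {
  TRACE(std::printf("sum(%d, %d)\n", A, B)); // vanishes unless VERBOSE_TRACE is set
  return A + B;
}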
- LLVM_DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
- LLVM_DEBUG(dbgs() << "KnuthDiv: original:");
- LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
- LLVM_DEBUG(dbgs() << " by");
- LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << " by");
+ DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
+ DEBUG_KNUTH(dbgs() << '\n');
// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
// u and v by d. Note that we have taken Knuth's advice here to use a power
// of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
@@ -1292,16 +1284,16 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
}
u[m+n] = u_carry;
- LLVM_DEBUG(dbgs() << "KnuthDiv: normal:");
- LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
- LLVM_DEBUG(dbgs() << " by");
- LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << " by");
+ DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
+ DEBUG_KNUTH(dbgs() << '\n');
// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
int j = m;
do {
- LLVM_DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
// D3. [Calculate q'.].
// Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
// Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
@@ -1311,7 +1303,7 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
// value qp is one too large, and it eliminates all cases where qp is two
// too large.
uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
- LLVM_DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
uint64_t qp = dividend / v[n-1];
uint64_t rp = dividend % v[n-1];
if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
@@ -1320,7 +1312,7 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
qp--;
}
- LLVM_DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
// D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
@@ -1336,15 +1328,15 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
u[j+i] = Lo_32(subres);
borrow = Hi_32(p) - Hi_32(subres);
- LLVM_DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
<< ", borrow = " << borrow << '\n');
}
bool isNeg = u[j+n] < borrow;
u[j+n] -= Lo_32(borrow);
- LLVM_DEBUG(dbgs() << "KnuthDiv: after subtraction:");
- LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << '\n');
// D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
// negative, go to step D6; otherwise go on to step D7.
@@ -1365,16 +1357,16 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
}
u[j+n] += carry;
}
- LLVM_DEBUG(dbgs() << "KnuthDiv: after correction:");
- LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
- LLVM_DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
+ DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
+ DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
} while (--j >= 0);
- LLVM_DEBUG(dbgs() << "KnuthDiv: quotient:");
- LLVM_DEBUG(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
+ DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
+ DEBUG_KNUTH(dbgs() << '\n');
// D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
// remainder may be obtained by dividing u[...] by d. If r is non-null we
@@ -1385,23 +1377,21 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
// shift right here.
if (shift) {
uint32_t carry = 0;
- LLVM_DEBUG(dbgs() << "KnuthDiv: remainder:");
+ DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
for (int i = n-1; i >= 0; i--) {
r[i] = (u[i] >> shift) | carry;
carry = u[i] << (32 - shift);
- LLVM_DEBUG(dbgs() << " " << r[i]);
+ DEBUG_KNUTH(dbgs() << " " << r[i]);
}
} else {
for (int i = n-1; i >= 0; i--) {
r[i] = u[i];
- LLVM_DEBUG(dbgs() << " " << r[i]);
+ DEBUG_KNUTH(dbgs() << " " << r[i]);
}
}
- LLVM_DEBUG(dbgs() << '\n');
+ DEBUG_KNUTH(dbgs() << '\n');
}
- LLVM_DEBUG(dbgs() << '\n');
-
-#pragma pop_macro("LLVM_DEBUG")
+ DEBUG_KNUTH(dbgs() << '\n');
}
void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
@@ -1957,7 +1947,43 @@ APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
return *this << ShAmt;
}
+APInt APInt::sadd_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = sadd_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+}
+
+APInt APInt::uadd_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = uadd_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return APInt::getMaxValue(BitWidth);
+}
+
+APInt APInt::ssub_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = ssub_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+ return isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+}
+
+APInt APInt::usub_sat(const APInt &RHS) const {
+ bool Overflow;
+ APInt Res = usub_ov(RHS, Overflow);
+ if (!Overflow)
+ return Res;
+
+ return APInt(BitWidth, 0);
+}
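Each of the new saturating helpers performs the ordinary overflowing operation and then clamps to the type's extreme value if the overflow flag fired. For fixed-width machine integers the same idea looks like this (a sketch using the __builtin_add_overflow intrinsic available in GCC and Clang, not the APInt API):

#include <cstdint>
#include <limits>

// Signed saturating add for int32_t: on overflow, clamp toward the sign of the
// operands (signed add can only overflow when both operands share a sign).
int32_t sadd_sat32(int32_t A, int32_t B) {
  int32_t R;
  if (!__builtin_add_overflow(A, B, &R))
    return R;
  return A < 0 ? std::numeric_limits<int32_t>::min()
               : std::numeric_limits<int32_t>::max();
}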
void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
@@ -2707,3 +2733,193 @@ APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
}
llvm_unreachable("Unknown APInt::Rounding enum");
}
+
+Optional<APInt>
+llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
+ unsigned RangeWidth) {
+ unsigned CoeffWidth = A.getBitWidth();
+ assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
+ assert(RangeWidth <= CoeffWidth &&
+ "Value range width should be less than coefficient width");
+ assert(RangeWidth > 1 && "Value range bit width should be > 1");
+
+ LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
+ << "x + " << C << ", rw:" << RangeWidth << '\n');
+
+ // Identify 0 as a (non)solution immediately.
+ if (C.sextOrTrunc(RangeWidth).isNullValue()) {
+ LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
+ return APInt(CoeffWidth, 0);
+ }
+
+ // The result of APInt arithmetic has the same bit width as the operands,
+ // so it can actually lose high bits. A product of two n-bit integers needs
+ // 2n-1 bits to represent the full value.
+ // The operation done below (on quadratic coefficients) that can produce
+ // the largest value is the evaluation of the equation during bisection,
+ // which needs 3 times the bitwidth of the coefficient, so the total number
+ // of required bits is 3n.
+ //
+ // The purpose of this extension is to simulate the set Z of all integers,
+ // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
+ // and negative numbers (not so much in a modulo arithmetic). The method
+ // used to solve the equation is based on the standard formula for real
+ // numbers, and uses the concepts of "positive" and "negative" with their
+ // usual meanings.
+ CoeffWidth *= 3;
+ A = A.sext(CoeffWidth);
+ B = B.sext(CoeffWidth);
+ C = C.sext(CoeffWidth);
+
+ // Make A > 0 for simplicity. Negate cannot overflow at this point because
+ // the bit width has increased.
+ if (A.isNegative()) {
+ A.negate();
+ B.negate();
+ C.negate();
+ }
+
+ // Solving an equation q(x) = 0 with coefficients in modular arithmetic
+ // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
+ // and R = 2^BitWidth.
+ // Since we're trying not only to find exact solutions, but also values
+ // that "wrap around", such a set will always have a solution, i.e. an x
+ // that satisfies at least one of the equations, or such that |q(x)|
+ // exceeds kR, while |q(x-1)| for the same k does not.
+ //
+ // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
+ // positive solution n (in the above sense), and also such that the n
+ // will be the least among all solutions corresponding to k = 0, 1, ...
+ // (more precisely, the least element in the set
+ // { n(k) | k is such that a solution n(k) exists }).
+ //
+ // Consider the parabola (over real numbers) that corresponds to the
+ // quadratic equation. Since A > 0, the arms of the parabola will point
+ // up. Picking different values of k will shift it up and down by R.
+ //
+ // We want to shift the parabola in such a way as to reduce the problem
+ // of solving q(x) = kR to solving shifted_q(x) = 0.
+ // (The interesting solutions are the ceilings of the real number
+ // solutions.)
+ APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
+ APInt TwoA = 2 * A;
+ APInt SqrB = B * B;
+ bool PickLow;
+
+ auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
+ assert(A.isStrictlyPositive());
+ APInt T = V.abs().urem(A);
+ if (T.isNullValue())
+ return V;
+ return V.isNegative() ? V+T : V+(A-T);
+ };
+
+ // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
+ // iff B is positive.
+ if (B.isNonNegative()) {
+ // If B >= 0, the vertex is at a negative location (or at 0), so in
+ // order to have a non-negative solution we need to pick k that makes
+ // C-kR negative. To satisfy all the requirements for the solution
+ // that we are looking for, it needs to be closest to 0 of all k.
+ C = C.srem(R);
+ if (C.isStrictlyPositive())
+ C -= R;
+ // Pick the greater solution.
+ PickLow = false;
+ } else {
+ // If B < 0, the vertex is at a positive location. For any solution
+ // to exist, the discriminant must be non-negative. This means that
+ // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
+ // lower bound on values of k: kR >= C - B^2/4A.
+ APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
+ // Round LowkR up (towards +inf) to the nearest kR.
+ LowkR = RoundUp(LowkR, R);
+
+ // If there exists k meeting the condition above, and such that
+ // C-kR > 0, there will be two positive real number solutions of
+ // q(x) = kR. Out of all such values of k, pick the one that makes
+ // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
+ // In other words, find maximum k such that LowkR <= kR < C.
+ if (C.sgt(LowkR)) {
+ // If LowkR < C, then such a k is guaranteed to exist because
+ // LowkR itself is a multiple of R.
+ C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
+ // Pick the smaller solution.
+ PickLow = true;
+ } else {
+ // If C-kR < 0 for all potential k's, it means that one solution
+ // will be negative, while the other will be positive. The positive
+ // solution will shift towards 0 if the parabola is moved up.
+ // Pick the kR closest to the lower bound (i.e. make C-kR closest
+ // to 0, or in other words, out of all parabolas that have solutions,
+ // pick the one that is the farthest "up").
+ // Since LowkR is itself a multiple of R, simply take C-LowkR.
+ C -= LowkR;
+ // Pick the greater solution.
+ PickLow = false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
+ << B << "x + " << C << ", rw:" << RangeWidth << '\n');
+
+ APInt D = SqrB - 4*A*C;
+ assert(D.isNonNegative() && "Negative discriminant");
+ APInt SQ = D.sqrt();
+
+ APInt Q = SQ * SQ;
+ bool InexactSQ = Q != D;
+ // The calculated SQ may actually be greater than the exact (non-integer)
+ // value. If that's the case, decrement SQ to get a value that is lower.
+ if (Q.sgt(D))
+ SQ -= 1;
+
+ APInt X;
+ APInt Rem;
+
+ // SQ is rounded down (i.e. SQ * SQ <= D), so the roots may be inexact.
+ // When using the quadratic formula directly, the calculated low root
+ // may be greater than the exact one, since we would be subtracting SQ.
+ // To make sure that the calculated root is not greater than the exact
+ // one, subtract SQ+1 when calculating the low root (for inexact value
+ // of SQ).
+ if (PickLow)
+ APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
+ else
+ APInt::sdivrem(-B + SQ, TwoA, X, Rem);
+
+ // The updated coefficients should be such that the (exact) solution is
+ // positive. Since APInt division rounds towards 0, the calculated one
+ // can be 0, but cannot be negative.
+ assert(X.isNonNegative() && "Solution should be non-negative");
+
+ if (!InexactSQ && Rem.isNullValue()) {
+ LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
+ return X;
+ }
+
+ assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
+ // The exact value of the square root of D should be between SQ and SQ+1.
+ // This implies that the solution should be between that corresponding to
+ // SQ (i.e. X) and that corresponding to SQ+1.
+ //
+ // The calculated X cannot be greater than the exact (real) solution.
+ // Actually it must be strictly less than the exact solution, while
+ // X+1 will be greater than or equal to it.
+
+ APInt VX = (A*X + B)*X + C;
+ APInt VY = VX + TwoA*X + A + B;
+ bool SignChange = VX.isNegative() != VY.isNegative() ||
+ VX.isNullValue() != VY.isNullValue();
+ // If the sign did not change between X and X+1, X is not a valid solution.
+ // This could happen when the actual (exact) roots don't have an integer
+ // between them, so they would both be contained between X and X+1.
+ if (!SignChange) {
+ LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
+ return None;
+ }
+
+ X += 1;
+ LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
+ return X;
+}
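Restating the algebra from the long comment above: the routine looks for the least non-negative x at which the quadratic, evaluated in RangeWidth-bit modular arithmetic, reaches a multiple of the range, i.e. it solves one of the shifted equations

  A x^2 + B x + C = k R, \qquad R = 2^{\text{RangeWidth}}, \quad k = 0, 1, 2, \dots

Once C has been adjusted by the chosen multiple of R, the candidate root comes from the standard formula

  x = \frac{-B \pm \sqrt{B^2 - 4AC}}{2A},

with the sign selected by PickLow and the integer square root SQ rounded so that the computed root never exceeds the exact one; the final sign-change check between q(X) and q(X+1) confirms that a wrap actually occurs in that interval.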
diff --git a/contrib/llvm/lib/Support/ARMTargetParser.cpp b/contrib/llvm/lib/Support/ARMTargetParser.cpp
new file mode 100644
index 000000000000..07294b0c09a3
--- /dev/null
+++ b/contrib/llvm/lib/Support/ARMTargetParser.cpp
@@ -0,0 +1,577 @@
+//===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise ARM hardware features
+// such as FPU/CPU/ARCH/extensions and specific support such as HWDIV.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ARMTargetParser.h"
+#include "llvm/ADT/StringSwitch.h"
+#include <cctype>
+
+using namespace llvm;
+
+static StringRef getHWDivSynonym(StringRef HWDiv) {
+ return StringSwitch<StringRef>(HWDiv)
+ .Case("thumb,arm", "arm,thumb")
+ .Default(HWDiv);
+}
+
+// Allows partial match, ex. "v7a" matches "armv7a".
+ARM::ArchKind ARM::parseArch(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
+ StringRef Syn = getArchSynonym(Arch);
+ for (const auto A : ARCHNames) {
+ if (A.getName().endswith(Syn))
+ return A.ID;
+ }
+ return ArchKind::INVALID;
+}
+
+// Version number (ex. v7 = 7).
+unsigned ARM::parseArchVersion(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
+ switch (parseArch(Arch)) {
+ case ArchKind::ARMV2:
+ case ArchKind::ARMV2A:
+ return 2;
+ case ArchKind::ARMV3:
+ case ArchKind::ARMV3M:
+ return 3;
+ case ArchKind::ARMV4:
+ case ArchKind::ARMV4T:
+ return 4;
+ case ArchKind::ARMV5T:
+ case ArchKind::ARMV5TE:
+ case ArchKind::IWMMXT:
+ case ArchKind::IWMMXT2:
+ case ArchKind::XSCALE:
+ case ArchKind::ARMV5TEJ:
+ return 5;
+ case ArchKind::ARMV6:
+ case ArchKind::ARMV6K:
+ case ArchKind::ARMV6T2:
+ case ArchKind::ARMV6KZ:
+ case ArchKind::ARMV6M:
+ return 6;
+ case ArchKind::ARMV7A:
+ case ArchKind::ARMV7VE:
+ case ArchKind::ARMV7R:
+ case ArchKind::ARMV7M:
+ case ArchKind::ARMV7S:
+ case ArchKind::ARMV7EM:
+ case ArchKind::ARMV7K:
+ return 7;
+ case ArchKind::ARMV8A:
+ case ArchKind::ARMV8_1A:
+ case ArchKind::ARMV8_2A:
+ case ArchKind::ARMV8_3A:
+ case ArchKind::ARMV8_4A:
+ case ArchKind::ARMV8_5A:
+ case ArchKind::ARMV8R:
+ case ArchKind::ARMV8MBaseline:
+ case ArchKind::ARMV8MMainline:
+ return 8;
+ case ArchKind::INVALID:
+ return 0;
+ }
+ llvm_unreachable("Unhandled architecture");
+}
+
+// Profile A/R/M
+ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
+ Arch = getCanonicalArchName(Arch);
+ switch (parseArch(Arch)) {
+ case ArchKind::ARMV6M:
+ case ArchKind::ARMV7M:
+ case ArchKind::ARMV7EM:
+ case ArchKind::ARMV8MMainline:
+ case ArchKind::ARMV8MBaseline:
+ return ProfileKind::M;
+ case ArchKind::ARMV7R:
+ case ArchKind::ARMV8R:
+ return ProfileKind::R;
+ case ArchKind::ARMV7A:
+ case ArchKind::ARMV7VE:
+ case ArchKind::ARMV7K:
+ case ArchKind::ARMV8A:
+ case ArchKind::ARMV8_1A:
+ case ArchKind::ARMV8_2A:
+ case ArchKind::ARMV8_3A:
+ case ArchKind::ARMV8_4A:
+ case ArchKind::ARMV8_5A:
+ return ProfileKind::A;
+ case ArchKind::ARMV2:
+ case ArchKind::ARMV2A:
+ case ArchKind::ARMV3:
+ case ArchKind::ARMV3M:
+ case ArchKind::ARMV4:
+ case ArchKind::ARMV4T:
+ case ArchKind::ARMV5T:
+ case ArchKind::ARMV5TE:
+ case ArchKind::ARMV5TEJ:
+ case ArchKind::ARMV6:
+ case ArchKind::ARMV6K:
+ case ArchKind::ARMV6T2:
+ case ArchKind::ARMV6KZ:
+ case ArchKind::ARMV7S:
+ case ArchKind::IWMMXT:
+ case ArchKind::IWMMXT2:
+ case ArchKind::XSCALE:
+ case ArchKind::INVALID:
+ return ProfileKind::INVALID;
+ }
+ llvm_unreachable("Unhandled architecture");
+}
+
+StringRef ARM::getArchSynonym(StringRef Arch) {
+ return StringSwitch<StringRef>(Arch)
+ .Case("v5", "v5t")
+ .Case("v5e", "v5te")
+ .Case("v6j", "v6")
+ .Case("v6hl", "v6k")
+ .Cases("v6m", "v6sm", "v6s-m", "v6-m")
+ .Cases("v6z", "v6zk", "v6kz")
+ .Cases("v7", "v7a", "v7hl", "v7l", "v7-a")
+ .Case("v7r", "v7-r")
+ .Case("v7m", "v7-m")
+ .Case("v7em", "v7e-m")
+ .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a")
+ .Case("v8.1a", "v8.1-a")
+ .Case("v8.2a", "v8.2-a")
+ .Case("v8.3a", "v8.3-a")
+ .Case("v8.4a", "v8.4-a")
+ .Case("v8.5a", "v8.5-a")
+ .Case("v8r", "v8-r")
+ .Case("v8m.base", "v8-m.base")
+ .Case("v8m.main", "v8-m.main")
+ .Default(Arch);
+}
+
+bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
+
+ if (FPUKind >= FK_LAST || FPUKind == FK_INVALID)
+ return false;
+
+ // fp-only-sp and d16 subtarget features are independent of each other, so we
+ // must enable/disable both.
+ switch (FPUNames[FPUKind].Restriction) {
+ case FPURestriction::SP_D16:
+ Features.push_back("+fp-only-sp");
+ Features.push_back("+d16");
+ break;
+ case FPURestriction::D16:
+ Features.push_back("-fp-only-sp");
+ Features.push_back("+d16");
+ break;
+ case FPURestriction::None:
+ Features.push_back("-fp-only-sp");
+ Features.push_back("-d16");
+ break;
+ }
+
+ // FPU version subtarget features are inclusive of lower-numbered ones, so
+ // enable the one corresponding to this version and disable all that are
+ // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
+ // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
+ switch (FPUNames[FPUKind].FPUVer) {
+ case FPUVersion::VFPV5:
+ Features.push_back("+fp-armv8");
+ break;
+ case FPUVersion::VFPV4:
+ Features.push_back("+vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case FPUVersion::VFPV3_FP16:
+ Features.push_back("+vfp3");
+ Features.push_back("+fp16");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case FPUVersion::VFPV3:
+ Features.push_back("+vfp3");
+ Features.push_back("-fp16");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case FPUVersion::VFPV2:
+ Features.push_back("+vfp2");
+ Features.push_back("-vfp3");
+ Features.push_back("-fp16");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ case FPUVersion::NONE:
+ Features.push_back("-vfp2");
+ Features.push_back("-vfp3");
+ Features.push_back("-fp16");
+ Features.push_back("-vfp4");
+ Features.push_back("-fp-armv8");
+ break;
+ }
+
+ // crypto includes neon, so we handle this similarly to FPU version.
+ switch (FPUNames[FPUKind].NeonSupport) {
+ case NeonSupportLevel::Crypto:
+ Features.push_back("+neon");
+ Features.push_back("+crypto");
+ break;
+ case NeonSupportLevel::Neon:
+ Features.push_back("+neon");
+ Features.push_back("-crypto");
+ break;
+ case NeonSupportLevel::None:
+ Features.push_back("-neon");
+ Features.push_back("-crypto");
+ break;
+ }
+
+ return true;
+}
+
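getFPUFeatures encodes the rule that FPU versions are cumulative: it enables the feature that matches the requested FPU and explicitly disables every higher one so stale "+" flags cannot leak through. A small usage sketch, assuming the declarations in llvm/Support/ARMTargetParser.h match the definitions above:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ARMTargetParser.h"
#include <vector>

// Translate an FPU name such as "vfpv3-d16" into the +/- subtarget features
// the backend expects; returns an empty list for unknown FPUs.
std::vector<llvm::StringRef> fpuFeatures(llvm::StringRef Name) {
  unsigned Kind = llvm::ARM::parseFPU(Name);
  std::vector<llvm::StringRef> Features;
  if (!llvm::ARM::getFPUFeatures(Kind, Features))
    Features.clear();
  return Features;
}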
+// Little/Big endian
+ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
+ if (Arch.startswith("armeb") || Arch.startswith("thumbeb") ||
+ Arch.startswith("aarch64_be"))
+ return EndianKind::BIG;
+
+ if (Arch.startswith("arm") || Arch.startswith("thumb")) {
+ if (Arch.endswith("eb"))
+ return EndianKind::BIG;
+ else
+ return EndianKind::LITTLE;
+ }
+
+ if (Arch.startswith("aarch64"))
+ return EndianKind::LITTLE;
+
+ return EndianKind::INVALID;
+}
+
+// ARM, Thumb, AArch64
+ARM::ISAKind ARM::parseArchISA(StringRef Arch) {
+ return StringSwitch<ISAKind>(Arch)
+ .StartsWith("aarch64", ISAKind::AARCH64)
+ .StartsWith("arm64", ISAKind::AARCH64)
+ .StartsWith("thumb", ISAKind::THUMB)
+ .StartsWith("arm", ISAKind::ARM)
+ .Default(ISAKind::INVALID);
+}
+
+unsigned ARM::parseFPU(StringRef FPU) {
+ StringRef Syn = getFPUSynonym(FPU);
+ for (const auto F : FPUNames) {
+ if (Syn == F.getName())
+ return F.ID;
+ }
+ return FK_INVALID;
+}
+
+ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return NeonSupportLevel::None;
+ return FPUNames[FPUKind].NeonSupport;
+}
+
+// MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, but
+// (iwmmxt|xscale)(eb)? is also permitted. If the former, return
+// "v.+", if the latter, return unmodified string, minus 'eb'.
+// If invalid, return empty string.
+StringRef ARM::getCanonicalArchName(StringRef Arch) {
+ size_t offset = StringRef::npos;
+ StringRef A = Arch;
+ StringRef Error = "";
+
+ // Begins with "arm" / "thumb", move past it.
+ if (A.startswith("arm64"))
+ offset = 5;
+ else if (A.startswith("arm"))
+ offset = 3;
+ else if (A.startswith("thumb"))
+ offset = 5;
+ else if (A.startswith("aarch64")) {
+ offset = 7;
+ // AArch64 uses "_be", not "eb" suffix.
+ if (A.find("eb") != StringRef::npos)
+ return Error;
+ if (A.substr(offset, 3) == "_be")
+ offset += 3;
+ }
+
+ // Ex. "armebv7", move past the "eb".
+ if (offset != StringRef::npos && A.substr(offset, 2) == "eb")
+ offset += 2;
+ // Or, if it ends with eb ("armv7eb"), chop it off.
+ else if (A.endswith("eb"))
+ A = A.substr(0, A.size() - 2);
+ // Trim the head
+ if (offset != StringRef::npos)
+ A = A.substr(offset);
+
+ // Empty string means offset reached the end, which means it's valid.
+ if (A.empty())
+ return Arch;
+
+ // Only match non-marketing names
+ if (offset != StringRef::npos) {
+ // Must start with 'vN'.
+ if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1])))
+ return Error;
+ // Can't have an extra 'eb'.
+ if (A.find("eb") != StringRef::npos)
+ return Error;
+ }
+
+ // Arch will either be a 'v' name (v7a) or a marketing name (xscale).
+ return A;
+}
+
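Reading the definition above, getCanonicalArchName strips the ISA prefix and any "eb" endian marker and keeps either the "vN" architecture form or an unprefixed marketing name; the expected behaviour for a few inputs is sketched below (my reading of the code, not output from a test run):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ARMTargetParser.h"

//   "armv7a"   -> "v7a"      (prefix stripped, 'v' name kept)
//   "armebv7a" -> "v7a"      (big-endian "eb" infix stripped too)
//   "aarch64"  -> "aarch64"  (nothing after the prefix, input returned as-is)
//   "xscale"   -> "xscale"   (marketing name passed through unchanged)
//   "armfoo"   -> ""         (prefixed but not a 'vN' form, rejected)
llvm::StringRef canonical(llvm::StringRef Arch) {
  return llvm::ARM::getCanonicalArchName(Arch);
}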
+StringRef ARM::getFPUSynonym(StringRef FPU) {
+ return StringSwitch<StringRef>(FPU)
+ .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
+ .Case("vfp2", "vfpv2")
+ .Case("vfp3", "vfpv3")
+ .Case("vfp4", "vfpv4")
+ .Case("vfp3-d16", "vfpv3-d16")
+ .Case("vfp4-d16", "vfpv4-d16")
+ .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
+ .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
+ .Case("fp5-sp-d16", "fpv5-sp-d16")
+ .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
+ // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
+ .Case("neon-vfpv3", "neon")
+ .Default(FPU);
+}
+
+StringRef ARM::getFPUName(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return StringRef();
+ return FPUNames[FPUKind].getName();
+}
+
+ARM::FPUVersion ARM::getFPUVersion(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return FPUVersion::NONE;
+ return FPUNames[FPUKind].FPUVer;
+}
+
+ARM::FPURestriction ARM::getFPURestriction(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return FPURestriction::None;
+ return FPUNames[FPUKind].Restriction;
+}
+
+unsigned ARM::getDefaultFPU(StringRef CPU, ARM::ArchKind AK) {
+ if (CPU == "generic")
+ return ARM::ARCHNames[static_cast<unsigned>(AK)].DefaultFPU;
+
+ return StringSwitch<unsigned>(CPU)
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, DEFAULT_FPU)
+#include "llvm/Support/ARMTargetParser.def"
+ .Default(ARM::FK_INVALID);
+}
+
+unsigned ARM::getDefaultExtensions(StringRef CPU, ARM::ArchKind AK) {
+ if (CPU == "generic")
+ return ARM::ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
+
+ return StringSwitch<unsigned>(CPU)
+#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ .Case(NAME, \
+ ARCHNames[static_cast<unsigned>(ArchKind::ID)].ArchBaseExtensions | \
+ DEFAULT_EXT)
+#include "llvm/Support/ARMTargetParser.def"
+ .Default(ARM::AEK_INVALID);
+}
+
+bool ARM::getHWDivFeatures(unsigned HWDivKind,
+ std::vector<StringRef> &Features) {
+
+ if (HWDivKind == AEK_INVALID)
+ return false;
+
+ if (HWDivKind & AEK_HWDIVARM)
+ Features.push_back("+hwdiv-arm");
+ else
+ Features.push_back("-hwdiv-arm");
+
+ if (HWDivKind & AEK_HWDIVTHUMB)
+ Features.push_back("+hwdiv");
+ else
+ Features.push_back("-hwdiv");
+
+ return true;
+}
+
+bool ARM::getExtensionFeatures(unsigned Extensions,
+ std::vector<StringRef> &Features) {
+
+ if (Extensions == AEK_INVALID)
+ return false;
+
+ if (Extensions & AEK_CRC)
+ Features.push_back("+crc");
+ else
+ Features.push_back("-crc");
+
+ if (Extensions & AEK_DSP)
+ Features.push_back("+dsp");
+ else
+ Features.push_back("-dsp");
+
+ if (Extensions & AEK_FP16FML)
+ Features.push_back("+fp16fml");
+ else
+ Features.push_back("-fp16fml");
+
+ if (Extensions & AEK_RAS)
+ Features.push_back("+ras");
+ else
+ Features.push_back("-ras");
+
+ if (Extensions & AEK_DOTPROD)
+ Features.push_back("+dotprod");
+ else
+ Features.push_back("-dotprod");
+
+ return getHWDivFeatures(Extensions, Features);
+}
+
+StringRef ARM::getArchName(ARM::ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].getName();
+}
+
+StringRef ARM::getCPUAttr(ARM::ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].getCPUAttr();
+}
+
+StringRef ARM::getSubArch(ARM::ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].getSubArch();
+}
+
+unsigned ARM::getArchAttr(ARM::ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].ArchAttr;
+}
+
+StringRef ARM::getArchExtName(unsigned ArchExtKind) {
+ for (const auto AE : ARCHExtNames) {
+ if (ArchExtKind == AE.ID)
+ return AE.getName();
+ }
+ return StringRef();
+}
+
+StringRef ARM::getArchExtFeature(StringRef ArchExt) {
+ if (ArchExt.startswith("no")) {
+ StringRef ArchExtBase(ArchExt.substr(2));
+ for (const auto AE : ARCHExtNames) {
+ if (AE.NegFeature && ArchExtBase == AE.getName())
+ return StringRef(AE.NegFeature);
+ }
+ }
+ for (const auto AE : ARCHExtNames) {
+ if (AE.Feature && ArchExt == AE.getName())
+ return StringRef(AE.Feature);
+ }
+
+ return StringRef();
+}
+
+StringRef ARM::getHWDivName(unsigned HWDivKind) {
+ for (const auto D : HWDivNames) {
+ if (HWDivKind == D.ID)
+ return D.getName();
+ }
+ return StringRef();
+}
+
+StringRef ARM::getDefaultCPU(StringRef Arch) {
+ ArchKind AK = parseArch(Arch);
+ if (AK == ArchKind::INVALID)
+ return StringRef();
+
+ // Look for multiple AKs to find the default for pair AK+Name.
+ for (const auto CPU : CPUNames) {
+ if (CPU.ArchID == AK && CPU.Default)
+ return CPU.getName();
+ }
+
+ // If we can't find a default then target the architecture instead
+ return "generic";
+}
+
+unsigned ARM::parseHWDiv(StringRef HWDiv) {
+ StringRef Syn = getHWDivSynonym(HWDiv);
+ for (const auto D : HWDivNames) {
+ if (Syn == D.getName())
+ return D.ID;
+ }
+ return AEK_INVALID;
+}
+
+unsigned ARM::parseArchExt(StringRef ArchExt) {
+ for (const auto A : ARCHExtNames) {
+ if (ArchExt == A.getName())
+ return A.ID;
+ }
+ return AEK_INVALID;
+}
+
+ARM::ArchKind ARM::parseCPUArch(StringRef CPU) {
+ for (const auto C : CPUNames) {
+ if (CPU == C.getName())
+ return C.ArchID;
+ }
+ return ArchKind::INVALID;
+}
+
+void ARM::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
+ for (const CpuNames<ArchKind> &Arch : CPUNames) {
+ if (Arch.ArchID != ArchKind::INVALID)
+ Values.push_back(Arch.getName());
+ }
+}
+
+StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
+ StringRef ArchName =
+ CPU.empty() ? TT.getArchName() : getArchName(parseCPUArch(CPU));
+
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getEnvironment() == Triple::EABI ||
+ TT.getOS() == Triple::UnknownOS ||
+ parseArchProfile(ArchName) == ProfileKind::M)
+ return "aapcs";
+ if (TT.isWatchABI())
+ return "aapcs16";
+ return "apcs-gnu";
+ } else if (TT.isOSWindows())
+ // FIXME: this is invalid for WindowsCE.
+ return "aapcs";
+
+ // Select the default based on the platform.
+ switch (TT.getEnvironment()) {
+ case Triple::Android:
+ case Triple::GNUEABI:
+ case Triple::GNUEABIHF:
+ case Triple::MuslEABI:
+ case Triple::MuslEABIHF:
+ return "aapcs-linux";
+ case Triple::EABIHF:
+ case Triple::EABI:
+ return "aapcs";
+ default:
+ if (TT.isOSNetBSD())
+ return "apcs-gnu";
+ if (TT.isOSOpenBSD())
+ return "aapcs-linux";
+ return "aapcs";
+ }
+}
diff --git a/contrib/llvm/lib/Support/BinaryStreamError.cpp b/contrib/llvm/lib/Support/BinaryStreamError.cpp
index 60f5e21f041a..cdc811d78d63 100644
--- a/contrib/llvm/lib/Support/BinaryStreamError.cpp
+++ b/contrib/llvm/lib/Support/BinaryStreamError.cpp
@@ -47,7 +47,7 @@ BinaryStreamError::BinaryStreamError(stream_error_code C, StringRef Context)
}
}
-void BinaryStreamError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
+void BinaryStreamError::log(raw_ostream &OS) const { OS << ErrMsg; }
StringRef BinaryStreamError::getErrorMessage() const { return ErrMsg; }
diff --git a/contrib/llvm/lib/Support/BuryPointer.cpp b/contrib/llvm/lib/Support/BuryPointer.cpp
new file mode 100644
index 000000000000..6c988b4a0ab2
--- /dev/null
+++ b/contrib/llvm/lib/Support/BuryPointer.cpp
@@ -0,0 +1,31 @@
+//===- BuryPointer.cpp - Memory Manipulation/Leak ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BuryPointer.h"
+#include "llvm/Support/Compiler.h"
+#include <atomic>
+
+namespace llvm {
+
+void BuryPointer(const void *Ptr) {
+ // This function may be called only a small, fixed number of times per
+ // invocation; otherwise we really do have a leak that we want to report.
+ // If this function is called more than kGraveYardMaxSize times, the pointers
+ // will not be properly buried and a leak detector will report a leak, which
+ // is what we want in that case.
+ static const size_t kGraveYardMaxSize = 16;
+ LLVM_ATTRIBUTE_UNUSED static const void *GraveYard[kGraveYardMaxSize];
+ static std::atomic<unsigned> GraveYardSize;
+ unsigned Idx = GraveYardSize++;
+ if (Idx >= kGraveYardMaxSize)
+ return;
+ GraveYard[Idx] = Ptr;
+}
+
+}
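BuryPointer hides a bounded number of intentionally leaked pointers from leak checkers by parking them in a static array; once the graveyard is full, further pointers stay visible so genuine leaks are still reported. A typical use, assuming a caller that deliberately skips teardown at process exit:

#include "llvm/Support/BuryPointer.h"

struct BigCache { char Data[1 << 20]; };

void shutdownFast(BigCache *Cache) {
  // Deliberately skip freeing the cache at exit, but keep a reachable
  // reference so a leak detector does not flag it.
  llvm::BuryPointer(Cache);
}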
diff --git a/contrib/llvm/lib/Support/COM.cpp b/contrib/llvm/lib/Support/COM.cpp
index 2e3ff66843d3..97cd085853b0 100644
--- a/contrib/llvm/lib/Support/COM.cpp
+++ b/contrib/llvm/lib/Support/COM.cpp
@@ -18,6 +18,6 @@
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/COM.inc"
-#elif _WIN32
+#elif defined(_WIN32)
#include "Windows/COM.inc"
#endif
diff --git a/contrib/llvm/lib/Support/CachePruning.cpp b/contrib/llvm/lib/Support/CachePruning.cpp
index 7326c4fc91fb..a0aa6024b3ed 100644
--- a/contrib/llvm/lib/Support/CachePruning.cpp
+++ b/contrib/llvm/lib/Support/CachePruning.cpp
@@ -27,6 +27,28 @@
using namespace llvm;
+namespace {
+struct FileInfo {
+ sys::TimePoint<> Time;
+ uint64_t Size;
+ std::string Path;
+
+ /// Used to determine which files to prune first. Also used to determine
+ /// set membership, so must take into account all fields.
+ bool operator<(const FileInfo &Other) const {
+ if (Time < Other.Time)
+ return true;
+ else if (Other.Time < Time)
+ return false;
+ if (Other.Size < Size)
+ return true;
+ else if (Size < Other.Size)
+ return false;
+ return Path < Other.Path;
+ }
+};
+} // anonymous namespace
+
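The comparator above orders cache entries by access time first, then size, then path, so pruning from the front of the set removes the least recently used (and, among equally old entries, the largest) files while still giving std::set the strict weak ordering it needs. The same three-level comparison can be written compactly with std::tie; an equivalent sketch, not a suggested change to the code above:

#include <chrono>
#include <cstdint>
#include <string>
#include <tuple>

struct Entry {
  std::chrono::system_clock::time_point Time;
  uint64_t Size;
  std::string Path;

  // Oldest first; among equally old entries, larger files first (note the
  // swapped Size operands), with the path as the final tie-breaker.
  bool operator<(const Entry &Other) const {
    return std::tie(Time, Other.Size, Path) <
           std::tie(Other.Time, Size, Other.Path);
  }
};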
/// Write a new timestamp file with the given path. This is used for the pruning
/// interval option.
static void writeTimestampFile(StringRef TimestampFile) {
@@ -185,8 +207,9 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
writeTimestampFile(TimestampFile);
}
- // Keep track of space. Needs to be kept ordered by size for determinism.
- std::set<std::pair<uint64_t, std::string>> FileSizes;
+ // Keep track of files to delete to get below the size limit.
+ // Order by time of last use so that recently used files are preserved.
+ std::set<FileInfo> FileInfos;
uint64_t TotalSize = 0;
// Walk the entire directory cache, looking for unused files.
@@ -224,22 +247,22 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
// Leave it here for now, but add it to the list of size-based pruning.
TotalSize += StatusOrErr->getSize();
- FileSizes.insert({StatusOrErr->getSize(), std::string(File->path())});
+ FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()});
}
- auto FileAndSize = FileSizes.rbegin();
- size_t NumFiles = FileSizes.size();
+ auto FileInfo = FileInfos.begin();
+ size_t NumFiles = FileInfos.size();
auto RemoveCacheFile = [&]() {
// Remove the file.
- sys::fs::remove(FileAndSize->second);
+ sys::fs::remove(FileInfo->Path);
// Update size
- TotalSize -= FileAndSize->first;
+ TotalSize -= FileInfo->Size;
NumFiles--;
- LLVM_DEBUG(dbgs() << " - Remove " << FileAndSize->second << " (size "
- << FileAndSize->first << "), new occupancy is "
- << TotalSize << "%\n");
- ++FileAndSize;
+ LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size "
+ << FileInfo->Size << "), new occupancy is " << TotalSize
+ << "%\n");
+ ++FileInfo;
};
// Prune for number of files.
@@ -270,7 +293,7 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
<< Policy.MaxSizeBytes << " bytes\n");
// Remove the oldest accessed files first, till we get below the threshold.
- while (TotalSize > TotalSizeTarget && FileAndSize != FileSizes.rend())
+ while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end())
RemoveCacheFile();
}
return true;
diff --git a/contrib/llvm/lib/Support/CodeGenCoverage.cpp b/contrib/llvm/lib/Support/CodeGenCoverage.cpp
index f0a53db4e32a..811020e3254a 100644
--- a/contrib/llvm/lib/Support/CodeGenCoverage.cpp
+++ b/contrib/llvm/lib/Support/CodeGenCoverage.cpp
@@ -22,7 +22,7 @@
#if LLVM_ON_UNIX
#include <unistd.h>
-#elif _WIN32
+#elif defined(_WIN32)
#include <windows.h>
#endif
@@ -93,7 +93,7 @@ bool CodeGenCoverage::emit(StringRef CoveragePrefix,
std::string Pid =
#if LLVM_ON_UNIX
llvm::to_string(::getpid());
-#elif _WIN32
+#elif defined(_WIN32)
llvm::to_string(::GetCurrentProcessId());
#else
"";
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index a1e659a01c8e..f7290b54dcf3 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -426,12 +426,17 @@ Option *CommandLineParser::LookupOption(SubCommand &Sub, StringRef &Arg,
return I != Sub.OptionsMap.end() ? I->second : nullptr;
}
- // If the argument before the = is a valid option name, we match. If not,
- // return Arg unmolested.
+ // If the argument before the = is a valid option name and the option allows
+ // non-prefix form (i.e. is not AlwaysPrefix), we match. If not, signal match
+ // failure by returning nullptr.
auto I = Sub.OptionsMap.find(Arg.substr(0, EqualPos));
if (I == Sub.OptionsMap.end())
return nullptr;
+ auto O = I->second;
+ if (O->getFormattingFlag() == cl::AlwaysPrefix)
+ return nullptr;
+
Value = Arg.substr(EqualPos + 1);
Arg = Arg.substr(0, EqualPos);
return I->second;
@@ -539,7 +544,9 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
switch (Handler->getValueExpectedFlag()) {
case ValueRequired:
if (!Value.data()) { // No value specified?
- if (i + 1 >= argc)
+ // If no other argument or the option only supports prefix form, we
+ // cannot look at the next argument.
+ if (i + 1 >= argc || Handler->getFormattingFlag() == cl::AlwaysPrefix)
return Handler->error("requires a value!");
// Steal the next argument, like for '-o filename'
assert(argv && "null check");
@@ -597,7 +604,8 @@ static inline bool isGrouping(const Option *O) {
return O->getFormattingFlag() == cl::Grouping;
}
static inline bool isPrefixedOrGrouping(const Option *O) {
- return isGrouping(O) || O->getFormattingFlag() == cl::Prefix;
+ return isGrouping(O) || O->getFormattingFlag() == cl::Prefix ||
+ O->getFormattingFlag() == cl::AlwaysPrefix;
}
// getOptionPred - Check to see if there are any options that satisfy the
@@ -647,7 +655,8 @@ HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
// If the option is a prefixed option, then the value is simply the
// rest of the name... so fall through to later processing, by
// setting up the argument name flags and value fields.
- if (PGOpt->getFormattingFlag() == cl::Prefix) {
+ if (PGOpt->getFormattingFlag() == cl::Prefix ||
+ PGOpt->getFormattingFlag() == cl::AlwaysPrefix) {
Value = Arg.substr(Length);
Arg = Arg.substr(0, Length);
assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
@@ -693,6 +702,10 @@ static bool isWhitespace(char C) {
return C == ' ' || C == '\t' || C == '\r' || C == '\n';
}
+static bool isWhitespaceOrNull(char C) {
+ return isWhitespace(C) || C == '\0';
+}
+
static bool isQuote(char C) { return C == '\"' || C == '\''; }
void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
@@ -808,7 +821,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// INIT state indicates that the current input index is at the start of
// the string or between tokens.
if (State == INIT) {
- if (isWhitespace(C)) {
+ if (isWhitespaceOrNull(C)) {
// Mark the end of lines in response files
if (MarkEOLs && C == '\n')
NewArgv.push_back(nullptr);
@@ -832,7 +845,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// quotes.
if (State == UNQUOTED) {
// Whitespace means the end of the token.
- if (isWhitespace(C)) {
+ if (isWhitespaceOrNull(C)) {
NewArgv.push_back(Saver.save(StringRef(Token)).data());
Token.clear();
State = INIT;
@@ -1057,8 +1070,27 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
}
bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
- StringRef Overview, raw_ostream *Errs) {
- return GlobalParser->ParseCommandLineOptions(argc, argv, Overview,
+ StringRef Overview, raw_ostream *Errs,
+ const char *EnvVar) {
+ SmallVector<const char *, 20> NewArgv;
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ NewArgv.push_back(argv[0]);
+
+ // Parse options from environment variable.
+ if (EnvVar) {
+ if (llvm::Optional<std::string> EnvValue =
+ sys::Process::GetEnv(StringRef(EnvVar)))
+ TokenizeGNUCommandLine(*EnvValue, Saver, NewArgv);
+ }
+
+ // Append options from command line.
+ for (int I = 1; I < argc; ++I)
+ NewArgv.push_back(argv[I]);
+ int NewArgc = static_cast<int>(NewArgv.size());
+
+ // Parse all options.
+ return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview,
Errs);
}
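The extra EnvVar parameter lets a tool pick up default options from an environment variable, tokenized GNU-style and parsed ahead of the real argv. A minimal sketch of a caller opting in (the variable name MYTOOL_OPTIONS and the -verbose flag are made up for illustration):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Hypothetical flag, only so the parser has something to populate.
    static cl::opt<bool> Verbose("verbose", cl::desc("Enable verbose output"));

    int main(int argc, char **argv) {
      // Tokens found in $MYTOOL_OPTIONS (a made-up variable name) are parsed
      // before the arguments supplied on the actual command line.
      cl::ParseCommandLineOptions(argc, argv, "example tool\n", &errs(),
                                  "MYTOOL_OPTIONS");
      return Verbose ? 0 : 1;
    }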
diff --git a/contrib/llvm/lib/Support/Compression.cpp b/contrib/llvm/lib/Support/Compression.cpp
index c279d10f6c61..95261d4aad23 100644
--- a/contrib/llvm/lib/Support/Compression.cpp
+++ b/contrib/llvm/lib/Support/Compression.cpp
@@ -29,16 +29,6 @@ static Error createError(StringRef Err) {
return make_error<StringError>(Err, inconvertibleErrorCode());
}
-static int encodeZlibCompressionLevel(zlib::CompressionLevel Level) {
- switch (Level) {
- case zlib::NoCompression: return 0;
- case zlib::BestSpeedCompression: return 1;
- case zlib::DefaultCompression: return Z_DEFAULT_COMPRESSION;
- case zlib::BestSizeCompression: return 9;
- }
- llvm_unreachable("Invalid zlib::CompressionLevel!");
-}
-
static StringRef convertZlibCodeToString(int Code) {
switch (Code) {
case Z_MEM_ERROR:
@@ -58,18 +48,16 @@ static StringRef convertZlibCodeToString(int Code) {
bool zlib::isAvailable() { return true; }
Error zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer,
- CompressionLevel Level) {
+ SmallVectorImpl<char> &CompressedBuffer, int Level) {
unsigned long CompressedSize = ::compressBound(InputBuffer.size());
- CompressedBuffer.resize(CompressedSize);
- int CLevel = encodeZlibCompressionLevel(Level);
- int Res = ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
- (const Bytef *)InputBuffer.data(), InputBuffer.size(),
- CLevel);
+ CompressedBuffer.reserve(CompressedSize);
+ int Res =
+ ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
+ (const Bytef *)InputBuffer.data(), InputBuffer.size(), Level);
// Tell MemorySanitizer that zlib output buffer is fully initialized.
// This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(CompressedBuffer.data(), CompressedSize);
- CompressedBuffer.resize(CompressedSize);
+ CompressedBuffer.set_size(CompressedSize);
return Res ? createError(convertZlibCodeToString(Res)) : Error::success();
}
@@ -101,8 +89,7 @@ uint32_t zlib::crc32(StringRef Buffer) {
#else
bool zlib::isAvailable() { return false; }
Error zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer,
- CompressionLevel Level) {
+ SmallVectorImpl<char> &CompressedBuffer, int Level) {
llvm_unreachable("zlib::compress is unavailable");
}
Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
@@ -118,4 +105,3 @@ uint32_t zlib::crc32(StringRef Buffer) {
llvm_unreachable("zlib::crc32 is unavailable");
}
#endif
-
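With the CompressionLevel enum removed, callers now pass a raw zlib level (0-9, or Z_DEFAULT_COMPRESSION) directly. A hedged sketch of the updated call site, choosing level 6 arbitrarily:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Compression.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;

    static Error compressChunk(StringRef Input, SmallVectorImpl<char> &Out) {
      if (!zlib::isAvailable())
        return createStringError(inconvertibleErrorCode(),
                                 "LLVM was built without zlib support");
      // Level is now a plain int (0-9); 6 is an arbitrary middle-ground pick.
      return zlib::compress(Input, Out, /*Level=*/6);
    }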
diff --git a/contrib/llvm/lib/Support/DebugCounter.cpp b/contrib/llvm/lib/Support/DebugCounter.cpp
index 9c12de0776ad..6598103658da 100644
--- a/contrib/llvm/lib/Support/DebugCounter.cpp
+++ b/contrib/llvm/lib/Support/DebugCounter.cpp
@@ -49,8 +49,18 @@ static DebugCounterList DebugCounterOption(
cl::desc("Comma separated list of debug counter skip and count"),
cl::CommaSeparated, cl::ZeroOrMore, cl::location(DebugCounter::instance()));
+static cl::opt<bool> PrintDebugCounter(
+ "print-debug-counter", cl::Hidden, cl::init(false), cl::Optional,
+ cl::desc("Print out debug counter info after all counters accumulated"));
+
static ManagedStatic<DebugCounter> DC;
+// Print information when destroyed, iff command line option is specified.
+DebugCounter::~DebugCounter() {
+ if (isCountingEnabled() && PrintDebugCounter)
+ print(dbgs());
+}
+
DebugCounter &DebugCounter::instance() { return *DC; }
// This is called by the command line parser when it sees a value for the
@@ -83,8 +93,10 @@ void DebugCounter::push_back(const std::string &Val) {
return;
}
enableAllCounters();
- Counters[CounterID].Skip = CounterVal;
- Counters[CounterID].IsSet = true;
+
+ CounterInfo &Counter = Counters[CounterID];
+ Counter.Skip = CounterVal;
+ Counter.IsSet = true;
} else if (CounterPair.first.endswith("-count")) {
auto CounterName = CounterPair.first.drop_back(6);
unsigned CounterID = getCounterId(CounterName);
@@ -94,8 +106,10 @@ void DebugCounter::push_back(const std::string &Val) {
return;
}
enableAllCounters();
- Counters[CounterID].StopAfter = CounterVal;
- Counters[CounterID].IsSet = true;
+
+ CounterInfo &Counter = Counters[CounterID];
+ Counter.StopAfter = CounterVal;
+ Counter.IsSet = true;
} else {
errs() << "DebugCounter Error: " << CounterPair.first
<< " does not end with -skip or -count\n";
@@ -103,11 +117,18 @@ void DebugCounter::push_back(const std::string &Val) {
}
void DebugCounter::print(raw_ostream &OS) const {
+ SmallVector<StringRef, 16> CounterNames(RegisteredCounters.begin(),
+ RegisteredCounters.end());
+ sort(CounterNames.begin(), CounterNames.end());
+
+ auto &Us = instance();
OS << "Counters and values:\n";
- for (const auto &KV : Counters)
- OS << left_justify(RegisteredCounters[KV.first], 32) << ": {"
- << KV.second.Count << "," << KV.second.Skip << ","
- << KV.second.StopAfter << "}\n";
+ for (auto &CounterName : CounterNames) {
+ unsigned CounterID = getCounterId(CounterName);
+ OS << left_justify(RegisteredCounters[CounterID], 32) << ": {"
+ << Us.Counters[CounterID].Count << "," << Us.Counters[CounterID].Skip
+ << "," << Us.Counters[CounterID].StopAfter << "}\n";
+ }
}
LLVM_DUMP_METHOD void DebugCounter::dump() const {
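The new -print-debug-counter flag dumps the counter table when the DebugCounter singleton is torn down; counters are still declared and queried through the existing DEBUG_COUNTER machinery. A short sketch of a counter a pass might register (the counter name and description are invented):

    #include "llvm/Support/DebugCounter.h"

    using namespace llvm;

    // Registers a counter that can be driven with
    //   -debug-counter=my-transform-skip=N,my-transform-count=M
    // and summarized at shutdown with -print-debug-counter.
    DEBUG_COUNTER(MyTransformCounter, "my-transform",
                  "Controls which candidate transforms are applied");

    static bool maybeTransform() {
      if (!DebugCounter::shouldExecute(MyTransformCounter))
        return false; // Skipped by the counter; handy for bisecting failures.
      // ... apply the transform ...
      return true;
    }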
diff --git a/contrib/llvm/lib/Support/Error.cpp b/contrib/llvm/lib/Support/Error.cpp
index 83345bf6edb9..30bfc3e6d2fb 100644
--- a/contrib/llvm/lib/Support/Error.cpp
+++ b/contrib/llvm/lib/Support/Error.cpp
@@ -19,6 +19,7 @@ namespace {
enum class ErrorErrorCode : int {
MultipleErrors = 1,
+ FileError,
InconvertibleError
};
@@ -37,6 +38,8 @@ namespace {
return "Inconvertible error value. An error has occurred that could "
"not be converted to a known std::error_code. Please file a "
"bug.";
+ case ErrorErrorCode::FileError:
+ return "A file error occurred.";
}
llvm_unreachable("Unhandled error code");
}
@@ -51,8 +54,10 @@ namespace llvm {
void ErrorInfoBase::anchor() {}
char ErrorInfoBase::ID = 0;
char ErrorList::ID = 0;
+void ECError::anchor() {}
char ECError::ID = 0;
char StringError::ID = 0;
+char FileError::ID = 0;
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner) {
if (!E)
@@ -75,6 +80,11 @@ std::error_code inconvertibleErrorCode() {
*ErrorErrorCat);
}
+std::error_code FileError::convertToErrorCode() const {
+ return std::error_code(static_cast<int>(ErrorErrorCode::FileError),
+ *ErrorErrorCat);
+}
+
Error errorCodeToError(std::error_code EC) {
if (!EC)
return Error::success();
@@ -103,10 +113,21 @@ void Error::fatalUncheckedError() const {
}
#endif
-StringError::StringError(const Twine &S, std::error_code EC)
+StringError::StringError(std::error_code EC, const Twine &S)
: Msg(S.str()), EC(EC) {}
-void StringError::log(raw_ostream &OS) const { OS << Msg; }
+StringError::StringError(const Twine &S, std::error_code EC)
+ : Msg(S.str()), EC(EC), PrintMsgOnly(true) {}
+
+void StringError::log(raw_ostream &OS) const {
+ if (PrintMsgOnly) {
+ OS << Msg;
+ } else {
+ OS << EC.message();
+ if (!Msg.empty())
+ OS << (" " + Msg);
+ }
+}
std::error_code StringError::convertToErrorCode() const {
return EC;
@@ -121,11 +142,31 @@ void report_fatal_error(Error Err, bool GenCrashDiag) {
std::string ErrMsg;
{
raw_string_ostream ErrStream(ErrMsg);
- logAllUnhandledErrors(std::move(Err), ErrStream, "");
+ logAllUnhandledErrors(std::move(Err), ErrStream);
}
report_fatal_error(ErrMsg);
}
+} // end namespace llvm
+
+LLVMErrorTypeId LLVMGetErrorTypeId(LLVMErrorRef Err) {
+ return reinterpret_cast<ErrorInfoBase *>(Err)->dynamicClassID();
+}
+
+void LLVMConsumeError(LLVMErrorRef Err) { consumeError(unwrap(Err)); }
+
+char *LLVMGetErrorMessage(LLVMErrorRef Err) {
+ std::string Tmp = toString(unwrap(Err));
+ char *ErrMsg = new char[Tmp.size() + 1];
+ memcpy(ErrMsg, Tmp.data(), Tmp.size());
+ ErrMsg[Tmp.size()] = '\0';
+ return ErrMsg;
+}
+
+void LLVMDisposeErrorMessage(char *ErrMsg) { delete[] ErrMsg; }
+
+LLVMErrorTypeId LLVMGetStringErrorTypeId() {
+ return reinterpret_cast<void *>(&StringError::ID);
}
#ifndef _MSC_VER
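After this change the two StringError constructors log differently: the (error_code, message) form prints the error code's own message followed by the supplied text, while the older (message, error_code) form still prints only the text. A small sketch contrasting the two (the file name is arbitrary):

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>

    using namespace llvm;

    static void logBoth() {
      // New (EC, Msg) order: prints the error_code message, then the text.
      Error E1 = make_error<StringError>(
          std::make_error_code(std::errc::no_such_file_or_directory),
          "while loading foo.bc");
      // Legacy (Msg, EC) order: prints only the text, as before.
      Error E2 = make_error<StringError>(
          "while loading foo.bc",
          std::make_error_code(std::errc::no_such_file_or_directory));
      logAllUnhandledErrors(std::move(E1), errs());
      logAllUnhandledErrors(std::move(E2), errs());
    }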
diff --git a/contrib/llvm/lib/Support/FileCheck.cpp b/contrib/llvm/lib/Support/FileCheck.cpp
new file mode 100644
index 000000000000..37986c96c081
--- /dev/null
+++ b/contrib/llvm/lib/Support/FileCheck.cpp
@@ -0,0 +1,1446 @@
+//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// FileCheck does a line-by-line check of a file that validates whether it
+// contains the expected content. This is useful for regression tests etc.
+//
+// This file implements most of the API used by the FileCheck utility as well
+// as various unittests.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileCheck.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <cstdint>
+#include <list>
+#include <map>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+
+/// Parses the given string into the Pattern.
+///
+/// \p Prefix provides which prefix is being matched, \p SM provides the
+/// SourceMgr used for error reports, and \p LineNumber is the line number in
+/// the input file from which the pattern string was read. Returns true in
+/// case of an error, false otherwise.
+bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
+ SourceMgr &SM, unsigned LineNumber,
+ const FileCheckRequest &Req) {
+ bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
+
+ this->LineNumber = LineNumber;
+ PatternLoc = SMLoc::getFromPointer(PatternStr.data());
+
+ if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
+ // Ignore trailing whitespace.
+ while (!PatternStr.empty() &&
+ (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
+ PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
+
+ // Check that there is something on the line.
+ if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
+ SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
+ "found empty check string with prefix '" + Prefix + ":'");
+ return true;
+ }
+
+ if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
+ SM.PrintMessage(
+ PatternLoc, SourceMgr::DK_Error,
+ "found non-empty check string for empty check with prefix '" + Prefix +
+ ":'");
+ return true;
+ }
+
+ if (CheckTy == Check::CheckEmpty) {
+ RegExStr = "(\n$)";
+ return false;
+ }
+
+ // Check to see if this is a fixed string, or if it has regex pieces.
+ if (!MatchFullLinesHere &&
+ (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
+ PatternStr.find("[[") == StringRef::npos))) {
+ FixedStr = PatternStr;
+ return false;
+ }
+
+ if (MatchFullLinesHere) {
+ RegExStr += '^';
+ if (!Req.NoCanonicalizeWhiteSpace)
+ RegExStr += " *";
+ }
+
+ // Paren value #0 is for the fully matched string. Any new parenthesized
+ // values add from there.
+ unsigned CurParen = 1;
+
+ // Otherwise, there is at least one regex piece. Build up the regex pattern
+ // by escaping scary characters in fixed strings, building up one big regex.
+ while (!PatternStr.empty()) {
+ // RegEx matches.
+ if (PatternStr.startswith("{{")) {
+ // This is the start of a regex match. Scan for the }}.
+ size_t End = PatternStr.find("}}");
+ if (End == StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
+ SourceMgr::DK_Error,
+ "found start of regex string with no end '}}'");
+ return true;
+ }
+
+ // Enclose {{}} patterns in parens just like [[]] even though we're not
+ // capturing the result for any purpose. This is required in case the
+ // expression contains an alternation like: CHECK: abc{{x|z}}def. We
+ // want this to turn into: "abc(x|z)def" not "abcx|zdef".
+ RegExStr += '(';
+ ++CurParen;
+
+ if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
+ return true;
+ RegExStr += ')';
+
+ PatternStr = PatternStr.substr(End + 2);
+ continue;
+ }
+
+ // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
+ // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
+ // second form is [[foo]] which is a reference to foo. The variable name
+ // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
+ // it. This is to catch some common errors.
+ if (PatternStr.startswith("[[")) {
+ // Find the closing bracket pair ending the match. End is going to be an
+ // offset relative to the beginning of the match string.
+ size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
+
+ if (End == StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
+ SourceMgr::DK_Error,
+ "invalid named regex reference, no ]] found");
+ return true;
+ }
+
+ StringRef MatchStr = PatternStr.substr(2, End);
+ PatternStr = PatternStr.substr(End + 4);
+
+ // Get the regex name (e.g. "foo").
+ size_t NameEnd = MatchStr.find(':');
+ StringRef Name = MatchStr.substr(0, NameEnd);
+
+ if (Name.empty()) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+ "invalid name in named regex: empty name");
+ return true;
+ }
+
+ // Verify that the name/expression is well formed. FileCheck currently
+ // supports @LINE, @LINE+number, @LINE-number expressions. The check here
+ // is relaxed; a stricter check is performed in \c EvaluateExpression.
+ bool IsExpression = false;
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ if (i == 0) {
+ if (Name[i] == '$') // Global vars start with '$'
+ continue;
+ if (Name[i] == '@') {
+ if (NameEnd != StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ SourceMgr::DK_Error,
+ "invalid name in named regex definition");
+ return true;
+ }
+ IsExpression = true;
+ continue;
+ }
+ }
+ if (Name[i] != '_' && !isalnum(Name[i]) &&
+ (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
+ SourceMgr::DK_Error, "invalid name in named regex");
+ return true;
+ }
+ }
+
+ // Name can't start with a digit.
+ if (isdigit(static_cast<unsigned char>(Name[0]))) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+ "invalid name in named regex");
+ return true;
+ }
+
+ // Handle [[foo]].
+ if (NameEnd == StringRef::npos) {
+ // Handle variables that were defined earlier on the same line by
+ // emitting a backreference.
+ if (VariableDefs.find(Name) != VariableDefs.end()) {
+ unsigned VarParenNum = VariableDefs[Name];
+ if (VarParenNum < 1 || VarParenNum > 9) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ SourceMgr::DK_Error,
+ "Can't back-reference more than 9 variables");
+ return true;
+ }
+ AddBackrefToRegEx(VarParenNum);
+ } else {
+ VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+ }
+ continue;
+ }
+
+ // Handle [[foo:.*]].
+ VariableDefs[Name] = CurParen;
+ RegExStr += '(';
+ ++CurParen;
+
+ if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
+ return true;
+
+ RegExStr += ')';
+ }
+
+ // Handle fixed string matches.
+ // Find the end, which is the start of the next regex.
+ size_t FixedMatchEnd = PatternStr.find("{{");
+ FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
+ RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
+ PatternStr = PatternStr.substr(FixedMatchEnd);
+ }
+
+ if (MatchFullLinesHere) {
+ if (!Req.NoCanonicalizeWhiteSpace)
+ RegExStr += " *";
+ RegExStr += '$';
+ }
+
+ return false;
+}
+
+bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
+ Regex R(RS);
+ std::string Error;
+ if (!R.isValid(Error)) {
+ SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
+ "invalid regex: " + Error);
+ return true;
+ }
+
+ RegExStr += RS.str();
+ CurParen += R.getNumMatches();
+ return false;
+}
+
+void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) {
+ assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
+ std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
+ RegExStr += Backref;
+}
+
+/// Evaluates expression and stores the result to \p Value.
+///
+/// Returns true on success and false when the expression has invalid syntax.
+bool FileCheckPattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
+ // The only supported expression is @LINE([\+-]\d+)?
+ if (!Expr.startswith("@LINE"))
+ return false;
+ Expr = Expr.substr(StringRef("@LINE").size());
+ int Offset = 0;
+ if (!Expr.empty()) {
+ if (Expr[0] == '+')
+ Expr = Expr.substr(1);
+ else if (Expr[0] != '-')
+ return false;
+ if (Expr.getAsInteger(10, Offset))
+ return false;
+ }
+ Value = llvm::itostr(LineNumber + Offset);
+ return true;
+}
+
+/// Matches the pattern string against the input buffer \p Buffer
+///
+/// This returns the position that is matched or npos if there is no match. If
+/// there is a match, the size of the matched string is returned in \p
+/// MatchLen.
+///
+/// The \p VariableTable StringMap provides the current values of filecheck
+/// variables and is updated if this match defines new values.
+size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
+ StringMap<StringRef> &VariableTable) const {
+ // If this is the EOF pattern, match it immediately.
+ if (CheckTy == Check::CheckEOF) {
+ MatchLen = 0;
+ return Buffer.size();
+ }
+
+ // If this is a fixed string pattern, just match it now.
+ if (!FixedStr.empty()) {
+ MatchLen = FixedStr.size();
+ return Buffer.find(FixedStr);
+ }
+
+ // Regex match.
+
+ // If there are variable uses, we need to create a temporary string with the
+ // actual value.
+ StringRef RegExToMatch = RegExStr;
+ std::string TmpStr;
+ if (!VariableUses.empty()) {
+ TmpStr = RegExStr;
+
+ unsigned InsertOffset = 0;
+ for (const auto &VariableUse : VariableUses) {
+ std::string Value;
+
+ if (VariableUse.first[0] == '@') {
+ if (!EvaluateExpression(VariableUse.first, Value))
+ return StringRef::npos;
+ } else {
+ StringMap<StringRef>::iterator it =
+ VariableTable.find(VariableUse.first);
+ // If the variable is undefined, return an error.
+ if (it == VariableTable.end())
+ return StringRef::npos;
+
+ // Look up the value and escape it so that we can put it into the regex.
+ Value += Regex::escape(it->second);
+ }
+
+ // Plop it into the regex at the adjusted offset.
+ TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
+ Value.begin(), Value.end());
+ InsertOffset += Value.size();
+ }
+
+ // Match the newly constructed regex.
+ RegExToMatch = TmpStr;
+ }
+
+ SmallVector<StringRef, 4> MatchInfo;
+ if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
+ return StringRef::npos;
+
+ // Successful regex match.
+ assert(!MatchInfo.empty() && "Didn't get any match");
+ StringRef FullMatch = MatchInfo[0];
+
+ // If this defines any variables, remember their values.
+ for (const auto &VariableDef : VariableDefs) {
+ assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
+ VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
+ }
+
+ // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
+ // the required preceding newline, which is consumed by the pattern in the
+ // case of CHECK-EMPTY but not CHECK-NEXT.
+ size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
+ MatchLen = FullMatch.size() - MatchStartSkip;
+ return FullMatch.data() - Buffer.data() + MatchStartSkip;
+}
+
+
+/// Computes an arbitrary estimate for the quality of matching this pattern at
+/// the start of \p Buffer; a distance of zero should correspond to a perfect
+/// match.
+unsigned
+FileCheckPattern::ComputeMatchDistance(StringRef Buffer,
+ const StringMap<StringRef> &VariableTable) const {
+ // Just compute the edit distance to the example string. For regular expressions, we
+ // just compare against the regex itself and hope for the best.
+ //
+ // FIXME: One easy improvement here is to have the regex lib generate a single
+ // example regular expression which matches, and use that as the example
+ // string.
+ StringRef ExampleString(FixedStr);
+ if (ExampleString.empty())
+ ExampleString = RegExStr;
+
+ // Only compare up to the first line in the buffer, or the string size.
+ StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
+ BufferPrefix = BufferPrefix.split('\n').first;
+ return BufferPrefix.edit_distance(ExampleString);
+}
+
+void FileCheckPattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
+ const StringMap<StringRef> &VariableTable,
+ SMRange MatchRange) const {
+ // If this was a regular expression using variables, print the current
+ // variable values.
+ if (!VariableUses.empty()) {
+ for (const auto &VariableUse : VariableUses) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ StringRef Var = VariableUse.first;
+ if (Var[0] == '@') {
+ std::string Value;
+ if (EvaluateExpression(Var, Value)) {
+ OS << "with expression \"";
+ OS.write_escaped(Var) << "\" equal to \"";
+ OS.write_escaped(Value) << "\"";
+ } else {
+ OS << "uses incorrect expression \"";
+ OS.write_escaped(Var) << "\"";
+ }
+ } else {
+ StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
+
+ // Check for undefined variable references.
+ if (it == VariableTable.end()) {
+ OS << "uses undefined variable \"";
+ OS.write_escaped(Var) << "\"";
+ } else {
+ OS << "with variable \"";
+ OS.write_escaped(Var) << "\" equal to \"";
+ OS.write_escaped(it->second) << "\"";
+ }
+ }
+
+ if (MatchRange.isValid())
+ SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
+ {MatchRange});
+ else
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
+ SourceMgr::DK_Note, OS.str());
+ }
+ }
+}
+
+static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
+ const SourceMgr &SM, SMLoc Loc,
+ Check::FileCheckType CheckTy,
+ StringRef Buffer, size_t Pos, size_t Len,
+ std::vector<FileCheckDiag> *Diags,
+ bool AdjustPrevDiag = false) {
+ SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
+ SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
+ SMRange Range(Start, End);
+ if (Diags) {
+ if (AdjustPrevDiag)
+ Diags->rbegin()->MatchTy = MatchTy;
+ else
+ Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
+ }
+ return Range;
+}
+
+void FileCheckPattern::PrintFuzzyMatch(
+ const SourceMgr &SM, StringRef Buffer,
+ const StringMap<StringRef> &VariableTable,
+ std::vector<FileCheckDiag> *Diags) const {
+ // Attempt to find the closest/best fuzzy match. Usually an error happens
+ // because some string in the output didn't exactly match. In these cases, we
+ // would like to show the user a best guess at what "should have" matched, to
+ // save them having to actually check the input manually.
+ size_t NumLinesForward = 0;
+ size_t Best = StringRef::npos;
+ double BestQuality = 0;
+
+ // Use an arbitrary 4k limit on how far we will search.
+ for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
+ if (Buffer[i] == '\n')
+ ++NumLinesForward;
+
+ // Patterns have leading whitespace stripped, so skip whitespace when
+ // looking for something which looks like a pattern.
+ if (Buffer[i] == ' ' || Buffer[i] == '\t')
+ continue;
+
+ // Compute the "quality" of this match as an arbitrary combination of the
+ // match distance and the number of lines skipped to get to this match.
+ unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
+ double Quality = Distance + (NumLinesForward / 100.);
+
+ if (Quality < BestQuality || Best == StringRef::npos) {
+ Best = i;
+ BestQuality = Quality;
+ }
+ }
+
+ // Print the "possible intended match here" line if we found something
+ // reasonable and not equal to what we showed in the "scanning from here"
+ // line.
+ if (Best && Best != StringRef::npos && BestQuality < 50) {
+ SMRange MatchRange =
+ ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(),
+ getCheckTy(), Buffer, Best, 0, Diags);
+ SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note,
+ "possible intended match here");
+
+ // FIXME: If we wanted to be really friendly we would show why the match
+ // failed, as it can be hard to spot simple one character differences.
+ }
+}
+
+/// Finds the closing sequence of a regex variable usage or definition.
+///
+/// \p Str has to point to the beginning of the definition (right after the
+/// opening sequence). Returns the offset of the closing sequence within Str,
+/// or npos if it was not found.
+size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
+ // Offset keeps track of the current offset within the input Str
+ size_t Offset = 0;
+ // [...] Nesting depth
+ size_t BracketDepth = 0;
+
+ while (!Str.empty()) {
+ if (Str.startswith("]]") && BracketDepth == 0)
+ return Offset;
+ if (Str[0] == '\\') {
+ // Backslash escapes the next char within regexes, so skip them both.
+ Str = Str.substr(2);
+ Offset += 2;
+ } else {
+ switch (Str[0]) {
+ default:
+ break;
+ case '[':
+ BracketDepth++;
+ break;
+ case ']':
+ if (BracketDepth == 0) {
+ SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
+ SourceMgr::DK_Error,
+ "missing closing \"]\" for regex variable");
+ exit(1);
+ }
+ BracketDepth--;
+ break;
+ }
+ Str = Str.substr(1);
+ Offset++;
+ }
+ }
+
+ return StringRef::npos;
+}
+
+/// Canonicalize whitespaces in the file. Line endings are replaced with
+/// UNIX-style '\n'.
+StringRef
+llvm::FileCheck::CanonicalizeFile(MemoryBuffer &MB,
+ SmallVectorImpl<char> &OutputBuffer) {
+ OutputBuffer.reserve(MB.getBufferSize());
+
+ for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
+ Ptr != End; ++Ptr) {
+ // Eliminate trailing dosish \r.
+ if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
+ continue;
+ }
+
+ // If current char is not a horizontal whitespace or if horizontal
+ // whitespace canonicalization is disabled, dump it to output as is.
+ if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
+ OutputBuffer.push_back(*Ptr);
+ continue;
+ }
+
+ // Otherwise, add one space and advance over neighboring space.
+ OutputBuffer.push_back(' ');
+ while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
+ ++Ptr;
+ }
+
+ // Add a null byte and then return all but that byte.
+ OutputBuffer.push_back('\0');
+ return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
+}
+
+FileCheckDiag::FileCheckDiag(const SourceMgr &SM,
+ const Check::FileCheckType &CheckTy,
+ SMLoc CheckLoc, MatchType MatchTy,
+ SMRange InputRange)
+ : CheckTy(CheckTy), MatchTy(MatchTy) {
+ auto Start = SM.getLineAndColumn(InputRange.Start);
+ auto End = SM.getLineAndColumn(InputRange.End);
+ InputStartLine = Start.first;
+ InputStartCol = Start.second;
+ InputEndLine = End.first;
+ InputEndCol = End.second;
+ Start = SM.getLineAndColumn(CheckLoc);
+ CheckLine = Start.first;
+ CheckCol = Start.second;
+}
+
+static bool IsPartOfWord(char c) {
+ return (isalnum(c) || c == '-' || c == '_');
+}
+
+Check::FileCheckType &Check::FileCheckType::setCount(int C) {
+ assert(C > 0 && "zero and negative counts are not supported");
+ assert((C == 1 || Kind == CheckPlain) &&
+ "count supported only for plain CHECK directives");
+ Count = C;
+ return *this;
+}
+
+// Get a description of the type.
+std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
+ switch (Kind) {
+ case Check::CheckNone:
+ return "invalid";
+ case Check::CheckPlain:
+ if (Count > 1)
+ return Prefix.str() + "-COUNT";
+ return Prefix;
+ case Check::CheckNext:
+ return Prefix.str() + "-NEXT";
+ case Check::CheckSame:
+ return Prefix.str() + "-SAME";
+ case Check::CheckNot:
+ return Prefix.str() + "-NOT";
+ case Check::CheckDAG:
+ return Prefix.str() + "-DAG";
+ case Check::CheckLabel:
+ return Prefix.str() + "-LABEL";
+ case Check::CheckEmpty:
+ return Prefix.str() + "-EMPTY";
+ case Check::CheckEOF:
+ return "implicit EOF";
+ case Check::CheckBadNot:
+ return "bad NOT";
+ case Check::CheckBadCount:
+ return "bad COUNT";
+ }
+ llvm_unreachable("unknown FileCheckType");
+}
+
+static std::pair<Check::FileCheckType, StringRef>
+FindCheckType(StringRef Buffer, StringRef Prefix) {
+ if (Buffer.size() <= Prefix.size())
+ return {Check::CheckNone, StringRef()};
+
+ char NextChar = Buffer[Prefix.size()];
+
+ StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
+ // Verify that the : is present after the prefix.
+ if (NextChar == ':')
+ return {Check::CheckPlain, Rest};
+
+ if (NextChar != '-')
+ return {Check::CheckNone, StringRef()};
+
+ if (Rest.consume_front("COUNT-")) {
+ int64_t Count;
+ if (Rest.consumeInteger(10, Count))
+ // Error happened in parsing integer.
+ return {Check::CheckBadCount, Rest};
+ if (Count <= 0 || Count > INT32_MAX)
+ return {Check::CheckBadCount, Rest};
+ if (!Rest.consume_front(":"))
+ return {Check::CheckBadCount, Rest};
+ return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest};
+ }
+
+ if (Rest.consume_front("NEXT:"))
+ return {Check::CheckNext, Rest};
+
+ if (Rest.consume_front("SAME:"))
+ return {Check::CheckSame, Rest};
+
+ if (Rest.consume_front("NOT:"))
+ return {Check::CheckNot, Rest};
+
+ if (Rest.consume_front("DAG:"))
+ return {Check::CheckDAG, Rest};
+
+ if (Rest.consume_front("LABEL:"))
+ return {Check::CheckLabel, Rest};
+
+ if (Rest.consume_front("EMPTY:"))
+ return {Check::CheckEmpty, Rest};
+
+ // You can't combine -NOT with another suffix.
+ if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
+ Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
+ Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
+ Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
+ return {Check::CheckBadNot, Rest};
+
+ return {Check::CheckNone, Rest};
+}
+
+// From the given position, find the next character after the word.
+static size_t SkipWord(StringRef Str, size_t Loc) {
+ while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
+ ++Loc;
+ return Loc;
+}
+
+/// Search the buffer for the first prefix in the prefix regular expression.
+///
+/// This searches the buffer using the provided regular expression, however it
+/// enforces constraints beyond that:
+/// 1) The found prefix must not be a suffix of something that looks like
+/// a valid prefix.
+/// 2) The found prefix must be followed by a valid check type suffix using \c
+/// FindCheckType above.
+///
+/// Returns a pair of StringRefs into the Buffer:
+/// - the first match of the regular expression that satisfies both
+/// constraints above, or an empty StringRef to indicate failure;
+/// - the buffer rewound to the location right after the parsed suffix, from
+/// which parsing should continue.
+///
+/// If this routine returns a valid prefix, it will also shrink \p Buffer to
+/// start at the beginning of the returned prefix, increment \p LineNumber for
+/// each new line consumed from \p Buffer, and set \p CheckTy to the type of
+/// check found by examining the suffix.
+///
+/// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
+/// is unspecified.
+static std::pair<StringRef, StringRef>
+FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
+ unsigned &LineNumber, Check::FileCheckType &CheckTy) {
+ SmallVector<StringRef, 2> Matches;
+
+ while (!Buffer.empty()) {
+ // Find the first (longest) match using the RE.
+ if (!PrefixRE.match(Buffer, &Matches))
+ // No match at all, bail.
+ return {StringRef(), StringRef()};
+
+ StringRef Prefix = Matches[0];
+ Matches.clear();
+
+ assert(Prefix.data() >= Buffer.data() &&
+ Prefix.data() < Buffer.data() + Buffer.size() &&
+ "Prefix doesn't start inside of buffer!");
+ size_t Loc = Prefix.data() - Buffer.data();
+ StringRef Skipped = Buffer.substr(0, Loc);
+ Buffer = Buffer.drop_front(Loc);
+ LineNumber += Skipped.count('\n');
+
+ // Check that the matched prefix isn't a suffix of some other check-like
+ // word.
+ // FIXME: This is a very ad-hoc check. It would be better handled in some
+ // other way. Among other things it seems hard to distinguish between
+ // intentional and unintentional uses of this feature.
+ if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
+ // Now extract the type.
+ StringRef AfterSuffix;
+ std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix);
+
+ // If we've found a valid check type for this prefix, we're done.
+ if (CheckTy != Check::CheckNone)
+ return {Prefix, AfterSuffix};
+ }
+
+ // If we didn't successfully find a prefix, we need to skip this invalid
+ // prefix and continue scanning. We directly skip the prefix that was
+ // matched and any additional parts of that check-like word.
+ Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
+ }
+
+ // We ran out of buffer while skipping partial matches so give up.
+ return {StringRef(), StringRef()};
+}
+
+/// Read the check file, which specifies the sequence of expected strings.
+///
+/// The strings are added to the CheckStrings vector. Returns true in case of
+/// an error, false otherwise.
+bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
+ Regex &PrefixRE,
+ std::vector<FileCheckString> &CheckStrings) {
+ std::vector<FileCheckPattern> ImplicitNegativeChecks;
+ for (const auto &PatternString : Req.ImplicitCheckNot) {
+ // Create a buffer with fake command line content in order to display the
+ // command line option responsible for the specific implicit CHECK-NOT.
+ std::string Prefix = "-implicit-check-not='";
+ std::string Suffix = "'";
+ std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
+ Prefix + PatternString + Suffix, "command line");
+
+ StringRef PatternInBuffer =
+ CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
+ SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
+
+ ImplicitNegativeChecks.push_back(FileCheckPattern(Check::CheckNot));
+ ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
+ "IMPLICIT-CHECK", SM, 0, Req);
+ }
+
+ std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks;
+
+ // LineNumber keeps track of the line on which CheckPrefix instances are
+ // found.
+ unsigned LineNumber = 1;
+
+ while (1) {
+ Check::FileCheckType CheckTy;
+
+ // See if a prefix occurs in the memory buffer.
+ StringRef UsedPrefix;
+ StringRef AfterSuffix;
+ std::tie(UsedPrefix, AfterSuffix) =
+ FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy);
+ if (UsedPrefix.empty())
+ break;
+ assert(UsedPrefix.data() == Buffer.data() &&
+ "Failed to move Buffer's start forward, or pointed prefix outside "
+ "of the buffer!");
+ assert(AfterSuffix.data() >= Buffer.data() &&
+ AfterSuffix.data() < Buffer.data() + Buffer.size() &&
+ "Parsing after suffix doesn't start inside of buffer!");
+
+ // Location to use for error messages.
+ const char *UsedPrefixStart = UsedPrefix.data();
+
+ // Skip the buffer to the end of parsed suffix (or just prefix, if no good
+ // suffix was processed).
+ Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
+ : AfterSuffix;
+
+ // Complain about useful-looking but unsupported suffixes.
+ if (CheckTy == Check::CheckBadNot) {
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
+ "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
+ return true;
+ }
+
+ // Complain about invalid count specification.
+ if (CheckTy == Check::CheckBadCount) {
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
+ "invalid count in -COUNT specification on prefix '" +
+ UsedPrefix + "'");
+ return true;
+ }
+
+ // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
+ // leading whitespace.
+ if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
+ Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
+
+ // Scan ahead to the end of line.
+ size_t EOL = Buffer.find_first_of("\n\r");
+
+ // Remember the location of the start of the pattern, for diagnostics.
+ SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
+
+ // Parse the pattern.
+ FileCheckPattern P(CheckTy);
+ if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber, Req))
+ return true;
+
+ // Verify that CHECK-LABEL lines do not define or use variables
+ if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
+ SM.PrintMessage(
+ SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
+ "found '" + UsedPrefix + "-LABEL:'"
+ " with variable definition or use");
+ return true;
+ }
+
+ Buffer = Buffer.substr(EOL);
+
+ // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
+ if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
+ CheckTy == Check::CheckEmpty) &&
+ CheckStrings.empty()) {
+ StringRef Type = CheckTy == Check::CheckNext
+ ? "NEXT"
+ : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
+ SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
+ SourceMgr::DK_Error,
+ "found '" + UsedPrefix + "-" + Type +
+ "' without previous '" + UsedPrefix + ": line");
+ return true;
+ }
+
+ // Handle CHECK-DAG/-NOT.
+ if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
+ DagNotMatches.push_back(P);
+ continue;
+ }
+
+ // Okay, add the string we captured to the output vector and move on.
+ CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
+ std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
+ DagNotMatches = ImplicitNegativeChecks;
+ }
+
+ // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
+ // prefix as a filler for the error message.
+ if (!DagNotMatches.empty()) {
+ CheckStrings.emplace_back(FileCheckPattern(Check::CheckEOF), *Req.CheckPrefixes.begin(),
+ SMLoc::getFromPointer(Buffer.data()));
+ std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
+ }
+
+ if (CheckStrings.empty()) {
+ errs() << "error: no check strings found with prefix"
+ << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
+ auto I = Req.CheckPrefixes.begin();
+ auto E = Req.CheckPrefixes.end();
+ if (I != E) {
+ errs() << "\'" << *I << ":'";
+ ++I;
+ }
+ for (; I != E; ++I)
+ errs() << ", \'" << *I << ":'";
+
+ errs() << '\n';
+ return true;
+ }
+
+ return false;
+}
+
+static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
+ StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
+ int MatchedCount, StringRef Buffer,
+ StringMap<StringRef> &VariableTable, size_t MatchPos,
+ size_t MatchLen, const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) {
+ if (ExpectedMatch) {
+ if (!Req.Verbose)
+ return;
+ if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
+ return;
+ }
+ SMRange MatchRange = ProcessMatchResult(
+ ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
+ : FileCheckDiag::MatchFoundButExcluded,
+ SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
+ std::string Message = formatv("{0}: {1} string found in input",
+ Pat.getCheckTy().getDescription(Prefix),
+ (ExpectedMatch ? "expected" : "excluded"))
+ .str();
+ if (Pat.getCount() > 1)
+ Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
+
+ SM.PrintMessage(
+ Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
+ SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
+ {MatchRange});
+ Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
+}
+
+static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
+ const FileCheckString &CheckStr, int MatchedCount,
+ StringRef Buffer, StringMap<StringRef> &VariableTable,
+ size_t MatchPos, size_t MatchLen, FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) {
+ PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
+ MatchedCount, Buffer, VariableTable, MatchPos, MatchLen, Req,
+ Diags);
+}
+
+static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
+ StringRef Prefix, SMLoc Loc,
+ const FileCheckPattern &Pat, int MatchedCount,
+ StringRef Buffer, StringMap<StringRef> &VariableTable,
+ bool VerboseVerbose,
+ std::vector<FileCheckDiag> *Diags) {
+ if (!ExpectedMatch && !VerboseVerbose)
+ return;
+
+ // Otherwise, we have an error, emit an error message.
+ std::string Message = formatv("{0}: {1} string not found in input",
+ Pat.getCheckTy().getDescription(Prefix),
+ (ExpectedMatch ? "expected" : "excluded"))
+ .str();
+ if (Pat.getCount() > 1)
+ Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
+
+ SM.PrintMessage(
+ Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);
+
+ // Print the "scanning from here" line. If the current position is at the
+ // end of a line, advance to the start of the next line.
+ Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
+ SMRange SearchRange = ProcessMatchResult(
+ ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
+ : FileCheckDiag::MatchNoneAndExcluded,
+ SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
+ SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
+
+ // Allow the pattern to print additional information if desired.
+ Pat.PrintVariableUses(SM, Buffer, VariableTable);
+
+ if (ExpectedMatch)
+ Pat.PrintFuzzyMatch(SM, Buffer, VariableTable, Diags);
+}
+
+static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
+ const FileCheckString &CheckStr, int MatchedCount,
+ StringRef Buffer, StringMap<StringRef> &VariableTable,
+ bool VerboseVerbose,
+ std::vector<FileCheckDiag> *Diags) {
+ PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
+ MatchedCount, Buffer, VariableTable, VerboseVerbose, Diags);
+}
+
+/// Count the number of newlines in the specified range.
+static unsigned CountNumNewlinesBetween(StringRef Range,
+ const char *&FirstNewLine) {
+ unsigned NumNewLines = 0;
+ while (1) {
+ // Scan for newline.
+ Range = Range.substr(Range.find_first_of("\n\r"));
+ if (Range.empty())
+ return NumNewLines;
+
+ ++NumNewLines;
+
+ // Handle \n\r and \r\n as a single newline.
+ if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
+ (Range[0] != Range[1]))
+ Range = Range.substr(1);
+ Range = Range.substr(1);
+
+ if (NumNewLines == 1)
+ FirstNewLine = Range.begin();
+ }
+}
+
+/// Match check string and its "not strings" and/or "dag strings".
+size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
+ bool IsLabelScanMode, size_t &MatchLen,
+ StringMap<StringRef> &VariableTable,
+ FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const {
+ size_t LastPos = 0;
+ std::vector<const FileCheckPattern *> NotStrings;
+
+ // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
+ // bounds; we have not processed variable definitions within the bounded block
+ // yet so cannot handle any final CHECK-DAG yet; this is handled when going
+ // over the block again (including the last CHECK-LABEL) in normal mode.
+ if (!IsLabelScanMode) {
+ // Match "dag strings" (with mixed "not strings" if any).
+ LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable, Req, Diags);
+ if (LastPos == StringRef::npos)
+ return StringRef::npos;
+ }
+
+ // Match itself from the last position after matching CHECK-DAG.
+ size_t LastMatchEnd = LastPos;
+ size_t FirstMatchPos = 0;
+ // Go match the pattern Count times. The majority of patterns only match
+ // once (count 1), though.
+ assert(Pat.getCount() != 0 && "pattern count cannot be zero");
+ for (int i = 1; i <= Pat.getCount(); i++) {
+ StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
+ size_t CurrentMatchLen;
+ // get a match at current start point
+ size_t MatchPos = Pat.Match(MatchBuffer, CurrentMatchLen, VariableTable);
+ if (i == 1)
+ FirstMatchPos = LastPos + MatchPos;
+
+ // report
+ if (MatchPos == StringRef::npos) {
+ PrintNoMatch(true, SM, *this, i, MatchBuffer, VariableTable,
+ Req.VerboseVerbose, Diags);
+ return StringRef::npos;
+ }
+ PrintMatch(true, SM, *this, i, MatchBuffer, VariableTable, MatchPos,
+ CurrentMatchLen, Req, Diags);
+
+ // move start point after the match
+ LastMatchEnd += MatchPos + CurrentMatchLen;
+ }
+ // Full match len counts from first match pos.
+ MatchLen = LastMatchEnd - FirstMatchPos;
+
+ // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
+ // or CHECK-NOT
+ if (!IsLabelScanMode) {
+ size_t MatchPos = FirstMatchPos - LastPos;
+ StringRef MatchBuffer = Buffer.substr(LastPos);
+ StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
+
+ // If this check is a "CHECK-NEXT", verify that the previous match was on
+ // the previous line (i.e. that there is one newline between them).
+ if (CheckNext(SM, SkippedRegion)) {
+ ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
+ Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
+ Diags, Req.Verbose);
+ return StringRef::npos;
+ }
+
+ // If this check is a "CHECK-SAME", verify that the previous match was on
+ // the same line (i.e. that there is no newline between them).
+ if (CheckSame(SM, SkippedRegion)) {
+ ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
+ Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
+ Diags, Req.Verbose);
+ return StringRef::npos;
+ }
+
+ // If this match had "not strings", verify that they don't exist in the
+ // skipped region.
+ if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
+ return StringRef::npos;
+ }
+
+ return FirstMatchPos;
+}
+
+/// Verify there is a single line in the given buffer.
+bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
+ if (Pat.getCheckTy() != Check::CheckNext &&
+ Pat.getCheckTy() != Check::CheckEmpty)
+ return false;
+
+ Twine CheckName =
+ Prefix +
+ Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
+
+ // Count the number of newlines between the previous match and this one.
+ assert(Buffer.data() !=
+ SM.getMemoryBuffer(SM.FindBufferContainingLoc(
+ SMLoc::getFromPointer(Buffer.data())))
+ ->getBufferStart() &&
+ "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
+
+ const char *FirstNewLine = nullptr;
+ unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
+
+ if (NumNewLines == 0) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ CheckName + ": is on the same line as previous match");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
+ "'next' match was here");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
+ "previous match ended here");
+ return true;
+ }
+
+ if (NumNewLines != 1) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ CheckName +
+ ": is not on the line after the previous match");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
+ "'next' match was here");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
+ "previous match ended here");
+ SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
+ "non-matching line after previous match is here");
+ return true;
+ }
+
+ return false;
+}
+
+/// Verify there is no newline in the given buffer.
+bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
+ if (Pat.getCheckTy() != Check::CheckSame)
+ return false;
+
+ // Count the number of newlines between the previous match and this one.
+ assert(Buffer.data() !=
+ SM.getMemoryBuffer(SM.FindBufferContainingLoc(
+ SMLoc::getFromPointer(Buffer.data())))
+ ->getBufferStart() &&
+ "CHECK-SAME can't be the first check in a file");
+
+ const char *FirstNewLine = nullptr;
+ unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
+
+ if (NumNewLines != 0) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ Prefix +
+ "-SAME: is not on the same line as the previous match");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
+ "'next' match was here");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
+ "previous match ended here");
+ return true;
+ }
+
+ return false;
+}
+
+/// Verify there's no "not strings" in the given buffer.
+bool FileCheckString::CheckNot(
+ const SourceMgr &SM, StringRef Buffer,
+ const std::vector<const FileCheckPattern *> &NotStrings,
+ StringMap<StringRef> &VariableTable, const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const {
+ for (const FileCheckPattern *Pat : NotStrings) {
+ assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
+
+ size_t MatchLen = 0;
+ size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
+
+ if (Pos == StringRef::npos) {
+ PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
+ VariableTable, Req.VerboseVerbose, Diags);
+ continue;
+ }
+
+ PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, VariableTable,
+ Pos, MatchLen, Req, Diags);
+
+ return true;
+ }
+
+ return false;
+}
+
+/// Match "dag strings" and their mixed "not strings".
+size_t
+FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
+ std::vector<const FileCheckPattern *> &NotStrings,
+ StringMap<StringRef> &VariableTable,
+ const FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const {
+ if (DagNotStrings.empty())
+ return 0;
+
+ // The start of the search range.
+ size_t StartPos = 0;
+
+ struct MatchRange {
+ size_t Pos;
+ size_t End;
+ };
+ // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match
+ // ranges are erased from this list once they are no longer in the search
+ // range.
+ std::list<MatchRange> MatchRanges;
+
+ // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
+ // group, so we don't use a range-based for loop here.
+ for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
+ PatItr != PatEnd; ++PatItr) {
+ const FileCheckPattern &Pat = *PatItr;
+ assert((Pat.getCheckTy() == Check::CheckDAG ||
+ Pat.getCheckTy() == Check::CheckNot) &&
+ "Invalid CHECK-DAG or CHECK-NOT!");
+
+ if (Pat.getCheckTy() == Check::CheckNot) {
+ NotStrings.push_back(&Pat);
+ continue;
+ }
+
+ assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
+
+ // CHECK-DAG always matches from the start.
+ size_t MatchLen = 0, MatchPos = StartPos;
+
+ // Search for a match that doesn't overlap a previous match in this
+ // CHECK-DAG group.
+ for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
+ StringRef MatchBuffer = Buffer.substr(MatchPos);
+ size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
+ // With a group of CHECK-DAGs, a single mismatch means the match on
+ // that group of CHECK-DAGs fails immediately.
+ if (MatchPosBuf == StringRef::npos) {
+ PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
+ VariableTable, Req.VerboseVerbose, Diags);
+ return StringRef::npos;
+ }
+ // Re-calc it as the offset relative to the start of the original string.
+ MatchPos += MatchPosBuf;
+ if (Req.VerboseVerbose)
+ PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer,
+ VariableTable, MatchPos, MatchLen, Req, Diags);
+ MatchRange M{MatchPos, MatchPos + MatchLen};
+ if (Req.AllowDeprecatedDagOverlap) {
+ // We don't need to track all matches in this mode, so we just maintain
+ // one match range that encompasses the current CHECK-DAG group's
+ // matches.
+ if (MatchRanges.empty())
+ MatchRanges.insert(MatchRanges.end(), M);
+ else {
+ auto Block = MatchRanges.begin();
+ Block->Pos = std::min(Block->Pos, M.Pos);
+ Block->End = std::max(Block->End, M.End);
+ }
+ break;
+ }
+ // Iterate previous matches until overlapping match or insertion point.
+ bool Overlap = false;
+ for (; MI != ME; ++MI) {
+ if (M.Pos < MI->End) {
+ // !Overlap => New match has no overlap and is before this old match.
+ // Overlap => New match overlaps this old match.
+ Overlap = MI->Pos < M.End;
+ break;
+ }
+ }
+ if (!Overlap) {
+ // Insert non-overlapping match into list.
+ MatchRanges.insert(MI, M);
+ break;
+ }
+ if (Req.VerboseVerbose) {
+ SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
+ SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
+ SMRange OldRange(OldStart, OldEnd);
+ SM.PrintMessage(OldStart, SourceMgr::DK_Note,
+ "match discarded, overlaps earlier DAG match here",
+ {OldRange});
+ if (Diags)
+ Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
+ }
+ MatchPos = MI->End;
+ }
+ if (!Req.VerboseVerbose)
+ PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, VariableTable,
+ MatchPos, MatchLen, Req, Diags);
+
+ // Handle the end of a CHECK-DAG group.
+ if (std::next(PatItr) == PatEnd ||
+ std::next(PatItr)->getCheckTy() == Check::CheckNot) {
+ if (!NotStrings.empty()) {
+ // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
+ // CHECK-DAG, verify that none of the 'not' strings occur in that
+ // region.
+ StringRef SkippedRegion =
+ Buffer.slice(StartPos, MatchRanges.begin()->Pos);
+ if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
+ return StringRef::npos;
+ // Clear "not strings".
+ NotStrings.clear();
+ }
+ // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
+ // end of this CHECK-DAG group's match range.
+ StartPos = MatchRanges.rbegin()->End;
+ // Don't waste time checking for (impossible) overlaps before that.
+ MatchRanges.clear();
+ }
+ }
+
+ return StartPos;
+}
+
+// A check prefix must contain only alphanumeric characters, hyphens, and underscores.
+static bool ValidateCheckPrefix(StringRef CheckPrefix) {
+ Regex Validator("^[a-zA-Z0-9_-]*$");
+ return Validator.match(CheckPrefix);
+}
+
+bool llvm::FileCheck::ValidateCheckPrefixes() {
+ StringSet<> PrefixSet;
+
+ for (StringRef Prefix : Req.CheckPrefixes) {
+ // Reject empty prefixes.
+ if (Prefix == "")
+ return false;
+
+ if (!PrefixSet.insert(Prefix).second)
+ return false;
+
+ if (!ValidateCheckPrefix(Prefix))
+ return false;
+ }
+
+ return true;
+}
+
+// Combines the check prefixes into a single regex so that we can efficiently
+// scan for any prefix in the set.
+//
+// The semantics are that the longest match wins, which matches the behavior
+// of our regex library.
+Regex llvm::FileCheck::buildCheckPrefixRegex() {
+ // I don't think there's a way to specify an initial value for cl::list,
+ // so if nothing was specified, add the default
+ if (Req.CheckPrefixes.empty())
+ Req.CheckPrefixes.push_back("CHECK");
+
+ // We already validated the contents of CheckPrefixes so just concatenate
+ // them as alternatives.
+ SmallString<32> PrefixRegexStr;
+ for (StringRef Prefix : Req.CheckPrefixes) {
+ if (Prefix != Req.CheckPrefixes.front())
+ PrefixRegexStr.push_back('|');
+
+ PrefixRegexStr.append(Prefix);
+ }
+
+ return Regex(PrefixRegexStr);
+}
+
+// Remove local variables from \p VariableTable. Global variables
+// (which start with '$') are preserved.
+static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
+ SmallVector<StringRef, 16> LocalVars;
+ for (const auto &Var : VariableTable)
+ if (Var.first()[0] != '$')
+ LocalVars.push_back(Var.first());
+
+ for (const auto &Var : LocalVars)
+ VariableTable.erase(Var);
+}
+
+/// Check the input to FileCheck provided in the \p Buffer against the \p
+/// CheckStrings read from the check file.
+///
+/// Returns false if the input fails to satisfy the checks.
+bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
+ ArrayRef<FileCheckString> CheckStrings,
+ std::vector<FileCheckDiag> *Diags) {
+ bool ChecksFailed = false;
+
+ /// VariableTable - This holds all the current FileCheck variables.
+ StringMap<StringRef> VariableTable;
+
+ for (const auto& Def : Req.GlobalDefines)
+ VariableTable.insert(StringRef(Def).split('='));
+
+ unsigned i = 0, j = 0, e = CheckStrings.size();
+ while (true) {
+ StringRef CheckRegion;
+ if (j == e) {
+ CheckRegion = Buffer;
+ } else {
+ const FileCheckString &CheckLabelStr = CheckStrings[j];
+ if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
+ ++j;
+ continue;
+ }
+
+ // Scan to the next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG.
+ size_t MatchLabelLen = 0;
+ size_t MatchLabelPos = CheckLabelStr.Check(
+ SM, Buffer, true, MatchLabelLen, VariableTable, Req, Diags);
+ if (MatchLabelPos == StringRef::npos)
+ // Immediately bail if CHECK-LABEL fails; there is nothing else we can do.
+ return false;
+
+ CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
+ Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
+ ++j;
+ }
+
+ if (Req.EnableVarScope)
+ ClearLocalVars(VariableTable);
+
+ for (; i != j; ++i) {
+ const FileCheckString &CheckStr = CheckStrings[i];
+
+ // Check each string within the scanned region, including a second check
+ // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
+ size_t MatchLen = 0;
+ size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
+ VariableTable, Req, Diags);
+
+ if (MatchPos == StringRef::npos) {
+ ChecksFailed = true;
+ i = j;
+ break;
+ }
+
+ CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
+ }
+
+ if (j == e)
+ break;
+ }
+
+ // Success if no checks failed.
+ return !ChecksFailed;
+}
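
FileCheck's CHECK-DAG handling above keeps a sorted list of non-overlapping match ranges and, when a new match collides with an earlier one, restarts the pattern search just past the conflicting range. Below is a minimal, standalone C++ sketch of that bookkeeping; it is not the FileCheck code itself and the names are illustrative.

#include <cstddef>
#include <list>

struct Range { size_t Pos, End; };

// Try to place a new match M into a sorted list of non-overlapping ranges.
// On success the range is inserted and true is returned; on overlap,
// NextStart is advanced past the conflicting range so the caller can
// re-run its pattern search from there, as CheckDag does.
static bool insertNonOverlapping(std::list<Range> &Ranges, Range M,
                                 size_t &NextStart) {
  auto It = Ranges.begin();
  for (; It != Ranges.end(); ++It) {
    if (M.Pos < It->End) {
      if (It->Pos < M.End) { // Overlaps an earlier match.
        NextStart = It->End;
        return false;
      }
      break; // No overlap; M belongs before *It.
    }
  }
  Ranges.insert(It, M);
  return true;
}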
diff --git a/contrib/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
index 1214b5a0ba1f..b8223126227d 100644
--- a/contrib/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
@@ -61,6 +61,12 @@ public:
consumeError(Temp.discard());
}
+ void discard() override {
+ // Delete the temp file if it was still open, but keep the mapping
+ // active.
+ consumeError(Temp.discard());
+ }
+
private:
std::unique_ptr<fs::mapped_file_region> Buffer;
fs::TempFile Temp;
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
index cf9847faccd1..ee69a64ac97b 100644
--- a/contrib/llvm/lib/Support/FoldingSet.cpp
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -275,7 +275,7 @@ void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) {
// Clear out new buckets.
Buckets = AllocateBuckets(NewBucketCount);
- // Set NumBuckets only if allocation of new buckets was succesful
+ // Set NumBuckets only if allocation of new buckets was successful.
NumBuckets = NewBucketCount;
NumNodes = 0;
diff --git a/contrib/llvm/lib/Support/FormatVariadic.cpp b/contrib/llvm/lib/Support/FormatVariadic.cpp
index 6dd133e6c50a..1f3505d5f74f 100644
--- a/contrib/llvm/lib/Support/FormatVariadic.cpp
+++ b/contrib/llvm/lib/Support/FormatVariadic.cpp
@@ -152,3 +152,5 @@ formatv_object_base::parseFormatString(StringRef Fmt) {
}
return Replacements;
}
+
+void detail::format_adapter::anchor() { }
diff --git a/contrib/llvm/lib/Support/Hashing.cpp b/contrib/llvm/lib/Support/Hashing.cpp
index c69efb7c3cc9..7de25cec7371 100644
--- a/contrib/llvm/lib/Support/Hashing.cpp
+++ b/contrib/llvm/lib/Support/Hashing.cpp
@@ -20,10 +20,10 @@ using namespace llvm;
// Provide a definition and static initializer for the fixed seed. This
// initializer should always be zero to ensure its value can never appear to be
// non-zero, even during dynamic initialization.
-size_t llvm::hashing::detail::fixed_seed_override = 0;
+uint64_t llvm::hashing::detail::fixed_seed_override = 0;
// Implement the function for forced setting of the fixed seed.
// FIXME: Use atomic operations here so that there is no data race.
-void llvm::set_fixed_execution_hash_seed(size_t fixed_value) {
+void llvm::set_fixed_execution_hash_seed(uint64_t fixed_value) {
hashing::detail::fixed_seed_override = fixed_value;
}
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
index 2c718dd3f5a8..d5a688c7fb9b 100644
--- a/contrib/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -196,6 +196,32 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Default("generic");
}
+ if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
+ if (Lines[I].startswith("CPU part")) {
+ return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
+ .Case("0x516", "thunderx2t99")
+ .Case("0x0516", "thunderx2t99")
+ .Case("0xaf", "thunderx2t99")
+ .Case("0x0af", "thunderx2t99")
+ .Case("0xa1", "thunderxt88")
+ .Case("0x0a1", "thunderxt88")
+ .Default("generic");
+ }
+ }
+ }
+
+ if (Implementer == "0x48") // HiSilicon Technologies, Inc.
+ // Look for the CPU part line.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("CPU part"))
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
+ .Case("0xd01", "tsv110")
+ .Default("generic");
+
if (Implementer == "0x51") // Qualcomm Technologies, Inc.
// Look for the CPU part line.
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
@@ -496,8 +522,8 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
static void
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
unsigned Brand_id, unsigned Features,
- unsigned Features2, unsigned *Type,
- unsigned *Subtype) {
+ unsigned Features2, unsigned Features3,
+ unsigned *Type, unsigned *Subtype) {
if (Brand_id != 0)
return;
switch (Family) {
@@ -664,12 +690,24 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
default: // Unknown family 6 CPU, try to guess.
+ if (Features & (1 << X86::FEATURE_AVX512VBMI2)) {
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
+ break;
+ }
+
if (Features & (1 << X86::FEATURE_AVX512VBMI)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_CANNONLAKE;
break;
}
+ if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) {
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_CASCADELAKE;
+ break;
+ }
+
if (Features & (1 << X86::FEATURE_AVX512VL)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
@@ -681,8 +719,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
}
- if (Features2 & (1 << (X86::FEATURE_CLFLUSHOPT - 32))) {
- if (Features2 & (1 << (X86::FEATURE_SHA - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_CLFLUSHOPT - 64))) {
+ if (Features3 & (1 << (X86::FEATURE_SHA - 64))) {
*Type = X86::INTEL_GOLDMONT;
} else {
*Type = X86::INTEL_COREI7;
@@ -690,7 +728,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
}
- if (Features2 & (1 << (X86::FEATURE_ADX - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_ADX - 64))) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_BROADWELL;
break;
@@ -706,7 +744,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
}
if (Features & (1 << X86::FEATURE_SSE4_2)) {
- if (Features2 & (1 << (X86::FEATURE_MOVBE - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) {
*Type = X86::INTEL_SILVERMONT;
} else {
*Type = X86::INTEL_COREI7;
@@ -720,7 +758,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
}
if (Features & (1 << X86::FEATURE_SSSE3)) {
- if (Features2 & (1 << (X86::FEATURE_MOVBE - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) {
*Type = X86::INTEL_BONNELL; // "bonnell"
} else {
*Type = X86::INTEL_CORE2; // "core2"
@@ -728,7 +766,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
}
- if (Features2 & (1 << (X86::FEATURE_EM64T - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) {
*Type = X86::INTEL_CORE2; // "core2"
*Subtype = X86::INTEL_CORE2_65;
break;
@@ -754,7 +792,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
case 15: {
- if (Features2 & (1 << (X86::FEATURE_EM64T - 32))) {
+ if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) {
*Type = X86::INTEL_NOCONA;
break;
}
@@ -862,40 +900,52 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
- unsigned *FeaturesOut,
- unsigned *Features2Out) {
+ unsigned *FeaturesOut, unsigned *Features2Out,
+ unsigned *Features3Out) {
unsigned Features = 0;
unsigned Features2 = 0;
+ unsigned Features3 = 0;
unsigned EAX, EBX;
+ auto setFeature = [&](unsigned F) {
+ if (F < 32)
+ Features |= 1U << (F & 0x1f);
+ else if (F < 64)
+ Features2 |= 1U << ((F - 32) & 0x1f);
+ else if (F < 96)
+ Features3 |= 1U << ((F - 64) & 0x1f);
+ else
+ llvm_unreachable("Unexpected FeatureBit");
+ };
+
if ((EDX >> 15) & 1)
- Features |= 1 << X86::FEATURE_CMOV;
+ setFeature(X86::FEATURE_CMOV);
if ((EDX >> 23) & 1)
- Features |= 1 << X86::FEATURE_MMX;
+ setFeature(X86::FEATURE_MMX);
if ((EDX >> 25) & 1)
- Features |= 1 << X86::FEATURE_SSE;
+ setFeature(X86::FEATURE_SSE);
if ((EDX >> 26) & 1)
- Features |= 1 << X86::FEATURE_SSE2;
+ setFeature(X86::FEATURE_SSE2);
if ((ECX >> 0) & 1)
- Features |= 1 << X86::FEATURE_SSE3;
+ setFeature(X86::FEATURE_SSE3);
if ((ECX >> 1) & 1)
- Features |= 1 << X86::FEATURE_PCLMUL;
+ setFeature(X86::FEATURE_PCLMUL);
if ((ECX >> 9) & 1)
- Features |= 1 << X86::FEATURE_SSSE3;
+ setFeature(X86::FEATURE_SSSE3);
if ((ECX >> 12) & 1)
- Features |= 1 << X86::FEATURE_FMA;
+ setFeature(X86::FEATURE_FMA);
if ((ECX >> 19) & 1)
- Features |= 1 << X86::FEATURE_SSE4_1;
+ setFeature(X86::FEATURE_SSE4_1);
if ((ECX >> 20) & 1)
- Features |= 1 << X86::FEATURE_SSE4_2;
+ setFeature(X86::FEATURE_SSE4_2);
if ((ECX >> 23) & 1)
- Features |= 1 << X86::FEATURE_POPCNT;
+ setFeature(X86::FEATURE_POPCNT);
if ((ECX >> 25) & 1)
- Features |= 1 << X86::FEATURE_AES;
+ setFeature(X86::FEATURE_AES);
if ((ECX >> 22) & 1)
- Features2 |= 1 << (X86::FEATURE_MOVBE - 32);
+ setFeature(X86::FEATURE_MOVBE);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
@@ -906,49 +956,59 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
if (HasAVX)
- Features |= 1 << X86::FEATURE_AVX;
+ setFeature(X86::FEATURE_AVX);
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7 && ((EBX >> 3) & 1))
- Features |= 1 << X86::FEATURE_BMI;
+ setFeature(X86::FEATURE_BMI);
if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
- Features |= 1 << X86::FEATURE_AVX2;
+ setFeature(X86::FEATURE_AVX2);
if (HasLeaf7 && ((EBX >> 9) & 1))
- Features |= 1 << X86::FEATURE_BMI2;
+ setFeature(X86::FEATURE_BMI2);
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512F;
+ setFeature(X86::FEATURE_AVX512F);
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512DQ;
+ setFeature(X86::FEATURE_AVX512DQ);
if (HasLeaf7 && ((EBX >> 19) & 1))
- Features2 |= 1 << (X86::FEATURE_ADX - 32);
+ setFeature(X86::FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512IFMA;
+ setFeature(X86::FEATURE_AVX512IFMA);
if (HasLeaf7 && ((EBX >> 23) & 1))
- Features2 |= 1 << (X86::FEATURE_CLFLUSHOPT - 32);
+ setFeature(X86::FEATURE_CLFLUSHOPT);
if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512PF;
+ setFeature(X86::FEATURE_AVX512PF);
if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512ER;
+ setFeature(X86::FEATURE_AVX512ER);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512CD;
+ setFeature(X86::FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
- Features2 |= 1 << (X86::FEATURE_SHA - 32);
+ setFeature(X86::FEATURE_SHA);
if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512BW;
+ setFeature(X86::FEATURE_AVX512BW);
if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512VL;
+ setFeature(X86::FEATURE_AVX512VL);
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512VBMI;
+ setFeature(X86::FEATURE_AVX512VBMI);
+ if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512VBMI2);
+ if (HasLeaf7 && ((ECX >> 8) & 1))
+ setFeature(X86::FEATURE_GFNI);
+ if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
+ setFeature(X86::FEATURE_VPCLMULQDQ);
+ if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512VNNI);
+ if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512BITALG);
if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX512VPOPCNTDQ;
+ setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX5124VNNIW;
+ setFeature(X86::FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
- Features |= 1 << X86::FEATURE_AVX5124FMAPS;
+ setFeature(X86::FEATURE_AVX5124FMAPS);
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -956,17 +1016,18 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
!getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
if (HasExtLeaf1 && ((ECX >> 6) & 1))
- Features |= 1 << X86::FEATURE_SSE4_A;
+ setFeature(X86::FEATURE_SSE4_A);
if (HasExtLeaf1 && ((ECX >> 11) & 1))
- Features |= 1 << X86::FEATURE_XOP;
+ setFeature(X86::FEATURE_XOP);
if (HasExtLeaf1 && ((ECX >> 16) & 1))
- Features |= 1 << X86::FEATURE_FMA4;
+ setFeature(X86::FEATURE_FMA4);
if (HasExtLeaf1 && ((EDX >> 29) & 1))
- Features2 |= 1 << (X86::FEATURE_EM64T - 32);
+ setFeature(X86::FEATURE_EM64T);
*FeaturesOut = Features;
*Features2Out = Features2;
+ *Features3Out = Features3;
}
StringRef sys::getHostCPUName() {
@@ -987,16 +1048,16 @@ StringRef sys::getHostCPUName() {
unsigned Brand_id = EBX & 0xff;
unsigned Family = 0, Model = 0;
- unsigned Features = 0, Features2 = 0;
+ unsigned Features = 0, Features2 = 0, Features3 = 0;
detectX86FamilyModel(EAX, &Family, &Model);
- getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
+ getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2, &Features3);
unsigned Type = 0;
unsigned Subtype = 0;
if (Vendor == SIG_INTEL) {
getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
- Features2, &Type, &Subtype);
+ Features2, Features3, &Type, &Subtype);
} else if (Vendor == SIG_AMD) {
getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype);
}
@@ -1022,8 +1083,10 @@ StringRef sys::getHostCPUName() {
mach_msg_type_number_t infoCount;
infoCount = HOST_BASIC_INFO_COUNT;
- host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ mach_port_t hostPort = mach_host_self();
+ host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
&infoCount);
+ mach_port_deallocate(mach_task_self(), hostPort);
if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
return "generic";
@@ -1215,6 +1278,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
+ Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1);
+
// Miscellaneous memory related features, detected by
// using the 0x80000008 leaf of the CPUID instruction
bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
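
The setFeature lambda introduced above spreads X86 feature indices across three 32-bit words (Features, Features2, Features3), which is why the Intel subtype checks now subtract 32 or 64 before testing a bit. A standalone sketch of the same packing, with hypothetical helper names:

#include <cassert>
#include <cstdint>

// Word 0 holds feature indices 0-31, word 1 indices 32-63, word 2 indices
// 64-95, matching the Features/Features2/Features3 split above.
static void setFeatureBit(unsigned Index, uint32_t Words[3]) {
  assert(Index < 96 && "Unexpected feature index");
  Words[Index / 32] |= 1u << (Index % 32);
}

static bool testFeatureBit(unsigned Index, const uint32_t Words[3]) {
  assert(Index < 96 && "Unexpected feature index");
  return (Words[Index / 32] >> (Index % 32)) & 1u;
}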
diff --git a/contrib/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/contrib/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
new file mode 100644
index 000000000000..e55dcd761809
--- /dev/null
+++ b/contrib/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
@@ -0,0 +1,322 @@
+//===----------------- ItaniumManglingCanonicalizer.cpp -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ItaniumManglingCanonicalizer.h"
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Demangle/ItaniumDemangle.h"
+#include "llvm/Support/Allocator.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+using llvm::itanium_demangle::ForwardTemplateReference;
+using llvm::itanium_demangle::Node;
+using llvm::itanium_demangle::NodeKind;
+
+namespace {
+struct FoldingSetNodeIDBuilder {
+ llvm::FoldingSetNodeID &ID;
+ void operator()(const Node *P) { ID.AddPointer(P); }
+ void operator()(StringView Str) {
+ ID.AddString(llvm::StringRef(Str.begin(), Str.size()));
+ }
+ template<typename T>
+ typename std::enable_if<std::is_integral<T>::value ||
+ std::is_enum<T>::value>::type
+ operator()(T V) {
+ ID.AddInteger((unsigned long long)V);
+ }
+ void operator()(itanium_demangle::NodeOrString NS) {
+ if (NS.isNode()) {
+ ID.AddInteger(0);
+ (*this)(NS.asNode());
+ } else if (NS.isString()) {
+ ID.AddInteger(1);
+ (*this)(NS.asString());
+ } else {
+ ID.AddInteger(2);
+ }
+ }
+ void operator()(itanium_demangle::NodeArray A) {
+ ID.AddInteger(A.size());
+ for (const Node *N : A)
+ (*this)(N);
+ }
+};
+
+template<typename ...T>
+void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) {
+ FoldingSetNodeIDBuilder Builder = {ID};
+ Builder(K);
+ int VisitInOrder[] = {
+ (Builder(V), 0) ...,
+ 0 // Avoid empty array if there are no arguments.
+ };
+ (void)VisitInOrder;
+}
+
+// FIXME: Convert this to a generic lambda when possible.
+template<typename NodeT> struct ProfileSpecificNode {
+ FoldingSetNodeID &ID;
+ template<typename ...T> void operator()(T ...V) {
+ profileCtor(ID, NodeKind<NodeT>::Kind, V...);
+ }
+};
+
+struct ProfileNode {
+ FoldingSetNodeID &ID;
+ template<typename NodeT> void operator()(const NodeT *N) {
+ N->match(ProfileSpecificNode<NodeT>{ID});
+ }
+};
+
+template<> void ProfileNode::operator()(const ForwardTemplateReference *N) {
+ llvm_unreachable("should never canonicalize a ForwardTemplateReference");
+}
+
+void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) {
+ N->visit(ProfileNode{ID});
+}
+
+class FoldingNodeAllocator {
+ class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode {
+ public:
+ // 'Node' in this context names the injected-class-name of the base class.
+ itanium_demangle::Node *getNode() {
+ return reinterpret_cast<itanium_demangle::Node *>(this + 1);
+ }
+ void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); }
+ };
+
+ BumpPtrAllocator RawAlloc;
+ llvm::FoldingSet<NodeHeader> Nodes;
+
+public:
+ void reset() {}
+
+ template <typename T, typename... Args>
+ std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) {
+ // FIXME: Don't canonicalize forward template references for now, because
+ // they contain state (the resolved template node) that's not known at their
+ // point of creation.
+ if (std::is_same<T, ForwardTemplateReference>::value) {
+ // Note that we don't use if-constexpr here and so we must still write
+ // this code in a generic form.
+ return {new (RawAlloc.Allocate(sizeof(T), alignof(T)))
+ T(std::forward<Args>(As)...),
+ true};
+ }
+
+ llvm::FoldingSetNodeID ID;
+ profileCtor(ID, NodeKind<T>::Kind, As...);
+
+ void *InsertPos;
+ if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos))
+ return {static_cast<T*>(Existing->getNode()), false};
+
+ if (!CreateNewNodes)
+ return {nullptr, true};
+
+ static_assert(alignof(T) <= alignof(NodeHeader),
+ "underaligned node header for specific node kind");
+ void *Storage =
+ RawAlloc.Allocate(sizeof(NodeHeader) + sizeof(T), alignof(NodeHeader));
+ NodeHeader *New = new (Storage) NodeHeader;
+ T *Result = new (New->getNode()) T(std::forward<Args>(As)...);
+ Nodes.InsertNode(New, InsertPos);
+ return {Result, true};
+ }
+
+ template<typename T, typename... Args>
+ Node *makeNode(Args &&...As) {
+ return getOrCreateNode<T>(true, std::forward<Args>(As)...).first;
+ }
+
+ void *allocateNodeArray(size_t sz) {
+ return RawAlloc.Allocate(sizeof(Node *) * sz, alignof(Node *));
+ }
+};
+
+class CanonicalizerAllocator : public FoldingNodeAllocator {
+ Node *MostRecentlyCreated = nullptr;
+ Node *TrackedNode = nullptr;
+ bool TrackedNodeIsUsed = false;
+ bool CreateNewNodes = true;
+ llvm::SmallDenseMap<Node*, Node*, 32> Remappings;
+
+ template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) {
+ std::pair<Node *, bool> Result =
+ getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...);
+ if (Result.second) {
+ // Node is new. Make a note of that.
+ MostRecentlyCreated = Result.first;
+ } else if (Result.first) {
+ // Node is pre-existing; check if it's in our remapping table.
+ if (auto *N = Remappings.lookup(Result.first)) {
+ Result.first = N;
+ assert(Remappings.find(Result.first) == Remappings.end() &&
+ "should never need multiple remap steps");
+ }
+ if (Result.first == TrackedNode)
+ TrackedNodeIsUsed = true;
+ }
+ return Result.first;
+ }
+
+ /// Helper to allow makeNode to be partially-specialized on T.
+ template<typename T> struct MakeNodeImpl {
+ CanonicalizerAllocator &Self;
+ template<typename ...Args> Node *make(Args &&...As) {
+ return Self.makeNodeSimple<T>(std::forward<Args>(As)...);
+ }
+ };
+
+public:
+ template<typename T, typename ...Args> Node *makeNode(Args &&...As) {
+ return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...);
+ }
+
+ void reset() { MostRecentlyCreated = nullptr; }
+
+ void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; }
+
+ void addRemapping(Node *A, Node *B) {
+ // Note, we don't need to check whether B is also remapped, because if it
+ // was we would have already remapped it when building it.
+ Remappings.insert(std::make_pair(A, B));
+ }
+
+ bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; }
+
+ void trackUsesOf(Node *N) {
+ TrackedNode = N;
+ TrackedNodeIsUsed = false;
+ }
+ bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; }
+};
+
+/// Convert St3foo to NSt3fooE so that equivalences naming one also affect the
+/// other.
+template<>
+struct CanonicalizerAllocator::MakeNodeImpl<
+ itanium_demangle::StdQualifiedName> {
+ CanonicalizerAllocator &Self;
+ Node *make(Node *Child) {
+ Node *StdNamespace = Self.makeNode<itanium_demangle::NameType>("std");
+ if (!StdNamespace)
+ return nullptr;
+ return Self.makeNode<itanium_demangle::NestedName>(StdNamespace, Child);
+ }
+};
+
+// FIXME: Also expand built-in substitutions?
+
+using CanonicalizingDemangler =
+ itanium_demangle::ManglingParser<CanonicalizerAllocator>;
+}
+
+struct ItaniumManglingCanonicalizer::Impl {
+ CanonicalizingDemangler Demangler = {nullptr, nullptr};
+};
+
+ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {}
+ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; }
+
+ItaniumManglingCanonicalizer::EquivalenceError
+ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First,
+ StringRef Second) {
+ auto &Alloc = P->Demangler.ASTAllocator;
+ Alloc.setCreateNewNodes(true);
+
+ auto Parse = [&](StringRef Str) {
+ P->Demangler.reset(Str.begin(), Str.end());
+ Node *N = nullptr;
+ switch (Kind) {
+ // A <name>, with minor extensions to allow arbitrary namespace and
+ // template names that can't easily be written as <name>s.
+ case FragmentKind::Name:
+ // Very special case: allow "St" as a shorthand for "3std". It's not
+ // valid as a <name> mangling, but is nonetheless the most natural
+ // way to name the 'std' namespace.
+ if (Str.size() == 2 && P->Demangler.consumeIf("St"))
+ N = P->Demangler.make<itanium_demangle::NameType>("std");
+ // We permit substitutions to name templates without their template
+ // arguments. This mostly just falls out, as almost all template names
+ // are valid as <name>s, but we also want to parse <substitution>s as
+ // <name>s, even though they're not.
+ else if (Str.startswith("S"))
+ // Parse the substitution and optional following template arguments.
+ N = P->Demangler.parseType();
+ else
+ N = P->Demangler.parseName();
+ break;
+
+ // A <type>.
+ case FragmentKind::Type:
+ N = P->Demangler.parseType();
+ break;
+
+ // An <encoding>.
+ case FragmentKind::Encoding:
+ N = P->Demangler.parseEncoding();
+ break;
+ }
+
+ // If we have trailing junk, the mangling is invalid.
+ if (P->Demangler.numLeft() != 0)
+ N = nullptr;
+
+ // If any node was created after N, then we cannot safely remap it because
+ // it might already be in use by another node.
+ return std::make_pair(N, Alloc.isMostRecentlyCreated(N));
+ };
+
+ Node *FirstNode, *SecondNode;
+ bool FirstIsNew, SecondIsNew;
+
+ std::tie(FirstNode, FirstIsNew) = Parse(First);
+ if (!FirstNode)
+ return EquivalenceError::InvalidFirstMangling;
+
+ Alloc.trackUsesOf(FirstNode);
+ std::tie(SecondNode, SecondIsNew) = Parse(Second);
+ if (!SecondNode)
+ return EquivalenceError::InvalidSecondMangling;
+
+ // If they're already equivalent, there's nothing to do.
+ if (FirstNode == SecondNode)
+ return EquivalenceError::Success;
+
+ if (FirstIsNew && !Alloc.trackedNodeIsUsed())
+ Alloc.addRemapping(FirstNode, SecondNode);
+ else if (SecondIsNew)
+ Alloc.addRemapping(SecondNode, FirstNode);
+ else
+ return EquivalenceError::ManglingAlreadyUsed;
+
+ return EquivalenceError::Success;
+}
+
+ItaniumManglingCanonicalizer::Key
+ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) {
+ P->Demangler.ASTAllocator.setCreateNewNodes(true);
+ P->Demangler.reset(Mangling.begin(), Mangling.end());
+ return reinterpret_cast<Key>(P->Demangler.parse());
+}
+
+ItaniumManglingCanonicalizer::Key
+ItaniumManglingCanonicalizer::lookup(StringRef Mangling) {
+ P->Demangler.ASTAllocator.setCreateNewNodes(false);
+ P->Demangler.reset(Mangling.begin(), Mangling.end());
+ return reinterpret_cast<Key>(P->Demangler.parse());
+}
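
A hedged usage sketch of the API this new file implements, based only on the signatures visible here (addEquivalence, canonicalize). It assumes Key values can be compared with ==, which the reinterpret_cast above suggests but the header does not appear in this diff; the manglings are illustrative.

#include "llvm/Support/ItaniumManglingCanonicalizer.h"

// Record that the namespaces mangled as "3foo" and "3bar" are equivalent,
// then ask whether two manglings canonicalize to the same key.
bool manglingsMatch() {
  llvm::ItaniumManglingCanonicalizer C;
  using FK = llvm::ItaniumManglingCanonicalizer::FragmentKind;
  using EE = llvm::ItaniumManglingCanonicalizer::EquivalenceError;
  if (C.addEquivalence(FK::Name, "3foo", "3bar") != EE::Success)
    return false;
  return C.canonicalize("_Z1fN3foo1XE") == C.canonicalize("_Z1fN3bar1XE");
}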
diff --git a/contrib/llvm/lib/Support/JSON.cpp b/contrib/llvm/lib/Support/JSON.cpp
index a5dae7a7c2e0..d468013fb94a 100644
--- a/contrib/llvm/lib/Support/JSON.cpp
+++ b/contrib/llvm/lib/Support/JSON.cpp
@@ -517,7 +517,7 @@ static std::vector<const Object::value_type *> sortedElements(const Object &O) {
std::vector<const Object::value_type *> Elements;
for (const auto &E : O)
Elements.push_back(&E);
- llvm::sort(Elements.begin(), Elements.end(),
+ llvm::sort(Elements,
[](const Object::value_type *L, const Object::value_type *R) {
return L->first < R->first;
});
diff --git a/contrib/llvm/lib/Support/Locale.cpp b/contrib/llvm/lib/Support/Locale.cpp
index e57d377c9ab5..1b3300b90f2a 100644
--- a/contrib/llvm/lib/Support/Locale.cpp
+++ b/contrib/llvm/lib/Support/Locale.cpp
@@ -7,24 +7,11 @@ namespace sys {
namespace locale {
int columnWidth(StringRef Text) {
-#if _WIN32
- return Text.size();
-#else
return llvm::sys::unicode::columnWidthUTF8(Text);
-#endif
}
bool isPrint(int UCS) {
-#if _WIN32
- // Restrict characters that we'll try to print to the lower part of ASCII
- // except for the control characters (0x20 - 0x7E). In general one can not
- // reliably output code points U+0080 and higher using narrow character C/C++
- // output functions in Windows, because the meaning of the upper 128 codes is
- // determined by the active code page in the console.
- return ' ' <= UCS && UCS <= '~';
-#else
return llvm::sys::unicode::isPrintable(UCS);
-#endif
}
} // namespace locale
diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp
index 77baf7ac4bdd..c166230ba3a3 100644
--- a/contrib/llvm/lib/Support/LockFileManager.cpp
+++ b/contrib/llvm/lib/Support/LockFileManager.cpp
@@ -24,7 +24,7 @@
#include <sys/types.h>
#include <system_error>
#include <tuple>
-#if _WIN32
+#ifdef _WIN32
#include <windows.h>
#endif
#if LLVM_ON_UNIX
@@ -295,7 +295,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
if (getState() != LFS_Shared)
return Res_Success;
-#if _WIN32
+#ifdef _WIN32
unsigned long Interval = 1;
#else
struct timespec Interval;
@@ -310,7 +310,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
// finish up and remove the lock file.
// FIXME: Should we hook in to system APIs to get a notification when the
// lock file is deleted?
-#if _WIN32
+#ifdef _WIN32
Sleep(Interval);
#else
nanosleep(&Interval, nullptr);
@@ -329,7 +329,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
return Res_OwnerDied;
// Exponentially increase the time we wait for the lock to be removed.
-#if _WIN32
+#ifdef _WIN32
Interval *= 2;
#else
Interval.tv_sec *= 2;
@@ -340,7 +340,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
}
#endif
} while (
-#if _WIN32
+#ifdef _WIN32
Interval < MaxSeconds * 1000
#else
Interval.tv_sec < (time_t)MaxSeconds
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
index 768a819c8d05..5ce2f50ebdaa 100644
--- a/contrib/llvm/lib/Support/Path.cpp
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -190,48 +190,57 @@ createUniqueEntity(const Twine &Model, int &ResultFD,
ResultPath.push_back(0);
ResultPath.pop_back();
-retry_random_path:
- // Replace '%' with random chars.
- for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) {
- if (ModelStorage[i] == '%')
- ResultPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
- }
-
- // Try to open + create the file.
- switch (Type) {
- case FS_File: {
- if (std::error_code EC =
- sys::fs::openFileForReadWrite(Twine(ResultPath.begin()), ResultFD,
- sys::fs::CD_CreateNew, Flags, Mode)) {
- if (EC == errc::file_exists)
- goto retry_random_path;
- return EC;
+ // Limit the number of attempts we make, so that we don't loop forever. E.g.
+ // "permission denied" could be for a specific file (so we retry with a
+ // different name) or for the whole directory (retry would always fail).
+ // Checking which is racy, so we try a number of times, then give up.
+ std::error_code EC;
+ for (int Retries = 128; Retries > 0; --Retries) {
+ // Replace '%' with random chars.
+ for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) {
+ if (ModelStorage[i] == '%')
+ ResultPath[i] =
+ "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
}
- return std::error_code();
- }
+ // Try to open + create the file.
+ switch (Type) {
+ case FS_File: {
+ EC = sys::fs::openFileForReadWrite(Twine(ResultPath.begin()), ResultFD,
+ sys::fs::CD_CreateNew, Flags, Mode);
+ if (EC) {
+ // errc::permission_denied happens on Windows when we try to open a file
+ // that has been marked for deletion.
+ if (EC == errc::file_exists || EC == errc::permission_denied)
+ continue;
+ return EC;
+ }
- case FS_Name: {
- std::error_code EC =
- sys::fs::access(ResultPath.begin(), sys::fs::AccessMode::Exist);
- if (EC == errc::no_such_file_or_directory)
return std::error_code();
- if (EC)
- return EC;
- goto retry_random_path;
- }
+ }
- case FS_Dir: {
- if (std::error_code EC =
- sys::fs::create_directory(ResultPath.begin(), false)) {
- if (EC == errc::file_exists)
- goto retry_random_path;
- return EC;
+ case FS_Name: {
+ EC = sys::fs::access(ResultPath.begin(), sys::fs::AccessMode::Exist);
+ if (EC == errc::no_such_file_or_directory)
+ return std::error_code();
+ if (EC)
+ return EC;
+ continue;
}
- return std::error_code();
- }
+
+ case FS_Dir: {
+ EC = sys::fs::create_directory(ResultPath.begin(), false);
+ if (EC) {
+ if (EC == errc::file_exists)
+ continue;
+ return EC;
+ }
+ return std::error_code();
+ }
+ }
+ llvm_unreachable("Invalid Type");
}
- llvm_unreachable("Invalid Type");
+ return EC;
}
namespace llvm {
@@ -524,7 +533,7 @@ void replace_path_prefix(SmallVectorImpl<char> &Path,
// If prefixes have the same size we can simply copy the new one over.
if (OldPrefix.size() == NewPrefix.size()) {
- std::copy(NewPrefix.begin(), NewPrefix.end(), Path.begin());
+ llvm::copy(NewPrefix, Path.begin());
return;
}
@@ -840,9 +849,8 @@ getPotentiallyUniqueTempFileName(const Twine &Prefix, StringRef Suffix,
return createTemporaryFile(Prefix, Suffix, Dummy, ResultPath, FS_Name);
}
-static std::error_code make_absolute(const Twine &current_directory,
- SmallVectorImpl<char> &path,
- bool use_current_directory) {
+void make_absolute(const Twine &current_directory,
+ SmallVectorImpl<char> &path) {
StringRef p(path.data(), path.size());
bool rootDirectory = path::has_root_directory(p);
@@ -851,14 +859,11 @@ static std::error_code make_absolute(const Twine &current_directory,
// Already absolute.
if (rootName && rootDirectory)
- return std::error_code();
+ return;
// All of the following conditions will need the current directory.
SmallString<128> current_dir;
- if (use_current_directory)
- current_directory.toVector(current_dir);
- else if (std::error_code ec = current_path(current_dir))
- return ec;
+ current_directory.toVector(current_dir);
// Relative path. Prepend the current directory.
if (!rootName && !rootDirectory) {
@@ -866,7 +871,7 @@ static std::error_code make_absolute(const Twine &current_directory,
path::append(current_dir, p);
// Set path to the result.
path.swap(current_dir);
- return std::error_code();
+ return;
}
if (!rootName && rootDirectory) {
@@ -875,7 +880,7 @@ static std::error_code make_absolute(const Twine &current_directory,
path::append(curDirRootName, p);
// Set path to the result.
path.swap(curDirRootName);
- return std::error_code();
+ return;
}
if (rootName && !rootDirectory) {
@@ -887,20 +892,23 @@ static std::error_code make_absolute(const Twine &current_directory,
SmallString<128> res;
path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
path.swap(res);
- return std::error_code();
+ return;
}
llvm_unreachable("All rootName and rootDirectory combinations should have "
"occurred above!");
}
-std::error_code make_absolute(const Twine &current_directory,
- SmallVectorImpl<char> &path) {
- return make_absolute(current_directory, path, true);
-}
-
std::error_code make_absolute(SmallVectorImpl<char> &path) {
- return make_absolute(Twine(), path, false);
+ if (path::is_absolute(path))
+ return {};
+
+ SmallString<128> current_dir;
+ if (std::error_code ec = current_path(current_dir))
+ return ec;
+
+ make_absolute(current_dir, path);
+ return {};
}
std::error_code create_directories(const Twine &Path, bool IgnoreExisting,
@@ -1076,12 +1084,13 @@ std::error_code is_other(const Twine &Path, bool &Result) {
return std::error_code();
}
-void directory_entry::replace_filename(const Twine &filename,
- basic_file_status st) {
- SmallString<128> path = path::parent_path(Path);
- path::append(path, filename);
- Path = path.str();
- Status = st;
+void directory_entry::replace_filename(const Twine &Filename, file_type Type,
+ basic_file_status Status) {
+ SmallString<128> PathStr = path::parent_path(Path);
+ path::append(PathStr, Filename);
+ this->Path = PathStr.str();
+ this->Type = Type;
+ this->Status = Status;
}
ErrorOr<perms> getPermissions(const Twine &Path) {
@@ -1230,17 +1239,5 @@ Expected<TempFile> TempFile::create(const Twine &Model, unsigned Mode) {
}
}
-namespace path {
-
-bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1,
- const Twine &Path2, const Twine &Path3) {
- if (getUserCacheDir(Result)) {
- append(Result, Path1, Path2, Path3);
- return true;
- }
- return false;
-}
-
-} // end namespace path
} // end namespace sys
} // end namespace llvm
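
The createUniqueEntity change above replaces an unbounded "goto retry" with a capped retry loop, so a persistent error such as an unwritable directory cannot spin forever. A standalone sketch of the same pattern, using a hypothetical Create callback rather than the LLVM file-system API:

#include <random>
#include <string>
#include <system_error>

// Replace each '%' in Model with a random hex digit and try Create() on the
// result. Give up after a fixed number of attempts so a persistent error
// cannot cause an infinite loop; only "file exists" is treated as retryable.
template <typename CreateFn>
std::error_code createUniqueName(const std::string &Model, CreateFn Create) {
  static const char Hex[] = "0123456789abcdef";
  std::mt19937 Rng(std::random_device{}());
  for (int Retries = 128; Retries > 0; --Retries) {
    std::string Candidate = Model;
    for (char &C : Candidate)
      if (C == '%')
        C = Hex[Rng() & 15];
    std::error_code EC = Create(Candidate);
    if (EC != std::errc::file_exists) // Success or a non-retryable error.
      return EC;
  }
  return std::make_error_code(std::errc::file_exists);
}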
diff --git a/contrib/llvm/lib/Support/Process.cpp b/contrib/llvm/lib/Support/Process.cpp
index 3f5a9d722ca0..f32355aefbb7 100644
--- a/contrib/llvm/lib/Support/Process.cpp
+++ b/contrib/llvm/lib/Support/Process.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
@@ -82,12 +83,11 @@ static const char colorcodes[2][2][8][10] = {
{ ALLCOLORS("4",""), ALLCOLORS("4","1;") }
};
-// This is set to true when Process::PreventCoreFiles() is called.
-static bool coreFilesPrevented = false;
+// A CMake option controls whether we emit core dumps by default. An application
+// may disable core dumps by calling Process::PreventCoreFiles().
+static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS;
-bool Process::AreCoreFilesPrevented() {
- return coreFilesPrevented;
-}
+bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; }
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
diff --git a/contrib/llvm/lib/Support/RandomNumberGenerator.cpp b/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
index f1f22af82a81..df0d87fab021 100644
--- a/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
+++ b/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
@@ -49,7 +49,7 @@ RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) {
Data[0] = Seed;
Data[1] = Seed >> 32;
- std::copy(Salt.begin(), Salt.end(), Data.begin() + 2);
+ llvm::copy(Salt, Data.begin() + 2);
std::seed_seq SeedSeq(Data.begin(), Data.end());
Generator.seed(SeedSeq);
diff --git a/contrib/llvm/lib/Support/Signals.cpp b/contrib/llvm/lib/Support/Signals.cpp
index 6534ff69b84c..333f492d4589 100644
--- a/contrib/llvm/lib/Support/Signals.cpp
+++ b/contrib/llvm/lib/Support/Signals.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
@@ -155,7 +157,7 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
}
Optional<StringRef> Redirects[] = {StringRef(InputFile),
- StringRef(OutputFile), llvm::None};
+ StringRef(OutputFile), StringRef("")};
StringRef Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining",
#ifdef _WIN32
// Pass --relative-address on Windows so that we don't
@@ -180,8 +182,14 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
auto CurLine = Lines.begin();
int frame_no = 0;
for (int i = 0; i < Depth; i++) {
+ auto PrintLineHeader = [&]() {
+ OS << right_justify(formatv("#{0}", frame_no++).str(),
+ std::log10(Depth) + 2)
+ << ' ' << format_ptr(StackTrace[i]) << ' ';
+ };
if (!Modules[i]) {
- OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << '\n';
+ PrintLineHeader();
+ OS << '\n';
continue;
}
// Read pairs of lines (function name and file/line info) until we
@@ -192,7 +200,7 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
StringRef FunctionName = *CurLine++;
if (FunctionName.empty())
break;
- OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << ' ';
+ PrintLineHeader();
if (!FunctionName.startswith("??"))
OS << FunctionName << ' ';
if (CurLine == Lines.end())
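
The PrintLineHeader lambda right-justifies the frame number so the addresses line up regardless of stack depth; the field width is std::log10(Depth) + 2 columns ('#' plus the digits). A plain-C++ sketch of that computation, not the formatv-based code above:

#include <cmath>
#include <cstdio>
#include <string>

// Right-justify "#N" in a field wide enough for the deepest frame, mirroring
// the std::log10(Depth) + 2 width used above.
static void printFrameHeader(int FrameNo, int Depth, const void *Addr) {
  int Width = static_cast<int>(std::log10(Depth)) + 2;
  std::string Header = "#" + std::to_string(FrameNo);
  std::printf("%*s %p ", Width, Header.c_str(), Addr);
}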
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
index d8fde7fa8990..a55ad881d012 100644
--- a/contrib/llvm/lib/Support/SourceMgr.cpp
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -269,7 +270,7 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
: SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
FixIts(Hints.begin(), Hints.end()) {
- llvm::sort(FixIts.begin(), FixIts.end());
+ llvm::sort(FixIts);
}
static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
@@ -345,12 +346,18 @@ static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
static void printSourceLine(raw_ostream &S, StringRef LineContents) {
// Print out the source line one character at a time, so we can expand tabs.
for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
- if (LineContents[i] != '\t') {
- S << LineContents[i];
- ++OutCol;
- continue;
+ size_t NextTab = LineContents.find('\t', i);
+ // If there are no tabs left, print the rest; we are done.
+ if (NextTab == StringRef::npos) {
+ S << LineContents.drop_front(i);
+ break;
}
+ // Otherwise, print from i to NextTab.
+ S << LineContents.slice(i, NextTab);
+ OutCol += NextTab - i;
+ i = NextTab;
+
// If we have a tab, emit at least one space, then round up to 8 columns.
do {
S << ' ';
@@ -364,65 +371,48 @@ static bool isNonASCII(char c) {
return c & 0x80;
}
-void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
- bool ShowKindLabel) const {
- // Display colors only if OS supports colors.
- ShowColors &= S.has_colors();
-
- if (ShowColors)
- S.changeColor(raw_ostream::SAVEDCOLOR, true);
+void SMDiagnostic::print(const char *ProgName, raw_ostream &OS,
+ bool ShowColors, bool ShowKindLabel) const {
+ {
+ WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors);
- if (ProgName && ProgName[0])
- S << ProgName << ": ";
+ if (ProgName && ProgName[0])
+ S << ProgName << ": ";
- if (!Filename.empty()) {
- if (Filename == "-")
- S << "<stdin>";
- else
- S << Filename;
+ if (!Filename.empty()) {
+ if (Filename == "-")
+ S << "<stdin>";
+ else
+ S << Filename;
- if (LineNo != -1) {
- S << ':' << LineNo;
- if (ColumnNo != -1)
- S << ':' << (ColumnNo+1);
+ if (LineNo != -1) {
+ S << ':' << LineNo;
+ if (ColumnNo != -1)
+ S << ':' << (ColumnNo + 1);
+ }
+ S << ": ";
}
- S << ": ";
}
if (ShowKindLabel) {
switch (Kind) {
case SourceMgr::DK_Error:
- if (ShowColors)
- S.changeColor(raw_ostream::RED, true);
- S << "error: ";
+ WithColor::error(OS, "", !ShowColors);
break;
case SourceMgr::DK_Warning:
- if (ShowColors)
- S.changeColor(raw_ostream::MAGENTA, true);
- S << "warning: ";
+ WithColor::warning(OS, "", !ShowColors);
break;
case SourceMgr::DK_Note:
- if (ShowColors)
- S.changeColor(raw_ostream::BLACK, true);
- S << "note: ";
+ WithColor::note(OS, "", !ShowColors);
break;
case SourceMgr::DK_Remark:
- if (ShowColors)
- S.changeColor(raw_ostream::BLUE, true);
- S << "remark: ";
+ WithColor::remark(OS, "", !ShowColors);
break;
}
-
- if (ShowColors) {
- S.resetColor();
- S.changeColor(raw_ostream::SAVEDCOLOR, true);
- }
}
- S << Message << '\n';
-
- if (ShowColors)
- S.resetColor();
+ WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors)
+ << Message << '\n';
if (LineNo == -1 || ColumnNo == -1)
return;
@@ -433,7 +423,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// expanding them later, and bail out rather than show incorrect ranges and
// misaligned fixits for any other odd characters.
if (find_if(LineContents, isNonASCII) != LineContents.end()) {
- printSourceLine(S, LineContents);
+ printSourceLine(OS, LineContents);
return;
}
size_t NumColumns = LineContents.size();
@@ -467,29 +457,27 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// least.
CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
- printSourceLine(S, LineContents);
+ printSourceLine(OS, LineContents);
- if (ShowColors)
- S.changeColor(raw_ostream::GREEN, true);
+ {
+ WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors);
- // Print out the caret line, matching tabs in the source line.
- for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
- if (i >= LineContents.size() || LineContents[i] != '\t') {
- S << CaretLine[i];
- ++OutCol;
- continue;
- }
+ // Print out the caret line, matching tabs in the source line.
+ for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
+ if (i >= LineContents.size() || LineContents[i] != '\t') {
+ S << CaretLine[i];
+ ++OutCol;
+ continue;
+ }
- // Okay, we have a tab. Insert the appropriate number of characters.
- do {
- S << CaretLine[i];
- ++OutCol;
- } while ((OutCol % TabStop) != 0);
+ // Okay, we have a tab. Insert the appropriate number of characters.
+ do {
+ S << CaretLine[i];
+ ++OutCol;
+ } while ((OutCol % TabStop) != 0);
+ }
+ S << '\n';
}
- S << '\n';
-
- if (ShowColors)
- S.resetColor();
// Print out the replacement line, matching tabs in the source line.
if (FixItInsertionLine.empty())
@@ -497,14 +485,14 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
if (i >= LineContents.size() || LineContents[i] != '\t') {
- S << FixItInsertionLine[i];
+ OS << FixItInsertionLine[i];
++OutCol;
continue;
}
// Okay, we have a tab. Insert the appropriate number of characters.
do {
- S << FixItInsertionLine[i];
+ OS << FixItInsertionLine[i];
// FIXME: This is trying not to break up replacements, but then to re-sync
// with the tabs between replacements. This will fail, though, if two
// fix-it replacements are exactly adjacent, or if a fix-it contains a
@@ -515,5 +503,5 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
++OutCol;
} while (((OutCol % TabStop) != 0) && i != e);
}
- S << '\n';
+ OS << '\n';
}
diff --git a/contrib/llvm/lib/Support/StringSaver.cpp b/contrib/llvm/lib/Support/StringSaver.cpp
index 1ded2bdb09de..bf0ac8de9821 100644
--- a/contrib/llvm/lib/Support/StringSaver.cpp
+++ b/contrib/llvm/lib/Support/StringSaver.cpp
@@ -13,7 +13,8 @@ using namespace llvm;
StringRef StringSaver::save(StringRef S) {
char *P = Alloc.Allocate<char>(S.size() + 1);
- memcpy(P, S.data(), S.size());
+ if (!S.empty())
+ memcpy(P, S.data(), S.size());
P[S.size()] = '\0';
return StringRef(P, S.size());
}
diff --git a/contrib/llvm/lib/Support/SymbolRemappingReader.cpp b/contrib/llvm/lib/Support/SymbolRemappingReader.cpp
new file mode 100644
index 000000000000..264c890ce8f1
--- /dev/null
+++ b/contrib/llvm/lib/Support/SymbolRemappingReader.cpp
@@ -0,0 +1,81 @@
+//===- SymbolRemappingReader.cpp - Read symbol remapping file -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions needed for reading and applying symbol
+// remapping files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SymbolRemappingReader.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/LineIterator.h"
+
+using namespace llvm;
+
+char SymbolRemappingParseError::ID;
+
+/// Load a set of name remappings from a text file.
+///
+/// See the documentation at the top of the file for an explanation of
+/// the expected format.
+Error SymbolRemappingReader::read(MemoryBuffer &B) {
+ line_iterator LineIt(B, /*SkipBlanks=*/true, '#');
+
+ auto ReportError = [&](Twine Msg) {
+ return llvm::make_error<SymbolRemappingParseError>(
+ B.getBufferIdentifier(), LineIt.line_number(), Msg);
+ };
+
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef Line = *LineIt;
+ Line = Line.ltrim(' ');
+ // line_iterator only detects comments starting in column 1.
+ if (Line.startswith("#") || Line.empty())
+ continue;
+
+ SmallVector<StringRef, 4> Parts;
+ Line.split(Parts, ' ', /*MaxSplits*/-1, /*KeepEmpty*/false);
+
+ if (Parts.size() != 3)
+ return ReportError("Expected 'kind mangled_name mangled_name', "
+ "found '" + Line + "'");
+
+ using FK = ItaniumManglingCanonicalizer::FragmentKind;
+ Optional<FK> FragmentKind = StringSwitch<Optional<FK>>(Parts[0])
+ .Case("name", FK::Name)
+ .Case("type", FK::Type)
+ .Case("encoding", FK::Encoding)
+ .Default(None);
+ if (!FragmentKind)
+ return ReportError("Invalid kind, expected 'name', 'type', or 'encoding',"
+ " found '" + Parts[0] + "'");
+
+ using EE = ItaniumManglingCanonicalizer::EquivalenceError;
+ switch (Canonicalizer.addEquivalence(*FragmentKind, Parts[1], Parts[2])) {
+ case EE::Success:
+ break;
+
+ case EE::ManglingAlreadyUsed:
+ return ReportError("Manglings '" + Parts[1] + "' and '" + Parts[2] + "' "
+ "have both been used in prior remappings. Move this "
+ "remapping earlier in the file.");
+
+ case EE::InvalidFirstMangling:
+ return ReportError("Could not demangle '" + Parts[1] + "' "
+ "as a <" + Parts[0] + ">; invalid mangling?");
+
+ case EE::InvalidSecondMangling:
+ return ReportError("Could not demangle '" + Parts[2] + "' "
+ "as a <" + Parts[0] + ">; invalid mangling?");
+ }
+ }
+
+ return Error::success();
+}
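
A hedged sketch of feeding this reader an in-memory remapping file. The line format ("kind mangled-name mangled-name", with kind one of name, type, or encoding, and '#' starting a comment) comes from the parsing code above; it assumes SymbolRemappingReader is default-constructible, which this file does not show, and the manglings are illustrative.

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SymbolRemappingReader.h"
#include <memory>

// Treat the namespaces mangled as "3foo" and "3bar" as equivalent names.
llvm::Error loadExampleRemappings(llvm::SymbolRemappingReader &Reader) {
  static const char Mapping[] =
      "# comment lines start with '#'\n"
      "name 3foo 3bar\n";
  std::unique_ptr<llvm::MemoryBuffer> Buf =
      llvm::MemoryBuffer::getMemBuffer(Mapping, "example.remap");
  return Reader.read(*Buf);
}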
diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp
index 2c167a4d086c..bdc0dc52c5e2 100644
--- a/contrib/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm/lib/Support/TargetParser.cpp
@@ -14,926 +14,186 @@
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
-#include <cctype>
using namespace llvm;
-using namespace ARM;
-using namespace AArch64;
+using namespace AMDGPU;
namespace {
-// List of canonical FPU names (use getFPUSynonym) and which architectural
-// features they correspond to (use getFPUFeatures).
-// FIXME: TableGen this.
-// The entries must appear in the order listed in ARM::FPUKind for correct indexing
-static const struct {
- const char *NameCStr;
- size_t NameLength;
- ARM::FPUKind ID;
- ARM::FPUVersion FPUVersion;
- ARM::NeonSupportLevel NeonSupport;
- ARM::FPURestriction Restriction;
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-} FPUNames[] = {
-#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) \
- { NAME, sizeof(NAME) - 1, KIND, VERSION, NEON_SUPPORT, RESTRICTION },
-#include "llvm/Support/ARMTargetParser.def"
-};
-
-// List of canonical arch names (use getArchSynonym).
-// This table also provides the build attribute fields for CPU arch
-// and Arch ID, according to the Addenda to the ARM ABI, chapters
-// 2.4 and 2.3.5.2 respectively.
-// FIXME: SubArch values were simplified to fit into the expectations
-// of the triples and are not conforming with their official names.
-// Check to see if the expectation should be changed.
-// FIXME: TableGen this.
-template <typename T> struct ArchNames {
- const char *NameCStr;
- size_t NameLength;
- const char *CPUAttrCStr;
- size_t CPUAttrLength;
- const char *SubArchCStr;
- size_t SubArchLength;
- unsigned DefaultFPU;
- unsigned ArchBaseExtensions;
- T ID;
- ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes.
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-
- // CPU class in build attributes.
- StringRef getCPUAttr() const { return StringRef(CPUAttrCStr, CPUAttrLength); }
-
- // Sub-Arch name.
- StringRef getSubArch() const { return StringRef(SubArchCStr, SubArchLength); }
+struct GPUInfo {
+ StringLiteral Name;
+ StringLiteral CanonicalName;
+ AMDGPU::GPUKind Kind;
+ unsigned Features;
};
-ArchNames<ARM::ArchKind> ARCHNames[] = {
-#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
- {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \
- sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, ARM::ArchKind::ID, ARCH_ATTR},
-#include "llvm/Support/ARMTargetParser.def"
-};
-
-ArchNames<AArch64::ArchKind> AArch64ARCHNames[] = {
- #define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
- {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \
- sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, AArch64::ArchKind::ID, ARCH_ATTR},
- #include "llvm/Support/AArch64TargetParser.def"
- };
-
-// List of Arch Extension names.
-// FIXME: TableGen this.
-static const struct {
- const char *NameCStr;
- size_t NameLength;
- unsigned ID;
- const char *Feature;
- const char *NegFeature;
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-} ARCHExtNames[] = {
-#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
- { NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE },
-#include "llvm/Support/ARMTargetParser.def"
-},AArch64ARCHExtNames[] = {
-#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
- { NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE },
-#include "llvm/Support/AArch64TargetParser.def"
+constexpr GPUInfo R600GPUs[26] = {
+ // Name Canonical Kind Features
+ // Name
+ {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
+ {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
+ {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
+ {{"r630"}, {"r630"}, GK_R630, FEATURE_NONE },
+ {{"rs780"}, {"rs880"}, GK_RS880, FEATURE_NONE },
+ {{"rs880"}, {"rs880"}, GK_RS880, FEATURE_NONE },
+ {{"rv610"}, {"rs880"}, GK_RS880, FEATURE_NONE },
+ {{"rv620"}, {"rs880"}, GK_RS880, FEATURE_NONE },
+ {{"rv670"}, {"rv670"}, GK_RV670, FEATURE_NONE },
+ {{"rv710"}, {"rv710"}, GK_RV710, FEATURE_NONE },
+ {{"rv730"}, {"rv730"}, GK_RV730, FEATURE_NONE },
+ {{"rv740"}, {"rv770"}, GK_RV770, FEATURE_NONE },
+ {{"rv770"}, {"rv770"}, GK_RV770, FEATURE_NONE },
+ {{"cedar"}, {"cedar"}, GK_CEDAR, FEATURE_NONE },
+ {{"palm"}, {"cedar"}, GK_CEDAR, FEATURE_NONE },
+ {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA },
+ {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA },
+ {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE },
+ {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE },
+ {{"sumo"}, {"sumo"}, GK_SUMO, FEATURE_NONE },
+ {{"sumo2"}, {"sumo"}, GK_SUMO, FEATURE_NONE },
+ {{"barts"}, {"barts"}, GK_BARTS, FEATURE_NONE },
+ {{"caicos"}, {"caicos"}, GK_CAICOS, FEATURE_NONE },
+ {{"aruba"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA },
+ {{"cayman"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA },
+ {{"turks"}, {"turks"}, GK_TURKS, FEATURE_NONE }
};
-// List of HWDiv names (use getHWDivSynonym) and which architectural
-// features they correspond to (use getHWDivFeatures).
-// FIXME: TableGen this.
-static const struct {
- const char *NameCStr;
- size_t NameLength;
- unsigned ID;
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-} HWDivNames[] = {
-#define ARM_HW_DIV_NAME(NAME, ID) { NAME, sizeof(NAME) - 1, ID },
-#include "llvm/Support/ARMTargetParser.def"
+// This table should be sorted by the value of GPUKind
+// Don't bother listing the implicitly true features
+constexpr GPUInfo AMDGCNGPUs[33] = {
+  // Name, Canonical Name, Kind, Features
+ {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
+ {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
+ {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"hainan"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"oland"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
+ {{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
+ {{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE},
+ {{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
+ {{"hawaii"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32},
+ {{"gfx702"}, {"gfx702"}, GK_GFX702, FEATURE_FAST_FMA_F32},
+ {{"gfx703"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"kabini"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE},
+ {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
+ {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE},
+ {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32},
+ {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32},
+ {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
-// List of CPU names and their arches.
-// The same CPU can have multiple arches and can be default on multiple arches.
-// When finding the Arch for a CPU, first-found prevails. Sort them accordingly.
-// When this becomes table-generated, we'd probably need two tables.
-// FIXME: TableGen this.
-template <typename T> struct CpuNames {
- const char *NameCStr;
- size_t NameLength;
- T ArchID;
- bool Default; // is $Name the default CPU for $ArchID ?
- unsigned DefaultExtensions;
-
- StringRef getName() const { return StringRef(NameCStr, NameLength); }
-};
-CpuNames<ARM::ArchKind> CPUNames[] = {
-#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- { NAME, sizeof(NAME) - 1, ARM::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT },
-#include "llvm/Support/ARMTargetParser.def"
-};
-
-CpuNames<AArch64::ArchKind> AArch64CPUNames[] = {
- #define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- { NAME, sizeof(NAME) - 1, AArch64::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT },
- #include "llvm/Support/AArch64TargetParser.def"
- };
-
-} // namespace
-
-// ======================================================= //
-// Information by ID
-// ======================================================= //
-
-StringRef ARM::getFPUName(unsigned FPUKind) {
- if (FPUKind >= ARM::FK_LAST)
- return StringRef();
- return FPUNames[FPUKind].getName();
-}
-
-FPUVersion ARM::getFPUVersion(unsigned FPUKind) {
- if (FPUKind >= ARM::FK_LAST)
- return FPUVersion::NONE;
- return FPUNames[FPUKind].FPUVersion;
-}
-
-ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(unsigned FPUKind) {
- if (FPUKind >= ARM::FK_LAST)
- return ARM::NeonSupportLevel::None;
- return FPUNames[FPUKind].NeonSupport;
-}
-
-ARM::FPURestriction ARM::getFPURestriction(unsigned FPUKind) {
- if (FPUKind >= ARM::FK_LAST)
- return ARM::FPURestriction::None;
- return FPUNames[FPUKind].Restriction;
-}
-
-unsigned llvm::ARM::getDefaultFPU(StringRef CPU, ArchKind AK) {
- if (CPU == "generic")
- return ARCHNames[static_cast<unsigned>(AK)].DefaultFPU;
-
- return StringSwitch<unsigned>(CPU)
-#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, DEFAULT_FPU)
-#include "llvm/Support/ARMTargetParser.def"
- .Default(ARM::FK_INVALID);
-}
-
-unsigned llvm::ARM::getDefaultExtensions(StringRef CPU, ArchKind AK) {
- if (CPU == "generic")
- return ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
-
- return StringSwitch<unsigned>(CPU)
-#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, ARCHNames[static_cast<unsigned>(ARM::ArchKind::ID)]\
- .ArchBaseExtensions | DEFAULT_EXT)
-#include "llvm/Support/ARMTargetParser.def"
- .Default(ARM::AEK_INVALID);
-}
-
-bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind,
- std::vector<StringRef> &Features) {
-
- if (HWDivKind == ARM::AEK_INVALID)
- return false;
-
- if (HWDivKind & ARM::AEK_HWDIVARM)
- Features.push_back("+hwdiv-arm");
- else
- Features.push_back("-hwdiv-arm");
-
- if (HWDivKind & ARM::AEK_HWDIVTHUMB)
- Features.push_back("+hwdiv");
- else
- Features.push_back("-hwdiv");
-
- return true;
-}
-
-bool llvm::ARM::getExtensionFeatures(unsigned Extensions,
- std::vector<StringRef> &Features) {
-
- if (Extensions == ARM::AEK_INVALID)
- return false;
-
- if (Extensions & ARM::AEK_CRC)
- Features.push_back("+crc");
- else
- Features.push_back("-crc");
-
- if (Extensions & ARM::AEK_DSP)
- Features.push_back("+dsp");
- else
- Features.push_back("-dsp");
-
- if (Extensions & ARM::AEK_RAS)
- Features.push_back("+ras");
- else
- Features.push_back("-ras");
-
- if (Extensions & ARM::AEK_DOTPROD)
- Features.push_back("+dotprod");
- else
- Features.push_back("-dotprod");
-
- return getHWDivFeatures(Extensions, Features);
-}
-
-bool llvm::ARM::getFPUFeatures(unsigned FPUKind,
- std::vector<StringRef> &Features) {
-
- if (FPUKind >= ARM::FK_LAST || FPUKind == ARM::FK_INVALID)
- return false;
-
- // fp-only-sp and d16 subtarget features are independent of each other, so we
- // must enable/disable both.
- switch (FPUNames[FPUKind].Restriction) {
- case ARM::FPURestriction::SP_D16:
- Features.push_back("+fp-only-sp");
- Features.push_back("+d16");
- break;
- case ARM::FPURestriction::D16:
- Features.push_back("-fp-only-sp");
- Features.push_back("+d16");
- break;
- case ARM::FPURestriction::None:
- Features.push_back("-fp-only-sp");
- Features.push_back("-d16");
- break;
- }
-
- // FPU version subtarget features are inclusive of lower-numbered ones, so
- // enable the one corresponding to this version and disable all that are
- // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
- // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
- switch (FPUNames[FPUKind].FPUVersion) {
- case ARM::FPUVersion::VFPV5:
- Features.push_back("+fp-armv8");
- break;
- case ARM::FPUVersion::VFPV4:
- Features.push_back("+vfp4");
- Features.push_back("-fp-armv8");
- break;
- case ARM::FPUVersion::VFPV3_FP16:
- Features.push_back("+vfp3");
- Features.push_back("+fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case ARM::FPUVersion::VFPV3:
- Features.push_back("+vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case ARM::FPUVersion::VFPV2:
- Features.push_back("+vfp2");
- Features.push_back("-vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case ARM::FPUVersion::NONE:
- Features.push_back("-vfp2");
- Features.push_back("-vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- }
-
- // crypto includes neon, so we handle this similarly to FPU version.
- switch (FPUNames[FPUKind].NeonSupport) {
- case ARM::NeonSupportLevel::Crypto:
- Features.push_back("+neon");
- Features.push_back("+crypto");
- break;
- case ARM::NeonSupportLevel::Neon:
- Features.push_back("+neon");
- Features.push_back("-crypto");
- break;
- case ARM::NeonSupportLevel::None:
- Features.push_back("-neon");
- Features.push_back("-crypto");
- break;
- }
+const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
+ GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
- return true;
-}
-
-StringRef llvm::ARM::getArchName(ArchKind AK) {
- return ARCHNames[static_cast<unsigned>(AK)].getName();
-}
+ auto I = std::lower_bound(Table.begin(), Table.end(), Search,
+ [](const GPUInfo &A, const GPUInfo &B) {
+ return A.Kind < B.Kind;
+ });
-StringRef llvm::ARM::getCPUAttr(ArchKind AK) {
- return ARCHNames[static_cast<unsigned>(AK)].getCPUAttr();
+ if (I == Table.end())
+ return nullptr;
+ return I;
}
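The lookup above relies on the tables being sorted by GPUKind, as noted for AMDGCNGPUs; a minimal sketch, assuming the table contents shown:

    // Sketch: lower_bound lands on the first row whose Kind is not less than
    // the requested kind, i.e. the canonical row for that kind.
    const GPUInfo *Entry = getArchEntry(AMDGPU::GK_GFX801, AMDGCNGPUs);
    // Entry->CanonicalName is "gfx801"; Entry->Features has both
    // FEATURE_FAST_FMA_F32 and FEATURE_FAST_DENORMAL_F32 set.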
-StringRef llvm::ARM::getSubArch(ArchKind AK) {
- return ARCHNames[static_cast<unsigned>(AK)].getSubArch();
-}
-
-unsigned llvm::ARM::getArchAttr(ArchKind AK) {
- return ARCHNames[static_cast<unsigned>(AK)].ArchAttr;
-}
-
-StringRef llvm::ARM::getArchExtName(unsigned ArchExtKind) {
- for (const auto AE : ARCHExtNames) {
- if (ArchExtKind == AE.ID)
- return AE.getName();
- }
- return StringRef();
-}
-
-StringRef llvm::ARM::getArchExtFeature(StringRef ArchExt) {
- if (ArchExt.startswith("no")) {
- StringRef ArchExtBase(ArchExt.substr(2));
- for (const auto AE : ARCHExtNames) {
- if (AE.NegFeature && ArchExtBase == AE.getName())
- return StringRef(AE.NegFeature);
- }
- }
- for (const auto AE : ARCHExtNames) {
- if (AE.Feature && ArchExt == AE.getName())
- return StringRef(AE.Feature);
- }
-
- return StringRef();
-}
-
-StringRef llvm::ARM::getHWDivName(unsigned HWDivKind) {
- for (const auto D : HWDivNames) {
- if (HWDivKind == D.ID)
- return D.getName();
- }
- return StringRef();
-}
-
-StringRef llvm::ARM::getDefaultCPU(StringRef Arch) {
- ArchKind AK = parseArch(Arch);
- if (AK == ARM::ArchKind::INVALID)
- return StringRef();
-
- // Look for multiple AKs to find the default for pair AK+Name.
- for (const auto CPU : CPUNames) {
- if (CPU.ArchID == AK && CPU.Default)
- return CPU.getName();
- }
-
- // If we can't find a default then target the architecture instead
- return "generic";
-}
-
-StringRef llvm::AArch64::getFPUName(unsigned FPUKind) {
- return ARM::getFPUName(FPUKind);
-}
-
-ARM::FPUVersion AArch64::getFPUVersion(unsigned FPUKind) {
- return ARM::getFPUVersion(FPUKind);
-}
-
-ARM::NeonSupportLevel AArch64::getFPUNeonSupportLevel(unsigned FPUKind) {
- return ARM::getFPUNeonSupportLevel( FPUKind);
-}
-
-ARM::FPURestriction AArch64::getFPURestriction(unsigned FPUKind) {
- return ARM::getFPURestriction(FPUKind);
-}
-
-unsigned llvm::AArch64::getDefaultFPU(StringRef CPU, ArchKind AK) {
- if (CPU == "generic")
- return AArch64ARCHNames[static_cast<unsigned>(AK)].DefaultFPU;
-
- return StringSwitch<unsigned>(CPU)
-#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, DEFAULT_FPU)
-#include "llvm/Support/AArch64TargetParser.def"
- .Default(ARM::FK_INVALID);
-}
-
-unsigned llvm::AArch64::getDefaultExtensions(StringRef CPU, ArchKind AK) {
- if (CPU == "generic")
- return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchBaseExtensions;
-
- return StringSwitch<unsigned>(CPU)
-#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, \
- AArch64ARCHNames[static_cast<unsigned>(AArch64::ArchKind::ID)] \
- .ArchBaseExtensions | \
- DEFAULT_EXT)
-#include "llvm/Support/AArch64TargetParser.def"
- .Default(AArch64::AEK_INVALID);
-}
-
-AArch64::ArchKind llvm::AArch64::getCPUArchKind(StringRef CPU) {
- if (CPU == "generic")
- return AArch64::ArchKind::ARMV8A;
-
- return StringSwitch<AArch64::ArchKind>(CPU)
-#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, AArch64::ArchKind:: ID)
-#include "llvm/Support/AArch64TargetParser.def"
- .Default(AArch64::ArchKind::INVALID);
-}
-
-bool llvm::AArch64::getExtensionFeatures(unsigned Extensions,
- std::vector<StringRef> &Features) {
-
- if (Extensions == AArch64::AEK_INVALID)
- return false;
-
- if (Extensions & AArch64::AEK_FP)
- Features.push_back("+fp-armv8");
- if (Extensions & AArch64::AEK_SIMD)
- Features.push_back("+neon");
- if (Extensions & AArch64::AEK_CRC)
- Features.push_back("+crc");
- if (Extensions & AArch64::AEK_CRYPTO)
- Features.push_back("+crypto");
- if (Extensions & AArch64::AEK_DOTPROD)
- Features.push_back("+dotprod");
- if (Extensions & AArch64::AEK_FP16)
- Features.push_back("+fullfp16");
- if (Extensions & AArch64::AEK_PROFILE)
- Features.push_back("+spe");
- if (Extensions & AArch64::AEK_RAS)
- Features.push_back("+ras");
- if (Extensions & AArch64::AEK_LSE)
- Features.push_back("+lse");
- if (Extensions & AArch64::AEK_RDM)
- Features.push_back("+rdm");
- if (Extensions & AArch64::AEK_SVE)
- Features.push_back("+sve");
- if (Extensions & AArch64::AEK_RCPC)
- Features.push_back("+rcpc");
-
- return true;
-}
-
-bool llvm::AArch64::getFPUFeatures(unsigned FPUKind,
- std::vector<StringRef> &Features) {
- return ARM::getFPUFeatures(FPUKind, Features);
-}
-
-bool llvm::AArch64::getArchFeatures(AArch64::ArchKind AK,
- std::vector<StringRef> &Features) {
- if (AK == AArch64::ArchKind::ARMV8_1A)
- Features.push_back("+v8.1a");
- if (AK == AArch64::ArchKind::ARMV8_2A)
- Features.push_back("+v8.2a");
- if (AK == AArch64::ArchKind::ARMV8_3A)
- Features.push_back("+v8.3a");
- if (AK == AArch64::ArchKind::ARMV8_4A)
- Features.push_back("+v8.4a");
-
- return AK != AArch64::ArchKind::INVALID;
-}
-
-StringRef llvm::AArch64::getArchName(ArchKind AK) {
- return AArch64ARCHNames[static_cast<unsigned>(AK)].getName();
-}
-
-StringRef llvm::AArch64::getCPUAttr(ArchKind AK) {
- return AArch64ARCHNames[static_cast<unsigned>(AK)].getCPUAttr();
-}
-
-StringRef llvm::AArch64::getSubArch(ArchKind AK) {
- return AArch64ARCHNames[static_cast<unsigned>(AK)].getSubArch();
-}
-
-unsigned llvm::AArch64::getArchAttr(ArchKind AK) {
- return AArch64ARCHNames[static_cast<unsigned>(AK)].ArchAttr;
-}
-
-StringRef llvm::AArch64::getArchExtName(unsigned ArchExtKind) {
- for (const auto &AE : AArch64ARCHExtNames)
- if (ArchExtKind == AE.ID)
- return AE.getName();
- return StringRef();
-}
-
-StringRef llvm::AArch64::getArchExtFeature(StringRef ArchExt) {
- if (ArchExt.startswith("no")) {
- StringRef ArchExtBase(ArchExt.substr(2));
- for (const auto &AE : AArch64ARCHExtNames) {
- if (AE.NegFeature && ArchExtBase == AE.getName())
- return StringRef(AE.NegFeature);
- }
- }
-
- for (const auto &AE : AArch64ARCHExtNames)
- if (AE.Feature && ArchExt == AE.getName())
- return StringRef(AE.Feature);
- return StringRef();
-}
-
-StringRef llvm::AArch64::getDefaultCPU(StringRef Arch) {
- AArch64::ArchKind AK = parseArch(Arch);
- if (AK == ArchKind::INVALID)
- return StringRef();
-
- // Look for multiple AKs to find the default for pair AK+Name.
- for (const auto &CPU : AArch64CPUNames)
- if (CPU.ArchID == AK && CPU.Default)
- return CPU.getName();
-
- // If we can't find a default then target the architecture instead
- return "generic";
-}
-
-unsigned llvm::AArch64::checkArchVersion(StringRef Arch) {
- if (Arch.size() >= 2 && Arch[0] == 'v' && std::isdigit(Arch[1]))
- return (Arch[1] - 48);
- return 0;
-}
-
-// ======================================================= //
-// Parsers
-// ======================================================= //
-
-static StringRef getHWDivSynonym(StringRef HWDiv) {
- return StringSwitch<StringRef>(HWDiv)
- .Case("thumb,arm", "arm,thumb")
- .Default(HWDiv);
-}
-
-static StringRef getFPUSynonym(StringRef FPU) {
- return StringSwitch<StringRef>(FPU)
- .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
- .Case("vfp2", "vfpv2")
- .Case("vfp3", "vfpv3")
- .Case("vfp4", "vfpv4")
- .Case("vfp3-d16", "vfpv3-d16")
- .Case("vfp4-d16", "vfpv4-d16")
- .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
- .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
- .Case("fp5-sp-d16", "fpv5-sp-d16")
- .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
- // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
- .Case("neon-vfpv3", "neon")
- .Default(FPU);
-}
-
-static StringRef getArchSynonym(StringRef Arch) {
- return StringSwitch<StringRef>(Arch)
- .Case("v5", "v5t")
- .Case("v5e", "v5te")
- .Case("v6j", "v6")
- .Case("v6hl", "v6k")
- .Cases("v6m", "v6sm", "v6s-m", "v6-m")
- .Cases("v6z", "v6zk", "v6kz")
- .Cases("v7", "v7a", "v7hl", "v7l", "v7-a")
- .Case("v7r", "v7-r")
- .Case("v7m", "v7-m")
- .Case("v7em", "v7e-m")
- .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a")
- .Case("v8.1a", "v8.1-a")
- .Case("v8.2a", "v8.2-a")
- .Case("v8.3a", "v8.3-a")
- .Case("v8.4a", "v8.4-a")
- .Case("v8r", "v8-r")
- .Case("v8m.base", "v8-m.base")
- .Case("v8m.main", "v8-m.main")
- .Default(Arch);
-}
-
-// MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, but
-// (iwmmxt|xscale)(eb)? is also permitted. If the former, return
-// "v.+", if the latter, return unmodified string, minus 'eb'.
-// If invalid, return empty string.
-StringRef llvm::ARM::getCanonicalArchName(StringRef Arch) {
- size_t offset = StringRef::npos;
- StringRef A = Arch;
- StringRef Error = "";
-
- // Begins with "arm" / "thumb", move past it.
- if (A.startswith("arm64"))
- offset = 5;
- else if (A.startswith("arm"))
- offset = 3;
- else if (A.startswith("thumb"))
- offset = 5;
- else if (A.startswith("aarch64")) {
- offset = 7;
- // AArch64 uses "_be", not "eb" suffix.
- if (A.find("eb") != StringRef::npos)
- return Error;
- if (A.substr(offset, 3) == "_be")
- offset += 3;
- }
-
- // Ex. "armebv7", move past the "eb".
- if (offset != StringRef::npos && A.substr(offset, 2) == "eb")
- offset += 2;
- // Or, if it ends with eb ("armv7eb"), chop it off.
- else if (A.endswith("eb"))
- A = A.substr(0, A.size() - 2);
- // Trim the head
- if (offset != StringRef::npos)
- A = A.substr(offset);
-
- // Empty string means offset reached the end, which means it's valid.
- if (A.empty())
- return Arch;
-
- // Only match non-marketing names
- if (offset != StringRef::npos) {
- // Must start with 'vN'.
- if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1])))
- return Error;
- // Can't have an extra 'eb'.
- if (A.find("eb") != StringRef::npos)
- return Error;
- }
-
- // Arch will either be a 'v' name (v7a) or a marketing name (xscale).
- return A;
-}
-
-unsigned llvm::ARM::parseHWDiv(StringRef HWDiv) {
- StringRef Syn = getHWDivSynonym(HWDiv);
- for (const auto D : HWDivNames) {
- if (Syn == D.getName())
- return D.ID;
- }
- return ARM::AEK_INVALID;
-}
-
-unsigned llvm::ARM::parseFPU(StringRef FPU) {
- StringRef Syn = getFPUSynonym(FPU);
- for (const auto F : FPUNames) {
- if (Syn == F.getName())
- return F.ID;
- }
- return ARM::FK_INVALID;
-}
-
-// Allows partial match, ex. "v7a" matches "armv7a".
-ARM::ArchKind ARM::parseArch(StringRef Arch) {
- Arch = getCanonicalArchName(Arch);
- StringRef Syn = getArchSynonym(Arch);
- for (const auto A : ARCHNames) {
- if (A.getName().endswith(Syn))
- return A.ID;
- }
- return ARM::ArchKind::INVALID;
-}
+} // namespace
-unsigned llvm::ARM::parseArchExt(StringRef ArchExt) {
- for (const auto A : ARCHExtNames) {
- if (ArchExt == A.getName())
- return A.ID;
- }
- return ARM::AEK_INVALID;
+StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
+ if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
+ return Entry->CanonicalName;
+ return "";
}
-ARM::ArchKind llvm::ARM::parseCPUArch(StringRef CPU) {
- for (const auto C : CPUNames) {
- if (CPU == C.getName())
- return C.ArchID;
- }
- return ARM::ArchKind::INVALID;
+StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
+ if (const auto *Entry = getArchEntry(AK, R600GPUs))
+ return Entry->CanonicalName;
+ return "";
}
-void llvm::ARM::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
- for (const CpuNames<ARM::ArchKind> &Arch : CPUNames) {
- if (Arch.ArchID != ARM::ArchKind::INVALID)
- Values.push_back(Arch.getName());
+AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
+ for (const auto C : AMDGCNGPUs) {
+ if (CPU == C.Name)
+ return C.Kind;
}
-}
-void llvm::AArch64::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
- for (const CpuNames<AArch64::ArchKind> &Arch : AArch64CPUNames) {
- if (Arch.ArchID != AArch64::ArchKind::INVALID)
- Values.push_back(Arch.getName());
- }
-}
-
-// ARM, Thumb, AArch64
-ARM::ISAKind ARM::parseArchISA(StringRef Arch) {
- return StringSwitch<ARM::ISAKind>(Arch)
- .StartsWith("aarch64", ARM::ISAKind::AARCH64)
- .StartsWith("arm64", ARM::ISAKind::AARCH64)
- .StartsWith("thumb", ARM::ISAKind::THUMB)
- .StartsWith("arm", ARM::ISAKind::ARM)
- .Default(ARM::ISAKind::INVALID);
+ return AMDGPU::GPUKind::GK_NONE;
}
-// Little/Big endian
-ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
- if (Arch.startswith("armeb") || Arch.startswith("thumbeb") ||
- Arch.startswith("aarch64_be"))
- return ARM::EndianKind::BIG;
-
- if (Arch.startswith("arm") || Arch.startswith("thumb")) {
- if (Arch.endswith("eb"))
- return ARM::EndianKind::BIG;
- else
- return ARM::EndianKind::LITTLE;
+AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
+ for (const auto C : R600GPUs) {
+ if (CPU == C.Name)
+ return C.Kind;
}
- if (Arch.startswith("aarch64"))
- return ARM::EndianKind::LITTLE;
-
- return ARM::EndianKind::INVALID;
+ return AMDGPU::GPUKind::GK_NONE;
}
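A brief usage sketch, not part of the change, of the parse/print pair defined above, resolving a marketing name to its canonical spelling:

    // "carrizo" and "gfx801" share a GPUKind; printing goes through the
    // canonical-name column of the table.
    AMDGPU::GPUKind Kind = AMDGPU::parseArchAMDGCN("carrizo"); // GK_GFX801
    StringRef Canonical = AMDGPU::getArchNameAMDGCN(Kind);     // "gfx801"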
-// Profile A/R/M
-ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
- Arch = getCanonicalArchName(Arch);
- switch (parseArch(Arch)) {
- case ARM::ArchKind::ARMV6M:
- case ARM::ArchKind::ARMV7M:
- case ARM::ArchKind::ARMV7EM:
- case ARM::ArchKind::ARMV8MMainline:
- case ARM::ArchKind::ARMV8MBaseline:
- return ARM::ProfileKind::M;
- case ARM::ArchKind::ARMV7R:
- case ARM::ArchKind::ARMV8R:
- return ARM::ProfileKind::R;
- case ARM::ArchKind::ARMV7A:
- case ARM::ArchKind::ARMV7VE:
- case ARM::ArchKind::ARMV7K:
- case ARM::ArchKind::ARMV8A:
- case ARM::ArchKind::ARMV8_1A:
- case ARM::ArchKind::ARMV8_2A:
- case ARM::ArchKind::ARMV8_3A:
- case ARM::ArchKind::ARMV8_4A:
- return ARM::ProfileKind::A;
- case ARM::ArchKind::ARMV2:
- case ARM::ArchKind::ARMV2A:
- case ARM::ArchKind::ARMV3:
- case ARM::ArchKind::ARMV3M:
- case ARM::ArchKind::ARMV4:
- case ARM::ArchKind::ARMV4T:
- case ARM::ArchKind::ARMV5T:
- case ARM::ArchKind::ARMV5TE:
- case ARM::ArchKind::ARMV5TEJ:
- case ARM::ArchKind::ARMV6:
- case ARM::ArchKind::ARMV6K:
- case ARM::ArchKind::ARMV6T2:
- case ARM::ArchKind::ARMV6KZ:
- case ARM::ArchKind::ARMV7S:
- case ARM::ArchKind::IWMMXT:
- case ARM::ArchKind::IWMMXT2:
- case ARM::ArchKind::XSCALE:
- case ARM::ArchKind::INVALID:
- return ARM::ProfileKind::INVALID;
- }
- llvm_unreachable("Unhandled architecture");
+unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) {
+ if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
+ return Entry->Features;
+ return FEATURE_NONE;
}
-// Version number (ex. v7 = 7).
-unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
- Arch = getCanonicalArchName(Arch);
- switch (parseArch(Arch)) {
- case ARM::ArchKind::ARMV2:
- case ARM::ArchKind::ARMV2A:
- return 2;
- case ARM::ArchKind::ARMV3:
- case ARM::ArchKind::ARMV3M:
- return 3;
- case ARM::ArchKind::ARMV4:
- case ARM::ArchKind::ARMV4T:
- return 4;
- case ARM::ArchKind::ARMV5T:
- case ARM::ArchKind::ARMV5TE:
- case ARM::ArchKind::IWMMXT:
- case ARM::ArchKind::IWMMXT2:
- case ARM::ArchKind::XSCALE:
- case ARM::ArchKind::ARMV5TEJ:
- return 5;
- case ARM::ArchKind::ARMV6:
- case ARM::ArchKind::ARMV6K:
- case ARM::ArchKind::ARMV6T2:
- case ARM::ArchKind::ARMV6KZ:
- case ARM::ArchKind::ARMV6M:
- return 6;
- case ARM::ArchKind::ARMV7A:
- case ARM::ArchKind::ARMV7VE:
- case ARM::ArchKind::ARMV7R:
- case ARM::ArchKind::ARMV7M:
- case ARM::ArchKind::ARMV7S:
- case ARM::ArchKind::ARMV7EM:
- case ARM::ArchKind::ARMV7K:
- return 7;
- case ARM::ArchKind::ARMV8A:
- case ARM::ArchKind::ARMV8_1A:
- case ARM::ArchKind::ARMV8_2A:
- case ARM::ArchKind::ARMV8_3A:
- case ARM::ArchKind::ARMV8_4A:
- case ARM::ArchKind::ARMV8R:
- case ARM::ArchKind::ARMV8MBaseline:
- case ARM::ArchKind::ARMV8MMainline:
- return 8;
- case ARM::ArchKind::INVALID:
- return 0;
- }
- llvm_unreachable("Unhandled architecture");
+unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
+ if (const auto *Entry = getArchEntry(AK, R600GPUs))
+ return Entry->Features;
+ return FEATURE_NONE;
}
-StringRef llvm::ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
- StringRef ArchName =
- CPU.empty() ? TT.getArchName() : ARM::getArchName(ARM::parseCPUArch(CPU));
-
- if (TT.isOSBinFormatMachO()) {
- if (TT.getEnvironment() == Triple::EABI ||
- TT.getOS() == Triple::UnknownOS ||
- llvm::ARM::parseArchProfile(ArchName) == ARM::ProfileKind::M)
- return "aapcs";
- if (TT.isWatchABI())
- return "aapcs16";
- return "apcs-gnu";
- } else if (TT.isOSWindows())
- // FIXME: this is invalid for WindowsCE.
- return "aapcs";
-
- // Select the default based on the platform.
- switch (TT.getEnvironment()) {
- case Triple::Android:
- case Triple::GNUEABI:
- case Triple::GNUEABIHF:
- case Triple::MuslEABI:
- case Triple::MuslEABIHF:
- return "aapcs-linux";
- case Triple::EABIHF:
- case Triple::EABI:
- return "aapcs";
- default:
- if (TT.isOSNetBSD())
- return "apcs-gnu";
- if (TT.isOSOpenBSD())
- return "aapcs-linux";
- return "aapcs";
- }
+void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
+ // XXX: Should this only report unique canonical names?
+ for (const auto C : AMDGCNGPUs)
+ Values.push_back(C.Name);
}
-StringRef llvm::AArch64::getCanonicalArchName(StringRef Arch) {
- return ARM::getCanonicalArchName(Arch);
+void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
+ for (const auto C : R600GPUs)
+ Values.push_back(C.Name);
}
-unsigned llvm::AArch64::parseFPU(StringRef FPU) {
- return ARM::parseFPU(FPU);
-}
+AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
+ if (GPU == "generic")
+ return {7, 0, 0};
-// Allows partial match, ex. "v8a" matches "armv8a".
-AArch64::ArchKind AArch64::parseArch(StringRef Arch) {
- Arch = getCanonicalArchName(Arch);
- if (checkArchVersion(Arch) < 8)
- return ArchKind::INVALID;
-
- StringRef Syn = getArchSynonym(Arch);
- for (const auto A : AArch64ARCHNames) {
- if (A.getName().endswith(Syn))
- return A.ID;
- }
- return ArchKind::INVALID;
-}
-
-AArch64::ArchExtKind llvm::AArch64::parseArchExt(StringRef ArchExt) {
- for (const auto A : AArch64ARCHExtNames) {
- if (ArchExt == A.getName())
- return static_cast<ArchExtKind>(A.ID);
- }
- return AArch64::AEK_INVALID;
-}
+ AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
+ if (AK == AMDGPU::GPUKind::GK_NONE)
+ return {0, 0, 0};
-AArch64::ArchKind llvm::AArch64::parseCPUArch(StringRef CPU) {
- for (const auto C : AArch64CPUNames) {
- if (CPU == C.getName())
- return C.ArchID;
+ switch (AK) {
+ case GK_GFX600: return {6, 0, 0};
+ case GK_GFX601: return {6, 0, 1};
+ case GK_GFX700: return {7, 0, 0};
+ case GK_GFX701: return {7, 0, 1};
+ case GK_GFX702: return {7, 0, 2};
+ case GK_GFX703: return {7, 0, 3};
+ case GK_GFX704: return {7, 0, 4};
+ case GK_GFX801: return {8, 0, 1};
+ case GK_GFX802: return {8, 0, 2};
+ case GK_GFX803: return {8, 0, 3};
+ case GK_GFX810: return {8, 1, 0};
+ case GK_GFX900: return {9, 0, 0};
+ case GK_GFX902: return {9, 0, 2};
+ case GK_GFX904: return {9, 0, 4};
+ case GK_GFX906: return {9, 0, 6};
+ case GK_GFX909: return {9, 0, 9};
+ default: return {0, 0, 0};
}
- return ArchKind::INVALID;
-}
-
-// ARM, Thumb, AArch64
-ARM::ISAKind AArch64::parseArchISA(StringRef Arch) {
- return ARM::parseArchISA(Arch);
-}
-
-// Little/Big endian
-ARM::EndianKind AArch64::parseArchEndian(StringRef Arch) {
- return ARM::parseArchEndian(Arch);
-}
-
-// Profile A/R/M
-ARM::ProfileKind AArch64::parseArchProfile(StringRef Arch) {
- return ARM::parseArchProfile(Arch);
-}
-
-// Version number (ex. v8 = 8).
-unsigned llvm::AArch64::parseArchVersion(StringRef Arch) {
- return ARM::parseArchVersion(Arch);
-}
-
-bool llvm::AArch64::isX18ReservedByDefault(const Triple &TT) {
- return TT.isOSDarwin() || TT.isOSFuchsia() || TT.isOSWindows();
}
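For illustration only, assuming the IsaVersion struct with Major/Minor/Stepping fields declared in the matching header:

    // "fiji" parses as gfx803, so the ISA version is 8.0.3.
    AMDGPU::IsaVersion V = AMDGPU::getIsaVersion("fiji");
    // V.Major == 8, V.Minor == 0, V.Stepping == 3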
diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp
index c5eba5714766..bb63891cd713 100644
--- a/contrib/llvm/lib/Support/TargetRegistry.cpp
+++ b/contrib/llvm/lib/Support/TargetRegistry.cpp
@@ -72,7 +72,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT,
auto I = find_if(targets(), ArchMatch);
if (I == targets().end()) {
- Error = "No available targets are compatible with this triple.";
+ Error = "No available targets are compatible with triple \"" + TT + "\"";
return nullptr;
}
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
index 61d3b6c6e319..82f5810dd107 100644
--- a/contrib/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -295,7 +295,7 @@ void TimerGroup::addTimer(Timer &T) {
void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
// Sort the timers in descending order by amount of time taken.
- llvm::sort(TimersToPrint.begin(), TimersToPrint.end());
+ llvm::sort(TimersToPrint);
TimeRecord Total;
for (const PrintRecord &Record : TimersToPrint)
@@ -343,8 +343,7 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
}
void TimerGroup::prepareToPrintList() {
- // See if any of our timers were started, if so add them to TimersToPrint and
- // reset them.
+ // See if any of our timers were started, if so add them to TimersToPrint.
for (Timer *T = FirstTimer; T; T = T->Next) {
if (!T->hasTriggered()) continue;
bool WasRunning = T->isRunning();
@@ -368,6 +367,12 @@ void TimerGroup::print(raw_ostream &OS) {
PrintQueuedTimers(OS);
}
+void TimerGroup::clear() {
+ sys::SmartScopedLock<true> L(*TimerLock);
+ for (Timer *T = FirstTimer; T; T = T->Next)
+ T->clear();
+}
+
void TimerGroup::printAll(raw_ostream &OS) {
sys::SmartScopedLock<true> L(*TimerLock);
@@ -375,6 +380,12 @@ void TimerGroup::printAll(raw_ostream &OS) {
TG->print(OS);
}
+void TimerGroup::clearAll() {
+ sys::SmartScopedLock<true> L(*TimerLock);
+ for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+ TG->clear();
+}
+
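A hedged usage sketch of the new clearAll() entry point: a long-lived tool that prints timing reports repeatedly could reset the accumulated timers between reports.

    // Requires llvm/Support/Timer.h.
    llvm::TimerGroup::printAll(llvm::errs()); // report what has run so far
    llvm::TimerGroup::clearAll();             // start the next run from zero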
void TimerGroup::printJSONValue(raw_ostream &OS, const PrintRecord &R,
const char *suffix, double Value) {
assert(yaml::needsQuotes(Name) == yaml::QuotingType::None &&
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index b14d6492b1ed..26d9327f6208 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -35,7 +35,6 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case mips64: return "mips64";
case mips64el: return "mips64el";
case msp430: return "msp430";
- case nios2: return "nios2";
case ppc64: return "powerpc64";
case ppc64le: return "powerpc64le";
case ppc: return "powerpc";
@@ -102,8 +101,6 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case mips64:
case mips64el: return "mips";
- case nios2: return "nios2";
-
case hexagon: return "hexagon";
case amdgcn: return "amdgcn";
@@ -209,6 +206,9 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case Mesa3D: return "mesa3d";
case Contiki: return "contiki";
case AMDPAL: return "amdpal";
+ case HermitCore: return "hermit";
+ case Hurd: return "hurd";
+ case WASI: return "wasi";
}
llvm_unreachable("Invalid OSType");
@@ -271,7 +271,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("mips64", mips64)
.Case("mips64el", mips64el)
.Case("msp430", msp430)
- .Case("nios2", nios2)
.Case("ppc64", ppc64)
.Case("ppc32", ppc)
.Case("ppc", ppc)
@@ -398,11 +397,14 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("thumbeb", Triple::thumbeb)
.Case("avr", Triple::avr)
.Case("msp430", Triple::msp430)
- .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
- .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
- .Cases("mips64", "mips64eb", Triple::mips64)
- .Case("mips64el", Triple::mips64el)
- .Case("nios2", Triple::nios2)
+ .Cases("mips", "mipseb", "mipsallegrex", "mipsisa32r6",
+ "mipsr6", Triple::mips)
+ .Cases("mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el",
+ Triple::mipsel)
+ .Cases("mips64", "mips64eb", "mipsn32", "mipsisa64r6",
+ "mips64r6", "mipsn32r6", Triple::mips64)
+ .Cases("mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el",
+ "mipsn32r6el", Triple::mips64el)
.Case("r600", Triple::r600)
.Case("amdgcn", Triple::amdgcn)
.Case("riscv32", Triple::riscv32)
@@ -502,6 +504,9 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("mesa3d", Triple::Mesa3D)
.StartsWith("contiki", Triple::Contiki)
.StartsWith("amdpal", Triple::AMDPAL)
+ .StartsWith("hermit", Triple::HermitCore)
+ .StartsWith("hurd", Triple::Hurd)
+ .StartsWith("wasi", Triple::WASI)
.Default(Triple::UnknownOS);
}
@@ -538,6 +543,10 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
}
static Triple::SubArchType parseSubArch(StringRef SubArchName) {
+ if (SubArchName.startswith("mips") &&
+ (SubArchName.endswith("r6el") || SubArchName.endswith("r6")))
+ return Triple::MipsSubArch_r6;
+
StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
// For now, this is the small part. Early return.
@@ -594,6 +603,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8_3a;
case ARM::ArchKind::ARMV8_4A:
return Triple::ARMSubArch_v8_4a;
+ case ARM::ArchKind::ARMV8_5A:
+ return Triple::ARMSubArch_v8_5a;
case ARM::ArchKind::ARMV8R:
return Triple::ARMSubArch_v8r;
case ARM::ArchKind::ARMV8MBaseline:
@@ -651,7 +662,6 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::mips64el:
case Triple::mipsel:
case Triple::msp430:
- case Triple::nios2:
case Triple::nvptx:
case Triple::nvptx64:
case Triple::ppc64le:
@@ -709,6 +719,15 @@ Triple::Triple(const Twine &Str)
ObjectFormat = parseFormat(Components[3]);
}
}
+ } else {
+ Environment =
+ StringSwitch<Triple::EnvironmentType>(Components[0])
+ .StartsWith("mipsn32", Triple::GNUABIN32)
+ .StartsWith("mips64", Triple::GNUABI64)
+ .StartsWith("mipsisa64", Triple::GNUABI64)
+ .StartsWith("mipsisa32", Triple::GNU)
+ .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU)
+ .Default(UnknownEnvironment);
}
}
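A sketch of how the MIPS changes above affect triple construction; values are illustrative:

    // r6 spellings fold into the existing mips arches with a new sub-arch;
    // a bare mips arch string now infers a default GNU ABI environment.
    llvm::Triple T1("mipsisa64r6el-linux-gnu");
    // T1.getArch() == Triple::mips64el,
    // T1.getSubArch() == Triple::MipsSubArch_r6
    llvm::Triple T2("mips64");
    // T2.getEnvironment() == Triple::GNUABI64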
if (ObjectFormat == UnknownObjectFormat)
@@ -887,6 +906,12 @@ std::string Triple::normalize(StringRef Str) {
}
}
+ // Replace empty components with "unknown" value.
+ for (unsigned i = 0, e = Components.size(); i < e; ++i) {
+ if (Components[i].empty())
+ Components[i] = "unknown";
+ }
+
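A small sketch of the new behaviour, assuming an otherwise well-formed triple string:

    // Empty components are now spelled out rather than left blank.
    std::string N = llvm::Triple::normalize("x86_64--linux-gnu");
    // N == "x86_64-unknown-linux-gnu"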
// Special case logic goes here. At this point Arch, Vendor and OS have the
// correct values for the computed components.
std::string NormalizedEnvironment;
@@ -1194,7 +1219,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::le32:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
- case llvm::Triple::nios2:
case llvm::Triple::nvptx:
case llvm::Triple::ppc:
case llvm::Triple::r600:
@@ -1279,7 +1303,6 @@ Triple Triple::get32BitArchVariant() const {
case Triple::le32:
case Triple::mips:
case Triple::mipsel:
- case Triple::nios2:
case Triple::nvptx:
case Triple::ppc:
case Triple::r600:
@@ -1328,7 +1351,6 @@ Triple Triple::get64BitArchVariant() const {
case Triple::kalimba:
case Triple::lanai:
case Triple::msp430:
- case Triple::nios2:
case Triple::r600:
case Triple::tce:
case Triple::tcele:
@@ -1400,7 +1422,6 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::le32:
case Triple::le64:
case Triple::msp430:
- case Triple::nios2:
case Triple::nvptx64:
case Triple::nvptx:
case Triple::r600:
@@ -1487,7 +1508,6 @@ bool Triple::isLittleEndian() const {
case Triple::mips64el:
case Triple::mipsel:
case Triple::msp430:
- case Triple::nios2:
case Triple::nvptx64:
case Triple::nvptx:
case Triple::ppc64le:
diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc
index b4279d4fcc0c..d7cc0d627d09 100644
--- a/contrib/llvm/lib/Support/Unix/Path.inc
+++ b/contrib/llvm/lib/Support/Unix/Path.inc
@@ -38,6 +38,8 @@
#ifdef __APPLE__
#include <mach-o/dyld.h>
#include <sys/attr.h>
+#elif defined(__DragonFly__)
+#include <sys/mount.h>
#endif
// Both stdio.h and cstdio are included via different paths and
@@ -49,11 +51,12 @@
// For GNU Hurd
#if defined(__GNU__) && !defined(PATH_MAX)
# define PATH_MAX 4096
+# define MAXPATHLEN 4096
#endif
#include <sys/types.h>
#if !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && \
- !defined(__linux__)
+ !defined(__linux__) && !defined(__FreeBSD_kernel__)
#include <sys/statvfs.h>
#define STATVFS statvfs
#define FSTATVFS fstatvfs
@@ -82,7 +85,7 @@
#define STATVFS_F_FRSIZE(vfs) static_cast<uint64_t>(vfs.f_bsize)
#endif
-#if defined(__NetBSD__)
+#if defined(__NetBSD__) || defined(__DragonFly__) || defined(__GNU__)
#define STATVFS_F_FLAG(vfs) (vfs).f_flag
#else
#define STATVFS_F_FLAG(vfs) (vfs).f_flags
@@ -98,7 +101,7 @@ const file_t kInvalidFile = -1;
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__minix) || defined(__FreeBSD_kernel__) || defined(__linux__) || \
- defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX)
+ defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) || defined(__GNU__)
static int
test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
{
@@ -178,14 +181,34 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
char exe_path[MAXPATHLEN];
StringRef aPath("/proc/self/exe");
if (sys::fs::exists(aPath)) {
- // /proc is not always mounted under Linux (chroot for example).
- ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path));
- if (len >= 0)
- return std::string(exe_path, len);
+ // /proc is not always mounted under Linux (chroot for example).
+ ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path));
+ if (len < 0)
+ return "";
+
+ // Null terminate the string for realpath. readlink never null
+ // terminates its output.
+ len = std::min(len, ssize_t(sizeof(exe_path) - 1));
+ exe_path[len] = '\0';
+
+ // On Linux, /proc/self/exe always looks through symlinks. However, on
+ // GNU/Hurd, /proc/self/exe is a symlink to the path that was used to start
+ // the program, and not the eventual binary file. Therefore, call realpath
+ // so this behaves the same on all platforms.
+#if _POSIX_VERSION >= 200112 || defined(__GLIBC__)
+ char *real_path = realpath(exe_path, NULL);
+ std::string ret = std::string(real_path);
+ free(real_path);
+ return ret;
+#else
+ char real_path[MAXPATHLEN];
+ realpath(exe_path, real_path);
+ return std::string(real_path);
+#endif
} else {
- // Fall back to the classical detection.
- if (getprogpath(exe_path, argv0))
- return exe_path;
+ // Fall back to the classical detection.
+ if (getprogpath(exe_path, argv0))
+ return exe_path;
}
#elif defined(HAVE_DLFCN_H) && defined(HAVE_DLADDR)
// Use dladdr to get executable path if available.
@@ -206,11 +229,11 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
}
TimePoint<> basic_file_status::getLastAccessedTime() const {
- return toTimePoint(fs_st_atime);
+ return toTimePoint(fs_st_atime, fs_st_atime_nsec);
}
TimePoint<> basic_file_status::getLastModificationTime() const {
- return toTimePoint(fs_st_mtime);
+ return toTimePoint(fs_st_mtime, fs_st_mtime_nsec);
}
UniqueID file_status::getUniqueID() const {
@@ -347,7 +370,7 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
}
static bool is_local_impl(struct STATVFS &Vfs) {
-#if defined(__linux__)
+#if defined(__linux__) || defined(__GNU__)
#ifndef NFS_SUPER_MAGIC
#define NFS_SUPER_MAGIC 0x6969
#endif
@@ -357,7 +380,11 @@ static bool is_local_impl(struct STATVFS &Vfs) {
#ifndef CIFS_MAGIC_NUMBER
#define CIFS_MAGIC_NUMBER 0xFF534D42
#endif
+#ifdef __GNU__
+ switch ((uint32_t)Vfs.__f_type) {
+#else
switch ((uint32_t)Vfs.f_type) {
+#endif
case NFS_SUPER_MAGIC:
case SMB_SUPER_MAGIC:
case CIFS_MAGIC_NUMBER:
@@ -523,37 +550,62 @@ static void expandTildeExpr(SmallVectorImpl<char> &Path) {
llvm::sys::path::append(Path, Storage);
}
+
+void expand_tilde(const Twine &path, SmallVectorImpl<char> &dest) {
+ dest.clear();
+ if (path.isTriviallyEmpty())
+ return;
+
+ path.toVector(dest);
+ expandTildeExpr(dest);
+
+ return;
+}
+
+static file_type typeForMode(mode_t Mode) {
+ if (S_ISDIR(Mode))
+ return file_type::directory_file;
+ else if (S_ISREG(Mode))
+ return file_type::regular_file;
+ else if (S_ISBLK(Mode))
+ return file_type::block_file;
+ else if (S_ISCHR(Mode))
+ return file_type::character_file;
+ else if (S_ISFIFO(Mode))
+ return file_type::fifo_file;
+ else if (S_ISSOCK(Mode))
+ return file_type::socket_file;
+ else if (S_ISLNK(Mode))
+ return file_type::symlink_file;
+ return file_type::type_unknown;
+}
+
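typeForMode lets the stat()-based and readdir()-based paths share one mode-to-type mapping; a sketch, valid only inside this translation unit where the helper is visible (the path is illustrative):

    // <sys/stat.h> is already pulled in by this file.
    struct stat St;
    if (::stat("/tmp", &St) == 0) {
      file_type T = typeForMode(St.st_mode);
      // For a directory, T == file_type::directory_file.
    }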
static std::error_code fillStatus(int StatRet, const struct stat &Status,
file_status &Result) {
if (StatRet != 0) {
- std::error_code ec(errno, std::generic_category());
- if (ec == errc::no_such_file_or_directory)
+ std::error_code EC(errno, std::generic_category());
+ if (EC == errc::no_such_file_or_directory)
Result = file_status(file_type::file_not_found);
else
Result = file_status(file_type::status_error);
- return ec;
+ return EC;
}
- file_type Type = file_type::type_unknown;
-
- if (S_ISDIR(Status.st_mode))
- Type = file_type::directory_file;
- else if (S_ISREG(Status.st_mode))
- Type = file_type::regular_file;
- else if (S_ISBLK(Status.st_mode))
- Type = file_type::block_file;
- else if (S_ISCHR(Status.st_mode))
- Type = file_type::character_file;
- else if (S_ISFIFO(Status.st_mode))
- Type = file_type::fifo_file;
- else if (S_ISSOCK(Status.st_mode))
- Type = file_type::socket_file;
- else if (S_ISLNK(Status.st_mode))
- Type = file_type::symlink_file;
+ uint32_t atime_nsec, mtime_nsec;
+#if defined(HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC)
+ atime_nsec = Status.st_atimespec.tv_nsec;
+ mtime_nsec = Status.st_mtimespec.tv_nsec;
+#elif defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC)
+ atime_nsec = Status.st_atim.tv_nsec;
+ mtime_nsec = Status.st_mtim.tv_nsec;
+#else
+ atime_nsec = mtime_nsec = 0;
+#endif
perms Perms = static_cast<perms>(Status.st_mode) & all_perms;
- Result = file_status(Type, Perms, Status.st_dev, Status.st_nlink,
- Status.st_ino, Status.st_atime, Status.st_mtime,
+ Result = file_status(typeForMode(Status.st_mode), Perms, Status.st_dev,
+ Status.st_nlink, Status.st_ino,
+ Status.st_atime, atime_nsec, Status.st_mtime, mtime_nsec,
Status.st_uid, Status.st_gid, Status.st_size);
return std::error_code();
@@ -583,17 +635,22 @@ std::error_code setPermissions(const Twine &Path, perms Permissions) {
return std::error_code();
}
-std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time) {
+std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
+ TimePoint<> ModificationTime) {
#if defined(HAVE_FUTIMENS)
timespec Times[2];
- Times[0] = Times[1] = sys::toTimeSpec(Time);
+ Times[0] = sys::toTimeSpec(AccessTime);
+ Times[1] = sys::toTimeSpec(ModificationTime);
if (::futimens(FD, Times))
return std::error_code(errno, std::generic_category());
return std::error_code();
#elif defined(HAVE_FUTIMES)
timeval Times[2];
- Times[0] = Times[1] = sys::toTimeVal(
- std::chrono::time_point_cast<std::chrono::microseconds>(Time));
+ Times[0] = sys::toTimeVal(
+ std::chrono::time_point_cast<std::chrono::microseconds>(AccessTime));
+ Times[1] =
+ sys::toTimeVal(std::chrono::time_point_cast<std::chrono::microseconds>(
+ ModificationTime));
if (::futimes(FD, Times))
return std::error_code(errno, std::generic_category());
return std::error_code();
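The widened API takes the two timestamps separately; a minimal caller sketch, where FD is assumed to be an open descriptor:

    // Requires llvm/Support/FileSystem.h and llvm/Support/Chrono.h.
    llvm::sys::TimePoint<> Now = std::chrono::system_clock::now();
    std::error_code EC = llvm::sys::fs::setLastAccessAndModificationTime(
        FD, /*AccessTime=*/Now, /*ModificationTime=*/Now);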
@@ -691,19 +748,30 @@ std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
return std::error_code();
}
-std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
+static file_type direntType(dirent* Entry) {
+ // Most platforms provide the file type in the dirent: Linux/BSD/Mac.
+ // The DTTOIF macro lets us reuse our status -> type conversion.
+#if defined(_DIRENT_HAVE_D_TYPE) && defined(DTTOIF)
+ return typeForMode(DTTOIF(Entry->d_type));
+#else
+ // Other platforms such as Solaris require a stat() to get the type.
+ return file_type::type_unknown;
+#endif
+}
+
+std::error_code detail::directory_iterator_increment(detail::DirIterState &It) {
errno = 0;
- dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
- if (cur_dir == nullptr && errno != 0) {
+ dirent *CurDir = ::readdir(reinterpret_cast<DIR *>(It.IterationHandle));
+ if (CurDir == nullptr && errno != 0) {
return std::error_code(errno, std::generic_category());
- } else if (cur_dir != nullptr) {
- StringRef name(cur_dir->d_name);
- if ((name.size() == 1 && name[0] == '.') ||
- (name.size() == 2 && name[0] == '.' && name[1] == '.'))
- return directory_iterator_increment(it);
- it.CurrentEntry.replace_filename(name);
+ } else if (CurDir != nullptr) {
+ StringRef Name(CurDir->d_name);
+ if ((Name.size() == 1 && Name[0] == '.') ||
+ (Name.size() == 2 && Name[0] == '.' && Name[1] == '.'))
+ return directory_iterator_increment(It);
+ It.CurrentEntry.replace_filename(Name, direntType(CurDir));
} else
- return directory_iterator_destruct(it);
+ return directory_iterator_destruct(It);
return std::error_code();
}
@@ -952,29 +1020,6 @@ static bool getDarwinConfDir(bool TempDir, SmallVectorImpl<char> &Result) {
return false;
}
-static bool getUserCacheDir(SmallVectorImpl<char> &Result) {
- // First try using XDG_CACHE_HOME env variable,
- // as specified in XDG Base Directory Specification at
- // http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
- if (const char *XdgCacheDir = std::getenv("XDG_CACHE_HOME")) {
- Result.clear();
- Result.append(XdgCacheDir, XdgCacheDir + strlen(XdgCacheDir));
- return true;
- }
-
- // Try Darwin configuration query
- if (getDarwinConfDir(false, Result))
- return true;
-
- // Use "$HOME/.cache" if $HOME is available
- if (home_directory(Result)) {
- append(Result, ".cache");
- return true;
- }
-
- return false;
-}
-
static const char *getEnvTempDir() {
// Check whether the temporary directory is specified by an environment
// variable.
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
index de26695d64ea..ad88d5e96906 100644
--- a/contrib/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -47,6 +47,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
+#include <sysexits.h>
#ifdef HAVE_BACKTRACE
# include BACKTRACE_HEADER // For backtrace().
#endif
@@ -334,6 +335,10 @@ static RETSIGTYPE SignalHandler(int Sig) {
if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr))
return OldInterruptFunction();
+ // Send a special return code that drivers can check for, from sysexits.h.
+ if (Sig == SIGPIPE)
+ exit(EX_IOERR);
+
raise(Sig); // Execute the default handler.
return;
}
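A hypothetical driver-side check built on the new convention; ChildPID and the waitpid() plumbing are assumptions, not part of this patch:

    // Needs <sys/wait.h> and <sysexits.h>.
    int Status;
    if (waitpid(ChildPID, &Status, 0) > 0 && WIFEXITED(Status) &&
        WEXITSTATUS(Status) == EX_IOERR) {
      // The child exited because its output pipe was closed; treat this as
      // a quiet failure rather than a crash.
    }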
diff --git a/contrib/llvm/lib/Support/VirtualFileSystem.cpp b/contrib/llvm/lib/Support/VirtualFileSystem.cpp
new file mode 100644
index 000000000000..f2a8a1bb27af
--- /dev/null
+++ b/contrib/llvm/lib/Support/VirtualFileSystem.cpp
@@ -0,0 +1,2070 @@
+//===- VirtualFileSystem.cpp - Virtual File System Layer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtualFileSystem interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <atomic>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::vfs;
+
+using llvm::sys::fs::file_status;
+using llvm::sys::fs::file_type;
+using llvm::sys::fs::perms;
+using llvm::sys::fs::UniqueID;
+
+Status::Status(const file_status &Status)
+ : UID(Status.getUniqueID()), MTime(Status.getLastModificationTime()),
+ User(Status.getUser()), Group(Status.getGroup()), Size(Status.getSize()),
+ Type(Status.type()), Perms(Status.permissions()) {}
+
+Status::Status(StringRef Name, UniqueID UID, sys::TimePoint<> MTime,
+ uint32_t User, uint32_t Group, uint64_t Size, file_type Type,
+ perms Perms)
+ : Name(Name), UID(UID), MTime(MTime), User(User), Group(Group), Size(Size),
+ Type(Type), Perms(Perms) {}
+
+Status Status::copyWithNewName(const Status &In, StringRef NewName) {
+ return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
+ In.getUser(), In.getGroup(), In.getSize(), In.getType(),
+ In.getPermissions());
+}
+
+Status Status::copyWithNewName(const file_status &In, StringRef NewName) {
+ return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
+ In.getUser(), In.getGroup(), In.getSize(), In.type(),
+ In.permissions());
+}
+
+bool Status::equivalent(const Status &Other) const {
+ assert(isStatusKnown() && Other.isStatusKnown());
+ return getUniqueID() == Other.getUniqueID();
+}
+
+bool Status::isDirectory() const { return Type == file_type::directory_file; }
+
+bool Status::isRegularFile() const { return Type == file_type::regular_file; }
+
+bool Status::isOther() const {
+ return exists() && !isRegularFile() && !isDirectory() && !isSymlink();
+}
+
+bool Status::isSymlink() const { return Type == file_type::symlink_file; }
+
+bool Status::isStatusKnown() const { return Type != file_type::status_error; }
+
+bool Status::exists() const {
+ return isStatusKnown() && Type != file_type::file_not_found;
+}
+
+File::~File() = default;
+
+FileSystem::~FileSystem() = default;
+
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+FileSystem::getBufferForFile(const llvm::Twine &Name, int64_t FileSize,
+ bool RequiresNullTerminator, bool IsVolatile) {
+ auto F = openFileForRead(Name);
+ if (!F)
+ return F.getError();
+
+ return (*F)->getBuffer(Name, FileSize, RequiresNullTerminator, IsVolatile);
+}
+
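A minimal sketch of reading a file through the vfs interface; the path is illustrative:

    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS =
        llvm::vfs::getRealFileSystem();
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buf =
        FS->getBufferForFile("/etc/hosts");
    if (Buf)
      llvm::errs() << (*Buf)->getBufferSize() << " bytes\n";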
+std::error_code FileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const {
+ if (llvm::sys::path::is_absolute(Path))
+ return {};
+
+ auto WorkingDir = getCurrentWorkingDirectory();
+ if (!WorkingDir)
+ return WorkingDir.getError();
+
+ llvm::sys::fs::make_absolute(WorkingDir.get(), Path);
+ return {};
+}
+
+std::error_code FileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ return errc::operation_not_permitted;
+}
+
+std::error_code FileSystem::isLocal(const Twine &Path, bool &Result) {
+ return errc::operation_not_permitted;
+}
+
+bool FileSystem::exists(const Twine &Path) {
+ auto Status = status(Path);
+ return Status && Status->exists();
+}
+
+#ifndef NDEBUG
+static bool isTraversalComponent(StringRef Component) {
+ return Component.equals("..") || Component.equals(".");
+}
+
+static bool pathHasTraversal(StringRef Path) {
+ using namespace llvm::sys;
+
+ for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path)))
+ if (isTraversalComponent(Comp))
+ return true;
+ return false;
+}
+#endif
+
+//===-----------------------------------------------------------------------===/
+// RealFileSystem implementation
+//===-----------------------------------------------------------------------===/
+
+namespace {
+
+/// Wrapper around a raw file descriptor.
+class RealFile : public File {
+ friend class RealFileSystem;
+
+ int FD;
+ Status S;
+ std::string RealName;
+
+ RealFile(int FD, StringRef NewName, StringRef NewRealPathName)
+ : FD(FD), S(NewName, {}, {}, {}, {}, {},
+ llvm::sys::fs::file_type::status_error, {}),
+ RealName(NewRealPathName.str()) {
+ assert(FD >= 0 && "Invalid or inactive file descriptor");
+ }
+
+public:
+ ~RealFile() override;
+
+ ErrorOr<Status> status() override;
+ ErrorOr<std::string> getName() override;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> getBuffer(const Twine &Name,
+ int64_t FileSize,
+ bool RequiresNullTerminator,
+ bool IsVolatile) override;
+ std::error_code close() override;
+};
+
+} // namespace
+
+RealFile::~RealFile() { close(); }
+
+ErrorOr<Status> RealFile::status() {
+ assert(FD != -1 && "cannot stat closed file");
+ if (!S.isStatusKnown()) {
+ file_status RealStatus;
+ if (std::error_code EC = sys::fs::status(FD, RealStatus))
+ return EC;
+ S = Status::copyWithNewName(RealStatus, S.getName());
+ }
+ return S;
+}
+
+ErrorOr<std::string> RealFile::getName() {
+ return RealName.empty() ? S.getName().str() : RealName;
+}
+
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+RealFile::getBuffer(const Twine &Name, int64_t FileSize,
+ bool RequiresNullTerminator, bool IsVolatile) {
+ assert(FD != -1 && "cannot get buffer for closed file");
+ return MemoryBuffer::getOpenFile(FD, Name, FileSize, RequiresNullTerminator,
+ IsVolatile);
+}
+
+std::error_code RealFile::close() {
+ std::error_code EC = sys::Process::SafelyCloseFileDescriptor(FD);
+ FD = -1;
+ return EC;
+}
+
+namespace {
+
+/// The file system according to your operating system.
+class RealFileSystem : public FileSystem {
+public:
+ ErrorOr<Status> status(const Twine &Path) override;
+ ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
+ directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
+
+ llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
+ std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+ std::error_code isLocal(const Twine &Path, bool &Result) override;
+ std::error_code getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const override;
+
+private:
+ mutable std::mutex CWDMutex;
+ mutable std::string CWDCache;
+};
+
+} // namespace
+
+ErrorOr<Status> RealFileSystem::status(const Twine &Path) {
+ sys::fs::file_status RealStatus;
+ if (std::error_code EC = sys::fs::status(Path, RealStatus))
+ return EC;
+ return Status::copyWithNewName(RealStatus, Path.str());
+}
+
+ErrorOr<std::unique_ptr<File>>
+RealFileSystem::openFileForRead(const Twine &Name) {
+ int FD;
+ SmallString<256> RealName;
+ if (std::error_code EC =
+ sys::fs::openFileForRead(Name, FD, sys::fs::OF_None, &RealName))
+ return EC;
+ return std::unique_ptr<File>(new RealFile(FD, Name.str(), RealName.str()));
+}
+
+llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const {
+ std::lock_guard<std::mutex> Lock(CWDMutex);
+ if (!CWDCache.empty())
+ return CWDCache;
+ SmallString<256> Dir;
+ if (std::error_code EC = llvm::sys::fs::current_path(Dir))
+ return EC;
+ CWDCache = Dir.str();
+ return CWDCache;
+}
+
+std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
+ // FIXME: chdir is thread hostile; on the other hand, creating the same
+ // behavior as chdir is complex: chdir resolves the path once, thus
+ // guaranteeing that all subsequent relative path operations work
+ // on the same path the original chdir resulted in. This makes a
+ // difference for example on network filesystems, where symlinks might be
+ // switched during runtime of the tool. Fixing this depends on having a
+ // file system abstraction that allows openat() style interactions.
+ if (auto EC = llvm::sys::fs::set_current_path(Path))
+ return EC;
+
+ // Invalidate cache.
+ std::lock_guard<std::mutex> Lock(CWDMutex);
+ CWDCache.clear();
+ return std::error_code();
+}
+
+std::error_code RealFileSystem::isLocal(const Twine &Path, bool &Result) {
+ return llvm::sys::fs::is_local(Path, Result);
+}
+
+std::error_code
+RealFileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ return llvm::sys::fs::real_path(Path, Output);
+}
+
+IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
+ static IntrusiveRefCntPtr<FileSystem> FS = new RealFileSystem();
+ return FS;
+}
+
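Editor's note, not part of the upstream diff: a minimal sketch of how the real file system above is typically consumed through the generic vfs::FileSystem interface. The header path is assumed to be llvm/Support/VirtualFileSystem.h as introduced by this import; the path passed in is illustrative only.

    // Illustrative only; uses the FileSystem/File APIs defined above.
    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void statAndRead(StringRef Path) {
      IntrusiveRefCntPtr<vfs::FileSystem> FS = vfs::getRealFileSystem();

      // status() mirrors sys::fs::status() but goes through the VFS layer.
      ErrorOr<vfs::Status> S = FS->status(Path);
      if (!S) {
        errs() << Path << ": " << S.getError().message() << "\n";
        return;
      }

      // openFileForRead() yields a File; getBuffer() maps its contents.
      auto F = FS->openFileForRead(Path);
      if (!F)
        return;
      if (auto Buf = (*F)->getBuffer(Path))
        outs() << Path << ": " << (*Buf)->getBufferSize() << " bytes\n";
    }
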
+namespace {
+
+class RealFSDirIter : public llvm::vfs::detail::DirIterImpl {
+ llvm::sys::fs::directory_iterator Iter;
+
+public:
+ RealFSDirIter(const Twine &Path, std::error_code &EC) : Iter(Path, EC) {
+ if (Iter != llvm::sys::fs::directory_iterator())
+ CurrentEntry = directory_entry(Iter->path(), Iter->type());
+ }
+
+ std::error_code increment() override {
+ std::error_code EC;
+ Iter.increment(EC);
+ CurrentEntry = (Iter == llvm::sys::fs::directory_iterator())
+ ? directory_entry()
+ : directory_entry(Iter->path(), Iter->type());
+ return EC;
+ }
+};
+
+} // namespace
+
+directory_iterator RealFileSystem::dir_begin(const Twine &Dir,
+ std::error_code &EC) {
+ return directory_iterator(std::make_shared<RealFSDirIter>(Dir, EC));
+}
+
+//===-----------------------------------------------------------------------===/
+// OverlayFileSystem implementation
+//===-----------------------------------------------------------------------===/
+
+OverlayFileSystem::OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> BaseFS) {
+ FSList.push_back(std::move(BaseFS));
+}
+
+void OverlayFileSystem::pushOverlay(IntrusiveRefCntPtr<FileSystem> FS) {
+ FSList.push_back(FS);
+ // Synchronize added file systems by duplicating the working directory from
+ // the first one in the list.
+ FS->setCurrentWorkingDirectory(getCurrentWorkingDirectory().get());
+}
+
+ErrorOr<Status> OverlayFileSystem::status(const Twine &Path) {
+ // FIXME: handle symlinks that cross file systems
+ for (iterator I = overlays_begin(), E = overlays_end(); I != E; ++I) {
+ ErrorOr<Status> Status = (*I)->status(Path);
+ if (Status || Status.getError() != llvm::errc::no_such_file_or_directory)
+ return Status;
+ }
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+}
+
+ErrorOr<std::unique_ptr<File>>
+OverlayFileSystem::openFileForRead(const llvm::Twine &Path) {
+ // FIXME: handle symlinks that cross file systems
+ for (iterator I = overlays_begin(), E = overlays_end(); I != E; ++I) {
+ auto Result = (*I)->openFileForRead(Path);
+ if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
+ return Result;
+ }
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+}
+
+llvm::ErrorOr<std::string>
+OverlayFileSystem::getCurrentWorkingDirectory() const {
+ // All file systems are synchronized, just take the first working directory.
+ return FSList.front()->getCurrentWorkingDirectory();
+}
+
+std::error_code
+OverlayFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
+ for (auto &FS : FSList)
+ if (std::error_code EC = FS->setCurrentWorkingDirectory(Path))
+ return EC;
+ return {};
+}
+
+std::error_code OverlayFileSystem::isLocal(const Twine &Path, bool &Result) {
+ for (auto &FS : FSList)
+ if (FS->exists(Path))
+ return FS->isLocal(Path, Result);
+ return errc::no_such_file_or_directory;
+}
+
+std::error_code
+OverlayFileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ for (auto &FS : FSList)
+ if (FS->exists(Path))
+ return FS->getRealPath(Path, Output);
+ return errc::no_such_file_or_directory;
+}
+
+llvm::vfs::detail::DirIterImpl::~DirIterImpl() = default;
+
+namespace {
+
+class OverlayFSDirIterImpl : public llvm::vfs::detail::DirIterImpl {
+ OverlayFileSystem &Overlays;
+ std::string Path;
+ OverlayFileSystem::iterator CurrentFS;
+ directory_iterator CurrentDirIter;
+ llvm::StringSet<> SeenNames;
+
+ std::error_code incrementFS() {
+ assert(CurrentFS != Overlays.overlays_end() && "incrementing past end");
+ ++CurrentFS;
+ for (auto E = Overlays.overlays_end(); CurrentFS != E; ++CurrentFS) {
+ std::error_code EC;
+ CurrentDirIter = (*CurrentFS)->dir_begin(Path, EC);
+ if (EC && EC != errc::no_such_file_or_directory)
+ return EC;
+ if (CurrentDirIter != directory_iterator())
+ break; // found
+ }
+ return {};
+ }
+
+ std::error_code incrementDirIter(bool IsFirstTime) {
+ assert((IsFirstTime || CurrentDirIter != directory_iterator()) &&
+ "incrementing past end");
+ std::error_code EC;
+ if (!IsFirstTime)
+ CurrentDirIter.increment(EC);
+ if (!EC && CurrentDirIter == directory_iterator())
+ EC = incrementFS();
+ return EC;
+ }
+
+ std::error_code incrementImpl(bool IsFirstTime) {
+ while (true) {
+ std::error_code EC = incrementDirIter(IsFirstTime);
+ if (EC || CurrentDirIter == directory_iterator()) {
+ CurrentEntry = directory_entry();
+ return EC;
+ }
+ CurrentEntry = *CurrentDirIter;
+ StringRef Name = llvm::sys::path::filename(CurrentEntry.path());
+ if (SeenNames.insert(Name).second)
+ return EC; // name not seen before
+ }
+ llvm_unreachable("returned above");
+ }
+
+public:
+ OverlayFSDirIterImpl(const Twine &Path, OverlayFileSystem &FS,
+ std::error_code &EC)
+ : Overlays(FS), Path(Path.str()), CurrentFS(Overlays.overlays_begin()) {
+ CurrentDirIter = (*CurrentFS)->dir_begin(Path, EC);
+ EC = incrementImpl(true);
+ }
+
+ std::error_code increment() override { return incrementImpl(false); }
+};
+
+} // namespace
+
+directory_iterator OverlayFileSystem::dir_begin(const Twine &Dir,
+ std::error_code &EC) {
+ return directory_iterator(
+ std::make_shared<OverlayFSDirIterImpl>(Dir, *this, EC));
+}
+
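Editor's note, not part of the diff: the overlay semantics implemented above (later-pushed file systems are consulted first, with fallback only on errc::no_such_file_or_directory, and merged directory listings that suppress duplicate names) can be exercised with a short sketch; the helper name is made up.

    #include "llvm/Support/VirtualFileSystem.h"

    using namespace llvm;

    static IntrusiveRefCntPtr<vfs::OverlayFileSystem>
    makeOverlay(IntrusiveRefCntPtr<vfs::FileSystem> Top) {
      // Base layer: the real file system. Layers pushed later shadow it.
      IntrusiveRefCntPtr<vfs::OverlayFileSystem> Overlay(
          new vfs::OverlayFileSystem(vfs::getRealFileSystem()));
      Overlay->pushOverlay(std::move(Top));
      // Lookups now try `Top` first and fall back to the real FS, matching
      // status()/openFileForRead() above; dir_begin() merges listings.
      return Overlay;
    }
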
+void ProxyFileSystem::anchor() {}
+
+namespace llvm {
+namespace vfs {
+
+namespace detail {
+
+enum InMemoryNodeKind { IME_File, IME_Directory, IME_HardLink };
+
+/// The in-memory file system is a tree of Nodes. Every node can either be a
+/// file, a hardlink, or a directory.

+class InMemoryNode {
+ InMemoryNodeKind Kind;
+ std::string FileName;
+
+public:
+ InMemoryNode(llvm::StringRef FileName, InMemoryNodeKind Kind)
+ : Kind(Kind), FileName(llvm::sys::path::filename(FileName)) {}
+ virtual ~InMemoryNode() = default;
+
+ /// Get the filename of this node (the name without the directory part).
+ StringRef getFileName() const { return FileName; }
+ InMemoryNodeKind getKind() const { return Kind; }
+ virtual std::string toString(unsigned Indent) const = 0;
+};
+
+class InMemoryFile : public InMemoryNode {
+ Status Stat;
+ std::unique_ptr<llvm::MemoryBuffer> Buffer;
+
+public:
+ InMemoryFile(Status Stat, std::unique_ptr<llvm::MemoryBuffer> Buffer)
+ : InMemoryNode(Stat.getName(), IME_File), Stat(std::move(Stat)),
+ Buffer(std::move(Buffer)) {}
+
+ /// Return the \p Status for this node. \p RequestedName should be the name
+ /// through which the caller referred to this node. It will override
+ /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
+ Status getStatus(StringRef RequestedName) const {
+ return Status::copyWithNewName(Stat, RequestedName);
+ }
+ llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); }
+
+ std::string toString(unsigned Indent) const override {
+ return (std::string(Indent, ' ') + Stat.getName() + "\n").str();
+ }
+
+ static bool classof(const InMemoryNode *N) {
+ return N->getKind() == IME_File;
+ }
+};
+
+namespace {
+
+class InMemoryHardLink : public InMemoryNode {
+ const InMemoryFile &ResolvedFile;
+
+public:
+ InMemoryHardLink(StringRef Path, const InMemoryFile &ResolvedFile)
+ : InMemoryNode(Path, IME_HardLink), ResolvedFile(ResolvedFile) {}
+ const InMemoryFile &getResolvedFile() const { return ResolvedFile; }
+
+ std::string toString(unsigned Indent) const override {
+ return std::string(Indent, ' ') + "HardLink to -> " +
+ ResolvedFile.toString(0);
+ }
+
+ static bool classof(const InMemoryNode *N) {
+ return N->getKind() == IME_HardLink;
+ }
+};
+
+/// Adapt an InMemoryFile for VFS' File interface. The goal is to make
+/// \p InMemoryFileAdaptor mimic as much as possible the behavior of
+/// \p RealFile.
+class InMemoryFileAdaptor : public File {
+ const InMemoryFile &Node;
+ /// The name to use when returning a Status for this file.
+ std::string RequestedName;
+
+public:
+ explicit InMemoryFileAdaptor(const InMemoryFile &Node,
+ std::string RequestedName)
+ : Node(Node), RequestedName(std::move(RequestedName)) {}
+
+ llvm::ErrorOr<Status> status() override {
+ return Node.getStatus(RequestedName);
+ }
+
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+ getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
+ bool IsVolatile) override {
+ llvm::MemoryBuffer *Buf = Node.getBuffer();
+ return llvm::MemoryBuffer::getMemBuffer(
+ Buf->getBuffer(), Buf->getBufferIdentifier(), RequiresNullTerminator);
+ }
+
+ std::error_code close() override { return {}; }
+};
+} // namespace
+
+class InMemoryDirectory : public InMemoryNode {
+ Status Stat;
+ llvm::StringMap<std::unique_ptr<InMemoryNode>> Entries;
+
+public:
+ InMemoryDirectory(Status Stat)
+ : InMemoryNode(Stat.getName(), IME_Directory), Stat(std::move(Stat)) {}
+
+ /// Return the \p Status for this node. \p RequestedName should be the name
+ /// through which the caller referred to this node. It will override
+ /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
+ Status getStatus(StringRef RequestedName) const {
+ return Status::copyWithNewName(Stat, RequestedName);
+ }
+ InMemoryNode *getChild(StringRef Name) {
+ auto I = Entries.find(Name);
+ if (I != Entries.end())
+ return I->second.get();
+ return nullptr;
+ }
+
+ InMemoryNode *addChild(StringRef Name, std::unique_ptr<InMemoryNode> Child) {
+ return Entries.insert(make_pair(Name, std::move(Child)))
+ .first->second.get();
+ }
+
+ using const_iterator = decltype(Entries)::const_iterator;
+
+ const_iterator begin() const { return Entries.begin(); }
+ const_iterator end() const { return Entries.end(); }
+
+ std::string toString(unsigned Indent) const override {
+ std::string Result =
+ (std::string(Indent, ' ') + Stat.getName() + "\n").str();
+ for (const auto &Entry : Entries)
+ Result += Entry.second->toString(Indent + 2);
+ return Result;
+ }
+
+ static bool classof(const InMemoryNode *N) {
+ return N->getKind() == IME_Directory;
+ }
+};
+
+namespace {
+Status getNodeStatus(const InMemoryNode *Node, StringRef RequestedName) {
+ if (auto Dir = dyn_cast<detail::InMemoryDirectory>(Node))
+ return Dir->getStatus(RequestedName);
+ if (auto File = dyn_cast<detail::InMemoryFile>(Node))
+ return File->getStatus(RequestedName);
+ if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node))
+ return Link->getResolvedFile().getStatus(RequestedName);
+ llvm_unreachable("Unknown node type");
+}
+} // namespace
+} // namespace detail
+
+InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths)
+ : Root(new detail::InMemoryDirectory(
+ Status("", getNextVirtualUniqueID(), llvm::sys::TimePoint<>(), 0, 0,
+ 0, llvm::sys::fs::file_type::directory_file,
+ llvm::sys::fs::perms::all_all))),
+ UseNormalizedPaths(UseNormalizedPaths) {}
+
+InMemoryFileSystem::~InMemoryFileSystem() = default;
+
+std::string InMemoryFileSystem::toString() const {
+ return Root->toString(/*Indent=*/0);
+}
+
+bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ Optional<uint32_t> User,
+ Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::file_type> Type,
+ Optional<llvm::sys::fs::perms> Perms,
+ const detail::InMemoryFile *HardLinkTarget) {
+ SmallString<128> Path;
+ P.toVector(Path);
+
+ // Fix up relative paths. This just prepends the current working directory.
+ std::error_code EC = makeAbsolute(Path);
+ assert(!EC);
+ (void)EC;
+
+ if (useNormalizedPaths())
+ llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+
+ if (Path.empty())
+ return false;
+
+ detail::InMemoryDirectory *Dir = Root.get();
+ auto I = llvm::sys::path::begin(Path), E = sys::path::end(Path);
+ const auto ResolvedUser = User.getValueOr(0);
+ const auto ResolvedGroup = Group.getValueOr(0);
+ const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file);
+ const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all);
+ assert(!(HardLinkTarget && Buffer) && "HardLink cannot have a buffer");
+ // Any intermediate directories we create should be accessible by
+ // the owner, even if Perms says otherwise for the final path.
+ const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
+ while (true) {
+ StringRef Name = *I;
+ detail::InMemoryNode *Node = Dir->getChild(Name);
+ ++I;
+ if (!Node) {
+ if (I == E) {
+ // End of the path.
+ std::unique_ptr<detail::InMemoryNode> Child;
+ if (HardLinkTarget)
+ Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget));
+ else {
+ // Create a new file or directory.
+ Status Stat(P.str(), getNextVirtualUniqueID(),
+ llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
+ ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
+ ResolvedPerms);
+ if (ResolvedType == sys::fs::file_type::directory_file) {
+ Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
+ } else {
+ Child.reset(
+ new detail::InMemoryFile(std::move(Stat), std::move(Buffer)));
+ }
+ }
+ Dir->addChild(Name, std::move(Child));
+ return true;
+ }
+
+ // Create a new directory. Use the path up to here.
+ Status Stat(
+ StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
+ getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime),
+ ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file,
+ NewDirectoryPerms);
+ Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
+ Name, llvm::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
+ continue;
+ }
+
+ if (auto *NewDir = dyn_cast<detail::InMemoryDirectory>(Node)) {
+ Dir = NewDir;
+ } else {
+ assert((isa<detail::InMemoryFile>(Node) ||
+ isa<detail::InMemoryHardLink>(Node)) &&
+ "Must be either file, hardlink or directory!");
+
+ // Trying to insert a directory in place of a file.
+ if (I != E)
+ return false;
+
+ // Return false only if the new file is different from the existing one.
+ if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node)) {
+ return Link->getResolvedFile().getBuffer()->getBuffer() ==
+ Buffer->getBuffer();
+ }
+ return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() ==
+ Buffer->getBuffer();
+ }
+ }
+}
+
+bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
+ std::unique_ptr<llvm::MemoryBuffer> Buffer,
+ Optional<uint32_t> User,
+ Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::file_type> Type,
+ Optional<llvm::sys::fs::perms> Perms) {
+ return addFile(P, ModificationTime, std::move(Buffer), User, Group, Type,
+ Perms, /*HardLinkTarget=*/nullptr);
+}
+
+bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
+ llvm::MemoryBuffer *Buffer,
+ Optional<uint32_t> User,
+ Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::file_type> Type,
+ Optional<llvm::sys::fs::perms> Perms) {
+ return addFile(P, ModificationTime,
+ llvm::MemoryBuffer::getMemBuffer(
+ Buffer->getBuffer(), Buffer->getBufferIdentifier()),
+ std::move(User), std::move(Group), std::move(Type),
+ std::move(Perms));
+}
+
+static ErrorOr<const detail::InMemoryNode *>
+lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir,
+ const Twine &P) {
+ SmallString<128> Path;
+ P.toVector(Path);
+
+ // Fix up relative paths. This just prepends the current working directory.
+ std::error_code EC = FS.makeAbsolute(Path);
+ assert(!EC);
+ (void)EC;
+
+ if (FS.useNormalizedPaths())
+ llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+
+ if (Path.empty())
+ return Dir;
+
+ auto I = llvm::sys::path::begin(Path), E = llvm::sys::path::end(Path);
+ while (true) {
+ detail::InMemoryNode *Node = Dir->getChild(*I);
+ ++I;
+ if (!Node)
+ return errc::no_such_file_or_directory;
+
+ // Return the file if it's at the end of the path.
+ if (auto File = dyn_cast<detail::InMemoryFile>(Node)) {
+ if (I == E)
+ return File;
+ return errc::no_such_file_or_directory;
+ }
+
+    // If Node is a hard link, return the resolved file.
+ if (auto File = dyn_cast<detail::InMemoryHardLink>(Node)) {
+ if (I == E)
+ return &File->getResolvedFile();
+ return errc::no_such_file_or_directory;
+ }
+ // Traverse directories.
+ Dir = cast<detail::InMemoryDirectory>(Node);
+ if (I == E)
+ return Dir;
+ }
+}
+
+bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
+ const Twine &ToPath) {
+ auto FromNode = lookupInMemoryNode(*this, Root.get(), FromPath);
+ auto ToNode = lookupInMemoryNode(*this, Root.get(), ToPath);
+ // FromPath must not have been added before. ToPath must have been added
+ // before. Resolved ToPath must be a File.
+ if (!ToNode || FromNode || !isa<detail::InMemoryFile>(*ToNode))
+ return false;
+ return this->addFile(FromPath, 0, nullptr, None, None, None, None,
+ cast<detail::InMemoryFile>(*ToNode));
+}
+
+llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
+ auto Node = lookupInMemoryNode(*this, Root.get(), Path);
+ if (Node)
+ return detail::getNodeStatus(*Node, Path.str());
+ return Node.getError();
+}
+
+llvm::ErrorOr<std::unique_ptr<File>>
+InMemoryFileSystem::openFileForRead(const Twine &Path) {
+ auto Node = lookupInMemoryNode(*this, Root.get(), Path);
+ if (!Node)
+ return Node.getError();
+
+  // When we have a file, provide a heap-allocated wrapper for the memory buffer
+ // to match the ownership semantics for File.
+ if (auto *F = dyn_cast<detail::InMemoryFile>(*Node))
+ return std::unique_ptr<File>(
+ new detail::InMemoryFileAdaptor(*F, Path.str()));
+
+ // FIXME: errc::not_a_file?
+ return make_error_code(llvm::errc::invalid_argument);
+}
+
+namespace {
+
+/// Adaptor from InMemoryDir::iterator to directory_iterator.
+class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl {
+ detail::InMemoryDirectory::const_iterator I;
+ detail::InMemoryDirectory::const_iterator E;
+ std::string RequestedDirName;
+
+ void setCurrentEntry() {
+ if (I != E) {
+ SmallString<256> Path(RequestedDirName);
+ llvm::sys::path::append(Path, I->second->getFileName());
+ sys::fs::file_type Type;
+ switch (I->second->getKind()) {
+ case detail::IME_File:
+ case detail::IME_HardLink:
+ Type = sys::fs::file_type::regular_file;
+ break;
+ case detail::IME_Directory:
+ Type = sys::fs::file_type::directory_file;
+ break;
+ }
+ CurrentEntry = directory_entry(Path.str(), Type);
+ } else {
+ // When we're at the end, make CurrentEntry invalid and DirIterImpl will
+ // do the rest.
+ CurrentEntry = directory_entry();
+ }
+ }
+
+public:
+ InMemoryDirIterator() = default;
+
+ explicit InMemoryDirIterator(const detail::InMemoryDirectory &Dir,
+ std::string RequestedDirName)
+ : I(Dir.begin()), E(Dir.end()),
+ RequestedDirName(std::move(RequestedDirName)) {
+ setCurrentEntry();
+ }
+
+ std::error_code increment() override {
+ ++I;
+ setCurrentEntry();
+ return {};
+ }
+};
+
+} // namespace
+
+directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir,
+ std::error_code &EC) {
+ auto Node = lookupInMemoryNode(*this, Root.get(), Dir);
+ if (!Node) {
+ EC = Node.getError();
+ return directory_iterator(std::make_shared<InMemoryDirIterator>());
+ }
+
+ if (auto *DirNode = dyn_cast<detail::InMemoryDirectory>(*Node))
+ return directory_iterator(
+ std::make_shared<InMemoryDirIterator>(*DirNode, Dir.str()));
+
+ EC = make_error_code(llvm::errc::not_a_directory);
+ return directory_iterator(std::make_shared<InMemoryDirIterator>());
+}
+
+std::error_code InMemoryFileSystem::setCurrentWorkingDirectory(const Twine &P) {
+ SmallString<128> Path;
+ P.toVector(Path);
+
+ // Fix up relative paths. This just prepends the current working directory.
+ std::error_code EC = makeAbsolute(Path);
+ assert(!EC);
+ (void)EC;
+
+ if (useNormalizedPaths())
+ llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+
+ if (!Path.empty())
+ WorkingDirectory = Path.str();
+ return {};
+}
+
+std::error_code
+InMemoryFileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ auto CWD = getCurrentWorkingDirectory();
+ if (!CWD || CWD->empty())
+ return errc::operation_not_permitted;
+ Path.toVector(Output);
+ if (auto EC = makeAbsolute(Output))
+ return EC;
+ llvm::sys::path::remove_dots(Output, /*remove_dot_dot=*/true);
+ return {};
+}
+
+std::error_code InMemoryFileSystem::isLocal(const Twine &Path, bool &Result) {
+ Result = false;
+ return {};
+}
+
+} // namespace vfs
+} // namespace llvm
+
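Editor's note, not part of the diff: the in-memory tree above is normally populated with addFile() and read back through the generic FileSystem interface; a small sketch follows. Paths and contents are made up.

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"

    using namespace llvm;

    static void inMemoryDemo() {
      IntrusiveRefCntPtr<vfs::InMemoryFileSystem> FS(
          new vfs::InMemoryFileSystem());

      // Intermediate directories ("/src") are created implicitly by addFile().
      FS->addFile("/src/a.h", /*ModificationTime=*/0,
                  MemoryBuffer::getMemBuffer("#define A 1\n"));

      // addHardLink() only succeeds if the target was added before as a file.
      bool Linked = FS->addHardLink("/src/b.h", "/src/a.h");
      (void)Linked;

      // Reads resolve the hard link to the underlying file's buffer.
      if (auto Buf = FS->getBufferForFile("/src/b.h"))
        (void)(*Buf)->getBuffer(); // "#define A 1\n"
    }
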
+//===-----------------------------------------------------------------------===/
+// RedirectingFileSystem implementation
+//===-----------------------------------------------------------------------===/
+
+// FIXME: reuse implementation common with OverlayFSDirIterImpl as these
+// iterators are conceptually similar.
+class llvm::vfs::VFSFromYamlDirIterImpl
+ : public llvm::vfs::detail::DirIterImpl {
+ std::string Dir;
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator Current, End;
+
+  // To handle 'fallthrough' mode we need to iterate first through the
+  // RedirectingDirectoryEntry and then through ExternalFS. These operations are
+  // done sequentially; we just need to keep track of what kind of iteration
+  // we are currently performing.
+
+ /// Flag telling if we should iterate through ExternalFS or stop at the last
+ /// RedirectingDirectoryEntry::iterator.
+ bool IterateExternalFS;
+ /// Flag telling if we have switched to iterating through ExternalFS.
+ bool IsExternalFSCurrent = false;
+ FileSystem &ExternalFS;
+ directory_iterator ExternalDirIter;
+ llvm::StringSet<> SeenNames;
+
+ /// To combine multiple iterations, different methods are responsible for
+ /// different iteration steps.
+ /// @{
+
+ /// Responsible for dispatching between RedirectingDirectoryEntry iteration
+ /// and ExternalFS iteration.
+ std::error_code incrementImpl(bool IsFirstTime);
+ /// Responsible for RedirectingDirectoryEntry iteration.
+ std::error_code incrementContent(bool IsFirstTime);
+ /// Responsible for ExternalFS iteration.
+ std::error_code incrementExternal();
+ /// @}
+
+public:
+ VFSFromYamlDirIterImpl(
+ const Twine &Path,
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator Begin,
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator End,
+ bool IterateExternalFS, FileSystem &ExternalFS, std::error_code &EC);
+
+ std::error_code increment() override;
+};
+
+llvm::ErrorOr<std::string>
+RedirectingFileSystem::getCurrentWorkingDirectory() const {
+ return ExternalFS->getCurrentWorkingDirectory();
+}
+
+std::error_code
+RedirectingFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
+ return ExternalFS->setCurrentWorkingDirectory(Path);
+}
+
+std::error_code RedirectingFileSystem::isLocal(const Twine &Path,
+ bool &Result) {
+ return ExternalFS->isLocal(Path, Result);
+}
+
+directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
+ std::error_code &EC) {
+ ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Dir);
+ if (!E) {
+ EC = E.getError();
+ if (IsFallthrough && EC == errc::no_such_file_or_directory)
+ return ExternalFS->dir_begin(Dir, EC);
+ return {};
+ }
+ ErrorOr<Status> S = status(Dir, *E);
+ if (!S) {
+ EC = S.getError();
+ return {};
+ }
+ if (!S->isDirectory()) {
+ EC = std::error_code(static_cast<int>(errc::not_a_directory),
+ std::system_category());
+ return {};
+ }
+
+ auto *D = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(*E);
+ return directory_iterator(std::make_shared<VFSFromYamlDirIterImpl>(
+ Dir, D->contents_begin(), D->contents_end(),
+ /*IterateExternalFS=*/IsFallthrough, *ExternalFS, EC));
+}
+
+void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) {
+ ExternalContentsPrefixDir = PrefixDir.str();
+}
+
+StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const {
+ return ExternalContentsPrefixDir;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const {
+ for (const auto &Root : Roots)
+ dumpEntry(Root.get());
+}
+
+LLVM_DUMP_METHOD void
+RedirectingFileSystem::dumpEntry(RedirectingFileSystem::Entry *E,
+ int NumSpaces) const {
+ StringRef Name = E->getName();
+ for (int i = 0, e = NumSpaces; i < e; ++i)
+ dbgs() << " ";
+ dbgs() << "'" << Name.str().c_str() << "'"
+ << "\n";
+
+ if (E->getKind() == RedirectingFileSystem::EK_Directory) {
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E);
+ assert(DE && "Should be a directory");
+
+ for (std::unique_ptr<Entry> &SubEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end()))
+ dumpEntry(SubEntry.get(), NumSpaces + 2);
+ }
+}
+#endif
+
+/// A helper class to hold the common YAML parsing state.
+class llvm::vfs::RedirectingFileSystemParser {
+ yaml::Stream &Stream;
+
+ void error(yaml::Node *N, const Twine &Msg) { Stream.printError(N, Msg); }
+
+ // false on error
+ bool parseScalarString(yaml::Node *N, StringRef &Result,
+ SmallVectorImpl<char> &Storage) {
+ const auto *S = dyn_cast<yaml::ScalarNode>(N);
+
+ if (!S) {
+ error(N, "expected string");
+ return false;
+ }
+ Result = S->getValue(Storage);
+ return true;
+ }
+
+ // false on error
+ bool parseScalarBool(yaml::Node *N, bool &Result) {
+ SmallString<5> Storage;
+ StringRef Value;
+ if (!parseScalarString(N, Value, Storage))
+ return false;
+
+ if (Value.equals_lower("true") || Value.equals_lower("on") ||
+ Value.equals_lower("yes") || Value == "1") {
+ Result = true;
+ return true;
+ } else if (Value.equals_lower("false") || Value.equals_lower("off") ||
+ Value.equals_lower("no") || Value == "0") {
+ Result = false;
+ return true;
+ }
+
+ error(N, "expected boolean value");
+ return false;
+ }
+
+ struct KeyStatus {
+ bool Required;
+ bool Seen = false;
+
+ KeyStatus(bool Required = false) : Required(Required) {}
+ };
+
+ using KeyStatusPair = std::pair<StringRef, KeyStatus>;
+
+ // false on error
+ bool checkDuplicateOrUnknownKey(yaml::Node *KeyNode, StringRef Key,
+ DenseMap<StringRef, KeyStatus> &Keys) {
+ if (!Keys.count(Key)) {
+ error(KeyNode, "unknown key");
+ return false;
+ }
+ KeyStatus &S = Keys[Key];
+ if (S.Seen) {
+ error(KeyNode, Twine("duplicate key '") + Key + "'");
+ return false;
+ }
+ S.Seen = true;
+ return true;
+ }
+
+ // false on error
+ bool checkMissingKeys(yaml::Node *Obj, DenseMap<StringRef, KeyStatus> &Keys) {
+ for (const auto &I : Keys) {
+ if (I.second.Required && !I.second.Seen) {
+ error(Obj, Twine("missing key '") + I.first + "'");
+ return false;
+ }
+ }
+ return true;
+ }
+
+ RedirectingFileSystem::Entry *
+ lookupOrCreateEntry(RedirectingFileSystem *FS, StringRef Name,
+ RedirectingFileSystem::Entry *ParentEntry = nullptr) {
+    if (!ParentEntry) { // Look for an existing root
+ for (const auto &Root : FS->Roots) {
+ if (Name.equals(Root->getName())) {
+ ParentEntry = Root.get();
+ return ParentEntry;
+ }
+ }
+ } else { // Advance to the next component
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ ParentEntry);
+ for (std::unique_ptr<RedirectingFileSystem::Entry> &Content :
+ llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+ auto *DirContent =
+ dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ Content.get());
+ if (DirContent && Name.equals(Content->getName()))
+ return DirContent;
+ }
+ }
+
+ // ... or create a new one
+ std::unique_ptr<RedirectingFileSystem::Entry> E =
+ llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ Name, Status("", getNextVirtualUniqueID(),
+ std::chrono::system_clock::now(), 0, 0, 0,
+ file_type::directory_file, sys::fs::all_all));
+
+ if (!ParentEntry) { // Add a new root to the overlay
+ FS->Roots.push_back(std::move(E));
+ ParentEntry = FS->Roots.back().get();
+ return ParentEntry;
+ }
+
+ auto *DE =
+ dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(ParentEntry);
+ DE->addContent(std::move(E));
+ return DE->getLastContent();
+ }
+
+ void uniqueOverlayTree(RedirectingFileSystem *FS,
+ RedirectingFileSystem::Entry *SrcE,
+ RedirectingFileSystem::Entry *NewParentE = nullptr) {
+ StringRef Name = SrcE->getName();
+ switch (SrcE->getKind()) {
+ case RedirectingFileSystem::EK_Directory: {
+ auto *DE =
+ dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(SrcE);
+ assert(DE && "Must be a directory");
+      // Empty directories could be present in the YAML as a way to
+      // describe a file for the current directory after some of its subdirs
+      // have been parsed. This only leads to redundant walks; ignore it.
+ if (!Name.empty())
+ NewParentE = lookupOrCreateEntry(FS, Name, NewParentE);
+ for (std::unique_ptr<RedirectingFileSystem::Entry> &SubEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end()))
+ uniqueOverlayTree(FS, SubEntry.get(), NewParentE);
+ break;
+ }
+ case RedirectingFileSystem::EK_File: {
+ auto *FE = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(SrcE);
+ assert(FE && "Must be a file");
+ assert(NewParentE && "Parent entry must exist");
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ NewParentE);
+ DE->addContent(
+ llvm::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
+ Name, FE->getExternalContentsPath(), FE->getUseName()));
+ break;
+ }
+ }
+ }
+
+ std::unique_ptr<RedirectingFileSystem::Entry>
+ parseEntry(yaml::Node *N, RedirectingFileSystem *FS, bool IsRootEntry) {
+ auto *M = dyn_cast<yaml::MappingNode>(N);
+ if (!M) {
+ error(N, "expected mapping node for file or directory entry");
+ return nullptr;
+ }
+
+ KeyStatusPair Fields[] = {
+ KeyStatusPair("name", true),
+ KeyStatusPair("type", true),
+ KeyStatusPair("contents", false),
+ KeyStatusPair("external-contents", false),
+ KeyStatusPair("use-external-name", false),
+ };
+
+ DenseMap<StringRef, KeyStatus> Keys(std::begin(Fields), std::end(Fields));
+
+ bool HasContents = false; // external or otherwise
+ std::vector<std::unique_ptr<RedirectingFileSystem::Entry>>
+ EntryArrayContents;
+ std::string ExternalContentsPath;
+ std::string Name;
+ yaml::Node *NameValueNode;
+ auto UseExternalName =
+ RedirectingFileSystem::RedirectingFileEntry::NK_NotSet;
+ RedirectingFileSystem::EntryKind Kind;
+
+ for (auto &I : *M) {
+ StringRef Key;
+ // Reuse the buffer for key and value, since we don't look at key after
+ // parsing value.
+ SmallString<256> Buffer;
+ if (!parseScalarString(I.getKey(), Key, Buffer))
+ return nullptr;
+
+ if (!checkDuplicateOrUnknownKey(I.getKey(), Key, Keys))
+ return nullptr;
+
+ StringRef Value;
+ if (Key == "name") {
+ if (!parseScalarString(I.getValue(), Value, Buffer))
+ return nullptr;
+
+ NameValueNode = I.getValue();
+ if (FS->UseCanonicalizedPaths) {
+ SmallString<256> Path(Value);
+ // Guarantee that old YAML files containing paths with ".." and "."
+        // are properly canonicalized before being read into the VFS.
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ Name = Path.str();
+ } else {
+ Name = Value;
+ }
+ } else if (Key == "type") {
+ if (!parseScalarString(I.getValue(), Value, Buffer))
+ return nullptr;
+ if (Value == "file")
+ Kind = RedirectingFileSystem::EK_File;
+ else if (Value == "directory")
+ Kind = RedirectingFileSystem::EK_Directory;
+ else {
+ error(I.getValue(), "unknown value for 'type'");
+ return nullptr;
+ }
+ } else if (Key == "contents") {
+ if (HasContents) {
+ error(I.getKey(),
+ "entry already has 'contents' or 'external-contents'");
+ return nullptr;
+ }
+ HasContents = true;
+ auto *Contents = dyn_cast<yaml::SequenceNode>(I.getValue());
+ if (!Contents) {
+ // FIXME: this is only for directories, what about files?
+ error(I.getValue(), "expected array");
+ return nullptr;
+ }
+
+ for (auto &I : *Contents) {
+ if (std::unique_ptr<RedirectingFileSystem::Entry> E =
+ parseEntry(&I, FS, /*IsRootEntry*/ false))
+ EntryArrayContents.push_back(std::move(E));
+ else
+ return nullptr;
+ }
+ } else if (Key == "external-contents") {
+ if (HasContents) {
+ error(I.getKey(),
+ "entry already has 'contents' or 'external-contents'");
+ return nullptr;
+ }
+ HasContents = true;
+ if (!parseScalarString(I.getValue(), Value, Buffer))
+ return nullptr;
+
+ SmallString<256> FullPath;
+ if (FS->IsRelativeOverlay) {
+ FullPath = FS->getExternalContentsPrefixDir();
+ assert(!FullPath.empty() &&
+ "External contents prefix directory must exist");
+ llvm::sys::path::append(FullPath, Value);
+ } else {
+ FullPath = Value;
+ }
+
+ if (FS->UseCanonicalizedPaths) {
+ // Guarantee that old YAML files containing paths with ".." and "."
+          // are properly canonicalized before being read into the VFS.
+ FullPath = sys::path::remove_leading_dotslash(FullPath);
+ sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true);
+ }
+ ExternalContentsPath = FullPath.str();
+ } else if (Key == "use-external-name") {
+ bool Val;
+ if (!parseScalarBool(I.getValue(), Val))
+ return nullptr;
+ UseExternalName =
+ Val ? RedirectingFileSystem::RedirectingFileEntry::NK_External
+ : RedirectingFileSystem::RedirectingFileEntry::NK_Virtual;
+ } else {
+ llvm_unreachable("key missing from Keys");
+ }
+ }
+
+ if (Stream.failed())
+ return nullptr;
+
+ // check for missing keys
+ if (!HasContents) {
+ error(N, "missing key 'contents' or 'external-contents'");
+ return nullptr;
+ }
+ if (!checkMissingKeys(N, Keys))
+ return nullptr;
+
+ // check invalid configuration
+ if (Kind == RedirectingFileSystem::EK_Directory &&
+ UseExternalName !=
+ RedirectingFileSystem::RedirectingFileEntry::NK_NotSet) {
+ error(N, "'use-external-name' is not supported for directories");
+ return nullptr;
+ }
+
+ if (IsRootEntry && !sys::path::is_absolute(Name)) {
+ assert(NameValueNode && "Name presence should be checked earlier");
+ error(NameValueNode,
+ "entry with relative path at the root level is not discoverable");
+ return nullptr;
+ }
+
+ // Remove trailing slash(es), being careful not to remove the root path
+ StringRef Trimmed(Name);
+ size_t RootPathLen = sys::path::root_path(Trimmed).size();
+ while (Trimmed.size() > RootPathLen &&
+ sys::path::is_separator(Trimmed.back()))
+ Trimmed = Trimmed.slice(0, Trimmed.size() - 1);
+ // Get the last component
+ StringRef LastComponent = sys::path::filename(Trimmed);
+
+ std::unique_ptr<RedirectingFileSystem::Entry> Result;
+ switch (Kind) {
+ case RedirectingFileSystem::EK_File:
+ Result = llvm::make_unique<RedirectingFileSystem::RedirectingFileEntry>(
+ LastComponent, std::move(ExternalContentsPath), UseExternalName);
+ break;
+ case RedirectingFileSystem::EK_Directory:
+ Result =
+ llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ LastComponent, std::move(EntryArrayContents),
+ Status("", getNextVirtualUniqueID(),
+ std::chrono::system_clock::now(), 0, 0, 0,
+ file_type::directory_file, sys::fs::all_all));
+ break;
+ }
+
+ StringRef Parent = sys::path::parent_path(Trimmed);
+ if (Parent.empty())
+ return Result;
+
+    // If 'name' contains multiple components, create implicit directory entries
+ for (sys::path::reverse_iterator I = sys::path::rbegin(Parent),
+ E = sys::path::rend(Parent);
+ I != E; ++I) {
+ std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> Entries;
+ Entries.push_back(std::move(Result));
+ Result =
+ llvm::make_unique<RedirectingFileSystem::RedirectingDirectoryEntry>(
+ *I, std::move(Entries),
+ Status("", getNextVirtualUniqueID(),
+ std::chrono::system_clock::now(), 0, 0, 0,
+ file_type::directory_file, sys::fs::all_all));
+ }
+ return Result;
+ }
+
+public:
+ RedirectingFileSystemParser(yaml::Stream &S) : Stream(S) {}
+
+ // false on error
+ bool parse(yaml::Node *Root, RedirectingFileSystem *FS) {
+ auto *Top = dyn_cast<yaml::MappingNode>(Root);
+ if (!Top) {
+ error(Root, "expected mapping node");
+ return false;
+ }
+
+ KeyStatusPair Fields[] = {
+ KeyStatusPair("version", true),
+ KeyStatusPair("case-sensitive", false),
+ KeyStatusPair("use-external-names", false),
+ KeyStatusPair("overlay-relative", false),
+ KeyStatusPair("fallthrough", false),
+ KeyStatusPair("roots", true),
+ };
+
+ DenseMap<StringRef, KeyStatus> Keys(std::begin(Fields), std::end(Fields));
+ std::vector<std::unique_ptr<RedirectingFileSystem::Entry>> RootEntries;
+
+ // Parse configuration and 'roots'
+ for (auto &I : *Top) {
+ SmallString<10> KeyBuffer;
+ StringRef Key;
+ if (!parseScalarString(I.getKey(), Key, KeyBuffer))
+ return false;
+
+ if (!checkDuplicateOrUnknownKey(I.getKey(), Key, Keys))
+ return false;
+
+ if (Key == "roots") {
+ auto *Roots = dyn_cast<yaml::SequenceNode>(I.getValue());
+ if (!Roots) {
+ error(I.getValue(), "expected array");
+ return false;
+ }
+
+ for (auto &I : *Roots) {
+ if (std::unique_ptr<RedirectingFileSystem::Entry> E =
+ parseEntry(&I, FS, /*IsRootEntry*/ true))
+ RootEntries.push_back(std::move(E));
+ else
+ return false;
+ }
+ } else if (Key == "version") {
+ StringRef VersionString;
+ SmallString<4> Storage;
+ if (!parseScalarString(I.getValue(), VersionString, Storage))
+ return false;
+ int Version;
+ if (VersionString.getAsInteger<int>(10, Version)) {
+ error(I.getValue(), "expected integer");
+ return false;
+ }
+ if (Version < 0) {
+ error(I.getValue(), "invalid version number");
+ return false;
+ }
+ if (Version != 0) {
+ error(I.getValue(), "version mismatch, expected 0");
+ return false;
+ }
+ } else if (Key == "case-sensitive") {
+ if (!parseScalarBool(I.getValue(), FS->CaseSensitive))
+ return false;
+ } else if (Key == "overlay-relative") {
+ if (!parseScalarBool(I.getValue(), FS->IsRelativeOverlay))
+ return false;
+ } else if (Key == "use-external-names") {
+ if (!parseScalarBool(I.getValue(), FS->UseExternalNames))
+ return false;
+ } else if (Key == "fallthrough") {
+ if (!parseScalarBool(I.getValue(), FS->IsFallthrough))
+ return false;
+ } else {
+ llvm_unreachable("key missing from Keys");
+ }
+ }
+
+ if (Stream.failed())
+ return false;
+
+ if (!checkMissingKeys(Top, Keys))
+ return false;
+
+    // Now that we successfully parsed the YAML file, canonicalize the internal
+ // representation to a proper directory tree so that we can search faster
+ // inside the VFS.
+ for (auto &E : RootEntries)
+ uniqueOverlayTree(FS, E.get());
+
+ return true;
+ }
+};
+
+RedirectingFileSystem *
+RedirectingFileSystem::create(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath, void *DiagContext,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS) {
+ SourceMgr SM;
+ yaml::Stream Stream(Buffer->getMemBufferRef(), SM);
+
+ SM.setDiagHandler(DiagHandler, DiagContext);
+ yaml::document_iterator DI = Stream.begin();
+ yaml::Node *Root = DI->getRoot();
+ if (DI == Stream.end() || !Root) {
+ SM.PrintMessage(SMLoc(), SourceMgr::DK_Error, "expected root node");
+ return nullptr;
+ }
+
+ RedirectingFileSystemParser P(Stream);
+
+ std::unique_ptr<RedirectingFileSystem> FS(
+ new RedirectingFileSystem(std::move(ExternalFS)));
+
+ if (!YAMLFilePath.empty()) {
+ // Use the YAML path from -ivfsoverlay to compute the dir to be prefixed
+ // to each 'external-contents' path.
+ //
+ // Example:
+ // -ivfsoverlay dummy.cache/vfs/vfs.yaml
+ // yields:
+ // FS->ExternalContentsPrefixDir => /<absolute_path_to>/dummy.cache/vfs
+ //
+ SmallString<256> OverlayAbsDir = sys::path::parent_path(YAMLFilePath);
+ std::error_code EC = llvm::sys::fs::make_absolute(OverlayAbsDir);
+ assert(!EC && "Overlay dir final path must be absolute");
+ (void)EC;
+ FS->setExternalContentsPrefixDir(OverlayAbsDir);
+ }
+
+ if (!P.parse(Root, FS.get()))
+ return nullptr;
+
+ return FS.release();
+}
+
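Editor's note, not part of the diff: create() consumes a YAML description with the 'version'/'roots' keys checked by the parser above. A sketch of building one from an inline buffer follows; all file names are hypothetical, and the overlay is combined with the real file system as the external layer.

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"

    using namespace llvm;

    static IntrusiveRefCntPtr<vfs::FileSystem> makeRedirectingFS() {
      // Maps the virtual path /virtual/cfg.h onto a real on-disk file.
      static const char *Overlay =
          "{ 'version': 0,\n"
          "  'fallthrough': 'true',\n"
          "  'roots': [\n"
          "    { 'name': '/virtual', 'type': 'directory',\n"
          "      'contents': [\n"
          "        { 'name': 'cfg.h', 'type': 'file',\n"
          "          'external-contents': '/real/build/cfg.h' }\n"
          "      ]\n"
          "    }\n"
          "  ]\n"
          "}\n";

      return vfs::getVFSFromYAML(MemoryBuffer::getMemBuffer(Overlay),
                                 /*DiagHandler=*/nullptr,
                                 /*YAMLFilePath=*/"",
                                 /*DiagContext=*/nullptr,
                                 vfs::getRealFileSystem());
    }
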
+ErrorOr<RedirectingFileSystem::Entry *>
+RedirectingFileSystem::lookupPath(const Twine &Path_) const {
+ SmallString<256> Path;
+ Path_.toVector(Path);
+
+ // Handle relative paths
+ if (std::error_code EC = makeAbsolute(Path))
+ return EC;
+
+  // Canonicalize path by removing ".", "..", "./", etc. components. This is
+  // a VFS request; do not bother about symlinks in the path components,
+  // but canonicalize in order to perform the correct entry search.
+ if (UseCanonicalizedPaths) {
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ }
+
+ if (Path.empty())
+ return make_error_code(llvm::errc::invalid_argument);
+
+ sys::path::const_iterator Start = sys::path::begin(Path);
+ sys::path::const_iterator End = sys::path::end(Path);
+ for (const auto &Root : Roots) {
+ ErrorOr<RedirectingFileSystem::Entry *> Result =
+ lookupPath(Start, End, Root.get());
+ if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
+ return Result;
+ }
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+}
+
+ErrorOr<RedirectingFileSystem::Entry *>
+RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
+ sys::path::const_iterator End,
+ RedirectingFileSystem::Entry *From) const {
+#ifndef _WIN32
+ assert(!isTraversalComponent(*Start) &&
+ !isTraversalComponent(From->getName()) &&
+ "Paths should not contain traversal components");
+#else
+  // FIXME: this is here to support Windows; remove it once canonicalized
+  // paths become globally default.
+ if (Start->equals("."))
+ ++Start;
+#endif
+
+ StringRef FromName = From->getName();
+
+ // Forward the search to the next component in case this is an empty one.
+ if (!FromName.empty()) {
+ if (CaseSensitive ? !Start->equals(FromName)
+ : !Start->equals_lower(FromName))
+ // failure to match
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+
+ ++Start;
+
+ if (Start == End) {
+ // Match!
+ return From;
+ }
+ }
+
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(From);
+ if (!DE)
+ return make_error_code(llvm::errc::not_a_directory);
+
+ for (const std::unique_ptr<RedirectingFileSystem::Entry> &DirEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+ ErrorOr<RedirectingFileSystem::Entry *> Result =
+ lookupPath(Start, End, DirEntry.get());
+ if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
+ return Result;
+ }
+ return make_error_code(llvm::errc::no_such_file_or_directory);
+}
+
+static Status getRedirectedFileStatus(const Twine &Path, bool UseExternalNames,
+ Status ExternalStatus) {
+ Status S = ExternalStatus;
+ if (!UseExternalNames)
+ S = Status::copyWithNewName(S, Path.str());
+ S.IsVFSMapped = true;
+ return S;
+}
+
+ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path,
+ RedirectingFileSystem::Entry *E) {
+ assert(E != nullptr);
+ if (auto *F = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(E)) {
+ ErrorOr<Status> S = ExternalFS->status(F->getExternalContentsPath());
+ assert(!S || S->getName() == F->getExternalContentsPath());
+ if (S)
+ return getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames),
+ *S);
+ return S;
+ } else { // directory
+ auto *DE = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E);
+ return Status::copyWithNewName(DE->getStatus(), Path.str());
+ }
+}
+
+ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path) {
+ ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path);
+ if (!Result) {
+ if (IsFallthrough &&
+ Result.getError() == llvm::errc::no_such_file_or_directory) {
+ return ExternalFS->status(Path);
+ }
+ return Result.getError();
+ }
+ return status(Path, *Result);
+}
+
+namespace {
+
+/// Provide a file wrapper with an overridden status.
+class FileWithFixedStatus : public File {
+ std::unique_ptr<File> InnerFile;
+ Status S;
+
+public:
+ FileWithFixedStatus(std::unique_ptr<File> InnerFile, Status S)
+ : InnerFile(std::move(InnerFile)), S(std::move(S)) {}
+
+ ErrorOr<Status> status() override { return S; }
+  ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
+ bool IsVolatile) override {
+ return InnerFile->getBuffer(Name, FileSize, RequiresNullTerminator,
+ IsVolatile);
+ }
+
+ std::error_code close() override { return InnerFile->close(); }
+};
+
+} // namespace
+
+ErrorOr<std::unique_ptr<File>>
+RedirectingFileSystem::openFileForRead(const Twine &Path) {
+ ErrorOr<RedirectingFileSystem::Entry *> E = lookupPath(Path);
+ if (!E) {
+ if (IsFallthrough &&
+ E.getError() == llvm::errc::no_such_file_or_directory) {
+ return ExternalFS->openFileForRead(Path);
+ }
+ return E.getError();
+ }
+
+ auto *F = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(*E);
+ if (!F) // FIXME: errc::not_a_file?
+ return make_error_code(llvm::errc::invalid_argument);
+
+ auto Result = ExternalFS->openFileForRead(F->getExternalContentsPath());
+ if (!Result)
+ return Result;
+
+ auto ExternalStatus = (*Result)->status();
+ if (!ExternalStatus)
+ return ExternalStatus.getError();
+
+ // FIXME: Update the status with the name and VFSMapped.
+ Status S = getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames),
+ *ExternalStatus);
+ return std::unique_ptr<File>(
+ llvm::make_unique<FileWithFixedStatus>(std::move(*Result), S));
+}
+
+std::error_code
+RedirectingFileSystem::getRealPath(const Twine &Path,
+ SmallVectorImpl<char> &Output) const {
+ ErrorOr<RedirectingFileSystem::Entry *> Result = lookupPath(Path);
+ if (!Result) {
+ if (IsFallthrough &&
+ Result.getError() == llvm::errc::no_such_file_or_directory) {
+ return ExternalFS->getRealPath(Path, Output);
+ }
+ return Result.getError();
+ }
+
+ if (auto *F =
+ dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(*Result)) {
+ return ExternalFS->getRealPath(F->getExternalContentsPath(), Output);
+ }
+ // Even if there is a directory entry, fall back to ExternalFS if allowed,
+ // because directories don't have a single external contents path.
+ return IsFallthrough ? ExternalFS->getRealPath(Path, Output)
+ : llvm::errc::invalid_argument;
+}
+
+IntrusiveRefCntPtr<FileSystem>
+vfs::getVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath, void *DiagContext,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS) {
+ return RedirectingFileSystem::create(std::move(Buffer), DiagHandler,
+ YAMLFilePath, DiagContext,
+ std::move(ExternalFS));
+}
+
+static void getVFSEntries(RedirectingFileSystem::Entry *SrcE,
+ SmallVectorImpl<StringRef> &Path,
+ SmallVectorImpl<YAMLVFSEntry> &Entries) {
+ auto Kind = SrcE->getKind();
+ if (Kind == RedirectingFileSystem::EK_Directory) {
+ auto *DE = dyn_cast<RedirectingFileSystem::RedirectingDirectoryEntry>(SrcE);
+ assert(DE && "Must be a directory");
+ for (std::unique_ptr<RedirectingFileSystem::Entry> &SubEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+ Path.push_back(SubEntry->getName());
+ getVFSEntries(SubEntry.get(), Path, Entries);
+ Path.pop_back();
+ }
+ return;
+ }
+
+ assert(Kind == RedirectingFileSystem::EK_File && "Must be a EK_File");
+ auto *FE = dyn_cast<RedirectingFileSystem::RedirectingFileEntry>(SrcE);
+ assert(FE && "Must be a file");
+ SmallString<128> VPath;
+ for (auto &Comp : Path)
+ llvm::sys::path::append(VPath, Comp);
+ Entries.push_back(YAMLVFSEntry(VPath.c_str(), FE->getExternalContentsPath()));
+}
+
+void vfs::collectVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath,
+ SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
+ void *DiagContext,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS) {
+ RedirectingFileSystem *VFS = RedirectingFileSystem::create(
+ std::move(Buffer), DiagHandler, YAMLFilePath, DiagContext,
+ std::move(ExternalFS));
+ ErrorOr<RedirectingFileSystem::Entry *> RootE = VFS->lookupPath("/");
+ if (!RootE)
+ return;
+ SmallVector<StringRef, 8> Components;
+ Components.push_back("/");
+ getVFSEntries(*RootE, Components, CollectedEntries);
+}
+
+UniqueID vfs::getNextVirtualUniqueID() {
+ static std::atomic<unsigned> UID;
+ unsigned ID = ++UID;
+ // The following assumes that uint64_t max will never collide with a real
+ // dev_t value from the OS.
+ return UniqueID(std::numeric_limits<uint64_t>::max(), ID);
+}
+
+void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef RealPath) {
+ assert(sys::path::is_absolute(VirtualPath) && "virtual path not absolute");
+ assert(sys::path::is_absolute(RealPath) && "real path not absolute");
+ assert(!pathHasTraversal(VirtualPath) && "path traversal is not supported");
+ Mappings.emplace_back(VirtualPath, RealPath);
+}
+
+namespace {
+
+class JSONWriter {
+ llvm::raw_ostream &OS;
+ SmallVector<StringRef, 16> DirStack;
+
+ unsigned getDirIndent() { return 4 * DirStack.size(); }
+ unsigned getFileIndent() { return 4 * (DirStack.size() + 1); }
+ bool containedIn(StringRef Parent, StringRef Path);
+ StringRef containedPart(StringRef Parent, StringRef Path);
+ void startDirectory(StringRef Path);
+ void endDirectory();
+ void writeEntry(StringRef VPath, StringRef RPath);
+
+public:
+ JSONWriter(llvm::raw_ostream &OS) : OS(OS) {}
+
+ void write(ArrayRef<YAMLVFSEntry> Entries, Optional<bool> UseExternalNames,
+ Optional<bool> IsCaseSensitive, Optional<bool> IsOverlayRelative,
+ StringRef OverlayDir);
+};
+
+} // namespace
+
+bool JSONWriter::containedIn(StringRef Parent, StringRef Path) {
+ using namespace llvm::sys;
+
+ // Compare each path component.
+ auto IParent = path::begin(Parent), EParent = path::end(Parent);
+ for (auto IChild = path::begin(Path), EChild = path::end(Path);
+ IParent != EParent && IChild != EChild; ++IParent, ++IChild) {
+ if (*IParent != *IChild)
+ return false;
+ }
+ // Have we exhausted the parent path?
+ return IParent == EParent;
+}
+
+StringRef JSONWriter::containedPart(StringRef Parent, StringRef Path) {
+ assert(!Parent.empty());
+ assert(containedIn(Parent, Path));
+ return Path.slice(Parent.size() + 1, StringRef::npos);
+}
+
+void JSONWriter::startDirectory(StringRef Path) {
+ StringRef Name =
+ DirStack.empty() ? Path : containedPart(DirStack.back(), Path);
+ DirStack.push_back(Path);
+ unsigned Indent = getDirIndent();
+ OS.indent(Indent) << "{\n";
+ OS.indent(Indent + 2) << "'type': 'directory',\n";
+ OS.indent(Indent + 2) << "'name': \"" << llvm::yaml::escape(Name) << "\",\n";
+ OS.indent(Indent + 2) << "'contents': [\n";
+}
+
+void JSONWriter::endDirectory() {
+ unsigned Indent = getDirIndent();
+ OS.indent(Indent + 2) << "]\n";
+ OS.indent(Indent) << "}";
+
+ DirStack.pop_back();
+}
+
+void JSONWriter::writeEntry(StringRef VPath, StringRef RPath) {
+ unsigned Indent = getFileIndent();
+ OS.indent(Indent) << "{\n";
+ OS.indent(Indent + 2) << "'type': 'file',\n";
+ OS.indent(Indent + 2) << "'name': \"" << llvm::yaml::escape(VPath) << "\",\n";
+ OS.indent(Indent + 2) << "'external-contents': \""
+ << llvm::yaml::escape(RPath) << "\"\n";
+ OS.indent(Indent) << "}";
+}
+
+void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries,
+ Optional<bool> UseExternalNames,
+ Optional<bool> IsCaseSensitive,
+ Optional<bool> IsOverlayRelative,
+ StringRef OverlayDir) {
+ using namespace llvm::sys;
+
+ OS << "{\n"
+ " 'version': 0,\n";
+ if (IsCaseSensitive.hasValue())
+ OS << " 'case-sensitive': '"
+ << (IsCaseSensitive.getValue() ? "true" : "false") << "',\n";
+ if (UseExternalNames.hasValue())
+ OS << " 'use-external-names': '"
+ << (UseExternalNames.getValue() ? "true" : "false") << "',\n";
+ bool UseOverlayRelative = false;
+ if (IsOverlayRelative.hasValue()) {
+ UseOverlayRelative = IsOverlayRelative.getValue();
+ OS << " 'overlay-relative': '" << (UseOverlayRelative ? "true" : "false")
+ << "',\n";
+ }
+ OS << " 'roots': [\n";
+
+ if (!Entries.empty()) {
+ const YAMLVFSEntry &Entry = Entries.front();
+ startDirectory(path::parent_path(Entry.VPath));
+
+ StringRef RPath = Entry.RPath;
+ if (UseOverlayRelative) {
+ unsigned OverlayDirLen = OverlayDir.size();
+ assert(RPath.substr(0, OverlayDirLen) == OverlayDir &&
+ "Overlay dir must be contained in RPath");
+ RPath = RPath.slice(OverlayDirLen, RPath.size());
+ }
+
+ writeEntry(path::filename(Entry.VPath), RPath);
+
+ for (const auto &Entry : Entries.slice(1)) {
+ StringRef Dir = path::parent_path(Entry.VPath);
+ if (Dir == DirStack.back())
+ OS << ",\n";
+ else {
+ while (!DirStack.empty() && !containedIn(DirStack.back(), Dir)) {
+ OS << "\n";
+ endDirectory();
+ }
+ OS << ",\n";
+ startDirectory(Dir);
+ }
+ StringRef RPath = Entry.RPath;
+ if (UseOverlayRelative) {
+ unsigned OverlayDirLen = OverlayDir.size();
+ assert(RPath.substr(0, OverlayDirLen) == OverlayDir &&
+ "Overlay dir must be contained in RPath");
+ RPath = RPath.slice(OverlayDirLen, RPath.size());
+ }
+ writeEntry(path::filename(Entry.VPath), RPath);
+ }
+
+ while (!DirStack.empty()) {
+ OS << "\n";
+ endDirectory();
+ }
+ OS << "\n";
+ }
+
+ OS << " ]\n"
+ << "}\n";
+}
+
+void YAMLVFSWriter::write(llvm::raw_ostream &OS) {
+ llvm::sort(Mappings, [](const YAMLVFSEntry &LHS, const YAMLVFSEntry &RHS) {
+ return LHS.VPath < RHS.VPath;
+ });
+
+ JSONWriter(OS).write(Mappings, UseExternalNames, IsCaseSensitive,
+ IsOverlayRelative, OverlayDir);
+}
+
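Editor's note, not part of the diff: YAMLVFSWriter is the producer side of the format consumed by RedirectingFileSystem. A sketch of emitting a one-entry overlay; the paths are made up and must be absolute, as the asserts in addFileMapping() require.

    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void writeOverlay(raw_ostream &OS) {
      vfs::YAMLVFSWriter Writer;
      // Virtual path -> real path; entries are sorted by virtual path in write().
      Writer.addFileMapping("/virtual/include/cfg.h", "/real/build/cfg.h");
      Writer.write(OS); // Emits the YAML handled by RedirectingFileSystem above.
    }
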
+VFSFromYamlDirIterImpl::VFSFromYamlDirIterImpl(
+ const Twine &_Path,
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator Begin,
+ RedirectingFileSystem::RedirectingDirectoryEntry::iterator End,
+ bool IterateExternalFS, FileSystem &ExternalFS, std::error_code &EC)
+ : Dir(_Path.str()), Current(Begin), End(End),
+ IterateExternalFS(IterateExternalFS), ExternalFS(ExternalFS) {
+ EC = incrementImpl(/*IsFirstTime=*/true);
+}
+
+std::error_code VFSFromYamlDirIterImpl::increment() {
+ return incrementImpl(/*IsFirstTime=*/false);
+}
+
+std::error_code VFSFromYamlDirIterImpl::incrementExternal() {
+ assert(!(IsExternalFSCurrent && ExternalDirIter == directory_iterator()) &&
+ "incrementing past end");
+ std::error_code EC;
+ if (IsExternalFSCurrent) {
+ ExternalDirIter.increment(EC);
+ } else if (IterateExternalFS) {
+ ExternalDirIter = ExternalFS.dir_begin(Dir, EC);
+ IsExternalFSCurrent = true;
+ if (EC && EC != errc::no_such_file_or_directory)
+ return EC;
+ EC = {};
+ }
+ if (EC || ExternalDirIter == directory_iterator()) {
+ CurrentEntry = directory_entry();
+ } else {
+ CurrentEntry = *ExternalDirIter;
+ }
+ return EC;
+}
+
+std::error_code VFSFromYamlDirIterImpl::incrementContent(bool IsFirstTime) {
+ assert((IsFirstTime || Current != End) && "cannot iterate past end");
+ if (!IsFirstTime)
+ ++Current;
+ while (Current != End) {
+ SmallString<128> PathStr(Dir);
+ llvm::sys::path::append(PathStr, (*Current)->getName());
+ sys::fs::file_type Type;
+ switch ((*Current)->getKind()) {
+ case RedirectingFileSystem::EK_Directory:
+ Type = sys::fs::file_type::directory_file;
+ break;
+ case RedirectingFileSystem::EK_File:
+ Type = sys::fs::file_type::regular_file;
+ break;
+ }
+ CurrentEntry = directory_entry(PathStr.str(), Type);
+ return {};
+ }
+ return incrementExternal();
+}
+
+std::error_code VFSFromYamlDirIterImpl::incrementImpl(bool IsFirstTime) {
+ while (true) {
+ std::error_code EC = IsExternalFSCurrent ? incrementExternal()
+ : incrementContent(IsFirstTime);
+ if (EC || CurrentEntry.path().empty())
+ return EC;
+ StringRef Name = llvm::sys::path::filename(CurrentEntry.path());
+ if (SeenNames.insert(Name).second)
+ return EC; // name not seen before
+ }
+ llvm_unreachable("returned above");
+}
+
+vfs::recursive_directory_iterator::recursive_directory_iterator(
+ FileSystem &FS_, const Twine &Path, std::error_code &EC)
+ : FS(&FS_) {
+ directory_iterator I = FS->dir_begin(Path, EC);
+ if (I != directory_iterator()) {
+ State = std::make_shared<detail::RecDirIterState>();
+ State->Stack.push(I);
+ }
+}
+
+vfs::recursive_directory_iterator &
+recursive_directory_iterator::increment(std::error_code &EC) {
+ assert(FS && State && !State->Stack.empty() && "incrementing past end");
+ assert(!State->Stack.top()->path().empty() && "non-canonical end iterator");
+ vfs::directory_iterator End;
+
+ if (State->HasNoPushRequest)
+ State->HasNoPushRequest = false;
+ else {
+ if (State->Stack.top()->type() == sys::fs::file_type::directory_file) {
+ vfs::directory_iterator I = FS->dir_begin(State->Stack.top()->path(), EC);
+ if (I != End) {
+ State->Stack.push(I);
+ return *this;
+ }
+ }
+ }
+
+ while (!State->Stack.empty() && State->Stack.top().increment(EC) == End)
+ State->Stack.pop();
+
+ if (State->Stack.empty())
+ State.reset(); // end iterator
+
+ return *this;
+}
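Editor's note, not part of the diff: the recursive iterator above works against any of the file systems defined in this file; a short walking sketch, with the root directory name purely illustrative.

    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void walk(vfs::FileSystem &FS, const Twine &Root) {
      std::error_code EC;
      // The end iterator is default-constructed; increment() reports errors in EC.
      for (vfs::recursive_directory_iterator I(FS, Root, EC), E;
           I != E && !EC; I.increment(EC))
        outs() << (*I).path() << "\n";
    }
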
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
index f425d607af47..d34aa763124c 100644
--- a/contrib/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -416,7 +416,7 @@ static std::error_code rename_internal(HANDLE FromHandle, const Twine &To,
*reinterpret_cast<FILE_RENAME_INFO *>(RenameInfoBuf.data());
RenameInfo.ReplaceIfExists = ReplaceIfExists;
RenameInfo.RootDirectory = 0;
- RenameInfo.FileNameLength = ToWide.size();
+ RenameInfo.FileNameLength = ToWide.size() * sizeof(wchar_t);
std::copy(ToWide.begin(), ToWide.end(), &RenameInfo.FileName[0]);
SetLastError(ERROR_SUCCESS);
@@ -766,10 +766,12 @@ std::error_code setPermissions(const Twine &Path, perms Permissions) {
return std::error_code();
}
-std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time) {
- FILETIME FT = toFILETIME(Time);
+std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
+ TimePoint<> ModificationTime) {
+ FILETIME AccessFT = toFILETIME(AccessTime);
+ FILETIME ModifyFT = toFILETIME(ModificationTime);
HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
- if (!SetFileTime(FileHandle, NULL, &FT, &FT))
+ if (!SetFileTime(FileHandle, NULL, &AccessFT, &ModifyFT))
return mapWindowsError(::GetLastError());
return std::error_code();
}
@@ -852,16 +854,37 @@ mapped_file_region::mapped_file_region(int fd, mapmode mode, size_t length,
Mapping = 0;
}
+static bool hasFlushBufferKernelBug() {
+ static bool Ret{GetWindowsOSVersion() < llvm::VersionTuple(10, 0, 0, 17763)};
+ return Ret;
+}
+
+static bool isEXE(StringRef Magic) {
+ static const char PEMagic[] = {'P', 'E', '\0', '\0'};
+ if (Magic.startswith(StringRef("MZ")) && Magic.size() >= 0x3c + 4) {
+ uint32_t off = read32le(Magic.data() + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (Magic.substr(off).startswith(StringRef(PEMagic, sizeof(PEMagic))))
+ return true;
+ }
+ return false;
+}
+
mapped_file_region::~mapped_file_region() {
if (Mapping) {
+
+ bool Exe = isEXE(StringRef((char *)Mapping, Size));
+
::UnmapViewOfFile(Mapping);
- if (Mode == mapmode::readwrite) {
+ if (Mode == mapmode::readwrite && Exe && hasFlushBufferKernelBug()) {
// There is a Windows kernel bug, the exact trigger conditions of which
// are not well understood. When triggered, dirty pages are not properly
// flushed and subsequent process's attempts to read a file can return
// invalid data. Calling FlushFileBuffers on the write handle is
// sufficient to ensure that this bug is not triggered.
+ // The bug only occurs when writing an executable and executing it right
+ // after, under high I/O pressure.
::FlushFileBuffers(FileHandle);
}
@@ -900,28 +923,28 @@ static basic_file_status status_from_find_data(WIN32_FIND_DATAW *FindData) {
FindData->nFileSizeHigh, FindData->nFileSizeLow);
}
-std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
- StringRef path,
- bool follow_symlinks) {
- SmallVector<wchar_t, 128> path_utf16;
+std::error_code detail::directory_iterator_construct(detail::DirIterState &IT,
+ StringRef Path,
+ bool FollowSymlinks) {
+ SmallVector<wchar_t, 128> PathUTF16;
- if (std::error_code ec = widenPath(path, path_utf16))
- return ec;
+ if (std::error_code EC = widenPath(Path, PathUTF16))
+ return EC;
// Convert path to the format that Windows is happy with.
- if (path_utf16.size() > 0 &&
- !is_separator(path_utf16[path.size() - 1]) &&
- path_utf16[path.size() - 1] != L':') {
- path_utf16.push_back(L'\\');
- path_utf16.push_back(L'*');
+ if (PathUTF16.size() > 0 &&
+ !is_separator(PathUTF16[Path.size() - 1]) &&
+ PathUTF16[Path.size() - 1] != L':') {
+ PathUTF16.push_back(L'\\');
+ PathUTF16.push_back(L'*');
} else {
- path_utf16.push_back(L'*');
+ PathUTF16.push_back(L'*');
}
// Get the first directory entry.
WIN32_FIND_DATAW FirstFind;
ScopedFindHandle FindHandle(::FindFirstFileExW(
- c_str(path_utf16), FindExInfoBasic, &FirstFind, FindExSearchNameMatch,
+ c_str(PathUTF16), FindExInfoBasic, &FirstFind, FindExSearchNameMatch,
NULL, FIND_FIRST_EX_LARGE_FETCH));
if (!FindHandle)
return mapWindowsError(::GetLastError());
@@ -934,43 +957,45 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
DWORD LastError = ::GetLastError();
// Check for end.
if (LastError == ERROR_NO_MORE_FILES)
- return detail::directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(IT);
return mapWindowsError(LastError);
} else
FilenameLen = ::wcslen(FirstFind.cFileName);
// Construct the current directory entry.
- SmallString<128> directory_entry_name_utf8;
- if (std::error_code ec =
+ SmallString<128> DirectoryEntryNameUTF8;
+ if (std::error_code EC =
UTF16ToUTF8(FirstFind.cFileName, ::wcslen(FirstFind.cFileName),
- directory_entry_name_utf8))
- return ec;
+ DirectoryEntryNameUTF8))
+ return EC;
- it.IterationHandle = intptr_t(FindHandle.take());
- SmallString<128> directory_entry_path(path);
- path::append(directory_entry_path, directory_entry_name_utf8);
- it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks,
- status_from_find_data(&FirstFind));
+ IT.IterationHandle = intptr_t(FindHandle.take());
+ SmallString<128> DirectoryEntryPath(Path);
+ path::append(DirectoryEntryPath, DirectoryEntryNameUTF8);
+ IT.CurrentEntry =
+ directory_entry(DirectoryEntryPath, FollowSymlinks,
+ file_type_from_attrs(FirstFind.dwFileAttributes),
+ status_from_find_data(&FirstFind));
return std::error_code();
}
-std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
- if (it.IterationHandle != 0)
+std::error_code detail::directory_iterator_destruct(detail::DirIterState &IT) {
+ if (IT.IterationHandle != 0)
// Closes the handle if it's valid.
- ScopedFindHandle close(HANDLE(it.IterationHandle));
- it.IterationHandle = 0;
- it.CurrentEntry = directory_entry();
+ ScopedFindHandle close(HANDLE(IT.IterationHandle));
+ IT.IterationHandle = 0;
+ IT.CurrentEntry = directory_entry();
return std::error_code();
}
-std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
+std::error_code detail::directory_iterator_increment(detail::DirIterState &IT) {
WIN32_FIND_DATAW FindData;
- if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
+ if (!::FindNextFileW(HANDLE(IT.IterationHandle), &FindData)) {
DWORD LastError = ::GetLastError();
// Check for end.
if (LastError == ERROR_NO_MORE_FILES)
- return detail::directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(IT);
return mapWindowsError(LastError);
}
@@ -978,16 +1003,18 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') ||
(FilenameLen == 2 && FindData.cFileName[0] == L'.' &&
FindData.cFileName[1] == L'.'))
- return directory_iterator_increment(it);
+ return directory_iterator_increment(IT);
- SmallString<128> directory_entry_path_utf8;
- if (std::error_code ec =
+ SmallString<128> DirectoryEntryPathUTF8;
+ if (std::error_code EC =
UTF16ToUTF8(FindData.cFileName, ::wcslen(FindData.cFileName),
- directory_entry_path_utf8))
- return ec;
+ DirectoryEntryPathUTF8))
+ return EC;
- it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8),
- status_from_find_data(&FindData));
+ IT.CurrentEntry.replace_filename(
+ Twine(DirectoryEntryPathUTF8),
+ file_type_from_attrs(FindData.dwFileAttributes),
+ status_from_find_data(&FindData));
return std::error_code();
}
@@ -1226,6 +1253,17 @@ static void expandTildeExpr(SmallVectorImpl<char> &Path) {
Path.insert(Path.begin() + 1, HomeDir.begin() + 1, HomeDir.end());
}
+void expand_tilde(const Twine &path, SmallVectorImpl<char> &dest) {
+ dest.clear();
+ if (path.isTriviallyEmpty())
+ return;
+
+ path.toVector(dest);
+ expandTildeExpr(dest);
+
+ return;
+}
+
std::error_code real_path(const Twine &path, SmallVectorImpl<char> &dest,
bool expand_tilde) {
dest.clear();
@@ -1264,10 +1302,6 @@ static bool getKnownFolderPath(KNOWNFOLDERID folderId,
return ok;
}
-bool getUserCacheDir(SmallVectorImpl<char> &Result) {
- return getKnownFolderPath(FOLDERID_LocalAppData, Result);
-}
-
bool home_directory(SmallVectorImpl<char> &result) {
return getKnownFolderPath(FOLDERID_Profile, result);
}
diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
index 30126568769c..2b2d79231434 100644
--- a/contrib/llvm/lib/Support/Windows/Process.inc
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WindowsError.h"
#include <malloc.h>
@@ -140,73 +142,59 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
return std::string(Res.data());
}
-static const char *AllocateString(const SmallVectorImpl<char> &S,
- BumpPtrAllocator &Alloc) {
- char *Buf = reinterpret_cast<char *>(Alloc.Allocate(S.size() + 1, 1));
- ::memcpy(Buf, S.data(), S.size());
- Buf[S.size()] = '\0';
- return Buf;
-}
-
-/// Convert Arg from UTF-16 to UTF-8 and push it onto Args.
-static std::error_code ConvertAndPushArg(const wchar_t *Arg,
- SmallVectorImpl<const char *> &Args,
- BumpPtrAllocator &Alloc) {
- SmallVector<char, MAX_PATH> ArgString;
- if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), ArgString))
- return ec;
- Args.push_back(AllocateString(ArgString, Alloc));
- return std::error_code();
-}
-
-/// Perform wildcard expansion of Arg, or just push it into Args if it
-/// doesn't have wildcards or doesn't match any files.
-static std::error_code WildcardExpand(const wchar_t *Arg,
+/// Perform wildcard expansion of Arg, or just push it into Args if it doesn't
+/// have wildcards or doesn't match any files.
+static std::error_code WildcardExpand(StringRef Arg,
SmallVectorImpl<const char *> &Args,
- BumpPtrAllocator &Alloc) {
- if (!wcspbrk(Arg, L"*?")) {
- // Arg does not contain any wildcard characters. This is the common case.
- return ConvertAndPushArg(Arg, Args, Alloc);
- }
+ StringSaver &Saver) {
+ std::error_code EC;
- if (wcscmp(Arg, L"/?") == 0 || wcscmp(Arg, L"-?") == 0) {
- // Don't wildcard expand /?. Always treat it as an option.
- return ConvertAndPushArg(Arg, Args, Alloc);
+ // Don't expand Arg if it does not contain any wildcard characters. This is
+ // the common case. Also don't wildcard expand /?. Always treat it as an
+ // option.
+ if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" ||
+ Arg == "-?") {
+ Args.push_back(Arg.data());
+ return EC;
}
- // Extract any directory part of the argument.
- SmallVector<char, MAX_PATH> Dir;
- if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), Dir))
- return ec;
- sys::path::remove_filename(Dir);
- const int DirSize = Dir.size();
+ // Convert back to UTF-16 so we can call FindFirstFileW.
+ SmallVector<wchar_t, MAX_PATH> ArgW;
+ EC = windows::UTF8ToUTF16(Arg, ArgW);
+ if (EC)
+ return EC;
// Search for matching files.
// FIXME: This assumes the wildcard is only in the file name and not in the
// directory portion of the file path. For example, it doesn't handle
// "*\foo.c" nor "s?c\bar.cpp".
WIN32_FIND_DATAW FileData;
- HANDLE FindHandle = FindFirstFileW(Arg, &FileData);
+ HANDLE FindHandle = FindFirstFileW(ArgW.data(), &FileData);
if (FindHandle == INVALID_HANDLE_VALUE) {
- return ConvertAndPushArg(Arg, Args, Alloc);
+ Args.push_back(Arg.data());
+ return EC;
}
- std::error_code ec;
+ // Extract any directory part of the argument.
+ SmallString<MAX_PATH> Dir = Arg;
+ sys::path::remove_filename(Dir);
+ const int DirSize = Dir.size();
+
do {
- SmallVector<char, MAX_PATH> FileName;
- ec = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
+ SmallString<MAX_PATH> FileName;
+ EC = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
FileName);
- if (ec)
+ if (EC)
break;
// Append FileName to Dir, and remove it afterwards.
- llvm::sys::path::append(Dir, StringRef(FileName.data(), FileName.size()));
- Args.push_back(AllocateString(Dir, Alloc));
+ llvm::sys::path::append(Dir, FileName);
+ Args.push_back(Saver.save(StringRef(Dir)).data());
Dir.resize(DirSize);
} while (FindNextFileW(FindHandle, &FileData));
FindClose(FindHandle);
- return ec;
+ return EC;
}
static std::error_code GetExecutableName(SmallVectorImpl<char> &Filename) {
@@ -243,18 +231,20 @@ static std::error_code GetExecutableName(SmallVectorImpl<char> &Filename) {
std::error_code
windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
BumpPtrAllocator &Alloc) {
- int ArgCount;
- std::unique_ptr<wchar_t *[], decltype(&LocalFree)> UnicodeCommandLine{
- CommandLineToArgvW(GetCommandLineW(), &ArgCount), &LocalFree};
- if (!UnicodeCommandLine)
- return mapWindowsError(::GetLastError());
-
+ const wchar_t *CmdW = GetCommandLineW();
+ assert(CmdW);
std::error_code EC;
+ SmallString<MAX_PATH> Cmd;
+ EC = windows::UTF16ToUTF8(CmdW, wcslen(CmdW), Cmd);
+ if (EC)
+ return EC;
- Args.reserve(ArgCount);
+ SmallVector<const char *, 20> TmpArgs;
+ StringSaver Saver(Alloc);
+ cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
- for (int I = 0; I < ArgCount; ++I) {
- EC = WildcardExpand(UnicodeCommandLine[I], Args, Alloc);
+ for (const char *Arg : TmpArgs) {
+ EC = WildcardExpand(Arg, Args, Saver);
if (EC)
return EC;
}
@@ -266,7 +256,7 @@ windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
if (EC)
return EC;
sys::path::append(Arg0, Filename);
- Args[0] = AllocateString(Arg0, Alloc);
+ Args[0] = Saver.save(Arg0).data();
return std::error_code();
}
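
The rewrite drops CommandLineToArgvW in favour of converting the whole command line to UTF-8 once and tokenizing it with cl::TokenizeWindowsCommandLine, keeping every argument alive in a StringSaver. A standalone sketch of that tokenization; the command-line string and function name are made up for illustration:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/StringSaver.h"

static void tokenizeExample() {
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  llvm::SmallVector<const char *, 8> Argv;
  // Splits on whitespace and strips Windows-style quoting; each token is
  // copied into Alloc so the pointers stay valid after this call.
  llvm::cl::TokenizeWindowsCommandLine("clang -D\"NAME=value\" a.c", Saver,
                                       Argv, /*MarkEOLs=*/false);
  // Argv now holds {"clang", "-DNAME=value", "a.c"}.
}
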
@@ -328,6 +318,15 @@ bool Process::StandardErrHasColors() {
static bool UseANSI = false;
void Process::UseANSIEscapeCodes(bool enable) {
+#if defined(ENABLE_VIRTUAL_TERMINAL_PROCESSING)
+ if (enable) {
+ HANDLE Console = GetStdHandle(STD_OUTPUT_HANDLE);
+ DWORD Mode;
+ GetConsoleMode(Console, &Mode);
+ Mode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
+ SetConsoleMode(Console, Mode);
+ }
+#endif
UseANSI = enable;
}
@@ -461,3 +460,27 @@ unsigned Process::GetRandomNumber() {
ReportLastErrorFatal("Could not generate a random number");
return Ret;
}
+
+typedef NTSTATUS(WINAPI* RtlGetVersionPtr)(PRTL_OSVERSIONINFOW);
+#define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
+
+llvm::VersionTuple llvm::GetWindowsOSVersion() {
+ HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
+ if (hMod) {
+ auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
+ if (getVer) {
+ RTL_OSVERSIONINFOEXW info{};
+ info.dwOSVersionInfoSize = sizeof(info);
+ if (getVer((PRTL_OSVERSIONINFOW)&info) == STATUS_SUCCESS) {
+ return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0,
+ info.dwBuildNumber);
+ }
+ }
+ }
+ return llvm::VersionTuple(0, 0, 0, 0);
+}
+
+bool llvm::RunningWindows8OrGreater() {
+ // Windows 8 is version 6.2, service pack 0.
+ return GetWindowsOSVersion() >= llvm::VersionTuple(6, 2, 0, 0);
+}
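
GetWindowsOSVersion() is declared in the private WindowsSupport.h header (see below), so it is only reachable from other Windows/*.inc files; the typical consumer caches the comparison once, as hasFlushBufferKernelBug() does in Path.inc. A minimal sketch under that assumption; the helper name and the 17763 (Windows 10 1809) threshold merely mirror the workaround above:

#include "llvm/Support/VersionTuple.h"
// Assumes this Windows-only translation unit already includes the private
// "WindowsSupport.h" header that declares GetWindowsOSVersion().

static bool isAtLeastWin10_1809() {
  static bool Ret =
      llvm::GetWindowsOSVersion() >= llvm::VersionTuple(10, 0, 0, 17763);
  return Ret;
}
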
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
index cb68c5b10e52..c037956603f2 100644
--- a/contrib/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -105,6 +105,25 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name,
return std::string(U8Result.begin(), U8Result.end());
}
+bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix) {
+ if (!ErrMsg)
+ return true;
+ char *buffer = NULL;
+ DWORD LastError = GetLastError();
+ DWORD R = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ NULL, LastError, 0, (LPSTR)&buffer, 1, NULL);
+ if (R)
+ *ErrMsg = prefix + ": " + buffer;
+ else
+ *ErrMsg = prefix + ": Unknown error";
+ *ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")";
+
+ LocalFree(buffer);
+ return R != 0;
+}
+
static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
std::string *ErrMsg) {
HANDLE h;
@@ -317,7 +336,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
static bool argNeedsQuotes(StringRef Arg) {
if (Arg.empty())
return true;
- return StringRef::npos != Arg.find_first_of("\t \"&\'()*<>\\`^|");
+ return StringRef::npos != Arg.find_first_of("\t \"&\'()*<>\\`^|\n");
}
static std::string quoteSingleArg(StringRef Arg) {
diff --git a/contrib/llvm/lib/Support/Windows/Threading.inc b/contrib/llvm/lib/Support/Windows/Threading.inc
index decb48887af2..0bd92f66c6b8 100644
--- a/contrib/llvm/lib/Support/Windows/Threading.inc
+++ b/contrib/llvm/lib/Support/Windows/Threading.inc
@@ -14,7 +14,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
-#include "Windows/WindowsSupport.h"
+#include "WindowsSupport.h"
#include <process.h>
// Windows will at times define MemoryFence.
diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
index c2fd6bb982d4..979cc5d01390 100644
--- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
@@ -41,6 +41,7 @@
#include "llvm/Config/config.h" // Get build system configuration settings
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <string>
#include <system_error>
@@ -49,54 +50,29 @@
// Must be included after windows.h
#include <wincrypt.h>
+namespace llvm {
+
/// Determines if the program is running on Windows 8 or newer. This
/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
/// to supersede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't

/// yet have VersionHelpers.h, so we have our own helper.
-inline bool RunningWindows8OrGreater() {
- // Windows 8 is version 6.2, service pack 0.
- OSVERSIONINFOEXW osvi = {};
- osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
- osvi.dwMajorVersion = 6;
- osvi.dwMinorVersion = 2;
- osvi.wServicePackMajor = 0;
-
- DWORDLONG Mask = 0;
- Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL);
- Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL);
- Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);
-
- return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION |
- VER_SERVICEPACKMAJOR,
- Mask) != FALSE;
-}
+bool RunningWindows8OrGreater();
-inline bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix) {
- if (!ErrMsg)
- return true;
- char *buffer = NULL;
- DWORD LastError = GetLastError();
- DWORD R = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
- FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_MAX_WIDTH_MASK,
- NULL, LastError, 0, (LPSTR)&buffer, 1, NULL);
- if (R)
- *ErrMsg = prefix + ": " + buffer;
- else
- *ErrMsg = prefix + ": Unknown error";
- *ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")";
-
- LocalFree(buffer);
- return R != 0;
-}
+/// Returns the Windows version as Major.Minor.0.BuildNumber. Uses
+/// RtlGetVersion or GetVersionEx under the hood depending on what is available.
+/// GetVersionEx is deprecated, but this API exposes the build number which can
+/// be useful for working around certain kernel bugs.
+llvm::VersionTuple GetWindowsOSVersion();
+
+bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix);
template <typename HandleTraits>
class ScopedHandle {
typedef typename HandleTraits::handle_type handle_type;
handle_type Handle;
- ScopedHandle(const ScopedHandle &other); // = delete;
- void operator=(const ScopedHandle &other); // = delete;
+ ScopedHandle(const ScopedHandle &other) = delete;
+ void operator=(const ScopedHandle &other) = delete;
public:
ScopedHandle()
: Handle(HandleTraits::GetInvalid()) {}
@@ -201,7 +177,6 @@ typedef ScopedHandle<RegTraits> ScopedRegHandle;
typedef ScopedHandle<FindHandleTraits> ScopedFindHandle;
typedef ScopedHandle<JobHandleTraits> ScopedJobHandle;
-namespace llvm {
template <class T>
class SmallVectorImpl;
diff --git a/contrib/llvm/lib/Support/WithColor.cpp b/contrib/llvm/lib/Support/WithColor.cpp
index d2e13f0e86de..cf4c10956f21 100644
--- a/contrib/llvm/lib/Support/WithColor.cpp
+++ b/contrib/llvm/lib/Support/WithColor.cpp
@@ -19,15 +19,10 @@ static cl::opt<cl::boolOrDefault>
cl::desc("Use colors in output (default=autodetect)"),
cl::init(cl::BOU_UNSET));
-bool WithColor::colorsEnabled(raw_ostream &OS) {
- if (UseColor == cl::BOU_UNSET)
- return OS.has_colors();
- return UseColor == cl::BOU_TRUE;
-}
-
-WithColor::WithColor(raw_ostream &OS, HighlightColor Color) : OS(OS) {
+WithColor::WithColor(raw_ostream &OS, HighlightColor Color, bool DisableColors)
+ : OS(OS), DisableColors(DisableColors) {
// Detect color from terminal type unless the user passed the --color option.
- if (colorsEnabled(OS)) {
+ if (colorsEnabled()) {
switch (Color) {
case HighlightColor::Address:
OS.changeColor(raw_ostream::YELLOW);
@@ -56,6 +51,9 @@ WithColor::WithColor(raw_ostream &OS, HighlightColor Color) : OS(OS) {
case HighlightColor::Note:
OS.changeColor(raw_ostream::BLACK, true);
break;
+ case HighlightColor::Remark:
+ OS.changeColor(raw_ostream::BLUE, true);
+ break;
}
}
}
@@ -66,25 +64,58 @@ raw_ostream &WithColor::warning() { return warning(errs()); }
raw_ostream &WithColor::note() { return note(errs()); }
-raw_ostream &WithColor::error(raw_ostream &OS, StringRef Prefix) {
+raw_ostream &WithColor::remark() { return remark(errs()); }
+
+raw_ostream &WithColor::error(raw_ostream &OS, StringRef Prefix,
+ bool DisableColors) {
if (!Prefix.empty())
OS << Prefix << ": ";
- return WithColor(OS, HighlightColor::Error).get() << "error: ";
+ return WithColor(OS, HighlightColor::Error, DisableColors).get()
+ << "error: ";
}
-raw_ostream &WithColor::warning(raw_ostream &OS, StringRef Prefix) {
+raw_ostream &WithColor::warning(raw_ostream &OS, StringRef Prefix,
+ bool DisableColors) {
if (!Prefix.empty())
OS << Prefix << ": ";
- return WithColor(OS, HighlightColor::Warning).get() << "warning: ";
+ return WithColor(OS, HighlightColor::Warning, DisableColors).get()
+ << "warning: ";
}
-raw_ostream &WithColor::note(raw_ostream &OS, StringRef Prefix) {
+raw_ostream &WithColor::note(raw_ostream &OS, StringRef Prefix,
+ bool DisableColors) {
if (!Prefix.empty())
OS << Prefix << ": ";
- return WithColor(OS, HighlightColor::Note).get() << "note: ";
+ return WithColor(OS, HighlightColor::Note, DisableColors).get() << "note: ";
}
-WithColor::~WithColor() {
- if (colorsEnabled(OS))
+raw_ostream &WithColor::remark(raw_ostream &OS, StringRef Prefix,
+ bool DisableColors) {
+ if (!Prefix.empty())
+ OS << Prefix << ": ";
+ return WithColor(OS, HighlightColor::Remark, DisableColors).get()
+ << "remark: ";
+}
+
+bool WithColor::colorsEnabled() {
+ if (DisableColors)
+ return false;
+ if (UseColor == cl::BOU_UNSET)
+ return OS.has_colors();
+ return UseColor == cl::BOU_TRUE;
+}
+
+WithColor &WithColor::changeColor(raw_ostream::Colors Color, bool Bold,
+ bool BG) {
+ if (colorsEnabled())
+ OS.changeColor(Color, Bold, BG);
+ return *this;
+}
+
+WithColor &WithColor::resetColor() {
+ if (colorsEnabled())
OS.resetColor();
+ return *this;
}
+
+WithColor::~WithColor() { resetColor(); }
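
The WithColor additions (remark(), the DisableColors flag, and the chainable changeColor()/resetColor()) are used along these lines; the "mytool" prefix and message text are illustrative only, and the two-argument constructor assumes the header's default for DisableColors:

#include "llvm/Support/WithColor.h"
using namespace llvm;

static void diagExamples() {
  // Prefixed one-liners; color is skipped when DisableColors is true, when
  // --color says otherwise, or when the stream has no color support.
  WithColor::error(errs(), "mytool") << "something went wrong\n";
  WithColor::remark(errs(), "mytool", /*DisableColors=*/true)
      << "loop vectorized\n";

  // Scoped highlighting: the destructor calls resetColor().
  WithColor(errs(), HighlightColor::Note).get() << "note text";
  WithColor Colored(errs(), HighlightColor::Warning);
  Colored.changeColor(raw_ostream::GREEN, /*Bold=*/true, /*BG=*/false);
  errs() << " ok";
  Colored.resetColor();
  errs() << "\n";
}
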
diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp
index d6345efd00cd..b9bbee7883c6 100644
--- a/contrib/llvm/lib/Support/YAMLTraits.cpp
+++ b/contrib/llvm/lib/Support/YAMLTraits.cpp
@@ -98,7 +98,7 @@ bool Input::setCurrentDocument() {
++DocIterator;
return setCurrentDocument();
}
- TopNode = this->createHNodes(N);
+ TopNode = createHNodes(N);
CurrentNode = TopNode.get();
return true;
}
@@ -341,9 +341,23 @@ void Input::scalarString(StringRef &S, QuotingType) {
void Input::blockScalarString(StringRef &S) { scalarString(S, QuotingType::None); }
+void Input::scalarTag(std::string &Tag) {
+ Tag = CurrentNode->_node->getVerbatimTag();
+}
+
void Input::setError(HNode *hnode, const Twine &message) {
assert(hnode && "HNode must not be NULL");
- this->setError(hnode->_node, message);
+ setError(hnode->_node, message);
+}
+
+NodeKind Input::getNodeKind() {
+ if (isa<ScalarHNode>(CurrentNode))
+ return NodeKind::Scalar;
+ else if (isa<MapHNode>(CurrentNode))
+ return NodeKind::Map;
+ else if (isa<SequenceHNode>(CurrentNode))
+ return NodeKind::Sequence;
+ llvm_unreachable("Unsupported node kind");
}
void Input::setError(Node *node, const Twine &message) {
@@ -366,7 +380,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
} else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
auto SQHNode = llvm::make_unique<SequenceHNode>(N);
for (Node &SN : *SQ) {
- auto Entry = this->createHNodes(&SN);
+ auto Entry = createHNodes(&SN);
if (EC)
break;
SQHNode->Entries.push_back(std::move(Entry));
@@ -391,7 +405,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
// Copy string to permanent storage
KeyStr = StringStorage.str().copy(StringAllocator);
}
- auto ValueHNode = this->createHNodes(Value);
+ auto ValueHNode = createHNodes(Value);
if (EC)
break;
mapHNode->Mapping[KeyStr] = std::move(ValueHNode);
@@ -406,7 +420,7 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
}
void Input::setError(const Twine &Message) {
- this->setError(CurrentNode, Message);
+ setError(CurrentNode, Message);
}
bool Input::canElideEmptySequence() {
@@ -436,15 +450,17 @@ bool Output::mapTag(StringRef Tag, bool Use) {
// If this tag is being written inside a sequence we should write the start
// of the sequence before writing the tag, otherwise the tag won't be
// attached to the element in the sequence, but rather the sequence itself.
- bool SequenceElement =
- StateStack.size() > 1 && (StateStack[StateStack.size() - 2] == inSeq ||
- StateStack[StateStack.size() - 2] == inFlowSeq);
+ bool SequenceElement = false;
+ if (StateStack.size() > 1) {
+ auto &E = StateStack[StateStack.size() - 2];
+ SequenceElement = inSeqAnyElement(E) || inFlowSeqAnyElement(E);
+ }
if (SequenceElement && StateStack.back() == inMapFirstKey) {
- this->newLineCheck();
+ newLineCheck();
} else {
- this->output(" ");
+ output(" ");
}
- this->output(Tag);
+ output(Tag);
if (SequenceElement) {
// If we're writing the tag during the first element of a map, the tag
// takes the place of the first element in the sequence.
@@ -461,6 +477,9 @@ bool Output::mapTag(StringRef Tag, bool Use) {
}
void Output::endMapping() {
+ // If we did not map anything, we should explicitly emit an empty map
+ if (StateStack.back() == inMapFirstKey)
+ output("{}");
StateStack.pop_back();
}
@@ -476,8 +495,8 @@ bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
if (State == inFlowMapFirstKey || State == inFlowMapOtherKey) {
flowKey(Key);
} else {
- this->newLineCheck();
- this->paddedKey(Key);
+ newLineCheck();
+ paddedKey(Key);
}
return true;
}
@@ -496,23 +515,23 @@ void Output::postflightKey(void *) {
void Output::beginFlowMapping() {
StateStack.push_back(inFlowMapFirstKey);
- this->newLineCheck();
+ newLineCheck();
ColumnAtMapFlowStart = Column;
output("{ ");
}
void Output::endFlowMapping() {
StateStack.pop_back();
- this->outputUpToEndOfLine(" }");
+ outputUpToEndOfLine(" }");
}
void Output::beginDocuments() {
- this->outputUpToEndOfLine("---");
+ outputUpToEndOfLine("---");
}
bool Output::preflightDocument(unsigned index) {
if (index > 0)
- this->outputUpToEndOfLine("\n---");
+ outputUpToEndOfLine("\n---");
return true;
}
@@ -524,12 +543,15 @@ void Output::endDocuments() {
}
unsigned Output::beginSequence() {
- StateStack.push_back(inSeq);
+ StateStack.push_back(inSeqFirstElement);
NeedsNewLine = true;
return 0;
}
void Output::endSequence() {
+ // If we did not emit anything, we should explicitly emit an empty sequence
+ if (StateStack.back() == inSeqFirstElement)
+ output("[]");
StateStack.pop_back();
}
@@ -538,11 +560,18 @@ bool Output::preflightElement(unsigned, void *&) {
}
void Output::postflightElement(void *) {
+ if (StateStack.back() == inSeqFirstElement) {
+ StateStack.pop_back();
+ StateStack.push_back(inSeqOtherElement);
+ } else if (StateStack.back() == inFlowSeqFirstElement) {
+ StateStack.pop_back();
+ StateStack.push_back(inFlowSeqOtherElement);
+ }
}
unsigned Output::beginFlowSequence() {
- StateStack.push_back(inFlowSeq);
- this->newLineCheck();
+ StateStack.push_back(inFlowSeqFirstElement);
+ newLineCheck();
ColumnAtFlowStart = Column;
output("[ ");
NeedFlowSequenceComma = false;
@@ -551,7 +580,7 @@ unsigned Output::beginFlowSequence() {
void Output::endFlowSequence() {
StateStack.pop_back();
- this->outputUpToEndOfLine(" ]");
+ outputUpToEndOfLine(" ]");
}
bool Output::preflightFlowElement(unsigned, void *&) {
@@ -577,8 +606,8 @@ void Output::beginEnumScalar() {
bool Output::matchEnumScalar(const char *Str, bool Match) {
if (Match && !EnumerationMatchFound) {
- this->newLineCheck();
- this->outputUpToEndOfLine(Str);
+ newLineCheck();
+ outputUpToEndOfLine(Str);
EnumerationMatchFound = true;
}
return false;
@@ -597,7 +626,7 @@ void Output::endEnumScalar() {
}
bool Output::beginBitSetScalar(bool &DoClear) {
- this->newLineCheck();
+ newLineCheck();
output("[ ");
NeedBitValueComma = false;
DoClear = false;
@@ -608,27 +637,27 @@ bool Output::bitSetMatch(const char *Str, bool Matches) {
if (Matches) {
if (NeedBitValueComma)
output(", ");
- this->output(Str);
+ output(Str);
NeedBitValueComma = true;
}
return false;
}
void Output::endBitSetScalar() {
- this->outputUpToEndOfLine(" ]");
+ outputUpToEndOfLine(" ]");
}
void Output::scalarString(StringRef &S, QuotingType MustQuote) {
- this->newLineCheck();
+ newLineCheck();
if (S.empty()) {
// Print '' for the empty string because leaving the field empty is not
// allowed.
- this->outputUpToEndOfLine("''");
+ outputUpToEndOfLine("''");
return;
}
if (MustQuote == QuotingType::None) {
// Only quote if we must.
- this->outputUpToEndOfLine(S);
+ outputUpToEndOfLine(S);
return;
}
@@ -645,7 +674,7 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
// escapes. This is handled in yaml::escape.
if (MustQuote == QuotingType::Double) {
output(yaml::escape(Base, /* EscapePrintable= */ false));
- this->outputUpToEndOfLine(Quote);
+ outputUpToEndOfLine(Quote);
return;
}
@@ -659,7 +688,7 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
++j;
}
output(StringRef(&Base[i], j - i));
- this->outputUpToEndOfLine(Quote); // Ending quote.
+ outputUpToEndOfLine(Quote); // Ending quote.
}
void Output::blockScalarString(StringRef &S) {
@@ -680,6 +709,14 @@ void Output::blockScalarString(StringRef &S) {
}
}
+void Output::scalarTag(std::string &Tag) {
+ if (Tag.empty())
+ return;
+ newLineCheck();
+ output(Tag);
+ output(" ");
+}
+
void Output::setError(const Twine &message) {
}
@@ -693,7 +730,7 @@ bool Output::canElideEmptySequence() {
return true;
if (StateStack.back() != inMapFirstKey)
return true;
- return (StateStack[StateStack.size()-2] != inSeq);
+ return !inSeqAnyElement(StateStack[StateStack.size() - 2]);
}
void Output::output(StringRef s) {
@@ -702,10 +739,9 @@ void Output::output(StringRef s) {
}
void Output::outputUpToEndOfLine(StringRef s) {
- this->output(s);
- if (StateStack.empty() || (StateStack.back() != inFlowSeq &&
- StateStack.back() != inFlowMapFirstKey &&
- StateStack.back() != inFlowMapOtherKey))
+ output(s);
+ if (StateStack.empty() || (!inFlowSeqAnyElement(StateStack.back()) &&
+ !inFlowMapAnyKey(StateStack.back())))
NeedsNewLine = true;
}
@@ -723,18 +759,22 @@ void Output::newLineCheck() {
return;
NeedsNewLine = false;
- this->outputNewLine();
+ outputNewLine();
+
+ if (StateStack.size() == 0)
+ return;
- assert(StateStack.size() > 0);
unsigned Indent = StateStack.size() - 1;
bool OutputDash = false;
- if (StateStack.back() == inSeq) {
+ if (StateStack.back() == inSeqFirstElement ||
+ StateStack.back() == inSeqOtherElement) {
OutputDash = true;
- } else if ((StateStack.size() > 1) && ((StateStack.back() == inMapFirstKey) ||
- (StateStack.back() == inFlowSeq) ||
- (StateStack.back() == inFlowMapFirstKey)) &&
- (StateStack[StateStack.size() - 2] == inSeq)) {
+ } else if ((StateStack.size() > 1) &&
+ ((StateStack.back() == inMapFirstKey) ||
+ inFlowSeqAnyElement(StateStack.back()) ||
+ (StateStack.back() == inFlowMapFirstKey)) &&
+ inSeqAnyElement(StateStack[StateStack.size() - 2])) {
--Indent;
OutputDash = true;
}
@@ -772,6 +812,24 @@ void Output::flowKey(StringRef Key) {
output(": ");
}
+NodeKind Output::getNodeKind() { report_fatal_error("invalid call"); }
+
+bool Output::inSeqAnyElement(InState State) {
+ return State == inSeqFirstElement || State == inSeqOtherElement;
+}
+
+bool Output::inFlowSeqAnyElement(InState State) {
+ return State == inFlowSeqFirstElement || State == inFlowSeqOtherElement;
+}
+
+bool Output::inMapAnyKey(InState State) {
+ return State == inMapFirstKey || State == inMapOtherKey;
+}
+
+bool Output::inFlowMapAnyKey(InState State) {
+ return State == inFlowMapFirstKey || State == inFlowMapOtherKey;
+}
+
//===----------------------------------------------------------------------===//
// traits for built-in types
//===----------------------------------------------------------------------===//
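
With the first-element/other-element state split above, yaml::Output now writes an explicit "[]" or "{}" for collections that produced no entries, instead of leaving the key with no value (which round-trips as null). A sketch of the effect; Item and Config are made-up types used only for illustration:

#include "llvm/Support/YAMLTraits.h"
#include <vector>

struct Item { int X; };
struct Config { std::vector<Item> Items; };

LLVM_YAML_IS_SEQUENCE_VECTOR(Item)

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Item> {
  static void mapping(IO &IO, Item &I) { IO.mapRequired("x", I.X); }
};
template <> struct MappingTraits<Config> {
  static void mapping(IO &IO, Config &C) { IO.mapRequired("items", C.Items); }
};
} // end namespace yaml
} // end namespace llvm

// yaml::Output Out(llvm::outs()); Config C; Out << C;
// An empty C.Items is now emitted as "items: []" rather than a bare
// "items:" line.
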
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
index 038ad00bd608..21dde7ff914a 100644
--- a/contrib/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -60,6 +60,7 @@
#endif
#ifdef _WIN32
+#include "llvm/Support/ConvertUTF.h"
#include "Windows/WindowsSupport.h"
#endif
@@ -567,6 +568,12 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
if (FD <= STDERR_FILENO)
ShouldClose = false;
+#ifdef _WIN32
+ // Check if this is a console device. This is not equivalent to isatty.
+ IsWindowsConsole =
+ ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR;
+#endif
+
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
#ifdef _WIN32
@@ -609,25 +616,77 @@ raw_fd_ostream::~raw_fd_ostream() {
/*GenCrashDiag=*/false);
}
+#if defined(_WIN32)
+// The most reliable way to print unicode in a Windows console is with
+// WriteConsoleW. To use that, first transcode from UTF-8 to UTF-16. This
+// assumes that LLVM programs always print valid UTF-8 to the console. The data
+// might not be UTF-8 for two major reasons:
+// 1. The program is printing binary (-filetype=obj -o -), in which case it
+// would have been gibberish anyway.
+// 2. The program is printing text in a semi-ascii compatible codepage like
+// shift-jis or cp1252.
+//
+// Most LLVM programs don't produce non-ascii text unless they are quoting
+// user source input. A well-behaved LLVM program should either validate that
+// the input is UTF-8 or transcode from the local codepage to UTF-8 before
+// quoting it. If they don't, this may mess up the encoding, but this is still
+// probably the best compromise we can make.
+static bool write_console_impl(int FD, StringRef Data) {
+ SmallVector<wchar_t, 256> WideText;
+
+ // Fall back to ::write if it wasn't valid UTF-8.
+ if (auto EC = sys::windows::UTF8ToUTF16(Data, WideText))
+ return false;
+
+ // On Windows 7 and earlier, WriteConsoleW has a low maximum amount of data
+ // that can be written to the console at a time.
+ size_t MaxWriteSize = WideText.size();
+ if (!RunningWindows8OrGreater())
+ MaxWriteSize = 32767;
+
+ size_t WCharsWritten = 0;
+ do {
+ size_t WCharsToWrite =
+ std::min(MaxWriteSize, WideText.size() - WCharsWritten);
+ DWORD ActuallyWritten;
+ bool Success =
+ ::WriteConsoleW((HANDLE)::_get_osfhandle(FD), &WideText[WCharsWritten],
+ WCharsToWrite, &ActuallyWritten,
+ /*Reserved=*/nullptr);
+
+ // The most likely reason for WriteConsoleW to fail is that FD no longer
+ // points to a console. Fall back to ::write. If this isn't the first loop
+ // iteration, something is truly wrong.
+ if (!Success)
+ return false;
+
+ WCharsWritten += ActuallyWritten;
+ } while (WCharsWritten != WideText.size());
+ return true;
+}
+#endif
+
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
- // The maximum write size is limited to SSIZE_MAX because a write
- // greater than SSIZE_MAX is implementation-defined in POSIX.
- // Since SSIZE_MAX is not portable, we use SIZE_MAX >> 1 instead.
- size_t MaxWriteSize = SIZE_MAX >> 1;
+#if defined(_WIN32)
+ // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16
+ // and using WriteConsoleW. If that fails, fall back to plain write().
+ if (IsWindowsConsole)
+ if (write_console_impl(FD, StringRef(Ptr, Size)))
+ return;
+#endif
+
+ // The maximum write size is limited to INT32_MAX. A write
+ // greater than SSIZE_MAX is implementation-defined in POSIX,
+ // and Windows _write requires 32 bit input.
+ size_t MaxWriteSize = INT32_MAX;
#if defined(__linux__)
// It is observed that Linux returns EINVAL for a very large write (>2G).
// Make it a reasonably small value.
MaxWriteSize = 1024 * 1024 * 1024;
-#elif defined(_WIN32)
- // Writing a large size of output to Windows console returns ENOMEM. It seems
- // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and
- // the latter has a size limit (66000 bytes or less, depending on heap usage).
- if (::_isatty(FD) && !RunningWindows8OrGreater())
- MaxWriteSize = 32767;
#endif
do {
@@ -696,8 +755,17 @@ void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
}
size_t raw_fd_ostream::preferred_buffer_size() const {
-#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
- // Windows and Minix have no st_blksize.
+#if defined(_WIN32)
+ // Disable buffering for console devices. Console output is re-encoded from
+ // UTF-8 to UTF-16 on Windows, and buffering it would require us to split the
+ // buffer on a valid UTF-8 codepoint boundary. Terminal buffering is disabled
+ // below on most other OSs, so do the same thing on Windows and avoid that
+ // complexity.
+ if (IsWindowsConsole)
+ return 0;
+ return raw_ostream::preferred_buffer_size();
+#elif !defined(__minix)
+ // Minix has no st_blksize.
assert(FD >= 0 && "File not yet open!");
struct stat statbuf;
if (fstat(FD, &statbuf) != 0)
@@ -846,3 +914,5 @@ void raw_null_ostream::pwrite_impl(const char *Ptr, size_t Size,
uint64_t Offset) {}
void raw_pwrite_stream::anchor() {}
+
+void buffer_ostream::anchor() {}
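
From a tool's point of view the console handling above is transparent: raw_fd_ostream detects FILE_TYPE_CHAR descriptors, disables buffering for them, and re-encodes UTF-8 to UTF-16 for WriteConsoleW, falling back to write() for pipes and files. Illustration only; the message text is arbitrary:

#include "llvm/Support/raw_ostream.h"

// On a Windows console this UTF-8 string is transcoded and written with
// WriteConsoleW; when stdout is redirected it still goes through write().
static void printUnicodeNote() {
  llvm::outs() << "note: файл → 日本語\n";
}
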
diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp
index 3a0701626089..02698416609f 100644
--- a/contrib/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm/lib/TableGen/Main.cpp
@@ -46,6 +46,10 @@ static cl::list<std::string>
IncludeDirs("I", cl::desc("Directory of include files"),
cl::value_desc("directory"), cl::Prefix);
+static cl::list<std::string>
+MacroNames("D", cl::desc("Name of the macro to be defined"),
+ cl::value_desc("macro name"), cl::Prefix);
+
static int reportError(const char *ProgName, Twine Msg) {
errs() << ProgName << ": " << Msg;
errs().flush();
@@ -91,28 +95,44 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
- TGParser Parser(SrcMgr, Records);
+ TGParser Parser(SrcMgr, MacroNames, Records);
if (Parser.ParseFile())
return 1;
- std::error_code EC;
- ToolOutputFile Out(OutputFilename, EC, sys::fs::F_Text);
- if (EC)
- return reportError(argv0, "error opening " + OutputFilename + ":" +
- EC.message() + "\n");
+ // Write output to memory.
+ std::string OutString;
+ raw_string_ostream Out(OutString);
+ if (MainFn(Out, Records))
+ return 1;
+
+ // Always write the depfile, even if the main output hasn't changed.
+ // If it's missing, Ninja considers the output dirty. If this step came after
+ // the early exit below and someone deleted the .inc.d file but not the .inc
+ // file, tablegen would never write the depfile.
if (!DependFilename.empty()) {
if (int Ret = createDependencyFile(Parser, argv0))
return Ret;
}
- if (MainFn(Out.os(), Records))
- return 1;
+ // Only updates the real output file if there are any differences.
+ // This prevents recompilation of all the files depending on it if there
+ // aren't any.
+ if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename))
+ if (std::move(ExistingOrErr.get())->getBuffer() == Out.str())
+ return 0;
+
+ std::error_code EC;
+ ToolOutputFile OutFile(OutputFilename, EC, sys::fs::F_Text);
+ if (EC)
+ return reportError(argv0, "error opening " + OutputFilename + ":" +
+ EC.message() + "\n");
+ OutFile.os() << Out.str();
if (ErrorsPrinted > 0)
return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n");
// Declare success.
- Out.keep();
+ OutFile.keep();
return 0;
}
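
The new TableGenMain flow renders the whole output into a string and only rewrites the .inc file when its contents actually changed, so dependents are not recompiled needlessly, while the depfile is still always refreshed. The same idiom in isolation; writeIfChanged and its parameters are illustrative names, not part of this change:

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"

// Write NewText to Path only when it differs from the file's current contents.
static std::error_code writeIfChanged(llvm::StringRef Path,
                                      llvm::StringRef NewText) {
  if (auto Existing = llvm::MemoryBuffer::getFile(Path))
    if ((*Existing)->getBuffer() == NewText)
      return std::error_code(); // up to date; leave the timestamp alone
  std::error_code EC;
  llvm::ToolOutputFile Out(Path, EC, llvm::sys::fs::F_Text);
  if (EC)
    return EC;
  Out.os() << NewText;
  Out.keep(); // prevent deletion on destruction
  return EC;
}
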
diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp
index 43d178caef30..cf1685a2e8c2 100644
--- a/contrib/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm/lib/TableGen/Record.cpp
@@ -158,10 +158,9 @@ RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
SmallVector<Record *, 4> Classes(UnsortedClasses.begin(),
UnsortedClasses.end());
- llvm::sort(Classes.begin(), Classes.end(),
- [](Record *LHS, Record *RHS) {
- return LHS->getNameInitAsString() < RHS->getNameInitAsString();
- });
+ llvm::sort(Classes, [](Record *LHS, Record *RHS) {
+ return LHS->getNameInitAsString() < RHS->getNameInitAsString();
+ });
FoldingSetNodeID ID;
ProfileRecordRecTy(ID, Classes);
@@ -487,7 +486,7 @@ Init *IntInit::convertInitializerTo(RecTy *Ty) const {
SmallVector<Init *, 16> NewBits(BRT->getNumBits());
for (unsigned i = 0; i != BRT->getNumBits(); ++i)
- NewBits[i] = BitInit::get(Value & (1LL << i));
+ NewBits[i] = BitInit::get(Value & ((i < 64) ? (1LL << i) : 0));
return BitsInit::get(NewBits);
}
@@ -710,6 +709,8 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
return StringInit::get(LHSi->getAsString());
} else if (isa<RecordRecTy>(getType())) {
if (StringInit *Name = dyn_cast<StringInit>(LHS)) {
+ if (!CurRec && !IsFinal)
+ break;
assert(CurRec && "NULL pointer");
Record *D;
diff --git a/contrib/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm/lib/TableGen/TGLexer.cpp
index 652be6e8dbbf..16aeee561075 100644
--- a/contrib/llvm/lib/TableGen/TGLexer.cpp
+++ b/contrib/llvm/lib/TableGen/TGLexer.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
+#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdint>
@@ -28,11 +29,35 @@
using namespace llvm;
-TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
+namespace {
+// A list of supported preprocessing directives with their
+// internal token kinds and names.
+struct {
+ tgtok::TokKind Kind;
+ const char *Word;
+} PreprocessorDirs[] = {
+ { tgtok::Ifdef, "ifdef" },
+ { tgtok::Else, "else" },
+ { tgtok::Endif, "endif" },
+ { tgtok::Define, "define" }
+};
+} // end anonymous namespace
+
+TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
CurBuffer = SrcMgr.getMainFileID();
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
CurPtr = CurBuf.begin();
TokStart = nullptr;
+
+ // Pretend that we enter the "top-level" include file.
+ PrepIncludeStack.push_back(
+ make_unique<std::vector<PreprocessorControlDesc>>());
+
+ // Put all macros defined in the command line into the DefinedMacros set.
+ std::for_each(Macros.begin(), Macros.end(),
+ [this](const std::string &MacroName) {
+ DefinedMacros.insert(MacroName);
+ });
}
SMLoc TGLexer::getLoc() const {
@@ -41,11 +66,42 @@ SMLoc TGLexer::getLoc() const {
/// ReturnError - Set the error to the specified string at the specified
/// location. This is defined to always return tgtok::Error.
-tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
+tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
PrintError(Loc, Msg);
return tgtok::Error;
}
+tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
+ return ReturnError(SMLoc::getFromPointer(Loc), Msg);
+}
+
+bool TGLexer::processEOF() {
+ SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+ if (ParentIncludeLoc != SMLoc()) {
+ // If prepExitInclude() detects a problem with the preprocessing
+ // control stack, it will return false. Pretend that we reached
+ // the final EOF and stop lexing more tokens by returning false
+ // to LexToken().
+ if (!prepExitInclude(false))
+ return false;
+
+ CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
+ CurPtr = ParentIncludeLoc.getPointer();
+ // Make sure TokStart points into the parent file's buffer.
+ // LexToken() assigns to it before calling getNextChar(),
+ // so it is pointing into the included file now.
+ TokStart = CurPtr;
+ return true;
+ }
+
+ // Pretend that we exit the "top-level" include file.
+ // Note that in case of an error (e.g. control stack imbalance)
+ // the routine will issue a fatal error.
+ prepExitInclude(true);
+ return false;
+}
+
int TGLexer::getNextChar() {
char CurChar = *CurPtr++;
switch (CurChar) {
@@ -57,16 +113,6 @@ int TGLexer::getNextChar() {
if (CurPtr-1 != CurBuf.end())
return 0; // Just whitespace.
- // If this is the end of an included file, pop the parent file off the
- // include stack.
- SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
- if (ParentIncludeLoc != SMLoc()) {
- CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
- CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
- CurPtr = ParentIncludeLoc.getPointer();
- return getNextChar();
- }
-
// Otherwise, return end of file.
--CurPtr; // Another call to lex will return EOF again.
return EOF;
@@ -83,11 +129,11 @@ int TGLexer::getNextChar() {
}
}
-int TGLexer::peekNextChar(int Index) {
+int TGLexer::peekNextChar(int Index) const {
return *(CurPtr + Index);
}
-tgtok::TokKind TGLexer::LexToken() {
+tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
TokStart = CurPtr;
// This always consumes at least one character.
int CurChar = getNextChar();
@@ -100,7 +146,18 @@ tgtok::TokKind TGLexer::LexToken() {
// Unknown character, emit an error.
return ReturnError(TokStart, "Unexpected character");
- case EOF: return tgtok::Eof;
+ case EOF:
+ // Lex next token, if we just left an include file.
+ // Note that leaving an include file means that the next
+ // symbol is located at the end of 'include "..."'
+ // construct, so LexToken() is called with default
+ // false parameter.
+ if (processEOF())
+ return LexToken();
+
+ // Return EOF denoting the end of lexing.
+ return tgtok::Eof;
+
case ':': return tgtok::colon;
case ';': return tgtok::semi;
case '.': return tgtok::period;
@@ -114,15 +171,27 @@ tgtok::TokKind TGLexer::LexToken() {
case ')': return tgtok::r_paren;
case '=': return tgtok::equal;
case '?': return tgtok::question;
- case '#': return tgtok::paste;
+ case '#':
+ if (FileOrLineStart) {
+ tgtok::TokKind Kind = prepIsDirective();
+ if (Kind != tgtok::Error)
+ return lexPreprocessor(Kind);
+ }
+
+ return tgtok::paste;
+
+ case '\r':
+ PrintFatalError("getNextChar() must never return '\r'");
+ return tgtok::Error;
case 0:
case ' ':
case '\t':
- case '\n':
- case '\r':
// Ignore whitespace.
- return LexToken();
+ return LexToken(FileOrLineStart);
+ case '\n':
+ // Ignore whitespace, and identify the new line.
+ return LexToken(true);
case '/':
// If this is the start of a // comment, skip until the end of the line or
// the end of the buffer.
@@ -133,7 +202,7 @@ tgtok::TokKind TGLexer::LexToken() {
return tgtok::Error;
} else // Otherwise, this is an error.
return ReturnError(TokStart, "Unexpected character");
- return LexToken();
+ return LexToken(FileOrLineStart);
case '-': case '+':
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9': {
@@ -249,10 +318,10 @@ tgtok::TokKind TGLexer::LexVarName() {
}
tgtok::TokKind TGLexer::LexIdentifier() {
- // The first letter is [a-zA-Z_#].
+ // The first letter is [a-zA-Z_].
const char *IdentStart = TokStart;
- // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
+ // Match the rest of the identifier regex: [0-9a-zA-Z_]*
while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
++CurPtr;
@@ -322,6 +391,9 @@ bool TGLexer::LexInclude() {
// Save the line number and lex buffer of the includer.
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
CurPtr = CurBuf.begin();
+
+ PrepIncludeStack.push_back(
+ make_unique<std::vector<PreprocessorControlDesc>>());
return false;
}
@@ -496,3 +568,444 @@ tgtok::TokKind TGLexer::LexExclaim() {
return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
}
+
+bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
+ // Report an error, if preprocessor control stack for the current
+ // file is not empty.
+ if (!PrepIncludeStack.back()->empty()) {
+ prepReportPreprocessorStackError();
+
+ return false;
+ }
+
+ // Pop the preprocessing controls from the include stack.
+ if (PrepIncludeStack.empty()) {
+ PrintFatalError("Preprocessor include stack is empty");
+ }
+
+ PrepIncludeStack.pop_back();
+
+ if (IncludeStackMustBeEmpty) {
+ if (!PrepIncludeStack.empty())
+ PrintFatalError("Preprocessor include stack is not empty");
+ } else {
+ if (PrepIncludeStack.empty())
+ PrintFatalError("Preprocessor include stack is empty");
+ }
+
+ return true;
+}
+
+tgtok::TokKind TGLexer::prepIsDirective() const {
+ for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) {
+ int NextChar = *CurPtr;
+ bool Match = true;
+ unsigned I = 0;
+ for (; I < strlen(PreprocessorDirs[ID].Word); ++I) {
+ if (NextChar != PreprocessorDirs[ID].Word[I]) {
+ Match = false;
+ break;
+ }
+
+ NextChar = peekNextChar(I + 1);
+ }
+
+ // Check for whitespace after the directive. If there is no whitespace,
+ // then we do not recognize it as a preprocessing directive.
+ if (Match) {
+ tgtok::TokKind Kind = PreprocessorDirs[ID].Kind;
+
+ // New line and EOF may follow only #else/#endif. It will be reported
+ // as an error for #ifdef/#define after the call to prepLexMacroName().
+ if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
+ NextChar == '\n' ||
+ // It looks like TableGen does not support '\r' as the actual
+ // carriage return, e.g. getNextChar() treats a single '\r'
+ // as '\n'. So we do the same here.
+ NextChar == '\r')
+ return Kind;
+
+ // Allow comments after some directives, e.g.:
+ // #else// OR #else/**/
+ // #endif// OR #endif/**/
+ //
+ // Note that we do allow comments after #ifdef/#define here, e.g.
+ // #ifdef/**/ AND #ifdef//
+ // #define/**/ AND #define//
+ //
+ // These cases will be reported as incorrect after calling
+ // prepLexMacroName(). We could have supported C-style comments
+ // after #ifdef/#define, but this would complicate the code
+ // for little benefit.
+ if (NextChar == '/') {
+ NextChar = peekNextChar(I + 1);
+
+ if (NextChar == '*' || NextChar == '/')
+ return Kind;
+
+ // Pretend that we do not recognize the directive.
+ }
+ }
+ }
+
+ return tgtok::Error;
+}
+
+bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
+ TokStart = CurPtr;
+
+ for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID)
+ if (PreprocessorDirs[ID].Kind == Kind) {
+ // Advance CurPtr to the end of the preprocessing word.
+ CurPtr += strlen(PreprocessorDirs[ID].Word);
+ return true;
+ }
+
+ PrintFatalError("Unsupported preprocessing token in "
+ "prepEatPreprocessorDirective()");
+ return false;
+}
+
+tgtok::TokKind TGLexer::lexPreprocessor(
+ tgtok::TokKind Kind, bool ReturnNextLiveToken) {
+
+ // We must be looking at a preprocessing directive. Eat it!
+ if (!prepEatPreprocessorDirective(Kind))
+ PrintFatalError("lexPreprocessor() called for unknown "
+ "preprocessor directive");
+
+ if (Kind == tgtok::Ifdef) {
+ StringRef MacroName = prepLexMacroName();
+ if (MacroName.empty())
+ return ReturnError(TokStart, "Expected macro name after #ifdef");
+
+ bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
+
+ // Regardless of whether we are processing tokens or not,
+ // we put the #ifdef control on stack.
+ PrepIncludeStack.back()->push_back(
+ {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr,
+ "Only comments are supported after #ifdef NAME");
+
+ // If we were not processing tokens before this #ifdef,
+ // then just return back to the lines skipping code.
+ if (!ReturnNextLiveToken)
+ return Kind;
+
+ // If we were processing tokens before this #ifdef,
+ // and the macro is defined, then just return the next token.
+ if (MacroIsDefined)
+ return LexToken();
+
+ // We were processing tokens before this #ifdef, and the macro
+ // is not defined, so we have to start skipping the lines.
+ // If the skipping is successful, it will return the token following
+ // either #else or #endif corresponding to this #ifdef.
+ if (prepSkipRegion(ReturnNextLiveToken))
+ return LexToken();
+
+ return tgtok::Error;
+ } else if (Kind == tgtok::Else) {
+ // Check if this #else is correct before calling prepSkipDirectiveEnd(),
+ // which will move CurPtr away from the beginning of #else.
+ if (PrepIncludeStack.back()->empty())
+ return ReturnError(TokStart, "#else without #ifdef");
+
+ PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();
+
+ if (IfdefEntry.Kind != tgtok::Ifdef) {
+ PrintError(TokStart, "double #else");
+ return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
+ }
+
+ // Replace the corresponding #ifdef's control with its negation
+ // on the control stack.
+ PrepIncludeStack.back()->pop_back();
+ PrepIncludeStack.back()->push_back(
+ {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr, "Only comments are supported after #else");
+
+ // If we were processing tokens before this #else,
+ // we have to start skipping lines until the matching #endif.
+ if (ReturnNextLiveToken) {
+ if (prepSkipRegion(ReturnNextLiveToken))
+ return LexToken();
+
+ return tgtok::Error;
+ }
+
+ // Return to the lines skipping code.
+ return Kind;
+ } else if (Kind == tgtok::Endif) {
+ // Check if this #endif is correct before calling prepSkipDirectiveEnd(),
+ // which will move CurPtr away from the beginning of #endif.
+ if (PrepIncludeStack.back()->empty())
+ return ReturnError(TokStart, "#endif without #ifdef");
+
+ auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();
+
+ if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
+ IfdefOrElseEntry.Kind != tgtok::Else) {
+ PrintFatalError("Invalid preprocessor control on the stack");
+ return tgtok::Error;
+ }
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr, "Only comments are supported after #endif");
+
+ PrepIncludeStack.back()->pop_back();
+
+ // If we were processing tokens before this #endif, then
+ // we should continue it.
+ if (ReturnNextLiveToken) {
+ return LexToken();
+ }
+
+ // Return to the lines skipping code.
+ return Kind;
+ } else if (Kind == tgtok::Define) {
+ StringRef MacroName = prepLexMacroName();
+ if (MacroName.empty())
+ return ReturnError(TokStart, "Expected macro name after #define");
+
+ if (!DefinedMacros.insert(MacroName).second)
+ PrintWarning(getLoc(),
+ "Duplicate definition of macro: " + Twine(MacroName));
+
+ if (!prepSkipDirectiveEnd())
+ return ReturnError(CurPtr,
+ "Only comments are supported after #define NAME");
+
+ if (!ReturnNextLiveToken) {
+ PrintFatalError("#define must be ignored during the lines skipping");
+ return tgtok::Error;
+ }
+
+ return LexToken();
+ }
+
+ PrintFatalError("Preprocessing directive is not supported");
+ return tgtok::Error;
+}
+
+bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
+ if (!MustNeverBeFalse)
+ PrintFatalError("Invalid recursion.");
+
+ do {
+ // Skip all symbols to the line end.
+ prepSkipToLineEnd();
+
+ // Find the first non-whitespace symbol in the next line(s).
+ if (!prepSkipLineBegin())
+ return false;
+
+ // If the first non-blank/comment symbol on the line is '#',
+ // it may be a start of preprocessing directive.
+ //
+ // If it is not '#' just go to the next line.
+ if (*CurPtr == '#')
+ ++CurPtr;
+ else
+ continue;
+
+ tgtok::TokKind Kind = prepIsDirective();
+
+ // If we did not find a preprocessing directive or it is #define,
+ // then just skip to the next line. We do not have to do anything
+ // for #define in the line-skipping mode.
+ if (Kind == tgtok::Error || Kind == tgtok::Define)
+ continue;
+
+ tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
+
+ // If lexPreprocessor() encountered an error during lexing this
+ // preprocessor idiom, then return false to the calling lexPreprocessor().
+ // This will force tgtok::Error to be returned to the tokens processing.
+ if (ProcessedKind == tgtok::Error)
+ return false;
+
+ if (Kind != ProcessedKind)
+ PrintFatalError("prepIsDirective() and lexPreprocessor() "
+ "returned different token kinds");
+
+ // If this preprocessing directive enables tokens processing,
+ // then return to the lexPreprocessor() and get to the next token.
+ // We can move from line-skipping mode to processing tokens only
+ // due to #else or #endif.
+ if (prepIsProcessingEnabled()) {
+ if (Kind != tgtok::Else && Kind != tgtok::Endif) {
+ PrintFatalError("Tokens processing was enabled by an unexpected "
+ "preprocessing directive");
+ return false;
+ }
+
+ return true;
+ }
+ } while (CurPtr != CurBuf.end());
+
+ // We have reached the end of the file, but never left the lines-skipping
+ // mode. This means there is no matching #endif.
+ prepReportPreprocessorStackError();
+ return false;
+}
+
+StringRef TGLexer::prepLexMacroName() {
+ // Skip whitespaces between the preprocessing directive and the macro name.
+ while (*CurPtr == ' ' || *CurPtr == '\t')
+ ++CurPtr;
+
+ TokStart = CurPtr;
+ // Macro names start with [a-zA-Z_].
+ if (*CurPtr != '_' && !isalpha(*CurPtr))
+ return "";
+
+ // Match the rest of the identifier regex: [0-9a-zA-Z_]*
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+ ++CurPtr;
+
+ return StringRef(TokStart, CurPtr - TokStart);
+}
+
+bool TGLexer::prepSkipLineBegin() {
+ while (CurPtr != CurBuf.end()) {
+ switch (*CurPtr) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ break;
+
+ case '/': {
+ int NextChar = peekNextChar(1);
+ if (NextChar == '*') {
+ // Skip C-style comment.
+ // Note that we do not care about skipping the C++-style comments.
+ // If the line contains "//", it may not contain any processable
+ // preprocessing directive. Just return CurPtr pointing to
+ // the first '/' in this case. We also do not care about
+ // incorrect symbols after the first '/' - we are in lines-skipping
+ // mode, so incorrect code is allowed to some extent.
+
+ // Set TokStart to the beginning of the comment to enable proper
+ // diagnostic printing in case of error in SkipCComment().
+ TokStart = CurPtr;
+
+ // CurPtr must point to '*' before call to SkipCComment().
+ ++CurPtr;
+ if (SkipCComment())
+ return false;
+ } else {
+ // CurPtr points to the non-whitespace '/'.
+ return true;
+ }
+
+ // We must not increment CurPtr after the comment was lexed.
+ continue;
+ }
+
+ default:
+ return true;
+ }
+
+ ++CurPtr;
+ }
+
+ // We have reached the end of the file. Return to the lines skipping
+ // code, and allow it to handle the EOF as needed.
+ return true;
+}
+
+bool TGLexer::prepSkipDirectiveEnd() {
+ while (CurPtr != CurBuf.end()) {
+ switch (*CurPtr) {
+ case ' ':
+ case '\t':
+ break;
+
+ case '\n':
+ case '\r':
+ return true;
+
+ case '/': {
+ int NextChar = peekNextChar(1);
+ if (NextChar == '/') {
+ // Skip C++-style comment.
+ // We may just return true now, but let's skip to the line/buffer end
+ // to simplify the method specification.
+ ++CurPtr;
+ SkipBCPLComment();
+ } else if (NextChar == '*') {
+ // When we are skipping C-style comment at the end of a preprocessing
+ // directive, we can skip several lines. If any meaningful TD token
+ // follows the end of the C-style comment on the same line, it will
+        // be considered an invalid usage of a TD token.
+ // For example, we want to forbid usages like this one:
+ // #define MACRO class Class {}
+ // But with C-style comments we also disallow the following:
+ // #define MACRO /* This macro is used
+ // to ... */ class Class {}
+ // One can argue that this should be allowed, but it does not seem
+        // to be worth the complication. Moreover, this matches
+ // the C preprocessor behavior.
+
+ // Set TokStart to the beginning of the comment to enable proper
+        // diagnostic printing in case of an error in SkipCComment().
+ TokStart = CurPtr;
+ ++CurPtr;
+ if (SkipCComment())
+ return false;
+ } else {
+ TokStart = CurPtr;
+ PrintError(CurPtr, "Unexpected character");
+ return false;
+ }
+
+ // We must not increment CurPtr after the comment was lexed.
+ continue;
+ }
+
+ default:
+      // Do not allow any non-whitespace characters after the directive.
+ TokStart = CurPtr;
+ return false;
+ }
+
+ ++CurPtr;
+ }
+
+ return true;
+}
+
+void TGLexer::prepSkipToLineEnd() {
+ while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end())
+ ++CurPtr;
+}
+
+bool TGLexer::prepIsProcessingEnabled() {
+ for (auto I = PrepIncludeStack.back()->rbegin(),
+ E = PrepIncludeStack.back()->rend();
+ I != E; ++I) {
+ if (!I->IsDefined)
+ return false;
+ }
+
+ return true;
+}
+
+void TGLexer::prepReportPreprocessorStackError() {
+ if (PrepIncludeStack.back()->empty())
+ PrintFatalError("prepReportPreprocessorStackError() called with "
+ "empty control stack");
+
+ auto &PrepControl = PrepIncludeStack.back()->back();
+ PrintError(CurBuf.end(), "Reached EOF without matching #endif");
+ PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");
+
+ TokStart = CurPtr;
+}
diff --git a/contrib/llvm/lib/TableGen/TGLexer.h b/contrib/llvm/lib/TableGen/TGLexer.h
index 2c80743e3a68..e9980b36b97b 100644
--- a/contrib/llvm/lib/TableGen/TGLexer.h
+++ b/contrib/llvm/lib/TableGen/TGLexer.h
@@ -14,11 +14,14 @@
#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
#define LLVM_LIB_TABLEGEN_TGLEXER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <map>
+#include <memory>
#include <string>
namespace llvm {
@@ -59,7 +62,11 @@ namespace tgtok {
BinaryIntVal,
// String valued tokens.
- Id, StrVal, VarName, CodeFragment
+ Id, StrVal, VarName, CodeFragment,
+
+ // Preprocessing tokens for internal usage by the lexer.
+ // They are never returned as a result of Lex().
+ Ifdef, Else, Endif, Define
};
}
@@ -87,10 +94,10 @@ private:
DependenciesMapTy Dependencies;
public:
- TGLexer(SourceMgr &SrcMgr);
+ TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
tgtok::TokKind Lex() {
- return CurCode = LexToken();
+ return CurCode = LexToken(CurPtr == CurBuf.begin());
}
const DependenciesMapTy &getDependencies() const {
@@ -119,12 +126,13 @@ public:
private:
/// LexToken - Read the next token and return its code.
- tgtok::TokKind LexToken();
+ tgtok::TokKind LexToken(bool FileOrLineStart = false);
+ tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
int getNextChar();
- int peekNextChar(int Index);
+ int peekNextChar(int Index) const;
void SkipBCPLComment();
bool SkipCComment();
tgtok::TokKind LexIdentifier();
@@ -134,6 +142,231 @@ private:
tgtok::TokKind LexNumber();
tgtok::TokKind LexBracket();
tgtok::TokKind LexExclaim();
+
+ // Process EOF encountered in LexToken().
+ // If EOF is met in an include file, then the method will update
+ // CurPtr, CurBuf and preprocessing include stack, and return true.
+ // If EOF is met in the top-level file, then the method will
+ // update and check the preprocessing include stack, and return false.
+ bool processEOF();
+
+ // *** Structures and methods for preprocessing support ***
+
+ // A set of macro names that are defined either via command line or
+ // by using:
+ // #define NAME
+ StringSet<> DefinedMacros;
+
+ // Each of #ifdef and #else directives has a descriptor associated
+ // with it.
+ //
+ // An ordered list of preprocessing controls defined by #ifdef/#else
+ // directives that are in effect currently is called preprocessing
+ // control stack. It is represented as a vector of PreprocessorControlDesc's.
+ //
+ // The control stack is updated according to the following rules:
+ //
+ // For each #ifdef we add an element to the control stack.
+ // For each #else we replace the top element with a descriptor
+ // with an inverted IsDefined value.
+ // For each #endif we pop the top element from the control stack.
+ //
+ // When CurPtr reaches the current buffer's end, the control stack
+ // must be empty, i.e. #ifdef and the corresponding #endif
+ // must be located in the same file.
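+  //
+  // For example (an illustrative sketch of the rules above):
+  //   #ifdef NAME   // push {Ifdef, IsDefined = whether NAME is defined}
+  //   ...
+  //   #else         // replace the top with {Else, IsDefined inverted}
+  //   ...
+  //   #endif        // pop the top element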
+ struct PreprocessorControlDesc {
+ // Either tgtok::Ifdef or tgtok::Else.
+ tgtok::TokKind Kind;
+
+ // True, if the condition for this directive is true, false - otherwise.
+ // Examples:
+ // #ifdef NAME : true, if NAME is defined, false - otherwise.
+ // ...
+ // #else : false, if NAME is defined, true - otherwise.
+ bool IsDefined;
+
+ // Pointer into CurBuf to the beginning of the preprocessing directive
+ // word, e.g.:
+ // #ifdef NAME
+ // ^ - SrcPos
+ SMLoc SrcPos;
+ };
+
+ // We want to disallow code like this:
+ // file1.td:
+ // #define NAME
+ // #ifdef NAME
+ // include "file2.td"
+ // EOF
+ // file2.td:
+ // #endif
+ // EOF
+ //
+ // To do this, we clear the preprocessing control stack on entry
+  // to each of the included files. PrepIncludeStack is used to store
+ // preprocessing control stacks for the current file and all its
+ // parent files. The back() element is the preprocessing control
+ // stack for the current file.
+ std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
+ PrepIncludeStack;
+
+ // Validate that the current preprocessing control stack is empty,
+ // since we are about to exit a file, and pop the include stack.
+ //
+ // If IncludeStackMustBeEmpty is true, the include stack must be empty
+ // after the popping, otherwise, the include stack must not be empty
+ // after the popping. Basically, the include stack must be empty
+ // only if we exit the "top-level" file (i.e. finish lexing).
+ //
+ // The method returns false, if the current preprocessing control stack
+ // is not empty (e.g. there is an unterminated #ifdef/#else),
+ // true - otherwise.
+ bool prepExitInclude(bool IncludeStackMustBeEmpty);
+
+ // Look ahead for a preprocessing directive starting from CurPtr. The caller
+  // must only call this method if *(CurPtr - 1) is '#'. If the method matches
+ // a preprocessing directive word followed by a whitespace, then it returns
+ // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
+ //
+ // CurPtr is not adjusted by this method.
+ tgtok::TokKind prepIsDirective() const;
+
+ // Given a preprocessing token kind, adjusts CurPtr to the end
+ // of the preprocessing directive word. Returns true, unless
+ // an unsupported token kind is passed in.
+ //
+ // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
+ // to avoid adjusting CurPtr before we are sure that '#' is followed
+ // by a preprocessing directive. If it is not, then we fall back to
+ // tgtok::paste interpretation of '#'.
+ bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
+
+ // The main "exit" point from the token parsing to preprocessor.
+ //
+ // The method is called for CurPtr, when prepIsDirective() returns
+ // true. The first parameter matches the result of prepIsDirective(),
+ // denoting the actual preprocessor directive to be processed.
+ //
+ // If the preprocessing directive disables the tokens processing, e.g.:
+ // #ifdef NAME // NAME is undefined
+ // then lexPreprocessor() enters the lines-skipping mode.
+ // In this mode, it does not parse any tokens, because the code under
+ // the #ifdef may not even be a correct tablegen code. The preprocessor
+ // looks for lines containing other preprocessing directives, which
+ // may be prepended with whitespaces and C-style comments. If the line
+  // may be preceded by whitespace and C-style comments. If the line
+ // Otherwise, the preprocessing directive is processed by recursively
+ // calling lexPreprocessor(). The processing of the encountered
+ // preprocessing directives includes updating preprocessing control stack
+ // and adding new macros into DefinedMacros set.
+ //
+ // The second parameter controls whether lexPreprocessor() is called from
+ // LexToken() (true) or recursively from lexPreprocessor() (false).
+ //
+ // If ReturnNextLiveToken is true, the method returns the next
+ // LEX token following the current directive or following the end
+ // of the disabled preprocessing region corresponding to this directive.
+ // If ReturnNextLiveToken is false, the method returns the first parameter,
+ // unless there were errors encountered in the disabled preprocessing
+ // region - in this case, it returns tgtok::Error.
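+  //
+  // For example, assuming NAME is not defined:
+  //   #ifdef NAME
+  //   arbitrary, possibly invalid, code
+  //   #endif
+  //   class Foo;
+  // a call lexPreprocessor(tgtok::Ifdef) skips the disabled region and
+  // returns the token for 'class' (Foo is just an illustrative name).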
+ tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
+ bool ReturnNextLiveToken = true);
+
+ // Worker method for lexPreprocessor() to skip lines after some
+ // preprocessing directive up to the buffer end or to the directive
+ // that re-enables token processing. The method returns true
+ // upon processing the next directive that re-enables tokens
+ // processing. False is returned if an error was encountered.
+ //
+ // Note that prepSkipRegion() calls lexPreprocessor() to process
+ // encountered preprocessing directives. In this case, the second
+ // parameter to lexPreprocessor() is set to false. Being passed
+  // parameter to lexPreprocessor() is set to false. When it is passed
+  // a false ReturnNextLiveToken, lexPreprocessor() must never call
+  // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
+ bool prepSkipRegion(bool MustNeverBeFalse);
+
+ // Lex name of the macro after either #ifdef or #define. We could have used
+ // LexIdentifier(), but it has special handling of "include" word, which
+ // could result in awkward diagnostic errors. Consider:
+ // ----
+ // #ifdef include
+ // class ...
+ // ----
+ // LexIdentifier() will engage LexInclude(), which will complain about
+ // missing file with name "class". Instead, prepLexMacroName() will treat
+ // "include" as a normal macro name.
+ //
+ // On entry, CurPtr points to the end of a preprocessing directive word.
+ // The method allows for whitespaces between the preprocessing directive
+ // and the macro name. The allowed whitespaces are ' ' and '\t'.
+ //
+ // If the first non-whitespace symbol after the preprocessing directive
+ // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
+ // the method updates TokStart to the position of the first non-whitespace
+ // symbol, sets CurPtr to the position of the macro name's last symbol,
+ // and returns a string reference to the macro name. Otherwise,
+ // TokStart is set to the first non-whitespace symbol after the preprocessing
+ // directive, and the method returns an empty string reference.
+ //
+ // In all cases, TokStart may be used to point to the word following
+ // the preprocessing directive.
+ StringRef prepLexMacroName();
+
+ // Skip any whitespaces starting from CurPtr. The method is used
+ // only in the lines-skipping mode to find the first non-whitespace
+ // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
+ // and '\r'. The method skips C-style comments as well, because
+ // it is used to find the beginning of the preprocessing directive.
+  // If we do not handle C-style comments, the following code would
+ // result in incorrect detection of a preprocessing directive:
+ // /*
+ // #ifdef NAME
+ // */
+ // As long as we skip C-style comments, the following code is correctly
+ // recognized as a preprocessing directive:
+ // /* first line comment
+ // second line comment */ #ifdef NAME
+ //
+  // The method returns true upon reaching the first non-whitespace symbol
+  // or EOF; CurPtr is set to point to this symbol. The method returns false
+  // if an error occurred while skipping a C-style comment.
+ bool prepSkipLineBegin();
+
+ // Skip any whitespaces or comments after a preprocessing directive.
+ // The method returns true upon reaching either end of the line
+ // or end of the file. If there is a multiline C-style comment
+ // after the preprocessing directive, the method skips
+ // the comment, so the final CurPtr may point to one of the next lines.
+  // The method returns false if an error occurred while skipping a
+  // C- or C++-style comment, or if a non-whitespace symbol appears
+  // after the preprocessing directive.
+ //
+  // The method may be called both during lines-skipping and tokens
+ // processing. It actually verifies that only whitespaces or/and
+ // comments follow a preprocessing directive.
+ //
+  // After the execution of this method, CurPtr points either to a new line
+  // symbol, the buffer end, or a non-whitespace symbol following the
+  // preprocessing directive.
+ bool prepSkipDirectiveEnd();
+
+ // Skip all symbols to the end of the line/file.
+ // The method adjusts CurPtr, so that it points to either new line
+ // symbol in the current line or the buffer end.
+ void prepSkipToLineEnd();
+
+ // Return true, if the current preprocessor control stack is such that
+ // we should allow lexer to process the next token, false - otherwise.
+ //
+ // In particular, the method returns true, if all the #ifdef/#else
+ // controls on the stack have their IsDefined member set to true.
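+  //
+  // For example, with a stack of {Ifdef, IsDefined=true}, {Ifdef, IsDefined=false}
+  // (an illustrative sketch), the method returns false and the lexer stays
+  // in the lines-skipping mode.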
+ bool prepIsProcessingEnabled();
+
+ // Report an error, if we reach EOF with non-empty preprocessing control
+ // stack. This means there is no matching #endif for the previous
+ // #ifdef/#else.
+ void prepReportPreprocessorStackError();
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h
index 0a28b3a03aa1..e3849043513b 100644
--- a/contrib/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm/lib/TableGen/TGParser.h
@@ -115,8 +115,9 @@ class TGParser {
};
public:
- TGParser(SourceMgr &SrcMgr, RecordKeeper &records)
- : Lex(SrcMgr), CurMultiClass(nullptr), Records(records) {}
+ TGParser(SourceMgr &SrcMgr, ArrayRef<std::string> Macros,
+ RecordKeeper &records)
+ : Lex(SrcMgr, Macros), CurMultiClass(nullptr), Records(records) {}
/// ParseFile - Main entrypoint for parsing a tblgen file. These parser
/// routines return true on error, or false on success.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
index edda13ce97ef..c36d9354f3ba 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -32,12 +32,14 @@ class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
FunctionPass *createAArch64CondBrTuning();
+FunctionPass *createAArch64CompressJumpTablesPass();
FunctionPass *createAArch64ConditionalCompares();
FunctionPass *createAArch64AdvSIMDScalar();
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
+FunctionPass *createAArch64SpeculationHardeningPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
FunctionPass *createAArch64SIMDInstrOptPass();
ModulePass *createAArch64PromoteConstantPass();
@@ -46,6 +48,7 @@ FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
FunctionPass *createFalkorHWPFFixPass();
FunctionPass *createFalkorMarkStridedAccessesPass();
+FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -53,18 +56,23 @@ FunctionPass *createAArch64CollectLOHPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
+FunctionPass *createAArch64PreLegalizeCombiner();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
+void initializeAArch64BranchTargetsPass(PassRegistry&);
void initializeAArch64CollectLOHPass(PassRegistry&);
void initializeAArch64CondBrTuningPass(PassRegistry &);
+void initializeAArch64CompressJumpTablesPass(PassRegistry&);
void initializeAArch64ConditionalComparesPass(PassRegistry&);
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry&);
+void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
+void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index a69d38144c78..8f79140cba64 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -65,25 +65,56 @@ def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
"Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
+def FeaturePAN : SubtargetFeature<
+ "pan", "HasPAN", "true",
+ "Enables ARM v8.1 Privileged Access-Never extension">;
+
+def FeatureLOR : SubtargetFeature<
+ "lor", "HasLOR", "true",
+ "Enables ARM v8.1 Limited Ordering Regions extension">;
+
+def FeatureVH : SubtargetFeature<
+ "vh", "HasVH", "true",
+ "Enables ARM v8.1 Virtual Host extension">;
+
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"Full FP16", [FeatureFPARMv8]>;
+def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
+ "Enable FP16 FML instructions", [FeatureFullFP16]>;
+
def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
"Enable Statistical Profiling extension">;
+def FeaturePAN_RWV : SubtargetFeature<
+ "pan-rwv", "HasPAN_RWV", "true",
+ "Enable v8.2 PAN s1e1R and s1e1W Variants",
+ [FeaturePAN]>;
+
+// UAO PState
+def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true",
+ "Enable v8.2 UAO PState">;
+
+def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
+ "true", "Enable v8.2 data Cache Clean to Point of Persistence" >;
+
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions">;
-/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
+def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
+ "Has zero-cycle zeroing instructions for generic registers">;
+
+def FeatureZCZeroingFP : SubtargetFeature<"zcz-fp", "HasZeroCycleZeroingFP", "true",
+ "Has zero-cycle zeroing instructions for FP registers">;
-/// Cyclone has instructions which zero registers for "free".
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
- "Has zero-cycle zeroing instructions">;
+ "Has zero-cycle zeroing instructions",
+ [FeatureZCZeroingGP, FeatureZCZeroingFP]>;
/// ... but the floating-point version doesn't quite work in rare cases on older
/// CPUs.
@@ -96,13 +127,14 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align",
"Disallow all unaligned memory "
"access">;
-def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true",
- "Reserve X18, making it unavailable "
- "as a GPR">;
+foreach i = {1-7,18,20} in
+ def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
+ "Reserve X"#i#", making it unavailable "
+ "as a GPR">;
-def FeatureReserveX20 : SubtargetFeature<"reserve-x20", "ReserveX20", "true",
- "Reserve X20, making it unavailable "
- "as a GPR">;
+foreach i = {8-15,18} in
+ def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
+ "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
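+
+// For example, the two loops above expand to subtarget features such as
+// "reserve-x18" and "call-saved-x8".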
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
@@ -117,11 +149,11 @@ def FeaturePredictableSelectIsExpensive : SubtargetFeature<
def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
"CustomAsCheapAsMove", "true",
- "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
+ "Use custom handling of cheap instructions">;
def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
"ExynosAsCheapAsMove", "true",
- "Use Exynos specific code in TargetInstrInfo::isAsCheapAsAMove()",
+ "Use Exynos specific handling of cheap instructions",
[FeatureCustomCheapAsMoveHandling]>;
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
@@ -156,10 +188,18 @@ def FeatureFuseAES : SubtargetFeature<
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
+def FeatureFuseArithmeticLogic : SubtargetFeature<
+ "fuse-arith-logic", "HasFuseArithmeticLogic", "true",
+ "CPU fuses arithmetic and logic operations">;
+
def FeatureFuseCCSelect : SubtargetFeature<
"fuse-csel", "HasFuseCCSelect", "true",
"CPU fuses conditional select operations">;
+def FeatureFuseCryptoEOR : SubtargetFeature<
+ "fuse-crypto-eor", "HasFuseCryptoEOR", "true",
+ "CPU fuses AES/PMULL and EOR operations">;
+
def FeatureFuseLiterals : SubtargetFeature<
"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
@@ -168,6 +208,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
+def FeatureForce32BitJumpTables
+ : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
+ "Force jump table entries to be 32-bits wide except at MinSize">;
+
def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
"Enable support for RCPC extension">;
@@ -179,6 +223,66 @@ def FeatureDotProd : SubtargetFeature<
"dotprod", "HasDotProd", "true",
"Enable dot product support">;
+def FeaturePA : SubtargetFeature<
+ "pa", "HasPA", "true",
+  "Enable v8.3-A Pointer Authentication enhancement">;
+
+def FeatureJS : SubtargetFeature<
+ "jsconv", "HasJS", "true",
+  "Enable v8.3-A JavaScript FP conversion enhancement",
+ [FeatureFPARMv8]>;
+
+def FeatureCCIDX : SubtargetFeature<
+ "ccidx", "HasCCIDX", "true",
+ "Enable v8.3-A Extend of the CCSIDR number of sets">;
+
+def FeatureComplxNum : SubtargetFeature<
+ "complxnum", "HasComplxNum", "true",
+ "Enable v8.3-A Floating-point complex number support",
+ [FeatureNEON]>;
+
+def FeatureNV : SubtargetFeature<
+ "nv", "HasNV", "true",
+  "Enable v8.4-A Nested Virtualization Enhancement">;
+
+def FeatureRASv8_4 : SubtargetFeature<
+ "rasv8_4", "HasRASv8_4", "true",
+ "Enable v8.4-A Reliability, Availability and Serviceability extension",
+ [FeatureRAS]>;
+
+def FeatureMPAM : SubtargetFeature<
+ "mpam", "HasMPAM", "true",
+ "Enable v8.4-A Memory system Partitioning and Monitoring extension">;
+
+def FeatureDIT : SubtargetFeature<
+ "dit", "HasDIT", "true",
+ "Enable v8.4-A Data Independent Timing instructions">;
+
+def FeatureTRACEV8_4 : SubtargetFeature<
+ "tracev8.4", "HasTRACEV8_4", "true",
+ "Enable v8.4-A Trace extension">;
+
+def FeatureAM : SubtargetFeature<
+ "am", "HasAM", "true",
+ "Enable v8.4-A Activity Monitors extension">;
+
+def FeatureSEL2 : SubtargetFeature<
+ "sel2", "HasSEL2", "true",
+ "Enable v8.4-A Secure Exception Level 2 extension">;
+
+def FeatureTLB_RMI : SubtargetFeature<
+ "tlb-rmi", "HasTLB_RMI", "true",
+ "Enable v8.4-A TLB Range and Maintenance Instructions">;
+
+def FeatureFMI : SubtargetFeature<
+ "fmi", "HasFMI", "true",
+ "Enable v8.4-A Flag Manipulation Instructions">;
+
+// 8.4 RCPC enhancements: LDAPR & STLR instructions with Immediate Offset
+def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true",
+ "Enable v8.4-A RCPC instructions with Immediate Offsets",
+ [FeatureRCPC]>;
+
def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
"NegativeImmediates", "false",
"Convert immediates and instructions "
@@ -196,21 +300,65 @@ def FeatureAggressiveFMA :
"true",
"Enable Aggressive FMA for floating-point.">;
+def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true",
+ "Enable alternative NZCV format for floating point comparisons">;
+
+def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true",
+ "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to "
+ "an integer (in FP format) forcing it to fit into a 32- or 64-bit int" >;
+
+def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict",
+ "true", "Enable architectural speculation restriction" >;
+
+def FeatureSB : SubtargetFeature<"sb", "HasSB",
+ "true", "Enable v8.5 Speculation Barrier" >;
+
+def FeatureSSBS : SubtargetFeature<"ssbs", "HasSSBS",
+ "true", "Enable Speculative Store Bypass Safe bit" >;
+
+def FeaturePredRes : SubtargetFeature<"predres", "HasPredRes", "true",
+ "Enable v8.5a execution and data prediction invalidation instructions" >;
+
+def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP",
+ "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >;
+
+def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI",
+ "true", "Enable Branch Target Identification" >;
+
+def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen",
+ "true", "Enable Random Number generation instructions" >;
+
+def FeatureMTE : SubtargetFeature<"mte", "HasMTE",
+ "true", "Enable Memory Tagging Extension" >;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>;
+ "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM,
+ FeaturePAN, FeatureLOR, FeatureVH]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
- "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
+ "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO,
+ FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>;
def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
- "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>;
+ "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePA,
+ FeatureJS, FeatureCCIDX, FeatureComplxNum]>;
def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
- "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd]>;
+ "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
+ FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT,
+ FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI,
+ FeatureFMI, FeatureRCPC_IMMO]>;
+
+def HasV8_5aOps : SubtargetFeature<
+ "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
+ [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
+ FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
+ FeatureBranchTargetId]
+>;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -226,6 +374,8 @@ include "AArch64CallingConvention.td"
include "AArch64Schedule.td"
include "AArch64InstrInfo.td"
+include "AArch64SchedPredicates.td"
+include "AArch64SchedPredExynos.td"
def AArch64InstrInfo : InstrInfo;
@@ -245,6 +395,7 @@ include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td"
include "AArch64SchedExynosM1.td"
include "AArch64SchedExynosM3.td"
+include "AArch64SchedExynosM4.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
@@ -343,6 +494,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
FeatureFuseAES,
+ FeatureFuseCryptoEOR,
FeatureNEON,
FeaturePerfMon,
FeatureZCRegMove,
@@ -356,14 +508,13 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAES,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeatureSlowMisaligned128Store,
FeatureUseRSqrt,
- FeatureZCZeroing]>;
+ FeatureZCZeroingFP]>;
def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M2 processors",
@@ -371,29 +522,47 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAES,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeatureSlowMisaligned128Store,
- FeatureZCZeroing]>;
+ FeatureZCZeroingFP]>;
def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureCRC,
FeatureCrypto,
FeatureExynosCheapAsMoveHandling,
- FeatureFPARMv8,
+ FeatureForce32BitJumpTables,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseCCSelect,
FeatureFuseLiterals,
FeatureLSLFast,
- FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
+ FeatureZCZeroingFP]>;
+
+def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
+ "Samsung Exynos-M4 processors",
+ [HasV8_2aOps,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDotProd,
+ FeatureExynosCheapAsMoveHandling,
+ FeatureForce32BitJumpTables,
+ FeatureFP16FML,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseLiterals,
+ FeatureLSLFast,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
FeatureZCZeroing]>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
@@ -438,7 +607,7 @@ def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
FeatureLSLFast,
- HasV8_3aOps]>;
+ HasV8_4aOps]>;
def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
"ThunderX2T99",
@@ -497,6 +666,21 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
FeaturePredictableSelectIsExpensive,
FeatureNEON]>;
+def ProcTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
+ "HiSilicon TS-V110 processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSPE,
+ FeatureFullFP16,
+ FeatureFP16FML,
+ FeatureDotProd]>;
+
def : ProcessorModel<"generic", NoSchedModel, [
FeatureFPARMv8,
FeatureFuseAES,
@@ -518,7 +702,7 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
-def : ProcessorModel<"exynos-m4", ExynosM3Model, [ProcExynosM3]>;
+def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
@@ -529,6 +713,8 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
+// FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
+def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;
//===----------------------------------------------------------------------===//
// Assembly parser
@@ -577,3 +763,9 @@ def AArch64 : Target {
let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "AArch64PfmCounters.td"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index a95476b91187..452fbd3488b0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -377,11 +377,10 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
// Now we have a set of sets, order them by start address so
// we can iterate over them sequentially.
- llvm::sort(V.begin(), V.end(),
- [](const std::vector<Chain*> &A,
- const std::vector<Chain*> &B) {
- return A.front()->startsBefore(B.front());
- });
+ llvm::sort(V,
+ [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) {
+ return A.front()->startsBefore(B.front());
+ });
// As we only have two colors, we can track the global (BB-level) balance of
// odds versus evens. We aim to keep this near zero to keep both execution
@@ -453,16 +452,16 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
// change them to!
// Final tie-break with instruction order so pass output is stable (i.e. not
// dependent on malloc'd pointer values).
- llvm::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
- if (G1->size() != G2->size())
- return G1->size() > G2->size();
- if (G1->requiresFixup() != G2->requiresFixup())
- return G1->requiresFixup() > G2->requiresFixup();
- // Make sure startsBefore() produces a stable final order.
- assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
- "Starts before not total order!");
- return G1->startsBefore(G2);
- });
+ llvm::sort(GV, [](const Chain *G1, const Chain *G2) {
+ if (G1->size() != G2->size())
+ return G1->size() > G2->size();
+ if (G1->requiresFixup() != G2->requiresFixup())
+ return G1->requiresFixup() > G2->requiresFixup();
+ // Make sure startsBefore() produces a stable final order.
+ assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
+ "Starts before not total order!");
+ return G1->startsBefore(G2);
+ });
Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 52819dedc23d..0442076992e2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -21,16 +21,20 @@
#include "InstPrinter/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +81,12 @@ public:
return MCInstLowering.lowerOperand(MO, MCOp);
}
+ void EmitJumpTableInfo() override;
+ void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB, unsigned JTI);
+
+ void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
+
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -100,12 +110,33 @@ public:
AU.setPreservesAll();
}
- bool runOnMachineFunction(MachineFunction &F) override {
- AArch64FI = F.getInfo<AArch64FunctionInfo>();
- STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
- bool Result = AsmPrinter::runOnMachineFunction(F);
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ AArch64FI = MF.getInfo<AArch64FunctionInfo>();
+ STI = static_cast<const AArch64Subtarget*>(&MF.getSubtarget());
+
+ SetupMachineFunction(MF);
+
+ if (STI->isTargetCOFF()) {
+ bool Internal = MF.getFunction().hasInternalLinkage();
+ COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ int Type =
+ COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
+
+ OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->EmitCOFFSymbolStorageClass(Scl);
+ OutStreamer->EmitCOFFSymbolType(Type);
+ OutStreamer->EndCOFFSymbolDef();
+ }
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // Emit the XRay table for this function.
emitXRayTable();
- return Result;
+
+ // We didn't modify anything.
+ return false;
}
private:
@@ -208,7 +239,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
}
@@ -433,6 +464,104 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps - 2, OS);
}
+void AArch64AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+ OutStreamer->SwitchSection(ReadOnlySec);
+
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ unsigned Size = AFI->getJumpTableEntrySize(JTI);
+ EmitAlignment(Log2_32(Size));
+ OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+ for (auto *JTBB : JTBBs)
+ emitJumpTableEntry(MJTI, JTBB, JTI);
+ }
+}
+
+void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned JTI) {
+ const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ unsigned Size = AFI->getJumpTableEntrySize(JTI);
+
+ if (Size == 4) {
+ // .word LBB - LJTI
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ } else {
+ // .byte (LBB - LBB) >> 2 (or .hword)
+ const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
+ const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ Value = MCBinaryExpr::createLShr(
+ Value, MCConstantExpr::create(2, OutContext), OutContext);
+ }
+
+ OutStreamer->EmitValue(Value, Size);
+}
+
+/// Small jump tables contain an unsigned byte or half, representing the offset
+/// from the lowest-addressed possible destination to the desired basic
+/// block. Since all instructions are 4-byte aligned, this is further compressed
+/// by counting in instructions rather than bytes (i.e. divided by 4). So, to
+/// materialize the correct destination we need:
+///
+/// adr xDest, .LBB0_0
+/// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
+/// add xDest, xDest, xScratch, lsl #2
+void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
+ const llvm::MachineInstr &MI) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned ScratchRegW =
+ STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32);
+ unsigned TableReg = MI.getOperand(2).getReg();
+ unsigned EntryReg = MI.getOperand(3).getReg();
+ int JTIdx = MI.getOperand(4).getIndex();
+ bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+
+ // This has to be first because the compression pass based its reachability
+ // calculations on the start of the JumpTableDest instruction.
+ auto Label =
+ MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
+ .addReg(DestReg)
+ .addExpr(MCSymbolRefExpr::create(
+ Label, MF->getContext())));
+
+ // Load the number of instruction-steps to offset from the label.
+ unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
+ EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
+ .addReg(ScratchRegW)
+ .addReg(TableReg)
+ .addReg(EntryReg)
+ .addImm(0)
+ .addImm(IsByteEntry ? 0 : 1));
+
+ // Multiply the steps by 4 and add to the already materialized base label
+ // address.
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+ .addReg(DestReg)
+ .addReg(DestReg)
+ .addReg(ScratchReg)
+ .addImm(2));
+}
+
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -503,7 +632,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
unsigned DestReg = MI.getOperand(0).getReg();
- if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) {
+ if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
// Convert H/S/D register to corresponding Q register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
@@ -559,6 +688,8 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->EmitLabel(LOHLabel);
}
+ AArch64TargetStreamer *TS =
+ static_cast<AArch64TargetStreamer *>(OutStreamer->getTargetStreamer());
// Do any manual lowerings.
switch (MI->getOpcode()) {
default:
@@ -585,12 +716,27 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->EmitRawText(StringRef(OS.str()));
}
return;
+
+ case AArch64::EMITBKEY: {
+ ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
+ if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+ ExceptionHandlingType != ExceptionHandling::ARM)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ OutStreamer->EmitCFIBKeyFrame();
+ return;
+ }
}
// Tail calls use pseudo instructions so they have the proper code-gen
// attributes (isCall, isReturn, etc.). We lower them to the real
// instruction here.
- case AArch64::TCRETURNri: {
+ case AArch64::TCRETURNri:
+ case AArch64::TCRETURNriBTI:
+ case AArch64::TCRETURNriALL: {
MCInst TmpInst;
TmpInst.setOpcode(AArch64::BR);
TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
@@ -660,6 +806,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case AArch64::JumpTableDest32: {
+ // We want:
+ // ldrsw xScratch, [xTable, xEntry, lsl #2]
+ // add xDest, xTable, xScratch
+ unsigned DestReg = MI->getOperand(0).getReg(),
+ ScratchReg = MI->getOperand(1).getReg(),
+ TableReg = MI->getOperand(2).getReg(),
+ EntryReg = MI->getOperand(3).getReg();
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
+ .addReg(ScratchReg)
+ .addReg(TableReg)
+ .addReg(EntryReg)
+ .addImm(0)
+ .addImm(1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+ .addReg(DestReg)
+ .addReg(TableReg)
+ .addReg(ScratchReg)
+ .addImm(0));
+ return;
+ }
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ LowerJumpTableDestSmall(*OutStreamer, *MI);
+ return;
+
case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
@@ -683,6 +855,100 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHABLE_TAIL_CALL:
LowerPATCHABLE_TAIL_CALL(*MI);
return;
+
+ case AArch64::SEH_StackAlloc:
+ TS->EmitARM64WinCFIAllocStack(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveFPLR:
+ TS->EmitARM64WinCFISaveFPLR(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveFPLR_X:
+ assert(MI->getOperand(0).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFPLRX(-MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_SaveReg:
+ TS->EmitARM64WinCFISaveReg(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveReg_X:
+ assert(MI->getOperand(1).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveRegX(MI->getOperand(0).getImm(),
+ -MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveRegP:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp");
+ TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveRegP_X:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp_x");
+ assert(MI->getOperand(2).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveRegPX(MI->getOperand(0).getImm(),
+ -MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveFReg:
+ TS->EmitARM64WinCFISaveFReg(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveFReg_X:
+ assert(MI->getOperand(1).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFRegX(MI->getOperand(0).getImm(),
+ -MI->getOperand(1).getImm());
+ return;
+
+ case AArch64::SEH_SaveFRegP:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp");
+ TS->EmitARM64WinCFISaveFRegP(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SaveFRegP_X:
+ assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
+ "Non-consecutive registers not allowed for save_regp_x");
+ assert(MI->getOperand(2).getImm() < 0 &&
+ "Pre increment SEH opcode must have a negative offset");
+ TS->EmitARM64WinCFISaveFRegPX(MI->getOperand(0).getImm(),
+ -MI->getOperand(2).getImm());
+ return;
+
+ case AArch64::SEH_SetFP:
+ TS->EmitARM64WinCFISetFP();
+ return;
+
+ case AArch64::SEH_AddFP:
+ TS->EmitARM64WinCFIAddFP(MI->getOperand(0).getImm());
+ return;
+
+ case AArch64::SEH_Nop:
+ TS->EmitARM64WinCFINop();
+ return;
+
+ case AArch64::SEH_PrologEnd:
+ TS->EmitARM64WinCFIPrologEnd();
+ return;
+
+ case AArch64::SEH_EpilogStart:
+ TS->EmitARM64WinCFIEpilogStart();
+ return;
+
+ case AArch64::SEH_EpilogEnd:
+ TS->EmitARM64WinCFIEpilogEnd();
+ return;
}
// Finally, do the automated lowerings for everything else.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
new file mode 100644
index 000000000000..da70a624c5be
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -0,0 +1,130 @@
+//===-- AArch64BranchTargets.cpp -- Harden code using v8.5-A BTI extension -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts BTI instructions at the start of every function and basic
+// block which could be indirectly called. The hardware will (when enabled)
+// trap when an indirect branch or call instruction targets an instruction
+// which is not a valid BTI instruction. This is intended to guard against
+// control-flow hijacking attacks. Note that this does not do anything for RET
+// instructions, as they can be more precisely protected by return address
+// signing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-branch-targets"
+#define AARCH64_BRANCH_TARGETS_NAME "AArch64 Branch Targets"
+
+namespace {
+class AArch64BranchTargets : public MachineFunctionPass {
+public:
+ static char ID;
+ AArch64BranchTargets() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return AARCH64_BRANCH_TARGETS_NAME; }
+
+private:
+ void addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump);
+};
+} // end anonymous namespace
+
+char AArch64BranchTargets::ID = 0;
+
+INITIALIZE_PASS(AArch64BranchTargets, "aarch64-branch-targets",
+ AARCH64_BRANCH_TARGETS_NAME, false, false)
+
+void AArch64BranchTargets::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createAArch64BranchTargetsPass() {
+ return new AArch64BranchTargets();
+}
+
+bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("branch-target-enforcement"))
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << "********** AArch64 Branch Targets **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ // LLVM does not consider basic blocks which are the targets of jump tables
+ // to be address-taken (the address can't escape anywhere else), but they are
+ // used for indirect branches, so need BTI instructions.
+ SmallPtrSet<MachineBasicBlock *, 8> JumpTableTargets;
+ if (auto *JTI = MF.getJumpTableInfo())
+ for (auto &JTE : JTI->getJumpTables())
+ for (auto *MBB : JTE.MBBs)
+ JumpTableTargets.insert(MBB);
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ bool CouldCall = false, CouldJump = false;
+ // If the function is address-taken or externally-visible, it could be
+    // indirectly called. PLT entries and tail-calls use BR, but when they
+    // are in guarded pages they should all use x16 or x17 to hold the called
+ // address, so we don't need to set CouldJump here. BR instructions in
+ // non-guarded pages (which might be non-BTI-aware code) are allowed to
+ // branch to a "BTI c" using any register.
+ if (&MBB == &*MF.begin() && (F.hasAddressTaken() || !F.hasLocalLinkage()))
+ CouldCall = true;
+
+ // If the block itself is address-taken, it could be indirectly branched
+ // to, but not called.
+ if (MBB.hasAddressTaken() || JumpTableTargets.count(&MBB))
+ CouldJump = true;
+
+ if (CouldCall || CouldJump) {
+ addBTI(MBB, CouldCall, CouldJump);
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
+ bool CouldJump) {
+ LLVM_DEBUG(dbgs() << "Adding BTI " << (CouldJump ? "j" : "")
+ << (CouldCall ? "c" : "") << " to " << MBB.getName()
+ << "\n");
+
+ const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+
+ unsigned HintNum = 32;
+ if (CouldCall)
+ HintNum |= 2;
+ if (CouldJump)
+ HintNum |= 4;
+ assert(HintNum != 32 && "No target kinds!");
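+
+  // The resulting HINT immediate selects the BTI variant:
+  //   34 = BTI c, 36 = BTI j, 38 = BTI jc.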
+
+ auto MBBI = MBB.begin();
+
+ // PACI[AB]SP are implicitly BTI JC, so no BTI instruction needed there.
+ if (MBBI != MBB.end() && (MBBI->getOpcode() == AArch64::PACIASP ||
+ MBBI->getOpcode() == AArch64::PACIBSP))
+ return;
+
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AArch64::HINT))
+ .addImm(HintNum);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index 26d532555e78..5980e5684e89 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -227,32 +227,45 @@ void AArch64CallLowering::splitToValueTypes(
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = MF.getFunction();
-
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
- assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
- bool Success = true;
- if (VReg) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+ "Return value without a vreg");
- // We zero-extend i1s to i8.
- if (MRI.getType(VReg).getSizeInBits() == 1)
- VReg = MIRBuilder.buildZExt(LLT::scalar(8), VReg)->getOperand(0).getReg();
+ bool Success = true;
+ if (!VRegs.empty()) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
auto &DL = F.getParent()->getDataLayout();
+ LLVMContext &Ctx = Val->getType()->getContext();
- ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> SplitArgs;
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
- [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, VReg, Offset);
- });
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ // We zero-extend i1s to i8.
+ unsigned CurVReg = VRegs[i];
+ if (MRI.getType(VRegs[i]).getSizeInBits() == 1) {
+ CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg)
+ ->getOperand(0)
+ .getReg();
+ }
+
+ ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, F.getCallingConv(),
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, CurVReg, Offset);
+ });
+ }
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
@@ -324,6 +337,10 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
}
+ auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ if (Subtarget.hasCustomCallingConv())
+ Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
+
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
@@ -364,8 +381,14 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIB.add(Callee);
// Tell the call which registers are clobbered.
- auto TRI = MF.getSubtarget().getRegisterInfo();
- MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
+ auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
+ if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(MF, &Mask);
+ MIB.addRegMask(Mask);
+
+ if (TRI->isAnyArgRegReserved(MF))
+ TRI->emitReservedArgRegCallError(MF);
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
index 68c127fc42e5..1c2bd6a4de5d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -34,8 +34,8 @@ class AArch64CallLowering: public CallLowering {
public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 30492003df14..5db941e9dac7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -123,7 +123,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
// Vararg functions on windows pass floats in integer registers
def CC_AArch64_Win64_VarArg : CallingConv<[
- CCIfType<[f16, f32], CCPromoteToType<f64>>,
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
CCIfType<[f64], CCBitConvertToType<i64>>,
CCDelegateTo<CC_AArch64_AAPCS>
]>;
@@ -288,6 +288,20 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
+// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
+// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
+// and not (LR,FP) pairs.
+def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ D8, D9, D10, D11,
+ D12, D13, D14, D15)>;
+
+// AArch64 PCS for vector functions (VPCS)
+// must (additionally) preserve full Q8-Q23 registers
+def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ (sequence "Q%u", 8, 23))>;
+
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
@@ -362,5 +376,7 @@ def CSR_AArch64_AAPCS_SwiftError_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
def CSR_AArch64_RT_MostRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
+def CSR_AArch64_AAVPCS_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
def CSR_AArch64_AAPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
new file mode 100644
index 000000000000..0924a27e2586
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -0,0 +1,162 @@
+//==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This pass looks at the basic blocks each jump-table refers to and works out
+// whether they can be emitted in a compressed form (with 8 or 16-bit
+// entries). If so, it changes the opcode and flags them in the associated
+// AArch64FunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-jump-tables"
+
+STATISTIC(NumJT8, "Number of jump-tables with 1-byte entries");
+STATISTIC(NumJT16, "Number of jump-tables with 2-byte entries");
+STATISTIC(NumJT32, "Number of jump-tables with 4-byte entries");
+
+namespace {
+class AArch64CompressJumpTables : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineFunction *MF;
+ SmallVector<int, 8> BlockInfo;
+
+ int computeBlockSize(MachineBasicBlock &MBB);
+ void scanFunction();
+
+ bool compressJumpTable(MachineInstr &MI, int Offset);
+
+public:
+ static char ID;
+ AArch64CompressJumpTables() : MachineFunctionPass(ID) {
+ initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+ StringRef getPassName() const override {
+ return "AArch64 Compress Jump Tables";
+ }
+};
+char AArch64CompressJumpTables::ID = 0;
+}
+
+INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
+ "AArch64 compress jump tables pass", false, false)
+
+int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+ int Size = 0;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+}
+
+void AArch64CompressJumpTables::scanFunction() {
+ BlockInfo.clear();
+ BlockInfo.resize(MF->getNumBlockIDs());
+
+ int Offset = 0;
+ for (MachineBasicBlock &MBB : *MF) {
+ BlockInfo[MBB.getNumber()] = Offset;
+ Offset += computeBlockSize(MBB);
+ }
+}
+
+bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
+ int Offset) {
+ if (MI.getOpcode() != AArch64::JumpTableDest32)
+ return false;
+
+ int JTIdx = MI.getOperand(4).getIndex();
+ auto &JTInfo = *MF->getJumpTableInfo();
+ const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+ // The jump-table might have been optimized away.
+ if (JT.MBBs.empty())
+ return false;
+
+ int MaxOffset = std::numeric_limits<int>::min(),
+ MinOffset = std::numeric_limits<int>::max();
+ MachineBasicBlock *MinBlock = nullptr;
+ for (auto Block : JT.MBBs) {
+ int BlockOffset = BlockInfo[Block->getNumber()];
+ assert(BlockOffset % 4 == 0 && "misaligned basic block");
+
+ MaxOffset = std::max(MaxOffset, BlockOffset);
+ if (BlockOffset <= MinOffset) {
+ MinOffset = BlockOffset;
+ MinBlock = Block;
+ }
+ }
+
+ // The ADR instruction needed to calculate the address of the first reachable
+ // basic block can address +/-1MB.
+ if (!isInt<21>(MinOffset - Offset)) {
+ ++NumJT32;
+ return false;
+ }
+
+ int Span = MaxOffset - MinOffset;
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ if (isUInt<8>(Span / 4)) {
+ AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
+ MI.setDesc(TII->get(AArch64::JumpTableDest8));
+ ++NumJT8;
+ return true;
+ } else if (isUInt<16>(Span / 4)) {
+ AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
+ MI.setDesc(TII->get(AArch64::JumpTableDest16));
+ ++NumJT16;
+ return true;
+ }
+
+ ++NumJT32;
+ return false;
+}
+
+bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
+ bool Changed = false;
+ MF = &MFIn;
+
+ const auto &ST = MF->getSubtarget<AArch64Subtarget>();
+ TII = ST.getInstrInfo();
+
+ if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+ return false;
+
+ scanFunction();
+
+ for (MachineBasicBlock &MBB : *MF) {
+ int Offset = BlockInfo[MBB.getNumber()];
+ for (MachineInstr &MI : MBB) {
+ Changed |= compressJumpTable(MI, Offset);
+ Offset += TII->getInstSizeInBytes(MI);
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64CompressJumpTablesPass() {
+ return new AArch64CompressJumpTables();
+}
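
A minimal sketch of the entry-size decision made in compressJumpTable above, assuming (as the pass does) that block offsets are 4-byte aligned and measured from the start of the function; TableOffset stands for the offset of the jump-table dispatch itself:

#include <cstdint>
#include <limits>

// Entries are stored as (BlockOffset - MinOffset) / 4, so the span between the
// nearest and the furthest target decides the width; the ADR that materializes
// the base address must reach MinOffset within +/-1 MiB.
static unsigned jumpTableEntryBytes(int MinOffset, int MaxOffset,
                                    int TableOffset) {
  long long AdrDelta = (long long)MinOffset - TableOffset;
  if (AdrDelta < -(1 << 20) || AdrDelta >= (1 << 20))
    return 4; // out of ADR range, keep 32-bit entries
  unsigned Span = unsigned(MaxOffset - MinOffset) / 4;
  if (Span <= std::numeric_limits<uint8_t>::max())
    return 1;
  if (Span <= std::numeric_limits<uint16_t>::max())
    return 2;
  return 4;
}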
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 9226a9dd879b..f7190d58fbf9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -835,36 +835,55 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
}
case AArch64::LOADgot: {
- // Expand into ADRP + LDR.
+ MachineFunction *MF = MBB.getParent();
unsigned DstReg = MI.getOperand(0).getReg();
const MachineOperand &MO1 = MI.getOperand(1);
unsigned Flags = MO1.getTargetFlags();
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
- MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
- .add(MI.getOperand(0))
- .addReg(DstReg);
-
- if (MO1.isGlobal()) {
- MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
- MIB2.addGlobalAddress(MO1.getGlobal(), 0,
- Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- } else if (MO1.isSymbol()) {
- MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
- MIB2.addExternalSymbol(MO1.getSymbolName(),
- Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+ if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
+ // Tiny code model: expand to a single literal LDR.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(AArch64::LDRXl), DstReg);
+
+ if (MO1.isGlobal()) {
+ MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
+ } else if (MO1.isSymbol()) {
+ MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
+ } else {
+ assert(MO1.isCPI() &&
+ "Only expect globals, externalsymbols, or constant pools");
+ MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
+ }
} else {
- assert(MO1.isCPI() &&
- "Only expect globals, externalsymbols, or constant pools");
- MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
- Flags | AArch64II::MO_PAGE);
- MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
- Flags | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
+ // Small code model: expand into ADRP + LDR.
+ MachineInstrBuilder MIB1 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
+ MachineInstrBuilder MIB2 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
+ .add(MI.getOperand(0))
+ .addReg(DstReg);
+
+ if (MO1.isGlobal()) {
+ MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
+ MIB2.addGlobalAddress(MO1.getGlobal(), 0,
+ Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ } else if (MO1.isSymbol()) {
+ MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
+ MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
+ AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else {
+ assert(MO1.isCPI() &&
+ "Only expect globals, externalsymbols, or constant pools");
+ MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
+ Flags | AArch64II::MO_PAGE);
+ MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
+ Flags | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ }
+
+ transferImpOps(MI, MIB1, MIB2);
}
-
- transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 572d1c22feea..47550cabb9f0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2016,8 +2016,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
- std::prev(FuncInfo.InsertPt)->eraseFromParent();
- ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
+ MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
+ ResultReg = std::prev(I)->getOperand(0).getReg();
+ removeDeadCode(I, std::next(I));
} else
ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
/*IsKill=*/true,
@@ -2038,7 +2039,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
break;
}
}
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
MI = nullptr;
if (Reg)
MI = MRI.getUniqueVRegDef(Reg);
@@ -2256,6 +2258,13 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
/// Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ if (FuncInfo.MF->getFunction().hasFnAttribute(
+ Attribute::SpeculativeLoadHardening))
+ return false;
+
assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
const CmpInst *CI = cast<CmpInst>(BI->getCondition());
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
@@ -2918,6 +2927,9 @@ bool AArch64FastISel::fastLowerArguments() {
if (CC != CallingConv::C && CC != CallingConv::Swift)
return false;
+ if (Subtarget->hasCustomCallingConv())
+ return false;
+
// Only handle simple cases of up to 8 GPR and FPR each.
unsigned GPRCnt = 0;
unsigned FPRCnt = 0;
@@ -3208,6 +3220,10 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!processCallArgs(CLI, OutVTs, NumBytes))
return false;
+ const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ if (RegInfo->isAnyArgRegReserved(*MF))
+ RegInfo->emitReservedArgRegCallError(*MF);
+
// Issue the call.
MachineInstrBuilder MIB;
if (Subtarget->useSmallAddressing()) {
@@ -3443,6 +3459,21 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, SrcReg);
return true;
}
+ case Intrinsic::sponentry: {
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
+
+ // SP = FP + Fixed Object + 16
+ int FI = MFI.CreateFixedObject(4, 0, false);
+ unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::ADDXri), ResultReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0);
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const auto *MTI = cast<MemTransferInst>(II);
@@ -3738,6 +3769,9 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
}
+ if (!ResultReg1)
+ return false;
+
ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
/*IsKill=*/true, getInvertedCondCode(CC));
@@ -4483,7 +4517,8 @@ bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
"Expected copy instruction");
Reg = MI->getOperand(1).getReg();
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
}
updateValueMap(I, Reg);
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 6dc5d19862a9..538a8d7e8fbc 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -98,6 +98,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -114,11 +115,13 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -201,6 +204,11 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ // Win64 EH requires a frame pointer if funclets are present, as the locals
+ // are accessed off the frame pointer in both the parent function and the
+ // funclets.
+ if (MF.hasEHFunclets())
+ return true;
// Retain behavior of always omitting the FP for leaf functions when possible.
if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
@@ -279,6 +287,31 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
+static bool ShouldSignReturnAddress(MachineFunction &MF) {
+ // The function should be signed in the following situations:
+ // - sign-return-address=all
+ // - sign-return-address=non-leaf and the function spills the LR
+
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("sign-return-address"))
+ return false;
+
+ StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("none"))
+ return false;
+
+ if (Scope.equals("all"))
+ return true;
+
+ assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
+
+ for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
+ if (Info.getReg() == AArch64::LR)
+ return true;
+
+ return false;
+}
+
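
As a sketch, the policy ShouldSignReturnAddress encodes boils down to the following, assuming the "sign-return-address" attribute string ("none", "non-leaf" or "all") has already been read off the function:

#include <string>

// Illustrative only: sign when the attribute requests "all", or when it
// requests "non-leaf" and the function spills LR (so the return address is
// actually stored to the stack).
static bool signReturnAddress(const std::string &Scope, bool SpillsLR) {
  if (Scope.empty() || Scope == "none")
    return false;
  if (Scope == "all")
    return true;
  return SpillsLR; // Scope == "non-leaf"
}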
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
@@ -330,7 +363,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
+ const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@@ -408,54 +441,217 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
return true;
}
+// Given a load or a store instruction, generate an appropriate unwinding SEH
+// code on Windows.
+static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo &TII,
+ MachineInstr::MIFlag Flag) {
+ unsigned Opc = MBBI->getOpcode();
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ DebugLoc DL = MBBI->getDebugLoc();
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ int Imm = MBBI->getOperand(ImmIdx).getImm();
+ MachineInstrBuilder MIB;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("No SEH Opcode for this instruction");
+ case AArch64::LDPDpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPDpre: {
+ unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
+ .addImm(Reg0)
+ .addImm(Reg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDPXpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPXpre: {
+ unsigned Reg0 = MBBI->getOperand(1).getReg();
+ unsigned Reg1 = MBBI->getOperand(2).getReg();
+ if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
+ .addImm(RegInfo->getSEHRegNum(Reg0))
+ .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDRDpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STRDpre: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
+ .addImm(Reg)
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::LDRXpost:
+ Imm = -Imm;
+ LLVM_FALLTHROUGH;
+ case AArch64::STRXpre: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
+ .addImm(Reg)
+ .addImm(Imm)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STPDi:
+ case AArch64::LDPDi: {
+ unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
+ .addImm(Reg0)
+ .addImm(Reg1)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STPXi:
+ case AArch64::LDPXi: {
+ unsigned Reg0 = MBBI->getOperand(0).getReg();
+ unsigned Reg1 = MBBI->getOperand(1).getReg();
+ if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ else
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
+ .addImm(RegInfo->getSEHRegNum(Reg0))
+ .addImm(RegInfo->getSEHRegNum(Reg1))
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STRXui:
+ case AArch64::LDRXui: {
+ int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ case AArch64::STRDui:
+ case AArch64::LDRDui: {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
+ .addImm(Reg)
+ .addImm(Imm * 8)
+ .setMIFlag(Flag);
+ break;
+ }
+ }
+ auto I = MBB->insertAfter(MBBI, MIB);
+ return I;
+}
+
+// Fix up the SEH opcode associated with the save/restore instruction.
+static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
+ unsigned LocalStackSize) {
+ MachineOperand *ImmOpnd = nullptr;
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ switch (MBBI->getOpcode()) {
+ default:
+ llvm_unreachable("Fix the offset in the SEH instruction");
+ case AArch64::SEH_SaveFPLR:
+ case AArch64::SEH_SaveRegP:
+ case AArch64::SEH_SaveReg:
+ case AArch64::SEH_SaveFRegP:
+ case AArch64::SEH_SaveFReg:
+ ImmOpnd = &MBBI->getOperand(ImmIdx);
+ break;
+ }
+ if (ImmOpnd)
+ ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
+}
+
// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
+ const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
+ bool NeedsWinCFI, bool InProlog = true) {
// Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions.
+ // instructions and associated CFI instruction.
while (MBBI->getOpcode() == AArch64::STRXpost ||
- MBBI->getOpcode() == AArch64::LDRXpre) {
- assert(MBBI->getOperand(0).getReg() != AArch64::SP);
+ MBBI->getOpcode() == AArch64::LDRXpre ||
+ MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
+ if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
+ assert(MBBI->getOperand(0).getReg() != AArch64::SP);
++MBBI;
}
-
unsigned NewOpc;
- bool NewIsUnscaled = false;
+ int Scale = 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
+ Scale = 8;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ NewOpc = AArch64::STPQpre;
+ Scale = 16;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
- NewIsUnscaled = true;
break;
case AArch64::STRDui:
NewOpc = AArch64::STRDpre;
- NewIsUnscaled = true;
+ break;
+ case AArch64::STRQui:
+ NewOpc = AArch64::STRQpre;
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
+ Scale = 8;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
+ Scale = 8;
+ break;
+ case AArch64::LDPQi:
+ NewOpc = AArch64::LDPQpost;
+ Scale = 16;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
- NewIsUnscaled = true;
break;
case AArch64::LDRDui:
NewOpc = AArch64::LDRDpost;
- NewIsUnscaled = true;
+ break;
+ case AArch64::LDRQui:
+ NewOpc = AArch64::LDRQpost;
break;
}
+ // Get rid of the SEH code associated with the old instruction.
+ if (NeedsWinCFI) {
+ auto SEH = std::next(MBBI);
+ if (AArch64InstrInfo::isSEHInstruction(*SEH))
+ SEH->eraseFromParent();
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
MIB.addReg(AArch64::SP, RegState::Define);
@@ -471,15 +667,16 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
"instruction!");
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
- // Last operand is immediate offset that needs fixing.
- assert(CSStackSizeInc % 8 == 0);
- int64_t CSStackSizeIncImm = CSStackSizeInc;
- if (!NewIsUnscaled)
- CSStackSizeIncImm /= 8;
- MIB.addImm(CSStackSizeIncImm);
+ assert(CSStackSizeInc % Scale == 0);
+ MIB.addImm(CSStackSizeInc / Scale);
MIB.setMIFlags(MBBI->getFlags());
- MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
+ MIB.setMemRefs(MBBI->memoperands());
+
+ // Generate a new SEH code that corresponds to the new instruction.
+ if (NeedsWinCFI)
+ InsertSEH(*MIB, *TII,
+ InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
return std::prev(MBB.erase(MBBI));
}
@@ -487,22 +684,43 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
- unsigned LocalStackSize) {
+ unsigned LocalStackSize,
+ bool NeedsWinCFI) {
+ if (AArch64InstrInfo::isSEHInstruction(MI))
+ return;
+
unsigned Opc = MI.getOpcode();
// Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions.
- if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) {
- assert(MI.getOperand(0).getReg() != AArch64::SP);
+ // instructions and associated CFI instruction.
+ if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
+ Opc == AArch64::CFI_INSTRUCTION) {
+ if (Opc != AArch64::CFI_INSTRUCTION)
+ assert(MI.getOperand(0).getReg() != AArch64::SP);
return;
}
- (void)Opc;
- assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
- Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
- Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
- Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
- "Unexpected callee-save save/restore opcode!");
+ unsigned Scale;
+ switch (Opc) {
+ case AArch64::STPXi:
+ case AArch64::STRXui:
+ case AArch64::STPDi:
+ case AArch64::STRDui:
+ case AArch64::LDPXi:
+ case AArch64::LDRXui:
+ case AArch64::LDPDi:
+ case AArch64::LDRDui:
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ case AArch64::STRQui:
+ case AArch64::LDPQi:
+ case AArch64::LDRQui:
+ Scale = 16;
+ break;
+ default:
+ llvm_unreachable("Unexpected callee-save save/restore opcode!");
+ }
unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
@@ -510,8 +728,16 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
// Last operand is immediate offset that needs fixing.
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
// All generated opcodes have scaled offsets.
- assert(LocalStackSize % 8 == 0);
- OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
+ assert(LocalStackSize % Scale == 0);
+ OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
+
+ if (NeedsWinCFI) {
+ auto MBBI = std::next(MachineBasicBlock::iterator(MI));
+ assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
+ assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
+ "Expecting a SEH instruction");
+ fixupSEHOpcode(MBBI, LocalStackSize);
+ }
}
static void adaptForLdStOpt(MachineBasicBlock &MBB,
@@ -546,6 +772,23 @@ static void adaptForLdStOpt(MachineBasicBlock &MBB,
//
}
+static bool ShouldSignWithAKey(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("sign-return-address-key"))
+ return true;
+
+ const StringRef Key =
+ F.getFnAttribute("sign-return-address-key").getValueAsString();
+ assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
+ return Key.equals_lower("a_key");
+}
+
+static bool needsWinCFI(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry();
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -556,8 +799,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
+ bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool HasFP = hasFP(MF);
+ bool NeedsWinCFI = needsWinCFI(MF);
+ MF.setHasWinCFI(NeedsWinCFI);
+ bool IsFunclet = MBB.isEHFuncletEntry();
// At this point, we're going to decide whether or not the function uses a
// redzone. In most cases, the function doesn't have a redzone so let's
@@ -568,18 +815,41 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
+ if (ShouldSignReturnAddress(MF)) {
+ if (ShouldSignWithAKey(MF))
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
- int NumBytes = (int)MFI.getStackSize();
+ // getStackSize() includes all the locals in its size calculation. We don't
+ // include these locals when computing the stack size of a funclet, as they
+ // are allocated in the parent's stack frame and accessed via the frame
+ // pointer from the funclet. We only save the callee saved registers in the
+ // funclet, which are really the callee saved registers of the parent
+ // function, including the funclet.
+ int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
+ : (int)MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
-
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
-
if (!NumBytes)
return;
// REDZONE: If the stack size is less than 128 bytes, we don't need
@@ -589,36 +859,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
-
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
+ if (!NeedsWinCFI) {
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+ // Encode the stack size of the leaf function.
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
}
+
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+
return;
}
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ // Var args are accounted for in the containing function, so don't
+ // include them for funclets.
+ unsigned FixedObject = (IsWin64 && !IsFunclet) ?
+ alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
-
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
- MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
- -PrologueSaveSize);
+ MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -629,9 +907,21 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator End = MBB.end();
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
if (CombineSPBump)
- fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
+ fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
+ NeedsWinCFI);
++MBBI;
}
+
+ // The code below is not applicable to funclets. We have emitted all the SEH
+ // opcodes that we needed to emit. The FP and BP belong to the containing
+ // function.
+ if (IsFunclet) {
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ return;
+ }
+
if (HasFP) {
// Only set up FP if we actually need to. Frame pointer is fp =
// sp - fixedobject - 16.
@@ -644,24 +934,58 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 4;
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
+ // exceed this amount. We need to move at most 2^24 - 1 into x15.
+ // This is at most two instructions, MOVZ followed by MOVK.
+ // TODO: Fix to use multiple stack alloc unwind codes for stacks
+ // exceeding 256MB in size.
+ if (NumBytes >= (1 << 28))
+ report_fatal_error("Stack size cannot exceed 256MB for stack "
+ "unwinding purposes");
+
+ uint32_t LowNumWords = NumWords & 0xFFFF;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
+ .addImm(LowNumWords)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ if ((NumWords & 0xFFFF0000) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
+ .addReg(AArch64::X15)
+ .addImm((NumWords & 0xFFFF0000) >> 16) // High half
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Tiny:
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
.addExternalSymbol("__chkstk")
.addReg(AArch64::X15, RegState::Implicit)
+ .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
@@ -669,11 +993,20 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addExternalSymbol("__chkstk")
.addExternalSymbol("__chkstk")
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
break;
}
@@ -682,6 +1015,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
NumBytes = 0;
}
@@ -701,7 +1038,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+ MachineInstr::FrameSetup, false, NeedsWinCFI);
if (NeedsRealignment) {
const unsigned Alignment = MFI.getMaxAlignment();
@@ -724,6 +1061,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(scratchSPReg, RegState::Kill)
.addImm(andMaskEncoded);
AFI->setStackRealigned(true);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes & andMaskEncoded)
+ .setMIFlag(MachineInstr::FrameSetup);
}
}
@@ -737,8 +1078,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
}
+ // The very last FrameSetup instruction indicates the end of prologue. Emit a
+ // SEH opcode indicating the prologue end.
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
const int StackGrowth = -TD.getPointerSize(0);
@@ -832,6 +1182,46 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
+static void InsertReturnAddressAuth(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (!ShouldSignReturnAddress(MF))
+ return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // The AUTIASP instruction assembles to a hint instruction before v8.3a so
+ // this instruction can safely be used for any v8a architecture.
+ // From v8.3a onwards there are optimised authenticate LR and return
+ // instructions, namely RETA{A,B}, that can be used instead.
+ if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
+ MBBI->getOpcode() == AArch64::RET_ReallyLR) {
+ BuildMI(MBB, MBBI, DL,
+ TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
+ .copyImplicitOps(*MBBI);
+ MBB.erase(MBBI);
+ } else {
+ BuildMI(
+ MBB, MBBI, DL,
+ TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+static bool isFuncletReturnInstr(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case AArch64::CATCHRET:
+ case AArch64::CLEANUPRET:
+ return true;
+ }
+}
+
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -840,14 +1230,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool IsTailCallReturn = false;
+ bool NeedsWinCFI = needsWinCFI(MF);
+ bool IsFunclet = false;
+
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
- RetOpcode == AArch64::TCRETURNri;
+ RetOpcode == AArch64::TCRETURNri ||
+ RetOpcode == AArch64::TCRETURNriBTI;
+ IsFunclet = isFuncletReturnInstr(*MBBI);
}
- int NumBytes = MFI.getStackSize();
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
+ : MFI.getStackSize();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -899,25 +1296,38 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
// it as the 2nd argument of AArch64ISD::TC_RETURN.
+ auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
+
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
- unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
+ // Var args are accounted for in the containing function, so don't
+ // include them for funclets.
+ unsigned FixedObject =
+ (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
uint64_t AfterCSRPopSize = ArgumentPopSize;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+ // We cannot rely on the local stack size set in emitPrologue if the function
+ // has funclets, as funclets have different local stack size requirements, and
+ // the current value set in emitPrologue may be that of the containing
+ // function.
+ if (MF.hasEHFunclets())
+ AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
+ while (AArch64InstrInfo::isSEHInstruction(*Pop))
+ Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
// If the offset is 0, convert it to a post-index ldp.
- if (OffsetOp.getImm() == 0) {
- convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII,
- PrologueSaveSize);
- } else {
+ if (OffsetOp.getImm() == 0)
+ convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false);
+ else {
// If not, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
@@ -937,14 +1347,23 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
++LastPopI;
break;
} else if (CombineSPBump)
- fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
+ fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
+ NeedsWinCFI);
}
+ if (NeedsWinCFI)
+ BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- NumBytes + AfterCSRPopSize, TII,
- MachineInstr::FrameDestroy);
+ NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
return;
}
@@ -972,9 +1391,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackRestoreBytes, TII, MachineInstr::FrameDestroy);
- if (Done)
+ StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
+ if (Done) {
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL,
+ TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
return;
+ }
NumBytes = 0;
}
@@ -983,13 +1408,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
- if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
+ if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-AFI->getCalleeSavedStackSize() + 16, TII,
- MachineInstr::FrameDestroy);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
- MachineInstr::FrameDestroy);
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
@@ -1010,8 +1435,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
+ AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI);
}
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1084,6 +1513,14 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
UseFP = true;
+ } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
+ // Funclets access the locals contained in the parent's stack frame
+ // via the frame pointer, so we have to use the FP in the parent
+ // function.
+ assert(
+ Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
+ "Funclets should only be present on Win64");
+ UseFP = true;
} else {
// We have the choice between FP and (SP or BP).
if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
@@ -1136,6 +1573,23 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
+static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
+ bool NeedsWinCFI) {
+ // If we are generating register pairs for a Windows function that requires
+ // EH support, then pair consecutive registers only. There are no unwind
+ // opcodes for saves/restores of non-consecutive register pairs.
+ // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+
+ // TODO: LR can be paired with any register. We don't support this yet in
+ // the MCLayer. We need to add support for the save_lrpair unwind code.
+ if (!NeedsWinCFI)
+ return false;
+ if (Reg2 == Reg1 + 1)
+ return false;
+ return true;
+}
+
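
For illustration, the pairing restriction above reduces to a one-line predicate; this sketch takes the two candidate registers as raw encoded numbers rather than MCRegister values:

// With Windows CFI, STP/LDP callee-save pairs must use consecutive registers
// (x19/x20, d8/d9, ...) because the save_regp/save_fregp unwind codes encode
// only the first register of the pair.
static bool canPairForWinCFI(unsigned Reg1, unsigned Reg2, bool NeedsWinCFI) {
  return !NeedsWinCFI || Reg2 == Reg1 + 1;
}

For example, x19/x20 can still be paired under Windows CFI, while x19/x21 is kept as two single saves.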
namespace {
struct RegPairInfo {
@@ -1143,7 +1597,7 @@ struct RegPairInfo {
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- bool IsGPR;
+ enum RegType { GPR, FPR64, FPR128 } Type;
RegPairInfo() = default;
@@ -1160,6 +1614,7 @@ static void computeCalleeSaveRegisterPairs(
if (CSI.empty())
return;
+ bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction().getCallingConv();
@@ -1172,28 +1627,50 @@ static void computeCalleeSaveRegisterPairs(
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int Offset = AFI->getCalleeSavedStackSize();
-
+ // On Linux, we will have at most one non-paired register. On Windows with
+ // CFI, we can have multiple unpaired registers in order to utilize the
+ // available unwind codes. This flag ensures that the alignment fixup is done
+ // only once, as intended.
+ bool FixupDone = false;
for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
- assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
- AArch64::FPR64RegClass.contains(RPI.Reg1));
- RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+ if (AArch64::GPR64RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::GPR;
+ else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR64;
+ else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::FPR128;
+ else
+ llvm_unreachable("Unsupported register class.");
// Add the next reg to the pair if it is in the same register class.
if (i + 1 < Count) {
unsigned NextReg = CSI[i + 1].getReg();
- if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
- (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
- RPI.Reg2 = NextReg;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ if (AArch64::GPR64RegClass.contains(NextReg) &&
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ RPI.Reg2 = NextReg;
+ break;
+ case RegPairInfo::FPR64:
+ if (AArch64::FPR64RegClass.contains(NextReg) &&
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ RPI.Reg2 = NextReg;
+ break;
+ case RegPairInfo::FPR128:
+ if (AArch64::FPR128RegClass.contains(NextReg))
+ RPI.Reg2 = NextReg;
+ break;
+ }
}
// If either of the registers to be saved is the lr register, it means that
// we also need to save lr in the shadow call stack.
if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
- if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
+ if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
report_fatal_error("Must reserve x18 to use shadow call stack");
NeedShadowCallStackProlog = true;
}
@@ -1219,17 +1696,22 @@ static void computeCalleeSaveRegisterPairs(
RPI.FrameIdx = CSI[i].getFrameIdx();
- if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
- // Round up size of non-pair to pair size if we need to pad the
- // callee-save area to ensure 16-byte alignment.
- Offset -= 16;
+ int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
+ Offset -= RPI.isPaired() ? 2 * Scale : Scale;
+
+ // Round up size of non-pair to pair size if we need to pad the
+ // callee-save area to ensure 16-byte alignment.
+ if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
+ RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
+ FixupDone = true;
+ Offset -= 8;
+ assert(Offset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
- AFI->setCalleeSaveStackHasFreeSpace(true);
- } else
- Offset -= RPI.isPaired() ? 16 : 8;
- assert(Offset % 8 == 0);
- RPI.Offset = Offset / 8;
+ }
+
+ assert(Offset % Scale == 0);
+ RPI.Offset = Offset / Scale;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
"Offset out of bounds for LDP/STP immediate");
@@ -1245,6 +1727,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ bool NeedsWinCFI = needsWinCFI(MF);
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
@@ -1262,6 +1745,27 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI)
+ BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, CFIInst));
+ BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// This instruction also makes x18 live-in to the entry block.
MBB.addLiveIn(AArch64::X18);
}
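
The escape above spells out a DWARF expression byte by byte; as a small sketch of the encoding it relies on, a value in [-64, 63] fits a single SLEB128 byte, which is why the -8 addend can be emitted as static_cast<char>(-8) & 0x7f:

#include <cassert>
#include <cstdint>

// Single-byte SLEB128: the low seven bits of the two's-complement value with
// the continuation bit clear. For -8 this yields 0x78.
static uint8_t sleb128SingleByte(int V) {
  assert(V >= -64 && V <= 63 && "needs more than one SLEB128 byte");
  return uint8_t(V) & 0x7f;
}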
@@ -1283,16 +1787,41 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
- if (RPI.IsGPR)
- StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
- else
- StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ unsigned Size, Align;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR64:
+ StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR128:
+ StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
+ Size = 16;
+ Align = 16;
+ break;
+ }
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
+ assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
+ "Windows unwdinding requires a consecutive (FP,LR) pair");
+ // Windows unwind codes require consecutive registers if registers are
+ // paired. Make the switch here, so that the code below will save (x,x+1)
+ // and not (x+1,x).
+ unsigned FrameIdxReg1 = RPI.FrameIdx;
+ unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
+ if (NeedsWinCFI && RPI.isPaired()) {
+ std::swap(Reg1, Reg2);
+ std::swap(FrameIdxReg1, FrameIdxReg2);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
MBB.addLiveIn(Reg1);
@@ -1301,16 +1830,20 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
- MachineMemOperand::MOStore, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
+ MachineMemOperand::MOStore, Size, Align));
}
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
- .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*scale],
+ // where the scale factor is implicit
.setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
- MachineMemOperand::MOStore, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
+ MachineMemOperand::MOStore, Size, Align));
+ if (NeedsWinCFI)
+ InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+
}
return true;
}
@@ -1323,6 +1856,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
+ bool NeedsWinCFI = needsWinCFI(MF);
if (MI != MBB.end())
DL = MI->getDebugLoc();
@@ -1344,32 +1878,57 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// ldp x22, x21, [sp, #0] // addImm(+0)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
- if (RPI.IsGPR)
- LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
- else
- LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ unsigned Size, Align;
+ switch (RPI.Type) {
+ case RegPairInfo::GPR:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR64:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+ Size = 8;
+ Align = 8;
+ break;
+ case RegPairInfo::FPR128:
+ LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
+ Size = 16;
+ Align = 16;
+ break;
+ }
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
+ // Windows unwind codes require consecutive registers if registers are
+ // paired. Make the switch here, so that the code below will restore (x,x+1)
+ // and not (x+1,x).
+ unsigned FrameIdxReg1 = RPI.FrameIdx;
+ unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
+ if (NeedsWinCFI && RPI.isPaired()) {
+ std::swap(Reg1, Reg2);
+ std::swap(FrameIdxReg1, FrameIdxReg2);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
- MachineMemOperand::MOLoad, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
+ MachineMemOperand::MOLoad, Size, Align));
}
MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
- .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+ .addImm(RPI.Offset) // [sp, #offset*scale]
+ // where the scale factor is implicit
.setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
- MachineMemOperand::MOLoad, 8, 8));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
+ MachineMemOperand::MOLoad, Size, Align));
+ if (NeedsWinCFI)
+ InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
-
if (ReverseCSRRestoreSeq)
for (const RegPairInfo &RPI : reverse(RegPairs))
EmitMI(RPI);
@@ -1406,30 +1965,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
MachineFrameInfo &MFI = MF.getFrameInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
? RegInfo->getBaseRegister()
: (unsigned)AArch64::NoRegister;
- unsigned SpillEstimate = SavedRegs.count();
- for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- unsigned PairedReg = CSRegs[i ^ 1];
- if (Reg == BasePointerReg)
- SpillEstimate++;
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
- SpillEstimate++;
- }
- SpillEstimate += 2; // Conservatively include FP+LR in the estimate
- unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
-
- // The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
- SavedRegs.set(AArch64::FP);
- SavedRegs.set(AArch64::LR);
- }
-
unsigned ExtraCSSpill = 0;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1453,7 +1994,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
- if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
+ if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
+ !SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
!RegInfo->isReservedReg(MF, PairedReg))
@@ -1461,6 +2003,24 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Calculates the callee saved stack size.
+ unsigned CSStackSize = 0;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned Reg : SavedRegs.set_bits())
+ CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
+
+ // Save number of saved regs, so we can easily update CSStackSize later.
+ unsigned NumSavedRegs = SavedRegs.count();
+
+ // The frame record needs to be created by saving the appropriate registers
+ unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
+ if (hasFP(MF) ||
+ windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
+ SavedRegs.set(AArch64::FP);
+ SavedRegs.set(AArch64::LR);
+ }
+
LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
for (unsigned Reg
: SavedRegs.set_bits()) dbgs()
@@ -1468,15 +2028,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumRegsSpilled = SavedRegs.count();
- bool CanEliminateFrame = NumRegsSpilled == 0;
+ bool CanEliminateFrame = SavedRegs.count() == 0;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
- unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
- LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
- bool BigStack = (CFSize > EstimatedStackSizeLimit);
+ bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
@@ -1497,7 +2054,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (produceCompactUnwindFrame(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPRPaired;
- NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
@@ -1514,9 +2070,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Adding the size of additional 64bit GPR saves.
+ CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
+ unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
+ LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
+ << EstimatedStackSize + AlignedCSStackSize
+ << " bytes.\n");
+
// Round up to register pair alignment to avoid additional SP adjustment
// instructions.
- AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
+ AFI->setCalleeSavedStackSize(AlignedCSStackSize);
+ AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
}
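A worked example may help here (the numbers are hypothetical, not taken from the patch): five 8-byte GPR saves give CSStackSize = 40, which rounds up to 48, so setCalleeSaveStackHasFreeSpace(true) records the 8 spare bytes that the stack-slot scavenging hook below keys off.

  #include <cassert>

  static unsigned alignTo16(unsigned Size) { return (Size + 15) & ~15u; }

  int main() {
    unsigned CSStackSize = 5 * 8;                         // e.g. x19..x23
    unsigned AlignedCSStackSize = alignTo16(CSStackSize); // 48
    bool HasFreeSpace = AlignedCSStackSize != CSStackSize;
    assert(AlignedCSStackSize == 48 && HasFreeSpace);
    return 0;
  }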
bool AArch64FrameLowering::enableStackSlotScavenging(
@@ -1524,3 +2088,69 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasCalleeSaveStackFreeSpace();
}
+
+void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ // If this function isn't doing Win64-style C++ EH, we don't need to do
+ // anything.
+ if (!MF.hasEHFunclets())
+ return;
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
+
+ MachineBasicBlock &MBB = MF.front();
+ auto MBBI = MBB.begin();
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ if (MBBI->isTerminator())
+ return;
+
+ // Create an UnwindHelp object.
+ int UnwindHelpFI =
+ MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
+ EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
+ // We need to store -2 into the UnwindHelp object at the start of the
+ // function.
+ DebugLoc DL;
+ RS->enterBasicBlock(MBB);
+ unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
+ .addReg(DstReg, getKillRegState(true))
+ .addFrameIndex(UnwindHelpFI)
+ .addImm(0);
+}
+
+/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
+/// the update. This is easily retrieved as it is exactly the offset that is set
+/// in processFunctionBeforeFrameFinalized.
+int AArch64FrameLowering::getFrameIndexReferencePreferSP(
+ const MachineFunction &MF, int FI, unsigned &FrameReg,
+ bool IgnoreSPUpdates) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
+ << MFI.getObjectOffset(FI) << "\n");
+ FrameReg = AArch64::SP;
+ return MFI.getObjectOffset(FI);
+}
+
+/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
+/// the parent's frame pointer
+unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
+ const MachineFunction &MF) const {
+ return 0;
+}
+
+/// Funclets only need to account for space for the callee saved registers,
+/// as the locals are accounted for in the parent's stack frame.
+unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
+ const MachineFunction &MF) const {
+ // This is the size of the pushed CSRs.
+ unsigned CSSize =
+ MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
+ // This is the amount of stack a funclet needs to allocate.
+ return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
+ getStackAlignment());
+}
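A quick back-of-the-envelope instance of the formula above (numbers invented for illustration): 48 bytes of pushed CSRs plus a 24-byte maximum outgoing call frame, rounded to the 16-byte stack alignment, gives an 80-byte funclet allocation.

  #include <cassert>

  static unsigned alignTo(unsigned Value, unsigned Align) {
    return (Value + Align - 1) / Align * Align;
  }

  int main() {
    unsigned CSSize = 48, MaxCallFrameSize = 24, StackAlign = 16;
    assert(alignTo(CSSize + MaxCallFrameSize, StackAlign) == 80);
    return 0;
  }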
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 104e52b5f1f3..0d0385acf46e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -69,6 +69,17 @@ public:
bool enableStackSlotScavenging(const MachineFunction &MF) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
+ unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
+
+ unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
+
+ int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
+ unsigned &FrameReg,
+ bool IgnoreSPUpdates) const override;
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c1a9ee333b62..fc9855f6a0da 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1208,9 +1208,8 @@ void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
CurDAG->RemoveDeadNode(N);
}
@@ -1261,9 +1260,8 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -1441,9 +1439,8 @@ void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -1476,9 +1473,8 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
}
@@ -2091,8 +2087,7 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
(void)BitWidth;
assert(BitWidth == 32 || BitWidth == 64);
- KnownBits Known;
- CurDAG->computeKnownBits(Op, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Op);
// Non-zero in the sense that they're not provably zero, which is the key
// point if we want to use this value
@@ -2171,8 +2166,7 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
// Compute the Known Zero for the AND as this allows us to catch more general
// cases than just looking for AND with imm.
- KnownBits Known;
- CurDAG->computeKnownBits(And, Known);
+ KnownBits Known = CurDAG->computeKnownBits(And);
// Non-zero in the sense that they're not provably zero, which is the key
// point if we want to use this value.
@@ -2313,8 +2307,7 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
// This allows to catch more general case than just looking for
// AND with imm. Indeed, simplify-demanded-bits may have removed
// the AND instruction because it proves it was useless.
- KnownBits Known;
- CurDAG->computeKnownBits(OrOpd1Val, Known);
+ KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
// Check if there is enough room for the second operand to appear
// in the first one
@@ -2690,7 +2683,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
unsigned Reg = PMapper->Encoding;
uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
unsigned State;
- if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
+ if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO ||
+ Reg == AArch64PState::SSBS) {
assert(Immed < 2 && "Bad imm");
State = AArch64::MSRpstateImm1;
} else {
@@ -2751,9 +2744,8 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
Opcode, SDLoc(N),
CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
@@ -2923,9 +2915,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
MVT::Other, MemAddr, Chain);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp =
+ cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
ReplaceNode(Node, Ld);
return;
}
@@ -2944,9 +2936,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp =
+ cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(Node, St);
return;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cfc7aa96d31f..e01ca14d7f63 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -187,7 +187,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
@@ -333,36 +333,38 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::v4f16, Promote);
- setOperationAction(ISD::FREM, MVT::v8f16, Promote);
+ setOperationAction(ISD::FREM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
@@ -385,8 +387,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
@@ -450,8 +452,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
- setOperationAction(ISD::FMINNAN, Ty, Legal);
- setOperationAction(ISD::FMAXNAN, Ty, Legal);
+ setOperationAction(ISD::FMINIMUM, Ty, Legal);
+ setOperationAction(ISD::FMAXIMUM, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
@@ -463,8 +465,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -714,8 +716,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
+ setOperationAction(ISD::MULHS, VT, Legal);
+ setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
@@ -792,9 +794,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
for (MVT InnerVT : MVT::all_valuetypes())
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
- // CNT supports only B element sizes.
+ // CNT supports only B element sizes; use UADDLP afterwards to widen.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
- setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
@@ -816,8 +818,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
if (VT.isFloatingPoint() &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
- for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
- ISD::FMINNUM, ISD::FMAXNUM})
+ for (unsigned Opcode :
+ {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
setOperationAction(Opcode, VT, Legal);
if (Subtarget->isLittleEndian()) {
@@ -993,8 +995,8 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
break;
case AArch64ISD::CSEL: {
KnownBits Known2;
- DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
@@ -1086,6 +1088,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FIRST_NUMBER: break;
case AArch64ISD::CALL: return "AArch64ISD::CALL";
case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
+ case AArch64ISD::ADR: return "AArch64ISD::ADR";
case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
@@ -1272,6 +1275,20 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
return EndBB;
}
+MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
+ assert(!isAsynchronousEHPersonality(classifyEHPersonality(
+ BB->getParent()->getFunction().getPersonalityFn())) &&
+ "SEH does not use catchret!");
+ return BB;
+}
+
+MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
+ MI.eraseFromParent();
+ return BB;
+}
+
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
@@ -1287,6 +1304,11 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
+
+ case AArch64::CATCHRET:
+ return EmitLoweredCatchRet(MI, BB);
+ case AArch64::CATCHPAD:
+ return EmitLoweredCatchPad(MI, BB);
}
}
@@ -1459,6 +1481,21 @@ static bool isLegalArithImmed(uint64_t C) {
return IsLegal;
}
+// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
+// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
+// can be set differently by this operation. It comes down to whether
+// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
+// everything is fine. If not then the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
+//
+// So, finally, the only LLVM-native comparisons that don't mention C and V
+// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
+// the absence of information about op2.
+static bool isCMN(SDValue Op, ISD::CondCode CC) {
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE);
+}
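To make the flag subtlety concrete, here is a tiny standalone sketch (not LLVM code) showing that for op2 == 0 the carry flag produced by SUBS differs from the one produced by ADDS, which is why conditions other than EQ/NE cannot use the CMN form:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t Op1 = 5, Op2 = 0;
    // AArch64 SUBS sets C when no borrow occurs, i.e. Op1 >= Op2 (unsigned).
    bool CarryFromSubs = Op1 >= Op2;                                 // 1
    // AArch64 ADDS sets C on unsigned carry-out of the addition.
    bool CarryFromAdds = (uint64_t)Op1 + (uint64_t)Op2 > UINT32_MAX; // 0
    printf("SUBS C=%d, ADDS C=%d\n", CarryFromSubs, CarryFromAdds);
    return 0;
  }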
+
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
@@ -1481,20 +1518,15 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
- if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
- // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
- // can be set differently by this operation. It comes down to whether
- // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
- // everything is fine. If not then the optimization is wrong. Thus general
- // comparisons are only valid if op2 != 0.
-
- // So, finally, the only LLVM-native comparisons that don't mention C and V
- // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
- // the absence of information about op2.
+ if (isCMN(RHS, CC)) {
+ // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
+ } else if (isCMN(LHS, CC)) {
+ // As we are looking for EQ/NE compares, the operands can be commuted; can
+ // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
+ Opcode = AArch64ISD::ADDS;
+ LHS = LHS.getOperand(1);
} else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
!isUnsignedIntSetCC(CC)) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
@@ -1796,6 +1828,42 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
/// @}
+/// Returns how profitable it is to fold a comparison's operand's shift and/or
+/// extension operations.
+static unsigned getCmpOperandFoldingProfit(SDValue Op) {
+ auto isSupportedExtend = [&](SDValue V) {
+ if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
+ return true;
+
+ if (V.getOpcode() == ISD::AND)
+ if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ uint64_t Mask = MaskCst->getZExtValue();
+ return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+ }
+
+ return false;
+ };
+
+ if (!Op.hasOneUse())
+ return 0;
+
+ if (isSupportedExtend(Op))
+ return 1;
+
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
+ if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ uint64_t Shift = ShiftCst->getZExtValue();
+ if (isSupportedExtend(Op.getOperand(0)))
+ return (Shift <= 4) ? 2 : 1;
+ EVT VT = Op.getValueType();
+ if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
+ return 1;
+ }
+
+ return 0;
+}
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
@@ -1853,6 +1921,27 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
+
+ // Comparisons are canonicalized so that the RHS operand is simpler than the
+ // LHS one, the extreme case being when RHS is an immediate. However, AArch64
+ // can fold some shift+extend operations on the RHS operand, so swap the
+ // operands if that can be done.
+ //
+ // For example:
+ // lsl w13, w11, #1
+ // cmp w13, w12
+ // can be turned into:
+ // cmp w12, w11, lsl #1
+ if (!isa<ConstantSDNode>(RHS) ||
+ !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
+ SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
+
+ if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
+ std::swap(LHS, RHS);
+ CC = ISD::getSetCCSwappedOperands(CC);
+ }
+ }
+
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
@@ -2619,66 +2708,6 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
-// Lower vector multiply high (ISD::MULHS and ISD::MULHU).
-static SDValue LowerMULH(SDValue Op, SelectionDAG &DAG) {
- // Multiplications are only custom-lowered for 128-bit vectors so that
- // {S,U}MULL{2} can be detected. Otherwise v2i64 multiplications are not
- // legal.
- EVT VT = Op.getValueType();
- assert(VT.is128BitVector() && VT.isInteger() &&
- "unexpected type for custom-lowering ISD::MULH{U,S}");
-
- SDValue V0 = Op.getOperand(0);
- SDValue V1 = Op.getOperand(1);
-
- SDLoc DL(Op);
-
- EVT ExtractVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
-
- // We turn (V0 mulhs/mulhu V1) to:
- //
- // (uzp2 (smull (extract_subvector (ExtractVT V128:V0, (i64 0)),
- // (extract_subvector (ExtractVT V128:V1, (i64 0))))),
- // (smull (extract_subvector (ExtractVT V128:V0, (i64 VMull2Idx)),
- // (extract_subvector (ExtractVT V128:V2, (i64 VMull2Idx))))))
- //
- // Where ExtractVT is a subvector with half number of elements, and
- // VMullIdx2 is the index of the middle element (the high part).
- //
- // The vector hight part extract and multiply will be matched against
- // {S,U}MULL{v16i8_v8i16,v8i16_v4i32,v4i32_v2i64} which in turn will
- // issue a {s}mull2 instruction.
- //
- // This basically multiply the lower subvector with '{s,u}mull', the high
- // subvector with '{s,u}mull2', and shuffle both results high part in
- // resulting vector.
- unsigned Mull2VectorIdx = VT.getVectorNumElements () / 2;
- SDValue VMullIdx = DAG.getConstant(0, DL, MVT::i64);
- SDValue VMull2Idx = DAG.getConstant(Mull2VectorIdx, DL, MVT::i64);
-
- SDValue VMullV0 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMullIdx);
- SDValue VMullV1 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMullIdx);
-
- SDValue VMull2V0 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMull2Idx);
- SDValue VMull2V1 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMull2Idx);
-
- unsigned MullOpc = Op.getOpcode() == ISD::MULHS ? AArch64ISD::SMULL
- : AArch64ISD::UMULL;
-
- EVT MullVT = ExtractVT.widenIntegerVectorElementType(*DAG.getContext());
- SDValue Mull = DAG.getNode(MullOpc, DL, MullVT, VMullV0, VMullV1);
- SDValue Mull2 = DAG.getNode(MullOpc, DL, MullVT, VMull2V0, VMull2V1);
-
- Mull = DAG.getNode(ISD::BITCAST, DL, VT, Mull);
- Mull2 = DAG.getNode(ISD::BITCAST, DL, VT, Mull2);
-
- return DAG.getNode(AArch64ISD::UZP2, DL, VT, Mull, Mull2);
-}
-
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2689,9 +2718,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
- case Intrinsic::aarch64_neon_abs:
- return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
- Op.getOperand(1));
+ case Intrinsic::aarch64_neon_abs: {
+ EVT Ty = Op.getValueType();
+ if (Ty == MVT::i64) {
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
+ Op.getOperand(1));
+ Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
+ } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
+ return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
+ } else {
+ report_fatal_error("Unexpected type for AArch64 NEON intrinic");
+ }
+ }
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
@@ -2794,6 +2833,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
@@ -2830,8 +2871,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
+ case ISD::SPONENTRY:
+ return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
+ case ISD::ADDROFRETURNADDR:
+ return LowerADDROFRETURNADDR(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
@@ -2875,9 +2920,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
- case ISD::MULHS:
- case ISD::MULHU:
- return LowerMULH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
@@ -2927,6 +2969,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
+ case CallingConv::AArch64_VectorCall:
+ return CC_AArch64_AAPCS;
}
}
@@ -3113,6 +3157,17 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// We currently pass all varargs at 8-byte alignment.
StackOffset = ((StackOffset + 7) & ~7);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
+
+ if (MFI.hasMustTailInVarArgFunc()) {
+ SmallVector<MVT, 2> RegParmTypes;
+ RegParmTypes.push_back(MVT::i64);
+ RegParmTypes.push_back(MVT::f128);
+ // Compute the set of forwarded registers. The rest are scratch.
+ SmallVectorImpl<ForwardedRegister> &Forwards =
+ FuncInfo->getForwardedMustTailRegParms();
+ CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
+ CC_AArch64_AAPCS);
+ }
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
@@ -3135,6 +3190,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
+ if (Subtarget->hasCustomCallingConv())
+ Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
+
return Chain;
}
@@ -3365,6 +3423,10 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+ if (Subtarget->hasCustomCallingConv()) {
+ TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
+ TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
+ }
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
@@ -3566,6 +3628,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
+ if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
+ const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
+ for (const auto &F : Forwards) {
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
+ RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
+ }
+ }
+
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
++i, ++realArgIdx) {
@@ -3758,6 +3828,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(MF, &Mask);
+
+ if (TRI->isAnyArgRegReserved(MF))
+ TRI->emitReservedArgRegCallError(MF);
+
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3944,13 +4020,21 @@ SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
+// (adr sym)
+template <class NodeTy>
+SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
+ unsigned Flags) const {
+ LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
+ return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
+}
+
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
- const AArch64II::TOF TargetFlags =
- (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
- : AArch64II::MO_NO_FLAG);
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
@@ -3958,20 +4042,23 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
- // This also catches the large code model case for Darwin.
+ // This also catches the large code model case for Darwin, and tiny code
+ // model with got relocations.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
- return getGOT(GN, DAG, TargetFlags);
+ return getGOT(GN, DAG, OpFlags);
}
SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
- Result = getAddrLarge(GN, DAG, TargetFlags);
+ Result = getAddrLarge(GN, DAG, OpFlags);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ Result = getAddrTiny(GN, DAG, OpFlags);
} else {
- Result = getAddr(GN, DAG, TargetFlags);
+ Result = getAddr(GN, DAG, OpFlags);
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(GN);
- if (GV->hasDLLImportStorageClass())
+ if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
@@ -4036,8 +4123,10 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask = TRI->getTLSCallPreservedMask();
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
@@ -4087,13 +4176,15 @@ SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
- assert(Subtarget->useSmallAddressing() &&
- "ELF TLS only supported in small memory model");
+ if (getTargetMachine().getCodeModel() == CodeModel::Large)
+ report_fatal_error("ELF TLS only supported in small memory model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
// FIXME: add -mtls-size command line option and make it control the 16MiB
// vs. 4GiB code sequence generation.
+ // FIXME: add tiny codemodel support. We currently generate the same code as
+ // small, which may be larger than needed.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -4264,6 +4355,13 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ bool ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
@@ -4306,7 +4404,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
- if (RHSC && RHSC->getZExtValue() == 0) {
+ if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
@@ -4349,7 +4447,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
- LHS.getOpcode() != ISD::AND) {
+ LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
@@ -4478,18 +4576,42 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
- if (VT == MVT::i32)
- Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
- Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
+ if (VT == MVT::i32 || VT == MVT::i64) {
+ if (VT == MVT::i32)
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
- SDValue UaddLV = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
+ SDValue UaddLV = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
- if (VT == MVT::i64)
- UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
- return UaddLV;
+ if (VT == MVT::i64)
+ UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
+ return UaddLV;
+ }
+
+ assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
+ VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
+
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ Val = DAG.getBitcast(VT8Bit, Val);
+ Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
+
+ // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
+ unsigned EltSize = 8;
+ unsigned NumElts = VT.is64BitVector() ? 8 : 16;
+ while (EltSize != VT.getScalarSizeInBits()) {
+ EltSize *= 2;
+ NumElts /= 2;
+ MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
+ Val = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
+ }
+
+ return Val;
}
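The widening loop above mirrors the classic byte-popcount-then-pairwise-add scheme; a scalar model of the same idea (purely illustrative, independent of the DAG code) looks like this:

  #include <cassert>
  #include <cstdint>

  // Per-byte bit count, the role CNT plays in the vector lowering.
  static uint32_t popcount8(uint32_t B) {
    B = (B & 0x55) + ((B >> 1) & 0x55);
    B = (B & 0x33) + ((B >> 2) & 0x33);
    return (B & 0x0f) + ((B >> 4) & 0x0f);
  }

  // Popcount a 32-bit value by counting bits per byte and then adding
  // adjacent lanes pairwise, the role UADDLP plays in the vector lowering.
  static uint32_t ctpop32ViaBytes(uint32_t V) {
    uint32_t Cnt8 = 0;
    for (int I = 0; I < 4; ++I)
      Cnt8 |= popcount8((V >> (8 * I)) & 0xff) << (8 * I);
    uint32_t Cnt16 = (Cnt8 & 0x00ff00ffu) + ((Cnt8 >> 8) & 0x00ff00ffu);
    return (Cnt16 & 0x0000ffffu) + (Cnt16 >> 16);
  }

  int main() {
    assert(ctpop32ViaBytes(0xf0f0f0f0u) == 16);
    assert(ctpop32ViaBytes(0x80000001u) == 2);
    return 0;
  }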
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -4811,10 +4933,28 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(JT, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(JT, DAG);
}
return getAddr(JT, DAG);
}
+SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Jump table entries are emitted as PC-relative offsets. No additional tweaking
+ // is necessary here. Just get the address of the jump table.
+ SDLoc DL(Op);
+ SDValue JT = Op.getOperand(1);
+ SDValue Entry = Op.getOperand(2);
+ int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
+
+ SDNode *Dest =
+ DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
+ Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
+ SDValue(Dest, 0));
+}
+
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -4825,6 +4965,8 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
return getGOT(CP, DAG);
}
return getAddrLarge(CP, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(CP, DAG);
} else {
return getAddr(CP, DAG);
}
@@ -4836,9 +4978,10 @@ SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(BA, DAG);
- } else {
- return getAddr(BA, DAG);
+ } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
+ return getAddrTiny(BA, DAG);
}
+ return getAddr(BA, DAG);
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
@@ -5044,21 +5187,59 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ EVT VT = getPointerTy(DAG.getDataLayout());
+ SDLoc DL(Op);
+ int FI = MFI.CreateFixedObject(4, 0, false);
+ return DAG.getFrameIndex(FI, VT);
+}
+
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
+ .Case("x1", AArch64::X1)
+ .Case("w1", AArch64::W1)
+ .Case("x2", AArch64::X2)
+ .Case("w2", AArch64::W2)
+ .Case("x3", AArch64::X3)
+ .Case("w3", AArch64::W3)
+ .Case("x4", AArch64::X4)
+ .Case("w4", AArch64::W4)
+ .Case("x5", AArch64::X5)
+ .Case("w5", AArch64::W5)
+ .Case("x6", AArch64::X6)
+ .Case("w6", AArch64::W6)
+ .Case("x7", AArch64::X7)
+ .Case("w7", AArch64::W7)
.Case("x18", AArch64::X18)
.Case("w18", AArch64::W18)
.Case("x20", AArch64::X20)
.Case("w20", AArch64::W20)
.Default(0);
- if (((Reg == AArch64::X18 || Reg == AArch64::W18) &&
- !Subtarget->isX18Reserved()) ||
+ if (((Reg == AArch64::X1 || Reg == AArch64::W1) &&
+ !Subtarget->isXRegisterReserved(1)) ||
+ ((Reg == AArch64::X2 || Reg == AArch64::W2) &&
+ !Subtarget->isXRegisterReserved(2)) ||
+ ((Reg == AArch64::X3 || Reg == AArch64::W3) &&
+ !Subtarget->isXRegisterReserved(3)) ||
+ ((Reg == AArch64::X4 || Reg == AArch64::W4) &&
+ !Subtarget->isXRegisterReserved(4)) ||
+ ((Reg == AArch64::X5 || Reg == AArch64::W5) &&
+ !Subtarget->isXRegisterReserved(5)) ||
+ ((Reg == AArch64::X6 || Reg == AArch64::W6) &&
+ !Subtarget->isXRegisterReserved(6)) ||
+ ((Reg == AArch64::X7 || Reg == AArch64::W7) &&
+ !Subtarget->isXRegisterReserved(7)) ||
+ ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
+ !Subtarget->isXRegisterReserved(18)) ||
((Reg == AArch64::X20 || Reg == AArch64::W20) &&
- !Subtarget->isX20Reserved()))
+ !Subtarget->isXRegisterReserved(20)))
Reg = 0;
if (Reg)
return Reg;
@@ -5066,6 +5247,20 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ StringRef(RegName) + "\"."));
}
+SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue FrameAddr =
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
+
+ return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
+}
+
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -5208,40 +5403,29 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// FIXME: We should be able to handle f128 as well with a clever lowering.
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
(VT == MVT::f16 && Subtarget->hasFullFP16()))) {
- LLVM_DEBUG(
- dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
+ LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
return true;
}
- StringRef FPType;
bool IsLegal = false;
SmallString<128> ImmStrVal;
Imm.toString(ImmStrVal);
- if (VT == MVT::f64) {
- FPType = "f64";
+ if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
- } else if (VT == MVT::f32) {
- FPType = "f32";
+ else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
- } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
- FPType = "f16";
+ else if (VT == MVT::f16 && Subtarget->hasFullFP16())
IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
- }
if (IsLegal) {
- LLVM_DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal
- << "\n");
+ LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
+ << " imm value: " << ImmStrVal << "\n");
return true;
}
- if (!FPType.empty())
- LLVM_DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal
- << "\n");
- else
- LLVM_DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal
- << ": unsupported fp type\n");
-
+ LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
+ << " imm value: " << ImmStrVal << "\n");
return false;
}
@@ -5453,6 +5637,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w':
+ if (!Subtarget->hasFPARMv8())
+ break;
if (VT.getSizeInBits() == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VT.getSizeInBits() == 32)
@@ -5465,6 +5651,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
+ if (!Subtarget->hasFPARMv8())
+ break;
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
@@ -5500,6 +5688,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
}
}
+ if (Res.second && !Subtarget->hasFPARMv8() &&
+ !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
+ !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
+ return std::make_pair(0U, nullptr);
+
return Res;
}
@@ -6921,10 +7114,19 @@ static SDValue NormalizeBuildVector(SDValue Op,
SmallVector<SDValue, 16> Ops;
for (SDValue Lane : Op->ops()) {
+ // For integer vectors, type legalization would have promoted the
+ // operands already. Otherwise, if Op is a floating-point splat
+ // (with operands cast to integers), then the only possibilities
+ // are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
+ } else if (Lane.getNode()->isUndef()) {
+ Lane = DAG.getUNDEF(MVT::i32);
+ } else {
+ assert(Lane.getValueType() == MVT::i32 &&
+ "Unexpected BUILD_VECTOR operand type");
}
Ops.push_back(Lane);
}
@@ -7050,7 +7252,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getUNDEF(VT);
}
- if (isOnlyLowElement) {
+ // Convert BUILD_VECTOR where all elements but the lowest are undef into
+ // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
+ // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
+ if (isOnlyLowElement && !(NumElts == 1 && isa<ConstantSDNode>(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
@@ -7632,7 +7837,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
- return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
+ Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return Cmp;
}
@@ -7718,8 +7923,10 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getWindowsStackProbePreservedMask();
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
+ if (Subtarget->hasCustomCallingConv())
+ TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
@@ -7827,7 +8034,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
@@ -7891,6 +8098,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+ return false;
+
// If we're reducing the load width in order to avoid having to use an extra
// instruction to do extension then it's probably a good idea.
if (ExtTy != ISD::NON_EXTLOAD)
@@ -8348,27 +8559,30 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- // Don't use AdvSIMD to implement 16-byte memset. It would have taken one
- // instruction to materialize the v2i64 zero and one store (with restrictive
- // addressing mode). Just do two i64 store of zero-registers.
- bool Fast;
const Function &F = MF.getFunction();
- if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
- !F.hasFnAttribute(Attribute::NoImplicitFloat) &&
- (memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
- return MVT::f128;
+ bool CanImplicitFloat = !F.hasFnAttribute(Attribute::NoImplicitFloat);
+ bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
+ bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
+ // Only use AdvSIMD to implement memset of 32-byte and above. It would have
+ // taken one instruction to materialize the v2i64 zero and one store (with
+ // restrictive addressing mode). Just do i64 stores.
+ bool IsSmallMemset = IsMemset && Size < 32;
+ auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
+ if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+ return true;
+ bool Fast;
+ return allowsMisalignedMemoryAccesses(VT, 0, 1, &Fast) && Fast;
+ };
- if (Size >= 8 &&
- (memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+ if (CanUseNEON && IsMemset && !IsSmallMemset &&
+ AlignmentIsAcceptable(MVT::v2i64, 16))
+ return MVT::v2i64;
+ if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+ return MVT::f128;
+ if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
return MVT::i64;
-
- if (Size >= 4 &&
- (memOpAlign(SrcAlign, DstAlign, 4) ||
- (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+ if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
return MVT::i32;
-
return MVT::Other;
}
@@ -8496,7 +8710,9 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
}
bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const {
+AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const {
+ N = N->getOperand(0).getNode();
EVT VT = N->getValueType(0);
// If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
// it with shift to let it be lowered to UBFX.
@@ -9717,10 +9933,10 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
- return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
- return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
@@ -9849,6 +10065,7 @@ static SDValue performExtendCombine(SDNode *N,
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
+ assert(!St.isTruncatingStore() && "cannot split truncating vector store");
unsigned OrigAlignment = St.getAlignment();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
@@ -9923,6 +10140,11 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
if (!StVal.hasOneUse())
return SDValue();
+ // If the store is truncating then it's going down to i16 or smaller, which
+ // means it can be implemented in a single store anyway.
+ if (St.isTruncatingStore())
+ return SDValue();
+
// If the immediate offset of the address operand is too large for the stp
// instruction, then bail out.
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
@@ -9973,6 +10195,11 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
+ // If the store is truncating then it's going down to i16 or smaller, which
+ // means it can be implemented in a single store anyway.
+ if (St.isTruncatingStore())
+ return SDValue();
+
// Check that this is a splat.
// Make sure that each of the relevant vector element locations are inserted
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
@@ -10129,15 +10356,6 @@ static SDValue performPostLD1Combine(SDNode *N,
|| UI.getUse().getResNo() != Addr.getResNo())
continue;
- // Check that the add is independent of the load. Otherwise, folding it
- // would create a cycle.
- if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User))
- continue;
- // Also check that add is not used in the vector operand. This would also
- // create a cycle.
- if (User->isPredecessorOf(Vector.getNode()))
- continue;
-
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
@@ -10148,11 +10366,16 @@ static SDValue performPostLD1Combine(SDNode *N,
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
- // Finally, check that the vector doesn't depend on the load.
- // Again, this would create a cycle.
- // The load depending on the vector is fine, as that's the case for the
- // LD1*post we'll eventually generate anyway.
- if (LoadSDN->isPredecessorOf(Vector.getNode()))
+ // To avoid cycle construction make sure that neither the load nor the add
+ // are predecessors to each other or the Vector.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(N);
+ Worklist.push_back(User);
+ Worklist.push_back(LD);
+ Worklist.push_back(Vector.getNode());
+ if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
SmallVector<SDValue, 8> Ops;
@@ -10238,7 +10461,13 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
- if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Addr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
@@ -10608,6 +10837,13 @@ SDValue performCONDCombine(SDNode *N,
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+ // will not be produced, as they are conditional branch instructions that do
+ // not set flags.
+ if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return SDValue();
+
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
@@ -10865,9 +11101,9 @@ static SDValue performNVCASTCombine(SDNode *N) {
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
const TargetMachine &TM) {
- auto *GN = dyn_cast<GlobalAddressSDNode>(N);
- if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
- AArch64II::MO_NO_FLAG)
+ auto *GN = cast<GlobalAddressSDNode>(N);
+ if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
+ AArch64II::MO_NO_FLAG)
return SDValue();
uint64_t MinOffset = -1ull;
@@ -10999,6 +11235,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default:
break;
}
+ break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
@@ -11196,12 +11433,10 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
N->getOperand(0), // Chain in
};
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
unsigned Opcode;
- switch (MemOp[0]->getOrdering()) {
+ switch (MemOp->getOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
@@ -11221,7 +11456,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
- CmpSwap->setMemRefs(MemOp, MemOp + 1);
+ DAG.setNodeMemRefs(CmpSwap, {MemOp});
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
@@ -11242,10 +11477,8 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
AArch64::CMP_SWAP_128, SDLoc(N),
DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other), Ops);
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Results.push_back(SDValue(CmpSwap, 0));
Results.push_back(SDValue(CmpSwap, 1));
@@ -11310,12 +11543,11 @@ unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
}
TargetLoweringBase::LegalizeTypeAction
-AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
- MVT SVT = VT.getSimpleVT();
+AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
// v4i16, v2i32 instead of to promote.
- if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
- || SVT == MVT::v1f32)
+ if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
+ VT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
@@ -11349,16 +11581,20 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
}
-bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
- if (Subtarget->hasLSE()) return false;
+ if (Subtarget->hasLSE())
+ return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- return getTargetMachine().getOptLevel() != 0;
+ if (getTargetMachine().getOptLevel() == 0)
+ return AtomicExpansionKind::None;
+ return AtomicExpansionKind::LLSC;
}
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
@@ -11468,6 +11704,39 @@ Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
return TargetLowering::getIRStackGuard(IRB);
}
+void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
+ // MSVC CRT provides functionalities for stack protection.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
+ // MSVC CRT has a global variable holding security cookie.
+ M.getOrInsertGlobal("__security_cookie",
+ Type::getInt8PtrTy(M.getContext()));
+
+ // MSVC CRT has a function to validate security cookie.
+ auto *SecurityCheckCookie = cast<Function>(
+ M.getOrInsertFunction("__security_check_cookie",
+ Type::getVoidTy(M.getContext()),
+ Type::getInt8PtrTy(M.getContext())));
+ SecurityCheckCookie->setCallingConv(CallingConv::Win64);
+ SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
+ return;
+ }
+ TargetLowering::insertSSPDeclarations(M);
+}
+
+Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
+ // MSVC CRT has a global variable holding security cookie.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return M.getGlobalVariable("__security_cookie");
+ return TargetLowering::getSDagStackGuard(M);
+}
+
+Value *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
+ // MSVC CRT has a function to validate security cookie.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return M.getFunction("__security_check_cookie");
+ return TargetLowering::getSSPStackGuardCheck(M);
+}
+
Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
@@ -11572,3 +11841,8 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
MF.getFrameInfo().computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
+
+// Unlike X86, we let frame lowering assign offsets to all catch objects.
+bool AArch64TargetLowering::needsFixedCatchObjects() const {
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d783c8a6048c..ffc4cc3ef534 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -35,6 +35,7 @@ enum NodeType : unsigned {
// offset of a variable into X0, using the TLSDesc model.
TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
+ ADR, // ADR
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
// Offset Table, TLS record).
@@ -301,6 +302,12 @@ public:
MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
@@ -363,7 +370,8 @@ public:
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
- bool isDesirableToCommuteWithShift(const SDNode *N) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
@@ -388,16 +396,21 @@ public:
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
/// If the target has a standard location for the stack protector cookie,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getIRStackGuard(IRBuilder<> &IRB) const override;
+ void insertSSPDeclarations(Module &M) const override;
+ Value *getSDagStackGuard(const Module &M) const override;
+ Value *getSSPStackGuardCheck(const Module &M) const override;
+
/// If the target has a standard location for the unsafe stack pointer,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
@@ -514,6 +527,8 @@ public:
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
CallingConv::ID CallConv,
bool isVarArg) const override;
+ /// Used for exception handling on Win64.
+ bool needsFixedCatchObjects() const override;
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -586,6 +601,8 @@ private:
SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
+ template <class NodeTy>
+ SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -602,6 +619,7 @@ private:
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
@@ -611,6 +629,7 @@ private:
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 7caf32dbde2a..9061ed4f9f54 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -263,6 +263,14 @@ class SImmOperand<int width> : AsmOperandClass {
let PredicateMethod = "isSImm<" # width # ">";
}
+
+class AsmImmRange<int Low, int High> : AsmOperandClass {
+ let Name = "Imm" # Low # "_" # High;
+ let DiagnosticType = "InvalidImm" # Low # "_" # High;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
+}
+
// Authenticated loads for v8.3 can have scaled 10-bit immediate offsets.
def SImm10s8Operand : SImmScaledMemoryIndexed<10, 8>;
def simm10Scaled : Operand<i64> {
@@ -271,6 +279,12 @@ def simm10Scaled : Operand<i64> {
let PrintMethod = "printImmScale<8>";
}
+def simm9s16 : Operand<i64> {
+ let ParserMatchClass = SImmScaledMemoryIndexed<9, 16>;
+ let DecoderMethod = "DecodeSImm<9>";
+ let PrintMethod = "printImmScale<16>";
+}
+
// uimm6 predicate - True if the immediate is in the range [0, 63].
def UImm6Operand : AsmOperandClass {
let Name = "UImm6";
@@ -281,6 +295,10 @@ def uimm6 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
let ParserMatchClass = UImm6Operand;
}
+def uimm16 : Operand<i16>, ImmLeaf<i16, [{return Imm >= 0 && Imm < 65536;}]>{
+ let ParserMatchClass = AsmImmRange<0, 65535>;
+}
+
def SImm9Operand : SImmOperand<9>;
def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
let ParserMatchClass = SImm9Operand;
@@ -366,6 +384,7 @@ def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>;
def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>;
def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>;
def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>;
+def UImm6s16Operand : UImmScaledMemoryIndexed<6, 16>;
def uimm6s1 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
let ParserMatchClass = UImm6s1Operand;
@@ -385,6 +404,11 @@ def uimm6s8 : Operand<i64>, ImmLeaf<i64,
let PrintMethod = "printImmScale<8>";
let ParserMatchClass = UImm6s8Operand;
}
+def uimm6s16 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >= 0 && Imm < (64*16) && ((Imm % 16) == 0); }]> {
+ let PrintMethod = "printImmScale<16>";
+ let ParserMatchClass = UImm6s16Operand;
+}
// simm6sN predicate - True if the immediate is a multiple of N in the range
// [-32 * N, 31 * N].
@@ -435,13 +459,6 @@ def simm4s16 : Operand<i64>, ImmLeaf<i64,
let DecoderMethod = "DecodeSImm<4>";
}
-class AsmImmRange<int Low, int High> : AsmOperandClass {
- let Name = "Imm" # Low # "_" # High;
- let DiagnosticType = "InvalidImm" # Low # "_" # High;
- let RenderMethod = "addImmOperands";
- let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
-}
-
def Imm1_8Operand : AsmImmRange<1, 8>;
def Imm1_16Operand : AsmImmRange<1, 16>;
def Imm1_32Operand : AsmImmRange<1, 32>;
@@ -696,11 +713,10 @@ def logical_imm64_not : Operand<i64> {
}
// imm0_65535 predicate - True if the immediate is in the range [0,65535].
-def Imm0_65535Operand : AsmImmRange<0, 65535>;
def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 65536;
}]> {
- let ParserMatchClass = Imm0_65535Operand;
+ let ParserMatchClass = AsmImmRange<0, 65535>;
let PrintMethod = "printImmHex";
}
@@ -1149,6 +1165,21 @@ def psbhint_op : Operand<i32> {
}];
}
+def BTIHintOperand : AsmOperandClass {
+ let Name = "BTIHint";
+ let ParserMethod = "tryParseBTIHint";
+}
+def btihint_op : Operand<i32> {
+ let ParserMatchClass = BTIHintOperand;
+ let PrintMethod = "printBTIHintOp";
+ let MCOperandPredicate = [{
+ // "bti" is an alias to "hint" only for certain values of CRm:Op2 fields.
+ if (!MCOp.isImm())
+ return false;
+ return AArch64BTIHint::lookupBTIByEncoding((MCOp.getImm() ^ 32) >> 1) != nullptr;
+ }];
+}
+
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
@@ -1173,16 +1204,23 @@ def pstatefield4_op : Operand<i32> {
let PrintMethod = "printSystemPStateField";
}
+// Instructions to modify PSTATE, no input reg
let Defs = [NZCV] in
+class PstateWriteSimple<dag iops, string asm, string operands>
+ : SimpleSystemI<0, iops, asm, operands> {
+
+ let Inst{20-19} = 0b00;
+ let Inst{15-12} = 0b0100;
+}
+
class MSRpstateImm0_15
- : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm),
- "msr", "\t$pstatefield, $imm">,
+ : PstateWriteSimple<(ins pstatefield4_op:$pstatefield, imm0_15:$imm), "msr",
+ "\t$pstatefield, $imm">,
Sched<[WriteSys]> {
+
bits<6> pstatefield;
bits<4> imm;
- let Inst{20-19} = 0b00;
let Inst{18-16} = pstatefield{5-3};
- let Inst{15-12} = 0b0100;
let Inst{11-8} = imm;
let Inst{7-5} = pstatefield{2-0};
@@ -1201,16 +1239,15 @@ def pstatefield1_op : Operand<i32> {
let PrintMethod = "printSystemPStateField";
}
-let Defs = [NZCV] in
class MSRpstateImm0_1
- : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm),
- "msr", "\t$pstatefield, $imm">,
+ : PstateWriteSimple<(ins pstatefield1_op:$pstatefield, imm0_1:$imm), "msr",
+ "\t$pstatefield, $imm">,
Sched<[WriteSys]> {
+
bits<6> pstatefield;
bit imm;
- let Inst{20-19} = 0b00;
let Inst{18-16} = pstatefield{5-3};
- let Inst{15-9} = 0b0100000;
+ let Inst{11-9} = 0b000;
let Inst{8} = imm;
let Inst{7-5} = pstatefield{2-0};
@@ -1719,10 +1756,12 @@ multiclass AddSubCarry<bit isSub, string asm, string asm_setflags,
}
class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm,
- SDPatternOperator OpNode>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ SDPatternOperator OpNode,
+ RegisterClass in1regtype = regtype,
+ RegisterClass in2regtype = regtype>
+ : I<(outs regtype:$Rd), (ins in1regtype:$Rn, in2regtype:$Rm),
asm, "\t$Rd, $Rn, $Rm", "",
- [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> {
+ [(set regtype:$Rd, (OpNode in1regtype:$Rn, in2regtype:$Rm))]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -1902,7 +1941,7 @@ class ADRI<bit page, string asm, Operand adr, list<dag> pattern>
//---
def movimm32_imm : Operand<i32> {
- let ParserMatchClass = Imm0_65535Operand;
+ let ParserMatchClass = AsmImmRange<0, 65535>;
let EncoderMethod = "getMoveWideImmOpValue";
let PrintMethod = "printImm";
}
@@ -1977,23 +2016,29 @@ multiclass InsertImmediate<bits<2> opc, string asm> {
//---
class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype,
- RegisterClass srcRegtype, addsub_shifted_imm immtype,
- string asm, SDPatternOperator OpNode>
- : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm),
- asm, "\t$Rd, $Rn, $imm", "",
- [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>,
- Sched<[WriteI, ReadI]> {
+ string asm_inst, string asm_ops,
+ dag inputs, dag pattern>
+ : I<(outs dstRegtype:$Rd), inputs, asm_inst, asm_ops, "", [pattern]>,
+ Sched<[WriteI, ReadI]> {
bits<5> Rd;
bits<5> Rn;
- bits<14> imm;
let Inst{30} = isSub;
let Inst{29} = setFlags;
let Inst{28-24} = 0b10001;
- let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
- let Inst{21-10} = imm{11-0};
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
- let DecoderMethod = "DecodeBaseAddSubImm";
+}
+
+class AddSubImmShift<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass srcRegtype, addsub_shifted_imm immtype,
+ string asm_inst, SDPatternOperator OpNode>
+ : BaseAddSubImm<isSub, setFlags, dstRegtype, asm_inst, "\t$Rd, $Rn, $imm",
+ (ins srcRegtype:$Rn, immtype:$imm),
+ (set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))> {
+ bits<14> imm;
+ let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
+ let Inst{21-10} = imm{11-0};
+ let DecoderMethod = "DecodeAddSubImmShift";
}
class BaseAddSubRegPseudo<RegisterClass regtype,
@@ -2097,12 +2142,12 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
// We used to match the register variant before the immediate when the
// register argument could be implicitly zero-extended.
let AddedComplexity = 6 in
- def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
+ def Wri : AddSubImmShift<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
let AddedComplexity = 6 in
- def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
+ def Xri : AddSubImmShift<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
}
@@ -2173,11 +2218,11 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
string alias, string cmpAlias> {
let isCompare = 1, Defs = [NZCV] in {
// Add/Subtract immediate
- def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
+ def Wri : AddSubImmShift<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
- def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
+ def Xri : AddSubImmShift<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
}
@@ -2271,6 +2316,27 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
GPR64, GPR64sponly, GPR64, 24>; // UXTX #0
}
+class AddSubG<bit isSub, string asm_inst, SDPatternOperator OpNode>
+ : BaseAddSubImm<
+ isSub, 0, GPR64sp, asm_inst, "\t$Rd, $Rn, $imm6, $imm4",
+ (ins GPR64sp:$Rn, uimm6s16:$imm6, imm0_15:$imm4),
+ (set GPR64sp:$Rd, (OpNode GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4))> {
+ bits<6> imm6;
+ bits<4> imm4;
+ let Inst{31} = 1;
+ let Inst{23-22} = 0b10;
+ let Inst{21-16} = imm6;
+ let Inst{15-14} = 0b00;
+ let Inst{13-10} = imm4;
+ let Unpredictable{15-14} = 0b11;
+}
+
+class SUBP<bit setsFlags, string asm_instr, SDPatternOperator OpNode>
+ : BaseTwoOperand<0b0000, GPR64, asm_instr, null_frag, GPR64sp, GPR64sp> {
+ let Inst{31} = 1;
+ let Inst{29} = setsFlags;
+}
+
//---
// Extract
//---
@@ -2853,10 +2919,10 @@ def am_ldrlit : Operand<iPTR> {
let OperandType = "OPERAND_PCREL";
}
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class LoadLiteral<bits<2> opc, bit V, RegisterOperand regtype, string asm>
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0, AddedComplexity = 20 in
+class LoadLiteral<bits<2> opc, bit V, RegisterOperand regtype, string asm, list<dag> pat>
: I<(outs regtype:$Rt), (ins am_ldrlit:$label),
- asm, "\t$Rt, $label", "", []>,
+ asm, "\t$Rt, $label", "", pat>,
Sched<[WriteLD]> {
bits<5> Rt;
bits<19> label;
@@ -3932,6 +3998,78 @@ class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
let Constraints = "@earlyclobber $Ws";
}
+// Armv8.5-A Memory Tagging Extension
+class BaseMemTag<bits<2> opc1, bits<2> opc2, string asm_insn,
+ string asm_opnds, string cstr, dag oops, dag iops>
+ : I<oops, iops, asm_insn, asm_opnds, cstr, []>,
+ Sched<[]> {
+ bits<5> Rn;
+
+ let Inst{31-24} = 0b11011001;
+ let Inst{23-22} = opc1;
+ let Inst{21} = 1;
+ // Inst{20-12} defined by subclass
+ let Inst{11-10} = opc2;
+ let Inst{9-5} = Rn;
+ // Inst{4-0} defined by subclass
+}
+
+class MemTagVector<bit Load, string asm_insn, string asm_opnds,
+ dag oops, dag iops>
+ : BaseMemTag<{0b1, Load}, 0b00, asm_insn, asm_opnds,
+ "$Rn = $wback,@earlyclobber $wback", oops, iops> {
+ bits<5> Rt;
+
+ let Inst{20-12} = 0b000000000;
+ let Inst{4-0} = Rt;
+
+ let mayLoad = Load;
+}
+
+class MemTagLoad<string asm_insn, string asm_opnds>
+ : BaseMemTag<0b01, 0b00, asm_insn, asm_opnds, "", (outs GPR64:$Rt),
+ (ins GPR64sp:$Rn, simm9s16:$offset)> {
+ bits<5> Rt;
+ bits<9> offset;
+
+ let Inst{20-12} = offset;
+ let Inst{4-0} = Rt;
+
+ let mayLoad = 1;
+}
+
+class BaseMemTagStore<bits<2> opc1, bits<2> opc2, string asm_insn,
+ string asm_opnds, string cstr, dag oops, dag iops>
+ : BaseMemTag<opc1, opc2, asm_insn, asm_opnds, cstr, oops, iops> {
+ bits<5> Rt;
+ bits<9> offset;
+
+ let Inst{20-12} = offset;
+ let Inst{4-0} = 0b11111;
+ let Unpredictable{4-0} = 0b11111;
+
+ let mayStore = 1;
+}
+
+multiclass MemTagStore<bits<2> opc1, string insn> {
+ def Offset :
+ BaseMemTagStore<opc1, 0b10, insn, "\t[$Rn, $offset]", "",
+ (outs), (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ def PreIndex :
+ BaseMemTagStore<opc1, 0b11, insn, "\t[$Rn, $offset]!",
+ "$Rn = $wback,@earlyclobber $wback",
+ (outs GPR64sp:$wback),
+ (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ def PostIndex :
+ BaseMemTagStore<opc1, 0b01, insn, "\t[$Rn], $offset",
+ "$Rn = $wback,@earlyclobber $wback",
+ (outs GPR64sp:$wback),
+ (ins GPR64sp:$Rn, simm9s16:$offset)>;
+
+ def : InstAlias<insn # "\t[$Rn]",
+ (!cast<Instruction>(NAME # "Offset") GPR64sp:$Rn, 0)>;
+}
+
//---
// Exception generation
//---
@@ -3948,6 +4086,19 @@ class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
let Inst{1-0} = ll;
}
+//---
+// UDF : Permanently UNDEFINED instructions. Format: Opc = 0x0000, 16 bit imm.
+//---
+let hasSideEffects = 1, isTrap = 1, mayLoad = 0, mayStore = 0 in {
+class UDFType<bits<16> opc, string asm>
+ : I<(outs), (ins uimm16:$imm),
+ asm, "\t$imm", "", []>,
+ Sched<[]> {
+ bits<16> imm;
+ let Inst{31-16} = opc;
+ let Inst{15-0} = imm;
+}
+}
let Predicates = [HasFPARMv8] in {
//---
@@ -4395,7 +4546,7 @@ multiclass FPConversion<string asm> {
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
+class BaseSingleOperandFPData<bits<6> opcode, RegisterClass regtype,
ValueType vt, string asm, SDPatternOperator node>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
[(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>,
@@ -4403,8 +4554,8 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
bits<5> Rd;
bits<5> Rn;
let Inst{31-24} = 0b00011110;
- let Inst{21-19} = 0b100;
- let Inst{18-15} = opcode;
+ let Inst{21} = 0b1;
+ let Inst{20-15} = opcode;
let Inst{14-10} = 0b10000;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
@@ -4412,20 +4563,37 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
multiclass SingleOperandFPData<bits<4> opcode, string asm,
SDPatternOperator node = null_frag> {
- def Hr : BaseSingleOperandFPData<opcode, FPR16, f16, asm, node> {
+
+ def Hr : BaseSingleOperandFPData<{0b00,opcode}, FPR16, f16, asm, node> {
let Inst{23-22} = 0b11; // 16-bit size flag
let Predicates = [HasFullFP16];
}
- def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
+ def Sr : BaseSingleOperandFPData<{0b00,opcode}, FPR32, f32, asm, node> {
let Inst{23-22} = 0b00; // 32-bit size flag
}
- def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
+ def Dr : BaseSingleOperandFPData<{0b00,opcode}, FPR64, f64, asm, node> {
let Inst{23-22} = 0b01; // 64-bit size flag
}
}
+multiclass SingleOperandFPNo16<bits<6> opcode, string asm,
+ SDPatternOperator node = null_frag>{
+
+ def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
+ let Inst{23-22} = 0b00; // 32-bit registers
+ }
+
+ def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
+ let Inst{23-22} = 0b01; // 64-bit registers
+ }
+}
+
+// FRInt[32|64][Z|N] instructions
+multiclass FRIntNNT<bits<2> opcode, string asm, SDPatternOperator node = null_frag> :
+ SingleOperandFPNo16<{0b0100,opcode}, asm, node>;
+
//---
// Two operand floating point data processing
//---
@@ -4790,25 +4958,6 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
-class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
- string kind2, RegisterOperand RegType,
- ValueType AccumType, ValueType InputType,
- SDPatternOperator OpNode> :
- BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1,
- [(set (AccumType RegType:$dst),
- (OpNode (AccumType RegType:$Rd),
- (InputType RegType:$Rn),
- (InputType RegType:$Rm)))]> {
- let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
-}
-
-multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64,
- v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128,
- v4i32, v16i8, OpNode>;
-}
-
// All operand sizes distinguished in the encoding.
multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -5049,6 +5198,51 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
V128:$LHS, V128:$MHS, V128:$RHS)>;
}
+// ARMv8.2-A Dot Product Instructions (Vector): These instructions extract
+// bytes from S-sized elements.
+class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1,
+ [(set (AccumType RegType:$dst),
+ (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType RegType:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+}
+
+multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64,
+ v2i32, v8i8, OpNode>;
+ def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128,
+ v4i32, v16i8, OpNode>;
+}
+
+// ARMv8.2-A Fused Multiply Add-Long Instructions (Vector): These instructions
+// select inputs from 4H vectors and accumulate outputs to a 2S vector (or from
+// 8H to 4S, when Q=1).
+class BaseSIMDThreeSameVectorFML<bit Q, bit U, bit b13, bits<3> size, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDThreeSameVectorTied<Q, U, size, 0b11101, RegType, asm, kind1,
+ [(set (AccumType RegType:$dst),
+ (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType RegType:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+ let Inst{13} = b13;
+}
+
+multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
+ SDPatternOperator OpNode> {
+ def v4f16 : BaseSIMDThreeSameVectorFML<0, U, b13, size, asm, ".2s", ".2h", V64,
+ v2f32, v4f16, OpNode>;
+ def v8f16 : BaseSIMDThreeSameVectorFML<1, U, b13, size, asm, ".4s", ".4h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
//----------------------------------------------------------------------------
// AdvSIMD two register vector instructions.
@@ -5293,7 +5487,7 @@ multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
[(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
}
-// Supports only S and D element sizes, uses high bit of the size field
+// Supports H, S and D element sizes, uses high bit of the size field
// as an extra opcode bit.
multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -5316,6 +5510,25 @@ multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
}
+// Supports only S and D element sizes
+multiclass SIMDTwoVectorSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+
+ def v2f32 : BaseSIMDTwoSameVector<0, U, 00, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, 00, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, 01, opc, 0b00, V128,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+multiclass FRIntNNTVector<bit U, bit op, string asm,
+ SDPatternOperator OpNode = null_frag> :
+ SIMDTwoVectorSD<U, {0b1111,op}, asm, OpNode>;
+
// Supports only S element size.
multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -7236,7 +7449,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
let Inst{4-0} = Rd;
}
-// ARMv8.2 Index Dot product instructions
+// ARMv8.2-A Dot Product Instructions (Indexed)
class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
string lhs_kind, string rhs_kind,
RegisterOperand RegType,
@@ -7257,10 +7470,38 @@ class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
multiclass SIMDThreeSameVectorDotIndex<bit U, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64,
- v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b", V128,
- v4i32, v16i8, OpNode>;
+ def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b",
+ V64, v2i32, v8i8, OpNode>;
+ def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b",
+ V128, v4i32, v16i8, OpNode>;
+}
+
+// ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed)
+class BaseSIMDThreeSameVectorFMLIndex<bit Q, bit U, bits<4> opc, string asm,
+ string dst_kind, string lhs_kind,
+ string rhs_kind, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDIndexedTied<Q, U, 0, 0b10, opc, RegType, RegType, V128,
+ VectorIndexH, asm, "", dst_kind, lhs_kind, rhs_kind,
+ [(set (AccumType RegType:$dst),
+ (AccumType (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType (AArch64duplane16 (v8f16 V128:$Rm),
+ VectorIndexH:$idx)))))]> {
+ // idx = H:L:M
+ bits<3> idx;
+ let Inst{11} = idx{2}; // H
+ let Inst{21} = idx{1}; // L
+ let Inst{20} = idx{0}; // M
+}
+
+multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4f16 : BaseSIMDThreeSameVectorFMLIndex<0, U, opc, asm, ".2s", ".2h", ".h",
+ V64, v2f32, v4f16, OpNode>;
+ def v8f16 : BaseSIMDThreeSameVectorFMLIndex<1, U, opc, asm, ".4s", ".4h", ".h",
+ V128, v4f32, v8f16, OpNode>;
}
multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
@@ -9748,9 +9989,10 @@ class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
let Inst{4-0} = Rd;
}
+// 8.3 CompNum - Floating-point complex number support
multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
string asm, SDPatternOperator OpNode>{
- let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype,
asm, ".4h",
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
@@ -9766,7 +10008,7 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
(rottype i32:$rot)))]>;
}
- let Predicates = [HasV8_3a, HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype,
asm, ".2s",
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
@@ -9822,7 +10064,7 @@ class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
Operand rottype, string asm,
SDPatternOperator OpNode> {
- let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64,
rottype, asm, ".4h",
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
@@ -9838,7 +10080,7 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
(rottype i32:$rot)))]>;
}
- let Predicates = [HasV8_3a, HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64,
rottype, asm, ".2s",
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
@@ -9904,7 +10146,7 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
// classes.
multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
string asm, SDPatternOperator OpNode> {
- let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in {
+ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
".4h", ".h", []> {
@@ -9920,9 +10162,9 @@ multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
let Inst{11} = idx{1};
let Inst{21} = idx{0};
}
- } // Predicates = [HasV8_3a,HasNEON,HasFullFP16]
+ } // Predicates = [HasComplxNum, HasNEON, HasFullFP16]
- let Predicates = [HasV8_3a,HasNEON] in {
+ let Predicates = [HasComplxNum, HasNEON] in {
def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2,
V128, V128, V128, VectorIndexD, rottype, asm, ".4s",
".4s", ".4s", ".s", []> {
@@ -9930,7 +10172,7 @@ multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
let Inst{11} = idx{0};
let Inst{21} = 0;
}
- } // Predicates = [HasV8_3a,HasNEON]
+ } // Predicates = [HasComplxNum, HasNEON]
}
//----------------------------------------------------------------------------
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 032d53d19620..ada067888572 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -66,7 +66,8 @@ static cl::opt<unsigned>
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
- : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
+ AArch64::CATCHRET),
RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
@@ -108,6 +109,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
break;
+ case AArch64::JumpTableDest32:
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ NumBytes = 12;
+ break;
+ case AArch64::SPACE:
+ NumBytes = MI.getOperand(1).getImm();
+ break;
}
return NumBytes;
@@ -675,14 +684,36 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
if (!Subtarget.hasCustomCheapAsMoveHandling())
return MI.isAsCheapAsAMove();
+ const unsigned Opcode = MI.getOpcode();
+
+ // Firstly, check cases gated by features.
+
+ if (Subtarget.hasZeroCycleZeroingFP()) {
+ if (Opcode == AArch64::FMOVH0 ||
+ Opcode == AArch64::FMOVS0 ||
+ Opcode == AArch64::FMOVD0)
+ return true;
+ }
+
+ if (Subtarget.hasZeroCycleZeroingGP()) {
+ if (Opcode == TargetOpcode::COPY &&
+ (MI.getOperand(1).getReg() == AArch64::WZR ||
+ MI.getOperand(1).getReg() == AArch64::XZR))
+ return true;
+ }
+
+ // Secondly, check cases specific to sub-targets.
+
if (Subtarget.hasExynosCheapAsMoveHandling()) {
- if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
+ if (isExynosCheapAsMove(MI))
return true;
- else
- return MI.isAsCheapAsAMove();
+
+ return MI.isAsCheapAsAMove();
}
- switch (MI.getOpcode()) {
+ // Finally, check generic cases.
+
+ switch (Opcode) {
default:
return false;
@@ -723,217 +754,12 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
return canBeExpandedToORR(MI, 32);
case AArch64::MOVi64imm:
return canBeExpandedToORR(MI, 64);
-
- // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
- // feature.
- case AArch64::FMOVH0:
- case AArch64::FMOVS0:
- case AArch64::FMOVD0:
- return Subtarget.hasZeroCycleZeroing();
- case TargetOpcode::COPY:
- return (Subtarget.hasZeroCycleZeroing() &&
- (MI.getOperand(1).getReg() == AArch64::WZR ||
- MI.getOperand(1).getReg() == AArch64::XZR));
}
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
-bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
- unsigned Reg, Imm, Shift;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // MOV Rd, SP
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(2).getImm();
- return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
-
- // Literal
- case AArch64::ADR:
- case AArch64::ADRP:
- return true;
-
- // MOVI Vd, #0
- case AArch64::MOVID:
- case AArch64::MOVIv8b_ns:
- case AArch64::MOVIv2d_ns:
- case AArch64::MOVIv16b_ns:
- Imm = MI.getOperand(1).getImm();
- return (Imm == 0);
-
- // MOVI Vd, #0
- case AArch64::MOVIv2i32:
- case AArch64::MOVIv4i16:
- case AArch64::MOVIv4i32:
- case AArch64::MOVIv8i16:
- Imm = MI.getOperand(1).getImm();
- Shift = MI.getOperand(2).getImm();
- return (Imm == 0 && Shift == 0);
-
- // MOV Rd, Imm
- case AArch64::MOVNWi:
- case AArch64::MOVNXi:
-
- // MOV Rd, Imm
- case AArch64::MOVZWi:
- case AArch64::MOVZXi:
- return true;
-
- // MOV Rd, Imm
- case AArch64::ORRWri:
- case AArch64::ORRXri:
- if (!MI.getOperand(1).isReg())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(2).getImm();
- return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
-
- // MOV Rd, Rm
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- if (!MI.getOperand(1).isReg())
- return false;
-
- Reg = MI.getOperand(1).getReg();
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getShiftValue(Imm);
- return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
- }
-}
-
-bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
- unsigned Imm, Shift;
- AArch64_AM::ShiftExtendType Ext;
-
- switch (MI.getOpcode()) {
- default:
- return false;
-
- // WriteI
- case AArch64::ADDSWri:
- case AArch64::ADDSXri:
- case AArch64::ADDWri:
- case AArch64::ADDXri:
- case AArch64::SUBSWri:
- case AArch64::SUBSXri:
- case AArch64::SUBWri:
- case AArch64::SUBXri:
- return true;
-
- // WriteISReg
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getShiftValue(Imm);
- Ext = AArch64_AM::getShiftType(Imm);
- return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
-
- // WriteIEReg
- case AArch64::ADDSWrx:
- case AArch64::ADDSXrx:
- case AArch64::ADDSXrx64:
- case AArch64::ADDWrx:
- case AArch64::ADDXrx:
- case AArch64::ADDXrx64:
- case AArch64::SUBSWrx:
- case AArch64::SUBSXrx:
- case AArch64::SUBSXrx64:
- case AArch64::SUBWrx:
- case AArch64::SUBXrx:
- case AArch64::SUBXrx64:
- Imm = MI.getOperand(3).getImm();
- Shift = AArch64_AM::getArithShiftValue(Imm);
- Ext = AArch64_AM::getArithExtendType(Imm);
- return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
-
- case AArch64::PRFMroW:
- case AArch64::PRFMroX:
-
- // WriteLDIdx
- case AArch64::LDRBBroW:
- case AArch64::LDRBBroX:
- case AArch64::LDRHHroW:
- case AArch64::LDRHHroX:
- case AArch64::LDRSBWroW:
- case AArch64::LDRSBWroX:
- case AArch64::LDRSBXroW:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSHWroW:
- case AArch64::LDRSHWroX:
- case AArch64::LDRSHXroW:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSWroW:
- case AArch64::LDRSWroX:
- case AArch64::LDRWroW:
- case AArch64::LDRWroX:
- case AArch64::LDRXroW:
- case AArch64::LDRXroX:
-
- case AArch64::LDRBroW:
- case AArch64::LDRBroX:
- case AArch64::LDRDroW:
- case AArch64::LDRDroX:
- case AArch64::LDRHroW:
- case AArch64::LDRHroX:
- case AArch64::LDRSroW:
- case AArch64::LDRSroX:
-
- // WriteSTIdx
- case AArch64::STRBBroW:
- case AArch64::STRBBroX:
- case AArch64::STRHHroW:
- case AArch64::STRHHroX:
- case AArch64::STRWroW:
- case AArch64::STRWroX:
- case AArch64::STRXroW:
- case AArch64::STRXroX:
-
- case AArch64::STRBroW:
- case AArch64::STRBroX:
- case AArch64::STRDroW:
- case AArch64::STRDroX:
- case AArch64::STRHroW:
- case AArch64::STRHroX:
- case AArch64::STRSroW:
- case AArch64::STRSroX:
- Imm = MI.getOperand(3).getImm();
- Ext = AArch64_AM::getMemExtendType(Imm);
- return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
- }
-}
-
-bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
+bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
@@ -1055,6 +881,32 @@ bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
}
}
+bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::SEH_StackAlloc:
+ case AArch64::SEH_SaveFPLR:
+ case AArch64::SEH_SaveFPLR_X:
+ case AArch64::SEH_SaveReg:
+ case AArch64::SEH_SaveReg_X:
+ case AArch64::SEH_SaveRegP:
+ case AArch64::SEH_SaveRegP_X:
+ case AArch64::SEH_SaveFReg:
+ case AArch64::SEH_SaveFReg_X:
+ case AArch64::SEH_SaveFRegP:
+ case AArch64::SEH_SaveFRegP_X:
+ case AArch64::SEH_SetFP:
+ case AArch64::SEH_AddFP:
+ case AArch64::SEH_Nop:
+ case AArch64::SEH_PrologEnd:
+ case AArch64::SEH_EpilogStart:
+ case AArch64::SEH_EpilogEnd:
+ return true;
+ }
+}
+
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
@@ -1078,7 +930,7 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned BaseRegA = 0, BaseRegB = 0;
+ MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned WidthA = 0, WidthB = 0;
@@ -1089,14 +941,14 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
- // Retrieve the base register, offset from the base register and width. Width
+ // Retrieve the base, offset from the base and width. Width
// is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
- // base registers are identical, and the offset of a lower memory access +
+ // bases are identical, and the offset of a lower memory access +
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
- if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
- getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
- if (BaseRegA == BaseRegB) {
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -1107,6 +959,26 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
return false;
}
+bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
+ return true;
+ switch (MI.getOpcode()) {
+ case AArch64::HINT:
+ // CSDB hints are scheduling barriers.
+ if (MI.getOperand(0).getImm() == 0x14)
+ return true;
+ break;
+ case AArch64::DSB:
+ case AArch64::ISB:
+ // DSB and ISB also are scheduling barriers.
+ return true;
+ default:;
+ }
+ return isSEHInstruction(MI);
+}
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -1593,11 +1465,36 @@ bool AArch64InstrInfo::substituteCmpToZero(
}
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
+ if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
+ MI.getOpcode() != AArch64::CATCHRET)
return false;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
+
+ if (MI.getOpcode() == AArch64::CATCHRET) {
+ // Skip to the first instruction before the epilog.
+ const TargetInstrInfo *TII =
+ MBB.getParent()->getSubtarget().getInstrInfo();
+ MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
+ auto MBBI = MachineBasicBlock::iterator(MI);
+ MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
+ while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
+ FirstEpilogSEH != MBB.begin())
+ FirstEpilogSEH = std::prev(FirstEpilogSEH);
+ if (FirstEpilogSEH != MBB.begin())
+ FirstEpilogSEH = std::next(FirstEpilogSEH);
+ BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
+ .addReg(AArch64::X0, RegState::Define)
+ .addMBB(TargetMBB);
+ BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
+ .addReg(AArch64::X0, RegState::Define)
+ .addReg(AArch64::X0)
+ .addMBB(TargetMBB)
+ .addImm(0);
+ return true;
+ }
+
unsigned Reg = MI.getOperand(0).getReg();
const GlobalValue *GV =
cast<GlobalValue>((*MI.memoperands_begin())->getValue());
@@ -1607,7 +1504,7 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if ((OpFlags & AArch64II::MO_GOT) != 0) {
BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
+ .addGlobalAddress(GV, 0, OpFlags);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
@@ -1632,6 +1529,9 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin());
+ } else if (TM.getCodeModel() == CodeModel::Tiny) {
+ BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
+ .addGlobalAddress(GV, 0, OpFlags);
} else {
BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
@@ -1647,71 +1547,6 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
-/// Return true if this is this instruction has a non-zero immediate
-bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- case AArch64::EONWrs:
- case AArch64::EONXrs:
- case AArch64::EORWrs:
- case AArch64::EORXrs:
- case AArch64::ORNWrs:
- case AArch64::ORNXrs:
- case AArch64::ORRWrs:
- case AArch64::ORRXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- if (MI.getOperand(3).isImm()) {
- unsigned val = MI.getOperand(3).getImm();
- return (val != 0);
- }
- break;
- }
- return false;
-}
-
-/// Return true if this is this instruction has a non-zero immediate
-bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::ADDSWrx:
- case AArch64::ADDSXrx:
- case AArch64::ADDSXrx64:
- case AArch64::ADDWrx:
- case AArch64::ADDXrx:
- case AArch64::ADDXrx64:
- case AArch64::SUBSWrx:
- case AArch64::SUBSXrx:
- case AArch64::SUBSXrx64:
- case AArch64::SUBWrx:
- case AArch64::SUBXrx:
- case AArch64::SUBXrx64:
- if (MI.getOperand(3).isImm()) {
- unsigned val = MI.getOperand(3).getImm();
- return (val != 0);
- }
- break;
- }
-
- return false;
-}
-
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
@@ -1834,67 +1669,6 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
-/// Return true if this load/store scales or extends its register offset.
-/// This refers to scaling a dynamic index as opposed to scaled immediates.
-/// MI should be a memory op that allows scaled addressing.
-bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- break;
- case AArch64::LDRBBroW:
- case AArch64::LDRBroW:
- case AArch64::LDRDroW:
- case AArch64::LDRHHroW:
- case AArch64::LDRHroW:
- case AArch64::LDRQroW:
- case AArch64::LDRSBWroW:
- case AArch64::LDRSBXroW:
- case AArch64::LDRSHWroW:
- case AArch64::LDRSHXroW:
- case AArch64::LDRSWroW:
- case AArch64::LDRSroW:
- case AArch64::LDRWroW:
- case AArch64::LDRXroW:
- case AArch64::STRBBroW:
- case AArch64::STRBroW:
- case AArch64::STRDroW:
- case AArch64::STRHHroW:
- case AArch64::STRHroW:
- case AArch64::STRQroW:
- case AArch64::STRSroW:
- case AArch64::STRWroW:
- case AArch64::STRXroW:
- case AArch64::LDRBBroX:
- case AArch64::LDRBroX:
- case AArch64::LDRDroX:
- case AArch64::LDRHHroX:
- case AArch64::LDRHroX:
- case AArch64::LDRQroX:
- case AArch64::LDRSBWroX:
- case AArch64::LDRSBXroX:
- case AArch64::LDRSHWroX:
- case AArch64::LDRSHXroX:
- case AArch64::LDRSWroX:
- case AArch64::LDRSroX:
- case AArch64::LDRWroX:
- case AArch64::LDRXroX:
- case AArch64::STRBBroX:
- case AArch64::STRBroX:
- case AArch64::STRDroX:
- case AArch64::STRHHroX:
- case AArch64::STRHroX:
- case AArch64::STRQroX:
- case AArch64::STRSroX:
- case AArch64::STRWroX:
- case AArch64::STRXroX:
-
- unsigned Val = MI.getOperand(3).getImm();
- AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
- return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
- }
- return false;
-}
-
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
@@ -2068,17 +1842,21 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
if (MI.hasOrderedMemoryRef())
return false;
- // Make sure this is a reg+imm (as opposed to an address reloc).
- assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
+ // Make sure this is a reg/fi+imm (as opposed to an address reloc).
+ assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
+ "Expected a reg or frame index operand.");
if (!MI.getOperand(2).isImm())
return false;
// Can't merge/pair if the instruction modifies the base register.
// e.g., ldr x0, [x0]
- unsigned BaseReg = MI.getOperand(1).getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- if (MI.modifiesRegister(BaseReg, TRI))
- return false;
+ // This case will never occur with an FI base.
+ if (MI.getOperand(1).isReg()) {
+ unsigned BaseReg = MI.getOperand(1).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (MI.modifiesRegister(BaseReg, TRI))
+ return false;
+ }
// Check if this load/store has a hint to avoid pair formation.
// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
@@ -2101,25 +1879,28 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
return true;
}
-bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned Width;
- return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
+ return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
}
-bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() == 3) {
// Non-paired instruction (e.g., ldr x1, [x0, #8]).
- if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+ if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+ !LdSt.getOperand(2).isImm())
return false;
} else if (LdSt.getNumExplicitOperands() == 4) {
// Paired instruction (e.g., ldp x1, x2, [x0, #8]).
- if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
+ if (!LdSt.getOperand(1).isReg() ||
+ (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
!LdSt.getOperand(3).isImm())
return false;
} else
@@ -2138,13 +1919,18 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
// multiplied by the scaling factor. Unscaled instructions have scaling factor
// set to 1.
if (LdSt.getNumExplicitOperands() == 3) {
- BaseReg = LdSt.getOperand(1).getReg();
+ BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm() * Scale;
} else {
assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
- BaseReg = LdSt.getOperand(2).getReg();
+ BaseOp = &LdSt.getOperand(2);
Offset = LdSt.getOperand(3).getImm() * Scale;
}
+
+ assert((BaseOp->isReg() || BaseOp->isFI()) &&
+ "getMemOperandWithOffset only supports base "
+ "operands of type register or frame index.");
+
return true;
}
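As the comment above notes, the Offset reported through BaseOp/Offset is always a byte offset: the immediate operand is multiplied by a per-opcode scaling factor, and unscaled (LDUR/STUR-style) opcodes use a factor of 1. A quick worked example, with the usual AArch64 access sizes assumed here rather than quoted from the patch:

#include <cassert>
#include <cstdint>

int main() {
  // ldr  x1, [x0, #3]  (LDRXui, immediate scaled by the 8-byte access size)
  // ldur x1, [x0, #3]  (LDURXi, unscaled, factor 1)
  const int64_t Imm = 3;
  const int64_t LdrXuiScale = 8, LdurXiScale = 1;  // assumed scale factors
  assert(Imm * LdrXuiScale == 24);  // byte offset reported for the LDRXui
  assert(Imm * LdurXiScale == 3);   // byte offset reported for the LDURXi
  return 0;
}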
@@ -2299,31 +2085,33 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
return true;
}
-// Scale the unscaled offsets. Returns false if the unscaled offset can't be
-// scaled.
-static bool scaleOffset(unsigned Opc, int64_t &Offset) {
- unsigned OffsetStride = 1;
+static unsigned getOffsetStride(unsigned Opc) {
switch (Opc) {
default:
- return false;
+ return 0;
case AArch64::LDURQi:
case AArch64::STURQi:
- OffsetStride = 16;
- break;
+ return 16;
case AArch64::LDURXi:
case AArch64::LDURDi:
case AArch64::STURXi:
case AArch64::STURDi:
- OffsetStride = 8;
- break;
+ return 8;
case AArch64::LDURWi:
case AArch64::LDURSi:
case AArch64::LDURSWi:
case AArch64::STURWi:
case AArch64::STURSi:
- OffsetStride = 4;
- break;
+ return 4;
}
+}
+
+// Scale the unscaled offsets. Returns false if the unscaled offset can't be
+// scaled.
+static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+ unsigned OffsetStride = getOffsetStride(Opc);
+ if (OffsetStride == 0)
+ return false;
// If the byte-offset isn't a multiple of the stride, we can't scale this
// offset.
if (Offset % OffsetStride != 0)
@@ -2335,6 +2123,19 @@ static bool scaleOffset(unsigned Opc, int64_t &Offset) {
return true;
}
+// Unscale the scaled offsets. Returns false if the scaled offset can't be
+// unscaled.
+static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
+ unsigned OffsetStride = getOffsetStride(Opc);
+ if (OffsetStride == 0)
+ return false;
+
+ // Convert the "element" offset used by scaled pair load/store instructions
+ // into the byte-offset used by unscaled.
+ Offset *= OffsetStride;
+ return true;
+}
+
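For readers following the refactor, here is a minimal standalone sketch (not part of the diff) of the byte-offset/element-offset conversion that getOffsetStride, scaleOffset and unscaleOffset implement. The FakeOpc enum is an illustrative stand-in for the AArch64 opcodes listed above; the strides match the values in the switch.

#include <cassert>
#include <cstdint>

enum FakeOpc { LDURXi, LDURWi, LDURQi };  // stand-ins for AArch64 opcodes

static unsigned getStride(FakeOpc Opc) {
  switch (Opc) {
  case LDURQi: return 16;
  case LDURXi: return 8;
  case LDURWi: return 4;
  }
  return 0;
}

static bool scaleOff(FakeOpc Opc, int64_t &Offset) {
  unsigned Stride = getStride(Opc);
  if (Stride == 0 || Offset % Stride != 0)
    return false;
  Offset /= Stride;  // byte offset -> "element" offset
  return true;
}

static bool unscaleOff(FakeOpc Opc, int64_t &Offset) {
  unsigned Stride = getStride(Opc);
  if (Stride == 0)
    return false;
  Offset *= Stride;  // "element" offset -> byte offset
  return true;
}

int main() {
  int64_t Off = 32;                           // byte offset of an LDURXi
  assert(scaleOff(LDURXi, Off) && Off == 4);  // 32 bytes = element 4
  assert(unscaleOff(LDURXi, Off) && Off == 32);
  Off = 12;                                   // not a multiple of 8
  assert(!scaleOff(LDURXi, Off));
  return 0;
}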
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
if (FirstOpc == SecondOpc)
return true;
@@ -2353,15 +2154,46 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
return false;
}
+static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
+ int64_t Offset1, unsigned Opcode1, int FI2,
+ int64_t Offset2, unsigned Opcode2) {
+ // Accesses through fixed stack object frame indices may access a different
+ // fixed stack slot. Check that the object offsets + offsets match.
+ if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
+ int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
+ int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
+ assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
+ // Get the byte-offset from the object offset.
+ if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
+ return false;
+ ObjectOffset1 += Offset1;
+ ObjectOffset2 += Offset2;
+ // Get the "element" index in the object.
+ if (!scaleOffset(Opcode1, ObjectOffset1) ||
+ !scaleOffset(Opcode2, ObjectOffset2))
+ return false;
+ return ObjectOffset1 + 1 == ObjectOffset2;
+ }
+
+ return FI1 == FI2;
+}
+
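The fixed-object path above folds the frame object offsets into the instruction offsets and then checks that the resulting element indices are adjacent. A small standalone sketch of that arithmetic, with the stride hard-coded to the 8-byte LDURXi/STURXi case and purely illustrative offsets:

#include <cassert>
#include <cstdint>

// Incoming Offset1/Offset2 are "element" offsets; object offsets are bytes.
static bool clusterFixedObjects(int64_t ObjectOffset1, int64_t Offset1,
                                int64_t ObjectOffset2, int64_t Offset2) {
  const int64_t Stride = 8;
  Offset1 *= Stride;                        // unscaleOffset: elements -> bytes
  Offset2 *= Stride;
  ObjectOffset1 += Offset1;                 // byte position of each access
  ObjectOffset2 += Offset2;
  if (ObjectOffset1 % Stride || ObjectOffset2 % Stride)
    return false;                           // scaleOffset would fail
  ObjectOffset1 /= Stride;                  // scaleOffset: bytes -> elements
  ObjectOffset2 /= Stride;
  return ObjectOffset1 + 1 == ObjectOffset2;
}

int main() {
  // Two adjacent 8-byte fixed slots, both accessed at offset 0: pairable.
  assert(clusterFixedObjects(/*ObjOff1=*/0, /*Off1=*/0, /*ObjOff2=*/8, 0));
  // Slots 16 bytes apart: elements 0 and 2 are not adjacent, so no ldp/stp.
  assert(!clusterFixedObjects(0, 0, 16, 0));
  return 0;
}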
/// Detect opportunities for ldp/stp formation.
///
-/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
- unsigned BaseReg1,
- MachineInstr &SecondLdSt,
- unsigned BaseReg2,
+/// Only called for LdSt for which getMemOperandWithOffset returns true.
+bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
+ MachineOperand &BaseOp2,
unsigned NumLoads) const {
- if (BaseReg1 != BaseReg2)
+ MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ MachineInstr &SecondLdSt = *BaseOp2.getParent();
+ if (BaseOp1.getType() != BaseOp2.getType())
+ return false;
+
+ assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+ "Only base registers and frame indices are supported.");
+
+ // Check for both base regs and base FI.
+ if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
return false;
// Only cluster up to a single pair.
@@ -2397,7 +2229,20 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return false;
// The caller should already have ordered First/SecondLdSt by offset.
- assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+ // Note: except for non-equal frame index bases
+ if (BaseOp1.isFI()) {
+ assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
+ "Caller should have ordered offsets.");
+
+ const MachineFrameInfo &MFI =
+ FirstLdSt.getParent()->getParent()->getFrameInfo();
+ return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
+ BaseOp2.getIndex(), Offset2, SecondOpc);
+ }
+
+ assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
+ "Caller should have ordered offsets.");
+
return Offset1 + 1 == Offset2;
}
@@ -2478,7 +2323,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
- } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
+ } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
@@ -2515,7 +2360,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
- } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
+ } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
@@ -2730,13 +2575,33 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("unimplemented reg-to-reg copy");
}
+static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const MCInstrDesc &MCID,
+ unsigned SrcReg, bool IsKill,
+ unsigned SubIdx0, unsigned SubIdx1, int FI,
+ MachineMemOperand *MMO) {
+ unsigned SrcReg0 = SrcReg;
+ unsigned SrcReg1 = SrcReg;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
+ SubIdx0 = 0;
+ SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
+ SubIdx1 = 0;
+ }
+ BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
+ .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
+ .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
void AArch64InstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
bool isKill, int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -2772,8 +2637,14 @@ void AArch64InstrInfo::storeRegToStackSlot(
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
else
assert(SrcReg != AArch64::SP);
- } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRDui;
+ } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
+ storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::STPWi), SrcReg, isKill,
+ AArch64::sube32, AArch64::subo32, FI, MMO);
+ return;
+ }
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
@@ -2783,14 +2654,9 @@ void AArch64InstrInfo::storeRegToStackSlot(
Opc = AArch64::ST1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
- .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
- getKillRegState(isKill))
- .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
- getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0)
- .addMemOperand(MMO);
+ storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::STPXi), SrcReg, isKill,
+ AArch64::sube64, AArch64::subo64, FI, MMO);
return;
}
break;
@@ -2829,7 +2695,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
@@ -2838,13 +2704,35 @@ void AArch64InstrInfo::storeRegToStackSlot(
MI.addMemOperand(MMO);
}
+static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const MCInstrDesc &MCID,
+ unsigned DestReg, unsigned SubIdx0,
+ unsigned SubIdx1, int FI,
+ MachineMemOperand *MMO) {
+ unsigned DestReg0 = DestReg;
+ unsigned DestReg1 = DestReg;
+ bool IsUndef = true;
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
+ SubIdx0 = 0;
+ DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
+ SubIdx1 = 0;
+ IsUndef = false;
+ }
+ BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
+ .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
+ .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
void AArch64InstrInfo::loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -2880,8 +2768,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
else
assert(DestReg != AArch64::SP);
- } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRDui;
+ } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
+ loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::LDPWi), DestReg, AArch64::sube32,
+ AArch64::subo32, FI, MMO);
+ return;
+ }
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
@@ -2891,14 +2785,9 @@ void AArch64InstrInfo::loadRegFromStackSlot(
Opc = AArch64::LD1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
- .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
- getDefRegState(true))
- .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
- getDefRegState(true))
- .addFrameIndex(FI)
- .addImm(0)
- .addMemOperand(MMO);
+ loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
+ get(AArch64::LDPXi), DestReg, AArch64::sube64,
+ AArch64::subo64, FI, MMO);
return;
}
break;
@@ -2937,7 +2826,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
@@ -2949,7 +2838,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, int Offset,
const TargetInstrInfo *TII,
- MachineInstr::MIFlag Flag, bool SetNZCV) {
+ MachineInstr::MIFlag Flag, bool SetNZCV,
+ bool NeedsWinCFI) {
if (DestReg == SrcReg && Offset == 0)
return;
@@ -2994,6 +2884,11 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
.setMIFlag(Flag);
+ if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(ThisVal)
+ .setMIFlag(Flag);
+
SrcReg = DestReg;
Offset -= ThisVal;
if (Offset == 0)
@@ -3004,6 +2899,21 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.addImm(Offset)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.setMIFlag(Flag);
+
+ if (NeedsWinCFI) {
+ if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
+ (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
+ if (Offset == 0)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
+ setMIFlag(Flag);
+ else
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
+ addImm(Offset).setMIFlag(Flag);
+ } else if (DestReg == AArch64::SP) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
+ addImm(Offset).setMIFlag(Flag);
+ }
+ }
}
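As a usage sketch only (a fragment assuming the surrounding AArch64 frame-lowering context, with MBB, MBBI, DL, TII and NumBytes in scope; it is not code from this patch), a Windows prologue that allocates stack through emitFrameOffset would pass the new flag so the matching SEH opcode is emitted next to the SUB:

// Hypothetical caller in AArch64 frame lowering.  With NeedsWinCFI set and
// both registers being SP, the "sub sp, sp, #NumBytes" emitted below is
// followed by a SEH_StackAlloc pseudo; an FP setup would instead get
// SEH_SetFP / SEH_AddFP per the branches above.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                MachineInstr::FrameSetup, /*SetNZCV=*/false,
                /*NeedsWinCFI=*/true);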
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
@@ -4839,7 +4749,10 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
+ {MO_COFFSTUB, "aarch64-coffstub"},
+ {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
+ {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"},
+ {MO_DLLIMPORT, "aarch64-dllimport"}};
return makeArrayRef(TargetFlags);
}
@@ -4941,11 +4854,13 @@ enum MachineOutlinerClass {
enum MachineOutlinerMBBFlags {
LRUnavailableSomewhere = 0x2,
- HasCalls = 0x4
+ HasCalls = 0x4,
+ UnsafeRegsDead = 0x8
};
unsigned
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ assert(C.LRUWasSet && "LRU wasn't set?");
MachineFunction *MF = C.getMF();
const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
@@ -4968,17 +4883,22 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
- unsigned SequenceSize = std::accumulate(
- RepeatedSequenceLocs[0].front(),
- std::next(RepeatedSequenceLocs[0].back()),
- 0, [this](unsigned Sum, const MachineInstr &MI) {
- return Sum + getInstSizeInBytes(MI);
- });
-
- // Compute liveness information for each candidate.
+ outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+ unsigned SequenceSize =
+ std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
+ [this](unsigned Sum, const MachineInstr &MI) {
+ return Sum + getInstSizeInBytes(MI);
+ });
+
+ // Properties about candidate MBBs that hold for all of them.
+ unsigned FlagsSetInAll = 0xF;
+
+ // Compute liveness information for each candidate, and set FlagsSetInAll.
const TargetRegisterInfo &TRI = getRegisterInfo();
std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
+ [&FlagsSetInAll](outliner::Candidate &C) {
+ FlagsSetInAll &= C.Flags;
+ });
// According to the AArch64 Procedure Call Standard, the following are
// undefined on entry/exit from a function call:
@@ -4991,23 +4911,31 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// of these registers is live into/across it. Thus, we need to delete
// those
// candidates.
- auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
+ auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+ // If the unsafe registers in this block are all dead, then we don't need
+ // to compute liveness here.
+ if (C.Flags & UnsafeRegsDead)
+ return false;
+ C.initLRU(TRI);
LiveRegUnits LRU = C.LRU;
return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
!LRU.available(AArch64::NZCV));
};
- // Erase every candidate that violates the restrictions above. (It could be
- // true that we have viable candidates, so it's not worth bailing out in
- // the case that, say, 1 out of 20 candidates violate the restrictions.)
- RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- CantGuaranteeValueAcrossCall),
- RepeatedSequenceLocs.end());
+ // Are there any candidates where those registers are live?
+ if (!(FlagsSetInAll & UnsafeRegsDead)) {
+ // Erase every candidate that violates the restrictions above. (It could be
+ // true that we have viable candidates, so it's not worth bailing out in
+ // the case that, say, 1 out of 20 candidates violate the restrictions.)
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+ RepeatedSequenceLocs.end(),
+ CantGuaranteeValueAcrossCall),
+ RepeatedSequenceLocs.end());
- // If the sequence is empty, we're done.
- if (RepeatedSequenceLocs.empty())
- return outliner::OutlinedFunction();
+ // If the sequence doesn't have enough candidates left, then we're done.
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+ }
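A small standalone sketch (illustrative candidate flags, not values taken from the patch) of how FlagsSetInAll keeps only the properties that hold for every candidate's block, using the MachineOutlinerMBBFlags values defined above:

#include <cassert>

enum {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4,
  UnsafeRegsDead = 0x8,
};

int main() {
  // Flags of three hypothetical candidates' blocks.
  unsigned CandFlags[] = {HasCalls | UnsafeRegsDead, UnsafeRegsDead,
                          LRUnavailableSomewhere | UnsafeRegsDead};
  unsigned FlagsSetInAll = 0xF;
  for (unsigned F : CandFlags)
    FlagsSetInAll &= F;

  // Every block has W16/W17/NZCV dead, so the per-candidate liveness
  // check can be skipped ...
  assert(FlagsSetInAll & UnsafeRegsDead);
  // ... but not every block is known to contain a call.
  assert(!(FlagsSetInAll & HasCalls));
  return 0;
}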
// At this point, we have only "safe" candidates to outline. Figure out
// frame + call instruction information.
@@ -5024,6 +4952,64 @@ AArch64InstrInfo::getOutliningCandidateInfo(
unsigned FrameID = MachineOutlinerDefault;
unsigned NumBytesToCreateFrame = 4;
+ bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
+ });
+
+ // Returns true if an instruction is safe to fix up, false otherwise.
+ auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
+ if (MI.isCall())
+ return true;
+
+ if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
+ !MI.readsRegister(AArch64::SP, &TRI))
+ return true;
+
+ // Any modification of SP will break our code to save/restore LR.
+ // FIXME: We could handle some instructions which add a constant
+ // offset to SP, with a bit more work.
+ if (MI.modifiesRegister(AArch64::SP, &TRI))
+ return false;
+
+ // At this point, we have a stack instruction that we might need to
+ // fix up. We'll handle it if it's a load or store.
+ if (MI.mayLoadOrStore()) {
+ MachineOperand *Base; // Filled with the base operand of MI.
+ int64_t Offset; // Filled with the offset of MI.
+
+ // Does it allow us to offset the base operand and is the base the
+ // register SP?
+ if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
+ Base->getReg() != AArch64::SP)
+ return false;
+
+ // Find the minimum/maximum offset for this instruction and check
+ // if fixing it up would be in range.
+ int64_t MinOffset,
+ MaxOffset; // Unscaled offsets for the instruction.
+ unsigned Scale; // The scale to multiply the offsets by.
+ unsigned DummyWidth;
+ getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
+
+ Offset += 16; // Update the offset to what it would be if we outlined.
+ if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
+ return false;
+
+ // It's in range, so we can outline it.
+ return true;
+ }
+
+ // FIXME: Add handling for instructions like "add x0, sp, #8".
+
+ // We can't fix it up, so don't outline it.
+ return false;
+ };
+
+ // True if it's possible to fix up each stack instruction in this sequence.
+ // Important for frames/call variants that modify the stack.
+ bool AllStackInstrsSafe = std::all_of(
+ FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
+
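The fix-up check above can be illustrated with concrete numbers: the outlined frame spills LR below SP, so every SP-relative offset in the outlined body grows by 16 bytes and must still fit the instruction's encodable range. A standalone sketch, assuming an STRXui-style encoding with scale 8 and immediate range [0, 4095] (stated as an assumption, not quoted from the patch):

#include <cassert>
#include <cstdint>

// Sketch of the range check in IsSafeToFixup for one SP-relative store.
static bool safeToFixup(int64_t ByteOffset) {
  const int64_t Scale = 8, MinOffset = 0, MaxOffset = 4095;
  ByteOffset += 16;  // what the offset becomes once LR is saved below SP
  return ByteOffset >= MinOffset * Scale && ByteOffset <= MaxOffset * Scale;
}

int main() {
  assert(safeToFixup(8));          // str x0, [sp, #8]  -> [sp, #24]: in range
  assert(!safeToFixup(4095 * 8));  // already at the top of the range
  return 0;
}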
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
@@ -5032,65 +5018,102 @@ AArch64InstrInfo::getOutliningCandidateInfo(
SetCandidateCallInfo(MachineOutlinerTailCall, 4);
}
- else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
+ else if (LastInstrOpcode == AArch64::BL ||
+ (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = 0;
SetCandidateCallInfo(MachineOutlinerThunk, 4);
}
- // Make sure that LR isn't live on entry to this candidate. The only
- // instructions that use LR that could possibly appear in a repeated sequence
- // are calls. Therefore, we only have to check and see if LR is dead on entry
- // to (or exit from) some candidate.
- else if (std::all_of(RepeatedSequenceLocs.begin(),
- RepeatedSequenceLocs.end(),
- [](outliner::Candidate &C) {
- return C.LRU.available(AArch64::LR);
- })) {
- FrameID = MachineOutlinerNoLRSave;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
- }
-
- // LR is live, so we need to save it. Decide whether it should be saved to
- // the stack, or if it can be saved to a register.
else {
- if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [this](outliner::Candidate &C) {
- return findRegisterToSaveLRTo(C);
- })) {
- // Every candidate has an available callee-saved register for the save.
- // We can save LR to a register.
- FrameID = MachineOutlinerRegSave;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerRegSave, 12);
+ // We need to decide how to emit calls + frames. We can always emit the same
+ // frame if we don't need to save to the stack. If we have to save to the
+ // stack, then we need a different frame.
+ unsigned NumBytesNoStackCalls = 0;
+ std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
+
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ C.initLRU(TRI);
+
+ // Is LR available? If so, we don't need a save.
+ if (C.LRU.available(AArch64::LR)) {
+ NumBytesNoStackCalls += 4;
+ C.setCallInfo(MachineOutlinerNoLRSave, 4);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // Is an unused register available? If so, we won't modify the stack, so
+ // we can outline with the same frame type as those that don't save LR.
+ else if (findRegisterToSaveLRTo(C)) {
+ NumBytesNoStackCalls += 12;
+ C.setCallInfo(MachineOutlinerRegSave, 12);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // Is SP used in the sequence at all? If not, we don't have to modify
+ // the stack, so we are guaranteed to get the same frame.
+ else if (C.UsedInSequence.available(AArch64::SP)) {
+ NumBytesNoStackCalls += 12;
+ C.setCallInfo(MachineOutlinerDefault, 12);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // If we outline this, we need to modify the stack. Pretend we don't
+ // outline this by saving all of its bytes.
+ else {
+ NumBytesNoStackCalls += SequenceSize;
+ }
}
- else {
- // At least one candidate does not have an available callee-saved
- // register. We must save LR to the stack.
- FrameID = MachineOutlinerDefault;
- NumBytesToCreateFrame = 4;
+ // If there are no places where we have to save LR, then note that we
+ // don't have to update the stack. Otherwise, give every candidate the
+ // default call type, as long as it's safe to do so.
+ if (!AllStackInstrsSafe ||
+ NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
+ RepeatedSequenceLocs = CandidatesWithoutStackFixups;
+ FrameID = MachineOutlinerNoLRSave;
+ } else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
}
+
+ // If we dropped all of the candidates, bail out here.
+ if (RepeatedSequenceLocs.size() < 2) {
+ RepeatedSequenceLocs.clear();
+ return outliner::OutlinedFunction();
+ }
}
- // Check if the range contains a call. These require a save + restore of the
- // link register.
- if (std::any_of(RepeatedSequenceLocs[0].front(),
- RepeatedSequenceLocs[0].back(),
- [](const MachineInstr &MI) { return MI.isCall(); }))
- NumBytesToCreateFrame += 8; // Save + restore the link register.
+ // Does every candidate's MBB contain a call? If so, then we might have a call
+ // in the range.
+ if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+ // Check if the range contains a call. These require a save + restore of the
+ // link register.
+ bool ModStackToSaveLR = false;
+ if (std::any_of(FirstCand.front(), FirstCand.back(),
+ [](const MachineInstr &MI) { return MI.isCall(); }))
+ ModStackToSaveLR = true;
+
+ // Handle the last instruction separately. If this is a tail call, then the
+ // last instruction is a call. We don't want to save + restore in this case.
+ // However, it could be possible that the last instruction is a call without
+ // it being valid to tail call this sequence. We should consider this as
+ // well.
+ else if (FrameID != MachineOutlinerThunk &&
+ FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
+ ModStackToSaveLR = true;
+
+ if (ModStackToSaveLR) {
+ // We can't fix up the stack. Bail out.
+ if (!AllStackInstrsSafe) {
+ RepeatedSequenceLocs.clear();
+ return outliner::OutlinedFunction();
+ }
- // Handle the last instruction separately. If this is a tail call, then the
- // last instruction is a call. We don't want to save + restore in this case.
- // However, it could be possible that the last instruction is a call without
- // it being valid to tail call this sequence. We should consider this as well.
- else if (FrameID != MachineOutlinerThunk &&
- FrameID != MachineOutlinerTailCall &&
- RepeatedSequenceLocs[0].back()->isCall())
- NumBytesToCreateFrame += 8;
+ // Save + restore LR.
+ NumBytesToCreateFrame += 8;
+ }
+ }
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
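The cost comparison in the middle of this function can be made concrete with a small standalone sketch (illustrative sizes, not measurements from the patch): each candidate contributes either its cheap call overhead or, if only a stack save would work, its whole sequence size, and the total is weighed against saving LR on the stack at every call site.

#include <cassert>
#include <vector>

int main() {
  const unsigned SequenceSize = 20;  // bytes in the repeated sequence
  // Per-candidate overhead: 4 (LR free), 12 (register or SP-free save),
  // or the whole sequence size when only a stack save would do.
  std::vector<unsigned> PerCandidate = {4, 12, SequenceSize};

  unsigned NumBytesNoStackCalls = 0;
  for (unsigned Bytes : PerCandidate)
    NumBytesNoStackCalls += Bytes;   // 4 + 12 + 20 = 36

  // Three candidates at 12 bytes each if every call saved LR on the stack.
  unsigned AllStackCalls = 3 * 12;   // 36

  // 36 <= 36: cheaper (or equal) to drop the stack-fixup candidate and keep
  // the frame variant that never touches the stack.
  assert(NumBytesNoStackCalls <= AllStackCalls);
  return 0;
}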
@@ -5122,30 +5145,70 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
return true;
}
-unsigned
-AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
- unsigned Flags = 0x0;
- // Check if there's a call inside this MachineBasicBlock. If there is, then
- // set a flag.
- if (std::any_of(MBB.begin(), MBB.end(),
- [](MachineInstr &MI) { return MI.isCall(); }))
- Flags |= MachineOutlinerMBBFlags::HasCalls;
-
+bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
// Check if LR is available through all of the MBB. If it's not, then set
// a flag.
assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
"Suitable Machine Function for outlining must track liveness");
LiveRegUnits LRU(getRegisterInfo());
- LRU.addLiveOuts(MBB);
- std::for_each(MBB.rbegin(),
- MBB.rend(),
+ std::for_each(MBB.rbegin(), MBB.rend(),
[&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
- if (!LRU.available(AArch64::LR))
- Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+ // Check if each of the unsafe registers are available...
+ bool W16AvailableInBlock = LRU.available(AArch64::W16);
+ bool W17AvailableInBlock = LRU.available(AArch64::W17);
+ bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
+
+ // If all of these are dead (and not live out), we know we don't have to check
+ // them later.
+ if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
+ Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+ // Now, add the live outs to the set.
+ LRU.addLiveOuts(MBB);
- return Flags;
+ // If any of these registers is available in the MBB, but also a live out of
+ // the block, then we know outlining is unsafe.
+ if (W16AvailableInBlock && !LRU.available(AArch64::W16))
+ return false;
+ if (W17AvailableInBlock && !LRU.available(AArch64::W17))
+ return false;
+ if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
+ return false;
+
+ // Check if there's a call inside this MachineBasicBlock. If there is, then
+ // set a flag.
+ if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
+ Flags |= MachineOutlinerMBBFlags::HasCalls;
+
+ MachineFunction *MF = MBB.getParent();
+
+ // In the event that we outline, we may have to save LR. If there is an
+ // available register in the MBB, then we'll always save LR there. Check if
+ // this is true.
+ bool CanSaveLR = false;
+ const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : AArch64::GPR64RegClass) {
+ if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
+ Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
+ CanSaveLR = true;
+ break;
+ }
+ }
+
+ // Check if we have a register we can save LR to, and if LR was used
+ // somewhere. If both of those things are true, then we need to evaluate the
+ // safety of outlining stack instructions later.
+ if (!CanSaveLR && !LRU.available(AArch64::LR))
+ Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+ return true;
}
outliner::InstrType
@@ -5268,108 +5331,19 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
return outliner::InstrType::Illegal;
- // Does this use the stack?
- if (MI.modifiesRegister(AArch64::SP, &RI) ||
- MI.readsRegister(AArch64::SP, &RI)) {
- // True if there is no chance that any outlined candidate from this range
- // could require stack fixups. That is, both
- // * LR is available in the range (No save/restore around call)
- // * The range doesn't include calls (No save/restore in outlined frame)
- // are true.
- // FIXME: This is very restrictive; the flags check the whole block,
- // not just the bit we will try to outline.
- bool MightNeedStackFixUp =
- (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
- MachineOutlinerMBBFlags::HasCalls));
-
- // If this instruction is in a range where it *never* needs to be fixed
- // up, then we can *always* outline it. This is true even if it's not
- // possible to fix that instruction up.
- //
- // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
- // use SP. Suppose that I1 sits within a range that definitely doesn't
- // need stack fixups, while I2 sits in a range that does.
- //
- // First, I1 can be outlined as long as we *never* fix up the stack in
- // any sequence containing it. I1 is already a safe instruction in the
- // original program, so as long as we don't modify it we're good to go.
- // So this leaves us with showing that outlining I2 won't break our
- // program.
- //
- // Suppose I1 and I2 belong to equivalent candidate sequences. When we
- // look at I2, we need to see if it can be fixed up. Suppose I2, (and
- // thus I1) cannot be fixed up. Then I2 will be assigned an unique
- // integer label; thus, I2 cannot belong to any candidate sequence (a
- // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
- // as well, so we're good. Thus, I1 is always safe to outline.
- //
- // This gives us two things: first off, it buys us some more instructions
- // for our search space by deeming stack instructions illegal only when
- // they can't be fixed up AND we might have to fix them up. Second off,
- // This allows us to catch tricky instructions like, say,
- // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
- // be paired with later SUBXris, which might *not* end up being outlined.
- // If we mess with the stack to save something, then an ADDXri messes with
- // it *after*, then we aren't going to restore the right something from
- // the stack if we don't outline the corresponding SUBXri first. ADDXris and
- // SUBXris are extremely common in prologue/epilogue code, so supporting
- // them in the outliner can be a pretty big win!
- if (!MightNeedStackFixUp)
- return outliner::InstrType::Legal;
-
- // Any modification of SP will break our code to save/restore LR.
- // FIXME: We could handle some instructions which add a constant offset to
- // SP, with a bit more work.
- if (MI.modifiesRegister(AArch64::SP, &RI))
- return outliner::InstrType::Illegal;
-
- // At this point, we have a stack instruction that we might need to fix
- // up. We'll handle it if it's a load or store.
- if (MI.mayLoadOrStore()) {
- unsigned Base; // Filled with the base register of MI.
- int64_t Offset; // Filled with the offset of MI.
- unsigned DummyWidth;
-
- // Does it allow us to offset the base register and is the base SP?
- if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
- Base != AArch64::SP)
- return outliner::InstrType::Illegal;
-
- // Find the minimum/maximum offset for this instruction and check if
- // fixing it up would be in range.
- int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
- unsigned Scale; // The scale to multiply the offsets by.
- getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
-
- // TODO: We should really test what happens if an instruction overflows.
- // This is tricky to test with IR tests, but when the outliner is moved
- // to a MIR test, it really ought to be checked.
- Offset += 16; // Update the offset to what it would be if we outlined.
- if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
- return outliner::InstrType::Illegal;
-
- // It's in range, so we can outline it.
- return outliner::InstrType::Legal;
- }
-
- // FIXME: Add handling for instructions like "add x0, sp, #8".
-
- // We can't fix it up, so don't outline it.
- return outliner::InstrType::Illegal;
- }
-
return outliner::InstrType::Legal;
}
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
- unsigned Base, Width;
+ MachineOperand *Base;
+ unsigned Width;
int64_t Offset;
// Is this a load or store with an immediate offset with SP as the base?
if (!MI.mayLoadOrStore() ||
- !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
- Base != AArch64::SP)
+ !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
+ (Base->isReg() && Base->getReg() != AArch64::SP))
continue;
// It is, so we have to fix it up.
@@ -5401,7 +5375,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
TailOpcode = AArch64::TCRETURNdi;
} else {
assert(Call->getOpcode() == AArch64::BLR);
- TailOpcode = AArch64::TCRETURNri;
+ TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
.add(Call->getOperand(0))
@@ -5562,3 +5536,6 @@ bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
MachineFunction &MF) const {
return MF.getFunction().optForMinSize();
}
+
+#define GET_INSTRINFO_HELPERS
+#include "AArch64GenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 11882e238b70..9954669d5675 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -62,14 +62,6 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- /// Returns true if there is a shiftable register and the shift value
- /// is non-zero.
- static bool hasShiftedReg(const MachineInstr &MI);
-
- /// Returns true if there is an extendable register and the extending
- /// value is non-zero.
- static bool hasExtendedReg(const MachineInstr &MI);
-
/// Does this instruction set its full destination register to zero?
static bool isGPRZero(const MachineInstr &MI);
@@ -79,11 +71,6 @@ public:
/// Does this instruction rename an FPR without modifying bits?
static bool isFPRCopy(const MachineInstr &MI);
- /// Return true if this load/store scales or extends its register offset.
- /// This refers to scaling a dynamic index as opposed to scaled immediates.
- /// MI should be a memory op that allows scaled addressing.
- static bool isScaledAddr(const MachineInstr &MI);
-
/// Return true if pairing the given load or store is hinted to be
/// unprofitable.
static bool isLdStPairSuppressed(const MachineInstr &MI);
@@ -110,13 +97,13 @@ public:
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
- bool getMemOpBaseRegImmOfsWidth(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const;
+ bool getMemOperandWithOffsetWidth(MachineInstr &MI, MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
/// Return the immediate offset of the base register in a load/store \p LdSt.
MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
@@ -128,8 +115,7 @@ public:
bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
int64_t &MinOffset, int64_t &MaxOffset) const;
- bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
- MachineInstr &SecondLdSt, unsigned BaseReg2,
+ bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
unsigned NumLoads) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -189,6 +175,10 @@ public:
unsigned FalseReg) const override;
void getNoop(MCInst &NopInst) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -242,7 +232,8 @@ public:
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
outliner::InstrType
getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
- unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const override;
+ bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const override;
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const override;
MachineBasicBlock::iterator
@@ -250,15 +241,15 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
- /// Returns true if the instruction sets to an immediate value that can be
- /// executed more efficiently.
- bool isExynosResetFast(const MachineInstr &MI) const;
- /// Returns true if the instruction has a shift left that can be executed
- /// more efficiently.
- bool isExynosShiftLeftFast(const MachineInstr &MI) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
- bool isFalkorShiftExtFast(const MachineInstr &MI) const;
+ static bool isFalkorShiftExtFast(const MachineInstr &MI);
+ /// Return true if the instruction is an SEH instruction used for unwinding
+ /// on Windows.
+ static bool isSEHInstruction(const MachineInstr &MI);
+
+#define GET_INSTRINFO_HELPER_DECLS
+#include "AArch64GenInstrInfo.inc"
private:
/// Sets the offsets on outlined instructions in \p MBB which use SP
@@ -286,7 +277,7 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
int Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
- bool SetNZCV = false);
+ bool SetNZCV = false, bool NeedsWinCFI = false);
/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d6b8bb5d89c7..c24b8b36441b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -22,6 +22,56 @@ def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
+ AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
+def HasVH : Predicate<"Subtarget->hasVH()">,
+ AssemblerPredicate<"FeatureVH", "vh">;
+
+def HasLOR : Predicate<"Subtarget->hasLOR()">,
+ AssemblerPredicate<"FeatureLOR", "lor">;
+
+def HasPA : Predicate<"Subtarget->hasPA()">,
+ AssemblerPredicate<"FeaturePA", "pa">;
+
+def HasJS : Predicate<"Subtarget->hasJS()">,
+ AssemblerPredicate<"FeatureJS", "jsconv">;
+
+def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
+ AssemblerPredicate<"FeatureCCIDX", "ccidx">;
+
+def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
+ AssemblerPredicate<"FeatureComplxNum", "complxnum">;
+
+def HasNV : Predicate<"Subtarget->hasNV()">,
+ AssemblerPredicate<"FeatureNV", "nv">;
+
+def HasRASv8_4 : Predicate<"Subtarget->hasRASv8_4()">,
+ AssemblerPredicate<"FeatureRASv8_4", "rasv8_4">;
+
+def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
+ AssemblerPredicate<"FeatureMPAM", "mpam">;
+
+def HasDIT : Predicate<"Subtarget->hasDIT()">,
+ AssemblerPredicate<"FeatureDIT", "dit">;
+
+def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
+ AssemblerPredicate<"FeatureTRACEV8_4", "tracev8.4">;
+
+def HasAM : Predicate<"Subtarget->hasAM()">,
+ AssemblerPredicate<"FeatureAM", "am">;
+
+def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
+ AssemblerPredicate<"FeatureSEL2", "sel2">;
+
+def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
+ AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">;
+
+def HasFMI : Predicate<"Subtarget->hasFMI()">,
+ AssemblerPredicate<"FeatureFMI", "fmi">;
+
+def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
+ AssemblerPredicate<"FeatureRCPC_IMMO", "rcpc-immo">;
+
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -49,6 +99,8 @@ def HasRDM : Predicate<"Subtarget->hasRDM()">,
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
+def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
+ AssemblerPredicate<"FeatureFP16FML", "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
AssemblerPredicate<"FeatureSPE", "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
@@ -58,7 +110,20 @@ def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<"FeatureSVE", "sve">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<"FeatureRCPC", "rcpc">;
-
+def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
+ AssemblerPredicate<"FeatureAltFPCmp", "altnzcv">;
+def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
+ AssemblerPredicate<"FeatureFRInt3264", "frint3264">;
+def HasSB : Predicate<"Subtarget->hasSB()">,
+ AssemblerPredicate<"FeatureSB", "sb">;
+def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
+ AssemblerPredicate<"FeaturePredRes", "predres">;
+def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
+ AssemblerPredicate<"FeatureCacheDeepPersist", "ccdp">;
+def HasBTI : Predicate<"Subtarget->hasBTI()">,
+ AssemblerPredicate<"FeatureBranchTargetId", "bti">;
+def HasMTE : Predicate<"Subtarget->hasMTE()">,
+ AssemblerPredicate<"FeatureMTE", "mte">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def UseAlternateSExtLoadCVTF32
@@ -174,6 +239,7 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
+def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
@@ -342,6 +408,9 @@ let RecomputePerFunction = 1 in {
def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">;
// Avoid generating STRQro if it is slow, unless we're optimizing for code size.
def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;
+
+ def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
+ def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
}
include "AArch64InstrFormats.td"
@@ -427,6 +496,38 @@ def : Pat<(AArch64LOADgot texternalsym:$addr),
def : Pat<(AArch64LOADgot tconstpool:$addr),
(LOADgot tconstpool:$addr)>;
+// 32-bit jump table destination is actually only 2 instructions since we can
+// use the table itself as a PC-relative base. But optimization occurs after
+// branch relaxation so be pessimistic.
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
+def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+}
+
+// Space-consuming pseudo to aid testing of placement and reachability
+// algorithms. Immediate operand is the number of bytes this "instruction"
+// occupies; register operands can be used to enforce dependency and constrain
+// the scheduler.
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
+ [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
+ Sched<[]>;
+
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
+ def SpeculationSafeValueX
+ : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
+ def SpeculationSafeValueW
+ : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
+}
+
+
//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//
@@ -440,6 +541,8 @@ def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
+def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>;
+def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
@@ -463,11 +566,11 @@ def ISB : CRmSystemI<barrier_op, 0b110, "isb",
def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
let CRm = 0b0010;
let Inst{12} = 0;
- let Predicates = [HasV8_4a];
+ let Predicates = [HasTRACEV8_4];
}
}
-// ARMv8.2 Dot Product
+// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>;
@@ -475,6 +578,18 @@ defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
}
+// ARMv8.2-A FP16 Fused Multiply-Add Long
+let Predicates = [HasNEON, HasFP16FML] in {
+defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
+defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
+defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
+defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
+defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
+defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
+defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
+defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
+}
+
// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">;
@@ -543,7 +658,7 @@ let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
}
// These pointer authentication instructions require armv8.3a
-let Predicates = [HasV8_3a] in {
+let Predicates = [HasPA] in {
multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
@@ -583,17 +698,17 @@ let Predicates = [HasV8_3a] in {
defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
- // v8.3a floating point conversion for javascript
- let Predicates = [HasV8_3a, HasFPARMv8] in
- def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
- "fjcvtzs", []> {
- let Inst{31} = 0;
- }
+}
-} // HasV8_3a
+// v8.3a floating point conversion for javascript
+let Predicates = [HasJS, HasFPARMv8] in
+def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
+ "fjcvtzs", []> {
+ let Inst{31} = 0;
+} // HasJS, HasFPARMv8
// v8.4 Flag manipulation instructions
-let Predicates = [HasV8_4a] in {
+let Predicates = [HasFMI] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
let Inst{20-5} = 0b0000001000000000;
}
@@ -601,10 +716,39 @@ def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
"{\t$Rn, $imm, $mask}">;
-} // HasV8_4a
+} // HasFMI
+
+// v8.5 flag manipulation instructions
+let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
+
+def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
+ let Inst{18-16} = 0b000;
+ let Inst{11-8} = 0b0000;
+ let Unpredictable{11-8} = 0b1111;
+ let Inst{7-5} = 0b001;
+}
+
+def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
+ let Inst{18-16} = 0b000;
+ let Inst{11-8} = 0b0000;
+ let Unpredictable{11-8} = 0b1111;
+ let Inst{7-5} = 0b010;
+}
+} // HasAltNZCV
+
+
+// Armv8.5-A speculation barrier
+def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
+ let Inst{20-5} = 0b0001100110000111;
+ let Unpredictable{11-8} = 0b1111;
+ let Predicates = [HasSB];
+ let hasSideEffects = 1;
+}
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
+def : InstAlias<"ssbb", (DSB 0)>;
+def : InstAlias<"pssbb", (DSB 4)>;
def MRS : MRSI;
def MSR : MSRI;
@@ -1076,6 +1220,50 @@ defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
defm : STOPregister<"stumin","LDUMIN">;// STUMINx
+// v8.5 Memory Tagging Extension
+let Predicates = [HasMTE] in {
+
+def IRG : BaseTwoOperand<0b0100, GPR64sp, "irg", null_frag, GPR64sp, GPR64>,
+ Sched<[]>{
+ let Inst{31} = 1;
+}
+def GMI : BaseTwoOperand<0b0101, GPR64, "gmi", null_frag, GPR64sp>, Sched<[]>{
+ let Inst{31} = 1;
+ let isNotDuplicable = 1;
+}
+def ADDG : AddSubG<0, "addg", null_frag>;
+def SUBG : AddSubG<1, "subg", null_frag>;
+
+def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
+
+def SUBP : SUBP<0, "subp", null_frag>, Sched<[]>;
+def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
+ let Defs = [NZCV];
+}
+
+def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
+
+def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
+def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
+
+def LDGV : MemTagVector<1, "ldgv", "\t$Rt, [$Rn]!",
+ (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn)> {
+ let DecoderMethod = "DecodeLoadAllocTagArrayInstruction";
+}
+def STGV : MemTagVector<0, "stgv", "\t$Rt, [$Rn]!",
+ (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn)>;
+
+defm STG : MemTagStore<0b00, "stg">;
+defm STZG : MemTagStore<0b01, "stzg">;
+defm ST2G : MemTagStore<0b10, "st2g">;
+defm STZ2G : MemTagStore<0b11, "stz2g">;
+
+defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
+def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
+def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
+
+} // Predicates = [HasMTE]
+
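For orientation, a rough user-level model of the tagging arithmetic these MTE instructions operate on. This is an illustrative C++ sketch, not taken from the patch; the 4-bit tag in address bits [59:56] and the 16-byte granule are architectural facts, while the helper names are invented.

#include <cassert>
#include <cstdint>

// MTE keeps a 4-bit allocation tag per 16-byte granule of memory; the
// matching logical tag travels in bits [59:56] of the pointer.
static uint64_t withTag(uint64_t addr, unsigned tag) {
  return (addr & ~(0xfULL << 56)) | (uint64_t(tag & 0xf) << 56);
}

static unsigned tagOf(uint64_t addr) { return unsigned((addr >> 56) & 0xf); }

int main() {
  uint64_t raw = 0x10000040ULL;
  uint64_t p = withTag(raw, 0x7);   // roughly the shape of an IRG/ADDG result
  assert(tagOf(p) == 0x7);
  assert(withTag(p, 0) == raw);     // clearing the tag restores the raw address
  return 0;
}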
//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//
@@ -1383,7 +1571,8 @@ def : InstAlias<"cneg $dst, $src, $cc",
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
-def ADR : ADRI<0, "adr", adrlabel, []>;
+def ADR : ADRI<0, "adr", adrlabel,
+ [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0
def ADRP : ADRI<1, "adrp", adrplabel,
@@ -1391,6 +1580,10 @@ def ADRP : ADRI<1, "adrp", adrplabel,
} // isReMaterializable = 1
// page address of a constant pool entry, block address
+def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
+def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
+def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
+def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
@@ -1434,6 +1627,10 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
let AsmString = ".tlsdesccall $sym";
}
+// Pseudo instruction to tell the streamer to emit a 'B' character into the
+// augmentation string.
+def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
+
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects be dropped?
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
@@ -1493,6 +1690,8 @@ def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>;
+def UDF : UDFType<0, "udf">;
+
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//
@@ -1883,14 +2082,37 @@ def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
//---
// (literal)
-def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr">;
-def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr">;
-def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr">;
-def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr">;
-def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr">;
+
+def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
+ const DataLayout &DL = MF->getDataLayout();
+ unsigned Align = G->getGlobal()->getPointerAlignment(DL);
+ return Align >= 4 && G->getOffset() % 4 == 0;
+ }
+ if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
+ return C->getAlignment() >= 4 && C->getOffset() % 4 == 0;
+ return false;
+}]>;
+
+def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
+ [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
+def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
+ [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
+def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
+ [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
+def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
+ [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
+def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
+ [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
// load sign-extended word
-def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw">;
+def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
+ [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;
+
+let AddedComplexity = 20 in {
+def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
+ (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
+}
// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
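As a rough illustration of what the alignedglobal predicate above guards: LDR (literal) encodes a signed 19-bit word offset, so the target address must stay 4-byte aligned and within about a mebibyte of the instruction. The following plain C++ sketch is not part of the patch; the function names are invented.

#include <cassert>
#include <cstdint>

// Mirrors the PatLeaf: only globals/constant-pool entries whose alignment and
// offset keep the address 4-byte aligned are candidates for LDR (literal).
static bool isLdrLiteralCandidate(uint64_t pointerAlignment, int64_t offset) {
  return pointerAlignment >= 4 && offset % 4 == 0;
}

// The encoded PC-relative offset is a signed 19-bit word count, so the
// reachable window is roughly +/-1 MiB around the instruction.
static bool fitsLiteralRange(int64_t byteOffsetFromPC) {
  return byteOffsetFromPC % 4 == 0 &&
         byteOffsetFromPC >= -(1 << 20) && byteOffsetFromPC < (1 << 20);
}

int main() {
  assert(isLdrLiteralCandidate(8, 16) && !isLdrLiteralCandidate(2, 0));
  assert(fitsLiteralRange(1048572) && !fitsLiteralRange(1 << 20));
  return 0;
}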
@@ -2467,8 +2689,9 @@ defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
[(truncstorei8 GPR32z:$Rt,
(am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
-// Armv8.4 LDAPR & STLR with Immediate Offset instruction
-let Predicates = [HasV8_4a] in {
+// Armv8.4 Weaker Release Consistency enhancements
+// LDAPR & STLR with Immediate Offset instructions
+let Predicates = [HasRCPC_IMMO] in {
defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>;
defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>;
defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>;
@@ -2753,7 +2976,7 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
-let Predicates = [HasV8_1a] in {
+let Predicates = [HasLOR] in {
// v8.1a "Limited Order Region" extension load-acquire instructions
def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
@@ -2886,6 +3109,13 @@ let SchedRW = [WriteFDiv] in {
defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
}
+let Predicates = [HasFRInt3264] in {
+ defm FRINT32Z : FRIntNNT<0b00, "frint32z">;
+ defm FRINT64Z : FRIntNNT<0b10, "frint64z">;
+ defm FRINT32X : FRIntNNT<0b01, "frint32x">;
+ defm FRINT64X : FRIntNNT<0b11, "frint64x">;
+} // HasFRInt3264
+
//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//
@@ -2895,18 +3125,18 @@ let SchedRW = [WriteFDiv] in {
defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
-defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
-defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
-def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
@@ -2983,6 +3213,42 @@ def F128CSEL : Pseudo<(outs FPR128:$Rd),
let hasNoSchedulingInfo = 1;
}
+//===----------------------------------------------------------------------===//
+// Instructions used for emitting unwind opcodes on ARM64 Windows.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in {
+ def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
+ def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
+ def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
+ def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
+}
+
+// Pseudo instructions for Windows EH
+//===----------------------------------------------------------------------===//
+let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+ isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
+ def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
+ let usesCustomInserter = 1 in
+ def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
+ Sched<[]>;
+}
+
+let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
+def CATCHPAD : Pseudo<(outs), (ins), [(catchpad)]>, Sched<[]>;
//===----------------------------------------------------------------------===//
// Floating point immediate move.
@@ -3104,6 +3370,14 @@ defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
+
+let Predicates = [HasFRInt3264] in {
+ defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z">;
+ defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z">;
+ defm FRINT32X : FRIntNNTVector<1, 0, "frint32x">;
+ defm FRINT64X : FRIntNNTVector<1, 1, "frint64x">;
+} // HasFRInt3264
+
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
@@ -3224,11 +3498,11 @@ defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
-defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
-defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>;
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
@@ -3895,25 +4169,6 @@ defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
-// Patterns for smull2/umull2.
-multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
- Instruction INST8B, Instruction INST4H, Instruction INST2S> {
- def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))),
- (INST8B V128:$Rn, V128:$Rm)>;
- def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))),
- (INST4H V128:$Rn, V128:$Rm)>;
- def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))),
- (INST2S V128:$Rn, V128:$Rm)>;
-}
-
-defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
- SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
-defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
- UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
-
// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
@@ -4004,44 +4259,43 @@ def : Pat<(concat_vectors (v2i32 V64:$Rd),
defm EXT : SIMDBitwiseExtract<"ext">;
-def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-
-// We use EXT to handle extract_subvector to copy the upper 64-bits of a
-// 128-bit vector.
-def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-
+def AdjustExtImm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
+ def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+ def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+ // We use EXT to handle extract_subvector to copy the upper 64-bits of a
+ // 128-bit vector.
+ def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+ // A 64-bit EXT of two halves of the same 128-bit register can be done as a
+ // single 128-bit EXT.
+ def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
+ (extract_subvector V128:$Rn, (i64 N)),
+ (i32 imm:$imm))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
+ // A 64-bit EXT of the high half of a 128-bit register can be done using a
+ // 128-bit EXT of the whole register with an adjustment to the immediate. The
+ // top half of the other operand will be unset, but that doesn't matter as it
+ // will not be used.
+ def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
+ V64:$Rm,
+ (i32 imm:$imm))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ (AdjustExtImm imm:$imm)), dsub)>;
+}
+
+defm : ExtPat<v8i8, v16i8, 8>;
+defm : ExtPat<v4i16, v8i16, 4>;
+defm : ExtPat<v4f16, v8f16, 4>;
+defm : ExtPat<v2i32, v4i32, 2>;
+defm : ExtPat<v2f32, v4f32, 2>;
+defm : ExtPat<v1i64, v2i64, 1>;
+defm : ExtPat<v1f64, v2f64, 1>;
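To see why the AdjustExtImm trick above is sound, here is a small byte-level model. It is an illustrative C++ sketch, not from the LLVM tree, and it assumes the usual reading of EXT as extracting consecutive bytes from the concatenation {Vm:Vn}, with Vn supplying the low-numbered bytes.

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

using V64  = std::array<uint8_t, 8>;
using V128 = std::array<uint8_t, 16>;

// Byte-level model of EXT: take N consecutive bytes starting at 'imm' from
// the concatenation {Vm:Vn}, with Vn supplying the low-numbered bytes.
template <std::size_t N>
std::array<uint8_t, N> ext(const std::array<uint8_t, N> &Vn,
                           const std::array<uint8_t, N> &Vm, unsigned imm) {
  std::array<uint8_t, N> R{};
  for (std::size_t i = 0; i < N; ++i)
    R[i] = (i + imm < N) ? Vn[i + imm] : Vm[i + imm - N];
  return R;
}

int main() {
  V128 Rn{}, RmWide{};
  V64 Rm{};
  for (unsigned i = 0; i < 16; ++i) Rn[i] = uint8_t(i);
  for (unsigned i = 0; i < 8; ++i) { Rm[i] = uint8_t(0x80 + i); RmWide[i] = Rm[i]; }

  V64 Hi; // extract_subvector(Rn, 8): the high half of Rn
  for (unsigned i = 0; i < 8; ++i) Hi[i] = Rn[i + 8];

  for (unsigned imm = 0; imm < 8; ++imm) {
    V64 A  = ext(Hi, Rm, imm);          // 64-bit EXT on the high half
    V128 B = ext(Rn, RmWide, imm + 8);  // 128-bit EXT with AdjustExtImm (+8)
    for (unsigned i = 0; i < 8; ++i)
      assert(A[i] == B[i]);             // the dsub (low 64-bit) halves agree
  }
  return 0;
}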
//----------------------------------------------------------------------------
// AdvSIMD zip vector
@@ -4137,6 +4391,12 @@ def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
+// DUP from a 64-bit register to a 64-bit register is just a copy
+def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
+ (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
+def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
+ (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;
+
def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
(v2f32 (DUPv2i32lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
@@ -4739,16 +4999,6 @@ def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
(MOVID imm0_255:$shift)>;
-def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>;
-
-def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;
-
// EDIT byte mask: 2d
// The movi_edit node has the immediate value already encoded, so we use
@@ -4769,6 +5019,18 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
+// extract is free and this gives better MachineCSE results.
+def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+
+def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
+
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
@@ -5770,6 +6032,41 @@ def : Pat<(i32 (trunc GPR64sp:$src)),
// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
+// Multiply high patterns which multiply the lower subvector using smull/umull
+// and the upper subvector with smull2/umull2. Then shuffle the high
+// part of both results together.
+def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v16i8
+ (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
+def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v8i16
+ (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
+def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
+ (UZP2v4i32
+ (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
+def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v16i8
+ (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
+def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v8i16
+ (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
+def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
+ (UZP2v4i32
+ (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
+ (EXTRACT_SUBREG V128:$Rm, dsub)),
+ (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
+
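A scalar sketch of what these mulhs/mulhu patterns compute. This is illustrative C++, not part of the patch, and it assumes the usual little-endian lane numbering when pairing the UZP2 step with the widened products.

#include <cassert>
#include <cstdint>

// Scalar model of v8i16 mulhs: each result lane is the top 16 bits of the
// 32-bit signed product of the corresponding input lanes.
static int16_t mulhs16(int16_t a, int16_t b) {
  return (int16_t)((int32_t(a) * int32_t(b)) >> 16);
}

int main() {
  int16_t a[8] = {3, -7, 1000, -32768, 32767, 0, 255, -1};
  int16_t b[8] = {9, 100, -1234, -32768, 32767, 5, -255, -1};

  // SMULL/SMULL2 produce the eight full 32-bit products (low half of the
  // vectors, then high half); UZP2 keeps the odd-numbered 16-bit halves,
  // i.e. the upper half of every product, which is exactly mulhs.
  int32_t prod[8];
  int16_t halves[16];
  for (int i = 0; i < 8; ++i) {
    prod[i] = int32_t(a[i]) * int32_t(b[i]);
    halves[2 * i]     = (int16_t)(prod[i] & 0xffff);
    halves[2 * i + 1] = (int16_t)(prod[i] >> 16);
  }
  for (int i = 0; i < 8; ++i)
    assert(halves[2 * i + 1] == mulhs16(a[i], b[i]));
  return 0;
}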
// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
@@ -6481,10 +6778,24 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
Sched<[WriteBrReg]>;
def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
Sched<[WriteBrReg]>;
+ // Indirect tail-call with any register allowed, used by MachineOutliner when
+ // this is proven safe.
+ // FIXME: If we have to add any more hacks like this, we should instead relax
+ // some verifier checks for outlined functions.
+ def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
+ Sched<[WriteBrReg]>;
+ // Indirect tail-call limited to only use registers (x16 and x17) which are
+ // allowed to tail-call a "BTI c" instruction.
+ def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
+ Sched<[WriteBrReg]>;
}
def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
- (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
+ (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
+ Requires<[NotUseBTI]>;
+def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
+ Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index b2b500320b5c..5eb589bf66d5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -65,6 +65,16 @@ private:
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
+ // Helper to generate an equivalent of scalar_to_vector into a new register,
+ // returned via 'Dst'.
+ bool emitScalarToVector(unsigned &Dst, const LLT DstTy,
+ const TargetRegisterClass *DstRC, unsigned Scalar,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineRegisterInfo &MRI) const;
+ bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
@@ -667,7 +677,7 @@ void AArch64InstructionSelector::materializeLargeCMVal(
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineIRBuilder MIB(I);
- auto MovZ = MIB.buildInstr(AArch64::MOVZXi, &AArch64::GPR64RegClass);
+ auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
MovZ->addOperand(MF, I.getOperand(1));
MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
AArch64II::MO_NC);
@@ -779,16 +789,36 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
const unsigned CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- if (selectCompareBranch(I, MF, MRI))
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ bool ProduceNonFlagSettingCondBr =
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+ if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
return true;
- auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
- .addUse(CondReg)
- .addImm(/*bit offset=*/0)
- .addMBB(DestMBB);
+ if (ProduceNonFlagSettingCondBr) {
+ auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
+ .addUse(CondReg)
+ .addImm(/*bit offset=*/0)
+ .addMBB(DestMBB);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ } else {
+ auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
+ .addDef(AArch64::WZR)
+ .addUse(CondReg)
+ .addImm(1);
+ constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
+ auto Bcc =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
+ .addImm(AArch64CC::EQ)
+ .addMBB(DestMBB);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
+ }
}
case TargetOpcode::G_BRINDIRECT: {
@@ -983,6 +1013,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
+ } else if (TM.getCodeModel() == CodeModel::Tiny) {
+ I.setDesc(TII.get(AArch64::ADR));
+ I.getOperand(1).setTargetFlags(OpFlags);
} else {
I.setDesc(TII.get(AArch64::MOVaddr));
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
@@ -1010,12 +1043,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
}
unsigned MemSizeInBits = MemOp.getSize() * 8;
- // FIXME: PR36018: Volatile loads in some cases are incorrectly selected by
- // folding with an extend. Until we have a G_SEXTLOAD solution bail out if
- // we hit one.
- if (Opcode == TargetOpcode::G_LOAD && MemOp.isVolatile())
- return false;
-
const unsigned PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
@@ -1525,11 +1552,178 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
+ case TargetOpcode::G_BUILD_VECTOR:
+ return selectBuildVector(I, MRI);
+ case TargetOpcode::G_MERGE_VALUES:
+ return selectMergeValues(I, MRI);
}
return false;
}
+bool AArch64InstructionSelector::emitScalarToVector(
+ unsigned &Dst, const LLT DstTy, const TargetRegisterClass *DstRC,
+ unsigned Scalar, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, MachineRegisterInfo &MRI) const {
+ Dst = MRI.createVirtualRegister(DstRC);
+
+ unsigned UndefVec = MRI.createVirtualRegister(DstRC);
+ MachineInstr &UndefMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII.get(TargetOpcode::IMPLICIT_DEF))
+ .addDef(UndefVec);
+
+ auto BuildFn = [&](unsigned SubregIndex) {
+ MachineInstr &InsMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII.get(TargetOpcode::INSERT_SUBREG))
+ .addDef(Dst)
+ .addUse(UndefVec)
+ .addUse(Scalar)
+ .addImm(SubregIndex);
+ constrainSelectedInstRegOperands(UndefMI, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+ };
+
+ switch (DstTy.getElementType().getSizeInBits()) {
+ case 32:
+ return BuildFn(AArch64::ssub);
+ case 64:
+ return BuildFn(AArch64::dsub);
+ default:
+ return false;
+ }
+}
+
+bool AArch64InstructionSelector::selectMergeValues(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+ assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
+
+ // At the moment we only support merging two s32s into an s64.
+ if (I.getNumOperands() != 3)
+ return false;
+ if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
+ return false;
+ const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+ if (RB.getID() != AArch64::GPRRegBankID)
+ return false;
+
+ auto *DstRC = &AArch64::GPR64RegClass;
+ unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(SubToRegDef)
+ .addImm(0)
+ .addUse(I.getOperand(1).getReg())
+ .addImm(AArch64::sub_32);
+ unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
+ // Need to anyext the second scalar before we can use bfm
+ MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(SubToRegDef2)
+ .addImm(0)
+ .addUse(I.getOperand(2).getReg())
+ .addImm(AArch64::sub_32);
+ MachineInstr &BFM =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(SubToRegDef)
+ .addUse(SubToRegDef2)
+ .addImm(32)
+ .addImm(31);
+ constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
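Roughly, the SUBREG_TO_REG/BFM sequence emitted here computes the following. The C++ below is an illustrative sketch rather than LLVM code; it relies on BFM Xd, Xn, #32, #31 decoding as a 32-bit wide bitfield insert at bit 32.

#include <cassert>
#include <cstdint>

// The first s32 operand ends up in bits [31:0] and the second in bits
// [63:32] of the merged s64.
static uint64_t mergeS32Pair(uint32_t lo, uint32_t hi) {
  uint64_t d = lo;                              // SUBREG_TO_REG of the first value
  uint64_t n = hi;                              // SUBREG_TO_REG of the second value
  d = (d & 0x00000000ffffffffULL) | (n << 32);  // the BFM (bitfield insert) step
  return d;
}

int main() {
  assert(mergeS32Pair(0x11223344u, 0xdeadbeefu) == 0xdeadbeef11223344ULL);
  return 0;
}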
+
+bool AArch64InstructionSelector::selectBuildVector(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ // Until we port more of the optimized selections, for now just use a vector
+ // insert sequence.
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned EltSize = EltTy.getSizeInBits();
+ if (EltSize < 32 || EltSize > 64)
+ return false; // Don't support all element types yet.
+ const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+ unsigned Opc;
+ unsigned SubregIdx;
+ if (RB.getID() == AArch64::GPRRegBankID) {
+ if (EltSize == 32) {
+ Opc = AArch64::INSvi32gpr;
+ SubregIdx = AArch64::ssub;
+ } else {
+ Opc = AArch64::INSvi64gpr;
+ SubregIdx = AArch64::dsub;
+ }
+ } else {
+ if (EltSize == 32) {
+ Opc = AArch64::INSvi32lane;
+ SubregIdx = AArch64::ssub;
+ } else {
+ Opc = AArch64::INSvi64lane;
+ SubregIdx = AArch64::dsub;
+ }
+ }
+
+ if (EltSize * DstTy.getNumElements() != 128)
+ return false; // Don't handle unpacked vectors yet.
+
+ unsigned DstVec = 0;
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
+ DstTy, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
+ emitScalarToVector(DstVec, DstTy, DstRC, I.getOperand(1).getReg(),
+ *I.getParent(), I.getIterator(), MRI);
+ for (unsigned i = 2, e = DstTy.getSizeInBits() / EltSize + 1; i < e; ++i) {
+ unsigned InsDef;
+ // For the last insert re-use the dst reg of the G_BUILD_VECTOR.
+ if (i + 1 < e)
+ InsDef = MRI.createVirtualRegister(DstRC);
+ else
+ InsDef = I.getOperand(0).getReg();
+ unsigned LaneIdx = i - 1;
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ unsigned ImpDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &ImpDefMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::IMPLICIT_DEF))
+ .addDef(ImpDef);
+ unsigned InsSubDef = MRI.createVirtualRegister(DstRC);
+ MachineInstr &InsSubMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::INSERT_SUBREG))
+ .addDef(InsSubDef)
+ .addUse(ImpDef)
+ .addUse(I.getOperand(i).getReg())
+ .addImm(SubregIdx);
+ MachineInstr &InsEltMI =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
+ .addDef(InsDef)
+ .addUse(DstVec)
+ .addImm(LaneIdx)
+ .addUse(InsSubDef)
+ .addImm(0);
+ constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(InsSubMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(InsEltMI, TII, TRI, RBI);
+ DstVec = InsDef;
+ } else {
+ MachineInstr &InsMI =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
+ .addDef(InsDef)
+ .addUse(DstVec)
+ .addImm(LaneIdx)
+ .addUse(I.getOperand(i).getReg());
+ constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+ DstVec = InsDef;
+ }
+ }
+ I.eraseFromParent();
+ return true;
+}
+
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 327c758a7f8e..6f7fb7a8bc21 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -48,9 +48,21 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
const LLT v2s64 = LLT::vector(2, 64);
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({p0, s1, s8, s16, s32, s64})
- .clampScalar(0, s1, s64)
- .widenScalarToNextPow2(0, 8);
+ .legalFor({p0, s1, s8, s16, s32, s64, v2s64})
+ .clampScalar(0, s1, s64)
+ .widenScalarToNextPow2(0, 8)
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].isVector() &&
+ (Query.Types[0].getElementType() != s64 ||
+ Query.Types[0].getNumElements() != 2);
+ },
+ [=](const LegalityQuery &Query) {
+ LLT EltTy = Query.Types[0].getElementType();
+ if (EltTy == s64)
+ return std::make_pair(0, LLT::vector(2, 64));
+ return std::make_pair(0, EltTy);
+ });
getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s16, s32, s64})
@@ -97,6 +109,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
+ getActionDefinitionsBuilder(G_FCEIL)
+ // If we don't have full FP16 support, then widen s16 to s32 if we
+ // encounter it.
+ .widenScalarIf(
+ [=, &ST](const LegalityQuery &Query) {
+ return Query.Types[0] == s16 && !ST.hasFullFP16();
+ },
+ [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
+ .legalFor({s16, s32, s64, v2s32, v4s32, v2s64});
+
getActionDefinitionsBuilder(G_INSERT)
.unsupportedIf([=](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
@@ -167,9 +189,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.unsupportedIfMemSizeNotPow2()
// Lower any any-extending loads left into G_ANYEXT and G_LOAD
.lowerIf([=](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
+ return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampNumElements(0, v2s32, v2s32);
+ .clampNumElements(0, v2s32, v2s32)
+ .clampMaxNumElements(0, s64, 1);
getActionDefinitionsBuilder(G_STORE)
.legalForTypesWithMemSize({{s8, p0, 8},
@@ -185,9 +208,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.unsupportedIfMemSizeNotPow2()
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
- Query.Types[0].getSizeInBits() != Query.MMODescrs[0].Size * 8;
+ Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampNumElements(0, v2s32, v2s32);
+ .clampNumElements(0, v2s32, v2s32)
+ .clampMaxNumElements(0, s64, 1);
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
@@ -385,13 +409,37 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
});
}
+ getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
+ .unsupportedIf([=](const LegalityQuery &Query) {
+ const LLT &EltTy = Query.Types[1].getElementType();
+ return Query.Types[0] != EltTy;
+ })
+ .minScalar(2, s64)
+ .legalIf([=](const LegalityQuery &Query) {
+ const LLT &VecTy = Query.Types[1];
+ return VecTy == v4s32 || VecTy == v2s64;
+ });
+
+ getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalFor({{v4s32, s32}, {v2s64, s64}})
+ .clampNumElements(0, v4s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+
+ // Deal with larger scalar types, which will be implicitly truncated.
+ .legalIf([=](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() <
+ Query.Types[1].getSizeInBits();
+ })
+ .minScalarSameAs(1, 0);
+
computeTables();
verify(*ST.getInstrInfo());
}
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
switch (MI.getOpcode()) {
default:
// No idea what to do.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
index a745b0edbc6d..77e8bdc7623c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
namespace llvm {
@@ -28,7 +29,8 @@ public:
AArch64LegalizerInfo(const AArch64Subtarget &ST);
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 4a19ecd69103..aa732a99469c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -702,7 +702,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
.add(BaseRegOp)
.addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*MergeMI))
+ .cloneMergedMemRefs({&*I, &*MergeMI})
.setMIFlags(I->mergeFlagsWith(*MergeMI));
(void)MIB;
@@ -819,7 +819,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
.add(RegOp1)
.add(BaseRegOp)
.addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*Paired))
+ .cloneMergedMemRefs({&*I, &*Paired})
.setMIFlags(I->mergeFlagsWith(*Paired));
(void)MIB;
@@ -1338,7 +1338,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*I))
.add(getLdStBaseOp(*I))
.addImm(Value)
- .setMemRefs(I->memoperands_begin(), I->memoperands_end())
+ .setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
} else {
// Paired instruction.
@@ -1349,7 +1349,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*I, 1))
.add(getLdStBaseOp(*I))
.addImm(Value / Scale)
- .setMemRefs(I->memoperands_begin(), I->memoperands_end())
+ .setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
}
(void)MIB;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index 6c0263585933..d71359223b1b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -44,16 +45,31 @@ AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
assert(TheTriple.isOSWindows() &&
"Windows is the only supported COFF target");
- bool IsIndirect = (TargetFlags & AArch64II::MO_DLLIMPORT);
+ bool IsIndirect = (TargetFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB));
if (!IsIndirect)
return Printer.getSymbol(GV);
SmallString<128> Name;
- Name = "__imp_";
+ if (TargetFlags & AArch64II::MO_DLLIMPORT)
+ Name = "__imp_";
+ else if (TargetFlags & AArch64II::MO_COFFSTUB)
+ Name = ".refptr.";
Printer.TM.getNameWithPrefix(Name, GV,
Printer.getObjFileLowering().getMangler());
- return Ctx.getOrCreateSymbol(Name);
+ MCSymbol *MCSym = Ctx.getOrCreateSymbol(Name);
+
+ if (TargetFlags & AArch64II::MO_COFFSTUB) {
+ MachineModuleInfoCOFF &MMICOFF =
+ Printer.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMICOFF.getGVStubEntry(MCSym);
+
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(Printer.getSymbol(GV), true);
+ }
+
+ return MCSym;
}
MCSymbol *
@@ -173,20 +189,51 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
MCSymbol *Sym) const {
- AArch64MCExpr::VariantKind RefKind = AArch64MCExpr::VK_NONE;
+ uint32_t RefFlags = 0;
+
if (MO.getTargetFlags() & AArch64II::MO_TLS) {
if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGEOFF)
- RefKind = AArch64MCExpr::VK_SECREL_LO12;
+ RefFlags |= AArch64MCExpr::VK_SECREL_LO12;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
AArch64II::MO_HI12)
- RefKind = AArch64MCExpr::VK_SECREL_HI12;
+ RefFlags |= AArch64MCExpr::VK_SECREL_HI12;
+
+ } else if (MO.getTargetFlags() & AArch64II::MO_S) {
+ RefFlags |= AArch64MCExpr::VK_SABS;
+ } else {
+ RefFlags |= AArch64MCExpr::VK_ABS;
+ }
+
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3)
+ RefFlags |= AArch64MCExpr::VK_G3;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G2)
+ RefFlags |= AArch64MCExpr::VK_G2;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G1)
+ RefFlags |= AArch64MCExpr::VK_G1;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
+ RefFlags |= AArch64MCExpr::VK_G0;
+
+ // FIXME: Currently we only set VK_NC for MO_G3/MO_G2/MO_G1/MO_G0. This is
+ // because setting VK_NC for others would mean setting their respective
+ // RefFlags correctly. We should do this in a separate patch.
+ if (MO.getTargetFlags() & AArch64II::MO_NC) {
+ auto MOFrag = (MO.getTargetFlags() & AArch64II::MO_FRAGMENT);
+ if (MOFrag == AArch64II::MO_G3 || MOFrag == AArch64II::MO_G2 ||
+ MOFrag == AArch64II::MO_G1 || MOFrag == AArch64II::MO_G0)
+ RefFlags |= AArch64MCExpr::VK_NC;
}
+
const MCExpr *Expr =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+
+ auto RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
+ assert(RefKind != AArch64MCExpr::VK_INVALID &&
+ "Invalid relocation requested");
Expr = AArch64MCExpr::create(Expr, RefKind, Ctx);
+
return MCOperand::createExpr(Expr);
}
@@ -253,4 +300,17 @@ void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
+
+ switch (OutMI.getOpcode()) {
+ case AArch64::CATCHRET:
+ OutMI = MCInst();
+ OutMI.setOpcode(AArch64::RET);
+ OutMI.addOperand(MCOperand::createReg(AArch64::LR));
+ break;
+ case AArch64::CLEANUPRET:
+ OutMI = MCInst();
+ OutMI.setOpcode(AArch64::RET);
+ OutMI.addOperand(MCOperand::createReg(AArch64::LR));
+ break;
+ }
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index e42214d15699..5183e7d3c0d0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include <cassert>
@@ -97,6 +98,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// attribute, in which case it is set to false at construction.
Optional<bool> HasRedZone;
+ /// ForwardedMustTailRegParms - A list of virtual and physical registers
+ /// that must be forwarded to every musttail call.
+ SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
public:
AArch64FunctionInfo() = default;
@@ -162,6 +166,19 @@ public:
unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
+ unsigned getJumpTableEntrySize(int Idx) const {
+ auto It = JumpTableEntryInfo.find(Idx);
+ if (It != JumpTableEntryInfo.end())
+ return It->second.first;
+ return 4;
+ }
+ MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
+ return JumpTableEntryInfo.find(Idx)->second.second;
+ }
+ void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
+ JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
+ }
+
using SetOfInstructions = SmallPtrSet<const MachineInstr *, 16>;
const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
@@ -196,10 +213,16 @@ public:
LOHRelated.insert(Args.begin(), Args.end());
}
+ SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
+ return ForwardedMustTailRegParms;
+ }
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
+
+ DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index bc0168e783be..bc596dd38b6e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -20,161 +20,175 @@ using namespace llvm;
namespace {
-// Fuse CMN, CMP, TST followed by Bcc.
+/// CMN, CMP, TST followed by Bcc
static bool isArithmeticBccPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- if (SecondMI.getOpcode() == AArch64::Bcc) {
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
- return true;
+ if (SecondMI.getOpcode() != AArch64::Bcc)
+ return false;
- switch (FirstMI->getOpcode()) {
- case AArch64::ADDSWri:
- case AArch64::ADDSWrr:
- case AArch64::ADDSXri:
- case AArch64::ADDSXrr:
- case AArch64::ANDSWri:
- case AArch64::ANDSWrr:
- case AArch64::ANDSXri:
- case AArch64::ANDSXrr:
- case AArch64::SUBSWri:
- case AArch64::SUBSWrr:
- case AArch64::SUBSXri:
- case AArch64::SUBSXrr:
- case AArch64::BICSWrr:
- case AArch64::BICSXrr:
- return true;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
- }
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDSWri:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
}
+
return false;
}
-// Fuse ALU operations followed by CBZ/CBNZ.
+/// ALU operations followed by CBZ/CBNZ.
static bool isArithmeticCbzPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
+ if (SecondMI.getOpcode() != AArch64::CBZW &&
+ SecondMI.getOpcode() != AArch64::CBZX &&
+ SecondMI.getOpcode() != AArch64::CBNZW &&
+ SecondMI.getOpcode() != AArch64::CBNZX)
+ return false;
- if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
- SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
- // Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
- return true;
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
- switch (FirstMI->getOpcode()) {
- case AArch64::ADDWri:
- case AArch64::ADDWrr:
- case AArch64::ADDXri:
- case AArch64::ADDXrr:
- case AArch64::ANDWri:
- case AArch64::ANDWrr:
- case AArch64::ANDXri:
- case AArch64::ANDXrr:
- case AArch64::EORWri:
- case AArch64::EORWrr:
- case AArch64::EORXri:
- case AArch64::EORXrr:
- case AArch64::ORRWri:
- case AArch64::ORRWrr:
- case AArch64::ORRXri:
- case AArch64::ORRXrr:
- case AArch64::SUBWri:
- case AArch64::SUBWrr:
- case AArch64::SUBXri:
- case AArch64::SUBXrr:
- return true;
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
- }
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWri:
+ case AArch64::ADDWrr:
+ case AArch64::ADDXri:
+ case AArch64::ADDXrr:
+ case AArch64::ANDWri:
+ case AArch64::ANDWrr:
+ case AArch64::ANDXri:
+ case AArch64::ANDXrr:
+ case AArch64::EORWri:
+ case AArch64::EORWrr:
+ case AArch64::EORXri:
+ case AArch64::EORXrr:
+ case AArch64::ORRWri:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXri:
+ case AArch64::ORRXrr:
+ case AArch64::SUBWri:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXri:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
}
+
return false;
}
-// Fuse AES crypto encoding or decoding.
+/// AES crypto encoding or decoding.
static bool isAESPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
+ switch (SecondMI.getOpcode()) {
// AES encode.
- if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::AESErr) &&
- (SecondOpcode == AArch64::AESMCrr ||
- SecondOpcode == AArch64::AESMCrrTied))
- return true;
+ case AArch64::AESMCrr:
+ case AArch64::AESMCrrTied:
+ return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESErr;
// AES decode.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::AESDrr) &&
- (SecondOpcode == AArch64::AESIMCrr ||
- SecondOpcode == AArch64::AESIMCrrTied))
+ case AArch64::AESIMCrr:
+ case AArch64::AESIMCrrTied:
+ return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESDrr;
+ }
+
+ return false;
+}
+
+/// AESE/AESD/PMULL + EOR.
+static bool isCryptoEORPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != AArch64::EORv16i8)
+ return false;
+
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ switch (FirstMI->getOpcode()) {
+ case AArch64::AESErr:
+ case AArch64::AESDrr:
+ case AArch64::PMULLv16i8:
+ case AArch64::PMULLv8i8:
+ case AArch64::PMULLv1i64:
+ case AArch64::PMULLv2i64:
return true;
+ }
return false;
}
-// Fuse literal generation.
+/// Literal generation.
static bool isLiteralsPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
// PC relative address.
- if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::ADRP) &&
- SecondOpcode == AArch64::ADDXri)
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
+ SecondMI.getOpcode() == AArch64::ADDXri)
return true;
+
// 32 bit immediate.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::MOVZWi) &&
- (SecondOpcode == AArch64::MOVKWi &&
- SecondMI.getOperand(3).getImm() == 16))
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) &&
+ (SecondMI.getOpcode() == AArch64::MOVKWi &&
+ SecondMI.getOperand(3).getImm() == 16))
return true;
+
// Lower half of 64 bit immediate.
- else if((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- FirstOpcode == AArch64::MOVZXi) &&
- (SecondOpcode == AArch64::MOVKXi &&
- SecondMI.getOperand(3).getImm() == 16))
+ if((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZXi) &&
+ (SecondMI.getOpcode() == AArch64::MOVKXi &&
+ SecondMI.getOperand(3).getImm() == 16))
return true;
+
// Upper half of 64 bit immediate.
- else if ((FirstOpcode == AArch64::INSTRUCTION_LIST_END ||
- (FirstOpcode == AArch64::MOVKXi &&
- FirstMI->getOperand(3).getImm() == 32)) &&
- (SecondOpcode == AArch64::MOVKXi &&
- SecondMI.getOperand(3).getImm() == 48))
+ if ((FirstMI == nullptr ||
+ (FirstMI->getOpcode() == AArch64::MOVKXi &&
+ FirstMI->getOperand(3).getImm() == 32)) &&
+ (SecondMI.getOpcode() == AArch64::MOVKXi &&
+ SecondMI.getOperand(3).getImm() == 48))
return true;
return false;
}
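For context, the MOVZ/MOVK chains these fusion checks target assemble a wide immediate one 16-bit chunk at a time, and the fused pairs are the adjacent links of that chain (MOVZ with MOVK lsl #16, and MOVK lsl #32 with MOVK lsl #48). A minimal sketch of that arithmetic in plain C++, not part of the patch:

#include <cassert>
#include <cstdint>

// Each instruction contributes one 16-bit chunk at a fixed shift: MOVZ zeroes
// the register and writes its chunk, MOVK keeps the other bits.
static uint64_t materialize(uint16_t c0, uint16_t c1, uint16_t c2, uint16_t c3) {
  uint64_t x = c0;                                      // MOVZ Xd, #c0
  x = (x & ~(0xffffULL << 16)) | (uint64_t(c1) << 16);  // MOVK Xd, #c1, lsl #16
  x = (x & ~(0xffffULL << 32)) | (uint64_t(c2) << 32);  // MOVK Xd, #c2, lsl #32
  x = (x & ~(0xffffULL << 48)) | (uint64_t(c3) << 48);  // MOVK Xd, #c3, lsl #48
  return x;
}

int main() {
  assert(materialize(0xcdef, 0x90ab, 0x5678, 0x1234) == 0x1234567890abcdefULL);
  return 0;
}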
-// Fuse address generation and loads or stores.
+/// Fuse address generation and loads or stores.
static bool isAddressLdStPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- switch (SecondOpcode) {
+ switch (SecondMI.getOpcode()) {
case AArch64::STRBBui:
case AArch64::STRBui:
case AArch64::STRDui:
@@ -199,63 +213,164 @@ static bool isAddressLdStPair(const MachineInstr *FirstMI,
case AArch64::LDRSHXui:
case AArch64::LDRSWui:
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
- switch (FirstMI->getOpcode()) {
+ switch (FirstMI->getOpcode()) {
case AArch64::ADR:
- return (SecondMI.getOperand(2).getImm() == 0);
+ return SecondMI.getOperand(2).getImm() == 0;
case AArch64::ADRP:
return true;
}
}
+
return false;
}
-// Fuse compare and conditional select.
+/// Compare and conditional select.
static bool isCCSelectPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
- unsigned SecondOpcode = SecondMI.getOpcode();
-
// 32 bits
- if (SecondOpcode == AArch64::CSELWr) {
+ if (SecondMI.getOpcode() == AArch64::CSELWr) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
if (FirstMI->definesRegister(AArch64::WZR))
switch (FirstMI->getOpcode()) {
case AArch64::SUBSWrs:
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
case AArch64::SUBSWrx:
- return (!AArch64InstrInfo::hasExtendedReg(*FirstMI));
+ return !AArch64InstrInfo::hasExtendedReg(*FirstMI);
case AArch64::SUBSWrr:
case AArch64::SUBSWri:
return true;
}
}
+
// 64 bits
- else if (SecondOpcode == AArch64::CSELXr) {
+ if (SecondMI.getOpcode() == AArch64::CSELXr) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- if (!FirstMI)
+ if (FirstMI == nullptr)
return true;
if (FirstMI->definesRegister(AArch64::XZR))
switch (FirstMI->getOpcode()) {
case AArch64::SUBSXrs:
- return (!AArch64InstrInfo::hasShiftedReg(*FirstMI));
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
case AArch64::SUBSXrx:
case AArch64::SUBSXrx64:
- return (!AArch64InstrInfo::hasExtendedReg(*FirstMI));
+ return !AArch64InstrInfo::hasExtendedReg(*FirstMI);
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
return true;
}
}
+
+ return false;
+}
+
+/// Arithmetic and logic.
+static bool isArithmeticLogicPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (AArch64InstrInfo::hasShiftedReg(SecondMI))
+ return false;
+
+ switch (SecondMI.getOpcode()) {
+ // Arithmetic
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ // Logic
+ case AArch64::ANDWrr:
+ case AArch64::ANDXrr:
+ case AArch64::BICWrr:
+ case AArch64::BICXrr:
+ case AArch64::EONWrr:
+ case AArch64::EONXrr:
+ case AArch64::EORWrr:
+ case AArch64::EORXrr:
+ case AArch64::ORNWrr:
+ case AArch64::ORNXrr:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXrr:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ case AArch64::EONWrs:
+ case AArch64::EONXrs:
+ case AArch64::EORWrs:
+ case AArch64::EORXrs:
+ case AArch64::ORNWrs:
+ case AArch64::ORNXrs:
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ // Arithmetic
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+ }
+ break;
+
+ // Arithmetic, setting flags.
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr)
+ return true;
+
+ // Arithmetic, not setting flags.
+ switch (FirstMI->getOpcode()) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
+ }
+ break;
+ }
+
return false;
}
-/// Check if the instr pair, FirstMI and SecondMI, should be fused
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
@@ -264,18 +379,24 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const MachineInstr &SecondMI) {
const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
+ // All checking functions assume that the 1st instr is a wildcard if it is
+ // unspecified.
if (ST.hasArithmeticBccFusion() && isArithmeticBccPair(FirstMI, SecondMI))
return true;
if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
return true;
+ if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI))
+ return true;
if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseCCSelect() && isCCSelectPair(FirstMI, SecondMI))
return true;
+ if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI))
+ return true;
return false;
}
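Each of the is*Pair helpers dispatched from shouldScheduleAdjacent above, including the new isCryptoEORPair and isArithmeticLogicPair checks, follows the same shape: if the second instruction is a fusion candidate and the first is unspecified (null), report true so the scheduler can ask whether the second instruction could end a fused pair at all; otherwise test the first instruction's opcode. A minimal standalone sketch of that shape, in plain C++ with hypothetical opcode names rather than LLVM's MachineInstr API:

#include <optional>

enum Opcode { SUBS, CSEL, OTHER };   // hypothetical opcodes, for illustration only

// Returns true when the (First, Second) pair may fuse.  Passing no first
// opcode treats it as a wildcard, mirroring the FirstMI == nullptr case in
// the helpers above.
bool mayFusePair(std::optional<Opcode> First, Opcode Second) {
  if (Second != CSEL)      // the second instruction must be a candidate
    return false;
  if (!First)              // unspecified first instruction: assume a wildcard
    return true;
  return *First == SUBS;   // otherwise require a matching producer
}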
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td b/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td
new file mode 100644
index 000000000000..16ba3e4282a0
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PfmCounters.td
@@ -0,0 +1,19 @@
+//===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the available hardware counters for AArch64.
+//
+//===----------------------------------------------------------------------===//
+
+def CpuCyclesPfmCounter : PfmCounter<"CPU_CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
+}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..3da9306e6460
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -0,0 +1,108 @@
+//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+namespace {
+class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
+public:
+ AArch64PreLegalizerCombinerInfo()
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr) {}
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B);
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ return Helper.tryCombineExtendingLoads(MI);
+ }
+
+ return false;
+}
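// The combine hook above routes G_LOAD, G_SEXTLOAD and G_ZEXTLOAD through
// CombinerHelper::tryCombineExtendingLoads.  The toy model below (plain C++,
// not LLVM's MIR API; the helper's actual matching is more general) only
// illustrates the effect of that rewrite: a separate extend of a loaded value
// collapses into a single extending load.
enum class Op { Load, SExt, SExtLoad };   // toy opcodes, illustration only
struct Inst { Op Opcode; Inst *Src; };    // toy single-operand instruction

// If I is a sign-extend fed directly by a plain load, rewrite I in place as a
// sign-extending load of the same address and report that a change was made.
bool tryCombineExtendingLoad(Inst &I) {
  if (I.Opcode != Op::SExt || !I.Src || I.Src->Opcode != Op::Load)
    return false;
  I.Opcode = Op::SExtLoad;
  I.Src = I.Src->Src;   // read straight from the load's address operand
  return true;
}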
+
+// Pass boilerplate
+// ================
+
+class AArch64PreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PreLegalizerCombiner();
+
+ StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+}
+
+void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() : MachineFunctionPass(ID) {
+ initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ AArch64PreLegalizerCombinerInfo PCInfo;
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AArch64PreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AArch64 machine instrs before legalization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AArch64 machine instrs before legalization", false,
+ false)
+
+
+namespace llvm {
+FunctionPass *createAArch64PreLegalizeCombiner() {
+ return new AArch64PreLegalizerCombiner();
+}
+} // end namespace llvm
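The file above only defines the pass and its factory function; hooking it into the GlobalISel pipeline happens on the target-machine side, which is not part of this hunk. A sketch of the usual wiring, with the pass-config type below as a stand-in (the hook name and structure are assumptions, not taken from this diff):

namespace llvm { class FunctionPass; FunctionPass *createAArch64PreLegalizeCombiner(); }

struct SketchPassConfig {                // stand-in for the target's TargetPassConfig
  void addPass(llvm::FunctionPass *P);   // declaration only, for illustration
  void addPreLegalizeMachineIR() {       // runs before the GlobalISel legalizer
    addPass(llvm::createAArch64PreLegalizeCombiner());
  }
};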
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index c497669f937f..68c48a5ec216 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -389,6 +389,7 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCEIL:
return true;
}
return false;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index a7c2c1b8125b..96ae45ae3d0d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -42,12 +43,16 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
+ return CSR_Win_AArch64_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
+ return CSR_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
@@ -72,6 +77,23 @@ const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
return nullptr;
}
+void AArch64RegisterInfo::UpdateCustomCalleeSavedRegs(
+ MachineFunction &MF) const {
+ const MCPhysReg *CSRs = getCalleeSavedRegs(&MF);
+ SmallVector<MCPhysReg, 32> UpdatedCSRs;
+ for (const MCPhysReg *I = CSRs; *I; ++I)
+ UpdatedCSRs.push_back(*I);
+
+ for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
+ UpdatedCSRs.push_back(AArch64::GPR64commonRegClass.getRegister(i));
+ }
+ }
+ // Register lists are zero-terminated.
+ UpdatedCSRs.push_back(0);
+ MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs);
+}
+
const TargetRegisterClass *
AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
unsigned Idx) const {
@@ -97,6 +119,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::CXX_FAST_TLS)
return SCS ? CSR_AArch64_CXX_TLS_Darwin_SCS_RegMask
: CSR_AArch64_CXX_TLS_Darwin_RegMask;
+ if (CC == CallingConv::AArch64_VectorCall)
+ return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
@@ -117,6 +141,30 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
return CSR_AArch64_TLS_ELF_RegMask;
}
+void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF,
+ const uint32_t **Mask) const {
+ uint32_t *UpdatedMask = MF.allocateRegMask();
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(getNumRegs());
+ memcpy(UpdatedMask, *Mask, sizeof(UpdatedMask[0]) * RegMaskSize);
+
+ for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
+ for (MCSubRegIterator SubReg(AArch64::GPR64commonRegClass.getRegister(i),
+ this, true);
+ SubReg.isValid(); ++SubReg) {
+ // See TargetRegisterInfo::getCallPreservedMask for how to interpret the
+ // register mask.
+ UpdatedMask[*SubReg / 32] |= 1u << (*SubReg % 32);
+ }
+ }
+ }
+ *Mask = UpdatedMask;
+}
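// The update above relies on the regmask layout documented in
// TargetRegisterInfo::getCallPreservedMask: register number N is preserved
// across the call iff bit (N % 32) of 32-bit word (N / 32) is set.  A minimal
// standalone sketch of that layout (plain C++ helpers, illustrative only):
#include <cstdint>
#include <vector>

inline void markPreserved(std::vector<uint32_t> &Mask, unsigned Reg) {
  Mask[Reg / 32] |= 1u << (Reg % 32);          // same update as UpdatedMask above
}

inline bool isPreserved(const std::vector<uint32_t> &Mask, unsigned Reg) {
  return (Mask[Reg / 32] >> (Reg % 32)) & 1u;  // test a single register's bit
}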
+
+const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const {
+ return CSR_AArch64_NoRegs_RegMask;
+}
+
const uint32_t *
AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -147,46 +195,46 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasFP(MF) || TT.isOSDarwin())
markSuperRegs(Reserved, AArch64::W29);
- if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
- markSuperRegs(Reserved, AArch64::W18); // Platform register
-
- if (MF.getSubtarget<AArch64Subtarget>().isX20Reserved())
- markSuperRegs(Reserved, AArch64::W20); // Platform register
+ for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
+ if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
+ markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
+ }
if (hasBasePointer(MF))
markSuperRegs(Reserved, AArch64::W19);
+ // SLH uses register W16/X16 as the taint register.
+ if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ markSuperRegs(Reserved, AArch64::W16);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
unsigned Reg) const {
- const AArch64FrameLowering *TFI = getFrameLowering(MF);
+ return getReservedRegs(MF)[Reg];
+}
- switch (Reg) {
- default:
- break;
- case AArch64::SP:
- case AArch64::XZR:
- case AArch64::WSP:
- case AArch64::WZR:
- return true;
- case AArch64::X18:
- case AArch64::W18:
- return MF.getSubtarget<AArch64Subtarget>().isX18Reserved();
- case AArch64::X19:
- case AArch64::W19:
- return hasBasePointer(MF);
- case AArch64::X20:
- case AArch64::W20:
- return MF.getSubtarget<AArch64Subtarget>().isX20Reserved();
- case AArch64::FP:
- case AArch64::W29:
- return TFI->hasFP(MF) || TT.isOSDarwin();
- }
+bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
+ // FIXME: Get the list of argument registers from TableGen.
+ static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7 };
+ return std::any_of(std::begin(GPRArgRegs), std::end(GPRArgRegs),
+ [this, &MF](MCPhysReg r){return isReservedReg(MF, r);});
+}
- return false;
+void AArch64RegisterInfo::emitReservedArgRegCallError(
+ const MachineFunction &MF) const {
+ const Function &F = MF.getFunction();
+ F.getContext().diagnose(DiagnosticInfoUnsupported{F, "AArch64 doesn't support"
+ " function calls if any of the argument registers is reserved."});
+}
+
+bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return !isReservedReg(MF, PhysReg);
}
bool AArch64RegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
@@ -211,14 +259,15 @@ unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- // In the presence of variable sized objects, if the fixed stack size is
- // large enough that referencing from the FP won't result in things being
- // in range relatively often, we can use a base pointer to allow access
+ // In the presence of variable sized objects or funclets, if the fixed stack
+ // size is large enough that referencing from the FP won't result in things
+ // being in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
+ //
// Furthermore, if both variable sized objects are present, and the
// stack needs to be dynamically re-aligned, the base pointer is the only
// reliable way to reference the locals.
- if (MFI.hasVarSizedObjects()) {
+ if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
if (needsStackRealignment(MF))
return true;
// Conservatively estimate whether the negative offset from the frame
@@ -449,10 +498,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64commonRegClassID:
return 32 - 1 // XZR/SP
- (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
- - MF.getSubtarget<AArch64Subtarget>()
- .isX18Reserved() // X18 reserved as platform register
- - MF.getSubtarget<AArch64Subtarget>()
- .isX20Reserved() // X20 reserved as platform register
+ - MF.getSubtarget<AArch64Subtarget>().getNumXRegisterReserved()
- hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 57000d37090d..c4153228a7c0 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -30,7 +30,18 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
public:
AArch64RegisterInfo(const Triple &TT);
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const {
+ return getEncodingValue(i);
+ }
+
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+ bool isAnyArgRegReserved(const MachineFunction &MF) const;
+ void emitReservedArgRegCallError(const MachineFunction &MF) const;
+
+ void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const;
+ void UpdateCustomCallPreservedMask(MachineFunction &MF,
+ const uint32_t **Mask) const;
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
@@ -54,6 +65,9 @@ public:
// normal calls, so they need a different mask to represent this.
const uint32_t *getTLSCallPreservedMask() const;
+ // Funclets on ARM64 Windows don't preserve any registers.
+ const uint32_t *getNoPreservedMask() const override;
+
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i64 first argument if the calling convention
/// is one that can (partially) model this attribute with a preserved mask
@@ -69,6 +83,8 @@ public:
const uint32_t *getWindowsStackProbePreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const override;
bool isConstantPhysReg(unsigned PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
@@ -76,8 +92,6 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index bbf401b474ca..d3710cea0687 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -200,6 +200,12 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
X22, X23, X24, X25, X26,
X27, X28, FP, LR)>;
+// Restricted set of tail call registers, for use when branch target
+// enforcement is enabled. These are the only registers which can be used to
+// indirectly branch (not call) to the "BTI c" instruction at the start of a
+// BTI-protected function.
+def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
+
// GPR register classes for post increment amount of vector load/store that
// has alternate printing when Rm=31 and prints a constant immediate value
// equal to the total number of bytes transferred.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
index ecc68aed1550..f757d53b6c1c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM1.td
@@ -62,13 +62,6 @@ def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
M1UnitNAL1]>; // All simple vector
//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
- MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
// Coarse scheduling model.
def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
@@ -85,14 +78,15 @@ def M1WriteAC : SchedWriteRes<[M1UnitALU,
def M1WriteAD : SchedWriteRes<[M1UnitALU,
M1UnitC]> { let Latency = 2;
let NumMicroOps = 2; }
-def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
- SchedVar<NoSchedPred, [M1WriteAA]>]>;
+def M1WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M1WriteA1]>,
+ SchedVar<ExynosLogicPred, [M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteAA]>]>;
def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>,
- SchedVar<NoSchedPred, [M1WriteAC]>]>;
+def M1WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M1WriteAC]>,
+ SchedVar<NoSchedPred, [M1WriteAB]>]>;
def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
@@ -110,40 +104,27 @@ def M1WriteLD : SchedWriteRes<[M1UnitL,
let ResourceCycles = [2, 1]; }
def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
- SchedVar<NoSchedPred, [M1WriteLC]>]>;
-def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
- SchedVar<NoSchedPred, [M1WriteLD]>]>;
+def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteLC]>,
+ SchedVar<NoSchedPred, [M1WriteL5]>]>;
def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
def M1WriteSA : SchedWriteRes<[M1UnitS,
M1UnitFST,
- M1UnitS,
- M1UnitFST]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteSB : SchedWriteRes<[M1UnitS,
- M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 2; }
-def M1WriteSC : SchedWriteRes<[M1UnitS,
+def M1WriteSB : SchedWriteRes<[M1UnitS,
M1UnitFST,
M1UnitS,
M1UnitFST,
M1UnitA]> { let Latency = 3;
let NumMicroOps = 3; }
-def M1WriteSD : SchedWriteRes<[M1UnitS,
- M1UnitFST,
- M1UnitA]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M1WriteSE : SchedWriteRes<[M1UnitS,
+def M1WriteSC : SchedWriteRes<[M1UnitS,
M1UnitA]> { let Latency = 2;
let NumMicroOps = 2; }
-def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
- SchedVar<NoSchedPred, [M1WriteSE]>]>;
-def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
- SchedVar<NoSchedPred, [M1WriteSB]>]>;
+def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteSC]>,
+ SchedVar<NoSchedPred, [M1WriteS1]>]>;
def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
@@ -414,9 +395,9 @@ def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
- M1UnitFST]> { let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [1, 7, 1, 7, 1]; }
+ M1UnitFST]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 7, 1, 7, 1]; }
def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
@@ -427,9 +408,17 @@ def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
M1UnitS,
M1UnitFST,
M1UnitFST,
- M1UnitFST]> { let Latency = 17;
- let NumMicroOps = 7;
- let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
+ M1UnitFST]> { let Latency = 17;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
+
+// Special cases.
+def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
+def M1WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M1WriteNALU1]>,
+ SchedVar<NoSchedPred, [M1WriteA1]>]>;
+
+// Fast forwarding.
+def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
// Branch instructions
def : InstRW<[M1WriteB1], (instrs Bcc)>;
@@ -439,8 +428,11 @@ def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M1WriteA1], (instrs COPY)>;
-def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
+def : InstRW<[M1WriteAX], (instregex ".+rx(64)?$")>;
+def : InstRW<[M1WriteAX], (instregex ".+rs$")>;
+
+// Move instructions.
+def : InstRW<[M1WriteCOPY], (instrs COPY)>;
// Divide and multiply instructions.
@@ -450,10 +442,20 @@ def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
def : InstRW<[M1WriteLB,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
-def : InstRW<[M1WriteLX,
- ReadAdrBase], (instregex "^PRFMro[WX]")>;
+def : InstRW<[M1WriteLC,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[M1WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M1WriteLC,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[M1WriteL5,
+ ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
+def : InstRW<[M1WriteSC,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
@@ -487,8 +489,10 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M1WriteLY,
- ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
+def : InstRW<[M1WriteLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
def : InstRW<[M1WriteLD,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
@@ -507,14 +511,16 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M1WriteSY,
- ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
-def : InstRW<[M1WriteSB,
+def : InstRW<[M1WriteSA,
+ ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BDHS]roX")>;
+def : InstRW<[M1WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STP[DS](post|pre)")>;
-def : InstRW<[M1WriteSC,
+def : InstRW<[M1WriteSB,
WriteAdr], (instregex "^STPQ(post|pre)")>;
// ASIMD instructions.
@@ -608,21 +614,21 @@ def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
def : InstRW<[M1WriteVLDE,
WriteAdr], (instregex "LD1i(64)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[M1WriteL5,
+def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[M1WriteVLDA,
@@ -830,8 +836,6 @@ def : InstRW<[M1WriteVSTI,
WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
// Cryptography instructions.
-def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
-def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index 5e5369a5a7fe..15935088a17e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -26,9 +26,6 @@ def ExynosM3Model : SchedMachineModel {
let CompleteModel = 1; // Use the default model otherwise.
list<Predicate> UnsupportedFeatures = [HasSVE];
-
- // FIXME: Remove when all errors have been fixed.
- let FullInstRWOverlapCheck = 0;
}
//===----------------------------------------------------------------------===//
@@ -107,23 +104,12 @@ def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
M3UnitNSHF2]>;
//===----------------------------------------------------------------------===//
-// Predicates.
-
-def M3BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
- MI->getOperand(0).isReg() &&
- MI->getOperand(0).getReg() != AArch64::LR}]>;
-def M3ResetFastPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
-def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
- MI->getOpcode() == AArch64::EXTRXrri) &&
- MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
-def M3ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
-
-//===----------------------------------------------------------------------===//
// Coarse scheduling model.
def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0;
let NumMicroOps = 1; }
+def M3WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
+ let NumMicroOps = 0; }
def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; }
def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2;
@@ -140,15 +126,23 @@ def M3WriteAD : SchedWriteRes<[M3UnitALU,
let NumMicroOps = 2; }
def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
-def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
- SchedVar<M3ShiftLeftFastPred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
-def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotateRightFastPred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<ExynosArithPred, [M3WriteA1]>,
+ SchedVar<ExynosLogicPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAV : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
+ SchedVar<ExynosLogicPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
-def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkFastPred, [M3WriteAB]>,
- SchedVar<NoSchedPred, [M3WriteAC]>]>;
+def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
+ SchedVar<NoSchedPred, [M3WriteAB]>]>;
def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
@@ -165,44 +159,38 @@ def M3WriteLC : SchedWriteRes<[M3UnitA,
def M3WriteLD : SchedWriteRes<[M3UnitA,
M3UnitL]> { let Latency = 4;
let NumMicroOps = 2; }
+def M3WriteLE : SchedWriteRes<[M3UnitA,
+ M3UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
let NumMicroOps = 0; }
-
-def M3WriteLX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteL5]>,
- SchedVar<NoSchedPred, [M3WriteLB]>]>;
+def M3WriteLX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M3WriteL5]>,
+ SchedVar<NoSchedPred, [M3WriteL4]>]>;
def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
def M3WriteSA : SchedWriteRes<[M3UnitA,
M3UnitS,
- M3UnitFST]> { let Latency = 2;
+ M3UnitFST]> { let Latency = 3;
let NumMicroOps = 2; }
def M3WriteSB : SchedWriteRes<[M3UnitA,
- M3UnitS]> { let Latency = 1;
- let NumMicroOps = 2; }
-def M3WriteSC : SchedWriteRes<[M3UnitA,
M3UnitS]> { let Latency = 2;
let NumMicroOps = 2; }
-def M3WriteSX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
- SchedVar<NoSchedPred, [M3WriteSB]>]>;
-def M3WriteSY : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
- SchedVar<NoSchedPred, [M3WriteSC]>]>;
-
-def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
- SchedVar<NoSchedPred, [ReadDefault]>]>;
+def M3ReadAdrBase : SchedReadVariant<[SchedVar<ExynosScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
// Branch instructions.
def : SchedAlias<WriteBr, M3WriteZ0>;
-def : WriteRes<WriteBrReg, [M3UnitC]> { let Latency = 1; }
+def : SchedAlias<WriteBrReg, M3WriteC1>;
// Arithmetic and logical integer instructions.
-def : WriteRes<WriteI, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIEReg, [M3UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteIS, [M3UnitALU]> { let Latency = 1; }
+def : SchedAlias<WriteI, M3WriteA1>;
+def : SchedAlias<WriteISReg, M3WriteA1>;
+def : SchedAlias<WriteIEReg, M3WriteA1>;
+def : SchedAlias<WriteIS, M3WriteA1>;
// Move instructions.
-def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; }
+def : SchedAlias<WriteImm, M3WriteA1>;
// Divide and multiply instructions.
def : WriteRes<WriteID32, [M3UnitC,
@@ -216,26 +204,23 @@ def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
let ResourceCycles = [2]; }
// Miscellaneous instructions.
-def : WriteRes<WriteExtr, [M3UnitALU,
- M3UnitALU]> { let Latency = 1;
- let NumMicroOps = 2; }
+def : SchedAlias<WriteExtr, M3WriteAY>;
// Addressing modes.
-def : WriteRes<WriteAdr, []> { let Latency = 1;
- let NumMicroOps = 0; }
+def : SchedAlias<WriteAdr, M3WriteZ1>;
def : SchedAlias<ReadAdrBase, M3ReadAdrBase>;
// Load instructions.
def : SchedAlias<WriteLD, M3WriteL4>;
def : WriteRes<WriteLDHi, []> { let Latency = 4;
let NumMicroOps = 0; }
-def : SchedAlias<WriteLDIdx, M3WriteLX>;
+def : SchedAlias<WriteLDIdx, M3WriteLB>;
// Store instructions.
def : SchedAlias<WriteST, M3WriteS1>;
def : SchedAlias<WriteSTP, M3WriteS1>;
def : SchedAlias<WriteSTX, M3WriteS1>;
-def : SchedAlias<WriteSTIdx, M3WriteSX>;
+def : SchedAlias<WriteSTIdx, M3WriteSB>;
// FP data instructions.
def : WriteRes<WriteF, [M3UnitFADD]> { let Latency = 2; }
@@ -245,7 +230,6 @@ def : WriteRes<WriteFDiv, [M3UnitFDIV]> { let Latency = 12;
def : WriteRes<WriteFMul, [M3UnitFMAC]> { let Latency = 4; }
// FP miscellaneous instructions.
-// TODO: Conversion between register files is much different.
def : WriteRes<WriteFCvt, [M3UnitFCVT]> { let Latency = 3; }
def : WriteRes<WriteFImm, [M3UnitNALU]> { let Latency = 1; }
def : WriteRes<WriteFCopy, [M3UnitNALU]> { let Latency = 1; }
@@ -481,11 +465,15 @@ def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
// Special cases.
def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
+def M3WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M3WriteNALU1]>,
+ SchedVar<NoSchedPred, [M3WriteZ0]>]>;
+def M3WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
+
+// Fast forwarding.
def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
M3WriteFMAC5]>;
-def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
- SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
// Branch instructions
@@ -496,29 +484,40 @@ def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M3WriteA1], (instrs COPY)>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
-def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
+def : InstRW<[M3WriteAU], (instrs ORRWrs, ORRXrs)>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
+def : InstRW<[M3WriteAV], (instrs ADDWri, ADDXri)>;
+def : InstRW<[M3WriteAW], (instrs ORRWri, ORRXri)>;
// Move instructions.
-def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
-def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+def : InstRW<[M3WriteCOPY], (instrs COPY)>;
+def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
// Divide and multiply instructions.
// Miscellaneous instructions.
-def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
// Load instructions.
def : InstRW<[M3WriteLD,
WriteLDHi,
WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[M3WriteLX,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instrs PRFMroW)>;
def : InstRW<[M3WriteLX,
- ReadAdrBase], (instregex "^PRFMro[WX]")>;
+ ReadAdrBase], (instrs PRFMroX)>;
// Store instructions.
+def : InstRW<[M3WriteSB,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
// FP data instructions.
def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
@@ -555,9 +554,11 @@ def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
def : InstRW<[WriteVLD,
WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[M3WriteLX,
- ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
-def : InstRW<[M3WriteLB,
+def : InstRW<[M3WriteLE,
+ ReadAdrBase], (instregex "^LDR[BDHS]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BDHS]roX")>;
+def : InstRW<[M3WriteLE,
ReadAdrBase], (instregex "^LDRQro[WX]")>;
def : InstRW<[WriteVLD,
M3WriteLH], (instregex "^LDN?P[DS]i")>;
@@ -575,8 +576,10 @@ def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
def : InstRW<[WriteVST,
WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
-def : InstRW<[M3WriteSY,
- ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
+def : InstRW<[M3WriteSA,
+ ReadAdrBase], (instregex "^STR[BDHS]roW")>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BDHS]roX")>;
def : InstRW<[M3WriteSA,
ReadAdrBase], (instregex "^STRQro[WX]")>;
def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
@@ -588,7 +591,7 @@ def : InstRW<[M3WriteSA,
// ASIMD instructions.
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>;
def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>;
-def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^((SQ)?ABS|SQNEG)v")>;
def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>;
@@ -597,7 +600,6 @@ def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>;
def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>;
def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>;
-def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>;
def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>;
def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
@@ -668,108 +670,108 @@ def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
// ASIMD load instructions.
def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>;
def : InstRW<[M3WriteVLDD,
- WriteAdr], (instregex "LD1i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD1i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>;
def : InstRW<[M3WriteVLDE,
- WriteAdr], (instregex "LD1i(64)_POST")>;
+ M3WriteA1], (instregex "LD1i(64)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteL5,
- WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDF,
- WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDF,
- WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>;
def : InstRW<[M3WriteVLDG,
- WriteAdr], (instregex "LD2i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD2i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>;
def : InstRW<[M3WriteVLDH,
- WriteAdr], (instregex "LD2i(64)_POST")>;
+ M3WriteA1], (instregex "LD2i(64)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDA,
- WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDI,
- WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDI,
- WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
def : InstRW<[M3WriteVLDJ,
- WriteAdr], (instregex "LD3i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD3i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>;
def : InstRW<[M3WriteVLDL,
- WriteAdr], (instregex "LD3i(64)_POST")>;
+ M3WriteA1], (instregex "LD3i(64)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDB,
- WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[M3WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
+ M3WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDN,
- WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>;
def : InstRW<[M3WriteVLDK,
- WriteAdr], (instregex "LD4i(8|16|32)_POST")>;
+ M3WriteA1], (instregex "LD4i(8|16|32)_POST")>;
def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>;
def : InstRW<[M3WriteVLDM,
- WriteAdr], (instregex "LD4i(64)_POST")>;
+ M3WriteA1], (instregex "LD4i(64)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
+ M3WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[M3WriteVLDC,
- WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
+ M3WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
// ASIMD store instructions.
def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
new file mode 100644
index 000000000000..4d892465b3f2
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -0,0 +1,1004 @@
+//=- AArch64SchedExynosM4.td - Samsung Exynos M4 Sched Defs --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Samsung Exynos M4 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The Exynos-M4 is an advanced superscalar microprocessor with a 6-wide
+// in-order stage for decode and dispatch and a wider issue stage.
+// The execution units and loads and stores are out-of-order.
+
+def ExynosM4Model : SchedMachineModel {
+ let IssueWidth = 6; // Up to 6 uops per cycle.
+ let MicroOpBufferSize = 228; // ROB size.
+ let LoopMicroOpBufferSize = 48; // Based on the instruction queue size.
+ let LoadLatency = 4; // Optimistic load cases.
+ let MispredictPenalty = 16; // Minimum branch misprediction penalty.
+ let CompleteModel = 1; // Use the default model otherwise.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+}
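// The per-processor values above surface to C++ consumers through
// llvm::MCSchedModel.  A small sketch of reading them back (assuming the
// generic field names IssueWidth, MicroOpBufferSize, LoadLatency and
// MispredictPenalty from llvm/MC/MCSchedule.h; the function itself is
// illustrative only, not part of this change):
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/raw_ostream.h"

static void printSchedModelSummary(const llvm::MCSchedModel &SM) {
  llvm::outs() << "issue width:        " << SM.IssueWidth << '\n'
               << "uop buffer (ROB):   " << SM.MicroOpBufferSize << '\n'
               << "load latency:       " << SM.LoadLatency << '\n'
               << "mispredict penalty: " << SM.MispredictPenalty << '\n';
}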
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on the Exynos-M4.
+
+let SchedModel = ExynosM4Model in {
+
+def M4UnitA : ProcResource<2>; // Simple integer
+def M4UnitC : ProcResource<2>; // Simple and complex integer
+let Super = M4UnitC, BufferSize = 1 in
+def M4UnitD : ProcResource<1>; // Integer division (inside C0, serialized)
+let Super = M4UnitC in
+def M4UnitE : ProcResource<1>; // CRC (inside C0)
+def M4UnitB : ProcResource<2>; // Branch
+def M4UnitL0 : ProcResource<1>; // Load
+def M4UnitS0 : ProcResource<1>; // Store
+def M4PipeLS : ProcResource<1>; // Load/Store
+let Super = M4PipeLS in {
+ def M4UnitL1 : ProcResource<1>;
+ def M4UnitS1 : ProcResource<1>;
+}
+def M4PipeF0 : ProcResource<1>; // FP #0
+let Super = M4PipeF0 in {
+ def M4UnitFMAC0 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD0 : ProcResource<1>; // Simple FP
+ def M4UnitFCVT0 : ProcResource<1>; // FP conversion
+ def M4UnitNALU0 : ProcResource<1>; // Simple vector
+ def M4UnitNHAD : ProcResource<1>; // Horizontal vector
+ def M4UnitNMSC : ProcResource<1>; // FP and vector miscellanea
+ def M4UnitNMUL0 : ProcResource<1>; // Vector multiplication
+ def M4UnitNSHT0 : ProcResource<1>; // Vector shifting
+ def M4UnitNSHF0 : ProcResource<1>; // Vector shuffling
+ def M4UnitNCRY0 : ProcResource<1>; // Cryptographic
+}
+def M4PipeF1 : ProcResource<1>; // FP #1
+let Super = M4PipeF1 in {
+ def M4UnitFMAC1 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD1 : ProcResource<1>; // Simple FP
+ def M4UnitFDIV0 : ProcResource<2>; // FP division (serialized)
+ def M4UnitFSQR0 : ProcResource<2>; // FP square root (serialized)
+ def M4UnitFST0 : ProcResource<1>; // FP store
+ def M4UnitNALU1 : ProcResource<1>; // Simple vector
+ def M4UnitNSHT1 : ProcResource<1>; // Vector shifting
+ def M4UnitNSHF1 : ProcResource<1>; // Vector shuffling
+}
+def M4PipeF2 : ProcResource<1>; // FP #2
+let Super = M4PipeF2 in {
+ def M4UnitFMAC2 : ProcResource<1>; // FP multiplication
+ def M4UnitFADD2 : ProcResource<1>; // Simple FP
+ def M4UnitFCVT1 : ProcResource<1>; // FP conversion
+ def M4UnitFDIV1 : ProcResource<2>; // FP division (serialized)
+ def M4UnitFSQR1 : ProcResource<2>; // FP square root (serialized)
+ def M4UnitFST1 : ProcResource<1>; // FP store
+ def M4UnitNALU2 : ProcResource<1>; // Simple vector
+ def M4UnitNMUL1 : ProcResource<1>; // Vector multiplication
+ def M4UnitNSHT2 : ProcResource<1>; // Vector shifting
+ def M4UnitNCRY1 : ProcResource<1>; // Cryptographic
+}
+
+def M4UnitALU : ProcResGroup<[M4UnitA,
+ M4UnitC]>;
+def M4UnitL : ProcResGroup<[M4UnitL0,
+ M4UnitL1]>;
+def M4UnitS : ProcResGroup<[M4UnitS0,
+ M4UnitS1]>;
+def M4UnitFMAC : ProcResGroup<[M4UnitFMAC0,
+ M4UnitFMAC1,
+ M4UnitFMAC2]>;
+def M4UnitFMACH : ProcResGroup<[M4UnitFMAC0,
+ M4UnitFMAC1]>;
+def M4UnitFADD : ProcResGroup<[M4UnitFADD0,
+ M4UnitFADD1,
+ M4UnitFADD2]>;
+def M4UnitFADDH : ProcResGroup<[M4UnitFADD0,
+ M4UnitFADD1]>;
+def M4UnitFCVT : ProcResGroup<[M4UnitFCVT0,
+ M4UnitFCVT1]>;
+def M4UnitFCVTH : ProcResGroup<[M4UnitFCVT0]>;
+def M4UnitFDIV : ProcResGroup<[M4UnitFDIV0,
+ M4UnitFDIV1]>;
+def M4UnitFDIVH : ProcResGroup<[M4UnitFDIV0]>;
+def M4UnitFSQR : ProcResGroup<[M4UnitFSQR0,
+ M4UnitFSQR1]>;
+def M4UnitFSQRH : ProcResGroup<[M4UnitFSQR0]>;
+def M4UnitFST : ProcResGroup<[M4UnitFST0,
+ M4UnitFST1]>;
+def M4UnitNALU : ProcResGroup<[M4UnitNALU0,
+ M4UnitNALU1,
+ M4UnitNALU2]>;
+def M4UnitNALUH : ProcResGroup<[M4UnitNALU0,
+ M4UnitNALU1]>;
+def M4UnitNMUL : ProcResGroup<[M4UnitNMUL0,
+ M4UnitNMUL1]>;
+def M4UnitNSHT : ProcResGroup<[M4UnitNSHT0,
+ M4UnitNSHT1,
+ M4UnitNSHT2]>;
+def M4UnitNSHF : ProcResGroup<[M4UnitNSHF0,
+ M4UnitNSHF1]>;
+def M4UnitNSHFH : ProcResGroup<[M4UnitNSHF0]>;
+def M4UnitNCRY : ProcResGroup<[M4UnitNCRY0,
+ M4UnitNCRY1]>;
+
+//===----------------------------------------------------------------------===//
+// Resources details.
+
+def M4WriteZ0 : SchedWriteRes<[]> { let Latency = 0; }
+def M4WriteZ1 : SchedWriteRes<[]> { let Latency = 1;
+ let NumMicroOps = 0; }
+def M4WriteZ4 : SchedWriteRes<[]> { let Latency = 4;
+ let NumMicroOps = 0; }
+
+def M4WriteA1 : SchedWriteRes<[M4UnitALU]> { let Latency = 1; }
+def M4WriteA2 : SchedWriteRes<[M4UnitALU]> { let Latency = 2; }
+def M4WriteAA : SchedWriteRes<[M4UnitALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M4WriteAB : SchedWriteRes<[M4UnitALU,
+ M4UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAC : SchedWriteRes<[M4UnitALU,
+ M4UnitALU,
+ M4UnitC]> { let Latency = 3;
+ let NumMicroOps = 3; }
+def M4WriteAD : SchedWriteRes<[M4UnitALU,
+ M4UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAF : SchedWriteRes<[M4UnitALU]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteAU : SchedWriteVariant<[SchedVar<IsCopyIdiomPred, [M4WriteZ0]>,
+ SchedVar<ExynosArithPred, [M4WriteA1]>,
+ SchedVar<ExynosLogicExPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAV : SchedWriteVariant<[SchedVar<ExynosResetPred, [M4WriteZ0]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M4WriteA1]>,
+ SchedVar<ExynosLogicExPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAA]>]>;
+def M4WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAF]>]>;
+
+def M4WriteB1 : SchedWriteRes<[M4UnitB]> { let Latency = 1; }
+def M4WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M4WriteAC]>,
+ SchedVar<NoSchedPred, [M4WriteAB]>]>;
+
+def M4WriteC1 : SchedWriteRes<[M4UnitC]> { let Latency = 1; }
+def M4WriteC3 : SchedWriteRes<[M4UnitC]> { let Latency = 3; }
+def M4WriteCA : SchedWriteRes<[M4UnitC]> { let Latency = 4;
+ let ResourceCycles = [2]; }
+
+def M4WriteD12 : SchedWriteRes<[M4UnitD]> { let Latency = 12; }
+def M4WriteD21 : SchedWriteRes<[M4UnitD]> { let Latency = 21; }
+
+def M4WriteE2 : SchedWriteRes<[M4UnitE]> { let Latency = 2; }
+
+def M4WriteL4 : SchedWriteRes<[M4UnitL]> { let Latency = 4; }
+def M4WriteL5 : SchedWriteRes<[M4UnitL]> { let Latency = 5; }
+def M4WriteLA : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 1; }
+def M4WriteLB : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteLC : SchedWriteRes<[M4UnitA,
+ M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteLD : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M4WriteLE : SchedWriteRes<[M4UnitA,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M4WriteLH : SchedWriteRes<[]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def M4WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M4WriteL5]>,
+ SchedVar<NoSchedPred, [M4WriteL4]>]>;
+
+def M4WriteS1 : SchedWriteRes<[M4UnitS]> { let Latency = 1; }
+def M4WriteSA : SchedWriteRes<[M4UnitS0]> { let Latency = 3; }
+def M4WriteSB : SchedWriteRes<[M4UnitA,
+ M4UnitS]> { let Latency = 2;
+ let NumMicroOps = 1; }
+def M4WriteSX : SchedWriteVariant<[SchedVar<ExynosScaledIdxPred, [M4WriteSB]>,
+ SchedVar<NoSchedPred, [M4WriteS1]>]>;
+
+def M4ReadAdrBase : SchedReadVariant<[SchedVar<
+ MCSchedPredicate<
+ CheckAny<
+ [ScaledIdxFn,
+ ExynosScaledIdxFn]>>, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+
+def M4WriteNEONA : SchedWriteRes<[M4UnitNSHF,
+ M4UnitFADD]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M4WriteNEONB : SchedWriteRes<[M4UnitNALU,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEOND : SchedWriteRes<[M4UnitNSHF,
+ M4UnitFST]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M4WriteNEONH : SchedWriteRes<[M4UnitNALU,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONI : SchedWriteRes<[M4UnitNSHF,
+ M4UnitS0]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteNEONJ : SchedWriteRes<[M4UnitNMSC,
+ M4UnitS0]> { let Latency = 4; }
+def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNMSC,
+ M4UnitS0]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC,
+ M4UnitNMSC]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteNEONO : SchedWriteRes<[M4UnitNMSC,
+ M4UnitNMSC,
+ M4UnitNMSC]> { let Latency = 8;
+ let NumMicroOps = 3; }
+def M4WriteNEONP : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNMSC]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M4WriteNEONQ : SchedWriteRes<[M4UnitNMSC,
+ M4UnitC]> { let Latency = 3;
+ let NumMicroOps = 1; }
+def M4WriteNEONR : SchedWriteRes<[M4UnitFCVT0,
+ M4UnitS0]> { let Latency = 4;
+ let NumMicroOps = 1; }
+def M4WriteNEONV : SchedWriteRes<[M4UnitFDIV,
+ M4UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONVH : SchedWriteRes<[M4UnitFDIVH,
+ M4UnitFDIVH]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONW : SchedWriteRes<[M4UnitFDIV,
+ M4UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [9, 9]; }
+def M4WriteNEONX : SchedWriteRes<[M4UnitFSQR,
+ M4UnitFSQR]> { let Latency = 8;
+ let ResourceCycles = [7, 7]; }
+def M4WriteNEONXH : SchedWriteRes<[M4UnitFSQRH,
+ M4UnitFSQRH]> { let Latency = 7;
+ let ResourceCycles = [6, 6]; }
+def M4WriteNEONY : SchedWriteRes<[M4UnitFSQR,
+ M4UnitFSQR]> { let Latency = 12;
+ let ResourceCycles = [9, 9]; }
+def M4WriteNEONZ : SchedWriteVariant<[SchedVar<ExynosQFormPred, [M4WriteNEONO]>,
+ SchedVar<NoSchedPred, [M4WriteNEONN]>]>;
+
+def M4WriteFADD2 : SchedWriteRes<[M4UnitFADD]> { let Latency = 2; }
+def M4WriteFADD2H : SchedWriteRes<[M4UnitFADDH]> { let Latency = 2; }
+
+def M4WriteFCVT2 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 2; }
+def M4WriteFCVT2A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 2; }
+def M4WriteFCVT2H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 2; }
+def M4WriteFCVT3 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 3; }
+def M4WriteFCVT3A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 3; }
+def M4WriteFCVT3H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 3; }
+def M4WriteFCVT4 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 4; }
+def M4WriteFCVT4A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 4; }
+def M4WriteFCVT6A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 6; }
+
+def M4WriteFDIV7 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFDIV7H : SchedWriteRes<[M4UnitFDIVH]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFDIV12 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+
+def M4WriteFMAC2H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 2; }
+def M4WriteFMAC3H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 3; }
+def M4WriteFMAC3 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 3; }
+def M4WriteFMAC4 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 4; }
+def M4WriteFMAC4H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 4; }
+def M4WriteFMAC5 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 5; }
+
+def M4WriteFSQR7H : SchedWriteRes<[M4UnitFSQRH]> { let Latency = 7;
+ let ResourceCycles = [6]; }
+def M4WriteFSQR8 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 8;
+ let ResourceCycles = [7]; }
+def M4WriteFSQR12 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+
+def M4WriteNALU1 : SchedWriteRes<[M4UnitNALU]> { let Latency = 1; }
+def M4WriteNALU1H : SchedWriteRes<[M4UnitNALUH]> { let Latency = 1; }
+
+def M4WriteNCRY1 : SchedWriteRes<[M4UnitNCRY]> { let Latency = 1; }
+def M4WriteNCRY1A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 1; }
+def M4WriteNCRY3A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 3; }
+def M4WriteNCRY5A : SchedWriteRes<[M4UnitNCRY]> { let Latency = 5; }
+
+def M4WriteNHAD1 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 1; }
+def M4WriteNHAD3 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 3; }
+
+def M4WriteNMSC1 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 1; }
+def M4WriteNMSC2 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 2; }
+def M4WriteNMSC3 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 3; }
+
+def M4WriteNMUL3 : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
+
+def M4WriteNSHF1 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1; }
+def M4WriteNSHF1H : SchedWriteRes<[M4UnitNSHFH]> { let Latency = 1; }
+def M4WriteNSHF3 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3; }
+def M4WriteNSHFA : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1;
+ let ResourceCycles = [2]; }
+def M4WriteNSHFB : SchedWriteRes<[M4UnitNSHF]> { let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2]; }
+def M4WriteNSHFC : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [4]; }
+def M4WriteNSHFD : SchedWriteRes<[M4UnitNSHF]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4]; }
+
+def M4WriteNSHT1 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 1; }
+def M4WriteNSHT2 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 2; }
+def M4WriteNSHT3 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 3; }
+def M4WriteNSHT4A : SchedWriteRes<[M4UnitNSHT1]> { let Latency = 4; }
+
+def M4WriteVLDA : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M4WriteVLDB : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 3; }
+def M4WriteVLDC : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 6;
+ let NumMicroOps = 4; }
+def M4WriteVLDD : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 1]; }
+def M4WriteVLDF : SchedWriteRes<[M4UnitL,
+ M4UnitL]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [3, 3]; }
+def M4WriteVLDG : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1, 1]; }
+def M4WriteVLDI : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3, 3, 3]; }
+def M4WriteVLDJ : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3, 1, 1, 1]; }
+def M4WriteVLDK : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3, 1, 1, 1, 1]; }
+def M4WriteVLDL : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitL,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3, 1, 1, 6, 1]; }
+def M4WriteVLDM : SchedWriteRes<[M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitL,
+ M4UnitNSHF,
+ M4UnitNSHF]> { let Latency = 7;
+ let NumMicroOps = 6;
+ let ResourceCycles = [3, 1, 1, 3, 1, 1]; }
+def M4WriteVLDN : SchedWriteRes<[M4UnitL,
+ M4UnitL,
+ M4UnitL,
+ M4UnitL]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3, 3, 3, 3]; }
+
+def M4WriteVST1 : SchedWriteRes<[M4UnitS,
+ M4UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
+def M4WriteVSTA : WriteSequence<[WriteVST], 2>;
+def M4WriteVSTB : WriteSequence<[WriteVST], 3>;
+def M4WriteVSTC : WriteSequence<[WriteVST], 4>;
+def M4WriteVSTD : SchedWriteRes<[M4UnitS,
+ M4UnitFST]> { let Latency = 2; }
+def M4WriteVSTE : SchedWriteRes<[M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M4WriteVSTF : SchedWriteRes<[M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1, 2, 1]; }
+def M4WriteVSTG : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 5;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1, 1, 1, 2, 1, 2, 1, 2, 1]; }
+def M4WriteVSTI : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
+def M4WriteVSTJ : SchedWriteRes<[M4UnitA,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M4WriteVSTK : SchedWriteRes<[M4UnitA,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M4WriteVSTL : SchedWriteRes<[M4UnitNSHF,
+ M4UnitNSHF,
+ M4UnitS,
+ M4UnitFST,
+ M4UnitS,
+ M4UnitFST]> { let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 2, 1, 2, 1]; }
+
+// Special cases.
+def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
+ SchedVar<NoSchedPred, [M4WriteZ0]>]>;
+def M4WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>,
+ SchedVar<NoSchedPred, [M4WriteNALU1]>]>;
+def M4WriteMULL : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>,
+ SchedVar<NoSchedPred, [M4WriteNMUL3]>]>;
+
+// Fast forwarding.
+def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>;
+def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4,
+ M4WriteFMAC4H,
+ M4WriteFMAC5]>;
+def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>;
+def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>;
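+// Note (descriptive, not part of the model itself): a positive SchedReadAdvance
+// lets the consumer read its operand that many cycles early (forwarding),
+// whereas the negative advance in M4ReadMULLP2 adds two cycles of latency when
+// reading a result produced by M4WriteNEONM (the long multiplies above).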
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model.
+
+// Branch instructions.
+def : SchedAlias<WriteBr, M4WriteZ0>;
+def : SchedAlias<WriteBrReg, M4WriteC1>;
+
+// Arithmetic and logical integer instructions.
+def : SchedAlias<WriteI, M4WriteA1>;
+def : SchedAlias<WriteIEReg, M4WriteAA>; // FIXME: M4WriteAX crashes TableGen.
+def : SchedAlias<WriteISReg, M4WriteAA>; // FIXME: M4WriteAX crashes TableGen.
+def : SchedAlias<WriteIS, M4WriteA1>;
+
+// Move instructions.
+def : SchedAlias<WriteImm, M4WriteA1>;
+
+// Divide and multiply instructions.
+def : SchedAlias<WriteID32, M4WriteD12>;
+def : SchedAlias<WriteID64, M4WriteD21>;
+def : SchedAlias<WriteIM32, M4WriteC3>;
+def : SchedAlias<WriteIM64, M4WriteCA>;
+
+// Miscellaneous instructions.
+def : SchedAlias<WriteExtr, M4WriteAY>;
+
+// Addressing modes.
+def : SchedAlias<WriteAdr, M4WriteZ1>;
+def : SchedAlias<ReadAdrBase, M4ReadAdrBase>;
+
+// Load instructions.
+def : SchedAlias<WriteLD, M4WriteL4>;
+def : SchedAlias<WriteLDHi, M4WriteZ4>;
+def : SchedAlias<WriteLDIdx, M4WriteLX>;
+
+// Store instructions.
+def : SchedAlias<WriteST, M4WriteS1>;
+def : SchedAlias<WriteSTP, M4WriteS1>;
+def : SchedAlias<WriteSTX, M4WriteS1>;
+def : SchedAlias<WriteSTIdx, M4WriteSX>;
+
+// FP data instructions.
+def : SchedAlias<WriteF, M4WriteFADD2>;
+def : SchedAlias<WriteFCmp, M4WriteNMSC2>;
+def : SchedAlias<WriteFDiv, M4WriteFDIV12>;
+def : SchedAlias<WriteFMul, M4WriteFMAC3>;
+
+// FP miscellaneous instructions.
+def : SchedAlias<WriteFCvt, M4WriteFCVT2>;
+def : SchedAlias<WriteFImm, M4WriteNALU1>;
+def : SchedAlias<WriteFCopy, M4WriteCOPY>;
+
+// FP load instructions.
+def : SchedAlias<WriteVLD, M4WriteL5>;
+
+// FP store instructions.
+def : SchedAlias<WriteVST, M4WriteVST1>;
+
+// ASIMD FP instructions.
+def : SchedAlias<WriteV, M4WriteNALU1>;
+
+// Other miscellaneous instructions.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Generic fast forwarding.
+
+// TODO: Add FP register forwarding rules.
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+// TODO: The forwarding for 32 bits actually saves 2 cycles.
+def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+//===----------------------------------------------------------------------===//
+// Finer scheduling model.
+
+// Branch instructions.
+def : InstRW<[M4WriteB1], (instrs Bcc)>;
+def : InstRW<[M4WriteAF], (instrs BL)>;
+def : InstRW<[M4WriteBX], (instrs BLR)>;
+def : InstRW<[M4WriteC1], (instregex "^CBN?Z[WX]")>;
+def : InstRW<[M4WriteAD], (instregex "^TBN?Z[WX]")>;
+
+// Arithmetic and logical integer instructions.
+def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>;
+def : InstRW<[M4WriteAU], (instrs ORRWrs, ORRXrs)>;
+def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>;
+def : InstRW<[M4WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>;
+def : InstRW<[M4WriteAV], (instrs ADDWri, ADDXri, ORRWri, ORRXri)>;
+
+// Move instructions.
+def : InstRW<[M4WriteCOPY], (instrs COPY)>;
+def : InstRW<[M4WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M4WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+
+// Divide and multiply instructions.
+
+// Miscellaneous instructions.
+
+// Load instructions.
+def : InstRW<[M4WriteLD,
+ WriteLDHi,
+ WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M4WriteL5,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>;
+def : InstRW<[WriteLDIdx,
+ ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>;
+def : InstRW<[M4WriteL5,
+ ReadAdrBase], (instrs PRFMroW)>;
+def : InstRW<[WriteLDIdx,
+ ReadAdrBase], (instrs PRFMroX)>;
+
+// Store instructions.
+def : InstRW<[M4WriteSB,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>;
+def : InstRW<[WriteST,
+ ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>;
+
+// FP data instructions.
+def : InstRW<[M4WriteNSHF1H], (instrs FABSHr)>;
+def : InstRW<[M4WriteNSHF1], (instregex "^FABS[SD]r")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^F(ADD|SUB)Hrr")>;
+def : InstRW<[M4WriteFADD2], (instregex "^F(ADD|SUB)[SD]rr")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.i16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.i(32|64)")>;
+def : InstRW<[M4WriteNEONQ], (instregex "^FCCMPE?[HSD]rr")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^FCMPE?[HSD]r[ir]")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(16|32|64|v1)")>;
+def : InstRW<[M4WriteFDIV7H], (instrs FDIVHrr)>;
+def : InstRW<[M4WriteFDIV7], (instrs FDIVSrr)>;
+def : InstRW<[M4WriteFDIV12], (instrs FDIVDrr)>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?[HSD]rr")>;
+def : InstRW<[M4WriteFMAC3H], (instregex "^FN?MULHrr")>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FN?MUL[SD]rr")>;
+def : InstRW<[M4WriteFMAC3H], (instrs FMULX16)>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FMULX(32|64)")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)Hrrr")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
+def : InstRW<[M4WriteNALU1H], (instrs FNEGHr)>;
+def : InstRW<[M4WriteNALU1], (instregex "^FNEG[SD]r")>;
+def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT.+r")>;
+def : InstRW<[M4WriteNEONH], (instregex "^FCSEL[HSD]rrr")>;
+def : InstRW<[M4WriteFSQR7H], (instrs FSQRTHr)>;
+def : InstRW<[M4WriteFSQR8], (instrs FSQRTSr)>;
+def : InstRW<[M4WriteFSQR12], (instrs FSQRTDr)>;
+
+// FP miscellaneous instructions.
+def : InstRW<[M4WriteFCVT2H], (instregex "^FCVTH[SD]r")>;
+def : InstRW<[M4WriteFCVT2H], (instregex "^FCVT[SD]Hr")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^FCVT[SD][SD]r")>;
+def : InstRW<[M4WriteFCVT6A], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>;
+def : InstRW<[M4WriteNEONR], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FMOV[HSD][ir]")>;
+def : InstRW<[M4WriteSA], (instregex "^FMOV[WX][HSD]r")>;
+def : InstRW<[M4WriteNEONJ], (instregex "^FMOV[HSD][WX]r")>;
+def : InstRW<[M4WriteNEONI], (instregex "^FMOVXDHighr")>;
+def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>;
+def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>;
+
+// FP load instructions.
+def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
+def : InstRW<[WriteVLD], (instregex "^LDUR[BHSDQ]i")>;
+def : InstRW<[WriteVLD,
+ WriteAdr], (instregex "^LDR[BHSDQ](post|pre)")>;
+def : InstRW<[WriteVLD], (instregex "^LDR[BHSDQ]ui")>;
+def : InstRW<[M4WriteLE,
+ ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>;
+def : InstRW<[WriteVLD,
+ ReadAdrBase], (instregex "^LDR[BHSD]roX")>;
+def : InstRW<[M4WriteLE,
+ ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[WriteVLD,
+ M4WriteLH], (instregex "^LDN?P[SD]i")>;
+def : InstRW<[M4WriteLA,
+ M4WriteLH], (instregex "^LDN?PQi")>;
+def : InstRW<[M4WriteL5,
+ M4WriteLH,
+ WriteAdr], (instregex "^LDP[SD]post")>;
+def : InstRW<[M4WriteLB,
+ M4WriteLH,
+ WriteAdr], (instrs LDPQpost)>;
+def : InstRW<[M4WriteLB,
+ M4WriteLH,
+ WriteAdr], (instregex "^LDP[SD]pre")>;
+def : InstRW<[M4WriteLC,
+ M4WriteLH,
+ WriteAdr], (instrs LDPQpre)>;
+
+// FP store instructions.
+def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>;
+def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>;
+def : InstRW<[M4WriteVSTJ,
+ ReadAdrBase], (instregex "^STR[BHSD]roW")>;
+def : InstRW<[M4WriteVSTK,
+ ReadAdrBase], (instrs STRQroW)>;
+def : InstRW<[WriteVST,
+ ReadAdrBase], (instregex "^STR[BHSD]roX")>;
+def : InstRW<[M4WriteVSTK,
+ ReadAdrBase], (instrs STRQroX)>;
+def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>;
+def : InstRW<[M4WriteVSTA], (instregex "^STN?PQi")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STP[SD](post|pre)")>;
+def : InstRW<[M4WriteVSTJ,
+ WriteAdr], (instregex "^STPQ(post|pre)")>;
+
+// ASIMD instructions.
+def : InstRW<[M4WriteNHAD1], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^ABSv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Pv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]H(ADD|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU](ADD|SUB)[LW]v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^R?(ADD|SUB)HN2?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]Q(ADD|SUB)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^(SU|US)QADDv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]RHADDv")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^SQ(ABS|NEG)v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Vv")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
+def : InstRW<[M4WriteNALU1], (instregex "^CMTSTv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^ML[AS]v")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M4WriteMULL,
+ M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>;
+def : InstRW<[M4WriteMULL,
+ M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>;
+def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^SHL[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^S[LR]I[dv]")>;
+def : InstRW<[M4WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>;
+def : InstRW<[M4WriteNSHT2], (instregex "^[SU]?SHLLv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?Q?R?SHRU?N[bhsv]")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]RSH[LR][dv]")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]QR?SHLU?[bhsdv]")>;
+
+// ASIMD FP instructions.
+def : InstRW<[M4WriteNSHF1H], (instregex "^FABSv.f16")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^FABSv.f(32|64)")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^F(ABD|ADD|SUB)v.f16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^F(ABD|ADD|SUB)v.f(32|64)")>;
+def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.f16")>;
+def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.f(32|64)")>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^FCVT(L|N|XN)v")>;
+def : InstRW<[M4WriteFCVT2A], (instregex "^FCVT[AMNPZ][SU]v")>;
+def : InstRW<[M4WriteFCVT2H], (instregex "^[SU]CVTFv.[fi]16")>;
+def : InstRW<[M4WriteFCVT2], (instregex "^[SU]CVTFv.[fi](32|64)")>;
+def : InstRW<[M4WriteFDIV7H], (instrs FDIVv4f16)>;
+def : InstRW<[M4WriteNEONVH], (instrs FDIVv8f16)>;
+def : InstRW<[M4WriteFDIV7], (instrs FDIVv2f32)>;
+def : InstRW<[M4WriteNEONV], (instrs FDIVv4f32)>;
+def : InstRW<[M4WriteNEONW], (instrs FDIVv2f64)>;
+def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>;
+def : InstRW<[M4WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[M4WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>;
+def : InstRW<[M4WriteFMAC2H], (instregex "^FMULX?v.[fi]16")>;
+def : InstRW<[M4WriteFMAC3], (instregex "^FMULX?v.[fi](32|64)")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^FML[AS]v.[fi]16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^FML[AS]v.[fi](32|64)")>;
+def : InstRW<[M4WriteNALU1H], (instregex "^FNEGv.f16")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FNEGv.f(32|64)")>;
+def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>;
+def : InstRW<[M4WriteFSQR7H], (instrs FSQRTv4f16)>;
+def : InstRW<[M4WriteNEONXH], (instrs FSQRTv8f16)>;
+def : InstRW<[M4WriteFSQR8], (instrs FSQRTv2f32)>;
+def : InstRW<[M4WriteNEONX], (instrs FSQRTv4f32)>;
+def : InstRW<[M4WriteNEONY], (instrs FSQRTv2f64)>;
+
+// ASIMD miscellaneous instructions.
+def : InstRW<[M4WriteNALU1], (instregex "^RBITv")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M4WriteNALU1], (instregex "^CL[STZ]v")>;
+def : InstRW<[M4WriteNEONB], (instregex "^DUPv.+gpr")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^CPY")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^DUPv.+lane")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^EXTv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^XTNv")>;
+def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?QXTU?Nv")>;
+def : InstRW<[M4WriteNEONB], (instregex "^INSv.+gpr")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^INSv.+lane")>;
+def : InstRW<[M4WriteMOVI], (instregex "^(MOV|MVN)I")>;
+def : InstRW<[M4WriteNALU1H], (instregex "^FMOVv.f16")>;
+def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>;
+def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
+def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
+def : InstRW<[M4WriteFMAC4H,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>;
+def : InstRW<[M4WriteFMAC4,
+ M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
+def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;
+def : InstRW<[M4WriteNSHFC], (instregex "^TB[LX]v(8|16)i8Three")>;
+def : InstRW<[M4WriteNSHFD], (instregex "^TB[LX]v(8|16)i8Four")>;
+def : InstRW<[M4WriteNEONP], (instregex "^[SU]MOVv")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
+
+// ASIMD load instructions.
+def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDD], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVLDD,
+ M4WriteA1], (instregex "LD1i(8|16|32|64)_POST$")>;
+
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD,
+ M4WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDF,
+ M4WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDF,
+ M4WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDG], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVLDG,
+ M4WriteA1], (instregex "LD2i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDA,
+ M4WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDI,
+ M4WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDI,
+ M4WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[M4WriteVLDJ,
+ M4WriteA1], (instregex "LD3i(8|16|32)_POST$")>;
+def : InstRW<[M4WriteVLDL], (instregex "LD3i64$")>;
+def : InstRW<[M4WriteVLDL,
+ M4WriteA1], (instregex "LD3i64_POST$")>;
+
+def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDB,
+ M4WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVLDN,
+ M4WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDN,
+ M4WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVLDK], (instregex "LD4i(8|16|32)$")>;
+def : InstRW<[M4WriteVLDK,
+ M4WriteA1], (instregex "LD4i(8|16|32)_POST$")>;
+def : InstRW<[M4WriteVLDM], (instregex "LD4i64$")>;
+def : InstRW<[M4WriteVLDM,
+ M4WriteA1], (instregex "LD4i64_POST$")>;
+
+def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVLDC,
+ M4WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store instructions.
+def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTA,
+ M4WriteA1], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTA,
+ M4WriteA1], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTB,
+ M4WriteA1], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTB,
+ M4WriteA1], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M4WriteVSTC,
+ M4WriteA1], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTC,
+ M4WriteA1], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[WriteVST,
+ M4WriteA1], (instregex "ST1i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTD,
+ M4WriteA1], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTD], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTD,
+ M4WriteA1], (instregex "ST2i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTF,
+ M4WriteA1], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTG,
+ M4WriteA1], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTE], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST3i(8|16|32|64)_POST$")>;
+
+def : InstRW<[M4WriteVSTL], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M4WriteVSTL,
+ M4WriteA1], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M4WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M4WriteVSTI,
+ M4WriteA1], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M4WriteVSTE], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[M4WriteVSTE,
+ M4WriteA1], (instregex "ST4i(8|16|32|64)_POST$")>;
+
+// Cryptography instructions.
+def : InstRW<[M4WriteNCRY1], (instregex "^AES[DE]")>;
+def : InstRW<[M4WriteNCRY1,
+ M4ReadAESM1], (instregex "^AESI?MC")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^PMULv")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^PMULLv(1|8)i")>;
+def : InstRW<[M4WriteNCRY3A], (instregex "^PMULLv(2|16)i")>;
+def : InstRW<[M4WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>;
+def : InstRW<[M4WriteNCRY1A], (instrs SHA256SU0rr)>;
+def : InstRW<[M4WriteNCRY5A], (instrs SHA256SU1rrr)>;
+def : InstRW<[M4WriteNCRY5A], (instrs SHA256H2rrr)>;
+
+// CRC instructions.
+def : InstRW<[M4WriteE2], (instregex "^CRC32C?[BHWX]rr$")>;
+
+} // SchedModel = ExynosM4Model
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
new file mode 100644
index 000000000000..48c54230e9d8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -0,0 +1,157 @@
+//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 Exynos processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Auxiliary predicates.
+
+// Check the shift in arithmetic and logic instructions.
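+// For example, an operand shifted by zero, or shifted left (LSL) by 1, 2 or 3
+// bits (as in "add x0, x1, x2, lsl #2"), passes this check; larger amounts and
+// non-LSL shifts by a non-zero amount do not.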
+def ExynosCheckShift : CheckAny<[CheckShiftBy0,
+ CheckAll<
+ [CheckShiftLSL,
+ CheckAny<
+ [CheckShiftBy1,
+ CheckShiftBy2,
+ CheckShiftBy3]>]>]>;
+
+// Exynos predicates.
+
+// Identify BLR specifying the LR register as the indirect target register.
+def ExynosBranchLinkLRPred : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[BLR]>,
+ CheckRegOperand<0, LR>]>>;
+
+// Identify arithmetic instructions without or with limited extension or shift.
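+// For instance, "add x0, x1, w2, uxtw #2" is considered fast, while
+// "add x0, x1, w2, sxtw #2" is not (only UXTW/UXTX extensions may carry a
+// non-zero amount, and only up to 3); shifted forms defer to ExynosCheckShift.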
+def ExynosArithFn : TIIPredicate<
+ "isExynosArithFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithExtOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckExtBy0,
+ CheckAll<
+ [CheckAny<
+ [CheckExtUXTW,
+ CheckExtUXTX]>,
+ CheckAny<
+ [CheckExtBy1,
+ CheckExtBy2,
+ CheckExtBy3]>]>]>>>,
+ MCOpcodeSwitchCase<
+ IsArithShiftOp.ValidOpcodes,
+ MCReturnStatement<ExynosCheckShift>>,
+ MCOpcodeSwitchCase<
+ IsArithUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosArithPred : MCSchedPredicate<ExynosArithFn>;
+
+// Identify logic instructions with limited shift.
+def ExynosLogicFn : TIIPredicate<
+ "isExynosLogicFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<ExynosCheckShift>>,
+ MCOpcodeSwitchCase<
+ IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLogicPred : MCSchedPredicate<ExynosLogicFn>;
+
+// Identify more logic instructions with limited shift.
+def ExynosLogicExFn : TIIPredicate<
+ "isExynosLogicExFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<
+ [ExynosCheckShift,
+ CheckAll<
+ [CheckShiftLSL,
+ CheckShiftBy8]>]>>>,
+ MCOpcodeSwitchCase<
+ IsLogicUnshiftOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLogicExPred : MCSchedPredicate<ExynosLogicExFn>;
+
+// Identify a load or store using the register offset addressing mode
+// with a scaled non-extended register.
+def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLoadStoreRegOffsetOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<
+ [CheckMemExtSXTW,
+ CheckMemExtUXTW,
+ CheckMemScaled]>>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
+
+// Identify FP instructions.
+def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
+
+// Identify whether an instruction whose result is a long vector
+// operates on the upper half of the input registers.
+def ExynosLongVectorUpperFn : TIIPredicate<
+ "isExynosLongVectorUpper",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLongVectorUpperOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<FalsePred>>>;
+def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
+
+// Identify 128-bit NEON instructions.
+def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
+
+// Identify instructions that reset a register efficiently.
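+// For example, ADR(P), MOVN/MOVZ, "mov Rd, #imm" encoded as ORR Rd, ZR, #imm,
+// plain register copy idioms and FP zeroing idioms all qualify.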
+def ExynosResetFn : TIIPredicate<
+ "isExynosResetFast",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ [ADR, ADRP,
+ MOVNWi, MOVNXi,
+ MOVZWi, MOVZXi],
+ MCReturnStatement<TruePred>>,
+ MCOpcodeSwitchCase<
+ [ORRWri, ORRXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>]>>>],
+ MCReturnStatement<
+ CheckAny<
+ [IsCopyIdiomFn,
+ IsZeroFPIdiomFn]>>>>;
+def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
+
+// Identify EXTR as the alias for ROR (immediate).
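+// That is, EXTR Rd, Rn, Rn, #imm with both source registers identical, which
+// is how the assembler encodes "ror Rd, Rn, #imm".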
+def ExynosRotateRightImmPred : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
+ CheckSameRegOperand<1, 2>]>>;
+
+// Identify cheap arithmetic and logic immediate instructions.
+def ExynosCheapFn : TIIPredicate<
+ "isExynosCheapAsMove",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicImmOp.ValidOpcodes,
+ MCReturnStatement<TruePred>>],
+ MCReturnStatement<
+ CheckAny<
+ [ExynosArithFn, ExynosResetFn, ExynosLogicFn]>>>>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
new file mode 100644
index 000000000000..dbaf11fc95dd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -0,0 +1,423 @@
+//===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 subtargets.
+//
+//===----------------------------------------------------------------------===//
+
+// Function mappers.
+
+// Check the extension type in arithmetic instructions.
+let FunctionMapper = "AArch64_AM::getArithExtendType" in {
+ def CheckExtUXTB : CheckImmOperand_s<3, "AArch64_AM::UXTB">;
+ def CheckExtUXTH : CheckImmOperand_s<3, "AArch64_AM::UXTH">;
+ def CheckExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
+ def CheckExtUXTX : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
+ def CheckExtSXTB : CheckImmOperand_s<3, "AArch64_AM::SXTB">;
+ def CheckExtSXTH : CheckImmOperand_s<3, "AArch64_AM::SXTH">;
+ def CheckExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
+ def CheckExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
+}
+
+// Check for shifting in extended arithmetic instructions.
+foreach I = {0-3} in {
+ let FunctionMapper = "AArch64_AM::getArithShiftValue" in
+ def CheckExtBy#I : CheckImmOperand<3, I>;
+}
+
+// Check the extension type in the register offset addressing mode.
+let FunctionMapper = "AArch64_AM::getMemExtendType" in {
+ def CheckMemExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
+ def CheckMemExtLSL : CheckImmOperand_s<3, "AArch64_AM::UXTX">;
+ def CheckMemExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">;
+ def CheckMemExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">;
+}
+
+// Check for scaling in the register offset addressing mode.
+let FunctionMapper = "AArch64_AM::getMemDoShift" in
+def CheckMemScaled : CheckImmOperandSimple<3>;
+
+// Check the shifting type in arithmetic and logic instructions.
+let FunctionMapper = "AArch64_AM::getShiftType" in {
+ def CheckShiftLSL : CheckImmOperand_s<3, "AArch64_AM::LSL">;
+ def CheckShiftLSR : CheckImmOperand_s<3, "AArch64_AM::LSR">;
+ def CheckShiftASR : CheckImmOperand_s<3, "AArch64_AM::ASR">;
+ def CheckShiftROR : CheckImmOperand_s<3, "AArch64_AM::ROR">;
+ def CheckShiftMSL : CheckImmOperand_s<3, "AArch64_AM::MSL">;
+}
+
+// Check for shifting in arithmetic and logic instructions.
+foreach I = {0-3, 8} in {
+ let FunctionMapper = "AArch64_AM::getShiftValue" in
+ def CheckShiftBy#I : CheckImmOperand<3, I>;
+}
+
+// Generic predicates.
+
+// Identify whether an instruction is the 64-bit NEON form based on its result.
+def CheckDForm : CheckAll<[CheckIsRegOperand<0>,
+ CheckAny<[CheckRegOperand<0, D0>,
+ CheckRegOperand<0, D1>,
+ CheckRegOperand<0, D2>,
+ CheckRegOperand<0, D3>,
+ CheckRegOperand<0, D4>,
+ CheckRegOperand<0, D5>,
+ CheckRegOperand<0, D6>,
+ CheckRegOperand<0, D7>,
+ CheckRegOperand<0, D8>,
+ CheckRegOperand<0, D9>,
+ CheckRegOperand<0, D10>,
+ CheckRegOperand<0, D11>,
+ CheckRegOperand<0, D12>,
+ CheckRegOperand<0, D13>,
+ CheckRegOperand<0, D14>,
+ CheckRegOperand<0, D15>,
+ CheckRegOperand<0, D16>,
+ CheckRegOperand<0, D17>,
+ CheckRegOperand<0, D18>,
+ CheckRegOperand<0, D19>,
+ CheckRegOperand<0, D20>,
+ CheckRegOperand<0, D21>,
+ CheckRegOperand<0, D22>,
+ CheckRegOperand<0, D23>,
+ CheckRegOperand<0, D24>,
+ CheckRegOperand<0, D25>,
+ CheckRegOperand<0, D26>,
+ CheckRegOperand<0, D27>,
+ CheckRegOperand<0, D28>,
+ CheckRegOperand<0, D29>,
+ CheckRegOperand<0, D30>,
+ CheckRegOperand<0, D31>]>]>;
+
+// Identify whether an instruction is the 128-bit NEON form based on its result.
+def CheckQForm : CheckAll<[CheckIsRegOperand<0>,
+ CheckAny<[CheckRegOperand<0, Q0>,
+ CheckRegOperand<0, Q1>,
+ CheckRegOperand<0, Q2>,
+ CheckRegOperand<0, Q3>,
+ CheckRegOperand<0, Q4>,
+ CheckRegOperand<0, Q5>,
+ CheckRegOperand<0, Q6>,
+ CheckRegOperand<0, Q7>,
+ CheckRegOperand<0, Q8>,
+ CheckRegOperand<0, Q9>,
+ CheckRegOperand<0, Q10>,
+ CheckRegOperand<0, Q11>,
+ CheckRegOperand<0, Q12>,
+ CheckRegOperand<0, Q13>,
+ CheckRegOperand<0, Q14>,
+ CheckRegOperand<0, Q15>,
+ CheckRegOperand<0, Q16>,
+ CheckRegOperand<0, Q17>,
+ CheckRegOperand<0, Q18>,
+ CheckRegOperand<0, Q19>,
+ CheckRegOperand<0, Q20>,
+ CheckRegOperand<0, Q21>,
+ CheckRegOperand<0, Q22>,
+ CheckRegOperand<0, Q23>,
+ CheckRegOperand<0, Q24>,
+ CheckRegOperand<0, Q25>,
+ CheckRegOperand<0, Q26>,
+ CheckRegOperand<0, Q27>,
+ CheckRegOperand<0, Q28>,
+ CheckRegOperand<0, Q29>,
+ CheckRegOperand<0, Q30>,
+ CheckRegOperand<0, Q31>]>]>;
+
+// Identify arithmetic instructions with extend.
+def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
+ SUBWrx, SUBXrx, SUBSWrx, SUBSXrx,
+ ADDXrx64, ADDSXrx64,
+ SUBXrx64, SUBSXrx64]>;
+
+// Identify arithmetic immediate instructions.
+def IsArithImmOp : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri,
+ SUBWri, SUBXri, SUBSWri, SUBSXri]>;
+
+// Identify arithmetic instructions with shift.
+def IsArithShiftOp : CheckOpcode<[ADDWrs, ADDXrs, ADDSWrs, ADDSXrs,
+ SUBWrs, SUBXrs, SUBSWrs, SUBSXrs]>;
+
+// Identify arithmetic instructions without shift.
+def IsArithUnshiftOp : CheckOpcode<[ADDWrr, ADDXrr, ADDSWrr, ADDSXrr,
+ SUBWrr, SUBXrr, SUBSWrr, SUBSXrr]>;
+
+// Identify logic immediate instructions.
+def IsLogicImmOp : CheckOpcode<[ANDWri, ANDXri,
+ EORWri, EORXri,
+ ORRWri, ORRXri]>;
+
+// Identify logic instructions with shift.
+def IsLogicShiftOp : CheckOpcode<[ANDWrs, ANDXrs, ANDSWrs, ANDSXrs,
+ BICWrs, BICXrs, BICSWrs, BICSXrs,
+ EONWrs, EONXrs,
+ EORWrs, EORXrs,
+ ORNWrs, ORNXrs,
+ ORRWrs, ORRXrs]>;
+
+// Identify logic instructions without shift.
+def IsLogicUnshiftOp : CheckOpcode<[ANDWrr, ANDXrr, ANDSWrr, ANDSXrr,
+ BICWrr, BICXrr, BICSWrr, BICSXrr,
+ EONWrr, EONXrr,
+ EORWrr, EORXrr,
+ ORNWrr, ORNXrr,
+ ORRWrr, ORRXrr]>;
+
+// Identify arithmetic and logic immediate instructions.
+def IsArithLogicImmOp : CheckOpcode<!listconcat(IsArithImmOp.ValidOpcodes,
+ IsLogicImmOp.ValidOpcodes)>;
+
+// Identify arithmetic and logic instructions with shift.
+def IsArithLogicShiftOp : CheckOpcode<!listconcat(IsArithShiftOp.ValidOpcodes,
+ IsLogicShiftOp.ValidOpcodes)>;
+
+// Identify arithmetic and logic instructions without shift.
+def IsArithLogicUnshiftOp : CheckOpcode<!listconcat(IsArithUnshiftOp.ValidOpcodes,
+ IsLogicUnshiftOp.ValidOpcodes)>;
+
+// Identify whether an instruction is an ASIMD
+// load using the post index addressing mode.
+def IsLoadASIMDPostOp : CheckOpcode<[LD1Onev8b_POST, LD1Onev4h_POST, LD1Onev2s_POST, LD1Onev1d_POST,
+ LD1Onev16b_POST, LD1Onev8h_POST, LD1Onev4s_POST, LD1Onev2d_POST,
+ LD1Twov8b_POST, LD1Twov4h_POST, LD1Twov2s_POST, LD1Twov1d_POST,
+ LD1Twov16b_POST, LD1Twov8h_POST, LD1Twov4s_POST, LD1Twov2d_POST,
+ LD1Threev8b_POST, LD1Threev4h_POST, LD1Threev2s_POST, LD1Threev1d_POST,
+ LD1Threev16b_POST, LD1Threev8h_POST, LD1Threev4s_POST, LD1Threev2d_POST,
+ LD1Fourv8b_POST, LD1Fourv4h_POST, LD1Fourv2s_POST, LD1Fourv1d_POST,
+ LD1Fourv16b_POST, LD1Fourv8h_POST, LD1Fourv4s_POST, LD1Fourv2d_POST,
+ LD1i8_POST, LD1i16_POST, LD1i32_POST, LD1i64_POST,
+ LD1Rv8b_POST, LD1Rv4h_POST, LD1Rv2s_POST, LD1Rv1d_POST,
+ LD1Rv16b_POST, LD1Rv8h_POST, LD1Rv4s_POST, LD1Rv2d_POST,
+ LD2Twov8b_POST, LD2Twov4h_POST, LD2Twov2s_POST,
+ LD2Twov16b_POST, LD2Twov8h_POST, LD2Twov4s_POST, LD2Twov2d_POST,
+ LD2i8_POST, LD2i16_POST, LD2i32_POST, LD2i64_POST,
+ LD2Rv8b_POST, LD2Rv4h_POST, LD2Rv2s_POST, LD2Rv1d_POST,
+ LD2Rv16b_POST, LD2Rv8h_POST, LD2Rv4s_POST, LD2Rv2d_POST,
+ LD3Threev8b_POST, LD3Threev4h_POST, LD3Threev2s_POST,
+ LD3Threev16b_POST, LD3Threev8h_POST, LD3Threev4s_POST, LD3Threev2d_POST,
+ LD3i8_POST, LD3i16_POST, LD3i32_POST, LD3i64_POST,
+ LD3Rv8b_POST, LD3Rv4h_POST, LD3Rv2s_POST, LD3Rv1d_POST,
+ LD3Rv16b_POST, LD3Rv8h_POST, LD3Rv4s_POST, LD3Rv2d_POST,
+ LD4Fourv8b_POST, LD4Fourv4h_POST, LD4Fourv2s_POST,
+ LD4Fourv16b_POST, LD4Fourv8h_POST, LD4Fourv4s_POST, LD4Fourv2d_POST,
+ LD4i8_POST, LD4i16_POST, LD4i32_POST, LD4i64_POST,
+ LD4Rv8b_POST, LD4Rv4h_POST, LD4Rv2s_POST, LD4Rv1d_POST,
+ LD4Rv16b_POST, LD4Rv8h_POST, LD4Rv4s_POST, LD4Rv2d_POST]>;
+
+// Identify whether an instruction is an ASIMD
+// store using the post index addressing mode.
+def IsStoreASIMDPostOp : CheckOpcode<[ST1Onev8b_POST, ST1Onev4h_POST, ST1Onev2s_POST, ST1Onev1d_POST,
+ ST1Onev16b_POST, ST1Onev8h_POST, ST1Onev4s_POST, ST1Onev2d_POST,
+ ST1Twov8b_POST, ST1Twov4h_POST, ST1Twov2s_POST, ST1Twov1d_POST,
+ ST1Twov16b_POST, ST1Twov8h_POST, ST1Twov4s_POST, ST1Twov2d_POST,
+ ST1Threev8b_POST, ST1Threev4h_POST, ST1Threev2s_POST, ST1Threev1d_POST,
+ ST1Threev16b_POST, ST1Threev8h_POST, ST1Threev4s_POST, ST1Threev2d_POST,
+ ST1Fourv8b_POST, ST1Fourv4h_POST, ST1Fourv2s_POST, ST1Fourv1d_POST,
+ ST1Fourv16b_POST, ST1Fourv8h_POST, ST1Fourv4s_POST, ST1Fourv2d_POST,
+ ST1i8_POST, ST1i16_POST, ST1i32_POST, ST1i64_POST,
+ ST2Twov8b_POST, ST2Twov4h_POST, ST2Twov2s_POST,
+ ST2Twov16b_POST, ST2Twov8h_POST, ST2Twov4s_POST, ST2Twov2d_POST,
+ ST2i8_POST, ST2i16_POST, ST2i32_POST, ST2i64_POST,
+ ST3Threev8b_POST, ST3Threev4h_POST, ST3Threev2s_POST,
+ ST3Threev16b_POST, ST3Threev8h_POST, ST3Threev4s_POST, ST3Threev2d_POST,
+ ST3i8_POST, ST3i16_POST, ST3i32_POST, ST3i64_POST,
+ ST4Fourv8b_POST, ST4Fourv4h_POST, ST4Fourv2s_POST,
+ ST4Fourv16b_POST, ST4Fourv8h_POST, ST4Fourv4s_POST, ST4Fourv2d_POST,
+ ST4i8_POST, ST4i16_POST, ST4i32_POST, ST4i64_POST]>;
+
+// Identify whether an instruction is an ASIMD load
+// or store using the post index addressing mode.
+def IsLoadStoreASIMDPostOp : CheckOpcode<!listconcat(IsLoadASIMDPostOp.ValidOpcodes,
+ IsStoreASIMDPostOp.ValidOpcodes)>;
+
+// Identify whether an instruction is a load
+// using the register offset addressing mode.
+def IsLoadRegOffsetOp : CheckOpcode<[PRFMroW, PRFMroX,
+ LDRBBroW, LDRBBroX,
+ LDRSBWroW, LDRSBWroX, LDRSBXroW, LDRSBXroX,
+ LDRHHroW, LDRHHroX,
+ LDRSHWroW, LDRSHWroX, LDRSHXroW, LDRSHXroX,
+ LDRWroW, LDRWroX,
+ LDRSWroW, LDRSWroX,
+ LDRXroW, LDRXroX,
+ LDRBroW, LDRBroX,
+ LDRHroW, LDRHroX,
+ LDRSroW, LDRSroX,
+ LDRDroW, LDRDroX]>;
+
+// Identify whether an instruction is a store
+// using the register offset addressing mode.
+def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
+ STRHHroW, STRHHroX,
+ STRWroW, STRWroX,
+ STRXroW, STRXroX,
+ STRBroW, STRBroX,
+ STRHroW, STRHroX,
+ STRSroW, STRSroX,
+ STRDroW, STRDroX]>;
+
+// Identify whether an instruction is a load or
+// store using the register offset addressing mode.
+def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
+ IsStoreRegOffsetOp.ValidOpcodes)>;
+
+// Identify whether an instruction whose result is a long vector
+// operates on the upper half of the input registers.
+def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
+ FCVTNv8i16, FCVTNv4i32,
+ FCVTXNv4f32,
+ PMULLv16i8, PMULLv2i64,
+ RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
+ RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
+ RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
+ SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
+ SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
+ SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
+ SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
+ SHLLv16i8, SHLLv8i16, SHLLv4i32,
+ SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
+ SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
+ SMLALv8i16_indexed, SMLALv4i32_indexed,
+ SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
+ SMLSLv8i16_indexed, SMLSLv4i32_indexed,
+ SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
+ SMULLv8i16_indexed, SMULLv4i32_indexed,
+ SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
+ SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
+ SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
+ SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
+ SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
+ SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
+ SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
+ SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
+ SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
+ SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
+ SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
+ SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
+ SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
+ SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
+ SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
+ UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
+ UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
+ UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
+ UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
+ UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
+ UMLALv8i16_indexed, UMLALv4i32_indexed,
+ UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
+ UMLSLv8i16_indexed, UMLSLv4i32_indexed,
+ UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
+ UMULLv8i16_indexed, UMULLv4i32_indexed,
+ UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
+ UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
+ USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
+ USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
+ USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
+ XTNv16i8, XTNv8i16, XTNv4i32]>;
+
+// Target predicates.
+
+// Identify an instruction that effectively transfers a register to another.
+def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom",
+ MCOpcodeSwitchStatement<
+ [// MOV {Rd, SP}, {SP, Rn} =>
+ // ADD {Rd, SP}, {SP, Rn}, #0
+ MCOpcodeSwitchCase<
+ [ADDWri, ADDXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<0>,
+ CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<0, WSP>,
+ CheckRegOperand<0, SP>,
+ CheckRegOperand<1, WSP>,
+ CheckRegOperand<1, SP>]>,
+ CheckZeroOperand<2>]>>>,
+ // MOV Rd, Rm =>
+ // ORR Rd, ZR, Rm, LSL #0
+ MCOpcodeSwitchCase<
+ [ORRWrs, ORRXrs],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckIsRegOperand<2>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>,
+ CheckShiftBy0]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsCopyIdiomPred : MCSchedPredicate<IsCopyIdiomFn>;
+
+// Identify arithmetic instructions with an extended register.
+def RegExtendedFn : TIIPredicate<"hasExtendedReg",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithExtOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckNot<CheckZeroOperand<3>>>>],
+ MCReturnStatement<FalsePred>>>;
+def RegExtendedPred : MCSchedPredicate<RegExtendedFn>;
+
+// Identify arithmetic and logic instructions with a shifted register.
+def RegShiftedFn : TIIPredicate<"hasShiftedReg",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsArithLogicShiftOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckNot<CheckZeroOperand<3>>>>],
+ MCReturnStatement<FalsePred>>>;
+def RegShiftedPred : MCSchedPredicate<RegShiftedFn>;
+
+// Identify a load or store using the register offset addressing mode
+// with an extended or scaled register.
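+// In practice this means any extend other than a plain LSL, or an LSL whose
+// offset register is actually scaled (shifted) by the access size.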
+def ScaledIdxFn : TIIPredicate<"isScaledAddr",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<
+ IsLoadStoreRegOffsetOp.ValidOpcodes,
+ MCReturnStatement<
+ CheckAny<[CheckNot<CheckMemExtLSL>,
+ CheckMemScaled]>>>],
+ MCReturnStatement<FalsePred>>>;
+def ScaledIdxPred : MCSchedPredicate<ScaledIdxFn>;
+
+// Identify an instruction that effectively resets an FP register to zero.
+def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom",
+ MCOpcodeSwitchStatement<
+ [// MOVI Vd, #0
+ MCOpcodeSwitchCase<
+ [MOVIv8b_ns, MOVIv16b_ns,
+ MOVID, MOVIv2d_ns],
+ MCReturnStatement<CheckZeroOperand<1>>>,
+ // MOVI Vd, #0, LSL #0
+ MCOpcodeSwitchCase<
+ [MOVIv4i16, MOVIv8i16,
+ MOVIv2i32, MOVIv4i32],
+ MCReturnStatement<
+ CheckAll<
+ [CheckZeroOperand<1>,
+ CheckZeroOperand<2>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsZeroFPIdiomPred : MCSchedPredicate<IsZeroFPIdiomFn>;
+
+// Identify an instruction that effectively resets a GP register to zero.
+def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom",
+ MCOpcodeSwitchStatement<
+ [// ORR Rd, ZR, #0
+ MCOpcodeSwitchCase<
+ [ORRWri, ORRXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<1, WZR>,
+ CheckRegOperand<1, XZR>]>,
+ CheckZeroOperand<2>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsZeroIdiomPred : MCSchedPredicate<IsZeroIdiomFn>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
index ce81f48acf71..f55ba4d42fce 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Schedule.td
@@ -50,17 +50,6 @@ def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
-// Predicate for determining when a shiftable register is shifted.
-def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>;
-
-// Predicate for determining when a extendedable register is extended.
-def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>;
-
-// ScaledIdxPred is true if a WriteLDIdx operand will be
-// scaled. Subtargets can use this to dynamically select resources and
-// latency for WriteLDIdx and ReadAdrBase.
-def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>;
-
// Serialized two-level address load.
// EXAMPLE: LOADGot
def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
new file mode 100644
index 000000000000..e9699b0367d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -0,0 +1,641 @@
+//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass to insert code to mitigate against side channel
+// vulnerabilities that may happen under control flow miss-speculation.
+//
+// The pass implements tracking of control flow miss-speculation into a "taint"
+// register. That taint register can then be used to mask off registers with
+// sensitive data when executing under miss-speculation, a.k.a. "transient
+// execution".
+// This pass is aimed at mitigating against SpectreV1-style vulnerabilities.
+//
+// It also implements speculative load hardening, i.e. using the taint register
+// to automatically mask off loaded data.
+//
+// As a possible follow-on improvement, an intrinsics-based approach, as
+// explained at https://lwn.net/Articles/759423/, could also be implemented on
+// top of the current design.
+//
+// For AArch64, the following implementation choices are made to implement the
+// tracking of control flow miss-speculation into a taint register:
+// Some of these are different from the implementation choices made in
+// the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
+// the instruction set characteristics result in different trade-offs.
+// - The speculation hardening is done after register allocation. With a
+// relative abundance of registers, one register is reserved (X16) to be
+// the taint register. X16 is expected to not clash with other register
+// reservation mechanisms with very high probability because:
+// . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
+// . The only way for a programmer to request that X16 be used is through
+// inline assembly. In the rare case a function explicitly demands to
+// use X16/W16, this pass falls back to hardening against speculation
+// by inserting a DSB SYS/ISB barrier pair which will prevent control
+// flow speculation.
+// - It is easy to insert mask operations at this late stage as we have
+// mask operations available that don't set flags.
+// - The taint variable contains all-ones when no miss-speculation is detected,
+// and contains all-zeros when miss-speculation is detected. Therefore, when
+// masking, an AND instruction (which only changes the register to be masked,
+// no other side effects) can easily be inserted anywhere that's needed.
+// - The tracking of miss-speculation is done by using a data-flow conditional
+// select instruction (CSEL) to evaluate the flags that were also used to
+// make conditional branch direction decisions. Speculation of the CSEL
+// instruction can be limited with a CSDB instruction - so the combination of
+// CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
+// aren't speculated. When conditional branch direction gets miss-speculated,
+// the semantics of the inserted CSEL instruction are such that the taint
+// register will contain all zero bits.
+// One key requirement for this to work is that the conditional branch is
+// followed by an execution of the CSEL instruction, where the CSEL
+// instruction needs to use the same flags status as the conditional branch.
+// This means that the conditional branches must not be implemented as one
+// of the AArch64 conditional branches that do not use the flags as input
+// (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
+// selectors to not produce these instructions when speculation hardening
+// is enabled. This pass will assert if it does encounter such an instruction.
+// - On function call boundaries, the miss-speculation state is transferred from
+// the taint register X16 to be encoded in the SP register as value 0.
+//
+// For the aspect of automatically hardening loads, using the taint register,
+// (a.k.a. speculative load hardening, see
+// https://llvm.org/docs/SpeculativeLoadHardening.html), the following
+// implementation choices are made for AArch64:
+// - Many of the optimizations described at
+// https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
+// loads haven't been implemented yet - but for some of them there are
+// FIXMEs in the code.
+// - Loads that load into general purpose (X or W) registers get hardened by
+// masking the loaded data. For loads that load into other registers, the
+// address loaded from gets hardened. It is expected that hardening the
+// loaded data may be more efficient; but masking data in registers other
+// than X or W is not easy and may result in being slower than just
+// hardening the X address register loaded from.
+// - On AArch64, CSDB instructions are inserted between the masking of the
+// register and its first use, to ensure there's no non-control-flow
+// speculation that might undermine the hardening mechanism.
+//
+// Future extensions/improvements could be:
+// - Implement this functionality using full speculation barriers, akin to the
+// x86-slh-lfence option. This may be more useful for the intrinsics-based
+// approach than for the SLH approach to masking.
+// Note that this pass already inserts the full speculation barriers if the
+// function for some niche reason makes use of X16/W16.
+// - No indirect branch misprediction gets protected/instrumented yet; this
+// could be done for some indirect branches, such as switch jump tables.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-speculation-hardening"
+
+#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
+
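+// Command-line option controlling whether loads are automatically hardened
+// (step 1 in runOnMachineFunction below); defaults to enabled.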
+static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
+ cl::desc("Sanitize loads from memory."),
+ cl::init(true));
+
+namespace {
+
+class AArch64SpeculationHardening : public MachineFunctionPass {
+public:
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ static char ID;
+
+ AArch64SpeculationHardening() : MachineFunctionPass(ID) {
+ initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return AARCH64_SPECULATION_HARDENING_NAME;
+ }
+
+private:
+ unsigned MisspeculatingTaintReg;
+ unsigned MisspeculatingTaintReg32Bit;
+ bool UseControlFlowSpeculationBarrier;
+ BitVector RegsNeedingCSDBBeforeUse;
+ BitVector RegsAlreadyMasked;
+
+ bool functionUsesHardeningRegister(MachineFunction &MF) const;
+ bool instrumentControlFlow(MachineBasicBlock &MBB);
+ bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ AArch64CC::CondCode &CondCode) const;
+ void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
+ AArch64CC::CondCode &CondCode, DebugLoc DL) const;
+ void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI) const;
+ void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned TmpReg) const;
+
+ bool slhLoads(MachineBasicBlock &MBB);
+ bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI, unsigned Reg);
+ bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB);
+ bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL);
+};
+
+} // end anonymous namespace
+
+char AArch64SpeculationHardening::ID = 0;
+
+INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
+ AARCH64_SPECULATION_HARDENING_NAME, false, false)
+
+bool AArch64SpeculationHardening::endsWithCondControlFlow(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ AArch64CC::CondCode &CondCode) const {
+ SmallVector<MachineOperand, 1> analyzeBranchCondCode;
+ if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
+ return false;
+
+ // Ignore if the BB ends in an unconditional branch/fall-through.
+ if (analyzeBranchCondCode.empty())
+ return false;
+
+ // If the BB ends with a single conditional branch, FBB will be set to
+ // nullptr (see API docs for TII->analyzeBranch). For the rest of the
+ // analysis we always want the FBB block to be set.
+ assert(TBB != nullptr);
+ if (FBB == nullptr)
+ FBB = MBB.getFallThrough();
+
+ // If both the true and the false condition jump to the same basic block,
+ // there is no need for any protection - whether the branch is speculated
+ // correctly or not, we end up executing the architecturally correct code.
+ if (TBB == FBB)
+ return false;
+
+ assert(MBB.succ_size() == 2);
+ // translate analyzeBranchCondCode to CondCode.
+ assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
+ CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
+ return true;
+}
+
+void AArch64SpeculationHardening::insertTrackingCode(
+ MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
+ DebugLoc DL) const {
+ if (UseControlFlowSpeculationBarrier) {
+ // insert full control flow speculation barrier (DSB SYS + ISB)
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
+ .addImm(0xf);
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
+ .addImm(0xf);
+ } else {
+ BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
+ .addDef(MisspeculatingTaintReg)
+ .addUse(MisspeculatingTaintReg)
+ .addUse(AArch64::XZR)
+ .addImm(CondCode);
+ SplitEdgeBB.addLiveIn(AArch64::NZCV);
+ }
+}
+
+bool AArch64SpeculationHardening::instrumentControlFlow(
+ MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
+
+ bool Modified = false;
+ MachineBasicBlock *TBB = nullptr;
+ MachineBasicBlock *FBB = nullptr;
+ AArch64CC::CondCode CondCode;
+
+ if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
+ LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
+ } else {
+ // Now insert:
+ // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
+ // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
+ // edge.
+ AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
+
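+ // The tracking CSEL must only execute on the edge it corresponds to, so
+ // insert it into a block split off that edge rather than directly into TBB
+ // or FBB, which may have other predecessors.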
+ MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
+ MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
+
+ assert(SplitEdgeTBB != nullptr);
+ assert(SplitEdgeFBB != nullptr);
+
+ DebugLoc DL;
+ if (MBB.instr_end() != MBB.instr_begin())
+ DL = (--MBB.instr_end())->getDebugLoc();
+
+ insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
+ insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
+
+ LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
+ LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
+ Modified = true;
+ }
+
+ // Perform correct code generation around function calls and before returns.
+ {
+ SmallVector<MachineInstr *, 4> ReturnInstructions;
+ SmallVector<MachineInstr *, 4> CallInstructions;
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.isReturn())
+ ReturnInstructions.push_back(&MI);
+ else if (MI.isCall())
+ CallInstructions.push_back(&MI);
+ }
+
+ Modified |=
+ (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+
+ for (MachineInstr *Return : ReturnInstructions)
+ insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
+ for (MachineInstr *Call : CallInstructions) {
+ // Just after the call:
+ MachineBasicBlock::iterator i = Call;
+ i++;
+ insertSPToRegTaintPropagation(Call->getParent(), i);
+ // Just before the call:
+ insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+ }
+ }
+
+ return Modified;
+}
+
+void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+ // If full control flow speculation barriers are used, emit a control flow
+ // barrier to block potential miss-speculation in flight coming into this
+ // function.
+ if (UseControlFlowSpeculationBarrier) {
+ // insert full control flow speculation barrier (DSB SYS + ISB)
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+ return;
+ }
+
+ // CMP SP, #0 === SUBS xzr, SP, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+ .addDef(AArch64::XZR)
+ .addUse(AArch64::SP)
+ .addImm(0)
+ .addImm(0); // no shift
+ // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+ .addDef(MisspeculatingTaintReg)
+ .addUse(AArch64::XZR)
+ .addUse(AArch64::XZR)
+ .addImm(AArch64CC::EQ);
+}
+
+void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+ unsigned TmpReg) const {
+ // If full control flow speculation barriers are used, there will not be
+ // miss-speculation when returning from this function, and therefore, also
+ // no need to encode potential miss-speculation into the stack pointer.
+ if (UseControlFlowSpeculationBarrier)
+ return;
+
+ // mov Xtmp, SP === ADD Xtmp, SP, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(TmpReg)
+ .addUse(AArch64::SP)
+ .addImm(0)
+ .addImm(0); // no shift
+ // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, LSL #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+ .addDef(TmpReg, RegState::Renamable)
+ .addUse(TmpReg, RegState::Kill | RegState::Renamable)
+ .addUse(MisspeculatingTaintReg, RegState::Kill)
+ .addImm(0);
+ // mov SP, Xtmp === ADD SP, Xtmp, #0
+ BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ .addDef(AArch64::SP)
+ .addUse(TmpReg, RegState::Kill)
+ .addImm(0)
+ .addImm(0); // no shift
+}
+
+bool AArch64SpeculationHardening::functionUsesHardeningRegister(
+ MachineFunction &MF) const {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // treat function calls specially, as the hardening register does not
+ // need to remain live across function calls.
+ if (MI.isCall())
+ continue;
+ if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
+ MI.modifiesRegister(MisspeculatingTaintReg, TRI))
+ return true;
+ }
+ }
+ return false;
+}
+
+// Make GPR register Reg speculation-safe by putting it through the
+// SpeculationSafeValue pseudo instruction, if we can't prove that
+// the value in the register has already been hardened.
+bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
+ unsigned Reg) {
+ assert(AArch64::GPR32allRegClass.contains(Reg) ||
+ AArch64::GPR64allRegClass.contains(Reg));
+
+ // Loads cannot directly load a value into the SP (nor WSP).
+ // Therefore, if Reg is SP or WSP, it is because the instruction loads from
+ // the stack through the stack pointer.
+ //
+ // Since the stack pointer is never dynamically controllable, don't harden it.
+ if (Reg == AArch64::SP || Reg == AArch64::WSP)
+ return false;
+
+ // Do not harden the register again if already hardened before.
+ if (RegsAlreadyMasked[Reg])
+ return false;
+
+ const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
+ LLVM_DEBUG(dbgs() << "About to harden register: " << Reg << "\n");
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
+ : AArch64::SpeculationSafeValueW))
+ .addDef(Reg)
+ .addUse(Reg);
+ RegsAlreadyMasked.set(Reg);
+ return true;
+}
+
+bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);
+
+ RegsAlreadyMasked.reset();
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ // Only harden loaded values or addresses used in loads.
+ if (!MI.mayLoad())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "About to harden: " << MI);
+
+ // For general purpose register loads, harden the registers loaded into.
+ // For other loads, harden the address loaded from.
+ // Masking the loaded value is expected to result in less performance
+ // overhead, as the load can still execute speculatively in comparison to
+ // when the address loaded from gets masked. However, masking is only
+ // easy to do efficiently on GPR registers, so for loads into non-GPR
+ // registers (e.g. floating point loads), mask the address loaded from.
+ bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
+ return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
+ AArch64::GPR64allRegClass.contains(Op.getReg()));
+ });
+ // FIXME: it might be a worthwhile optimization to not mask loaded
+ // values if all the registers involved in address calculation are already
+ // hardened, leading to this load not being able to execute on a
+ // miss-speculated path.
+ bool HardenLoadedData = AllDefsAreGPR;
+ bool HardenAddressLoadedFrom = !HardenLoadedData;
+
+ // First remove registers from RegsAlreadyMasked if their value is
+ // updated by this instruction - it makes them contain a new value that is
+ // not guaranteed to already have been masked.
+ for (MachineOperand Op : MI.defs())
+ for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
+ RegsAlreadyMasked.reset(*AI);
+
+ // FIXME: loads from the stack with an immediate offset from the stack
+ // pointer probably shouldn't be hardened, which could result in a
+ // significant optimization. See section "Don't check loads from
+ // compile-time constant stack offsets", in
+ // https://llvm.org/docs/SpeculativeLoadHardening.html
+
+ if (HardenLoadedData)
+ for (auto Def : MI.defs()) {
+ if (Def.isDead())
+ // Do not mask a register that is not used further.
+ continue;
+ // FIXME: For pre/post-increment addressing modes, the base register
+ // used in address calculation is also defined by this instruction.
+ // It might be a worthwhile optimization to not harden that
+ // base register increment/decrement when the increment/decrement is
+ // an immediate.
+ Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
+ }
+
+ if (HardenAddressLoadedFrom)
+ for (auto Use : MI.uses()) {
+ if (!Use.isReg())
+ continue;
+ unsigned Reg = Use.getReg();
+ // Some loads of floating point data have implicit defs/uses on a
+ // super register of that floating point data. Some examples:
+ // $s0 = LDRSui $sp, 22, implicit-def $q0
+ // $q0 = LD1i64 $q0, 1, renamable $x0
+ // We need to filter out these uses for non-GPR registers, which occur
+ // because the load partially fills a non-GPR register with the loaded
+ // data. Just skipping all non-GPR registers is safe (for now) as all
+ // AArch64 load instructions only use GPR registers to perform the
+ // address calculation. FIXME: However that might change once we can
+ // produce SVE gather instructions.
+ if (!(AArch64::GPR32allRegClass.contains(Reg) ||
+ AArch64::GPR64allRegClass.contains(Reg)))
+ continue;
+ Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
+ }
+ }
+ return Modified;
+}
+
+/// \brief If MBBI references a pseudo instruction that should be expanded
+/// here, do the expansion and return true. Otherwise return false.
+bool AArch64SpeculationHardening::expandSpeculationSafeValue(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ bool Is64Bit = true;
+
+ switch (Opcode) {
+ default:
+ break;
+ case AArch64::SpeculationSafeValueW:
+ Is64Bit = false;
+ LLVM_FALLTHROUGH;
+ case AArch64::SpeculationSafeValueX:
+ // Just remove the SpeculationSafe pseudos if control flow
+ // miss-speculation isn't happening, because we're already inserting
+ // barriers to guarantee that.
+ if (!UseControlFlowSpeculationBarrier) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ // Mark this register and all its aliasing registers as needing to be
+ // value speculation hardened before its next use, by using a CSDB
+ // barrier instruction.
+ for (MachineOperand Op : MI.defs())
+ for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
+ RegsNeedingCSDBBeforeUse.set(*AI);
+
+ // Mask off with taint state.
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
+ .addDef(DstReg)
+ .addUse(SrcReg, RegState::Kill)
+ .addUse(Is64Bit ? MisspeculatingTaintReg
+ : MisspeculatingTaintReg32Bit)
+ .addImm(0);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) {
+ assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
+ "control flow miss-speculation "
+ "is already blocked");
+ // insert data value speculation barrier (CSDB)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
+ RegsNeedingCSDBBeforeUse.reset();
+ return true;
+}
+
+bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
+ MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ RegsNeedingCSDBBeforeUse.reset();
+
+ // The following loop iterates over all instructions in the basic block,
+ // and performs 2 operations:
+ // 1. Insert a CSDB at this location if needed.
+ // 2. Expand the SpeculationSafeValuePseudo if the current instruction is
+ // one.
+ //
+ // The insertion of the CSDB is done as late as possible (i.e. just before
+ // the use of a masked register), in the hope that this will reduce the
+ // total number of CSDBs in a block when there are multiple masked registers
+ // in the block.
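+ // For example, if both x1 and x2 were masked earlier in the block, a single
+ // CSDB emitted before the first instruction that uses either of them covers
+ // both registers.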
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ DebugLoc DL;
+ while (MBBI != E) {
+ MachineInstr &MI = *MBBI;
+ DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+
+ // First check if a CSDB needs to be inserted due to earlier registers
+ // that were masked and that are used by the next instruction.
+ // Also emit the barrier on any potential control flow changes.
+ bool NeedToEmitBarrier = false;
+ if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
+ NeedToEmitBarrier = true;
+ if (!NeedToEmitBarrier)
+ for (MachineOperand Op : MI.uses())
+ if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
+ NeedToEmitBarrier = true;
+ break;
+ }
+
+ if (NeedToEmitBarrier)
+ Modified |= insertCSDB(MBB, MBBI, DL);
+
+ Modified |= expandSpeculationSafeValue(MBB, MBBI);
+
+ MBBI = NMBBI;
+ }
+
+ if (RegsNeedingCSDBBeforeUse.any())
+ Modified |= insertCSDB(MBB, MBBI, DL);
+
+ return Modified;
+}
+
+bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return false;
+
+ MisspeculatingTaintReg = AArch64::X16;
+ MisspeculatingTaintReg32Bit = AArch64::W16;
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
+ RegsAlreadyMasked.resize(TRI->getNumRegs());
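+ // If the function makes explicit use of X16/W16, fall back to full
+ // speculation barriers instead of tracking the taint in X16 (see the file
+ // header comment for details).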
+ UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
+
+ bool Modified = false;
+
+ // Step 1: Enable automatic insertion of SpeculationSafeValue.
+ if (HardenLoads) {
+ LLVM_DEBUG(
+ dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
+ "SpeculationSafeValue intrinsics *****\n");
+ for (auto &MBB : MF)
+ Modified |= slhLoads(MBB);
+ }
+
+ // Step 2.a: Add instrumentation code to function entries and exits.
+ LLVM_DEBUG(
+ dbgs()
+ << "***** AArch64SpeculationHardening - track control flow *****\n");
+
+ SmallVector<MachineBasicBlock *, 2> EntryBlocks;
+ EntryBlocks.push_back(&MF.front());
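+ // Landing pads are additional entry points (entered from the unwinder), so
+ // the taint also needs to be re-derived from SP at the start of each one.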
+ for (const LandingPadInfo &LPI : MF.getLandingPads())
+ EntryBlocks.push_back(LPI.LandingPadBlock);
+ for (auto Entry : EntryBlocks)
+ insertSPToRegTaintPropagation(
+ Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
+
+ // Step 2.b: Add instrumentation code to every basic block.
+ for (auto &MBB : MF)
+ Modified |= instrumentControlFlow(MBB);
+
+ LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering "
+ "SpeculationSafeValue Pseudos *****\n");
+ // Step 3: Lower SpeculationSafeValue pseudo instructions.
+ for (auto &MBB : MF)
+ Modified |= lowerSpeculationSafeValuePseudos(MBB);
+
+ return Modified;
+}
+
+/// \brief Returns an instance of the pseudo instruction expansion pass.
+FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
+ return new AArch64SpeculationHardening();
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index fc7b5984fe3e..d5643d384283 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -148,9 +148,11 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
for (auto &MI : MBB) {
if (!isNarrowFPStore(MI))
continue;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) {
+ if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
+ BaseOp->isReg()) {
+ unsigned BaseReg = BaseOp->getReg();
if (PrevBaseReg == BaseReg) {
// If this block can take STPs, skip ahead to the next block.
if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 04bb90d30d6d..dd30d25b2b50 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -14,13 +14,13 @@
#include "AArch64Subtarget.h"
#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64PBQPRegAlloc.h"
-#include "AArch64TargetMachine.h"
-
#include "AArch64CallLowering.h"
+#include "AArch64InstrInfo.h"
#include "AArch64LegalizerInfo.h"
+#include "AArch64PBQPRegAlloc.h"
#include "AArch64RegisterBankInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
@@ -67,16 +67,30 @@ void AArch64Subtarget::initializeProperties() {
// this in the future so we can specify it together with the subtarget
// features.
switch (ARMProcFamily) {
+ case Others:
+ break;
+ case CortexA35:
+ break;
+ case CortexA53:
+ PrefFunctionAlignment = 3;
+ break;
+ case CortexA55:
+ break;
+ case CortexA57:
+ MaxInterleaveFactor = 4;
+ PrefFunctionAlignment = 4;
+ break;
+ case CortexA72:
+ case CortexA73:
+ case CortexA75:
+ PrefFunctionAlignment = 4;
+ break;
case Cyclone:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 3;
break;
- case CortexA57:
- MaxInterleaveFactor = 4;
- PrefFunctionAlignment = 4;
- break;
case ExynosM1:
MaxInterleaveFactor = 4;
MaxJumpTableSize = 8;
@@ -98,11 +112,6 @@ void AArch64Subtarget::initializeProperties() {
MinPrefetchStride = 2048;
MaxPrefetchIterationsAhead = 8;
break;
- case Saphira:
- MaxInterleaveFactor = 4;
- // FIXME: remove this to enable 64-bit SLP if performance looks good.
- MinVectorRegisterBitWidth = 128;
- break;
case Kryo:
MaxInterleaveFactor = 4;
VectorInsertExtractBaseCost = 2;
@@ -113,6 +122,11 @@ void AArch64Subtarget::initializeProperties() {
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
+ case Saphira:
+ MaxInterleaveFactor = 4;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
case ThunderX2T99:
CacheLineSize = 64;
PrefFunctionAlignment = 3;
@@ -134,17 +148,11 @@ void AArch64Subtarget::initializeProperties() {
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
- case CortexA35: break;
- case CortexA53:
- PrefFunctionAlignment = 3;
- break;
- case CortexA55: break;
- case CortexA72:
- case CortexA73:
- case CortexA75:
+ case TSV110:
+ CacheLineSize = 64;
PrefFunctionAlignment = 4;
+ PrefLoopAlignment = 2;
break;
- case Others: break;
}
}
@@ -152,10 +160,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS),
- ReserveX18(AArch64::isX18ReservedByDefault(TT)), IsLittle(LittleEndian),
+ ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
+ CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
+ IsLittle(LittleEndian),
TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
TLInfo(TM, *this) {
+ if (AArch64::isX18ReservedByDefault(TT))
+ ReserveXRegister.set(18);
+
CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
Legalizer.reset(new AArch64LegalizerInfo(*this));
@@ -196,18 +209,22 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
return AArch64II::MO_GOT;
- unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
- : AArch64II::MO_NO_FLAG;
-
- if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
- return AArch64II::MO_GOT | Flags;
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
+ if (GV->hasDLLImportStorageClass())
+ return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
+ if (getTargetTriple().isOSWindows())
+ return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
+ return AArch64II::MO_GOT;
+ }
// The small code model's direct accesses use ADRP, which cannot
// necessarily produce the value 0 (if the code is above 4GB).
- if (useSmallAddressing() && GV->hasExternalWeakLinkage())
- return AArch64II::MO_GOT | Flags;
+ // Same for the tiny code model, where we have a pc relative LDR.
+ if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
+ GV->hasExternalWeakLinkage())
+ return AArch64II::MO_GOT;
- return Flags;
+ return AArch64II::MO_NO_FLAG;
}
unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
@@ -265,7 +282,7 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
// We usually compute max call frame size after ISel. Do the computation now
// if the .mir file didn't specify it. Note that this will probably give you
// bogus values after PEI has eliminated the callframe setup/destroy pseudo
- // instructions, specify explicitely if you need it to be correct.
+ // instructions, specify explicitly if you need it to be correct.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.isMaxCallFrameSizeComputed())
MFI.computeMaxCallFrameSize(MF);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5af4c0dd9c19..82f7bb755951 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -56,7 +56,8 @@ public:
ThunderX,
ThunderXT81,
ThunderXT83,
- ThunderXT88
+ ThunderXT88,
+ TSV110
};
protected:
@@ -67,6 +68,7 @@ protected:
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
+ bool HasV8_5aOps = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
@@ -78,8 +80,36 @@ protected:
bool HasRDM = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
+ bool HasFP16FML = false;
bool HasSPE = false;
+ // ARMv8.1 extensions
+ bool HasVH = false;
+ bool HasPAN = false;
+ bool HasLOR = false;
+
+ // ARMv8.2 extensions
+ bool HasPsUAO = false;
+ bool HasPAN_RWV = false;
+ bool HasCCPP = false;
+
+ // ARMv8.3 extensions
+ bool HasPA = false;
+ bool HasJS = false;
+ bool HasCCIDX = false;
+ bool HasComplxNum = false;
+
+ // ARMv8.4 extensions
+ bool HasNV = false;
+ bool HasRASv8_4 = false;
+ bool HasMPAM = false;
+ bool HasDIT = false;
+ bool HasTRACEV8_4 = false;
+ bool HasAM = false;
+ bool HasSEL2 = false;
+ bool HasTLB_RMI = false;
+ bool HasFMI = false;
+ bool HasRCPC_IMMO = false;
// ARMv8.4 Crypto extensions
bool HasSM4 = true;
bool HasSHA3 = true;
@@ -92,11 +122,25 @@ protected:
bool HasRCPC = false;
bool HasAggressiveFMA = false;
+ // Armv8.5-A Extensions
+ bool HasAlternativeNZCV = false;
+ bool HasFRInt3264 = false;
+ bool HasSpecRestrict = false;
+ bool HasSSBS = false;
+ bool HasSB = false;
+ bool HasPredRes = false;
+ bool HasCCDP = false;
+ bool HasBTI = false;
+ bool HasRandGen = false;
+ bool HasMTE = false;
+
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
+ bool HasZeroCycleZeroingGP = false;
+ bool HasZeroCycleZeroingFP = false;
bool HasZeroCycleZeroingFPWorkaround = false;
// StrictAlign - Disallow unaligned memory accesses.
@@ -122,10 +166,13 @@ protected:
bool HasArithmeticCbzFusion = false;
bool HasFuseAddress = false;
bool HasFuseAES = false;
+ bool HasFuseArithmeticLogic = false;
bool HasFuseCCSelect = false;
+ bool HasFuseCryptoEOR = false;
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
+ bool Force32BitJumpTables = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -137,11 +184,11 @@ protected:
unsigned MaxJumpTableSize = 0;
unsigned WideningBaseCost = 0;
- // ReserveX18 - X18 is not available as a general purpose register.
- bool ReserveX18;
+ // ReserveXRegister[i] - X#i is not available as a general purpose register.
+ BitVector ReserveXRegister;
- // ReserveX20 - X20 is not available as a general purpose register.
- bool ReserveX20 = false;
+ // CustomCallSavedXRegs[i] - X#i is call saved under a custom calling convention.
+ BitVector CustomCallSavedXRegs;
bool IsLittle;
@@ -211,10 +258,13 @@ public:
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
+ bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
- bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; }
+
+ bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; }
bool hasZeroCycleZeroingFPWorkaround() const {
return HasZeroCycleZeroingFPWorkaround;
@@ -228,8 +278,12 @@ public:
return MinVectorRegisterBitWidth;
}
- bool isX18Reserved() const { return ReserveX18; }
- bool isX20Reserved() const { return ReserveX20; }
+ bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
+ unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); }
+ bool isXRegCustomCalleeSaved(size_t i) const {
+ return CustomCallSavedXRegs[i];
+ }
+ bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
@@ -258,16 +312,20 @@ public:
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
bool hasFuseCCSelect() const { return HasFuseCCSelect; }
+ bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const {
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
- hasFuseAES() || hasFuseCCSelect() || hasFuseLiterals();
+ hasFuseAES() || hasFuseArithmeticLogic() ||
+ hasFuseCCSelect() || hasFuseLiterals();
}
bool useRSqrt() const { return UseRSqrt; }
+ bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
@@ -291,11 +349,22 @@ public:
bool hasPerfMon() const { return HasPerfMon; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFP16FML() const { return HasFP16FML; }
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
bool hasSVE() const { return HasSVE; }
bool hasRCPC() const { return HasRCPC; }
bool hasAggressiveFMA() const { return HasAggressiveFMA; }
+ bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
+ bool hasFRInt3264() const { return HasFRInt3264; }
+ bool hasSpecRestrict() const { return HasSpecRestrict; }
+ bool hasSSBS() const { return HasSSBS; }
+ bool hasSB() const { return HasSB; }
+ bool hasPredRes() const { return HasPredRes; }
+ bool hasCCDP() const { return HasCCDP; }
+ bool hasBTI() const { return HasBTI; }
+ bool hasRandGen() const { return HasRandGen; }
+ bool hasMTE() const { return HasMTE; }
bool isLittleEndian() const { return IsLittle; }
@@ -312,6 +381,30 @@ public:
bool useAA() const override { return UseAA; }
+ bool hasVH() const { return HasVH; }
+ bool hasPAN() const { return HasPAN; }
+ bool hasLOR() const { return HasLOR; }
+
+ bool hasPsUAO() const { return HasPsUAO; }
+ bool hasPAN_RWV() const { return HasPAN_RWV; }
+ bool hasCCPP() const { return HasCCPP; }
+
+ bool hasPA() const { return HasPA; }
+ bool hasJS() const { return HasJS; }
+ bool hasCCIDX() const { return HasCCIDX; }
+ bool hasComplxNum() const { return HasComplxNum; }
+
+ bool hasNV() const { return HasNV; }
+ bool hasRASv8_4() const { return HasRASv8_4; }
+ bool hasMPAM() const { return HasMPAM; }
+ bool hasDIT() const { return HasDIT; }
+ bool hasTRACEV8_4() const { return HasTRACEV8_4; }
+ bool hasAM() const { return HasAM; }
+ bool hasSEL2() const { return HasSEL2; }
+ bool hasTLB_RMI() const { return HasTLB_RMI; }
+ bool hasFMI() const { return HasFMI; }
+ bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
+
bool useSmallAddressing() const {
switch (TLInfo.getTargetMachine().getCodeModel()) {
case CodeModel::Kernel:
@@ -346,6 +439,8 @@ public:
bool isCallingConvWin64(CallingConv::ID CC) const {
switch (CC) {
case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::Swift:
return isTargetWindows();
case CallingConv::Win64:
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index dbc4deaf3f9f..a804fb11175b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -15,6 +15,25 @@
include "llvm/TableGen/SearchableTable.td"
//===----------------------------------------------------------------------===//
+// Features that, for the compiler, only enable system operands and PStates
+//===----------------------------------------------------------------------===//
+
+def HasCCPP : Predicate<"Subtarget->hasCCPP()">,
+ AssemblerPredicate<"FeatureCCPP", "ccpp">;
+
+def HasPAN : Predicate<"Subtarget->hasPAN()">,
+ AssemblerPredicate<"FeaturePAN",
+ "ARM v8.1 Privileged Access-Never extension">;
+
+def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">,
+ AssemblerPredicate<"FeaturePsUAO",
+ "ARM v8.2 UAO PState extension (psuao)">;
+
+def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
+ AssemblerPredicate<"FeaturePAN_RWV",
+ "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
+
+//===----------------------------------------------------------------------===//
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
@@ -45,7 +64,7 @@ def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
-let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+let Requires = [{ {AArch64::FeaturePAN_RWV} }] in {
def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
}
@@ -102,9 +121,33 @@ def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeatureCCPP} }] in
def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
+let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in
+def : DC<"CVADP", 0b011, 0b0111, 0b1101, 0b001>;
+
+let Requires = [{ {AArch64::FeatureMTE} }] in {
+def : DC<"IGVAC", 0b000, 0b0111, 0b0110, 0b011>;
+def : DC<"IGSW", 0b000, 0b0111, 0b0110, 0b100>;
+def : DC<"CGSW", 0b000, 0b0111, 0b1010, 0b100>;
+def : DC<"CIGSW", 0b000, 0b0111, 0b1110, 0b100>;
+def : DC<"CGVAC", 0b011, 0b0111, 0b1010, 0b011>;
+def : DC<"CGVAP", 0b011, 0b0111, 0b1100, 0b011>;
+def : DC<"CGVADP", 0b011, 0b0111, 0b1101, 0b011>;
+def : DC<"CIGVAC", 0b011, 0b0111, 0b1110, 0b011>;
+def : DC<"GVA", 0b011, 0b0111, 0b0100, 0b011>;
+def : DC<"IGDVAC", 0b000, 0b0111, 0b0110, 0b101>;
+def : DC<"IGDSW", 0b000, 0b0111, 0b0110, 0b110>;
+def : DC<"CGDSW", 0b000, 0b0111, 0b1010, 0b110>;
+def : DC<"CIGDSW", 0b000, 0b0111, 0b1110, 0b110>;
+def : DC<"CGDVAC", 0b011, 0b0111, 0b1010, 0b101>;
+def : DC<"CGDVAP", 0b011, 0b0111, 0b1100, 0b101>;
+def : DC<"CGDVADP", 0b011, 0b0111, 0b1101, 0b101>;
+def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>;
+def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>;
+}
+
//===----------------------------------------------------------------------===//
// IC (instruction cache maintenance) instruction options.
//===----------------------------------------------------------------------===//
@@ -154,7 +197,7 @@ class TSB<string name, bits<4> encoding> : SearchableTable{
bits<4> Encoding;
let Encoding = encoding;
- code Requires = [{ {AArch64::HasV8_4aOps} }];
+ code Requires = [{ {AArch64::FeatureTRACEV8_4} }];
}
def : TSB<"csync", 0>;
@@ -290,14 +333,21 @@ def : PState<"SPSel", 0b00101>;
def : PState<"DAIFSet", 0b11110>;
def : PState<"DAIFClr", 0b11111>;
// v8.1a "Privileged Access Never" extension-specific PStates
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeaturePAN} }] in
def : PState<"PAN", 0b00100>;
+
// v8.2a "User Access Override" extension-specific PStates
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeaturePsUAO} }] in
def : PState<"UAO", 0b00011>;
// v8.4a timining insensitivity of data processing instructions
-let Requires = [{ {AArch64::HasV8_4aOps} }] in
+let Requires = [{ {AArch64::FeatureDIT} }] in
def : PState<"DIT", 0b11010>;
+// v8.5a Spectre Mitigation
+let Requires = [{ {AArch64::FeatureSSBS} }] in
+def : PState<"SSBS", 0b11001>;
+// v8.5a Memory Tagging Extension
+let Requires = [{ {AArch64::FeatureMTE} }] in
+def : PState<"TCO", 0b11100>;
//===----------------------------------------------------------------------===//
// PSB instruction options.
@@ -315,6 +365,23 @@ class PSB<string name, bits<5> encoding> : SearchableTable {
def : PSB<"csync", 0x11>;
//===----------------------------------------------------------------------===//
+// BTI instruction options.
+//===----------------------------------------------------------------------===//
+
+class BTI<string name, bits<2> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<2> Encoding;
+ let Encoding = encoding;
+}
+
+def : BTI<"c", 0b01>;
+def : BTI<"j", 0b10>;
+def : BTI<"jc", 0b11>;
+
+//===----------------------------------------------------------------------===//
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
@@ -366,8 +433,9 @@ def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
+// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
+let Requires = [{ {AArch64::FeatureTLB_RMI} }] in {
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
-let Requires = [{ {AArch64::HasV8_4aOps} }] in {
// op1 CRn CRm op2
def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
@@ -418,6 +486,23 @@ def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
+} //FeatureTLB_RMI
+
+// Armv8.5-A Prediction Restriction by Context instruction options:
+class PRCTX<string name, bits<4> crm> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<11> Encoding;
+ let Encoding{10-4} = 0b0110111;
+ let Encoding{3-0} = crm;
+ bit NeedsReg = 1;
+ code Requires = [{ {} }];
+}
+
+let Requires = [{ {AArch64::FeaturePredRes} }] in {
+def : PRCTX<"RCTX", 0b0011>;
}
//===----------------------------------------------------------------------===//
@@ -476,8 +561,10 @@ def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
+
+// v8.3 CCIDX - extending the CCSIDR number of sets
def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> {
- let Requires = [{ {AArch64::HasV8_3aOps} }];
+ let Requires = [{ {AArch64::FeatureCCIDX} }];
}
def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>;
def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>;
@@ -487,6 +574,9 @@ def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>;
def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>;
def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>;
def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>;
+def : ROSysReg<"ID_PFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b100> {
+ let Requires = [{ {AArch64::FeatureSpecRestrict} }];
+}
def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>;
def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>;
def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>;
@@ -512,9 +602,7 @@ def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
-def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> {
- let Requires = [{ {AArch64::HasV8_2aOps} }];
-}
+def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>;
def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>;
def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>;
def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>;
@@ -584,7 +672,7 @@ def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>;
// v8.1a "Limited Ordering Regions" extension-specific system register
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeatureLOR} }] in
def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>;
// v8.2a "RAS extension" registers
@@ -594,6 +682,22 @@ def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>;
def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>;
}
+// v8.5a "random number" registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureRandGen} }] in {
+def : ROSysReg<"RNDR", 0b11, 0b011, 0b0010, 0b0100, 0b000>;
+def : ROSysReg<"RNDRRS", 0b11, 0b011, 0b0010, 0b0100, 0b001>;
+}
+
+// v8.5a Software Context Number registers
+let Requires = [{ {AArch64::FeatureSpecRestrict} }] in {
+def : RWSysReg<"SCXTNUM_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b111>;
+def : RWSysReg<"SCXTNUM_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b111>;
+}
+
//===----------------------
// Write-only regs
//===----------------------
@@ -1102,21 +1206,21 @@ def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>;
def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
// v8.1a "Privileged Access Never" extension-specific system registers
-let Requires = [{ {AArch64::HasV8_1aOps} }] in
+let Requires = [{ {AArch64::FeaturePAN} }] in
def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
// v8.1a "Limited Ordering Regions" extension-specific system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+let Requires = [{ {AArch64::FeatureLOR} }] in {
def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>;
def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>;
def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>;
def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>;
}
-// v8.1a "Virtualization hos extensions" system registers
+// v8.1a "Virtualization Host extensions" system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+let Requires = [{ {AArch64::FeatureVH} }] in {
def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>;
@@ -1147,7 +1251,7 @@ def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
}
// v8.2a registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_2aOps} }] in
+let Requires = [{ {AArch64::FeaturePsUAO} }] in
def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>;
// v8.2a "Statistical Profiling extension" registers
@@ -1184,7 +1288,7 @@ def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
// v8.3a "Pointer authentication extension" registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::HasV8_3aOps} }] in {
+let Requires = [{ {AArch64::FeaturePA} }] in {
def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
@@ -1197,8 +1301,8 @@ def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
}
-let Requires = [{ {AArch64::HasV8_4aOps} }] in {
-
+// v8.4 "Secure Exception Level 2 extension"
+let Requires = [{ {AArch64::FeatureSEL2} }] in {
// v8.4a "Virtualization secure second stage translation" registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
@@ -1216,18 +1320,22 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
// v8.4a "Virtualization debug state" registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
+} // FeatureSEL2
// v8.4a RAS registers
-// Op0 Op1 CRn CRm Op2
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureRASv8_4} }] in {
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>;
def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>;
def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>;
def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
+} // FeatureRASv8_4
// v8.4a MPAM registers
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureMPAM} }] in {
def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
@@ -1244,9 +1352,11 @@ def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
+} //FeatureMPAM
-// v8.4a Activitiy monitor registers
+// v8.4a Activity Monitor registers
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureAM} }] in {
def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>;
def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>;
def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>;
@@ -1295,6 +1405,7 @@ def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>;
def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>;
def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>;
def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
+} //FeatureAM
// v8.4a Trace Extension registers
//
@@ -1303,19 +1414,24 @@ def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
// but they are already defined above.
//
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in {
def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>;
def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>;
def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>;
+} //FeatureTRACEV8_4
// v8.4a Timining insensitivity of data processing instructions
+// DIT: Data Independent Timing instructions
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureDIT} }] in {
def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>;
+} //FeatureDIT
// v8.4a Enhanced Support for Nested Virtualization
// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureNV} }] in {
def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>;
-
-} // HasV8_4aOps
+} //FeatureNV
// SVE control registers
// Op0 Op1 CRn CRm Op2
@@ -1326,6 +1442,24 @@ def : RWSysReg<"ZCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b000>;
def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>;
}
+// V8.5a Spectre mitigation SSBS register
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureSSBS} }] in
+def : RWSysReg<"SSBS", 0b11, 0b011, 0b0100, 0b0010, 0b110>;
+
+// v8.5a Memory Tagging Extension
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureMTE} }] in {
+def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>;
+def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>;
+def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>;
+def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0110, 0b0101, 0b000>;
+def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0110, 0b0101, 0b000>;
+def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0110, 0b0110, 0b000>;
+def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0110, 0b0110, 0b000>;
+def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0110, 0b0110, 0b001>;
+} // HasMTE
+
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::ProcCyclone} }] in
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 120d71381c67..4e016525f7e4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
@@ -123,6 +124,10 @@ static cl::opt<bool>
BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
cl::desc("Relax out of range conditional branches"));
+static cl::opt<bool> EnableCompressJumpTables(
+ "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Use smallest entry possible for jump tables"));
+
// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
@@ -141,6 +146,11 @@ static cl::opt<int> EnableGlobalISelAtO(
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
+ cl::desc("Enable the AArch64 branch target pass"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -151,19 +161,23 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeAArch64A53Fix835769Pass(*PR);
initializeAArch64A57FPLoadBalancingPass(*PR);
initializeAArch64AdvSIMDScalarPass(*PR);
+ initializeAArch64BranchTargetsPass(*PR);
initializeAArch64CollectLOHPass(*PR);
+ initializeAArch64CompressJumpTablesPass(*PR);
initializeAArch64ConditionalComparesPass(*PR);
initializeAArch64ConditionOptimizerPass(*PR);
initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
+ initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
initializeFalkorHWPFFixPass(*PR);
initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
+ initializeAArch64SpeculationHardeningPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -206,18 +220,20 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
- Optional<CodeModel::Model> CM,
- bool JIT) {
+static CodeModel::Model
+getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
+ bool JIT) {
if (CM) {
- if (*CM != CodeModel::Small && *CM != CodeModel::Large) {
+ if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
+ *CM != CodeModel::Large) {
if (!TT.isOSFuchsia())
report_fatal_error(
- "Only small and large code models are allowed on AArch64");
- else if (CM != CodeModel::Kernel)
- report_fatal_error(
- "Only small, kernel, and large code models are allowed on AArch64");
- }
+ "Only small, tiny and large code models are allowed on AArch64");
+ else if (*CM != CodeModel::Kernel)
+ report_fatal_error("Only small, tiny, kernel, and large code models "
+ "are allowed on AArch64");
+ } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
+ report_fatal_error("tiny code model is only supported on ELF");
return *CM;
}
// The default MCJIT memory managers make no guarantees about where they can
@@ -240,7 +256,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T,
computeDataLayout(TT, Options.MCOptions, LittleEndian),
TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(TT, CM, JIT), OL),
+ getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
initAsmInfo();
@@ -249,9 +265,21 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
this->Options.NoTrapAfterNoreturn = true;
}
+ if (getMCAsmInfo()->usesWindowsCFI()) {
+ // Unwinding can get confused if the last instruction in an
+ // exception-handling region (function, funclet, try block, etc.)
+ // is a call.
+ //
+ // FIXME: We could elide the trap if the next instruction would be in
+ // the same region anyway.
+ this->Options.TrapUnreachable = true;
+ }
+
// Enable GlobalISel at or below EnableGlobalISelAt0.
- if (getOptLevel() <= EnableGlobalISelAtO)
+ if (getOptLevel() <= EnableGlobalISelAtO) {
setGlobalISel(true);
+ setGlobalISelAbort(GlobalISelAbortMode::Disable);
+ }
// AArch64 supports the MachineOutliner.
setMachineOutliner(true);
@@ -346,6 +374,7 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
@@ -393,8 +422,10 @@ void AArch64PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
// Match interleaved memory accesses to ldN/stN intrinsics.
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ addPass(createInterleavedLoadCombinePass());
addPass(createInterleavedAccessPass());
+ }
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
@@ -447,6 +478,10 @@ bool AArch64PassConfig::addIRTranslator() {
return false;
}
+void AArch64PassConfig::addPreLegalizeMachineIR() {
+ addPass(createAArch64PreLegalizeCombiner());
+}
+
bool AArch64PassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
@@ -516,12 +551,28 @@ void AArch64PassConfig::addPreSched2() {
if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
+ }
+
+ // The AArch64SpeculationHardeningPass destroys dominator tree and natural
+ // loop info, which is needed for the FalkorHWPFFixPass and also later on.
+ // Therefore, run the AArch64SpeculationHardeningPass before the
+ // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
+ // info.
+ addPass(createAArch64SpeculationHardeningPass());
+
+ if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableFalkorHWPFFix)
addPass(createFalkorHWPFFixPass());
}
}
void AArch64PassConfig::addPreEmitPass() {
+ // Machine Block Placement might have created new opportunities when run
+ // at O3, where the Tail Duplication Threshold is set to 4 instructions.
+ // Run the load/store optimizer once more.
+ if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
+ addPass(createAArch64LoadStoreOptimizationPass());
+
if (EnableA53Fix835769)
addPass(createAArch64A53Fix835769());
// Relax conditional branch instructions if they're otherwise out of
@@ -529,6 +580,12 @@ void AArch64PassConfig::addPreEmitPass() {
if (BranchRelaxation)
addPass(&BranchRelaxationPassID);
+ if (EnableBranchTargets)
+ addPass(createAArch64BranchTargetsPass());
+
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+ addPass(createAArch64CompressJumpTablesPass());
+
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 4bc2c060a068..8ae72a7ddb57 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -22,6 +22,9 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
+ // The AArch64 ELF ABI does not define a static relocation type for TLS
+ // offsets within a module. Do not generate AT_location for TLS variables.
+ SupportDebugThreadLocalLocation = false;
}
AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 96e751e86971..a256cb7c9215 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -659,11 +659,14 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
- if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ if (!UseMaskForCond && !UseMaskForGaps &&
+ Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -676,7 +679,8 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
@@ -945,9 +949,20 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Select ||
- Kind == TTI::SK_PermuteSingleSrc) {
+ if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
+ Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
static const CostTblEntry ShuffleTbl[] = {
+ // Broadcast shuffle kinds can be performed with 'dup'.
+ { TTI::SK_Broadcast, MVT::v8i8, 1 },
+ { TTI::SK_Broadcast, MVT::v16i8, 1 },
+ { TTI::SK_Broadcast, MVT::v4i16, 1 },
+ { TTI::SK_Broadcast, MVT::v8i16, 1 },
+ { TTI::SK_Broadcast, MVT::v2i32, 1 },
+ { TTI::SK_Broadcast, MVT::v4i32, 1 },
+ { TTI::SK_Broadcast, MVT::v2i64, 1 },
+ { TTI::SK_Broadcast, MVT::v2f32, 1 },
+ { TTI::SK_Broadcast, MVT::v4f32, 1 },
+ { TTI::SK_Broadcast, MVT::v2f64, 1 },
// Transpose shuffle kinds can be performed with 'trn1/trn2' and
// 'zip1/zip2' instructions.
{ TTI::SK_Transpose, MVT::v8i8, 1 },
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c056a7d2428b..08c1a8924220 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -146,7 +146,9 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 30a9a08f2346..6cc9b67e4d27 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -164,6 +165,7 @@ private:
OperandVector &Operands);
bool parseDirectiveArch(SMLoc L);
+ bool parseDirectiveArchExtension(SMLoc L);
bool parseDirectiveCPU(SMLoc L);
bool parseDirectiveInst(SMLoc L);
@@ -174,6 +176,8 @@ private:
bool parseDirectiveReq(StringRef Name, SMLoc L);
bool parseDirectiveUnreq(SMLoc L);
+ bool parseDirectiveCFINegateRAState();
+ bool parseDirectiveCFIBKeyFrame();
bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc);
@@ -200,6 +204,7 @@ private:
template <bool IsSVEPrefetch = false>
OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
OperandMatchResultTy tryParsePSBHint(OperandVector &Operands);
+ OperandMatchResultTy tryParseBTIHint(OperandVector &Operands);
OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
template<bool AddFPZeroAsLiteral>
@@ -282,6 +287,7 @@ private:
k_FPImm,
k_Barrier,
k_PSBHint,
+ k_BTIHint,
} Kind;
SMLoc StartLoc, EndLoc;
@@ -385,6 +391,12 @@ private:
unsigned Val;
};
+ struct BTIHintOp {
+ const char *Data;
+ unsigned Length;
+ unsigned Val;
+ };
+
struct ExtendOp {
unsigned Val;
};
@@ -403,6 +415,7 @@ private:
struct SysCRImmOp SysCRImm;
struct PrefetchOp Prefetch;
struct PSBHintOp PSBHint;
+ struct BTIHintOp BTIHint;
struct ShiftExtendOp ShiftExtend;
};
@@ -457,6 +470,9 @@ public:
case k_PSBHint:
PSBHint = o.PSBHint;
break;
+ case k_BTIHint:
+ BTIHint = o.BTIHint;
+ break;
case k_ShiftExtend:
ShiftExtend = o.ShiftExtend;
break;
@@ -568,6 +584,16 @@ public:
return StringRef(PSBHint.Data, PSBHint.Length);
}
+ unsigned getBTIHint() const {
+ assert(Kind == k_BTIHint && "Invalid access!");
+ return BTIHint.Val;
+ }
+
+ StringRef getBTIHintName() const {
+ assert(Kind == k_BTIHint && "Invalid access!");
+ return StringRef(BTIHint.Data, BTIHint.Length);
+ }
+
StringRef getPrefetchName() const {
assert(Kind == k_Prefetch && "Invalid access!");
return StringRef(Prefetch.Data, Prefetch.Length);
@@ -658,7 +684,7 @@ public:
return DiagnosticPredicateTy::NearMatch;
}
- bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const {
+ bool isSymbolicUImm12Offset(const MCExpr *Expr) const {
AArch64MCExpr::VariantKind ELFRefKind;
MCSymbolRefExpr::VariantKind DarwinRefKind;
int64_t Addend;
@@ -683,7 +709,7 @@ public:
// Note that we don't range-check the addend. It's adjusted modulo page
// size when converted, so there is no "out of range" condition when using
// @pageoff.
- return Addend >= 0 && (Addend % Scale) == 0;
+ return true;
} else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF ||
DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) {
// @gotpageoff/@tlvppageoff can only be used directly, not with an addend.
@@ -699,7 +725,7 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
- return isSymbolicUImm12Offset(getImm(), Scale);
+ return isSymbolicUImm12Offset(getImm());
int64_t Val = MCE->getValue();
return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
@@ -901,7 +927,7 @@ public:
for (unsigned i = 0; i != AllowedModifiers.size(); ++i) {
if (ELFRefKind == AllowedModifiers[i])
- return Addend == 0;
+ return true;
}
return false;
@@ -996,7 +1022,8 @@ public:
if (!isSysReg()) return false;
return (SysReg.PStateField == AArch64PState::PAN ||
SysReg.PStateField == AArch64PState::DIT ||
- SysReg.PStateField == AArch64PState::UAO);
+ SysReg.PStateField == AArch64PState::UAO ||
+ SysReg.PStateField == AArch64PState::SSBS);
}
bool isSystemPStateFieldWithImm0_15() const {
@@ -1185,6 +1212,7 @@ public:
bool isSysCR() const { return Kind == k_SysCR; }
bool isPrefetch() const { return Kind == k_Prefetch; }
bool isPSBHint() const { return Kind == k_PSBHint; }
+ bool isBTIHint() const { return Kind == k_BTIHint; }
bool isShiftExtend() const { return Kind == k_ShiftExtend; }
bool isShifter() const {
if (!isShiftExtend())
@@ -1702,6 +1730,11 @@ public:
Inst.addOperand(MCOperand::createImm(getPSBHint()));
}
+ void addBTIHintOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getBTIHint()));
+ }
+
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
@@ -1950,6 +1983,19 @@ public:
return Op;
}
+ static std::unique_ptr<AArch64Operand> CreateBTIHint(unsigned Val,
+ StringRef Str,
+ SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_BTIHint, Ctx);
+ Op->BTIHint.Val = Val << 1 | 32;
+ Op->BTIHint.Data = Str.data();
+ Op->BTIHint.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<AArch64Operand>
CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@@ -2030,6 +2076,9 @@ void AArch64Operand::print(raw_ostream &OS) const {
if (!getShiftExtendAmount() && !hasShiftExtendAmount())
break;
LLVM_FALLTHROUGH;
+ case k_BTIHint:
+ OS << getBTIHintName();
+ break;
case k_ShiftExtend:
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
@@ -2395,6 +2444,29 @@ AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
return MatchOperand_Success;
}
+/// tryParseBTIHint - Try to parse a BTI operand, mapped to a HINT instruction.
+OperandMatchResultTy
+AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ auto BTI = AArch64BTIHint::lookupBTIByName(Tok.getString());
+ if (!BTI) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(AArch64Operand::CreateBTIHint(
+ BTI->Encoding, Tok.getString(), S, getContext()));
+ return MatchOperand_Success;
+}
+
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
OperandMatchResultTy
@@ -2453,17 +2525,34 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Expr;
- const AsmToken &Tok = getParser().getTok();
- if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
- if (getParser().parseExpression(Expr))
- return MatchOperand_ParseFail;
+ // Leave anything with a bracket to the default for SVE
+ if (getParser().getTok().is(AsmToken::LBrac))
+ return MatchOperand_NoMatch;
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
+ if (getParser().getTok().is(AsmToken::Hash))
+ getParser().Lex(); // Eat hash token.
- return MatchOperand_Success;
+ if (parseSymbolicImmVal(Expr))
+ return MatchOperand_ParseFail;
+
+ AArch64MCExpr::VariantKind ELFRefKind;
+ MCSymbolRefExpr::VariantKind DarwinRefKind;
+ int64_t Addend;
+ if (classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
+ if (DarwinRefKind == MCSymbolRefExpr::VK_None &&
+ ELFRefKind == AArch64MCExpr::VK_INVALID) {
+ // No modifier was specified at all; this is the syntax for an ELF basic
+ // ADR relocation (unfortunately).
+ Expr = AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS, getContext());
+ } else {
+ Error(S, "unexpected adr label");
+ return MatchOperand_ParseFail;
+ }
}
- return MatchOperand_NoMatch;
+
+ SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
+ return MatchOperand_Success;
}
/// tryParseFPImm - A floating point immediate expression operand.
@@ -2723,6 +2812,34 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_Success;
}
+static const struct Extension {
+ const char *Name;
+ const FeatureBitset Features;
+} ExtensionMap[] = {
+ {"crc", {AArch64::FeatureCRC}},
+ {"sm4", {AArch64::FeatureSM4}},
+ {"sha3", {AArch64::FeatureSHA3}},
+ {"sha2", {AArch64::FeatureSHA2}},
+ {"aes", {AArch64::FeatureAES}},
+ {"crypto", {AArch64::FeatureCrypto}},
+ {"fp", {AArch64::FeatureFPARMv8}},
+ {"simd", {AArch64::FeatureNEON}},
+ {"ras", {AArch64::FeatureRAS}},
+ {"lse", {AArch64::FeatureLSE}},
+ {"predres", {AArch64::FeaturePredRes}},
+ {"ccdp", {AArch64::FeatureCacheDeepPersist}},
+ {"mte", {AArch64::FeatureMTE}},
+ {"tlb-rmi", {AArch64::FeatureTLB_RMI}},
+ {"pan-rwv", {AArch64::FeaturePAN_RWV}},
+ {"ccpp", {AArch64::FeatureCCPP}},
+ {"sve", {AArch64::FeatureSVE}},
+ // FIXME: Unsupported extensions
+ {"pan", {}},
+ {"lor", {}},
+ {"rdma", {}},
+ {"profile", {}},
+};
+
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
if (FBS[AArch64::HasV8_1aOps])
Str += "ARMv8.1a";
@@ -2732,8 +2849,18 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.3a";
else if (FBS[AArch64::HasV8_4aOps])
Str += "ARMv8.4a";
- else
- Str += "(unknown)";
+ else if (FBS[AArch64::HasV8_5aOps])
+ Str += "ARMv8.5a";
+ else {
+ auto ext = std::find_if(std::begin(ExtensionMap),
+ std::end(ExtensionMap),
+ [&](const Extension& e)
+ // Use & in case multiple features are enabled
+ { return (FBS & e.Features) != FeatureBitset(); }
+ );
+
+ Str += ext != std::end(ExtensionMap) ? ext->Name : "(unknown)";
+ }
}
void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands,
@@ -2812,6 +2939,23 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
return TokError(Str.c_str());
}
createSysAlias(TLBI->Encoding, Operands, S);
+ } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp") {
+ const AArch64PRCTX::PRCTX *PRCTX = AArch64PRCTX::lookupPRCTXByName(Op);
+ if (!PRCTX)
+ return TokError("invalid operand for prediction restriction instruction");
+ else if (!PRCTX->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str(
+ Mnemonic.upper() + std::string(PRCTX->Name) + " requires ");
+ setRequiredFeatureString(PRCTX->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
+ }
+ uint16_t PRCTX_Op2 =
+ Mnemonic == "cfp" ? 4 :
+ Mnemonic == "dvp" ? 5 :
+ Mnemonic == "cpp" ? 7 :
+ 0;
+ assert(PRCTX_Op2 && "Invalid mnemonic for prediction restriction instruction");
+ createSysAlias(PRCTX->Encoding << 3 | PRCTX_Op2 , Operands, S);
}
Parser.Lex(); // Eat operand.
@@ -3630,8 +3774,10 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction.
- if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi")
+ // IC, DC, AT, TLBI and Prediction invalidation instructions are aliases for
+ // the SYS instruction.
+ if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi" ||
+ Head == "cfp" || Head == "dvp" || Head == "cpp")
return parseSysAlias(Head, NameLoc, Operands);
Operands.push_back(
@@ -3685,13 +3831,9 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- // Read the first operand.
- if (parseOperand(Operands, false, false)) {
- return true;
- }
- unsigned N = 2;
- while (parseOptionalToken(AsmToken::Comma)) {
+ unsigned N = 1;
+ do {
// Parse and remember the operand.
if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) ||
(N == 3 && condCodeThirdOperand) ||
@@ -3719,7 +3861,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
AArch64Operand::CreateToken("!", false, ELoc, getContext()));
++N;
- }
+ } while (parseOptionalToken(AsmToken::Comma));
}
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
@@ -3956,6 +4098,15 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
"unpredictable STXP instruction, status is also a source");
break;
}
+ case AArch64::LDGV: {
+ unsigned Rt = Inst.getOperand(0).getReg();
+ unsigned Rn = Inst.getOperand(1).getReg();
+ if (RI->isSubRegisterEq(Rt, Rn)) {
+ return Error(Loc[0],
+ "unpredictable LDGV instruction, writeback register is also "
+ "the target register");
+ }
+ }
}
@@ -4090,6 +4241,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be an integer in range [-128, 127].");
case Match_InvalidMemoryIndexedSImm9:
return Error(Loc, "index must be an integer in range [-256, 255].");
+ case Match_InvalidMemoryIndexed16SImm9:
+ return Error(Loc, "index must be a multiple of 16 in range [-4096, 4080].");
case Match_InvalidMemoryIndexed8SImm10:
return Error(Loc, "index must be a multiple of 8 in range [-4096, 4088].");
case Match_InvalidMemoryIndexed4SImm7:
@@ -4106,6 +4259,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be a multiple of 2 in range [0, 62].");
case Match_InvalidMemoryIndexed8UImm6:
return Error(Loc, "index must be a multiple of 8 in range [0, 504].");
+ case Match_InvalidMemoryIndexed16UImm6:
+ return Error(Loc, "index must be a multiple of 16 in range [0, 1008].");
case Match_InvalidMemoryIndexed4UImm6:
return Error(Loc, "index must be a multiple of 4 in range [0, 252].");
case Match_InvalidMemoryIndexed2UImm6:
@@ -4742,10 +4897,12 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexed2UImm6:
case Match_InvalidMemoryIndexed4UImm6:
case Match_InvalidMemoryIndexed8UImm6:
+ case Match_InvalidMemoryIndexed16UImm6:
case Match_InvalidMemoryIndexedSImm6:
case Match_InvalidMemoryIndexedSImm5:
case Match_InvalidMemoryIndexedSImm8:
case Match_InvalidMemoryIndexedSImm9:
+ case Match_InvalidMemoryIndexed16SImm9:
case Match_InvalidMemoryIndexed8SImm10:
case Match_InvalidImm0_1:
case Match_InvalidImm0_7:
@@ -4874,6 +5031,12 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveUnreq(Loc);
else if (IDVal == ".inst")
parseDirectiveInst(Loc);
+ else if (IDVal == ".cfi_negate_ra_state")
+ parseDirectiveCFINegateRAState();
+ else if (IDVal == ".cfi_b_key_frame")
+ parseDirectiveCFIBKeyFrame();
+ else if (IDVal == ".arch_extension")
+ parseDirectiveArchExtension(Loc);
else if (IsMachO) {
if (IDVal == MCLOHDirectiveName())
parseDirectiveLOH(IDVal, Loc);
@@ -4884,28 +5047,6 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
}
-static const struct {
- const char *Name;
- const FeatureBitset Features;
-} ExtensionMap[] = {
- { "crc", {AArch64::FeatureCRC} },
- { "sm4", {AArch64::FeatureSM4} },
- { "sha3", {AArch64::FeatureSHA3} },
- { "sha2", {AArch64::FeatureSHA2} },
- { "aes", {AArch64::FeatureAES} },
- { "crypto", {AArch64::FeatureCrypto} },
- { "fp", {AArch64::FeatureFPARMv8} },
- { "simd", {AArch64::FeatureNEON} },
- { "ras", {AArch64::FeatureRAS} },
- { "lse", {AArch64::FeatureLSE} },
-
- // FIXME: Unsupported extensions
- { "pan", {} },
- { "lor", {} },
- { "rdma", {} },
- { "profile", {} },
-};
-
static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
SmallVector<StringRef, 4> &RequestedExtensions) {
const bool NoCrypto =
@@ -4927,6 +5068,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
RequestedExtensions.push_back("aes");
break;
case AArch64::ArchKind::ARMV8_4A:
+ case AArch64::ArchKind::ARMV8_5A:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
@@ -4945,6 +5087,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
RequestedExtensions.push_back("noaes");
break;
case AArch64::ArchKind::ARMV8_4A:
+ case AArch64::ArchKind::ARMV8_5A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
@@ -5014,6 +5157,50 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
return false;
}
+/// parseDirectiveArchExtension
+/// ::= .arch_extension [no]feature
+bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLexer().getLoc(), "expected architecture extension name");
+
+ const AsmToken &Tok = Parser.getTok();
+ StringRef Name = Tok.getString();
+ SMLoc ExtLoc = Tok.getLoc();
+ Lex();
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.arch_extension' directive"))
+ return true;
+
+ bool EnableFeature = true;
+ if (Name.startswith_lower("no")) {
+ EnableFeature = false;
+ Name = Name.substr(2);
+ }
+
+ MCSubtargetInfo &STI = copySTI();
+ FeatureBitset Features = STI.getFeatureBits();
+ for (const auto &Extension : ExtensionMap) {
+ if (Extension.Name != Name)
+ continue;
+
+ if (Extension.Features.none())
+ return Error(ExtLoc, "unsupported architectural extension: " + Name);
+
+ FeatureBitset ToggleFeatures = EnableFeature
+ ? (~Features & Extension.Features)
+ : (Features & Extension.Features);
+ uint64_t Features =
+ ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+ setAvailableFeatures(Features);
+ return false;
+ }
+
+ return Error(ExtLoc, "unknown architectural extension: " + Name);
+}
+
static SMLoc incrementLoc(SMLoc L, int Offset) {
return SMLoc::getFromPointer(L.getPointer() + Offset);
}
@@ -5267,6 +5454,23 @@ bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
return false;
}
+bool AArch64AsmParser::parseDirectiveCFINegateRAState() {
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
+ getStreamer().EmitCFINegateRAState();
+ return false;
+}
+
+/// parseDirectiveCFIBKeyFrame
+/// ::= .cfi_b_key_frame
+bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.cfi_b_key_frame'"))
+ return true;
+ getStreamer().EmitCFIBKeyFrame();
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
@@ -5288,28 +5492,20 @@ AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
return true;
}
- const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
- if (!BE)
- return false;
-
- SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
- if (!SE)
- return false;
- DarwinRefKind = SE->getKind();
-
- if (BE->getOpcode() != MCBinaryExpr::Add &&
- BE->getOpcode() != MCBinaryExpr::Sub)
+ // Check that it looks like a symbol + an addend
+ MCValue Res;
+ bool Relocatable = Expr->evaluateAsRelocatable(Res, nullptr, nullptr);
+ if (!Relocatable || Res.getSymB())
return false;
- // See if the addend is a constant, otherwise there's more going
- // on here than we can deal with.
- auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS());
- if (!AddendExpr)
+ // Treat expressions with an ELFRefKind (like ":abs_g1:3", or
+ // ":abs_g1:x" where x is constant) as symbolic even if there is no symbol.
+ if (!Res.getSymA() && ELFRefKind == AArch64MCExpr::VK_INVALID)
return false;
- Addend = AddendExpr->getValue();
- if (BE->getOpcode() == MCBinaryExpr::Sub)
- Addend = -Addend;
+ if (Res.getSymA())
+ DarwinRefKind = Res.getSymA()->getKind();
+ Addend = Res.getConstant();
// It's some symbol reference + a constant addend, but really
// shouldn't use both Darwin and ELF syntax.
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index cef0ff346448..4102f1eb5cc1 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -12,7 +12,6 @@
#include "AArch64Disassembler.h"
#include "AArch64ExternalSymbolizer.h"
-#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
@@ -20,6 +19,8 @@
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -159,8 +160,8 @@ static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
const void *Decoder);
static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBaseAddSubImm(MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
@@ -219,6 +220,11 @@ static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
+ uint32_t insn,
+ uint64_t address,
+ const void* Decoder);
+
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
case MCDisassembler::Success:
@@ -1402,6 +1408,8 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::STPSpost:
case AArch64::LDPSpre:
case AArch64::STPSpre:
+ case AArch64::STGPpre:
+ case AArch64::STGPpost:
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
break;
}
@@ -1415,6 +1423,8 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::LDPXpre:
case AArch64::STPXpre:
case AArch64::LDPSWpre:
+ case AArch64::STGPpre:
+ case AArch64::STGPpost:
NeedsDisjointWritebackTransfer = true;
LLVM_FALLTHROUGH;
case AArch64::LDNPXi:
@@ -1422,6 +1432,7 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
case AArch64::LDPXi:
case AArch64::STPXi:
case AArch64::LDPSWi:
+ case AArch64::STGPi:
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
break;
@@ -1652,8 +1663,8 @@ static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeBaseAddSubImm(MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
+static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
+ uint64_t Addr, const void *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Imm = fieldFromInstruction(insn, 10, 14);
@@ -1711,11 +1722,17 @@ static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
uint64_t op1 = fieldFromInstruction(insn, 16, 3);
uint64_t op2 = fieldFromInstruction(insn, 5, 3);
uint64_t crm = fieldFromInstruction(insn, 8, 4);
-
uint64_t pstate_field = (op1 << 3) | op2;
+ switch (pstate_field) {
+ case 0x01: // XAFlag
+ case 0x02: // AXFlag
+ return Fail;
+ }
+
if ((pstate_field == AArch64PState::PAN ||
- pstate_field == AArch64PState::UAO) && crm > 1)
+ pstate_field == AArch64PState::UAO ||
+ pstate_field == AArch64PState::SSBS) && crm > 1)
return Fail;
Inst.addOperand(MCOperand::createImm(pstate_field));
@@ -1835,3 +1852,25 @@ static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
Inst.addOperand(MCOperand::createImm(Imm + 1));
return Success;
}
+
+static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
+ uint32_t insn,
+ uint64_t address,
+ const void* Decoder) {
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Rt = fieldFromInstruction(insn, 0, 5);
+
+ // Outputs
+ DecodeGPR64spRegisterClass(Inst, Rn, address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt, address, Decoder);
+
+ // Input (Rn again)
+ Inst.addOperand(Inst.getOperand(0));
+
+ //Do this post decode since the raw number for xzr and sp is the same
+ if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
+ return SoftFail;
+ } else {
+ return Success;
+ }
+}
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 6e64fc9347b9..342655a29b1d 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "AArch64ExternalSymbolizer.h"
-#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -60,6 +60,8 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+ if (!SymbolLookUp)
+ return false;
// FIXME: This method shares a lot of code with
// MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
// refactor the MCExternalSymbolizer interface to allow more of this
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 26e41215afc6..dcf2dd251149 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -775,8 +775,33 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
if (CnVal == 7) {
switch (CmVal) {
default: return false;
+ // Maybe IC, maybe Prediction Restriction
+ case 1:
+ switch (Op1Val) {
+ default: return false;
+ case 0: goto Search_IC;
+ case 3: goto Search_PRCTX;
+ }
+ // Prediction Restriction aliases
+ case 3: {
+ Search_PRCTX:
+ const AArch64PRCTX::PRCTX *PRCTX = AArch64PRCTX::lookupPRCTXByEncoding(Encoding >> 3);
+ if (!PRCTX || !PRCTX->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = PRCTX->NeedsReg;
+ switch (Op2Val) {
+ default: return false;
+ case 4: Ins = "cfp\t"; break;
+ case 5: Ins = "dvp\t"; break;
+ case 7: Ins = "cpp\t"; break;
+ }
+ Name = std::string(PRCTX->Name);
+ }
+ break;
// IC aliases
- case 1: case 5: {
+ case 5: {
+ Search_IC:
const AArch64IC::IC *IC = AArch64IC::lookupICByEncoding(Encoding);
if (!IC || !IC->haveFeatures(STI.getFeatureBits()))
return false;
@@ -787,7 +812,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
}
break;
// DC aliases
- case 4: case 6: case 10: case 11: case 12: case 14:
+ case 4: case 6: case 10: case 11: case 12: case 13: case 14:
{
const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
if (!DC || !DC->haveFeatures(STI.getFeatureBits()))
@@ -1097,6 +1122,17 @@ void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
O << '#' << formatImm(psbhintop);
}
+void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned btihintop = (MI->getOperand(OpNum).getImm() ^ 32) >> 1;
+ auto BTI = AArch64BTIHint::lookupBTIByEncoding(btihintop);
+ if (BTI)
+ O << BTI->Name;
+ else
+ O << '#' << formatImm(btihintop);
+}
+
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 8dc9264f94a1..4e9982f5b7be 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -131,6 +131,9 @@ protected:
void printPSBHintOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBTIHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
void printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 62644ab2f457..688ca755d0b5 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -342,27 +343,23 @@ static inline bool isValidDecodeLogicalImmediate(uint64_t val,
//
static inline float getFPImmFloat(unsigned Imm) {
// We expect an 8-bit binary encoding of a floating-point number here.
- union {
- uint32_t I;
- float F;
- } FPUnion;
uint8_t Sign = (Imm >> 7) & 0x1;
uint8_t Exp = (Imm >> 4) & 0x7;
uint8_t Mantissa = Imm & 0xf;
- // 8-bit FP iEEEE Float Encoding
+ // 8-bit FP IEEE Float Encoding
// abcd efgh aBbbbbbc defgh000 00000000 00000000
//
// where B = NOT(b);
- FPUnion.I = 0;
- FPUnion.I |= Sign << 31;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
- FPUnion.I |= (Exp & 0x3) << 23;
- FPUnion.I |= Mantissa << 19;
- return FPUnion.F;
+ uint32_t I = 0;
+ I |= Sign << 31;
+ I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ I |= (Exp & 0x3) << 23;
+ I |= Mantissa << 19;
+ return bit_cast<float>(I);
}
/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
@@ -757,12 +754,8 @@ static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) {
/// Returns true if Imm is the concatenation of a repeating pattern of type T.
template <typename T>
static inline bool isSVEMaskOfIdenticalElements(int64_t Imm) {
- union {
- int64_t Whole;
- T Parts[sizeof(int64_t)/sizeof(T)];
- } Vec { Imm };
-
- return all_of(Vec.Parts, [Vec](T Elem) { return Elem == Vec.Parts[0]; });
+ auto Parts = bit_cast<std::array<T, sizeof(int64_t) / sizeof(T)>>(Imm);
+ return all_of(Parts, [&](T Elem) { return Elem == Parts[0]; });
}
/// Returns true if Imm is valid for CPY/DUP.
@@ -790,29 +783,20 @@ static inline bool isSVEAddSubImm(int64_t Imm) {
/// Return true if Imm is valid for DUPM and has no single CPY/DUP equivalent.
static inline bool isSVEMoveMaskPreferredLogicalImmediate(int64_t Imm) {
- union {
- int64_t D;
- int32_t S[2];
- int16_t H[4];
- int8_t B[8];
- } Vec = { Imm };
-
- if (isSVECpyImm<int64_t>(Vec.D))
+ if (isSVECpyImm<int64_t>(Imm))
return false;
- if (isSVEMaskOfIdenticalElements<int32_t>(Imm) &&
- isSVECpyImm<int32_t>(Vec.S[0]))
- return false;
+ auto S = bit_cast<std::array<int32_t, 2>>(Imm);
+ auto H = bit_cast<std::array<int16_t, 4>>(Imm);
+ auto B = bit_cast<std::array<int8_t, 8>>(Imm);
- if (isSVEMaskOfIdenticalElements<int16_t>(Imm) &&
- isSVECpyImm<int16_t>(Vec.H[0]))
+ if (isSVEMaskOfIdenticalElements<int32_t>(Imm) && isSVECpyImm<int32_t>(S[0]))
return false;
-
- if (isSVEMaskOfIdenticalElements<int8_t>(Imm) &&
- isSVECpyImm<int8_t>(Vec.B[0]))
+ if (isSVEMaskOfIdenticalElements<int16_t>(Imm) && isSVECpyImm<int16_t>(H[0]))
return false;
-
- return isLogicalImmediate(Vec.D, 64);
+ if (isSVEMaskOfIdenticalElements<int8_t>(Imm) && isSVECpyImm<int8_t>(B[0]))
+ return false;
+ return isLogicalImmediate(Imm, 64);
}
inline static bool isAnyMOVZMovAlias(uint64_t Value, int RegWidth) {
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 856946555198..ed89d991d9fb 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -19,6 +19,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
@@ -109,11 +110,11 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case FK_Data_1:
return 1;
- case AArch64::fixup_aarch64_movw:
case FK_Data_2:
case FK_SecRel_2:
return 2;
+ case AArch64::fixup_aarch64_movw:
case AArch64::fixup_aarch64_pcrel_branch14:
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
@@ -144,9 +145,9 @@ static unsigned AdrImmBits(unsigned Value) {
return (hi19 << 5) | (lo2 << 29);
}
-static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext &Ctx, const Triple &TheTriple,
- bool IsResolved) {
+static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
+ uint64_t Value, MCContext &Ctx,
+ const Triple &TheTriple, bool IsResolved) {
unsigned Kind = Fixup.getKind();
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
@@ -214,10 +215,79 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
if (Value & 0xf)
Ctx.reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
return Value >> 4;
- case AArch64::fixup_aarch64_movw:
- Ctx.reportError(Fixup.getLoc(),
- "no resolvable MOVZ/MOVK fixups supported yet");
+ case AArch64::fixup_aarch64_movw: {
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ if (AArch64MCExpr::getSymbolLoc(RefKind) != AArch64MCExpr::VK_ABS &&
+ AArch64MCExpr::getSymbolLoc(RefKind) != AArch64MCExpr::VK_SABS) {
+ // VK_GOTTPREL, VK_TPREL, VK_DTPREL are movw fixups, but they can't
+ // ever be resolved in the assembler.
+ Ctx.reportError(Fixup.getLoc(),
+ "relocation for a thread-local variable points to an "
+ "absolute symbol");
+ return Value;
+ }
+
+ if (!IsResolved) {
+ // FIXME: Figure out when this can actually happen, and verify our
+ // behavior.
+ Ctx.reportError(Fixup.getLoc(), "unresolved movw fixup not yet "
+ "implemented");
+ return Value;
+ }
+
+ if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS) {
+ switch (AArch64MCExpr::getAddressFrag(RefKind)) {
+ case AArch64MCExpr::VK_G0:
+ break;
+ case AArch64MCExpr::VK_G1:
+ SignedValue = SignedValue >> 16;
+ break;
+ case AArch64MCExpr::VK_G2:
+ SignedValue = SignedValue >> 32;
+ break;
+ case AArch64MCExpr::VK_G3:
+ SignedValue = SignedValue >> 48;
+ break;
+ default:
+ llvm_unreachable("Variant kind doesn't correspond to fixup");
+ }
+
+ } else {
+ switch (AArch64MCExpr::getAddressFrag(RefKind)) {
+ case AArch64MCExpr::VK_G0:
+ break;
+ case AArch64MCExpr::VK_G1:
+ Value = Value >> 16;
+ break;
+ case AArch64MCExpr::VK_G2:
+ Value = Value >> 32;
+ break;
+ case AArch64MCExpr::VK_G3:
+ Value = Value >> 48;
+ break;
+ default:
+ llvm_unreachable("Variant kind doesn't correspond to fixup");
+ }
+ }
+
+ if (RefKind & AArch64MCExpr::VK_NC) {
+ Value &= 0xFFFF;
+ }
+ else if (RefKind & AArch64MCExpr::VK_SABS) {
+ if (SignedValue > 0xFFFF || SignedValue < -0xFFFF)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+
+ // Invert the negative immediate because it will feed into a MOVN.
+ if (SignedValue < 0)
+ SignedValue = ~SignedValue;
+ Value = static_cast<uint64_t>(SignedValue);
+ }
+ else if (Value > 0xFFFF) {
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ }
return Value;
+ }
case AArch64::fixup_aarch64_pcrel_branch14:
// Signed 16-bit immediate
if (SignedValue > 32767 || SignedValue < -32768)
@@ -294,8 +364,9 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
return; // Doesn't change encoding.
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
MCContext &Ctx = Asm.getContext();
+ int64_t SignedValue = static_cast<int64_t>(Value);
// Apply any target-specific value adjustments.
- Value = adjustFixupValue(Fixup, Value, Ctx, TheTriple, IsResolved);
+ Value = adjustFixupValue(Fixup, Target, Value, Ctx, TheTriple, IsResolved);
// Shift the value into position.
Value <<= Info.TargetOffset;
@@ -322,6 +393,19 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
+
+ // FIXME: getFixupKindInfo() and getFixupKindNumBytes() could be fixed to
+ // handle this more cleanly. This may affect the output of -show-mc-encoding.
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ if (RefKind & AArch64MCExpr::VK_SABS) {
+ // If the immediate is negative, generate MOVN else MOVZ.
+ // (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
+ if (SignedValue < 0)
+ Data[Offset + 3] &= ~(1 << 6);
+ else
+ Data[Offset + 3] |= (1 << 6);
+ }
}
bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst,
@@ -376,6 +460,14 @@ bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
return true;
+
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind);
+ // LDR GOT relocations need a relocation
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_ldr_pcrel_imm19 &&
+ SymLoc == AArch64MCExpr::VK_GOT)
+ return true;
return false;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a11e396217af..2ccd7cef8bef 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -138,7 +138,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else
return ELF::R_AARCH64_PREL64;
case AArch64::fixup_aarch64_pcrel_adr_imm21:
- assert(SymLoc == AArch64MCExpr::VK_NONE && "unexpected ADR relocation");
+ if (SymLoc != AArch64MCExpr::VK_ABS)
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid symbol kind for ADR relocation");
return R_CLS(ADR_PREL_LO21);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC)
@@ -169,6 +171,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
case AArch64::fixup_aarch64_ldr_pcrel_imm19:
if (SymLoc == AArch64MCExpr::VK_GOTTPREL)
return R_CLS(TLSIE_LD_GOTTPREL_PREL19);
+ if (SymLoc == AArch64MCExpr::VK_GOT)
+ return R_CLS(GOT_LD_PREL19);
return R_CLS(LD_PREL_LO19);
case AArch64::fixup_aarch64_pcrel_branch14:
return R_CLS(TSTBR14);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index c0ef8b670286..9a7e34b0aeb1 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -60,16 +60,6 @@ void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
OS << "\t.inst\t0x" << Twine::utohexstr(Inst) << "\n";
}
-class AArch64TargetELFStreamer : public AArch64TargetStreamer {
-private:
- AArch64ELFStreamer &getStreamer();
-
- void emitInst(uint32_t Inst) override;
-
-public:
- AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
-};
-
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// AArch64 ELF ABI:
@@ -85,8 +75,6 @@ public:
/// by MachO. Beware!
class AArch64ELFStreamer : public MCELFStreamer {
public:
- friend class AArch64TargetELFStreamer;
-
AArch64ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter)
@@ -154,6 +142,11 @@ public:
MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) override {
+ EmitDataMappingSymbol();
+ MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
+ }
private:
enum ElfMappingSymbol {
EMS_None,
@@ -192,6 +185,8 @@ private:
} // end anonymous namespace
+namespace llvm {
+
AArch64ELFStreamer &AArch64TargetELFStreamer::getStreamer() {
return static_cast<AArch64ELFStreamer &>(Streamer);
}
@@ -200,8 +195,6 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
getStreamer().emitInst(Inst);
}
-namespace llvm {
-
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
@@ -221,14 +214,4 @@ MCELFStreamer *createAArch64ELFStreamer(MCContext &Context,
return S;
}
-MCTargetStreamer *
-createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
- const Triple &TT = STI.getTargetTriple();
- if (TT.isOSBinFormatELF())
- return new AArch64TargetELFStreamer(S);
- if (TT.isOSBinFormatCOFF())
- return new AArch64TargetWinCOFFStreamer(S);
- return nullptr;
-}
-
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index ebb49121c1bf..58e4a9c9a9e9 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -115,6 +115,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
CommentString = ";";
ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
}
AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
@@ -131,4 +132,7 @@ AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
CommentString = "//";
ExceptionsType = ExceptionHandling::DwarfCFI;
+ // The default is dwarf, but WinEH can be enabled optionally, which requires
+ // WinEHEncodingType to be set.
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index cd937935ddbf..729486b1020c 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "AArch64MCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -62,8 +62,10 @@ StringRef AArch64MCExpr::getVariantKindName() const {
case VK_TLSDESC_LO12: return ":tlsdesc_lo12:";
case VK_ABS_PAGE: return "";
case VK_ABS_PAGE_NC: return ":pg_hi21_nc:";
+ case VK_GOT: return ":got:";
case VK_GOT_PAGE: return ":got:";
case VK_GOT_LO12: return ":got_lo12:";
+ case VK_GOTTPREL: return ":gottprel:";
case VK_GOTTPREL_PAGE: return ":gottprel:";
case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:";
case VK_GOTTPREL_G1: return ":gottprel_g1:";
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 4ceda7e122f4..0f8198ba4e9b 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -16,6 +16,7 @@
#include "AArch64MCAsmInfo.h"
#include "AArch64WinCOFFStreamer.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstrAnalysis.h"
@@ -24,12 +25,14 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
+#define GET_INSTRINFO_MC_HELPERS
#include "AArch64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
@@ -153,6 +156,31 @@ public:
}
return false;
}
+
+ std::vector<std::pair<uint64_t, uint64_t>>
+ findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA,
+ const Triple &TargetTriple) const override {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 7 < End;
+ Byte += 4) {
+ uint32_t Insn = support::endian::read32le(PltContents.data() + Byte);
+ // Check for adrp.
+ if ((Insn & 0x9f000000) != 0x90000000)
+ continue;
+ uint64_t Imm = (((PltSectionVA + Byte) >> 12) << 12) +
+ (((Insn >> 29) & 3) << 12) + (((Insn >> 5) & 0x3ffff) << 14);
+ uint32_t Insn2 = support::endian::read32le(PltContents.data() + Byte + 4);
+ // Check for: ldr Xt, [Xn, #pimm].
+ if (Insn2 >> 22 == 0x3e5) {
+ Imm += ((Insn2 >> 10) & 0xfff) << 3;
+ Result.push_back(std::make_pair(PltSectionVA + Byte, Imm));
+ Byte += 4;
+ }
+ }
+ return Result;
+ }
};
} // end anonymous namespace
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 63f50778ccdb..0f22f69bd5b0 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -84,6 +84,7 @@ void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
// Defines symbolic names for the AArch64 instructions.
//
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index dee964df2635..a6b8d963bef9 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -13,6 +13,7 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
@@ -52,3 +53,17 @@ void AArch64TargetStreamer::emitInst(uint32_t Inst) {
getStreamer().EmitBytes(StringRef(Buffer, 4));
}
+
+namespace llvm {
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new AArch64TargetELFStreamer(S);
+ if (TT.isOSBinFormatCOFF())
+ return new AArch64TargetWinCOFFStreamer(S);
+ return nullptr;
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 51432830f795..73fb9baea3e3 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -12,6 +12,10 @@
#include "llvm/MC/MCStreamer.h"
+namespace {
+class AArch64ELFStreamer;
+}
+
namespace llvm {
class AArch64TargetStreamer : public MCTargetStreamer {
@@ -33,10 +37,75 @@ public:
/// Callback used to implement the .inst directive.
virtual void emitInst(uint32_t Inst);
+ virtual void EmitARM64WinCFIAllocStack(unsigned Size) {}
+ virtual void EmitARM64WinCFISaveFPLR(int Offset) {}
+ virtual void EmitARM64WinCFISaveFPLRX(int Offset) {}
+ virtual void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISetFP() {}
+ virtual void EmitARM64WinCFIAddFP(unsigned Size) {}
+ virtual void EmitARM64WinCFINop() {}
+ virtual void EmitARM64WinCFIPrologEnd() {}
+ virtual void EmitARM64WinCFIEpilogStart() {}
+ virtual void EmitARM64WinCFIEpilogEnd() {}
+
private:
std::unique_ptr<AssemblerConstantPools> ConstantPools;
};
+class AArch64TargetELFStreamer : public AArch64TargetStreamer {
+private:
+ AArch64ELFStreamer &getStreamer();
+
+ void emitInst(uint32_t Inst) override;
+
+public:
+ AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
+};
+
+class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
+private:
+ // True if we are processing SEH directives in an epilogue.
+ bool InEpilogCFI = false;
+
+ // Symbol of the current epilog for which we are processing SEH directives.
+ MCSymbol *CurrentEpilog = nullptr;
+public:
+ AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
+ : AArch64TargetStreamer(S) {}
+
+ // The unwind codes on ARM64 Windows are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+ void EmitARM64WinCFIAllocStack(unsigned Size) override;
+ void EmitARM64WinCFISaveFPLR(int Offset) override;
+ void EmitARM64WinCFISaveFPLRX(int Offset) override;
+ void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISetFP() override;
+ void EmitARM64WinCFIAddFP(unsigned Size) override;
+ void EmitARM64WinCFINop() override;
+ void EmitARM64WinCFIPrologEnd() override;
+ void EmitARM64WinCFIEpilogStart() override;
+ void EmitARM64WinCFIEpilogEnd() override;
+private:
+ void EmitARM64WinUnwindCode(unsigned UnwindCode, int Reg, int Offset);
+};
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 9871dc553bed..b828ab832e9d 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -11,31 +11,184 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
using namespace llvm;
namespace {
class AArch64WinCOFFStreamer : public MCWinCOFFStreamer {
-public:
- friend class AArch64TargetWinCOFFStreamer;
+ Win64EH::ARM64UnwindEmitter EHStreamer;
+public:
AArch64WinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB,
std::unique_ptr<MCCodeEmitter> CE,
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
+ void EmitWinEHHandlerData(SMLoc Loc) override;
+ void EmitWindowsUnwindTables() override;
void FinishImpl() override;
};
+void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
+ MCStreamer::EmitWinEHHandlerData(Loc);
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo());
+}
+
+void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() {
+ if (!getNumWinFrameInfos())
+ return;
+ EHStreamer.Emit(*this);
+}
+
void AArch64WinCOFFStreamer::FinishImpl() {
EmitFrames(nullptr);
+ EmitWindowsUnwindTables();
MCWinCOFFStreamer::FinishImpl();
}
} // end anonymous namespace
namespace llvm {
+
+// Helper function to common out unwind code setup for those codes that can
+// belong to both prolog and epilog.
+// There are three types of Windows ARM64 SEH codes. They can
+// 1) take no operands: SEH_Nop, SEH_PrologEnd, SEH_EpilogStart, SEH_EpilogEnd
+// 2) take an offset: SEH_StackAlloc, SEH_SaveFPLR, SEH_SaveFPLR_X
+// 3) take a register and an offset/size: all others
+void AArch64TargetWinCOFFStreamer::EmitARM64WinUnwindCode(unsigned UnwindCode,
+ int Reg,
+ int Offset) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+ MCSymbol *Label = S.EmitCFILabel();
+ auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset);
+ if (InEpilogCFI)
+ CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
+ else
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAllocStack(unsigned Size) {
+ unsigned Op = Win64EH::UOP_AllocSmall;
+ if (Size >= 16384)
+ Op = Win64EH::UOP_AllocLarge;
+ else if (Size >= 512)
+ Op = Win64EH::UOP_AllocMedium;
+ EmitARM64WinUnwindCode(Op, -1, Size);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLR(int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLR, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLRX(int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLRX, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveReg(unsigned Reg,
+ int Offset) {
+ assert(Offset >= 0 && Offset <= 504 &&
+ "Offset for save reg should be >= 0 && <= 504");
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveReg, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegP(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegP, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegPX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegPX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFReg(unsigned Reg,
+ int Offset) {
+ assert(Offset >= 0 && Offset <= 504 &&
+ "Offset for save reg should be >= 0 && <= 504");
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFReg, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegP(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegP, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFRegPX(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveFRegPX, Reg, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISetFP() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SetFP, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAddFP(unsigned Offset) {
+ assert(Offset <= 2040 && "UOP_AddFP must have offset <= 2040");
+ EmitARM64WinUnwindCode(Win64EH::UOP_AddFP, -1, Offset);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFINop() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_Nop, -1, 0);
+}
+
+// The functions below handle opcodes that can end up in either a prolog or
+// an epilog, but not both.
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ MCSymbol *Label = S.EmitCFILabel();
+ CurFrame->PrologEnd = Label;
+ WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
+ auto it = CurFrame->Instructions.begin();
+ CurFrame->Instructions.insert(it, Inst);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogStart() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ InEpilogCFI = true;
+ CurrentEpilog = S.EmitCFILabel();
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ InEpilogCFI = false;
+ MCSymbol *Label = S.EmitCFILabel();
+ WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
+ CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
+ CurrentEpilog = nullptr;
+}
+
MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
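Editorial note: EmitARM64WinCFIAllocStack above classifies a stack allocation into one of three SEH opcodes purely by size, with cut-offs at 512 and 16384 bytes. A throwaway sketch of that threshold logic (the enum is a stand-in, not the Win64EH::UOP_* opcodes):

#include <cassert>
#include <iostream>

enum class AllocClass { Small, Medium, Large };

// Mirrors the size thresholds used in EmitARM64WinCFIAllocStack above.
AllocClass classifyAlloc(unsigned Size) {
  if (Size >= 16384)
    return AllocClass::Large;
  if (Size >= 512)
    return AllocClass::Medium;
  return AllocClass::Small;
}

int main() {
  assert(classifyAlloc(64) == AllocClass::Small);
  assert(classifyAlloc(4096) == AllocClass::Medium);
  assert(classifyAlloc(1 << 20) == AllocClass::Large);
  std::cout << "ok\n";
}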
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
index c05422163584..ed265a876ab3 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -17,20 +17,6 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
-namespace {
-class AArch64WinCOFFStreamer;
-
-class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
-private:
- AArch64WinCOFFStreamer &getStreamer();
-
-public:
- AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
- : AArch64TargetStreamer(S) {}
-};
-
-} // end anonymous namespace
-
namespace llvm {
MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
diff --git a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7a8dd8bc5aee..23a65b345bad 100644
--- a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1535,7 +1535,7 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
}
-
+
//===----------------------------------------------------------------------===//
// SVE Floating Point Unary Operations - Unpredicated Group
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 23cc21ce2e7c..c88155db7037 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -62,6 +62,13 @@ namespace llvm {
}
namespace llvm {
+ namespace AArch64PRCTX {
+#define GET_PRCTX_IMPL
+#include "AArch64GenSystemOperands.inc"
+ }
+}
+
+namespace llvm {
namespace AArch64PRFM {
#define GET_PRFM_IMPL
#include "AArch64GenSystemOperands.inc"
@@ -104,6 +111,13 @@ namespace llvm {
}
namespace llvm {
+ namespace AArch64BTIHint {
+#define GET_BTI_IMPL
+#include "AArch64GenSystemOperands.inc"
+ }
+}
+
+namespace llvm {
namespace AArch64SysReg {
#define GET_SYSREG_IMPL
#include "AArch64GenSystemOperands.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 2874c4ab42ea..44c6a6b44895 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -388,6 +388,14 @@ namespace AArch64PSBHint {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64BTIHint {
+ struct BTI : SysAlias {
+ using SysAlias::SysAlias;
+ };
+ #define GET_BTI_DECL
+ #include "AArch64GenSystemOperands.inc"
+}
+
namespace AArch64SE {
enum ShiftExtSpecifiers {
Invalid = -1,
@@ -499,6 +507,14 @@ namespace AArch64TLBI {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64PRCTX {
+ struct PRCTX : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+ };
+ #define GET_PRCTX_DECL
+ #include "AArch64GenSystemOperands.inc"
+}
+
namespace AArch64II {
/// Target Operand Flag enum.
enum TOF {
@@ -507,7 +523,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0xf,
+ MO_FRAGMENT = 0x7,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -540,6 +556,11 @@ namespace AArch64II {
/// by-12-bits instruction.
MO_HI12 = 7,
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+  /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on windows.
+ MO_COFFSTUB = 0x8,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
@@ -560,6 +581,10 @@ namespace AArch64II {
/// to the symbol is for an import stub. This is used for DLL import
/// storage class indication on Windows.
MO_DLLIMPORT = 0x80,
+
+ /// MO_S - Indicates that the bits of the symbol operand represented by
+ /// MO_G0 etc are signed.
+ MO_S = 0x100,
};
} // end namespace AArch64II
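Editorial note: the MO_FRAGMENT mask shrinks from 0xf to 0x7 in the same hunk that introduces MO_COFFSTUB = 0x8, so the stub bit can coexist with the fragment kind in one flag word. A self-contained sketch of that bit layout, using only the values visible in the hunk (every other MO_* value is omitted):

#include <cassert>
#include <cstdint>

enum : uint16_t {
  MO_FRAGMENT  = 0x7,
  MO_HI12      = 7,
  MO_COFFSTUB  = 0x8,
  MO_DLLIMPORT = 0x80,
  MO_S         = 0x100,
};

int main() {
  uint16_t Flags = MO_HI12 | MO_COFFSTUB;   // fragment and stub bit together
  assert((Flags & MO_FRAGMENT) == MO_HI12); // fragment still extractable
  assert(Flags & MO_COFFSTUB);              // stub bit not clobbered
  // Under the old 0xf mask, bit 0x8 would have been swallowed by the fragment:
  assert(((MO_HI12 | MO_COFFSTUB) & 0xf) != MO_HI12);
  return 0;
}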
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
index 2b49c2ea88e1..bb7801c172f6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -37,10 +37,13 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
// SI Passes
+FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIFixupVectorISelPass();
+FunctionPass *createSIAddIMGInitPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass();
FunctionPass *createSIWholeQuadModePass();
@@ -57,6 +60,7 @@ FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
+FunctionPass *createSIModeRegisterPass();
void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
@@ -69,10 +73,18 @@ Pass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
+FunctionPass *createAMDGPUAtomicOptimizerPass();
+void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
+extern char &AMDGPUAtomicOptimizerID;
+
ModulePass *createAMDGPULowerIntrinsicsPass();
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
extern char &AMDGPULowerIntrinsicsID;
+ModulePass *createAMDGPUFixFunctionBitcastsPass();
+void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
+extern char &AMDGPUFixFunctionBitcastsID;
+
FunctionPass *createAMDGPULowerKernelArgumentsPass();
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
extern char &AMDGPULowerKernelArgumentsID;
@@ -84,6 +96,9 @@ extern char &AMDGPULowerKernelAttributesID;
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
+void initializeGCNDPPCombinePass(PassRegistry &);
+extern char &GCNDPPCombineID;
+
void initializeR600ClauseMergePassPass(PassRegistry &);
extern char &R600ClauseMergePassID;
@@ -114,6 +129,9 @@ extern char &SIFixSGPRCopiesID;
void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;
+void initializeSIFixupVectorISelPass(PassRegistry &);
+extern char &SIFixupVectorISelID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -141,6 +159,9 @@ extern char &AMDGPUSimplifyLibCallsID;
void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
extern char &AMDGPUUseNativeCallsID;
+void initializeSIAddIMGInitPass(PassRegistry &);
+extern char &SIAddIMGInitID;
+
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
@@ -179,6 +200,9 @@ extern char &SIMemoryLegalizerID;
void initializeSIDebuggerInsertNopsPass(PassRegistry&);
extern char &SIDebuggerInsertNopsID;
+void initializeSIModeRegisterPass(PassRegistry&);
+extern char &SIModeRegisterID;
+
void initializeSIInsertWaitcntsPass(PassRegistry&);
extern char &SIInsertWaitcntsID;
@@ -190,6 +214,8 @@ extern char &AMDGPUUnifyDivergentExitNodesID;
ImmutablePass *createAMDGPUAAWrapperPass();
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
+ImmutablePass *createAMDGPUExternalAAWrapperPass();
+void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
@@ -221,19 +247,18 @@ enum TargetIndex {
/// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
/// memory locations.
-struct AMDGPUAS {
- // The following address space values depend on the triple environment.
- unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
- unsigned FLAT_ADDRESS; ///< Address space for flat memory.
- unsigned REGION_ADDRESS; ///< Address space for region memory.
-
+namespace AMDGPUAS {
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
MAX_AMDGPU_ADDRESS = 6,
+ FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ REGION_ADDRESS = 2, ///< Address space for region memory.
+
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ PRIVATE_ADDRESS = 5, ///< Address space for private memory.
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
@@ -268,14 +293,6 @@ struct AMDGPUAS {
// Some places use this if the address space can't be determined.
UNKNOWN_ADDRESS_SPACE = ~0u,
};
-};
-
-namespace llvm {
-namespace AMDGPU {
-AMDGPUAS getAMDGPUAS(const Module &M);
-AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
-AMDGPUAS getAMDGPUAS(Triple T);
-} // namespace AMDGPU
-} // namespace llvm
+}
#endif
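Editorial note: the AMDGPUAS change above replaces the per-triple struct with hard-coded address space numbers, which is why the getAMDGPUAS() helpers disappear. A toy sketch of what downstream queries look like once the numbering is fixed (the namespace and helper here are stand-ins, not the LLVM ones):

#include <cassert>

namespace FakeAMDGPUAS {
enum : unsigned {
  FLAT_ADDRESS = 0,
  GLOBAL_ADDRESS = 1,
  REGION_ADDRESS = 2,
  LOCAL_ADDRESS = 3,
  CONSTANT_ADDRESS = 4,
  PRIVATE_ADDRESS = 5,
  CONSTANT_ADDRESS_32BIT = 6,
  MAX_AMDGPU_ADDRESS = 6,
};
}

// With constant numbering, a query like this no longer needs an AMDGPUAS
// struct threaded through every pass.
bool isConstantAddressSpace(unsigned AS) {
  return AS == FakeAMDGPUAS::CONSTANT_ADDRESS ||
         AS == FakeAMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

int main() {
  assert(isConstantAddressSpace(4));
  assert(!isConstantAddressSpace(FakeAMDGPUAS::LOCAL_ADDRESS));
  return 0;
}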
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
index 445b69b35eb1..6a4cfe08e491 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -11,6 +11,10 @@ include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"
+class BoolToList<bit Value> {
+ list<int> ret = !if(Value, [1]<int>, []<int>);
+}
+
//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//
@@ -140,6 +144,12 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
"Additional instructions for CI+"
>;
+def FeatureVIInsts : SubtargetFeature<"vi-insts",
+ "VIInsts",
+ "true",
+ "Additional instructions for VI+"
+>;
+
def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
"GFX9Insts",
"true",
@@ -236,6 +246,12 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
+def FeatureR128A16 : SubtargetFeature<"r128-a16",
+ "HasR128A16",
+ "true",
+ "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+>;
+
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@@ -251,16 +267,19 @@ def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
def FeatureDLInsts : SubtargetFeature<"dl-insts",
"HasDLInsts",
"true",
- "Has deep learning instructions"
+ "Has v_fmac_f32 and v_xnor_b32 instructions"
+>;
+
+def FeatureDotInsts : SubtargetFeature<"dot-insts",
+ "HasDotInsts",
+ "true",
+ "Has v_dot* instructions"
>;
-def FeatureD16PreservesUnusedBits : SubtargetFeature<
- "d16-preserves-unused-bits",
- "D16PreservesUnusedBits",
+def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
+ "EnableSRAMECC",
"true",
- "If present, then instructions defined by HasD16LoadStore predicate preserve "
- "unused bits. Otherwise instructions defined by HasD16LoadStore predicate "
- "zero unused bits."
+ "Enable SRAM ECC"
>;
//===------------------------------------------------------------===//
@@ -315,12 +334,6 @@ def FeatureEnableHugePrivateBuffer : SubtargetFeature<
"Enable private/scratch buffer sizes greater than 128 GB"
>;
-def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
- "EnableVGPRSpilling",
- "true",
- "Enable spilling of VGPRs to scratch memory"
->;
-
def FeatureDumpCode : SubtargetFeature <"DumpCode",
"DumpCode",
"true",
@@ -364,6 +377,16 @@ def FeatureEnableDS128 : SubtargetFeature<"enable-ds128",
"Use ds_{read|write}_b128"
>;
+// Sparse texture support requires that all result registers are zeroed when
+// PRTStrictNull is set to true. This feature is turned on for all architectures
+// but is enabled as a feature in case there are situations where PRTStrictNull
+// is disabled by the driver.
+def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null",
+ "EnablePRTStrictNull",
+ "true",
+ "Enable zeroing of result registers for sparse texture fetches"
+>;
+
// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
@@ -390,6 +413,12 @@ def FeatureCodeObjectV3 : SubtargetFeature <
"Generate code object version 3"
>;
+def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
+ "HasTrigReducedRange",
+ "true",
+ "Requires use of fract on arguments to trig instructions"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -409,36 +438,36 @@ class GCNSubtargetFeatureGeneration <string Value,
def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN,
- FeatureLDSBankCount32, FeatureMovrel]
+ FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange]
>;
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
- FeatureCIInsts, FeatureMovrel]
+ FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange]
>;
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
- FeatureIntClamp
+ FeatureIntClamp, FeatureTrigReducedRange
]
>;
def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
- FeatureAddNoCarryInsts, FeatureScalarAtomics
+ FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
]
>;
@@ -456,34 +485,41 @@ def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
[FeatureSouthernIslands,
FeatureFastFMAF32,
HalfRate64Ops,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
[FeatureSouthernIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
[FeatureSeaIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
[FeatureSeaIslands,
HalfRate64Ops,
FeatureLDSBankCount32,
- FeatureFastFMAF32]>;
+ FeatureFastFMAF32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
[FeatureSeaIslands,
FeatureLDSBankCount16,
- FeatureFastFMAF32]>;
+ FeatureFastFMAF32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
[FeatureSeaIslands,
- FeatureLDSBankCount16]>;
+ FeatureLDSBankCount16,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion7_0_4 : SubtargetFeatureISAVersion <7,0,4,
[FeatureSeaIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
[FeatureVolcanicIslands,
@@ -491,49 +527,63 @@ def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
HalfRate64Ops,
FeatureLDSBankCount32,
FeatureXNACK,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
FeatureSGPRInitBug,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
- FeatureUnpackedD16VMem]>;
+ FeatureUnpackedD16VMem,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
[FeatureVolcanicIslands,
FeatureLDSBankCount16,
- FeatureXNACK]>;
+ FeatureXNACK,
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
[FeatureGFX9,
FeatureLDSBankCount32,
FeatureFmaMixInsts,
- FeatureD16PreservesUnusedBits]>;
+ FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
[FeatureGFX9,
HalfRate64Ops,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
- FeatureDLInsts]>;
+ FeatureDLInsts,
+ FeatureDotInsts,
+ FeatureSRAMECC,
+ FeatureCodeObjectV3]>;
+
+def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9,
+ [FeatureGFX9,
+ FeatureMadMixInsts,
+ FeatureLDSBankCount32,
+ FeatureXNACK,
+ FeatureCodeObjectV3]>;
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
@@ -665,8 +715,9 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<"!FeatureUnpackedD16VMem">;
-def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">,
- AssemblerPredicate<"FeatureD16PreservesUnusedBits">;
+def D16PreservesUnusedBits :
+ Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">,
+ AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
@@ -674,10 +725,10 @@ def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"FeatureGFX9Insts">;
-def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarryInsts()">,
+def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
AssemblerPredicate<"FeatureAddNoCarryInsts">;
-def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarryInsts()">,
+def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">,
AssemblerPredicate<"!FeatureAddNoCarryInsts">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
@@ -697,6 +748,9 @@ def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;
+def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
+ AssemblerPredicate<"FeatureR128A16">;
+
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;
@@ -719,6 +773,9 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
AssemblerPredicate<"FeatureDLInsts">;
+def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
+ AssemblerPredicate<"FeatureDotInsts">;
+
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
@@ -727,7 +784,6 @@ def EnableLateCFGStructurize : Predicate<
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
-include "AMDGPUIntrinsics.td"
include "SIIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 974fbcb87191..73709ba13643 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -34,34 +34,28 @@ using namespace llvm;
// Register this pass...
char AMDGPUAAWrapperPass::ID = 0;
+char AMDGPUExternalAAWrapper::ID = 0;
INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
"AMDGPU Address space based Alias Analysis", false, true)
+INITIALIZE_PASS(AMDGPUExternalAAWrapper, "amdgpu-aa-wrapper",
+ "AMDGPU Address space based Alias Analysis Wrapper", false, true)
+
ImmutablePass *llvm::createAMDGPUAAWrapperPass() {
return new AMDGPUAAWrapperPass();
}
+ImmutablePass *llvm::createAMDGPUExternalAAWrapperPass() {
+ return new AMDGPUExternalAAWrapper();
+}
+
void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
-// Must match the table in getAliasResult.
-AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
- : Arch(Arch_), AS(AS_) {
- // These arrarys are indexed by address space value
- // enum elements 0 ... to 6
- static const AliasResult ASAliasRulesPrivIsZero[7][7] = {
- /* Private Global Constant Group Flat Region Constant 32-bit */
- /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
- /* Global */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
- /* Constant */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
- /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias , NoAlias},
- /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
- /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
- /* Constant 32-bit */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias}
- };
- static const AliasResult ASAliasRulesGenIsZero[7][7] = {
+// These arrays are indexed by address space value enum elements 0 ... to 6
+static const AliasResult ASAliasRules[7][7] = {
/* Flat Global Region Group Constant Private Constant 32-bit */
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
@@ -70,37 +64,15 @@ AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Ar
/* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias, NoAlias , MayAlias},
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
/* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
- };
+};
+
+static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
- if (AS.FLAT_ADDRESS == 0) {
- assert(AS.GLOBAL_ADDRESS == 1 &&
- AS.REGION_ADDRESS == 2 &&
- AS.LOCAL_ADDRESS == 3 &&
- AS.CONSTANT_ADDRESS == 4 &&
- AS.PRIVATE_ADDRESS == 5 &&
- AS.CONSTANT_ADDRESS_32BIT == 6);
- ASAliasRules = &ASAliasRulesGenIsZero;
- } else {
- assert(AS.PRIVATE_ADDRESS == 0 &&
- AS.GLOBAL_ADDRESS == 1 &&
- AS.CONSTANT_ADDRESS == 2 &&
- AS.LOCAL_ADDRESS == 3 &&
- AS.FLAT_ADDRESS == 4 &&
- AS.REGION_ADDRESS == 5 &&
- AS.CONSTANT_ADDRESS_32BIT == 6);
- ASAliasRules = &ASAliasRulesPrivIsZero;
- }
-}
-AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
- unsigned AS2) const {
- if (AS1 > AS.MAX_AMDGPU_ADDRESS || AS2 > AS.MAX_AMDGPU_ADDRESS) {
- if (Arch == Triple::amdgcn)
- report_fatal_error("Pointer address space out of range");
- return AS1 == AS2 ? MayAlias : NoAlias;
- }
+ if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
+ return MayAlias;
- return (*ASAliasRules)[AS1][AS2];
+ return ASAliasRules[AS1][AS2];
}
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
@@ -108,8 +80,9 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
- AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
- if (Result == NoAlias) return Result;
+ AliasResult Result = getAliasResult(asA, asB);
+ if (Result == NoAlias)
+ return Result;
// Forward the query to the next alias analysis.
return AAResultBase::alias(LocA, LocB);
@@ -118,9 +91,9 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
-
- if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS ||
- Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS_32BIT) {
+ unsigned AS = Base->getType()->getPointerAddressSpace();
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
return true;
}
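Editorial note: after this change the alias rules are a single static table keyed by the fixed address space numbers, and out-of-range spaces fall back to MayAlias instead of reporting a fatal error. A toy sketch of that lookup shape; the 3x3 table here is illustrative only, whereas the real pass uses the 7x7 ASAliasRules table shown in the hunk:

#include <cassert>

enum Result { NoAlias, MayAlias };

static const Result Rules[3][3] = {
    /*            Flat      Global    Local  */
    /* Flat   */ {MayAlias, MayAlias, MayAlias},
    /* Global */ {MayAlias, MayAlias, NoAlias},
    /* Local  */ {MayAlias, NoAlias,  MayAlias},
};

Result getAliasResult(unsigned AS1, unsigned AS2) {
  if (AS1 > 2 || AS2 > 2)
    return MayAlias; // unknown address spaces: stay conservative
  return Rules[AS1][AS2];
}

int main() {
  assert(getAliasResult(1, 2) == NoAlias);  // global vs. local never alias
  assert(getAliasResult(0, 7) == MayAlias); // out of range -> conservative
  return 0;
}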
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 09ad51d5e42f..d76c9fc48199 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -33,14 +33,12 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
friend AAResultBase<AMDGPUAAResult>;
const DataLayout &DL;
- AMDGPUAS AS;
public:
explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
- DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS, T.getArch()) {}
+ DL(DL) {}
AMDGPUAAResult(AMDGPUAAResult &&Arg)
- : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
- ASAliasRules(Arg.ASAliasRules){}
+ : AAResultBase(std::move(Arg)), DL(Arg.DL) {}
/// Handle invalidation events from the new pass manager.
///
@@ -53,18 +51,6 @@ public:
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
bool PathAliases(const MDNode *A, const MDNode *B) const;
-
- class ASAliasRulesTy {
- public:
- ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
-
- AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
-
- private:
- Triple::ArchType Arch;
- AMDGPUAS AS;
- const AliasResult (*ASAliasRules)[7][7];
- } ASAliasRules;
};
/// Analysis pass providing a never-invalidated alias analysis result.
@@ -110,6 +96,19 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+// Wrapper around ExternalAAWrapperPass so that the default constructor gets the
+// callback.
+class AMDGPUExternalAAWrapper : public ExternalAAWrapperPass {
+public:
+ static char ID;
+
+ AMDGPUExternalAAWrapper() : ExternalAAWrapperPass(
+ [](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }) {}
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
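Editorial note: AMDGPUExternalAAWrapper above exists only so that default-constructing the pass installs a callback that merges the target's AA result into the shared results. A loose standalone sketch of that "wrapper with a baked-in callback" idea; ExternalHook, Results, and addResult are hypothetical names, not LLVM API:

#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Results {
  std::vector<std::string> Providers;
  void addResult(const std::string &Name) { Providers.push_back(Name); }
};

struct ExternalHook {
  using Callback = std::function<void(Results &)>;
  explicit ExternalHook(Callback CB) : CB(std::move(CB)) {}
  void run(Results &R) { if (CB) CB(R); }
  Callback CB;
};

// The subclass exists only so that default construction installs the hook.
struct TargetHook : ExternalHook {
  TargetHook() : ExternalHook([](Results &R) { R.addResult("amdgpu-aa"); }) {}
};

int main() {
  Results R;
  TargetHook().run(R);
  std::cout << R.Providers.front() << "\n"; // prints "amdgpu-aa"
}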
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index d4bbb2c1eb8d..fc65430b745f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -86,8 +86,6 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers(
}
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
- AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);
-
std::vector<GlobalAlias*> AliasesToRemove;
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
@@ -122,7 +120,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
for (GlobalVariable &GV : M.globals()) {
// TODO: Region address
unsigned AS = GV.getType()->getAddressSpace();
- if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
continue;
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 1a70833a4472..896ac9c87779 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -46,7 +46,6 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
- AMDGPUAS AS;
bool addFeatureAttributes(Function &F);
@@ -67,11 +66,10 @@ public:
CallGraphSCCPass::getAnalysisUsage(AU);
}
- static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
+ static bool visitConstantExpr(const ConstantExpr *CE);
static bool visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
- AMDGPUAS AS);
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};
} // end anonymous namespace
@@ -85,20 +83,18 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
// The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
- return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS) {
+ return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
- const AMDGPUAS &AS) {
- return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
+ return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
- AMDGPUAS AS) {
+bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
- return castRequiresQueuePtr(SrcAS, AS);
+ return castRequiresQueuePtr(SrcAS);
}
return false;
@@ -106,8 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
- AMDGPUAS AS) {
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
if (!ConstantExprVisited.insert(EntryC).second)
return false;
@@ -120,7 +115,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
// Check this constant expression.
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (visitConstantExpr(CE, AS))
+ if (visitConstantExpr(CE))
return true;
}
@@ -262,7 +257,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
continue;
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (castRequiresQueuePtr(ASC, AS)) {
+ if (castRequiresQueuePtr(ASC)) {
NeedQueuePtr = true;
continue;
}
@@ -273,7 +268,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
if (!OpC)
continue;
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
+ if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
NeedQueuePtr = true;
break;
}
@@ -318,7 +313,6 @@ bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
if (!TPC)
report_fatal_error("TargetMachine is required");
- AS = AMDGPU::getAMDGPUAS(CG.getModule());
TM = &TPC->getTM<TargetMachine>();
return false;
}
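Editorial note: dropping the AMDGPUAS argument leaves visitConstantExprsRecursively as a plain visited-set walk over constant operands, looking for an addrspacecast that would require the queue pointer. A toy version of that idiom (recursive here for brevity, whereas the pass uses a worklist), with hypothetical names:

#include <cassert>
#include <unordered_set>
#include <vector>

struct Expr {
  bool NeedsQueuePtr = false;           // stands in for the addrspacecast test
  std::vector<const Expr *> Operands;
};

bool anyNeedsQueuePtr(const Expr *E, std::unordered_set<const Expr *> &Seen) {
  if (!Seen.insert(E).second)           // already visited: nothing new to learn
    return false;
  if (E->NeedsQueuePtr)
    return true;
  for (const Expr *Op : E->Operands)
    if (anyNeedsQueuePtr(Op, Seen))
      return true;
  return false;
}

int main() {
  Expr Leaf; Leaf.NeedsQueuePtr = true;
  Expr Mid;  Mid.Operands = {&Leaf};
  Expr Root; Root.Operands = {&Mid, &Mid}; // shared operand, visited once
  std::unordered_set<const Expr *> Seen;
  assert(anyNeedsQueuePtr(&Root, Seen));
  return 0;
}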
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index ed5370826647..f88e3b0dac86 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -16,7 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/IRBuilder.h"
@@ -32,12 +32,11 @@ namespace {
class AMDGPUAnnotateUniformValues : public FunctionPass,
public InstVisitor<AMDGPUAnnotateUniformValues> {
- DivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA;
MemoryDependenceResults *MDR;
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isKernelFunc;
- AMDGPUAS AMDGPUASI;
public:
static char ID;
@@ -49,7 +48,7 @@ public:
return "AMDGPU Annotate Uniform Values";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
@@ -64,7 +63,7 @@ public:
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
@@ -118,14 +117,8 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
}
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
- if (I.isUnconditional())
- return;
-
- Value *Cond = I.getCondition();
- if (!DA->isUniform(Cond))
- return;
-
- setUniformMetadata(I.getParent()->getTerminator());
+ if (DA->isUniform(&I))
+ setUniformMetadata(I.getParent()->getTerminator());
}
void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
@@ -133,7 +126,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
if (!DA->isUniform(Ptr))
return;
auto isGlobalLoad = [&](LoadInst &Load)->bool {
- return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
};
// We're tracking up to the Function boundaries
// We cannot go beyond because of FunctionPass restrictions
@@ -168,7 +161,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(M);
return false;
}
@@ -176,7 +168,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index e62e5d52ad74..2ded7cdb6489 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -40,11 +40,13 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::HSAMD;
// TODO: This should get the default rounding mode from the kernel. We just set
// the default here, but this could change if the OpenCL rounding mode pragmas
@@ -98,8 +100,11 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
- AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
- }
+ if (IsaInfo::hasCodeObjectV3(getSTI()))
+ HSAMetadataStream.reset(new MetadataStreamerV3());
+ else
+ HSAMetadataStream.reset(new MetadataStreamerV2());
+}
StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
@@ -116,62 +121,70 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
+ if (IsaInfo::hasCodeObjectV3(getSTI())) {
+ std::string ExpectedTarget;
+ raw_string_ostream ExpectedTargetOS(ExpectedTarget);
+ IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
+
+ getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
+ }
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
- HSAMetadataStream.begin(M);
+ HSAMetadataStream->begin(M);
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
readPALMetadata(M);
+ if (IsaInfo::hasCodeObjectV3(getSTI()))
+ return;
+
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
+ IsaVersion Version = getIsaVersion(getSTI()->getCPU());
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
- ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
+ Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
- // TODO: Add metadata to code object v3.
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
-
// Following code requires TargetStreamer to be present.
if (!getTargetStreamer())
return;
- // Emit ISA Version (NT_AMD_AMDGPU_ISA).
- std::string ISAVersionString;
- raw_string_ostream ISAVersionStream(ISAVersionString);
- IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
- getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
+ if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ // Emit ISA Version (NT_AMD_AMDGPU_ISA).
+ std::string ISAVersionString;
+ raw_string_ostream ISAVersionStream(ISAVersionString);
+ IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
+ getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
+ }
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
- HSAMetadataStream.end();
- getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata());
+ HSAMetadataStream->end();
+ bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
+ (void)Success;
+ assert(Success && "Malformed HSA Metadata");
}
- // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
- if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
- // Copy the PAL metadata from the map where we collected it into a vector,
- // then write it as a .note.
- PALMD::Metadata PALMetadataVector;
- for (auto i : PALMetadataMap) {
- PALMetadataVector.push_back(i.first);
- PALMetadataVector.push_back(i.second);
+ if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
+ if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
+ // Copy the PAL metadata from the map where we collected it into a vector,
+ // then write it as a .note.
+ PALMD::Metadata PALMetadataVector;
+ for (auto i : PALMetadataMap) {
+ PALMetadataVector.push_back(i.first);
+ PALMetadataVector.push_back(i.second);
+ }
+ getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
}
- getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
}
}
@@ -193,13 +206,10 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
if (!MFI.isEntryFunction())
return;
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
- if (STM.isAmdCodeObjectV2(F) &&
+ if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
@@ -207,10 +217,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
- return;
-
- HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
+ if (STM.isAmdHsaOS())
+ HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
}
void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
@@ -241,7 +249,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
+ IsaInfo::getNumExtraSGPRs(getSTI(),
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
@@ -259,7 +267,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
- if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
+ if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
SmallString<128> SymbolName;
getNameWithPrefix(SymbolName, &MF->getFunction()),
getTargetStreamer()->EmitAMDGPUSymbolType(
@@ -562,7 +570,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
const GCNSubtarget &ST) const {
- return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
+ return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
UsesVCC, UsesFlatScratch);
}
@@ -759,7 +767,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// 48 SGPRs - vcc, - flat_scr, -xnack
int MaxSGPRGuess =
- 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
+ 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -824,7 +832,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -906,9 +914,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
- STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
+ &STM, ProgInfo.NumSGPRsForWavesPerEU);
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
- STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
+ &STM, ProgInfo.NumVGPRsForWavesPerEU);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
@@ -1003,7 +1011,6 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
@@ -1024,10 +1031,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(RsrcReg, 4);
OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
- if (STM.isVGPRSpillingEnabled(MF.getFunction())) {
- OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
- OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
- }
+ OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
+ OutStreamer->EmitIntValue(
+ S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
}
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
@@ -1138,7 +1144,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
Out.compute_pgm_resource_registers =
CurrentProgramInfo.ComputePGMRSrc1 |
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 22982d912c70..167ac4b21e1e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -56,7 +56,7 @@ private:
SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
- AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream;
+ std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
std::map<uint32_t, uint32_t> PALMetadataMap;
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
@@ -143,7 +143,6 @@ public:
protected:
mutable std::vector<std::string> DisasmLines, HexLines;
mutable size_t DisasmLineMaxLen;
- AMDGPUAS AMDGPUASI;
};
} // end namespace llvm
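Editorial note: the asm printer now owns its metadata streamer through a unique_ptr to a base class and picks the V2 or V3 implementation once, at construction, based on the code object version. A rough standalone sketch of that constructor-time selection; the class names are stand-ins, not the llvm::AMDGPU::HSAMD types:

#include <iostream>
#include <memory>

struct MetadataStreamer {
  virtual ~MetadataStreamer() = default;
  virtual const char *version() const = 0;
};
struct MetadataStreamerV2 : MetadataStreamer {
  const char *version() const override { return "v2"; }
};
struct MetadataStreamerV3 : MetadataStreamer {
  const char *version() const override { return "v3"; }
};

std::unique_ptr<MetadataStreamer> makeStreamer(bool HasCodeObjectV3) {
  if (HasCodeObjectV3)
    return std::make_unique<MetadataStreamerV3>();
  return std::make_unique<MetadataStreamerV2>();
}

int main() {
  std::cout << makeStreamer(true)->version() << "\n";  // v3
  std::cout << makeStreamer(false)->version() << "\n"; // v2
}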
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
new file mode 100644
index 000000000000..644e4fd558ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -0,0 +1,458 @@
+//===-- AMDGPUAtomicOptimizer.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass optimizes atomic operations by using a single lane of a wavefront
+/// to perform the atomic operation, thus reducing contention on that memory
+/// location.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "amdgpu-atomic-optimizer"
+
+using namespace llvm;
+
+namespace {
+
+enum DPP_CTRL {
+ DPP_ROW_SR1 = 0x111,
+ DPP_ROW_SR2 = 0x112,
+ DPP_ROW_SR4 = 0x114,
+ DPP_ROW_SR8 = 0x118,
+ DPP_WF_SR1 = 0x138,
+ DPP_ROW_BCAST15 = 0x142,
+ DPP_ROW_BCAST31 = 0x143
+};
+
+struct ReplacementInfo {
+ Instruction *I;
+ Instruction::BinaryOps Op;
+ unsigned ValIdx;
+ bool ValDivergent;
+};
+
+class AMDGPUAtomicOptimizer : public FunctionPass,
+ public InstVisitor<AMDGPUAtomicOptimizer> {
+private:
+ SmallVector<ReplacementInfo, 8> ToReplace;
+ const LegacyDivergenceAnalysis *DA;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ bool HasDPP;
+ bool IsPixelShader;
+
+ void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op,
+ unsigned ValIdx, bool ValDivergent) const;
+
+ void setConvergent(CallInst *const CI) const;
+
+public:
+ static char ID;
+
+ AMDGPUAtomicOptimizer() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &I);
+ void visitIntrinsicInst(IntrinsicInst &I);
+};
+
+} // namespace
+
+char AMDGPUAtomicOptimizer::ID = 0;
+
+char &llvm::AMDGPUAtomicOptimizerID = AMDGPUAtomicOptimizer::ID;
+
+bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) {
+ if (skipFunction(F)) {
+ return false;
+ }
+
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ DL = &F.getParent()->getDataLayout();
+ DominatorTreeWrapperPass *const DTW =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTW ? &DTW->getDomTree() : nullptr;
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const TargetMachine &TM = TPC.getTM<TargetMachine>();
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ HasDPP = ST.hasDPP();
+ IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;
+
+ visit(F);
+
+ const bool Changed = !ToReplace.empty();
+
+ for (ReplacementInfo &Info : ToReplace) {
+ optimizeAtomic(*Info.I, Info.Op, Info.ValIdx, Info.ValDivergent);
+ }
+
+ ToReplace.clear();
+
+ return Changed;
+}
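Editorial note: runOnFunction above deliberately splits the work into a visit phase that only appends candidates to ToReplace and a second loop that rewrites them, so the InstVisitor never walks IR that is being modified underneath it. A tiny standalone sketch of that collect-then-rewrite shape, with hypothetical names:

#include <cassert>
#include <vector>

struct Candidate { int Index; };

// Visit phase: record candidates, do not mutate.
std::vector<Candidate> collect(const std::vector<int> &Ops) {
  std::vector<Candidate> ToReplace;
  for (int I = 0, E = (int)Ops.size(); I != E; ++I)
    if (Ops[I] < 0)                  // stands in for "optimizable atomic"
      ToReplace.push_back({I});
  return ToReplace;
}

int main() {
  std::vector<int> Ops = {3, -1, 7, -4};
  auto ToReplace = collect(Ops);
  bool Changed = !ToReplace.empty();
  for (const Candidate &C : ToReplace) // rewrite phase, after traversal
    Ops[C.Index] = 0;
  assert(Changed && Ops[1] == 0 && Ops[3] == 0);
  return 0;
}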
+
+void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
+ // Early exit for unhandled address space atomic instructions.
+ switch (I.getPointerAddressSpace()) {
+ default:
+ return;
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::LOCAL_ADDRESS:
+ break;
+ }
+
+ Instruction::BinaryOps Op;
+
+ switch (I.getOperation()) {
+ default:
+ return;
+ case AtomicRMWInst::Add:
+ Op = Instruction::Add;
+ break;
+ case AtomicRMWInst::Sub:
+ Op = Instruction::Sub;
+ break;
+ }
+
+ const unsigned PtrIdx = 0;
+ const unsigned ValIdx = 1;
+
+ // If the pointer operand is divergent, then each lane is doing an atomic
+ // operation on a different address, and we cannot optimize that.
+ if (DA->isDivergent(I.getOperand(PtrIdx))) {
+ return;
+ }
+
+ const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+
+ // If the value operand is divergent, each lane is contributing a different
+ // value to the atomic calculation. We can only optimize divergent values if
+ // we have DPP available on our subtarget, and the atomic operation is 32
+ // bits.
+ if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) {
+ return;
+ }
+
+ // If we get here, we can optimize the atomic using a single wavefront-wide
+ // atomic operation to do the calculation for the entire wavefront, so
+ // remember the instruction so we can come back to it.
+ const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent};
+
+ ToReplace.push_back(Info);
+}
+
+void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
+ Instruction::BinaryOps Op;
+
+ switch (I.getIntrinsicID()) {
+ default:
+ return;
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ Op = Instruction::Add;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ Op = Instruction::Sub;
+ break;
+ }
+
+ const unsigned ValIdx = 0;
+
+ const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
+
+ // If the value operand is divergent, each lane is contributing a different
+ // value to the atomic calculation. We can only optimize divergent values if
+ // we have DPP available on our subtarget, and the atomic operation is 32
+ // bits.
+ if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) {
+ return;
+ }
+
+ // If any of the other arguments to the intrinsic are divergent, we can't
+ // optimize the operation.
+ for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
+ if (DA->isDivergent(I.getOperand(Idx))) {
+ return;
+ }
+ }
+
+ // If we get here, we can optimize the atomic using a single wavefront-wide
+ // atomic operation to do the calculation for the entire wavefront, so we
+ // record the instruction to come back to it later.
+ const ReplacementInfo Info = {&I, Op, ValIdx, ValDivergent};
+
+ ToReplace.push_back(Info);
+}
+
+void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
+ Instruction::BinaryOps Op,
+ unsigned ValIdx,
+ bool ValDivergent) const {
+ LLVMContext &Context = I.getContext();
+
+ // Start building just before the instruction.
+ IRBuilder<> B(&I);
+
+ // If we are in a pixel shader, we need to record the entry and exit BBs
+ // because of how we have to mask out helper lane invocations.
+ BasicBlock *PixelEntryBB = nullptr;
+ BasicBlock *PixelExitBB = nullptr;
+
+ // If we're optimizing an atomic within a pixel shader, we need to wrap the
+ // entire atomic operation in a helper-lane check. We do not want any helper
+ // lanes that are around only for the purposes of derivatives to take part
+ // in any cross-lane communication, and we use a branch on whether the lane is
+ // live to do this.
+ if (IsPixelShader) {
+ // Record I's original position as the entry block.
+ PixelEntryBB = I.getParent();
+
+ Value *const Cond = B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {}, {});
+ Instruction *const NonHelperTerminator =
+ SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, DT, nullptr);
+
+ // Record I's new position as the exit block.
+ PixelExitBB = I.getParent();
+
+ I.moveBefore(NonHelperTerminator);
+ B.SetInsertPoint(&I);
+ }
+
+ Type *const Ty = I.getType();
+ const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty);
+ Type *const VecTy = VectorType::get(B.getInt32Ty(), 2);
+
+ // This is the value in the atomic operation we need to combine in order to
+ // reduce the number of atomic operations.
+ Value *const V = I.getOperand(ValIdx);
+
+ // We need to know which lanes in the wavefront are active, and we do this by
+ // reading the exec register, whose set bits mark the active lanes.
+ MDNode *const RegName =
+ llvm::MDNode::get(Context, llvm::MDString::get(Context, "exec"));
+ Value *const Metadata = llvm::MetadataAsValue::get(Context, RegName);
+ CallInst *const Exec =
+ B.CreateIntrinsic(Intrinsic::read_register, {B.getInt64Ty()}, {Metadata});
+ setConvergent(Exec);
+
+ // We also need to know how many active lanes in the wavefront are below us.
+ // If each lane is numbered linearly starting from 0, a lane is below us only
+ // if its index is less than ours. We compute this with the mbcnt intrinsic,
+ // which effectively counts the set exec bits at positions lower than our own.
+ Value *const BitCast = B.CreateBitCast(Exec, VecTy);
+ Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
+ Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
+ CallInst *const PartialMbcnt = B.CreateIntrinsic(
+ Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)});
+ CallInst *const Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
+ {ExtractHi, PartialMbcnt});
+
+ Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false);
+
+ Value *LaneOffset = nullptr;
+ Value *NewV = nullptr;
+
+ // If we have a divergent value in each lane, we need to combine the value
+ // using DPP.
+ if (ValDivergent) {
+ // First we need to set all inactive invocations to 0, so that they can
+ // correctly contribute to the final result.
+ CallInst *const SetInactive = B.CreateIntrinsic(
+ Intrinsic::amdgcn_set_inactive, Ty, {V, B.getIntN(TyBitWidth, 0)});
+ setConvergent(SetInactive);
+ NewV = SetInactive;
+
+ const unsigned Iters = 6;
+ const unsigned DPPCtrl[Iters] = {DPP_ROW_SR1, DPP_ROW_SR2,
+ DPP_ROW_SR4, DPP_ROW_SR8,
+ DPP_ROW_BCAST15, DPP_ROW_BCAST31};
+ const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
+
+ // This loop performs an inclusive scan across the wavefront, with all lanes
+ // active (by using the WWM intrinsic).
+ for (unsigned Idx = 0; Idx < Iters; Idx++) {
+ CallInst *const DPP = B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, Ty,
+ {NewV, B.getInt32(DPPCtrl[Idx]),
+ B.getInt32(RowMask[Idx]),
+ B.getInt32(0xf), B.getFalse()});
+ setConvergent(DPP);
+ Value *const WWM = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
+
+ NewV = B.CreateBinOp(Op, NewV, WWM);
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
+ }
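+ // The four row_shr steps above produce an inclusive scan within each 16-lane
+ // row; the two row_bcast steps (with row masks 0xa and 0xc) then fold the
+ // row totals into the higher rows, so after all six steps NewV holds a full
+ // 64-lane inclusive scan. A scalar reference for the end result would be:
+ //   for (int Lane = 0; Lane < 64; ++Lane)
+ //     Scan[Lane] = Value[Lane] + (Lane ? Scan[Lane - 1] : 0);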
+
+ // NewV now holds the inclusive scan of V, but for the lane offset we require
+ // an exclusive scan. We get this by shifting the values of the entire
+ // wavefront right by 1; setting bound_ctrl (the last argument to the
+ // intrinsic below) to true guarantees that 0 is shifted into the 0'th
+ // invocation.
+ CallInst *const DPP =
+ B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, {Ty},
+ {NewV, B.getInt32(DPP_WF_SR1), B.getInt32(0xf),
+ B.getInt32(0xf), B.getTrue()});
+ setConvergent(DPP);
+ LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
+
+ // Read the value from the last lane, which has accumulated the values of
+ // each active lane in the wavefront. This will be the value we provide to
+ // the atomic operation.
+ if (TyBitWidth == 64) {
+ Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
+ Value *const ExtractHi =
+ B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty());
+ CallInst *const ReadLaneLo = B.CreateIntrinsic(
+ Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)});
+ setConvergent(ReadLaneLo);
+ CallInst *const ReadLaneHi = B.CreateIntrinsic(
+ Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)});
+ setConvergent(ReadLaneHi);
+ Value *const PartialInsert = B.CreateInsertElement(
+ UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
+ Value *const Insert =
+ B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1));
+ NewV = B.CreateBitCast(Insert, Ty);
+ } else if (TyBitWidth == 32) {
+ CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane,
+ {}, {NewV, B.getInt32(63)});
+ setConvergent(ReadLane);
+ NewV = ReadLane;
+ } else {
+ llvm_unreachable("Unhandled atomic bit width");
+ }
+ } else {
+ // Get the total number of active lanes we have by using popcount.
+ Instruction *const Ctpop = B.CreateUnaryIntrinsic(Intrinsic::ctpop, Exec);
+ Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false);
+
+ // Calculate the new value we will be contributing to the atomic operation
+ // for the entire wavefront.
+ NewV = B.CreateMul(V, CtpopCast);
+ LaneOffset = B.CreateMul(V, MbcntCast);
+ }
+
+ // We only want a single lane to enter our new control flow, and we do this
+ // by checking if there are any active lanes below us. Exactly one lane will
+ // have 0 active lanes below it, so that is the only one to progress.
+ Value *const Cond = B.CreateICmpEQ(MbcntCast, B.getIntN(TyBitWidth, 0));
+
+ // Store I's original basic block before we split the block.
+ BasicBlock *const EntryBB = I.getParent();
+
+ // We need to introduce some new control flow to force a single lane to be
+ // active. We do this by splitting I's basic block at I, and introducing the
+ // new block such that:
+ // entry --> single_lane -\
+ // \------------------> exit
+ Instruction *const SingleLaneTerminator =
+ SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, DT, nullptr);
+
+ // Move the IR builder into single_lane next.
+ B.SetInsertPoint(SingleLaneTerminator);
+
+ // Clone the original atomic operation into single lane, replacing the
+ // original value with our newly created one.
+ Instruction *const NewI = I.clone();
+ B.Insert(NewI);
+ NewI->setOperand(ValIdx, NewV);
+
+ // Move the IR builder into exit next, and start inserting just before the
+ // original instruction.
+ B.SetInsertPoint(&I);
+
+ // Create a PHI node to get our new atomic result into the exit block.
+ PHINode *const PHI = B.CreatePHI(Ty, 2);
+ PHI->addIncoming(UndefValue::get(Ty), EntryBB);
+ PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
+
+ // We need to broadcast the value from the lowest active lane (the first
+ // active lane) to all other lanes in the wavefront. We use an intrinsic for
+ // this, but have to handle 64-bit broadcasts with two calls to it.
+ Value *BroadcastI = nullptr;
+
+ if (TyBitWidth == 64) {
+ Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty());
+ Value *const ExtractHi =
+ B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty());
+ CallInst *const ReadFirstLaneLo =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
+ setConvergent(ReadFirstLaneLo);
+ CallInst *const ReadFirstLaneHi =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
+ setConvergent(ReadFirstLaneHi);
+ Value *const PartialInsert = B.CreateInsertElement(
+ UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
+ Value *const Insert =
+ B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
+ BroadcastI = B.CreateBitCast(Insert, Ty);
+ } else if (TyBitWidth == 32) {
+ CallInst *const ReadFirstLane =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
+ setConvergent(ReadFirstLane);
+ BroadcastI = ReadFirstLane;
+ } else {
+ llvm_unreachable("Unhandled atomic bit width");
+ }
+
+ // Now that we have the result of our single atomic operation, we need to
+ // recover each lane's individual portion of the result. We combine the lane
+ // offset we previously calculated with the atomic result value broadcast
+ // from the first lane to reconstruct what this lane's original atomic would
+ // have returned.
+ Value *const Result = B.CreateBinOp(Op, BroadcastI, LaneOffset);
+
+ if (IsPixelShader) {
+ // We need a final PHI to reconverge above the helper-lane branch.
+ B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
+
+ PHINode *const PHI = B.CreatePHI(Ty, 2);
+ PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB);
+ PHI->addIncoming(Result, I.getParent());
+ I.replaceAllUsesWith(PHI);
+ } else {
+ // Replace the original atomic instruction with the new one.
+ I.replaceAllUsesWith(Result);
+ }
+
+ // And delete the original.
+ I.eraseFromParent();
+}
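A worked example of the full rewrite, with assumed numbers that are not taken from the patch: suppose four lanes are active, the operation is an add, and the memory location currently holds 100.

    // Uniform case: every lane adds V = 5.
    //   NewV = V * popcount(exec) = 20      (the single atomic adds 20)
    //   Old  = 100                          (returned by that one atomic)
    //   a lane with 2 active lanes below it: LaneOffset = 5 * 2 = 10
    //   Result = Old + LaneOffset = 110     (what its own atomic would return)
    //
    // Divergent case: the four lanes contribute 1, 2, 3 and 4.
    //   inclusive scan = 1, 3, 6, 10; NewV = 10 (read back from the last lane)
    //   exclusive scan = 0, 1, 3, 6             (LaneOffset per lane)
    //   per-lane Result = 100, 101, 103, 106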
+
+void AMDGPUAtomicOptimizer::setConvergent(CallInst *const CI) const {
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
+}
+
+INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
+ "AMDGPU atomic optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AMDGPUAtomicOptimizer, DEBUG_TYPE,
+ "AMDGPU atomic optimizations", false, false)
+
+FunctionPass *llvm::createAMDGPUAtomicOptimizerPass() {
+ return new AMDGPUAtomicOptimizer();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 18c7df0d94f2..daef37f9c21f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -28,11 +28,12 @@
using namespace llvm;
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
- : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
+ : CallLowering(&TLI) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
// FIXME: Add support for non-void returns.
if (Val)
return false;
@@ -50,7 +51,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
LLT PtrType = getLLTForType(*PtrTy, DL);
unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
unsigned KernArgSegmentPtr =
@@ -72,7 +73,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index f51cb6abbf65..ed859716218e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -23,8 +23,6 @@ namespace llvm {
class AMDGPUTargetLowering;
class AMDGPUCallLowering: public CallLowering {
- AMDGPUAS AMDGPUASI;
-
unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
uint64_t Offset) const;
@@ -35,8 +33,8 @@ class AMDGPUCallLowering: public CallLowering {
public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 68bc7fdd9961..367f120b5fa6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -19,7 +19,7 @@ class CCIfExtend<CCAction A>
// Calling convention for SI
def CC_SI : CallingConv<[
- CCIfInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
+ CCIfInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -33,7 +33,7 @@ def CC_SI : CallingConv<[
CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
- CCIfNotInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -64,7 +64,7 @@ def RetCC_SI_Shader : CallingConv<[
]>>,
// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
- CCIfType<[f32, f16] , CCAssignToReg<[
+ CCIfType<[f32, f16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 5713b7b7f9a8..4dc1e67c573d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -18,7 +18,7 @@
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
@@ -60,10 +60,9 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNSubtarget *ST = nullptr;
AssumptionCache *AC = nullptr;
- DivergenceAnalysis *DA = nullptr;
+ LegacyDivergenceAnalysis *DA = nullptr;
Module *Mod = nullptr;
bool HasUnsafeFPMath = false;
- AMDGPUAS AMDGPUASI;
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
@@ -177,7 +176,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.setPreservesAll();
}
};
@@ -559,7 +558,7 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
Value *FQM = Builder.CreateFMul(FA, RCP);
// fq = trunc(fqm);
- CallInst* FQ = Builder.CreateIntrinsic(Intrinsic::trunc, { FQM });
+ CallInst *FQ = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, FQM);
FQ->copyFastMathFlags(Builder.getFastMathFlags());
// float fqneg = -fq;
@@ -567,17 +566,17 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
// float fr = mad(fqneg, fb, fa);
Value *FR = Builder.CreateIntrinsic(Intrinsic::amdgcn_fmad_ftz,
- { FQNeg, FB, FA }, FQ);
+ {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);
// int iq = (int)fq;
Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty)
: Builder.CreateFPToUI(FQ, I32Ty);
// fr = fabs(fr);
- FR = Builder.CreateIntrinsic(Intrinsic::fabs, { FR }, FQ);
+ FR = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FR, FQ);
// fb = fabs(fb);
- FB = Builder.CreateIntrinsic(Intrinsic::fabs, { FB }, FQ);
+ FB = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FB, FQ);
// int cv = fr >= fb;
Value *CV = Builder.CreateFCmpOGE(FR, FB);
@@ -799,8 +798,8 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
if (!WidenLoads)
return false;
- if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ if ((I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
canWidenScalarExtLoad(I)) {
IRBuilder<> Builder(&I);
Builder.SetCurrentDebugLocation(I.getDebugLoc());
@@ -898,9 +897,8 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
ST = &TM.getSubtarget<GCNSubtarget>(F);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
HasUnsafeFPMath = hasUnsafeFPMath(F);
- AMDGPUASI = TM.getAMDGPUAS();
bool MadeChange = false;
@@ -918,7 +916,7 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
false, false)
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
new file mode 100644
index 000000000000..6e2a981d3396
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
@@ -0,0 +1,63 @@
+//===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Promote indirect (bitcast) calls to direct calls when they are statically
+/// known to be direct. Required when InstCombine is not run (e.g. at OptNone)
+/// because AMDGPU does not support indirect calls.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-fix-function-bitcasts"
+
+namespace {
+class AMDGPUFixFunctionBitcasts final
+ : public ModulePass,
+ public InstVisitor<AMDGPUFixFunctionBitcasts> {
+
+ bool runOnModule(Module &M) override;
+
+ bool Modified;
+
+public:
+ void visitCallSite(CallSite CS) {
+ if (CS.getCalledFunction())
+ return;
+ auto Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (Callee && isLegalToPromote(CS, Callee)) {
+ promoteCall(CS, Callee);
+ Modified = true;
+ }
+ }
+
+ static char ID;
+ AMDGPUFixFunctionBitcasts() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char AMDGPUFixFunctionBitcasts::ID = 0;
+char &llvm::AMDGPUFixFunctionBitcastsID = AMDGPUFixFunctionBitcasts::ID;
+INITIALIZE_PASS(AMDGPUFixFunctionBitcasts, DEBUG_TYPE,
+ "Fix function bitcasts for AMDGPU", false, false)
+
+ModulePass *llvm::createAMDGPUFixFunctionBitcastsPass() {
+ return new AMDGPUFixFunctionBitcasts();
+}
+
+bool AMDGPUFixFunctionBitcasts::runOnModule(Module &M) {
+ Modified = false;
+ visit(M);
+ return Modified;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index ba735390f679..59bb2a16e0f3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -122,15 +122,14 @@ def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
}
def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
-// FIXME: Select directly to _e32 so we don't need to deal with modifiers.
// FIXME: We can't re-use SelectionDAG patterns here because they match
// against a custom SDNode and we would need to create a generic machine
// instruction that is equivalent to the custom SDNode. This would also require
// us to custom legalize the intrinsic to the new generic machine instruction,
// but I can't get custom legalizing of intrinsic to work and I'm not sure if
// this is even supported yet.
-defm : GISelVop2IntrPat <
- int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e32, v2f16, f32>;
+def : GISelVop3Pat2ModsPat <
+ int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e64, v2f16, f32>;
defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 3a58c6c6a29f..6eab59ab4e09 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -16,34 +16,38 @@ namespace AMDGPU {
enum PartialMappingIdx {
None = - 1,
- PM_SGPR1 = 0,
- PM_SGPR16 = 4,
- PM_SGPR32 = 5,
- PM_SGPR64 = 6,
- PM_SGPR128 = 7,
- PM_SGPR256 = 8,
- PM_SGPR512 = 9,
- PM_VGPR1 = 10,
- PM_VGPR16 = 14,
- PM_VGPR32 = 15,
- PM_VGPR64 = 16,
- PM_VGPR128 = 17,
- PM_VGPR256 = 18,
- PM_VGPR512 = 19,
- PM_SGPR96 = 20,
- PM_VGPR96 = 21
+ PM_SGPR1 = 2,
+ PM_SGPR16 = 6,
+ PM_SGPR32 = 7,
+ PM_SGPR64 = 8,
+ PM_SGPR128 = 9,
+ PM_SGPR256 = 10,
+ PM_SGPR512 = 11,
+ PM_VGPR1 = 12,
+ PM_VGPR16 = 16,
+ PM_VGPR32 = 17,
+ PM_VGPR64 = 18,
+ PM_VGPR128 = 19,
+ PM_VGPR256 = 20,
+ PM_VGPR512 = 21,
+ PM_SGPR96 = 22,
+ PM_VGPR96 = 23
};
const RegisterBankInfo::PartialMapping PartMappings[] {
// StartIdx, Length, RegBank
{0, 1, SCCRegBank},
+ {0, 1, VCCRegBank},
+
+ {0, 1, SGPRRegBank}, // SGPR begin
{0, 16, SGPRRegBank},
{0, 32, SGPRRegBank},
{0, 64, SGPRRegBank},
{0, 128, SGPRRegBank},
{0, 256, SGPRRegBank},
{0, 512, SGPRRegBank},
- {0, 1, SGPRRegBank},
+
+ {0, 1, VGPRRegBank}, // VGPR begin
{0, 16, VGPRRegBank},
{0, 32, VGPRRegBank},
{0, 64, VGPRRegBank},
@@ -55,33 +59,43 @@ const RegisterBankInfo::PartialMapping PartMappings[] {
};
const RegisterBankInfo::ValueMapping ValMappings[] {
+ // SCC
{&PartMappings[0], 1},
- {nullptr, 0},
- {nullptr, 0},
- {nullptr, 0},
+
+ // VCC
{&PartMappings[1], 1},
+
+ // SGPRs
{&PartMappings[2], 1},
+ {nullptr, 0}, // Illegal power of 2 sizes
+ {nullptr, 0},
+ {nullptr, 0},
{&PartMappings[3], 1},
{&PartMappings[4], 1},
{&PartMappings[5], 1},
{&PartMappings[6], 1},
{&PartMappings[7], 1},
+ {&PartMappings[8], 1},
+
+ // VGPRs
+ {&PartMappings[9], 1},
{nullptr, 0},
{nullptr, 0},
{nullptr, 0},
- {&PartMappings[8], 1},
- {&PartMappings[9], 1},
{&PartMappings[10], 1},
{&PartMappings[11], 1},
{&PartMappings[12], 1},
{&PartMappings[13], 1},
{&PartMappings[14], 1},
- {&PartMappings[15], 1}
+ {&PartMappings[15], 1},
+ {&PartMappings[16], 1},
+ {&PartMappings[17], 1}
};
enum ValueMappingIdx {
- SGPRStartIdx = 0,
- VGPRStartIdx = 10
+ SCCStartIdx = 0,
+ SGPRStartIdx = 2,
+ VGPRStartIdx = 12
};
const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
@@ -89,16 +103,28 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
unsigned Idx;
switch (Size) {
case 1:
- Idx = BankID == AMDGPU::SCCRegBankID ? PM_SGPR1 : PM_VGPR1;
+ if (BankID == AMDGPU::SCCRegBankID)
+ return &ValMappings[0];
+ if (BankID == AMDGPU::VCCRegBankID)
+ return &ValMappings[1];
+
+ // 1-bit values not from a compare etc.
+ Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR1 : PM_VGPR1;
break;
case 96:
+ assert(BankID != AMDGPU::VCCRegBankID);
Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR96 : PM_VGPR96;
break;
default:
+ assert(BankID != AMDGPU::VCCRegBankID);
Idx = BankID == AMDGPU::VGPRRegBankID ? VGPRStartIdx : SGPRStartIdx;
Idx += Log2_32_Ceil(Size);
break;
}
+
+ assert(Log2_32_Ceil(Size) == Log2_32_Ceil(ValMappings[Idx].BreakDown->Length));
+ assert(BankID == ValMappings[Idx].BreakDown->RegBank->getID());
+
return &ValMappings[Idx];
}
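As a quick check of the index arithmetic above (an illustrative trace, not code from the patch): for a 64-bit value on the SGPR bank,

    // Idx = SGPRStartIdx + Log2_32_Ceil(64) = 2 + 6 = 8, i.e. PM_SGPR64,
    // whose PartialMapping is {0, 64, SGPRRegBank} in the tables above.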
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 01ef346f74ee..c38b0e61558b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -16,6 +16,7 @@
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -36,11 +37,14 @@ static cl::opt<bool> VerifyHSAMetadata(
namespace AMDGPU {
namespace HSAMD {
-void MetadataStreamer::dump(StringRef HSAMetadataString) const {
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV2
+//===----------------------------------------------------------------------===//
+void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
}
-void MetadataStreamer::verify(StringRef HSAMetadataString) const {
+void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
HSAMD::Metadata FromHSAMetadataString;
@@ -63,7 +67,8 @@ void MetadataStreamer::verify(StringRef HSAMetadataString) const {
}
}
-AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
+AccessQualifier
+MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
if (AccQual.empty())
return AccessQualifier::Unknown;
@@ -74,26 +79,29 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
.Default(AccessQualifier::Default);
}
-AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
+AddressSpaceQualifier
+MetadataStreamerV2::getAddressSpaceQualifier(
unsigned AddressSpace) const {
- if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
+ switch (AddressSpace) {
+ case AMDGPUAS::PRIVATE_ADDRESS:
return AddressSpaceQualifier::Private;
- if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
+ case AMDGPUAS::GLOBAL_ADDRESS:
return AddressSpaceQualifier::Global;
- if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
+ case AMDGPUAS::CONSTANT_ADDRESS:
return AddressSpaceQualifier::Constant;
- if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
+ case AMDGPUAS::LOCAL_ADDRESS:
return AddressSpaceQualifier::Local;
- if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
+ case AMDGPUAS::FLAT_ADDRESS:
return AddressSpaceQualifier::Generic;
- if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
+ case AMDGPUAS::REGION_ADDRESS:
return AddressSpaceQualifier::Region;
-
- llvm_unreachable("Unknown address space qualifier");
+ default:
+ return AddressSpaceQualifier::Unknown;
+ }
}
-ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
- StringRef BaseTypeName) const {
+ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
if (TypeQual.find("pipe") != StringRef::npos)
return ValueKind::Pipe;
@@ -114,13 +122,13 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
.Case("queue_t", ValueKind::Queue)
.Default(isa<PointerType>(Ty) ?
(Ty->getPointerAddressSpace() ==
- AMDGPUASI.LOCAL_ADDRESS ?
+ AMDGPUAS::LOCAL_ADDRESS ?
ValueKind::DynamicSharedPointer :
ValueKind::GlobalBuffer) :
ValueKind::ByValue);
}
-ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
+ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
auto Signed = !TypeName.startswith("u");
@@ -152,7 +160,7 @@ ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
}
}
-std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
+std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
if (!Signed)
@@ -189,8 +197,8 @@ std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
}
}
-std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
- MDNode *Node) const {
+std::vector<uint32_t>
+MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
std::vector<uint32_t> Dims;
if (Node->getNumOperands() != 3)
return Dims;
@@ -200,9 +208,9 @@ std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
return Dims;
}
-Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
+Kernel::CodeProps::Metadata
+MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
@@ -229,9 +237,9 @@ Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
return HSACodeProps;
}
-Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
+Kernel::DebugProps::Metadata
+MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
@@ -251,14 +259,14 @@ Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
return HSADebugProps;
}
-void MetadataStreamer::emitVersion() {
+void MetadataStreamerV2::emitVersion() {
auto &Version = HSAMetadata.mVersion;
Version.push_back(VersionMajor);
Version.push_back(VersionMinor);
}
-void MetadataStreamer::emitPrintf(const Module &Mod) {
+void MetadataStreamerV2::emitPrintf(const Module &Mod) {
auto &Printf = HSAMetadata.mPrintf;
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
@@ -270,7 +278,7 @@ void MetadataStreamer::emitPrintf(const Module &Mod) {
Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
}
-void MetadataStreamer::emitKernelLanguage(const Function &Func) {
+void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
auto &Kernel = HSAMetadata.mKernels.back();
// TODO: What about other languages?
@@ -288,7 +296,7 @@ void MetadataStreamer::emitKernelLanguage(const Function &Func) {
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
}
-void MetadataStreamer::emitKernelAttrs(const Function &Func) {
+void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
if (auto Node = Func.getMetadata("reqd_work_group_size"))
@@ -306,14 +314,14 @@ void MetadataStreamer::emitKernelAttrs(const Function &Func) {
}
}
-void MetadataStreamer::emitKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
for (auto &Arg : Func.args())
emitKernelArg(Arg);
emitHiddenKernelArgs(Func);
}
-void MetadataStreamer::emitKernelArg(const Argument &Arg) {
+void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
auto Func = Arg.getParent();
auto ArgNo = Arg.getArgNo();
const MDNode *Node;
@@ -355,7 +363,7 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
unsigned PointeeAlign = 0;
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
- if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
PointeeAlign = Arg.getParamAlignment();
if (PointeeAlign == 0)
PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
@@ -366,12 +374,12 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) {
PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
}
-void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
- ValueKind ValueKind,
- unsigned PointeeAlign,
- StringRef Name,
- StringRef TypeName, StringRef BaseTypeName,
- StringRef AccQual, StringRef TypeQual) {
+void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
+ ValueKind ValueKind,
+ unsigned PointeeAlign, StringRef Name,
+ StringRef TypeName,
+ StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
@@ -384,7 +392,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
Arg.mPointeeAlign = PointeeAlign;
if (auto PtrTy = dyn_cast<PointerType>(Ty))
- Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
+ Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
Arg.mAccQual = getAccessQualifier(AccQual);
@@ -404,7 +412,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
}
}
-void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
int HiddenArgNumBytes =
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
@@ -422,7 +430,7 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
- AMDGPUASI.GLOBAL_ADDRESS);
+ AMDGPUAS::GLOBAL_ADDRESS);
// Emit "printf buffer" argument if printf is used, otherwise emit dummy
// "none" argument.
@@ -446,13 +454,16 @@ void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
}
}
-void MetadataStreamer::begin(const Module &Mod) {
- AMDGPUASI = getAMDGPUAS(Mod);
+bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+ return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
+}
+
+void MetadataStreamerV2::begin(const Module &Mod) {
emitVersion();
emitPrintf(Mod);
}
-void MetadataStreamer::end() {
+void MetadataStreamerV2::end() {
std::string HSAMetadataString;
if (toString(HSAMetadata, HSAMetadataString))
return;
@@ -463,7 +474,8 @@ void MetadataStreamer::end() {
verify(HSAMetadataString);
}
-void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
+void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
return;
@@ -483,6 +495,505 @@ void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo
HSAMetadata.mKernels.back().mDebugProps = DebugProps;
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV3
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
+ errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
+}
+
+void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
+ errs() << "AMDGPU HSA Metadata Parser Test: ";
+
+ std::shared_ptr<msgpack::Node> FromHSAMetadataString =
+ std::make_shared<msgpack::MapNode>();
+
+ yaml::Input YIn(HSAMetadataString);
+ YIn >> FromHSAMetadataString;
+ if (YIn.error()) {
+ errs() << "FAIL\n";
+ return;
+ }
+
+ std::string ToHSAMetadataString;
+ raw_string_ostream StrOS(ToHSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << FromHSAMetadataString;
+
+ errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
+ if (HSAMetadataString != ToHSAMetadataString) {
+ errs() << "Original input: " << HSAMetadataString << '\n'
+ << "Produced output: " << StrOS.str() << '\n';
+ }
+}
+
+Optional<StringRef>
+MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
+ return StringSwitch<Optional<StringRef>>(AccQual)
+ .Case("read_only", StringRef("read_only"))
+ .Case("write_only", StringRef("write_only"))
+ .Case("read_write", StringRef("read_write"))
+ .Default(None);
+}
+
+Optional<StringRef>
+MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
+ switch (AddressSpace) {
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ return StringRef("private");
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ return StringRef("global");
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ return StringRef("constant");
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return StringRef("local");
+ case AMDGPUAS::FLAT_ADDRESS:
+ return StringRef("generic");
+ case AMDGPUAS::REGION_ADDRESS:
+ return StringRef("region");
+ default:
+ return None;
+ }
+}
+
+StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
+ if (TypeQual.find("pipe") != StringRef::npos)
+ return "pipe";
+
+ return StringSwitch<StringRef>(BaseTypeName)
+ .Case("image1d_t", "image")
+ .Case("image1d_array_t", "image")
+ .Case("image1d_buffer_t", "image")
+ .Case("image2d_t", "image")
+ .Case("image2d_array_t", "image")
+ .Case("image2d_array_depth_t", "image")
+ .Case("image2d_array_msaa_t", "image")
+ .Case("image2d_array_msaa_depth_t", "image")
+ .Case("image2d_depth_t", "image")
+ .Case("image2d_msaa_t", "image")
+ .Case("image2d_msaa_depth_t", "image")
+ .Case("image3d_t", "image")
+ .Case("sampler_t", "sampler")
+ .Case("queue_t", "queue")
+ .Default(isa<PointerType>(Ty)
+ ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
+ ? "dynamic_shared_pointer"
+ : "global_buffer")
+ : "by_value");
+}
+
+StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ auto Signed = !TypeName.startswith("u");
+ switch (Ty->getIntegerBitWidth()) {
+ case 8:
+ return Signed ? "i8" : "u8";
+ case 16:
+ return Signed ? "i16" : "u16";
+ case 32:
+ return Signed ? "i32" : "u32";
+ case 64:
+ return Signed ? "i64" : "u64";
+ default:
+ return "struct";
+ }
+ }
+ case Type::HalfTyID:
+ return "f16";
+ case Type::FloatTyID:
+ return "f32";
+ case Type::DoubleTyID:
+ return "f64";
+ case Type::PointerTyID:
+ return getValueType(Ty->getPointerElementType(), TypeName);
+ case Type::VectorTyID:
+ return getValueType(Ty->getVectorElementType(), TypeName);
+ default:
+ return "struct";
+ }
+}
+
+std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ if (!Signed)
+ return (Twine('u') + getTypeName(Ty, true)).str();
+
+ auto BitWidth = Ty->getIntegerBitWidth();
+ switch (BitWidth) {
+ case 8:
+ return "char";
+ case 16:
+ return "short";
+ case 32:
+ return "int";
+ case 64:
+ return "long";
+ default:
+ return (Twine('i') + Twine(BitWidth)).str();
+ }
+ }
+ case Type::HalfTyID:
+ return "half";
+ case Type::FloatTyID:
+ return "float";
+ case Type::DoubleTyID:
+ return "double";
+ case Type::VectorTyID: {
+ auto VecTy = cast<VectorType>(Ty);
+ auto ElTy = VecTy->getElementType();
+ auto NumElements = VecTy->getVectorNumElements();
+ return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
+ }
+ default:
+ return "unknown";
+ }
+}
+
+std::shared_ptr<msgpack::ArrayNode>
+MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
+ auto Dims = std::make_shared<msgpack::ArrayNode>();
+ if (Node->getNumOperands() != 3)
+ return Dims;
+
+ for (auto &Op : Node->operands())
+ Dims->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op)->getZExtValue()));
+ return Dims;
+}
+
+void MetadataStreamerV3::emitVersion() {
+ auto Version = std::make_shared<msgpack::ArrayNode>();
+ Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
+ Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
+ getRootMetadata("amdhsa.version") = std::move(Version);
+}
+
+void MetadataStreamerV3::emitPrintf(const Module &Mod) {
+ auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
+ if (!Node)
+ return;
+
+ auto Printf = std::make_shared<msgpack::ArrayNode>();
+ for (auto Op : Node->operands())
+ if (Op->getNumOperands())
+ Printf->push_back(std::make_shared<msgpack::ScalarNode>(
+ cast<MDString>(Op->getOperand(0))->getString()));
+ getRootMetadata("amdhsa.printf") = std::move(Printf);
+}
+
+void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
+ msgpack::MapNode &Kern) {
+ // TODO: What about other languages?
+ auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
+ if (!Node || !Node->getNumOperands())
+ return;
+ auto Op0 = Node->getOperand(0);
+ if (Op0->getNumOperands() <= 1)
+ return;
+
+ Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
+ auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
+ LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
+ LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
+ Kern[".language_version"] = std::move(LanguageVersion);
+}
+
+void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
+ msgpack::MapNode &Kern) {
+
+ if (auto Node = Func.getMetadata("reqd_work_group_size"))
+ Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("work_group_size_hint"))
+ Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("vec_type_hint")) {
+ Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
+ cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+ mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
+ }
+ if (Func.hasFnAttribute("runtime-handle")) {
+ Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
+ Func.getFnAttribute("runtime-handle").getValueAsString().str());
+ }
+}
+
+void MetadataStreamerV3::emitKernelArgs(const Function &Func,
+ msgpack::MapNode &Kern) {
+ unsigned Offset = 0;
+ auto Args = std::make_shared<msgpack::ArrayNode>();
+ for (auto &Arg : Func.args())
+ emitKernelArg(Arg, Offset, *Args);
+
+ emitHiddenKernelArgs(Func, Offset, *Args);
+
+ // TODO: What about other languages?
+ if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
+ else
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel
+ // is used, otherwise emit dummy "none" arguments.
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ }
+ }
+
+ Kern[".args"] = std::move(Args);
+}
+
+void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
+ msgpack::ArrayNode &Args) {
+ auto Func = Arg.getParent();
+ auto ArgNo = Arg.getArgNo();
+ const MDNode *Node;
+
+ StringRef Name;
+ Node = Func->getMetadata("kernel_arg_name");
+ if (Node && ArgNo < Node->getNumOperands())
+ Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ else if (Arg.hasName())
+ Name = Arg.getName();
+
+ StringRef TypeName;
+ Node = Func->getMetadata("kernel_arg_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef BaseTypeName;
+ Node = Func->getMetadata("kernel_arg_base_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef AccQual;
+ if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
+ Arg.hasNoAliasAttr()) {
+ AccQual = "read_only";
+ } else {
+ Node = Func->getMetadata("kernel_arg_access_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ }
+
+ StringRef TypeQual;
+ Node = Func->getMetadata("kernel_arg_type_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ Type *Ty = Arg.getType();
+ const DataLayout &DL = Func->getParent()->getDataLayout();
+
+ unsigned PointeeAlign = 0;
+ if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ PointeeAlign = Arg.getParamAlignment();
+ if (PointeeAlign == 0)
+ PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
+ }
+ }
+
+ emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
+ getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
+ Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
+ TypeQual);
+}
+
+void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
+ StringRef ValueKind, unsigned &Offset,
+ msgpack::ArrayNode &Args,
+ unsigned PointeeAlign, StringRef Name,
+ StringRef TypeName,
+ StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
+ auto ArgPtr = std::make_shared<msgpack::MapNode>();
+ auto &Arg = *ArgPtr;
+
+ if (!Name.empty())
+ Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
+ if (!TypeName.empty())
+ Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
+ auto Size = DL.getTypeAllocSize(Ty);
+ auto Align = DL.getABITypeAlignment(Ty);
+ Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
+ Offset = alignTo(Offset, Align);
+ Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
+ Offset += Size;
+ Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
+ Arg[".value_type"] =
+ std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
+ if (PointeeAlign)
+ Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty))
+ if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
+ Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
+
+ if (auto AQ = getAccessQualifier(AccQual))
+ Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
+
+ // TODO: Emit Arg[".actual_access"].
+
+ SmallVector<StringRef, 1> SplitTypeQuals;
+ TypeQual.split(SplitTypeQuals, " ", -1, false);
+ for (StringRef Key : SplitTypeQuals) {
+ if (Key == "const")
+ Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "restrict")
+ Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "volatile")
+ Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "pipe")
+ Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
+ }
+
+ Args.push_back(std::move(ArgPtr));
+}
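For orientation, a hedged example of how the Offset bookkeeping above packs the .args list; the argument types are assumed for illustration and use the usual 64-bit ABI sizes and alignments:

    // int           : size 4, align 4 -> .offset 0   (Offset becomes 4)
    // long          : size 8, align 8 -> .offset 8   (4 aligned up to 8, Offset 16)
    // global char * : size 8, align 8 -> .offset 16  (Offset becomes 24)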
+
+void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
+ unsigned &Offset,
+ msgpack::ArrayNode &Args) {
+ int HiddenArgNumBytes =
+ getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
+
+ if (!HiddenArgNumBytes)
+ return;
+
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ if (HiddenArgNumBytes >= 8)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
+ if (HiddenArgNumBytes >= 16)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
+ if (HiddenArgNumBytes >= 24)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (HiddenArgNumBytes >= 32) {
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
+ else
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ }
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel is
+ // used, otherwise emit dummy "none" arguments.
+ if (HiddenArgNumBytes >= 48) {
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ }
+ }
+}
+
+std::shared_ptr<msgpack::MapNode>
+MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ const Function &F = MF.getFunction();
+
+ auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
+ auto &Kern = *HSAKernelProps;
+
+ unsigned MaxKernArgAlign;
+ Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign));
+ Kern[".group_segment_fixed_size"] =
+ std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
+ Kern[".private_segment_fixed_size"] =
+ std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
+ Kern[".kernarg_segment_align"] =
+ std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
+ Kern[".wavefront_size"] =
+ std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
+ Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
+ Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
+ Kern[".max_flat_workgroup_size"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
+ Kern[".sgpr_spill_count"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
+ Kern[".vgpr_spill_count"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
+
+ return HSAKernelProps;
+}
+
+bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+ return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
+}
+
+void MetadataStreamerV3::begin(const Module &Mod) {
+ emitVersion();
+ emitPrintf(Mod);
+ getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
+}
+
+void MetadataStreamerV3::end() {
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << HSAMetadataRoot;
+
+ if (DumpHSAMetadata)
+ dump(StrOS.str());
+ if (VerifyHSAMetadata)
+ verify(StrOS.str());
+}
+
+void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) {
+ auto &Func = MF.getFunction();
+ auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
+
+ assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ Func.getCallingConv() == CallingConv::SPIR_KERNEL);
+
+ auto &KernelsNode = getRootMetadata("amdhsa.kernels");
+ auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
+
+ {
+ auto &Kern = *KernelProps;
+ Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
+ Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
+ (Twine(Func.getName()) + Twine(".kd")).str());
+ emitKernelLanguage(Func, Kern);
+ emitKernelAttrs(Func, Kern);
+ emitKernelArgs(Func, Kern);
+ }
+
+ Kernels->push_back(std::move(KernelProps));
+}
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 3424c956d781..afc09baf952d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -19,10 +19,12 @@
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/Support/AMDGPUMetadata.h"
namespace llvm {
+class AMDGPUTargetStreamer;
class Argument;
class DataLayout;
class Function;
@@ -34,10 +36,94 @@ class Type;
namespace AMDGPU {
namespace HSAMD {
-class MetadataStreamer final {
+class MetadataStreamer {
+public:
+ virtual ~MetadataStreamer(){};
+
+ virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
+
+ virtual void begin(const Module &Mod) = 0;
+
+ virtual void end() = 0;
+
+ virtual void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) = 0;
+};
+
+class MetadataStreamerV3 final : public MetadataStreamer {
+private:
+ std::shared_ptr<msgpack::Node> HSAMetadataRoot =
+ std::make_shared<msgpack::MapNode>();
+
+ void dump(StringRef HSAMetadataString) const;
+
+ void verify(StringRef HSAMetadataString) const;
+
+ Optional<StringRef> getAccessQualifier(StringRef AccQual) const;
+
+ Optional<StringRef> getAddressSpaceQualifier(unsigned AddressSpace) const;
+
+ StringRef getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const;
+
+ StringRef getValueType(Type *Ty, StringRef TypeName) const;
+
+ std::string getTypeName(Type *Ty, bool Signed) const;
+
+ std::shared_ptr<msgpack::ArrayNode>
+ getWorkGroupDimensions(MDNode *Node) const;
+
+ std::shared_ptr<msgpack::MapNode>
+ getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const;
+
+ void emitVersion();
+
+ void emitPrintf(const Module &Mod);
+
+ void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelArg(const Argument &Arg, unsigned &Offset,
+ msgpack::ArrayNode &Args);
+
+ void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
+ unsigned &Offset, msgpack::ArrayNode &Args,
+ unsigned PointeeAlign = 0, StringRef Name = "",
+ StringRef TypeName = "", StringRef BaseTypeName = "",
+ StringRef AccQual = "", StringRef TypeQual = "");
+
+ void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
+ msgpack::ArrayNode &Args);
+
+ std::shared_ptr<msgpack::Node> &getRootMetadata(StringRef Key) {
+ return (*cast<msgpack::MapNode>(HSAMetadataRoot.get()))[Key];
+ }
+
+ std::shared_ptr<msgpack::Node> &getHSAMetadataRoot() {
+ return HSAMetadataRoot;
+ }
+
+public:
+ MetadataStreamerV3() = default;
+ ~MetadataStreamerV3() = default;
+
+ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
+
+ void begin(const Module &Mod) override;
+
+ void end() override;
+
+ void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) override;
+};
+
+class MetadataStreamerV2 final : public MetadataStreamer {
private:
Metadata HSAMetadata;
- AMDGPUAS AMDGPUASI;
void dump(StringRef HSAMetadataString) const;
@@ -45,7 +131,7 @@ private:
AccessQualifier getAccessQualifier(StringRef AccQual) const;
- AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
+ AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
ValueKind getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const;
@@ -83,19 +169,22 @@ private:
void emitHiddenKernelArgs(const Function &Func);
-public:
- MetadataStreamer() = default;
- ~MetadataStreamer() = default;
-
const Metadata &getHSAMetadata() const {
return HSAMetadata;
}
- void begin(const Module &Mod);
+public:
+ MetadataStreamerV2() = default;
+ ~MetadataStreamerV2() = default;
+
+ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
+
+ void begin(const Module &Mod) override;
- void end();
+ void end() override;
- void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
+ void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) override;
};
} // end namespace HSAMD
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7cb0e12a6809..a0a045e72a58 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -29,7 +29,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -72,14 +72,12 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const GCNSubtarget *Subtarget;
- AMDGPUAS AMDGPUASI;
bool EnableLateStructurizeCFG;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
: SelectionDAGISel(*TM, OptLevel) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
~AMDGPUDAGToDAGISel() override = default;
@@ -87,7 +85,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
AU.addRequired<AMDGPUPerfHintAnalysis>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
SelectionDAGISel::getAnalysisUsage(AU);
}
@@ -103,9 +101,12 @@ private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
-
+ bool isVGPRImm(const SDNode *N) const;
+ bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
+ MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+
SDNode *glueCopyToM0(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
@@ -140,13 +141,6 @@ private:
SDValue &Offset, SDValue &SLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
- bool SelectMUBUFConstant(SDValue Constant,
- SDValue &SOffset,
- SDValue &ImmOffset) const;
- bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
- SDValue &ImmOffset) const;
- bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
- SDValue &ImmOffset, SDValue &VOffset) const;
bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
@@ -224,7 +218,6 @@ protected:
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
const R600Subtarget *Subtarget;
- AMDGPUAS AMDGPUASI;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
@@ -232,9 +225,7 @@ class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
SDValue& Offset);
public:
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
- AMDGPUDAGToDAGISel(TM, OptLevel) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
- }
+ AMDGPUDAGToDAGISel(TM, OptLevel) {}
void Select(SDNode *N) override;
@@ -251,12 +242,12 @@ protected:
} // end anonymous namespace
-INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
+INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
-INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
/// This pass converts a legalized DAG into a AMDGPU-specific
@@ -350,7 +341,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
- if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
+ if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
!Subtarget->ldsRequiresM0Init())
return N;
@@ -372,6 +363,22 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
+MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
+ EVT VT) const {
+ SDNode *Lo = CurDAG->getMachineNode(
+ AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
+ SDNode *Hi =
+ CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
+ const SDValue Ops[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
+}
+
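The helper above simply splits a 64-bit immediate into two 32-bit halves before materializing each with S_MOV_B32 and recombining them via REG_SEQUENCE. A minimal standalone sketch of that split (plain C++, no SelectionDAG types):

#include <cassert>
#include <cstdint>

// Sketch only: the lo/hi decomposition buildSMovImm64 feeds into the two
// S_MOV_B32 nodes before recombining them with a REG_SEQUENCE.
int main() {
  uint64_t Imm = 0x123456789ABCDEF0ull;
  uint32_t Lo = static_cast<uint32_t>(Imm & 0xFFFFFFFF);
  uint32_t Hi = static_cast<uint32_t>(Imm >> 32);
  assert(Lo == 0x9ABCDEF0u && Hi == 0x12345678u);
  return 0;
}
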
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
switch (NumVectorElts) {
case 1:
@@ -557,19 +564,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
SDLoc DL(N);
- SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
- MVT::i32));
- SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
- const SDValue Ops[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
-
- ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- N->getValueType(0), Ops));
+ ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
return;
}
case ISD::LOAD:
@@ -641,6 +636,20 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::ATOMIC_CMP_SWAP:
SelectATOMIC_CMP_SWAP(N);
return;
+ case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ case AMDGPUISD::CVT_PKNORM_I16_F32:
+ case AMDGPUISD::CVT_PKNORM_U16_F32:
+ case AMDGPUISD::CVT_PK_U16_U32:
+ case AMDGPUISD::CVT_PK_I16_I32: {
+ // Hack around using a legal type if f16 is illegal.
+ if (N->getValueType(0) == MVT::i32) {
+ MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
+ N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
+ { N->getOperand(0), N->getOperand(1) });
+ SelectCode(N);
+ return;
+ }
+ }
}
SelectCode(N);
@@ -969,8 +978,6 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
// default case
- // FIXME: This is broken on SI where we still need to check if the base
- // pointer is positive here.
Base = Addr;
Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
@@ -1000,55 +1007,72 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ConstantSDNode *C1 = nullptr;
+ SDValue N0 = Addr;
if (CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ C1 = cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isUInt<32>(C1->getZExtValue()))
+ N0 = Addr.getOperand(0);
+ else
+ C1 = nullptr;
+ }
+
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add N2, N3) -> addr64, or
+ // (add (add N2, N3), C1) -> addr64
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
- if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1) -> addr64
- SDValue N2 = N0.getOperand(0);
- SDValue N3 = N0.getOperand(1);
- Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ if (N2->isDivergent()) {
+ if (N3->isDivergent()) {
+ // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
+ // addr64, and construct the resource from a 0 address.
+ Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
+ VAddr = N0;
+ } else {
+ // N2 is divergent, N3 is not.
+ Ptr = N3;
+ VAddr = N2;
+ }
+ } else {
+ // N2 is not divergent.
Ptr = N2;
VAddr = N3;
- } else {
- // (add N0, C1) -> offset
- VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Ptr = N0;
- }
-
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return true;
}
-
- if (isUInt<32>(C1->getZExtValue())) {
- // Illegal offset, store it in soffset.
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
- 0);
- return true;
- }
- }
-
- if (Addr.getOpcode() == ISD::ADD) {
- // (add N0, N1) -> addr64
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ } else if (N0->isDivergent()) {
+ // N0 is divergent. Use it as the addr64, and construct the resource from a
+ // 0 address.
+ Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
+ VAddr = N0;
Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
+ } else {
+ // N0 -> offset, or
+ // (N0 + C1) -> offset
+ VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;
- VAddr = N1;
+ }
+
+ if (!C1) {
+ // No offset.
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
- // default case -> offset
- VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Ptr = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
+ // Legal offset for instruction.
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ return true;
+ }
+ // Illegal offset, store it in soffset.
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ SOffset =
+ SDValue(CurDAG->getMachineNode(
+ AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
+ 0);
return true;
}
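SelectMUBUF now classifies the constant offset C1 three ways: absent, small enough for the instruction's immediate field, or spilled into SOffset with an S_MOV_B32. A minimal sketch of that decision, assuming the MUBUF immediate offset field holds an unsigned 12-bit value (0..4095), which matches the MaxImm of 4095 used by the SelectMUBUFConstant code removed further down; the helper name here is made up for illustration:

#include <cassert>
#include <cstdint>

// Sketch only: offset classification assuming a 12-bit unsigned immediate
// field; offsets that do not fit go into SOffset via an S_MOV_B32.
enum class OffsetKind { None, Immediate, SOffset };

OffsetKind classifyMUBUFOffset(const uint64_t *C1) {
  if (!C1)
    return OffsetKind::None;
  return *C1 < 4096 ? OffsetKind::Immediate : OffsetKind::SOffset;
}

int main() {
  uint64_t Small = 4095, Large = 4096;
  assert(classifyMUBUFOffset(nullptr) == OffsetKind::None);
  assert(classifyMUBUFOffset(&Small) == OffsetKind::Immediate);
  assert(classifyMUBUFOffset(&Large) == OffsetKind::SOffset);
  return 0;
}
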
@@ -1252,101 +1276,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
- SDValue &SOffset,
- SDValue &ImmOffset) const {
- SDLoc DL(Constant);
- const uint32_t Align = 4;
- const uint32_t MaxImm = alignDown(4095, Align);
- uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
- uint32_t Overflow = 0;
-
- if (Imm > MaxImm) {
- if (Imm <= MaxImm + 64) {
- // Use an SOffset inline constant for 4..64
- Overflow = Imm - MaxImm;
- Imm = MaxImm;
- } else {
- // Try to keep the same value in SOffset for adjacent loads, so that
- // the corresponding register contents can be re-used.
- //
- // Load values with all low-bits (except for alignment bits) set into
- // SOffset, so that a larger range of values can be covered using
- // s_movk_i32.
- //
- // Atomic operations fail to work correctly when individual address
- // components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Align) & ~4095;
- uint32_t Low = (Imm + Align) & 4095;
- Imm = Low;
- Overflow = High - Align;
- }
- }
-
- // There is a hardware bug in SI and CI which prevents address clamping in
- // MUBUF instructions from working correctly with SOffsets. The immediate
- // offset is unaffected.
- if (Overflow > 0 &&
- Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
- return false;
-
- ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
-
- if (Overflow <= 64)
- SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
- else
- SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
- 0);
-
- return true;
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
- SDValue &SOffset,
- SDValue &ImmOffset) const {
- SDLoc DL(Offset);
-
- if (!isa<ConstantSDNode>(Offset))
- return false;
-
- return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
-}
-
-bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
- SDValue &SOffset,
- SDValue &ImmOffset,
- SDValue &VOffset) const {
- SDLoc DL(Offset);
-
- // Don't generate an unnecessary voffset for constant offsets.
- if (isa<ConstantSDNode>(Offset)) {
- SDValue Tmp1, Tmp2;
-
- // When necessary, use a voffset in <= CI anyway to work around a hardware
- // bug.
- if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
- SelectMUBUFConstant(Offset, Tmp1, Tmp2))
- return false;
- }
-
- if (CurDAG->isBaseWithConstantOffset(Offset)) {
- SDValue N0 = Offset.getOperand(0);
- SDValue N1 = Offset.getOperand(1);
- if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
- SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
- VOffset = N0;
- return true;
- }
- }
-
- SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- VOffset = Offset;
-
- return true;
-}
-
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
SDValue &VAddr,
@@ -1525,9 +1454,13 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
// (add n0, c0)
- Base = N0;
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
- return true;
+ // Don't peel off the offset (c0) if doing so could possibly lead
+ // the base (n0) to be negative.
+ if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
+ Base = N0;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
+ return true;
+ }
}
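A small arithmetic illustration of the hazard the new guard avoids: peeling a positive constant off a base whose sign is unknown can leave a negative base even though the combined index was valid. This sketches only the arithmetic, not the selector:

#include <cassert>

// Sketch only: with n0 possibly negative, (n0 + c0) may be a valid index
// while the peeled base n0 alone is not.
int main() {
  int N0 = -2, C0 = 5;
  int Index = N0 + C0; // 3: the combined index is non-negative
  assert(Index >= 0);
  assert(N0 < 0);      // but the peeled base would be negative
  return 0;
}
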
if (isa<ConstantSDNode>(Index))
@@ -1768,7 +1701,7 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MemSDNode *Mem = cast<MemSDNode>(N);
unsigned AS = Mem->getAddressSpace();
- if (AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
SelectCode(N);
return;
}
@@ -1816,9 +1749,8 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
return;
}
- MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
- *MMOs = Mem->getMemOperand();
- CmpSwap->setMemRefs(MMOs, MMOs + 1);
+ MachineMemOperand *MMO = Mem->getMemOperand();
+ CurDAG->setNodeMemRefs(CmpSwap, {MMO});
unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
SDValue Extract
@@ -2117,6 +2049,80 @@ bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
return isExtractHiElt(In, Src);
}
+bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return false;
+ }
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIInstrInfo * SII =
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ unsigned Limit = 0;
+ bool AllUsesAcceptSReg = true;
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ Limit < 10 && U != E; ++U, ++Limit) {
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+
+ // If the register class is unknown, it could be one that needs to be an
+ // SGPR, e.g. an inline asm constraint.
+ if (!RC || SIRI->isSGPRClass(RC))
+ return false;
+
+ if (RC != &AMDGPU::VS_32RegClass) {
+ AllUsesAcceptSReg = false;
+ SDNode * User = *U;
+ if (User->isMachineOpcode()) {
+ unsigned Opc = User->getMachineOpcode();
+ MCInstrDesc Desc = SII->get(Opc);
+ if (Desc.isCommutable()) {
+ unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
+ unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
+ unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
+ const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
+ if (CommutedRC == &AMDGPU::VS_32RegClass)
+ AllUsesAcceptSReg = true;
+ }
+ }
+ }
+ // If "AllUsesAcceptSReg == false" so far we haven't suceeded
+ // commuting current user. This means have at least one use
+ // that strictly require VGPR. Thus, we will not attempt to commute
+ // other user instructions.
+ if (!AllUsesAcceptSReg)
+ break;
+ }
+ }
+ return !AllUsesAcceptSReg && (Limit < 10);
+}
+
+bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
+  auto Ld = cast<LoadSDNode>(N);
+
+  return Ld->getAlignment() >= 4 &&
+         (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+            Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+           !N->isDivergent()) ||
+          (Subtarget->getScalarizeGlobalBehavior() &&
+           Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+           !Ld->isVolatile() && !N->isDivergent() &&
+           static_cast<const SITargetLowering *>(getTargetLowering())
+               ->isMemOpHasNoClobberedMemOperand(N)));
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
@@ -2152,10 +2158,10 @@ bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
- return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
- return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 21e44e9589d3..6951c915b177 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -128,10 +128,8 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
}
unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
- KnownBits Known;
EVT VT = Op.getValueType();
- DAG.computeKnownBits(Op, Known);
-
+ KnownBits Known = DAG.computeKnownBits(Op);
return VT.getSizeInBits() - Known.countMinLeadingZeros();
}
@@ -146,7 +144,6 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
- AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
@@ -318,6 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG, MVT::f32, Custom);
setOperationAction(ISD::FLOG10, MVT::f32, Custom);
+ setOperationAction(ISD::FEXP, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
@@ -450,6 +448,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
@@ -470,6 +469,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
// This causes using an unrolled select operation rather than expansion with
@@ -550,6 +550,8 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FMAD:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case ISD::FSIN:
case ISD::FTRUNC:
case ISD::FRINT:
@@ -562,6 +564,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY:
+ case AMDGPUISD::FMED3:
return true;
default:
return false;
@@ -650,8 +653,11 @@ bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
}
bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
- ISD::LoadExtType,
+ ISD::LoadExtType ExtTy,
EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
+ return false;
unsigned NewSize = NewVT.getStoreSizeInBits();
@@ -662,6 +668,18 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
EVT OldVT = N->getValueType(0);
unsigned OldSize = OldVT.getStoreSizeInBits();
+ MemSDNode *MN = cast<MemSDNode>(N);
+ unsigned AS = MN->getAddressSpace();
+ // Do not shrink an aligned scalar load to sub-dword.
+ // Scalar engine cannot do sub-dword loads.
+ if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 &&
+ (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ (isa<LoadSDNode>(N) &&
+ AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) &&
+ AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
+ return false;
+
// Don't produce extloads from sub 32-bit types. SI doesn't have scalar
// extloads, so doing one requires using a buffer_load. In cases where we
// still couldn't use a scalar load, using the wider load shouldn't really
@@ -722,7 +740,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
{
const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
if (L->getMemOperand()->getAddrSpace()
- == AMDGPUASI.CONSTANT_ADDRESS_32BIT)
+ == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
return false;
}
@@ -1140,6 +1158,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F);
case ISD::FLOG10:
return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F);
+ case ISD::FEXP:
+ return lowerFEXP(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
@@ -1188,8 +1208,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
- if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
- G->getAddressSpace() == AMDGPUASI.REGION_ADDRESS) {
+ if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isEntryFunction()) {
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
@@ -2213,6 +2233,34 @@ SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
}
+// Return M_LOG2E of the appropriate type.
+static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) {
+ switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT);
+ case MVT::f16:
+ return DAG.getConstantFP(
+ APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"),
+ SL, VT);
+ case MVT::f64:
+ return DAG.getConstantFP(
+ APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT);
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
+// exp2(M_LOG2E_F * f);
+SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ const SDValue K = getLog2EVal(DAG, SL, VT);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags());
+ return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags());
+}
+
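lowerFEXP rewrites exp(x) as exp2(x * log2(e)). A minimal numeric sanity check of that identity using the standard C++ math library, independent of any AMDGPU lowering:

#include <cassert>
#include <cmath>

// Sketch only: exp(x) == exp2(x * log2(e)) up to rounding, the identity the
// FEXP -> FMUL + FEXP2 lowering relies on.
int main() {
  const double Log2E = 1.4426950408889634; // log2(e), cf. getLog2EVal above
  for (double X : {-2.0, -0.5, 0.0, 1.0, 3.25}) {
    double Direct = std::exp(X);
    double ViaExp2 = std::exp2(X * Log2E);
    assert(std::fabs(Direct - ViaExp2) <= 1e-12 * std::fabs(Direct) + 1e-15);
  }
  return 0;
}
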
static bool isCtlzOpc(unsigned Opc) {
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
}
@@ -2669,21 +2717,33 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24;
}
-static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
- TargetLowering::DAGCombinerInfo &DCI) {
-
+static SDValue simplifyI24(SDNode *Node24,
+ TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
- SDValue Op = Node24->getOperand(OpIdx);
+ SDValue LHS = Node24->getOperand(0);
+ SDValue RHS = Node24->getOperand(1);
+
+ APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
+
+ // First try to simplify using GetDemandedBits which allows the operands to
+ // have other uses, but will only perform simplifications that involve
+ // bypassing some nodes for this user.
+ SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded);
+ SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded);
+ if (DemandedLHS || DemandedRHS)
+ return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(),
+ DemandedLHS ? DemandedLHS : LHS,
+ DemandedRHS ? DemandedRHS : RHS);
+
+ // Now try SimplifyDemandedBits which can simplify the nodes used by our
+ // operands if this node is the only user.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = Op.getValueType();
+ if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
+ return SDValue(Node24, 0);
+ if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
+ return SDValue(Node24, 0);
- APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
- if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO))
- return true;
-
- return false;
+ return SDValue();
}
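The rewritten simplifyI24 demands only the low 24 bits of each operand. A minimal sketch of that mask, assuming the LLVM ADT headers are available on the include path:

#include <cassert>
#include "llvm/ADT/APInt.h"

// Sketch only: the demanded-bits mask built above for 32-bit operands keeps
// bits [23:0], i.e. 0x00FFFFFF; anything above bit 23 may be simplified away.
int main() {
  llvm::APInt Demanded = llvm::APInt::getLowBitsSet(32, 24);
  assert(Demanded.getZExtValue() == 0x00FFFFFFu);
  return 0;
}
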
template <typename IntTy>
@@ -2920,8 +2980,7 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
// shl (ext x) => zext (shl x), if shift does not overflow int
if (VT != MVT::i64)
break;
- KnownBits Known;
- DAG.computeKnownBits(X, Known);
+ KnownBits Known = DAG.computeKnownBits(X);
unsigned LZ = Known.countMinLeadingZeros();
if (LZ < RHSVal)
break;
@@ -3080,8 +3139,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
Src.getOpcode() == ISD::SRA ||
Src.getOpcode() == ISD::SHL)) {
SDValue Amt = Src.getOperand(1);
- KnownBits Known;
- DAG.computeKnownBits(Amt, Known);
+ KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
(Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) {
@@ -3233,8 +3291,8 @@ SDValue AMDGPUTargetLowering::performMulLoHi24Combine(
SelectionDAG &DAG = DCI.DAG;
// Simplify demanded bits before splitting into multiple users.
- if (simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI))
- return SDValue();
+ if (SDValue V = simplifyI24(N, DCI))
+ return V;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3449,9 +3507,27 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
}
-static bool isConstantFPZero(SDValue N) {
- if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
- return C->isZero() && !C->isNegative();
+static bool isInv2Pi(const APFloat &APF) {
+ static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
+ static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
+ static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
+
+ return APF.bitwiseIsEqual(KF16) ||
+ APF.bitwiseIsEqual(KF32) ||
+ APF.bitwiseIsEqual(KF64);
+}
+
+// +0.0 and 1.0 / (2.0 * pi) have inline immediates, but their negated forms
+// do not, so negating them carries an additional cost.
+bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
+ if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) {
+ if (C->isZero() && !C->isNegative())
+ return true;
+
+ if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
+ return true;
+ }
+
return false;
}
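The three bit patterns tested by isInv2Pi are the half, single, and double encodings of 1/(2*pi). A minimal check of the single-precision pattern, assuming a math.h that provides M_PI:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Sketch only: 0x3e22f983 reinterpreted as an IEEE-754 float is the nearest
// single-precision value to 1/(2*pi), the constant guarded by
// hasInv2PiInlineImm().
int main() {
  const uint32_t Bits = 0x3e22f983u;
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  assert(std::fabs(F - 1.0 / (2.0 * M_PI)) < 1e-8);
  return 0;
}
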
@@ -3461,6 +3537,10 @@ static unsigned inverseMinMax(unsigned Opc) {
return ISD::FMINNUM;
case ISD::FMINNUM:
return ISD::FMAXNUM;
+ case ISD::FMAXNUM_IEEE:
+ return ISD::FMINNUM_IEEE;
+ case ISD::FMINNUM_IEEE:
+ return ISD::FMAXNUM_IEEE;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
@@ -3566,6 +3646,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
}
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
@@ -3577,9 +3659,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue RHS = N0.getOperand(1);
// 0 doesn't have a negated inline immediate.
- // TODO: Shouldn't fold 1/2pi either, and should be generalized to other
- // operations.
- if (isConstantFPZero(RHS))
+ // TODO: This constant check should be generalized to other operations.
+ if (isConstantCostlierToNegate(RHS))
return SDValue();
SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
@@ -3591,6 +3672,16 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
+ case AMDGPUISD::FMED3: {
+ SDValue Ops[3];
+ for (unsigned I = 0; I < 3; ++I)
+ Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
+
+ SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
+ if (!N0.hasOneUse())
+ DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
+ return Res;
+ }
case ISD::FP_EXTEND:
case ISD::FTRUNC:
case ISD::FRINT:
@@ -3737,9 +3828,10 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
if (Src.getValueType() == MVT::i64) {
SDLoc SL(N);
uint64_t CVal = C->getZExtValue();
- return DAG.getNode(ISD::BUILD_VECTOR, SL, DestVT,
- DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
- DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
+ DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
}
}
@@ -3786,9 +3878,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::MUL_U24:
case AMDGPUISD::MULHI_I24:
case AMDGPUISD::MULHI_U24: {
- // If the first call to simplify is successfull, then N may end up being
- // deleted, so we shouldn't call simplifyI24 again.
- simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI);
+ if (SDValue V = simplifyI24(N, DCI))
+ return V;
return SDValue();
}
case AMDGPUISD::MUL_LOHI_I24:
@@ -3943,13 +4034,12 @@ SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
- SDValue StackPtr,
SDValue ArgVal,
int64_t Offset) const {
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
- SDValue Ptr = DAG.getObjectPtrOffset(SL, StackPtr, Offset);
+ SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4,
MachineMemOperand::MODereferenceable);
return Store;
@@ -4111,6 +4201,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_LOAD)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
+ NODE_NAME_CASE(SBUFFER_LOAD)
NODE_NAME_CASE(BUFFER_STORE)
NODE_NAME_CASE(BUFFER_STORE_FORMAT)
NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
@@ -4210,33 +4301,42 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
}
case AMDGPUISD::MUL_U24:
case AMDGPUISD::MUL_I24: {
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(Op.getOperand(0), LHSKnown, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), RHSKnown, Depth + 1);
-
+ KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
RHSKnown.countMinTrailingZeros();
Known.Zero.setLowBits(std::min(TrailZ, 32u));
- unsigned LHSValBits = 32 - std::max(LHSKnown.countMinSignBits(), 8u);
- unsigned RHSValBits = 32 - std::max(RHSKnown.countMinSignBits(), 8u);
- unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
- if (MaxValBits >= 32)
- break;
+ // Truncate to 24 bits.
+ LHSKnown = LHSKnown.trunc(24);
+ RHSKnown = RHSKnown.trunc(24);
+
bool Negative = false;
if (Opc == AMDGPUISD::MUL_I24) {
- bool LHSNegative = !!(LHSKnown.One & (1 << 23));
- bool LHSPositive = !!(LHSKnown.Zero & (1 << 23));
- bool RHSNegative = !!(RHSKnown.One & (1 << 23));
- bool RHSPositive = !!(RHSKnown.Zero & (1 << 23));
+ unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
+ unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
+ unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+ if (MaxValBits >= 32)
+ break;
+ bool LHSNegative = LHSKnown.isNegative();
+ bool LHSPositive = LHSKnown.isNonNegative();
+ bool RHSNegative = RHSKnown.isNegative();
+ bool RHSPositive = RHSKnown.isNonNegative();
if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive))
break;
Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative);
- }
- if (Negative)
- Known.One.setHighBits(32 - MaxValBits);
- else
+ if (Negative)
+ Known.One.setHighBits(32 - MaxValBits);
+ else
+ Known.Zero.setHighBits(32 - MaxValBits);
+ } else {
+ unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
+ unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
+ unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+ if (MaxValBits >= 32)
+ break;
Known.Zero.setHighBits(32 - MaxValBits);
+ }
break;
}
case AMDGPUISD::PERM: {
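The MUL_I24/MUL_U24 known-bits hunk above bounds the product by the number of significant value bits in each 24-bit operand: if they need A and B bits, the product needs at most A + B, so the remaining high bits of the 32-bit result are known. A minimal arithmetic sketch of the unsigned case:

#include <cassert>
#include <cstdint>

// Sketch only: an operand below 2^A times an operand below 2^B stays below
// 2^(A+B), which is why the high 32 - (A + B) result bits are known zero.
int main() {
  const uint32_t LHS = 0x3FF; // needs 10 value bits
  const uint32_t RHS = 0xFFF; // needs 12 value bits
  const uint64_t Prod = static_cast<uint64_t>(LHS) * RHS;
  assert(Prod < (1ull << 22)); // 10 + 12 = 22 bits suffice
  return 0;
}
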
@@ -4244,9 +4344,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
if (!CMask)
return;
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(Op.getOperand(0), LHSKnown, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), RHSKnown, Depth + 1);
+ KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
unsigned Sel = CMask->getZExtValue();
for (unsigned I = 0; I < 32; I += 8) {
@@ -4320,3 +4419,107 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
}
+
+bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case AMDGPUISD::FMIN_LEGACY:
+ case AMDGPUISD::FMAX_LEGACY: {
+ if (SNaN)
+ return true;
+
+ // TODO: It may suffice to check that just one of the operands is never NaN
+ // for each of these, but it is not clear which one.
+ return false;
+ }
+ case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::CVT_PKRTZ_F16_F32: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::FMED3:
+ case AMDGPUISD::FMIN3:
+ case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMAD_FTZ: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
+ return true;
+
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::RCP_LEGACY:
+ case AMDGPUISD::RSQ_LEGACY:
+ case AMDGPUISD::RSQ_CLAMP: {
+ if (SNaN)
+ return true;
+
+ // TODO: Need an is-known-positive check.
+ return false;
+ }
+ case AMDGPUISD::LDEXP:
+ case AMDGPUISD::FRACT: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+ case AMDGPUISD::DIV_SCALE:
+ case AMDGPUISD::DIV_FMAS:
+ case AMDGPUISD::DIV_FIXUP:
+ case AMDGPUISD::TRIG_PREOP:
+ // TODO: Refine on operands.
+ return SNaN;
+ case AMDGPUISD::SIN_HW:
+ case AMDGPUISD::COS_HW: {
+ // TODO: Need a check for infinity.
+ return SNaN;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID
+ = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Handle more intrinsics
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_cubeid:
+ return true;
+
+ case Intrinsic::amdgcn_frexp_mant: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
+ }
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
+ }
+ case Intrinsic::amdgcn_fdot2:
+ // TODO: Refine on operand
+ return SNaN;
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+TargetLowering::AtomicExpansionKind
+AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ if (RMW->getOperation() == AtomicRMWInst::Nand)
+ return AtomicExpansionKind::CmpXChg;
+ return AtomicExpansionKind::None;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a4c3b413e103..0d22cb2e3e20 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -41,8 +41,6 @@ public:
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
protected:
- AMDGPUAS AMDGPUASI;
-
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
/// Split a vector store into multiple scalar stores.
@@ -58,8 +56,9 @@ protected:
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFLOG(SDValue Op, SelectionDAG &Dag,
+ SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG,
double Log2BaseInverted) const;
+ SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
@@ -95,6 +94,8 @@ protected:
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isConstantCostlierToNegate(SDValue N) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -246,6 +247,11 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const override;
+
/// Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
@@ -279,7 +285,6 @@ public:
SDValue storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
- SDValue StackPtr,
SDValue ArgVal,
int64_t Offset) const;
@@ -299,13 +304,11 @@ public:
uint32_t getImplicitParameterOffset(const MachineFunction &MF,
const ImplicitParameter Param) const;
- AMDGPUAS getAMDGPUAS() const {
- return AMDGPUASI;
- }
-
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
+
+ AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
};
namespace AMDGPUISD {
@@ -357,6 +360,7 @@ enum NodeType : unsigned {
SIN_HW,
FMAX_LEGACY,
FMIN_LEGACY,
+
FMAX3,
SMAX3,
UMAX3,
@@ -479,6 +483,7 @@ enum NodeType : unsigned {
BUFFER_LOAD,
BUFFER_LOAD_FORMAT,
BUFFER_LOAD_FORMAT_D16,
+ SBUFFER_LOAD,
BUFFER_STORE,
BUFFER_STORE_FORMAT,
BUFFER_STORE_FORMAT_D16,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index 35dd9eb0a478..945c9acd379a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -44,7 +44,7 @@ ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate
-// it into registers we gain nothing by agressively inlining functions for that
+// it into registers we gain nothing by aggressively inlining functions for that
// heuristic.
static cl::opt<unsigned>
ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
@@ -118,8 +118,6 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
if (!Callee)
return (unsigned)Thres;
- const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*Caller->getParent());
-
// If we have a pointer to private array passed into a function
// it will not be optimized out, leaving scratch usage.
// Increase the inline threshold to allow inlining in this case.
@@ -128,7 +126,7 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
for (Value *PtrArg : CS.args()) {
Type *Ty = PtrArg->getType();
if (!Ty->isPointerTy() ||
- Ty->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
+ Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
PtrArg = GetUnderlyingObject(PtrArg, DL);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
@@ -174,18 +172,23 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
Function *Caller = CS.getCaller();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
- if (!Callee || Callee->isDeclaration() || CS.isNoInline() ||
- !TTI.areInlineCompatible(Caller, Callee))
- return llvm::InlineCost::getNever();
+ if (!Callee || Callee->isDeclaration())
+ return llvm::InlineCost::getNever("undefined callee");
+
+ if (CS.isNoInline())
+ return llvm::InlineCost::getNever("noinline");
+
+ if (!TTI.areInlineCompatible(Caller, Callee))
+ return llvm::InlineCost::getNever("incompatible");
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
- return llvm::InlineCost::getAlways();
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getAlways("alwaysinline viable");
+ return llvm::InlineCost::getNever("alwaysinline unviable");
}
if (isWrapperOnlyCall(CS))
- return llvm::InlineCost::getAlways();
+ return llvm::InlineCost::getAlways("wrapper-only call");
InlineParams LocalParams = Params;
LocalParams.DefaultThreshold = (int)getInlineThreshold(CS);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 7442a59e594f..82644be26563 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -62,18 +62,10 @@ def AMDGPULoopOp : SDTypeProfile<0, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
>;
-def AMDGPUBreakOp : SDTypeProfile<1, 1,
- [SDTCisVT<0, i64>, SDTCisVT<1, i64>]
->;
-
def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
>;
-def AMDGPUElseBreakOp : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>]
->;
-
def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
[SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 219d430fbb39..8eb49d49b2e0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -55,7 +55,6 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
- ,AMDGPUASI(STI.getAMDGPUAS())
{
}
@@ -506,8 +505,8 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
if (!I.hasOneMemOperand())
return false;
- if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
- (*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT)
+ if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+ (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return false;
if (!isInstrUniform(I))
@@ -631,6 +630,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
return selectImpl(I, CoverageInfo);
case TargetOpcode::G_ADD:
return selectG_ADD(I);
+ case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:
return selectCOPY(I);
case TargetOpcode::G_CONSTANT:
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 68b40b20aca2..449431adc561 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -105,9 +105,6 @@ private:
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
-
-protected:
- AMDGPUAS AMDGPUASI;
};
} // End llvm namespace.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index c9c932ef2f5f..eb8f2002ff2d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -135,6 +135,12 @@ def brtarget : Operand<OtherVT>;
// Misc. PatFrags
//===----------------------------------------------------------------------===//
+class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0),
+ (op $src0),
+ [{ return N->hasOneUse(); }]
+>;
+
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1),
(op $src0, $src1),
@@ -152,13 +158,21 @@ def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
+
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+
+def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
+def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;
+
+
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]
+def not_oneuse : HasOneUseUnaryOp<not>;
+
def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;
@@ -167,6 +181,9 @@ def shl_oneuse : HasOneUseBinOp<shl>;
def select_oneuse : HasOneUseTernaryOp<select>;
+def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
+def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
+
def srl_16 : PatFrag<
(ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;
@@ -328,37 +345,37 @@ class StoreHi16<SDPatternOperator op> : PatFrag <
>;
class PrivateAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;
class ConstantAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class LocalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
class GlobalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class GlobalLoadAddress : CodePatPred<[{
auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class FlatLoadAddress : CodePatPred<[{
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
class FlatStoreAddress : CodePatPred<[{
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.GLOBAL_ADDRESS;
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
@@ -480,7 +497,7 @@ def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
@@ -497,14 +514,14 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
(ops node:$ptr, node:$cmp, node:$swap),
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
- return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+ return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
@@ -513,17 +530,17 @@ multiclass global_binary_atomic_op<SDNode atomic_op> {
def "" : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
def _noret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def _ret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
@@ -550,12 +567,12 @@ def atomic_cmp_swap_global : PatFrag<
def atomic_cmp_swap_global_noret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def atomic_cmp_swap_global_ret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
@@ -787,18 +804,30 @@ class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(BIT_ALIGN $src0, $src0, $src1)
>;
-// This matches 16 permutations of
-// max(min(x, y), min(max(x, y), z))
-class IntMed3Pat<Instruction med3Inst,
+multiclass IntMed3Pat<Instruction med3Inst,
+ SDPatternOperator min,
SDPatternOperator max,
- SDPatternOperator max_oneuse,
SDPatternOperator min_oneuse,
- ValueType vt = i32> : AMDGPUPat<
+ SDPatternOperator max_oneuse,
+ ValueType vt = i32> {
+
+ // This matches 16 permutations of
+ // min(max(a, b), max(min(a, b), c))
+ def : AMDGPUPat <
+ (min (max_oneuse vt:$src0, vt:$src1),
+ (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
+ (med3Inst vt:$src0, vt:$src1, vt:$src2)
+>;
+
+ // This matches 16 permutations of
+ // max(min(x, y), min(max(x, y), z))
+ def : AMDGPUPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst $src0, $src1, $src2)
>;
-
+}
+
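Both patterns produced by IntMed3Pat are median selections: min(max(a, b), max(min(a, b), c)) and its dual pick the middle of the three inputs whenever the one-use constraints hold. A minimal sketch of the identity in plain C++:

#include <algorithm>
#include <cassert>

// Sketch only: the min/max composition the med3 patterns match computes the
// median of its three operands.
int med3(int A, int B, int C) {
  return std::min(std::max(A, B), std::max(std::min(A, B), C));
}

int main() {
  assert(med3(1, 5, 3) == 3);
  assert(med3(7, 2, 9) == 7);
  assert(med3(4, 4, 1) == 4);
  return 0;
}
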
// Special conversion patterns
def cvt_rpi_i32_f32 : PatFrag <
@@ -813,6 +842,7 @@ def cvt_flr_i32_f32 : PatFrag <
[{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
+let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
!if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
@@ -824,6 +854,7 @@ class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
!if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
(Inst $src0, $src1, $src2))
>;
+} // AddedComplexity.
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
(fdiv FP_ONE, vt:$src),
@@ -834,3 +865,25 @@ class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
(AMDGPUrcp (fsqrt vt:$src)),
(RsqInst $src)
>;
+
+// Instructions which select to the same v_min_f*
+def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
+ [(fminnum_ieee node:$src0, node:$src1),
+ (fminnum node:$src0, node:$src1)]
+>;
+
+// Instructions which select to the same v_max_f*
+def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
+ [(fmaxnum_ieee node:$src0, node:$src1),
+ (fmaxnum node:$src0, node:$src1)]
+>;
+
+def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
+ [(fminnum_ieee_oneuse node:$src0, node:$src1),
+ (fminnum_oneuse node:$src0, node:$src1)]
+>;
+
+def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
+ [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
+ (fmaxnum_oneuse node:$src0, node:$src1)]
+>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
index 896e2055cf62..02108ca3ddd7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
@@ -40,7 +40,7 @@ StringRef AMDGPUIntrinsicInfo::getName(unsigned IntrID,
if (IntrID < Intrinsic::num_intrinsics)
return StringRef();
- assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
+ assert(IntrID < SIIntrinsic::num_AMDGPU_intrinsics &&
"Invalid intrinsic ID");
return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
@@ -91,7 +91,7 @@ Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
= cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
AttributeList AS =
- getAttributes(M->getContext(), static_cast<AMDGPUIntrinsic::ID>(IntrID));
+ getAttributes(M->getContext(), static_cast<SIIntrinsic::ID>(IntrID));
F->setAttributes(AS);
return F;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
index ef42f9a319af..a1a094dded23 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
@@ -20,7 +20,7 @@
namespace llvm {
class TargetMachine;
-namespace AMDGPUIntrinsic {
+namespace SIIntrinsic {
enum ID {
last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
#define GET_INTRINSIC_ENUM_VALUES
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 87b072c9ea20..ef85c1040545 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -32,20 +32,52 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
};
- auto AMDGPUAS = ST.getAMDGPUAS();
-
const LLT S1 = LLT::scalar(1);
- const LLT V2S16 = LLT::vector(2, 16);
-
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT S512 = LLT::scalar(512);
+ const LLT V2S16 = LLT::vector(2, 16);
+ const LLT V4S16 = LLT::vector(4, 16);
+ const LLT V8S16 = LLT::vector(8, 16);
+
+ const LLT V2S32 = LLT::vector(2, 32);
+ const LLT V3S32 = LLT::vector(3, 32);
+ const LLT V4S32 = LLT::vector(4, 32);
+ const LLT V5S32 = LLT::vector(5, 32);
+ const LLT V6S32 = LLT::vector(6, 32);
+ const LLT V7S32 = LLT::vector(7, 32);
+ const LLT V8S32 = LLT::vector(8, 32);
+ const LLT V9S32 = LLT::vector(9, 32);
+ const LLT V10S32 = LLT::vector(10, 32);
+ const LLT V11S32 = LLT::vector(11, 32);
+ const LLT V12S32 = LLT::vector(12, 32);
+ const LLT V13S32 = LLT::vector(13, 32);
+ const LLT V14S32 = LLT::vector(14, 32);
+ const LLT V15S32 = LLT::vector(15, 32);
+ const LLT V16S32 = LLT::vector(16, 32);
+
+ const LLT V2S64 = LLT::vector(2, 64);
+ const LLT V3S64 = LLT::vector(3, 64);
+ const LLT V4S64 = LLT::vector(4, 64);
+ const LLT V5S64 = LLT::vector(5, 64);
+ const LLT V6S64 = LLT::vector(6, 64);
+ const LLT V7S64 = LLT::vector(7, 64);
+ const LLT V8S64 = LLT::vector(8, 64);
+
+ std::initializer_list<LLT> AllS32Vectors =
+ {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
+ V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
+ std::initializer_list<LLT> AllS64Vectors =
+ {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};
+
const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
- const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS.FLAT_ADDRESS);
- const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS.PRIVATE_ADDRESS);
+ const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
+ const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
+
+ const LLT CodePtr = FlatPtr;
const LLT AddrSpaces[] = {
GlobalPtr,
@@ -55,13 +87,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
PrivatePtr
};
+ setAction({G_BRCOND, S1}, Legal);
+
setAction({G_ADD, S32}, Legal);
setAction({G_ASHR, S32}, Legal);
setAction({G_SUB, S32}, Legal);
setAction({G_MUL, S32}, Legal);
- setAction({G_AND, S32}, Legal);
- setAction({G_OR, S32}, Legal);
- setAction({G_XOR, S32}, Legal);
+
+ // FIXME: 64-bit ones only legal for scalar
+ getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
+ .legalFor({S32, S1, S64, V2S32});
+
+ getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
+ G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
+ .legalFor({{S32, S1}});
setAction({G_BITCAST, V2S16}, Legal);
setAction({G_BITCAST, 1, S32}, Legal);
@@ -90,35 +129,80 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
// between these two scenarios.
setAction({G_CONSTANT, S1}, Legal);
- setAction({G_FADD, S32}, Legal);
+ setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
+
+ getActionDefinitionsBuilder(
+ { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
+ .legalFor({S32, S64});
+
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor({{S32, S64}});
+
+ // Use actual fsub instruction
+ setAction({G_FSUB, S32}, Legal);
+
+ // Must use fadd + fneg
+ setAction({G_FSUB, S64}, Lower);
setAction({G_FCMP, S1}, Legal);
setAction({G_FCMP, 1, S32}, Legal);
setAction({G_FCMP, 1, S64}, Legal);
- setAction({G_FMUL, S32}, Legal);
-
setAction({G_ZEXT, S64}, Legal);
setAction({G_ZEXT, 1, S32}, Legal);
+ setAction({G_SEXT, S64}, Legal);
+ setAction({G_SEXT, 1, S32}, Legal);
+
+ setAction({G_ANYEXT, S64}, Legal);
+ setAction({G_ANYEXT, 1, S32}, Legal);
+
setAction({G_FPTOSI, S32}, Legal);
setAction({G_FPTOSI, 1, S32}, Legal);
setAction({G_SITOFP, S32}, Legal);
setAction({G_SITOFP, 1, S32}, Legal);
+ setAction({G_UITOFP, S32}, Legal);
+ setAction({G_UITOFP, 1, S32}, Legal);
+
setAction({G_FPTOUI, S32}, Legal);
setAction({G_FPTOUI, 1, S32}, Legal);
+ setAction({G_FPOW, S32}, Legal);
+ setAction({G_FEXP2, S32}, Legal);
+ setAction({G_FLOG2, S32}, Legal);
+
+ getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
+ .legalFor({S32, S64});
+
for (LLT PtrTy : AddrSpaces) {
LLT IdxTy = LLT::scalar(PtrTy.getSizeInBits());
setAction({G_GEP, PtrTy}, Legal);
setAction({G_GEP, 1, IdxTy}, Legal);
}
+ setAction({G_BLOCK_ADDR, CodePtr}, Legal);
+
setAction({G_ICMP, S1}, Legal);
setAction({G_ICMP, 1, S32}, Legal);
+ setAction({G_CTLZ, S32}, Legal);
+ setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
+ setAction({G_CTTZ, S32}, Legal);
+ setAction({G_CTTZ_ZERO_UNDEF, S32}, Legal);
+ setAction({G_BSWAP, S32}, Legal);
+ setAction({G_CTPOP, S32}, Legal);
+
+ getActionDefinitionsBuilder(G_INTTOPTR)
+ .legalIf([](const LegalityQuery &Query) {
+ return true;
+ });
+
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalIf([](const LegalityQuery &Query) {
+ return true;
+ });
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.legalIf([=, &ST](const LegalityQuery &Query) {
@@ -145,6 +229,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
});
+ auto &Atomics = getActionDefinitionsBuilder(
+ {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
+ G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
+ G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
+ G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
+ .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
+ {S64, GlobalPtr}, {S64, LocalPtr}});
+ if (ST.hasFlatAddressSpace()) {
+ Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
+ }
setAction({G_SELECT, S32}, Legal);
setAction({G_SELECT, 1, S1}, Legal);
@@ -180,6 +274,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
(Ty1.getSizeInBits() % 32 == 0);
});
+ getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalForCartesianProduct(AllS32Vectors, {S32})
+ .legalForCartesianProduct(AllS64Vectors, {S64})
+ .clampNumElements(0, V16S32, V16S32)
+ .clampNumElements(0, V2S64, V8S64)
+ .minScalarSameAs(1, 0);
+
+ // TODO: Support any combination of v2s32
+ getActionDefinitionsBuilder(G_CONCAT_VECTORS)
+ .legalFor({{V4S32, V2S32},
+ {V8S32, V2S32},
+ {V8S32, V4S32},
+ {V4S64, V2S64},
+ {V4S16, V2S16},
+ {V8S16, V2S16},
+ {V8S16, V4S16}});
+
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
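
As an aside on the mechanism the legalizer hunk above relies on: GlobalISel legality is a rule table that pairs each generic opcode with the register types it is legal for, or with a predicate and fallback action when it is not. A minimal, library-free C++ sketch of that shape follows; the names and actions are invented for illustration and are not LLVM's real LegalizerInfo API.

#include <functional>
#include <map>
#include <utility>
#include <vector>

enum class Action { Legal, Lower, NarrowScalar, Unsupported };

struct TypeDesc { unsigned Elts; unsigned ScalarBits; };   // {1,32} ~ s32, {2,32} ~ v2s32

struct LegalityTable {
  using Pred = std::function<bool(const TypeDesc &)>;
  std::map<unsigned, std::vector<std::pair<Pred, Action>>> Rules;

  // legalFor: enumerate the exact types an opcode is legal for.
  void legalFor(unsigned Opc, std::vector<TypeDesc> Types) {
    for (TypeDesc T : Types)
      Rules[Opc].push_back({[T](const TypeDesc &Q) {
                              return Q.Elts == T.Elts && Q.ScalarBits == T.ScalarBits;
                            },
                            Action::Legal});
  }

  // legalIf: accept anything matching an arbitrary predicate.
  void legalIf(unsigned Opc, Pred P) { Rules[Opc].push_back({std::move(P), Action::Legal}); }

  Action getAction(unsigned Opc, const TypeDesc &T) const {
    auto It = Rules.find(Opc);
    if (It == Rules.end())
      return Action::Unsupported;
    for (const auto &R : It->second)
      if (R.first(T))
        return R.second;
    return Action::Unsupported;
  }
};

int main() {
  enum { G_AND = 1, G_INTTOPTR = 2 };
  LegalityTable T;
  T.legalFor(G_AND, {{1, 32}, {1, 64}, {2, 32}});               // like .legalFor({S32, S64, V2S32})
  T.legalIf(G_INTTOPTR, [](const TypeDesc &) { return true; }); // like .legalIf(... return true ...)
  return T.getAction(G_AND, {1, 32}) == Action::Legal ? 0 : 1;
}
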
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 7a7ed7a4f065..14e880042691 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1333,8 +1333,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// for OpenCL 2.0 we have only generic implementation of sincos
// function.
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
- const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
- nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS);
+ nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
if (!Fsincos) return false;
@@ -1347,7 +1346,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// The allocaInst allocates the memory in private address space. This need
// to be bitcasted to point to the address space of cos pointer type.
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
- if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
+ if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
P = B.CreateAddrSpaceCast(Alloc, PTy);
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index f37795e961e8..4fc3fe0f105b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -995,8 +995,10 @@ Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
} else {
AttributeList Attr;
LLVMContext &Ctx = M->getContext();
- Attr.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::ReadOnly);
- Attr.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::NoUnwind);
+ Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
+ Attribute::ReadOnly);
+ Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
C = M->getOrInsertFunction(FuncName, FuncTy, Attr);
}
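
The AMDGPULibFunc fix above corrects a classic value-semantics pitfall: AttributeList::addAttribute returns a new list rather than mutating in place, so a discarded return value silently drops the attribute. Below is a self-contained sketch of the same failure mode using a hypothetical AttrSet class, not the LLVM type.

#include <set>
#include <string>

// A tiny value-semantics attribute set: addAttribute returns a *new* set and
// leaves the original untouched. Class and names are invented for the example.
class AttrSet {
  std::set<std::string> Attrs;
public:
  AttrSet addAttribute(const std::string &A) const {
    AttrSet Copy = *this;
    Copy.Attrs.insert(A);
    return Copy;            // the caller must keep this result
  }
  bool contains(const std::string &A) const { return Attrs.count(A) != 0; }
};

int main() {
  AttrSet A;
  A.addAttribute("readonly");          // bug: result discarded, A is unchanged
  A = A.addAttribute("nounwind");      // fix: reassign the returned value
  return A.contains("nounwind") && !A.contains("readonly") ? 0 : 1;
}
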
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index c147830e12ed..743dc7a0d00b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -16,7 +16,6 @@
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -84,8 +83,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
return false;
CallInst *KernArgSegment =
- Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, nullptr,
- F.getName() + ".kernarg.segment");
+ Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
+ nullptr, F.getName() + ".kernarg.segment");
KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
KernArgSegment->addAttribute(AttributeList::ReturnIndex,
@@ -123,14 +122,17 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
VectorType *VT = dyn_cast<VectorType>(ArgTy);
bool IsV3 = VT && VT->getNumElements() == 3;
+ bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();
+
VectorType *V4Ty = nullptr;
int64_t AlignDownOffset = alignDown(EltOffset, 4);
int64_t OffsetDiff = EltOffset - AlignDownOffset;
- unsigned AdjustedAlign = MinAlign(KernArgBaseAlign, AlignDownOffset);
+ unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
+ KernArgBaseAlign);
Value *ArgPtr;
- if (Size < 32 && !ArgTy->isAggregateType()) { // FIXME: Handle aggregate types
+ if (DoShiftOpt) { // FIXME: Handle aggregate types
// Since we don't have sub-dword scalar loads, avoid doing an extload by
// loading earlier than the argument address, and extracting the relevant
// bits.
@@ -148,7 +150,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
} else {
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
KernArgSegment,
- AlignDownOffset,
+ EltOffset,
Arg.getName() + ".kernarg.offset");
ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
@@ -199,7 +201,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// TODO: Convert noalias arg to !noalias
- if (Size < 32 && !ArgTy->isAggregateType()) {
+ if (DoShiftOpt) {
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
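
The kernarg hunk above leans on a standard sub-dword trick: rather than issuing a narrow extending load for a 1- or 2-byte argument, load the 4-byte-aligned dword that contains it and shift/mask the wanted bits out. The standalone sketch below shows only the arithmetic; the offsets, sizes, and little-endian assumption are for illustration and are not taken from the pass.

#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t alignDown(uint64_t V, uint64_t A) { return V - (V % A); }

// Read the whole aligned dword containing the argument, then shift the wanted
// byte range down and mask it to the argument's width.
uint32_t loadSubDwordArg(const uint8_t *KernArgBase, uint64_t EltOffset,
                         unsigned SizeInBytes) {
  assert(SizeInBytes > 0 && SizeInBytes < 4 && "only for sub-dword arguments");
  uint64_t AlignedOffset = alignDown(EltOffset, 4); // dword holding the arg
  uint64_t OffsetDiff = EltOffset - AlignedOffset;  // byte position inside it

  uint32_t Dword;                                   // one aligned 32-bit load
  std::memcpy(&Dword, KernArgBase + AlignedOffset, sizeof(Dword));

  uint32_t Shifted = Dword >> (OffsetDiff * 8);     // the right-shift step
  uint32_t Mask = (1u << (SizeInBytes * 8)) - 1;    // truncate to arg width
  return Shifted & Mask;
}

int main() {
  // A 2-byte argument stored at offset 4; the check assumes a little-endian host.
  uint8_t Buf[8] = {0, 0, 0, 0, 0x34, 0x12, 0, 0};
  return loadSubDwordArg(Buf, 4, 2) == 0x1234 ? 0 : 1;
}
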
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 1876dc3f7122..f6bdbf5e9be2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -301,6 +301,26 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
+#ifdef EXPENSIVE_CHECKS
+ // Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot
+ // work correctly for the generic CPU).
+ //
+ // The isPseudo check really shouldn't be here, but unfortunately there are
+ // some negative lit tests that depend on being able to continue through
+ // here even when pseudo instructions haven't been lowered.
+ if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU())) {
+ SmallVector<MCFixup, 4> Fixups;
+ SmallVector<char, 16> CodeBytes;
+ raw_svector_ostream CodeStream(CodeBytes);
+
+ std::unique_ptr<MCCodeEmitter> InstEmitter(createSIMCCodeEmitter(
+ *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext));
+ InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI);
+
+ assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
+ }
+#endif
+
if (STI.dumpCode()) {
// Disassemble instruction/operands to text.
DisasmLines.resize(DisasmLines.size() + 1);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index 995d9ae3907f..5e0b7d429022 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -42,9 +42,12 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
if (!FirstMI)
return true;
+ const MachineBasicBlock &MBB = *FirstMI->getParent();
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
const MachineOperand *Src2 = TII.getNamedOperand(SecondMI,
AMDGPU::OpName::src2);
- return FirstMI->definesRegister(Src2->getReg());
+ return FirstMI->definesRegister(Src2->getReg(), TRI);
}
default:
return false;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
index b50a2eb8e9e7..2feff14d34a1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -23,7 +23,8 @@ namespace ElfNote {
const char SectionName[] = ".note";
-const char NoteName[] = "AMD";
+const char NoteNameV2[] = "AMD";
+const char NoteNameV3[] = "AMDGPU";
// TODO: Remove this file once we drop code object v2.
enum NoteType{
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 3cfdccc9fe51..e53a8fe7c074 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -99,8 +99,6 @@ private:
const DataLayout *DL;
- AMDGPUAS AS;
-
const TargetLowering *TLI;
void visit(const Function &F);
@@ -267,7 +265,6 @@ void AMDGPUPerfHint::runOnFunction(Function &F) {
const Module &M = *F.getParent();
DL = &M.getDataLayout();
- AS = AMDGPU::getAMDGPUAS(M);
visit(F);
auto Loc = FIM.find(&F);
@@ -306,14 +303,14 @@ bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType())) {
unsigned As = PT->getAddressSpace();
// Flat likely points to global too.
- return As == AS.GLOBAL_ADDRESS || As == AS.FLAT_ADDRESS;
+ return As == AMDGPUAS::GLOBAL_ADDRESS || As == AMDGPUAS::FLAT_ADDRESS;
}
return false;
}
bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType()))
- return PT->getAddressSpace() == AS.LOCAL_ADDRESS;
+ return PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
return false;
}
@@ -346,7 +343,8 @@ AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
if (auto PT = dyn_cast<PointerType>(V->getType())) {
unsigned As = PT->getAddressSpace();
- return As == AS.CONSTANT_ADDRESS || As == AS.CONSTANT_ADDRESS_32BIT;
+ return As == AMDGPUAS::CONSTANT_ADDRESS ||
+ As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}
return false;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index d341fec6296f..5d087c099184 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -70,13 +70,17 @@ static cl::opt<bool> DisablePromoteAllocaToVector(
cl::desc("Disable promote alloca to vector"),
cl::init(false));
+static cl::opt<bool> DisablePromoteAllocaToLDS(
+ "disable-promote-alloca-to-lds",
+ cl::desc("Disable promote alloca to LDS"),
+ cl::init(false));
+
// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
const TargetMachine *TM;
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
- AMDGPUAS AS;
// FIXME: This should be per-kernel.
uint32_t LocalMemLimit = 0;
@@ -156,8 +160,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
if (!ST.isPromoteAllocaEnabled())
return false;
- AS = AMDGPU::getAMDGPUAS(*F.getParent());
-
bool SufficientLDS = hasSufficientLocalMem(F);
bool Changed = false;
BasicBlock &EntryBB = *F.begin();
@@ -238,7 +240,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
- DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
+ DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
@@ -326,6 +328,10 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
// Currently only handle the case where the Pointer Operand is a GEP.
// Also we could not vectorize volatile or atomic loads.
LoadInst *LI = cast<LoadInst>(Inst);
+ if (isa<AllocaInst>(User) &&
+ LI->getPointerOperandType() == User->getType() &&
+ isa<VectorType>(LI->getType()))
+ return true;
return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
}
case Instruction::BitCast:
@@ -335,6 +341,10 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
// since it should be canonical form, the User should be a GEP.
// Also we could not vectorize volatile or atomic stores.
StoreInst *SI = cast<StoreInst>(Inst);
+ if (isa<AllocaInst>(User) &&
+ SI->getPointerOperandType() == User->getType() &&
+ isa<VectorType>(SI->getValueOperand()->getType()))
+ return true;
return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && SI->isSimple();
}
default:
@@ -342,14 +352,15 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
}
}
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
if (DisablePromoteAllocaToVector) {
LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
return false;
}
- ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
+ Type *AT = Alloca->getAllocatedType();
+ SequentialType *AllocaTy = dyn_cast<SequentialType>(AT);
LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
@@ -396,7 +407,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
}
}
- VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
+ VectorType *VectorTy = dyn_cast<VectorType>(AllocaTy);
+ if (!VectorTy)
+ VectorTy = arrayTypeToVecType(cast<ArrayType>(AllocaTy));
LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
<< *VectorTy << '\n');
@@ -406,7 +419,10 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
+ if (Inst->getType() == AT)
+ break;
+
+ Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@@ -418,9 +434,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
break;
}
case Instruction::Store: {
- Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
-
StoreInst *SI = cast<StoreInst>(Inst);
+ if (SI->getValueOperand()->getType() == AT)
+ break;
+
+ Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
@@ -610,7 +628,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
// we cannot use local memory in the pass.
for (Type *ParamTy : FTy->params()) {
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
- if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
+ if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
LocalMemLimit = 0;
LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
"local memory disabled.\n");
@@ -627,7 +645,7 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
- if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
+ if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
continue;
for (const User *U : GV.users()) {
@@ -706,9 +724,12 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
- if (tryPromoteAllocaToVector(&I, AS))
+ if (tryPromoteAllocaToVector(&I))
return true; // Promoted to vector.
+ if (DisablePromoteAllocaToLDS)
+ return false;
+
const Function &ContainingFunction = *I.getParent()->getParent();
CallingConv::ID CC = ContainingFunction.getCallingConv();
@@ -775,7 +796,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
Twine(F->getName()) + Twine('.') + I.getName(),
nullptr,
GlobalVariable::NotThreadLocal,
- AS.LOCAL_ADDRESS);
+ AMDGPUAS::LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
GV->setAlignment(I.getAlignment());
@@ -808,7 +829,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
Value *Src0 = CI->getOperand(0);
Type *EltTy = Src0->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
if (isa<ConstantPointerNull>(CI->getOperand(0)))
CI->setOperand(0, ConstantPointerNull::get(NewTy));
@@ -825,7 +846,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
continue;
Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
@@ -894,7 +915,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
Type *SrcTy = Src->getType()->getPointerElementType();
Function *ObjectSize = Intrinsic::getDeclaration(Mod,
Intrinsic::objectsize,
- { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
+ { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
);
CallInst *NewCall = Builder.CreateCall(
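
The promote-alloca changes above extend a pass whose core idea is to turn a small private array, whose accesses are all simple indexed loads and stores, into a single vector value: indexed stores become insert-lane operations and indexed loads become extract-lane operations. The following is only a toy, source-level illustration of that rewrite, not the pass's actual IR transformation.

// Before: per-element accesses into a small private array.
float beforeRewrite(int i, float x) {
  float a[4] = {0.0f, 0.0f, 0.0f, 0.0f};
  a[i & 3] = x;            // indexed store
  return a[i & 3];         // indexed load
}

// After (conceptually): the array lives in one vector-like value, so indexed
// stores become insert-lane and indexed loads become extract-lane, which a
// SIMD backend can keep entirely in registers.
struct V4 { float Lane[4]; };

static V4 insertLane(V4 V, int L, float X) { V.Lane[L] = X; return V; }
static float extractLane(const V4 &V, int L) { return V.Lane[L]; }

float afterRewrite(int i, float x) {
  V4 Vec = {{0.0f, 0.0f, 0.0f, 0.0f}};
  Vec = insertLane(Vec, i & 3, x);     // store -> insertelement
  return extractLane(Vec, i & 3);      // load  -> extractelement
}

int main() { return beforeRewrite(1, 2.0f) == afterRewrite(1, 2.0f) ? 0 : 1; }
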
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 012e4fe200aa..7a760dcf7a90 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -35,7 +35,7 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
: AMDGPUGenRegisterBankInfo(),
TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
- // HACK: Until this is fully tablegen'd
+ // HACK: Until this is fully tablegen'd.
static bool AlreadyInit = false;
if (AlreadyInit)
return;
@@ -74,13 +74,16 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src,
unsigned Size) const {
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
- Src.getID() == AMDGPU::VGPRRegBankID)
+ Src.getID() == AMDGPU::VGPRRegBankID) {
return std::numeric_limits<unsigned>::max();
+ }
// SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by
// the valu.
if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
- Src.getID() == AMDGPU::SGPRRegBankID)
+ (Src.getID() == AMDGPU::SGPRRegBankID ||
+ Src.getID() == AMDGPU::VGPRRegBankID ||
+ Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();
return RegisterBankInfo::copyCost(Dst, Src, Size);
@@ -145,7 +148,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&SSMapping);
const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
@@ -153,7 +156,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&SVMapping);
const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
@@ -161,7 +164,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&VSMapping);
const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr, // Predicate operand.
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
@@ -170,6 +173,67 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
return AltMappings;
}
+ case TargetOpcode::G_SELECT: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+ 4); // Num Operands
+ AltMappings.push_back(&SSMapping);
+
+ const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
+ 4); // Num Operands
+ AltMappings.push_back(&VVMapping);
+
+ return AltMappings;
+ }
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
+ getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
+ 5); // Num Operands
+ AltMappings.push_back(&SSMapping);
+
+ const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
+ 5); // Num Operands
+ AltMappings.push_back(&VVMapping);
+ return AltMappings;
+ }
+ case AMDGPU::G_BRCOND: {
+ assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
+
+ const InstructionMapping &SMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
+ 2); // Num Operands
+ AltMappings.push_back(&SMapping);
+
+ const InstructionMapping &VMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr }),
+ 2); // Num Operands
+ AltMappings.push_back(&VMapping);
+ return AltMappings;
+ }
default:
break;
}
@@ -193,10 +257,16 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
unsigned Reg = MI.getOperand(i).getReg();
- const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
- if (Bank && Bank->getID() != AMDGPU::SGPRRegBankID)
- return false;
+ if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
+ if (Bank->getID() == AMDGPU::VGPRRegBankID)
+ return false;
+
+ assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
+ Bank->getID() == AMDGPU::SCCRegBankID);
+ }
}
return true;
}
@@ -209,7 +279,8 @@ AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
- OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
+ OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
MI.getNumOperands());
@@ -230,12 +301,32 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
unsigned Reg1 = MI.getOperand(OpdIdx).getReg();
unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
- unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI);
+
+ unsigned DefaultBankID = Size1 == 1 ?
+ AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
+ unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);
+
OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
unsigned Size = getSizeInBits(MI.getOperand(OpdIdx).getReg(), MRI, *TRI);
- OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
+ OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
+ }
+
+ return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
+ MI.getNumOperands());
+}
+
+const RegisterBankInfo::InstructionMapping &
+AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
@@ -304,21 +395,49 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
return getInvalidInstructionMapping();
+
+ case AMDGPU::G_AND:
+ case AMDGPU::G_OR:
+ case AMDGPU::G_XOR: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (Size == 1) {
+ OpdsMapping[0] = OpdsMapping[1] =
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
case AMDGPU::G_MUL:
- case AMDGPU::G_AND:
- case AMDGPU::G_OR:
- case AMDGPU::G_XOR:
case AMDGPU::G_SHL:
+ case AMDGPU::G_UADDO:
+ case AMDGPU::G_SADDO:
+ case AMDGPU::G_USUBO:
+ case AMDGPU::G_SSUBO:
+ case AMDGPU::G_UADDE:
+ case AMDGPU::G_SADDE:
+ case AMDGPU::G_USUBE:
+ case AMDGPU::G_SSUBE:
if (isSALUMapping(MI))
return getDefaultMappingSOP(MI);
- // Fall-through
+ LLVM_FALLTHROUGH;
case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
case AMDGPU::G_FPTOSI:
case AMDGPU::G_FPTOUI:
case AMDGPU::G_FMUL:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_SITOFP:
+ case AMDGPU::G_UITOFP:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FEXP2:
+ case AMDGPU::G_FLOG2:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_INTRINSIC_ROUND:
return getDefaultMappingVOP(MI);
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -326,11 +445,25 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_FCONSTANT:
- case AMDGPU::G_CONSTANT: {
+ case AMDGPU::G_CONSTANT:
+ case AMDGPU::G_FRAME_INDEX:
+ case AMDGPU::G_BLOCK_ADDR: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case AMDGPU::G_INSERT: {
+ unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
+ AMDGPU::VGPRRegBankID;
+ unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
+ OpdsMapping[3] = nullptr;
+ break;
+ }
case AMDGPU::G_EXTRACT: {
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
@@ -352,7 +485,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
- case AMDGPU::G_BITCAST: {
+ case AMDGPU::G_BITCAST:
+ case AMDGPU::G_INTTOPTR:
+ case AMDGPU::G_PTRTOINT:
+ case AMDGPU::G_CTLZ:
+ case AMDGPU::G_CTLZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF:
+ case AMDGPU::G_CTPOP:
+ case AMDGPU::G_BSWAP:
+ case AMDGPU::G_FABS:
+ case AMDGPU::G_FNEG: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
@@ -368,7 +511,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
- case AMDGPU::G_ZEXT: {
+ case AMDGPU::G_ZEXT:
+ case AMDGPU::G_SEXT:
+ case AMDGPU::G_ANYEXT: {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Src = MI.getOperand(1).getReg();
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
@@ -391,7 +536,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FCMP: {
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 1);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
@@ -431,7 +576,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID &&
Op3Bank == AMDGPU::SGPRRegBankID ?
- AMDGPU::SCCRegBankID : AMDGPU::VGPRRegBankID;
+ AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
@@ -479,6 +624,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ case AMDGPU::G_UNMERGE_VALUES: {
+ unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
+ AMDGPU::VGPRRegBankID;
+
+ // Op1 and Dst should use the same register bank.
+ // FIXME: Shouldn't this be the default? Why do we need to handle this?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
+ OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
+ }
+ break;
+ }
case AMDGPU::G_INTRINSIC: {
switch (MI.getOperand(1).getIntrinsicID()) {
default:
@@ -492,6 +649,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case Intrinsic::amdgcn_wqm_vote: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = OpdsMapping[2]
+ = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
}
break;
}
@@ -528,8 +691,50 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
+ case AMDGPU::G_SELECT: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+ bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID &&
+ Op2Bank == AMDGPU::SGPRRegBankID &&
+ Op3Bank == AMDGPU::SGPRRegBankID;
+ unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+ Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+ OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
+ break;
+ }
+
case AMDGPU::G_LOAD:
return getInstrMappingForLoad(MI);
+
+ case AMDGPU::G_ATOMICRMW_XCHG:
+ case AMDGPU::G_ATOMICRMW_ADD:
+ case AMDGPU::G_ATOMICRMW_SUB:
+ case AMDGPU::G_ATOMICRMW_AND:
+ case AMDGPU::G_ATOMICRMW_OR:
+ case AMDGPU::G_ATOMICRMW_XOR:
+ case AMDGPU::G_ATOMICRMW_MAX:
+ case AMDGPU::G_ATOMICRMW_MIN:
+ case AMDGPU::G_ATOMICRMW_UMAX:
+ case AMDGPU::G_ATOMICRMW_UMIN:
+ case AMDGPU::G_ATOMIC_CMPXCHG: {
+ return getDefaultMappingAllVGPR(MI);
+ }
+ case AMDGPU::G_BRCOND: {
+ unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
+ if (Bank != AMDGPU::SCCRegBankID)
+ Bank = AMDGPU::VCCRegBankID;
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
+ break;
+ }
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
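
Most of the register-bank hunks above encode one rule of thumb: an operation can stay on the scalar unit only if every register operand is already uniform; any vector-bank (divergent) input forces the whole mapping onto VGPRs, with 1-bit conditions going to a condition bank instead. A deliberately simplified sketch of that decision follows, with invented names and none of the SCC/VCC special cases.

#include <vector>

enum class Bank { SGPR, VGPR, VCC };

// An operation keeps the scalar-ALU mapping only when every register operand
// is uniform (SGPR); any divergent (VGPR) input forces the vector mapping.
Bank pickResultBank(const std::vector<Bank> &OperandBanks) {
  for (Bank B : OperandBanks)
    if (B == Bank::VGPR)
      return Bank::VGPR;
  return Bank::SGPR;
}

int main() {
  return pickResultBank({Bank::SGPR, Bank::VGPR}) == Bank::VGPR ? 0 : 1;
}
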
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index d48a66589873..d29f4bc79a51 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -49,6 +49,8 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
bool isSALUMapping(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
+ const InstructionMapping &getDefaultMappingAllVGPR(
+ const MachineInstr &MI) const;
public:
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 7f7f75f65647..570379a820e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -15,4 +15,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
[VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
>;
-def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS ]>;
+def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS]>;
+
+// It is helpful to distinguish conditions from ordinary SGPRs.
+def VCCRegBank : RegisterBank <"VCC", [SReg_64]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 07de5fc549e2..922d974f2ebd 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -27,8 +27,6 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
AMDGPURegisterInfo();
- bool enableMultipleCopyHints() const override { return true; }
-
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
static unsigned getSubRegFromChannel(unsigned Channel);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index a861762a8c9e..efe501cb73c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -163,7 +163,7 @@ bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
// some casts between structs and non-structs, but we can't bitcast
// directly between them. directly bitcast between them. Blender uses
// some casts that look like { <3 x float> }* to <4 x float>*
- if ((SrcEltTy->isStructTy() && (SrcEltTy->getNumContainedTypes() != 1)))
+ if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
return false;
// Clang emits OpenCL 3-vector type accesses with a bitcast to the
@@ -401,8 +401,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (Val->getType() != EltTy) {
Type *EffectiveEltTy = EltTy;
if (StructType *CT = dyn_cast<StructType>(EltTy)) {
- assert(CT->getNumContainedTypes() == 1);
- EffectiveEltTy = CT->getContainedType(0);
+ assert(CT->getNumElements() == 1);
+ EffectiveEltTy = CT->getElementType(0);
}
if (DL->getTypeSizeInBits(EffectiveEltTy) !=
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 98b49070fa99..ed0cc70c3d9a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -74,6 +74,9 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// We want to be able to turn these off, but making this a subtarget feature
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
+ //
+ // Similarly we want enable-prt-strict-null to be on by default and not to
+ // unset everything else if it is disabled
SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
@@ -89,6 +92,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += "-fp32-denormals,";
}
+ FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -124,10 +129,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
return *this;
}
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
- const FeatureBitset &FeatureBits) :
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
TargetTriple(TT),
- SubtargetFeatureBits(FeatureBits),
Has16BitInsts(false),
HasMadMixInsts(false),
FP32Denormals(false),
@@ -136,19 +139,22 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
HasVOP3PInsts(false),
HasMulI24(true),
HasMulU24(true),
+ HasInv2PiInlineImm(false),
HasFminFmaxLegacy(true),
EnablePromoteAlloca(false),
+ HasTrigReducedRange(false),
LocalMemorySize(0),
WavefrontSize(0)
{ }
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const GCNTargetMachine &TM) :
+ const GCNTargetMachine &TM) :
AMDGPUGenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT, getFeatureBits()),
+ AMDGPUSubtarget(TT),
TargetTriple(TT),
Gen(SOUTHERN_ISLANDS),
IsaVersion(ISAVersion0_0_0),
+ InstrItins(getInstrItineraryForCPU(GPU)),
LDSBankCount(0),
MaxPrivateElementSize(0),
@@ -170,16 +176,17 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DebuggerEmitPrologue(false),
EnableHugePrivateBuffer(false),
- EnableVGPRSpilling(false),
EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false),
EnableDS128(false),
+ EnablePRTStrictNull(false),
DumpCode(false),
FP64(false),
GCN3Encoding(false),
CIInsts(false),
+ VIInsts(false),
GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
@@ -189,15 +196,16 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasVGPRIndexMode(false),
HasScalarStores(false),
HasScalarAtomics(false),
- HasInv2PiInlineImm(false),
HasSDWAOmod(false),
HasSDWAScalar(false),
HasSDWASdst(false),
HasSDWAMac(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
+ HasR128A16(false),
HasDLInsts(false),
- D16PreservesUnusedBits(false),
+ HasDotInsts(false),
+ EnableSRAMECC(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
@@ -211,7 +219,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
- AS = AMDGPU::getAMDGPUAS(TT);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
@@ -447,7 +454,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
R600GenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT, getFeatureBits()),
+ AMDGPUSubtarget(TT),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
FMA(false),
@@ -460,8 +467,7 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
TexVTXClauseSize(0),
Gen(R600),
TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
- InstrItins(getInstrItineraryForCPU(GPU)),
- AS (AMDGPU::getAMDGPUAS(TT)) { }
+ InstrItins(getInstrItineraryForCPU(GPU)) { }
void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
@@ -480,10 +486,6 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackLaneMasks = true;
}
-bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const {
- return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
-}
-
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 623109733651..5584759e5580 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -63,7 +63,6 @@ private:
Triple TargetTriple;
protected:
- const FeatureBitset &SubtargetFeatureBits;
bool Has16BitInsts;
bool HasMadMixInsts;
bool FP32Denormals;
@@ -72,13 +71,15 @@ protected:
bool HasVOP3PInsts;
bool HasMulI24;
bool HasMulU24;
+ bool HasInv2PiInlineImm;
bool HasFminFmaxLegacy;
bool EnablePromoteAlloca;
+ bool HasTrigReducedRange;
int LocalMemorySize;
unsigned WavefrontSize;
public:
- AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+ AMDGPUSubtarget(const Triple &TT);
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -134,7 +135,7 @@ public:
return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
}
- bool isAmdCodeObjectV2(const Function &F) const {
+ bool isAmdHsaOrMesa(const Function &F) const {
return isAmdHsaOS() || isMesaKernel(F);
}
@@ -170,10 +171,18 @@ public:
return HasMulU24;
}
+ bool hasInv2PiInlineImm() const {
+ return HasInv2PiInlineImm;
+ }
+
bool hasFminFmaxLegacy() const {
return HasFminFmaxLegacy;
}
+ bool hasTrigReducedRange() const {
+ return HasTrigReducedRange;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -193,38 +202,26 @@ public:
/// Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const Function &F) const {
- return isAmdCodeObjectV2(F) ? 0 : 36;
+ return isAmdHsaOrMesa(F) ? 0 : 36;
}
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMinFlatWorkGroupSize() const = 0;
/// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMaxFlatWorkGroupSize() const = 0;
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
- unsigned getMinWavesPerEU() const {
- return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
- }
+ virtual unsigned getMinWavesPerEU() const = 0;
unsigned getMaxWavesPerEU() const { return 10; }
@@ -266,6 +263,7 @@ public:
ISAVersion9_0_2,
ISAVersion9_0_4,
ISAVersion9_0_6,
+ ISAVersion9_0_9,
};
enum TrapHandlerAbi {
@@ -300,6 +298,7 @@ protected:
Triple TargetTriple;
unsigned Gen;
unsigned IsaVersion;
+ InstrItineraryData InstrItins;
int LDSBankCount;
unsigned MaxPrivateElementSize;
@@ -323,11 +322,11 @@ protected:
// Used as options.
bool EnableHugePrivateBuffer;
- bool EnableVGPRSpilling;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
bool EnableDS128;
+ bool EnablePRTStrictNull;
bool DumpCode;
// Subtarget statically properties set by tablegen
@@ -337,6 +336,7 @@ protected:
bool IsGCN;
bool GCN3Encoding;
bool CIInsts;
+ bool VIInsts;
bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
@@ -346,15 +346,16 @@ protected:
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
- bool HasInv2PiInlineImm;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
+ bool HasR128A16;
bool HasDLInsts;
- bool D16PreservesUnusedBits;
+ bool HasDotInsts;
+ bool EnableSRAMECC;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
@@ -372,7 +373,6 @@ protected:
bool FeatureDisable;
SelectionDAGTargetInfo TSInfo;
- AMDGPUAS AS;
private:
SIInstrInfo InstrInfo;
SITargetLowering TLInfo;
@@ -423,6 +423,10 @@ public:
return &TSInfo;
}
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
Generation getGeneration() const {
@@ -441,10 +445,6 @@ public:
return MaxPrivateElementSize;
}
- AMDGPUAS getAMDGPUAS() const {
- return AS;
- }
-
bool hasIntClamp() const {
return HasIntClamp;
}
@@ -517,6 +517,10 @@ public:
return FMA;
}
+ bool hasSwap() const {
+ return GFX9Insts;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
@@ -574,12 +578,19 @@ public:
return getGeneration() < AMDGPUSubtarget::GFX9;
}
+ /// \returns If target requires PRT Struct NULL support (zero result registers
+ /// for sparse texture support).
+ bool usePRTStrictNull() const {
+ return EnablePRTStrictNull;
+ }
+
bool hasAutoWaitcntBeforeBarrier() const {
return AutoWaitcntBeforeBarrier;
}
bool hasCodeObjectV3() const {
- return CodeObjectV3;
+ // FIXME: Need to add code object v3 support for mesa and pal.
+ return isAmdHsaOS() ? CodeObjectV3 : false;
}
bool hasUnalignedBufferAccess() const {
@@ -677,8 +688,12 @@ public:
return HasDLInsts;
}
- bool d16PreservesUnusedBits() const {
- return D16PreservesUnusedBits;
+ bool hasDotInsts() const {
+ return HasDotInsts;
+ }
+
+ bool isSRAMECCEnabled() const {
+ return EnableSRAMECC;
}
// Scratch is allocated in 256 dword per wave blocks for the entire
@@ -707,20 +722,19 @@ public:
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getEUsPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
- FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
}
/// \returns Maximum number of waves per execution unit supported by the
@@ -732,8 +746,7 @@ public:
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(
- MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
}
// static wrappers
@@ -747,8 +760,6 @@ public:
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
- bool isVGPRSpillingEnabled(const Function &F) const;
-
unsigned getMaxNumUserSGPRs() const {
return 16;
}
@@ -781,14 +792,15 @@ public:
return HasScalarAtomics;
}
- bool hasInv2PiInlineImm() const {
- return HasInv2PiInlineImm;
- }
bool hasDPP() const {
return HasDPP;
}
+ bool hasR128A16() const {
+ return HasR128A16;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -817,6 +829,11 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ // \returns true if the subtarget supports DWORDX3 load/store instructions.
+ bool hasDwordx3LoadStores() const {
+ return CIInsts;
+ }
+
bool hasSMovFedHazard() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
@@ -851,39 +868,34 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU, Addressable);
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@@ -901,39 +913,34 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -949,6 +956,34 @@ public:
void getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
class R600Subtarget final : public R600GenSubtargetInfo,
@@ -968,7 +1003,6 @@ private:
R600TargetLowering TLInfo;
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
- AMDGPUAS AS;
public:
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -1053,8 +1087,6 @@ public:
short getTexVTXClauseSize() const { return TexVTXClauseSize; }
- AMDGPUAS getAMDGPUAS() const { return AS; }
-
bool enableMachineScheduler() const override {
return true;
}
@@ -1062,6 +1094,34 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
} // end namespace llvm
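The overrides added in this hunk forward work-group and wave-occupancy queries to the AMDGPU::IsaInfo helpers, passing the subtarget object itself instead of a FeatureBitset. A minimal sketch of how a caller might combine them, using only the member functions added above; the helper name and the concrete subtarget class names are assumptions not shown in this hunk:

#include <algorithm>

// Sketch: clamp a requested waves-per-EU count to the range the subtarget
// supports for a given flat work group size. SubtargetT stands for whichever
// subtarget class provides the overrides added above.
template <typename SubtargetT>
unsigned clampWavesPerEU(const SubtargetT &ST, unsigned Requested,
                         unsigned FlatWorkGroupSize) {
  unsigned Lo = ST.getMinWavesPerEU();
  unsigned Hi = ST.getMaxWavesPerEU(FlatWorkGroupSize);
  return std::min(std::max(Requested, Lo), Hi);
}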
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2205819c444f..e8cefdbf74b9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -45,6 +45,7 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
#include <memory>
@@ -105,6 +106,11 @@ static cl::opt<bool> EnableSDWAPeephole(
cl::desc("Enable SDWA peepholer"),
cl::init(true));
+static cl::opt<bool> EnableDPPCombine(
+ "amdgpu-dpp-combine",
+ cl::desc("Enable DPP combiner"),
+ cl::init(false));
+
// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
cl::desc("Enable AMDGPU Alias Analysis"),
@@ -137,6 +143,20 @@ static cl::opt<bool> EnableLowerKernelArguments(
cl::init(true),
cl::Hidden);
+// Enable atomic optimization
+static cl::opt<bool> EnableAtomicOptimizations(
+ "amdgpu-atomic-optimizations",
+ cl::desc("Enable atomic optimizations"),
+ cl::init(false),
+ cl::Hidden);
+
+// Enable Mode register optimization
+static cl::opt<bool> EnableSIModeRegisterPass(
+ "amdgpu-mode-register",
+ cl::desc("Enable mode register pass"),
+ cl::init(true),
+ cl::Hidden);
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -150,18 +170,22 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelPass(*PR);
+ initializeGCNDPPCombinePass(*PR);
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
+ initializeSIFixupVectorISelPass(*PR);
initializeSIFoldOperandsPass(*PR);
initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
initializeSIOptimizeExecMaskingPreRAPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUFixFunctionBitcastsPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPUArgumentUsageInfoPass(*PR);
+ initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
@@ -172,6 +196,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
+ initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
@@ -182,6 +207,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIFormMemoryClausesPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
+ initializeAMDGPUExternalAAWrapperPass(*PR);
initializeAMDGPUUseNativeCallsPass(*PR);
initializeAMDGPUSimplifyLibCallsPass(*PR);
initializeAMDGPUInlinerPass(*PR);
@@ -292,12 +318,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return Reloc::PIC_;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
@@ -306,9 +326,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OptLevel),
+ getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
TLOF(createTLOF(getTargetTriple())) {
- AS = AMDGPU::getAMDGPUAS(TT);
initAsmInfo();
}
@@ -331,13 +350,6 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
FSAttr.getValueAsString();
}
-static ImmutablePass *createAMDGPUExternalAAWrapperPass() {
- return createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
- if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
- AAR.addAAResult(WrapperPass->getResult());
- });
-}
-
/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
if (const Function *F = dyn_cast<Function>(&GV))
@@ -360,17 +372,6 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.Inliner = createAMDGPUFunctionInliningPass();
}
- if (Internalize) {
- // If we're generating code, we always have the whole program available. The
- // relocations expected for externally visible functions aren't supported,
- // so make sure every non-entry function is hidden.
- Builder.addExtension(
- PassManagerBuilder::EP_EnabledOnOptLevel0,
- [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createInternalizePass(mustPreserveGV));
- });
- }
-
Builder.addExtension(
PassManagerBuilder::EP_ModuleOptimizerEarly,
[Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
@@ -613,20 +614,23 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
+ addPass(createAtomicExpandPass());
+
+ // This must occur before inlining, as the inliner will not look through
+ // bitcast calls.
+ addPass(createAMDGPUFixFunctionBitcastsPass());
+
addPass(createAMDGPULowerIntrinsicsPass());
- if (TM.getTargetTriple().getArch() == Triple::r600 ||
- !EnableAMDGPUFunctionCalls) {
- // Function calls are not supported, so make sure we inline everything.
- addPass(createAMDGPUAlwaysInlinePass());
- addPass(createAlwaysInlinerLegacyPass());
- // We need to add the barrier noop pass, otherwise adding the function
- // inlining pass will cause all of the PassConfigs passes to be run
- // one function at a time, which means if we have a nodule with two
- // functions, then we will generate code for the first function
- // without ever running any passes on the second.
- addPass(createBarrierNoopPass());
- }
+ // Function calls are not supported, so make sure we inline everything.
+ addPass(createAMDGPUAlwaysInlinePass());
+ addPass(createAlwaysInlinerLegacyPass());
+ // We need to add the barrier noop pass, otherwise adding the function
+ // inlining pass will cause all of the PassConfigs passes to be run
+ // one function at a time, which means if we have a module with two
+ // functions, then we will generate code for the first function
+ // without ever running any passes on the second.
+ addPass(createBarrierNoopPass());
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
// TODO: May want to move later or split into an early and late one.
@@ -690,6 +694,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
}
bool AMDGPUPassConfig::addPreISel() {
+ addPass(createLowerSwitchPass());
addPass(createFlattenCFGPass());
return false;
}
@@ -759,6 +764,10 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+ if (EnableAtomicOptimizations) {
+ addPass(createAMDGPUAtomicOptimizerPass());
+ }
+
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
addPass(createAMDGPUAnnotateKernelFeaturesPass());
@@ -789,6 +798,8 @@ void GCNPassConfig::addMachineSSAOptimization() {
//
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsID);
+ if (EnableDPPCombine)
+ addPass(&GCNDPPCombineID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
if (EnableSDWAPeephole) {
@@ -811,8 +822,10 @@ bool GCNPassConfig::addILPOpts() {
bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
- addPass(createSILowerI1CopiesPass());
addPass(&SIFixSGPRCopiesID);
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixupVectorISelPass());
+ addPass(createSIAddIMGInitPass());
return false;
}
@@ -878,7 +891,8 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
void GCNPassConfig::addPostRegAlloc() {
addPass(&SIFixVGPRCopiesID);
- addPass(&SIOptimizeExecMaskingID);
+ if (getOptLevel() > CodeGenOpt::None)
+ addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
}
@@ -889,6 +903,7 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIMemoryLegalizerPass());
addPass(createSIInsertWaitcntsPass());
addPass(createSIShrinkInstructionsPass());
+ addPass(createSIModeRegisterPass());
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able to handle all hazards correctly. This is because if there
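Several of the hunks above follow one pattern: a hidden, off-by-default cl::opt flag, a pass initialization in LLVMInitializeAMDGPUTarget, and a conditional addPass in the pass config (EnableAtomicOptimizations gating createAMDGPUAtomicOptimizerPass, EnableDPPCombine gating GCNDPPCombineID). A sketch of that pattern with deliberately hypothetical flag and pass names:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hidden, off-by-default flag, mirroring EnableAtomicOptimizations above.
// "amdgpu-my-opt" and createMyOptPass() are placeholders, not real names.
static cl::opt<bool> EnableMyOpt(
    "amdgpu-my-opt",
    cl::desc("Enable the hypothetical MyOpt pass"),
    cl::init(false),
    cl::Hidden);

// In the corresponding addPreISel()/addMachineSSAOptimization() override:
//   if (EnableMyOpt)
//     addPass(createMyOptPass());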
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 0fe14493fabd..62fbe71d1902 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -34,7 +34,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
StringRef getFeatureString(const Function &F) const;
@@ -55,16 +54,13 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
- AMDGPUAS getAMDGPUAS() const {
- return AS;
- }
void adjustPassManager(PassManagerBuilder &) override;
+
/// Get the integer value of a null pointer in the given address space.
uint64_t getNullPointerValue(unsigned AddrSpace) const {
- if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
- return -1;
- return 0;
+ return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
}
};
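The rewritten getNullPointerValue expresses the AMDGPU convention that a null pointer is the all-ones value in the LOCAL (LDS) and REGION (GDS) address spaces and zero everywhere else; returning -1 from a uint64_t function yields that all-ones value. A standalone restatement, with the numeric address-space values treated as illustrative placeholders for the AMDGPUAS enumerators:

#include <cstdint>

// Placeholder numbers; the real code uses AMDGPUAS::LOCAL_ADDRESS and
// AMDGPUAS::REGION_ADDRESS.
enum : unsigned { LOCAL_ADDRESS = 3, REGION_ADDRESS = 2 };

// Mirrors the logic above: LDS/GDS null is all-ones, every other address
// space uses 0.
uint64_t nullPointerValue(unsigned AddrSpace) {
  return (AddrSpace == LOCAL_ADDRESS || AddrSpace == REGION_ADDRESS)
             ? ~uint64_t(0) : 0;
}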
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index e2f718bd3c34..c4e1efde130b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -29,3 +29,13 @@ MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
+
+MCSection *AMDGPUTargetObjectFile::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const {
+ // Set metadata access for the explicit section
+ StringRef SectionName = GO->getSection();
+ if (SectionName.startswith(".AMDGPU.comment."))
+ SK = SectionKind::getMetadata();
+
+ return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
+}
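getExplicitSectionGlobal above reclassifies globals whose explicit section name starts with ".AMDGPU.comment." as metadata before deferring to the ELF base implementation, so those sections are not treated as ordinary data. The prefix check reduced to a standalone sketch (SectionKind is elided; only the name test is shown):

#include <string>

// True when a section name names an AMDGPU comment section, mirroring the
// startswith(".AMDGPU.comment.") test above.
bool isAMDGPUCommentSection(const std::string &Name) {
  const std::string Prefix = ".AMDGPU.comment.";
  return Name.compare(0, Prefix.size(), Prefix) == 0;
}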
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index dd9dc1a88fc2..a4ae1a2c18c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -26,6 +26,8 @@ class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF {
public:
MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index a68b8d03f06e..11e4ba4b5010 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -102,7 +102,6 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned ThresholdPrivate = UnrollThresholdPrivate;
unsigned ThresholdLocal = UnrollThresholdLocal;
unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
- const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
for (const BasicBlock *BB : L->getBlocks()) {
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;
@@ -140,9 +139,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned AS = GEP->getAddressSpace();
unsigned Threshold = 0;
- if (AS == ASST.PRIVATE_ADDRESS)
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS)
Threshold = ThresholdPrivate;
- else if (AS == ASST.LOCAL_ADDRESS)
+ else if (AS == AMDGPUAS::LOCAL_ADDRESS)
Threshold = ThresholdLocal;
else
continue;
@@ -150,7 +149,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (UP.Threshold >= Threshold)
continue;
- if (AS == ASST.PRIVATE_ADDRESS) {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
const Value *Ptr = GEP->getPointerOperand();
const AllocaInst *Alloca =
dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
@@ -160,7 +159,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
if (AllocaSize > MaxAlloca)
continue;
- } else if (AS == ASST.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
LocalGEPsSeen++;
// Inhibit unroll for local memory if we have seen addressing not to
// a variable, most likely we will be unable to combine it.
@@ -253,19 +252,18 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
}
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- AMDGPUAS AS = ST->getAMDGPUAS();
- if (AddrSpace == AS.GLOBAL_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
return 512;
}
- if (AddrSpace == AS.FLAT_ADDRESS ||
- AddrSpace == AS.LOCAL_ADDRESS ||
- AddrSpace == AS.REGION_ADDRESS)
+ if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
+ AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
return 128;
- if (AddrSpace == AS.PRIVATE_ADDRESS)
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
llvm_unreachable("unhandled address space");
@@ -277,7 +275,7 @@ bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
- if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
ChainSizeInBytes <= ST->getMaxPrivateElementSize();
}
@@ -545,14 +543,15 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
if (const Argument *A = dyn_cast<Argument>(V))
return !isArgPassedInSGPR(A);
- // Loads from the private address space are divergent, because threads
- // can execute the load instruction with the same inputs and get different
- // results.
+ // Loads from the private and flat address spaces are divergent, because
+ // threads can execute the load instruction with the same inputs and get
+ // different results.
//
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
if (const LoadInst *Load = dyn_cast<LoadInst>(V))
- return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
+ return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
+ Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
// Atomics are divergent because they are executed sequentially: when an
// atomic operation refers to the same address in each thread, then each
@@ -642,20 +641,19 @@ unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
}
unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- AMDGPUAS AS = ST->getAMDGPUAS();
- if (AddrSpace == AS.GLOBAL_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS)
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
return 128;
- if (AddrSpace == AS.LOCAL_ADDRESS ||
- AddrSpace == AS.REGION_ADDRESS)
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
return 64;
- if (AddrSpace == AS.PRIVATE_ADDRESS)
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return 32;
- if ((AddrSpace == AS.PARAM_D_ADDRESS ||
- AddrSpace == AS.PARAM_I_ADDRESS ||
- (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
- AddrSpace <= AS.CONSTANT_BUFFER_15)))
+ if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+ AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+ (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+ AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
return 128;
llvm_unreachable("unhandled address space");
}
@@ -666,9 +664,7 @@ bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
- if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS)
- return false;
- return true;
+ return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
}
bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
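With the per-triple AMDGPUAS struct removed, the TTI code above compares address spaces against the AMDGPUAS enumerators directly. A standalone sketch of the GCN getLoadStoreVecRegBitWidth mapping shown in this diff, with the numeric address-space values and the private element size treated as illustrative assumptions:

// Placeholder address-space numbers standing in for the AMDGPUAS enumerators.
enum : unsigned {
  FLAT = 0, GLOBAL = 1, REGION = 2, LOCAL = 3,
  CONSTANT = 4, PRIVATE = 5, CONSTANT_32BIT = 6
};

// Mirrors GCNTTIImpl::getLoadStoreVecRegBitWidth above.
unsigned loadStoreVecRegBitWidth(unsigned AS, unsigned MaxPrivateElementSize) {
  if (AS == GLOBAL || AS == CONSTANT || AS == CONSTANT_32BIT)
    return 512;
  if (AS == FLAT || AS == LOCAL || AS == REGION)
    return 128;
  if (AS == PRIVATE)
    return 8 * MaxPrivateElementSize; // bytes -> bits
  return 0; // the real code calls llvm_unreachable here
}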
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 8e63d789e17d..397c5c6fa6fb 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -179,7 +179,7 @@ public:
if (IsGraphicsShader)
return -1;
return ST->hasFlatAddressSpace() ?
- ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
+ AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
}
unsigned getVectorSplitCost() { return 0; }
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 0d3a1673696a..ced3f6f567e2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -25,7 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -70,7 +70,7 @@ char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
@@ -78,10 +78,10 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
// TODO: Preserve dominator tree.
AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
// No divergent values are changed, only blocks and branch edges.
- AU.addPreserved<DivergenceAnalysis>();
+ AU.addPreserved<LegacyDivergenceAnalysis>();
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
@@ -95,7 +95,7 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
/// \returns true if \p BB is reachable through only uniform branches.
/// XXX - Is there a more efficient way to find this?
-static bool isUniformlyReached(const DivergenceAnalysis &DA,
+static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
BasicBlock &BB) {
SmallVector<BasicBlock *, 8> Stack;
SmallPtrSet<BasicBlock *, 8> Visited;
@@ -163,7 +163,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
if (PDT.getRoots().size() <= 1)
return false;
- DivergenceAnalysis &DA = getAnalysis<DivergenceAnalysis>();
+ LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
// Loop over all of the blocks in a function, tracking all of the blocks that
// return.
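The changes to this file are a pure rename of the analysis dependency from DivergenceAnalysis to LegacyDivergenceAnalysis; the way the pass consumes the result is unchanged. A sketch of that consumption, assuming the legacy analysis still exposes the isUniform() query of the old pass (an assumption, not shown in this diff):

#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: a branch can be treated as uniform when it is unconditional or
// its condition is uniform according to the (legacy) divergence analysis.
static bool branchIsUniform(LegacyDivergenceAnalysis &DA,
                            const BranchInst &BI) {
  return !BI.isConditional() || DA.isUniform(BI.getCondition());
}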
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 31e2885c833d..3f9af27a2e5e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -49,6 +49,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -156,13 +157,12 @@ public:
ImmTyDMask,
ImmTyUNorm,
ImmTyDA,
- ImmTyR128,
+ ImmTyR128A16,
ImmTyLWE,
ImmTyExpTgt,
ImmTyExpCompr,
ImmTyExpVM,
- ImmTyDFMT,
- ImmTyNFMT,
+ ImmTyFORMAT,
ImmTyHwreg,
ImmTyOff,
ImmTySendMsg,
@@ -291,7 +291,7 @@ public:
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
- bool isR128() const { return isImmTy(ImmTyR128); }
+ bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -312,8 +312,7 @@ public:
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
- bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
- bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
+ bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
@@ -666,8 +665,7 @@ public:
case ImmTySLC: OS << "SLC"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
- case ImmTyDFMT: OS << "DFMT"; break;
- case ImmTyNFMT: OS << "NFMT"; break;
+ case ImmTyFORMAT: OS << "FORMAT"; break;
case ImmTyClampSI: OS << "ClampSI"; break;
case ImmTyOModSI: OS << "OModSI"; break;
case ImmTyDppCtrl: OS << "DppCtrl"; break;
@@ -681,7 +679,7 @@ public:
case ImmTyDMask: OS << "DMask"; break;
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
- case ImmTyR128: OS << "R128"; break;
+ case ImmTyR128A16: OS << "R128A16"; break;
case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -920,8 +918,7 @@ public:
// Currently there is no suitable machinery in the core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
MCSymbol *Sym =
@@ -1061,6 +1058,7 @@ public:
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
+ OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
@@ -1092,7 +1090,6 @@ private:
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
- bool validateMIMGR128(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -1829,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
unsigned DwordRegIndex,
unsigned RegWidth) {
// Symbols are only defined for GCN targets
- if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
+ if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
return true;
auto SymbolName = getGprCountSymbolName(RegKind);
@@ -2447,22 +2444,6 @@ bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
-bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
-
- const unsigned Opc = Inst.getOpcode();
- const MCInstrDesc &Desc = MII.get(Opc);
-
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
- return true;
-
- int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
- assert(Idx != -1);
-
- bool R128 = (Inst.getOperand(Idx).getImm() != 0);
-
- return !R128 || hasMIMG_R128();
-}
-
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -2497,11 +2478,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"integer clamping is not supported on this GPU");
return false;
}
- if (!validateMIMGR128(Inst)) {
- Error(IDLoc,
- "r128 modifier is not supported on this GPU");
- return false;
- }
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
Error(IDLoc,
@@ -2661,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
unsigned &SGPRBlocks) {
// TODO(scott.linder): These calculations are duplicated from
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
- IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(getSTI().getCPU());
unsigned NumVGPRs = NextFreeVGPR;
unsigned NumSGPRs = NextFreeSGPR;
- unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
+ unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
NumSGPRs > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
NumSGPRs +=
- IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
+ IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
NumSGPRs > MaxAddressableNumSGPRs)
@@ -2681,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
if (Features.test(FeatureSGPRInitBug))
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
- VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
- SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
+ VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
+ SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
return false;
}
@@ -2702,8 +2678,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
StringSet<> Seen;
- IsaInfo::IsaVersion IVersion =
- IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+ IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
@@ -2962,8 +2937,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
@@ -3025,7 +2999,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -3091,9 +3065,18 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
+ const char *AssemblerDirectiveBegin;
+ const char *AssemblerDirectiveEnd;
+ std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
+ AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
+ ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
+ HSAMD::V3::AssemblerDirectiveEnd)
+ : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
+ HSAMD::AssemblerDirectiveEnd);
+
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
return Error(getParser().getTok().getLoc(),
- (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
+ (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
"not available on non-amdhsa OSes")).str());
}
@@ -3111,7 +3094,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
if (getLexer().is(AsmToken::Identifier)) {
StringRef ID = getLexer().getTok().getIdentifier();
- if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
+ if (ID == AssemblerDirectiveEnd) {
Lex();
FoundEnd = true;
break;
@@ -3133,8 +3116,13 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
YamlStream.flush();
- if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
- return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ if (IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ } else {
+ if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ }
return false;
}
@@ -3171,6 +3159,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
+
+ // TODO: Restructure/combine with PAL metadata directive.
+ if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
+ return ParseDirectiveHSAMetadata();
} else {
if (IDVal == ".hsa_code_object_version")
return ParseDirectiveHSACodeObjectVersion();
@@ -3186,10 +3178,10 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_amdgpu_isa")
return ParseDirectiveISAVersion();
- }
- if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
- return ParseDirectiveHSAMetadata();
+ if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
+ return ParseDirectiveHSAMetadata();
+ }
if (IDVal == PALMD::AssemblerDirective)
return ParseDirectivePALMetadata();
@@ -3465,6 +3457,10 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
case AsmToken::Identifier: {
StringRef Tok = Parser.getTok().getString();
if (Tok == Name) {
+ if (Tok == "r128" && isGFX9())
+ Error(S, "r128 modifier is not supported on this GPU");
+ if (Tok == "a16" && !isGFX9())
+ Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;
Parser.Lex();
} else if (Tok.startswith("no") && Tok.endswith(Name)) {
@@ -3522,6 +3518,53 @@ AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
return MatchOperand_Success;
}
+// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
+// values to live in a joint format operand in the MCInst encoding.
+OperandMatchResultTy
+AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Dfmt = 0, Nfmt = 0;
+ // dfmt and nfmt can appear in either order, and each is optional.
+ bool GotDfmt = false, GotNfmt = false;
+ while (!GotDfmt || !GotNfmt) {
+ if (!GotDfmt) {
+ auto Res = parseIntWithPrefix("dfmt", Dfmt);
+ if (Res != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Res;
+ if (Dfmt >= 16) {
+ Error(Parser.getTok().getLoc(), "out of range dfmt");
+ return MatchOperand_ParseFail;
+ }
+ GotDfmt = true;
+ Parser.Lex();
+ continue;
+ }
+ }
+ if (!GotNfmt) {
+ auto Res = parseIntWithPrefix("nfmt", Nfmt);
+ if (Res != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Res;
+ if (Nfmt >= 8) {
+ Error(Parser.getTok().getLoc(), "out of range nfmt");
+ return MatchOperand_ParseFail;
+ }
+ GotNfmt = true;
+ Parser.Lex();
+ continue;
+ }
+ }
+ break;
+ }
+ if (!GotDfmt && !GotNfmt)
+ return MatchOperand_NoMatch;
+ auto Format = Dfmt | Nfmt << 4;
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
+ return MatchOperand_Success;
+}
+
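parseDfmtNfmt above folds the two tbuffer format fields into one immediate: dfmt occupies bits 3:0 and nfmt bits 6:4, matching the format{3-0}/format{6-4} slices introduced in BUFInstructions.td further down. The packing and unpacking as a standalone sketch:

#include <cassert>

// Pack dfmt (4 bits) and nfmt (3 bits) into the joint FORMAT immediate,
// as parseDfmtNfmt does above.
unsigned packFormat(unsigned Dfmt, unsigned Nfmt) {
  assert(Dfmt < 16 && Nfmt < 8 && "out of range dfmt/nfmt");
  return Dfmt | (Nfmt << 4);
}

// Recover the fields, matching MTBUF_Real's format{3-0} and format{6-4}.
unsigned unpackDfmt(unsigned Format) { return Format & 0xF; }
unsigned unpackNfmt(unsigned Format) { return (Format >> 4) & 0x7; }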
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//
@@ -3652,12 +3695,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
static bool
encodeCnt(
- const AMDGPU::IsaInfo::IsaVersion ISA,
+ const AMDGPU::IsaVersion ISA,
int64_t &IntVal,
int64_t CntVal,
bool Saturate,
- unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
- unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
+ unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
+ unsigned (*decode)(const IsaVersion &Version, unsigned))
{
bool Failed = false;
@@ -3688,8 +3731,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getParser().parseAbsoluteExpression(CntVal))
return true;
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
bool Failed = true;
bool Sat = CntName.endswith("_sat");
@@ -3724,8 +3766,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = Parser.getTok().getLoc();
@@ -4617,8 +4658,7 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOffset);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
@@ -4661,7 +4701,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -4761,8 +4801,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
- {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
- {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
+ {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -4772,7 +4811,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
- {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
+ {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+ {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
@@ -4844,6 +4884,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
Op.Type == AMDGPUOperand::ImmTyNegHi) {
res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
Op.ConvertResult);
+ } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
+ res = parseDfmtNfmt(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -5251,12 +5293,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
- // All DPP instructions with at least one source operand have a fake "old"
- // source at the beginning that's tied to the dst operand. Handle it here.
- if (Desc.getNumOperands() >= 2)
- Inst.addOperand(Inst.getOperand(0));
-
for (unsigned E = Operands.size(); I != E; ++I) {
+ auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
+ MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ assert((unsigned)TiedTo < Inst.getNumOperands());
+ // handle tied old or src2 for MAC instructions
+ Inst.addOperand(Inst.getOperand(TiedTo));
+ }
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
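The cvtDPP change above stops unconditionally duplicating operand 0 as the fake "old" source and instead asks the instruction description whether the next operand slot is tied to an earlier one, which also covers the tied src2 of MAC instructions. A sketch of that lookup (MCInstrDesc::getOperandConstraint returns -1 when no TIED_TO constraint exists):

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;

// Sketch: before appending the next parsed operand, copy the operand it is
// tied to, if any, exactly as the rewritten cvtDPP loop does above.
static void addTiedOperandIfNeeded(MCInst &Inst, const MCInstrDesc &Desc) {
  int TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);
  if (TiedTo != -1)
    Inst.addOperand(Inst.getOperand(TiedTo));
}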
diff --git a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
index b87c47a6b9ee..51c2abeac2ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -17,14 +17,12 @@ def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [],
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
-def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
-def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
class MubufLoad <SDPatternOperator op> : PatFrag <
(ops node:$ptr), (op node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS;
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
def mubuf_load : MubufLoad <load>;
@@ -100,15 +98,11 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
bits<1> glc_value = 0; // the value for glc if no such operand
- bits<4> dfmt_value = 1; // the value for dfmt if no such operand
- bits<3> nfmt_value = 0; // the value for nfmt if no such operand
bits<1> has_srsrc = 1;
bits<1> has_soffset = 1;
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
- bits<1> has_dfmt = 1;
- bits<1> has_nfmt = 1;
}
class MTBUF_Real <MTBUF_Pseudo ps> :
@@ -126,14 +120,16 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
bits<12> offset;
bits<1> glc;
- bits<4> dfmt;
- bits<3> nfmt;
+ bits<7> format;
bits<8> vaddr;
bits<8> vdata;
bits<7> srsrc;
bits<1> slc;
bits<1> tfe;
bits<8> soffset;
+
+ bits<4> dfmt = format{3-0};
+ bits<3> nfmt = format{6-4};
}
class getMTBUFInsDA<list<RegisterClass> vdataList,
@@ -142,16 +138,16 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, SLC:$slc, TFE:$tfe),
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, SLC:$slc, TFE:$tfe)
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
);
dag InsData = !if(!empty(vaddrList),
(ins vdataClass:$vdata, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe),
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
@@ -169,15 +165,15 @@ class getMTBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
class getMTBUFAsmOps<int addrKind> {
string Pfx =
- !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset",
+ !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $format, $soffset",
!if(!eq(addrKind, BUFAddrKind.OffEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen",
+ "$vaddr, $srsrc, $format, $soffset offen",
!if(!eq(addrKind, BUFAddrKind.IdxEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen",
+ "$vaddr, $srsrc, $format, $soffset idxen",
!if(!eq(addrKind, BUFAddrKind.BothEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen",
+ "$vaddr, $srsrc, $format, $soffset idxen offen",
!if(!eq(addrKind, BUFAddrKind.Addr64),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64",
+ "$vaddr, $srsrc, $format, $soffset addr64",
"")))));
string ret = Pfx # "$offset";
}
@@ -217,14 +213,14 @@ multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set load_vt:$vdata,
- (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt,
- i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
+ i1:$glc, i1:$slc, i1:$tfe)))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set load_vt:$vdata,
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
- i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
MTBUFAddr64Table<1, NAME>;
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -263,13 +259,13 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i16:$offset, i8:$format, i1:$glc,
i1:$slc, i1:$tfe))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i16:$offset, i8:$format, i1:$glc,
i1:$slc, i1:$tfe))]>,
MTBUFAddr64Table<1, NAME>;
@@ -290,6 +286,12 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
// MUBUF classes
//===----------------------------------------------------------------------===//
+class MUBUFGetBaseOpcode<string Op> {
+ string ret = !subst("DWORDX2", "DWORD",
+ !subst("DWORDX3", "DWORD",
+ !subst("DWORDX4", "DWORD", Op)));
+}
+
class MUBUF_Pseudo <string opName, dag outs, dag ins,
string asmOps, list<dag> pattern=[]> :
InstSI<outs, ins, "", pattern>,
@@ -303,6 +305,9 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
string Mnemonic = opName;
string AsmOperands = asmOps;
+ Instruction Opcode = !cast<Instruction>(NAME);
+ Instruction BaseOpcode = !cast<Instruction>(MUBUFGetBaseOpcode<NAME>.ret);
+
let VM_CNT = 1;
let EXP_CNT = 1;
let MUBUF = 1;
@@ -325,6 +330,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
+ bits<4> dwords = 0;
}
class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
@@ -398,6 +404,16 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
);
}
+class getMUBUFDwords<RegisterClass regClass> {
+ string regClassAsInt = !cast<string>(regClass);
+ int ret =
+ !if(!eq(regClassAsInt, !cast<string>(VGPR_32)), 1,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_64)), 2,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_96)), 3,
+ !if(!eq(regClassAsInt, !cast<string>(VReg_128)), 4,
+ 0))));
+}
+
class getMUBUFIns<int addrKind, list<RegisterClass> vdataList=[], bit isLds = 0> {
dag ret =
!if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA<vdataList, [], isLds>.ret,
@@ -458,6 +474,7 @@ class MUBUF_Load_Pseudo <string opName,
let Uses = !if(isLds, [EXEC, M0], [EXEC]);
let has_tfe = !if(isLds, 0, 1);
let lds = isLds;
+ let dwords = getMUBUFDwords<vdataClass>.ret;
}
// FIXME: tfe can't be an operand because it requires a separate
@@ -521,6 +538,7 @@ class MUBUF_Store_Pseudo <string opName,
let mayLoad = 0;
let mayStore = 1;
let maybeAtomic = 1;
+ let dwords = getMUBUFDwords<vdataClass>.ret;
}
multiclass MUBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
@@ -660,11 +678,10 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
let AsmMatchConverter = "cvtMubufAtomicReturn";
}
-multiclass MUBUF_Pseudo_Atomics <string opName,
- RegisterClass vdataClass,
- ValueType vdataType,
- SDPatternOperator atomic> {
-
+multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> {
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
MUBUFAddr64Table <0, NAME>;
def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
@@ -672,7 +689,12 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+}
+multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> {
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
@@ -690,6 +712,13 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}
+multiclass MUBUF_Pseudo_Atomics <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> :
+ MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType, atomic>,
+ MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;
+
//===----------------------------------------------------------------------===//
// MUBUF Instructions
@@ -1030,6 +1059,14 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
// MUBUF Patterns
//===----------------------------------------------------------------------===//
+def extract_glc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
+}]>;
+
+def extract_slc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
+}]>;
+
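extract_glc and extract_slc above unpack the combined cachepolicy operand that the buffer intrinsic patterns now carry: bit 0 selects glc and bit 1 selects slc. The same extraction in plain C++ for reference:

// cachepolicy bit layout taken from the SDNodeXForms above:
//   bit 0 = glc, bit 1 = slc.
unsigned extractGLC(unsigned CachePolicy) { return CachePolicy & 1; }
unsigned extractSLC(unsigned CachePolicy) { return (CachePolicy >> 1) & 1; }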
//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
@@ -1037,119 +1074,129 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (vt (name v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc)),
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0)),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm)),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc)),
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2i32, "BUFFER_LOAD_FORMAT_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_XYZW">;
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i32, "BUFFER_LOAD_DWORDX4">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc),
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, i32, "BUFFER_STORE_FORMAT_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2i32, "BUFFER_STORE_FORMAT_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMAT_XYZW">;
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i16, "BUFFER_STORE_FORMAT_D16_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i16, "BUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i32, "BUFFER_STORE_DWORDX4">;
//===----------------------------------------------------------------------===//
// buffer_atomic patterns
@@ -1158,36 +1205,36 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
+ (as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc))
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
>;
}
@@ -1205,49 +1252,49 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
sub0)
>;
@@ -1397,54 +1444,6 @@ defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D
defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, i16, az_extloadi8_private>;
defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, i16, sextloadi8_private>;
}
-
-// BUFFER_LOAD_DWORD*, addr64=0
-multiclass MUBUF_Load_Dword <ValueType vt,
- MUBUF_Pseudo offset,
- MUBUF_Pseudo offen,
- MUBUF_Pseudo idxen,
- MUBUF_Pseudo bothen> {
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
- imm:$offset, 0, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 0, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-}
-
-defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
- BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
-defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
- BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
-defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
- BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
-
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is first
@@ -1524,32 +1523,36 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
@@ -1576,39 +1579,36 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
- imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$offset, imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
@@ -1781,8 +1781,8 @@ class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{15} = ps.addr64;
let Inst{18-16} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -1811,6 +1811,7 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
//===----------------------------------------------------------------------===//
// CI
+// MTBUF - GFX6, GFX7.
//===----------------------------------------------------------------------===//
class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
@@ -2013,8 +2014,8 @@ class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -2043,8 +2044,8 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -2089,3 +2090,22 @@ let SubtargetPredicate = HasPackedD16VMem in {
defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0e>;
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0f>;
} // End HasPackedD16VMem.
+
+def MUBUFInfoTable : GenericTable {
+ let FilterClass = "MUBUF_Pseudo";
+ let CppTypeName = "MUBUFInfo";
+ let Fields = ["Opcode", "BaseOpcode", "dwords", "has_vaddr", "has_srsrc", "has_soffset"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getMUBUFOpcodeHelper";
+}
+
+def getMUBUFInfoFromOpcode : SearchIndex {
+ let Table = MUBUFInfoTable;
+ let Key = ["Opcode"];
+}
+
+def getMUBUFInfoFromBaseOpcodeAndDwords : SearchIndex {
+ let Table = MUBUFInfoTable;
+ let Key = ["BaseOpcode", "dwords"];
+}
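+
+// Note: the SearchableTables backend is expected to emit a MUBUFInfo array for
+// this table plus lookup helpers named after the defs above (e.g.
+// getMUBUFInfoFromOpcode and getMUBUFInfoFromBaseOpcodeAndDwords), which
+// callers can use to switch between dword-width variants of a MUBUF opcode.
+// The exact C++ wrappers are assumed here and are not part of this diff.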
diff --git a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
index cdc6ab9412e6..31d2ebef481d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -728,7 +728,9 @@ class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, PatFrag frag> : GCNPat<
(i1 0))
>;
-let OtherPredicates = [LDSRequiresM0Init] in {
+// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
+// related to bounds checking.
+let OtherPredicates = [LDSRequiresM0Init, isCIVI] in {
def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, load_local_m0>;
def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, store_local_m0>;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 3ef473b7fd96..44040d352e6a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -121,6 +121,11 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
+class GlobalSaddrTable <bit is_saddr, string Name = ""> {
+ bit IsSaddr = is_saddr;
+ string SaddrOp = Name;
+}
+
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
@@ -171,15 +176,19 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
let is_flat_global = 1 in {
- def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>;
- def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>;
+ def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>,
+ GlobalSaddrTable<1, opName>;
}
}
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_global = 1 in {
- def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>;
- def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>;
+ def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>,
+ GlobalSaddrTable<1, opName>;
}
}
@@ -262,6 +271,7 @@ multiclass FLAT_Atomic_Pseudo<
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
" $vaddr, $vdata$offset$slc">,
+ GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
}
@@ -272,10 +282,11 @@ multiclass FLAT_Atomic_Pseudo<
" $vdst, $vaddr, $vdata$offset glc$slc",
[(set vt:$vdst,
(atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1>;
}
-multiclass FLAT_Global_Atomic_Pseudo<
+multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
string opName,
RegisterClass vdst_rc,
ValueType vt,
@@ -287,35 +298,48 @@ multiclass FLAT_Global_Atomic_Pseudo<
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
" $vaddr, $vdata, off$offset$slc">,
+ GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let has_saddr = 1;
let PseudoInstr = NAME;
}
- def _RTN : FLAT_AtomicRet_Pseudo <opName,
- (outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
- " $vdst, $vaddr, $vdata, off$offset glc$slc",
- [(set vt:$vdst,
- (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
- AtomicNoRet <opName, 1> {
- let has_saddr = 1;
- }
-
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
" $vaddr, $vdata, $saddr$offset$slc">,
+ GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR";
}
+}
+
+multiclass FLAT_Global_Atomic_Pseudo_RTN<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ SDPatternOperator atomic = null_frag,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc> {
+
+ def _RTN : FLAT_AtomicRet_Pseudo <opName,
+ (outs vdst_rc:$vdst),
+ (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
+ " $vdst, $vaddr, $vdata, off$offset glc$slc",
+ [(set vt:$vdst,
+ (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
+ GlobalSaddrTable<0, opName#"_rtn">,
+ AtomicNoRet <opName, 1> {
+ let has_saddr = 1;
+ }
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
" $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
+ GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
let has_saddr = 1;
let enabled_saddr = 1;
@@ -323,10 +347,20 @@ multiclass FLAT_Global_Atomic_Pseudo<
}
}
+multiclass FLAT_Global_Atomic_Pseudo<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ SDPatternOperator atomic = null_frag,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc> :
+ FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
+ FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;
+
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
>;
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
new file mode 100644
index 000000000000..56071d0d2374
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -0,0 +1,446 @@
+//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass combines a V_MOV_B32_dpp instruction with its VALU uses as a DPP
+// src0 operand. If any of the use instructions cannot be combined with the
+// mov, the whole sequence is reverted.
+//
+// $old = ...
+// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+// dpp_controls..., $bound_ctrl
+// $res = VALU $dpp_value, ...
+//
+// to
+//
+// $res = VALU_DPP $folded_old, $vgpr_to_be_read_from_other_lane, ...,
+// dpp_controls..., $folded_bound_ctrl
+//
+// Combining rules:
+//
+// $bound_ctrl is DPP_BOUND_ZERO, $old is any
+// $bound_ctrl is DPP_BOUND_OFF,  $old is 0
+// -> $folded_old = undef, $folded_bound_ctrl = DPP_BOUND_ZERO
+//
+// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// -> $folded_old = undef, $folded_bound_ctrl = DPP_BOUND_OFF
+//
+// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// -> $folded_old = folded value, $folded_bound_ctrl = DPP_BOUND_OFF
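+//
+// For illustration (schematic only; V_ADD_F32 stands in for any eligible VALU
+// instruction): with bound_ctrl = DPP_BOUND_OFF and $old initialized to 0,
+//
+//   $old = V_MOV_B32_e32 0
+//   $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+//                dpp_controls..., DPP_BOUND_OFF
+//   $res = V_ADD_F32_e32 $dpp_value, $other
+//
+// matches the first rule group (old folds to undef, bound_ctrl to
+// DPP_BOUND_ZERO) and becomes
+//
+//   $res = V_ADD_F32_dpp undef, $vgpr_to_be_read_from_other_lane, $other,
+//          dpp_controls..., DPP_BOUND_ZERO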
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gcn-dpp-combine"
+
+STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
+
+namespace {
+
+class GCNDPPCombine : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const SIInstrInfo *TII;
+
+ using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
+
+ MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
+
+ RegSubRegPair foldOldOpnd(MachineInstr &OrigMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand &OldOpndValue) const;
+
+ MachineInstr *createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand *OldOpnd,
+ bool BoundCtrlZero) const;
+
+ MachineInstr *createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ bool BoundCtrlZero) const;
+
+ bool hasNoImmOrEqual(MachineInstr &MI,
+ unsigned OpndName,
+ int64_t Value,
+ int64_t Mask = -1) const;
+
+ bool combineDPPMov(MachineInstr &MI) const;
+
+public:
+ static char ID;
+
+ GCNDPPCombine() : MachineFunctionPass(ID) {
+ initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "GCN DPP Combine"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+
+char GCNDPPCombine::ID = 0;
+
+char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+
+FunctionPass *llvm::createGCNDPPCombinePass() {
+ return new GCNDPPCombine();
+}
+
+static int getDPPOp(unsigned Op) {
+ auto DPP32 = AMDGPU::getDPPOp32(Op);
+ if (DPP32 != -1)
+ return DPP32;
+
+ auto E32 = AMDGPU::getVOPe32(Op);
+ return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
+}
+
+// tracks the register operand definition and returns:
+// 1. immediate operand used to initialize the register if found
+// 2. nullptr if the register operand is undef
+// 3. the operand itself otherwise
+MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
+ auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
+ if (!Def)
+ return nullptr;
+
+ switch(Def->getOpcode()) {
+ default: break;
+ case AMDGPU::IMPLICIT_DEF:
+ return nullptr;
+ case AMDGPU::COPY:
+ case AMDGPU::V_MOV_B32_e32: {
+ auto &Op1 = Def->getOperand(1);
+ if (Op1.isImm())
+ return &Op1;
+ break;
+ }
+ }
+ return &OldOpnd;
+}
+
+MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ bool BoundCtrlZero) const {
+ assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
+ TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
+
+ auto OrigOp = OrigMI.getOpcode();
+ auto DPPOp = getDPPOp(OrigOp);
+ if (DPPOp == -1) {
+ LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
+ return nullptr;
+ }
+
+ auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
+ OrigMI.getDebugLoc(), TII->get(DPPOp));
+ bool Fail = false;
+ do {
+ auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
+ assert(Dst);
+ DPPInst.add(*Dst);
+ int NumOperands = 1;
+
+ const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
+ if (OldIdx != -1) {
+ assert(OldIdx == NumOperands);
+ assert(isOfRegClass(OldOpndVGPR, AMDGPU::VGPR_32RegClass, *MRI));
+ DPPInst.addReg(OldOpndVGPR.Reg, 0, OldOpndVGPR.SubReg);
+ ++NumOperands;
+ }
+
+ if (auto *Mod0 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src0_modifiers)) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src0_modifiers));
+ assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ DPPInst.addImm(Mod0->getImm());
+ ++NumOperands;
+ }
+ auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
+ assert(Src0);
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
+ LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src0);
+ ++NumOperands;
+
+ if (auto *Mod1 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src1_modifiers)) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src1_modifiers));
+ assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ DPPInst.addImm(Mod1->getImm());
+ ++NumOperands;
+ }
+ if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
+ LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src1);
+ ++NumOperands;
+ }
+
+ if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
+ LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src2);
+ }
+
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
+ DPPInst.addImm(BoundCtrlZero ? 1 : 0);
+ } while (false);
+
+ if (Fail) {
+ DPPInst.getInstr()->eraseFromParent();
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
+ return DPPInst.getInstr();
+}
+
+GCNDPPCombine::RegSubRegPair
+GCNDPPCombine::foldOldOpnd(MachineInstr &OrigMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand &OldOpndValue) const {
+ assert(OldOpndValue.isImm());
+ switch (OrigMI.getOpcode()) {
+ default: break;
+ case AMDGPU::V_MAX_U32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<uint32_t>::max())
+ return OldOpndVGPR;
+ break;
+ case AMDGPU::V_MAX_I32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::max())
+ return OldOpndVGPR;
+ break;
+ case AMDGPU::V_MIN_I32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::min())
+ return OldOpndVGPR;
+ break;
+
+ case AMDGPU::V_MUL_I32_I24_e32:
+ case AMDGPU::V_MUL_U32_U24_e32:
+ if (OldOpndValue.getImm() == 1) {
+ auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+ assert(Src1 && Src1->isReg());
+ return getRegSubRegPair(*Src1);
+ }
+ break;
+ }
+ return RegSubRegPair();
+}
+
+// Cases to combine:
+// $bound_ctrl is DPP_BOUND_ZERO, $old is any
+// $bound_ctrl is DPP_BOUND_OFF, $old is 0
+// -> $old = undef, $bound_ctrl = DPP_BOUND_ZERO
+
+// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// -> $old = undef, $bound_ctrl = DPP_BOUND_OFF
+
+// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// -> $old = folded value, $bound_ctrl = DPP_BOUND_OFF
+
+MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand *OldOpndValue,
+ bool BoundCtrlZero) const {
+ assert(OldOpndVGPR.Reg);
+ if (!BoundCtrlZero && OldOpndValue) {
+ assert(OldOpndValue->isImm());
+ OldOpndVGPR = foldOldOpnd(OrigMI, OldOpndVGPR, *OldOpndValue);
+ if (!OldOpndVGPR.Reg) {
+ LLVM_DEBUG(dbgs() << " failed: old immediate cannot be folded\n");
+ return nullptr;
+ }
+ }
+ return createDPPInst(OrigMI, MovMI, OldOpndVGPR, BoundCtrlZero);
+}
+
+// Returns true if MI doesn't have an OpndName immediate operand, or if that
+// operand's value, masked by Mask, equals Value.
+bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
+ int64_t Value, int64_t Mask) const {
+ auto *Imm = TII->getNamedOperand(MI, OpndName);
+ if (!Imm)
+ return true;
+
+ assert(Imm->isImm());
+ return (Imm->getImm() & Mask) == Value;
+}
+
+bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
+ assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
+ assert(BCZOpnd && BCZOpnd->isImm());
+ bool BoundCtrlZero = 0 != BCZOpnd->getImm();
+
+ LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
+
+ auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
+ assert(OldOpnd && OldOpnd->isReg());
+ auto OldOpndVGPR = getRegSubRegPair(*OldOpnd);
+ auto *OldOpndValue = getOldOpndValue(*OldOpnd);
+ assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
+ if (OldOpndValue) {
+ if (BoundCtrlZero) {
+ OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef, ignore old opnd
+ OldOpndValue = nullptr;
+ } else {
+ if (!OldOpndValue->isImm()) {
+ LLVM_DEBUG(dbgs() << " failed: old operand isn't an imm or undef\n");
+ return false;
+ }
+ if (OldOpndValue->getImm() == 0) {
+ OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef
+ OldOpndValue = nullptr;
+ BoundCtrlZero = true;
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " old=";
+ if (!OldOpndValue)
+ dbgs() << "undef";
+ else
+ dbgs() << OldOpndValue->getImm();
+ dbgs() << ", bound_ctrl=" << BoundCtrlZero << '\n');
+
+ std::vector<MachineInstr*> OrigMIs, DPPMIs;
+ if (!OldOpndVGPR.Reg) { // OldOpndVGPR = undef
+ OldOpndVGPR = RegSubRegPair(
+ MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
+ auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), OldOpndVGPR.Reg);
+ DPPMIs.push_back(UndefInst.getInstr());
+ }
+
+ OrigMIs.push_back(&MovMI);
+ bool Rollback = true;
+ for (auto &Use : MRI->use_nodbg_operands(
+ TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg())) {
+ Rollback = true;
+
+ auto &OrigMI = *Use.getParent();
+ auto OrigOp = OrigMI.getOpcode();
+ if (TII->isVOP3(OrigOp)) {
+ if (!TII->hasVALU32BitEncoding(OrigOp)) {
+ LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
+ break;
+ }
+ // check if other than abs|neg modifiers are set (opsel for example)
+ const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
+ if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
+ LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n");
+ break;
+ }
+ } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
+ LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
+ if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
+ if (auto *DPPInst = createDPPInst(OrigMI, MovMI, OldOpndVGPR,
+ OldOpndValue, BoundCtrlZero)) {
+ DPPMIs.push_back(DPPInst);
+ Rollback = false;
+ }
+ } else if (OrigMI.isCommutable() &&
+ &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ auto *BB = OrigMI.getParent();
+ auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
+ BB->insert(OrigMI, NewMI);
+ if (TII->commuteInstruction(*NewMI)) {
+ LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
+ if (auto *DPPInst = createDPPInst(*NewMI, MovMI, OldOpndVGPR,
+ OldOpndValue, BoundCtrlZero)) {
+ DPPMIs.push_back(DPPInst);
+ Rollback = false;
+ }
+ } else
+ LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
+ NewMI->eraseFromParent();
+ } else
+ LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
+ if (Rollback)
+ break;
+ OrigMIs.push_back(&OrigMI);
+ }
+
+ for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
+ MI->eraseFromParent();
+
+ return !Rollback;
+}
+
+bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+ auto &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasDPP() || skipFunction(MF.getFunction()))
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TII = ST.getInstrInfo();
+
+ assert(MRI->isSSA() && "Must be run on SSA");
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
+ auto &MI = *I++;
+ if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
+ Changed = true;
+ ++NumDPPMovsCombined;
+ }
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index f236f10ba75a..c6396de89c4f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -215,6 +215,14 @@ void GCNHazardRecognizer::AdvanceCycle() {
if (!CurrCycleInstr)
return;
+ // Do not track non-instructions which do not affect the wait states.
+  // If included, these instructions can overflow the tracked-instruction
+  // buffer and cause detectable hazards to be missed.
+ if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
+ return;
+ else if (CurrCycleInstr->isDebugInstr())
+ return;
+
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
@@ -253,8 +261,7 @@ int GCNHazardRecognizer::getWaitStatesSince(
return WaitStates;
unsigned Opcode = MI->getOpcode();
- if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF ||
- Opcode == AMDGPU::INLINEASM)
+ if (Opcode == AMDGPU::INLINEASM)
continue;
}
++WaitStates;
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
index 651091d44136..d62dc8d86781 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -335,7 +335,7 @@ GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
assert(C);
AvailQueue.remove(*C);
auto SU = C->SU;
- LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
advanceToCycle(SU->getHeight());
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 15366d66bd85..8e4cc391dc21 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -434,8 +434,7 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
// Sort recorded regions by pressure - highest at the front
void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
- llvm::sort(Regions.begin(), Regions.end(),
- [&ST, TargetOcc](const Region *R1, const Region *R2) {
+ llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
});
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index 192d534bb9cf..ec6bcae33555 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -258,7 +258,7 @@ GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
assert(C);
RQ.remove(*C);
auto SU = C->SU;
- LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
releaseSuccessors(SU, StepNo);
Schedule.push_back(SU);
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td b/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
index d76acfa24f90..b8142a4e4ff8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -156,3 +156,8 @@ def : ProcessorModel<"gfx904", SIQuarterSpeedModel,
def : ProcessorModel<"gfx906", SIQuarterSpeedModel,
[FeatureISAVersion9_0_6]
>;
+
+def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
+ [FeatureISAVersion9_0_9]
+>;
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index db908368a179..fab0f87dfcbe 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -207,9 +207,12 @@ void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "da");
}
-void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "r128");
+ if (STI.hasFeature(AMDGPU::FeatureR128A16))
+ printNamedBit(MI, OpNo, O, "a16");
+ else
+ printNamedBit(MI, OpNo, O, "r128");
}
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
@@ -236,21 +239,12 @@ void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
O << " vm";
}
-void AMDGPUInstPrinter::printDFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " dfmt:";
- printU8ImmDecOperand(MI, OpNo, O);
- }
-}
-
-void AMDGPUInstPrinter::printNFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " nfmt:";
- printU8ImmDecOperand(MI, OpNo, O);
+void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (unsigned Val = MI->getOperand(OpNo).getImm()) {
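+    // The combined operand packs dfmt in bits [3:0] and nfmt above them; e.g.
+    // an immediate of 0x74 prints " dfmt:4, nfmt:7".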
+ O << " dfmt:" << (Val & 15);
+ O << ", nfmt:" << (Val >> 4);
}
}
@@ -1161,8 +1155,7 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 11a496a38b2c..0ba74ca0f3e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -80,7 +80,7 @@ private:
raw_ostream &O);
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -90,10 +90,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printExpVM(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printDFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printNFMT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFORMAT(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 07bef9103c0d..c85a1ea5b054 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -46,11 +46,9 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
if (const auto *SymA = Target.getSymA()) {
// SCRATCH_RSRC_DWORD[01] is a special global variable that represents
// the scratch buffer.
- if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD0")
+ if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD0" ||
+ SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
return ELF::R_AMDGPU_ABS32_LO;
-
- if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
- return ELF::R_AMDGPU_ABS32_HI;
}
switch (Target.getAccessVariant()) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 6a41e3f650bc..c17fe126546c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -17,7 +17,9 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
@@ -27,6 +29,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetParser.h"
namespace llvm {
#include "AMDGPUPTNote.h"
@@ -34,90 +37,13 @@ namespace llvm {
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::HSAMD;
//===----------------------------------------------------------------------===//
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
-static const struct {
- const char *Name;
- unsigned Mach;
-} MachTable[] = {
- // Radeon HD 2000/3000 Series (R600).
- { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
- { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
- { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
- { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
- // Radeon HD 4000 Series (R700).
- { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
- { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
- { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
- // Radeon HD 5000 Series (Evergreen).
- { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
- { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
- { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
- { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
- { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
- // Radeon HD 6000 Series (Northern Islands).
- { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
- { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
- { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
- { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
- // AMDGCN GFX6.
- { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
- { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
- { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- // AMDGCN GFX7.
- { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
- { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
- { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
- { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
- { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
- { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
- { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
- // AMDGCN GFX8.
- { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
- { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
- { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
- { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
- // AMDGCN GFX9.
- { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
- { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
- { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
- { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
- // Not specified processor.
- { nullptr, ELF::EF_AMDGPU_MACH_NONE }
-};
-
-unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
- auto Entry = MachTable;
- for (; Entry->Name && GPU != Entry->Name; ++Entry)
- ;
- return Entry->Mach;
-}
-
-const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
- auto Entry = MachTable;
- for (; Entry->Name && Mach != Entry->Mach; ++Entry)
- ;
- return Entry->Name;
-}
-
-bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
+bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
return false;
@@ -125,6 +51,104 @@ bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
return EmitHSAMetadata(HSAMetadata);
}
+bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
+ std::shared_ptr<msgpack::Node> HSAMetadataRoot;
+ yaml::Input YIn(HSAMetadataString);
+ YIn >> HSAMetadataRoot;
+ if (YIn.error())
+ return false;
+ return EmitHSAMetadata(HSAMetadataRoot, false);
+}
+
+StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
+ AMDGPU::GPUKind AK;
+
+ switch (ElfMach) {
+ case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
+ case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
+ case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
+ case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
+ case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
+ case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
+ case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
+ case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
+ case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
+ case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
+ case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
+ }
+
+ StringRef GPUName = getArchNameAMDGCN(AK);
+ if (GPUName != "")
+ return GPUName;
+ return getArchNameR600(AK);
+}
+
+unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
+ AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
+ if (AK == AMDGPU::GPUKind::GK_NONE)
+ AK = parseArchR600(GPU);
+
+ switch (AK) {
+ case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
+ case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
+ case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
+ case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
+ case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
+ case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
+ case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
+ case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
+ case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
+ case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
+ case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
+ case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
+ case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
+ case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
+ case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
+ case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
+ case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
+ case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
+ case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
+ case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
+ case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
+ case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
+ case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
+ case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
+ case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
+ case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
+ case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
+ case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
+ case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
+ case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
+ case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
+ case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
+ case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
+ }
+
+ llvm_unreachable("unknown GPU");
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//
@@ -183,9 +207,26 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
if (HSAMD::toString(HSAMetadata, HSAMetadataString))
return false;
- OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
+ OS << '\t' << AssemblerDirectiveBegin << '\n';
OS << HSAMetadataString << '\n';
- OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
+ OS << '\t' << AssemblerDirectiveEnd << '\n';
+ return true;
+}
+
+bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
+ std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ V3::MetadataVerifier Verifier(Strict);
+ if (!Verifier.verify(*HSAMetadataRoot))
+ return false;
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << HSAMetadataRoot;
+
+ OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
+ OS << StrOS.str() << '\n';
+ OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
return true;
}
@@ -203,70 +244,59 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
- amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
-
- IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
+ IsaVersion IVersion = getIsaVersion(STI.getCPU());
OS << "\t.amdhsa_kernel " << KernelName << '\n';
-#define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC, \
- DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
- if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) != \
- AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME)) \
- STREAM << "\t\t" << DIRECTIVE << " " \
- << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
-
- if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
- OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
- << '\n';
- if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
- OS << "\t\t.amdhsa_private_segment_fixed_size "
- << KD.private_segment_fixed_size << '\n';
-
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
- kernel_code_properties,
- amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
+#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
+ STREAM << "\t\t" << DIRECTIVE << " " \
+ << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
+
+ OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
+ << '\n';
+ OS << "\t\t.amdhsa_private_segment_fixed_size "
+ << KD.private_segment_fixed_size << '\n';
+
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ PRINT_FIELD(
+ OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
+ PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
+ PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
// These directives are required.
OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
@@ -279,54 +309,52 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
+ PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
+ PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
+ PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
+ PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
+ PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
+ PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
if (IVersion.Major >= 9)
- PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
- compute_pgm_rsrc1,
- amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
+ PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ PRINT_FIELD(
+ OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
+ PRINT_FIELD(
+ OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
- compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
- PRINT_IF_NOT_DEFAULT(
- OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
- amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
-#undef PRINT_IF_NOT_DEFAULT
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
+ PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
+ PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
+#undef PRINT_FIELD
OS << "\t.end_amdhsa_kernel\n";
}
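
The PRINT_FIELD macro above leans on AMDHSA_BITS_GET to pull individual bit-fields out of the packed kernel-descriptor words. As a rough illustration of the mask-and-shift extraction it performs (the mask and shift constants below are made up; the real ones come from the amdhsa:: enumerators), a minimal standalone sketch:

    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-ins for one packed field; in the real headers each
    // amdhsa:: field carries a matching mask and shift value.
    constexpr uint32_t EXAMPLE_FIELD       = 0x00000780; // bits 7..10
    constexpr uint32_t EXAMPLE_FIELD_SHIFT = 7;

    constexpr uint32_t bitsGet(uint32_t Src, uint32_t Mask, uint32_t Shift) {
      return (Src & Mask) >> Shift; // roughly what AMDHSA_BITS_GET expands to
    }

    int main() {
      uint32_t compute_pgm_rsrc2 = 0x00000400; // example packed descriptor word
      std::cout << "\t\t.amdhsa_example_field "
                << bitsGet(compute_pgm_rsrc2, EXAMPLE_FIELD, EXAMPLE_FIELD_SHIFT)
                << '\n';
      return 0;
    }
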
@@ -342,12 +370,16 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
unsigned EFlags = MCA.getELFHeaderEFlags();
EFlags &= ~ELF::EF_AMDGPU_MACH;
- EFlags |= getMACH(STI.getCPU());
+ EFlags |= getElfMach(STI.getCPU());
EFlags &= ~ELF::EF_AMDGPU_XNACK;
if (AMDGPU::hasXNACK(STI))
EFlags |= ELF::EF_AMDGPU_XNACK;
+ EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
+ if (AMDGPU::hasSRAMECC(STI))
+ EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
+
MCA.setELFHeaderEFlags(EFlags);
}
@@ -355,13 +387,13 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
-void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
- const MCExpr *DescSZ, unsigned NoteType,
+void AMDGPUTargetELFStreamer::EmitNote(
+ StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc) {
auto &S = getStreamer();
auto &Context = S.getContext();
- auto NameSZ = sizeof(ElfNote::NoteName);
+ auto NameSZ = Name.size() + 1;
S.PushSection();
S.SwitchSection(Context.getELFSection(
@@ -369,7 +401,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
S.EmitIntValue(NameSZ, 4); // namesz
S.EmitValue(DescSZ, 4); // descz
S.EmitIntValue(NoteType, 4); // type
- S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
+ S.EmitBytes(Name); // name
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
@@ -381,14 +413,11 @@ void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
uint32_t Major, uint32_t Minor) {
- EmitAMDGPUNote(
- MCConstantExpr::create(8, getContext()),
- ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
- [&](MCELFStreamer &OS){
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
+ ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ });
}
void
@@ -404,21 +433,18 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
- EmitAMDGPUNote(
- MCConstantExpr::create(DescSZ, getContext()),
- ElfNote::NT_AMDGPU_HSA_ISA,
- [&](MCELFStreamer &OS) {
- OS.EmitIntValue(VendorNameSize, 2);
- OS.EmitIntValue(ArchNameSize, 2);
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- OS.EmitIntValue(Stepping, 4);
- OS.EmitBytes(VendorName);
- OS.EmitIntValue(0, 1); // NULL terminate VendorName
- OS.EmitBytes(ArchName);
- OS.EmitIntValue(0, 1); // NULL terminte ArchName
- }
- );
+ EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
+ ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(VendorNameSize, 2);
+ OS.EmitIntValue(ArchNameSize, 2);
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitIntValue(Stepping, 4);
+ OS.EmitBytes(VendorName);
+ OS.EmitIntValue(0, 1); // NULL terminate VendorName
+ OS.EmitBytes(ArchName);
+                   OS.EmitIntValue(0, 1); // NULL terminate ArchName
+ });
}
void
@@ -447,15 +473,41 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitAMDGPUNote(
- DescSZ,
- ELF::NT_AMD_AMDGPU_ISA,
- [&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(IsaVersionString);
- OS.EmitLabel(DescEnd);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(IsaVersionString);
+ OS.EmitLabel(DescEnd);
+ });
+ return true;
+}
+
+bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
+ std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ V3::MetadataVerifier Verifier(Strict);
+ if (!Verifier.verify(*HSAMetadataRoot))
+ return false;
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ msgpack::Writer MPWriter(StrOS);
+ HSAMetadataRoot->write(MPWriter);
+
+ // Create two labels to mark the beginning and end of the desc field
+ // and a MCExpr to calculate the size of the desc field.
+ auto &Context = getContext();
+ auto *DescBegin = Context.createTempSymbol();
+ auto *DescEnd = Context.createTempSymbol();
+ auto *DescSZ = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(DescEnd, Context),
+ MCSymbolRefExpr::create(DescBegin, Context), Context);
+
+ EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(StrOS.str());
+ OS.EmitLabel(DescEnd);
+ });
return true;
}
@@ -474,28 +526,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitAMDGPUNote(
- DescSZ,
- ELF::NT_AMD_AMDGPU_HSA_METADATA,
- [&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(HSAMetadataString);
- OS.EmitLabel(DescEnd);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(HSAMetadataString);
+ OS.EmitLabel(DescEnd);
+ });
return true;
}
bool AMDGPUTargetELFStreamer::EmitPALMetadata(
const PALMD::Metadata &PALMetadata) {
- EmitAMDGPUNote(
- MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
- ELF::NT_AMD_AMDGPU_PAL_METADATA,
- [&](MCELFStreamer &OS){
- for (auto I : PALMetadata)
- OS.EmitIntValue(I, sizeof(uint32_t));
- }
- );
+ EmitNote(ElfNote::NoteNameV2,
+ MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
+ getContext()),
+ ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
+ for (auto I : PALMetadata)
+ OS.EmitIntValue(I, sizeof(uint32_t));
+ });
return true;
}
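
The EmitNote refactor above parameterizes the note name (NoteNameV2 vs. NoteNameV3) but keeps the record layout the same: namesz, descsz, type, the NUL-terminated name, then the desc payload, each padded to 4-byte alignment. A minimal sketch of that layout using only the standard library (the EmitDesc callback is replaced by a plain byte vector; this is an illustration of the note format, not the streamer API itself):

    #include <cstdint>
    #include <string>
    #include <vector>

    std::vector<uint8_t> buildElfNote(const std::string &Name, uint32_t Type,
                                      const std::vector<uint8_t> &Desc) {
      std::vector<uint8_t> Out;
      auto emitU32 = [&Out](uint32_t V) {          // little-endian word
        for (int I = 0; I < 4; ++I)
          Out.push_back(static_cast<uint8_t>(V >> (8 * I)));
      };
      auto pad4 = [&Out]() {                       // pad to a 4-byte boundary
        while (Out.size() % 4)
          Out.push_back(0);
      };
      emitU32(static_cast<uint32_t>(Name.size() + 1)); // namesz counts the NUL
      emitU32(static_cast<uint32_t>(Desc.size()));     // descsz
      emitU32(Type);                                   // type
      Out.insert(Out.end(), Name.begin(), Name.end()); // name
      Out.push_back(0);                                // NUL terminator
      pad4();
      Out.insert(Out.end(), Desc.begin(), Desc.end()); // desc
      pad4();
      return Out;
    }
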
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 472da1b73593..9a807c804f9f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#include "AMDKernelCodeT.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDGPUMetadata.h"
@@ -31,13 +32,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
protected:
MCContext &getContext() const { return Streamer.getContext(); }
- /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
- unsigned getMACH(StringRef GPU) const;
-
public:
- /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
- static const char *getMachName(unsigned Mach);
-
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
@@ -58,7 +53,20 @@ public:
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
/// \returns True on success, false on failure.
- virtual bool EmitHSAMetadata(StringRef HSAMetadataString);
+ virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
+
+ /// \returns True on success, false on failure.
+ virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString);
+
+ /// Emit HSA Metadata
+ ///
+ /// When \p Strict is true, known metadata elements must already be
+ /// well-typed. When \p Strict is false, known types are inferred and
+ /// the \p HSAMetadata structure is updated with the correct types.
+ ///
+ /// \returns True on success, false on failure.
+ virtual bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) = 0;
/// \returns True on success, false on failure.
virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
@@ -71,6 +79,9 @@ public:
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
bool ReserveXNACK) = 0;
+
+ static StringRef getArchNameFromElfMach(unsigned ElfMach);
+ static unsigned getElfMach(StringRef GPU);
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
@@ -95,6 +106,10 @@ public:
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
+ bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) override;
+
+ /// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
@@ -110,8 +125,8 @@ public:
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
- void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType,
- function_ref<void(MCELFStreamer &)> EmitDesc);
+ void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
+ function_ref<void(MCELFStreamer &)> EmitDesc);
public:
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
@@ -135,6 +150,10 @@ public:
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
+ bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) override;
+
+ /// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
diff --git a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 44c2d366e461..1c68dbd78e75 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -29,6 +29,7 @@ class MIMGBaseOpcode {
bit Atomic = 0;
bit AtomicX2 = 0; // (f)cmpswap
bit Sampler = 0;
+ bit Gather4 = 0;
bits<8> NumExtraArgs = 0;
bit Gradients = 0;
bit Coordinates = 1;
@@ -43,7 +44,7 @@ def MIMGBaseOpcode : GenericEnum {
def MIMGBaseOpcodesTable : GenericTable {
let FilterClass = "MIMGBaseOpcode";
let CppTypeName = "MIMGBaseOpcodeInfo";
- let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
+ let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4",
"NumExtraArgs", "Gradients", "Coordinates", "LodOrClampOrMip",
"HasD16"];
GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
@@ -141,7 +142,7 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -179,6 +180,8 @@ multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16, bit mip = 0,
defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 0>;
let VDataDwords = 4 in
defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 0>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_NoSampler_Src_Helper <op, asm, VReg_256, 0>;
}
}
@@ -199,7 +202,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -252,7 +255,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
}
@@ -316,7 +319,7 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -411,6 +414,8 @@ multiclass MIMG_Sampler <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
defm _V3 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_96>;
let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
}
}
@@ -421,6 +426,7 @@ multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
string asm = "image_gather4"#sample.LowerCaseMod> {
def "" : MIMG_Sampler_BaseOpcode<sample> {
let HasD16 = 1;
+ let Gather4 = 1;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
@@ -429,6 +435,8 @@ multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;
+ let VDataDwords = 8 in
+ defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 1683fe6c9a57..679cf18d2c20 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -226,11 +226,11 @@ private:
// occur in the same basic block as its definition, because
// it is illegal for the scheduler to schedule them in
// different blocks.
- if (UseI->readsRegister(MOI->getReg()))
+ if (UseI->readsRegister(MOI->getReg(), &TRI))
LastUseCount = AluInstCount;
// Exit early if the current use kills the register
- if (UseI != Def && UseI->killsRegister(MOI->getReg()))
+ if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI))
break;
}
if (LastUseCount)
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index e00dffc4be99..e2a0f05d2b34 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -589,7 +589,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
case Intrinsic::r600_implicitarg_ptr: {
- MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
@@ -741,12 +741,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
- MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
@@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.PARAM_I_ADDRESS);
+ AMDGPUAS::PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@@ -1141,7 +1141,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
//TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
- assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
+ assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
@@ -1175,7 +1175,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
// Load dword
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+ Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
Chain = Dst.getValue(1);
@@ -1241,9 +1241,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
- if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
- if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
StoreNode->isTruncatingStore()) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
@@ -1267,7 +1267,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
- if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
@@ -1320,7 +1320,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
- if (AS != AMDGPUASI.PRIVATE_ADDRESS)
+ if (AS != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
@@ -1403,7 +1403,7 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
// Load dword
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(UndefValue::get(
- Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
+ Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
// Get offset within the register.
@@ -1441,7 +1441,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = LoadNode->getMemoryVT();
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
- if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
return lowerPrivateExtLoad(Op, DAG);
}
@@ -1451,8 +1451,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
- if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
- LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
+ if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
@@ -1473,7 +1473,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(4, DL, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
+ AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
@@ -1509,7 +1509,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(MergedValues, DL);
}
- if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return SDValue();
}
@@ -1606,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.PARAM_I_ADDRESS);
+ AMDGPUAS::PARAM_I_ADDRESS);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1656,7 +1656,7 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const {
// Local and Private addresses do not handle vectors. Limit to i32
- if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
return (MemVT.getSizeInBits() <= 32);
}
return true;
@@ -1685,14 +1685,15 @@ bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
static SDValue CompactSwizzlableVector(
SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
+
+ SDLoc DL(VectorEntry);
+ EVT EltTy = VectorEntry.getValueType().getVectorElementType();
+
+ SDValue NewBldVec[4];
+ for (unsigned i = 0; i < 4; i++)
+ NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
+ DAG.getIntPtrConstant(i, DL));
for (unsigned i = 0; i < 4; i++) {
if (NewBldVec[i].isUndef())
@@ -1727,15 +1728,17 @@ static SDValue CompactSwizzlableVector(
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
- assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
- SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
- };
- bool isUnmovable[4] = { false, false, false, false };
+
+ SDLoc DL(VectorEntry);
+ EVT EltTy = VectorEntry.getValueType().getVectorElementType();
+
+ SDValue NewBldVec[4];
+ bool isUnmovable[4] = {false, false, false, false};
+ for (unsigned i = 0; i < 4; i++)
+ NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
+ DAG.getIntPtrConstant(i, DL));
+
for (unsigned i = 0; i < 4; i++) {
RemapSwizzle[i] = i;
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
@@ -1766,7 +1769,6 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
SelectionDAG &DAG,
const SDLoc &DL) const {
- assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
// Old -> New swizzle values
DenseMap<unsigned, unsigned> SwizzleRemap;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 5397e779474c..9cc3e5f3c314 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -229,11 +229,11 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
}
bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
- return MI.findRegisterUseOperandIdx(R600::AR_X) != -1;
+ return MI.findRegisterUseOperandIdx(R600::AR_X, false, &RI) != -1;
}
bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
- return MI.findRegisterDefOperandIdx(R600::AR_X) != -1;
+ return MI.findRegisterDefOperandIdx(R600::AR_X, false, false, &RI) != -1;
}
bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
@@ -1500,19 +1500,19 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
}
unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const {
+ unsigned Kind) const {
switch (Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
+ return AMDGPUAS::PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
- return ST.getAMDGPUAS().CONSTANT_ADDRESS;
+ return AMDGPUAS::CONSTANT_ADDRESS;
}
+
llvm_unreachable("Invalid pseudo source kind");
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index 7a3dece31665..e6e34dc125f4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -324,7 +324,7 @@ public:
}
unsigned getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const override;
+ unsigned Kind) const override;
};
namespace R600 {
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
index 7bf174f4cd86..10e873755222 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -299,7 +299,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
- (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
+ (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
>;
def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
@@ -309,8 +309,8 @@ def vtx_id3_load : LoadParamFrag<load>;
class LoadVtxId1 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
!isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
}]>;
@@ -322,7 +322,7 @@ def vtx_id1_load : LoadVtxId1 <load>;
class LoadVtxId2 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
}]>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index a1429a2ac50f..7769a35aadce 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -127,13 +127,13 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
LLVM_DEBUG(if (SU) {
dbgs() << " ** Pick node **\n";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
} else {
dbgs() << "NO NODE \n";
for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
const SUnit &S = DAG->SUnits[i];
if (!S.isScheduled)
- S.dump(DAG);
+ DAG->dumpNode(S);
}
});
@@ -188,11 +188,11 @@ isPhysicalRegCopy(MachineInstr *MI) {
}
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
- LLVM_DEBUG(dbgs() << "Top Releasing "; SU->dump(DAG););
+ LLVM_DEBUG(dbgs() << "Top Releasing "; DAG->dumpNode(*SU));
}
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
- LLVM_DEBUG(dbgs() << "Bottom Releasing "; SU->dump(DAG););
+ LLVM_DEBUG(dbgs() << "Bottom Releasing "; DAG->dumpNode(*SU));
if (isPhysicalRegCopy(SU->getInstr())) {
PhysicalRegCopy.push_back(SU);
return;
@@ -236,6 +236,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
       // MI will become a KILL, don't consider it in scheduling
return AluDiscarded;
}
+ break;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
new file mode 100644
index 000000000000..69cafef4a351
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -0,0 +1,181 @@
+//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Any MIMG instructions that use tfe or lwe require an initialization of the
+/// result register that will be written in the case of a memory access failure.
+/// The required code is also added to tie this init code to the result of the
+/// image instruction.
+///
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-img-init"
+
+using namespace llvm;
+
+namespace {
+
+class SIAddIMGInit : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIAddIMGInit() : MachineFunctionPass(ID) {
+ initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
+
+char SIAddIMGInit::ID = 0;
+
+char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
+
+FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
+
+bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *RI = ST.getRegisterInfo();
+ bool Changed = false;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ auto Opcode = MI.getOpcode();
+ if (TII->isMIMG(Opcode) && !MI.mayStore()) {
+ MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
+ MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
+ MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
+
+ // Check for instructions that don't have tfe or lwe fields
+ // There shouldn't be any at this point.
+ assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
+
+ unsigned TFEVal = TFE->getImm();
+ unsigned LWEVal = LWE->getImm();
+ unsigned D16Val = D16 ? D16->getImm() : 0;
+
+ if (TFEVal || LWEVal) {
+          // At least one of TFE or LWE is non-zero
+ // We have to insert a suitable initialization of the result value and
+ // tie this to the dest of the image instruction.
+
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ int DstIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+
+ // Calculate which dword we have to initialize to 0.
+ MachineOperand *MO_Dmask =
+ TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
+
+ // check that dmask operand is found.
+ assert(MO_Dmask && "Expected dmask operand in instruction");
+
+ unsigned dmask = MO_Dmask->getImm();
+ // Determine the number of active lanes taking into account the
+ // Gather4 special case
+ unsigned ActiveLanes =
+ TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
+
+ // Subreg indices are counted from 1
+ // When D16 then we want next whole VGPR after write data.
+ static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
+
+ bool Packed = !ST.hasUnpackedD16VMem();
+
+ unsigned InitIdx =
+ D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
+
+ // Abandon attempt if the dst size isn't large enough
+ // - this is in fact an error but this is picked up elsewhere and
+ // reported correctly.
+ uint32_t DstSize =
+ RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
+ if (DstSize < InitIdx)
+ continue;
+
+          // Create a register for the initialization value.
+ unsigned PrevDst =
+ MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
+ unsigned NewDst = 0; // Final initialized value will be in here
+
+ // If PRTStrictNull feature is enabled (the default) then initialize
+ // all the result registers to 0, otherwise just the error indication
+ // register (VGPRn+1)
+ unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
+ unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
+
+ if (DstSize == 1) {
+ // In this case we can just initialize the result directly
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
+ .addImm(0);
+ NewDst = PrevDst;
+ } else {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
+ for (; SizeLeft; SizeLeft--, CurrIdx++) {
+ NewDst =
+ MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
+ // Initialize dword
+ unsigned SubReg =
+ MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
+ .addImm(0);
+ // Insert into the super-reg
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
+ .addReg(PrevDst)
+ .addReg(SubReg)
+ .addImm(CurrIdx);
+
+ PrevDst = NewDst;
+ }
+ }
+
+ // Add as an implicit operand
+ MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
+
+ // Tie the just added implicit operand to the dst
+ MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
+
+ Changed = true;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
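
The heart of the new pass is working out how many result dwords need the zero-initialization: the dmask popcount (or a fixed four lanes for gather4), halved and rounded up when packed D16 is in use, plus the TFE/LWE error dword. A self-contained restatement of that arithmetic (function and parameter names here are illustrative, not part of the pass):

    // Illustrative restatement of the InitIdx computation above; names are
    // hypothetical and nothing here is the pass itself.
    unsigned numDwordsToInit(unsigned Dmask, bool Gather4, bool D16,
                             bool PackedD16) {
      unsigned ActiveLanes = 0;
      if (Gather4) {
        ActiveLanes = 4;                // gather4 always returns four values
      } else {
        for (unsigned M = Dmask; M; M >>= 1)
          ActiveLanes += M & 1;         // one result lane per set dmask bit
      }
      // Packed D16 fits two half-sized results per dword (rounded up); the
      // trailing "+ 1" accounts for the TFE/LWE error dword after the data.
      return (D16 && PackedD16) ? ((ActiveLanes + 1) >> 1) + 1
                                : ActiveLanes + 1;
    }

    // e.g. dmask = 0b0111, no gather4, no d16  ->  3 data dwords + 1 = 4
    //      gather4 with packed d16             ->  (4+1)/2 + 1      = 3
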
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 74f1bd8fb986..98e9ea662324 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
@@ -52,7 +52,7 @@ using StackEntry = std::pair<BasicBlock *, Value *>;
using StackVector = SmallVector<StackEntry, 16>;
class SIAnnotateControlFlow : public FunctionPass {
- DivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA;
Type *Boolean;
Type *Void;
@@ -66,9 +66,7 @@ class SIAnnotateControlFlow : public FunctionPass {
Function *If;
Function *Else;
- Function *Break;
Function *IfBreak;
- Function *ElseBreak;
Function *Loop;
Function *EndCf;
@@ -95,8 +93,7 @@ class SIAnnotateControlFlow : public FunctionPass {
Value *
handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
- BranchInst *Term,
- SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions);
+ BranchInst *Term);
void handleLoop(BranchInst *Term);
@@ -116,7 +113,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@@ -127,7 +124,7 @@ public:
INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
@@ -149,9 +146,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
- Break = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_break);
IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
- ElseBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else_break);
Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
return false;
@@ -160,7 +155,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
/// Is the branch condition uniform or did the StructurizeCFG pass
/// consider it as such?
bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
- return DA->isUniform(T->getCondition()) ||
+ return DA->isUniform(T) ||
T->getMetadata("structurizecfg.uniform") != nullptr;
}
@@ -227,76 +222,7 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
/// Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(
- Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term,
- SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions) {
- // Only search through PHI nodes which are inside the loop. If we try this
- // with PHI nodes that are outside of the loop, we end up inserting new PHI
- // nodes outside of the loop which depend on values defined inside the loop.
- // This will break the module with
- // 'Instruction does not dominate all users!' errors.
- PHINode *Phi = nullptr;
- if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
- BasicBlock *Parent = Phi->getParent();
- PHINode *NewPhi = PHINode::Create(Int64, 0, "loop.phi", &Parent->front());
- Value *Ret = NewPhi;
-
- // Handle all non-constant incoming values first
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = Phi->getIncomingValue(i);
- BasicBlock *From = Phi->getIncomingBlock(i);
- if (isa<ConstantInt>(Incoming)) {
- NewPhi->addIncoming(Broken, From);
- continue;
- }
-
- Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken, L,
- Term, LoopPhiConditions);
- NewPhi->addIncoming(PhiArg, From);
- }
-
- BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
-
- for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = Phi->getIncomingValue(i);
- if (Incoming != BoolTrue)
- continue;
-
- BasicBlock *From = Phi->getIncomingBlock(i);
- if (From == IDom) {
- // We're in the following situation:
- // IDom/From
- // | \
- // | If-block
- // | /
- // Parent
- // where we want to break out of the loop if the If-block is not taken.
- // Due to the depth-first traversal, there should be an end.cf
- // intrinsic in Parent, and we insert an else.break before it.
- //
- // Note that the end.cf need not be the first non-phi instruction
- // of parent, particularly when we're dealing with a multi-level
- // break, but it should occur within a group of intrinsic calls
- // at the beginning of the block.
- CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
- while (OldEnd && OldEnd->getCalledFunction() != EndCf)
- OldEnd = dyn_cast<CallInst>(OldEnd->getNextNode());
- if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
- Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
- Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
- continue;
- }
- }
-
- TerminatorInst *Insert = From->getTerminator();
- Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
- NewPhi->setIncomingValue(i, PhiArg);
- }
-
- LoopPhiConditions.push_back(WeakTrackingVH(Phi));
- return Ret;
- }
-
+ Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
Instruction *Insert;
@@ -335,21 +261,15 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
- SmallVector<WeakTrackingVH, 8> LoopPhiConditions;
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken, L, Term, LoopPhiConditions);
+ Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
for (BasicBlock *Pred : predecessors(Target))
Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
- for (WeakTrackingVH Val : llvm::reverse(LoopPhiConditions)) {
- if (PHINode *Cond = cast_or_null<PHINode>(Val))
- eraseIfUnused(Cond);
- }
-
push(Term->getSuccessor(0), Arg);
}
@@ -372,7 +292,8 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
Preds.push_back(Pred);
}
- BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
+ BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, nullptr,
+ false);
}
Value *Exec = popSaved();
@@ -386,7 +307,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
index a6d28d6999e5..7f6abc34cff3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -88,7 +88,10 @@ enum : uint64_t {
IsPacked = UINT64_C(1) << 49,
// Is a D16 buffer instruction.
- D16Buf = UINT64_C(1) << 50
+ D16Buf = UINT64_C(1) << 50,
+
+ // Uses floating point double precision rounding mode
+ FPDPRounding = UINT64_C(1) << 51
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 566e0d3febc7..809f5bab4693 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -183,13 +183,15 @@ getCopyRegClasses(const MachineInstr &Copy,
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
- return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
+ return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) &&
+ TRI.hasVGPRs(SrcRC);
}
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
- return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
+ return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) &&
+ TRI.hasVGPRs(DstRC);
}
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
@@ -327,9 +329,7 @@ static bool phiHasBreakDef(const MachineInstr &PHI,
switch (DefInstr->getOpcode()) {
default:
break;
- case AMDGPU::SI_BREAK:
case AMDGPU::SI_IF_BREAK:
- case AMDGPU::SI_ELSE_BREAK:
return true;
case AMDGPU::PHI:
if (phiHasBreakDef(*DefInstr, MRI, Visited))
@@ -599,7 +599,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
unsigned SrcReg = MI.getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
break;
}
@@ -614,7 +614,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MI.setDesc(TII->get(SMovOp));
break;
}
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
} else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
}
@@ -677,7 +677,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 8> Visited;
if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
}
break;
}
@@ -690,7 +690,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
break;
case AMDGPU::INSERT_SUBREG: {
const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
@@ -700,7 +700,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
- TII->moveToVALU(MI);
+ TII->moveToVALU(MI, MDT);
}
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
index 5d613d8874fa..7761418c5336 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
@@ -10,7 +10,7 @@
/// \file
/// Computations in WWM can overwrite values in inactive channels for
/// variables that the register allocator thinks are dead. This pass adds fake
-/// uses of those variables to WWM instructions to make sure that they aren't
+/// uses of those variables to their def(s) to make sure that they aren't
/// overwritten.
///
/// As an example, consider this snippet:
@@ -29,25 +29,44 @@
/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
/// it would clobber even the inactive channels for which the if-condition is
/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
-/// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the
+/// of %vgpr0 to its def to make sure they aren't allocated to the
/// same register.
///
/// In general, we need to figure out what registers might have their inactive
/// channels which are eventually used accidentally clobbered by a WWM
-/// instruction. We approximate this using two conditions:
+/// instruction. We do that by spotting three separate cases of registers:
///
-/// 1. A definition of the variable reaches the WWM instruction.
-/// 2. The variable would be live at the WWM instruction if all its defs were
-/// partial defs (i.e. considered as a use), ignoring normal uses.
+/// 1. A "then phi": the value resulting from phi elimination of a phi node at
+/// the end of an if..endif. If there is WWM code in the "then", then we
+/// make the def at the end of the "then" branch a partial def by adding an
+/// implicit use of the register.
///
-/// If a register matches both conditions, then we add an implicit use of it to
-/// the WWM instruction. Condition #2 is the heart of the matter: every
-/// definition is really a partial definition, since every VALU instruction is
-/// implicitly predicated. We can usually ignore this, but WWM forces us not
-/// to. Condition #1 prevents false positives if the variable is undefined at
-/// the WWM instruction anyways. This is overly conservative in certain cases,
-/// especially in uniform control flow, but this is a workaround anyways until
-/// LLVM gains the notion of predicated uses and definitions of variables.
+/// 2. A "loop exit register": a value written inside a loop but used outside the
+/// loop, where there is WWM code inside the loop (the case in the example
+/// above). We add an implicit_def of the register in the loop pre-header,
+/// and make the original def a partial def by adding an implicit use of the
+/// register.
+///
+/// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
+/// in a loop header. If there is WWM code inside the loop, then we make all
+/// defs inside the loop partial defs by adding an implicit use of the
+/// register on each one.
+///
+/// Note that we do not need to consider an if..else..endif phi. We only need to
+/// consider non-uniform control flow, and control flow structurization would
+/// have transformed a non-uniform if..else..endif into two if..endifs.
+///
+/// The analysis to detect these cases relies on a property of the MIR
+/// arising from this pass running straight after PHIElimination and before any
+/// coalescing: that any virtual register with more than one definition must be
+/// the new register added to lower a phi node by PHIElimination.
+///
+/// FIXME: We should detect whether a register in one of the above categories is
+/// already live at the WWM code before deciding to add the implicit uses to
+/// synthesize its liveness.
+///
+/// FIXME: I believe this whole scheme may be flawed due to the possibility of
+/// the register allocator doing live interval splitting.
///
//===----------------------------------------------------------------------===//
@@ -59,7 +78,9 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -71,10 +92,18 @@ namespace {
class SIFixWWMLiveness : public MachineFunctionPass {
private:
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *LoopInfo;
LiveIntervals *LIS = nullptr;
+ const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
+ std::vector<MachineInstr *> WWMs;
+ std::vector<MachineOperand *> ThenDefs;
+ std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
+ std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
+
public:
static char ID;
@@ -84,13 +113,11 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- bool runOnWWMInstruction(MachineInstr &MI);
-
- void addDefs(const MachineInstr &MI, SparseBitVector<> &set);
-
StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequiredID(MachineLoopInfoID);
// Should preserve the same set that TwoAddressInstructions does.
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
@@ -100,11 +127,21 @@ public:
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ void processDef(MachineOperand &DefOpnd);
+ bool processThenDef(MachineOperand *DefOpnd);
+ bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
+ bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
};
} // End anonymous namespace.
-INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
+ "SI fix WWM liveness", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
"SI fix WWM liveness", false, false)
char SIFixWWMLiveness::ID = 0;
@@ -115,89 +152,267 @@ FunctionPass *llvm::createSIFixWWMLivenessPass() {
return new SIFixWWMLiveness();
}
-void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs)
-{
- for (const MachineOperand &Op : MI.defs()) {
- if (Op.isReg()) {
- unsigned Reg = Op.getReg();
- if (TRI->isVGPR(*MRI, Reg))
- Regs.set(Reg);
- }
- }
-}
+bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
+ bool Modified = false;
+
+ // This doesn't actually need LiveIntervals, but we can preserve them.
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
-bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) {
- MachineBasicBlock *MBB = WWM.getParent();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- // Compute the registers that are live out of MI by figuring out which defs
- // are reachable from MI.
- SparseBitVector<> LiveOut;
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ MRI = &MF.getRegInfo();
- for (auto II = MachineBasicBlock::iterator(WWM), IE =
- MBB->end(); II != IE; ++II) {
- addDefs(*II, LiveOut);
- }
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ LoopInfo = &getAnalysis<MachineLoopInfo>();
- for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB),
- E = df_end(MBB);
- I != E; ++I) {
- for (const MachineInstr &MI : **I) {
- addDefs(MI, LiveOut);
+ // Scan the function to find the WWM sections and the candidate registers for
+ // having liveness modified.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() == AMDGPU::EXIT_WWM)
+ WWMs.push_back(&MI);
+ else {
+ for (MachineOperand &DefOpnd : MI.defs()) {
+ if (DefOpnd.isReg()) {
+ unsigned Reg = DefOpnd.getReg();
+ if (TRI->isVGPR(*MRI, Reg))
+ processDef(DefOpnd);
+ }
+ }
+ }
}
}
+ if (!WWMs.empty()) {
+ // Synthesize liveness over WWM sections as required.
+ for (auto ThenDef : ThenDefs)
+ Modified |= processThenDef(ThenDef);
+ for (auto LoopExitDef : LoopExitDefs)
+ Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
+ for (auto LoopPhiDef : LoopPhiDefs)
+ Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
+ }
- // Compute the registers that reach MI.
- SparseBitVector<> Reachable;
+ WWMs.clear();
+ ThenDefs.clear();
+ LoopExitDefs.clear();
+ LoopPhiDefs.clear();
- for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE =
- MBB->rend(); II != IE; ++II) {
- addDefs(*II, Reachable);
- }
+ return Modified;
+}
- for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
- E = idf_end(MBB);
- I != E; ++I) {
- for (const MachineInstr &MI : **I) {
- addDefs(MI, Reachable);
+// During the function scan, process an operand that defines a VGPR.
+// This categorizes the register and puts it in the appropriate list for later
+// use when processing a WWM section.
+void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
+ unsigned Reg = DefOpnd.getReg();
+ // Get all the defining instructions. For convenience, make Defs[0] the def
+ // we are on now.
+ SmallVector<const MachineInstr *, 4> Defs;
+ Defs.push_back(DefOpnd.getParent());
+ for (auto &MI : MRI->def_instructions(Reg)) {
+ if (&MI != DefOpnd.getParent())
+ Defs.push_back(&MI);
+ }
+ // Check whether this def dominates all the others. If not, ignore this def.
+ // Either it is going to be processed when the scan encounters its other def
+ // that dominates all defs, or there is no def that dominates all others.
+ // The latter case is an eliminated phi from an if..else..endif or similar,
+ // which must be for uniform control flow and so can be ignored.
+ // Because this pass runs shortly after PHIElimination, we assume that any
+ // multi-def register is a lowered phi, and thus has each def in a separate
+ // basic block.
+ for (unsigned I = 1; I != Defs.size(); ++I) {
+ if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
+ return;
+ }
+ // Check for the case of an if..endif lowered phi: It has two defs, one
+ // dominates the other, and there is a single use in a successor of the
+ // dominant def.
+ // Later we will spot any WWM code inside the "then" clause and turn the
+ // second def into a partial def so its liveness goes through the WWM code
+ // in the "then" clause.
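+ // An illustrative shape for this case (block names and register numbers are
+ // invented for the example):
+ //   bb.if:    %1 = ...       ; dominating def, two successors
+ //   bb.then:  %1 = ...       ; second def, only reached down the "then" arm
+ //   bb.endif: ... = use %1   ; the single use, in a successor of bb.if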
+ if (Defs.size() == 2) {
+ auto DomDefBlock = Defs[0]->getParent();
+ if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
+ auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
+ for (auto Succ : DomDefBlock->successors()) {
+ if (Succ == UseBlock) {
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
+ ThenDefs.push_back(&DefOpnd);
+ return;
+ }
+ }
}
}
-
- // find the intersection, and add implicit uses.
- LiveOut &= Reachable;
-
- bool Modified = false;
- for (unsigned Reg : LiveOut) {
- WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
- if (LIS) {
- // FIXME: is there a better way to update the live interval?
- LIS->removeInterval(Reg);
- LIS->createAndComputeVirtRegInterval(Reg);
+ // Check for the case of a non-lowered-phi register (single def) that exits
+ // a loop, that is, it has a use that is outside a loop that the def is
+ // inside. We find the outermost loop that the def is inside but a use is
+ // outside. Later we will spot any WWM code inside that loop and then make
+ // the def a partial def so its liveness goes round the loop and through the
+ // WWM code.
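+ // Illustrative shape (invented for the example):
+ //   bb.loop: %2 = ...        ; the single def, inside the loop
+ //   bb.exit: ... = use %2    ; a use outside the loop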
+ if (Defs.size() == 1) {
+ auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
+ if (!Loop)
+ return;
+ bool IsLoopExit = false;
+ for (auto &Use : MRI->use_instructions(Reg)) {
+ auto UseBlock = Use.getParent();
+ if (Loop->contains(UseBlock))
+ continue;
+ IsLoopExit = true;
+ while (auto Parent = Loop->getParentLoop()) {
+ if (Parent->contains(UseBlock))
+ break;
+ Loop = Parent;
+ }
}
- Modified = true;
+ if (!IsLoopExit)
+ return;
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is a loop exit reg with loop header at "
+ << "bb." << Loop->getHeader()->getNumber() << "\n");
+ LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
+ &DefOpnd, Loop));
+ return;
}
-
- return Modified;
+ // Check for the case of a lowered single-preheader-loop phi, that is, a
+ // multi-def register where the dominating def is in the loop pre-header and
+ // all other defs are in backedges. Later we will spot any WWM code inside
+ // that loop and then make the backedge defs partial defs so the liveness
+ // goes through the WWM code.
+ // Note that we are ignoring multi-preheader loops on the basis that the
+ // structurizer does not allow that for non-uniform loops.
+ // There must be a single use in the loop header.
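+ // Illustrative shape (invented for the example):
+ //   bb.preheader: %3 = ...       ; dominating def, outside the loop
+ //   bb.header:    ... = use %3   ; the single use, in the loop header
+ //   bb.latch:     %3 = ...       ; backedge def(s), inside the loop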
+ if (!MRI->hasOneUse(Reg))
+ return;
+ auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
+ auto Loop = LoopInfo->getLoopFor(UseBlock);
+ if (!Loop || Loop->getHeader() != UseBlock
+ || Loop->contains(Defs[0]->getParent())) {
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is multi-def but single use not in loop header\n");
+ return;
+ }
+ for (unsigned I = 1; I != Defs.size(); ++I) {
+ if (!Loop->contains(Defs[I]->getParent()))
+ return;
+ }
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
+ << " is a loop phi reg with loop header at "
+ << "bb." << Loop->getHeader()->getNumber() << "\n");
+ LoopPhiDefs.push_back(
+ std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
}
-bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
- bool Modified = false;
-
- // This doesn't actually need LiveIntervals, but we can preserve them.
- LIS = getAnalysisIfAvailable<LiveIntervals>();
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
-
- TRI = &TII->getRegisterInfo();
- MRI = &MF.getRegInfo();
+// Process a then phi def: It has two defs, one dominates the other, and there
+// is a single use in a successor of the dominant def. Here we spot any WWM
+// code inside the "then" clause and turn the second def into a partial def so
+// its liveness goes through the WWM code in the "then" clause.
+bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
+ LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
+ if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Ignore if dominating def is undef.
+ LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n");
+ return false;
+ }
+ unsigned Reg = DefOpnd->getReg();
+ // Get the use block, which is the endif block.
+ auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
+ // Check whether there is WWM code inside the then branch. The WWM code must
+ // be dominated by the if but not dominated by the endif.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
+ && !DomTree->dominates(UseBlock, WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
+ }
+ }
+ if (!ContainsWWM)
+ return false;
+ // Get the other def.
+ MachineInstr *OtherDef = nullptr;
+ for (auto &MI : MRI->def_instructions(Reg)) {
+ if (&MI != DefOpnd->getParent())
+ OtherDef = &MI;
+ }
+ // Make it a partial def.
+ OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *OtherDef);
+ return true;
+}
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
- Modified |= runOnWWMInstruction(MI);
- }
+// Process a loop exit def, that is, a register with a single def inside a loop
+// that has a use outside the loop. Here we spot any WWM code inside that loop
+// and then make the def a partial def so its liveness goes round the loop and
+// through the WWM code.
+bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
+ MachineLoop *Loop) {
+ LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
+ // Check whether there is WWM code inside the loop.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (Loop->contains(WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
}
}
+ if (!ContainsWWM)
+ return false;
+ unsigned Reg = DefOpnd->getReg();
+ // Add a new implicit_def in loop preheader(s).
+ for (auto Pred : Loop->getHeader()->predecessors()) {
+ if (!Loop->contains(Pred)) {
+ auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
+ LLVM_DEBUG(dbgs() << *ImplicitDef);
+ (void)ImplicitDef;
+ }
+ }
+ // Make the original def partial.
+ DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
+ Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
+ return true;
+}
- return Modified;
+// Process a loop phi def, that is, a multi-def register where the dominating
+// def is in the loop pre-header and all other defs are in backedges. Here we
+// spot any WWM code inside that loop and then make the backedge defs partial
+// defs so the liveness goes through the WWM code.
+bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
+ MachineLoop *Loop) {
+ LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
+ // Check whether there is WWM code inside the loop.
+ bool ContainsWWM = false;
+ for (auto WWM : WWMs) {
+ if (Loop->contains(WWM->getParent())) {
+ LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
+ ContainsWWM = true;
+ break;
+ }
+ }
+ if (!ContainsWWM)
+ return false;
+ unsigned Reg = DefOpnd->getReg();
+ // Remove kill mark from uses.
+ for (auto &Use : MRI->use_operands(Reg))
+ Use.setIsKill(false);
+ // Make all defs except the dominating one partial defs.
+ SmallVector<MachineInstr *, 4> Defs;
+ for (auto &Def : MRI->def_instructions(Reg))
+ Defs.push_back(&Def);
+ for (auto Def : Defs) {
+ if (DefOpnd->getParent() == Def)
+ continue;
+ Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
+ LLVM_DEBUG(dbgs() << *Def);
+ }
+ return true;
}
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
new file mode 100644
index 000000000000..ee39eb04d831
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
@@ -0,0 +1,231 @@
+//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// The SIFixupVectorISel pass cleans up post-ISel vector issues.
+/// Currently this will convert GLOBAL_{LOAD|STORE}_*
+/// and GLOBAL_Atomic_* instructions into their _SADDR variants,
+/// feeding the sreg into the saddr field of the new instruction.
+/// We currently handle a REG_SEQUENCE feeding the vaddr
+/// and decompose it into a base and index.
+///
+/// Transform:
+/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
+/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
+/// %24:vgpr_32, %19:sreg_64_xexec
+/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
+/// %11:vreg_64 = COPY %16:vreg_64
+/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
+/// Into:
+/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
+/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
+/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
+///
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#define DEBUG_TYPE "si-fixup-vector-isel"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableGlobalSGPRAddr(
+ "amdgpu-enable-global-sgpr-addr",
+ cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
+ cl::init(false));
+
+STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
+STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
+
+namespace {
+
+class SIFixupVectorISel : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixupVectorISel() : MachineFunctionPass(ID) {
+ initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
+ "SI Fixup Vector ISel", false, false)
+
+char SIFixupVectorISel::ID = 0;
+
+char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
+
+FunctionPass *llvm::createSIFixupVectorISelPass() {
+ return new SIFixupVectorISel();
+}
+
+static bool findSRegBaseAndIndex(MachineOperand *Op,
+ unsigned &BaseReg,
+ unsigned &IndexReg,
+ MachineRegisterInfo &MRI,
+ const SIRegisterInfo *TRI) {
+ SmallVector<MachineOperand *, 8> Worklist;
+ Worklist.push_back(Op);
+ while (!Worklist.empty()) {
+ MachineOperand *WOp = Worklist.pop_back_val();
+ if (!WOp->isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
+ continue;
+ MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
+ switch (DefInst->getOpcode()) {
+ default:
+ continue;
+ case AMDGPU::COPY:
+ Worklist.push_back(&DefInst->getOperand(1));
+ break;
+ case AMDGPU::REG_SEQUENCE:
+ if (DefInst->getNumOperands() != 5)
+ continue;
+ Worklist.push_back(&DefInst->getOperand(1));
+ Worklist.push_back(&DefInst->getOperand(3));
+ break;
+ case AMDGPU::V_ADD_I32_e64:
+ // The V_ADD_* and its analogous V_ADDC_* are generated by a previous pass
+ // which lowered an ADD_64_PSEUDO, generating subregs to break up the
+ // 64 bit args.
+ if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
+ continue;
+ BaseReg = DefInst->getOperand(2).getReg();
+ if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
+ continue;
+ IndexReg = DefInst->getOperand(3).getReg();
+ // Chase the IndexReg.
+ MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
+ if (!MI || !MI->isCopy())
+ continue;
+ // Make sure the reg class is 64 bit for Index.
+ // If the Index register is a subreg, we want it to reference
+ // a 64 bit register which we will use as the Index reg.
+ const TargetRegisterClass *IdxRC, *BaseRC;
+ IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
+ if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
+ continue;
+ IndexReg = MI->getOperand(1).getReg();
+ // Chase the BaseReg.
+ MI = MRI.getUniqueVRegDef(BaseReg);
+ if (!MI || !MI->isCopy())
+ continue;
+ // Make sure the register class is 64 bit for Base.
+ BaseReg = MI->getOperand(1).getReg();
+ BaseRC = MRI.getRegClass(BaseReg);
+ if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
+ continue;
+ // Make sure Base is SReg and Index is VReg.
+ if (!TRI->isSGPRReg(MRI, BaseReg))
+ return false;
+ if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
+ return false;
+ // Clear any kill flags on the Index and Base regs; they are used later.
+ MRI.clearKillFlags(IndexReg);
+ MRI.clearKillFlags(BaseReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+// Identify global LOAD/STORE/ATOMIC instructions and try to convert to _SADDR.
+static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ MachineRegisterInfo &MRI,
+ const GCNSubtarget &ST,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI) {
+ if (!EnableGlobalSGPRAddr)
+ return false;
+ bool FuncModified = false;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+ int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
+ if (NewOpcd < 0)
+ continue;
+ // Update our statistics on opportunities seen.
+ ++NumSGPRGlobalOccurs;
+ LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
+ // Need a Base and Index or we can't transform to _SADDR.
+ unsigned BaseReg = 0;
+ unsigned IndexReg = 0;
+ MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+ if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
+ continue;
+ ++NumSGPRGlobalSaddrs;
+ FuncModified = true;
+ // Create the new _SADDR Memory instruction.
+ bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
+ MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
+ MachineInstr *NewGlob = nullptr;
+ NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
+ if (HasVdst)
+ NewGlob->addOperand(MF, MI.getOperand(0));
+ NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
+ if (VData)
+ NewGlob->addOperand(MF, *VData);
+ NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
+ NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));
+
+ MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
+ // Atomics don't have a GLC, so omit the field if it is not there.
+ if (Glc)
+ NewGlob->addOperand(MF, *Glc);
+ NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
+ // _D16 instructions have a vdst_in operand; copy it in.
+ MachineOperand *VDstInOp = TII->getNamedOperand(MI,
+ AMDGPU::OpName::vdst_in);
+ if (VDstInOp)
+ NewGlob->addOperand(MF, *VDstInOp);
+ NewGlob->copyImplicitOps(MF, MI);
+ NewGlob->cloneMemRefs(MF, MI);
+ // Remove the old Global Memop instruction.
+ MI.eraseFromParent();
+ LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
+ }
+ return FuncModified;
+}
+
+bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ bool FuncModified = false;
+ for (MachineBasicBlock &MBB : MF) {
+ // Clean up missed SADDR opportunities from ISel.
+ FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
+ }
+ return FuncModified;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 338cabcb906b..f4e866958369 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -35,13 +35,16 @@ struct FoldCandidate {
uint64_t ImmToFold;
int FrameIndexToFold;
};
+ int ShrinkOpcode;
unsigned char UseOpNo;
MachineOperand::MachineOperandType Kind;
bool Commuted;
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
- bool Commuted_ = false) :
- UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+ bool Commuted_ = false,
+ int ShrinkOp = -1) :
+ UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+ Kind(FoldOp->getType()),
Commuted(Commuted_) {
if (FoldOp->isImm()) {
ImmToFold = FoldOp->getImm();
@@ -68,6 +71,14 @@ struct FoldCandidate {
bool isCommuted() const {
return Commuted;
}
+
+ bool needsShrink() const {
+ return ShrinkOpcode != -1;
+ }
+
+ int getShrinkOpcode() const {
+ return ShrinkOpcode;
+ }
};
class SIFoldOperands : public MachineFunctionPass {
@@ -154,6 +165,7 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
}
static bool updateOperand(FoldCandidate &Fold,
+ const SIInstrInfo &TII,
const TargetRegisterInfo &TRI) {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
@@ -189,10 +201,49 @@ static bool updateOperand(FoldCandidate &Fold,
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
}
}
+
+ if (Fold.needsShrink()) {
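+ // A shrinkable fold (e.g. an immediate folded into V_ADD_I32_e64) is
+ // rewritten to the 32-bit encoding, whose carry-out is the implicit VCC,
+ // so VCC must be dead here and any remaining uses of the old carry output
+ // are fed by a COPY from VCC below.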
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return false;
+
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ int Op32 = Fold.getShrinkOpcode();
+ MachineOperand &Dst0 = MI->getOperand(0);
+ MachineOperand &Dst1 = MI->getOperand(1);
+ assert(Dst0.isDef() && Dst1.isDef());
+
+ bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+
+ const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+ unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+ const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
+ unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+
+ MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+
+ if (HaveNonDbgCarryUse) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+ .addReg(AMDGPU::VCC, RegState::Kill);
+ }
+
+ // Keep the old instruction around to avoid breaking iterators, but
+ // replace the outputs with dummy registers.
+ Dst0.setReg(NewReg0);
+ Dst1.setReg(NewReg1);
+
+ if (Fold.isCommuted())
+ TII.commuteInstruction(*Inst32, false);
+ return true;
+ }
+
Old.ChangeToImmediate(Fold.ImmToFold);
return true;
}
+ assert(!Fold.needsShrink() && "not handled");
+
if (Fold.isFI()) {
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
return true;
@@ -261,6 +312,8 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (isUseMIInFoldList(FoldList, MI))
return false;
+ unsigned CommuteOpNo = OpNo;
+
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -269,11 +322,12 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (CanCommute) {
if (CommuteIdx0 == OpNo)
- OpNo = CommuteIdx1;
+ CommuteOpNo = CommuteIdx1;
else if (CommuteIdx1 == OpNo)
- OpNo = CommuteIdx0;
+ CommuteOpNo = CommuteIdx0;
}
+
// One of the operands might be an Imm operand, and OpNo may refer to it after
// the call of commuteInstruction() below. Such situations are avoided
// here explicitly as OpNo must be a register operand to be a candidate
@@ -286,12 +340,34 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
!TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
return false;
- if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+ if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+ if ((Opc == AMDGPU::V_ADD_I32_e64 ||
+ Opc == AMDGPU::V_SUB_I32_e64 ||
+ Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
+ OpToFold->isImm()) {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ // Verify the other operand is a VGPR, otherwise we would violate the
+ // constant bus restriction.
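+ // (A VALU instruction can read at most one SGPR or literal constant, and
+ // the folded immediate already occupies that slot.)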
+ unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
+ MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+ if (!OtherOp.isReg() ||
+ !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
+ return false;
+
+ assert(MI->getOperand(1).isDef());
+
+ int Op32 = AMDGPU::getVOPe32(Opc);
+ FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
+ Op32));
+ return true;
+ }
+
TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
return false;
}
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+ FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
return true;
}
@@ -362,8 +438,6 @@ void SIFoldOperands::foldOperand(
bool FoldingImm = OpToFold.isImm();
- // In order to fold immediates into copies, we need to change the
- // copy to a MOV.
if (FoldingImm && UseMI->isCopy()) {
unsigned DestReg = UseMI->getOperand(0).getReg();
const TargetRegisterClass *DestRC
@@ -371,6 +445,31 @@ void SIFoldOperands::foldOperand(
MRI->getRegClass(DestReg) :
TRI->getPhysRegClass(DestReg);
+ unsigned SrcReg = UseMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
+ if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
+ MachineRegisterInfo::use_iterator NextUse;
+ SmallVector<FoldCandidate, 4> CopyUses;
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI->use_begin(DestReg), E = MRI->use_end();
+ Use != E; Use = NextUse) {
+ NextUse = std::next(Use);
+ FoldCandidate FC = FoldCandidate(Use->getParent(),
+ Use.getOperandNo(), &UseMI->getOperand(1));
+ CopyUses.push_back(FC);
+ }
+ for (auto & F : CopyUses) {
+ foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo,
+ FoldList, CopiesToReplace);
+ }
+ }
+ }
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+
unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
return;
@@ -378,6 +477,20 @@ void SIFoldOperands::foldOperand(
UseMI->setDesc(TII->get(MovOp));
CopiesToReplace.push_back(UseMI);
} else {
+ if (UseMI->isCopy() && OpToFold.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
+ TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+ TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
+ !UseMI->getOperand(1).getSubReg()) {
+ UseMI->getOperand(1).setReg(OpToFold.getReg());
+ UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
+ UseMI->getOperand(1).setIsKill(false);
+ CopiesToReplace.push_back(UseMI);
+ OpToFold.setIsKill(false);
+ return;
+ }
+
const MCInstrDesc &UseDesc = UseMI->getDesc();
// Don't fold into target independent nodes. Target independent opcodes
@@ -550,6 +663,19 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
+ if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
+ if (Src0->isImm() && Src0->getImm() == 0) {
+ // v_lshl_or_b32 0, X, Y -> copy Y
+ // v_lshl_or_b32 0, X, K -> v_mov_b32 K
+ bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
+ MI->RemoveOperand(Src1Idx);
+ MI->RemoveOperand(Src0Idx);
+
+ MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
+ return true;
+ }
+ }
+
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
@@ -728,13 +854,17 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
}
} else {
// Folding register.
+ SmallVector <MachineRegisterInfo::use_iterator, 4> UsesToProcess;
for (MachineRegisterInfo::use_iterator
Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end();
Use != E; ++Use) {
- MachineInstr *UseMI = Use->getParent();
+ UsesToProcess.push_back(Use);
+ }
+ for (auto U : UsesToProcess) {
+ MachineInstr *UseMI = U->getParent();
- foldOperand(OpToFold, UseMI, Use.getOperandNo(),
- FoldList, CopiesToReplace);
+ foldOperand(OpToFold, UseMI, U.getOperandNo(),
+ FoldList, CopiesToReplace);
}
}
@@ -744,7 +874,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
- if (updateOperand(Fold, *TRI)) {
+ if (updateOperand(Fold, *TII, *TRI)) {
// Clear kill flags.
if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
@@ -981,9 +1111,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// omod is ignored by hardware if IEEE bit is enabled. omod also does not
// correctly handle signed zeros.
//
- // TODO: Check nsz on instructions when fast math flags are preserved to MI
- // level.
- bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
+ bool IsIEEEMode = ST->enableIEEEBit(MF);
+ bool HasNSZ = MFI->hasNoSignedZerosFPMath();
for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
@@ -994,7 +1123,10 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
tryFoldInst(TII, &MI);
if (!TII->isFoldableCopy(MI)) {
- if (IsIEEEMode || !tryFoldOMod(MI))
+ // TODO: Omod might be OK if there is NSZ only on the source
+ // instruction, and not the omod multiply.
+ if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
+ !tryFoldOMod(MI))
tryFoldClamp(MI);
continue;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index cd14239de822..aa976d5141f8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -168,16 +168,15 @@ void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask,
CoveringSubregs.push_back(Idx);
}
- llvm::sort(CoveringSubregs.begin(), CoveringSubregs.end(),
- [this](unsigned A, unsigned B) {
- LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(A);
- LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(B);
- unsigned NA = MaskA.getNumLanes();
- unsigned NB = MaskB.getNumLanes();
- if (NA != NB)
- return NA > NB;
- return MaskA.getHighestLane() > MaskB.getHighestLane();
- });
+ llvm::sort(CoveringSubregs, [this](unsigned A, unsigned B) {
+ LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(A);
+ LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(B);
+ unsigned NA = MaskA.getNumLanes();
+ unsigned NB = MaskB.getNumLanes();
+ if (NA != NB)
+ return NA > NB;
+ return MaskA.getHighestLane() > MaskB.getHighestLane();
+ });
for (unsigned Idx : CoveringSubregs) {
LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(Idx);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ac0ef90f25a4..e4633c88e18f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -289,7 +289,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdCodeObjectV2(F)) {
+ if (ST.isAmdHsaOrMesa(F)) {
PreloadedPrivateBufferReg = MFI->getPreloadedReg(
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
}
@@ -308,7 +308,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
- assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F));
+ assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
MRI.addLiveIn(PreloadedPrivateBufferReg);
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
@@ -333,7 +333,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
bool CopyBuffer = ResourceRegUsed &&
PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
- ST.isAmdCodeObjectV2(F) &&
+ ST.isAmdHsaOrMesa(F) &&
ScratchRsrcReg != PreloadedPrivateBufferReg;
// This needs to be careful of the copying order to avoid overwriting one of
@@ -433,7 +433,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
}
if (ST.isMesaGfxShader(Fn)
|| (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
- assert(!ST.isAmdCodeObjectV2(Fn));
+ assert(!ST.isAmdHsaOrMesa(Fn));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 25007861fd15..0ba921647097 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#ifdef _MSC_VER
+#if defined(_MSC_VER) || defined(__MINGW32__)
// Provide M_PI.
#define _USE_MATH_DEFINES
#endif
@@ -156,12 +156,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v32i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v32i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
@@ -207,11 +209,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
@@ -232,6 +237,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+
#if 0
setOperationAction(ISD::ADDCARRY, MVT::i64, Legal);
setOperationAction(ISD::SUBCARRY, MVT::i64, Legal);
@@ -240,7 +249,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
- MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16 }) {
+ MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -339,6 +348,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::FLOG, MVT::f16, Custom);
+ setOperationAction(ISD::FEXP, MVT::f16, Custom);
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
@@ -375,8 +385,20 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Custom);
+
+
+ // These are really only legal for ieee_mode functions. We should be avoiding
+ // them for functions that don't have ieee_mode enabled, so just say they are
+ // legal.
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -465,8 +487,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP2 Actions.
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+
setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
@@ -549,6 +570,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// This isn't really legal, but this avoids the legalizer unrolling it (and
// allows matching fneg (fabs x) patterns)
setOperationAction(ISD::FABS, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f16, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f16, Legal);
+
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand);
}
if (Subtarget->hasVOP3PInsts()) {
@@ -566,8 +598,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::v2f16, Legal);
setOperationAction(ISD::FMUL, MVT::v2f16, Legal);
setOperationAction(ISD::FMA, MVT::v2f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::v2f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v2f16, Legal);
+
setOperationAction(ISD::FCANONICALIZE, MVT::v2f16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
@@ -587,9 +621,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::v4f16, Custom);
setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
+
+ setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
+
setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
setOperationAction(ISD::SELECT, MVT::v4f16, Custom);
}
@@ -623,6 +663,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
+ setTargetDAGCombine(ISD::FMINNUM_IEEE);
+ setTargetDAGCombine(ISD::FMAXNUM_IEEE);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::SMAX);
@@ -638,7 +680,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
// All memory operations. Some folding on the pointer operand is done to help
// matching the constant offsets in the addressing modes.
@@ -707,9 +749,7 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
if (Size == 64)
return MVT::i32;
- if (Size == 16 &&
- Subtarget->has16BitInsts() &&
- isPowerOf2_32(VT.getVectorNumElements()))
+ if (Size == 16 && Subtarget->has16BitInsts())
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
}
@@ -730,9 +770,8 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
if (Size == 64)
return 2 * NumElts;
- // FIXME: Fails to break down as we want with v3.
- if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts))
- return VT.getVectorNumElements() / 2;
+ if (Size == 16 && Subtarget->has16BitInsts())
+ return (VT.getVectorNumElements() + 1) / 2;
}
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
@@ -763,10 +802,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
// FIXME: We should fix the ABI to be the same on targets without 16-bit
// support, but unless we can properly handle 3-vectors, it will still be
// inconsistent.
- if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) {
+ if (Size == 16 && Subtarget->has16BitInsts()) {
RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
IntermediateVT = RegisterVT;
- NumIntermediates = NumElts / 2;
+ NumIntermediates = (NumElts + 1) / 2;
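+ // For example (illustrative): a v3f16 argument is now broken into
+ // (3 + 1) / 2 = 2 v2f16 intermediates; previously the power-of-two check
+ // sent it to the default breakdown.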
return NumIntermediates;
}
}
@@ -775,6 +814,47 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
+static MVT memVTFromAggregate(Type *Ty) {
+ // Only limited forms of aggregate type currently expected.
+ assert(Ty->isStructTy() && "Expected struct type");
+
+
+ Type *ElementType = nullptr;
+ unsigned NumElts;
+ if (Ty->getContainedType(0)->isVectorTy()) {
+ VectorType *VecComponent = cast<VectorType>(Ty->getContainedType(0));
+ ElementType = VecComponent->getElementType();
+ NumElts = VecComponent->getNumElements();
+ } else {
+ ElementType = Ty->getContainedType(0);
+ NumElts = 1;
+ }
+
+ assert((Ty->getContainedType(1) && Ty->getContainedType(1)->isIntegerTy(32)) && "Expected int32 type");
+
+ // Calculate the size of the memVT type from the aggregate
+ unsigned Pow2Elts = 0;
+ unsigned ElementSize;
+ switch (ElementType->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ case Type::IntegerTyID:
+ ElementSize = cast<IntegerType>(ElementType)->getBitWidth();
+ break;
+ case Type::HalfTyID:
+ ElementSize = 16;
+ break;
+ case Type::FloatTyID:
+ ElementSize = 32;
+ break;
+ }
+ unsigned AdditionalElts = ElementSize == 16 ? 2 : 1;
+ Pow2Elts = 1 << Log2_32_Ceil(NumElts + AdditionalElts);
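+ // Worked example (illustrative): a {<3 x half>, i32} return type gives
+ // NumElts = 3, ElementSize = 16, AdditionalElts = 2, so
+ // Pow2Elts = 1 << Log2_32_Ceil(5) = 8 and the memVT is v8f16.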
+
+ return MVT::getVectorVT(MVT::getVT(ElementType, false),
+ Pow2Elts);
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
@@ -802,7 +882,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(CI.getType());
+ Info.memVT = MVT::getVT(CI.getType(), true);
+ if (Info.memVT == MVT::Other) {
+ // Some intrinsics return an aggregate type; special-case these to work out
+ // the correct memVT.
+ Info.memVT = memVTFromAggregate(CI.getType());
+ }
Info.flags |= MachineMemOperand::MOLoad;
} else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
Info.opc = ISD::INTRINSIC_VOID;
@@ -941,11 +1026,11 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
- if (AS == AMDGPUASI.GLOBAL_ADDRESS)
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS)
return isLegalGlobalAddressingMode(AM);
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -983,10 +1068,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
- AS == AMDGPUASI.REGION_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+ AS == AMDGPUAS::REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -1001,8 +1086,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
- } else if (AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
+ } else if (AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
@@ -1016,12 +1101,12 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const {
- if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
return (MemVT.getSizeInBits() <= MaxPrivateBits);
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
return (MemVT.getSizeInBits() <= 2 * 32);
}
return true;
@@ -1043,8 +1128,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return false;
}
- if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
- AddrSpace == AMDGPUASI.REGION_ADDRESS) {
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
@@ -1059,17 +1144,21 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (!Subtarget->hasUnalignedScratchAccess() &&
- (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
- AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
- return false;
+ (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
+ AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
+ bool AlignedBy4 = Align >= 4;
+ if (IsFast)
+ *IsFast = AlignedBy4;
+
+ return AlignedBy4;
}
if (Subtarget->hasUnalignedBufferAccess()) {
// If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
- *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS ||
- AddrSpace == AMDGPUASI.CONSTANT_ADDRESS_32BIT) ?
+ *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
(Align % 4 == 0) : true;
}
@@ -1109,17 +1198,15 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
return MVT::Other;
}
-static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
- return AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
+static bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
- return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
- isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
+ return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
@@ -1133,7 +1220,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is no-op
- if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
+ if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
return true;
return isNoopAddrSpaceCast(SrcAS, DestAS);
@@ -1146,7 +1233,7 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
}
TargetLoweringBase::LegalizeTypeAction
-SITargetLowering::getPreferredVectorAction(EVT VT) const {
+SITargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
return TypeSplitVector;
@@ -1200,7 +1287,7 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
= Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
+ MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
@@ -1240,7 +1327,7 @@ SDValue SITargetLowering::lowerKernargMemParameter(
uint64_t Offset, unsigned Align, bool Signed,
const ISD::InputArg *Arg) const {
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
// Try to avoid using an extload by loading earlier than the argument address,
@@ -1349,7 +1436,8 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
- assert(!Arg->VT.isVector() && "vector type argument should have been split");
+ assert((!Arg->VT.isVector() || Arg->VT.getScalarSizeInBits() == 16) &&
+ "vector type argument should have been split");
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
@@ -1642,7 +1730,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.isAmdCodeObjectV2(MF.getFunction())) {
+ if (ST.isAmdHsaOrMesa(MF.getFunction())) {
if (RequiresStackAccess) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -1951,29 +2039,6 @@ SDValue SITargetLowering::LowerFormalArguments(
llvm_unreachable("Unknown loc info!");
}
- if (IsShader && Arg.VT.isVector()) {
- // Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- SmallVector<SDValue, 4> Regs;
- Regs.push_back(Val);
- for (unsigned j = 1; j != NumElements; ++j) {
- Reg = ArgLocs[ArgIdx++].getLocReg();
- Reg = MF.addLiveIn(Reg, RC);
-
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- Regs.push_back(Copy);
- }
-
- // Fill up the missing vector elements
- NumElements = Arg.VT.getVectorNumElements() - NumElements;
- Regs.append(NumElements, DAG.getUNDEF(VT));
-
- InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
- continue;
- }
-
InVals.push_back(Val);
}
@@ -2037,48 +2102,19 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsShader = AMDGPU::isShader(CallConv);
- Info->setIfReturnsVoid(Outs.size() == 0);
+ Info->setIfReturnsVoid(Outs.empty());
bool IsWaveEnd = Info->returnsVoid() && IsShader;
- SmallVector<ISD::OutputArg, 48> Splits;
- SmallVector<SDValue, 48> SplitVals;
-
- // Split vectors into their elements.
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- const ISD::OutputArg &Out = Outs[i];
-
- if (IsShader && Out.VT.isVector()) {
- MVT VT = Out.VT.getVectorElementType();
- ISD::OutputArg NewOut = Out;
- NewOut.Flags.setSplit();
- NewOut.VT = VT;
-
- // We want the original number of vector elements here, e.g.
- // three or five, not four or eight.
- unsigned NumElements = Out.ArgVT.getVectorNumElements();
-
- for (unsigned j = 0; j != NumElements; ++j) {
- SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i],
- DAG.getConstant(j, DL, MVT::i32));
- SplitVals.push_back(Elem);
- Splits.push_back(NewOut);
- NewOut.PartOffset += NewOut.VT.getStoreSize();
- }
- } else {
- SplitVals.push_back(OutVals[i]);
- Splits.push_back(Out);
- }
- }
-
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 48> RVLocs;
+ SmallVector<ISD::OutputArg, 48> Splits;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze outgoing return values.
- CCInfo.AnalyzeReturn(Splits, CCAssignFnForReturn(CallConv, isVarArg));
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 48> RetOps;
@@ -2103,14 +2139,12 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
// Copy the result values into the output registers.
- for (unsigned i = 0, realRVLocIdx = 0;
- i != RVLocs.size();
- ++i, ++realRVLocIdx) {
- CCValAssign &VA = RVLocs[i];
+ for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
+ ++I, ++RealRVLocIdx) {
+ CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// TODO: Partially return in registers if return values don't fit.
-
- SDValue Arg = SplitVals[realRVLocIdx];
+ SDValue Arg = OutVals[RealRVLocIdx];
// Copied from other backends.
switch (VA.getLocInfo()) {
@@ -2225,11 +2259,11 @@ SDValue SITargetLowering::LowerCallResult(
// from the explicit user arguments present in the IR.
void SITargetLowering::passSpecialInputs(
CallLoweringInfo &CLI,
+ CCState &CCInfo,
const SIMachineFunctionInfo &Info,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains,
- SDValue Chain,
- SDValue StackPtr) const {
+ SDValue Chain) const {
// If we don't have a call site, this was a call inserted by
// legalization. These can never use special inputs.
if (!CLI.CS)
@@ -2297,9 +2331,9 @@ void SITargetLowering::passSpecialInputs(
if (OutgoingArg->isRegister()) {
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
} else {
- SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, StackPtr,
- InputReg,
- OutgoingArg->getStackOffset());
+ unsigned SpecialArgOffset = CCInfo.AllocateStack(ArgVT.getStoreSize(), 4);
+ SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+ SpecialArgOffset);
MemOpChains.push_back(ArgStore);
}
}
@@ -2424,6 +2458,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
"unsupported call to variadic function ");
}
+ if (!CLI.CS.getInstruction())
+ report_fatal_error("unsupported libcall legalization");
+
if (!CLI.CS.getCalledFunction()) {
return lowerUnhandledCall(CLI, InVals,
"unsupported indirect call to function ");
@@ -2442,8 +2479,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// The first 4 bytes are reserved for the callee's emergency stack slot.
- const unsigned CalleeUsableStackOffset = 4;
-
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
@@ -2463,25 +2498,16 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
++NumTailCalls;
}
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // FIXME: Remove this hack for function pointer types after removing
- // support of old address space mapping. In the new address space
- // mapping the pointer in default address space is 64 bit, therefore
- // does not need this hack.
- if (Callee.getValueType() == MVT::i32) {
- const GlobalValue *GV = GA->getGlobal();
- Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), false,
- GA->getTargetFlags());
- }
- }
- assert(Callee.getValueType() == MVT::i64);
-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
+
+ // The first 4 bytes are reserved for the callee's emergency stack slot.
+ CCInfo.AllocateStack(4, 4);
+
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
@@ -2529,10 +2555,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
- // Stack pointer relative accesses are done by changing the offset SGPR. This
- // is just the VGPR offset component.
- SDValue StackPtr = DAG.getConstant(CalleeUsableStackOffset, DL, MVT::i32);
-
SmallVector<SDValue, 8> MemOpChains;
MVT PtrVT = MVT::i32;
@@ -2576,18 +2598,22 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset;
- SDValue PtrOff = DAG.getObjectPtrOffset(DL, StackPtr, Offset);
+ SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
+ unsigned Align = 0;
if (IsTailCall) {
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
unsigned OpSize = Flags.isByVal() ?
Flags.getByValSize() : VA.getValVT().getStoreSize();
+ // FIXME: We can have better than the minimum byval required alignment.
+ Align = Flags.isByVal() ? Flags.getByValAlign() :
+ MinAlign(Subtarget->getStackAlignment(), Offset);
+
Offset = Offset + FPDiff;
int FI = MFI.CreateFixedObject(OpSize, Offset, true);
- DstAddr = DAG.getObjectPtrOffset(DL, DAG.getFrameIndex(FI, PtrVT),
- StackPtr);
+ DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
@@ -2601,6 +2627,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
} else {
DstAddr = PtrOff;
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
+ Align = MinAlign(Subtarget->getStackAlignment(), LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
@@ -2611,18 +2638,18 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
/*isVol = */ false, /*AlwaysInline = */ true,
/*isTailCall = */ false, DstInfo,
MachinePointerInfo(UndefValue::get(Type::getInt8PtrTy(
- *DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS))));
+ *DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS))));
MemOpChains.push_back(Cpy);
} else {
- SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
+ SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align);
MemOpChains.push_back(Store);
}
}
}
// Copy special input registers after user input arguments.
- passSpecialInputs(CLI, *Info, RegsToPass, MemOpChains, Chain, StackPtr);
+ passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
@@ -3460,7 +3487,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
MIB.add(MI.getOperand(I));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return BB;
}
@@ -3628,7 +3655,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerDEBUGTRAP(Op, DAG);
case ISD::FABS:
case ISD::FNEG:
+ case ISD::FCANONICALIZE:
return splitUnaryVectorOp(Op, DAG);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ return lowerFMINNUM_FMAXNUM(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -3639,10 +3670,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
case ISD::FADD:
case ISD::FMUL:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
return splitBinaryVectorOp(Op, DAG);
}
return SDValue();
@@ -3678,18 +3709,9 @@ static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
MemSDNode *M,
SelectionDAG &DAG,
+ ArrayRef<SDValue> Ops,
bool IsIntrinsic) const {
SDLoc DL(M);
- SmallVector<SDValue, 10> Ops;
- Ops.reserve(M->getNumOperands());
-
- Ops.push_back(M->getOperand(0));
- if (IsIntrinsic)
- Ops.push_back(DAG.getConstant(Opcode, DL, MVT::i32));
-
- // Skip 1, as it is the intrinsic ID.
- for (unsigned I = 2, E = M->getNumOperands(); I != E; ++I)
- Ops.push_back(M->getOperand(I));
bool Unpacked = Subtarget->hasUnpackedD16VMem();
EVT LoadVT = M->getValueType(0);
@@ -3717,6 +3739,69 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL);
}
+static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
+ SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CD)
+ return DAG.getUNDEF(VT);
+
+ int CondCode = CD->getSExtValue();
+ if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
+ CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
+ return DAG.getUNDEF(VT);
+
+ ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
+
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ SDLoc DL(N);
+
+ EVT CmpVT = LHS.getValueType();
+ if (CmpVT == MVT::i16 && !TLI.isTypeLegal(MVT::i16)) {
+ unsigned PromoteOp = ICmpInst::isSigned(IcInput) ?
+ ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ LHS = DAG.getNode(PromoteOp, DL, MVT::i32, LHS);
+ RHS = DAG.getNode(PromoteOp, DL, MVT::i32, RHS);
+ }
+
+ ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
+
+ return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS,
+ DAG.getCondCode(CCOpcode));
+}
+
+static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
+ SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CD)
+ return DAG.getUNDEF(VT);
+
+ int CondCode = CD->getSExtValue();
+ if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
+ CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE) {
+ return DAG.getUNDEF(VT);
+ }
+
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ EVT CmpVT = Src0.getValueType();
+ SDLoc SL(N);
+
+ if (CmpVT == MVT::f16 && !TLI.isTypeLegal(CmpVT)) {
+ Src0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
+ Src1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
+ }
+
+ FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
+ ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
+ return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0,
+ Src1, DAG.getCondCode(CCOpcode));
+}
+
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -3761,8 +3846,13 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
- SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
- Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
+ EVT VT = N->getValueType(0);
+ if (isTypeLegal(VT))
+ Results.push_back(DAG.getNode(Opcode, SL, VT, Src0, Src1));
+ else {
+ SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
+ }
return;
}
}
@@ -3895,15 +3985,15 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
- return (GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
- return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@@ -4038,6 +4128,23 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
+SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+
+ // FIXME: Assert during selection that this is only selected for
+ // ieee_mode. Currently a combine can produce the ieee version for non-ieee
+ // mode functions, but this happens to be OK since it's only done in cases
+ // where it is known that no sNaN can occur.
+ if (IsIEEEMode)
+ return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
+
+ if (VT == MVT::v4f16)
+ return splitBinaryVectorOp(Op, DAG);
+ return Op;
+}
+
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
@@ -4091,10 +4198,10 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
// FIXME: Use inline constants (src_{shared, private}_base) instead.
if (Subtarget->hasApertureRegs()) {
- unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
- unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
unsigned Encoding =
@@ -4119,7 +4226,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
- uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
+ uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr = DAG.getObjectPtrOffset(DL, QueuePtr, StructOffset);
@@ -4127,7 +4234,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available and how do we get it?
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
- AMDGPUASI.CONSTANT_ADDRESS));
+ AMDGPUAS::CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(V, StructOffset);
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
@@ -4148,11 +4255,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
- if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
- if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
- DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+ DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
@@ -4164,11 +4271,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
}
// local/private -> flat
- if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
unsigned SrcAS = ASC->getSrcAddressSpace();
- if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
- SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+ SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
@@ -4335,30 +4442,39 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
}
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
+ assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
- Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
- Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
+ // Avoid adding defined bits with the zero_extend.
+ if (Hi.isUndef()) {
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
+ return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
+ }
- Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
DAG.getConstant(16, SL, MVT::i32));
+ if (Lo.isUndef())
+ return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
- SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
+ SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
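
As a plain-C++ illustration of the BUILD_VECTOR lowering just above (not part of the patch, and not LLVM API), the v2i16/v2f16 case ultimately packs the two 16-bit lanes into one 32-bit word by zero-extending the high half, shifting it left by 16, and or-ing in the low half; the undef shortcuts simply skip whichever half contributes no defined bits. A minimal standalone sketch:

    // Standalone sketch: pack two 16-bit lanes into a 32-bit word, mirroring the
    // zero_extend + shl + or sequence used by the lowering above.
    #include <cstdint>
    #include <cstdio>

    static uint32_t packV2I16(uint16_t Lo, uint16_t Hi) {
      return static_cast<uint32_t>(Lo) | (static_cast<uint32_t>(Hi) << 16);
    }

    int main() {
      // Lo lands in bits [15:0], Hi in bits [31:16].
      std::printf("0x%08x\n", (unsigned)packV2I16(0x1234, 0xabcd)); // 0xabcd1234
    }
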
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
- return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
+ return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
@@ -4409,18 +4525,15 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSD->getGlobal();
-
- if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
- GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
- GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS &&
- // FIXME: It isn't correct to rely on the type of the pointer. This should
- // be removed when address space 0 is 64-bit.
- !GV->getType()->getElementType()->isFunctionTy())
+ if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
+ GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
SDLoc DL(GSD);
EVT PtrVT = Op.getValueType();
+ // FIXME: Should not make address space based decisions here.
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
else if (shouldEmitPCReloc(GV))
@@ -4431,11 +4544,11 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
- // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getGOT(DAG.getMachineFunction());
return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Align,
MachineMemOperand::MODereferenceable |
@@ -4547,11 +4660,115 @@ static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
return Value == 0;
}
+// Reconstruct the required return value for an image load intrinsic.
+// This is more complicated due to the optional use of TexFailCtrl, which means
+// the required return type is an aggregate.
+static SDValue constructRetValue(SelectionDAG &DAG,
+ MachineSDNode *Result,
+ ArrayRef<EVT> ResultTypes,
+ bool IsTexFail, bool Unpacked, bool IsD16,
+ int DMaskPop, int NumVDataDwords,
+ const SDLoc &DL, LLVMContext &Context) {
+ // Determine the required return type. This is the same regardless of the IsTexFail flag.
+ EVT ReqRetVT = ResultTypes[0];
+ EVT ReqRetEltVT = ReqRetVT.isVector() ? ReqRetVT.getVectorElementType() : ReqRetVT;
+ int ReqRetNumElts = ReqRetVT.isVector() ? ReqRetVT.getVectorNumElements() : 1;
+ EVT AdjEltVT = Unpacked && IsD16 ? MVT::i32 : ReqRetEltVT;
+ EVT AdjVT = Unpacked ? ReqRetNumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, ReqRetNumElts)
+ : AdjEltVT
+ : ReqRetVT;
+
+ // Extract data part of the result
+ // Bitcast the result to the same type as the required return type
+ int NumElts;
+ if (IsD16 && !Unpacked)
+ NumElts = NumVDataDwords << 1;
+ else
+ NumElts = NumVDataDwords;
+
+ EVT CastVT = NumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, NumElts)
+ : AdjEltVT;
+
+ // Special case for v8f16. Rather than add support for this, use v4i32 to
+ // extract the data elements
+ bool V8F16Special = false;
+ if (CastVT == MVT::v8f16) {
+ CastVT = MVT::v4i32;
+ DMaskPop >>= 1;
+ ReqRetNumElts >>= 1;
+ V8F16Special = true;
+ AdjVT = MVT::v2i32;
+ }
+
+ SDValue N = SDValue(Result, 0);
+ SDValue CastRes = DAG.getNode(ISD::BITCAST, DL, CastVT, N);
+
+ // Iterate over the result
+ SmallVector<SDValue, 4> BVElts;
+
+ if (CastVT.isVector()) {
+ DAG.ExtractVectorElements(CastRes, BVElts, 0, DMaskPop);
+ } else {
+ BVElts.push_back(CastRes);
+ }
+ int ExtraElts = ReqRetNumElts - DMaskPop;
+ while (ExtraElts--)
+ BVElts.push_back(DAG.getUNDEF(AdjEltVT));
+
+ SDValue PreTFCRes;
+ if (ReqRetNumElts > 1) {
+ SDValue NewVec = DAG.getBuildVector(AdjVT, DL, BVElts);
+ if (IsD16 && Unpacked)
+ PreTFCRes = adjustLoadValueTypeImpl(NewVec, ReqRetVT, DL, DAG, Unpacked);
+ else
+ PreTFCRes = NewVec;
+ } else {
+ PreTFCRes = BVElts[0];
+ }
+
+ if (V8F16Special)
+ PreTFCRes = DAG.getNode(ISD::BITCAST, DL, MVT::v4f16, PreTFCRes);
+
+ if (!IsTexFail) {
+ if (Result->getNumValues() > 1)
+ return DAG.getMergeValues({PreTFCRes, SDValue(Result, 1)}, DL);
+ else
+ return PreTFCRes;
+ }
+
+ // Extract the TexFail result and insert into aggregate return
+ SmallVector<SDValue, 1> TFCElt;
+ DAG.ExtractVectorElements(N, TFCElt, DMaskPop, 1);
+ SDValue TFCRes = DAG.getNode(ISD::BITCAST, DL, ResultTypes[1], TFCElt[0]);
+ return DAG.getMergeValues({PreTFCRes, TFCRes, SDValue(Result, 1)}, DL);
+}
+
+static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
+ SDValue *LWE, bool &IsTexFail) {
+ auto TexFailCtrlConst = dyn_cast<ConstantSDNode>(TexFailCtrl.getNode());
+ if (!TexFailCtrlConst)
+ return false;
+
+ uint64_t Value = TexFailCtrlConst->getZExtValue();
+ if (Value) {
+ IsTexFail = true;
+ }
+
+ SDLoc DL(TexFailCtrlConst);
+ *TFE = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x1;
+ *LWE = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x2;
+
+ return Value == 0;
+}
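
To make the texfailctrl handling above concrete: the immediate is treated as a bitfield in which bit 0 requests TFE and bit 1 requests LWE, and any other set bit marks the value as unsupported. A standalone sketch of that decoding (plain C++, not the SelectionDAG version in the patch):

    // Standalone sketch of the texfailctrl decoding: bit 0 -> TFE, bit 1 -> LWE,
    // and any leftover bits make the control value unsupported.
    #include <cstdint>

    static bool decodeTexFailCtrl(uint64_t Value, bool &TFE, bool &LWE) {
      TFE = (Value & 0x1) != 0;
      Value &= ~UINT64_C(0x1);
      LWE = (Value & 0x2) != 0;
      Value &= ~UINT64_C(0x2);
      return Value == 0; // false means some unsupported bit was set
    }

    int main() {
      bool TFE = false, LWE = false;
      bool Ok = decodeTexFailCtrl(/*texfailctrl=*/3, TFE, LWE); // request TFE and LWE
      return (Ok && TFE && LWE) ? 0 : 1;
    }
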
+
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
+ const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
@@ -4559,12 +4776,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
- SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
+ SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
+ SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
+ bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
+ bool AdjustRetType = false;
+
unsigned AddrIdx; // Index of first address argument
unsigned DMask;
+ unsigned DMaskLanes = 0;
if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);
@@ -4587,7 +4809,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
AddrIdx = 3;
}
} else {
- unsigned DMaskIdx;
+ unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
+ auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
+ if (!DMaskConst)
+ return Op;
+ DMask = DMaskConst->getZExtValue();
+ DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
if (BaseOpcode->Store) {
VData = Op.getOperand(2);
@@ -4603,58 +4830,91 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
- DMaskIdx = 3;
} else {
- MVT LoadVT = Op.getSimpleValueType();
+ // Work out the number of dwords based on the dmask popcount and underlying type
+ // and whether packing is supported.
+ MVT LoadVT = ResultTypes[0].getSimpleVT();
if (LoadVT.getScalarType() == MVT::f16) {
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
!BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
- if (LoadVT.isVector() && Subtarget->hasUnpackedD16VMem())
- ResultTypes[0] = (LoadVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32;
}
- NumVDataDwords = (ResultTypes[0].getSizeInBits() + 31) / 32;
- DMaskIdx = isa<MemSDNode>(Op) ? 2 : 1;
- }
+ // Confirm that the return type is large enough for the dmask specified
+ if ((LoadVT.isVector() && LoadVT.getVectorNumElements() < DMaskLanes) ||
+ (!LoadVT.isVector() && DMaskLanes > 1))
+ return Op;
- auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
- if (!DMaskConst)
- return Op;
+ if (IsD16 && !Subtarget->hasUnpackedD16VMem())
+ NumVDataDwords = (DMaskLanes + 1) / 2;
+ else
+ NumVDataDwords = DMaskLanes;
- AddrIdx = DMaskIdx + 1;
- DMask = DMaskConst->getZExtValue();
- if (!DMask && !BaseOpcode->Store) {
- // Eliminate no-op loads. Stores with dmask == 0 are *not* no-op: they
- // store the channels' default values.
- SDValue Undef = DAG.getUNDEF(Op.getValueType());
- if (isa<MemSDNode>(Op))
- return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
- return Undef;
+ AdjustRetType = true;
}
+
+ AddrIdx = DMaskIdx + 1;
}
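
A small standalone sketch of the dword-count arithmetic used above (this mirrors the logic in the patch, it is not the LLVM helper itself): packed D16 results hold two 16-bit lanes per dword, everything else needs one dword per enabled dmask lane.

    // Mirror of the vdata dword-count computation above.
    static int numVDataDwords(int DMaskLanes, bool IsD16, bool HasUnpackedD16VMem) {
      if (IsD16 && !HasUnpackedD16VMem)
        return (DMaskLanes + 1) / 2;
      return DMaskLanes;
    }

    int main() {
      // A 3-lane dmask with packed D16 needs 2 dwords; with 32-bit data it needs 3.
      return (numVDataDwords(3, true, false) == 2 &&
              numVDataDwords(3, false, false) == 3) ? 0 : 1;
    }
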
- unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
- (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
+ unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
+ unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
+ unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
+ NumCoords + NumLCM;
+ unsigned NumMIVAddrs = NumVAddrs;
+
SmallVector<SDValue, 4> VAddrs;
- for (unsigned i = 0; i < NumVAddrs; ++i)
- VAddrs.push_back(Op.getOperand(AddrIdx + i));
// Optimize _L to _LZ when _L is zero
if (LZMappingInfo) {
if (auto ConstantLod =
- dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+ dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
- VAddrs.pop_back(); // remove 'lod'
+ NumMIVAddrs--; // remove 'lod'
}
}
}
+ // Check for 16-bit addresses and pack them if found.
+ unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
+ const MVT VAddrScalarVT = VAddrVT.getScalarType();
+ if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16)) &&
+ ST->hasFeature(AMDGPU::FeatureR128A16)) {
+ IsA16 = true;
+ const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
+ for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
+ SDValue AddrLo, AddrHi;
+ // Push back extra arguments.
+ if (i < DimIdx) {
+ AddrLo = Op.getOperand(i);
+ } else {
+ AddrLo = Op.getOperand(i);
+ // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
+ // in 1D, derivatives dx/dh and dx/dv are packed with undef.
+ if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
+ ((NumGradients / 2) % 2 == 1 &&
+ (i == DimIdx + (NumGradients / 2) - 1 ||
+ i == DimIdx + NumGradients - 1))) {
+ AddrHi = DAG.getUNDEF(MVT::f16);
+ } else {
+ AddrHi = Op.getOperand(i + 1);
+ i++;
+ }
+ AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorVT,
+ {AddrLo, AddrHi});
+ AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
+ }
+ VAddrs.push_back(AddrLo);
+ }
+ } else {
+ for (unsigned i = 0; i < NumMIVAddrs; ++i)
+ VAddrs.push_back(Op.getOperand(AddrIdx + i));
+ }
+
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
@@ -4674,11 +4934,53 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
CtrlIdx = AddrIdx + NumVAddrs + 3;
}
+ SDValue TFE;
+ SDValue LWE;
SDValue TexFail = Op.getOperand(CtrlIdx);
- auto TexFailConst = dyn_cast<ConstantSDNode>(TexFail.getNode());
- if (!TexFailConst || TexFailConst->getZExtValue() != 0)
+ bool IsTexFail = false;
+ if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
return Op;
+ if (IsTexFail) {
+ if (!DMaskLanes) {
+ // Expecting to get an error flag since TFC is on and dmask is 0.
+ // Force dmask to be at least 1, otherwise the instruction will fail.
+ DMask = 0x1;
+ DMaskLanes = 1;
+ NumVDataDwords = 1;
+ }
+ NumVDataDwords += 1;
+ AdjustRetType = true;
+ }
+
+ // Has something earlier tagged that the return type needs adjusting
+ // This happens if the instruction is a load or has set TexFailCtrl flags
+ if (AdjustRetType) {
+ // NumVDataDwords reflects the true number of dwords required in the return type
+ if (DMaskLanes == 0 && !BaseOpcode->Store) {
+ // This is a no-op load. This can be eliminated
+ SDValue Undef = DAG.getUNDEF(Op.getValueType());
+ if (isa<MemSDNode>(Op))
+ return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
+ return Undef;
+ }
+
+ // Have to use a power of 2 number of dwords
+ NumVDataDwords = 1 << Log2_32_Ceil(NumVDataDwords);
+
+ EVT NewVT = NumVDataDwords > 1 ?
+ EVT::getVectorVT(*DAG.getContext(), MVT::f32, NumVDataDwords)
+ : MVT::f32;
+
+ ResultTypes[0] = NewVT;
+ if (ResultTypes.size() == 3) {
+ // The original result was an aggregate type used for TexFailCtrl results.
+ // The actual instruction returns as a vector type which has now been
+ // created. Remove the aggregate result.
+ ResultTypes.erase(&ResultTypes[1]);
+ }
+ }
+
SDValue GLC;
SDValue SLC;
if (BaseOpcode->Atomic) {
@@ -4701,9 +5003,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(Unorm);
Ops.push_back(GLC);
Ops.push_back(SLC);
- Ops.push_back(False); // r128
- Ops.push_back(False); // tfe
- Ops.push_back(False); // lwe
+ Ops.push_back(IsA16 && // a16 or r128
+ ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
+ Ops.push_back(TFE); // tfe
+ Ops.push_back(LWE); // lwe
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
@@ -4723,25 +5026,90 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
- *MemRefs = MemOp->getMemOperand();
- NewNode->setMemRefs(MemRefs, MemRefs + 1);
+ MachineMemOperand *MemRef = MemOp->getMemOperand();
+ DAG.setNodeMemRefs(NewNode, {MemRef});
}
if (BaseOpcode->AtomicX2) {
SmallVector<SDValue, 1> Elt;
DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
- } else if (IsD16 && !BaseOpcode->Store) {
- MVT LoadVT = Op.getSimpleValueType();
- SDValue Adjusted = adjustLoadValueTypeImpl(
- SDValue(NewNode, 0), LoadVT, DL, DAG, Subtarget->hasUnpackedD16VMem());
- return DAG.getMergeValues({Adjusted, SDValue(NewNode, 1)}, DL);
+ } else if (!BaseOpcode->Store) {
+ return constructRetValue(DAG, NewNode,
+ OrigResultTypes, IsTexFail,
+ Subtarget->hasUnpackedD16VMem(), IsD16,
+ DMaskLanes, NumVDataDwords, DL,
+ *DAG.getContext());
}
return SDValue(NewNode, 0);
}
+SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
+ SDValue Offset, SDValue GLC,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ VT.getStoreSize(), VT.getStoreSize());
+
+ if (!Offset->isDivergent()) {
+ SDValue Ops[] = {
+ Rsrc,
+ Offset, // Offset
+ GLC // glc
+ };
+ return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
+ DAG.getVTList(VT), Ops, VT, MMO);
+ }
+
+ // We have a divergent offset. Emit a MUBUF buffer load instead. We can
+ // assume that the buffer is unswizzled.
+ SmallVector<SDValue, 4> Loads;
+ unsigned NumLoads = 1;
+ MVT LoadVT = VT.getSimpleVT();
+ unsigned NumElts = LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
+ assert((LoadVT.getScalarType() == MVT::i32 ||
+ LoadVT.getScalarType() == MVT::f32) &&
+ isPowerOf2_32(NumElts));
+
+ if (NumElts == 8 || NumElts == 16) {
+ NumLoads = NumElts == 16 ? 4 : 2;
+ LoadVT = MVT::v4i32;
+ }
+
+ SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
+ unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
+ SDValue Ops[] = {
+ DAG.getEntryNode(), // Chain
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ {}, // voffset
+ {}, // soffset
+ {}, // offset
+ DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ // Use the alignment to ensure that the required offsets will fit into the
+ // immediate offsets.
+ setBufferOffsets(Offset, DAG, &Ops[3], NumLoads > 1 ? 16 * NumLoads : 4);
+
+ uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
+ Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
+ Ops, LoadVT, MMO));
+ }
+
+ if (VT == MVT::v8i32 || VT == MVT::v16i32)
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Loads);
+
+ return Loads[0];
+}
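
For the divergent-offset path above, the wide s_buffer_load result is assembled from several v4i32 MUBUF loads placed 16 bytes apart. A standalone sketch of just that stride bookkeeping (hypothetical helper, not the DAG code in the patch):

    // Standalone sketch: a divergent load of 8 or 16 dwords is split into 2 or 4
    // sub-loads of 4 dwords each, 16 bytes apart, starting at the base offset.
    #include <cstdint>
    #include <vector>

    static std::vector<uint32_t> subLoadOffsets(unsigned NumElts, uint32_t BaseOffset) {
      unsigned NumLoads = (NumElts == 16) ? 4 : (NumElts == 8) ? 2 : 1;
      std::vector<uint32_t> Offsets;
      for (unsigned I = 0; I < NumLoads; ++I)
        Offsets.push_back(BaseOffset + 16 * I); // each piece covers 4 dwords
      return Offsets;
    }

    int main() {
      auto Offs = subLoadOffsets(16, 256); // 4 sub-loads at 256, 272, 288, 304
      return Offs.size() == 4 ? 0 : 1;
    }
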
+
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -4755,14 +5123,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
case Intrinsic::amdgcn_implicit_buffer_ptr: {
- if (getSubtarget()->isAmdCodeObjectV2(MF.getFunction()))
+ if (getSubtarget()->isAmdHsaOrMesa(MF.getFunction()))
return emitNonHSAIntrinsicError(DAG, DL, VT);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
}
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
- if (!Subtarget->isAmdCodeObjectV2(MF.getFunction())) {
+ if (!Subtarget->isAmdHsaOrMesa(MF.getFunction())) {
DiagnosticInfoUnsupported BadIntrin(
MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
@@ -4880,12 +5248,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::r600_read_tgid_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
- case Intrinsic::amdgcn_workitem_id_x: {
+ case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::r600_read_tidig_x:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDX);
- }
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
@@ -4896,19 +5263,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);
- case AMDGPUIntrinsic::SI_load_const: {
- SDValue Ops[] = {
- Op.getOperand(1),
- Op.getOperand(2)
- };
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, VT, MMO);
+ case SIIntrinsic::SI_load_const: {
+ SDValue Load =
+ lowerSBuffer(MVT::i32, DL, Op.getOperand(1), Op.getOperand(2),
+ DAG.getTargetConstant(0, DL, MVT::i1), DAG);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Load);
+ }
+ case Intrinsic::amdgcn_s_buffer_load: {
+ unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2),
+ DAG.getTargetConstant(Cache & 1, DL, MVT::i1), DAG);
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
@@ -4991,34 +5355,15 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Denominator, Numerator);
}
case Intrinsic::amdgcn_icmp: {
- const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
- int CondCode = CD->getSExtValue();
- if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
- CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
- return DAG.getUNDEF(VT);
-
- ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
- ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
- Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ // There is a Pat that handles this variant, so return it as-is.
+ if (Op.getOperand(1).getValueType() == MVT::i1 &&
+ Op.getConstantOperandVal(2) == 0 &&
+ Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
+ return Op;
+ return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fcmp: {
- const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
- int CondCode = CD->getSExtValue();
- if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
- CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE)
- return DAG.getUNDEF(VT);
-
- FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
- ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
- Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ return lowerFCMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fmed3:
return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
@@ -5058,6 +5403,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
+ if (isTypeLegal(VT))
+ return DAG.getNode(Opcode, DL, VT, Op.getOperand(1), Op.getOperand(2));
+
SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::BITCAST, DL, VT, Node);
@@ -5127,36 +5475,104 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
- Op.getOperand(4), // offset
- Op.getOperand(5), // glc
- Op.getOperand(6) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
+ EVT VT = Op.getValueType();
+ EVT IntVT = VT.changeTypeToInteger();
+ auto *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ unsigned Opc = (IntrID == Intrinsic::amdgcn_raw_buffer_load) ?
+ AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
- bool IsD16 = LoadVT.getScalarType() == MVT::f16;
- if (IsD16)
- return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG);
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+
+ unsigned Opc = (IntrID == Intrinsic::amdgcn_struct_buffer_load) ?
+ AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+
+ EVT VT = Op.getValueType();
+ EVT IntVT = VT.changeTypeToInteger();
+ auto *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
M->getMemOperand());
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
- bool IsD16 = LoadVT.getScalarType() == MVT::f16;
- if (IsD16) {
- return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16, M, DAG);
- }
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
@@ -5164,12 +5580,62 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
- Op.getOperand(7), // dfmt
- Op.getOperand(8), // nfmt
- Op.getOperand(9), // glc
- Op.getOperand(10) // slc
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ };
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT,
+ M->getMemOperand());
+ }
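
The legacy tbuffer operands are folded here into the two combined immediates used by the new node layout: dfmt and nfmt become a single format field, and glc/slc become a single cachepolicy field. A standalone sketch of that packing, assuming only the bit positions visible in the code above:

    // Standalone sketch of the operand packing above (bit layout taken from the
    // expressions in this patch): format = dfmt | (nfmt << 4),
    // cachepolicy = glc | (slc << 1).
    #include <cstdint>

    static uint32_t packFormat(uint32_t Dfmt, uint32_t Nfmt) {
      return Dfmt | (Nfmt << 4);
    }

    static uint32_t packCachePolicy(uint32_t Glc, uint32_t Slc) {
      return Glc | (Slc << 1);
    }

    int main() {
      // e.g. dfmt=14, nfmt=4 -> format 0x4e; glc=1, slc=0 -> cachepolicy 0x1.
      return (packFormat(14, 4) == 0x4e && packCachePolicy(1, 0) == 1) ? 0 : 1;
    }
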
+ case Intrinsic::amdgcn_raw_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+ auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // format
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
};
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT,
M->getMemOperand());
@@ -5184,14 +5650,22 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor: {
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // offset
- Op.getOperand(6) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@@ -5235,16 +5709,193 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
}
+ case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
+ unsigned Opcode = 0;
+
+ switch (IntrID) {
+ case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
+ break;
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
+ break;
+ default:
+ llvm_unreachable("unhandled atomic opcode");
+ }
+
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
+ unsigned Opcode = 0;
+
+ switch (IntrID) {
+ case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
+ break;
+ default:
+ llvm_unreachable("unhandled atomic opcode");
+ }
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(5)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
Op.getOperand(5), // vindex
- Op.getOperand(6), // offset
- Op.getOperand(7) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ };
+ setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
+ EVT VT = Op.getValueType();
+ auto *M = cast<MemSDNode>(Op);
+
+ return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
+ Op->getVTList(), Ops, VT, M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Op.getOperand(4), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ EVT VT = Op.getValueType();
+ auto *M = cast<MemSDNode>(Op);
+
+ return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
+ Op->getVTList(), Ops, VT, M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: {
+ auto Offsets = splitBufferOffsets(Op.getOperand(6), DAG);
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Op.getOperand(4), // rsrc
+ Op.getOperand(5), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(7), // soffset
+ Offsets.second, // offset
+ Op.getOperand(8), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@@ -5360,19 +6011,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
Op.getOperand(2), Op.getOperand(3));
}
- case AMDGPUIntrinsic::AMDGPU_kill: {
- SDValue Src = Op.getOperand(2);
- if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
- if (!K->isNegative())
- return Chain;
-
- SDValue NegOne = DAG.getTargetConstant(FloatToBits(-1.0f), DL, MVT::i32);
- return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, NegOne);
- }
-
- SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
- return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
- }
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -5383,69 +6021,79 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
return SDValue();
};
- case AMDGPUIntrinsic::SI_tbuffer_store: {
-
- // Extract vindex and voffset from vaddr as appropriate
- const ConstantSDNode *OffEn = cast<ConstantSDNode>(Op.getOperand(10));
- const ConstantSDNode *IdxEn = cast<ConstantSDNode>(Op.getOperand(11));
- SDValue VAddr = Op.getOperand(5);
-
- SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
-
- assert(!(OffEn->isOne() && IdxEn->isOne()) &&
- "Legacy intrinsic doesn't support both offset and index - use new version");
-
- SDValue VIndex = IdxEn->isOne() ? VAddr : Zero;
- SDValue VOffset = OffEn->isOne() ? VAddr : Zero;
-
- // Deal with the vec-3 case
- const ConstantSDNode *NumChannels = cast<ConstantSDNode>(Op.getOperand(4));
- auto Opcode = NumChannels->getZExtValue() == 3 ?
- AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT;
-
+ case Intrinsic::amdgcn_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
- Chain,
- Op.getOperand(3), // vdata
- Op.getOperand(2), // rsrc
- VIndex,
- VOffset,
- Op.getOperand(6), // soffset
- Op.getOperand(7), // inst_offset
- Op.getOperand(8), // dfmt
- Op.getOperand(9), // nfmt
- Op.getOperand(12), // glc
- Op.getOperand(13), // slc
+ Chain,
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Op.getOperand(5), // voffset
+ Op.getOperand(6), // soffset
+ Op.getOperand(7), // offset
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
-
- assert((cast<ConstantSDNode>(Op.getOperand(14)))->getZExtValue() == 0 &&
- "Value of tfe other than zero is unsupported");
-
- EVT VT = Op.getOperand(3).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(Opcode, DL,
- Op->getVTList(), Ops, VT, MMO);
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
}
- case Intrinsic::amdgcn_tbuffer_store: {
+ case Intrinsic::amdgcn_struct_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // voffset
+ Offsets.first, // voffset
Op.getOperand(6), // soffset
- Op.getOperand(7), // offset
- Op.getOperand(8), // dfmt
- Op.getOperand(9), // nfmt
- Op.getOperand(10), // glc
- Op.getOperand(11) // slc
+ Offsets.second, // offset
+ Op.getOperand(7), // format
+ Op.getOperand(8), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_raw_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -5460,15 +6108,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Chain,
- VData, // vdata
+ VData,
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
- Op.getOperand(5), // offset
- Op.getOperand(6), // glc
- Op.getOperand(7) // slc
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
};
+ setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
@@ -5476,6 +6132,59 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
+
+ case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_buffer_store_format: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData,
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IntrinsicID == Intrinsic::amdgcn_raw_buffer_store ?
+ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_struct_buffer_store:
+ case Intrinsic::amdgcn_struct_buffer_store_format: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData,
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
+ };
+ unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
+ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -5486,6 +6195,94 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
}
+// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
+// offset (the offset that is included in bounds checking and swizzling, to be
+// split between the instruction's voffset and immoffset fields) and soffset
+// (the offset that is excluded from bounds checking and swizzling, to go in
+// the instruction's soffset field). This function takes the first kind of
+// offset and figures out how to split it between voffset and immoffset.
+std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
+ SDValue Offset, SelectionDAG &DAG) const {
+ SDLoc DL(Offset);
+ const unsigned MaxImm = 4095;
+ SDValue N0 = Offset;
+ ConstantSDNode *C1 = nullptr;
+
+ if ((C1 = dyn_cast<ConstantSDNode>(N0)))
+ N0 = SDValue();
+ else if (DAG.isBaseWithConstantOffset(N0)) {
+ C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ N0 = N0.getOperand(0);
+ }
+
+ if (C1) {
+ unsigned ImmOffset = C1->getZExtValue();
+ // If the immediate value is too big for the immoffset field, put only its
+ // low 12 bits into the immoffset field, so that the value that is copied/added
+ // for the voffset field is a multiple of 4096, and it stands more chance
+ // of being CSEd with the copy/add for another similar load/store.
+ // However, do not do that rounding down to a multiple of 4096 if that is a
+ // negative number, as it appears to be illegal to have a negative offset
+ // in the vgpr, even if adding the immediate offset makes it positive.
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ if ((int32_t)Overflow < 0) {
+ Overflow += ImmOffset;
+ ImmOffset = 0;
+ }
+ C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+ if (Overflow) {
+ auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
+ if (!N0)
+ N0 = OverflowVal;
+ else {
+ SDValue Ops[] = { N0, OverflowVal };
+ N0 = DAG.getNode(ISD::ADD, DL, MVT::i32, Ops);
+ }
+ }
+ }
+ if (!N0)
+ N0 = DAG.getConstant(0, DL, MVT::i32);
+ if (!C1)
+ C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+ return {N0, SDValue(C1, 0)};
+}
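The arithmetic above is easier to follow with concrete numbers. The helper below is an editorial sketch, not part of the patch; splitImmOffsetSketch is a made-up name and only the constant-offset path of splitBufferOffsets is modelled.

#include <cstdint>
#include <utility>

// Returns {amount copied/added into voffset, value left for the immoffset field}.
static std::pair<uint32_t, uint32_t> splitImmOffsetSketch(uint32_t Combined) {
  const uint32_t MaxImm = 4095;
  uint32_t ImmOffset = Combined & MaxImm;  // low 12 bits stay in immoffset
  uint32_t Overflow = Combined & ~MaxImm;  // a multiple of 4096 goes to voffset
  if ((int32_t)Overflow < 0) {
    // Never leave a negative value for the VGPR; push everything into voffset.
    Overflow += ImmOffset;
    ImmOffset = 0;
  }
  return {Overflow, ImmOffset};
}
// Examples: splitImmOffsetSketch(60) == {0, 60}, splitImmOffsetSketch(5000) == {4096, 904}.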
+
+// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
+// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
+// pointed to by Offsets.
+void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
+ SelectionDAG &DAG, SDValue *Offsets,
+ unsigned Align) const {
+ SDLoc DL(CombinedOffset);
+ if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
+ uint32_t Imm = C->getZExtValue();
+ uint32_t SOffset, ImmOffset;
+ if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
+ Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+ return;
+ }
+ }
+ if (DAG.isBaseWithConstantOffset(CombinedOffset)) {
+ SDValue N0 = CombinedOffset.getOperand(0);
+ SDValue N1 = CombinedOffset.getOperand(1);
+ uint32_t SOffset, ImmOffset;
+ int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
+ if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
+ Subtarget, Align)) {
+ Offsets[0] = N0;
+ Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+ return;
+ }
+ }
+ Offsets[0] = CombinedOffset;
+ Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+}
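As an editorial illustration of the resulting {voffset, soffset, instoffset} triple (not part of the patch, and assuming AMDGPU::splitMUBUFOffset accepts a small, suitably aligned immediate unchanged):

// CombinedOffset = constant 60     ->  Offsets = { 0,              0, 60 }
// CombinedOffset = add %base, 60   ->  Offsets = { %base,          0, 60 }
// CombinedOffset = anything else   ->  Offsets = { CombinedOffset, 0,  0 }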
+
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
ISD::LoadExtType ExtType, SDValue Op,
const SDLoc &SL, EVT VT) {
@@ -5513,8 +6310,8 @@ SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const
// FIXME: Constant loads should all be marked invariant.
unsigned AS = Ld->getAddressSpace();
- if (AS != AMDGPUASI.CONSTANT_ADDRESS &&
- AS != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
+ if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
+ AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
(AS != AMDGPUAS::GLOBAL_ADDRESS || !Ld->isInvariant()))
return SDValue();
@@ -5625,15 +6422,15 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
- if (!Op->isDivergent() && Alignment >= 4)
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32)
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -5641,28 +6438,28 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
//
}
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
- AS == AMDGPUASI.GLOBAL_ADDRESS) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
- Alignment >= 4)
+ Alignment >= 4 && NumElements < 32)
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
//
}
- if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
- AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v4 loads are supported for private and global memory.
return SDValue();
}
- if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
@@ -5681,7 +6478,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
// Use ds_read_b128 if possible.
if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
MemVT.getStoreSize() == 16)
@@ -5689,6 +6486,17 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
+
+ // SI has a hardware bug in the LDS / GDS bounds checking: if the base
+ // address is negative, then the instruction is incorrectly treated as
+ // out-of-bounds even if base + offset is in bounds. Split vectorized
+ // loads here to avoid emitting ds_read2_b32. We may re-combine the
+ // load later in the SILoadStoreOptimizer.
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ NumElements == 2 && MemVT.getStoreSize() == 8 &&
+ Load->getAlignment() < 8) {
+ return SplitVectorLoad(Op, DAG);
+ }
}
return SDValue();
}
@@ -6058,17 +6866,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
- if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
- AS == AMDGPUASI.FLAT_ADDRESS) {
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
return SDValue();
- } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
+ } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
@@ -6083,7 +6891,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
// Use ds_write_b128 if possible.
if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
VT.getStoreSize() == 16)
@@ -6091,6 +6899,18 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
+
+ // SI has a hardware bug in the LDS / GDS bounds checking: if the base
+ // address is negative, then the instruction is incorrectly treated as
+ // out-of-bounds even if base + offset is in bounds. Split vectorized
+ // stores here to avoid emitting ds_write2_b32. We may re-combine the
+ // store later in the SILoadStoreOptimizer.
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ NumElements == 2 && VT.getStoreSize() == 8 &&
+ Store->getAlignment() < 8) {
+ return SplitVectorStore(Op, DAG);
+ }
+
return SDValue();
} else {
llvm_unreachable("unhandled address space");
@@ -6101,17 +6921,24 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
+ SDValue TrigVal;
+
// TODO: Should this propagate fast-math-flags?
- SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
- DAG.getNode(ISD::FMUL, DL, VT, Arg,
- DAG.getConstantFP(0.5/M_PI, DL,
- VT)));
+
+ SDValue OneOver2Pi = DAG.getConstantFP(0.5 / M_PI, DL, VT);
+
+ if (Subtarget->hasTrigReducedRange()) {
+ SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
+ TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal);
+ } else {
+ TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
+ }
switch (Op.getOpcode()) {
case ISD::FCOS:
- return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
+ return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal);
case ISD::FSIN:
- return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
+ return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal);
default:
llvm_unreachable("Wrong trig opcode");
}
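Put another way (an editorial gloss, not part of the patch): both paths pre-scale the argument by 1/(2*pi), and subtargets whose SIN/COS hardware only accepts a reduced input range additionally wrap the product in FRACT:

// sin(x) -> SIN_HW(fract(x * 1/(2*pi)))   on hasTrigReducedRange() subtargets
// sin(x) -> SIN_HW(x * 1/(2*pi))          otherwise, and likewise for cos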
@@ -6123,7 +6950,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
- if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
+ if (!isFlatGlobalAddrSpace(AS))
return Op;
// Non-local address space requires custom lowering for atomic compare
@@ -6475,6 +7302,29 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
}
}
+ if (RHS.getOpcode() == ISD::SETCC && LHS.getOpcode() == AMDGPUISD::FP_CLASS)
+ std::swap(LHS, RHS);
+
+ if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == AMDGPUISD::FP_CLASS &&
+ RHS.hasOneUse()) {
+ ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ // and (fcmp seto), (fp_class x, mask) -> fp_class x, mask & ~(p_nan | n_nan)
+ // and (fcmp setuo), (fp_class x, mask) -> fp_class x, mask & (p_nan | n_nan)
+ const ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if ((LCC == ISD::SETO || LCC == ISD::SETUO) && Mask &&
+ (RHS.getOperand(0) == LHS.getOperand(0) &&
+ LHS.getOperand(0) == LHS.getOperand(1))) {
+ const unsigned OrdMask = SIInstrFlags::S_NAN | SIInstrFlags::Q_NAN;
+ unsigned NewMask = LCC == ISD::SETO ?
+ Mask->getZExtValue() & ~OrdMask :
+ Mask->getZExtValue() & OrdMask;
+
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1, RHS.getOperand(0),
+ DAG.getConstant(NewMask, DL, MVT::i32));
+ }
+ }
+
if (VT == MVT::i32 &&
(RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
// and x, (sext cc from i1) => select cc, x, 0
@@ -6798,158 +7648,294 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
return AMDGPUTargetLowering::performRcpCombine(N, DCI);
}
-static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
- if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
+bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
+ unsigned MaxDepth) const {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::FCANONICALIZE)
return true;
- return DAG.isKnownNeverNaN(Op);
-}
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ auto F = CFP->getValueAPF();
+ if (F.isNaN() && F.isSignaling())
+ return false;
+ return !F.isDenormal() || denormalsEnabledForType(Op.getValueType());
+ }
-static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
- const GCNSubtarget *ST, unsigned MaxDepth=5) {
// If source is a result of another standard FP operation it is already in
// canonical form.
+ if (MaxDepth == 0)
+ return false;
- switch (Op.getOpcode()) {
- default:
- break;
-
+ switch (Opcode) {
// These will flush denorms if required.
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
- case ISD::FSQRT:
case ISD::FCEIL:
case ISD::FFLOOR:
case ISD::FMA:
case ISD::FMAD:
-
- case ISD::FCANONICALIZE:
- return true;
-
+ case ISD::FSQRT:
+ case ISD::FDIV:
+ case ISD::FREM:
case ISD::FP_ROUND:
- return Op.getValueType().getScalarType() != MVT::f16 ||
- ST->hasFP16Denormals();
-
case ISD::FP_EXTEND:
- return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
- ST->hasFP16Denormals();
+ case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::FMAD_FTZ:
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::RSQ_CLAMP:
+ case AMDGPUISD::RCP_LEGACY:
+ case AMDGPUISD::RSQ_LEGACY:
+ case AMDGPUISD::RCP_IFLAG:
+ case AMDGPUISD::TRIG_PREOP:
+ case AMDGPUISD::DIV_SCALE:
+ case AMDGPUISD::DIV_FMAS:
+ case AMDGPUISD::DIV_FIXUP:
+ case AMDGPUISD::FRACT:
+ case AMDGPUISD::LDEXP:
+ case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
+ return true;
// It can/will be lowered or combined as a bit operation.
// Need to check their input recursively to handle.
case ISD::FNEG:
case ISD::FABS:
- return (MaxDepth > 0) &&
- isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1);
+ case ISD::FCOPYSIGN:
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
case ISD::FSIN:
case ISD::FCOS:
case ISD::FSINCOS:
return Op.getValueType().getScalarType() != MVT::f16;
- // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms.
- // For such targets need to check their input recursively.
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FMINNAN:
- case ISD::FMAXNAN:
-
- if (ST->supportsMinMaxDenormModes() &&
- DAG.isKnownNeverNaN(Op.getOperand(0)) &&
- DAG.isKnownNeverNaN(Op.getOperand(1)))
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case AMDGPUISD::CLAMP:
+ case AMDGPUISD::FMED3:
+ case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMIN3: {
+ // FIXME: Shouldn't treat the generic operations differently based on these.
+ // However, we aren't really required to flush the result from
+ // minnum/maxnum.
+
+ // snans will be quieted, so we only need to worry about denormals.
+ if (Subtarget->supportsMinMaxDenormModes() ||
+ denormalsEnabledForType(Op.getValueType()))
return true;
- return (MaxDepth > 0) &&
- isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1) &&
- isCanonicalized(DAG, Op.getOperand(1), ST, MaxDepth - 1);
+ // Flushing may be required.
+ // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. For such
+ // targets need to check their input recursively.
+
+ // FIXME: Does this apply with clamp? It's implemented with max.
+ for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
+ if (!isCanonicalized(DAG, Op.getOperand(I), MaxDepth - 1))
+ return false;
+ }
+
+ return true;
+ }
+ case ISD::SELECT: {
+ return isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1) &&
+ isCanonicalized(DAG, Op.getOperand(2), MaxDepth - 1);
+ }
+ case ISD::BUILD_VECTOR: {
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ if (!isCanonicalized(DAG, SrcOp, MaxDepth - 1))
+ return false;
+ }
- case ISD::ConstantFP: {
- auto F = cast<ConstantFPSDNode>(Op)->getValueAPF();
- return !F.isDenormal() && !(F.isNaN() && F.isSignaling());
+ return true;
}
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR: {
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
}
- return false;
+ case ISD::INSERT_VECTOR_ELT: {
+ return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
+ isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
+ }
+ case ISD::UNDEF:
+ // Could be anything.
+ return false;
+
+ case ISD::BITCAST: {
+ // Hack around the mess we make when legalizing extract_vector_elt
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i16 &&
+ Src.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncSrc = Src.getOperand(0);
+ if (TruncSrc.getValueType() == MVT::i32 &&
+ TruncSrc.getOpcode() == ISD::BITCAST &&
+ TruncSrc.getOperand(0).getValueType() == MVT::v2f16) {
+ return isCanonicalized(DAG, TruncSrc.getOperand(0), MaxDepth - 1);
+ }
+ }
+
+ return false;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID
+ = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Handle more intrinsics
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_cvt_pkrtz:
+ case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_frexp_mant:
+ case Intrinsic::amdgcn_fdot2:
+ return true;
+ default:
+ break;
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+ default:
+ return denormalsEnabledForType(Op.getValueType()) &&
+ DAG.isKnownNeverSNaN(Op);
+ }
+
+ llvm_unreachable("invalid operation");
}
// Constant fold canonicalize.
+SDValue SITargetLowering::getCanonicalConstantFP(
+ SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
+ // Flush denormals to 0 if not enabled.
+ if (C.isDenormal() && !denormalsEnabledForType(VT))
+ return DAG.getConstantFP(0.0, SL, VT);
+
+ if (C.isNaN()) {
+ APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
+ if (C.isSignaling()) {
+ // Quiet a signaling NaN.
+ // FIXME: Is this supposed to preserve payload bits?
+ return DAG.getConstantFP(CanonicalQNaN, SL, VT);
+ }
+
+ // Make sure it is the canonical NaN bitpattern.
+ //
+ // TODO: Can we use -1 as the canonical NaN value since it's an inline
+ // immediate?
+ if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
+ return DAG.getConstantFP(CanonicalQNaN, SL, VT);
+ }
+
+ // Already canonical.
+ return DAG.getConstantFP(C, SL, VT);
+}
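A rough stand-alone model of the folding above, as an editorial sketch for f32 only: it assumes denormals are disabled for the type, ignores sign and NaN payload details, and canonicalizeF32BitsSketch is a made-up name, not part of the patch.

#include <cstdint>

static uint32_t canonicalizeF32BitsSketch(uint32_t Bits) {
  const uint32_t CanonicalQNaN = 0x7fc00000u;
  uint32_t Exp  = (Bits >> 23) & 0xffu;
  uint32_t Mant = Bits & 0x7fffffu;
  if (Exp == 0 && Mant != 0)      // denormal: flushed to +0.0 when denorms are off
    return 0;
  if (Exp == 0xffu && Mant != 0)  // any NaN: quieted/normalized to the canonical qNaN
    return CanonicalQNaN;
  return Bits;                    // zeros, normals and infinities are already canonical
}
// e.g. 0x7f800001 (sNaN) -> 0x7fc00000, 0x00000001 (denormal) -> 0x00000000.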
+
+static bool vectorEltWillFoldAway(SDValue Op) {
+ return Op.isUndef() || isa<ConstantFPSDNode>(Op);
+}
+
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
// fcanonicalize undef -> qnan
if (N0.isUndef()) {
- EVT VT = N->getValueType(0);
APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
return DAG.getConstantFP(QNaN, SDLoc(N), VT);
}
- ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0);
- if (!CFP) {
- SDValue N0 = N->getOperand(0);
- EVT VT = N0.getValueType().getScalarType();
- auto ST = getSubtarget();
-
- if (((VT == MVT::f32 && ST->hasFP32Denormals()) ||
- (VT == MVT::f64 && ST->hasFP64Denormals()) ||
- (VT == MVT::f16 && ST->hasFP16Denormals())) &&
- DAG.isKnownNeverNaN(N0))
- return N0;
+ if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0)) {
+ EVT VT = N->getValueType(0);
+ return getCanonicalConstantFP(DAG, SDLoc(N), VT, CFP->getValueAPF());
+ }
- bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+ // fcanonicalize (build_vector x, k) -> build_vector (fcanonicalize x),
+ // (fcanonicalize k)
+ //
+ // fcanonicalize (build_vector x, undef) -> build_vector (fcanonicalize x), 0
- if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) &&
- isCanonicalized(DAG, N0, ST))
- return N0;
+ // TODO: This could be better with wider vectors that will be split to v2f16,
+ // and to consider uses since there aren't that many packed operations.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16 &&
+ isTypeLegal(MVT::v2f16)) {
+ SDLoc SL(N);
+ SDValue NewElts[2];
+ SDValue Lo = N0.getOperand(0);
+ SDValue Hi = N0.getOperand(1);
+ EVT EltVT = Lo.getValueType();
+
+ if (vectorEltWillFoldAway(Lo) || vectorEltWillFoldAway(Hi)) {
+ for (unsigned I = 0; I != 2; ++I) {
+ SDValue Op = N0.getOperand(I);
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
+ CFP->getValueAPF());
+ } else if (Op.isUndef()) {
+ // Handled below based on what the other operand is.
+ NewElts[I] = Op;
+ } else {
+ NewElts[I] = DAG.getNode(ISD::FCANONICALIZE, SL, EltVT, Op);
+ }
+ }
- return SDValue();
- }
+ // If one half is undef, and one is constant, prefer a splat vector rather
+ // than the normal qNaN. If it's a register, prefer 0.0 since that's
+ // cheaper to use and may be free with a packed operation.
+ if (NewElts[0].isUndef()) {
+ if (isa<ConstantFPSDNode>(NewElts[1]))
+ NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
+ NewElts[1]: DAG.getConstantFP(0.0f, SL, EltVT);
+ }
- const APFloat &C = CFP->getValueAPF();
+ if (NewElts[1].isUndef()) {
+ NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
+ NewElts[0] : DAG.getConstantFP(0.0f, SL, EltVT);
+ }
- // Flush denormals to 0 if not enabled.
- if (C.isDenormal()) {
- EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
- if (SVT == MVT::f32 && !Subtarget->hasFP32Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
+ return DAG.getBuildVector(VT, SL, NewElts);
+ }
+ }
- if (SVT == MVT::f64 && !Subtarget->hasFP64Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
+ unsigned SrcOpc = N0.getOpcode();
- if (SVT == MVT::f16 && !Subtarget->hasFP16Denormals())
- return DAG.getConstantFP(0.0, SDLoc(N), VT);
- }
+ // If it's free to do so, push canonicalizes further up the source, which may
+ // find a canonical source.
+ //
+ // TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for
+ // sNaNs.
+ if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) {
+ auto *CRHS = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CRHS && N0.hasOneUse()) {
+ SDLoc SL(N);
+ SDValue Canon0 = DAG.getNode(ISD::FCANONICALIZE, SL, VT,
+ N0.getOperand(0));
+ SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF());
+ DCI.AddToWorklist(Canon0.getNode());
- if (C.isNaN()) {
- EVT VT = N->getValueType(0);
- APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
- if (C.isSignaling()) {
- // Quiet a signaling NaN.
- return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
+ return DAG.getNode(N0.getOpcode(), SL, VT, Canon0, Canon1);
}
-
- // Make sure it is the canonical NaN bitpattern.
- //
- // TODO: Can we use -1 as the canonical NaN value since it's an inline
- // immediate?
- if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
- return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return N0;
+ return isCanonicalized(DAG, N0) ? N0 : SDValue();
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
+ case ISD::FMAXNUM_IEEE:
return AMDGPUISD::FMAX3;
case ISD::SMAX:
return AMDGPUISD::SMAX3;
case ISD::UMAX:
return AMDGPUISD::UMAX3;
case ISD::FMINNUM:
+ case ISD::FMINNUM_IEEE:
return AMDGPUISD::FMIN3;
case ISD::SMIN:
return AMDGPUISD::SMIN3;
@@ -7044,11 +8030,18 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
// then give the other result, which is different from med3 with a NaN
// input.
SDValue Var = Op0.getOperand(0);
- if (!isKnownNeverSNan(DAG, Var))
+ if (!DAG.isKnownNeverSNaN(Var))
return SDValue();
- return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
- Var, SDValue(K0, 0), SDValue(K1, 0));
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+ if ((!K0->hasOneUse() ||
+ TII->isInlineConstant(K0->getValueAPF().bitcastToAPInt())) &&
+ (!K1->hasOneUse() ||
+ TII->isInlineConstant(K1->getValueAPF().bitcastToAPInt()))) {
+ return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
+ Var, SDValue(K0, 0), SDValue(K1, 0));
+ }
}
return SDValue();
@@ -7109,6 +8102,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
// fminnum(fmaxnum(x, K0), K1), K0 < K1 && !is_snan(x) -> fmed3(x, K0, K1)
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
+ (Opc == ISD::FMINNUM_IEEE && Op0.getOpcode() == ISD::FMAXNUM_IEEE) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
(VT == MVT::f32 || VT == MVT::f64 ||
@@ -7216,9 +8210,11 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
switch(Opc) {
default:
- return SDValue();
+ break;
// TODO: Support other binary operations.
case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
case ISD::ADD:
case ISD::UMIN:
case ISD::UMAX:
@@ -7226,25 +8222,54 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
case ISD::SMAX:
case ISD::FMAXNUM:
case ISD::FMINNUM:
- return DAG.getNode(Opc, SL, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Vec.getOperand(0), Idx),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Vec.getOperand(1), Idx));
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE: {
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec.getOperand(0), Idx);
+ SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec.getOperand(1), Idx);
+
+ DCI.AddToWorklist(Elt0.getNode());
+ DCI.AddToWorklist(Elt1.getNode());
+ return DAG.getNode(Opc, SL, EltVT, Elt0, Elt1, Vec->getFlags());
+ }
}
}
- if (!DCI.isBeforeLegalize())
- return SDValue();
-
unsigned VecSize = VecVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
+ // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
+ // This eliminates a non-constant index and the subsequent movrel or scratch access.
+ // Sub-dword vectors of size 2 dwords or less have a better implementation.
+ // Vectors bigger than 8 dwords would yield too many v_cndmask_b32
+ // instructions.
+ if (VecSize <= 256 && (VecSize > 64 || EltSize >= 32) &&
+ !isa<ConstantSDNode>(N->getOperand(1))) {
+ SDLoc SL(N);
+ SDValue Idx = N->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDValue V;
+ for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+ SDValue IC = DAG.getConstant(I, SL, IdxVT);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
+ if (I == 0)
+ V = Elt;
+ else
+ V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+ }
+ return V;
+ }
+
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
// Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
// elements. This exposes more load reduction opportunities by replacing
// multiple small extract_vector_elements with a single 32-bit extract.
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (EltSize <= 16 &&
+ if (isa<MemSDNode>(Vec) &&
+ EltSize <= 16 &&
EltVT.isByteSized() &&
VecSize > 32 &&
VecSize % 32 == 0 &&
@@ -7274,46 +8299,40 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
return SDValue();
}
-static bool convertBuildVectorCastElt(SelectionDAG &DAG,
- SDValue &Lo, SDValue &Hi) {
- if (Hi.getOpcode() == ISD::BITCAST &&
- Hi.getOperand(0).getValueType() == MVT::f16 &&
- (isa<ConstantSDNode>(Lo) || Lo.isUndef())) {
- Lo = DAG.getNode(ISD::BITCAST, SDLoc(Lo), MVT::f16, Lo);
- Hi = Hi.getOperand(0);
- return true;
- }
-
- return false;
-}
-
-SDValue SITargetLowering::performBuildVectorCombine(
- SDNode *N, DAGCombinerInfo &DCI) const {
- SDLoc SL(N);
+SDValue
+SITargetLowering::performInsertVectorEltCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned VecSize = VecVT.getSizeInBits();
+ unsigned EltSize = EltVT.getSizeInBits();
- if (!isTypeLegal(MVT::v2i16))
+ // INSERT_VECTOR_ELT (<n x e>, var-idx)
+ // => BUILD_VECTOR n x select (e, const-idx)
+ // This eliminates a non-constant index and the subsequent movrel or scratch access.
+ // Sub-dword vectors of size 2 dwords or less have a better implementation.
+ // Vectors bigger than 8 dwords would yield too many v_cndmask_b32
+ // instructions.
+ if (isa<ConstantSDNode>(Idx) ||
+ VecSize > 256 || (VecSize <= 64 && EltSize < 32))
return SDValue();
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
-
- if (VT == MVT::v2i16) {
- SDValue Lo = N->getOperand(0);
- SDValue Hi = N->getOperand(1);
- // v2i16 build_vector (const|undef), (bitcast f16:$x)
- // -> bitcast (v2f16 build_vector const|undef, $x
- if (convertBuildVectorCastElt(DAG, Lo, Hi)) {
- SDValue NewVec = DAG.getBuildVector(MVT::v2f16, SL, { Lo, Hi });
- return DAG.getNode(ISD::BITCAST, SL, VT, NewVec);
- }
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ SDValue Ins = N->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
- if (convertBuildVectorCastElt(DAG, Hi, Lo)) {
- SDValue NewVec = DAG.getBuildVector(MVT::v2f16, SL, { Hi, Lo });
- return DAG.getNode(ISD::BITCAST, SL, VT, NewVec);
- }
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+ SDValue IC = DAG.getConstant(I, SL, IdxVT);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
+ SDValue V = DAG.getSelectCC(SL, Idx, IC, Ins, Elt, ISD::SETEQ);
+ Ops.push_back(V);
}
- return SDValue();
+ return DAG.getBuildVector(VecVT, SL, Ops);
}
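For intuition, the variable-index expansion used here (and by the matching extract_vector_elt combine above) replaces the dynamic index with one compare-and-select per element. The sketch below is editorial, not part of the patch; insertEltSketch is an illustrative name and a 4-element vector is assumed.

template <typename T>
static void insertEltSketch(T (&Out)[4], const T (&V)[4], T Ins, unsigned Idx) {
  for (unsigned I = 0; I != 4; ++I)
    Out[I] = (Idx == I) ? Ins : V[I]; // each element becomes one cndmask-style select
}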
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
@@ -7568,7 +8587,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc SL(N);
- if (!Subtarget->hasDLInsts() || VT != MVT::f32)
+ if (!Subtarget->hasDotInsts() || VT != MVT::f32)
return SDValue();
// FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
@@ -7705,16 +8724,26 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
VT != MVT::f16))
return SDValue();
- // Match isinf pattern
+ // Match isinf/isfinite pattern
// (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
- if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
+ // (fcmp one (fabs x), inf) -> (fp_class x,
+ // (p_normal | n_normal | p_subnormal | n_subnormal | p_zero | n_zero))
+ if ((CC == ISD::SETOEQ || CC == ISD::SETONE) && LHS.getOpcode() == ISD::FABS) {
const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if (!CRHS)
return SDValue();
const APFloat &APF = CRHS->getValueAPF();
if (APF.isInfinity() && !APF.isNegative()) {
- unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
+ const unsigned IsInfMask = SIInstrFlags::P_INFINITY |
+ SIInstrFlags::N_INFINITY;
+ const unsigned IsFiniteMask = SIInstrFlags::N_ZERO |
+ SIInstrFlags::P_ZERO |
+ SIInstrFlags::N_NORMAL |
+ SIInstrFlags::P_NORMAL |
+ SIInstrFlags::N_SUBNORMAL |
+ SIInstrFlags::P_SUBNORMAL;
+ unsigned Mask = CC == ISD::SETOEQ ? IsInfMask : IsFiniteMask;
return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(Mask, SL, MVT::i32));
}
@@ -7759,8 +8788,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) ||
- TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
+ if (TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
}
@@ -7792,6 +8820,9 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return SDValue();
+
switch (N->getOpcode()) {
default:
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -7810,17 +8841,15 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN:
case AMDGPUISD::FMIN_LEGACY:
- case AMDGPUISD::FMAX_LEGACY: {
- if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
- getTargetMachine().getOptLevel() > CodeGenOpt::None)
- return performMinMaxCombine(N, DCI);
- break;
- }
+ case AMDGPUISD::FMAX_LEGACY:
+ return performMinMaxCombine(N, DCI);
case ISD::FMA:
return performFMACombine(N, DCI);
case ISD::LOAD: {
@@ -7912,8 +8941,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DCI);
- case ISD::BUILD_VECTOR:
- return performBuildVectorCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT:
+ return performInsertVectorEltCombine(N, DCI);
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
@@ -7926,6 +8955,7 @@ static unsigned SubIdx2Lane(unsigned Idx) {
case AMDGPU::sub1: return 1;
case AMDGPU::sub2: return 2;
case AMDGPU::sub3: return 3;
+ case AMDGPU::sub4: return 4; // Possible with TFE/LWE
}
}
@@ -7939,11 +8969,16 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
return Node; // not implemented for D16
- SDNode *Users[4] = { nullptr };
+ SDNode *Users[5] = { nullptr };
unsigned Lane = 0;
unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
unsigned NewDmask = 0;
+ unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
+ unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
+ bool UsesTFC = (Node->getConstantOperandVal(TFEIdx) ||
+ Node->getConstantOperandVal(LWEIdx)) ? 1 : 0;
+ unsigned TFCLane = 0;
bool HasChain = Node->getNumValues() > 1;
if (OldDmask == 0) {
@@ -7951,6 +8986,12 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
return Node;
}
+ unsigned OldBitsSet = countPopulation(OldDmask);
+ // Work out which is the TFE/LWE lane if that is enabled.
+ if (UsesTFC) {
+ TFCLane = OldBitsSet;
+ }
+
// Try to figure out the used register components
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
I != E; ++I) {
@@ -7970,28 +9011,49 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// set, etc.
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
- // Set which texture component corresponds to the lane.
- unsigned Comp;
- for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
- Comp = countTrailingZeros(Dmask);
- Dmask &= ~(1 << Comp);
- }
+ // Check if the use is for the TFE/LWE generated result at VGPRn+1.
+ if (UsesTFC && Lane == TFCLane) {
+ Users[Lane] = *I;
+ } else {
+ // Set which texture component corresponds to the lane.
+ unsigned Comp;
+ for (unsigned i = 0, Dmask = OldDmask; (i <= Lane) && (Dmask != 0); i++) {
+ Comp = countTrailingZeros(Dmask);
+ Dmask &= ~(1 << Comp);
+ }
- // Abort if we have more than one user per component
- if (Users[Lane])
- return Node;
+ // Abort if we have more than one user per component.
+ if (Users[Lane])
+ return Node;
- Users[Lane] = *I;
- NewDmask |= 1 << Comp;
+ Users[Lane] = *I;
+ NewDmask |= 1 << Comp;
+ }
}
+ // Don't allow 0 dmask, as hardware assumes one channel enabled.
+ bool NoChannels = !NewDmask;
+ if (NoChannels) {
+ // If the original dmask has one channel - then nothing to do
+ if (OldBitsSet == 1)
+ return Node;
+ // Use an arbitrary dmask - required for the instruction to work
+ NewDmask = 1;
+ }
// Abort if there's no change
if (NewDmask == OldDmask)
return Node;
unsigned BitsSet = countPopulation(NewDmask);
- int NewOpcode = AMDGPU::getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet);
+ // Check for TFE or LWE - increase the number of channels by one to account
+ // for the extra return value
+ // This will need adjustment for D16 if this is also included in
+ // adjustWriteMask (this function) but at present D16 is excluded.
+ unsigned NewChannels = BitsSet + UsesTFC;
+
+ int NewOpcode =
+ AMDGPU::getMaskedMIMGOp(Node->getMachineOpcode(), NewChannels);
assert(NewOpcode != -1 &&
NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
"failed to find equivalent MIMG op");
@@ -8004,8 +9066,9 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT();
- MVT ResultVT = BitsSet == 1 ?
- SVT : MVT::getVectorVT(SVT, BitsSet == 3 ? 4 : BitsSet);
+ MVT ResultVT = NewChannels == 1 ?
+ SVT : MVT::getVectorVT(SVT, NewChannels == 3 ? 4 :
+ NewChannels == 5 ? 8 : NewChannels);
SDVTList NewVTList = HasChain ?
DAG.getVTList(ResultVT, MVT::Other) : DAG.getVTList(ResultVT);
@@ -8015,11 +9078,11 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
if (HasChain) {
// Update chain.
- NewNode->setMemRefs(Node->memoperands_begin(), Node->memoperands_end());
+ DAG.setNodeMemRefs(NewNode, Node->memoperands());
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1));
}
- if (BitsSet == 1) {
+ if (NewChannels == 1) {
assert(Node->hasNUsesOfValue(1, 0));
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY,
SDLoc(Node), Users[Lane]->getValueType(0),
@@ -8029,19 +9092,24 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
// Update the users of the node with the new indices
- for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
+ for (unsigned i = 0, Idx = AMDGPU::sub0; i < 5; ++i) {
SDNode *User = Users[i];
- if (!User)
- continue;
-
- SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
- DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
+ if (!User) {
+ // Handle the special case of NoChannels. We set NewDmask to 1 above, but
+ // Users[0] is still nullptr because channel 0 doesn't really have a use.
+ if (i || !NoChannels)
+ continue;
+ } else {
+ SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
+ DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
+ }
switch (Idx) {
default: break;
case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
+ case AMDGPU::sub3: Idx = AMDGPU::sub4; break;
}
}
@@ -8457,49 +9525,56 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
}
+LLVM_ATTRIBUTE_UNUSED
+static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
+ assert(N->getOpcode() == ISD::CopyFromReg);
+ do {
+ // Follow the chain until we find an INLINEASM node.
+ N = N->getOperand(0).getNode();
+ if (N->getOpcode() == ISD::INLINEASM)
+ return true;
+ } while (N->getOpcode() == ISD::CopyFromReg);
+ return false;
+}
+
bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
- FunctionLoweringInfo * FLI, DivergenceAnalysis * DA) const
+ FunctionLoweringInfo * FLI, LegacyDivergenceAnalysis * KDA) const
{
switch (N->getOpcode()) {
- case ISD::Register:
case ISD::CopyFromReg:
{
- const RegisterSDNode *R = nullptr;
- if (N->getOpcode() == ISD::Register) {
- R = dyn_cast<RegisterSDNode>(N);
- }
- else {
- R = dyn_cast<RegisterSDNode>(N->getOperand(1));
- }
- if (R)
- {
- const MachineFunction * MF = FLI->MF;
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
- unsigned Reg = R->getReg();
- if (TRI.isPhysicalRegister(Reg))
- return TRI.isVGPR(MRI, Reg);
-
- if (MRI.isLiveIn(Reg)) {
- // workitem.id.x workitem.id.y workitem.id.z
- // Any VGPR formal argument is also considered divergent
- if (TRI.isVGPR(MRI, Reg))
- return true;
- // Formal arguments of non-entry functions
- // are conservatively considered divergent
- else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
- return true;
- }
- return !DA || DA->isDivergent(FLI->getValueFromVirtualReg(Reg));
+ const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
+ const MachineFunction * MF = FLI->MF;
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+ unsigned Reg = R->getReg();
+ if (TRI.isPhysicalRegister(Reg))
+ return !TRI.isSGPRReg(MRI, Reg);
+
+ if (MRI.isLiveIn(Reg)) {
+ // workitem.id.x workitem.id.y workitem.id.z
+ // Any VGPR formal argument is also considered divergent
+ if (!TRI.isSGPRReg(MRI, Reg))
+ return true;
+ // Formal arguments of non-entry functions
+ // are conservatively considered divergent
+ else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
+ return true;
+ return false;
}
+ const Value *V = FLI->getValueFromVirtualReg(Reg);
+ if (V)
+ return KDA->isDivergent(V);
+ assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
+ return !TRI.isSGPRReg(MRI, Reg);
}
break;
case ISD::LOAD: {
- const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
- if (L->getMemOperand()->getAddrSpace() ==
- Subtarget->getAMDGPUAS().PRIVATE_ADDRESS)
- return true;
+ const LoadSDNode *L = cast<LoadSDNode>(N);
+ unsigned AS = L->getAddressSpace();
+ // A flat load may access private memory.
+ return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
} break;
case ISD::CALLSEQ_END:
return true;
@@ -8522,3 +9597,30 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
}
return false;
}
+
+bool SITargetLowering::denormalsEnabledForType(EVT VT) const {
+ switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return Subtarget->hasFP32Denormals();
+ case MVT::f64:
+ return Subtarget->hasFP64Denormals();
+ case MVT::f16:
+ return Subtarget->hasFP16Denormals();
+ default:
+ return false;
+ }
+}
+
+bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN,
+ unsigned Depth) const {
+ if (Op.getOpcode() == AMDGPUISD::CLAMP) {
+ if (Subtarget->enableDX10Clamp())
+ return true; // Clamped to 0.
+ return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
+ }
+
+ return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG,
+ SNaN, Depth);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 5b3d49b3d8e3..bcef519ee663 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -60,11 +60,22 @@ private:
MVT VT, unsigned Offset) const;
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const;
+ SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
+ SDValue GLC, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
+ // (the offset that is included in bounds checking and swizzling, to be split
+ // between the instruction's voffset and immoffset fields) and soffset (the
+ // offset that is excluded from bounds checking and swizzling, to go in the
+ // instruction's soffset field). This function takes the first kind of
+ // offset and figures out how to split it between voffset and immoffset.
+ std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset,
+ SelectionDAG &DAG) const;
+
SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -81,7 +92,7 @@ private:
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
- SelectionDAG &DAG,
+ SelectionDAG &DAG, ArrayRef<SDValue> Ops,
bool IsIntrinsic = false) const;
SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const;
@@ -99,6 +110,7 @@ private:
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const;
@@ -130,6 +142,8 @@ private:
SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
+ const APFloat &C) const;
SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
@@ -140,7 +154,7 @@ private:
SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performBuildVectorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
@@ -156,7 +170,6 @@ private:
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
- bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
unsigned isCFIntrinsic(const SDNode *Intr) const;
@@ -175,6 +188,12 @@ private:
/// global value \p GV, false otherwise.
bool shouldEmitPCReloc(const GlobalValue *GV) const;
+ // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
+ // three offsets (voffset, soffset and instoffset) into the SDValue[3] array
+ // pointed to by Offsets.
+ void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
+ SDValue *Offsets, unsigned Align = 4) const;
+
public:
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
@@ -192,6 +211,7 @@ public:
SmallVectorImpl<Value*> &/*Ops*/,
Type *&/*AccessTy*/) const override;
+ bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
@@ -215,7 +235,7 @@ public:
bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
@@ -248,11 +268,11 @@ public:
void passSpecialInputs(
CallLoweringInfo &CLI,
+ CCState &CCInfo,
const SIMachineFunctionInfo &Info,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains,
- SDValue Chain,
- SDValue StackPtr) const;
+ SDValue Chain) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -322,7 +342,16 @@ public:
unsigned Depth = 0) const override;
bool isSDNodeSourceOfDivergence(const SDNode *N,
- FunctionLoweringInfo *FLI, DivergenceAnalysis *DA) const override;
+ FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
+
+ bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
+ unsigned MaxDepth = 5) const;
+ bool denormalsEnabledForType(EVT VT) const;
+
+ bool isKnownNeverNaNForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
+ bool SNaN = false,
+ unsigned Depth = 0) const override;
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index dc9397cf7b85..ba21a5ce1293 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -66,6 +66,8 @@ private:
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
+ bool optimizeVccBranch(MachineInstr &MI) const;
+
public:
static char ID;
@@ -320,6 +322,96 @@ bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
return true;
}
+bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
+ // Match:
+ // sreg = -1
+ // vcc = S_AND_B64 exec, sreg
+ // S_CBRANCH_VCC[N]Z
+ // =>
+ // S_CBRANCH_EXEC[N]Z
+ bool Changed = false;
+ MachineBasicBlock &MBB = *MI.getParent();
+ const unsigned CondReg = AMDGPU::VCC;
+ const unsigned ExecReg = AMDGPU::EXEC;
+ const unsigned And = AMDGPU::S_AND_B64;
+
+ MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
+ E = MBB.rend();
+ bool ReadsCond = false;
+ unsigned Threshold = 5;
+ for (++A ; A != E ; ++A) {
+ if (!--Threshold)
+ return false;
+ if (A->modifiesRegister(ExecReg, TRI))
+ return false;
+ if (A->modifiesRegister(CondReg, TRI)) {
+ if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
+ return false;
+ break;
+ }
+ ReadsCond |= A->readsRegister(CondReg, TRI);
+ }
+ if (A == E)
+ return false;
+
+ MachineOperand &Op1 = A->getOperand(1);
+ MachineOperand &Op2 = A->getOperand(2);
+ if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
+ TII->commuteInstruction(*A);
+ Changed = true;
+ }
+ if (Op1.getReg() != ExecReg)
+ return Changed;
+ if (Op2.isImm() && Op2.getImm() != -1)
+ return Changed;
+
+ unsigned SReg = AMDGPU::NoRegister;
+ if (Op2.isReg()) {
+ SReg = Op2.getReg();
+ auto M = std::next(A);
+ bool ReadsSreg = false;
+ for ( ; M != E ; ++M) {
+ if (M->definesRegister(SReg, TRI))
+ break;
+ if (M->modifiesRegister(SReg, TRI))
+ return Changed;
+ ReadsSreg |= M->readsRegister(SReg, TRI);
+ }
+ if (M == E ||
+ !M->isMoveImmediate() ||
+ !M->getOperand(1).isImm() ||
+ M->getOperand(1).getImm() != -1)
+ return Changed;
+ // First, if sreg is only used in the AND instruction, fold the immediate
+ // into that AND.
+ if (!ReadsSreg && Op2.isKill()) {
+ A->getOperand(2).ChangeToImmediate(-1);
+ M->eraseFromParent();
+ }
+ }
+
+ if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
+ MI.killsRegister(CondReg, TRI))
+ A->eraseFromParent();
+
+ bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
+ if (SReg == ExecReg) {
+ if (IsVCCZ) {
+ MI.eraseFromParent();
+ return true;
+ }
+ MI.setDesc(TII->get(AMDGPU::S_BRANCH));
+ } else {
+ MI.setDesc(TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
+ : AMDGPU::S_CBRANCH_EXECNZ));
+ }
+
+ MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
+ MI.addImplicitDefUseOperands(*MBB.getParent());
+
+ return true;
+}
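Condensed, the final rewrite decision above behaves like the editorial sketch below (not part of the patch). SRegIsExec stands for the case where the matched AND was vcc = exec & exec, and a return value of 0 means the S_CBRANCH_VCCZ is simply erased.

static unsigned rewrittenBranchOpcSketch(bool IsVCCZ, bool SRegIsExec) {
  if (SRegIsExec)                           // the AND was vcc = exec & exec
    return IsVCCZ ? 0 : AMDGPU::S_BRANCH;   // erase vs. make unconditional
  return IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ   // the AND was vcc = exec & -1
                : AMDGPU::S_CBRANCH_EXECNZ;
}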
+
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -384,7 +476,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
kill(MI);
if (ExecBranchStack.empty()) {
- if (skipIfDead(MI, *NextBB)) {
+ if (NextBB != BE && skipIfDead(MI, *NextBB)) {
HaveSkipBlock = true;
NextBB = std::next(BI);
BE = MF.end();
@@ -417,6 +509,11 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
}
break;
+ case AMDGPU::S_CBRANCH_VCCZ:
+ case AMDGPU::S_CBRANCH_VCCNZ:
+ MadeChange |= optimizeVccBranch(MI);
+ break;
+
default:
break;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index d456e3d9b94d..afc0b4467610 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -13,6 +13,14 @@
/// Memory reads and writes are issued asynchronously, so we need to insert
/// S_WAITCNT instructions when we want to access any of their results or
/// overwrite any register that's used asynchronously.
+///
+/// TODO: This pass currently keeps one timeline per hardware counter. A more
+/// finely-grained approach that keeps one timeline per event type could
+/// sometimes get away with generating weaker s_waitcnt instructions. For
+/// example, when both SMEM and LDS are in flight and we need to wait for
+/// the i-th-last LDS instruction, then an lgkmcnt(i) is actually sufficient,
+/// but the pass will currently generate a conservative lgkmcnt(0) because
+/// multiple event types are in flight.
//
//===----------------------------------------------------------------------===//
@@ -33,7 +41,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -69,6 +76,25 @@ static cl::opt<unsigned> ForceEmitZeroFlag(
namespace {
+template <typename EnumT>
+class enum_iterator
+ : public iterator_facade_base<enum_iterator<EnumT>,
+ std::forward_iterator_tag, const EnumT> {
+ EnumT Value;
+public:
+ enum_iterator() = default;
+ enum_iterator(EnumT Value) : Value(Value) {}
+
+ enum_iterator &operator++() {
+ Value = static_cast<EnumT>(Value + 1);
+ return *this;
+ }
+
+ bool operator==(const enum_iterator &RHS) const { return Value == RHS.Value; }
+
+ EnumT operator*() const { return Value; }
+};
+
// Class of object that encapsulates latest instruction counter score
// associated with the operand. Used for determining whether
// s_waitcnt instruction needs to be emitted.
@@ -77,12 +103,17 @@ namespace {
enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
+iterator_range<enum_iterator<InstCounterType>> inst_counter_types() {
+ return make_range(enum_iterator<InstCounterType>(VM_CNT),
+ enum_iterator<InstCounterType>(NUM_INST_CNTS));
+}
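A trivial usage sketch of the range helper above (editorial, not part of the patch; the pass uses the same pattern below when clearing per-counter state):

#include <cassert>

unsigned NumCounters = 0;
for (InstCounterType T : inst_counter_types()) {
  (void)T;          // T takes VM_CNT, LGKM_CNT, EXP_CNT in order
  ++NumCounters;
}
assert(NumCounters == NUM_INST_CNTS);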
+
using RegInterval = std::pair<signed, signed>;
struct {
- int32_t VmcntMax;
- int32_t ExpcntMax;
- int32_t LgkmcntMax;
+ uint32_t VmcntMax;
+ uint32_t ExpcntMax;
+ uint32_t LgkmcntMax;
int32_t NumVGPRsMax;
int32_t NumSGPRsMax;
} HardwareLimits;
@@ -108,6 +139,14 @@ enum WaitEventType {
NUM_WAIT_EVENTS,
};
+static const uint32_t WaitEventMaskForInst[NUM_INST_CNTS] = {
+ (1 << VMEM_ACCESS),
+ (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
+ (1 << SQ_MESSAGE),
+ (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
+ (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS),
+};
+
// The mapping is:
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
// SQ_MAX_PGM_VGPRS .. NUM_ALL_VGPRS-1 extra VGPR-like slots
@@ -122,30 +161,38 @@ enum RegisterMapping {
NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
};
-#define ForAllWaitEventType(w) \
- for (enum WaitEventType w = (enum WaitEventType)0; \
- (w) < (enum WaitEventType)NUM_WAIT_EVENTS; \
- (w) = (enum WaitEventType)((w) + 1))
+void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
+ switch (T) {
+ case VM_CNT:
+ Wait.VmCnt = std::min(Wait.VmCnt, Count);
+ break;
+ case EXP_CNT:
+ Wait.ExpCnt = std::min(Wait.ExpCnt, Count);
+ break;
+ case LGKM_CNT:
+ Wait.LgkmCnt = std::min(Wait.LgkmCnt, Count);
+ break;
+ default:
+ llvm_unreachable("bad InstCounterType");
+ }
+}
-// This is a per-basic-block object that maintains current score brackets
-// of each wait counter, and a per-register scoreboard for each wait counter.
+// This object maintains the current score brackets of each wait counter, and
+// a per-register scoreboard for each wait counter.
+//
// We also maintain the latest score for every event type that can change the
// waitcnt in order to know if there are multiple types of events within
// the brackets. When multiple types of event happen in the bracket,
// wait count may get decreased out of order, therefore we need to put in
// "s_waitcnt 0" before use.
-class BlockWaitcntBrackets {
+class WaitcntBrackets {
public:
- BlockWaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
+ WaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
+ for (auto T : inst_counter_types())
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
- }
}
- ~BlockWaitcntBrackets() = default;
-
- static int32_t getWaitCountMax(InstCounterType T) {
+ static uint32_t getWaitCountMax(InstCounterType T) {
switch (T) {
case VM_CNT:
return HardwareLimits.VmcntMax;
@@ -159,33 +206,14 @@ public:
return 0;
}
- void setScoreLB(InstCounterType T, int32_t Val) {
- assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return;
- ScoreLBs[T] = Val;
- }
-
- void setScoreUB(InstCounterType T, int32_t Val) {
- assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return;
- ScoreUBs[T] = Val;
- if (T == EXP_CNT) {
- int32_t UB = (int)(ScoreUBs[T] - getWaitCountMax(EXP_CNT));
- if (ScoreLBs[T] < UB)
- ScoreLBs[T] = UB;
- }
- }
-
- int32_t getScoreLB(InstCounterType T) {
+ uint32_t getScoreLB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return 0;
return ScoreLBs[T];
}
- int32_t getScoreUB(InstCounterType T) {
+ uint32_t getScoreUB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return 0;
@@ -194,89 +222,56 @@ public:
// Mapping from event to counter.
InstCounterType eventCounter(WaitEventType E) {
- switch (E) {
- case VMEM_ACCESS:
+ if (E == VMEM_ACCESS)
return VM_CNT;
- case LDS_ACCESS:
- case GDS_ACCESS:
- case SQ_MESSAGE:
- case SMEM_ACCESS:
+ if (WaitEventMaskForInst[LGKM_CNT] & (1 << E))
return LGKM_CNT;
- case EXP_GPR_LOCK:
- case GDS_GPR_LOCK:
- case VMW_GPR_LOCK:
- case EXP_POS_ACCESS:
- case EXP_PARAM_ACCESS:
- return EXP_CNT;
- default:
- llvm_unreachable("unhandled event type");
- }
- return NUM_INST_CNTS;
+ assert(WaitEventMaskForInst[EXP_CNT] & (1 << E));
+ return EXP_CNT;
}
- void setRegScore(int GprNo, InstCounterType T, int32_t Val) {
- if (GprNo < NUM_ALL_VGPRS) {
- if (GprNo > VgprUB) {
- VgprUB = GprNo;
- }
- VgprScores[T][GprNo] = Val;
- } else {
- assert(T == LGKM_CNT);
- if (GprNo - NUM_ALL_VGPRS > SgprUB) {
- SgprUB = GprNo - NUM_ALL_VGPRS;
- }
- SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
- }
- }
-
- int32_t getRegScore(int GprNo, InstCounterType T) {
+ uint32_t getRegScore(int GprNo, InstCounterType T) {
if (GprNo < NUM_ALL_VGPRS) {
return VgprScores[T][GprNo];
}
+ assert(T == LGKM_CNT);
return SgprScores[GprNo - NUM_ALL_VGPRS];
}
void clear() {
memset(ScoreLBs, 0, sizeof(ScoreLBs));
memset(ScoreUBs, 0, sizeof(ScoreUBs));
- memset(EventUBs, 0, sizeof(EventUBs));
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
+ PendingEvents = 0;
+ memset(MixedPendingEvents, 0, sizeof(MixedPendingEvents));
+ for (auto T : inst_counter_types())
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
- }
memset(SgprScores, 0, sizeof(SgprScores));
}
+ bool merge(const WaitcntBrackets &Other);
+
RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII,
const MachineRegisterInfo *MRI,
const SIRegisterInfo *TRI, unsigned OpNo,
bool Def) const;
- void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
- const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
- unsigned OpNo, int32_t Val);
-
- void setWaitAtBeginning() { WaitAtBeginning = true; }
- void clearWaitAtBeginning() { WaitAtBeginning = false; }
- bool getWaitAtBeginning() const { return WaitAtBeginning; }
- void setEventUB(enum WaitEventType W, int32_t Val) { EventUBs[W] = Val; }
int32_t getMaxVGPR() const { return VgprUB; }
int32_t getMaxSGPR() const { return SgprUB; }
- int32_t getEventUB(enum WaitEventType W) const {
- assert(W < NUM_WAIT_EVENTS);
- return EventUBs[W];
- }
-
- bool counterOutOfOrder(InstCounterType T);
- unsigned int updateByWait(InstCounterType T, int ScoreToWait);
+ bool counterOutOfOrder(InstCounterType T) const;
+ bool simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
+ bool simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
+ void determineWait(InstCounterType T, uint32_t ScoreToWait,
+ AMDGPU::Waitcnt &Wait) const;
+ void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
+ void applyWaitcnt(InstCounterType T, unsigned Count);
void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI, WaitEventType E,
MachineInstr &MI);
- bool hasPendingSMEM() const {
- return (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]);
+ bool hasPending() const { return PendingEvents != 0; }
+ bool hasPendingEvent(WaitEventType E) const {
+ return PendingEvents & (1 << E);
}
bool hasPendingFlat() const {
@@ -291,75 +286,71 @@ public:
LastFlat[LGKM_CNT] = ScoreUBs[LGKM_CNT];
}
- int pendingFlat(InstCounterType Ct) const { return LastFlat[Ct]; }
-
- void setLastFlat(InstCounterType Ct, int Val) { LastFlat[Ct] = Val; }
-
- bool getRevisitLoop() const { return RevisitLoop; }
- void setRevisitLoop(bool RevisitLoopIn) { RevisitLoop = RevisitLoopIn; }
+ void print(raw_ostream &);
+ void dump() { print(dbgs()); }
- void setPostOrder(int32_t PostOrderIn) { PostOrder = PostOrderIn; }
- int32_t getPostOrder() const { return PostOrder; }
+private:
+ struct MergeInfo {
+ uint32_t OldLB;
+ uint32_t OtherLB;
+ uint32_t MyShift;
+ uint32_t OtherShift;
+ };
+ static bool mergeScore(const MergeInfo &M, uint32_t &Score,
+ uint32_t OtherScore);
+
+ void setScoreLB(InstCounterType T, uint32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreLBs[T] = Val;
+ }
- void setWaitcnt(MachineInstr *WaitcntIn) { Waitcnt = WaitcntIn; }
- void clearWaitcnt() { Waitcnt = nullptr; }
- MachineInstr *getWaitcnt() const { return Waitcnt; }
+ void setScoreUB(InstCounterType T, uint32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreUBs[T] = Val;
+ if (T == EXP_CNT) {
+ uint32_t UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
+ if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
+ ScoreLBs[T] = UB;
+ }
+ }
- bool mixedExpTypes() const { return MixedExpTypes; }
- void setMixedExpTypes(bool MixedExpTypesIn) {
- MixedExpTypes = MixedExpTypesIn;
+ void setRegScore(int GprNo, InstCounterType T, uint32_t Val) {
+ if (GprNo < NUM_ALL_VGPRS) {
+ if (GprNo > VgprUB) {
+ VgprUB = GprNo;
+ }
+ VgprScores[T][GprNo] = Val;
+ } else {
+ assert(T == LGKM_CNT);
+ if (GprNo - NUM_ALL_VGPRS > SgprUB) {
+ SgprUB = GprNo - NUM_ALL_VGPRS;
+ }
+ SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
+ }
}
- void print(raw_ostream &);
- void dump() { print(dbgs()); }
+ void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
+ unsigned OpNo, uint32_t Val);
-private:
const GCNSubtarget *ST = nullptr;
- bool WaitAtBeginning = false;
- bool RevisitLoop = false;
- bool MixedExpTypes = false;
- int32_t PostOrder = 0;
- MachineInstr *Waitcnt = nullptr;
- int32_t ScoreLBs[NUM_INST_CNTS] = {0};
- int32_t ScoreUBs[NUM_INST_CNTS] = {0};
- int32_t EventUBs[NUM_WAIT_EVENTS] = {0};
+ uint32_t ScoreLBs[NUM_INST_CNTS] = {0};
+ uint32_t ScoreUBs[NUM_INST_CNTS] = {0};
+ uint32_t PendingEvents = 0;
+ bool MixedPendingEvents[NUM_INST_CNTS] = {false};
// Remember the last flat memory operation.
- int32_t LastFlat[NUM_INST_CNTS] = {0};
+ uint32_t LastFlat[NUM_INST_CNTS] = {0};
// wait_cnt scores for every vgpr.
// Keep track of the VgprUB and SgprUB to make merge at join efficient.
int32_t VgprUB = 0;
int32_t SgprUB = 0;
- int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
+ uint32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
// Wait cnt scores for every sgpr, only lgkmcnt is relevant.
- int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
-};
-
-// This is a per-loop-region object that records waitcnt status at the end of
-// loop footer from the previous iteration. We also maintain an iteration
-// count to track the number of times the loop has been visited. When it
-// doesn't converge naturally, we force convergence by inserting s_waitcnt 0
-// at the end of the loop footer.
-class LoopWaitcntData {
-public:
- LoopWaitcntData() = default;
- ~LoopWaitcntData() = default;
-
- void incIterCnt() { IterCnt++; }
- void resetIterCnt() { IterCnt = 0; }
- unsigned getIterCnt() { return IterCnt; }
-
- void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; }
- MachineInstr *getWaitcnt() const { return LfWaitcnt; }
-
- void print() { LLVM_DEBUG(dbgs() << " iteration " << IterCnt << '\n';); }
-
-private:
- // s_waitcnt added at the end of loop footer to stablize wait scores
- // at the end of the loop footer.
- MachineInstr *LfWaitcnt = nullptr;
- // Number of iterations the loop has been visited, not including the initial
- // walk over.
- int32_t IterCnt = 0;
+ uint32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
};
class SIInsertWaitcnts : public MachineFunctionPass {
@@ -368,22 +359,21 @@ private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
- const MachineLoopInfo *MLI = nullptr;
- AMDGPU::IsaInfo::IsaVersion IV;
- AMDGPUAS AMDGPUASI;
+ AMDGPU::IsaVersion IV;
- DenseSet<MachineBasicBlock *> BlockVisitedSet;
DenseSet<MachineInstr *> TrackedWaitcntSet;
DenseSet<MachineInstr *> VCCZBugHandledSet;
- DenseMap<MachineBasicBlock *, std::unique_ptr<BlockWaitcntBrackets>>
- BlockWaitcntBracketsMap;
-
- std::vector<MachineBasicBlock *> BlockWaitcntProcessedSet;
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ std::unique_ptr<WaitcntBrackets> Incoming;
+ bool Dirty = true;
- DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;
+ explicit BlockInfo(MachineBasicBlock *MBB) : MBB(MBB) {}
+ };
- std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
+ std::vector<BlockInfo> BlockInfos; // by reverse post-order traversal index
+ DenseMap<MachineBasicBlock *, unsigned> RpotIdxMap;
// ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0
// because of amdgpu-waitcnt-forcezero flag
@@ -407,20 +397,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
- // The waitcnt information is copied because it changes as the block is
- // traversed.
- KillWaitBrackets.push_back(
- llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
- }
-
bool isForceEmitWaitcnt() const {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1))
+ for (auto T : inst_counter_types())
if (ForceEmitWaitcnt[T])
return true;
return false;
@@ -454,27 +435,22 @@ public:
}
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
- void generateWaitcntInstBefore(MachineInstr &MI,
- BlockWaitcntBrackets *ScoreBrackets);
+ bool generateWaitcntInstBefore(MachineInstr &MI,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr);
void updateEventWaitcntAfter(MachineInstr &Inst,
- BlockWaitcntBrackets *ScoreBrackets);
- void mergeInputScoreBrackets(MachineBasicBlock &Block);
- bool isLoopBottom(const MachineLoop *Loop, const MachineBasicBlock *Block);
- unsigned countNumBottomBlocks(const MachineLoop *Loop);
- void insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block);
- void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst);
- bool isWaitcntStronger(unsigned LHS, unsigned RHS);
- unsigned combineWaitcnt(unsigned LHS, unsigned RHS);
+ WaitcntBrackets *ScoreBrackets);
+ bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets);
};
} // end anonymous namespace
-RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
- const SIInstrInfo *TII,
- const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI,
- unsigned OpNo,
- bool Def) const {
+RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI,
+ unsigned OpNo, bool Def) const {
const MachineOperand &Op = MI->getOperand(OpNo);
if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) ||
(Def && !Op.isDef()))
@@ -512,11 +488,11 @@ RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
return Result;
}
-void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI,
- unsigned OpNo, int32_t Val) {
+void WaitcntBrackets::setExpScore(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI, unsigned OpNo,
+ uint32_t Val) {
RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
LLVM_DEBUG({
const MachineOperand &Opnd = MI->getOperand(OpNo);
@@ -527,26 +503,26 @@ void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
}
}
-void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI,
- WaitEventType E, MachineInstr &Inst) {
+void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI,
+ WaitEventType E, MachineInstr &Inst) {
const MachineRegisterInfo &MRIA = *MRI;
InstCounterType T = eventCounter(E);
- int32_t CurrScore = getScoreUB(T) + 1;
- // EventUB and ScoreUB need to be update regardless if this event changes
- // the score of a register or not.
+ uint32_t CurrScore = getScoreUB(T) + 1;
+ if (CurrScore == 0)
+ report_fatal_error("InsertWaitcnt score wraparound");
+  // PendingEvents and ScoreUB need to be updated regardless of whether this
+  // event changes the score of a register or not.
  // Examples include vm_cnt when buffer-store or lgkm_cnt when send-message.
- EventUBs[E] = CurrScore;
+ if (!hasPendingEvent(E)) {
+ if (PendingEvents & WaitEventMaskForInst[T])
+ MixedPendingEvents[T] = true;
+ PendingEvents |= 1 << E;
+ }
setScoreUB(T, CurrScore);
if (T == EXP_CNT) {
- // Check for mixed export types. If they are mixed, then a waitcnt exp(0)
- // is required.
- if (!MixedExpTypes) {
- MixedExpTypes = counterOutOfOrder(EXP_CNT);
- }
-
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
@@ -671,12 +647,11 @@ void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
}
-void BlockWaitcntBrackets::print(raw_ostream &OS) {
+void WaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int LB = getScoreLB(T);
- int UB = getScoreUB(T);
+ for (auto T : inst_counter_types()) {
+ uint32_t LB = getScoreLB(T);
+ uint32_t UB = getScoreUB(T);
switch (T) {
case VM_CNT:
@@ -696,10 +671,10 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
if (LB < UB) {
// Print vgpr scores.
for (int J = 0; J <= getMaxVGPR(); J++) {
- int RegScore = getRegScore(J, T);
+ uint32_t RegScore = getRegScore(J, T);
if (RegScore <= LB)
continue;
- int RelScore = RegScore - LB - 1;
+ uint32_t RelScore = RegScore - LB - 1;
if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
OS << RelScore << ":v" << J << " ";
} else {
@@ -709,10 +684,10 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
// Also need to print sgpr scores for lgkm_cnt.
if (T == LGKM_CNT) {
for (int J = 0; J <= getMaxSGPR(); J++) {
- int RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ uint32_t RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
if (RegScore <= LB)
continue;
- int RelScore = RegScore - LB - 1;
+ uint32_t RelScore = RegScore - LB - 1;
OS << RelScore << ":s" << J << " ";
}
}
@@ -722,23 +697,31 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
}
-unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
- int ScoreToWait) {
- unsigned int NeedWait = 0;
- if (ScoreToWait == -1) {
- // The score to wait is unknown. This implies that it was not encountered
- // during the path of the CFG walk done during the current traversal but
- // may be seen on a different path. Emit an s_wait counter with a
- // conservative value of 0 for the counter.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
- return NeedWait;
- }
+/// Simplify the waitcnt, in the sense of removing redundant counts, and return
+/// whether a waitcnt instruction is needed at all.
+bool WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
+ return simplifyWaitcnt(VM_CNT, Wait.VmCnt) |
+ simplifyWaitcnt(EXP_CNT, Wait.ExpCnt) |
+ simplifyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
+}
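+
+// For example, with a bracket of LB=3 and UB=7 (four outstanding events), a
+// requested count of 2 still needs a wait (UB - Count = 5 > LB), whereas a
+// count of 4 or more is already satisfied and is relaxed to ~0u ("no wait").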
+bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
+ unsigned &Count) const {
+ const uint32_t LB = getScoreLB(T);
+ const uint32_t UB = getScoreUB(T);
+ if (Count < UB && UB - Count > LB)
+ return true;
+
+ Count = ~0u;
+ return false;
+}
+
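+// For example, if a source register carries score 5 while the upper bound is
+// 9, the instruction must wait until at most UB - ScoreToWait = 4 operations
+// of this type are still outstanding, which (assuming in-order completion)
+// guarantees that the operation that produced score 5 has finished.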
+void WaitcntBrackets::determineWait(InstCounterType T, uint32_t ScoreToWait,
+ AMDGPU::Waitcnt &Wait) const {
// If the score of src_operand falls within the bracket, we need an
// s_waitcnt instruction.
- const int32_t LB = getScoreLB(T);
- const int32_t UB = getScoreUB(T);
+ const uint32_t LB = getScoreLB(T);
+ const uint32_t UB = getScoreUB(T);
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
if ((T == VM_CNT || T == LGKM_CNT) &&
hasPendingFlat() &&
@@ -746,90 +729,46 @@ unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
// If there is a pending FLAT operation, and this is a VMem or LGKM
// waitcnt and the target can report early completion, then we need
// to force a waitcnt 0.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
+ addWait(Wait, T, 0);
} else if (counterOutOfOrder(T)) {
// Counter can get decremented out-of-order when there
      // are multiple types of event in the bracket. Also emit an s_wait counter
// with a conservative value of 0 for the counter.
- NeedWait = CNT_MASK(T);
- setScoreLB(T, getScoreUB(T));
+ addWait(Wait, T, 0);
} else {
- NeedWait = CNT_MASK(T);
- setScoreLB(T, ScoreToWait);
+ addWait(Wait, T, UB - ScoreToWait);
}
}
+}
- return NeedWait;
+void WaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) {
+ applyWaitcnt(VM_CNT, Wait.VmCnt);
+ applyWaitcnt(EXP_CNT, Wait.ExpCnt);
+ applyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
}
-// Where there are multiple types of event in the bracket of a counter,
-// the decrement may go out of order.
-bool BlockWaitcntBrackets::counterOutOfOrder(InstCounterType T) {
- switch (T) {
- case VM_CNT:
- return false;
- case LGKM_CNT: {
- if (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- // Scalar memory read always can go out of order.
- return true;
- }
- int NumEventTypes = 0;
- if (EventUBs[LDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[LDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[GDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
- EventUBs[GDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[SQ_MESSAGE] > ScoreLBs[LGKM_CNT] &&
- EventUBs[SQ_MESSAGE] <= ScoreUBs[LGKM_CNT]) {
- NumEventTypes++;
- }
- if (NumEventTypes <= 1) {
- return false;
- }
- break;
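+// For example, applying a wait with count 3 to a bracket of LB=2 and UB=9
+// raises the lower bound to 6 (provided the counter completes in order),
+// while a count of 0 collapses the bracket entirely and clears the
+// counter's pending events.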
+void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
+ const uint32_t UB = getScoreUB(T);
+ if (Count >= UB)
+ return;
+ if (Count != 0) {
+ if (counterOutOfOrder(T))
+ return;
+ setScoreLB(T, std::max(getScoreLB(T), UB - Count));
+ } else {
+ setScoreLB(T, UB);
+ MixedPendingEvents[T] = false;
+ PendingEvents &= ~WaitEventMaskForInst[T];
}
- case EXP_CNT: {
- // If there has been a mixture of export types, then a waitcnt exp(0) is
- // required.
- if (MixedExpTypes)
- return true;
- int NumEventTypes = 0;
- if (EventUBs[EXP_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[GDS_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[GDS_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[VMW_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
- EventUBs[VMW_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
- if (EventUBs[EXP_PARAM_ACCESS] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_PARAM_ACCESS] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
-
- if (EventUBs[EXP_POS_ACCESS] > ScoreLBs[EXP_CNT] &&
- EventUBs[EXP_POS_ACCESS] <= ScoreUBs[EXP_CNT]) {
- NumEventTypes++;
- }
+}
- if (NumEventTypes <= 1) {
- return false;
- }
- break;
- }
- default:
- break;
- }
- return true;
+// Where there are multiple types of event in the bracket of a counter,
+// the decrement may go out of order.
+bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
+ // Scalar memory read always can go out of order.
+ if (T == LGKM_CNT && hasPendingEvent(SMEM_ACCESS))
+ return true;
+ return MixedPendingEvents[T];
}
INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
@@ -851,29 +790,6 @@ static bool readsVCCZ(const MachineInstr &MI) {
!MI.getOperand(1).isUndef();
}
-/// Given wait count encodings checks if LHS is stronger than RHS.
-bool SIInsertWaitcnts::isWaitcntStronger(unsigned LHS, unsigned RHS) {
- if (AMDGPU::decodeVmcnt(IV, LHS) > AMDGPU::decodeVmcnt(IV, RHS))
- return false;
- if (AMDGPU::decodeLgkmcnt(IV, LHS) > AMDGPU::decodeLgkmcnt(IV, RHS))
- return false;
- if (AMDGPU::decodeExpcnt(IV, LHS) > AMDGPU::decodeExpcnt(IV, RHS))
- return false;
- return true;
-}
-
-/// Given wait count encodings create a new encoding which is stronger
-/// or equal to both.
-unsigned SIInsertWaitcnts::combineWaitcnt(unsigned LHS, unsigned RHS) {
- unsigned VmCnt = std::min(AMDGPU::decodeVmcnt(IV, LHS),
- AMDGPU::decodeVmcnt(IV, RHS));
- unsigned LgkmCnt = std::min(AMDGPU::decodeLgkmcnt(IV, LHS),
- AMDGPU::decodeLgkmcnt(IV, RHS));
- unsigned ExpCnt = std::min(AMDGPU::decodeExpcnt(IV, LHS),
- AMDGPU::decodeExpcnt(IV, RHS));
- return AMDGPU::encodeWaitcnt(IV, VmCnt, ExpCnt, LgkmCnt);
-}
-
/// Generate an s_waitcnt instruction to be placed before cur_Inst.
/// Instructions of a given type are returned in order,
/// but instructions of different types can complete out of order.
@@ -884,51 +800,23 @@ unsigned SIInsertWaitcnts::combineWaitcnt(unsigned LHS, unsigned RHS) {
/// and if so what the value of each counter is.
/// The "score bracket" is bound by the lower bound and upper bound
/// scores (*_score_LB and *_score_ub respectively).
-void SIInsertWaitcnts::generateWaitcntInstBefore(
- MachineInstr &MI, BlockWaitcntBrackets *ScoreBrackets) {
- // To emit, or not to emit - that's the question!
- // Start with an assumption that there is no need to emit.
- unsigned int EmitWaitcnt = 0;
-
- // No need to wait before phi. If a phi-move exists, then the wait should
- // has been inserted before the move. If a phi-move does not exist, then
- // wait should be inserted before the real use. The same is true for
- // sc-merge. It is not a coincident that all these cases correspond to the
- // instructions that are skipped in the assembling loop.
- bool NeedLineMapping = false; // TODO: Check on this.
-
- // ForceEmitZeroWaitcnt: force a single s_waitcnt 0 due to hw bug
- bool ForceEmitZeroWaitcnt = false;
-
+bool SIInsertWaitcnts::generateWaitcntInstBefore(
+ MachineInstr &MI, WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr) {
setForceEmitWaitcnt();
bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
- if (MI.isDebugInstr() &&
- // TODO: any other opcode?
- !NeedLineMapping) {
- return;
- }
+ if (MI.isDebugInstr())
+ return false;
- // See if an s_waitcnt is forced at block entry, or is needed at
- // program end.
- if (ScoreBrackets->getWaitAtBeginning()) {
- // Note that we have already cleared the state, so we don't need to update
- // it.
- ScoreBrackets->clearWaitAtBeginning();
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- EmitWaitcnt |= CNT_MASK(T);
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- }
- }
+ AMDGPU::Waitcnt Wait;
// See if this instruction has a forced S_WAITCNT VM.
// TODO: Handle other cases of NeedsWaitcntVmBefore()
- else if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
- MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
- MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
- EmitWaitcnt |=
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
+ Wait.VmCnt = 0;
}
// All waits must be resolved at call return.
@@ -936,23 +824,14 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- EmitWaitcnt |= CNT_MASK(T);
- }
- }
+ Wait = AMDGPU::Waitcnt::allZero();
}
// Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_) ==
AMDGPU::SendMsg::ID_GS_DONE)) {
- if (ScoreBrackets->getScoreUB(VM_CNT) > ScoreBrackets->getScoreLB(VM_CNT)) {
- ScoreBrackets->setScoreLB(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- EmitWaitcnt |= CNT_MASK(VM_CNT);
- }
+ Wait.VmCnt = 0;
}
#if 0 // TODO: the following blocks of logic when we have fence.
else if (MI.getOpcode() == SC_FENCE) {
@@ -1016,14 +895,12 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
// Export and GDS are tracked individually, either may trigger a waitcnt
// for EXEC.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_GPR_LOCK));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_PARAM_ACCESS));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(EXP_POS_ACCESS));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getEventUB(GDS_GPR_LOCK));
+ if (ScoreBrackets.hasPendingEvent(EXP_GPR_LOCK) ||
+ ScoreBrackets.hasPendingEvent(EXP_PARAM_ACCESS) ||
+ ScoreBrackets.hasPendingEvent(EXP_POS_ACCESS) ||
+ ScoreBrackets.hasPendingEvent(GDS_GPR_LOCK)) {
+ Wait.ExpCnt = 0;
+ }
}
#if 0 // TODO: the following code to handle CALL.
@@ -1051,27 +928,27 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// instruction.
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
// VM_CNT is only relevant to vgpr or LDS.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
}
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &Op = MI.getOperand(I);
const MachineRegisterInfo &MRIA = *MRI;
RegInterval Interval =
- ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, false);
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false);
for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
if (TRI->isVGPR(MRIA, Op.getReg())) {
// VM_CNT is only relevant to vgpr or LDS.
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
}
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
}
// End of for loop that looks at all source operands to decide vm_wait_cnt
@@ -1086,29 +963,29 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// FIXME: Should not be relying on memoperands.
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(
+ EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
}
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
MachineOperand &Def = MI.getOperand(I);
const MachineRegisterInfo &MRIA = *MRI;
RegInterval Interval =
- ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, true);
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true);
for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
if (TRI->isVGPR(MRIA, Def.getReg())) {
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(
+ EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
} // End of for loop that looks at all dest operands.
}
@@ -1119,182 +996,79 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// requiring a WAITCNT beforehand.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier()) {
- EmitWaitcnt |=
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
- EmitWaitcnt |= ScoreBrackets->updateByWait(
- LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT));
+ Wait = AMDGPU::Waitcnt::allZero();
}
// TODO: Remove this work-around, enable the assert for Bug 457939
// after fixing the scheduler. Also, the Shader Compiler code is
// independent of target.
if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
- if (ScoreBrackets->getScoreLB(LGKM_CNT) <
- ScoreBrackets->getScoreUB(LGKM_CNT) &&
- ScoreBrackets->hasPendingSMEM()) {
- // Wait on everything, not just LGKM. vccz reads usually come from
- // terminators, and we always wait on everything at the end of the
- // block, so if we only wait on LGKM here, we might end up with
- // another s_waitcnt inserted right after this if there are non-LGKM
- // instructions still outstanding.
- // FIXME: this is too conservative / the comment is wrong.
- // We don't wait on everything at the end of the block and we combine
- // waitcnts so we should never have back-to-back waitcnts.
- ForceEmitZeroWaitcnt = true;
- EmitWaitcnt = true;
+ if (ScoreBrackets.getScoreLB(LGKM_CNT) <
+ ScoreBrackets.getScoreUB(LGKM_CNT) &&
+ ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
+ Wait.LgkmCnt = 0;
}
}
- // Does this operand processing indicate s_wait counter update?
- if (EmitWaitcnt || IsForceEmitWaitcnt) {
- int CntVal[NUM_INST_CNTS];
-
- bool UseDefaultWaitcntStrategy = true;
- if (ForceEmitZeroWaitcnt || ForceEmitZeroWaitcnts) {
- // Force all waitcnts to 0.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- }
- CntVal[VM_CNT] = 0;
- CntVal[EXP_CNT] = 0;
- CntVal[LGKM_CNT] = 0;
- UseDefaultWaitcntStrategy = false;
- }
-
- if (UseDefaultWaitcntStrategy) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (EmitWaitcnt & CNT_MASK(T)) {
- int Delta =
- ScoreBrackets->getScoreUB(T) - ScoreBrackets->getScoreLB(T);
- int MaxDelta = ScoreBrackets->getWaitCountMax(T);
- if (Delta >= MaxDelta) {
- Delta = -1;
- if (T != EXP_CNT) {
- ScoreBrackets->setScoreLB(
- T, ScoreBrackets->getScoreUB(T) - MaxDelta);
- }
- EmitWaitcnt &= ~CNT_MASK(T);
- }
- CntVal[T] = Delta;
- } else {
- // If we are not waiting for a particular counter then encode
- // it as -1 which means "don't care."
- CntVal[T] = -1;
- }
+ // Early-out if no wait is indicated.
+ if (!ScoreBrackets.simplifyWaitcnt(Wait) && !IsForceEmitWaitcnt) {
+ bool Modified = false;
+ if (OldWaitcntInstr) {
+ if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
+ TrackedWaitcntSet.erase(OldWaitcntInstr);
+ OldWaitcntInstr->eraseFromParent();
+ Modified = true;
+ } else {
+ int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
+ ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
}
+ Modified = true;
}
+ return Modified;
+ }
- // If we are not waiting on any counter we can skip the wait altogether.
- if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
- MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
- int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
- if (!OldWaitcnt ||
- (AMDGPU::decodeVmcnt(IV, Imm) !=
- (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
- (AMDGPU::decodeExpcnt(IV, Imm) !=
- (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
- (AMDGPU::decodeLgkmcnt(IV, Imm) !=
- (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
- MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
- if (ContainingLoop) {
- MachineBasicBlock *TBB = ContainingLoop->getHeader();
- BlockWaitcntBrackets *ScoreBracket =
- BlockWaitcntBracketsMap[TBB].get();
- if (!ScoreBracket) {
- assert(!BlockVisitedSet.count(TBB));
- BlockWaitcntBracketsMap[TBB] =
- llvm::make_unique<BlockWaitcntBrackets>(ST);
- ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
- }
- ScoreBracket->setRevisitLoop(true);
- LLVM_DEBUG(dbgs()
- << "set-revisit2: Block"
- << ContainingLoop->getHeader()->getNumber() << '\n';);
- }
- }
+ if (ForceEmitZeroWaitcnts)
+ Wait = AMDGPU::Waitcnt::allZero();
- // Update an existing waitcount, or make a new one.
- unsigned Enc = AMDGPU::encodeWaitcnt(IV,
- ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
- ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
- ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
- // We don't remove waitcnts that existed prior to the waitcnt
- // pass. Check if the waitcnt to-be-inserted can be avoided
- // or if the prev waitcnt can be updated.
- bool insertSWaitInst = true;
- for (MachineBasicBlock::iterator I = MI.getIterator(),
- B = MI.getParent()->begin();
- insertSWaitInst && I != B; --I) {
- if (I == MI.getIterator())
- continue;
+ if (ForceEmitWaitcnt[VM_CNT])
+ Wait.VmCnt = 0;
+ if (ForceEmitWaitcnt[EXP_CNT])
+ Wait.ExpCnt = 0;
+ if (ForceEmitWaitcnt[LGKM_CNT])
+ Wait.LgkmCnt = 0;
- switch (I->getOpcode()) {
- case AMDGPU::S_WAITCNT:
- if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
- insertSWaitInst = false;
- else if (!OldWaitcnt) {
- OldWaitcnt = &*I;
- Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
- }
- break;
- // TODO: skip over instructions which never require wait.
- }
- break;
- }
- if (insertSWaitInst) {
- if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
- if (ForceEmitZeroWaitcnts)
- LLVM_DEBUG(
- dbgs()
- << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
- if (IsForceEmitWaitcnt)
- LLVM_DEBUG(dbgs()
- << "Force emit a s_waitcnt due to debug counter\n");
-
- OldWaitcnt->getOperand(0).setImm(Enc);
- if (!OldWaitcnt->getParent())
- MI.getParent()->insert(MI, OldWaitcnt);
-
- LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *OldWaitcnt << '\n');
- } else {
- auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
- MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(Enc);
- TrackedWaitcntSet.insert(SWaitInst);
-
- LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *SWaitInst << '\n');
- }
- }
+ ScoreBrackets.applyWaitcnt(Wait);
- if (CntVal[EXP_CNT] == 0) {
- ScoreBrackets->setMixedExpTypes(false);
- }
- }
+ AMDGPU::Waitcnt OldWait;
+ if (OldWaitcntInstr) {
+ OldWait =
+ AMDGPU::decodeWaitcnt(IV, OldWaitcntInstr->getOperand(0).getImm());
}
-}
+ if (OldWait.dominates(Wait))
+ return false;
-void SIInsertWaitcnts::insertWaitcntBeforeCF(MachineBasicBlock &MBB,
- MachineInstr *Waitcnt) {
- if (MBB.empty()) {
- MBB.push_back(Waitcnt);
- return;
- }
+ if (OldWaitcntInstr && !TrackedWaitcntSet.count(OldWaitcntInstr))
+ Wait = Wait.combined(OldWait);
- MachineBasicBlock::iterator It = MBB.end();
- MachineInstr *MI = &*(--It);
- if (MI->isBranch()) {
- MBB.insert(It, Waitcnt);
+ unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+ if (OldWaitcntInstr) {
+ OldWaitcntInstr->getOperand(0).setImm(Enc);
+
+ LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *OldWaitcntInstr << '\n');
} else {
- MBB.push_back(Waitcnt);
+ auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
+ MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(Enc);
+ TrackedWaitcntSet.insert(SWaitInst);
+
+ LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *SWaitInst << '\n');
}
+
+ return true;
}
// This is a flat memory operation. Check to see if it has memory
@@ -1305,15 +1079,15 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
- if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS)
return true;
}
return false;
}
-void SIInsertWaitcnts::updateEventWaitcntAfter(
- MachineInstr &Inst, BlockWaitcntBrackets *ScoreBrackets) {
+void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
+ WaitcntBrackets *ScoreBrackets) {
// Now look at the instruction opcode. If it is a memory access
// instruction, update the upper-bound of the appropriate counter's
// bracket and the destination operand scores.
@@ -1379,342 +1153,124 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(
}
}
-// Merge the score brackets of the Block's predecessors;
-// this merged score bracket is used when adding waitcnts to the Block
-void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
- int32_t MaxPending[NUM_INST_CNTS] = {0};
- int32_t MaxFlat[NUM_INST_CNTS] = {0};
- bool MixedExpTypes = false;
-
- // For single basic block loops, we need to retain the Block's
- // score bracket to have accurate Pred info. So, make a copy of Block's
- // score bracket, clear() it (which retains several important bits of info),
- // populate, and then replace en masse. For non-single basic block loops,
- // just clear Block's current score bracket and repopulate in-place.
- bool IsSelfPred;
- std::unique_ptr<BlockWaitcntBrackets> S;
-
- IsSelfPred = (std::find(Block.pred_begin(), Block.pred_end(), &Block))
- != Block.pred_end();
- if (IsSelfPred) {
- S = llvm::make_unique<BlockWaitcntBrackets>(*ScoreBrackets);
- ScoreBrackets = S.get();
- }
-
- ScoreBrackets->clear();
-
- // See if there are any uninitialized predecessors. If so, emit an
- // s_waitcnt 0 at the beginning of the block.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
- bool Visited = BlockVisitedSet.count(Pred);
- if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
- continue;
- }
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int span =
- PredScoreBrackets->getScoreUB(T) - PredScoreBrackets->getScoreLB(T);
- MaxPending[T] = std::max(MaxPending[T], span);
- span =
- PredScoreBrackets->pendingFlat(T) - PredScoreBrackets->getScoreLB(T);
- MaxFlat[T] = std::max(MaxFlat[T], span);
- }
-
- MixedExpTypes |= PredScoreBrackets->mixedExpTypes();
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- // Also handle kills for exit block.
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int Span = KillWaitBrackets[I]->getScoreUB(T) -
- KillWaitBrackets[I]->getScoreLB(T);
- MaxPending[T] = std::max(MaxPending[T], Span);
- Span = KillWaitBrackets[I]->pendingFlat(T) -
- KillWaitBrackets[I]->getScoreLB(T);
- MaxFlat[T] = std::max(MaxFlat[T], Span);
- }
-
- MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes();
- }
- }
-
- // Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
- bool Visited = BlockVisitedSet.count(Pred);
- if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
- continue;
- }
-
- int GDSSpan = PredScoreBrackets->getEventUB(GDS_GPR_LOCK) -
- PredScoreBrackets->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
- int EXPSpan = PredScoreBrackets->getEventUB(EXP_GPR_LOCK) -
- PredScoreBrackets->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) -
- KillWaitBrackets[I]->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
- int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) -
- KillWaitBrackets[I]->getScoreLB(EXP_CNT);
- MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
- }
- }
-
-#if 0
- // LC does not (unlike) add a waitcnt at beginning. Leaving it as marker.
- // TODO: how does LC distinguish between function entry and main entry?
- // If this is the entry to a function, force a wait.
- MachineBasicBlock &Entry = Block.getParent()->front();
- if (Entry.getNumber() == Block.getNumber()) {
- ScoreBrackets->setWaitAtBeginning();
- return;
- }
-#endif
-
- // Now set the current Block's brackets to the largest ending bracket.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- ScoreBrackets->setScoreUB(T, MaxPending[T]);
- ScoreBrackets->setScoreLB(T, 0);
- ScoreBrackets->setLastFlat(T, MaxFlat[T]);
- }
-
- ScoreBrackets->setMixedExpTypes(MixedExpTypes);
-
- // Set the register scoreboard.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- if (!BlockVisitedSet.count(Pred)) {
- continue;
- }
-
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
-
- // Now merge the gpr_reg_score information
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int PredLB = PredScoreBrackets->getScoreLB(T);
- int PredUB = PredScoreBrackets->getScoreUB(T);
- if (PredLB < PredUB) {
- int PredScale = MaxPending[T] - PredUB;
- // Merge vgpr scores.
- for (int J = 0; J <= PredScoreBrackets->getMaxVGPR(); J++) {
- int PredRegScore = PredScoreBrackets->getRegScore(J, T);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
- }
- // Also need to merge sgpr scores for lgkm_cnt.
- if (T == LGKM_CNT) {
- for (int J = 0; J <= PredScoreBrackets->getMaxSGPR(); J++) {
- int PredRegScore =
- PredScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J + NUM_ALL_VGPRS, LGKM_CNT,
- std::max(
- ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
- NewRegScore));
- }
- }
- }
- }
-
- // Also merge the WaitEvent information.
- ForAllWaitEventType(W) {
- enum InstCounterType T = PredScoreBrackets->eventCounter(W);
- int PredEventUB = PredScoreBrackets->getEventUB(W);
- if (PredEventUB > PredScoreBrackets->getScoreLB(T)) {
- int NewEventUB =
- MaxPending[T] + PredEventUB - PredScoreBrackets->getScoreUB(T);
- if (NewEventUB > 0) {
- ScoreBrackets->setEventUB(
- W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
- }
- }
- }
- }
-
- // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
- // Set the register scoreboard.
- if (Block.succ_empty() && !KillWaitBrackets.empty()) {
- for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
- // Now merge the gpr_reg_score information.
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- int PredLB = KillWaitBrackets[I]->getScoreLB(T);
- int PredUB = KillWaitBrackets[I]->getScoreUB(T);
- if (PredLB < PredUB) {
- int PredScale = MaxPending[T] - PredUB;
- // Merge vgpr scores.
- for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) {
- int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
- }
- // Also need to merge sgpr scores for lgkm_cnt.
- if (T == LGKM_CNT) {
- for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) {
- int PredRegScore =
- KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
- if (PredRegScore <= PredLB)
- continue;
- int NewRegScore = PredScale + PredRegScore;
- ScoreBrackets->setRegScore(
- J + NUM_ALL_VGPRS, LGKM_CNT,
- std::max(
- ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
- NewRegScore));
- }
- }
- }
- }
-
- // Also merge the WaitEvent information.
- ForAllWaitEventType(W) {
- enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W);
- int PredEventUB = KillWaitBrackets[I]->getEventUB(W);
- if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) {
- int NewEventUB =
- MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T);
- if (NewEventUB > 0) {
- ScoreBrackets->setEventUB(
- W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
- }
- }
- }
- }
- }
-
- // Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the
- // sequencing predecessors, because changes to EXEC require waitcnts due to
- // the delayed nature of these operations.
- for (MachineBasicBlock *Pred : Block.predecessors()) {
- if (!BlockVisitedSet.count(Pred)) {
- continue;
- }
+bool WaitcntBrackets::mergeScore(const MergeInfo &M, uint32_t &Score,
+ uint32_t OtherScore) {
+ uint32_t MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift;
+ uint32_t OtherShifted =
+ OtherScore <= M.OtherLB ? 0 : OtherScore + M.OtherShift;
+ Score = std::max(MyShifted, OtherShifted);
+ return OtherShifted > MyShifted;
+}
- BlockWaitcntBrackets *PredScoreBrackets =
- BlockWaitcntBracketsMap[Pred].get();
-
- int pred_gds_ub = PredScoreBrackets->getEventUB(GDS_GPR_LOCK);
- if (pred_gds_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
- int new_gds_ub = MaxPending[EXP_CNT] + pred_gds_ub -
- PredScoreBrackets->getScoreUB(EXP_CNT);
- if (new_gds_ub > 0) {
- ScoreBrackets->setEventUB(
- GDS_GPR_LOCK,
- std::max(ScoreBrackets->getEventUB(GDS_GPR_LOCK), new_gds_ub));
- }
- }
- int pred_exp_ub = PredScoreBrackets->getEventUB(EXP_GPR_LOCK);
- if (pred_exp_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
- int new_exp_ub = MaxPending[EXP_CNT] + pred_exp_ub -
- PredScoreBrackets->getScoreUB(EXP_CNT);
- if (new_exp_ub > 0) {
- ScoreBrackets->setEventUB(
- EXP_GPR_LOCK,
- std::max(ScoreBrackets->getEventUB(EXP_GPR_LOCK), new_exp_ub));
+/// Merge the pending events and associated score brackets of \p Other into
+/// this object's bracket status.
+///
+/// Returns whether the merge resulted in a change that requires tighter waits
+/// (i.e. the merged brackets strictly dominate the original brackets).
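+///
+/// For example, merging an incoming bracket with LB=1 and UB=5 (four
+/// outstanding events) into a bracket with LB=4 and UB=6 (two outstanding)
+/// yields MyShift=2 and OtherShift=3, so both upper bounds line up at 8 and
+/// the result is LB=4, UB=8: the merged pending span is the maximum of the
+/// two inputs.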
+bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
+ bool StrictDom = false;
+
+ for (auto T : inst_counter_types()) {
+ // Merge event flags for this counter
+ const bool OldOutOfOrder = counterOutOfOrder(T);
+ const uint32_t OldEvents = PendingEvents & WaitEventMaskForInst[T];
+ const uint32_t OtherEvents = Other.PendingEvents & WaitEventMaskForInst[T];
+ if (OtherEvents & ~OldEvents)
+ StrictDom = true;
+ if (Other.MixedPendingEvents[T] ||
+ (OldEvents && OtherEvents && OldEvents != OtherEvents))
+ MixedPendingEvents[T] = true;
+ PendingEvents |= OtherEvents;
+
+ // Merge scores for this counter
+ const uint32_t MyPending = ScoreUBs[T] - ScoreLBs[T];
+ const uint32_t OtherPending = Other.ScoreUBs[T] - Other.ScoreLBs[T];
+ MergeInfo M;
+ M.OldLB = ScoreLBs[T];
+ M.OtherLB = Other.ScoreLBs[T];
+ M.MyShift = OtherPending > MyPending ? OtherPending - MyPending : 0;
+ M.OtherShift = ScoreUBs[T] - Other.ScoreUBs[T] + M.MyShift;
+
+ const uint32_t NewUB = ScoreUBs[T] + M.MyShift;
+ if (NewUB < ScoreUBs[T])
+ report_fatal_error("waitcnt score overflow");
+ ScoreUBs[T] = NewUB;
+ ScoreLBs[T] = std::min(M.OldLB + M.MyShift, M.OtherLB + M.OtherShift);
+
+ StrictDom |= mergeScore(M, LastFlat[T], Other.LastFlat[T]);
+
+ bool RegStrictDom = false;
+ for (int J = 0, E = std::max(getMaxVGPR(), Other.getMaxVGPR()) + 1; J != E;
+ J++) {
+ RegStrictDom |= mergeScore(M, VgprScores[T][J], Other.VgprScores[T][J]);
+ }
+
+ if (T == LGKM_CNT) {
+ for (int J = 0, E = std::max(getMaxSGPR(), Other.getMaxSGPR()) + 1;
+ J != E; J++) {
+ RegStrictDom |= mergeScore(M, SgprScores[J], Other.SgprScores[J]);
}
}
- }
- // if a single block loop, update the score brackets. Not needed for other
- // blocks, as we did this in-place
- if (IsSelfPred) {
- BlockWaitcntBracketsMap[&Block] = llvm::make_unique<BlockWaitcntBrackets>(*ScoreBrackets);
+ if (RegStrictDom && !OldOutOfOrder)
+ StrictDom = true;
}
-}
-/// Return true if the given basic block is a "bottom" block of a loop.
-/// This works even if the loop is discontiguous. This also handles
-/// multiple back-edges for the same "header" block of a loop.
-bool SIInsertWaitcnts::isLoopBottom(const MachineLoop *Loop,
- const MachineBasicBlock *Block) {
- for (MachineBasicBlock *MBB : Loop->blocks()) {
- if (MBB == Block && MBB->isSuccessor(Loop->getHeader())) {
- return true;
- }
- }
- return false;
-}
+ VgprUB = std::max(getMaxVGPR(), Other.getMaxVGPR());
+ SgprUB = std::max(getMaxSGPR(), Other.getMaxSGPR());
-/// Count the number of "bottom" basic blocks of a loop.
-unsigned SIInsertWaitcnts::countNumBottomBlocks(const MachineLoop *Loop) {
- unsigned Count = 0;
- for (MachineBasicBlock *MBB : Loop->blocks()) {
- if (MBB->isSuccessor(Loop->getHeader())) {
- Count++;
- }
- }
- return Count;
+ return StrictDom;
}
// Generate s_waitcnt instructions where needed.
-void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
- MachineBasicBlock &Block) {
- // Initialize the state information.
- mergeInputScoreBrackets(Block);
-
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
+bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
+ MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets) {
+ bool Modified = false;
LLVM_DEBUG({
dbgs() << "*** Block" << Block.getNumber() << " ***";
- ScoreBrackets->dump();
+ ScoreBrackets.dump();
});
// Walk over the instructions.
+ MachineInstr *OldWaitcntInstr = nullptr;
+
for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
Iter != E;) {
MachineInstr &Inst = *Iter;
+
// Remove any previously existing waitcnts.
if (Inst.getOpcode() == AMDGPU::S_WAITCNT) {
- // Leave pre-existing waitcnts, but note their existence via setWaitcnt.
- // Remove the waitcnt-pass-generated waitcnts; the pass will add them back
- // as needed.
- if (!TrackedWaitcntSet.count(&Inst))
- ++Iter;
- else {
- ++Iter;
- Inst.removeFromParent();
+ if (OldWaitcntInstr) {
+ if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
+ TrackedWaitcntSet.erase(OldWaitcntInstr);
+ OldWaitcntInstr->eraseFromParent();
+ OldWaitcntInstr = nullptr;
+ } else if (!TrackedWaitcntSet.count(&Inst)) {
+ // Two successive s_waitcnt's, both of which are pre-existing and
+ // are therefore preserved.
+ int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
+ ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
+ } else {
+ ++Iter;
+ Inst.eraseFromParent();
+ Modified = true;
+ continue;
+ }
}
- ScoreBrackets->setWaitcnt(&Inst);
- continue;
- }
- // Kill instructions generate a conditional branch to the endmain block.
- // Merge the current waitcnt state into the endmain block information.
- // TODO: Are there other flavors of KILL instruction?
- if (Inst.getOpcode() == AMDGPU::KILL) {
- addKillWaitBracket(ScoreBrackets);
+ OldWaitcntInstr = &Inst;
+ ++Iter;
+ continue;
}
bool VCCZBugWorkAround = false;
if (readsVCCZ(Inst) &&
(!VCCZBugHandledSet.count(&Inst))) {
- if (ScoreBrackets->getScoreLB(LGKM_CNT) <
- ScoreBrackets->getScoreUB(LGKM_CNT) &&
- ScoreBrackets->hasPendingSMEM()) {
+ if (ScoreBrackets.getScoreLB(LGKM_CNT) <
+ ScoreBrackets.getScoreUB(LGKM_CNT) &&
+ ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
VCCZBugWorkAround = true;
}
@@ -1722,9 +1278,10 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
// Generate an s_waitcnt instruction to be placed before
// cur_Inst, if needed.
- generateWaitcntInstBefore(Inst, ScoreBrackets);
+ Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
+ OldWaitcntInstr = nullptr;
- updateEventWaitcntAfter(Inst, ScoreBrackets);
+ updateEventWaitcntAfter(Inst, &ScoreBrackets);
#if 0 // TODO: implement resource type check controlled by options with ub = LB.
// If this instruction generates a S_SETVSKIP because it is an
@@ -1737,11 +1294,9 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
#endif
- ScoreBrackets->clearWaitcnt();
-
LLVM_DEBUG({
Inst.print(dbgs());
- ScoreBrackets->dump();
+ ScoreBrackets.dump();
});
// Check to see if this is a GWS instruction. If so, and if this is CI or
@@ -1753,10 +1308,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
// TODO: && context->target_info->GwsRequiresMemViolTest() ) {
- ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
- ScoreBrackets->updateByWait(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
- ScoreBrackets->updateByWait(LGKM_CNT,
- ScoreBrackets->getScoreUB(LGKM_CNT));
+ ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZero());
}
// TODO: Remove this work-around after fixing the scheduler and enable the
@@ -1769,71 +1321,13 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
AMDGPU::VCC)
.addReg(AMDGPU::VCC);
VCCZBugHandledSet.insert(&Inst);
+ Modified = true;
}
++Iter;
}
- // Check if we need to force convergence at loop footer.
- MachineLoop *ContainingLoop = MLI->getLoopFor(&Block);
- if (ContainingLoop && isLoopBottom(ContainingLoop, &Block)) {
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- WaitcntData->print();
- LLVM_DEBUG(dbgs() << '\n';);
-
- // The iterative waitcnt insertion algorithm aims for optimal waitcnt
- // placement, but doesn't guarantee convergence for a loop. Each
- // loop should take at most (n+1) iterations for it to converge naturally,
- // where n is the number of bottom blocks. If this threshold is reached and
- // the result hasn't converged, then we force convergence by inserting
- // a s_waitcnt at the end of loop footer.
- if (WaitcntData->getIterCnt() > (countNumBottomBlocks(ContainingLoop) + 1)) {
- // To ensure convergence, need to make wait events at loop footer be no
- // more than those from the previous iteration.
- // As a simplification, instead of tracking individual scores and
- // generating the precise wait count, just wait on 0.
- bool HasPending = false;
- MachineInstr *SWaitInst = WaitcntData->getWaitcnt();
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1)) {
- if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
- ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
- HasPending = true;
- break;
- }
- }
-
- if (HasPending) {
- if (!SWaitInst) {
- SWaitInst = BuildMI(Block, Block.getFirstNonPHI(),
- DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
- TrackedWaitcntSet.insert(SWaitInst);
-#if 0 // TODO: Format the debug output
- OutputTransformBanner("insertWaitcntInBlock",0,"Create:",context);
- OutputTransformAdd(SWaitInst, context);
-#endif
- }
-#if 0 // TODO: ??
- _DEV( REPORTED_STATS->force_waitcnt_converge = 1; )
-#endif
- }
-
- if (SWaitInst) {
- LLVM_DEBUG({
- SWaitInst->print(dbgs());
- dbgs() << "\nAdjusted score board:";
- ScoreBrackets->dump();
- });
-
- // Add this waitcnt to the block. It is either newly created or
- // created in previous iterations and added back since block traversal
- // always removes waitcnts.
- insertWaitcntBeforeCF(Block, SWaitInst);
- WaitcntData->setWaitcnt(SWaitInst);
- }
- }
- }
+ return Modified;
}
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
@@ -1841,14 +1335,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- MLI = &getAnalysis<MachineLoopInfo>();
- IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
+ IV = AMDGPU::getIsaVersion(ST->getCPU());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- AMDGPUASI = ST->getAMDGPUAS();
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
- for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
- T = (enum InstCounterType)(T + 1))
+ for (auto T : inst_counter_types())
ForceEmitWaitcnt[T] = false;
HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
@@ -1868,93 +1359,70 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
TrackedWaitcntSet.clear();
- BlockVisitedSet.clear();
VCCZBugHandledSet.clear();
- LoopWaitcntDataMap.clear();
- BlockWaitcntProcessedSet.clear();
+ RpotIdxMap.clear();
+ BlockInfos.clear();
+
+  // Keep iterating over the blocks in reverse post-order, inserting and
+  // updating s_waitcnt where needed, until a fixed point is reached.
+ for (MachineBasicBlock *MBB :
+ ReversePostOrderTraversal<MachineFunction *>(&MF)) {
+ RpotIdxMap[MBB] = BlockInfos.size();
+ BlockInfos.emplace_back(MBB);
+ }
- // Walk over the blocks in reverse post-dominator order, inserting
- // s_waitcnt where needed.
- ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ std::unique_ptr<WaitcntBrackets> Brackets;
bool Modified = false;
- for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
- I = RPOT.begin(),
- E = RPOT.end(), J = RPOT.begin();
- I != E;) {
- MachineBasicBlock &MBB = **I;
-
- BlockVisitedSet.insert(&MBB);
-
- BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
- if (!ScoreBrackets) {
- BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>(ST);
- ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
- }
- ScoreBrackets->setPostOrder(MBB.getNumber());
- MachineLoop *ContainingLoop = MLI->getLoopFor(&MBB);
- if (ContainingLoop && LoopWaitcntDataMap[ContainingLoop] == nullptr)
- LoopWaitcntDataMap[ContainingLoop] = llvm::make_unique<LoopWaitcntData>();
-
- // If we are walking into the block from before the loop, then guarantee
- // at least 1 re-walk over the loop to propagate the information, even if
- // no S_WAITCNT instructions were generated.
- if (ContainingLoop && ContainingLoop->getHeader() == &MBB) {
- unsigned Count = countNumBottomBlocks(ContainingLoop);
-
- // If the loop has multiple back-edges, and so more than one "bottom"
- // basic block, we have to guarantee a re-walk over every blocks.
- if ((std::count(BlockWaitcntProcessedSet.begin(),
- BlockWaitcntProcessedSet.end(), &MBB) < (int)Count)) {
- BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
- LLVM_DEBUG(dbgs() << "set-revisit1: Block"
- << ContainingLoop->getHeader()->getNumber() << '\n';);
+ bool Repeat;
+ do {
+ Repeat = false;
+
+ for (BlockInfo &BI : BlockInfos) {
+ if (!BI.Dirty)
+ continue;
+
+ unsigned Idx = std::distance(&*BlockInfos.begin(), &BI);
+
+ if (BI.Incoming) {
+ if (!Brackets)
+ Brackets = llvm::make_unique<WaitcntBrackets>(*BI.Incoming);
+ else
+ *Brackets = *BI.Incoming;
+ } else {
+ if (!Brackets)
+ Brackets = llvm::make_unique<WaitcntBrackets>(ST);
+ else
+ Brackets->clear();
}
- }
- // Walk over the instructions.
- insertWaitcntInBlock(MF, MBB);
-
- // Record that waitcnts have been processed at least once for this block.
- BlockWaitcntProcessedSet.push_back(&MBB);
-
- // See if we want to revisit the loop. If a loop has multiple back-edges,
- // we shouldn't revisit the same "bottom" basic block.
- if (ContainingLoop && isLoopBottom(ContainingLoop, &MBB) &&
- std::count(BlockWaitcntProcessedSet.begin(),
- BlockWaitcntProcessedSet.end(), &MBB) == 1) {
- MachineBasicBlock *EntryBB = ContainingLoop->getHeader();
- BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get();
- if (EntrySB && EntrySB->getRevisitLoop()) {
- EntrySB->setRevisitLoop(false);
- J = I;
- int32_t PostOrder = EntrySB->getPostOrder();
- // TODO: Avoid this loop. Find another way to set I.
- for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
- X = RPOT.begin(),
- Y = RPOT.end();
- X != Y; ++X) {
- MachineBasicBlock &MBBX = **X;
- if (MBBX.getNumber() == PostOrder) {
- I = X;
- break;
+ Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets);
+ BI.Dirty = false;
+
+ if (Brackets->hasPending()) {
+ BlockInfo *MoveBracketsToSucc = nullptr;
+ for (MachineBasicBlock *Succ : BI.MBB->successors()) {
+ unsigned SuccIdx = RpotIdxMap[Succ];
+ BlockInfo &SuccBI = BlockInfos[SuccIdx];
+ if (!SuccBI.Incoming) {
+ SuccBI.Dirty = true;
+ if (SuccIdx <= Idx)
+ Repeat = true;
+ if (!MoveBracketsToSucc) {
+ MoveBracketsToSucc = &SuccBI;
+ } else {
+ SuccBI.Incoming = llvm::make_unique<WaitcntBrackets>(*Brackets);
+ }
+ } else if (SuccBI.Incoming->merge(*Brackets)) {
+ SuccBI.Dirty = true;
+ if (SuccIdx <= Idx)
+ Repeat = true;
}
}
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- WaitcntData->incIterCnt();
- LLVM_DEBUG(dbgs() << "revisit: Block" << EntryBB->getNumber() << '\n';);
- continue;
- } else {
- LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
- // Loop converged, reset iteration count. If this loop gets revisited,
- // it must be from an outer loop, the counter will restart, this will
- // ensure we don't force convergence on such revisits.
- WaitcntData->resetIterCnt();
+ if (MoveBracketsToSucc)
+ MoveBracketsToSucc->Incoming = std::move(Brackets);
}
}
-
- J = I;
- ++I;
- }
+ } while (Repeat);
SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
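
The rewritten runOnMachineFunction above drops the loop-revisit bookkeeping in favour of a plain worklist over blocks in reverse post order: a block is reprocessed whenever a predecessor changes its incoming wait-count state, and the sweep repeats until nothing is dirty. Below is a minimal, self-contained C++ sketch of that fixed-point pattern; the State type, its merge rule, and the block graph are illustrative stand-ins, not the pass's real WaitcntBrackets or BlockInfo structures.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Toy forward data-flow solver over blocks already numbered in reverse
    // post order. "State" stands in for the pass's wait-count brackets.
    struct State {
      int Pending = 0;
      // Returns true if merging In changed this state (drives re-iteration).
      bool merge(const State &In) {
        int Old = Pending;
        Pending = std::max(Pending, In.Pending);
        return Pending != Old;
      }
    };

    int main() {
      // Successor lists indexed by RPO position; block 2 has a back edge to 1.
      std::vector<std::vector<unsigned>> Succs = {{1}, {2}, {1, 3}, {}};
      std::vector<State> Incoming(Succs.size());
      std::vector<bool> Dirty(Succs.size(), true);
      Incoming[0].Pending = 3; // seed some pending work at the entry block

      bool Repeat;
      do {
        Repeat = false;
        for (unsigned Idx = 0; Idx < Succs.size(); ++Idx) {
          if (!Dirty[Idx])
            continue;
          Dirty[Idx] = false;
          State Out = Incoming[Idx]; // "process the block"
          for (unsigned Succ : Succs[Idx]) {
            if (Incoming[Succ].merge(Out)) {
              Dirty[Succ] = true;
              if (Succ <= Idx) // a back edge forces another sweep
                Repeat = true;
            }
          }
        }
      } while (Repeat);

      for (unsigned Idx = 0; Idx < Succs.size(); ++Idx)
        std::printf("block %u: pending=%d\n", Idx, Incoming[Idx].Pending);
      return 0;
    }
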
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index b73d30940fc3..65ffc27b8b60 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -121,6 +121,10 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that this is a D16 buffer instruction.
field bit D16Buf = 0;
+ // This bit indicates that this uses the floating point double precision
+ // rounding mode flags
+ field bit FPDPRounding = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -178,6 +182,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{50} = D16Buf;
+ let TSFlags{51} = FPDPRounding;
+
let SchedRW = [Write32Bit];
field bits<1> DisableSIDecoder = 0;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f3745382a6f4..2370d5fa7b27 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -264,9 +265,10 @@ static bool isStride64(unsigned Opc) {
}
}
-bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned Opc = LdSt.getOpcode();
if (isDS(LdSt)) {
@@ -274,11 +276,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
getNamedOperand(LdSt, AMDGPU::OpName::offset);
if (OffsetImm) {
// Normal, single offset LDS instruction.
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::addr);
-
- BaseReg = AddrReg->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
Offset = OffsetImm->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -309,10 +310,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (isStride64(Opc))
EltSize *= 64;
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::addr);
- BaseReg = AddrReg->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
Offset = EltSize * Offset0;
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -324,19 +325,20 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (SOffset && SOffset->isReg())
return false;
- const MachineOperand *AddrReg =
- getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (!AddrReg)
return false;
const MachineOperand *OffsetImm =
getNamedOperand(LdSt, AMDGPU::OpName::offset);
- BaseReg = AddrReg->getReg();
+ BaseOp = AddrReg;
Offset = OffsetImm->getImm();
if (SOffset) // soffset can be an inline immediate.
Offset += SOffset->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
@@ -346,36 +348,46 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
if (!OffsetImm)
return false;
- const MachineOperand *SBaseReg =
- getNamedOperand(LdSt, AMDGPU::OpName::sbase);
- BaseReg = SBaseReg->getReg();
+ MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
+ BaseOp = SBaseReg;
Offset = OffsetImm->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
if (isFLAT(LdSt)) {
- const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (VAddr) {
// Can't analyze 2 offsets.
if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
return false;
- BaseReg = VAddr->getReg();
+ BaseOp = VAddr;
} else {
// scratch instructions have either vaddr or saddr.
- BaseReg = getNamedOperand(LdSt, AMDGPU::OpName::saddr)->getReg();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
}
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
return false;
}
-static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1,
- const MachineInstr &MI2, unsigned BaseReg2) {
- if (BaseReg1 == BaseReg2)
+static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
+ const MachineOperand &BaseOp1,
+ const MachineInstr &MI2,
+ const MachineOperand &BaseOp2) {
+ // Support only base operands with base registers.
+ // Note: this could be extended to support FI operands.
+ if (!BaseOp1.isReg() || !BaseOp2.isReg())
+ return false;
+
+ if (BaseOp1.isIdenticalTo(BaseOp2))
return true;
if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
@@ -401,12 +413,13 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1,
return Base1 == Base2;
}
-bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
- unsigned BaseReg1,
- MachineInstr &SecondLdSt,
- unsigned BaseReg2,
+bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
+ MachineOperand &BaseOp2,
unsigned NumLoads) const {
- if (!memOpsHaveSameBasePtr(FirstLdSt, BaseReg1, SecondLdSt, BaseReg2))
+ MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ MachineInstr &SecondLdSt = *BaseOp2.getParent();
+
+ if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
return false;
const MachineOperand *FirstDst = nullptr;
@@ -863,7 +876,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned Size = FrameInfo.getObjectSize(FrameIndex);
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
@@ -907,16 +920,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- if (!ST.isVGPRSpillingEnabled(MF->getFunction())) {
- LLVMContext &Ctx = MF->getFunction().getContext();
- Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
- " spill register");
- BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
- .addReg(SrcReg);
-
- return;
- }
-
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
@@ -972,9 +975,9 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
unsigned Size = FrameInfo.getObjectSize(FrameIndex);
unsigned SpillSize = TRI->getSpillSize(*RC);
@@ -986,6 +989,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
PtrInfo, MachineMemOperand::MOLoad, Size, Align);
if (RI.isSGPRClass(RC)) {
+ MFI->setHasSpilledSGPRs();
+
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
@@ -1009,15 +1014,6 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- if (!ST.isVGPRSpillingEnabled(MF->getFunction())) {
- LLVMContext &Ctx = MF->getFunction().getContext();
- Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
- " restore register");
- BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
-
- return;
- }
-
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
@@ -1036,7 +1032,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
unsigned WavefrontSize = ST.getWavefrontSize();
@@ -1044,7 +1040,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
if (!MFI->hasCalculatedTID()) {
MachineBasicBlock &Entry = MBB.getParent()->front();
MachineBasicBlock::iterator Insert = Entry.front();
- DebugLoc DL = Insert->getDebugLoc();
+ const DebugLoc &DL = Insert->getDebugLoc();
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
*MF);
@@ -1421,10 +1417,15 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
unsigned &SrcOpIdx1) const {
- if (!MI.isCommutable())
+ return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1);
+}
+
+bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
+ unsigned &SrcOpIdx1) const {
+ if (!Desc.isCommutable())
return false;
- unsigned Opc = MI.getOpcode();
+ unsigned Opc = Desc.getOpcode();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
return false;
@@ -1549,8 +1550,9 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// buzz;
RS->enterBasicBlockEnd(MBB);
- unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
- MachineBasicBlock::iterator(GetPC), 0);
+ unsigned Scav = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_64RegClass,
+ MachineBasicBlock::iterator(GetPC), false, 0);
MRI.replaceRegWith(PCReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
@@ -1644,7 +1646,34 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
MachineBasicBlock::iterator I = MBB.getFirstTerminator();
- if (I == MBB.end())
+ auto E = MBB.end();
+ if (I == E)
+ return false;
+
+ // Skip over the instructions that are artificial terminators for special
+ // exec management.
+ while (I != E && !I->isBranch() && !I->isReturn() &&
+ I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
+ switch (I->getOpcode()) {
+ case AMDGPU::SI_MASK_BRANCH:
+ case AMDGPU::S_MOV_B64_term:
+ case AMDGPU::S_XOR_B64_term:
+ case AMDGPU::S_ANDN2_B64_term:
+ break;
+ case AMDGPU::SI_IF:
+ case AMDGPU::SI_ELSE:
+ case AMDGPU::SI_KILL_I1_TERMINATOR:
+ case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
+ // FIXME: It's messy that these need to be considered here at all.
+ return true;
+ default:
+ llvm_unreachable("unexpected non-branch terminator inst");
+ }
+
+ ++I;
+ }
+
+ if (I == E)
return false;
if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
@@ -1933,20 +1962,20 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
}
unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const {
+ unsigned Kind) const {
switch(Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
- return ST.getAMDGPUAS().PRIVATE_ADDRESS;
+ return AMDGPUAS::PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
- return ST.getAMDGPUAS().CONSTANT_ADDRESS;
+ return AMDGPUAS::CONSTANT_ADDRESS;
}
- return ST.getAMDGPUAS().FLAT_ADDRESS;
+ return AMDGPUAS::FLAT_ADDRESS;
}
static void removeModOperands(MachineInstr &MI) {
@@ -2066,12 +2095,40 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Src2->isReg() && Src2->getReg() == Reg) {
// Not allowed to use constant bus for another operand.
// We can however allow an inline immediate as src0.
- if (!Src0->isImm() &&
- (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
- return false;
+ bool Src0Inlined = false;
+ if (Src0->isReg()) {
+ // Try to inline constant if possible.
+ // If the Def moves immediate and the use is single
+ // We are saving VGPR here.
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ isInlineConstant(Def->getOperand(1)) &&
+ MRI->hasOneUse(Src0->getReg())) {
+ Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+ Src0Inlined = true;
+ } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
+ (RI.isVirtualRegister(Src0->getReg()) &&
+ RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+ return false;
+ // VGPR is okay as Src0 - fallthrough
+ }
- if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
- return false;
+ if (Src1->isReg() && !Src0Inlined ) {
+ // We have one slot for inlinable constant so far - try to fill it
+ MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
+ if (Def && Def->isMoveImmediate() &&
+ isInlineConstant(Def->getOperand(1)) &&
+ MRI->hasOneUse(Src1->getReg()) &&
+ commuteInstruction(UseMI)) {
+ Src0->ChangeToImmediate(Def->getOperand(1).getImm());
+ } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+ (RI.isVirtualRegister(Src1->getReg()) &&
+ RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
+ return false;
+ // VGPR is okay as Src1 - fallthrough
+ }
const int64_t Imm = ImmOp->getImm();
@@ -2117,11 +2174,13 @@ static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
MachineInstr &MIb) const {
- unsigned BaseReg0, BaseReg1;
+ MachineOperand *BaseOp0, *BaseOp1;
int64_t Offset0, Offset1;
- if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
- getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
+ if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
+ getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
+ if (!BaseOp0->isIdenticalTo(*BaseOp1))
+ return false;
if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
// FIXME: Handle ds_read2 / ds_write2.
@@ -2129,8 +2188,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
}
unsigned Width0 = (*MIa.memoperands_begin())->getSize();
unsigned Width1 = (*MIb.memoperands_begin())->getSize();
- if (BaseReg0 == BaseReg1 &&
- offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
+ if (offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
return true;
}
}
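
After the change, checkInstOffsetsDoNotOverlap first bails out when the two base operands are not identical and otherwise relies on the usual interval-disjointness test. The hunk only shows the signature of offsetsDoNotOverlap, so the body below is a plausible reconstruction of that test, not the file's actual implementation.

    #include <cassert>

    // Two accesses [OffsetA, OffsetA + WidthA) and [OffsetB, OffsetB + WidthB)
    // off the same base cannot overlap when the lower one ends before the
    // higher one begins.
    static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
                                    int WidthB, int OffsetB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      return LowOffset + LowWidth <= HighOffset;
    }

    int main() {
      assert(offsetsDoNotOverlap(4, 0, 4, 4));  // adjacent dwords
      assert(!offsetsDoNotOverlap(8, 0, 4, 4)); // second dword inside the first
      return 0;
    }
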
@@ -2398,8 +2456,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
int32_t Trunc = static_cast<int32_t>(Imm);
- return Trunc == Imm &&
- AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
+ return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
}
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -2523,6 +2580,115 @@ bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
hasModifiersSet(MI, AMDGPU::OpName::omod);
}
+bool SIInstrInfo::canShrink(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+ // Can't shrink instruction with three operands.
+ // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+ // a special case for it. It can only be shrunk if the third operand
+ // is vcc. We should handle this the same way we handle vopc, by adding
+ // a register allocation hint pre-regalloc and then doing the shrinking
+ // post-regalloc.
+ if (Src2) {
+ switch (MI.getOpcode()) {
+ default: return false;
+
+ case AMDGPU::V_ADDC_U32_e64:
+ case AMDGPU::V_SUBB_U32_e64:
+ case AMDGPU::V_SUBBREV_U32_e64: {
+ const MachineOperand *Src1
+ = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()))
+ return false;
+ // Additional verification is needed for sdst/src2.
+ return true;
+ }
+ case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_MAC_F16_e64:
+ case AMDGPU::V_FMAC_F32_e64:
+ if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
+ return false;
+ break;
+
+ case AMDGPU::V_CNDMASK_B32_e64:
+ break;
+ }
+ }
+
+ const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
+ return false;
+
+ // We don't need to check src0, all input types are legal, so just make sure
+ // src0 isn't using any modifiers.
+ if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
+ return false;
+
+ // Can it be shrunk to a valid 32 bit opcode?
+ if (!hasVALU32BitEncoding(MI.getOpcode()))
+ return false;
+
+ // Check output modifiers
+ return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+ !hasModifiersSet(MI, AMDGPU::OpName::clamp);
+}
+
+// Set VCC operand with all flags from \p Orig, except for setting it as
+// implicit.
+static void copyFlagsToImplicitVCC(MachineInstr &MI,
+ const MachineOperand &Orig) {
+
+ for (MachineOperand &Use : MI.implicit_operands()) {
+ if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
+ Use.setIsUndef(Orig.isUndef());
+ Use.setIsKill(Orig.isKill());
+ return;
+ }
+ }
+}
+
+MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
+ unsigned Op32) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineInstrBuilder Inst32 =
+ BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
+
+ // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
+ // For VOPC instructions, this is replaced by an implicit def of vcc.
+ int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
+ if (Op32DstIdx != -1) {
+ // dst
+ Inst32.add(MI.getOperand(0));
+ } else {
+ assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+ "Unexpected case");
+ }
+
+ Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
+
+ const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1)
+ Inst32.add(*Src1);
+
+ const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+
+ if (Src2) {
+ int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+ if (Op32Src2Idx != -1) {
+ Inst32.add(*Src2);
+ } else {
+ // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+ // replaced with an implicit read of vcc. This was already added
+ // during the initial BuildMI, so find it to preserve the flags.
+ copyFlagsToImplicitVCC(*Inst32, *Src2);
+ }
+ }
+
+ return Inst32;
+}
+
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
const MCOperandInfo &OpInfo) const {
@@ -2806,6 +2972,42 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // Verify MIMG
+ if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
+ // Ensure that the return type used is large enough for all the options
+ // being used; TFE/LWE require an extra result register.
+ const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
+ if (DMask) {
+ uint64_t DMaskImm = DMask->getImm();
+ uint32_t RegCount =
+ isGather4(MI.getOpcode()) ? 4 : countPopulation(DMaskImm);
+ const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe);
+ const MachineOperand *LWE = getNamedOperand(MI, AMDGPU::OpName::lwe);
+ const MachineOperand *D16 = getNamedOperand(MI, AMDGPU::OpName::d16);
+
+ // Adjust for packed 16 bit values
+ if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
+ RegCount >>= 1;
+
+ // Adjust if using LWE or TFE
+ if ((LWE && LWE->getImm()) || (TFE && TFE->getImm()))
+ RegCount += 1;
+
+ const uint32_t DstIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+ const MachineOperand &Dst = MI.getOperand(DstIdx);
+ if (Dst.isReg()) {
+ const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx);
+ uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
+ if (RegCount > DstSize) {
+ ErrInfo = "MIMG instruction returns too many registers for dst "
+ "register class";
+ return false;
+ }
+ }
+ }
+ }
+
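
The new MIMG check works out how many 32-bit result registers an image load should produce: one per dmask bit (always four for gather4), halved when D16 data is packed, plus one extra when TFE or LWE requests the status dword. A small standalone sketch of that arithmetic follows; the function name and flattened parameters are illustrative only.

    #include <bitset>
    #include <cstdint>
    #include <cstdio>

    // Expected number of 32-bit result registers for an image load.
    static unsigned expectedMIMGRegCount(uint64_t DMask, bool IsGather4,
                                         bool D16Packed, bool TFEorLWE) {
      unsigned Count =
          IsGather4 ? 4u : (unsigned)std::bitset<64>(DMask).count();
      if (D16Packed)
        Count >>= 1; // packed 16-bit data: two components per register
      if (TFEorLWE)
        Count += 1;  // one extra register for the TFE/LWE status word
      return Count;
    }

    int main() {
      std::printf("%u\n", expectedMIMGRegCount(0xF, false, false, true)); // 5
      std::printf("%u\n", expectedMIMGRegCount(0xF, false, true, false)); // 2
      return 0;
    }
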
// Verify VOP*. Ignore multiple sgpr operands on writelane.
if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
&& (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) {
@@ -3001,6 +3203,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
+ case AMDGPU::S_XNOR_B32:
+ return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
@@ -3438,8 +3642,13 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
// pointer value is uniform.
MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase);
if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
- unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
- SBase->setReg(SGPR);
+ unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
+ SBase->setReg(SGPR);
+ }
+ MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
+ if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
+ unsigned SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
+ SOff->setReg(SGPR);
}
}
@@ -3475,7 +3684,191 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
FoldImmediate(*Copy, *Def, OpReg, &MRI);
}
-void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
+// Emit the actual waterfall loop, executing the wrapped instruction for each
+// unique value of \p Rsrc across all lanes. In the best case we execute 1
+// iteration, in the worst case we execute 64 (once per lane).
+static void
+emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
+ MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
+ const DebugLoc &DL, MachineOperand &Rsrc) {
+ MachineBasicBlock::iterator I = LoopBB.begin();
+
+ unsigned VRsrc = Rsrc.getReg();
+ unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
+
+ unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+ // Beginning of the loop, read the next Rsrc variant.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
+ .addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);
+
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
+ .addReg(SRsrcSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(SRsrcSub1)
+ .addImm(AMDGPU::sub1)
+ .addReg(SRsrcSub2)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcSub3)
+ .addImm(AMDGPU::sub3);
+
+ // Update Rsrc operand to use the SGPR Rsrc.
+ Rsrc.setReg(SRsrc);
+ Rsrc.setIsKill(true);
+
+ // Identify all lanes with identical Rsrc operands in their VGPRs.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
+ .addReg(SRsrc, 0, AMDGPU::sub0_sub1)
+ .addReg(VRsrc, 0, AMDGPU::sub0_sub1);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
+ .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
+ .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
+ .addReg(CondReg0)
+ .addReg(CondReg1);
+
+ MRI.setSimpleHint(SaveExec, AndCond);
+
+ // Update EXEC to matching lanes, saving original to SaveExec.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
+ .addReg(AndCond, RegState::Kill);
+
+ // The original instruction is here; we insert the terminators after it.
+ I = LoopBB.end();
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(SaveExec);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
+}
+
+// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
+// with SGPRs by iterating over all unique values across all lanes.
+static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
+ MachineOperand &Rsrc, MachineDominatorTree *MDT) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineBasicBlock::iterator I(&MI);
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ // Save the EXEC mask
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
+ .addReg(AMDGPU::EXEC);
+
+ // Killed uses in the instruction we are waterfalling around will be
+ // incorrect due to the added control-flow.
+ for (auto &MO : MI.uses()) {
+ if (MO.isReg() && MO.isUse()) {
+ MRI.clearKillFlags(MO.getReg());
+ }
+ }
+
+ // To insert the loop we need to split the block. Move everything after this
+ // point to a new block, and insert a new empty block between the two.
+ MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+
+ MF.insert(MBBI, LoopBB);
+ MF.insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(LoopBB);
+ LoopBB->addSuccessor(RemainderBB);
+
+ // Move MI to the LoopBB, and the remainder of the block to RemainderBB.
+ MachineBasicBlock::iterator J = I++;
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+ LoopBB->splice(LoopBB->begin(), &MBB, J);
+
+ MBB.addSuccessor(LoopBB);
+
+ // Update dominators. We know that MBB immediately dominates LoopBB, that
+ // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
+ // dominates all of the successors transferred to it from MBB that MBB used
+ // to dominate.
+ if (MDT) {
+ MDT->addNewBlock(LoopBB, &MBB);
+ MDT->addNewBlock(RemainderBB, LoopBB);
+ for (auto &Succ : RemainderBB->successors()) {
+ if (MDT->dominates(&MBB, Succ)) {
+ MDT->changeImmediateDominator(Succ, RemainderBB);
+ }
+ }
+ }
+
+ emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc);
+
+ // Restore the EXEC mask
+ MachineBasicBlock::iterator First = RemainderBB->begin();
+ BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(SaveExec);
+}
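
The two helpers above implement the classic waterfall pattern: while the resource descriptor lives in VGPRs it may differ per lane, so the loop repeatedly reads the value of the first active lane, masks execution down to the lanes that share it, runs the wrapped instruction, and clears those lanes from EXEC until none remain. The scalar C++ model below only mirrors that control flow with a plain array standing in for the 64 lanes; nothing in it is the pass's real API.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Each "lane" holds a resource value; the wrapped instruction must run
    // once per unique value with only the matching lanes enabled in EXEC.
    static void waterfall(const std::vector<uint32_t> &LaneRsrc) {
      uint64_t Exec = (LaneRsrc.size() >= 64)
                          ? ~0ull
                          : ((1ull << LaneRsrc.size()) - 1);
      while (Exec) {
        // v_readfirstlane: take the value of the lowest active lane.
        unsigned First = 0;
        while (!((Exec >> First) & 1))
          ++First;
        uint32_t Current = LaneRsrc[First];

        // v_cmp + s_and_saveexec: mask execution to the lanes that agree.
        uint64_t Match = 0;
        for (unsigned L = 0; L < LaneRsrc.size(); ++L)
          if (((Exec >> L) & 1) && LaneRsrc[L] == Current)
            Match |= 1ull << L;

        std::printf("iteration on rsrc=%u, lanes=0x%llx\n", Current,
                    (unsigned long long)Match); // "run the memory instruction"

        // s_xor_b64_term: turn the finished lanes off, loop while any remain.
        Exec &= ~Match;
      }
    }

    int main() {
      waterfall({7, 7, 3, 7, 3, 9}); // three unique values -> three iterations
      return 0;
    }
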
+
+// Extract pointer from Rsrc and return a zero-value Rsrc replacement.
+static std::tuple<unsigned, unsigned>
+extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Extract the ptr from the resource descriptor.
+ unsigned RsrcPtr =
+ TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
+ AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
+
+ // Create an empty resource descriptor
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B64), Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
+ .addImm(RsrcDataFormat & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
+ .addImm(RsrcDataFormat >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(AMDGPU::REG_SEQUENCE), NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ return std::make_tuple(RsrcPtr, NewSRsrc);
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr &MI,
+ MachineDominatorTree *MDT) const {
MachineFunction &MF = *MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -3617,75 +4010,56 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
return;
}
- // Legalize MUBUF* instructions by converting to addr64 form.
- // FIXME: If we start using the non-addr64 instructions for compute, we
- // may need to legalize them as above. This especially applies to the
- // buffer_load_format_* variants and variants with idxen (or bothen).
- int SRsrcIdx =
+ // Legalize MUBUF* instructions.
+ int RsrcIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
- if (SRsrcIdx != -1) {
+ if (RsrcIdx != -1) {
// We have an MUBUF instruction
- MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx);
- unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass;
- if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
- RI.getRegClass(SRsrcRC))) {
+ MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
+ unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass;
+ if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
+ RI.getRegClass(RsrcRC))) {
// The operands are legal.
// FIXME: We may need to legalize operands besides srsrc.
return;
}
- MachineBasicBlock &MBB = *MI.getParent();
-
- // Extract the ptr from the resource descriptor.
- unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
+ // Legalize a VGPR Rsrc.
+ //
+ // If the instruction is _ADDR64, we can avoid a waterfall by extracting
+ // the base pointer from the VGPR Rsrc, adding it to the VAddr, then using
+ // a zero-value SRsrc.
+ //
+ // If the instruction is _OFFSET (both idxen and offen disabled), and we
+ // support ADDR64 instructions, we can convert to ADDR64 and do the same as
+ // above.
+ //
+ // Otherwise we are on non-ADDR64 hardware, and/or we have
+ // idxen/offen/bothen and we fall back to a waterfall loop.
- // Create an empty resource descriptor
- unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
- uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
-
- // Zero64 = 0
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64)
- .addImm(0);
-
- // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo)
- .addImm(RsrcDataFormat & 0xFFFFFFFF);
-
- // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi)
- .addImm(RsrcDataFormat >> 32);
-
- // NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
+ MachineBasicBlock &MBB = *MI.getParent();
MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- if (VAddr) {
+ if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) {
// This is already an ADDR64 instruction so we need to add the pointer
// extracted from the resource descriptor to the current value of VAddr.
unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
+ unsigned RsrcPtr, NewSRsrc;
+ std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
+
+ // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
DebugLoc DL = MI.getDebugLoc();
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
- .addReg(SRsrcPtr, 0, AMDGPU::sub0)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+ .addReg(RsrcPtr, 0, AMDGPU::sub0)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
- // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
+ // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
- .addReg(SRsrcPtr, 0, AMDGPU::sub1)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
+ .addReg(RsrcPtr, 0, AMDGPU::sub1)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
@@ -3693,13 +4067,20 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
.addImm(AMDGPU::sub0)
.addReg(NewVAddrHi)
.addImm(AMDGPU::sub1);
- } else {
+
+ VAddr->setReg(NewVAddr);
+ Rsrc->setReg(NewSRsrc);
+ } else if (!VAddr && ST.hasAddr64()) {
// This instruction is the _OFFSET variant, so we need to convert it to
// ADDR64.
assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
< AMDGPUSubtarget::VOLCANIC_ISLANDS &&
"FIXME: Need to emit flat atomics here");
+ unsigned RsrcPtr, NewSRsrc;
+ std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
+
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
@@ -3715,10 +4096,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
.add(*VData)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .add(*SRsrc)
+ .addReg(NewVAddr)
+ .addReg(NewSRsrc)
.add(*SOffset)
.add(*Offset);
@@ -3735,21 +4114,19 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MIB.addImm(TFE->getImm());
}
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
Addr64 = MIB;
} else {
// Atomics with return.
Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
.add(*VData)
.add(*VDataIn)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .add(*SRsrc)
+ .addReg(NewVAddr)
+ .addReg(NewSRsrc)
.add(*SOffset)
.add(*Offset)
.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
}
MI.removeFromParent();
@@ -3757,23 +4134,20 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
NewVAddr)
- .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addReg(RsrcPtr, 0, AMDGPU::sub0)
.addImm(AMDGPU::sub0)
- .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addReg(RsrcPtr, 0, AMDGPU::sub1)
.addImm(AMDGPU::sub1);
-
- VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr);
- SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc);
+ } else {
+ // This is another variant; legalize Rsrc with waterfall loop from VGPRs
+ // to SGPRs.
+ loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
}
-
- // Update the instruction to use NewVaddr
- VAddr->setReg(NewVAddr);
- // Update the instruction to use NewSRsrc
- SRsrc->setReg(NewSRsrc);
}
}
-void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
+void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
+ MachineDominatorTree *MDT) const {
SetVectorType Worklist;
Worklist.insert(&TopInst);
@@ -3791,34 +4165,62 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
break;
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_U64_PSEUDO:
- splitScalar64BitAddSub(Worklist, Inst);
+ splitScalar64BitAddSub(Worklist, Inst, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_ADD_I32:
case AMDGPU::S_SUB_I32:
// FIXME: The u32 versions currently selected use the carry.
- if (moveScalarAddSub(Worklist, Inst))
+ if (moveScalarAddSub(Worklist, Inst, MDT))
continue;
// Default handling
break;
case AMDGPU::S_AND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NAND_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_NOR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_XNOR_B64:
+ if (ST.hasDLInsts())
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
+ else
+ splitScalar64BitXnor(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_ANDN2_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
+ Inst.eraseFromParent();
+ continue;
+
+ case AMDGPU::S_ORN2_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
Inst.eraseFromParent();
continue;
@@ -3899,90 +4301,31 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Inst.eraseFromParent();
continue;
- case AMDGPU::S_XNOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32);
+ case AMDGPU::S_NAND_B32:
+ splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
Inst.eraseFromParent();
continue;
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
- unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
- auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
- unsigned Offset = 0;
-
- // FIXME: This isn't safe because the addressing mode doesn't work
- // correctly if vaddr is negative.
- //
- // FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
- //
- // See if we can extract an immediate offset by recognizing one of these:
- // V_ADD_I32_e32 dst, imm, src1
- // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
- // V_ADD will be removed by "Remove dead machine instructions".
- if (Add &&
- (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
- Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
- static const unsigned SrcNames[2] = {
- AMDGPU::OpName::src0,
- AMDGPU::OpName::src1,
- };
-
- // Find a literal offset in one of source operands.
- for (int i = 0; i < 2; i++) {
- const MachineOperand *Src =
- getNamedOperand(*Add, SrcNames[i]);
-
- if (Src->isReg()) {
- auto Mov = MRI.getUniqueVRegDef(Src->getReg());
- if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
- Src = &Mov->getOperand(1);
- }
-
- if (Src) {
- if (Src->isImm())
- Offset = Src->getImm();
- else if (Src->isCImm())
- Offset = Src->getCImm()->getZExtValue();
- }
-
- if (Offset && isLegalMUBUFImmOffset(Offset)) {
- VAddr = getNamedOperand(*Add, SrcNames[!i]);
- break;
- }
-
- Offset = 0;
- }
- }
+ case AMDGPU::S_NOR_B32:
+ splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst.eraseFromParent();
+ continue;
- MachineInstr *NewInstr =
- BuildMI(*MBB, Inst, Inst.getDebugLoc(),
- get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
- .add(*VAddr) // vaddr
- .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
- .addImm(0) // soffset
- .addImm(Offset) // offset
- .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
- .getInstr();
-
- MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
- VDst);
- addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
+ case AMDGPU::S_ANDN2_B32:
+ splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
Inst.eraseFromParent();
+ continue;
- // Legalize all operands other than the offset. Notably, convert the srsrc
- // into SGPRs using v_readfirstlane if needed.
- legalizeOperands(*NewInstr);
+ case AMDGPU::S_ORN2_B32:
+ splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst.eraseFromParent();
continue;
}
- }
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
continue;
}
@@ -4071,7 +4414,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
// Legalize the operands
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
if (HasDst)
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
@@ -4079,8 +4422,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
// Add/sub require special handling to deal with carry outs.
-bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const {
+bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
if (ST.hasAddNoCarry()) {
// Assume there is no user of scc since we don't select this in that case.
// Since scc isn't used, it doesn't really matter if the i32 or u32 variant
@@ -4104,7 +4447,7 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist,
Inst.setDesc(get(NewOpc));
Inst.addImplicitDefUseOperands(*MBB.getParent());
MRI.replaceRegWith(OldDstReg, ResultReg);
- legalizeOperands(Inst);
+ legalizeOperands(Inst, MDT);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
return true;
@@ -4151,23 +4494,116 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
MachineOperand &Src0 = Inst.getOperand(1);
MachineOperand &Src1 = Inst.getOperand(2);
- legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
- legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
-
- unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
if (ST.hasDLInsts()) {
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
+ legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
+
BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
.add(Src0)
.add(Src1);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
} else {
- unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
- .add(Src0)
+ // Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can
+ // invert either source and then perform the XOR. If either source is a
+ // scalar register, then we can leave the inversion on the scalar unit to
+ // achieve a better distribution of scalar and vector instructions.
+ bool Src0IsSGPR = Src0.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
+ bool Src1IsSGPR = Src1.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
+ MachineInstr *Not = nullptr;
+ MachineInstr *Xor = nullptr;
+ unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ // Build a pair of scalar instructions and add them to the work list.
+ // The next iteration over the work list will lower these to the vector
+ // unit as necessary.
+ if (Src0IsSGPR) {
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
+ .add(Src0);
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
+ .addReg(Temp)
.add(Src1);
+ } else if (Src1IsSGPR) {
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
+ .add(Src1);
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
+ .add(Src0)
+ .addReg(Temp);
+ } else {
+ Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
+ .add(Src0)
+ .add(Src1);
+ Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
+ .addReg(Temp);
+ Worklist.insert(Not);
+ }
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), NewDest)
- .addReg(Xor);
+ Worklist.insert(Xor);
+
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}
+}
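
The non-DL path of lowerScalarXnor leans on the identity !(x ^ y) == (!x ^ y) == (x ^ !y), which lets the NOT stay on whichever operand is already scalar. A quick standalone check of that identity on 32-bit values, purely for illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEFu, y = 0x01234567u;
      uint32_t xnor = ~(x ^ y);
      assert(xnor == (~x ^ y)); // invert the first source instead
      assert(xnor == (x ^ ~y)); // or invert the second source
      return 0;
    }
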
+
+void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
+ .add(Src0)
+ .add(Src1);
+
+ MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
+ .addReg(Interm);
+
+ Worklist.insert(&Op);
+ Worklist.insert(&Not);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
+}
+
+void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
+ .add(Src1);
+
+ MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest)
+ .add(Src0)
+ .addReg(Interm);
+
+ Worklist.insert(&Not);
+ Worklist.insert(&Op);
MRI.replaceRegWith(Dest.getReg(), NewDest);
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
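
splitScalarNotBinop and splitScalarBinOpN2 rewrite the composite scalar opcodes in terms of primitives the later VALU lowering already handles: S_NAND/S_NOR become the base op followed by a NOT of the intermediate, while S_ANDN2/S_ORN2 become a NOT of the second source followed by the base op. The sketch below restates those two lowerings on plain integers, mirroring the Interm/NewDest split; it is illustrative only.

    #include <cassert>
    #include <cstdint>

    // s_nand_b32 / s_nor_b32: base op into a temporary, then invert it.
    static uint32_t lowerNotBinop(uint32_t A, uint32_t B, bool IsAnd) {
      uint32_t Interm = IsAnd ? (A & B) : (A | B);
      return ~Interm;
    }

    // s_andn2_b32 / s_orn2_b32: invert the second source, then apply the op.
    static uint32_t lowerBinOpN2(uint32_t A, uint32_t B, bool IsAnd) {
      uint32_t Interm = ~B;
      return IsAnd ? (A & Interm) : (A | Interm);
    }

    int main() {
      uint32_t A = 0xF0F0F0F0u, B = 0x0FF00FF0u;
      assert(lowerNotBinop(A, B, true)  == uint32_t(~(A & B))); // NAND
      assert(lowerNotBinop(A, B, false) == uint32_t(~(A | B))); // NOR
      assert(lowerBinOpN2(A, B, true)   == (A & ~B));           // ANDN2
      assert(lowerBinOpN2(A, B, false)  == (A | ~B));           // ORN2
      return 0;
    }
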
@@ -4200,13 +4636,13 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
+ MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
+ MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
@@ -4217,6 +4653,9 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+ Worklist.insert(&LoHalf);
+ Worklist.insert(&HiHalf);
+
// We don't need to legalizeOperands here because for a single operand, src0
// will support any kind of input.
@@ -4224,8 +4663,9 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitAddSub(
- SetVectorType &Worklist, MachineInstr &Inst) const {
+void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
MachineBasicBlock &MBB = *Inst.getParent();
@@ -4285,16 +4725,16 @@ void SIInstrInfo::splitScalar64BitAddSub(
// Try to legalize the operands in case we need to swap the order to keep it
// valid.
- legalizeOperands(*LoHalf);
- legalizeOperands(*HiHalf);
+ legalizeOperands(*LoHalf, MDT);
+ legalizeOperands(*HiHalf, MDT);
// Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitBinaryOp(
- SetVectorType &Worklist, MachineInstr &Inst,
- unsigned Opcode) const {
+void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
+ MachineInstr &Inst, unsigned Opcode,
+ MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -4321,6 +4761,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
AMDGPU::sub0, Src0SubRC);
MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
AMDGPU::sub0, Src1SubRC);
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
+ AMDGPU::sub1, Src1SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
@@ -4331,11 +4775,6 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
.add(SrcReg0Sub0)
.add(SrcReg1Sub0);
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub1, Src1SubRC);
-
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.add(SrcReg0Sub1)
@@ -4350,22 +4789,62 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- legalizeOperands(LoHalf);
- legalizeOperands(HiHalf);
+ Worklist.insert(&LoHalf);
+ Worklist.insert(&HiHalf);
// Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
+void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+
+ unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ MachineOperand* Op0;
+ MachineOperand* Op1;
+
+ if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
+ Op0 = &Src0;
+ Op1 = &Src1;
+ } else {
+ Op0 = &Src1;
+ Op1 = &Src0;
+ }
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
+ .add(*Op0);
+
+ unsigned NewDest = MRI.createVirtualRegister(DestRC);
+
+ MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
+ .addReg(Interm)
+ .add(*Op1);
+
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+
+ Worklist.insert(&Xor);
+}
+
void SIInstrInfo::splitScalar64BitBCNT(
SetVectorType &Worklist, MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineBasicBlock::iterator MII = Inst;
- DebugLoc DL = Inst.getDebugLoc();
+ const DebugLoc &DL = Inst.getDebugLoc();
MachineOperand &Dest = Inst.getOperand(0);
MachineOperand &Src = Inst.getOperand(1);
@@ -4401,7 +4880,7 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineBasicBlock::iterator MII = Inst;
- DebugLoc DL = Inst.getDebugLoc();
+ const DebugLoc &DL = Inst.getDebugLoc();
MachineOperand &Dest = Inst.getOperand(0);
uint32_t Imm = Inst.getOperand(2).getImm();
@@ -4546,10 +5025,10 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(
make_range(MachineBasicBlock::iterator(SCCDefInst),
SCCDefInst.getParent()->end())) {
// Exit if we find another SCC def.
- if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
+ if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
return;
- if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
+ if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
Worklist.insert(&MI);
}
}
@@ -4716,7 +5195,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
- (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS);
+ (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@@ -4777,12 +5256,6 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// If we have a definitive size, we can use it. Otherwise we need to inspect
// the operands to know the size.
- //
- // FIXME: Instructions that have a base 32-bit encoding report their size as
- // 4, even though they are really 8 bytes if they have a literal operand.
- if (DescSize != 0 && DescSize != 4)
- return DescSize;
-
if (isFixedSize(MI))
return DescSize;
@@ -4791,23 +5264,27 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (isVALU(MI) || isSALU(MI)) {
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
- return 4; // No operands.
+ return DescSize; // No operands.
if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
- return 8;
+ return DescSize + 4;
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
- return 4;
+ return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
- return 8;
+ return DescSize + 4;
- return 4;
- }
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (Src2Idx == -1)
+ return DescSize;
+
+ if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
+ return DescSize + 4;
- if (DescSize == 4)
- return 4;
+ return DescSize;
+ }
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
@@ -4823,7 +5300,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
default:
- llvm_unreachable("unable to find instruction size");
+ return DescSize;
}
}
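For reference, the reworked getInstSizeInBytes logic above no longer hard-codes 4 or 8 bytes: it starts from the descriptor size and adds 4 bytes as soon as one of src0/src1/src2 is a literal constant. A minimal standalone C++ sketch of that accounting, using hypothetical Operand/IsLiteral stand-ins rather than the real MachineOperand/isLiteralConstantLike() API:

#include <vector>

// Hypothetical stand-in for a MachineOperand that may be a literal constant.
struct Operand { bool IsLiteral; };

// Encoded size: the descriptor size plus one extra dword (4 bytes) for the
// first literal constant found among the source operands.
unsigned instSizeInBytes(unsigned DescSize, const std::vector<Operand> &Srcs) {
  for (const Operand &Op : Srcs)
    if (Op.IsLiteral)
      return DescSize + 4; // a literal adds one extra dword to the encoding
  return DescSize;         // no literal: the descriptor size is already exact
}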
@@ -4835,7 +5312,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
- if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS)
+ if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
return true;
}
return false;
@@ -5069,3 +5546,84 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
return MCOp;
}
+
+static
+TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
+ assert(RegOpnd.isReg());
+ return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
+ getRegSubRegPair(RegOpnd);
+}
+
+TargetInstrInfo::RegSubRegPair
+llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) {
+ assert(MI.isRegSequence());
+ for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
+ if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
+ auto &RegOp = MI.getOperand(1 + 2 * I);
+ return getRegOrUndef(RegOp);
+ }
+ return TargetInstrInfo::RegSubRegPair();
+}
+
+// Try to find the definition of reg:subreg in subreg-manipulation pseudos
+// Following a subreg of reg:subreg isn't supported
+static bool followSubRegDef(MachineInstr &MI,
+ TargetInstrInfo::RegSubRegPair &RSR) {
+ if (!RSR.SubReg)
+ return false;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::REG_SEQUENCE:
+ RSR = getRegSequenceSubReg(MI, RSR.SubReg);
+ return true;
+  // EXTRACT_SUBREG isn't supported as this would follow a subreg of a subreg
+ case AMDGPU::INSERT_SUBREG:
+ if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
+ // inserted the subreg we're looking for
+ RSR = getRegOrUndef(MI.getOperand(2));
+ else { // the subreg in the rest of the reg
+ auto R1 = getRegOrUndef(MI.getOperand(1));
+ if (R1.SubReg) // subreg of subreg isn't supported
+ return false;
+ RSR.Reg = R1.Reg;
+ }
+ return true;
+ }
+ return false;
+}
+
+MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
+ MachineRegisterInfo &MRI) {
+ assert(MRI.isSSA());
+ if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
+ return nullptr;
+
+ auto RSR = P;
+ auto *DefInst = MRI.getVRegDef(RSR.Reg);
+ while (auto *MI = DefInst) {
+ DefInst = nullptr;
+ switch (MI->getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::V_MOV_B32_e32: {
+ auto &Op1 = MI->getOperand(1);
+ if (Op1.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
+ if (Op1.isUndef())
+ return nullptr;
+ RSR = getRegSubRegPair(Op1);
+ DefInst = MRI.getVRegDef(RSR.Reg);
+ }
+ break;
+ }
+ default:
+ if (followSubRegDef(*MI, RSR)) {
+ if (!RSR.Reg)
+ return nullptr;
+ DefInst = MRI.getVRegDef(RSR.Reg);
+ }
+ }
+ if (!DefInst)
+ return MI;
+ }
+ return nullptr;
+}
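As a side note on the getRegSequenceSubReg() helper added above: REG_SEQUENCE operands come in (source register, sub-register index) pairs after the single def, and the loop returns the source paired with the requested index. A standalone sketch with simplified, hypothetical types (the real code returns a TargetInstrInfo::RegSubRegPair and respects undef operands):

#include <cstddef>
#include <utility>
#include <vector>

using RegSubRegPair = std::pair<unsigned, unsigned>; // {Reg, SubReg}

// Ops models MI.getOperand(1..N): alternating source register / subreg index.
RegSubRegPair findSubRegSource(const std::vector<unsigned> &Ops,
                               unsigned WantedSubReg) {
  for (std::size_t I = 0, E = Ops.size() / 2; I < E; ++I)
    if (Ops[2 * I + 1] == WantedSubReg)  // the subreg-index slot of this pair
      return {Ops[2 * I], WantedSubReg}; // its source register
  return {0, 0}; // not found: treated as an undef pair
}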
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index d681b926504e..5b1a05f3785e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -37,6 +37,7 @@
namespace llvm {
class APInt;
+class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class GCNSubtarget;
@@ -79,8 +80,8 @@ public:
private:
void swapOperands(MachineInstr &Inst) const;
- bool moveScalarAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const;
+ bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -88,14 +89,26 @@ private:
void lowerScalarXnor(SetVectorType &Worklist,
MachineInstr &Inst) const;
+ void splitScalarNotBinop(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const;
+
+ void splitScalarBinOpN2(SetVectorType &Worklist,
+ MachineInstr &Inst,
+ unsigned Opcode) const;
+
void splitScalar64BitUnaryOp(SetVectorType &Worklist,
MachineInstr &Inst, unsigned Opcode) const;
- void splitScalar64BitAddSub(SetVectorType &Worklist,
- MachineInstr &Inst) const;
+ void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
+
+ void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
+ unsigned Opcode,
+ MachineDominatorTree *MDT = nullptr) const;
- void splitScalar64BitBinaryOp(SetVectorType &Worklist,
- MachineInstr &Inst, unsigned Opcode) const;
+ void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void splitScalar64BitBCNT(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -160,12 +173,11 @@ public:
int64_t &Offset1,
int64_t &Offset2) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const final;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const final;
- bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
- MachineInstr &SecondLdSt, unsigned BaseReg2,
+ bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
unsigned NumLoads) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
@@ -225,6 +237,9 @@ public:
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
+ bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0,
+ unsigned & SrcOpIdx1) const;
+
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
@@ -276,7 +291,7 @@ public:
unsigned TrueReg, unsigned FalseReg) const;
unsigned getAddressSpaceForPseudoSourceKind(
- PseudoSourceValue::PSVKind Kind) const override;
+ unsigned Kind) const override;
bool
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
@@ -589,6 +604,14 @@ public:
return MI.getDesc().TSFlags & ClampFlags;
}
+ static bool usesFPDPRounding(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
+ }
+
+ bool usesFPDPRounding(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -689,6 +712,12 @@ public:
unsigned OpName) const;
bool hasAnyModifiersSet(const MachineInstr &MI) const;
+ bool canShrink(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const;
+
+ MachineInstr *buildShrunkInst(MachineInstr &MI,
+ unsigned NewOpcode) const;
+
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
@@ -719,6 +748,16 @@ public:
/// This form should usually be preferred since it handles operands
/// with unknown register classes.
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) {
+ if (unsigned SubReg = MO.getSubReg()) {
+ assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
+ MI.getParent()->getParent()->getRegInfo().
+ getRegClass(MO.getReg()), SubReg)) >= 32 &&
+ "Sub-dword subregs are not supported");
+ return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
+ }
+ }
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
}
@@ -777,14 +816,16 @@ public:
MachineOperand &Op, MachineRegisterInfo &MRI,
const DebugLoc &DL) const;
- /// Legalize all operands in this instruction. This function may
- /// create new instruction and insert them before \p MI.
- void legalizeOperands(MachineInstr &MI) const;
+ /// Legalize all operands in this instruction. This function may create new
+ /// instructions and control-flow around \p MI. If present, \p MDT is
+ /// updated.
+ void legalizeOperands(MachineInstr &MI,
+ MachineDominatorTree *MDT = nullptr) const;
/// Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the
- /// VALU if necessary.
- void moveToVALU(MachineInstr &MI) const;
+ /// VALU if necessary. If present, \p MDT is updated.
+ void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
int Count) const;
@@ -885,9 +926,36 @@ public:
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
-
};
+/// \brief Returns true if the reg:subreg pair P belongs to the register class TRC
+inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
+ const TargetRegisterClass &TRC,
+ MachineRegisterInfo &MRI) {
+ auto *RC = MRI.getRegClass(P.Reg);
+ if (!P.SubReg)
+ return RC == &TRC;
+ auto *TRI = MRI.getTargetRegisterInfo();
+ return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
+}
+
+/// \brief Create RegSubRegPair from a register MachineOperand
+inline
+TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
+ assert(O.isReg());
+ return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
+}
+
+/// \brief Return the SubReg component from REG_SEQUENCE
+TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
+ unsigned SubReg);
+
+/// \brief Return the defining instruction for a given reg:subreg pair
+/// skipping copy-like instructions and subreg-manipulation pseudos.
+/// Following another subreg of a reg:subreg isn't supported.
+MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
+ MachineRegisterInfo &MRI);
+
namespace AMDGPU {
LLVM_READONLY
@@ -900,6 +968,9 @@ namespace AMDGPU {
int getSDWAOp(uint16_t Opcode);
LLVM_READONLY
+ int getDPPOp32(uint16_t Opcode);
+
+ LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);
LLVM_READONLY
@@ -911,6 +982,12 @@ namespace AMDGPU {
LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);
+ /// Check if \p Opcode is an Addr64 opcode.
+ ///
+ /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
+ LLVM_READONLY
+ int getIfAddr64Inst(uint16_t Opcode);
+
LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);
@@ -923,6 +1000,9 @@ namespace AMDGPU {
LLVM_READONLY
int getSOPKOp(uint16_t Opcode);
+ LLVM_READONLY
+ int getGlobalSaddrOp(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8fa37aa83dae..13afa4d4974b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -40,9 +40,9 @@ def SIEncodingFamily {
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
-def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
- SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
- [SDNPMayLoad, SDNPMemOperand]
+def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
+ SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>]>,
+ [SDNPMayLoad, SDNPMemOperand]
>;
def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
@@ -69,36 +69,34 @@ def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
-def SDTbuffer_load : SDTypeProfile<1, 9,
+def SDTtbuffer_load : SDTypeProfile<1, 8,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
SDTCisVT<2, i32>, // vindex(VGPR)
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // dfmt(imm)
- SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // glc(imm)
- SDTCisVT<9, i32> // slc(imm)
+ SDTCisVT<6, i32>, // format(imm)
+ SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<8, i1> // idxen(imm)
]>;
-def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTbuffer_load,
+def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
- SDTbuffer_load,
+ SDTtbuffer_load,
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
-def SDTtbuffer_store : SDTypeProfile<0, 10,
+def SDTtbuffer_store : SDTypeProfile<0, 9,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
SDTCisVT<2, i32>, // vindex(VGPR)
SDTCisVT<3, i32>, // voffset(VGPR)
SDTCisVT<4, i32>, // soffset(SGPR)
SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // dfmt(imm)
- SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // glc(imm)
- SDTCisVT<9, i32> // slc(imm)
+ SDTCisVT<6, i32>, // format(imm)
+ SDTCisVT<7, i32>, // cachecontrol(imm)
+ SDTCisVT<8, i1> // idxen(imm)
]>;
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
@@ -110,13 +108,15 @@ def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
SDTtbuffer_store,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
-def SDTBufferLoad : SDTypeProfile<1, 5,
+def SDTBufferLoad : SDTypeProfile<1, 7,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
- SDTCisVT<2, i32>, // vindex
- SDTCisVT<3, i32>, // offset
- SDTCisVT<4, i1>, // glc
- SDTCisVT<5, i1>]>; // slc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
@@ -126,13 +126,15 @@ def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
-def SDTBufferStore : SDTypeProfile<0, 6,
+def SDTBufferStore : SDTypeProfile<0, 8,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
- SDTCisVT<2, i32>, // vindex
- SDTCisVT<3, i32>, // offset
- SDTCisVT<4, i1>, // glc
- SDTCisVT<5, i1>]>; // slc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<7, i1>]>; // idxen(imm)
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
@@ -144,13 +146,16 @@ def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
class SDBufferAtomic<string opcode> : SDNode <opcode,
- SDTypeProfile<1, 5,
+ SDTypeProfile<1, 8,
[SDTCisVT<0, i32>, // dst
SDTCisVT<1, i32>, // vdata
SDTCisVT<2, v4i32>, // rsrc
- SDTCisVT<3, i32>, // vindex
- SDTCisVT<4, i32>, // offset
- SDTCisVT<5, i1>]>, // slc
+ SDTCisVT<3, i32>, // vindex(VGPR)
+ SDTCisVT<4, i32>, // voffset(VGPR)
+ SDTCisVT<5, i32>, // soffset(SGPR)
+ SDTCisVT<6, i32>, // offset(imm)
+ SDTCisVT<7, i32>, // cachepolicy(imm)
+ SDTCisVT<8, i1>]>, // idxen(imm)
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
@@ -166,14 +171,17 @@ def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
- SDTypeProfile<1, 6,
+ SDTypeProfile<1, 9,
[SDTCisVT<0, i32>, // dst
SDTCisVT<1, i32>, // src
SDTCisVT<2, i32>, // cmp
SDTCisVT<3, v4i32>, // rsrc
- SDTCisVT<4, i32>, // vindex
- SDTCisVT<5, i32>, // offset
- SDTCisVT<6, i1>]>, // slc
+ SDTCisVT<4, i32>, // vindex(VGPR)
+ SDTCisVT<5, i32>, // voffset(VGPR)
+ SDTCisVT<6, i32>, // soffset(SGPR)
+ SDTCisVT<7, i32>, // offset(imm)
+ SDTCisVT<8, i32>, // cachepolicy(imm)
+ SDTCisVT<9, i1>]>, // idxen(imm)
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
@@ -487,24 +495,7 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
}]>;
class VGPRImm <dag frag> : PatLeaf<frag, [{
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return false;
- }
- const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- unsigned Limit = 0;
- for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- Limit < 10 && U != E; ++U, ++Limit) {
- const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
-
- // If the register class is unknown, it could be an unknown
- // register class that needs to be an SGPR, e.g. an inline asm
- // constraint
- if (!RC || SIRI->isSGPRClass(RC))
- return false;
- }
-
- return Limit < 10;
+ return isVGPRImm(N);
}]>;
def NegateImm : SDNodeXForm<imm, [{
@@ -746,14 +737,13 @@ def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
-def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
+def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
-def DFMT : NamedOperandU8<"DFMT", NamedMatchClass<"DFMT">>;
-def NFMT : NamedOperandU8<"NFMT", NamedMatchClass<"NFMT">>;
+def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
@@ -1632,7 +1622,7 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
0, // 64-bit dst - No DPP or SDWA for 64-bit operands
!if(!eq(Src0VT.Size, 64),
0, // 64-bit src0
- !if(!eq(Src0VT.Size, 64),
+ !if(!eq(Src1VT.Size, 64),
0, // 64-bit src2
1
)
@@ -1641,6 +1631,12 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
);
}
+class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
+ ValueType Src1VT = i32> {
+ bit ret = !if(!eq(NumSrcArgs, 0), 0,
+ getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
+}
+
class BitOr<bit a, bit b> {
bit ret = !if(a, 1, !if(b, 1, 0));
}
@@ -1649,6 +1645,11 @@ class BitAnd<bit a, bit b> {
bit ret = !if(a, !if(b, 1, 0), 0);
}
+def PatGenMode {
+ int NoPattern = 0;
+ int Pattern = 1;
+}
+
class VOPProfile <list<ValueType> _ArgVT> {
field list<ValueType> ArgVT = _ArgVT;
@@ -1715,7 +1716,10 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSDWAOMod = isFloatType<DstVT>.ret;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
- field bit HasSDWA9 = HasExt;
+ field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field bit HasExtSDWA = HasExt;
+ field bit HasExtSDWA9 = HasExt;
+ field int NeedPatGen = PatGenMode.NoPattern;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -1743,8 +1747,10 @@ class VOPProfile <list<ValueType> _ArgVT> {
getOpSelMod<Src0VT>.ret,
getOpSelMod<Src1VT>.ret,
getOpSelMod<Src2VT>.ret>.ret;
- field dag InsDPP = getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
- HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
+ field dag InsDPP = !if(HasExtDPP,
+ getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
+ (ins));
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
@@ -1758,14 +1764,21 @@ class VOPProfile <list<ValueType> _ArgVT> {
HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret;
- field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string AsmDPP = !if(HasExtDPP,
+ getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
}
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
+}
+
+class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
+ let NeedPatGen = mode;
}
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
@@ -1788,6 +1801,8 @@ def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
+def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
@@ -1925,6 +1940,15 @@ def getBasicFromSDWAOp : InstrMapping {
let ValueCols = [["Default"]];
}
+// Maps ordinary instructions to their DPP counterparts
+def getDPPOp32 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["AsmVariantName"];
+ let KeyCol = ["Default"];
+ let ValueCols = [["DPP"]];
+}
+
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "Commutable_REV";
@@ -1977,6 +2001,14 @@ def getAddr64Inst : InstrMapping {
let ValueCols = [["1"]];
}
+def getIfAddr64Inst : InstrMapping {
+ let FilterClass = "MUBUFAddr64Table";
+ let RowFields = ["OpName"];
+ let ColFields = ["IsAddr64"];
+ let KeyCol = ["1"];
+ let ValueCols = [["1"]];
+}
+
def getMUBUFNoLdsInst : InstrMapping {
let FilterClass = "MUBUFLdsTable";
let RowFields = ["OpName"];
@@ -2003,6 +2035,15 @@ def getAtomicNoRetOp : InstrMapping {
let ValueCols = [["0"]];
}
+// Maps a GLOBAL to its SADDR form.
+def getGlobalSaddrOp : InstrMapping {
+ let FilterClass = "GlobalSaddrTable";
+ let RowFields = ["SaddrOp"];
+ let ColFields = ["IsSaddr"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5c10646161b3..b6b00c2e4257 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -15,8 +15,8 @@ class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateContro
let SubtargetPredicate = isGCN;
}
-include "VOPInstructions.td"
include "SOPInstructions.td"
+include "VOPInstructions.td"
include "SMInstructions.td"
include "FLATInstructions.td"
include "BUFInstructions.td"
@@ -164,29 +164,26 @@ def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
} // End usesCustomInserter = 1, Defs = [SCC]
-let usesCustomInserter = 1, SALU = 1 in {
-def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
+let usesCustomInserter = 1 in {
+def GET_GROUPSTATICSIZE : SPseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
} // End let usesCustomInserter = 1, SALU = 1
-def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_MOV_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
}
-def S_XOR_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_XOR_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0, SSrc_b64:$src1)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
let Defs = [SCC];
}
-def S_ANDN2_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+def S_ANDN2_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
(ins SSrc_b64:$src0, SSrc_b64:$src1)> {
- let SALU = 1;
let isAsCheapAsAMove = 1;
let isTerminator = 1;
}
@@ -250,7 +247,7 @@ def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_64:$saved, brtarget:$target),
[(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
let Size = 8;
- let isBranch = 0;
+ let isBranch = 1;
let hasSideEffects = 1;
}
@@ -267,14 +264,6 @@ def SI_END_CF : CFPseudoInstSI <
let mayStore = 1;
}
-def SI_BREAK : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src),
- [(set i64:$dst, (int_amdgcn_break i64:$src))], 1> {
- let Size = 4;
- let isAsCheapAsAMove = 1;
- let isReMaterializable = 1;
-}
-
def SI_IF_BREAK : CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
[(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
@@ -283,14 +272,6 @@ def SI_IF_BREAK : CFPseudoInstSI <
let isReMaterializable = 1;
}
-def SI_ELSE_BREAK : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1),
- [(set i64:$dst, (int_amdgcn_else_break i64:$src0, i64:$src1))]> {
- let Size = 4;
- let isAsCheapAsAMove = 1;
- let isReMaterializable = 1;
-}
-
let Uses = [EXEC] in {
multiclass PseudoInstKill <dag ins> {
@@ -326,6 +307,7 @@ def SI_ILLEGAL_COPY : SPseudoInstSI <
def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
let isTerminator = 1;
let usesCustomInserter = 1;
+ let isBranch = 1;
}
def SI_PS_LIVE : PseudoInstSI <
@@ -598,7 +580,13 @@ def : Pat <
(int_amdgcn_kill (i1 (setcc f32:$src, InlineFPImm<f32>:$imm, cond:$cond))),
(SI_KILL_F32_COND_IMM_PSEUDO $src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))
>;
-// TODO: we could add more variants for other types of conditionals
+
+ // TODO: we could add more variants for other types of conditionals
+
+def : Pat <
+ (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+ (COPY $src) // Return the SGPRs representing i1 src
+>;
//===----------------------------------------------------------------------===//
// VOP1 Patterns
@@ -730,12 +718,14 @@ defm : SelectPat <i32, V_CNDMASK_B32_e64>;
defm : SelectPat <f16, V_CNDMASK_B32_e64>;
defm : SelectPat <f32, V_CNDMASK_B32_e64>;
+let AddedComplexity = 1 in {
def : GCNPat <
- (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
+ (i32 (add (i32 (getDivergentFrag<ctpop>.ret i32:$popcnt)), i32:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+}
def : GCNPat <
- (i16 (add (i16 (trunc (ctpop i32:$popcnt))), i16:$val)),
+ (i16 (add (i16 (trunc (getDivergentFrag<ctpop>.ret i32:$popcnt))), i16:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
@@ -867,6 +857,8 @@ def : BitConvert <f64, v2f32, VReg_64>;
def : BitConvert <v2f32, f64, VReg_64>;
def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <v4i16, v4f16, VReg_64>;
+def : BitConvert <v4f16, v4i16, VReg_64>;
// FIXME: Make SGPR
def : BitConvert <v2i32, v4f16, VReg_64>;
@@ -1324,6 +1316,38 @@ def : GCNPat <
>;
def : GCNPat <
+ (i1 (add i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+def : GCNPat <
+ (i1 (sub i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+let AddedComplexity = 1 in {
+def : GCNPat <
+ (i1 (add i1:$src0, (i1 -1))),
+ (S_NOT_B64 $src0)
+>;
+
+def : GCNPat <
+ (i1 (sub i1:$src0, (i1 -1))),
+ (S_NOT_B64 $src0)
+>;
+}
+
+def : GCNPat <
+ (f16 (sint_to_fp i1:$src)),
+ (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+>;
+
+def : GCNPat <
+ (f16 (uint_to_fp i1:$src)),
+ (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+>;
+
+def : GCNPat <
(f32 (sint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
>;
@@ -1464,13 +1488,32 @@ class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPa
def : ExpPattern<AMDGPUexport, i32, EXP>;
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
-// COPY_TO_REGCLASS is workaround tablegen bug from multiple outputs
+// COPY is a workaround for a tablegen bug from multiple outputs
// from S_LSHL_B32's multiple outputs from implicit scc def.
def : GCNPat <
(v2i16 (build_vector (i16 0), i16:$src1)),
- (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0))
+ (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector i16:$src0, (i16 undef))),
+ (v2i16 (COPY $src0))
+>;
+
+def : GCNPat <
+ (v2f16 (build_vector f16:$src0, (f16 undef))),
+ (v2f16 (COPY $src0))
+>;
+
+def : GCNPat <
+ (v2i16 (build_vector (i16 undef), i16:$src1)),
+ (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
>;
+def : GCNPat <
+ (v2f16 (build_vector (f16 undef), f16:$src1)),
+ (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
+>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
@@ -1501,15 +1544,15 @@ def : GCNPat <
} // End SubtargetPredicate = HasVOP3PInsts
-// def : GCNPat <
-// (v2f16 (scalar_to_vector f16:$src0)),
-// (COPY $src0)
-// >;
+def : GCNPat <
+ (v2f16 (scalar_to_vector f16:$src0)),
+ (COPY $src0)
+>;
-// def : GCNPat <
-// (v2i16 (scalar_to_vector i16:$src0)),
-// (COPY $src0)
-// >;
+def : GCNPat <
+ (v2i16 (scalar_to_vector i16:$src0)),
+ (COPY $src0)
+>;
def : GCNPat <
(v4i16 (scalar_to_vector i16:$src0)),
@@ -1587,18 +1630,19 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
-def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
-def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
+defm : IntMed3Pat<V_MED3_I32, smin, smax, smin_oneuse, smax_oneuse>;
+defm : IntMed3Pat<V_MED3_U32, umin, umax, umin_oneuse, umax_oneuse>;
}
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class FPMed3Pat<ValueType vt,
+ //SDPatternOperator max, SDPatternOperator min,
Instruction med3Inst> : GCNPat<
- (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
- (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
(VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
@@ -1606,28 +1650,41 @@ class FPMed3Pat<ValueType vt,
class FP16Med3Pat<ValueType vt,
Instruction med3Inst> : GCNPat<
- (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
- (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
- (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
- (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fmaxnum_like (fminnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fminnum_like_oneuse (fmaxnum_like_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
(vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
>;
-class Int16Med3Pat<Instruction med3Inst,
+multiclass Int16Med3Pat<Instruction med3Inst,
+ SDPatternOperator min,
SDPatternOperator max,
SDPatternOperator max_oneuse,
SDPatternOperator min_oneuse,
- ValueType vt = i32> : GCNPat<
+ ValueType vt = i16> {
+ // This matches 16 permutations of
+ // max(min(x, y), min(max(x, y), z))
+ def : GCNPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
>;
+ // This matches 16 permutations of
+ // min(max(a, b), max(min(a, b), c))
+ def : GCNPat <
+ (min (max_oneuse vt:$src0, vt:$src1),
+ (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
+ (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+}
+
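As a sanity check on the two shapes matched by the Int16Med3Pat multiclass above: both max(min(x, y), min(max(x, y), z)) and min(max(x, y), max(min(x, y), z)) compute the median of the three inputs, which is what the V_MED3_* instructions implement. A small standalone check in plain C++ (not TableGen), exhaustively testing all orderings of one triple:

#include <algorithm>
#include <cassert>

static int med3_form1(int x, int y, int z) { // max(min(x,y), min(max(x,y), z))
  return std::max(std::min(x, y), std::min(std::max(x, y), z));
}

static int med3_form2(int x, int y, int z) { // min(max(x,y), max(min(x,y), z))
  return std::min(std::max(x, y), std::max(std::min(x, y), z));
}

int main() {
  int v[3] = {-2, 5, 7}; // start sorted so next_permutation covers all orders
  do {
    int s[3] = {v[0], v[1], v[2]};
    std::sort(s, s + 3); // s[1] is the true median
    assert(med3_form1(v[0], v[1], v[2]) == s[1]);
    assert(med3_form2(v[0], v[1], v[2]) == s[1]);
  } while (std::next_permutation(v, v + 3));
}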
def : FPMed3Pat<f32, V_MED3_F32>;
let OtherPredicates = [isGFX9] in {
def : FP16Med3Pat<f16, V_MED3_F16>;
-def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
-def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
+defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
} // End Predicates = [isGFX9]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
index 7b7cf1635050..e51ff4b4bc50 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
@@ -16,36 +16,4 @@
let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
- // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
- def int_SI_tbuffer_store : Intrinsic <
- [],
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
- llvm_i32_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // dfmt(imm)
- llvm_i32_ty, // nfmt(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- []>;
-
- // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
- def int_SI_buffer_load_dword : Intrinsic <
- [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- [IntrReadMem, IntrArgMemOnly]>;
-
} // End TargetPrefix = "SI", isTarget = 1
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 4b537540046f..be291b127301 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -20,6 +20,26 @@
// ==>
// s_buffer_load_dwordx2 s[4:5], s[0:3], 4
//
+// This pass also tries to promote a constant offset into the immediate by
+// adjusting the base. It tries to reuse a base from nearby instructions so
+// that the access needs only a 13-bit constant offset, which is then
+// promoted into the immediate.
+// E.g.
+// s_movk_i32 s0, 0x1800
+// v_add_co_u32_e32 v0, vcc, s0, v2
+// v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
+//
+// s_movk_i32 s0, 0x1000
+// v_add_co_u32_e32 v5, vcc, s0, v2
+// v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+// global_load_dwordx2 v[5:6], v[5:6], off
+// global_load_dwordx2 v[0:1], v[0:1], off
+// =>
+// s_movk_i32 s0, 0x1000
+// v_add_co_u32_e32 v5, vcc, s0, v2
+// v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+// global_load_dwordx2 v[5:6], v[5:6], off
+// global_load_dwordx2 v[0:1], v[5:6], off offset:2048
//
// Future improvements:
//
@@ -43,9 +63,9 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
@@ -74,23 +94,38 @@ using namespace llvm;
#define DEBUG_TYPE "si-load-store-opt"
namespace {
+enum InstClassEnum {
+ UNKNOWN,
+ DS_READ,
+ DS_WRITE,
+ S_BUFFER_LOAD_IMM,
+ BUFFER_LOAD_OFFEN = AMDGPU::BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_OFFSET = AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ BUFFER_STORE_OFFEN = AMDGPU::BUFFER_STORE_DWORD_OFFEN,
+ BUFFER_STORE_OFFSET = AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+ BUFFER_LOAD_OFFEN_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact,
+ BUFFER_LOAD_OFFSET_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact,
+ BUFFER_STORE_OFFEN_exact = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact,
+ BUFFER_STORE_OFFSET_exact = AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact,
+};
-class SILoadStoreOptimizer : public MachineFunctionPass {
- enum InstClassEnum {
- DS_READ_WRITE,
- S_BUFFER_LOAD_IMM,
- BUFFER_LOAD_OFFEN,
- BUFFER_LOAD_OFFSET,
- BUFFER_STORE_OFFEN,
- BUFFER_STORE_OFFSET,
- };
+enum RegisterEnum {
+ SBASE = 0x1,
+ SRSRC = 0x2,
+ SOFFSET = 0x4,
+ VADDR = 0x8,
+ ADDR = 0x10,
+};
+class SILoadStoreOptimizer : public MachineFunctionPass {
struct CombineInfo {
MachineBasicBlock::iterator I;
MachineBasicBlock::iterator Paired;
unsigned EltSize;
unsigned Offset0;
unsigned Offset1;
+ unsigned Width0;
+ unsigned Width1;
unsigned BaseOff;
InstClassEnum InstClass;
bool GLC0;
@@ -98,9 +133,23 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
bool SLC0;
bool SLC1;
bool UseST64;
- bool IsX2;
- SmallVector<MachineInstr*, 8> InstsToMove;
- };
+ SmallVector<MachineInstr *, 8> InstsToMove;
+ };
+
+ struct BaseRegisters {
+ unsigned LoReg = 0;
+ unsigned HiReg = 0;
+
+ unsigned LoSubReg = 0;
+ unsigned HiSubReg = 0;
+ };
+
+ struct MemAddress {
+ BaseRegisters Base;
+ int64_t Offset = 0;
+ };
+
+ using MemInfoMap = DenseMap<MachineInstr *, MemAddress>;
private:
const GCNSubtarget *STM = nullptr;
@@ -108,9 +157,16 @@ private:
const SIRegisterInfo *TRI = nullptr;
MachineRegisterInfo *MRI = nullptr;
AliasAnalysis *AA = nullptr;
- unsigned CreatedX2;
+ bool OptimizeAgain;
static bool offsetsCanBeCombined(CombineInfo &CI);
+ static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
+ static unsigned getNewOpcode(const CombineInfo &CI);
+ static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI);
+ const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI);
+ unsigned getOpcodeWidth(const MachineInstr &MI);
+ InstClassEnum getInstClass(unsigned Opc);
+ unsigned getRegs(unsigned Opc);
bool findMatchingInst(CombineInfo &CI);
@@ -123,10 +179,21 @@ private:
MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
- unsigned promoteBufferStoreOpcode(const MachineInstr &I, bool &IsX2,
- bool &IsOffen) const;
MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI);
+ void updateBaseAndOffset(MachineInstr &I, unsigned NewBase,
+ int32_t NewOffset);
+ unsigned computeBase(MachineInstr &MI, const MemAddress &Addr);
+ MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI);
+ Optional<int32_t> extractConstOffset(const MachineOperand &Op);
+ void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr);
+  /// Promotes a constant offset into the immediate by adjusting the base. It
+  /// tries to reuse a base from nearby instructions so that the access needs
+  /// only a 13-bit constant offset, which is then promoted into the immediate.
+ bool promoteConstantOffsetToImm(MachineInstr &CI,
+ MemInfoMap &Visited,
+ SmallPtrSet<MachineInstr *, 4> &Promoted);
+
public:
static char ID;
@@ -153,8 +220,8 @@ public:
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
"SI Load Store Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
- "SI Load Store Optimizer", false, false)
+INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE, "SI Load Store Optimizer",
+ false, false)
char SILoadStoreOptimizer::ID = 0;
@@ -165,7 +232,7 @@ FunctionPass *llvm::createSILoadStoreOptimizerPass() {
}
static void moveInstsAfter(MachineBasicBlock::iterator I,
- ArrayRef<MachineInstr*> InstsToMove) {
+ ArrayRef<MachineInstr *> InstsToMove) {
MachineBasicBlock *MBB = I->getParent();
++I;
for (MachineInstr *MI : InstsToMove) {
@@ -191,21 +258,19 @@ static void addDefsUsesToList(const MachineInstr &MI,
static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
MachineBasicBlock::iterator B,
const SIInstrInfo *TII,
- AliasAnalysis * AA) {
+ AliasAnalysis *AA) {
// RAW or WAR - cannot reorder
// WAW - cannot reorder
// RAR - safe to reorder
return !(A->mayStore() || B->mayStore()) ||
- TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
+ TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
}
// Add MI and its defs to the lists if MI reads one of the defs that are
// already in the list. Returns true in that case.
-static bool
-addToListsIfDependent(MachineInstr &MI,
- DenseSet<unsigned> &RegDefs,
- DenseSet<unsigned> &PhysRegUses,
- SmallVectorImpl<MachineInstr*> &Insts) {
+static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
+ DenseSet<unsigned> &PhysRegUses,
+ SmallVectorImpl<MachineInstr *> &Insts) {
for (MachineOperand &Use : MI.operands()) {
// If one of the defs is read, then there is a use of Def between I and the
// instruction that I will potentially be merged with. We will need to move
@@ -228,18 +293,16 @@ addToListsIfDependent(MachineInstr &MI,
return false;
}
-static bool
-canMoveInstsAcrossMemOp(MachineInstr &MemOp,
- ArrayRef<MachineInstr*> InstsToMove,
- const SIInstrInfo *TII,
- AliasAnalysis *AA) {
+static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
+ ArrayRef<MachineInstr *> InstsToMove,
+ const SIInstrInfo *TII, AliasAnalysis *AA) {
assert(MemOp.mayLoadOrStore());
for (MachineInstr *InstToMove : InstsToMove) {
if (!InstToMove->mayLoadOrStore())
continue;
if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
- return false;
+ return false;
}
return true;
}
@@ -260,10 +323,9 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
CI.BaseOff = 0;
// Handle SMEM and VMEM instructions.
- if (CI.InstClass != DS_READ_WRITE) {
- unsigned Diff = CI.IsX2 ? 2 : 1;
- return (EltOffset0 + Diff == EltOffset1 ||
- EltOffset1 + Diff == EltOffset0) &&
+ if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
+ return (EltOffset0 + CI.Width0 == EltOffset1 ||
+ EltOffset1 + CI.Width1 == EltOffset0) &&
CI.GLC0 == CI.GLC1 &&
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
}
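The SMEM/VMEM branch above now derives mergeability from the per-instruction widths: two accesses can be paired when one ends exactly where the other begins, in dword units. A minimal standalone C++ sketch of that test, mirroring the Offset/Width fields of CombineInfo:

// Offsets and widths are in dword "elements", as in CombineInfo.
static bool offsetsAreAdjacent(unsigned Off0, unsigned Width0,
                               unsigned Off1, unsigned Width1) {
  // e.g. a dword at element 4 followed by a dwordx2 at element 5 merges,
  // but a gap or an overlap does not.
  return Off0 + Width0 == Off1 || Off1 + Width1 == Off0;
}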
@@ -305,42 +367,176 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
return false;
}
+bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
+ const CombineInfo &CI) {
+ const unsigned Width = (CI.Width0 + CI.Width1);
+ switch (CI.InstClass) {
+ default:
+ return (Width <= 4) && (STM.hasDwordx3LoadStores() || (Width != 3));
+ case S_BUFFER_LOAD_IMM:
+ switch (Width) {
+ default:
+ return false;
+ case 2:
+ case 4:
+ return true;
+ }
+ }
+}
+
+unsigned SILoadStoreOptimizer::getOpcodeWidth(const MachineInstr &MI) {
+ const unsigned Opc = MI.getOpcode();
+
+ if (TII->isMUBUF(MI)) {
+ return AMDGPU::getMUBUFDwords(Opc);
+ }
+
+ switch (Opc) {
+ default:
+ return 0;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ return 1;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ return 2;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return 4;
+ }
+}
+
+InstClassEnum SILoadStoreOptimizer::getInstClass(unsigned Opc) {
+ if (TII->isMUBUF(Opc)) {
+ const int baseOpcode = AMDGPU::getMUBUFBaseOpcode(Opc);
+
+ // If we couldn't identify the opcode, bail out.
+ if (baseOpcode == -1) {
+ return UNKNOWN;
+ }
+
+ switch (baseOpcode) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
+ return BUFFER_LOAD_OFFEN;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
+ return BUFFER_LOAD_OFFSET;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
+ return BUFFER_STORE_OFFEN;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
+ return BUFFER_STORE_OFFSET;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact:
+ return BUFFER_LOAD_OFFEN_exact;
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact:
+ return BUFFER_LOAD_OFFSET_exact;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
+ return BUFFER_STORE_OFFEN_exact;
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
+ return BUFFER_STORE_OFFSET_exact;
+ }
+ }
+
+ switch (Opc) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return S_BUFFER_LOAD_IMM;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64_gfx9:
+ return DS_READ;
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return DS_WRITE;
+ }
+}
+
+unsigned SILoadStoreOptimizer::getRegs(unsigned Opc) {
+ if (TII->isMUBUF(Opc)) {
+ unsigned result = 0;
+
+ if (AMDGPU::getMUBUFHasVAddr(Opc)) {
+ result |= VADDR;
+ }
+
+ if (AMDGPU::getMUBUFHasSrsrc(Opc)) {
+ result |= SRSRC;
+ }
+
+ if (AMDGPU::getMUBUFHasSoffset(Opc)) {
+ result |= SOFFSET;
+ }
+
+ return result;
+ }
+
+ switch (Opc) {
+ default:
+ return 0;
+ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ return SBASE;
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_B64:
+ case AMDGPU::DS_READ_B32_gfx9:
+ case AMDGPU::DS_READ_B64_gfx9:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B64:
+ case AMDGPU::DS_WRITE_B32_gfx9:
+ case AMDGPU::DS_WRITE_B64_gfx9:
+ return ADDR;
+ }
+}
+
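The new getRegs() above classifies the address-forming operands of an opcode as a bitmask, and findMatchingInst() below builds its AddrOpName[] list from those bits instead of a per-class switch. A standalone sketch of that expansion in plain C++ (the operand-name strings are only illustrative):

#include <cstdio>

enum RegBits : unsigned {
  SBASE = 0x1, SRSRC = 0x2, SOFFSET = 0x4, VADDR = 0x8, ADDR = 0x10,
};

// Expand a bitmask into the list of address operands to compare, the same
// way findMatchingInst() fills AddrOpName[] from getRegs(Opc).
static unsigned collectAddrOps(unsigned Regs, const char *Names[5]) {
  unsigned N = 0;
  if (Regs & ADDR)    Names[N++] = "addr";
  if (Regs & SBASE)   Names[N++] = "sbase";
  if (Regs & SRSRC)   Names[N++] = "srsrc";
  if (Regs & SOFFSET) Names[N++] = "soffset";
  if (Regs & VADDR)   Names[N++] = "vaddr";
  return N;
}

int main() {
  const char *Ops[5];
  // e.g. a BUFFER_*_OFFEN-style opcode uses srsrc, soffset and vaddr.
  unsigned N = collectAddrOps(SRSRC | SOFFSET | VADDR, Ops);
  for (unsigned I = 0; I < N; ++I)
    std::printf("%s\n", Ops[I]);
}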
bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
MachineBasicBlock::iterator E = MBB->end();
MachineBasicBlock::iterator MBBI = CI.I;
- unsigned AddrOpName[3] = {0};
- int AddrIdx[3];
- const MachineOperand *AddrReg[3];
+ const unsigned Opc = CI.I->getOpcode();
+ const InstClassEnum InstClass = getInstClass(Opc);
+
+ if (InstClass == UNKNOWN) {
+ return false;
+ }
+
+ const unsigned Regs = getRegs(Opc);
+
+ unsigned AddrOpName[5] = {0};
+ int AddrIdx[5];
+ const MachineOperand *AddrReg[5];
unsigned NumAddresses = 0;
- switch (CI.InstClass) {
- case DS_READ_WRITE:
+ if (Regs & ADDR) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
- break;
- case S_BUFFER_LOAD_IMM:
+ }
+
+ if (Regs & SBASE) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
- break;
- case BUFFER_LOAD_OFFEN:
- case BUFFER_STORE_OFFEN:
- AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
- AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
- AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
- break;
- case BUFFER_LOAD_OFFSET:
- case BUFFER_STORE_OFFSET:
+ }
+
+ if (Regs & SRSRC) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
+ }
+
+ if (Regs & SOFFSET) {
AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
- break;
+ }
+
+ if (Regs & VADDR) {
+ AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
}
for (unsigned i = 0; i < NumAddresses; i++) {
AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]);
AddrReg[i] = &CI.I->getOperand(AddrIdx[i]);
- // We only ever merge operations with the same base address register, so don't
- // bother scanning forward if there are no other uses.
+ // We only ever merge operations with the same base address register, so
+ // don't bother scanning forward if there are no other uses.
if (AddrReg[i]->isReg() &&
(TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) ||
MRI->hasOneNonDBGUse(AddrReg[i]->getReg())))
@@ -353,8 +549,11 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
DenseSet<unsigned> PhysRegUsesToMove;
addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
- for ( ; MBBI != E; ++MBBI) {
- if (MBBI->getOpcode() != CI.I->getOpcode()) {
+ for (; MBBI != E; ++MBBI) {
+ const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE);
+
+ if ((getInstClass(MBBI->getOpcode()) != InstClass) ||
+ (IsDS && (MBBI->getOpcode() != Opc))) {
// This is not a matching DS instruction, but we can keep looking as
      // long as one of these conditions is met:
// 1. It is safe to move I down past MBBI.
@@ -368,8 +567,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
}
if (MBBI->mayLoadOrStore() &&
- (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
+ (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
// We fail condition #1, but we may still be able to satisfy condition
// #2. Add this instruction to the move list and then we will check
// if condition #2 holds once we have selected the matching instruction.
@@ -413,8 +612,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
continue;
}
- // Check same base pointer. Be careful of subregisters, which can occur with
- // vectors of pointers.
+ // Check same base pointer. Be careful of subregisters, which can occur
+ // with vectors of pointers.
if (AddrReg[i]->getReg() != AddrRegNext.getReg() ||
AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) {
Match = false;
@@ -423,13 +622,15 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
}
if (Match) {
- int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
- AMDGPU::OpName::offset);
+ int OffsetIdx =
+ AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset);
CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm();
+ CI.Width0 = getOpcodeWidth(*CI.I);
CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm();
+ CI.Width1 = getOpcodeWidth(*MBBI);
CI.Paired = MBBI;
- if (CI.InstClass == DS_READ_WRITE) {
+ if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) {
CI.Offset0 &= 0xffff;
CI.Offset1 &= 0xffff;
} else {
@@ -445,7 +646,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
- if (offsetsCanBeCombined(CI))
+ if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
return true;
}
@@ -472,12 +673,12 @@ unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
- return (EltSize == 4) ?
- AMDGPU::DS_READ2ST64_B32_gfx9 : AMDGPU::DS_READ2ST64_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32_gfx9
+ : AMDGPU::DS_READ2ST64_B64_gfx9;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
@@ -489,8 +690,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
unsigned NewOffset0 = CI.Offset0;
unsigned NewOffset1 = CI.Offset1;
- unsigned Opc = CI.UseST64 ?
- read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
+ unsigned Opc =
+ CI.UseST64 ? read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
@@ -502,39 +703,40 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
}
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
- (NewOffset0 != NewOffset1) &&
- "Computed offset doesn't fit");
+ (NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
const MCInstrDesc &Read2Desc = TII->get(Opc);
- const TargetRegisterClass *SuperRC
- = (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
+ const TargetRegisterClass *SuperRC =
+ (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
DebugLoc DL = CI.I->getDebugLoc();
unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
- .addImm(CI.BaseOff);
+ .addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
- .addReg(ImmReg)
- .addReg(AddrReg->getReg());
+ .addReg(ImmReg)
+ .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ BaseSubReg = 0;
}
MachineInstrBuilder Read2 =
- BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
- .addReg(BaseReg, BaseRegFlags) // addr
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
+ .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
(void)Read2;
@@ -561,32 +763,36 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
- return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9 : AMDGPU::DS_WRITE2_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9
+ : AMDGPU::DS_WRITE2_B64_gfx9;
}
unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const {
if (STM->ldsRequiresM0Init())
- return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32
+ : AMDGPU::DS_WRITE2ST64_B64;
- return (EltSize == 4) ?
- AMDGPU::DS_WRITE2ST64_B32_gfx9 : AMDGPU::DS_WRITE2ST64_B64_gfx9;
+ return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32_gfx9
+ : AMDGPU::DS_WRITE2ST64_B64_gfx9;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
// sure we preserve the subregister index and any register flags set on them.
- const MachineOperand *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
- const MachineOperand *Data0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
- const MachineOperand *Data1
- = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
+ const MachineOperand *AddrReg =
+ TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
+ const MachineOperand *Data0 =
+ TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
+ const MachineOperand *Data1 =
+ TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
unsigned NewOffset0 = CI.Offset0;
unsigned NewOffset1 = CI.Offset1;
- unsigned Opc = CI.UseST64 ?
- write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
+ unsigned Opc =
+ CI.UseST64 ? write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
if (NewOffset0 > NewOffset1) {
// Canonicalize the merged instruction so the smaller offset comes first.
@@ -595,36 +801,37 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
}
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
- (NewOffset0 != NewOffset1) &&
- "Computed offset doesn't fit");
+ (NewOffset0 != NewOffset1) && "Computed offset doesn't fit");
const MCInstrDesc &Write2Desc = TII->get(Opc);
DebugLoc DL = CI.I->getDebugLoc();
unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
- .addImm(CI.BaseOff);
+ .addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
- .addReg(ImmReg)
- .addReg(AddrReg->getReg());
+ .addReg(ImmReg)
+ .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ BaseSubReg = 0;
}
MachineInstrBuilder Write2 =
- BuildMI(*MBB, CI.Paired, DL, Write2Desc)
- .addReg(BaseReg, BaseRegFlags) // addr
- .add(*Data0) // data0
- .add(*Data1) // data1
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ BuildMI(*MBB, CI.Paired, DL, Write2Desc)
+ .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
+ .add(*Data0) // data0
+ .add(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(Write2, CI.InstsToMove);
@@ -636,15 +843,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
return Next;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- unsigned Opcode = CI.IsX2 ? AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM :
- AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
+ const unsigned Opcode = getNewOpcode(CI);
+
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::SReg_128RegClass : &AMDGPU::SReg_64_XEXECRegClass;
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
@@ -652,14 +858,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.GLC0) // glc
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
-
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
@@ -681,29 +884,25 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
return Next;
}
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
- CombineInfo &CI) {
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- unsigned Opcode;
- if (CI.InstClass == BUFFER_LOAD_OFFEN) {
- Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN :
- AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
- } else {
- Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET :
- AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
- }
+ const unsigned Opcode = getNewOpcode(CI);
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+
+ // Create the merged destination super-register.
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);
- if (CI.InstClass == BUFFER_LOAD_OFFEN)
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+ const unsigned Regs = getRegs(Opcode);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -711,14 +910,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
-
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the old destination registers.
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
@@ -740,57 +936,137 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
return Next;
}
-unsigned SILoadStoreOptimizer::promoteBufferStoreOpcode(
- const MachineInstr &I, bool &IsX2, bool &IsOffen) const {
- IsX2 = false;
- IsOffen = false;
-
- switch (I.getOpcode()) {
- case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
- case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact:
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN_exact;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
- IsX2 = true;
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN_exact:
- IsX2 = true;
- IsOffen = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN_exact;
- case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
- case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact:
- return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET_exact;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
- IsX2 = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
- case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET_exact:
- IsX2 = true;
- return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET_exact;
- }
- return 0;
-}
-
-MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
- CombineInfo &CI) {
+unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
+ const unsigned Width = CI.Width0 + CI.Width1;
+
+ switch (CI.InstClass) {
+ default:
+ return AMDGPU::getMUBUFOpcode(CI.InstClass, Width);
+ case UNKNOWN:
+ llvm_unreachable("Unknown instruction class");
+ case S_BUFFER_LOAD_IMM:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
+ case 4:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
+ }
+ }
+}
+
+std::pair<unsigned, unsigned>
+SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) {
+ if (CI.Offset0 > CI.Offset1) {
+ switch (CI.Width0) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1, AMDGPU::sub0);
+ case 2:
+ return std::make_pair(AMDGPU::sub2, AMDGPU::sub0_sub1);
+ case 3:
+ return std::make_pair(AMDGPU::sub3, AMDGPU::sub0_sub1_sub2);
+ }
+ case 2:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1_sub2, AMDGPU::sub0);
+ case 2:
+ return std::make_pair(AMDGPU::sub2_sub3, AMDGPU::sub0_sub1);
+ }
+ case 3:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0);
+ }
+ }
+ } else {
+ switch (CI.Width0) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1);
+ case 2:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2);
+ case 3:
+ return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2_sub3);
+ }
+ case 2:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2);
+ case 2:
+ return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2_sub3);
+ }
+ case 3:
+ switch (CI.Width1) {
+ default:
+ return std::make_pair(0, 0);
+ case 1:
+ return std::make_pair(AMDGPU::sub0_sub1_sub2, AMDGPU::sub3);
+ }
+ }
+ }
+}
+
+const TargetRegisterClass *
+SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) {
+ if (CI.InstClass == S_BUFFER_LOAD_IMM) {
+ switch (CI.Width0 + CI.Width1) {
+ default:
+ return nullptr;
+ case 2:
+ return &AMDGPU::SReg_64_XEXECRegClass;
+ case 4:
+ return &AMDGPU::SReg_128RegClass;
+ case 8:
+ return &AMDGPU::SReg_256RegClass;
+ case 16:
+ return &AMDGPU::SReg_512RegClass;
+ }
+ } else {
+ switch (CI.Width0 + CI.Width1) {
+ default:
+ return nullptr;
+ case 2:
+ return &AMDGPU::VReg_64RegClass;
+ case 3:
+ return &AMDGPU::VReg_96RegClass;
+ case 4:
+ return &AMDGPU::VReg_128RegClass;
+ }
+ }
+}
+
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
- bool Unused1, Unused2;
- unsigned Opcode = promoteBufferStoreOpcode(*CI.I, Unused1, Unused2);
- unsigned SubRegIdx0 = CI.IsX2 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- unsigned SubRegIdx1 = CI.IsX2 ? AMDGPU::sub2_sub3 : AMDGPU::sub1;
+ const unsigned Opcode = getNewOpcode(CI);
- // Handle descending offsets
- if (CI.Offset0 > CI.Offset1)
- std::swap(SubRegIdx0, SubRegIdx1);
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
// Copy to the new source register.
- const TargetRegisterClass *SuperRC =
- CI.IsX2 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
unsigned SrcReg = MRI->createVirtualRegister(SuperRC);
const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
@@ -803,18 +1079,20 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
.addImm(SubRegIdx1);
auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode))
- .addReg(SrcReg, RegState::Kill);
+ .addReg(SrcReg, RegState::Kill);
- if (CI.InstClass == BUFFER_STORE_OFFEN)
- MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+ const unsigned Regs = getRegs(Opcode);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset0, CI.Offset1)) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
- .addImm(0) // tfe
- .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(MIB, CI.InstsToMove);
@@ -824,105 +1102,399 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
return Next;
}
+MachineOperand
+SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) {
+ APInt V(32, Val, true);
+ if (TII->isInlineConstant(V))
+ return MachineOperand::CreateImm(Val);
+
+ unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ MachineInstr *Mov =
+ BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::S_MOV_B32), Reg)
+ .addImm(Val);
+ (void)Mov;
+ LLVM_DEBUG(dbgs() << " "; Mov->dump());
+ return MachineOperand::CreateReg(Reg, false);
+}
+
+// Compute base address using Addr and return the final register.
+unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
+ const MemAddress &Addr) {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ DebugLoc DL = MI.getDebugLoc();
+
+ assert((TRI->getRegSizeInBits(Addr.Base.LoReg, *MRI) == 32 ||
+ Addr.Base.LoSubReg) &&
+ "Expected 32-bit Base-Register-Low!!");
+
+ assert((TRI->getRegSizeInBits(Addr.Base.HiReg, *MRI) == 32 ||
+ Addr.Base.HiSubReg) &&
+ "Expected 32-bit Base-Register-Hi!!");
+
+ LLVM_DEBUG(dbgs() << " Re-Computed Anchor-Base:\n");
+ MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
+ MachineOperand OffsetHi =
+ createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
+ unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned DeadCarryReg =
+ MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MachineInstr *LoHalf =
+ BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
+ .addReg(CarryReg, RegState::Define)
+ .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
+ .add(OffsetLo);
+ (void)LoHalf;
+ LLVM_DEBUG(dbgs() << " "; LoHalf->dump(););
+
+ MachineInstr *HiHalf =
+ BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADDC_U32_e64), DestSub1)
+ .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
+ .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
+ .add(OffsetHi)
+ .addReg(CarryReg, RegState::Kill);
+ (void)HiHalf;
+ LLVM_DEBUG(dbgs() << " "; HiHalf->dump(););
+
+ unsigned FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ MachineInstr *FullBase =
+ BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+ (void)FullBase;
+ LLVM_DEBUG(dbgs() << " "; FullBase->dump(); dbgs() << "\n";);
+
+ return FullDestReg;
+}
+
+// Update base and offset with the NewBase and NewOffset in MI.
+void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI,
+ unsigned NewBase,
+ int32_t NewOffset) {
+ TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase);
+ TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset);
+}
+
+Optional<int32_t>
+SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) {
+ if (Op.isImm())
+ return Op.getImm();
+
+ if (!Op.isReg())
+ return None;
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
+ if (!Def || Def->getOpcode() != AMDGPU::S_MOV_B32 ||
+ !Def->getOperand(1).isImm())
+ return None;
+
+ return Def->getOperand(1).getImm();
+}
+
+// Analyzes Base and extracts:
+// - 32-bit base registers, subregisters
+// - 64-bit constant offset
+// Expecting base computation as:
+// %OFFSET0:sgpr_32 = S_MOV_B32 8000
+// %LO:vgpr_32, %c:sreg_64_xexec =
+// V_ADD_I32_e64 %BASE_LO:vgpr_32, %103:sgpr_32,
+// %HI:vgpr_32, = V_ADDC_U32_e64 %BASE_HI:vgpr_32, 0, killed %c:sreg_64_xexec
+// %Base:vreg_64 =
+// REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1
+void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base,
+ MemAddress &Addr) {
+ if (!Base.isReg())
+ return;
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(Base.getReg());
+ if (!Def || Def->getOpcode() != AMDGPU::REG_SEQUENCE
+ || Def->getNumOperands() != 5)
+ return;
+
+ MachineOperand BaseLo = Def->getOperand(1);
+ MachineOperand BaseHi = Def->getOperand(3);
+ if (!BaseLo.isReg() || !BaseHi.isReg())
+ return;
+
+ MachineInstr *BaseLoDef = MRI->getUniqueVRegDef(BaseLo.getReg());
+ MachineInstr *BaseHiDef = MRI->getUniqueVRegDef(BaseHi.getReg());
+
+ if (!BaseLoDef || BaseLoDef->getOpcode() != AMDGPU::V_ADD_I32_e64 ||
+ !BaseHiDef || BaseHiDef->getOpcode() != AMDGPU::V_ADDC_U32_e64)
+ return;
+
+ const auto *Src0 = TII->getNamedOperand(*BaseLoDef, AMDGPU::OpName::src0);
+ const auto *Src1 = TII->getNamedOperand(*BaseLoDef, AMDGPU::OpName::src1);
+
+ auto Offset0P = extractConstOffset(*Src0);
+ if (Offset0P)
+ BaseLo = *Src1;
+ else {
+ if (!(Offset0P = extractConstOffset(*Src1)))
+ return;
+ BaseLo = *Src0;
+ }
+
+ Src0 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src0);
+ Src1 = TII->getNamedOperand(*BaseHiDef, AMDGPU::OpName::src1);
+
+ if (Src0->isImm())
+ std::swap(Src0, Src1);
+
+ if (!Src1->isImm())
+ return;
+
+ uint64_t Offset1 = Src1->getImm();
+ BaseHi = *Src0;
+
+ Addr.Base.LoReg = BaseLo.getReg();
+ Addr.Base.HiReg = BaseHi.getReg();
+ Addr.Base.LoSubReg = BaseLo.getSubReg();
+ Addr.Base.HiSubReg = BaseHi.getSubReg();
+ Addr.Offset = (*Offset0P & 0x00000000ffffffff) | (Offset1 << 32);
+}
+
+bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
+ MachineInstr &MI,
+ MemInfoMap &Visited,
+ SmallPtrSet<MachineInstr *, 4> &AnchorList) {
+
+ // TODO: Support flat and scratch.
+ if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0 ||
+ TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL)
+ return false;
+
+ // TODO: Support Store.
+ if (!MI.mayLoad())
+ return false;
+
+ if (AnchorList.count(&MI))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nTryToPromoteConstantOffsetToImmFor "; MI.dump());
+
+ if (TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm()) {
+ LLVM_DEBUG(dbgs() << " Const-offset is already promoted.\n";);
+ return false;
+ }
+
+ // Step 1: Find the base registers and a 64-bit constant offset.
+ MachineOperand &Base = *TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+ MemAddress MAddr;
+ if (Visited.find(&MI) == Visited.end()) {
+ processBaseWithConstOffset(Base, MAddr);
+ Visited[&MI] = MAddr;
+ } else
+ MAddr = Visited[&MI];
+
+ if (MAddr.Offset == 0) {
+ LLVM_DEBUG(dbgs() << " Failed to extract constant-offset or there are no"
+ " constant offsets that can be promoted.\n";);
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << " BASE: {" << MAddr.Base.HiReg << ", "
+ << MAddr.Base.LoReg << "} Offset: " << MAddr.Offset << "\n\n";);
+
+ // Step 2: Traverse through MI's basic block and find an anchor (that has the
+ // same base registers) with the highest 13-bit distance from MI's offset.
+ // E.g. (64-bit loads)
+ // bb:
+ // addr1 = &a + 4096; load1 = load(addr1, 0)
+ // addr2 = &a + 6144; load2 = load(addr2, 0)
+ // addr3 = &a + 8192; load3 = load(addr3, 0)
+ // addr4 = &a + 10240; load4 = load(addr4, 0)
+ // addr5 = &a + 12288; load5 = load(addr5, 0)
+ //
+ // Starting from the first load, the optimization will try to find a new base
+ // from which (&a + 4096) has a 13-bit distance. Both &a + 6144 and &a + 8192
+ // have a 13-bit distance from &a + 4096. The heuristic considers &a + 8192
+ // as the new base (anchor) because of the maximum distance, which can
+ // presumably accommodate more intermediate bases.
+ //
+ // Step 3: Move (&a + 8192) above load1. Compute and promote offsets from
+ // (&a + 8192) for load1, load2, load4.
+ // addr = &a + 8192
+ // load1 = load(addr, -4096)
+ // load2 = load(addr, -2048)
+ // load3 = load(addr, 0)
+ // load4 = load(addr, 2048)
+ // addr5 = &a + 12288; load5 = load(addr5, 0)
+ //
+ MachineInstr *AnchorInst = nullptr;
+ MemAddress AnchorAddr;
+ uint32_t MaxDist = std::numeric_limits<uint32_t>::min();
+ SmallVector<std::pair<MachineInstr *, int64_t>, 4> InstsWCommonBase;
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator E = MBB->end();
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ ++MBBI;
+ const SITargetLowering *TLI =
+ static_cast<const SITargetLowering *>(STM->getTargetLowering());
+
+ for ( ; MBBI != E; ++MBBI) {
+ MachineInstr &MINext = *MBBI;
+ // TODO: Support finding an anchor (with the same base) from store addresses or
+ // any other load addresses where the opcodes are different.
+ if (MINext.getOpcode() != MI.getOpcode() ||
+ TII->getNamedOperand(MINext, AMDGPU::OpName::offset)->getImm())
+ continue;
+
+ const MachineOperand &BaseNext =
+ *TII->getNamedOperand(MINext, AMDGPU::OpName::vaddr);
+ MemAddress MAddrNext;
+ if (Visited.find(&MINext) == Visited.end()) {
+ processBaseWithConstOffset(BaseNext, MAddrNext);
+ Visited[&MINext] = MAddrNext;
+ } else
+ MAddrNext = Visited[&MINext];
+
+ if (MAddrNext.Base.LoReg != MAddr.Base.LoReg ||
+ MAddrNext.Base.HiReg != MAddr.Base.HiReg ||
+ MAddrNext.Base.LoSubReg != MAddr.Base.LoSubReg ||
+ MAddrNext.Base.HiSubReg != MAddr.Base.HiSubReg)
+ continue;
+
+ InstsWCommonBase.push_back(std::make_pair(&MINext, MAddrNext.Offset));
+
+ int64_t Dist = MAddr.Offset - MAddrNext.Offset;
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = Dist;
+ if (TLI->isLegalGlobalAddressingMode(AM) &&
+ (uint32_t)std::abs(Dist) > MaxDist) {
+ MaxDist = std::abs(Dist);
+
+ AnchorAddr = MAddrNext;
+ AnchorInst = &MINext;
+ }
+ }
+
+ if (AnchorInst) {
+ LLVM_DEBUG(dbgs() << " Anchor-Inst(with max-distance from Offset): ";
+ AnchorInst->dump());
+ LLVM_DEBUG(dbgs() << " Anchor-Offset from BASE: "
+ << AnchorAddr.Offset << "\n\n");
+
+ // Instead of moving up, just re-compute anchor-instruction's base address.
+ unsigned Base = computeBase(MI, AnchorAddr);
+
+ updateBaseAndOffset(MI, Base, MAddr.Offset - AnchorAddr.Offset);
+ LLVM_DEBUG(dbgs() << " After promotion: "; MI.dump(););
+
+ for (auto P : InstsWCommonBase) {
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = P.second - AnchorAddr.Offset;
+
+ if (TLI->isLegalGlobalAddressingMode(AM)) {
+ LLVM_DEBUG(dbgs() << " Promote Offset(" << P.second;
+ dbgs() << ")"; P.first->dump());
+ updateBaseAndOffset(*P.first, Base, P.second - AnchorAddr.Offset);
+ LLVM_DEBUG(dbgs() << " After promotion: "; P.first->dump());
+ }
+ }
+ AnchorList.insert(AnchorInst);
+ return true;
+ }
+
+ return false;
+}
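
The following standalone C++ sketch is an editorial illustration, not part of the upstream patch. It reduces the isLegalGlobalAddressingMode check to a signed 13-bit range test (-4096..4095), which is the only property the Step 2/Step 3 example above depends on, and reproduces why &a + 8192 is chosen as the anchor while load5 keeps its own base.

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Reduced model of the legality test: a displacement is usable as an
// immediate offset if it fits in a signed 13-bit field.
static bool fitsSigned13(int64_t D) { return D >= -4096 && D <= 4095; }

int main() {
  // Offsets from &a used by load1..load5 in the comment above.
  const std::vector<int64_t> Loads = {4096, 6144, 8192, 10240, 12288};

  // Pick the candidate with the largest |distance| from load1 that still
  // fits in the immediate field, mirroring the MaxDist heuristic.
  int64_t Anchor = Loads[0];
  int64_t MaxDist = 0;
  for (int64_t Cand : Loads) {
    int64_t Dist = Loads[0] - Cand;
    if (fitsSigned13(Dist) && std::abs(Dist) > MaxDist) {
      MaxDist = std::abs(Dist);
      Anchor = Cand;
    }
  }
  std::printf("anchor = &a + %lld\n", (long long)Anchor); // prints 8192

  // Every load whose distance from the anchor fits gets its offset promoted;
  // load5 (&a + 12288) is 4096 away, which exceeds 4095, so it is left alone.
  for (int64_t Off : Loads)
    std::printf("&a + %-5lld -> %s\n", (long long)Off,
                fitsSigned13(Off - Anchor) ? "promoted" : "kept as-is");
  return 0;
}
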
+
// Scan through looking for adjacent LDS operations with constant offsets from
// the same base register. We rely on the scheduler to do the hard work of
// clustering nearby loads, and assume these are all adjacent.
bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
bool Modified = false;
+ // Maps each visited instruction to its computed base registers and
+ // constant offset.
+ MemInfoMap Visited;
+ // Contains the anchor instructions: other instructions' constant offsets are
+ // promoted relative to them, and the anchors themselves are skipped.
+ SmallPtrSet<MachineInstr *, 4> AnchorList;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
MachineInstr &MI = *I;
+ if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
+ Modified = true;
+
// Don't combine if volatile.
if (MI.hasOrderedMemoryRef()) {
++I;
continue;
}
+ const unsigned Opc = MI.getOpcode();
+
CombineInfo CI;
CI.I = I;
- unsigned Opc = MI.getOpcode();
- if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64 ||
- Opc == AMDGPU::DS_READ_B32_gfx9 || Opc == AMDGPU::DS_READ_B64_gfx9) {
+ CI.InstClass = getInstClass(Opc);
- CI.InstClass = DS_READ_WRITE;
+ switch (CI.InstClass) {
+ default:
+ break;
+ case DS_READ:
CI.EltSize =
- (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 : 4;
-
+ (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8
+ : 4;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeRead2Pair(CI);
} else {
++I;
}
-
continue;
- } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64 ||
- Opc == AMDGPU::DS_WRITE_B32_gfx9 ||
- Opc == AMDGPU::DS_WRITE_B64_gfx9) {
- CI.InstClass = DS_READ_WRITE;
- CI.EltSize
- = (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8 : 4;
-
+ case DS_WRITE:
+ CI.EltSize =
+ (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8
+ : 4;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeWrite2Pair(CI);
} else {
++I;
}
-
continue;
- }
- if (Opc == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
- Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM) {
- // EltSize is in units of the offset encoding.
- CI.InstClass = S_BUFFER_LOAD_IMM;
+ case S_BUFFER_LOAD_IMM:
CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4);
- CI.IsX2 = Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeSBufferLoadImmPair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 16;
} else {
++I;
}
continue;
- }
- if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFSET ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET) {
- if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN)
- CI.InstClass = BUFFER_LOAD_OFFEN;
- else
- CI.InstClass = BUFFER_LOAD_OFFSET;
-
+ case BUFFER_LOAD_OFFEN:
+ case BUFFER_LOAD_OFFSET:
+ case BUFFER_LOAD_OFFEN_exact:
+ case BUFFER_LOAD_OFFSET_exact:
CI.EltSize = 4;
- CI.IsX2 = Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN ||
- Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeBufferLoadPair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
} else {
++I;
}
continue;
- }
-
- bool StoreIsX2, IsOffen;
- if (promoteBufferStoreOpcode(*I, StoreIsX2, IsOffen)) {
- CI.InstClass = IsOffen ? BUFFER_STORE_OFFEN : BUFFER_STORE_OFFSET;
+ case BUFFER_STORE_OFFEN:
+ case BUFFER_STORE_OFFSET:
+ case BUFFER_STORE_OFFEN_exact:
+ case BUFFER_STORE_OFFSET_exact:
CI.EltSize = 4;
- CI.IsX2 = StoreIsX2;
if (findMatchingInst(CI)) {
Modified = true;
I = mergeBufferStorePair(CI);
- if (!CI.IsX2)
- CreatedX2++;
+ OptimizeAgain |= (CI.Width0 + CI.Width1) < 4;
} else {
++I;
}
@@ -956,12 +1528,10 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Modified = false;
for (MachineBasicBlock &MBB : MF) {
- CreatedX2 = 0;
- Modified |= optimizeBlock(MBB);
-
- // Run again to convert x2 to x4.
- if (CreatedX2 >= 1)
+ do {
+ OptimizeAgain = false;
Modified |= optimizeBlock(MBB);
+ } while (OptimizeAgain);
}
return Modified;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index ad30317c344c..1aa1feebbdae 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -85,9 +85,7 @@ private:
void emitIf(MachineInstr &MI);
void emitElse(MachineInstr &MI);
- void emitBreak(MachineInstr &MI);
void emitIfBreak(MachineInstr &MI);
- void emitElseBreak(MachineInstr &MI);
void emitLoop(MachineInstr &MI);
void emitEndCf(MachineInstr &MI);
@@ -329,20 +327,6 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
}
-void SILowerControlFlow::emitBreak(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Dst = MI.getOperand(0).getReg();
-
- MachineInstr *Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(AMDGPU::EXEC)
- .add(MI.getOperand(1));
-
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *Or);
- MI.eraseFromParent();
-}
-
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
@@ -384,11 +368,6 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MI.eraseFromParent();
}
-void SILowerControlFlow::emitElseBreak(MachineInstr &MI) {
- // Lowered in the same way as emitIfBreak above.
- emitIfBreak(MI);
-}
-
void SILowerControlFlow::emitLoop(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
@@ -515,18 +494,10 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
emitElse(MI);
break;
- case AMDGPU::SI_BREAK:
- emitBreak(MI);
- break;
-
case AMDGPU::SI_IF_BREAK:
emitIfBreak(MI);
break;
- case AMDGPU::SI_ELSE_BREAK:
- emitElseBreak(MI);
- break;
-
case AMDGPU::SI_LOOP:
emitLoop(MI);
break;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index ecc6cff407e1..eb038bb5d5fc 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -5,37 +5,61 @@
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-/// i1 values are usually inserted by the CFG Structurize pass and they are
-/// unique in that they can be copied from VALU to SALU registers.
-/// This is not possible for any other value type. Since there are no
-/// MOV instructions for i1, we to use V_CMP_* and V_CNDMASK to move the i1.
-///
//===----------------------------------------------------------------------===//
//
+// This pass lowers all occurrences of i1 values (with a vreg_1 register class)
+// to lane masks (64-bit scalar registers). The pass assumes machine SSA form
+// and a wave-level control flow graph.
+//
+// Before this pass, values that are semantically i1 and are defined and used
+// within the same basic block are already represented as lane masks in scalar
+// registers. However, values that cross basic blocks are always transferred
+// between basic blocks in vreg_1 virtual registers and are lowered by this
+// pass.
+//
+// The only instructions that use or define vreg_1 virtual registers are COPY,
+// PHI, and IMPLICIT_DEF.
+//
+//===----------------------------------------------------------------------===//
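
The following standalone sketch is an editorial illustration, not part of the upstream patch. It shows the representation this pass targets, assuming a 64-lane wave as implied by the SReg_64 lane masks used below: an i1 value becomes one bit per lane in a 64-bit scalar, and the V_CNDMASK_B32 emitted by lowerCopiesFromI1 expands such a mask back into 0/-1 per lane.

#include <cstdint>
#include <cstdio>

// A vreg_1 value is conceptually one boolean per lane of the wave. After this
// pass it is held as a 64-bit scalar where bit N is the value for lane N.
static uint64_t toLaneMask(const bool (&PerLane)[64]) {
  uint64_t Mask = 0;
  for (unsigned Lane = 0; Lane < 64; ++Lane)
    if (PerLane[Lane])
      Mask |= uint64_t(1) << Lane;
  return Mask;
}

int main() {
  bool PerLane[64] = {};
  PerLane[0] = PerLane[3] = PerLane[63] = true;
  // Lanes 0, 3 and 63 set -> 0x8000000000000009.
  std::printf("mask = 0x%016llx\n",
              (unsigned long long)toLaneMask(PerLane));
  return 0;
}
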
-#define DEBUG_TYPE "si-i1-copies"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPULaneDominator.h"
-#include "llvm/CodeGen/LiveIntervals.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
+#define DEBUG_TYPE "si-i1-copies"
+
using namespace llvm;
+static unsigned createLaneMaskReg(MachineFunction &MF);
+static unsigned insertUndefLaneMask(MachineBasicBlock &MBB);
+
namespace {
class SILowerI1Copies : public MachineFunctionPass {
public:
static char ID;
+private:
+ MachineFunction *MF = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachinePostDominatorTree *PDT = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const GCNSubtarget *ST = nullptr;
+ const SIInstrInfo *TII = nullptr;
+
+ DenseSet<unsigned> ConstrainRegs;
+
public:
SILowerI1Copies() : MachineFunctionPass(ID) {
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
@@ -47,14 +71,337 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+private:
+ void lowerCopiesFromI1();
+ void lowerPhis();
+ void lowerCopiesToI1();
+ bool isConstantLaneMask(unsigned Reg, bool &Val) const;
+ void buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned DstReg, unsigned PrevReg, unsigned CurReg);
+ MachineBasicBlock::iterator
+ getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+
+ bool isLaneMaskReg(unsigned Reg) const {
+ return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
+ TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
+ ST->getWavefrontSize();
+ }
+};
+
+/// Helper class that analyzes the relationship between incoming values of a
+/// phi in the control flow graph to determine where an incoming value can
+/// simply be taken as a scalar lane mask as-is, and where it needs to be
+/// merged with another, previously defined lane mask.
+///
+/// The approach is as follows:
+/// - Determine all basic blocks which, starting from the incoming blocks,
+/// a wave may reach before entering the def block (the block containing the
+/// phi).
+/// - If an incoming block has no predecessors in this set, we can take the
+/// incoming value as a scalar lane mask as-is.
+/// -- A special case of this is when the def block has a self-loop.
+/// - Otherwise, the incoming value needs to be merged with a previously
+/// defined lane mask.
+/// - If there is a path into the set of reachable blocks that does _not_ go
+/// through an incoming block where we can take the scalar lane mask as-is,
+/// we need to invent an available value for the SSAUpdater. Choices are
+/// 0 and undef, with differing consequences for how to merge values etc.
+///
+/// TODO: We could use region analysis to quickly skip over SESE regions during
+/// the traversal.
+///
+class PhiIncomingAnalysis {
+ MachinePostDominatorTree &PDT;
+
+ // For each reachable basic block, whether it is a source in the induced
+ // subgraph of the CFG.
+ DenseMap<MachineBasicBlock *, bool> ReachableMap;
+ SmallVector<MachineBasicBlock *, 4> ReachableOrdered;
+ SmallVector<MachineBasicBlock *, 4> Stack;
+ SmallVector<MachineBasicBlock *, 4> Predecessors;
+
+public:
+ PhiIncomingAnalysis(MachinePostDominatorTree &PDT) : PDT(PDT) {}
+
+ /// Returns whether \p MBB is a source in the induced subgraph of reachable
+ /// blocks.
+ bool isSource(MachineBasicBlock &MBB) const {
+ return ReachableMap.find(&MBB)->second;
+ }
+
+ ArrayRef<MachineBasicBlock *> predecessors() const { return Predecessors; }
+
+ void analyze(MachineBasicBlock &DefBlock,
+ ArrayRef<MachineBasicBlock *> IncomingBlocks) {
+ assert(Stack.empty());
+ ReachableMap.clear();
+ ReachableOrdered.clear();
+ Predecessors.clear();
+
+ // Insert the def block first, so that it acts as an end point for the
+ // traversal.
+ ReachableMap.try_emplace(&DefBlock, false);
+ ReachableOrdered.push_back(&DefBlock);
+
+ for (MachineBasicBlock *MBB : IncomingBlocks) {
+ if (MBB == &DefBlock) {
+ ReachableMap[&DefBlock] = true; // self-loop on DefBlock
+ continue;
+ }
+
+ ReachableMap.try_emplace(MBB, false);
+ ReachableOrdered.push_back(MBB);
+
+ // If this block has a divergent terminator and the def block is its
+ // post-dominator, the wave may first visit the other successors.
+ bool Divergent = false;
+ for (MachineInstr &MI : MBB->terminators()) {
+ if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
+ MI.getOpcode() == AMDGPU::SI_IF ||
+ MI.getOpcode() == AMDGPU::SI_ELSE ||
+ MI.getOpcode() == AMDGPU::SI_LOOP) {
+ Divergent = true;
+ break;
+ }
+ }
+
+ if (Divergent && PDT.dominates(&DefBlock, MBB)) {
+ for (MachineBasicBlock *Succ : MBB->successors())
+ Stack.push_back(Succ);
+ }
+ }
+
+ while (!Stack.empty()) {
+ MachineBasicBlock *MBB = Stack.pop_back_val();
+ if (!ReachableMap.try_emplace(MBB, false).second)
+ continue;
+ ReachableOrdered.push_back(MBB);
+
+ for (MachineBasicBlock *Succ : MBB->successors())
+ Stack.push_back(Succ);
+ }
+
+ for (MachineBasicBlock *MBB : ReachableOrdered) {
+ bool HaveReachablePred = false;
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (ReachableMap.count(Pred)) {
+ HaveReachablePred = true;
+ } else {
+ Stack.push_back(Pred);
+ }
+ }
+ if (!HaveReachablePred)
+ ReachableMap[MBB] = true;
+ if (HaveReachablePred) {
+ for (MachineBasicBlock *UnreachablePred : Stack) {
+ if (llvm::find(Predecessors, UnreachablePred) == Predecessors.end())
+ Predecessors.push_back(UnreachablePred);
+ }
+ }
+ Stack.clear();
+ }
+ }
+};
+
+/// Helper class that detects loops which require us to lower an i1 COPY into
+/// bitwise manipulation.
+///
+/// Unfortunately, we cannot use LoopInfo because LoopInfo does not distinguish
+/// between loops with the same header. Consider this example:
+///
+/// A-+-+
+/// | | |
+/// B-+ |
+/// | |
+/// C---+
+///
+/// A is the header of a loop containing A, B, and C as far as LoopInfo is
+/// concerned. However, an i1 COPY in B that is used in C must be lowered to
+/// bitwise operations to combine results from different loop iterations when
+/// B has a divergent branch (since by default we will compile this code such
+/// that threads in a wave are merged at the entry of C).
+///
+/// The following rule is implemented to determine whether bitwise operations
+/// are required: use the bitwise lowering for a def in block B if a backward
+/// edge to B is reachable without going through the nearest common
+/// post-dominator of B and all uses of the def.
+///
+/// TODO: This rule is conservative because it does not check whether the
+/// relevant branches are actually divergent.
+///
+/// The class is designed to cache the CFG traversal so that it can be re-used
+/// for multiple defs within the same basic block.
+///
+/// TODO: We could use region analysis to quickly skip over SESE regions during
+/// the traversal.
+///
+class LoopFinder {
+ MachineDominatorTree &DT;
+ MachinePostDominatorTree &PDT;
+
+ // All visited / reachable blocks, tagged by level (level 0 is the def block,
+ // level 1 are all blocks reachable including but not going through the def
+ // block's IPDOM, etc.).
+ DenseMap<MachineBasicBlock *, unsigned> Visited;
+
+ // Nearest common dominator of all visited blocks by level (level 0 is the
+ // def block). Used for seeding the SSAUpdater.
+ SmallVector<MachineBasicBlock *, 4> CommonDominators;
+
+ // Post-dominator of all visited blocks.
+ MachineBasicBlock *VisitedPostDom = nullptr;
+
+ // Level at which a loop was found: 0 is not possible; 1 = a backward edge is
+ // reachable without going through the IPDOM of the def block (if the IPDOM
+ // itself has an edge to the def block, the loop level is 2), etc.
+ unsigned FoundLoopLevel = ~0u;
+
+ MachineBasicBlock *DefBlock = nullptr;
+ SmallVector<MachineBasicBlock *, 4> Stack;
+ SmallVector<MachineBasicBlock *, 4> NextLevel;
+
+public:
+ LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
+ : DT(DT), PDT(PDT) {}
+
+ void initialize(MachineBasicBlock &MBB) {
+ Visited.clear();
+ CommonDominators.clear();
+ Stack.clear();
+ NextLevel.clear();
+ VisitedPostDom = nullptr;
+ FoundLoopLevel = ~0u;
+
+ DefBlock = &MBB;
+ }
+
+ /// Check whether a backward edge can be reached without going through the
+ /// given \p PostDom of the def block.
+ ///
+ /// Return the level of \p PostDom if a loop was found, or 0 otherwise.
+ unsigned findLoop(MachineBasicBlock *PostDom) {
+ MachineDomTreeNode *PDNode = PDT.getNode(DefBlock);
+
+ if (!VisitedPostDom)
+ advanceLevel();
+
+ unsigned Level = 0;
+ while (PDNode->getBlock() != PostDom) {
+ if (PDNode->getBlock() == VisitedPostDom)
+ advanceLevel();
+ PDNode = PDNode->getIDom();
+ Level++;
+ if (FoundLoopLevel == Level)
+ return Level;
+ }
+
+ return 0;
+ }
+
+ /// Add undef values dominating the loop and the optionally given additional
+ /// blocks, so that the SSA updater doesn't have to search all the way to the
+ /// function entry.
+ void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
+ ArrayRef<MachineBasicBlock *> Blocks = {}) {
+ assert(LoopLevel < CommonDominators.size());
+
+ MachineBasicBlock *Dom = CommonDominators[LoopLevel];
+ for (MachineBasicBlock *MBB : Blocks)
+ Dom = DT.findNearestCommonDominator(Dom, MBB);
+
+ if (!inLoopLevel(*Dom, LoopLevel, Blocks)) {
+ SSAUpdater.AddAvailableValue(Dom, insertUndefLaneMask(*Dom));
+ } else {
+ // The dominator is part of the loop or the given blocks, so add the
+ // undef value to unreachable predecessors instead.
+ for (MachineBasicBlock *Pred : Dom->predecessors()) {
+ if (!inLoopLevel(*Pred, LoopLevel, Blocks))
+ SSAUpdater.AddAvailableValue(Pred, insertUndefLaneMask(*Pred));
+ }
+ }
+ }
+
+private:
+ bool inLoopLevel(MachineBasicBlock &MBB, unsigned LoopLevel,
+ ArrayRef<MachineBasicBlock *> Blocks) const {
+ auto DomIt = Visited.find(&MBB);
+ if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
+ return true;
+
+ if (llvm::find(Blocks, &MBB) != Blocks.end())
+ return true;
+
+ return false;
+ }
+
+ void advanceLevel() {
+ MachineBasicBlock *VisitedDom;
+
+ if (!VisitedPostDom) {
+ VisitedPostDom = DefBlock;
+ VisitedDom = DefBlock;
+ Stack.push_back(DefBlock);
+ } else {
+ VisitedPostDom = PDT.getNode(VisitedPostDom)->getIDom()->getBlock();
+ VisitedDom = CommonDominators.back();
+
+ for (unsigned i = 0; i < NextLevel.size();) {
+ if (PDT.dominates(VisitedPostDom, NextLevel[i])) {
+ Stack.push_back(NextLevel[i]);
+
+ NextLevel[i] = NextLevel.back();
+ NextLevel.pop_back();
+ } else {
+ i++;
+ }
+ }
+ }
+
+ unsigned Level = CommonDominators.size();
+ while (!Stack.empty()) {
+ MachineBasicBlock *MBB = Stack.pop_back_val();
+ if (!PDT.dominates(VisitedPostDom, MBB))
+ NextLevel.push_back(MBB);
+
+ Visited[MBB] = Level;
+ VisitedDom = DT.findNearestCommonDominator(VisitedDom, MBB);
+
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ == DefBlock) {
+ if (MBB == VisitedPostDom)
+ FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
+ else
+ FoundLoopLevel = std::min(FoundLoopLevel, Level);
+ continue;
+ }
+
+ if (Visited.try_emplace(Succ, ~0u).second) {
+ if (MBB == VisitedPostDom)
+ NextLevel.push_back(Succ);
+ else
+ Stack.push_back(Succ);
+ }
+ }
+ }
+
+ CommonDominators.push_back(VisitedDom);
+ }
};
} // End anonymous namespace.
-INITIALIZE_PASS(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower i1 Copies", false, false)
+INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE, "SI Lower i1 Copies", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE, "SI Lower i1 Copies", false,
+ false)
char SILowerI1Copies::ID = 0;
@@ -64,104 +411,415 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
return new SILowerI1Copies();
}
-bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
+static unsigned createLaneMaskReg(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ return MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+}
+
+static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ unsigned UndefReg = createLaneMaskReg(MF);
+ BuildMI(MBB, MBB.getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
+ UndefReg);
+ return UndefReg;
+}
- std::vector<unsigned> I1Defs;
+/// Lower all instructions that def or use vreg_1 registers.
+///
+/// In a first pass, we lower COPYs from vreg_1 to vector registers, as can
+/// occur around inline assembly. We do this first, before vreg_1 registers
+/// are changed to scalar mask registers.
+///
+/// Then we lower all defs of vreg_1 registers. Phi nodes are lowered before
+/// all others, because phi lowering looks through copies and can therefore
+/// often make copy lowering unnecessary.
+bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
+ MF = &TheMF;
+ MRI = &MF->getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
+ ST = &MF->getSubtarget<GCNSubtarget>();
+ TII = ST->getInstrInfo();
- MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
+ lowerCopiesFromI1();
+ lowerPhis();
+ lowerCopiesToI1();
- if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- if (RC == &AMDGPU::VReg_1RegClass)
- MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass);
- continue;
- }
+ for (unsigned Reg : ConstrainRegs)
+ MRI->constrainRegClass(Reg, &AMDGPU::SReg_64_XEXECRegClass);
+ ConstrainRegs.clear();
+
+ return true;
+}
+void SILowerI1Copies::lowerCopiesFromI1() {
+ SmallVector<MachineInstr *, 4> DeadCopies;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
if (MI.getOpcode() != AMDGPU::COPY)
continue;
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Src = MI.getOperand(1);
-
- if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
- !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
continue;
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
+ if (isLaneMaskReg(DstReg) ||
+ (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+ continue;
+ // Copy into a 32-bit vector register.
+ LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI);
DebugLoc DL = MI.getDebugLoc();
- MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
- if (DstRC == &AMDGPU::VReg_1RegClass &&
- TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
- I1Defs.push_back(Dst.getReg());
-
- if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
- if (DefInst->getOperand(1).isImm()) {
- I1Defs.push_back(Dst.getReg());
-
- int64_t Val = DefInst->getOperand(1).getImm();
- assert(Val == 0 || Val == -1);
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
- .add(Dst)
- .addImm(Val);
- MI.eraseFromParent();
- continue;
+
+ assert(TII->getRegisterInfo().getRegSizeInBits(DstReg, *MRI) == 32);
+ assert(!MI.getOperand(0).getSubReg());
+
+ ConstrainRegs.insert(SrcReg);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
+ .addImm(-1)
+ .addReg(SrcReg);
+ DeadCopies.push_back(&MI);
+ }
+
+ for (MachineInstr *MI : DeadCopies)
+ MI->eraseFromParent();
+ DeadCopies.clear();
+ }
+}
+
+void SILowerI1Copies::lowerPhis() {
+ MachineSSAUpdater SSAUpdater(*MF);
+ LoopFinder LF(*DT, *PDT);
+ PhiIncomingAnalysis PIA(*PDT);
+ SmallVector<MachineInstr *, 4> DeadPhis;
+ SmallVector<MachineBasicBlock *, 4> IncomingBlocks;
+ SmallVector<unsigned, 4> IncomingRegs;
+ SmallVector<unsigned, 4> IncomingUpdated;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ LF.initialize(MBB);
+
+ for (MachineInstr &MI : MBB.phis()) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
+
+ MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+
+ // Collect incoming values.
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ assert(i + 1 < MI.getNumOperands());
+ unsigned IncomingReg = MI.getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
+ MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
+
+ if (IncomingDef->getOpcode() == AMDGPU::COPY) {
+ IncomingReg = IncomingDef->getOperand(1).getReg();
+ assert(isLaneMaskReg(IncomingReg));
+ assert(!IncomingDef->getOperand(1).getSubReg());
+ } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ continue;
+ } else {
+ assert(IncomingDef->isPHI());
+ }
+
+ IncomingBlocks.push_back(IncomingMBB);
+ IncomingRegs.push_back(IncomingReg);
+ }
+
+ // Phis in a loop that are observed outside the loop receive a simple but
+ // conservatively correct treatment.
+ MachineBasicBlock *PostDomBound = &MBB;
+ for (MachineInstr &Use : MRI->use_instructions(DstReg)) {
+ PostDomBound =
+ PDT->findNearestCommonDominator(PostDomBound, Use.getParent());
+ }
+
+ unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
+
+ SSAUpdater.Initialize(DstReg);
+
+ if (FoundLoopLevel) {
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ IncomingUpdated.push_back(createLaneMaskReg(*MF));
+ SSAUpdater.AddAvailableValue(IncomingBlocks[i],
+ IncomingUpdated.back());
+ }
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ buildMergeLaneMasks(
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
+ }
+ } else {
+ // The phi is not observed from outside a loop. Use a more accurate
+ // lowering.
+ PIA.analyze(MBB, IncomingBlocks);
+
+ for (MachineBasicBlock *MBB : PIA.predecessors())
+ SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB));
+
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ if (PIA.isSource(IMBB)) {
+ IncomingUpdated.push_back(0);
+ SSAUpdater.AddAvailableValue(&IMBB, IncomingRegs[i]);
+ } else {
+ IncomingUpdated.push_back(createLaneMaskReg(*MF));
+ SSAUpdater.AddAvailableValue(&IMBB, IncomingUpdated.back());
}
}
- unsigned int TmpSrc = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::COPY), TmpSrc)
- .add(Src);
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
- .add(Dst)
- .addImm(0)
- .addImm(-1)
- .addReg(TmpSrc);
- MI.eraseFromParent();
- } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
- SrcRC == &AMDGPU::VReg_1RegClass) {
- if (DefInst->getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
- DefInst->getOperand(1).isImm() && DefInst->getOperand(2).isImm() &&
- DefInst->getOperand(1).getImm() == 0 &&
- DefInst->getOperand(2).getImm() != 0 &&
- DefInst->getOperand(3).isReg() &&
- TargetRegisterInfo::isVirtualRegister(
- DefInst->getOperand(3).getReg()) &&
- TRI->getCommonSubClass(
- MRI.getRegClass(DefInst->getOperand(3).getReg()),
- &AMDGPU::SGPR_64RegClass) &&
- AMDGPU::laneDominates(DefInst->getParent(), &MBB)) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64))
- .add(Dst)
- .addReg(AMDGPU::EXEC)
- .add(DefInst->getOperand(3));
- } else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64))
- .add(Dst)
- .add(Src)
- .addImm(0);
+ for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
+ if (!IncomingUpdated[i])
+ continue;
+
+ MachineBasicBlock &IMBB = *IncomingBlocks[i];
+ buildMergeLaneMasks(
+ IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
+ SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
}
- MI.eraseFromParent();
+ }
+
+ unsigned NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
+ if (NewReg != DstReg) {
+ MRI->replaceRegWith(NewReg, DstReg);
+
+ // Ensure that DstReg has a single def and mark the old PHI node for
+ // deletion.
+ MI.getOperand(0).setReg(NewReg);
+ DeadPhis.push_back(&MI);
+ }
+
+ IncomingBlocks.clear();
+ IncomingRegs.clear();
+ IncomingUpdated.clear();
+ }
+
+ for (MachineInstr *MI : DeadPhis)
+ MI->eraseFromParent();
+ DeadPhis.clear();
+ }
+}
+
+void SILowerI1Copies::lowerCopiesToI1() {
+ MachineSSAUpdater SSAUpdater(*MF);
+ LoopFinder LF(*DT, *PDT);
+ SmallVector<MachineInstr *, 4> DeadCopies;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ LF.initialize(MBB);
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
+ MI.getOpcode() != AMDGPU::COPY)
+ continue;
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ continue;
+
+ if (MRI->use_empty(DstReg)) {
+ DeadCopies.push_back(&MI);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
+
+ MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+ if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
+ continue;
+
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ assert(!MI.getOperand(1).getSubReg());
+
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !isLaneMaskReg(SrcReg)) {
+ assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
+ unsigned TmpReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ MI.getOperand(1).setReg(TmpReg);
+ SrcReg = TmpReg;
+ }
+
+ // Defs in a loop that are observed outside the loop must be transformed
+ // into appropriate bit manipulation.
+ MachineBasicBlock *PostDomBound = &MBB;
+ for (MachineInstr &Use : MRI->use_instructions(DstReg)) {
+ PostDomBound =
+ PDT->findNearestCommonDominator(PostDomBound, Use.getParent());
+ }
+
+ unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
+ if (FoundLoopLevel) {
+ SSAUpdater.Initialize(DstReg);
+ SSAUpdater.AddAvailableValue(&MBB, DstReg);
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater);
+
+ buildMergeLaneMasks(MBB, MI, DL, DstReg,
+ SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
+ DeadCopies.push_back(&MI);
}
}
+
+ for (MachineInstr *MI : DeadCopies)
+ MI->eraseFromParent();
+ DeadCopies.clear();
}
+}
- for (unsigned Reg : I1Defs)
- MRI.setRegClass(Reg, &AMDGPU::VGPR_32RegClass);
+bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
+ const MachineInstr *MI;
+ for (;;) {
+ MI = MRI->getUniqueVRegDef(Reg);
+ if (MI->getOpcode() != AMDGPU::COPY)
+ break;
+
+ Reg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (!isLaneMaskReg(Reg))
+ return false;
+ }
+
+ if (MI->getOpcode() != AMDGPU::S_MOV_B64)
+ return false;
+
+ if (!MI->getOperand(1).isImm())
+ return false;
+
+ int64_t Imm = MI->getOperand(1).getImm();
+ if (Imm == 0) {
+ Val = false;
+ return true;
+ }
+ if (Imm == -1) {
+ Val = true;
+ return true;
+ }
return false;
}
+
+static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
+ Def = false;
+ Use = false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
+ if (MO.isUse())
+ Use = true;
+ else
+ Def = true;
+ }
+ }
+}
+
+/// Return a point at the end of the given \p MBB to insert SALU instructions
+/// for lane mask calculation. Take terminators and SCC into account.
+MachineBasicBlock::iterator
+SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
+ auto InsertionPt = MBB.getFirstTerminator();
+ bool TerminatorsUseSCC = false;
+ for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
+ bool DefsSCC;
+ instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
+ if (TerminatorsUseSCC || DefsSCC)
+ break;
+ }
+
+ if (!TerminatorsUseSCC)
+ return InsertionPt;
+
+ while (InsertionPt != MBB.begin()) {
+ InsertionPt--;
+
+ bool DefSCC, UseSCC;
+ instrDefsUsesSCC(*InsertionPt, DefSCC, UseSCC);
+ if (DefSCC)
+ return InsertionPt;
+ }
+
+ // We should have at least seen an IMPLICIT_DEF or COPY
+ llvm_unreachable("SCC used by terminator but no def in block");
+}
+
+void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DstReg,
+ unsigned PrevReg, unsigned CurReg) {
+ bool PrevVal;
+ bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
+ bool CurVal;
+ bool CurConstant = isConstantLaneMask(CurReg, CurVal);
+
+ if (PrevConstant && CurConstant) {
+ if (PrevVal == CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
+ } else if (CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(AMDGPU::EXEC);
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), DstReg)
+ .addReg(AMDGPU::EXEC)
+ .addImm(-1);
+ }
+ return;
+ }
+
+ unsigned PrevMaskedReg = 0;
+ unsigned CurMaskedReg = 0;
+ if (!PrevConstant) {
+ if (CurConstant && CurVal) {
+ PrevMaskedReg = PrevReg;
+ } else {
+ PrevMaskedReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ANDN2_B64), PrevMaskedReg)
+ .addReg(PrevReg)
+ .addReg(AMDGPU::EXEC);
+ }
+ }
+ if (!CurConstant) {
+ // TODO: check whether CurReg is already masked by EXEC
+ if (PrevConstant && PrevVal) {
+ CurMaskedReg = CurReg;
+ } else {
+ CurMaskedReg = createLaneMaskReg(*MF);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), CurMaskedReg)
+ .addReg(CurReg)
+ .addReg(AMDGPU::EXEC);
+ }
+ }
+
+ if (PrevConstant && !PrevVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
+ .addReg(CurMaskedReg);
+ } else if (CurConstant && !CurVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
+ .addReg(PrevMaskedReg);
+ } else if (PrevConstant && PrevVal) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ORN2_B64), DstReg)
+ .addReg(CurMaskedReg)
+ .addReg(AMDGPU::EXEC);
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_OR_B64), DstReg)
+ .addReg(PrevMaskedReg)
+ .addReg(CurMaskedReg ? CurMaskedReg : (unsigned)AMDGPU::EXEC);
+ }
+}
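
The following standalone sketch is an editorial illustration, not part of the upstream patch. buildMergeLaneMasks realizes DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC), either with the full S_ANDN2/S_AND/S_OR sequence or with a constant-folded variant when an input is known to be all zeros or all ones; the sketch models lane masks as plain 64-bit integers and checks that each folded variant agrees with that identity.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Reference semantics of the merge: lanes outside EXEC keep the previous
// value, lanes inside EXEC take the current value.
static uint64_t mergeRef(uint64_t Prev, uint64_t Cur, uint64_t Exec) {
  return (Prev & ~Exec) | (Cur & Exec);
}

// Model of the folded sequences emitted when an input is a known constant
// (0 = all lanes false, ~0 = all lanes true).
static uint64_t mergeFolded(uint64_t Prev, bool PrevConst, uint64_t Cur,
                            bool CurConst, uint64_t Exec) {
  if (PrevConst && CurConst)
    return Prev == Cur ? Cur : (Cur ? Exec : ~Exec); // COPY or S_XOR with -1
  if (PrevConst && !Prev)
    return Cur & Exec;                               // S_AND cur, exec
  if (CurConst && !Cur)
    return Prev & ~Exec;                             // S_ANDN2 prev, exec
  if (PrevConst && Prev)
    return Cur | ~Exec;                              // S_ORN2 cur, exec
  if (CurConst && Cur)
    return Prev | Exec;                              // S_OR prev, exec
  return (Prev & ~Exec) | (Cur & Exec);              // S_ANDN2 + S_AND + S_OR
}

int main() {
  const uint64_t Samples[] = {0, ~0ull, 0x00000000ffffffffull,
                              0x5555555555555555ull, 0x8000000000000001ull};
  for (uint64_t Exec : Samples)
    for (uint64_t Prev : Samples)
      for (uint64_t Cur : Samples)
        for (int PC = 0; PC < 2; ++PC)
          for (int CC = 0; CC < 2; ++CC) {
            // A "constant" operand only makes sense for 0 or ~0.
            if (PC && Prev != 0 && Prev != ~0ull)
              continue;
            if (CC && Cur != 0 && Cur != ~0ull)
              continue;
            assert(mergeFolded(Prev, PC, Cur, CC, Exec) ==
                   mergeRef(Prev, Cur, Exec));
          }
  std::puts("all merge variants agree with the reference identity");
  return 0;
}
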
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0d5ff75e37ed..181cc41bd5ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -117,7 +117,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- bool MaySpill = ST.isVGPRSpillingEnabled(F);
bool HasStackObjects = FrameInfo.hasStackObjects();
if (isEntryFunction()) {
@@ -126,21 +125,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (WorkItemIDZ)
WorkItemIDY = true;
- if (HasStackObjects || MaySpill) {
- PrivateSegmentWaveByteOffset = true;
+ PrivateSegmentWaveByteOffset = true;
// HS and GS always have the scratch wave offset in SGPR5 on GFX9.
if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
(CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
- ArgInfo.PrivateSegmentWaveByteOffset
- = ArgDescriptor::createRegister(AMDGPU::SGPR5);
- }
+ ArgInfo.PrivateSegmentWaveByteOffset =
+ ArgDescriptor::createRegister(AMDGPU::SGPR5);
}
- bool IsCOV2 = ST.isAmdCodeObjectV2(F);
- if (IsCOV2) {
- if (HasStackObjects || MaySpill)
- PrivateSegmentBuffer = true;
+ bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+ if (isAmdHsaOrMesa) {
+ PrivateSegmentBuffer = true;
if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
DispatchPtr = true;
@@ -151,14 +147,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F.hasFnAttribute("amdgpu-dispatch-id"))
DispatchID = true;
} else if (ST.isMesaGfxShader(F)) {
- if (HasStackObjects || MaySpill)
- ImplicitBufferPtr = true;
+ ImplicitBufferPtr = true;
}
if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
- if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
// TODO: This could be refined a lot. The attribute is a poor way of
// detecting calls that may require it before argument lowering.
if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 18754442898f..fb7e670068fe 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -471,7 +471,7 @@ void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(DAG);
+ DAG->dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -611,13 +611,11 @@ void SIScheduleBlock::printDebug(bool full) {
dbgs() << "\nInstructions:\n";
if (!Scheduled) {
- for (SUnit* SU : SUnits) {
- SU->dump(DAG);
- }
+ for (const SUnit* SU : SUnits)
+ DAG->dumpNode(*SU);
} else {
- for (SUnit* SU : SUnits) {
- SU->dump(DAG);
- }
+ for (const SUnit* SU : SUnits)
+ DAG->dumpNode(*SU);
}
dbgs() << "///////////////////////\n";
@@ -1933,7 +1931,7 @@ void SIScheduleDAGMI::schedule()
LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");
buildDAGWithRegPressure();
- LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
+ LLVM_DEBUG(dump());
topologicalSort();
findRootsAndBiasEdges(TopRoots, BotRoots);
@@ -1957,12 +1955,12 @@ void SIScheduleDAGMI::schedule()
for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
- unsigned BaseLatReg;
+ MachineOperand *BaseLatOp;
int64_t OffLatReg;
if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
IsLowLatencySU[i] = 1;
- if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg,
- TRI))
+ if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
+ TRI))
LowLatencyOffset[i] = OffLatReg;
} else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
IsHighLatencySU[i] = 1;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 938cdaf1ef8f..b4a4e9e33133 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -202,8 +202,6 @@ public:
class SIMemOpAccess final {
private:
-
- AMDGPUAS SIAddrSpaceInfo;
AMDGPUMachineModuleInfo *MMI = nullptr;
/// Reports unsupported message \p Msg for \p MI to LLVM context.
@@ -255,7 +253,7 @@ protected:
/// Instruction info.
const SIInstrInfo *TII = nullptr;
- IsaInfo::IsaVersion IV;
+ IsaVersion IV;
SICacheControl(const GCNSubtarget &ST);
@@ -453,22 +451,21 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
}
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
- if (AS == SIAddrSpaceInfo.FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
return SIAtomicAddrSpace::FLAT;
- if (AS == SIAddrSpaceInfo.GLOBAL_ADDRESS)
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS)
return SIAtomicAddrSpace::GLOBAL;
- if (AS == SIAddrSpaceInfo.LOCAL_ADDRESS)
+ if (AS == AMDGPUAS::LOCAL_ADDRESS)
return SIAtomicAddrSpace::LDS;
- if (AS == SIAddrSpaceInfo.PRIVATE_ADDRESS)
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return SIAtomicAddrSpace::SCRATCH;
- if (AS == SIAddrSpaceInfo.REGION_ADDRESS)
+ if (AS == AMDGPUAS::REGION_ADDRESS)
return SIAtomicAddrSpace::GDS;
return SIAtomicAddrSpace::OTHER;
}
SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
- SIAddrSpaceInfo = getAMDGPUAS(MF.getTarget());
MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
}
@@ -608,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();
- IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
+ IV = getIsaVersion(ST.getCPU());
}
/* static */
@@ -815,6 +812,12 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
+ const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
+
+ const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
+ ? AMDGPU::BUFFER_WBINVL1
+ : AMDGPU::BUFFER_WBINVL1_VOL;
+
if (Pos == Position::AFTER)
++MI;
@@ -822,7 +825,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
+ BuildMI(MBB, MI, DL, TII->get(Flush));
Changed = true;
break;
case SIAtomicScope::WORKGROUP:
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
new file mode 100644
index 000000000000..883fd308f2f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -0,0 +1,406 @@
+//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass inserts changes to the Mode register settings as required.
+/// Note that currently it only deals with the Double Precision Floating Point
+/// rounding mode setting, but is intended to be generic enough to be easily
+/// expanded.
+///
+//===----------------------------------------------------------------------===//
+//
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <queue>
+
+#define DEBUG_TYPE "si-mode-register"
+
+STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
+
+using namespace llvm;
+
+struct Status {
+ // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
+ // known value
+ unsigned Mask;
+ unsigned Mode;
+
+ Status() : Mask(0), Mode(0){};
+
+ Status(unsigned Mask, unsigned Mode) : Mask(Mask), Mode(Mode) {
+ Mode &= Mask;
+ };
+
+ // merge two status values such that only values that don't conflict are
+ // preserved
+ Status merge(const Status &S) const {
+ return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
+ }
+
+ // merge an unknown value by using the unknown value's mask to remove bits
+ // from the result
+ Status mergeUnknown(unsigned newMask) {
+ return Status(Mask & ~newMask, Mode & ~newMask);
+ }
+
+ // intersect two Status values to produce a mode and mask that is a subset
+ // of both values
+ Status intersect(const Status &S) const {
+ unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
+ unsigned NewMode = (Mode & NewMask);
+ return Status(NewMask, NewMode);
+ }
+
+ // produce the delta required to change the Mode to the required Mode
+ Status delta(const Status &S) const {
+ return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
+ }
+
+ bool operator==(const Status &S) const {
+ return (Mask == S.Mask) && (Mode == S.Mode);
+ }
+
+ bool operator!=(const Status &S) const { return !(*this == S); }
+
+ bool isCompatible(Status &S) {
+ return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
+ }
+
+ bool isCombinable(Status &S) {
+ return !(Mask & S.Mask) || isCompatible(S);
+ }
+};
+
+class BlockData {
+public:
+ // The Status that represents the mode register settings required by the
+ // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
+ Status Require;
+
+ // The Status that represents the net changes to the Mode register made by
+ // this block. Calculated in Phase 1.
+ Status Change;
+
+ // The Status that represents the mode register settings on exit from this
+ // block. Calculated in Phase 2.
+ Status Exit;
+
+ // The Status that represents the intersection of exit Mode register settings
+ // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
+ Status Pred;
+
+ // In Phase 1 we record the first instruction that has a mode requirement,
+ // which is used in Phase 3 if we need to insert a mode change.
+ MachineInstr *FirstInsertionPoint;
+
+ BlockData() : FirstInsertionPoint(nullptr) {};
+};
+
+namespace {
+
+class SIModeRegister : public MachineFunctionPass {
+public:
+ static char ID;
+
+ std::vector<std::unique_ptr<BlockData>> BlockInfo;
+ std::queue<MachineBasicBlock *> Phase2List;
+
+ // The default mode register setting currently only caters for the floating
+ // point double precision rounding mode.
+ // We currently assume the default rounding mode is Round to Nearest.
+ // NOTE: this should come from a per-function rounding mode setting once such
+ // a setting exists.
+ unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
+ Status DefaultStatus =
+ Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
+
+public:
+ SIModeRegister() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
+
+ Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
+
+ void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
+ const SIInstrInfo *TII, Status InstrMode);
+};
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
+ "Insert required mode register values", false, false)
+
+char SIModeRegister::ID = 0;
+
+char &llvm::SIModeRegisterID = SIModeRegister::ID;
+
+FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
+
+// Determine the Mode register setting required for this instruction.
+// Instructions which don't use the Mode register return a null Status.
+// Note this currently only deals with instructions that use the floating point
+// double precision setting.
+Status SIModeRegister::getInstructionMode(MachineInstr &MI,
+ const SIInstrInfo *TII) {
+ if (TII->usesFPDPRounding(MI)) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_INTERP_P1LL_F16:
+ case AMDGPU::V_INTERP_P1LV_F16:
+ case AMDGPU::V_INTERP_P2_F16:
+ // f16 interpolation instructions need double precision round to zero
+ return Status(FP_ROUND_MODE_DP(3),
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
+ default:
+ return DefaultStatus;
+ }
+ }
+ return Status();
+}
+
+// Insert a setreg instruction to update the Mode register.
+// It is possible (though unlikely) for an instruction to require a change to
+// the value of disjoint parts of the Mode register when we don't know the
+// value of the intervening bits. In that case we need to use more than one
+// setreg instruction.
+void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
+ const SIInstrInfo *TII, Status InstrMode) {
+ while (InstrMode.Mask) {
+ unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
+ unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
+ unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
+ BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
+ .addImm(Value)
+ .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
+ (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
+ (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
+ ++NumSetregInserted;
+ InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
+ }
+}
+
+// In Phase 1 we iterate through the instructions of the block and for each
+// instruction we get its mode usage. If the instruction uses the Mode register
+// we:
+// - update the Change status, which tracks the changes to the Mode register
+// made by this block
+// - if this instruction's requirements are compatible with the current setting
+// of the Mode register we merge the modes
+// - if it isn't compatible and an InsertionPoint isn't set, then we set the
+// InsertionPoint to the current instruction, and we remember the current
+// mode
+// - if it isn't compatible and InsertionPoint is set we insert a setreg before
+// that instruction (unless this instruction forms part of the block's
+// entry requirements in which case the insertion is deferred until Phase 3
+// when predecessor exit values are known), and move the insertion point to
+// this instruction
+// - if this is a setreg instruction we treat it as an incompatible instruction.
+// This is sub-optimal but avoids some nasty corner cases, and is expected to
+// occur very rarely.
+// - on exit we have set the Require, Change, and initial Exit modes.
+void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+ auto NewInfo = llvm::make_unique<BlockData>();
+ MachineInstr *InsertionPoint = nullptr;
+ // RequirePending is used to indicate whether we are collecting the initial
+ // requirements for the block, and need to defer the first InsertionPoint to
+ // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
+ // we discover an explicit setreg that means this block doesn't have any
+ // initial requirements.
+ bool RequirePending = true;
+ Status IPChange;
+ for (MachineInstr &MI : MBB) {
+ Status InstrMode = getInstructionMode(MI, TII);
+ if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) ||
+ (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
+ // We preserve any explicit mode register setreg instruction we encounter,
+ // as we assume it has been inserted by a higher authority (this is
+ // likely to be a very rare occurrence).
+ unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
+ if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
+ AMDGPU::Hwreg::ID_MODE)
+ continue;
+
+ unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
+ AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
+ 1;
+ unsigned Offset =
+ (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
+ unsigned Mask = ((1 << Width) - 1) << Offset;
+
+ // If an InsertionPoint is set we will insert a setreg there.
+ if (InsertionPoint) {
+ insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
+ InsertionPoint = nullptr;
+ }
+ // If this is an immediate then we know the value being set, but if it is
+ // not an immediate then we treat the modified bits of the mode register
+ // as unknown.
+ if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
+ unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
+ unsigned Mode = (Val << Offset) & Mask;
+ Status Setreg = Status(Mask, Mode);
+ // If we haven't already set the initial requirements for the block we
+ // don't need to as the requirements start from this explicit setreg.
+ RequirePending = false;
+ NewInfo->Change = NewInfo->Change.merge(Setreg);
+ } else {
+ NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
+ }
+ } else if (!NewInfo->Change.isCompatible(InstrMode)) {
+ // This instruction uses the Mode register and its requirements aren't
+ // compatible with the current mode.
+ if (InsertionPoint) {
+ // If the required mode change cannot be included in the current
+ // InsertionPoint changes, we need a setreg and start a new
+ // InsertionPoint.
+ if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
+ if (RequirePending) {
+ // This is the first insertionPoint in the block so we will defer
+ // the insertion of the setreg to Phase 3 where we know whether or
+ // not it is actually needed.
+ NewInfo->FirstInsertionPoint = InsertionPoint;
+ NewInfo->Require = NewInfo->Change;
+ RequirePending = false;
+ } else {
+ insertSetreg(MBB, InsertionPoint, TII,
+ IPChange.delta(NewInfo->Change));
+ IPChange = NewInfo->Change;
+ }
+ // Set the new InsertionPoint
+ InsertionPoint = &MI;
+ }
+ NewInfo->Change = NewInfo->Change.merge(InstrMode);
+ } else {
+ // No InsertionPoint is currently set - this is either the first in
+ // the block or we have previously seen an explicit setreg.
+ InsertionPoint = &MI;
+ IPChange = NewInfo->Change;
+ NewInfo->Change = NewInfo->Change.merge(InstrMode);
+ }
+ }
+ }
+ if (RequirePending) {
+ // If we haven't yet set the initial requirements for the block we set them
+ // now.
+ NewInfo->FirstInsertionPoint = InsertionPoint;
+ NewInfo->Require = NewInfo->Change;
+ } else if (InsertionPoint) {
+ // We need to insert a setreg at the InsertionPoint
+ insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
+ }
+ NewInfo->Exit = NewInfo->Change;
+ BlockInfo[MBB.getNumber()] = std::move(NewInfo);
+}
+
+// In Phase 2 we revisit each block and calculate the common Mode register
+// value provided by all predecessor blocks. If the Exit value for the block
+// is changed, then we add the successor blocks to the worklist so that the
+// exit value is propagated.
+void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+// BlockData *BI = BlockInfo[MBB.getNumber()];
+ unsigned ThisBlock = MBB.getNumber();
+ if (MBB.pred_empty()) {
+ // There are no predecessors, so use the default starting status.
+ BlockInfo[ThisBlock]->Pred = DefaultStatus;
+ } else {
+ // Build a status that is common to all the predecessors by intersecting
+ // all the predecessor exit status values.
+ MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
+ MachineBasicBlock &PB = *(*P);
+ BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
+
+ for (P = std::next(P); P != E; P = std::next(P)) {
+ MachineBasicBlock *Pred = *P;
+ BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit);
+ }
+ }
+ Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
+ if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
+ BlockInfo[ThisBlock]->Exit = TmpStatus;
+ // Add the successors to the work list so we can propagate the changed exit
+ // status.
+ for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
+ E = MBB.succ_end();
+ S != E; S = std::next(S)) {
+ MachineBasicBlock &B = *(*S);
+ Phase2List.push(&B);
+ }
+ }
+}
+
+// In Phase 3 we revisit each block and if it has an insertion point defined we
+// check whether the predecessor mode meets the block's entry requirements. If
+// not we insert an appropriate setreg instruction to modify the Mode register.
+void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
+ const SIInstrInfo *TII) {
+// BlockData *BI = BlockInfo[MBB.getNumber()];
+ unsigned ThisBlock = MBB.getNumber();
+ if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
+ Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
+ if (BlockInfo[ThisBlock]->FirstInsertionPoint)
+ insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
+ else
+ insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
+ }
+}
+
+bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
+ BlockInfo.resize(MF.getNumBlockIDs());
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ // Processing is performed in a number of phases
+
+ // Phase 1 - determine the initial mode required by each block, and add setreg
+ // instructions for intra block requirements.
+ for (MachineBasicBlock &BB : MF)
+ processBlockPhase1(BB, TII);
+
+ // Phase 2 - determine the exit mode from each block. We add all blocks to the
+ // list here, but will also add any that need to be revisited during Phase 2
+ // processing.
+ for (MachineBasicBlock &BB : MF)
+ Phase2List.push(&BB);
+ while (!Phase2List.empty()) {
+ processBlockPhase2(*Phase2List.front(), TII);
+ Phase2List.pop();
+ }
+
+ // Phase 3 - add an initial setreg to each block where the required entry mode
+ // is not satisfied by the exit mode of all its predecessors.
+ for (MachineBasicBlock &BB : MF)
+ processBlockPhase3(BB, TII);
+
+ BlockInfo.clear();
+
+ return NumSetregInserted > 0;
+}
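
The three-phase dataflow above is driven entirely by the small bit arithmetic in Status: merge overlays newly known bits, intersect keeps only bits that are known and equal on every incoming path, and delta yields the bits a setreg actually has to program. The standalone model below restates those formulas on plain unsigned values so they can be checked on concrete numbers; it is an illustrative sketch with made-up field values (0xF, 0xC and so on), not code from the patch:

    #include <cstdio>

    struct Bits {
      unsigned Mask = 0; // which mode bits have a known value
      unsigned Mode = 0; // the known values of those bits
    };

    // Overlay B onto A: bits known by B take B's value, the rest keep A's.
    static Bits merge(Bits A, Bits B) {
      return {A.Mask | B.Mask, (A.Mode & ~B.Mask) | (B.Mode & B.Mask)};
    }

    // Keep only the bits that are known in both A and B and agree.
    static Bits intersect(Bits A, Bits B) {
      unsigned M = (A.Mask & B.Mask) & ~(A.Mode ^ B.Mode);
      return {M, A.Mode & M};
    }

    // Bits that Req needs and that are unknown or wrong in Cur.
    static Bits delta(Bits Cur, Bits Req) {
      return {(Req.Mask & (Cur.Mode ^ Req.Mode)) | (~Cur.Mask & Req.Mask), Req.Mode};
    }

    int main() {
      Bits Cur{0xF, 0x0};  // four mode bits known, all currently zero
      Bits Req{0xC, 0xC};  // an instruction needs the top two of them set
      Bits D = delta(Cur, Req);    // {0xC, 0xC}: only those two bits need a setreg
      Bits After = merge(Cur, D);  // {0xF, 0xC}: state once the setreg has run
      Bits Other{0xF, 0x4};        // exit state of some other predecessor
      Bits In = intersect(After, Other); // {0x7, 0x4}: bit 3 disagrees, so unknown
      std::printf("delta %x/%x after %x/%x in %x/%x\n",
                  D.Mask, D.Mode, After.Mask, After.Mode, In.Mask, In.Mode);
      return 0;
    }
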
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 7b678d12ba81..c671fed34bdf 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -103,6 +103,122 @@ static MachineInstr* getOrExecSource(const MachineInstr &MI,
return SaveExecInst;
}
+// Optimize sequence
+// %sel = V_CNDMASK_B32_e64 0, 1, %cc
+// %cmp = V_CMP_NE_U32 1, %1
+// $vcc = S_AND_B64 $exec, %cmp
+// S_CBRANCH_VCC[N]Z
+// =>
+// $vcc = S_ANDN2_B64 $exec, %cc
+// S_CBRANCH_VCC[N]Z
+//
+// This is the negation pattern inserted by DAGCombiner::visitBRCOND() in
+// rebuildSetCC(). We start from the S_CBRANCH to avoid an exhaustive search,
+// but only the first three instructions are really needed. The S_AND_B64 with
+// exec is a required part of the pattern since V_CNDMASK_B32 writes zeroes
+// for inactive lanes.
+//
+// Returns %cc register on success.
+static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
+ const GCNSubtarget &ST,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) {
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const unsigned AndOpc = AMDGPU::S_AND_B64;
+ const unsigned Andn2Opc = AMDGPU::S_ANDN2_B64;
+ const unsigned CondReg = AMDGPU::VCC;
+ const unsigned ExecReg = AMDGPU::EXEC;
+
+ auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return Opc == AMDGPU::S_CBRANCH_VCCZ ||
+ Opc == AMDGPU::S_CBRANCH_VCCNZ; });
+ if (I == MBB.terminators().end())
+ return AMDGPU::NoRegister;
+
+ auto *And = TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister,
+ *I, MRI, LIS);
+ if (!And || And->getOpcode() != AndOpc ||
+ !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
+ return AMDGPU::NoRegister;
+
+ MachineOperand *AndCC = &And->getOperand(1);
+ unsigned CmpReg = AndCC->getReg();
+ unsigned CmpSubReg = AndCC->getSubReg();
+ if (CmpReg == ExecReg) {
+ AndCC = &And->getOperand(2);
+ CmpReg = AndCC->getReg();
+ CmpSubReg = AndCC->getSubReg();
+ } else if (And->getOperand(2).getReg() != ExecReg) {
+ return AMDGPU::NoRegister;
+ }
+
+ auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, MRI, LIS);
+ if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
+ Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
+ Cmp->getParent() != And->getParent())
+ return AMDGPU::NoRegister;
+
+ MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
+ MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
+ if (Op1->isImm() && Op2->isReg())
+ std::swap(Op1, Op2);
+ if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
+ return AMDGPU::NoRegister;
+
+ unsigned SelReg = Op1->getReg();
+ auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS);
+ if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+ return AMDGPU::NoRegister;
+
+ Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
+ Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
+ MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
+ if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
+ Op1->getImm() != 0 || Op2->getImm() != 1)
+ return AMDGPU::NoRegister;
+
+ LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
+ << *Cmp << '\t' << *And);
+
+ unsigned CCReg = CC->getReg();
+ LIS->RemoveMachineInstrFromMaps(*And);
+ MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
+ TII->get(Andn2Opc), And->getOperand(0).getReg())
+ .addReg(ExecReg)
+ .addReg(CCReg, CC->getSubReg());
+ And->eraseFromParent();
+ LIS->InsertMachineInstrInMaps(*Andn2);
+
+ LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
+
+ // Try to remove the compare. The Cmp value must not be used between the
+ // cmp and the s_and_b64 if it is VCC, and must simply be unused if it is
+ // any other register.
+ if ((TargetRegisterInfo::isVirtualRegister(CmpReg) &&
+ MRI.use_nodbg_empty(CmpReg)) ||
+ (CmpReg == CondReg &&
+ std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
+ [&](const MachineInstr &MI) {
+ return MI.readsRegister(CondReg, TRI); }))) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
+
+ LIS->RemoveMachineInstrFromMaps(*Cmp);
+ Cmp->eraseFromParent();
+
+ // Try to remove v_cndmask_b32.
+ if (TargetRegisterInfo::isVirtualRegister(SelReg) &&
+ MRI.use_nodbg_empty(SelReg)) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+
+ LIS->RemoveMachineInstrFromMaps(*Sel);
+ Sel->eraseFromParent();
+ }
+ }
+
+ return CCReg;
+}
+
bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -117,9 +233,24 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
+ if (unsigned Reg = optimizeVcndVcmpPair(MBB, ST, MRI, LIS)) {
+ RecalcRegs.insert(Reg);
+ RecalcRegs.insert(AMDGPU::VCC_LO);
+ RecalcRegs.insert(AMDGPU::VCC_HI);
+ RecalcRegs.insert(AMDGPU::SCC);
+ Changed = true;
+ }
+
// Try to remove unneeded instructions before s_endpgm.
if (MBB.succ_empty()) {
- if (MBB.empty() || MBB.back().getOpcode() != AMDGPU::S_ENDPGM)
+ if (MBB.empty())
+ continue;
+
+ // Skip this if the endpgm has any implicit uses, otherwise we would need
+ // to be careful to update / remove them.
+ MachineInstr &Term = MBB.back();
+ if (Term.getOpcode() != AMDGPU::S_ENDPGM ||
+ Term.getNumOperands() != 0)
continue;
SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
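
Why the S_ANDN2 rewrite above is sound: for active lanes the V_CNDMASK materializes %cc as 0 or 1, the V_CMP_NE_U32 against 1 inverts it, and the final S_AND with exec discards inactive lanes, so the resulting vcc is exec & ~cc. A small self-checking sketch of that identity on plain 64-bit lane masks (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // One bit per lane. V_CNDMASK_B32 0, 1, cc writes 1 only in active lanes
    // where cc is set, so as a lane mask the select is simply cc & exec.
    static uint64_t beforeFold(uint64_t CC, uint64_t Exec) {
      uint64_t Sel = CC & Exec; // %sel = V_CNDMASK_B32_e64 0, 1, %cc
      uint64_t Cmp = ~Sel;      // %cmp = V_CMP_NE_U32 1, %sel (inactive-lane bits
                                // don't matter; the S_AND below masks them anyway)
      return Exec & Cmp;        // $vcc = S_AND_B64 $exec, %cmp
    }

    static uint64_t afterFold(uint64_t CC, uint64_t Exec) {
      return Exec & ~CC;        // $vcc = S_ANDN2_B64 $exec, %cc
    }

    int main() {
      const uint64_t CCs[] = {0x0, 0xFF, 0xA5A5A5A5A5A5A5A5ull};
      const uint64_t Execs[] = {~0ull, 0x0F, 0xF0F0F0F0F0F0F0F0ull};
      for (uint64_t CC : CCs)
        for (uint64_t Exec : Execs)
          assert(beforeFold(CC, Exec) == afterFold(CC, Exec));
      return 0;
    }
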
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 0e000b72962e..2d43d5d05ef6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -90,7 +90,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void matchSDWAOperands(MachineBasicBlock &MBB);
std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
- bool isConvertibleToSDWA(const MachineInstr &MI, const GCNSubtarget &ST) const;
+ bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST) const;
+ void pseudoOpConvertToVOP2(MachineInstr &MI,
+ const GCNSubtarget &ST) const;
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
@@ -854,7 +856,82 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
}
}
-bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
+// Convert the V_ADDC_U32_e64 into V_ADDC_U32_e32, and
+// V_ADD_I32_e64 into V_ADD_I32_e32. This allows isConvertibleToSDWA
+// to perform its transformation on V_ADD_I32_e32 into V_ADD_I32_sdwa.
+//
+// We are transforming from a VOP3 into a VOP2 form of the instruction.
+// %19:vgpr_32 = V_AND_B32_e32 255,
+// killed %16:vgpr_32, implicit $exec
+// %47:vgpr_32, %49:sreg_64_xexec = V_ADD_I32_e64
+// %26.sub0:vreg_64, %19:vgpr_32, implicit $exec
+// %48:vgpr_32, dead %50:sreg_64_xexec = V_ADDC_U32_e64
+// %26.sub1:vreg_64, %54:vgpr_32, killed %49:sreg_64_xexec, implicit $exec
+//
+// becomes
+// %47:vgpr_32 = V_ADD_I32_sdwa
+// 0, %26.sub0:vreg_64, 0, killed %16:vgpr_32, 0, 6, 0, 6, 0,
+// implicit-def $vcc, implicit $exec
+// %48:vgpr_32 = V_ADDC_U32_e32
+// 0, %26.sub1:vreg_64, implicit-def $vcc, implicit $vcc, implicit $exec
+void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
+ const GCNSubtarget &ST) const {
+ int Opc = MI.getOpcode();
+ assert((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64) &&
+ "Currently only handles V_ADD_I32_e64 or V_SUB_I32_e64");
+
+ // Can the candidate MI be shrunk?
+ if (!TII->canShrink(MI, *MRI))
+ return;
+ Opc = AMDGPU::getVOPe32(Opc);
+ // Find the instruction that consumes the carry-out (the related ADDC/SUBB).
+ const MachineOperand *Sdst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ if (!Sdst)
+ return;
+ MachineOperand *NextOp = findSingleRegUse(Sdst, MRI);
+ if (!NextOp)
+ return;
+ MachineInstr &MISucc = *NextOp->getParent();
+ // Can the successor be shrunk?
+ if (!TII->canShrink(MISucc, *MRI))
+ return;
+ int SuccOpc = AMDGPU::getVOPe32(MISucc.getOpcode());
+ // Make sure the carry in/out are subsequently unused.
+ MachineOperand *CarryIn = TII->getNamedOperand(MISucc, AMDGPU::OpName::src2);
+ if (!CarryIn)
+ return;
+ MachineOperand *CarryOut = TII->getNamedOperand(MISucc, AMDGPU::OpName::sdst);
+ if (!CarryOut)
+ return;
+ if (!MRI->hasOneUse(CarryIn->getReg()) || !MRI->use_empty(CarryOut->getReg()))
+ return;
+ // Make sure VCC or its subregs are dead before MI.
+ MachineBasicBlock &MBB = *MI.getParent();
+ auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return;
+ // Check if VCC is referenced in range of (MI,MISucc].
+ for (auto I = std::next(MI.getIterator()), E = MISucc.getIterator();
+ I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::VCC, TRI))
+ return;
+ }
+ // Make the two new e32 instruction variants.
+ // Replace MI with V_{SUB|ADD}_I32_e32
+ auto NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+ NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1));
+ MI.eraseFromParent();
+ // Replace MISucc with V_{SUBB|ADDC}_U32_e32
+ auto NewInst = BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0));
+ NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1));
+ MISucc.eraseFromParent();
+}
+
+bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
const GCNSubtarget &ST) const {
// Check if this is already an SDWA instruction
unsigned Opc = MI.getOpcode();
@@ -1127,6 +1204,22 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
bool Changed = false;
do {
+ // Preprocess the ADD/SUB pairs so they could be SDWA'ed.
+ // Look for a possible ADD or SUB that resulted from a previously lowered
+ // V_{ADD|SUB}_U64_PSEUDO. The function pseudoOpConvertToVOP2
+ // lowers the pair of instructions into e32 form.
+ matchSDWAOperands(MBB);
+ for (const auto &OperandPair : SDWAOperands) {
+ const auto &Operand = OperandPair.second;
+ MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
+ if (PotentialMI &&
+ (PotentialMI->getOpcode() == AMDGPU::V_ADD_I32_e64 ||
+ PotentialMI->getOpcode() == AMDGPU::V_SUB_I32_e64))
+ pseudoOpConvertToVOP2(*PotentialMI, ST);
+ }
+ SDWAOperands.clear();
+
+ // Generate potential match list.
matchSDWAOperands(MBB);
for (const auto &OperandPair : SDWAOperands) {
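
The e64 pair targeted above is the usual split of a 64-bit add into a 32-bit add that produces a carry and an add-with-carry that consumes it, which is why the rewrite requires the carry produced by the first add to have a single consumer, the second carry-out to be unused, and VCC to be free across the pair. A plain-integer sketch of that structure (hypothetical helper, not patch code):

    #include <cassert>
    #include <cstdint>

    // Model of the V_ADD_I32 / V_ADDC_U32 pair: the low add produces a carry
    // (the sdst, or implicit VCC once shrunk to e32) and the high add consumes
    // it. If anything else read or clobbered that carry in between, rewriting
    // the pair to the e32 forms would not be safe.
    static uint64_t add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
      uint32_t Lo = ALo + BLo;
      uint32_t Carry = Lo < ALo ? 1u : 0u; // carry-out of the low add
      uint32_t Hi = AHi + BHi + Carry;     // add-with-carry for the high half
      return (uint64_t(Hi) << 32) | Lo;
    }

    int main() {
      assert(add64(0xFFFFFFFFu, 0, 1, 0) == 0x100000000ull);
      return 0;
    }
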
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 624607f6ea54..97cfde2b2354 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -18,9 +18,12 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -495,15 +498,16 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
return false;
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
- MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .add(*Reg)
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MachineInstrBuilder NewMI =
+ BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+ .add(*Reg)
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .cloneMemRefs(*MI);
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata_in);
@@ -900,7 +904,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
.addImm(0) // glc
.addMemOperand(MMO);
- if (NumSubRegs > 1)
+ if (NumSubRegs > 1 && i == 0)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
continue;
@@ -914,7 +918,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
- if (NumSubRegs > 1)
+ if (NumSubRegs > 1 && i == 0)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
} else {
if (OnlyToVGPR)
@@ -1598,3 +1602,57 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
llvm_unreachable("not implemented");
}
}
+
+// Find reaching register definition
+MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
+ MachineInstr &Use,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) const {
+ auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
+ SlotIndex UseIdx = LIS->getInstructionIndex(Use);
+ SlotIndex DefIdx;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!LIS->hasInterval(Reg))
+ return nullptr;
+ LiveInterval &LI = LIS->getInterval(Reg);
+ LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ VNInfo *V = nullptr;
+ if (LI.hasSubRanges()) {
+ for (auto &S : LI.subranges()) {
+ if ((S.LaneMask & SubLanes) == SubLanes) {
+ V = S.getVNInfoAt(UseIdx);
+ break;
+ }
+ }
+ } else {
+ V = LI.getVNInfoAt(UseIdx);
+ }
+ if (!V)
+ return nullptr;
+ DefIdx = V->def;
+ } else {
+ // Find last def.
+ for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
+ LiveRange &LR = LIS->getRegUnit(*Units);
+ if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
+ if (!DefIdx.isValid() ||
+ MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
+ LIS->getInstructionFromIndex(V->def)))
+ DefIdx = V->def;
+ } else {
+ return nullptr;
+ }
+ }
+ }
+
+ MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
+
+ if (!Def || !MDT.dominates(Def, &Use))
+ return nullptr;
+
+ assert(Def->modifiesRegister(Reg, this));
+
+ return Def;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 5a51b67ca719..b82fefde47e1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -228,6 +228,12 @@ public:
getConstrainedRegClassForOperand(const MachineOperand &MO,
const MachineRegisterInfo &MRI) const override;
+ // Find reaching register definition
+ MachineInstr *findReachingDef(unsigned Reg, unsigned SubReg,
+ MachineInstr &Use,
+ MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) const;
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index f87a0763b353..c625ecc9b750 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -435,7 +435,7 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
let AllocationPriority = 7;
}
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
let CopyCost = 1;
let AllocationPriority = 8;
}
@@ -444,13 +444,13 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add
let isAllocatable = 0;
}
-def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
+def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 8;
}
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
let AllocationPriority = 8;
@@ -459,15 +459,15 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1, v4i16, v4f16], 32,
// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {
-def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add SGPR_128Regs)> {
let AllocationPriority = 10;
}
-def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add TTMP_128Regs)> {
let isAllocatable = 0;
}
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64, v2f64], 32,
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
(add SGPR_128, TTMP_128)> {
let AllocationPriority = 10;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 4189bcce52ea..6ad7dd0e3a7c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -64,59 +64,6 @@ FunctionPass *llvm::createSIShrinkInstructionsPass() {
return new SIShrinkInstructions();
}
-static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
- const SIRegisterInfo &TRI,
- const MachineRegisterInfo &MRI) {
-
- const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- // Can't shrink instruction with three operands.
- // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
- // a special case for it. It can only be shrunk if the third operand
- // is vcc. We should handle this the same way we handle vopc, by addding
- // a register allocation hint pre-regalloc and then do the shrinking
- // post-regalloc.
- if (Src2) {
- switch (MI.getOpcode()) {
- default: return false;
-
- case AMDGPU::V_ADDC_U32_e64:
- case AMDGPU::V_SUBB_U32_e64:
- case AMDGPU::V_SUBBREV_U32_e64: {
- const MachineOperand *Src1
- = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (!Src1->isReg() || !TRI.isVGPR(MRI, Src1->getReg()))
- return false;
- // Additional verification is needed for sdst/src2.
- return true;
- }
- case AMDGPU::V_MAC_F32_e64:
- case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_FMAC_F32_e64:
- if (!Src2->isReg() || !TRI.isVGPR(MRI, Src2->getReg()) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
- return false;
- break;
-
- case AMDGPU::V_CNDMASK_B32_e64:
- break;
- }
- }
-
- const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1 && (!Src1->isReg() || !TRI.isVGPR(MRI, Src1->getReg()) ||
- TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
- return false;
-
- // We don't need to check src0, all input types are legal, so just make sure
- // src0 isn't using any modifiers.
- if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
- return false;
-
- // Check output modifiers
- return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
- !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
-}
-
/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
@@ -173,19 +120,6 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
return false;
}
-// Copy MachineOperand with all flags except setting it as implicit.
-static void copyFlagsToImplicitVCC(MachineInstr &MI,
- const MachineOperand &Orig) {
-
- for (MachineOperand &Use : MI.implicit_operands()) {
- if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
- Use.setIsUndef(Orig.isUndef());
- Use.setIsKill(Orig.isKill());
- return;
- }
- }
-}
-
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
return isInt<16>(Src.getImm()) &&
!TII->isInlineConstant(*Src.getParent(),
@@ -278,6 +212,245 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
}
}
+/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
+/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
+/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
+/// XNOR (as a ^ b == ~(a ^ ~b)).
+/// \returns true if the caller should continue the machine function iterator
+static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
+ MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII,
+ MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ const MachineOperand *Dest = &MI.getOperand(0);
+ MachineOperand *Src0 = &MI.getOperand(1);
+ MachineOperand *Src1 = &MI.getOperand(2);
+ MachineOperand *SrcReg = Src0;
+ MachineOperand *SrcImm = Src1;
+
+ if (SrcImm->isImm() &&
+ !AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm())) {
+ uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
+ uint32_t NewImm = 0;
+
+ if (Opc == AMDGPU::S_AND_B32) {
+ if (isPowerOf2_32(~Imm)) {
+ NewImm = countTrailingOnes(Imm);
+ Opc = AMDGPU::S_BITSET0_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ANDN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_OR_B32) {
+ if (isPowerOf2_32(Imm)) {
+ NewImm = countTrailingZeros(Imm);
+ Opc = AMDGPU::S_BITSET1_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ORN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_XOR_B32) {
+ if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_XNOR_B32;
+ }
+ } else {
+ llvm_unreachable("unexpected opcode");
+ }
+
+ if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
+ SrcImm == Src0) {
+ if (!TII->commuteInstruction(MI, false, 1, 2))
+ NewImm = 0;
+ }
+
+ if (NewImm != 0) {
+ if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
+ SrcReg->isReg()) {
+ MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
+ MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+ return true;
+ }
+
+ if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
+ MI.setDesc(TII->get(Opc));
+ if (Opc == AMDGPU::S_BITSET0_B32 ||
+ Opc == AMDGPU::S_BITSET1_B32) {
+ Src0->ChangeToImmediate(NewImm);
+ MI.RemoveOperand(2);
+ } else {
+ SrcImm->setImm(NewImm);
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+// This is the same as MachineInstr::readsRegister/modifiesRegister except
+// it takes subregs into account.
+static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ for (const MachineOperand &MO : R) {
+ if (!MO.isReg())
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return true;
+ } else if (MO.getReg() == Reg &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
+ TRI.getSubRegIndexLaneMask(MO.getSubReg());
+ if (Overlap.any())
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool instReadsReg(const MachineInstr *MI,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ return instAccessReg(MI->uses(), Reg, SubReg, TRI);
+}
+
+static bool instModifiesReg(const MachineInstr *MI,
+ unsigned Reg, unsigned SubReg,
+ const SIRegisterInfo &TRI) {
+ return instAccessReg(MI->defs(), Reg, SubReg, TRI);
+}
+
+static TargetInstrInfo::RegSubRegPair
+getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
+ const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
+ if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
+ } else {
+ LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
+ Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
+ }
+ }
+ return TargetInstrInfo::RegSubRegPair(Reg, Sub);
+}
+
+// Match:
+// mov t, x
+// mov x, y
+// mov y, t
+//
+// =>
+//
+// mov t, x (t is potentially dead and move eliminated)
+// v_swap_b32 x, y
+//
+// Returns a pointer to the next valid instruction if it was able to create a
+// v_swap_b32.
+//
+// This should not be done too early, so as not to prevent possible folding
+// that may remove the matched moves; it should preferably be done before RA,
+// to release the saved registers, but could also run after RA, which can
+// insert copies too.
+//
+// This is really just a generic peephole rather than a canonical shrinking,
+// although its requirements match the pass placement and it reduces code size
+// too.
+static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII) {
+ assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MovT.getOpcode() == AMDGPU::COPY);
+
+ unsigned T = MovT.getOperand(0).getReg();
+ unsigned Tsub = MovT.getOperand(0).getSubReg();
+ MachineOperand &Xop = MovT.getOperand(1);
+
+ if (!Xop.isReg())
+ return nullptr;
+ unsigned X = Xop.getReg();
+ unsigned Xsub = Xop.getSubReg();
+
+ unsigned Size = TII->getOpSize(MovT, 0) / 4;
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ if (!TRI.isVGPR(MRI, X))
+ return nullptr;
+
+ for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
+ if (YTop.getSubReg() != Tsub)
+ continue;
+
+ MachineInstr &MovY = *YTop.getParent();
+ if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ MovY.getOpcode() != AMDGPU::COPY) ||
+ MovY.getOperand(1).getSubReg() != Tsub)
+ continue;
+
+ unsigned Y = MovY.getOperand(0).getReg();
+ unsigned Ysub = MovY.getOperand(0).getSubReg();
+
+ if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
+ continue;
+
+ MachineInstr *MovX = nullptr;
+ auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
+ for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
+ if (instReadsReg(&*I, X, Xsub, TRI) ||
+ instModifiesReg(&*I, Y, Ysub, TRI) ||
+ instModifiesReg(&*I, T, Tsub, TRI) ||
+ (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
+ MovX = nullptr;
+ break;
+ }
+ if (!instReadsReg(&*I, Y, Ysub, TRI)) {
+ if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
+ MovX = nullptr;
+ break;
+ }
+ continue;
+ }
+ if (MovX ||
+ (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ I->getOpcode() != AMDGPU::COPY) ||
+ I->getOperand(0).getReg() != X ||
+ I->getOperand(0).getSubReg() != Xsub) {
+ MovX = nullptr;
+ break;
+ }
+ MovX = &*I;
+ }
+
+ if (!MovX || I == E)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
+
+ for (unsigned I = 0; I < Size; ++I) {
+ TargetInstrInfo::RegSubRegPair X1, Y1;
+ X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
+ Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
+ BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
+ TII->get(AMDGPU::V_SWAP_B32))
+ .addDef(X1.Reg, 0, X1.SubReg)
+ .addDef(Y1.Reg, 0, Y1.SubReg)
+ .addReg(Y1.Reg, 0, Y1.SubReg)
+ .addReg(X1.Reg, 0, X1.SubReg).getInstr();
+ }
+ MovX->eraseFromParent();
+ MovY.eraseFromParent();
+ MachineInstr *Next = &*std::next(MovT.getIterator());
+ if (MRI.use_nodbg_empty(T))
+ MovT.eraseFromParent();
+ else
+ Xop.setIsKill(false);
+
+ return Next;
+ }
+
+ return nullptr;
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -285,7 +458,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
std::vector<unsigned> I1Defs;
@@ -319,6 +491,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI.getOpcode() == AMDGPU::COPY)) {
+ if (auto *NextMI = matchSwap(MI, MRI, TII)) {
+ Next = NextMI->getIterator();
+ continue;
+ }
+ }
+
// Combine adjacent s_nops to use the immediate operand encoding how long
// to wait.
//
@@ -408,14 +588,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ // Shrink scalar logic operations.
+ if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
+ MI.getOpcode() == AMDGPU::S_OR_B32 ||
+ MI.getOpcode() == AMDGPU::S_XOR_B32) {
+ if (shrinkScalarLogicOp(ST, MRI, TII, MI))
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
- if (!canShrink(MI, TII, TRI, MRI)) {
+ if (!TII->canShrink(MI, MRI)) {
// Try commuting the instruction and see if that enables us to shrink
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
- !canShrink(MI, TII, TRI, MRI))
+ !TII->canShrink(MI, MRI))
continue;
}
@@ -488,40 +676,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// We can shrink this instruction
LLVM_DEBUG(dbgs() << "Shrinking " << MI);
- MachineInstrBuilder Inst32 =
- BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
-
- // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
- // For VOPC instructions, this is replaced by an implicit def of vcc.
- int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
- if (Op32DstIdx != -1) {
- // dst
- Inst32.add(MI.getOperand(0));
- } else {
- assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
- "Unexpected case");
- }
-
-
- Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
-
- const MachineOperand *Src1 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1)
- Inst32.add(*Src1);
-
- if (Src2) {
- int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
- if (Op32Src2Idx != -1) {
- Inst32.add(*Src2);
- } else {
- // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
- // replaced with an implicit read of vcc. This was already added
- // during the initial BuildMI, so find it to preserve the flags.
- copyFlagsToImplicitVCC(*Inst32, *Src2);
- }
- }
-
+ MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
++NumInstructionsShrunk;
// Copy extra operands not present in the instruction definition.
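
The scalar logic shrinking added above rests on three small bit identities: an AND whose inverted immediate is a power of two clears exactly one bit (S_BITSET0), an OR with a power-of-two immediate sets one bit (S_BITSET1), and a ^ b == ~(a ^ ~b) lets an XOR with a non-inlinable immediate become an XNOR with its complement. A tiny self-checking sketch of those identities with made-up immediates (not patch code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678u;

      // s_and_b32 x, 0xFFFFFFBF  ->  s_bitset0 x, 6   (~imm is the power of two 0x40)
      assert((X & 0xFFFFFFBFu) == (X & ~(1u << 6)));

      // s_or_b32 x, 0x00010000   ->  s_bitset1 x, 16  (imm itself is a power of two)
      assert((X | 0x00010000u) == (X | (1u << 16)));

      // s_xor_b32 x, imm         ->  s_xnor_b32 x, ~imm   since a ^ b == ~(a ^ ~b)
      uint32_t Imm = 0xFFFF0001u;
      assert((X ^ Imm) == ~(X ^ ~Imm));

      return 0;
    }
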
diff --git a/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
index 7485326017b2..8a063e1a4867 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -375,83 +375,6 @@ defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
}
//===----------------------------------------------------------------------===//
-// Scalar Memory Patterns
-//===----------------------------------------------------------------------===//
-
-
-def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
- auto Ld = cast<LoadSDNode>(N);
- return Ld->getAlignment() >= 4 &&
- ((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) ||
- (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
- !Ld->isVolatile() && !N->isDivergent() &&
- static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
-}]>;
-
-def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
-def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
-def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
-def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
-def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
-
-multiclass SMRD_Pattern <string Instr, ValueType vt> {
-
- // 1. IMM offset
- def : GCNPat <
- (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
- >;
-
- // 2. SGPR offset
- def : GCNPat <
- (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
- >;
-}
-
-let OtherPredicates = [isSICI] in {
-def : GCNPat <
- (i64 (readcyclecounter)),
- (S_MEMTIME)
->;
-}
-
-// Global and constant loads can be selected to either MUBUF or SMRD
-// instructions, but SMRD instructions are faster so we want the instruction
-// selector to prefer those.
-let AddedComplexity = 100 in {
-
-defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
-
-// 1. Offset as an immediate
-def SM_LOAD_PATTERN : GCNPat < // name this pattern to reuse AddedComplexity on CI
- (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0)
->;
-
-// 2. Offset loaded in an 32bit SGPR
-def : GCNPat <
- (SIload_constant v4i32:$sbase, i32:$offset),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0)
->;
-
-} // End let AddedComplexity = 100
-
-let OtherPredicates = [isVI] in {
-
-def : GCNPat <
- (i64 (readcyclecounter)),
- (S_MEMREALTIME)
->;
-
-} // let OtherPredicates = [isVI]
-
-
-//===----------------------------------------------------------------------===//
// Targets
//===----------------------------------------------------------------------===//
@@ -757,25 +680,97 @@ class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
-let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in {
+//===----------------------------------------------------------------------===//
+// Scalar Memory Patterns
+//===----------------------------------------------------------------------===//
+
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]>;
+
+def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
+def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
+def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
+def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
+def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
+
+multiclass SMRD_Pattern <string Instr, ValueType vt> {
+
+ // 1. IMM offset
+ def : GCNPat <
+ (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
+ >;
+
+ // 2. 32-bit IMM offset on CI
+ def : GCNPat <
+ (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
+ (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
+ let OtherPredicates = [isCIOnly];
+ }
-class SMRD_Pattern_ci <string Instr, ValueType vt> : GCNPat <
- (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
- (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
- let OtherPredicates = [isCIOnly];
+ // 3. SGPR offset
+ def : GCNPat <
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+ >;
}
-def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX2", v2i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>;
-def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>;
+multiclass SMLoad_Pattern <string Instr, ValueType vt> {
+ // 1. Offset as an immediate
+ def : GCNPat <
+ (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc)))
+ >;
+ // 2. 32-bit IMM offset on CI
+ def : GCNPat <
+ (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
+ (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> {
+ let OtherPredicates = [isCIOnly];
+ }
+
+ // 3. Offset loaded in a 32-bit SGPR
+ def : GCNPat <
+ (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc)))
+ >;
+}
+
+// Global and constant loads can be selected to either MUBUF or SMRD
+// instructions, but SMRD instructions are faster so we want the instruction
+// selector to prefer those.
+let AddedComplexity = 100 in {
+
+defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
+
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;
+
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
+defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
+} // End let AddedComplexity = 100
+
+let OtherPredicates = [isSICI] in {
def : GCNPat <
- (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> {
- let OtherPredicates = [isCI]; // should this be isCIOnly?
+ (i64 (readcyclecounter)),
+ (S_MEMTIME)
+>;
}
-} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity
+let OtherPredicates = [isVI] in {
+def : GCNPat <
+ (i64 (readcyclecounter)),
+ (S_MEMREALTIME)
+>;
+
+} // let OtherPredicates = [isVI]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 6f5db9644c86..ca5e981ac5c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -336,42 +336,54 @@ class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
"$sdst, $src0, $src1", pattern
>;
+class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0),
+ (Op $src0),
+ [{ return !N->isDivergent(); }]
+>;
+
+class UniformBinFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0, node:$src1),
+ (Op $src0, $src1),
+ [{ return !N->isDivergent(); }]
+>;
+
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <"s_add_u32">;
def S_ADD_I32 : SOP2_32 <"s_add_i32",
- [(set i32:$sdst, (add SSrc_b32:$src0, SSrc_b32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<add> SSrc_b32:$src0, SSrc_b32:$src1))]
>;
} // End isCommutable = 1
def S_SUB_U32 : SOP2_32 <"s_sub_u32">;
def S_SUB_I32 : SOP2_32 <"s_sub_i32",
- [(set i32:$sdst, (sub SSrc_b32:$src0, SSrc_b32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<sub> SSrc_b32:$src0, SSrc_b32:$src1))]
>;
let Uses = [SCC] in { // Carry in comes from SCC
let isCommutable = 1 in {
def S_ADDC_U32 : SOP2_32 <"s_addc_u32",
- [(set i32:$sdst, (adde (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
+ [(set i32:$sdst, (UniformBinFrag<adde> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
} // End isCommutable = 1
def S_SUBB_U32 : SOP2_32 <"s_subb_u32",
- [(set i32:$sdst, (sube (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
+ [(set i32:$sdst, (UniformBinFrag<sube> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
} // End Uses = [SCC]
let isCommutable = 1 in {
def S_MIN_I32 : SOP2_32 <"s_min_i32",
- [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smin> i32:$src0, i32:$src1))]
>;
def S_MIN_U32 : SOP2_32 <"s_min_u32",
- [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umin> i32:$src0, i32:$src1))]
>;
def S_MAX_I32 : SOP2_32 <"s_max_i32",
- [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smax> i32:$src0, i32:$src1))]
>;
def S_MAX_U32 : SOP2_32 <"s_max_u32",
- [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umax> i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
} // End Defs = [SCC]
@@ -385,27 +397,27 @@ let Uses = [SCC] in {
let Defs = [SCC] in {
let isCommutable = 1 in {
def S_AND_B32 : SOP2_32 <"s_and_b32",
- [(set i32:$sdst, (and i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, i32:$src1))]
>;
def S_AND_B64 : SOP2_64 <"s_and_b64",
- [(set i64:$sdst, (and i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<and> i64:$src0, i64:$src1))]
>;
def S_OR_B32 : SOP2_32 <"s_or_b32",
- [(set i32:$sdst, (or i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<or> i32:$src0, i32:$src1))]
>;
def S_OR_B64 : SOP2_64 <"s_or_b64",
- [(set i64:$sdst, (or i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<or> i64:$src0, i64:$src1))]
>;
def S_XOR_B32 : SOP2_32 <"s_xor_b32",
- [(set i32:$sdst, (xor i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<xor> i32:$src0, i32:$src1))]
>;
def S_XOR_B64 : SOP2_64 <"s_xor_b64",
- [(set i64:$sdst, (xor i64:$src0, i64:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<xor> i64:$src0, i64:$src1))]
>;
def S_XNOR_B32 : SOP2_32 <"s_xnor_b32",
@@ -415,45 +427,71 @@ def S_XNOR_B32 : SOP2_32 <"s_xnor_b32",
def S_XNOR_B64 : SOP2_64 <"s_xnor_b64",
[(set i64:$sdst, (not (xor_oneuse i64:$src0, i64:$src1)))]
>;
+
+def S_NAND_B32 : SOP2_32 <"s_nand_b32",
+ [(set i32:$sdst, (not (and_oneuse i32:$src0, i32:$src1)))]
+>;
+
+def S_NAND_B64 : SOP2_64 <"s_nand_b64",
+ [(set i64:$sdst, (not (and_oneuse i64:$src0, i64:$src1)))]
+>;
+
+def S_NOR_B32 : SOP2_32 <"s_nor_b32",
+ [(set i32:$sdst, (not (or_oneuse i32:$src0, i32:$src1)))]
+>;
+
+def S_NOR_B64 : SOP2_64 <"s_nor_b64",
+ [(set i64:$sdst, (not (or_oneuse i64:$src0, i64:$src1)))]
+>;
} // End isCommutable = 1
-def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32">;
-def S_ANDN2_B64 : SOP2_64 <"s_andn2_b64">;
-def S_ORN2_B32 : SOP2_32 <"s_orn2_b32">;
-def S_ORN2_B64 : SOP2_64 <"s_orn2_b64">;
-def S_NAND_B32 : SOP2_32 <"s_nand_b32">;
-def S_NAND_B64 : SOP2_64 <"s_nand_b64">;
-def S_NOR_B32 : SOP2_32 <"s_nor_b32">;
-def S_NOR_B64 : SOP2_64 <"s_nor_b64">;
+def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32",
+ [(set i32:$sdst, (UniformBinFrag<and> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
+>;
+
+def S_ANDN2_B64 : SOP2_64 <"s_andn2_b64",
+ [(set i64:$sdst, (UniformBinFrag<and> i64:$src0, (UniformUnaryFrag<not> i64:$src1)))]
+>;
+
+def S_ORN2_B32 : SOP2_32 <"s_orn2_b32",
+ [(set i32:$sdst, (UniformBinFrag<or> i32:$src0, (UniformUnaryFrag<not> i32:$src1)))]
+>;
+
+def S_ORN2_B64 : SOP2_64 <"s_orn2_b64",
+ [(set i64:$sdst, (UniformBinFrag<or> i64:$src0, (UniformUnaryFrag<not> i64:$src1)))]
+>;
} // End Defs = [SCC]
// Use added complexity so these patterns are preferred to the VALU patterns.
let AddedComplexity = 1 in {
let Defs = [SCC] in {
+// TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
- [(set i32:$sdst, (shl i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))]
>;
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
- [(set i64:$sdst, (shl i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))]
>;
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
- [(set i32:$sdst, (srl i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
>;
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
- [(set i64:$sdst, (srl i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))]
>;
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
- [(set i32:$sdst, (sra i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
>;
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
- [(set i64:$sdst, (sra i64:$src0, i32:$src1))]
+ [(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))]
>;
} // End Defs = [SCC]
def S_BFM_B32 : SOP2_32 <"s_bfm_b32",
- [(set i32:$sdst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
+ [(set i32:$sdst, (UniformBinFrag<AMDGPUbfm> i32:$src0, i32:$src1))]>;
def S_BFM_B64 : SOP2_64_32_32 <"s_bfm_b64">;
+
+// TODO: S_MUL_I32 requires V_MUL_LO_I32 from VOP3 change
def S_MUL_I32 : SOP2_32 <"s_mul_i32",
[(set i32:$sdst, (mul i32:$src0, i32:$src1))]> {
let isCommutable = 1;
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4eba19382315..54c866bdc63c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -128,6 +128,49 @@ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
return NewInfo ? NewInfo->Opcode : -1;
}
+struct MUBUFInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t dwords;
+ bool has_vaddr;
+ bool has_srsrc;
+ bool has_soffset;
+};
+
+#define GET_MUBUFInfoTable_DECL
+#define GET_MUBUFInfoTable_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+
+int getMUBUFBaseOpcode(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
+ return Info ? Info->BaseOpcode : -1;
+}
+
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) {
+ const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords);
+ return Info ? Info->Opcode : -1;
+}
+
+int getMUBUFDwords(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->dwords : 0;
+}
+
+bool getMUBUFHasVAddr(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_vaddr : false;
+}
+
+bool getMUBUFHasSrsrc(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_srsrc : false;
+}
+
+bool getMUBUFHasSoffset(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_soffset : false;
+}
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
@@ -137,122 +180,75 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
namespace IsaInfo {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
- // GCN GFX6 (Southern Islands (SI)).
- if (Features.test(FeatureISAVersion6_0_0))
- return {6, 0, 0};
- if (Features.test(FeatureISAVersion6_0_1))
- return {6, 0, 1};
-
- // GCN GFX7 (Sea Islands (CI)).
- if (Features.test(FeatureISAVersion7_0_0))
- return {7, 0, 0};
- if (Features.test(FeatureISAVersion7_0_1))
- return {7, 0, 1};
- if (Features.test(FeatureISAVersion7_0_2))
- return {7, 0, 2};
- if (Features.test(FeatureISAVersion7_0_3))
- return {7, 0, 3};
- if (Features.test(FeatureISAVersion7_0_4))
- return {7, 0, 4};
- if (Features.test(FeatureSeaIslands))
- return {7, 0, 0};
-
- // GCN GFX8 (Volcanic Islands (VI)).
- if (Features.test(FeatureISAVersion8_0_1))
- return {8, 0, 1};
- if (Features.test(FeatureISAVersion8_0_2))
- return {8, 0, 2};
- if (Features.test(FeatureISAVersion8_0_3))
- return {8, 0, 3};
- if (Features.test(FeatureISAVersion8_1_0))
- return {8, 1, 0};
- if (Features.test(FeatureVolcanicIslands))
- return {8, 0, 0};
-
- // GCN GFX9.
- if (Features.test(FeatureISAVersion9_0_0))
- return {9, 0, 0};
- if (Features.test(FeatureISAVersion9_0_2))
- return {9, 0, 2};
- if (Features.test(FeatureISAVersion9_0_4))
- return {9, 0, 4};
- if (Features.test(FeatureISAVersion9_0_6))
- return {9, 0, 6};
- if (Features.test(FeatureGFX9))
- return {9, 0, 0};
-
- if (Features.test(FeatureSouthernIslands))
- return {0, 0, 0};
- return {7, 0, 0};
-}
-
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
- auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
+ auto Version = getIsaVersion(STI->getCPU());
Stream << TargetTriple.getArchName() << '-'
<< TargetTriple.getVendorName() << '-'
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-'
<< "gfx"
- << ISAVersion.Major
- << ISAVersion.Minor
- << ISAVersion.Stepping;
+ << Version.Major
+ << Version.Minor
+ << Version.Stepping;
if (hasXNACK(*STI))
Stream << "+xnack";
+ if (hasSRAMECC(*STI))
+ Stream << "+sram-ecc";
Stream.flush();
}
bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
- return STI->getFeatureBits().test(FeatureCodeObjectV3);
+ return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
+ STI->getFeatureBits().test(FeatureCodeObjectV3);
}
-unsigned getWavefrontSize(const FeatureBitset &Features) {
- if (Features.test(FeatureWavefrontSize16))
+unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize16))
return 16;
- if (Features.test(FeatureWavefrontSize32))
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
return 32;
return 64;
}
-unsigned getLocalMemorySize(const FeatureBitset &Features) {
- if (Features.test(FeatureLocalMemorySize32768))
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
return 32768;
- if (Features.test(FeatureLocalMemorySize65536))
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
return 65536;
return 0;
}
-unsigned getEUsPerCU(const FeatureBitset &Features) {
+unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- if (!Features.test(FeatureGCN))
+ if (!STI->getFeatureBits().test(FeatureGCN))
return 8;
- unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
if (N == 1)
return 40;
N = 40 / N;
return std::min(N, 16u);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
- return getMaxWavesPerEU() * getEUsPerCU(Features);
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
+ return getMaxWavesPerEU() * getEUsPerCU(STI);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}
-unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
return 1;
}
@@ -261,89 +257,89 @@ unsigned getMaxWavesPerEU() {
return 10;
}
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
- getEUsPerCU(Features)) / getEUsPerCU(Features);
+ return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
+ getEUsPerCU(STI)) / getEUsPerCU(STI);
}
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 1;
}
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 2048;
}
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
- getWavefrontSize(Features);
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
+ getWavefrontSize(STI);
}
-unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 16;
return 8;
}
-unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
return 8;
}
-unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 800;
return 512;
}
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
- if (Features.test(FeatureSGPRInitBug))
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureSGPRInitBug))
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 102;
return 104;
}
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
- unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
- if (Features.test(FeatureTrapHandler))
+ unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
- return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable) {
assert(WavesPerEU != 0);
- IsaVersion Version = getIsaVersion(Features);
- unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
- unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
- if (Features.test(FeatureTrapHandler))
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed) {
unsigned ExtraSGPRs = 0;
if (VCCUsed)
ExtraSGPRs = 2;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major < 8) {
if (FlatScrUsed)
ExtraSGPRs = 4;
@@ -358,74 +354,74 @@ unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
return ExtraSGPRs;
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed) {
- return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
- Features[AMDGPU::FeatureXNACK]);
+ return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
+ STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
- NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
// SGPRBlocks is actual number of SGPR blocks minus 1.
- return NumSGPRs / getSGPREncodingGranule(Features) - 1;
+ return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
-unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
- return getVGPRAllocGranule(Features);
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
+ return getVGPRAllocGranule(STI);
}
-unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
return 256;
}
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
- return getTotalNumVGPRs(Features);
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
+ return getTotalNumVGPRs(STI);
}
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
- alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
- getVGPRAllocGranule(Features)) + 1;
- return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
+ alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
+ getVGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
- getVGPRAllocGranule(Features));
- unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
+ getVGPRAllocGranule(STI));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
- NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
// VGPRBlocks is actual number of VGPR blocks minus 1.
- return NumVGPRs / getVGPREncodingGranule(Features) - 1;
+ return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features) {
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
- Header.amd_machine_version_major = ISA.Major;
- Header.amd_machine_version_minor = ISA.Minor;
- Header.amd_machine_version_stepping = ISA.Stepping;
+ Header.amd_machine_version_major = Version.Major;
+ Header.amd_machine_version_minor = Version.Minor;
+ Header.amd_machine_version_stepping = Version.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
@@ -513,7 +509,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints;
}
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getVmcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
if (Version.Major < 9)
return VmcntLo;
@@ -522,15 +518,15 @@ unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getExpcntBitMask(const IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getLgkmcntBitMask(const IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getWaitcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
@@ -542,7 +538,7 @@ unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
return Waitcnt | VmcntHi;
}
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
unsigned VmcntLo =
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
if (Version.Major < 9)
@@ -554,22 +550,30 @@ unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
+ Waitcnt Decoded;
+ Decoded.VmCnt = decodeVmcnt(Version, Encoded);
+ Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
+ Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
+ return Decoded;
+}
+
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
Waitcnt =
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
@@ -580,17 +584,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
@@ -599,6 +603,10 @@ unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
return Waitcnt;
}
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
+ return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
+}
+
unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
@@ -643,6 +651,10 @@ bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
+bool hasSRAMECC(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
+}
+
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}
@@ -798,6 +810,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VS_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
+ case AMDGPU::SReg_64_XEXECRegClassID:
return 64;
case AMDGPU::VReg_96RegClassID:
return 96;
@@ -935,27 +948,50 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
-} // end namespace AMDGPU
-
-} // end namespace llvm
-
-namespace llvm {
-namespace AMDGPU {
-
-AMDGPUAS getAMDGPUAS(Triple T) {
- AMDGPUAS AS;
- AS.FLAT_ADDRESS = 0;
- AS.PRIVATE_ADDRESS = 5;
- AS.REGION_ADDRESS = 2;
- return AS;
-}
+// Given Imm, split it into the values to put into the SOffset and ImmOffset
+// fields in an MUBUF instruction. Return false if it is not possible (due to a
+// hardware bug needing a workaround).
+//
+// The required alignment ensures that individual address components remain
+// aligned if they are aligned to begin with. It also ensures that additional
+// offsets within the given alignment can be added to the resulting ImmOffset.
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align) {
+ const uint32_t MaxImm = alignDown(4095, Align);
+ uint32_t Overflow = 0;
+
+ if (Imm > MaxImm) {
+ if (Imm <= MaxImm + 64) {
+ // Use an SOffset inline constant for 4..64
+ Overflow = Imm - MaxImm;
+ Imm = MaxImm;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits (except for alignment bits) set into
+ // SOffset, so that a larger range of values can be covered using
+ // s_movk_i32.
+ //
+ // Atomic operations fail to work correctly when individual address
+ // components are unaligned, even if their sum is aligned.
+ uint32_t High = (Imm + Align) & ~4095;
+ uint32_t Low = (Imm + Align) & 4095;
+ Imm = Low;
+ Overflow = High - Align;
+ }
+ }
-AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
- return getAMDGPUAS(M.getTargetTriple());
-}
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (Overflow > 0 &&
+ Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
-AMDGPUAS getAMDGPUAS(const Module &M) {
- return getAMDGPUAS(Triple(M.getTargetTriple()));
+ ImmOffset = Imm;
+ SOffset = Overflow;
+ return true;
}
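For readers skimming the hunk above, the split is easier to follow with concrete numbers. The sketch below is illustrative only: it restates the same arithmetic as a standalone function for the default Align of 4 and omits the SI/CI SOffset workaround; the example values are made up for the walkthrough and the helper name is hypothetical.

// Illustrative only: standalone sketch of the MUBUF offset split for Align = 4.
#include <cstdint>

static void splitExample(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset) {
  const uint32_t Align = 4;
  const uint32_t MaxImm = 4095 & ~(Align - 1); // alignDown(4095, 4) == 4092
  uint32_t Overflow = 0;
  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      Overflow = Imm - MaxImm;                 // small excess goes to SOffset
      Imm = MaxImm;
    } else {
      uint32_t High = (Imm + Align) & ~4095u;  // keep SOffset reusable across loads
      uint32_t Low = (Imm + Align) & 4095u;
      Imm = Low;
      Overflow = High - Align;
    }
  }
  ImmOffset = Imm;
  SOffset = Overflow;
}

// e.g. Imm = 5000 splits into SOffset = 4092 and ImmOffset = 908 (4092 + 908 == 5000);
// Imm = 4100 splits into SOffset = 8 and ImmOffset = 4092; Imm = 1000 stays entirely
// in ImmOffset with SOffset = 0.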
namespace {
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 5b7af8268cda..20123ed4ac81 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -19,6 +19,7 @@
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>
@@ -26,8 +27,10 @@
namespace llvm {
class Argument;
+class AMDGPUSubtarget;
class FeatureBitset;
class Function;
+class GCNSubtarget;
class GlobalValue;
class MCContext;
class MCRegisterClass;
@@ -54,16 +57,6 @@ enum {
TRAP_NUM_SGPRS = 16
};
-/// Instruction set architecture version.
-struct IsaVersion {
- unsigned Major;
- unsigned Minor;
- unsigned Stepping;
-};
-
-/// \returns Isa version for given subtarget \p Features.
-IsaVersion getIsaVersion(const FeatureBitset &Features);
-
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
@@ -71,114 +64,114 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
-/// \returns Wavefront size for given subtarget \p Features.
-unsigned getWavefrontSize(const FeatureBitset &Features);
+/// \returns Wavefront size for given subtarget \p STI.
+unsigned getWavefrontSize(const MCSubtargetInfo *STI);
-/// \returns Local memory size in bytes for given subtarget \p Features.
-unsigned getLocalMemorySize(const FeatureBitset &Features);
+/// \returns Local memory size in bytes for given subtarget \p STI.
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
/// \returns Number of execution units per compute unit for given subtarget \p
-/// Features.
-unsigned getEUsPerCU(const FeatureBitset &Features);
+/// STI.
+unsigned getEUsPerCU(const MCSubtargetInfo *STI);
/// \returns Maximum number of work groups per compute unit for given subtarget
-/// \p Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+/// \p STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// Features without any kind of limitation.
-unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+/// STI without any kind of limitation.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
/// \returns Minimum number of waves per execution unit for given subtarget \p
-/// Features.
-unsigned getMinWavesPerEU(const FeatureBitset &Features);
+/// STI.
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// Features without any kind of limitation.
+/// STI without any kind of limitation.
unsigned getMaxWavesPerEU();
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns Minimum flat work group size for given subtarget \p Features.
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+/// \returns Minimum flat work group size for given subtarget \p STI.
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
-/// \returns Maximum flat work group size for given subtarget \p Features.
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+/// \returns Maximum flat work group size for given subtarget \p STI.
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
-/// \returns Number of waves per work group for given subtarget \p Features and
+/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns SGPR allocation granularity for given subtarget \p Features.
-unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+/// \returns SGPR allocation granularity for given subtarget \p STI.
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
-/// \returns SGPR encoding granularity for given subtarget \p Features.
-unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+/// \returns SGPR encoding granularity for given subtarget \p STI.
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
-/// \returns Total number of SGPRs for given subtarget \p Features.
-unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+/// \returns Total number of SGPRs for given subtarget \p STI.
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
-/// \returns Addressable number of SGPRs for given subtarget \p Features.
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+/// \returns Addressable number of SGPRs for given subtarget \p STI.
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
/// \returns Minimum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// Features when the given special registers are used.
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+/// STI when the given special registers are used.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// Features when the given special registers are used. XNACK is inferred from
-/// \p Features.
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+/// STI when the given special registers are used. XNACK is inferred from
+/// \p STI.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed);
-/// \returns Number of SGPR blocks needed for given subtarget \p Features when
+/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
-/// \returns VGPR allocation granularity for given subtarget \p Features.
-unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+/// \returns VGPR allocation granularity for given subtarget \p STI.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
-/// \returns VGPR encoding granularity for given subtarget \p Features.
-unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+/// \returns VGPR encoding granularity for given subtarget \p STI.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
-/// \returns Total number of VGPRs for given subtarget \p Features.
-unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+/// \returns Total number of VGPRs for given subtarget \p STI.
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
-/// \returns Addressable number of VGPRs for given subtarget \p Features.
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+/// \returns Addressable number of VGPRs for given subtarget \p STI.
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
/// \returns Minimum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
-/// \returns Number of VGPR blocks needed for given subtarget \p Features when
+/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
} // end namespace IsaInfo
@@ -191,6 +184,7 @@ struct MIMGBaseOpcodeInfo {
bool Atomic;
bool AtomicX2;
bool Sampler;
+ bool Gather4;
uint8_t NumExtraArgs;
bool Gradients;
@@ -228,10 +222,28 @@ LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
LLVM_READONLY
+int getMUBUFBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords);
+
+LLVM_READONLY
+int getMUBUFDwords(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasVAddr(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSrsrc(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSoffset(unsigned Opc);
+
+LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features);
+ const MCSubtargetInfo *STI);
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
@@ -265,26 +277,52 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
std::pair<int, int> Default,
bool OnlyFirstRequired = false);
+/// Represents the counter values to wait for in an s_waitcnt instruction.
+///
+/// Large values (including the maximum possible integer) can be used to
+/// represent "don't care" waits.
+struct Waitcnt {
+ unsigned VmCnt = ~0u;
+ unsigned ExpCnt = ~0u;
+ unsigned LgkmCnt = ~0u;
+
+ Waitcnt() {}
+ Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
+ : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}
+
+ static Waitcnt allZero() { return Waitcnt(0, 0, 0); }
+
+ bool dominates(const Waitcnt &Other) const {
+ return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
+ LgkmCnt <= Other.LgkmCnt;
+ }
+
+ Waitcnt combined(const Waitcnt &Other) const {
+ return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
+ std::min(LgkmCnt, Other.LgkmCnt));
+ }
+};
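A brief usage sketch of the new struct, assuming the surrounding llvm::AMDGPU namespace; the counter values are arbitrary examples, not taken from the patch.

// Illustrative only: composing two pending waits with the helpers above.
llvm::AMDGPU::Waitcnt A(/*VmCnt=*/1, /*ExpCnt=*/~0u, /*LgkmCnt=*/0); // ~0u = don't care
llvm::AMDGPU::Waitcnt B(/*VmCnt=*/3, /*ExpCnt=*/2,   /*LgkmCnt=*/0);
llvm::AMDGPU::Waitcnt C = A.combined(B); // per-counter minimum: {1, 2, 0}
bool Stronger = C.dominates(B);          // true: every counter in C is <= B's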
+
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getVmcntBitMask(const IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getExpcntBitMask(const IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getLgkmcntBitMask(const IsaVersion &Version);
/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getWaitcntBitMask(const IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
@@ -295,19 +333,21 @@ unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
+
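As a worked example of the bit layout documented above (pre-gfx9, so no Waitcnt[15:14] high bits), an encoding of 0x0234 unpacks as shown below; this is a hand decode for illustration, not a call into the API, and the constant is an arbitrary example.

// Illustrative only: manual decode following the documented pre-gfx9 layout.
unsigned Encoded = 0x0234;
unsigned Vmcnt   = Encoded & 0xF;        // bits 3:0  -> 4
unsigned Expcnt  = (Encoded >> 4) & 0x7; // bits 6:4  -> 3
unsigned Lgkmcnt = (Encoded >> 8) & 0xF; // bits 11:8 -> 2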
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt);
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
@@ -322,9 +362,11 @@ unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
unsigned getInitialPSInputAddr(const Function &F);
LLVM_READNONE
@@ -349,6 +391,7 @@ inline bool isKernel(CallingConv::ID CC) {
}
bool hasXNACK(const MCSubtargetInfo &STI);
+bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
@@ -447,6 +490,9 @@ int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align = 4);
+
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp
deleted file mode 100644
index 1924f71f11c8..000000000000
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//===-- AMDGPULaneDominator.cpp - Determine Lane Dominators ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// MBB A lane-dominates MBB B if
-// 1. A dominates B in the usual sense, i.e. every path from the entry to B
-// goes through A, and
-// 2. whenever B executes, every active lane during that execution of B was
-// also active during the most recent execution of A.
-//
-// The simplest example where A dominates B but does not lane-dominate it is
-// where A is a loop:
-//
-// |
-// +--+
-// A |
-// +--+
-// |
-// B
-//
-// Unfortunately, the second condition is not fully captured by the control
-// flow graph when it is unstructured (as may happen when branch conditions are
-// uniform).
-//
-// The following replacement of the second condition is a conservative
-// approximation. It is an equivalent condition when the CFG is fully
-// structured:
-//
-// 2'. every cycle in the CFG that contains A also contains B.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPULaneDominator.h"
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-
-namespace llvm {
-
-namespace AMDGPU {
-
-// Given machine basic blocks A and B where A dominates B, check whether
-// A lane-dominates B.
-//
-// The check is conservative, i.e. there can be false-negatives.
-bool laneDominates(MachineBasicBlock *A, MachineBasicBlock *B) {
- // Check whether A is reachable from itself without going through B.
- DenseSet<MachineBasicBlock *> Reachable;
- SmallVector<MachineBasicBlock *, 8> Stack;
-
- Stack.push_back(A);
- do {
- MachineBasicBlock *MBB = Stack.back();
- Stack.pop_back();
-
- for (MachineBasicBlock *Succ : MBB->successors()) {
- if (Succ == A)
- return false;
- if (Succ != B && Reachable.insert(Succ).second)
- Stack.push_back(Succ);
- }
- } while (!Stack.empty());
-
- return true;
-}
-
-} // namespace AMDGPU
-
-} // namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h
deleted file mode 100644
index 4f33a89a364b..000000000000
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPULaneDominator.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===- AMDGPULaneDominator.h ------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
-#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
-
-namespace llvm {
-
-class MachineBasicBlock;
-
-namespace AMDGPU {
-
-bool laneDominates(MachineBasicBlock *MBBA, MachineBasicBlock *MBBB);
-
-} // end namespace AMDGPU
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEDOMINATOR_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 9f0a4d29b5e4..82ffdef8e674 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -46,6 +46,7 @@
int64_t Value = 0; \
if (!expectAbsExpression(MCParser, Value, Err)) \
return false; \
+ C.compute_pgm_resource_registers &= ~(SetMacro(0xFFFFFFFFFFFFFFFFULL) << Shift); \
C.compute_pgm_resource_registers |= SetMacro(Value) << Shift; \
return true; \
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 4c7a92219755..68446ab79720 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -84,6 +84,10 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP1";
}
+class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_DPP_Pseudo <OpName, P, pattern> {
+}
+
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret =
!if(P.HasModifiers,
@@ -103,6 +107,8 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _e32 : VOP1_Pseudo <opName, P>;
def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP1_DPP_Pseudo <opName, P>;
}
// Special profile for instructions which have clamp
@@ -173,7 +179,9 @@ defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
+let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
+} // End FPDPRounding = 1
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
@@ -226,7 +234,9 @@ defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;
let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
+let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
+} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]
defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
@@ -242,7 +252,9 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC64 = VRegSrc_32;
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
// Special case because there are no true output operands. Hack vdst
@@ -271,7 +283,10 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
+
let HasDst = 0;
let EmitDst = 1; // force vdst emission
}
@@ -328,8 +343,10 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
let SubtargetPredicate = Has16BitInsts in {
+let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
+} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let SchedRW = [WriteQuarterRate32] in {
@@ -347,7 +364,9 @@ defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
+let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
+} // End FPDPRounding = 1
}
@@ -495,13 +514,8 @@ defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
// VI
//===----------------------------------------------------------------------===//
-class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
- VOP_DPP <ps.OpName, P> {
- let Defs = ps.Defs;
- let Uses = ps.Uses;
- let SchedRW = ps.SchedRW;
- let hasSideEffects = ps.hasSideEffects;
-
+class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPPe <P> {
bits<8> vdst;
let Inst{8-0} = 0xfa; // dpp
let Inst{16-9} = op;
@@ -539,9 +553,10 @@ multiclass VOP1_Real_vi <bits<10> op> {
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_NOP : VOP1_Real_vi <0x0>;
@@ -712,9 +727,11 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
+
}
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 5ec1a15c5cd2..e3fd7b5f9fad 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -105,6 +105,11 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = "cvtSdwaVOP2";
}
+class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_DPP_Pseudo <OpName, P, pattern> {
+}
+
+
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
@@ -116,22 +121,49 @@ class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}
-multiclass VOP2Inst <string opName,
- VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName,
- bit GFX9Renamed = 0> {
-
+multiclass VOP2Inst_e32<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
-
- def _e32 : VOP2_Pseudo <opName, P>,
+ def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
+multiclass VOP2Inst_e64<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
+ let renamedInGFX9 = GFX9Renamed in {
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+multiclass VOP2Inst_sdwa<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> {
+ let renamedInGFX9 = GFX9Renamed in {
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
+ } // End renamedInGFX9 = GFX9Renamed
+}
+multiclass VOP2Inst<string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> :
+ VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
+ VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
+ VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
+ let renamedInGFX9 = GFX9Renamed in {
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
}
@@ -144,12 +176,14 @@ multiclass VOP2bInst <string opName,
let renamedInGFX9 = GFX9Renamed in {
let SchedRW = [Write32Bit, WriteSALU] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
- def _e32 : VOP2_Pseudo <opName, P>,
+ def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -172,6 +206,9 @@ multiclass VOP2eInst <string opName,
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
+
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -211,9 +248,9 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
- let InsDPP = (ins DstRCDPP:$old,
- Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
@@ -230,21 +267,15 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
- let HasExt = 1;
- let HasSDWA9 = 0;
-}
-def VOP_MAC_F16 : VOP_MAC <f16> {
- // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
- // 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f16>.ret;
+ let HasExt = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 0;
}
-def VOP_MAC_F32 : VOP_MAC <f32> {
- // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
- // 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f32>.ret;
-}
+def VOP_MAC_F16 : VOP_MAC <f16>;
+def VOP_MAC_F32 : VOP_MAC <f32>;
// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
@@ -290,7 +321,9 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
- let HasSDWA9 = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 1;
}
// Read in from vcc or arbitrary SGPR
@@ -321,7 +354,9 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
- let HasSDWA9 = 1;
+ let HasExtDPP = 1;
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 1;
}
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
@@ -331,8 +366,11 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
+
let HasExt = 0;
- let HasSDWA9 = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
@@ -342,20 +380,23 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
- let HasExt = 0;
- let HasSDWA9 = 0;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
+
+ let HasExt = 0;
+ let HasExtDPP = 0;
+ let HasExtSDWA = 0;
+ let HasExtSDWA9 = 0;
}
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isGCN in {
+let SubtargetPredicate = isGCN, Predicates = [isGCN] in {
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
-def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, [], "">;
+def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
@@ -363,29 +404,29 @@ defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
-defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
-defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
-defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
-defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
-defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
-defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
-defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>;
-defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>;
-defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>;
-defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_i24>;
+defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
+defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
+defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
+defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
+defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
+defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
+defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
+defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
-defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>;
-defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>;
-defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>;
+defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
+defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
+defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
}
-def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, [], "">;
+def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
@@ -411,11 +452,11 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub
// These are special and do not read the exec mask.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
- [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">;
+ [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
- [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))], "">;
+ [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1
@@ -425,13 +466,13 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
-defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_i16_f32>;
-defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_u16_f32>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>;
-defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_u16_u32>;
-defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_i16_i32>;
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
-} // End SubtargetPredicate = isGCN
+} // End SubtargetPredicate = isGCN, Predicates = [isGCN]
def : GCNPat<
(AMDGPUadde i32:$src0, i32:$src1, i1:$src2),
@@ -444,40 +485,99 @@ def : GCNPat<
>;
// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
} // End isCommutable = 1
-} // End let SubtargetPredicate = SICI
+} // End let SubtargetPredicate = SICI, Predicates = [isSICI]
+
+class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
+ GCNPat<
+ (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
+ !if(!cast<Commutable_REV>(Inst).IsOrig,
+ (Inst $src0, $src1),
+ (Inst $src1, $src0)
+ )
+ >;
+
+let AddedComplexity = 1 in {
+ def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
+ def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
+ def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
+}
+
+let SubtargetPredicate = HasAddNoCarryInsts in {
+ def : DivergentBinOp<add, V_ADD_U32_e32>;
+ def : DivergentBinOp<sub, V_SUB_U32_e32>;
+ def : DivergentBinOp<sub, V_SUBREV_U32_e32>;
+}
+
+
+def : DivergentBinOp<add, V_ADD_I32_e32>;
+
+def : DivergentBinOp<add, V_ADD_I32_e64>;
+def : DivergentBinOp<sub, V_SUB_I32_e32>;
+
+def : DivergentBinOp<sub, V_SUBREV_I32_e32>;
+
+def : DivergentBinOp<srl, V_LSHRREV_B32_e32>;
+def : DivergentBinOp<sra, V_ASHRREV_I32_e32>;
+def : DivergentBinOp<shl, V_LSHLREV_B32_e32>;
+def : DivergentBinOp<adde, V_ADDC_U32_e32>;
+def : DivergentBinOp<sube, V_SUBB_U32_e32>;
+
+class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
+ GCNPat<
+ (getDivergentFrag<Op>.ret i64:$src0, i64:$src1),
+ (REG_SEQUENCE VReg_64,
+ (Inst
+ (i32 (EXTRACT_SUBREG $src0, sub0)),
+ (i32 (EXTRACT_SUBREG $src1, sub0))
+ ), sub0,
+ (Inst
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1))
+ ), sub1
+ )
+ >;
+
+def : divergent_i64_BinOp <and, V_AND_B32_e32>;
+def : divergent_i64_BinOp <or, V_OR_B32_e32>;
+def : divergent_i64_BinOp <xor, V_XOR_B32_e32>;
let SubtargetPredicate = Has16BitInsts in {
+let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
+defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
+} // End FPDPRounding = 1
+
defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>;
-defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
let isCommutable = 1 in {
+let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
+} // End FPDPRounding = 1
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
-defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum>;
-defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum>;
+defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
+defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
@@ -698,13 +798,8 @@ defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
// VI
//===----------------------------------------------------------------------===//
-class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfile P = ps.Pfl> :
- VOP_DPP <OpName, P> {
- let Defs = ps.Defs;
- let Uses = ps.Uses;
- let SchedRW = ps.SchedRW;
- let hasSideEffects = ps.hasSideEffects;
-
+class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPPe <P> {
bits<8> vdst;
bits<8> src1;
let Inst{8-0} = 0xfa; //dpp
@@ -716,12 +811,6 @@ class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfil
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
-multiclass VOP32_Real_vi <bits<10> op> {
- def _vi :
- VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>;
-}
-
multiclass VOP2_Real_MADK_vi <bits<6> op> {
def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
@@ -791,8 +880,13 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- def _dpp :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName>;
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
+ VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
+ let AsmString = AsmName # ps.AsmOperands;
+ }
}
}
@@ -819,10 +913,14 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- def _dpp_gfx9 :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName> {
- let DecoderNamespace = "SDWA9";
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
+ VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
+ let AsmString = AsmName # ps.AsmOperands;
+ let DecoderNamespace = "SDWA9";
+ }
}
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
@@ -840,19 +938,23 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
}
- def _dpp_gfx9 :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
- let DecoderNamespace = "SDWA9";
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
+ let DecoderNamespace = "SDWA9";
+ }
}
} // AssemblerPredicates = [isGFX9]
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
@@ -899,9 +1001,6 @@ defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
-defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
-defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
-
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>;
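The divergent_i64_BinOp patterns in the hunks above split a divergent 64-bit bitwise operation into two 32-bit VALU ops on the sub0/sub1 halves and rebuild the result with REG_SEQUENCE. A small C++ sketch of the equivalent arithmetic (illustrative only):

#include <cassert>
#include <cstdint>

// 64-bit AND lowered as two 32-bit ops on the low (sub0) and high (sub1)
// halves, then recombined -- mirroring divergent_i64_BinOp<and, V_AND_B32_e32>.
static uint64_t and64_via_two_32bit_ops(uint64_t a, uint64_t b) {
  uint32_t lo = static_cast<uint32_t>(a) & static_cast<uint32_t>(b);               // V_AND_B32 on sub0
  uint32_t hi = static_cast<uint32_t>(a >> 32) & static_cast<uint32_t>(b >> 32);   // V_AND_B32 on sub1
  return (static_cast<uint64_t>(hi) << 32) | lo;                                    // REG_SEQUENCE
}

int main() {
  assert(and64_via_two_32bit_ops(0xffff0000ffff0000ULL, 0x0ff00ff00ff00ff0ULL)
         == (0xffff0000ffff0000ULL & 0x0ff00ff00ff00ff0ULL));
}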
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 26bc5260e17f..4b8c1f208a0e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -17,16 +17,16 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT src0)))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -35,18 +35,18 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -55,18 +55,18 @@ class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -75,18 +75,18 @@ class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
(P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -94,9 +94,9 @@ class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
}
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
- list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
- list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
- list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))];
+ list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
+ list<dag> ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1))];
+ list<dag> ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
ret1));
@@ -185,6 +185,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
getAsm64<HasDst, NumSrcArgs, HasIntClamp,
HasModifiers, HasOMod, DstVT>.ret,
P.Asm64));
+ let NeedPatGen = P.NeedPatGen;
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
@@ -219,7 +220,8 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
// VOP3 INTERP
//===----------------------------------------------------------------------===//
-class VOP3Interp<string OpName, VOPProfile P> : VOP3_Pseudo<OpName, P> {
+class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
+ VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
}
@@ -291,11 +293,13 @@ def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>;
def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteDoubleAdd] in {
+let FPDPRounding = 1 in {
def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>;
def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, fadd, 1>;
def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
-def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum, 1>;
-def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum, 1>;
+} // End FPDPRounding = 1
+def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
+def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like, 1>;
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteQuarterRate32] in {
@@ -323,6 +327,7 @@ def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
let SchedRW = [WriteDouble];
+ let FPDPRounding = 1;
}
} // End Uses = [VCC, EXEC]
@@ -353,10 +358,10 @@ def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CL
def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
-let SchedRW = [WriteDoubleAdd] in {
+let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
-} // End SchedRW = [WriteDoubleAdd]
+} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
@@ -367,6 +372,7 @@ def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32,
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
let AsmMatchConverter = "";
+ let FPDPRounding = 1;
}
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
@@ -381,12 +387,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
let SchedRW = [Write64Bit] in {
// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>>;
-def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>>;
-def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>>;
+let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
+def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
+def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
+def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isSICI
+} // End SubtargetPredicate = isSICI, Predicates = [isSICI]
let SubtargetPredicate = isVI in {
def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
@@ -395,6 +401,22 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
} // End SubtargetPredicate = isVI
} // End SchedRW = [Write64Bit]
+let Predicates = [isVI] in {
+def : GCNPat <
+ (getDivergentFrag<shl>.ret i64:$x, i32:$y),
+ (V_LSHLREV_B64 $y, $x)
+>;
+def : AMDGPUPat <
+ (getDivergentFrag<srl>.ret i64:$x, i32:$y),
+ (V_LSHRREV_B64 $y, $x)
+>;
+def : AMDGPUPat <
+ (getDivergentFrag<sra>.ret i64:$x, i32:$y),
+ (V_ASHRREV_I64 $y, $x)
+>;
+}
+
+
let SubtargetPredicate = isCIVI in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
@@ -414,33 +436,51 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup> {
let Predicates = [Has16BitInsts, isVIOnly];
+ let FPDPRounding = 1;
}
def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup> {
let renamedInGFX9 = 1;
let Predicates = [Has16BitInsts, isGFX9];
+ let FPDPRounding = 1;
+}
+
+def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
+ let Predicates = [Has16BitInsts, isVIOnly];
+ let FPDPRounding = 1;
+}
+def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
+ let renamedInGFX9 = 1;
+ let Predicates = [Has16BitInsts, isGFX9];
+ let FPDPRounding = 1;
}
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
let renamedInGFX9 = 1 in {
-def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
-def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
+let FPDPRounding = 1 in {
+def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
+let Uses = [M0, EXEC] in {
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
-}
+} // End Uses = [M0, EXEC]
+} // End FPDPRounding = 1
+} // End renamedInGFX9 = 1
let SubtargetPredicate = isGFX9 in {
-def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
+def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> {
+ let FPDPRounding = 1;
+}
def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
-def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9
+let Uses = [M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>;
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
+} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -468,6 +508,37 @@ defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
} // End Predicates = [Has16BitInsts]
+class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
+ (ops node:$x, node:$y, node:$z),
+ // When the inner operation is used multiple times, selecting 3-op
+ // instructions may still be beneficial -- if the other users can be
+ // combined similarly. Let's be conservative for now.
+ (op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z),
+ [{
+ // Only use VALU ops when the result is divergent.
+ if (!N->isDivergent())
+ return false;
+
+ // Check constant bus limitations.
+ //
+ // Note: Use !isDivergent as a conservative proxy for whether the value
+ // is in an SGPR (uniform values can end up in VGPRs as well).
+ unsigned ConstantBusUses = 0;
+ for (unsigned i = 0; i < 3; ++i) {
+ if (!Operands[i]->isDivergent() &&
+ !isInlineImmediate(Operands[i].getNode())) {
+ ConstantBusUses++;
+ if (ConstantBusUses >= 2)
+ return false;
+ }
+ }
+
+ return true;
+ }]
+> {
+ let PredicateCodeUsesOperands = 1;
+}
+
let SubtargetPredicate = isGFX9 in {
def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -502,6 +573,22 @@ def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B3
def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
+
+
+class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
+ // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
+ (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
+ (inst i32:$src0, i32:$src1, i32:$src2)
+>;
+
+def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32>;
+def : ThreeOp_i32_Pats<add, shl, V_ADD_LSHL_U32>;
+def : ThreeOp_i32_Pats<add, add, V_ADD3_U32>;
+def : ThreeOp_i32_Pats<shl, or, V_LSHL_OR_B32>;
+def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32>;
+def : ThreeOp_i32_Pats<or, or, V_OR3_B32>;
+def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>;
+
} // End SubtargetPredicate = isGFX9
//===----------------------------------------------------------------------===//
@@ -651,23 +738,23 @@ defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
multiclass VOP3_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3be_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3be_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
multiclass VOP3Interp_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
- VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
@@ -775,12 +862,15 @@ defm V_FMA_F16 : VOP3_F16_Real_vi <0x1ee>;
defm V_DIV_FIXUP_F16 : VOP3_F16_Real_vi <0x1ef>;
defm V_INTERP_P2_F16 : VOP3Interp_F16_Real_vi <0x276>;
+let FPDPRounding = 1 in {
defm V_MAD_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ea, "V_MAD_F16", "v_mad_legacy_f16">;
-defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
-defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
defm V_FMA_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16", "v_fma_legacy_f16">;
defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">;
defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16", "v_interp_p2_legacy_f16">;
+} // End FPDPRounding = 1
+
+defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
+defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
@@ -813,6 +903,9 @@ defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>;
defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>;
defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>;
+defm V_READLANE_B32 : VOP3_Real_vi <0x289>;
+defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>;
+
defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
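The ThreeOp_i32_Pats definitions above fold a two-instruction expression into one GFX9 three-source op when the ThreeOpFrag predicate (divergent result, at most one constant-bus operand) holds. A C++ sketch of the reference semantics (illustrative only; masking the shift amount to 5 bits is an assumption, matching the 32-bit VALU shifts):

#include <cassert>
#include <cstdint>

// Reference semantics for some of the fused GFX9 3-source ops matched above.
static uint32_t lshl_add_u32(uint32_t a, uint32_t b, uint32_t c) { return (a << (b & 31)) + c; } // V_LSHL_ADD_U32
static uint32_t add3_u32(uint32_t a, uint32_t b, uint32_t c)     { return a + b + c; }           // V_ADD3_U32
static uint32_t and_or_b32(uint32_t a, uint32_t b, uint32_t c)   { return (a & b) | c; }         // V_AND_OR_B32
static uint32_t xad_u32(uint32_t a, uint32_t b, uint32_t c)      { return (a ^ b) + c; }         // V_XAD_U32

int main() {
  assert(lshl_add_u32(3, 4, 5) == (3u << 4) + 5u);
  assert(add3_u32(1, 2, 3) == 6u);
  assert(and_or_b32(0xf0, 0x3c, 0x01) == ((0xf0u & 0x3cu) | 0x01u));
  assert(xad_u32(0xff, 0x0f, 1) == ((0xffu ^ 0x0fu) + 1u));
}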
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index b51828b54679..91b45583c848 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -42,14 +42,16 @@ class VOP3_VOP3PInst<string OpName, VOPProfile P, bit UseTiedOutput = 0,
}
let isCommutable = 1 in {
-def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
def V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
def V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
+let FPDPRounding = 1 in {
+def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
-def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum>;
-def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum>;
+} // End FPDPRounding = 1
+def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
+def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;
def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
@@ -137,12 +139,14 @@ let SubtargetPredicate = HasMadMixInsts in {
let isCommutable = 1 in {
def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
+let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
let ClampLo = 0, ClampHi = 1 in {
def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
}
+} // End FPDPRounding = 1
}
defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
@@ -154,18 +158,99 @@ let SubtargetPredicate = HasFmaMixInsts in {
let isCommutable = 1 in {
def V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
+let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
def V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
let ClampLo = 0, ClampHi = 1 in {
def V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
}
+} // End FPDPRounding = 1
}
defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
-let SubtargetPredicate = HasDLInsts in {
+// Defines patterns that extract signed 4bit from each Idx[0].
+foreach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
+ def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
+ (sra (shl node:$src, (i32 Idx[1])), (i32 28))>;
+
+// Defines code pattern that extracts U(unsigned/signed) 4/8bit from FromBitIndex.
+class Extract<int FromBitIndex, int BitMask, bit U>: PatFrag<
+ (ops node:$src),
+ !if (!or (!and (!eq (BitMask, 255), !eq (FromBitIndex, 24)), !eq (FromBitIndex, 28)), // last element
+ !if (U, (srl node:$src, (i32 FromBitIndex)), (sra node:$src, (i32 FromBitIndex))),
+ !if (!eq (FromBitIndex, 0), // first element
+ !if (U, (and node:$src, (i32 BitMask)),
+ !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
+ (sext_inreg node:$src, i8))),
+ !if (U, (and (srl node:$src, (i32 FromBitIndex)), (i32 BitMask)),
+ !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
+ (sext_inreg (srl node:$src, (i32 FromBitIndex)), i8)))))>;
+
+
+foreach Type = ["I", "U"] in
+ foreach Index = 0-3 in {
+ // Defines patterns that extract each Index'ed 8bit from an unsigned
+ // 32bit scalar value;
+ def #Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !if (!eq (Type, "U"), 1, 0)>;
+
+ // Defines multiplication patterns where the multiplication is happening on each
+ // Index'ed 8bit of a 32bit scalar value.
+
+ def Mul#Type#_Elt#Index : PatFrag<
+ (ops node:$src0, node:$src1),
+ (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
+ (!cast<Extract>(#Type#Index#"_8bit") node:$src0),
+ (!cast<Extract>(#Type#Index#"_8bit") node:$src1))>;
+ }
+
+// Different variants of dot8 patterns cause a huge increase in the compile time.
+// Define non-associative/commutative add/mul to prevent permutation in the dot8
+// pattern.
+def NonACAdd : SDNode<"ISD::ADD" , SDTIntBinOp>;
+def NonACAdd_oneuse : HasOneUseBinOp<NonACAdd>;
+
+def NonACAMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24" , SDTIntBinOp>;
+def NonACAMDGPUmul_u24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_u24>;
+
+def NonACAMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24" , SDTIntBinOp>;
+def NonACAMDGPUmul_i24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_i24>;
+
+foreach Type = ["I", "U"] in
+ foreach Index = 0-7 in {
+ // Defines patterns that extract each Index'ed 4bit from an unsigned
+ // 32bit scalar value;
+ def #Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !if (!eq (Type, "U"), 1, 0)>;
+
+ // Defines multiplication patterns where the multiplication is happening on each
+ // Index'ed 8bit of a 32bit scalar value.
+ def Mul#Type#Index#"_4bit" : PatFrag<
+ (ops node:$src0, node:$src1),
+ (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
+ (!cast<Extract>(#Type#Index#"_4bit") node:$src0),
+ (!cast<Extract>(#Type#Index#"_4bit") node:$src1))>;
+ }
+
+class UDot2Pat<Instruction Inst> : GCNPat <
+ (add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
+ (srl i32:$src1, (i32 16))), i32:$src2),
+ (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
+ (and i32:$src1, (i32 65535)))
+ ),
+ (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
+>;
+
+class SDot2Pat<Instruction Inst> : GCNPat <
+ (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
+ (sra i32:$src1, (i32 16))), i32:$src2),
+ (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
+ (sext_inreg i32:$src1, i16))),
+ (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
+>;
+
+let SubtargetPredicate = HasDotInsts in {
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
@@ -192,7 +277,32 @@ defm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>;
defm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>;
defm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>;
-} // End SubtargetPredicate = HasDLInsts
+def : UDot2Pat<V_DOT2_U32_U16>;
+def : SDot2Pat<V_DOT2_I32_I16>;
+
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
+ (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+ [1, 2, 3, 4, 5, 6, 7], lhs, y,
+ (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
+// in the compile time. Directly handle the pattern generated by the FE here.
+foreach Type = ["U", "I"] in
+ def : GCNPat <
+ !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
+ [7, 1, 2, 3, 4, 5, 6], lhs, y,
+ (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
+ (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
+
+} // End SubtargetPredicate = HasDotInsts
multiclass VOP3P_Real_vi<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -242,7 +352,7 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
}
-let SubtargetPredicate = HasDLInsts in {
+let SubtargetPredicate = HasDotInsts in {
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
@@ -252,4 +362,4 @@ defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
-} // End SubtargetPredicate = HasDLInsts
+} // End SubtargetPredicate = HasDotInsts
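The UDot2Pat/SDot2Pat classes and the dot4/dot8 foreach patterns above match sums of lane-wise products (plus an accumulator) onto the v_dot* instructions. A C++ sketch of the reference semantics for the unsigned dot2 and dot4 forms (illustrative only):

#include <cassert>
#include <cstdint>

// v_dot2_u32_u16: multiply the two unsigned 16-bit lanes of each source and
// add the accumulator; v_dot4_u32_u8 does the same over four 8-bit lanes.
static uint32_t dot2_u32_u16(uint32_t a, uint32_t b, uint32_t acc) {
  return (a & 0xffff) * (b & 0xffff) + (a >> 16) * (b >> 16) + acc;
}

static uint32_t dot4_u32_u8(uint32_t a, uint32_t b, uint32_t acc) {
  uint32_t sum = acc;
  for (int i = 0; i < 4; ++i)
    sum += ((a >> (8 * i)) & 0xff) * ((b >> (8 * i)) & 0xff);
  return sum;
}

int main() {
  assert(dot2_u32_u16(0x00020003u, 0x00040005u, 7u) == 2u * 4u + 3u * 5u + 7u);
  assert(dot4_u32_u8(0x01020304u, 0x01010101u, 0u) == 1u + 2u + 3u + 4u);
}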
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index cc6b8116afee..091cac8cd35c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -635,6 +635,17 @@ def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
+def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
+def : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
+def : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
+def : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
+def : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
+def : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
+def : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
+def : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
+def : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
+def : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
+
class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat <
(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
@@ -656,6 +667,14 @@ def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
+def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
+def : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
+def : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
+def : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;
+
+
def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
@@ -670,6 +689,13 @@ def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
+def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;
+def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;
+def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
+def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
+def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
+def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
+
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//
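The new f16 FCMP_Pattern entries above include the unordered predicates, which select onto the "N"-prefixed compares (for example COND_UEQ onto V_CMP_NLG_F16_e64). A C++ sketch checking that "equal or unordered" and "not less-or-greater" agree, including for NaN (illustrative only; float stands in for f16):

#include <cassert>
#include <cmath>

// COND_UEQ (equal or unordered) is equivalent to NLG: !(a < b) && !(a > b).
static bool ueq(float a, float b) { return a == b || std::isnan(a) || std::isnan(b); }
static bool nlg(float a, float b) { return !(a < b) && !(a > b); }

int main() {
  const float nan = std::nanf("");
  assert(ueq(1.0f, 1.0f) == nlg(1.0f, 1.0f));
  assert(ueq(1.0f, 2.0f) == nlg(1.0f, 2.0f));
  assert(ueq(nan, 1.0f)  == nlg(nan, 1.0f));
}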
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f0f7f259f71d..7de7d90d27b3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -420,10 +420,10 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let SDWA = 1;
let Uses = [EXEC];
- let SubtargetPredicate = !if(P.HasExt, HasSDWA, DisableInst);
- let AssemblerPredicate = !if(P.HasExt, HasSDWA, DisableInst);
- let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
- AMDGPUAsmVariants.Disable);
+ let SubtargetPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
+ let AssemblerPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
+ let AsmVariantName = !if(P.HasExtSDWA, AMDGPUAsmVariants.SDWA,
+ AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA";
VOPProfile Pfl = P;
@@ -471,10 +471,10 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
- let SubtargetPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
- let AssemblerPredicate = !if(ps.Pfl.HasSDWA9, HasSDWA9, DisableInst);
- let AsmVariantName = !if(ps.Pfl.HasSDWA9, AMDGPUAsmVariants.SDWA9,
- AMDGPUAsmVariants.Disable);
+ let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
+ let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
+ let AsmVariantName = !if(ps.Pfl.HasExtSDWA9, AMDGPUAsmVariants.SDWA9,
+ AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA9";
// Copy relevant pseudo op flags
@@ -505,9 +505,14 @@ class VOP_DPPe<VOPProfile P> : Enc64 {
let Inst{63-60} = row_mask;
}
-class VOP_DPP <string OpName, VOPProfile P> :
- InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, []>,
- VOP_DPPe<P> {
+class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, pattern>,
+ VOP <OpName>,
+ SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE>,
+ MnemonicAlias <OpName#"_dpp", OpName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
let mayLoad = 0;
let mayStore = 0;
@@ -517,15 +522,99 @@ class VOP_DPP <string OpName, VOPProfile P> :
let VALU = 1;
let DPP = 1;
let Size = 8;
+ let Uses = [EXEC];
+ let isConvergent = 1;
+
+ string Mnemonic = OpName;
+ string AsmOperands = P.AsmDPP;
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
let SubtargetPredicate = HasDPP;
- let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst);
- let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
- AMDGPUAsmVariants.Disable);
+ let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
+ let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
+ AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
let DecoderNamespace = "DPP";
+
+ VOPProfile Pfl = P;
+}
+
+class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // Copy relevant pseudo op flags
+ let isConvergent = ps.isConvergent;
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AssemblerPredicate = ps.AssemblerPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let DecoderNamespace = ps.DecoderNamespace;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class getNumNodeArgs<SDPatternOperator Op> {
+ SDNode N = !cast<SDNode>(Op);
+ SDTypeProfile TP = N.TypeProfile;
+ int ret = TP.NumOperands;
+}
+
+
+class getDivergentFrag<SDPatternOperator Op> {
+
+ int NumSrcArgs = getNumNodeArgs<Op>.ret;
+ PatFrag ret = PatFrag <
+ !if(!eq(NumSrcArgs, 1),
+ (ops node:$src0),
+ !if(!eq(NumSrcArgs, 2),
+ (ops node:$src0, node:$src1),
+ (ops node:$src0, node:$src1, node:$src2))),
+ !if(!eq(NumSrcArgs, 1),
+ (Op $src0),
+ !if(!eq(NumSrcArgs, 2),
+ (Op $src0, $src1),
+ (Op $src0, $src1, $src2))),
+ [{ return N->isDivergent(); }]
+ >;
+}
+
+class VOPPatGen<SDPatternOperator Op, VOPProfile P> {
+
+ PatFrag Operator = getDivergentFrag < Op >.ret;
+
+ dag Ins = !foreach(tmp, P.Ins32, !subst(ins, Operator,
+ !subst(P.Src0RC32, P.Src0VT,
+ !subst(P.Src1RC32, P.Src1VT, tmp))));
+
+
+ dag Outs = !foreach(tmp, P.Outs32, !subst(outs, set,
+ !subst(P.DstRC, P.DstVT, tmp)));
+
+ list<dag> ret = [!con(Outs, (set Ins))];
+}
+
+class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> {
+ list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []);
+}
+
+class DivergentFragOrOp<SDPatternOperator Op, VOPProfile P> {
+ SDPatternOperator ret = !if(!eq(P.NeedPatGen,PatGenMode.Pattern),
+ !if(!isa<SDNode>(Op), getDivergentFrag<Op>.ret, Op), Op);
}
include "VOPCInstructions.td"
diff --git a/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index 1acae3a88870..6f5bbd3b4ef3 100644
--- a/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -26,12 +26,6 @@ static Reloc::Model getRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
/// ARCTargetMachine ctor - Create an ILP32 architecture model
ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -43,7 +37,7 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
"e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-"
"f32:32:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
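The ARCTargetMachine change above drops the local helper in favour of the shared getEffectiveCodeModel overload that takes an explicit default. The behaviour is the usual optional-with-fallback idiom, roughly as below (illustrative only; these are not LLVM's actual types):

#include <optional>

enum class CodeModel { Small, Medium, Large };

// Equivalent of passing CodeModel::Small as the default: use the requested
// model if one was given, otherwise fall back to Small.
static CodeModel effectiveCodeModel(std::optional<CodeModel> CM) {
  return CM.value_or(CodeModel::Small);
}

int main() { return effectiveCodeModel(std::nullopt) == CodeModel::Small ? 0 : 1; }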
diff --git a/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp b/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
index 0c627d04698b..9c820c2fc595 100644
--- a/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
@@ -20,7 +20,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -29,6 +28,12 @@ using namespace llvm;
#include "ARCGenAsmWriter.inc"
+template <class T>
+static const char *BadConditionCode(T cc) {
+ LLVM_DEBUG(dbgs() << "Unknown condition code passed: " << cc << "\n");
+ return "{unknown-cc}";
+}
+
static const char *ARCBRCondCodeToString(ARCCC::BRCondCode BRCC) {
switch (BRCC) {
case ARCCC::BREQ:
@@ -44,7 +49,7 @@ static const char *ARCBRCondCodeToString(ARCCC::BRCondCode BRCC) {
case ARCCC::BRHS:
return "hs";
}
- llvm_unreachable("Unhandled ARCCC::BRCondCode");
+ return BadConditionCode(BRCC);
}
static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
@@ -86,7 +91,7 @@ static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
case ARCCC::Z:
return "z";
}
- llvm_unreachable("Unhandled ARCCC::CondCode");
+ return BadConditionCode(CC);
}
void ARCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
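The ARCInstPrinter change above replaces llvm_unreachable with a logging fallback that returns "{unknown-cc}", so an unexpected condition code degrades gracefully instead of aborting. A standalone C++ sketch of the same pattern (illustrative only, outside LLVM):

#include <cstdio>

enum class CondCode { EQ, NE, Unknown = 99 };

// Known codes map to their mnemonics; anything else is logged and printed
// as a placeholder rather than treated as unreachable.
static const char *condCodeToString(CondCode cc) {
  switch (cc) {
  case CondCode::EQ: return "eq";
  case CondCode::NE: return "ne";
  default:
    std::fprintf(stderr, "Unknown condition code passed: %d\n", static_cast<int>(cc));
    return "{unknown-cc}";
  }
}

int main() { std::puts(condCodeToString(CondCode::Unknown)); }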
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 2e62a0790418..3db60f1c16d6 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -61,6 +61,11 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"floating point",
[FeatureFPARMv8]>;
+def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
+ "Enable full half-precision "
+ "floating point fml instructions",
+ [FeatureFullFP16]>;
+
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports "
"single precision only">;
@@ -194,6 +199,10 @@ def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
"SlowLoadDSubregister", "true",
"Loading into D subregs is slow">;
+def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp",
+ "UseWideStrideVFP", "true",
+ "Use a wide stride when allocating VFP registers">;
+
// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
"DontWidenVMOVS", "true",
@@ -256,6 +265,9 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
+def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopAlignment","2",
+ "Prefer 32-bit alignment for loops">;
+
/// Some instructions update CPSR partially, which can add false dependency for
/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
/// mapped to a separate physical register. Avoid partial CPSR update for these
@@ -351,6 +363,11 @@ def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
+// Armv8.5-A extensions
+
+def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
+ "Enable v8.5a Speculation Barrier" >;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -440,6 +457,10 @@ def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
"Support ARM v8.4a instructions",
[HasV8_3aOps, FeatureDotProd]>;
+def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
+ "Support ARM v8.5a instructions",
+ [HasV8_4aOps, FeatureSB]>;
+
//===----------------------------------------------------------------------===//
// ARM Processor subtarget features.
//
@@ -482,8 +503,25 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
-def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
- "Samsung Exynos-Mx processors", []>;
+def ProcExynos : SubtargetFeature<"exynos", "ARMProcFamily", "Exynos",
+ "Samsung Exynos processors",
+ [FeatureZCZeroing,
+ FeatureUseWideStrideVFP,
+ FeatureUseAA,
+ FeatureSplatVFPToNeon,
+ FeatureSlowVGETLNi32,
+ FeatureSlowVDUP32,
+ FeatureSlowFPBrcc,
+ FeatureProfUnpredicate,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureHasRetAddrStack,
+ FeatureFuseLiterals,
+ FeatureFuseAES,
+ FeatureExpandMLx,
+ FeatureCrypto,
+ FeatureCRC]>;
def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
"Cortex-R4 ARM processors", []>;
@@ -659,6 +697,20 @@ def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps,
FeatureRAS,
FeatureDotProd]>;
+def ARMv85a : Architecture<"armv8.5-a", "ARMv85a", [HasV8_5aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
@@ -865,6 +917,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
FeatureVFP4,
+ FeatureUseWideStrideVFP,
FeatureMP,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -926,6 +979,7 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
ProcM3,
+ FeaturePrefLoopAlign32,
FeatureHasNoBranchPredictor]>;
def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
@@ -936,6 +990,8 @@ def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
FeatureVFP4,
FeatureVFPOnlySP,
FeatureD16,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
@@ -950,6 +1006,8 @@ def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
FeatureFPARMv8,
FeatureD16,
FeatureVFPOnlySP,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
@@ -985,7 +1043,7 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
FeatureAvoidPartialCPSR,
FeatureCheapPredicableCPSR]>;
-def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
+def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72,
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
@@ -1017,29 +1075,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureZCZeroing,
FeatureNoPostRASched]>;
-def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
-
-def : ProcNoItin<"exynos-m4", [ARMv8a, ProcExynosM1,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC]>;
+def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynos]>;
+def : ProcNoItin<"exynos-m4", [ARMv82a, ProcExynos,
+ FeatureFullFP16,
+ FeatureDotProd]>;
def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
FeatureHWDivThumb,
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index b227eaed8d61..b7cd3a0c2dae 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -367,6 +367,18 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
unsigned RC;
+ bool FirstHalf;
+ const ARMBaseTargetMachine &ATM =
+ static_cast<const ARMBaseTargetMachine &>(TM);
+
+ // 'Q' should correspond to the low order register and 'R' to the high
+ // order register. Whether this corresponds to the upper or lower half
+ // depends on the endianness mode.
+ if (ExtraCode[0] == 'Q')
+ FirstHalf = ATM.isLittleEndian();
+ else
+ // ExtraCode[0] == 'R'.
+ FirstHalf = !ATM.isLittleEndian();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (InlineAsm::hasRegClassConstraint(Flags, RC) &&
ARM::GPRPairRegClass.hasSubClassEq(TRI->getRegClass(RC))) {
@@ -376,14 +388,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!MO.isReg())
return true;
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ?
+ unsigned Reg = TRI->getSubReg(MO.getReg(), FirstHalf ?
ARM::gsub_0 : ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
if (NumVals != 2)
return true;
- unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
+ unsigned RegOp = FirstHalf ? OpNum : OpNum + 1;
if (RegOp >= MI->getNumOperands())
return true;
const MachineOperand &MO = MI->getOperand(RegOp);
@@ -815,15 +827,31 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
- bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT);
+ bool IsIndirect =
+ (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB));
if (!IsIndirect)
return getSymbol(GV);
SmallString<128> Name;
- Name = "__imp_";
+ if (TargetFlags & ARMII::MO_DLLIMPORT)
+ Name = "__imp_";
+ else if (TargetFlags & ARMII::MO_COFFSTUB)
+ Name = ".refptr.";
getNameWithPrefix(Name, GV);
- return OutContext.getOrCreateSymbol(Name);
+ MCSymbol *MCSym = OutContext.getOrCreateSymbol(Name);
+
+ if (TargetFlags & ARMII::MO_COFFSTUB) {
+ MachineModuleInfoCOFF &MMICOFF =
+ MMI->getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMICOFF.getGVStubEntry(MCSym);
+
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), true);
+ }
+
+ return MCSym;
} else if (Subtarget->isTargetELF()) {
return getSymbol(GV);
}
@@ -1043,10 +1071,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterInfo *TargetRegInfo =
+ MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo();
const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned FramePtr = TargetRegInfo->getFrameRegister(MF);
unsigned Opc = MI->getOpcode();
unsigned SrcReg, DstReg;
@@ -1103,7 +1133,9 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
if (MO.isUndef()) {
assert(RegList.empty() &&
"Pad registers must come before restored ones");
- Pad += 4;
+ unsigned Width =
+ TargetRegInfo->getRegSizeInBits(MO.getReg(), MachineRegInfo) / 8;
+ Pad += Width;
continue;
}
RegList.push_back(MO.getReg());
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b1c2031c7d7b..bbebed59c851 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -708,8 +708,12 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return MCID.getSize();
// If this machine instr is an inline asm, measure it.
- if (MI.getOpcode() == ARM::INLINEASM)
- return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ if (MI.getOpcode() == ARM::INLINEASM) {
+ unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
+ Size = alignTo(Size, 4);
+ return Size;
+ }
unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
@@ -935,9 +939,9 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
-bool ARMBaseInstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
// VMOVRRD is also a copy instruction but it requires
// special way of handling. It is more complex copy version
// and since that we are not considering it. For recognition
@@ -971,8 +975,6 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
@@ -984,7 +986,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (TRI->getSpillSize(*RC)) {
case 2:
if (ARM::HPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRH))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -995,14 +997,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::STRi12))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -1013,7 +1015,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
@@ -1021,7 +1023,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
@@ -1029,7 +1031,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else {
// Fallback to STM instruction, which has existed since the dawn of
// time.
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
@@ -1043,14 +1045,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addMemOperand(MMO)
@@ -1063,14 +1065,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
+ get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1086,14 +1089,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
.addFrameIndex(FI)
.addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
+ get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1107,7 +1111,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
@@ -1172,8 +1176,14 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
- const MachineMemOperand *Dummy;
- return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return true;
+ }
+ return false;
}
void ARMBaseInstrInfo::
@@ -1386,8 +1396,14 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
- const MachineMemOperand *Dummy;
- return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return true;
+ }
+ return false;
}
/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
@@ -1432,9 +1448,8 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
SmallVector<unsigned, 6> ScratchRegs;
for(unsigned I = 5; I < MI->getNumOperands(); ++I)
ScratchRegs.push_back(MI->getOperand(I).getReg());
- llvm::sort(ScratchRegs.begin(), ScratchRegs.end(),
- [&TRI](const unsigned &Reg1,
- const unsigned &Reg2) -> bool {
+ llvm::sort(ScratchRegs,
+ [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
return TRI.getEncodingValue(Reg1) <
TRI.getEncodingValue(Reg2);
});
@@ -1590,11 +1605,10 @@ void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
unsigned CPI = Orig.getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
- MachineInstrBuilder MIB =
- BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
- .addConstantPoolIndex(CPI)
- .addImm(PCLabelId);
- MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
+ BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
+ .addConstantPoolIndex(CPI)
+ .addImm(PCLabelId)
+ .cloneMemRefs(Orig);
break;
}
}
@@ -2185,6 +2199,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::tSUBSi8, ARM::tSUBi8},
{ARM::tSUBSrr, ARM::tSUBrr},
{ARM::tSBCS, ARM::tSBC},
+ {ARM::tRSBS, ARM::tRSB},
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
@@ -2949,6 +2964,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+ MI->clearRegisterDeads(ARM::CPSR);
+
return true;
}
@@ -4534,9 +4551,9 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
- .add(predOps(ARMCC::AL));
+ .addImm(0)
+ .cloneMemRefs(*MI)
+ .add(predOps(ARMCC::AL));
}
bool
@@ -5061,3 +5078,32 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
}
llvm_unreachable("Target dependent opcode missing");
}
+
+std::pair<unsigned, unsigned>
+ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = ARMII::MO_OPTION_MASK;
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace ARMII;
+
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
+ return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace ARMII;
+
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_COFFSTUB, "arm-coffstub"},
+ {MO_GOT, "arm-got"},
+ {MO_SBREL, "arm-sbrel"},
+ {MO_DLLIMPORT, "arm-dllimport"},
+ {MO_SECREL, "arm-secrel"},
+ {MO_NONLAZY, "arm-nonlazy"}};
+ return makeArrayRef(TargetFlags);
+}
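
The two serializable-flag accessors added above are what allow MIR to print and parse ARM operand target flags by name. As a rough illustration of the split performed by decomposeMachineOperandsTargetFlags (assuming, consistently with the two tables above, that MO_LO16 sits inside MO_OPTION_MASK while MO_NONLAZY is one of the bit flags outside it):

    // Illustration only; assumes ARMBaseInfo.h is included for the ARMII enum.
    unsigned TF = ARMII::MO_LO16 | ARMII::MO_NONLAZY;
    unsigned Direct  = TF & ARMII::MO_OPTION_MASK;   // ARMII::MO_LO16, named "arm-lo16"
    unsigned Bitmask = TF & ~ARMII::MO_OPTION_MASK;  // ARMII::MO_NONLAZY, named "arm-nonlazy"
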
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index b54be15097b1..de1f307083ba 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -101,6 +101,12 @@ protected:
unsigned OpIdx1,
unsigned OpIdx2) const override;
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another, return true along with
+ /// @Source machine operand and @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -201,9 +207,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -331,6 +334,13 @@ public:
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr &MI) const;
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
+
private:
unsigned getInstBundleLength(const MachineInstr &MI) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 5342e6e2cd13..02b3daf3c6fd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -209,6 +209,11 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+bool ARMBaseRegisterInfo::
+isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const {
+ return !getReservedRegs(MF).test(PhysReg);
+}
+
const TargetRegisterClass *
ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const {
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index f755f66a0f3a..45d29ebc0bd3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -131,6 +131,8 @@ public:
CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ unsigned PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
@@ -154,7 +156,6 @@ public:
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
bool hasBasePointer(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 47f998b696f5..8e80c32bcf89 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -237,7 +237,7 @@ void ARMCallLowering::splitToValueTypes(
/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg,
+ const Value *Val, ArrayRef<unsigned> VRegs,
MachineInstrBuilder &Ret) const {
if (!Val)
// Nothing to do here.
@@ -251,16 +251,24 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!isSupportedType(DL, TLI, Val->getType()))
return false;
- SmallVector<ArgInfo, 4> SplitVTs;
- SmallVector<unsigned, 4> Regs;
- ArgInfo RetInfo(VReg, Val->getType());
- setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
- Regs.push_back(Reg);
- });
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
- if (Regs.size() > 1)
- MIRBuilder.buildUnmerge(Regs, VReg);
+ SmallVector<ArgInfo, 4> SplitVTs;
+ LLVMContext &Ctx = Val->getType()->getContext();
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo(VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx));
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+
+ SmallVector<unsigned, 4> Regs;
+ splitToValueTypes(
+ CurArgInfo, SplitVTs, MF,
+ [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); });
+ if (Regs.size() > 1)
+ MIRBuilder.buildUnmerge(Regs, VRegs[i]);
+ }
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
@@ -270,14 +278,15 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- assert(!Val == !VReg && "Return value without a vreg");
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
+ assert(!Val == VRegs.empty() && "Return value without a vreg");
auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
unsigned Opcode = ST.getReturnOpcode();
auto Ret = MIRBuilder.buildInstrNoInsert(Opcode).add(predOps(ARMCC::AL));
- if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
+ if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
return false;
MIRBuilder.insertInstr(Ret);
@@ -420,7 +429,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
auto &TLI = *getTLI<ARMTargetLowering>();
auto Subtarget = TLI.getSubtarget();
- if (Subtarget->isThumb())
+ if (Subtarget->isThumb1Only())
return false;
// Quick exit if there aren't any args
@@ -491,6 +500,22 @@ struct CallReturnHandler : public IncomingValueHandler {
MachineInstrBuilder MIB;
};
+// FIXME: This should move to the ARMSubtarget when it supports all the opcodes.
+unsigned getCallOpcode(const ARMSubtarget &STI, bool isDirect) {
+ if (isDirect)
+ return STI.isThumb() ? ARM::tBL : ARM::BL;
+
+ if (STI.isThumb())
+ return ARM::tBLXr;
+
+ if (STI.hasV5TOps())
+ return ARM::BLX;
+
+ if (STI.hasV4TOps())
+ return ARM::BX_CALL;
+
+ return ARM::BMOVPCRX_CALL;
+}
} // end anonymous namespace
bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
@@ -508,27 +533,34 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (STI.genLongCalls())
return false;
+ if (STI.isThumb1Only())
+ return false;
+
auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN);
// Create the call instruction so we can add the implicit uses of arg
// registers, but don't insert it yet.
bool isDirect = !Callee.isReg();
- auto CallOpcode =
- isDirect ? ARM::BL
- : STI.hasV5TOps()
- ? ARM::BLX
- : STI.hasV4TOps() ? ARM::BX_CALL : ARM::BMOVPCRX_CALL;
- auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode)
- .add(Callee)
- .addRegMask(TRI->getCallPreservedMask(MF, CallConv));
- if (Callee.isReg()) {
+ auto CallOpcode = getCallOpcode(STI, isDirect);
+ auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode);
+
+ bool isThumb = STI.isThumb();
+ if (isThumb)
+ MIB.add(predOps(ARMCC::AL));
+
+ MIB.add(Callee);
+ if (!isDirect) {
auto CalleeReg = Callee.getReg();
- if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg))
- MIB->getOperand(0).setReg(constrainOperandRegClass(
+ if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) {
+ unsigned CalleeIdx = isThumb ? 2 : 0;
+ MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass(
MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
- *MIB.getInstr(), MIB->getDesc(), Callee, 0));
+ *MIB.getInstr(), MIB->getDesc(), Callee, CalleeIdx));
+ }
}
+ MIB.addRegMask(TRI->getCallPreservedMask(MF, CallConv));
+
SmallVector<ArgInfo, 8> ArgInfos;
for (auto Arg : OrigArgs) {
if (!isSupportedType(DL, TLI, Arg.Ty))
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
index 86854c53f179..45a988a2f00e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -33,8 +33,8 @@ class ARMCallLowering : public CallLowering {
public:
ARMCallLowering(const ARMTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
@@ -45,7 +45,8 @@ public:
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
- unsigned VReg, MachineInstrBuilder &Ret) const;
+ ArrayRef<unsigned> VRegs,
+ MachineInstrBuilder &Ret) const;
using SplitArgTy = std::function<void(unsigned Reg, uint64_t Offset)>;
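
The switch from a single VReg to ArrayRef<unsigned> VRegs in the declarations above mirrors the .cpp change earlier in this diff: a return value is first split with ComputeValueVTs, and each resulting value type presumably arrives in its own virtual register from the GlobalISel IR translator. A sketch of that invariant (identifier names here are illustrative, not part of the patch):

    // For a function returning e.g. {i32, i32}, ComputeValueVTs yields two
    // EVTs, so lowerReturn is handed two virtual registers, one per element.
    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, RetTy, SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "expected one virtual register per split value type");
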
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 24071277427a..b631c2bc687b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -54,47 +54,108 @@ EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
cl::desc("Use DSP instructions for scalar operations\
with immediate operands"));
-namespace {
+// The goal of this pass is to enable more efficient code generation for
+// operations on narrow types (i.e. types with < 32-bits) and this is a
+// motivating IR code example:
+//
+// define hidden i32 @cmp(i8 zeroext) {
+// %2 = add i8 %0, -49
+// %3 = icmp ult i8 %2, 3
+// ..
+// }
+//
+// The issue here is that i8 is type-legalized to i32 because i8 is not a
+// legal type. Thus, arithmetic is done in integer-precision, but then the
+// byte value is masked out as follows:
+//
+// t19: i32 = add t4, Constant:i32<-49>
+// t24: i32 = and t19, Constant:i32<255>
+//
+// Consequently, we generate code like this:
+//
+// subs r0, #49
+// uxtb r1, r0
+// cmp r1, #3
+//
+// This shows that masking out the byte value results in generation of
+// the UXTB instruction. This is not optimal as r0 already contains the byte
+// value we need, and so instead we can just generate:
+//
+// sub.w r1, r0, #49
+// cmp r1, #3
+//
+// We achieve this by type promoting the IR to i32 like so for this example:
+//
+// define i32 @cmp(i8 zeroext %c) {
+// %0 = zext i8 %c to i32
+// %c.off = add i32 %0, -49
+// %1 = icmp ult i32 %c.off, 3
+// ..
+// }
+//
+// For this to be valid and legal, we need to prove that the i32 add is
+// producing the same value as the i8 addition, and that e.g. no overflow
+// happens.
+//
+// A brief sketch of the algorithm and some terminology.
+// We pattern match interesting IR patterns:
+// - which have "sources": instructions producing narrow values (i8, i16), and
+// - which have "sinks": instructions consuming these narrow values.
+//
+// We collect all instructions connecting sources and sinks in a worklist, so
+// that we can mutate these instructions and perform type promotion when it is
+// legal to do so.
+namespace {
class IRPromoter {
SmallPtrSet<Value*, 8> NewInsts;
- SmallVector<Instruction*, 4> InstsToRemove;
+ SmallPtrSet<Instruction*, 4> InstsToRemove;
+ DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
+ SmallPtrSet<Value*, 8> Promoted;
Module *M = nullptr;
LLVMContext &Ctx;
+ IntegerType *ExtTy = nullptr;
+ IntegerType *OrigTy = nullptr;
+ SmallPtrSetImpl<Value*> *Visited;
+ SmallPtrSetImpl<Value*> *Sources;
+ SmallPtrSetImpl<Instruction*> *Sinks;
+ SmallPtrSetImpl<Instruction*> *SafeToPromote;
+
+ void ReplaceAllUsersOfWith(Value *From, Value *To);
+ void PrepareConstants(void);
+ void ExtendSources(void);
+ void ConvertTruncs(void);
+ void PromoteTree(void);
+ void TruncateSinks(void);
+ void Cleanup(void);
public:
- IRPromoter(Module *M) : M(M), Ctx(M->getContext()) { }
+ IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
+ ExtTy(Type::getInt32Ty(Ctx)) { }
- void Cleanup() {
- for (auto *I : InstsToRemove) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
- I->dropAllReferences();
- I->eraseFromParent();
- }
- InstsToRemove.clear();
- NewInsts.clear();
- }
void Mutate(Type *OrigTy,
SmallPtrSetImpl<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Leaves,
- SmallPtrSetImpl<Instruction*> &Roots);
+ SmallPtrSetImpl<Value*> &Sources,
+ SmallPtrSetImpl<Instruction*> &Sinks,
+ SmallPtrSetImpl<Instruction*> &SafeToPromote);
};
class ARMCodeGenPrepare : public FunctionPass {
const ARMSubtarget *ST = nullptr;
IRPromoter *Promoter = nullptr;
std::set<Value*> AllVisited;
- Type *OrigTy = nullptr;
- unsigned TypeSize = 0;
+ SmallPtrSet<Instruction*, 8> SafeToPromote;
- bool isNarrowInstSupported(Instruction *I);
+ bool isSafeOverflow(Instruction *I);
bool isSupportedValue(Value *V);
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V);
public:
static char ID;
+ static unsigned TypeSize;
+ Type *OrigTy = nullptr;
ARMCodeGenPrepare() : FunctionPass(ID) {}
@@ -111,8 +172,7 @@ public:
}
-/// Can the given value generate sign bits.
-static bool isSigned(Value *V) {
+static bool generateSignBits(Value *V) {
if (!isa<Instruction>(V))
return false;
@@ -121,120 +181,226 @@ static bool isSigned(Value *V) {
Opc == Instruction::SRem;
}
+static bool EqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
+}
+
+static bool LessOrEqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize;
+}
+
+static bool GreaterThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
+}
+
+static bool LessThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize;
+}
+
/// Some instructions can use 8- and 16-bit operands, and we don't need to
/// promote anything larger. We disallow booleans to make life easier when
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
/// types are accepted so we can handle switches.
static bool isSupportedType(Value *V) {
- if (V->getType()->isVoidTy())
+ Type *Ty = V->getType();
+
+ // Allow voids and pointers, these won't be promoted.
+ if (Ty->isVoidTy() || Ty->isPointerTy())
return true;
- const IntegerType *IntTy = dyn_cast<IntegerType>(V->getType());
- if (!IntTy)
- return false;
+ if (auto *Ld = dyn_cast<LoadInst>(V))
+ Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
- // Don't try to promote boolean values.
- if (IntTy->getBitWidth() == 1)
+ if (!isa<IntegerType>(Ty) ||
+ cast<IntegerType>(V->getType())->getBitWidth() == 1)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- return isSupportedType(ZExt->getOperand(0));
+ return LessOrEqualTypeSize(V);
+}
- return IntTy->getBitWidth() <= 16;
+/// Return true if the given value is a source in the use-def chain, producing
+/// a narrow 'TypeSize' value. These values will be zext to start the promotion
+/// of the tree to i32. We guarantee that these won't populate the upper bits
+/// of the register. ZExt on the loads will be free, and the same for call
+/// return values because we only accept ones that guarantee a zeroext ret val.
+/// Many arguments will have the zeroext attribute too, so those would be free
+/// too.
+static bool isSource(Value *V) {
+ if (!isa<IntegerType>(V->getType()))
+ return false;
+
+ // TODO Allow zext to be sources.
+ if (isa<Argument>(V))
+ return true;
+ else if (isa<LoadInst>(V))
+ return true;
+ else if (isa<BitCastInst>(V))
+ return true;
+ else if (auto *Call = dyn_cast<CallInst>(V))
+ return Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ else if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return EqualTypeSize(Trunc);
+ return false;
}
/// Return true if V will require any promoted values to be truncated for the
-/// use to be valid.
+/// IR to remain valid. We can't mutate the value type of these
+/// instructions.
static bool isSink(Value *V) {
- auto UsesNarrowValue = [](Value *V) {
- return V->getType()->getScalarSizeInBits() <= 32;
- };
-
+ // TODO The truncate also isn't actually necessary because we have already
+ // proved that the data value is kept within the range of the original data
+ // type.
+
+ // Sinks are:
+ // - points where the value in the register is being observed, such as an
+ // icmp, switch or store.
+ // - points where value types have to match, such as calls and returns.
+ // - zexts are included to ease the transformation and are generally removed
+ // later on.
if (auto *Store = dyn_cast<StoreInst>(V))
- return UsesNarrowValue(Store->getValueOperand());
+ return LessOrEqualTypeSize(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
- return UsesNarrowValue(Return->getReturnValue());
+ return LessOrEqualTypeSize(Return->getReturnValue());
+ if (auto *ZExt = dyn_cast<ZExtInst>(V))
+ return GreaterThanTypeSize(ZExt);
+ if (auto *Switch = dyn_cast<SwitchInst>(V))
+ return LessThanTypeSize(Switch->getCondition());
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
return isa<CallInst>(V);
}
-/// Return true if the given value is a leaf that will need to be zext'd.
-static bool isSource(Value *V) {
- if (isa<Argument>(V) && isSupportedType(V))
- return true;
- else if (isa<TruncInst>(V))
- return true;
- else if (auto *ZExt = dyn_cast<ZExtInst>(V))
- // ZExt can be a leaf if its the only user of a load.
- return isa<LoadInst>(ZExt->getOperand(0)) &&
- ZExt->getOperand(0)->hasOneUse();
- else if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Load = dyn_cast<LoadInst>(V)) {
- if (!isa<IntegerType>(Load->getType()))
- return false;
- // A load is a leaf, unless its already just being zext'd.
- if (Load->hasOneUse() && isa<ZExtInst>(*Load->use_begin()))
- return false;
-
- return true;
- }
- return false;
-}
-
/// Return whether the instruction can be promoted within any modifications to
-/// it's operands or result.
-static bool isSafeOverflow(Instruction *I) {
+/// its operands or result.
+bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
+ // FIXME Do we need NSW too?
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
return true;
+ // We can support a potentially overflowing instruction (I) if:
+ // - It is only used by an unsigned icmp.
+ // - The icmp uses a constant.
+ // - The overflowing value (I) is decreasing, i.e. would underflow - wrapping
+ // around zero to become a larger number than before.
+ // - The underflowing instruction (I) also uses a constant.
+ //
+ // We can then use the two constants to calculate whether the result would
+ // wrap with respect to itself in the original bitwidth. If it doesn't wrap,
+ // but just underflows the range, the icmp would give the same result whether the
+ // result has been truncated or not. We calculate this by:
+ // - Zero extending both constants, if needed, to 32-bits.
+ // - Take the absolute value of I's constant, adding this to the icmp const.
+ // - Check that this value is not out of range for the small type. If it is, it
+ // means that it has underflowed enough to wrap around the icmp constant.
+ //
+ // For example:
+ //
+ // %sub = sub i8 %a, 2
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -2 == FE == 254
+ // But if this is evaluated as an i32
+ // %sub = -2 == FF FF FF FE == 4294967294
+ // So the unsigned compares (i8 and i32) would not yield the same result.
+ //
+ // Another way to look at it is:
+ // %a - 2 <= 254
+ // %a + 2 <= 254 + 2
+ // %a <= 256
+ // And we can't represent 256 in the i8 format, so we don't support it.
+ //
+ // Whereas:
+ //
+ // %sub = sub i8 %a, 1
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -1 == FF == 255
+ // As i32:
+ // %sub = -1 == FF FF FF FF == 4294967295
+ //
+ // In this case, the unsigned compare results would be the same and this
+ // would also be true for ult, uge and ugt:
+ // - (255 < 254) == (0xFFFFFFFF < 254) == false
+ // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
+ // - (255 > 254) == (0xFFFFFFFF > 254) == true
+ // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
+ //
+ // To demonstrate why we can't handle increasing values:
+ //
+ // %add = add i8 %a, 2
+ // %cmp = icmp ult i8 %add, 127
+ //
+ // If %a = 254, %add = 256 == (i8 0)
+ // As i32:
+ // %add = 256
+ //
+ // (0 < 127) != (256 < 127)
+
unsigned Opc = I->getOpcode();
- if (Opc == Instruction::Add || Opc == Instruction::Sub) {
- // We don't care if the add or sub could wrap if the value is decreasing
- // and is only being used by an unsigned compare.
- if (!I->hasOneUse() ||
- !isa<ICmpInst>(*I->user_begin()) ||
- !isa<ConstantInt>(I->getOperand(1)))
- return false;
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ return false;
- auto *CI = cast<ICmpInst>(*I->user_begin());
- if (CI->isSigned())
- return false;
+ if (!I->hasOneUse() ||
+ !isa<ICmpInst>(*I->user_begin()) ||
+ !isa<ConstantInt>(I->getOperand(1)))
+ return false;
- bool NegImm = cast<ConstantInt>(I->getOperand(1))->isNegative();
- bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
- ((Opc == Instruction::Add) && NegImm);
- if (!IsDecreasing)
- return false;
+ ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
+ bool NegImm = OverflowConst->isNegative();
+ bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
+ ((Opc == Instruction::Add) && NegImm);
+ if (!IsDecreasing)
+ return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
- return true;
- }
+ // Don't support an icmp that deals with sign bits.
+ auto *CI = cast<ICmpInst>(*I->user_begin());
+ if (CI->isSigned() || CI->isEquality())
+ return false;
- // Otherwise, if an instruction is using a negative immediate we will need
- // to fix it up during the promotion.
- for (auto &Op : I->operands()) {
- if (auto *Const = dyn_cast<ConstantInt>(Op))
- if (Const->isNegative())
- return false;
- }
- return false;
+ ConstantInt *ICmpConst = nullptr;
+ if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
+ ICmpConst = Const;
+ else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ ICmpConst = Const;
+ else
+ return false;
+
+ // Now check that the result can't wrap on itself.
+ APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
+ ICmpConst->getValue().zext(32) : ICmpConst->getValue();
+
+ Total += OverflowConst->getValue().getBitWidth() < 32 ?
+ OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
+
+ APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
+
+ if (Total.getBitWidth() > Max.getBitWidth()) {
+ if (Total.ugt(Max.zext(Total.getBitWidth())))
+ return false;
+ } else if (Max.getBitWidth() > Total.getBitWidth()) {
+ if (Total.zext(Max.getBitWidth()).ugt(Max))
+ return false;
+ } else if (Total.ugt(Max))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
+ return true;
}
static bool shouldPromote(Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
- if (!isa<IntegerType>(V->getType()))
- return false;
+ if (isSource(V))
+ return true;
- if (isa<StoreInst>(I) || isa<TerminatorInst>(I) || isa<TruncInst>(I) ||
- isa<ICmpInst>(I))
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(I))
- return !ZExt->getDestTy()->isIntegerTy(32);
+ if (isa<ICmpInst>(I))
+ return false;
return true;
}
@@ -245,24 +411,16 @@ static bool isPromotedResultSafe(Value *V) {
if (!isa<Instruction>(V))
return true;
- if (isSigned(V))
+ if (generateSignBits(V))
return false;
- // If I is only being used by something that will require its value to be
- // truncated, then we don't care about the promoted result.
- auto *I = cast<Instruction>(V);
- if (I->hasOneUse() && isSink(*I->use_begin()))
- return true;
-
- if (isa<OverflowingBinaryOperator>(I))
- return isSafeOverflow(I);
- return true;
+ return !isa<OverflowingBinaryOperator>(V);
}
/// Return the intrinsic for the instruction that can perform the same
/// operation but on a narrow type. This is using the parallel dsp intrinsics
/// on scalar values.
-static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
+static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
// Whether we use the signed or unsigned versions of these intrinsics
// doesn't matter because we're not using the GE bits that they set in
// the APSR.
@@ -270,124 +428,163 @@ static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
default:
break;
case Instruction::Add:
- return TypeSize == 16 ? Intrinsic::arm_uadd16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
Intrinsic::arm_uadd8;
case Instruction::Sub:
- return TypeSize == 16 ? Intrinsic::arm_usub16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
Intrinsic::arm_usub8;
}
llvm_unreachable("unhandled opcode for narrow intrinsic");
}
-void IRPromoter::Mutate(Type *OrigTy,
- SmallPtrSetImpl<Value*> &Visited,
- SmallPtrSetImpl<Value*> &Leaves,
- SmallPtrSetImpl<Instruction*> &Roots) {
+void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
+ SmallVector<Instruction*, 4> Users;
+ Instruction *InstTo = dyn_cast<Instruction>(To);
+ bool ReplacedAll = true;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To
+ << "\n");
+
+ for (Use &U : From->uses()) {
+ auto *User = cast<Instruction>(U.getUser());
+ if (InstTo && User->isIdenticalTo(InstTo)) {
+ ReplacedAll = false;
+ continue;
+ }
+ Users.push_back(User);
+ }
+
+ for (auto *U : Users)
+ U->replaceUsesOfWith(From, To);
+
+ if (ReplacedAll)
+ if (auto *I = dyn_cast<Instruction>(From))
+ InstsToRemove.insert(I);
+}
+
+void IRPromoter::PrepareConstants() {
IRBuilder<> Builder{Ctx};
- Type *ExtTy = Type::getInt32Ty(M->getContext());
- unsigned TypeSize = OrigTy->getPrimitiveSizeInBits();
- SmallPtrSet<Value*, 8> Promoted;
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << TypeSize
- << " to 32-bits\n");
-
- auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
- SmallVector<Instruction*, 4> Users;
- Instruction *InstTo = dyn_cast<Instruction>(To);
- for (Use &U : From->uses()) {
- auto *User = cast<Instruction>(U.getUser());
- if (InstTo && User->isIdenticalTo(InstTo))
+ // First step is to prepare the instructions for mutation. Most constants
+ // just need to be zero extended into their new type, but complications arise
+ // because:
+ // - For nuw binary operators, negative immediates would need sign extending;
+ // however, instead we'll change them to positive and zext them. We can do
+ // this because:
+ // > The operators that can wrap are: add, sub, mul and shl.
+ // > shl interprets its second operand as unsigned and if the first operand
+ // is an immediate, it will need zext to be nuw.
+ // > I'm assuming mul has to interpret immediates as unsigned for nuw.
+ // > Which leaves the nuw add and sub to be handled; as with shl, if an
+ // immediate is used as operand 0, it will need zext to be nuw.
+ // - We also allow add and sub to safely overflow in certain circumstances
+ // and only when the value (operand 0) is being decreased.
+ //
+ // For adds and subs, that are either nuw or safely wrap and use a negative
+ // immediate as operand 1, we create an equivalent instruction using a
+ // positive immediate. That positive immediate can then be zext along with
+ // all the other immediates later.
+ for (auto *V : *Visited) {
+ if (!isa<Instruction>(V))
+ continue;
+
+ auto *I = cast<Instruction>(V);
+ if (SafeToPromote->count(I)) {
+
+ if (!isa<OverflowingBinaryOperator>(I))
continue;
- Users.push_back(User);
- }
- for (auto &U : Users)
- U->replaceUsesOfWith(From, To);
- };
+ if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (!Const->isNegative())
+ break;
- auto FixConst = [&](ConstantInt *Const, Instruction *I) {
- Constant *NewConst = nullptr;
- if (isSafeOverflow(I)) {
- NewConst = (Const->isNegative()) ?
- ConstantExpr::getSExt(Const, ExtTy) :
- ConstantExpr::getZExt(Const, ExtTy);
- } else {
- uint64_t NewVal = *Const->getValue().getRawData();
- if (Const->getType() == Type::getInt16Ty(Ctx))
- NewVal &= 0xFFFF;
- else
- NewVal &= 0xFF;
- NewConst = ConstantInt::get(ExtTy, NewVal);
+ unsigned Opc = I->getOpcode();
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
+ auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
+ Builder.SetInsertPoint(I);
+ Value *NewVal = Opc == Instruction::Sub ?
+ Builder.CreateAdd(I->getOperand(0), NewConst) :
+ Builder.CreateSub(I->getOperand(0), NewConst);
+ LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
+
+ if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
+ NewInst->copyIRFlags(I);
+ NewInsts.insert(NewInst);
+ }
+ InstsToRemove.insert(I);
+ I->replaceAllUsesWith(NewVal);
+ }
}
- I->replaceUsesOfWith(Const, NewConst);
- };
+ }
+ for (auto *I : NewInsts)
+ Visited->insert(I);
+}
- auto InsertDSPIntrinsic = [&](Instruction *I) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
- << *I << "\n");
- Function *DSPInst =
- Intrinsic::getDeclaration(M, getNarrowIntrinsic(I, TypeSize));
- Builder.SetInsertPoint(I);
- Builder.SetCurrentDebugLocation(I->getDebugLoc());
- Value *Args[] = { I->getOperand(0), I->getOperand(1) };
- CallInst *Call = Builder.CreateCall(DSPInst, Args);
- ReplaceAllUsersOfWith(I, Call);
- InstsToRemove.push_back(I);
- NewInsts.insert(Call);
- };
+void IRPromoter::ExtendSources() {
+ IRBuilder<> Builder{Ctx};
auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
+ assert(V->getType() != ExtTy && "zext already extends to i32");
LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n");
Builder.SetInsertPoint(InsertPt);
if (auto *I = dyn_cast<Instruction>(V))
Builder.SetCurrentDebugLocation(I->getDebugLoc());
- auto *ZExt = cast<Instruction>(Builder.CreateZExt(V, ExtTy));
- if (isa<Argument>(V))
- ZExt->moveBefore(InsertPt);
- else
- ZExt->moveAfter(InsertPt);
+
+ Value *ZExt = Builder.CreateZExt(V, ExtTy);
+ if (auto *I = dyn_cast<Instruction>(ZExt)) {
+ if (isa<Argument>(V))
+ I->moveBefore(InsertPt);
+ else
+ I->moveAfter(InsertPt);
+ NewInsts.insert(I);
+ }
+
ReplaceAllUsersOfWith(V, ZExt);
- NewInsts.insert(ZExt);
};
- // First, insert extending instructions between the leaves and their users.
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n");
- for (auto V : Leaves) {
+ // Now, insert extending instructions between the sources and their users.
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
+ for (auto V : *Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- ZExt->mutateType(ExtTy);
- else if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
else if (auto *Arg = dyn_cast<Argument>(V)) {
BasicBlock &BB = Arg->getParent()->front();
InsertZExt(Arg, &*BB.getFirstInsertionPt());
} else {
- llvm_unreachable("unhandled leaf that needs extending");
+ llvm_unreachable("unhandled source that needs extending");
}
Promoted.insert(V);
}
+}
+void IRPromoter::PromoteTree() {
LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
- // Then mutate the types of the instructions within the tree. Here we handle
- // constant operands.
- for (auto *V : Visited) {
- if (Leaves.count(V))
- continue;
- if (!isa<Instruction>(V))
+ IRBuilder<> Builder{Ctx};
+
+ // Mutate the types of the instructions within the tree. Here we handle
+ // constant operands.
+ for (auto *V : *Visited) {
+ if (Sources->count(V))
continue;
auto *I = cast<Instruction>(V);
- if (Roots.count(I))
+ if (Sinks->count(I))
continue;
- for (auto &U : I->operands()) {
- if ((U->getType() == ExtTy) || !isSupportedType(&*U))
+ for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
+ Value *Op = I->getOperand(i);
+ if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
continue;
- if (auto *Const = dyn_cast<ConstantInt>(&*U))
- FixConst(Const, I);
- else if (isa<UndefValue>(&*U))
- U->mutateType(ExtTy);
+ if (auto *Const = dyn_cast<ConstantInt>(Op)) {
+ Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
+ I->setOperand(i, NewConst);
+ } else if (isa<UndefValue>(Op))
+ I->setOperand(i, UndefValue::get(ExtTy));
}
if (shouldPromote(I)) {
@@ -396,91 +593,215 @@ void IRPromoter::Mutate(Type *OrigTy,
}
}
- // Now we need to remove any zexts that have become unnecessary, as well
- // as insert any intrinsics.
- for (auto *V : Visited) {
- if (Leaves.count(V))
+ // Finally, any instructions that should be promoted but haven't yet been,
+ // need to be handled using intrinsics.
+ for (auto *V : *Visited) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
continue;
- if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- if (ZExt->getDestTy() != ExtTy) {
- ZExt->mutateType(ExtTy);
- Promoted.insert(ZExt);
- }
- else if (ZExt->getSrcTy() == ExtTy) {
- ReplaceAllUsersOfWith(V, ZExt->getOperand(0));
- InstsToRemove.push_back(ZExt);
- }
+
+ if (Sources->count(I) || Sinks->count(I))
continue;
- }
- if (!shouldPromote(V) || isPromotedResultSafe(V))
+ if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
continue;
+
+ assert(EnableDSP && "DSP intrinsic insertion not enabled!");
// Replace unsafe instructions with appropriate intrinsic calls.
- InsertDSPIntrinsic(cast<Instruction>(V));
+ LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
+ << *I << "\n");
+ Function *DSPInst =
+ Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
+ Builder.SetInsertPoint(I);
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+ Value *Args[] = { I->getOperand(0), I->getOperand(1) };
+ CallInst *Call = Builder.CreateCall(DSPInst, Args);
+ NewInsts.insert(Call);
+ ReplaceAllUsersOfWith(I, Call);
}
+}
+
+void IRPromoter::TruncateSinks() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
+
+ IRBuilder<> Builder{Ctx};
+
+ auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
+ if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
+ return nullptr;
+
+ if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
+ << *V << "\n");
+ Builder.SetInsertPoint(cast<Instruction>(V));
+ auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
+ if (Trunc)
+ NewInsts.insert(Trunc);
+ return Trunc;
+ };
- LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n");
// Fix up any stores or returns that use the results of the promoted
// chain.
- for (auto I : Roots) {
- LLVM_DEBUG(dbgs() << " - " << *I << "\n");
- Type *TruncTy = OrigTy;
- if (auto *Store = dyn_cast<StoreInst>(I)) {
- auto *PtrTy = cast<PointerType>(Store->getPointerOperandType());
- TruncTy = PtrTy->getElementType();
- } else if (isa<ReturnInst>(I)) {
- Function *F = I->getParent()->getParent();
- TruncTy = F->getFunctionType()->getReturnType();
+ for (auto I : *Sinks) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n");
+
+ // Handle calls separately as we need to iterate over arg operands.
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ Type *Ty = TruncTysMap[Call][i];
+ if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
+ Trunc->moveBefore(Call);
+ Call->setArgOperand(i, Trunc);
+ }
+ }
+ continue;
}
+ // Special case switches because we need to truncate the condition.
+ if (auto *Switch = dyn_cast<SwitchInst>(I)) {
+ Type *Ty = TruncTysMap[Switch][0];
+ if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
+ Trunc->moveBefore(Switch);
+ Switch->setCondition(Trunc);
+ }
+ continue;
+ }
+
+ // Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value *V = I->getOperand(i);
- if (Promoted.count(V) || NewInsts.count(V)) {
- if (auto *Op = dyn_cast<Instruction>(V)) {
-
- if (auto *Call = dyn_cast<CallInst>(I))
- TruncTy = Call->getFunctionType()->getParamType(i);
-
- if (TruncTy == ExtTy)
- continue;
-
- LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy
- << " Trunc for " << *Op << "\n");
- Builder.SetInsertPoint(Op);
- auto *Trunc = cast<Instruction>(Builder.CreateTrunc(Op, TruncTy));
- Trunc->moveBefore(I);
- I->setOperand(i, Trunc);
- NewInsts.insert(Trunc);
- }
+ Type *Ty = TruncTysMap[I][i];
+ if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
+ Trunc->moveBefore(I);
+ I->setOperand(i, Trunc);
}
}
}
- LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete.\n");
}
-bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
- if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
- return false;
+void IRPromoter::Cleanup() {
+ // Some zexts will now have become redundant, along with their trunc
+ // operands, so remove them
+ for (auto V : *Visited) {
+ if (!isa<CastInst>(V))
+ continue;
- if (ST->isThumb() && !ST->hasThumb2())
- return false;
+ auto ZExt = cast<CastInst>(V);
+ if (ZExt->getDestTy() != ExtTy)
+ continue;
- if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
- return false;
+ Value *Src = ZExt->getOperand(0);
+ if (ZExt->getSrcTy() == ZExt->getDestTy()) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt
+ << "\n");
+ ReplaceAllUsersOfWith(ZExt, Src);
+ continue;
+ }
- // TODO
- // Would it be profitable? For Thumb code, these parallel DSP instructions
- // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
- // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
- // halved. They also do not take immediates as operands.
- for (auto &Op : I->operands()) {
- if (isa<Constant>(Op)) {
- if (!EnableDSPWithImms)
- return false;
+ // For any truncs that we insert to handle zexts, we can replace the
+ // result of the zext with the input to the trunc.
+ if (NewInsts.count(Src) && isa<ZExtInst>(V) && isa<TruncInst>(Src)) {
+ auto *Trunc = cast<TruncInst>(Src);
+ assert(Trunc->getOperand(0)->getType() == ExtTy &&
+ "expected inserted trunc to be operating on i32");
+ ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
}
}
- return true;
+
+ for (auto *I : InstsToRemove) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
+ I->dropAllReferences();
+ I->eraseFromParent();
+ }
+
+ InstsToRemove.clear();
+ NewInsts.clear();
+ TruncTysMap.clear();
+ Promoted.clear();
+}
+
+void IRPromoter::ConvertTruncs() {
+ IRBuilder<> Builder{Ctx};
+
+ for (auto *V : *Visited) {
+ if (!isa<TruncInst>(V) || Sources->count(V))
+ continue;
+
+ auto *Trunc = cast<TruncInst>(V);
+ assert(LessThanTypeSize(Trunc) && "expected narrow trunc");
+
+ Builder.SetInsertPoint(Trunc);
+ unsigned NumBits =
+ cast<IntegerType>(Trunc->getType())->getScalarSizeInBits();
+ ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits));
+ Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
+
+ if (auto *I = dyn_cast<Instruction>(Masked))
+ NewInsts.insert(I);
+
+ ReplaceAllUsersOfWith(Trunc, Masked);
+ }
+}
+
+void IRPromoter::Mutate(Type *OrigTy,
+ SmallPtrSetImpl<Value*> &Visited,
+ SmallPtrSetImpl<Value*> &Sources,
+ SmallPtrSetImpl<Instruction*> &Sinks,
+ SmallPtrSetImpl<Instruction*> &SafeToPromote) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains from "
+ << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
+
+ assert(isa<IntegerType>(OrigTy) && "expected integer type");
+ this->OrigTy = cast<IntegerType>(OrigTy);
+ assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() &&
+ "original type not smaller than extended type");
+
+ this->Visited = &Visited;
+ this->Sources = &Sources;
+ this->Sinks = &Sinks;
+ this->SafeToPromote = &SafeToPromote;
+
+ // Cache original types of the values that will likely need truncating
+ for (auto *I : Sinks) {
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ TruncTysMap[Call].push_back(Arg->getType());
+ }
+ } else if (auto *Switch = dyn_cast<SwitchInst>(I))
+ TruncTysMap[I].push_back(Switch->getCondition()->getType());
+ else {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ TruncTysMap[I].push_back(I->getOperand(i)->getType());
+ }
+ }
+
+ // Convert adds and subs using negative immediates to equivalent instructions
+ // that use positive constants.
+ PrepareConstants();
+
+ // Insert zext instructions between sources and their users.
+ ExtendSources();
+
+ // Convert any truncs, that aren't sources, into AND masks.
+ ConvertTruncs();
+
+ // Promote visited instructions, mutating their types in place. Also insert
+ // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
+ // promote.
+ PromoteTree();
+
+ // Insert trunc instructions for use by calls, stores, etc.
+ TruncateSinks();
+
+ // Finally, remove unnecessary zexts and truncs, delete old instructions and
+ // clear the data structures.
+ Cleanup();
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
}
/// We accept most instructions, as well as Arguments and ConstantInsts. We
@@ -488,102 +809,133 @@ bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
-
- // Non-instruction values that we can handle.
- if (isa<ConstantInt>(V) || isa<Argument>(V))
- return true;
+ if (auto *I = dyn_cast<ICmpInst>(V)) {
+ // Now that we allow types smaller than TypeSize, only allow icmps of
+ // TypeSize because smaller ones will require a trunc to be legalised.
+ // TODO: Allow icmp of smaller types, and calculate at the end
+ // whether the transform would be beneficial.
+ if (isa<PointerType>(I->getOperand(0)->getType()))
+ return true;
+ return EqualTypeSize(I->getOperand(0));
+ }
// Memory instructions
- if (isa<StoreInst>(V) || isa<LoadInst>(V) || isa<GetElementPtrInst>(V))
+ if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
return true;
// Branches and targets.
- if (auto *ICmp = dyn_cast<ICmpInst>(V))
- return ICmp->isEquality() || !ICmp->isSigned();
-
if (isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
return true;
- if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V))
- return true;
+ // Non-instruction values that we can handle.
+ if ((isa<Constant>(V) && !isa<ConstantExpr>(V)) || isa<Argument>(V))
+ return isSupportedType(V);
+
+ if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
+ isa<LoadInst>(V))
+ return isSupportedType(V);
+
+ if (isa<SExtInst>(V))
+ return false;
+
+ if (auto *Cast = dyn_cast<CastInst>(V))
+ return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can
- // still be roots.
+ // still be sinks.
if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Cast = dyn_cast<CastInst>(V)) {
- if (isa<ZExtInst>(Cast))
- return Cast->getDestTy()->getScalarSizeInBits() <= 32;
- else if (auto *Trunc = dyn_cast<TruncInst>(V))
- return Trunc->getDestTy()->getScalarSizeInBits() <= TypeSize;
- else {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, unsupported cast.\n");
- return false;
- }
- } else if (!isa<BinaryOperator>(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, not a binary operator.\n");
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+
+ if (!isa<BinaryOperator>(V))
+ return false;
+
+ if (!isSupportedType(V))
return false;
- }
- bool res = !isSigned(V);
- if (!res)
- LLVM_DEBUG(dbgs() << "ARM CGP: No, it's a signed instruction.\n");
- return res;
+ if (generateSignBits(V)) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
+ return false;
+ }
+ return true;
}
/// Check that the type of V would be promoted and that the original type is
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
- if (!isSupportedType(V))
- return false;
- unsigned VSize = 0;
- if (auto *Ld = dyn_cast<LoadInst>(V)) {
- auto *PtrTy = cast<PointerType>(Ld->getPointerOperandType());
- VSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
- } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- VSize = ZExt->getOperand(0)->getType()->getPrimitiveSizeInBits();
- } else {
- VSize = V->getType()->getPrimitiveSizeInBits();
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+
+ if (SafeToPromote.count(I))
+ return true;
+
+ if (isPromotedResultSafe(V) || isSafeOverflow(I)) {
+ SafeToPromote.insert(I);
+ return true;
}
- if (VSize > TypeSize)
+ if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
return false;
- if (isPromotedResultSafe(V))
- return true;
+ // If promotion is not safe, can we use a DSP instruction to natively
+ // handle the narrow type?
+ if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
+ return false;
- if (auto *I = dyn_cast<Instruction>(V))
- return isNarrowInstSupported(I);
+ if (ST->isThumb() && !ST->hasThumb2())
+ return false;
- return false;
+ // TODO
+ // Would it be profitable? For Thumb code, these parallel DSP instructions
+ // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
+ // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
+ // halved. They also do not take immediates as operands.
+ for (auto &Op : I->operands()) {
+ if (isa<Constant>(Op)) {
+ if (!EnableDSPWithImms)
+ return false;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n");
+ return true;
}
bool ARMCodeGenPrepare::TryToPromote(Value *V) {
OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits();
+ if (TypeSize > 16 || TypeSize < 8)
+ return false;
+
+ SafeToPromote.clear();
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
+ << TypeSize << "\n");
SetVector<Value*> WorkList;
- SmallPtrSet<Value*, 8> Leaves;
- SmallPtrSet<Instruction*, 4> Roots;
- WorkList.insert(V);
+ SmallPtrSet<Value*, 8> Sources;
+ SmallPtrSet<Instruction*, 4> Sinks;
SmallPtrSet<Value*, 16> CurrentVisited;
- CurrentVisited.clear();
+ WorkList.insert(V);
- // Return true if the given value can, or has been, visited. Add V to the
- // worklist if needed.
+ // Return true if V was added to the worklist as a supported instruction,
+ // if it was already visited, or if we don't need to explore it (e.g.
+ // pointer values and GEPs), and false otherwise.
auto AddLegalInst = [&](Value *V) {
if (CurrentVisited.count(V))
return true;
+ // Ignore GEPs because they don't need promoting and the constant indices
+ // will prevent the transformation.
+ if (isa<GetElementPtrInst>(V))
+ return true;
+
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
return false;
@@ -600,6 +952,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (CurrentVisited.count(V))
continue;
+ // Ignore non-instructions, other than arguments.
if (!isa<Instruction>(V) && !isSource(V))
continue;
@@ -607,24 +960,26 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
// the tree has already been explored.
// TODO: This could limit the transform, ie if we try to promote something
// from an i8 and fail first, before trying an i16.
- if (AllVisited.count(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: Already visited this: " << *V << "\n");
+ if (AllVisited.count(V))
return false;
- }
CurrentVisited.insert(V);
AllVisited.insert(V);
// Calls can be both sources and sinks.
if (isSink(V))
- Roots.insert(cast<Instruction>(V));
+ Sinks.insert(cast<Instruction>(V));
+
if (isSource(V))
- Leaves.insert(V);
- else if (auto *I = dyn_cast<Instruction>(V)) {
- // Visit operands of any instruction visited.
- for (auto &U : I->operands()) {
- if (!AddLegalInst(U))
- return false;
+ Sources.insert(V);
+
+ if (!isSink(V) && !isSource(V)) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // Visit operands of any instruction visited.
+ for (auto &U : I->operands()) {
+ if (!AddLegalInst(U))
+ return false;
+ }
}
}
@@ -638,43 +993,23 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
}
}
- unsigned NumToPromote = 0;
- unsigned Cost = 0;
+ LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
+ for (auto *I : CurrentVisited)
+ I->dump();
+ );
+ unsigned ToPromote = 0;
for (auto *V : CurrentVisited) {
- // Truncs will cause a uxt and no zeroext arguments will often require
- // a uxt somewhere.
- if (isa<TruncInst>(V))
- ++Cost;
- else if (auto *Arg = dyn_cast<Argument>(V)) {
- if (!Arg->hasZExtAttr())
- ++Cost;
- }
-
- // Mem ops can automatically be extended/truncated and non-instructions
- // don't need anything done.
- if (Leaves.count(V) || isa<StoreInst>(V) || !isa<Instruction>(V))
+ if (Sources.count(V))
continue;
-
- // Will need to truncate calls args and returns.
- if (Roots.count(cast<Instruction>(V))) {
- ++Cost;
+ if (Sinks.count(cast<Instruction>(V)))
continue;
- }
-
- if (shouldPromote(V))
- ++NumToPromote;
+ ++ToPromote;
}
- LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
- for (auto *I : CurrentVisited)
- I->dump();
- );
- LLVM_DEBUG(dbgs() << "ARM CGP: Cost of promoting " << NumToPromote
- << " instructions = " << Cost << "\n");
- if (Cost > NumToPromote || (NumToPromote == 0))
+ if (ToPromote < 2)
return false;
- Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
+ Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote);
return true;
}
@@ -711,19 +1046,15 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
continue;
LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
+
for (auto &Op : CI.operands()) {
- if (auto *I = dyn_cast<Instruction>(Op)) {
- if (isa<ZExtInst>(I))
- MadeChange |= TryToPromote(I->getOperand(0));
- else
- MadeChange |= TryToPromote(I);
- }
+ if (auto *I = dyn_cast<Instruction>(Op))
+ MadeChange |= TryToPromote(I);
}
}
}
- Promoter->Cleanup();
LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
- dbgs();
+ dbgs() << F;
report_fatal_error("Broken function after type promotion");
});
}
@@ -744,6 +1075,7 @@ INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
false, false)
char ARMCodeGenPrepare::ID = 0;
+unsigned ARMCodeGenPrepare::TypeSize = 0;
FunctionPass *llvm::createARMCodeGenPreparePass() {
return new ARMCodeGenPrepare();
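
A minimal sketch of the kind of source this pass targets (illustrative only; the
function name and values below are assumptions, not taken from this commit).
Narrow i8/i16 use-def chains like the one here are what IRPromoter::Mutate widens
to i32, extending at the sources (arguments, loads) and truncating only at the
sinks (here, the return):

    // Hypothetical input: an i8 chain that ARMCodeGenPrepare could promote to
    // i32, aiming to remove the intermediate zero-extensions and truncations.
    extern "C" unsigned char sum3(unsigned char a, unsigned char b,
                                  unsigned char c) {
      unsigned char t = static_cast<unsigned char>(a + b); // intermediate trunc
      return static_cast<unsigned char>(t + c);            // trunc at the sink
    }
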
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 2c4738d3cb74..5e97c4cb35e3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1420,6 +1420,22 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
MI = LastIT;
}
+ // Avoid splitting a MOVW+MOVT pair with a relocation on Windows.
+ // On Windows, this instruction pair is covered by a single
+ // IMAGE_REL_ARM_MOV32T relocation which covers both instructions. If a
+ // constant island is injected in between them, the relocation will clobber
+ // the instruction and fail to update the MOVT instruction.
+ // (These instructions are bundled up until right before the ConstantIslands
+ // pass.)
+ if (STI->isTargetWindows() && isThumb && MI->getOpcode() == ARM::t2MOVTi16 &&
+ (MI->getOperand(2).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
+ ARMII::MO_HI16) {
+ --MI;
+ assert(MI->getOpcode() == ARM::t2MOVi16 &&
+ (MI->getOperand(1).getTargetFlags() & ARMII::MO_OPTION_MASK) ==
+ ARMII::MO_LO16);
+ }
+
// We really must not split an IT block.
LLVM_DEBUG(unsigned PredReg; assert(
!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL));
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 5dac6ec0b799..eecd0a10dc7d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -570,7 +570,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -645,7 +645,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -735,7 +735,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
@@ -848,8 +848,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
- LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.cloneMemRefs(MI);
+ HI16.cloneMemRefs(MI);
LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
if (isCC)
@@ -899,8 +899,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
}
}
- LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.cloneMemRefs(MI);
+ HI16.cloneMemRefs(MI);
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
@@ -1030,10 +1030,10 @@ static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
if (IsThumb) {
unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
- MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead()));
- MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead()));
+ MIB.addReg(RegLo, Flags);
+ MIB.addReg(RegHi, Flags);
} else
- MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead()));
+ MIB.addReg(Reg.getReg(), Flags);
}
/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
@@ -1103,7 +1103,8 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
// bne .Lloadcmp
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
- addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
+ unsigned Flags = getKillRegState(New.isDead());
+ addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
@@ -1425,7 +1426,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addExternalSymbol("__aeabi_read_tp", 0);
}
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
@@ -1440,7 +1441,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
.add(MI.getOperand(1))
.add(predOps(ARMCC::AL));
- MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB1.cloneMemRefs(MI);
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -1544,7 +1545,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (isARM) {
MIB3.add(predOps(ARMCC::AL));
if (Opcode == ARM::MOV_ga_pcrel_ldr)
- MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB3.cloneMemRefs(MI);
}
TransferImpOps(MI, MIB1, MIB3);
MI.eraseFromParent();
@@ -1596,7 +1597,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
}
@@ -1629,7 +1630,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index a66cd7053c0a..a50abfdbee44 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2951,7 +2951,8 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ResultReg = MI->getOperand(0).getReg();
if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
@@ -2970,12 +2971,16 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned ConstAlign =
MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
- .addConstantPoolIndex(Idx);
+ .addConstantPoolIndex(Idx)
+ .addMemOperand(CPMMO);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
@@ -2988,6 +2993,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(TempReg)
.addImm(ARMPCLabelIndex);
+
if (!Subtarget->isThumb())
MIB.add(predOps(ARMCC::AL));
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 56ad7a0f0446..a9d87ced31f3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -79,12 +79,11 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
STI(sti) {}
-bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
+bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
// iOS always has a FP for backtracking, force other targets to keep their FP
// when doing FastISel. The emitted code is currently superior, and in cases
// like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
- return TargetFrameLowering::noFramePointerElim(MF) ||
- MF.getSubtarget<ARMSubtarget>().useFastISel();
+ return MF.getSubtarget<ARMSubtarget>().useFastISel();
}
/// Returns true if the target can safely skip saving callee-saved registers
@@ -526,6 +525,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
switch (TM.getCodeModel()) {
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny code model not available on ARM.");
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
@@ -909,6 +910,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
assert(RegInfo->hasBasePointer(MF) &&
"VLAs and dynamic stack alignment, but missing base pointer!");
FrameReg = RegInfo->getBaseRegister();
+ Offset -= SPAdj;
}
return Offset;
}
@@ -1006,8 +1008,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
- llvm::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
- const RegAndKill &RHS) {
+ llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
});
@@ -1103,7 +1104,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
- llvm::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
+ llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
});
@@ -1921,9 +1922,13 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
<< "\n");
}
+ // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
+ // restore LR in that case.
+ bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
+
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
// Spill LR as well so we can fold BX_RET to the registers restore (LDM).
- if (!LRSpilled && CS1Spilled) {
+ if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
@@ -1949,7 +1954,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Windows on ARM, accept R11 (frame pointer)
if (!AFI->isThumbFunction() ||
(STI.isTargetWindows() && Reg == ARM::R11) ||
- isARMLowRegister(Reg) || Reg == ARM::LR) {
+ isARMLowRegister(Reg) ||
+ (Reg == ARM::LR && !ExpensiveLRRestore)) {
SavedRegs.set(Reg);
LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
<< " to make up alignment\n");
@@ -2151,9 +2157,15 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Do not generate a prologue for leaf functions with a stack of size zero.
// For non-leaf functions we have to allow for the possibility that the
- // call is to a non-split function, as in PR37807.
- if (StackSize == 0 && !MFI.hasTailCall())
+ // call is to a non-split function, as in PR37807. This function could also
+ // take the address of a non-split function. When the linker tries to adjust
+ // its non-existent prologue, it would fail with an error. Mark the object
+ // file so that such failures are not errors. See this Go language bug-report
+ // https://go-review.googlesource.com/c/go/+/148819/
+ if (StackSize == 0 && !MFI.hasTailCall()) {
+ MF.getMMI().setHasNosplitStack(true);
return;
+ }
// Use R4 and R5 as scratch registers.
// We save R4 and R5 before use and restore them before leaving the function.
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index e994cab28fe7..2f7e23840e75 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -42,7 +42,7 @@ public:
std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
- bool noFramePointerElim(const MachineFunction &MF) const override;
+ bool keepFramePointer(const MachineFunction &MF) const override;
bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9592dd53c347..8e0e82388251 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1345,9 +1345,8 @@ static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
}
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
@@ -1764,12 +1763,14 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
default: llvm_unreachable("unhandled vld type");
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4f16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
case MVT::v1i64: OpcodeIndex = 3; break;
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8f16:
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
@@ -1854,9 +1855,8 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
}
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
if (NumVecs == 1) {
ReplaceNode(N, VLd);
@@ -1893,8 +1893,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
EVT VT = N->getOperand(Vec0Idx).getValueType();
@@ -1983,7 +1982,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
- cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
ReplaceNode(N, VSt);
return;
@@ -2007,7 +2006,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
MVT::Other, OpsA);
- cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
Chain = SDValue(VStA, 1);
// Store the odd D registers.
@@ -2026,7 +2025,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Chain);
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
Ops);
- cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
ReplaceNode(N, VStB);
}
@@ -2045,8 +2044,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
SDValue Chain = N->getOperand(0);
unsigned Lane =
@@ -2135,7 +2133,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes[OpcodeIndex]);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
if (!IsLoad) {
ReplaceNode(N, VLdLn);
return;
@@ -2264,9 +2262,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
}
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
// Extract the subregisters.
if (NumVecs == 1) {
@@ -2309,6 +2306,11 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ // Mask off the unnecessary bits of the AND immediate; normally
+ // DAGCombine will do this, but that might not happen if
+ // targetShrinkDemandedConstant chooses a different immediate.
+ And_imm &= -1U >> Srl_imm;
+
// Note: The width operand is encoded as width-1.
unsigned Width = countTrailingOnes(And_imm) - 1;
unsigned LSB = Srl_imm;
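
A small worked example of the And_imm trimming added above (the constants are
illustrative assumptions, not values from the commit). For a pattern such as
(x >> 24) & 0x3FF only eight bits can survive the shift, so the mask must be
trimmed before the bitfield width is derived from its trailing ones (this
standalone check needs C++20 for <bit>):

    #include <bit>
    #include <cstdint>

    int main() {
      uint32_t And_imm = 0x3FF;                       // mask left by an earlier combine
      unsigned Srl_imm = 24;                          // shift amount
      And_imm &= ~0u >> Srl_imm;                      // 0x3FF -> 0xFF: drop unreachable bits
      unsigned Width = std::countr_one(And_imm) - 1;  // 7, the width-1 encoding for 8 bits
      return Width == 7 ? 0 : 1;
    }
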
@@ -2476,9 +2478,8 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
Opcode, SDLoc(N),
CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
@@ -2627,12 +2628,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// queries work properly. This e.g. gives the register allocation the
// required information for rematerialization.
MachineFunction& MF = CurDAG->getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = MF.getMachineMemOperand(
- MachinePointerInfo::getConstantPool(MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *MemOp =
+ MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, 4, 4);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
ReplaceNode(N, ResNode);
return;
@@ -3030,11 +3030,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VZIPd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VZIPq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VZIPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
@@ -3051,11 +3053,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VUZPd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VUZPq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VUZPq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
@@ -3072,10 +3076,12 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (VT.getSimpleVT().SimpleTy) {
default: return;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
+ case MVT::v4f16:
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VTRNq8; break;
+ case MVT::v8f16:
case MVT::v8i16: Opc = ARM::VTRNq16; break;
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
@@ -3410,9 +3416,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32), Chain};
SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
// Remap uses.
SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
@@ -3478,9 +3483,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
ReplaceNode(N, St);
return;
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ede276dd91bb..21de0f6a7630 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -651,9 +651,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// it have a FP_TO_[SU]INT instruction with a narrower destination than
// source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
@@ -665,8 +669,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
- setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
- setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
@@ -846,8 +850,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
+ }
// @llvm.readcyclecounter requires the Performance Monitors extension.
// Default to the 0 expansion on unsupported platforms.
@@ -950,6 +956,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -977,7 +984,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
- if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
+ if (!Subtarget->hasAcquireRelease() ||
+ getTargetMachine().getOptLevel() == 0) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
InsertFencesForAtomic = true;
}
@@ -1136,14 +1144,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we use
// a NEON instruction with an undef lane instead.
- setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
+
+ if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
+
+ setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
+ }
}
// We have target-specific dag combine patterns for the following nodes:
@@ -1181,6 +1201,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
+ setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
+
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -1261,6 +1283,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::SUBS: return "ARMISD::SUBS";
case ARMISD::SSAT: return "ARMISD::SSAT";
case ARMISD::USAT: return "ARMISD::USAT";
@@ -3052,41 +3075,8 @@ static bool allUsersAreInFunction(const Value *V, const Function *F) {
return true;
}
-/// Return true if all users of V are within some (any) function, looking through
-/// ConstantExprs. In other words, are there any global constant users?
-static bool allUsersAreInFunctions(const Value *V) {
- SmallVector<const User*,4> Worklist;
- for (auto *U : V->users())
- Worklist.push_back(U);
- while (!Worklist.empty()) {
- auto *U = Worklist.pop_back_val();
- if (isa<ConstantExpr>(U)) {
- for (auto *UU : U->users())
- Worklist.push_back(UU);
- continue;
- }
-
- if (!isa<Instruction>(U))
- return false;
- }
- return true;
-}
-
-// Return true if T is an integer, float or an array/vector of either.
-static bool isSimpleType(Type *T) {
- if (T->isIntegerTy() || T->isFloatingPointTy())
- return true;
- Type *SubT = nullptr;
- if (T->isArrayTy())
- SubT = T->getArrayElementType();
- else if (T->isVectorTy())
- SubT = T->getVectorElementType();
- else
- return false;
- return SubT->isIntegerTy() || SubT->isFloatingPointTy();
-}
-
-static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
+static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
+ const GlobalValue *GV, SelectionDAG &DAG,
EVT PtrVT, const SDLoc &dl) {
// If we're creating a pool entry for a constant global with unnamed address,
// and the global is small enough, we can emit it inline into the constant pool
@@ -3113,11 +3103,11 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
!GVar->hasLocalLinkage())
return SDValue();
- // Ensure that we don't try and inline any type that contains pointers. If
- // we inline a value that contains relocations, we move the relocations from
- // .data to .text which is not ideal.
+ // If we inline a value that contains relocations, we move the relocations
+ // from .data to .text. This is not allowed in position-independent code.
auto *Init = GVar->getInitializer();
- if (!isSimpleType(Init->getType()))
+ if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
+ Init->needsRelocation())
return SDValue();
// The constant islands pass can only really deal with alignment requests
@@ -3128,7 +3118,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
// that are strings for simplicity.
auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
- unsigned Align = GVar->getAlignment();
+ unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
@@ -3149,12 +3139,14 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
ConstpoolPromotionMaxTotal)
return SDValue();
- // This is only valid if all users are in a single function OR it has users
- // in multiple functions but it no larger than a pointer. We also check if
- // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
- // address taken.
- if (!allUsersAreInFunction(GVar, &F) &&
- !(Size <= 4 && allUsersAreInFunctions(GVar)))
+ // This is only valid if all users are in a single function; we can't clone
+ // the constant in general. The LLVM IR unnamed_addr allows merging
+ // constants, but not cloning them.
+ //
+ // We could potentially allow cloning if we could prove all uses of the
+ // constant in the current function don't care about the address, like
+ // printf format strings. But that isn't implemented for now.
+ if (!allUsersAreInFunction(GVar, &F))
return SDValue();
// We're going to inline this global. Pad it out if needed.
@@ -3182,9 +3174,11 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->getBaseObject();
- return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
- isa<Function>(GV);
+ if (!(GV = GA->getBaseObject()))
+ return false;
+ if (const auto *V = dyn_cast<GlobalVariable>(GV))
+ return V->isConstant();
+ return isa<Function>(GV);
}
SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -3210,7 +3204,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// promoteToConstantPool only if not generating XO text section
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
- if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
+ if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
return V;
if (isPositionIndependent()) {
@@ -3299,9 +3293,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Windows");
+ const TargetMachine &TM = getTargetMachine();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- const ARMII::TOF TargetFlags =
- (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
+ ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
+ if (GV->hasDLLImportStorageClass())
+ TargetFlags = ARMII::MO_DLLIMPORT;
+ else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ TargetFlags = ARMII::MO_COFFSTUB;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
SDLoc DL(Op);
@@ -3313,7 +3311,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
TargetFlags));
- if (GV->hasDLLImportStorageClass())
+ if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
@@ -3412,7 +3410,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
Op.getOperand(1), Op.getOperand(2));
}
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
- ? ISD::FMINNAN : ISD::FMAXNAN;
+ ? ISD::FMINIMUM : ISD::FMAXIMUM;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
@@ -4832,12 +4830,24 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
}
- assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
- "Invalid type for custom lowering!");
- if (VT != MVT::v4i16)
+ const bool HasFullFP16 =
+ static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+
+ EVT NewTy;
+ const EVT OpTy = Op.getOperand(0).getValueType();
+ if (OpTy == MVT::v4f32)
+ NewTy = MVT::v4i32;
+ else if (OpTy == MVT::v4f16 && HasFullFP16)
+ NewTy = MVT::v4i16;
+ else if (OpTy == MVT::v8f16 && HasFullFP16)
+ NewTy = MVT::v8i16;
+ else
+ llvm_unreachable("Invalid type for custom lowering!");
+
+ if (VT != MVT::v4i16 && VT != MVT::v8i16)
return DAG.UnrollVectorOp(Op.getNode());
- Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
@@ -4870,9 +4880,21 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
}
- assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+ assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
+ Op.getOperand(0).getValueType() == MVT::v8i16) &&
"Invalid type for custom lowering!");
- if (VT != MVT::v4f32)
+
+ const bool HasFullFP16 =
+ static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+
+ EVT DestVecType;
+ if (VT == MVT::v4f32)
+ DestVecType = MVT::v4i32;
+ else if (VT == MVT::v4f16 && HasFullFP16)
+ DestVecType = MVT::v4i16;
+ else if (VT == MVT::v8f16 && HasFullFP16)
+ DestVecType = MVT::v8i16;
+ else
return DAG.UnrollVectorOp(Op.getNode());
unsigned CastOpc;
@@ -4889,7 +4911,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
break;
}
- Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
return DAG.getNode(Opc, dl, VT, Op);
}
@@ -5392,10 +5414,6 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
// Compute with: cttz(x) = ctpop(lsb - 1)
- // Since we can only compute the number of bits in a byte with vcnt.8, we
- // have to gather the result with pairwise addition (vpaddl) for i16, i32,
- // and i64.
-
// Compute LSB - 1.
SDValue Bits;
if (ElemTy == MVT::i64) {
@@ -5408,32 +5426,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
DAG.getTargetConstant(1, dl, ElemTy));
Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
}
-
- // Count #bits with vcnt.8.
- EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
- SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
- SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
-
- // Gather the #bits with vpaddl (pairwise add.)
- EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
- SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt8);
- if (ElemTy == MVT::i16)
- return Cnt16;
-
- EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
- SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt16);
- if (ElemTy == MVT::i32)
- return Cnt32;
-
- assert(ElemTy == MVT::i64);
- SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
- Cnt32);
- return Cnt64;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
}
if (!ST->hasV6T2Ops())
@@ -5443,112 +5436,37 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
-/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
-/// for each 16-bit element from operand, repeated. The basic idea is to
-/// leverage vcnt to get the 8-bit counts, gather and add the results.
-///
-/// Trace for v4i16:
-/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
-/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
-/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
-/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
-/// [b0 b1 b2 b3 b4 b5 b6 b7]
-/// +[b1 b0 b3 b2 b5 b4 b7 b6]
-/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
-/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
-static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
- EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
- SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
- SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
- SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
- return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
-}
-
-/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
-/// bit-count for each 16-bit element from the operand. We need slightly
-/// different sequencing for v4i16 and v8i16 to stay within NEON's available
-/// 64/128-bit registers.
-///
-/// Trace for v4i16:
-/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
-/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
-/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
-/// v4i16:Extracted = [k0 k1 k2 k3 ]
-static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
- if (VT.is64BitVector()) {
- SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
- DAG.getIntPtrConstant(0, DL));
- } else {
- SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
- BitCounts, DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
- }
-}
-
-/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
-/// bit-count for each 32-bit element from the operand. The idea here is
-/// to split the vector into 16-bit elements, leverage the 16-bit count
-/// routine, and then combine the results.
-///
-/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
-/// input = [v0 v1 ] (vi: 32-bit elements)
-/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
-/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
-/// vrev: N0 = [k1 k0 k3 k2 ]
-/// [k0 k1 k2 k3 ]
-/// N1 =+[k1 k0 k3 k2 ]
-/// [k0 k2 k1 k3 ]
-/// N2 =+[k1 k3 k0 k2 ]
-/// [k0 k2 k1 k3 ]
-/// Extended =+[k1 k3 k0 k2 ]
-/// [k0 k2 ]
-/// Extracted=+[k1 k3 ]
-///
-static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
+ assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+ assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
+ VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
- EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
+ Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
- SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
- SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
- SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
- SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
- SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+ // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
+ unsigned EltSize = 8;
+ unsigned NumElts = VT.is64BitVector() ? 8 : 16;
+ while (EltSize != VT.getScalarSizeInBits()) {
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Ops.push_back(Res);
- if (VT.is64BitVector()) {
- SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
- DAG.getIntPtrConstant(0, DL));
- } else {
- SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
- DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+ EltSize *= 2;
+ NumElts /= 2;
+ MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
}
-}
-static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
- EVT VT = N->getValueType(0);
-
- assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
- assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
- VT == MVT::v4i16 || VT == MVT::v8i16) &&
- "Unexpected type for custom ctpop lowering");
-
- if (VT.getVectorElementType() == MVT::i32)
- return lowerCTPOP32BitElements(N, DAG);
- else
- return lowerCTPOP16BitElements(N, DAG);
+ return Res;
}
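
The rewritten LowerCTPOP above counts bits per byte (vcnt.8) and then widens the
result with pairwise additions (vpaddl.u8/.u16/.u32) until the lane width matches
the element type. A scalar model of that widening, offered only as a sketch of
the intent rather than code from the commit:

    #include <bit>
    #include <cstdint>

    // Treat a 64-bit value as eight byte lanes and widen the per-byte popcounts
    // by repeated pairwise addition, mirroring the vpaddl steps.
    uint64_t popcount64_by_bytes(uint64_t v) {
      unsigned lanes[8];
      for (int i = 0; i < 8; ++i)                               // vcnt.8
        lanes[i] = std::popcount(static_cast<uint8_t>(v >> (8 * i)));
      for (int width = 8; width < 64; width *= 2)               // vpaddl steps
        for (int i = 0; i < 64 / (2 * width); ++i)
          lanes[i] = lanes[2 * i] + lanes[2 * i + 1];
      return lanes[0];
    }
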
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
@@ -7878,6 +7796,50 @@ SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
return LowerCallTo(CLI).first;
}
+// This is a code size optimisation: return the original SDIV node to
+// DAGCombiner when we don't want to expand SDIV into a sequence of
+// instructions, and an empty node otherwise, which will cause the
+// SDIV to be expanded in DAGCombine.
+SDValue
+ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // TODO: Support SREM
+ if (N->getOpcode() != ISD::SDIV)
+ return SDValue();
+
+ const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
+ const auto &MF = DAG.getMachineFunction();
+ const bool MinSize = MF.getFunction().optForMinSize();
+ const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
+ : ST.hasDivideInARMMode();
+
+ // Don't touch vector types; rewriting this may lead to scalarizing
+ // the int divs.
+ if (N->getOperand(0).getValueType().isVector())
+ return SDValue();
+
+ // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
+ // hwdiv support for this to be really profitable.
+ if (!(MinSize && HasDivide))
+ return SDValue();
+
+ // ARM mode is a bit simpler than Thumb: we can handle large power
+ // of 2 immediates with 1 mov instruction; no further checks required,
+ // just return the sdiv node.
+ if (!ST.isThumb())
+ return SDValue(N, 0);
+
+ // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
+ // and thus lose the code size benefit of a MOVS that requires only 2 bytes.
+ // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
+ // but as it's doing exactly this, it's not worth the trouble to get TTI.
+ if (Divisor.sgt(128))
+ return SDValue();
+
+ return SDValue(N, 0);
+}
+
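// Illustrative sketch: when this hook returns an empty SDValue, DAGCombine
// falls back to the generic expansion of a signed divide by 2^K, which is
// roughly the branch-free sequence below; returning SDValue(N, 0) keeps the
// SDIV so it can be selected as a single hardware divide, which is the
// smaller option being preferred for minsize above.
#include <cstdint>

static int32_t sdivByPow2(int32_t X, unsigned K) {   // assumes 1 <= K <= 31
  int32_t Sign = X >> 31;                            // all ones if X < 0
  uint32_t Bias = (uint32_t)Sign >> (32 - K);        // 2^K - 1 if X < 0, else 0
  return (X + (int32_t)Bias) >> K;                   // rounds towards zero
}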
SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
assert(Op.getValueType() == MVT::i32 &&
@@ -7990,10 +7952,8 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
ARM::CMP_SWAP_64, SDLoc(N),
DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
- MachineFunction &MF = DAG.getMachineFunction();
- MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
bool isBigEndian = DAG.getDataLayout().isBigEndian();
@@ -9169,6 +9129,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
// IP.
switch (TM.getCodeModel()) {
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny code model not available on ARM.");
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
@@ -9244,6 +9206,42 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
return ContBB;
}
+// The CPSR operand of SelectItr might be missing a kill marker
+// because there were multiple uses of CPSR, and ISel didn't know
+// which to mark. Figure out whether SelectItr should have had a
+// kill marker, and set it if it should. Returns the correct kill
+// marker value.
+static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
+ MachineBasicBlock* BB,
+ const TargetRegisterInfo* TRI) {
+ // Scan forward through BB for a use/def of CPSR.
+ MachineBasicBlock::iterator miI(std::next(SelectItr));
+ for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
+ const MachineInstr& mi = *miI;
+ if (mi.readsRegister(ARM::CPSR))
+ return false;
+ if (mi.definesRegister(ARM::CPSR))
+ break; // Should have kill-flag - update below.
+ }
+
+ // If we hit the end of the block, check whether CPSR is live into a
+ // successor.
+ if (miI == BB->end()) {
+ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
+ sEnd = BB->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock* succ = *sItr;
+ if (succ->isLiveIn(ARM::CPSR))
+ return false;
+ }
+ }
+
+ // We found a def, or hit the end of the basic block and CPSR wasn't live
+ // out. SelectMI should have a kill flag on CPSR.
+ SelectItr->addRegisterKilled(ARM::CPSR, TRI);
+ return true;
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -9343,6 +9341,14 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
+ // Check whether CPSR is live past the tMOVCCr_pseudo.
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ if (!MI.killsRegister(ARM::CPSR) &&
+ !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
+ copy0MBB->addLiveIn(ARM::CPSR);
+ sinkMBB->addLiveIn(ARM::CPSR);
+ }
+
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -10407,6 +10413,37 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+bool
+ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const {
+ if (Level == BeforeLegalizeTypes)
+ return true;
+
+ if (Subtarget->isThumb() && Subtarget->isThumb1Only())
+ return true;
+
+ if (N->getOpcode() != ISD::SHL)
+ return true;
+
+ // Turn off commute-with-shift transform after legalization, so it doesn't
+ // conflict with PerformSHLSimplify. (We could try to detect when
+ // PerformSHLSimplify would trigger more precisely, but it isn't
+ // really necessary.)
+ return false;
+}
+
+bool
+ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const {
+ if (!Subtarget->isThumb1Only())
+ return true;
+
+ if (Level == BeforeLegalizeTypes)
+ return true;
+
+ return false;
+}
+
static SDValue PerformSHLSimplify(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
@@ -10506,9 +10543,7 @@ static SDValue PerformSHLSimplify(SDNode *N,
LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
SHL.dump(); N->dump());
LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
-
- DAG.ReplaceAllUsesWith(SDValue(N, 0), Res);
- return SDValue(N, 0);
+ return Res;
}
@@ -10712,6 +10747,12 @@ static SDValue CombineANDShift(SDNode *N,
if (!C2 || C2 >= 32)
return SDValue();
+ // Clear irrelevant bits in the mask.
+ if (LeftShift)
+ C1 &= (-1U << C2);
+ else
+ C1 &= (-1U >> C2);
+
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
@@ -10719,9 +10760,7 @@ static SDValue CombineANDShift(SDNode *N,
// "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
// transform to a pair of shifts, to save materializing c1.
- // First pattern: right shift, and c1+1 is a power of two.
- // FIXME: Also check reversed pattern (left shift, and ~c1+1 is a power
- // of two).
+ // First pattern: right shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (!LeftShift && isMask_32(C1)) {
uint32_t C3 = countLeadingZeros(C1);
@@ -10733,13 +10772,23 @@ static SDValue CombineANDShift(SDNode *N,
}
}
- // Second pattern: left shift, and (c1>>c2)+1 is a power of two.
- // FIXME: Also check reversed pattern (right shift, and ~(c1<<c2)+1
- // is a power of two).
+ // First pattern, reversed: left shift, then mask off trailing bits.
+ if (LeftShift && isMask_32(~C1)) {
+ uint32_t C3 = countTrailingZeros(C1);
+ if (C2 < C3) {
+ SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(C3 - C2, DL, MVT::i32));
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+ DAG.getConstant(C3, DL, MVT::i32));
+ }
+ }
+
+ // Second pattern: left shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Trailing = countTrailingZeros(C1);
uint32_t C3 = countLeadingZeros(C1);
- if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) {
+ if (Trailing == C2 && C2 + C3 < 32) {
SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C2 + C3, DL, MVT::i32));
return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
@@ -10747,6 +10796,19 @@ static SDValue CombineANDShift(SDNode *N,
}
}
+ // Second pattern, reversed: right shift, then mask off trailing bits.
+ // FIXME: Handle other patterns of known/demanded bits.
+ if (!LeftShift && isShiftedMask_32(C1)) {
+ uint32_t Leading = countLeadingZeros(C1);
+ uint32_t C3 = countTrailingZeros(C1);
+ if (Leading == C2 && C2 + C3 < 32) {
+ SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(C2 + C3, DL, MVT::i32));
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+ DAG.getConstant(C3, DL, MVT::i32));
+ }
+ }
+
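// Illustrative sketch: the rewrites above trade an AND with a hard-to-
// materialise constant for two shifts. For the first pattern (right shift,
// C1 a mask of the low bits) take C2 = 3 and C1 = 0x1FFF, so
// C3 = countLeadingZeros(C1) = 19 and C2 < C3:
#include <cassert>
#include <cstdint>

static void shiftPairExample(uint32_t X) {
  uint32_t WithMask   = (X >> 3) & 0x1FFFu;      // needs 0x1FFF in a register
  uint32_t WithShifts = (X << (19 - 3)) >> 19;   // shl then lsr, no constant
  assert(WithMask == WithShifts);
  (void)WithMask;
  (void)WithShifts;
}
// On Thumb1 a 32-bit constant like 0x1FFF is comparatively expensive to
// materialise, which is the motivation for preferring the shift pair there.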
// FIXME: Transform "(and (shl x, c2) c1)" ->
// "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
// c1.
@@ -11541,8 +11603,15 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
// Check that the add is independent of the load/store. Otherwise, folding
- // it would create a cycle.
- if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ // it would create a cycle. We can avoid searching through Addr as it's a
+ // predecessor to both.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Addr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
@@ -12507,8 +12576,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
- KnownBits Known;
- DAG.computeKnownBits(Y, Known);
+ KnownBits Known = DAG.computeKnownBits(Y);
if ((OrCI & Known.Zero) != OrCI)
return SDValue();
@@ -12679,30 +12747,38 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
}
- } else if (CC == ARMCC::NE && LHS != RHS &&
+ } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
(!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
// This seems pointless but will allow us to combine it further below.
- // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+ // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
+ SDValue Sub =
+ DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+ SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
- N->getOperand(3), Cmp);
+ N->getOperand(3), CPSRGlue.getValue(1));
+ FalseVal = Sub;
}
} else if (isNullConstant(TrueVal)) {
- if (CC == ARMCC::EQ && LHS != RHS &&
+ if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
(!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
// This seems pointless but will allow us to combine it further below
// Note that we change == for != as this is the dual for the case above.
- // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+ // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
+ SDValue Sub =
+ DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+ SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
+ Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
DAG.getConstant(ARMCC::NE, dl, MVT::i32),
- N->getOperand(3), Cmp);
+ N->getOperand(3), CPSRGlue.getValue(1));
+ FalseVal = Sub;
}
}
// On Thumb1, the DAG above may be further combined if z is a power of 2
// (z == 2 ^ K).
- // CMOV (SUB x, y), z, !=, (CMPZ x, y) ->
+ // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
// merge t3, t4
// where t1 = (SUBCARRY (SUB x, y), z, 0)
// t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
@@ -12710,8 +12786,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ]
const APInt *TrueConst;
if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
- (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) &&
- (FalseVal.getOperand(1) == RHS) &&
+ (FalseVal.getOpcode() == ARMISD::SUBS) &&
+ (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) &&
(TrueConst = isPowerOf2Constant(TrueVal))) {
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned ShiftAmount = TrueConst->logBase2();
@@ -12730,8 +12806,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
}
if (Res.getNode()) {
- KnownBits Known;
- DAG.computeKnownBits(SDValue(N,0), Known);
+ KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
// Capture demanded bits information that would be otherwise lost.
if (Known.Zero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
@@ -13522,12 +13597,11 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
if (Known.isUnknown())
return;
- KnownBits KnownRHS;
- DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
+ KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
Known.Zero &= KnownRHS.Zero;
Known.One &= KnownRHS.One;
return;
@@ -13549,7 +13623,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case ARMISD::BFI: {
// Conservatively, we can recurse down the first operand
// and just mask out all affected bits.
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
@@ -13559,9 +13633,120 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.One &= Mask;
return;
}
+ case ARMISD::VGETLANEs:
+ case ARMISD::VGETLANEu: {
+ const SDValue &SrcSV = Op.getOperand(0);
+ EVT VecVT = SrcSV.getValueType();
+ assert(VecVT.isVector() && "VGETLANE expected a vector type");
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+ ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
+ assert(Pos->getAPIntValue().ult(NumSrcElts) &&
+ "VGETLANE index out of bounds");
+ unsigned Idx = Pos->getZExtValue();
+ APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
+ Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
+
+ EVT VT = Op.getValueType();
+ const unsigned DstSz = VT.getScalarSizeInBits();
+ const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
+ assert(SrcSz == Known.getBitWidth());
+ assert(DstSz > SrcSz);
+ if (Op.getOpcode() == ARMISD::VGETLANEs)
+ Known = Known.sext(DstSz);
+ else {
+ Known = Known.zext(DstSz);
+ Known.Zero.setBitsFrom(SrcSz);
+ }
+ assert(DstSz == Known.getBitWidth());
+ break;
+ }
}
}
+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedAPInt,
+ TargetLoweringOpt &TLO) const {
+ // Delay optimization, so we don't have to deal with illegal types, or block
+ // optimizations.
+ if (!TLO.LegalOps)
+ return false;
+
+ // Only optimize AND for now.
+ if (Op.getOpcode() != ISD::AND)
+ return false;
+
+ EVT VT = Op.getValueType();
+
+ // Ignore vectors.
+ if (VT.isVector())
+ return false;
+
+ assert(VT == MVT::i32 && "Unexpected integer type");
+
+ // Make sure the RHS really is a constant.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C)
+ return false;
+
+ unsigned Mask = C->getZExtValue();
+
+ unsigned Demanded = DemandedAPInt.getZExtValue();
+ unsigned ShrunkMask = Mask & Demanded;
+ unsigned ExpandedMask = Mask | ~Demanded;
+
+ // If the mask is all zeros, let the target-independent code replace the
+ // result with zero.
+ if (ShrunkMask == 0)
+ return false;
+
+ // If the mask is all ones, erase the AND. (Currently, the target-independent
+ // code won't do this, so we have to do it explicitly to avoid an infinite
+ // loop in obscure cases.)
+ if (ExpandedMask == ~0U)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+ return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+ };
+ auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+ if (NewMask == Mask)
+ return true;
+ SDLoc DL(Op);
+ SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+ };
+
+ // Prefer uxtb mask.
+ if (IsLegalMask(0xFF))
+ return UseMask(0xFF);
+
+ // Prefer uxth mask.
+ if (IsLegalMask(0xFFFF))
+ return UseMask(0xFFFF);
+
+ // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+ // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+ if (ShrunkMask < 256)
+ return UseMask(ShrunkMask);
+
+ // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+ // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+ if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+ return UseMask(ExpandedMask);
+
+ // Potential improvements:
+ //
+ // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+ // We could try to prefer Thumb1 immediates which can be lowered to a
+ // two-instruction sequence.
+ // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+ return false;
+}
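// Illustrative sketch: a worked example of the shrinking above. Assume
// Op = (and X, 0x10FF) and only the low nine bits of the result are demanded:
//   Mask         = 0x000010FF
//   Demanded     = 0x000001FF
//   ShrunkMask   = Mask & Demanded  = 0x000000FF
//   ExpandedMask = Mask | ~Demanded = 0xFFFFFEFF
// IsLegalMask(0xFF) holds: 0xFF covers ShrunkMask and sets no bit that
// ExpandedMask clears, so the AND is rewritten to (and X, 0xFF), which can be
// selected as a single uxtb instead of materialising 0x10FF.
static_assert((0x10FFu & 0x1FFu) == 0xFFu, "ShrunkMask in the example above");
static_assert((0x10FFu | ~0x1FFu) == 0xFFFFFEFFu,
              "ExpandedMask in the example above");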
+
+
//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -14412,16 +14597,18 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
: AtomicExpansionKind::None;
}
-bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
- AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- bool hasAtomicCmpXchg =
+ bool HasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
+ if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+ return AtomicExpansionKind::LLSC;
+ return AtomicExpansionKind::None;
}
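// Illustrative sketch: AtomicExpansionKind::LLSC asks AtomicExpandPass to
// lower the cmpxchg to a load-exclusive/store-exclusive loop. A rough C-level
// model of that shape, assuming Clang's ACLE-style exclusive builtins
// (__builtin_arm_ldrex/strex/clrex) are available on the target:
static bool cmpxchgViaLLSC(int *Ptr, int Expected, int Desired) {
  for (;;) {
    int Old = __builtin_arm_ldrex(Ptr);           // LDREX: open the monitor
    if (Old != Expected) {
      __builtin_arm_clrex();                      // CLREX: give the monitor up
      return false;
    }
    if (__builtin_arm_strex(Desired, Ptr) == 0)   // STREX: 0 means it stuck
      return true;
  }
}
// At -O0 spills between the LDREX and the STREX can clear the monitor and
// make the loop spin forever, which is why None is returned there and the
// late-expanded CMP_SWAP pseudo-instructions are used instead.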
bool ARMTargetLowering::shouldInsertFencesForAtomic(
@@ -14548,6 +14735,11 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Addr});
}
+
+bool ARMTargetLowering::alignLoopsWithOptSize() const {
+ return Subtarget->isMClass();
+}
+
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
unsigned
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 50b4c2977fb5..7a9fc739fc13 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -85,6 +85,7 @@ class VectorType;
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
+ SUBS, // Flag-setting subtraction.
SSAT, // Signed saturation
USAT, // Unsigned saturation
@@ -389,6 +390,9 @@ class VectorType;
const SelectionDAG &DAG,
unsigned Depth) const override;
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const override;
+
bool ExpandInlineAsm(CallInst *CI) const override;
@@ -535,7 +539,8 @@ class VectorType;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
@@ -572,6 +577,8 @@ class VectorType;
bool isLegalInterleavedAccessType(VectorType *VecTy,
const DataLayout &DL) const;
+ bool alignLoopsWithOptSize() const override;
+
/// Returns the number of interleaved accesses that will be generated when
/// lowering accesses of the given type.
unsigned getNumInterleavedAccesses(VectorType *VecTy,
@@ -583,6 +590,11 @@ class VectorType;
unsigned getABIAlignmentForCallingConv(Type *ArgTy,
DataLayout DL) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
+ bool shouldFoldShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const override;
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -685,6 +697,9 @@ class VectorType;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
+
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
@@ -764,6 +779,8 @@ class VectorType;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ bool shouldConsiderGEPOffsetSplit() const override { return true; }
+
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 1d3b1414f090..0df48ba61299 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -2580,6 +2580,37 @@ class N3VLaneCP8<bit op23, bits<2> op21_20, bit op6, bit op4,
let Inst{3-0} = Vm{3-0};
}
+// In Armv8.2-A, some NEON instructions are added that encode Vn and Vm
+// differently:
+// if Q == ‘1’ then UInt(N:Vn) else UInt(Vn:N);
+// if Q == ‘1’ then UInt(M:Vm) else UInt(Vm:M);
+// Class N3VCP8 above describes the Q=1 case, and this class the Q=0 case.
+class N3VCP8Q0<bits<2> op24_23, bits<2> op21_20, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc, dt, asm, cstr, pattern> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let DecoderNamespace = "VFPV8";
+ // These have the same encodings in ARM and Thumb2
+ let PostEncoderMethod = "";
+
+ let Inst{31-25} = 0b1111110;
+ let Inst{24-23} = op24_23;
+ let Inst{22} = Vd{4};
+ let Inst{21-20} = op21_20;
+ let Inst{19-16} = Vn{4-1};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{11-8} = 0b1000;
+ let Inst{7} = Vn{0};
+ let Inst{6} = op6;
+ let Inst{5} = Vm{0};
+ let Inst{4} = op4;
+ let Inst{3-0} = Vm{4-1};
+}
+
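// Illustrative sketch: how the Q==0 class above splits a 5-bit register
// number across the encoding, with the field positions taken from the
// 'let Inst{...}' assignments:
#include <cstdint>

static uint32_t encodeVnFieldsQ0(unsigned Reg) {   // Reg in [0, 31]
  uint32_t Bits19_16 = (Reg >> 1) & 0xFu;          // Vn{4-1}
  uint32_t Bit7      = Reg & 0x1u;                 // Vn{0}, the N bit
  return (Bits19_16 << 16) | (Bit7 << 7);
}
// For Q==1 (class N3VCP8) the split is the other way round: the low four bits
// land in the 4-bit field and the top bit in the single-bit position, which
// is the UInt(N:Vn) versus UInt(Vn:N) distinction quoted in the comment.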
// Operand types for complex instructions
class ComplexRotationOperand<int Angle, int Remainder, string Type, string Diag>
: AsmOperandClass {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 397c9dadb4ac..bcc31f5fa4cc 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -132,34 +132,6 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .cloneMemRefs(*MI)
.add(predOps(ARMCC::AL));
}
-
-std::pair<unsigned, unsigned>
-ARMInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
- const unsigned Mask = ARMII::MO_OPTION_MASK;
- return std::make_pair(TF & Mask, TF & ~Mask);
-}
-
-ArrayRef<std::pair<unsigned, const char *>>
-ARMInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
- using namespace ARMII;
-
- static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
- return makeArrayRef(TargetFlags);
-}
-
-ArrayRef<std::pair<unsigned, const char *>>
-ARMInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
- using namespace ARMII;
-
- static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_GOT, "arm-got"},
- {MO_SBREL, "arm-sbrel"},
- {MO_DLLIMPORT, "arm-dllimport"},
- {MO_SECREL, "arm-secrel"},
- {MO_NONLAZY, "arm-nonlazy"}};
- return makeArrayRef(TargetFlags);
-}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
index c54c987134df..c87fb97448c9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -38,13 +38,6 @@ public:
///
const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
- std::pair<unsigned, unsigned>
- decomposeMachineOperandsTargetFlags(unsigned TF) const override;
- ArrayRef<std::pair<unsigned, const char *>>
- getSerializableDirectMachineOperandTargetFlags() const override;
- ArrayRef<std::pair<unsigned, const char *>>
- getSerializableBitmaskMachineOperandTargetFlags() const override;
-
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index d4c342cee5c0..13abdc9687ec 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -144,6 +144,7 @@ def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
+def ARMsubs : SDNode<"ARMISD::SUBS", SDTIntBinOp, [SDNPOutGlue]>;
def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
@@ -221,6 +222,7 @@ def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">,
AssemblerPredicate<"HasV5TOps", "armv5t">;
+def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
AssemblerPredicate<"HasV5TEOps", "armv5te">;
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
@@ -255,6 +257,8 @@ def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
+ AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -285,6 +289,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16","full half-float">;
+def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
+ AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
@@ -351,23 +357,24 @@ def UseNegativeImmediates :
let RecomputePerFunction = 1 in {
def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
- def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
- def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+ def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
+ def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+
+ def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
+ " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
+ "MF->getFunction().optForMinSize())">;
}
-def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
// Do not use them for Darwin platforms.
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast && "
" Subtarget->hasVFP4()) && "
- "!Subtarget->isTargetDarwin()">;
-def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion =="
- " FPOpFusion::Fast &&"
- " Subtarget->hasVFP4()) || "
- "Subtarget->isTargetDarwin()">;
+ "!Subtarget->isTargetDarwin() &&"
+ "Subtarget->useFPVMLx()">;
def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
@@ -387,6 +394,10 @@ let RecomputePerFunction = 1 in {
def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+// Armv8.5-A extensions
+def HasSB : Predicate<"Subtarget->hasSB()">,
+ AssemblerPredicate<"FeatureSB", "sb">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -415,24 +426,22 @@ def imm16_31 : ImmLeaf<i32, [{
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
- if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17)
- return true;
-
- if (N->getOpcode() != ISD::SRA)
- return false;
- if (N->getOperand(0).getOpcode() != ISD::SHL)
- return false;
-
- auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!ShiftVal || ShiftVal->getZExtValue() != 16)
- return false;
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
- ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
- if (!ShiftVal || ShiftVal->getZExtValue() != 16)
- return false;
+def sext_bottom_16 : PatFrag<(ops node:$a),
+ (sext_inreg node:$a, i16)>;
+def sext_top_16 : PatFrag<(ops node:$a),
+ (i32 (sra node:$a, (i32 16)))>;
- return true;
-}]>;
+def bb_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sext_bottom_16 node:$a), (sext_bottom_16 node:$b))>;
+def bt_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sext_bottom_16 node:$a), (sra node:$b, (i32 16)))>;
+def tb_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sra node:$a, (i32 16)), (sext_bottom_16 node:$b))>;
+def tt_mul : PatFrag<(ops node:$a, node:$b),
+ (mul (sra node:$a, (i32 16)), (sra node:$b, (i32 16)))>;
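// Illustrative sketch: scalar models of the PatFrags above, assuming the
// usual smulbb/smulbt/smultb/smultt semantics of signed 16 x 16 -> 32
// multiplies of the bottom/top halves of each operand:
#include <cstdint>

static int32_t bb_mul(int32_t A, int32_t B) { return (int16_t)A * (int16_t)B; }
static int32_t bt_mul(int32_t A, int32_t B) { return (int16_t)A * (B >> 16); }
static int32_t tb_mul(int32_t A, int32_t B) { return (A >> 16) * (int16_t)B; }
static int32_t tt_mul(int32_t A, int32_t B) { return (A >> 16) * (B >> 16); }
// sext_bottom_16 corresponds to the (int16_t) casts and sext_top_16 to the
// arithmetic >> 16, mirroring the sext_inreg and sra nodes in the fragments.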
/// Split a 32-bit immediate into two 16 bit parts.
def hi16 : SDNodeXForm<imm, [{
@@ -713,7 +722,20 @@ def arm_i32imm : PatLeaf<(imm), [{
if (Subtarget->useMovt(*MF))
return true;
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-}]>;
+}]> {
+ // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
+ // the MachineFunction.
+ let GISelPredicateCode = [{
+ const auto &MF = *MI.getParent()->getParent();
+ if (STI.useMovt(MF))
+ return true;
+
+ const auto &MO = MI.getOperand(1);
+ if (!MO.isCImm())
+ return false;
+ return ARM_AM::isSOImmTwoPartVal(MO.getCImm()->getZExtValue());
+ }];
+}
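// Illustrative sketch: a rough model of the immediate test referenced above.
// An ARM "modified immediate" is an 8-bit value rotated right by an even
// amount; arm_i32imm also accepts constants that split into two such chunks
// (e.g. built with mov+orr). 0x00FF00FF is a typical two-part value: it is
// not a single modified immediate, but 0xFF and 0x00FF0000 both are.
#include <cstdint>

static bool isModifiedImm(uint32_t V) {   // sketch of the single-chunk test
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    uint32_t Rotl = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
    if (Rotl <= 0xFFu)
      return true;
  }
  return false;
}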
/// imm0_1 predicate - Immediate in the range [0,1].
def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; }
@@ -2191,6 +2213,9 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
let Inst = 0xe7ffdefe;
}
+def : Pat<(debugtrap), (BKPT 0)>, Requires<[IsARM, HasV5T]>;
+def : Pat<(debugtrap), (UDF 254)>, Requires<[IsARM, NoV5T]>;
+
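// Illustrative sketch: llvm.debugtrap is what Clang emits for
// __builtin_debugtrap() (frontend behaviour assumed here), so with the
// patterns above a helper like this selects "bkpt #0" on Armv5T+ and
// "udf #254" on pre-v5T cores:
inline void debugBreak() { __builtin_debugtrap(); }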
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
@@ -3321,7 +3346,7 @@ multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">;
@@ -3519,10 +3544,14 @@ def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot),
def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src),
(SXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(int_arm_sxtb16 (rotr GPR:$Src, rot_imm:$rot)),
+ (SXTB16 GPR:$Src, rot_imm:$rot)>;
def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS),
(SXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
+def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)),
+ (SXTAB16 GPR:$LHS, GPR:$RHS, rot_imm:$rot)>;
// Zero extenders
@@ -3544,6 +3573,8 @@ def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16 GPR:$Src, 1)>;
def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src),
(UXTB16 GPR:$Src, 0)>;
+def : ARMV6Pat<(int_arm_uxtb16 (rotr GPR:$Src, rot_imm:$rot)),
+ (UXTB16 GPR:$Src, rot_imm:$rot)>;
def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -3560,6 +3591,8 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS),
(UXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
+def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)),
+ (UXTAB16 GPR:$LHS, GPR:$RHS, rot_imm:$rot)>;
def SBFX : I<(outs GPRnopc:$Rd),
@@ -3620,6 +3653,14 @@ let isAdd = 1 in
defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>;
defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
+def : ARMPat<(ARMsubs GPR:$Rn, mod_imm:$imm), (SUBSri $Rn, mod_imm:$imm)>;
+def : ARMPat<(ARMsubs GPR:$Rn, GPR:$Rm), (SUBSrr $Rn, $Rm)>;
+def : ARMPat<(ARMsubs GPR:$Rn, so_reg_imm:$shift),
+ (SUBSrsi $Rn, so_reg_imm:$shift)>;
+def : ARMPat<(ARMsubs GPR:$Rn, so_reg_reg:$shift),
+ (SUBSrsr $Rn, so_reg_reg:$shift)>;
+
+
let isAdd = 1 in
defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>;
@@ -4211,29 +4252,25 @@ def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
multiclass AI_smul<string opc> {
def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
- (sext_inreg GPR:$Rm, i16)))]>,
+ [(set GPR:$Rd, (bb_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
- (sra GPR:$Rm, (i32 16))))]>,
+ [(set GPR:$Rd, (bt_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
- (sext_inreg GPR:$Rm, i16)))]>,
+ [(set GPR:$Rd, (tb_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
- (sra GPR:$Rm, (i32 16))))]>,
+ [(set GPR:$Rd, (tt_mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV5TE]>,
Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
@@ -4257,35 +4294,31 @@ multiclass AI_smla<string opc> {
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd, (add GPR:$Ra,
- (mul (sext_inreg GPRnopc:$Rn, i16),
- (sext_inreg GPRnopc:$Rm, i16))))]>,
+ (bb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16),
- (sra GPRnopc:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (bt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
- (sext_inreg GPRnopc:$Rm, i16))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (tb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
- (sra GPRnopc:$Rm, (i32 16)))))]>,
+ [(set GPRnopc:$Rd, (add GPR:$Ra,
+ (tt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>,
Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
@@ -4863,6 +4896,14 @@ def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary,
}
+// Armv8.5-A speculation barrier
+def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>,
+ Requires<[IsARM, HasSB]>, Sched<[]> {
+ let Inst{31-0} = 0xf57ff070;
+ let Unpredictable = 0x000fff0f;
+ let hasSideEffects = 1;
+}
+
let usesCustomInserter = 1, Defs = [CPSR] in {
// Pseudo instruction that combines movs + predicated rsbmi
@@ -4870,7 +4911,7 @@ let usesCustomInserter = 1, Defs = [CPSR] in {
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
}
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, Defs = [CPSR] in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
NoItinerary,
@@ -5778,26 +5819,21 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
- (SMULBB GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
- (SMULTB GPR:$a, GPR:$b)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul sext_16_node:$a, sext_16_node:$b)),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
-def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
- Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_bottom_16 GPR:$b)),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sext_top_16 GPR:$b)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sext_top_16 GPR:$a), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_bottom_16 GPR:$b))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_top_16 GPR:$b))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5MOPat<(add GPR:$acc, (mul (sext_top_16 GPR:$a), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b),
(SMULBB GPR:$a, GPR:$b)>;
@@ -5902,6 +5938,8 @@ include "ARMInstrNEON.td"
// Memory barriers
def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>;
def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"ssbb", (DSB 0x0), 1>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"pssbb", (DSB 0x4), 1>, Requires<[IsARM, HasDB]>;
def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>;
// Armv8-R 'Data Full Barrier'
def : InstAlias<"dfb", (DSB 0xc), 1>, Requires<[IsARM, HasDFB]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 4525eec8da03..96986e74415b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4305,17 +4305,29 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(v2f32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-
+def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
+ (v8f16 (NEONvduplane (v8f16 QPR:$src2), imm:$lane)))),
+ (v8f16 (VMULslhq(v8f16 QPR:$src1),
+ (v4f16 (EXTRACT_SUBREG QPR:$src2,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
(VMULslfd DPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
+def : Pat<(v4f16 (fmul DPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+ (VMULslhd DPR:$Rn,
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
+ (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
(VMULslfq QPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
-
+def : Pat<(v8f16 (fmul QPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+ (VMULslhq QPR:$Rn,
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
+ (i32 0))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
@@ -4390,16 +4402,16 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
v4f16, fmul_su, fadd_mlx>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
v8f16, fmul_su, fadd_mlx>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4620,16 +4632,16 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
v4f16, fmul, fsub>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
v8f16, fmul, fsub>,
- Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4734,6 +4746,12 @@ def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
+def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
+ (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON,HasFullFP16]>;
+def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
+ (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
@@ -5066,7 +5084,7 @@ def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
"f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
- "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+ "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
@@ -5091,6 +5109,54 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
(VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}
+// +fp16fml Floating Point Multiplication Variants
+let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
+
+class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
+ RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
+ : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
+ asm, "f16", "$Vd, $Vn, $Vm", "", []>;
+
+class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
+ RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
+ : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
+ asm, "f16", "$Vd, $Vn, $Vm", "", []>;
+
+class VFMQ0<string opc, bits<2> S>
+ : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
+ (ins SPR:$Vn, SPR:$Vm, VectorIndex32:$idx),
+ IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+ bit idx;
+ let Inst{3} = idx;
+ let Inst{19-16} = Vn{4-1};
+ let Inst{7} = Vn{0};
+ let Inst{5} = Vm{0};
+ let Inst{2-0} = Vm{3-1};
+}
+
+class VFMQ1<string opc, bits<2> S>
+ : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, VectorIndex16:$idx),
+ IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+ bits<2> idx;
+ let Inst{5} = idx{1};
+ let Inst{3} = idx{0};
+}
+
+let hasNoSchedulingInfo = 1 in {
+// op1 op2 op3
+def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
+def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
+def VFMALQ : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
+def VFMSLQ : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
+def VFMALDI : VFMQ0<"vfmal", 0b00>;
+def VFMSLDI : VFMQ0<"vfmsl", 0b01>;
+def VFMALQI : VFMQ1<"vfmal", 0b00>;
+def VFMSLQI : VFMQ1<"vfmsl", 0b01>;
+}
+} // HasNEON, HasFP16FML
+
+
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
(VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
@@ -5455,17 +5521,17 @@ defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
"vmax", "u", umax, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f32",
- v2f32, v2f32, fmaxnan, 1>;
+ v2f32, v2f32, fmaximum, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f32",
- v4f32, v4f32, fmaxnan, 1>;
+ v4f32, v4f32, fmaximum, 1>;
def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f16",
- v4f16, v4f16, fmaxnan, 1>,
+ v4f16, v4f16, fmaximum, 1>,
Requires<[HasNEON, HasFullFP16]>;
def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f16",
- v8f16, v8f16, fmaxnan, 1>,
+ v8f16, v8f16, fmaximum, 1>,
Requires<[HasNEON, HasFullFP16]>;
// VMAXNM
@@ -5497,17 +5563,17 @@ defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
"vmin", "u", umin, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f32",
- v2f32, v2f32, fminnan, 1>;
+ v2f32, v2f32, fminimum, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f32",
- v4f32, v4f32, fminnan, 1>;
+ v4f32, v4f32, fminimum, 1>;
def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f16",
- v4f16, v4f16, fminnan, 1>,
+ v4f16, v4f16, fminimum, 1>,
Requires<[HasNEON, HasFullFP16]>;
def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f16",
- v8f16, v8f16, fminnan, 1>,
+ v8f16, v8f16, fminimum, 1>,
Requires<[HasNEON, HasFullFP16]>;
// VMINNM
@@ -6318,6 +6384,9 @@ def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
let Inst{19} = lane{0};
}
+def : Pat<(v4f16 (NEONvduplane (v4f16 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
(VDUPLN32d DPR:$Vm, imm:$lane)>;
@@ -6332,6 +6401,10 @@ def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
(v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v8f16 (NEONvduplane (v8f16 QPR:$src), imm:$lane)),
+ (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
(v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
@@ -6341,12 +6414,18 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f16 (NEONvdup HPR:$src)),
+ (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
+ HPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
(v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
(v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
+def : Pat<(v8f16 (NEONvdup HPR:$src)),
+ (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
+ HPR:$src, ssub_0), (i32 0)))>;
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
@@ -6558,6 +6637,8 @@ def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+def : Pat<(v8f16 (NEONvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
+def : Pat<(v4f16 (NEONvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
// VREV32 : Vector Reverse elements within 32-bit words
@@ -6647,13 +6728,14 @@ def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{10-9} = index{1-0};
let Inst{8} = 0b0;
}
+def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
+ (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{10} = index{0};
let Inst{9-8} = 0b00;
}
-def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
- (v2f32 DPR:$Vm),
- (i32 imm:$index))),
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
@@ -6663,6 +6745,9 @@ def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
+def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
+ (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
@@ -6671,9 +6756,7 @@ def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
let Inst{11} = index{0};
let Inst{10-8} = 0b000;
}
-def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
- (v4f32 QPR:$Vm),
- (i32 imm:$index))),
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
(VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
// VTRN : Vector Transpose
@@ -7001,19 +7084,19 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
-def : N3VSPatFP16<fmaxnan, VMAXhd>, Requires<[HasFullFP16]>;
-def : N3VSPatFP16<fminnan, VMINhd>, Requires<[HasFullFP16]>;
-def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
-def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
+def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
+def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
+def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index 88aab47a79bf..b20b34eaa6a9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -781,7 +781,7 @@ defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
// These require base address to be written back or one of the loaded regs.
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
bits<3> Rn;
@@ -826,7 +826,8 @@ def : InstAlias<"ldm${p} $Rn!, $regs",
(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs), 0>,
Requires<[IsThumb, IsThumb1Only]>;
-let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1,
+ variadicOpsAreDefs = 1 in
def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iPop,
"pop${p}\t$regs", []>,
@@ -1343,8 +1344,20 @@ let hasPostISelHook = 1, Defs = [CPSR] in {
tGPR:$Rm))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
+
+ def tRSBS : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMsubc 0, tGPR:$Rn))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
}
+
+def : T1Pat<(ARMsubs tGPR:$Rn, tGPR:$Rm), (tSUBSrr $Rn, $Rm)>;
+def : T1Pat<(ARMsubs tGPR:$Rn, imm0_7:$imm3), (tSUBSi3 $Rn, imm0_7:$imm3)>;
+def : T1Pat<(ARMsubs tGPR:$Rn, imm0_255:$imm8), (tSUBSi8 $Rn, imm0_255:$imm8)>;
+
+
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1380,6 +1393,9 @@ def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8",
let Inst{7-0} = imm8;
}
+def : Pat<(debugtrap), (tBKPT 0)>, Requires<[IsThumb, HasV5T]>;
+def : Pat<(debugtrap), (tUDF 254)>, Requires<[IsThumb, NoV5T]>;
+
def t__brkdiv0 : TI<(outs), (ins), IIC_Br, "__brkdiv0",
[(int_arm_undefined 249)]>, Encoding16,
Requires<[IsThumb, IsWindows]> {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f67075fbf9fd..7a6673b49d57 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1775,7 +1775,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
let hasSideEffects = 0 in {
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
multiclass thumb2_st_mult<string asm, InstrItinClass itin,
@@ -1997,6 +1997,10 @@ def : Thumb2DSPPat<(int_arm_sxtb16 rGPR:$Rn),
(t2SXTB16 rGPR:$Rn, 0)>;
def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, rGPR:$Rm),
(t2SXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_sxtb16 (rotr rGPR:$Rn, rot_imm:$rot)),
+ (t2SXTB16 rGPR:$Rn, rot_imm:$rot)>;
+def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, (rotr rGPR:$Rm, rot_imm:$rot)),
+ (t2SXTAB16 rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
// A simple right-shift can also be used in most cases (the exception is the
@@ -2032,6 +2036,8 @@ def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF),
def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm),
(t2UXTB16 rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_uxtb16 (rotr rGPR:$Rn, rot_imm:$rot)),
+ (t2UXTB16 rGPR:$Rn, rot_imm:$rot)>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -2062,6 +2068,8 @@ def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm),
(t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
+def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, (rotr rGPR:$Rm, rot_imm:$rot)),
+ (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
@@ -2086,6 +2094,12 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>;
defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMaddc, 1>;
defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMsubc>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_imm:$imm),
+ (t2SUBSri $Rn, t2_so_imm:$imm)>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, rGPR:$Rm), (t2SUBSrr $Rn, $Rm)>;
+def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
+ (t2SUBSrs $Rn, t2_so_reg:$ShiftedRm)>;
+
let hasPostISelHook = 1 in {
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;
@@ -2718,28 +2732,25 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
}
def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb",
- [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16)))]>;
+ [(set rGPR:$Rd, (bb_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt",
- [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16))))]>;
+ [(set rGPR:$Rd, (bt_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
- [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16)))]>;
+ [(set rGPR:$Rd, (tb_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
- [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16))))]>;
+ [(set rGPR:$Rd, (tt_mul rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb",
[(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>;
def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt",
[(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>;
-def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
- (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
-def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_bottom_16 rGPR:$Rm)),
+ (t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_top_16 rGPR:$Rm)),
(t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
-def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
+def : Thumb2DSPPat<(mul (sext_top_16 rGPR:$Rn), sext_16_node:$Rm),
(t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+
def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm),
(t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm),
@@ -2767,18 +2778,13 @@ class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
}
def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb",
- [(set rGPR:$Rd, (add rGPR:$Ra,
- (mul (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (bb_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16)))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (bt_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (tb_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
- [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16)))))]>;
+ [(set rGPR:$Rd, (add rGPR:$Ra, (tt_mul rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb",
[(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>;
def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
@@ -2786,11 +2792,14 @@ def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-def : Thumb2DSPMulPat<(add rGPR:$Ra,
- (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+ (sext_bottom_16 rGPR:$Rm))),
+ (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn,
+ (sext_top_16 rGPR:$Rm))),
(t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-def : Thumb2DSPMulPat<(add rGPR:$Ra,
- (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sext_top_16 rGPR:$Rn),
+ sext_16_node:$Rm)),
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc),
@@ -3223,6 +3232,14 @@ def t2TSB : T2I<(outs), (ins tsb_opt:$opt), NoItinerary,
}
}
+// Armv8.5-A speculation barrier
+def t2SB : Thumb2XI<(outs), (ins), AddrModeNone, 4, NoItinerary, "sb", "", []>,
+ Requires<[IsThumb2, HasSB]>, Sched<[]> {
+ let Inst{31-0} = 0xf3bf8f70;
+ let Unpredictable = 0x000f2f0f;
+ let hasSideEffects = 1;
+}
+
class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
@@ -4429,13 +4446,13 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
-let AddedComplexity = 8 in {
- def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
- def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
- def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
- def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
+let AddedComplexity = 8, Predicates = [IsThumb, HasAcquireRelease, HasV7Clrex] in {
+ def : Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
+ def : Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
+ def : Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
+ def : Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
}
@@ -4538,6 +4555,12 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+
+// Non-predicable aliases of a predicable DSB: the predicate is (14, 0) where
+// 14 = AL (always execute) and 0 = "instruction doesn't read the CPSR".
+def : InstAlias<"ssbb", (t2DSB 0x0, 14, 0), 1>, Requires<[HasDB, IsThumb2]>;
+def : InstAlias<"pssbb", (t2DSB 0x4, 14, 0), 1>, Requires<[HasDB, IsThumb2]>;
+
// Armv8-R 'Data Full Barrier'
def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index 2f14b78c91fd..b58730c452f7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -725,9 +725,11 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
}
def : FullFP16Pat<(f64 (fpextend HPR:$Sm)),
- (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+ (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
- (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+ (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
@@ -746,9 +748,11 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
}
def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
- (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>;
+ (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
- (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
+ (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -1810,7 +1814,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
@@ -1819,7 +1823,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1832,17 +1836,17 @@ def VMLAH : AHbI<0b11100, 0b00, 0, 0,
[(set HPR:$Sd, (fadd_mlx (fmul_su HPR:$Sn, HPR:$Sm),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
def : Pat<(fadd_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
(VMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx]>;
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
@@ -1851,7 +1855,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
@@ -1860,7 +1864,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1873,17 +1877,17 @@ def VMLSH : AHbI<0b11100, 0b00, 1, 0,
[(set HPR:$Sd, (fadd_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
(VMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1891,7 +1895,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
@@ -1900,7 +1904,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1913,29 +1917,29 @@ def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
[(set HPR:$Sd, (fsub_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
// (-(a * b) - dst) -> -(dst + (a * b))
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg (fmul_su HPR:$a, HPR:$b)), HPR:$dstin),
(VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
// (-dst - (a * b)) -> -(dst + (a * b))
def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fneg HPR:$dstin), (fmul_su HPR:$a, HPR:$b)),
(VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1943,7 +1947,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
@@ -1951,7 +1955,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -1963,17 +1967,17 @@ def VNMLSH : AHbI<0b11100, 0b01, 0, 0,
IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm",
[(set HPR:$Sd, (fsub_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
def : Pat<(fsub_mlx (fmul_su HPR:$a, HPR:$b), HPR:$dstin),
(VNMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
- Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
//===----------------------------------------------------------------------===//
// Fused FP Multiply-Accumulate Operations.
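A hedged sketch (assumed, not from the diff) of the unfused shape the VMLA/VMLS patterns above match, now that DontUseFusedMAC no longer appears in their predicates; the fused variants are covered by the FMA section that follows.

  // Hypothetical example; whether VMLA.F32 or a fused VFMA.F32 is chosen
  // depends on -ffp-contract and the UseFPVMLx / NEON subtarget features.
  float mla_f32(float acc, float a, float b) {
    return acc + a * b;   // fadd(fmul(a, b), acc): a VMLAS pattern candidate
  }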
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 6692a4d41420..293e734c97cd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -76,6 +76,42 @@ private:
const ARMRegisterBankInfo &RBI;
const ARMSubtarget &STI;
+ // Store the opcodes that we might need, so we don't have to check what kind
+ // of subtarget (ARM vs Thumb) we have all the time.
+ struct OpcodeCache {
+ unsigned ZEXT16;
+ unsigned SEXT16;
+
+ unsigned ZEXT8;
+ unsigned SEXT8;
+
+ // Used for implementing ZEXT/SEXT from i1
+ unsigned AND;
+ unsigned RSB;
+
+ unsigned STORE32;
+ unsigned LOAD32;
+
+ unsigned STORE16;
+ unsigned LOAD16;
+
+ unsigned STORE8;
+ unsigned LOAD8;
+
+ OpcodeCache(const ARMSubtarget &STI);
+ } const Opcodes;
+
+ // Select the opcode for simple extensions (that translate to a single SXT/UXT
+ // instruction). Extension operations more complicated than that should not
+ // invoke this. Returns the original opcode if it doesn't know how to select a
+ // better one.
+ unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) const;
+
+ // Select the opcode for simple loads and stores. Returns the original opcode
+ // if it doesn't know how to select a better one.
+ unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
+ unsigned Size) const;
+
#define GET_GLOBALISEL_PREDICATES_DECL
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
@@ -107,7 +143,7 @@ ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM,
const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI)
: InstructionSelector(), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI),
+ TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), Opcodes(STI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
@@ -225,41 +261,63 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
return true;
}
-/// Select the opcode for simple extensions (that translate to a single SXT/UXT
-/// instruction). Extension operations more complicated than that should not
-/// invoke this. Returns the original opcode if it doesn't know how to select a
-/// better one.
-static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) {
+ARMInstructionSelector::OpcodeCache::OpcodeCache(const ARMSubtarget &STI) {
+ bool isThumb = STI.isThumb();
+
+ using namespace TargetOpcode;
+
+#define STORE_OPCODE(VAR, OPC) VAR = isThumb ? ARM::t2##OPC : ARM::OPC
+ STORE_OPCODE(SEXT16, SXTH);
+ STORE_OPCODE(ZEXT16, UXTH);
+
+ STORE_OPCODE(SEXT8, SXTB);
+ STORE_OPCODE(ZEXT8, UXTB);
+
+ STORE_OPCODE(AND, ANDri);
+ STORE_OPCODE(RSB, RSBri);
+
+ STORE_OPCODE(STORE32, STRi12);
+ STORE_OPCODE(LOAD32, LDRi12);
+
+ // LDRH/STRH are special...
+ STORE16 = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ LOAD16 = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+
+ STORE_OPCODE(STORE8, STRBi12);
+ STORE_OPCODE(LOAD8, LDRBi12);
+#undef STORE_OPCODE
+}
+
+unsigned ARMInstructionSelector::selectSimpleExtOpc(unsigned Opc,
+ unsigned Size) const {
using namespace TargetOpcode;
if (Size != 8 && Size != 16)
return Opc;
if (Opc == G_SEXT)
- return Size == 8 ? ARM::SXTB : ARM::SXTH;
+ return Size == 8 ? Opcodes.SEXT8 : Opcodes.SEXT16;
if (Opc == G_ZEXT)
- return Size == 8 ? ARM::UXTB : ARM::UXTH;
+ return Size == 8 ? Opcodes.ZEXT8 : Opcodes.ZEXT16;
return Opc;
}
-/// Select the opcode for simple loads and stores. For types smaller than 32
-/// bits, the value will be zero extended. Returns the original opcode if it
-/// doesn't know how to select a better one.
-static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
- unsigned Size) {
+unsigned ARMInstructionSelector::selectLoadStoreOpCode(unsigned Opc,
+ unsigned RegBank,
+ unsigned Size) const {
bool isStore = Opc == TargetOpcode::G_STORE;
if (RegBank == ARM::GPRRegBankID) {
switch (Size) {
case 1:
case 8:
- return isStore ? ARM::STRBi12 : ARM::LDRBi12;
+ return isStore ? Opcodes.STORE8 : Opcodes.LOAD8;
case 16:
- return isStore ? ARM::STRH : ARM::LDRH;
+ return isStore ? Opcodes.STORE16 : Opcodes.LOAD16;
case 32:
- return isStore ? ARM::STRi12 : ARM::LDRi12;
+ return isStore ? Opcodes.STORE32 : Opcodes.LOAD32;
default:
return Opc;
}
@@ -702,7 +760,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
switch (SrcSize) {
case 1: {
// ZExt boils down to & 0x1; for SExt we also subtract that from 0
- I.setDesc(TII.get(ARM::ANDri));
+ I.setDesc(TII.get(Opcodes.AND));
MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());
if (isSExt) {
@@ -714,7 +772,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
auto InsertBefore = std::next(I.getIterator());
auto SubI =
- BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
+ BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.RSB))
.addDef(SExtResult)
.addUse(AndResult)
.addImm(0)
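The comment above ("ZExt boils down to & 0x1; for SExt we also subtract that from 0") is the usual bit trick; a small stand-alone sketch of it (hypothetical, written with plain integers rather than MIR):

  #include <cstdint>
  // zext i1: keep only bit 0 (the ANDri with #1).
  uint32_t zext_i1(uint32_t x) { return x & 1u; }
  // sext i1: 0 - (x & 1) yields 0 or 0xFFFFFFFF (ANDri followed by RSBri #0).
  uint32_t sext_i1(uint32_t x) { return 0u - (x & 1u); }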
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 891418306903..4a0c24d58474 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -75,13 +75,48 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
- getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+ if (ST.isThumb1Only()) {
+ // Thumb1 is not supported yet.
+ computeTables();
+ verify(*ST.getInstrInfo());
+ return;
+ }
+
+ getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
+ .legalForCartesianProduct({s32}, {s1, s8, s16});
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32})
.minScalar(0, s32);
+ getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
+ getActionDefinitionsBuilder(G_PTRTOINT).legalFor({{s32, p0}});
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32, p0})
+ .clampScalar(0, s32, s32);
+
+ // We're keeping these builders around because we'll want to add support for
+ // floating point to them.
+ auto &LoadStoreBuilder =
+ getActionDefinitionsBuilder({G_LOAD, G_STORE})
+ .legalForTypesWithMemSize({
+ {s1, p0, 8},
+ {s8, p0, 8},
+ {s16, p0, 16},
+ {s32, p0, 32},
+ {p0, p0, 32}});
+
+ if (ST.isThumb()) {
+ // FIXME: merge with the code for non-Thumb.
+ computeTables();
+ verify(*ST.getInstrInfo());
+ return;
+ }
+
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
+ getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+
if (ST.hasDivideInARMMode())
getActionDefinitionsBuilder({G_SDIV, G_UDIV})
.legalFor({s32})
@@ -101,14 +136,24 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
- getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
- .legalForCartesianProduct({s32}, {s1, s8, s16});
-
- getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
- getActionDefinitionsBuilder(G_PTRTOINT).legalFor({{s32, p0}});
-
getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}).legalFor({s32});
+ if (ST.hasV5TOps()) {
+ getActionDefinitionsBuilder(G_CTLZ)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .lowerFor({s32})
+ .clampScalar(0, s32, s32);
+ } else {
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .libcallFor({s32})
+ .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_CTLZ)
+ .lowerFor({s32})
+ .clampScalar(0, s32, s32);
+ }
+
getActionDefinitionsBuilder(G_GEP).legalFor({{p0, s32}});
getActionDefinitionsBuilder(G_SELECT).legalForCartesianProduct({s32, p0},
@@ -116,20 +161,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
- getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({s32, p0})
- .clampScalar(0, s32, s32);
-
getActionDefinitionsBuilder(G_ICMP)
.legalForCartesianProduct({s1}, {s32, p0})
.minScalar(1, s32);
// We're keeping these builders around because we'll want to add support for
// floating point to them.
- auto &LoadStoreBuilder =
- getActionDefinitionsBuilder({G_LOAD, G_STORE})
- .legalForCartesianProduct({s1, s8, s16, s32, p0}, {p0});
-
auto &PhiBuilder =
getActionDefinitionsBuilder(G_PHI).legalFor({s32, p0}).minScalar(0, s32);
@@ -302,7 +339,8 @@ ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
using namespace TargetOpcode;
MIRBuilder.setInstr(MI);
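The G_CTLZ rules above rely on the usual relationship between the two count-leading-zeros opcodes: with ARMv5T+ CLZ the plain form is legal and the zero-undef form reuses it, otherwise the zero-undef form becomes a libcall and the plain form is lowered with an explicit zero check. A minimal sketch (hypothetical, using the Clang/GCC builtin as the zero-undef primitive):

  #include <cstdint>
  uint32_t ctlz32(uint32_t x) {
    // __builtin_clz is undefined at 0, so guard it, mirroring how G_CTLZ can
    // be expressed in terms of G_CTLZ_ZERO_UNDEF.
    return x == 0 ? 32u : static_cast<uint32_t>(__builtin_clz(x));
  }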
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
index 78ab9412c04b..527bf87f1093 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/IR/Instructions.h"
@@ -29,7 +30,8 @@ public:
ARMLegalizerInfo(const ARMSubtarget &ST);
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
private:
void setFCmpLibcallsGNU();
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index db5f28480e90..6da7430a8e51 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1027,6 +1027,18 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
CanMergeToLSMulti = CanMergeToLSDouble = false;
+  // vldm / vstm limits are 32 for S variants, 16 for D variants.
+ unsigned Limit;
+ switch (Opcode) {
+ default:
+ Limit = UINT_MAX;
+ break;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ Limit = 16;
+ break;
+ }
+
// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
@@ -1036,6 +1048,8 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
unsigned Reg = MO.getReg();
if (Reg == ARM::SP || Reg == ARM::PC)
break;
+ if (Count == Limit)
+ break;
// See if the current load/store may be part of a multi load/store.
unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
@@ -1303,7 +1317,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
MBB.erase(MBBI);
return true;
@@ -1527,7 +1541,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
MIB.add(MO);
- MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.setMemRefs(MI.memoperands());
MBB.erase(MBBI);
return true;
@@ -1834,7 +1848,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
return M0->InsertPos < M1->InsertPos;
};
- llvm::sort(Candidates.begin(), Candidates.end(), LessThan);
+ llvm::sort(Candidates, LessThan);
// Go through list of candidates and merge.
bool Changed = false;
@@ -2172,13 +2186,12 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
bool RetVal = false;
// Sort by offset (in reverse order).
- llvm::sort(Ops.begin(), Ops.end(),
- [](const MachineInstr *LHS, const MachineInstr *RHS) {
- int LOffset = getMemoryOpOffset(*LHS);
- int ROffset = getMemoryOpOffset(*RHS);
- assert(LHS == RHS || LOffset != ROffset);
- return LOffset > ROffset;
- });
+ llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
+ int LOffset = getMemoryOpOffset(*LHS);
+ int ROffset = getMemoryOpOffset(*RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ });
// The loads / stores of the same base are in order. Scan them from first to
// last and check for the following:
@@ -2290,7 +2303,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
- MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
+ MIB.cloneMergedMemRefs({Op0, Op1});
LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
@@ -2304,7 +2317,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
- MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
+ MIB.cloneMergedMemRefs({Op0, Op1});
LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
index d11fe9d5c502..df1da9d8e474 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -23,20 +23,13 @@ namespace llvm {
static bool isAESPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- switch(SecondOpcode) {
+ switch(SecondMI.getOpcode()) {
// AES encode.
case ARM::AESMC :
- return FirstOpcode == ARM::AESE ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ return FirstMI == nullptr || FirstMI->getOpcode() == ARM::AESE;
// AES decode.
case ARM::AESIMC:
- return FirstOpcode == ARM::AESD ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ return FirstMI == nullptr || FirstMI->getOpcode() == ARM::AESD;
}
return false;
@@ -46,15 +39,8 @@ static bool isAESPair(const MachineInstr *FirstMI,
static bool isLiteralsPair(const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- // 32 bit immediate.
- if ((FirstOpcode == ARM::INSTRUCTION_LIST_END ||
- FirstOpcode == ARM::MOVi16) &&
- SecondOpcode == ARM::MOVTi16)
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == ARM::MOVi16) &&
+ SecondMI.getOpcode() == ARM::MOVTi16)
return true;
return false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
index 1e4fc6687eae..b3abd7b593a1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMacroFusion.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_ARM_ARMMACROFUSION_H
+#define LLVM_LIB_TARGET_ARM_ARMMACROFUSION_H
+
#include "llvm/CodeGen/MachineScheduler.h"
namespace llvm {
@@ -22,3 +25,5 @@ namespace llvm {
std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation();
} // llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index 9d5478b76c18..fc3258914f92 100644
--- a/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -42,6 +42,10 @@ using namespace PatternMatch;
STATISTIC(NumSMLAD , "Number of smlad instructions generated");
+static cl::opt<bool>
+DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
+ cl::desc("Disable the ARM Parallel DSP pass"));
+
namespace {
struct OpChain;
struct BinOpChain;
@@ -67,7 +71,7 @@ namespace {
virtual ~OpChain() = default;
void SetMemoryLocations() {
- const auto Size = MemoryLocation::UnknownSize;
+ const auto Size = LocationSize::unknown();
for (auto *V : AllValues) {
if (auto *I = dyn_cast<Instruction>(V)) {
if (I->mayWriteToMemory())
@@ -88,12 +92,15 @@ namespace {
struct BinOpChain : public OpChain {
ValueList LHS; // List of all (narrow) left hand operands.
ValueList RHS; // List of all (narrow) right hand operands.
+ bool Exchange = false;
BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) :
OpChain(I, lhs), LHS(lhs), RHS(rhs) {
for (auto *V : RHS)
AllValues.push_back(V);
}
+
+ bool AreSymmetrical(BinOpChain *Other);
};
struct Reduction {
@@ -101,9 +108,9 @@ namespace {
// pattern matching.
Instruction *AccIntAdd; // The accumulating integer add statement,
// i.e, the reduction statement.
-
OpChainList MACCandidates; // The MAC candidates associated with
// this reduction statement.
+ PMACPairList PMACPairs;
Reduction (PHINode *P, Instruction *Acc) : Phi(P), AccIntAdd(Acc) { };
};
@@ -116,12 +123,16 @@ namespace {
Loop *L;
const DataLayout *DL;
Module *M;
+ std::map<LoadInst*, LoadInst*> LoadPairs;
+ std::map<LoadInst*, SmallVector<LoadInst*, 4>> SequentialLoads;
- bool InsertParallelMACs(Reduction &Reduction, PMACPairList &PMACPairs);
+ bool RecordSequentialLoads(BasicBlock *Header);
+ bool InsertParallelMACs(Reduction &Reduction);
bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem);
- PMACPairList CreateParallelMACPairs(OpChainList &Candidates);
+ void CreateParallelMACPairs(Reduction &R);
Instruction *CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
- Instruction *Acc, Instruction *InsertAfter);
+ Instruction *Acc, bool Exchange,
+ Instruction *InsertAfter);
/// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate
/// Dual performs two signed 16x16-bit multiplications. It adds the
@@ -149,6 +160,8 @@ namespace {
}
bool runOnLoop(Loop *TheLoop, LPPassManager &) override {
+ if (DisableParallelDSP)
+ return false;
L = TheLoop;
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -192,7 +205,14 @@ namespace {
LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI);
bool Changes = false;
- LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n\n");
+ LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n");
+ LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n");
+
+ if (!RecordSequentialLoads(Header)) {
+ LLVM_DEBUG(dbgs() << " - No sequential loads found.\n");
+ return false;
+ }
+
Changes = MatchSMLAD(F);
return Changes;
}
@@ -245,57 +265,14 @@ static bool IsNarrowSequence(Value *V, ValueList &VL) {
return false;
}
-// Element-by-element comparison of Value lists returning true if they are
-// instructions with the same opcode or constants with the same value.
-static bool AreSymmetrical(const ValueList &VL0,
- const ValueList &VL1) {
- if (VL0.size() != VL1.size()) {
- LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: "
- << VL0.size() << " != " << VL1.size() << "\n");
- return false;
- }
-
- const unsigned Pairs = VL0.size();
- LLVM_DEBUG(dbgs() << "Number of operand pairs: " << Pairs << "\n");
-
- for (unsigned i = 0; i < Pairs; ++i) {
- const Value *V0 = VL0[i];
- const Value *V1 = VL1[i];
- const auto *Inst0 = dyn_cast<Instruction>(V0);
- const auto *Inst1 = dyn_cast<Instruction>(V1);
-
- LLVM_DEBUG(dbgs() << "Pair " << i << ":\n";
- dbgs() << "mul1: "; V0->dump();
- dbgs() << "mul2: "; V1->dump());
-
- if (!Inst0 || !Inst1)
- return false;
-
- if (Inst0->isSameOperationAs(Inst1)) {
- LLVM_DEBUG(dbgs() << "OK: same operation found!\n");
- continue;
- }
-
- const APInt *C0, *C1;
- if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "OK: found symmetrical operand lists.\n");
- return true;
-}
-
template<typename MemInst>
static bool AreSequentialAccesses(MemInst *MemOp0, MemInst *MemOp1,
- MemInstList &VecMem, const DataLayout &DL,
- ScalarEvolution &SE) {
+ const DataLayout &DL, ScalarEvolution &SE) {
if (!MemOp0->isSimple() || !MemOp1->isSimple()) {
LLVM_DEBUG(dbgs() << "No, not touching volatile access\n");
return false;
}
if (isConsecutiveAccess(MemOp0, MemOp1, DL, SE)) {
- VecMem.push_back(MemOp0);
- VecMem.push_back(MemOp1);
LLVM_DEBUG(dbgs() << "OK: accesses are consecutive.\n");
return true;
}
@@ -318,82 +295,156 @@ bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1,
return false;
}
- return AreSequentialAccesses<LoadInst>(Ld0, Ld1, VecMem, *DL, *SE);
+ if (!LoadPairs.count(Ld0) || LoadPairs[Ld0] != Ld1)
+ return false;
+
+ VecMem.clear();
+ VecMem.push_back(Ld0);
+ VecMem.push_back(Ld1);
+ return true;
}
-PMACPairList
-ARMParallelDSP::CreateParallelMACPairs(OpChainList &Candidates) {
+/// Iterate through the block and record base, offset pairs of loads as well as
+/// maximal sequences of sequential loads.
+bool ARMParallelDSP::RecordSequentialLoads(BasicBlock *Header) {
+ SmallVector<LoadInst*, 8> Loads;
+ for (auto &I : *Header) {
+ auto *Ld = dyn_cast<LoadInst>(&I);
+ if (!Ld)
+ continue;
+ Loads.push_back(Ld);
+ }
+
+ std::map<LoadInst*, LoadInst*> BaseLoads;
+
+ for (auto *Ld0 : Loads) {
+ for (auto *Ld1 : Loads) {
+ if (Ld0 == Ld1)
+ continue;
+
+ if (AreSequentialAccesses<LoadInst>(Ld0, Ld1, *DL, *SE)) {
+ LoadPairs[Ld0] = Ld1;
+ if (BaseLoads.count(Ld0)) {
+ LoadInst *Base = BaseLoads[Ld0];
+ BaseLoads[Ld1] = Base;
+ SequentialLoads[Base].push_back(Ld1);
+ } else {
+ BaseLoads[Ld1] = Ld0;
+ SequentialLoads[Ld0].push_back(Ld1);
+ }
+ }
+ }
+ }
+ return LoadPairs.size() > 1;
+}
+
+void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
+ OpChainList &Candidates = R.MACCandidates;
+ PMACPairList &PMACPairs = R.PMACPairs;
const unsigned Elems = Candidates.size();
- PMACPairList PMACPairs;
if (Elems < 2)
- return PMACPairs;
+ return;
- // TODO: for now we simply try to match consecutive pairs i and i+1.
- // We can compare all elements, but then we need to compare and evaluate
- // different solutions.
- for(unsigned i=0; i<Elems-1; i+=2) {
- BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i].get());
- BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[i+1].get());
- const Instruction *Mul0 = PMul0->Root;
- const Instruction *Mul1 = PMul1->Root;
+ auto CanPair = [&](BinOpChain *PMul0, BinOpChain *PMul1) {
+ if (!PMul0->AreSymmetrical(PMul1))
+ return false;
+
+ // The first elements of each vector should be loads with sexts. If we
+    // find that they are two pairs of consecutive loads, then these can be
+ // transformed into two wider loads and the users can be replaced with
+ // DSP intrinsics.
+ for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) {
+ auto *Ld0 = dyn_cast<LoadInst>(PMul0->LHS[x]);
+ auto *Ld1 = dyn_cast<LoadInst>(PMul1->LHS[x]);
+ auto *Ld2 = dyn_cast<LoadInst>(PMul0->RHS[x]);
+ auto *Ld3 = dyn_cast<LoadInst>(PMul1->RHS[x]);
+
+ if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
+ return false;
- if (Mul0 == Mul1)
+ LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n"
+ << "\t Ld0: " << *Ld0 << "\n"
+ << "\t Ld1: " << *Ld1 << "\n"
+ << "and operands " << x + 2 << ":\n"
+ << "\t Ld2: " << *Ld2 << "\n"
+ << "\t Ld3: " << *Ld3 << "\n");
+
+ if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
+ if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ return true;
+ } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n");
+ PMul1->Exchange = true;
+ PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ return true;
+ }
+ } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
+ AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n");
+ LLVM_DEBUG(dbgs() << " and swapping muls\n");
+ PMul0->Exchange = true;
+ // Only the second operand can be exchanged, so swap the muls.
+ PMACPairs.push_back(std::make_pair(PMul1, PMul0));
+ return true;
+ }
+ }
+ return false;
+ };
+
+ SmallPtrSet<const Instruction*, 4> Paired;
+ for (unsigned i = 0; i < Elems; ++i) {
+ BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i].get());
+ if (Paired.count(PMul0->Root))
continue;
- LLVM_DEBUG(dbgs() << "\nCheck parallel muls:\n";
- dbgs() << "- "; Mul0->dump();
- dbgs() << "- "; Mul1->dump());
+ for (unsigned j = 0; j < Elems; ++j) {
+ if (i == j)
+ continue;
- const ValueList &Mul0_LHS = PMul0->LHS;
- const ValueList &Mul0_RHS = PMul0->RHS;
- const ValueList &Mul1_LHS = PMul1->LHS;
- const ValueList &Mul1_RHS = PMul1->RHS;
+ BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[j].get());
+ if (Paired.count(PMul1->Root))
+ continue;
- if (!AreSymmetrical(Mul0_LHS, Mul1_LHS) ||
- !AreSymmetrical(Mul0_RHS, Mul1_RHS))
- continue;
+ const Instruction *Mul0 = PMul0->Root;
+ const Instruction *Mul1 = PMul1->Root;
+ if (Mul0 == Mul1)
+ continue;
- LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");
- // The first elements of each vector should be loads with sexts. If we find
- // that its two pairs of consecutive loads, then these can be transformed
- // into two wider loads and the users can be replaced with DSP
- // intrinsics.
- for (unsigned x = 0; x < Mul0_LHS.size(); x += 2) {
- auto *Ld0 = dyn_cast<LoadInst>(Mul0_LHS[x]);
- auto *Ld1 = dyn_cast<LoadInst>(Mul1_LHS[x]);
- auto *Ld2 = dyn_cast<LoadInst>(Mul0_RHS[x]);
- auto *Ld3 = dyn_cast<LoadInst>(Mul1_RHS[x]);
-
- LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n";
- dbgs() << "\t mul1: "; Mul0_LHS[x]->dump();
- dbgs() << "\t mul2: "; Mul1_LHS[x]->dump();
- dbgs() << "and operands " << x + 2 << ":\n";
- dbgs() << "\t mul1: "; Mul0_RHS[x]->dump();
- dbgs() << "\t mul2: "; Mul1_RHS[x]->dump());
-
- if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd) &&
- AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
- LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ assert(PMul0 != PMul1 && "expected different chains");
+
+ LLVM_DEBUG(dbgs() << "\nCheck parallel muls:\n";
+ dbgs() << "- "; Mul0->dump();
+ dbgs() << "- "; Mul1->dump());
+
+ LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");
+ if (CanPair(PMul0, PMul1)) {
+ Paired.insert(Mul0);
+ Paired.insert(Mul1);
+ break;
}
}
}
- return PMACPairs;
}
-bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction,
- PMACPairList &PMACPairs) {
+bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction) {
Instruction *Acc = Reduction.Phi;
Instruction *InsertAfter = Reduction.AccIntAdd;
- for (auto &Pair : PMACPairs) {
+ for (auto &Pair : Reduction.PMACPairs) {
+ BinOpChain *PMul0 = Pair.first;
+ BinOpChain *PMul1 = Pair.second;
LLVM_DEBUG(dbgs() << "Found parallel MACs!!\n";
- dbgs() << "- "; Pair.first->Root->dump();
- dbgs() << "- "; Pair.second->Root->dump());
- auto *VecLd0 = cast<LoadInst>(Pair.first->VecLd[0]);
- auto *VecLd1 = cast<LoadInst>(Pair.second->VecLd[0]);
- Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, InsertAfter);
+ dbgs() << "- "; PMul0->Root->dump();
+ dbgs() << "- "; PMul1->Root->dump());
+
+ auto *VecLd0 = cast<LoadInst>(PMul0->VecLd[0]);
+ auto *VecLd1 = cast<LoadInst>(PMul1->VecLd[0]);
+ Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, PMul1->Exchange, InsertAfter);
InsertAfter = Acc;
}
@@ -420,7 +471,7 @@ static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
for (PHINode &Phi : Header->phis()) {
const auto *Ty = Phi.getType();
- if (!Ty->isIntegerTy(32))
+ if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
continue;
const bool IsReduction =
@@ -447,10 +498,11 @@ static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
}
static void AddMACCandidate(OpChainList &Candidates,
- const Instruction *Acc,
- Value *MulOp0, Value *MulOp1, int MulOpNum) {
- Instruction *Mul = dyn_cast<Instruction>(Acc->getOperand(MulOpNum));
+ Instruction *Mul,
+ Value *MulOp0, Value *MulOp1) {
LLVM_DEBUG(dbgs() << "OK, found acc mul:\t"; Mul->dump());
+ assert(Mul->getOpcode() == Instruction::Mul &&
+ "expected mul instruction");
ValueList LHS;
ValueList RHS;
if (IsNarrowSequence<16>(MulOp0, LHS) &&
@@ -462,31 +514,38 @@ static void AddMACCandidate(OpChainList &Candidates,
static void MatchParallelMACSequences(Reduction &R,
OpChainList &Candidates) {
- const Instruction *Acc = R.AccIntAdd;
- Value *A, *MulOp0, *MulOp1;
- LLVM_DEBUG(dbgs() << "\n- Analysing:\t"; Acc->dump());
-
- // Pattern 1: the accumulator is the RHS of the mul.
- while(match(Acc, m_Add(m_Mul(m_Value(MulOp0), m_Value(MulOp1)),
- m_Value(A)))){
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 0);
- Acc = dyn_cast<Instruction>(A);
- }
- // Pattern 2: the accumulator is the LHS of the mul.
- while(match(Acc, m_Add(m_Value(A),
- m_Mul(m_Value(MulOp0), m_Value(MulOp1))))) {
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 1);
- Acc = dyn_cast<Instruction>(A);
- }
+ Instruction *Acc = R.AccIntAdd;
+ LLVM_DEBUG(dbgs() << "\n- Analysing:\t" << *Acc);
- // The last mul in the chain has a slightly different pattern:
- // the mul is the first operand
- if (match(Acc, m_Add(m_Mul(m_Value(MulOp0), m_Value(MulOp1)), m_Value(A))))
- AddMACCandidate(Candidates, Acc, MulOp0, MulOp1, 0);
+ // Returns false to signal the search should be stopped.
+ std::function<bool(Value*)> Match =
+ [&Candidates, &Match](Value *V) -> bool {
- // Because we start at the bottom of the chain, and we work our way up,
- // the muls are added in reverse program order to the list.
- std::reverse(Candidates.begin(), Candidates.end());
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ if (Match(I->getOperand(0)) || (Match(I->getOperand(1))))
+ return true;
+ break;
+ case Instruction::Mul: {
+ Value *MulOp0 = I->getOperand(0);
+ Value *MulOp1 = I->getOperand(1);
+ if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1))
+ AddMACCandidate(Candidates, I, MulOp0, MulOp1);
+ return false;
+ }
+ case Instruction::SExt:
+ return Match(I->getOperand(0));
+ }
+ return false;
+ };
+
+ while (Match (Acc));
+ LLVM_DEBUG(dbgs() << "Finished matching MAC sequences, found "
+ << Candidates.size() << " candidates.\n");
}
// Collects all instructions that are not part of the MAC chains, which is the
@@ -621,45 +680,100 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) {
for (auto &R : Reductions) {
if (AreAliased(AA, Reads, Writes, R.MACCandidates))
return false;
- PMACPairList PMACPairs = CreateParallelMACPairs(R.MACCandidates);
- Changed |= InsertParallelMACs(R, PMACPairs);
+ CreateParallelMACPairs(R);
+ Changed |= InsertParallelMACs(R);
}
LLVM_DEBUG(if (Changed) dbgs() << "Header block:\n"; Header->dump(););
return Changed;
}
-static void CreateLoadIns(IRBuilder<NoFolder> &IRB, Instruction *Acc,
- LoadInst **VecLd) {
- const Type *AccTy = Acc->getType();
- const unsigned AddrSpace = (*VecLd)->getPointerAddressSpace();
+static LoadInst *CreateLoadIns(IRBuilder<NoFolder> &IRB, LoadInst &BaseLoad,
+ const Type *LoadTy) {
+ const unsigned AddrSpace = BaseLoad.getPointerAddressSpace();
- Value *VecPtr = IRB.CreateBitCast((*VecLd)->getPointerOperand(),
- AccTy->getPointerTo(AddrSpace));
- *VecLd = IRB.CreateAlignedLoad(VecPtr, (*VecLd)->getAlignment());
+ Value *VecPtr = IRB.CreateBitCast(BaseLoad.getPointerOperand(),
+ LoadTy->getPointerTo(AddrSpace));
+ return IRB.CreateAlignedLoad(VecPtr, BaseLoad.getAlignment());
}
Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
- Instruction *Acc,
+ Instruction *Acc, bool Exchange,
Instruction *InsertAfter) {
- LLVM_DEBUG(dbgs() << "Create SMLAD intrinsic using:\n";
- dbgs() << "- "; VecLd0->dump();
- dbgs() << "- "; VecLd1->dump();
- dbgs() << "- "; Acc->dump());
+ LLVM_DEBUG(dbgs() << "Create SMLAD intrinsic using:\n"
+ << "- " << *VecLd0 << "\n"
+ << "- " << *VecLd1 << "\n"
+ << "- " << *Acc << "\n"
+ << "Exchange: " << Exchange << "\n");
IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
++BasicBlock::iterator(InsertAfter));
// Replace the reduction chain with an intrinsic call
- CreateLoadIns(Builder, Acc, &VecLd0);
- CreateLoadIns(Builder, Acc, &VecLd1);
- Value* Args[] = { VecLd0, VecLd1, Acc };
- Function *SMLAD = Intrinsic::getDeclaration(M, Intrinsic::arm_smlad);
+ const Type *Ty = IntegerType::get(M->getContext(), 32);
+ LoadInst *NewLd0 = CreateLoadIns(Builder, VecLd0[0], Ty);
+ LoadInst *NewLd1 = CreateLoadIns(Builder, VecLd1[0], Ty);
+ Value* Args[] = { NewLd0, NewLd1, Acc };
+ Function *SMLAD = nullptr;
+ if (Exchange)
+ SMLAD = Acc->getType()->isIntegerTy(32) ?
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smladx) :
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlaldx);
+ else
+ SMLAD = Acc->getType()->isIntegerTy(32) ?
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlad) :
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlald);
CallInst *Call = Builder.CreateCall(SMLAD, Args);
NumSMLAD++;
return Call;
}
+// Compare the value lists in Other to this chain.
+bool BinOpChain::AreSymmetrical(BinOpChain *Other) {
+ // Element-by-element comparison of Value lists returning true if they are
+ // instructions with the same opcode or constants with the same value.
+ auto CompareValueList = [](const ValueList &VL0,
+ const ValueList &VL1) {
+ if (VL0.size() != VL1.size()) {
+ LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: "
+ << VL0.size() << " != " << VL1.size() << "\n");
+ return false;
+ }
+
+ const unsigned Pairs = VL0.size();
+ LLVM_DEBUG(dbgs() << "Number of operand pairs: " << Pairs << "\n");
+
+ for (unsigned i = 0; i < Pairs; ++i) {
+ const Value *V0 = VL0[i];
+ const Value *V1 = VL1[i];
+ const auto *Inst0 = dyn_cast<Instruction>(V0);
+ const auto *Inst1 = dyn_cast<Instruction>(V1);
+
+ LLVM_DEBUG(dbgs() << "Pair " << i << ":\n";
+ dbgs() << "mul1: "; V0->dump();
+ dbgs() << "mul2: "; V1->dump());
+
+ if (!Inst0 || !Inst1)
+ return false;
+
+ if (Inst0->isSameOperationAs(Inst1)) {
+ LLVM_DEBUG(dbgs() << "OK: same operation found!\n");
+ continue;
+ }
+
+ const APInt *C0, *C1;
+ if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "OK: found symmetrical operand lists.\n");
+ return true;
+ };
+
+ return CompareValueList(LHS, Other->LHS) &&
+ CompareValueList(RHS, Other->RHS);
+}
+
Pass *llvm::createARMParallelDSPPass() {
return new ARMParallelDSP();
}
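To illustrate the kind of reduction this pass now pairs up (a hypothetical example, not taken from the change): sign-extending i16 loads feeding multiplies that accumulate into one 32-bit sum (or, with the new smlald support, a 64-bit sum), where consecutive loads can be widened and each pair of products replaced by a single intrinsic.

  // Hypothetical reduction; on a Thumb-2 DSP target the pass aims to turn each
  // pair of products into one llvm.arm.smlad call (or smladx when the operands
  // of one multiply arrive swapped).
  int dot_i16(const short *a, const short *b, int n) {
    int acc = 0;
    for (int i = 0; i < n; i += 2)
      acc += a[i] * b[i] + a[i + 1] * b[i + 1];
    return acc;
  }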
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 0e16d6bcfe2b..4f28f2dafc70 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -234,6 +234,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_GEP:
case G_INTTOPTR:
case G_PTRTOINT:
+ case G_CTLZ:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings.
OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index f42cbbda1b71..b1d0761e3231 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -188,8 +188,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
assert(hasV6T2Ops() || !hasThumb2());
// Execute only support requires movt support
- if (genExecuteOnly())
- assert(hasV8MBaselineOps() && !NoMovt && "Cannot generate execute-only code for this target");
+ if (genExecuteOnly()) {
+ NoMovt = false;
+ assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target");
+ }
// Keep a pointer to static instruction cost data for the specified CPU.
SchedModel = getSchedModelForCPU(CPUString);
@@ -287,7 +289,13 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexR7:
case CortexM3:
case CortexR52:
- case ExynosM1:
+ break;
+ case Exynos:
+ LdStMultipleTiming = SingleIssuePlusExtras;
+ MaxInterleaveFactor = 4;
+ if (!isThumb())
+ PrefLoopAlignment = 3;
+ break;
case Kryo:
break;
case Krait:
@@ -370,7 +378,8 @@ bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
// For general targets, the prologue can grow when VFPs are allocated with
// stride 4 (more vpush instructions). But WatchOS uses a compact unwind
// format which it's more important to get right.
- return isTargetWatchABI() || (isSwift() && !MF.getFunction().optForMinSize());
+ return isTargetWatchABI() ||
+ (useWideStrideVFP() && !MF.getFunction().optForMinSize());
}
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 74aee9a8ed38..11841b4467a2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -68,7 +68,7 @@ protected:
CortexR5,
CortexR52,
CortexR7,
- ExynosM1,
+ Exynos,
Krait,
Kryo,
Swift
@@ -106,6 +106,7 @@ protected:
ARMv82a,
ARMv83a,
ARMv84a,
+ ARMv85a,
ARMv8a,
ARMv8mBaseline,
ARMv8mMainline,
@@ -153,6 +154,7 @@ protected:
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
+ bool HasV8_5aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
@@ -227,6 +229,9 @@ protected:
/// HasFullFP16 - True if subtarget supports half-precision FP operations
bool HasFullFP16 = false;
+ /// HasFP16FML - True if subtarget supports half-precision FP fml operations
+ bool HasFP16FML = false;
+
/// HasD16 - True if subtarget is limited to 16 double precision
/// FP registers for VFPv3.
bool HasD16 = false;
@@ -353,6 +358,9 @@ protected:
/// If true, loading into a D subregister will be penalized.
bool SlowLoadDSubregister = false;
+ /// If true, use a wider stride when allocating VFP registers.
+ bool UseWideStrideVFP = false;
+
/// If true, the AGU and NEON/FPU units are multiplexed.
bool HasMuxedUnits = false;
@@ -408,6 +416,9 @@ protected:
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
bool UseSjLjEH = false;
+ /// Has speculation barrier
+ bool HasSB = false;
+
/// Implicitly convert an instruction to a different one if its immediates
/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
bool NegativeImmediates = true;
@@ -432,6 +443,9 @@ protected:
/// operand cycle returned by the itinerary data for pre-ISel operands.
int PreISelOperandLatencyAdjustment = 2;
+ /// What alignment is preferred for loop bodies, in log2(bytes).
+ unsigned PrefLoopAlignment = 0;
+
/// IsLittle - The target is Little Endian
bool IsLittle;
@@ -529,6 +543,7 @@ public:
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
+ bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
@@ -596,6 +611,7 @@ public:
bool hasVMLxHazards() const { return HasVMLxHazards; }
bool hasSlowOddRegister() const { return SlowOddRegister; }
bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
+ bool useWideStrideVFP() const { return UseWideStrideVFP; }
bool hasMuxedUnits() const { return HasMuxedUnits; }
bool dontWidenVMOVS() const { return DontWidenVMOVS; }
bool useSplatVFPToNeon() const { return SplatVFPToNeon; }
@@ -612,12 +628,14 @@ public:
bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
bool useSjLjEH() const { return UseSjLjEH; }
+ bool hasSB() const { return HasSB; }
bool genLongCalls() const { return GenLongCalls; }
bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFP16FML() const { return HasFP16FML; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
@@ -796,6 +814,10 @@ public:
bool allowPositionIndependentMovt() const {
return isROPI() || !isTargetELF();
}
+
+ unsigned getPrefLoopAlignment() const {
+ return PrefLoopAlignment;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 519f789fc215..ec02c840d5e1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -194,12 +194,6 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
/// Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
@@ -210,7 +204,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TargetABI(computeTargetABI(TT, CPU, Options)),
TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index d0620761ea9c..9c13359cba71 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -32,7 +32,8 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM);
bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS;
- // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
+ bool genExecuteOnly =
+ ARM_TM.getMCSubtargetInfo()->hasFeature(ARM::FeatureExecuteOnly);
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -40,6 +41,17 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
if (isAAPCS_ABI) {
LSDASection = nullptr;
}
+
+ // Make code section unreadable when in execute-only mode
+ if (genExecuteOnly) {
+ unsigned Type = ELF::SHT_PROGBITS;
+ unsigned Flags =
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE;
+ // Since we cannot modify flags for an existing section, we create a new
+ // section with the right flags, and use 0 as the unique ID for
+ // execute-only text
+ TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
+ }
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 94f9cefe429c..f72bb8632eb7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -77,8 +77,8 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 1;
return ST->hasV6T2Ops() ? 2 : 3;
}
- // Thumb1.
- if (SImmVal >= 0 && SImmVal < 256)
+ // Thumb1, any i8 imm costs 1.
+ if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
return 1;
if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
return 2;
@@ -400,10 +400,29 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- // We only handle costs of reverse and select shuffles for now.
- if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (Kind == TTI::SK_Broadcast) {
+ static const CostTblEntry NEONDupTbl[] = {
+ // VDUP handles these cases.
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
+
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
if (Kind == TTI::SK_Reverse) {
static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a
@@ -412,6 +431,8 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
@@ -542,14 +563,17 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types of i64/f64 element.
bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
- if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
+ if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
+ !UseMaskForCond && !UseMaskForGaps) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
@@ -562,7 +586,8 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index e0cd2d8e26a6..2dd143d48a15 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -57,7 +57,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const FeatureBitset InlineFeatureWhitelist = {
ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
- ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
+ ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
@@ -169,7 +169,9 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index a5fbbbf26be9..3832b0112b87 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARMFeatures.h"
+#include "InstPrinter/ARMInstPrinter.h"
#include "Utils/ARMBaseInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -631,6 +632,8 @@ public:
void ReportNearMisses(SmallVectorImpl<NearMissInfo> &NearMisses, SMLoc IDLoc,
OperandVector &Operands);
+ void doBeforeLabelEmit(MCSymbol *Symbol) override;
+
void onLabelParsed(MCSymbol *Symbol) override;
};
@@ -3203,17 +3206,26 @@ public:
} // end anonymous namespace.
void ARMOperand::print(raw_ostream &OS) const {
+ auto RegName = [](unsigned Reg) {
+ if (Reg)
+ return ARMInstPrinter::getRegisterName(Reg);
+ else
+ return "noreg";
+ };
+
switch (Kind) {
case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
case k_CCOut:
- OS << "<ccout " << getReg() << ">";
+ OS << "<ccout " << RegName(getReg()) << ">";
break;
case k_ITCondMask: {
static const char *const MaskStr[] = {
- "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
- "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
+ "(invalid)", "(teee)", "(tee)", "(teet)",
+ "(te)", "(tete)", "(tet)", "(tett)",
+ "(t)", "(ttee)", "(tte)", "(ttet)",
+ "(tt)", "(ttte)", "(ttt)", "(tttt)"
};
assert((ITMask.Mask & 0xf) == ITMask.Mask);
OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
@@ -3247,13 +3259,25 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<ARM_TSB::" << TraceSyncBOptToString(getTraceSyncBarrierOpt()) << ">";
break;
case k_Memory:
- OS << "<memory "
- << " base:" << Memory.BaseRegNum;
+ OS << "<memory";
+ if (Memory.BaseRegNum)
+ OS << " base:" << RegName(Memory.BaseRegNum);
+ if (Memory.OffsetImm)
+ OS << " offset-imm:" << *Memory.OffsetImm;
+ if (Memory.OffsetRegNum)
+ OS << " offset-reg:" << (Memory.isNegative ? "-" : "")
+ << RegName(Memory.OffsetRegNum);
+ if (Memory.ShiftType != ARM_AM::no_shift) {
+ OS << " shift-type:" << ARM_AM::getShiftOpcStr(Memory.ShiftType);
+ OS << " shift-imm:" << Memory.ShiftImm;
+ }
+ if (Memory.Alignment)
+ OS << " alignment:" << Memory.Alignment;
OS << ">";
break;
case k_PostIndexRegister:
OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-")
- << PostIdxReg.RegNum;
+ << RegName(PostIdxReg.RegNum);
if (PostIdxReg.ShiftTy != ARM_AM::no_shift)
OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " "
<< PostIdxReg.ShiftImm;
@@ -3269,23 +3293,21 @@ void ARMOperand::print(raw_ostream &OS) const {
break;
}
case k_Register:
- OS << "<register " << getReg() << ">";
+ OS << "<register " << RegName(getReg()) << ">";
break;
case k_ShifterImmediate:
OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl")
<< " #" << ShifterImm.Imm << ">";
break;
case k_ShiftedRegister:
- OS << "<so_reg_reg "
- << RegShiftedReg.SrcReg << " "
- << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
- << " " << RegShiftedReg.ShiftReg << ">";
+ OS << "<so_reg_reg " << RegName(RegShiftedReg.SrcReg) << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy) << " "
+ << RegName(RegShiftedReg.ShiftReg) << ">";
break;
case k_ShiftedImmediate:
- OS << "<so_reg_imm "
- << RegShiftedImm.SrcReg << " "
- << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
- << " #" << RegShiftedImm.ShiftImm << ">";
+ OS << "<so_reg_imm " << RegName(RegShiftedImm.SrcReg) << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy) << " #"
+ << RegShiftedImm.ShiftImm << ">";
break;
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -3309,7 +3331,7 @@ void ARMOperand::print(raw_ostream &OS) const {
const SmallVectorImpl<unsigned> &RegList = getRegList();
for (SmallVectorImpl<unsigned>::const_iterator
I = RegList.begin(), E = RegList.end(); I != E; ) {
- OS << *I;
+ OS << RegName(*I);
if (++I < E) OS << ", ";
}
@@ -3318,15 +3340,15 @@ void ARMOperand::print(raw_ostream &OS) const {
}
case k_VectorList:
OS << "<vector_list " << VectorList.Count << " * "
- << VectorList.RegNum << ">";
+ << RegName(VectorList.RegNum) << ">";
break;
case k_VectorListAllLanes:
OS << "<vector_list(all lanes) " << VectorList.Count << " * "
- << VectorList.RegNum << ">";
+ << RegName(VectorList.RegNum) << ">";
break;
case k_VectorListIndexed:
OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
- << VectorList.Count << " * " << VectorList.RegNum << ">";
+ << VectorList.Count << " * " << RegName(VectorList.RegNum) << ">";
break;
case k_Token:
OS << "'" << getToken() << "'";
@@ -5626,7 +5648,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
- Mnemonic == "vcmla" || Mnemonic == "vcadd")
+ Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -5716,7 +5739,10 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
(FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
Mnemonic == "vmovx" || Mnemonic == "vins" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
- Mnemonic == "vcmla" || Mnemonic == "vcadd") {
+ Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
+ Mnemonic == "sb" || Mnemonic == "ssbb" ||
+ Mnemonic == "pssbb") {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
@@ -6819,6 +6845,26 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"code specified");
break;
}
+ case ARM::DSB:
+ case ARM::t2DSB: {
+
+ if (Inst.getNumOperands() < 2)
+ break;
+
+ unsigned Option = Inst.getOperand(0).getImm();
+ unsigned Pred = Inst.getOperand(1).getImm();
+
+ // SSBB and PSSBB (DSB #0|#4) are not predicable (pred must be AL).
+ if (Option == 0 && Pred != ARMCC::AL)
+ return Error(Operands[1]->getStartLoc(),
+ "instruction 'ssbb' is not predicable, but condition code "
+ "specified");
+ if (Option == 4 && Pred != ARMCC::AL)
+ return Error(Operands[1]->getStartLoc(),
+ "instruction 'pssbb' is not predicable, but condition code "
+ "specified");
+ break;
+ }
case ARM::VMOVRRS: {
// Source registers must be sequential.
const unsigned Sm = MRI->getEncodingValue(Inst.getOperand(2).getReg());
@@ -6837,6 +6883,15 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"destination operands must be sequential");
break;
}
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA: {
+ ARMOperand &Op = static_cast<ARMOperand&>(*Operands[3]);
+ auto &RegList = Op.getRegList();
+ if (RegList.size() < 1 || RegList.size() > 16)
+ return Error(Operands[3]->getStartLoc(),
+ "list of registers must be at least 1 and at most 16");
+ break;
+ }
}
return false;
@@ -9122,33 +9177,9 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const {
// Any arithmetic instruction which writes to the PC also terminates the IT
// block.
- for (unsigned OpIdx = 0; OpIdx < MCID.getNumDefs(); ++OpIdx) {
- MCOperand &Op = Inst.getOperand(OpIdx);
- if (Op.isReg() && Op.getReg() == ARM::PC)
- return true;
- }
-
- if (MCID.hasImplicitDefOfPhysReg(ARM::PC, MRI))
+ if (MCID.hasDefOfPhysReg(Inst, ARM::PC, *MRI))
return true;
- // Instructions with variable operand lists, which write to the variable
- // operands. We only care about Thumb instructions here, as ARM instructions
- // obviously can't be in an IT block.
- switch (Inst.getOpcode()) {
- case ARM::tLDMIA:
- case ARM::t2LDMIA:
- case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB:
- case ARM::t2LDMDB_UPD:
- if (listContainsReg(Inst, 3, ARM::PC))
- return true;
- break;
- case ARM::tPOP:
- if (listContainsReg(Inst, 2, ARM::PC))
- return true;
- break;
- }
-
return false;
}
@@ -9255,6 +9286,10 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchResult) {
case Match_Success:
+ LLVM_DEBUG(dbgs() << "Parsed as: ";
+ Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
+ dbgs() << "\n");
+
// Context sensitive operand constraints aren't handled by the matcher,
// so check them here.
if (validateInstruction(Inst, Operands)) {
@@ -9272,7 +9307,9 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// individual transformations can chain off each other. E.g.,
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
while (processInstruction(Inst, Operands, Out))
- ;
+ LLVM_DEBUG(dbgs() << "Changed to: ";
+ Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
+ dbgs() << "\n");
// Only after the instruction is fully processed, we can validate it
if (wasInITBlock && hasV8Ops() && isThumb() &&
@@ -9441,10 +9478,13 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
return false;
}
-void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+void ARMAsmParser::doBeforeLabelEmit(MCSymbol *Symbol) {
// We need to flush the current implicit IT block on a label, because it is
// not legal to branch into an IT block.
flushPendingInstructions(getStreamer());
+}
+
+void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
if (NextSymbolIsThumb) {
getParser().getStreamer().EmitThumbFunc(Symbol);
NextSymbolIsThumb = false;
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index bfc32073ba18..2f84719c4c4f 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -273,6 +273,21 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::t2TSB:
O << "\ttsb\tcsync";
return;
+ case ARM::t2DSB:
+ switch (MI->getOperand(0).getImm()) {
+ default:
+ if (!printAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
+ break;
+ case 0:
+ O << "\tssbb";
+ break;
+ case 4:
+ O << "\tpssbb";
+ break;
+ }
+ printAnnotation(O, Annot);
+ return;
}
if (!printAliasInstr(MI, STI, O))
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index f472b2154314..e1ea5964cf67 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/bit.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -627,27 +628,22 @@ namespace ARM_AM {
//
inline float getFPImmFloat(unsigned Imm) {
// We expect an 8-bit binary encoding of a floating-point number here.
- union {
- uint32_t I;
- float F;
- } FPUnion;
uint8_t Sign = (Imm >> 7) & 0x1;
uint8_t Exp = (Imm >> 4) & 0x7;
uint8_t Mantissa = Imm & 0xf;
- // 8-bit FP iEEEE Float Encoding
+ // 8-bit FP IEEE Float Encoding
// abcd efgh aBbbbbbc defgh000 00000000 00000000
//
// where B = NOT(b);
-
- FPUnion.I = 0;
- FPUnion.I |= Sign << 31;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
- FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
- FPUnion.I |= (Exp & 0x3) << 23;
- FPUnion.I |= Mantissa << 19;
- return FPUnion.F;
+ uint32_t I = 0;
+ I |= Sign << 31;
+ I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+ I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+ I |= (Exp & 0x3) << 23;
+ I |= Mantissa << 19;
+ return bit_cast<float>(I);
}
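As a sanity check on the decode above, here is a standalone re-derivation (a sketch, not part of the patch; std::memcpy stands in for llvm::bit_cast so it compiles outside the LLVM tree):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // Rebuilds the 32-bit IEEE pattern from the 8-bit VFP immediate encoding,
  // exactly as getFPImmFloat does, then type-puns it to a float.
  static float decodeFPImm8(unsigned Imm) {
    uint32_t Sign = (Imm >> 7) & 0x1;
    uint32_t Exp = (Imm >> 4) & 0x7;
    uint32_t Mantissa = Imm & 0xf;
    uint32_t I = 0;
    I |= Sign << 31;
    I |= ((Exp & 0x4) != 0 ? 0u : 1u) << 30;
    I |= ((Exp & 0x4) != 0 ? 0x1fu : 0u) << 25;
    I |= (Exp & 0x3) << 23;
    I |= Mantissa << 19;
    float F;
    std::memcpy(&F, &I, sizeof(F));
    return F;
  }

  int main() {
    assert(decodeFPImm8(0x70) == 1.0f);  // sign 0, exp 0b111, mantissa 0
    assert(decodeFPImm8(0xf8) == -1.5f); // sign 1, exp 0b111, mantissa 0b1000
  }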
/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index f524a0081301..c2a07d4ddcef 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -373,6 +373,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// interfere with checking valid expressions.
if (const MCSymbolRefExpr *A = Target.getSymA()) {
if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) &&
+ A->getSymbol().isExternal() &&
(Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 ||
Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 ||
Kind == ARM::fixup_t2_movt_hi16))
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index beeb5dec4baf..33c32d5464af 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -248,6 +248,11 @@ namespace ARMII {
/// just that part of the flag set.
MO_OPTION_MASK = 0x3,
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on windows.
+ MO_COFFSTUB = 0x4,
+
/// MO_GOT - On a symbol operand, this represents a GOT relative relocation.
MO_GOT = 0x8,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 7d04c73fb3f2..b8ba7584911b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
@@ -40,6 +41,8 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
+
+ void addTargetSectionFlags(MCContext &Ctx, MCSectionELF &Sec) override;
};
} // end anonymous namespace
@@ -236,6 +239,21 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
}
+void ARMELFObjectWriter::addTargetSectionFlags(MCContext &Ctx,
+ MCSectionELF &Sec) {
+ // The mix of execute-only and non-execute-only at link time is
+ // non-execute-only. To avoid the empty implicitly created .text
+ // section from making the whole .text section non-execute-only, we
+ // mark it execute-only if it is empty and there is at least one
+ // execute-only section in the object.
+ MCSectionELF *TextSection =
+ static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
+ if (Sec.getKind().isExecuteOnly() && !TextSection->hasInstructions() &&
+ !TextSection->hasData()) {
+ TextSection->setFlags(TextSection->getFlags() | ELF::SHF_ARM_PURECODE);
+ }
+}
+
std::unique_ptr<MCObjectTargetWriter>
llvm::createARMELFObjectWriter(uint8_t OSABI) {
return llvm::make_unique<ARMELFObjectWriter>(OSABI);
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 3373d691db50..d3744fffac32 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -465,6 +465,11 @@ public:
void emitPad(int64_t Offset);
void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector);
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) override {
+ EmitDataMappingSymbol();
+ MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
+ }
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo);
@@ -861,6 +866,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV8_2A:
case ARM::ArchKind::ARMV8_3A:
case ARM::ArchKind::ARMV8_4A:
+ case ARM::ArchKind::ARMV8_5A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1071,7 +1077,7 @@ void ARMTargetELFStreamer::finishAttributeSection() {
if (Contents.empty())
return;
- llvm::sort(Contents.begin(), Contents.end(), AttributeItem::LessTag);
+ llvm::sort(Contents, AttributeItem::LessTag);
ARMELFStreamer &Streamer = getStreamer();
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 0cef683778e5..3ee63ac374b3 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -31,6 +31,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
SupportsDebugInformation = true;
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+
// Exceptions handling
ExceptionsType = (TheTriple.isOSDarwin() && !TheTriple.isWatchABI())
? ExceptionHandling::SjLj
@@ -56,6 +59,9 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
SupportsDebugInformation = true;
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+
// Exceptions handling
switch (TheTriple.getOS()) {
case Triple::NetBSD:
@@ -90,6 +96,9 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
CommentString = ";";
+
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
}
void ARMCOFFMCAsmInfoGNU::anchor() { }
@@ -110,5 +119,7 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
UseIntegratedAssembler = true;
DwarfRegNumForCFI = false;
-}
+ // Conditional Thumb 4-byte instructions can have an implicit IT.
+ MaxInstLength = 6;
+}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 4b4956e914f2..0ced8195790d 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -22,6 +22,8 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ScopedPrinter.h"
+
using namespace llvm;
namespace {
@@ -144,6 +146,15 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
MCValue Target,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+
+ if (FixupOffset & 0xff000000) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "can not encode offset '0x" +
+ to_hexString(FixupOffset) +
+ "' in resulting scattered relocation.");
+ return;
+ }
+
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Type = MachO::ARM_RELOC_HALF;
@@ -250,6 +261,15 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
unsigned Log2Size,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+
+ if (FixupOffset & 0xff000000) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "can not encode offset '0x" +
+ to_hexString(FixupOffset) +
+ "' in resulting scattered relocation.");
+ return;
+ }
+
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
// See <reloc.h>.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 63aa9735e8a4..91836cff95c8 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 8ae713b7b489..30cbde1ca71f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -75,8 +75,8 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case ARM::fixup_t2_condbranch:
return COFF::IMAGE_REL_ARM_BRANCH20T;
case ARM::fixup_t2_uncondbranch:
- return COFF::IMAGE_REL_ARM_BRANCH24T;
case ARM::fixup_arm_thumb_bl:
+ return COFF::IMAGE_REL_ARM_BRANCH24T;
case ARM::fixup_arm_thumb_blx:
return COFF::IMAGE_REL_ARM_BLX23T;
case ARM::fixup_t2_movw_lo16:
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 1a91a7030657..d567d3339049 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -146,9 +146,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
- RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
- RC == &ARM::GPRnopcRegClass) {
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
@@ -190,9 +188,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
- RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
- RC == &ARM::GPRnopcRegClass) {
+ if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
.addFrameIndex(FI)
.addImm(0)
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index abf54ba7e87c..65889fc4e28b 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -122,6 +122,7 @@ namespace {
{ ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
{ ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
{ ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TEQrr, ARM::tEOR, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
{ ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
{ ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
{ ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
@@ -485,7 +486,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
.addReg(Rt, IsStore ? 0 : RegState::Define);
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -605,7 +606,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -717,6 +718,16 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
return true;
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
+ case ARM::t2TEQrr: {
+ unsigned PredReg = 0;
+ // Can only convert to eors if we're not in an IT block.
+ if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
+ break;
+ // TODO if Operand 0 is not killed but Operand 1 is, then we could write
+ // to Op1 instead.
+ if (MI->getOperand(0).isKill())
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+ }
}
return false;
}
@@ -903,9 +914,24 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.add(MI->getOperand(0));
- if (NewMCID.hasOptionalDef())
- MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+
+ // TEQ is special in that it doesn't define a register but we're converting
+ // it into an EOR which does. So add the first operand as a def and then
+ // again as a use.
+ if (MCID.getOpcode() == ARM::t2TEQrr) {
+ MIB.add(MI->getOperand(0));
+ MIB->getOperand(0).setIsKill(false);
+ MIB->getOperand(0).setIsDef(true);
+ MIB->getOperand(0).setIsDead(true);
+
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+ MIB.add(MI->getOperand(0));
+ } else {
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+ }
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
diff --git a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index fec7080081d0..536a54759c77 100644
--- a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -568,8 +568,8 @@ bool AVRExpandPseudo::expand<AVR::LDSWRdK>(Block &MBB, BlockIt MBBI) {
llvm_unreachable("Unknown operand type!");
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -617,8 +617,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -648,8 +648,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPi>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
.addReg(SrcReg, RegState::Kill);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -679,8 +679,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPd>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
.addReg(SrcReg, RegState::Kill);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -734,8 +734,8 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -782,8 +782,8 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1003,8 +1003,8 @@ bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
MIBLO.addReg(SrcLoReg, getKillRegState(SrcIsKill));
MIBHI.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1031,8 +1031,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
.addImm(1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1065,8 +1065,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPiRr>(Block &MBB, BlockIt MBBI) {
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.addImm(Imm);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1099,8 +1099,8 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.addImm(Imm);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1133,8 +1133,8 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
.addImm(Imm + 1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1163,8 +1163,8 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addImm(Imm + 1);
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1194,8 +1194,8 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
.addImm(Imm)
.addReg(SrcLoReg, getKillRegState(SrcIsKill));
- MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
@@ -1251,24 +1251,26 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::LSLRd;
- OpHi = AVR::ROLRd;
+ OpLo = AVR::ADDRdRr; // ADD Rd, Rd <==> LSL Rd
+ OpHi = AVR::ADCRdRr; // ADC Rd, Rd <==> ROL Rd
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Low part
buildMI(MBB, MBBI, OpLo)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg)
.addReg(DstLoReg, getKillRegState(DstIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg)
.addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
- MIBHI->getOperand(2).setIsDead();
+ MIBHI->getOperand(3).setIsDead();
// SREG is always implicitly killed
- MIBHI->getOperand(3).setIsKill();
+ MIBHI->getOperand(4).setIsKill();
MI.eraseFromParent();
return true;
@@ -1387,8 +1389,9 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
.addReg(SrcReg, getKillRegState(SrcIsKill));
}
- buildMI(MBB, MBBI, AVR::LSLRd)
+ buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rr
.addReg(DstHiReg, RegState::Define)
+ .addReg(DstHiReg)
.addReg(DstHiReg, RegState::Kill);
auto SBC = buildMI(MBB, MBBI, AVR::SBCRdRr)
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index b0b23effc6c6..85abf42eaa67 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -350,9 +350,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::STORE>(SDNode *N) {
SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, MVT::Other, Ops);
// Transfer memory operands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {ST->getMemOperand()});
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
CurDAG->RemoveDeadNode(N);
@@ -407,9 +405,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
}
// Transfer memory operands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {LD->getMemOperand()});
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 1b412a9c6813..57fc978b54bb 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -1430,6 +1430,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
MachineBasicBlock *BB) const {
unsigned Opc;
const TargetRegisterClass *RC;
+ bool HasRepeatedOperand = false;
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
@@ -1440,8 +1441,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
default:
llvm_unreachable("Invalid shift opcode!");
case AVR::Lsl8:
- Opc = AVR::LSLRd;
+ Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
RC = &AVR::GPR8RegClass;
+ HasRepeatedOperand = true;
break;
case AVR::Lsl16:
Opc = AVR::LSLWRd;
@@ -1464,8 +1466,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
RC = &AVR::DREGSRegClass;
break;
case AVR::Rol8:
- Opc = AVR::ROLRd;
+ Opc = AVR::ADCRdRr; // ROL is an alias of ADC Rd, Rd
RC = &AVR::GPR8RegClass;
+ HasRepeatedOperand = true;
break;
case AVR::Rol16:
Opc = AVR::ROLWRd;
@@ -1535,7 +1538,11 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
.addMBB(BB)
.addReg(ShiftAmtReg2)
.addMBB(LoopBB);
- BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+
+ auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+ if (HasRepeatedOperand)
+ ShiftMI.addReg(ShiftReg);
+
BuildMI(LoopBB, dl, TII.get(AVR::SUBIRdK), ShiftAmtReg2)
.addReg(ShiftAmtReg)
.addImm(1);
diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
index a2129cc0e2e9..5720af7d8df6 100644
--- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -730,15 +730,15 @@ Defs = [SREG] in
// TST Rd
// Test for zero or minus.
// This operation is identical to a `Rd AND Rd`.
-//def : InstAlias<"tst\t$rd", (ANDRdRr GPR8:$rd, GPR8:$rd), 1>;
+def : InstAlias<"tst\t$rd", (ANDRdRr GPR8:$rd, GPR8:$rd)>;
-let Defs = [SREG] in
-def TSTRd : FTST<0b0010,
- 0b00,
- (outs),
- (ins GPR8:$rd),
- "tst\t$rd",
- [(AVRtst i8:$rd)]>;
+// SBR Rd, K
+//
+// Mnemonic alias to 'ORI Rd, K'. Same bit pattern, same operands,
+// same everything.
+def : InstAlias<"sbr\t$rd, $k",
+ (ORIRdK LD8:$rd, imm_ldi8:$k),
+ /* Disable display, so we don't override ORI */ 0>;
//===----------------------------------------------------------------------===//
// Jump instructions
@@ -1222,7 +1222,7 @@ isReMaterializable = 1 in
// ldd Rd, P+q
// ldd Rd+1, P+q+1
let Constraints = "@earlyclobber $dst" in
- def LDDWRdPtrQ : Pseudo<(outs DREGS:$dst),
+ def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_Z_WORKAROUND:$dst),
(ins memri:$memri),
"lddw\t$dst, $memri",
[(set i16:$dst, (load addr:$memri))]>,
@@ -1632,12 +1632,7 @@ def LATZRd : FZRd<0b111,
let Constraints = "$src = $rd",
Defs = [SREG] in
{
- def LSLRd : FRdRr<0b0000,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "lsl\t$rd",
- [(set i8:$rd, (AVRlsl i8:$src)), (implicit SREG)]>;
+ // 8-bit LSL is an alias of ADD Rd, Rd
def LSLWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
@@ -1671,12 +1666,7 @@ Defs = [SREG] in
// Bit rotate operations.
let Uses = [SREG] in
{
- def ROLRd : FRdRr<0b0001,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "rol\t$rd",
- [(set i8:$rd, (AVRrol i8:$src)), (implicit SREG)]>;
+ // 8-bit ROL is an alias of ADC Rd, Rd
def ROLWRd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
@@ -1743,15 +1733,6 @@ def BLD : FRdB<0b00,
let Constraints = "$src = $rd",
Defs = [SREG] in
{
- // SBR Rd, K
- // Alias for ORI Rd, K
- def SBRRdK : FRdK<0b0110,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "sbr\t$rd, $k",
- [(set i8:$rd, (or i8:$src, imm:$k)),
- (implicit SREG)]>;
-
// CBR Rd, K
// Alias for `ANDI Rd, COM(K)` where COM(K) is the complement of K.
// FIXME: This uses the 'complement' encoder. We need it to also use the
@@ -1769,6 +1750,14 @@ Defs = [SREG] in
// Clears all bits in a register.
def CLR : InstAlias<"clr\t$rd", (EORRdRr GPR8:$rd, GPR8:$rd)>;
+// LSL Rd
+// Alias for ADD Rd, Rd
+// --------------
+// Logical shift left one bit.
+def LSL : InstAlias<"lsl\t$rd", (ADDRdRr GPR8:$rd, GPR8:$rd)>;
+
+def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8:$rd, GPR8:$rd)>;
+
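The identity these aliases rely on is plain arithmetic: a one-bit logical shift left of an 8-bit register equals adding the register to itself, and a rotate left through carry equals an add-with-carry of the register to itself, with identical carry-out in both cases. A brute-force check (a sketch, not part of the patch):

  #include <cassert>

  int main() {
    for (unsigned V = 0; V < 256; ++V) {
      // LSL Rd <==> ADD Rd, Rd: same result byte, same carry-out.
      assert(((V << 1) & 0xff) == ((V + V) & 0xff));
      assert(((V >> 7) & 1) == ((V + V) >> 8));

      for (unsigned C = 0; C < 2; ++C) {
        // ROL Rd <==> ADC Rd, Rd: carry-in enters bit 0, bit 7 leaves as carry.
        unsigned Rol = ((V << 1) | C) & 0xff;
        unsigned Adc = (V + V + C) & 0xff;
        assert(Rol == Adc);
        assert(((V >> 7) & 1) == ((V + V + C) >> 8));
      }
    }
  }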
// SER Rd
// Alias for LDI Rd, 0xff
// ---------
@@ -2107,3 +2096,13 @@ def : Pat<(i8 (trunc (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr
def : Pat<(shl i16:$src1, (i8 1)),
(LSLWRd i16:$src1)>;
+// Lowering of 'tst' node to 'TST' instruction.
+// TST is an alias of AND Rd, Rd.
+def : Pat<(AVRtst i8:$rd),
+ (ANDRdRr GPR8:$rd, GPR8:$rd)>;
+
+// Lowering of 'lsl' node to 'LSL' instruction.
+// LSL is an alias of 'ADD Rd, Rd'
+def : Pat<(AVRlsl i8:$rd),
+ (ADDRdRr GPR8:$rd, GPR8:$rd)>;
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index d171a620760e..808a85e459c1 100644
--- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -152,6 +152,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.getOpcode() == AVR::FRMIDX) {
MI.setDesc(TII.get(AVR::MOVWRdRr));
MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false);
+ MI.RemoveOperand(2);
assert(Offset > 0 && "Invalid offset");
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
index 8162f12052be..d55252bcac46 100644
--- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -157,6 +157,26 @@ def DREGS : RegisterClass<"AVR", [i16], 8,
R9R8, R7R6, R5R4, R3R2, R1R0
)>;
+// The 16-bit DREGS register class, excluding the Z pointer register.
+//
+// This is used by instructions which cause high pointer register
+// contention, which leads to an assertion in the register allocator.
+//
+// There is no technical reason why instructions that use this class
+// cannot use Z; it's simply a workaround for a regalloc bug.
+//
+// More information can be found in PR39553.
+def DREGS_WITHOUT_Z_WORKAROUND : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10,
+ R9R8, R7R6, R5R4, R3R2, R1R0
+ )>;
+
// 16-bit register class for immediate instructions.
def DLDREGS : RegisterClass<"AVR", [i16], 8,
(
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 74300d9a451c..9828cdab68c3 100644
--- a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -40,12 +40,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return RM.hasValue() ? *RM : Reloc::Static;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -53,8 +47,8 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, AVRDataLayout, TT, getCPU(CPU), FS, Options,
- getEffectiveRelocModel(RM), getEffectiveCodeModel(CM),
- OL),
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
SubTarget(TT, getCPU(CPU), FS, *this) {
this->TLOF = make_unique<AVRTargetObjectFile>();
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index d57cc098497f..f2bb59265271 100644
--- a/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -34,8 +34,9 @@
#define DEBUG_TYPE "avr-asm-parser"
-namespace llvm {
+using namespace llvm;
+namespace {
/// Parses AVR assembly from a stream.
class AVRAsmParser : public MCTargetAsmParser {
const MCSubtargetInfo &STI;
@@ -245,6 +246,8 @@ public:
}
};
+} // end anonymous namespace.
+
// Auto-generated Match Functions
/// Maps from the set of all register names to a register number.
@@ -510,6 +513,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) {
case AsmToken::Real:
if (!tryParseExpression(Operands))
return false;
+ break;
default:
break;
}
@@ -708,5 +712,3 @@ unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
return Match_InvalidOperand;
}
-
-} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 496f2befde58..8890fb8adf4d 100644
--- a/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -357,8 +357,8 @@ BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
case AsmToken::Plus: {
if (getLexer().peekTok().is(AsmToken::Integer))
return MatchOperand_NoMatch;
+ LLVM_FALLTHROUGH;
}
- // Fall through.
case AsmToken::Equal:
case AsmToken::Greater:
diff --git a/contrib/llvm/lib/Target/BPF/BPF.h b/contrib/llvm/lib/Target/BPF/BPF.h
index 76d3e1ca5f6f..9749e369c2c1 100644
--- a/contrib/llvm/lib/Target/BPF/BPF.h
+++ b/contrib/llvm/lib/Target/BPF/BPF.h
@@ -19,9 +19,11 @@ class BPFTargetMachine;
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMIPeepholePass();
FunctionPass *createBPFMIPreEmitPeepholePass();
+FunctionPass *createBPFMIPreEmitCheckingPass();
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
+void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
}
#endif
diff --git a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 705211b486bf..ada5eb923f40 100644
--- a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "BPFInstrInfo.h"
#include "BPFMCInstLower.h"
#include "BPFTargetMachine.h"
+#include "BTFDebug.h"
#include "InstPrinter/BPFInstPrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -40,6 +41,7 @@ public:
: AsmPrinter(TM, std::move(Streamer)) {}
StringRef getPassName() const override { return "BPF Assembly Printer"; }
+ bool doInitialization(Module &M) override;
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
@@ -52,6 +54,18 @@ public:
};
} // namespace
+bool BPFAsmPrinter::doInitialization(Module &M) {
+ AsmPrinter::doInitialization(M);
+
+ if (MAI->doesSupportDebugInformation()) {
+ Handlers.push_back(HandlerInfo(new BTFDebug(this), "emit",
+ "Debug Info Emission", "BTF",
+ "BTF Emission"));
+ }
+
+ return false;
+}
+
void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNum);
diff --git a/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp b/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp
new file mode 100644
index 000000000000..0a311378e777
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -0,0 +1,96 @@
+//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs checking to signal errors for certain illegal usages at
+// MachineInstruction layer. Specifically, the result of XADD{32,64} insns must
+// not be used. The check runs in the PreEmit stage, right before the
+// machine code is emitted, at which point the register liveness information
+// is still available.
+//
+//===----------------------------------------------------------------------===//
+
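For context, the kind of source pattern this pass flags looks like the following (an illustrative snippet, not part of the patch; it assumes clang targeting bpf, where __sync_fetch_and_add is selected to XADD):

  long Counter;

  // Fine: the atomic add's return value is discarded, so the XADD defs are dead.
  void ok(void) { (void)__sync_fetch_and_add(&Counter, 1); }

  // Rejected by this pass: the XADD return value is consumed, which BPF's
  // XADD cannot provide, so a fatal error is reported for this line.
  long bad(void) { return __sync_fetch_and_add(&Counter, 1); }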
+#include "BPF.h"
+#include "BPFInstrInfo.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-mi-checking"
+
+namespace {
+
+struct BPFMIPreEmitChecking : public MachineFunctionPass {
+
+ static char ID;
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+
+ BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
+ initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ void checkingIllegalXADD(void);
+
+public:
+
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!skipFunction(MF.getFunction())) {
+ initialize(MF);
+ checkingIllegalXADD();
+ }
+ return false;
+ }
+};
+
+// Initialize class variables.
+void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
+}
+
+void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != BPF::XADD32 && MI.getOpcode() != BPF::XADD64)
+ continue;
+
+ LLVM_DEBUG(MI.dump());
+ if (!MI.allDefsAreDead()) {
+ DebugLoc Empty;
+ const DebugLoc &DL = MI.getDebugLoc();
+ if (DL != Empty)
+ report_fatal_error("line " + std::to_string(DL.getLine()) +
+ ": Invalid usage of the XADD return value", false);
+ else
+ report_fatal_error("Invalid usage of the XADD return value", false);
+ }
+ }
+ }
+
+ return;
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
+ "BPF PreEmit Checking", false, false)
+
+char BPFMIPreEmitChecking::ID = 0;
+FunctionPass* llvm::createBPFMIPreEmitCheckingPass()
+{
+ return new BPFMIPreEmitChecking();
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
index bb0d6bcf5450..4202850e9eb9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -29,8 +29,6 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 84d89bff74fe..350465b118ed 100644
--- a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -51,12 +51,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -64,13 +58,14 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM), getEffectiveCodeModel(CM),
- OL),
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
- BPFMCAsmInfo *MAI = static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo));
+ BPFMCAsmInfo *MAI =
+ static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo.get()));
MAI->setDwarfUsesRelocationsAcrossSections(!Subtarget.getUseDwarfRIS());
}
namespace {
@@ -115,6 +110,7 @@ void BPFPassConfig::addMachineSSAOptimization() {
void BPFPassConfig::addPreEmitPass() {
const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl();
+ addPass(createBPFMIPreEmitCheckingPass());
if (getOptLevel() != CodeGenOpt::None)
if (Subtarget->getHasAlu32() && !DisableMIPeephole)
addPass(createBPFMIPreEmitPeepholePass());
diff --git a/contrib/llvm/lib/Target/BPF/BTF.def b/contrib/llvm/lib/Target/BPF/BTF.def
new file mode 100644
index 000000000000..54c5bc3cf092
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTF.def
@@ -0,0 +1,33 @@
+//===- BTF.def - BTF definitions --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Macros for BTF.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(HANDLE_BTF_KIND)
+#error "Missing macro definition of HANDLE_BTF_*"
+#endif
+
+HANDLE_BTF_KIND(0, UNKN)
+HANDLE_BTF_KIND(1, INT)
+HANDLE_BTF_KIND(2, PTR)
+HANDLE_BTF_KIND(3, ARRAY)
+HANDLE_BTF_KIND(4, STRUCT)
+HANDLE_BTF_KIND(5, UNION)
+HANDLE_BTF_KIND(6, ENUM)
+HANDLE_BTF_KIND(7, FWD)
+HANDLE_BTF_KIND(8, TYPEDEF)
+HANDLE_BTF_KIND(9, VOLATILE)
+HANDLE_BTF_KIND(10, CONST)
+HANDLE_BTF_KIND(11, RESTRICT)
+HANDLE_BTF_KIND(12, FUNC)
+HANDLE_BTF_KIND(13, FUNC_PROTO)
+
+#undef HANDLE_BTF_KIND
diff --git a/contrib/llvm/lib/Target/BPF/BTF.h b/contrib/llvm/lib/Target/BPF/BTF.h
new file mode 100644
index 000000000000..1e1680faf1b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTF.h
@@ -0,0 +1,209 @@
+//===-- BTF.h --------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the layout of .BTF and .BTF.ext ELF sections.
+///
+/// The binary layout for .BTF section:
+/// struct Header
+/// Type and Str subsections
+/// The Type subsection is a collection of types with type id starting with 1.
+/// The Str subsection is simply a collection of strings.
+///
+/// The binary layout for .BTF.ext section:
+/// struct ExtHeader
+/// FuncInfo and LineInfo subsections
+/// The FuncInfo subsection is defined as below:
+/// BPFFuncInfo Size
+/// struct SecFuncInfo for ELF section #1
+/// A number of struct BPFFuncInfo for ELF section #1
+/// struct SecFuncInfo for ELF section #2
+/// A number of struct BPFFuncInfo for ELF section #2
+/// ...
+/// The LineInfo subsection is defined as below:
+/// BPFLineInfo Size
+/// struct SecLineInfo for ELF section #1
+/// A number of struct BPFLineInfo for ELF section #1
+/// struct SecLineInfo for ELF section #2
+/// A number of struct BPFLineInfo for ELF section #2
+/// ...
+///
+/// The section formats are also defined at
+/// https://github.com/torvalds/linux/blob/master/include/uapi/linux/btf.h
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BTF_H
+#define LLVM_LIB_TARGET_BPF_BTF_H
+
+namespace llvm {
+namespace BTF {
+
+enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 };
+
+/// Sizes in bytes of various things in the BTF format.
+enum {
+ HeaderSize = 24,
+ ExtHeaderSize = 24,
+ CommonTypeSize = 12,
+ BTFArraySize = 12,
+ BTFEnumSize = 8,
+ BTFMemberSize = 12,
+ BTFParamSize = 8,
+ SecFuncInfoSize = 8,
+ SecLineInfoSize = 8,
+ BPFFuncInfoSize = 8,
+ BPFLineInfoSize = 16
+};
+
+/// The .BTF section header definition.
+struct Header {
+ uint16_t Magic; ///< Magic value
+ uint8_t Version; ///< Version number
+ uint8_t Flags; ///< Extra flags
+ uint32_t HdrLen; ///< Length of this header
+
+ /// All offsets are in bytes relative to the end of this header.
+ uint32_t TypeOff; ///< Offset of type section
+ uint32_t TypeLen; ///< Length of type section
+ uint32_t StrOff; ///< Offset of string section
+ uint32_t StrLen; ///< Length of string section
+};
+
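+// Illustrative sanity check (a sketch, not part of the original interface):
+// the on-disk header contains no padding, so the in-memory struct size is
+// expected to match HeaderSize above.
+static_assert(sizeof(Header) == HeaderSize, "unexpected padding in Header");
+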
+enum : uint32_t {
+ MAX_VLEN = 0xffff ///< Max # of struct/union/enum members or func args
+};
+
+enum TypeKinds : uint8_t {
+#define HANDLE_BTF_KIND(ID, NAME) BTF_KIND_##NAME = ID,
+#include "BTF.def"
+};
+
+/// The BTF common type definition. Different kinds may have
+/// additional information after this structure data.
+struct CommonType {
+ /// Type name offset in the string table.
+ uint32_t NameOff;
+
+ /// "Info" bits arrangement:
+ /// Bits 0-15: vlen (e.g. # of struct's members)
+ /// Bits 16-23: unused
+ /// Bits 24-27: kind (e.g. int, ptr, array...etc)
+ /// Bits 28-30: unused
+ /// Bit 31: kind_flag, currently used by
+ /// struct, union and fwd
+ uint32_t Info;
+
+ /// "Size" is used by INT, ENUM, STRUCT and UNION.
+ /// "Size" tells the size of the type it is describing.
+ ///
+ /// "Type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+ /// FUNC and FUNC_PROTO.
+ /// "Type" is a type_id referring to another type.
+ union {
+ uint32_t Size;
+ uint32_t Type;
+ };
+};
+
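+// Illustrative helpers (a sketch; these names are not part of the interface)
+// showing how the "Info" bits described above are typically unpacked:
+inline uint32_t commonTypeVlen(uint32_t Info) { return Info & 0xffff; }
+inline uint32_t commonTypeKind(uint32_t Info) { return (Info >> 24) & 0xf; }
+inline bool commonTypeKindFlag(uint32_t Info) { return (Info >> 31) != 0; }
+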
+// For some specific BTF_KIND, "struct CommonType" is immediately
+// followed by extra data.
+
+// BTF_KIND_INT is followed by a u32 and the following
+// is the 32 bits arrangement:
+// BTF_INT_ENCODING(VAL) : (((VAL) & 0x0f000000) >> 24)
+// BTF_INT_OFFSET(VAL) : (((VAL & 0x00ff0000)) >> 16)
+// BTF_INT_BITS(VAL) : ((VAL) & 0x000000ff)
+
+/// Attributes stored in the INT_ENCODING.
+enum : uint8_t { INT_SIGNED = (1 << 0), INT_CHAR = (1 << 1), INT_BOOL = (1 << 2) };
+
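+// The same arrangement expressed as C++ (an illustrative sketch; the kernel
+// uapi header defines these as BTF_INT_* macros):
+inline uint32_t intEncoding(uint32_t Val) { return (Val & 0x0f000000) >> 24; }
+inline uint32_t intOffset(uint32_t Val) { return (Val & 0x00ff0000) >> 16; }
+inline uint32_t intBits(uint32_t Val) { return Val & 0x000000ff; }
+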
+/// BTF_KIND_ENUM is followed by multiple "struct BTFEnum".
+/// The exact number of btf_enum is stored in the vlen (of the
+/// info in "struct CommonType").
+struct BTFEnum {
+ uint32_t NameOff; ///< Enum name offset in the string table
+ int32_t Val; ///< Enum member value
+};
+
+/// BTF_KIND_ARRAY is followed by one "struct BTFArray".
+struct BTFArray {
+ uint32_t ElemType; ///< Element type
+ uint32_t IndexType; ///< Index type
+ uint32_t Nelems; ///< Number of elements for this array
+};
+
+/// BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+/// by multiple "struct BTFMember". The exact number
+/// of BTFMember is stored in the vlen (of the info in
+/// "struct CommonType").
+///
+/// If the struct/union contains any bitfield member,
+/// the Offset below represents BitOffset (bits 0 - 23)
+/// and BitFieldSize (bits 24 - 31), with BitFieldSize = 0
+/// for non-bitfield members. Otherwise, the Offset
+/// represents the BitOffset.
+struct BTFMember {
+ uint32_t NameOff; ///< Member name offset in the string table
+ uint32_t Type; ///< Member type
+ uint32_t Offset; ///< BitOffset or BitFieldSize+BitOffset
+};
+
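+// Illustrative decoding sketch for the bitfield case described above (the
+// helper names are not part of this interface):
+inline uint32_t memberBitFieldSize(uint32_t Offset) { return Offset >> 24; }
+inline uint32_t memberBitOffset(uint32_t Offset) { return Offset & 0xffffff; }
+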
+/// BTF_KIND_FUNC_PROTO are followed by multiple "struct BTFParam".
+/// The exact number of BTFParam is stored in the vlen (of the info
+/// in "struct CommonType").
+struct BTFParam {
+ uint32_t NameOff;
+ uint32_t Type;
+};
+
+/// The .BTF.ext section header definition.
+struct ExtHeader {
+ uint16_t Magic;
+ uint8_t Version;
+ uint8_t Flags;
+ uint32_t HdrLen;
+
+ uint32_t FuncInfoOff; ///< Offset of func info section
+ uint32_t FuncInfoLen; ///< Length of func info section
+ uint32_t LineInfoOff; ///< Offset of line info section
+ uint32_t LineInfoLen; ///< Length of line info section
+};
+
+/// Specifying one function info.
+struct BPFFuncInfo {
+ uint32_t InsnOffset; ///< Byte offset in the section
+ uint32_t TypeId; ///< Type id referring to .BTF type section
+};
+
+/// Specifying the function infos in one section.
+struct SecFuncInfo {
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t NumFuncInfo; ///< Number of func infos in this section
+};
+
+/// Specifying one line info.
+struct BPFLineInfo {
+ uint32_t InsnOffset; ///< Byte offset in this section
+ uint32_t FileNameOff; ///< File name index in the .BTF string table
+ uint32_t LineOff; ///< Line index in the .BTF string table
+ uint32_t LineCol; ///< Line num: line_col >> 10,
+ /// col num: line_col & 0x3ff
+};
+
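+// Illustrative helpers (a sketch, not part of this interface) for the LineCol
+// packing described above:
+inline uint32_t lineInfoLine(uint32_t LineCol) { return LineCol >> 10; }
+inline uint32_t lineInfoCol(uint32_t LineCol) { return LineCol & 0x3ff; }
+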
+/// Specifying the line infos in one section.
+struct SecLineInfo {
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t NumLineInfo; ///< Number of line infos in this section
+};
+
+} // End namespace BTF.
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm/lib/Target/BPF/BTFDebug.cpp
new file mode 100644
index 000000000000..96efea4ba8ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -0,0 +1,759 @@
+//===- BTFDebug.cpp - BTF Generator ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing BTF debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BTFDebug.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include <fstream>
+#include <sstream>
+
+using namespace llvm;
+
+static const char *BTFKindStr[] = {
+#define HANDLE_BTF_KIND(ID, NAME) "BTF_KIND_" #NAME,
+#include "BTF.def"
+};
+
+/// Emit a BTF common type.
+void BTFTypeBase::emitType(MCStreamer &OS) {
+ OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
+ ")");
+ OS.EmitIntValue(BTFType.NameOff, 4);
+ OS.AddComment("0x" + Twine::utohexstr(BTFType.Info));
+ OS.EmitIntValue(BTFType.Info, 4);
+ OS.EmitIntValue(BTFType.Size, 4);
+}
+
+BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag)
+ : DTy(DTy) {
+ switch (Tag) {
+ case dwarf::DW_TAG_pointer_type:
+ Kind = BTF::BTF_KIND_PTR;
+ break;
+ case dwarf::DW_TAG_const_type:
+ Kind = BTF::BTF_KIND_CONST;
+ break;
+ case dwarf::DW_TAG_volatile_type:
+ Kind = BTF::BTF_KIND_VOLATILE;
+ break;
+ case dwarf::DW_TAG_typedef:
+ Kind = BTF::BTF_KIND_TYPEDEF;
+ break;
+ case dwarf::DW_TAG_restrict_type:
+ Kind = BTF::BTF_KIND_RESTRICT;
+ break;
+ default:
+ llvm_unreachable("Unknown DIDerivedType Tag");
+ }
+ BTFType.Info = Kind << 24;
+}
+
+void BTFTypeDerived::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(DTy->getName());
+
+ // The base type for PTR/CONST/VOLATILE could be void.
+ const DIType *ResolvedType = DTy->getBaseType().resolve();
+ if (!ResolvedType) {
+ assert((Kind == BTF::BTF_KIND_PTR || Kind == BTF::BTF_KIND_CONST ||
+ Kind == BTF::BTF_KIND_VOLATILE) &&
+ "Invalid null basetype");
+ BTFType.Type = 0;
+ } else {
+ BTFType.Type = BDebug.getTypeId(ResolvedType);
+ }
+}
+
+void BTFTypeDerived::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+/// Represent a struct/union forward declaration.
+BTFTypeFwd::BTFTypeFwd(StringRef Name, bool IsUnion) : Name(Name) {
+ Kind = BTF::BTF_KIND_FWD;
+ BTFType.Info = IsUnion << 31 | Kind << 24;
+ BTFType.Type = 0;
+}
+
+void BTFTypeFwd::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeFwd::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+BTFTypeInt::BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits,
+ uint32_t OffsetInBits, StringRef TypeName)
+ : Name(TypeName) {
+ // Translate IR int encoding to BTF int encoding.
+ uint8_t BTFEncoding;
+ switch (Encoding) {
+ case dwarf::DW_ATE_boolean:
+ BTFEncoding = BTF::INT_BOOL;
+ break;
+ case dwarf::DW_ATE_signed:
+ case dwarf::DW_ATE_signed_char:
+ BTFEncoding = BTF::INT_SIGNED;
+ break;
+ case dwarf::DW_ATE_unsigned:
+ case dwarf::DW_ATE_unsigned_char:
+ BTFEncoding = 0;
+ break;
+ default:
+ llvm_unreachable("Unknown BTFTypeInt Encoding");
+ }
+
+ Kind = BTF::BTF_KIND_INT;
+ BTFType.Info = Kind << 24;
+ BTFType.Size = roundupToBytes(SizeInBits);
+ IntVal = (BTFEncoding << 24) | OffsetInBits << 16 | SizeInBits;
+}
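+
+// Worked example (illustrative, not taken from this change): a plain signed
+// 32-bit "int" at bit offset 0 is encoded as
+//   IntVal = (INT_SIGNED << 24) | (0 << 16) | 32 = 0x01000020.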
+
+void BTFTypeInt::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeInt::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.AddComment("0x" + Twine::utohexstr(IntVal));
+ OS.EmitIntValue(IntVal, 4);
+}
+
+BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen) : ETy(ETy) {
+ Kind = BTF::BTF_KIND_ENUM;
+ BTFType.Info = Kind << 24 | VLen;
+ BTFType.Size = roundupToBytes(ETy->getSizeInBits());
+}
+
+void BTFTypeEnum::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(ETy->getName());
+
+ DINodeArray Elements = ETy->getElements();
+ for (const auto Element : Elements) {
+ const auto *Enum = cast<DIEnumerator>(Element);
+
+ struct BTF::BTFEnum BTFEnum;
+ BTFEnum.NameOff = BDebug.addString(Enum->getName());
+ // The BTF enum value is 32-bit; enforce it.
+ BTFEnum.Val = static_cast<uint32_t>(Enum->getValue());
+ EnumValues.push_back(BTFEnum);
+ }
+}
+
+void BTFTypeEnum::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Enum : EnumValues) {
+ OS.EmitIntValue(Enum.NameOff, 4);
+ OS.EmitIntValue(Enum.Val, 4);
+ }
+}
+
+BTFTypeArray::BTFTypeArray(const DICompositeType *ATy) : ATy(ATy) {
+ Kind = BTF::BTF_KIND_ARRAY;
+ BTFType.Info = Kind << 24;
+}
+
+/// Represent a BTF array. BTF does not record array dimensions,
+/// so conceptually a BTF array is a one-dimensional array.
+void BTFTypeArray::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(ATy->getName());
+ BTFType.Size = 0;
+
+ auto *BaseType = ATy->getBaseType().resolve();
+ ArrayInfo.ElemType = BDebug.getTypeId(BaseType);
+
+ // The IR does not really have a type for the index.
+ // A special type for array index should have been
+ // created during initial type traversal. Just
+ // retrieve that type id.
+ ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
+
+ // Get the number of array elements.
+ // If the array size is 0, set the number of elements as 0.
+ // Otherwise, recursively traverse the base types to
+ // find the element size. The number of elements is
+ // the total array size in bits divided by
+ // element size in bits.
+ uint64_t ArraySizeInBits = ATy->getSizeInBits();
+ if (!ArraySizeInBits) {
+ ArrayInfo.Nelems = 0;
+ } else {
+ uint32_t BaseTypeSize = BaseType->getSizeInBits();
+ while (!BaseTypeSize) {
+ const auto *DDTy = cast<DIDerivedType>(BaseType);
+ BaseType = DDTy->getBaseType().resolve();
+ assert(BaseType);
+ BaseTypeSize = BaseType->getSizeInBits();
+ }
+ ArrayInfo.Nelems = ATy->getSizeInBits() / BaseTypeSize;
+ }
+}
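+
+// Worked example (illustrative): for "int a[2][3]" the composite array type
+// reports 6 * 32 = 192 size bits over a 32-bit base type, so Nelems becomes
+// 192 / 32 = 6, i.e. both dimensions are flattened into one BTF array.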
+
+void BTFTypeArray::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.EmitIntValue(ArrayInfo.ElemType, 4);
+ OS.EmitIntValue(ArrayInfo.IndexType, 4);
+ OS.EmitIntValue(ArrayInfo.Nelems, 4);
+}
+
+/// Represent either a struct or a union.
+BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct,
+ bool HasBitField, uint32_t Vlen)
+ : STy(STy), HasBitField(HasBitField) {
+ Kind = IsStruct ? BTF::BTF_KIND_STRUCT : BTF::BTF_KIND_UNION;
+ BTFType.Size = roundupToBytes(STy->getSizeInBits());
+ BTFType.Info = (HasBitField << 31) | (Kind << 24) | Vlen;
+}
+
+void BTFTypeStruct::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(STy->getName());
+
+ // Add struct/union members.
+ const DINodeArray Elements = STy->getElements();
+ for (const auto *Element : Elements) {
+ struct BTF::BTFMember BTFMember;
+ const auto *DDTy = cast<DIDerivedType>(Element);
+
+ BTFMember.NameOff = BDebug.addString(DDTy->getName());
+ if (HasBitField) {
+ uint8_t BitFieldSize = DDTy->isBitField() ? DDTy->getSizeInBits() : 0;
+ BTFMember.Offset = BitFieldSize << 24 | DDTy->getOffsetInBits();
+ } else {
+ BTFMember.Offset = DDTy->getOffsetInBits();
+ }
+ BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType().resolve());
+ Members.push_back(BTFMember);
+ }
+}
+
+void BTFTypeStruct::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Member : Members) {
+ OS.EmitIntValue(Member.NameOff, 4);
+ OS.EmitIntValue(Member.Type, 4);
+ OS.AddComment("0x" + Twine::utohexstr(Member.Offset));
+ OS.EmitIntValue(Member.Offset, 4);
+ }
+}
+
+/// The Func kind represents both a subprogram and the pointee of a function
+/// pointer. If FuncName is empty, it represents the pointee of a function
+/// pointer; otherwise, it represents a subprogram. The func arg names are
+/// empty in the function-pointer pointee case, and are valid names for a
+/// subprogram.
+BTFTypeFuncProto::BTFTypeFuncProto(
+ const DISubroutineType *STy, uint32_t VLen,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames)
+ : STy(STy), FuncArgNames(FuncArgNames) {
+ Kind = BTF::BTF_KIND_FUNC_PROTO;
+ BTFType.Info = (Kind << 24) | VLen;
+}
+
+void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
+ DITypeRefArray Elements = STy->getTypeArray();
+ auto RetType = Elements[0].resolve();
+ BTFType.Type = RetType ? BDebug.getTypeId(RetType) : 0;
+ BTFType.NameOff = 0;
+
+ // A null parameter, typically the last one representing the vararg,
+ // is encoded with NameOff/Type set to 0.
+ for (unsigned I = 1, N = Elements.size(); I < N; ++I) {
+ struct BTF::BTFParam Param;
+ auto Element = Elements[I].resolve();
+ if (Element) {
+ Param.NameOff = BDebug.addString(FuncArgNames[I]);
+ Param.Type = BDebug.getTypeId(Element);
+ } else {
+ Param.NameOff = 0;
+ Param.Type = 0;
+ }
+ Parameters.push_back(Param);
+ }
+}
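+
+// For example (illustrative): "int f(char c, ...)" has a type array of
+// { int, char, null }; the trailing null element is emitted as a BTFParam
+// with NameOff = Type = 0, which is how a vararg prototype is marked.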
+
+void BTFTypeFuncProto::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Param : Parameters) {
+ OS.EmitIntValue(Param.NameOff, 4);
+ OS.EmitIntValue(Param.Type, 4);
+ }
+}
+
+BTFTypeFunc::BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId)
+ : Name(FuncName) {
+ Kind = BTF::BTF_KIND_FUNC;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = ProtoTypeId;
+}
+
+void BTFTypeFunc::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFTypeFunc::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+
+uint32_t BTFStringTable::addString(StringRef S) {
+ // Check whether the string already exists.
+ for (auto &OffsetM : OffsetToIdMap) {
+ if (Table[OffsetM.second] == S)
+ return OffsetM.first;
+ }
+ // Not found; add it to the string table.
+ uint32_t Offset = Size;
+ OffsetToIdMap[Offset] = Table.size();
+ Table.push_back(S);
+ Size += S.size() + 1;
+ return Offset;
+}
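+
+// Usage sketch (illustrative): the first string added is always the empty
+// string (see the BTFDebug constructor below), so offset 0 names "":
+//   BTFStringTable T;
+//   T.addString("");     // returns offset 0
+//   T.addString("foo");  // returns offset 1 (past "" plus its NUL)
+//   T.addString("foo");  // returns offset 1 again; duplicates are reused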
+
+BTFDebug::BTFDebug(AsmPrinter *AP)
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), SkipInstruction(false),
+ LineInfoGenerated(false), SecNameOff(0), ArrayIndexTypeId(0) {
+ addString("\0");
+}
+
+void BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry,
+ const DIType *Ty) {
+ TypeEntry->setId(TypeEntries.size() + 1);
+ DIToIdMap[Ty] = TypeEntry->getId();
+ TypeEntries.push_back(std::move(TypeEntry));
+}
+
+uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
+ TypeEntry->setId(TypeEntries.size() + 1);
+ uint32_t Id = TypeEntry->getId();
+ TypeEntries.push_back(std::move(TypeEntry));
+ return Id;
+}
+
+void BTFDebug::visitBasicType(const DIBasicType *BTy) {
+ // Only int types are supported in BTF.
+ uint32_t Encoding = BTy->getEncoding();
+ if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
+ Encoding != dwarf::DW_ATE_signed_char &&
+ Encoding != dwarf::DW_ATE_unsigned &&
+ Encoding != dwarf::DW_ATE_unsigned_char)
+ return;
+
+ // Create a BTF type instance for this DIBasicType and put it into
+ // DIToIdMap for cross-type reference check.
+ auto TypeEntry = llvm::make_unique<BTFTypeInt>(
+ Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
+ addType(std::move(TypeEntry), BTy);
+}
+
+/// Handle subprogram or subroutine types.
+void BTFDebug::visitSubroutineType(
+ const DISubroutineType *STy, bool ForSubprog,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames,
+ uint32_t &TypeId) {
+ DITypeRefArray Elements = STy->getTypeArray();
+ uint32_t VLen = Elements.size() - 1;
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ // Subprogram has a valid non-zero-length name, and the pointee of
+ // a function pointer has an empty name. The subprogram type will
+ // not be added to DIToIdMap as it should not be referenced by
+ // any other types.
+ auto TypeEntry = llvm::make_unique<BTFTypeFuncProto>(STy, VLen, FuncArgNames);
+ if (ForSubprog)
+ TypeId = addType(std::move(TypeEntry)); // For subprogram
+ else
+ addType(std::move(TypeEntry), STy); // For func ptr
+
+ // Visit return type and func arg types.
+ for (const auto Element : Elements) {
+ visitTypeEntry(Element.resolve());
+ }
+}
+
+/// Handle structure/union types.
+void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct) {
+ const DINodeArray Elements = CTy->getElements();
+ uint32_t VLen = Elements.size();
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ // Check whether we have any bitfield members or not
+ bool HasBitField = false;
+ for (const auto *Element : Elements) {
+ auto E = cast<DIDerivedType>(Element);
+ if (E->isBitField()) {
+ HasBitField = true;
+ break;
+ }
+ }
+
+ auto TypeEntry =
+ llvm::make_unique<BTFTypeStruct>(CTy, IsStruct, HasBitField, VLen);
+ addType(std::move(TypeEntry), CTy);
+
+ // Visit all struct members.
+ for (const auto *Element : Elements)
+ visitTypeEntry(cast<DIDerivedType>(Element));
+}
+
+void BTFDebug::visitArrayType(const DICompositeType *CTy) {
+ auto TypeEntry = llvm::make_unique<BTFTypeArray>(CTy);
+ addType(std::move(TypeEntry), CTy);
+
+ // The IR does not have a type for array index while BTF wants one.
+ // So create an array index type if there is none.
+ if (!ArrayIndexTypeId) {
+ auto TypeEntry = llvm::make_unique<BTFTypeInt>(dwarf::DW_ATE_unsigned, 32,
+ 0, "__ARRAY_SIZE_TYPE__");
+ ArrayIndexTypeId = addType(std::move(TypeEntry));
+ }
+
+ // Visit array element type.
+ visitTypeEntry(CTy->getBaseType().resolve());
+}
+
+void BTFDebug::visitEnumType(const DICompositeType *CTy) {
+ DINodeArray Elements = CTy->getElements();
+ uint32_t VLen = Elements.size();
+ if (VLen > BTF::MAX_VLEN)
+ return;
+
+ auto TypeEntry = llvm::make_unique<BTFTypeEnum>(CTy, VLen);
+ addType(std::move(TypeEntry), CTy);
+ // No need to visit base type as BTF does not encode it.
+}
+
+/// Handle structure/union forward declarations.
+void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion) {
+ auto TypeEntry = llvm::make_unique<BTFTypeFwd>(CTy->getName(), IsUnion);
+ addType(std::move(TypeEntry), CTy);
+}
+
+/// Handle structure, union, array and enumeration types.
+void BTFDebug::visitCompositeType(const DICompositeType *CTy) {
+ auto Tag = CTy->getTag();
+ if (Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ // Handle forward declaration differently as it does not have members.
+ if (CTy->isForwardDecl())
+ visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type);
+ else
+ visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type);
+ } else if (Tag == dwarf::DW_TAG_array_type)
+ visitArrayType(CTy);
+ else if (Tag == dwarf::DW_TAG_enumeration_type)
+ visitEnumType(CTy);
+}
+
+/// Handle pointer, typedef, const, volatile, restrict and member types.
+void BTFDebug::visitDerivedType(const DIDerivedType *DTy) {
+ unsigned Tag = DTy->getTag();
+
+ if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef ||
+ Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag);
+ addType(std::move(TypeEntry), DTy);
+ } else if (Tag != dwarf::DW_TAG_member) {
+ return;
+ }
+
+ // Visit base type of pointer, typedef, const, volatile, restrict or
+ // struct/union member.
+ visitTypeEntry(DTy->getBaseType().resolve());
+}
+
+void BTFDebug::visitTypeEntry(const DIType *Ty) {
+ if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end())
+ return;
+
+ uint32_t TypeId;
+ if (const auto *BTy = dyn_cast<DIBasicType>(Ty))
+ visitBasicType(BTy);
+ else if (const auto *STy = dyn_cast<DISubroutineType>(Ty))
+ visitSubroutineType(STy, false, std::unordered_map<uint32_t, StringRef>(),
+ TypeId);
+ else if (const auto *CTy = dyn_cast<DICompositeType>(Ty))
+ visitCompositeType(CTy);
+ else if (const auto *DTy = dyn_cast<DIDerivedType>(Ty))
+ visitDerivedType(DTy);
+ else
+ llvm_unreachable("Unknown DIType");
+}
+
+/// Read file contents from the actual file or from the embedded source.
+std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
+ auto File = SP->getFile();
+ std::string FileName;
+
+ if (File->getDirectory().size())
+ FileName = File->getDirectory().str() + "/" + File->getFilename().str();
+ else
+ FileName = File->getFilename();
+
+ // No need to populate the contents if they have already been populated.
+ if (FileContent.find(FileName) != FileContent.end())
+ return FileName;
+
+ std::vector<std::string> Content;
+ std::string Line;
+ Content.push_back(Line); // Line 0 for empty string
+
+ auto Source = File->getSource();
+ if (Source) {
+ std::istringstream InputString(Source.getValue());
+ while (std::getline(InputString, Line))
+ Content.push_back(Line);
+ } else {
+ std::ifstream InputFile(FileName);
+ while (std::getline(InputFile, Line))
+ Content.push_back(Line);
+ }
+
+ FileContent[FileName] = Content;
+ return FileName;
+}
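+
+// Note (illustrative): Content[0] holds an empty placeholder so the 1-based
+// line numbers coming from the DebugLoc can index the vector directly,
+// e.g. FileContent[FileName][Line] in constructLineInfo() below.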
+
+void BTFDebug::constructLineInfo(const DISubprogram *SP, MCSymbol *Label,
+ uint32_t Line, uint32_t Column) {
+ std::string FileName = populateFileContent(SP);
+ BTFLineInfo LineInfo;
+
+ LineInfo.Label = Label;
+ LineInfo.FileNameOff = addString(FileName);
+ // If file content is not available, let LineOff = 0.
+ if (Line < FileContent[FileName].size())
+ LineInfo.LineOff = addString(FileContent[FileName][Line]);
+ else
+ LineInfo.LineOff = 0;
+ LineInfo.LineNum = Line;
+ LineInfo.ColumnNum = Column;
+ LineInfoTable[SecNameOff].push_back(LineInfo);
+}
+
+void BTFDebug::emitCommonHeader() {
+ OS.AddComment("0x" + Twine::utohexstr(BTF::MAGIC));
+ OS.EmitIntValue(BTF::MAGIC, 2);
+ OS.EmitIntValue(BTF::VERSION, 1);
+ OS.EmitIntValue(0, 1);
+}
+
+void BTFDebug::emitBTFSection() {
+ MCContext &Ctx = OS.getContext();
+ OS.SwitchSection(Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0));
+
+ // Emit header.
+ emitCommonHeader();
+ OS.EmitIntValue(BTF::HeaderSize, 4);
+
+ uint32_t TypeLen = 0, StrLen;
+ for (const auto &TypeEntry : TypeEntries)
+ TypeLen += TypeEntry->getSize();
+ StrLen = StringTable.getSize();
+
+ OS.EmitIntValue(0, 4);
+ OS.EmitIntValue(TypeLen, 4);
+ OS.EmitIntValue(TypeLen, 4);
+ OS.EmitIntValue(StrLen, 4);
+
+ // Emit type table.
+ for (const auto &TypeEntry : TypeEntries)
+ TypeEntry->emitType(OS);
+
+ // Emit string table.
+ uint32_t StringOffset = 0;
+ for (const auto &S : StringTable.getTable()) {
+ OS.AddComment("string offset=" + std::to_string(StringOffset));
+ OS.EmitBytes(S);
+ OS.EmitBytes(StringRef("\0", 1));
+ StringOffset += S.size() + 1;
+ }
+}
+
+void BTFDebug::emitBTFExtSection() {
+ MCContext &Ctx = OS.getContext();
+ OS.SwitchSection(Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0));
+
+ // Emit header.
+ emitCommonHeader();
+ OS.EmitIntValue(BTF::ExtHeaderSize, 4);
+
+ // Account for FuncInfo/LineInfo record size as well.
+ uint32_t FuncLen = 4, LineLen = 4;
+ for (const auto &FuncSec : FuncInfoTable) {
+ FuncLen += BTF::SecFuncInfoSize;
+ FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize;
+ }
+ for (const auto &LineSec : LineInfoTable) {
+ LineLen += BTF::SecLineInfoSize;
+ LineLen += LineSec.second.size() * BTF::BPFLineInfoSize;
+ }
+
+ OS.EmitIntValue(0, 4);
+ OS.EmitIntValue(FuncLen, 4);
+ OS.EmitIntValue(FuncLen, 4);
+ OS.EmitIntValue(LineLen, 4);
+
+ // Emit func_info table.
+ OS.AddComment("FuncInfo");
+ OS.EmitIntValue(BTF::BPFFuncInfoSize, 4);
+ for (const auto &FuncSec : FuncInfoTable) {
+ OS.AddComment("FuncInfo section string offset=" +
+ std::to_string(FuncSec.first));
+ OS.EmitIntValue(FuncSec.first, 4);
+ OS.EmitIntValue(FuncSec.second.size(), 4);
+ for (const auto &FuncInfo : FuncSec.second) {
+ Asm->EmitLabelReference(FuncInfo.Label, 4);
+ OS.EmitIntValue(FuncInfo.TypeId, 4);
+ }
+ }
+
+ // Emit line_info table.
+ OS.AddComment("LineInfo");
+ OS.EmitIntValue(BTF::BPFLineInfoSize, 4);
+ for (const auto &LineSec : LineInfoTable) {
+ OS.AddComment("LineInfo section string offset=" +
+ std::to_string(LineSec.first));
+ OS.EmitIntValue(LineSec.first, 4);
+ OS.EmitIntValue(LineSec.second.size(), 4);
+ for (const auto &LineInfo : LineSec.second) {
+ Asm->EmitLabelReference(LineInfo.Label, 4);
+ OS.EmitIntValue(LineInfo.FileNameOff, 4);
+ OS.EmitIntValue(LineInfo.LineOff, 4);
+ OS.AddComment("Line " + std::to_string(LineInfo.LineNum) + " Col " +
+ std::to_string(LineInfo.ColumnNum));
+ OS.EmitIntValue(LineInfo.LineNum << 10 | LineInfo.ColumnNum, 4);
+ }
+ }
+}
+
+void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
+ auto *SP = MF->getFunction().getSubprogram();
+ auto *Unit = SP->getUnit();
+
+ if (Unit->getEmissionKind() == DICompileUnit::NoDebug) {
+ SkipInstruction = true;
+ return;
+ }
+ SkipInstruction = false;
+
+ // Collect all types locally referenced in this function.
+ // Use RetainedNodes so we can collect all argument names
+ // even if the argument is not used.
+ std::unordered_map<uint32_t, StringRef> FuncArgNames;
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ visitTypeEntry(DV->getType().resolve());
+
+ // Collect function arguments for subprogram func type.
+ uint32_t Arg = DV->getArg();
+ if (Arg)
+ FuncArgNames[Arg] = DV->getName();
+ }
+ }
+
+ // Construct subprogram func proto type.
+ uint32_t ProtoTypeId;
+ visitSubroutineType(SP->getType(), true, FuncArgNames, ProtoTypeId);
+
+ // Construct subprogram func type.
+ auto FuncTypeEntry =
+ llvm::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId);
+ uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
+
+ // Construct funcinfo and the first lineinfo for the function.
+ MCSymbol *FuncLabel = Asm->getFunctionBegin();
+ BTFFuncInfo FuncInfo;
+ FuncInfo.Label = FuncLabel;
+ FuncInfo.TypeId = FuncTypeId;
+ if (FuncLabel->isInSection()) {
+ MCSection &Section = FuncLabel->getSection();
+ const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
+ assert(SectionELF && "Null section for Function Label");
+ SecNameOff = addString(SectionELF->getSectionName());
+ } else {
+ SecNameOff = addString(".text");
+ }
+ FuncInfoTable[SecNameOff].push_back(FuncInfo);
+}
+
+void BTFDebug::endFunctionImpl(const MachineFunction *MF) {
+ SkipInstruction = false;
+ LineInfoGenerated = false;
+ SecNameOff = 0;
+}
+
+void BTFDebug::beginInstruction(const MachineInstr *MI) {
+ DebugHandlerBase::beginInstruction(MI);
+
+ if (SkipInstruction || MI->isMetaInstruction() ||
+ MI->getFlag(MachineInstr::FrameSetup))
+ return;
+
+ if (MI->isInlineAsm()) {
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ ;
+
+ // Skip this inline asm instruction if the asmstr is empty.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ if (AsmStr[0] == 0)
+ return;
+ }
+
+ // Skip this instruction if no DebugLoc or the DebugLoc
+ // is the same as the previous instruction.
+ const DebugLoc &DL = MI->getDebugLoc();
+ if (!DL || PrevInstLoc == DL) {
+ // This instruction will be skipped. If no LineInfo has been
+ // generated yet, construct one based on the function signature.
+ if (LineInfoGenerated == false) {
+ auto *S = MI->getMF()->getFunction().getSubprogram();
+ MCSymbol *FuncLabel = Asm->getFunctionBegin();
+ constructLineInfo(S, FuncLabel, S->getLine(), 0);
+ LineInfoGenerated = true;
+ }
+
+ return;
+ }
+
+ // Create a temporary label to remember the insn for lineinfo.
+ MCSymbol *LineSym = OS.getContext().createTempSymbol();
+ OS.EmitLabel(LineSym);
+
+ // Construct the lineinfo.
+ auto SP = DL.get()->getScope()->getSubprogram();
+ constructLineInfo(SP, LineSym, DL.getLine(), DL.getCol());
+
+ LineInfoGenerated = true;
+ PrevInstLoc = DL;
+}
+
+void BTFDebug::endModule() {
+ // Collect all types referenced by globals.
+ const Module *M = MMI->getModule();
+ for (const DICompileUnit *CUNode : M->debug_compile_units()) {
+ for (const auto *GVE : CUNode->getGlobalVariables()) {
+ DIGlobalVariable *GV = GVE->getVariable();
+ visitTypeEntry(GV->getType().resolve());
+ }
+ }
+
+ // Complete BTF type cross references.
+ for (const auto &TypeEntry : TypeEntries)
+ TypeEntry->completeType(*this);
+
+ // Emit BTF sections.
+ emitBTFSection();
+ emitBTFExtSection();
+}
diff --git a/contrib/llvm/lib/Target/BPF/BTFDebug.h b/contrib/llvm/lib/Target/BPF/BTFDebug.h
new file mode 100644
index 000000000000..afd4ed87f63d
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BTFDebug.h
@@ -0,0 +1,285 @@
+//===- BTFDebug.h -----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains support for writing BTF debug info.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BTFDEBUG_H
+#define LLVM_LIB_TARGET_BPF_BTFDEBUG_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
+#include <unordered_map>
+#include "BTF.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class BTFDebug;
+class DIType;
+class MCStreamer;
+class MCSymbol;
+class MachineFunction;
+
+/// The base class for BTF type generation.
+class BTFTypeBase {
+protected:
+ uint8_t Kind;
+ uint32_t Id;
+ struct BTF::CommonType BTFType;
+
+public:
+ virtual ~BTFTypeBase() = default;
+ void setId(uint32_t Id) { this->Id = Id; }
+ uint32_t getId() { return Id; }
+ uint32_t roundupToBytes(uint32_t NumBits) { return (NumBits + 7) >> 3; }
+ /// Get the size of this BTF type entry.
+ virtual uint32_t getSize() { return BTF::CommonTypeSize; }
+ /// Complete BTF type generation after all related DebugInfo types
+ /// have been visited so that their BTF type ids are available
+ /// for cross reference.
+ virtual void completeType(BTFDebug &BDebug) {}
+ /// Emit types for this BTF type entry.
+ virtual void emitType(MCStreamer &OS);
+};
+
+/// Handle several derived types, including pointer, const,
+/// volatile, typedef and restrict.
+class BTFTypeDerived : public BTFTypeBase {
+ const DIDerivedType *DTy;
+
+public:
+ BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag);
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle struct or union forward declaration.
+class BTFTypeFwd : public BTFTypeBase {
+ StringRef Name;
+
+public:
+ BTFTypeFwd(StringRef Name, bool IsUnion);
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle int type.
+class BTFTypeInt : public BTFTypeBase {
+ StringRef Name;
+ uint32_t IntVal; ///< Encoding, offset, bits
+
+public:
+ BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits, uint32_t OffsetInBits,
+ StringRef TypeName);
+ uint32_t getSize() { return BTFTypeBase::getSize() + sizeof(uint32_t); }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle enumerate type.
+class BTFTypeEnum : public BTFTypeBase {
+ const DICompositeType *ETy;
+ std::vector<struct BTF::BTFEnum> EnumValues;
+
+public:
+ BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnumSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle array type.
+class BTFTypeArray : public BTFTypeBase {
+ const DICompositeType *ATy;
+ struct BTF::BTFArray ArrayInfo;
+
+public:
+ BTFTypeArray(const DICompositeType *ATy);
+ uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle struct/union type.
+class BTFTypeStruct : public BTFTypeBase {
+ const DICompositeType *STy;
+ bool HasBitField;
+ std::vector<struct BTF::BTFMember> Members;
+
+public:
+ BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField,
+ uint32_t NumMembers);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + Members.size() * BTF::BTFMemberSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle function pointer.
+class BTFTypeFuncProto : public BTFTypeBase {
+ const DISubroutineType *STy;
+ std::unordered_map<uint32_t, StringRef> FuncArgNames;
+ std::vector<struct BTF::BTFParam> Parameters;
+
+public:
+ BTFTypeFuncProto(const DISubroutineType *STy, uint32_t NumParams,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + Parameters.size() * BTF::BTFParamSize;
+ }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle a subprogram.
+class BTFTypeFunc : public BTFTypeBase {
+ StringRef Name;
+
+public:
+ BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId);
+ uint32_t getSize() { return BTFTypeBase::getSize(); }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// String table.
+class BTFStringTable {
+ /// String table size in bytes.
+ uint32_t Size;
+ /// A mapping from string table offset to the index
+ /// of the Table. It is used to avoid putting
+ /// duplicated strings in the table.
+ std::unordered_map<uint32_t, uint32_t> OffsetToIdMap;
+ /// A vector of strings to represent the string table.
+ std::vector<std::string> Table;
+
+public:
+ BTFStringTable() : Size(0) {}
+ uint32_t getSize() { return Size; }
+ std::vector<std::string> &getTable() { return Table; }
+ /// Add a string to the string table and return its offset
+ /// in the table.
+ uint32_t addString(StringRef S);
+};
+
+/// Represent one func and its type id.
+struct BTFFuncInfo {
+ const MCSymbol *Label; ///< Func MCSymbol
+ uint32_t TypeId; ///< Type id referring to .BTF type section
+};
+
+/// Represent one line info.
+struct BTFLineInfo {
+ MCSymbol *Label; ///< MCSymbol identifying insn for the lineinfo
+ uint32_t FileNameOff; ///< file name offset in the .BTF string table
+ uint32_t LineOff; ///< line offset in the .BTF string table
+ uint32_t LineNum; ///< the line number
+ uint32_t ColumnNum; ///< the column number
+};
+
+/// Collect and emit BTF information.
+class BTFDebug : public DebugHandlerBase {
+ MCStreamer &OS;
+ bool SkipInstruction;
+ bool LineInfoGenerated;
+ uint32_t SecNameOff;
+ uint32_t ArrayIndexTypeId;
+ BTFStringTable StringTable;
+ std::vector<std::unique_ptr<BTFTypeBase>> TypeEntries;
+ std::unordered_map<const DIType *, uint32_t> DIToIdMap;
+ std::unordered_map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
+ std::unordered_map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
+ StringMap<std::vector<std::string>> FileContent;
+
+ /// Add types to TypeEntries.
+ /// @{
+ /// Add types to TypeEntries and DIToIdMap.
+ void addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty);
+ /// Add types to TypeEntries only and return type id.
+ uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry);
+ /// @}
+
+ /// IR type visiting functions.
+ /// @{
+ void visitTypeEntry(const DIType *Ty);
+ void visitBasicType(const DIBasicType *BTy);
+ void visitSubroutineType(
+ const DISubroutineType *STy, bool ForSubprog,
+ const std::unordered_map<uint32_t, StringRef> &FuncArgNames,
+ uint32_t &TypeId);
+ void visitFwdDeclType(const DICompositeType *CTy, bool IsUnion);
+ void visitCompositeType(const DICompositeType *CTy);
+ void visitStructType(const DICompositeType *STy, bool IsStruct);
+ void visitArrayType(const DICompositeType *ATy);
+ void visitEnumType(const DICompositeType *ETy);
+ void visitDerivedType(const DIDerivedType *DTy);
+ /// @}
+
+ /// Get the file content for the subprogram. Certain lines of the file
+ /// may later be put into the string table and referenced by line info.
+ std::string populateFileContent(const DISubprogram *SP);
+
+ /// Construct a line info.
+ void constructLineInfo(const DISubprogram *SP, MCSymbol *Label, uint32_t Line,
+ uint32_t Column);
+
+ /// Emit common header of .BTF and .BTF.ext sections.
+ void emitCommonHeader();
+
+ /// Emit the .BTF section.
+ void emitBTFSection();
+
+ /// Emit the .BTF.ext section.
+ void emitBTFExtSection();
+
+protected:
+ /// Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// Post process after all instructions in this function are processed.
+ void endFunctionImpl(const MachineFunction *MF) override;
+
+public:
+ BTFDebug(AsmPrinter *AP);
+
+ /// Get the special array index type id.
+ uint32_t getArrayIndexTypeId() {
+ assert(ArrayIndexTypeId);
+ return ArrayIndexTypeId;
+ }
+
+ /// Add string to the string table.
+ size_t addString(StringRef S) { return StringTable.addString(S); }
+
+ /// Get the type id for a particular DIType.
+ uint32_t getTypeId(const DIType *Ty) {
+ assert(Ty && "Invalid null Type");
+ assert(DIToIdMap.find(Ty) != DIToIdMap.end() &&
+ "DIType not added in the DIToIdMap");
+ return DIToIdMap[Ty];
+ }
+
+ void setSymbolSize(const MCSymbol *Symbol, uint64_t Size) override {}
+
+ /// Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override;
+
+ /// Complete all the types and emit the BTF sections.
+ void endModule() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index e7790ddb3d7e..9f80b762fe36 100644
--- a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "BPF.h"
-#include "BPFSubtarget.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCAsmInfo.h"
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 134e890dfe49..32e79d0f527e 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>
@@ -50,6 +51,23 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case FK_Data_8:
return ELF::R_BPF_64_64;
case FK_Data_4:
+ // .BTF.ext generates FK_Data_4 relocations for
+ // insn offset by creating temporary labels.
+ // The insn offset is within the code section and
+ // has already been fulfilled by applyFixup(). No
+ // further relocation is needed.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (A->getSymbol().isTemporary()) {
+ MCSection &Section = A->getSymbol().getSection();
+ const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
+ assert(SectionELF && "Null section for reloc symbol");
+
+ // The reloc symbol should be in text section.
+ unsigned Flags = SectionELF->getFlags();
+ if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
+ return ELF::R_BPF_NONE;
+ }
+ }
return ELF::R_BPF_64_32;
}
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 171f7f607ff4..af3ad5315253 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -30,8 +30,8 @@ public:
WeakRefDirective = "\t.weak\t";
UsesELFSectionDirectiveForBSS = true;
- HasSingleParameterDotFile = false;
- HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = true;
+ HasDotTypeDotSizeDirective = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 92bda224f3dc..2eb1f0fc8bd9 100644
--- a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -313,8 +313,6 @@ public:
bool iss30_2Imm() const { return true; }
bool iss29_3Imm() const { return true; }
bool iss27_2Imm() const { return CheckImmRange(27, 2, true, true, false); }
- bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); }
- bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); }
bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); }
bool iss8_0Imm() const { return CheckImmRange(8, 0, true, false, false); }
bool iss8_0Imm64() const { return CheckImmRange(8, 0, true, true, false); }
@@ -581,6 +579,7 @@ bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
case Match_MnemonicFail:
return Error(IDLoc, "unrecognized instruction");
case Match_InvalidOperand:
+ case Match_InvalidTiedOperand:
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
if (ErrorInfo >= InstOperands.size())
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 1a619ebda84e..428b42eba30d 100644
--- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -9,7 +9,6 @@
#define DEBUG_TYPE "hexagon-disassembler"
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
@@ -118,6 +117,10 @@ DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -146,62 +149,7 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder);
static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-
-static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<4>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<14>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<8>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<7>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<12>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<3>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<13>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<9>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<5>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
- const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
+#include "HexagonDepDecoders.h"
#include "HexagonGenDisassemblerTables.inc"
static MCDisassembler *createHexagonDisassembler(const Target &T,
@@ -664,6 +612,18 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
return (DecodeRegisterClass(Inst, RegNo >> 1, HvxWRDecoderTable));
}
+LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg HvxVQRDecoderTable[] = {
+ Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3,
+ Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7};
+
+ return DecodeRegisterClass(Inst, RegNo >> 2, HvxVQRDecoderTable);
+}
+
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
index 6ec52d18cdc4..c18492da803b 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -15,33 +15,6 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
-#define Hexagon_POINTER_SIZE 4
-
-#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
-#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
-#define Hexagon_WordSize Hexagon_PointerSize
-#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
-
-// allocframe saves LR and FP on stack before allocating
-// a new stack frame. This takes 8 bytes.
-#define HEXAGON_LRFP_SIZE 8
-
-// Normal instruction size (in bytes).
-#define HEXAGON_INSTR_SIZE 4
-
-// Maximum number of words and instructions in a packet.
-#define HEXAGON_PACKET_SIZE 4
-#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
-// Minimum number of instructions in an end-loop packet.
-#define HEXAGON_PACKET_INNER_SIZE 2
-#define HEXAGON_PACKET_OUTER_SIZE 3
-// Maximum number of instructions in a packet before shuffling,
-// including a compound one or a duplex or an extender.
-#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
-
-// Name of the global offset table as defined by the Hexagon ABI
-#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_"
-
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index 69e263a425f8..868353e18832 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -25,6 +25,9 @@ include "llvm/Target/Target.td"
include "HexagonDepArch.td"
// Hexagon ISA Extensions
+def ExtensionZReg: SubtargetFeature<"zreg", "UseZRegOps", "true",
+ "Hexagon ZReg extension instructions">;
+
def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion",
"Hexagon::ArchEnum::V60", "Hexagon HVX instructions">;
def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion",
@@ -32,10 +35,14 @@ def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion",
[ExtensionHVX]>;
def ExtensionHVXV62: SubtargetFeature<"hvxv62", "HexagonHVXVersion",
"Hexagon::ArchEnum::V62", "Hexagon HVX instructions",
- [ExtensionHVX,ExtensionHVXV60]>;
+ [ExtensionHVX, ExtensionHVXV60]>;
def ExtensionHVXV65: SubtargetFeature<"hvxv65", "HexagonHVXVersion",
"Hexagon::ArchEnum::V65", "Hexagon HVX instructions",
- [ExtensionHVX,ExtensionHVXV60, ExtensionHVXV62]>;
+ [ExtensionHVX, ExtensionHVXV60, ExtensionHVXV62]>;
+def ExtensionHVXV66: SubtargetFeature<"hvxv66", "HexagonHVXVersion",
+ "Hexagon::ArchEnum::V66", "Hexagon HVX instructions",
+ [ExtensionHVX, ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
+ ExtensionZReg]>;
def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
"true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
@@ -60,6 +67,9 @@ def FeatureDuplex: SubtargetFeature<"duplex", "EnableDuplex", "true",
"Enable generation of duplex instruction">;
def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19",
"true", "Reserve register R19">;
+def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim",
+ "NoreturnStackElim", "true",
+ "Eliminate stack allocation in a noreturn function when possible">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
@@ -78,6 +88,10 @@ def UseHVXV62 : Predicate<"HST->useHVXOps()">,
AssemblerPredicate<"ExtensionHVXV62">;
def UseHVXV65 : Predicate<"HST->useHVXOps()">,
AssemblerPredicate<"ExtensionHVXV65">;
+def UseHVXV66 : Predicate<"HST->useHVXOps()">,
+ AssemblerPredicate<"ExtensionHVXV66">;
+def UseZReg : Predicate<"HST->useZRegOps()">,
+ AssemblerPredicate<"ExtensionZReg">;
def Hvx64: HwMode<"+hvx-length64b">;
def Hvx128: HwMode<"+hvx-length128b">;
@@ -309,8 +323,6 @@ include "HexagonPatternsHVX.td"
include "HexagonPatternsV65.td"
include "HexagonDepMappings.td"
include "HexagonIntrinsics.td"
-include "HexagonMapAsm2IntrinV62.gen.td"
-include "HexagonMapAsm2IntrinV65.gen.td"
def HexagonInstrInfo : InstrInfo;
@@ -323,31 +335,31 @@ class Proc<string Name, SchedMachineModel Model,
: ProcessorModel<Name, Model, Features>;
def : Proc<"generic", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60,
- FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
- FeaturePackets, FeatureSmallData]>;
-def : Proc<"hexagonv4", HexagonModelV4,
- [ArchV4,
+ [ArchV5, ArchV55, ArchV60,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
-def : Proc<"hexagonv5", HexagonModelV4,
- [ArchV4, ArchV5,
+def : Proc<"hexagonv5", HexagonModelV5,
+ [ArchV5,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv55", HexagonModelV55,
- [ArchV4, ArchV5, ArchV55,
+ [ArchV5, ArchV55,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv60", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60,
+ [ArchV5, ArchV55, ArchV60,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv62", HexagonModelV62,
- [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62,
+ [ArchV5, ArchV55, ArchV60, ArchV62,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv65", HexagonModelV65,
- [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
+ FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
+ FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv66", HexagonModelV66,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66,
FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
FeatureNVS, FeaturePackets, FeatureSmallData]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 0ac83ea7c5fc..f44fb16e2d8e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -755,7 +755,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCInstrInfo &MCII = *Subtarget->getInstrInfo();
if (MI->isBundle()) {
- assert(Subtarget->usePackets() && "Support for packets is disabled");
const MachineBasicBlock* MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index ba255d30fede..1bdebe557a8c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1985,6 +1985,10 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
case Hexagon::S2_storeri_io:
V = int32_t(U);
break;
+ default:
+ // Opc is already checked above to be one of the three store instructions.
+ // This silences a -Wuninitialized false positive on GCC 5.4.
+ llvm_unreachable("Unexpected store opcode");
}
if (!isInt<8>(V))
return false;
@@ -2223,6 +2227,10 @@ bool BitSimplification::genBitSplit(MachineInstr *MI,
for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
// The number of leading zeros here should be the number of trailing
// non-zeros in RC.
+ unsigned SRC = MRI.getRegClass(S)->getID();
+ if (SRC != Hexagon::IntRegsRegClassID &&
+ SRC != Hexagon::DoubleRegsRegClassID)
+ continue;
if (!BT.has(S))
continue;
const BitTracker::RegisterCell &SC = BT.lookup(S);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 94aacbed6af6..92b6da871a4c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -93,11 +93,12 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
unsigned ID = RC.getID();
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
- auto &HRI = static_cast<const HexagonRegisterInfo&>(TRI);
+ const auto &HRI = static_cast<const HexagonRegisterInfo&>(TRI);
bool IsSubLo = (Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo));
switch (ID) {
case Hexagon::DoubleRegsRegClassID:
case Hexagon::HvxWRRegClassID:
+ case Hexagon::HvxVQRRegClassID:
return IsSubLo ? BT::BitMask(0, RW-1)
: BT::BitMask(RW, 2*RW-1);
default:
@@ -114,9 +115,13 @@ uint16_t HexagonEvaluator::getPhysRegBitWidth(unsigned Reg) const {
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
using namespace Hexagon;
- for (auto &RC : {HvxVRRegClass, HvxWRRegClass, HvxQRRegClass})
- if (RC.contains(Reg))
- return TRI.getRegSizeInBits(RC);
+ const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ if (HST.useHVXOps()) {
+ for (auto &RC : {HvxVRRegClass, HvxWRRegClass, HvxQRRegClass,
+ HvxVQRRegClass})
+ if (RC.contains(Reg))
+ return TRI.getRegSizeInBits(RC);
+ }
// Default treatment for other physical registers.
if (const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg))
return TRI.getRegSizeInBits(*RC);
@@ -142,6 +147,8 @@ const TargetRegisterClass &HexagonEvaluator::composeWithSubRegIndex(
return Hexagon::IntRegsRegClass;
case Hexagon::HvxWRRegClassID:
return Hexagon::HvxVRRegClass;
+ case Hexagon::HvxVQRRegClassID:
+ return Hexagon::HvxWRRegClass;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index cbce61bc63c9..ba9f638796eb 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -376,7 +376,7 @@ namespace {
using IndexList = SetVector<unsigned>;
using ExtenderInit = std::pair<ExtValue, ExtExpr>;
using AssignmentMap = std::map<ExtenderInit, IndexList>;
- using LocDefMap = std::map<Loc, IndexList>;
+ using LocDefList = std::vector<std::pair<Loc, IndexList>>;
const HexagonInstrInfo *HII = nullptr;
const HexagonRegisterInfo *HRI = nullptr;
@@ -399,7 +399,7 @@ namespace {
void assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
AssignmentMap &IMap);
void calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
- LocDefMap &Defs);
+ LocDefList &Defs);
Register insertInitializer(Loc DefL, const ExtenderInit &ExtI);
bool replaceInstrExact(const ExtDesc &ED, Register ExtR);
bool replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
@@ -730,21 +730,13 @@ bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const {
}
case MachineOperand::MO_ExternalSymbol:
return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName);
- case MachineOperand::MO_GlobalAddress: {
- // Global values may not have names, so compare their positions
- // in the parent module.
- const Module &M = *V.GV->getParent();
- auto FindPos = [&M] (const GlobalValue &V) {
- unsigned P = 0;
- for (const GlobalValue &T : M.global_values()) {
- if (&T == &V)
- return P;
- P++;
- }
- llvm_unreachable("Global value not found in module");
- };
- return FindPos(*V.GV) < FindPos(*ER.V.GV);
- }
+ case MachineOperand::MO_GlobalAddress:
+ // Do not use GUIDs, since they depend on the source path. Moving the
+ // source file to a different directory could cause different GUID
+ // values for a pair of given symbols. These symbols could then compare
+ // "less" in one directory, but "greater" in another.
+ assert(!V.GV->getName().empty() && !ER.V.GV->getName().empty());
+ return V.GV->getName() < ER.V.GV->getName();
case MachineOperand::MO_BlockAddress: {
const BasicBlock *ThisB = V.BA->getBasicBlock();
const BasicBlock *OtherB = ER.V.BA->getBasicBlock();
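A note on the GUID remark in the hunk above: for internal-linkage symbols LLVM derives the GUID by hashing a file-qualified identifier, so the same pair of local globals can hash (and therefore order) differently depending on where the module was built, while their plain names stay stable. A minimal sketch of that path dependence, assuming only the GlobalValue::getGlobalIdentifier/getGUID static helpers present at this revision; the symbol and file names are made up:

    #include "llvm/IR/GlobalValue.h"
    using namespace llvm;
    // Identifiers for local symbols embed the source file name, so the two
    // strings below differ, and their MD5-based GUIDs differ with them.
    void guidIsPathDependent() {
      std::string IdA = GlobalValue::getGlobalIdentifier(
          "foo", GlobalValue::InternalLinkage, "src/a.c");
      std::string IdB = GlobalValue::getGlobalIdentifier(
          "foo", GlobalValue::InternalLinkage, "build2/a.c");
      GlobalValue::GUID GA = GlobalValue::getGUID(IdA); // depends on the directory
      GlobalValue::GUID GB = GlobalValue::getGUID(IdB); // differs from GA
      (void)GA; (void)GB; // getName() is "foo" for both, hence the name-based compare
    }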
@@ -796,6 +788,7 @@ HCE::ExtValue::operator MachineOperand() const {
return MachineOperand::CreateCPI(V.ImmVal, Offset, TF);
case MachineOperand::MO_JumpTableIndex:
assert(Offset == 0);
+ return MachineOperand::CreateJTI(V.ImmVal, TF);
default:
llvm_unreachable("Unhandled kind");
}
@@ -1215,12 +1208,19 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) {
case Hexagon::S4_subaddi: // (__: ## - Rs<<0)
ED.Expr.Rs = MI.getOperand(OpNum+1);
ED.Expr.Neg = true;
+ break;
default: // (__: ## + __<<_)
break;
}
}
ED.UseMI = &MI;
+
+ // Ignore unnamed globals.
+ ExtRoot ER(ED.getOp());
+ if (ER.Kind == MachineOperand::MO_GlobalAddress)
+ if (ER.V.GV->getName().empty())
+ return;
Extenders.push_back(ED);
}
@@ -1243,9 +1243,13 @@ void HCE::collectInstr(MachineInstr &MI) {
void HCE::collect(MachineFunction &MF) {
Extenders.clear();
- for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip unreachable blocks.
+ if (MBB.getNumber() == -1)
+ continue;
for (MachineInstr &MI : MBB)
collectInstr(MI);
+ }
}
void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
@@ -1470,7 +1474,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
}
void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
- LocDefMap &Defs) {
+ LocDefList &Defs) {
if (Refs.empty())
return;
@@ -1517,7 +1521,7 @@ void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
It = DomB->getFirstTerminator();
}
Loc DefLoc(DomB, It);
- Defs.emplace(DefLoc, Refs);
+ Defs.emplace_back(DefLoc, Refs);
}
HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
@@ -1629,7 +1633,7 @@ bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
else
MIB.add(MachineOperand(ExtR));
}
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1680,7 +1684,7 @@ bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
// Add the stored value for stores.
if (MI.mayStore())
MIB.add(getStoredValueOp(MI));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1715,6 +1719,15 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
// Clamp Diff to the 16 bit range.
int32_t D = isInt<16>(Diff) ? Diff : (Diff > 0 ? 32767 : -32768);
+ if (Diff > 32767) {
+ // Split Diff into two values: one that is close to min/max int16,
+ // and the other being the rest, and such that both have the same
+ // "alignment" as Diff.
+ uint32_t UD = Diff;
+ OffsetRange R = getOffsetRange(MI.getOperand(0));
+ uint32_t A = std::min<uint32_t>(R.Align, 1u << countTrailingZeros(UD));
+ D &= ~(A-1);
+ }
BuildMI(MBB, At, dl, HII->get(IdxOpc))
.add(MI.getOperand(0))
.add(MachineOperand(ExtR))
@@ -1797,7 +1810,7 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
// Add the stored value for stores.
if (MI.mayStore())
MIB.add(getStoredValueOp(MI));
- MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
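To make the alignment fixup above concrete, a worked instance with hypothetical numbers, assuming the offset range of operand 0 reports Align == 4: with Diff = 40000 (a multiple of 64, so countTrailingZeros gives 6), A = min(4, 64) = 4; D starts at the clamp value 32767 and D &= ~(A-1) rounds it down to 32764. Both that emitted offset and the leftover part of Diff, 40000 - 32764 = 7236, remain multiples of 4, which is the "same alignment as Diff" property the comment asks for.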
@@ -1878,7 +1891,7 @@ bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) {
}
bool HCE::replaceExtenders(const AssignmentMap &IMap) {
- LocDefMap Defs;
+ LocDefList Defs;
bool Changed = false;
for (const std::pair<ExtenderInit,IndexList> &P : IMap) {
@@ -1931,6 +1944,11 @@ const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const {
bool HCE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ if (MF.getFunction().hasPersonalityFn()) {
+ LLVM_DEBUG(dbgs() << getPassName() << ": skipping " << MF.getName()
+ << " due to exception handling\n");
+ return false;
+ }
LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
@@ -1940,10 +1958,24 @@ bool HCE::runOnMachineFunction(MachineFunction &MF) {
AssignmentMap IMap;
collect(MF);
- llvm::sort(Extenders.begin(), Extenders.end(),
- [](const ExtDesc &A, const ExtDesc &B) {
- return ExtValue(A) < ExtValue(B);
- });
+ llvm::sort(Extenders, [this](const ExtDesc &A, const ExtDesc &B) {
+ ExtValue VA(A), VB(B);
+ if (VA != VB)
+ return VA < VB;
+ const MachineInstr *MA = A.UseMI;
+ const MachineInstr *MB = B.UseMI;
+ if (MA == MB) {
+ // If it's the same instruction, compare operand numbers.
+ return A.OpNum < B.OpNum;
+ }
+
+ const MachineBasicBlock *BA = MA->getParent();
+ const MachineBasicBlock *BB = MB->getParent();
+ assert(BA->getNumber() != -1 && BB->getNumber() != -1);
+ if (BA != BB)
+ return BA->getNumber() < BB->getNumber();
+ return MDT->dominates(MA, MB);
+ });
bool Changed = false;
LLVM_DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n");
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 8f22a71dc1f3..fa192391313e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -2463,6 +2463,7 @@ APInt HexagonConstEvaluator::getCmpImm(unsigned Opc, unsigned OpX,
case Hexagon::A4_cmpheqi: // s8
case Hexagon::C4_cmpneqi: // s8
Signed = true;
+ break;
case Hexagon::A4_cmpbeqi: // u8
break;
case Hexagon::C2_cmpgtui: // u9
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index fccde96d8a32..28965b69e284 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -555,8 +555,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
continue;
- // Check that the two instructions are combinable. V4 allows more
- // instructions to be merged into a combine.
+ // Check that the two instructions are combinable.
// The order matters because in an A2_tfrsi we can encode an int8 as
// the hi reg operand but only a uint6 as the low reg operand.
if ((IsI2LowReg && !areCombinableOperations(TRI, I1, *I2, AllowC64)) ||
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
index dc75f8f63400..dff2b2f471d0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -1,4 +1,4 @@
-//===- HexagonDepArch.h ---------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,12 +10,11 @@
//===----------------------------------------------------------------------===//
-
#ifndef HEXAGON_DEP_ARCH_H
#define HEXAGON_DEP_ARCH_H
namespace llvm {
namespace Hexagon {
-enum class ArchEnum { V4,V5,V55,V60,V62,V65 };
+enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66 };
} // namespace Hexagon
} // namespace llvm;
#endif // HEXAGON_DEP_ARCH_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
index 3594379aa841..f1aadae555c8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -1,4 +1,4 @@
-//===- HexagonDepArch.td --------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,8 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
+def ArchV66: SubtargetFeature<"v66", "HexagonArchVersion", "Hexagon::ArchEnum::V66", "Enable Hexagon V66 architecture">;
+def HasV66 : Predicate<"HST->hasV66Ops()">, AssemblerPredicate<"ArchV66">;
def ArchV65: SubtargetFeature<"v65", "HexagonArchVersion", "Hexagon::ArchEnum::V65", "Enable Hexagon V65 architecture">;
def HasV65 : Predicate<"HST->hasV65Ops()">, AssemblerPredicate<"ArchV65">;
def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">;
@@ -18,7 +19,5 @@ def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V
def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<"ArchV60">;
def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">;
def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<"ArchV55">;
-def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "Hexagon::ArchEnum::V4", "Enable Hexagon V4 architecture">;
-def HasV4 : Predicate<"HST->hasV4Ops()">, AssemblerPredicate<"ArchV4">;
def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">;
def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<"ArchV5">;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h
new file mode 100644
index 000000000000..9f78412f45d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepDecoders.h
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, please consult code owner before editing.
+//===----------------------------------------------------------------------===//
+
+// clang-format off
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<4>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<14>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<8>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<7>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<12>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<3>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<13>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<9>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<5>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+// clang-format on
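These stubs differ only in the width passed to signedDecoder<>; the template parameter is the number of significant bits in the already-scaled field, so for most of them an sM_N immediate (an M-bit signed value shifted left by N) pairs with signedDecoder<M+N>, e.g. s4_3 with signedDecoder<7> (the wide s29_3/s30_2/s31_1 forms are the exception). A minimal sketch of just the sign-extension step, assuming nothing beyond llvm::SignExtend64 from Support/MathExtras.h; the in-tree signedDecoder<> lives in the Hexagon disassembler and performs the equivalent extension plus operand construction:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>
    // s4_3: a 4-bit signed field scaled by 8 occupies 7 significant bits,
    // so bit 6 acts as the sign bit after the shift.
    int64_t decodeS4_3(uint64_t tmp) {        // e.g. tmp = 0x58 (0b1011000)
      return llvm::SignExtend64<7>(tmp);      // 0x58 -> -40, i.e. (-5) << 3
    }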
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index b27cdae81a28..9e3dea9f3e9b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -1,4 +1,4 @@
-//===- HexagonDepIICHVX.td ------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,1849 +9,2549 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-def tc_0317c6ca : InstrItinClass;
-def tc_1b93bdc6 : InstrItinClass;
-def tc_2171ebae : InstrItinClass;
-def tc_28978789 : InstrItinClass;
-def tc_29841470 : InstrItinClass;
-def tc_316c637c : InstrItinClass;
-def tc_354299ad : InstrItinClass;
-def tc_35e92f8e : InstrItinClass;
-def tc_38208312 : InstrItinClass;
-def tc_4105d6b5 : InstrItinClass;
-def tc_41f4b64e : InstrItinClass;
-def tc_41f99e1c : InstrItinClass;
-def tc_45453b98 : InstrItinClass;
-def tc_4e2a5159 : InstrItinClass;
-def tc_4f190ba3 : InstrItinClass;
-def tc_4fd8566e : InstrItinClass;
-def tc_51cd3aab : InstrItinClass;
-def tc_5a9fc4ec : InstrItinClass;
-def tc_5c03dc63 : InstrItinClass;
-def tc_5c120602 : InstrItinClass;
-def tc_5cbf490b : InstrItinClass;
-def tc_63e3d94c : InstrItinClass;
-def tc_644584f8 : InstrItinClass;
-def tc_66bb62ea : InstrItinClass;
-def tc_69b6dd20 : InstrItinClass;
-def tc_6b78cf13 : InstrItinClass;
-def tc_6fd9ad30 : InstrItinClass;
-def tc_71337255 : InstrItinClass;
-def tc_72ad7b54 : InstrItinClass;
-def tc_7474003e : InstrItinClass;
-def tc_77a4c701 : InstrItinClass;
-def tc_7c3f55c4 : InstrItinClass;
-def tc_7e9f581b : InstrItinClass;
-def tc_7fa82b08 : InstrItinClass;
-def tc_7fa8b40f : InstrItinClass;
-def tc_85d237e3 : InstrItinClass;
-def tc_8a6eb39a : InstrItinClass;
-def tc_8b6a873f : InstrItinClass;
-def tc_908a4c8c : InstrItinClass;
-def tc_9311da3f : InstrItinClass;
-def tc_94f43c04 : InstrItinClass;
-def tc_9777e6bf : InstrItinClass;
-def tc_97c165b9 : InstrItinClass;
-def tc_98733e9d : InstrItinClass;
-def tc_99093773 : InstrItinClass;
-def tc_9b9642a1 : InstrItinClass;
-def tc_9c267309 : InstrItinClass;
-def tc_a3127e12 : InstrItinClass;
-def tc_a4c9df3b : InstrItinClass;
-def tc_a807365d : InstrItinClass;
-def tc_aedb9f9e : InstrItinClass;
-def tc_b06ab583 : InstrItinClass;
-def tc_b712833a : InstrItinClass;
-def tc_b77635b4 : InstrItinClass;
-def tc_bbaf280e : InstrItinClass;
-def tc_bf142ae2 : InstrItinClass;
-def tc_bfe309d5 : InstrItinClass;
-def tc_c00bf9c9 : InstrItinClass;
-def tc_c4b515c5 : InstrItinClass;
-def tc_cbf6d1dc : InstrItinClass;
-def tc_cedf314b : InstrItinClass;
-def tc_d2cb81ea : InstrItinClass;
-def tc_d5090f3e : InstrItinClass;
-def tc_d642eff3 : InstrItinClass;
-def tc_d725e5b0 : InstrItinClass;
-def tc_d7bea0ec : InstrItinClass;
-def tc_d98f4d63 : InstrItinClass;
-def tc_da979fb3 : InstrItinClass;
-def tc_db5b9e2f : InstrItinClass;
-def tc_df54ad52 : InstrItinClass;
-def tc_e172d86a : InstrItinClass;
-def tc_e231aa4f : InstrItinClass;
-def tc_e3748cdf : InstrItinClass;
-def tc_e5053c8f : InstrItinClass;
-def tc_e6299d16 : InstrItinClass;
-def tc_eb669007 : InstrItinClass;
-def tc_ec58f88a : InstrItinClass;
-def tc_eda67dcd : InstrItinClass;
-def tc_ee927c0e : InstrItinClass;
-def tc_f3fc3f83 : InstrItinClass;
-def tc_fa99dc24 : InstrItinClass;
+def tc_04da405a : InstrItinClass;
+def tc_05058f6f : InstrItinClass;
+def tc_05ac6f98 : InstrItinClass;
+def tc_05ca8cfd : InstrItinClass;
+def tc_08a4f1b6 : InstrItinClass;
+def tc_0b04c6c7 : InstrItinClass;
+def tc_0ec46cf9 : InstrItinClass;
+def tc_131f1c81 : InstrItinClass;
+def tc_1381a97c : InstrItinClass;
+def tc_15fdf750 : InstrItinClass;
+def tc_16ff9ef8 : InstrItinClass;
+def tc_191381c1 : InstrItinClass;
+def tc_1ad8a370 : InstrItinClass;
+def tc_1ba8a0cd : InstrItinClass;
+def tc_20a4bbec : InstrItinClass;
+def tc_257f6f7c : InstrItinClass;
+def tc_26a377fe : InstrItinClass;
+def tc_2c745bb8 : InstrItinClass;
+def tc_2d4051cd : InstrItinClass;
+def tc_2e8f5f6e : InstrItinClass;
+def tc_309dbb4f : InstrItinClass;
+def tc_3904b926 : InstrItinClass;
+def tc_3aacf4a8 : InstrItinClass;
+def tc_3ad719fb : InstrItinClass;
+def tc_3c56e5ce : InstrItinClass;
+def tc_3ce09744 : InstrItinClass;
+def tc_3e2aaafc : InstrItinClass;
+def tc_447d9895 : InstrItinClass;
+def tc_453fe68d : InstrItinClass;
+def tc_46d6c3e0 : InstrItinClass;
+def tc_51d0ecc3 : InstrItinClass;
+def tc_52447ecc : InstrItinClass;
+def tc_540c3da3 : InstrItinClass;
+def tc_54a0dc47 : InstrItinClass;
+def tc_561aaa58 : InstrItinClass;
+def tc_56c4f9fe : InstrItinClass;
+def tc_56e64202 : InstrItinClass;
+def tc_58d21193 : InstrItinClass;
+def tc_5bf8afbb : InstrItinClass;
+def tc_61bf7c03 : InstrItinClass;
+def tc_649072c2 : InstrItinClass;
+def tc_660769f1 : InstrItinClass;
+def tc_663c80a7 : InstrItinClass;
+def tc_6942b6e0 : InstrItinClass;
+def tc_6e7fa133 : InstrItinClass;
+def tc_71646d06 : InstrItinClass;
+def tc_7177e272 : InstrItinClass;
+def tc_718b5c53 : InstrItinClass;
+def tc_7273323b : InstrItinClass;
+def tc_7417e785 : InstrItinClass;
+def tc_767c4e9d : InstrItinClass;
+def tc_7e6a3e89 : InstrItinClass;
+def tc_8772086c : InstrItinClass;
+def tc_87adc037 : InstrItinClass;
+def tc_8e420e4d : InstrItinClass;
+def tc_90bcc1db : InstrItinClass;
+def tc_933f2b39 : InstrItinClass;
+def tc_946013d8 : InstrItinClass;
+def tc_9d1dc972 : InstrItinClass;
+def tc_9f363d21 : InstrItinClass;
+def tc_a02a10a8 : InstrItinClass;
+def tc_a0dbea28 : InstrItinClass;
+def tc_a7e6707d : InstrItinClass;
+def tc_ab23f776 : InstrItinClass;
+def tc_abe8c3b2 : InstrItinClass;
+def tc_ac4046bc : InstrItinClass;
+def tc_af25efd9 : InstrItinClass;
+def tc_b091f1c6 : InstrItinClass;
+def tc_b28e51aa : InstrItinClass;
+def tc_b4416217 : InstrItinClass;
+def tc_b9db8205 : InstrItinClass;
+def tc_c0749f3c : InstrItinClass;
+def tc_c127de3a : InstrItinClass;
+def tc_c4edf264 : InstrItinClass;
+def tc_c5dba46e : InstrItinClass;
+def tc_c7039829 : InstrItinClass;
+def tc_cd94bfe0 : InstrItinClass;
+def tc_d8287c14 : InstrItinClass;
+def tc_db5555f3 : InstrItinClass;
+def tc_dd5b0695 : InstrItinClass;
+def tc_df80eeb0 : InstrItinClass;
+def tc_e2d2e9e5 : InstrItinClass;
+def tc_e35c1e93 : InstrItinClass;
+def tc_e3f68a46 : InstrItinClass;
+def tc_e675c45a : InstrItinClass;
+def tc_e699ae41 : InstrItinClass;
+def tc_e8797b98 : InstrItinClass;
+def tc_e99d4c2e : InstrItinClass;
+def tc_f1de44ef : InstrItinClass;
+def tc_f21e8abb : InstrItinClass;
+def tc_fd7610da : InstrItinClass;
class DepHVXItinV55 {
list<InstrItinData> DepHVXItinV55_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
+class DepHVXItinV60 {
+ list<InstrItinData> DepHVXItinV60_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_ALL]>], [],
[]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
-class DepHVXItinV60 {
- list<InstrItinData> DepHVXItinV60_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
+ InstrStage<1, [CVI_ALL]>], [3, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+class DepHVXItinV62 {
+ list<InstrItinData> DepHVXItinV62_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VS*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-class DepHVXItinV62 {
- list<InstrItinData> DepHVXItinV62_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
+ InstrStage<1, [CVI_ALL]>], [3, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
[HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ InstrStage<1, [CVI_ALL]>], [3],
[HVX_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+class DepHVXItinV65 {
+ list<InstrItinData> DepHVXItinV65_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VA*/
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
-
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
[Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-class DepHVXItinV65 {
- list<InstrItinData> DepHVXItinV65_list = [
- InstrItinData <tc_0317c6ca, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_1b93bdc6, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_2171ebae, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_28978789, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_29841470, /*SLOT0,STORE*/
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_316c637c, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_354299ad, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_35e92f8e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_38208312, /*SLOT01,LOAD*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4105d6b5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_41f4b64e, /*SLOT0123,VS*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_41f99e1c, /*SLOT23,VX_DV*/
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_45453b98, /*SLOT0123,VS*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_4e2a5159, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_4f190ba3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_4fd8566e, /*SLOT0,NOSLOT1,LOAD,VP*/
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_51cd3aab, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5a9fc4ec, /*SLOT0123,VA*/
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5c03dc63, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5c120602, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_5cbf490b, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_63e3d94c, /*SLOT1,LOAD,VA*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_644584f8, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66bb62ea, /*SLOT1,LOAD,VA*/
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
[InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_LD], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_69b6dd20, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_6b78cf13, /*SLOT23,VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
- [HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_6fd9ad30, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
- InstrItinData <tc_71337255, /*SLOT0123,VA*/
+class DepHVXItinV66 {
+ list<InstrItinData> DepHVXItinV66_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
[HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_72ad7b54, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7474003e, /*SLOT2,VX_DV*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
-
- InstrItinData <tc_77a4c701, /*SLOT01,LOAD*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7c3f55c4, /*SLOT23,VX_DV*/
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
[HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_7e9f581b, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa82b08, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_7fa8b40f, /*SLOT0123,VS*/
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_85d237e3, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_8a6eb39a, /*SLOT0123,VA_DV*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
- [HVX_FWD]>,
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b6a873f, /*SLOT0,STORE*/
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_908a4c8c, /*SLOT23,VX*/
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9311da3f, /*SLOT23,VX*/
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_94f43c04, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9777e6bf, /*SLOT0,VA*/
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
- [Hex_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97c165b9, /*SLOT0123,VA_DV*/
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_98733e9d, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_99093773, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
- [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_9b9642a1, /*SLOT0123,VA*/
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_9c267309, /*SLOT01,LOAD*/
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a3127e12, /*SLOT0123,VA*/
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_a4c9df3b, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_a807365d, /*SLOT23,VS_VX*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
- InstrItinData <tc_aedb9f9e, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_b06ab583, /*SLOT0123,VA*/
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
- [HVX_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b712833a, /*SLOT01,LOAD,VA*/
- [InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_b77635b4, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [2],
- [Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bbaf280e, /*SLOT0123,VA*/
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
[HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_bf142ae2, /*SLOT0123,VP*/
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_bfe309d5, /*SLOT1,LOAD,VA_DV*/
- [InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_c00bf9c9, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_c4b515c5, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_cbf6d1dc, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_cedf314b, /*SLOT0123,4SLOT*/
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [3],
- [HVX_FWD]>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_d2cb81ea, /*SLOT0123,VS*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_SHIFT]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d5090f3e, /*SLOT0,STORE*/
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_d642eff3, /*SLOT0,NOSLOT1,STORE,VP*/
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [SLOT1], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d725e5b0, /*SLOT23,VX*/
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_d7bea0ec, /*SLOT0123,VP_VS*/
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLSHF]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_d98f4d63, /*SLOT23,VX_DV*/
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
[HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_da979fb3, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
[HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db5b9e2f, /*SLOT0,STORE*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
- [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
-
- InstrItinData <tc_df54ad52, /*SLOT0,STORE,VA*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
- [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e172d86a, /*SLOT23,VX_DV*/
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_e231aa4f, /*SLOT23,VX*/
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
- InstrItinData <tc_e3748cdf, /*SLOT0,STORE,VA*/
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
[InstrStage<1, [SLOT0], 0>,
InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
[Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e5053c8f, /*SLOT0123,4SLOT*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ALL]>], [],
- []>,
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_e6299d16, /*SLOT0123,VP*/
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5],
- [HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_eb669007, /*SLOT01,LOAD,VA*/
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
[InstrStage<1, [SLOT0, SLOT1], 0>,
- InstrStage<1, [CVI_LD], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
- [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
- InstrItinData <tc_ec58f88a, /*SLOT0,STORE,VA_DV*/
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
[InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
InstrStage<1, [CVI_ST], 0>,
- InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
- [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_eda67dcd, /*SLOT23,VX_DV*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
- InstrItinData <tc_ee927c0e, /*SLOT23,VS_VX*/
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
[InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
- InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
- [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3fc3f83, /*SLOT0123,VP*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
- [HVX_FWD, HVX_FWD, HVX_FWD]>,
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
- InstrItinData <tc_fa99dc24, /*SLOT2,VX_DV*/
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
[InstrStage<1, [SLOT2], 0>,
InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
- [HVX_FWD, HVX_FWD, Hex_FWD]>
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index 931504b56ccb..9da25952fb1c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -1,4 +1,4 @@
-//===- HexagonDepIICScalar.td ---------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,3087 +9,3789 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-def tc_00afc57e : InstrItinClass;
-def tc_00e7c26e : InstrItinClass;
-def tc_03220ffa : InstrItinClass;
-def tc_038a1342 : InstrItinClass;
-def tc_04c9decc : InstrItinClass;
-def tc_05b6c987 : InstrItinClass;
-def tc_0cd51c76 : InstrItinClass;
-def tc_0dc560de : InstrItinClass;
-def tc_0fc1ae07 : InstrItinClass;
-def tc_10b97e27 : InstrItinClass;
-def tc_1372bca1 : InstrItinClass;
-def tc_14cd4cfa : InstrItinClass;
-def tc_15411484 : InstrItinClass;
-def tc_16d0d8d5 : InstrItinClass;
-def tc_181af5d0 : InstrItinClass;
-def tc_1853ea6d : InstrItinClass;
-def tc_1b82a277 : InstrItinClass;
-def tc_1b9c9ee5 : InstrItinClass;
-def tc_1d5a38a8 : InstrItinClass;
-def tc_1e856f58 : InstrItinClass;
-def tc_234a11a5 : InstrItinClass;
-def tc_238d91d2 : InstrItinClass;
-def tc_29175780 : InstrItinClass;
-def tc_2a160009 : InstrItinClass;
-def tc_2b2f4060 : InstrItinClass;
-def tc_2b6f77c6 : InstrItinClass;
-def tc_2f185f5c : InstrItinClass;
-def tc_2fc0c436 : InstrItinClass;
-def tc_351fed2d : InstrItinClass;
-def tc_3669266a : InstrItinClass;
-def tc_367f7f3d : InstrItinClass;
-def tc_36c68ad1 : InstrItinClass;
-def tc_395dc00f : InstrItinClass;
-def tc_3bc2c5d3 : InstrItinClass;
-def tc_3cb8ea06 : InstrItinClass;
-def tc_3d04548d : InstrItinClass;
-def tc_3da80ba5 : InstrItinClass;
-def tc_3e07fb90 : InstrItinClass;
-def tc_41d5298e : InstrItinClass;
-def tc_4403ca65 : InstrItinClass;
-def tc_44126683 : InstrItinClass;
-def tc_452f85af : InstrItinClass;
-def tc_481e5e5c : InstrItinClass;
-def tc_49eb22c8 : InstrItinClass;
-def tc_4ca572d4 : InstrItinClass;
-def tc_4d9914c9 : InstrItinClass;
-def tc_4d99bca9 : InstrItinClass;
-def tc_4f7cd700 : InstrItinClass;
-def tc_513bef45 : InstrItinClass;
-def tc_51b866be : InstrItinClass;
-def tc_523fcf30 : InstrItinClass;
-def tc_5274e61a : InstrItinClass;
-def tc_52d7bbea : InstrItinClass;
-def tc_53bc8a6a : InstrItinClass;
-def tc_53bdb2f6 : InstrItinClass;
-def tc_540fdfbc : InstrItinClass;
-def tc_55050d58 : InstrItinClass;
-def tc_57288781 : InstrItinClass;
-def tc_594ab548 : InstrItinClass;
-def tc_59a01ead : InstrItinClass;
-def tc_5acef64a : InstrItinClass;
-def tc_5ba5997d : InstrItinClass;
-def tc_5eb851fc : InstrItinClass;
-def tc_5f6847a1 : InstrItinClass;
-def tc_60571023 : InstrItinClass;
-def tc_609d2efe : InstrItinClass;
-def tc_63fe3df7 : InstrItinClass;
-def tc_66888ded : InstrItinClass;
-def tc_6792d5ff : InstrItinClass;
-def tc_681a2300 : InstrItinClass;
-def tc_68cb12ce : InstrItinClass;
-def tc_6aa5711a : InstrItinClass;
-def tc_6ac37025 : InstrItinClass;
-def tc_6ebb4a12 : InstrItinClass;
-def tc_6efc556e : InstrItinClass;
-def tc_6fa4db47 : InstrItinClass;
-def tc_73043bf4 : InstrItinClass;
-def tc_746baa8e : InstrItinClass;
-def tc_74e47fd9 : InstrItinClass;
-def tc_7934b9df : InstrItinClass;
-def tc_7a830544 : InstrItinClass;
-def tc_7f881c76 : InstrItinClass;
-def tc_84df2cd3 : InstrItinClass;
-def tc_855b0b61 : InstrItinClass;
-def tc_87735c3b : InstrItinClass;
-def tc_897d1a9d : InstrItinClass;
-def tc_8b15472a : InstrItinClass;
-def tc_8fd5f294 : InstrItinClass;
-def tc_8fe6b782 : InstrItinClass;
-def tc_90f3e30c : InstrItinClass;
-def tc_976ddc4f : InstrItinClass;
-def tc_97743097 : InstrItinClass;
-def tc_994333cd : InstrItinClass;
-def tc_999d32db : InstrItinClass;
-def tc_99be14ca : InstrItinClass;
-def tc_9c00ce8d : InstrItinClass;
-def tc_9c98e8af : InstrItinClass;
-def tc_9d5941c7 : InstrItinClass;
-def tc_9ef61e5c : InstrItinClass;
-def tc_9faf76ae : InstrItinClass;
-def tc_9fdb5406 : InstrItinClass;
-def tc_a21dc435 : InstrItinClass;
-def tc_a27582fa : InstrItinClass;
-def tc_a46f0df5 : InstrItinClass;
-def tc_a788683e : InstrItinClass;
-def tc_a8acdac0 : InstrItinClass;
-def tc_a904d137 : InstrItinClass;
-def tc_adb14c66 : InstrItinClass;
-def tc_b13761ae : InstrItinClass;
-def tc_b166348b : InstrItinClass;
-def tc_b44c6e2a : InstrItinClass;
-def tc_b77c481f : InstrItinClass;
-def tc_b7dd427e : InstrItinClass;
-def tc_b9488031 : InstrItinClass;
-def tc_b9c0b731 : InstrItinClass;
-def tc_b9c4623f : InstrItinClass;
-def tc_bad2bcaf : InstrItinClass;
-def tc_bcc96cee : InstrItinClass;
-def tc_bde7aaf4 : InstrItinClass;
-def tc_be706f30 : InstrItinClass;
-def tc_c2f7d806 : InstrItinClass;
-def tc_c5e2426d : InstrItinClass;
-def tc_c6aa82f7 : InstrItinClass;
-def tc_c6ce9b3f : InstrItinClass;
-def tc_c6ebf8dd : InstrItinClass;
-def tc_c74f796f : InstrItinClass;
-def tc_c82dc1ff : InstrItinClass;
-def tc_caaebcba : InstrItinClass;
-def tc_cd7374a0 : InstrItinClass;
-def tc_cde8b071 : InstrItinClass;
-def tc_cf47a43f : InstrItinClass;
-def tc_cf59f215 : InstrItinClass;
-def tc_d088982c : InstrItinClass;
-def tc_d1090e34 : InstrItinClass;
-def tc_d24b2d85 : InstrItinClass;
-def tc_d580173f : InstrItinClass;
-def tc_d6bf0472 : InstrItinClass;
-def tc_d9709180 : InstrItinClass;
-def tc_d9f95eef : InstrItinClass;
-def tc_daa058fa : InstrItinClass;
-def tc_dbdffe3d : InstrItinClass;
-def tc_e0739b8c : InstrItinClass;
-def tc_e1e99bfa : InstrItinClass;
-def tc_e216a5db : InstrItinClass;
-def tc_e421e012 : InstrItinClass;
-def tc_e7624c08 : InstrItinClass;
-def tc_e7d02c66 : InstrItinClass;
-def tc_e913dc32 : InstrItinClass;
-def tc_e9c822f7 : InstrItinClass;
-def tc_e9fae2d6 : InstrItinClass;
-def tc_ef52ed71 : InstrItinClass;
-def tc_ef84f62f : InstrItinClass;
-def tc_f2704b9a : InstrItinClass;
-def tc_f3eaa14b : InstrItinClass;
-def tc_f47d212f : InstrItinClass;
-def tc_f49e76f4 : InstrItinClass;
-def tc_f7dd9c9f : InstrItinClass;
-def tc_f86c328a : InstrItinClass;
-def tc_f8eeed7a : InstrItinClass;
-def tc_fcab4871 : InstrItinClass;
-def tc_ff9ee76e : InstrItinClass;
-
-class DepScalarItinV4 {
- list<InstrItinData> DepScalarItinV4_list = [
- InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_181af5d0, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3669266a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_367f7f3d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_36c68ad1, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_395dc00f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3bc2c5d3, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3cb8ea06, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3d04548d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3da80ba5, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3e07fb90, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_41d5298e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4403ca65, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_44126683, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_452f85af, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_481e5e5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_49eb22c8, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4ca572d4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_4d9914c9, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_4d99bca9, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4f7cd700, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_513bef45, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_51b866be, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_681a2300, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_68cb12ce, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6aa5711a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_7934b9df, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c98e8af, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9d5941c7, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9ef61e5c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9faf76ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_9fdb5406, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a21dc435, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_a27582fa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a46f0df5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a788683e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a8acdac0, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_a904d137, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_adb14c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c0b731, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c5e2426d, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_c6aa82f7, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ce9b3f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ebf8dd, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c74f796f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c82dc1ff, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_caaebcba, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cd7374a0, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cde8b071, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cf47a43f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cf59f215, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_d088982c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d1090e34, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d24b2d85, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d580173f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d6bf0472, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_d9709180, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d9f95eef, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_fcab4871, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_ff9ee76e, [InstrStage<1, [SLOT0]>]> ];
-}
+def tc_002cb246 : InstrItinClass;
+def tc_0371abea : InstrItinClass;
+def tc_05c070ec : InstrItinClass;
+def tc_05d3a09b : InstrItinClass;
+def tc_0663f615 : InstrItinClass;
+def tc_096199d3 : InstrItinClass;
+def tc_0a705168 : InstrItinClass;
+def tc_0ae0825c : InstrItinClass;
+def tc_0b2be201 : InstrItinClass;
+def tc_0d8f5752 : InstrItinClass;
+def tc_13bfbcf9 : InstrItinClass;
+def tc_14b272fa : InstrItinClass;
+def tc_14b5c689 : InstrItinClass;
+def tc_15aa71c5 : InstrItinClass;
+def tc_174516e8 : InstrItinClass;
+def tc_17e0d2cd : InstrItinClass;
+def tc_1a2fd869 : InstrItinClass;
+def tc_1ad90acd : InstrItinClass;
+def tc_1ae57e39 : InstrItinClass;
+def tc_1b6f7cec : InstrItinClass;
+def tc_1c4528a2 : InstrItinClass;
+def tc_1c80410a : InstrItinClass;
+def tc_1d81e60e : InstrItinClass;
+def tc_1fc97744 : InstrItinClass;
+def tc_20cdee80 : InstrItinClass;
+def tc_2332b92e : InstrItinClass;
+def tc_24b66c99 : InstrItinClass;
+def tc_25a78932 : InstrItinClass;
+def tc_2b8da4c2 : InstrItinClass;
+def tc_2eabeebe : InstrItinClass;
+def tc_2f7c551d : InstrItinClass;
+def tc_2ff964b4 : InstrItinClass;
+def tc_30b9bb4a : InstrItinClass;
+def tc_32779c6f : InstrItinClass;
+def tc_36153880 : InstrItinClass;
+def tc_362c6592 : InstrItinClass;
+def tc_3962fa26 : InstrItinClass;
+def tc_39dfefe8 : InstrItinClass;
+def tc_3a867367 : InstrItinClass;
+def tc_3b470976 : InstrItinClass;
+def tc_3b5b7ef9 : InstrItinClass;
+def tc_3bd75825 : InstrItinClass;
+def tc_3c76b0ff : InstrItinClass;
+def tc_3d495a39 : InstrItinClass;
+def tc_40116ca8 : InstrItinClass;
+def tc_434c8e1e : InstrItinClass;
+def tc_4414d8b1 : InstrItinClass;
+def tc_44d3da28 : InstrItinClass;
+def tc_4560740b : InstrItinClass;
+def tc_4837eefb : InstrItinClass;
+def tc_49a8207d : InstrItinClass;
+def tc_4ae7b58b : InstrItinClass;
+def tc_4b68bce4 : InstrItinClass;
+def tc_4c5ba658 : InstrItinClass;
+def tc_4d5fa3a1 : InstrItinClass;
+def tc_53559e35 : InstrItinClass;
+def tc_56336eb0 : InstrItinClass;
+def tc_56f114f4 : InstrItinClass;
+def tc_57890846 : InstrItinClass;
+def tc_5a2711e5 : InstrItinClass;
+def tc_5abb5e3f : InstrItinClass;
+def tc_5aee39f7 : InstrItinClass;
+def tc_5b54b33f : InstrItinClass;
+def tc_5b7c0967 : InstrItinClass;
+def tc_5bf126a6 : InstrItinClass;
+def tc_5d7f5414 : InstrItinClass;
+def tc_5ef37dc4 : InstrItinClass;
+def tc_6132ba3d : InstrItinClass;
+def tc_61830035 : InstrItinClass;
+def tc_640086b5 : InstrItinClass;
+def tc_643b4717 : InstrItinClass;
+def tc_67435e81 : InstrItinClass;
+def tc_675e4897 : InstrItinClass;
+def tc_679309b8 : InstrItinClass;
+def tc_6b25e783 : InstrItinClass;
+def tc_703e822c : InstrItinClass;
+def tc_7186d325 : InstrItinClass;
+def tc_7646c131 : InstrItinClass;
+def tc_76851da1 : InstrItinClass;
+def tc_779080bf : InstrItinClass;
+def tc_784490da : InstrItinClass;
+def tc_785f65a7 : InstrItinClass;
+def tc_7a91e76a : InstrItinClass;
+def tc_838b34ea : InstrItinClass;
+def tc_85c9c08f : InstrItinClass;
+def tc_85d5d03f : InstrItinClass;
+def tc_862b3e70 : InstrItinClass;
+def tc_88b4f13d : InstrItinClass;
+def tc_89e94ad3 : InstrItinClass;
+def tc_8b121f4a : InstrItinClass;
+def tc_8b3e402a : InstrItinClass;
+def tc_8c945be0 : InstrItinClass;
+def tc_8c99de45 : InstrItinClass;
+def tc_8d9d0154 : InstrItinClass;
+def tc_8fb7ab1b : InstrItinClass;
+def tc_9461ff31 : InstrItinClass;
+def tc_946df596 : InstrItinClass;
+def tc_9ad9998f : InstrItinClass;
+def tc_9bfd761f : InstrItinClass;
+def tc_9c3ecd83 : InstrItinClass;
+def tc_9ca930f7 : InstrItinClass;
+def tc_9da59d12 : InstrItinClass;
+def tc_9debc299 : InstrItinClass;
+def tc_9e313203 : InstrItinClass;
+def tc_9fc3dae0 : InstrItinClass;
+def tc_a1123dda : InstrItinClass;
+def tc_a1c00888 : InstrItinClass;
+def tc_a58fd5cc : InstrItinClass;
+def tc_a5d4aeec : InstrItinClass;
+def tc_a6b1eca9 : InstrItinClass;
+def tc_a813cf9a : InstrItinClass;
+def tc_a9d88b22 : InstrItinClass;
+def tc_ae53734a : InstrItinClass;
+def tc_b31c2e97 : InstrItinClass;
+def tc_b343892a : InstrItinClass;
+def tc_b43e7930 : InstrItinClass;
+def tc_b4407292 : InstrItinClass;
+def tc_b44ecf75 : InstrItinClass;
+def tc_b4b5c03a : InstrItinClass;
+def tc_b51dc29a : InstrItinClass;
+def tc_b83e6d73 : InstrItinClass;
+def tc_b857bf4e : InstrItinClass;
+def tc_b8bffe55 : InstrItinClass;
+def tc_b90a29b1 : InstrItinClass;
+def tc_b9272d6c : InstrItinClass;
+def tc_b9e09e03 : InstrItinClass;
+def tc_bab0eed9 : InstrItinClass;
+def tc_bafaade3 : InstrItinClass;
+def tc_bcf98408 : InstrItinClass;
+def tc_bd8382d1 : InstrItinClass;
+def tc_bdceeac1 : InstrItinClass;
+def tc_be9602ff : InstrItinClass;
+def tc_bf061958 : InstrItinClass;
+def tc_bfec0f01 : InstrItinClass;
+def tc_c4db48cb : InstrItinClass;
+def tc_c4f596e3 : InstrItinClass;
+def tc_c79a189f : InstrItinClass;
+def tc_c8ce0b5c : InstrItinClass;
+def tc_cd374165 : InstrItinClass;
+def tc_cf8126ae : InstrItinClass;
+def tc_cfd8378a : InstrItinClass;
+def tc_d08ee0f4 : InstrItinClass;
+def tc_d1aa9eaa : InstrItinClass;
+def tc_d2e63d61 : InstrItinClass;
+def tc_d5b7b0c1 : InstrItinClass;
+def tc_d5c0729a : InstrItinClass;
+def tc_d63f638c : InstrItinClass;
+def tc_d65dbf51 : InstrItinClass;
+def tc_d773585a : InstrItinClass;
+def tc_d9d43ecb : InstrItinClass;
+def tc_da4a37ed : InstrItinClass;
+def tc_da97ee82 : InstrItinClass;
+def tc_db2bce9c : InstrItinClass;
+def tc_de4df740 : InstrItinClass;
+def tc_de554571 : InstrItinClass;
+def tc_df3319ed : InstrItinClass;
+def tc_e06f432a : InstrItinClass;
+def tc_e4a7f9f0 : InstrItinClass;
+def tc_e4b3cb20 : InstrItinClass;
+def tc_e78647bd : InstrItinClass;
+def tc_e86aa961 : InstrItinClass;
+def tc_e93a3d71 : InstrItinClass;
+def tc_e95795ec : InstrItinClass;
+def tc_e9f3243f : InstrItinClass;
+def tc_f429765c : InstrItinClass;
+def tc_f675fee8 : InstrItinClass;
+def tc_f8e23f0b : InstrItinClass;
+def tc_f9058dd7 : InstrItinClass;
+def tc_fc3999b4 : InstrItinClass;
+def tc_fcc3ddf9 : InstrItinClass;
+def tc_fe211424 : InstrItinClass;
class DepScalarItinV5 {
list<InstrItinData> DepScalarItinV5_list = [
- InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_181af5d0, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3669266a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_367f7f3d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_36c68ad1, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_395dc00f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3bc2c5d3, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3cb8ea06, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3d04548d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3da80ba5, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_3e07fb90, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_41d5298e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4403ca65, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_44126683, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_452f85af, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_481e5e5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_49eb22c8, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4ca572d4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_4d9914c9, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_4d99bca9, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4f7cd700, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_513bef45, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_51b866be, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_681a2300, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_68cb12ce, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6aa5711a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_7934b9df, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9c98e8af, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9d5941c7, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9ef61e5c, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_9faf76ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_9fdb5406, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a21dc435, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_a27582fa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a46f0df5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a788683e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a8acdac0, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_a904d137, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_adb14c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c0b731, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c5e2426d, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_c6aa82f7, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ce9b3f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c6ebf8dd, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c74f796f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c82dc1ff, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_caaebcba, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cd7374a0, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cde8b071, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cf47a43f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_cf59f215, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_d088982c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d1090e34, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d24b2d85, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d580173f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d6bf0472, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_d9709180, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_d9f95eef, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_fcab4871, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_ff9ee76e, [InstrStage<1, [SLOT0]>]> ];
+ InstrItinData <tc_002cb246, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0371abea, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_05c070ec, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_05d3a09b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0663f615, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_096199d3, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_0a705168, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_0ae0825c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0b2be201, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_0d8f5752, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_13bfbcf9, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_14b272fa, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_14b5c689, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_15aa71c5, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_174516e8, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_17e0d2cd, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_1a2fd869, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1ad90acd, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_1ae57e39, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1b6f7cec, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1c4528a2, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_1c80410a, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1d81e60e, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1fc97744, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_20cdee80, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2332b92e, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_24b66c99, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_25a78932, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_2b8da4c2, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_2eabeebe, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2f7c551d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2ff964b4, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_30b9bb4a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_32779c6f, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_36153880, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_362c6592, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3962fa26, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_39dfefe8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3a867367, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_3b470976, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_3b5b7ef9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3bd75825, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_3c76b0ff, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3d495a39, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_40116ca8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_434c8e1e, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_4414d8b1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_44d3da28, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_4560740b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4837eefb, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_49a8207d, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_4ae7b58b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4b68bce4, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_4c5ba658, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4d5fa3a1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_53559e35, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_56336eb0, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_56f114f4, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_57890846, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5a2711e5, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5abb5e3f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_5aee39f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5b54b33f, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5b7c0967, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5bf126a6, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_5d7f5414, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_5ef37dc4, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_6132ba3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_61830035, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_640086b5, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_643b4717, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_67435e81, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_675e4897, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_679309b8, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_6b25e783, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_703e822c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_7186d325, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_7646c131, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_76851da1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_779080bf, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_784490da, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_785f65a7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_7a91e76a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_838b34ea, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_85c9c08f, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_85d5d03f, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_862b3e70, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_88b4f13d, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_89e94ad3, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_8b121f4a, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_8b3e402a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_8c945be0, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_8c99de45, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_8d9d0154, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_8fb7ab1b, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9461ff31, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_946df596, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9ad9998f, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_9bfd761f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9c3ecd83, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9ca930f7, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9da59d12, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9debc299, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9e313203, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9fc3dae0, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a1123dda, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a1c00888, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a58fd5cc, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a5d4aeec, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a6b1eca9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a813cf9a, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_a9d88b22, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_ae53734a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b31c2e97, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b343892a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b43e7930, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b4407292, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b44ecf75, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b4b5c03a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b51dc29a, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_b83e6d73, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_b857bf4e, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b8bffe55, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_b90a29b1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b9272d6c, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_b9e09e03, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_bab0eed9, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_bafaade3, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_bcf98408, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_bd8382d1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_bdceeac1, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_be9602ff, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_bf061958, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_bfec0f01, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_c4db48cb, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_c4f596e3, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_c79a189f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_c8ce0b5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cd374165, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cf8126ae, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_cfd8378a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d08ee0f4, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d1aa9eaa, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d2e63d61, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d5b7b0c1, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_d5c0729a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d63f638c, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d65dbf51, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d773585a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d9d43ecb, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_da4a37ed, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_da97ee82, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_db2bce9c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_de4df740, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_de554571, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_df3319ed, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_e06f432a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_e4a7f9f0, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_e4b3cb20, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e78647bd, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_e86aa961, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e93a3d71, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_e95795ec, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_e9f3243f, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_f429765c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f675fee8, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f8e23f0b, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_f9058dd7, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_fc3999b4, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_fcc3ddf9, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_fe211424, [InstrStage<1, [SLOT0]>]> ];
}
class DepScalarItinV55 {
list<InstrItinData> DepScalarItinV55_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_1ad90acd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_1b9c9ee5, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_1d81e60e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_32779c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_36153880, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
- []>,
-
- InstrItinData <tc_395dc00f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1],
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
- InstrItinData <tc_3d04548d, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 2],
+ InstrItinData <tc_3a867367, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_3b470976, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
+ InstrItinData <tc_4414d8b1, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4560740b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
+ InstrItinData <tc_4837eefb, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4d5fa3a1, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b54b33f, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 3, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_675e4897, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_679309b8, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_76851da1, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_88b4f13d, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_897d1a9d, /*tc_1*/
+ InstrItinData <tc_946df596, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9bfd761f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9da59d12, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c00ce8d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_2early*/
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b343892a, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b8bffe55, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c5e2426d, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_bd8382d1, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_caaebcba, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_bfec0f01, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ InstrItinData <tc_d08ee0f4, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d1aa9eaa, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_3stall*/
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e06f432a, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e4a7f9f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_f429765c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>
];
}
class DepScalarItinV60 {
list<InstrItinData> DepScalarItinV60_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_1ad90acd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_1d81e60e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_36153880, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [],
[]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
+ InstrItinData <tc_40116ca8, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_679309b8, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_76851da1, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_897d1a9d, /*tc_1*/
+ InstrItinData <tc_946df596, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_2early*/
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c2f7d806, /*tc_2*/
+ InstrItinData <tc_cf8126ae, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d1aa9eaa, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV62 {
+ list<InstrItinData> DepScalarItinV62_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
+ InstrItinData <tc_05c070ec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_1ae57e39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20cdee80, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_2332b92e, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
+ InstrItinData <tc_36153880, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
-class DepScalarItinV62 {
- list<InstrItinData> DepScalarItinV62_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
+ InstrItinData <tc_3d495a39, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_434c8e1e, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4560740b, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_53559e35, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56336eb0, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56f114f4, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_5b7c0967, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_5d7f5414, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_640086b5, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_643b4717, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
+ InstrItinData <tc_779080bf, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_85c9c08f, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_2early*/
+ InstrItinData <tc_85d5d03f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_862b3e70, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_367f7f3d, /*tc_st*/
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b121f4a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8c99de45, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9461ff31, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
[]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2],
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae53734a, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_481e5e5c, /*tc_2early*/
+ InstrItinData <tc_b31c2e97, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b51dc29a, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_b9272d6c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_b9e09e03, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cd374165, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cfd8378a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5b7b0c1, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d65dbf51, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d9d43ecb, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_db2bce9c, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
+ InstrItinData <tc_de4df740, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_e78647bd, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_897d1a9d, /*tc_2*/
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV65 {
+ list<InstrItinData> DepScalarItinV65_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05c070ec, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2],
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1ae57e39, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1b6f7cec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_1c4528a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c80410a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_2early*/
+ InstrItinData <tc_20cdee80, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_2332b92e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2eabeebe, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_32779c6f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_36153880, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_3d495a39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_434c8e1e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_4414d8b1, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3x*/
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ InstrItinData <tc_53559e35, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_56336eb0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_56f114f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_5b7c0967, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d7f5414, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6132ba3d, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_640086b5, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
+ InstrItinData <tc_643b4717, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_85c9c08f, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_85d5d03f, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_8b121f4a, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_8b3e402a, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
+ InstrItinData <tc_9461ff31, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
-class DepScalarItinV65 {
- list<InstrItinData> DepScalarItinV65_list = [
- InstrItinData <tc_00afc57e, /*tc_2*/
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e313203, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a1c00888, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_038a1342, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_04c9decc, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_ae53734a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_10b97e27, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b31c2e97, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15411484, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_181af5d0, /*tc_1*/
+ InstrItinData <tc_b51dc29a, /*tc_1*/
[InstrStage<1, [SLOT2]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b9272d6c, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b9e09e03, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1e856f58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29175780, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b2f4060, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f185f5c, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_cd374165, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_351fed2d, /*tc_1*/
+ InstrItinData <tc_cfd8378a, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3669266a, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_367f7f3d, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_d08ee0f4, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
- []>,
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_d5b7b0c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_3cb8ea06, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3da80ba5, /*tc_ld*/
+ InstrItinData <tc_d63f638c, /*tc_ld*/
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ InstrItinData <tc_d65dbf51, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_d773585a, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d9d43ecb, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_481e5e5c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_db2bce9c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de4df740, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
+ InstrItinData <tc_de554571, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e78647bd, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_523fcf30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ InstrItinData <tc_e9f3243f, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_52d7bbea, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bc8a6a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV66 {
+ list<InstrItinData> DepScalarItinV66_list = [
+ InstrItinData <tc_002cb246, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_540fdfbc, /*tc_1*/
+ InstrItinData <tc_0371abea, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05c070ec, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_05d3a09b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0663f615, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_096199d3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57288781, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_0a705168, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ae0825c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_0b2be201, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_59a01ead, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0d8f5752, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_13bfbcf9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b272fa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_14b5c689, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5f6847a1, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_15aa71c5, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_174516e8, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_17e0d2cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_63fe3df7, /*tc_latepredldaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1a2fd869, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_1ad90acd, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1ae57e39, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_1b6f7cec, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_1c4528a2, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c80410a, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_1d81e60e, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_68cb12ce, /*tc_1*/
+ InstrItinData <tc_1fc97744, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20cdee80, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
+ InstrItinData <tc_2332b92e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24b66c99, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6ebb4a12, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_25a78932, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6efc556e, /*tc_1*/
+ InstrItinData <tc_2eabeebe, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_6fa4db47, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2f7c551d, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_73043bf4, /*tc_1*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2ff964b4, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_30b9bb4a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_32779c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_74e47fd9, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ InstrItinData <tc_36153880, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_362c6592, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3962fa26, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_39dfefe8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_3a867367, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a830544, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_3b470976, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_3b5b7ef9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_84df2cd3, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_3bd75825, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_3c76b0ff, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_855b0b61, /*tc_1*/
+ InstrItinData <tc_3d495a39, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ InstrItinData <tc_40116ca8, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_897d1a9d, /*tc_2*/
+ InstrItinData <tc_434c8e1e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4414d8b1, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_44d3da28, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fd5f294, /*tc_3x*/
+ InstrItinData <tc_4560740b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4837eefb, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fe6b782, /*tc_1*/
+ InstrItinData <tc_49a8207d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4ae7b58b, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_4b68bce4, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c5ba658, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_53559e35, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56336eb0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56f114f4, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_90f3e30c, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_57890846, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5a2711e5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_97743097, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2, 2],
+ InstrItinData <tc_5abb5e3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5aee39f7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b54b33f, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b7c0967, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_994333cd, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_5bf126a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_999d32db, /*tc_3stall*/
+ InstrItinData <tc_5d7f5414, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_99be14ca, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ InstrItinData <tc_5ef37dc4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6132ba3d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_61830035, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_640086b5, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_643b4717, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_67435e81, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_675e4897, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_679309b8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9faf76ae, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_6b25e783, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ InstrItinData <tc_703e822c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7186d325, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7646c131, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76851da1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_779080bf, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_784490da, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_785f65a7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7a91e76a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838b34ea, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a21dc435, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_85c9c08f, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a27582fa, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_85d5d03f, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_862b3e70, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a46f0df5, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_88b4f13d, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_89e94ad3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8b121f4a, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8b3e402a, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b13761ae, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_8c99de45, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_8fb7ab1b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b44c6e2a, /*tc_2*/
+ InstrItinData <tc_9461ff31, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_946df596, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_9bfd761f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9c3ecd83, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_9ca930f7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ InstrItinData <tc_9da59d12, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9debc299, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9e313203, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9fc3dae0, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be706f30, /*tc_1*/
+ InstrItinData <tc_a1123dda, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_a1c00888, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_a58fd5cc, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a6b1eca9, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a813cf9a, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
+ InstrItinData <tc_a9d88b22, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6aa82f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ae53734a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_b31c2e97, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c6ebf8dd, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_b343892a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_b43e7930, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4407292, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b44ecf75, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b4b5c03a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
+ InstrItinData <tc_b51dc29a, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b83e6d73, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b857bf4e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_caaebcba, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b8bffe55, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_b90a29b1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_b9272d6c, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf47a43f, /*tc_ld*/
+ InstrItinData <tc_b9e09e03, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bab0eed9, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bafaade3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ InstrItinData <tc_bcf98408, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d24b2d85, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_bdceeac1, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_be9602ff, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ InstrItinData <tc_bf061958, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ InstrItinData <tc_bfec0f01, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
+ InstrItinData <tc_c4db48cb, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_dbdffe3d, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_c4f596e3, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c79a189f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e0739b8c, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2, 2],
+ InstrItinData <tc_cd374165, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e99bfa, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_cf8126ae, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_cfd8378a, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d08ee0f4, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ InstrItinData <tc_d1aa9eaa, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_d2e63d61, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d5b7b0c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d5c0729a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e913dc32, /*tc_3x*/
+ InstrItinData <tc_d63f638c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d65dbf51, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d773585a, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9c822f7, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4],
- [Hex_FWD]>,
-
- InstrItinData <tc_e9fae2d6, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_d9d43ecb, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_da4a37ed, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ InstrItinData <tc_da97ee82, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f2704b9a, /*tc_1*/
+ InstrItinData <tc_db2bce9c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de4df740, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_de554571, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_df3319ed, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e06f432a, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
- InstrItinData <tc_f49e76f4, /*tc_2*/
+ InstrItinData <tc_e4a7f9f0, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e4b3cb20, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_e78647bd, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e86aa961, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e93a3d71, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e95795ec, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9f3243f, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f429765c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_f675fee8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_f8e23f0b, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_f9058dd7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fc3999b4, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_fcc3ddf9, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fe211424, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>
];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
index 7e06ccede6e7..81e3971e21d2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.h
@@ -1,4 +1,4 @@
-//===- HexagonDepITypes.h -------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
namespace llvm {
namespace HexagonII {
enum Type {
@@ -45,6 +44,7 @@ enum Type {
TypeCVI_VX = 29,
TypeCVI_VX_DV = 30,
TypeCVI_VX_LATE = 31,
+ TypeCVI_ZW = 32,
TypeDUPLEX = 33,
TypeENDLOOP = 34,
TypeEXTENDER = 35,
@@ -59,7 +59,7 @@ enum Type {
TypeS_2op = 44,
TypeS_3op = 45,
TypeV2LDST = 48,
- TypeV4LDST = 49
+ TypeV4LDST = 49,
};
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
index 0a385bf938fe..f694062a5232 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepITypes.td
@@ -1,4 +1,4 @@
-//===- HexagonDepITypes.td ------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,8 +9,7 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
-class IType<bits<6> t> { bits<6> Value = t; }
+class IType<bits<7> t> { bits<7> Value = t; }
def TypeALU32_2op : IType<0>;
def TypeALU32_3op : IType<1>;
def TypeALU32_ADDI : IType<2>;
@@ -43,6 +42,7 @@ def TypeCVI_VS_VX : IType<28>;
def TypeCVI_VX : IType<29>;
def TypeCVI_VX_DV : IType<30>;
def TypeCVI_VX_LATE : IType<31>;
+def TypeCVI_ZW : IType<32>;
def TypeDUPLEX : IType<33>;
def TypeENDLOOP : IType<34>;
def TypeEXTENDER : IType<35>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
index 9f98da3a1dee..ffe212ef9d97 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -1,4 +1,4 @@
-//===- HexagonDepInstrFormats.td ------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
class Enc_890909 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
@@ -61,14 +60,6 @@ class Enc_27b757 : OpcodeHexagon {
bits <5> Vs32;
let Inst{4-0} = Vs32{4-0};
}
-class Enc_8d04c3 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_1de724 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -87,12 +78,6 @@ class Enc_0e41fa : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_2a736a : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_3d6d37 : OpcodeHexagon {
bits <2> Qs4;
let Inst{6-5} = Qs4{1-0};
@@ -121,14 +106,6 @@ class Enc_802dc0 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
}
-class Enc_6a4549 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_6b197f : OpcodeHexagon {
bits <4> Ii;
let Inst{8-5} = Ii{3-0};
@@ -137,22 +114,6 @@ class Enc_6b197f : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_1f3376 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
-}
-class Enc_1f5d8f : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_51436c : OpcodeHexagon {
bits <16> Ii;
let Inst{23-22} = Ii{15-14};
@@ -249,6 +210,14 @@ class Enc_d7dc10 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
+class Enc_6baed4 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_736575 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -291,14 +260,6 @@ class Enc_509701 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_c84567 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_830e5d : OpcodeHexagon {
bits <8> Ii;
let Inst{12-5} = Ii{7-0};
@@ -310,12 +271,6 @@ class Enc_830e5d : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_ae0040 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <6> Sd64;
- let Inst{5-0} = Sd64{5-0};
-}
class Enc_79b8c8 : OpcodeHexagon {
bits <6> Ii;
let Inst{6-3} = Ii{5-2};
@@ -336,16 +291,6 @@ class Enc_58a8bf : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_e8ddd5 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_041d7b : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -369,14 +314,6 @@ class Enc_f44229 : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_fc563d : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_aad80c : OpcodeHexagon {
bits <5> Vuu32;
let Inst{12-8} = Vuu32{4-0};
@@ -434,6 +371,14 @@ class Enc_ee5ed0 : OpcodeHexagon {
bits <2> n1;
let Inst{9-8} = n1{1-0};
}
+class Enc_bddee3 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vyyyy32;
+ let Inst{4-0} = Vyyyy32{4-0};
+ bits <3> Rx8;
+ let Inst{18-16} = Rx8{2-0};
+}
class Enc_935d9b : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -573,6 +518,14 @@ class Enc_27fd0e : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
+class Enc_d7bc34 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vyyyy32;
+ let Inst{4-0} = Vyyyy32{4-0};
+}
class Enc_93af4c : OpcodeHexagon {
bits <7> Ii;
let Inst{10-4} = Ii{6-0};
@@ -620,12 +573,6 @@ class Enc_14640c : OpcodeHexagon {
let Inst{24-22} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
-class Enc_2516bf : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_31db33 : OpcodeHexagon {
bits <2> Qt4;
let Inst{6-5} = Qt4{1-0};
@@ -656,24 +603,6 @@ class Enc_784502 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9a9d62 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_3a81ac : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6413b6 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -703,13 +632,13 @@ class Enc_84bff1 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_74aef2 : OpcodeHexagon {
+class Enc_f4413a : OpcodeHexagon {
bits <4> Ii;
let Inst{8-5} = Ii{3-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
@@ -753,16 +682,6 @@ class Enc_e39bb2 : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_7db2f8 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{13-9} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{8-4} = Vv32{4-0};
- bits <4> Vdd16;
- let Inst{3-0} = Vdd16{3-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_1b64fb : OpcodeHexagon {
bits <16> Ii;
let Inst{26-25} = Ii{15-14};
@@ -772,6 +691,16 @@ class Enc_1b64fb : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
+class Enc_c1d806 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <2> Qe4;
+ let Inst{6-5} = Qe4{1-0};
+}
class Enc_c6220b : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -841,10 +770,6 @@ class Enc_fcf7a7 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_2c3281 : OpcodeHexagon {
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_55355c : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -877,6 +802,16 @@ class Enc_6185fe : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
+class Enc_74aef2 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_cd4705 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -920,10 +855,6 @@ class Enc_fef969 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_b2ffce : OpcodeHexagon {
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_63eaeb : OpcodeHexagon {
bits <2> Ii;
let Inst{1-0} = Ii{1-0};
@@ -948,12 +879,6 @@ class Enc_372c9d : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9e9047 : OpcodeHexagon {
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
-}
class Enc_4dff07 : OpcodeHexagon {
bits <2> Qv4;
let Inst{12-11} = Qv4{1-0};
@@ -1000,16 +925,6 @@ class Enc_b388cf : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_880793 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{2-0} = Qt8{2-0};
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_ad1c74 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -1086,14 +1001,6 @@ class Enc_88d4d9 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
-class Enc_c0cdde : OpcodeHexagon {
- bits <9> Ii;
- let Inst{13-5} = Ii{8-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
class Enc_226535 : OpcodeHexagon {
bits <8> Ii;
let Inst{12-7} = Ii{7-2};
@@ -1102,14 +1009,6 @@ class Enc_226535 : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
-class Enc_96f0fd : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <3> Qdd8;
- let Inst{2-0} = Qdd8{2-0};
-}
class Enc_31aa6a : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -1120,12 +1019,6 @@ class Enc_31aa6a : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_932b58 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
-}
class Enc_397f23 : OpcodeHexagon {
bits <8> Ii;
let Inst{13-13} = Ii{7-7};
@@ -1192,14 +1085,6 @@ class Enc_01d3d0 : OpcodeHexagon {
bits <5> Vdd32;
let Inst{4-0} = Vdd32{4-0};
}
-class Enc_3126d7 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_b0e9d8 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1209,6 +1094,14 @@ class Enc_b0e9d8 : OpcodeHexagon {
bits <5> Rx32;
let Inst{4-0} = Rx32{4-0};
}
+class Enc_1bd127 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vdddd32;
+ let Inst{4-0} = Vdddd32{4-0};
+}
class Enc_3694bd : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -1276,12 +1169,6 @@ class Enc_88c16c : OpcodeHexagon {
bits <5> Rxx32;
let Inst{4-0} = Rxx32{4-0};
}
-class Enc_e7408c : OpcodeHexagon {
- bits <6> Sss64;
- let Inst{21-16} = Sss64{5-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_770858 : OpcodeHexagon {
bits <2> Ps4;
let Inst{6-5} = Ps4{1-0};
@@ -1323,15 +1210,14 @@ class Enc_412ff0 : OpcodeHexagon {
bits <5> Rxx32;
let Inst{12-8} = Rxx32{4-0};
}
-class Enc_8e9fbd : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
+class Enc_ef601b : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
}
class Enc_c9a18e : OpcodeHexagon {
bits <11> Ii;
@@ -1356,19 +1242,6 @@ class Enc_e6abcf : OpcodeHexagon {
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
}
-class Enc_6339d5 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Ru32;
- let Inst{12-8} = Ru32{4-0};
- bits <5> Rt32;
- let Inst{4-0} = Rt32{4-0};
-}
class Enc_d6990d : OpcodeHexagon {
bits <5> Vuu32;
let Inst{12-8} = Vuu32{4-0};
@@ -1377,16 +1250,6 @@ class Enc_d6990d : OpcodeHexagon {
bits <5> Vxx32;
let Inst{4-0} = Vxx32{4-0};
}
-class Enc_6c4697 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6c9440 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1445,15 +1308,13 @@ class Enc_9d1247 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_f4413a : OpcodeHexagon {
- bits <4> Ii;
- let Inst{8-5} = Ii{3-0};
- bits <2> Pt4;
- let Inst{10-9} = Pt4{1-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_7b7ba8 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
class Enc_f7430e : OpcodeHexagon {
bits <4> Ii;
@@ -1531,12 +1392,6 @@ class Enc_a803e0 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
-class Enc_fde0e3 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_45364e : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
@@ -1557,12 +1412,6 @@ class Enc_b909d2 : OpcodeHexagon {
let Inst{13-13} = n1{1-1};
let Inst{8-8} = n1{0-0};
}
-class Enc_790d6e : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_e6c957 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -1570,15 +1419,6 @@ class Enc_e6c957 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_fa3ba4 : OpcodeHexagon {
- bits <14> Ii;
- let Inst{26-25} = Ii{13-12};
- let Inst{13-5} = Ii{11-3};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_0d8870 : OpcodeHexagon {
bits <12> Ii;
let Inst{26-25} = Ii{11-10};
@@ -1623,14 +1463,6 @@ class Enc_0ed752 : OpcodeHexagon {
bits <5> Cdd32;
let Inst{4-0} = Cdd32{4-0};
}
-class Enc_908985 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_143445 : OpcodeHexagon {
bits <13> Ii;
let Inst{26-25} = Ii{12-11};
@@ -1658,16 +1490,6 @@ class Enc_3e3989 : OpcodeHexagon {
let Inst{25-22} = n1{4-1};
let Inst{8-8} = n1{0-0};
}
-class Enc_12dd8f : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
class Enc_152467 : OpcodeHexagon {
bits <5> Ii;
let Inst{8-5} = Ii{4-1};
@@ -1676,22 +1498,23 @@ class Enc_152467 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_6b1bc4 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <3> Qt8;
- let Inst{10-8} = Qt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_daea09 : OpcodeHexagon {
- bits <17> Ii;
- let Inst{23-22} = Ii{16-15};
- let Inst{20-16} = Ii{14-10};
- let Inst{13-13} = Ii{9-9};
- let Inst{7-1} = Ii{8-2};
+class Enc_9ac432 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
bits <2> Pu4;
- let Inst{9-8} = Pu4{1-0};
+ let Inst{7-6} = Pu4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_a90628 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
}
class Enc_f37377 : OpcodeHexagon {
bits <8> Ii;
@@ -1712,12 +1535,6 @@ class Enc_a198f6 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_a265b7 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_4e4a80 : OpcodeHexagon {
bits <2> Qs4;
let Inst{6-5} = Qs4{1-0};
@@ -1728,16 +1545,6 @@ class Enc_4e4a80 : OpcodeHexagon {
bits <5> Vvv32;
let Inst{4-0} = Vvv32{4-0};
}
-class Enc_8d5d98 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
-}
class Enc_3dac0b : OpcodeHexagon {
bits <2> Qt4;
let Inst{6-5} = Qt4{1-0};
@@ -1780,16 +1587,6 @@ class Enc_2df31d : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_b0e553 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_25bef0 : OpcodeHexagon {
bits <16> Ii;
let Inst{26-25} = Ii{15-14};
@@ -1905,10 +1702,14 @@ class Enc_bd1cbc : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_d0fe02 : OpcodeHexagon {
- bits <5> Rxx32;
- let Inst{20-16} = Rxx32{4-0};
- bits <0> sgp10;
+class Enc_c85e2a : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
class Enc_a30110 : OpcodeHexagon {
bits <5> Vu32;
@@ -1920,24 +1721,12 @@ class Enc_a30110 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_f3f408 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
-}
-class Enc_ce4c54 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
+class Enc_33f8ba : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
class Enc_690862 : OpcodeHexagon {
bits <13> Ii;
@@ -1949,20 +1738,6 @@ class Enc_690862 : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_e570b0 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_3c46e8 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_2a3787 : OpcodeHexagon {
bits <13> Ii;
let Inst{26-25} = Ii{12-11};
@@ -2010,22 +1785,6 @@ class Enc_729ff7 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_5883d0 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_ff0e49 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <6> Sdd64;
- let Inst{5-0} = Sdd64{5-0};
-}
class Enc_217147 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
@@ -2060,14 +1819,6 @@ class Enc_541f26 : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
-class Enc_9aae4a : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <3> Qd8;
- let Inst{2-0} = Qd8{2-0};
-}
class Enc_724154 : OpcodeHexagon {
bits <6> II;
let Inst{5-0} = II{5-0};
@@ -2114,16 +1865,6 @@ class Enc_b84c4c : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_9ac432 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <2> Pu4;
- let Inst{7-6} = Pu4{1-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
class Enc_8203bb : OpcodeHexagon {
bits <6> Ii;
let Inst{12-7} = Ii{5-0};
@@ -2228,12 +1969,6 @@ class Enc_96ce4f : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_2bbae6 : OpcodeHexagon {
- bits <6> Ss64;
- let Inst{21-16} = Ss64{5-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
class Enc_143a3c : OpcodeHexagon {
bits <6> Ii;
let Inst{13-8} = Ii{5-0};
@@ -2281,13 +2016,14 @@ class Enc_de0214 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_a90628 : OpcodeHexagon {
- bits <2> Qv4;
- let Inst{23-22} = Qv4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+class Enc_daea09 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{23-22} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
}
class Enc_fda92c : OpcodeHexagon {
bits <17> Ii;
@@ -2365,26 +2101,6 @@ class Enc_b43b67 : OpcodeHexagon {
bits <2> Qx4;
let Inst{6-5} = Qx4{1-0};
}
-class Enc_1cd70f : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
-class Enc_3a527f : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_4aca3a : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -2403,12 +2119,6 @@ class Enc_b38ffc : OpcodeHexagon {
bits <4> Rt16;
let Inst{3-0} = Rt16{3-0};
}
-class Enc_5c3a80 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{10-8} = Qt8{2-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
class Enc_cda00a : OpcodeHexagon {
bits <12> Ii;
let Inst{19-16} = Ii{11-8};
@@ -2426,24 +2136,6 @@ class Enc_2fbf3c : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_a4ae28 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
-class Enc_dd5f9f : OpcodeHexagon {
- bits <3> Qtt8;
- let Inst{2-0} = Qtt8{2-0};
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_70b24b : OpcodeHexagon {
bits <6> Ii;
let Inst{8-5} = Ii{5-2};
@@ -2490,16 +2182,6 @@ class Enc_08d755 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_a7ca29 : OpcodeHexagon {
- bits <3> Qt8;
- let Inst{2-0} = Qt8{2-0};
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_1178da : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -2518,14 +2200,6 @@ class Enc_8dbe85 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_17a474 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_5a18b3 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -2586,14 +2260,6 @@ class Enc_12b6e9 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_9a895f : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_6f70ca : OpcodeHexagon {
bits <8> Ii;
let Inst{8-4} = Ii{7-3};
@@ -2605,12 +2271,7 @@ class Enc_7222b7 : OpcodeHexagon {
let Inst{1-0} = Qd4{1-0};
}
class Enc_e3b0c4 : OpcodeHexagon {
-}
-class Enc_d7e8ba : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
+
}
class Enc_a255dc : OpcodeHexagon {
bits <3> Ii;
@@ -2628,16 +2289,6 @@ class Enc_cb785b : OpcodeHexagon {
bits <5> Vdd32;
let Inst{4-0} = Vdd32{4-0};
}
-class Enc_5b76ab : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-8} = Ii{8-3};
- let Inst{2-0} = Ii{2-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_cb4b4e : OpcodeHexagon {
bits <2> Pu4;
let Inst{6-5} = Pu4{1-0};
@@ -2648,23 +2299,13 @@ class Enc_cb4b4e : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_fbacc2 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
-class Enc_2ad23d : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
+class Enc_1f5d8f : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
class Enc_9cdba7 : OpcodeHexagon {
bits <8> Ii;
@@ -2683,10 +2324,6 @@ class Enc_5cd7e9 : OpcodeHexagon {
bits <5> Ryy32;
let Inst{4-0} = Ryy32{4-0};
}
-class Enc_e7c9de : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
-}
class Enc_454a26 : OpcodeHexagon {
bits <2> Pt4;
let Inst{9-8} = Pt4{1-0};
@@ -2786,14 +2423,6 @@ class Enc_d2c7f1 : OpcodeHexagon {
bits <2> Pe4;
let Inst{6-5} = Pe4{1-0};
}
-class Enc_dcfcbb : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_3680c2 : OpcodeHexagon {
bits <7> Ii;
let Inst{11-5} = Ii{6-0};
@@ -2822,31 +2451,13 @@ class Enc_e957fb : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
-class Enc_2146c1 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <3> Qss8;
- let Inst{2-0} = Qss8{2-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
-class Enc_a662ae : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_8f7cc3 : OpcodeHexagon {
- bits <3> Qtt8;
- let Inst{10-8} = Qtt8{2-0};
- bits <3> Qdd8;
- let Inst{5-3} = Qdd8{2-0};
+class Enc_c0cdde : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
class Enc_c9e3bc : OpcodeHexagon {
bits <4> Ii;
@@ -2886,33 +2497,18 @@ class Enc_6f83e7 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_46f33d : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
-}
-class Enc_c1652e : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_6339d5 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <3> Qd8;
- let Inst{5-3} = Qd8{2-0};
-}
-class Enc_b5b643 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
-class Enc_85daf5 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
+ let Inst{4-0} = Rt32{4-0};
}
class Enc_d483b9 : OpcodeHexagon {
bits <1> Ii;
@@ -2952,13 +2548,14 @@ class Enc_6c9ee0 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_72a92d : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vxx32;
- let Inst{7-3} = Vxx32{4-0};
+class Enc_fa3ba4 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-5} = Ii{11-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
class Enc_44661f : OpcodeHexagon {
bits <1> Mu2;
@@ -3006,14 +2603,6 @@ class Enc_da664b : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_7b7ba8 : OpcodeHexagon {
- bits <2> Qu4;
- let Inst{9-8} = Qu4{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
-}
class Enc_47ee5e : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -3116,14 +2705,6 @@ class Enc_8e583a : OpcodeHexagon {
let Inst{25-23} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
-class Enc_334c2b : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_b886fd : OpcodeHexagon {
bits <5> Ii;
let Inst{6-3} = Ii{4-1};
@@ -3177,36 +2758,12 @@ class Enc_8dbdfe : OpcodeHexagon {
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_7dc746 : OpcodeHexagon {
- bits <3> Quu8;
- let Inst{10-8} = Quu8{2-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <3> Qdd8;
- let Inst{5-3} = Qdd8{2-0};
-}
class Enc_90cd8b : OpcodeHexagon {
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_b8513b : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{20-16} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
-class Enc_b3bac4 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
-}
class Enc_bd0b33 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -3216,16 +2773,6 @@ class Enc_bd0b33 : OpcodeHexagon {
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_843e80 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{7-3} = Vd32{4-0};
- bits <3> Qxx8;
- let Inst{2-0} = Qxx8{2-0};
-}
class Enc_8b8927 : OpcodeHexagon {
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
@@ -3359,6 +2906,16 @@ class Enc_e07374 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
+class Enc_e0820b : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
class Enc_323f2d : OpcodeHexagon {
bits <6> II;
let Inst{11-8} = II{5-2};
@@ -3381,16 +2938,6 @@ class Enc_1a9974 : OpcodeHexagon {
bits <5> Rtt32;
let Inst{4-0} = Rtt32{4-0};
}
-class Enc_9ce456 : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-8} = Ii{8-3};
- let Inst{2-0} = Ii{2-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_5de85f : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3416,14 +2963,6 @@ class Enc_0b51ce : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_b5e54d : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
class Enc_b4e6cf : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
@@ -3479,16 +3018,6 @@ class Enc_645d54 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_b5d5a7 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{7-3} = Vs32{4-0};
-}
class Enc_667b39 : OpcodeHexagon {
bits <5> Css32;
let Inst{20-16} = Css32{4-0};
@@ -3511,6 +3040,14 @@ class Enc_163a3c : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
+class Enc_a75aa6 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+}
class Enc_b087ac : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
@@ -3519,6 +3056,14 @@ class Enc_b087ac : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
+class Enc_691712 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
class Enc_b1e1fb : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3546,16 +3091,6 @@ class Enc_b8c967 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_f106e0 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{8-4} = Vv32{4-0};
- bits <5> Vt32;
- let Inst{13-9} = Vt32{4-0};
- bits <4> Vdd16;
- let Inst{3-0} = Vdd16{3-0};
-}
class Enc_fb6577 : OpcodeHexagon {
bits <2> Pu4;
let Inst{9-8} = Pu4{1-0};
@@ -3564,20 +3099,6 @@ class Enc_fb6577 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_37c406 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <4> Vdd16;
- let Inst{7-4} = Vdd16{3-0};
-}
-class Enc_403871 : OpcodeHexagon {
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
class Enc_2bae10 : OpcodeHexagon {
bits <4> Ii;
let Inst{10-8} = Ii{3-1};
@@ -3586,22 +3107,6 @@ class Enc_2bae10 : OpcodeHexagon {
bits <4> Rd16;
let Inst{3-0} = Rd16{3-0};
}
-class Enc_f3adb6 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_aac08c : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
-}
class Enc_c4dc92 : OpcodeHexagon {
bits <2> Qv4;
let Inst{23-22} = Qv4{1-0};
@@ -3743,12 +3248,14 @@ class Enc_134437 : OpcodeHexagon {
bits <2> Qd4;
let Inst{1-0} = Qd4{1-0};
}
-class Enc_33f8ba : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-8} = Ii{7-3};
- let Inst{4-2} = Ii{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_f3f408 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
class Enc_97d666 : OpcodeHexagon {
bits <4> Rs16;
@@ -3766,16 +3273,6 @@ class Enc_f82eaf : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_57e245 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
class Enc_69d63b : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
@@ -3842,24 +3339,6 @@ class Enc_7eaeb6 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_274a4c : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{20-16} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{2-0} = Rt8{2-0};
- bits <5> Vx32;
- let Inst{7-3} = Vx32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
-}
-class Enc_aceeef : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_f55a0c : OpcodeHexagon {
bits <6> Ii;
let Inst{11-8} = Ii{5-2};
@@ -3898,16 +3377,6 @@ class Enc_7b523d : OpcodeHexagon {
bits <5> Vxx32;
let Inst{4-0} = Vxx32{4-0};
}
-class Enc_c39a8b : OpcodeHexagon {
- bits <16> Ii;
- let Inst{21-21} = Ii{15-15};
- let Inst{13-8} = Ii{14-9};
- let Inst{2-0} = Ii{8-6};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vss32;
- let Inst{7-3} = Vss32{4-0};
-}
class Enc_47ef61 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
@@ -4006,6 +3475,14 @@ class Enc_a6ce9c : OpcodeHexagon {
bits <4> Rs16;
let Inst{7-4} = Rs16{3-0};
}
+class Enc_3b7631 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vdddd32;
+ let Inst{4-0} = Vdddd32{4-0};
+ bits <3> Rx8;
+ let Inst{18-16} = Rx8{2-0};
+}
class Enc_eca7c8 : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
@@ -4017,16 +3494,6 @@ class Enc_eca7c8 : OpcodeHexagon {
bits <5> Rt32;
let Inst{4-0} = Rt32{4-0};
}
-class Enc_598f6c : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
-}
-class Enc_41dcc3 : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{7-3} = Vdd32{4-0};
-}
class Enc_4b39e4 : OpcodeHexagon {
bits <3> Ii;
let Inst{7-5} = Ii{2-0};
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index b6824fa33106..3ef1c49eb7ee 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -1,4 +1,4 @@
-//===- HexagonDepInstrInfo.td ---------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,12 +9,11 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def A2_abs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = abs($Rs32)",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -25,7 +24,7 @@ def A2_absp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = abs($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000100;
let prefersSlot3 = 1;
@@ -34,7 +33,7 @@ def A2_abssat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = abs($Rs32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -46,7 +45,7 @@ def A2_add : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011000;
@@ -62,7 +61,7 @@ def A2_addh_h16_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -74,7 +73,7 @@ def A2_addh_h16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -86,7 +85,7 @@ def A2_addh_h16_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -98,7 +97,7 @@ def A2_addh_h16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -110,7 +109,7 @@ def A2_addh_h16_sat_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -123,7 +122,7 @@ def A2_addh_h16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -136,7 +135,7 @@ def A2_addh_h16_sat_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -149,7 +148,7 @@ def A2_addh_h16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -162,7 +161,7 @@ def A2_addh_l16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -174,7 +173,7 @@ def A2_addh_l16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -186,7 +185,7 @@ def A2_addh_l16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -199,7 +198,7 @@ def A2_addh_l16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -212,7 +211,7 @@ def A2_addi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel {
let Inst{31-28} = 0b1011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -231,7 +230,7 @@ def A2_addp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -242,7 +241,7 @@ def A2_addpsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -254,7 +253,7 @@ def A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110010;
@@ -269,14 +268,14 @@ def A2_addsp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rs32,$Rtt32)",
-tc_897d1a9d, TypeALU64> {
+tc_679309b8, TypeALU64> {
let isPseudo = 1;
}
def A2_addsph : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):raw:hi",
-tc_897d1a9d, TypeALU64>, Enc_a56825 {
+tc_679309b8, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -286,7 +285,7 @@ def A2_addspl : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):raw:lo",
-tc_897d1a9d, TypeALU64>, Enc_a56825 {
+tc_679309b8, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -296,7 +295,7 @@ def A2_and : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = and($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001000;
@@ -312,7 +311,7 @@ def A2_andir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = and($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, ImmRegRel {
let Inst{31-22} = 0b0111011000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -328,7 +327,7 @@ def A2_andp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = and($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -338,7 +337,7 @@ def A2_aslh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = aslh($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000000;
let hasNewValue = 1;
@@ -350,7 +349,7 @@ def A2_asrh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = asrh($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000001;
let hasNewValue = 1;
@@ -362,7 +361,7 @@ def A2_combine_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.h,$Rs32.h)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011100;
@@ -374,7 +373,7 @@ def A2_combine_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.h,$Rs32.l)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011101;
@@ -386,7 +385,7 @@ def A2_combine_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.l,$Rs32.h)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011110;
@@ -398,7 +397,7 @@ def A2_combine_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.l,$Rs32.l)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011111;
@@ -410,7 +409,7 @@ def A2_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, s8_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_b9488031, TypeALU32_2op>, Enc_18c338 {
+tc_5a2711e5, TypeALU32_2op>, Enc_18c338 {
let Inst{31-23} = 0b011111000;
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
@@ -425,7 +424,7 @@ def A2_combinew : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = combine($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_be32a5, PredNewRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_be32a5, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110101000;
@@ -437,7 +436,7 @@ def A2_max : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = max($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101110;
@@ -449,7 +448,7 @@ def A2_maxp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = max($Rss32,$Rtt32)",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -459,7 +458,7 @@ def A2_maxu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = maxu($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101110;
@@ -471,7 +470,7 @@ def A2_maxup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = maxu($Rss32,$Rtt32)",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -481,7 +480,7 @@ def A2_min : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = min($Rt32,$Rs32)",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101101;
@@ -493,7 +492,7 @@ def A2_minp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = min($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -503,7 +502,7 @@ def A2_minu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = minu($Rt32,$Rs32)",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101101;
@@ -515,7 +514,7 @@ def A2_minup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = minu($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -525,7 +524,7 @@ def A2_neg : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = neg($Rs32)",
-tc_68cb12ce, TypeALU32_2op> {
+tc_57890846, TypeALU32_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -535,7 +534,7 @@ def A2_negp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = neg($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000100;
}
@@ -543,7 +542,7 @@ def A2_negsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = neg($Rs32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_5e2823 {
+tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -555,7 +554,7 @@ def A2_nop : HInst<
(outs),
(ins),
"nop",
-tc_6efc556e, TypeALU32_2op>, Enc_e3b0c4 {
+tc_2eabeebe, TypeALU32_2op>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0111111100000000;
}
@@ -563,7 +562,7 @@ def A2_not : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = not($Rs32)",
-tc_68cb12ce, TypeALU32_2op> {
+tc_57890846, TypeALU32_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -573,7 +572,7 @@ def A2_notp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = not($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000100;
}
@@ -581,7 +580,7 @@ def A2_or : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = or($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001001;
@@ -597,7 +596,7 @@ def A2_orir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = or($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, ImmRegRel {
let Inst{31-22} = 0b0111011010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -613,7 +612,7 @@ def A2_orp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = or($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -623,7 +622,7 @@ def A2_paddf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = add($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011000;
@@ -639,7 +638,7 @@ def A2_paddfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = add($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011000;
@@ -656,7 +655,7 @@ def A2_paddif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = add($Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011101001;
let isPredicated = 1;
@@ -676,7 +675,7 @@ def A2_paddifnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = add($Rs32,#$Ii)",
-tc_2b2f4060, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-23} = 0b011101001;
let isPredicated = 1;
@@ -697,7 +696,7 @@ def A2_paddit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = add($Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011101000;
let isPredicated = 1;
@@ -716,7 +715,7 @@ def A2_padditnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = add($Rs32,#$Ii)",
-tc_2b2f4060, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-23} = 0b011101000;
let isPredicated = 1;
@@ -736,7 +735,7 @@ def A2_paddt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = add($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011000;
@@ -751,7 +750,7 @@ def A2_paddtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = add($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011000;
@@ -767,7 +766,7 @@ def A2_pandf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = and($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001000;
@@ -781,7 +780,7 @@ def A2_pandfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = and($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001000;
@@ -796,7 +795,7 @@ def A2_pandt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = and($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001000;
@@ -809,7 +808,7 @@ def A2_pandtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = and($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001000;
@@ -823,7 +822,7 @@ def A2_porf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = or($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001001;
@@ -837,7 +836,7 @@ def A2_porfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = or($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001001;
@@ -852,7 +851,7 @@ def A2_port : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = or($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001001;
@@ -865,7 +864,7 @@ def A2_portnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = or($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001001;
@@ -879,7 +878,7 @@ def A2_psubf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sub($Rt32,$Rs32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011001;
@@ -893,7 +892,7 @@ def A2_psubfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011001;
@@ -908,7 +907,7 @@ def A2_psubt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sub($Rt32,$Rs32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011001;
@@ -921,7 +920,7 @@ def A2_psubtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011001;
@@ -935,7 +934,7 @@ def A2_pxorf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = xor($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001011;
@@ -949,7 +948,7 @@ def A2_pxorfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001011;
@@ -964,7 +963,7 @@ def A2_pxort : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = xor($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001011;
@@ -977,7 +976,7 @@ def A2_pxortnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001011;
@@ -991,7 +990,7 @@ def A2_roundsat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = round($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_cf8126ae, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -1003,7 +1002,7 @@ def A2_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = sat($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -1014,7 +1013,7 @@ def A2_satb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1025,7 +1024,7 @@ def A2_sath : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sath($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1036,7 +1035,7 @@ def A2_satub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satub($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1047,7 +1046,7 @@ def A2_satuh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satuh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1058,7 +1057,7 @@ def A2_sub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011001;
@@ -1073,7 +1072,7 @@ def A2_subh_h16_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1085,7 +1084,7 @@ def A2_subh_h16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1097,7 +1096,7 @@ def A2_subh_h16_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1109,7 +1108,7 @@ def A2_subh_h16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):<<16",
-tc_897d1a9d, TypeALU64>, Enc_bd6011 {
+tc_679309b8, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1121,7 +1120,7 @@ def A2_subh_h16_sat_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1134,7 +1133,7 @@ def A2_subh_h16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1147,7 +1146,7 @@ def A2_subh_h16_sat_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1160,7 +1159,7 @@ def A2_subh_h16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):sat:<<16",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1173,7 +1172,7 @@ def A2_subh_l16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1185,7 +1184,7 @@ def A2_subh_l16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l)",
-tc_1b9c9ee5, TypeALU64>, Enc_bd6011 {
+tc_4414d8b1, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1197,7 +1196,7 @@ def A2_subh_l16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1210,7 +1209,7 @@ def A2_subh_l16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):sat",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1223,7 +1222,7 @@ def A2_subp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = sub($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -1232,7 +1231,7 @@ def A2_subri : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = sub(#$Ii,$Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel {
let Inst{31-22} = 0b0111011001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1248,7 +1247,7 @@ def A2_subsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110110;
@@ -1262,7 +1261,7 @@ def A2_svaddh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vaddh($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110000;
@@ -1275,7 +1274,7 @@ def A2_svaddhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vaddh($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110001;
@@ -1290,7 +1289,7 @@ def A2_svadduhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vadduh($Rs32,$Rt32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be {
+tc_61830035, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110011;
@@ -1305,12 +1304,13 @@ def A2_svavgh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vavgh($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be {
+tc_1c80410a, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111000;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
let isCommutable = 1;
}
@@ -1318,12 +1318,13 @@ def A2_svavghs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vavgh($Rs32,$Rt32):rnd",
-tc_8fe6b782, TypeALU32_3op>, Enc_5ab2be {
+tc_d08ee0f4, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111001;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
let isCommutable = 1;
}
@@ -1331,19 +1332,20 @@ def A2_svnavgh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vnavgh($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_1c80410a, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111011;
let hasNewValue = 1;
let opNewValue = 0;
+let prefersSlot3 = 1;
let InputType = "reg";
}
def A2_svsubh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubh($Rt32,$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110100;
@@ -1355,7 +1357,7 @@ def A2_svsubhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubh($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110101;
@@ -1369,7 +1371,7 @@ def A2_svsubuhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubuh($Rt32,$Rs32):sat",
-tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 {
+tc_61830035, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110111;
@@ -1383,7 +1385,7 @@ def A2_swiz : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = swiz($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -1393,7 +1395,7 @@ def A2_sxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxtb($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000101;
let hasNewValue = 1;
@@ -1405,7 +1407,7 @@ def A2_sxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxth($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000111;
let hasNewValue = 1;
@@ -1417,7 +1419,7 @@ def A2_sxtw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = sxtw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100010;
}
@@ -1425,7 +1427,7 @@ def A2_tfr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = $Rs32",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000011;
let hasNewValue = 1;
@@ -1438,7 +1440,7 @@ def A2_tfrcrr : HInst<
(outs IntRegs:$Rd32),
(ins CtrRegs:$Cs32),
"$Rd32 = $Cs32",
-tc_29175780, TypeCR>, Enc_0cb018 {
+tc_b9272d6c, TypeCR>, Enc_0cb018 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101010000;
let hasNewValue = 1;
@@ -1448,7 +1450,7 @@ def A2_tfrf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = $Rs32",
-tc_d6bf0472, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
@@ -1463,7 +1465,7 @@ def A2_tfrfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = $Rs32",
-tc_2b2f4060, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
@@ -1479,7 +1481,7 @@ def A2_tfrih : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
"$Rx32.h = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_51436c {
+tc_5a2711e5, TypeALU32_2op>, Enc_51436c {
let Inst{21-21} = 0b1;
let Inst{31-24} = 0b01110010;
let hasNewValue = 1;
@@ -1490,7 +1492,7 @@ def A2_tfril : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
"$Rx32.l = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_51436c {
+tc_5a2711e5, TypeALU32_2op>, Enc_51436c {
let Inst{21-21} = 0b1;
let Inst{31-24} = 0b01110001;
let hasNewValue = 1;
@@ -1501,7 +1503,7 @@ def A2_tfrp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let BaseOpcode = "A2_tfrp";
let isPredicable = 1;
let isPseudo = 1;
@@ -1510,7 +1512,7 @@ def A2_tfrpf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if (!$Pu4) $Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let BaseOpcode = "A2_tfrp";
@@ -1520,7 +1522,7 @@ def A2_tfrpfnew : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if (!$Pu4.new) $Rdd32 = $Rss32",
-tc_5f6847a1, TypeALU32_2op>, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let isPredicatedNew = 1;
@@ -1531,7 +1533,7 @@ def A2_tfrpi : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii),
"$Rdd32 = #$Ii",
-tc_b9488031, TypeALU64> {
+tc_5a2711e5, TypeALU64> {
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
let isMoveImm = 1;
@@ -1541,7 +1543,7 @@ def A2_tfrpt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if ($Pu4) $Rdd32 = $Rss32",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let BaseOpcode = "A2_tfrp";
let isPseudo = 1;
@@ -1550,7 +1552,7 @@ def A2_tfrptnew : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if ($Pu4.new) $Rdd32 = $Rss32",
-tc_5f6847a1, TypeALU32_2op>, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedNew = 1;
let BaseOpcode = "A2_tfrp";
@@ -1560,7 +1562,7 @@ def A2_tfrrcr : HInst<
(outs CtrRegs:$Cd32),
(ins IntRegs:$Rs32),
"$Cd32 = $Rs32",
-tc_a21dc435, TypeCR>, Enc_bd811a {
+tc_434c8e1e, TypeCR>, Enc_bd811a {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100010001;
let hasNewValue = 1;
@@ -1570,7 +1572,7 @@ def A2_tfrsi : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii),
"$Rd32 = #$Ii",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel {
+tc_57890846, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel {
let Inst{21-21} = 0b0;
let Inst{31-24} = 0b01111000;
let hasNewValue = 1;
@@ -1592,7 +1594,7 @@ def A2_tfrt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = $Rs32",
-tc_d6bf0472, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_4c5ba658, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1606,7 +1608,7 @@ def A2_tfrtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = $Rs32",
-tc_2b2f4060, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_05c070ec, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1621,7 +1623,7 @@ def A2_vabsh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsh($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1630,7 +1632,7 @@ def A2_vabshsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsh($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1640,7 +1642,7 @@ def A2_vabsw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsw($Rss32)",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1649,7 +1651,7 @@ def A2_vabswsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsw($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1659,7 +1661,7 @@ def A2_vaddb_map : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeMAPPING> {
+tc_946df596, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1667,7 +1669,7 @@ def A2_vaddh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1676,7 +1678,7 @@ def A2_vaddhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1687,7 +1689,7 @@ def A2_vaddub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddub($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1696,7 +1698,7 @@ def A2_vaddubs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddub($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1707,7 +1709,7 @@ def A2_vadduhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vadduh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1718,7 +1720,7 @@ def A2_vaddw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1727,7 +1729,7 @@ def A2_vaddws : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_a56825 {
+tc_779080bf, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1738,16 +1740,17 @@ def A2_vavgh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavghcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32):crnd",
-tc_2b6f77c6, TypeALU64>, Enc_a56825 {
+tc_002cb246, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1757,79 +1760,87 @@ def A2_vavghr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavgub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgub($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavgubr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgub($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguh($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
}
def A2_vavguw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavguwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguw($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavgw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_6132ba3d, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vavgwcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32):crnd",
-tc_2b6f77c6, TypeALU64>, Enc_a56825 {
+tc_002cb246, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -1839,16 +1850,17 @@ def A2_vavgwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32):rnd",
-tc_dbdffe3d, TypeALU64>, Enc_a56825 {
+tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
}
def A2_vcmpbeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b110000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1857,7 +1869,7 @@ def A2_vcmpbgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b111000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1866,7 +1878,7 @@ def A2_vcmpheq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1875,7 +1887,7 @@ def A2_vcmphgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1884,7 +1896,7 @@ def A2_vcmphgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1893,7 +1905,7 @@ def A2_vcmpweq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1902,7 +1914,7 @@ def A2_vcmpwgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1911,7 +1923,7 @@ def A2_vcmpwgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1920,7 +1932,7 @@ def A2_vconj : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vconj($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_b9c5fb {
+tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000100;
let prefersSlot3 = 1;
@@ -1930,7 +1942,7 @@ def A2_vmaxb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxb($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1940,7 +1952,7 @@ def A2_vmaxh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1950,7 +1962,7 @@ def A2_vmaxub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxub($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1960,7 +1972,7 @@ def A2_vmaxuh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxuh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1970,7 +1982,7 @@ def A2_vmaxuw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxuw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -1980,7 +1992,7 @@ def A2_vmaxw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1990,7 +2002,7 @@ def A2_vminb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminb($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -2000,7 +2012,7 @@ def A2_vminh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2010,7 +2022,7 @@ def A2_vminub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminub($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2020,7 +2032,7 @@ def A2_vminuh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminuh($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2030,7 +2042,7 @@ def A2_vminuw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminuw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2040,7 +2052,7 @@ def A2_vminw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminw($Rtt32,$Rss32)",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2050,16 +2062,17 @@ def A2_vnavgh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_6132ba3d, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
}
def A2_vnavghcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32):crnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2070,7 +2083,7 @@ def A2_vnavghr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32):rnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2081,16 +2094,17 @@ def A2_vnavgw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_6132ba3d, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
}
def A2_vnavgwcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32):crnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2101,7 +2115,7 @@ def A2_vnavgwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32):rnd:sat",
-tc_2b6f77c6, TypeALU64>, Enc_ea23e4 {
+tc_002cb246, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2112,7 +2126,7 @@ def A2_vraddub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vraddub($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -2122,7 +2136,7 @@ def A2_vraddub_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vraddub($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -2133,7 +2147,7 @@ def A2_vrsadub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrsadub($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -2143,7 +2157,7 @@ def A2_vrsadub_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrsadub($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -2154,7 +2168,7 @@ def A2_vsubb_map : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vsubb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeMAPPING> {
+tc_946df596, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -2162,7 +2176,7 @@ def A2_vsubh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2171,7 +2185,7 @@ def A2_vsubhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubh($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2182,7 +2196,7 @@ def A2_vsubub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubub($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2191,7 +2205,7 @@ def A2_vsububs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubub($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2202,7 +2216,7 @@ def A2_vsubuhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubuh($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2213,7 +2227,7 @@ def A2_vsubw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubw($Rtt32,$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2222,7 +2236,7 @@ def A2_vsubws : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubw($Rtt32,$Rss32):sat",
-tc_b44c6e2a, TypeALU64>, Enc_ea23e4 {
+tc_779080bf, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2233,7 +2247,7 @@ def A2_xor : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = xor($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001011;
@@ -2248,7 +2262,7 @@ def A2_xorp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = xor($Rss32,$Rtt32)",
-tc_540fdfbc, TypeALU64>, Enc_a56825 {
+tc_946df596, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2258,7 +2272,7 @@ def A2_zxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, PredNewRel {
let hasNewValue = 1;
let opNewValue = 0;
let BaseOpcode = "A2_zxtb";
@@ -2270,7 +2284,7 @@ def A2_zxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxth($Rs32)",
-tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000110;
let hasNewValue = 1;
@@ -2282,7 +2296,7 @@ def A4_addp_c : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
"$Rdd32 = add($Rss32,$Rtt32,$Px4):carry",
-tc_523fcf30, TypeS_3op>, Enc_2b3f60 {
+tc_9c3ecd83, TypeS_3op>, Enc_2b3f60 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010110;
@@ -2293,7 +2307,7 @@ def A4_andn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = and($Rt32,~$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001100;
@@ -2305,7 +2319,7 @@ def A4_andnp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = and($Rtt32,~$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2314,7 +2328,7 @@ def A4_bitsplit : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = bitsplit($Rs32,$Rt32)",
-tc_1b9c9ee5, TypeALU64>, Enc_be32a5 {
+tc_4414d8b1, TypeALU64>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010100001;
@@ -2324,7 +2338,7 @@ def A4_bitspliti : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rdd32 = bitsplit($Rs32,#$Ii)",
-tc_1b9c9ee5, TypeS_2op>, Enc_311abd {
+tc_4414d8b1, TypeS_2op>, Enc_311abd {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001000110;
@@ -2334,14 +2348,14 @@ def A4_boundscheck : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rs32,$Rtt32)",
-tc_1e856f58, TypeALU64> {
+tc_85d5d03f, TypeALU64> {
let isPseudo = 1;
}
def A4_boundscheck_hi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rss32,$Rtt32):raw:hi",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -2350,7 +2364,7 @@ def A4_boundscheck_lo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rss32,$Rtt32):raw:lo",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -2359,7 +2373,7 @@ def A4_cmpbeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b110000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2372,7 +2386,7 @@ def A4_cmpbeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Pd4 = cmpb.eq($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101000;
@@ -2385,7 +2399,7 @@ def A4_cmpbgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2397,7 +2411,7 @@ def A4_cmpbgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s8_0Imm:$Ii),
"$Pd4 = cmpb.gt($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101001;
@@ -2409,7 +2423,7 @@ def A4_cmpbgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.gtu($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b111000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2421,7 +2435,7 @@ def A4_cmpbgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmpb.gtu($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_02553a, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_02553a, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011101010;
@@ -2438,7 +2452,7 @@ def A4_cmpheq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2451,7 +2465,7 @@ def A4_cmpheqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmph.eq($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101000;
@@ -2469,7 +2483,7 @@ def A4_cmphgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2481,7 +2495,7 @@ def A4_cmphgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmph.gt($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101001;
@@ -2498,7 +2512,7 @@ def A4_cmphgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.gtu($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2510,7 +2524,7 @@ def A4_cmphgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmph.gtu($Rs32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_02553a, ImmRegRel {
+tc_643b4717, TypeALU64>, Enc_02553a, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011101010;
@@ -2527,7 +2541,7 @@ def A4_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_b9488031, TypeALU32_2op>, Enc_f0cca7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_f0cca7 {
let Inst{31-21} = 0b01111100100;
let isExtendable = 1;
let opExtendable = 2;
@@ -2539,7 +2553,7 @@ def A4_combineir : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rdd32 = combine(#$Ii,$Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_9cdba7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_9cdba7 {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011001;
let isExtendable = 1;
@@ -2552,7 +2566,7 @@ def A4_combineri : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rdd32 = combine($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_9cdba7 {
+tc_5a2711e5, TypeALU32_2op>, Enc_9cdba7 {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011000;
let isExtendable = 1;
@@ -2565,7 +2579,7 @@ def A4_cround_ri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = cround($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -2577,7 +2591,7 @@ def A4_cround_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cround($Rs32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -2589,14 +2603,14 @@ def A4_ext : HInst<
(outs),
(ins u26_6Imm:$Ii),
"immext(#$Ii)",
-tc_452f85af, TypeEXTENDER>, Enc_2b518f {
+tc_862b3e70, TypeEXTENDER>, Enc_2b518f {
let Inst{31-28} = 0b0000;
}
def A4_modwrapu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = modwrap($Rs32,$Rt32)",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2608,7 +2622,7 @@ def A4_orn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = or($Rt32,~$Rs32)",
-tc_b9488031, TypeALU32_3op>, Enc_bd6011 {
+tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001101;
@@ -2620,7 +2634,7 @@ def A4_ornp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = or($Rtt32,~$Rss32)",
-tc_540fdfbc, TypeALU64>, Enc_ea23e4 {
+tc_946df596, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2629,7 +2643,7 @@ def A4_paslhf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = aslh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000000;
@@ -2643,7 +2657,7 @@ def A4_paslhfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = aslh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000000;
@@ -2658,7 +2672,7 @@ def A4_paslht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = aslh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000000;
@@ -2671,7 +2685,7 @@ def A4_paslhtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = aslh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000000;
@@ -2685,7 +2699,7 @@ def A4_pasrhf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = asrh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000001;
@@ -2699,7 +2713,7 @@ def A4_pasrhfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = asrh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000001;
@@ -2714,7 +2728,7 @@ def A4_pasrht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = asrh($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000001;
@@ -2727,7 +2741,7 @@ def A4_pasrhtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = asrh($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000001;
@@ -2741,7 +2755,7 @@ def A4_psxtbf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000101;
@@ -2755,7 +2769,7 @@ def A4_psxtbfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000101;
@@ -2770,7 +2784,7 @@ def A4_psxtbt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000101;
@@ -2783,7 +2797,7 @@ def A4_psxtbtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000101;
@@ -2797,7 +2811,7 @@ def A4_psxthf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000111;
@@ -2811,7 +2825,7 @@ def A4_psxthfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000111;
@@ -2826,7 +2840,7 @@ def A4_psxtht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000111;
@@ -2839,7 +2853,7 @@ def A4_psxthtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000111;
@@ -2853,7 +2867,7 @@ def A4_pzxtbf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000100;
@@ -2867,7 +2881,7 @@ def A4_pzxtbfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = zxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000100;
@@ -2882,7 +2896,7 @@ def A4_pzxtbt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = zxtb($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000100;
@@ -2895,7 +2909,7 @@ def A4_pzxtbtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = zxtb($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000100;
@@ -2909,7 +2923,7 @@ def A4_pzxthf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = zxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000110;
@@ -2923,7 +2937,7 @@ def A4_pzxthfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = zxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000110;
@@ -2938,7 +2952,7 @@ def A4_pzxtht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = zxth($Rs32)",
-tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000110;
@@ -2951,7 +2965,7 @@ def A4_pzxthtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = zxth($Rs32)",
-tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000110;
@@ -2965,7 +2979,7 @@ def A4_rcmpeq : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmp.eq($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011010;
@@ -2979,7 +2993,7 @@ def A4_rcmpeqi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = cmp.eq($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011010;
let hasNewValue = 1;
@@ -2996,7 +3010,7 @@ def A4_rcmpneq : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = !cmp.eq($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
+tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011011;
@@ -3010,7 +3024,7 @@ def A4_rcmpneqi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = !cmp.eq($Rs32,#$Ii)",
-tc_b9488031, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011011;
let hasNewValue = 1;
@@ -3027,7 +3041,7 @@ def A4_round_ri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = round($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -3039,7 +3053,7 @@ def A4_round_ri_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = round($Rs32,#$Ii):sat",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -3052,7 +3066,7 @@ def A4_round_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = round($Rs32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -3064,7 +3078,7 @@ def A4_round_rr_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = round($Rs32,$Rt32):sat",
-tc_2b6f77c6, TypeS_3op>, Enc_5ab2be {
+tc_002cb246, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -3077,7 +3091,7 @@ def A4_subp_c : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
"$Rdd32 = sub($Rss32,$Rtt32,$Px4):carry",
-tc_523fcf30, TypeS_3op>, Enc_2b3f60 {
+tc_9c3ecd83, TypeS_3op>, Enc_2b3f60 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010111;
@@ -3088,7 +3102,7 @@ def A4_tfrcpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins CtrRegs64:$Css32),
"$Rdd32 = $Css32",
-tc_29175780, TypeCR>, Enc_667b39 {
+tc_b9272d6c, TypeCR>, Enc_667b39 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101000000;
}
@@ -3096,7 +3110,7 @@ def A4_tfrpcp : HInst<
(outs CtrRegs64:$Cdd32),
(ins DoubleRegs:$Rss32),
"$Cdd32 = $Rss32",
-tc_a21dc435, TypeCR>, Enc_0ed752 {
+tc_434c8e1e, TypeCR>, Enc_0ed752 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100011001;
}
@@ -3104,7 +3118,7 @@ def A4_tlbmatch : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Pd4 = tlbmatch($Rss32,$Rt32)",
-tc_04c9decc, TypeALU64>, Enc_03833b {
+tc_4837eefb, TypeALU64>, Enc_03833b {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3114,7 +3128,7 @@ def A4_vcmpbeq_any : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3123,7 +3137,7 @@ def A4_vcmpbeqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u8_0Imm:$Ii),
"$Pd4 = vcmpb.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3132,7 +3146,7 @@ def A4_vcmpbgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3141,7 +3155,7 @@ def A4_vcmpbgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpb.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3150,7 +3164,7 @@ def A4_vcmpbgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmpb.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3159,7 +3173,7 @@ def A4_vcmpheqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmph.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3168,7 +3182,7 @@ def A4_vcmphgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmph.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3177,7 +3191,7 @@ def A4_vcmphgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmph.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3186,7 +3200,7 @@ def A4_vcmpweqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpw.eq($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3195,7 +3209,7 @@ def A4_vcmpwgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpw.gt($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_0d8adb {
+tc_643b4717, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3204,7 +3218,7 @@ def A4_vcmpwgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmpw.gtu($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_3680c2 {
+tc_643b4717, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3213,7 +3227,7 @@ def A4_vrmaxh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3224,7 +3238,7 @@ def A4_vrmaxuh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxuh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3235,7 +3249,7 @@ def A4_vrmaxuw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxuw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3246,7 +3260,7 @@ def A4_vrmaxw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3257,7 +3271,7 @@ def A4_vrminh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3268,7 +3282,7 @@ def A4_vrminuh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminuh($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3279,7 +3293,7 @@ def A4_vrminuw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminuw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3290,7 +3304,7 @@ def A4_vrminw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminw($Rss32,$Ru32)",
-tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 {
+tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3301,7 +3315,7 @@ def A5_ACS : HInst<
(outs DoubleRegs:$Rxx32, PredRegs:$Pe4),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)",
-tc_caaebcba, TypeM>, Enc_831a7d, Requires<[HasV55]> {
+tc_d1aa9eaa, TypeM>, Enc_831a7d, Requires<[HasV55]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -3314,7 +3328,7 @@ def A5_vaddhubs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vaddhub($Rss32,$Rtt32):sat",
-tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5]> {
+tc_002cb246, TypeS_3op>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -3327,7 +3341,7 @@ def A6_vcmpbeq_notany : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = !any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_55050d58, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
+tc_1fc97744, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3336,7 +3350,7 @@ def A6_vminub_RdP : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Pe4),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)",
-tc_ef84f62f, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
+tc_f9058dd7, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -3347,7 +3361,7 @@ def C2_all8 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = all8($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011101000;
}
@@ -3355,7 +3369,7 @@ def C2_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = and($Pt4,$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011000000;
@@ -3364,7 +3378,7 @@ def C2_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = and($Pt4,!$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011011000;
@@ -3373,7 +3387,7 @@ def C2_any8 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = any8($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011100000;
}
@@ -3381,7 +3395,7 @@ def C2_bitsclr : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = bitsclr($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111100;
@@ -3390,7 +3404,7 @@ def C2_bitsclri : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u6_0Imm:$Ii),
"$Pd4 = bitsclr($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_5d6c34 {
+tc_643b4717, TypeS_2op>, Enc_5d6c34 {
let Inst{7-2} = 0b000000;
let Inst{31-21} = 0b10000101100;
}
@@ -3398,7 +3412,7 @@ def C2_bitsset : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = bitsset($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111010;
@@ -3407,7 +3421,7 @@ def C2_ccombinewf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rdd32 = combine($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111101000;
@@ -3419,7 +3433,7 @@ def C2_ccombinewnewf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111101000;
@@ -3432,7 +3446,7 @@ def C2_ccombinewnewt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
-tc_2b2f4060, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_05c070ec, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111101000;
@@ -3444,7 +3458,7 @@ def C2_ccombinewt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rdd32 = combine($Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_4c5ba658, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111101000;
@@ -3455,7 +3469,7 @@ def C2_cmoveif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111101;
@@ -3477,7 +3491,7 @@ def C2_cmoveit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = #$Ii",
-tc_b9488031, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_5a2711e5, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111100;
@@ -3498,7 +3512,7 @@ def C2_cmovenewif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = #$Ii",
-tc_5f6847a1, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111101;
@@ -3521,7 +3535,7 @@ def C2_cmovenewit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = #$Ii",
-tc_5f6847a1, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_1ae57e39, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111100;
@@ -3543,7 +3557,7 @@ def C2_cmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.eq($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010000;
@@ -3556,7 +3570,7 @@ def C2_cmpeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.eq($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-22} = 0b0111010100;
let CextOpcode = "C2_cmpeq";
@@ -3572,7 +3586,7 @@ def C2_cmpeqp : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3583,7 +3597,7 @@ def C2_cmpgei : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s8_0Imm:$Ii),
"$Pd4 = cmp.ge($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op> {
+tc_56f114f4, TypeALU32_2op> {
let isCompare = 1;
let isPseudo = 1;
}
@@ -3591,7 +3605,7 @@ def C2_cmpgeui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Pd4 = cmp.geu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op> {
+tc_56f114f4, TypeALU32_2op> {
let isCompare = 1;
let isPseudo = 1;
}
@@ -3599,7 +3613,7 @@ def C2_cmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.gt($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010010;
@@ -3611,7 +3625,7 @@ def C2_cmpgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.gt($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-22} = 0b0111010101;
let CextOpcode = "C2_cmpgt";
@@ -3627,7 +3641,7 @@ def C2_cmpgtp : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3637,7 +3651,7 @@ def C2_cmpgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.gtu($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010011;
@@ -3649,7 +3663,7 @@ def C2_cmpgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmp.gtu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-21} = 0b01110101100;
let CextOpcode = "C2_cmpgtu";
@@ -3665,7 +3679,7 @@ def C2_cmpgtup : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.gtu($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3675,7 +3689,7 @@ def C2_cmplt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.lt($Rs32,$Rt32)",
-tc_6ebb4a12, TypeALU32_3op> {
+tc_56f114f4, TypeALU32_3op> {
let isCompare = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -3684,7 +3698,7 @@ def C2_cmpltu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.ltu($Rs32,$Rt32)",
-tc_6ebb4a12, TypeALU32_3op> {
+tc_56f114f4, TypeALU32_3op> {
let isCompare = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -3693,7 +3707,7 @@ def C2_mask : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4),
"$Rdd32 = mask($Pt4)",
-tc_cde8b071, TypeS_2op>, Enc_78e566 {
+tc_0ae0825c, TypeS_2op>, Enc_78e566 {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0000;
let Inst{31-16} = 0b1000011000000000;
@@ -3702,7 +3716,7 @@ def C2_mux : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mux($Pu4,$Rs32,$Rt32)",
-tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54 {
+tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110100000;
@@ -3714,7 +3728,7 @@ def C2_muxii : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii, s8_0Imm:$II),
"$Rd32 = mux($Pu4,#$Ii,#$II)",
-tc_d6bf0472, TypeALU32_2op>, Enc_830e5d {
+tc_4c5ba658, TypeALU32_2op>, Enc_830e5d {
let Inst{31-25} = 0b0111101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -3728,7 +3742,7 @@ def C2_muxir : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = mux($Pu4,$Rs32,#$Ii)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011100110;
let hasNewValue = 1;
@@ -3744,7 +3758,7 @@ def C2_muxri : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = mux($Pu4,#$Ii,$Rs32)",
-tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f {
+tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011100111;
let hasNewValue = 1;
@@ -3760,7 +3774,7 @@ def C2_not : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = not($Ps4)",
-tc_f2704b9a, TypeCR>, Enc_65d691 {
+tc_de554571, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011110000;
}
@@ -3768,7 +3782,7 @@ def C2_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = or($Pt4,$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011001000;
@@ -3777,7 +3791,7 @@ def C2_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = or($Pt4,!$Ps4)",
-tc_53bc8a6a, TypeCR>, Enc_454a26 {
+tc_640086b5, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011111000;
@@ -3786,7 +3800,7 @@ def C2_pxfer_map : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = $Ps4",
-tc_53bc8a6a, TypeMAPPING> {
+tc_640086b5, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -3794,7 +3808,7 @@ def C2_tfrpr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Ps4),
"$Rd32 = $Ps4",
-tc_cde8b071, TypeS_2op>, Enc_f5e933 {
+tc_0ae0825c, TypeS_2op>, Enc_f5e933 {
let Inst{13-5} = 0b000000000;
let Inst{31-18} = 0b10001001010000;
let hasNewValue = 1;
@@ -3804,7 +3818,7 @@ def C2_tfrrp : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32),
"$Pd4 = $Rs32",
-tc_351fed2d, TypeS_2op>, Enc_48b75f {
+tc_cfd8378a, TypeS_2op>, Enc_48b75f {
let Inst{13-2} = 0b000000000000;
let Inst{31-21} = 0b10000101010;
}
@@ -3812,7 +3826,7 @@ def C2_vitpack : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Rd32 = vitpack($Ps4,$Pt4)",
-tc_1b9c9ee5, TypeS_2op>, Enc_527412 {
+tc_4414d8b1, TypeS_2op>, Enc_527412 {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b10001001000000;
@@ -3824,7 +3838,7 @@ def C2_vmux : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmux($Pu4,$Rss32,$Rtt32)",
-tc_f8eeed7a, TypeALU64>, Enc_329361 {
+tc_b4b5c03a, TypeALU64>, Enc_329361 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010001000;
@@ -3833,7 +3847,7 @@ def C2_xor : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = xor($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011010000;
@@ -3842,7 +3856,7 @@ def C4_addipc : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = add(pc,#$Ii)",
-tc_b9c4623f, TypeCR>, Enc_607661 {
+tc_a813cf9a, TypeCR>, Enc_607661 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0110101001001001;
@@ -3858,7 +3872,7 @@ def C4_and_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,and($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011000100;
@@ -3867,7 +3881,7 @@ def C4_and_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,and($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011100100;
@@ -3876,7 +3890,7 @@ def C4_and_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,or($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011001100;
@@ -3885,7 +3899,7 @@ def C4_and_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,or($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011101100;
@@ -3894,7 +3908,7 @@ def C4_cmplte : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.gt($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010010;
@@ -3906,7 +3920,7 @@ def C4_cmpltei : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = !cmp.gt($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-22} = 0b0111010101;
let CextOpcode = "C4_cmplte";
@@ -3922,7 +3936,7 @@ def C4_cmplteu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.gtu($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010011;
@@ -3934,7 +3948,7 @@ def C4_cmplteui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = !cmp.gtu($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-21} = 0b01110101100;
let CextOpcode = "C4_cmplteu";
@@ -3950,7 +3964,7 @@ def C4_cmpneq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.eq($Rs32,$Rt32)",
-tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010000;
@@ -3963,7 +3977,7 @@ def C4_cmpneqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = !cmp.eq($Rs32,#$Ii)",
-tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-22} = 0b0111010100;
let CextOpcode = "C4_cmpneq";
@@ -3979,7 +3993,7 @@ def C4_fastcorner9 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = fastcorner9($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b100100;
let Inst{13-10} = 0b1000;
let Inst{31-18} = 0b01101011000000;
@@ -3988,7 +4002,7 @@ def C4_fastcorner9_not : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = !fastcorner9($Ps4,$Pt4)",
-tc_53bc8a6a, TypeCR>, Enc_284ebb {
+tc_640086b5, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b100100;
let Inst{13-10} = 0b1000;
let Inst{31-18} = 0b01101011000100;
@@ -3997,7 +4011,7 @@ def C4_nbitsclr : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !bitsclr($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111101;
@@ -4006,7 +4020,7 @@ def C4_nbitsclri : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u6_0Imm:$Ii),
"$Pd4 = !bitsclr($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_5d6c34 {
+tc_643b4717, TypeS_2op>, Enc_5d6c34 {
let Inst{7-2} = 0b000000;
let Inst{31-21} = 0b10000101101;
}
@@ -4014,7 +4028,7 @@ def C4_nbitsset : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !bitsset($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111011;
@@ -4023,7 +4037,7 @@ def C4_or_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,and($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011010100;
@@ -4032,7 +4046,7 @@ def C4_or_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,and($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011110100;
@@ -4041,7 +4055,7 @@ def C4_or_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,or($Pt4,$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011011100;
@@ -4050,7 +4064,7 @@ def C4_or_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,or($Pt4,!$Pu4))",
-tc_481e5e5c, TypeCR>, Enc_9ac432 {
+tc_b31c2e97, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011111100;
@@ -4059,7 +4073,7 @@ def F2_conv_d2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_d2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4069,7 +4083,7 @@ def F2_conv_d2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_d2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -4081,7 +4095,7 @@ def F2_conv_df2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4091,7 +4105,7 @@ def F2_conv_df2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4101,7 +4115,7 @@ def F2_conv_df2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -4113,7 +4127,7 @@ def F2_conv_df2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4123,7 +4137,7 @@ def F2_conv_df2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4133,7 +4147,7 @@ def F2_conv_df2uw : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -4145,7 +4159,7 @@ def F2_conv_df2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000101;
let hasNewValue = 1;
@@ -4157,7 +4171,7 @@ def F2_conv_df2w : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -4169,7 +4183,7 @@ def F2_conv_df2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -4181,7 +4195,7 @@ def F2_conv_sf2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4191,7 +4205,7 @@ def F2_conv_sf2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4201,7 +4215,7 @@ def F2_conv_sf2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4211,7 +4225,7 @@ def F2_conv_sf2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4221,7 +4235,7 @@ def F2_conv_sf2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4231,7 +4245,7 @@ def F2_conv_sf2uw : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4243,7 +4257,7 @@ def F2_conv_sf2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4255,7 +4269,7 @@ def F2_conv_sf2w : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4267,7 +4281,7 @@ def F2_conv_sf2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4279,7 +4293,7 @@ def F2_conv_ud2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_ud2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4289,7 +4303,7 @@ def F2_conv_ud2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_ud2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000001;
let hasNewValue = 1;
@@ -4301,7 +4315,7 @@ def F2_conv_uw2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_uw2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4311,7 +4325,7 @@ def F2_conv_uw2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_uw2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011001;
let hasNewValue = 1;
@@ -4323,7 +4337,7 @@ def F2_conv_w2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_w2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4333,7 +4347,7 @@ def F2_conv_w2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_w2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011010;
let hasNewValue = 1;
@@ -4341,11 +4355,22 @@ let opNewValue = 0;
let isFP = 1;
let Uses = [USR];
}
+def F2_dfadd : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfadd($Rss32,$Rtt32)",
+tc_2f7c551d, TypeM>, Enc_a56825, Requires<[HasV66]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let isFP = 1;
+let Uses = [USR];
+}
def F2_dfclass : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Pd4 = dfclass($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5]> {
+tc_643b4717, TypeALU64>, Enc_1f19b5 {
let Inst{4-2} = 0b100;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b11011100100;
@@ -4356,7 +4381,7 @@ def F2_dfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4368,7 +4393,7 @@ def F2_dfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4380,7 +4405,7 @@ def F2_dfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4392,7 +4417,7 @@ def F2_dfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4404,7 +4429,7 @@ def F2_dfimm_n : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100101;
let prefersSlot3 = 1;
@@ -4413,16 +4438,27 @@ def F2_dfimm_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100100;
let prefersSlot3 = 1;
}
+def F2_dfsub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfsub($Rss32,$Rtt32)",
+tc_2f7c551d, TypeM>, Enc_a56825, Requires<[HasV66]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let isFP = 1;
+let Uses = [USR];
+}
def F2_sfadd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfadd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4436,7 +4472,7 @@ def F2_sfclass : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = sfclass($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5]> {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101111;
@@ -4447,7 +4483,7 @@ def F2_sfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4459,7 +4495,7 @@ def F2_sfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.ge($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4471,7 +4507,7 @@ def F2_sfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4483,7 +4519,7 @@ def F2_sfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.uo($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4495,7 +4531,7 @@ def F2_sffixupd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4507,7 +4543,7 @@ def F2_sffixupn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupn($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4519,7 +4555,7 @@ def F2_sffixupr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sffixupr($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_3a867367, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011101;
let hasNewValue = 1;
@@ -4530,7 +4566,7 @@ def F2_sffma : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4544,7 +4580,7 @@ def F2_sffma_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4558,7 +4594,7 @@ def F2_sffma_sc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
"$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
-tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5]> {
+tc_4560740b, TypeM>, Enc_437f33 {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -4572,7 +4608,7 @@ def F2_sffms : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4586,7 +4622,7 @@ def F2_sffms_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_a58fd5cc, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4600,7 +4636,7 @@ def F2_sfimm_n : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011001;
let hasNewValue = 1;
@@ -4611,7 +4647,7 @@ def F2_sfimm_p : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
+tc_9e313203, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011000;
let hasNewValue = 1;
@@ -4622,7 +4658,7 @@ def F2_sfinvsqrta : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32),
"$Rd32,$Pe4 = sfinvsqrta($Rs32)",
-tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5]> {
+tc_b8bffe55, TypeS_2op>, Enc_890909 {
let Inst{13-7} = 0b0000000;
let Inst{31-21} = 0b10001011111;
let hasNewValue = 1;
@@ -4634,7 +4670,7 @@ def F2_sfmax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmax($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_88b4f13d, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4648,7 +4684,7 @@ def F2_sfmin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmin($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_88b4f13d, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4662,7 +4698,7 @@ def F2_sfmpy : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmpy($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011010;
@@ -4676,7 +4712,7 @@ def F2_sfrecipa : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
-tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5]> {
+tc_2ff964b4, TypeM>, Enc_a94f3b {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011111;
@@ -4689,7 +4725,7 @@ def F2_sfsub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfsub($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_3b470976, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4702,7 +4738,7 @@ def G4_tfrgcpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins GuestRegs64:$Gss32),
"$Rdd32 = $Gss32",
-tc_6fa4db47, TypeCR>, Enc_0aa344 {
+tc_0d8f5752, TypeCR>, Enc_0aa344 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101000001;
}
@@ -4710,7 +4746,7 @@ def G4_tfrgcrr : HInst<
(outs IntRegs:$Rd32),
(ins GuestRegs:$Gs32),
"$Rd32 = $Gs32",
-tc_6fa4db47, TypeCR>, Enc_44271f {
+tc_0d8f5752, TypeCR>, Enc_44271f {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101010001;
let hasNewValue = 1;
@@ -4720,7 +4756,7 @@ def G4_tfrgpcp : HInst<
(outs GuestRegs64:$Gdd32),
(ins DoubleRegs:$Rss32),
"$Gdd32 = $Rss32",
-tc_994333cd, TypeCR>, Enc_ed5027 {
+tc_bcf98408, TypeCR>, Enc_ed5027 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100011000;
let hasNewValue = 1;
@@ -4730,7 +4766,7 @@ def G4_tfrgrcr : HInst<
(outs GuestRegs:$Gd32),
(ins IntRegs:$Rs32),
"$Gd32 = $Rs32",
-tc_994333cd, TypeCR>, Enc_621fba {
+tc_bcf98408, TypeCR>, Enc_621fba {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100010000;
let hasNewValue = 1;
@@ -4740,7 +4776,7 @@ def J2_call : HInst<
(outs),
(ins a30_2Imm:$Ii),
"call $Ii",
-tc_a27582fa, TypeJ>, Enc_81ac1d, PredRel {
+tc_4ae7b58b, TypeJ>, Enc_81ac1d, PredRel {
let Inst{0-0} = 0b0;
let Inst{31-25} = 0b0101101;
let isCall = 1;
@@ -4762,7 +4798,7 @@ def J2_callf : HInst<
(outs),
(ins PredRegs:$Pu4, a30_2Imm:$Ii),
"if (!$Pu4) call $Ii",
-tc_2f185f5c, TypeJ>, Enc_daea09, PredRel {
+tc_1d81e60e, TypeJ>, Enc_daea09, PredRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b1;
@@ -4789,7 +4825,7 @@ def J2_callr : HInst<
(outs),
(ins IntRegs:$Rs32),
"callr $Rs32",
-tc_15411484, TypeJ>, Enc_ecbcc8 {
+tc_3bd75825, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010000101;
let isCall = 1;
@@ -4803,7 +4839,7 @@ def J2_callrf : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) callr $Rs32",
-tc_10b97e27, TypeJ>, Enc_88d4d9 {
+tc_1ad90acd, TypeJ>, Enc_88d4d9 {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010001001;
@@ -4821,7 +4857,7 @@ def J2_callrt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) callr $Rs32",
-tc_10b97e27, TypeJ>, Enc_88d4d9 {
+tc_1ad90acd, TypeJ>, Enc_88d4d9 {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010001000;
@@ -4838,7 +4874,7 @@ def J2_callt : HInst<
(outs),
(ins PredRegs:$Pu4, a30_2Imm:$Ii),
"if ($Pu4) call $Ii",
-tc_2f185f5c, TypeJ>, Enc_daea09, PredRel {
+tc_1d81e60e, TypeJ>, Enc_daea09, PredRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b0;
@@ -4864,7 +4900,7 @@ def J2_endloop0 : HInst<
(outs),
(ins),
"endloop0",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC0, SA0];
let Defs = [LC0, P3, PC, USR];
let isBranch = 1;
@@ -4875,7 +4911,7 @@ def J2_endloop01 : HInst<
(outs),
(ins),
"endloop01",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC0, LC1, SA0, SA1];
let Defs = [LC0, LC1, P3, PC, USR];
let isPseudo = 1;
@@ -4884,7 +4920,7 @@ def J2_endloop1 : HInst<
(outs),
(ins),
"endloop1",
-tc_52d7bbea, TypeJ> {
+tc_1b6f7cec, TypeJ> {
let Uses = [LC1, SA1];
let Defs = [LC1, PC];
let isBranch = 1;
@@ -4895,7 +4931,7 @@ def J2_jump : HInst<
(outs),
(ins b30_2Imm:$Ii),
"jump $Ii",
-tc_3669266a, TypeJ>, Enc_81ac1d, PredNewRel {
+tc_ae53734a, TypeJ>, Enc_81ac1d, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{31-25} = 0b0101100;
let isTerminator = 1;
@@ -4917,7 +4953,7 @@ def J2_jumpf : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:nt $Ii",
-tc_e9fae2d6, TypeJ>, Enc_daea09, PredNewRel {
+tc_db2bce9c, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b1;
@@ -4943,7 +4979,7 @@ def J2_jumpf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if (!$Pu4) jump $Ii",
-tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> {
+tc_db2bce9c, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -4951,7 +4987,7 @@ def J2_jumpfnew : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4.new) jump:nt $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b010;
let Inst{21-21} = 0b1;
@@ -4978,7 +5014,7 @@ def J2_jumpfnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4.new) jump:t $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b110;
let Inst{21-21} = 0b1;
@@ -5005,7 +5041,7 @@ def J2_jumpfpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:t $Ii",
-tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
+tc_cd374165, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b1;
@@ -5031,7 +5067,7 @@ def J2_jumpr : HInst<
(outs),
(ins IntRegs:$Rs32),
"jumpr $Rs32",
-tc_9faf76ae, TypeJ>, Enc_ecbcc8, PredNewRel {
+tc_d5b7b0c1, TypeJ>, Enc_ecbcc8, PredNewRel {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010100;
let isTerminator = 1;
@@ -5048,7 +5084,7 @@ def J2_jumprf : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:nt $Rs32",
-tc_e0739b8c, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_85c9c08f, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010011011;
@@ -5067,7 +5103,7 @@ def J2_jumprf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr $Rs32",
-tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> {
+tc_85c9c08f, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5075,7 +5111,7 @@ def J2_jumprfnew : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) jumpr:nt $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b01010011011;
@@ -5095,7 +5131,7 @@ def J2_jumprfnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) jumpr:t $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b01010011011;
@@ -5115,7 +5151,7 @@ def J2_jumprfpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:t $Rs32",
-tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
+tc_e78647bd, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011011;
@@ -5134,7 +5170,7 @@ def J2_jumprgtez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32>=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000101;
@@ -5152,7 +5188,7 @@ def J2_jumprgtezpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32>=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000101;
@@ -5170,7 +5206,7 @@ def J2_jumprltez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32<=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000111;
@@ -5188,7 +5224,7 @@ def J2_jumprltezpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32<=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000111;
@@ -5206,7 +5242,7 @@ def J2_jumprnz : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32==#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000110;
@@ -5224,7 +5260,7 @@ def J2_jumprnzpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32==#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000110;
@@ -5242,7 +5278,7 @@ def J2_jumprt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:nt $Rs32",
-tc_e0739b8c, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_85c9c08f, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010011010;
@@ -5260,7 +5296,7 @@ def J2_jumprt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr $Rs32",
-tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> {
+tc_85c9c08f, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5268,7 +5304,7 @@ def J2_jumprtnew : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) jumpr:nt $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b01010011010;
@@ -5287,7 +5323,7 @@ def J2_jumprtnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) jumpr:t $Rs32",
-tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b01010011010;
@@ -5306,7 +5342,7 @@ def J2_jumprtpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:t $Rs32",
-tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
+tc_e78647bd, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011010;
@@ -5324,7 +5360,7 @@ def J2_jumprz : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32!=#0) jump:nt $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000100;
@@ -5342,7 +5378,7 @@ def J2_jumprzpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32!=#0) jump:t $Ii",
-tc_73043bf4, TypeCR>, Enc_0fa531 {
+tc_d9d43ecb, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000100;
@@ -5360,7 +5396,7 @@ def J2_jumpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:nt $Ii",
-tc_e9fae2d6, TypeJ>, Enc_daea09, PredNewRel {
+tc_db2bce9c, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b0;
@@ -5385,7 +5421,7 @@ def J2_jumpt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if ($Pu4) jump $Ii",
-tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> {
+tc_db2bce9c, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5393,7 +5429,7 @@ def J2_jumptnew : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4.new) jump:nt $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b010;
let Inst{21-21} = 0b0;
@@ -5419,7 +5455,7 @@ def J2_jumptnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4.new) jump:t $Ii",
-tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel {
+tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b110;
let Inst{21-21} = 0b0;
@@ -5445,7 +5481,7 @@ def J2_jumptpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:t $Ii",
-tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
+tc_cd374165, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b0;
@@ -5470,7 +5506,7 @@ def J2_loop0i : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"loop0($Ii,#$II)",
-tc_cf59f215, TypeCR>, Enc_4dc228 {
+tc_a9d88b22, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001000;
@@ -5487,7 +5523,7 @@ def J2_loop0r : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"loop0($Ii,$Rs32)",
-tc_7934b9df, TypeCR>, Enc_864a5a {
+tc_df3319ed, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5505,7 +5541,7 @@ def J2_loop1i : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"loop1($Ii,#$II)",
-tc_cf59f215, TypeCR>, Enc_4dc228 {
+tc_a9d88b22, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001001;
@@ -5522,7 +5558,7 @@ def J2_loop1r : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"loop1($Ii,$Rs32)",
-tc_7934b9df, TypeCR>, Enc_864a5a {
+tc_df3319ed, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5540,7 +5576,7 @@ def J2_pause : HInst<
(outs),
(ins u8_0Imm:$Ii),
"pause(#$Ii)",
-tc_681a2300, TypeJ>, Enc_a51a9a {
+tc_8d9d0154, TypeJ>, Enc_a51a9a {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5551,7 +5587,7 @@ def J2_ploop1si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp1loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001101;
@@ -5569,7 +5605,7 @@ def J2_ploop1sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp1loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5588,7 +5624,7 @@ def J2_ploop2si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp2loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001110;
@@ -5606,7 +5642,7 @@ def J2_ploop2sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp2loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5625,7 +5661,7 @@ def J2_ploop3si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp3loop0($Ii,#$II)",
-tc_c5e2426d, TypeCR>, Enc_4dc228 {
+tc_1c4528a2, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001111;
@@ -5643,7 +5679,7 @@ def J2_ploop3sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp3loop0($Ii,$Rs32)",
-tc_4f7cd700, TypeCR>, Enc_864a5a {
+tc_32779c6f, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5662,18 +5698,19 @@ def J2_trap0 : HInst<
(outs),
(ins u8_0Imm:$Ii),
"trap0(#$Ii)",
-tc_14cd4cfa, TypeJ>, Enc_a51a9a {
+tc_fc3999b4, TypeJ>, Enc_a51a9a {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0101010000000000;
let isSolo = 1;
+let hasSideEffects = 1;
}
def J2_trap1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u8_0Imm:$Ii),
"trap1($Rx32,#$Ii)",
-tc_59a01ead, TypeJ>, Enc_33f8ba {
+tc_b9e09e03, TypeJ>, Enc_33f8ba {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5683,13 +5720,15 @@ let opNewValue = 0;
let isSolo = 1;
let Uses = [GOSP];
let Defs = [GOSP, PC];
+let hasSideEffects = 1;
let Constraints = "$Rx32 = $Rx32in";
}
def J2_trap1_noregmap : HInst<
(outs),
(ins u8_0Imm:$Ii),
"trap1(#$Ii)",
-tc_59a01ead, TypeMAPPING> {
+tc_b9e09e03, TypeMAPPING> {
+let hasSideEffects = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5697,7 +5736,7 @@ def J4_cmpeq_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -5723,7 +5762,7 @@ def J4_cmpeq_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -5749,7 +5788,7 @@ def J4_cmpeq_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010001;
@@ -5775,7 +5814,7 @@ def J4_cmpeq_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010001;
@@ -5801,7 +5840,7 @@ def J4_cmpeq_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010001;
@@ -5827,7 +5866,7 @@ def J4_cmpeq_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010001;
@@ -5853,7 +5892,7 @@ def J4_cmpeq_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -5878,7 +5917,7 @@ def J4_cmpeq_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -5903,7 +5942,7 @@ def J4_cmpeq_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010000;
@@ -5928,7 +5967,7 @@ def J4_cmpeq_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010000;
@@ -5953,7 +5992,7 @@ def J4_cmpeq_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010000;
@@ -5978,7 +6017,7 @@ def J4_cmpeq_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010000;
@@ -6003,7 +6042,7 @@ def J4_cmpeqi_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6029,7 +6068,7 @@ def J4_cmpeqi_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6055,7 +6094,7 @@ def J4_cmpeqi_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000001;
@@ -6081,7 +6120,7 @@ def J4_cmpeqi_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000001;
@@ -6107,7 +6146,7 @@ def J4_cmpeqi_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001001;
@@ -6133,7 +6172,7 @@ def J4_cmpeqi_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001001;
@@ -6159,7 +6198,7 @@ def J4_cmpeqi_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6184,7 +6223,7 @@ def J4_cmpeqi_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6209,7 +6248,7 @@ def J4_cmpeqi_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000000;
@@ -6234,7 +6273,7 @@ def J4_cmpeqi_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000000;
@@ -6259,7 +6298,7 @@ def J4_cmpeqi_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001000;
@@ -6284,7 +6323,7 @@ def J4_cmpeqi_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001000;
@@ -6309,7 +6348,7 @@ def J4_cmpeqn1_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_e90a15, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_e90a15, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -6335,7 +6374,7 @@ def J4_cmpeqn1_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_5a18b3, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_5a18b3, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -6361,7 +6400,7 @@ def J4_cmpeqn1_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_1de724, PredRel {
+tc_3d495a39, TypeCJ>, Enc_1de724, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001000111;
@@ -6387,7 +6426,7 @@ def J4_cmpeqn1_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14640c, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14640c, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001000111;
@@ -6413,7 +6452,7 @@ def J4_cmpeqn1_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_668704, PredRel {
+tc_3d495a39, TypeCJ>, Enc_668704, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001001111;
@@ -6439,7 +6478,7 @@ def J4_cmpeqn1_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_800e04, PredRel {
+tc_3d495a39, TypeCJ>, Enc_800e04, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001001111;
@@ -6465,7 +6504,7 @@ def J4_cmpeqn1_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_4aca3a, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_4aca3a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -6490,7 +6529,7 @@ def J4_cmpeqn1_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_f7ea77, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_f7ea77, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -6515,7 +6554,7 @@ def J4_cmpeqn1_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_405228, PredRel {
+tc_3d495a39, TypeCJ>, Enc_405228, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001000110;
@@ -6540,7 +6579,7 @@ def J4_cmpeqn1_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_3a2484, PredRel {
+tc_3d495a39, TypeCJ>, Enc_3a2484, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001000110;
@@ -6565,7 +6604,7 @@ def J4_cmpeqn1_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_736575, PredRel {
+tc_3d495a39, TypeCJ>, Enc_736575, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001001110;
@@ -6590,7 +6629,7 @@ def J4_cmpeqn1_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_8e583a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_8e583a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001001110;
@@ -6615,7 +6654,7 @@ def J4_cmpgt_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6641,7 +6680,7 @@ def J4_cmpgt_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6667,7 +6706,7 @@ def J4_cmpgt_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010011;
@@ -6693,7 +6732,7 @@ def J4_cmpgt_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010011;
@@ -6719,7 +6758,7 @@ def J4_cmpgt_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010011;
@@ -6745,7 +6784,7 @@ def J4_cmpgt_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010011;
@@ -6771,7 +6810,7 @@ def J4_cmpgt_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6796,7 +6835,7 @@ def J4_cmpgt_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6821,7 +6860,7 @@ def J4_cmpgt_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010010;
@@ -6846,7 +6885,7 @@ def J4_cmpgt_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010010;
@@ -6871,7 +6910,7 @@ def J4_cmpgt_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010010;
@@ -6896,7 +6935,7 @@ def J4_cmpgt_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010010;
@@ -6921,7 +6960,7 @@ def J4_cmpgti_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6947,7 +6986,7 @@ def J4_cmpgti_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6973,7 +7012,7 @@ def J4_cmpgti_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000011;
@@ -6999,7 +7038,7 @@ def J4_cmpgti_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000011;
@@ -7025,7 +7064,7 @@ def J4_cmpgti_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001011;
@@ -7051,7 +7090,7 @@ def J4_cmpgti_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001011;
@@ -7077,7 +7116,7 @@ def J4_cmpgti_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7102,7 +7141,7 @@ def J4_cmpgti_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7127,7 +7166,7 @@ def J4_cmpgti_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000010;
@@ -7152,7 +7191,7 @@ def J4_cmpgti_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000010;
@@ -7177,7 +7216,7 @@ def J4_cmpgti_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001010;
@@ -7202,7 +7241,7 @@ def J4_cmpgti_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001010;
@@ -7227,7 +7266,7 @@ def J4_cmpgtn1_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_3694bd, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_3694bd, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -7253,7 +7292,7 @@ def J4_cmpgtn1_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_a6853f, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_a6853f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -7279,7 +7318,7 @@ def J4_cmpgtn1_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_a42857, PredRel {
+tc_3d495a39, TypeCJ>, Enc_a42857, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001000111;
@@ -7305,7 +7344,7 @@ def J4_cmpgtn1_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_f6fe0b, PredRel {
+tc_3d495a39, TypeCJ>, Enc_f6fe0b, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001000111;
@@ -7331,7 +7370,7 @@ def J4_cmpgtn1_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_3e3989, PredRel {
+tc_3d495a39, TypeCJ>, Enc_3e3989, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001001111;
@@ -7357,7 +7396,7 @@ def J4_cmpgtn1_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_b909d2, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b909d2, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001001111;
@@ -7383,7 +7422,7 @@ def J4_cmpgtn1_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_f82302, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_f82302, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -7408,7 +7447,7 @@ def J4_cmpgtn1_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_6413b6, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_6413b6, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -7433,7 +7472,7 @@ def J4_cmpgtn1_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_b78edd, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b78edd, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001000110;
@@ -7458,7 +7497,7 @@ def J4_cmpgtn1_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_041d7b, PredRel {
+tc_3d495a39, TypeCJ>, Enc_041d7b, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001000110;
@@ -7483,7 +7522,7 @@ def J4_cmpgtn1_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_b1e1fb, PredRel {
+tc_3d495a39, TypeCJ>, Enc_b1e1fb, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001001110;
@@ -7508,7 +7547,7 @@ def J4_cmpgtn1_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_178717, PredRel {
+tc_3d495a39, TypeCJ>, Enc_178717, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001001110;
@@ -7533,7 +7572,7 @@ def J4_cmpgtu_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7559,7 +7598,7 @@ def J4_cmpgtu_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7585,7 +7624,7 @@ def J4_cmpgtu_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010101;
@@ -7611,7 +7650,7 @@ def J4_cmpgtu_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010101;
@@ -7637,7 +7676,7 @@ def J4_cmpgtu_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010101;
@@ -7663,7 +7702,7 @@ def J4_cmpgtu_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010101;
@@ -7689,7 +7728,7 @@ def J4_cmpgtu_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7714,7 +7753,7 @@ def J4_cmpgtu_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
-tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7739,7 +7778,7 @@ def J4_cmpgtu_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010100;
@@ -7764,7 +7803,7 @@ def J4_cmpgtu_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010100;
@@ -7789,7 +7828,7 @@ def J4_cmpgtu_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010100;
@@ -7814,7 +7853,7 @@ def J4_cmpgtu_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel {
+tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010100;
@@ -7839,7 +7878,7 @@ def J4_cmpgtui_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7865,7 +7904,7 @@ def J4_cmpgtui_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7891,7 +7930,7 @@ def J4_cmpgtui_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000101;
@@ -7917,7 +7956,7 @@ def J4_cmpgtui_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000101;
@@ -7943,7 +7982,7 @@ def J4_cmpgtui_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001101;
@@ -7969,7 +8008,7 @@ def J4_cmpgtui_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001101;
@@ -7995,7 +8034,7 @@ def J4_cmpgtui_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8020,7 +8059,7 @@ def J4_cmpgtui_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
-tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel {
+tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8045,7 +8084,7 @@ def J4_cmpgtui_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000100;
@@ -8070,7 +8109,7 @@ def J4_cmpgtui_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000100;
@@ -8095,7 +8134,7 @@ def J4_cmpgtui_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001100;
@@ -8120,7 +8159,7 @@ def J4_cmpgtui_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel {
+tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001100;
@@ -8145,7 +8184,7 @@ def J4_cmplt_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8171,7 +8210,7 @@ def J4_cmplt_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8197,7 +8236,7 @@ def J4_cmplt_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8222,7 +8261,7 @@ def J4_cmplt_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8247,7 +8286,7 @@ def J4_cmpltu_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8273,7 +8312,7 @@ def J4_cmpltu_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8299,7 +8338,7 @@ def J4_cmpltu_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8324,7 +8363,7 @@ def J4_cmpltu_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
-tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel {
+tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8349,7 +8388,7 @@ def J4_hintjumpr : HInst<
(outs),
(ins IntRegs:$Rs32),
"hintjr($Rs32)",
-tc_9faf76ae, TypeJ>, Enc_ecbcc8 {
+tc_d5b7b0c1, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010101;
let isTerminator = 1;
@@ -8361,7 +8400,7 @@ def J4_jumpseti : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u6_0Imm:$II, b30_2Imm:$Ii),
"$Rd16 = #$II ; jump $Ii",
-tc_49eb22c8, TypeCJ>, Enc_9e4c3f {
+tc_0663f615, TypeCJ>, Enc_9e4c3f {
let Inst{0-0} = 0b0;
let Inst{31-22} = 0b0001011000;
let hasNewValue = 1;
@@ -8381,7 +8420,7 @@ def J4_jumpsetr : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"$Rd16 = $Rs16 ; jump $Ii",
-tc_49eb22c8, TypeCJ>, Enc_66bce1 {
+tc_0663f615, TypeCJ>, Enc_66bce1 {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001011100;
@@ -8402,7 +8441,7 @@ def J4_tstbit0_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!tstbit($Ns8.new,#0)) jump:nt $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -8427,7 +8466,7 @@ def J4_tstbit0_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!tstbit($Ns8.new,#0)) jump:t $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -8452,7 +8491,7 @@ def J4_tstbit0_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (!p0.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001000111;
@@ -8477,7 +8516,7 @@ def J4_tstbit0_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (!p0.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001000111;
@@ -8502,7 +8541,7 @@ def J4_tstbit0_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (!p1.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001001111;
@@ -8527,7 +8566,7 @@ def J4_tstbit0_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (!p1.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001001111;
@@ -8552,7 +8591,7 @@ def J4_tstbit0_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (tstbit($Ns8.new,#0)) jump:nt $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -8576,7 +8615,7 @@ def J4_tstbit0_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (tstbit($Ns8.new,#0)) jump:t $Ii",
-tc_746baa8e, TypeNCJ>, Enc_69d63b {
+tc_8c945be0, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -8600,7 +8639,7 @@ def J4_tstbit0_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (p0.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001000110;
@@ -8624,7 +8663,7 @@ def J4_tstbit0_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (p0.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001000110;
@@ -8648,7 +8687,7 @@ def J4_tstbit0_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (p1.new) jump:nt $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001001110;
@@ -8672,7 +8711,7 @@ def J4_tstbit0_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (p1.new) jump:t $Ii",
-tc_3cb8ea06, TypeCJ>, Enc_ad1c74 {
+tc_2332b92e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001001110;
@@ -8696,7 +8735,7 @@ def L2_deallocframe : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = deallocframe($Rs32):raw",
-tc_d1090e34, TypeLD>, Enc_3a3d62 {
+tc_15aa71c5, TypeLD>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010000000;
let accessSize = DoubleWordAccess;
@@ -8708,7 +8747,7 @@ def L2_loadalignb_io : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Ryy32 = memb_fifo($Rs32+#$Ii)",
-tc_ef52ed71, TypeLD>, Enc_a27588 {
+tc_5ef37dc4, TypeLD>, Enc_a27588 {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8725,9 +8764,10 @@ def L2_loadalignb_pbr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++$Mu2:brev)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110100;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
@@ -8736,7 +8776,7 @@ def L2_loadalignb_pci : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++#$Ii:circ($Mu2))",
-tc_03220ffa, TypeLD>, Enc_74aef2 {
+tc_785f65a7, TypeLD>, Enc_74aef2 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000100;
let addrMode = PostInc;
@@ -8749,7 +8789,7 @@ def L2_loadalignb_pcr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++I:circ($Mu2))",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000100;
let addrMode = PostInc;
@@ -8762,7 +8802,7 @@ def L2_loadalignb_pi : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Ryy32 = memb_fifo($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_6b197f {
+tc_3c76b0ff, TypeLD>, Enc_6b197f {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010100;
let addrMode = PostInc;
@@ -8774,7 +8814,7 @@ def L2_loadalignb_pr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++$Mu2)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100100;
let addrMode = PostInc;
@@ -8786,7 +8826,7 @@ def L2_loadalignb_zomap : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
"$Ryy32 = memb_fifo($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Ryy32 = $Ryy32in";
@@ -8795,7 +8835,7 @@ def L2_loadalignh_io : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s31_1Imm:$Ii),
"$Ryy32 = memh_fifo($Rs32+#$Ii)",
-tc_ef52ed71, TypeLD>, Enc_5cd7e9 {
+tc_5ef37dc4, TypeLD>, Enc_5cd7e9 {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8812,9 +8852,10 @@ def L2_loadalignh_pbr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++$Mu2:brev)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110010;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
@@ -8823,7 +8864,7 @@ def L2_loadalignh_pci : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++#$Ii:circ($Mu2))",
-tc_03220ffa, TypeLD>, Enc_9e2e1c {
+tc_785f65a7, TypeLD>, Enc_9e2e1c {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000010;
let addrMode = PostInc;
@@ -8836,7 +8877,7 @@ def L2_loadalignh_pcr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++I:circ($Mu2))",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000010;
let addrMode = PostInc;
@@ -8849,7 +8890,7 @@ def L2_loadalignh_pi : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Ryy32 = memh_fifo($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_bd1cbc {
+tc_3c76b0ff, TypeLD>, Enc_bd1cbc {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010010;
let addrMode = PostInc;
@@ -8861,7 +8902,7 @@ def L2_loadalignh_pr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++$Mu2)",
-tc_bad2bcaf, TypeLD>, Enc_1f5d8f {
+tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100010;
let addrMode = PostInc;
@@ -8873,7 +8914,7 @@ def L2_loadalignh_zomap : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
"$Ryy32 = memh_fifo($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Ryy32 = $Ryy32in";
@@ -8882,7 +8923,7 @@ def L2_loadbsw2_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = membh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214 {
+tc_17e0d2cd, TypeLD>, Enc_de0214 {
let Inst{24-21} = 0b0001;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -8900,11 +8941,12 @@ def L2_loadbsw2_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110001;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -8913,7 +8955,7 @@ def L2_loadbsw2_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000001;
let hasNewValue = 1;
@@ -8928,7 +8970,7 @@ def L2_loadbsw2_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000001;
let hasNewValue = 1;
@@ -8943,7 +8985,7 @@ def L2_loadbsw2_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = membh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467 {
+tc_44d3da28, TypeLD>, Enc_152467 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010001;
let hasNewValue = 1;
@@ -8957,7 +8999,7 @@ def L2_loadbsw2_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100001;
let hasNewValue = 1;
@@ -8971,7 +9013,7 @@ def L2_loadbsw2_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = membh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -8981,7 +9023,7 @@ def L2_loadbsw4_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rdd32 = membh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2d7491 {
+tc_17e0d2cd, TypeLD>, Enc_2d7491 {
let Inst{24-21} = 0b0111;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8997,9 +9039,10 @@ def L2_loadbsw4_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110111;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9008,7 +9051,7 @@ def L2_loadbsw4_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_70b24b {
+tc_e93a3d71, TypeLD>, Enc_70b24b {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000111;
let addrMode = PostInc;
@@ -9021,7 +9064,7 @@ def L2_loadbsw4_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000111;
let addrMode = PostInc;
@@ -9034,7 +9077,7 @@ def L2_loadbsw4_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rdd32 = membh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_71f1b4 {
+tc_44d3da28, TypeLD>, Enc_71f1b4 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010111;
let addrMode = PostInc;
@@ -9046,7 +9089,7 @@ def L2_loadbsw4_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100111;
let addrMode = PostInc;
@@ -9058,7 +9101,7 @@ def L2_loadbsw4_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = membh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9066,7 +9109,7 @@ def L2_loadbzw2_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memubh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214 {
+tc_17e0d2cd, TypeLD>, Enc_de0214 {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9084,11 +9127,12 @@ def L2_loadbzw2_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110011;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9097,7 +9141,7 @@ def L2_loadbzw2_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000011;
let hasNewValue = 1;
@@ -9112,7 +9156,7 @@ def L2_loadbzw2_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000011;
let hasNewValue = 1;
@@ -9127,7 +9171,7 @@ def L2_loadbzw2_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memubh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467 {
+tc_44d3da28, TypeLD>, Enc_152467 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010011;
let hasNewValue = 1;
@@ -9141,7 +9185,7 @@ def L2_loadbzw2_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100011;
let hasNewValue = 1;
@@ -9155,7 +9199,7 @@ def L2_loadbzw2_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memubh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9165,7 +9209,7 @@ def L2_loadbzw4_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rdd32 = memubh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2d7491 {
+tc_17e0d2cd, TypeLD>, Enc_2d7491 {
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -9181,9 +9225,10 @@ def L2_loadbzw4_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110101;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9192,7 +9237,7 @@ def L2_loadbzw4_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_70b24b {
+tc_e93a3d71, TypeLD>, Enc_70b24b {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000101;
let addrMode = PostInc;
@@ -9205,7 +9250,7 @@ def L2_loadbzw4_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000101;
let addrMode = PostInc;
@@ -9218,7 +9263,7 @@ def L2_loadbzw4_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rdd32 = memubh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_71f1b4 {
+tc_44d3da28, TypeLD>, Enc_71f1b4 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010101;
let addrMode = PostInc;
@@ -9230,7 +9275,7 @@ def L2_loadbzw4_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100101;
let addrMode = PostInc;
@@ -9242,7 +9287,7 @@ def L2_loadbzw4_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memubh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9250,7 +9295,7 @@ def L2_loadrb_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memb($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9271,11 +9316,12 @@ def L2_loadrb_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9284,7 +9330,7 @@ def L2_loadrb_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e0a47a {
+tc_e93a3d71, TypeLD>, Enc_e0a47a {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001000;
let hasNewValue = 1;
@@ -9299,7 +9345,7 @@ def L2_loadrb_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001000;
let hasNewValue = 1;
@@ -9314,7 +9360,7 @@ def L2_loadrb_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Rd32 = memb($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011000;
let hasNewValue = 1;
@@ -9331,7 +9377,7 @@ def L2_loadrb_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101000;
let hasNewValue = 1;
@@ -9345,7 +9391,7 @@ def L2_loadrb_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memb($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9355,7 +9401,7 @@ def L2_loadrbgp : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memb(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9374,7 +9420,7 @@ def L2_loadrd_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s29_3Imm:$Ii),
"$Rdd32 = memd($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -9393,9 +9439,10 @@ def L2_loadrd_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111110;
+let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9404,7 +9451,7 @@ def L2_loadrd_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_b05839 {
+tc_e93a3d71, TypeLD>, Enc_b05839 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001110;
let addrMode = PostInc;
@@ -9417,7 +9464,7 @@ def L2_loadrd_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001110;
let addrMode = PostInc;
@@ -9430,7 +9477,7 @@ def L2_loadrd_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii),
"$Rdd32 = memd($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011110;
let addrMode = PostInc;
@@ -9445,7 +9492,7 @@ def L2_loadrd_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_7eee72 {
+tc_44d3da28, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101110;
let addrMode = PostInc;
@@ -9457,7 +9504,7 @@ def L2_loadrd_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memd($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9465,7 +9512,7 @@ def L2_loadrdgp : HInst<
(outs DoubleRegs:$Rdd32),
(ins u29_3Imm:$Ii),
"$Rdd32 = memd(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_509701, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_509701, AddrModeRel {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b01001;
let accessSize = DoubleWordAccess;
@@ -9482,7 +9529,7 @@ def L2_loadrh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9503,11 +9550,12 @@ def L2_loadrh_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9516,7 +9564,7 @@ def L2_loadrh_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001010;
let hasNewValue = 1;
@@ -9531,7 +9579,7 @@ def L2_loadrh_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001010;
let hasNewValue = 1;
@@ -9546,7 +9594,7 @@ def L2_loadrh_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011010;
let hasNewValue = 1;
@@ -9563,7 +9611,7 @@ def L2_loadrh_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101010;
let hasNewValue = 1;
@@ -9577,7 +9625,7 @@ def L2_loadrh_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9587,7 +9635,7 @@ def L2_loadrhgp : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memh(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9606,7 +9654,7 @@ def L2_loadri_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rd32 = memw($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9627,11 +9675,12 @@ def L2_loadri_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9640,7 +9689,7 @@ def L2_loadri_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_27fd0e {
+tc_e93a3d71, TypeLD>, Enc_27fd0e {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001100;
let hasNewValue = 1;
@@ -9655,7 +9704,7 @@ def L2_loadri_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001100;
let hasNewValue = 1;
@@ -9670,7 +9719,7 @@ def L2_loadri_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rd32 = memw($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011100;
let hasNewValue = 1;
@@ -9687,7 +9736,7 @@ def L2_loadri_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101100;
let hasNewValue = 1;
@@ -9701,7 +9750,7 @@ def L2_loadri_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memw($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9711,7 +9760,7 @@ def L2_loadrigp : HInst<
(outs IntRegs:$Rd32),
(ins u30_2Imm:$Ii),
"$Rd32 = memw(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9730,7 +9779,7 @@ def L2_loadrub_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memub($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9751,11 +9800,12 @@ def L2_loadrub_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9764,7 +9814,7 @@ def L2_loadrub_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e0a47a {
+tc_e93a3d71, TypeLD>, Enc_e0a47a {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001001;
let hasNewValue = 1;
@@ -9779,7 +9829,7 @@ def L2_loadrub_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001001;
let hasNewValue = 1;
@@ -9794,7 +9844,7 @@ def L2_loadrub_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Rd32 = memub($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011001;
let hasNewValue = 1;
@@ -9811,7 +9861,7 @@ def L2_loadrub_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101001;
let hasNewValue = 1;
@@ -9825,7 +9875,7 @@ def L2_loadrub_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memub($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9835,7 +9885,7 @@ def L2_loadrubgp : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memub(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9854,7 +9904,7 @@ def L2_loadruh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memuh($Rs32+#$Ii)",
-tc_7f881c76, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
+tc_17e0d2cd, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9875,11 +9925,12 @@ def L2_loadruh_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++$Mu2:brev)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -9888,7 +9939,7 @@ def L2_loadruh_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++#$Ii:circ($Mu2))",
-tc_4403ca65, TypeLD>, Enc_e83554 {
+tc_e93a3d71, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001011;
let hasNewValue = 1;
@@ -9903,7 +9954,7 @@ def L2_loadruh_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++I:circ($Mu2))",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001011;
let hasNewValue = 1;
@@ -9918,7 +9969,7 @@ def L2_loadruh_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memuh($Rx32++#$Ii)",
-tc_2fc0c436, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
+tc_44d3da28, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011011;
let hasNewValue = 1;
@@ -9935,7 +9986,7 @@ def L2_loadruh_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++$Mu2)",
-tc_2fc0c436, TypeLD>, Enc_74d4e5 {
+tc_44d3da28, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101011;
let hasNewValue = 1;
@@ -9949,7 +10000,7 @@ def L2_loadruh_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memuh($Rs32)",
-tc_7f881c76, TypeMAPPING> {
+tc_17e0d2cd, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9959,7 +10010,7 @@ def L2_loadruhgp : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memuh(gp+#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9978,7 +10029,7 @@ def L2_loadw_locked : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memw_locked($Rs32)",
-tc_6aa5711a, TypeLD>, Enc_5e2823 {
+tc_b43e7930, TypeLD>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010010000;
let hasNewValue = 1;
@@ -9991,7 +10042,7 @@ def L2_ploadrbf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101000;
let isPredicated = 1;
@@ -10013,7 +10064,7 @@ def L2_ploadrbf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10030,7 +10081,7 @@ def L2_ploadrbf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memb($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10040,7 +10091,7 @@ def L2_ploadrbfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111000;
let isPredicated = 1;
@@ -10063,7 +10114,7 @@ def L2_ploadrbfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10081,7 +10132,7 @@ def L2_ploadrbfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memb($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10091,7 +10142,7 @@ def L2_ploadrbt_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001000;
let isPredicated = 1;
@@ -10112,7 +10163,7 @@ def L2_ploadrbt_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10128,7 +10179,7 @@ def L2_ploadrbt_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memb($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10138,7 +10189,7 @@ def L2_ploadrbtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011000;
let isPredicated = 1;
@@ -10160,7 +10211,7 @@ def L2_ploadrbtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10177,7 +10228,7 @@ def L2_ploadrbtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memb($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10187,7 +10238,7 @@ def L2_ploadrdf_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101110;
let isPredicated = 1;
@@ -10207,7 +10258,7 @@ def L2_ploadrdf_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_9d1247, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10222,7 +10273,7 @@ def L2_ploadrdf_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rdd32 = memd($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10230,7 +10281,7 @@ def L2_ploadrdfnew_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111110;
let isPredicated = 1;
@@ -10251,7 +10302,7 @@ def L2_ploadrdfnew_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_9d1247, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10267,7 +10318,7 @@ def L2_ploadrdfnew_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rdd32 = memd($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10275,7 +10326,7 @@ def L2_ploadrdt_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if ($Pt4) $Rdd32 = memd($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001110;
let isPredicated = 1;
@@ -10294,7 +10345,7 @@ def L2_ploadrdt_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if ($Pt4) $Rdd32 = memd($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_9d1247, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10308,7 +10359,7 @@ def L2_ploadrdt_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rdd32 = memd($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10316,7 +10367,7 @@ def L2_ploadrdtnew_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011110;
let isPredicated = 1;
@@ -10336,7 +10387,7 @@ def L2_ploadrdtnew_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_9d1247, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10351,7 +10402,7 @@ def L2_ploadrdtnew_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rdd32 = memd($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10359,7 +10410,7 @@ def L2_ploadrhf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101010;
let isPredicated = 1;
@@ -10381,7 +10432,7 @@ def L2_ploadrhf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10398,7 +10449,7 @@ def L2_ploadrhf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10408,7 +10459,7 @@ def L2_ploadrhfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111010;
let isPredicated = 1;
@@ -10431,7 +10482,7 @@ def L2_ploadrhfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10449,7 +10500,7 @@ def L2_ploadrhfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10459,7 +10510,7 @@ def L2_ploadrht_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4) $Rd32 = memh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001010;
let isPredicated = 1;
@@ -10480,7 +10531,7 @@ def L2_ploadrht_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4) $Rd32 = memh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10496,7 +10547,7 @@ def L2_ploadrht_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10506,7 +10557,7 @@ def L2_ploadrhtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011010;
let isPredicated = 1;
@@ -10528,7 +10579,7 @@ def L2_ploadrhtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10545,7 +10596,7 @@ def L2_ploadrhtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10555,7 +10606,7 @@ def L2_ploadrif_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if (!$Pt4) $Rd32 = memw($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101100;
let isPredicated = 1;
@@ -10577,7 +10628,7 @@ def L2_ploadrif_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if (!$Pt4) $Rd32 = memw($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_b97f71, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10594,7 +10645,7 @@ def L2_ploadrif_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memw($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10604,7 +10655,7 @@ def L2_ploadrifnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111100;
let isPredicated = 1;
@@ -10627,7 +10678,7 @@ def L2_ploadrifnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_b97f71, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10645,7 +10696,7 @@ def L2_ploadrifnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memw($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10655,7 +10706,7 @@ def L2_ploadrit_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if ($Pt4) $Rd32 = memw($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001100;
let isPredicated = 1;
@@ -10676,7 +10727,7 @@ def L2_ploadrit_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if ($Pt4) $Rd32 = memw($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_b97f71, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10692,7 +10743,7 @@ def L2_ploadrit_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memw($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10702,7 +10753,7 @@ def L2_ploadritnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011100;
let isPredicated = 1;
@@ -10724,7 +10775,7 @@ def L2_ploadritnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_b97f71, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10741,7 +10792,7 @@ def L2_ploadritnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memw($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10751,7 +10802,7 @@ def L2_ploadrubf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101001;
let isPredicated = 1;
@@ -10773,7 +10824,7 @@ def L2_ploadrubf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10790,7 +10841,7 @@ def L2_ploadrubf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memub($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10800,7 +10851,7 @@ def L2_ploadrubfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111001;
let isPredicated = 1;
@@ -10823,7 +10874,7 @@ def L2_ploadrubfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10841,7 +10892,7 @@ def L2_ploadrubfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memub($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10851,7 +10902,7 @@ def L2_ploadrubt_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001001;
let isPredicated = 1;
@@ -10872,7 +10923,7 @@ def L2_ploadrubt_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10888,7 +10939,7 @@ def L2_ploadrubt_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memub($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10898,7 +10949,7 @@ def L2_ploadrubtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011001;
let isPredicated = 1;
@@ -10920,7 +10971,7 @@ def L2_ploadrubtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10937,7 +10988,7 @@ def L2_ploadrubtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memub($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10947,7 +10998,7 @@ def L2_ploadruhf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101011;
let isPredicated = 1;
@@ -10969,7 +11020,7 @@ def L2_ploadruhf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -10986,7 +11037,7 @@ def L2_ploadruhf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memuh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10996,7 +11047,7 @@ def L2_ploadruhfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111011;
let isPredicated = 1;
@@ -11019,7 +11070,7 @@ def L2_ploadruhfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11037,7 +11088,7 @@ def L2_ploadruhfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memuh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11047,7 +11098,7 @@ def L2_ploadruht_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4) $Rd32 = memuh($Rs32+#$Ii)",
-tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001011;
let isPredicated = 1;
@@ -11068,7 +11119,7 @@ def L2_ploadruht_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4) $Rd32 = memuh($Rx32++#$Ii)",
-tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel {
+tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11084,7 +11135,7 @@ def L2_ploadruht_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memuh($Rs32)",
-tc_ef52ed71, TypeMAPPING> {
+tc_5ef37dc4, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11094,7 +11145,7 @@ def L2_ploadruhtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
-tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011011;
let isPredicated = 1;
@@ -11116,7 +11167,7 @@ def L2_ploadruhtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
-tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel {
+tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11133,7 +11184,7 @@ def L2_ploadruhtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memuh($Rs32)",
-tc_2fc0c436, TypeMAPPING> {
+tc_44d3da28, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11143,7 +11194,7 @@ def L4_add_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -11162,7 +11213,7 @@ def L4_add_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11170,7 +11221,7 @@ def L4_add_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -11189,7 +11240,7 @@ def L4_add_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11197,7 +11248,7 @@ def L4_add_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) += $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -11216,7 +11267,7 @@ def L4_add_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) += $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11224,7 +11275,7 @@ def L4_and_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -11243,7 +11294,7 @@ def L4_and_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11251,7 +11302,7 @@ def L4_and_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -11270,7 +11321,7 @@ def L4_and_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11278,7 +11329,7 @@ def L4_and_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) &= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -11297,7 +11348,7 @@ def L4_and_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) &= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11305,7 +11356,7 @@ def L4_iadd_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11324,7 +11375,7 @@ def L4_iadd_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11332,7 +11383,7 @@ def L4_iadd_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11351,7 +11402,7 @@ def L4_iadd_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11359,7 +11410,7 @@ def L4_iadd_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) += #$II",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11378,7 +11429,7 @@ def L4_iadd_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) += #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11386,7 +11437,7 @@ def L4_iand_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11405,7 +11456,7 @@ def L4_iand_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11413,7 +11464,7 @@ def L4_iand_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11432,7 +11483,7 @@ def L4_iand_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11440,7 +11491,7 @@ def L4_iand_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) = clrbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11459,7 +11510,7 @@ def L4_iand_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) = clrbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11467,7 +11518,7 @@ def L4_ior_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11486,7 +11537,7 @@ def L4_ior_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11494,7 +11545,7 @@ def L4_ior_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11513,7 +11564,7 @@ def L4_ior_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11521,7 +11572,7 @@ def L4_ior_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) = setbit(#$II)",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11540,7 +11591,7 @@ def L4_ior_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) = setbit(#$II)",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11548,7 +11599,7 @@ def L4_isub_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_46c951 {
+tc_096199d3, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11567,7 +11618,7 @@ def L4_isub_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11575,7 +11626,7 @@ def L4_isub_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_e66a97 {
+tc_096199d3, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11594,7 +11645,7 @@ def L4_isub_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11602,7 +11653,7 @@ def L4_isub_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) -= #$II",
-tc_44126683, TypeV4LDST>, Enc_84b2cd {
+tc_096199d3, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11621,7 +11672,7 @@ def L4_isub_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) -= #$II",
-tc_44126683, TypeMAPPING> {
+tc_096199d3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11629,7 +11680,7 @@ def L4_loadalignb_ap : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
"$Ryy32 = memb_fifo($Re32=#$II)",
-tc_5acef64a, TypeLD>, Enc_f394d3 {
+tc_7a91e76a, TypeLD>, Enc_f394d3 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010100;
@@ -11649,7 +11700,7 @@ def L4_loadalignb_ur : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Ryy32 = memb_fifo($Rt32<<#$Ii+#$II)",
-tc_0cd51c76, TypeLD>, Enc_04c959 {
+tc_a5d4aeec, TypeLD>, Enc_04c959 {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100100;
let addrMode = BaseLongOffset;
@@ -11669,7 +11720,7 @@ def L4_loadalignh_ap : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
"$Ryy32 = memh_fifo($Re32=#$II)",
-tc_5acef64a, TypeLD>, Enc_f394d3 {
+tc_7a91e76a, TypeLD>, Enc_f394d3 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010010;
@@ -11689,7 +11740,7 @@ def L4_loadalignh_ur : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Ryy32 = memh_fifo($Rt32<<#$Ii+#$II)",
-tc_0cd51c76, TypeLD>, Enc_04c959 {
+tc_a5d4aeec, TypeLD>, Enc_04c959 {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100010;
let addrMode = BaseLongOffset;
@@ -11709,7 +11760,7 @@ def L4_loadbsw2_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = membh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010001;
@@ -11730,7 +11781,7 @@ def L4_loadbsw2_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = membh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b {
+tc_bab0eed9, TypeLD>, Enc_4f677b {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100001;
let hasNewValue = 1;
@@ -11751,7 +11802,7 @@ def L4_loadbsw4_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = membh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010111;
@@ -11770,7 +11821,7 @@ def L4_loadbsw4_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = membh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe {
+tc_bab0eed9, TypeLD>, Enc_6185fe {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100111;
let addrMode = BaseLongOffset;
@@ -11789,7 +11840,7 @@ def L4_loadbzw2_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memubh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010011;
@@ -11810,7 +11861,7 @@ def L4_loadbzw2_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memubh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b {
+tc_bab0eed9, TypeLD>, Enc_4f677b {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100011;
let hasNewValue = 1;
@@ -11831,7 +11882,7 @@ def L4_loadbzw4_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = memubh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010101;
@@ -11850,7 +11901,7 @@ def L4_loadbzw4_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = memubh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe {
+tc_bab0eed9, TypeLD>, Enc_6185fe {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100101;
let addrMode = BaseLongOffset;
@@ -11869,7 +11920,7 @@ def L4_loadd_locked : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memd_locked($Rs32)",
-tc_6aa5711a, TypeLD>, Enc_3a3d62 {
+tc_b43e7930, TypeLD>, Enc_3a3d62 {
let Inst{13-5} = 0b010000000;
let Inst{31-21} = 0b10010010000;
let accessSize = DoubleWordAccess;
@@ -11880,7 +11931,7 @@ def L4_loadrb_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memb($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011000;
@@ -11901,7 +11952,7 @@ def L4_loadrb_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010000;
let hasNewValue = 1;
@@ -11918,7 +11969,7 @@ def L4_loadrb_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memb($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101000;
let hasNewValue = 1;
@@ -11940,7 +11991,7 @@ def L4_loadrd_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = memd($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_7fa7f6 {
+tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011110;
@@ -11959,7 +12010,7 @@ def L4_loadrd_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010110;
let addrMode = BaseRegOffset;
@@ -11974,7 +12025,7 @@ def L4_loadrd_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = memd($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101110;
let addrMode = BaseLongOffset;
@@ -11994,7 +12045,7 @@ def L4_loadrh_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011010;
@@ -12015,7 +12066,7 @@ def L4_loadrh_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010010;
let hasNewValue = 1;
@@ -12032,7 +12083,7 @@ def L4_loadrh_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101010;
let hasNewValue = 1;
@@ -12054,7 +12105,7 @@ def L4_loadri_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memw($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011100;
@@ -12075,7 +12126,7 @@ def L4_loadri_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010100;
let hasNewValue = 1;
@@ -12092,7 +12143,7 @@ def L4_loadri_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memw($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101100;
let hasNewValue = 1;
@@ -12114,7 +12165,7 @@ def L4_loadrub_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memub($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011001;
@@ -12135,7 +12186,7 @@ def L4_loadrub_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010001;
let hasNewValue = 1;
@@ -12152,7 +12203,7 @@ def L4_loadrub_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memub($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101001;
let hasNewValue = 1;
@@ -12174,7 +12225,7 @@ def L4_loadruh_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memuh($Re32=#$II)",
-tc_b77c481f, TypeLD>, Enc_323f2d {
+tc_3b5b7ef9, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011011;
@@ -12195,7 +12246,7 @@ def L4_loadruh_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010011;
let hasNewValue = 1;
@@ -12212,7 +12263,7 @@ def L4_loadruh_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memuh($Rt32<<#$Ii+#$II)",
-tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101011;
let hasNewValue = 1;
@@ -12234,7 +12285,7 @@ def L4_or_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -12253,7 +12304,7 @@ def L4_or_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12261,7 +12312,7 @@ def L4_or_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -12280,7 +12331,7 @@ def L4_or_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12288,7 +12339,7 @@ def L4_or_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) |= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -12307,7 +12358,7 @@ def L4_or_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) |= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12315,7 +12366,7 @@ def L4_ploadrbf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111000;
@@ -12340,7 +12391,7 @@ def L4_ploadrbf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12357,7 +12408,7 @@ def L4_ploadrbfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111000;
@@ -12383,7 +12434,7 @@ def L4_ploadrbfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12401,7 +12452,7 @@ def L4_ploadrbt_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111000;
@@ -12425,7 +12476,7 @@ def L4_ploadrbt_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000000;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12441,7 +12492,7 @@ def L4_ploadrbtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111000;
@@ -12466,7 +12517,7 @@ def L4_ploadrbtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010000;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12483,7 +12534,7 @@ def L4_ploadrdf_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111110;
@@ -12506,7 +12557,7 @@ def L4_ploadrdf_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110001110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12521,7 +12572,7 @@ def L4_ploadrdfnew_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111110;
@@ -12545,7 +12596,7 @@ def L4_ploadrdfnew_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110011110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12561,7 +12612,7 @@ def L4_ploadrdt_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rdd32 = memd(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111110;
@@ -12583,7 +12634,7 @@ def L4_ploadrdt_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110000110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -12597,7 +12648,7 @@ def L4_ploadrdtnew_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111110;
@@ -12620,7 +12671,7 @@ def L4_ploadrdtnew_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110010110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -12635,7 +12686,7 @@ def L4_ploadrhf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111010;
@@ -12660,7 +12711,7 @@ def L4_ploadrhf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12677,7 +12728,7 @@ def L4_ploadrhfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111010;
@@ -12703,7 +12754,7 @@ def L4_ploadrhfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12721,7 +12772,7 @@ def L4_ploadrht_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111010;
@@ -12745,7 +12796,7 @@ def L4_ploadrht_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000010;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12761,7 +12812,7 @@ def L4_ploadrhtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111010;
@@ -12786,7 +12837,7 @@ def L4_ploadrhtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010010;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12803,7 +12854,7 @@ def L4_ploadrif_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memw(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111100;
@@ -12828,7 +12879,7 @@ def L4_ploadrif_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12845,7 +12896,7 @@ def L4_ploadrifnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111100;
@@ -12871,7 +12922,7 @@ def L4_ploadrifnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12889,7 +12940,7 @@ def L4_ploadrit_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memw(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111100;
@@ -12913,7 +12964,7 @@ def L4_ploadrit_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12929,7 +12980,7 @@ def L4_ploadritnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111100;
@@ -12954,7 +13005,7 @@ def L4_ploadritnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12971,7 +13022,7 @@ def L4_ploadrubf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111001;
@@ -12996,7 +13047,7 @@ def L4_ploadrubf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001001;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13013,7 +13064,7 @@ def L4_ploadrubfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111001;
@@ -13039,7 +13090,7 @@ def L4_ploadrubfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011001;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13057,7 +13108,7 @@ def L4_ploadrubt_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111001;
@@ -13081,7 +13132,7 @@ def L4_ploadrubt_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000001;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13097,7 +13148,7 @@ def L4_ploadrubtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111001;
@@ -13122,7 +13173,7 @@ def L4_ploadrubtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010001;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13139,7 +13190,7 @@ def L4_ploadruhf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111011;
@@ -13164,7 +13215,7 @@ def L4_ploadruhf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13181,7 +13232,7 @@ def L4_ploadruhfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111011;
@@ -13207,7 +13258,7 @@ def L4_ploadruhfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13225,7 +13276,7 @@ def L4_ploadruht_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memuh(#$Ii)",
-tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111011;
@@ -13249,7 +13300,7 @@ def L4_ploadruht_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000011;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13265,7 +13316,7 @@ def L4_ploadruhtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh(#$Ii)",
-tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111011;
@@ -13290,7 +13341,7 @@ def L4_ploadruhtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010011;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13307,7 +13358,7 @@ def L4_return : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = dealloc_return($Rs32):raw",
-tc_3d04548d, TypeLD>, Enc_3a3d62, PredNewRel {
+tc_675e4897, TypeLD>, Enc_3a3d62, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010110000;
let isTerminator = 1;
@@ -13328,7 +13379,7 @@ def L4_return_f : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4) $Rdd32 = dealloc_return($Rs32):raw",
-tc_513bef45, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_2b8da4c2, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1100;
let Inst{31-21} = 0b10010110000;
@@ -13350,7 +13401,7 @@ def L4_return_fnew_pnt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b10010110000;
@@ -13373,7 +13424,7 @@ def L4_return_fnew_pt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1110;
let Inst{31-21} = 0b10010110000;
@@ -13396,7 +13447,7 @@ def L4_return_map_to_raw_f : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4) dealloc_return",
-tc_513bef45, TypeMAPPING>, Requires<[HasV65]> {
+tc_2b8da4c2, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13404,7 +13455,7 @@ def L4_return_map_to_raw_fnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:nt",
-tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> {
+tc_9da59d12, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13412,7 +13463,7 @@ def L4_return_map_to_raw_fnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:t",
-tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> {
+tc_9da59d12, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13420,7 +13471,7 @@ def L4_return_map_to_raw_t : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4) dealloc_return",
-tc_3bc2c5d3, TypeMAPPING>, Requires<[HasV65]> {
+tc_4d5fa3a1, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13428,7 +13479,7 @@ def L4_return_map_to_raw_tnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:nt",
-tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> {
+tc_e06f432a, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13436,7 +13487,7 @@ def L4_return_map_to_raw_tnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:t",
-tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> {
+tc_e06f432a, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13444,7 +13495,7 @@ def L4_return_t : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4) $Rdd32 = dealloc_return($Rs32):raw",
-tc_513bef45, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_2b8da4c2, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b10010110000;
@@ -13465,7 +13516,7 @@ def L4_return_tnew_pnt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b10010110000;
@@ -13487,7 +13538,7 @@ def L4_return_tnew_pt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw",
-tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b10010110000;
@@ -13509,7 +13560,7 @@ def L4_sub_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_d44e31 {
+tc_7186d325, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -13528,7 +13579,7 @@ def L4_sub_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13536,7 +13587,7 @@ def L4_sub_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_163a3c {
+tc_7186d325, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -13555,7 +13606,7 @@ def L4_sub_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13563,7 +13614,7 @@ def L4_sub_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) -= $Rt32",
-tc_44126683, TypeV4LDST>, Enc_226535 {
+tc_7186d325, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -13582,7 +13633,7 @@ def L4_sub_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) -= $Rt32",
-tc_44126683, TypeMAPPING> {
+tc_7186d325, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13590,15 +13641,26 @@ def L6_deallocframe_map_to_raw : HInst<
(outs),
(ins),
"deallocframe",
-tc_d1090e34, TypeMAPPING>, Requires<[HasV65]> {
+tc_15aa71c5, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
+def L6_memcpy : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, ModRegs:$Mu2),
+"memcpy($Rs32,$Rt32,$Mu2)",
+tc_a6b1eca9, TypeLD>, Enc_a75aa6, Requires<[HasV66]> {
+let Inst{7-0} = 0b01000000;
+let Inst{31-21} = 0b10010010000;
+let mayLoad = 1;
+let isSolo = 1;
+let mayStore = 1;
+}
def L6_return_map_to_raw : HInst<
(outs),
(ins),
"dealloc_return",
-tc_3d04548d, TypeMAPPING>, Requires<[HasV65]> {
+tc_675e4897, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13606,7 +13668,7 @@ def M2_acci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += add($Rs32,$Rt32)",
-tc_c74f796f, TypeM>, Enc_2ae154, ImmRegRel {
+tc_f675fee8, TypeM>, Enc_2ae154, ImmRegRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -13621,7 +13683,7 @@ def M2_accii : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 += add($Rs32,#$Ii)",
-tc_c74f796f, TypeM>, Enc_c90aca, ImmRegRel {
+tc_f675fee8, TypeM>, Enc_c90aca, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100010000;
let hasNewValue = 1;
@@ -13640,7 +13702,7 @@ def M2_cmaci_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpyi($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13651,7 +13713,7 @@ def M2_cmacr_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpyr($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13662,7 +13724,7 @@ def M2_cmacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13674,7 +13736,7 @@ def M2_cmacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -13686,7 +13748,7 @@ def M2_cmacsc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32*):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13698,7 +13760,7 @@ def M2_cmacsc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32*):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -13710,7 +13772,7 @@ def M2_cmpyi_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpyi($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13720,7 +13782,7 @@ def M2_cmpyr_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpyr($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13730,7 +13792,7 @@ def M2_cmpyrs_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -13743,7 +13805,7 @@ def M2_cmpyrs_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13756,7 +13818,7 @@ def M2_cmpyrsc_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32*):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101011;
@@ -13769,7 +13831,7 @@ def M2_cmpyrsc_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -13782,7 +13844,7 @@ def M2_cmpys_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13793,7 +13855,7 @@ def M2_cmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -13804,7 +13866,7 @@ def M2_cmpysc_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32*):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -13815,7 +13877,7 @@ def M2_cmpysc_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32*):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101110;
@@ -13826,7 +13888,7 @@ def M2_cnacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13838,7 +13900,7 @@ def M2_cnacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -13850,7 +13912,7 @@ def M2_cnacsc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32*):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13862,7 +13924,7 @@ def M2_cnacsc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32*):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -13874,7 +13936,7 @@ def M2_dpmpyss_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13885,7 +13947,7 @@ def M2_dpmpyss_nac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -13896,7 +13958,7 @@ def M2_dpmpyss_rnd_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -13908,7 +13970,7 @@ def M2_dpmpyss_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13918,7 +13980,7 @@ def M2_dpmpyuu_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13929,7 +13991,7 @@ def M2_dpmpyuu_nac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111011;
@@ -13940,7 +14002,7 @@ def M2_dpmpyuu_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -13950,7 +14012,7 @@ def M2_hmmpyh_rs1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13963,7 +14025,7 @@ def M2_hmmpyh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13976,7 +14038,7 @@ def M2_hmmpyl_rs1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -13989,7 +14051,7 @@ def M2_hmmpyl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -14002,7 +14064,7 @@ def M2_maci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyi($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_2ae154, ImmRegRel {
+tc_d773585a, TypeM>, Enc_2ae154, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -14017,7 +14079,7 @@ def M2_macsin : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rx32 -= mpyi($Rs32,#$Ii)",
-tc_16d0d8d5, TypeM>, Enc_c90aca {
+tc_05d3a09b, TypeM>, Enc_c90aca {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100001100;
let hasNewValue = 1;
@@ -14035,7 +14097,7 @@ def M2_macsip : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rx32 += mpyi($Rs32,#$Ii)",
-tc_16d0d8d5, TypeM>, Enc_c90aca, ImmRegRel {
+tc_05d3a09b, TypeM>, Enc_c90aca, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100001000;
let hasNewValue = 1;
@@ -14054,7 +14116,7 @@ def M2_mmachs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -14066,7 +14128,7 @@ def M2_mmachs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -14078,7 +14140,7 @@ def M2_mmachs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -14090,7 +14152,7 @@ def M2_mmachs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -14102,7 +14164,7 @@ def M2_mmacls_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -14114,7 +14176,7 @@ def M2_mmacls_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -14126,7 +14188,7 @@ def M2_mmacls_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -14138,7 +14200,7 @@ def M2_mmacls_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -14150,7 +14212,7 @@ def M2_mmacuhs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -14162,7 +14224,7 @@ def M2_mmacuhs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -14174,7 +14236,7 @@ def M2_mmacuhs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -14186,7 +14248,7 @@ def M2_mmacuhs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -14198,7 +14260,7 @@ def M2_mmaculs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -14210,7 +14272,7 @@ def M2_mmaculs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -14222,7 +14284,7 @@ def M2_mmaculs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -14234,7 +14296,7 @@ def M2_mmaculs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -14246,7 +14308,7 @@ def M2_mmpyh_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -14257,7 +14319,7 @@ def M2_mmpyh_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -14268,7 +14330,7 @@ def M2_mmpyh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -14279,7 +14341,7 @@ def M2_mmpyh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -14290,7 +14352,7 @@ def M2_mmpyl_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -14301,7 +14363,7 @@ def M2_mmpyl_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -14312,7 +14374,7 @@ def M2_mmpyl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -14323,7 +14385,7 @@ def M2_mmpyl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -14334,7 +14396,7 @@ def M2_mmpyuh_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -14345,7 +14407,7 @@ def M2_mmpyuh_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -14356,7 +14418,7 @@ def M2_mmpyuh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -14367,7 +14429,7 @@ def M2_mmpyuh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -14378,7 +14440,7 @@ def M2_mmpyul_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -14389,7 +14451,7 @@ def M2_mmpyul_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -14400,7 +14462,7 @@ def M2_mmpyul_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -14411,18 +14473,31 @@ def M2_mmpyul_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
let prefersSlot3 = 1;
let Defs = [USR_OVF];
}
+def M2_mnaci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyi($Rs32,$Rt32)",
+tc_bdceeac1, TypeM>, Enc_2ae154, Requires<[HasV66]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
def M2_mpy_acc_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14435,7 +14510,7 @@ def M2_mpy_acc_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14448,7 +14523,7 @@ def M2_mpy_acc_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14461,7 +14536,7 @@ def M2_mpy_acc_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14474,7 +14549,7 @@ def M2_mpy_acc_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14487,7 +14562,7 @@ def M2_mpy_acc_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14500,7 +14575,7 @@ def M2_mpy_acc_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14513,7 +14588,7 @@ def M2_mpy_acc_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14526,7 +14601,7 @@ def M2_mpy_acc_sat_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14540,7 +14615,7 @@ def M2_mpy_acc_sat_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14554,7 +14629,7 @@ def M2_mpy_acc_sat_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14568,7 +14643,7 @@ def M2_mpy_acc_sat_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14582,7 +14657,7 @@ def M2_mpy_acc_sat_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14596,7 +14671,7 @@ def M2_mpy_acc_sat_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14610,7 +14685,7 @@ def M2_mpy_acc_sat_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14624,7 +14699,7 @@ def M2_mpy_acc_sat_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14638,7 +14713,7 @@ def M2_mpy_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14650,7 +14725,7 @@ def M2_mpy_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14662,7 +14737,7 @@ def M2_mpy_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14674,7 +14749,7 @@ def M2_mpy_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14686,7 +14761,7 @@ def M2_mpy_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14698,7 +14773,7 @@ def M2_mpy_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14710,7 +14785,7 @@ def M2_mpy_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14722,7 +14797,7 @@ def M2_mpy_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14734,7 +14809,7 @@ def M2_mpy_nac_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14747,7 +14822,7 @@ def M2_mpy_nac_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14760,7 +14835,7 @@ def M2_mpy_nac_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14773,7 +14848,7 @@ def M2_mpy_nac_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14786,7 +14861,7 @@ def M2_mpy_nac_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14799,7 +14874,7 @@ def M2_mpy_nac_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14812,7 +14887,7 @@ def M2_mpy_nac_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14825,7 +14900,7 @@ def M2_mpy_nac_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14838,7 +14913,7 @@ def M2_mpy_nac_sat_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14852,7 +14927,7 @@ def M2_mpy_nac_sat_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14866,7 +14941,7 @@ def M2_mpy_nac_sat_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14880,7 +14955,7 @@ def M2_mpy_nac_sat_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14894,7 +14969,7 @@ def M2_mpy_nac_sat_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14908,7 +14983,7 @@ def M2_mpy_nac_sat_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14922,7 +14997,7 @@ def M2_mpy_nac_sat_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14936,7 +15011,7 @@ def M2_mpy_nac_sat_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14950,7 +15025,7 @@ def M2_mpy_rnd_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -14962,7 +15037,7 @@ def M2_mpy_rnd_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -14974,7 +15049,7 @@ def M2_mpy_rnd_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -14986,7 +15061,7 @@ def M2_mpy_rnd_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -14998,7 +15073,7 @@ def M2_mpy_rnd_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15010,7 +15085,7 @@ def M2_mpy_rnd_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15022,7 +15097,7 @@ def M2_mpy_rnd_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15034,7 +15109,7 @@ def M2_mpy_rnd_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15046,7 +15121,7 @@ def M2_mpy_sat_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15059,7 +15134,7 @@ def M2_mpy_sat_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15072,7 +15147,7 @@ def M2_mpy_sat_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15085,7 +15160,7 @@ def M2_mpy_sat_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15098,7 +15173,7 @@ def M2_mpy_sat_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15111,7 +15186,7 @@ def M2_mpy_sat_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15124,7 +15199,7 @@ def M2_mpy_sat_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15137,7 +15212,7 @@ def M2_mpy_sat_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15150,7 +15225,7 @@ def M2_mpy_sat_rnd_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15163,7 +15238,7 @@ def M2_mpy_sat_rnd_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15176,7 +15251,7 @@ def M2_mpy_sat_rnd_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15189,7 +15264,7 @@ def M2_mpy_sat_rnd_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15202,7 +15277,7 @@ def M2_mpy_sat_rnd_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15215,7 +15290,7 @@ def M2_mpy_sat_rnd_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15228,7 +15303,7 @@ def M2_mpy_sat_rnd_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15241,7 +15316,7 @@ def M2_mpy_sat_rnd_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15254,7 +15329,7 @@ def M2_mpy_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101000;
@@ -15266,7 +15341,7 @@ def M2_mpy_up_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -15278,7 +15353,7 @@ def M2_mpy_up_s1_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -15291,7 +15366,7 @@ def M2_mpyd_acc_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15302,7 +15377,7 @@ def M2_mpyd_acc_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15313,7 +15388,7 @@ def M2_mpyd_acc_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15324,7 +15399,7 @@ def M2_mpyd_acc_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15335,7 +15410,7 @@ def M2_mpyd_acc_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15346,7 +15421,7 @@ def M2_mpyd_acc_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15357,7 +15432,7 @@ def M2_mpyd_acc_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15368,7 +15443,7 @@ def M2_mpyd_acc_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15379,7 +15454,7 @@ def M2_mpyd_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15389,7 +15464,7 @@ def M2_mpyd_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15399,7 +15474,7 @@ def M2_mpyd_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15409,7 +15484,7 @@ def M2_mpyd_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15419,7 +15494,7 @@ def M2_mpyd_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15429,7 +15504,7 @@ def M2_mpyd_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15439,7 +15514,7 @@ def M2_mpyd_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15449,7 +15524,7 @@ def M2_mpyd_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15459,7 +15534,7 @@ def M2_mpyd_nac_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15470,7 +15545,7 @@ def M2_mpyd_nac_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15481,7 +15556,7 @@ def M2_mpyd_nac_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15492,7 +15567,7 @@ def M2_mpyd_nac_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15503,7 +15578,7 @@ def M2_mpyd_nac_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15514,7 +15589,7 @@ def M2_mpyd_nac_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15525,7 +15600,7 @@ def M2_mpyd_nac_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15536,7 +15611,7 @@ def M2_mpyd_nac_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15547,7 +15622,7 @@ def M2_mpyd_rnd_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15557,7 +15632,7 @@ def M2_mpyd_rnd_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15567,7 +15642,7 @@ def M2_mpyd_rnd_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15577,7 +15652,7 @@ def M2_mpyd_rnd_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15587,7 +15662,7 @@ def M2_mpyd_rnd_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15597,7 +15672,7 @@ def M2_mpyd_rnd_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15607,7 +15682,7 @@ def M2_mpyd_rnd_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15617,7 +15692,7 @@ def M2_mpyd_rnd_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15627,7 +15702,7 @@ def M2_mpyi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyi($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be, ImmRegRel {
+tc_bafaade3, TypeM>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101000;
@@ -15641,7 +15716,7 @@ def M2_mpysin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Rd32 = -mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, Enc_b8c967 {
+tc_c8ce0b5c, TypeM>, Enc_b8c967 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100000100;
let hasNewValue = 1;
@@ -15652,7 +15727,7 @@ def M2_mpysip : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rd32 = +mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, Enc_b8c967 {
+tc_c8ce0b5c, TypeM>, Enc_b8c967 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100000000;
let hasNewValue = 1;
@@ -15668,7 +15743,7 @@ def M2_mpysmi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, m32_0Imm:$Ii),
"$Rd32 = mpyi($Rs32,#$Ii)",
-tc_1853ea6d, TypeM>, ImmRegRel {
+tc_c8ce0b5c, TypeM>, ImmRegRel {
let hasNewValue = 1;
let opNewValue = 0;
let CextOpcode = "M2_mpyi";
@@ -15684,7 +15759,7 @@ def M2_mpysu_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpysu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101011;
@@ -15696,7 +15771,7 @@ def M2_mpyu_acc_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15709,7 +15784,7 @@ def M2_mpyu_acc_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15722,7 +15797,7 @@ def M2_mpyu_acc_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15735,7 +15810,7 @@ def M2_mpyu_acc_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15748,7 +15823,7 @@ def M2_mpyu_acc_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15761,7 +15836,7 @@ def M2_mpyu_acc_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15774,7 +15849,7 @@ def M2_mpyu_acc_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15787,7 +15862,7 @@ def M2_mpyu_acc_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15800,7 +15875,7 @@ def M2_mpyu_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15812,7 +15887,7 @@ def M2_mpyu_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15824,7 +15899,7 @@ def M2_mpyu_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15836,7 +15911,7 @@ def M2_mpyu_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15848,7 +15923,7 @@ def M2_mpyu_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15860,7 +15935,7 @@ def M2_mpyu_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15872,7 +15947,7 @@ def M2_mpyu_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15884,7 +15959,7 @@ def M2_mpyu_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15896,7 +15971,7 @@ def M2_mpyu_nac_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15909,7 +15984,7 @@ def M2_mpyu_nac_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15922,7 +15997,7 @@ def M2_mpyu_nac_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15935,7 +16010,7 @@ def M2_mpyu_nac_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15948,7 +16023,7 @@ def M2_mpyu_nac_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15961,7 +16036,7 @@ def M2_mpyu_nac_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15974,7 +16049,7 @@ def M2_mpyu_nac_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15987,7 +16062,7 @@ def M2_mpyu_nac_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -16000,7 +16075,7 @@ def M2_mpyu_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101010;
@@ -16012,7 +16087,7 @@ def M2_mpyud_acc_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16023,7 +16098,7 @@ def M2_mpyud_acc_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16034,7 +16109,7 @@ def M2_mpyud_acc_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16045,7 +16120,7 @@ def M2_mpyud_acc_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16056,7 +16131,7 @@ def M2_mpyud_acc_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16067,7 +16142,7 @@ def M2_mpyud_acc_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16078,7 +16153,7 @@ def M2_mpyud_acc_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16089,7 +16164,7 @@ def M2_mpyud_acc_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16100,7 +16175,7 @@ def M2_mpyud_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16110,7 +16185,7 @@ def M2_mpyud_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16120,7 +16195,7 @@ def M2_mpyud_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16130,7 +16205,7 @@ def M2_mpyud_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16140,7 +16215,7 @@ def M2_mpyud_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.h)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16150,7 +16225,7 @@ def M2_mpyud_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.h):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16160,7 +16235,7 @@ def M2_mpyud_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.l)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16170,7 +16245,7 @@ def M2_mpyud_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.l):<<1",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16180,7 +16255,7 @@ def M2_mpyud_nac_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16191,7 +16266,7 @@ def M2_mpyud_nac_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16202,7 +16277,7 @@ def M2_mpyud_nac_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16213,7 +16288,7 @@ def M2_mpyud_nac_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16224,7 +16299,7 @@ def M2_mpyud_nac_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.h)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16235,7 +16310,7 @@ def M2_mpyud_nac_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16246,7 +16321,7 @@ def M2_mpyud_nac_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.l)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16257,7 +16332,7 @@ def M2_mpyud_nac_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16268,7 +16343,7 @@ def M2_mpyui : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyui($Rs32,$Rt32)",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -16278,7 +16353,7 @@ def M2_nacci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= add($Rs32,$Rt32)",
-tc_c74f796f, TypeM>, Enc_2ae154 {
+tc_f675fee8, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -16292,7 +16367,7 @@ def M2_naccii : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 -= add($Rs32,#$Ii)",
-tc_c74f796f, TypeM>, Enc_c90aca {
+tc_f675fee8, TypeM>, Enc_c90aca {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100010100;
let hasNewValue = 1;
@@ -16310,7 +16385,7 @@ def M2_subacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rt32, IntRegs:$Rs32),
"$Rx32 += sub($Rt32,$Rs32)",
-tc_c74f796f, TypeM>, Enc_a568d4 {
+tc_f675fee8, TypeM>, Enc_a568d4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -16324,7 +16399,7 @@ def M2_vabsdiffh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffh($Rtt32,$Rss32)",
-tc_2b6f77c6, TypeM>, Enc_ea23e4 {
+tc_002cb246, TypeM>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -16334,7 +16409,7 @@ def M2_vabsdiffw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffw($Rtt32,$Rss32)",
-tc_2b6f77c6, TypeM>, Enc_ea23e4 {
+tc_002cb246, TypeM>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -16344,7 +16419,7 @@ def M2_vcmac_s0_sat_i : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vcmpyi($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -16356,7 +16431,7 @@ def M2_vcmac_s0_sat_r : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vcmpyr($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -16368,7 +16443,7 @@ def M2_vcmpy_s0_sat_i : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyi($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -16379,7 +16454,7 @@ def M2_vcmpy_s0_sat_r : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyr($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -16390,7 +16465,7 @@ def M2_vcmpy_s1_sat_i : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyi($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -16401,7 +16476,7 @@ def M2_vcmpy_s1_sat_r : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyr($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -16412,7 +16487,7 @@ def M2_vdmacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpy($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16424,7 +16499,7 @@ def M2_vdmacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpy($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -16436,7 +16511,7 @@ def M2_vdmpyrs_s0 : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vdmpy($Rss32,$Rtt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001000;
@@ -16449,7 +16524,7 @@ def M2_vdmpyrs_s1 : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vdmpy($Rss32,$Rtt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001100;
@@ -16462,7 +16537,7 @@ def M2_vdmpys_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpy($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16473,7 +16548,7 @@ def M2_vdmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpy($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -16484,7 +16559,7 @@ def M2_vmac2 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -16495,7 +16570,7 @@ def M2_vmac2es : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -16506,7 +16581,7 @@ def M2_vmac2es_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16518,7 +16593,7 @@ def M2_vmac2es_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -16530,7 +16605,7 @@ def M2_vmac2s_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -16542,7 +16617,7 @@ def M2_vmac2s_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -16554,7 +16629,7 @@ def M2_vmac2su_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyhsu($Rs32,$Rt32):sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111011;
@@ -16566,7 +16641,7 @@ def M2_vmac2su_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyhsu($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111111;
@@ -16578,7 +16653,7 @@ def M2_vmpy2es_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyeh($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16589,7 +16664,7 @@ def M2_vmpy2es_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyeh($Rss32,$Rtt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -16600,7 +16675,7 @@ def M2_vmpy2s_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyh($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -16611,7 +16686,7 @@ def M2_vmpy2s_s0pack : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vmpyh($Rs32,$Rt32):rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -16624,7 +16699,7 @@ def M2_vmpy2s_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyh($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -16635,7 +16710,7 @@ def M2_vmpy2s_s1pack : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vmpyh($Rs32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM>, Enc_5ab2be {
+tc_bafaade3, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -16648,7 +16723,7 @@ def M2_vmpy2su_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyhsu($Rs32,$Rt32):sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -16659,7 +16734,7 @@ def M2_vmpy2su_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyhsu($Rs32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -16670,7 +16745,7 @@ def M2_vraddh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vraddh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001001;
@@ -16682,7 +16757,7 @@ def M2_vradduh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vradduh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001000;
@@ -16694,7 +16769,7 @@ def M2_vrcmaci_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyi($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16705,7 +16780,7 @@ def M2_vrcmaci_s0c : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyi($Rss32,$Rtt32*)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -16716,7 +16791,7 @@ def M2_vrcmacr_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyr($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16727,7 +16802,7 @@ def M2_vrcmacr_s0c : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyr($Rss32,$Rtt32*)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -16738,7 +16813,7 @@ def M2_vrcmpyi_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyi($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16748,7 +16823,7 @@ def M2_vrcmpyi_s0c : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyi($Rss32,$Rtt32*)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -16758,7 +16833,7 @@ def M2_vrcmpyr_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyr($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16768,7 +16843,7 @@ def M2_vrcmpyr_s0c : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyr($Rss32,$Rtt32*)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -16778,7 +16853,7 @@ def M2_vrcmpys_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += vrcmpys($Rss32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM> {
+tc_d773585a, TypeM> {
let isPseudo = 1;
let Constraints = "$Rxx32 = $Rxx32in";
}
@@ -16786,7 +16861,7 @@ def M2_vrcmpys_acc_s1_h : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -16798,7 +16873,7 @@ def M2_vrcmpys_acc_s1_l : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -16810,14 +16885,14 @@ def M2_vrcmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vrcmpys($Rss32,$Rt32):<<1:sat",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let isPseudo = 1;
}
def M2_vrcmpys_s1_h : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -16828,7 +16903,7 @@ def M2_vrcmpys_s1_l : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -16839,7 +16914,7 @@ def M2_vrcmpys_s1rp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = vrcmpys($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeM> {
+tc_bafaade3, TypeM> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -16848,7 +16923,7 @@ def M2_vrcmpys_s1rp_h : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:hi",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001101;
@@ -16861,7 +16936,7 @@ def M2_vrcmpys_s1rp_l : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:lo",
-tc_8fd5f294, TypeM>, Enc_d2216a {
+tc_bafaade3, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001101;
@@ -16874,7 +16949,7 @@ def M2_vrmac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16885,7 +16960,7 @@ def M2_vrmpy_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16895,7 +16970,7 @@ def M2_xor_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -16909,7 +16984,7 @@ def M4_and_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16923,7 +16998,7 @@ def M4_and_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -16937,7 +17012,7 @@ def M4_and_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16951,7 +17026,7 @@ def M4_and_xor : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16965,7 +17040,7 @@ def M4_cmpyi_wh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -16978,7 +17053,7 @@ def M4_cmpyi_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -16991,7 +17066,7 @@ def M4_cmpyr_wh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17004,7 +17079,7 @@ def M4_cmpyr_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
+tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17017,7 +17092,7 @@ def M4_mac_up_s1_sat : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -17032,7 +17107,7 @@ def M4_mpyri_addi : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii, IntRegs:$Rs32, u6_0Imm:$II),
"$Rd32 = add(#$Ii,mpyi($Rs32,#$II))",
-tc_16d0d8d5, TypeALU64>, Enc_322e1b, ImmRegRel {
+tc_05d3a09b, TypeALU64>, Enc_322e1b, ImmRegRel {
let Inst{31-24} = 0b11011000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17048,7 +17123,7 @@ def M4_mpyri_addr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Ru32, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rd32 = add($Ru32,mpyi($Rs32,#$Ii))",
-tc_16d0d8d5, TypeALU64>, Enc_420cf3, ImmRegRel {
+tc_05d3a09b, TypeALU64>, Enc_420cf3, ImmRegRel {
let Inst{31-23} = 0b110111111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17065,7 +17140,7 @@ def M4_mpyri_addr_u2 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Ru32, u6_2Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = add($Ru32,mpyi(#$Ii,$Rs32))",
-tc_bcc96cee, TypeALU64>, Enc_277737 {
+tc_1a2fd869, TypeALU64>, Enc_277737 {
let Inst{31-23} = 0b110111110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17075,7 +17150,7 @@ def M4_mpyrr_addi : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add(#$Ii,mpyi($Rs32,$Rt32))",
-tc_e913dc32, TypeALU64>, Enc_a7b8e8, ImmRegRel {
+tc_d773585a, TypeALU64>, Enc_a7b8e8, ImmRegRel {
let Inst{31-23} = 0b110101110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17092,7 +17167,7 @@ def M4_mpyrr_addr : HInst<
(outs IntRegs:$Ry32),
(ins IntRegs:$Ru32, IntRegs:$Ry32in, IntRegs:$Rs32),
"$Ry32 = add($Ru32,mpyi($Ry32in,$Rs32))",
-tc_e913dc32, TypeM>, Enc_7f1a05, ImmRegRel {
+tc_d773585a, TypeM>, Enc_7f1a05, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100011000;
@@ -17107,7 +17182,7 @@ def M4_nac_up_s1_sat : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32,$Rt32):<<1:sat",
-tc_e913dc32, TypeM>, Enc_2ae154 {
+tc_d773585a, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -17122,7 +17197,7 @@ def M4_or_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -17136,7 +17211,7 @@ def M4_or_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17150,7 +17225,7 @@ def M4_or_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17164,7 +17239,7 @@ def M4_or_xor : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= xor($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17178,7 +17253,7 @@ def M4_pmpyw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = pmpyw($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -17188,7 +17263,7 @@ def M4_pmpyw_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 ^= pmpyw($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -17199,7 +17274,7 @@ def M4_vpmpyh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vpmpyh($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101110;
@@ -17209,7 +17284,7 @@ def M4_vpmpyh_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 ^= vpmpyh($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111101;
@@ -17220,7 +17295,7 @@ def M4_vrmpyeh_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyweh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17231,7 +17306,7 @@ def M4_vrmpyeh_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyweh($Rss32,$Rtt32):<<1",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -17242,7 +17317,7 @@ def M4_vrmpyeh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyweh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -17252,7 +17327,7 @@ def M4_vrmpyeh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyweh($Rss32,$Rtt32):<<1",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -17262,7 +17337,7 @@ def M4_vrmpyoh_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpywoh($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -17273,7 +17348,7 @@ def M4_vrmpyoh_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpywoh($Rss32,$Rtt32):<<1",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -17284,7 +17359,7 @@ def M4_vrmpyoh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpywoh($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -17294,7 +17369,7 @@ def M4_vrmpyoh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpywoh($Rss32,$Rtt32):<<1",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17304,7 +17379,7 @@ def M4_xor_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= and($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17318,7 +17393,7 @@ def M4_xor_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= and($Rs32,~$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17332,7 +17407,7 @@ def M4_xor_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= or($Rs32,$Rt32)",
-tc_84df2cd3, TypeM>, Enc_2ae154 {
+tc_f429765c, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17346,7 +17421,7 @@ def M4_xor_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 ^= xor($Rss32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_88c16c {
+tc_f429765c, TypeS_3op>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001010100;
@@ -17357,7 +17432,7 @@ def M5_vdmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5]> {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17369,7 +17444,7 @@ def M5_vdmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5]> {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17380,7 +17455,7 @@ def M5_vmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpybsu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -17391,7 +17466,7 @@ def M5_vmacbuu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpybu($Rs32,$Rt32)",
-tc_e913dc32, TypeM>, Enc_61f0b0 {
+tc_d773585a, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -17402,7 +17477,7 @@ def M5_vmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpybsu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -17412,7 +17487,7 @@ def M5_vmpybuu : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpybu($Rs32,$Rt32)",
-tc_8fd5f294, TypeM>, Enc_be32a5 {
+tc_bafaade3, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -17422,7 +17497,7 @@ def M5_vrmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpybsu($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -17433,7 +17508,7 @@ def M5_vrmacbuu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpybu($Rss32,$Rtt32)",
-tc_e913dc32, TypeM>, Enc_88c16c {
+tc_d773585a, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -17444,7 +17519,7 @@ def M5_vrmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpybsu($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -17454,7 +17529,7 @@ def M5_vrmpybuu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpybu($Rss32,$Rtt32)",
-tc_8fd5f294, TypeM>, Enc_a56825 {
+tc_bafaade3, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -17464,7 +17539,7 @@ def M6_vabsdiffb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffb($Rtt32,$Rss32)",
-tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
+tc_9461ff31, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -17474,7 +17549,7 @@ def M6_vabsdiffub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffub($Rtt32,$Rss32)",
-tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
+tc_9461ff31, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17484,7 +17559,7 @@ def PS_loadrbabs : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memb(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17507,7 +17582,7 @@ def PS_loadrdabs : HInst<
(outs DoubleRegs:$Rdd32),
(ins u29_3Imm:$Ii),
"$Rdd32 = memd(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_509701, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_509701, AddrModeRel {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17528,7 +17603,7 @@ def PS_loadrhabs : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memh(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17551,7 +17626,7 @@ def PS_loadriabs : HInst<
(outs IntRegs:$Rd32),
(ins u30_2Imm:$Ii),
"$Rd32 = memw(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17574,7 +17649,7 @@ def PS_loadrubabs : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memub(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17597,7 +17672,7 @@ def PS_loadruhabs : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memuh(#$Ii)",
-tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17620,7 +17695,7 @@ def PS_storerbabs : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Rt32),
"memb(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
let Inst{24-21} = 0b0000;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17642,7 +17717,7 @@ def PS_storerbnewabs : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Nt8),
"memb(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_ad1831, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_ad1831, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17668,7 +17743,7 @@ def PS_storerdabs : HInst<
(outs),
(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd(#$Ii) = $Rtt32",
-tc_a788683e, TypeV2LDST>, Enc_5c124a, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_5c124a, AddrModeRel {
let Inst{24-21} = 0b0110;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17689,7 +17764,7 @@ def PS_storerfabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(#$Ii) = $Rt32.h",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17710,7 +17785,7 @@ def PS_storerhabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17732,7 +17807,7 @@ def PS_storerhnewabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Nt8),
"memh(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17758,7 +17833,7 @@ def PS_storeriabs : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Rt32),
"memw(#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_541f26, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_541f26, AddrModeRel {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
@@ -17780,7 +17855,7 @@ def PS_storerinewabs : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Nt8),
"memw(#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17806,7 +17881,7 @@ def S2_addasl_rrri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32, u3_0Imm:$Ii),
"$Rd32 = addasl($Rt32,$Rs32,#$Ii)",
-tc_c74f796f, TypeS_3op>, Enc_47ef61 {
+tc_f675fee8, TypeS_3op>, Enc_47ef61 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000100000;
let hasNewValue = 1;
@@ -17817,7 +17892,7 @@ def S2_allocframe : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u11_3Imm:$Ii),
"allocframe($Rx32,#$Ii):raw",
-tc_e216a5db, TypeST>, Enc_22c845 {
+tc_b44ecf75, TypeST>, Enc_22c845 {
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10100000100;
let hasNewValue = 1;
@@ -17833,7 +17908,7 @@ def S2_asl_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asl($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000000000;
}
@@ -17841,7 +17916,7 @@ def S2_asl_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += asl($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -17851,7 +17926,7 @@ def S2_asl_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -17861,7 +17936,7 @@ def S2_asl_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= asl($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -17871,7 +17946,7 @@ def S2_asl_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -17881,7 +17956,7 @@ def S2_asl_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= asl($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -17891,7 +17966,7 @@ def S2_asl_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asl($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -17902,7 +17977,7 @@ def S2_asl_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += asl($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -17915,7 +17990,7 @@ def S2_asl_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -17928,7 +18003,7 @@ def S2_asl_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= asl($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -17941,7 +18016,7 @@ def S2_asl_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -17954,7 +18029,7 @@ def S2_asl_i_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asl($Rs32,#$Ii):sat",
-tc_b44c6e2a, TypeS_2op>, Enc_a05677 {
+tc_779080bf, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100010;
@@ -17967,7 +18042,7 @@ def S2_asl_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= asl($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -17980,7 +18055,7 @@ def S2_asl_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vaslh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -17989,7 +18064,7 @@ def S2_asl_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vaslw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -17998,7 +18073,7 @@ def S2_asl_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = asl($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18007,7 +18082,7 @@ def S2_asl_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += asl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18018,7 +18093,7 @@ def S2_asl_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18029,7 +18104,7 @@ def S2_asl_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= asl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18040,7 +18115,7 @@ def S2_asl_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18051,7 +18126,7 @@ def S2_asl_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= asl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18062,7 +18137,7 @@ def S2_asl_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asl($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18073,7 +18148,7 @@ def S2_asl_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += asl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18086,7 +18161,7 @@ def S2_asl_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= asl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18099,7 +18174,7 @@ def S2_asl_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= asl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18112,7 +18187,7 @@ def S2_asl_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= asl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18125,7 +18200,7 @@ def S2_asl_r_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asl($Rs32,$Rt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_5ab2be {
+tc_779080bf, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110000;
@@ -18138,7 +18213,7 @@ def S2_asl_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vaslh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18147,7 +18222,7 @@ def S2_asl_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vaslw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18156,7 +18231,7 @@ def S2_asr_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000000000;
}
@@ -18164,7 +18239,7 @@ def S2_asr_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += asr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18174,7 +18249,7 @@ def S2_asr_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= asr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18184,7 +18259,7 @@ def S2_asr_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= asr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18194,7 +18269,7 @@ def S2_asr_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= asr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18204,7 +18279,7 @@ def S2_asr_i_p_rnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18213,14 +18288,14 @@ def S2_asr_i_p_rnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asrrnd($Rss32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let isPseudo = 1;
}
def S2_asr_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asr($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -18231,7 +18306,7 @@ def S2_asr_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += asr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18244,7 +18319,7 @@ def S2_asr_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= asr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18257,7 +18332,7 @@ def S2_asr_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= asr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18270,7 +18345,7 @@ def S2_asr_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= asr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18283,7 +18358,7 @@ def S2_asr_i_r_rnd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asr($Rs32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Enc_a05677 {
+tc_002cb246, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100010;
@@ -18295,7 +18370,7 @@ def S2_asr_i_r_rnd_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asrrnd($Rs32,#$Ii)",
-tc_2b6f77c6, TypeS_2op> {
+tc_002cb246, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -18304,7 +18379,7 @@ def S2_asr_i_svw_trun : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rd32 = vasrw($Rss32,#$Ii)",
-tc_1b9c9ee5, TypeS_2op>, Enc_8dec2e {
+tc_4414d8b1, TypeS_2op>, Enc_8dec2e {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001000110;
@@ -18316,7 +18391,7 @@ def S2_asr_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -18325,7 +18400,7 @@ def S2_asr_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vasrw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -18334,7 +18409,7 @@ def S2_asr_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = asr($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18343,7 +18418,7 @@ def S2_asr_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += asr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18354,7 +18429,7 @@ def S2_asr_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18365,7 +18440,7 @@ def S2_asr_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= asr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18376,7 +18451,7 @@ def S2_asr_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18387,7 +18462,7 @@ def S2_asr_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= asr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18398,7 +18473,7 @@ def S2_asr_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asr($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18409,7 +18484,7 @@ def S2_asr_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += asr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18422,7 +18497,7 @@ def S2_asr_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= asr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18435,7 +18510,7 @@ def S2_asr_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= asr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18448,7 +18523,7 @@ def S2_asr_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= asr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18461,7 +18536,7 @@ def S2_asr_r_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asr($Rs32,$Rt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_5ab2be {
+tc_779080bf, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110000;
@@ -18474,7 +18549,7 @@ def S2_asr_r_svw_trun : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = vasrw($Rss32,$Rt32)",
-tc_1b9c9ee5, TypeS_3op>, Enc_3d5b28 {
+tc_4414d8b1, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -18486,7 +18561,7 @@ def S2_asr_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vasrh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18495,7 +18570,7 @@ def S2_asr_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vasrw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18504,7 +18579,7 @@ def S2_brev : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = brev($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18515,7 +18590,7 @@ def S2_brevp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = brev($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18524,7 +18599,7 @@ def S2_cabacdecbin : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = decbin($Rss32,$Rtt32)",
-tc_c6ebf8dd, TypeS_3op>, Enc_a56825 {
+tc_76851da1, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -18536,7 +18611,7 @@ def S2_cl0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = cl0($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18547,7 +18622,7 @@ def S2_cl0p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = cl0($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18558,7 +18633,7 @@ def S2_cl1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = cl1($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18569,7 +18644,7 @@ def S2_cl1p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = cl1($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18580,7 +18655,7 @@ def S2_clb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = clb($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18591,7 +18666,7 @@ def S2_clbnorm : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = normamt($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18602,7 +18677,7 @@ def S2_clbp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = clb($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18613,7 +18688,7 @@ def S2_clrbit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = clrbit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -18624,7 +18699,7 @@ def S2_clrbit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = clrbit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -18635,7 +18710,7 @@ def S2_ct0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = ct0($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18646,7 +18721,7 @@ def S2_ct0p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = ct0($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -18657,7 +18732,7 @@ def S2_ct1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = ct1($Rs32)",
-tc_d088982c, TypeS_2op>, Enc_5e2823 {
+tc_14b5c689, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18668,7 +18743,7 @@ def S2_ct1p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = ct1($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -18679,7 +18754,7 @@ def S2_deinterleave : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = deinterleave($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18688,7 +18763,7 @@ def S2_extractu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = extractu($Rs32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b388cf {
+tc_f675fee8, TypeS_2op>, Enc_b388cf {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011010;
let hasNewValue = 1;
@@ -18699,7 +18774,7 @@ def S2_extractu_rp : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rd32 = extractu($Rs32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_e07374 {
+tc_002cb246, TypeS_3op>, Enc_e07374 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001001000;
@@ -18711,7 +18786,7 @@ def S2_extractup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rdd32 = extractu($Rss32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b84c4c {
+tc_f675fee8, TypeS_2op>, Enc_b84c4c {
let Inst{31-24} = 0b10000001;
let prefersSlot3 = 1;
}
@@ -18719,7 +18794,7 @@ def S2_extractup_rp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = extractu($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -18729,7 +18804,7 @@ def S2_insert : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = insert($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op>, Enc_a1e29d {
+tc_bfec0f01, TypeS_2op>, Enc_a1e29d {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011110;
let hasNewValue = 1;
@@ -18741,7 +18816,7 @@ def S2_insert_rp : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rx32 = insert($Rs32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_179b35 {
+tc_f429765c, TypeS_3op>, Enc_179b35 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001000000;
@@ -18754,7 +18829,7 @@ def S2_insertp : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rxx32 = insert($Rss32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op>, Enc_143a3c {
+tc_bfec0f01, TypeS_2op>, Enc_143a3c {
let Inst{31-24} = 0b10000011;
let prefersSlot3 = 1;
let Constraints = "$Rxx32 = $Rxx32in";
@@ -18763,7 +18838,7 @@ def S2_insertp_rp : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 = insert($Rss32,$Rtt32)",
-tc_84df2cd3, TypeS_3op>, Enc_88c16c {
+tc_f429765c, TypeS_3op>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001010000;
@@ -18774,7 +18849,7 @@ def S2_interleave : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = interleave($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_b9c5fb {
+tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18783,7 +18858,7 @@ def S2_lfsp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = lfs($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -18793,7 +18868,7 @@ def S2_lsl_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = lsl($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18802,7 +18877,7 @@ def S2_lsl_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += lsl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18813,7 +18888,7 @@ def S2_lsl_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18824,7 +18899,7 @@ def S2_lsl_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= lsl($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18835,7 +18910,7 @@ def S2_lsl_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18846,7 +18921,7 @@ def S2_lsl_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= lsl($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18857,7 +18932,7 @@ def S2_lsl_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = lsl($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18868,7 +18943,7 @@ def S2_lsl_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += lsl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18881,7 +18956,7 @@ def S2_lsl_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= lsl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18894,7 +18969,7 @@ def S2_lsl_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= lsl($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18907,7 +18982,7 @@ def S2_lsl_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= lsl($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18920,7 +18995,7 @@ def S2_lsl_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlslh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18929,7 +19004,7 @@ def S2_lsl_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlslw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18938,7 +19013,7 @@ def S2_lsr_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = lsr($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_5eac98 {
+tc_946df596, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000000000;
}
@@ -18946,7 +19021,7 @@ def S2_lsr_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += lsr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18956,7 +19031,7 @@ def S2_lsr_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18966,7 +19041,7 @@ def S2_lsr_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= lsr($Rss32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_70fb07 {
+tc_f675fee8, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18976,7 +19051,7 @@ def S2_lsr_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18986,7 +19061,7 @@ def S2_lsr_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= lsr($Rss32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_70fb07 {
+tc_f429765c, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -18996,7 +19071,7 @@ def S2_lsr_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = lsr($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -19007,7 +19082,7 @@ def S2_lsr_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += lsr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -19020,7 +19095,7 @@ def S2_lsr_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -19033,7 +19108,7 @@ def S2_lsr_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= lsr($Rs32,#$Ii)",
-tc_c74f796f, TypeS_2op>, Enc_28a2dc {
+tc_f675fee8, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -19046,7 +19121,7 @@ def S2_lsr_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -19059,7 +19134,7 @@ def S2_lsr_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= lsr($Rs32,#$Ii)",
-tc_84df2cd3, TypeS_2op>, Enc_28a2dc {
+tc_f429765c, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -19072,7 +19147,7 @@ def S2_lsr_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vlsrh($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_12b6e9 {
+tc_946df596, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b001;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -19081,7 +19156,7 @@ def S2_lsr_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vlsrw($Rss32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_7e5a82 {
+tc_946df596, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -19090,7 +19165,7 @@ def S2_lsr_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = lsr($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -19099,7 +19174,7 @@ def S2_lsr_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += lsr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -19110,7 +19185,7 @@ def S2_lsr_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -19121,7 +19196,7 @@ def S2_lsr_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= lsr($Rss32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_1aa186 {
+tc_f675fee8, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -19132,7 +19207,7 @@ def S2_lsr_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -19143,7 +19218,7 @@ def S2_lsr_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= lsr($Rss32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_1aa186 {
+tc_f429765c, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -19154,7 +19229,7 @@ def S2_lsr_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = lsr($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -19165,7 +19240,7 @@ def S2_lsr_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += lsr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -19178,7 +19253,7 @@ def S2_lsr_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= lsr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -19191,7 +19266,7 @@ def S2_lsr_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= lsr($Rs32,$Rt32)",
-tc_c74f796f, TypeS_3op>, Enc_2ae154 {
+tc_f675fee8, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -19204,7 +19279,7 @@ def S2_lsr_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= lsr($Rs32,$Rt32)",
-tc_84df2cd3, TypeS_3op>, Enc_2ae154 {
+tc_f429765c, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -19217,7 +19292,7 @@ def S2_lsr_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlsrh($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -19226,16 +19301,28 @@ def S2_lsr_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlsrw($Rss32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_927852 {
+tc_946df596, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
}
+def S2_mask : HInst<
+(outs IntRegs:$Rd32),
+(ins u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rd32 = mask(#$Ii,#$II)",
+tc_9461ff31, TypeS_2op>, Enc_c85e2a, Requires<[HasV66]> {
+let Inst{13-13} = 0b1;
+let Inst{20-16} = 0b00000;
+let Inst{31-23} = 0b100011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
def S2_packhl : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = packhl($Rs32,$Rt32)",
-tc_b9488031, TypeALU32_3op>, Enc_be32a5 {
+tc_5a2711e5, TypeALU32_3op>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110101100;
@@ -19245,7 +19332,7 @@ def S2_parityp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = parity($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeALU64>, Enc_d2216a {
+tc_002cb246, TypeALU64>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010000000;
@@ -19257,7 +19344,7 @@ def S2_pstorerbf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100000;
let isPredicated = 1;
@@ -19279,7 +19366,7 @@ def S2_pstorerbf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_cc449f, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19297,7 +19384,7 @@ def S2_pstorerbf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19305,7 +19392,7 @@ def S2_pstorerbfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_cc449f, AddrModeRel {
+tc_53559e35, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19324,7 +19411,7 @@ def S2_pstorerbnewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000100101;
@@ -19350,7 +19437,7 @@ def S2_pstorerbnewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b100;
@@ -19372,7 +19459,7 @@ def S2_pstorerbnewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19381,7 +19468,7 @@ def S2_pstorerbnewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -19404,7 +19491,7 @@ def S2_pstorerbnewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000000101;
@@ -19429,7 +19516,7 @@ def S2_pstorerbnewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b100;
@@ -19450,7 +19537,7 @@ def S2_pstorerbnewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19459,7 +19546,7 @@ def S2_pstorerbnewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -19481,7 +19568,7 @@ def S2_pstorerbt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000000;
let isPredicated = 1;
@@ -19502,7 +19589,7 @@ def S2_pstorerbt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_cc449f, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19519,7 +19606,7 @@ def S2_pstorerbt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19527,7 +19614,7 @@ def S2_pstorerbtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_cc449f, AddrModeRel {
+tc_53559e35, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19545,7 +19632,7 @@ def S2_pstorerdf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32+#$Ii) = $Rtt32",
-tc_8b15472a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100110;
let isPredicated = 1;
@@ -19566,7 +19653,7 @@ def S2_pstorerdf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rx32++#$Ii) = $Rtt32",
-tc_cd7374a0, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19584,7 +19671,7 @@ def S2_pstorerdf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32) = $Rtt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19592,7 +19679,7 @@ def S2_pstorerdfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
-tc_74e47fd9, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_53559e35, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19611,7 +19698,7 @@ def S2_pstorerdt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32+#$Ii) = $Rtt32",
-tc_8b15472a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000110;
let isPredicated = 1;
@@ -19631,7 +19718,7 @@ def S2_pstorerdt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rx32++#$Ii) = $Rtt32",
-tc_cd7374a0, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19648,7 +19735,7 @@ def S2_pstorerdt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32) = $Rtt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19656,7 +19743,7 @@ def S2_pstorerdtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
-tc_74e47fd9, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_53559e35, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19674,7 +19761,7 @@ def S2_pstorerff_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32.h",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100011;
let isPredicated = 1;
@@ -19695,7 +19782,7 @@ def S2_pstorerff_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32.h",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19713,7 +19800,7 @@ def S2_pstorerff_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32) = $Rt32.h",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19721,7 +19808,7 @@ def S2_pstorerffnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19740,7 +19827,7 @@ def S2_pstorerft_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+#$Ii) = $Rt32.h",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000011;
let isPredicated = 1;
@@ -19760,7 +19847,7 @@ def S2_pstorerft_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rx32++#$Ii) = $Rt32.h",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19777,7 +19864,7 @@ def S2_pstorerft_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32) = $Rt32.h",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19785,7 +19872,7 @@ def S2_pstorerftnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19803,7 +19890,7 @@ def S2_pstorerhf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100010;
let isPredicated = 1;
@@ -19825,7 +19912,7 @@ def S2_pstorerhf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19843,7 +19930,7 @@ def S2_pstorerhf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19851,7 +19938,7 @@ def S2_pstorerhfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19870,7 +19957,7 @@ def S2_pstorerhnewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000100101;
@@ -19896,7 +19983,7 @@ def S2_pstorerhnewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b101;
@@ -19918,7 +20005,7 @@ def S2_pstorerhnewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19927,7 +20014,7 @@ def S2_pstorerhnewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -19950,7 +20037,7 @@ def S2_pstorerhnewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000000101;
@@ -19975,7 +20062,7 @@ def S2_pstorerhnewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b101;
@@ -19996,7 +20083,7 @@ def S2_pstorerhnewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20005,7 +20092,7 @@ def S2_pstorerhnewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -20027,7 +20114,7 @@ def S2_pstorerht_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000010;
let isPredicated = 1;
@@ -20048,7 +20135,7 @@ def S2_pstorerht_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20065,7 +20152,7 @@ def S2_pstorerht_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20073,7 +20160,7 @@ def S2_pstorerhtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel {
+tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20091,7 +20178,7 @@ def S2_pstorerif_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100100;
let isPredicated = 1;
@@ -20113,7 +20200,7 @@ def S2_pstorerif_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20131,7 +20218,7 @@ def S2_pstorerif_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20139,7 +20226,7 @@ def S2_pstorerifnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_53559e35, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20159,7 +20246,7 @@ def S2_pstorerinewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000100101;
@@ -20185,7 +20272,7 @@ def S2_pstorerinewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_65f095, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b110;
@@ -20207,7 +20294,7 @@ def S2_pstorerinewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20216,7 +20303,7 @@ def S2_pstorerinewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_65f095, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -20239,7 +20326,7 @@ def S2_pstorerinewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32+#$Ii) = $Nt8.new",
-tc_594ab548, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_8fb7ab1b, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000000101;
@@ -20264,7 +20351,7 @@ def S2_pstorerinewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d9f95eef, TypeST>, Enc_65f095, AddrModeRel {
+tc_838b34ea, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b110;
@@ -20285,7 +20372,7 @@ def S2_pstorerinewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32) = $Nt8.new",
-tc_594ab548, TypeMAPPING> {
+tc_8fb7ab1b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20294,7 +20381,7 @@ def S2_pstorerinewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d24b2d85, TypeST>, Enc_65f095, AddrModeRel {
+tc_d65dbf51, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -20316,7 +20403,7 @@ def S2_pstorerit_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32+#$Ii) = $Rt32",
-tc_8b15472a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_f8e23f0b, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000100;
let isPredicated = 1;
@@ -20337,7 +20424,7 @@ def S2_pstorerit_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rx32++#$Ii) = $Rt32",
-tc_cd7374a0, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_24b66c99, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20354,7 +20441,7 @@ def S2_pstorerit_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32) = $Rt32",
-tc_8b15472a, TypeMAPPING> {
+tc_f8e23f0b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20362,7 +20449,7 @@ def S2_pstoreritnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rx32++#$Ii) = $Rt32",
-tc_74e47fd9, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_53559e35, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20380,7 +20467,7 @@ def S2_setbit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = setbit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -20391,7 +20478,7 @@ def S2_setbit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = setbit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -20402,7 +20489,7 @@ def S2_shuffeb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = shuffeb($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20411,7 +20498,7 @@ def S2_shuffeh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = shuffeh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20420,7 +20507,7 @@ def S2_shuffob : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = shuffob($Rtt32,$Rss32)",
-tc_540fdfbc, TypeS_3op>, Enc_ea23e4 {
+tc_946df596, TypeS_3op>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20429,7 +20516,7 @@ def S2_shuffoh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = shuffoh($Rtt32,$Rss32)",
-tc_540fdfbc, TypeS_3op>, Enc_ea23e4 {
+tc_946df596, TypeS_3op>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -20438,7 +20525,7 @@ def S2_storerb_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20459,9 +20546,10 @@ def S2_storerb_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111000;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let mayStore = 1;
let BaseOpcode = "S2_storerb_pbr";
@@ -20472,7 +20560,7 @@ def S2_storerb_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_b15941, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_b15941, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001000;
@@ -20488,7 +20576,7 @@ def S2_storerb_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001000;
let addrMode = PostInc;
@@ -20503,7 +20591,7 @@ def S2_storerb_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20521,7 +20609,7 @@ def S2_storerb_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101000;
let addrMode = PostInc;
@@ -20534,7 +20622,7 @@ def S2_storerb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20542,7 +20630,7 @@ def S2_storerbgp : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Rt32),
"memb(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
let Inst{24-21} = 0b0000;
let Inst{31-27} = 0b01001;
let accessSize = ByteAccess;
@@ -20560,7 +20648,7 @@ def S2_storerbnew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_4df4e9, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_4df4e9, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -20585,10 +20673,11 @@ def S2_storerbnew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = ByteAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -20602,7 +20691,7 @@ def S2_storerbnew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_96ce4f, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_96ce4f, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b00;
@@ -20622,7 +20711,7 @@ def S2_storerbnew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101001101;
@@ -20641,7 +20730,7 @@ def S2_storerbnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_c7cd90, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_c7cd90, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b000;
@@ -20662,7 +20751,7 @@ def S2_storerbnew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101101101;
@@ -20679,7 +20768,7 @@ def S2_storerbnew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memb($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -20688,7 +20777,7 @@ def S2_storerbnewgp : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Nt8),
"memb(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_ad1831, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_ad1831, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -20710,7 +20799,7 @@ def S2_storerd_io : HInst<
(outs),
(ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+#$Ii) = $Rtt32",
-tc_05b6c987, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20730,9 +20819,10 @@ def S2_storerd_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++$Mu2:brev) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111110;
+let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayStore = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -20741,7 +20831,7 @@ def S2_storerd_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++#$Ii:circ($Mu2)) = $Rtt32",
-tc_9fdb5406, TypeST>, Enc_395cc4 {
+tc_e86aa961, TypeST>, Enc_395cc4 {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001110;
@@ -20755,7 +20845,7 @@ def S2_storerd_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++I:circ($Mu2)) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001110;
let addrMode = PostInc;
@@ -20768,7 +20858,7 @@ def S2_storerd_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rx32++#$Ii) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20785,7 +20875,7 @@ def S2_storerd_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++$Mu2) = $Rtt32",
-tc_f86c328a, TypeST>, Enc_928ca1 {
+tc_da97ee82, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101110;
let addrMode = PostInc;
@@ -20797,7 +20887,7 @@ def S2_storerd_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"memd($Rs32) = $Rtt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20805,7 +20895,7 @@ def S2_storerdgp : HInst<
(outs),
(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd(gp+#$Ii) = $Rtt32",
-tc_a788683e, TypeV2LDST>, Enc_5c124a, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_5c124a, AddrModeRel {
let Inst{24-21} = 0b0110;
let Inst{31-27} = 0b01001;
let accessSize = DoubleWordAccess;
@@ -20822,7 +20912,7 @@ def S2_storerf_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32.h",
-tc_05b6c987, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20842,9 +20932,10 @@ def S2_storerf_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2:brev) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111011;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -20853,7 +20944,7 @@ def S2_storerf_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32.h",
-tc_9fdb5406, TypeST>, Enc_935d9b {
+tc_e86aa961, TypeST>, Enc_935d9b {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001011;
@@ -20867,7 +20958,7 @@ def S2_storerf_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++I:circ($Mu2)) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001011;
let addrMode = PostInc;
@@ -20880,7 +20971,7 @@ def S2_storerf_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rx32++#$Ii) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20897,7 +20988,7 @@ def S2_storerf_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2) = $Rt32.h",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101011;
let addrMode = PostInc;
@@ -20909,7 +21000,7 @@ def S2_storerf_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) = $Rt32.h",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20917,7 +21008,7 @@ def S2_storerfgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(gp+#$Ii) = $Rt32.h",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b01001;
let accessSize = HalfWordAccess;
@@ -20934,7 +21025,7 @@ def S2_storerh_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -20955,9 +21046,10 @@ def S2_storerh_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111010;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
let BaseOpcode = "S2_storerh_pbr";
@@ -20968,7 +21060,7 @@ def S2_storerh_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_935d9b, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_935d9b, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001010;
@@ -20984,7 +21076,7 @@ def S2_storerh_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001010;
let addrMode = PostInc;
@@ -20999,7 +21091,7 @@ def S2_storerh_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -21017,7 +21109,7 @@ def S2_storerh_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101010;
let addrMode = PostInc;
@@ -21030,7 +21122,7 @@ def S2_storerh_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21038,7 +21130,7 @@ def S2_storerhgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b01001;
let accessSize = HalfWordAccess;
@@ -21056,7 +21148,7 @@ def S2_storerhnew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Nt8),
"memh($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_0d8870, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_0d8870, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -21081,10 +21173,11 @@ def S2_storerhnew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = HalfWordAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -21098,7 +21191,7 @@ def S2_storerhnew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_91b9fe, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_91b9fe, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b01;
@@ -21118,7 +21211,7 @@ def S2_storerhnew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101001101;
@@ -21137,7 +21230,7 @@ def S2_storerhnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"memh($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_e26546, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_e26546, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b001;
@@ -21158,7 +21251,7 @@ def S2_storerhnew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101101101;
@@ -21175,7 +21268,7 @@ def S2_storerhnew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memh($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -21184,7 +21277,7 @@ def S2_storerhnewgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Nt8),
"memh(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -21206,7 +21299,7 @@ def S2_storeri_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) = $Rt32",
-tc_05b6c987, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm {
+tc_30b9bb4a, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
@@ -21227,9 +21320,10 @@ def S2_storeri_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++$Mu2:brev) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111100;
+let addrMode = PostInc;
let accessSize = WordAccess;
let mayStore = 1;
let BaseOpcode = "S2_storeri_pbr";
@@ -21240,7 +21334,7 @@ def S2_storeri_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_9fdb5406, TypeST>, Enc_79b8c8, AddrModeRel {
+tc_e86aa961, TypeST>, Enc_79b8c8, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001100;
@@ -21256,7 +21350,7 @@ def S2_storeri_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++I:circ($Mu2)) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001100;
let addrMode = PostInc;
@@ -21271,7 +21365,7 @@ def S2_storeri_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rx32++#$Ii) = $Rt32",
-tc_f86c328a, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm {
+tc_da97ee82, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -21289,7 +21383,7 @@ def S2_storeri_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++$Mu2) = $Rt32",
-tc_f86c328a, TypeST>, Enc_d5c73f {
+tc_da97ee82, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101100;
let addrMode = PostInc;
@@ -21302,7 +21396,7 @@ def S2_storeri_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) = $Rt32",
-tc_05b6c987, TypeMAPPING> {
+tc_30b9bb4a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21310,7 +21404,7 @@ def S2_storerigp : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Rt32),
"memw(gp+#$Ii) = $Rt32",
-tc_a788683e, TypeV2LDST>, Enc_541f26, AddrModeRel {
+tc_0371abea, TypeV2LDST>, Enc_541f26, AddrModeRel {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b01001;
let accessSize = WordAccess;
@@ -21328,7 +21422,7 @@ def S2_storerinew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Nt8),
"memw($Rs32+#$Ii) = $Nt8.new",
-tc_f7dd9c9f, TypeST>, Enc_690862, AddrModeRel {
+tc_be9602ff, TypeST>, Enc_690862, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -21353,10 +21447,11 @@ def S2_storerinew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++$Mu2:brev) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101111101;
+let addrMode = PostInc;
let accessSize = WordAccess;
let isNVStore = 1;
let isNewValue = 1;
@@ -21370,7 +21465,7 @@ def S2_storerinew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_9d5941c7, TypeST>, Enc_3f97c8, AddrModeRel {
+tc_d5c0729a, TypeST>, Enc_3f97c8, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b10;
@@ -21390,7 +21485,7 @@ def S2_storerinew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101001101;
@@ -21409,7 +21504,7 @@ def S2_storerinew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"memw($Rx32++#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_223005, AddrModeRel {
+tc_c79a189f, TypeST>, Enc_223005, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b010;
@@ -21429,7 +21524,7 @@ def S2_storerinew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++$Mu2) = $Nt8.new",
-tc_e7d02c66, TypeST>, Enc_8dbe85 {
+tc_c79a189f, TypeST>, Enc_8dbe85 {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101101101;
@@ -21446,7 +21541,7 @@ def S2_storerinew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memw($Rs32) = $Nt8.new",
-tc_f7dd9c9f, TypeMAPPING> {
+tc_be9602ff, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -21455,7 +21550,7 @@ def S2_storerinewgp : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Nt8),
"memw(gp+#$Ii) = $Nt8.new",
-tc_ff9ee76e, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
+tc_5bf126a6, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -21477,7 +21572,7 @@ def S2_storew_locked : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw_locked($Rs32,$Pd4) = $Rt32",
-tc_1372bca1, TypeST>, Enc_c2b48e {
+tc_5abb5e3f, TypeST>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100000101;
@@ -21490,7 +21585,7 @@ def S2_svsathb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsathb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -21501,7 +21596,7 @@ def S2_svsathub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsathub($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -21512,7 +21607,7 @@ def S2_tableidxb : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxb($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011100;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21523,7 +21618,7 @@ def S2_tableidxb_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxb($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21534,7 +21629,7 @@ def S2_tableidxd : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxd($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21545,7 +21640,7 @@ def S2_tableidxd_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxd($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21555,7 +21650,7 @@ def S2_tableidxh : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxh($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21566,7 +21661,7 @@ def S2_tableidxh_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxh($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21576,7 +21671,7 @@ def S2_tableidxw : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxw($Rs32,#$Ii,#$II):raw",
-tc_87735c3b, TypeS_2op>, Enc_cd82bc {
+tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21587,7 +21682,7 @@ def S2_tableidxw_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxw($Rs32,#$Ii,#$II)",
-tc_87735c3b, TypeS_2op> {
+tc_bfec0f01, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21597,7 +21692,7 @@ def S2_togglebit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = togglebit($Rs32,#$Ii)",
-tc_540fdfbc, TypeS_2op>, Enc_a05677 {
+tc_946df596, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -21608,7 +21703,7 @@ def S2_togglebit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = togglebit($Rs32,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_5ab2be {
+tc_946df596, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -21619,7 +21714,7 @@ def S2_tstbit_i : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = tstbit($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64 {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101000;
@@ -21628,7 +21723,7 @@ def S2_tstbit_r : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = tstbit($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111000;
@@ -21637,7 +21732,7 @@ def S2_valignib : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, u3_0Imm:$Ii),
"$Rdd32 = valignb($Rtt32,$Rss32,#$Ii)",
-tc_f8eeed7a, TypeS_3op>, Enc_729ff7 {
+tc_b4b5c03a, TypeS_3op>, Enc_729ff7 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000000000;
}
@@ -21645,7 +21740,7 @@ def S2_valignrb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, PredRegs:$Pu4),
"$Rdd32 = valignb($Rtt32,$Rss32,$Pu4)",
-tc_f8eeed7a, TypeS_3op>, Enc_8c6530 {
+tc_b4b5c03a, TypeS_3op>, Enc_8c6530 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010000;
@@ -21654,7 +21749,7 @@ def S2_vcnegh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vcnegh($Rss32,$Rt32)",
-tc_b44c6e2a, TypeS_3op>, Enc_927852 {
+tc_779080bf, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011110;
@@ -21665,7 +21760,7 @@ def S2_vcrotate : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vcrotate($Rss32,$Rt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_927852 {
+tc_002cb246, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011110;
@@ -21676,7 +21771,7 @@ def S2_vrcnegh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += vrcnegh($Rss32,$Rt32)",
-tc_e913dc32, TypeS_3op>, Enc_1aa186 {
+tc_d773585a, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -21687,7 +21782,7 @@ def S2_vrndpackwh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vrndwh($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21698,7 +21793,7 @@ def S2_vrndpackwhs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vrndwh($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_90cd8b {
+tc_cf8126ae, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21710,7 +21805,7 @@ def S2_vsathb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsathb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21721,7 +21816,7 @@ def S2_vsathb_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsathb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21730,7 +21825,7 @@ def S2_vsathub : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsathub($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21741,7 +21836,7 @@ def S2_vsathub_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsathub($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21750,7 +21845,7 @@ def S2_vsatwh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsatwh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21761,7 +21856,7 @@ def S2_vsatwh_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsatwh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21770,7 +21865,7 @@ def S2_vsatwuh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsatwuh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21781,7 +21876,7 @@ def S2_vsatwuh_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsatwuh($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_b9c5fb {
+tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21790,7 +21885,7 @@ def S2_vsplatrb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsplatb($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_5e2823 {
+tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -21802,7 +21897,7 @@ def S2_vsplatrh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplath($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100010;
let isReMaterializable = 1;
@@ -21812,7 +21907,7 @@ def S2_vspliceib : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, u3_0Imm:$Ii),
"$Rdd32 = vspliceb($Rss32,$Rtt32,#$Ii)",
-tc_f8eeed7a, TypeS_3op>, Enc_d50cd3 {
+tc_b4b5c03a, TypeS_3op>, Enc_d50cd3 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000000100;
}
@@ -21820,7 +21915,7 @@ def S2_vsplicerb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Pu4),
"$Rdd32 = vspliceb($Rss32,$Rtt32,$Pu4)",
-tc_f8eeed7a, TypeS_3op>, Enc_dbd70c {
+tc_b4b5c03a, TypeS_3op>, Enc_dbd70c {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010100;
@@ -21829,7 +21924,7 @@ def S2_vsxtbh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsxtbh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21839,7 +21934,7 @@ def S2_vsxthw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsxthw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21849,7 +21944,7 @@ def S2_vtrunehb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vtrunehb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21859,7 +21954,7 @@ def S2_vtrunewh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunewh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -21868,7 +21963,7 @@ def S2_vtrunohb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vtrunohb($Rss32)",
-tc_cde8b071, TypeS_2op>, Enc_90cd8b {
+tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21878,7 +21973,7 @@ def S2_vtrunowh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunowh($Rss32,$Rtt32)",
-tc_540fdfbc, TypeS_3op>, Enc_a56825 {
+tc_946df596, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -21887,7 +21982,7 @@ def S2_vzxtbh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vzxtbh($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21897,7 +21992,7 @@ def S2_vzxthw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vzxthw($Rs32)",
-tc_cde8b071, TypeS_2op>, Enc_3a3d62 {
+tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21907,7 +22002,7 @@ def S4_addaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Ru32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,add($Ru32,#$Ii))",
-tc_c74f796f, TypeALU64>, Enc_8b8d61 {
+tc_f675fee8, TypeALU64>, Enc_8b8d61 {
let Inst{31-23} = 0b110110110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21922,7 +22017,7 @@ def S4_addi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = add(#$Ii,asl($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b100;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -21940,7 +22035,7 @@ def S4_addi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = add(#$Ii,lsr($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b100;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -21958,7 +22053,7 @@ def S4_andi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = and(#$Ii,asl($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b000;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -21976,7 +22071,7 @@ def S4_andi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = and(#$Ii,lsr($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b000;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -21994,7 +22089,7 @@ def S4_clbaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s6_0Imm:$Ii),
"$Rd32 = add(clb($Rs32),#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_9fae8a {
+tc_002cb246, TypeS_2op>, Enc_9fae8a {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10001100001;
let hasNewValue = 1;
@@ -22005,7 +22100,7 @@ def S4_clbpaddi : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, s6_0Imm:$Ii),
"$Rd32 = add(clb($Rss32),#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Enc_a1640c {
+tc_002cb246, TypeS_2op>, Enc_a1640c {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -22016,7 +22111,7 @@ def S4_clbpnorm : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = normamt($Rss32)",
-tc_d088982c, TypeS_2op>, Enc_90cd8b {
+tc_14b5c689, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -22027,7 +22122,7 @@ def S4_extract : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = extract($Rs32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b388cf {
+tc_f675fee8, TypeS_2op>, Enc_b388cf {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011011;
let hasNewValue = 1;
@@ -22038,7 +22133,7 @@ def S4_extract_rp : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rd32 = extract($Rs32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_e07374 {
+tc_002cb246, TypeS_3op>, Enc_e07374 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001001000;
@@ -22050,7 +22145,7 @@ def S4_extractp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rdd32 = extract($Rss32,#$Ii,#$II)",
-tc_c74f796f, TypeS_2op>, Enc_b84c4c {
+tc_f675fee8, TypeS_2op>, Enc_b84c4c {
let Inst{31-24} = 0b10001010;
let prefersSlot3 = 1;
}
@@ -22058,7 +22153,7 @@ def S4_extractp_rp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = extract($Rss32,$Rtt32)",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -22068,7 +22163,7 @@ def S4_lsli : HInst<
(outs IntRegs:$Rd32),
(ins s6_0Imm:$Ii, IntRegs:$Rt32),
"$Rd32 = lsl(#$Ii,$Rt32)",
-tc_540fdfbc, TypeS_3op>, Enc_fef969 {
+tc_946df596, TypeS_3op>, Enc_fef969 {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -22079,7 +22174,7 @@ def S4_ntstbit_i : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = !tstbit($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64 {
+tc_643b4717, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101001;
@@ -22088,7 +22183,7 @@ def S4_ntstbit_r : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !tstbit($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e {
+tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111001;
@@ -22097,7 +22192,7 @@ def S4_or_andi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 |= and($Rs32,#$Ii)",
-tc_84df2cd3, TypeALU64>, Enc_b0e9d8 {
+tc_f429765c, TypeALU64>, Enc_b0e9d8 {
let Inst{31-22} = 0b1101101000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22114,7 +22209,7 @@ def S4_or_andix : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Ru32, IntRegs:$Rx32in, s32_0Imm:$Ii),
"$Rx32 = or($Ru32,and($Rx32in,#$Ii))",
-tc_84df2cd3, TypeALU64>, Enc_b4e6cf {
+tc_f429765c, TypeALU64>, Enc_b4e6cf {
let Inst{31-22} = 0b1101101001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22130,7 +22225,7 @@ def S4_or_ori : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 |= or($Rs32,#$Ii)",
-tc_84df2cd3, TypeALU64>, Enc_b0e9d8 {
+tc_f429765c, TypeALU64>, Enc_b0e9d8 {
let Inst{31-22} = 0b1101101010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22147,7 +22242,7 @@ def S4_ori_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = or(#$Ii,asl($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b010;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -22165,7 +22260,7 @@ def S4_ori_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = or(#$Ii,lsr($Rx32in,#$II))",
-tc_84df2cd3, TypeALU64>, Enc_c31910 {
+tc_f429765c, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b010;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -22183,7 +22278,7 @@ def S4_parity : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = parity($Rs32,$Rt32)",
-tc_2b6f77c6, TypeALU64>, Enc_5ab2be {
+tc_002cb246, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101111;
@@ -22195,7 +22290,7 @@ def S4_pstorerbf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22220,7 +22315,7 @@ def S4_pstorerbf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22236,7 +22331,7 @@ def S4_pstorerbfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22262,7 +22357,7 @@ def S4_pstorerbfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110000;
let isPredicated = 1;
@@ -22285,7 +22380,7 @@ def S4_pstorerbfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22302,7 +22397,7 @@ def S4_pstorerbfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22310,7 +22405,7 @@ def S4_pstorerbnewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b000;
@@ -22338,7 +22433,7 @@ def S4_pstorerbnewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -22358,7 +22453,7 @@ def S4_pstorerbnewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -22387,7 +22482,7 @@ def S4_pstorerbnewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000110101;
@@ -22414,7 +22509,7 @@ def S4_pstorerbnewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -22435,7 +22530,7 @@ def S4_pstorerbnewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -22444,7 +22539,7 @@ def S4_pstorerbnewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b000;
@@ -22471,7 +22566,7 @@ def S4_pstorerbnewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -22490,7 +22585,7 @@ def S4_pstorerbnewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -22518,7 +22613,7 @@ def S4_pstorerbnewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000010101;
@@ -22544,7 +22639,7 @@ def S4_pstorerbnewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -22564,7 +22659,7 @@ def S4_pstorerbnewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -22573,7 +22668,7 @@ def S4_pstorerbt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22597,7 +22692,7 @@ def S4_pstorerbt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100000;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22612,7 +22707,7 @@ def S4_pstorerbtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22637,7 +22732,7 @@ def S4_pstorerbtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010000;
let isPredicated = 1;
@@ -22659,7 +22754,7 @@ def S4_pstorerbtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110000;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22675,7 +22770,7 @@ def S4_pstorerbtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22683,7 +22778,7 @@ def S4_pstorerdf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd(#$Ii) = $Rtt32",
-tc_238d91d2, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_362c6592, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22707,7 +22802,7 @@ def S4_pstorerdf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_5274e61a, TypeST>, Enc_1a9974, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110101110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22722,7 +22817,7 @@ def S4_pstorerdfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd(#$Ii) = $Rtt32",
-tc_66888ded, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22747,7 +22842,7 @@ def S4_pstorerdfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
-tc_f86c328a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110110;
let isPredicated = 1;
@@ -22769,7 +22864,7 @@ def S4_pstorerdfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_3e07fb90, TypeST>, Enc_1a9974, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110111110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22785,7 +22880,7 @@ def S4_pstorerdfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32) = $Rtt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22793,7 +22888,7 @@ def S4_pstorerdt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd(#$Ii) = $Rtt32",
-tc_238d91d2, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_362c6592, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22816,7 +22911,7 @@ def S4_pstorerdt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_5274e61a, TypeST>, Enc_1a9974, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110100110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22830,7 +22925,7 @@ def S4_pstorerdtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd(#$Ii) = $Rtt32",
-tc_66888ded, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22854,7 +22949,7 @@ def S4_pstorerdtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
-tc_f86c328a, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010110;
let isPredicated = 1;
@@ -22875,7 +22970,7 @@ def S4_pstorerdtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_3e07fb90, TypeST>, Enc_1a9974, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110110110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -22890,7 +22985,7 @@ def S4_pstorerdtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32) = $Rtt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22898,7 +22993,7 @@ def S4_pstorerff_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh(#$Ii) = $Rt32.h",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22922,7 +23017,7 @@ def S4_pstorerff_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22937,7 +23032,7 @@ def S4_pstorerffnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh(#$Ii) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22962,7 +23057,7 @@ def S4_pstorerffnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110011;
let isPredicated = 1;
@@ -22984,7 +23079,7 @@ def S4_pstorerffnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23000,7 +23095,7 @@ def S4_pstorerffnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32) = $Rt32.h",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23008,7 +23103,7 @@ def S4_pstorerft_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh(#$Ii) = $Rt32.h",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23031,7 +23126,7 @@ def S4_pstorerft_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100011;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23045,7 +23140,7 @@ def S4_pstorerftnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh(#$Ii) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23069,7 +23164,7 @@ def S4_pstorerftnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010011;
let isPredicated = 1;
@@ -23090,7 +23185,7 @@ def S4_pstorerftnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110011;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23105,7 +23200,7 @@ def S4_pstorerftnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32) = $Rt32.h",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23113,7 +23208,7 @@ def S4_pstorerhf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23138,7 +23233,7 @@ def S4_pstorerhf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23154,7 +23249,7 @@ def S4_pstorerhfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23180,7 +23275,7 @@ def S4_pstorerhfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110010;
let isPredicated = 1;
@@ -23203,7 +23298,7 @@ def S4_pstorerhfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23220,7 +23315,7 @@ def S4_pstorerhfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23228,7 +23323,7 @@ def S4_pstorerhnewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b001;
@@ -23256,7 +23351,7 @@ def S4_pstorerhnewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -23276,7 +23371,7 @@ def S4_pstorerhnewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -23305,7 +23400,7 @@ def S4_pstorerhnewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000110101;
@@ -23332,7 +23427,7 @@ def S4_pstorerhnewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -23353,7 +23448,7 @@ def S4_pstorerhnewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23362,7 +23457,7 @@ def S4_pstorerhnewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b001;
@@ -23389,7 +23484,7 @@ def S4_pstorerhnewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -23408,7 +23503,7 @@ def S4_pstorerhnewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -23436,7 +23531,7 @@ def S4_pstorerhnewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000010101;
@@ -23462,7 +23557,7 @@ def S4_pstorerhnewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -23482,7 +23577,7 @@ def S4_pstorerhnewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23491,7 +23586,7 @@ def S4_pstorerht_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23515,7 +23610,7 @@ def S4_pstorerht_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100010;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23530,7 +23625,7 @@ def S4_pstorerhtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23555,7 +23650,7 @@ def S4_pstorerhtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010010;
let isPredicated = 1;
@@ -23577,7 +23672,7 @@ def S4_pstorerhtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110010;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -23593,7 +23688,7 @@ def S4_pstorerhtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23601,7 +23696,7 @@ def S4_pstorerif_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23626,7 +23721,7 @@ def S4_pstorerif_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23642,7 +23737,7 @@ def S4_pstorerifnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23668,7 +23763,7 @@ def S4_pstorerifnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110100;
let isPredicated = 1;
@@ -23691,7 +23786,7 @@ def S4_pstorerifnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23708,7 +23803,7 @@ def S4_pstorerifnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23716,7 +23811,7 @@ def S4_pstorerinewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b010;
@@ -23744,7 +23839,7 @@ def S4_pstorerinewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -23764,7 +23859,7 @@ def S4_pstorerinewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -23793,7 +23888,7 @@ def S4_pstorerinewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000110101;
@@ -23820,7 +23915,7 @@ def S4_pstorerinewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -23841,7 +23936,7 @@ def S4_pstorerinewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23850,7 +23945,7 @@ def S4_pstorerinewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw(#$Ii) = $Nt8.new",
-tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel {
+tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b010;
@@ -23877,7 +23972,7 @@ def S4_pstorerinewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -23896,7 +23991,7 @@ def S4_pstorerinewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw(#$Ii) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -23924,7 +24019,7 @@ def S4_pstorerinewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
-tc_e7d02c66, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_c79a189f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000010101;
@@ -23950,7 +24045,7 @@ def S4_pstorerinewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -23970,7 +24065,7 @@ def S4_pstorerinewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32) = $Nt8.new",
-tc_e7d02c66, TypeMAPPING> {
+tc_c79a189f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23979,7 +24074,7 @@ def S4_pstorerit_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw(#$Ii) = $Rt32",
-tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -24003,7 +24098,7 @@ def S4_pstorerit_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel {
+tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100100;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -24018,7 +24113,7 @@ def S4_pstoreritnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw(#$Ii) = $Rt32",
-tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -24043,7 +24138,7 @@ def S4_pstoreritnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32+#$Ii) = $Rt32",
-tc_f86c328a, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_da97ee82, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010100;
let isPredicated = 1;
@@ -24065,7 +24160,7 @@ def S4_pstoreritnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel {
+tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110100;
let isPredicated = 1;
let addrMode = BaseRegOffset;
@@ -24081,7 +24176,7 @@ def S4_pstoreritnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32) = $Rt32",
-tc_f86c328a, TypeMAPPING> {
+tc_da97ee82, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24089,7 +24184,7 @@ def S4_stored_locked : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"memd_locked($Rs32,$Pd4) = $Rtt32",
-tc_1372bca1, TypeST>, Enc_d7dc10 {
+tc_5abb5e3f, TypeST>, Enc_d7dc10 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100000111;
@@ -24102,7 +24197,7 @@ def S4_storeirb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"memb($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_8203bb, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_8203bb, PredNewRel {
let Inst{31-21} = 0b00111100000;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -24121,7 +24216,7 @@ def S4_storeirb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memb($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24129,7 +24224,7 @@ def S4_storeirbf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memb($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_0b2be201, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111000100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24149,7 +24244,7 @@ def S4_storeirbf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memb($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24157,7 +24252,7 @@ def S4_storeirbfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memb($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111001100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24178,7 +24273,7 @@ def S4_storeirbfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memb($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24186,7 +24281,7 @@ def S4_storeirbt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memb($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_0b2be201, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111000000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24205,7 +24300,7 @@ def S4_storeirbt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memb($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24213,7 +24308,7 @@ def S4_storeirbtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memb($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_d7a65e, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111001000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24233,7 +24328,7 @@ def S4_storeirbtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memb($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24241,7 +24336,7 @@ def S4_storeirh_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"memh($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_a803e0, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_a803e0, PredNewRel {
let Inst{31-21} = 0b00111100001;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
@@ -24260,7 +24355,7 @@ def S4_storeirh_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memh($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24268,7 +24363,7 @@ def S4_storeirhf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memh($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_f20719, PredNewRel {
+tc_0b2be201, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24288,7 +24383,7 @@ def S4_storeirhf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memh($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24296,7 +24391,7 @@ def S4_storeirhfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memh($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_f20719, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111001101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24317,7 +24412,7 @@ def S4_storeirhfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memh($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24325,7 +24420,7 @@ def S4_storeirht_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memh($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_f20719, PredNewRel {
+tc_0b2be201, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111000001;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24344,7 +24439,7 @@ def S4_storeirht_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memh($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24352,7 +24447,7 @@ def S4_storeirhtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memh($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_f20719, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111001001;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24372,7 +24467,7 @@ def S4_storeirhtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memh($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24380,7 +24475,7 @@ def S4_storeiri_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"memw($Rs32+#$Ii) = #$II",
-tc_05b6c987, TypeST>, Enc_f37377, PredNewRel {
+tc_b83e6d73, TypeST>, Enc_f37377, PredNewRel {
let Inst{31-21} = 0b00111100010;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -24399,7 +24494,7 @@ def S4_storeiri_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memw($Rs32) = #$II",
-tc_05b6c987, TypeMAPPING> {
+tc_b83e6d73, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24407,7 +24502,7 @@ def S4_storeirif_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memw($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_0b2be201, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111000110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24427,7 +24522,7 @@ def S4_storeirif_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memw($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24435,7 +24530,7 @@ def S4_storeirifnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memw($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111001110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24456,7 +24551,7 @@ def S4_storeirifnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memw($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24464,7 +24559,7 @@ def S4_storeirit_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memw($Rs32+#$Ii) = #$II",
-tc_8b15472a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_0b2be201, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111000010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24483,7 +24578,7 @@ def S4_storeirit_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memw($Rs32) = #$II",
-tc_8b15472a, TypeMAPPING> {
+tc_0b2be201, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24491,7 +24586,7 @@ def S4_storeiritnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memw($Rs32+#$Ii) = #$II",
-tc_f86c328a, TypeST>, Enc_5ccba9, PredNewRel {
+tc_c4f596e3, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111001010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
@@ -24511,7 +24606,7 @@ def S4_storeiritnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memw($Rs32) = #$II",
-tc_f86c328a, TypeMAPPING> {
+tc_c4f596e3, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24519,7 +24614,7 @@ def S4_storerb_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memb($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011000;
@@ -24540,7 +24635,7 @@ def S4_storerb_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011000;
let addrMode = BaseRegOffset;
@@ -24556,7 +24651,7 @@ def S4_storerb_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memb($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101000;
let addrMode = BaseLongOffset;
@@ -24578,7 +24673,7 @@ def S4_storerbnew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memb($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10101011101;
@@ -24602,7 +24697,7 @@ def S4_storerbnew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0000;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24621,7 +24716,7 @@ def S4_storerbnew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memb($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101101101;
@@ -24646,7 +24741,7 @@ def S4_storerd_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, DoubleRegs:$Rtt32),
"memd($Re32=#$II) = $Rtt32",
-tc_66888ded, TypeST>, Enc_c7a204 {
+tc_da4a37ed, TypeST>, Enc_c7a204 {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011110;
@@ -24666,7 +24761,7 @@ def S4_storerd_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_d9709180, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011110;
let addrMode = BaseRegOffset;
@@ -24681,7 +24776,7 @@ def S4_storerd_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, DoubleRegs:$Rtt32),
"memd($Ru32<<#$Ii+#$II) = $Rtt32",
-tc_0dc560de, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101110;
let addrMode = BaseLongOffset;
@@ -24702,7 +24797,7 @@ def S4_storerf_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memh($Re32=#$II) = $Rt32.h",
-tc_66888ded, TypeST>, Enc_8bcba4 {
+tc_da4a37ed, TypeST>, Enc_8bcba4 {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011011;
@@ -24722,7 +24817,7 @@ def S4_storerf_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011011;
let addrMode = BaseRegOffset;
@@ -24737,7 +24832,7 @@ def S4_storerf_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memh($Ru32<<#$Ii+#$II) = $Rt32.h",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101011;
let addrMode = BaseLongOffset;
@@ -24758,7 +24853,7 @@ def S4_storerh_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memh($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011010;
@@ -24779,7 +24874,7 @@ def S4_storerh_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011010;
let addrMode = BaseRegOffset;
@@ -24795,7 +24890,7 @@ def S4_storerh_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memh($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101010;
let addrMode = BaseLongOffset;
@@ -24817,7 +24912,7 @@ def S4_storerhnew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memh($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b001;
let Inst{31-21} = 0b10101011101;
@@ -24841,7 +24936,7 @@ def S4_storerhnew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0001;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24860,7 +24955,7 @@ def S4_storerhnew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memh($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101101101;
@@ -24885,7 +24980,7 @@ def S4_storeri_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memw($Re32=#$II) = $Rt32",
-tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011100;
@@ -24906,7 +25001,7 @@ def S4_storeri_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011100;
let addrMode = BaseRegOffset;
@@ -24922,7 +25017,7 @@ def S4_storeri_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memw($Ru32<<#$Ii+#$II) = $Rt32",
-tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101100;
let addrMode = BaseLongOffset;
@@ -24944,7 +25039,7 @@ def S4_storerinew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memw($Re32=#$II) = $Nt8.new",
-tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel {
+tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b010;
let Inst{31-21} = 0b10101011101;
@@ -24968,7 +25063,7 @@ def S4_storerinew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel {
+tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0010;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24987,7 +25082,7 @@ def S4_storerinew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memw($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel {
+tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101101101;
@@ -25012,7 +25107,7 @@ def S4_subaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Ru32),
"$Rd32 = add($Rs32,sub(#$Ii,$Ru32))",
-tc_c74f796f, TypeALU64>, Enc_8b8d61 {
+tc_f675fee8, TypeALU64>, Enc_8b8d61 {
let Inst{31-23} = 0b110110111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25027,7 +25122,7 @@ def S4_subi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = sub(#$Ii,asl($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b110;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -25045,7 +25140,7 @@ def S4_subi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = sub(#$Ii,lsr($Rx32in,#$II))",
-tc_c74f796f, TypeALU64>, Enc_c31910 {
+tc_f675fee8, TypeALU64>, Enc_c31910 {
let Inst{2-0} = 0b110;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -25063,7 +25158,7 @@ def S4_vrcrotate : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rdd32 = vrcrotate($Rss32,$Rt32,#$Ii)",
-tc_b9c0b731, TypeS_3op>, Enc_645d54 {
+tc_13bfbcf9, TypeS_3op>, Enc_645d54 {
let Inst{7-6} = 0b11;
let Inst{31-21} = 0b11000011110;
let prefersSlot3 = 1;
@@ -25072,7 +25167,7 @@ def S4_vrcrotate_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rxx32 += vrcrotate($Rss32,$Rt32,#$Ii)",
-tc_60571023, TypeS_3op>, Enc_b72622 {
+tc_9debc299, TypeS_3op>, Enc_b72622 {
let Inst{7-6} = 0b00;
let Inst{31-21} = 0b11001011101;
let prefersSlot3 = 1;
@@ -25082,7 +25177,7 @@ def S4_vxaddsubh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25093,7 +25188,7 @@ def S4_vxaddsubhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubh($Rss32,$Rtt32):rnd:>>1:sat",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -25104,7 +25199,7 @@ def S4_vxaddsubw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25115,7 +25210,7 @@ def S4_vxsubaddh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddh($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25126,7 +25221,7 @@ def S4_vxsubaddhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddh($Rss32,$Rtt32):rnd:>>1:sat",
-tc_2b6f77c6, TypeS_3op>, Enc_a56825 {
+tc_002cb246, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -25137,7 +25232,7 @@ def S4_vxsubaddw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddw($Rss32,$Rtt32):sat",
-tc_b44c6e2a, TypeS_3op>, Enc_a56825 {
+tc_779080bf, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25148,7 +25243,7 @@ def S5_asrhub_rnd_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25161,7 +25256,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -25170,7 +25265,7 @@ def S5_asrhub_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):sat",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b101;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25183,7 +25278,7 @@ def S5_popcountp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = popcount($Rss32)",
-tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_703e822c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -25194,7 +25289,7 @@ def S5_vasrhrnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000001;
@@ -25204,14 +25299,14 @@ def S5_vasrhrnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_002cb246, TypeS_2op> {
let isPseudo = 1;
}
def S6_allocframe_to_raw : HInst<
(outs),
(ins u11_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_e216a5db, TypeMAPPING>, Requires<[HasV65]> {
+tc_b44ecf75, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -25219,7 +25314,7 @@ def S6_rol_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = rol($Rss32,#$Ii)",
-tc_55050d58, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
+tc_1fc97744, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000000000;
}
@@ -25227,7 +25322,7 @@ def S6_rol_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25237,7 +25332,7 @@ def S6_rol_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25247,7 +25342,7 @@ def S6_rol_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25257,7 +25352,7 @@ def S6_rol_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25267,7 +25362,7 @@ def S6_rol_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -25277,7 +25372,7 @@ def S6_rol_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = rol($Rs32,#$Ii)",
-tc_55050d58, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
+tc_1fc97744, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -25288,7 +25383,7 @@ def S6_rol_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25301,7 +25396,7 @@ def S6_rol_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25314,7 +25409,7 @@ def S6_rol_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25327,7 +25422,7 @@ def S6_rol_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25340,7 +25435,7 @@ def S6_rol_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -25353,7 +25448,7 @@ def S6_vsplatrbp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplatb($Rs32)",
-tc_be706f30, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
+tc_a1c00888, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100010;
}
@@ -25361,7 +25456,7 @@ def S6_vtrunehb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunehb($Rss32,$Rtt32)",
-tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
+tc_1fc97744, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25370,7 +25465,7 @@ def S6_vtrunohb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunohb($Rss32,$Rtt32)",
-tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
+tc_1fc97744, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25379,7 +25474,7 @@ def SA1_addi : HInst<
(outs GeneralSubRegs:$Rx16),
(ins IntRegs:$Rx16in, s32_0Imm:$Ii),
"$Rx16 = add($Rx16in,#$Ii)",
-tc_609d2efe, TypeSUBINSN>, Enc_93af4c {
+tc_0a705168, TypeSUBINSN>, Enc_93af4c {
let Inst{12-11} = 0b00;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25396,7 +25491,7 @@ def SA1_addrx : HInst<
(outs GeneralSubRegs:$Rx16),
(ins IntRegs:$Rx16in, GeneralSubRegs:$Rs16),
"$Rx16 = add($Rx16in,$Rs16)",
-tc_609d2efe, TypeSUBINSN>, Enc_0527db {
+tc_0a705168, TypeSUBINSN>, Enc_0527db {
let Inst{12-8} = 0b11000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25408,7 +25503,7 @@ def SA1_addsp : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u6_2Imm:$Ii),
"$Rd16 = add(r29,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_2df31d {
+tc_9fc3dae0, TypeSUBINSN>, Enc_2df31d {
let Inst{12-10} = 0b011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25420,7 +25515,7 @@ def SA1_and1 : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = and($Rs16,#1)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25431,7 +25526,7 @@ def SA1_clrf : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (!p0) $Rd16 = #0",
-tc_1b82a277, TypeSUBINSN>, Enc_1f5ba6 {
+tc_a1123dda, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25445,7 +25540,7 @@ def SA1_clrfnew : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (!p0.new) $Rd16 = #0",
-tc_e9c822f7, TypeSUBINSN>, Enc_1f5ba6 {
+tc_8b3e402a, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25460,7 +25555,7 @@ def SA1_clrt : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (p0) $Rd16 = #0",
-tc_1b82a277, TypeSUBINSN>, Enc_1f5ba6 {
+tc_a1123dda, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100110;
let isPredicated = 1;
let hasNewValue = 1;
@@ -25473,7 +25568,7 @@ def SA1_clrtnew : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (p0.new) $Rd16 = #0",
-tc_e9c822f7, TypeSUBINSN>, Enc_1f5ba6 {
+tc_8b3e402a, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -25487,7 +25582,7 @@ def SA1_cmpeqi : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u2_0Imm:$Ii),
"p0 = cmp.eq($Rs16,#$Ii)",
-tc_90f3e30c, TypeSUBINSN>, Enc_63eaeb {
+tc_5b7c0967, TypeSUBINSN>, Enc_63eaeb {
let Inst{3-2} = 0b00;
let Inst{12-8} = 0b11001;
let AsmVariantName = "NonParsable";
@@ -25498,7 +25593,7 @@ def SA1_combine0i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#0,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b00;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25510,7 +25605,7 @@ def SA1_combine1i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#1,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b01;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25522,7 +25617,7 @@ def SA1_combine2i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#2,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b10;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25534,7 +25629,7 @@ def SA1_combine3i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#3,#$Ii)",
-tc_a904d137, TypeSUBINSN>, Enc_ed48be {
+tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b11;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25546,7 +25641,7 @@ def SA1_combinerz : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins GeneralSubRegs:$Rs16),
"$Rdd8 = combine($Rs16,#0)",
-tc_a904d137, TypeSUBINSN>, Enc_399e12 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_399e12 {
let Inst{3-3} = 0b1;
let Inst{12-8} = 0b11101;
let hasNewValue = 1;
@@ -25558,7 +25653,7 @@ def SA1_combinezr : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins GeneralSubRegs:$Rs16),
"$Rdd8 = combine(#0,$Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_399e12 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_399e12 {
let Inst{3-3} = 0b0;
let Inst{12-8} = 0b11101;
let hasNewValue = 1;
@@ -25570,7 +25665,7 @@ def SA1_dec : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, n1Const:$n1),
"$Rd16 = add($Rs16,#$n1)",
-tc_609d2efe, TypeSUBINSN>, Enc_ee5ed0 {
+tc_0a705168, TypeSUBINSN>, Enc_ee5ed0 {
let Inst{12-8} = 0b10011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25581,7 +25676,7 @@ def SA1_inc : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = add($Rs16,#1)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25592,7 +25687,7 @@ def SA1_seti : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u32_0Imm:$Ii),
"$Rd16 = #$Ii",
-tc_a904d137, TypeSUBINSN>, Enc_e39bb2 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_e39bb2 {
let Inst{12-10} = 0b010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25608,7 +25703,7 @@ def SA1_setin1 : HInst<
(outs GeneralSubRegs:$Rd16),
(ins n1Const:$n1),
"$Rd16 = #$n1",
-tc_a904d137, TypeSUBINSN>, Enc_7a0ea6 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_7a0ea6 {
let Inst{12-4} = 0b110100000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25619,7 +25714,7 @@ def SA1_sxtb : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = sxtb($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25630,7 +25725,7 @@ def SA1_sxth : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = sxth($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10100;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25641,7 +25736,7 @@ def SA1_tfr : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = $Rs16",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25652,7 +25747,7 @@ def SA1_zxtb : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = and($Rs16,#255)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25663,7 +25758,7 @@ def SA1_zxth : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = zxth($Rs16)",
-tc_a904d137, TypeSUBINSN>, Enc_97d666 {
+tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25674,7 +25769,7 @@ def SL1_loadri_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"$Rd16 = memw($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_53dca9 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_53dca9 {
let Inst{12-12} = 0b0;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25688,7 +25783,7 @@ def SL1_loadrub_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"$Rd16 = memub($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_c175d0 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_c175d0 {
let Inst{12-12} = 0b1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25702,7 +25797,7 @@ def SL2_deallocframe : HInst<
(outs),
(ins),
"deallocframe",
-tc_36c68ad1, TypeSUBINSN>, Enc_e3b0c4 {
+tc_39dfefe8, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111100000000;
let accessSize = DoubleWordAccess;
let AsmVariantName = "NonParsable";
@@ -25715,7 +25810,7 @@ def SL2_jumpr31 : HInst<
(outs),
(ins),
"jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000000;
let isTerminator = 1;
let isIndirectBranch = 1;
@@ -25730,7 +25825,7 @@ def SL2_jumpr31_f : HInst<
(outs),
(ins),
"if (!p0) jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25748,7 +25843,7 @@ def SL2_jumpr31_fnew : HInst<
(outs),
(ins),
"if (!p0.new) jumpr:nt r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25767,7 +25862,7 @@ def SL2_jumpr31_t : HInst<
(outs),
(ins),
"if (p0) jumpr r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000100;
let isPredicated = 1;
let isTerminator = 1;
@@ -25784,7 +25879,7 @@ def SL2_jumpr31_tnew : HInst<
(outs),
(ins),
"if (p0.new) jumpr:nt r31",
-tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 {
+tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000110;
let isPredicated = 1;
let isTerminator = 1;
@@ -25802,7 +25897,7 @@ def SL2_loadrb_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_0Imm:$Ii),
"$Rd16 = memb($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2fbf3c {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2fbf3c {
let Inst{12-11} = 0b10;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25816,7 +25911,7 @@ def SL2_loadrd_sp : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u5_3Imm:$Ii),
"$Rdd8 = memd(r29+#$Ii)",
-tc_9c98e8af, TypeSUBINSN>, Enc_86a14b {
+tc_c4db48cb, TypeSUBINSN>, Enc_86a14b {
let Inst{12-8} = 0b11110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25831,7 +25926,7 @@ def SL2_loadrh_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
"$Rd16 = memh($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2bae10 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2bae10 {
let Inst{12-11} = 0b00;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25845,7 +25940,7 @@ def SL2_loadri_sp : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u5_2Imm:$Ii),
"$Rd16 = memw(r29+#$Ii)",
-tc_9c98e8af, TypeSUBINSN>, Enc_51635c {
+tc_c4db48cb, TypeSUBINSN>, Enc_51635c {
let Inst{12-9} = 0b1110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25860,7 +25955,7 @@ def SL2_loadruh_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
"$Rd16 = memuh($Rs16+#$Ii)",
-tc_7f881c76, TypeSUBINSN>, Enc_2bae10 {
+tc_17e0d2cd, TypeSUBINSN>, Enc_2bae10 {
let Inst{12-11} = 0b01;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25874,7 +25969,7 @@ def SL2_return : HInst<
(outs),
(ins),
"dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000000;
let isTerminator = 1;
let isIndirectBranch = 1;
@@ -25892,7 +25987,7 @@ def SL2_return_f : HInst<
(outs),
(ins),
"if (!p0) dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25913,7 +26008,7 @@ def SL2_return_fnew : HInst<
(outs),
(ins),
"if (!p0.new) dealloc_return:nt",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25935,7 +26030,7 @@ def SL2_return_t : HInst<
(outs),
(ins),
"if (p0) dealloc_return",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000100;
let isPredicated = 1;
let isTerminator = 1;
@@ -25955,7 +26050,7 @@ def SL2_return_tnew : HInst<
(outs),
(ins),
"if (p0.new) dealloc_return:nt",
-tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 {
+tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000110;
let isPredicated = 1;
let isTerminator = 1;
@@ -25976,7 +26071,7 @@ def SS1_storeb_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii, GeneralSubRegs:$Rt16),
"memb($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_b38ffc {
+tc_30b9bb4a, TypeSUBINSN>, Enc_b38ffc {
let Inst{12-12} = 0b1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -25988,7 +26083,7 @@ def SS1_storew_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii, GeneralSubRegs:$Rt16),
"memw($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_f55a0c {
+tc_30b9bb4a, TypeSUBINSN>, Enc_f55a0c {
let Inst{12-12} = 0b0;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26000,7 +26095,7 @@ def SS2_allocframe : HInst<
(outs),
(ins u5_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_0fc1ae07, TypeSUBINSN>, Enc_6f70ca {
+tc_49a8207d, TypeSUBINSN>, Enc_6f70ca {
let Inst{3-0} = 0b0000;
let Inst{12-9} = 0b1110;
let addrMode = BaseImmOffset;
@@ -26015,7 +26110,7 @@ def SS2_storebi0 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"memb($Rs16+#$Ii) = #0",
-tc_57288781, TypeSUBINSN>, Enc_84d359 {
+tc_89e94ad3, TypeSUBINSN>, Enc_84d359 {
let Inst{12-8} = 0b10010;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26027,7 +26122,7 @@ def SS2_storebi1 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"memb($Rs16+#$Ii) = #1",
-tc_57288781, TypeSUBINSN>, Enc_84d359 {
+tc_89e94ad3, TypeSUBINSN>, Enc_84d359 {
let Inst{12-8} = 0b10011;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26039,7 +26134,7 @@ def SS2_stored_sp : HInst<
(outs),
(ins s6_3Imm:$Ii, GeneralDoubleLow8Regs:$Rtt8),
"memd(r29+#$Ii) = $Rtt8",
-tc_a788683e, TypeSUBINSN>, Enc_b8309d {
+tc_0371abea, TypeSUBINSN>, Enc_b8309d {
let Inst{12-9} = 0b0101;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
@@ -26052,7 +26147,7 @@ def SS2_storeh_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii, GeneralSubRegs:$Rt16),
"memh($Rs16+#$Ii) = $Rt16",
-tc_05b6c987, TypeSUBINSN>, Enc_625deb {
+tc_30b9bb4a, TypeSUBINSN>, Enc_625deb {
let Inst{12-11} = 0b00;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
@@ -26064,7 +26159,7 @@ def SS2_storew_sp : HInst<
(outs),
(ins u5_2Imm:$Ii, GeneralSubRegs:$Rt16),
"memw(r29+#$Ii) = $Rt16",
-tc_a788683e, TypeSUBINSN>, Enc_87c142 {
+tc_0371abea, TypeSUBINSN>, Enc_87c142 {
let Inst{12-9} = 0b0100;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26077,7 +26172,7 @@ def SS2_storewi0 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"memw($Rs16+#$Ii) = #0",
-tc_57288781, TypeSUBINSN>, Enc_a6ce9c {
+tc_89e94ad3, TypeSUBINSN>, Enc_a6ce9c {
let Inst{12-8} = 0b10000;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26089,7 +26184,7 @@ def SS2_storewi1 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"memw($Rs16+#$Ii) = #1",
-tc_57288781, TypeSUBINSN>, Enc_a6ce9c {
+tc_89e94ad3, TypeSUBINSN>, Enc_a6ce9c {
let Inst{12-8} = 0b10001;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26227,7 +26322,7 @@ def V6_extractw : HInst<
(outs IntRegs:$Rd32),
(ins HvxVR:$Vu32, IntRegs:$Rs32),
"$Rd32 = vextract($Vu32,$Rs32)",
-tc_9777e6bf, TypeLD>, Enc_50e578, Requires<[UseHVXV60]> {
+tc_540c3da3, TypeLD>, Enc_50e578, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10010010000;
@@ -26448,7 +26543,7 @@ def V6_lvsplatb : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32.b = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
+tc_c4edf264, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
@@ -26459,7 +26554,7 @@ def V6_lvsplath : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32.h = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
+tc_c4edf264, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
@@ -26470,7 +26565,7 @@ def V6_lvsplatw : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32 = vsplat($Rt32)",
-tc_6b78cf13, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV60]> {
+tc_c4edf264, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV60]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26481,7 +26576,7 @@ def V6_pred_and : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = and($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26494,7 +26589,7 @@ def V6_pred_and_n : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = and($Qs4,!$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26507,7 +26602,7 @@ def V6_pred_not : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4),
"$Qd4 = not($Qs4)",
-tc_71337255, TypeCVI_VA>, Enc_bfbf03, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_bfbf03, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-10} = 0b0000;
let Inst{31-16} = 0b0001111000000011;
@@ -26519,7 +26614,7 @@ def V6_pred_or : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = or($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26532,7 +26627,7 @@ def V6_pred_or_n : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = or($Qs4,!$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26545,7 +26640,7 @@ def V6_pred_scalar2 : HInst<
(outs HvxQR:$Qd4),
(ins IntRegs:$Rt32),
"$Qd4 = vsetq($Rt32)",
-tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV60]> {
+tc_5bf8afbb, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV60]> {
let Inst{13-2} = 0b000000010001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26556,7 +26651,7 @@ def V6_pred_scalar2v2 : HInst<
(outs HvxQR:$Qd4),
(ins IntRegs:$Rt32),
"$Qd4 = vsetq2($Rt32)",
-tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV62]> {
+tc_5bf8afbb, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV62]> {
let Inst{13-2} = 0b000000010011;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -26567,7 +26662,7 @@ def V6_pred_xor : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4 = xor($Qs4,$Qt4)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000011;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26580,7 +26675,7 @@ def V6_shuffeqh : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4.b = vshuffe($Qs4.h,$Qt4.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
let Inst{7-2} = 0b000110;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26593,7 +26688,7 @@ def V6_shuffeqw : HInst<
(outs HvxQR:$Qd4),
(ins HvxQR:$Qs4, HvxQR:$Qt4),
"$Qd4.h = vshuffe($Qs4.w,$Qt4.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> {
let Inst{7-2} = 0b000111;
let Inst{13-10} = 0b0000;
let Inst{21-16} = 0b000011;
@@ -26743,7 +26838,7 @@ def V6_vL32Ub_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmemu($Rt32+#$Ii)",
-tc_35e92f8e, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> {
+tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26760,7 +26855,7 @@ def V6_vL32Ub_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmemu($Rx32++#$Ii)",
-tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> {
+tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -26779,7 +26874,7 @@ def V6_vL32Ub_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmemu($Rx32++$Mu2)",
-tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[UseHVXV60]> {
+tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[UseHVXV60]> {
let Inst{12-5} = 0b00000111;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -26796,7 +26891,7 @@ def V6_vL32b_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmem($Rt32+#$Ii)",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26816,7 +26911,7 @@ def V6_vL32b_cur_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.cur = vmem($Rt32+#$Ii)",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -26836,7 +26931,7 @@ def V6_vL32b_cur_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -26856,7 +26951,7 @@ def V6_vL32b_cur_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -26878,7 +26973,7 @@ def V6_vL32b_cur_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000101;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -26899,7 +26994,7 @@ def V6_vL32b_cur_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.cur = vmem($Rx32++#$Ii)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -26920,7 +27015,7 @@ def V6_vL32b_cur_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.cur = vmem($Rx32++$Mu2)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000001;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -26940,7 +27035,7 @@ def V6_vL32b_cur_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -26959,7 +27054,7 @@ def V6_vL32b_cur_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -26980,7 +27075,7 @@ def V6_vL32b_cur_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000100;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27000,7 +27095,7 @@ def V6_vL32b_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27019,7 +27114,7 @@ def V6_vL32b_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27040,7 +27135,7 @@ def V6_vL32b_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000011;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27060,7 +27155,7 @@ def V6_vL32b_nt_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32 = vmem($Rt32+#$Ii):nt",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27081,7 +27176,7 @@ def V6_vL32b_nt_cur_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27102,7 +27197,7 @@ def V6_vL32b_nt_cur_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27123,7 +27218,7 @@ def V6_vL32b_nt_cur_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27146,7 +27241,7 @@ def V6_vL32b_nt_cur_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000101;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27168,7 +27263,7 @@ def V6_vL32b_nt_cur_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b001;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27190,7 +27285,7 @@ def V6_vL32b_nt_cur_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000001;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27211,7 +27306,7 @@ def V6_vL32b_nt_cur_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27231,7 +27326,7 @@ def V6_vL32b_nt_cur_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27253,7 +27348,7 @@ def V6_vL32b_nt_cur_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000100;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27274,7 +27369,7 @@ def V6_vL32b_nt_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27294,7 +27389,7 @@ def V6_vL32b_nt_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27316,7 +27411,7 @@ def V6_vL32b_nt_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000011;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27337,7 +27432,7 @@ def V6_vL32b_nt_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmem($Rx32++#$Ii):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27359,7 +27454,7 @@ def V6_vL32b_nt_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmem($Rx32++$Mu2):nt",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27380,7 +27475,7 @@ def V6_vL32b_nt_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27399,7 +27494,7 @@ def V6_vL32b_nt_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27420,7 +27515,7 @@ def V6_vL32b_nt_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000010;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27440,7 +27535,7 @@ def V6_vL32b_nt_tmp_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000010;
@@ -27460,7 +27555,7 @@ def V6_vL32b_nt_tmp_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27480,7 +27575,7 @@ def V6_vL32b_nt_tmp_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27502,7 +27597,7 @@ def V6_vL32b_nt_tmp_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27523,7 +27618,7 @@ def V6_vL32b_nt_tmp_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001010;
@@ -27544,7 +27639,7 @@ def V6_vL32b_nt_tmp_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000010;
let Inst{31-21} = 0b00101011010;
let hasNewValue = 1;
@@ -27564,7 +27659,7 @@ def V6_vL32b_nt_tmp_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000110;
let isPredicated = 1;
@@ -27583,7 +27678,7 @@ def V6_vL32b_nt_tmp_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -27604,7 +27699,7 @@ def V6_vL32b_nt_tmp_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011110;
let isPredicated = 1;
@@ -27624,7 +27719,7 @@ def V6_vL32b_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32 = vmem($Rx32++#$Ii)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -27645,7 +27740,7 @@ def V6_vL32b_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32 = vmem($Rx32++$Mu2)",
-tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -27665,7 +27760,7 @@ def V6_vL32b_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)",
-tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_abe8c3b2, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27683,7 +27778,7 @@ def V6_vL32b_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27703,7 +27798,7 @@ def V6_vL32b_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)",
-tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_453fe68d, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000010;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27722,7 +27817,7 @@ def V6_vL32b_tmp_ai : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"$Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
+tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000000;
@@ -27741,7 +27836,7 @@ def V6_vL32b_tmp_npred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27760,7 +27855,7 @@ def V6_vL32b_tmp_npred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27781,7 +27876,7 @@ def V6_vL32b_tmp_npred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27801,7 +27896,7 @@ def V6_vL32b_tmp_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"$Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel {
let Inst{7-5} = 0b010;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001000;
@@ -27821,7 +27916,7 @@ def V6_vL32b_tmp_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
+tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel {
let Inst{12-5} = 0b00000010;
let Inst{31-21} = 0b00101011000;
let hasNewValue = 1;
@@ -27840,7 +27935,7 @@ def V6_vL32b_tmp_pred_ai : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
-tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
+tc_3904b926, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000100;
let isPredicated = 1;
@@ -27858,7 +27953,7 @@ def V6_vL32b_tmp_pred_pi : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -27878,7 +27973,7 @@ def V6_vL32b_tmp_pred_ppu : HInst<
(outs HvxVR:$Vd32, IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
-tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
+tc_b9db8205, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011100;
let isPredicated = 1;
@@ -27897,7 +27992,7 @@ def V6_vS32Ub_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmemu($Rt32+#$Ii) = $Vs32",
-tc_354299ad, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -27912,7 +28007,7 @@ def V6_vS32Ub_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32",
-tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_131f1c81, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -27927,7 +28022,7 @@ def V6_vS32Ub_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -27944,7 +28039,7 @@ def V6_vS32Ub_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000111;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -27960,7 +28055,7 @@ def V6_vS32Ub_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmemu($Rx32++#$Ii) = $Vs32",
-tc_7fa82b08, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b111;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -27976,7 +28071,7 @@ def V6_vS32Ub_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmemu($Rx32++$Mu2) = $Vs32",
-tc_7fa82b08, TypeCVI_VM_STU>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000111;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -27991,7 +28086,7 @@ def V6_vS32Ub_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32",
-tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_131f1c81, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28005,7 +28100,7 @@ def V6_vS32Ub_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28021,7 +28116,7 @@ def V6_vS32Ub_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32",
-tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_c7039829, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000110;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28036,7 +28131,7 @@ def V6_vS32b_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rt32+#$Ii) = $Vs32",
-tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28052,7 +28147,7 @@ def V6_vS32b_new_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rt32+#$Ii) = $Os8.new",
-tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
+tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28071,7 +28166,7 @@ def V6_vS32b_new_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01101;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28090,7 +28185,7 @@ def V6_vS32b_new_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28111,7 +28206,7 @@ def V6_vS32b_new_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001101;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28131,7 +28226,7 @@ def V6_vS32b_new_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rx32++#$Ii) = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28151,7 +28246,7 @@ def V6_vS32b_new_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"vmem($Rx32++$Mu2) = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-3} = 0b0000000100;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28170,7 +28265,7 @@ def V6_vS32b_new_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01000;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28188,7 +28283,7 @@ def V6_vS32b_new_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28208,7 +28303,7 @@ def V6_vS32b_new_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001000;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28227,7 +28322,7 @@ def V6_vS32b_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28243,7 +28338,7 @@ def V6_vS32b_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28261,7 +28356,7 @@ def V6_vS32b_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28278,7 +28373,7 @@ def V6_vS32b_nqpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000100;
let addrMode = BaseImmOffset;
@@ -28290,7 +28385,7 @@ def V6_vS32b_nqpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -28304,7 +28399,7 @@ def V6_vS32b_nqpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011100;
let addrMode = PostInc;
@@ -28317,7 +28412,7 @@ def V6_vS32b_nt_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rt32+#$Ii):nt = $Vs32",
-tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
+tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
@@ -28334,7 +28429,7 @@ def V6_vS32b_nt_new_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
+tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
@@ -28354,7 +28449,7 @@ def V6_vS32b_nt_new_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01111;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28374,7 +28469,7 @@ def V6_vS32b_nt_new_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28396,7 +28491,7 @@ def V6_vS32b_nt_new_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001111;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28417,7 +28512,7 @@ def V6_vS32b_nt_new_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b00100;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
@@ -28438,7 +28533,7 @@ def V6_vS32b_nt_new_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
+tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-3} = 0b0000000100;
let Inst{31-21} = 0b00101011011;
let addrMode = PostInc;
@@ -28458,7 +28553,7 @@ def V6_vS32b_nt_new_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
-tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
+tc_7177e272, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01010;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28477,7 +28572,7 @@ def V6_vS32b_nt_new_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-3} = 0b01010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28498,7 +28593,7 @@ def V6_vS32b_nt_new_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8),
"if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
-tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
+tc_e99d4c2e, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-3} = 0b00001010;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28518,7 +28613,7 @@ def V6_vS32b_nt_npred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28535,7 +28630,7 @@ def V6_vS32b_nt_npred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28554,7 +28649,7 @@ def V6_vS32b_nt_npred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28572,7 +28667,7 @@ def V6_vS32b_nt_nqpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000110;
let addrMode = BaseImmOffset;
@@ -28585,7 +28680,7 @@ def V6_vS32b_nt_nqpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -28600,7 +28695,7 @@ def V6_vS32b_nt_nqpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011110;
let addrMode = PostInc;
@@ -28614,7 +28709,7 @@ def V6_vS32b_nt_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rx32++#$Ii):nt = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
@@ -28632,7 +28727,7 @@ def V6_vS32b_nt_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmem($Rx32++$Mu2):nt = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011011;
let addrMode = PostInc;
@@ -28649,7 +28744,7 @@ def V6_vS32b_nt_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
@@ -28665,7 +28760,7 @@ def V6_vS32b_nt_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001111;
@@ -28683,7 +28778,7 @@ def V6_vS32b_nt_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
@@ -28700,7 +28795,7 @@ def V6_vS32b_nt_qpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000110;
let addrMode = BaseImmOffset;
@@ -28713,7 +28808,7 @@ def V6_vS32b_nt_qpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
@@ -28728,7 +28823,7 @@ def V6_vS32b_nt_qpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011110;
let addrMode = PostInc;
@@ -28742,7 +28837,7 @@ def V6_vS32b_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"vmem($Rx32++#$Ii) = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28759,7 +28854,7 @@ def V6_vS32b_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"vmem($Rx32++$Mu2) = $Vs32",
-tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
+tc_3e2aaafc, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28774,7 +28869,7 @@ def V6_vS32b_pred_ai : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
+tc_a02a10a8, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
@@ -28789,7 +28884,7 @@ def V6_vS32b_pred_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001101;
@@ -28806,7 +28901,7 @@ def V6_vS32b_pred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Pv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
+tc_54a0dc47, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
@@ -28822,7 +28917,7 @@ def V6_vS32b_qpred_ai : HInst<
(outs),
(ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rt32+#$Ii) = $Vs32",
-tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
+tc_447d9895, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000100;
let addrMode = BaseImmOffset;
@@ -28834,7 +28929,7 @@ def V6_vS32b_qpred_pi : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++#$Ii) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
@@ -28848,7 +28943,7 @@ def V6_vS32b_qpred_ppu : HInst<
(outs IntRegs:$Rx32),
(ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32),
"if ($Qv4) vmem($Rx32++$Mu2) = $Vs32",
-tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
+tc_191381c1, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> {
let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011100;
let addrMode = PostInc;
@@ -28861,7 +28956,7 @@ def V6_vS32b_srls_ai : HInst<
(outs),
(ins IntRegs:$Rt32, s4_0Imm:$Ii),
"vmem($Rt32+#$Ii):scatter_release",
-tc_29841470, TypeCVI_SCATTER_NEW_RST>, Enc_ff3442, Requires<[UseHVXV65]> {
+tc_3ce09744, TypeCVI_SCATTER_NEW_RST>, Enc_ff3442, Requires<[UseHVXV65]> {
let Inst{7-0} = 0b00101000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
@@ -28875,7 +28970,7 @@ def V6_vS32b_srls_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
"vmem($Rx32++#$Ii):scatter_release",
-tc_5c03dc63, TypeCVI_SCATTER_NEW_RST>, Enc_6c9ee0, Requires<[UseHVXV65]> {
+tc_20a4bbec, TypeCVI_SCATTER_NEW_RST>, Enc_6c9ee0, Requires<[UseHVXV65]> {
let Inst{7-0} = 0b00101000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
@@ -28890,7 +28985,7 @@ def V6_vS32b_srls_ppu : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"vmem($Rx32++$Mu2):scatter_release",
-tc_5c03dc63, TypeCVI_SCATTER_NEW_RST>, Enc_44661f, Requires<[UseHVXV65]> {
+tc_20a4bbec, TypeCVI_SCATTER_NEW_RST>, Enc_44661f, Requires<[UseHVXV65]> {
let Inst{12-0} = 0b0000000101000;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
@@ -28904,7 +28999,7 @@ def V6_vabsb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vabs($Vu32.b)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -28927,7 +29022,7 @@ def V6_vabsb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vabs($Vu32.b):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -28950,7 +29045,7 @@ def V6_vabsdiffh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -28973,7 +29068,7 @@ def V6_vabsdiffub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -28996,7 +29091,7 @@ def V6_vabsdiffuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -29019,7 +29114,7 @@ def V6_vabsdiffw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -29042,7 +29137,7 @@ def V6_vabsh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vabs($Vu32.h)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29065,7 +29160,7 @@ def V6_vabsh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vabs($Vu32.h):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29088,7 +29183,7 @@ def V6_vabsub_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.ub = vabs($Vu32.b)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29099,7 +29194,7 @@ def V6_vabsuh_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uh = vabs($Vu32.h)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29110,7 +29205,7 @@ def V6_vabsuw_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uw = vabs($Vu32.w)",
-tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -29121,7 +29216,7 @@ def V6_vabsw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vabs($Vu32.w)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29144,7 +29239,7 @@ def V6_vabsw_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vabs($Vu32.w):sat",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -29167,7 +29262,7 @@ def V6_vaddb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vadd($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -29190,7 +29285,7 @@ def V6_vaddb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29213,7 +29308,7 @@ def V6_vaddbnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.b += $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29241,7 +29336,7 @@ def V6_vaddbq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.b += $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29269,7 +29364,7 @@ def V6_vaddbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vadd($Vu32.b,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -29292,7 +29387,7 @@ def V6_vaddbsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29315,7 +29410,7 @@ def V6_vaddcarry : HInst<
(outs HvxVR:$Vd32, HvxQR:$Qx4),
(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qx4in),
"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry",
-tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
+tc_7e6a3e89, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100101;
@@ -29324,11 +29419,37 @@ let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_vaddcarryo : HInst<
+(outs HvxVR:$Vd32, HvxQR:$Qe4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w,$Qe4 = vadd($Vu32.w,$Vv32.w):carry",
+tc_e35c1e93, TypeCOPROC_VX>, Enc_c1d806, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddcarrysat : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qs4),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qs4):carry:sat",
+tc_257f6f7c, TypeCVI_VA>, Enc_e0820b, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vaddclbh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -29340,7 +29461,7 @@ def V6_vaddclbw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -29352,7 +29473,7 @@ def V6_vaddh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -29375,7 +29496,7 @@ def V6_vaddh_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29398,7 +29519,7 @@ def V6_vaddhnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.h += $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29426,7 +29547,7 @@ def V6_vaddhq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.h += $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29454,7 +29575,7 @@ def V6_vaddhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vadd($Vu32.h,$Vv32.h):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29477,7 +29598,7 @@ def V6_vaddhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29500,7 +29621,7 @@ def V6_vaddhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vadd($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29512,7 +29633,7 @@ def V6_vaddhw_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vadd($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -29550,7 +29671,7 @@ def V6_vaddubh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29562,7 +29683,7 @@ def V6_vaddubh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -29600,7 +29721,7 @@ def V6_vaddubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29623,7 +29744,7 @@ def V6_vaddubsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29646,7 +29767,7 @@ def V6_vaddububb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29658,7 +29779,7 @@ def V6_vadduhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29681,7 +29802,7 @@ def V6_vadduhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29704,7 +29825,7 @@ def V6_vadduhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -29716,7 +29837,7 @@ def V6_vadduhw_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -29754,7 +29875,7 @@ def V6_vadduwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -29777,7 +29898,7 @@ def V6_vadduwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -29800,7 +29921,7 @@ def V6_vaddw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29823,7 +29944,7 @@ def V6_vaddw_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -29846,7 +29967,7 @@ def V6_vaddwnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.w += $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29874,7 +29995,7 @@ def V6_vaddwq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.w += $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -29902,7 +30023,7 @@ def V6_vaddwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vadd($Vu32.w,$Vv32.w):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -29925,7 +30046,7 @@ def V6_vaddwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -29948,7 +30069,7 @@ def V6_valignb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = valign($Vu32,$Vv32,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -29960,7 +30081,7 @@ def V6_valignbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32 = valign($Vu32,$Vv32,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110001;
let hasNewValue = 1;
@@ -29971,7 +30092,7 @@ def V6_vand : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vand($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -29983,7 +30104,7 @@ def V6_vandnqrt : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qu4, IntRegs:$Rt32),
"$Vd32 = vand(!$Qu4,$Rt32)",
-tc_e231aa4f, TypeCVI_VX>, Enc_7b7ba8, Requires<[UseHVXV62]> {
+tc_ac4046bc, TypeCVI_VX>, Enc_7b7ba8, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-10} = 0b0001;
let Inst{31-21} = 0b00011001101;
@@ -29995,7 +30116,7 @@ def V6_vandnqrt_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32),
"$Vx32 |= vand(!$Qu4,$Rt32)",
-tc_9311da3f, TypeCVI_VX>, Enc_895bd9, Requires<[UseHVXV62]> {
+tc_2e8f5f6e, TypeCVI_VX>, Enc_895bd9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b00011001011;
@@ -30033,7 +30154,7 @@ def V6_vandqrt : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qu4, IntRegs:$Rt32),
"$Vd32 = vand($Qu4,$Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b00011001101;
@@ -30045,7 +30166,7 @@ def V6_vandqrt_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32),
"$Vx32 |= vand($Qu4,$Rt32)",
-tc_9311da3f, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[UseHVXV60]> {
+tc_2e8f5f6e, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b00011001011;
@@ -30083,7 +30204,7 @@ def V6_vandvnqv : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4, HvxVR:$Vu32),
"$Vd32 = vand(!$Qv4,$Vu32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000011;
@@ -30096,7 +30217,7 @@ def V6_vandvqv : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4, HvxVR:$Vu32),
"$Vd32 = vand($Qv4,$Vu32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000011;
@@ -30109,7 +30230,7 @@ def V6_vandvrt : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Qd4 = vand($Vu32,$Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -30121,7 +30242,7 @@ def V6_vandvrt_acc : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Qx4 |= vand($Vu32,$Rt32)",
-tc_9311da3f, TypeCVI_VX_LATE>, Enc_adf111, Requires<[UseHVXV60]> {
+tc_2e8f5f6e, TypeCVI_VX_LATE>, Enc_adf111, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30155,7 +30276,7 @@ def V6_vaslh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vasl($Vu32.h,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -30167,7 +30288,7 @@ def V6_vaslh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vasl($Vu32.h,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -30205,7 +30326,7 @@ def V6_vaslhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vasl($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30228,7 +30349,7 @@ def V6_vaslw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vasl($Vu32.w,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30240,7 +30361,7 @@ def V6_vaslw_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vasl($Vu32.w,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30278,7 +30399,7 @@ def V6_vaslwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vasl($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30297,11 +30418,36 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vasr_into : HInst<
+(outs HvxWR:$Vxx32),
+(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vxx32.w = vasrinto($Vu32.w,$Vv32.w)",
+tc_df80eeb0, TypeCVI_VP_VS>, Enc_3fc427, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vasr_into_alt : HInst<
+(outs HvxWR:$Vxx32),
+(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vxx32 = vasrinto($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
def V6_vasrh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vasr($Vu32.h,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30313,7 +30459,7 @@ def V6_vasrh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vasr($Vu32.h,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -30351,7 +30497,7 @@ def V6_vasrhbrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -30363,7 +30509,7 @@ def V6_vasrhbrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30373,7 +30519,7 @@ def V6_vasrhbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30385,7 +30531,7 @@ def V6_vasrhubrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30397,7 +30543,7 @@ def V6_vasrhubrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30407,7 +30553,7 @@ def V6_vasrhubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30419,7 +30565,7 @@ def V6_vasrhubsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30429,7 +30575,7 @@ def V6_vasrhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vasr($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30452,7 +30598,7 @@ def V6_vasruhubrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.uh,$Vv32.uh,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30464,7 +30610,7 @@ def V6_vasruhubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.ub = vasr($Vu32.uh,$Vv32.uh,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011000;
@@ -30476,7 +30622,7 @@ def V6_vasruwuhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30488,7 +30634,7 @@ def V6_vasruwuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011000;
@@ -30500,7 +30646,7 @@ def V6_vasrw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vasr($Vu32.w,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -30512,7 +30658,7 @@ def V6_vasrw_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vasr($Vu32.w,$Rt32)",
-tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_309dbb4f, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -30550,7 +30696,7 @@ def V6_vasrwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30562,7 +30708,7 @@ def V6_vasrwh_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30572,7 +30718,7 @@ def V6_vasrwhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30584,7 +30730,7 @@ def V6_vasrwhrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30594,7 +30740,7 @@ def V6_vasrwhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30606,7 +30752,7 @@ def V6_vasrwhsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30616,7 +30762,7 @@ def V6_vasrwuhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -30628,7 +30774,7 @@ def V6_vasrwuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
-tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_16ff9ef8, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -30640,7 +30786,7 @@ def V6_vasrwuhsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
+tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30650,7 +30796,7 @@ def V6_vasrwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vasr($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -30673,7 +30819,7 @@ def V6_vassign : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32 = $Vu32",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000011;
@@ -30695,7 +30841,7 @@ def V6_vavgb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vavg($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30718,7 +30864,7 @@ def V6_vavgbrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vavg($Vu32.b,$Vv32.b):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30741,7 +30887,7 @@ def V6_vavgh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vavg($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30764,7 +30910,7 @@ def V6_vavghrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30787,7 +30933,7 @@ def V6_vavgub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30810,7 +30956,7 @@ def V6_vavgubrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30833,7 +30979,7 @@ def V6_vavguh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30856,7 +31002,7 @@ def V6_vavguhrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30879,7 +31025,7 @@ def V6_vavguw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vavg($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30902,7 +31048,7 @@ def V6_vavguwrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vavg($Vu32.uw,$Vv32.uw):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -30925,7 +31071,7 @@ def V6_vavgw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vavg($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
@@ -30948,7 +31094,7 @@ def V6_vavgwrnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -30971,7 +31117,7 @@ def V6_vccombine : HInst<
(outs HvxWR:$Vdd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32, HvxVR:$Vv32),
"if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
-tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
+tc_af25efd9, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011010011;
@@ -30984,7 +31130,7 @@ def V6_vcl0h : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uh = vcl0($Vu32.uh)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -31007,7 +31153,7 @@ def V6_vcl0w : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.uw = vcl0($Vu32.uw)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -31030,7 +31176,7 @@ def V6_vcmov : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32),
"if ($Ps4) $Vd32 = $Vu32",
-tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
+tc_3aacf4a8, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001101000000000;
@@ -31043,7 +31189,7 @@ def V6_vcombine : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vcombine($Vu32,$Vv32)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -31067,7 +31213,7 @@ def V6_vdd0 : HInst<
(outs HvxWR:$Vdd32),
(ins),
"$Vdd32 = #0",
-tc_8a6eb39a, TypeMAPPING>, Requires<[UseHVXV65]> {
+tc_718b5c53, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -31078,7 +31224,7 @@ def V6_vdeal : HInst<
(outs HvxVR:$Vy32, HvxVR:$Vx32),
(ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32),
"vdeal($Vy32,$Vx32,$Rt32)",
-tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
+tc_561aaa58, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001111;
@@ -31093,7 +31239,7 @@ def V6_vdealb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vdeal($Vu32.b)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -31105,7 +31251,7 @@ def V6_vdealb4w : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vdeale($Vu32.b,$Vv32.b)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -31139,7 +31285,7 @@ def V6_vdealh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vdeal($Vu32.h)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -31162,7 +31308,7 @@ def V6_vdealvdd : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -31174,7 +31320,7 @@ def V6_vdelta : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vdelta($Vu32,$Vv32)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -31186,7 +31332,7 @@ def V6_vdmpybus : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31198,7 +31344,7 @@ def V6_vdmpybus_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31236,7 +31382,7 @@ def V6_vdmpybus_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31248,7 +31394,7 @@ def V6_vdmpybus_dv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31286,7 +31432,7 @@ def V6_vdmpyhb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31298,7 +31444,7 @@ def V6_vdmpyhb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -31336,7 +31482,7 @@ def V6_vdmpyhb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31348,7 +31494,7 @@ def V6_vdmpyhb_dv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31386,7 +31532,7 @@ def V6_vdmpyhisat : HInst<
(outs HvxVR:$Vd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31398,7 +31544,7 @@ def V6_vdmpyhisat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31436,7 +31582,7 @@ def V6_vdmpyhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31448,7 +31594,7 @@ def V6_vdmpyhsat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31486,7 +31632,7 @@ def V6_vdmpyhsuisat : HInst<
(outs HvxVR:$Vd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31498,7 +31644,7 @@ def V6_vdmpyhsuisat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31536,7 +31682,7 @@ def V6_vdmpyhsusat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -31548,7 +31694,7 @@ def V6_vdmpyhsusat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -31586,7 +31732,7 @@ def V6_vdmpyhvsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -31598,7 +31744,7 @@ def V6_vdmpyhvsat_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -31636,7 +31782,7 @@ def V6_vdsaduh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -31648,7 +31794,7 @@ def V6_vdsaduh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -31686,7 +31832,7 @@ def V6_veqb : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31698,7 +31844,7 @@ def V6_veqb_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31709,7 +31855,7 @@ def V6_veqb_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31721,7 +31867,7 @@ def V6_veqb_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31732,7 +31878,7 @@ def V6_veqh : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31744,7 +31890,7 @@ def V6_veqh_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31755,7 +31901,7 @@ def V6_veqh_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31767,7 +31913,7 @@ def V6_veqh_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31778,7 +31924,7 @@ def V6_veqw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31790,7 +31936,7 @@ def V6_veqw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31801,7 +31947,7 @@ def V6_veqw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31813,7 +31959,7 @@ def V6_veqw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31824,7 +31970,7 @@ def V6_vgathermh : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"vtmp.h = vgather($Rt32,$Mu2,$Vv32.h).h",
-tc_66bb62ea, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
+tc_e8797b98, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00001000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31840,7 +31986,7 @@ def V6_vgathermhq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vv32.h).h",
-tc_63e3d94c, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
+tc_05ac6f98, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001010;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31856,7 +32002,7 @@ def V6_vgathermhw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32),
"vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h",
-tc_bfe309d5, TypeCVI_GATHER>, Enc_28dcbb, Requires<[UseHVXV65]> {
+tc_05058f6f, TypeCVI_GATHER>, Enc_28dcbb, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31872,7 +32018,7 @@ def V6_vgathermhwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32),
"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h",
-tc_98733e9d, TypeCVI_GATHER>, Enc_4e4a80, Requires<[UseHVXV65]> {
+tc_fd7610da, TypeCVI_GATHER>, Enc_4e4a80, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001100;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31888,7 +32034,7 @@ def V6_vgathermw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"vtmp.w = vgather($Rt32,$Mu2,$Vv32.w).w",
-tc_66bb62ea, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
+tc_e8797b98, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31904,7 +32050,7 @@ def V6_vgathermwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32),
"if ($Qs4) vtmp.w = vgather($Rt32,$Mu2,$Vv32.w).w",
-tc_63e3d94c, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
+tc_05ac6f98, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
@@ -31920,7 +32066,7 @@ def V6_vgtb : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31932,7 +32078,7 @@ def V6_vgtb_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31943,7 +32089,7 @@ def V6_vgtb_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31955,7 +32101,7 @@ def V6_vgtb_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31966,7 +32112,7 @@ def V6_vgth : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -31978,7 +32124,7 @@ def V6_vgth_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -31989,7 +32135,7 @@ def V6_vgth_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32001,7 +32147,7 @@ def V6_vgth_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32012,7 +32158,7 @@ def V6_vgtub : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32024,7 +32170,7 @@ def V6_vgtub_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32035,7 +32181,7 @@ def V6_vgtub_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32047,7 +32193,7 @@ def V6_vgtub_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32058,7 +32204,7 @@ def V6_vgtuh : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32070,7 +32216,7 @@ def V6_vgtuh_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32081,7 +32227,7 @@ def V6_vgtuh_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32093,7 +32239,7 @@ def V6_vgtuh_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32104,7 +32250,7 @@ def V6_vgtuw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32116,7 +32262,7 @@ def V6_vgtuw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32127,7 +32273,7 @@ def V6_vgtuw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b011010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32139,7 +32285,7 @@ def V6_vgtuw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32150,7 +32296,7 @@ def V6_vgtw : HInst<
(outs HvxQR:$Qd4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
@@ -32162,7 +32308,7 @@ def V6_vgtw_and : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32173,7 +32319,7 @@ def V6_vgtw_or : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b010110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32185,7 +32331,7 @@ def V6_vgtw_xor : HInst<
(outs HvxQR:$Qx4),
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)",
-tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
@@ -32196,7 +32342,7 @@ def V6_vhist : HInst<
(outs),
(ins),
"vhist",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV60]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV60]> {
let Inst{13-0} = 0b10000010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -32205,7 +32351,7 @@ def V6_vhistq : HInst<
(outs),
(ins HvxQR:$Qv4),
"vhist($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV60]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV60]> {
let Inst{13-0} = 0b10000010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -32215,7 +32361,7 @@ def V6_vinsertwr : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, IntRegs:$Rt32),
"$Vx32.w = vinsert($Rt32)",
-tc_e231aa4f, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[UseHVXV60]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[UseHVXV60]> {
let Inst{13-5} = 0b100000001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
@@ -32227,7 +32373,7 @@ def V6_vlalignb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vlalign($Vu32,$Vv32,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
@@ -32239,7 +32385,7 @@ def V6_vlalignbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32 = vlalign($Vu32,$Vv32,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110011;
let hasNewValue = 1;
@@ -32250,7 +32396,7 @@ def V6_vlsrb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.ub = vlsr($Vu32.ub,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV62]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32262,7 +32408,7 @@ def V6_vlsrh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uh = vlsr($Vu32.uh,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32285,7 +32431,7 @@ def V6_vlsrhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vlsr($Vu32.h,$Vv32.h)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -32308,7 +32454,7 @@ def V6_vlsrw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vlsr($Vu32.uw,$Rt32)",
-tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_7417e785, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32331,7 +32477,7 @@ def V6_vlsrwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vlsr($Vu32.w,$Vv32.w)",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
@@ -32354,7 +32500,7 @@ def V6_vlut4 : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vd32.h = vlut4($Vu32.uh,$Rtt32.h)",
-tc_fa99dc24, TypeCVI_VX_DV>, Enc_263841, Requires<[UseHVXV65]> {
+tc_f1de44ef, TypeCVI_VX_DV>, Enc_263841, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -32366,7 +32512,7 @@ def V6_vlutvvb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32378,7 +32524,7 @@ def V6_vlutvvb_nm : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch",
-tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV62]> {
+tc_56e64202, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -32390,7 +32536,7 @@ def V6_vlutvvb_oracc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_245865, Requires<[UseHVXV60]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_245865, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32404,7 +32550,7 @@ def V6_vlutvvb_oracci : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_cd4705, Requires<[UseHVXV62]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_cd4705, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
@@ -32417,7 +32563,7 @@ def V6_vlutvvbi : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)",
-tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV62]> {
+tc_56e64202, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110001;
let hasNewValue = 1;
@@ -32428,7 +32574,7 @@ def V6_vlutvwh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32440,7 +32586,7 @@ def V6_vlutvwh_nm : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV62]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
@@ -32452,7 +32598,7 @@ def V6_vlutvwh_oracc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_7b523d, Requires<[UseHVXV60]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_7b523d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -32466,7 +32612,7 @@ def V6_vlutvwh_oracci : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)",
-tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_1178da, Requires<[UseHVXV62]> {
+tc_9d1dc972, TypeCVI_VP_VS>, Enc_1178da, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
@@ -32479,7 +32625,7 @@ def V6_vlutvwhi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii),
"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[UseHVXV62]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[UseHVXV62]> {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110011;
let hasNewValue = 1;
@@ -32490,7 +32636,7 @@ def V6_vmaxb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vmax($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32513,7 +32659,7 @@ def V6_vmaxh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmax($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32536,7 +32682,7 @@ def V6_vmaxub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32559,7 +32705,7 @@ def V6_vmaxuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32582,7 +32728,7 @@ def V6_vmaxw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmax($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32605,7 +32751,7 @@ def V6_vminb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vmin($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -32628,7 +32774,7 @@ def V6_vminh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmin($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32651,7 +32797,7 @@ def V6_vminub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32674,7 +32820,7 @@ def V6_vminuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32697,7 +32843,7 @@ def V6_vminw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmin($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
@@ -32720,7 +32866,7 @@ def V6_vmpabus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -32732,7 +32878,7 @@ def V6_vmpabus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -32770,7 +32916,7 @@ def V6_vmpabusv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -32793,7 +32939,7 @@ def V6_vmpabuu : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.ub)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV65]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -32805,7 +32951,7 @@ def V6_vmpabuu_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.ub)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV65]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -32843,7 +32989,7 @@ def V6_vmpabuuv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -32866,7 +33012,7 @@ def V6_vmpahb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -32878,7 +33024,7 @@ def V6_vmpahb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -32916,7 +33062,7 @@ def V6_vmpahhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmpa($Vx32in.h,$Vu32.h,$Rtt32.h):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32929,7 +33075,7 @@ def V6_vmpauhb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV62]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -32941,7 +33087,7 @@ def V6_vmpauhb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV62]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32979,7 +33125,7 @@ def V6_vmpauhuhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmpa($Vx32in.h,$Vu32.uh,$Rtt32.uh):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -32992,7 +33138,7 @@ def V6_vmpsuhuhsat : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vx32.h = vmps($Vx32in.h,$Vu32.uh,$Rtt32.uh):sat",
-tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
+tc_90bcc1db, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33005,7 +33151,7 @@ def V6_vmpybus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
@@ -33017,7 +33163,7 @@ def V6_vmpybus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001001;
@@ -33055,7 +33201,7 @@ def V6_vmpybusv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33067,7 +33213,7 @@ def V6_vmpybusv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33105,7 +33251,7 @@ def V6_vmpybv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vmpy($Vu32.b,$Vv32.b)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33117,7 +33263,7 @@ def V6_vmpybv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.h += vmpy($Vu32.b,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33155,7 +33301,7 @@ def V6_vmpyewuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpye($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -33167,7 +33313,7 @@ def V6_vmpyewuh_64 : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vmpye($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV62]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -33190,7 +33336,7 @@ def V6_vmpyh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.w = vmpy($Vu32.h,$Rt32.h)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33202,7 +33348,7 @@ def V6_vmpyh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.w += vmpy($Vu32.h,$Rt32.h)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV65]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -33240,7 +33386,7 @@ def V6_vmpyhsat_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33267,7 +33413,7 @@ def V6_vmpyhsrs : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33290,7 +33436,7 @@ def V6_vmpyhss : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -33313,7 +33459,7 @@ def V6_vmpyhus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33325,7 +33471,7 @@ def V6_vmpyhus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33363,7 +33509,7 @@ def V6_vmpyhv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vmpy($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33375,7 +33521,7 @@ def V6_vmpyhv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.w += vmpy($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -33413,7 +33559,7 @@ def V6_vmpyhvsrs : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33436,7 +33582,7 @@ def V6_vmpyieoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -33448,7 +33594,7 @@ def V6_vmpyiewh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyie($Vu32.w,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100010;
@@ -33475,7 +33621,7 @@ def V6_vmpyiewuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -33487,7 +33633,7 @@ def V6_vmpyiewuh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33525,7 +33671,7 @@ def V6_vmpyih : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vmpyi($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -33537,7 +33683,7 @@ def V6_vmpyih_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.h += vmpyi($Vu32.h,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33575,7 +33721,7 @@ def V6_vmpyihb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.h = vmpyi($Vu32.h,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -33587,7 +33733,7 @@ def V6_vmpyihb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.h += vmpyi($Vu32.h,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -33625,7 +33771,7 @@ def V6_vmpyiowh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyio($Vu32.w,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -33648,7 +33794,7 @@ def V6_vmpyiwb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -33660,7 +33806,7 @@ def V6_vmpyiwb_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33698,7 +33844,7 @@ def V6_vmpyiwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.h)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -33710,7 +33856,7 @@ def V6_vmpyiwh_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.h)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -33748,7 +33894,7 @@ def V6_vmpyiwub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV62]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
@@ -33760,7 +33906,7 @@ def V6_vmpyiwub_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV62]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33798,7 +33944,7 @@ def V6_vmpyowh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -33810,7 +33956,7 @@ def V6_vmpyowh_64_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32 += vmpyo($Vu32.w,$Vv32.h)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33835,7 +33981,7 @@ def V6_vmpyowh_rnd : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -33858,7 +34004,7 @@ def V6_vmpyowh_rnd_sacc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33884,7 +34030,7 @@ def V6_vmpyowh_sacc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -33910,7 +34056,7 @@ def V6_vmpyub : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -33922,7 +34068,7 @@ def V6_vmpyub_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -33960,7 +34106,7 @@ def V6_vmpyubv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -33972,7 +34118,7 @@ def V6_vmpyubv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34010,7 +34156,7 @@ def V6_vmpyuh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -34022,7 +34168,7 @@ def V6_vmpyuh_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -34060,7 +34206,7 @@ def V6_vmpyuhe : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vmpye($Vu32.uh,$Rt32.uh)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV65]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -34072,7 +34218,7 @@ def V6_vmpyuhe_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.uw += vmpye($Vu32.uh,$Rt32.uh)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV65]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
@@ -34086,7 +34232,7 @@ def V6_vmpyuhv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -34098,7 +34244,7 @@ def V6_vmpyuhv_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100001;
@@ -34136,7 +34282,7 @@ def V6_vmux : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vmux($Qt4,$Vu32,$Vv32)",
-tc_a3127e12, TypeCVI_VA>, Enc_31db33, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_31db33, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110111;
@@ -34148,7 +34294,7 @@ def V6_vnavgb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vnavg($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
@@ -34171,7 +34317,7 @@ def V6_vnavgh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vnavg($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34194,7 +34340,7 @@ def V6_vnavgub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34217,7 +34363,7 @@ def V6_vnavgw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vnavg($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
@@ -34240,7 +34386,7 @@ def V6_vnccombine : HInst<
(outs HvxWR:$Vdd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32, HvxVR:$Vv32),
"if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
-tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
+tc_af25efd9, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011010010;
@@ -34254,7 +34400,7 @@ def V6_vncmov : HInst<
(outs HvxVR:$Vd32),
(ins PredRegs:$Ps4, HvxVR:$Vu32),
"if (!$Ps4) $Vd32 = $Vu32",
-tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
+tc_3aacf4a8, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001101000100000;
@@ -34268,7 +34414,7 @@ def V6_vnormamth : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vnormamt($Vu32.h)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000011;
@@ -34291,7 +34437,7 @@ def V6_vnormamtw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.w = vnormamt($Vu32.w)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000011;
@@ -34314,7 +34460,7 @@ def V6_vnot : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32 = vnot($Vu32)",
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_0ec46cf9, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
@@ -34326,7 +34472,7 @@ def V6_vor : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vor($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -34338,7 +34484,7 @@ def V6_vpackeb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpacke($Vu32.h,$Vv32.h)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34361,7 +34507,7 @@ def V6_vpackeh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpacke($Vu32.w,$Vv32.w)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34384,7 +34530,7 @@ def V6_vpackhb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpack($Vu32.h,$Vv32.h):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34407,7 +34553,7 @@ def V6_vpackhub_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34430,7 +34576,7 @@ def V6_vpackob : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vpacko($Vu32.h,$Vv32.h)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34453,7 +34599,7 @@ def V6_vpackoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpacko($Vu32.w,$Vv32.w)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34476,7 +34622,7 @@ def V6_vpackwh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vpack($Vu32.w,$Vv32.w):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -34499,7 +34645,7 @@ def V6_vpackwuh_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -34522,7 +34668,7 @@ def V6_vpopcounth : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vpopcount($Vu32.h)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -34545,7 +34691,7 @@ def V6_vprefixqb : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.b = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100000010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34557,7 +34703,7 @@ def V6_vprefixqh : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.h = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100001010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34569,7 +34715,7 @@ def V6_vprefixqw : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qv4),
"$Vd32.w = prefixsum($Qv4)",
-tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
+tc_51d0ecc3, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> {
let Inst{13-5} = 0b100010010;
let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
@@ -34581,7 +34727,7 @@ def V6_vrdelta : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vrdelta($Vu32,$Vv32)",
-tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_46d6c3e0, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -34593,7 +34739,7 @@ def V6_vrmpybub_rtt : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vdd32.w = vrmpy($Vu32.b,$Rtt32.ub)",
-tc_a807365d, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
+tc_cd94bfe0, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -34605,7 +34751,7 @@ def V6_vrmpybub_rtt_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vxx32.w += vrmpy($Vu32.b,$Rtt32.ub)",
-tc_ee927c0e, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
+tc_15fdf750, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -34643,7 +34789,7 @@ def V6_vrmpybus : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -34655,7 +34801,7 @@ def V6_vrmpybus_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -34693,7 +34839,7 @@ def V6_vrmpybusi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -34705,7 +34851,7 @@ def V6_vrmpybusi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -34743,7 +34889,7 @@ def V6_vrmpybusv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -34755,7 +34901,7 @@ def V6_vrmpybusv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34793,7 +34939,7 @@ def V6_vrmpybv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vrmpy($Vu32.b,$Vv32.b)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -34805,7 +34951,7 @@ def V6_vrmpybv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.w += vrmpy($Vu32.b,$Vv32.b)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -34843,7 +34989,7 @@ def V6_vrmpyub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)",
-tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_649072c2, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -34855,7 +35001,7 @@ def V6_vrmpyub_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32),
"$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)",
-tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
+tc_b091f1c6, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -34893,7 +35039,7 @@ def V6_vrmpyub_rtt : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vdd32.uw = vrmpy($Vu32.ub,$Rtt32.ub)",
-tc_a807365d, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
+tc_cd94bfe0, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
@@ -34905,7 +35051,7 @@ def V6_vrmpyub_rtt_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32),
"$Vxx32.uw += vrmpy($Vu32.ub,$Rtt32.ub)",
-tc_ee927c0e, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
+tc_15fdf750, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001101;
@@ -34943,7 +35089,7 @@ def V6_vrmpyubi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -34955,7 +35101,7 @@ def V6_vrmpyubi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
@@ -34993,7 +35139,7 @@ def V6_vrmpyubv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)",
-tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
@@ -35005,7 +35151,7 @@ def V6_vrmpyubv_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)",
-tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
+tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100000;
@@ -35039,11 +35185,276 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vrmpyzbb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzbb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzbb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzbb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzbub_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rt8.ub)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzbub_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rt8.ub)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzbub_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vrmpyz($Vu32.b,$Rx8.ub++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzbub_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vrmpyz($Vu32.b,$Rx8.ub++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzcb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr16mpyz($Vu32.c,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzcb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr16mpyz($Vu32.c,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzcb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr16mpyz($Vu32.c,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzcb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr16mpyz($Vu32.c,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyzcbs_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr16mpyzs($Vu32.c,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyzcbs_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr16mpyzs($Vu32.c,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyzcbs_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr16mpyzs($Vu32.c,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyzcbs_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr16mpyzs($Vu32.c,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
+def V6_vrmpyznb_rt : HInst<
+(outs HvxVQR:$Vdddd32),
+(ins HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vdddd32.w = vr8mpyz($Vu32.n,$Rt8.b)",
+tc_61bf7c03, TypeCVI_4SLOT_MPY>, Enc_1bd127, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyznb_rt_acc : HInst<
+(outs HvxVQR:$Vyyyy32),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegsLow8:$Rt8),
+"$Vyyyy32.w += vr8mpyz($Vu32.n,$Rt8.b)",
+tc_933f2b39, TypeCVI_4SLOT_MPY>, Enc_d7bc34, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in";
+}
+def V6_vrmpyznb_rx : HInst<
+(outs HvxVQR:$Vdddd32, IntRegsLow8:$Rx8),
+(ins HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vdddd32.w = vr8mpyz($Vu32.n,$Rx8.b++)",
+tc_26a377fe, TypeCVI_4SLOT_MPY>, Enc_3b7631, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-19} = 0b0001100111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx8 = $Rx8in";
+}
+def V6_vrmpyznb_rx_acc : HInst<
+(outs HvxVQR:$Vyyyy32, IntRegsLow8:$Rx8),
+(ins HvxVQR:$Vyyyy32in, HvxVR:$Vu32, IntRegs:$Rx8in),
+"$Vyyyy32.w += vr8mpyz($Vu32.n,$Rx8.b++)",
+tc_2d4051cd, TypeCVI_4SLOT_MPY>, Enc_bddee3, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-19} = 0b0001100111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
+}
def V6_vror : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, IntRegs:$Rt32),
"$Vd32 = vror($Vu32,$Rt32)",
-tc_bf142ae2, TypeCVI_VP>, Enc_b087ac, Requires<[UseHVXV60]> {
+tc_6e7fa133, TypeCVI_VP>, Enc_b087ac, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
@@ -35051,11 +35462,34 @@ let hasNewValue = 1;
let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vrotr : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.uw = vrotr($Vu32.uw,$Vv32.uw)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrotr_alt : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32 = vrotr($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vroundhb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vround($Vu32.h,$Vv32.h):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35078,7 +35512,7 @@ def V6_vroundhub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vround($Vu32.h,$Vv32.h):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35101,7 +35535,7 @@ def V6_vrounduhub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -35124,7 +35558,7 @@ def V6_vrounduwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
@@ -35147,7 +35581,7 @@ def V6_vroundwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vround($Vu32.w,$Vv32.w):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35170,7 +35604,7 @@ def V6_vroundwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vround($Vu32.w,$Vv32.w):sat",
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35193,7 +35627,7 @@ def V6_vrsadubi : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
+tc_1ad8a370, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
@@ -35205,7 +35639,7 @@ def V6_vrsadubi_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
"$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
-tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
+tc_e675c45a, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001010;
@@ -35239,11 +35673,23 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsatdw : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w = vsatdw($Vu32.w,$Vv32.w)",
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV66]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsathub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsat($Vu32.h,$Vv32.h)",
-tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_8772086c, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35266,7 +35712,7 @@ def V6_vsatuwuh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -35289,7 +35735,7 @@ def V6_vsatwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsat($Vu32.w,$Vv32.w)",
-tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_8772086c, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
@@ -35312,7 +35758,7 @@ def V6_vsb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.h = vsxt($Vu32.b)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35335,7 +35781,7 @@ def V6_vscattermh : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.h).h = $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35346,7 +35792,7 @@ def V6_vscattermh_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.h).h += $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35377,7 +35823,7 @@ def V6_vscattermhq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.h).h = $Vw32",
-tc_df54ad52, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
+tc_8e420e4d, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b00101111100;
let accessSize = HalfWordAccess;
@@ -35397,7 +35843,7 @@ def V6_vscattermhw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vvv32.w).h = $Vw32",
-tc_ec58f88a, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
+tc_7273323b, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35408,7 +35854,7 @@ def V6_vscattermhw_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vvv32.w).h += $Vw32",
-tc_ec58f88a, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
+tc_7273323b, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
@@ -35420,7 +35866,7 @@ def V6_vscattermhwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vvv32.w).h = $Vw32",
-tc_94f43c04, TypeCVI_SCATTER_DV>, Enc_3d6d37, Requires<[UseHVXV65]> {
+tc_58d21193, TypeCVI_SCATTER_DV>, Enc_3d6d37, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b00101111101;
let accessSize = HalfWordAccess;
@@ -35431,7 +35877,7 @@ def V6_vscattermw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.w).w = $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101111001;
let accessSize = WordAccess;
@@ -35442,7 +35888,7 @@ def V6_vscattermw_add : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.w).w += $Vw32",
-tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
+tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101111001;
let accessSize = WordAccess;
@@ -35501,7 +35947,7 @@ def V6_vscattermwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.w).w = $Vw32",
-tc_df54ad52, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
+tc_8e420e4d, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b00101111100;
let accessSize = WordAccess;
@@ -35521,7 +35967,7 @@ def V6_vsh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.w = vsxt($Vu32.h)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35544,7 +35990,7 @@ def V6_vshufeh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vshuffe($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35567,7 +36013,7 @@ def V6_vshuff : HInst<
(outs HvxVR:$Vy32, HvxVR:$Vx32),
(ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32),
"vshuff($Vy32,$Vx32,$Rt32)",
-tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
+tc_561aaa58, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001111;
@@ -35582,7 +36028,7 @@ def V6_vshuffb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.b = vshuff($Vu32.b)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -35605,7 +36051,7 @@ def V6_vshuffeb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vshuffe($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35628,7 +36074,7 @@ def V6_vshuffh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32),
"$Vd32.h = vshuff($Vu32.h)",
-tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
+tc_946013d8, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -35651,7 +36097,7 @@ def V6_vshuffob : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vshuffo($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35674,7 +36120,7 @@ def V6_vshuffvdd : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)",
-tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
+tc_87adc037, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
@@ -35686,7 +36132,7 @@ def V6_vshufoeb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35709,7 +36155,7 @@ def V6_vshufoeh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35732,7 +36178,7 @@ def V6_vshufoh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vshuffo($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
@@ -35755,7 +36201,7 @@ def V6_vsubb : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vsub($Vu32.b,$Vv32.b)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -35778,7 +36224,7 @@ def V6_vsubb_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -35801,7 +36247,7 @@ def V6_vsubbnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.b -= $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -35827,7 +36273,7 @@ def V6_vsubbq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.b -= $Vu32.b",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -35853,7 +36299,7 @@ def V6_vsubbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.b = vsub($Vu32.b,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
@@ -35876,7 +36322,7 @@ def V6_vsubbsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -35899,7 +36345,7 @@ def V6_vsubcarry : HInst<
(outs HvxVR:$Vd32, HvxQR:$Qx4),
(ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qx4in),
"$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry",
-tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
+tc_7e6a3e89, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100101;
@@ -35908,11 +36354,25 @@ let opNewValue = 0;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_vsubcarryo : HInst<
+(outs HvxVR:$Vd32, HvxQR:$Qe4),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.w,$Qe4 = vsub($Vu32.w,$Vv32.w):carry",
+tc_e35c1e93, TypeCOPROC_VX>, Enc_c1d806, Requires<[UseHVXV66]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsubh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsub($Vu32.h,$Vv32.h)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -35935,7 +36395,7 @@ def V6_vsubh_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -35958,7 +36418,7 @@ def V6_vsubhnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.h -= $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -35984,7 +36444,7 @@ def V6_vsubhq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.h -= $Vu32.h",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000001;
@@ -36010,7 +36470,7 @@ def V6_vsubhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsub($Vu32.h,$Vv32.h):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36033,7 +36493,7 @@ def V6_vsubhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36056,7 +36516,7 @@ def V6_vsubhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vsub($Vu32.h,$Vv32.h)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36079,7 +36539,7 @@ def V6_vsububh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36102,7 +36562,7 @@ def V6_vsububsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36125,7 +36585,7 @@ def V6_vsububsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36148,7 +36608,7 @@ def V6_vsubububb_sat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -36160,7 +36620,7 @@ def V6_vsubuhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36183,7 +36643,7 @@ def V6_vsubuhsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36206,7 +36666,7 @@ def V6_vsubuhw : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)",
-tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
+tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36229,7 +36689,7 @@ def V6_vsubuwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
@@ -36252,7 +36712,7 @@ def V6_vsubuwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
@@ -36275,7 +36735,7 @@ def V6_vsubw : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vsub($Vu32.w,$Vv32.w)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
@@ -36298,7 +36758,7 @@ def V6_vsubw_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
@@ -36321,7 +36781,7 @@ def V6_vsubwnq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if (!$Qv4) $Vx32.w -= $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -36347,7 +36807,7 @@ def V6_vsubwq : HInst<
(outs HvxVR:$Vx32),
(ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32),
"if ($Qv4) $Vx32.w -= $Vu32.w",
-tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
+tc_257f6f7c, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{21-16} = 0b000010;
@@ -36373,7 +36833,7 @@ def V6_vsubwsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w = vsub($Vu32.w,$Vv32.w):sat",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
@@ -36396,7 +36856,7 @@ def V6_vsubwsat_dv : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, HvxWR:$Vvv32),
"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat",
-tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
+tc_db5555f3, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
@@ -36419,7 +36879,7 @@ def V6_vswap : HInst<
(outs HvxWR:$Vdd32),
(ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32),
"$Vdd32 = vswap($Qt4,$Vu32,$Vv32)",
-tc_316c637c, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[UseHVXV60]> {
+tc_71646d06, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[UseHVXV60]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110101;
@@ -36431,7 +36891,7 @@ def V6_vtmpyb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -36443,7 +36903,7 @@ def V6_vtmpyb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36481,7 +36941,7 @@ def V6_vtmpybus : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
@@ -36493,7 +36953,7 @@ def V6_vtmpybus_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36531,7 +36991,7 @@ def V6_vtmpyhb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)",
-tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
+tc_0b04c6c7, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
@@ -36543,7 +37003,7 @@ def V6_vtmpyhb_acc : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32),
"$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)",
-tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
+tc_660769f1, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001000;
@@ -36595,7 +37055,7 @@ def V6_vunpackb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.h = vunpack($Vu32.b)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36618,7 +37078,7 @@ def V6_vunpackh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.w = vunpack($Vu32.h)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36641,7 +37101,7 @@ def V6_vunpackob : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32),
"$Vxx32.h |= vunpacko($Vu32.b)",
-tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
+tc_2c745bb8, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000000;
@@ -36667,7 +37127,7 @@ def V6_vunpackoh : HInst<
(outs HvxWR:$Vxx32),
(ins HvxWR:$Vxx32in, HvxVR:$Vu32),
"$Vxx32.w |= vunpacko($Vu32.h)",
-tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
+tc_2c745bb8, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000000;
@@ -36694,7 +37154,7 @@ def V6_vunpackub : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uh = vunpack($Vu32.ub)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36717,7 +37177,7 @@ def V6_vunpackuh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uw = vunpack($Vu32.uh)",
-tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_04da405a, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
@@ -36740,7 +37200,7 @@ def V6_vwhist128 : HInst<
(outs),
(ins),
"vwhist128",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10010010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36749,7 +37209,7 @@ def V6_vwhist128m : HInst<
(outs),
(ins u1_0Imm:$Ii),
"vwhist128(#$Ii)",
-tc_b77635b4, TypeCVI_HIST>, Enc_efaed8, Requires<[UseHVXV62]> {
+tc_b28e51aa, TypeCVI_HIST>, Enc_efaed8, Requires<[UseHVXV62]> {
let Inst{7-0} = 0b10000000;
let Inst{13-9} = 0b10011;
let Inst{31-16} = 0b0001111000000000;
@@ -36759,7 +37219,7 @@ def V6_vwhist128q : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist128($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10010010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36769,7 +37229,7 @@ def V6_vwhist128qm : HInst<
(outs),
(ins HvxQR:$Qv4, u1_0Imm:$Ii),
"vwhist128($Qv4,#$Ii)",
-tc_28978789, TypeCVI_HIST>, Enc_802dc0, Requires<[UseHVXV62]> {
+tc_767c4e9d, TypeCVI_HIST>, Enc_802dc0, Requires<[UseHVXV62]> {
let Inst{7-0} = 0b10000000;
let Inst{13-9} = 0b10011;
let Inst{21-16} = 0b000010;
@@ -36780,7 +37240,7 @@ def V6_vwhist256 : HInst<
(outs),
(ins),
"vwhist256",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001010000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36789,7 +37249,7 @@ def V6_vwhist256_sat : HInst<
(outs),
(ins),
"vwhist256:sat",
-tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
+tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001110000000;
let Inst{31-16} = 0b0001111000000000;
let DecoderNamespace = "EXT_mmvec";
@@ -36798,7 +37258,7 @@ def V6_vwhist256q : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist256($Qv4)",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36808,7 +37268,7 @@ def V6_vwhist256q_sat : HInst<
(outs),
(ins HvxQR:$Qv4),
"vwhist256($Qv4):sat",
-tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
+tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001110000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
@@ -36818,7 +37278,7 @@ def V6_vxor : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32 = vxor($Vu32,$Vv32)",
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
@@ -36830,7 +37290,7 @@ def V6_vzb : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uh = vzxt($Vu32.ub)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -36853,7 +37313,7 @@ def V6_vzh : HInst<
(outs HvxWR:$Vdd32),
(ins HvxVR:$Vu32),
"$Vdd32.uw = vzxt($Vu32.uh)",
-tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
+tc_b4416217, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
@@ -36872,11 +37332,122 @@ let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_zLd_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"z = vmem($Rt32+#$Ii)",
+tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101100000;
+let addrMode = BaseImmOffset;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zLd_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"z = vmem($Rx32++#$Ii)",
+tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101101000;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"z = vmem($Rx32++$Mu2)",
+tc_a0dbea28, TypeCVI_ZW>, Enc_44661f, Requires<[UseHVXV66,UseZReg]> {
+let Inst{12-0} = 0b0000000000001;
+let Inst{31-21} = 0b00101101000;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) z = vmem($Rt32+#$Ii)",
+tc_dd5b0695, TypeCVI_ZW>, Enc_ef601b, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b00101100100;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zLd_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) z = vmem($Rx32++#$Ii)",
+tc_3ad719fb, TypeCVI_ZW>, Enc_6baed4, Requires<[UseHVXV66,UseZReg]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101101100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zLd_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) z = vmem($Rx32++$Mu2)",
+tc_3ad719fb, TypeCVI_ZW>, Enc_691712, Requires<[UseHVXV66,UseZReg]> {
+let Inst{10-0} = 0b00000000001;
+let Inst{31-21} = 0b00101101100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let mayLoad = 1;
+let isRestrictNoSlot1Store = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_zextract : HInst<
+(outs HvxVR:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = zextract($Rt32)",
+tc_5bf8afbb, TypeCVI_VP>, Enc_a5ed8a, Requires<[UseHVXV66,UseZReg]> {
+let Inst{13-5} = 0b000001001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zld0 : HInst<
+(outs),
+(ins IntRegs:$Rt32),
+"z = vmem($Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_zldp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32),
+"if ($Pv4) z = vmem($Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def Y2_barrier : HInst<
(outs),
(ins),
"barrier",
-tc_367f7f3d, TypeST>, Enc_e3b0c4 {
+tc_8c99de45, TypeST>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100000000000;
let isSoloAX = 1;
@@ -36886,7 +37457,7 @@ def Y2_break : HInst<
(outs),
(ins),
"brkpt",
-tc_4ca572d4, TypeCR>, Enc_e3b0c4 {
+tc_9ad9998f, TypeCR>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0110110000100000;
let isSolo = 1;
@@ -36895,7 +37466,7 @@ def Y2_dccleana : HInst<
(outs),
(ins IntRegs:$Rs32),
"dccleana($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000000;
let isRestrictSlot1AOK = 1;
@@ -36905,7 +37476,7 @@ def Y2_dccleaninva : HInst<
(outs),
(ins IntRegs:$Rs32),
"dccleaninva($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000010;
let isRestrictSlot1AOK = 1;
@@ -36915,7 +37486,7 @@ def Y2_dcfetch : HInst<
(outs),
(ins IntRegs:$Rs32),
"dcfetch($Rs32)",
-tc_3da80ba5, TypeMAPPING> {
+tc_d63f638c, TypeMAPPING> {
let hasSideEffects = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -36924,7 +37495,7 @@ def Y2_dcfetchbo : HInst<
(outs),
(ins IntRegs:$Rs32, u11_3Imm:$Ii),
"dcfetch($Rs32+#$Ii)",
-tc_4d9914c9, TypeLD>, Enc_2d829e {
+tc_9ca930f7, TypeLD>, Enc_2d829e {
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10010100000;
let addrMode = BaseImmOffset;
@@ -36935,7 +37506,7 @@ def Y2_dcinva : HInst<
(outs),
(ins IntRegs:$Rs32),
"dcinva($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000001;
let isRestrictSlot1AOK = 1;
@@ -36945,7 +37516,7 @@ def Y2_dczeroa : HInst<
(outs),
(ins IntRegs:$Rs32),
"dczeroa($Rs32)",
-tc_00e7c26e, TypeST>, Enc_ecbcc8 {
+tc_b857bf4e, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000110;
let isRestrictSlot1AOK = 1;
@@ -36956,7 +37527,7 @@ def Y2_icinva : HInst<
(outs),
(ins IntRegs:$Rs32),
"icinva($Rs32)",
-tc_999d32db, TypeJ>, Enc_ecbcc8 {
+tc_5d7f5414, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010110110;
let isSolo = 1;
@@ -36965,7 +37536,7 @@ def Y2_isync : HInst<
(outs),
(ins),
"isync",
-tc_b13761ae, TypeJ>, Enc_e3b0c4 {
+tc_8b121f4a, TypeJ>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000010;
let Inst{31-16} = 0b0101011111000000;
let isSolo = 1;
@@ -36974,16 +37545,25 @@ def Y2_syncht : HInst<
(outs),
(ins),
"syncht",
-tc_367f7f3d, TypeST>, Enc_e3b0c4 {
+tc_8c99de45, TypeST>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100001000000;
let isSolo = 1;
}
+def Y2_wait : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"wait($Rs32)",
+tc_174516e8, TypeCR>, Enc_ecbcc8, Requires<[HasV65]> {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01100100010;
+let isSolo = 1;
+}
def Y4_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"l2fetch($Rs32,$Rt32)",
-tc_daa058fa, TypeST>, Enc_ca3887 {
+tc_fe211424, TypeST>, Enc_ca3887 {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110000;
@@ -36995,7 +37575,7 @@ def Y4_trace : HInst<
(outs),
(ins IntRegs:$Rs32),
"trace($Rs32)",
-tc_c82dc1ff, TypeCR>, Enc_ecbcc8 {
+tc_6b25e783, TypeCR>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01100010010;
let isSoloAX = 1;
@@ -37004,7 +37584,7 @@ def Y5_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"l2fetch($Rs32,$Rtt32)",
-tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5]> {
+tc_fe211424, TypeST>, Enc_e6abcf {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110100;
@@ -37016,7 +37596,7 @@ def dep_A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32):sat:deprecated",
-tc_b44c6e2a, TypeALU64>, Enc_5ab2be {
+tc_779080bf, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101100;
@@ -37029,7 +37609,7 @@ def dep_A2_subsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32):sat:deprecated",
-tc_b44c6e2a, TypeALU64>, Enc_bd6011 {
+tc_779080bf, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101100;
@@ -37042,7 +37622,7 @@ def dep_S2_packhl : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = packhl($Rs32,$Rt32):deprecated",
-tc_540fdfbc, TypeALU64>, Enc_be32a5 {
+tc_946df596, TypeALU64>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010100000;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
new file mode 100644
index 000000000000..2346fa572626
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -0,0 +1,3337 @@
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, please consult code owner before editing.
+//===----------------------------------------------------------------------===//
+
+
+// V5 Scalar Instructions.
+
+def: Pat<(int_hexagon_S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwh DoubleRegs:$src1),
+ (S2_vsatwh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysu_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpysu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1),
+ (A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_combineri IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_notp DoubleRegs:$src1),
+ (A2_notp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_brevp DoubleRegs:$src1),
+ (S2_brevp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl1 IntRegs:$src1),
+ (S2_cl1 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplte IntRegs:$src1, IntRegs:$src2),
+ (C4_cmplte IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_interleave DoubleRegs:$src1),
+ (S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_abssat IntRegs:$src1),
+ (A2_abssat IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtu IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2),
+ (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1),
+ (F2_conv_df2uw_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheq IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpheq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
+ (S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2),
+ (S4_ntstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1),
+ (F2_conv_w2sf IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_not PredRegs:$src1),
+ (C2_not PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_tfrpr PredRegs:$src1),
+ (C2_tfrpr PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_orp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2),
+ (A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2),
+ (A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2),
+ (M4_pmpyw IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathb DoubleRegs:$src1),
+ (S2_vsathb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_absp DoubleRegs:$src1),
+ (A2_absp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_all8 PredRegs:$src1),
+ (C2_all8 PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2),
+ (C2_bitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2),
+ (M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2),
+ (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_boundscheck IntRegs:$src1, DoubleRegs:$src2),
+ (A4_boundscheck IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_fastcorner9 PredRegs:$src1, PredRegs:$src2),
+ (C4_fastcorner9 PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2),
+ (M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subsat IntRegs:$src1, IntRegs:$src2),
+ (A2_subsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1),
+ (F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1),
+ (S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2),
+ (F2_sfadd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sub IntRegs:$src1, IntRegs:$src2),
+ (A2_sub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4),
+ (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2),
+ (S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1),
+ (F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1),
+ (A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clbp DoubleRegs:$src1),
+ (S2_clbp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_deinterleave DoubleRegs:$src1),
+ (S2_deinterleave DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_any8 PredRegs:$src1),
+ (C2_any8 PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1),
+ (F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1),
+ (S2_vsathb_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_clrbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred:$src1),
+ (A2_tfrsi s32_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2),
+ (A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2),
+ (A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_negsat IntRegs:$src1),
+ (A2_negsat IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitsplit IntRegs:$src1, IntRegs:$src2),
+ (A4_bitsplit IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabshsat DoubleRegs:$src1),
+ (A2_vabshsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyui IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyui IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupn IntRegs:$src1, IntRegs:$src2),
+ (F2_sffixupn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_xorp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_xorp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tfrpcp DoubleRegs:$src1),
+ (A4_tfrpcp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_zxtb IntRegs:$src1),
+ (A2_zxtb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_zxth IntRegs:$src1),
+ (A2_zxth IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1),
+ (A2_sat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addsat IntRegs:$src1, IntRegs:$src2),
+ (A2_addsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2),
+ (A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2),
+ (C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_addp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (A4_addp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_xor PredRegs:$src1, PredRegs:$src2),
+ (C2_xor PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2ud_chop DoubleRegs:$src1),
+ (F2_conv_df2ud_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathub DoubleRegs:$src1),
+ (S2_vsathub DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2sf DoubleRegs:$src1),
+ (F2_conv_df2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxth IntRegs:$src1),
+ (A2_sxth IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxtb IntRegs:$src1),
+ (A2_sxtb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxtw IntRegs:$src1),
+ (A2_sxtw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl1p DoubleRegs:$src1),
+ (S2_cl1p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_andnp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_andnp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpeq IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddb_map DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clbnorm IntRegs:$src1),
+ (S2_clbnorm IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_tstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2),
+ (S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1),
+ (F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpys_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpys_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1),
+ (S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3),
+ (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2),
+ (F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct1 IntRegs:$src1),
+ (S2_ct1 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct0 IntRegs:$src1),
+ (S2_ct0 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1),
+ (F2_sffixupr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2),
+ (C2_andn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2),
+ (A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2),
+ (A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2),
+ (A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineir s32_0ImmPred:$src1, IntRegs:$src2),
+ (A4_combineir s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2),
+ (A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1),
+ (S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combinew IntRegs:$src1, IntRegs:$src2),
+ (A2_combinew IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2),
+ (C4_nbitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_modwrapu IntRegs:$src1, IntRegs:$src2),
+ (A4_modwrapu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneq IntRegs:$src1, IntRegs:$src2),
+ (A4_rcmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred:$src1),
+ (F2_sfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred:$src1),
+ (F2_sfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1),
+ (F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2),
+ (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpuo IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1),
+ (S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minu IntRegs:$src1, IntRegs:$src2),
+ (A2_minu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_minp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpge IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpge IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmin IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmin IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpgt IntRegs:$src1, IntRegs:$src2),
+ (F2_sfcmpgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vpmpyh IntRegs:$src1, IntRegs:$src2),
+ (M4_vpmpyh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_roundsat DoubleRegs:$src1),
+ (A2_roundsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct1p DoubleRegs:$src1),
+ (S2_ct1p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2),
+ (S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2),
+ (C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tfrcpp CtrRegs64:$src1),
+ (A4_tfrcpp CtrRegs64:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtu IntRegs:$src1, IntRegs:$src2),
+ (A4_cmphgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sath IntRegs:$src1),
+ (A2_sath IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satb IntRegs:$src1),
+ (A2_satb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4),
+ (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_asrh IntRegs:$src1),
+ (A2_asrh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_or PredRegs:$src1, PredRegs:$src2),
+ (C2_or PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_xor IntRegs:$src1, IntRegs:$src2),
+ (A2_xor IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_add IntRegs:$src1, IntRegs:$src2),
+ (A2_add IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfinvsqrta IntRegs:$src1),
+ (F2_sfinvsqrta IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct0p DoubleRegs:$src1),
+ (S2_ct0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svaddh IntRegs:$src1, IntRegs:$src2),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vcrotate DoubleRegs:$src1, IntRegs:$src2),
+ (S2_vcrotate DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_aslh IntRegs:$src1),
+ (A2_aslh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplatrh IntRegs:$src1),
+ (S2_vsplatrh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplatrb IntRegs:$src1),
+ (S2_vsplatrb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
+ (C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_pxfer_map PredRegs:$src1),
+ (C2_pxfer_map PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3),
+ (M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrcrr CtrRegs:$src1),
+ (A2_tfrcrr CtrRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2),
+ (C2_orn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmpy IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmpy IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_mask PredRegs:$src1),
+ (C2_mask PredRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vrsadub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vrsadub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrrcr IntRegs:$src1),
+ (A2_tfrrcr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxb DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1),
+ (S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2d_chop DoubleRegs:$src1),
+ (F2_conv_df2d_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1),
+ (F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2),
+ (F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3),
+ (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_addi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpnorm DoubleRegs:$src1),
+ (S4_clbpnorm DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_rr_sat IntRegs:$src1, IntRegs:$src2),
+ (A4_round_rr_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2uw IntRegs:$src1),
+ (F2_conv_sf2uw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2ud IntRegs:$src1),
+ (F2_conv_sf2ud IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2uw_chop IntRegs:$src1),
+ (F2_conv_sf2uw_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwuh_nopack DoubleRegs:$src1),
+ (S2_vsatwuh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubuhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubuhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmpybsu IntRegs:$src1, IntRegs:$src2),
+ (M5_vmpybsu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_subp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (A4_subp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_vitpack PredRegs:$src1, PredRegs:$src2),
+ (C2_vitpack PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclr IntRegs:$src1, IntRegs:$src2),
+ (C4_nbitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_and PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1),
+ (S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
+ (S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1),
+ (S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2),
+ (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneq IntRegs:$src1, IntRegs:$src2),
+ (C4_cmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clb IntRegs:$src1),
+ (S2_clb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1),
+ (F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred:$src1),
+ (F2_dfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2),
+ (A4_cmphgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred:$src1),
+ (F2_dfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3),
+ (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3),
+ (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2),
+ (A2_maxu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_andir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfrecipa IntRegs:$src1, IntRegs:$src2),
+ (F2_sfrecipa IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2),
+ (A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2),
+ (A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2),
+ (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2),
+ (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_or IntRegs:$src1, IntRegs:$src2),
+ (A2_or IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeq IntRegs:$src1, IntRegs:$src2),
+ (C2_cmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrp DoubleRegs:$src1),
+ (A2_tfrp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathub_nopack DoubleRegs:$src1),
+ (S2_vsathub_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satuh IntRegs:$src1),
+ (A2_satuh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satub IntRegs:$src1),
+ (A2_satub IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2),
+ (M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2),
+ (C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2),
+ (A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred:$src2),
+ (A2_tfril IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3),
+ (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1),
+ (S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1),
+ (A2_vabsw DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1),
+ (A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2),
+ (F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_swiz IntRegs:$src1),
+ (A2_swiz IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2),
+ (A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2w_chop DoubleRegs:$src1),
+ (F2_conv_df2w_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_and IntRegs:$src1, IntRegs:$src2),
+ (A2_and IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
+ (S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1),
+ (F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfr IntRegs:$src1),
+ (A2_tfr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subri s32_0ImmPred:$src1, IntRegs:$src2),
+ (A2_subri s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2),
+ (M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_brev IntRegs:$src1),
+ (S2_brev IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
+ (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1),
+ (S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1),
+ (F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred:$src2),
+ (A2_orir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2),
+ (A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2),
+ (M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2df IntRegs:$src1),
+ (F2_conv_sf2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunohb DoubleRegs:$src1),
+ (S2_vtrunohb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2d_chop IntRegs:$src1),
+ (F2_conv_sf2d_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1),
+ (F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1),
+ (F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svadduhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svadduhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2w_chop IntRegs:$src1),
+ (F2_conv_sf2w_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_svsathub IntRegs:$src1),
+ (S2_svsathub IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_setbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4),
+ (F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2),
+ (F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1),
+ (F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1),
+ (F2_conv_df2uw DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2),
+ (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2),
+ (C4_cmplteu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minup DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_minup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminb DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_lsli s6_0ImmPred:$src1, IntRegs:$src2),
+ (S4_lsli s6_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_tfrrp IntRegs:$src1),
+ (C2_tfrrp IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunowh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_vtrunowh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_abs IntRegs:$src1),
+ (A2_abs IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2),
+ (A4_cmpbeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1),
+ (A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwuh DoubleRegs:$src1),
+ (S2_vsatwuh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1),
+ (S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2),
+ (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2),
+ (A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffeb DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffeb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3),
+ (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
+ (S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2),
+ (S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1),
+ (S2_vsxtbh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_subp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_parity IntRegs:$src1, IntRegs:$src2),
+ (S4_parity IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2),
+ (A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2ud_chop IntRegs:$src1),
+ (F2_conv_sf2ud_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2),
+ (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2),
+ (S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2),
+ (A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrndpackwh DoubleRegs:$src1),
+ (S2_vrndpackwh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2),
+ (C2_bitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
+ (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_ornp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2),
+ (C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
+ (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+
+// V55 Scalar Instructions.
+
+def: Pat<(int_hexagon_A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV55]>;
+
+// V60 Scalar Instructions.
+
+def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
+ (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2),
+ (S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
+ (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
+ (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>;
+
+// V62 Scalar Instructions.
+
+def: Pat<(int_hexagon_S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_V6_ldntnt0 IntRegs:$src1),
+ (V6_ldntnt0 IntRegs:$src1)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2),
+ (M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_M6_vabsdiffb DoubleRegs:$src1, DoubleRegs:$src2),
+ (M6_vabsdiffb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_A6_vminub_RdP DoubleRegs:$src1, DoubleRegs:$src2),
+ (A6_vminub_RdP DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_S6_vsplatrbp IntRegs:$src1),
+ (S6_vsplatrbp IntRegs:$src1)>, Requires<[HasV62]>;
+
+// V65 Scalar Instructions.
+
+def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2),
+ (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65]>;
+
+// V66 Scalar Instructions.
+
+def: Pat<(int_hexagon_F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2),
+ (S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2)>, Requires<[HasV66]>;
+
+// V60 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminub HvxVR:$src1, HvxVR:$src2),
+ (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhvsrs_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsathub HvxVR:$src1, HvxVR:$src2),
+ (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsathub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgh HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgub HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubb HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgubrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgubrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundhb HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundhb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmux_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhus HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavghrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavghrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtran2x2_map_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdelta HvxVR:$src1, HvxVR:$src2),
+ (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdelta_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackob HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackob_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_or HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_or_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lo HvxWR:$src1),
+ (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lo_128B HvxWR:$src1),
+ (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubb_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubb_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldu0 IntRegs:$src1),
+ (V6_ldu0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldu0_128B IntRegs:$src1),
+ (V6_ldu0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgh HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsh HvxVR:$src1),
+ (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsh_128B HvxVR:$src1),
+ (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_and_n HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_and_n_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsb HvxVR:$src1),
+ (V6_vsb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsb_128B HvxVR:$src1),
+ (V6_vsb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffh HvxVR:$src1),
+ (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffh_128B HvxVR:$src1),
+ (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgub HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnormamth HvxVR:$src1),
+ (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnormamth_128B HvxVR:$src1),
+ (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguh HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vzh HvxVR:$src1),
+ (V6_vzh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vzh_128B HvxVR:$src1),
+ (V6_vzh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxub HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh HvxVR:$src1, HvxVR:$src2),
+ (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuuv HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuuv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vror HvxVR:$src1, IntRegs:$src2),
+ (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vror_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsh_sat HvxVR:$src1),
+ (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsh_sat_128B HvxVR:$src1),
+ (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_or_n HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_or_n_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealb HvxVR:$src1),
+ (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealb_128B HvxVR:$src1),
+ (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybusv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vzb HvxVR:$src1),
+ (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vzb_128B HvxVR:$src1),
+ (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddb HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackhub_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyb HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabusv HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabusv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_and HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_and_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackwuh_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vswap_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslw HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackhb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyih_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddb_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddb_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackub HvxVR:$src1),
+ (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackub_128B HvxVR:$src1),
+ (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyieoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyieoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_extractw HvxVR:$src1, IntRegs:$src2),
+ (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_extractw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgwrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgwrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyub HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackob HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackob_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahb HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandqrt HvxQR:$src1, IntRegs:$src2),
+ (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandqrt_128B HvxQR:$src1, IntRegs:$src2),
+ (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vxor HvxVR:$src1, HvxVR:$src2),
+ (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vxor_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhsat_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealb4w HvxVR:$src1, HvxVR:$src2),
+ (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealb4w_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffh HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffob HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffob_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyub_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnormamtw HvxVR:$src1),
+ (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnormamtw_128B HvxVR:$src1),
+ (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackuh HvxVR:$src1),
+ (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackuh_128B HvxVR:$src1),
+ (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackoh HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackoh_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyubv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhss HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhss_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_hi HvxWR:$src1),
+ (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_hi_128B HvxWR:$src1),
+ (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw HvxVR:$src1, HvxVR:$src2),
+ (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdsaduh HvxWR:$src1, IntRegs:$src2),
+ (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubw_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubw_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyih HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyih_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybus_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1),
+ (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1),
+ (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrw HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsh HvxVR:$src1),
+ (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsh_128B HvxVR:$src1),
+ (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrh HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcombine HvxVR:$src1, HvxVR:$src2),
+ (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcombine_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vinsertwr HvxVR:$src1, IntRegs:$src2),
+ (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vinsertwr_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubh_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubh_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffb HvxVR:$src1),
+ (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffb_128B HvxVR:$src1),
+ (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vand HvxVR:$src1, HvxVR:$src2),
+ (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vand_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcl0h HvxVR:$src1),
+ (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcl0h_128B HvxVR:$src1),
+ (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhsrs_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ld0 IntRegs:$src1),
+ (V6_ld0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ld0_128B IntRegs:$src1),
+ (V6_ld0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpopcounth HvxVR:$src1),
+ (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpopcounth_128B HvxVR:$src1),
+ (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnt0 IntRegs:$src1),
+ (V6_ldnt0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnt0_128B IntRegs:$src1),
+ (V6_ldnt0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminh HvxVR:$src1, HvxVR:$src2),
+ (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2 IntRegs:$src1),
+ (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2_128B IntRegs:$src1),
+ (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealh HvxVR:$src1),
+ (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealh_128B HvxVR:$src1),
+ (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackwh_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslh HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vor HvxVR:$src1, HvxVR:$src2),
+ (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vor_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiowh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiowh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvrt HvxVR:$src1, IntRegs:$src2),
+ (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvrt_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcl0w HvxVR:$src1),
+ (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcl0w_128B HvxVR:$src1),
+ (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyihb HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpybus HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vd0 ),
+ (V6_vd0 )>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vd0_128B ),
+ (V6_vd0 )>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvrt_acc_128B HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassign HvxVR:$src1),
+ (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassign_128B HvxVR:$src1),
+ (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackb HvxVR:$src1),
+ (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackb_128B HvxVR:$src1),
+ (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackh HvxVR:$src1),
+ (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackh_128B HvxVR:$src1),
+ (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrdelta HvxVR:$src1, HvxVR:$src2),
+ (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrdelta_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundwh HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundwh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddw_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddw_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffub HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth HvxVR:$src1, HvxVR:$src2),
+ (V6_vgth HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgth HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnot HvxVR:$src1),
+ (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnot_128B HvxVR:$src1),
+ (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxw HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsw_sat HvxVR:$src1),
+ (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsw_sat_128B HvxVR:$src1),
+ (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundhub HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundhub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabus HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabus_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassignp HvxWR:$src1),
+ (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassignp_128B HvxWR:$src1),
+ (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb HvxVR:$src1, HvxVR:$src2),
+ (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplatw IntRegs:$src1),
+ (V6_lvsplatw IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplatw_128B IntRegs:$src1),
+ (V6_lvsplatw IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_not HvxQR:$src1),
+ (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_not_128B HvxQR:$src1),
+ (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgw HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2),
+ (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_xor HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_xor_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguhrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguhrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrh HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhbrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffw HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+
+// V62 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddclbh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddclbh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_64_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_64_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatuwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatuwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_shuffeqh HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_shuffeqh_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_shuffeqw HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_shuffeqw_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhbsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminb HvxVR:$src1, HvxVR:$src2),
+ (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrb HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvnqv HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvnqv_128B HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplatb IntRegs:$src1),
+ (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplatb_128B IntRegs:$src1),
+ (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplath IntRegs:$src1),
+ (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplath_128B IntRegs:$src1),
+ (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhb HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrounduhub HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrounduhub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldtnpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandnqrt HvxQR:$src1, IntRegs:$src2),
+ (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_128B HvxQR:$src1, IntRegs:$src2),
+ (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxb HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvqv HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvqv_128B HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrounduwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrounduwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2v2 IntRegs:$src1),
+ (V6_pred_scalar2v2 IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2v2_128B IntRegs:$src1),
+ (V6_pred_scalar2v2 IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubh_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddclbw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddclbw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcpnt0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubububb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_ldcnp0 PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_ldcnp0_128B PredRegs:$src1, IntRegs:$src2),
+ (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4),
+ (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+
+// V65 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1),
+ (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1),
+ (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1),
+ (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1),
+ (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1),
+ (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1),
+ (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1),
+ (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1),
+ (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdd0 ),
+ (V6_vdd0 )>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdd0_128B ),
+ (V6_vdd0 )>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1),
+ (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1),
+ (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+
+// V66 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddcarrysat_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasr_into_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatdw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatdw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatdw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatdw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrotr HvxVR:$src1, HvxVR:$src2),
+ (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrotr_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
index 03c504ff0b08..b3132d41b903 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepMappings.td
@@ -1,4 +1,4 @@
-//===- HexagonDepMappings.td ----------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,7 +9,6 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def A2_negAlias : InstAlias<"$Rd32 = neg($Rs32)", (A2_subri IntRegs:$Rd32, 0, IntRegs:$Rs32)>;
def A2_notAlias : InstAlias<"$Rd32 = not($Rs32)", (A2_subri IntRegs:$Rd32, -1, IntRegs:$Rs32)>;
def A2_tfrfAlias : InstAlias<"if (!$Pu4) $Rd32 = $Rs32", (A2_paddif IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
@@ -252,6 +251,7 @@ def V6_vaslhv_altAlias : InstAlias<"$Vd32 = vaslh($Vu32,$Vv32)", (V6_vaslhv HvxV
def V6_vaslw_acc_altAlias : InstAlias<"$Vx32 += vaslw($Vu32,$Rt32)", (V6_vaslw_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vaslw_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Rt32)", (V6_vaslw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vaslwv_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Vv32)", (V6_vaslwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasr_into_altAlias : InstAlias<"$Vxx32 = vasrinto($Vu32,$Vv32)", (V6_vasr_into HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vasrh_acc_altAlias : InstAlias<"$Vx32 += vasrh($Vu32,$Rt32)", (V6_vasrh_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vasrh_altAlias : InstAlias<"$Vd32 = vasrh($Vu32,$Rt32)", (V6_vasrh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vasrhbrndsat_altAlias : InstAlias<"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhbrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
@@ -402,6 +402,7 @@ def V6_vrmpyubi_acc_altAlias : InstAlias<"$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)",
def V6_vrmpyubi_altAlias : InstAlias<"$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
def V6_vrmpyubv_acc_altAlias : InstAlias<"$Vx32 += vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vrmpyubv_altAlias : InstAlias<"$Vd32 = vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrotr_altAlias : InstAlias<"$Vd32 = vrotr($Vu32,$Vv32)", (V6_vrotr HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vroundhb_altAlias : InstAlias<"$Vd32 = vroundhb($Vu32,$Vv32):sat", (V6_vroundhb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vroundhub_altAlias : InstAlias<"$Vd32 = vroundhub($Vu32,$Vv32):sat", (V6_vroundhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vrounduhub_altAlias : InstAlias<"$Vd32 = vrounduhub($Vu32,$Vv32):sat", (V6_vrounduhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
@@ -473,4 +474,6 @@ def V6_vunpackub_altAlias : InstAlias<"$Vdd32 = vunpackub($Vu32)", (V6_vunpackub
def V6_vunpackuh_altAlias : InstAlias<"$Vdd32 = vunpackuh($Vu32)", (V6_vunpackuh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
def V6_vzb_altAlias : InstAlias<"$Vdd32 = vzxtb($Vu32)", (V6_vzb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
def V6_vzh_altAlias : InstAlias<"$Vdd32 = vzxth($Vu32)", (V6_vzh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>;
+def V6_zld0Alias : InstAlias<"z = vmem($Rt32)", (V6_zLd_ai IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_zldp0Alias : InstAlias<"if ($Pv4) z = vmem($Rt32)", (V6_zLd_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
def Y2_dcfetchAlias : InstAlias<"dcfetch($Rs32)", (Y2_dcfetchbo IntRegs:$Rs32, 0)>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
index 9d960953f8f5..ef2d4fa45702 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepOperands.td
@@ -1,4 +1,4 @@
-//===- HexagonDepOperands.td ----------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,16 +9,12 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
-def s10_6ImmOperand : AsmOperandClass { let Name = "s10_6Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s10_6Imm : Operand<i32> { let ParserMatchClass = s10_6ImmOperand; let DecoderMethod = "s10_6ImmDecoder"; }
-def s10_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 6>(N->getSExtValue());}]>;
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
@@ -130,6 +126,3 @@ def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtVal
def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
-def s10_0ImmOperand : AsmOperandClass { let Name = "s10_0Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s10_0Imm : Operand<i32> { let ParserMatchClass = s10_0ImmOperand; let DecoderMethod = "s10_0ImmDecoder"; }
-def s10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 0>(N->getSExtValue());}]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h b/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
index 656c83f2d0c4..0fd55e8b7997 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
@@ -1,4 +1,4 @@
-//===- HexagonDepTimingClasses.h ------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,7 +10,6 @@
//===----------------------------------------------------------------------===//
-
#ifndef TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H
#define TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H
@@ -20,19 +19,25 @@ namespace llvm {
inline bool is_TC3x(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_16d0d8d5:
- case Hexagon::Sched::tc_1853ea6d:
- case Hexagon::Sched::tc_60571023:
- case Hexagon::Sched::tc_7934b9df:
- case Hexagon::Sched::tc_8fd5f294:
- case Hexagon::Sched::tc_b9c0b731:
- case Hexagon::Sched::tc_bcc96cee:
- case Hexagon::Sched::tc_c6ce9b3f:
- case Hexagon::Sched::tc_c6ebf8dd:
- case Hexagon::Sched::tc_c82dc1ff:
- case Hexagon::Sched::tc_caaebcba:
- case Hexagon::Sched::tc_cf59f215:
- case Hexagon::Sched::tc_e913dc32:
+ case Hexagon::Sched::tc_05d3a09b:
+ case Hexagon::Sched::tc_0d8f5752:
+ case Hexagon::Sched::tc_13bfbcf9:
+ case Hexagon::Sched::tc_174516e8:
+ case Hexagon::Sched::tc_1a2fd869:
+ case Hexagon::Sched::tc_1c4528a2:
+ case Hexagon::Sched::tc_32779c6f:
+ case Hexagon::Sched::tc_5b54b33f:
+ case Hexagon::Sched::tc_6b25e783:
+ case Hexagon::Sched::tc_76851da1:
+ case Hexagon::Sched::tc_9debc299:
+ case Hexagon::Sched::tc_a9d88b22:
+ case Hexagon::Sched::tc_bafaade3:
+ case Hexagon::Sched::tc_bcf98408:
+ case Hexagon::Sched::tc_bdceeac1:
+ case Hexagon::Sched::tc_c8ce0b5c:
+ case Hexagon::Sched::tc_d1aa9eaa:
+ case Hexagon::Sched::tc_d773585a:
+ case Hexagon::Sched::tc_df3319ed:
return true;
default:
return false;
@@ -41,8 +46,8 @@ inline bool is_TC3x(unsigned SchedClass) {
inline bool is_TC2early(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_14cd4cfa:
- case Hexagon::Sched::tc_2a160009:
+ case Hexagon::Sched::tc_b4407292:
+ case Hexagon::Sched::tc_fc3999b4:
return true;
default:
return false;
@@ -51,12 +56,13 @@ inline bool is_TC2early(unsigned SchedClass) {
inline bool is_TC4x(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_038a1342:
- case Hexagon::Sched::tc_4d99bca9:
- case Hexagon::Sched::tc_6792d5ff:
- case Hexagon::Sched::tc_9c00ce8d:
- case Hexagon::Sched::tc_d580173f:
- case Hexagon::Sched::tc_f3eaa14b:
+ case Hexagon::Sched::tc_2f7c551d:
+ case Hexagon::Sched::tc_2ff964b4:
+ case Hexagon::Sched::tc_3a867367:
+ case Hexagon::Sched::tc_3b470976:
+ case Hexagon::Sched::tc_4560740b:
+ case Hexagon::Sched::tc_a58fd5cc:
+ case Hexagon::Sched::tc_b8bffe55:
return true;
default:
return false;
@@ -65,23 +71,27 @@ inline bool is_TC4x(unsigned SchedClass) {
inline bool is_TC2(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_00afc57e:
- case Hexagon::Sched::tc_1b9c9ee5:
- case Hexagon::Sched::tc_234a11a5:
- case Hexagon::Sched::tc_2b6f77c6:
- case Hexagon::Sched::tc_41d5298e:
- case Hexagon::Sched::tc_5ba5997d:
- case Hexagon::Sched::tc_84df2cd3:
- case Hexagon::Sched::tc_87735c3b:
- case Hexagon::Sched::tc_897d1a9d:
- case Hexagon::Sched::tc_976ddc4f:
- case Hexagon::Sched::tc_b44c6e2a:
- case Hexagon::Sched::tc_b9c4623f:
- case Hexagon::Sched::tc_c2f7d806:
- case Hexagon::Sched::tc_c74f796f:
- case Hexagon::Sched::tc_d088982c:
- case Hexagon::Sched::tc_ef84f62f:
- case Hexagon::Sched::tc_f49e76f4:
+ case Hexagon::Sched::tc_002cb246:
+ case Hexagon::Sched::tc_14b5c689:
+ case Hexagon::Sched::tc_1c80410a:
+ case Hexagon::Sched::tc_4414d8b1:
+ case Hexagon::Sched::tc_6132ba3d:
+ case Hexagon::Sched::tc_61830035:
+ case Hexagon::Sched::tc_679309b8:
+ case Hexagon::Sched::tc_703e822c:
+ case Hexagon::Sched::tc_779080bf:
+ case Hexagon::Sched::tc_784490da:
+ case Hexagon::Sched::tc_88b4f13d:
+ case Hexagon::Sched::tc_9461ff31:
+ case Hexagon::Sched::tc_9e313203:
+ case Hexagon::Sched::tc_a813cf9a:
+ case Hexagon::Sched::tc_bfec0f01:
+ case Hexagon::Sched::tc_cf8126ae:
+ case Hexagon::Sched::tc_d08ee0f4:
+ case Hexagon::Sched::tc_e4a7f9f0:
+ case Hexagon::Sched::tc_f429765c:
+ case Hexagon::Sched::tc_f675fee8:
+ case Hexagon::Sched::tc_f9058dd7:
return true;
default:
return false;
@@ -90,45 +100,43 @@ inline bool is_TC2(unsigned SchedClass) {
inline bool is_TC1(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_181af5d0:
- case Hexagon::Sched::tc_1b82a277:
- case Hexagon::Sched::tc_1e856f58:
- case Hexagon::Sched::tc_351fed2d:
- case Hexagon::Sched::tc_3669266a:
- case Hexagon::Sched::tc_3cb8ea06:
- case Hexagon::Sched::tc_452f85af:
- case Hexagon::Sched::tc_481e5e5c:
- case Hexagon::Sched::tc_49eb22c8:
- case Hexagon::Sched::tc_523fcf30:
- case Hexagon::Sched::tc_52d7bbea:
- case Hexagon::Sched::tc_53bc8a6a:
- case Hexagon::Sched::tc_540fdfbc:
- case Hexagon::Sched::tc_55050d58:
- case Hexagon::Sched::tc_609d2efe:
- case Hexagon::Sched::tc_68cb12ce:
- case Hexagon::Sched::tc_6ebb4a12:
- case Hexagon::Sched::tc_6efc556e:
- case Hexagon::Sched::tc_73043bf4:
- case Hexagon::Sched::tc_7a830544:
- case Hexagon::Sched::tc_855b0b61:
- case Hexagon::Sched::tc_8fe6b782:
- case Hexagon::Sched::tc_90f3e30c:
- case Hexagon::Sched::tc_97743097:
- case Hexagon::Sched::tc_99be14ca:
- case Hexagon::Sched::tc_9faf76ae:
- case Hexagon::Sched::tc_a46f0df5:
- case Hexagon::Sched::tc_a904d137:
- case Hexagon::Sched::tc_b9488031:
- case Hexagon::Sched::tc_be706f30:
- case Hexagon::Sched::tc_c6aa82f7:
- case Hexagon::Sched::tc_cde8b071:
- case Hexagon::Sched::tc_d6bf0472:
- case Hexagon::Sched::tc_dbdffe3d:
- case Hexagon::Sched::tc_e0739b8c:
- case Hexagon::Sched::tc_e1e99bfa:
- case Hexagon::Sched::tc_e9fae2d6:
- case Hexagon::Sched::tc_f2704b9a:
- case Hexagon::Sched::tc_f8eeed7a:
+ case Hexagon::Sched::tc_0663f615:
+ case Hexagon::Sched::tc_0a705168:
+ case Hexagon::Sched::tc_0ae0825c:
+ case Hexagon::Sched::tc_1b6f7cec:
+ case Hexagon::Sched::tc_1fc97744:
+ case Hexagon::Sched::tc_20cdee80:
+ case Hexagon::Sched::tc_2332b92e:
+ case Hexagon::Sched::tc_2eabeebe:
+ case Hexagon::Sched::tc_3d495a39:
+ case Hexagon::Sched::tc_4c5ba658:
+ case Hexagon::Sched::tc_56336eb0:
+ case Hexagon::Sched::tc_56f114f4:
+ case Hexagon::Sched::tc_57890846:
+ case Hexagon::Sched::tc_5a2711e5:
+ case Hexagon::Sched::tc_5b7c0967:
+ case Hexagon::Sched::tc_640086b5:
+ case Hexagon::Sched::tc_643b4717:
+ case Hexagon::Sched::tc_85c9c08f:
+ case Hexagon::Sched::tc_85d5d03f:
+ case Hexagon::Sched::tc_862b3e70:
+ case Hexagon::Sched::tc_946df596:
+ case Hexagon::Sched::tc_9c3ecd83:
+ case Hexagon::Sched::tc_9fc3dae0:
+ case Hexagon::Sched::tc_a1123dda:
+ case Hexagon::Sched::tc_a1c00888:
+ case Hexagon::Sched::tc_ae53734a:
+ case Hexagon::Sched::tc_b31c2e97:
+ case Hexagon::Sched::tc_b4b5c03a:
+ case Hexagon::Sched::tc_b51dc29a:
+ case Hexagon::Sched::tc_cd374165:
+ case Hexagon::Sched::tc_cfd8378a:
+ case Hexagon::Sched::tc_d5b7b0c1:
+ case Hexagon::Sched::tc_d9d43ecb:
+ case Hexagon::Sched::tc_db2bce9c:
+ case Hexagon::Sched::tc_de4df740:
+ case Hexagon::Sched::tc_de554571:
+ case Hexagon::Sched::tc_e78647bd:
return true;
default:
return false;
@@ -136,4 +144,4 @@ inline bool is_TC1(unsigned SchedClass) {
}
} // namespace llvm
-#endif
+#endif \ No newline at end of file
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 557e6384be6a..8e2f5093038e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -731,9 +731,7 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
MIB.add(MO);
// Set memory references.
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(*MI);
MI->eraseFromParent();
return;
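For reference, the change above replaces the iterator-range form of copying memory operands with a single clone call on the builder. A minimal sketch of the two idioms, reusing the MIB and MI names from the hunk (illustrative only, not a standalone translation unit):

// Before: copy the memoperand range explicitly.
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
MIB.setMemRefs(MMOBegin, MMOEnd);

// After: clone all memory references from the original instruction.
MIB.cloneMemRefs(*MI);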
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 7e774674e0c0..1a762c0c9de7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -891,14 +891,7 @@ void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
MB.add(MO);
Ox++;
}
-
- MachineFunction &MF = *B.getParent();
- MachineInstr::mmo_iterator I = MI.memoperands_begin();
- unsigned NR = std::distance(I, MI.memoperands_end());
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
- for (unsigned i = 0; i < NR; ++i)
- MemRefs[i] = *I++;
- MB.setMemRefs(MemRefs, MemRefs+NR);
+ MB.cloneMemRefs(MI);
MachineInstr *NewI = MB;
NewI->clearKillInfo();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 97b02e2b34cb..f5736546a87c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -550,6 +550,37 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+/// Returns true if the target can safely skip saving callee-saved registers
+/// for noreturn nounwind functions.
+bool HexagonFrameLowering::enableCalleeSaveSkip(
+ const MachineFunction &MF) const {
+ const auto &F = MF.getFunction();
+ assert(F.hasFnAttribute(Attribute::NoReturn) &&
+ F.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ !F.getFunction().hasFnAttribute(Attribute::UWTable));
+ (void)F;
+
+ // No need to save callee saved registers if the function does not return.
+ return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
+}
+
+// Helper function used to determine when to eliminate the stack frame for
+// functions marked as noreturn and when the noreturn-stack-elim option is
+// specified. When both these conditions are true, then an FP may not be needed
+// even if the function makes a call. It is very similar to enableCalleeSaveSkip,
+// but it is used to check whether the allocframe can be eliminated as well.
+static bool enableAllocFrameElim(const MachineFunction &MF) {
+ const auto &F = MF.getFunction();
+ const auto &MFI = MF.getFrameInfo();
+ const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ assert(!MFI.hasVarSizedObjects() &&
+ !HST.getRegisterInfo()->needsStackRealignment(MF));
+ return F.hasFnAttribute(Attribute::NoReturn) &&
+ F.hasFnAttribute(Attribute::NoUnwind) &&
+ !F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() &&
+ MFI.getStackSize() == 0;
+}
+
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
bool PrologueStubs) const {
MachineFunction &MF = *MBB.getParent();
@@ -994,7 +1025,7 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
}
const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
- if (MFI.hasCalls() || HMFI.hasClobberLR())
+ if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
return true;
return false;
@@ -1266,7 +1297,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
// Call spill function.
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
- : MBB.getLastNonDebugInstr()->getDebugLoc();
+ : MBB.findDebugLoc(MBB.end());
MachineInstr *DeallocCall = nullptr;
if (HasTC) {
@@ -1579,10 +1610,10 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
// S2_storeri_io FI, 0, TmpR
BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(TmpR, RegState::Kill)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(TmpR, RegState::Kill)
+ .cloneMemRefs(*MI);
NewRegs.push_back(TmpR);
B.erase(It);
@@ -1604,9 +1635,9 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
// TmpR = L2_loadri_io FI, 0
unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
// DstR = C2_tfrrp TmpR if DstR is a predicate register
// DstR = A2_tfrrcr TmpR if DstR is a modifier register
@@ -1708,7 +1739,7 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
// register that is entirely undefined.
LivePhysRegs LPR(HRI);
LPR.addLiveIns(B);
- SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers;
for (auto R = B.begin(); R != It; ++R) {
Clobbers.clear();
LPR.stepForward(*R, Clobbers);
@@ -1731,10 +1762,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcLo, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcLo, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
}
// Store high part.
@@ -1742,10 +1773,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
StoreOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(Size)
- .addReg(SrcHi, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(Size)
+ .addReg(SrcHi, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
}
B.erase(It);
@@ -1777,17 +1808,17 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
// Load high part.
LoadOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
- .addFrameIndex(FI)
- .addImm(Size)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(Size)
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
@@ -1813,10 +1844,10 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcR, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcR, getKillRegState(IsKill))
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
@@ -1841,9 +1872,9 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
- .addFrameIndex(FI)
- .addImm(0)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addFrameIndex(FI)
+ .addImm(0)
+ .cloneMemRefs(*MI);
B.erase(It);
return true;
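The two helpers added above gate frame elimination on the same attribute combination. A standalone sketch in plain C++ (hypothetical names, not LLVM code) of the predicate they both express: the function never returns, never unwinds, and needs no unwind table, so nothing after the call site can observe the callee-saved state.

// Minimal sketch; FnAttrs and canSkipCalleeSaves are hypothetical names.
struct FnAttrs { bool NoReturn; bool NoUnwind; bool UWTable; };

static bool canSkipCalleeSaves(const FnAttrs &A, bool SubtargetOptIn) {
  // SubtargetOptIn mirrors HexagonSubtarget::noreturnStackElim() above.
  return A.NoReturn && A.NoUnwind && !A.UWTable && SubtargetOptIn;
}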
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 988718860c5b..d65d870750f8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -41,6 +41,8 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
override {}
+ bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp
deleted file mode 100644
index 63ec9c3d3124..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGatherPacketize.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===- HexagonGatherPacketize.cpp -----------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This pass ensures that producer and consumer of VTMP are paired in a bundle.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "gather-packetize"
-
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-cl::opt<bool> EnableGatherPacketize(
- "hexagon-enable-gather-packetize", cl::Hidden, cl::init(true),
- cl::desc("Generate gather packets before packetization"));
-
-namespace llvm {
-FunctionPass *createHexagonGatherPacketize();
-void initializeHexagonGatherPacketizePass(PassRegistry &);
-}
-
-namespace {
-class HexagonGatherPacketize : public MachineFunctionPass {
-public:
- static char ID;
- HexagonGatherPacketize() : MachineFunctionPass(ID) {
- PassRegistry &Registry = *PassRegistry::getPassRegistry();
- initializeHexagonGatherPacketizePass(Registry);
- }
-
- StringRef getPassName() const override {
- return "Hexagon Gather Packetize Code";
- }
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-char HexagonGatherPacketize::ID = 0;
-
-static inline bool isVtmpDef(const MachineInstr &MI) {
- for (const MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && MO.isImplicit() &&
- (MO.getReg() == Hexagon::VTMP)) {
- return true;
- }
- return false;
-}
-
-static inline bool isVtmpUse(const MachineInstr &MI) {
- return (MI.mayStore() && (MI.getOperand(2)).isReg() &&
- ((MI.getOperand(2)).getReg() == Hexagon::VTMP));
-}
-
-bool HexagonGatherPacketize::runOnMachineFunction(MachineFunction &Fn) {
- if (!EnableGatherPacketize)
- return false;
- auto &ST = Fn.getSubtarget<HexagonSubtarget>();
- bool HasV65 = ST.hasV65Ops();
- bool UseHVX = ST.useHVXOps();
- if (!(HasV65 & UseHVX))
- return false;
-
- for (auto &MBB : Fn) {
- bool VtmpDef = false;
- MachineBasicBlock::iterator MII, MIE, DefMII;
- for (MII = MBB.begin(), MIE = MBB.end(); MII != MIE; ++MII) {
- MachineInstr &MI = *MII;
- if (VtmpDef) {
- if (!isVtmpUse(MI))
- continue;
- MBB.splice(std::next(DefMII), &MBB, MII);
- finalizeBundle(MBB, DefMII.getInstrIterator(),
- std::next(MII).getInstrIterator());
- VtmpDef = false;
- continue;
- }
- if (!(isVtmpDef(MI)))
- continue;
- VtmpDef = true;
- DefMII = MII;
- }
- assert(!VtmpDef && "VTMP producer and consumer not in same block");
- }
- return true;
-}
-}
-
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-INITIALIZE_PASS(HexagonGatherPacketize, "hexagon-gather-packetize",
- "Hexagon gather packetize Code", false, false)
-
-FunctionPass *llvm::createHexagonGatherPacketize() {
- return new HexagonGatherPacketize();
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 2582a021e956..e3492e7374e9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -632,7 +632,7 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
SortableVectorType VRs;
for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I)
VRs.push_back(I->first);
- llvm::sort(VRs.begin(), VRs.end(), LexCmp);
+ llvm::sort(VRs, LexCmp);
// Transfer the results to the outgoing register ordering.
for (unsigned i = 0, n = VRs.size(); i < n; ++i)
RO.insert(std::make_pair(VRs[i], i));
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 0e33976a58ac..239cf49ca8a2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1011,10 +1011,9 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
/// the use of the hardware loop instruction.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
bool IsInnerHWLoop) const {
- const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
- LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Blocks[i];
+ LLVM_DEBUG(dbgs() << "\nhw_loop head, "
+ << printMBBReference(**L->block_begin()));
+ for (MachineBasicBlock *MBB : L->getBlocks()) {
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
@@ -1368,11 +1367,10 @@ bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
const MachineOperand *MO,
LoopFeederMap &LoopFeederPhi) const {
if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
- const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
- LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
+ LLVM_DEBUG(dbgs() << "\nhw_loop head, "
+ << printMBBReference(**L->block_begin()));
// Ignore all BBs that form Loop.
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock *MBB : L->getBlocks()) {
if (A == MBB)
return false;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index efb4c2eb0fc3..470b05bda4c6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -127,8 +127,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
}
SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
+ MachineMemOperand *MemOp = LD->getMemOperand();
auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
-> MachineSDNode* {
@@ -159,7 +158,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
MVT::i32, MVT::Other, Base,
IncV, Chain);
- L->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(L, {MemOp});
To[1] = SDValue(L, 1); // Next address.
To[2] = SDValue(L, 2); // Chain.
// Handle special case for extension to i64.
@@ -170,7 +169,7 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
Base, Zero, Chain);
- L->setMemRefs(MemOp, MemOp+1);
+ CurDAG->setNodeMemRefs(L, {MemOp});
To[2] = SDValue(L, 1); // Chain.
MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base, IncV);
@@ -344,9 +343,8 @@ bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
FLI->second, dl, RTys,
{IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
- Res->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
+ CurDAG->setNodeMemRefs(Res, {MemOp});
ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
@@ -525,8 +523,7 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
}
SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
+ MachineMemOperand *MemOp = ST->getMemOperand();
// Next address Chain
SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
@@ -537,14 +534,14 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
SDValue Ops[] = { Base, IncV, Value, Chain };
MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
Ops);
- S->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(S, {MemOp});
To[0] = SDValue(S, 0);
To[1] = SDValue(S, 1);
} else {
SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDValue Ops[] = { Base, Zero, Value, Chain };
MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- S->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(S, {MemOp});
To[1] = SDValue(S, 0);
MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
Base, IncV);
@@ -1550,6 +1547,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
return true;
}
}
+ break;
}
default:
break;
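The same memoperand migration recurs throughout this file: the old code allocated a one-element array on the MachineFunction and attached it with setMemRefs, while the new code hands the single MachineMemOperand to the DAG. A sketch using the names from the first hunk (illustrative, not a compilable unit on its own):

// Before:
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
L->setMemRefs(MemOp, MemOp + 1);

// After:
MachineMemOperand *MemOp = LD->getMemOperand();
CurDAG->setNodeMemRefs(L, {MemOp});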
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 8aef9b4560d5..b796e442d4fa 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -120,7 +120,7 @@ struct Coloring {
return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
}
- void dump() const;
+ LLVM_DUMP_METHOD void dump() const;
private:
ArrayRef<Node> Order;
@@ -267,7 +267,7 @@ bool Coloring::color() {
return true;
}
-LLVM_DUMP_METHOD
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Coloring::dump() const {
dbgs() << "{ Order: {";
for (unsigned I = 0; I != Order.size(); ++I) {
@@ -309,6 +309,7 @@ void Coloring::dump() const {
dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
dbgs() << " }\n}\n";
}
+#endif
namespace {
// Base class for reordering networks. They don't strictly need to be
@@ -651,6 +652,7 @@ struct OpRef {
IndexBits = 28,
};
+ LLVM_DUMP_METHOD
void print(raw_ostream &OS, const SelectionDAG &G) const;
private:
@@ -663,7 +665,7 @@ struct NodeTemplate {
MVT Ty = MVT::Other;
std::vector<OpRef> Ops;
- void print(raw_ostream &OS, const SelectionDAG &G) const;
+ LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
};
struct ResultStack {
@@ -699,10 +701,12 @@ struct ResultStack {
BaseType List;
+ LLVM_DUMP_METHOD
void print(raw_ostream &OS, const SelectionDAG &G) const;
};
} // namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
if (isValue()) {
OpV.getNode()->print(OS, &G);
@@ -752,6 +756,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
OS << '\n';
}
}
+#endif
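The guard pattern applied to the dump and print methods above keeps their bodies out of release builds: declarations carry LLVM_DUMP_METHOD, and the definitions sit behind the NDEBUG/LLVM_ENABLE_DUMP check. In outline (same macros as in the hunks above):

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Coloring::dump() const {
  // printing code, compiled only in asserts builds or with LLVM_ENABLE_DUMP
}
#endif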
namespace {
struct ShuffleMask {
@@ -1327,6 +1332,32 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
return vmuxp(Bytes, L, R, Results);
}
+namespace {
+ struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
+ template <typename T>
+ Deleter(SelectionDAG &D, T &C)
+ : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
+ C.erase(N);
+ }) {}
+ };
+
+ template <typename T>
+ struct NullifyingVector : public T {
+ DenseMap<SDNode*, SDNode**> Refs;
+ NullifyingVector(T &&V) : T(V) {
+ for (unsigned i = 0, e = T::size(); i != e; ++i) {
+ SDNode *&N = T::operator[](i);
+ Refs[N] = &N;
+ }
+ }
+ void erase(SDNode *N) {
+ auto F = Refs.find(N);
+ if (F != Refs.end())
+ *F->second = nullptr;
+ }
+ };
+}
+
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
MVT ResTy, SDValue Va, SDValue Vb,
SDNode *N) {
@@ -1337,10 +1368,30 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
bool HavePairs = (2*HwLen == VecLen);
MVT SingleTy = getSingleVT(MVT::i8);
+ // The prior attempts to handle this shuffle may have left a bunch of
+ // dead nodes in the DAG (such as constants). These nodes will be added
+ // at the end of DAG's node list, which at that point had already been
+ // sorted topologically. In the main selection loop, the node list is
+ // traversed backwards from the root node, which means that any new
+ // nodes (from the end of the list) will not be visited.
+ // Scalarization will replace the shuffle node with the scalarized
+ // expression, and if that expression reused any of the leftover (dead)
+ // nodes, these nodes would not be selected (since the "local" selection
+ // only visits nodes that are not in AllNodes).
+ // To avoid this issue, remove all dead nodes from the DAG now.
+ DAG.RemoveDeadNodes();
+ DenseSet<SDNode*> AllNodes;
+ for (SDNode &S : DAG.allnodes())
+ AllNodes.insert(&S);
+
+ Deleter DUA(DAG, AllNodes);
+
SmallVector<SDValue,128> Ops;
+ LLVMContext &Ctx = *DAG.getContext();
+ MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
for (int I : Mask) {
if (I < 0) {
- Ops.push_back(ISel.selectUndef(dl, ElemTy));
+ Ops.push_back(ISel.selectUndef(dl, LegalTy));
continue;
}
SDValue Vec;
@@ -1360,7 +1411,7 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
}
}
SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, {Vec, Idx});
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
SDValue L = Lower.LowerOperation(Ex, DAG);
assert(L.getNode());
Ops.push_back(L);
@@ -1384,32 +1435,55 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
assert(!N->use_empty());
ISel.ReplaceNode(N, LV.getNode());
- DAG.RemoveDeadNodes();
- std::deque<SDNode*> SubNodes;
- SubNodes.push_back(LV.getNode());
+ if (AllNodes.count(LV.getNode())) {
+ DAG.RemoveDeadNodes();
+ return true;
+ }
+
+ // The lowered build-vector node will now need to be selected. It needs
+ // to be done here because this node and its subnodes are not included
+ // in the main selection loop.
+ // Implement essentially the same topological ordering algorithm as is
+ // used in SelectionDAGISel.
+
+ SetVector<SDNode*> SubNodes, TmpQ;
+ std::map<SDNode*,unsigned> NumOps;
+
+ SubNodes.insert(LV.getNode());
for (unsigned I = 0; I != SubNodes.size(); ++I) {
- for (SDValue Op : SubNodes[I]->ops())
- SubNodes.push_back(Op.getNode());
+ unsigned OpN = 0;
+ SDNode *S = SubNodes[I];
+ for (SDValue Op : S->ops()) {
+ if (AllNodes.count(Op.getNode()))
+ continue;
+ SubNodes.insert(Op.getNode());
+ ++OpN;
+ }
+ NumOps.insert({S, OpN});
+ if (OpN == 0)
+ TmpQ.insert(S);
}
- while (!SubNodes.empty()) {
- SDNode *S = SubNodes.front();
- SubNodes.pop_front();
- if (S->use_empty())
- continue;
- // This isn't great, but users need to be selected before any nodes that
- // they use. (The reason is to match larger patterns, and avoid nodes that
- // cannot be matched on their own, e.g. ValueType, TokenFactor, etc.).
- bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) {
- return llvm::any_of(SubNodes, [U](const SDNode *T) {
- return T == U;
- });
- });
- if (PendingUser)
- SubNodes.push_back(S);
- else
- ISel.Select(S);
+
+ for (unsigned I = 0; I != TmpQ.size(); ++I) {
+ SDNode *S = TmpQ[I];
+ for (SDNode *U : S->uses()) {
+ if (!SubNodes.count(U))
+ continue;
+ auto F = NumOps.find(U);
+ assert(F != NumOps.end());
+ assert(F->second > 0);
+ if (!--F->second)
+ TmpQ.insert(F->first);
+ }
}
+ assert(SubNodes.size() == TmpQ.size());
+ NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
+
+ Deleter DUQ(DAG, Queue);
+ for (SDNode *S : reverse(Queue))
+ if (S != nullptr)
+ ISel.Select(S);
DAG.RemoveDeadNodes();
return true;
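A standalone sketch (plain C++, hypothetical names) of the topological ordering used above: count, for every node, how many of its operands belong to the newly created set, seed a worklist with the nodes that have none, and release a user once its last in-set operand has been queued. The hunk then walks the resulting queue in reverse so that users are selected before their operands.

#include <cstddef>
#include <map>
#include <vector>

using Node = int;                                // stand-in for SDNode*
using Graph = std::map<Node, std::vector<Node>>; // node -> its in-set operands

std::vector<Node> topoOrder(const Graph &G) {
  std::map<Node, unsigned> NumOps;               // pending in-set operands
  std::map<Node, std::vector<Node>> Users;       // reverse edges
  std::vector<Node> Queue;
  for (const auto &P : G) {
    unsigned N = 0;
    for (Node Op : P.second)
      if (G.count(Op)) {
        ++N;
        Users[Op].push_back(P.first);
      }
    NumOps[P.first] = N;
    if (N == 0)
      Queue.push_back(P.first);                  // leaf: ready immediately
  }
  for (std::size_t I = 0; I != Queue.size(); ++I)
    for (Node U : Users[Queue[I]])
      if (--NumOps[U] == 0)
        Queue.push_back(U);
  return Queue;                                  // operands precede users
}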
@@ -2048,10 +2122,6 @@ void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
}
void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
- if (!HST->usePackets()) {
- report_fatal_error("Support for gather requires packets, "
- "which are disabled");
- }
const SDLoc &dl(N);
SDValue Chain = N->getOperand(0);
SDValue Address = N->getOperand(2);
@@ -2083,18 +2153,13 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain };
SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
ReplaceNode(N, Result);
}
void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
- if (!HST->usePackets()) {
- report_fatal_error("Support for gather requires packets, "
- "which are disabled");
- }
const SDLoc &dl(N);
SDValue Chain = N->getOperand(0);
SDValue Address = N->getOperand(2);
@@ -2125,9 +2190,8 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
SDValue Ops[] = { Address, Base, Modifier, Offset, Chain };
SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
ReplaceNode(N, Result);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 604d84994b6c..1edf3e498dfa 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -151,16 +152,6 @@ static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
#include "HexagonGenCallingConv.inc"
-void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
- if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType(ISD::LOAD, VT, PromotedLdStVT);
-
- setOperationAction(ISD::STORE, VT, Promote);
- AddPromotedToType(ISD::STORE, VT, PromotedLdStVT);
- }
-}
-
SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
const {
@@ -250,6 +241,18 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return true;
}
+unsigned HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ // Just support r19, the linux kernel uses it.
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("r19", Hexagon::R19)
+ .Default(0);
+ if (Reg)
+ return Reg;
+
+ report_fatal_error("Invalid register name global variable");
+}
+
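StringSwitch resolves a register name to a value in a single expression, falling back to the Default when nothing matches; only r19 is recognized by the patch. A small sketch with one hypothetical extra entry, assuming the llvm/ADT/StringSwitch.h header included above:

#include "llvm/ADT/StringSwitch.h"

unsigned regNumber(llvm::StringRef Name) {
  return llvm::StringSwitch<unsigned>(Name)
      .Case("r19", 19)
      .Case("r20", 20)   // hypothetical extra entry, for illustration only
      .Default(0);       // 0 means "unknown register"
}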
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
@@ -1225,7 +1228,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
Subtarget(ST) {
- bool IsV4 = !Subtarget.hasV5Ops();
auto &HRI = *Subtarget.getRegisterInfo();
setPrefLoopAlignment(4);
@@ -1267,10 +1269,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
- if (Subtarget.hasV5Ops()) {
- addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
- }
+ addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
//
// Handling of scalar operations.
@@ -1284,21 +1284,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// which default to "expand" for at least one type.
// Misc operations.
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
-
- setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
- setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
- setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom legalize GlobalAddress nodes into CONST32.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@@ -1348,8 +1348,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ, MVT::i8, Promote);
setOperationAction(ISD::CTTZ, MVT::i16, Promote);
- // In V5, popcount can count # of 1s in i64 but returns i32.
- // On V4 it will be expanded (set later).
+ // Popcount can count # of 1s in i64 but returns i32.
setOperationAction(ISD::CTPOP, MVT::i8, Promote);
setOperationAction(ISD::CTPOP, MVT::i16, Promote);
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
@@ -1360,6 +1359,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
+ setOperationAction(ISD::FSHL, MVT::i32, Legal);
+ setOperationAction(ISD::FSHL, MVT::i64, Legal);
+ setOperationAction(ISD::FSHR, MVT::i32, Legal);
+ setOperationAction(ISD::FSHR, MVT::i64, Legal);
+
for (unsigned IntExpOp :
{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
@@ -1403,12 +1407,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Handling of vector operations.
//
- promoteLdStType(MVT::v4i8, MVT::i32);
- promoteLdStType(MVT::v2i16, MVT::i32);
- promoteLdStType(MVT::v8i8, MVT::i64);
- promoteLdStType(MVT::v4i16, MVT::i64);
- promoteLdStType(MVT::v2i32, MVT::i64);
-
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
@@ -1488,9 +1486,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
}
// Custom lower unaligned loads.
- for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
- MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
- setOperationAction(ISD::LOAD, VecVT, Custom);
+ // Also, for both loads and stores, verify the alignment of the address
+ // in case it is a compile-time constant. This is a usability feature to
+ // provide a meaningful error message to users.
+ for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
+ MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
}
for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) {
@@ -1508,63 +1510,27 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- // Subtarget-specific operation actions.
- //
- if (Subtarget.hasV60Ops()) {
- setOperationAction(ISD::ROTL, MVT::i32, Custom);
- setOperationAction(ISD::ROTL, MVT::i64, Custom);
- }
- if (Subtarget.hasV5Ops()) {
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FADD, MVT::f64, Expand);
- setOperationAction(ISD::FSUB, MVT::f64, Expand);
- setOperationAction(ISD::FMUL, MVT::f64, Expand);
-
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- } else { // V4
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i8, Expand);
- setOperationAction(ISD::CTPOP, MVT::i16, Expand);
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
-
- // Expand these operations for both f32 and f64:
- for (unsigned FPExpOpV4 :
- {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
- setOperationAction(FPExpOpV4, MVT::f32, Expand);
- setOperationAction(FPExpOpV4, MVT::f64, Expand);
- }
-
- for (ISD::CondCode FPExpCCV4 :
- {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
- ISD::SETUO, ISD::SETO}) {
- setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
- setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
- }
- }
+ // V5+.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
// Handling of indexed loads/stores: default is "expand".
//
@@ -1574,6 +1540,19 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
+ // Subtarget-specific operation actions.
+ //
+ if (Subtarget.hasV60Ops()) {
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i64, Legal);
+ setOperationAction(ISD::ROTR, MVT::i32, Legal);
+ setOperationAction(ISD::ROTR, MVT::i64, Legal);
+ }
+ if (Subtarget.hasV66Ops()) {
+ setOperationAction(ISD::FADD, MVT::f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::f64, Legal);
+ }
+
if (Subtarget.useHVXOps())
initializeHVXLowering();
@@ -1600,42 +1579,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
- if (IsV4) {
- // Handle single-precision floating point operations on V4.
- if (FastMath) {
- setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
- setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
- setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
- setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
- setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
- // Double-precision compares.
- setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
- setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
- } else {
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
- setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
- setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
- setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
- // Double-precision compares.
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
- setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
- }
- }
-
// This is the only fast library function for sqrtd.
if (FastMath)
setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
// Prefix is: nothing for "slow-math",
- // "fast2_" for V4 fast-math and V5+ fast-math double-precision
+ // "fast2_" for V5+ fast-math double-precision
// (actually, keep fast-math and fast-math2 separate for now)
if (FastMath) {
setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
- // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
} else {
setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
@@ -1645,44 +1600,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
}
- if (Subtarget.hasV5Ops()) {
- if (FastMath)
- setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
- else
- setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
- } else {
- // V4
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
- setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
- setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
- setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
- setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
- setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
- setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
- setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
- setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
- setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
- setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
- setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
- setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
- setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
- }
+ if (FastMath)
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
+ else
+ setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
// These cause problems when the shift amount is non-constant.
setLibcallName(RTLIB::SHL_I128, nullptr);
@@ -1738,6 +1659,26 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
+void
+HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
+ unsigned NeedAlign) const {
+ auto *CA = dyn_cast<ConstantSDNode>(Ptr);
+ if (!CA)
+ return;
+ unsigned Addr = CA->getZExtValue();
+ unsigned HaveAlign = Addr != 0 ? 1u << countTrailingZeros(Addr) : NeedAlign;
+ if (HaveAlign < NeedAlign) {
+ std::string ErrMsg;
+ raw_string_ostream O(ErrMsg);
+ O << "Misaligned constant address: " << format_hex(Addr, 10)
+ << " has alignment " << HaveAlign
+ << ", but the memory access requires " << NeedAlign;
+ if (DebugLoc DL = dl.getDebugLoc())
+ DL.print(O << ", at ");
+ report_fatal_error(O.str());
+ }
+}
+
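A standalone sketch of the alignment computation in validateConstPtrAlignment above: the largest power-of-two alignment a constant address guarantees is one shifted left by its number of trailing zero bits, and address zero is treated as sufficiently aligned.

#include <cstdint>

// Hypothetical helper mirroring the HaveAlign computation above.
static unsigned alignmentOfAddress(std::uint32_t Addr, unsigned NeedAlign) {
  if (Addr == 0)
    return NeedAlign;            // address 0 never triggers the error
  unsigned TZ = 0;
  while ((Addr & 1u) == 0) {     // portable countTrailingZeros
    ++TZ;
    Addr >>= 1;
  }
  return 1u << TZ;
}
// Example: an i32 store (NeedAlign == 4) to constant address 0x1002 yields
// HaveAlign == 2 and reports the misaligned-address fatal error.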
// Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
// intrinsic.
static bool isBrevLdIntrinsic(const Value *Inst) {
@@ -1834,11 +1775,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// The intrinsic function call is of the form { ElTy, i8* }
// @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
// should be derived from ElTy.
- PointerType *PtrTy = I.getCalledFunction()
- ->getReturnType()
- ->getContainedType(0)
- ->getPointerTo();
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
+ Info.memVT = MVT::getVT(ElTy);
llvm::Value *BasePtrVal = I.getOperand(0);
Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
// The offset value comes through Modifier register. For now, assume the
@@ -1904,12 +1842,12 @@ bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
}
TargetLoweringBase::LegalizeTypeAction
-HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
+HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() == 1)
return TargetLoweringBase::TypeScalarizeVector;
// Always widen vectors of i1.
- MVT ElemTy = VT.getSimpleVT().getVectorElementType();
+ MVT ElemTy = VT.getVectorElementType();
if (ElemTy == MVT::i1)
return TargetLoweringBase::TypeWidenVector;
@@ -2341,8 +2279,9 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
// Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
// position 0.
assert(ty(IdxV) == MVT::i32);
+ unsigned VecRep = 8 / VecWidth;
SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
- DAG.getConstant(8*Scale, dl, MVT::i32));
+ DAG.getConstant(8*VecRep, dl, MVT::i32));
SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
while (Scale > 1) {
@@ -2643,12 +2582,37 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
}
SDValue
+HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ unsigned ClaimAlign = LN->getAlignment();
+ validateConstPtrAlignment(LN->getBasePtr(), SDLoc(Op), ClaimAlign);
+ // Call LowerUnalignedLoad for all loads, it recognizes loads that
+ // don't need extra aligning.
+ return LowerUnalignedLoad(Op, DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ unsigned ClaimAlign = SN->getAlignment();
+ SDValue Ptr = SN->getBasePtr();
+ const SDLoc &dl(Op);
+ validateConstPtrAlignment(Ptr, dl, ClaimAlign);
+
+ MVT StoreTy = SN->getMemoryVT().getSimpleVT();
+ unsigned NeedAlign = Subtarget.getTypeAlignment(StoreTy);
+ if (ClaimAlign < NeedAlign)
+ return expandUnalignedStore(SN, DAG);
+ return Op;
+}
+
+SDValue
HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
const {
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
- unsigned HaveAlign = LN->getAlignment();
MVT LoadTy = ty(Op);
unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
+ unsigned HaveAlign = LN->getAlignment();
if (HaveAlign >= NeedAlign)
return Op;
@@ -2802,7 +2766,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
- case ISD::LOAD: return LowerUnalignedLoad(Op, DAG);
+ case ISD::LOAD: return LowerLoad(Op, DAG);
+ case ISD::STORE: return LowerStore(Op, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
case ISD::SRA:
@@ -2834,6 +2799,19 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
void
+HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ // We are only custom-lowering stores to verify the alignment of the
+ // address if it is a compile-time constant. Since a store can be modified
+ // during type-legalization (the value being stored may need legalization),
+ // return empty Results here to indicate that we don't really make any
+ // changes in the custom lowering.
+ if (N->getOpcode() != ISD::STORE)
+ return TargetLowering::LowerOperationWrapper(N, Results, DAG);
+}
+
+void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -2946,7 +2924,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- return Subtarget.hasV5Ops();
+ return true;
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
@@ -3110,6 +3088,25 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
return TargetLowering::findRepresentativeClass(TRI, VT);
}
+bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
+ ISD::LoadExtType ExtTy, EVT NewVT) const {
+ // TODO: This may be worth removing. Check regression tests for diffs.
+ if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
+ return false;
+
+ auto *L = cast<LoadSDNode>(Load);
+ std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
+ // Small-data object, do not shrink.
+ if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
+ return false;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
+ auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
+ const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
+ return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
+ }
+ return true;
+}
+
Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
BasicBlock *BB = Builder.GetInsertBlock();
@@ -3154,9 +3151,12 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
}
-bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
- AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
- return Size >= 4 && Size <= 8;
+ if (Size >= 4 && Size <= 8)
+ return AtomicExpansionKind::LLSC;
+ return AtomicExpansionKind::None;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 3d94bd1ff6ed..265c37e6ae61 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -101,7 +101,6 @@ namespace HexagonISD {
bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
const;
- void promoteLdStType(MVT VT, MVT PromotedLdStVT);
public:
explicit HexagonTargetLowering(const TargetMachine &TM,
@@ -142,10 +141,12 @@ namespace HexagonISD {
unsigned DefinedValues) const override;
bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
@@ -164,6 +165,8 @@ namespace HexagonISD {
SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
@@ -220,6 +223,9 @@ namespace HexagonISD {
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
+
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
unsigned
@@ -298,6 +304,9 @@ namespace HexagonISD {
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
const override;
+ bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
+ EVT NewVT) const override;
+
// Handling of atomic RMW instructions.
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
@@ -305,7 +314,8 @@ namespace HexagonISD {
Value *Addr, AtomicOrdering Ord) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
@@ -314,6 +324,9 @@ namespace HexagonISD {
private:
void initializeHVXLowering();
+ void validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
+ unsigned NeedAlign) const;
+
std::pair<SDValue,int> getBaseAndOffset(SDValue Addr) const;
bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 2566194ca9c6..a6400b5d8266 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -400,6 +400,76 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
MachinePointerInfo::getConstantPool(MF), Align);
}
+ // A special case is a situation where the vector is built entirely from
+ // elements extracted from another vector. This could be done via a shuffle
+ // more efficiently, but typically, the size of the source vector will not
+ // match the size of the vector being built (which precludes the use of a
+ // shuffle directly).
+ // This only handles a single source vector, and the vector being built
+ // should be of a sub-vector type of the source vector type.
+ auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
+ SmallVectorImpl<int> &SrcIdx) {
+ SDValue Vec;
+ for (SDValue V : Values) {
+ if (isUndef(V)) {
+ SrcIdx.push_back(-1);
+ continue;
+ }
+ if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+ // All extracts should come from the same vector.
+ SDValue T = V.getOperand(0);
+ if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
+ return false;
+ Vec = T;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (C == nullptr)
+ return false;
+ int I = C->getSExtValue();
+ assert(I >= 0 && "Negative element index");
+ SrcIdx.push_back(I);
+ }
+ SrcVec = Vec;
+ return true;
+ };
+
+ SmallVector<int,128> ExtIdx;
+ SDValue ExtVec;
+ if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
+ MVT ExtTy = ty(ExtVec);
+ unsigned ExtLen = ExtTy.getVectorNumElements();
+ if (ExtLen == VecLen || ExtLen == 2*VecLen) {
+ // Construct a new shuffle mask that will produce a vector with the same
+ // number of elements as the input vector, and such that the vector we
+ // want will be the initial subvector of it.
+ SmallVector<int,128> Mask;
+ BitVector Used(ExtLen);
+
+ for (int M : ExtIdx) {
+ Mask.push_back(M);
+ if (M >= 0)
+ Used.set(M);
+ }
+ // Fill the rest of the mask with the unused elements of ExtVec in hopes
+ // that it will result in a permutation of ExtVec's elements. It's still
+ // fine if it doesn't (e.g. if undefs are present, or elements are
+ // repeated), but permutations can always be done efficiently via vdelta
+ // and vrdelta.
+ for (unsigned I = 0; I != ExtLen; ++I) {
+ if (Mask.size() == ExtLen)
+ break;
+ if (!Used.test(I))
+ Mask.push_back(I);
+ }
+
+ SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
+ DAG.getUNDEF(ExtTy), Mask);
+ if (ExtLen == VecLen)
+ return S;
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S);
+ }
+ }
+
// Construct two halves in parallel, then or them together.
assert(4*Words.size() == Subtarget.getVectorLength());
SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
@@ -1356,7 +1426,8 @@ SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
// Sign- and zero-extends are legal.
assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
- return DAG.getZeroExtendVectorInReg(Op.getOperand(0), SDLoc(Op), ty(Op));
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
+ Op.getOperand(0));
}
SDValue
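A minimal, standalone sketch of the shuffle-mask construction added to buildHvxVectorReg above: given the lane indices extracted from the source vector (with -1 for undef), the remaining mask slots are padded with the source's unused lanes so that the shuffle is, ideally, a permutation. This is an illustration only, not code from the patch; the names are made up for the example.

    #include <cstdio>
    #include <vector>

    // Mirror of the mask-filling idea: keep the requested lanes first, then
    // append every lane of the source that was not used yet.
    static std::vector<int> buildShuffleMask(const std::vector<int> &ExtIdx,
                                             unsigned ExtLen) {
      std::vector<bool> Used(ExtLen, false);
      std::vector<int> Mask;
      for (int M : ExtIdx) {          // lanes requested by the BUILD_VECTOR
        Mask.push_back(M);
        if (M >= 0)
          Used[M] = true;
      }
      for (unsigned I = 0; I != ExtLen && Mask.size() != ExtLen; ++I)
        if (!Used[I])
          Mask.push_back(I);          // pad with unused lanes of the source
      return Mask;
    }

    int main() {
      // Building a 4-element vector from lanes {4, 5, undef, 6} of an
      // 8-lane source vector.
      std::vector<int> Mask = buildShuffleMask({4, 5, -1, 6}, 8);
      for (int M : Mask)
        std::printf("%d ", M);        // prints: 4 5 -1 6 0 1 2 3
      std::printf("\n");
      return 0;
    }

With ExtLen equal to the length of the vector being built, the first VecLen mask entries already select the wanted elements; with ExtLen == 2*VecLen the result is taken from the low subregister of the shuffled value, as the patch does with Hexagon::vsub_lo.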
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index 1bb3bc1ea31b..2236140d5dd7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -69,101 +69,101 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Instruction type according to the ISA.
IType Type = type;
- let TSFlags{5-0} = Type.Value;
+ let TSFlags{6-0} = Type.Value;
// Solo instructions, i.e., those that cannot be in a packet with others.
bits<1> isSolo = 0;
- let TSFlags{6} = isSolo;
+ let TSFlags{7} = isSolo;
// Packed only with A or X-type instructions.
bits<1> isSoloAX = 0;
- let TSFlags{7} = isSoloAX;
+ let TSFlags{8} = isSoloAX;
// Restricts slot 1 to ALU-only instructions.
bits<1> isRestrictSlot1AOK = 0;
- let TSFlags{8} = isRestrictSlot1AOK;
+ let TSFlags{9} = isRestrictSlot1AOK;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{9} = isPredicated;
+ let TSFlags{10} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{10} = isPredicatedFalse;
+ let TSFlags{11} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{11} = isPredicatedNew;
+ let TSFlags{12} = isPredicatedNew;
bits<1> isPredicateLate = 0;
- let TSFlags{12} = isPredicateLate; // Late predicate producer insn.
+ let TSFlags{13} = isPredicateLate; // Late predicate producer insn.
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{13} = isNewValue; // New-value consumer insn.
+ let TSFlags{14} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{14} = hasNewValue; // New-value producer insn.
+ let TSFlags{15} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{17-15} = opNewValue; // New-value produced operand.
+ let TSFlags{18-16} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{18} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{19} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{19} = isNVStore; // New-value store insn.
+ let TSFlags{20} = isNVStore; // New-value store insn.
bits<1> isCVLoadable = 0;
- let TSFlags{20} = isCVLoadable; // Load that can become cur-value load.
+ let TSFlags{21} = isCVLoadable; // Load that can become cur-value load.
bits<1> isCVLoad = 0;
- let TSFlags{21} = isCVLoad; // Cur-value load insn.
+ let TSFlags{22} = isCVLoad; // Cur-value load insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{22} = isExtendable; // Insn may be extended.
+ let TSFlags{23} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{23} = isExtended; // Insn must be extended.
+ let TSFlags{24} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{26-24} = opExtendable; // Which operand may be extended.
+ let TSFlags{27-25} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{27} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{28} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{32-28} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{33-29} = opExtentBits; //Number of bits of range before extending.
bits<2> opExtentAlign = 0;
- let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending.
+ let TSFlags{35-34} = opExtentAlign; // Alignment exponent before extending.
bit cofMax1 = 0;
- let TSFlags{35} = cofMax1;
+ let TSFlags{36} = cofMax1;
bit cofRelax1 = 0;
- let TSFlags{36} = cofRelax1;
+ let TSFlags{37} = cofRelax1;
bit cofRelax2 = 0;
- let TSFlags{37} = cofRelax2;
+ let TSFlags{38} = cofRelax2;
bit isRestrictNoSlot1Store = 0;
- let TSFlags{38} = isRestrictNoSlot1Store;
+ let TSFlags{39} = isRestrictNoSlot1Store;
// Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
- let TSFlags{43-41} = addrMode.Value;
+ let TSFlags{44-42} = addrMode.Value;
// Memory access size for mem access instructions (load/store)
MemAccessSize accessSize = NoMemAccess;
- let TSFlags{47-44} = accessSize.Value;
+ let TSFlags{48-45} = accessSize.Value;
bits<1> isTaken = 0;
- let TSFlags {48} = isTaken; // Branch prediction.
+ let TSFlags {49} = isTaken; // Branch prediction.
bits<1> isFP = 0;
- let TSFlags {49} = isFP; // Floating-point.
+ let TSFlags {50} = isFP; // Floating-point.
bits<1> isSomeOK = 0;
- let TSFlags {50} = isSomeOK; // Relax some grouping constraints.
+ let TSFlags {51} = isSomeOK; // Relax some grouping constraints.
bits<1> hasNewValue2 = 0;
- let TSFlags{51} = hasNewValue2; // Second New-value producer insn.
+ let TSFlags{52} = hasNewValue2; // Second New-value producer insn.
bits<3> opNewValue2 = 0;
- let TSFlags{54-52} = opNewValue2; // Second New-value produced operand.
+ let TSFlags{55-53} = opNewValue2; // Second New-value produced operand.
bits<1> isAccumulator = 0;
- let TSFlags{55} = isAccumulator;
+ let TSFlags{56} = isAccumulator;
bits<1> prefersSlot3 = 0;
- let TSFlags{56} = prefersSlot3; // Complex XU
+ let TSFlags{57} = prefersSlot3; // Complex XU
bits<1> hasTmpDst = 0;
- let TSFlags{59} = hasTmpDst; // v65 : 'fake" register VTMP is set
+ let TSFlags{60} = hasTmpDst; // v65: "fake" register VTMP is set
bit CVINew = 0;
- let TSFlags{61} = CVINew;
+ let TSFlags{62} = CVINew;
// Fields used for relation models.
bit isNonTemporal = 0;
@@ -194,8 +194,6 @@ class HInst<dag outs, dag ins, string asmstr, InstrItinClass itin, IType type> :
// Instruction Classes Definitions +
//===----------------------------------------------------------------------===//
-// LD Instruction Class in V2/V3/V4.
-// Definition of the instruction class NOT CHANGED.
let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
@@ -205,9 +203,6 @@ class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
-// ST Instruction Class in V2/V3 can take SLOT0 only.
-// ST Instruction Class in V4 can take SLOT0 & SLOT1.
-// Definition of the instruction class CHANGED from V2/V3 to V4.
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
@@ -235,15 +230,6 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Instruction Classes Definitions -
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// V4 Instruction Format Definitions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrFormatsV4.td"
-
-//===----------------------------------------------------------------------===//
-// V60+ Instruction Format Definitions +
-//===----------------------------------------------------------------------===//
-
+include "HexagonInstrFormatsV5.td"
include "HexagonInstrFormatsV60.td"
include "HexagonInstrFormatsV65.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
index c5fa25995212..c8de5cbcc1e0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
@@ -1,4 +1,4 @@
-//==- HexagonInstrFormatsV4.td - Hexagon Instruction Formats --*- tablegen -==//
+//==- HexagonInstrFormatsV5.td - Hexagon Instruction Formats --*- tablegen -==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the Hexagon V4 instruction classes in TableGen format.
+// This file describes the Hexagon V5 instruction classes in TableGen format.
//
//===----------------------------------------------------------------------===//
@@ -49,39 +49,39 @@ class InstDuplex<bits<4> iClass, list<dag> pattern = [],
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
- let TSFlags{5-0} = Type.Value;
+ let TSFlags{6-0} = Type.Value;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{6} = isPredicated;
+ let TSFlags{7} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{7} = isPredicatedFalse;
+ let TSFlags{8} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{8} = isPredicatedNew;
+ let TSFlags{9} = isPredicatedNew;
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{9} = isNewValue; // New-value consumer insn.
+ let TSFlags{10} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{10} = hasNewValue; // New-value producer insn.
+ let TSFlags{11} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ let TSFlags{14-12} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{15} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{15} = isNVStore; // New-value store insn.
+ let TSFlags{16} = isNVStore; // New-value store insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{16} = isExtendable; // Insn may be extended.
+ let TSFlags{17} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{17} = isExtended; // Insn must be extended.
+ let TSFlags{18} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ let TSFlags{21-19} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{22} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{26-22} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{27-23} = opExtentBits; //Number of bits of range before extending.
bits<2> opExtentAlign = 0;
- let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+ let TSFlags{29-28} = opExtentAlign; // Alignment exponent before extending.
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 6019c7c5d024..de0d6c4d9e4e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -335,37 +335,37 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
/// This function checks if the instruction or bundle of instructions
/// has load from stack slot and returns frameindex and machine memory
/// operand of that instruction if true.
-bool HexagonInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool HexagonInstrInfo::hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
if (MI.isBundle()) {
const MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (TargetInstrInfo::hasLoadFromStackSlot(*MII, MMO, FrameIndex))
+ if (TargetInstrInfo::hasLoadFromStackSlot(*MII, Accesses))
return true;
return false;
}
- return TargetInstrInfo::hasLoadFromStackSlot(MI, MMO, FrameIndex);
+ return TargetInstrInfo::hasLoadFromStackSlot(MI, Accesses);
}
/// This function checks if the instruction or bundle of instructions
/// has store to stack slot and returns frameindex and machine memory
/// operand of that instruction if true.
-bool HexagonInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
+bool HexagonInstrInfo::hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
if (MI.isBundle()) {
const MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (TargetInstrInfo::hasStoreToStackSlot(*MII, MMO, FrameIndex))
+ if (TargetInstrInfo::hasStoreToStackSlot(*MII, Accesses))
return true;
return false;
}
- return TargetInstrInfo::hasStoreToStackSlot(MI, MMO, FrameIndex);
+ return TargetInstrInfo::hasStoreToStackSlot(MI, Accesses);
}
/// This function can analyze one/two way branching only and should (mostly) be
@@ -1086,19 +1086,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned NewOpc = Aligned ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32Ub_ai;
unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass);
- MachineInstr *MI1New =
- BuildMI(MBB, MI, DL, get(NewOpc))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(SrcSubLo)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(SrcSubLo)
+ .cloneMemRefs(MI);
MI1New->getOperand(0).setIsKill(false);
BuildMI(MBB, MI, DL, get(NewOpc))
.add(MI.getOperand(0))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(1).getImm() + Offset)
.addReg(SrcSubHi)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1111,15 +1110,15 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc),
HRI.getSubReg(DstReg, Hexagon::vsub_lo))
- .add(MI.getOperand(1))
- .addImm(MI.getOperand(2).getImm())
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm())
+ .cloneMemRefs(MI);
MI1New->getOperand(1).setIsKill(false);
BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi))
.add(MI.getOperand(1))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(2).getImm() + Offset)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .cloneMemRefs(MI);
MBB.erase(MI);
return true;
}
@@ -1294,7 +1293,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
.add(Op0)
.addReg(PReg, S)
- .add(Op1)
.addReg(SrcHi)
.addReg(SrcLo);
if (IsDestLive)
@@ -1342,81 +1340,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(Hexagon::J2_jumprfnew));
return true;
- case Hexagon::V6_vgathermh_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermw_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhw_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermwq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
- case Hexagon::V6_vgathermhwq_pseudo:
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(0)
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return true;
-
case Hexagon::PS_loadrub_pci:
return RealCirc(Hexagon::L2_loadrub_pci, /*HasImm*/true, /*MxOp*/4);
case Hexagon::PS_loadrb_pci:
@@ -1466,6 +1389,93 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
}
+MachineBasicBlock::instr_iterator
+HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const {
+ MachineBasicBlock &MBB = *MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+ MachineBasicBlock::iterator First;
+
+ switch (Opc) {
+ case Hexagon::V6_vgathermh_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(0)
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+ }
+
+ return MI.getIterator();
+}
+
// We indicate that we want to reverse the branch by
// inserting the reversed branching opcode.
bool HexagonInstrInfo::reverseBranchCondition(
@@ -2883,14 +2893,15 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
}
/// Get the base register and byte offset of a load/store instr.
-bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
- unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI)
- const {
+bool HexagonInstrInfo::getMemOperandWithOffset(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
unsigned AccessSize = 0;
- int OffsetVal = 0;
- BaseReg = getBaseAndOffset(LdSt, OffsetVal, AccessSize);
- Offset = OffsetVal;
- return BaseReg != 0;
+ BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
+ assert((!BaseOp || BaseOp->isReg()) &&
+ "getMemOperandWithOffset only supports base "
+ "operands of type register.");
+ return BaseOp != nullptr;
}
/// Can these instructions execute at the same time in a bundle.
@@ -3097,21 +3108,22 @@ unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const {
// Returns the base register in a memory access (load/store). The offset is
// returned in Offset and the access size is returned in AccessSize.
-// If the base register has a subregister or the offset field does not contain
-// an immediate value, return 0.
-unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
- int &Offset, unsigned &AccessSize) const {
+// If the base operand has a subregister or the offset field does not contain
+// an immediate value, return nullptr.
+MachineOperand *HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
+ int64_t &Offset,
+ unsigned &AccessSize) const {
// Return if it is not a base+offset type instruction or a MemOp.
if (getAddrMode(MI) != HexagonII::BaseImmOffset &&
getAddrMode(MI) != HexagonII::BaseLongOffset &&
!isMemOp(MI) && !isPostIncrement(MI))
- return 0;
+ return nullptr;
AccessSize = getMemAccessSize(MI);
unsigned BasePos = 0, OffsetPos = 0;
if (!getBaseAndOffsetPosition(MI, BasePos, OffsetPos))
- return 0;
+ return nullptr;
// Post increment updates its EA after the mem access,
// so we need to treat its offset as zero.
@@ -3120,14 +3132,14 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
} else {
const MachineOperand &OffsetOp = MI.getOperand(OffsetPos);
if (!OffsetOp.isImm())
- return 0;
+ return nullptr;
Offset = OffsetOp.getImm();
}
const MachineOperand &BaseOp = MI.getOperand(BasePos);
if (BaseOp.getSubReg() != 0)
- return 0;
- return BaseOp.getReg();
+ return nullptr;
+ return &const_cast<MachineOperand&>(BaseOp);
}
/// Return the position of the base and offset operands for this instruction.
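Caller-side sketch of the updated query interface (illustrative fragment, not part of the patch; it assumes an in-tree build of the Hexagon backend): getBaseAndOffset() now hands back the base MachineOperand itself together with a 64-bit offset, and returns nullptr when the instruction has no simple base+offset form.

    #include "HexagonInstrInfo.h"
    using namespace llvm;

    static bool printBasePlusOffset(const HexagonInstrInfo &HII,
                                    const MachineInstr &MI) {
      int64_t Offset = 0;
      unsigned AccessSize = 0;
      MachineOperand *BaseOp = HII.getBaseAndOffset(MI, Offset, AccessSize);
      if (!BaseOp)        // no base+offset addressing, or non-immediate offset
        return false;
      // BaseOp->getReg() is the base register; post-increment forms report
      // Offset == 0 because the effective address is updated after the access.
      return BaseOp->isReg();
    }

getMemOperandWithOffset() follows the same convention and asserts that the returned operand is a register, matching the generic TargetInstrInfo hook it now overrides.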
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 96b4ffaba02f..9b840762e88a 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -69,16 +69,16 @@ public:
/// Check if the instruction or the bundle of instructions has
/// load from stack slots. Return the frameindex and machine memory operand
/// if true.
- bool hasLoadFromStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const override;
+ bool hasLoadFromStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const override;
/// Check if the instruction or the bundle of instructions has
/// store to stack slots. Return the frameindex and machine memory operand
/// if true.
- bool hasStoreToStackSlot(const MachineInstr &MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const override;
+ bool hasStoreToStackSlot(
+ const MachineInstr &MI,
+ SmallVectorImpl<const MachineMemOperand *> &Accesses) const override;
/// Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
@@ -216,9 +216,9 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
/// Get the base register and byte offset of a load/store instr.
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
/// Reverses the branch condition of the specified condition list,
/// returning false on success and true if it cannot be reversed.
@@ -436,8 +436,8 @@ public:
bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
unsigned getAddrMode(const MachineInstr &MI) const;
- unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
- unsigned &AccessSize) const;
+ MachineOperand *getBaseAndOffset(const MachineInstr &MI, int64_t &Offset,
+ unsigned &AccessSize) const;
SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const;
unsigned getCExtOpNum(const MachineInstr &MI) const;
HexagonII::CompoundGroup
@@ -472,6 +472,8 @@ public:
uint64_t getType(const MachineInstr &MI) const;
unsigned getUnits(const MachineInstr &MI) const;
+ MachineBasicBlock::instr_iterator expandVGatherPseudo(MachineInstr &MI) const;
+
/// getInstrTimingClassLatency - Compute the instruction latency of a given
/// instruction using Timing Class information, if available.
unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
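An illustrative fragment (again not from the patch) showing the new stack-slot queries: instead of a single MMO/FrameIndex out-parameter pair, the hooks now collect every matching MachineMemOperand, which lets a bundled instruction report all of its accesses.

    #include "HexagonInstrInfo.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/PseudoSourceValue.h"
    using namespace llvm;

    static void collectSpillLoads(const HexagonInstrInfo &HII,
                                  const MachineInstr &MI) {
      SmallVector<const MachineMemOperand *, 4> Accesses;
      if (!HII.hasLoadFromStackSlot(MI, Accesses))
        return;
      for (const MachineMemOperand *MMO : Accesses) {
        // Each access refers to a fixed stack slot via its pseudo source value.
        if (const auto *FS = dyn_cast_or_null<FixedStackPseudoSourceValue>(
                MMO->getPseudoValue()))
          (void)FS->getFrameIndex();   // the spill slot's frame index
      }
    }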
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index b25e316709c5..9cab5748bef2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -6,726 +6,78 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// This is populated based on the following specs:
-// Hexagon V2 Architecture
-// Application-Level Specification
-// 80-V9418-8 Rev. B
-// March 4, 2008
-//===----------------------------------------------------------------------===//
-class T_I_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$Is),
- (MI imm:$Is)>;
+// These intrinsic patterns are not auto-generated.
class T_R_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs),
(MI I32:$Rs)>;
-class T_P_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs),
- (MI I64:$Rs)>;
-
-class T_II_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
- : Pat<(IntID Imm1:$Is, Imm2:$It),
- (MI Imm1:$Is, Imm2:$It)>;
-
-class T_RI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID I32:$Rs, ImmPred:$It),
- (MI I32:$Rs, ImmPred:$It)>;
-
-class T_IR_pat <InstHexagon MI, Intrinsic IntID,
- PatFrag ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID ImmPred:$Is, I32:$Rt),
- (MI ImmPred:$Is, I32:$Rt)>;
-
-class T_PI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I64:$Rs, imm:$It),
- (MI I64:$Rs, imm:$It)>;
-
-class T_RP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I32:$Rs, I64:$Rt),
- (MI I32:$Rs, I64:$Rt)>;
-
class T_RR_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs, I32:$Rt),
(MI I32:$Rs, I32:$Rt)>;
-class T_PP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt),
- (MI I64:$Rs, I64:$Rt)>;
-
-class T_QQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt),
- (MI (C2_tfrrp I32:$Rs), (C2_tfrrp I32:$Rt))>;
-
-class T_QII_pat <InstHexagon MI, Intrinsic IntID, PatFrag Imm1, PatFrag Imm2>
- : Pat <(IntID I32:$Rp, Imm1:$Is, Imm2:$It),
- (MI (C2_tfrrp I32:$Rp), Imm1:$Is, Imm2:$It)>;
-
-class T_QRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rs, I32:$Rt),
- (MI (C2_tfrrp I32:$Rp), I32:$Rs, I32:$Rt)>;
-
-class T_QRI_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
- : Pat <(IntID I32:$Rp, I32:$Rs, ImmPred:$Is),
- (MI (C2_tfrrp I32:$Rp), I32:$Rs, ImmPred:$Is)>;
-
-class T_QIR_pat <InstHexagon MI, Intrinsic IntID, PatFrag ImmPred>
- : Pat <(IntID I32:$Rp, ImmPred:$Is, I32:$Rs),
- (MI (C2_tfrrp I32:$Rp), ImmPred:$Is, I32:$Rs)>;
-
-class T_QPP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I64:$Rs, I64:$Rt),
- (MI (C2_tfrrp I32:$Rp), I64:$Rs, I64:$Rt)>;
-
-class T_RRI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu),
- (MI I32:$Rs, I32:$Rt, imm:$Iu)>;
-
-class T_RII_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, imm:$It, imm:$Iu),
- (MI I32:$Rs, imm:$It, imm:$Iu)>;
-
-class T_IRI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$It, I32:$Rs, imm:$Iu),
- (MI imm:$It, I32:$Rs, imm:$Iu)>;
-
-class T_IRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID imm:$Is, I32:$Rs, I32:$Rt),
- (MI imm:$Is, I32:$Rs, I32:$Rt)>;
-
-class T_RIR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, imm:$Is, I32:$Rt),
- (MI I32:$Rs, imm:$Is, I32:$Rt)>;
-
-class T_RRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt, I32:$Ru),
- (MI I32:$Rs, I32:$Rt, I32:$Ru)>;
-
-class T_PPI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu),
- (MI I64:$Rs, I64:$Rt, imm:$Iu)>;
-
-class T_PII_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu),
- (MI I64:$Rs, imm:$It, imm:$Iu)>;
-
-class T_PPP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru),
- (MI I64:$Rs, I64:$Rt, I64:$Ru)>;
-
-class T_PPR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru),
- (MI I64:$Rs, I64:$Rt, I32:$Ru)>;
-
-class T_PRR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru),
- (MI I64:$Rs, I32:$Rt, I32:$Ru)>;
-
-class T_PPQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Rp),
- (MI I64:$Rs, I64:$Rt, (C2_tfrrp I32:$Rp))>;
-
-class T_PR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt),
- (MI I64:$Rs, I32:$Rt)>;
-
-class T_D_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID (F64:$Rs)),
- (MI (F64:$Rs))>;
-
-class T_DI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID F64:$Rs, ImmPred:$It),
- (MI F64:$Rs, ImmPred:$It)>;
-
-class T_F_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs),
- (MI F32:$Rs)>;
-
-class T_FI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID F32:$Rs, ImmPred:$It),
- (MI F32:$Rs, ImmPred:$It)>;
-
-class T_FF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt),
- (MI F32:$Rs, F32:$Rt)>;
-
-class T_DD_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, F64:$Rt),
- (MI F64:$Rs, F64:$Rt)>;
-
-class T_FFF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt, F32:$Ru),
- (MI F32:$Rs, F32:$Rt, F32:$Ru)>;
-
-class T_FFFQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, I32:$Rp),
- (MI F32:$Rs, F32:$Rt, F32:$Ru, (C2_tfrrp I32:$Rp))>;
-
-class T_Q_RI_pat <InstHexagon MI, Intrinsic IntID,
- PatLeaf ImmPred = PatLeaf<(i32 imm)>>
- : Pat<(IntID I32:$Rs, ImmPred:$It),
- (C2_tfrpr (MI I32:$Rs, ImmPred:$It))>;
-
-class T_Q_RR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rs, I32:$Rt),
- (C2_tfrpr (MI I32:$Rs, I32:$Rt))>;
-
-class T_Q_RP_pat <InstHexagon MI, Intrinsic IntID>
+class T_RP_pat <InstHexagon MI, Intrinsic IntID>
: Pat <(IntID I32:$Rs, I64:$Rt),
- (C2_tfrpr (MI I32:$Rs, I64:$Rt))>;
-
-class T_Q_PR_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I32:$Rt),
- (C2_tfrpr (MI I64:$Rs, I32:$Rt))>;
-
-class T_Q_PI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID I64:$Rs, imm:$It),
- (C2_tfrpr (MI I64:$Rs, imm:$It))>;
-
-class T_Q_PP_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I64:$Rs, I64:$Rt),
- (C2_tfrpr (MI I64:$Rs, I64:$Rt))>;
-
-class T_Q_Q_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp)))>;
-
-class T_Q_QQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rq),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp), (C2_tfrrp I32:$Rq)))>;
-
-class T_Q_FF_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, F32:$Rt),
- (C2_tfrpr (MI F32:$Rs, F32:$Rt))>;
-
-class T_Q_DD_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, F64:$Rt),
- (C2_tfrpr (MI F64:$Rs, F64:$Rt))>;
-
-class T_Q_FI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F32:$Rs, imm:$It),
- (C2_tfrpr (MI F32:$Rs, imm:$It))>;
-
-class T_Q_DI_pat <InstHexagon MI, Intrinsic IntID>
- : Pat<(IntID F64:$Rs, imm:$It),
- (C2_tfrpr (MI F64:$Rs, imm:$It))>;
-
-class T_Q_QQQ_pat <InstHexagon MI, Intrinsic IntID>
- : Pat <(IntID I32:$Rp, I32:$Rq, I32:$Rs),
- (C2_tfrpr (MI (C2_tfrrp I32:$Rp), (C2_tfrrp I32:$Rq),
- (C2_tfrrp I32:$Rs)))>;
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords
-//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_RR_pat <M2_mpy_ll_s1, int_hexagon_M2_mpy_ll_s1>;
-def : T_RR_pat <M2_mpy_ll_s0, int_hexagon_M2_mpy_ll_s0>;
-def : T_RR_pat <M2_mpy_lh_s1, int_hexagon_M2_mpy_lh_s1>;
-def : T_RR_pat <M2_mpy_lh_s0, int_hexagon_M2_mpy_lh_s0>;
-def : T_RR_pat <M2_mpy_hl_s1, int_hexagon_M2_mpy_hl_s1>;
-def : T_RR_pat <M2_mpy_hl_s0, int_hexagon_M2_mpy_hl_s0>;
-def : T_RR_pat <M2_mpy_hh_s1, int_hexagon_M2_mpy_hh_s1>;
-def : T_RR_pat <M2_mpy_hh_s0, int_hexagon_M2_mpy_hh_s0>;
-
-def : T_RR_pat <M2_mpyu_ll_s1, int_hexagon_M2_mpyu_ll_s1>;
-def : T_RR_pat <M2_mpyu_ll_s0, int_hexagon_M2_mpyu_ll_s0>;
-def : T_RR_pat <M2_mpyu_lh_s1, int_hexagon_M2_mpyu_lh_s1>;
-def : T_RR_pat <M2_mpyu_lh_s0, int_hexagon_M2_mpyu_lh_s0>;
-def : T_RR_pat <M2_mpyu_hl_s1, int_hexagon_M2_mpyu_hl_s1>;
-def : T_RR_pat <M2_mpyu_hl_s0, int_hexagon_M2_mpyu_hl_s0>;
-def : T_RR_pat <M2_mpyu_hh_s1, int_hexagon_M2_mpyu_hh_s1>;
-def : T_RR_pat <M2_mpyu_hh_s0, int_hexagon_M2_mpyu_hh_s0>;
-
-def : T_RR_pat <M2_mpy_sat_ll_s1, int_hexagon_M2_mpy_sat_ll_s1>;
-def : T_RR_pat <M2_mpy_sat_ll_s0, int_hexagon_M2_mpy_sat_ll_s0>;
-def : T_RR_pat <M2_mpy_sat_lh_s1, int_hexagon_M2_mpy_sat_lh_s1>;
-def : T_RR_pat <M2_mpy_sat_lh_s0, int_hexagon_M2_mpy_sat_lh_s0>;
-def : T_RR_pat <M2_mpy_sat_hl_s1, int_hexagon_M2_mpy_sat_hl_s1>;
-def : T_RR_pat <M2_mpy_sat_hl_s0, int_hexagon_M2_mpy_sat_hl_s0>;
-def : T_RR_pat <M2_mpy_sat_hh_s1, int_hexagon_M2_mpy_sat_hh_s1>;
-def : T_RR_pat <M2_mpy_sat_hh_s0, int_hexagon_M2_mpy_sat_hh_s0>;
-
-def : T_RR_pat <M2_mpy_rnd_ll_s1, int_hexagon_M2_mpy_rnd_ll_s1>;
-def : T_RR_pat <M2_mpy_rnd_ll_s0, int_hexagon_M2_mpy_rnd_ll_s0>;
-def : T_RR_pat <M2_mpy_rnd_lh_s1, int_hexagon_M2_mpy_rnd_lh_s1>;
-def : T_RR_pat <M2_mpy_rnd_lh_s0, int_hexagon_M2_mpy_rnd_lh_s0>;
-def : T_RR_pat <M2_mpy_rnd_hl_s1, int_hexagon_M2_mpy_rnd_hl_s1>;
-def : T_RR_pat <M2_mpy_rnd_hl_s0, int_hexagon_M2_mpy_rnd_hl_s0>;
-def : T_RR_pat <M2_mpy_rnd_hh_s1, int_hexagon_M2_mpy_rnd_hh_s1>;
-def : T_RR_pat <M2_mpy_rnd_hh_s0, int_hexagon_M2_mpy_rnd_hh_s0>;
-
-def : T_RR_pat <M2_mpy_sat_rnd_ll_s1, int_hexagon_M2_mpy_sat_rnd_ll_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_ll_s0, int_hexagon_M2_mpy_sat_rnd_ll_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_lh_s1, int_hexagon_M2_mpy_sat_rnd_lh_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_lh_s0, int_hexagon_M2_mpy_sat_rnd_lh_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_hl_s1, int_hexagon_M2_mpy_sat_rnd_hl_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_hl_s0, int_hexagon_M2_mpy_sat_rnd_hl_s0>;
-def : T_RR_pat <M2_mpy_sat_rnd_hh_s1, int_hexagon_M2_mpy_sat_rnd_hh_s1>;
-def : T_RR_pat <M2_mpy_sat_rnd_hh_s0, int_hexagon_M2_mpy_sat_rnd_hh_s0>;
-
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords and add/subtract the
-// result from the accumulator.
-//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_RRR_pat <M2_mpy_acc_ll_s1, int_hexagon_M2_mpy_acc_ll_s1>;
-def : T_RRR_pat <M2_mpy_acc_ll_s0, int_hexagon_M2_mpy_acc_ll_s0>;
-def : T_RRR_pat <M2_mpy_acc_lh_s1, int_hexagon_M2_mpy_acc_lh_s1>;
-def : T_RRR_pat <M2_mpy_acc_lh_s0, int_hexagon_M2_mpy_acc_lh_s0>;
-def : T_RRR_pat <M2_mpy_acc_hl_s1, int_hexagon_M2_mpy_acc_hl_s1>;
-def : T_RRR_pat <M2_mpy_acc_hl_s0, int_hexagon_M2_mpy_acc_hl_s0>;
-def : T_RRR_pat <M2_mpy_acc_hh_s1, int_hexagon_M2_mpy_acc_hh_s1>;
-def : T_RRR_pat <M2_mpy_acc_hh_s0, int_hexagon_M2_mpy_acc_hh_s0>;
-
-def : T_RRR_pat <M2_mpyu_acc_ll_s1, int_hexagon_M2_mpyu_acc_ll_s1>;
-def : T_RRR_pat <M2_mpyu_acc_ll_s0, int_hexagon_M2_mpyu_acc_ll_s0>;
-def : T_RRR_pat <M2_mpyu_acc_lh_s1, int_hexagon_M2_mpyu_acc_lh_s1>;
-def : T_RRR_pat <M2_mpyu_acc_lh_s0, int_hexagon_M2_mpyu_acc_lh_s0>;
-def : T_RRR_pat <M2_mpyu_acc_hl_s1, int_hexagon_M2_mpyu_acc_hl_s1>;
-def : T_RRR_pat <M2_mpyu_acc_hl_s0, int_hexagon_M2_mpyu_acc_hl_s0>;
-def : T_RRR_pat <M2_mpyu_acc_hh_s1, int_hexagon_M2_mpyu_acc_hh_s1>;
-def : T_RRR_pat <M2_mpyu_acc_hh_s0, int_hexagon_M2_mpyu_acc_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_nac_ll_s1, int_hexagon_M2_mpy_nac_ll_s1>;
-def : T_RRR_pat <M2_mpy_nac_ll_s0, int_hexagon_M2_mpy_nac_ll_s0>;
-def : T_RRR_pat <M2_mpy_nac_lh_s1, int_hexagon_M2_mpy_nac_lh_s1>;
-def : T_RRR_pat <M2_mpy_nac_lh_s0, int_hexagon_M2_mpy_nac_lh_s0>;
-def : T_RRR_pat <M2_mpy_nac_hl_s1, int_hexagon_M2_mpy_nac_hl_s1>;
-def : T_RRR_pat <M2_mpy_nac_hl_s0, int_hexagon_M2_mpy_nac_hl_s0>;
-def : T_RRR_pat <M2_mpy_nac_hh_s1, int_hexagon_M2_mpy_nac_hh_s1>;
-def : T_RRR_pat <M2_mpy_nac_hh_s0, int_hexagon_M2_mpy_nac_hh_s0>;
-
-def : T_RRR_pat <M2_mpyu_nac_ll_s1, int_hexagon_M2_mpyu_nac_ll_s1>;
-def : T_RRR_pat <M2_mpyu_nac_ll_s0, int_hexagon_M2_mpyu_nac_ll_s0>;
-def : T_RRR_pat <M2_mpyu_nac_lh_s1, int_hexagon_M2_mpyu_nac_lh_s1>;
-def : T_RRR_pat <M2_mpyu_nac_lh_s0, int_hexagon_M2_mpyu_nac_lh_s0>;
-def : T_RRR_pat <M2_mpyu_nac_hl_s1, int_hexagon_M2_mpyu_nac_hl_s1>;
-def : T_RRR_pat <M2_mpyu_nac_hl_s0, int_hexagon_M2_mpyu_nac_hl_s0>;
-def : T_RRR_pat <M2_mpyu_nac_hh_s1, int_hexagon_M2_mpyu_nac_hh_s1>;
-def : T_RRR_pat <M2_mpyu_nac_hh_s0, int_hexagon_M2_mpyu_nac_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_acc_sat_ll_s1, int_hexagon_M2_mpy_acc_sat_ll_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_ll_s0, int_hexagon_M2_mpy_acc_sat_ll_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_lh_s1, int_hexagon_M2_mpy_acc_sat_lh_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_lh_s0, int_hexagon_M2_mpy_acc_sat_lh_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_hl_s1, int_hexagon_M2_mpy_acc_sat_hl_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_hl_s0, int_hexagon_M2_mpy_acc_sat_hl_s0>;
-def : T_RRR_pat <M2_mpy_acc_sat_hh_s1, int_hexagon_M2_mpy_acc_sat_hh_s1>;
-def : T_RRR_pat <M2_mpy_acc_sat_hh_s0, int_hexagon_M2_mpy_acc_sat_hh_s0>;
-
-def : T_RRR_pat <M2_mpy_nac_sat_ll_s1, int_hexagon_M2_mpy_nac_sat_ll_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_ll_s0, int_hexagon_M2_mpy_nac_sat_ll_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_lh_s1, int_hexagon_M2_mpy_nac_sat_lh_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_lh_s0, int_hexagon_M2_mpy_nac_sat_lh_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_hl_s1, int_hexagon_M2_mpy_nac_sat_hl_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_hl_s0, int_hexagon_M2_mpy_nac_sat_hl_s0>;
-def : T_RRR_pat <M2_mpy_nac_sat_hh_s1, int_hexagon_M2_mpy_nac_sat_hh_s1>;
-def : T_RRR_pat <M2_mpy_nac_sat_hh_s0, int_hexagon_M2_mpy_nac_sat_hh_s0>;
-
-
-//===----------------------------------------------------------------------===//
-// Multiply signed/unsigned halfwords with and without saturation and rounding
-// into a 64-bits destination register.
-//===----------------------------------------------------------------------===//
-
-def : T_RR_pat <M2_mpyd_hh_s0, int_hexagon_M2_mpyd_hh_s0>;
-def : T_RR_pat <M2_mpyd_hl_s0, int_hexagon_M2_mpyd_hl_s0>;
-def : T_RR_pat <M2_mpyd_lh_s0, int_hexagon_M2_mpyd_lh_s0>;
-def : T_RR_pat <M2_mpyd_ll_s0, int_hexagon_M2_mpyd_ll_s0>;
-def : T_RR_pat <M2_mpyd_hh_s1, int_hexagon_M2_mpyd_hh_s1>;
-def : T_RR_pat <M2_mpyd_hl_s1, int_hexagon_M2_mpyd_hl_s1>;
-def : T_RR_pat <M2_mpyd_lh_s1, int_hexagon_M2_mpyd_lh_s1>;
-def : T_RR_pat <M2_mpyd_ll_s1, int_hexagon_M2_mpyd_ll_s1>;
-
-def : T_RR_pat <M2_mpyd_rnd_hh_s0, int_hexagon_M2_mpyd_rnd_hh_s0>;
-def : T_RR_pat <M2_mpyd_rnd_hl_s0, int_hexagon_M2_mpyd_rnd_hl_s0>;
-def : T_RR_pat <M2_mpyd_rnd_lh_s0, int_hexagon_M2_mpyd_rnd_lh_s0>;
-def : T_RR_pat <M2_mpyd_rnd_ll_s0, int_hexagon_M2_mpyd_rnd_ll_s0>;
-def : T_RR_pat <M2_mpyd_rnd_hh_s1, int_hexagon_M2_mpyd_rnd_hh_s1>;
-def : T_RR_pat <M2_mpyd_rnd_hl_s1, int_hexagon_M2_mpyd_rnd_hl_s1>;
-def : T_RR_pat <M2_mpyd_rnd_lh_s1, int_hexagon_M2_mpyd_rnd_lh_s1>;
-def : T_RR_pat <M2_mpyd_rnd_ll_s1, int_hexagon_M2_mpyd_rnd_ll_s1>;
-
-def : T_RR_pat <M2_mpyud_hh_s0, int_hexagon_M2_mpyud_hh_s0>;
-def : T_RR_pat <M2_mpyud_hl_s0, int_hexagon_M2_mpyud_hl_s0>;
-def : T_RR_pat <M2_mpyud_lh_s0, int_hexagon_M2_mpyud_lh_s0>;
-def : T_RR_pat <M2_mpyud_ll_s0, int_hexagon_M2_mpyud_ll_s0>;
-def : T_RR_pat <M2_mpyud_hh_s1, int_hexagon_M2_mpyud_hh_s1>;
-def : T_RR_pat <M2_mpyud_hl_s1, int_hexagon_M2_mpyud_hl_s1>;
-def : T_RR_pat <M2_mpyud_lh_s1, int_hexagon_M2_mpyud_lh_s1>;
-def : T_RR_pat <M2_mpyud_ll_s1, int_hexagon_M2_mpyud_ll_s1>;
-
-//===----------------------------------------------------------------------===//
-// MPYS / Multipy signed/unsigned halfwords and add/subtract the
-// result from the 64-bit destination register.
-//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-def : T_PRR_pat <M2_mpyd_acc_hh_s0, int_hexagon_M2_mpyd_acc_hh_s0>;
-def : T_PRR_pat <M2_mpyd_acc_hl_s0, int_hexagon_M2_mpyd_acc_hl_s0>;
-def : T_PRR_pat <M2_mpyd_acc_lh_s0, int_hexagon_M2_mpyd_acc_lh_s0>;
-def : T_PRR_pat <M2_mpyd_acc_ll_s0, int_hexagon_M2_mpyd_acc_ll_s0>;
-
-def : T_PRR_pat <M2_mpyd_acc_hh_s1, int_hexagon_M2_mpyd_acc_hh_s1>;
-def : T_PRR_pat <M2_mpyd_acc_hl_s1, int_hexagon_M2_mpyd_acc_hl_s1>;
-def : T_PRR_pat <M2_mpyd_acc_lh_s1, int_hexagon_M2_mpyd_acc_lh_s1>;
-def : T_PRR_pat <M2_mpyd_acc_ll_s1, int_hexagon_M2_mpyd_acc_ll_s1>;
-
-def : T_PRR_pat <M2_mpyd_nac_hh_s0, int_hexagon_M2_mpyd_nac_hh_s0>;
-def : T_PRR_pat <M2_mpyd_nac_hl_s0, int_hexagon_M2_mpyd_nac_hl_s0>;
-def : T_PRR_pat <M2_mpyd_nac_lh_s0, int_hexagon_M2_mpyd_nac_lh_s0>;
-def : T_PRR_pat <M2_mpyd_nac_ll_s0, int_hexagon_M2_mpyd_nac_ll_s0>;
-
-def : T_PRR_pat <M2_mpyd_nac_hh_s1, int_hexagon_M2_mpyd_nac_hh_s1>;
-def : T_PRR_pat <M2_mpyd_nac_hl_s1, int_hexagon_M2_mpyd_nac_hl_s1>;
-def : T_PRR_pat <M2_mpyd_nac_lh_s1, int_hexagon_M2_mpyd_nac_lh_s1>;
-def : T_PRR_pat <M2_mpyd_nac_ll_s1, int_hexagon_M2_mpyd_nac_ll_s1>;
-
-def : T_PRR_pat <M2_mpyud_acc_hh_s0, int_hexagon_M2_mpyud_acc_hh_s0>;
-def : T_PRR_pat <M2_mpyud_acc_hl_s0, int_hexagon_M2_mpyud_acc_hl_s0>;
-def : T_PRR_pat <M2_mpyud_acc_lh_s0, int_hexagon_M2_mpyud_acc_lh_s0>;
-def : T_PRR_pat <M2_mpyud_acc_ll_s0, int_hexagon_M2_mpyud_acc_ll_s0>;
-
-def : T_PRR_pat <M2_mpyud_acc_hh_s1, int_hexagon_M2_mpyud_acc_hh_s1>;
-def : T_PRR_pat <M2_mpyud_acc_hl_s1, int_hexagon_M2_mpyud_acc_hl_s1>;
-def : T_PRR_pat <M2_mpyud_acc_lh_s1, int_hexagon_M2_mpyud_acc_lh_s1>;
-def : T_PRR_pat <M2_mpyud_acc_ll_s1, int_hexagon_M2_mpyud_acc_ll_s1>;
-
-def : T_PRR_pat <M2_mpyud_nac_hh_s0, int_hexagon_M2_mpyud_nac_hh_s0>;
-def : T_PRR_pat <M2_mpyud_nac_hl_s0, int_hexagon_M2_mpyud_nac_hl_s0>;
-def : T_PRR_pat <M2_mpyud_nac_lh_s0, int_hexagon_M2_mpyud_nac_lh_s0>;
-def : T_PRR_pat <M2_mpyud_nac_ll_s0, int_hexagon_M2_mpyud_nac_ll_s0>;
-
-def : T_PRR_pat <M2_mpyud_nac_hh_s1, int_hexagon_M2_mpyud_nac_hh_s1>;
-def : T_PRR_pat <M2_mpyud_nac_hl_s1, int_hexagon_M2_mpyud_nac_hl_s1>;
-def : T_PRR_pat <M2_mpyud_nac_lh_s1, int_hexagon_M2_mpyud_nac_lh_s1>;
-def : T_PRR_pat <M2_mpyud_nac_ll_s1, int_hexagon_M2_mpyud_nac_ll_s1>;
-
-// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vcmpy_s1_sat_i, int_hexagon_M2_vcmpy_s1_sat_i>;
-def : T_PP_pat <M2_vcmpy_s0_sat_i, int_hexagon_M2_vcmpy_s0_sat_i>;
-
-// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vcmpy_s1_sat_r, int_hexagon_M2_vcmpy_s1_sat_r>;
-def : T_PP_pat <M2_vcmpy_s0_sat_r, int_hexagon_M2_vcmpy_s0_sat_r>;
-
-// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vdmpys_s1, int_hexagon_M2_vdmpys_s1>;
-def : T_PP_pat <M2_vdmpys_s0, int_hexagon_M2_vdmpys_s0>;
-
-// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
-def : T_PP_pat <M2_vmpy2es_s1, int_hexagon_M2_vmpy2es_s1>;
-def : T_PP_pat <M2_vmpy2es_s0, int_hexagon_M2_vmpy2es_s0>;
-
-//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyh_s0, int_hexagon_M2_mmpyh_s0>;
-def : T_PP_pat <M2_mmpyh_s1, int_hexagon_M2_mmpyh_s1>;
-def : T_PP_pat <M2_mmpyh_rs0, int_hexagon_M2_mmpyh_rs0>;
-def : T_PP_pat <M2_mmpyh_rs1, int_hexagon_M2_mmpyh_rs1>;
-
-//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyl_s0, int_hexagon_M2_mmpyl_s0>;
-def : T_PP_pat <M2_mmpyl_s1, int_hexagon_M2_mmpyl_s1>;
-def : T_PP_pat <M2_mmpyl_rs0, int_hexagon_M2_mmpyl_rs0>;
-def : T_PP_pat <M2_mmpyl_rs1, int_hexagon_M2_mmpyl_rs1>;
-
-//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyuh_s0, int_hexagon_M2_mmpyuh_s0>;
-def : T_PP_pat <M2_mmpyuh_s1, int_hexagon_M2_mmpyuh_s1>;
-def : T_PP_pat <M2_mmpyuh_rs0, int_hexagon_M2_mmpyuh_rs0>;
-def : T_PP_pat <M2_mmpyuh_rs1, int_hexagon_M2_mmpyuh_rs1>;
-
-//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PP_pat <M2_mmpyul_s0, int_hexagon_M2_mmpyul_s0>;
-def : T_PP_pat <M2_mmpyul_s1, int_hexagon_M2_mmpyul_s1>;
-def : T_PP_pat <M2_mmpyul_rs0, int_hexagon_M2_mmpyul_rs0>;
-def : T_PP_pat <M2_mmpyul_rs1, int_hexagon_M2_mmpyul_rs1>;
-
-// Vector reduce add unsigned bytes: Rdd32[+]=vrmpybu(Rss32,Rtt32)
-def : T_PP_pat <A2_vraddub, int_hexagon_A2_vraddub>;
-def : T_PPP_pat <A2_vraddub_acc, int_hexagon_A2_vraddub_acc>;
-
-// Vector sum of absolute differences unsigned bytes: Rdd=vrsadub(Rss,Rtt)
-def : T_PP_pat <A2_vrsadub, int_hexagon_A2_vrsadub>;
-def : T_PPP_pat <A2_vrsadub_acc, int_hexagon_A2_vrsadub_acc>;
-
-// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
-def : T_PP_pat <M2_vabsdiffh, int_hexagon_M2_vabsdiffh>;
-
-// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
-def : T_PP_pat <M2_vabsdiffw, int_hexagon_M2_vabsdiffw>;
-
-// Vector reduce complex multiply real or imaginary:
-// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
-def : T_PP_pat <M2_vrcmpyi_s0, int_hexagon_M2_vrcmpyi_s0>;
-def : T_PP_pat <M2_vrcmpyi_s0c, int_hexagon_M2_vrcmpyi_s0c>;
-def : T_PPP_pat <M2_vrcmaci_s0, int_hexagon_M2_vrcmaci_s0>;
-def : T_PPP_pat <M2_vrcmaci_s0c, int_hexagon_M2_vrcmaci_s0c>;
-
-def : T_PP_pat <M2_vrcmpyr_s0, int_hexagon_M2_vrcmpyr_s0>;
-def : T_PP_pat <M2_vrcmpyr_s0c, int_hexagon_M2_vrcmpyr_s0c>;
-def : T_PPP_pat <M2_vrcmacr_s0, int_hexagon_M2_vrcmacr_s0>;
-def : T_PPP_pat <M2_vrcmacr_s0c, int_hexagon_M2_vrcmacr_s0c>;
-
-// Vector reduce halfwords
-// Rdd[+]=vrmpyh(Rss,Rtt)
-def : T_PP_pat <M2_vrmpy_s0, int_hexagon_M2_vrmpy_s0>;
-def : T_PPP_pat <M2_vrmac_s0, int_hexagon_M2_vrmac_s0>;
-
-//===----------------------------------------------------------------------===//
-// Vector Multipy with accumulation
-//===----------------------------------------------------------------------===//
-
-// Vector multiply word by signed half with accumulation
-// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PPP_pat <M2_mmacls_s1, int_hexagon_M2_mmacls_s1>;
-def : T_PPP_pat <M2_mmacls_s0, int_hexagon_M2_mmacls_s0>;
-def : T_PPP_pat <M2_mmacls_rs1, int_hexagon_M2_mmacls_rs1>;
-def : T_PPP_pat <M2_mmacls_rs0, int_hexagon_M2_mmacls_rs0>;
-def : T_PPP_pat <M2_mmachs_s1, int_hexagon_M2_mmachs_s1>;
-def : T_PPP_pat <M2_mmachs_s0, int_hexagon_M2_mmachs_s0>;
-def : T_PPP_pat <M2_mmachs_rs1, int_hexagon_M2_mmachs_rs1>;
-def : T_PPP_pat <M2_mmachs_rs0, int_hexagon_M2_mmachs_rs0>;
-
-// Vector multiply word by unsigned half with accumulation
-// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
-def : T_PPP_pat <M2_mmaculs_s1, int_hexagon_M2_mmaculs_s1>;
-def : T_PPP_pat <M2_mmaculs_s0, int_hexagon_M2_mmaculs_s0>;
-def : T_PPP_pat <M2_mmaculs_rs1, int_hexagon_M2_mmaculs_rs1>;
-def : T_PPP_pat <M2_mmaculs_rs0, int_hexagon_M2_mmaculs_rs0>;
-def : T_PPP_pat <M2_mmacuhs_s1, int_hexagon_M2_mmacuhs_s1>;
-def : T_PPP_pat <M2_mmacuhs_s0, int_hexagon_M2_mmacuhs_s0>;
-def : T_PPP_pat <M2_mmacuhs_rs1, int_hexagon_M2_mmacuhs_rs1>;
-def : T_PPP_pat <M2_mmacuhs_rs0, int_hexagon_M2_mmacuhs_rs0>;
-
-// Vector multiply even halfwords with accumulation
-// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
-def : T_PPP_pat <M2_vmac2es, int_hexagon_M2_vmac2es>;
-def : T_PPP_pat <M2_vmac2es_s1, int_hexagon_M2_vmac2es_s1>;
-def : T_PPP_pat <M2_vmac2es_s0, int_hexagon_M2_vmac2es_s0>;
-
-// Vector dual multiply with accumulation
-// Rxx+=vdmpy(Rss,Rtt)[:sat]
-def : T_PPP_pat <M2_vdmacs_s1, int_hexagon_M2_vdmacs_s1>;
-def : T_PPP_pat <M2_vdmacs_s0, int_hexagon_M2_vdmacs_s0>;
-
-// Vector complex multiply real or imaginary with accumulation
-// Rxx+=vcmpy[ir](Rss,Rtt):sat
-def : T_PPP_pat <M2_vcmac_s0_sat_r, int_hexagon_M2_vcmac_s0_sat_r>;
-def : T_PPP_pat <M2_vcmac_s0_sat_i, int_hexagon_M2_vcmac_s0_sat_i>;
-
-//===----------------------------------------------------------------------===//
-// Add/Subtract halfword
-// Rd=add(Rt.L,Rs.[HL])[:sat]
-// Rd=sub(Rt.L,Rs.[HL])[:sat]
-// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
-// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
-//===----------------------------------------------------------------------===//
-
-//Rd=add(Rt.L,Rs.[LH])
-def : T_RR_pat <A2_addh_l16_ll, int_hexagon_A2_addh_l16_ll>;
-def : T_RR_pat <A2_addh_l16_hl, int_hexagon_A2_addh_l16_hl>;
-
-//Rd=add(Rt.L,Rs.[LH]):sat
-def : T_RR_pat <A2_addh_l16_sat_ll, int_hexagon_A2_addh_l16_sat_ll>;
-def : T_RR_pat <A2_addh_l16_sat_hl, int_hexagon_A2_addh_l16_sat_hl>;
-
-//Rd=sub(Rt.L,Rs.[LH])
-def : T_RR_pat <A2_subh_l16_ll, int_hexagon_A2_subh_l16_ll>;
-def : T_RR_pat <A2_subh_l16_hl, int_hexagon_A2_subh_l16_hl>;
-
-//Rd=sub(Rt.L,Rs.[LH]):sat
-def : T_RR_pat <A2_subh_l16_sat_ll, int_hexagon_A2_subh_l16_sat_ll>;
-def : T_RR_pat <A2_subh_l16_sat_hl, int_hexagon_A2_subh_l16_sat_hl>;
-
-//Rd=add(Rt.[LH],Rs.[LH]):<<16
-def : T_RR_pat <A2_addh_h16_ll, int_hexagon_A2_addh_h16_ll>;
-def : T_RR_pat <A2_addh_h16_lh, int_hexagon_A2_addh_h16_lh>;
-def : T_RR_pat <A2_addh_h16_hl, int_hexagon_A2_addh_h16_hl>;
-def : T_RR_pat <A2_addh_h16_hh, int_hexagon_A2_addh_h16_hh>;
-
-//Rd=sub(Rt.[LH],Rs.[LH]):<<16
-def : T_RR_pat <A2_subh_h16_ll, int_hexagon_A2_subh_h16_ll>;
-def : T_RR_pat <A2_subh_h16_lh, int_hexagon_A2_subh_h16_lh>;
-def : T_RR_pat <A2_subh_h16_hl, int_hexagon_A2_subh_h16_hl>;
-def : T_RR_pat <A2_subh_h16_hh, int_hexagon_A2_subh_h16_hh>;
-
-//Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
-def : T_RR_pat <A2_addh_h16_sat_ll, int_hexagon_A2_addh_h16_sat_ll>;
-def : T_RR_pat <A2_addh_h16_sat_lh, int_hexagon_A2_addh_h16_sat_lh>;
-def : T_RR_pat <A2_addh_h16_sat_hl, int_hexagon_A2_addh_h16_sat_hl>;
-def : T_RR_pat <A2_addh_h16_sat_hh, int_hexagon_A2_addh_h16_sat_hh>;
-
-//Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
-def : T_RR_pat <A2_subh_h16_sat_ll, int_hexagon_A2_subh_h16_sat_ll>;
-def : T_RR_pat <A2_subh_h16_sat_lh, int_hexagon_A2_subh_h16_sat_lh>;
-def : T_RR_pat <A2_subh_h16_sat_hl, int_hexagon_A2_subh_h16_sat_hl>;
-def : T_RR_pat <A2_subh_h16_sat_hh, int_hexagon_A2_subh_h16_sat_hh>;
-
-// ALU64 / ALU / min max
-def : T_RR_pat<A2_max, int_hexagon_A2_max>;
-def : T_RR_pat<A2_min, int_hexagon_A2_min>;
-def : T_RR_pat<A2_maxu, int_hexagon_A2_maxu>;
-def : T_RR_pat<A2_minu, int_hexagon_A2_minu>;
-
-// Shift and accumulate
-def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
-def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
-def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
-def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
-def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
-def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
-
-def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
-def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
-def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
-def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
-def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
-def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
-
-def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
-def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
-def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
-def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
-def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
-def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
-
-def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
-def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
-def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
-def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
-def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
-def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
-
-def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
-def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
-def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
-def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
-def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
-def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
-def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
-def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
-
-def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
-def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
-def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
-def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
-def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
-def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
-def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
-def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
-
-def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
-def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
-def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
-def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
-def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
-def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
-def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
-def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
-
-def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
-def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
-def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
-def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
-def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
-def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
-def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
-def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
-
-def : T_RRI_pat <S2_asr_i_r_nac, int_hexagon_S2_asr_i_r_nac>;
-def : T_RRI_pat <S2_lsr_i_r_nac, int_hexagon_S2_lsr_i_r_nac>;
-def : T_RRI_pat <S2_asl_i_r_nac, int_hexagon_S2_asl_i_r_nac>;
-def : T_RRI_pat <S2_asr_i_r_acc, int_hexagon_S2_asr_i_r_acc>;
-def : T_RRI_pat <S2_lsr_i_r_acc, int_hexagon_S2_lsr_i_r_acc>;
-def : T_RRI_pat <S2_asl_i_r_acc, int_hexagon_S2_asl_i_r_acc>;
-
-def : T_RRI_pat <S2_asr_i_r_and, int_hexagon_S2_asr_i_r_and>;
-def : T_RRI_pat <S2_lsr_i_r_and, int_hexagon_S2_lsr_i_r_and>;
-def : T_RRI_pat <S2_asl_i_r_and, int_hexagon_S2_asl_i_r_and>;
-def : T_RRI_pat <S2_asr_i_r_or, int_hexagon_S2_asr_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_or, int_hexagon_S2_lsr_i_r_or>;
-def : T_RRI_pat <S2_asl_i_r_or, int_hexagon_S2_asl_i_r_or>;
-def : T_RRI_pat <S2_lsr_i_r_xacc, int_hexagon_S2_lsr_i_r_xacc>;
-def : T_RRI_pat <S2_asl_i_r_xacc, int_hexagon_S2_asl_i_r_xacc>;
-
-def : T_PPI_pat <S2_asr_i_p_nac, int_hexagon_S2_asr_i_p_nac>;
-def : T_PPI_pat <S2_lsr_i_p_nac, int_hexagon_S2_lsr_i_p_nac>;
-def : T_PPI_pat <S2_asl_i_p_nac, int_hexagon_S2_asl_i_p_nac>;
-def : T_PPI_pat <S2_asr_i_p_acc, int_hexagon_S2_asr_i_p_acc>;
-def : T_PPI_pat <S2_lsr_i_p_acc, int_hexagon_S2_lsr_i_p_acc>;
-def : T_PPI_pat <S2_asl_i_p_acc, int_hexagon_S2_asl_i_p_acc>;
-
-def : T_PPI_pat <S2_asr_i_p_and, int_hexagon_S2_asr_i_p_and>;
-def : T_PPI_pat <S2_lsr_i_p_and, int_hexagon_S2_lsr_i_p_and>;
-def : T_PPI_pat <S2_asl_i_p_and, int_hexagon_S2_asl_i_p_and>;
-def : T_PPI_pat <S2_asr_i_p_or, int_hexagon_S2_asr_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_or, int_hexagon_S2_lsr_i_p_or>;
-def : T_PPI_pat <S2_asl_i_p_or, int_hexagon_S2_asl_i_p_or>;
-def : T_PPI_pat <S2_lsr_i_p_xacc, int_hexagon_S2_lsr_i_p_xacc>;
-def : T_PPI_pat <S2_asl_i_p_xacc, int_hexagon_S2_asl_i_p_xacc>;
-
-def : T_RRR_pat <S2_asr_r_r_nac, int_hexagon_S2_asr_r_r_nac>;
-def : T_RRR_pat <S2_lsr_r_r_nac, int_hexagon_S2_lsr_r_r_nac>;
-def : T_RRR_pat <S2_asl_r_r_nac, int_hexagon_S2_asl_r_r_nac>;
-def : T_RRR_pat <S2_lsl_r_r_nac, int_hexagon_S2_lsl_r_r_nac>;
-def : T_RRR_pat <S2_asr_r_r_acc, int_hexagon_S2_asr_r_r_acc>;
-def : T_RRR_pat <S2_lsr_r_r_acc, int_hexagon_S2_lsr_r_r_acc>;
-def : T_RRR_pat <S2_asl_r_r_acc, int_hexagon_S2_asl_r_r_acc>;
-def : T_RRR_pat <S2_lsl_r_r_acc, int_hexagon_S2_lsl_r_r_acc>;
-
-def : T_RRR_pat <S2_asr_r_r_and, int_hexagon_S2_asr_r_r_and>;
-def : T_RRR_pat <S2_lsr_r_r_and, int_hexagon_S2_lsr_r_r_and>;
-def : T_RRR_pat <S2_asl_r_r_and, int_hexagon_S2_asl_r_r_and>;
-def : T_RRR_pat <S2_lsl_r_r_and, int_hexagon_S2_lsl_r_r_and>;
-def : T_RRR_pat <S2_asr_r_r_or, int_hexagon_S2_asr_r_r_or>;
-def : T_RRR_pat <S2_lsr_r_r_or, int_hexagon_S2_lsr_r_r_or>;
-def : T_RRR_pat <S2_asl_r_r_or, int_hexagon_S2_asl_r_r_or>;
-def : T_RRR_pat <S2_lsl_r_r_or, int_hexagon_S2_lsl_r_r_or>;
-
-def : T_PPR_pat <S2_asr_r_p_nac, int_hexagon_S2_asr_r_p_nac>;
-def : T_PPR_pat <S2_lsr_r_p_nac, int_hexagon_S2_lsr_r_p_nac>;
-def : T_PPR_pat <S2_asl_r_p_nac, int_hexagon_S2_asl_r_p_nac>;
-def : T_PPR_pat <S2_lsl_r_p_nac, int_hexagon_S2_lsl_r_p_nac>;
-def : T_PPR_pat <S2_asr_r_p_acc, int_hexagon_S2_asr_r_p_acc>;
-def : T_PPR_pat <S2_lsr_r_p_acc, int_hexagon_S2_lsr_r_p_acc>;
-def : T_PPR_pat <S2_asl_r_p_acc, int_hexagon_S2_asl_r_p_acc>;
-def : T_PPR_pat <S2_lsl_r_p_acc, int_hexagon_S2_lsl_r_p_acc>;
-
-def : T_PPR_pat <S2_asr_r_p_and, int_hexagon_S2_asr_r_p_and>;
-def : T_PPR_pat <S2_lsr_r_p_and, int_hexagon_S2_lsr_r_p_and>;
-def : T_PPR_pat <S2_asl_r_p_and, int_hexagon_S2_asl_r_p_and>;
-def : T_PPR_pat <S2_lsl_r_p_and, int_hexagon_S2_lsl_r_p_and>;
-def : T_PPR_pat <S2_asr_r_p_or, int_hexagon_S2_asr_r_p_or>;
-def : T_PPR_pat <S2_lsr_r_p_or, int_hexagon_S2_lsr_r_p_or>;
-def : T_PPR_pat <S2_asl_r_p_or, int_hexagon_S2_asl_r_p_or>;
-def : T_PPR_pat <S2_lsl_r_p_or, int_hexagon_S2_lsl_r_p_or>;
-
-//*******************************************************************
-// ALU32/ALU
-//*******************************************************************
-def : T_RR_pat<A2_add, int_hexagon_A2_add>;
-def : T_RI_pat<A2_addi, int_hexagon_A2_addi>;
-def : T_RR_pat<A2_sub, int_hexagon_A2_sub>;
-def : T_IR_pat<A2_subri, int_hexagon_A2_subri>;
-def : T_RR_pat<A2_and, int_hexagon_A2_and>;
-def : T_RI_pat<A2_andir, int_hexagon_A2_andir>;
-def : T_RR_pat<A2_or, int_hexagon_A2_or>;
-def : T_RI_pat<A2_orir, int_hexagon_A2_orir>;
-def : T_RR_pat<A2_xor, int_hexagon_A2_xor>;
-def : T_RR_pat<A2_combinew, int_hexagon_A2_combinew>;
+ (MI I32:$Rs, I64:$Rt)>;
+
+def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt),
+ (A2_add IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16),
+ (A2_addi IntRegs:$Rs, imm:$s16)>;
+def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt),
+ (A2_sub IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs),
+ (A2_subri imm:$s10, IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt),
+ (A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+def: Pat<(int_hexagon_M2_mpyi IntRegs:$Rs, IntRegs:$Rt),
+ (M2_mpyi IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_mpyui IntRegs:$Rs, IntRegs:$Rt), // Same as M2_mpyi
+ (M2_mpyi IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_mpysmi IntRegs:$Rs, imm:$s9),
+ (M2_mpysmi IntRegs:$Rs, imm:$s9)>;
+def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt),
+ (M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt),
+ (M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5),
+ (S2_asl_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5),
+ (S2_lsr_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5),
+ (S2_asr_i_r IntRegs:$Rs, imm:$u5)>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6),
+ (S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>;
+
+def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt),
+ (A2_and IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10),
+ (A2_andir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt),
+ (A2_or IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10),
+ (A2_orir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt),
+ (A2_xor IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(int_hexagon_A2_sxtb IntRegs:$Rs),
+ (A2_sxtb IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_sxth IntRegs:$Rs),
+ (A2_sxth IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_zxtb IntRegs:$Rs),
+ (A2_zxtb IntRegs:$Rs)>;
+def: Pat<(int_hexagon_A2_zxth IntRegs:$Rs),
+ (A2_zxth IntRegs:$Rs)>;
// Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32)
def : Pat <(int_hexagon_A2_not I32:$Rs),
@@ -757,16 +109,6 @@ def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
(S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
-// Transfer immediate
-def : Pat <(int_hexagon_A2_tfril I32:$Rs, u16_0ImmPred:$Is),
- (A2_tfril I32:$Rs, u16_0ImmPred:$Is)>;
-def : Pat <(int_hexagon_A2_tfrih I32:$Rs, u16_0ImmPred:$Is),
- (A2_tfrih I32:$Rs, u16_0ImmPred:$Is)>;
-
-// Transfer Register/immediate.
-def : T_R_pat <A2_tfr, int_hexagon_A2_tfr>;
-def : T_I_pat <A2_tfrsi, int_hexagon_A2_tfrsi>;
-
def ImmExt64: SDNodeXForm<imm, [{
int64_t V = N->getSExtValue();
return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i64);
@@ -783,49 +125,6 @@ def ImmExt64: SDNodeXForm<imm, [{
def : Pat<(int_hexagon_A2_tfrpi imm:$Is),
(A2_tfrpi (ImmExt64 $Is))>;
-// Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32)
-def : Pat<(int_hexagon_A2_tfrp I64:$src),
- (A2_combinew (HiReg I64:$src), (LoReg I64:$src))>;
-
-//*******************************************************************
-// ALU32/PERM
-//*******************************************************************
-// Combine
-def: T_RR_pat<A2_combine_hh, int_hexagon_A2_combine_hh>;
-def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
-def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
-def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32_0ImmPred, s8_0ImmPred>;
-
-// Mux
-def : T_QRR_pat<C2_mux, int_hexagon_C2_mux>;
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32_0ImmPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32_0ImmPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32_0ImmPred, s8_0ImmPred>;
-
-// Shift halfword
-def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
-def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>;
-
-// Sign/zero extend
-def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>;
-def : T_R_pat<A2_sxtb, int_hexagon_A2_sxtb>;
-def : T_R_pat<A2_zxth, int_hexagon_A2_zxth>;
-def : T_R_pat<A2_zxtb, int_hexagon_A2_zxtb>;
-
-//*******************************************************************
-// ALU32/PRED
-//*******************************************************************
-// Compare
-def : T_Q_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
-def : T_Q_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
-def : T_Q_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-
-def : T_Q_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32_0ImmPred>;
-def : T_Q_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32_0ImmPred>;
-def : T_Q_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32_0ImmPred>;
-
def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
(C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
@@ -839,420 +138,6 @@ def : Pat <(int_hexagon_C2_cmplt I32:$src1, I32:$src2),
def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2),
(C2_tfrpr (C2_cmpgtu I32:$src2, I32:$src1))>;
-//*******************************************************************
-// ALU32/VH
-//*******************************************************************
-// Vector add, subtract, average halfwords
-def: T_RR_pat<A2_svaddh, int_hexagon_A2_svaddh>;
-def: T_RR_pat<A2_svaddhs, int_hexagon_A2_svaddhs>;
-def: T_RR_pat<A2_svadduhs, int_hexagon_A2_svadduhs>;
-
-def: T_RR_pat<A2_svsubh, int_hexagon_A2_svsubh>;
-def: T_RR_pat<A2_svsubhs, int_hexagon_A2_svsubhs>;
-def: T_RR_pat<A2_svsubuhs, int_hexagon_A2_svsubuhs>;
-
-def: T_RR_pat<A2_svavgh, int_hexagon_A2_svavgh>;
-def: T_RR_pat<A2_svavghs, int_hexagon_A2_svavghs>;
-def: T_RR_pat<A2_svnavgh, int_hexagon_A2_svnavgh>;
-
-//*******************************************************************
-// ALU64/ALU
-//*******************************************************************
-def: T_RR_pat<A2_addsat, int_hexagon_A2_addsat>;
-def: T_RR_pat<A2_subsat, int_hexagon_A2_subsat>;
-def: T_PP_pat<A2_addp, int_hexagon_A2_addp>;
-def: T_PP_pat<A2_subp, int_hexagon_A2_subp>;
-
-def: T_PP_pat<A2_andp, int_hexagon_A2_andp>;
-def: T_PP_pat<A2_orp, int_hexagon_A2_orp>;
-def: T_PP_pat<A2_xorp, int_hexagon_A2_xorp>;
-
-def: T_Q_PP_pat<C2_cmpeqp, int_hexagon_C2_cmpeqp>;
-def: T_Q_PP_pat<C2_cmpgtp, int_hexagon_C2_cmpgtp>;
-def: T_Q_PP_pat<C2_cmpgtup, int_hexagon_C2_cmpgtup>;
-
-def: T_PP_pat<S2_parityp, int_hexagon_S2_parityp>;
-def: T_RR_pat<S2_packhl, int_hexagon_S2_packhl>;
-
-//*******************************************************************
-// ALU64/VB
-//*******************************************************************
-// ALU64 - Vector add
-def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddub>;
-def : T_PP_pat <A2_vaddubs, int_hexagon_A2_vaddubs>;
-def : T_PP_pat <A2_vaddh, int_hexagon_A2_vaddh>;
-def : T_PP_pat <A2_vaddhs, int_hexagon_A2_vaddhs>;
-def : T_PP_pat <A2_vadduhs, int_hexagon_A2_vadduhs>;
-def : T_PP_pat <A2_vaddw, int_hexagon_A2_vaddw>;
-def : T_PP_pat <A2_vaddws, int_hexagon_A2_vaddws>;
-
-// ALU64 - Vector average
-def : T_PP_pat <A2_vavgub, int_hexagon_A2_vavgub>;
-def : T_PP_pat <A2_vavgubr, int_hexagon_A2_vavgubr>;
-def : T_PP_pat <A2_vavgh, int_hexagon_A2_vavgh>;
-def : T_PP_pat <A2_vavghr, int_hexagon_A2_vavghr>;
-def : T_PP_pat <A2_vavghcr, int_hexagon_A2_vavghcr>;
-def : T_PP_pat <A2_vavguh, int_hexagon_A2_vavguh>;
-def : T_PP_pat <A2_vavguhr, int_hexagon_A2_vavguhr>;
-
-def : T_PP_pat <A2_vavgw, int_hexagon_A2_vavgw>;
-def : T_PP_pat <A2_vavgwr, int_hexagon_A2_vavgwr>;
-def : T_PP_pat <A2_vavgwcr, int_hexagon_A2_vavgwcr>;
-def : T_PP_pat <A2_vavguw, int_hexagon_A2_vavguw>;
-def : T_PP_pat <A2_vavguwr, int_hexagon_A2_vavguwr>;
-
-// ALU64 - Vector negative average
-def : T_PP_pat <A2_vnavgh, int_hexagon_A2_vnavgh>;
-def : T_PP_pat <A2_vnavghr, int_hexagon_A2_vnavghr>;
-def : T_PP_pat <A2_vnavghcr, int_hexagon_A2_vnavghcr>;
-def : T_PP_pat <A2_vnavgw, int_hexagon_A2_vnavgw>;
-def : T_PP_pat <A2_vnavgwr, int_hexagon_A2_vnavgwr>;
-def : T_PP_pat <A2_vnavgwcr, int_hexagon_A2_vnavgwcr>;
-
-// ALU64 - Vector max
-def : T_PP_pat <A2_vmaxh, int_hexagon_A2_vmaxh>;
-def : T_PP_pat <A2_vmaxw, int_hexagon_A2_vmaxw>;
-def : T_PP_pat <A2_vmaxub, int_hexagon_A2_vmaxub>;
-def : T_PP_pat <A2_vmaxuh, int_hexagon_A2_vmaxuh>;
-def : T_PP_pat <A2_vmaxuw, int_hexagon_A2_vmaxuw>;
-
-// ALU64 - Vector min
-def : T_PP_pat <A2_vminh, int_hexagon_A2_vminh>;
-def : T_PP_pat <A2_vminw, int_hexagon_A2_vminw>;
-def : T_PP_pat <A2_vminub, int_hexagon_A2_vminub>;
-def : T_PP_pat <A2_vminuh, int_hexagon_A2_vminuh>;
-def : T_PP_pat <A2_vminuw, int_hexagon_A2_vminuw>;
-
-// ALU64 - Vector sub
-def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubub>;
-def : T_PP_pat <A2_vsububs, int_hexagon_A2_vsububs>;
-def : T_PP_pat <A2_vsubh, int_hexagon_A2_vsubh>;
-def : T_PP_pat <A2_vsubhs, int_hexagon_A2_vsubhs>;
-def : T_PP_pat <A2_vsubuhs, int_hexagon_A2_vsubuhs>;
-def : T_PP_pat <A2_vsubw, int_hexagon_A2_vsubw>;
-def : T_PP_pat <A2_vsubws, int_hexagon_A2_vsubws>;
-
-// ALU64 - Vector compare bytes
-def : T_Q_PP_pat <A2_vcmpbeq, int_hexagon_A2_vcmpbeq>;
-def : T_Q_PP_pat <A4_vcmpbgt, int_hexagon_A4_vcmpbgt>;
-def : T_Q_PP_pat <A2_vcmpbgtu, int_hexagon_A2_vcmpbgtu>;
-
-// ALU64 - Vector compare halfwords
-def : T_Q_PP_pat <A2_vcmpheq, int_hexagon_A2_vcmpheq>;
-def : T_Q_PP_pat <A2_vcmphgt, int_hexagon_A2_vcmphgt>;
-def : T_Q_PP_pat <A2_vcmphgtu, int_hexagon_A2_vcmphgtu>;
-
-// ALU64 - Vector compare words
-def : T_Q_PP_pat <A2_vcmpweq, int_hexagon_A2_vcmpweq>;
-def : T_Q_PP_pat <A2_vcmpwgt, int_hexagon_A2_vcmpwgt>;
-def : T_Q_PP_pat <A2_vcmpwgtu, int_hexagon_A2_vcmpwgtu>;
-
-// ALU64 / VB / Vector mux.
-def : T_QPP_pat <C2_vmux, int_hexagon_C2_vmux>;
-
-// MPY - Multiply and use full result
-// Rdd = mpy[u](Rs, Rt)
-def : T_RR_pat <M2_dpmpyss_s0, int_hexagon_M2_dpmpyss_s0>;
-def : T_RR_pat <M2_dpmpyuu_s0, int_hexagon_M2_dpmpyuu_s0>;
-
-// Complex multiply real or imaginary
-def : T_RR_pat <M2_cmpyi_s0, int_hexagon_M2_cmpyi_s0>;
-def : T_RR_pat <M2_cmpyr_s0, int_hexagon_M2_cmpyr_s0>;
-
-// Complex multiply
-def : T_RR_pat <M2_cmpys_s0, int_hexagon_M2_cmpys_s0>;
-def : T_RR_pat <M2_cmpysc_s0, int_hexagon_M2_cmpysc_s0>;
-def : T_RR_pat <M2_cmpys_s1, int_hexagon_M2_cmpys_s1>;
-def : T_RR_pat <M2_cmpysc_s1, int_hexagon_M2_cmpysc_s1>;
-
-// Vector multiply halfwords
-// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
-def : T_RR_pat <M2_vmpy2s_s0, int_hexagon_M2_vmpy2s_s0>;
-def : T_RR_pat <M2_vmpy2s_s1, int_hexagon_M2_vmpy2s_s1>;
-
-// Rxx[+-]= mpy[u](Rs,Rt)
-def : T_PRR_pat <M2_dpmpyss_acc_s0, int_hexagon_M2_dpmpyss_acc_s0>;
-def : T_PRR_pat <M2_dpmpyss_nac_s0, int_hexagon_M2_dpmpyss_nac_s0>;
-def : T_PRR_pat <M2_dpmpyuu_acc_s0, int_hexagon_M2_dpmpyuu_acc_s0>;
-def : T_PRR_pat <M2_dpmpyuu_nac_s0, int_hexagon_M2_dpmpyuu_nac_s0>;
-
-// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
-def : T_PRR_pat <M2_cmacs_s0, int_hexagon_M2_cmacs_s0>;
-def : T_PRR_pat <M2_cnacs_s0, int_hexagon_M2_cnacs_s0>;
-def : T_PRR_pat <M2_cmacs_s1, int_hexagon_M2_cmacs_s1>;
-def : T_PRR_pat <M2_cnacs_s1, int_hexagon_M2_cnacs_s1>;
-
-// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
-def : T_PRR_pat <M2_cmacsc_s0, int_hexagon_M2_cmacsc_s0>;
-def : T_PRR_pat <M2_cnacsc_s0, int_hexagon_M2_cnacsc_s0>;
-def : T_PRR_pat <M2_cmacsc_s1, int_hexagon_M2_cmacsc_s1>;
-def : T_PRR_pat <M2_cnacsc_s1, int_hexagon_M2_cnacsc_s1>;
-
-// Rxx+=cmpy[ir](Rs,Rt)
-def : T_PRR_pat <M2_cmaci_s0, int_hexagon_M2_cmaci_s0>;
-def : T_PRR_pat <M2_cmacr_s0, int_hexagon_M2_cmacr_s0>;
-
-// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
-def : T_PRR_pat <M2_vmac2, int_hexagon_M2_vmac2>;
-def : T_PRR_pat <M2_vmac2s_s0, int_hexagon_M2_vmac2s_s0>;
-def : T_PRR_pat <M2_vmac2s_s1, int_hexagon_M2_vmac2s_s1>;
-
-//*******************************************************************
-// CR
-//*******************************************************************
-def: T_Q_Q_pat<C2_not, int_hexagon_C2_not>;
-def: T_Q_Q_pat<C2_all8, int_hexagon_C2_all8>;
-def: T_Q_Q_pat<C2_any8, int_hexagon_C2_any8>;
-def: T_Q_Q_pat<C2_pxfer_map, int_hexagon_C2_pxfer_map>;
-
-def: T_Q_QQ_pat<C2_and, int_hexagon_C2_and>;
-def: T_Q_QQ_pat<C2_andn, int_hexagon_C2_andn>;
-def: T_Q_QQ_pat<C2_or, int_hexagon_C2_or>;
-def: T_Q_QQ_pat<C2_orn, int_hexagon_C2_orn>;
-def: T_Q_QQ_pat<C2_xor, int_hexagon_C2_xor>;
-
-// Multiply 32x32 and use lower result
-def : T_RRI_pat <M2_macsip, int_hexagon_M2_macsip>;
-def : T_RRI_pat <M2_macsin, int_hexagon_M2_macsin>;
-def : T_RRR_pat <M2_maci, int_hexagon_M2_maci>;
-
-// Subtract and accumulate
-def : T_RRR_pat <M2_subacc, int_hexagon_M2_subacc>;
-
-// Add and accumulate
-def : T_RRR_pat <M2_acci, int_hexagon_M2_acci>;
-def : T_RRR_pat <M2_nacci, int_hexagon_M2_nacci>;
-def : T_RRI_pat <M2_accii, int_hexagon_M2_accii>;
-def : T_RRI_pat <M2_naccii, int_hexagon_M2_naccii>;
-
-// XOR and XOR with destination
-def : T_RRR_pat <M2_xor_xacc, int_hexagon_M2_xor_xacc>;
-
-// Vector dual multiply with round and pack
-def : T_PP_pat <M2_vdmpyrs_s0, int_hexagon_M2_vdmpyrs_s0>;
-def : T_PP_pat <M2_vdmpyrs_s1, int_hexagon_M2_vdmpyrs_s1>;
-
-// Vector multiply halfwords with round and pack
-def : T_RR_pat <M2_vmpy2s_s0pack, int_hexagon_M2_vmpy2s_s0pack>;
-def : T_RR_pat <M2_vmpy2s_s1pack, int_hexagon_M2_vmpy2s_s1pack>;
-
-// Multiply and use lower result
-def : T_RR_pat <M2_mpyi, int_hexagon_M2_mpyi>;
-def : T_RI_pat <M2_mpysmi, int_hexagon_M2_mpysmi>;
-
-// Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32)
-def : T_RR_pat <M2_mpyi, int_hexagon_M2_mpyui>;
-
-// Multiply and use upper result
-def : T_RR_pat <M2_mpy_up, int_hexagon_M2_mpy_up>;
-def : T_RR_pat <M2_mpyu_up, int_hexagon_M2_mpyu_up>;
-def : T_RR_pat <M2_hmmpyh_rs1, int_hexagon_M2_hmmpyh_rs1>;
-def : T_RR_pat <M2_hmmpyl_rs1, int_hexagon_M2_hmmpyl_rs1>;
-def : T_RR_pat <M2_dpmpyss_rnd_s0, int_hexagon_M2_dpmpyss_rnd_s0>;
-
-// Complex multiply with round and pack
-// Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat
-def : T_RR_pat <M2_cmpyrs_s0, int_hexagon_M2_cmpyrs_s0>;
-def : T_RR_pat <M2_cmpyrs_s1, int_hexagon_M2_cmpyrs_s1>;
-def : T_RR_pat <M2_cmpyrsc_s0, int_hexagon_M2_cmpyrsc_s0>;
-def : T_RR_pat <M2_cmpyrsc_s1, int_hexagon_M2_cmpyrsc_s1>;
-
-//*******************************************************************
-// STYPE/ALU
-//*******************************************************************
-def : T_P_pat <A2_absp, int_hexagon_A2_absp>;
-def : T_P_pat <A2_negp, int_hexagon_A2_negp>;
-def : T_P_pat <A2_notp, int_hexagon_A2_notp>;
-
-//*******************************************************************
-// STYPE/BIT
-//*******************************************************************
-
-// Count leading/trailing
-def: T_R_pat<S2_cl0, int_hexagon_S2_cl0>;
-def: T_P_pat<S2_cl0p, int_hexagon_S2_cl0p>;
-def: T_R_pat<S2_cl1, int_hexagon_S2_cl1>;
-def: T_P_pat<S2_cl1p, int_hexagon_S2_cl1p>;
-def: T_R_pat<S2_clb, int_hexagon_S2_clb>;
-def: T_P_pat<S2_clbp, int_hexagon_S2_clbp>;
-def: T_R_pat<S2_clbnorm, int_hexagon_S2_clbnorm>;
-def: T_R_pat<S2_ct0, int_hexagon_S2_ct0>;
-def: T_R_pat<S2_ct1, int_hexagon_S2_ct1>;
-
-// Compare bit mask
-def: T_RR_pat<C2_bitsclr, int_hexagon_C2_bitsclr>;
-def: T_RI_pat<C2_bitsclri, int_hexagon_C2_bitsclri>;
-def: T_RR_pat<C2_bitsset, int_hexagon_C2_bitsset>;
-
-// Vector shuffle
-def : T_PP_pat <S2_shuffeb, int_hexagon_S2_shuffeb>;
-def : T_PP_pat <S2_shuffob, int_hexagon_S2_shuffob>;
-def : T_PP_pat <S2_shuffeh, int_hexagon_S2_shuffeh>;
-def : T_PP_pat <S2_shuffoh, int_hexagon_S2_shuffoh>;
-
-// Vector truncate
-def : T_PP_pat <S2_vtrunewh, int_hexagon_S2_vtrunewh>;
-def : T_PP_pat <S2_vtrunowh, int_hexagon_S2_vtrunowh>;
-
-// Linear feedback-shift Iteration.
-def : T_PP_pat <S2_lfsp, int_hexagon_S2_lfsp>;
-
-// Vector align
-// Need custom lowering
-def : T_PPQ_pat <S2_valignrb, int_hexagon_S2_valignrb>;
-def : T_PPI_pat <S2_valignib, int_hexagon_S2_valignib>;
-
-// Vector splice
-def : T_PPQ_pat <S2_vsplicerb, int_hexagon_S2_vsplicerb>;
-def : T_PPI_pat <S2_vspliceib, int_hexagon_S2_vspliceib>;
-
-// Shift by immediate and add
-def : T_RRI_pat<S2_addasl_rrri, int_hexagon_S2_addasl_rrri>;
-
-// Extract bitfield
-def : T_PII_pat<S2_extractup, int_hexagon_S2_extractup>;
-def : T_RII_pat<S2_extractu, int_hexagon_S2_extractu>;
-def : T_RP_pat <S2_extractu_rp, int_hexagon_S2_extractu_rp>;
-def : T_PP_pat <S2_extractup_rp, int_hexagon_S2_extractup_rp>;
-
-// Insert bitfield
-def : Pat <(int_hexagon_S2_insert_rp I32:$src1, I32:$src2, I64:$src3),
- (S2_insert_rp I32:$src1, I32:$src2, I64:$src3)>;
-
-def : Pat<(i64 (int_hexagon_S2_insertp_rp I64:$src1, I64:$src2, I64:$src3)),
- (i64 (S2_insertp_rp I64:$src1, I64:$src2, I64:$src3))>;
-
-def : Pat<(int_hexagon_S2_insert I32:$src1, I32:$src2,
- u5_0ImmPred:$src3, u5_0ImmPred:$src4),
- (S2_insert I32:$src1, I32:$src2,
- u5_0ImmPred:$src3, u5_0ImmPred:$src4)>;
-
-def : Pat<(i64 (int_hexagon_S2_insertp I64:$src1, I64:$src2,
- u6_0ImmPred:$src3, u6_0ImmPred:$src4)),
- (i64 (S2_insertp I64:$src1, I64:$src2,
- u6_0ImmPred:$src3, u6_0ImmPred:$src4))>;
-
-// Interleave/deinterleave
-def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
-def : T_P_pat <S2_deinterleave, int_hexagon_S2_deinterleave>;
-
-// Set/Clear/Toggle Bit
-def: T_RI_pat<S2_setbit_i, int_hexagon_S2_setbit_i>;
-def: T_RI_pat<S2_clrbit_i, int_hexagon_S2_clrbit_i>;
-def: T_RI_pat<S2_togglebit_i, int_hexagon_S2_togglebit_i>;
-
-def: T_RR_pat<S2_setbit_r, int_hexagon_S2_setbit_r>;
-def: T_RR_pat<S2_clrbit_r, int_hexagon_S2_clrbit_r>;
-def: T_RR_pat<S2_togglebit_r, int_hexagon_S2_togglebit_r>;
-
-// Test Bit
-def: T_Q_RI_pat<S2_tstbit_i, int_hexagon_S2_tstbit_i>;
-def: T_Q_RR_pat<S2_tstbit_r, int_hexagon_S2_tstbit_r>;
-
-//*******************************************************************
-// STYPE/COMPLEX
-//*******************************************************************
-// Vector Complex conjugate
-def : T_P_pat <A2_vconj, int_hexagon_A2_vconj>;
-
-// Vector Complex rotate
-def : T_PR_pat <S2_vcrotate, int_hexagon_S2_vcrotate>;
-
-//*******************************************************************
-// STYPE/PERM
-//*******************************************************************
-
-// Vector saturate without pack
-def : T_P_pat <S2_vsathb_nopack, int_hexagon_S2_vsathb_nopack>;
-def : T_P_pat <S2_vsathub_nopack, int_hexagon_S2_vsathub_nopack>;
-def : T_P_pat <S2_vsatwh_nopack, int_hexagon_S2_vsatwh_nopack>;
-def : T_P_pat <S2_vsatwuh_nopack, int_hexagon_S2_vsatwuh_nopack>;
-
-//*******************************************************************
-// STYPE/PRED
-//*******************************************************************
-
-// Predicate transfer
-def: Pat<(i32 (int_hexagon_C2_tfrpr I32:$Rs)),
- (i32 (C2_tfrpr (C2_tfrrp I32:$Rs)))>;
-def: Pat<(i32 (int_hexagon_C2_tfrrp I32:$Rs)),
- (i32 (C2_tfrpr (C2_tfrrp I32:$Rs)))>;
-
-// Mask generate from predicate
-def: Pat<(i64 (int_hexagon_C2_mask I32:$Rs)),
- (i64 (C2_mask (C2_tfrrp I32:$Rs)))>;
-
-// Viterbi pack even and odd predicate bits
-def: T_QQ_pat<C2_vitpack, int_hexagon_C2_vitpack>;
-
-//*******************************************************************
-// STYPE/SHIFT
-//*******************************************************************
-
-def : T_PI_pat <S2_asr_i_p, int_hexagon_S2_asr_i_p>;
-def : T_PI_pat <S2_lsr_i_p, int_hexagon_S2_lsr_i_p>;
-def : T_PI_pat <S2_asl_i_p, int_hexagon_S2_asl_i_p>;
-
-def : T_PR_pat <S2_asr_r_p, int_hexagon_S2_asr_r_p>;
-def : T_PR_pat <S2_lsr_r_p, int_hexagon_S2_lsr_r_p>;
-def : T_PR_pat <S2_asl_r_p, int_hexagon_S2_asl_r_p>;
-def : T_PR_pat <S2_lsl_r_p, int_hexagon_S2_lsl_r_p>;
-
-def : T_RR_pat <S2_asr_r_r, int_hexagon_S2_asr_r_r>;
-def : T_RR_pat <S2_lsr_r_r, int_hexagon_S2_lsr_r_r>;
-def : T_RR_pat <S2_asl_r_r, int_hexagon_S2_asl_r_r>;
-def : T_RR_pat <S2_lsl_r_r, int_hexagon_S2_lsl_r_r>;
-
-def : T_RR_pat <S2_asr_r_r_sat, int_hexagon_S2_asr_r_r_sat>;
-def : T_RR_pat <S2_asl_r_r_sat, int_hexagon_S2_asl_r_r_sat>;
-
-def : T_R_pat <S2_vsxtbh, int_hexagon_S2_vsxtbh>;
-def : T_R_pat <S2_vzxtbh, int_hexagon_S2_vzxtbh>;
-def : T_R_pat <S2_vsxthw, int_hexagon_S2_vsxthw>;
-def : T_R_pat <S2_vzxthw, int_hexagon_S2_vzxthw>;
-def : T_R_pat <S2_vsplatrh, int_hexagon_S2_vsplatrh>;
-def : T_R_pat <A2_sxtw, int_hexagon_A2_sxtw>;
-
-// Vector saturate and pack
-def : T_R_pat <S2_svsathb, int_hexagon_S2_svsathb>;
-def : T_R_pat <S2_svsathub, int_hexagon_S2_svsathub>;
-def : T_P_pat <S2_vsathub, int_hexagon_S2_vsathub>;
-def : T_P_pat <S2_vsatwh, int_hexagon_S2_vsatwh>;
-def : T_P_pat <S2_vsatwuh, int_hexagon_S2_vsatwuh>;
-def : T_P_pat <S2_vsathb, int_hexagon_S2_vsathb>;
-
-def : T_P_pat <S2_vtrunohb, int_hexagon_S2_vtrunohb>;
-def : T_P_pat <S2_vtrunehb, int_hexagon_S2_vtrunehb>;
-def : T_P_pat <S2_vrndpackwh, int_hexagon_S2_vrndpackwh>;
-def : T_P_pat <S2_vrndpackwhs, int_hexagon_S2_vrndpackwhs>;
-def : T_R_pat <S2_brev, int_hexagon_S2_brev>;
-def : T_R_pat <S2_vsplatrb, int_hexagon_S2_vsplatrb>;
-
-def : T_R_pat <A2_abs, int_hexagon_A2_abs>;
-def : T_R_pat <A2_abssat, int_hexagon_A2_abssat>;
-def : T_R_pat <A2_negsat, int_hexagon_A2_negsat>;
-
-def : T_R_pat <A2_swiz, int_hexagon_A2_swiz>;
-
-def : T_P_pat <A2_sat, int_hexagon_A2_sat>;
-def : T_R_pat <A2_sath, int_hexagon_A2_sath>;
-def : T_R_pat <A2_satuh, int_hexagon_A2_satuh>;
-def : T_R_pat <A2_satub, int_hexagon_A2_satub>;
-def : T_R_pat <A2_satb, int_hexagon_A2_satb>;
-
-// Vector arithmetic shift right by immediate with truncate and pack.
-def : T_PI_pat<S2_asr_i_svw_trun, int_hexagon_S2_asr_i_svw_trun>;
-
-def : T_RI_pat <S2_asr_i_r, int_hexagon_S2_asr_i_r>;
-def : T_RI_pat <S2_lsr_i_r, int_hexagon_S2_lsr_i_r>;
-def : T_RI_pat <S2_asl_i_r, int_hexagon_S2_asl_i_r>;
-def : T_RI_pat <S2_asr_i_r_rnd, int_hexagon_S2_asr_i_r_rnd>;
-def : T_RI_pat <S2_asr_i_r_rnd_goodsyntax,
- int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
-
-// Shift left by immediate with saturation.
-def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>;
-
//===----------------------------------------------------------------------===//
// Template 'def pat' to map tableidx[bhwd] intrinsics to :raw instructions.
//===----------------------------------------------------------------------===//
@@ -1277,11 +162,8 @@ def SDEC3 : SDNodeXForm<imm, [{
// values from the 4th input operand. Please note that subtraction is not
// needed for int_hexagon_S2_tableidxb_goodsyntax.
-def : Pat <(int_hexagon_S2_tableidxb_goodsyntax I32:$src1, I32:$src2,
- u4_0ImmPred:$src3, u5_0ImmPred:$src4),
- (S2_tableidxb I32:$src1, I32:$src2,
- u4_0ImmPred:$src3, u5_0ImmPred:$src4)>;
-
+def : S2op_tableidx_pat <int_hexagon_S2_tableidxb_goodsyntax, S2_tableidxb,
+ IdImm>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh,
SDEC1>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
@@ -1289,52 +171,6 @@ def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd,
SDEC3>;
-//*******************************************************************
-// STYPE/VH
-//*******************************************************************
-
-// Vector absolute value halfwords with and without saturation
-// Rdd64=vabsh(Rss64)[:sat]
-def : T_P_pat <A2_vabsh, int_hexagon_A2_vabsh>;
-def : T_P_pat <A2_vabshsat, int_hexagon_A2_vabshsat>;
-
-// Vector shift halfwords by immediate
-// Rdd64=[vaslh/vasrh/vlsrh](Rss64,u4)
-def : T_PI_pat <S2_asr_i_vh, int_hexagon_S2_asr_i_vh>;
-def : T_PI_pat <S2_lsr_i_vh, int_hexagon_S2_lsr_i_vh>;
-def : T_PI_pat <S2_asl_i_vh, int_hexagon_S2_asl_i_vh>;
-
-// Vector shift halfwords by register
-// Rdd64=[vaslw/vasrw/vlslw/vlsrw](Rss64,Rt32)
-def : T_PR_pat <S2_asr_r_vh, int_hexagon_S2_asr_r_vh>;
-def : T_PR_pat <S2_lsr_r_vh, int_hexagon_S2_lsr_r_vh>;
-def : T_PR_pat <S2_asl_r_vh, int_hexagon_S2_asl_r_vh>;
-def : T_PR_pat <S2_lsl_r_vh, int_hexagon_S2_lsl_r_vh>;
-
-//*******************************************************************
-// STYPE/VW
-//*******************************************************************
-
-// Vector absolute value words with and without saturation
-def : T_P_pat <A2_vabsw, int_hexagon_A2_vabsw>;
-def : T_P_pat <A2_vabswsat, int_hexagon_A2_vabswsat>;
-
-// Vector shift words by immediate.
-// Rdd64=[vasrw/vlsrw|vaslw](Rss64,u5)
-def : T_PI_pat <S2_asr_i_vw, int_hexagon_S2_asr_i_vw>;
-def : T_PI_pat <S2_lsr_i_vw, int_hexagon_S2_lsr_i_vw>;
-def : T_PI_pat <S2_asl_i_vw, int_hexagon_S2_asl_i_vw>;
-
-// Vector shift words by register.
-// Rdd64=[vasrw/vlsrw|vaslw|vlslw](Rss64,Rt32)
-def : T_PR_pat <S2_asr_r_vw, int_hexagon_S2_asr_r_vw>;
-def : T_PR_pat <S2_lsr_r_vw, int_hexagon_S2_lsr_r_vw>;
-def : T_PR_pat <S2_asl_r_vw, int_hexagon_S2_asl_r_vw>;
-def : T_PR_pat <S2_lsl_r_vw, int_hexagon_S2_lsl_r_vw>;
-
-// Vector shift words with truncate and pack
-def : T_PR_pat <S2_asr_r_svw_trun, int_hexagon_S2_asr_r_svw_trun>;
-
// Load/store locked.
def : T_R_pat<L2_loadw_locked, int_hexagon_L2_loadw_locked>;
def : T_R_pat<L4_loadd_locked, int_hexagon_L4_loadd_locked>;
@@ -1370,10 +206,13 @@ def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
- (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>;
+ (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>,
+ Requires<[UseHVX]>;
+
def : Pat<(!cast<Intrinsic>(IntID#"_128B") HvxQR:$src1, IntRegs:$src2,
HvxVR:$src3),
- (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>;
+ (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>,
+ Requires<[UseHVX]>;
}
defm : MaskedStore <V6_vS32b_qpred_ai, int_hexagon_V6_vmaskedstoreq>;
@@ -1398,7 +237,241 @@ def: T_R_pat<Y2_dczeroa, int_hexagon_Y2_dczeroa>;
def: T_RR_pat<Y4_l2fetch, int_hexagon_Y4_l2fetch>;
def: T_RP_pat<Y5_l2fetch, int_hexagon_Y5_l2fetch>;
-include "HexagonIntrinsicsV3.td"
-include "HexagonIntrinsicsV4.td"
-include "HexagonIntrinsicsV5.td"
-include "HexagonIntrinsicsV60.td"
+//
+// Patterns for optimizing code generation for HVX.
+
+def u3_64_ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)(64 - N->getSExtValue());
+ return isUInt<3>(v);
+}]>;
+
+def u3_128_ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)(128 - N->getSExtValue());
+ return isUInt<3>(v);
+}]>;
+
+def SUB_64_VAL : SDNodeXForm<imm, [{
+ int32_t Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(64 - Imm, SDLoc(N), MVT::i32);
+}]>;
+
+def SUB_128_VAL : SDNodeXForm<imm, [{
+ int32_t Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(128 - Imm, SDLoc(N), MVT::i32);
+}]>;
+
+let AddedComplexity = 100 in {
+def : Pat <(v16i32 (int_hexagon_V6_lo (v32i32 HvxWR:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 HvxWR:$src1), vsub_lo))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v16i32 (int_hexagon_V6_hi (v32i32 HvxWR:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 HvxWR:$src1), vsub_hi))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (int_hexagon_V6_lo_128B (v64i32 HvxWR:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_lo))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_hi))>,
+ Requires<[UseHVX]>;
+}
+
+def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
+ (v512i1 (V6_vandvrt (v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
+ (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
+ (v32i16 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
+ (v64i8 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
+ (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
+ (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+let AddedComplexity = 140 in {
+def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
+ (v512i1 (V6_vandvrt
+ (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVX]>;
+
+def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
+ (v1024i1 (V6_vandvrt
+ (v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVX]>;
+}
+
+def: Pat<(v64i16 (trunc v64i32:$Vdd)),
+ (v64i16 (V6_vpackwh_sat
+ (v32i32 (V6_hi HvxWR:$Vdd)),
+ (v32i32 (V6_lo HvxWR:$Vdd))))>,
+ Requires<[UseHVX]>;
+
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV55]>;
+
+multiclass T_VI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, u3_0ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, u3_0ImmPred:$src2)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, u3_0ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, u3_0ImmPred:$src2)>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VI_inv_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, u3_64_ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1,
+ (SUB_64_VAL u3_64_ImmPred:$src2))>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, u3_128_ImmPred:$src2),
+ (MI HvxVR:$src1, HvxVR:$src1, (SUB_128_VAL u3_128_ImmPred:$src2))>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ u3_0ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ u3_0ImmPred:$src3)>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVI_inv_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, u3_64_ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ (SUB_64_VAL u3_64_ImmPred:$src3))>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ u3_128_ImmPred:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ (SUB_128_VAL u3_128_ImmPred:$src3))>,
+ Requires<[UseHVX]>;
+}
+
+multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>,
+ Requires<[UseHVX]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") HvxVR:$src1, HvxVR:$src2,
+ IntRegs:$src3),
+ (MI HvxVR:$src1, HvxVR:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVX]>;
+}
+
+defm : T_VI_pat <V6_valignbi, int_hexagon_V6_vror>;
+defm : T_VI_inv_pat <V6_vlalignbi, int_hexagon_V6_vror>;
+
+defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignb>;
+defm : T_VVI_inv_pat <V6_vlalignbi, int_hexagon_V6_valignbi>;
+defm : T_VVI_inv_pat <V6_vlalignbi, int_hexagon_V6_valignb>;
+defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignbi>;
+defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignb>;
+defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignbi>;
+defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignb>;
+defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignbi>;
+
+def: Pat<(int_hexagon_V6_vd0),
+ (V6_vd0)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vd0_128B),
+ (V6_vd0)>, Requires<[HasV60, UseHVX128B]>;
+
+def: Pat<(int_hexagon_V6_vdd0),
+ (V6_vdd0)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdd0_128B),
+ (V6_vdd0)>, Requires<[HasV65, UseHVX128B]>;
+
+def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
+ (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
+ (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+
+include "HexagonDepMapAsm2Intrin.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
deleted file mode 100644
index 6152cb098825..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
+++ /dev/null
@@ -1,27 +0,0 @@
-//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// Vector reduce complex multiply real or imaginary
-def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
-def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
-def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
-
-// Vector reduce add unsigned halfwords
-def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
-
-def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
-def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
-def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
-def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
-def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
-def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
deleted file mode 100644
index 2affe531515d..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ /dev/null
@@ -1,305 +0,0 @@
-//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is populated based on the following specs:
-// Hexagon V4 Architecture Extensions
-// Application-Level Specification
-// 80-V9418-12 Rev. A
-// June 15, 2010
-
-// Vector reduce multiply word by signed half (32x16)
-//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
-def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
-def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
-def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
-
-//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
-def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
-def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
-def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
-
-// Vector multiply halfwords, signed by unsigned
-// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
-def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
-def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
-
-// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
-def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
-def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
-
-// Vector polynomial multiply halfwords
-// Rdd=vpmpyh(Rs,Rt)
-def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
-// Rxx[^]=vpmpyh(Rs,Rt)
-def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
-
-// Polynomial multiply words
-// Rdd=pmpyw(Rs,Rt)
-def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
-// Rxx^=pmpyw(Rs,Rt)
-def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
-
-//Rxx^=asr(Rss,Rt)
-def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
-//Rxx^=asl(Rss,Rt)
-def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
-//Rxx^=lsr(Rss,Rt)
-def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
-//Rxx^=lsl(Rss,Rt)
-def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
-
-// Multiply and use upper result
-def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
-def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
-def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
-def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
-def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
-
-def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
-def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
-
-// Vector reduce add unsigned halfwords
-def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
-
-def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
-def: T_P_pat<S2_ct0p, int_hexagon_S2_ct0p>;
-def: T_P_pat<S2_ct1p, int_hexagon_S2_ct1p>;
-
-def: T_Q_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
-def: T_Q_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
-def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
-
-def : T_Q_PI_pat<A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi>;
-def : T_Q_PI_pat<A4_vcmpbgti, int_hexagon_A4_vcmpbgti>;
-def : T_Q_PI_pat<A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui>;
-def : T_Q_PI_pat<A4_vcmpheqi, int_hexagon_A4_vcmpheqi>;
-def : T_Q_PI_pat<A4_vcmphgti, int_hexagon_A4_vcmphgti>;
-def : T_Q_PI_pat<A4_vcmphgtui, int_hexagon_A4_vcmphgtui>;
-def : T_Q_PI_pat<A4_vcmpweqi, int_hexagon_A4_vcmpweqi>;
-def : T_Q_PI_pat<A4_vcmpwgti, int_hexagon_A4_vcmpwgti>;
-def : T_Q_PI_pat<A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui>;
-def : T_Q_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
-
-def : T_Q_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
-def : T_Q_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
-def : T_Q_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
-def : T_Q_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
-def : T_Q_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
-def : T_Q_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
-
-def : T_Q_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
-def : T_Q_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
-def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
-
-def : T_Q_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
-def : T_Q_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
-def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
-
-def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
-def : T_Q_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
-
-def : T_RRR_pat <M4_mpyrr_addr, int_hexagon_M4_mpyrr_addr>;
-def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
-def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
-def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
-def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
-def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
-def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
-
-// Complex multiply 32x16
-def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
-def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
-
-def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
-def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
-
-def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
-def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
-
-// Complex add/sub halfwords/words
-def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
-def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
-def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
-def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
-
-def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
-def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
-
-// Extract bitfield
-def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
-def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
-def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
-def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
-
-// Vector conditional negate
-// Rdd=vcnegh(Rss,Rt)
-def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
-
-// Shift an immediate left by register amount
-def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
-
-// Vector reduce maximum halfwords
-def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
-def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
-
-// Vector reduce maximum words
-def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
-def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
-
-// Vector reduce minimum halfwords
-def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
-def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
-
-// Vector reduce minimum words
-def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
-def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
-
-// Rotate and reduce bytes
-def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
- u2_0ImmPred:$src3),
- (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
-
-// Rotate and reduce bytes with accumulation
-// Rxx+=vrcrotate(Rss,Rt,#u2)
-def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3, u2_0ImmPred:$src4),
- (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3, u2_0ImmPred:$src4)>;
-
-// Vector conditional negate
-def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
-
-// Logical xor with xor accumulation
-def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
-
-// ALU64 - Vector min/max byte
-def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
-def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
-
-// Shift and add/sub/and/or
-def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
-def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
-def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
-def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
-def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
-def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
-def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
-def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
-
-// Split bitfield
-def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
-def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
-
-def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
-
-def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
-def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
-
-def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
-def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
-def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
-
-//*******************************************************************
-// ALU32/ALU
-//*******************************************************************
-
-// ALU32 / ALU / Logical Operations.
-def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
-def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
-
-//*******************************************************************
-// ALU32/PERM
-//*******************************************************************
-
-// Combine Words Into Doublewords.
-def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
-def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
-
-//*******************************************************************
-// ALU32/PRED
-//*******************************************************************
-
-// Compare
-def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
-def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
-def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
-
-// Compare To General Register.
-def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
-def: T_Q_RR_pat<C4_cmplte, int_hexagon_C4_cmplte>;
-def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
-
-def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
-def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
-
-def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
-def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
-
-//*******************************************************************
-// CR
-//*******************************************************************
-
-// CR / Logical Operations On Predicates.
-def: T_Q_QQQ_pat<C4_and_and, int_hexagon_C4_and_and>;
-def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
-def: T_Q_QQQ_pat<C4_and_or, int_hexagon_C4_and_or>;
-def: T_Q_QQQ_pat<C4_and_orn, int_hexagon_C4_and_orn>;
-def: T_Q_QQQ_pat<C4_or_and, int_hexagon_C4_or_and>;
-def: T_Q_QQQ_pat<C4_or_andn, int_hexagon_C4_or_andn>;
-def: T_Q_QQQ_pat<C4_or_or, int_hexagon_C4_or_or>;
-def: T_Q_QQQ_pat<C4_or_orn, int_hexagon_C4_or_orn>;
-
-//*******************************************************************
-// XTYPE/ALU
-//*******************************************************************
-
-// Add And Accumulate.
-
-def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
-def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
-
-
-// XTYPE / ALU / Logical-logical Words.
-def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
-def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
-def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
-def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
-def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
-def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
-def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
-def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
-def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
-def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
-def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
-
-def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
-def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
-def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
-
-// Modulo wrap.
-def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
-
-// Arithmetic/Convergent round
-// Rd=[cround|round](Rs,Rt)[:sat]
-// Rd=[cround|round](Rs,#u5)[:sat]
-def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
-def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
-
-def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
-def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
-
-def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
-def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
-
-def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
index 29f67cffcf89..a852394f2160 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -7,9 +7,314 @@
//
//===----------------------------------------------------------------------===//
+def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
+
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
+def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
+def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
+def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
+def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
+
+def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
+def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
+
+// Vector reduce add halfwords
+def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
+
+def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
+def: T_P_pat<S2_ct0p, int_hexagon_S2_ct0p>;
+def: T_P_pat<S2_ct1p, int_hexagon_S2_ct1p>;
+
+def: T_Q_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
+def: T_Q_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
+def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
+
+def : T_Q_PI_pat<A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi>;
+def : T_Q_PI_pat<A4_vcmpbgti, int_hexagon_A4_vcmpbgti>;
+def : T_Q_PI_pat<A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui>;
+def : T_Q_PI_pat<A4_vcmpheqi, int_hexagon_A4_vcmpheqi>;
+def : T_Q_PI_pat<A4_vcmphgti, int_hexagon_A4_vcmphgti>;
+def : T_Q_PI_pat<A4_vcmphgtui, int_hexagon_A4_vcmphgtui>;
+def : T_Q_PI_pat<A4_vcmpweqi, int_hexagon_A4_vcmpweqi>;
+def : T_Q_PI_pat<A4_vcmpwgti, int_hexagon_A4_vcmpwgti>;
+def : T_Q_PI_pat<A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui>;
+def : T_Q_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
+
+def : T_Q_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
+def : T_Q_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
+def : T_Q_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
+def : T_Q_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
+def : T_Q_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
+def : T_Q_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
+
+def : T_Q_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
+def : T_Q_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
+def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
+
+def : T_Q_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
+def : T_Q_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
+def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
+
+def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
+def : T_Q_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
+
+def : T_RRR_pat <M4_mpyrr_addr, int_hexagon_M4_mpyrr_addr>;
+def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
+def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
+def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
+def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
+def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
+def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
+
+// Complex multiply 32x16
+def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
+def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
+
+def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
+def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
+
+def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
+def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
+
+// Complex add/sub halfwords/words
+def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
+def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
+def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
+def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
+
+def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
+def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
+
+// Extract bitfield
+def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
+def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
+def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
+def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
+
+// Shift an immediate left by register amount
+def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
+
+// Vector reduce maximum halfwords
+def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
+def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
+
+// Vector reduce maximum words
+def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
+def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
+
+// Vector reduce minimum halfwords
+def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
+def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
+
+// Vector reduce minimum words
+def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
+def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
+
+// Rotate and reduce bytes
+def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
+ u2_0ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2_0ImmPred:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3, u2_0ImmPred:$src4)>;
+
+// Vector conditional negate
+def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
+
+// Logical xor with xor accumulation
+def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
+
+// ALU64 - Vector min/max byte
+def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
+def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
+
+// Shift and add/sub/and/or
+def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
+def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
+def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
+def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
+def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
+def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
+def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
+def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
+
+// Split bitfield
+def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
+def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
+
+def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
+
+def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
+def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
+
+def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
+def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
+def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
+
+//*******************************************************************
+// ALU32/ALU
+//*******************************************************************
+
+// ALU32 / ALU / Logical Operations.
+def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
+def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
+
+//*******************************************************************
+// ALU32/PERM
+//*******************************************************************
+
+// Combine Words Into Doublewords.
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
+
+//*******************************************************************
+// ALU32/PRED
+//*******************************************************************
+
+// Compare
+def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
+
+// Compare To General Register.
+def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
+def: T_Q_RR_pat<C4_cmplte, int_hexagon_C4_cmplte>;
+def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
+
+def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
+def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
+
+def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
+def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
+
+//*******************************************************************
+// CR
+//*******************************************************************
+
+// CR / Logical Operations On Predicates.
+def: T_Q_QQQ_pat<C4_and_and, int_hexagon_C4_and_and>;
+def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
+def: T_Q_QQQ_pat<C4_and_or, int_hexagon_C4_and_or>;
+def: T_Q_QQQ_pat<C4_and_orn, int_hexagon_C4_and_orn>;
+def: T_Q_QQQ_pat<C4_or_and, int_hexagon_C4_or_and>;
+def: T_Q_QQQ_pat<C4_or_andn, int_hexagon_C4_or_andn>;
+def: T_Q_QQQ_pat<C4_or_or, int_hexagon_C4_or_or>;
+def: T_Q_QQQ_pat<C4_or_orn, int_hexagon_C4_or_orn>;
+
+//*******************************************************************
+// XTYPE/ALU
+//*******************************************************************
+
+// Add And Accumulate.
+
+def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
+def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
+
+
+// XTYPE / ALU / Logical-logical Words.
+def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
+def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
+def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
+def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
+def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
+def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
+def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
+def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
+def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
+def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
+def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
+
+def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
+def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
+def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
+
+// Modulo wrap.
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
+
+// Arithmetic/Convergent round
+// Rd=[cround|round](Rs,Rt)[:sat]
+// Rd=[cround|round](Rs,#u5)[:sat]
+def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
+def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
+
+def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
+def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
+
+def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
+def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
+
+def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
+
//Rdd[+]=vrmpybsu(Rss,Rtt)
//Rdd[+]=vrmpybuu(Rss,Rtt)
-let Predicates = [HasV5] in {
def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
@@ -31,7 +336,6 @@ def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;
// Rd=vaddhub(Rss,Rtt):sat
def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
-}
def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index f9ed03909233..985f41f3a7d9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1970,12 +1970,13 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access
// is strided positively through memory, we say that the modified location
// starts at the pointer and has infinite size.
- LocationSize AccessSize = MemoryLocation::UnknownSize;
+ LocationSize AccessSize = LocationSize::unknown();
// If the loop iterates a fixed number of times, we can refine the access
// size to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
+ AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
+ StoreSize);
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
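The hunk above moves from the raw UnknownSize sentinel to the LocationSize wrapper: start with LocationSize::unknown() and tighten it to a precise byte count of (BECount+1)*StoreSize once the backedge-taken count is a known constant. A minimal C++ sketch of that idea, using the same LocationSize API as the patch; the function name and parameters are illustrative, not part of the source:

  #include "llvm/Analysis/MemoryLocation.h"
  using namespace llvm;

  // Pessimistic default: the loop may modify an unbounded range of bytes.
  static LocationSize computeAccessSize(bool TripCountKnown, uint64_t BECount,
                                        uint64_t StoreSize) {
    LocationSize AccessSize = LocationSize::unknown();
    // With a constant backedge-taken count, the store covers exactly
    // (BECount + 1) * StoreSize bytes, so the size can be made precise.
    if (TripCountKnown)
      AccessSize = LocationSize::precise((BECount + 1) * StoreSize);
    return AccessSize;
  }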
@@ -2360,7 +2361,7 @@ bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
return DT->dominates(BB, EB);
};
- if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
+ if (!all_of(ExitBlocks, DominatedByBB))
return false;
bool MadeChange = false;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 74c550ce8226..908ce24136c7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -105,6 +105,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
default:
if (!ResourcesModel->canReserveResources(*SU->getInstr()))
return false;
+ break;
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
case TargetOpcode::SUBREG_TO_REG:
@@ -215,8 +216,7 @@ void VLIWMachineScheduler::schedule() {
++su) if (SUnits[su].getDepth() > maxD) maxD =
SUnits[su].getDepth();
dbgs() << "Max Depth " << maxD << "\n";);
- LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
- .dumpAll(this));
+ LLVM_DEBUG(dump());
initQueues(TopRoots, BotRoots);
@@ -489,7 +489,7 @@ void ConvergingVLIWScheduler::traceCandidate(const char *Label,
else
dbgs() << " ";
dbgs() << "cost(" << Cost << ")\t";
- SU->dump(DAG);
+ DAG->dumpNode(*SU);
}
// Very detailed queue dump, to be used with higher verbosity levels.
@@ -982,7 +982,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
<< " Scheduling instruction in cycle "
<< (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
<< reportPackets() << ")\n";
- SU->dump(DAG));
+ DAG->dumpNode(*SU));
return SU;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 29c044b3b729..c3a5bd5d57bf 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -502,7 +502,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.add(ImmOp);
OpStart = 4;
Changed = true;
- } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
+ } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset &&
+ OldMI->getOperand(2).isImm()) {
short NewOpCode = HII->changeAddrMode_io_abs(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode))
@@ -518,17 +519,19 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
- } else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) {
- short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
- assert(NewOpCode >= 0 && "Invalid New opcode\n");
- MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.add(OldMI->getOperand(0));
- MIB.add(OldMI->getOperand(1));
- MIB.add(ImmOp);
- OpStart = 4;
- Changed = true;
- LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
- LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ } else if (ImmOpNum == 2) {
+ if (OldMI->getOperand(3).isImm() && OldMI->getOperand(3).getImm() == 0) {
+ short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
+ assert(NewOpCode >= 0 && "Invalid New opcode\n");
+ MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(OldMI->getOperand(1));
+ MIB.add(ImmOp);
+ OpStart = 4;
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
+ LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ }
}
if (Changed)
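Both hunks in this file add the same defensive check: make sure an operand really is an immediate (MachineOperand::isImm()) before calling getImm(), which asserts on any other operand kind. A small sketch of the guard; the helper name is hypothetical:

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineOperand.h"
  using namespace llvm;

  // True only when operand OpNo exists, is an immediate, and equals Val.
  // Calling getImm() without the isImm() check would assert on register,
  // global-address, or other non-immediate operands.
  static bool isImmOperandEqual(const MachineInstr &MI, unsigned OpNo,
                                int64_t Val) {
    if (OpNo >= MI.getNumOperands())
      return false;
    const MachineOperand &MO = MI.getOperand(OpNo);
    return MO.isImm() && MO.getImm() == Val;
  }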
@@ -758,11 +761,13 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
// This could happen, for example, when DefR = R4, but the used
// register is D2.
+ // Change UseMI if replacement is possible. If any replacement failed,
+ // or wasn't attempted, make sure to keep the TFR.
+ bool Xformed = false;
if (UseMOnum >= 0 && InstrEvalResult[UseMI])
- // Change UseMI if replacement is possible.
- Changed |= xformUseMI(MI, UseMI, UseN, UseMOnum);
- else
- KeepTfr = true;
+ Xformed = xformUseMI(MI, UseMI, UseN, UseMOnum);
+ Changed |= Xformed;
+ KeepTfr |= !Xformed;
}
if (!KeepTfr)
Deleted.insert(MI);
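The rewritten loop body above records, per use, whether the rewrite of that use actually succeeded, and only deletes the register transfer (TFR) when every use was rewritten; a use that was skipped or whose rewrite failed forces the TFR to stay. A schematic sketch of the accumulation, with hypothetical names (UseInfo, Uses, rewriteUse, deleteTransfer) standing in for the pass's own structures:

  bool Changed = false;
  bool KeepTfr = false;                  // keep the TFR if any use survives
  for (UseInfo &U : Uses) {              // hypothetical list of uses
    bool Xformed = false;
    if (U.CanBeRewritten)
      Xformed = rewriteUse(U);           // hypothetical rewrite hook
    Changed |= Xformed;
    KeepTfr |= !Xformed;                 // one failure is enough to keep it
  }
  if (!KeepTfr)
    deleteTransfer();                    // safe: all uses were rewritten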
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 384fda4ce39a..89177564057e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -177,6 +177,11 @@ def UDEC32: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32);
}]>;
+class Subi<int From>: SDNodeXForm<imm,
+ "int32_t V = " # From # " - N->getSExtValue();" #
+ "return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);"
+>;
+
def Log2_32: SDNodeXForm<imm, [{
uint32_t V = N->getZExtValue();
return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
@@ -218,6 +223,8 @@ def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
+def ToAext64: OutPatFrag<(ops node:$Rs),
+ (REG_SEQUENCE DoubleRegs, (i32 (IMPLICIT_DEF)), isub_hi, (i32 $Rs), isub_lo)>;
def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt),
(REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>;
@@ -246,6 +253,9 @@ def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;
+def azext: PatFrags<(ops node:$Rs), [(zext node:$Rs), (anyext node:$Rs)]>;
+def asext: PatFrags<(ops node:$Rs), [(sext node:$Rs), (anyext node:$Rs)]>;
+
def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
(PS_fi (i32 AddrFI:$Rs), imm:$off)>;
@@ -257,6 +267,23 @@ class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
class Not2<PatFrag P>
: PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
+// If there is a constant operand that feeds the and/or instruction,
+// do not generate the compound instructions.
+// It is not always profitable, as sometimes we end up with a transfer.
+// Consider the example below:
+// ra = #65820; rb = lsr(rb, #8); rc ^= and (rb, ra)
+// Instead, this is preferable:
+// ra = and (#65820, lsr(ra, #8)); rb = xor(rb, ra)
+class Su_ni1<PatFrag Op>
+ : PatFrag<Op.Operands, !head(Op.Fragments), [{
+ if (hasOneUse(N)){
+ // Check if Op1 is an immediate operand.
+ SDValue Op1 = N->getOperand(1);
+ return !dyn_cast<ConstantSDNode>(Op1);
+ }
+ return false;}],
+ Op.OperandTransform>;
+
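The Su_ni1 fragment above only matches when the wrapped operation has a single use and its second operand is not a compile-time constant, which keeps immediate-fed and/or expressions (the ra = #65820 case in the comment) out of the compound-instruction patterns. The embedded predicate, written out as a standalone function over real SelectionDAG types (the function itself is illustrative):

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Mirror of the Su_ni1 predicate: single use, and the RHS operand is not
  // an immediate (ConstantSDNode).
  static bool isSingleUseWithNonImmRHS(const SDNode *N) {
    if (!N->hasOneUse())
      return false;
    SDValue Op1 = N->getOperand(1);
    return !isa<ConstantSDNode>(Op1);
  }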
class Su<PatFrag Op>
: PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
Op.OperandTransform>;
@@ -348,38 +375,34 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
// --(2) Type cast -------------------------------------------------------
//
-let Predicates = [HasV5] in {
- def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
- def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
+def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
+def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
- def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
- def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
- def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
- def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
+def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
- def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
- def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
- def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
- def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
+def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
- def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
- def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
- def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
- def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
+def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
- def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
- def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
- def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
- def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
-}
+def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5] in {
- def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
- def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
- def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
- def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
-}
+def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
+def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
+def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
+def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
@@ -403,52 +426,48 @@ def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>;
def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>;
def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>;
-def: Pat<(i64 (sext I1:$Pu)),
- (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
- (C2_muxii PredRegs:$Pu, -1, 0))>;
-
-def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
-def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
-def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
-def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
-def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
-def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
-def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
-
def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>;
def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>;
def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>;
-def: Pat<(i1 (trunc I64:$Rs)), (C2_tfrrp (LoReg $Rs))>;
+def: Pat<(i1 (trunc I32:$Rs)), (S2_tstbit_i I32:$Rs, 0)>;
+def: Pat<(i1 (trunc I64:$Rs)), (S2_tstbit_i (LoReg $Rs), 0)>;
let AddedComplexity = 20 in {
def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>;
def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>;
}
-def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+// Extensions from i1 or vectors of i1.
+def: Pat<(i32 (azext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
+def: Pat<(i64 (azext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
+def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
+ (C2_muxii PredRegs:$Pu, -1, 0))>;
+
+def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
+def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
+def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
+def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
+def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
def Vsplatpi: OutPatFrag<(ops node:$V),
(Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
-def: Pat<(v8i8 (zext V8I1:$Pu)),
- (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
-def: Pat<(v4i16 (zext V4I1:$Pu)),
- (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
-def: Pat<(v2i32 (zext V2I1:$Pu)),
- (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
-def: Pat<(v4i8 (zext V4I1:$Pu)),
- (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
-def: Pat<(v2i16 (zext V2I1:$Pu)),
+def: Pat<(v2i16 (azext V2I1:$Pu)),
(A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
+def: Pat<(v2i32 (azext V2I1:$Pu)),
+ (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
+def: Pat<(v4i8 (azext V4I1:$Pu)),
+ (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
+def: Pat<(v4i16 (azext V4I1:$Pu)),
+ (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
+def: Pat<(v8i8 (azext V8I1:$Pu)),
+ (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
-def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
-def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (azext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (azext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
@@ -582,31 +601,29 @@ def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
-let Predicates = [HasV5] in {
- def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
- def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
-
- def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
- def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
-}
+def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
+
+def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.
@@ -729,32 +746,28 @@ class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
-let Predicates = [HasV5] in {
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
- def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
- def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
+def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
- def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
- def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-}
+def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
+def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-let Predicates = [HasV5] in {
- def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
- def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
- def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
- def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
-}
+def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
// --(6) Select ----------------------------------------------------------
@@ -784,32 +797,30 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
-let Predicates = [HasV5] in {
- def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
- (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
- def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
- (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
- def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
- (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
- def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
- (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
- (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
+def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
+ (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
+ (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
+def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
+ (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
+def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
+ (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
+ (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
- def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
- (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
- def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
- (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
+def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
+ (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
+def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
+ (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
- def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
- (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
- def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
- (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-}
+def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
+ (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
+ (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
@@ -872,7 +883,7 @@ let AddedComplexity = 200 in {
defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
}
-let AddedComplexity = 100, Predicates = [HasV5] in {
+let AddedComplexity = 100 in {
defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
@@ -989,15 +1000,95 @@ def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>;
def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;
-let Predicates = [HasV60] in {
+// Funnel shifts.
+def IsMul8_U3: PatLeaf<(i32 imm), [{
+ uint64_t V = N->getZExtValue();
+ return V % 8 == 0 && isUInt<3>(V / 8);
+}]>;
+
+def Divu8: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i32);
+}]>;
+
+// Funnel shift-left.
+def FShl32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (HiReg (S2_asl_i_p (Combinew $Rs, $Rt), $S))>;
+def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>;
+
+def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (S2_lsr_i_p_or (S2_asl_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (S2_lsr_r_p_or (S2_asl_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+
+// Combined SDNodeXForm: (Divu8 (Subi<64> $S))
+def Divu64_8: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((64 - N->getSExtValue()) / 8,
+ SDLoc(N), MVT::i32);
+}]>;
+
+// Special cases:
+let AddedComplexity = 100 in {
+ def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)),
+ (A2_combine_hl I32:$Rs, I32:$Rt)>;
+ def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S),
+ (S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>;
+}
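These patterns lower the generic fshl node with Hexagon's 64-bit shifts: the two 32-bit inputs are concatenated with Combinew, shifted as a single 64-bit value, and the high word is extracted; the 64-bit form is assembled from an asl/lsr pair, and shift amounts that are multiples of 8 go to the byte-align instruction S2_valignib, with Divu64_8 turning the bit count S into the byte count (64 - S) / 8. A plain-C++ reference model of the 32-bit case, useful for checking the patterns (illustrative only):

  #include <cstdint>

  // fshl(Hi, Lo, S) for i32: shift the concatenation Hi:Lo left by S
  // (modulo 32, as the fshl node specifies) and keep the upper 32 bits --
  // the same value the FShl32i/FShl32r fragments compute with
  // S2_asl_i_p / S2_asl_r_p followed by HiReg.
  static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned S) {
    S %= 32;
    uint64_t Cat = (uint64_t(Hi) << 32) | Lo;
    return uint32_t((Cat << S) >> 32);
  }
  // fshl32(a, b, 16) == (a << 16) | (b >> 16), the A2_combine_hl special case.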
+
+let Predicates = [HasV60], AddedComplexity = 50 in {
def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
}
+let AddedComplexity = 30 in {
+ def: Pat<(rotl I32:$Rs, u5_0ImmPred:$S), (FShl32i $Rs, $Rs, imm:$S)>;
+ def: Pat<(rotl I64:$Rs, u6_0ImmPred:$S), (FShl64i $Rs, $Rs, imm:$S)>;
+ def: Pat<(fshl I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShl32i $Rs, $Rt, imm:$S)>;
+ def: Pat<(fshl I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShl64i $Rs, $Rt, imm:$S)>;
+}
+def: Pat<(rotl I32:$Rs, I32:$Rt), (FShl32r $Rs, $Rs, $Rt)>;
+def: Pat<(rotl I64:$Rs, I32:$Rt), (FShl64r $Rs, $Rs, $Rt)>;
+def: Pat<(fshl I32:$Rs, I32:$Rt, I32:$Ru), (FShl32r $Rs, $Rt, $Ru)>;
+def: Pat<(fshl I64:$Rs, I64:$Rt, I32:$Ru), (FShl64r $Rs, $Rt, $Ru)>;
+
+// Funnel shift-right.
+def FShr32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (LoReg (S2_lsr_i_p (Combinew $Rs, $Rt), $S))>;
+def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (LoReg (S2_lsr_r_p (Combinew $Rs, $Rt), $Ru))>;
+
+def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+ (S2_asl_i_p_or (S2_lsr_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+ (S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+
+// Special cases:
+let AddedComplexity = 100 in {
+ def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)),
+ (A2_combine_hl I32:$Rs, I32:$Rt)>;
+ def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S),
+ (S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>;
+}
+
+let Predicates = [HasV60], AddedComplexity = 50 in {
+ def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (S6_rol_i_r I32:$Rs, (Subi<32> $S))>;
+ def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (S6_rol_i_p I64:$Rs, (Subi<64> $S))>;
+}
+let AddedComplexity = 30 in {
+ def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (FShr32i $Rs, $Rs, imm:$S)>;
+ def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (FShr64i $Rs, $Rs, imm:$S)>;
+ def: Pat<(fshr I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShr32i $Rs, $Rt, imm:$S)>;
+ def: Pat<(fshr I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShr64i $Rs, $Rt, imm:$S)>;
+}
+def: Pat<(rotr I32:$Rs, I32:$Rt), (FShr32r $Rs, $Rs, $Rt)>;
+def: Pat<(rotr I64:$Rs, I32:$Rt), (FShr64r $Rs, $Rs, $Rt)>;
+def: Pat<(fshr I32:$Rs, I32:$Rt, I32:$Ru), (FShr32r $Rs, $Rt, $Ru)>;
+def: Pat<(fshr I64:$Rs, I64:$Rt, I32:$Ru), (FShr64r $Rs, $Rt, $Ru)>;
+
def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
(S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
- (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>;
+ (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>;
// Prefer S2_addasl_rrri over S2_asl_i_r_acc.
let AddedComplexity = 120 in
@@ -1119,14 +1210,6 @@ def: Pat<(or (or (or (shl (Zext64 (and I32:$b, (i32 65535))), (i32 16)),
(Combinew (A2_combine_ll I32:$d, I32:$c),
(A2_combine_ll I32:$b, I32:$a))>;
-def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
- (i32 8)),
- (i32 (zextloadi8 (add I32:$b, 2)))),
- (i32 16)),
- (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
- (zextloadi8 I32:$b)),
- (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
-
let AddedComplexity = 200 in {
def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))),
(A2_combine_ll I32:$Rt, I32:$Rs)>;
@@ -1172,6 +1255,19 @@ def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
(S2_asl_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_asr_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVASL V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_asl_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVLSR V2I16:$Rs, u4_0ImmPred:$S),
+ (LoReg (S2_lsr_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVASR V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_asr_i_vh (ToAext64 $Rs), I32:$Rt))>;
+def: Pat<(HexagonVASL V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_asl_i_vh (ToAext64 $Rs), I32:$Rt))>;
+def: Pat<(HexagonVLSR V2I16:$Rs, I32:$Rt),
+ (LoReg (S2_lsr_i_vh (ToAext64 $Rs), I32:$Rt))>;
+
// --(9) Arithmetic/bitwise ----------------------------------------------
//
@@ -1182,17 +1278,15 @@ def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
-let Predicates = [HasV5] in {
- def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
- def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
+def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
+def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
- def: Pat<(fabs F64:$Rs),
- (Combinew (S2_clrbit_i (HiReg $Rs), 31),
- (i32 (LoReg $Rs)))>;
- def: Pat<(fneg F64:$Rs),
- (Combinew (S2_togglebit_i (HiReg $Rs), 31),
- (i32 (LoReg $Rs)))>;
-}
+def: Pat<(fabs F64:$Rs),
+ (Combinew (S2_clrbit_i (HiReg $Rs), 31),
+ (i32 (LoReg $Rs)))>;
+def: Pat<(fneg F64:$Rs),
+ (Combinew (S2_togglebit_i (HiReg $Rs), 31),
+ (i32 (LoReg $Rs)))>;
def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
@@ -1258,12 +1352,15 @@ def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>;
def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
-let Predicates = [HasV5] in {
- def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
- def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
- def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
- def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
- def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
+def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
+def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
+def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
+
+let Predicates = [HasV66] in {
+ def: OpR_RR_pat<F2_dfadd, pf2<fadd>, f64, F64>;
+ def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>;
}
// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add,
@@ -1272,6 +1369,8 @@ let AddedComplexity = 10 in {
def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>;
+ let Predicates = [HasV66] in
+ def: AccRRR_pat<M2_mnaci, Sub, Su<Mul>, I32, I32, I32>;
}
def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>;
@@ -1344,16 +1443,16 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
def: Pat<(add Sext64:$Rs, I64:$Rt),
(A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
-def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32, I32>;
-def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
-def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
+def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
// For dags like (or (and (not _), _), (shl _, _)) where the "or" with
// one argument matches the patterns below, and with the other argument
@@ -1497,14 +1596,12 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
(M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
-let Predicates = [HasV5] in {
- def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
- (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
- def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
- (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
- def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
- (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-}
+def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
+ (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
+ (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
+ (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
@@ -1515,9 +1612,9 @@ def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
// Add/subtract two v4i8: Hexagon does not have an insn for this one, so
// we use the double add v8i8, and use only the low part of the result.
def: Pat<(add V4I8:$Rs, V4I8:$Rt),
- (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt)))>;
def: Pat<(sub V4I8:$Rs, V4I8:$Rt),
- (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+ (LoReg (A2_vsubub (ToAext64 $Rs), (ToAext64 $Rt)))>;
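As the comment above says, there is no 32-bit v4i8 add or subtract, so each operand is widened to 64 bits (with ToAext64: the upper half may be undefined, so the zero-extending form is not required), the v8i8 instruction does the work, and the low 32 bits are the result. A plain-C++ reference model of the byte-wise, wrapping add that A2_vaddub performs on the low four lanes (illustrative only):

  #include <cstdint>

  // Byte-wise modular add of two v4i8 values packed in uint32_t -- the value
  // produced by (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt))).
  static uint32_t addv4i8(uint32_t A, uint32_t B) {
    uint32_t R = 0;
    for (int I = 0; I != 4; ++I) {
      uint32_t Ab = (A >> (8 * I)) & 0xff;
      uint32_t Bb = (B >> (8 * I)) & 0xff;
      R |= ((Ab + Bb) & 0xff) << (8 * I);   // each lane wraps modulo 256
    }
    return R;
  }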
// Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two
// half-words, and saturates the result to a 32-bit value, except the
@@ -1531,14 +1628,12 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
- (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
- Requires<[HasV5]>;
+ (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>;
// Multiplies two v8i8 vectors.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
(Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
- (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
- Requires<[HasV5]>;
+ (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>;
// --(10) Bit ------------------------------------------------------------
@@ -1868,10 +1963,10 @@ let AddedComplexity = 20 in {
}
let AddedComplexity = 30 in {
- defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
- defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
- defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
- defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
+ defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>;
+ defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>;
+ defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>;
defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
@@ -1906,13 +2001,13 @@ let AddedComplexity = 60 in {
def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
- def: Loadxum_pat<extloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
+ def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>;
def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>;
def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
- def: Loadxum_pat<extloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
+ def: Loadxum_pat<extloadi16, i64, anyimm1, ToAext64, L4_loadruh_ur>;
def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>;
def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
- def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
+ def: Loadxum_pat<extloadi32, i64, anyimm2, ToAext64, L4_loadri_ur>;
}
let AddedComplexity = 40 in {
@@ -1952,25 +2047,25 @@ let AddedComplexity = 20 in {
}
let AddedComplexity = 40 in {
- def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
- def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_shl_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
- def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_shl_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
let AddedComplexity = 20 in {
- def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+ def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
- def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+ def: Loadxrm_add_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
- def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+ def: Loadxrm_add_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
}
@@ -2002,13 +2097,13 @@ let AddedComplexity = 60 in {
}
let AddedComplexity = 30 in {
- def: Loadam_pat<extloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
+ def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>;
def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>;
def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
- def: Loadam_pat<extloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
+ def: Loadam_pat<extloadi16, i64, anyimm1, ToAext64, PS_loadruhabs>;
def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>;
def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
- def: Loadam_pat<extloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
+ def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>;
def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>;
def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
@@ -2044,13 +2139,13 @@ let AddedComplexity = 100 in {
}
let AddedComplexity = 70 in {
- def: Loadam_pat<extloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
+ def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>;
def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>;
def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
- def: Loadam_pat<extloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
+ def: Loadam_pat<extloadi16, i64, addrgp, ToAext64, L2_loadruhgp>;
def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>;
def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
- def: Loadam_pat<extloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
+ def: Loadam_pat<extloadi32, i64, addrgp, ToAext64, L2_loadrigp>;
def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>;
def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
@@ -2306,16 +2401,26 @@ let AddedComplexity = 140 in {
// GP-relative address
let AddedComplexity = 120 in {
- def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<store, I32, addrgp, S2_storerigp>;
- def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
- def: Storea_pat<store, F32, addrgp, S2_storerigp>;
- def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
- def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
- def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, V4I8, addrgp, S2_storerigp>;
+ def: Storea_pat<store, V2I16, addrgp, S2_storerigp>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>;
+ def: Storea_pat<store, F32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>;
def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>;
def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>;
@@ -2325,16 +2430,26 @@ let AddedComplexity = 120 in {
// Absolute address
let AddedComplexity = 110 in {
- def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
- def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
- def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
- def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
- def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
- def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
- def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
- def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
- def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
- def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
+ def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>;
+ def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
+ def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>;
+ def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>;
def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>;
def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>;
@@ -2344,12 +2459,17 @@ let AddedComplexity = 110 in {
// Reg<<S + Imm
let AddedComplexity = 100 in {
- def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
- def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
- def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
- def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
- def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
- def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
+ def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
+ def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>;
+ def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
+ def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)),
(S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>;
@@ -2357,12 +2477,17 @@ let AddedComplexity = 100 in {
// Reg<<S + Reg
let AddedComplexity = 90 in {
- def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
- def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
- def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
- def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
+ def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, V4I8, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, V2I16, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V8I8, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V4I16, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, V2I32, S4_storerd_rr>;
+ def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
+ def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)),
(S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
@@ -2414,20 +2539,30 @@ let AddedComplexity = 70 in {
// Fi+Imm, Fi, store-register
let AddedComplexity = 60 in {
- defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
- defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
- defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
- defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
- defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
- defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
+ defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
+ defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, V4I8, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, V2I16, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V8I8, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V4I16, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, V2I32, anyimm, S2_storerd_io>;
+ defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
+ defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>;
- def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
- def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
- def: Storexi_fi_pat<store, I32, S2_storeri_io>;
- def: Storexi_fi_pat<store, I64, S2_storerd_io>;
- def: Storexi_fi_pat<store, F32, S2_storeri_io>;
- def: Storexi_fi_pat<store, F64, S2_storerd_io>;
+ def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
+ def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
+ def: Storexi_fi_pat<store, I32, S2_storeri_io>;
+ def: Storexi_fi_pat<store, V4I8, S2_storeri_io>;
+ def: Storexi_fi_pat<store, V2I16, S2_storeri_io>;
+ def: Storexi_fi_pat<store, I64, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V8I8, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V4I16, S2_storerd_io>;
+ def: Storexi_fi_pat<store, V2I32, S2_storerd_io>;
+ def: Storexi_fi_pat<store, F32, S2_storeri_io>;
+ def: Storexi_fi_pat<store, F64, S2_storerd_io>;
def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>;
}
@@ -2452,32 +2587,47 @@ let AddedComplexity = 50 in {
// Reg+Imm, store-register
let AddedComplexity = 40 in {
- defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
- defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
- defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
- defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
+ defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
+ defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, V4I8, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, V2I16, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V8I8, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V4I16, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, V2I32, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>;
defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>;
defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>;
defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>;
- defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
- defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
- defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
- defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>;
+ defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>;
}
// Reg+Reg
let AddedComplexity = 30 in {
- def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Storexr_add_pat<store, I32, S4_storeri_rr>;
- def: Storexr_add_pat<store, I64, S4_storerd_rr>;
- def: Storexr_add_pat<store, F32, S4_storeri_rr>;
- def: Storexr_add_pat<store, F64, S4_storerd_rr>;
+ def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexr_add_pat<store, I32, S4_storeri_rr>;
+ def: Storexr_add_pat<store, V4I8, S4_storeri_rr>;
+ def: Storexr_add_pat<store, V2I16, S4_storeri_rr>;
+ def: Storexr_add_pat<store, I64, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V8I8, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V4I16, S4_storerd_rr>;
+ def: Storexr_add_pat<store, V2I32, S4_storerd_rr>;
+ def: Storexr_add_pat<store, F32, S4_storeri_rr>;
+ def: Storexr_add_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)),
(S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>;
@@ -2496,22 +2646,32 @@ let AddedComplexity = 20 in {
// Reg, store-register
let AddedComplexity = 10 in {
- def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
- def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
- def: Storexi_base_pat<store, I32, S2_storeri_io>;
- def: Storexi_base_pat<store, I64, S2_storerd_io>;
- def: Storexi_base_pat<store, F32, S2_storeri_io>;
- def: Storexi_base_pat<store, F64, S2_storerd_io>;
+ def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
+ def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
+ def: Storexi_base_pat<store, I32, S2_storeri_io>;
+ def: Storexi_base_pat<store, V4I8, S2_storeri_io>;
+ def: Storexi_base_pat<store, V2I16, S2_storeri_io>;
+ def: Storexi_base_pat<store, I64, S2_storerd_io>;
+ def: Storexi_base_pat<store, V8I8, S2_storerd_io>;
+ def: Storexi_base_pat<store, V4I16, S2_storerd_io>;
+ def: Storexi_base_pat<store, V2I32, S2_storerd_io>;
+ def: Storexi_base_pat<store, F32, S2_storeri_io>;
+ def: Storexi_base_pat<store, F64, S2_storerd_io>;
def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
- def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>;
+ def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>;
}
@@ -2922,6 +3082,8 @@ def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
def: Pat<(HexagonBARRIER), (Y2_barrier)>;
+def: Pat<(trap), (J2_trap0 (i32 0))>;
+
// Read cycle counter.
def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
index fd7466349ecd..b9748c7e189c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -208,6 +208,7 @@ class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops,
let isPredicable = 0; // !if(isPred, 0, 1);
let isPredicated = 0; // isPred;
let isPredicatedFalse = isFalse;
+ let Itinerary = itin;
}
def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii), J2_call.Itinerary>;
@@ -525,11 +526,11 @@ let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
def NAME#_pci : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_4403ca65>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_e93a3d71>;
def NAME#_pcr : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_2fc0c436>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_44d3da28>;
}
}
@@ -546,11 +547,11 @@ let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
def NAME#_pci : STInst<(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_9fdb5406>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_e86aa961>;
def NAME#_pcr : STInst<(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_f86c328a>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_da97ee82>;
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2e11f875c0f9..9b8f4e07376f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -118,18 +118,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool HasEHReturn = MF->getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();
- switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
- case Hexagon::ArchEnum::V4:
- case Hexagon::ArchEnum::V5:
- case Hexagon::ArchEnum::V55:
- case Hexagon::ArchEnum::V60:
- case Hexagon::ArchEnum::V62:
- case Hexagon::ArchEnum::V65:
- return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
- }
-
- llvm_unreachable("Callee saved registers requested for unknown architecture "
- "version");
+ return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
}
@@ -323,6 +312,7 @@ unsigned HexagonRegisterInfo::getHexagonSubRegIndex(
static const unsigned ISub[] = { Hexagon::isub_lo, Hexagon::isub_hi };
static const unsigned VSub[] = { Hexagon::vsub_lo, Hexagon::vsub_hi };
+ static const unsigned WSub[] = { Hexagon::wsub_lo, Hexagon::wsub_hi };
switch (RC.getID()) {
case Hexagon::CtrRegs64RegClassID:
@@ -330,6 +320,8 @@ unsigned HexagonRegisterInfo::getHexagonSubRegIndex(
return ISub[GenIdx];
case Hexagon::HvxWRRegClassID:
return VSub[GenIdx];
+ case Hexagon::HvxVQRRegClassID:
+ return WSub[GenIdx];
}
if (const TargetRegisterClass *SuperRC = *RC.getSuperClasses())
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 497dc45236b1..3e7b63a462f0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -39,8 +39,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 1fe1ef4ac572..da90911e2c05 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -82,13 +82,14 @@ let Namespace = "Hexagon" in {
def isub_hi : SubRegIndex<32, 32>;
def vsub_lo : SubRegIndex<512>;
def vsub_hi : SubRegIndex<512, 512>;
+ def wsub_lo : SubRegIndex<1024>;
+ def wsub_hi : SubRegIndex<1024, 1024>;
def subreg_overflow : SubRegIndex<1, 0>;
// Integer registers.
foreach i = 0-28 in {
def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>;
}
-
def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>;
def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>;
def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>;
@@ -206,6 +207,18 @@ let Namespace = "Hexagon" in {
def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
}
+ // Aliases of the V* registers used to hold quad vec values.
+ let SubRegIndices = [wsub_lo, wsub_hi], CoveredBySubRegs = 1 in {
+ def VQ0 : Rd< 0, "v3:0", [W0, W1]>, DwarfRegNum<[252]>;
+ def VQ1 : Rd< 4, "v7:4", [W2, W3]>, DwarfRegNum<[253]>;
+ def VQ2 : Rd< 8, "v11:8", [W4, W5]>, DwarfRegNum<[254]>;
+ def VQ3 : Rd<12, "v15:12", [W6, W7]>, DwarfRegNum<[255]>;
+ def VQ4 : Rd<16, "v19:16", [W8, W9]>, DwarfRegNum<[256]>;
+ def VQ5 : Rd<20, "v23:20", [W10, W11]>, DwarfRegNum<[257]>;
+ def VQ6 : Rd<24, "v27:24", [W12, W13]>, DwarfRegNum<[258]>;
+ def VQ7 : Rd<28, "v31:28", [W14, W15]>, DwarfRegNum<[259]>;
+ }
+
// Vector Predicate registers.
def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>;
def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>;
@@ -295,29 +308,6 @@ def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
// HVX register classes
-// Register classes.
-//
-// FIXME: the register order should be defined in terms of the preferred
-// allocation order...
-//
-def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
- (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
- R10, R11, R29, R30, R31)>;
-
-// Registers are listed in reverse order for allocation preference reasons.
-def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
- (add R23, R22, R21, R20, R19, R18, R17, R16,
- R7, R6, R5, R4, R3, R2, R1, R0)>;
-
-def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
- (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
-
-def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
- (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
-
-def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
- (add D11, D10, D9, D8, D3, D2, D1, D0)>;
-
def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512,
(add (sequence "V%u", 0, 31), VTMP)> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
@@ -336,6 +326,32 @@ def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512,
[RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
}
+def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048,
+ (add (sequence "VQ%u", 0, 7))> {
+ let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [RegInfo<2048,2048,2048>, RegInfo<4096,4096,4096>, RegInfo<2048,2048,2048>]>;
+}
+
+// Core register classes
+
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
+ (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
+ R10, R11, R29, R30, R31)>;
+
+// Registers are listed in reverse order for allocation preference reasons.
+def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add R23, R22, R21, R20, R19, R18, R17, R16,
+ R7, R6, R5, R4, R3, R2, R1, R0)>;
+
+def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
+ (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
+ (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
+
+def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
+ (add D11, D10, D9, D8, D3, D2, D1, D0)>;
+
let Size = 32 in
def PredRegs : RegisterClass<"Hexagon",
[i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, (add P0, P1, P2, P3)>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
index a1dfb66017a5..1024198e9b3f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -27,6 +27,7 @@ def CVI_SHIFT : FuncUnit;
def CVI_MPY0 : FuncUnit;
def CVI_MPY1 : FuncUnit;
def CVI_LD : FuncUnit;
+def CVI_ZW : FuncUnit; // Z register write port
// Combined functional units.
def CVI_XLSHF : FuncUnit;
@@ -57,10 +58,10 @@ include "HexagonDepIICScalar.td"
include "HexagonDepIICHVX.td"
//===----------------------------------------------------------------------===//
-// V4 Machine Info +
+// V5 Machine Info +
//===----------------------------------------------------------------------===//
-include "HexagonScheduleV4.td"
+include "HexagonScheduleV5.td"
// V55 Machine Info +
include "HexagonScheduleV55.td"
@@ -84,3 +85,9 @@ include "HexagonScheduleV62.td"
//===----------------------------------------------------------------------===//
include "HexagonScheduleV65.td"
+
+//===----------------------------------------------------------------------===//
+// V66 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV66.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td
index 69b704a805b8..9a893f6dde02 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV5.td
@@ -1,4 +1,4 @@
-//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//=-HexagonScheduleV5.td - HexagonV5 Scheduling Definitions --*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -10,8 +10,8 @@
def LD_tc_ld_SLOT01 : InstrItinClass;
def ST_tc_st_SLOT01 : InstrItinClass;
-class HexagonV4PseudoItin {
- list<InstrItinData> V4PseudoItin_list = [
+class HexagonV5PseudoItin {
+ list<InstrItinData> V5PseudoItin_list = [
InstrItinData<PSEUDO, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -20,27 +20,27 @@ class HexagonV4PseudoItin {
];
}
-def HexagonV4ItinList : DepScalarItinV4, HexagonV4PseudoItin {
- list<InstrItinData> V4Itin_list = [
+def HexagonV5ItinList : DepScalarItinV5, HexagonV5PseudoItin {
+ list<InstrItinData> V5Itin_list = [
InstrItinData<LD_tc_ld_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData<ST_tc_st_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>
];
list<InstrItinData> ItinList =
- !listconcat(V4Itin_list, DepScalarItinV4_list, V4PseudoItin_list);
+ !listconcat(V5Itin_list, DepScalarItinV5_list, V5PseudoItin_list);
}
-def HexagonItinerariesV4 :
+def HexagonItinerariesV5 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP],
- [Hex_FWD], HexagonV4ItinList.ItinList>;
+ [Hex_FWD], HexagonV5ItinList.ItinList>;
-def HexagonModelV4 : SchedMachineModel {
+def HexagonModelV5 : SchedMachineModel {
// Max issue per cycle == bundle width.
let IssueWidth = 4;
- let Itineraries = HexagonItinerariesV4;
+ let Itineraries = HexagonItinerariesV5;
let LoadLatency = 1;
let CompleteModel = 0;
}
//===----------------------------------------------------------------------===//
-// Hexagon V4 Resource Definitions -
+// Hexagon V5 Resource Definitions -
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
index a2544c92a72c..861a8d2b0339 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -65,7 +65,7 @@ def HexagonItinerariesV60 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD], HexagonV60ItinList.ItinList>;
def HexagonModelV60 : SchedMachineModel {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
index a0a8595f185f..1c274191277c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV62.td
@@ -21,7 +21,7 @@ def HexagonItinerariesV62 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD], HexagonV62ItinList.ItinList>;
def HexagonModelV62 : SchedMachineModel {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
index e3b1313923f5..46a79d521795 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV65.td
@@ -23,7 +23,7 @@ def HexagonItinerariesV65 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
- CVI_ALL_NOMEM],
+ CVI_ALL_NOMEM, CVI_ZW],
[Hex_FWD, HVX_FWD],
HexagonV65ItinList.ItinList>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td
new file mode 100644
index 000000000000..38e3d21d3701
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV66.td
@@ -0,0 +1,41 @@
+//=-HexagonScheduleV66.td - HexagonV66 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// ScalarItin and HVXItin contain some old itineraries
+// still used by a handful of instructions. Hopefully, we will be able
+// to get rid of them soon.
+
+def HexagonV66ItinList : DepScalarItinV66, ScalarItin,
+ DepHVXItinV66, HVXItin, PseudoItin {
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV66_list, ScalarItin_list,
+ DepHVXItinV66_list, HVXItin_list, PseudoItin_list);
+}
+
+def HexagonItinerariesV66 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV66ItinList.ItinList>;
+
+def HexagonModelV66 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV66;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V66 Resource Definitions -
+//===----------------------------------------------------------------------===//
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index c41f0d3c085c..55de25120943 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -63,7 +63,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
auto &HST = Fn.getSubtarget<HexagonSubtarget>();
auto &HTM = static_cast<const HexagonTargetMachine&>(Fn.getTarget());
auto &TLOF = *HTM.getObjFileLowering();
- if (HST.useSmallData() && TLOF.isSmallDataEnabled())
+ if (HST.useSmallData() && TLOF.isSmallDataEnabled(HTM))
return false;
const TargetInstrInfo *TII = HST.getInstrInfo();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 991af047387e..61c2121163b8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -578,7 +578,7 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
};
for (auto &G : SGs) {
assert(G.size() > 1 && "Store group with fewer than 2 elements");
- llvm::sort(G.begin(), G.end(), Less);
+ llvm::sort(G, Less);
Changed |= processStoreGroup(G);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 0686d6eb6118..9c77135c2f2f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -93,12 +93,12 @@ HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
static std::map<StringRef, Hexagon::ArchEnum> CpuTable{
{"generic", Hexagon::ArchEnum::V60},
- {"hexagonv4", Hexagon::ArchEnum::V4},
{"hexagonv5", Hexagon::ArchEnum::V5},
{"hexagonv55", Hexagon::ArchEnum::V55},
{"hexagonv60", Hexagon::ArchEnum::V60},
{"hexagonv62", Hexagon::ArchEnum::V62},
{"hexagonv65", Hexagon::ArchEnum::V65},
+ {"hexagonv66", Hexagon::ArchEnum::V66},
};
auto FoundIt = CpuTable.find(CPUString);
@@ -276,11 +276,11 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
if (!L0.mayLoad() || L0.mayStore() ||
HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
continue;
- int Offset0;
+ int64_t Offset0;
unsigned Size0;
- unsigned Base0 = HII.getBaseAndOffset(L0, Offset0, Size0);
+ MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
// If the access size is longer than the L1 cache line, skip the check.
- if (Base0 == 0 || Size0 >= 32)
+ if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32)
continue;
// Scan only up to 32 instructions ahead (to avoid n^2 complexity).
for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
@@ -289,10 +289,11 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
if (!L1.mayLoad() || L1.mayStore() ||
HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
continue;
- int Offset1;
+ int64_t Offset1;
unsigned Size1;
- unsigned Base1 = HII.getBaseAndOffset(L1, Offset1, Size1);
- if (Base1 == 0 || Size1 >= 32 || Base0 != Base1)
+ MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
+ if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 ||
+ BaseOp0->getReg() != BaseOp1->getReg())
continue;
// Check bits 3 and 4 of the offset: if they differ, a bank conflict
// is unlikely.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index dc8d173a5057..3a5acb53682c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -52,14 +52,16 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool UseNewValueJumps = false;
bool UseNewValueStores = false;
bool UseSmallData = false;
+ bool UseZRegOps = false;
bool HasMemNoShuf = false;
bool EnableDuplex = false;
bool ReservedR19 = false;
+ bool NoreturnStackElim = false;
public:
Hexagon::ArchEnum HexagonArchVersion;
- Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4;
+ Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::NoArch;
CodeGenOpt::Level OptLevel;
/// True if the target should use Back-Skip-Back scheduling. This is the
/// default for V60.
@@ -150,6 +152,12 @@ public:
bool hasV65OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V65;
}
+ bool hasV66Ops() const {
+ return getHexagonArchVersion() >= Hexagon::ArchEnum::V66;
+ }
+ bool hasV66OpsOnly() const {
+ return getHexagonArchVersion() == Hexagon::ArchEnum::V66;
+ }
bool useLongCalls() const { return UseLongCalls; }
bool useMemops() const { return UseMemops; }
@@ -157,8 +165,11 @@ public:
bool useNewValueJumps() const { return UseNewValueJumps; }
bool useNewValueStores() const { return UseNewValueStores; }
bool useSmallData() const { return UseSmallData; }
+ bool useZRegOps() const { return UseZRegOps; }
- bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; }
+ bool useHVXOps() const {
+ return HexagonHVXVersion > Hexagon::ArchEnum::NoArch;
+ }
bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }
@@ -166,6 +177,8 @@ public:
bool hasReservedR19() const { return ReservedR19; }
bool usePredicatedCalls() const;
+ bool noreturnStackElim() const { return NoreturnStackElim; }
+
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 2c75e9139ad7..ddfda7e27793 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -97,6 +97,10 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden,
cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization"));
+static cl::opt<bool> EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true),
+ cl::desc("Simplify the CFG after atomic expansion pass"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -149,7 +153,6 @@ namespace llvm {
FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonEarlyIfConversion();
FunctionPass *createHexagonFixupHwLoops();
- FunctionPass *createHexagonGatherPacketize();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonGenMux();
@@ -161,7 +164,7 @@ namespace llvm {
FunctionPass *createHexagonNewValueJump();
FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonOptAddrMode();
- FunctionPass *createHexagonPacketizer();
+ FunctionPass *createHexagonPacketizer(bool Minimal);
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonRDFOpt();
FunctionPass *createHexagonSplitConst32AndConst64();
@@ -177,12 +180,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
@@ -219,7 +216,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
"i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
"v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), (HexagonNoOpt ? CodeGenOpt::None : OL)),
+ getEffectiveCodeModel(CM, CodeModel::Small),
+ (HexagonNoOpt ? CodeGenOpt::None : OL)),
TLOF(make_unique<HexagonTargetObjectFile>()) {
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
initAsmInfo();
@@ -311,7 +309,10 @@ void HexagonPassConfig::addIRPasses() {
}
addPass(createAtomicExpandPass());
+
if (!NoOpt) {
+ if (EnableInitialCFGCleanup)
+ addPass(createCFGSimplificationPass(1, true, true, false, true));
if (EnableLoopPrefetch)
addPass(createLoopDataPrefetchPass());
if (EnableCommGEP)
@@ -402,7 +403,6 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonBranchRelaxation());
- // Create Packets.
if (!NoOpt) {
if (!DisableHardwareLoops)
addPass(createHexagonFixupHwLoops());
@@ -411,12 +411,8 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonGenMux());
}
- // Create packets for 2 instructions that consitute a gather instruction.
- // Do this regardless of the opt level.
- addPass(createHexagonGatherPacketize(), false);
-
- if (!NoOpt)
- addPass(createHexagonPacketizer(), false);
+ // Packetization is mandatory: it handles gather/scatter at all opt levels.
+ addPass(createHexagonPacketizer(NoOpt), false);
if (EnableVectorPrint)
addPass(createHexagonVectorPrint(), false);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index e771f383dffa..2185bf8eebc6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -199,6 +199,10 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
/// section.
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const {
+ bool HaveSData = isSmallDataEnabled(TM);
+ if (!HaveSData)
+ LLVM_DEBUG(dbgs() << "Small-data allocation is disabled, but symbols "
+ "may have explicit section assignments...\n");
// Only global variables, not functions.
LLVM_DEBUG(dbgs() << "Checking if value is in small-data, -G"
<< SmallDataThreshold << ": \"" << GO->getName() << "\": ");
@@ -218,6 +222,12 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return IsSmall;
}
+ // If sdata is disabled, stop the checks here.
+ if (!HaveSData) {
+ LLVM_DEBUG(dbgs() << "no, small-data allocation is disabled\n");
+ return false;
+ }
+
if (GVar->isConstant()) {
LLVM_DEBUG(dbgs() << "no, is a constant\n");
return false;
@@ -263,8 +273,9 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return true;
}
-bool HexagonTargetObjectFile::isSmallDataEnabled() const {
- return SmallDataThreshold > 0;
+bool HexagonTargetObjectFile::isSmallDataEnabled(const TargetMachine &TM)
+ const {
+ return SmallDataThreshold > 0 && !TM.isPositionIndependent();
}
unsigned HexagonTargetObjectFile::getSmallDataSize() const {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
index eff44f097e03..18863630fde2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -29,7 +29,7 @@ namespace llvm {
bool isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const;
- bool isSmallDataEnabled() const;
+ bool isSmallDataEnabled(const TargetMachine &TM) const;
unsigned getSmallDataSize() const;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index a496a17788d5..c942f645aa88 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -54,7 +54,7 @@ bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
return false;
if (ST.isHVXVectorType(VecVT.getSimpleVT()))
return true;
- auto Action = TLI.getPreferredVectorAction(VecVT);
+ auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
return Action == TargetLoweringBase::TypeWidenVector;
}
@@ -206,9 +206,13 @@ unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) {
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
+ return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
}
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index a232f99fc407..5c6f85584ec2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -98,6 +98,9 @@ public:
bool prefersVectorizedAddressing() {
return false;
}
+ bool enableInterleavedAccessVectorization() {
+ return true;
+ }
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
@@ -120,7 +123,8 @@ public:
bool VariableMask, unsigned Alignment);
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I);
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 56ab69db9bd1..722699907ca0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -77,7 +77,7 @@ extern cl::opt<bool> ScheduleInlineAsm;
namespace llvm {
-FunctionPass *createHexagonPacketizer();
+FunctionPass *createHexagonPacketizer(bool Minimal);
void initializeHexagonPacketizerPass(PassRegistry&);
} // end namespace llvm
@@ -88,7 +88,8 @@ namespace {
public:
static char ID;
- HexagonPacketizer() : MachineFunctionPass(ID) {}
+ HexagonPacketizer(bool Min = false)
+ : MachineFunctionPass(ID), Minimal(Min) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -112,6 +113,7 @@ namespace {
private:
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
+ const bool Minimal;
};
} // end anonymous namespace
@@ -129,8 +131,9 @@ INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
MachineLoopInfo &MLI, AliasAnalysis *AA,
- const MachineBranchProbabilityInfo *MBPI)
- : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
+ const MachineBranchProbabilityInfo *MBPI, bool Minimal)
+ : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
+ Minimal(Minimal) {
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
@@ -200,9 +203,6 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
auto &HST = MF.getSubtarget<HexagonSubtarget>();
- if (DisablePacketizer || !HST.usePackets() || skipFunction(MF.getFunction()))
- return false;
-
HII = HST.getInstrInfo();
HRI = HST.getRegisterInfo();
auto &MLI = getAnalysis<MachineLoopInfo>();
@@ -213,7 +213,9 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
HII->genAllInsnTimingClasses(MF);
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
+ bool MinOnly = Minimal || DisablePacketizer || !HST.usePackets() ||
+ skipFunction(MF.getFunction());
+ HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -226,7 +228,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
// Here, Insn 1 will result in the dependence graph not emitting an output
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
- for (auto &MB : MF) {
+ for (MachineBasicBlock &MB : MF) {
auto End = MB.end();
auto MI = MB.begin();
while (MI != End) {
@@ -766,7 +768,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
// Make sure that for non-POST_INC stores:
// 1. The only use of reg is DepReg and no other registers.
- // This handles V4 base+index registers.
+ // This handles base+index registers.
// The following store can not be dot new.
// Eg. r0 = add(r0, #3)
// memw(r1+r0<<#2) = r0
@@ -836,11 +838,7 @@ static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
return false;
}
-// Check to see if an instruction can be dot new
-// There are three kinds.
-// 1. dot new on predicate - V2/V3/V4
-// 2. dot new on stores NV/ST - V4
-// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
+// Check to see if an instruction can be dot new.
bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC) {
@@ -1073,9 +1071,6 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
if (MI.isInlineAsm() && !ScheduleInlineAsm)
return true;
- // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
- // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
- // They must not be grouped with other instructions in a packet.
if (isSchedBarrier(MI))
return true;
@@ -1110,6 +1105,10 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() ||
MJ.isCall() || MJ.isTerminator();
+ // New-value stores cannot coexist with any other stores.
+ if (HII.isNewValueStore(MI) && MJ.mayStore())
+ return true;
+
switch (MI.getOpcode()) {
case Hexagon::S2_storew_locked:
case Hexagon::S4_stored_locked:
@@ -1283,8 +1282,8 @@ bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
return false;
}
-bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I,
- const MachineInstr &J) {
+bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
+ const MachineInstr &J) {
bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
bool StoreI = I.mayStore(), StoreJ = J.mayStore();
if ((SysI && StoreJ) || (SysJ && StoreI))
@@ -1337,10 +1336,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (Dependence)
return false;
- // V4 allows dual stores. It does not allow second store, if the first
- // store is not in SLOT0. New value store, new value jump, dealloc_return
- // and memop always take SLOT0. Arch spec 3.4.4.2.
- Dependence = hasV4SpecificDependence(I, J);
+ // Dual-store does not allow second store, if the first store is not
+ // in SLOT0. New value store, new value jump, dealloc_return and memop
+ // always take SLOT0. Arch spec 3.4.4.2.
+ Dependence = hasDualStoreDependence(I, J);
if (Dependence)
return false;
@@ -1499,10 +1498,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
}
// For Order dependences:
- // 1. On V4 or later, volatile loads/stores can be packetized together,
- // unless other rules prevent is.
+ // 1. Volatile loads/stores can be packetized together, unless other
+ // rules prevent it.
// 2. Store followed by a load is not allowed.
- // 3. Store followed by a store is only valid on V4 or later.
+ // 3. Store followed by a store is valid.
// 4. Load followed by any memory operation is allowed.
if (DepType == SDep::Order) {
if (!PacketizeVolatiles) {
@@ -1549,7 +1548,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
continue;
}
- // For V4, special case ALLOCFRAME. Even though there is dependency
+ // Special case for ALLOCFRAME: even though there is a dependency
// between ALLOCFRAME and subsequent store, allow it to be packetized
// in a same packet. This implies that the store is using the caller's
// SP. Hence, offset needs to be updated accordingly.
@@ -1569,6 +1568,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (GlueAllocframeStore)
continue;
}
+ break;
default:
break;
}
@@ -1652,6 +1652,9 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
return false;
}
+ if (!Coexist)
+ return false;
+
if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) {
FoundSequentialDependence = false;
Dependence = false;
@@ -1759,8 +1762,8 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
}
void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MI) {
- // Replace VLIWPacketizerList::endPacket(MBB, MI).
+ MachineBasicBlock::iterator EndMI) {
+ // Replace VLIWPacketizerList::endPacket(MBB, EndMI).
bool memShufDisabled = getmemShufDisabled();
if (memShufDisabled && !foundLSInPacket()) {
@@ -1769,25 +1772,32 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
}
memShufDisabled = getmemShufDisabled();
- if (CurrentPacketMIs.size() > 1) {
- MachineBasicBlock::instr_iterator FirstMI(CurrentPacketMIs.front());
- MachineBasicBlock::instr_iterator LastMI(MI.getInstrIterator());
- finalizeBundle(*MBB, FirstMI, LastMI);
+ OldPacketMIs.clear();
+ for (MachineInstr *MI : CurrentPacketMIs) {
+ MachineBasicBlock::instr_iterator NextMI = std::next(MI->getIterator());
+ for (auto &I : make_range(HII->expandVGatherPseudo(*MI), NextMI))
+ OldPacketMIs.push_back(&I);
+ }
+ CurrentPacketMIs.clear();
+ if (OldPacketMIs.size() > 1) {
+ MachineBasicBlock::instr_iterator FirstMI(OldPacketMIs.front());
+ MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator());
+ finalizeBundle(*MBB, FirstMI, LastMI);
auto BundleMII = std::prev(FirstMI);
if (memShufDisabled)
HII->setBundleNoShuf(BundleMII);
setmemShufDisabled(false);
}
- OldPacketMIs = CurrentPacketMIs;
- CurrentPacketMIs.clear();
ResourceTracker->clearResources();
LLVM_DEBUG(dbgs() << "End packet\n");
}
bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
+ if (Minimal)
+ return false;
return !producesStall(MI);
}
@@ -1860,6 +1870,6 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-FunctionPass *llvm::createHexagonPacketizer() {
- return new HexagonPacketizer();
+FunctionPass *llvm::createHexagonPacketizer(bool Minimal) {
+ return new HexagonPacketizer(Minimal);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 40dcee3441a2..ca70cf967a46 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -66,11 +66,13 @@ protected:
private:
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
+ const bool Minimal;
public:
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
AliasAnalysis *AA,
- const MachineBranchProbabilityInfo *MBPI);
+ const MachineBranchProbabilityInfo *MBPI,
+ bool Minimal);
// initPacketizerState - initialize some internal flags.
void initPacketizerState() override;
@@ -147,7 +149,7 @@ protected:
bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J);
bool hasControlDependence(const MachineInstr &I, const MachineInstr &J);
bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
- bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J);
+ bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J);
bool producesStall(const MachineInstr &MI);
};
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index cb504b5c3d5d..6543d8313900 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -26,11 +26,9 @@ namespace llvm {
/// instruction info tracks.
namespace HexagonII {
unsigned const TypeCVI_FIRST = TypeCVI_4SLOT_MPY;
- unsigned const TypeCVI_LAST = TypeCVI_VX_LATE;
+ unsigned const TypeCVI_LAST = TypeCVI_ZW;
enum SubTarget {
- HasV4SubT = 0x3f,
- HasV5SubT = 0x3e,
HasV55SubT = 0x3c,
HasV60SubT = 0x38,
};
@@ -57,117 +55,117 @@ namespace HexagonII {
// MCInstrDesc TSFlags
// *** Must match HexagonInstrFormat*.td ***
enum {
- // This 5-bit field describes the insn type.
- TypePos = 0,
- TypeMask = 0x3f,
+ // This 7-bit field describes the insn type.
+ TypePos = 0,
+ TypeMask = 0x7f,
// Solo instructions.
- SoloPos = 6,
+ SoloPos = 7,
SoloMask = 0x1,
// Packed only with A or X-type instructions.
- SoloAXPos = 7,
+ SoloAXPos = 8,
SoloAXMask = 0x1,
// Only A-type instruction in first slot or nothing.
- RestrictSlot1AOKPos = 8,
+ RestrictSlot1AOKPos = 9,
RestrictSlot1AOKMask = 0x1,
// Predicated instructions.
- PredicatedPos = 9,
+ PredicatedPos = 10,
PredicatedMask = 0x1,
- PredicatedFalsePos = 10,
+ PredicatedFalsePos = 11,
PredicatedFalseMask = 0x1,
- PredicatedNewPos = 11,
+ PredicatedNewPos = 12,
PredicatedNewMask = 0x1,
- PredicateLatePos = 12,
+ PredicateLatePos = 13,
PredicateLateMask = 0x1,
// New-Value consumer instructions.
- NewValuePos = 13,
+ NewValuePos = 14,
NewValueMask = 0x1,
// New-Value producer instructions.
- hasNewValuePos = 14,
+ hasNewValuePos = 15,
hasNewValueMask = 0x1,
// Which operand consumes or produces a new value.
- NewValueOpPos = 15,
+ NewValueOpPos = 16,
NewValueOpMask = 0x7,
// Stores that can become new-value stores.
- mayNVStorePos = 18,
+ mayNVStorePos = 19,
mayNVStoreMask = 0x1,
// New-value store instructions.
- NVStorePos = 19,
+ NVStorePos = 20,
NVStoreMask = 0x1,
// Loads that can become current-value loads.
- mayCVLoadPos = 20,
+ mayCVLoadPos = 21,
mayCVLoadMask = 0x1,
// Current-value load instructions.
- CVLoadPos = 21,
+ CVLoadPos = 22,
CVLoadMask = 0x1,
// Extendable insns.
- ExtendablePos = 22,
+ ExtendablePos = 23,
ExtendableMask = 0x1,
// Insns must be extended.
- ExtendedPos = 23,
+ ExtendedPos = 24,
ExtendedMask = 0x1,
// Which operand may be extended.
- ExtendableOpPos = 24,
+ ExtendableOpPos = 25,
ExtendableOpMask = 0x7,
// Signed or unsigned range.
- ExtentSignedPos = 27,
+ ExtentSignedPos = 28,
ExtentSignedMask = 0x1,
// Number of bits of range before extending operand.
- ExtentBitsPos = 28,
+ ExtentBitsPos = 29,
ExtentBitsMask = 0x1f,
// Alignment power-of-two before extending operand.
- ExtentAlignPos = 33,
+ ExtentAlignPos = 34,
ExtentAlignMask = 0x3,
- CofMax1Pos = 35,
+ CofMax1Pos = 36,
CofMax1Mask = 0x1,
- CofRelax1Pos = 36,
+ CofRelax1Pos = 37,
CofRelax1Mask = 0x1,
- CofRelax2Pos = 37,
+ CofRelax2Pos = 38,
CofRelax2Mask = 0x1,
- RestrictNoSlot1StorePos = 38,
+ RestrictNoSlot1StorePos = 39,
RestrictNoSlot1StoreMask = 0x1,
// Addressing mode for load/store instructions.
- AddrModePos = 41,
+ AddrModePos = 42,
AddrModeMask = 0x7,
// Access size for load/store instructions.
- MemAccessSizePos = 44,
+ MemAccessSizePos = 45,
MemAccesSizeMask = 0xf,
// Branch predicted taken.
- TakenPos = 48,
+ TakenPos = 49,
TakenMask = 0x1,
// Floating-point instructions.
- FPPos = 49,
+ FPPos = 50,
FPMask = 0x1,
// New-Value producer-2 instructions.
- hasNewValuePos2 = 51,
+ hasNewValuePos2 = 52,
hasNewValueMask2 = 0x1,
// Which operand consumes or produces a new value.
- NewValueOpPos2 = 52,
+ NewValueOpPos2 = 53,
NewValueOpMask2 = 0x7,
// Accumulator instructions.
- AccumulatorPos = 55,
+ AccumulatorPos = 56,
AccumulatorMask = 0x1,
// Complex XU, prevent xu competition by preferring slot3
- PrefersSlot3Pos = 56,
+ PrefersSlot3Pos = 57,
PrefersSlot3Mask = 0x1,
// v65
- HasTmpDstPos = 59,
+ HasTmpDstPos = 60,
HasTmpDstMask = 0x1,
- CVINewPos = 61,
- CVINewMask = 0x1
+ CVINewPos = 62,
+ CVINewMask = 0x1,
};
// *** The code above must match HexagonInstrFormat*.td *** //
@@ -176,7 +174,7 @@ namespace HexagonII {
enum HexagonMOTargetFlagVal {
// Hexagon-specific MachineOperand target flags.
//
- // When chaning these, make sure to update
+ // When changing these, make sure to update
// getSerializableDirectMachineOperandTargetFlags and
// getSerializableBitmaskMachineOperandTargetFlags if needed.
MO_NO_FLAG,
@@ -189,7 +187,8 @@ namespace HexagonII {
MO_GOT,
// Low or high part of a symbol.
- MO_LO16, MO_HI16,
+ MO_LO16,
+ MO_HI16,
// Offset from the base of the SDA.
MO_GPREL,
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 3b3a15b990f1..687e79a7dbab 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -28,26 +28,8 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
-HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI,
- MCInstrInfo const &MII,
- MCRegisterInfo const &MRI)
- : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) {
-}
-
-StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
- return MII.getName(Opcode);
-}
-
void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
- O << getRegName(RegNo);
-}
-
-StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
- return getRegisterName(RegNo);
-}
-
-void HexagonInstPrinter::setExtender(MCInst const &MCI) {
- HasExtender = HexagonMCInstrInfo::isImmext(MCI);
+ O << getRegisterName(RegNo);
}
void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
@@ -65,7 +47,7 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
printInstruction(MCI.getOperand(0).getInst(), OS);
} else
printInstruction(&MCI, OS);
- setExtender(MCI);
+ HasExtender = HexagonMCInstrInfo::isImmext(MCI);
OS << "\n";
}
@@ -97,72 +79,6 @@ void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
}
}
-void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI,
- unsigned OpNo,
- raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
-}
-
-void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- O << -MI->getOperand(OpNo).getImm();
-}
-
-void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- O << -1;
-}
-
-void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
-
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
-
- printOperand(MI, OpNo, O);
-}
-
-void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- // Branches can take an immediate operand. This is used by the branch
- // selection pass to print $+8, an eight byte displacement from the PC.
- llvm_unreachable("Unknown branch operand.");
-}
-
-void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {}
-
-void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo,
- raw_ostream &O, bool hi) const {
- assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand");
-
- O << '#' << (hi ? "HI" : "LO") << '(';
- O << '#';
- printOperand(MI, OpNo, O);
- O << ')';
-}
-
void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
MCOperand const &MO = MI->getOperand(OpNo);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index ac8e391905e0..17af046ce090 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -26,57 +26,25 @@ namespace llvm {
class HexagonInstPrinter : public MCInstPrinter {
public:
explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII,
- MCRegisterInfo const &MRI);
+ MCRegisterInfo const &MRI)
+ : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
+
void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
- virtual StringRef getOpcodeName(unsigned Opcode) const;
- void printInstruction(MCInst const *MI, raw_ostream &O);
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
- StringRef getRegName(unsigned RegNo) const;
static char const *getRegisterName(unsigned RegNo);
- void printRegName(raw_ostream &O, unsigned RegNo) const override;
+ void printInstruction(MCInst const *MI, raw_ostream &O);
void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printNegImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printNOneImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printBranchOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printPredicateOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printGlobalOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
-
- void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
- printSymbol(MI, OpNo, O, true);
- }
- void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
- printSymbol(MI, OpNo, O, false);
- }
-
MCAsmInfo const &getMAI() const { return MAI; }
MCInstrInfo const &getMII() const { return MII; }
-protected:
- void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O,
- bool hi) const;
-
private:
MCInstrInfo const &MII;
-
- bool HasExtender;
- void setExtender(MCInst const &MCI);
+ bool HasExtender = false;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index b208a3668124..f0654d612b4b 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -127,6 +127,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x4;
}
+ break;
case HexagonII::HSIG_L2:
switch (Gb) {
default:
@@ -138,6 +139,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x5;
}
+ break;
case HexagonII::HSIG_S1:
switch (Gb) {
default:
@@ -151,6 +153,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x6;
}
+ break;
case HexagonII::HSIG_S2:
switch (Gb) {
default:
@@ -166,6 +169,7 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x7;
}
+ break;
case HexagonII::HSIG_A:
switch (Gb) {
default:
@@ -173,11 +177,13 @@ unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) {
case HexagonII::HSIG_A:
return 0x3;
}
+ break;
case HexagonII::HSIG_Compound:
switch (Gb) {
case HexagonII::HSIG_Compound:
return 0xFFFFFFFF;
}
+ break;
}
return 0xFFFFFFFF;
}
@@ -634,8 +640,7 @@ bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
return false;
}
- if (STI.getCPU().equals_lower("hexagonv4") ||
- STI.getCPU().equals_lower("hexagonv5") ||
+ if (STI.getCPU().equals_lower("hexagonv5") ||
STI.getCPU().equals_lower("hexagonv55") ||
STI.getCPU().equals_lower("hexagonv60")) {
// If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
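
// Illustrative sketch (not part of the patch): the `break` statements added in the
// iClassOfDuplexPair hunks above guard against falling out of an inner switch and
// straight into the next outer case label. A minimal standalone analogue of that pitfall:
#include <cstdio>

static unsigned classify(unsigned Outer, unsigned Inner) {
  switch (Outer) {
  case 0:
    switch (Inner) {
    case 1:
      return 0x4;            // recognized pairing
    default:
      break;                 // inner switch falls out...
    }
    break;                   // ...so this break keeps us from running the next outer case.
  case 1:
    switch (Inner) {
    case 1:
      return 0x5;
    default:
      break;
    }
    break;
  }
  return 0xFFFFFFFF;         // sentinel: no valid pairing
}

int main() {
  // Without the outer `break`, classify(0, 2) would also test the Outer == 1 table
  // and could return 0x5 instead of the sentinel.
  std::printf("%#x\n", classify(0, 2)); // prints 0xffffffff
  return 0;
}
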
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index 9fbe299d7d52..f0689252b396 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -9,11 +9,11 @@
//===----------------------------------------------------------------------===//
#include "HexagonMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index b211a81524fb..92ce7345f358 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -61,8 +61,6 @@ cl::opt<bool> llvm::HexagonDisableDuplex
cl::desc("Disable looking for duplex instructions for Hexagon"));
namespace { // These flags are to be deprecated
-cl::opt<bool> MV4("mv4", cl::Hidden, cl::desc("Build for Hexagon V4"),
- cl::init(false));
cl::opt<bool> MV5("mv5", cl::Hidden, cl::desc("Build for Hexagon V5"),
cl::init(false));
cl::opt<bool> MV55("mv55", cl::Hidden, cl::desc("Build for Hexagon V55"),
@@ -73,6 +71,8 @@ cl::opt<bool> MV62("mv62", cl::Hidden, cl::desc("Build for Hexagon V62"),
cl::init(false));
cl::opt<bool> MV65("mv65", cl::Hidden, cl::desc("Build for Hexagon V65"),
cl::init(false));
+cl::opt<bool> MV66("mv66", cl::Hidden, cl::desc("Build for Hexagon V66"),
+ cl::init(false));
} // namespace
cl::opt<Hexagon::ArchEnum>
@@ -82,19 +82,20 @@ cl::opt<Hexagon::ArchEnum>
clEnumValN(Hexagon::ArchEnum::V60, "v60", "Build for HVX v60"),
clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"),
clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"),
- // Sentinal for no value specified
- clEnumValN(Hexagon::ArchEnum::V5, "", "")),
- // Sentinal for flag not present
- cl::init(Hexagon::ArchEnum::V4), cl::ValueOptional);
+ clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"),
+ // Sentinel for no value specified.
+ clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
+ // Sentinel for flag not present.
+ cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional);
+
static cl::opt<bool>
- DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions"));
+ DisableHVX("mno-hvx", cl::Hidden,
+ cl::desc("Disable Hexagon Vector eXtensions"));
static StringRef DefaultArch = "hexagonv60";
static StringRef HexagonGetArchVariant() {
- if (MV4)
- return "hexagonv4";
if (MV5)
return "hexagonv5";
if (MV55)
@@ -105,6 +106,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv62";
if (MV65)
return "hexagonv65";
+ if (MV66)
+ return "hexagonv66";
return "";
}
@@ -123,7 +126,7 @@ StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) {
return ArchV;
}
-unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV4FU::SLOT3; }
+unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV5FU::SLOT3; }
namespace {
@@ -279,6 +282,7 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
Result.push_back(FS);
switch (EnableHVX) {
+ case Hexagon::ArchEnum::V5:
case Hexagon::ArchEnum::V55:
break;
case Hexagon::ArchEnum::V60:
@@ -290,14 +294,18 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V65:
Result.push_back("+hvxv65");
break;
- case Hexagon::ArchEnum::V5:{
+ case Hexagon::ArchEnum::V66:
+ Result.push_back("+hvxv66");
+ break;
+ case Hexagon::ArchEnum::Generic:{
Result.push_back(StringSwitch<StringRef>(CPU)
.Case("hexagonv60", "+hvxv60")
.Case("hexagonv62", "+hvxv62")
- .Case("hexagonv65", "+hvxv65"));
+ .Case("hexagonv65", "+hvxv65")
+ .Case("hexagonv66", "+hvxv66"));
break;
}
- case Hexagon::ArchEnum::V4:
+ case Hexagon::ArchEnum::NoArch:
     // Sentinel if -mhvx isn't specified
break;
}
@@ -307,15 +315,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
static bool isCPUValid(std::string CPU)
{
- std::vector<std::string> table
- {
- "generic",
- "hexagonv4",
- "hexagonv5",
- "hexagonv55",
- "hexagonv60",
- "hexagonv62",
- "hexagonv65",
+ std::vector<std::string> table {
+ "generic", "hexagonv5", "hexagonv55", "hexagonv60",
+ "hexagonv62", "hexagonv65", "hexagonv66",
};
return std::find(table.begin(), table.end(), CPU) != table.end();
@@ -336,8 +338,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// Make sure that +hvx-length turns hvx on, and that "hvx" alone
// turns on hvxvNN, corresponding to the existing ArchVNN.
FeatureBitset FB = S;
- unsigned CpuArch = ArchV4;
- for (unsigned F : {ArchV65, ArchV62, ArchV60, ArchV55, ArchV5, ArchV4}) {
+ unsigned CpuArch = ArchV5;
+ for (unsigned F : {ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@@ -351,7 +353,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
break;
}
bool HasHvxVer = false;
- for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65}) {
+ for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
+ ExtensionHVXV66}) {
if (!FB.test(F))
continue;
HasHvxVer = true;
@@ -364,6 +367,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// HasHvxVer is false, and UseHvx is true.
switch (CpuArch) {
+ case ArchV66:
+ FB.set(ExtensionHVXV66);
+ LLVM_FALLTHROUGH;
case ArchV65:
FB.set(ExtensionHVXV65);
LLVM_FALLTHROUGH;
@@ -402,12 +408,12 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
static std::map<StringRef,unsigned> ElfFlags = {
- {"hexagonv4", ELF::EF_HEXAGON_MACH_V4},
{"hexagonv5", ELF::EF_HEXAGON_MACH_V5},
{"hexagonv55", ELF::EF_HEXAGON_MACH_V55},
{"hexagonv60", ELF::EF_HEXAGON_MACH_V60},
{"hexagonv62", ELF::EF_HEXAGON_MACH_V62},
{"hexagonv65", ELF::EF_HEXAGON_MACH_V65},
+ {"hexagonv66", ELF::EF_HEXAGON_MACH_V66},
};
auto F = ElfFlags.find(STI.getCPU());
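
// Illustrative sketch (not part of the patch): GetELFFlags() above resolves the CPU
// name through a static map and falls back when the name is absent. A standalone
// analogue of that lookup-with-fallback shape; the flag values here are hypothetical:
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

static uint32_t getMachFlag(const std::string &CPU) {
  static const std::map<std::string, uint32_t> Flags = {
      {"hexagonv5", 0x1}, {"hexagonv60", 0x60}, {"hexagonv66", 0x66}, // made-up values
  };
  auto F = Flags.find(CPU);
  return F != Flags.end() ? F->second : 0; // 0 stands in for "unknown CPU"
}

int main() {
  std::printf("%#x\n", (unsigned)getMachFlag("hexagonv66")); // 0x66
  std::printf("%#x\n", (unsigned)getMachFlag("hexagonv4"));  // 0x0 -- v4 support was dropped
  return 0;
}
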
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 6cd1b3a4691f..d6ea664222d3 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -18,6 +18,33 @@
#include <cstdint>
#include <string>
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+// Normal instruction size (in bytes).
+#define HEXAGON_INSTR_SIZE 4
+
+// Maximum number of words and instructions in a packet.
+#define HEXAGON_PACKET_SIZE 4
+#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
+// Minimum number of instructions in an end-loop packet.
+#define HEXAGON_PACKET_INNER_SIZE 2
+#define HEXAGON_PACKET_OUTER_SIZE 3
+// Maximum number of instructions in a packet before shuffling,
+// including a compound one or a duplex or an extender.
+#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
+
+// Name of the global offset table as defined by the Hexagon ABI
+#define HEXAGON_GOT_SYM_NAME "_GLOBAL_OFFSET_TABLE_"
+
namespace llvm {
struct InstrItinerary;
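
// Illustrative sketch (not part of the patch): the packet macros added above imply a
// 16-byte maximum packet and a 7-instruction pre-shuffle limit. A tiny standalone
// check of that arithmetic:
#include <cstdio>

#define HEXAGON_INSTR_SIZE 4
#define HEXAGON_PACKET_SIZE 4
#define HEXAGON_MAX_PACKET_SIZE (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE)
#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)

int main() {
  std::printf("max packet bytes: %d\n", HEXAGON_MAX_PACKET_SIZE);         // 16
  std::printf("pre-shuffle insns: %d\n", HEXAGON_PRESHUFFLE_PACKET_SIZE); // 7
  return 0;
}
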
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 59f3caa6af94..f4ee2bbfaaaa 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -138,6 +138,8 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
(*TUL)[HexagonII::TypeCVI_SCATTER_NEW_ST] =
UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_4SLOT_MPY] = UnitsAndLanes(CVI_XLANE, 4);
+ (*TUL)[HexagonII::TypeCVI_ZW] = UnitsAndLanes(CVI_ZW, 1);
}
HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL,
@@ -300,6 +302,7 @@ bool HexagonShuffler::check() {
// Number of memory operations, loads, solo loads, stores, solo stores, single
// stores.
unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+ unsigned NonZCVIloads = 0, AllCVIloads = 0, CVIstores = 0;
// Number of duplex insns
unsigned duplex = 0;
unsigned pSlot3Cnt = 0;
@@ -331,6 +334,11 @@ bool HexagonShuffler::check() {
case HexagonII::TypeCVI_VM_TMP_LD:
case HexagonII::TypeCVI_GATHER:
case HexagonII::TypeCVI_GATHER_RST:
+ ++NonZCVIloads;
+ LLVM_FALLTHROUGH;
+ case HexagonII::TypeCVI_ZW:
+ ++AllCVIloads;
+ LLVM_FALLTHROUGH;
case HexagonII::TypeLD:
++loads;
++memory;
@@ -348,6 +356,8 @@ bool HexagonShuffler::check() {
case HexagonII::TypeCVI_SCATTER_RST:
case HexagonII::TypeCVI_SCATTER_NEW_RST:
case HexagonII::TypeCVI_SCATTER_NEW_ST:
+ ++CVIstores;
+ LLVM_FALLTHROUGH;
case HexagonII::TypeST:
++stores;
++memory;
@@ -405,7 +415,11 @@ bool HexagonShuffler::check() {
applySlotRestrictions();
// Check if the packet is legal.
- if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory))) {
+ const unsigned ZCVIloads = AllCVIloads - NonZCVIloads;
+ const bool ValidHVXMem =
+ NonZCVIloads <= 1 && ZCVIloads <= 1 && CVIstores <= 1;
+ if ((load0 > 1 || store0 > 1 || !ValidHVXMem) ||
+ (duplex > 1 || (duplex && memory))) {
reportError(llvm::Twine("invalid instruction packet"));
return false;
}
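
// Illustrative sketch (not part of the patch): the new packet check above allows at
// most one non-Z CVI load, one Z (TypeCVI_ZW) load, and one CVI store per packet.
// A standalone restatement of that predicate:
#include <cassert>

static bool isValidHVXMem(unsigned NonZCVIloads, unsigned AllCVIloads,
                          unsigned CVIstores) {
  const unsigned ZCVIloads = AllCVIloads - NonZCVIloads; // Z loads counted separately
  return NonZCVIloads <= 1 && ZCVIloads <= 1 && CVIstores <= 1;
}

int main() {
  assert(isValidHVXMem(1, 2, 1));  // one regular CVI load, one Z load, one store: OK
  assert(!isValidHVXMem(2, 2, 0)); // two regular CVI loads: rejected
  return 0;
}
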
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 37f90bc46ac7..ef50c5bebbfb 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -75,7 +75,8 @@ private:
CVI_XLANE = 1 << 0,
CVI_SHIFT = 1 << 1,
CVI_MPY0 = 1 << 2,
- CVI_MPY1 = 1 << 3
+ CVI_MPY1 = 1 << 3,
+ CVI_ZW = 1 << 4
};
// Count of adjacent slots that the insn requires to be executed.
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
index da339bfd3ff4..8dcd485d65e9 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -214,7 +214,7 @@ bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
return false;
return A.Id < B.Id;
};
- llvm::sort(DRNs.begin(), DRNs.end(), UsesFirst);
+ llvm::sort(DRNs, UsesFirst);
if (trace())
dbgs() << "Removing dead ref nodes:\n";
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
index 3d1ec31dada7..d8ca08e70505 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
@@ -1471,7 +1471,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
// and add a def for each S in the closure.
// Sort the refs so that the phis will be created in a deterministic order.
- llvm::sort(MaxRefs.begin(), MaxRefs.end());
+ llvm::sort(MaxRefs);
// Remove duplicates.
auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
MaxRefs.erase(NewEnd, MaxRefs.end());
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
index c257d754ddf9..9ff48d25a026 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
@@ -207,7 +207,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
};
std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
- llvm::sort(Tmp.begin(), Tmp.end(), Less);
+ llvm::sort(Tmp, Less);
// The vector is a list of instructions, so that defs coming from
// the same instruction don't need to be artificially ordered.
@@ -813,7 +813,7 @@ void Liveness::computeLiveIns() {
std::vector<RegisterRef> LV;
for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
LV.push_back(RegisterRef(I->PhysReg, I->LaneMask));
- llvm::sort(LV.begin(), LV.end());
+ llvm::sort(LV);
dbgs() << printMBBReference(B) << "\t rec = {";
for (auto I : LV)
dbgs() << ' ' << Print<RegisterRef>(I, DFG);
@@ -824,7 +824,7 @@ void Liveness::computeLiveIns() {
const RegisterAggr &LG = LiveMap[&B];
for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
LV.push_back(*I);
- llvm::sort(LV.begin(), LV.end());
+ llvm::sort(LV);
dbgs() << "\tcomp = {";
for (auto I : LV)
dbgs() << ' ' << Print<RegisterRef>(I, DFG);
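
// Illustrative sketch (not part of the patch): the RDF hunks above switch llvm::sort
// from the iterator-pair form llvm::sort(C.begin(), C.end(), ...) to the range form
// llvm::sort(C, ...). A standalone analogue of such a range wrapper, built on std::sort:
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <vector>

template <typename Range, typename Compare>
void range_sort(Range &R, Compare Cmp) {
  std::sort(std::begin(R), std::end(R), Cmp); // same work, shorter call sites
}

int main() {
  std::vector<int> V = {3, 1, 2};
  range_sort(V, [](int A, int B) { return A < B; });
  for (int X : V)
    std::printf("%d ", X); // 1 2 3
  std::printf("\n");
  return 0;
}
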
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 045a897c4126..0411704be6fb 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1498,8 +1498,8 @@ void LanaiTargetLowering::computeKnownBitsForTargetNode(
break;
case LanaiISD::SELECT_CC:
KnownBits Known2;
- DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
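
// Illustrative sketch (not part of the patch): for SELECT_CC the result may be either
// operand, so a bit is only "known" when both operands agree on it; the hunk above
// intersects the two KnownBits sets. A standalone analogue using plain masks:
#include <cstdint>
#include <cstdio>

struct Known {
  uint64_t Zero; // bits known to be 0
  uint64_t One;  // bits known to be 1
};

static Known intersect(Known A, Known B) {
  Known R;
  R.Zero = A.Zero & B.Zero; // known zero only if zero in both operands
  R.One = A.One & B.One;    // known one only if one in both operands
  return R;
}

int main() {
  Known A{0xF0, 0x0F}, B{0xF3, 0x0C};
  Known R = intersect(A, B);
  std::printf("Zero=%#llx One=%#llx\n", (unsigned long long)R.Zero,
              (unsigned long long)R.One); // Zero=0xf0 One=0xc
  return 0;
}
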
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 493d02bef37c..196768fdc56a 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -101,12 +101,12 @@ bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned BaseRegA = 0, BaseRegB = 0;
+ MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
- if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
- getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
- if (BaseRegA == BaseRegB) {
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
@@ -733,8 +733,13 @@ unsigned LanaiInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasLoadFromStackSlot(MI, Accesses)){
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -750,9 +755,9 @@ unsigned LanaiInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
-bool LanaiInstrInfo::getMemOpBaseRegImmOfsWidth(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo * /*TRI*/) const {
+bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
+ MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ unsigned &Width, const TargetRegisterInfo * /*TRI*/) const {
// Handle only loads/stores with base register followed by immediate offset
// and with add as ALU op.
if (LdSt.getNumOperands() != 4)
@@ -782,14 +787,17 @@ bool LanaiInstrInfo::getMemOpBaseRegImmOfsWidth(
break;
}
- BaseReg = LdSt.getOperand(1).getReg();
+ BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
-bool LanaiInstrInfo::getMemOpBaseRegImmOfs(
- MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool LanaiInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
+ MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
switch (LdSt.getOpcode()) {
default:
return false;
@@ -803,6 +811,6 @@ bool LanaiInstrInfo::getMemOpBaseRegImmOfs(
case Lanai::LDBs_RI:
case Lanai::LDBz_RI:
unsigned Width;
- return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
+ return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
}
}
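
// Illustrative sketch (not part of the patch): once both accesses resolve to an
// identical base operand, disjointness reduces to an interval test on (offset, width).
// The closing comparison sits outside the lines shown here, so treat this standalone
// version as an assumption about that test:
#include <algorithm>
#include <cassert>
#include <cstdint>

static bool accessesDisjoint(int64_t OffsetA, unsigned WidthA,
                             int64_t OffsetB, unsigned WidthB) {
  const int64_t LowOffset = std::min(OffsetA, OffsetB);
  const int64_t HighOffset = std::max(OffsetA, OffsetB);
  const unsigned LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  // The lower access must end at or before the higher access begins.
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  assert(accessesDisjoint(0, 4, 4, 4));  // [0,4) and [4,8) do not overlap
  assert(!accessesDisjoint(0, 8, 4, 4)); // [0,8) overlaps [4,8)
  return 0;
}
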
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index fe22fde2470b..bdcf9a361b5f 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -68,13 +68,13 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
- bool getMemOpBaseRegImmOfsWidth(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset, unsigned &Width,
- const TargetRegisterInfo *TRI) const;
+ bool getMemOperandWithOffsetWidth(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
std::pair<unsigned, unsigned>
decomposeMachineOperandsTargetFlags(unsigned TF) const override;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index 35e2542dfb13..54500b0e52e3 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -277,8 +277,7 @@ void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB,
InstrBuilder.addImm(LPAC::makePostOp(AluOpcode));
// Transfer memory operands.
- InstrBuilder->setMemRefs(MemInstr->memoperands_begin(),
- MemInstr->memoperands_end());
+ InstrBuilder.setMemRefs(MemInstr->memoperands());
}
// Function determines if ALU operation (in alu_iter) can be combined with
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index 2c21a53b13bb..10bd9e2c65d2 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -53,12 +53,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Medium;
-}
-
LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
StringRef Cpu, StringRef FeatureString,
const TargetOptions &Options,
@@ -67,7 +61,8 @@ LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(), TT, Cpu, FeatureString, Options,
getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CodeModel), OptLevel),
+ getEffectiveCodeModel(CodeModel, CodeModel::Medium),
+ OptLevel),
Subtarget(TT, Cpu, FeatureString, *this, Options, getCodeModel(),
OptLevel),
TLOF(new LanaiTargetObjectFile()) {
diff --git a/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
new file mode 100644
index 000000000000..1ad70ac72c73
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -0,0 +1,580 @@
+//===- MSP430AsmParser.cpp - Parse MSP430 assembly to MCInst instructions -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430RegisterInfo.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define DEBUG_TYPE "msp430-asm-parser"
+
+using namespace llvm;
+
+namespace {
+
+/// Parses MSP430 assembly from a stream.
+class MSP430AsmParser : public MCTargetAsmParser {
+ const MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ const MCRegisterInfo *MRI;
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool ParseDirective(AsmToken DirectiveID) override;
+ bool ParseDirectiveRefSym(AsmToken DirectiveID);
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
+ bool parseJccInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands);
+
+ bool ParseOperand(OperandVector &Operands);
+
+ bool ParseLiteralValues(unsigned Size, SMLoc L);
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ /// @name Auto-generated Matcher Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MSP430GenAsmMatcher.inc"
+
+ /// }
+
+public:
+ MSP430AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII), STI(STI), Parser(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
+ MRI = getContext().getRegisterInfo();
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+};
+
+/// A parsed MSP430 assembly operand.
+class MSP430Operand : public MCParsedAsmOperand {
+ typedef MCParsedAsmOperand Base;
+
+ enum KindTy {
+ k_Imm,
+ k_Reg,
+ k_Tok,
+ k_Mem,
+ k_IndReg,
+ k_PostIndReg
+ } Kind;
+
+ struct Memory {
+ unsigned Reg;
+ const MCExpr *Offset;
+ };
+ union {
+ const MCExpr *Imm;
+ unsigned Reg;
+ StringRef Tok;
+ Memory Mem;
+ };
+
+ SMLoc Start, End;
+
+public:
+ MSP430Operand(StringRef Tok, SMLoc const &S)
+ : Base(), Kind(k_Tok), Tok(Tok), Start(S), End(S) {}
+ MSP430Operand(KindTy Kind, unsigned Reg, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(Kind), Reg(Reg), Start(S), End(E) {}
+ MSP430Operand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Imm), Imm(Imm), Start(S), End(E) {}
+ MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {}
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert((Kind == k_Reg || Kind == k_IndReg || Kind == k_PostIndReg) &&
+ "Unexpected operand kind");
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ }
+
+ void addExprOperand(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediate when possible
+ if (!Expr)
+ Inst.addOperand(MCOperand::createImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Imm && "Unexpected operand kind");
+ assert(N == 1 && "Invalid number of operands!");
+
+ addExprOperand(Inst, Imm);
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Mem && "Unexpected operand kind");
+ assert(N == 2 && "Invalid number of operands");
+
+ Inst.addOperand(MCOperand::createReg(Mem.Reg));
+ addExprOperand(Inst, Mem.Offset);
+ }
+
+ bool isReg() const { return Kind == k_Reg; }
+ bool isImm() const { return Kind == k_Imm; }
+ bool isToken() const { return Kind == k_Tok; }
+ bool isMem() const { return Kind == k_Mem; }
+ bool isIndReg() const { return Kind == k_IndReg; }
+ bool isPostIndReg() const { return Kind == k_PostIndReg; }
+
+ bool isCGImm() const {
+ if (Kind != k_Imm)
+ return false;
+
+ int64_t Val;
+ if (!Imm->evaluateAsAbsolute(Val))
+ return false;
+
+ if (Val == 0 || Val == 1 || Val == 2 || Val == 4 || Val == 8 || Val == -1)
+ return true;
+
+ return false;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == k_Tok && "Invalid access!");
+ return Tok;
+ }
+
+ unsigned getReg() const {
+ assert(Kind == k_Reg && "Invalid access!");
+ return Reg;
+ }
+
+ void setReg(unsigned RegNo) {
+ assert(Kind == k_Reg && "Invalid access!");
+ Reg = RegNo;
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateToken(StringRef Str, SMLoc S) {
+ return make_unique<MSP430Operand>(Str, S);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_Reg, RegNum, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(Val, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateMem(unsigned RegNum,
+ const MCExpr *Val,
+ SMLoc S, SMLoc E) {
+ return make_unique<MSP430Operand>(RegNum, Val, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreateIndReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_IndReg, RegNum, S, E);
+ }
+
+ static std::unique_ptr<MSP430Operand> CreatePostIndReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<MSP430Operand>(k_PostIndReg, RegNum, S, E);
+ }
+
+ SMLoc getStartLoc() const { return Start; }
+ SMLoc getEndLoc() const { return End; }
+
+ virtual void print(raw_ostream &O) const {
+ switch (Kind) {
+ case k_Tok:
+ O << "Token " << Tok;
+ break;
+ case k_Reg:
+ O << "Register " << Reg;
+ break;
+ case k_Imm:
+ O << "Immediate " << *Imm;
+ break;
+ case k_Mem:
+ O << "Memory ";
+ O << *Mem.Offset << "(" << Reg << ")";
+ break;
+ case k_IndReg:
+ O << "RegInd " << Reg;
+ break;
+ case k_PostIndReg:
+ O << "PostInc " << Reg;
+ break;
+ }
+ }
+};
+} // end anonymous namespace
+
+bool MSP430AsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+
+ switch (MatchResult) {
+ case Match_Success:
+ Inst.setLoc(Loc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MnemonicFail:
+ return Error(Loc, "invalid instruction mnemonic");
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = Loc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(ErrorLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MSP430Operand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = Loc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ default:
+ return true;
+ }
+}
+
+// Auto-generated by TableGen
+static unsigned MatchRegisterName(StringRef Name);
+static unsigned MatchRegisterAltName(StringRef Name);
+
+bool MSP430AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ auto Name = getLexer().getTok().getIdentifier().lower();
+ RegNo = MatchRegisterName(Name);
+ if (RegNo == MSP430::NoRegister) {
+ RegNo = MatchRegisterAltName(Name);
+ if (RegNo == MSP430::NoRegister)
+ return true;
+ }
+
+ AsmToken const &T = getParser().getTok();
+ StartLoc = T.getLoc();
+ EndLoc = T.getEndLoc();
+ getLexer().Lex(); // eat register token
+
+ return false;
+ }
+
+ return Error(StartLoc, "invalid register name");
+}
+
+bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ if (!Name.startswith_lower("j"))
+ return true;
+
+ auto CC = Name.drop_front().lower();
+ unsigned CondCode;
+ if (CC == "ne" || CC == "nz")
+ CondCode = MSP430CC::COND_NE;
+ else if (CC == "eq" || CC == "z")
+ CondCode = MSP430CC::COND_E;
+ else if (CC == "lo" || CC == "nc")
+ CondCode = MSP430CC::COND_LO;
+ else if (CC == "hs" || CC == "c")
+ CondCode = MSP430CC::COND_HS;
+ else if (CC == "n")
+ CondCode = MSP430CC::COND_N;
+ else if (CC == "ge")
+ CondCode = MSP430CC::COND_GE;
+ else if (CC == "l")
+ CondCode = MSP430CC::COND_L;
+ else if (CC == "mp")
+ CondCode = MSP430CC::COND_NONE;
+ else
+ return Error(NameLoc, "unknown instruction");
+
+ if (CondCode == (unsigned)MSP430CC::COND_NONE)
+ Operands.push_back(MSP430Operand::CreateToken("jmp", NameLoc));
+ else {
+ Operands.push_back(MSP430Operand::CreateToken("j", NameLoc));
+ const MCExpr *CCode = MCConstantExpr::create(CondCode, getContext());
+ Operands.push_back(MSP430Operand::CreateImm(CCode, SMLoc(), SMLoc()));
+ }
+
+ // Skip optional '$' sign.
+ if (getLexer().getKind() == AsmToken::Dollar)
+ getLexer().Lex(); // Eat '$'
+
+ const MCExpr *Val;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (getParser().parseExpression(Val))
+ return Error(ExprLoc, "expected expression operand");
+
+ int64_t Res;
+ if (Val->evaluateAsAbsolute(Res))
+ if (Res < -512 || Res > 511)
+ return Error(ExprLoc, "invalid jump offset");
+
+ Operands.push_back(MSP430Operand::CreateImm(Val, ExprLoc,
+ getLexer().getLoc()));
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ getParser().eatToEndOfStatement();
+ return Error(Loc, "unexpected token");
+ }
+
+ getParser().Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ // Drop .w suffix
+ if (Name.endswith_lower(".w"))
+ Name = Name.drop_back(2);
+
+ if (!parseJccInstruction(Info, Name, NameLoc, Operands))
+ return false;
+
+ // First operand is instruction mnemonic
+ Operands.push_back(MSP430Operand::CreateToken(Name, NameLoc));
+
+ // If there are no more operands, then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse first operand
+ if (ParseOperand(Operands))
+ return true;
+
+ // Parse second operand if any
+ if (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // Eat ','
+ if (ParseOperand(Operands))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ getParser().eatToEndOfStatement();
+ return Error(Loc, "unexpected token");
+ }
+
+ getParser().Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MSP430AsmParser::ParseDirectiveRefSym(AsmToken DirectiveID) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
+ return false;
+}
+
+bool MSP430AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal.lower() == ".long") {
+ ParseLiteralValues(4, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".word" || IDVal.lower() == ".short") {
+ ParseLiteralValues(2, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".byte") {
+ ParseLiteralValues(1, DirectiveID.getLoc());
+ } else if (IDVal.lower() == ".refsym") {
+ return ParseDirectiveRefSym(DirectiveID);
+ }
+ return true;
+}
+
+bool MSP430AsmParser::ParseOperand(OperandVector &Operands) {
+ switch (getLexer().getKind()) {
+ default: return true;
+ case AsmToken::Identifier: {
+ // try rN
+ unsigned RegNo;
+ SMLoc StartLoc, EndLoc;
+ if (!ParseRegister(RegNo, StartLoc, EndLoc)) {
+ Operands.push_back(MSP430Operand::CreateReg(RegNo, StartLoc, EndLoc));
+ return false;
+ }
+ LLVM_FALLTHROUGH;
+ }
+ case AsmToken::Integer:
+ case AsmToken::Plus:
+ case AsmToken::Minus: {
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ const MCExpr *Val;
+ // Try constexpr[(rN)]
+ if (!getParser().parseExpression(Val)) {
+ unsigned RegNo = MSP430::PC;
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ // Try (rN)
+ if (getLexer().getKind() == AsmToken::LParen) {
+ getLexer().Lex(); // Eat '('
+ SMLoc RegStartLoc;
+ if (ParseRegister(RegNo, RegStartLoc, EndLoc))
+ return true;
+ if (getLexer().getKind() != AsmToken::RParen)
+ return true;
+ EndLoc = getParser().getTok().getEndLoc();
+ getLexer().Lex(); // Eat ')'
+ }
+ Operands.push_back(MSP430Operand::CreateMem(RegNo, Val, StartLoc,
+ EndLoc));
+ return false;
+ }
+ return true;
+ }
+ case AsmToken::Amp: {
+ // Try &constexpr
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '&'
+ const MCExpr *Val;
+ if (!getParser().parseExpression(Val)) {
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ Operands.push_back(MSP430Operand::CreateMem(MSP430::SR, Val, StartLoc,
+ EndLoc));
+ return false;
+ }
+ return true;
+ }
+ case AsmToken::At: {
+ // Try @rN[+]
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '@'
+ unsigned RegNo;
+ SMLoc RegStartLoc, EndLoc;
+ if (ParseRegister(RegNo, RegStartLoc, EndLoc))
+ return true;
+ if (getLexer().getKind() == AsmToken::Plus) {
+ Operands.push_back(MSP430Operand::CreatePostIndReg(RegNo, StartLoc, EndLoc));
+ getLexer().Lex(); // Eat '+'
+ return false;
+ }
+ if (Operands.size() > 1) // Emulate @rd in destination position as 0(rd)
+ Operands.push_back(MSP430Operand::CreateMem(RegNo,
+ MCConstantExpr::create(0, getContext()), StartLoc, EndLoc));
+ else
+ Operands.push_back(MSP430Operand::CreateIndReg(RegNo, StartLoc, EndLoc));
+ return false;
+ }
+ case AsmToken::Hash:
+ // Try #constexpr
+ SMLoc StartLoc = getParser().getTok().getLoc();
+ getLexer().Lex(); // Eat '#'
+ const MCExpr *Val;
+ if (!getParser().parseExpression(Val)) {
+ SMLoc EndLoc = getParser().getTok().getLoc();
+ Operands.push_back(MSP430Operand::CreateImm(Val, StartLoc, EndLoc));
+ return false;
+ }
+ return true;
+ }
+}
+
+bool MSP430AsmParser::ParseLiteralValues(unsigned Size, SMLoc L) {
+ auto parseOne = [&]() -> bool {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitValue(Value, Size, L);
+ return false;
+ };
+ return (parseMany(parseOne));
+}
+
+extern "C" void LLVMInitializeMSP430AsmParser() {
+ RegisterMCAsmParser<MSP430AsmParser> X(getTheMSP430Target());
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MSP430GenAsmMatcher.inc"
+
+static unsigned convertGR16ToGR8(unsigned Reg) {
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown GR16 register");
+ case MSP430::PC: return MSP430::PCB;
+ case MSP430::SP: return MSP430::SPB;
+ case MSP430::SR: return MSP430::SRB;
+ case MSP430::CG: return MSP430::CGB;
+ case MSP430::FP: return MSP430::FPB;
+ case MSP430::R5: return MSP430::R5B;
+ case MSP430::R6: return MSP430::R6B;
+ case MSP430::R7: return MSP430::R7B;
+ case MSP430::R8: return MSP430::R8B;
+ case MSP430::R9: return MSP430::R9B;
+ case MSP430::R10: return MSP430::R10B;
+ case MSP430::R11: return MSP430::R11B;
+ case MSP430::R12: return MSP430::R12B;
+ case MSP430::R13: return MSP430::R13B;
+ case MSP430::R14: return MSP430::R14B;
+ case MSP430::R15: return MSP430::R15B;
+ }
+}
+
+unsigned MSP430AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ MSP430Operand &Op = static_cast<MSP430Operand &>(AsmOp);
+
+ if (!Op.isReg())
+ return Match_InvalidOperand;
+
+ unsigned Reg = Op.getReg();
+ bool isGR16 =
+ MSP430MCRegisterClasses[MSP430::GR16RegClassID].contains(Reg);
+
+ if (isGR16 && (Kind == MCK_GR8)) {
+ Op.setReg(convertGR16ToGR8(Reg));
+ return Match_Success;
+ }
+
+ return Match_InvalidOperand;
+}
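
// Illustrative sketch (not part of the patch): isCGImm() in the new parser above
// accepts exactly the values the MSP430 constant generators can produce, so those
// immediates need no extension word. A standalone restatement:
#include <cassert>
#include <cstdint>

static bool isConstantGeneratorImm(int64_t Val) {
  return Val == 0 || Val == 1 || Val == 2 || Val == 4 || Val == 8 || Val == -1;
}

int main() {
  assert(isConstantGeneratorImm(8));
  assert(isConstantGeneratorImm(-1));
  assert(!isConstantGeneratorImm(3)); // needs a #imm extension word instead
  return 0;
}
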
diff --git a/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
new file mode 100644
index 000000000000..e5da130f9bbb
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -0,0 +1,387 @@
+//===-- MSP430Disassembler.cpp - Disassembler for MSP430 ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430Disassembler class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "msp430-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class MSP430Disassembler : public MCDisassembler {
+ DecodeStatus getInstructionI(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+ DecodeStatus getInstructionII(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+ DecodeStatus getInstructionCJ(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
+public:
+ MSP430Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+
+ DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createMSP430Disassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MSP430Disassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializeMSP430Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(getTheMSP430Target(),
+ createMSP430Disassembler);
+}
+
+static const unsigned GR8DecoderTable[] = {
+ MSP430::PCB, MSP430::SPB, MSP430::SRB, MSP430::CGB,
+ MSP430::FPB, MSP430::R5B, MSP430::R6B, MSP430::R7B,
+ MSP430::R8B, MSP430::R9B, MSP430::R10B, MSP430::R11B,
+ MSP430::R12B, MSP430::R13B, MSP430::R14B, MSP430::R15B
+};
+
+static DecodeStatus DecodeGR8RegisterClass(MCInst &MI, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = GR8DecoderTable[RegNo];
+ MI.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static const unsigned GR16DecoderTable[] = {
+ MSP430::PC, MSP430::SP, MSP430::SR, MSP430::CG,
+ MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
+};
+
+static DecodeStatus DecodeGR16RegisterClass(MCInst &MI, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = GR16DecoderTable[RegNo];
+ MI.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+#include "MSP430GenDisassemblerTables.inc"
+
+static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
+ const void *Decoder) {
+ int64_t Imm;
+ switch (Bits) {
+ default:
+ llvm_unreachable("Invalid immediate value");
+ case 0x22: Imm = 4; break;
+ case 0x32: Imm = 8; break;
+ case 0x03: Imm = 0; break;
+ case 0x13: Imm = 1; break;
+ case 0x23: Imm = 2; break;
+ case 0x33: Imm = -1; break;
+ }
+ MI.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Reg = Bits & 15;
+ unsigned Imm = Bits >> 4;
+
+ if (DecodeGR16RegisterClass(MI, Reg, Address, Decoder) !=
+ MCDisassembler::Success)
+ return MCDisassembler::Fail;
+
+ MI.addOperand(MCOperand::createImm((int16_t)Imm));
+ return MCDisassembler::Success;
+}
+
+enum AddrMode {
+ amInvalid = 0,
+ amRegister,
+ amIndexed,
+ amIndirect,
+ amIndirectPost,
+ amSymbolic,
+ amImmediate,
+ amAbsolute,
+ amConstant
+};
+
+static AddrMode DecodeSrcAddrMode(unsigned Rs, unsigned As) {
+ switch (Rs) {
+ case 0:
+ if (As == 1) return amSymbolic;
+ if (As == 2) return amInvalid;
+ if (As == 3) return amImmediate;
+ break;
+ case 2:
+ if (As == 1) return amAbsolute;
+ if (As == 2) return amConstant;
+ if (As == 3) return amConstant;
+ break;
+ case 3:
+ return amConstant;
+ default:
+ break;
+ }
+ switch (As) {
+ case 0: return amRegister;
+ case 1: return amIndexed;
+ case 2: return amIndirect;
+ case 3: return amIndirectPost;
+ default:
+ llvm_unreachable("As out of range");
+ }
+}
+
+static AddrMode DecodeSrcAddrModeI(unsigned Insn) {
+ unsigned Rs = fieldFromInstruction(Insn, 8, 4);
+ unsigned As = fieldFromInstruction(Insn, 4, 2);
+ return DecodeSrcAddrMode(Rs, As);
+}
+
+static AddrMode DecodeSrcAddrModeII(unsigned Insn) {
+ unsigned Rs = fieldFromInstruction(Insn, 0, 4);
+ unsigned As = fieldFromInstruction(Insn, 4, 2);
+ return DecodeSrcAddrMode(Rs, As);
+}
+
+static AddrMode DecodeDstAddrMode(unsigned Insn) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 4);
+ unsigned Ad = fieldFromInstruction(Insn, 7, 1);
+ switch (Rd) {
+ case 0: return Ad ? amSymbolic : amRegister;
+ case 2: return Ad ? amAbsolute : amRegister;
+ default:
+ break;
+ }
+ return Ad ? amIndexed : amRegister;
+}
+
+static const uint8_t *getDecoderTable(AddrMode SrcAM, unsigned Words) {
+ assert(0 < Words && Words < 4 && "Incorrect number of words");
+ switch (SrcAM) {
+ default:
+ llvm_unreachable("Invalid addressing mode");
+ case amRegister:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableAlpha32 : DecoderTableAlpha16;
+ case amConstant:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableBeta32 : DecoderTableBeta16;
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ assert(Words > 1 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableGamma32 : DecoderTableGamma48;
+ case amIndirect:
+ case amIndirectPost:
+ assert(Words < 3 && "Incorrect number of words");
+ return Words == 2 ? DecoderTableDelta32 : DecoderTableDelta16;
+ }
+}
+
+DecodeStatus MSP430Disassembler::getInstructionI(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ AddrMode SrcAM = DecodeSrcAddrModeI(Insn);
+ AddrMode DstAM = DecodeDstAddrMode(Insn);
+ if (SrcAM == amInvalid || DstAM == amInvalid) {
+    Size = 2; // skip one word and let the disassembler try further
+ return MCDisassembler::Fail;
+ }
+
+ unsigned Words = 1;
+ switch (SrcAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + 2) << 16;
+ ++Words;
+ break;
+ default:
+ break;
+ }
+ switch (DstAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + Words * 2)
+ << (Words * 16);
+ ++Words;
+ break;
+ default:
+ break;
+ }
+
+ DecodeStatus Result = decodeInstruction(getDecoderTable(SrcAM, Words), MI,
+ Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = Words * 2;
+ return Result;
+ }
+
+ Size = 2;
+ return DecodeStatus::Fail;
+}
+
+DecodeStatus MSP430Disassembler::getInstructionII(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ AddrMode SrcAM = DecodeSrcAddrModeII(Insn);
+ if (SrcAM == amInvalid) {
+    Size = 2; // skip one word and let the disassembler try further
+ return MCDisassembler::Fail;
+ }
+
+ unsigned Words = 1;
+ switch (SrcAM) {
+ case amIndexed:
+ case amSymbolic:
+ case amImmediate:
+ case amAbsolute:
+ if (Bytes.size() < (Words + 1) * 2) {
+ Size = 2;
+ return DecodeStatus::Fail;
+ }
+ Insn |= (uint64_t)support::endian::read16le(Bytes.data() + 2) << 16;
+ ++Words;
+ break;
+ default:
+ break;
+ }
+
+ const uint8_t *DecoderTable = Words == 2 ? DecoderTable32 : DecoderTable16;
+ DecodeStatus Result = decodeInstruction(DecoderTable, MI, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = Words * 2;
+ return Result;
+ }
+
+ Size = 2;
+ return DecodeStatus::Fail;
+}
+
+static MSP430CC::CondCodes getCondCode(unsigned Cond) {
+ switch (Cond) {
+ case 0: return MSP430CC::COND_NE;
+ case 1: return MSP430CC::COND_E;
+ case 2: return MSP430CC::COND_LO;
+ case 3: return MSP430CC::COND_HS;
+ case 4: return MSP430CC::COND_N;
+ case 5: return MSP430CC::COND_GE;
+ case 6: return MSP430CC::COND_L;
+ case 7: return MSP430CC::COND_NONE;
+ default:
+ llvm_unreachable("Cond out of range");
+ }
+}
+
+DecodeStatus MSP430Disassembler::getInstructionCJ(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ unsigned Cond = fieldFromInstruction(Insn, 10, 3);
+ unsigned Offset = fieldFromInstruction(Insn, 0, 10);
+
+ MI.addOperand(MCOperand::createImm(SignExtend32(Offset, 10)));
+
+ if (Cond == 7)
+ MI.setOpcode(MSP430::JMP);
+ else {
+ MI.setOpcode(MSP430::JCC);
+ MI.addOperand(MCOperand::createImm(getCondCode(Cond)));
+ }
+
+ Size = 2;
+ return DecodeStatus::Success;
+}
+
+DecodeStatus MSP430Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ uint64_t Insn = support::endian::read16le(Bytes.data());
+ unsigned Opc = fieldFromInstruction(Insn, 13, 3);
+ switch (Opc) {
+ case 0:
+ return getInstructionII(MI, Size, Bytes, Address, VStream, CStream);
+ case 1:
+ return getInstructionCJ(MI, Size, Bytes, Address, VStream, CStream);
+ default:
+ return getInstructionI(MI, Size, Bytes, Address, VStream, CStream);
+ }
+}
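
// Illustrative sketch (not part of the patch): getInstruction() above keys on bits
// 15..13 of the first word -- 0 selects a format II (single-operand) instruction,
// 1 a conditional jump, anything else a format I (two-operand) instruction.
// A standalone version of that dispatch:
#include <cstdint>
#include <cstdio>

enum class MSP430Format { II, Jump, I };

static MSP430Format classifyWord(uint16_t Word) {
  switch ((Word >> 13) & 0x7) {
  case 0:  return MSP430Format::II;
  case 1:  return MSP430Format::Jump;
  default: return MSP430Format::I;
  }
}

int main() {
  // 0x3C00 has opcode field 1 -> a jump encoding (condition bits 111, i.e. jmp).
  std::printf("%d\n", static_cast<int>(classifyWord(0x3C00))); // 1 (Jump)
  // 0x4303 ("mov #0, r3", the canonical NOP) has opcode field 2 -> format I.
  std::printf("%d\n", static_cast<int>(classifyWord(0x4303))); // 2 (I)
  return 0;
}
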
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index be6d1a84a377..4d62547bc65b 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -16,28 +16,34 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-
// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
#include "MSP430GenAsmWriter.inc"
void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
- printInstruction(MI, O);
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
printAnnotation(O, Annot);
}
void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isImm())
- O << Op.getImm();
- else {
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm() * 2 + 2;
+ O << "$";
+ if (Imm >= 0)
+ O << '+';
+ O << Imm;
+ } else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
Op.getExpr()->print(O, &MAI);
}
@@ -72,7 +78,7 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
// vs
// mov.w glb(r1), r2
// Otherwise (!) msp430-as will silently miscompile the output :(
- if (!Base.getReg())
+ if (Base.getReg() == MSP430::SR)
O << '&';
if (Disp.isExpr())
@@ -83,10 +89,23 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
}
// Print register base field
- if (Base.getReg())
+ if ((Base.getReg() != MSP430::SR) &&
+ (Base.getReg() != MSP430::PC))
O << '(' << getRegisterName(Base.getReg()) << ')';
}
+void MSP430InstPrinter::printIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ O << "@" << getRegisterName(Base.getReg());
+}
+
+void MSP430InstPrinter::printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Base = MI->getOperand(OpNo);
+ O << "@" << getRegisterName(Base.getReg()) << "+";
+}
+
void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned CC = MI->getOperand(OpNo).getImm();
@@ -112,5 +131,8 @@ void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
case MSP430CC::COND_L:
O << 'l';
break;
+ case MSP430CC::COND_N:
+ O << 'n';
+ break;
}
}
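
// Illustrative sketch (not part of the patch): printPCRelImmOperand() above turns the
// stored word offset back into the "$+N" byte form, where N = imm * 2 + 2 (the +2
// accounts for the PC already pointing past the jump). A standalone version:
#include <cstddef>
#include <cstdint>
#include <cstdio>

static void printPCRelImm(int64_t Imm, char *Buf, std::size_t Len) {
  int64_t Bytes = Imm * 2 + 2;
  std::snprintf(Buf, Len, "$%+lld", (long long)Bytes);
}

int main() {
  char Buf[16];
  printPCRelImm(4, Buf, sizeof(Buf));
  std::printf("%s\n", Buf);  // $+10
  printPCRelImm(-3, Buf, sizeof(Buf));
  std::printf("%s\n", Buf);  // $-4
  return 0;
}
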
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 72afec18becb..cd02c4fa645a 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -28,13 +28,20 @@ namespace llvm {
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
+private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
+ void printIndRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
new file mode 100644
index 000000000000..bd69a9d8d795
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -0,0 +1,178 @@
+//===-- MSP430AsmBackend.cpp - MSP430 Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MSP430FixupKinds.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class MSP430AsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+
+ uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ MCContext &Ctx) const;
+
+public:
+ MSP430AsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI)
+ : MCAsmBackend(support::little), OSABI(OSABI) {}
+ ~MSP430AsmBackend() override {}
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override;
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createMSP430ELFObjectWriter(OSABI);
+ }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout,
+ const bool WasForced) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override {
+ return MSP430::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[MSP430::NumTargetFixupKinds] = {
+      // This table must be in the same order as the enum in MSP430FixupKinds.h.
+ //
+ // name offset bits flags
+ {"fixup_32", 0, 32, 0},
+ {"fixup_10_pcrel", 0, 10, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_16", 0, 16, 0},
+ {"fixup_16_pcrel", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_16_byte", 0, 16, 0},
+ {"fixup_16_pcrel_byte", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_2x_pcrel", 0, 10, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_rl_pcrel", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_8", 0, 8, 0},
+ {"fixup_sym_diff", 0, 32, 0},
+ };
+ static_assert((array_lengthof(Infos)) == MSP430::NumTargetFixupKinds,
+ "Not all fixup kinds added to Infos array");
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
+ return false;
+ }
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {}
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+};
+
+uint64_t MSP430AsmBackend::adjustFixupValue(const MCFixup &Fixup,
+ uint64_t Value,
+ MCContext &Ctx) const {
+ unsigned Kind = Fixup.getKind();
+ switch (Kind) {
+ case MSP430::fixup_10_pcrel: {
+ if (Value & 0x1)
+ Ctx.reportError(Fixup.getLoc(), "fixup value must be 2-byte aligned");
+
+ // Offset is signed
+ int16_t Offset = Value;
+ // Jumps are in words
+ Offset >>= 1;
+ // PC points to the next instruction, so decrement by one
+ --Offset;
+
+ if (Offset < -512 || Offset > 511)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+
+ // Mask 10 bits
+ Offset &= 0x3ff;
+
+ return Offset;
+ }
+ default:
+ return Value;
+ }
+}
+
+void MSP430AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const {
+ Value = adjustFixupValue(Fixup, Value, Asm.getContext());
+ MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+ if (!Value)
+ return; // Doesn't change encoding.
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
+
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+}
+
+bool MSP430AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+ if ((Count % 2) != 0)
+ return false;
+
+ // The canonical nop on MSP430 is mov #0, r3
+ uint64_t NopCount = Count / 2;
+ while (NopCount--)
+ OS.write("\x03\x43", 2);
+
+ return true;
+}
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createMSP430MCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options) {
+ return new MSP430AsmBackend(STI, ELF::ELFOSABI_STANDALONE);
+}
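
The arithmetic in adjustFixupValue() above for fixup_10_pcrel is easy to check in isolation. The sketch below is a hypothetical standalone helper (not part of the patch; the function name and the error flag are illustrative only) that mirrors the same steps: the byte offset must be even, is converted to a word count, decremented because the PC already points past the jump, range-checked against the signed 10-bit window, and masked to 10 bits.

    #include <cstdint>

    // Minimal sketch of the fixup_10_pcrel adjustment, mirroring the code above.
    static uint16_t adjustJmpOffset(uint64_t Value, bool &Error) {
      Error = (Value & 0x1) != 0;                      // target must be 2-byte aligned
      int16_t Offset = static_cast<int16_t>(Value);
      Offset >>= 1;                                    // jumps are counted in words
      --Offset;                                        // PC already points to the next insn
      Error = Error || Offset < -512 || Offset > 511;  // signed 10-bit range
      return static_cast<uint16_t>(Offset) & 0x3ff;
    }

    // Example: a branch 20 bytes forward yields 20/2 - 1 = 9 words.
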
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
new file mode 100644
index 000000000000..e47db2400a05
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -0,0 +1,59 @@
+//===-- MSP430ELFObjectWriter.cpp - MSP430 ELF Writer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MSP430FixupKinds.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class MSP430ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MSP430ELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(false, OSABI, ELF::EM_MSP430,
+ /*HasRelocationAddend*/ true) {}
+
+ ~MSP430ELFObjectWriter() override {}
+
+protected:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override {
+ // Translate fixup kind to ELF relocation type.
+ switch ((unsigned)Fixup.getKind()) {
+ case FK_Data_1: return ELF::R_MSP430_8;
+ case FK_Data_2: return ELF::R_MSP430_16_BYTE;
+ case FK_Data_4: return ELF::R_MSP430_32;
+ case MSP430::fixup_32: return ELF::R_MSP430_32;
+ case MSP430::fixup_10_pcrel: return ELF::R_MSP430_10_PCREL;
+ case MSP430::fixup_16: return ELF::R_MSP430_16;
+ case MSP430::fixup_16_pcrel: return ELF::R_MSP430_16_PCREL;
+ case MSP430::fixup_16_byte: return ELF::R_MSP430_16_BYTE;
+ case MSP430::fixup_16_pcrel_byte: return ELF::R_MSP430_16_PCREL_BYTE;
+ case MSP430::fixup_2x_pcrel: return ELF::R_MSP430_2X_PCREL;
+ case MSP430::fixup_rl_pcrel: return ELF::R_MSP430_RL_PCREL;
+ case MSP430::fixup_8: return ELF::R_MSP430_8;
+ case MSP430::fixup_sym_diff: return ELF::R_MSP430_SYM_DIFF;
+ default:
+ llvm_unreachable("Invalid fixup kind");
+ }
+ }
+};
+} // end of anonymous namespace
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createMSP430ELFObjectWriter(uint8_t OSABI) {
+ return llvm::make_unique<MSP430ELFObjectWriter>(OSABI);
+}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
new file mode 100644
index 000000000000..9449cb278024
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -0,0 +1,81 @@
+//===-- MSP430ELFStreamer.cpp - MSP430 ELF Target Streamer Methods --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class MSP430TargetELFStreamer : public MCTargetStreamer {
+public:
+ MCELFStreamer &getStreamer();
+ MSP430TargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+};
+
+// This part is for ELF object output.
+MSP430TargetELFStreamer::MSP430TargetELFStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI)
+ : MCTargetStreamer(S) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+ MCA.setELFHeaderEFlags(EFlags);
+
+ // Emit build attributes section according to
+ // MSP430 EABI (slaa534.pdf, part 13).
+ MCSection *AttributeSection = getStreamer().getContext().getELFSection(
+ ".MSP430.attributes", ELF::SHT_MSP430_ATTRIBUTES, 0);
+ Streamer.SwitchSection(AttributeSection);
+
+ // Format version.
+ Streamer.EmitIntValue(0x41, 1);
+ // Subsection length.
+ Streamer.EmitIntValue(22, 4);
+ // Vendor name string, zero-terminated.
+ Streamer.EmitBytes("mspabi");
+ Streamer.EmitIntValue(0, 1);
+
+ // Attribute vector scope tag. 1 stands for the entire file.
+ Streamer.EmitIntValue(1, 1);
+ // Attribute vector length.
+ Streamer.EmitIntValue(11, 4);
+ // OFBA_MSPABI_Tag_ISA(4) = 1, MSP430
+ Streamer.EmitIntValue(4, 1);
+ Streamer.EmitIntValue(1, 1);
+ // OFBA_MSPABI_Tag_Code_Model(6) = 1, Small
+ Streamer.EmitIntValue(6, 1);
+ Streamer.EmitIntValue(1, 1);
+ // OFBA_MSPABI_Tag_Data_Model(8) = 1, Small
+ Streamer.EmitIntValue(8, 1);
+ Streamer.EmitIntValue(1, 1);
+}
+
+MCELFStreamer &MSP430TargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+}
+
+MCTargetStreamer *
+createMSP430ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new MSP430TargetELFStreamer(S, STI);
+ return nullptr;
+}
+
+} // namespace llvm
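
For reference, the constructor above emits a fixed 23-byte .MSP430.attributes blob, and the magic lengths 22 and 11 are exactly the sizes of the sub-section and of the attribute vector. The array below is an illustrative byte-for-byte view of that output (it assumes the little-endian integer layout used by EmitIntValue on this target and is not part of the patch):

    // .MSP430.attributes contents produced by MSP430TargetELFStreamer above.
    static const unsigned char MSP430Attrs[] = {
      0x41,                               // format version
      0x16, 0x00, 0x00, 0x00,             // sub-section length = 22
      'm', 's', 'p', 'a', 'b', 'i', 0x00, // vendor string, NUL-terminated
      0x01,                               // scope tag: whole file
      0x0B, 0x00, 0x00, 0x00,             // attribute vector length = 11
      0x04, 0x01,                         // OFBA_MSPABI_Tag_ISA = 1 (MSP430)
      0x06, 0x01,                         // OFBA_MSPABI_Tag_Code_Model = 1 (small)
      0x08, 0x01,                         // OFBA_MSPABI_Tag_Data_Model = 1 (small)
    };
    // Sub-section length: 4 + 7 + 1 + 4 + 6 = 22; attribute vector: 1 + 4 + 6 = 11.
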
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
new file mode 100644
index 000000000000..1eb6a2759423
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
@@ -0,0 +1,53 @@
+//===-- MSP430FixupKinds.h - MSP430 Specific Fixup Entries ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430FIXUPKINDS_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+#undef MSP430
+
+namespace llvm {
+namespace MSP430 {
+
+// This table must be in the same order as
+// MCFixupKindInfo Infos[MSP430::NumTargetFixupKinds]
+// in MSP430AsmBackend.cpp.
+//
+enum Fixups {
+ // A 32 bit absolute fixup.
+ fixup_32 = FirstTargetFixupKind,
+ // A 10 bit PC relative fixup.
+ fixup_10_pcrel,
+ // A 16 bit absolute fixup.
+ fixup_16,
+ // A 16 bit PC relative fixup.
+ fixup_16_pcrel,
+ // A 16 bit absolute fixup for byte operations.
+ fixup_16_byte,
+ // A 16 bit PC relative fixup for command address.
+ fixup_16_pcrel_byte,
+ // A 10 bit PC relative fixup for complicated polymorphs.
+ fixup_2x_pcrel,
+ // A 16 bit relaxable fixup.
+ fixup_rl_pcrel,
+ // An 8 bit absolute fixup.
+ fixup_8,
+ // A 32 bit symbol difference fixup.
+ fixup_sym_diff,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace MSP430
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 82e6731ecd78..36e9a9c31075 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -20,6 +20,7 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
CodePointerSize = CalleeSaveStackSlotSize = 2;
CommentString = ";";
+ SeparatorString = "{";
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
new file mode 100644
index 000000000000..06f9f307cb1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
@@ -0,0 +1,211 @@
+//===-- MSP430MCCodeEmitter.cpp - Convert MSP430 code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "MCTargetDesc/MSP430FixupKinds.h"
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace llvm {
+
+class MSP430MCCodeEmitter : public MCCodeEmitter {
+ MCContext &Ctx;
+ MCInstrInfo const &MCII;
+
+ // Offset keeps track of the current word number being emitted
+ // inside a particular instruction.
+ mutable unsigned Offset;
+
+ /// TableGen'erated function for getting the binary encoding for an
+ /// instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Returns the binary encoding of operands.
+ ///
+ /// If an operand requires relocation, the relocation is recorded
+ /// and zero is returned.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getMemOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getPCRelImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getCGImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getCCOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+public:
+ MSP430MCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
+ : Ctx(ctx), MCII(MCII) {}
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+};
+
+void MSP430MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ // Get byte count of instruction.
+ unsigned Size = Desc.getSize();
+
+ // Initialize fixup offset
+ Offset = 2;
+
+ uint64_t BinaryOpCode = getBinaryCodeForInstr(MI, Fixups, STI);
+ size_t WordCount = Size / 2;
+
+ while (WordCount--) {
+ support::endian::write(OS, (uint16_t)BinaryOpCode, support::little);
+ BinaryOpCode >>= 16;
+ }
+}
+
+unsigned MSP430MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ if (MO.isImm()) {
+ Offset += 2;
+ return MO.getImm();
+ }
+
+ assert(MO.isExpr() && "Expected expr operand");
+ Fixups.push_back(MCFixup::create(Offset, MO.getExpr(),
+ static_cast<MCFixupKind>(MSP430::fixup_16_byte), MI.getLoc()));
+ Offset += 2;
+ return 0;
+}
+
+unsigned MSP430MCCodeEmitter::getMemOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO1 = MI.getOperand(Op);
+ assert(MO1.isReg() && "Register operand expected");
+ unsigned Reg = Ctx.getRegisterInfo()->getEncodingValue(MO1.getReg());
+
+ const MCOperand &MO2 = MI.getOperand(Op + 1);
+ if (MO2.isImm()) {
+ Offset += 2;
+ return ((unsigned)MO2.getImm() << 4) | Reg;
+ }
+
+ assert(MO2.isExpr() && "Expr operand expected");
+ MSP430::Fixups FixupKind;
+ switch (Reg) {
+ case 0:
+ FixupKind = MSP430::fixup_16_pcrel_byte;
+ break;
+ case 2:
+ FixupKind = MSP430::fixup_16_byte;
+ break;
+ default:
+ FixupKind = MSP430::fixup_16_byte;
+ break;
+ }
+ Fixups.push_back(MCFixup::create(Offset, MO2.getExpr(),
+ static_cast<MCFixupKind>(FixupKind), MI.getLoc()));
+ Offset += 2;
+ return Reg;
+}
+
+unsigned MSP430MCCodeEmitter::getPCRelImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr() && "Expr operand expected");
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ static_cast<MCFixupKind>(MSP430::fixup_10_pcrel), MI.getLoc()));
+ return 0;
+}
+
+unsigned MSP430MCCodeEmitter::getCGImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ assert(MO.isImm() && "Immediate operand expected");
+
+ int64_t Imm = MO.getImm();
+ switch (Imm) {
+ default:
+ llvm_unreachable("Invalid immediate value");
+ case 4: return 0x22;
+ case 8: return 0x32;
+ case 0: return 0x03;
+ case 1: return 0x13;
+ case 2: return 0x23;
+ case -1: return 0x33;
+ }
+}
+
+unsigned MSP430MCCodeEmitter::getCCOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ assert(MO.isImm() && "Immediate operand expected");
+ switch (MO.getImm()) {
+ case MSP430CC::COND_NE: return 0;
+ case MSP430CC::COND_E: return 1;
+ case MSP430CC::COND_LO: return 2;
+ case MSP430CC::COND_HS: return 3;
+ case MSP430CC::COND_N: return 4;
+ case MSP430CC::COND_GE: return 5;
+ case MSP430CC::COND_L: return 6;
+ default:
+ llvm_unreachable("Unknown condition code");
+ }
+}
+
+MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new MSP430MCCodeEmitter(Ctx, MCII);
+}
+
+#include "MSP430GenMCCodeEmitter.inc"
+
+} // end of namespace llvm
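
The constants returned by getCGImmOpValue() above are not arbitrary: they pack the MSP430 constant-generator encoding, with the source register (R2/SR or R3/CG) in the low nibble and the As addressing-mode bits in bits 5-4. That is how the immediates 0, 1, 2, 4, 8 and -1 are materialised without spending an extra immediate word. A sketch of the mapping (illustrative table, not part of the patch):

    // Decoding of the getCGImmOpValue() return values: {Rs, As, constant}.
    struct CGEncoding { unsigned Rs; unsigned As; int Imm; };
    static const CGEncoding CGTable[] = {
      {3, 0,  0},  // 0x03: R3 with As=00 reads as 0
      {3, 1,  1},  // 0x13: R3 with As=01 reads as 1
      {3, 2,  2},  // 0x23: R3 with As=10 reads as 2
      {3, 3, -1},  // 0x33: R3 with As=11 reads as -1
      {2, 2,  4},  // 0x22: R2 with As=10 reads as 4
      {2, 3,  8},  // 0x32: R2 with As=11 reads as 8
    };
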
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 8c715500f38b..b21145d3904a 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -58,22 +58,15 @@ static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
}
extern "C" void LLVMInitializeMSP430TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<MSP430MCAsmInfo> X(getTheMSP430Target());
+ Target &T = getTheMSP430Target();
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(getTheMSP430Target(),
- createMSP430MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(getTheMSP430Target(),
- createMSP430MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(getTheMSP430Target(),
- createMSP430MCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(getTheMSP430Target(),
- createMSP430MCInstPrinter);
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(T);
+ TargetRegistry::RegisterMCInstrInfo(T, createMSP430MCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(T, createMSP430MCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(T, createMSP430MCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(T, createMSP430MCInstPrinter);
+ TargetRegistry::RegisterMCCodeEmitter(T, createMSP430MCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(T, createMSP430MCAsmBackend);
+ TargetRegistry::RegisterObjectTargetStreamer(
+ T, createMSP430ObjectTargetStreamer);
}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index b901c5f09794..e484c79c9ee9 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -15,12 +15,39 @@
#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
+#include <memory>
namespace llvm {
class Target;
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCInstrInfo;
+class MCSubtargetInfo;
+class MCRegisterInfo;
+class MCContext;
+class MCTargetOptions;
+class MCObjectTargetWriter;
+class MCStreamer;
+class MCTargetStreamer;
Target &getTheMSP430Target();
+/// Creates a machine code emitter for MSP430.
+MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMSP430MCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options);
+
+MCTargetStreamer *
+createMSP430ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+
+std::unique_ptr<MCObjectTargetWriter>
+createMSP430ELFObjectWriter(uint8_t OSABI);
+
} // End llvm namespace
// Defines symbolic names for MSP430 registers.
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm/lib/Target/MSP430/MSP430.h
index 796f25233123..7a5314a10844 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.h
@@ -27,6 +27,8 @@ namespace MSP430CC {
COND_LO = 3, // aka COND_NC
COND_GE = 4,
COND_L = 5,
+ COND_N = 6, // jump if negative
+ COND_NONE, // unconditional
COND_INVALID = -1
};
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td
index 203864dd4065..8fa99dc13dd5 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.td
@@ -64,11 +64,29 @@ include "MSP430InstrInfo.td"
def MSP430InstrInfo : InstrInfo;
+//===---------------------------------------------------------------------===//
+// Assembly Printers
+//===---------------------------------------------------------------------===//
+
+def MSP430AsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+}
+
+//===---------------------------------------------------------------------===//
+// Assembly Parsers
+//===---------------------------------------------------------------------===//
+
+def MSP430AsmParser : AsmParser {
+ let AllowDuplicateRegisterNames = 1;
+ let ShouldEmitMatchRegisterAltName = 1;
+}
+
//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def MSP430 : Target {
let InstructionSet = MSP430InstrInfo;
+ let AssemblyParsers = [MSP430AsmParser];
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 005f5f44a635..7a1998ad355d 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -98,6 +98,7 @@ namespace {
MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel) {}
+ private:
StringRef getPassName() const override {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
@@ -112,8 +113,9 @@ namespace {
// Include the pieces autogenerated from the target description.
#include "MSP430GenDAGISel.inc"
- private:
+ // Main method to transform nodes into machine nodes.
void Select(SDNode *N) override;
+
bool tryIndexedLoad(SDNode *Op);
bool tryIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8,
unsigned Opc16);
@@ -250,11 +252,9 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
if (MatchAddress(N, AM))
return false;
- EVT VT = N.getValueType();
- if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase)
if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- }
+ AM.Base.Reg = CurDAG->getRegister(MSP430::SR, MVT::i16);
Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase)
? CurDAG->getTargetFrameIndex(
@@ -336,10 +336,10 @@ bool MSP430DAGToDAGISel::tryIndexedLoad(SDNode *N) {
unsigned Opcode = 0;
switch (VT.SimpleTy) {
case MVT::i8:
- Opcode = MSP430::MOV8rm_POST;
+ Opcode = MSP430::MOV8rp;
break;
case MVT::i16:
- Opcode = MSP430::MOV16rm_POST;
+ Opcode = MSP430::MOV16rp;
break;
default:
return false;
@@ -362,12 +362,11 @@ bool MSP430DAGToDAGISel::tryIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
MVT VT = LD->getMemoryVT().getSimpleVT();
unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ MachineMemOperand *MemRef = cast<MemSDNode>(N1)->getMemOperand();
SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
SDNode *ResNode =
CurDAG->SelectNodeTo(Op, Opc, VT, MVT::i16, MVT::Other, Ops0);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemRef});
// Transfer chain.
ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
// Transfer writeback.
@@ -413,47 +412,47 @@ void MSP430DAGToDAGISel::Select(SDNode *Node) {
break;
case ISD::ADD:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ MSP430::ADD8rp, MSP430::ADD16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ MSP430::ADD8rp, MSP430::ADD16rp))
return;
// Other cases are autogenerated.
break;
case ISD::SUB:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ MSP430::SUB8rp, MSP430::SUB16rp))
return;
// Other cases are autogenerated.
break;
case ISD::AND:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ MSP430::AND8rp, MSP430::AND16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ MSP430::AND8rp, MSP430::AND16rp))
return;
// Other cases are autogenerated.
break;
case ISD::OR:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ MSP430::BIS8rp, MSP430::BIS16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ MSP430::BIS8rp, MSP430::BIS16rp))
return;
// Other cases are autogenerated.
break;
case ISD::XOR:
if (tryIndexedBinOp(Node, Node->getOperand(0), Node->getOperand(1),
- MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ MSP430::XOR8rp, MSP430::XOR16rp))
return;
else if (tryIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0),
- MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ MSP430::XOR8rp, MSP430::XOR16rp))
return;
// Other cases are autogenerated.
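
The *_POST to *rp renaming above ties the indexed-load selection to MSP430's auto-increment source mode (@Rn+). A hedged example of the code shape tryIndexedBinOp() targets is shown below; with the pointer in r14 and the accumulator in r15, the loop body is expected to collapse into a single "add @r14+, r15" (the register assignment is only illustrative, not mandated by the patch):

    // Post-increment load folded into the ALU operation (ADD16rp).
    int sum_words(const int *p, int n) {   // int is 16 bits on MSP430
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += *p++;                         // load + post-increment + add
      return s;
    }
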
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index f5b2bda5d1e4..3e706134afc5 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -95,6 +95,8 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::CTTZ, MVT::i8, Expand);
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
@@ -217,8 +219,6 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
// { RTLIB::NEG_F64, "__mspabi_negd", ISD::SETCC_INVALID },
// { RTLIB::NEG_F32, "__mspabi_negf", ISD::SETCC_INVALID },
- // TODO: SLL/SRA/SRL are in libgcc, RLL isn't
-
// Universal Integer Operations - EABI Table 9
{ RTLIB::SDIV_I16, "__mspabi_divi", ISD::SETCC_INVALID },
{ RTLIB::SDIV_I32, "__mspabi_divli", ISD::SETCC_INVALID },
@@ -233,6 +233,13 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
{ RTLIB::UREM_I32, "__mspabi_remul", ISD::SETCC_INVALID },
{ RTLIB::UREM_I64, "__mspabi_remull", ISD::SETCC_INVALID },
+ // Bitwise Operations - EABI Table 10
+ // TODO: __mspabi_[srli/srai/slli] ARE implemented in libgcc
+ { RTLIB::SRL_I32, "__mspabi_srll", ISD::SETCC_INVALID },
+ { RTLIB::SRA_I32, "__mspabi_sral", ISD::SETCC_INVALID },
+ { RTLIB::SHL_I32, "__mspabi_slll", ISD::SETCC_INVALID },
+ // __mspabi_[srlll/srall/sllll/rlli/rlll] are NOT implemented in libgcc
+
};
for (const auto &LC : LibraryCalls) {
@@ -940,30 +947,40 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
// Expand non-constant shifts to loops:
if (!isa<ConstantSDNode>(N->getOperand(1)))
- switch (Opc) {
- default: llvm_unreachable("Invalid shift opcode!");
- case ISD::SHL:
- return DAG.getNode(MSP430ISD::SHL, dl,
- VT, N->getOperand(0), N->getOperand(1));
- case ISD::SRA:
- return DAG.getNode(MSP430ISD::SRA, dl,
- VT, N->getOperand(0), N->getOperand(1));
- case ISD::SRL:
- return DAG.getNode(MSP430ISD::SRL, dl,
- VT, N->getOperand(0), N->getOperand(1));
- }
+ return Op;
uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
// Expand the stuff into sequence of shifts.
- // FIXME: for some shift amounts this might be done better!
- // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
SDValue Victim = N->getOperand(0);
+ if (ShiftAmount >= 8) {
+ assert(VT == MVT::i16 && "Cannot shift i8 by 8 or more");
+ switch(Opc) {
+ default:
+ llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // foo << (8 + N) => swpb(zext(foo)) << N
+ Victim = DAG.getZeroExtendInReg(Victim, dl, MVT::i8);
+ Victim = DAG.getNode(ISD::BSWAP, dl, VT, Victim);
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ // foo >> (8 + N) => sxt(swpb(foo)) >> N
+ Victim = DAG.getNode(ISD::BSWAP, dl, VT, Victim);
+ Victim = (Opc == ISD::SRA)
+ ? DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Victim,
+ DAG.getValueType(MVT::i8))
+ : DAG.getZeroExtendInReg(Victim, dl, MVT::i8);
+ break;
+ }
+ ShiftAmount -= 8;
+ }
+
if (Opc == ISD::SRL && ShiftAmount) {
// Emit a special goodness here:
// srl A, 1 => clrc; rrc A
- Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ Victim = DAG.getNode(MSP430ISD::RRCL, dl, VT, Victim);
ShiftAmount -= 1;
}
@@ -1342,15 +1359,14 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
case MSP430ISD::RRA: return "MSP430ISD::RRA";
case MSP430ISD::RLA: return "MSP430ISD::RLA";
case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::RRCL: return "MSP430ISD::RRCL";
case MSP430ISD::CALL: return "MSP430ISD::CALL";
case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
case MSP430ISD::CMP: return "MSP430ISD::CMP";
case MSP430ISD::SETCC: return "MSP430ISD::SETCC";
case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
- case MSP430ISD::SHL: return "MSP430ISD::SHL";
- case MSP430ISD::SRA: return "MSP430ISD::SRA";
- case MSP430ISD::SRL: return "MSP430ISD::SRL";
+ case MSP430ISD::DADD: return "MSP430ISD::DADD";
}
return nullptr;
}
@@ -1397,33 +1413,49 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
const TargetInstrInfo &TII = *F->getSubtarget().getInstrInfo();
unsigned Opc;
+ bool ClearCarry = false;
const TargetRegisterClass * RC;
switch (MI.getOpcode()) {
default: llvm_unreachable("Invalid shift opcode!");
case MSP430::Shl8:
- Opc = MSP430::SHL8r1;
- RC = &MSP430::GR8RegClass;
- break;
+ Opc = MSP430::ADD8rr;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Shl16:
- Opc = MSP430::SHL16r1;
- RC = &MSP430::GR16RegClass;
- break;
+ Opc = MSP430::ADD16rr;
+ RC = &MSP430::GR16RegClass;
+ break;
case MSP430::Sra8:
- Opc = MSP430::SAR8r1;
- RC = &MSP430::GR8RegClass;
- break;
+ Opc = MSP430::RRA8r;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Sra16:
- Opc = MSP430::SAR16r1;
- RC = &MSP430::GR16RegClass;
- break;
+ Opc = MSP430::RRA16r;
+ RC = &MSP430::GR16RegClass;
+ break;
case MSP430::Srl8:
- Opc = MSP430::SAR8r1c;
- RC = &MSP430::GR8RegClass;
- break;
+ ClearCarry = true;
+ Opc = MSP430::RRC8r;
+ RC = &MSP430::GR8RegClass;
+ break;
case MSP430::Srl16:
- Opc = MSP430::SAR16r1c;
- RC = &MSP430::GR16RegClass;
- break;
+ ClearCarry = true;
+ Opc = MSP430::RRC16r;
+ RC = &MSP430::GR16RegClass;
+ break;
+ case MSP430::Rrcl8:
+ case MSP430::Rrcl16: {
+ BuildMI(*BB, MI, dl, TII.get(MSP430::BIC16rc), MSP430::SR)
+ .addReg(MSP430::SR).addImm(1);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned RrcOpc = MI.getOpcode() == MSP430::Rrcl16
+ ? MSP430::RRC16r : MSP430::RRC8r;
+ BuildMI(*BB, MI, dl, TII.get(RrcOpc), DstReg)
+ .addReg(SrcReg);
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
}
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -1476,8 +1508,16 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
.addReg(ShiftAmtSrcReg).addMBB(BB)
.addReg(ShiftAmtReg2).addMBB(LoopBB);
- BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
- .addReg(ShiftReg);
+ if (ClearCarry)
+ BuildMI(LoopBB, dl, TII.get(MSP430::BIC16rc), MSP430::SR)
+ .addReg(MSP430::SR).addImm(1);
+ if (Opc == MSP430::ADD8rr || Opc == MSP430::ADD16rr)
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg)
+ .addReg(ShiftReg);
+ else
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
.addReg(ShiftAmtReg).addImm(1);
BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
@@ -1499,9 +1539,10 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
unsigned Opc = MI.getOpcode();
- if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
- Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
- Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16 ||
+ Opc == MSP430::Rrcl8 || Opc == MSP430::Rrcl16)
return EmitShiftInstr(MI, BB);
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
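
The rewritten LowerShifts() first peels a whole byte off a constant shift amount: for an i16 value, a shift by 8+N is rewritten as a byte swap (swpb) combined with a zero or sign extension, leaving only N single-bit shifts, while non-constant amounts now fall through to the loop expansion in EmitShiftInstr. The identities used for SHL and SRL can be sketched as plain 16-bit arithmetic (hypothetical helpers, not part of the patch; swpb is modelled as a byte rotate):

    #include <cstdint>

    static uint16_t swpb(uint16_t x) { return (uint16_t)((x << 8) | (x >> 8)); }

    static uint16_t shl_8_plus_n(uint16_t x, unsigned n) { // x << (8 + n)
      uint16_t v = swpb(x & 0x00FF);     // zero-extend the low byte, then swap
      return (uint16_t)(v << n);         // only n plain shifts remain
    }

    static uint16_t srl_8_plus_n(uint16_t x, unsigned n) { // x >> (8 + n), logical
      uint16_t v = swpb(x) & 0x00FF;     // swap, then zero-extend the low byte
      return (uint16_t)(v >> n);         // only n plain shifts remain
    }

The SRA case is the same as SRL, except the low byte is sign-extended instead of zero-extended.
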
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index 842d03df32fc..731bc1406711 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -36,6 +36,9 @@ namespace llvm {
/// Y = RRC X, rotate right via carry
RRC,
+ /// Rotate right via carry; the carry is cleared beforehand by clrc
+ RRCL,
+
/// CALL - These operations represent an abstract call
/// instruction, which includes a bunch of information.
CALL,
@@ -61,8 +64,9 @@ namespace llvm {
/// is condition code and operand 4 is flag operand.
SELECT_CC,
- /// SHL, SRA, SRL - Non-constant shifts.
- SHL, SRA, SRL
+ /// DADD - Decimal addition with carry
+ /// TODO Nothing generates a node of this type yet.
+ DADD,
};
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
index a9e87dad0cd8..e2e4503db20c 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrFormats.td
@@ -11,201 +11,431 @@
// Describe MSP430 instructions format here
//
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<2> val> {
- bits<2> Value = val;
-}
-
-def PseudoFrm : Format<0>;
-def SingleOpFrm : Format<1>;
-def DoubleOpFrm : Format<2>;
-def CondJumpFrm : Format<3>;
-
class SourceMode<bits<2> val> {
bits<2> Value = val;
}
-def SrcReg : SourceMode<0>;
-def SrcMem : SourceMode<1>;
-def SrcIndReg : SourceMode<2>;
-def SrcPostInc : SourceMode<3>;
-def SrcImm : SourceMode<3>;
+def SrcReg : SourceMode<0>; // r
+def SrcMem : SourceMode<1>; // m
+def SrcIndReg : SourceMode<2>; // n
+def SrcPostInc : SourceMode<3>; // p
+def SrcImm : SourceMode<3>; // i
+// SrcCGImm : SourceMode< >; // c
class DestMode<bit val> {
bit Value = val;
}
-def DstReg : DestMode<0>;
-def DstMem : DestMode<1>;
-
-class SizeVal<bits<3> val> {
- bits<3> Value = val;
-}
-
-def SizeUnknown : SizeVal<0>; // Unknown / unset size
-def SizeSpecial : SizeVal<1>; // Special instruction, e.g. pseudo
-def Size2Bytes : SizeVal<2>;
-def Size4Bytes : SizeVal<3>;
-def Size6Bytes : SizeVal<4>;
+def DstReg : DestMode<0>; // r
+def DstMem : DestMode<1>; // m
// Generic MSP430 Format
-class MSP430Inst<dag outs, dag ins, SizeVal sz, Format f,
- string asmstr> : Instruction {
- field bits<16> Inst;
+class MSP430Inst<dag outs, dag ins, int size, string asmstr> : Instruction {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
let Namespace = "MSP430";
dag OutOperandList = outs;
dag InOperandList = ins;
- Format Form = f;
- SizeVal Sz = sz;
-
- // Define how we want to layout our TargetSpecific information field... This
- // should be kept up-to-date with the fields in the MSP430InstrInfo.h file.
- let TSFlags{1-0} = Form.Value;
- let TSFlags{4-2} = Sz.Value;
-
- let AsmString = asmstr;
+ let AsmString = asmstr;
+ let Size = size;
}
-// FIXME: Create different classes for different addressing modes.
-
// MSP430 Double Operand (Format I) Instructions
-class IForm<bits<4> opcode, DestMode dest, bit bw, SourceMode src, SizeVal sz,
+class IForm<bits<4> opcode, DestMode ad, bit bw, SourceMode as, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, sz, DoubleOpFrm, asmstr> {
+ : MSP430Inst<outs, ins, size, asmstr> {
let Pattern = pattern;
- DestMode ad = dest;
- SourceMode as = src;
-
- let Inst{12-15} = opcode;
+ bits<4> rs;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = rs;
let Inst{7} = ad.Value;
let Inst{6} = bw;
- let Inst{4-5} = as.Value;
+ let Inst{5-4} = as.Value;
+ let Inst{3-0} = rd;
}
// 8 bit IForm instructions
-class IForm8<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+class IForm8<bits<4> opcode, DestMode dest, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm<opcode, dest, 1, src, sz, outs, ins, asmstr, pattern>;
+ : IForm<opcode, dest, 1, src, size, outs, ins, asmstr, pattern>;
class I8rr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+}
class I8ri<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcImm, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+}
+
+class I8rc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstReg.Value;
+ let Inst{6} = 1;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = rd;
+}
class I8rm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstReg, SrcMem, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
+
+class I8rn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcIndReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
+
+class I8rp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstReg, SrcPostInc, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
class I8mr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
class I8mi<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcImm, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ bits<20> dst;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I8mc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 4, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<20> dst;
+
+ let Inst{31-16} = dst{19-4};
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstMem.Value;
+ let Inst{6} = 1;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = dst{3-0};
+}
class I8mm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm8<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm8<opcode, DstMem, SrcMem, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ bits<20> dst;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I8mn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcIndReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
+
+class I8mp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm8<opcode, DstMem, SrcPostInc, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
// 16 bit IForm instructions
-class IForm16<bits<4> opcode, DestMode dest, SourceMode src, SizeVal sz,
+class IForm16<bits<4> opcode, DestMode dest, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm<opcode, dest, 0, src, sz, outs, ins, asmstr, pattern>;
+ : IForm<opcode, dest, 0, src, size, outs, ins, asmstr, pattern>;
class I16rr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+}
class I16ri<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcImm, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+}
+
+class I16rc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<4> rd;
+
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstReg.Value;
+ let Inst{6} = 0;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = rd;
+}
class I16rm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstReg, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstReg, SrcMem, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
+
+class I16rn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcIndReg, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
+
+class I16rp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstReg, SrcPostInc, 2, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+}
class I16mr<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcReg, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Alpha";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
class I16mi<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcImm, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcImm, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<16> imm;
+ bits<20> dst;
+ let Inst{31-16} = imm;
+ let rs = 0b0000;
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I16mc<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 4, asmstr> {
+ let DecoderNamespace = "Beta";
+ let Pattern = pattern;
+
+ bits<6> imm;
+ bits<20> dst;
+
+ let Inst{31-16} = dst{19-4};
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = imm{3-0};
+ let Inst{7} = DstMem.Value;
+ let Inst{6} = 0;
+ let Inst{5-4} = imm{5-4};
+ let Inst{3-0} = dst{3-0};
+}
class I16mm<bits<4> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IForm16<opcode, DstMem, SrcMem, Size6Bytes, outs, ins, asmstr, pattern>;
+ : IForm16<opcode, DstMem, SrcMem, 6, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Gamma";
+ bits<20> src;
+ bits<20> dst;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+ let rd = dst{3-0};
+ let Inst{47-32} = dst{19-4};
+}
+
+class I16mn<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcIndReg, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
+
+class I16mp<bits<4> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IForm16<opcode, DstMem, SrcPostInc, 4, outs, ins, asmstr, pattern> {
+ let DecoderNamespace = "Delta";
+ bits<20> dst;
+ let rd = dst{3-0};
+ let Inst{31-16} = dst{19-4};
+}
// MSP430 Single Operand (Format II) Instructions
-class IIForm<bits<9> opcode, bit bw, SourceMode src, SizeVal sz,
+class IIForm<bits<3> opcode, bit bw, SourceMode as, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, sz, SingleOpFrm, asmstr> {
+ : MSP430Inst<outs, ins, size, asmstr> {
let Pattern = pattern;
-
- SourceMode as = src;
- let Inst{7-15} = opcode;
- let Inst{6} = bw;
- let Inst{4-5} = as.Value;
+ bits<4> rs;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = bw;
+ let Inst{5-4} = as.Value;
+ let Inst{3-0} = rs;
}
// 8 bit IIForm instructions
-class IIForm8<bits<9> opcode, SourceMode src, SizeVal sz,
+class IIForm8<bits<3> opcode, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm<opcode, 1, src, sz, outs, ins, asmstr, pattern>;
+ : IIForm<opcode, 1, src, size, outs, ins, asmstr, pattern>;
+
+class II8r<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcReg, 2, outs, ins, asmstr, pattern>;
-class II8r<bits<9> opcode,
+class II8m<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IIForm8<opcode, SrcMem, 4, outs, ins, asmstr, pattern> {
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
-class II8m<bits<9> opcode,
+class II8i<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm8<opcode, SrcImm, 4, outs, ins, asmstr, pattern> {
+ bits<16> imm;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+}
-class II8i<bits<9> opcode,
+class II8c<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm8<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let Pattern = pattern;
+
+ bits<6> imm;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = 1;
+ let Inst{5-0} = imm;
+}
+
+class II8n<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcIndReg, 2, outs, ins, asmstr, pattern>;
+
+class II8p<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm8<opcode, SrcPostInc, 2, outs, ins, asmstr, pattern>;
// 16 bit IIForm instructions
-class IIForm16<bits<9> opcode, SourceMode src, SizeVal sz,
+class IIForm16<bits<3> opcode, SourceMode src, int size,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm<opcode, 0, src, sz, outs, ins, asmstr, pattern>;
+ : IIForm<opcode, 0, src, size, outs, ins, asmstr, pattern>;
-class II16r<bits<9> opcode,
+class II16r<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcReg, Size2Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcReg, 2, outs, ins, asmstr, pattern>;
-class II16m<bits<9> opcode,
+class II16m<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcMem, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcMem, 4, outs, ins, asmstr, pattern> {
+ bits<20> src;
+ let rs = src{3-0};
+ let Inst{31-16} = src{19-4};
+}
-class II16i<bits<9> opcode,
+class II16i<bits<3> opcode,
dag outs, dag ins, string asmstr, list<dag> pattern>
- : IIForm16<opcode, SrcImm, Size4Bytes, outs, ins, asmstr, pattern>;
+ : IIForm16<opcode, SrcImm, 4, outs, ins, asmstr, pattern> {
+ bits<16> imm;
+ let rs = 0b0000;
+ let Inst{31-16} = imm;
+}
+
+class II16c<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
+ let Pattern = pattern;
+
+ bits<6> imm;
+
+ let Inst{15-10} = 0b000100;
+ let Inst{9-7} = opcode;
+ let Inst{6} = 0;
+ let Inst{5-0} = imm;
+}
+
+class II16n<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcIndReg, 2, outs, ins, asmstr, pattern>;
+
+class II16p<bits<3> opcode,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : IIForm16<opcode, SrcPostInc, 2, outs, ins, asmstr, pattern>;
// MSP430 Conditional Jumps Instructions
-class CJForm<bits<3> opcode, bits<3> cond,
- dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, Size2Bytes, CondJumpFrm, asmstr> {
+class CJForm<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, 2, asmstr> {
let Pattern = pattern;
- let Inst{13-15} = opcode;
- let Inst{10-12} = cond;
+ bits<3> cond;
+ bits<10> dst;
+
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = cond;
+ let Inst{9-0} = dst;
}
// Pseudo instructions
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : MSP430Inst<outs, ins, SizeSpecial, PseudoFrm, asmstr> {
+ : MSP430Inst<outs, ins, 0, asmstr> {
let Pattern = pattern;
- let Inst{15-0} = 0;
}
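
With the layout now spelled out per class (for Format I: opcode in bits 15-12, source register in 11-8, Ad in bit 7, B/W in bit 6, As in bits 5-4, destination register in 3-0), instruction words can be assembled mechanically. A small sketch of packing a register-to-register word MOV (hypothetical helper; the register choice is only an example):

    #include <cstdint>

    // Pack a Format I (double-operand) instruction word per the IForm layout above.
    static uint16_t packIForm(unsigned Opcode, unsigned Rs, unsigned Ad,
                              unsigned BW, unsigned As, unsigned Rd) {
      return (uint16_t)((Opcode << 12) | (Rs << 8) | (Ad << 7) |
                        (BW << 6) | (As << 4) | Rd);
    }

    // "mov r10, r11": opcode 0b0100, rs = 10, Ad = 0, B/W = 0, As = 00, rd = 11,
    // i.e. packIForm(0x4, 10, 0, 0, 0, 11) == 0x4A0B.
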
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index dd1b30a3e470..c136933a51bc 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -301,35 +301,20 @@ unsigned MSP430InstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned MSP430InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MCInstrDesc &Desc = MI.getDesc();
- switch (Desc.TSFlags & MSP430II::SizeMask) {
- default:
- switch (Desc.getOpcode()) {
- default: llvm_unreachable("Unknown instruction size!");
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::DBG_VALUE:
- return 0;
- case TargetOpcode::INLINEASM: {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
- return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
- *MF->getTarget().getMCAsmInfo());
- }
- }
- case MSP430II::SizeSpecial:
- switch (MI.getOpcode()) {
- default: llvm_unreachable("Unknown instruction size!");
- case MSP430::SAR8r1c:
- case MSP430::SAR16r1c:
- return 4;
- }
- case MSP430II::Size2Bytes:
- return 2;
- case MSP430II::Size4Bytes:
- return 4;
- case MSP430II::Size6Bytes:
- return 6;
+ switch (Desc.getOpcode()) {
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::INLINEASM: {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+ return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
+ *MF->getTarget().getMCAsmInfo());
}
+ }
+
+ return Desc.getSize();
}
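
Since every concrete instruction class now sets the TableGen Size field, Desc.getSize() already knows the byte length and the old TSFlags-based table is no longer needed; only the target-independent pseudos still require explicit handling. For example, one would expect sizes along these lines (illustrative, derived from the size arguments in MSP430InstrFormats.td above):

    // Sizes now carried by MCInstrDesc::getSize(), per the format classes:
    //   I16rr / II16r / CJForm : 2 bytes  (single instruction word)
    //   I16ri / I16rm / I16mr  : 4 bytes  (one extra immediate/offset word)
    //   I16mi / I16mm          : 6 bytes  (two extra words)
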
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index 45357f54c9c6..fee3bea9b8d6 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -24,22 +24,6 @@ namespace llvm {
class MSP430Subtarget;
-/// MSP430II - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace MSP430II {
- enum {
- SizeShift = 2,
- SizeMask = 7 << SizeShift,
-
- SizeUnknown = 0 << SizeShift,
- SizeSpecial = 1 << SizeShift,
- Size2Bytes = 2 << SizeShift,
- Size4Bytes = 3 << SizeShift,
- Size6Bytes = 4 << SizeShift
- };
-}
-
class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
virtual void anchor();
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
index cec43040f60d..25c81d94f75b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -34,8 +34,9 @@ def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
-def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
- SDTCisI8<2>]>;
+def SDT_MSP430DAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>]>;
//===----------------------------------------------------------------------===//
// MSP430 Specific Node Definitions.
@@ -48,6 +49,7 @@ def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+def MSP430rrcl : SDNode<"MSP430ISD::RRCL", SDTIntUnaryOp, []>;
def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
@@ -63,33 +65,88 @@ def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
[SDNPHasChain, SDNPInGlue]>;
def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
[SDNPInGlue]>;
-def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
-def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
-def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
+def MSP430dadd : SDNode<"MSP430ISD::DADD", SDT_MSP430DAdd, []>;
//===----------------------------------------------------------------------===//
// MSP430 Operand Definitions.
//===----------------------------------------------------------------------===//
+def MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+}
+
// Address operands
def memsrc : Operand<i16> {
let PrintMethod = "printSrcMemOperand";
let MIOperandInfo = (ops GR16, i16imm);
+ let ParserMatchClass = MemAsmOperand;
+ let EncoderMethod = "getMemOpValue";
+ let DecoderMethod = "DecodeMemOperand";
}
def memdst : Operand<i16> {
let PrintMethod = "printSrcMemOperand";
let MIOperandInfo = (ops GR16, i16imm);
+ let ParserMatchClass = MemAsmOperand;
+ let EncoderMethod = "getMemOpValue";
+ let DecoderMethod = "DecodeMemOperand";
+}
+
+def IndRegAsmOperand : AsmOperandClass {
+ let Name = "IndReg";
+ let RenderMethod = "addRegOperands";
+}
+
+def indreg : Operand<i16> {
+ let PrintMethod = "printIndRegOperand";
+ let MIOperandInfo = (ops GR16);
+ let ParserMatchClass = IndRegAsmOperand;
+ let DecoderMethod = "DecodeGR16RegisterClass";
+}
+
+def PostIndRegAsmOperand : AsmOperandClass {
+ let Name = "PostIndReg";
+ let RenderMethod = "addRegOperands";
+}
+
+def postreg : Operand<i16> {
+ let PrintMethod = "printPostIndRegOperand";
+ let MIOperandInfo = (ops GR16);
+ let ParserMatchClass = PostIndRegAsmOperand;
+ let DecoderMethod = "DecodeGR16RegisterClass";
}
// Short jump targets have OtherVT type and are printed as pcrel imm values.
def jmptarget : Operand<OtherVT> {
let PrintMethod = "printPCRelImmOperand";
+ let EncoderMethod = "getPCRelImmOpValue";
}
// Operand for printing out a condition code.
def cc : Operand<i8> {
let PrintMethod = "printCCOperand";
+ let EncoderMethod = "getCCOpValue";
+}
+
+def CGImmAsmOperand : AsmOperandClass {
+ let Name = "CGImm";
+ let RenderMethod = "addImmOperands";
+}
+
+def cg8imm : Operand<i8>,
+ ImmLeaf<i8, [{return Imm == 0 || Imm == 1 || Imm == 2 ||
+ Imm == 4 || Imm == 8 || Imm == -1;}]> {
+ let ParserMatchClass = CGImmAsmOperand;
+ let EncoderMethod = "getCGImmOpValue";
+ let DecoderMethod = "DecodeCGImm";
+}
+
+def cg16imm : Operand<i16>,
+ ImmLeaf<i16, [{return Imm == 0 || Imm == 1 || Imm == 2 ||
+ Imm == 4 || Imm == 8 || Imm == -1;}]> {
+ let ParserMatchClass = CGImmAsmOperand;
+ let EncoderMethod = "getCGImmOpValue";
+ let DecoderMethod = "DecodeCGImm";
}
//===----------------------------------------------------------------------===//
@@ -102,6 +159,7 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
// Pattern Fragments
def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+def bic : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, (not node:$rhs))>;
def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
return N->hasOneUse();
}]>;
@@ -113,21 +171,21 @@ def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber SR.
-let Defs = [SP, SR], Uses = [SP] in {
+let isCodeGenOnly = 1, Defs = [SP, SR], Uses = [SP] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
- "#ADJCALLSTACKDOWN",
+ "#ADJCALLSTACKDOWN $amt1 $amt2",
[(MSP430callseq_start timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
- "#ADJCALLSTACKUP",
+ "#ADJCALLSTACKUP $amt1 $amt2",
[(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
}
-let Defs = [SR], Uses = [SP] in {
+let isCodeGenOnly = 1, Defs = [SR], Uses = [SP] in {
def ADDframe : Pseudo<(outs GR16:$dst), (ins i16imm:$base, i16imm:$offset),
"# ADDframe PSEUDO", []>;
}
-let usesCustomInserter = 1 in {
+let isCodeGenOnly = 1, usesCustomInserter = 1 in {
let Uses = [SR] in {
def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
"# Select8 PSEUDO",
@@ -141,38 +199,44 @@ let usesCustomInserter = 1 in {
let Defs = [SR] in {
def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Shl8 PSEUDO",
- [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (shl GR8:$src, GR8:$cnt))]>;
def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Shl16 PSEUDO",
- [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ [(set GR16:$dst, (shl GR16:$src, GR8:$cnt))]>;
def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Sra8 PSEUDO",
- [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (sra GR8:$src, GR8:$cnt))]>;
def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Sra16 PSEUDO",
- [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ [(set GR16:$dst, (sra GR16:$src, GR8:$cnt))]>;
def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Srl8 PSEUDO",
- [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ [(set GR8:$dst, (srl GR8:$src, GR8:$cnt))]>;
def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
"# Srl16 PSEUDO",
- [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
-
+ [(set GR16:$dst, (srl GR16:$src, GR8:$cnt))]>;
+ def Rrcl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src), "",
+ [(set GR8:$dst, (MSP430rrcl GR8:$src))]>;
+ def Rrcl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src), "",
+ [(set GR16:$dst, (MSP430rrcl GR16:$src))]>;
}
}
-let hasSideEffects = 0 in
-def NOP : Pseudo<(outs), (ins), "nop", []>;
-
//===----------------------------------------------------------------------===//
// Control Flow Instructions...
//
-// FIXME: Provide proper encoding!
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def RET : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs), (ins), "ret", [(MSP430retflag)]>;
- def RETI : II16r<0x0, (outs), (ins), "reti", [(MSP430retiflag)]>;
+ def RET : IForm16<0b0100, DstReg, SrcPostInc, 2,
+ (outs), (ins), "ret", [(MSP430retflag)]> {
+ let DecoderNamespace = "Delta";
+ let rs = 1;
+ let rd = 0;
+ }
+ def RETI : IIForm16<0b110, SrcReg, 2,
+ (outs), (ins), "reti", [(MSP430retiflag)]> {
+ let rs = 0;
+ }
}
let isBranch = 1, isTerminator = 1 in {
@@ -182,120 +246,143 @@ let isBranch = 1, isTerminator = 1 in {
// Direct branch
let isBarrier = 1 in {
// Short branch
- def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst),
+ def JMP : CJForm<(outs), (ins jmptarget:$dst),
"jmp\t$dst",
- [(br bb:$dst)]>;
- let isIndirectBranch = 1 in {
+ [(br bb:$dst)]> {
+ let cond = 0b111;
+ }
+ let isIndirectBranch = 1, rd = 0 in {
// Long branches
- def Bi : I16ri<0, (outs), (ins i16imm:$brdst),
- "br\t$brdst",
- [(brind tblockaddress:$brdst)]>;
- def Br : I16rr<0, (outs), (ins GR16:$brdst),
- "br\t$brdst",
- [(brind GR16:$brdst)]>;
- def Bm : I16rm<0, (outs), (ins memsrc:$brdst),
- "br\t$brdst",
- [(brind (load addr:$brdst))]>;
+ def Bi : I16ri<0b0100, (outs), (ins i16imm:$imm),
+ "br\t$imm",
+ [(brind tblockaddress:$imm)]>;
+ def Br : I16rr<0b0100, (outs), (ins GR16:$rs),
+ "br\t$rs",
+ [(brind GR16:$rs)]>;
+ def Bm : I16rm<0b0100, (outs), (ins memsrc:$src),
+ "br\t$src",
+ [(brind (load addr:$src))]>;
}
}
// Conditional branches
let Uses = [SR] in
- def JCC : CJForm<0, 0,
- (outs), (ins jmptarget:$dst, cc:$cc),
- "j$cc\t$dst",
- [(MSP430brcc bb:$dst, imm:$cc)]>;
+ def JCC : CJForm<(outs), (ins jmptarget:$dst, cc:$cond),
+ "j$cond\t$dst",
+ [(MSP430brcc bb:$dst, imm:$cond)]>;
} // isBranch, isTerminator
//===----------------------------------------------------------------------===//
// Call Instructions...
//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. SPW is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [R11, R12, R13, R14, R15, SR],
- Uses = [SP] in {
- def CALLi : II16i<0x0,
- (outs), (ins i16imm:$dst),
- "call\t$dst", [(MSP430call imm:$dst)]>;
- def CALLr : II16r<0x0,
- (outs), (ins GR16:$dst),
- "call\t$dst", [(MSP430call GR16:$dst)]>;
- def CALLm : II16m<0x0,
- (outs), (ins memsrc:$dst),
- "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
- }
-
+// All calls clobber the non-callee saved registers. SPW is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead. Uses for argument
+// registers are added manually.
+let isCall = 1,
+ Defs = [R11, R12, R13, R14, R15, SR],
+ Uses = [SP] in {
+ def CALLi : II16i<0b101,
+ (outs), (ins i16imm:$imm),
+ "call\t$imm", [(MSP430call imm:$imm)]>;
+ def CALLr : II16r<0b101,
+ (outs), (ins GR16:$rs),
+ "call\t$rs", [(MSP430call GR16:$rs)]>;
+ def CALLm : II16m<0b101,
+ (outs), (ins memsrc:$src),
+ "call\t$src", [(MSP430call (load addr:$src))]>;
+ def CALLn : II16n<0b101, (outs), (ins indreg:$rs), "call\t$rs", []>;
+ def CALLp : II16p<0b101, (outs), (ins postreg:$rs), "call\t$rs", []>;
+}
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions...
//
-let Defs = [SP], Uses = [SP], hasSideEffects=0 in {
+let Defs = [SP], Uses = [SP], hasSideEffects = 0 in {
let mayLoad = 1 in
-def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+def POP16r : IForm16<0b0100, DstReg, SrcPostInc, 2,
+ (outs GR16:$rd), (ins), "pop\t$rd", []> {
+ let DecoderNamespace = "Delta";
+ let rs = 1;
+}
let mayStore = 1 in
-def PUSH16r : II16r<0x0,
- (outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+def PUSH8r : II8r<0b100, (outs), (ins GR8:$rs), "push.b\t$rs", []>;
+def PUSH16r : II16r<0b100, (outs), (ins GR16:$rs), "push\t$rs", []>;
+def PUSH16c : II16c<0b100, (outs), (ins cg16imm:$imm), "push\t$imm", []>;
+def PUSH16i : II16i<0b100, (outs), (ins i16imm:$imm), "push\t$imm", []>;
}
//===----------------------------------------------------------------------===//
// Move Instructions
-// FIXME: Provide proper encoding!
let hasSideEffects = 0 in {
-def MOV8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "mov.b\t{$src, $dst}",
+def MOV8rr : I8rr<0b0100,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "mov.b\t{$rs, $rd}",
[]>;
-def MOV16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "mov.w\t{$src, $dst}",
+def MOV16rr : I16rr<0b0100,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "mov\t{$rs, $rd}",
[]>;
}
-// FIXME: Provide proper encoding!
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MOV8ri : I8ri<0x0,
- (outs GR8:$dst), (ins i8imm:$src),
- "mov.b\t{$src, $dst}",
- [(set GR8:$dst, imm:$src)]>;
-def MOV16ri : I16ri<0x0,
- (outs GR16:$dst), (ins i16imm:$src),
- "mov.w\t{$src, $dst}",
- [(set GR16:$dst, imm:$src)]>;
+def MOV8rc : I8rc<0b0100,
+ (outs GR8:$rd), (ins cg8imm:$imm),
+ "mov.b\t$imm, $rd",
+ [(set GR8:$rd, cg8imm:$imm)]>;
+def MOV16rc : I16rc<0b0100,
+ (outs GR16:$rd), (ins cg16imm:$imm),
+ "mov\t$imm, $rd",
+ [(set GR16:$rd, cg16imm:$imm)]>;
+def MOV8ri : I8ri<0b0100,
+ (outs GR8:$rd), (ins i8imm:$imm),
+ "mov.b\t{$imm, $rd}",
+ [(set GR8:$rd, imm:$imm)]>;
+def MOV16ri : I16ri<0b0100,
+ (outs GR16:$rd), (ins i16imm:$imm),
+ "mov\t{$imm, $rd}",
+ [(set GR16:$rd, imm:$imm)]>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def MOV8rm : I8rm<0x0,
- (outs GR8:$dst), (ins memsrc:$src),
- "mov.b\t{$src, $dst}",
- [(set GR8:$dst, (load addr:$src))]>;
-def MOV16rm : I16rm<0x0,
- (outs GR16:$dst), (ins memsrc:$src),
- "mov.w\t{$src, $dst}",
- [(set GR16:$dst, (load addr:$src))]>;
+def MOV8rm : I8rm<0b0100,
+ (outs GR8:$rd), (ins memsrc:$src),
+ "mov.b\t{$src, $rd}",
+ [(set GR8:$rd, (load addr:$src))]>;
+def MOV16rm : I16rm<0b0100,
+ (outs GR16:$rd), (ins memsrc:$src),
+ "mov\t{$src, $rd}",
+ [(set GR16:$rd, (load addr:$src))]>;
+def MOV8rn : I8rn<0b0100,
+ (outs GR8:$rd), (ins indreg:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR8:$rd, (load addr:$rs))]>;
+def MOV16rn : I16rn<0b0100,
+ (outs GR16:$rd), (ins indreg:$rs),
+ "mov\t{$rs, $rd}",
+ [(set GR16:$rd, (load addr:$rs))]>;
+}
+
+let isCodeGenOnly = 1 in {
+def MOVZX16rr8 : I8rr<0b0100,
+ (outs GR16:$rd), (ins GR8:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR16:$rd, (zext GR8:$rs))]>;
+def MOVZX16rm8 : I8rm<0b0100,
+ (outs GR16:$rd), (ins memsrc:$src),
+ "mov.b\t{$src, $rd}",
+ [(set GR16:$rd, (zextloadi16i8 addr:$src))]>;
}
-def MOVZX16rr8 : I8rr<0x0,
- (outs GR16:$dst), (ins GR8:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zext GR8:$src))]>;
-def MOVZX16rm8 : I8rm<0x0,
- (outs GR16:$dst), (ins memsrc:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
-def MOV8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
- "mov.b\t{@$base+, $dst}", []>;
-def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
- "mov.w\t{@$base+, $dst}", []>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$rs = $wb" in {
+def MOV8rp : I8rp<0b0100,
+ (outs GR8:$rd, GR16:$wb), (ins postreg:$rs),
+ "mov.b\t{$rs, $rd}", []>;
+def MOV16rp : I16rp<0b0100,
+ (outs GR16:$rd, GR16:$wb), (ins postreg:$rs),
+ "mov\t{$rs, $rd}", []>;
}
// Any instruction that defines a 8-bit result leaves the high half of the
@@ -313,821 +400,557 @@ def def8 : PatLeaf<(i8 GR8:$src), [{
def : Pat<(i16 (zext def8:$src)),
(SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
-def MOV8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "mov.b\t{$src, $dst}",
- [(store (i8 imm:$src), addr:$dst)]>;
-def MOV16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "mov.w\t{$src, $dst}",
- [(store (i16 imm:$src), addr:$dst)]>;
-
-def MOV8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "mov.b\t{$src, $dst}",
- [(store GR8:$src, addr:$dst)]>;
-def MOV16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "mov.w\t{$src, $dst}",
- [(store GR16:$src, addr:$dst)]>;
-
-def MOV8mm : I8mm<0x0,
+def MOV8mc : I8mc<0b0100,
+ (outs), (ins memdst:$dst, cg8imm:$imm),
+ "mov.b\t{$imm, $dst}",
+ [(store (i8 cg8imm:$imm), addr:$dst)]>;
+def MOV16mc : I16mc<0b0100,
+ (outs), (ins memdst:$dst, cg16imm:$imm),
+ "mov\t{$imm, $dst}",
+ [(store (i16 cg16imm:$imm), addr:$dst)]>;
+
+def MOV8mi : I8mi<0b0100,
+ (outs), (ins memdst:$dst, i8imm:$imm),
+ "mov.b\t{$imm, $dst}",
+ [(store (i8 imm:$imm), addr:$dst)]>;
+def MOV16mi : I16mi<0b0100,
+ (outs), (ins memdst:$dst, i16imm:$imm),
+ "mov\t{$imm, $dst}",
+ [(store (i16 imm:$imm), addr:$dst)]>;
+
+def MOV8mr : I8mr<0b0100,
+ (outs), (ins memdst:$dst, GR8:$rs),
+ "mov.b\t{$rs, $dst}",
+ [(store GR8:$rs, addr:$dst)]>;
+def MOV16mr : I16mr<0b0100,
+ (outs), (ins memdst:$dst, GR16:$rs),
+ "mov\t{$rs, $dst}",
+ [(store GR16:$rs, addr:$dst)]>;
+
+def MOV8mm : I8mm<0b0100,
(outs), (ins memdst:$dst, memsrc:$src),
"mov.b\t{$src, $dst}",
[(store (i8 (load addr:$src)), addr:$dst)]>;
-def MOV16mm : I16mm<0x0,
+def MOV16mm : I16mm<0b0100,
(outs), (ins memdst:$dst, memsrc:$src),
- "mov.w\t{$src, $dst}",
+ "mov\t{$src, $dst}",
[(store (i16 (load addr:$src)), addr:$dst)]>;
+def MOV8mn : I8mn<0b0100, (outs), (ins memdst:$dst, indreg:$rs),
+ "mov.b\t{$rs, $dst}", []>;
+def MOV16mn : I16mn<0b0100, (outs), (ins memdst:$dst, indreg:$rs),
+ "mov\t{$rs, $dst}", []>;
+
//===----------------------------------------------------------------------===//
// Arithmetic Instructions
-let Constraints = "$src = $dst" in {
-
-let Defs = [SR] in {
-
-let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
-
-def ADD8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
+multiclass Arith<bits<4> opcode, string asmstring, SDNode node,
+ bit commutes, list<Register> uses> {
+ let Defs = [SR], Uses = uses in {
+ let Constraints = "$src2 = $rd" in {
+ let isCommutable = commutes in {
+ def 8rr : I8rr<opcode, (outs GR8:$rd), (ins GR8:$src2, GR8:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, GR8:$rs)),
+ (implicit SR)]>;
+ def 16rr : I16rr<opcode, (outs GR16:$rd), (ins GR16:$src2, GR16:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, GR16:$rs)),
(implicit SR)]>;
-def ADD16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-}
-
-def ADD8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
+ }
+ def 8rm : I8rm<opcode, (outs GR8:$rd), (ins GR8:$src2, memsrc:$src),
+ !strconcat(asmstring, ".b\t$src, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, (load addr:$src))),
+ (implicit SR)]>;
+ def 16rm : I16rm<opcode, (outs GR16:$rd), (ins GR16:$src2, memsrc:$src),
+ !strconcat(asmstring, "\t$src, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, (load addr:$src))),
(implicit SR)]>;
-def ADD16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "add.b\t{@$base+, $dst}", []>;
-def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "add.w\t{@$base+, $dst}", []>;
-}
-
-
-def ADD8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src, imm:$src2)),
+ def 8rn : I8rn<opcode, (outs GR8:$rd), (ins GR8:$src2, indreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"), []>;
+ def 16rn : I16rn<opcode, (outs GR16:$rd), (ins GR16:$src2, indreg:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"), []>;
+ let mayLoad = 1,
+ hasExtraDefRegAllocReq = 1,
+ Constraints = "$rs = $wb, $src2 = $rd" in {
+ def 8rp : I8rp<opcode, (outs GR8:$rd, GR16:$wb), (ins GR8:$src2, postreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $rd"), []>;
+ def 16rp : I16rp<opcode, (outs GR16:$rd, GR16:$wb), (ins GR16:$src2, postreg:$rs),
+ !strconcat(asmstring, "\t$rs, $rd"), []>;
+ }
+ def 8rc : I8rc<opcode, (outs GR8:$rd), (ins GR8:$src2, cg8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, cg8imm:$imm)),
+ (implicit SR)]>;
+ def 16rc : I16rc<opcode, (outs GR16:$rd), (ins GR16:$src2, cg16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, cg16imm:$imm)),
+ (implicit SR)]>;
+ def 8ri : I8ri<opcode, (outs GR8:$rd), (ins GR8:$src2, i8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $rd"),
+ [(set GR8:$rd, (node GR8:$src2, imm:$imm)),
+ (implicit SR)]>;
+ def 16ri : I16ri<opcode, (outs GR16:$rd), (ins GR16:$src2, i16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $rd"),
+ [(set GR16:$rd, (node GR16:$src2, imm:$imm)),
+ (implicit SR)]>;
+ }
+ def 8mr : I8mr<opcode, (outs), (ins memdst:$dst, GR8:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"),
+ [(store (node (load addr:$dst), GR8:$rs), addr:$dst),
+ (implicit SR)]>;
+ def 16mr : I16mr<opcode, (outs), (ins memdst:$dst, GR16:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"),
+ [(store (node (load addr:$dst), GR16:$rs), addr:$dst),
(implicit SR)]>;
-def ADD16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-let Constraints = "" in {
-def ADD8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ def 8mc : I8mc<opcode, (outs), (ins memdst:$dst, cg8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i8 cg8imm:$imm)), addr:$dst),
+ (implicit SR)]>;
+ def 16mc : I16mc<opcode, (outs), (ins memdst:$dst, cg16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i16 cg16imm:$imm)), addr:$dst),
(implicit SR)]>;
-def ADD16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def ADD8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ def 8mi : I8mi<opcode, (outs), (ins memdst:$dst, i8imm:$imm),
+ !strconcat(asmstring, ".b\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i8 imm:$imm)), addr:$dst),
+ (implicit SR)]>;
+ def 16mi : I16mi<opcode, (outs), (ins memdst:$dst, i16imm:$imm),
+ !strconcat(asmstring, "\t$imm, $dst"),
+ [(store (node (load addr:$dst), (i16 imm:$imm)), addr:$dst),
(implicit SR)]>;
-def ADD16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
-
-def ADD8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "add.b\t{$src, $dst}",
- [(store (add (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+ def 8mm : I8mm<opcode, (outs), (ins memdst:$dst, memsrc:$src),
+ !strconcat(asmstring, ".b\t$src, $dst"),
+ [(store (node (load addr:$dst),
+ (i8 (load addr:$src))), addr:$dst),
+ (implicit SR)]>;
+ def 16mm : I16mm<opcode, (outs), (ins memdst:$dst, memsrc:$src),
+ !strconcat(asmstring, "\t$src, $dst"),
+ [(store (node (load addr:$dst),
+ (i16 (load addr:$src))), addr:$dst),
(implicit SR)]>;
-def ADD16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "add.w\t{$src, $dst}",
- [(store (add (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
+ def 8mn : I8mn<opcode, (outs), (ins memdst:$dst, indreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"), []>;
+ def 16mn : I16mn<opcode, (outs), (ins memdst:$dst, indreg:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"), []>;
+ def 8mp : I8mp<opcode, (outs), (ins memdst:$dst, postreg:$rs),
+ !strconcat(asmstring, ".b\t$rs, $dst"), []>;
+ def 16mp : I16mp<opcode, (outs), (ins memdst:$dst, postreg:$rs),
+ !strconcat(asmstring, "\t$rs, $dst"), []>;
+ }
}
-let Uses = [SR] in {
-
-let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
-def ADC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def ADC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-} // isCommutable
-
-def ADC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def ADC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def ADC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
- (implicit SR)]>;
-def ADC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
+defm ADD : Arith<0b0101, "add", add, 1, []>;
+defm ADDC : Arith<0b0110, "addc", adde, 1, [SR]>;
+defm AND : Arith<0b1111, "and", and, 1, []>;
+defm BIS : Arith<0b1101, "bis", or, 1, []>;
+defm BIC : Arith<0b1100, "bic", bic, 0, []>;
+defm XOR : Arith<0b1110, "xor", xor, 1, []>;
+defm SUB : Arith<0b1000, "sub", sub, 0, []>;
+defm SUBC : Arith<0b0111, "subc", sube, 0, [SR]>;
+defm DADD : Arith<0b1010, "dadd", MSP430dadd, 1, [SR]>;
+
+def ADC8r : InstAlias<"adc.b\t$dst", (ADDC8rc GR8:$dst, 0)>;
+def ADC16r : InstAlias<"adc\t$dst", (ADDC16rc GR16:$dst, 0)>;
+def ADC8m : InstAlias<"adc.b\t$dst", (ADDC8mc memdst:$dst, 0)>;
+def ADC16m : InstAlias<"adc\t$dst", (ADDC16mc memdst:$dst, 0)>;
+
+def DADC8r : InstAlias<"dadc.b\t$dst", (DADD8rc GR8:$dst, 0)>;
+def DADC16r : InstAlias<"dadc\t$dst", (DADD16rc GR16:$dst, 0)>;
+def DADC8m : InstAlias<"dadc.b\t$dst", (DADD8mc memdst:$dst, 0)>;
+def DADC16m : InstAlias<"dadc\t$dst", (DADD16mc memdst:$dst, 0)>;
+
+def DEC8r : InstAlias<"dec.b\t$dst", (SUB8rc GR8:$dst, 1)>;
+def DEC16r : InstAlias<"dec\t$dst", (SUB16rc GR16:$dst, 1)>;
+def DEC8m : InstAlias<"dec.b\t$dst", (SUB8mc memdst:$dst, 1)>;
+def DEC16m : InstAlias<"dec\t$dst", (SUB16mc memdst:$dst, 1)>;
+
+def DECD8r : InstAlias<"decd.b\t$dst", (SUB8rc GR8:$dst, 2)>;
+def DECD16r : InstAlias<"decd\t$dst", (SUB16rc GR16:$dst, 2)>;
+def DECD8m : InstAlias<"decd.b\t$dst", (SUB8mc memdst:$dst, 2)>;
+def DECD16m : InstAlias<"decd\t$dst", (SUB16mc memdst:$dst, 2)>;
+
+def INC8r : InstAlias<"inc.b\t$dst", (ADD8rc GR8:$dst, 1)>;
+def INC16r : InstAlias<"inc\t$dst", (ADD16rc GR16:$dst, 1)>;
+def INC8m : InstAlias<"inc.b\t$dst", (ADD8mc memdst:$dst, 1)>;
+def INC16m : InstAlias<"inc\t$dst", (ADD16mc memdst:$dst, 1)>;
+
+def INCD8r : InstAlias<"incd.b\t$dst", (ADD8rc GR8:$dst, 2)>;
+def INCD16r : InstAlias<"incd\t$dst", (ADD16rc GR16:$dst, 2)>;
+def INCD8m : InstAlias<"incd.b\t$dst", (ADD8mc memdst:$dst, 2)>;
+def INCD16m : InstAlias<"incd\t$dst", (ADD16mc memdst:$dst, 2)>;
+
+def SBC8r : InstAlias<"sbc.b\t$dst", (SUBC8rc GR8:$dst, 0)>;
+def SBC16r : InstAlias<"sbc\t$dst", (SUBC16rc GR16:$dst, 0)>;
+def SBC8m : InstAlias<"sbc.b\t$dst", (SUBC8mc memdst:$dst, 0)>;
+def SBC16m : InstAlias<"sbc\t$dst", (SUBC16mc memdst:$dst, 0)>;
+
+def INV8r : InstAlias<"inv.b\t$dst", (XOR8rc GR8:$dst, -1)>;
+def INV16r : InstAlias<"inv\t$dst", (XOR16rc GR16:$dst, -1)>;
+def INV8m : InstAlias<"inv.b\t$dst", (XOR8mc memdst:$dst, -1)>;
+def INV16m : InstAlias<"inv\t$dst", (XOR16mc memdst:$dst, -1)>;
+
+// printAliasInstr() doesn't check $dst operands are actually equal
+// for RLA and RLC aliases below, so disable printing aliases.
+
+def RLA8r : InstAlias<"rla.b\t$dst", (ADD8rr GR8:$dst, GR8:$dst), 0>;
+def RLA16r : InstAlias<"rla\t$dst", (ADD16rr GR16:$dst, GR16:$dst), 0>;
+def RLA8m : InstAlias<"rla.b\t$dst", (ADD8mm memdst:$dst, memdst:$dst), 0>;
+def RLA16m : InstAlias<"rla\t$dst", (ADD16mm memdst:$dst, memdst:$dst), 0>;
+
+def RLC8r : InstAlias<"rlc.b\t$dst", (ADDC8rr GR8:$dst, GR8:$dst), 0>;
+def RLC16r : InstAlias<"rlc\t$dst", (ADDC16rr GR16:$dst, GR16:$dst), 0>;
+def RLC8m : InstAlias<"rlc.b\t$dst", (ADDC8mm memdst:$dst, memdst:$dst), 0>;
+def RLC16m : InstAlias<"rlc\t$dst", (ADDC16mm memdst:$dst, memdst:$dst), 0>;
+
+def DINT : InstAlias<"dint", (BIC16rc SR, 8)>;
+def EINT : InstAlias<"eint", (BIS16rc SR, 8)>;
+
+def NOP : InstAlias<"nop", (MOV16rc CG, 0)>;
+
+def CLR8r : InstAlias<"clr.b\t$dst", (MOV8rc GR8:$dst, 0)>;
+def CLR16r : InstAlias<"clr\t$dst", (MOV16rc GR16:$dst, 0)>;
+def CLR8m : InstAlias<"clr.b\t$dst", (MOV8mc memdst:$dst, 0)>;
+def CLR16m : InstAlias<"clr\t$dst", (MOV16mc memdst:$dst, 0)>;
+
+def CLRC : InstAlias<"clrc", (BIC16rc SR, 1)>;
+def CLRN : InstAlias<"clrn", (BIC16rc SR, 4)>;
+def CLRZ : InstAlias<"clrz", (BIC16rc SR, 2)>;
+def SETC : InstAlias<"setc", (BIS16rc SR, 1)>;
+def SETN : InstAlias<"setn", (BIS16rc SR, 4)>;
+def SETZ : InstAlias<"setz", (BIS16rc SR, 2)>;
+
+def : Pat<(MSP430rla GR8:$dst), (ADD8rr $dst, $dst)>;
+def : Pat<(MSP430rla GR16:$dst), (ADD16rr $dst, $dst)>;
+
+// Format-II (Single Operand) Instruction
+// Register mode
+let Constraints = "$rs = $rd" in {
-let Constraints = "" in {
-def ADC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def ADC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def ADC8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+let Defs = [SR] in {
+def RRA8r : II8r<0b010,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "rra.b\t$rd",
+ [(set GR8:$rd, (MSP430rra GR8:$rs)),
(implicit SR)]>;
-def ADC16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+def RRA16r : II16r<0b010,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "rra\t$rd",
+ [(set GR16:$rd, (MSP430rra GR16:$rs)),
(implicit SR)]>;
-def ADC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "addc.b\t{$src, $dst}",
- [(store (adde (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+let Uses = [SR] in {
+def RRC8r : II8r<0b000,
+ (outs GR8:$rd), (ins GR8:$rs),
+ "rrc.b\t$rd",
+ [(set GR8:$rd, (MSP430rrc GR8:$rs)),
(implicit SR)]>;
-def ADC16mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "addc.w\t{$src, $dst}",
- [(store (adde (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
+def RRC16r : II16r<0b000,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "rrc\t$rd",
+ [(set GR16:$rd, (MSP430rrc GR16:$rs)),
(implicit SR)]>;
-}
-
} // Uses = [SR]
-let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
-def AND8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def AND16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
+def SEXT16r : II16r<0b011,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "sxt\t$rd",
+ [(set GR16:$rd, (sext_inreg GR16:$rs, i8)),
(implicit SR)]>;
-}
-def AND8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def AND16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, imm:$src2)),
- (implicit SR)]>;
+} // Defs = [SR]
-def AND8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
- (implicit SR)]>;
-def AND16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
+let isCodeGenOnly = 1 in
+def ZEXT16r : I8rr<0b0100,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "mov.b\t{$rs, $rd}",
+ [(set GR16:$rd, (zext (trunc GR16:$rs)))]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "and.b\t{@$base+, $dst}", []>;
-def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "and.w\t{@$base+, $dst}", []>;
-}
+def SWPB16r : II16r<0b001,
+ (outs GR16:$rd), (ins GR16:$rs),
+ "swpb\t$rd",
+ [(set GR16:$rd, (bswap GR16:$rs))]>;
-let Constraints = "" in {
-def AND8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def AND16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+} // Constraints = "$src = $dst"
-def AND8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+// Indexed, indirect register and indirect autoincrement modes
+let Defs = [SR] in {
+def RRA8m : II8m<0b010,
+ (outs), (ins memsrc:$src),
+ "rra.b\t$src",
+ [(store (MSP430rra (i8 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def AND16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
-
-def AND8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "and.b\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+def RRA16m : II16m<0b010,
+ (outs), (ins memsrc:$src),
+ "rra\t$src",
+ [(store (MSP430rra (i16 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def AND16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "and.w\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-}
-
-let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
-def OR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
-def OR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
-}
-
-def OR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
-def OR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
-
-def OR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
-def OR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "bis.b\t{@$base+, $dst}", []>;
-def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "bis.w\t{@$base+, $dst}", []>;
-}
-
-let Constraints = "" in {
-def OR8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
-def OR16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
-
-def OR8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def OR16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
-
-def OR8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (i8 (load addr:$dst)),
- (i8 (load addr:$src))), addr:$dst)]>;
-def OR16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (i16 (load addr:$dst)),
- (i16 (load addr:$src))), addr:$dst)]>;
-}
-
-// bic does not modify condition codes
-def BIC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
-def BIC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
-
-def BIC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
-def BIC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
-
-let Constraints = "" in {
-def BIC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "bic.b\t{$src, $dst}",
- [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
-def BIC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "bic.w\t{$src, $dst}",
- [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
-
-def BIC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bic.b\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (not (i8 (load addr:$src)))), addr:$dst)]>;
-def BIC16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "bic.w\t{$src, $dst}",
- [(store (and (load addr:$dst),
- (not (i16 (load addr:$src)))), addr:$dst)]>;
-}
-let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
-def XOR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def XOR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-}
+def RRA8n : II8n<0b010, (outs), (ins indreg:$rs), "rra.b\t$rs", []>;
+def RRA16n : II16n<0b010, (outs), (ins indreg:$rs), "rra\t$rs", []>;
+def RRA8p : II8p<0b010, (outs), (ins postreg:$rs), "rra.b\t$rs", []>;
+def RRA16p : II16p<0b010, (outs), (ins postreg:$rs), "rra\t$rs", []>;
-def XOR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
+let Uses = [SR] in {
+def RRC8m : II8m<0b000,
+ (outs), (ins memsrc:$src),
+ "rrc.b\t$src",
+ [(store (MSP430rrc (i8 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def XOR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def XOR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
+def RRC16m : II16m<0b000,
+ (outs), (ins memsrc:$src),
+ "rrc\t$src",
+ [(store (MSP430rrc (i16 (load addr:$src))), addr:$src),
(implicit SR)]>;
-def XOR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
- (implicit SR)]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "xor.b\t{@$base+, $dst}", []>;
-def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "xor.w\t{@$base+, $dst}", []>;
-}
+def RRC8n : II8n<0b000, (outs), (ins indreg:$rs), "rrc.b\t$rs", []>;
+def RRC16n : II16n<0b000, (outs), (ins indreg:$rs), "rrc\t$rs", []>;
+def RRC8p : II8p<0b000, (outs), (ins postreg:$rs), "rrc.b\t$rs", []>;
+def RRC16p : II16p<0b000, (outs), (ins postreg:$rs), "rrc\t$rs", []>;
-let Constraints = "" in {
-def XOR8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def XOR16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
-
-def XOR8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SR)]>;
-def XOR16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
+} // Uses = [SR]
-def XOR8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "xor.b\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-def XOR16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "xor.w\t{$src, $dst}",
- [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+def SEXT16m : II16m<0b011,
+ (outs), (ins memsrc:$src),
+ "sxt\t$src",
+ [(store (sext_inreg (extloadi16i8 addr:$src), i8),
+ addr:$src),
(implicit SR)]>;
-}
+def SEXT16n : II16n<0b011, (outs), (ins indreg:$rs), "sxt\t$rs", []>;
+def SEXT16p : II16p<0b011, (outs), (ins postreg:$rs), "sxt\t$rs", []>;
+} // Defs = [SR]
-def SUB8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def SUB16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
- (implicit SR)]>;
+def SWPB16m : II16m<0b001,
+ (outs), (ins memsrc:$src),
+ "swpb\t$src",
+ [(store (bswap (i16 (load addr:$src))), addr:$src)]>;
+def SWPB16n : II16n<0b001, (outs), (ins indreg:$rs), "swpb\t$rs", []>;
+def SWPB16p : II16p<0b001, (outs), (ins postreg:$rs), "swpb\t$rs", []>;
-def SUB8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
+// Integer comparisons
+let Defs = [SR] in {
+def CMP8rr : I8rr<0b1001,
+ (outs), (ins GR8:$rd, GR8:$rs),
+ "cmp.b\t$rs, $rd",
+ [(MSP430cmp GR8:$rd, GR8:$rs), (implicit SR)]>;
+def CMP16rr : I16rr<0b1001,
+ (outs), (ins GR16:$rd, GR16:$rs),
+ "cmp\t$rs, $rd",
+ [(MSP430cmp GR16:$rd, GR16:$rs), (implicit SR)]>;
+
+def CMP8rc : I8rc<0b1001,
+ (outs), (ins GR8:$rd, cg8imm:$imm),
+ "cmp.b\t$imm, $rd",
+ [(MSP430cmp GR8:$rd, cg8imm:$imm), (implicit SR)]>;
+def CMP16rc : I16rc<0b1001,
+ (outs), (ins GR16:$rd, cg16imm:$imm),
+ "cmp\t$imm, $rd",
+ [(MSP430cmp GR16:$rd, cg16imm:$imm), (implicit SR)]>;
+
+def CMP8ri : I8ri<0b1001,
+ (outs), (ins GR8:$rd, i8imm:$imm),
+ "cmp.b\t$imm, $rd",
+ [(MSP430cmp GR8:$rd, imm:$imm), (implicit SR)]>;
+def CMP16ri : I16ri<0b1001,
+ (outs), (ins GR16:$rd, i16imm:$imm),
+ "cmp\t$imm, $rd",
+ [(MSP430cmp GR16:$rd, imm:$imm), (implicit SR)]>;
+
+def CMP8mc : I8mc<0b1001,
+ (outs), (ins memsrc:$dst, cg8imm:$imm),
+ "cmp.b\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst), (i8 cg8imm:$imm)),
(implicit SR)]>;
-def SUB16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
+def CMP16mc : I16mc<0b1001,
+ (outs), (ins memsrc:$dst, cg16imm:$imm),
+ "cmp\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst), (i16 cg16imm:$imm)),
(implicit SR)]>;
-def SUB8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
+def CMP8mi : I8mi<0b1001,
+ (outs), (ins memsrc:$dst, i8imm:$imm),
+ "cmp.b\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst),
+ (i8 imm:$imm)), (implicit SR)]>;
+def CMP16mi : I16mi<0b1001,
+ (outs), (ins memsrc:$dst, i16imm:$imm),
+ "cmp\t$imm, $dst",
+ [(MSP430cmp (load addr:$dst),
+ (i16 imm:$imm)), (implicit SR)]>;
+
+def CMP8rm : I8rm<0b1001,
+ (outs), (ins GR8:$rd, memsrc:$src),
+ "cmp.b\t$src, $rd",
+ [(MSP430cmp GR8:$rd, (load addr:$src)),
(implicit SR)]>;
-def SUB16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
+def CMP16rm : I16rm<0b1001,
+ (outs), (ins GR16:$rd, memsrc:$src),
+ "cmp\t$src, $rd",
+ [(MSP430cmp GR16:$rd, (load addr:$src)),
(implicit SR)]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src = $dst" in {
-def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src, GR16:$base),
- "sub.b\t{@$base+, $dst}", []>;
-def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
- (outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src, GR16:$base),
- "sub.w\t{@$base+, $dst}", []>;
-}
+def CMP8rn : I8rn<0b1001,
+ (outs), (ins GR8:$rd, indreg:$rs), "cmp.b\t$rs, $rd", []>;
+def CMP16rn : I16rn<0b1001,
+ (outs), (ins GR16:$rd, indreg:$rs), "cmp\t$rs, $rd", []>;
-let Constraints = "" in {
-def SUB8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def SUB16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+def CMP8rp : I8rp<0b1001,
+ (outs), (ins GR8:$rd, postreg:$rs), "cmp.b\t$rs, $rd", []>;
+def CMP16rp : I16rp<0b1001,
+ (outs), (ins GR16:$rd, postreg:$rs), "cmp\t$rs, $rd", []>;
-def SUB8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+def CMP8mr : I8mr<0b1001,
+ (outs), (ins memsrc:$dst, GR8:$rs),
+ "cmp.b\t$rs, $dst",
+ [(MSP430cmp (load addr:$dst), GR8:$rs),
(implicit SR)]>;
-def SUB16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+def CMP16mr : I16mr<0b1001,
+ (outs), (ins memsrc:$dst, GR16:$rs),
+ "cmp\t$rs, $dst",
+ [(MSP430cmp (load addr:$dst), GR16:$rs),
(implicit SR)]>;
-
-def SUB8mm : I8mm<0x0,
+def CMP8mm : I8mm<0b1001,
(outs), (ins memdst:$dst, memsrc:$src),
- "sub.b\t{$src, $dst}",
- [(store (sub (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-def SUB16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "sub.w\t{$src, $dst}",
- [(store (sub (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
- (implicit SR)]>;
-}
-
-let Uses = [SR] in {
-def SBC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
- (implicit SR)]>;
-def SBC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
- (implicit SR)]>;
-
-def SBC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
- (implicit SR)]>;
-def SBC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
- (implicit SR)]>;
-
-def SBC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
- "subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
+ "cmp.b\t$src, $dst",
+ [(MSP430cmp (load addr:$dst), (i8 (load addr:$src))),
(implicit SR)]>;
-def SBC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
- "subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
+def CMP16mm : I16mm<0b1001, (outs), (ins memdst:$dst, memsrc:$src),
+ "cmp\t$src, $dst",
+ [(MSP430cmp (load addr:$dst), (i16 (load addr:$src))),
(implicit SR)]>;
-let Constraints = "" in {
-def SBC8mr : I8mr<0x0,
- (outs), (ins memdst:$dst, GR8:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SR)]>;
-def SBC16mr : I16mr<0x0,
- (outs), (ins memdst:$dst, GR16:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SR)]>;
+def CMP8mn : I8mn<0b1001, (outs), (ins memsrc:$dst, indreg:$rs),
+ "cmp.b\t$rs, $dst", []>;
+def CMP16mn : I16mn<0b1001, (outs), (ins memsrc:$dst, indreg:$rs),
+ "cmp\t$rs, $dst", []>;
-def SBC8mi : I8mi<0x0,
- (outs), (ins memdst:$dst, i8imm:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SR)]>;
-def SBC16mi : I16mi<0x0,
- (outs), (ins memdst:$dst, i16imm:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SR)]>;
+def CMP8mp : I8mp<0b1001, (outs), (ins memsrc:$dst, postreg:$rs),
+ "cmp.b\t$rs, $dst", []>;
+def CMP16mp : I16mp<0b1001, (outs), (ins memsrc:$dst, postreg:$rs),
+ "cmp\t$rs, $dst", []>;
-def SBC8mm : I8mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "subc.b\t{$src, $dst}",
- [(store (sube (load addr:$dst),
- (i8 (load addr:$src))), addr:$dst),
+// BIT TESTS, just sets condition codes
+// Note that the C condition is set differently than when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : I8rr<0b1011,
+ (outs), (ins GR8:$rd, GR8:$rs),
+ "bit.b\t$rs, $rd",
+ [(MSP430cmp (and_su GR8:$rd, GR8:$rs), 0),
(implicit SR)]>;
-def SBC16mm : I16mm<0x0,
- (outs), (ins memdst:$dst, memsrc:$src),
- "subc.w\t{$src, $dst}",
- [(store (sube (load addr:$dst),
- (i16 (load addr:$src))), addr:$dst),
+def BIT16rr : I16rr<0b1011,
+ (outs), (ins GR16:$rd, GR16:$rs),
+ "bit\t$rs, $rd",
+ [(MSP430cmp (and_su GR16:$rd, GR16:$rs), 0),
(implicit SR)]>;
}
-
-} // Uses = [SR]
-
-// FIXME: memory variant!
-def SAR8r1 : II8r<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "rra.b\t$dst",
- [(set GR8:$dst, (MSP430rra GR8:$src)),
+def BIT8rc : I8rc<0b1011,
+ (outs), (ins GR8:$rd, cg8imm:$imm),
+ "bit.b\t$imm, $rd",
+ [(MSP430cmp (and_su GR8:$rd, cg8imm:$imm), 0),
(implicit SR)]>;
-def SAR16r1 : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "rra.w\t$dst",
- [(set GR16:$dst, (MSP430rra GR16:$src)),
+def BIT16rc : I16rc<0b1011,
+ (outs), (ins GR16:$rd, cg16imm:$imm),
+ "bit\t$imm, $rd",
+ [(MSP430cmp (and_su GR16:$rd, cg16imm:$imm), 0),
(implicit SR)]>;
-def SHL8r1 : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src),
- "rla.b\t$dst",
- [(set GR8:$dst, (MSP430rla GR8:$src)),
+def BIT8ri : I8ri<0b1011,
+ (outs), (ins GR8:$rd, i8imm:$imm),
+ "bit.b\t$imm, $rd",
+ [(MSP430cmp (and_su GR8:$rd, imm:$imm), 0),
(implicit SR)]>;
-def SHL16r1 : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "rla.w\t$dst",
- [(set GR16:$dst, (MSP430rla GR16:$src)),
- (implicit SR)]>;
-
-def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
- "clrc\n\t"
- "rrc.b\t$dst",
- [(set GR8:$dst, (MSP430rrc GR8:$src)),
- (implicit SR)]>;
-def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
- "clrc\n\t"
- "rrc.w\t$dst",
- [(set GR16:$dst, (MSP430rrc GR16:$src)),
- (implicit SR)]>;
-
-// FIXME: Memory sext's ?
-def SEXT16r : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "sxt\t$dst",
- [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+def BIT16ri : I16ri<0b1011,
+ (outs), (ins GR16:$rd, i16imm:$imm),
+ "bit\t$imm, $rd",
+ [(MSP430cmp (and_su GR16:$rd, imm:$imm), 0),
(implicit SR)]>;
-} // Defs = [SR]
-
-def ZEXT16r : I8rr<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "mov.b\t{$src, $dst}",
- [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
-
-// FIXME: Memory bitswaps?
-def SWPB16r : II16r<0x0,
- (outs GR16:$dst), (ins GR16:$src),
- "swpb\t$dst",
- [(set GR16:$dst, (bswap GR16:$src))]>;
-
-} // Constraints = "$src = $dst"
-
-// Integer comparisons
-let Defs = [SR] in {
-def CMP8rr : I8rr<0x0,
- (outs), (ins GR8:$src, GR8:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, GR8:$src2), (implicit SR)]>;
-def CMP16rr : I16rr<0x0,
- (outs), (ins GR16:$src, GR16:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, GR16:$src2), (implicit SR)]>;
-
-def CMP8ri : I8ri<0x0,
- (outs), (ins GR8:$src, i8imm:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, imm:$src2), (implicit SR)]>;
-def CMP16ri : I16ri<0x0,
- (outs), (ins GR16:$src, i16imm:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, imm:$src2), (implicit SR)]>;
-
-def CMP8mi : I8mi<0x0,
- (outs), (ins memsrc:$src, i8imm:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp (load addr:$src),
- (i8 imm:$src2)), (implicit SR)]>;
-def CMP16mi : I16mi<0x0,
- (outs), (ins memsrc:$src, i16imm:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp (load addr:$src),
- (i16 imm:$src2)), (implicit SR)]>;
-
-def CMP8rm : I8rm<0x0,
- (outs), (ins GR8:$src, memsrc:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, (load addr:$src2)),
+def BIT8rm : I8rm<0b1011,
+ (outs), (ins GR8:$rd, memdst:$src),
+ "bit.b\t$src, $rd",
+ [(MSP430cmp (and_su GR8:$rd, (load addr:$src)), 0),
(implicit SR)]>;
-def CMP16rm : I16rm<0x0,
- (outs), (ins GR16:$src, memsrc:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, (load addr:$src2)),
+def BIT16rm : I16rm<0b1011,
+ (outs), (ins GR16:$rd, memdst:$src),
+ "bit\t$src, $rd",
+ [(MSP430cmp (and_su GR16:$rd, (load addr:$src)), 0),
(implicit SR)]>;
-def CMP8mr : I8mr<0x0,
- (outs), (ins memsrc:$src, GR8:$src2),
- "cmp.b\t{$src2, $src}",
- [(MSP430cmp (load addr:$src), GR8:$src2),
- (implicit SR)]>;
-def CMP16mr : I16mr<0x0,
- (outs), (ins memsrc:$src, GR16:$src2),
- "cmp.w\t{$src2, $src}",
- [(MSP430cmp (load addr:$src), GR16:$src2),
- (implicit SR)]>;
+def BIT8rn : I8rn<0b1011, (outs), (ins GR8:$rd, indreg:$rs),
+ "bit.b\t$rs, $rd", []>;
+def BIT16rn : I16rn<0b1011, (outs), (ins GR16:$rd, indreg:$rs),
+ "bit\t$rs, $rd", []>;
+def BIT8rp : I8rp<0b1011, (outs), (ins GR8:$rd, postreg:$rs),
+ "bit.b\t$rs, $rd", []>;
+def BIT16rp : I16rp<0b1011, (outs), (ins GR16:$rd, postreg:$rs),
+ "bit\t$rs, $rd", []>;
-// BIT TESTS, just sets condition codes
-// Note that the C condition is set differently than when using CMP.
-let isCommutable = 1 in {
-def BIT8rr : I8rr<0x0,
- (outs), (ins GR8:$src, GR8:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
- (implicit SR)]>;
-def BIT16rr : I16rr<0x0,
- (outs), (ins GR16:$src, GR16:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
- (implicit SR)]>;
-}
-def BIT8ri : I8ri<0x0,
- (outs), (ins GR8:$src, i8imm:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
- (implicit SR)]>;
-def BIT16ri : I16ri<0x0,
- (outs), (ins GR16:$src, i16imm:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
+def BIT8mr : I8mr<0b1011,
+ (outs), (ins memsrc:$dst, GR8:$rs),
+ "bit.b\t$rs, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), GR8:$rs), 0),
+ (implicit SR)]>;
+def BIT16mr : I16mr<0b1011,
+ (outs), (ins memsrc:$dst, GR16:$rs),
+ "bit\t$rs, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), GR16:$rs), 0),
(implicit SR)]>;
-def BIT8rm : I8rm<0x0,
- (outs), (ins GR8:$src, memdst:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
+def BIT8mc : I8mc<0b1011,
+ (outs), (ins memsrc:$dst, cg8imm:$imm),
+ "bit.b\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i8 cg8imm:$imm)), 0),
(implicit SR)]>;
-def BIT16rm : I16rm<0x0,
- (outs), (ins GR16:$src, memdst:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
+def BIT16mc : I16mc<0b1011,
+ (outs), (ins memdst:$dst, cg16imm:$imm),
+ "bit\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i16 cg16imm:$imm)), 0),
(implicit SR)]>;
-def BIT8mr : I8mr<0x0,
- (outs), (ins memsrc:$src, GR8:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
- (implicit SR)]>;
-def BIT16mr : I16mr<0x0,
- (outs), (ins memsrc:$src, GR16:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
- (implicit SR)]>;
-
-def BIT8mi : I8mi<0x0,
- (outs), (ins memsrc:$src, i8imm:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
+def BIT8mi : I8mi<0b1011,
+ (outs), (ins memsrc:$dst, i8imm:$imm),
+ "bit.b\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i8 imm:$imm)), 0),
(implicit SR)]>;
-def BIT16mi : I16mi<0x0,
- (outs), (ins memsrc:$src, i16imm:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
+def BIT16mi : I16mi<0b1011,
+ (outs), (ins memsrc:$dst, i16imm:$imm),
+ "bit\t$imm, $dst",
+ [(MSP430cmp (and_su (load addr:$dst), (i16 imm:$imm)), 0),
(implicit SR)]>;
-def BIT8mm : I8mm<0x0,
- (outs), (ins memsrc:$src, memsrc:$src2),
- "bit.b\t{$src2, $src}",
- [(MSP430cmp (and_su (i8 (load addr:$src)),
- (load addr:$src2)),
+def BIT8mm : I8mm<0b1011,
+ (outs), (ins memsrc:$dst, memsrc:$src),
+ "bit.b\t$src, $dst",
+ [(MSP430cmp (and_su (i8 (load addr:$dst)),
+ (load addr:$src)),
0),
(implicit SR)]>;
-def BIT16mm : I16mm<0x0,
- (outs), (ins memsrc:$src, memsrc:$src2),
- "bit.w\t{$src2, $src}",
- [(MSP430cmp (and_su (i16 (load addr:$src)),
- (load addr:$src2)),
+def BIT16mm : I16mm<0b1011,
+ (outs), (ins memsrc:$dst, memsrc:$src),
+ "bit\t$src, $dst",
+ [(MSP430cmp (and_su (i16 (load addr:$dst)),
+ (load addr:$src)),
0),
(implicit SR)]>;
+def BIT8mn : I8mn<0b1011, (outs), (ins memsrc:$dst, indreg:$rs),
+ "bit.b\t$rs, $dst", []>;
+def BIT16mn : I16mn<0b1011, (outs), (ins memsrc:$dst, indreg:$rs),
+ "bit\t$rs, $dst", []>;
+
+def BIT8mp : I8mp<0b1011, (outs), (ins memsrc:$dst, postreg:$rs),
+ "bit.b\t$rs, $dst", []>;
+def BIT16mp : I16mp<0b1011, (outs), (ins memsrc:$dst, postreg:$rs),
+ "bit\t$rs, $dst", []>;
+
} // Defs = [SR]
+def TST8r : InstAlias<"tst.b\t$dst", (CMP8rc GR8:$dst, 0)>;
+def TST16r : InstAlias<"tst\t$dst", (CMP16rc GR16:$dst, 0)>;
+def TST8m : InstAlias<"tst.b\t$dst", (CMP8mc memdst:$dst, 0)>;
+def TST16m : InstAlias<"tst\t$dst", (CMP16mc memdst:$dst, 0)>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index e7716382b222..860c0006f782 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -110,6 +110,9 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
return MCOperand::createExpr(Expr);
}
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
index b5a6ed0f0a56..1e86bdf34a0b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -11,26 +11,31 @@
// Declarations that describe the MSP430 register file
//===----------------------------------------------------------------------===//
-class MSP430Reg<bits<4> num, string n> : Register<n> {
+class MSP430Reg<bits<4> num, string n, list<string> alt = []> : Register<n> {
field bits<4> Num = num;
let Namespace = "MSP430";
+ let HWEncoding{3-0} = num;
+ let AltNames = alt;
}
-class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs,
+ list<string> alt = []>
: RegisterWithSubRegs<n, subregs> {
field bits<4> Num = num;
let Namespace = "MSP430";
+ let HWEncoding{3-0} = num;
+ let AltNames = alt;
}
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
-def PCB : MSP430Reg<0, "r0">;
-def SPB : MSP430Reg<1, "r1">;
-def SRB : MSP430Reg<2, "r2">;
-def CGB : MSP430Reg<3, "r3">;
-def FPB : MSP430Reg<4, "r4">;
+def PCB : MSP430Reg<0, "r0", ["pc"]>;
+def SPB : MSP430Reg<1, "r1", ["sp"]>;
+def SRB : MSP430Reg<2, "r2", ["sr"]>;
+def CGB : MSP430Reg<3, "r3", ["cg"]>;
+def FPB : MSP430Reg<4, "r4", ["fp"]>;
def R5B : MSP430Reg<5, "r5">;
def R6B : MSP430Reg<6, "r6">;
def R7B : MSP430Reg<7, "r7">;
@@ -46,11 +51,11 @@ def R15B : MSP430Reg<15, "r15">;
def subreg_8bit : SubRegIndex<8> { let Namespace = "MSP430"; }
let SubRegIndices = [subreg_8bit] in {
-def PC : MSP430RegWithSubregs<0, "r0", [PCB]>;
-def SP : MSP430RegWithSubregs<1, "r1", [SPB]>;
-def SR : MSP430RegWithSubregs<2, "r2", [SRB]>;
-def CG : MSP430RegWithSubregs<3, "r3", [CGB]>;
-def FP : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def PC : MSP430RegWithSubregs<0, "r0", [PCB], ["pc"]>;
+def SP : MSP430RegWithSubregs<1, "r1", [SPB], ["sp"]>;
+def SR : MSP430RegWithSubregs<2, "r2", [SRB], ["sr"]>;
+def CG : MSP430RegWithSubregs<3, "r3", [CGB], ["cg"]>;
+def FP : MSP430RegWithSubregs<4, "r4", [FPB], ["fp"]>;
def R5 : MSP430RegWithSubregs<5, "r5", [R5B]>;
def R6 : MSP430RegWithSubregs<6, "r6", [R6B]>;
def R7 : MSP430RegWithSubregs<7, "r7", [R7B]>;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 01f44e266d7b..9f6ebba75ec6 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -32,12 +32,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
return "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16";
@@ -51,7 +45,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
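The hunk above drops the target-local getEffectiveCodeModel() helper in favour of the shared overload that takes an explicit default. The underlying pattern is simply "use the requested value if one was given, otherwise fall back to a per-target default"; a minimal standalone sketch of that pattern (illustrative names, not the LLVM API):

#include <optional>

enum class CodeModel { Small, Medium, Large };

// Honour an explicit code-model request, otherwise fall back to the
// target's default (MSP430 defaults to Small above).
static CodeModel effectiveCodeModel(std::optional<CodeModel> Requested,
                                    CodeModel Default) {
  return Requested ? *Requested : Default;
}
// e.g. effectiveCodeModel(std::nullopt, CodeModel::Small) == CodeModel::Small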
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ce7db657f5e9..d2fed6861477 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -64,6 +65,11 @@ class MCInstrInfo;
} // end namespace llvm
+static cl::opt<bool>
+EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
+ cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
+ cl::init(true));
+
namespace {
class MipsAssemblerOptions {
@@ -195,7 +201,6 @@ class MipsAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
OperandMatchResultTy parseInvNum(OperandVector &Operands);
- OperandMatchResultTy parseMovePRegPair(OperandVector &Operands);
OperandMatchResultTy parseRegisterList(OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
@@ -760,7 +765,6 @@ private:
k_RegisterIndex, /// A register index in one or more RegKind.
k_Token, /// A simple token
k_RegList, /// A physical register list
- k_RegPair /// A pair of physical register
} Kind;
public:
@@ -769,16 +773,15 @@ public:
~MipsOperand() override {
switch (Kind) {
- case k_Immediate:
- break;
case k_Memory:
delete Mem.Base;
break;
case k_RegList:
delete RegList.List;
+ break;
+ case k_Immediate:
case k_RegisterIndex:
case k_Token:
- case k_RegPair:
break;
}
}
@@ -1038,6 +1041,17 @@ public:
Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
}
+ void addGPRMM16AsmRegMovePPairFirstOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+ }
+
+ void addGPRMM16AsmRegMovePPairSecondOperands(MCInst &Inst,
+ unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPRMM16Reg()));
+ }
+
/// Render the operand to an MCInst as a GPR64
/// Asserts if the wrong number of operands are requested, or the operand
/// is not a k_RegisterIndex compatible with RegKind_GPR
@@ -1217,29 +1231,6 @@ public:
Inst.addOperand(MCOperand::createReg(RegNo));
}
- void addRegPairOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
- assert((RegIdx.Kind & RegKind_GPR) && "Invalid access!");
- unsigned RegNo = getRegPair();
- AsmParser.warnIfRegIndexIsAT(RegNo, StartLoc);
- Inst.addOperand(MCOperand::createReg(
- RegIdx.RegInfo->getRegClass(
- AsmParser.getABI().AreGprs64bit()
- ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID).getRegister(RegNo++)));
- Inst.addOperand(MCOperand::createReg(
- RegIdx.RegInfo->getRegClass(
- AsmParser.getABI().AreGprs64bit()
- ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID).getRegister(RegNo)));
- }
-
- void addMovePRegPairOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
- for (auto RegNo : getRegList())
- Inst.addOperand(MCOperand::createReg(RegNo));
- }
-
bool isReg() const override {
// As a special case until we sort out the definition of div/divu, accept
// $0/$zero here so that MCK_ZERO works correctly.
@@ -1406,34 +1397,6 @@ public:
bool isRegList() const { return Kind == k_RegList; }
- bool isMovePRegPair() const {
- if (Kind != k_RegList || RegList.List->size() != 2)
- return false;
-
- unsigned R0 = RegList.List->front();
- unsigned R1 = RegList.List->back();
-
- if ((R0 == Mips::A1 && R1 == Mips::A2) ||
- (R0 == Mips::A1 && R1 == Mips::A3) ||
- (R0 == Mips::A2 && R1 == Mips::A3) ||
- (R0 == Mips::A0 && R1 == Mips::S5) ||
- (R0 == Mips::A0 && R1 == Mips::S6) ||
- (R0 == Mips::A0 && R1 == Mips::A1) ||
- (R0 == Mips::A0 && R1 == Mips::A2) ||
- (R0 == Mips::A0 && R1 == Mips::A3) ||
- (R0 == Mips::A1_64 && R1 == Mips::A2_64) ||
- (R0 == Mips::A1_64 && R1 == Mips::A3_64) ||
- (R0 == Mips::A2_64 && R1 == Mips::A3_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::S5_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::S6_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A1_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A2_64) ||
- (R0 == Mips::A0_64 && R1 == Mips::A3_64))
- return true;
-
- return false;
- }
-
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -1481,11 +1444,6 @@ public:
return *(RegList.List);
}
- unsigned getRegPair() const {
- assert((Kind == k_RegPair) && "Invalid access!");
- return RegIdx.Index;
- }
-
static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
MipsAsmParser &Parser) {
auto Op = llvm::make_unique<MipsOperand>(k_Token, Parser);
@@ -1593,18 +1551,6 @@ public:
return Op;
}
- static std::unique_ptr<MipsOperand> CreateRegPair(const MipsOperand &MOP,
- SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- auto Op = llvm::make_unique<MipsOperand>(k_RegPair, Parser);
- Op->RegIdx.Index = MOP.RegIdx.Index;
- Op->RegIdx.RegInfo = MOP.RegIdx.RegInfo;
- Op->RegIdx.Kind = MOP.RegIdx.Kind;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
bool isGPRZeroAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0;
}
@@ -1640,6 +1586,19 @@ public:
(RegIdx.Index >= 16 && RegIdx.Index <= 20));
}
+ bool isMM16AsmRegMovePPairFirst() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return RegIdx.Index >= 4 && RegIdx.Index <= 6;
+ }
+
+ bool isMM16AsmRegMovePPairSecond() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return (RegIdx.Index == 21 || RegIdx.Index == 22 ||
+ (RegIdx.Index >= 5 && RegIdx.Index <= 7));
+ }
+
bool isFGRAsmReg() const {
// AFGR64 is $0-$15 but we handle this in getAFGR64()
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
@@ -1720,9 +1679,6 @@ public:
OS << Reg << " ";
OS << ">";
break;
- case k_RegPair:
- OS << "RegPair<" << RegIdx.Index << "," << RegIdx.Index + 1 << ">";
- break;
}
}
@@ -1755,14 +1711,23 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return MipsInsts[Opcode];
}
-static bool hasShortDelaySlot(unsigned Opcode) {
- switch (Opcode) {
+static bool hasShortDelaySlot(MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ case Mips::BEQ_MM:
+ case Mips::BNE_MM:
+ case Mips::BLTZ_MM:
+ case Mips::BGEZ_MM:
+ case Mips::BLEZ_MM:
+ case Mips::BGTZ_MM:
+ case Mips::JRC16_MM:
case Mips::JALS_MM:
case Mips::JALRS_MM:
case Mips::JALRS16_MM:
case Mips::BGEZALS_MM:
case Mips::BLTZALS_MM:
return true;
+ case Mips::J_MM:
+ return !Inst.getOperand(0).isReg();
default:
return false;
}
@@ -2115,9 +2080,21 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
JalrInst.addOperand(MCOperand::createReg(Mips::RA));
JalrInst.addOperand(MCOperand::createReg(Mips::T9));
- // FIXME: Add an R_(MICRO)MIPS_JALR relocation after the JALR.
- // This relocation is supposed to be an optimization hint for the linker
- // and is not necessary for correctness.
+ if (EmitJalrReloc) {
+ // As an optimization hint for the linker, before the JALR we add:
+ // .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
+ // tmplabel:
+ MCSymbol *TmpLabel = getContext().createTempSymbol();
+ const MCExpr *TmpExpr = MCSymbolRefExpr::create(TmpLabel, getContext());
+ const MCExpr *RelocJalrExpr =
+ MCSymbolRefExpr::create(JalSym, MCSymbolRefExpr::VK_None,
+ getContext(), IDLoc);
+
+ TOut.getStreamer().EmitRelocDirective(*TmpExpr,
+ inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+ RelocJalrExpr, IDLoc, *STI);
+ TOut.getStreamer().EmitLabel(TmpLabel);
+ }
Inst = JalrInst;
ExpandedJalSym = true;
@@ -2288,6 +2265,22 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (Inst.getOperand(0).getReg() == Mips::RA)
return Error(IDLoc, "invalid operand for instruction");
break;
+ case Mips::MOVEP_MM:
+ case Mips::MOVEP_MMR6: {
+ unsigned R0 = Inst.getOperand(0).getReg();
+ unsigned R1 = Inst.getOperand(1).getReg();
+ bool RegPair = ((R0 == Mips::A1 && R1 == Mips::A2) ||
+ (R0 == Mips::A1 && R1 == Mips::A3) ||
+ (R0 == Mips::A2 && R1 == Mips::A3) ||
+ (R0 == Mips::A0 && R1 == Mips::S5) ||
+ (R0 == Mips::A0 && R1 == Mips::S6) ||
+ (R0 == Mips::A0 && R1 == Mips::A1) ||
+ (R0 == Mips::A0 && R1 == Mips::A2) ||
+ (R0 == Mips::A0 && R1 == Mips::A3));
+ if (!RegPair)
+ return Error(IDLoc, "invalid operand for instruction");
+ break;
+ }
}
}
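Because the dedicated register-pair operand is gone, the destination pair of movep is now validated here after matching. Only a fixed set of register pairs is encodable; a standalone sketch of the same table-driven check, using plain enumerators as stand-ins for the Mips::A0 etc. register numbers:

#include <utility>

enum Reg { A0, A1, A2, A3, S5, S6 };  // illustrative stand-ins

// True when (R0, R1) is one of the destination pairs MOVEP can encode,
// mirroring the pairs checked for MOVEP_MM / MOVEP_MMR6 above.
static bool isValidMovepDstPair(Reg R0, Reg R1) {
  static const std::pair<Reg, Reg> Valid[] = {
      {A1, A2}, {A1, A3}, {A2, A3}, {A0, S5},
      {A0, S6}, {A0, A1}, {A0, A2}, {A0, A3}};
  for (const auto &P : Valid)
    if (P.first == R0 && P.second == R1)
      return true;
  return false;
}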
@@ -2318,7 +2311,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
if (FillDelaySlot) {
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst.getOpcode()), IDLoc, STI);
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst), IDLoc, STI);
TOut.emitDirectiveSetReorder();
}
@@ -2330,7 +2323,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// If .set reorder has been used, we've already emitted a NOP.
// If .set noreorder has been used, we need to emit a NOP at this point.
if (!AssemblerOptions.back()->isReorder())
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst.getOpcode()), IDLoc,
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst), IDLoc,
STI);
// Load the $gp from the stack.
@@ -2617,7 +2610,7 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
// emit a NOP after it.
const MCInstrDesc &MCID = getInstDesc(JalrInst.getOpcode());
if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder())
- TOut.emitEmptyDelaySlot(hasShortDelaySlot(JalrInst.getOpcode()), IDLoc,
+ TOut.emitEmptyDelaySlot(hasShortDelaySlot(JalrInst), IDLoc,
STI);
return false;
@@ -6278,45 +6271,6 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
return MatchOperand_Success;
}
-OperandMatchResultTy
-MipsAsmParser::parseMovePRegPair(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> TmpOperands;
- SmallVector<unsigned, 10> Regs;
-
- if (Parser.getTok().isNot(AsmToken::Dollar))
- return MatchOperand_ParseFail;
-
- SMLoc S = Parser.getTok().getLoc();
-
- if (parseAnyRegister(TmpOperands) != MatchOperand_Success)
- return MatchOperand_ParseFail;
-
- MipsOperand *Reg = &static_cast<MipsOperand &>(*TmpOperands.back());
- unsigned RegNo = isGP64bit() ? Reg->getGPR64Reg() : Reg->getGPR32Reg();
- Regs.push_back(RegNo);
-
- SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(E, "',' expected");
- return MatchOperand_ParseFail;
- }
-
- // Remove comma.
- Parser.Lex();
-
- if (parseAnyRegister(TmpOperands) != MatchOperand_Success)
- return MatchOperand_ParseFail;
-
- Reg = &static_cast<MipsOperand &>(*TmpOperands.back());
- RegNo = isGP64bit() ? Reg->getGPR64Reg() : Reg->getGPR32Reg();
- Regs.push_back(RegNo);
-
- Operands.push_back(MipsOperand::CreateRegList(Regs, S, E, *this));
-
- return MatchOperand_Success;
-}
-
/// Sometimes (i.e. load/stores) the operand may be followed immediately by
/// either this.
/// ::= '(', register, ')'
@@ -6371,6 +6325,9 @@ bool MipsAsmParser::parseBracketSuffix(StringRef Name,
return false;
}
+static std::string MipsMnemonicSpellCheck(StringRef S, uint64_t FBS,
+ unsigned VariantID = 0);
+
bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
@@ -6381,7 +6338,9 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Check if we have valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
- return Error(NameLoc, "unknown instruction");
+ uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = MipsMnemonicSpellCheck(Name, FBS);
+ return Error(NameLoc, "unknown instruction" + Suggestion);
}
// First operand in MCInst is instruction mnemonic.
Operands.push_back(MipsOperand::CreateToken(Name, NameLoc, *this));
@@ -8257,6 +8216,7 @@ extern "C" void LLVMInitializeMipsAsmParser() {
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
#include "MipsGenAsmMatcher.inc"
bool MipsAsmParser::mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index b94afb9520e3..27b27ff1e1e2 100644
--- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -538,6 +538,9 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
namespace llvm {
Target &getTheMipselTarget();
@@ -2450,6 +2453,32 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned RegPair = fieldFromInstruction(Insn, 7, 3);
+ if (DecodeMovePRegPair(Inst, RegPair, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ unsigned RegRs;
+ if (static_cast<const MipsDisassembler*>(Decoder)->hasMips32r6())
+ RegRs = fieldFromInstruction(Insn, 0, 2) |
+ (fieldFromInstruction(Insn, 3, 1) << 2);
+ else
+ RegRs = fieldFromInstruction(Insn, 1, 3);
+ if (DecodeGPRMM16MovePRegisterClass(Inst, RegRs, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ unsigned RegRt = fieldFromInstruction(Insn, 4, 3);
+ if (DecodeGPRMM16MovePRegisterClass(Inst, RegRt, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address, const void *Decoder) {
switch (RegPair) {
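DecodeMovePOperands pulls the destination-pair selector and the two source register fields out of fixed bit positions of the MOVEP encoding before handing them to the existing register decoders. A standalone sketch of that field extraction (the field() helper mirrors LLVM's fieldFromInstruction(); mapping field values to physical registers is omitted):

#include <cstdint>

// Extract NumBits starting at StartBit.
static unsigned field(uint32_t Insn, unsigned StartBit, unsigned NumBits) {
  return (Insn >> StartBit) & ((1u << NumBits) - 1);
}

struct MovepFields {
  unsigned RegPair;  // selects one of the encodable destination pairs
  unsigned RegRs;    // first source register field
  unsigned RegRt;    // second source register field
};

// Field positions follow the decoder above: pair in bits 9..7, rt in bits
// 6..4, rs in bits 3..1 (or, on microMIPS R6, bits 1..0 with bit 3 as the
// high bit).
static MovepFields decodeMovepFields(uint32_t Insn, bool IsR6) {
  MovepFields F;
  F.RegPair = field(Insn, 7, 3);
  F.RegRs = IsR6 ? (field(Insn, 0, 2) | (field(Insn, 3, 1) << 2))
                 : field(Insn, 1, 3);
  F.RegRt = field(Insn, 4, 3);
  return F;
}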
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index bf1390880281..18d7dd99be34 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -55,6 +55,8 @@ MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
return MipsABIInfo::N32();
if (Options.getABIName().startswith("n64"))
return MipsABIInfo::N64();
+ if (TT.getEnvironment() == llvm::Triple::GNUABIN32)
+ return MipsABIInfo::N32();
assert(Options.getABIName().empty() && "Unknown ABI option for MIPS");
if (TT.isMIPS64())
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 3b1b94acb149..7d528fe3eab1 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -13,6 +13,7 @@
//
#include "MCTargetDesc/MipsAsmBackend.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
@@ -339,6 +340,8 @@ Optional<MCFixupKind> MipsAsmBackend::getFixupKind(StringRef Name) const {
(MCFixupKind)Mips::fixup_MICROMIPS_TLS_TPREL_HI16)
.Case("R_MICROMIPS_TLS_TPREL_LO16",
(MCFixupKind)Mips::fixup_MICROMIPS_TLS_TPREL_LO16)
+ .Case("R_MIPS_JALR", (MCFixupKind)Mips::fixup_Mips_JALR)
+ .Case("R_MICROMIPS_JALR", (MCFixupKind)Mips::fixup_MICROMIPS_JALR)
.Default(MCAsmBackend::getFixupKind(Name));
}
@@ -417,7 +420,9 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 },
{ "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 },
{ "fixup_Mips_SUB", 0, 64, 0 },
- { "fixup_MICROMIPS_SUB", 0, 64, 0 }
+ { "fixup_MICROMIPS_SUB", 0, 64, 0 },
+ { "fixup_Mips_JALR", 0, 32, 0 },
+ { "fixup_MICROMIPS_JALR", 0, 32, 0 }
};
static_assert(array_lengthof(LittleEndianInfos) == Mips::NumTargetFixupKinds,
"Not all MIPS little endian fixup kinds added!");
@@ -495,7 +500,9 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 },
{ "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 },
{ "fixup_Mips_SUB", 0, 64, 0 },
- { "fixup_MICROMIPS_SUB", 0, 64, 0 }
+ { "fixup_MICROMIPS_SUB", 0, 64, 0 },
+ { "fixup_Mips_JALR", 0, 32, 0 },
+ { "fixup_MICROMIPS_JALR", 0, 32, 0 }
};
static_assert(array_lengthof(BigEndianInfos) == Mips::NumTargetFixupKinds,
"Not all MIPS big endian fixup kinds added!");
@@ -553,6 +560,7 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
case Mips::fixup_Mips_TLSLDM:
case Mips::fixup_Mips_TPREL_HI:
case Mips::fixup_Mips_TPREL_LO:
+ case Mips::fixup_Mips_JALR:
case Mips::fixup_MICROMIPS_CALL16:
case Mips::fixup_MICROMIPS_GOT_DISP:
case Mips::fixup_MICROMIPS_GOT_PAGE:
@@ -565,6 +573,7 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
case Mips::fixup_MICROMIPS_TLS_LDM:
case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
+ case Mips::fixup_MICROMIPS_JALR:
return true;
}
}
@@ -581,6 +590,6 @@ MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
- return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(),
- Options.ABIName == "n32");
+ MipsABIInfo ABI = MipsABIInfo::computeTargetABI(STI.getTargetTriple(), STI.getCPU(), Options);
+ return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), ABI.IsN32());
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 3dc753772e5f..8ace2895d681 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -225,7 +226,9 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
case Mips::fixup_Mips_NONE:
return ELF::R_MIPS_NONE;
case FK_Data_1:
- report_fatal_error("MIPS does not support one byte relocations");
+ Ctx.reportError(Fixup.getLoc(),
+ "MIPS does not support one byte relocations");
+ return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_16:
case FK_Data_2:
return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16;
@@ -236,6 +239,10 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsPCRel) {
switch (Kind) {
+ case FK_Data_8:
+ Ctx.reportError(Fixup.getLoc(),
+ "MIPS does not support 64-bit PC-relative relocations");
+ return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_Branch_PCRel:
case Mips::fixup_Mips_PC16:
return ELF::R_MIPS_PC16;
@@ -401,6 +408,10 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_MICROMIPS_HIGHER;
case Mips::fixup_MICROMIPS_HIGHEST:
return ELF::R_MICROMIPS_HIGHEST;
+ case Mips::fixup_Mips_JALR:
+ return ELF::R_MIPS_JALR;
+ case Mips::fixup_MICROMIPS_JALR:
+ return ELF::R_MICROMIPS_JALR;
}
llvm_unreachable("invalid fixup kind!");
@@ -453,7 +464,7 @@ void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
return;
// Sort relocations by the address they are applied to.
- llvm::sort(Relocs.begin(), Relocs.end(),
+ llvm::sort(Relocs,
[](const ELFRelocationEntry &A, const ELFRelocationEntry &B) {
return A.Offset < B.Offset;
});
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index d7f6cf91db73..eedad16dddc3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -222,6 +222,10 @@ namespace Mips {
fixup_Mips_SUB,
fixup_MICROMIPS_SUB,
+ // resulting in - R_MIPS_JALR/R_MICROMIPS_JALR
+ fixup_Mips_JALR,
+ fixup_MICROMIPS_JALR,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index f498d830c8f0..1506b4a83649 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -21,9 +21,8 @@ void MipsMCAsmInfo::anchor() { }
MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
IsLittleEndian = TheTriple.isLittleEndian();
- if (TheTriple.isMIPS64()) {
+ if (TheTriple.isMIPS64() && TheTriple.getEnvironment() != Triple::GNUABIN32)
CodePointerSize = CalleeSaveStackSlotSize = 8;
- }
// FIXME: This condition isn't quite right but it's the best we can do until
// this object can identify the ABI. It will misbehave when using O32
@@ -50,21 +49,5 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
ExceptionsType = ExceptionHandling::DwarfCFI;
DwarfRegNumForCFI = true;
HasMipsExpressions = true;
-
- // Enable IAS by default for O32.
- if (TheTriple.isMIPS32())
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for Debian mips64/mips64el.
- if (TheTriple.getEnvironment() == Triple::GNUABI64)
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for Android mips64el that uses N64 ABI.
- if (TheTriple.getArch() == Triple::mips64el && TheTriple.isAndroid())
- UseIntegratedAssembler = true;
-
- // Enable IAS by default for FreeBSD / OpenBSD mips64/mips64el.
- if (TheTriple.isOSFreeBSD() ||
- TheTriple.isOSOpenBSD())
- UseIntegratedAssembler = true;
+ UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index cd34b0ab70b4..f43a4d980f92 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -213,6 +213,12 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
TmpInst.setOpcode (NewOpcode);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
}
+
+ if (((MI.getOpcode() == Mips::MOVEP_MM) ||
+ (MI.getOpcode() == Mips::MOVEP_MMR6))) {
+ unsigned RegPair = getMovePRegPairOpValue(MI, 0, Fixups, STI);
+ Binary = (Binary & 0xFFFFFC7F) | (RegPair << 7);
+ }
}
const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
@@ -607,6 +613,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
case MipsMCExpr::MEK_Special:
llvm_unreachable("Unhandled fixup kind!");
break;
+ case MipsMCExpr::MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MipsMCExpr::MEK_CALL_HI16:
FixupKind = Mips::fixup_Mips_CALL_HI16;
break;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 0bddba781453..99857e083c6c 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -43,6 +43,9 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
break;
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MEK_CALL_HI16:
OS << "%call_hi";
break;
@@ -157,6 +160,8 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
case MEK_None:
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
case MEK_DTPREL_HI:
case MEK_DTPREL_LO:
case MEK_GOT:
@@ -244,6 +249,9 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
break;
+ case MEK_DTPREL:
+ llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ break;
case MEK_CALL_HI16:
case MEK_CALL_LO16:
case MEK_GOT:
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index 495d525ccff4..bf3274ab5d17 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -22,6 +22,7 @@ public:
MEK_None,
MEK_CALL_HI16,
MEK_CALL_LO16,
+ MEK_DTPREL,
MEK_DTPREL_HI,
MEK_DTPREL_LO,
MEK_GOT,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index ce208b7f98bc..a8cd7b0d9b03 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -47,10 +47,17 @@ using namespace llvm;
/// FIXME: Merge with the copy in MipsSubtarget.cpp
StringRef MIPS_MC::selectMipsCPU(const Triple &TT, StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
- if (TT.isMIPS32())
- CPU = "mips32";
- else
- CPU = "mips64";
+ if (TT.getSubArch() == llvm::Triple::MipsSubArch_r6) {
+ if (TT.isMIPS32())
+ CPU = "mips32r6";
+ else
+ CPU = "mips64r6";
+ } else {
+ if (TT.isMIPS32())
+ CPU = "mips32";
+ else
+ CPU = "mips64";
+ }
}
return CPU;
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 1eb21b6cc826..58f9717e1cc6 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -248,7 +248,11 @@ void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc,
}
void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {
- emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
+ const FeatureBitset &Features = STI->getFeatureBits();
+ if (Features[Mips::FeatureMicroMips])
+ emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI);
+ else
+ emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
}
/// Emit the $gp restore operation for .cprestore.
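emitNop() now emits the 16-bit move16 $zero, $zero when microMIPS is enabled, and keeps sll $zero, $zero, 0 otherwise. The classic 32-bit nop works because SLL sits in the SPECIAL opcode group (opcode 0, funct 0), so with $zero everywhere and a zero shift amount every field of the R-type word is zero. A standalone sketch of that encoding check:

#include <cassert>
#include <cstdint>

// R-type word: opcode | rs | rt | rd | shamt | funct.
static uint32_t encodeRType(unsigned Op, unsigned Rs, unsigned Rt, unsigned Rd,
                            unsigned Shamt, unsigned Funct) {
  return (Op << 26) | (Rs << 21) | (Rt << 16) | (Rd << 11) | (Shamt << 6) |
         Funct;
}

int main() {
  // "sll $zero, $zero, 0" encodes as the all-zero word 0x00000000, which is
  // why it serves as the canonical MIPS nop in the non-microMIPS path above.
  assert(encodeRType(/*Op=*/0, /*Rs=*/0, /*Rt=*/0, /*Rd=*/0, /*Shamt=*/0,
                     /*Funct=*/0) == 0u);
  return 0;
}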
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 6b0aa7756eab..814918d25e70 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -159,6 +159,7 @@ class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6;
class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">;
class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>;
class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">;
+class SIGRIE_MMR6_ENC : SIGRIE_FM_MM, MMR6Arch<"sigrie">;
class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>;
class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>;
class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>;
@@ -1101,7 +1102,9 @@ class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16", II_BREAK>, MMR6Arch<"break16">
class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>,
MMR6Arch<"li16">, IsAsCheapAsAMove;
class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">;
-class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMoveP>, MMR6Arch<"movep">;
+class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMovePPairFirst,
+ GPRMM16OpndMovePPairSecond, GPRMM16OpndMoveP>,
+ MMR6Arch<"movep">;
class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">;
class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
MMR6Arch<"subu16"> {
@@ -1160,6 +1163,14 @@ class SDBBP_MMR6_DESC : MipsR6Inst {
InstrItinClass Itinerary = II_SDBBP;
}
+class SIGRIE_MMR6_DESC : MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm16:$code_);
+ string AsmString = !strconcat("sigrie", "\t$code_");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_SIGRIE;
+}
+
class LWM16_MMR6_DESC
: MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
!strconcat("lwm16", "\t$rt, $addr"), [],
@@ -1425,6 +1436,7 @@ def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6;
def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC,
ISA_MICROMIPS32R6;
def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6;
+def SIGRIE_MMR6 : R6MMR6Rel, SIGRIE_MMR6_DESC, SIGRIE_MMR6_ENC, ISA_MICROMIPS32R6;
def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
let DecoderMethod = "DecodeMemMMImm16" in {
@@ -1633,6 +1645,7 @@ def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset),
}
def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"sigrie", (SIGRIE_MMR6 0), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"rdhwr $rt, $rs",
(RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>,
ISA_MICROMIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index 84ae0eddf980..1731afc1961f 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -243,6 +243,8 @@ let DecoderNamespace = "MicroMipsFP64" in {
MFC1_FM_MM<0xe0>, ISA_MICROMIPS, FGR_64;
def MFHC1_D64_MM : MFC1_FT<"mfhc1", GPR32Opnd, FGR64Opnd, II_MFHC1>,
MFC1_FM_MM<0xc0>, ISA_MICROMIPS, FGR_64;
+ def MTC1_D64_MM : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>,
+ MFC1_FM_MM<0xa0>, ISA_MICROMIPS, FGR_64;
}
let DecoderNamespace = "MicroMips" in {
@@ -405,6 +407,9 @@ let AddedComplexity = 40 in {
def : StoreRegImmPat<SWC1_MM, f32>, ISA_MICROMIPS;
}
+def : MipsPat<(MipsMTC1_D64 GPR32Opnd:$src),
+ (MTC1_D64_MM GPR32Opnd:$src)>, ISA_MICROMIPS, FGR_64;
+
def : MipsPat<(f32 fpimm0), (MTC1_MM ZERO)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(f32 fpimm0neg), (FNEG_S_MM (MTC1_MM ZERO))>,
ISA_MICROMIPS32_NOT_MIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index a9c53e08b810..2a4cc279ef0d 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -933,6 +933,17 @@ class SDBBP_FM_MM : MMArch {
let Inst{5-0} = 0x3c;
}
+class SIGRIE_FM_MM : MMArch {
+ bits<16> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-22} = 0x0;
+ let Inst{21-6} = code_;
+ let Inst{5-0} = 0b111111;
+}
+
class RDHWR_FM_MM : MMArch {
bits<5> rt;
bits<5> rd;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index ebadb59a0432..af380a0ec71e 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -231,27 +231,14 @@ class StoreLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
bit mayStore = 1;
}
-/// A register pair used by movep instruction.
-def MovePRegPairAsmOperand : AsmOperandClass {
- let Name = "MovePRegPair";
- let ParserMethod = "parseMovePRegPair";
- let PredicateMethod = "isMovePRegPair";
-}
-
-def movep_regpair : Operand<i32> {
- let EncoderMethod = "getMovePRegPairOpValue";
- let ParserMatchClass = MovePRegPairAsmOperand;
- let PrintMethod = "printRegisterList";
- let DecoderMethod = "DecodeMovePRegPair";
- let MIOperandInfo = (ops ptr_rc, ptr_rc);
-}
-
-class MovePMM16<string opstr, RegisterOperand RO> :
-MicroMipsInst16<(outs movep_regpair:$dst_regs), (ins RO:$rs, RO:$rt),
- !strconcat(opstr, "\t$dst_regs, $rs, $rt"), [],
+class MovePMM16<string opstr, RegisterOperand RO1, RegisterOperand RO2,
+ RegisterOperand RO3> :
+MicroMipsInst16<(outs RO1:$rd1, RO2:$rd2), (ins RO3:$rs, RO3:$rt),
+ !strconcat(opstr, "\t$rd1, $rd2, $rs, $rt"), [],
NoItinerary, FrmR> {
let isReMaterializable = 1;
let isMoveReg = 1;
+ let DecoderMethod = "DecodeMovePOperands";
}
class StorePairMM<string opstr, ComplexPattern Addr = addr>
@@ -682,8 +669,9 @@ def MFLO16_MM : MoveFromHILOMM<"mflo16", GPR32Opnd, AC0>,
MFHILO_FM_MM16<0x12>, ISA_MICROMIPS32_NOT_MIPS32R6;
def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>,
ISA_MICROMIPS32_NOT_MIPS32R6;
-def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16,
- ISA_MICROMIPS32_NOT_MIPS32R6;
+def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMovePPairFirst,
+ GPRMM16OpndMovePPairSecond, GPRMM16OpndMoveP>,
+ MOVEP_FM_MM16, ISA_MICROMIPS32_NOT_MIPS32R6;
def LI16_MM : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, LI_FM_MM16,
IsAsCheapAsAMove, ISA_MICROMIPS32_NOT_MIPS32R6;
def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>,
@@ -1116,6 +1104,27 @@ let DecoderNamespace = "MicroMips" in {
ISA_MICROMIPS32_NOT_MIPS32R6;
}
+let AdditionalPredicates = [NotDSP] in {
+ def PseudoMULT_MM : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMULTu_MM : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMFHI_MM : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMFLO_MM : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMTLOHI_MM : PseudoMTLOHI<ACC64, GPR32>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMADD_MM : MAddSubPseudo<MADD, MipsMAdd, II_MADD>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMADDU_MM : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMSUB_MM : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def PseudoMSUBU_MM : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+}
+
def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;
def TAILCALLREG_MM : TailCallReg<JRC16_MM, GPR32Opnd>,
@@ -1262,6 +1271,8 @@ let AddedComplexity = 40 in
def : MipsPat<(bswap GPR32:$rt), (ROTR_MM (WSBH_MM GPR32:$rt), 16)>,
ISA_MICROMIPS;
+def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL_MM texternalsym:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
(TAILCALL_MM tglobaladdr:$dst)>, ISA_MICROMIPS32_NOT_MIPS32R6;
def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp b/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
index 568cdfb5b110..f9062cc23da2 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -31,13 +31,14 @@ namespace {
/// Order of operands to transfer
// TODO: Will be extended when additional optimizations are added
enum OperandTransfer {
- OT_NA, ///< Not applicable
- OT_OperandsAll, ///< Transfer all operands
- OT_Operands02, ///< Transfer operands 0 and 2
- OT_Operand2, ///< Transfer just operand 2
- OT_OperandsXOR, ///< Transfer operands for XOR16
- OT_OperandsLwp, ///< Transfer operands for LWP
- OT_OperandsSwp, ///< Transfer operands for SWP
+ OT_NA, ///< Not applicable
+ OT_OperandsAll, ///< Transfer all operands
+ OT_Operands02, ///< Transfer operands 0 and 2
+ OT_Operand2, ///< Transfer just operand 2
+ OT_OperandsXOR, ///< Transfer operands for XOR16
+ OT_OperandsLwp, ///< Transfer operands for LWP
+ OT_OperandsSwp, ///< Transfer operands for SWP
+ OT_OperandsMovep, ///< Transfer operands for MOVEP
};
/// Reduction type
@@ -170,6 +171,10 @@ private:
// returns true on success.
static bool ReduceSXtoSX16(ReduceEntryFunArgs *Arguments);
+  // Attempts to reduce two MOVE instructions into a MOVEP instruction;
+  // returns true on success.
+ static bool ReduceMoveToMovep(ReduceEntryFunArgs *Arguments);
+
// Attempts to reduce arithmetic instructions, returns true on success.
static bool ReduceArithmeticInstructions(ReduceEntryFunArgs *Arguments);
@@ -243,6 +248,8 @@ ReduceEntryVector MicroMipsSizeReduce::ReduceTable = {
OpInfo(OT_OperandsLwp), ImmField(0, -2048, 2048, 2)},
{RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP,
OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+ {RT_TwoInstr, OpCodes(Mips::MOVE16_MM, Mips::MOVEP_MM), ReduceMoveToMovep,
+ OpInfo(OT_OperandsMovep), ImmField(0, 0, 0, -1)},
{RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16,
OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)},
{RT_OneInstr, OpCodes(Mips::SB_MM, Mips::SB16_MM), ReduceSXtoSX16,
@@ -562,6 +569,89 @@ bool MicroMipsSizeReduce::ReduceSXtoSX16(ReduceEntryFunArgs *Arguments) {
return ReplaceInstruction(MI, Entry);
}
+// Returns true if Reg can be a source register
+// of the MOVEP instruction.
+static bool IsMovepSrcRegister(unsigned Reg) {
+
+ if (Reg == Mips::ZERO || Reg == Mips::V0 || Reg == Mips::V1 ||
+ Reg == Mips::S0 || Reg == Mips::S1 || Reg == Mips::S2 ||
+ Reg == Mips::S3 || Reg == Mips::S4)
+ return true;
+
+ return false;
+}
+
+// Returns true if Reg can be a destination register
+// of MOVEP instruction
+static bool IsMovepDestinationReg(unsigned Reg) {
+
+ if (Reg == Mips::A0 || Reg == Mips::A1 || Reg == Mips::A2 ||
+ Reg == Mips::A3 || Reg == Mips::S5 || Reg == Mips::S6)
+ return true;
+
+ return false;
+}
+
+// Returns true if the registers can be a pair of destination
+// registers in the MOVEP instruction.
+static bool IsMovepDestinationRegPair(unsigned R0, unsigned R1) {
+
+ if ((R0 == Mips::A0 && R1 == Mips::S5) ||
+ (R0 == Mips::A0 && R1 == Mips::S6) ||
+ (R0 == Mips::A0 && R1 == Mips::A1) ||
+ (R0 == Mips::A0 && R1 == Mips::A2) ||
+ (R0 == Mips::A0 && R1 == Mips::A3) ||
+ (R0 == Mips::A1 && R1 == Mips::A2) ||
+ (R0 == Mips::A1 && R1 == Mips::A3) ||
+ (R0 == Mips::A2 && R1 == Mips::A3))
+ return true;
+
+ return false;
+}
+
+bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) {
+
+ const ReduceEntry &Entry = Arguments->Entry;
+ MachineBasicBlock::instr_iterator &NextMII = Arguments->NextMII;
+ const MachineBasicBlock::instr_iterator &E =
+ Arguments->MI->getParent()->instr_end();
+
+ if (NextMII == E)
+ return false;
+
+ MachineInstr *MI1 = Arguments->MI;
+ MachineInstr *MI2 = &*NextMII;
+
+ unsigned RegDstMI1 = MI1->getOperand(0).getReg();
+ unsigned RegSrcMI1 = MI1->getOperand(1).getReg();
+
+ if (!IsMovepSrcRegister(RegSrcMI1))
+ return false;
+
+ if (!IsMovepDestinationReg(RegDstMI1))
+ return false;
+
+ if (MI2->getOpcode() != Entry.WideOpc())
+ return false;
+
+ unsigned RegDstMI2 = MI2->getOperand(0).getReg();
+ unsigned RegSrcMI2 = MI2->getOperand(1).getReg();
+
+ if (!IsMovepSrcRegister(RegSrcMI2))
+ return false;
+
+ bool ConsecutiveForward;
+ if (IsMovepDestinationRegPair(RegDstMI1, RegDstMI2)) {
+ ConsecutiveForward = true;
+ } else if (IsMovepDestinationRegPair(RegDstMI2, RegDstMI1)) {
+ ConsecutiveForward = false;
+ } else
+ return false;
+
+ NextMII = std::next(NextMII);
+ return ReplaceInstruction(MI1, Entry, MI2, ConsecutiveForward);
+}
+
bool MicroMipsSizeReduce::ReduceXORtoXOR16(ReduceEntryFunArgs *Arguments) {
MachineInstr *MI = Arguments->MI;
@@ -641,18 +731,25 @@ bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI,
}
break;
}
+ case OT_OperandsMovep:
case OT_OperandsLwp:
case OT_OperandsSwp: {
if (ConsecutiveForward) {
MIB.add(MI->getOperand(0));
MIB.add(MI2->getOperand(0));
MIB.add(MI->getOperand(1));
- MIB.add(MI->getOperand(2));
+ if (OpTransfer == OT_OperandsMovep)
+ MIB.add(MI2->getOperand(1));
+ else
+ MIB.add(MI->getOperand(2));
} else { // consecutive backward
MIB.add(MI2->getOperand(0));
MIB.add(MI->getOperand(0));
MIB.add(MI2->getOperand(1));
- MIB.add(MI2->getOperand(2));
+ if (OpTransfer == OT_OperandsMovep)
+ MIB.add(MI->getOperand(1));
+ else
+ MIB.add(MI2->getOperand(2));
}
LLVM_DEBUG(dbgs() << "and converting 32-bit: " << *MI2
diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h
index ef3a807c7648..6bb7aecc867a 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.h
+++ b/contrib/llvm/lib/Target/Mips/Mips.h
@@ -38,6 +38,7 @@ namespace llvm {
FunctionPass *createMipsConstantIslandPass();
FunctionPass *createMicroMipsSizeReducePass();
FunctionPass *createMipsExpandPseudoPass();
+ FunctionPass *createMipsPreLegalizeCombiner();
InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
MipsSubtarget &,
@@ -46,6 +47,7 @@ namespace llvm {
void initializeMipsDelaySlotFillerPass(PassRegistry &);
void initializeMipsBranchExpansionPass(PassRegistry &);
void initializeMicroMipsSizeReducePass(PassRegistry &);
+ void initializeMipsPreLegalizerCombinerPass(PassRegistry&);
} // end namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index c310d9491af8..f237bb6d4006 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -74,16 +74,18 @@ static FPReturnVariant whichFPReturnVariant(Type *T) {
return FRet;
case Type::DoubleTyID:
return DRet;
- case Type::StructTyID:
- if (T->getStructNumElements() != 2)
+ case Type::StructTyID: {
+ StructType *ST = cast<StructType>(T);
+ if (ST->getNumElements() != 2)
break;
- if ((T->getContainedType(0)->isFloatTy()) &&
- (T->getContainedType(1)->isFloatTy()))
+ if ((ST->getElementType(0)->isFloatTy()) &&
+ (ST->getElementType(1)->isFloatTy()))
return CFRet;
- if ((T->getContainedType(0)->isDoubleTy()) &&
- (T->getContainedType(1)->isDoubleTy()))
+ if ((ST->getElementType(0)->isDoubleTy()) &&
+ (ST->getElementType(1)->isDoubleTy()))
return CDRet;
break;
+ }
default:
break;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 8ce47e3f669d..79df622241a0 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -386,27 +386,22 @@ const char* Mips16TargetLowering::
}
else if (RetTy ->isDoubleTy()) {
result = dfMips16Helper[stubNum];
- }
- else if (RetTy->isStructTy()) {
+ } else if (StructType *SRetTy = dyn_cast<StructType>(RetTy)) {
// check if it's complex
- if (RetTy->getNumContainedTypes() == 2) {
- if ((RetTy->getContainedType(0)->isFloatTy()) &&
- (RetTy->getContainedType(1)->isFloatTy())) {
+ if (SRetTy->getNumElements() == 2) {
+ if ((SRetTy->getElementType(0)->isFloatTy()) &&
+ (SRetTy->getElementType(1)->isFloatTy())) {
result = scMips16Helper[stubNum];
- }
- else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
- (RetTy->getContainedType(1)->isDoubleTy())) {
+ } else if ((SRetTy->getElementType(0)->isDoubleTy()) &&
+ (SRetTy->getElementType(1)->isDoubleTy())) {
result = dcMips16Helper[stubNum];
- }
- else {
+ } else {
llvm_unreachable("Uncovered condition");
}
- }
- else {
+ } else {
llvm_unreachable("Uncovered condition");
}
- }
- else {
+ } else {
if (stubNum == 0) {
needHelper = false;
return "";
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 219f1ad33586..efebc99b5dae 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -97,9 +97,9 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool Mips16InstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool Mips16InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
if (MI.isMoveReg()) {
Dest = &MI.getOperand(0);
Src = &MI.getOperand(1);
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
index 8190be6187ea..6a802e4cce5d 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -53,9 +53,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -105,6 +102,14 @@ public:
void BuildAddiuSpImm
(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
+
+protected:
+  /// If the specific machine instruction is an instruction that moves/copies
+  /// a value from one register to another register, return true along with
+  /// the @Source machine operand and the @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
index e1d08cad88b7..623af570a5e6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -87,6 +87,7 @@ def OPCODE5_BC1NEZ : OPCODE5<0b01101>;
def OPCODE5_BC2EQZ : OPCODE5<0b01001>;
def OPCODE5_BC2NEZ : OPCODE5<0b01101>;
def OPCODE5_BGEZAL : OPCODE5<0b10001>;
+def OPCODE5_SIGRIE : OPCODE5<0b10111>;
// The next four constants are unnamed in the spec. These names are taken from
// the OPGROUP names they are used with.
def OPCODE5_LDC2 : OPCODE5<0b01110>;
@@ -602,3 +603,12 @@ class SPECIAL3_GINV<bits<2> ginv> : MipsR6Inst {
let Inst{7-6} = ginv;
let Inst{5-0} = 0b111101;
}
+
+class SIGRIE_FM : MipsR6Inst {
+ bits<16> code_;
+
+ let Inst{31-26} = OPGROUP_REGIMM.Value;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = OPCODE5_SIGRIE.Value;
+ let Inst{15-0} = code_;
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index d86fc3f658ae..2bd0cf2d59a6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -200,6 +200,8 @@ class CRC32CW_ENC : SPECIAL3_2R_SZ_CRC<2,1>;
class GINVI_ENC : SPECIAL3_GINV<0>;
class GINVT_ENC : SPECIAL3_GINV<2>;
+class SIGRIE_ENC : SIGRIE_FM;
+
//===----------------------------------------------------------------------===//
//
// Instruction Multiclasses
@@ -846,6 +848,14 @@ class GINVI_DESC : GINV_DESC_BASE<"ginvi", GPR32Opnd, II_GINVI> {
}
class GINVT_DESC : GINV_DESC_BASE<"ginvt", GPR32Opnd, II_GINVT>;
+class SIGRIE_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm16:$code_);
+ string AsmString = "sigrie\t$code_";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_SIGRIE;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
@@ -961,6 +971,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
def SEL_S : R6MMR6Rel, SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT;
def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
+ def SIGRIE : SIGRIE_ENC, SIGRIE_DESC, ISA_MIPS32R6;
}
let AdditionalPredicates = [NotInMicroMips] in {
@@ -988,6 +999,7 @@ def : MipsInstAlias<"evp", (EVP ZERO), 0>, ISA_MIPS32R6;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
+def : MipsInstAlias<"sigrie", (SIGRIE 0)>, ISA_MIPS32R6;
def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6, GPR_32;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index b5317bec70c4..5729182deafb 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -416,6 +416,13 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
// long branches. See the comment in file MipsLongBranch.cpp for detailed
// explanation.
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_LUi2Op_64 : PseudoSE<(outs GPR64Opnd:$dst),
+ (ins brtarget:$tgt), []>, GPR_64;
+// Expands to: daddiu $dst, $src, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_DADDiu2Op : PseudoSE<(outs GPR64Opnd:$dst),
+ (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64;
+
// Expands to: daddiu $dst, $src, %PART($tgt - $baltgt)
// where %PART may be %hi or %lo, depending on the relocation kind
// that $tgt is annotated with.
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 2e0c25de2bc8..362431fd42a6 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -561,6 +561,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
O << '$' << MipsInstPrinter::getRegisterName(Reg);
return false;
}
+ break;
}
case 'w':
// Print MSA registers for the 'f' constraint
@@ -1203,18 +1204,23 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// Emit .dtprelword or .dtpreldword directive
// and value for debug thread local expression.
-void MipsAsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
- unsigned Size) const {
- switch (Size) {
- case 4:
- OutStreamer->EmitDTPRel32Value(Value);
- break;
- case 8:
- OutStreamer->EmitDTPRel64Value(Value);
- break;
- default:
- llvm_unreachable("Unexpected size of expression value.");
+void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
+ if (auto *MipsExpr = dyn_cast<MipsMCExpr>(Value)) {
+ if (MipsExpr && MipsExpr->getKind() == MipsMCExpr::MEK_DTPREL) {
+ switch (Size) {
+ case 4:
+ OutStreamer->EmitDTPRel32Value(MipsExpr->getSubExpr());
+ break;
+ case 8:
+ OutStreamer->EmitDTPRel64Value(MipsExpr->getSubExpr());
+ break;
+ default:
+ llvm_unreachable("Unexpected size of expression value.");
+ }
+ return;
+ }
}
+ AsmPrinter::EmitDebugValue(Value, Size);
}
// Align all targets of indirect branches on bundle size. Used only if target
@@ -1240,8 +1246,12 @@ void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const {
return (Opcode == Mips::LONG_BRANCH_LUi
+ || Opcode == Mips::LONG_BRANCH_LUi2Op
+ || Opcode == Mips::LONG_BRANCH_LUi2Op_64
|| Opcode == Mips::LONG_BRANCH_ADDiu
- || Opcode == Mips::LONG_BRANCH_DADDiu);
+ || Opcode == Mips::LONG_BRANCH_ADDiu2Op
+ || Opcode == Mips::LONG_BRANCH_DADDiu
+ || Opcode == Mips::LONG_BRANCH_DADDiu2Op);
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 999b6f896bae..eb58234e3e77 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -160,7 +160,7 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- void EmitDebugThreadLocal(const MCExpr *Value, unsigned Size) const override;
+ void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
index af936e6fc96b..e59267c4fd9b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -128,6 +128,7 @@ struct MBBInfo {
uint64_t Size = 0;
bool HasLongBranch = false;
MachineInstr *Br = nullptr;
+ uint64_t Offset = 0;
MBBInfo() = default;
};
@@ -154,8 +155,11 @@ private:
void splitMBB(MachineBasicBlock *MBB);
void initMBBInfo();
int64_t computeOffset(const MachineInstr *Br);
+ uint64_t computeOffsetFromTheBeginning(int MBB);
void replaceBranch(MachineBasicBlock &MBB, Iter Br, const DebugLoc &DL,
MachineBasicBlock *MBBOpnd);
+ bool buildProperJumpMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Pos, DebugLoc DL);
void expandToLongBranch(MBBInfo &Info);
bool handleForbiddenSlot();
bool handlePossibleLongBranch();
@@ -167,7 +171,6 @@ private:
SmallVector<MBBInfo, 16> MBBInfos;
bool IsPIC;
MipsABIInfo ABI;
- unsigned LongBranchSeqSize;
bool ForceLongBranchFirstPass = false;
};
@@ -176,7 +179,7 @@ private:
char MipsBranchExpansion::ID = 0;
INITIALIZE_PASS(MipsBranchExpansion, DEBUG_TYPE,
- "Expand out of range branch instructions and prevent forbidden"
+ "Expand out of range branch instructions and fix forbidden"
" slot hazards",
false, false)
@@ -268,7 +271,8 @@ void MipsBranchExpansion::splitMBB(MachineBasicBlock *MBB) {
// Insert NewMBB and fix control flow.
MachineBasicBlock *Tgt = getTargetMBB(*FirstBr);
NewMBB->transferSuccessors(MBB);
- NewMBB->removeSuccessor(Tgt, true);
+ if (Tgt != getTargetMBB(*LastBr))
+ NewMBB->removeSuccessor(Tgt, true);
MBB->addSuccessor(NewMBB);
MBB->addSuccessor(Tgt);
MFp->insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
@@ -294,14 +298,6 @@ void MipsBranchExpansion::initMBBInfo() {
for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin();
MI != MBB->instr_end(); ++MI)
MBBInfos[I].Size += TII->getInstSizeInBytes(*MI);
-
- // Search for MBB's branch instruction.
- ReverseIter End = MBB->rend();
- ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
-
- if ((Br != End) && !Br->isIndirectBranch() &&
- (Br->isConditionalBranch() || (Br->isUnconditionalBranch() && IsPIC)))
- MBBInfos[I].Br = &*Br;
}
}
@@ -326,6 +322,14 @@ int64_t MipsBranchExpansion::computeOffset(const MachineInstr *Br) {
return -Offset + 4;
}
+// Returns the distance in bytes from the beginning of the function up to MBB.
+uint64_t MipsBranchExpansion::computeOffsetFromTheBeginning(int MBB) {
+ uint64_t Offset = 0;
+ for (int N = 0; N < MBB; ++N)
+ Offset += MBBInfos[N].Size;
+ return Offset;
+}
+
// Replace Br with a branch which has the opposite condition code and a
// MachineBasicBlock operand MBBOpnd.
void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
@@ -359,6 +363,35 @@ void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
Br->eraseFromParent();
}
+bool MipsBranchExpansion::buildProperJumpMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Pos,
+ DebugLoc DL) {
+ bool HasR6 = ABI.IsN64() ? STI->hasMips64r6() : STI->hasMips32r6();
+ bool AddImm = HasR6 && !STI->useIndirectJumpsHazard();
+
+ unsigned JR = ABI.IsN64() ? Mips::JR64 : Mips::JR;
+ unsigned JIC = ABI.IsN64() ? Mips::JIC64 : Mips::JIC;
+ unsigned JR_HB = ABI.IsN64() ? Mips::JR_HB64 : Mips::JR_HB;
+ unsigned JR_HB_R6 = ABI.IsN64() ? Mips::JR_HB64_R6 : Mips::JR_HB_R6;
+
+ unsigned JumpOp;
+ if (STI->useIndirectJumpsHazard())
+ JumpOp = HasR6 ? JR_HB_R6 : JR_HB;
+ else
+ JumpOp = HasR6 ? JIC : JR;
+
+ if (JumpOp == Mips::JIC && STI->inMicroMipsMode())
+ JumpOp = Mips::JIC_MMR6;
+
+ unsigned ATReg = ABI.IsN64() ? Mips::AT_64 : Mips::AT;
+ MachineInstrBuilder Instr =
+ BuildMI(*MBB, Pos, DL, TII->get(JumpOp)).addReg(ATReg);
+ if (AddImm)
+ Instr.addImm(0);
+
+ return !AddImm;
+}
+
// Expand branch instructions to long branches.
// TODO: This function has to be fixed for beqz16 and bnez16, because it
// currently assumes that all branches have 16-bit offsets, and will produce
@@ -479,33 +512,21 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
// In NaCl, modifying the sp is not allowed in branch delay slot.
// For MIPS32R6, we can skip using a delay slot branch.
- if (STI->isTargetNaCl() ||
- (STI->hasMips32r6() && !STI->useIndirectJumpsHazard()))
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
+ bool hasDelaySlot = buildProperJumpMI(BalTgtMBB, Pos, DL);
+
+ if (STI->isTargetNaCl() || !hasDelaySlot) {
+ BuildMI(*BalTgtMBB, std::prev(Pos), DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
.addImm(8);
-
- if (STI->hasMips32r6() && !STI->useIndirectJumpsHazard()) {
- const unsigned JICOp =
- STI->inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp))
- .addReg(Mips::AT)
- .addImm(0);
-
- } else {
- unsigned JROp =
- STI->useIndirectJumpsHazard()
- ? (STI->hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB)
- : Mips::JR;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT);
-
+ }
+ if (hasDelaySlot) {
if (STI->isTargetNaCl()) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP));
- } else
+ } else {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
.addImm(8);
-
+ }
BalTgtMBB->rbegin()->bundleWithPred();
}
} else {
@@ -597,46 +618,94 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP_64)
.addImm(0);
- if (STI->hasMips64r6() && !STI->useIndirectJumpsHazard()) {
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ bool hasDelaySlot = buildProperJumpMI(BalTgtMBB, Pos, DL);
+  // If there is no delay slot, insert the stack adjustment before the jump.
+ if (!hasDelaySlot) {
+ BuildMI(*BalTgtMBB, std::prev(Pos), DL, TII->get(Mips::DADDiu),
+ Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JIC64))
- .addReg(Mips::AT_64)
- .addImm(0);
} else {
- unsigned JROp =
- STI->useIndirectJumpsHazard()
- ? (STI->hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64)
- : Mips::JR64;
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
.addReg(Mips::SP_64)
.addImm(16);
BalTgtMBB->rbegin()->bundleWithPred();
}
}
-
- assert(LongBrMBB->size() + BalTgtMBB->size() == LongBranchSeqSize);
- } else {
- // Pre R6: R6:
- // $longbr: $longbr:
- // j $tgt bc $tgt
- // nop $fallthrough
- // $fallthrough:
- //
+ } else { // Not PIC
Pos = LongBrMBB->begin();
LongBrMBB->addSuccessor(TgtMBB);
- if (STI->hasMips32r6())
+
+  // Compute the position of the potential jump instruction (size of the
+  // basic blocks before it, plus 4 for the jump instruction itself).
+ uint64_t JOffset = computeOffsetFromTheBeginning(MBB->getNumber()) +
+ MBBInfos[MBB->getNumber()].Size + 4;
+ uint64_t TgtMBBOffset = computeOffsetFromTheBeginning(TgtMBB->getNumber());
+  // If it's a forward jump, TgtMBBOffset will be shifted forward by the two
+  // instructions that are about to be inserted.
+ if (JOffset < TgtMBBOffset)
+ TgtMBBOffset += 2 * 4;
+  // Compare the upper 4 bits to check whether it's the same 256 MB segment.
+ bool SameSegmentJump = JOffset >> 28 == TgtMBBOffset >> 28;
+
+ if (STI->hasMips32r6() && TII->isBranchOffsetInRange(Mips::BC, I.Offset)) {
+ // R6:
+ // $longbr:
+ // bc $tgt
+ // $fallthrough:
+ //
BuildMI(*LongBrMBB, Pos, DL,
TII->get(STI->inMicroMipsMode() ? Mips::BC_MMR6 : Mips::BC))
.addMBB(TgtMBB);
- else
+ } else if (SameSegmentJump) {
+ // Pre R6:
+ // $longbr:
+ // j $tgt
+ // nop
+ // $fallthrough:
+ //
MIBundleBuilder(*LongBrMBB, Pos)
.append(BuildMI(*MFp, DL, TII->get(Mips::J)).addMBB(TgtMBB))
.append(BuildMI(*MFp, DL, TII->get(Mips::NOP)));
-
- assert(LongBrMBB->size() == LongBranchSeqSize);
+ } else {
+      // At this point, the branch target does not fit into the immediate
+      // field of the branch instruction and is not in the same segment as
+      // the jump instruction. Therefore we break the branch into several
+      // instructions: first load the target address into a register, and
+      // then branch through that register.
+ if (ABI.IsN64()) {
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi2Op_64),
+ Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_HIGHEST);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_HIGHER);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addImm(16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_HI);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addImm(16);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu2Op),
+ Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO);
+ } else {
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi2Op),
+ Mips::AT)
+ .addMBB(TgtMBB, MipsII::MO_ABS_HI);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_ADDiu2Op),
+ Mips::AT)
+ .addReg(Mips::AT)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO);
+ }
+ buildProperJumpMI(LongBrMBB, Pos, DL);
+ }
}
if (I.Br->isUnconditionalBranch()) {
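For background on the SameSegmentJump test above: the MIPS j instruction encodes a 26-bit word index and inherits the upper 4 bits of the destination from PC + 4, so a direct jump can only reach targets inside the same 256 MB segment. A minimal standalone sketch of that address computation (illustration only, not code from the patch; the helper name is made up):

    #include <cstdint>
    // How `j target` forms its destination address.
    static uint32_t jTargetAddress(uint32_t PCPlus4, uint32_t Index26) {
      return (PCPlus4 & 0xF0000000u) // top 4 bits come from PC + 4
             | (Index26 << 2);       // 26-bit index, word aligned
    }
    // Two addresses are mutually reachable with `j` exactly when
    // (A >> 28) == (B >> 28), which is the SameSegmentJump check above.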
@@ -666,8 +735,6 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
if (!STI->hasMips32r6() || STI->inMicroMipsMode())
return false;
- const MipsInstrInfo *TII = STI->getInstrInfo();
-
bool Changed = false;
for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) {
@@ -704,66 +771,65 @@ bool MipsBranchExpansion::handleForbiddenSlot() {
}
bool MipsBranchExpansion::handlePossibleLongBranch() {
-
- LongBranchSeqSize = IsPIC ? ((ABI.IsN64() || STI->isTargetNaCl()) ? 10 : 9)
- : (STI->hasMips32r6() ? 1 : 2);
-
if (STI->inMips16Mode() || !STI->enableLongBranchPass())
return false;
if (SkipLongBranch)
return false;
- initMBBInfo();
-
- SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end();
bool EverMadeChange = false, MadeChange = true;
while (MadeChange) {
MadeChange = false;
- for (I = MBBInfos.begin(); I != E; ++I) {
- // Skip if this MBB doesn't have a branch or the branch has already been
- // converted to a long branch.
- if (!I->Br || I->HasLongBranch)
- continue;
+ initMBBInfo();
- int64_t Offset = computeOffset(I->Br);
+ for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
+ MachineBasicBlock *MBB = MFp->getBlockNumbered(I);
+ // Search for MBB's branch instruction.
+ ReverseIter End = MBB->rend();
+ ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
- if (STI->isTargetNaCl()) {
- // The offset calculation does not include sandboxing instructions
- // that will be added later in the MC layer. Since at this point we
- // don't know the exact amount of code that "sandboxing" will add, we
- // conservatively estimate that code will not grow more than 100%.
- Offset *= 2;
- }
+ if ((Br != End) && Br->isBranch() && !Br->isIndirectBranch() &&
+ (Br->isConditionalBranch() ||
+ (Br->isUnconditionalBranch() && IsPIC))) {
+ int64_t Offset = computeOffset(&*Br);
- // Check if offset fits into the immediate field of the branch.
- if (!ForceLongBranchFirstPass &&
- TII->isBranchOffsetInRange(I->Br->getOpcode(), Offset))
- continue;
+ if (STI->isTargetNaCl()) {
+ // The offset calculation does not include sandboxing instructions
+ // that will be added later in the MC layer. Since at this point we
+ // don't know the exact amount of code that "sandboxing" will add, we
+ // conservatively estimate that code will not grow more than 100%.
+ Offset *= 2;
+ }
- I->HasLongBranch = true;
- I->Size += LongBranchSeqSize * 4;
- ++LongBranches;
- EverMadeChange = MadeChange = true;
- }
- }
+ if (ForceLongBranchFirstPass ||
+ !TII->isBranchOffsetInRange(Br->getOpcode(), Offset)) {
+ MBBInfos[I].Offset = Offset;
+ MBBInfos[I].Br = &*Br;
+ }
+ }
+ } // End for
- ForceLongBranchFirstPass = false;
+ ForceLongBranchFirstPass = false;
- if (!EverMadeChange)
- return false;
+ SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end();
+
+ for (I = MBBInfos.begin(); I != E; ++I) {
+ // Skip if this MBB doesn't have a branch or the branch has already been
+ // converted to a long branch.
+ if (!I->Br)
+ continue;
- // Do the expansion.
- for (I = MBBInfos.begin(); I != E; ++I)
- if (I->HasLongBranch) {
expandToLongBranch(*I);
+ ++LongBranches;
+ EverMadeChange = MadeChange = true;
}
- MFp->RenumberBlocks();
+ MFp->RenumberBlocks();
+ }
- return true;
+ return EverMadeChange;
}
bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
index 81a1cced93b7..90cb3f437bd5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -24,10 +24,10 @@ static bool isF128SoftLibCall(const char *CallSym) {
"__lttf2", "__multf3", "__netf2", "__powitf2",
"__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2",
"ceill", "copysignl", "cosl", "exp2l",
- "expl", "floorl", "fmal", "fmodl",
- "log10l", "log2l", "logl", "nearbyintl",
- "powl", "rintl", "roundl", "sinl",
- "sqrtl", "truncl"};
+ "expl", "floorl", "fmal", "fmaxl",
+ "fmodl", "log10l", "log2l", "logl",
+ "nearbyintl", "powl", "rintl", "roundl",
+ "sinl", "sqrtl", "truncl"};
// Check that LibCalls is sorted alphabetically.
auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
index a705ebb6b193..c550fadf6632 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -16,6 +16,7 @@
#include "MipsCallLowering.h"
#include "MipsCCState.h"
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
@@ -23,48 +24,89 @@ using namespace llvm;
MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI)
: CallLowering(&TLI) {}
-bool MipsCallLowering::MipsHandler::assign(const CCValAssign &VA,
- unsigned vreg) {
+bool MipsCallLowering::MipsHandler::assign(unsigned VReg,
+ const CCValAssign &VA) {
if (VA.isRegLoc()) {
- assignValueToReg(vreg, VA.getLocReg());
+ assignValueToReg(VReg, VA);
} else if (VA.isMemLoc()) {
- unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
- unsigned Offset = VA.getLocMemOffset();
- MachinePointerInfo MPO;
- unsigned StackAddr = getStackAddress(Size, Offset, MPO);
- assignValueToAddress(vreg, StackAddr, Size, MPO);
+ assignValueToAddress(VReg, VA);
} else {
return false;
}
return true;
}
+bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef<unsigned> VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex) {
+ for (unsigned i = 0; i < VRegs.size(); ++i)
+ if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i]))
+ return false;
+ return true;
+}
+
+void MipsCallLowering::MipsHandler::setLeastSignificantFirst(
+ SmallVectorImpl<unsigned> &VRegs) {
+ if (!MIRBuilder.getMF().getDataLayout().isLittleEndian())
+ std::reverse(VRegs.begin(), VRegs.end());
+}
+
+bool MipsCallLowering::MipsHandler::handle(
+ ArrayRef<CCValAssign> ArgLocs, ArrayRef<CallLowering::ArgInfo> Args) {
+ SmallVector<unsigned, 4> VRegs;
+ unsigned SplitLength;
+ const Function &F = MIRBuilder.getMF().getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const MipsTargetLowering &TLI = *static_cast<const MipsTargetLowering *>(
+ MIRBuilder.getMF().getSubtarget().getTargetLowering());
+
+ for (unsigned ArgsIndex = 0, ArgLocsIndex = 0; ArgsIndex < Args.size();
+ ++ArgsIndex, ArgLocsIndex += SplitLength) {
+ EVT VT = TLI.getValueType(DL, Args[ArgsIndex].Ty);
+ SplitLength = TLI.getNumRegistersForCallingConv(F.getContext(),
+ F.getCallingConv(), VT);
+ if (SplitLength > 1) {
+ VRegs.clear();
+ MVT RegisterVT = TLI.getRegisterTypeForCallingConv(
+ F.getContext(), F.getCallingConv(), VT);
+ for (unsigned i = 0; i < SplitLength; ++i)
+ VRegs.push_back(MRI.createGenericVirtualRegister(LLT{RegisterVT}));
+
+ if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg))
+ return false;
+ } else {
+ if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex]))
+ return false;
+ }
+ }
+ return true;
+}
+
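A rough sketch of the split-value path that MipsHandler::handle above dispatches to (the incoming and outgoing handleSplit implementations appear further down), under the assumption of an i64 value on a 32-bit MIPS target; illustration only, not code from the patch:

    // SplitLength == 2, so two s32 virtual registers are created.
    // Incoming values (formal arguments, call results):
    //   assignVRegs(VRegs, ArgLocs, Idx);        // load/copy each 32-bit part
    //   setLeastSignificantFirst(VRegs);         // reverse order on big-endian
    //   MIRBuilder.buildMerge(ArgsReg, VRegs);   // G_MERGE_VALUES -> one s64
    // Outgoing values (call arguments, return values):
    //   MIRBuilder.buildUnmerge(VRegs, ArgsReg); // one s64 -> two s32 parts
    //   setLeastSignificantFirst(VRegs);
    //   assignVRegs(VRegs, ArgLocs, Idx);        // copy/store each part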
namespace {
class IncomingValueHandler : public MipsCallLowering::MipsHandler {
public:
IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
: MipsHandler(MIRBuilder, MRI) {}
- bool handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args);
-
private:
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg) override;
+ void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+
+ unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) override;
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) override;
+ void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
- MachinePointerInfo &MPO) override;
+ bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) override;
virtual void markPhysRegUsed(unsigned PhysReg) {
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
- void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment,
- MachinePointerInfo &MPO) {
- MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOLoad, Size, Alignment);
+ void buildLoad(unsigned Val, const CCValAssign &VA) {
+ MachineMemOperand *MMO;
+ unsigned Addr = getStackAddress(VA, MMO);
MIRBuilder.buildLoad(Val, Addr, *MMO);
}
};
@@ -86,17 +128,34 @@ private:
} // end anonymous namespace
void IncomingValueHandler::assignValueToReg(unsigned ValVReg,
- unsigned PhysReg) {
- MIRBuilder.buildCopy(ValVReg, PhysReg);
+ const CCValAssign &VA) {
+ unsigned PhysReg = VA.getLocReg();
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::SExt:
+ case CCValAssign::LocInfo::ZExt:
+ case CCValAssign::LocInfo::AExt: {
+ auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ break;
+ }
+ default:
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ break;
+ }
markPhysRegUsed(PhysReg);
}
-unsigned IncomingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) {
+unsigned IncomingValueHandler::getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) {
+ unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ unsigned Offset = VA.getLocMemOffset();
MachineFrameInfo &MFI = MIRBuilder.getMF().getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
- MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ MachinePointerInfo MPO =
+ MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOLoad,
+ Size, /* Alignment */ 0);
unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
MIRBuilder.buildFrameIndex(AddrReg, FI);
@@ -104,19 +163,26 @@ unsigned IncomingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
return AddrReg;
}
-void IncomingValueHandler::assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) {
- // If the value is not extended, a simple load will suffice.
- buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO);
+void IncomingValueHandler::assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) {
+ if (VA.getLocInfo() == CCValAssign::SExt ||
+ VA.getLocInfo() == CCValAssign::ZExt ||
+ VA.getLocInfo() == CCValAssign::AExt) {
+ unsigned LoadReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ buildLoad(LoadReg, VA);
+ MIRBuilder.buildTrunc(ValVReg, LoadReg);
+ } else
+ buildLoad(ValVReg, VA);
}
-bool IncomingValueHandler::handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args) {
- for (unsigned i = 0, ArgsSize = Args.size(); i < ArgsSize; ++i) {
- if (!assign(ArgLocs[i], Args[i].Reg))
- return false;
- }
+bool IncomingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) {
+ if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+ return false;
+ setLeastSignificantFirst(VRegs);
+ MIRBuilder.buildMerge(ArgsReg, VRegs);
return true;
}
@@ -127,103 +193,179 @@ public:
MachineInstrBuilder &MIB)
: MipsHandler(MIRBuilder, MRI), MIB(MIB) {}
- bool handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args);
-
private:
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg) override;
+ void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+
+ unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) override;
+
+ void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) override;
+ bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) override;
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
- MachinePointerInfo &MPO) override;
+ unsigned extendRegister(unsigned ValReg, const CCValAssign &VA);
MachineInstrBuilder &MIB;
};
} // end anonymous namespace
void OutgoingValueHandler::assignValueToReg(unsigned ValVReg,
- unsigned PhysReg) {
- MIRBuilder.buildCopy(PhysReg, ValVReg);
+ const CCValAssign &VA) {
+ unsigned PhysReg = VA.getLocReg();
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
-unsigned OutgoingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) {
+unsigned OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) {
LLT p0 = LLT::pointer(0, 32);
LLT s32 = LLT::scalar(32);
unsigned SPReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildCopy(SPReg, Mips::SP);
unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+ unsigned Offset = VA.getLocMemOffset();
MIRBuilder.buildConstant(OffsetReg, Offset);
unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
- MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ MachinePointerInfo MPO =
+ MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOStore,
+ Size, /* Alignment */ 0);
+
return AddrReg;
}
-void OutgoingValueHandler::assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) {
- MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOStore, Size, /* Alignment */ 0);
- MIRBuilder.buildStore(ValVReg, Addr, *MMO);
+void OutgoingValueHandler::assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) {
+ MachineMemOperand *MMO;
+ unsigned Addr = getStackAddress(VA, MMO);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
-bool OutgoingValueHandler::handle(ArrayRef<CCValAssign> ArgLocs,
- ArrayRef<CallLowering::ArgInfo> Args) {
- for (unsigned i = 0; i < Args.size(); ++i) {
- if (!assign(ArgLocs[i], Args[i].Reg))
- return false;
+unsigned OutgoingValueHandler::extendRegister(unsigned ValReg,
+ const CCValAssign &VA) {
+ LLT LocTy{VA.getLocVT()};
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildSExt(ExtReg, ValReg);
+ return ExtReg;
+ }
+ case CCValAssign::ZExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildZExt(ExtReg, ValReg);
+ return ExtReg;
}
+ case CCValAssign::AExt: {
+ unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildAnyExt(ExtReg, ValReg);
+ return ExtReg;
+ }
+  // TODO: handle upper extends.
+ case CCValAssign::Full:
+ return ValReg;
+ default:
+ break;
+ }
+ llvm_unreachable("unable to extend register");
+}
+
+bool OutgoingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex,
+ unsigned ArgsReg) {
+ MIRBuilder.buildUnmerge(VRegs, ArgsReg);
+ setLeastSignificantFirst(VRegs);
+ if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+ return false;
+
return true;
}
static bool isSupportedType(Type *T) {
- if (T->isIntegerTy() && T->getScalarSizeInBits() == 32)
+ if (T->isIntegerTy())
return true;
if (T->isPointerTy())
return true;
return false;
}
+static CCValAssign::LocInfo determineLocInfo(const MVT RegisterVT, const EVT VT,
+ const ISD::ArgFlagsTy &Flags) {
+  // VT being wider than RegisterVT does not mean loss of information here:
+  // such a value is split across multiple registers of type RegisterVT.
+ if (VT.getSizeInBits() >= RegisterVT.getSizeInBits())
+ return CCValAssign::LocInfo::Full;
+ if (Flags.isSExt())
+ return CCValAssign::LocInfo::SExt;
+ if (Flags.isZExt())
+ return CCValAssign::LocInfo::ZExt;
+ return CCValAssign::LocInfo::AExt;
+}
+
+template <typename T>
+static void setLocInfo(SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<T> &Arguments) {
+ for (unsigned i = 0; i < ArgLocs.size(); ++i) {
+ const CCValAssign &VA = ArgLocs[i];
+ CCValAssign::LocInfo LocInfo = determineLocInfo(
+ Arguments[i].VT, Arguments[i].ArgVT, Arguments[i].Flags);
+ if (VA.isMemLoc())
+ ArgLocs[i] =
+ CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
+ VA.getLocMemOffset(), VA.getLocVT(), LocInfo);
+ else
+ ArgLocs[i] = CCValAssign::getReg(VA.getValNo(), VA.getValVT(),
+ VA.getLocReg(), VA.getLocVT(), LocInfo);
+ }
+}
+
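To make the rule in determineLocInfo above concrete, a few assumed examples (illustration only, not part of the patch):

    //   determineLocInfo(/*RegisterVT=*/MVT::i32, /*VT=*/MVT::i64, Flags)
    //     -> CCValAssign::Full   // i64 is split into two i32 parts, not truncated
    //   determineLocInfo(MVT::i32, MVT::i8, Flags) with Flags.isSExt() set
    //     -> CCValAssign::SExt   // narrow value is sign-extended to 32 bits
    //   determineLocInfo(MVT::i32, MVT::i8, Flags) with no extension flags
    //     -> CCValAssign::AExt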
bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA);
- if (Val != nullptr) {
- if (!isSupportedType(Val->getType()))
- return false;
+ if (Val != nullptr && !isSupportedType(Val->getType()))
+ return false;
+ if (!VRegs.empty()) {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = MF.getDataLayout();
const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
+ LLVMContext &Ctx = Val->getType()->getContext();
+
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> RetInfos;
SmallVector<unsigned, 8> OrigArgIndices;
- ArgInfo ArgRetInfo(VReg, Val->getType());
- setArgFlags(ArgRetInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(ArgRetInfo, 0, RetInfos, OrigArgIndices);
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(CurArgInfo, 0, RetInfos, OrigArgIndices);
+ }
SmallVector<ISD::OutputArg, 8> Outs;
- subTargetRegTypeForCallingConv(
- MIRBuilder, RetInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs) {
- Outs.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, RetInfos, OrigArgIndices, Outs);
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
F.getContext());
CCInfo.AnalyzeReturn(Outs, TLI.CCAssignFnForReturn());
+ setLocInfo(ArgLocs, Outs);
OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
if (!RetHandler.handle(ArgLocs, RetInfos)) {
@@ -266,12 +408,7 @@ bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
}
SmallVector<ISD::InputArg, 8> Ins;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used, unsigned origIdx,
- unsigned partOffs) {
- Ins.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Ins);
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
@@ -283,6 +420,7 @@ bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(F.getCallingConv()),
1);
CCInfo.AnalyzeFormalArguments(Ins, TLI.CCAssignFnForCall());
+ setLocInfo(ArgLocs, Ins);
IncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
if (!Handler.handle(ArgLocs, ArgInfos))
@@ -347,12 +485,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
}
SmallVector<ISD::OutputArg, 8> Outs;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigArgIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used, unsigned origIdx,
- unsigned partOffs) {
- Outs.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Outs);
SmallVector<CCValAssign, 8> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
@@ -361,6 +494,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
const char *Call = Callee.isSymbol() ? Callee.getSymbolName() : nullptr;
CCInfo.AnalyzeCallOperands(Outs, TLI.CCAssignFnForCall(), FuncOrigArgs, Call);
+ setLocInfo(ArgLocs, Outs);
OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), MIB);
if (!RetHandler.handle(ArgLocs, ArgInfos)) {
@@ -383,18 +517,14 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
splitToValueTypes(OrigRet, 0, ArgInfos, OrigRetIndices);
SmallVector<ISD::InputArg, 8> Ins;
- subTargetRegTypeForCallingConv(
- MIRBuilder, ArgInfos, OrigRetIndices,
- [&](ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs) {
- Ins.emplace_back(flags, vt, argvt, used, origIdx, partOffs);
- });
+ subTargetRegTypeForCallingConv(F, ArgInfos, OrigRetIndices, Ins);
SmallVector<CCValAssign, 8> ArgLocs;
MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs,
F.getContext());
CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), OrigRet.Ty, Call);
+ setLocInfo(ArgLocs, Ins);
CallReturnHandler Handler(MIRBuilder, MF.getRegInfo(), MIB);
if (!Handler.handle(ArgLocs, ArgInfos))
@@ -406,11 +536,10 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return true;
}
+template <typename T>
void MipsCallLowering::subTargetRegTypeForCallingConv(
- MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
- ArrayRef<unsigned> OrigArgIndices, const FunTy &PushBack) const {
- MachineFunction &MF = MIRBuilder.getMF();
- const Function &F = MF.getFunction();
+ const Function &F, ArrayRef<ArgInfo> Args,
+ ArrayRef<unsigned> OrigArgIndices, SmallVectorImpl<T> &ISDArgs) const {
const DataLayout &DL = F.getParent()->getDataLayout();
const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
@@ -420,12 +549,20 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
EVT VT = TLI.getValueType(DL, Arg.Ty);
MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(),
F.getCallingConv(), VT);
+ unsigned NumRegs = TLI.getNumRegistersForCallingConv(
+ F.getContext(), F.getCallingConv(), VT);
- ISD::ArgFlagsTy Flags = Arg.Flags;
- Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ ISD::ArgFlagsTy Flags = Arg.Flags;
- PushBack(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo], 0);
+ if (i == 0)
+ Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
+ else
+ Flags.setOrigAlign(1);
+ ISDArgs.emplace_back(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo],
+ 0);
+ }
++ArgNo;
}
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallLowering.h b/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
index e23c10cec563..9916b04ef50c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsCallLowering.h
@@ -31,27 +31,38 @@ public:
virtual ~MipsHandler() = default;
+ bool handle(ArrayRef<CCValAssign> ArgLocs,
+ ArrayRef<CallLowering::ArgInfo> Args);
+
protected:
- bool assign(const CCValAssign &VA, unsigned vreg);
+ bool assignVRegs(ArrayRef<unsigned> VRegs, ArrayRef<CCValAssign> ArgLocs,
+ unsigned Index);
+
+ void setLeastSignificantFirst(SmallVectorImpl<unsigned> &VRegs);
MachineIRBuilder &MIRBuilder;
MachineRegisterInfo &MRI;
private:
- virtual unsigned getStackAddress(uint64_t Size, int64_t Offset,
- MachinePointerInfo &MPO) = 0;
+ bool assign(unsigned VReg, const CCValAssign &VA);
+
+ virtual unsigned getStackAddress(const CCValAssign &VA,
+ MachineMemOperand *&MMO) = 0;
- virtual void assignValueToReg(unsigned ValVReg, unsigned PhysReg) = 0;
+ virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) = 0;
- virtual void assignValueToAddress(unsigned ValVReg, unsigned Addr,
- uint64_t Size,
- MachinePointerInfo &MPO) = 0;
+ virtual void assignValueToAddress(unsigned ValVReg,
+ const CCValAssign &VA) = 0;
+
+ virtual bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex, unsigned ArgsReg) = 0;
};
MipsCallLowering(const MipsTargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
@@ -61,21 +72,16 @@ public:
ArrayRef<ArgInfo> OrigArgs) const override;
private:
- using FunTy =
- std::function<void(ISD::ArgFlagsTy flags, EVT vt, EVT argvt, bool used,
- unsigned origIdx, unsigned partOffs)>;
-
/// Based on registers available on target machine split or extend
/// type if needed, also change pointer type to appropriate integer
- /// type. Lambda will fill some info so we can tell MipsCCState to
- /// assign physical registers.
- void subTargetRegTypeForCallingConv(MachineIRBuilder &MIRBuilder,
- ArrayRef<ArgInfo> Args,
+ /// type.
+ template <typename T>
+ void subTargetRegTypeForCallingConv(const Function &F, ArrayRef<ArgInfo> Args,
ArrayRef<unsigned> OrigArgIndices,
- const FunTy &PushBack) const;
+ SmallVectorImpl<T> &ISDArgs) const;
/// Split structures and arrays, save original argument indices since
- /// Mips calling conv needs info about original argument type.
+ /// Mips calling convention needs info about original argument type.
void splitToValueTypes(const ArgInfo &OrigArg, unsigned OriginalIndex,
SmallVectorImpl<ArgInfo> &SplitArgs,
SmallVectorImpl<unsigned> &SplitArgsOrigIndices) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsCondMov.td b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
index 39dc2654aa6a..0d7e3e200b5f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCondMov.td
+++ b/contrib/llvm/lib/Target/Mips/MipsCondMov.td
@@ -296,3 +296,13 @@ def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F<GPR64Opnd>;
def PseudoSELECTFP_F_S : SelectFP_Pseudo_F<FGR32Opnd>;
def PseudoSELECTFP_F_D32 : SelectFP_Pseudo_F<AFGR64Opnd>, FGR_32;
def PseudoSELECTFP_F_D64 : SelectFP_Pseudo_F<FGR64Opnd>, FGR_64;
+
+let usesCustomInserter = 1 in {
+class D_SELECT_CLASS<RegisterOperand RC> :
+ PseudoSE<(outs RC:$dst1, RC:$dst2),
+ (ins GPR32Opnd:$cond, RC:$a1, RC:$a2, RC:$b1, RC:$b2), []>,
+ ISA_MIPS1_NOT_4_32;
+}
+
+def PseudoD_SELECT_I : D_SELECT_CLASS<GPR32Opnd>;
+def PseudoD_SELECT_I64 : D_SELECT_CLASS<GPR64Opnd>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 33f03b954a8c..e3823e0dfdb8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -728,9 +728,10 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
(Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
continue;
- // Instructions LWP/SWP should not be in a delay slot as that
+ // Instructions LWP/SWP and MOVEP should not be in a delay slot as that
// results in unpredictable behaviour
- if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM))
+ if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM ||
+ Opcode == Mips::MOVEP_MM))
continue;
Filler = CurrI;
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0677d378a115..8c2a364cdfa9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -1396,6 +1396,9 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case Mips::PseudoSELECTFP_T_D32:
case Mips::PseudoSELECTFP_T_D64:
return emitPseudoSELECT(MI, BB, true, Mips::BC1T);
+ case Mips::PseudoD_SELECT_I:
+ case Mips::PseudoD_SELECT_I64:
+ return emitPseudoD_SELECT(MI, BB);
}
}
@@ -2427,6 +2430,16 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32));
SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi,
DAG.getConstant(VT.getSizeInBits() - 1, DL, VT));
+
+ if (!(Subtarget.hasMips4() || Subtarget.hasMips32())) {
+ SDVTList VTList = DAG.getVTList(VT, VT);
+ return DAG.getNode(Subtarget.isGP64bit() ? Mips::PseudoD_SELECT_I64
+ : Mips::PseudoD_SELECT_I,
+ DL, VTList, Cond, ShiftRightHi,
+ IsSRA ? Ext : DAG.getConstant(0, DL, VT), Or,
+ ShiftRightHi);
+ }
+
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond,
IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi);
@@ -2563,10 +2576,12 @@ static SDValue lowerUnalignedIntStore(StoreSDNode *SD, SelectionDAG &DAG,
}
// Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr).
-static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG) {
+static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG,
+ bool SingleFloat) {
SDValue Val = SD->getValue();
- if (Val.getOpcode() != ISD::FP_TO_SINT)
+ if (Val.getOpcode() != ISD::FP_TO_SINT ||
+ (Val.getValueSizeInBits() > 32 && SingleFloat))
return SDValue();
EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits());
@@ -2587,7 +2602,7 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
((MemVT == MVT::i32) || (MemVT == MVT::i64)))
return lowerUnalignedIntStore(SD, DAG, Subtarget.isLittle());
- return lowerFP_TO_SINT_STORE(SD, DAG);
+ return lowerFP_TO_SINT_STORE(SD, DAG, Subtarget.isSingleFloat());
}
SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
@@ -2603,6 +2618,9 @@ SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
+ if (Op.getValueSizeInBits() > 32 && Subtarget.isSingleFloat())
+ return SDValue();
+
EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
SDValue Trunc = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Op), FPTy,
Op.getOperand(0));
@@ -4340,6 +4358,81 @@ MachineBasicBlock *MipsTargetLowering::emitPseudoSELECT(MachineInstr &MI,
return BB;
}
+MachineBasicBlock *MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ assert(!(Subtarget.hasMips4() || Subtarget.hasMips32()) &&
+         "Subtarget already supports SELECT nodes with the use of "
+ "conditional-move instructions.");
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+  // D_SELECT substitutes two SELECT nodes that follow one another and share
+  // the same condition operand. On machines without a conditional-move
+  // instruction, this avoids the redundant branches that lowering the two
+  // SELECT pseudo instructions as two separate diamond patterns would
+  // otherwise produce.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // bne rs, $0, sinkMBB
+ BuildMI(BB, DL, TII->get(Mips::BNE))
+ .addReg(MI.getOperand(2).getReg())
+ .addReg(Mips::ZERO)
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+  // Use two PHI nodes to select the two results.
+ BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(3).getReg())
+ .addMBB(thisMBB)
+ .addReg(MI.getOperand(5).getReg())
+ .addMBB(copy0MBB);
+ BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(4).getReg())
+ .addMBB(thisMBB)
+ .addReg(MI.getOperand(6).getReg())
+ .addMBB(copy0MBB);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+
+ return BB;
+}
+
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT,
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index 5a0de45c44f3..e043f133a09f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -699,6 +699,8 @@ class TargetRegisterClass;
MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB,
bool isFPCmp, unsigned Opc) const;
+ MachineBasicBlock *emitPseudoD_SELECT(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
};
/// Create MipsTargetLowering objects.
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index e986942ad8fa..4cb8574e08f6 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -130,6 +130,15 @@ class ABSS_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
HARDFLOAT,
NeverHasSideEffects;
+class CVT_PS_S_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fd), (ins SrcRC:$fs, SrcRC:$ft),
+ !strconcat(opstr, "\t$fd, $fs, $ft"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs, SrcRC:$ft))], Itin, FrmFR, opstr>,
+ HARDFLOAT {
+ let isCommutable = IsComm;
+}
+
multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
@@ -432,6 +441,29 @@ let AdditionalPredicates = [NotInMicroMips] in {
def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x21, 20>, ISA_MIPS1, FGR_32;
}
+
+let DecoderNamespace = "MipsFP64" in {
+ let AdditionalPredicates = [NotInMicroMips] in {
+ def PLL_PS64 : ADDS_FT<"pll.ps", FGR64Opnd, II_CVT, 0>,
+ ADDS_FM<0x2C, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def PLU_PS64 : ADDS_FT<"plu.ps", FGR64Opnd, II_CVT, 0>,
+ ADDS_FM<0x2D, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+
+ def CVT_S_PU64 : ABSS_FT<"cvt.s.pu", FGR32Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x20, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def CVT_S_PL64 : ABSS_FT<"cvt.s.pl", FGR32Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x28, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+
+ def CVT_PS_S64 : CVT_PS_S_FT<"cvt.ps.s", FGR64Opnd, FGR32Opnd, II_CVT, 0>,
+ ADDS_FM<0x26, 16>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ }
+}
+
let DecoderNamespace = "MipsFP64" in {
let AdditionalPredicates = [NotInMicroMips] in {
def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 0e0e712dba19..bfb4c775205d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -280,6 +280,8 @@ bool MipsInstrInfo::isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset)
switch (BranchOpc) {
case Mips::B:
case Mips::BAL:
+ case Mips::BAL_BR:
+ case Mips::BAL_BR_MM:
case Mips::BC1F:
case Mips::BC1FL:
case Mips::BC1T:
@@ -661,8 +663,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
}
MIB.copyImplicitOps(*I);
-
- MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
+ MIB.cloneMemRefs(*I);
return MIB;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index 0faa13d4d63f..d9398b7d6024 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -2002,13 +2002,19 @@ let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
// branches. See the comment in file MipsLongBranch.cpp for detailed
// explanation.
-// Expands to: lui $dst, %hi($tgt - $baltgt)
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt - $baltgt)
def LONG_BRANCH_LUi : PseudoSE<(outs GPR32Opnd:$dst),
(ins brtarget:$tgt, brtarget:$baltgt), []>;
+// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_LUi2Op : PseudoSE<(outs GPR32Opnd:$dst),
+ (ins brtarget:$tgt), []>;
-// Expands to: addiu $dst, $src, %lo($tgt - $baltgt)
+// Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt - $baltgt)
def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
(ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+// Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt)
+def LONG_BRANCH_ADDiu2Op : PseudoSE<(outs GPR32Opnd:$dst),
+ (ins GPR32Opnd:$src, brtarget:$tgt), []>;
//===----------------------------------------------------------------------===//
// Instruction definition
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 6c5b83021f74..b041590ee343 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -15,6 +15,7 @@
#include "MipsRegisterBankInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#define DEBUG_TYPE "mips-isel"
@@ -144,6 +145,42 @@ bool MipsInstructionSelector::select(MachineInstr &I,
.addMemOperand(*I.memoperands_begin());
break;
}
+ case G_UDIV:
+ case G_UREM:
+ case G_SDIV:
+ case G_SREM: {
+ unsigned HILOReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
+ bool IsSigned = I.getOpcode() == G_SREM || I.getOpcode() == G_SDIV;
+ bool IsDiv = I.getOpcode() == G_UDIV || I.getOpcode() == G_SDIV;
+
+ MachineInstr *PseudoDIV, *PseudoMove;
+ PseudoDIV = BuildMI(MBB, I, I.getDebugLoc(),
+ TII.get(IsSigned ? Mips::PseudoSDIV : Mips::PseudoUDIV))
+ .addDef(HILOReg)
+ .add(I.getOperand(1))
+ .add(I.getOperand(2));
+ if (!constrainSelectedInstRegOperands(*PseudoDIV, TII, TRI, RBI))
+ return false;
+
+ PseudoMove = BuildMI(MBB, I, I.getDebugLoc(),
+ TII.get(IsDiv ? Mips::PseudoMFLO : Mips::PseudoMFHI))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(HILOReg);
+ if (!constrainSelectedInstRegOperands(*PseudoMove, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
+ case G_SELECT: {
+ // Handle operands with pointer type.
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::MOVN_I_I))
+ .add(I.getOperand(0))
+ .add(I.getOperand(2))
+ .add(I.getOperand(1))
+ .add(I.getOperand(3));
+ break;
+ }
case G_CONSTANT: {
int Imm = I.getOperand(1).getCImm()->getValue().getLimitedValue();
unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
@@ -193,7 +230,85 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_ICMP: {
+ struct Instr {
+ unsigned Opcode, Def, LHS, RHS;
+ Instr(unsigned Opcode, unsigned Def, unsigned LHS, unsigned RHS)
+ : Opcode(Opcode), Def(Def), LHS(LHS), RHS(RHS){};
+
+ bool hasImm() const {
+ if (Opcode == Mips::SLTiu || Opcode == Mips::XORi)
+ return true;
+ return false;
+ }
+ };
+
+ SmallVector<struct Instr, 2> Instructions;
+ unsigned ICMPReg = I.getOperand(0).getReg();
+ unsigned Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ unsigned LHS = I.getOperand(2).getReg();
+ unsigned RHS = I.getOperand(3).getReg();
+ CmpInst::Predicate Cond =
+ static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+
+ switch (Cond) {
+ case CmpInst::ICMP_EQ: // LHS == RHS -> (LHS ^ RHS) < 1
+ Instructions.emplace_back(Mips::XOR, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::SLTiu, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_NE: // LHS != RHS -> 0 < (LHS ^ RHS)
+ Instructions.emplace_back(Mips::XOR, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, Mips::ZERO, Temp);
+ break;
+ case CmpInst::ICMP_UGT: // LHS > RHS -> RHS < LHS
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, RHS, LHS);
+ break;
+ case CmpInst::ICMP_UGE: // LHS >= RHS -> !(LHS < RHS)
+ Instructions.emplace_back(Mips::SLTu, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_ULT: // LHS < RHS -> LHS < RHS
+ Instructions.emplace_back(Mips::SLTu, ICMPReg, LHS, RHS);
+ break;
+ case CmpInst::ICMP_ULE: // LHS <= RHS -> !(RHS < LHS)
+ Instructions.emplace_back(Mips::SLTu, Temp, RHS, LHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_SGT: // LHS > RHS -> RHS < LHS
+ Instructions.emplace_back(Mips::SLT, ICMPReg, RHS, LHS);
+ break;
+ case CmpInst::ICMP_SGE: // LHS >= RHS -> !(LHS < RHS)
+ Instructions.emplace_back(Mips::SLT, Temp, LHS, RHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ case CmpInst::ICMP_SLT: // LHS < RHS -> LHS < RHS
+ Instructions.emplace_back(Mips::SLT, ICMPReg, LHS, RHS);
+ break;
+ case CmpInst::ICMP_SLE: // LHS <= RHS -> !(RHS < LHS)
+ Instructions.emplace_back(Mips::SLT, Temp, RHS, LHS);
+ Instructions.emplace_back(Mips::XORi, ICMPReg, Temp, 1);
+ break;
+ default:
+ return false;
+ }
+
+ MachineIRBuilder B(I);
+ for (const struct Instr &Instruction : Instructions) {
+ MachineInstrBuilder MIB = B.buildInstr(
+ Instruction.Opcode, {Instruction.Def}, {Instruction.LHS});
+
+ if (Instruction.hasImm())
+ MIB.addImm(Instruction.RHS);
+ else
+ MIB.addUse(Instruction.RHS);
+ if (!MIB.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
}
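As an informal illustration of the G_ICMP mapping above (not part of the patch), the predicates with no single set-on-condition instruction are built from slt/sltu plus one extra instruction:

    // G_ICMP intpred(sge), %dst(s32), %lhs, %rhs selects roughly to:
    //   slt   $tmp, $lhs, $rhs    // tmp = (lhs < rhs) ? 1 : 0
    //   xori  $dst, $tmp, 1       // dst = !(lhs < rhs), i.e. lhs >= rhs
    // and an equality compare to:
    //   xor   $tmp, $lhs, $rhs
    //   sltiu $dst, $tmp, 1       // dst = ((lhs ^ rhs) < 1), i.e. lhs == rhs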
diff --git a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index fb259516be09..c629f02af00e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -13,23 +13,53 @@
#include "MipsLegalizerInfo.h"
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
using namespace llvm;
MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
using namespace TargetOpcode;
+ const LLT s1 = LLT::scalar(1);
const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
const LLT p0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder(G_ADD).legalFor({s32});
+ getActionDefinitionsBuilder(G_ADD)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder(G_UADDE)
+ .lowerFor({{s32, s1}});
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.legalForCartesianProduct({p0, s32}, {p0});
- getActionDefinitionsBuilder(G_CONSTANT)
+ getActionDefinitionsBuilder(G_SELECT)
+ .legalForCartesianProduct({p0, s32}, {s32})
+ .minScalar(0, s32)
+ .minScalar(1, s32);
+
+ getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
.legalFor({s32});
+ getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UREM, G_UDIV})
+ .legalFor({s32})
+ .minScalar(0, s32)
+ .libcallFor({s64});
+
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalFor({{s32, s32}})
+ .minScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32})
+ .clampScalar(0, s32, s32);
+
getActionDefinitionsBuilder(G_GEP)
.legalFor({{p0, s32}});
@@ -42,3 +72,15 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
computeTables();
verify(*ST.getInstrInfo());
}
+
+bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+
+ using namespace TargetOpcode;
+
+ MIRBuilder.setInstr(MI);
+
+ return false;
+}
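A short gloss on the rule builders used in the constructor above, with assumed example MIR (my reading of the GlobalISel builders, not text from the patch): legalFor lists the type combinations accepted as-is, clampScalar(0, s32, s32) widens or narrows operand 0 so that only s32 survives, minScalar only widens, and libcallFor routes the listed types to runtime library calls.

    //   %a:_(s8)  = G_ADD %x, %y   -> widened to s32 by clampScalar(0, s32, s32)
    //   %c:_(s1)  = G_ICMP ...     -> result widened to s32 by minScalar(0, s32)
    //   %d:_(s64) = G_SDIV %p, %q  -> emitted as a library call (e.g. __divdi3)
    //                                 because of libcallFor({s64})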
diff --git a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
index 36dd39c8c1c1..75fadd6cf613 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_MIPS_MIPSMACHINELEGALIZER_H
#define LLVM_LIB_TARGET_MIPS_MIPSMACHINELEGALIZER_H
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
namespace llvm {
@@ -24,6 +25,10 @@ class MipsSubtarget;
class MipsLegalizerInfo : public LegalizerInfo {
public:
MipsLegalizerInfo(const MipsSubtarget &ST);
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index 2b7f64099923..46b37ceae391 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -298,12 +298,16 @@ bool MipsMCInstLower::lowerLongBranch(const MachineInstr *MI,
default:
return false;
case Mips::LONG_BRANCH_LUi:
+ case Mips::LONG_BRANCH_LUi2Op:
+ case Mips::LONG_BRANCH_LUi2Op_64:
lowerLongBranchLUi(MI, OutMI);
return true;
case Mips::LONG_BRANCH_ADDiu:
+ case Mips::LONG_BRANCH_ADDiu2Op:
lowerLongBranchADDiu(MI, OutMI, Mips::ADDiu);
return true;
case Mips::LONG_BRANCH_DADDiu:
+ case Mips::LONG_BRANCH_DADDiu2Op:
lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu);
return true;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index d83f75ffa1c1..eecc7c573df1 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -107,6 +107,18 @@ class vfsetcc_type<ValueType ResTy, ValueType OpTy, CondCode CC> :
(ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>;
// ISD::SETFALSE cannot occur
+def vfseteq_v4f32 : vfsetcc_type<v4i32, v4f32, SETEQ>;
+def vfseteq_v2f64 : vfsetcc_type<v2i64, v2f64, SETEQ>;
+def vfsetge_v4f32 : vfsetcc_type<v4i32, v4f32, SETGE>;
+def vfsetge_v2f64 : vfsetcc_type<v2i64, v2f64, SETGE>;
+def vfsetgt_v4f32 : vfsetcc_type<v4i32, v4f32, SETGT>;
+def vfsetgt_v2f64 : vfsetcc_type<v2i64, v2f64, SETGT>;
+def vfsetle_v4f32 : vfsetcc_type<v4i32, v4f32, SETLE>;
+def vfsetle_v2f64 : vfsetcc_type<v2i64, v2f64, SETLE>;
+def vfsetlt_v4f32 : vfsetcc_type<v4i32, v4f32, SETLT>;
+def vfsetlt_v2f64 : vfsetcc_type<v2i64, v2f64, SETLT>;
+def vfsetne_v4f32 : vfsetcc_type<v4i32, v4f32, SETNE>;
+def vfsetne_v2f64 : vfsetcc_type<v2i64, v2f64, SETNE>;
def vfsetoeq_v4f32 : vfsetcc_type<v4i32, v4f32, SETOEQ>;
def vfsetoeq_v2f64 : vfsetcc_type<v2i64, v2f64, SETOEQ>;
def vfsetoge_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGE>;
@@ -4038,3 +4050,20 @@ def : MSAPat<
(SPLAT_D v2f64:$ws,
(COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
sub_64))>;
+
+def : MSAPat<(vfseteq_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCEQ_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfseteq_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCEQ_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetle_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCLE_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetle_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCLE_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetlt_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCLT_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetlt_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCLT_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
+def : MSAPat<(vfsetne_v4f32 MSA128WOpnd:$a, MSA128WOpnd:$b),
+ (FCNE_W MSA128WOpnd:$a, MSA128WOpnd:$b)>;
+def : MSAPat<(vfsetne_v2f64 MSA128DOpnd:$a, MSA128DOpnd:$b),
+ (FCNE_D MSA128DOpnd:$a, MSA128DOpnd:$b)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..1cff1c8396ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -0,0 +1,92 @@
+//=== lib/Target/Mips/MipsPreLegalizerCombiner.cpp ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#define DEBUG_TYPE "mips-prelegalizer-combiner"
+
+using namespace llvm;
+
+namespace {
+class MipsPreLegalizerCombinerInfo : public CombinerInfo {
+public:
+ MipsPreLegalizerCombinerInfo()
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr) {}
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ return false;
+}
+
+// Pass boilerplate
+// ================
+
+class MipsPreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MipsPreLegalizerCombiner();
+
+ StringRef getPassName() const override { return "MipsPreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+} // end anonymous namespace
+
+void MipsPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MipsPreLegalizerCombiner::MipsPreLegalizerCombiner() : MachineFunctionPass(ID) {
+ initializeMipsPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool MipsPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ MipsPreLegalizerCombinerInfo PCInfo;
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, nullptr);
+}
+
+char MipsPreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(MipsPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine Mips machine instrs before legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(MipsPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine Mips machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createMipsPreLegalizeCombiner() {
+ return new MipsPreLegalizerCombiner();
+}
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 351135079217..6af1f10189df 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -57,7 +57,10 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass(
switch (RC.getID()) {
case Mips::GPR32RegClassID:
case Mips::CPU16Regs_and_GPRMM16ZeroRegClassID:
+ case Mips::GPRMM16MovePPairFirstRegClassID:
+ case Mips::CPU16Regs_and_GPRMM16MovePPairSecondRegClassID:
case Mips::GPRMM16MoveP_and_CPU16Regs_and_GPRMM16ZeroRegClassID:
+ case Mips::GPRMM16MovePPairFirst_and_GPRMM16MovePPairSecondRegClassID:
case Mips::SP32RegClassID:
return getRegBank(Mips::GPRBRegBankID);
default:
@@ -84,6 +87,16 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_LOAD:
case G_STORE:
case G_GEP:
+ case G_AND:
+ case G_OR:
+ case G_XOR:
+ case G_SHL:
+ case G_ASHR:
+ case G_LSHR:
+ case G_SDIV:
+ case G_UDIV:
+ case G_SREM:
+ case G_UREM:
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
break;
case G_CONSTANT:
@@ -92,6 +105,19 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
break;
+ case G_ICMP:
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
+ case G_SELECT:
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
default:
return getInvalidInstructionMapping();
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 4cc50fb981ba..b84aaad05eb5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -57,8 +57,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
index c85ee20273c0..a943a0ad4094 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -335,6 +335,16 @@ def GPRMM16MoveP : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S2, S3, S4)>;
+def GPRMM16MovePPairFirst : RegisterClass<"Mips", [i32], 32, (add
+ // Arguments
+ A0, A1, A2)>;
+
+def GPRMM16MovePPairSecond : RegisterClass<"Mips", [i32], 32, (add
+ // Arguments
+ A1, A2, A3,
+ // Callee save
+ S5, S6)>;
+
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
// Reserved
ZERO_64, AT_64,
@@ -522,6 +532,16 @@ def GPRMM16AsmOperandMoveP : MipsAsmRegOperand {
let PredicateMethod = "isMM16AsmRegMoveP";
}
+def GPRMM16AsmOperandMovePPairFirst : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmRegMovePPairFirst";
+ let PredicateMethod = "isMM16AsmRegMovePPairFirst";
+}
+
+def GPRMM16AsmOperandMovePPairSecond : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmRegMovePPairSecond";
+ let PredicateMethod = "isMM16AsmRegMovePPairSecond";
+}
+
def ACC64DSPAsmOperand : MipsAsmRegOperand {
let Name = "ACC64DSPAsmReg";
let PredicateMethod = "isACCAsmReg";
@@ -613,6 +633,14 @@ def GPRMM16OpndMoveP : RegisterOperand<GPRMM16MoveP> {
let EncoderMethod = "getMovePRegSingleOpValue";
}
+def GPRMM16OpndMovePPairFirst : RegisterOperand<GPRMM16MovePPairFirst> {
+ let ParserMatchClass = GPRMM16AsmOperandMovePPairFirst;
+}
+
+def GPRMM16OpndMovePPairSecond : RegisterOperand<GPRMM16MovePPairSecond> {
+ let ParserMatchClass = GPRMM16AsmOperandMovePPairSecond;
+}
+
def GPR64Opnd : RegisterOperand<GPR64> {
let ParserMatchClass = GPR64AsmOperand;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index cf2899dd375e..cf196b597278 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -244,7 +244,7 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true));
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
case Mips::BuildPairF64:
case Mips::ExtractElementF64:
if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1())
@@ -795,6 +795,24 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
switch(Opcode) {
default: break;
+ case Mips::PseudoD_SELECT_I:
+ case Mips::PseudoD_SELECT_I64: {
+ MVT VT = Subtarget->isGP64bit() ? MVT::i64 : MVT::i32;
+ SDValue cond = Node->getOperand(0);
+ SDValue Hi1 = Node->getOperand(1);
+ SDValue Lo1 = Node->getOperand(2);
+ SDValue Hi2 = Node->getOperand(3);
+ SDValue Lo2 = Node->getOperand(4);
+
+ SDValue ops[] = {cond, Hi1, Lo1, Hi2, Lo2};
+ EVT NodeTys[] = {VT, VT};
+ ReplaceNode(Node, CurDAG->getMachineNode(Subtarget->isGP64bit()
+ ? Mips::PseudoD_SELECT_I64
+ : Mips::PseudoD_SELECT_I,
+ DL, NodeTys, ops));
+ return true;
+ }
+
case ISD::ADDE: {
selectAddE(Node, DL);
return true;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index f625a2903bd7..a78e544c35f0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -158,8 +158,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -2360,24 +2360,6 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
}
}
-/// Check if the given BuildVectorSDNode is a splat.
-/// This method currently relies on DAG nodes being reused when equivalent,
-/// so it's possible for this to return false even when isConstantSplat returns
-/// true.
-static bool isSplatVector(const BuildVectorSDNode *N) {
- unsigned int nOps = N->getNumOperands();
- assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector");
-
- SDValue Operand0 = N->getOperand(0);
-
- for (unsigned int i = 1; i < nOps; ++i) {
- if (N->getOperand(i) != Operand0)
- return false;
- }
-
- return true;
-}
-
// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
//
// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
@@ -2488,7 +2470,7 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
return Result;
- } else if (isSplatVector(Node))
+ } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
return Op;
else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
// Use INSERT_VECTOR_ELT operations rather than expand to stores.
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index e8589fc53492..c7ab90ed2a3b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -222,9 +222,9 @@ static bool isReadOrWriteToDSPReg(const MachineInstr &MI, bool &isWrite) {
/// We check for the common case of 'or', as it's MIPS' preferred instruction
/// for GPRs but we have to check the operands to ensure that is the case.
/// Other move instructions for MIPS are directly identifiable.
-bool MipsSEInstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
bool isDSPControlWrite = false;
// Condition is made to match the creation of WRDSP/RDDSP copy instruction
// from copyPhysReg function.
@@ -421,12 +421,16 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandERet(MBB, MI);
break;
case Mips::PseudoMFHI:
- Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI;
- expandPseudoMFHiLo(MBB, MI, Opc);
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI);
+ break;
+ case Mips::PseudoMFHI_MM:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI16_MM);
break;
case Mips::PseudoMFLO:
- Opc = isMicroMips ? Mips::MFLO16_MM : Mips::MFLO;
- expandPseudoMFHiLo(MBB, MI, Opc);
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO);
+ break;
+ case Mips::PseudoMFLO_MM:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO16_MM);
break;
case Mips::PseudoMFHI64:
expandPseudoMFHiLo(MBB, MI, Mips::MFHI64);
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index fc55716d598a..fce0fe5f58ad 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -47,9 +47,6 @@ public:
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
-
void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -79,6 +76,13 @@ public:
MachineBasicBlock::iterator II, const DebugLoc &DL,
unsigned *NewImm) const;
+protected:
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another, return true along with the
+ /// @Source machine operand and the @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
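The MipsSEInstrInfo change above moves the copy-recognition hook from a public isCopyInstr override to a protected isCopyInstrImpl. Below is a minimal standalone sketch of that wrapper/Impl split, assuming the base class keeps a public entry point that simply delegates to the protected virtual; the class names, opcode value, and register encoding here are invented for illustration and are not LLVM code.

#include <iostream>

struct Instr { int Opcode; int Dst; int Src; };

class TargetInstrInfoBase {
public:
  // Public entry point: may add target-independent checks before delegating.
  bool isCopyInstr(const Instr &MI, int &Src, int &Dst) const {
    return isCopyInstrImpl(MI, Src, Dst);
  }
  virtual ~TargetInstrInfoBase() = default;

protected:
  virtual bool isCopyInstrImpl(const Instr &, int &, int &) const { return false; }
};

class MipsLikeInstrInfo : public TargetInstrInfoBase {
protected:
  // Hypothetical opcode 1 stands for "or rd, rs, $zero", which acts as a GPR copy.
  bool isCopyInstrImpl(const Instr &MI, int &Src, int &Dst) const override {
    if (MI.Opcode != 1)
      return false;
    Src = MI.Src;
    Dst = MI.Dst;
    return true;
  }
};

int main() {
  MipsLikeInstrInfo TII;
  int S = 0, D = 0;
  Instr Copy{1, /*Dst=*/4, /*Src=*/7};
  std::cout << TII.isCopyInstr(Copy, S, D) << " " << S << " -> " << D << "\n"; // prints: 1 7 -> 4
}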
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
index 64db815a0f4c..410fa655a225 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -154,6 +154,7 @@ def II_DERET : InstrItinClass;
def II_ERETNC : InstrItinClass;
def II_EHB : InstrItinClass;
def II_SDBBP : InstrItinClass;
+def II_SIGRIE : InstrItinClass;
def II_SSNOP : InstrItinClass;
def II_SYSCALL : InstrItinClass;
def II_PAUSE : InstrItinClass;
@@ -546,6 +547,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_ERETNC , [InstrStage<1, [ALU]>]>,
InstrItinData<II_EHB , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SDBBP , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_SIGRIE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SSNOP , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SYSCALL , [InstrStage<1, [ALU]>]>,
InstrItinData<II_PAUSE , [InstrStage<1, [ALU]>]>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index 79c55dbb9e03..80ffe7ada7c8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -179,7 +179,7 @@ def GenericWriteTrap : SchedWriteRes<[GenericIssueCTISTD]>;
def : ItinRW<[GenericWriteTrap], [II_BREAK, II_SYSCALL, II_TEQ, II_TEQI,
II_TGE, II_TGEI, II_TGEIU, II_TGEU, II_TNE,
II_TNEI, II_TLT, II_TLTI, II_TLTU, II_TTLTIU,
- II_TRAP, II_SDBBP]>;
+ II_TRAP, II_SDBBP, II_SIGRIE]>;
// COP0 Pipeline
// =============
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 896dd0eb0a5e..ad8f4848b870 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -295,8 +295,10 @@ public:
bool inMips16HardFloat() const {
return inMips16Mode() && InMips16HardFloat;
}
- bool inMicroMipsMode() const { return InMicroMipsMode; }
- bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); }
+ bool inMicroMipsMode() const { return InMicroMipsMode && !InMips16Mode; }
+ bool inMicroMips32r6Mode() const {
+ return inMicroMipsMode() && hasMips32r6();
+ }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
bool hasDSPR3() const { return HasDSPR3; }
@@ -312,14 +314,14 @@ public:
}
bool useSmallSection() const { return UseSmallSection; }
- bool hasStandardEncoding() const { return !inMips16Mode(); }
+ bool hasStandardEncoding() const { return !InMips16Mode && !InMicroMipsMode; }
bool useSoftFloat() const { return IsSoftFloat; }
bool useLongCalls() const { return UseLongCalls; }
bool enableLongBranchPass() const {
- return hasStandardEncoding() || allowMixed16_32();
+ return hasStandardEncoding() || inMicroMipsMode() || allowMixed16_32();
}
/// Features related to the presence of specific instructions.
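The MipsSubtarget change above makes MIPS16 take precedence over microMIPS when both feature bits are set and tightens hasStandardEncoding() to exclude both compressed ISAs. A small standalone sketch of the new predicate logic (plain C++, field names are illustrative only):

#include <cassert>

struct Modes { bool Mips16; bool MicroMips; };

static bool inMicroMipsMode(Modes M)     { return M.MicroMips && !M.Mips16; }
static bool hasStandardEncoding(Modes M) { return !M.Mips16 && !M.MicroMips; }

int main() {
  Modes Both{true, true};                 // both feature bits set
  assert(!inMicroMipsMode(Both));         // MIPS16 wins; microMIPS is reported off
  assert(!hasStandardEncoding(Both));     // but this is still not standard MIPS
  Modes Micro{false, true};
  assert(inMicroMipsMode(Micro) && !hasStandardEncoding(Micro));
  Modes Plain{false, false};
  assert(hasStandardEncoding(Plain));
}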
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 1e6fe2b9f7e7..8466298cf36f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -56,6 +56,7 @@ extern "C" void LLVMInitializeMipsTarget() {
initializeMipsDelaySlotFillerPass(*PR);
initializeMipsBranchExpansionPass(*PR);
initializeMicroMipsSizeReducePass(*PR);
+ initializeMipsPreLegalizerCombinerPass(*PR);
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -101,12 +102,6 @@ static Reloc::Model getEffectiveRelocModel(bool JIT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
// On function prologue, the stack is created by decrementing
// its pointer. Once decremented, all references are done with positive
// offset from the stack/frame pointer, using StackGrowsUp enables
@@ -121,7 +116,7 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(JIT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
isLittle(isLittle), TLOF(llvm::make_unique<MipsTargetObjectFile>()),
ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)),
Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this,
@@ -240,8 +235,8 @@ public:
bool addInstSelector() override;
void addPreEmitPass() override;
void addPreRegAlloc() override;
- void addPreEmit2() ;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
@@ -286,9 +281,6 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(BasicTTIImpl(this, F));
}
-void MipsPassConfig::addPreEmit2() {
-}
-
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
@@ -322,6 +314,10 @@ bool MipsPassConfig::addIRTranslator() {
return false;
}
+void MipsPassConfig::addPreLegalizeMachineIR() {
+ addPass(createMipsPreLegalizeCombiner());
+}
+
bool MipsPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
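The MipsTargetMachine change above (and the matching NVPTX change further below) drops a file-local getEffectiveCodeModel() helper in favor of a shared one that takes an explicit default. A minimal standalone sketch of the intended behavior, using std::optional in place of llvm::Optional; the enum values here are illustrative, not the real CodeModel definition:

#include <cassert>
#include <optional>

enum class CodeModel { Small, Medium, Large };

// Pick the user-supplied code model if present, otherwise the target default.
static CodeModel getEffectiveCodeModel(std::optional<CodeModel> CM, CodeModel Default) {
  return CM ? *CM : Default;
}

int main() {
  assert(getEffectiveCodeModel(std::nullopt, CodeModel::Small) == CodeModel::Small);
  assert(getEffectiveCodeModel(CodeModel::Large, CodeModel::Small) == CodeModel::Large);
}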
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index f767c8321988..f53ee0631b5e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -10,6 +10,7 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsMCExpr.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -189,6 +190,7 @@ const MCExpr *
MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
const MCExpr *Expr =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- return MCBinaryExpr::createAdd(
+ Expr = MCBinaryExpr::createAdd(
Expr, MCConstantExpr::create(0x8000, getContext()), getContext());
+ return MipsMCExpr::create(MipsMCExpr::MEK_DTPREL, Expr, getContext());
}
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index ab494d5bf41b..22be564b6502 100644
--- a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -32,17 +32,18 @@ Target &llvm::getTheMips64elTarget() {
extern "C" void LLVMInitializeMipsTargetInfo() {
RegisterTarget<Triple::mips,
/*HasJIT=*/true>
- X(getTheMipsTarget(), "mips", "Mips", "Mips");
+ X(getTheMipsTarget(), "mips", "MIPS (32-bit big endian)", "Mips");
RegisterTarget<Triple::mipsel,
/*HasJIT=*/true>
- Y(getTheMipselTarget(), "mipsel", "Mipsel", "Mips");
+ Y(getTheMipselTarget(), "mipsel", "MIPS (32-bit little endian)", "Mips");
RegisterTarget<Triple::mips64,
/*HasJIT=*/true>
- A(getTheMips64Target(), "mips64", "Mips64 [experimental]", "Mips");
+ A(getTheMips64Target(), "mips64", "MIPS (64-bit big endian)", "Mips");
RegisterTarget<Triple::mips64el,
/*HasJIT=*/true>
- B(getTheMips64elTarget(), "mips64el", "Mips64el [experimental]", "Mips");
+ B(getTheMips64elTarget(), "mips64el", "MIPS (64-bit little endian)",
+ "Mips");
}
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index aeb90eca3a05..f7b4cf3a0f72 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -25,6 +25,12 @@ NVPTXTargetStreamer::NVPTXTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
NVPTXTargetStreamer::~NVPTXTargetStreamer() = default;
+void NVPTXTargetStreamer::outputDwarfFileDirectives() {
+ for (const std::string &S : DwarfFiles)
+ getStreamer().EmitRawText(S.data());
+ DwarfFiles.clear();
+}
+
void NVPTXTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
DwarfFiles.emplace_back(Directive);
}
@@ -82,9 +88,7 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
OS << "//\t}\n";
if (isDwarfSection(FI, Section)) {
// Emit DWARF .file directives in the outermost scope.
- for (const std::string &S : DwarfFiles)
- getStreamer().EmitRawText(S.data());
- DwarfFiles.clear();
+ outputDwarfFileDirectives();
OS << "//\t.section";
Section->PrintSwitchToSection(*getStreamer().getContext().getAsmInfo(),
FI->getTargetTriple(), OS, SubSection);
@@ -92,3 +96,30 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
OS << "//\t{\n";
}
}
+
+void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+ const char *Directive = MAI->getData8bitsDirective();
+ unsigned NumElements = Data.size();
+ const unsigned MaxLen = 40;
+ unsigned NumChunks = 1 + ((NumElements - 1) / MaxLen);
+ // Split very long directives into several parts so that no single line
+ // carries more than MaxLen values.
+ for (unsigned I = 0; I < NumChunks; ++I) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ const char *Label = Directive;
+ for (auto It = std::next(Data.bytes_begin(), I * MaxLen),
+ End = (I == NumChunks - 1)
+ ? Data.bytes_end()
+ : std::next(Data.bytes_begin(), (I + 1) * MaxLen);
+ It != End; ++It) {
+ OS << Label << (unsigned)*It;
+ if (Label == Directive)
+ Label = ",";
+ }
+ Streamer.EmitRawText(OS.str());
+ }
+}
+
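The emitRawBytes() hunk above emits data as 8-bit data directives, breaking long runs into chunks of at most 40 values per line. Below is a self-contained sketch of the same chunking arithmetic in plain C++; the ".b8" directive string is an assumption standing in for whatever MAI->getData8bitsDirective() returns:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  const unsigned MaxLen = 40;
  std::vector<unsigned char> Data(100, 0xAB);             // 100 bytes -> 3 lines
  unsigned NumChunks = 1 + ((Data.size() - 1) / MaxLen);  // same formula as the patch
  for (unsigned I = 0; I < NumChunks; ++I) {
    std::string Line;
    const char *Label = ".b8 ";                           // directive first, then commas
    unsigned End = std::min<unsigned>((I + 1) * MaxLen, Data.size());
    for (unsigned J = I * MaxLen; J < End; ++J) {
      Line += Label + std::to_string((unsigned)Data[J]);
      Label = ",";
    }
    std::cout << Line << "\n";                            // 40, 40 and 20 values per line
  }
}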
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
index 30831ab8bbeb..f18e61cdca57 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
@@ -24,6 +24,9 @@ public:
NVPTXTargetStreamer(MCStreamer &S);
~NVPTXTargetStreamer() override;
+ /// Outputs the list of the DWARF '.file' directives to the streamer.
+ void outputDwarfFileDirectives();
+
/// Record DWARF file directives for later output.
/// According to PTX ISA, CUDA Toolkit documentation, 11.5.3. Debugging
/// Directives: .file
@@ -39,6 +42,10 @@ public:
void emitDwarfFileDirective(StringRef Directive) override;
void changeSection(const MCSection *CurSection, MCSection *Section,
const MCExpr *SubSection, raw_ostream &OS) override;
+ /// Emit the bytes in \p Data into the output.
+ ///
+ /// This is used to emit bytes in \p Data as sequence of .byte directives.
+ void emitRawBytes(StringRef Data) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index 902d1b25e7dd..07bfc58a8da7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -46,13 +46,14 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
-FunctionPass *createNVVMReflectPass();
+FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
BasicBlockPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
+MachineFunctionPass *createNVPTXProxyRegErasurePass();
Target &getTheNVPTXTarget32();
Target &getTheNVPTXTarget64();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
index 6494c46f54ab..3731b2f37f6c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
@@ -54,6 +54,8 @@ def SM70 : SubtargetFeature<"sm_70", "SmVersion", "70",
"Target SM 7.0">;
def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72",
"Target SM 7.2">;
+def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75",
+ "Target SM 7.5">;
// PTX Versions
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
@@ -72,6 +74,8 @@ def PTX60 : SubtargetFeature<"ptx60", "PTXVersion", "60",
"Use PTX version 6.0">;
def PTX61 : SubtargetFeature<"ptx61", "PTXVersion", "61",
"Use PTX version 6.1">;
+def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
+ "Use PTX version 6.3">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -94,6 +98,7 @@ def : Proc<"sm_61", [SM61, PTX50]>;
def : Proc<"sm_62", [SM62, PTX50]>;
def : Proc<"sm_70", [SM70, PTX60]>;
def : Proc<"sm_72", [SM72, PTX61]>;
+def : Proc<"sm_75", [SM75, PTX63]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index bed52293197d..bf922eb8a195 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -41,7 +41,7 @@ public:
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
Function::iterator I = function.begin();
- TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+ Instruction *firstTerminatorInst = (I++)->getTerminator();
for (Function::iterator E = function.end(); I != E; ++I) {
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index a966b9928400..6284ad8b82e8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "InstPrinter/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "MCTargetDesc/NVPTXTargetStreamer.h"
#include "NVPTX.h"
#include "NVPTXMCExpr.h"
#include "NVPTXMachineFunctionInfo.h"
@@ -199,7 +200,7 @@ bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
// Ewwww
- TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
+ LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
const char *Sym = MFI->getImageHandleSymbol(Index);
@@ -218,11 +219,12 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
return;
}
+ const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (!nvptxSubtarget->hasImageHandles()) {
+ if (!STI.hasImageHandles()) {
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
@@ -328,11 +330,12 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
- const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
+ const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
+ const TargetLowering *TLI = STI.getTargetLowering();
Type *Ty = F->getReturnType();
- bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
if (Ty->getTypeID() == Type::VoidTyID)
return;
@@ -473,7 +476,6 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
}
bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
- nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
bool Result = AsmPrinter::runOnMachineFunction(F);
// Emit closing brace for the body of function F.
// The closing brace must be emitted here because we need to emit additional
@@ -507,8 +509,9 @@ void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
OutStreamer->AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
+ const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
OutStreamer->AddComment(Twine("implicit-def: ") +
- nvptxSubtarget->getRegisterInfo()->getName(RegNo));
+ STI.getRegisterInfo()->getName(RegNo));
}
OutStreamer->AddBlankLine();
}
@@ -727,6 +730,11 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = &*FI;
+ if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
+ emitDeclaration(F, O);
+ continue;
+ }
+
if (F->isDeclaration()) {
if (F->use_empty())
continue;
@@ -785,11 +793,8 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// Construct a default subtarget off of the TargetMachine defaults. The
// rest of NVPTX isn't friendly to change subtargets per function and
// so the default TargetMachine will have all of the options.
- const Triple &TT = TM.getTargetTriple();
- StringRef CPU = TM.getTargetCPU();
- StringRef FS = TM.getTargetFeatureString();
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
- const NVPTXSubtarget STI(TT, CPU, FS, NTM);
+ const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
if (M.alias_size()) {
report_fatal_error("Module has aliases, which NVPTX does not support.");
@@ -813,7 +818,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
// Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1, STI);
+ emitHeader(M, OS1, *STI);
OutStreamer->EmitRawText(OS1.str());
// Emit module-level inline asm if it exists.
@@ -880,8 +885,22 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
if (NTM.getDrvInterface() == NVPTX::NVCL)
O << ", texmode_independent";
+ bool HasFullDebugInfo = false;
+ for (DICompileUnit *CU : M.debug_compile_units()) {
+ switch(CU->getEmissionKind()) {
+ case DICompileUnit::NoDebug:
+ case DICompileUnit::DebugDirectivesOnly:
+ break;
+ case DICompileUnit::LineTablesOnly:
+ case DICompileUnit::FullDebug:
+ HasFullDebugInfo = true;
+ break;
+ }
+ if (HasFullDebugInfo)
+ break;
+ }
// FIXME: remove comment once debug info is properly supported.
- if (MMI && MMI->hasDebugInfo())
+ if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
O << "//, debug";
O << "\n";
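The emitHeader() hunk above only appends the "//, debug" marker when at least one compile unit actually carries line tables or full debug info; directives-only and no-debug units do not count. A standalone restatement of that filter (plain C++; the enum names mirror the DICompileUnit emission kinds):

#include <cassert>
#include <vector>

enum class EmissionKind { NoDebug, DebugDirectivesOnly, LineTablesOnly, FullDebug };

// True when some compile unit carries line tables or full debug info.
static bool hasFullDebugInfo(const std::vector<EmissionKind> &CUs) {
  for (EmissionKind K : CUs)
    if (K == EmissionKind::LineTablesOnly || K == EmissionKind::FullDebug)
      return true;
  return false;
}

int main() {
  assert(!hasFullDebugInfo({}));                                   // no debug info at all
  assert(!hasFullDebugInfo({EmissionKind::DebugDirectivesOnly}));  // directives only: no marker
  assert(hasFullDebugInfo({EmissionKind::NoDebug, EmissionKind::FullDebug}));
}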
@@ -938,6 +957,10 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
if (HasDebugInfo)
OutStreamer->EmitRawText("//\t}");
+ // Output last DWARF .file directives, if any.
+ static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
+ ->outputDwarfFileDirectives();
+
return ret;
//bool Result = AsmPrinter::doFinalization(M);
@@ -1412,12 +1435,14 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const AttributeList &PAL = F->getAttributes();
- const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
+ const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
+ const TargetLowering *TLI = STI.getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
bool isKernelFunc = isKernelFunction(*F);
- bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
+ bool hasImageHandles = STI.hasImageHandles();
MVT thePointerTy = TLI->getPointerTy(DL);
if (F->arg_empty()) {
@@ -1441,7 +1466,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isImage(*I)) {
std::string sname = I->getName();
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
@@ -1449,7 +1474,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "_param_" << paramIndex;
}
else { // Default image is read_only
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
@@ -1457,7 +1482,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "_param_" << paramIndex;
}
} else {
- if (nvptxSubtarget->hasImageHandles())
+ if (hasImageHandles)
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index efe98003b1c8..44a09f5fe513 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -258,9 +258,6 @@ private:
typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
VRegRCMap VRegMapping;
- // Cache the subtarget here.
- const NVPTXSubtarget *nvptxSubtarget;
-
// List of variables demoted to a function scope.
std::map<const Function *, std::vector<const GlobalVariable *>> localDecls;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 21939d836dc7..ffc6a59cd6c8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -981,9 +981,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (!NVPTXLD)
return false;
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
ReplaceNode(N, NVPTXLD);
return true;
@@ -1221,9 +1220,8 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
}
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
ReplaceNode(N, LD);
return true;
@@ -1659,9 +1657,8 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
}
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = Mem->getMemOperand();
- cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = Mem->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
// For automatic generation of LDG (through SelectLoad[Vector], not the
// intrinsics), we may have an extending load like:
@@ -1864,9 +1861,8 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!NVPTXST)
return false;
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
ReplaceNode(N, NVPTXST);
return true;
}
@@ -2088,9 +2084,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
ReplaceNode(N, ST);
return true;
@@ -2236,9 +2231,8 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
return false;
SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
ReplaceNode(N, Ret);
return true;
@@ -2341,9 +2335,8 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
SDNode *Ret =
CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
- MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
ReplaceNode(N, Ret);
return true;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 2536623fb853..bec8ece29050 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -180,6 +180,18 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
return;
}
+ // Given a struct type, recursively traverse the elements with custom
+ // ComputePTXValueVTs.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ auto const *SL = DL.getStructLayout(STy);
+ auto ElementNum = 0;
+ for(auto *EI : STy->elements()) {
+ ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(ElementNum));
+ ++ElementNum;
+ }
+ return;
+ }
+
ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
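The struct case added to ComputePTXValueVTs above recurses into each element and adds the element's offset within the struct to the offset passed in by the caller, so nested aggregates flatten into one list of (type, byte offset) pairs. Below is a self-contained model of that recursion in plain C++ with a toy layout; the type names and offsets are illustrative, not LLVM data structures:

#include <cstdint>
#include <iostream>
#include <string>
#include <variant>
#include <vector>

struct Struct;
using Type = std::variant<std::string, const Struct *>;  // scalar name or nested struct
struct Field { Type Ty; uint64_t Offset; };               // offset inside the enclosing struct
struct Struct { std::vector<Field> Fields; };

static void flatten(const Type &Ty, uint64_t StartingOffset,
                    std::vector<std::pair<std::string, uint64_t>> &Out) {
  if (auto *S = std::get_if<const Struct *>(&Ty)) {
    for (const Field &F : (*S)->Fields)
      flatten(F.Ty, StartingOffset + F.Offset, Out);       // recurse, accumulating offsets
    return;
  }
  Out.push_back({std::get<std::string>(Ty), StartingOffset});
}

int main() {
  // struct Inner { float a; float b; };  struct Outer { int x; Inner in; };
  Struct Inner{{{std::string("float"), 0}, {std::string("float"), 4}}};
  Struct Outer{{{std::string("i32"), 0}, {&Inner, 4}}};
  std::vector<std::pair<std::string, uint64_t>> Flat;
  flatten(Type(&Outer), /*StartingOffset=*/0, Flat);
  for (const auto &P : Flat)
    std::cout << P.first << " @ " << P.second << "\n";     // i32 @ 0, float @ 4, float @ 8
}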
@@ -560,8 +572,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
}
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
// No FPOW or FREM in PTX.
@@ -651,6 +663,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::CallSeqEnd";
case NVPTXISD::CallPrototype:
return "NVPTXISD::CallPrototype";
+ case NVPTXISD::ProxyReg:
+ return "NVPTXISD::ProxyReg";
case NVPTXISD::LoadV2:
return "NVPTXISD::LoadV2";
case NVPTXISD::LoadV4:
@@ -1170,7 +1184,7 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
TargetLoweringBase::LegalizeTypeAction
-NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
+NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
return TypeSplitVector;
if (VT == MVT::v2f16)
@@ -1649,7 +1663,24 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- if (!Func) {
+ // Both indirect calls and libcalls have nullptr Func. In order to distinguish
+ // between them we must rely on the call site value which is valid for
+ // indirect calls but is always null for libcalls.
+ bool isIndirectCall = !Func && CS;
+
+ if (isa<ExternalSymbolSDNode>(Callee)) {
+ Function* CalleeFunc = nullptr;
+
+ // Try to find the callee in the current module.
+ Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
+ assert(CalleeFunc != nullptr && "Libcall callee must be set.");
+
+ // Set the "libcall callee" attribute to indicate that the function
+ // must always have a declaration.
+ CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
+ }
+
+ if (isIndirectCall) {
// This is indirect function call case : PTX requires a prototype of the
// form
// proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
@@ -1673,7 +1704,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
};
// We model convergent calls as separate opcodes.
- unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall;
+ unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
if (CLI.IsConvergent)
Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
: NVPTXISD::PrintConvergentCall;
@@ -1707,12 +1738,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgEndOps[] = { Chain,
- DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
+ DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
InFlag = Chain.getValue(1);
- if (!Func) {
+ if (isIndirectCall) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrototypeOps[] = { Chain,
DAG.getConstant(uniqueCallSite, dl, MVT::i32),
@@ -1721,6 +1752,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}
+ SmallVector<SDValue, 16> ProxyRegOps;
+ SmallVector<Optional<MVT>, 16> ProxyRegTruncates;
+
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
SmallVector<EVT, 16> VTs;
@@ -1791,11 +1825,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineMemOperand::MOLoad);
for (unsigned j = 0; j < NumElts; ++j) {
- SDValue Ret = RetVal.getValue(j);
+ ProxyRegOps.push_back(RetVal.getValue(j));
+
if (needTruncate)
- Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
- InVals.push_back(Ret);
+ ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT));
+ else
+ ProxyRegTruncates.push_back(Optional<MVT>());
}
+
Chain = RetVal.getValue(NumElts);
InFlag = RetVal.getValue(NumElts + 1);
@@ -1811,8 +1848,29 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
true),
InFlag, dl);
+ InFlag = Chain.getValue(1);
uniqueCallSite++;
+ // Append ProxyReg instructions to the chain to make sure that `callseq_end`
+ // will not get lost. Otherwise, during libcall expansion, the nodes can become
+ // dangling.
+ for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
+ SDValue Ret = DAG.getNode(
+ NVPTXISD::ProxyReg, dl,
+ DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
+ { Chain, ProxyRegOps[i], InFlag }
+ );
+
+ Chain = Ret.getValue(1);
+ InFlag = Ret.getValue(2);
+
+ if (ProxyRegTruncates[i].hasValue()) {
+ Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
+ }
+
+ InVals.push_back(Ret);
+ }
+
// set isTailCall to false for now, until we figure out how to express
// tail call optimization in PTX
isTailCall = false;
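The LowerCall() changes above distinguish libcalls from true indirect calls: both arrive with a null Function*, but only an indirect call has a call site. A minimal truth table for that predicate (plain C++; booleans stand in for the Func and CS values from the patch):

#include <cassert>

// Mirrors `bool isIndirectCall = !Func && CS;` from the hunk above.
static bool isIndirectCall(bool HasFunc, bool HasCallSite) {
  return !HasFunc && HasCallSite;
}

int main() {
  assert(!isIndirectCall(true,  true));   // direct call to a known Function
  assert( isIndirectCall(false, true));   // call through a function pointer
  assert(!isIndirectCall(false, false));  // libcall introduced by legalization
}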
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index ef04a8573d45..66fab2b6f480 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -51,6 +51,7 @@ enum NodeType : unsigned {
CallSeqBegin,
CallSeqEnd,
CallPrototype,
+ ProxyReg,
FUN_SHFL_CLAMP,
FUN_SHFR_CLAMP,
MUL_WIDE_SIGNED,
@@ -511,7 +512,7 @@ public:
}
TargetLoweringBase::LegalizeTypeAction
- getPreferredVectorAction(EVT VT) const override;
+ getPreferredVectorAction(MVT VT) const override;
// Get the degree of precision we want from 32-bit floating point division
// operations.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 443b077184c7..02a40b9f5262 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1318,9 +1318,6 @@ def ROTR64reg_sw :
// Create SDNodes so they can be used in the DAG code, e.g.
// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
-def SDTIntShiftDOp :
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisInt<3>]>;
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
@@ -1888,6 +1885,7 @@ def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
+def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
def DeclareParam :
SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
@@ -1975,6 +1973,9 @@ def PseudoUseParam :
def RETURNNode :
SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPSideEffect]>;
+def ProxyReg :
+ SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
let mayLoad = 1 in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
@@ -2252,6 +2253,21 @@ def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
+class ProxyRegInst<string SzStr, NVPTXRegClass regclass> :
+ NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
+ !strconcat("mov.", SzStr, " \t$dst, $src;"),
+ [(set regclass:$dst, (ProxyReg regclass:$src))]>;
+
+let isCodeGenOnly=1, isPseudo=1 in {
+ def ProxyRegI1 : ProxyRegInst<"pred", Int1Regs>;
+ def ProxyRegI16 : ProxyRegInst<"b16", Int16Regs>;
+ def ProxyRegI32 : ProxyRegInst<"b32", Int32Regs>;
+ def ProxyRegI64 : ProxyRegInst<"b64", Int64Regs>;
+ def ProxyRegF16 : ProxyRegInst<"b16", Float16Regs>;
+ def ProxyRegF32 : ProxyRegInst<"f32", Float32Regs>;
+ def ProxyRegF64 : ProxyRegInst<"f64", Float64Regs>;
+ def ProxyRegF16x2 : ProxyRegInst<"b32", Float16x2Regs>;
+}
//
// Load / Store Handling
@@ -2544,7 +2560,7 @@ let mayStore=1, hasSideEffects=0 in {
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
NVPTXRegClass regclassOut> :
NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
- !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")),
+ !strconcat("mov.b", SzStr, " \t$d, $a;"),
[(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>;
@@ -2625,32 +2641,20 @@ def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
def : Pat<(i1 (fp_to_sint Float16Regs:$a)),
(SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
- (CVT_s16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
(CVT_s16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
- (CVT_s32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
(CVT_s32_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
- (CVT_s64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
(CVT_s64_f16 Float16Regs:$a, CvtRZI)>;
// f16 -> uint
def : Pat<(i1 (fp_to_uint Float16Regs:$a)),
(SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
- (CVT_u16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
(CVT_u16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
- (CVT_u32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
(CVT_u32_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
- (CVT_u64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
(CVT_u64_f16 Float16Regs:$a, CvtRZI)>;
// f32 -> sint
@@ -2948,14 +2952,10 @@ def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
// fpround f32 -> f16
def : Pat<(f16 (fpround Float32Regs:$a)),
- (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f16 (fpround Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;
// fpround f64 -> f16
def : Pat<(f16 (fpround Float64Regs:$a)),
- (CVT_f16_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f16 (fpround Float64Regs:$a)),
(CVT_f16_f64 Float64Regs:$a, CvtRN)>;
// fpround f64 -> f32
@@ -2972,8 +2972,6 @@ def : Pat<(f32 (fpextend Float16Regs:$a)),
// fpextend f16 -> f64
def : Pat<(f64 (fpextend Float16Regs:$a)),
- (CVT_f64_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f64 (fpextend Float16Regs:$a)),
(CVT_f64_f16 Float16Regs:$a, CvtNONE)>;
// fpextend f32 -> f64
@@ -2988,9 +2986,7 @@ def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
// fceil, ffloor, fround, ftrunc.
def : Pat<(fceil Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(fceil Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRPI)>;
def : Pat<(fceil Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fceil Float32Regs:$a),
@@ -2999,9 +2995,7 @@ def : Pat<(fceil Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
def : Pat<(ffloor Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(ffloor Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRMI)>;
def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),
@@ -3009,10 +3003,8 @@ def : Pat<(ffloor Float32Regs:$a),
def : Pat<(ffloor Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
-def : Pat<(fround Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f16 (fround Float16Regs:$a)),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fround Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float32Regs:$a)),
@@ -3021,9 +3013,7 @@ def : Pat<(f64 (fround Float64Regs:$a)),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(ftrunc Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(ftrunc Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(ftrunc Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),
@@ -3036,9 +3026,7 @@ def : Pat<(ftrunc Float64Regs:$a),
// matches what CUDA's "libm" does.
def : Pat<(fnearbyint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(fnearbyint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fnearbyint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fnearbyint Float32Regs:$a),
@@ -3047,9 +3035,7 @@ def : Pat<(fnearbyint Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(frint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(frint Float16Regs:$a),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(frint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(frint Float32Regs:$a),
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 5bb4fc3edd09..2ca0ccf2dfa7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -60,6 +61,24 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isFI())
continue;
+
+ // Frame indices in debug values are encoded in a target-independent
+ // way, using just the frame index and an offset rather than any
+ // target-specific addressing mode.
+ if (MI.isDebugValue()) {
+ assert(i == 0 && "Frame indices can only appear as the first "
+ "operand of a DBG_VALUE machine instruction");
+ unsigned Reg;
+ int64_t Offset =
+ TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
+ MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
+ MI.getOperand(0).setIsDebug();
+ auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
+ DIExpression::NoDeref, Offset);
+ MI.getOperand(3).setMetadata(DIExpr);
+ continue;
+ }
+
TRI.eliminateFrameIndex(MI, 0, i, nullptr);
Modified = true;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
new file mode 100644
index 000000000000..f60d841c1683
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -0,0 +1,122 @@
+//===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass removes ProxyReg instructions and restores the related
+// registers. The instructions were needed at the instruction selection stage
+// to make sure that callseq_end nodes are not removed as "dead nodes". This
+// can happen when we expand instructions into libcalls and the call site does
+// not care about the libcall chain. The call site cares only about data flow,
+// and the last data flow node happens to sit before callseq_end, so the
+// callseq_end node becomes dangling and "dead". ProxyReg acts as an additional
+// data flow node *after* callseq_end in the chain and ensures that the call
+// sequence is preserved.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXProxyRegErasurePass(PassRegistry &);
+}
+
+namespace {
+
+struct NVPTXProxyRegErasure : public MachineFunctionPass {
+public:
+ static char ID;
+ NVPTXProxyRegErasure() : MachineFunctionPass(ID) {
+ initializeNVPTXProxyRegErasurePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "NVPTX Proxy Register Instruction Erasure";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ void replaceMachineInstructionUsage(MachineFunction &MF, MachineInstr &MI);
+
+ void replaceRegisterUsage(MachineInstr &Instr, MachineOperand &From,
+ MachineOperand &To);
+};
+
+} // namespace
+
+char NVPTXProxyRegErasure::ID = 0;
+
+INITIALIZE_PASS(NVPTXProxyRegErasure, "nvptx-proxyreg-erasure", "NVPTX ProxyReg Erasure", false, false)
+
+bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
+ SmallVector<MachineInstr *, 16> RemoveList;
+
+ for (auto &BB : MF) {
+ for (auto &MI : BB) {
+ switch (MI.getOpcode()) {
+ case NVPTX::ProxyRegI1:
+ case NVPTX::ProxyRegI16:
+ case NVPTX::ProxyRegI32:
+ case NVPTX::ProxyRegI64:
+ case NVPTX::ProxyRegF16:
+ case NVPTX::ProxyRegF16x2:
+ case NVPTX::ProxyRegF32:
+ case NVPTX::ProxyRegF64:
+ replaceMachineInstructionUsage(MF, MI);
+ RemoveList.push_back(&MI);
+ break;
+ }
+ }
+ }
+
+ for (auto *MI : RemoveList) {
+ MI->eraseFromParent();
+ }
+
+ return !RemoveList.empty();
+}
+
+void NVPTXProxyRegErasure::replaceMachineInstructionUsage(MachineFunction &MF,
+ MachineInstr &MI) {
+ auto &InOp = *MI.uses().begin();
+ auto &OutOp = *MI.defs().begin();
+
+ assert(InOp.isReg() && "ProxyReg input operand should be a register.");
+ assert(OutOp.isReg() && "ProxyReg output operand should be a register.");
+
+ for (auto &BB : MF) {
+ for (auto &I : BB) {
+ replaceRegisterUsage(I, OutOp, InOp);
+ }
+ }
+}
+
+void NVPTXProxyRegErasure::replaceRegisterUsage(MachineInstr &Instr,
+ MachineOperand &From,
+ MachineOperand &To) {
+ for (auto &Op : Instr.uses()) {
+ if (Op.isReg() && Op.getReg() == From.getReg()) {
+ Op.setReg(To.getReg());
+ }
+ }
+}
+
+MachineFunctionPass *llvm::createNVPTXProxyRegErasurePass() {
+ return new NVPTXProxyRegErasure();
+}
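The new pass above rewrites every use of a ProxyReg's destination register to its source register and then deletes the proxy. Below is a self-contained model of that rewrite on a toy instruction list in plain C++; the opcode and register names are invented for illustration:

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Inst { std::string Op; std::vector<std::string> Regs; };  // Regs[0] is the def

int main() {
  std::vector<Inst> Prog = {
      {"call",  {"%r1"}},              // %r1 = call @f()
      {"proxy", {"%r2", "%r1"}},       // %r2 = ProxyReg %r1   <- to be erased
      {"add",   {"%r3", "%r2", "%r2"}}};
  // Pass 1: remember what each proxy def stands for.
  std::map<std::string, std::string> Fwd;
  for (const Inst &I : Prog)
    if (I.Op == "proxy")
      Fwd[I.Regs[0]] = I.Regs[1];
  // Pass 2: rewrite uses and drop the proxies themselves.
  std::vector<Inst> Out;
  for (Inst I : Prog) {
    if (I.Op == "proxy")
      continue;
    for (size_t i = 1; i < I.Regs.size(); ++i) {                 // operands after the def are uses
      auto It = Fwd.find(I.Regs[i]);
      if (It != Fwd.end())
        I.Regs[i] = It->second;
    }
    Out.push_back(I);
  }
  for (const Inst &I : Out) {
    std::cout << I.Regs[0] << " = " << I.Op;
    for (size_t i = 1; i < I.Regs.size(); ++i)
      std::cout << " " << I.Regs[i];
    std::cout << "\n";                                           // %r1 = call / %r3 = add %r1 %r1
  }
}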
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a1b160441df3..8ec0ddb9b3d5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -68,6 +68,7 @@ void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXProxyRegErasurePass(PassRegistry &);
} // end namespace llvm
@@ -87,6 +88,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
+ initializeNVPTXProxyRegErasurePass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
@@ -102,12 +104,6 @@ static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
return Ret;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -118,7 +114,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
// specified, as it is the only relocation model currently supported.
: LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
CPU, FS, Options, Reloc::PIC_,
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
@@ -166,6 +162,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
+ void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addMachineSSAOptimization() override;
@@ -195,7 +192,7 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createNVVMReflectPass());
+ PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
});
}
@@ -258,7 +255,8 @@ void NVPTXPassConfig::addIRPasses() {
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
// call addEarlyAsPossiblePasses.
- addPass(createNVVMReflectPass());
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
+ addPass(createNVVMReflectPass(ST.getSmVersion()));
if (getOptLevel() != CodeGenOpt::None)
addPass(createNVPTXImageOptimizerPass());
@@ -306,6 +304,11 @@ bool NVPTXPassConfig::addInstSelector() {
return false;
}
+void NVPTXPassConfig::addPreRegAlloc() {
+ // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
+ addPass(createNVPTXProxyRegErasurePass());
+}
+
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass(), false);
if (getOptLevel() != CodeGenOpt::None) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index a631055d36a0..14e93f7447dd 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -49,6 +49,19 @@ public:
return AddressSpace::ADDRESS_SPACE_GENERIC;
}
+ // Loads and stores can be vectorized if the alignment is at least as big as
+ // the load/store we want to vectorize.
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return Alignment >= ChainSizeInBytes;
+ }
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
+ }
+
// NVPTX has infinite registers of all kinds, but the actual machine doesn't.
// We conservatively return 1 here which is just enough to enable the
// vectorizers but disables heuristics based on the number of registers.
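The legality hooks added above reduce load/store chain vectorization to an alignment comparison. A self-contained illustration of that check (hypothetical helper name; it mirrors the logic, not the LLVM interface):

#include <cassert>

// A chain of scalar accesses may be combined into one wide access only if
// the wide access would still be naturally aligned.
static bool isLegalToVectorizeChain(unsigned ChainSizeInBytes, unsigned Alignment) {
  return Alignment >= ChainSizeInBytes;
}

int main() {
  assert(isLegalToVectorizeChain(16, 16));  // four adjacent i32 loads, align 16: OK
  assert(!isLegalToVectorizeChain(16, 4));  // same chain with only align 4: rejected
  return 0;
}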
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 60971b48adfc..64c262664fda 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -50,7 +50,9 @@ namespace {
class NVVMReflect : public FunctionPass {
public:
static char ID;
- NVVMReflect() : FunctionPass(ID) {
+ unsigned int SmVersion;
+ NVVMReflect() : NVVMReflect(0) {}
+ explicit NVVMReflect(unsigned int Sm) : FunctionPass(ID), SmVersion(Sm) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
}
@@ -58,7 +60,9 @@ public:
};
}
-FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
+FunctionPass *llvm::createNVVMReflectPass(unsigned int SmVersion) {
+ return new NVVMReflect(SmVersion);
+}
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@@ -163,6 +167,8 @@ bool NVVMReflect::runOnFunction(Function &F) {
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
ReflectVal = Flag->getSExtValue();
+ } else if (ReflectArg == "__CUDA_ARCH") {
+ ReflectVal = SmVersion * 10;
}
Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
ToRemove.push_back(Call);
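With the change above, NVVMReflect folds __nvvm_reflect("__CUDA_ARCH") to ten times the subtarget's SM version, matching the __CUDA_ARCH__ value defined for that architecture. A tiny check of the arithmetic (illustrative only):

#include <cassert>

int main() {
  unsigned SmVersion = 70;              // e.g. an sm_70 subtarget
  unsigned ReflectVal = SmVersion * 10; // value substituted for __CUDA_ARCH
  assert(ReflectVal == 700);
  return 0;
}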
diff --git a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp b/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp
deleted file mode 100644
index de0a5f9e84ea..000000000000
--- a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- Nios2InstPrinter.cpp - Convert Nios2 MCInst to assembly syntax-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an Nios2 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2InstPrinter.h"
-
-#include "Nios2InstrInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#define PRINT_ALIAS_INSTR
-#include "Nios2GenAsmWriter.inc"
-
-void Nios2InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
-}
-
-void Nios2InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
- // Try to print any aliases first.
- if (!printAliasInstr(MI, STI, O))
- printInstruction(MI, STI, O);
- printAnnotation(O, Annot);
-}
-
-void Nios2InstPrinter::printOperand(const MCInst *MI, int OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- printRegName(O, Op.getReg());
- return;
- }
-
- if (Op.isImm()) {
- O << Op.getImm();
- return;
- }
-
- assert(Op.isExpr() && "unknown operand kind in printOperand");
- Op.getExpr()->print(O, &MAI, true);
-}
-
-void Nios2InstPrinter::printMemOperand(const MCInst *MI, int opNum,
- const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
- // Load/Store memory operands -- imm($reg)
- printOperand(MI, opNum + 1, STI, O);
- O << "(";
- printOperand(MI, opNum, STI, O);
- O << ")";
-}
diff --git a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h b/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h
deleted file mode 100644
index 43a12951baea..000000000000
--- a/contrib/llvm/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//= Nios2InstPrinter.h - Convert Nios2 MCInst to assembly syntax -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a Nios2 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_INSTPRINTER_NIOS2INSTPRINTER_H
-#define LLVM_LIB_TARGET_NIOS2_INSTPRINTER_NIOS2INSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class Nios2InstPrinter : public MCInstPrinter {
-public:
- Nios2InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
-
- void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
-
- // Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
-
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
- raw_ostream &OS);
- void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
- raw_ostream &OS, const char *Modifier = nullptr);
-};
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp
deleted file mode 100644
index 8ac08c6837d9..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-//===-- Nios2AsmBackend.cpp - Nios2 Asm Backend --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Nios2AsmBackend class.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "MCTargetDesc/Nios2AsmBackend.h"
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-using namespace llvm;
-
-// Prepare the value so it fits the target's encoding for this fixup.
-static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value) {
-
- unsigned Kind = Fixup.getKind();
-
- // Add/subtract and shift
- switch (Kind) {
- default:
- return 0;
- case Nios2::fixup_Nios2_LO16:
- break;
- case Nios2::fixup_Nios2_HI16:
- // Get the higher 16-bits. Also add 1 if bit 15 is 1.
- Value = ((Value + 0x8000) >> 16) & 0xffff;
- break;
- }
-
- return Value;
-}
-
-// Calculate index for Nios2 specific little endian byte order
-static unsigned calculateLEIndex(unsigned i) {
- assert(i <= 3 && "Index out of range!");
-
- return (1 - i / 2) * 2 + i % 2;
-}
-
-/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided
-/// data fragment, at the offset specified by the fixup and following the
-/// fixup kind as appropriate.
-void Nios2AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) const {
- MCFixupKind Kind = Fixup.getKind();
- Value = adjustFixupValue(Fixup, Value);
-
- if (!Value)
- return; // Doesn't change encoding.
-
- // Where do we start in the object
- unsigned Offset = Fixup.getOffset();
- // Number of bytes we need to fixup
- unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
- // Grab current value, if any, from bits.
- uint64_t CurVal = 0;
-
- for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = calculateLEIndex(i);
- CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i * 8);
- }
-
- uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
- CurVal |= Value & Mask;
-
- // Write out the fixed up bytes back to the code/data bits.
- for (unsigned i = 0; i != NumBytes; ++i) {
- unsigned Idx = calculateLEIndex(i);
- Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff);
- }
-}
-
-Optional<MCFixupKind> Nios2AsmBackend::getFixupKind(StringRef Name) const {
- return StringSwitch<Optional<MCFixupKind>>(Name)
- .Case("R_NIOS2_NONE", (MCFixupKind)Nios2::fixup_Nios2_32)
- .Case("R_NIOS2_32", FK_Data_4)
- .Default(MCAsmBackend::getFixupKind(Name));
-}
-
-//@getFixupKindInfo {
-const MCFixupKindInfo &
-Nios2AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[Nios2::NumTargetFixupKinds] = {
- // This table *must* be in same the order of fixup_* kinds in
- // Nios2FixupKinds.h.
- //
- // name offset bits flags
- {"fixup_Nios2_32", 0, 32, 0},
- {"fixup_Nios2_HI16", 0, 16, 0},
- {"fixup_Nios2_LO16", 0, 16, 0}};
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
-}
-
-std::unique_ptr<MCObjectTargetWriter>
-Nios2AsmBackend::createObjectTargetWriter() const {
- return createNios2ELFObjectWriter(MCELFObjectTargetWriter::getOSABI(OSType));
-}
-
-bool Nios2AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
- return true;
-}
-
-// MCAsmBackend
-MCAsmBackend *llvm::createNios2AsmBackend(const Target &T,
- const MCSubtargetInfo &STI,
- const MCRegisterInfo &MRI,
- const MCTargetOptions &Options) {
- return new Nios2AsmBackend(T, STI.getTargetTriple().getOS());
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h
deleted file mode 100644
index 1f114bd869b1..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h
+++ /dev/null
@@ -1,81 +0,0 @@
-//===-- Nios2AsmBackend.h - Nios2 Asm Backend ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Nios2AsmBackend class.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2ASMBACKEND_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2ASMBACKEND_H
-
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmBackend.h"
-
-namespace llvm {
-
-class MCAssembler;
-struct MCFixupKindInfo;
-class Target;
-class MCObjectWriter;
-
-class Nios2AsmBackend : public MCAsmBackend {
- Triple::OSType OSType;
-
-public:
- Nios2AsmBackend(const Target &T, Triple::OSType OSType)
- : MCAsmBackend(support::little), OSType(OSType) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override;
-
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
-
- void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved) const override;
-
- Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
-
- unsigned getNumFixupKinds() const override {
- return Nios2::NumTargetFixupKinds;
- }
-
- /// MayNeedRelaxation - Check whether the given instruction may need
- /// relaxation.
- ///
- /// \param Inst - The instruction to test.
- bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
-
- /// fixupNeedsRelaxation - Target specific predicate for whether a given
- /// fixup requires the associated instruction to be relaxed.
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- // FIXME.
- llvm_unreachable("RelaxInstruction() unimplemented");
- return false;
- }
-
- /// RelaxInstruction - Relax the instruction in the given fragment
- /// to the next wider instruction.
- ///
- /// \param Inst - The instruction to relax, which may be the same
- /// as the output.
- /// \param [out] Res On return, the relaxed instruction.
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {}
-
-}; // class Nios2AsmBackend
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h
deleted file mode 100644
index 225671ebc8d8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- Nios2BaseInfo.h - Top level definitions for NIOS2 MC ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the Nios2 target useful for the compiler back-end and the MC libraries.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2BASEINFO_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2BASEINFO_H
-
-namespace llvm {
-
-/// Nios2FG - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-namespace Nios2FG {
-/// Target Operand Flag enum.
-enum TOF {
- //===------------------------------------------------------------------===//
- // Nios2 Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
- /// address.
- MO_ABS_HI,
- MO_ABS_LO,
-
-};
-} // namespace Nios2FG
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp
deleted file mode 100644
index db432d15120d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- Nios2ELFObjectWriter.cpp - Nios2 ELF Writer -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/Nios2FixupKinds.h"
-#include "MCTargetDesc/Nios2MCExpr.h"
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCObjectWriter.h"
-
-using namespace llvm;
-
-namespace {
-class Nios2ELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- Nios2ELFObjectWriter(uint8_t OSABI)
- : MCELFObjectTargetWriter(false, OSABI, ELF::EM_ALTERA_NIOS2, false) {}
-
- ~Nios2ELFObjectWriter() override;
-
- unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const override;
-};
-} // namespace
-
-Nios2ELFObjectWriter::~Nios2ELFObjectWriter() {}
-
-unsigned Nios2ELFObjectWriter::getRelocType(MCContext &Ctx,
- const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- return 0;
-}
-
-std::unique_ptr<MCObjectTargetWriter>
-llvm::createNios2ELFObjectWriter(uint8_t OSABI) {
- return llvm::make_unique<Nios2ELFObjectWriter>(OSABI);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h
deleted file mode 100644
index c169a1b19371..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- Nios2FixupKinds.h - Nios2 Specific Fixup Entries --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2FIXUPKINDS_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2FIXUPKINDS_H
-
-#include "llvm/MC/MCFixup.h"
-
-namespace llvm {
-namespace Nios2 {
-// Although most of the current fixup types reflect a unique relocation,
-// one can have multiple fixup types for a given relocation, and thus they
-// need to be uniquely named.
-//
-// This table *must* be in the same order as
-// MCFixupKindInfo Infos[Nios2::NumTargetFixupKinds]
-// in Nios2AsmBackend.cpp.
-enum Fixups {
- // Pure upper 32 bit fixup resulting in - R_NIOS2_32.
- fixup_Nios2_32 = FirstTargetFixupKind,
-
- // Pure upper 16 bit fixup resulting in - R_NIOS2_HI16.
- fixup_Nios2_HI16,
-
- // Pure lower 16 bit fixup resulting in - R_NIOS2_LO16.
- fixup_Nios2_LO16,
-
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
-};
-} // namespace Nios2
-} // namespace llvm
-
-#endif // LLVM_NIOS2_NIOS2FIXUPKINDS_H
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp
deleted file mode 100644
index e3c66e6776c2..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//===-- Nios2MCAsmInfo.cpp - Nios2 Asm Properties -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the Nios2MCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MCAsmInfo.h"
-
-#include "llvm/ADT/Triple.h"
-
-using namespace llvm;
-
-void Nios2MCAsmInfo::anchor() {}
-
-Nios2MCAsmInfo::Nios2MCAsmInfo(const Triple &TheTriple) {
- if ((TheTriple.getArch() == Triple::nios2))
- IsLittleEndian = true; // the default of IsLittleEndian is true
-
- AlignmentIsInBytes = false;
- Data16bitsDirective = "\t.2byte\t";
- Data32bitsDirective = "\t.4byte\t";
- Data64bitsDirective = "\t.8byte\t";
- PrivateLabelPrefix = ".LC";
- CommentString = "#";
- ZeroDirective = "\t.space\t";
- GPRel32Directive = "\t.gpword\t";
- GPRel64Directive = "\t.gpdword\t";
- WeakRefDirective = "\t.weak\t";
- GlobalDirective = "\t.global\t";
- AscizDirective = "\t.string\t";
- UseAssignmentForEHBegin = true;
-
- SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::DwarfCFI;
- DwarfRegNumForCFI = true;
- UsesELFSectionDirectiveForBSS = true;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h
deleted file mode 100644
index 0c81276f84d8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- Nios2MCAsmInfo.h - Nios2 Asm Info ----------------------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the Nios2MCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCASMINFO_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCASMINFO_H
-
-#include "llvm/MC/MCAsmInfoELF.h"
-
-namespace llvm {
-class Triple;
-
-class Nios2MCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
-
-public:
- explicit Nios2MCAsmInfo(const Triple &TheTriple);
-};
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp
deleted file mode 100644
index 0f12c9e93378..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//===-- Nios2MCExpr.cpp - Nios2 specific MC expression classes ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-
-#include "Nios2MCExpr.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2mcexpr"
-
-const Nios2MCExpr *Nios2MCExpr::create(Nios2MCExpr::Nios2ExprKind Kind,
- const MCExpr *Expr, MCContext &Ctx) {
- return new (Ctx) Nios2MCExpr(Kind, Expr);
-}
-
-const Nios2MCExpr *Nios2MCExpr::create(const MCSymbol *Symbol,
- Nios2MCExpr::Nios2ExprKind Kind,
- MCContext &Ctx) {
- const MCSymbolRefExpr *MCSym =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
- return new (Ctx) Nios2MCExpr(Kind, MCSym);
-}
-
-void Nios2MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
-
- switch (Kind) {
- case CEK_None:
- case CEK_Special:
- llvm_unreachable("CEK_None and CEK_Special are invalid");
- break;
- case CEK_ABS_HI:
- OS << "%hiadj";
- break;
- case CEK_ABS_LO:
- OS << "%lo";
- break;
- }
-
- OS << '(';
- Expr->print(OS, MAI, true);
- OS << ')';
-}
-
-bool Nios2MCExpr::evaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout,
- const MCFixup *Fixup) const {
- return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
-}
-
-void Nios2MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
- Streamer.visitUsedExpr(*getSubExpr());
-}
-
-void Nios2MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
- switch (getKind()) {
- case CEK_None:
- case CEK_Special:
- llvm_unreachable("CEK_None and CEK_Special are invalid");
- break;
- case CEK_ABS_HI:
- case CEK_ABS_LO:
- break;
- }
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h
deleted file mode 100644
index 5b49005eb648..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Nios2MCExpr.h - Nios2 specific MC expression classes ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCEXPR_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCEXPR_H
-
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCValue.h"
-
-namespace llvm {
-
-class Nios2MCExpr : public MCTargetExpr {
-public:
- enum Nios2ExprKind {
- CEK_None,
- CEK_ABS_HI,
- CEK_ABS_LO,
- CEK_Special,
- };
-
-private:
- const Nios2ExprKind Kind;
- const MCExpr *Expr;
-
- explicit Nios2MCExpr(Nios2ExprKind Kind, const MCExpr *Expr)
- : Kind(Kind), Expr(Expr) {}
-
-public:
- static const Nios2MCExpr *create(Nios2ExprKind Kind, const MCExpr *Expr,
- MCContext &Ctx);
- static const Nios2MCExpr *create(const MCSymbol *Symbol,
- Nios2MCExpr::Nios2ExprKind Kind,
- MCContext &Ctx);
-
- /// Get the kind of this expression.
- Nios2ExprKind getKind() const { return Kind; }
-
- /// Get the child of this expression.
- const MCExpr *getSubExpr() const { return Expr; }
-
- void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
- bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
- const MCFixup *Fixup) const override;
- void visitUsedExpr(MCStreamer &Streamer) const override;
- MCFragment *findAssociatedFragment() const override {
- return getSubExpr()->findAssociatedFragment();
- }
-
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
-};
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp
deleted file mode 100644
index e57b44d3cfdc..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-//===-- Nios2MCTargetDesc.cpp - Nios2 Target Descriptions -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MCTargetDesc.h"
-#include "InstPrinter/Nios2InstPrinter.h"
-#include "Nios2MCAsmInfo.h"
-#include "Nios2TargetStreamer.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_MC_DESC
-#include "Nios2GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "Nios2GenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "Nios2GenRegisterInfo.inc"
-
-static MCInstrInfo *createNios2MCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitNios2MCInstrInfo(X); // defined in Nios2GenInstrInfo.inc
- return X;
-}
-
-static MCRegisterInfo *createNios2MCRegisterInfo(const Triple &TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitNios2MCRegisterInfo(X, Nios2::R15); // defined in Nios2GenRegisterInfo.inc
- return X;
-}
-
-static MCSubtargetInfo *
-createNios2MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- if (CPU.empty() || CPU == "generic")
- CPU = "nios2r1";
- return createNios2MCSubtargetInfoImpl(TT, CPU, FS);
- // createNios2MCSubtargetInfoImpl defined in Nios2GenSubtargetInfo.inc
-}
-
-static MCAsmInfo *createNios2MCAsmInfo(const MCRegisterInfo &MRI,
- const Triple &TT) {
- MCAsmInfo *MAI = new Nios2MCAsmInfo(TT);
-
- unsigned SP = MRI.getDwarfRegNum(Nios2::SP, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
- MAI->addInitialFrameState(Inst);
-
- return MAI;
-}
-
-static MCInstPrinter *createNios2MCInstPrinter(const Triple &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI) {
- return new Nios2InstPrinter(MAI, MII, MRI);
-}
-
-static MCTargetStreamer *createNios2AsmTargetStreamer(MCStreamer &S,
- formatted_raw_ostream &OS,
- MCInstPrinter *InstPrint,
- bool isVerboseAsm) {
- return new Nios2TargetAsmStreamer(S, OS);
-}
-
-extern "C" void LLVMInitializeNios2TargetMC() {
- Target *T = &getTheNios2Target();
-
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(*T, createNios2MCAsmInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(*T, createNios2MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(*T, createNios2MCRegisterInfo);
-
- // Register the asm target streamer.
- TargetRegistry::RegisterAsmTargetStreamer(*T, createNios2AsmTargetStreamer);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(*T, createNios2MCSubtargetInfo);
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(*T, createNios2MCInstPrinter);
-
- // Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(*T, createNios2AsmBackend);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h
deleted file mode 100644
index a7c4b16c6a3b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- Nios2MCTargetDesc.h - Nios2 Target Descriptions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H
-#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H
-
-#include <memory>
-
-namespace llvm {
-class MCAsmBackend;
-class MCObjectTargetWriter;
-class MCRegisterInfo;
-class MCSubtargetInfo;
-class MCTargetOptions;
-class Target;
-class Triple;
-class StringRef;
-class raw_pwrite_stream;
-
-Target &getTheNios2Target();
-
-MCAsmBackend *createNios2AsmBackend(const Target &T, const MCSubtargetInfo &STI,
- const MCRegisterInfo &MRI,
- const MCTargetOptions &Options);
-
-std::unique_ptr<MCObjectTargetWriter> createNios2ELFObjectWriter(uint8_t OSABI);
-
-} // namespace llvm
-
-// Defines symbolic names for Nios2 registers. This defines a mapping from
-// register name to register number.
-#define GET_REGINFO_ENUM
-#include "Nios2GenRegisterInfo.inc"
-
-// Defines symbolic names for the Nios2 instructions.
-#define GET_INSTRINFO_ENUM
-#include "Nios2GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "Nios2GenSubtargetInfo.inc"
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp
deleted file mode 100644
index 795fd0084aa3..000000000000
--- a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- Nios2TargetStreamer.cpp - Nios2 Target Streamer Methods -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Nios2 specific target streamer methods.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetStreamer.h"
-
-using namespace llvm;
-
-Nios2TargetStreamer::Nios2TargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
-
-Nios2TargetAsmStreamer::Nios2TargetAsmStreamer(MCStreamer &S,
- formatted_raw_ostream &OS)
- : Nios2TargetStreamer(S) {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2.h b/contrib/llvm/lib/Target/Nios2/Nios2.h
deleted file mode 100644
index d6c5c1e49662..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===-- Nios2.h - Top-level interface for Nios2 representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in
-// the LLVM Nios2 back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2_H
-
-#include "MCTargetDesc/Nios2MCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-class FunctionPass;
-class formatted_raw_ostream;
-class Nios2TargetMachine;
-class AsmPrinter;
-class MachineInstr;
-class MCInst;
-
-FunctionPass *createNios2ISelDag(Nios2TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-void LowerNios2MachineInstToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP);
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2.td b/contrib/llvm/lib/Target/Nios2/Nios2.td
deleted file mode 100644
index 1acf4c70c42c..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2.td
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- Nios2.td - Describe the Nios2 Target Machine -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-include "Nios2RegisterInfo.td"
-include "Nios2Schedule.td"
-include "Nios2InstrInfo.td"
-include "Nios2CallingConv.td"
-
-//===----------------------------------------------------------------------===//
-// Nios2 Subtarget features
-//===----------------------------------------------------------------------===//
-def FeatureNios2r1 : SubtargetFeature<"nios2r1", "Nios2ArchVersion",
- "Nios2r1", "Nios2 R1 ISA Support">;
-def FeatureNios2r2 : SubtargetFeature<"nios2r2", "Nios2ArchVersion",
- "Nios2r2", "Nios2 R2 ISA Support">;
-
-//===----------------------------------------------------------------------===//
-// Nios2 processors supported.
-//===----------------------------------------------------------------------===//
-
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, Nios2GenericItineraries, Features>;
-
-def : Proc<"nios2r1", [FeatureNios2r1]>;
-def : Proc<"nios2r2", [FeatureNios2r2]>;
-
-def Nios2InstrInfo : InstrInfo;
-
-def Nios2AsmParser : AsmParser {
- let ShouldEmitMatchRegisterName = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Declare the target which we are implementing
-//===----------------------------------------------------------------------===//
-
-def Nios2AsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- int PassSubtarget = 1;
- int Variant = 0;
-}
-
-def Nios2 : Target {
-// def Nios2InstrInfo : InstrInfo as before.
- let InstructionSet = Nios2InstrInfo;
- let AssemblyParsers = [Nios2AsmParser];
- let AssemblyWriters = [Nios2AsmWriter];
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp b/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp
deleted file mode 100644
index 1abf19591774..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2AsmPrinter.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- Nios2AsmPrinter.cpp - Nios2 LLVM Assembly Printer -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format NIOS2 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#include "InstPrinter/Nios2InstPrinter.h"
-#include "MCTargetDesc/Nios2BaseInfo.h"
-#include "Nios2.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-asm-printer"
-
-namespace {
-
-class Nios2AsmPrinter : public AsmPrinter {
-
-public:
- explicit Nios2AsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
-
- StringRef getPassName() const override { return "Nios2 Assembly Printer"; }
-
-  //- EmitInstruction() must exist or there will be a run-time error.
- void EmitInstruction(const MachineInstr *MI) override;
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void EmitFunctionEntryLabel() override;
-};
-} // namespace
-
-//- EmitInstruction() must exist or there will be a run-time error.
-void Nios2AsmPrinter::EmitInstruction(const MachineInstr *MI) {
-
-  // Print out both the ordinary instruction and any bundled instructions.
- MachineBasicBlock::const_instr_iterator I = MI->getIterator();
- MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
-
- do {
-
- if (I->isPseudo()) {
- llvm_unreachable("Pseudo opcode found in EmitInstruction()");
- }
-
- MCInst TmpInst0;
- LowerNios2MachineInstToMCInst(&*I, TmpInst0, *this);
- EmitToStreamer(*OutStreamer, TmpInst0);
- } while ((++I != E) && I->isInsideBundle()); // Delay slot check
-}
-
-// .type main,@function
-//-> .ent main # @main
-// main:
-void Nios2AsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer->EmitLabel(CurrentFnSym);
-}
-
-// Print out an operand for an inline asm expression.
-bool Nios2AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- printOperand(MI, OpNum, O);
- return false;
-}
-
-bool Nios2AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNum, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
-
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isReg() && "unexpected inline asm memory operand");
- O << "($" << Nios2InstPrinter::getRegisterName(MO.getReg()) << ")";
-
- return false;
-}
-
-void Nios2AsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- bool closeP = false;
-
- if (MO.getTargetFlags())
- closeP = true;
-
- switch (MO.getTargetFlags()) {
- case Nios2FG::MO_ABS_HI:
- O << "%hiadj(";
- break;
- case Nios2FG::MO_ABS_LO:
- O << "%lo(";
- break;
- }
-
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- O << '$'
- << StringRef(Nios2InstPrinter::getRegisterName(MO.getReg())).lower();
- break;
-
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- break;
-
- case MachineOperand::MO_MachineBasicBlock:
- MO.getMBB()->getSymbol()->print(O, MAI);
- return;
-
- case MachineOperand::MO_GlobalAddress:
- getSymbol(MO.getGlobal())->print(O, MAI);
- break;
-
- case MachineOperand::MO_BlockAddress:
- O << GetBlockAddressSymbol(MO.getBlockAddress())->getName();
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- O << MO.getSymbolName();
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
- }
-
- if (closeP)
- O << ")";
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeNios2AsmPrinter() {
- RegisterAsmPrinter<Nios2AsmPrinter> X(getTheNios2Target());
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td b/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td
deleted file mode 100644
index f0b172f8422d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2CallingConv.td
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- Nios2CallingConv.td - Calling Conventions for Nios2 -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for Nios2 architecture.
-//===----------------------------------------------------------------------===//
-
-/// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>:
- CCIf<!strconcat("State.getTarget().getSubtarget<Nios2Subtarget>().", F), A>;
-
-def CC_Nios2 : CallingConv<[
- // i32 f32 arguments get passed in integer registers if there is space.
- CCIfType<[i32, f32], CCAssignToReg<[R4, R5, R6, R7]>>,
-
- // Alternatively, they are assigned to the stack in 4-byte aligned units.
- CCAssignToStack<4, 4>
-]>;
-
-def RetCC_Nios2EABI : CallingConv<[
- // i32 are returned in registers R2, R3
- CCIfType<[i32], CCAssignToReg<[R2, R3]>>,
- // In case of floating point (FPH2 instr.) also use the same register set
- CCIfType<[f32], CCAssignToReg<[R2, R3]>>,
- CCIfByVal<CCPassByVal<4, 4>>,
- // Stack parameter slots for i32 is 32-bit words and 4-byte aligned.
- CCIfType<[i32], CCAssignToStack<4, 4>>
-]>;
-
-def CSR : CalleeSavedRegs<(add RA, FP, (sequence "R%u", 16, 23))>;
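The deleted CC_Nios2 convention passes the first four i32/f32 arguments in R4..R7 and assigns the rest to 4-byte aligned stack slots, with i32 results returned in R2/R3. A rough simulation of the argument assignment (illustrative only; register names as in the TableGen above):

#include <cstdio>

int main() {
  const char *ArgRegs[] = {"R4", "R5", "R6", "R7"};
  unsigned NumArgs = 6;
  unsigned StackOffset = 0;
  for (unsigned I = 0; I != NumArgs; ++I) {
    if (I < 4) {
      std::printf("arg%u -> %s\n", I, ArgRegs[I]);
    } else {
      std::printf("arg%u -> stack+%u\n", I, StackOffset);
      StackOffset += 4;
    }
  }
  return 0;
}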
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp b/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp
deleted file mode 100644
index 6fb28a6fd638..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- Nios2FrameLowering.cpp - Nios2 Frame Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2FrameLowering.h"
-
-#include "Nios2Subtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
-
-using namespace llvm;
-
-bool Nios2FrameLowering::hasFP(const MachineFunction &MF) const { return true; }
-
-void Nios2FrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
-
-void Nios2FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h b/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h
deleted file mode 100644
index 4ffb01dda36a..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2FrameLowering.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===-- Nios2FrameLowering.h - Define frame lowering for Nios2 --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2FRAMELOWERING_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2FRAMELOWERING_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-
-namespace llvm {
-class Nios2Subtarget;
-
-class Nios2FrameLowering : public TargetFrameLowering {
-protected:
- const Nios2Subtarget &STI;
-
-public:
- explicit Nios2FrameLowering(const Nios2Subtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0, 4),
- STI(sti) {}
-
- bool hasFP(const MachineFunction &MF) const override;
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp
deleted file mode 100644
index 5f9679466115..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//===-- Nios2ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Nios2 ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the NIOS2 target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-isel"
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Nios2DAGToDAGISel - NIOS2 specific code to select NIOS2 machine
-// instructions for SelectionDAG operations.
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class Nios2DAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Nios2 Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const Nios2Subtarget *Subtarget;
-
-public:
- explicit Nios2DAGToDAGISel(Nios2TargetMachine &TM, CodeGenOpt::Level OL)
- : SelectionDAGISel(TM, OL) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<Nios2Subtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
- }
-
- void Select(SDNode *N) override;
-
- // Pass Name
- StringRef getPassName() const override {
- return "NIOS2 DAG->DAG Pattern Instruction Selection";
- }
-
-#include "Nios2GenDAGISel.inc"
-};
-} // namespace
-
-// Select instructions that have no custom selection. Used for
-// expanded, promoted and normal instructions.
-void Nios2DAGToDAGISel::Select(SDNode *Node) {
-
- // If we have a custom node, we already have selected!
- if (Node->isMachineOpcode()) {
- LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
- Node->setNodeId(-1);
- return;
- }
-
- // Select the default instruction
- SelectCode(Node);
-}
-
-FunctionPass *llvm::createNios2ISelDag(Nios2TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new Nios2DAGToDAGISel(TM, OptLevel);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp b/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp
deleted file mode 100644
index 008ce1570722..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-//===-- Nios2ISelLowering.cpp - Nios2 DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the interfaces that Nios2 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2ISelLowering.h"
-#include "Nios2MachineFunction.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "Nios2GenCallingConv.inc"
-
-SDValue
-Nios2TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool IsVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SDLoc &DL, SelectionDAG &DAG) const {
- // CCValAssign - represent the assignment of
- // the return value to a location
- SmallVector<CCValAssign, 16> RVLocs;
- MachineFunction &MF = DAG.getMachineFunction();
-
- // CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
- // Analyze return values.
- CCInfo.CheckReturn(Outs, RetCC_Nios2EABI);
-
- SDValue Flag;
- SmallVector<SDValue, 4> RetOps(1, Chain);
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- SDValue Val = OutVals[i];
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
-
- if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
- Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
-
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
-
- // Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
- }
-
- if (Flag.getNode())
- RetOps.push_back(Flag);
-
- return DAG.getNode(Nios2ISD::Ret, DL, MVT::Other, RetOps);
-}
-
-// addLiveIn - This helper function adds the specified physical register to the
-// MachineFunction as a live in value. It also creates a corresponding
-// virtual register for it.
-static unsigned addLiveIn(MachineFunction &MF, unsigned PReg,
- const TargetRegisterClass *RC) {
- unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
- MF.getRegInfo().addLiveIn(PReg, VReg);
- return VReg;
-}
-
-//===----------------------------------------------------------------------===//
-// Formal Arguments Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-// LowerFormalArguments - transform physical registers into virtual registers
-// and generate load operations for arguments places on the stack.
-SDValue Nios2TargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
-
- CCInfo.AnalyzeFormalArguments(Ins, CC_Nios2);
-
-  // Used with varargs to accumulate store chains.
- std::vector<SDValue> OutChains;
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- EVT ValVT = VA.getValVT();
-
- // Arguments stored on registers
- if (VA.isRegLoc()) {
- MVT RegVT = VA.getLocVT();
- unsigned ArgReg = VA.getLocReg();
- const TargetRegisterClass *RC = getRegClassFor(RegVT);
-
- // Transform the arguments stored on
- // physical registers into virtual ones
- unsigned Reg = addLiveIn(MF, ArgReg, RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it has been passed promoted
- // to 32 bits. Insert an assert[sz]ext to capture this, then
- // truncate to the right size.
- if (VA.getLocInfo() != CCValAssign::Full) {
- unsigned Opcode = 0;
- if (VA.getLocInfo() == CCValAssign::SExt)
- Opcode = ISD::AssertSext;
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- Opcode = ISD::AssertZext;
- if (Opcode)
- ArgValue =
- DAG.getNode(Opcode, DL, RegVT, ArgValue, DAG.getValueType(ValVT));
- ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
- }
-
- // Handle floating point arguments passed in integer registers.
- if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
- (RegVT == MVT::i64 && ValVT == MVT::f64))
- ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
- InVals.push_back(ArgValue);
- } else { // VA.isRegLoc()
- MVT LocVT = VA.getLocVT();
-
- // sanity check
- assert(VA.isMemLoc());
-
- // The stack pointer offset is relative to the caller stack frame.
- int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
- VA.getLocMemOffset(), true);
-
- // Create load nodes to retrieve arguments from the stack
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Load = DAG.getLoad(
- LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- InVals.push_back(Load);
- OutChains.push_back(Load.getValue(1));
- }
- }
- if (!OutChains.empty()) {
- OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
- }
-
- return Chain;
-}
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation
-//===----------------------------------------------------------------------===//
-
-Nios2TargetLowering::Nios2TargetLowering(const TargetMachine &TM,
- const Nios2Subtarget &STI)
- : TargetLowering(TM), Subtarget(&STI) {
-
- addRegisterClass(MVT::i32, &Nios2::CPURegsRegClass);
- computeRegisterProperties(Subtarget->getRegisterInfo());
-}
-
-const char *Nios2TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- case Nios2ISD::Hi:
- return "Nios2ISD::Hi";
- case Nios2ISD::Lo:
- return "Nios2ISD::Lo";
- case Nios2ISD::Ret:
- return "Nios2ISD::Ret";
- }
- return nullptr;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h b/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h
deleted file mode 100644
index c3c8179054bb..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2ISelLowering.h
+++ /dev/null
@@ -1,63 +0,0 @@
-//===-- Nios2ISelLowering.h - Nios2 DAG Lowering Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Nios2 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2ISELLOWERING_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2ISELLOWERING_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetLowering.h"
-
-namespace llvm {
-class Nios2Subtarget;
-
-namespace Nios2ISD {
-enum NodeType {
- // Start the numbering from where ISD NodeType finishes.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Get the Higher 16 bits from a 32-bit immediate
- // No relation with Nios2 Hi register
- Hi,
- // Get the Lower 16 bits from a 32-bit immediate
- // No relation with Nios2 Lo register
- Lo,
- // Return
- Ret
-};
-}
-
-class Nios2TargetLowering : public TargetLowering {
- const Nios2Subtarget *Subtarget;
-
-public:
- Nios2TargetLowering(const TargetMachine &TM, const Nios2Subtarget &STI);
-
- /// getTargetNodeName - This method returns the name of a target specific
- // DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
- SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
- bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
-
- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
- SelectionDAG &DAG) const override;
-};
-} // end namespace llvm
-
-#endif // NIOS2_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td b/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td
deleted file mode 100644
index f57bf03bba3c..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td
+++ /dev/null
@@ -1,235 +0,0 @@
-//===-- Nios2InstrFormats.td - Nios2 Instruction Formats ---*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Describe NIOS2 instructions format
-//
-//
-//===----------------------------------------------------------------------===//
-
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<6> val> {
- bits<6> Value = val;
-}
-
-def Pseudo : Format<0>;
-// Nios2 R1 instr formats:
-def FrmI : Format<1>;
-def FrmR : Format<2>;
-def FrmJ : Format<3>;
-def FrmOther : Format<4>; // Instruction w/ a custom format
-// Nios2 R2 instr 32-bit formats:
-def FrmL26 : Format<5>; // corresponds to J format in R1
-def FrmF2I16 : Format<6>; // corresponds to I format in R1
-def FrmF2X4I12 : Format<7>;
-def FrmF1X4I12 : Format<8>;
-def FrmF1X4L17 : Format<9>;
-def FrmF3X6L5 : Format<10>; // corresponds to R format in R1
-def FrmF2X6L10 : Format<11>;
-def FrmF3X6 : Format<12>; // corresponds to R format in R1
-def FrmF3X8 : Format<13>; // corresponds to custom format in R1
-// Nios2 R2 instr 16-bit formats:
-def FrmI10 : Format<14>;
-def FrmT1I7 : Format<15>;
-def FrmT2I4 : Format<16>;
-def FrmT1X1I6 : Format<17>;
-def FrmX1I7 : Format<18>;
-def FrmL5I4X1 : Format<19>;
-def FrmT2X1L3 : Format<20>;
-def FrmT2X1I3 : Format<21>;
-def FrmT3X1 : Format<22>;
-def FrmT2X3 : Format<23>;
-def FrmF1X1 : Format<24>;
-def FrmX2L5 : Format<25>;
-def FrmF1I5 : Format<26>;
-def FrmF2 : Format<27>;
-
-//===----------------------------------------------------------------------===//
-// Instruction Predicates:
-//===----------------------------------------------------------------------===//
-
-def isNios2r1 : Predicate<"Subtarget->isNios2r1()">;
-def isNios2r2 : Predicate<"Subtarget->isNios2r2()">;
-
-class PredicateControl {
- // Predicates related to specific target CPU features
- list<Predicate> FeaturePredicates = [];
- // Predicates for the instruction group membership in given ISA
- list<Predicate> InstrPredicates = [];
-
- list<Predicate> Predicates = !listconcat(FeaturePredicates, InstrPredicates);
-}
-
-//===----------------------------------------------------------------------===//
-// Base classes for 32-bit, 16-bit and pseudo instructions
-//===----------------------------------------------------------------------===//
-
-class Nios2Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>: Instruction,
- PredicateControl {
- field bits<32> Inst;
- Format Form = f;
-
- let Namespace = "Nios2";
- let Size = 4;
-
- bits<6> Opcode = 0;
-
- // Bottom 6 bits are the 'opcode' field
- let Inst{5-0} = Opcode;
-
- let OutOperandList = outs;
- let InOperandList = ins;
-
- let AsmString = asmstr;
- let Pattern = pattern;
- let Itinerary = itin;
-
- // Attributes specific to Nios2 instructions:
-
- // TSFlags layout should be kept in sync with Nios2InstrInfo.h.
- let TSFlags{5-0} = Form.Value;
- let DecoderNamespace = "Nios2";
- field bits<32> SoftFail = 0;
-}
-
-class Nios2Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass Itin = IIPseudo>:
- Nios2Inst32<outs, ins, asmstr, pattern, Itin, Pseudo> {
-
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Base classes for R1 and R2 instructions
-//===----------------------------------------------------------------------===//
-
-class Nios2R1Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>:
- Nios2Inst32<outs, ins, asmstr, pattern, itin, f> {
- let DecoderNamespace = "Nios2";
- let InstrPredicates = [isNios2r1];
-}
-
-class Nios2R2Inst32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>:
- Nios2Inst32<outs, ins, asmstr, pattern, itin, f> {
- let DecoderNamespace = "Nios2r2";
- let InstrPredicates = [isNios2r2];
-}
-
-//===----------------------------------------------------------------------===//
-// Format I instruction class in Nios2 : <|A|B|immediate|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: Nios2R1Inst32<outs, ins, asmstr,
- pattern, itin, FrmI> {
-
- bits<5> rA;
- bits<5> rB;
- bits<16> imm;
-
- let Opcode = op;
-
- let Inst{31-27} = rA;
- let Inst{26-22} = rB;
- let Inst{21-6} = imm;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Format R instruction : <|A|B|C|opx|imm|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FR<bits<6> opx, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: Nios2R1Inst32<outs, ins, asmstr,
- pattern, itin, FrmR> {
- bits<5> rA;
- bits<5> rB;
- bits<5> rC;
- bits<5> imm = 0;
-
- let Opcode = 0x3a; /* opcode is always 0x3a for R instr. */
-
- let Inst{31-27} = rA;
- let Inst{26-22} = rB;
- let Inst{21-17} = rC;
- let Inst{16-11} = opx; /* opx stands for opcode extension */
- let Inst{10-6} = imm; /* optional 5-bit immediate value */
-}
-
-//===----------------------------------------------------------------------===//
-// Format J instruction class in Nios2 : <|address|opcode|>
-//===----------------------------------------------------------------------===//
-
-class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>:
- Nios2R1Inst32<outs, ins, asmstr, pattern, itin, FrmJ> {
- bits<26> addr;
- let Opcode = op;
- let Inst{31-6} = addr;
-}
-
-//===----------------------------------------------------------------------===//
-// Format F3X6 (R2) instruction : <|opx|RSV|C|B|A|opcode|>
-//===----------------------------------------------------------------------===//
-
-class F3X6<bits<6> opx, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>:
- Nios2R2Inst32<outs, ins, asmstr, pattern, itin, FrmF3X6> {
- bits<5> rC;
- bits<5> rB;
- bits<5> rA;
- bits<5> rsv = 0;
-
- let Opcode = 0x20; /* opcode is always 0x20 (OPX group) for F3X6 instr. */
-
- let Inst{31-26} = opx; /* opx stands for opcode extension */
- let Inst{25-21} = rsv;
- let Inst{20-16} = rC;
- let Inst{15-11} = rB;
- let Inst{10-6} = rA;
-}
-
-//===----------------------------------------------------------------------===//
-// Multiclasses for common instructions of both R1 and R2:
-//===----------------------------------------------------------------------===//
-
-// Multiclass for instructions that have R format in R1 and F3X6 format in R2
-// and their opx values differ between R1 and R2
-multiclass CommonInstr_R_F3X6_opx<bits<6> opxR1, bits<6> opxR2, dag outs,
- dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin> {
- def NAME#_R1 : FR<opxR1, outs, ins, asmstr, pattern, itin>;
- def NAME#_R2 : F3X6<opxR2, outs, ins, asmstr, pattern, itin>;
-}
-
-// Multiclass for instructions that have R format in R1 and F3X6 format in R2
-// and their opx values are the same in R1 and R2
-multiclass CommonInstr_R_F3X6<bits<6> opx, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- CommonInstr_R_F3X6_opx<opx, opx, outs, ins, asmstr, pattern, itin>;
-
-// Multiclass for instructions that have I format in R1 and F2I16 format in R2
-// and their op code values differ between R1 and R2
-multiclass CommonInstr_I_F2I16_op<bits<6> opR1, bits<6> opR2, dag outs, dag ins,
- string asmstr, list<dag> pattern,
- InstrItinClass itin> {
- def NAME#_R1 : FI<opR1, outs, ins, asmstr, pattern, itin>;
-}
-
-// Multiclass for instructions that have I format in R1 and F2I16 format in R2
-// and their op code values are the same in R1 and R2
-multiclass CommonInstr_I_F2I16<bits<6> op, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- CommonInstr_I_F2I16_op<op, op, outs, ins, asmstr, pattern, itin>;
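
For reference, the FR class above fixes the R1 R-format layout as rA in bits 31-27, rB in 26-22, rC in 21-17, the opcode extension in 16-11, an optional 5-bit immediate in 10-6, and the constant opcode 0x3a in 5-0. A small illustrative encoder following that layout, written for this note rather than taken from the backend:

#include <cstdint>

// Illustrative only: pack a Nios2 R1 R-format word using the bit ranges
// declared by the deleted FR TableGen class (opcode is always 0x3a).
constexpr uint32_t encodeRFormat(unsigned rA, unsigned rB, unsigned rC,
                                 unsigned opx, unsigned imm5 = 0) {
  return (rA & 0x1f) << 27 | (rB & 0x1f) << 22 | (rC & 0x1f) << 17 |
         (opx & 0x3f) << 11 | (imm5 & 0x1f) << 6 | 0x3a;
}

// Example: "add r4, r2, r3" -- opx 0x31 per the deleted Nios2InstrInfo.td.
constexpr uint32_t AddWord = encodeRFormat(/*rA=*/2, /*rB=*/3, /*rC=*/4,
                                           /*opx=*/0x31);
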
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp
deleted file mode 100644
index 9700cba3595b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- Nios2InstrInfo.cpp - Nios2 Instruction Information ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2InstrInfo.h"
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-
-using namespace llvm;
-
-#define GET_INSTRINFO_CTOR_DTOR
-#include "Nios2GenInstrInfo.inc"
-
-// Pin the vtable to this file.
-void Nios2InstrInfo::anchor() {}
-
-Nios2InstrInfo::Nios2InstrInfo(Nios2Subtarget &ST)
- : Nios2GenInstrInfo(), RI(ST), Subtarget(ST) {}
-
-/// Expand Pseudo instructions into real backend instructions
-bool Nios2InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- MachineBasicBlock &MBB = *MI.getParent();
-
- switch (MI.getDesc().getOpcode()) {
- default:
- return false;
- case Nios2::RetRA:
- BuildMI(MBB, MI, MI.getDebugLoc(), get(Nios2::RET_R1)).addReg(Nios2::RA);
- break;
- }
-
- MBB.erase(MI);
- return true;
-}
-
-void Nios2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc) const {
- unsigned opc = Subtarget.hasNios2r2() ? Nios2::ADD_R2 : Nios2::ADD_R1;
- BuildMI(MBB, I, DL, get(opc))
- .addReg(DestReg, RegState::Define)
- .addReg(Nios2::ZERO)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
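
copyPhysReg above has no dedicated move instruction to draw on; it emits an ADD of the source register with the hard-wired zero register ("add Dst, zero, Src"), choosing the R2 or R1 opcode by subtarget. A tiny illustration of the same idiom outside the MachineInstr API (the Cpu struct and copyReg are invented for this note):

#include <cstdint>

// Illustrative only: a register copy expressed as dst = zero + src,
// mirroring what the deleted copyPhysReg builds.
struct Cpu { uint32_t Regs[32]; };  // Regs[0] models the hard-wired zero register

static void copyReg(Cpu &C, unsigned Dst, unsigned Src) {
  C.Regs[Dst] = C.Regs[0] + C.Regs[Src]; // add Dst, zero, Src
}
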
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h
deleted file mode 100644
index 52f6e7e9c7c8..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- Nios2InstrInfo.h - Nios2 Instruction Information --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2INSTRINFO_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2INSTRINFO_H
-
-#include "Nios2RegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "Nios2GenInstrInfo.inc"
-
-namespace llvm {
-
-class Nios2Subtarget;
-
-class Nios2InstrInfo : public Nios2GenInstrInfo {
- const Nios2RegisterInfo RI;
- const Nios2Subtarget &Subtarget;
- virtual void anchor();
-
-public:
- explicit Nios2InstrInfo(Nios2Subtarget &ST);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- const Nios2RegisterInfo &getRegisterInfo() const { return RI; }
-
- bool expandPostRAPseudo(MachineInstr &MI) const override;
-
- void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td
deleted file mode 100644
index dee84f74bcbe..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td
+++ /dev/null
@@ -1,109 +0,0 @@
-//===- Nios2InstrInfo.td - Target Description for Nios2 ------*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-include "Nios2InstrFormats.td"
-
-
-//===----------------------------------------------------------------------===//
-// Nios2 Operand, Complex Patterns and Transformations Definitions.
-//===----------------------------------------------------------------------===//
-
-def simm16 : Operand<i32> {
- let DecoderMethod= "DecodeSimm16";
-}
-
-// Immediate node that fits as a 16-bit sign-extended value in the target
-// instruction's immediate field, e.g. addi, andi.
-def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
-
-// Custom return SDNode
-def Nios2Ret : SDNode<"Nios2ISD::Ret", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
-//===----------------------------------------------------------------------===//
-// Instructions specific format
-//===----------------------------------------------------------------------===//
-
-// Arithmetic and logical instructions with 2 registers and 16-bit immediate
-// value.
-multiclass ArithLogicRegImm16<bits<6> op, string mnemonic, SDNode opNode,
- Operand immOp, PatLeaf immType>:
- CommonInstr_I_F2I16<op, (outs CPURegs:$rB),
- (ins CPURegs:$rA, immOp:$imm),
- !strconcat(mnemonic, "\t$rB, $rA, $imm"),
- [(set CPURegs:$rB,
- (opNode CPURegs:$rA, immType:$imm))],
- IIAlu>;
-
-// Arithmetic and logical instructions with 3 register operands.
-// Defines R1 and R2 instruction at the same time.
-multiclass ArithLogicReg<bits<6> opx, string mnemonic,
- SDNode opNode>:
- CommonInstr_R_F3X6<opx, (outs CPURegs:$rC),
- (ins CPURegs:$rA, CPURegs:$rB),
- !strconcat(mnemonic, "\t$rC, $rA, $rB"),
- [(set CPURegs:$rC, (opNode CPURegs:$rA, CPURegs:$rB))],
- IIAlu>;
-
-multiclass Return<bits<6> opx, dag outs, dag ins, string mnemonic> {
- let rB = 0, rC = 0,
- isReturn = 1,
- isCodeGenOnly = 1,
- hasCtrlDep = 1,
- hasExtraSrcRegAllocReq = 1 in {
- defm NAME# : CommonInstr_R_F3X6<opx, outs, ins, mnemonic, [], IIBranch>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Nios2 Instructions
-//===----------------------------------------------------------------------===//
-
-/// Arithmetic instructions operating on registers.
-let isCommutable = 1 ,
- isReMaterializable = 1 in {
- defm ADD : ArithLogicReg<0x31, "add", add>;
- defm AND : ArithLogicReg<0x0e, "and", and>;
- defm OR : ArithLogicReg<0x16, "or", or>;
- defm XOR : ArithLogicReg<0x1e, "xor", xor>;
- defm MUL : ArithLogicReg<0x27, "mul", mul>;
-}
-
-let isReMaterializable = 1 in {
- defm SUB : ArithLogicReg<0x39, "sub", sub>;
-}
-
-defm DIVU : ArithLogicReg<0x24, "divu", udiv>;
-defm DIV : ArithLogicReg<0x25, "div", sdiv>;
-
-defm SLL : ArithLogicReg<0x13, "sll", shl>;
-defm SRL : ArithLogicReg<0x1b, "srl", srl>;
-defm SRA : ArithLogicReg<0x3b, "sra", sra>;
-
-/// Arithmetic Instructions (ALU Immediate)
-defm ADDI : ArithLogicRegImm16<0x04, "addi", add, simm16, immSExt16>;
-
-// Returns:
-defm RET : Return<0x05, (outs), (ins CPURegs:$rA), "ret">;
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions
-//===----------------------------------------------------------------------===//
-
-// Return RA.
-let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
-def RetRA : Nios2Pseudo<(outs), (ins), "", [(Nios2Ret)]>;
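
The immSExt16 pattern leaf above only matches immediates for which isInt<16> holds, i.e. values representable in a signed 16-bit field. A standalone version of that range check with a couple of sample values (fitsSimm16 is written for this note; it is not the LLVM helper itself):

#include <cassert>
#include <cstdint>

// Illustrative only: the same range test isInt<16> performs, i.e. the
// value fits in a signed 16-bit field (-32768 .. 32767).
static bool fitsSimm16(int64_t V) { return V >= -32768 && V <= 32767; }

int main() {
  assert(fitsSimm16(100));     // selectable by the addi pattern
  assert(fitsSimm16(-32768));
  assert(!fitsSimm16(40000));  // too wide; needs the hi/lo split shown earlier
  return 0;
}
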
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp b/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp
deleted file mode 100644
index c43af879b8a6..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MCInstLower.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===-- Nios2MCInstLower.cpp - Convert Nios2 MachineInstr to MCInst -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower Nios2 MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/Nios2BaseInfo.h"
-#include "MCTargetDesc/Nios2MCExpr.h"
-#include "Nios2.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-
-using namespace llvm;
-
-static MCOperand LowerSymbolOperand(const MachineOperand &MO, AsmPrinter &AP) {
- MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
- Nios2MCExpr::Nios2ExprKind TargetKind = Nios2MCExpr::CEK_None;
- const MCSymbol *Symbol;
-
- switch (MO.getTargetFlags()) {
- default:
- llvm_unreachable("Invalid target flag!");
- case Nios2FG::MO_NO_FLAG:
- break;
- case Nios2FG::MO_ABS_HI:
- TargetKind = Nios2MCExpr::CEK_ABS_HI;
- break;
- case Nios2FG::MO_ABS_LO:
- TargetKind = Nios2MCExpr::CEK_ABS_LO;
- break;
- }
-
- switch (MO.getType()) {
- case MachineOperand::MO_GlobalAddress:
- Symbol = AP.getSymbol(MO.getGlobal());
- break;
-
- case MachineOperand::MO_MachineBasicBlock:
- Symbol = MO.getMBB()->getSymbol();
- break;
-
- case MachineOperand::MO_BlockAddress:
- Symbol = AP.GetBlockAddressSymbol(MO.getBlockAddress());
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- Symbol = AP.GetExternalSymbolSymbol(MO.getSymbolName());
- break;
-
- case MachineOperand::MO_JumpTableIndex:
- Symbol = AP.GetJTISymbol(MO.getIndex());
- break;
-
- case MachineOperand::MO_ConstantPoolIndex:
- Symbol = AP.GetCPISymbol(MO.getIndex());
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
- }
-
- const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, AP.OutContext);
-
- if (TargetKind != Nios2MCExpr::CEK_None)
- Expr = Nios2MCExpr::create(TargetKind, Expr, AP.OutContext);
-
- return MCOperand::createExpr(Expr);
-}
-
-static MCOperand LowerOperand(const MachineOperand &MO, AsmPrinter &AP) {
-
- switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit())
- break;
- return MCOperand::createReg(MO.getReg());
- case MachineOperand::MO_Immediate:
- return MCOperand::createImm(MO.getImm());
- case MachineOperand::MO_MachineBasicBlock:
- case MachineOperand::MO_ExternalSymbol:
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_BlockAddress:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ConstantPoolIndex:
- return LowerSymbolOperand(MO, AP);
- case MachineOperand::MO_RegisterMask:
- break;
- }
-
- return MCOperand();
-}
-
-void llvm::LowerNios2MachineInstToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP) {
-
- OutMI.setOpcode(MI->getOpcode());
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- MCOperand MCOp = LowerOperand(MO, AP);
-
- if (MCOp.isValid())
- OutMI.addOperand(MCOp);
- }
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp b/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp
deleted file mode 100644
index be5b8829fe36..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- Nios2MachineFunctionInfo.cpp - Private data used for Nios2 --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2MachineFunction.h"
-
-using namespace llvm;
-
-void Nios2FunctionInfo::anchor() {}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h b/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h
deleted file mode 100644
index 73baf9694790..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2MachineFunction.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- Nios2MachineFunctionInfo.h - Private data used for Nios2 --*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of MachineFunctionInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// Nios2FunctionInfo - This class is derived from MachineFunctionInfo and
-/// holds private Nios2 target-specific information for each MachineFunction.
-class Nios2FunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
-
-private:
- unsigned GlobalBaseReg;
-
- /// VarArgsFrameOffset - Frame offset to start of varargs area.
- int VarArgsFrameOffset;
-
- /// SRetReturnReg - Holds the virtual register into which the sret
- /// argument is passed.
- unsigned SRetReturnReg;
-
- /// IsLeafProc - True if the function is a leaf procedure.
- bool IsLeafProc;
-
-public:
- Nios2FunctionInfo()
- : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
- IsLeafProc(false) {}
- explicit Nios2FunctionInfo(MachineFunction &MF)
- : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
- IsLeafProc(false) {}
-
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
-
- int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
- void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
-
- unsigned getSRetReturnReg() const { return SRetReturnReg; }
- void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
-
- void setLeafProc(bool rhs) { IsLeafProc = rhs; }
- bool isLeafProc() const { return IsLeafProc; }
-};
-
-} // end of namespace llvm
-
-#endif // LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp
deleted file mode 100644
index 9b892f917535..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- Nios2RegisterInfo.cpp - Nios2 Register Information ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "nios2-reg-info"
-
-#include "Nios2RegisterInfo.h"
-
-#include "Nios2.h"
-#include "Nios2Subtarget.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "Nios2GenRegisterInfo.inc"
-
-using namespace llvm;
-
-Nios2RegisterInfo::Nios2RegisterInfo(const Nios2Subtarget &ST)
- : Nios2GenRegisterInfo(Nios2::RA), Subtarget(ST) {}
-
-const TargetRegisterClass *Nios2RegisterInfo::intRegClass(unsigned Size) const {
- return &Nios2::CPURegsRegClass;
-}
-
-const MCPhysReg *
-Nios2RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_SaveList;
-}
-
-BitVector Nios2RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- static const MCPhysReg ReservedCPURegs[] = {Nios2::ZERO, Nios2::AT, Nios2::SP,
- Nios2::RA, Nios2::PC, Nios2::GP};
- BitVector Reserved(getNumRegs());
-
- for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I)
- Reserved.set(ReservedCPURegs[I]);
-
- return Reserved;
-}
-
-void Nios2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {}
-
-unsigned Nios2RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return Nios2::SP;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h
deleted file mode 100644
index 3658343b1d2e..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- Nios2RegisterInfo.h - Nios2 Register Information Impl ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Nios2 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2REGISTERINFO_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2REGISTERINFO_H
-
-#include "Nios2.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "Nios2GenRegisterInfo.inc"
-
-namespace llvm {
-class Nios2Subtarget;
-class TargetInstrInfo;
-class Type;
-
-class Nios2RegisterInfo : public Nios2GenRegisterInfo {
-protected:
- const Nios2Subtarget &Subtarget;
-
-public:
- Nios2RegisterInfo(const Nios2Subtarget &Subtarget);
-
- const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
-
- BitVector getReservedRegs(const MachineFunction &MF) const override;
-
- /// Stack Frame Processing Methods
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS = nullptr) const override;
-
- /// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
-
- /// Return GPR register class.
- const TargetRegisterClass *intRegClass(unsigned Size) const;
-};
-
-} // end namespace llvm
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td
deleted file mode 100644
index 1808815816f3..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Nios2RegisterInfo.td - Nios2 Register defs ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// We have a bank of 32 registers.
-class Nios2Reg<string n> : Register<n> {
- field bits<5> Num;
- let Namespace = "Nios2";
-}
-
-// Nios2 CPU Registers
-class Nios2GPRReg<bits<5> num, string n> : Nios2Reg<n> {
- let Num = num;
-}
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-let Namespace = "Nios2" in {
- // General Purpose Registers
- def ZERO : Nios2GPRReg<0, "zero">, DwarfRegNum<[ 0 ]>;
- def AT : Nios2GPRReg<1, "at">, DwarfRegNum<[ 1 ]>;
- foreach RegNum = 2 - 23 in {
- def R #RegNum : Nios2GPRReg<RegNum, "r" #RegNum>, DwarfRegNum<[ RegNum ]>;
- }
- def ET : Nios2GPRReg<24, "et">, DwarfRegNum<[ 24 ]>;
- def BT : Nios2GPRReg<25, "bt">, DwarfRegNum<[ 25 ]>;
- def GP : Nios2GPRReg<26, "gp">, DwarfRegNum<[ 26 ]>;
- def SP : Nios2GPRReg<27, "sp">, DwarfRegNum<[ 27 ]>;
- def FP : Nios2GPRReg<28, "fp">, DwarfRegNum<[ 28 ]>;
- def EA : Nios2GPRReg<29, "ea">, DwarfRegNum<[ 29 ]>;
- def BA : Nios2GPRReg<30, "ba">, DwarfRegNum<[ 30 ]>;
- def RA : Nios2GPRReg<31, "ra">, DwarfRegNum<[ 31 ]>;
- def PC : Nios2Reg<"pc">, DwarfRegNum<[ 32 ]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Register Classes
-//===----------------------------------------------------------------------===//
-
-def CPURegs : RegisterClass<"Nios2", [ i32 ], 32,
- (add
- // Reserved
- ZERO,
- AT,
- // Return Values and Arguments
- (sequence "R%u", 2, 7),
- // Not preserved across procedure calls
- // Caller saved
- (sequence "R%u", 8, 15),
- // Callee saved
- (sequence "R%u", 16, 23),
- // Reserved
- ET, BT, GP, SP, FP, EA, BA, RA, PC)>;
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td b/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td
deleted file mode 100644
index 2d1d9d5e5f3f..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Schedule.td
+++ /dev/null
@@ -1,39 +0,0 @@
-//===-- Nios2Schedule.td - Nios2 Scheduling Definitions ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Functional units across Nios2 chip sets. Based on GCC/Nios2 backend files.
-//===----------------------------------------------------------------------===//
-def ALU : FuncUnit;
-def IMULDIV : FuncUnit;
-
-//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for Nios2
-//===----------------------------------------------------------------------===//
-def IIAlu : InstrItinClass;
-def IILoad : InstrItinClass;
-def IIStore : InstrItinClass;
-def IIFlush : InstrItinClass;
-def IIIdiv : InstrItinClass;
-def IIBranch : InstrItinClass;
-
-def IIPseudo : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// Nios2 Generic instruction itineraries.
-//===----------------------------------------------------------------------===//
-//@ http://llvm.org/docs/doxygen/html/structllvm_1_1InstrStage.html
-def Nios2GenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
- InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
- InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
- InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIFlush , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
- InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>
-]>;
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp b/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp
deleted file mode 100644
index 196bed20cdcc..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- Nios2Subtarget.cpp - Nios2 Subtarget Information ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Nios2 specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2Subtarget.h"
-#include "Nios2.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2-subtarget"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "Nios2GenSubtargetInfo.inc"
-
-void Nios2Subtarget::anchor() {}
-
-Nios2Subtarget::Nios2Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM)
- :
-
- // Nios2GenSubtargetInfo lists the available features when invoked as
- // llc -march=nios2 -mcpu=help
- Nios2GenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this) {}
-
-Nios2Subtarget &Nios2Subtarget::initializeSubtargetDependencies(StringRef CPU,
- StringRef FS) {
- if (TargetTriple.getArch() == Triple::nios2) {
- if (CPU != "nios2r2") {
- CPU = "nios2r1";
- Nios2ArchVersion = Nios2r1;
- } else {
- Nios2ArchVersion = Nios2r2;
- }
- } else {
- errs() << "!!!Error, TargetTriple.getArch() = " << TargetTriple.getArch()
- << "CPU = " << CPU << "\n";
- exit(0);
- }
-
- // Parse features string.
- ParseSubtargetFeatures(CPU, FS);
-
- return *this;
-}
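
initializeSubtargetDependencies above falls back to the nios2r1 CPU for anything other than an explicit nios2r2 request. The same defaulting rule in isolation, as a sketch only (pickArch and Nios2Arch are names invented for this note):

#include <string>

enum Nios2Arch { Nios2r1, Nios2r2 };

// Illustrative only: mirrors the CPU defaulting done by the deleted
// initializeSubtargetDependencies.
static Nios2Arch pickArch(std::string &CPU) {
  if (CPU != "nios2r2") {
    CPU = "nios2r1";   // any other (or empty) CPU string selects the R1 ISA
    return Nios2r1;
  }
  return Nios2r2;
}
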
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h b/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h
deleted file mode 100644
index a822dff33b5b..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2Subtarget.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- Nios2Subtarget.h - Define Subtarget for the Nios2 -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2SUBTARGET_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2SUBTARGET_H
-
-#include "Nios2FrameLowering.h"
-#include "Nios2ISelLowering.h"
-#include "Nios2InstrInfo.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-
-#define GET_SUBTARGETINFO_HEADER
-#include "Nios2GenSubtargetInfo.inc"
-
-namespace llvm {
-class StringRef;
-
-class Nios2TargetMachine;
-
-class Nios2Subtarget : public Nios2GenSubtargetInfo {
- virtual void anchor();
-
-public:
- // Nios2 R2 features
- // Bit manipulation instructions extension
- bool HasBMX;
- // Code Density instructions extension
- bool HasCDX;
- // Multi-Processor instructions extension
- bool HasMPX;
- // New mandatory instructions
- bool HasR2Mandatory;
-
-protected:
- enum Nios2ArchEnum {
- // Nios2 R1 ISA
- Nios2r1,
- // Nios2 R2 ISA
- Nios2r2
- };
-
- // Nios2 architecture version
- Nios2ArchEnum Nios2ArchVersion;
-
- Triple TargetTriple;
-
- Nios2InstrInfo InstrInfo;
- Nios2TargetLowering TLInfo;
- SelectionDAGTargetInfo TSInfo;
- Nios2FrameLowering FrameLowering;
-
-public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- Nios2Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- bool hasNios2r1() const { return Nios2ArchVersion >= Nios2r1; }
- bool isNios2r1() const { return Nios2ArchVersion == Nios2r1; }
- bool hasNios2r2() const { return Nios2ArchVersion >= Nios2r2; }
- bool isNios2r2() const { return Nios2ArchVersion == Nios2r2; }
-
- Nios2Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
-
- const Nios2InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const Nios2RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
- const Nios2TargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
- const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp
deleted file mode 100644
index b7594dde709d..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-//===-- Nios2TargetMachine.cpp - Define TargetMachine for Nios2 -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implements the info about Nios2 target spec.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetMachine.h"
-#include "Nios2.h"
-#include "Nios2TargetObjectFile.h"
-
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "nios2"
-
-extern "C" void LLVMInitializeNios2Target() {
- // Register the target.
- RegisterTargetMachine<Nios2TargetMachine> X(getTheNios2Target());
-}
-
-static std::string computeDataLayout() {
- return "e-p:32:32:32-i8:8:32-i16:16:32-n32";
-}
-
-static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
-}
-
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool JIT) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
-Nios2TargetMachine::Nios2TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
- : LLVMTargetMachine(
- T, computeDataLayout(), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL),
- TLOF(make_unique<Nios2TargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
- initAsmInfo();
-}
-
-Nios2TargetMachine::~Nios2TargetMachine() {}
-
-const Nios2Subtarget *
-Nios2TargetMachine::getSubtargetImpl(const Function &F) const {
- Attribute CPUAttr = F.getFnAttribute("target-cpu");
- Attribute FSAttr = F.getFnAttribute("target-features");
-
- std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
- ? CPUAttr.getValueAsString().str()
- : TargetCPU;
- std::string FS = !FSAttr.hasAttribute(Attribute::None)
- ? FSAttr.getValueAsString().str()
- : TargetFS;
-
- auto &I = SubtargetMap[CPU + FS];
- if (!I) {
- // This needs to be done before we create a new subtarget since any
- // creation will depend on the TM and the code generation flags on the
- // function that reside in TargetOptions.
- resetTargetOptions(F);
- I = llvm::make_unique<Nios2Subtarget>(TargetTriple, CPU, FS, *this);
- }
- return I.get();
-}
-
-namespace {
-/// Nios2 Code Generator Pass Configuration Options.
-class Nios2PassConfig : public TargetPassConfig {
-public:
- Nios2PassConfig(Nios2TargetMachine &TM, PassManagerBase *PM)
- : TargetPassConfig(TM, *PM) {}
-
- Nios2TargetMachine &getNios2TargetMachine() const {
- return getTM<Nios2TargetMachine>();
- }
-
- void addCodeGenPrepare() override;
- bool addInstSelector() override;
- void addIRPasses() override;
-};
-} // namespace
-
-TargetPassConfig *Nios2TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new Nios2PassConfig(*this, &PM);
-}
-
-void Nios2PassConfig::addCodeGenPrepare() {
- TargetPassConfig::addCodeGenPrepare();
-}
-
-void Nios2PassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); }
-
-// Install an instruction selector pass using
-// the ISelDag to generate Nios2 code.
-bool Nios2PassConfig::addInstSelector() {
- addPass(createNios2ISelDag(getNios2TargetMachine(), getOptLevel()));
- return false;
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h
deleted file mode 100644
index 1ebfb397383e..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===-- Nios2TargetMachine.h - Define TargetMachine for Nios2 ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Nios2 specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H
-
-#include "Nios2Subtarget.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-class Nios2TargetMachine : public LLVMTargetMachine {
- mutable StringMap<std::unique_ptr<Nios2Subtarget>> SubtargetMap;
- std::unique_ptr<TargetLoweringObjectFile> TLOF;
- Nios2Subtarget Subtarget;
-
-public:
- Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
- ~Nios2TargetMachine() override;
-
- const Nios2Subtarget *getSubtargetImpl() const { return &Subtarget; }
- const Nios2Subtarget *getSubtargetImpl(const Function &F) const override;
-
- TargetLoweringObjectFile *getObjFileLowering() const override {
- return TLOF.get();
- }
-
- // Pass Pipeline Configuration
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-};
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp b/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp
deleted file mode 100644
index 5fc85ef487e6..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- Nios2TargetObjectFile.cpp - Nios2 Object Files --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2TargetObjectFile.h"
-
-using namespace llvm;
-
-void Nios2TargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-}
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h
deleted file mode 100644
index e9ed6e31d937..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetObjectFile.h
+++ /dev/null
@@ -1,26 +0,0 @@
-//===-- llvm/Target/Nios2TargetObjectFile.h - Nios2 Object Info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETOBJECTFILE_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETOBJECTFILE_H
-
-#include "Nios2TargetMachine.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-
-namespace llvm {
-
-class Nios2TargetObjectFile : public TargetLoweringObjectFileELF {
-public:
- Nios2TargetObjectFile() : TargetLoweringObjectFileELF() {}
-
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-};
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h
deleted file mode 100644
index 1520ac27e94f..000000000000
--- a/contrib/llvm/lib/Target/Nios2/Nios2TargetStreamer.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- Nios2TargetStreamer.h - Nios2 Target Streamer ----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETSTREAMER_H
-#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETSTREAMER_H
-
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-
-namespace llvm {
-
-class Nios2TargetStreamer : public MCTargetStreamer {
-public:
- Nios2TargetStreamer(MCStreamer &S);
-};
-
-// This part is for ASCII assembly output
-class Nios2TargetAsmStreamer : public Nios2TargetStreamer {
-public:
- Nios2TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
-};
-
-} // namespace llvm
-#endif
diff --git a/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp b/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp
deleted file mode 100644
index d808a96db772..000000000000
--- a/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- Nios2TargetInfo.cpp - Nios2 Target Implementation -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Nios2.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-Target &llvm::getTheNios2Target() {
- static Target TheNios2Target;
- return TheNios2Target;
-}
-
-extern "C" void LLVMInitializeNios2TargetInfo() {
- RegisterTarget<Triple::nios2,
- /*HasJIT=*/true>
- X(getTheNios2Target(), "nios2", "Nios2", "Nios2");
-}
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 56307a84f2e5..8b3480f772e9 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -21,7 +21,6 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -31,169 +30,7 @@
using namespace llvm;
-static const MCPhysReg RRegs[32] = {
- PPC::R0, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-static const MCPhysReg RRegsNoR0[32] = {
- PPC::ZERO,
- PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-static const MCPhysReg XRegs[32] = {
- PPC::X0, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-static const MCPhysReg XRegsNoX0[32] = {
- PPC::ZERO8,
- PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-static const MCPhysReg FRegs[32] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31
-};
-static const MCPhysReg SPERegs[32] = {
- PPC::S0, PPC::S1, PPC::S2, PPC::S3,
- PPC::S4, PPC::S5, PPC::S6, PPC::S7,
- PPC::S8, PPC::S9, PPC::S10, PPC::S11,
- PPC::S12, PPC::S13, PPC::S14, PPC::S15,
- PPC::S16, PPC::S17, PPC::S18, PPC::S19,
- PPC::S20, PPC::S21, PPC::S22, PPC::S23,
- PPC::S24, PPC::S25, PPC::S26, PPC::S27,
- PPC::S28, PPC::S29, PPC::S30, PPC::S31
-};
-static const MCPhysReg VFRegs[32] = {
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static const MCPhysReg VRegs[32] = {
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-static const MCPhysReg VSRegs[64] = {
- PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
- PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
- PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
- PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
- PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
- PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
- PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
- PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
-
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-static const MCPhysReg VSFRegs[64] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static const MCPhysReg VSSRegs[64] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-static unsigned QFRegs[32] = {
- PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
- PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
- PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
- PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
- PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
- PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
- PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
-};
-static const MCPhysReg CRBITRegs[32] = {
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
-};
-static const MCPhysReg CRRegs[8] = {
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
-};
+DEFINE_PPC_REGCLASSES;
// Evaluate an expression containing condition register
// or condition register field symbols. Returns positive
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index db01271b87e1..26869f250823 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -17,6 +17,8 @@
using namespace llvm;
+DEFINE_PPC_REGCLASSES;
+
#define DEBUG_TYPE "ppc-disassembler"
typedef MCDisassembler::DecodeStatus DecodeStatus;
@@ -62,184 +64,9 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
// FIXME: These can be generated by TableGen from the existing register
// encoding values!
-static const unsigned CRRegs[] = {
- PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
- PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
-};
-
-static const unsigned CRBITRegs[] = {
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
-};
-
-static const unsigned FRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31
-};
-
-static const unsigned VFRegs[] = {
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned VRegs[] = {
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-static const unsigned VSRegs[] = {
- PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
- PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
- PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
- PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
- PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
- PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
- PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
- PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
-
- PPC::V0, PPC::V1, PPC::V2, PPC::V3,
- PPC::V4, PPC::V5, PPC::V6, PPC::V7,
- PPC::V8, PPC::V9, PPC::V10, PPC::V11,
- PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-static const unsigned VSFRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned VSSRegs[] = {
- PPC::F0, PPC::F1, PPC::F2, PPC::F3,
- PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11,
- PPC::F12, PPC::F13, PPC::F14, PPC::F15,
- PPC::F16, PPC::F17, PPC::F18, PPC::F19,
- PPC::F20, PPC::F21, PPC::F22, PPC::F23,
- PPC::F24, PPC::F25, PPC::F26, PPC::F27,
- PPC::F28, PPC::F29, PPC::F30, PPC::F31,
-
- PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
- PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
- PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
- PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
- PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
- PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
- PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
- PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
-};
-
-static const unsigned GPRegs[] = {
- PPC::R0, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-
-static const unsigned GP0Regs[] = {
- PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
- PPC::R4, PPC::R5, PPC::R6, PPC::R7,
- PPC::R8, PPC::R9, PPC::R10, PPC::R11,
- PPC::R12, PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31
-};
-
-static const unsigned G8Regs[] = {
- PPC::X0, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-
-static const unsigned G80Regs[] = {
- PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3,
- PPC::X4, PPC::X5, PPC::X6, PPC::X7,
- PPC::X8, PPC::X9, PPC::X10, PPC::X11,
- PPC::X12, PPC::X13, PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31
-};
-
-static const unsigned QFRegs[] = {
- PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
- PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
- PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
- PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
- PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
- PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
- PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
-};
-
-static const unsigned SPERegs[] = {
- PPC::S0, PPC::S1, PPC::S2, PPC::S3,
- PPC::S4, PPC::S5, PPC::S6, PPC::S7,
- PPC::S8, PPC::S9, PPC::S10, PPC::S11,
- PPC::S12, PPC::S13, PPC::S14, PPC::S15,
- PPC::S16, PPC::S17, PPC::S18, PPC::S19,
- PPC::S20, PPC::S21, PPC::S22, PPC::S23,
- PPC::S24, PPC::S25, PPC::S26, PPC::S27,
- PPC::S28, PPC::S29, PPC::S30, PPC::S31
-};
-
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned (&Regs)[N]) {
+ const MCPhysReg (&Regs)[N]) {
assert(RegNo < N && "Invalid register number");
Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
return MCDisassembler::Success;
@@ -308,25 +135,25 @@ static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GPRegs);
+ return decodeRegisterClass(Inst, RegNo, RRegs);
}
static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GP0Regs);
+ return decodeRegisterClass(Inst, RegNo, RRegsNoR0);
}
static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, G8Regs);
+ return decodeRegisterClass(Inst, RegNo, XRegs);
}
static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, G80Regs);
+ return decodeRegisterClass(Inst, RegNo, XRegsNoX0);
}
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
@@ -341,7 +168,7 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, GPRegs);
+ return decodeRegisterClass(Inst, RegNo, RRegs);
}
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
@@ -388,19 +215,19 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
case PPC::LFSU:
case PPC::LFDU:
// Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
break;
case PPC::STBU:
case PPC::STHU:
case PPC::STWU:
case PPC::STFSU:
case PPC::STFDU:
- Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
break;
}
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -416,12 +243,12 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
if (Inst.getOpcode() == PPC::LDU)
// Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
else if (Inst.getOpcode() == PPC::STDU)
- Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -436,7 +263,7 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 4)));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -451,7 +278,7 @@ static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 3));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -466,7 +293,7 @@ static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 2));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
@@ -481,7 +308,7 @@ static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm,
assert(Base < 32 && "Invalid base register");
Inst.addOperand(MCOperand::createImm(Disp << 1));
- Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index fd7f81591426..fc29e4effbb1 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -499,43 +499,14 @@ bool PPCInstPrinter::showRegistersWithPrefix() const {
return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames;
}
-/// stripRegisterPrefix - This method strips the character prefix from a
-/// register name so that only the number is left.
-static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'q': // for QPX
- case 'v':
- if (RegName[1] == 's')
- return RegName + 2;
- return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
-
- return RegName;
-}
-
void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
-
- // There are VSX instructions that use VSX register numbering (vs0 - vs63)
- // as well as those that use VMX register numbering (v0 - v31 which
- // correspond to vs32 - vs63). If we have an instruction that uses VSX
- // numbering, we need to convert the VMX registers to VSX registers.
- // Namely, we print 32-63 when the instruction operates on one of the
- // VMX registers.
- // (Please synchronize with PPCAsmPrinter::printOperand)
- if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) &&
- !ShowVSRNumsAsVR) {
- if (PPCInstrInfo::isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
- }
+ if (!ShowVSRNumsAsVR)
+ Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()),
+ Reg, OpNo);
const char *RegName;
RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg));
@@ -544,7 +515,7 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (showRegistersWithPercentPrefix(RegName))
O << "%";
if (!showRegistersWithPrefix())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName;
return;
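
Editorial note: the hunk above drops PPCInstPrinter's private stripRegisterPrefix helper in favour of the shared PPCRegisterInfo::stripRegisterPrefix. A minimal standalone sketch of the stripping behaviour, adapted from the helper deleted above (the sketch's name and the main() driver are editorial, not part of the patch):

#include <cassert>
#include <cstring>

// Strip the mnemonic prefix from a PPC register name so only the number is
// left: "r31" -> "31", "vs12" -> "12", "cr7" -> "7"; anything unrecognised is
// returned unchanged.
static const char *stripRegisterPrefixSketch(const char *RegName) {
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'q': // QPX registers
  case 'v':
    if (RegName[1] == 's')
      return RegName + 2;
    return RegName + 1;
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;
  }
  return RegName;
}

int main() {
  assert(std::strcmp(stripRegisterPrefixSketch("r31"), "31") == 0);
  assert(std::strcmp(stripRegisterPrefixSketch("vs12"), "12") == 0);
  assert(std::strcmp(stripRegisterPrefixSketch("cr7"), "7") == 0);
  return 0;
}
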
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 57bda1403c62..8c15ade6f9c4 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -13,18 +13,13 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "PPCInstrInfo.h"
+#include "PPCMCCodeEmitter.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,117 +34,6 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
-namespace {
-
-class PPCMCCodeEmitter : public MCCodeEmitter {
- const MCInstrInfo &MCII;
- const MCContext &CTX;
- bool IsLittleEndian;
-
-public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), CTX(ctx),
- IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
- void operator=(const PPCMCCodeEmitter &) = delete;
- ~PPCMCCodeEmitter() override = default;
-
- unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- // getBinaryCodeForInstr - TableGen'erated function for getting the
- // binary encoding for an instruction.
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override {
- verifyInstructionPredicates(MI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
- unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MCII.get(Opcode);
-
- uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
-
- // Output the constant in big/little endian byte order.
- unsigned Size = Desc.getSize();
- support::endianness E = IsLittleEndian ? support::little : support::big;
- switch (Size) {
- case 0:
- break;
- case 4:
- support::endian::write<uint32_t>(OS, Bits, E);
- break;
- case 8:
- // If we emit a pair of instructions, the first one is
- // always in the top 32 bits, even on little-endian.
- support::endian::write<uint32_t>(OS, Bits >> 32, E);
- support::endian::write<uint32_t>(OS, Bits, E);
- break;
- default:
- llvm_unreachable("Invalid instruction size");
- }
-
- ++MCNumEmitted; // Keep track of the # of mi's emitted.
- }
-
-private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
-};
-
-} // end anonymous namespace
-
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
@@ -264,10 +148,16 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
const MCOperand &MO = MI.getOperand(OpNo);
- assert(MO.isImm() && !(MO.getImm() % 16) &&
- "Expecting an immediate that is a multiple of 16");
+ if (MO.isImm()) {
+ assert(!(MO.getImm() % 16) &&
+ "Expecting an immediate that is a multiple of 16");
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ }
- return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ // Otherwise add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16ds));
+ return RegBits;
}
unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
@@ -354,6 +244,20 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
}
+// Get the index for this operand in this instruction. This is needed for
+// computing the register number in PPCInstrInfo::getRegNumForOperand() for
+// any instructions that use a different numbering scheme for registers in
+// different operands.
+static unsigned getOpIdxForMO(const MCInst &MI, const MCOperand &MO) {
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ const MCOperand &Op = MI.getOperand(i);
+ if (&Op == &MO)
+ return i;
+ }
+ llvm_unreachable("This operand is not part of this instruction");
+ return ~0U; // Silence any warnings about no return.
+}
+
unsigned PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -364,14 +268,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- unsigned Reg = MO.getReg();
- unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg);
-
- if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg))
- if (PPCInstrInfo::isVRRegister(Reg))
- Encode += 32;
-
- return Encode;
+ unsigned OpNo = getOpIdxForMO(MI, MO);
+ unsigned Reg =
+ PPCInstrInfo::getRegNumForOperand(MCII.get(MI.getOpcode()),
+ MO.getReg(), OpNo);
+ return CTX.getRegisterInfo()->getEncodingValue(Reg);
}
assert(MO.isImm() &&
@@ -379,5 +280,42 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
+void PPCMCCodeEmitter::encodeInstruction(
+ const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+
+ // Output the constant in big/little endian byte order.
+ unsigned Size = getInstSizeInBytes(MI);
+ support::endianness E = IsLittleEndian ? support::little : support::big;
+ switch (Size) {
+ case 0:
+ break;
+ case 4:
+ support::endian::write<uint32_t>(OS, Bits, E);
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ support::endian::write<uint32_t>(OS, Bits >> 32, E);
+ support::endian::write<uint32_t>(OS, Bits, E);
+ break;
+ default:
+ llvm_unreachable("Invalid instruction size");
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+// Get the number of bytes used to encode the given MCInst.
+unsigned PPCMCCodeEmitter::getInstSizeInBytes(const MCInst &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ return Desc.getSize();
+}
+
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
new file mode 100644
index 000000000000..a4bcff4b9450
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -0,0 +1,109 @@
+//===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
+#define LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ const MCInstrInfo &MCII;
+ const MCContext &CTX;
+ bool IsLittleEndian;
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
+ ~PPCMCCodeEmitter() override = default;
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ // Get the number of bytes used to encode the given MCInst.
+ unsigned getInstSizeInBytes(const MCInst &MI) const;
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_PPC_MCCODEEMITTER_PPCCODEEMITTER_H
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 316fd2ccf358..d6e450cba0d7 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -17,6 +17,7 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <memory>
@@ -104,4 +105,63 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
+#define PPC_REGS0_31(X) \
+ { \
+ X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
+ X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
+ X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
+ }
+
+#define PPC_REGS_NO0_31(Z, X) \
+ { \
+ Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
+ X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
+ X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
+ }
+
+#define PPC_REGS_LO_HI(LO, HI) \
+ { \
+ LO##0, LO##1, LO##2, LO##3, LO##4, LO##5, LO##6, LO##7, LO##8, LO##9, \
+ LO##10, LO##11, LO##12, LO##13, LO##14, LO##15, LO##16, LO##17, \
+ LO##18, LO##19, LO##20, LO##21, LO##22, LO##23, LO##24, LO##25, \
+ LO##26, LO##27, LO##28, LO##29, LO##30, LO##31, HI##0, HI##1, HI##2, \
+ HI##3, HI##4, HI##5, HI##6, HI##7, HI##8, HI##9, HI##10, HI##11, \
+ HI##12, HI##13, HI##14, HI##15, HI##16, HI##17, HI##18, HI##19, \
+ HI##20, HI##21, HI##22, HI##23, HI##24, HI##25, HI##26, HI##27, \
+ HI##28, HI##29, HI##30, HI##31 \
+ }
+
+using llvm::MCPhysReg;
+
+#define DEFINE_PPC_REGCLASSES \
+ static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \
+ static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \
+ static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \
+ static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
+ static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
+ static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
+ static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
+ static const MCPhysReg RRegsNoR0[32] = \
+ PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
+ static const MCPhysReg XRegsNoX0[32] = \
+ PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \
+ static const MCPhysReg VSRegs[64] = \
+ PPC_REGS_LO_HI(PPC::VSL, PPC::V); \
+ static const MCPhysReg VSFRegs[64] = \
+ PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg VSSRegs[64] = \
+ PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg CRBITRegs[32] = { \
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, \
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, \
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, \
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, \
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \
+ static const MCPhysReg CRRegs[8] = { \
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, \
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7}
+
#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
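
Editorial note: the PPC_REGS0_31 / PPC_REGS_NO0_31 macros above replace the hand-written 32-entry tables deleted from the disassembler at the top of this diff. A self-contained sketch of the expansion mechanics, using a stand-in enum in place of the PPC::R0..PPC::R31 enumerators (MyReg, the array names, and the assertions are editorial, not from the patch):

#include <cassert>
#include <cstdint>

using MCPhysReg = uint16_t;

#define PPC_REGS0_31(X) \
  { X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
    X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
    X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 }

#define PPC_REGS_NO0_31(Z, X) \
  { Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
    X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \
    X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 }

// Stand-in enumerators; the real header pastes onto PPC::R, PPC::X, PPC::F...
enum MyReg : uint16_t {
  ZERO,
  R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15,
  R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
  R30, R31
};

static const MCPhysReg RRegs[32] = PPC_REGS0_31(R);
static const MCPhysReg RRegsNoR0[32] = PPC_REGS_NO0_31(ZERO, R);

int main() {
  // Decoders index the encoded register number straight into these tables;
  // the "NoR0" variant substitutes ZERO for R0, as decodeMemRI* expects.
  assert(RRegs[0] == R0 && RRegs[31] == R31);
  assert(RRegsNoR0[0] == ZERO && RRegsNoR0[5] == R5);
  return 0;
}
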
diff --git a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
index c6cbb9037ede..17c37964c562 100644
--- a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -111,11 +111,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
(instregex "POPCNT(D|W)$"),
(instregex "CMPB(8)?$"),
+ (instregex "SETB(8)?$"),
XSTDIVDP,
XSTSQRTDP,
XSXSIGDP,
XSCVSPDPN,
- SETB,
BPERMD
)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 80ad4962a20f..98e6e98e6974 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -305,11 +305,11 @@ def : Processor<"generic", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureICBT, FeatureBookE,
+ FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureICBT, FeatureBookE,
+ FeatureICBT, FeatureBookE,
FeatureMSYNC, FeatureMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601, FeatureFPU]>;
def : Processor<"602", G3Itineraries, [Directive602, FeatureFPU,
@@ -348,7 +348,7 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE,
FeatureMFTB]>;
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
- FeatureFRES, FeatureFRSQRTE,
+ FeatureFRES, FeatureFRSQRTE,
FeatureMFTB]>;
def : ProcessorModel<"970", G5Model,
@@ -369,11 +369,11 @@ def : ProcessorModel<"e500", PPCE500Model,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc,
- FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
[DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
@@ -428,7 +428,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
-def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
+def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat,
@@ -478,3 +478,9 @@ def PPC : Target {
let AssemblyParserVariants = [PPCAsmParserVariant];
let AllowRegisterRenaming = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Pfm Counters
+//===----------------------------------------------------------------------===//
+
+include "PPCPfmCounters.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a9da64cc216f..04aa3c9b1e22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -158,23 +158,6 @@ public:
} // end anonymous namespace
-/// stripRegisterPrefix - This method strips the character prefix from a
-/// register name so that only the number is left. Used by for linux asm.
-static const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'q': // for QPX
- case 'v':
- if (RegName[1] == 's')
- return RegName + 2;
- return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
-
- return RegName;
-}
-
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const DataLayout &DL = getDataLayout();
@@ -182,27 +165,15 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
switch (MO.getType()) {
case MachineOperand::MO_Register: {
- unsigned Reg = MO.getReg();
-
- // There are VSX instructions that use VSX register numbering (vs0 - vs63)
- // as well as those that use VMX register numbering (v0 - v31 which
- // correspond to vs32 - vs63). If we have an instruction that uses VSX
- // numbering, we need to convert the VMX registers to VSX registers.
- // Namely, we print 32-63 when the instruction operates on one of the
- // VMX registers.
- // (Please synchronize with PPCInstPrinter::printOperand)
- if (MI->getDesc().TSFlags & PPCII::UseVSXReg) {
- if (PPCInstrInfo::isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (PPCInstrInfo::isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
- }
+ unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(),
+ MO.getReg(), OpNo);
+
const char *RegName = PPCInstPrinter::getRegisterName(Reg);
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
if (!Subtarget->isDarwin())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName;
return;
}
@@ -279,6 +250,21 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (MI->getOperand(OpNo).isImm())
O << "i";
return false;
+ case 'x':
+ if(!MI->getOperand(OpNo).isReg())
+ return true;
+ // This operand uses VSX numbering.
+ // If the operand is a VMX register, convert it to a VSX register.
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ if (PPCInstrInfo::isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (PPCInstrInfo::isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ const char *RegName;
+ RegName = PPCInstPrinter::getRegisterName(Reg);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
+ O << RegName;
+ return false;
}
}
@@ -303,7 +289,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
{
const char *RegName = "r0";
if (!Subtarget->isDarwin())
- RegName = stripRegisterPrefix(RegName);
+ RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
O << RegName << ", ";
printOperand(MI, OpNo, O);
return false;
@@ -341,7 +327,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
}
void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
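
Editorial note: the new 'x' inline-asm print modifier above converts a VMX register operand to its VSX number before printing, using the mapping described in the comment removed from printOperand: v0-v31 alias vs32-vs63, so v<N> is shown as 32+N. A tiny sketch of that arithmetic (the function name is invented for illustration):

#include <cassert>

// v0-v31 occupy the upper half of the 64 VSX registers, so when an operand
// must be printed with VSX numbering, v<N> becomes vs<32 + N>.
static unsigned vsxNumberForVMX(unsigned VMXNum) {
  assert(VMXNum < 32 && "only v0-v31 alias VSX registers");
  return 32 + VMXNum;
}

int main() {
  assert(vsxNumberForVMX(0) == 32);  // v0  -> vs32
  assert(vsxNumberForVMX(31) == 63); // v31 -> vs63
  return 0;
}
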
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 12c581023234..22842d516e7d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -338,7 +338,7 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
// coldcc calling convection marks most registers as non-volatile.
// Do not include r1 since the stack pointer is never considered a CSR.
// Do not include r2, since it is the TOC register and is added depending
-// on wether or not the function uses the TOC and is a non-leaf.
+// on whether or not the function uses the TOC and is a non-leaf.
// Do not include r0,r11,r13 as they are optional in functional linkage
// and value may be altered by inter-library calls.
// Do not include r12 as it is used as a scratch register.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
index fe41e1b36a5d..a03e691ef5bb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -392,7 +392,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
// liveness state at the end of MBB (liveOut of MBB) as the liveIn for
// NewSuccessor. Otherwise, will cause cyclic dependence.
LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
- SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 2> Clobbers;
for (MachineInstr &MI : *MBB)
LPR.stepForward(MI, Clobbers);
for (auto &LI : LPR)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index f212894035db..3b2d92db78b9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -861,8 +861,20 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
}
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
unsigned CmpOpc;
bool NeedsExt = false;
+ auto RC = MRI.getRegClass(SrcReg1);
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
@@ -879,8 +891,15 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFSCMPGT;
break;
}
- } else
+ } else {
CmpOpc = PPC::FCMPUS;
+ if (isVSSRCRegClass(RC)) {
+ unsigned TmpReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1);
+ SrcReg1 = TmpReg;
+ }
+ }
break;
case MVT::f64:
if (HasSPE) {
@@ -896,14 +915,17 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFDCMPGT;
break;
}
- } else
+ } else if (isVSFRCRegClass(RC)) {
+ CmpOpc = PPC::XSCMPUDP;
+ } else {
CmpOpc = PPC::FCMPUD;
+ }
break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
NeedsExt = true;
- // Intentional fall-through.
+ LLVM_FALLTHROUGH;
case MVT::i32:
if (!UseImm)
CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
@@ -918,17 +940,6 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
break;
}
- unsigned SrcReg1 = getRegForValue(SrcValue1);
- if (SrcReg1 == 0)
- return false;
-
- unsigned SrcReg2 = 0;
- if (!UseImm) {
- SrcReg2 = getRegForValue(SrcValue2);
- if (SrcReg2 == 0)
- return false;
- }
-
if (NeedsExt) {
unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
@@ -2354,7 +2365,8 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 84dacf396462..8263954994d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,16 @@
using namespace llvm;
+#define DEBUG_TYPE "framelowering"
+STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
+STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
+STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+
+static cl::opt<bool>
+EnablePEVectorSpills("ppc-enable-pe-vector-spills",
+ cl::desc("Enable spills in prologue to vector registers."),
+ cl::init(false), cl::Hidden);
+
/// VRRegNo - Map from a numbered VR register to its enum value.
///
static const MCPhysReg VRRegNo[] = {
@@ -466,6 +477,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
+ NumNoNeedForFrame++;
// No need for frame
if (UpdateMF)
MFI.setStackSize(0);
@@ -1213,11 +1225,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
continue;
}
- int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (CSI[I].isSpilledToReg()) {
+ unsigned SpilledReg = CSI[I].getDstReg();
+ unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
+ nullptr, MRI->getDwarfRegNum(Reg, true),
+ MRI->getDwarfRegNum(SpilledReg, true)));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIRegister);
+ } else {
+ int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
}
}
}
@@ -1822,17 +1843,19 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
- int FI = GPRegs[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ if (!GPRegs[i].isSpilledToReg()) {
+ int FI = GPRegs[i].getFrameIdx();
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ }
}
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
- int FI = G8Regs[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ if (!G8Regs[i].isSpilledToReg()) {
+ int FI = G8Regs[i].getFrameIdx();
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ }
}
unsigned MinReg =
@@ -1947,6 +1970,64 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
}
}
+// This function checks if a callee saved gpr can be spilled to a volatile
+// vector register. This occurs for leaf functions when the option
+// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
+// which were not spilled to vectors, return false so the target independent
+// code can handle them by assigning a FrameIdx to a stack slot.
+bool PPCFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+
+ if (CSI.empty())
+ return true; // Early exit if no callee saved registers are modified!
+
+ // Early exit if cannot spill gprs to volatile vector registers.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
+ return false;
+
+ // Build a BitVector of VSRs that can be used for spilling GPRs.
+ BitVector BVAllocatable = TRI->getAllocatableSet(MF);
+ BitVector BVCalleeSaved(TRI->getNumRegs());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ BVCalleeSaved.set(CSRegs[i]);
+
+ for (unsigned Reg : BVAllocatable.set_bits()) {
+ // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+ // used in the function.
+ if (BVCalleeSaved[Reg] ||
+ (!PPC::F8RCRegClass.contains(Reg) &&
+ !PPC::VFRCRegClass.contains(Reg)) ||
+ (MF.getRegInfo().isPhysRegUsed(Reg)))
+ BVAllocatable.reset(Reg);
+ }
+
+ bool AllSpilledToReg = true;
+ for (auto &CS : CSI) {
+ if (BVAllocatable.none())
+ return false;
+
+ unsigned Reg = CS.getReg();
+ if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+ AllSpilledToReg = false;
+ continue;
+ }
+
+ unsigned VolatileVFReg = BVAllocatable.find_first();
+ if (VolatileVFReg < BVAllocatable.size()) {
+ CS.setDstReg(VolatileVFReg);
+ BVAllocatable.reset(VolatileVFReg);
+ } else {
+ AllSpilledToReg = false;
+ }
+ }
+ return AllSpilledToReg;
+}
+
+
bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -2012,12 +2093,18 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CSI[i].getFrameIdx()));
}
} else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- // Use !IsLiveIn for the kill flag.
- // We do not want to kill registers that are live in this function
- // before their use because they will become undefined registers.
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ if (CSI[i].isSpilledToReg()) {
+ NumPESpillVSR++;
+ BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
+ .addReg(Reg, getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ // Use !IsLiveIn for the kill flag.
+ // We do not want to kill registers that are live in this function
+ // before their use because they will become undefined registers.
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
}
}
return true;
@@ -2157,13 +2244,19 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
CR2Spilled = CR3Spilled = CR4Spilled = false;
}
- // Default behavior for non-CR saves.
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
- assert(I != MBB.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
+ if (CSI[i].isSpilledToReg()) {
+ DebugLoc DL;
+ NumPEReloadVSR++;
+ BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
+ .addReg(CSI[i].getDstReg(), getKillRegState(true));
+ } else {
+ // Default behavior for non-CR saves.
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ assert(I != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
}
+ }
// Insert in reverse order.
if (AtStart)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 01c155594c44..69bd1484d6e5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -99,6 +99,13 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override;
+ /// This function will assign callee saved gprs to volatile vector registers
+ /// for prologue spills when applicable. It returns false if there are any
+ /// registers which were not spilled to volatile vector registers.
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 793a4dd7f624..5f6966cecd61 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -103,7 +103,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
case PPC::Sched::IIC_LdStLHA:
case PPC::Sched::IIC_LdStLHAU:
case PPC::Sched::IIC_LdStLWA:
- case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTU:
case PPC::Sched::IIC_LdStSTFDU:
NSlots = 2;
break;
@@ -112,7 +112,7 @@ bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
case PPC::Sched::IIC_LdStLHAUX:
case PPC::Sched::IIC_LdStLWARX:
case PPC::Sched::IIC_LdStLDARX:
- case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTUX:
case PPC::Sched::IIC_LdStSTDCX:
case PPC::Sched::IIC_LdStSTWCX:
case PPC::Sched::IIC_BrMCRX: // mtcr
@@ -180,9 +180,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
CurGroup.clear();
CurSlots = CurBranches = 0;
} else {
- LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << SU->NodeNum
- << "): ");
- LLVM_DEBUG(DAG->dumpNode(SU));
+ LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: ");
+ LLVM_DEBUG(DAG->dumpNode(*SU));
unsigned NSlots;
bool MustBeFirst = mustComeFirst(MCID, NSlots);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6cec664d1e66..31acd0ff870f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison,
"Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
"Number of compares not eliminated as they have non-extending uses.");
+STATISTIC(NumP9Setb,
+ "Number of compares lowered to setb.");
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
@@ -327,7 +329,6 @@ private:
bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
- MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);
};
} // end anonymous namespace
@@ -490,7 +491,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
- const TerminatorInst *BBTerm = BB->getTerminator();
+ const Instruction *BBTerm = BB->getTerminator();
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
@@ -687,9 +688,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
SDValue Op1 = N->getOperand(1);
SDLoc dl(N);
- KnownBits LKnown, RKnown;
- CurDAG->computeKnownBits(Op0, LKnown);
- CurDAG->computeKnownBits(Op1, RKnown);
+ KnownBits LKnown = CurDAG->computeKnownBits(Op0);
+ KnownBits RKnown = CurDAG->computeKnownBits(Op1);
unsigned TargetMask = LKnown.Zero.getZExtValue();
unsigned InsertMask = RKnown.Zero.getZExtValue();
@@ -733,8 +733,7 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
// The AND mask might not be a constant, and we need to make sure that
// if we're going to fold the masking with the insert, all bits not
// know to be zero in the mask are known to be one.
- KnownBits MKnown;
- CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
+ KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
unsigned SHOpc = Op1.getOperand(0).getOpcode();
@@ -1083,9 +1082,14 @@ class BitPermutationSelector {
// lowest-order bit.
unsigned Idx;
+ // ConstZero means a bit we need to mask off.
+    // Variable is a bit that comes from an input variable.
+    // VariableKnownToBeZero is also a bit that comes from an input variable,
+    // but it is known to be already zero, so we do not need to mask it.
enum Kind {
ConstZero,
- Variable
+ Variable,
+ VariableKnownToBeZero
} K;
ValueBit(SDValue V, unsigned I, Kind K = Variable)
@@ -1094,11 +1098,11 @@ class BitPermutationSelector {
: V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
bool isZero() const {
- return K == ConstZero;
+ return K == ConstZero || K == VariableKnownToBeZero;
}
bool hasValue() const {
- return K == Variable;
+ return K == Variable || K == VariableKnownToBeZero;
}
SDValue getValue() const {
@@ -1248,8 +1252,14 @@ class BitPermutationSelector {
for (unsigned i = 0; i < NumBits; ++i)
if (((Mask >> i) & 1) == 1)
Bits[i] = (*LHSBits)[i];
- else
- Bits[i] = ValueBit(ValueBit::ConstZero);
+ else {
+ // AND instruction masks this bit. If the input is already zero,
+ // we have nothing to do here. Otherwise, make the bit ConstZero.
+ if ((*LHSBits)[i].isZero())
+ Bits[i] = (*LHSBits)[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
return std::make_pair(Interesting, &Bits);
}
@@ -1259,8 +1269,26 @@ class BitPermutationSelector {
const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
bool AllDisjoint = true;
- for (unsigned i = 0; i < NumBits; ++i)
- if (LHSBits[i].isZero())
+ SDValue LastVal = SDValue();
+ unsigned LastIdx = 0;
+ for (unsigned i = 0; i < NumBits; ++i) {
+ if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
+ // If both inputs are known to be zero and one is ConstZero and
+ // another is VariableKnownToBeZero, we can select whichever
+ // we like. To minimize the number of bit groups, we select
+ // VariableKnownToBeZero if this bit is the next bit of the same
+ // input variable from the previous bit. Otherwise, we select
+ // ConstZero.
+ if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
+ LHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = LHSBits[i];
+ else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
+ RHSBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = RHSBits[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+ }
+ else if (LHSBits[i].isZero())
Bits[i] = RHSBits[i];
else if (RHSBits[i].isZero())
Bits[i] = LHSBits[i];
@@ -1268,6 +1296,16 @@ class BitPermutationSelector {
AllDisjoint = false;
break;
}
+ // We remember the value and bit index of this bit.
+ if (Bits[i].hasValue()) {
+ LastVal = Bits[i].getValue();
+ LastIdx = Bits[i].getValueBitIndex();
+ }
+ else {
+ if (LastVal) LastVal = SDValue();
+ LastIdx = 0;
+ }
+ }
if (!AllDisjoint)
break;
@@ -1293,6 +1331,72 @@ class BitPermutationSelector {
return std::make_pair(Interesting, &Bits);
}
+ case ISD::TRUNCATE: {
+ EVT FromType = V.getOperand(0).getValueType();
+ EVT ToType = V.getValueType();
+ // We support only the case with truncate from i64 to i32.
+ if (FromType != MVT::i64 || ToType != MVT::i32)
+ break;
+ const unsigned NumAllBits = FromType.getSizeInBits();
+ SmallVector<ValueBit, 64> *InBits;
+ std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
+ NumAllBits);
+ const unsigned NumValidBits = ToType.getSizeInBits();
+
+ // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
+ // So, we cannot include this truncate.
+ bool UseUpper32bit = false;
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
+ UseUpper32bit = true;
+ break;
+ }
+ if (UseUpper32bit)
+ break;
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*InBits)[i];
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::AssertZext: {
+ // For AssertZext, we look through the operand and
+ // mark the bits known to be zero.
+ const SmallVector<ValueBit, 64> *LHSBits;
+ std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
+ NumBits);
+
+ EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
+ const unsigned NumValidBits = FromType.getSizeInBits();
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*LHSBits)[i];
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit((*LHSBits)[i].getValue(),
+ (*LHSBits)[i].getValueBitIndex(),
+ ValueBit::VariableKnownToBeZero);
+
+ return std::make_pair(Interesting, &Bits);
+ }
+ case ISD::LOAD:
+ LoadSDNode *LD = cast<LoadSDNode>(V);
+ if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
+ EVT VT = LD->getMemoryVT();
+ const unsigned NumValidBits = VT.getSizeInBits();
+
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = ValueBit(V, i);
+
+ // These bits are known to be zero.
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
+
+ // Zero-extending load itself cannot be optimized. So, it is not
+ // interesting by itself though it gives useful information.
+ return std::make_pair(Interesting = false, &Bits);
+ }
+ break;
}
for (unsigned i = 0; i < NumBits; ++i)
@@ -1304,7 +1408,7 @@ class BitPermutationSelector {
// For each value (except the constant ones), compute the left-rotate amount
// to get it from its original to final position.
void computeRotationAmounts() {
- HasZeros = false;
+ NeedMask = false;
RLAmt.resize(Bits.size());
for (unsigned i = 0; i < Bits.size(); ++i)
if (Bits[i].hasValue()) {
@@ -1314,7 +1418,7 @@ class BitPermutationSelector {
else
RLAmt[i] = Bits.size() - (VBI - i);
} else if (Bits[i].isZero()) {
- HasZeros = true;
+ NeedMask = true;
RLAmt[i] = UINT32_MAX;
} else {
llvm_unreachable("Unknown value bit type");
@@ -1330,6 +1434,7 @@ class BitPermutationSelector {
unsigned LastRLAmt = RLAmt[0];
SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
unsigned LastGroupStartIdx = 0;
+ bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
for (unsigned i = 1; i < Bits.size(); ++i) {
unsigned ThisRLAmt = RLAmt[i];
SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
@@ -1342,10 +1447,20 @@ class BitPermutationSelector {
LastGroupStartIdx = 0;
}
+ // If this bit is known to be zero and the current group is a bit group
+      // of zeros, we do not need to terminate the current bit group even if the
+      // Value or RLAmt does not match here. Instead, we terminate this group
+ // when the first non-zero bit appears later.
+ if (IsGroupOfZeros && Bits[i].isZero())
+ continue;
+
// If this bit has the same underlying value and the same rotate factor as
// the last one, then they're part of the same group.
if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
- continue;
+        // We cannot continue the current group if this bit is not known to
+        // be zero in a bit group of zeros.
+ if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
+ continue;
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1353,6 +1468,7 @@ class BitPermutationSelector {
LastRLAmt = ThisRLAmt;
LastValue = ThisValue;
LastGroupStartIdx = i;
+ IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
}
if (LastValue.getNode())
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1401,7 +1517,7 @@ class BitPermutationSelector {
for (auto &I : ValueRots) {
ValueRotsVec.push_back(I.second);
}
- llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end());
+ llvm::sort(ValueRotsVec);
}
// In 64-bit mode, rlwinm and friends have a rotation operator that
@@ -1588,6 +1704,17 @@ class BitPermutationSelector {
return ExtVal;
}
+ SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
+ if (V.getValueSizeInBits() == 32)
+ return V;
+
+ assert(V.getValueSizeInBits() == 64);
+ SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
+ MVT::i32, V, SubRegIdx), 0);
+ return SubVal;
+ }
+
// Depending on the number of groups for a particular value, it might be
// better to rotate, mask explicitly (using andi/andis), and then or the
// result. Select this part of the result first.
@@ -1646,12 +1773,12 @@ class BitPermutationSelector {
SDValue VRot;
if (VRI.RLAmt) {
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
Ops), 0);
} else {
- VRot = VRI.V;
+ VRot = TruncateToInt32(VRI.V, dl);
}
SDValue ANDIVal, ANDISVal;
@@ -1698,17 +1825,17 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
ValueRotInfo &VRI = ValueRotsVec[0];
if (VRI.RLAmt) {
if (InstCnt) *InstCnt += 1;
SDValue Ops[] =
- { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
- getI32Imm(31, dl) };
+ { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
+ getI32Imm(0, dl), getI32Imm(31, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
0);
} else {
- Res = VRI.V;
+ Res = TruncateToInt32(VRI.V, dl);
}
// Now, remove all groups with this underlying value and rotation factor.
@@ -1723,13 +1850,13 @@ class BitPermutationSelector {
for (auto &BG : BitGroups) {
if (!Res) {
SDValue Ops[] =
- { BG.V, getI32Imm(BG.RLAmt, dl),
+ { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
} else {
SDValue Ops[] =
- { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+ { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
@@ -2077,7 +2204,7 @@ class BitPermutationSelector {
// If we've not yet selected a 'starting' instruction, and we have no zeros
// to fill in, select the (Value, RLAmt) with the highest priority (largest
// number of groups), and start with this rotated value.
- if ((!HasZeros || LateMask) && !Res) {
+ if ((!NeedMask || LateMask) && !Res) {
// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
// groups will come first, and so the VRI representing the largest number
// of groups might not be first (it might be the first Repl32 groups).
@@ -2230,7 +2357,7 @@ class BitPermutationSelector {
SmallVector<ValueBit, 64> Bits;
- bool HasZeros;
+ bool NeedMask;
SmallVector<unsigned, 64> RLAmt;
SmallVector<BitGroup, 16> BitGroups;
@@ -2259,10 +2386,10 @@ public:
" selection for: ");
LLVM_DEBUG(N->dump(CurDAG));
- // Fill it RLAmt and set HasZeros.
+ // Fill in RLAmt and set NeedMask.
computeRotationAmounts();
- if (!HasZeros)
+ if (!NeedMask)
return Select(N, false);
// We currently have two techniques for handling results with zeros: early
@@ -4045,54 +4172,148 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
-/// This method returns a node after flipping the MSB of each element
-/// of vector integer type. Additionally, if SignBitVec is non-null,
-/// this method sets a node with one at MSB of all elements
-/// and zero at other bits in SignBitVec.
-MachineSDNode *
-PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {
- SDLoc dl(N);
- EVT VecVT = N.getValueType();
- if (VecVT == MVT::v4i32) {
- if (SignBitVec) {
- SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);
- *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
- SDValue(ZV, 0));
- }
- return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
- }
- else if (VecVT == MVT::v8i16) {
- SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
- getI32Imm(0x8000, dl));
- SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
- SDValue(Hi, 0),
- getI32Imm(0x8000, dl));
- SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
- SDValue(ScaImm, 0));
- /*
- Alternatively, we can do this as follow to use VRF instead of GPR.
- vspltish 5, 1
- vspltish 6, 15
- vslh 5, 6, 5
- */
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,
- SDValue(VecImm, 0));
- }
- else if (VecVT == MVT::v16i8) {
- SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,
- getI32Imm(0x80, dl));
- if (SignBitVec) *SignBitVec = VecImm;
- return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,
- SDValue(VecImm, 0));
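+// Check whether this SELECT_CC node matches the semantics of the ISA 3.0
+// setb instruction, which produces -1, 0, or 1 for less-than, equal, or
+// greater-than respectively. On success, NeedSwapOps reports whether the
+// compared operands must be swapped and IsUnCmp whether the comparison is
+// unsigned.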
+static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
+ bool &NeedSwapOps, bool &IsUnCmp) {
+
+ assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue TrueRes = N->getOperand(2);
+ SDValue FalseRes = N->getOperand(3);
+ ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
+ if (!TrueConst)
+ return false;
+
+ assert((N->getSimpleValueType(0) == MVT::i64 ||
+ N->getSimpleValueType(0) == MVT::i32) &&
+ "Expecting either i64 or i32 here.");
+
+ // We are looking for any of:
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
+ // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
+ int64_t TrueResVal = TrueConst->getSExtValue();
+ if ((TrueResVal < -1 || TrueResVal > 1) ||
+ (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
+ (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
+ (TrueResVal == 0 &&
+ (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
+ return false;
+
+ bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
+ SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
+ if (SetOrSelCC.getOpcode() != ISD::SETCC &&
+ SetOrSelCC.getOpcode() != ISD::SELECT_CC)
+ return false;
+
+ // Without this setb optimization, the outer SELECT_CC will be manually
+ // selected to a SELECT_CC_I4/SELECT_CC_I8 pseudo, and the expand-isel-pseudos
+ // pass then turns that pseudo into an isel instruction. When the result has
+ // more than one use (e.g. through zext/sext), this optimization would only
+ // replace the isel with a setb without any significant gain. Since setb has
+ // a longer latency than the original isel, we should avoid it in that case.
+ // Another point is that setb requires the comparison to always be kept,
+ // which could prevent us from eliminating the comparison in the future.
+ if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
+ return false;
+
+ SDValue InnerLHS = SetOrSelCC.getOperand(0);
+ SDValue InnerRHS = SetOrSelCC.getOperand(1);
+ ISD::CondCode InnerCC =
+ cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
+ // If the inner comparison is a select_cc, make sure the true/false values are
+ // 1/-1 and canonicalize it if needed.
+ if (InnerIsSel) {
+ ConstantSDNode *SelCCTrueConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
+ ConstantSDNode *SelCCFalseConst =
+ dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
+ if (!SelCCTrueConst || !SelCCFalseConst)
+ return false;
+ int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
+ int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
+ // The values must be -1/1 (requiring a swap) or 1/-1.
+ if (SelCCTVal == -1 && SelCCFVal == 1) {
+ std::swap(InnerLHS, InnerRHS);
+ } else if (SelCCTVal != 1 || SelCCFVal != -1)
+ return false;
}
- else
- llvm_unreachable("Unsupported vector data type for flipSignBit");
+
+ // Canonicalize unsigned case
+ if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
+ IsUnCmp = true;
+ InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
+ }
+
+ bool InnerSwapped = false;
+ if (LHS == InnerRHS && RHS == InnerLHS)
+ InnerSwapped = true;
+ else if (LHS != InnerLHS || RHS != InnerRHS)
+ return false;
+
+ switch (CC) {
+ // (select_cc lhs, rhs, 0, \
+ // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
+ case ISD::SETEQ:
+ if (!InnerIsSel)
+ return false;
+ if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
+ return false;
+ NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
+ break;
+
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
+ case ISD::SETULT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETLT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
+ (InnerCC == ISD::SETLT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == 1);
+ else
+ return false;
+ break;
+
+ // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
+ // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
+ case ISD::SETUGT:
+ if (!IsUnCmp && InnerCC != ISD::SETNE)
+ return false;
+ IsUnCmp = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETGT:
+ if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
+ (InnerCC == ISD::SETGT && InnerSwapped))
+ NeedSwapOps = (TrueResVal == -1);
+ else
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
+ LLVM_DEBUG(N->dump());
+
+ return true;
}
// Select - Convert the specified operand from a target-independent to a
@@ -4429,8 +4650,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
- KnownBits LHSKnown;
- CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
+ KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
// If this is equivalent to an add, then we can fold it with the
// FrameIndex calculation.
@@ -4557,6 +4777,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0).getValueType() == MVT::i1)
break;
+ if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
+ bool NeedSwapOps = false;
+ bool IsUnCmp = false;
+ if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (NeedSwapOps)
+ std::swap(LHS, RHS);
+
+ // Make use of SelectCC to generate the comparison that sets the CR bits.
+ // For equality comparisons with one literal operand, SelectCC may not
+ // materialize the whole literal and instead check it with xoris first; in
+ // that case the resulting comparison cannot exactly represent the GT/LT
+ // relationship. To avoid this, we specify SETGT/SETUGT here instead of
+ // SETEQ.
+ SDValue GenCC =
+ SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
+ CurDAG->SelectNodeTo(
+ N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
+ N->getValueType(0), GenCC);
+ NumP9Setb++;
+ return;
+ }
+ }
+
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
@@ -4648,14 +4893,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
return;
}
- case ISD::VSELECT:
- if (PPCSubTarget->hasVSX()) {
- SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
- CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
- return;
- }
- break;
-
case ISD::VECTOR_SHUFFLE:
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
@@ -4683,11 +4920,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
+ MachineMemOperand *MemOp = LD->getMemOperand();
SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
N->getValueType(0), Ops);
- cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
return;
}
}
@@ -4753,6 +4989,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
}
+ // A signed comparison of i1 values produces the opposite result to an
+ // unsigned one if the condition code includes less-than or greater-than.
+ // This is because 1 is the most negative signed i1 number and the most
+ // positive unsigned i1 number. The CR-logical operations used for such
+ // comparisons are non-commutative so for signed comparisons vs. unsigned
+ // ones, the input operands just need to be swapped.
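+ // For example, the i1 value 1 is -1 when interpreted as signed, so
+ // 1 < 0 signed but 1 > 0 unsigned.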
+ if (ISD::isSignedIntSetCC(CC))
+ Swap = !Swap;
+
SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
N->getOperand(Swap ? 3 : 2),
N->getOperand(Swap ? 2 : 3)), 0);
@@ -4809,9 +5054,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
-
- if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
- CModel == CodeModel::Large) {
+ if (PPCLowering->isAccessedAsGotIndirect(GA)) {
+ // If the address is accessed GOT-indirectly, we need an extra LD to load
+ // the address.
SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
transferMemOperands(N, MN);
@@ -4819,18 +5064,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GV = G->getGlobal();
- unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
- if (GVFlags & PPCII::MO_NLP_FLAG) {
- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
- transferMemOperands(N, MN);
- ReplaceNode(N, MN);
- return;
- }
- }
-
+ // Build the address relative to the TOC pointer.
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA));
return;
@@ -4916,55 +5150,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
}
- case ISD::ABS: {
- assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");
-
- // For vector absolute difference, we use VABSDUW instruction of POWER9.
- // Since VABSDU instructions are for unsigned integers, we need adjustment
- // for signed integers.
- // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
- // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
- // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
- EVT VecVT = N->getOperand(0).getValueType();
- SDNode *AbsOp = nullptr;
- unsigned AbsOpcode;
-
- if (VecVT == MVT::v4i32)
- AbsOpcode = PPC::VABSDUW;
- else if (VecVT == MVT::v8i16)
- AbsOpcode = PPC::VABSDUH;
- else if (VecVT == MVT::v16i8)
- AbsOpcode = PPC::VABSDUB;
- else
- llvm_unreachable("Unsupported vector data type for ISD::ABS");
-
- // Even for signed integers, we can skip adjustment if all values are
- // known to be positive (as signed integer) due to zero-extended inputs.
- if (N->getOperand(0).getOpcode() == ISD::SUB &&
- N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) {
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(N->getOperand(0)->getOperand(0)),
- SDValue(N->getOperand(0)->getOperand(1)));
- ReplaceNode(N, AbsOp);
- return;
- }
- if (N->getOperand(0).getOpcode() == ISD::SUB) {
- SDValue SubVal = N->getOperand(0);
- SDNode *Op0 = flipSignBit(SubVal->getOperand(0));
- SDNode *Op1 = flipSignBit(SubVal->getOperand(1));
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
- SDValue(Op0, 0), SDValue(Op1, 0));
- }
- else {
- SDNode *Op1 = nullptr;
- SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);
- AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
- SDValue(Op1, 0));
- }
- ReplaceNode(N, AbsOp);
- return;
- }
}
SelectCode(N);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b5bdf47ce37a..39608cb74bee 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -251,12 +251,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i64, Expand);
}
- if (Subtarget.hasP9Vector()) {
- setOperationAction(ISD::ABS, MVT::v4i32, Legal);
- setOperationAction(ISD::ABS, MVT::v8i16, Legal);
- setOperationAction(ISD::ABS, MVT::v16i8, Legal);
- }
-
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
@@ -323,12 +317,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// to speed up scalar BSWAP64.
// CTPOP or CTTZ were introduced in P8/P9 respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- if (Subtarget.isISA3_0()) {
+ if (Subtarget.hasP9Vector())
setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
+ else
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ if (Subtarget.isISA3_0()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
- setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
@@ -554,6 +550,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Custom);
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -586,6 +583,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
@@ -626,7 +624,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
@@ -659,6 +656,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ // Without hasP8Altivec set, v2i64 SMAX isn't available.
+ // But ABS custom lowering requires SMAX support.
+ if (!Subtarget.hasP8Altivec())
+ setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -727,12 +729,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
-
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
@@ -792,12 +788,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
- // Vector operation legalization checks the result type of
- // SIGN_EXTEND_INREG, overall legalization checks the inner type.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+ // Custom handling for partial vectors of integers converted to
+ // floating point. We already have optimal handling for v2i32 through
+ // the DAG combine, so those aren't necessary.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
@@ -1055,6 +1056,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
@@ -1076,6 +1078,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::TRUNCATE);
+
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
@@ -1088,6 +1092,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::FSQRT);
}
+ if (Subtarget.hasP9Altivec()) {
+ setTargetDAGCombine(ISD::ABS);
+ setTargetDAGCombine(ISD::VSELECT);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -1348,6 +1357,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
+ case PPCISD::VABSD: return "PPCISD::VABSD";
case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
@@ -1355,6 +1365,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
+ case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
}
return nullptr;
}
@@ -2214,11 +2225,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if (LHSKnown.Zero.getBoolValue()) {
- DAG.computeKnownBits(N.getOperand(1), RHSKnown);
+ KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
@@ -2317,8 +2327,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
- KnownBits LHSKnown;
- DAG.computeKnownBits(N.getOperand(0), LHSKnown);
+ KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
@@ -2405,6 +2414,28 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
return true;
}
+/// Returns true if we should use a direct load-into-vector instruction
+/// (such as lxsd or lfd) instead of a load into a GPR plus a direct move.
+static bool usePartialVectorLoads(SDNode *N) {
+ if (!N->hasOneUse())
+ return false;
+
+ // If there are any uses other than a scalar_to_vector, then we should keep
+ // it as a scalar load -> direct move pattern to prevent multiple loads.
+ // Currently, we only check for i64 since we have lxsd/lfd to do this
+ // efficiently, but no update-form equivalent.
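+ // For example, a (scalar_to_vector (load i64)) whose load has no other
+ // users is better served by a single lxsd/lfd than by a GPR load followed
+ // by a direct move.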
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ EVT MemVT = LD->getMemoryVT();
+ if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
+ SDNode *User = *(LD->use_begin());
+ if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -2430,6 +2461,13 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
} else
return false;
+ // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
+ // instructions, because we can fold these into a more efficient instruction
+ // instead (such as LXSD).
+ if (isLoad && usePartialVectorLoads(N)) {
+ return false;
+ }
+
// PowerPC doesn't have preinc load/store instructions for vectors (except
// for QPX, which does have preinc r+r forms).
if (VT.isVector()) {
@@ -2674,7 +2712,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.isSVR4ABI() && isPositionIndependent()) {
+ if (Subtarget.isSVR4ABI() &&
+ (Subtarget.isPPC64() || isPositionIndependent())) {
if (Subtarget.isPPC64())
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
@@ -3480,9 +3519,14 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Argument stored in memory.
assert(VA.isMemLoc());
+ // Get the extended size of the argument type on the stack.
unsigned ArgSize = VA.getLocVT().getStoreSize();
- int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable);
+ // Get the actual size of the argument type
+ unsigned ObjSize = VA.getValVT().getStoreSize();
+ unsigned ArgOffset = VA.getLocMemOffset();
+ // Stack objects in PPC32 are right justified.
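+ // e.g. a 1-byte value in a 4-byte slot is located at slot offset 3.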
+ ArgOffset += ArgSize - ObjSize;
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3935,7 +3979,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
"Invalid QPX parameter type");
- /* fall through */
+ LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v4i1:
@@ -5053,9 +5097,15 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
// All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
// into the call.
- if (isSVR4ABI && isPPC64 && !isPatchPoint) {
+ // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
+ if (isSVR4ABI && isPPC64) {
setUsesTOCBasePtr(DAG);
- Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
+ // We cannot add X2 as an operand here for PATCHPOINT, because there is no
+ // way to mark dependencies as implicit here. We will add the X2 dependency
+ // in EmitInstrWithCustomInserter.
+ if (!isPatchPoint)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
}
return CallOpc;
@@ -5437,10 +5487,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
Arg = PtrOff;
}
- if (VA.isRegLoc()) {
- if (Arg.getValueType() == MVT::i1)
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+ // When useCRBits() is true, there can be i1 arguments.
+ // This is because getRegisterType(MVT::i1) => MVT::i1,
+ // while for other integer types getRegisterType() => MVT::i32.
+ // Extend i1 here so the callee always receives an i32.
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, MVT::i32, Arg);
+ if (VA.isRegLoc()) {
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -6073,7 +6128,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
"Invalid QPX parameter type");
- /* fall through */
+ LLVM_FALLTHROUGH;
case MVT::v4f64:
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
@@ -7228,10 +7283,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
return FP;
}
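+// Widen a vector that is narrower than 128 bits to full width by
+// concatenating it with undef vectors of the same type, e.g.
+// v4i8 -> v16i8 becomes concat_vectors(Vec, undef, undef, undef).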
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
+
+ EVT VecVT = Vec.getValueType();
+ assert(VecVT.isVector() && "Expected a vector type.");
+ assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
+
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
+ unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = Vec;
+ SDValue UndefVec = DAG.getUNDEF(VecVT);
+ for (unsigned i = 1; i < NumConcat; ++i)
+ Ops[i] = UndefVec;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
+}
+
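+// Lower SINT_TO_FP/UINT_TO_FP from a partial vector of integers (such as
+// v2i8, v4i8, v2i16 or v4i16) by widening the input to a full 128-bit
+// vector, shuffling the significant elements into place, extending them to
+// v2i64/v4i32, and then performing the conversion on the extended vector.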
+SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) const {
+
+ unsigned Opc = Op.getOpcode();
+ assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
+ "Unexpected conversion type");
+ assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
+ "Supports conversions to v2f64/v4f32 only.");
+
+ bool SignedConv = Opc == ISD::SINT_TO_FP;
+ bool FourEltRes = Op.getValueType() == MVT::v4f32;
+
+ SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
+ EVT WideVT = Wide.getValueType();
+ unsigned WideNumElts = WideVT.getVectorNumElements();
+ MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
+
+ SmallVector<int, 16> ShuffV;
+ for (unsigned i = 0; i < WideNumElts; ++i)
+ ShuffV.push_back(i + WideNumElts);
+
+ int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
+ int SaveElts = FourEltRes ? 4 : 2;
+ if (Subtarget.isLittleEndian())
+ for (int i = 0; i < SaveElts; i++)
+ ShuffV[i * Stride] = i;
+ else
+ for (int i = 1; i <= SaveElts; i++)
+ ShuffV[i * Stride - 1] = i - 1;
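+ // For example, converting an unsigned v4i8 to v4f32 on little-endian places
+ // the four input bytes in lanes 0, 4, 8 and 12 of a v16i8 whose remaining
+ // lanes come from the zero vector, so the bitcast to v4i32 below yields the
+ // zero-extended integers ready for the final conversion.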
+
+ SDValue ShuffleSrc2 =
+ SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
+ SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
+ unsigned ExtendOp =
+ SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
+
+ SDValue Extend;
+ if (!Subtarget.hasP9Altivec() && SignedConv) {
+ Arrange = DAG.getBitcast(IntermediateVT, Arrange);
+ Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
+ DAG.getValueType(Op.getOperand(0).getValueType()));
+ } else
+ Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
+}
+
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT OutVT = Op.getValueType();
+ if (OutVT.isVector() && OutVT.isFloatingPoint() &&
+ isOperationCustom(Op.getOpcode(), InVT))
+ return LowerINT_TO_FPVector(Op, DAG, dl);
+
// Conversions to f128 are legal.
if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
return Op;
@@ -8902,35 +9030,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::R2, MVT::i32);
}
- // We are looking for absolute values here.
- // The idea is to try to fit one of two patterns:
- // max (a, (0-a)) OR max ((0-a), a)
- if (Subtarget.hasP9Vector() &&
- (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsh ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
- SDValue V1 = Op.getOperand(1);
- SDValue V2 = Op.getOperand(2);
- if (V1.getSimpleValueType() == V2.getSimpleValueType() &&
- (V1.getSimpleValueType() == MVT::v4i32 ||
- V1.getSimpleValueType() == MVT::v8i16 ||
- V1.getSimpleValueType() == MVT::v16i8)) {
- if ( V1.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
- V1.getOperand(1) == V2 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2);
- }
-
- if ( V2.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
- V2.getOperand(1) == V1 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1);
- }
- }
- }
-
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
int CompareOpc;
@@ -9081,30 +9180,6 @@ SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
-SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
- // instructions), but for smaller types, we need to first extend up to v2i32
- // before doing going farther.
- if (Op.getValueType() == MVT::v2i64) {
- EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- if (ExtVT != MVT::v2i32) {
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
- Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
- DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
- ExtVT.getVectorElementType(), 4)));
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
- Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
- DAG.getValueType(MVT::v2i32));
- }
-
- return Op;
- }
-
- return SDValue();
-}
-
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9495,6 +9570,44 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
}
}
+SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+
+ assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
+
+ EVT VT = Op.getValueType();
+ assert(VT.isVector() &&
+ "Only set vector abs as custom, scalar abs shouldn't reach here!");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ VT == MVT::v16i8) &&
+ "Unexpected vector element type!");
+ assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
+ "Current subtarget doesn't support smax v2i64!");
+
+ // For vector abs, it can be lowered to:
+ // abs x
+ // ==>
+ // y = -x
+ // smax(x, y)
+
+ SDLoc dl(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
+
+ // The SMAX patch (https://reviews.llvm.org/D47332) hasn't landed yet, so
+ // use the intrinsic here for now.
+ // TODO: Use ISD::SMAX directly once the SMAX patch has landed.
+ Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
+ if (VT == MVT::v2i64)
+ BifID = Intrinsic::ppc_altivec_vmaxsd;
+ else if (VT == MVT::v8i16)
+ BifID = Intrinsic::ppc_altivec_vmaxsh;
+ else if (VT == MVT::v16i8)
+ BifID = Intrinsic::ppc_altivec_vmaxsb;
+
+ return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9544,10 +9657,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -9624,6 +9737,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
+ case ISD::BITCAST:
+ // Don't handle bitcast here.
+ return;
}
}
@@ -9787,17 +9903,14 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
-MachineBasicBlock *
-PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
- MachineBasicBlock *BB,
- bool is8bit, // operation
- unsigned BinOpcode,
- unsigned CmpOpcode,
- unsigned CmpPred) const {
+MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
+ MachineInstr &MI, MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
// If we support part-word atomic mnemonics, just use them
if (Subtarget.hasPartwordAtomics())
- return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
- CmpOpcode, CmpPred);
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
+ CmpPred);
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -9821,7 +9934,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB =
- CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
+ CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
if (CmpOpcode)
@@ -9832,22 +9945,25 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC =
+ is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
unsigned ShiftReg =
- isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
- unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
- unsigned MaskReg = RegInfo.createVirtualRegister(RC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
unsigned Ptr1Reg;
- unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+ unsigned TmpReg =
+ (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
// thisMBB:
// ...
@@ -9876,82 +9992,107 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
- .addReg(ptrA).addReg(ptrB);
+ .addReg(ptrA)
+ .addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
- BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
- .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ // We need to use a 32-bit subregister here to avoid a register class
+ // mismatch in 64-bit mode.
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
+ .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
+ .addImm(3)
+ .addImm(27)
+ .addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg)
+ .addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(61);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
- BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
- .addReg(incr).addReg(ShiftReg);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
- BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg)
+ .addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
- .addReg(Mask2Reg).addReg(ShiftReg);
+ .addReg(Mask2Reg)
+ .addReg(ShiftReg);
BB = loopMBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
- .addReg(Incr2Reg).addReg(TmpDestReg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
- .addReg(TmpDestReg).addReg(MaskReg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
- .addReg(TmpReg).addReg(MaskReg);
+ .addReg(Incr2Reg)
+ .addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
if (CmpOpcode) {
// For unsigned comparisons, we can directly compare the shifted values.
// For signed comparisons we shift and sign extend.
- unsigned SReg = RegInfo.createVirtualRegister(RC);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
- .addReg(TmpDestReg).addReg(MaskReg);
+ unsigned SReg = RegInfo.createVirtualRegister(GPRC);
+ BuildMI(BB, dl, TII->get(PPC::AND), SReg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
unsigned ValueReg = SReg;
unsigned CmpReg = Incr2Reg;
if (CmpOpcode == PPC::CMPW) {
- ValueReg = RegInfo.createVirtualRegister(RC);
+ ValueReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
- .addReg(SReg).addReg(ShiftReg);
- unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
+ .addReg(SReg)
+ .addReg(ShiftReg);
+ unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
- .addReg(ValueReg);
+ .addReg(ValueReg);
ValueReg = ValueSReg;
CmpReg = incr;
}
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
- .addReg(CmpReg).addReg(ValueReg);
+ .addReg(CmpReg)
+ .addReg(ValueReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
+ .addImm(CmpPred)
+ .addReg(PPC::CR0)
+ .addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
}
- BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
- .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX))
- .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ .addReg(Tmp4Reg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
- .addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ .addReg(TmpDestReg)
+ .addReg(ShiftReg);
return BB;
}
@@ -9968,10 +10109,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
-
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
@@ -10034,10 +10171,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
- .addReg(PPC::X2)
- .addImm(TOCOffset)
- .addReg(BufReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addReg(PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
}
// Naked functions never have a base pointer, and so we use r1. For all
@@ -10052,8 +10189,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
- .addReg(BufReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
@@ -10086,8 +10223,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
.addImm(LabelOffset)
.addReg(BufReg);
}
-
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
mainMBB->addSuccessor(sinkMBB);
@@ -10111,10 +10247,6 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
-
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -10152,7 +10284,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(0)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload IP
if (PVT == MVT::i64) {
@@ -10164,7 +10296,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(LabelOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload SP
if (PVT == MVT::i64) {
@@ -10176,7 +10308,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(SPOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload BP
if (PVT == MVT::i64) {
@@ -10188,16 +10320,15 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
.addImm(BPOffset)
.addReg(BufReg);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.cloneMemRefs(MI);
// Reload TOC
if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
- .addImm(TOCOffset)
- .addReg(BufReg);
-
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ .addImm(TOCOffset)
+ .addReg(BufReg)
+ .cloneMemRefs(MI);
}
// Jump
@@ -10221,7 +10352,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// way to mark the dependence as implicit there, and so the stackmap code
// will confuse it with a regular operand. Instead, add the dependence
// here.
- setUsesTOCBasePtr(*BB->getParent());
MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
}
@@ -10246,8 +10376,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
- MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
+ MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
+ MI.getOpcode() == PPC::SELECT_I8) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -10392,9 +10522,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
- .addReg(HiReg).addReg(ReadAgainReg);
+ .addReg(HiReg)
+ .addReg(ReadAgainReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(CmpReg)
+ .addMBB(readMBB);
BB->addSuccessor(readMBB);
BB->addSuccessor(sinkMBB);
@@ -10564,27 +10697,35 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
- .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
- .addReg(oldval).addReg(dest);
+ .addReg(oldval)
+ .addReg(dest);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
- .addReg(newval).addReg(ptrA).addReg(ptrB);
+ .addReg(newval)
+ .addReg(ptrA)
+ .addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
- .addReg(dest).addReg(ptrA).addReg(ptrB);
+ .addReg(dest)
+ .addReg(ptrA)
+ .addReg(ptrB);
BB->addSuccessor(exitMBB);
// exitMBB:
@@ -10619,24 +10760,26 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
- : &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC =
+ is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
unsigned ShiftReg =
- isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
- unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
- unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
- unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
- unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
- unsigned MaskReg = RegInfo.createVirtualRegister(RC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
unsigned Ptr1Reg;
- unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
@@ -10673,74 +10816,107 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
- .addReg(ptrA).addReg(ptrB);
+ .addReg(ptrA)
+ .addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
- BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
- .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+
+ // We need to use a 32-bit subregister here to avoid a register class
+ // mismatch in 64-bit mode.
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
+ .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
+ .addImm(3)
+ .addImm(27)
+ .addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg)
+ .addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(61);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
- .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ .addReg(Ptr1Reg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(29);
BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
- .addReg(newval).addReg(ShiftReg);
+ .addReg(newval)
+ .addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
- .addReg(oldval).addReg(ShiftReg);
+ .addReg(oldval)
+ .addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
- .addReg(Mask3Reg).addImm(65535);
+ .addReg(Mask3Reg)
+ .addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
- .addReg(Mask2Reg).addReg(ShiftReg);
+ .addReg(Mask2Reg)
+ .addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
- .addReg(NewVal2Reg).addReg(MaskReg);
+ .addReg(NewVal2Reg)
+ .addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
- .addReg(OldVal2Reg).addReg(MaskReg);
+ .addReg(OldVal2Reg)
+ .addReg(MaskReg);
BB = loop1MBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
- BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
- .addReg(TmpDestReg).addReg(MaskReg);
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
- .addReg(TmpReg).addReg(OldVal3Reg);
+ .addReg(TmpReg)
+ .addReg(OldVal3Reg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(midMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
- .addReg(TmpDestReg).addReg(MaskReg);
- BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
- .addReg(Tmp2Reg).addReg(NewVal3Reg);
- BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
- .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg)
+ .addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
+ .addReg(Tmp2Reg)
+ .addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ .addImm(PPC::PRED_NE)
+ .addReg(PPC::CR0)
+ .addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
- .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(TmpDestReg)
+ .addReg(ZeroReg)
+ .addReg(PtrReg);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
- .addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ .addReg(TmpReg)
+ .addReg(ShiftReg);
} else if (MI.getOpcode() == PPC::FADDrtz) {
// This pseudo performs an FADD with rounding mode temporarily forced
// to round-to-zero. We emit this via custom inserter since the FPSCR
@@ -10777,9 +10953,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
- &PPC::GPRCRegClass :
- &PPC::G8RCRegClass);
+ unsigned Dest = RegInfo.createVirtualRegister(
+ Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
DebugLoc dl = MI.getDebugLoc();
BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
@@ -11231,9 +11406,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.
- KnownBits Op1Known, Op2Known;
- DAG.computeKnownBits(N->getOperand(0), Op1Known);
- DAG.computeKnownBits(N->getOperand(1), Op2Known);
+ KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
+ KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
// We don't really care about what is known about the first bit (if
// anything), so clear it in all masks prior to comparing them.
@@ -11750,6 +11924,37 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
+SDValue PPCTargetLowering::combineSetCC(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::SETCC &&
+ "Should be called with a SETCC node");
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (CC == ISD::SETNE || CC == ISD::SETEQ) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
+ if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+ LHS.hasOneUse())
+ std::swap(LHS, RHS);
+
+ // x == 0-y --> x+y == 0
+ // x != 0-y --> x+y != 0
+ if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
+ RHS.hasOneUse()) {
+ SDLoc DL(N);
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ EVT OpVT = LHS.getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
+ return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
+ }
+ }
+
+ return DAGCombineTruncBoolExt(N, DCI);
+}
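// A minimal standalone sketch (illustration only; the helper names below are
// made up) of the identity combineSetCC relies on: in two's-complement
// arithmetic, x == 0 - y holds exactly when x + y == 0, so a compare against
// a negation can be rewritten as a compare of a sum against zero.
static bool eqViaSub(unsigned long long X, unsigned long long Y) {
  return X == 0 - Y; // x == -y (mod 2^64)
}
static bool eqViaAdd(unsigned long long X, unsigned long long Y) {
  return X + Y == 0; // x + y == 0 (mod 2^64)
}
// Both helpers agree for every input pair, which is why the node can be
// rewritten without changing the comparison result.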
+
// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
@@ -11869,7 +12074,8 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
}
// Not a build vector of (possibly fp_rounded) loads.
- if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
+ if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
+ N->getNumOperands() == 1)
return SDValue();
for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
@@ -12450,6 +12656,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
+ case ISD::ADD:
+ return combineADD(N, DCI);
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
@@ -12476,7 +12684,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
+ return combineTRUNCATE(N, DCI);
case ISD::SETCC:
+ if (SDValue CSCC = combineSetCC(N, DCI))
+ return CSCC;
+ LLVM_FALLTHROUGH;
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
@@ -12499,9 +12711,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
(Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
(Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
- // STBRX can only handle simple types.
+ // STBRX can only handle simple types and it makes no sense to store less
+ // than two bytes in byte-reversed order.
EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
- if (mVT.isExtended())
+ if (mVT.isExtended() || mVT.getSizeInBits() < 16)
break;
SDValue BSwapOp = N->getOperand(1).getOperand(0);
@@ -12877,6 +13090,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+
+ // Combine vmaxsw/h/b(a, a's negation) to abs(a)
+ // Expose the vabsduw/h/b opportunity for downstream combines
+ if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
+ (IID == Intrinsic::ppc_altivec_vmaxsw ||
+ IID == Intrinsic::ppc_altivec_vmaxsh ||
+ IID == Intrinsic::ppc_altivec_vmaxsb)) {
+ SDValue V1 = N->getOperand(1);
+ SDValue V2 = N->getOperand(2);
+ if ((V1.getSimpleValueType() == MVT::v4i32 ||
+ V1.getSimpleValueType() == MVT::v8i16 ||
+ V1.getSimpleValueType() == MVT::v16i8) &&
+ V1.getSimpleValueType() == V2.getSimpleValueType()) {
+ // (0-a, a)
+ if (V1.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
+ V1.getOperand(1) == V2) {
+ return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
+ }
+ // (a, 0-a)
+ if (V2.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
+ V2.getOperand(1) == V1) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ // (x-y, y-x)
+ if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
+ V1.getOperand(0) == V2.getOperand(1) &&
+ V1.getOperand(1) == V2.getOperand(0)) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ }
+ }
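// Why max(a, 0 - a) matches ISD::ABS even at the most negative value (a
// sketch of the reasoning, assuming wrapping 32-bit arithmetic): for
// a = INT32_MIN, 0 - a wraps back to INT32_MIN, so the signed max is
// INT32_MIN, which is also what a wrapping abs produces; for any other a,
// one of {a, 0 - a} is the ordinary absolute value.
static_assert(0u - 0x80000000u == 0x80000000u,
              "negating the minimum 32-bit value wraps back to itself");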
}
break;
@@ -13109,6 +13355,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
+ case ISD::ABS:
+ return combineABS(N, DCI);
+ case ISD::VSELECT:
+ return combineVSelect(N, DCI);
}
return SDValue();
@@ -13251,7 +13501,8 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const {
} else if (Constraint == "wc") { // individual CR bits.
return C_RegisterClass;
} else if (Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf" || Constraint == "ws") {
+ Constraint == "wf" || Constraint == "ws" ||
+ Constraint == "wi") {
return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
@@ -13281,6 +13532,8 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
return CW_Register;
else if (StringRef(constraint) == "ws" && type->isDoubleTy())
return CW_Register;
+ else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
+ return CW_Register; // just holds 64-bit integer data.
switch (*constraint) {
default:
@@ -13363,7 +13616,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// An individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
} else if ((Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf") && Subtarget.hasVSX()) {
+ Constraint == "wf" || Constraint == "wi") &&
+ Subtarget.hasVSX()) {
return std::make_pair(0U, &PPC::VSRCRegClass);
} else if (Constraint == "ws" && Subtarget.hasVSX()) {
if (VT == MVT::f32 && Subtarget.hasP8Vector())
@@ -13598,6 +13852,35 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
report_fatal_error("Invalid register name global variable");
}
+bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
+ // The 32-bit SVR4 ABI accesses everything as got-indirect.
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ return true;
+
+ CodeModel::Model CModel = getTargetMachine().getCodeModel();
+ // Under the small and large code models, module locals are accessed
+ // indirectly by loading their address from .toc/.got. The difference
+ // is that for large code model we have ADDISTocHa + LDtocL and for
+ // small code model we simply have LDtoc.
+ if (CModel == CodeModel::Small || CModel == CodeModel::Large)
+ return true;
+
+ // JumpTable and BlockAddress are accessed as got-indirect.
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
+ return true;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GV = G->getGlobal();
+ unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
+ // The NLP flag indicates that a global access has to use an
+ // extra indirection.
+ if (GVFlags & PPCII::MO_NLP_FLAG)
+ return true;
+ }
+
+ return false;
+}
+
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
@@ -14116,7 +14399,30 @@ SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
return Value;
- return SDValue();
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Subtarget.isISA3_0() ||
+ N0.getOpcode() != ISD::SIGN_EXTEND ||
+ N0.getOperand(0).getValueType() != MVT::i32 ||
+ CN1 == nullptr || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ // We can't save an operation here if the value is already extended, and
+ // the existing shift is easier to combine.
+ SDValue ExtsSrc = N0.getOperand(0);
+ if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
+ ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
+ return SDValue();
+
+ SDLoc DL(N0);
+ SDValue ShiftBy = SDValue(CN1, 0);
+ // We want the shift amount to be i32 on the extswli, but the shift could
+ // have an i64.
+ if (ShiftBy.getValueType() == MVT::i64)
+ ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
+
+ return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
+ ShiftBy);
}
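// Hedged example of the source-level shape the combine above targets (the
// function name is made up): shifting a sign-extended i32 on a 64-bit
// target.
static long long shiftExtendedWord(int X) {
  // (shl (sign_extend i32 X), 3): on ISA 3.0 this can be emitted as a
  // single extswsli instead of extsw followed by sldi.
  return (long long)X << 3;
}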
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
@@ -14133,6 +14439,152 @@ SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
return SDValue();
}
+// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
+// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
+// When C is zero, the equation (addi Z, -C) can be simplified to Z
+// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
+static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ if (!Subtarget.isPPC64())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ auto isZextOfCompareWithConstant = [](SDValue Op) {
+ if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
+ Op.getValueType() != MVT::i64)
+ return false;
+
+ SDValue Cmp = Op.getOperand(0);
+ if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
+ Cmp.getOperand(0).getValueType() != MVT::i64)
+ return false;
+
+ if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
+ int64_t NegConstant = 0 - Constant->getSExtValue();
+ // Due to the limitations of the addi instruction,
+ // -C is required to be in [-32768, 32767].
+ return isInt<16>(NegConstant);
+ }
+
+ return false;
+ };
+
+ bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
+ bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
+
+ // If there is a pattern, canonicalize a zext operand to the RHS.
+ if (LHSHasPattern && !RHSHasPattern)
+ std::swap(LHS, RHS);
+ else if (!LHSHasPattern && !RHSHasPattern)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
+ SDValue Cmp = RHS.getOperand(0);
+ SDValue Z = Cmp.getOperand(0);
+ auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
+
+ assert(Constant && "Constant should not be a null pointer.");
+ int64_t NegConstant = 0 - Constant->getSExtValue();
+
+ switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
+ default: break;
+ case ISD::SETNE: {
+ // when C == 0
+ // --> addze X, (addic Z, -1).carry
+ // /
+ // add X, (zext(setne Z, C))--
+ // \ when -32768 <= -C <= 32767 && C != 0
+ // --> addze X, (addic (addi Z, -C), -1).carry
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+ DAG.getConstant(NegConstant, DL, MVT::i64));
+ SDValue AddOrZ = NegConstant != 0 ? Add : Z;
+ SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
+ AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
+ return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
+ SDValue(Addc.getNode(), 1));
+ }
+ case ISD::SETEQ: {
+ // when C == 0
+ // --> addze X, (subfic Z, 0).carry
+ // /
+ // add X, (zext(sete Z, C))--
+ // \ when -32768 <= -C <= 32767 && C != 0
+ // --> addze X, (subfic (addi Z, -C), 0).carry
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+ DAG.getConstant(NegConstant, DL, MVT::i64));
+ SDValue AddOrZ = NegConstant != 0 ? Add : Z;
+ SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
+ DAG.getConstant(0, DL, MVT::i64), AddOrZ);
+ return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
+ SDValue(Subc.getNode(), 1));
+ }
+ }
+
+ return SDValue();
+}
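// A small reference model (illustration only; the helper names are made up)
// of the carry trick used above: "addic W, -1" sets CA exactly when W != 0,
// and "subfic W, 0" sets CA exactly when W == 0, so the following addze adds
// precisely the zero-extended comparison result to X.
static unsigned carryOfAddicMinusOne(unsigned long long W) {
  return W != 0; // W + 0xFFFF...F carries out iff W is non-zero.
}
static unsigned carryOfSubficFromZero(unsigned long long W) {
  return W == 0; // 0 - W borrows unless W == 0; CA is the inverted borrow.
}
// With W = Z - C, addze X therefore computes X + (Z != C) or X + (Z == C).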
+
+SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
+ return Value;
+
+ return SDValue();
+}
+
+// Detect TRUNCATE operations on bitcasts of float128 values.
+// What we are looking for here is the situation where we extract a subset
+// of bits from a 128-bit float.
+// This can be of two forms:
+// 1) BITCAST of f128 feeding TRUNCATE
+// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
+// The reason this is required is that we do not have a legal i128 type
+// and so we want to prevent having to store the f128 and then reload part
+// of it.
+SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // If we are using CRBits then try that first.
+ if (Subtarget.useCRBits()) {
+ // Check if CRBits did anything and return that if it did.
+ if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
+ return CRTruncValue;
+ }
+
+ SDLoc dl(N);
+ SDValue Op0 = N->getOperand(0);
+
+ // Looking for a truncate of i128 to i64.
+ if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+ // SRL feeding TRUNCATE.
+ if (Op0.getOpcode() == ISD::SRL) {
+ ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ // The right shift has to be by 64 bits.
+ if (!ConstNode || ConstNode->getZExtValue() != 64)
+ return SDValue();
+
+ // Switch the element number to extract.
+ EltToExtract = EltToExtract ? 0 : 1;
+ // Update Op0 past the SRL.
+ Op0 = Op0.getOperand(0);
+ }
+
+ // BITCAST feeding a TRUNCATE possibly via SRL.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getValueType() == MVT::i128 &&
+ Op0.getOperand(0).getValueType() == MVT::f128) {
+ SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
+ return DCI.DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
+ DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
+ }
+ return SDValue();
+}
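// Hedged example of the idiom this combine targets (assumes a compiler that
// provides __float128 and unsigned __int128; the helper name is made up):
// extracting the high 64 bits of a binary128 value.
static unsigned long long highBitsOfF128(__float128 F) {
  unsigned __int128 Bits;
  __builtin_memcpy(&Bits, &F, sizeof(Bits));
  // This is the BITCAST + SRL-by-64 + TRUNCATE shape matched above; the
  // combine turns it into an EXTRACT_VECTOR_ELT of a v2i64 bitcast rather
  // than a store and partial reload of the f128 value.
  return (unsigned long long)(Bits >> 64);
}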
+
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
@@ -14168,6 +14620,15 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}
+bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+ if (!Subtarget.hasVSX())
+ return false;
+ if (Subtarget.hasP9Vector() && VT == MVT::f128)
+ return true;
+ return VT == MVT::f32 || VT == MVT::f64 ||
+ VT == MVT::v4f32 || VT == MVT::v2f64;
+}
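// What "bit-preserving FP logic" means here (a sketch assuming an IEEE-754
// binary64 layout; the helper name is made up): bitwise logic applied to the
// raw bits of an FP value, such as clearing the sign bit to compute fabs.
static double fabsViaBitLogic(double X) {
  unsigned long long Bits;
  __builtin_memcpy(&Bits, &X, sizeof(Bits));
  Bits &= ~(1ULL << 63); // clear the sign bit
  __builtin_memcpy(&X, &Bits, sizeof(Bits));
  return X;
}
// Returning true above allows such logic to stay in VSX registers for the
// listed types instead of bouncing the value through GPRs.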
+
bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
const Value *Mask = AndI.getOperand(1);
@@ -14184,3 +14645,109 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
// For non-constant masks, we can always use the record-form and.
return true;
}
+
+// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
+SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ if (N->getOperand(0).getOpcode() == ISD::SUB) {
+ // Even for a signed ABS this is safe: both inputs are zero-extended, so
+ // the difference cannot overflow and the signed absolute value equals the
+ // unsigned absolute difference that VABSD computes.
+ unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
+ unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
+ if ((SubOpcd0 == ISD::ZERO_EXTEND ||
+ SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ (SubOpcd1 == ISD::ZERO_EXTEND ||
+ SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ // For type v4i32, it can be optimized with xvnegsp + vabsduw
+ if (N->getOperand(0).getValueType() == MVT::v4i32 &&
+ N->getOperand(0).hasOneUse()) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(1, dl, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
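// Scalar reference model of the VABSD semantics relied on above (a sketch;
// the helper names are made up): an unsigned absolute difference per
// element, with the xvnegsp-style sign-bit flip making it usable for signed
// v4i32 inputs.
static unsigned absdu32(unsigned A, unsigned B) {
  return A > B ? A - B : B - A;
}
static unsigned absdSigned32(int A, int B) {
  // Adding 0x80000000 (equivalent to flipping the sign bit, which is what
  // xvnegsp does per word) maps the signed range onto the unsigned range
  // while preserving order, so the unsigned difference equals |A - B|.
  return absdu32((unsigned)A + 0x80000000u, (unsigned)B + 0x80000000u);
}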
+
+// For type v4i32/v8i16/v16i8, transform
+// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
+// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
+SDValue PPCTargetLowering::combineVSelect(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue TrueOpnd = N->getOperand(1);
+ SDValue FalseOpnd = N->getOperand(2);
+ EVT VT = N->getOperand(1).getValueType();
+
+ if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
+ FalseOpnd.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ // ABSD only available for type v4i32/v8i16/v16i8
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ // Require at least one single-use operand so the transform actually saves
+ // a dependent computation.
+ if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
+ return SDValue();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Can only handle unsigned comparison here
+ switch (CC) {
+ default:
+ return SDValue();
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ std::swap(TrueOpnd, FalseOpnd);
+ break;
+ }
+
+ SDValue CmpOpnd1 = Cond.getOperand(0);
+ SDValue CmpOpnd2 = Cond.getOperand(1);
+
+ // SETCC CmpOpnd1 CmpOpnd2 cond
+ // TrueOpnd = CmpOpnd1 - CmpOpnd2
+ // FalseOpnd = CmpOpnd2 - CmpOpnd1
+ if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
+ TrueOpnd.getOperand(1) == CmpOpnd2 &&
+ FalseOpnd.getOperand(0) == CmpOpnd2 &&
+ FalseOpnd.getOperand(1) == CmpOpnd1) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
+ CmpOpnd1, CmpOpnd2,
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ return SDValue();
+}
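// Hedged example of source code that lowers to the vselect pattern matched
// above (the loop and names are made up): each element is an unsigned
// absolute difference, which is exactly what a single vabsdu[bhw] provides.
static void absDiff(const unsigned *A, const unsigned *B, unsigned *C, int N) {
  for (int I = 0; I < N; ++I)
    C[I] = A[I] > B[I] ? A[I] - B[I] : B[I] - A[I];
}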
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f174943a8004..30acd60eba6f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -149,6 +149,10 @@ namespace llvm {
/// For vector types, only the last n bits are used. See vsld.
SRL, SRA, SHL,
+ /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
+ /// word and shift left immediate.
+ EXTSWSLI,
+
/// The combination of sra[wd]i and addze used to implemented signed
/// integer division by a power of 2. The first operand is the dividend,
/// and the second is the constant shift amount (representing the
@@ -369,6 +373,21 @@ namespace llvm {
/// An SDNode for swaps that are not associated with any loads/stores
/// and thereby have no chain.
SWAP_NO_CHAIN,
+
+ /// An SDNode for Power9 vector absolute value difference.
+ /// operand #0 vector
+ /// operand #1 vector
+ /// operand #2 constant i32 0 or 1, indicating whether the most
+ /// significant bit needs to be patched for signed i32 elements
+ ///
+ /// The Power9 VABSD* instructions are designed for unsigned integer
+ /// vectors (byte/halfword/word). To use them for signed integer vectors,
+ /// we have to flip their sign bits first. Flipping the sign bit of a
+ /// byte/halfword vector would be inefficient, but for a word vector we
+ /// can leverage XVNEGSP to do it efficiently, e.g.:
+ /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+ /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
+ VABSD,
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
QVFPERM,
@@ -557,6 +576,11 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ bool isSelectSupported(SelectSupportKind Kind) const override {
+ // PowerPC does not support scalar condition selects on vectors.
+ return (Kind != SelectSupportKind::ScalarCondVectorVal);
+ }
+
/// getPreferredVectorAction - The code we generate when vector types are
/// legalized by promoting the integer element type is often much worse
/// than code we generate if we widen the type for applicable vector types.
@@ -565,7 +589,7 @@ namespace llvm {
/// of v4i8's and shuffle them. This will turn into a mess of 8 extending
/// loads, moves back into VSR's (or memory ops if we don't have moves) and
/// then the VPERM for the shuffle. All in all a very slow sequence.
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
@@ -785,6 +809,9 @@ namespace llvm {
return true;
}
+ // Returns true if the address of the global is stored in a TOC entry.
+ bool isAccessedAsGotIndirect(SDValue N) const;
+
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -923,6 +950,9 @@ namespace llvm {
SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
+ SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) const;
+
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -988,6 +1018,7 @@ namespace llvm {
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1088,6 +1119,11 @@ namespace llvm {
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
@@ -1122,6 +1158,7 @@ namespace llvm {
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
}; // end class PPCTargetLowering
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index cdd57c6a1118..2ce6ad3293eb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -94,7 +94,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR8] in
- def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
+ def MovePCtoLR8 : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -199,47 +199,45 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
-let usesCustomInserter = 1 in {
- let Defs = [CR0] in {
- def ATOMIC_LOAD_ADD_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
- [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_SUB_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
- [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_OR_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
- [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_XOR_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
- [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_AND_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
- [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_NAND_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
- [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_MIN_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
- [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_MAX_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
- [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_UMIN_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
- [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
- def ATOMIC_LOAD_UMAX_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
- [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
-
- def ATOMIC_CMP_SWAP_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
- [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
-
- def ATOMIC_SWAP_I64 : Pseudo<
- (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
- [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
- }
+let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_MIN_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
+ [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_MAX_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
+ [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
+ [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
+ [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
+
+ def ATOMIC_SWAP_I64 : PPCCustomInserterPseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
// Instructions to support atomic operations
@@ -269,18 +267,18 @@ def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNdi8 :Pseudo< (outs),
+def TCRETURNdi8 :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd8 $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+def TCRETURNai8 :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa8 $func $offset",
[(PPCtc_return (i64 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
+def TCRETURNri8 : PPCEmitTimePseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
@@ -347,14 +345,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
+// is not.
+let hasSideEffects = 1 in {
let Defs = [CTR8] in
- def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp64 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP64",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In64BitMode]>;
+}
+
+let hasSideEffects = 1, isBarrier = 1 in {
let isTerminator = 1 in
- def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ def EH_SjLj_LongJmp64 : PPCCustomInserterPseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP64",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In64BitMode]>;
@@ -396,10 +399,10 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
// the POWER3.
let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
+def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
[(set i64:$result,
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
-def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
+def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
let Defs = [LR8] in {
@@ -717,9 +720,10 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
"extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
- []>, isPPC64;
+ [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+ isPPC64, Requires<[IsISA3_0]>;
// For fast-isel:
let isCodeGenOnly = 1, Defs = [CARRY] in
@@ -773,8 +777,12 @@ def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC)
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def SETB : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
- "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
+ "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
+ "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
+}
def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L),
"darn $RT, $L", IIC_LdStLD>, isPPC64;
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),
@@ -1018,19 +1026,19 @@ def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
-def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtoc: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtoc",
[(set i64:$rD,
(PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocJTI: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocJTI",
[(set i64:$rD,
(PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
-def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocCPT: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
@@ -1071,40 +1079,40 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
-def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDIStocHA", []>, isPPC64;
-def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL", []>, isPPC64;
}
let mayLoad = 1 in
-def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
}
// Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDISgotTprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISgotTprelHA",
[(set i64:$rD,
(PPCaddisGotTprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
+def LDgotTprelL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
"#LDgotTprelL",
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
-let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
-def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+let Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : PPCPostRAExpPseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIStlsgdHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsgdHA",
[(set i64:$rD,
(PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDItlsgdL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsgdL",
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1115,7 +1123,7 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
// correct because the branch select pass is relying on it.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+def GETtlsADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
@@ -1125,7 +1133,7 @@ def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
-def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+def ADDItlsgdLADDR : PPCEmitTimePseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsgdLADDR",
[(set i64:$rD,
@@ -1133,12 +1141,12 @@ def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIStlsldHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
(PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDItlsldL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsldL",
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1147,7 +1155,7 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+def GETtlsldADDR : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsldADDR",
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
@@ -1157,7 +1165,7 @@ def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
-def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
+def ADDItlsldLADDR : PPCEmitTimePseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsldLADDR",
[(set i64:$rD,
@@ -1165,13 +1173,13 @@ def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDISdtprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
(PPCaddisDtprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+def ADDIdtprelL : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIdtprelL",
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -1221,30 +1229,30 @@ def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STBUX8: XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX8: XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -1252,13 +1260,13 @@ def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrix:$dst),
- "stdu $rS, $dst", IIC_LdStSTDU, []>,
+ "stdu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
def STDUX : XForm_8_memOp<31, 181, (outs ptr_rc_nor0:$ea_res),
(ins g8rc:$rS, memrr:$dst),
- "stdux $rS, $dst", IIC_LdStSTDUX, []>,
+ "stdux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 24969d7ef853..69b19e45c3e9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1051,6 +1051,20 @@ def : Pat<(v4f32 (ftrunc v4f32:$vA)),
def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
(VRFIN $vA)>;
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4i32 (vselect v4i32:$vA, v4i32:$vB, v4i32:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2i64 (vselect v2i64:$vA, v2i64:$vB, v2i64:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)),
+ (VSEL $vC, $vB, $vA)>;
+
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index f5f4b46344cf..2fe765dd99e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -2153,7 +2153,9 @@ class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
}
//===----------------------------------------------------------------------===//
-class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+// An EmitTimePseudo won't have encoding information for the [MC]CodeEmitter
+// machinery.
+class PPCEmitTimePseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
let isCodeGenOnly = 1;
let PPC64 = 0;
@@ -2162,6 +2164,21 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
let hasNoSchedulingInfo = 1;
}
+// Instructions that require custom insertion support (a.k.a. ISelPseudos);
+// note that these won't have isPseudo set.
+class PPCCustomInserterPseudo<dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> {
+ let usesCustomInserter = 1;
+}
+
+// PostRAPseudos are expanded in expandPostRAPseudo; the isPseudo flag in
+// the .td files is set only for PostRAPseudos.
+class PPCPostRAExpPseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : PPCEmitTimePseudo<OOL, IOL, asmstr, pattern> {
+ let isPseudo = 1;
+}
+
class PseudoXFormMemOp<dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : Pseudo<OOL, IOL, asmstr, pattern>, XFormMemOp;
+ : PPCPostRAExpPseudo<OOL, IOL, asmstr, pattern>, XFormMemOp;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 6c4e2129087c..0efe797c765d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -20,8 +20,8 @@ def HTM_get_imm : SDNodeXForm<imm, [{
return getI32Imm (N->getZExtValue(), SDLoc(N));
}]>;
-let hasSideEffects = 1, usesCustomInserter = 1 in {
-def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+let hasSideEffects = 1 in {
+def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0930f7d3b8d7..d754ce2990d2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -987,7 +987,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::XXLOR;
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::XXLORf;
+ Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMR;
else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
@@ -1429,17 +1429,15 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
: (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(PPC::BCLR));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(PPC::BCLRn));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
} else {
MI.setDesc(get(PPC::BCCLR));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg());
+ .add(Pred[1]);
}
return true;
@@ -1454,7 +1452,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
@@ -1462,7 +1460,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BCn));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
} else {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
@@ -1471,13 +1469,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MI.setDesc(get(PPC::BCC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg())
+ .add(Pred[1])
.addMBB(MBB);
}
return true;
- } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 ||
- OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+ } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
+ OpC == PPC::BCTRL8) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
@@ -1487,14 +1485,12 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
: (setLR ? PPC::BCCTRL : PPC::BCCTR)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
return true;
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
: (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addReg(Pred[1].getReg());
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
return true;
}
@@ -1502,7 +1498,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
: (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
- .addReg(Pred[1].getReg());
+ .add(Pred[1]);
return true;
}
@@ -1822,7 +1818,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int NewOpC = -1;
int MIOpC = MI->getOpcode();
- if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+ if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8 ||
+ MIOpC == PPC::ANDISo || MIOpC == PPC::ANDISo8)
NewOpC = MIOpC;
else {
NewOpC = PPC::getRecordFormOpcode(MIOpC);
@@ -1912,14 +1909,36 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// compare).
// Rotates are expensive instructions. If we're emitting a record-form
- // rotate that can just be an andi, we should just emit the andi.
- if ((MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) &&
- MI->getOperand(2).getImm() == 0) {
+ // rotate that can just be an andi/andis, we should just emit that.
+ if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
+ unsigned GPRRes = MI->getOperand(0).getReg();
+ int64_t SH = MI->getOperand(2).getImm();
int64_t MB = MI->getOperand(3).getImm();
int64_t ME = MI->getOperand(4).getImm();
- if (MB < ME && MB >= 16) {
- uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
- NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIo : PPC::ANDIo8;
+ // We can only do this if both the start and end of the mask are in the
+ // same halfword.
+ bool MBInLoHWord = MB >= 16;
+ bool MEInLoHWord = ME >= 16;
+ uint64_t Mask = ~0LLU;
+
+ if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
+ Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
+ // The mask value needs to be shifted right by 16 if we're emitting andis.
+ Mask >>= MBInLoHWord ? 0 : 16;
+ NewOpC = MIOpC == PPC::RLWINM ?
+ (MBInLoHWord ? PPC::ANDIo : PPC::ANDISo) :
+ (MBInLoHWord ? PPC::ANDIo8 :PPC::ANDISo8);
+ } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
+ (ME - MB + 1 == SH) && (MB >= 16)) {
+ // If we are rotating by exactly as many bits as are in the mask
+ // and the mask is in the least significant bits of the register,
+ // that's just an andis. (as long as the GPR result has no uses).
+ Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
+ Mask >>= 16;
+ NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDISo :PPC::ANDISo8;
+ }
+ // If we've set the mask, we can transform.
+ if (Mask != ~0LLU) {
MI->RemoveOperand(4);
MI->RemoveOperand(3);
MI->getOperand(2).setImm(Mask);
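// Worked instance of the mask computation above (illustrative values only):
// with SH = 0, MB = 16, ME = 31 the mask is
//   ((1 << (32 - 16)) - 1) & ~((1 << (31 - 31)) - 1) = 0xFFFF,
// so the record-form rotate becomes "andi. rD, rS, 0xFFFF"; with MB = 0,
// ME = 15 the mask is 0xFFFF0000, which is shifted right by 16 to form the
// immediate for "andis.".
static_assert((((1ULL << 16) - 1) & ~((1ULL << 0) - 1)) == 0xFFFFULL,
              "andi. example");
static_assert((((1ULL << 32) - 1) & ~((1ULL << 16) - 1)) == 0xFFFF0000ULL,
              "andis. example");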
@@ -2088,11 +2107,9 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
return true;
}
-#ifndef NDEBUG
static bool isAnImmediateOperand(const MachineOperand &MO) {
return MO.isCPI() || MO.isGlobal() || MO.isImm();
}
-#endif
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto &MBB = *MI.getParent();
@@ -2231,6 +2248,35 @@ static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
return PPC::NoRegister;
}
+void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
+ unsigned OpNo,
+ int64_t Imm) const {
+ assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
+ // Replace the REG with the Immediate.
+ unsigned InUseReg = MI.getOperand(OpNo).getReg();
+ MI.getOperand(OpNo).ChangeToImmediate(Imm);
+
+ if (empty(MI.implicit_operands()))
+ return;
+
+ // We need to make sure that the MI didn't have any implicit use
+ // of this REG any more.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI);
+ if (UseOpIdx >= 0) {
+ MachineOperand &MO = MI.getOperand(UseOpIdx);
+ if (MO.isImplicit())
+ // The operands must always be in the following order:
+ // - explicit reg defs,
+ // - other explicit operands (reg uses, immediates, etc.),
+ // - implicit reg defs
+ // - implicit reg uses
+ // Therefore, removing the implicit operand won't change the explicit
+ // operands layout.
+ MI.RemoveOperand(UseOpIdx);
+ }
+}
+
// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
@@ -2256,10 +2302,11 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
.addImm(LII.Imm);
}
-MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
- unsigned &ConstOp,
- bool &SeenIntermediateUse) const {
- ConstOp = ~0U;
+MachineInstr *PPCInstrInfo::getForwardingDefMI(
+ MachineInstr &MI,
+ unsigned &OpNoForForwarding,
+ bool &SeenIntermediateUse) const {
+ OpNoForForwarding = ~0U;
MachineInstr *DefMI = nullptr;
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2276,7 +2323,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
- ConstOp = i;
+ OpNoForForwarding = i;
break;
}
}
@@ -2297,7 +2344,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
- if (!instrHasImmForm(MI, III) && !ConvertibleImmForm)
+ if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm)
return nullptr;
// Don't convert or %X, %Y, %Y since that's just a register move.
@@ -2319,15 +2366,22 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
if (PPC::G8RCRegClass.contains(Reg))
Reg = Reg - PPC::X0 + PPC::R0;
- // Is this register defined by a load-immediate in this block?
+ // Is this register defined by some form of add-immediate (including
+ // load-immediate) within this basic block?
for ( ; It != E; ++It) {
if (It->modifiesRegister(Reg, &getRegisterInfo())) {
- if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) {
- ConstOp = i;
+ switch (It->getOpcode()) {
+ default: break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::ADDItocL:
+ case PPC::ADDI:
+ case PPC::ADDI8:
+ OpNoForForwarding = i;
return &*It;
- } else
- break;
- } else if (It->readsRegister(Reg, &getRegisterInfo()))
+ }
+ break;
+ } else if (It->readsRegister(Reg, &getRegisterInfo()))
// If we see another use of this reg between the def and the MI,
// we want to flag it so the def isn't deleted.
SeenIntermediateUse = true;
@@ -2335,7 +2389,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
}
}
}
- return ConstOp == ~0U ? nullptr : DefMI;
+ return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
@@ -2371,35 +2425,48 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
}
// If this instruction has an immediate form and one of its operands is a
-// result of a load-immediate, convert it to the immediate form if the constant
-// is in range.
+// result of a load-immediate or an add-immediate, convert it to
+// the immediate form if the constant is in range.
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef) const {
MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
bool PostRA = !MRI->isSSA();
bool SeenIntermediateUse = true;
- unsigned ConstantOperand = ~0U;
- MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand,
- SeenIntermediateUse);
- if (!DefMI || !DefMI->getOperand(1).isImm())
+ unsigned ForwardingOperand = ~0U;
+ MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
+ SeenIntermediateUse);
+ if (!DefMI)
+ return false;
+ assert(ForwardingOperand < MI.getNumOperands() &&
+ "The forwarding operand needs to be valid at this point");
+ bool KillFwdDefMI = !SeenIntermediateUse &&
+ MI.getOperand(ForwardingOperand).isKill();
+ if (KilledDef && KillFwdDefMI)
+ *KilledDef = DefMI;
+
+ ImmInstrInfo III;
+ bool HasImmForm = instrHasImmForm(MI, III, PostRA);
+ // If this is a reg+reg instruction that has a reg+imm form,
+ // and one of the operands is produced by an add-immediate,
+ // try to convert it.
+ if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand,
+ *DefMI, KillFwdDefMI))
+ return true;
+
+ if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
+ !DefMI->getOperand(1).isImm())
return false;
- assert(ConstantOperand < MI.getNumOperands() &&
- "The constant operand needs to be valid at this point");
int64_t Immediate = DefMI->getOperand(1).getImm();
// Sign-extend to 64-bits.
int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
(Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
- if (KilledDef && MI.getOperand(ConstantOperand).isKill() &&
- !SeenIntermediateUse)
- *KilledDef = DefMI;
-
- // If this is a reg+reg instruction that has a reg+imm form, convert it now.
- ImmInstrInfo III;
- if (instrHasImmForm(MI, III))
- return transformToImmForm(MI, III, ConstantOperand, SExtImm);
+ // If this is a reg+reg instruction that has a reg+imm form,
+ // and one of the operands is produced by LI, convert it now.
+ if (HasImmForm)
+ return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm);
bool ReplaceWithLI = false;
bool Is64BitLI = false;
@@ -2443,7 +2510,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
- CompareUseMI.getOperand(1).ChangeToImmediate(0);
+ replaceInstrOperandWithImm(CompareUseMI, 1, 0);
CompareUseMI.RemoveOperand(3);
CompareUseMI.RemoveOperand(2);
continue;
@@ -2602,18 +2669,23 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
+static bool isVFReg(unsigned Reg) {
+ return PPC::VFRCRegClass.contains(Reg);
+}
+
bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
- ImmInstrInfo &III) const {
+ ImmInstrInfo &III, bool PostRA) const {
unsigned Opc = MI.getOpcode();
// The vast majority of the instructions would need their operand 2 replaced
// with an immediate when switching to the reg+imm form. A marked exception
// are the update form loads/stores for which a constant operand 2 would need
// to turn into a displacement and move operand 1 to the operand 2 position.
III.ImmOpNo = 2;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
III.TruncateImmTo = 0;
+ III.IsSummingOperands = false;
switch (Opc) {
default: return false;
case PPC::ADD4:
@@ -2622,6 +2694,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 1;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
break;
case PPC::ADDC:
@@ -2630,6 +2703,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
break;
case PPC::ADDCo:
@@ -2637,6 +2711,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpcode = PPC::ADDICo;
break;
case PPC::SUBFC:
@@ -2809,8 +2884,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpNo = 1;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
@@ -2866,8 +2942,9 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ZeroIsSpecialOrig = 2;
III.ZeroIsSpecialNew = 3;
III.IsCommutative = false;
+ III.IsSummingOperands = true;
III.ImmOpNo = 2;
- III.ConstantOpNo = 3;
+ III.OpNoForForwarding = 3;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
@@ -2898,21 +2975,30 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
}
break;
- // Power9 only.
+ // Power9 and up only. For some of these, the X-Form version has access to all
+ // 64 VSRs whereas the D-Form only has access to the VRs. We replace those
+ // with pseudo-ops pre-RA, and post-RA we check that the register loaded
+ // into or stored from is one of the VR registers.
case PPC::LXVX:
case PPC::LXSSPX:
case PPC::LXSDX:
case PPC::STXVX:
case PPC::STXSSPX:
case PPC::STXSDX:
+ case PPC::XFLOADf32:
+ case PPC::XFLOADf64:
+ case PPC::XFSTOREf32:
+ case PPC::XFSTOREf64:
if (!Subtarget.hasP9Vector())
return false;
III.SignedImm = true;
III.ZeroIsSpecialOrig = 1;
III.ZeroIsSpecialNew = 2;
III.IsCommutative = true;
+ III.IsSummingOperands = true;
III.ImmOpNo = 1;
- III.ConstantOpNo = 2;
+ III.OpNoForForwarding = 2;
+ III.ImmMustBeMultipleOf = 4;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::LXVX:
@@ -2920,24 +3006,64 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ImmMustBeMultipleOf = 16;
break;
case PPC::LXSSPX:
- III.ImmOpcode = PPC::LXSSP;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::LXSSP;
+ else {
+ III.ImmOpcode = PPC::LFS;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFLOADf32:
+ III.ImmOpcode = PPC::DFLOADf32;
break;
case PPC::LXSDX:
- III.ImmOpcode = PPC::LXSD;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::LXSD;
+ else {
+ III.ImmOpcode = PPC::LFD;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFLOADf64:
+ III.ImmOpcode = PPC::DFLOADf64;
break;
case PPC::STXVX:
III.ImmOpcode = PPC::STXV;
III.ImmMustBeMultipleOf = 16;
break;
case PPC::STXSSPX:
- III.ImmOpcode = PPC::STXSSP;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::STXSSP;
+ else {
+ III.ImmOpcode = PPC::STFS;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFSTOREf32:
+ III.ImmOpcode = PPC::DFSTOREf32;
break;
case PPC::STXSDX:
- III.ImmOpcode = PPC::STXSD;
- III.ImmMustBeMultipleOf = 4;
+ if (PostRA) {
+ if (isVFReg(MI.getOperand(0).getReg()))
+ III.ImmOpcode = PPC::STXSD;
+ else {
+ III.ImmOpcode = PPC::STFD;
+ III.ImmMustBeMultipleOf = 1;
+ }
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case PPC::XFSTOREf64:
+ III.ImmOpcode = PPC::DFSTOREf64;
break;
}
break;
@@ -2984,13 +3110,264 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
}
}
-bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo,
- int64_t Imm) const {
+// Check whether 'MI', with the forwarding candidate at operand index
+// OpNoForForwarding, meets the requirements described in the ImmInstrInfo.
+bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned OpNoForForwarding
+ ) const {
+ // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
+ // would not work pre-RA, we can only do the check post RA.
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return false;
+
+ // Cannot do the transform if MI isn't summing the operands.
+ if (!III.IsSummingOperands)
+ return false;
+
+ // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
+ if (!III.ZeroIsSpecialOrig)
+ return false;
+
+ // We cannot do the transform if the operand we are trying to replace
+ // isn't the same as the operand the instruction allows.
+ if (OpNoForForwarding != III.OpNoForForwarding)
+ return false;
+
+ // Check if the instruction we are trying to transform really has
+ // the special zero register as its operand.
+ if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
+ MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
+ return false;
+
+ // This machine instruction is convertible if:
+ // 1. it is summing the operands,
+ // 2. one of the operands is the special zero register, and
+ // 3. the operand we are trying to replace is the one the MI allows.
+ return true;
+}
+
+// Check if the DefMI is the add inst and set the ImmMO and RegMO
+// accordingly.
+bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ MachineOperand *&ImmMO,
+ MachineOperand *&RegMO) const {
+ unsigned Opc = DefMI.getOpcode();
+ if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
+ return false;
+
+ assert(DefMI.getNumOperands() >= 3 &&
+ "Add inst must have at least three operands");
+ RegMO = &DefMI.getOperand(1);
+ ImmMO = &DefMI.getOperand(2);
+
+ // This DefMI is eligible for forwarding if it is:
+ // 1. an add instruction, and
+ // 2. one of its operands is an Imm/CPI/Global.
+ return isAnImmediateOperand(*ImmMO);
+}
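+// Illustrative sketch (example registers and immediate): for a DefMI such as
+//   %r4 = ADDI %r5, 24
+// RegMO is bound to operand 1 (%r5) and ImmMO to operand 2 (24); an ADDItocL
+// whose operand 2 is a global or constant-pool reference is also accepted via
+// isAnImmediateOperand.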
+
+bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
+ const MachineInstr &DefMI,
+ const MachineInstr &MI,
+ bool KillDefMI
+ ) const {
+ // x = addi y, imm
+ // ...
+ // z = lfdx 0, x -> z = lfd imm(y)
+ // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
+ // of "y" between the DEF of "x" and "z".
+ // The query is only valid post RA.
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return false;
+
+ // MachineInstr::readsRegister only returns true if the machine
+ // instruction reads the exact register or its super-register. It
+ // does not consider uses of sub-registers which seems like strange
+ // behaviour. Nonetheless, if we end up with a 64-bit register here,
+ // get the corresponding 32-bit register to check.
+ unsigned Reg = RegMO.getReg();
+ if (PPC::G8RCRegClass.contains(Reg))
+ Reg = Reg - PPC::X0 + PPC::R0;
+
+ // Walk the instructions in reverse (MI --> DefMI) to find the last DEF of Reg.
+ MachineBasicBlock::const_reverse_iterator It = MI;
+ MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
+ It++;
+ for (; It != E; ++It) {
+ if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+ return false;
+ // Made it to DefMI without encountering a clobber.
+ if ((&*It) == &DefMI)
+ break;
+ }
+ assert((&*It) == &DefMI && "DefMI is missing");
+
+ // If DefMI also uses the register to be forwarded, we can only forward it
+ // if DefMI is being erased.
+ if (DefMI.readsRegister(Reg, &getRegisterInfo()))
+ return KillDefMI;
+
+ return true;
+}
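+// Illustrative counter-example (sketch): forwarding must be rejected when the
+// base register is clobbered in between, e.g.
+//   %r4 = ADDI %r5, 8        ; DefMI
+//   %r5 = LI 0               ; redefines %r5
+//   %r3 = LBZX $zero, %r4    ; MI -- can no longer be rewritten as LBZ 8(%r5)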
+
+bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
+ const MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ int64_t &Imm) const {
+ assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
+ if (DefMI.getOpcode() == PPC::ADDItocL) {
+ // The operand of ADDItocL is a CPI (constant pool index), which is not an
+ // immediate at compile time. However, we know that it is 16 bits wide and
+ // has an alignment of 4. Check whether the instruction meets those
+ // requirements.
+ if (III.ImmMustBeMultipleOf > 4 ||
+ III.TruncateImmTo || III.ImmWidth != 16)
+ return false;
+
+ // Going from X-Form to D-Form loads means that the displacement needs to be
+ // not just an immediate but also a multiple of 4, or of 16 depending on the
+ // load. A D-Form load cannot be represented if the displacement is only a
+ // multiple of, say, 2. X-Form loads do not have this restriction.
+ if (ImmMO.isGlobal() &&
+ ImmMO.getGlobal()->getAlignment() < III.ImmMustBeMultipleOf)
+ return false;
+
+ return true;
+ }
+
+ if (ImmMO.isImm()) {
+ // It is an immediate; we need to check whether it fits in the range.
+ int64_t Immediate = ImmMO.getImm();
+ // Sign-extend to 64-bits.
+ Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
+ (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
+
+ if (Imm % III.ImmMustBeMultipleOf)
+ return false;
+ if (III.TruncateImmTo)
+ Imm &= ((1 << III.TruncateImmTo) - 1);
+ if (III.SignedImm) {
+ APInt ActualValue(64, Imm, true);
+ if (!ActualValue.isSignedIntN(III.ImmWidth))
+ return false;
+ } else {
+ uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
+ if ((uint64_t)Imm > UnsignedMax)
+ return false;
+ }
+ }
+ else
+ return false;
+
+ // This ImmMO can be forwarded if it meets the requirements described
+ // in the ImmInstrInfo.
+ return true;
+}
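+// Rough numeric sketch (assumed values, for illustration): with a target such
+// as LXSD (ImmWidth == 16, SignedImm, ImmMustBeMultipleOf == 4), an added
+// immediate of 40 is accepted, 42 fails the multiple-of-4 check, and 40000
+// fails the signed 16-bit range check.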
+
+// If an X-Form instruction is fed by an add-immediate and one of its operands
+// is the literal zero, attempt to forward the source of the add-immediate to
+// the corresponding D-Form instruction with the displacement coming from
+// the immediate being added.
+bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned OpNoForForwarding,
+ MachineInstr &DefMI,
+ bool KillDefMI) const {
+ // RegMO ImmMO
+ // | |
+ // x = addi reg, imm <----- DefMI
+ // y = op 0 , x <----- MI
+ // |
+ // OpNoForForwarding
+ // Check whether the MI meets the requirements described in the III.
+ if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
+ return false;
+
+ // Check whether the DefMI meets the requirements
+ // described in the III. If it does, set the ImmMO and RegMO accordingly.
+ MachineOperand *ImmMO = nullptr;
+ MachineOperand *RegMO = nullptr;
+ if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
+ return false;
+ assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
+
+ // Now that we have the Imm operand, check whether the ImmMO meets
+ // the requirements described in the III. If it does, set the Imm.
+ int64_t Imm = 0;
+ if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
+ return false;
+
+ // Check if the RegMO can be forwarded to MI.
+ if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI))
+ return false;
+
+ // We know that both the MI and the DefMI match the pattern and that the Imm
+ // also meets the requirements of the new imm-form instruction.
+ // It is safe to do the transformation now.
+ LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Fed by:\n");
+ LLVM_DEBUG(DefMI.dump());
+
+ // Update the base reg first.
+ MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
+ false, false,
+ RegMO->isKill());
+
+ // Then, update the imm.
+ if (ImmMO->isImm()) {
+ // If the ImmMO is Imm, change the operand that has ZERO to that Imm
+ // directly.
+ replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm);
+ }
+ else {
+ // Otherwise, it is a constant pool index (CPI) or a global, which is in
+ // fact a relocation. We need to replace the special zero register with
+ // ImmMO.
+ // Before that, we need to fix up the target flags for the immediate; for
+ // some reason the flag is not set on the ImmMO when it is a CPI.
+ if (DefMI.getOpcode() == PPC::ADDItocL)
+ ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
+
+ // MachineInstr has MI.getOperand(i) but no MI.setOperand(i) interface.
+ // To replace the ZERO MachineOperand with ImmMO, we remove the ZERO operand
+ // and all the operands behind it, add the ImmMO, and then add back all the
+ // operands that followed ZERO.
+ SmallVector<MachineOperand, 2> MOps;
+ for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
+ MOps.push_back(MI.getOperand(i));
+ MI.RemoveOperand(i);
+ }
+
+ // Remove the last MO in the list, which is in fact the ZERO operand.
+ MOps.pop_back();
+ // Add the imm operand.
+ MI.addOperand(*ImmMO);
+ // Now add the rest back.
+ for (auto &MO : MOps)
+ MI.addOperand(MO);
+ }
+
+ // Update the opcode.
+ MI.setDesc(get(III.ImmOpcode));
+
+ LLVM_DEBUG(dbgs() << "With:\n");
+ LLVM_DEBUG(MI.dump());
+
+ return true;
+}
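+// End-to-end sketch of the transformation above (illustrative registers and
+// immediate, mirroring the diagram in the function header):
+//   %r4 = ADDI %r5, 16          ; DefMI
+//   %r3 = LBZX $zero, %r4       ; MI, X-Form use of the special zero register
+// becomes
+//   %r3 = LBZ 16, %r5           ; D-Form with the forwarded base register and
+//                               ;   the added immediate as the displacement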
+
+bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
+ const ImmInstrInfo &III,
+ unsigned ConstantOpNo,
+ int64_t Imm) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
// Exit early if we can't convert this.
- if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative)
+ if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
@@ -3035,7 +3412,7 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo;
MI.setDesc(get(III.ImmOpcode));
- if (ConstantOpNo == III.ConstantOpNo) {
+ if (ConstantOpNo == III.OpNoForForwarding) {
// Converting shifts to immediate form is a bit tricky since they may do
// one of three things:
// 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
@@ -3063,42 +3440,47 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
uint64_t MB = RightShift ? ShAmt : 0;
uint64_t ME = RightShift ? 31 : 31 - ShAmt;
- MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+ replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
.addImm(ME);
} else {
// Left shifts use (N, 63-N), right shifts use (64-N, N).
uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
- MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH);
+ replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
}
}
} else
- MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
+ replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
}
// Convert commutative instructions (switch the operands and convert the
// desired one to an immediate).
else if (III.IsCommutative) {
- MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm);
- swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo);
+ replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
+ swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
} else
llvm_unreachable("Should have exited early!");
// For instructions for which the constant register replaces a different
// operand than where the immediate goes, we need to swap them.
- if (III.ConstantOpNo != III.ImmOpNo)
- swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo);
+ if (III.OpNoForForwarding != III.ImmOpNo)
+ swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo);
- // If the R0/X0 register is special for the original instruction and not for
- // the new instruction (or vice versa), we need to fix up the register class.
+ // If the special R0/X0 register indices differ between the original and the
+ // new instruction, we need to fix up the register class of the new
+ // instruction.
if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
- if (!III.ZeroIsSpecialOrig) {
+ if (III.ZeroIsSpecialNew) {
+ // If the operand at III.ZeroIsSpecialNew is a physical register (e.g.
+ // ZERO/ZERO8), there is no need to fix up the register class.
unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
- const TargetRegisterClass *NewRC =
- MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
- &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
- MRI.setRegClass(RegToModify, NewRC);
+ if (TargetRegisterInfo::isVirtualRegister(RegToModify)) {
+ const TargetRegisterClass *NewRC =
+ MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
+ &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
+ MRI.setRegClass(RegToModify, NewRC);
+ }
}
}
return true;
@@ -3140,6 +3522,7 @@ static bool isSignExtendingOp(const MachineInstr &MI) {
Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
+ Opcode == PPC::SETB || Opcode == PPC::SETB8 ||
Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
Opcode == PPC::EXTSB8_32_64)
return true;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index ba82f56a2464..7ed558b835af 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -91,8 +91,8 @@ struct ImmInstrInfo {
uint64_t ZeroIsSpecialNew : 3;
// Is the operation commutative?
uint64_t IsCommutative : 1;
- // The operand number to check for load immediate.
- uint64_t ConstantOpNo : 3;
+ // The operand number to check for add-immediate def.
+ uint64_t OpNoForForwarding : 3;
// The operand number for the immediate.
uint64_t ImmOpNo : 3;
// The opcode of the new instruction.
@@ -101,6 +101,8 @@ struct ImmInstrInfo {
uint64_t ImmWidth : 5;
// The immediate should be truncated to N bits.
uint64_t TruncateImmTo : 5;
+ // Is the instruction summing the operands?
+ uint64_t IsSummingOperands : 1;
};
// Information required to convert an instruction to just a materialized
@@ -123,10 +125,42 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
- bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo, int64_t Imm) const;
- MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp,
- bool &SeenIntermediateUse) const;
+
+ // If the inst has an imm-form and one of its operands is produced by an LI,
+ // put the imm into the inst directly and remove the LI if possible.
+ bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned ConstantOpNo, int64_t Imm) const;
+ // If the inst has an imm-form and one of its operands is produced by an
+ // add-immediate, try to transform it when possible.
+ bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned ConstantOpNo,
+ MachineInstr &DefMI,
+ bool KillDefMI) const;
+ // Try to find out whether the instruction 'MI' contains any operand that
+ // could be forwarded from an instruction that feeds it. If so, return the
+ // Def of that operand, and set OpNoForForwarding to the operand index in
+ // 'MI' for that Def. If we see another use of this Def between
+ // the Def and the MI, SeenIntermediateUse becomes 'true'.
+ MachineInstr *getForwardingDefMI(MachineInstr &MI,
+ unsigned &OpNoForForwarding,
+ bool &SeenIntermediateUse) const;
+
+ // Can the user MI have its source at index \p OpNoForForwarding
+ // forwarded from an add-immediate that feeds it?
+ bool isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III,
+ unsigned OpNoForForwarding) const;
+ bool isDefMIElgibleForForwarding(MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ MachineOperand *&ImmMO,
+ MachineOperand *&RegMO) const;
+ bool isImmElgibleForForwarding(const MachineOperand &ImmMO,
+ const MachineInstr &DefMI,
+ const ImmInstrInfo &III,
+ int64_t &Imm) const;
+ bool isRegElgibleForForwarding(const MachineOperand &RegMO,
+ const MachineInstr &DefMI,
+ const MachineInstr &MI,
+ bool KillDefMI) const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
virtual void anchor();
@@ -158,6 +192,16 @@ public:
bool isXFormMemOp(unsigned Opcode) const {
return get(Opcode).TSFlags & PPCII::XFormMemOp;
}
+ static bool isSameClassPhysRegCopy(unsigned Opcode) {
+ unsigned CopyOpcodes[] =
+ { PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
+ PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
+ PPC::CROR, PPC::EVOR, -1U };
+ for (int i = 0; CopyOpcodes[i] != -1U; i++)
+ if (Opcode == CopyOpcodes[i])
+ return true;
+ return false;
+ }
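+ // For example, PPC::OR and PPC::FMR are in the list above since
+ // "or ra, rb, rb" and "fmr" act as same-class register copies, while an
+ // arithmetic opcode such as PPC::ADD4 is not.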
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
@@ -369,8 +413,30 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
-
- bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
+ void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
+ int64_t Imm) const;
+
+ bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III,
+ bool PostRA) const;
+
+ /// getRegNumForOperand - some operands use different numbering schemes
+ /// for the same registers. For example, a VSX instruction may have any of
+ /// vs0-vs63 allocated whereas an Altivec instruction could only have
+ /// vs32-vs63 allocated (numbered as v0-v31). This function returns the actual
+ /// register number needed for the opcode/operand number combination.
+ /// The operand number argument will be useful when we need to extend this
+ /// to instructions that use both Altivec and VSX numbering (for different
+ /// operands).
+ static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
+ unsigned OpNo) {
+ if (Desc.TSFlags & PPCII::UseVSXReg) {
+ if (isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ }
+ return Reg;
+ }
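+ // For example, when UseVSXReg is set in the descriptor, V0..V31 map onto
+ // VSX32..VSX63 (and VF0..VF31 likewise), so an operand recorded as V2 would
+ // be encoded with the vs34 numbering.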
};
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1a43037e4a4b..dd3f1ac79089 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -114,6 +114,10 @@ def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
+def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
+]>;
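+// EXTSWSLI ("extend sign word and shift left immediate") produces, roughly,
+// (shl (sext i32:$src), $sh); the profile above only constrains the operand
+// and result types.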
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -218,6 +222,8 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
+
// Move 2 i64 values into a VSX register
def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
SDTypeProfile<1, 2,
@@ -1189,77 +1195,76 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
-// Pseudo-instructions:
+// Pseudo instructions:
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+def ADJCALLSTACKDOWN : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
"#ADJCALLSTACKDOWN $amt1 $amt2",
[(callseq_start timm:$amt1, timm:$amt2)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
"#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
+def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS),
"UPDATE_VRSAVE $rD, $rS", []>;
}
let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
+def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
[(set i32:$result,
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
-def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
+def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
[(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
-let usesCustomInserter = 1, // Expanded after instruction selection.
- PPC970_Single = 1 in {
+let PPC970_Single = 1 in {
// Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
// because either operand might become the first operand in an isel, and
// that operand cannot be r0.
- def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
+ def SELECT_CC_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crrc:$cond,
gprc_nor0:$T, gprc_nor0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
+ def SELECT_CC_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
- def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ def SELECT_CC_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_F4",
[]>;
- def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ def SELECT_CC_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_F8",
[]>;
- def SELECT_CC_F16 : Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ def SELECT_CC_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_F16",
[]>;
- def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ def SELECT_CC_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
// SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
// register bit directly.
- def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond,
+ def SELECT_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crbitrc:$cond,
gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4",
[(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>;
- def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
+ def SELECT_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8",
[(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>;
let Predicates = [HasFPU] in {
- def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
+ def SELECT_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
f4rc:$T, f4rc:$F), "#SELECT_F4",
[(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
- def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
+ def SELECT_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
f8rc:$T, f8rc:$F), "#SELECT_F8",
[(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_F16 : Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ def SELECT_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
vrrc:$T, vrrc:$F), "#SELECT_F16",
[(set f128:$dst, (select i1:$cond, f128:$T, f128:$F))]>;
}
- def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ def SELECT_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
vrrc:$T, vrrc:$F), "#SELECT_VRRC",
[(set v4i32:$dst,
(select i1:$cond, v4i32:$T, v4i32:$F))]>;
@@ -1268,18 +1273,18 @@ let Predicates = [HasFPU] in {
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
let mayStore = 1 in {
-def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
+def SPILL_CR : PPCEmitTimePseudo<(outs), (ins crrc:$cond, memri:$F),
"#SPILL_CR", []>;
-def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F),
+def SPILL_CRBIT : PPCEmitTimePseudo<(outs), (ins crbitrc:$cond, memri:$F),
"#SPILL_CRBIT", []>;
}
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in {
-def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
+def RESTORE_CR : PPCEmitTimePseudo<(outs crrc:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
-def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
+def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
"#RESTORE_CRBIT", []>;
}
@@ -1305,10 +1310,10 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR] in
- def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
+ def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
let Defs = [LR] in
- def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+ def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
@@ -1506,19 +1511,19 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNdi :Pseudo< (outs),
+def TCRETURNdi :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+def TCRETURNai :PPCEmitTimePseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa $func $offset",
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
+def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
@@ -1544,14 +1549,19 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
}
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+// While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
+// is not.
+let hasSideEffects = 1 in {
let Defs = [CTR] in
- def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp32 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP32",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In32BitMode]>;
+}
+
+let hasSideEffects = 1, isBarrier = 1 in {
let isTerminator = 1 in
- def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ def EH_SjLj_LongJmp32 : PPCCustomInserterPseudo<(outs), (ins memr:$buf),
"#EH_SJLJ_LONGJMP32",
[(PPCeh_sjlj_longjmp addr:$buf)]>,
Requires<[In32BitMode]>;
@@ -1561,7 +1571,7 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
// a terminator. Size is set to 0 to prevent the builtin assembler
// from emitting it.
let isBranch = 1, isTerminator = 1, Size = 0 in {
- def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ def EH_SjLj_Setup : PPCEmitTimePseudo<(outs), (ins directbrtarget:$dst),
"#EH_SjLj_Setup\t$dst", []>;
}
@@ -1648,119 +1658,117 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
// clean this up in PPCMIPeephole with calls to
// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them
// in the first place.
-let usesCustomInserter = 1 in {
- let Defs = [CR0] in {
- def ATOMIC_LOAD_ADD_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
- [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
- [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
- [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
- [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
- [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
- [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
- [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
- [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
- [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
- [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_ADD_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
- [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
- [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
- [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
- [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
- [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
- [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
- [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
- [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
- [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
- [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_ADD_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
- [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_SUB_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
- [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_AND_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
- [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_OR_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
- [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_XOR_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
- [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_NAND_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
- [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MIN_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
- [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_MAX_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
- [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMIN_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
- [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
- def ATOMIC_LOAD_UMAX_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
- [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
-
- def ATOMIC_CMP_SWAP_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
- [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
- def ATOMIC_CMP_SWAP_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
- [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
- def ATOMIC_CMP_SWAP_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
- [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
-
- def ATOMIC_SWAP_I8 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
- [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
- def ATOMIC_SWAP_I16 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
- [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
- def ATOMIC_SWAP_I32 : Pseudo<
- (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
- [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
- }
+let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
+ [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
+ [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
+ [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
+ [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
+ [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
+ [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
+ [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
+ [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
+ [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_MAX_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
+ [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
+ [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
+ [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+
+ def ATOMIC_SWAP_I8 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I16 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I32 : PPCCustomInserterPseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
@@ -1988,15 +1996,15 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
-def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
- "stb $rS, $src", IIC_LdStStore,
- [(truncstorei8 i32:$rS, iaddr:$src)]>;
-def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
- "sth $rS, $src", IIC_LdStStore,
- [(truncstorei16 i32:$rS, iaddr:$src)]>;
-def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
- "stw $rS, $src", IIC_LdStStore,
- [(store i32:$rS, iaddr:$src)]>;
+def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst),
+ "stb $rS, $dst", IIC_LdStStore,
+ [(truncstorei8 i32:$rS, iaddr:$dst)]>;
+def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst),
+ "sth $rS, $dst", IIC_LdStStore,
+ [(truncstorei16 i32:$rS, iaddr:$dst)]>;
+def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst),
+ "stw $rS, $dst", IIC_LdStStore,
+ [(store i32:$rS, iaddr:$dst)]>;
let Predicates = [HasFPU] in {
def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
"stfs $rS, $dst", IIC_LdStSTFD,
@@ -2010,13 +2018,13 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
let Predicates = [HasFPU] in {
def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
@@ -2084,19 +2092,19 @@ def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
(ins gprc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStSTUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -2543,8 +2551,8 @@ def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
// A pseudo-instruction used to implement the read of the 64-bit cycle counter
// on a 32-bit target.
-let hasSideEffects = 1, usesCustomInserter = 1 in
-def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins),
+let hasSideEffects = 1 in
+def ReadTB : PPCCustomInserterPseudo<(outs gprc:$lo, gprc:$hi), (ins),
"#ReadTB", []>;
let Uses = [CTR] in {
@@ -2603,13 +2611,13 @@ def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>;
// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
// so we'll need to scavenge a register for it.
let mayStore = 1 in
-def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
"#SPILL_VRSAVE", []>;
// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in
-def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
let hasSideEffects = 0 in {
@@ -2648,9 +2656,9 @@ def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins),
} // hasSideEffects = 0
let Predicates = [HasFPU] in {
-// Pseudo instruction to perform FADD in round-to-zero mode.
-let usesCustomInserter = 1, Uses = [RM] in {
- def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
+// Custom inserter instruction to perform FADD in round-to-zero mode.
+let Uses = [RM] in {
+ def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
[(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
}
@@ -3022,23 +3030,23 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
// Support for thread-local storage.
-def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
// This uses two output registers, the first as the real output, the second as a
// temporary register, used internally in code generation.
-def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
[]>, NoEncode<"$rT">;
-def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
"#LDgotTprelL32",
[(set i32:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
(ADD4TLS $in, tglobaltlsaddr:$g)>;
-def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDItlsgdL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
@@ -3046,7 +3054,7 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsADDR32",
[(set i32:$rD,
(PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
@@ -3054,14 +3062,14 @@ def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+def ADDItlsgdLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsgdLADDR32",
[(set i32:$rD,
(PPCaddiTlsgdLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
-def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDItlsldL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
@@ -3069,7 +3077,7 @@ def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+def GETtlsldADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsldADDR32",
[(set i32:$rD,
(PPCgetTlsldAddr i32:$reg,
@@ -3078,31 +3086,31 @@ def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
-def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+def ADDItlsldLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsldLADDR32",
[(set i32:$rD,
(PPCaddiTlsldLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
-def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDIdtprelL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,
(PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
-def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDISdtprelHA32",
[(set i32:$rD,
(PPCaddisDtprelHA i32:$reg,
tglobaltlsaddr:$disp))]>;
// Support for Position-independent code
-def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
"#LWZtoc",
[(set i32:$rD,
(PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
// Get Global (GOT) Base Register offset, from the word immediately preceding
// the function label.
-def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
// Standard shifts. These are represented separately from the real shifts above
@@ -3930,21 +3938,19 @@ def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
(SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-let usesCustomInserter = 1 in {
-def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
"#ANDIo_1_EQ_BIT",
[(set i1:$dst, (trunc (not i32:$in)))]>;
-def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in),
"#ANDIo_1_GT_BIT",
[(set i1:$dst, (trunc i32:$in))]>;
-def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
"#ANDIo_1_EQ_BIT8",
[(set i1:$dst, (trunc (not i64:$in)))]>;
-def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in),
"#ANDIo_1_GT_BIT8",
[(set i1:$dst, (trunc i64:$in))]>;
-}
def : Pat<(i1 (not (trunc i32:$in))),
(ANDIo_1_EQ_BIT $in)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index c4bb02695b36..ef589ad01fd7 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -245,32 +245,30 @@ let Uses = [RM] in {
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
- let usesCustomInserter = 1 in {
- def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QFRC",
- []>;
- def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QSRC",
- []>;
- def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
- i32imm:$BROPC), "#SELECT_CC_QBRC",
- []>;
-
- // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
- // register bit directly.
- def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
- qfrc:$T, qfrc:$F), "#SELECT_QFRC",
- [(set v4f64:$dst,
- (select i1:$cond, v4f64:$T, v4f64:$F))]>;
- def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
- qsrc:$T, qsrc:$F), "#SELECT_QSRC",
- [(set v4f32:$dst,
- (select i1:$cond, v4f32:$T, v4f32:$F))]>;
- def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
- qbrc:$T, qbrc:$F), "#SELECT_QBRC",
- [(set v4i1:$dst,
- (select i1:$cond, v4i1:$T, v4i1:$F))]>;
- }
+ def SELECT_CC_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: PPCCustomInserterPseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: PPCCustomInserterPseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: PPCCustomInserterPseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
// Convert and Round Instructions
def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 96649efdc1bc..9f5891a45f22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -831,22 +831,20 @@ def : Pat<(f64 (fpextend f32:$src)),
}
let Predicates = [HasSPE] in {
- let usesCustomInserter = 1 in {
-def SELECT_CC_SPE4 : Pseudo<(outs spe4rc:$dst),
+def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
(ins crrc:$cond, spe4rc:$T, spe4rc:$F,
i32imm:$BROPC), "#SELECT_CC_SPE4",
[]>;
-def SELECT_CC_SPE : Pseudo<(outs sperc:$dst),
+def SELECT_CC_SPE : PPCCustomInserterPseudo<(outs sperc:$dst),
(ins crrc:$cond, sperc:$T, sperc:$F, i32imm:$BROPC),
"#SELECT_CC_SPE",
[]>;
-def SELECT_SPE4 : Pseudo<(outs spe4rc:$dst), (ins crbitrc:$cond,
+def SELECT_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst), (ins crbitrc:$cond,
spe4rc:$T, spe4rc:$F), "#SELECT_SPE4",
[(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
-def SELECT_SPE : Pseudo<(outs sperc:$dst), (ins crbitrc:$cond,
+def SELECT_SPE : PPCCustomInserterPseudo<(outs sperc:$dst), (ins crbitrc:$cond,
sperc:$T, sperc:$F), "#SELECT_SPE",
[(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
- }
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
(SELECT_SPE4 (CRANDC $lhs, $rhs), $tval, $fval)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 781a3277441a..0f073388dc74 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -67,6 +67,10 @@ def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
def SDTVecConv : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>
]>;
+def SDTVabsd : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
+]>;
+
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -79,6 +83,7 @@ def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
+def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -132,7 +137,7 @@ let Uses = [RM] in {
[]>;
// Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later
- let isPseudo = 1, CodeSize = 3 in
+ let CodeSize = 3 in
def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
"#XFLOADf64",
[(set f64:$XT, (load xoaddr:$src))]>;
@@ -163,7 +168,7 @@ let Uses = [RM] in {
[]>;
// Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later
- let isPseudo = 1, CodeSize = 3 in
+ let CodeSize = 3 in
def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
"#XFSTOREf64",
[(store f64:$XT, xoaddr:$dst)]>;
@@ -898,37 +903,36 @@ let Uses = [RM] in {
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
-let usesCustomInserter = 1, // Expanded after instruction selection.
- PPC970_Single = 1 in {
+let PPC970_Single = 1 in {
- def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+ def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
(ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
"#SELECT_CC_VSRC",
[]>;
- def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+ def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
(ins crbitrc:$cond, vsrc:$T, vsrc:$F),
"#SELECT_VSRC",
[(set v2f64:$dst,
(select i1:$cond, v2f64:$T, v2f64:$F))]>;
- def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+ def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
(ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_VSFRC",
[]>;
- def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+ def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
(ins crbitrc:$cond, f8rc:$T, f8rc:$F),
"#SELECT_VSFRC",
[(set f64:$dst,
(select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst),
+ def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
(ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_VSSRC",
[]>;
- def SELECT_VSSRC: Pseudo<(outs f4rc:$dst),
+ def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
(ins crbitrc:$cond, f4rc:$T, f4rc:$F),
"#SELECT_VSSRC",
[(set f32:$dst,
(select i1:$cond, f32:$T, f32:$F))]>;
-} // usesCustomInserter
+}
} // AddedComplexity
def : InstAlias<"xvmovdp $XT, $XB",
@@ -1040,17 +1044,14 @@ def : Pat<(v2f64 (bitconvert v1i128:$A)),
def : Pat<(v1i128 (bitconvert v2f64:$A)),
(COPY_TO_REGCLASS $A, VRRC)>;
-// sign extension patterns
-// To extend "in place" from v2i32 to v2i64, we have input data like:
-// | undef | i32 | undef | i32 |
-// but xvcvsxwdp expects the input in big-Endian format:
-// | i32 | undef | i32 | undef |
-// so we need to shift everything to the left by one i32 (word) before
-// the conversion.
-def : Pat<(sext_inreg v2i64:$C, v2i32),
- (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>;
-def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
- (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+def : Pat<(v2i64 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
(v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
@@ -1069,10 +1070,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// Stores.
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
@@ -1159,6 +1156,26 @@ def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
(XVRSQRTEDP $A)>;
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+
let Predicates = [IsLittleEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
(f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1200,6 +1217,27 @@ def ScalarLoads {
dag Li32 = (i32 (load xoaddr:$src));
}
+def DWToSPExtractConv {
+ dag El0US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag El0SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
we should define new patterns or otherwise match the same patterns
@@ -1241,23 +1279,19 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
"lxsiwzx $XT, $src", IIC_LdStLFD, []>;
- // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
- // would cause these Pseudos are not expanded in expandPostRAPseudos()
- let isPseudo = 1 in {
- // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
- let CodeSize = 3 in
- def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
- "#XFLOADf32",
- [(set f32:$XT, (load xoaddr:$src))]>;
- // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
- def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWAX",
- [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
- // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
- def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWZX",
- [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
- }
+ // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
+ let CodeSize = 3 in
+ def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
+ "#XFLOADf32",
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
+ def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWAX",
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
+ def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWZX",
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
@@ -1268,19 +1302,15 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
"stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
- // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
- // would cause these Pseudos are not expanded in expandPostRAPseudos()
- let isPseudo = 1 in {
- // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
- let CodeSize = 3 in
- def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
- "#XFSTOREf32",
- [(store f32:$XT, xoaddr:$dst)]>;
- // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
- def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
- "#STIWX",
- [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
- }
+ // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
+ let CodeSize = 3 in
+ def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
+ "#XFSTOREf32",
+ [(store f32:$XT, xoaddr:$dst)]>;
+ // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
+ def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
+ "#STIWX",
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
} // UseVSXReg = 1
@@ -1443,35 +1473,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // UseVSXReg = 1
let Predicates = [IsLittleEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
(f32 (XSCVSXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
(f32 (XSCVUXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
}
let Predicates = [IsBigEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
}
// Instructions for converting float to i64 feeding a store.
@@ -1993,6 +2015,10 @@ let Predicates = [IsLittleEndian, HasVSX] in
def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
(f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -2671,6 +2697,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
"xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+ def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
+ (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
+
// Extract Exponent/Significand DP/QP
def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
@@ -2678,6 +2707,10 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+ def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
+ (i64 (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (XSXEXPQP $vA)), sub_64)))>;
+
// Vector Insert Word
let UseVSXReg = 1 in {
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
@@ -3238,20 +3271,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f64 (PPCVexts f64:$A, 2)),
(f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
- let isPseudo = 1 in {
- def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
- "#DFLOADf32",
- [(set f32:$XT, (load ixaddr:$src))]>;
- def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
- "#DFLOADf64",
- [(set f64:$XT, (load ixaddr:$src))]>;
- def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
- "#DFSTOREf32",
- [(store f32:$XT, ixaddr:$dst)]>;
- def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
- "#DFSTOREf64",
- [(store f64:$XT, ixaddr:$dst)]>;
- }
+ def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
+ "#DFLOADf32",
+ [(set f32:$XT, (load ixaddr:$src))]>;
+ def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
+ "#DFLOADf64",
+ [(set f64:$XT, (load ixaddr:$src))]>;
+ def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+ "#DFSTOREf32",
+ [(store f32:$XT, ixaddr:$dst)]>;
+ def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+ "#DFSTOREf64",
+ [(store f64:$XT, ixaddr:$dst)]>;
+
def : Pat<(f64 (extloadf32 ixaddr:$src)),
(COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
@@ -3533,22 +3565,20 @@ let AddedComplexity = 400 in {
}
let Predicates = [HasP9Vector] in {
- let isPseudo = 1 in {
- let mayStore = 1 in {
- def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
- (ins spilltovsrrc:$XT, memrr:$dst),
- "#SPILLTOVSR_STX", []>;
- def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
- "#SPILLTOVSR_ST", []>;
- }
- let mayLoad = 1 in {
- def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
- (ins memrr:$src),
- "#SPILLTOVSR_LDX", []>;
- def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
- "#SPILLTOVSR_LD", []>;
+ let mayStore = 1 in {
+ def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
+ (ins spilltovsrrc:$XT, memrr:$dst),
+ "#SPILLTOVSR_STX", []>;
+ def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+ "#SPILLTOVSR_ST", []>;
+ }
+ let mayLoad = 1 in {
+ def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
+ (ins memrr:$src),
+ "#SPILLTOVSR_LDX", []>;
+ def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+ "#SPILLTOVSR_LD", []>;
- }
}
}
// Integer extend helper dags 32 -> 64
@@ -3797,6 +3827,15 @@ let AddedComplexity = 400 in {
(XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
}
+ let Predicates = [IsBigEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+ (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+ (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+ }
+
// Big endian, available on all targets with VSX
let Predicates = [IsBigEndian, HasVSX] in {
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3825,6 +3864,15 @@ let AddedComplexity = 400 in {
(v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
}
+ let Predicates = [IsLittleEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+ (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+ (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+ }
+
let Predicates = [IsLittleEndian, HasVSX] in {
// Little endian, available on all targets with VSX
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3869,10 +3917,11 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (MTVSRD $A), VSRC),
(COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0),
- (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>;
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
@@ -3884,10 +3933,11 @@ let AddedComplexity = 400 in {
(COPY_TO_REGCLASS (MTVSRD $B), VSRC),
(COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0),
- (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC),
- (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>;
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
@@ -3940,10 +3990,9 @@ let AddedComplexity = 400 in {
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
(v2i64 (MTVSRDD $rB, $rA))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW
- (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)),
- (v4i32
- (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>;
+ (MTVSRDD
+ (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
+ (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
}
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
@@ -3953,10 +4002,9 @@ let AddedComplexity = 400 in {
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
(v2i64 (MTVSRDD $rB, $rA))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (VMRGOW
- (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)),
- (v4i32
- (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>;
+ (MTVSRDD
+ (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
+ (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
}
// P9 Altivec instructions that can be used to build vectors.
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
@@ -4005,3 +4053,21 @@ let AddedComplexity = 400 in {
}
}
+// Put this P9Altivec-related definition here since it may be selected to the
+// VSX instruction xvnegsp; keeping it here avoids a possible undef.
+let Predicates = [HasP9Altivec] in {
+
+ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+ (v4i32 (VABSDUW $A, $B))>;
+
+ def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+ (v8i16 (VABSDUH $A, $B))>;
+
+ def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+ (v16i8 (VABSDUB $A, $B))>;
+
+ // As the PPCVABSD description says, the last operand indicates whether to
+ // do the sign-bit flip before taking the difference.
+ def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+ (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+}
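
For context on the sign-flip trick the comment above relies on: flipping the sign bit of each 32-bit lane (which is effectively what xvnegsp does) maps signed ordering onto unsigned ordering, so the unsigned absolute-difference instructions can compute a signed absolute difference. A minimal standalone C++ sketch of that equivalence, not taken from the patch; the helper names absdu and signedAbsDiff are invented for illustration:

#include <cassert>
#include <cstdint>

// Models one 32-bit lane of vabsduw: unsigned absolute difference.
static uint32_t absdu(uint32_t a, uint32_t b) {
  return a > b ? a - b : b - a;
}

// Signed absolute difference via the sign-bit flip, mirroring the
// (PPCvabsd ..., (i32 1)) -> VABSDUW (XVNEGSP ...), (XVNEGSP ...) pattern.
static uint32_t signedAbsDiff(int32_t a, int32_t b) {
  uint32_t ua = static_cast<uint32_t>(a) ^ 0x80000000u; // sign-bit flip
  uint32_t ub = static_cast<uint32_t>(b) ^ 0x80000000u;
  return absdu(ua, ub);
}

int main() {
  assert(signedAbsDiff(-5, 7) == 12);
  assert(signedAbsDiff(100, -100) == 200);
  assert(absdu(5u, 7u) == 2); // the (i32 0) case: operands are already unsigned
  return 0;
}
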
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
index 230a04628504..d2a09f30c0f3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPfmCounters.td
@@ -1,4 +1,4 @@
-//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines intrinsics that are used by all hw codegen targets.
+// This describes the available hardware counters for PPC.
//
//===----------------------------------------------------------------------===//
-let TargetPrefix = "AMDGPU", isTarget = 1 in {
- def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+def CpuCyclesPfmCounter : PfmCounter<"CYCLES">;
+
+def DefaultPfmCounters : ProcPfmCounters {
+ let CycleCounter = CpuCyclesPfmCounter;
}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 1892d1e3dc26..4458b92ceb5e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +34,8 @@ STATISTIC(NumRRConvertedInPreEmit,
"Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
+STATISTIC(NumberOfSelfCopies,
+ "Number of self copy instructions eliminated");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -60,9 +63,32 @@ namespace {
return false;
bool Changed = false;
const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
+ unsigned Opc = MI.getOpcode();
+ // Detect self copies - these can result from running AADB.
+ if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.getNumOperands() == 3 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+ MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ else if (MCID.getNumOperands() == 2 &&
+ MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ NumberOfSelfCopies++;
+ LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ continue;
+ }
+ }
MachineInstr *DefMIToErase = nullptr;
if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
Changed = true;
@@ -74,6 +100,75 @@ namespace {
}
}
}
+
+ // Eliminate a conditional branch that depends on a CR bit set to a constant
+ // by CRSET or CRUNSET: either remove the conditional branch or convert it
+ // into an unconditional branch. Also, if the CR bit is not used by any
+ // other instruction, eliminate the CRSET/CRUNSET as well.
+ auto I = MBB.getFirstInstrTerminator();
+ if (I == MBB.instr_end())
+ continue;
+ MachineInstr *Br = &*I;
+ if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
+ continue;
+ MachineInstr *CRSetMI = nullptr;
+ unsigned CRBit = Br->getOperand(0).getReg();
+ unsigned CRReg = getCRFromCRBit(CRBit);
+ bool SeenUse = false;
+ MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
+ for (It++; It != Er; It++) {
+ if (It->modifiesRegister(CRBit, TRI)) {
+ if ((It->getOpcode() == PPC::CRUNSET ||
+ It->getOpcode() == PPC::CRSET) &&
+ It->getOperand(0).getReg() == CRBit)
+ CRSetMI = &*It;
+ break;
+ }
+ if (It->readsRegister(CRBit, TRI))
+ SeenUse = true;
+ }
+ if (!CRSetMI) continue;
+
+ unsigned CRSetOp = CRSetMI->getOpcode();
+ if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
+ (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
+ // Remove this branch since it cannot be taken.
+ InstrsToErase.push_back(Br);
+ MBB.removeSuccessor(Br->getOperand(1).getMBB());
+ }
+ else {
+ // This conditional branch is always taken. So, remove all branches
+ // and insert an unconditional branch to the destination of this.
+ MachineBasicBlock::iterator It = Br, Er = MBB.end();
+ for (; It != Er; It++) {
+ if (It->isDebugInstr()) continue;
+ assert(It->isTerminator() && "Non-terminator after a terminator");
+ InstrsToErase.push_back(&*It);
+ }
+ if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
+ ArrayRef<MachineOperand> NoCond;
+ TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
+ NoCond, Br->getDebugLoc());
+ }
+ for (auto &Succ : MBB.successors())
+ if (Succ != Br->getOperand(1).getMBB()) {
+ MBB.removeSuccessor(Succ);
+ break;
+ }
+ }
+
+ // If the CRBit is not used by another instruction, we can eliminate
+ // the CRSET/CRUNSET instruction.
+ if (!SeenUse) {
+ // We need to check use of the CRBit in successors.
+ for (auto &SuccMBB : MBB.successors())
+ if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
+ SeenUse = true;
+ break;
+ }
+ if (!SeenUse)
+ InstrsToErase.push_back(CRSetMI);
+ }
}
for (MachineInstr *MI : InstrsToErase) {
LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
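
To restate the branch-folding decision in the hunk above in isolation: once the CR bit is known to be a constant, a BC/BCn terminator is either never taken (and is deleted along with its successor edge) or always taken (and all terminators are replaced by an unconditional branch). A hedged standalone sketch of just that decision; the enum and function name are invented here, only the opcode pairing mirrors the code above:

#include <cassert>

// Stand-ins for the PPC:: opcodes referenced in the hunk above.
enum Opcode { BC, BCn, CRSET, CRUNSET };

// True if the branch can never be taken (the peephole deletes it); false if
// it is always taken (the peephole emits an unconditional branch instead).
static bool branchNeverTaken(Opcode BrOpc, Opcode CRSetOpc) {
  bool BitIsOne = (CRSetOpc == CRSET); // BC branches on 1, BCn branches on 0.
  bool Taken = (BrOpc == BC) ? BitIsOne : !BitIsOne;
  return !Taken;
}

int main() {
  assert(branchNeverTaken(BCn, CRSET));    // bit is 1, BCn does not branch
  assert(branchNeverTaken(BC, CRUNSET));   // bit is 0, BC does not branch
  assert(!branchNeverTaken(BC, CRSET));    // always taken
  assert(!branchNeverTaken(BCn, CRUNSET)); // always taken
  return 0;
}
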
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 96923a97a82c..3d067aa8e621 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -673,12 +673,15 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
- BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL),
- getCRFromCRBit(SrcReg))
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
+ // We need to move the CR field that contains the CR bit we are spilling.
+ // The super register may not be explicitly defined (i.e. it can be defined
+ // by a CR-logical that only defines the subreg) so we state that the CR
+ // field is undef. Also, in order to preserve the kill flag on the CR bit,
+ // we add it as an implicit use.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
- .addReg(getCRFromCRBit(SrcReg));
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+ .addReg(SrcReg,
+ RegState::Implicit | getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0LT, shift the bits left so that the bit to
// store is the first one. Mask all but that bit.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 91a98ee4efc7..e93fe4ce3453 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -85,8 +85,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
/// We require the register scavenger.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
@@ -141,6 +139,23 @@ public:
// Base pointer (stack realignment) support.
unsigned getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
+
+ /// stripRegisterPrefix - This method strips the character prefix from a
+ /// register name so that only the number is left. Used for Linux asm printing.
+ static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'q': // for QPX
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+ }
};
} // end namespace llvm
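
A small usage sketch for the stripRegisterPrefix() helper added above; the body is copied from the hunk so the example is self-contained, and the expected results follow directly from it:

#include <cassert>
#include <cstring>

// Copied from the PPCRegisterInfo.h hunk above: strip the character prefix
// from a register name so that only the number is left.
static const char *stripRegisterPrefix(const char *RegName) {
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'q': // for QPX
  case 'v':
    if (RegName[1] == 's')
      return RegName + 2;
    return RegName + 1;
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;
  }
  return RegName;
}

int main() {
  assert(!strcmp(stripRegisterPrefix("r3"), "3"));    // GPR
  assert(!strcmp(stripRegisterPrefix("vs34"), "34")); // VSX
  assert(!strcmp(stripRegisterPrefix("cr7"), "7"));   // condition register
  assert(!strcmp(stripRegisterPrefix("f31"), "31"));  // FPR
  assert(!strcmp(stripRegisterPrefix("lr"), "lr"));   // no prefix to strip
  return 0;
}
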
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 0e641cf9e00a..d0d29b6d2c7d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -85,6 +85,12 @@ class VSRL<FPR SubReg, string n> : PPCReg<n> {
let SubRegIndices = [sub_64];
}
+// VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
+// and encoding to match.
+class VSXReg<bits<6> num, string n> : PPCReg<n> {
+ let HWEncoding{5-0} = num;
+}
+
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
@@ -148,7 +154,7 @@ foreach Index = 0-31 in {
// Dummy VSX registers; this defines the strings "vs32"-"vs63" and is only used for
// asm printing.
foreach Index = 32-63 in {
- def VSX#Index : PPCReg<"vs"#Index>;
+ def VSX#Index : VSXReg<Index, "vs"#Index>;
}
// The representation of r0 when treated as the constant 0.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index 5ad0a517c117..c8fe7d7eea78 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -42,7 +42,6 @@ def IIC_LdStLoad : InstrItinClass;
def IIC_LdStLoadUpd : InstrItinClass;
def IIC_LdStLoadUpdX : InstrItinClass;
def IIC_LdStStore : InstrItinClass;
-def IIC_LdStStoreUpd : InstrItinClass;
def IIC_LdStDSS : InstrItinClass;
def IIC_LdStICBI : InstrItinClass;
def IIC_LdStLD : InstrItinClass;
@@ -63,8 +62,8 @@ def IIC_LdStSLBIA : InstrItinClass;
def IIC_LdStSLBIE : InstrItinClass;
def IIC_LdStSTD : InstrItinClass;
def IIC_LdStSTDCX : InstrItinClass;
-def IIC_LdStSTDU : InstrItinClass;
-def IIC_LdStSTDUX : InstrItinClass;
+def IIC_LdStSTU : InstrItinClass;
+def IIC_LdStSTUX : InstrItinClass;
def IIC_LdStSTFD : InstrItinClass;
def IIC_LdStSTFDU : InstrItinClass;
def IIC_LdStSTVEBX : InstrItinClass;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
index 2455e5e52de5..646822eedbe0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -280,13 +280,6 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [P440_LWB]>],
[1, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
- InstrStage<1, [P440_LRACC]>,
- InstrStage<1, [P440_AGEN]>,
- InstrStage<1, [P440_CRD]>,
- InstrStage<2, [P440_LWB]>],
- [2, 1, 1, 1],
- [NoBypass, P440_GPR_Bypass]>,
InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
@@ -373,14 +366,14 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<2, [P440_LWB]>],
[4, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
InstrStage<1, [P440_CRD]>,
InstrStage<2, [P440_LWB]>],
[2, 1, 1, 1],
[NoBypass, P440_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
InstrStage<1, [P440_LRACC]>,
InstrStage<1, [P440_AGEN]>,
InstrStage<1, [P440_CRD]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index 54cfae5d74b7..f34c1accc0fd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -81,8 +81,6 @@ def PPCA2Itineraries : ProcessorItineraries<
[6, 0, 0]>,
InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>],
[0, 0, 0]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
- [2, 0, 0, 0]>,
InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>],
[16, 0, 0]>,
InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>],
@@ -105,9 +103,9 @@ def PPCA2Itineraries : ProcessorItineraries<
[82, 0, 0]>, // L2 latency
InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>],
[0, 0, 0]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>],
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [A2_XU]>],
[2, 0, 0, 0]>,
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>],
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [A2_XU]>],
[2, 0, 0, 0]>,
InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>],
[82, 0, 0]>, // L2 latency
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
index d7c2bd15a258..479a970b2537 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500.td
@@ -144,7 +144,13 @@ def PPCE500Itineraries : ProcessorItineraries<
InstrStage<1, [E500_LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, E500_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SU0, E500_SU1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<1, [E500_SU0, E500_SU1], 0>,
InstrStage<1, [E500_LSU_0]>],
[6, 1], // Latency = 3
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index 5f95f2a79f66..d8bda073833f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -157,7 +157,13 @@ def PPCE500mcItineraries : ProcessorItineraries<
InstrStage<1, [E500mc_LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, E500mc_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+ InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>,
+ InstrStage<1, [E500mc_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500mc_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>,
InstrStage<1, [E500mc_LSU_0]>],
[6, 1], // Latency = 3
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 32f8e652dd56..3e50803955c4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -206,12 +206,6 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass]>,
- InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
- InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
- InstrStage<1, [E5500_LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, E5500_GPR_Bypass],
- 2>, // 2 micro-ops
InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
@@ -281,13 +275,13 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass]>,
- InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
[NoBypass, E5500_GPR_Bypass],
2>, // 2 micro-ops
- InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrItinData<IIC_LdStSTUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
InstrStage<1, [E5500_LSU_0]>],
[7, 2], // Latency = 3, Repeat rate = 1
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
index 21efd8f8f6c9..0995b7200d93 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -43,7 +43,8 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>,
InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
index 340773ef7876..1b15c7b3c7ad 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -48,7 +48,8 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 1d9f13fcb850..0044c3c6a449 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -56,7 +56,6 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>,
@@ -73,8 +72,8 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>,
InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
index b5a9f96d45ae..c802b80170fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -54,7 +54,6 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>,
InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>,
@@ -76,8 +75,8 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work
InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<3, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>,
InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index a8678f56900e..1d6e509819da 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -114,6 +114,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[4, 1, 1]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
@@ -126,6 +130,10 @@ def P7Itineraries : ProcessorItineraries<
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
P7_DU3, P7_DU4], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
@@ -253,13 +261,13 @@ def P7Itineraries : ProcessorItineraries<
InstrStage<1, [P7_LS1, P7_LS2], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[1, 1, 1]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<1, [P7_LS1, P7_LS2], 0>,
InstrStage<1, [P7_FX1, P7_FX2]>,
InstrStage<1, [P7_FX1, P7_FX2]>],
[2, 1, 1, 1]>,
- InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P7_DU1], 0>,
InstrStage<1, [P7_DU2], 0>,
InstrStage<1, [P7_DU3], 0>,
InstrStage<1, [P7_DU4], 0>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
index 79963dd6a3e9..ff39dfda7016 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -90,6 +90,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[4, 1, 1]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
@@ -102,6 +106,10 @@ def P8Itineraries : ProcessorItineraries<
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[1, 1, 1]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
P8_DU4, P8_DU5, P8_DU6], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
@@ -259,14 +267,14 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_LU1, P8_LU2,
P8_LSU1, P8_LSU2]>]
[1, 1, 1]>,
- InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>,
InstrStage<1, [P8_DU2], 0>,
InstrStage<1, [P8_LU1, P8_LU2,
P8_LSU1, P8_LSU2], 0>,
InstrStage<1, [P8_FXU1, P8_FXU2]>],
[2, 1, 1, 1]>,
// First+last
- InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>,
InstrStage<1, [P8_DU2], 0>,
InstrStage<1, [P8_DU3], 0>,
InstrStage<1, [P8_DU4], 0>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index e1a480117315..a1e625c855e0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -33,6 +33,12 @@ def P9Model : SchedMachineModel {
// A dispatch group is 6 instructions.
let LoopMicroOpBufferSize = 60;
+ // As iops are dispatched to a slice, they are held in an independent slice
+ // issue queue until all register sources and other dependencies have been
+ // resolved and they can be issued. Each of four execution slices has an
+ // 11-entry iop issue queue.
+ let MicroOpBufferSize = 44;
+
let CompleteModel = 1;
// Do not support QPX (Quad Processing eXtension) or SPE (Signal Processing
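
The new MicroOpBufferSize value follows from the comment above: four execution slices, each with an 11-entry iop issue queue. A trivial sketch of that arithmetic; the constant names are invented here, only the numbers come from the comment:

// Sketch only; names are invented, the numbers come from the comment above.
constexpr unsigned NumExecutionSlices = 4;
constexpr unsigned IopIssueQueueEntries = 11;
static_assert(NumExecutionSlices * IopIssueQueueEntries == 44,
              "matches 'let MicroOpBufferSize = 44' in the P9 model");
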
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index a8d7955ef548..580d057602f5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -181,6 +181,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
const TargetOptions &Options) {
+ if (TT.isOSDarwin())
+ report_fatal_error("Darwin is no longer supported for PowerPC");
+
if (Options.MCOptions.getABIName().startswith("elfv1"))
return PPCTargetMachine::PPC_ABI_ELFv1;
else if (Options.MCOptions.getABIName().startswith("elfv2"))
@@ -211,19 +214,24 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
if (TT.isOSDarwin())
return Reloc::DynamicNoPIC;
- // Non-darwin 64-bit platforms are PIC by default.
- if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)
+ // Big Endian PPC is PIC by default.
+ if (TT.getArch() == Triple::ppc64)
return Reloc::PIC_;
- // 32-bit is static by default.
+ // Rest are static by default.
return Reloc::Static;
}
-static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
- Optional<CodeModel::Model> CM,
- bool JIT) {
- if (CM)
+static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
+ Optional<CodeModel::Model> CM,
+ bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (!TT.isOSDarwin() && !JIT &&
(TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
return CodeModel::Medium;
@@ -243,7 +251,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(TT, CM, JIT), OL),
+ getEffectivePPCCodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())),
TargetABI(computeTargetABI(TT, Options)) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index b0da9b5a6d70..bc9bcab83a0a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -473,7 +473,14 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
+
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 2ee2b3eb8084..9221a910288a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -90,7 +90,9 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
/// @}
};
diff --git a/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 9a455c105482..1d1112cc5124 100644
--- a/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -7,12 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVAsmBackend.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVTargetStreamer.h"
+#include "Utils/RISCVBaseInfo.h"
+#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -39,6 +43,8 @@ namespace {
struct RISCVOperand;
class RISCVAsmParser : public MCTargetAsmParser {
+ SmallVector<FeatureBitset, 4> FeatureBitStack;
+
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
@@ -73,6 +79,9 @@ class RISCVAsmParser : public MCTargetAsmParser {
// synthesize the desired immediate value into the destination register.
void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out);
+ // Helper to emit pseudo instruction "lla" used in PC-rel addressing.
+ void emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
/// Helper for processing MC instructions that have been successfully matched
/// by MatchAndEmitInstruction. Modifications to the emitted instructions,
/// like the expansion of pseudo instructions (e.g., "li"), can be performed
@@ -83,13 +92,16 @@ class RISCVAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "RISCVGenAsmMatcher.inc"
+ OperandMatchResultTy parseCSRSystemRegister(OperandVector &Operands);
OperandMatchResultTy parseImmediate(OperandVector &Operands);
OperandMatchResultTy parseRegister(OperandVector &Operands,
bool AllowParens = false);
OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
+ OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
+ OperandMatchResultTy parseJALOffset(OperandVector &Operands);
- bool parseOperand(OperandVector &Operands, bool ForceImmediate);
+ bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
bool parseDirectiveOption();
@@ -108,6 +120,21 @@ class RISCVAsmParser : public MCTargetAsmParser {
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
+
+ void pushFeatureBits() {
+ FeatureBitStack.push_back(getSTI().getFeatureBits());
+ }
+
+ bool popFeatureBits() {
+ if (FeatureBitStack.empty())
+ return true;
+
+ FeatureBitset FeatureBits = FeatureBitStack.pop_back_val();
+ copySTI().setFeatureBits(FeatureBits);
+ setAvailableFeatures(ComputeAvailableFeatures(FeatureBits));
+
+ return false;
+ }
public:
enum RISCVMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
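The FeatureBitStack and the pushFeatureBits/popFeatureBits helpers added above back the new .option push/.option pop directives handled later in this patch: push saves the current subtarget feature bits, and pop restores them, reporting an error when there is no matching push. A rough sketch of the intended assembler input (illustrative only):

    .option push          # save the current option state (e.g. rvc, relax)
    .option rvc           # enable the compressed extension for this region only
    c.addi  a0, 1
    .option pop           # restore whatever was saved by the matching push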
@@ -139,6 +166,7 @@ struct RISCVOperand : public MCParsedAsmOperand {
Token,
Register,
Immediate,
+ SystemRegister
} Kind;
bool IsRV64;
@@ -151,11 +179,20 @@ struct RISCVOperand : public MCParsedAsmOperand {
const MCExpr *Val;
};
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ unsigned Encoding;
+ // FIXME: Add the Encoding parsed fields as needed for checks,
+ // e.g.: read/write or user/supervisor/machine privileges.
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
+ struct SysRegOp SysReg;
};
RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -176,6 +213,9 @@ public:
case Token:
Tok = o.Tok;
break;
+ case SystemRegister:
+ SysReg = o.SysReg;
+ break;
}
}
@@ -183,19 +223,22 @@ public:
bool isReg() const override { return Kind == Register; }
bool isImm() const override { return Kind == Immediate; }
bool isMem() const override { return false; }
+ bool isSystemRegister() const { return Kind == SystemRegister; }
- bool evaluateConstantImm(int64_t &Imm, RISCVMCExpr::VariantKind &VK) const {
- const MCExpr *Val = getImm();
- bool Ret = false;
- if (auto *RE = dyn_cast<RISCVMCExpr>(Val)) {
- Ret = RE->evaluateAsConstant(Imm);
+ static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
+ RISCVMCExpr::VariantKind &VK) {
+ if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
VK = RE->getKind();
- } else if (auto CE = dyn_cast<MCConstantExpr>(Val)) {
- Ret = true;
+ return RE->evaluateAsConstant(Imm);
+ }
+
+ if (auto CE = dyn_cast<MCConstantExpr>(Expr)) {
VK = RISCVMCExpr::VK_RISCV_None;
Imm = CE->getValue();
+ return true;
}
- return Ret;
+
+ return false;
}
// True if operand is a symbol with no modifiers, or a constant with no
@@ -205,7 +248,7 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
bool IsValid;
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
@@ -220,12 +263,14 @@ public:
int64_t Imm;
RISCVMCExpr::VariantKind VK;
// Must be of 'immediate' type but not a constant.
- if (!isImm() || evaluateConstantImm(Imm, VK))
+ if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isCSRSystemRegister() const { return isSystemRegister(); }
+
/// Return true if the operand is valid for the fence instruction, e.g.
/// ('iorw').
bool isFenceArg() const {
@@ -265,12 +310,14 @@ public:
return RISCVFPRndMode::stringToRoundingMode(Str) != RISCVFPRndMode::Invalid;
}
- bool isImmXLen() const {
+ bool isImmXLenLI() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (VK == RISCVMCExpr::VK_RISCV_LO || VK == RISCVMCExpr::VK_RISCV_PCREL_LO)
+ return true;
// Given only Imm, ensuring that the actually specified constant is either
// a signed or unsigned 64-bit number is unfortunately impossible.
bool IsInRange = isRV64() ? true : isInt<32>(Imm) || isUInt<32>(Imm);
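With the rename to isImmXLenLI, the li-specific predicate above now also accepts %lo/%pcrel_lo expressions (they return true before the range check), while plain constants on RV32 must still fit in 32 bits as either a signed or an unsigned value. Illustrative inputs (sym is a placeholder symbol):

    li a0, 0xdeadbeef     # accepted on RV32: fits as an unsigned 32-bit value
    li a0, %lo(sym)       # accepted by the new check; expanded later to an addi from x0
    li a0, 0x123456789    # rejected on RV32: neither a signed nor an unsigned 32-bit value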
@@ -282,7 +329,8 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- if (!evaluateConstantImm(Imm, VK) || VK != RISCVMCExpr::VK_RISCV_None)
+ if (!evaluateConstantImm(getImm(), Imm, VK) ||
+ VK != RISCVMCExpr::VK_RISCV_None)
return false;
return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm);
}
@@ -292,7 +340,8 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- if (!evaluateConstantImm(Imm, VK) || VK != RISCVMCExpr::VK_RISCV_None)
+ if (!evaluateConstantImm(getImm(), Imm, VK) ||
+ VK != RISCVMCExpr::VK_RISCV_None)
return false;
if (Imm == 0)
return false;
@@ -304,7 +353,7 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -313,66 +362,68 @@ public:
RISCVMCExpr::VariantKind VK;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<5>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6() const {
+ if (!isImm())
+ return false;
RISCVMCExpr::VariantKind VK;
int64_t Imm;
- bool IsValid;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = isInt<6>(Imm);
- return IsValid &&
- (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isInt<6>(Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6NonZero() const {
+ if (!isImm())
+ return false;
RISCVMCExpr::VariantKind VK;
int64_t Imm;
- bool IsValid;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = ((Imm != 0) && isInt<6>(Imm));
- return IsValid &&
- (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isInt<6>(Imm) && (Imm != 0) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isCLUIImm() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) &&
(isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff)) &&
- VK == RISCVMCExpr::VK_RISCV_None;
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm7Lsb00() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm8Lsb00() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm8Lsb000() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -380,17 +431,21 @@ public:
bool isSImm9Lsb0() const { return isBareSimmNLsb0<9>(); }
bool isUImm9Lsb000() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm10Lsb00NonZero() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<8, 2>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -401,54 +456,63 @@ public:
bool IsValid;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
else
IsValid = isInt<12>(Imm);
- return IsValid && (VK == RISCVMCExpr::VK_RISCV_None ||
+ return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) ||
VK == RISCVMCExpr::VK_RISCV_LO ||
VK == RISCVMCExpr::VK_RISCV_PCREL_LO);
}
bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
- bool isUImm12() const {
- int64_t Imm;
- RISCVMCExpr::VariantKind VK;
- if (!isImm())
- return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- return IsConstantImm && isUInt<12>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
- }
-
bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); }
bool isSImm10Lsb0000NonZero() const {
+ if (!isImm())
+ return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) && isShiftedInt<6, 4>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
- bool isUImm20() const {
+ bool isUImm20LUI() const {
RISCVMCExpr::VariantKind VK;
int64_t Imm;
bool IsValid;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(Imm, VK);
- if (!IsConstantImm)
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (!IsConstantImm) {
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- else
- IsValid = isUInt<20>(Imm);
- return IsValid && (VK == RISCVMCExpr::VK_RISCV_None ||
- VK == RISCVMCExpr::VK_RISCV_HI ||
- VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
+ return IsValid && VK == RISCVMCExpr::VK_RISCV_HI;
+ } else {
+ return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
+ VK == RISCVMCExpr::VK_RISCV_HI);
+ }
+ }
+
+ bool isUImm20AUIPC() const {
+ RISCVMCExpr::VariantKind VK;
+ int64_t Imm;
+ bool IsValid;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ if (!IsConstantImm) {
+ IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
+ return IsValid && VK == RISCVMCExpr::VK_RISCV_PCREL_HI;
+ } else {
+ return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
+ VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
+ }
}
- bool isSImm21Lsb0() const { return isBareSimmNLsb0<21>(); }
+ bool isSImm21Lsb0JAL() const { return isBareSimmNLsb0<21>(); }
/// getStartLoc - Gets location of the first token of this operand
SMLoc getStartLoc() const override { return StartLoc; }
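Splitting isUImm20 into isUImm20LUI and isUImm20AUIPC ties each relocation modifier to the instruction it belongs with: lui takes %hi() and auipc takes %pcrel_hi(), while bare integers in [0, 2^20 - 1] remain valid for both. For example (sym is a placeholder symbol):

    lui   a0, %hi(sym)         # valid: %hi pairs with lui
    auipc a0, %pcrel_hi(sym)   # valid: %pcrel_hi pairs with auipc
    lui   a0, %pcrel_hi(sym)   # now rejected, reported via Match_InvalidUImm20LUI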
@@ -462,6 +526,11 @@ public:
return Reg.RegNum;
}
+ StringRef getSysReg() const {
+ assert(Kind == SystemRegister && "Invalid access!");
+ return StringRef(SysReg.Data, SysReg.Length);
+ }
+
const MCExpr *getImm() const {
assert(Kind == Immediate && "Invalid type access!");
return Imm.Val;
@@ -484,6 +553,9 @@ public:
case Token:
OS << "'" << getToken() << "'";
break;
+ case SystemRegister:
+ OS << "<sysreg: " << getSysReg() << '>';
+ break;
}
}
@@ -517,16 +589,22 @@ public:
return Op;
}
+ static std::unique_ptr<RISCVOperand>
+ createSysReg(StringRef Str, SMLoc S, unsigned Encoding, bool IsRV64) {
+ auto Op = make_unique<RISCVOperand>(SystemRegister);
+ Op->SysReg.Data = Str.data();
+ Op->SysReg.Length = Str.size();
+ Op->SysReg.Encoding = Encoding;
+ Op->StartLoc = S;
+ Op->IsRV64 = IsRV64;
+ return Op;
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
int64_t Imm = 0;
- bool IsConstant = false;
- if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
- IsConstant = RE->evaluateAsConstant(Imm);
- } else if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
- IsConstant = true;
- Imm = CE->getValue();
- }
+ RISCVMCExpr::VariantKind VK;
+ bool IsConstant = evaluateConstantImm(Expr, Imm, VK);
if (IsConstant)
Inst.addOperand(MCOperand::createImm(Imm));
@@ -553,16 +631,22 @@ public:
unsigned Imm = 0;
for (char c : SE->getSymbol().getName()) {
switch (c) {
- default: llvm_unreachable("FenceArg must contain only [iorw]");
- case 'i': Imm |= RISCVFenceField::I; break;
- case 'o': Imm |= RISCVFenceField::O; break;
- case 'r': Imm |= RISCVFenceField::R; break;
- case 'w': Imm |= RISCVFenceField::W; break;
+ default:
+ llvm_unreachable("FenceArg must contain only [iorw]");
+ case 'i': Imm |= RISCVFenceField::I; break;
+ case 'o': Imm |= RISCVFenceField::O; break;
+ case 'r': Imm |= RISCVFenceField::R; break;
+ case 'w': Imm |= RISCVFenceField::W; break;
}
}
Inst.addOperand(MCOperand::createImm(Imm));
}
+ void addCSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(SysReg.Encoding));
+ }
+
// Returns the rounding mode represented by this RISCVOperand. Should only
// be called after checking isFRMArg.
RISCVFPRndMode::RoundingMode getRoundingMode() const {
@@ -590,40 +674,40 @@ public:
// information from TableGen.
unsigned convertFPR32ToFPR64(unsigned Reg) {
switch (Reg) {
- default:
- llvm_unreachable("Not a recognised FPR32 register");
- case RISCV::F0_32: return RISCV::F0_64;
- case RISCV::F1_32: return RISCV::F1_64;
- case RISCV::F2_32: return RISCV::F2_64;
- case RISCV::F3_32: return RISCV::F3_64;
- case RISCV::F4_32: return RISCV::F4_64;
- case RISCV::F5_32: return RISCV::F5_64;
- case RISCV::F6_32: return RISCV::F6_64;
- case RISCV::F7_32: return RISCV::F7_64;
- case RISCV::F8_32: return RISCV::F8_64;
- case RISCV::F9_32: return RISCV::F9_64;
- case RISCV::F10_32: return RISCV::F10_64;
- case RISCV::F11_32: return RISCV::F11_64;
- case RISCV::F12_32: return RISCV::F12_64;
- case RISCV::F13_32: return RISCV::F13_64;
- case RISCV::F14_32: return RISCV::F14_64;
- case RISCV::F15_32: return RISCV::F15_64;
- case RISCV::F16_32: return RISCV::F16_64;
- case RISCV::F17_32: return RISCV::F17_64;
- case RISCV::F18_32: return RISCV::F18_64;
- case RISCV::F19_32: return RISCV::F19_64;
- case RISCV::F20_32: return RISCV::F20_64;
- case RISCV::F21_32: return RISCV::F21_64;
- case RISCV::F22_32: return RISCV::F22_64;
- case RISCV::F23_32: return RISCV::F23_64;
- case RISCV::F24_32: return RISCV::F24_64;
- case RISCV::F25_32: return RISCV::F25_64;
- case RISCV::F26_32: return RISCV::F26_64;
- case RISCV::F27_32: return RISCV::F27_64;
- case RISCV::F28_32: return RISCV::F28_64;
- case RISCV::F29_32: return RISCV::F29_64;
- case RISCV::F30_32: return RISCV::F30_64;
- case RISCV::F31_32: return RISCV::F31_64;
+ default:
+ llvm_unreachable("Not a recognised FPR32 register");
+ case RISCV::F0_32: return RISCV::F0_64;
+ case RISCV::F1_32: return RISCV::F1_64;
+ case RISCV::F2_32: return RISCV::F2_64;
+ case RISCV::F3_32: return RISCV::F3_64;
+ case RISCV::F4_32: return RISCV::F4_64;
+ case RISCV::F5_32: return RISCV::F5_64;
+ case RISCV::F6_32: return RISCV::F6_64;
+ case RISCV::F7_32: return RISCV::F7_64;
+ case RISCV::F8_32: return RISCV::F8_64;
+ case RISCV::F9_32: return RISCV::F9_64;
+ case RISCV::F10_32: return RISCV::F10_64;
+ case RISCV::F11_32: return RISCV::F11_64;
+ case RISCV::F12_32: return RISCV::F12_64;
+ case RISCV::F13_32: return RISCV::F13_64;
+ case RISCV::F14_32: return RISCV::F14_64;
+ case RISCV::F15_32: return RISCV::F15_64;
+ case RISCV::F16_32: return RISCV::F16_64;
+ case RISCV::F17_32: return RISCV::F17_64;
+ case RISCV::F18_32: return RISCV::F18_64;
+ case RISCV::F19_32: return RISCV::F19_64;
+ case RISCV::F20_32: return RISCV::F20_64;
+ case RISCV::F21_32: return RISCV::F21_64;
+ case RISCV::F22_32: return RISCV::F22_64;
+ case RISCV::F23_32: return RISCV::F23_64;
+ case RISCV::F24_32: return RISCV::F24_64;
+ case RISCV::F25_32: return RISCV::F25_64;
+ case RISCV::F26_32: return RISCV::F26_64;
+ case RISCV::F27_32: return RISCV::F27_64;
+ case RISCV::F28_32: return RISCV::F28_64;
+ case RISCV::F29_32: return RISCV::F29_64;
+ case RISCV::F30_32: return RISCV::F30_64;
+ case RISCV::F31_32: return RISCV::F31_64;
}
}
@@ -663,7 +747,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool MatchingInlineAsm) {
MCInst Inst;
- switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ auto Result =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+ switch (Result) {
default:
break;
case Match_Success:
@@ -684,7 +770,21 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
return Error(ErrorLoc, "invalid operand for instruction");
}
- case Match_InvalidImmXLen:
+ }
+
+ // Handle the case when the error message is of a specific type
+ // other than the generic Match_InvalidOperand, and the
+ // corresponding operand is missing.
+ if (Result > FIRST_TARGET_MATCH_RESULT_TY) {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ return Error(ErrorLoc, "too few operands for instruction");
+ }
+
+ switch(Result) {
+ default:
+ break;
+ case Match_InvalidImmXLenLI:
if (isRV64()) {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a constant 64-bit integer");
@@ -706,8 +806,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
(1 << 5) - 1);
case Match_InvalidSImm6NonZero:
- return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
- (1 << 5) - 1,
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, -(1 << 5), (1 << 5) - 1,
"immediate must be non-zero in the range");
case Match_InvalidCLUIImm:
return generateImmOutOfRangeError(
@@ -742,24 +842,36 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Operands, ErrorInfo, -(1 << 9), (1 << 9) - 16,
"immediate must be a multiple of 16 bytes and non-zero in the range");
case Match_InvalidSImm12:
- return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 11),
- (1 << 11) - 1);
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, -(1 << 11), (1 << 11) - 1,
+ "operand must be a symbol with %lo/%pcrel_lo modifier or an integer in "
+ "the range");
case Match_InvalidSImm12Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2,
"immediate must be a multiple of 2 bytes in the range");
- case Match_InvalidUImm12:
- return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1);
case Match_InvalidSImm13Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 12), (1 << 12) - 2,
"immediate must be a multiple of 2 bytes in the range");
- case Match_InvalidUImm20:
- return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1);
- case Match_InvalidSImm21Lsb0:
+ case Match_InvalidUImm20LUI:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1,
+ "operand must be a symbol with %hi() "
+ "modifier or an integer in the range");
+ case Match_InvalidUImm20AUIPC:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 20) - 1,
+ "operand must be a symbol with %pcrel_hi() modifier or an integer in "
+ "the range");
+ case Match_InvalidSImm21Lsb0JAL:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 20), (1 << 20) - 2,
"immediate must be a multiple of 2 bytes in the range");
+ case Match_InvalidCSRSystemRegister: {
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1,
+ "operand must be a valid system register "
+ "name or an integer in the range");
+ }
case Match_InvalidFenceArg: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(
@@ -842,9 +954,9 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
return MatchOperand_Success;
}
-OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
+OperandMatchResultTy
+RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
switch (getLexer().getKind()) {
@@ -854,18 +966,77 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
- case AsmToken::String:
+ case AsmToken::String: {
if (getParser().parseExpression(Res))
return MatchOperand_ParseFail;
- break;
+
+ auto *CE = dyn_cast<MCConstantExpr>(Res);
+ if (CE) {
+ int64_t Imm = CE->getValue();
+ if (isUInt<12>(Imm)) {
+ auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
+ // Accept an immediate representing a named or un-named Sys Reg
+ // if the range is valid, regardless of the required features.
+ Operands.push_back(RISCVOperand::createSysReg(
+ SysReg ? SysReg->Name : "", S, Imm, isRV64()));
+ return MatchOperand_Success;
+ }
+ }
+
+ Twine Msg = "immediate must be an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
- MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
- Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- break;
+
+ auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier);
+ // Accept a named Sys Reg if the required features are present.
+ if (SysReg) {
+ if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) {
+ Error(S, "system register use requires an option to be enabled");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(RISCVOperand::createSysReg(
+ Identifier, S, SysReg->Encoding, isRV64()));
+ return MatchOperand_Success;
+ }
+
+ Twine Msg = "operand must be a valid system register name "
+ "or an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
+ case AsmToken::Percent: {
+ // Discard operand with modifier.
+ Twine Msg = "immediate must be an integer in the range";
+ Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
+ return MatchOperand_ParseFail;
+ }
}
+
+ return MatchOperand_NoMatch;
+}
+
+OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ case AsmToken::Integer:
+ case AsmToken::String:
+ case AsmToken::Identifier:
+ if (getParser().parseExpression(Res))
+ return MatchOperand_ParseFail;
+ break;
case AsmToken::Percent:
return parseOperandWithModifier(Operands);
}
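The new parseCSRSystemRegister routine above accepts either a CSR name, resolved through lookupSysRegByName and checked against the required subtarget features, or a bare integer in [0, 4095], which is kept even when it does not correspond to a named register. Roughly, all of the following should now parse (the 0x305 encoding is shown purely for illustration; it is the mtvec address):

    csrr  a0, mstatus      # named CSR, found in the lookup table
    csrrw x0, 0x305, a1    # raw 12-bit encoding, accepted regardless of features
    csrr  a0, 0xfff        # any encoding in [0, 4095] is accepted, named or not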
@@ -914,6 +1085,41 @@ RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ StringRef Identifier;
+ if (getParser().parseIdentifier(Identifier))
+ return MatchOperand_ParseFail;
+
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+}
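parseBareSymbol gives operands that are plain symbols, previously covered by the ForceImmediate special case for call and tail, a dedicated custom parser: the identifier is wrapped in an ordinary MCSymbolRefExpr with no modifier. Illustrative uses (foo and bar are placeholder symbols):

    call foo
    tail bar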
+
+OperandMatchResultTy RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
+ // Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo`
+ // both being acceptable forms. When parsing `jal ra, foo` this function
+ // will be called for the `ra` register operand in an attempt to match the
+ // single-operand alias. parseJALOffset must fail for this case. It would
+ // seem logical to try to parse the operand using parseImmediate and return
+ // NoMatch if the next token is a comma (meaning we must be parsing a jal in
+ // the second form rather than the first). We can't do this as there's no
+ // way of rewinding the lexer state. Instead, return NoMatch if this operand
+ // is an identifier and is followed by a comma.
+ if (getLexer().is(AsmToken::Identifier) &&
+ getLexer().peekTok().is(AsmToken::Comma))
+ return MatchOperand_NoMatch;
+
+ return parseImmediate(Operands);
+}
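As the comment above explains, parseJALOffset must not consume the register of the two-operand form, so it backs off when an identifier is immediately followed by a comma. Both spellings therefore parse, for example:

    jal  foo          # one-operand alias: foo is matched as the offset
    jal  ra, foo      # two-operand form: ra is a register, foo the offset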
+
OperandMatchResultTy
RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::LParen)) {
@@ -942,13 +1148,19 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
/// Looks at a token type and creates the relevant operand from this
/// information, adding to Operands. If operand was parsed, returns false, else
-/// true. If ForceImmediate is true, no attempt will be made to parse the
-/// operand as a register, which is needed for pseudoinstructions such as
-/// call.
-bool RISCVAsmParser::parseOperand(OperandVector &Operands,
- bool ForceImmediate) {
- // Attempt to parse token as register, unless ForceImmediate.
- if (!ForceImmediate && parseRegister(Operands, true) == MatchOperand_Success)
+/// true.
+bool RISCVAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy Result =
+ MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true);
+ if (Result == MatchOperand_Success)
+ return false;
+ if (Result == MatchOperand_ParseFail)
+ return true;
+
+ // Attempt to parse token as a register.
+ if (parseRegister(Operands, true) == MatchOperand_Success)
return false;
// Attempt to parse token as an immediate
@@ -967,6 +1179,21 @@ bool RISCVAsmParser::parseOperand(OperandVector &Operands,
bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
+ // Ensure that if the instruction occurs when relaxation is enabled,
+ // relocations are forced for the file. Ideally this would be done when there
+ // is enough information to reliably determine if the instruction itself may
+ // cause relaxations. Unfortunately, the instruction processing stage occurs in the
+ // same pass as relocation emission, so it's too late to set a 'sticky bit'
+ // for the entire file.
+ if (getSTI().getFeatureBits()[RISCV::FeatureRelax]) {
+ auto *Assembler = getTargetStreamer().getStreamer().getAssemblerPtr();
+ if (Assembler != nullptr) {
+ RISCVAsmBackend &MAB =
+ static_cast<RISCVAsmBackend &>(Assembler->getBackend());
+ MAB.setForceRelocs();
+ }
+ }
+
// First operand is token for instruction
Operands.push_back(RISCVOperand::createToken(Name, NameLoc, isRV64()));
@@ -975,18 +1202,20 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
// Parse first operand
- bool ForceImmediate = (Name == "call" || Name == "tail");
- if (parseOperand(Operands, ForceImmediate))
+ if (parseOperand(Operands, Name))
return true;
// Parse until end of statement, consuming commas between operands
+ unsigned OperandIdx = 1;
while (getLexer().is(AsmToken::Comma)) {
// Consume comma token
getLexer().Lex();
// Parse next operand
- if (parseOperand(Operands, false))
+ if (parseOperand(Operands, Name))
return true;
+
+ ++OperandIdx;
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -1068,6 +1297,33 @@ bool RISCVAsmParser::parseDirectiveOption() {
StringRef Option = Tok.getIdentifier();
+ if (Option == "push") {
+ getTargetStreamer().emitDirectiveOptionPush();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ pushFeatureBits();
+ return false;
+ }
+
+ if (Option == "pop") {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ getTargetStreamer().emitDirectiveOptionPop();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ if (popFeatureBits())
+ return Error(StartLoc, ".option pop with no .option push");
+
+ return false;
+ }
+
if (Option == "rvc") {
getTargetStreamer().emitDirectiveOptionRVC();
@@ -1092,9 +1348,34 @@ bool RISCVAsmParser::parseDirectiveOption() {
return false;
}
+ if (Option == "relax") {
+ getTargetStreamer().emitDirectiveOptionRelax();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ setFeatureBits(RISCV::FeatureRelax, "relax");
+ return false;
+ }
+
+ if (Option == "norelax") {
+ getTargetStreamer().emitDirectiveOptionNoRelax();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ clearFeatureBits(RISCV::FeatureRelax, "relax");
+ return false;
+ }
+
// Unknown option.
Warning(Parser.getTok().getLoc(),
- "unknown option, expected 'rvc' or 'norvc'");
+ "unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'relax' or "
+ "'norelax'");
Parser.eatToEndOfStatement();
return false;
}
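The relax and norelax options handled above toggle RISCV::FeatureRelax from assembly, mirroring the push/pop handling of the other option state. A common, purely illustrative pattern is turning relaxation off around the global-pointer setup:

    .option push
    .option norelax        # gp is not usable yet, so this la must not be gp-relaxed
    la      gp, __global_pointer$
    .option pop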
@@ -1108,80 +1389,54 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value,
MCStreamer &Out) {
- if (isInt<32>(Value)) {
- // Emits the MC instructions for loading a 32-bit constant into a register.
- //
- // Depending on the active bits in the immediate Value v, the following
- // instruction sequences are emitted:
- //
- // v == 0 : ADDI(W)
- // v[0,12) != 0 && v[12,32) == 0 : ADDI(W)
- // v[0,12) == 0 && v[12,32) != 0 : LUI
- // v[0,32) != 0 : LUI+ADDI(W)
- //
- int64_t Hi20 = ((Value + 0x800) >> 12) & 0xFFFFF;
- int64_t Lo12 = SignExtend64<12>(Value);
- unsigned SrcReg = RISCV::X0;
-
- if (Hi20) {
- emitToStreamer(Out,
- MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Hi20));
- SrcReg = DestReg;
+ RISCVMatInt::InstSeq Seq;
+ RISCVMatInt::generateInstSeq(Value, isRV64(), Seq);
+
+ unsigned SrcReg = RISCV::X0;
+ for (RISCVMatInt::Inst &Inst : Seq) {
+ if (Inst.Opc == RISCV::LUI) {
+ emitToStreamer(
+ Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
+ } else {
+ emitToStreamer(
+ Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
+ Inst.Imm));
}
- if (Lo12 || Hi20 == 0) {
- unsigned AddiOpcode =
- STI->hasFeature(RISCV::Feature64Bit) ? RISCV::ADDIW : RISCV::ADDI;
- emitToStreamer(Out, MCInstBuilder(AddiOpcode)
- .addReg(DestReg)
- .addReg(SrcReg)
- .addImm(Lo12));
- }
- return;
- }
- assert(STI->hasFeature(RISCV::Feature64Bit) &&
- "Target must be 64-bit to support a >32-bit constant");
-
- // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
- // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emmitted. Note
- // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
- // while the following ADDI instructions contribute up to 12 bits each.
- //
- // On the first glance, implementing this seems to be possible by simply
- // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
- // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the
- // fact that ADDI performs a sign extended addition, doing it like that would
- // only be possible when at most 11 bits of the ADDI instructions are used.
- // Using all 12 bits of the ADDI instructions, like done by GAS, actually
- // requires that the constant is processed starting with the least significant
- // bit.
- //
- // In the following, constants are processed from LSB to MSB but instruction
- // emission is performed from MSB to LSB by recursively calling
- // emitLoadImm. In each recursion, first the lowest 12 bits are removed
- // from the constant and the optimal shift amount, which can be greater than
- // 12 bits if the constant is sparse, is determined. Then, the shifted
- // remaining constant is processed recursively and gets emitted as soon as it
- // fits into 32 bits. The emission of the shifts and additions is subsequently
- // performed when the recursion returns.
- //
- int64_t Lo12 = SignExtend64<12>(Value);
- int64_t Hi52 = (Value + 0x800) >> 12;
- int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
- Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
-
- emitLoadImm(DestReg, Hi52, Out);
-
- emitToStreamer(Out, MCInstBuilder(RISCV::SLLI)
- .addReg(DestReg)
- .addReg(DestReg)
- .addImm(ShiftAmount));
-
- if (Lo12)
- emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
- .addReg(DestReg)
- .addReg(DestReg)
- .addImm(Lo12));
+ // Only the first instruction has X0 as its source.
+ SrcReg = DestReg;
+ }
+}
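emitLoadImm now delegates to RISCVMatInt::generateInstSeq, which produces the LUI/ADDI(W)/SLLI/ADDI sequence for an arbitrary XLEN-sized constant and replaces the hand-rolled 32-bit path deleted above. Roughly, for example:

    li a0, 0x12345678
    # expands on RV32 to something like:
    #   lui  a0, 0x12345
    #   addi a0, a0, 0x678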
+
+void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The local load address pseudo-instruction "lla" is used in PC-relative
+ // addressing of symbols:
+ // lla rdest, symbol
+ // expands to
+ // TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
+ // ADDI rdest, %pcrel_lo(TmpLabel)
+ MCContext &Ctx = getContext();
+
+ MCSymbol *TmpLabel = Ctx.createTempSymbol(
+ "pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false);
+ Out.EmitLabel(TmpLabel);
+
+ MCOperand DestReg = Inst.getOperand(0);
+ const RISCVMCExpr *Symbol = RISCVMCExpr::create(
+ Inst.getOperand(1).getExpr(), RISCVMCExpr::VK_RISCV_PCREL_HI, Ctx);
+
+ emitToStreamer(
+ Out, MCInstBuilder(RISCV::AUIPC).addOperand(DestReg).addExpr(Symbol));
+
+ const MCExpr *RefToLinkTmpLabel =
+ RISCVMCExpr::create(MCSymbolRefExpr::create(TmpLabel, Ctx),
+ RISCVMCExpr::VK_RISCV_PCREL_LO, Ctx);
+
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
+ .addOperand(DestReg)
+ .addOperand(DestReg)
+ .addExpr(RefToLinkTmpLabel));
}
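The expansion sketched in the comment above yields an auipc/addi pair whose %pcrel_lo operand refers back to the temporary label placed on the auipc, e.g. (label name illustrative):

    lla a0, sym
    # becomes roughly:
    # .Lpcrel_hi0:
    #   auipc a0, %pcrel_hi(sym)
    #   addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)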
bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
@@ -1189,7 +1444,17 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Inst.setLoc(IDLoc);
if (Inst.getOpcode() == RISCV::PseudoLI) {
- auto Reg = Inst.getOperand(0).getReg();
+ unsigned Reg = Inst.getOperand(0).getReg();
+ const MCOperand &Op1 = Inst.getOperand(1);
+ if (Op1.isExpr()) {
+ // We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar.
+ // Just convert to an addi. This allows compatibility with gas.
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
+ .addReg(Reg)
+ .addReg(RISCV::X0)
+ .addExpr(Op1.getExpr()));
+ return false;
+ }
int64_t Imm = Inst.getOperand(1).getImm();
// On RV32 the immediate here can either be a signed or an unsigned
// 32-bit number. Sign extension has to be performed to ensure that Imm
@@ -1198,6 +1463,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Imm = SignExtend64<32>(Imm);
emitLoadImm(Reg, Imm, Out);
return false;
+ } else if (Inst.getOpcode() == RISCV::PseudoLLA) {
+ emitLoadLocalAddress(Inst, IDLoc, Out);
+ return false;
}
emitToStreamer(Out, Inst);
diff --git a/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 7bbb371a757f..eafa09d56315 100644
--- a/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -211,6 +212,15 @@ static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
+static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ if (Imm == 0)
+ return MCDisassembler::Fail;
+ return decodeUImmOperand<N>(Inst, Imm, Address, Decoder);
+}
+
+template <unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
@@ -221,6 +231,15 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
+static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ if (Imm == 0)
+ return MCDisassembler::Fail;
+ return decodeSImmOperand<N>(Inst, Imm, Address, Decoder);
+}
+
+template <unsigned N>
static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
int64_t Address,
const void *Decoder) {
@@ -243,6 +262,17 @@ static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ assert(isUInt<3>(Imm) && "Invalid immediate");
+ if (!llvm::RISCVFPRndMode::isValidRoundingMode(Imm))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
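decodeFRMArg rejects the reserved rounding-mode encodings so the disassembler fails cleanly instead of printing a bogus mode. For reference, the standard frm encodings are 0=rne, 1=rtz, 2=rdn, 3=rup, 4=rmm and 7=dyn, with 5 and 6 reserved; for example:

    fcvt.w.s a0, fa0, rtz    # rtz occupies the frm field with encoding 1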
+
#include "RISCVGenDisassemblerTables.inc"
DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
@@ -257,11 +287,19 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// It's a 32 bit instruction if bit 0 and 1 are 1.
if ((Bytes[0] & 0x3) == 0x3) {
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
Insn = support::endian::read32le(Bytes.data());
LLVM_DEBUG(dbgs() << "Trying RISCV32 table :\n");
Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
Size = 4;
} else {
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
Insn = support::endian::read16le(Bytes.data());
if (!STI.getFeatureBits()[RISCV::Feature64Bit]) {
diff --git a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
index 300e6fd9750a..979c8f4e2fa7 100644
--- a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "RISCVInstPrinter.h"
-#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCExpr.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -36,10 +36,9 @@ using namespace llvm;
#include "RISCVGenCompressInstEmitter.inc"
static cl::opt<bool>
-NoAliases("riscv-no-aliases",
- cl::desc("Disable the emission of assembler pseudo instructions"),
- cl::init(false),
- cl::Hidden);
+ NoAliases("riscv-no-aliases",
+ cl::desc("Disable the emission of assembler pseudo instructions"),
+ cl::init(false), cl::Hidden);
void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
@@ -49,7 +48,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (!NoAliases)
Res = uncompressInst(UncompressedMI, *MI, MRI, STI);
if (Res)
- NewMI = const_cast<MCInst*>(&UncompressedMI);
+ NewMI = const_cast<MCInst *>(&UncompressedMI);
if (NoAliases || !printAliasInstr(NewMI, STI, O))
printInstruction(NewMI, STI, O);
printAnnotation(O, Annot);
@@ -60,8 +59,8 @@ void RISCVInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
}
void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
+ const MCSubtargetInfo &STI, raw_ostream &O,
+ const char *Modifier) {
assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
const MCOperand &MO = MI->getOperand(OpNo);
@@ -79,10 +78,23 @@ void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
MO.getExpr()->print(O, &MAI);
}
+void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
+ if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
+ O << SysReg->Name;
+ else
+ O << Imm;
+}
+
void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned FenceArg = MI->getOperand(OpNo).getImm();
+ assert (((FenceArg >> 4) == 0) && "Invalid immediate in printFenceArg");
+
if ((FenceArg & RISCVFenceField::I) != 0)
O << 'i';
if ((FenceArg & RISCVFenceField::O) != 0)
@@ -91,11 +103,12 @@ void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
O << 'r';
if ((FenceArg & RISCVFenceField::W) != 0)
O << 'w';
+ if (FenceArg == 0)
+ O << "unknown";
}
void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
auto FRMArg =
static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
O << RISCVFPRndMode::roundingModeToString(FRMArg);
diff --git a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
index 241be8daf113..0f9bed184996 100644
--- a/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
+++ b/contrib/llvm/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
@@ -32,6 +32,8 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, const char *Modifier = nullptr);
+ void printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printFenceArg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -48,6 +50,6 @@ public:
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = RISCV::ABIRegAltName);
};
-}
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 9ba7ebd0eb0f..7672fea5d95b 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -7,114 +7,58 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVFixupKinds.h"
-#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVAsmBackend.h"
+#include "RISCVMCExpr.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
-class RISCVAsmBackend : public MCAsmBackend {
- const MCSubtargetInfo &STI;
- uint8_t OSABI;
- bool Is64Bit;
+// If linker relaxation is enabled, or the relax option had previously been
+// enabled, always emit relocations even if the fixup can be resolved. This is
+// necessary for correctness as offsets may change during relaxation.
+bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
+ bool ShouldForce = false;
-public:
- RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
- Is64Bit(Is64Bit) {}
- ~RISCVAsmBackend() override {}
-
- // Generate diff expression relocations if the relax feature is enabled,
- // otherwise it is safe for the assembler to calculate these internally.
- bool requiresDiffExpressionRelocations() const override {
- return STI.getFeatureBits()[RISCV::FeatureRelax];
- }
- void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved,
- const MCSubtargetInfo *STI) const override;
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override;
-
- // If linker relaxation is enabled, always emit relocations even if the fixup
- // can be resolved. This is necessary for correctness as offsets may change
- // during relaxation.
- bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target) override {
- return STI.getFeatureBits()[RISCV::FeatureRelax];
- }
-
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
- }
-
- bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout,
- const bool WasForced) const override;
-
- unsigned getNumFixupKinds() const override {
- return RISCV::NumTargetFixupKinds;
- }
-
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
- const static MCFixupKindInfo Infos[] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // RISCVFixupKinds.h.
- //
- // name offset bits flags
- { "fixup_riscv_hi20", 12, 20, 0 },
- { "fixup_riscv_lo12_i", 20, 12, 0 },
- { "fixup_riscv_lo12_s", 0, 32, 0 },
- { "fixup_riscv_pcrel_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_pcrel_lo12_i", 20, 12, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_pcrel_lo12_s", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_relax", 0, 0, 0 }
- };
- static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
- "Not all fixup kinds added to Infos array");
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ break;
+ case RISCV::fixup_riscv_pcrel_lo12_i:
+ case RISCV::fixup_riscv_pcrel_lo12_s:
+ // For pcrel_lo12, force a relocation if the target of the corresponding
+ // pcrel_hi20 is not in the same fragment.
+ const MCFixup *T = cast<RISCVMCExpr>(Fixup.getValue())->getPCRelHiFixup();
+ if (!T) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "could not find corresponding %pcrel_hi");
+ return false;
+ }
+
+ switch ((unsigned)T->getKind()) {
+ default:
+ llvm_unreachable("Unexpected fixup kind for pcrel_lo12");
+ break;
+ case RISCV::fixup_riscv_pcrel_hi20:
+ ShouldForce = T->getValue()->findAssociatedFragment() !=
+ Fixup.getValue()->findAssociatedFragment();
+ break;
+ }
+ break;
}
- bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override;
- unsigned getRelaxedOpcode(unsigned Op) const;
-
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
-
-
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
-};
-
+ return ShouldForce || STI.getFeatureBits()[RISCV::FeatureRelax] ||
+ ForceRelocs;
+}
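Relocations have to be forced here because linker relaxation can shrink earlier code and shift every later offset, so even a fixup the assembler could resolve must be left to the linker; the pcrel_lo12 cases are additionally forced whenever the paired %pcrel_hi lives in a different fragment. A rough illustration of why offsets cannot be baked in under relaxation:

    call func              # may later relax from auipc+jalr to a single jal
    beq  a0, zero, 1f      # this branch distance changes if the call above shrinks
1: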
bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
bool Resolved,
@@ -348,8 +292,6 @@ RISCVAsmBackend::createObjectTargetWriter() const {
return createRISCVELFObjectWriter(OSABI, Is64Bit);
}
-} // end anonymous namespace
-
MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
new file mode 100644
index 000000000000..b98e45f4053f
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -0,0 +1,113 @@
+//===-- RISCVAsmBackend.h - RISCV Assembler Backend -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVASMBACKEND_H
+
+#include "MCTargetDesc/RISCVFixupKinds.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+class MCAssembler;
+class MCObjectTargetWriter;
+class raw_ostream;
+
+class RISCVAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo &STI;
+ uint8_t OSABI;
+ bool Is64Bit;
+ bool ForceRelocs = false;
+
+public:
+ RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
+ : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
+ Is64Bit(Is64Bit) {}
+ ~RISCVAsmBackend() override {}
+
+ void setForceRelocs() { ForceRelocs = true; }
+
+ // Generate diff expression relocations if the relax feature is enabled or had
+ // previously been enabled, otherwise it is safe for the assembler to
+ // calculate these internally.
+ bool requiresDiffExpressionRelocations() const override {
+ return STI.getFeatureBits()[RISCV::FeatureRelax] || ForceRelocs;
+ }
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override;
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override;
+
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
+ }
+
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout,
+ const bool WasForced) const override;
+
+ unsigned getNumFixupKinds() const override {
+ return RISCV::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // RISCVFixupKinds.h.
+ //
+ // name offset bits flags
+ { "fixup_riscv_hi20", 12, 20, 0 },
+ { "fixup_riscv_lo12_i", 20, 12, 0 },
+ { "fixup_riscv_lo12_s", 0, 32, 0 },
+ { "fixup_riscv_pcrel_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_pcrel_lo12_i", 20, 12, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_pcrel_lo12_s", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_relax", 0, 0, 0 }
+ };
+ static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
+ "Not all fixup kinds added to Infos array");
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
+ unsigned getRelaxedOpcode(unsigned Op) const;
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override;
+
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 6428b11cfe9c..a6ba1e41e964 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -38,5 +38,9 @@ MCELFStreamer &RISCVTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
+void RISCVTargetELFStreamer::emitDirectiveOptionPush() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionPop() {}
void RISCVTargetELFStreamer::emitDirectiveOptionRVC() {}
void RISCVTargetELFStreamer::emitDirectiveOptionNoRVC() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionRelax() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionNoRelax() {}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index daa7abfe1336..1f36bbc43882 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -20,8 +20,12 @@ public:
MCELFStreamer &getStreamer();
RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+ virtual void emitDirectiveOptionPush();
+ virtual void emitDirectiveOptionPop();
virtual void emitDirectiveOptionRVC();
virtual void emitDirectiveOptionNoRVC();
+ virtual void emitDirectiveOptionRelax();
+ virtual void emitDirectiveOptionNoRelax();
};
}
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 8a796a014b33..c5a4ffc0e360 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVFixupKinds.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -196,7 +196,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
MCInstrDesc const &Desc = MCII.get(MI.getOpcode());
unsigned MIFrm = Desc.TSFlags & RISCVII::InstFormatMask;
- // If the destination is an immediate, there is nothing to do
+ // If the destination is an immediate, there is nothing to do.
if (MO.isImm())
return MO.getImm();
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 085dcd4e5f66..53648a5922c8 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -14,12 +14,12 @@
#include "RISCV.h"
#include "RISCVMCExpr.h"
+#include "RISCVFixupKinds.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -41,9 +41,90 @@ void RISCVMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << ')';
}
+const MCFixup *RISCVMCExpr::getPCRelHiFixup() const {
+ MCValue AUIPCLoc;
+ if (!getSubExpr()->evaluateAsRelocatable(AUIPCLoc, nullptr, nullptr))
+ return nullptr;
+
+ const MCSymbolRefExpr *AUIPCSRE = AUIPCLoc.getSymA();
+ if (!AUIPCSRE)
+ return nullptr;
+
+ const auto *DF =
+ dyn_cast_or_null<MCDataFragment>(AUIPCSRE->findAssociatedFragment());
+ if (!DF)
+ return nullptr;
+
+ const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+ for (const MCFixup &F : DF->getFixups()) {
+ if (F.getOffset() != AUIPCSymbol->getOffset())
+ continue;
+
+ switch ((unsigned)F.getKind()) {
+ default:
+ continue;
+ case RISCV::fixup_riscv_pcrel_hi20:
+ return &F;
+ }
+ }
+
+ return nullptr;
+}
+
+bool RISCVMCExpr::evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ // VK_RISCV_PCREL_LO has to be handled specially. The MCExpr inside is
+ // actually the location of an auipc instruction with a VK_RISCV_PCREL_HI fixup
+ // pointing to the real target. We need to generate an MCValue in the form of
+ // (<real target> + <offset from this fixup to the auipc fixup>). The Fixup
+ // is pcrel relative to the VK_RISCV_PCREL_LO fixup, so we need to add the
+ // offset to the VK_RISCV_PCREL_HI Fixup from VK_RISCV_PCREL_LO to correct.
+ MCValue AUIPCLoc;
+ if (!getSubExpr()->evaluateAsValue(AUIPCLoc, *Layout))
+ return false;
+
+ const MCSymbolRefExpr *AUIPCSRE = AUIPCLoc.getSymA();
+ // Don't try to evaluate %pcrel_hi/%pcrel_lo pairs that cross fragment
+ // boundaries.
+ if (!AUIPCSRE ||
+ findAssociatedFragment() != AUIPCSRE->findAssociatedFragment())
+ return false;
+
+ const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+ if (!AUIPCSymbol)
+ return false;
+
+ const MCFixup *TargetFixup = getPCRelHiFixup();
+ if (!TargetFixup)
+ return false;
+
+ if ((unsigned)TargetFixup->getKind() != RISCV::fixup_riscv_pcrel_hi20)
+ return false;
+
+ MCValue Target;
+ if (!TargetFixup->getValue()->evaluateAsValue(Target, *Layout))
+ return false;
+
+ if (!Target.getSymA() || !Target.getSymA()->getSymbol().isInSection())
+ return false;
+
+ if (&Target.getSymA()->getSymbol().getSection() !=
+ findAssociatedFragment()->getParent())
+ return false;
+
+ uint64_t AUIPCOffset = AUIPCSymbol->getOffset();
+
+ Res = MCValue::get(Target.getSymA(), nullptr,
+ Target.getConstant() + (Fixup->getOffset() - AUIPCOffset));
+ return true;
+}
+
bool RISCVMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const {
+ if (Kind == VK_RISCV_PCREL_LO && evaluatePCRelLo(Res, Layout, Fixup))
+ return true;
+
if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
return false;
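The offset correction performed by evaluatePCRelLo above reduces to plain address arithmetic. A minimal standalone sketch, with made-up offsets (0x10 and 0x18 are hypothetical; the names only mirror the variables in the code):

#include <cassert>
#include <cstdint>

int main() {
  // Assume the auipc carrying the %pcrel_hi fixup sits at offset 0x10 of the
  // fragment and the instruction carrying the %pcrel_lo fixup sits at 0x18.
  uint64_t AUIPCOffset = 0x10;
  uint64_t LoFixupOffset = 0x18;
  int64_t TargetAddend = 0; // constant part of the real target

  // The %pcrel_lo fixup is resolved relative to its own location, so the
  // distance back to the auipc is folded into the addend, matching
  // Target.getConstant() + (Fixup->getOffset() - AUIPCOffset) above.
  int64_t LoAddend = TargetAddend + (int64_t)(LoFixupOffset - AUIPCOffset);
  assert(LoAddend == 0x8);
  return 0;
}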
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
index d2e0f6b6cdae..4eafcc08b51f 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
@@ -39,6 +39,9 @@ private:
int64_t evaluateAsInt64(int64_t Value) const;
+ bool evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const;
+
explicit RISCVMCExpr(const MCExpr *Expr, VariantKind Kind)
: Expr(Expr), Kind(Kind) {}
@@ -50,6 +53,13 @@ public:
const MCExpr *getSubExpr() const { return Expr; }
+ /// Get the VK_RISCV_PCREL_HI Fixup that the VK_RISCV_PCREL_LO
+ /// points to.
+ ///
+ /// \returns nullptr if this isn't a VK_RISCV_PCREL_LO pointing to a
+ /// VK_RISCV_PCREL_HI.
+ const MCFixup *getPCRelHiFixup() const;
+
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 2d5205aa7ef7..8d5ef3dbd17f 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -23,6 +23,14 @@ RISCVTargetAsmStreamer::RISCVTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
: RISCVTargetStreamer(S), OS(OS) {}
+void RISCVTargetAsmStreamer::emitDirectiveOptionPush() {
+ OS << "\t.option\tpush\n";
+}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionPop() {
+ OS << "\t.option\tpop\n";
+}
+
void RISCVTargetAsmStreamer::emitDirectiveOptionRVC() {
OS << "\t.option\trvc\n";
}
@@ -30,3 +38,11 @@ void RISCVTargetAsmStreamer::emitDirectiveOptionRVC() {
void RISCVTargetAsmStreamer::emitDirectiveOptionNoRVC() {
OS << "\t.option\tnorvc\n";
}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionRelax() {
+ OS << "\t.option\trelax\n";
+}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionNoRelax() {
+ OS << "\t.option\tnorelax\n";
+}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 525c20810f24..74ec9e303933 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -18,8 +18,12 @@ class RISCVTargetStreamer : public MCTargetStreamer {
public:
RISCVTargetStreamer(MCStreamer &S);
+ virtual void emitDirectiveOptionPush() = 0;
+ virtual void emitDirectiveOptionPop() = 0;
virtual void emitDirectiveOptionRVC() = 0;
virtual void emitDirectiveOptionNoRVC() = 0;
+ virtual void emitDirectiveOptionRelax() = 0;
+ virtual void emitDirectiveOptionNoRelax() = 0;
};
// This part is for ascii assembly output
@@ -29,8 +33,12 @@ class RISCVTargetAsmStreamer : public RISCVTargetStreamer {
public:
RISCVTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+ void emitDirectiveOptionPush() override;
+ void emitDirectiveOptionPop() override;
void emitDirectiveOptionRVC() override;
void emitDirectiveOptionNoRVC() override;
+ void emitDirectiveOptionRelax() override;
+ void emitDirectiveOptionNoRelax() override;
};
}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm/lib/Target/RISCV/RISCV.h
index 2e4f536aca35..b25aee46200d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCV.h
+++ b/contrib/llvm/lib/Target/RISCV/RISCV.h
@@ -15,7 +15,8 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCV_H
#define LLVM_LIB_TARGET_RISCV_RISCV_H
-#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "Utils/RISCVBaseInfo.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class RISCVTargetMachine;
@@ -36,6 +37,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
+
+FunctionPass *createRISCVExpandPseudoPass();
+void initializeRISCVExpandPseudoPass(PassRegistry &);
}
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm/lib/Target/RISCV/RISCV.td
index 281378cb2eee..0e86e2bc5e98 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCV.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCV.td
@@ -68,6 +68,12 @@ include "RISCVCallingConv.td"
include "RISCVInstrInfo.td"
//===----------------------------------------------------------------------===//
+// Named operands for CSR instructions.
+//===----------------------------------------------------------------------===//
+
+include "RISCVSystemOperands.td"
+
+//===----------------------------------------------------------------------===//
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..35c185aa5edd
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -0,0 +1,556 @@
+//===-- RISCVExpandPseudoInsts.cpp - Expand pseudo instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_EXPAND_PSEUDO_NAME "RISCV pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVExpandPseudo : public MachineFunctionPass {
+public:
+ const RISCVInstrInfo *TII;
+ static char ID;
+
+ RISCVExpandPseudo() : MachineFunctionPass(ID) {
+ initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return RISCV_EXPAND_PSEUDO_NAME; }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicBinOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
+ bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI);
+};
+
+char RISCVExpandPseudo::ID = 0;
+
+bool RISCVExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
+bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ switch (MBBI->getOpcode()) {
+ case RISCV::PseudoAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicSwap32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadAdd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadSub32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+ NextMBBI);
+ case RISCV::PseudoCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+ case RISCV::PseudoMaskedCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ }
+
+ return false;
+}
+
+static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::LR_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::LR_W;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::LR_W_AQ_RL;
+ }
+}
+
+static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::SC_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::SC_W;
+ case AtomicOrdering::Release:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::SC_W_AQ_RL;
+ }
+}
+
+static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
+ DebugLoc DL, MachineBasicBlock *ThisMBB,
+ MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB,
+ AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned IncrReg = MI.getOperand(3).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+
+ // .loop:
+ // lr.w dest, (addr)
+ // binop scratch, dest, val
+ // sc.w scratch, scratch, (addr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
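As a rough illustration of what the lr.w/sc.w retry loop above implements (not how the backend emits it, and ignoring the memory-ordering selection), the effect of the PseudoAtomicLoadNand32 expansion corresponds to a compare-exchange loop like this sketch:

#include <atomic>
#include <cstdint>

// Semantic sketch only: the hardware loop uses lr.w/sc.w rather than CAS.
uint32_t atomicNand32(std::atomic<uint32_t> &Mem, uint32_t Incr) {
  uint32_t Old = Mem.load();
  // "binop scratch, dest, val" is the AND + XORI(-1) pair above; retrying on
  // sc.w failure corresponds to compare_exchange_weak failing here.
  while (!Mem.compare_exchange_weak(Old, ~(Old & Incr)))
    ;
  return Old; // the pseudo's result operand receives the original value
}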
+
+static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, unsigned DestReg,
+ unsigned OldValReg, unsigned NewValReg,
+ unsigned MaskReg, unsigned ScratchReg) {
+ assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
+ assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
+ assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
+
+ // We select bits from newval and oldval using:
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+ // r = oldval ^ ((oldval ^ newval) & masktargetdata);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
+ .addReg(OldValReg)
+ .addReg(NewValReg);
+ BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(MaskReg);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
+ .addReg(OldValReg)
+ .addReg(ScratchReg);
+}
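The Stanford bit-hacks identity used above can be checked in isolation; this small hypothetical test only verifies that r = old ^ ((old ^ new) & mask) takes bits from new where mask is set and from old elsewhere:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Old = 0x12345678, New = 0xCAFEBABE, Mask = 0x00FF0000;
  uint32_t R = Old ^ ((Old ^ New) & Mask);
  // Inside the mask the bits come from New, outside it from Old.
  assert(R == ((New & Mask) | (Old & ~Mask)));
  assert(R == 0x12FE5678);
  return 0;
}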
+
+static void doMaskedAtomicBinOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned IncrReg = MI.getOperand(3).getReg();
+ unsigned MaskReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
+
+ // .loop:
+ // lr.w destreg, (alignedaddr)
+ // binop scratch, destreg, incr
+ // xor scratch, destreg, scratch
+ // and scratch, scratch, masktargetdata
+ // xor scratch, destreg, scratch
+ // sc.w scratch, scratch, (alignedaddr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(RISCV::X0)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Add:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Sub:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+
+ insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
+ ScratchReg);
+
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
+
+bool RISCVExpandPseudo::expandAtomicBinOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineFunction *MF = MBB.getParent();
+ auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopMBB);
+ MF->insert(++LoopMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopMBB);
+
+ if (!IsMasked)
+ doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+ else
+ doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
+ Width);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, unsigned ValReg,
+ unsigned ShamtReg) {
+ BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+ BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+}
+
+bool RISCVExpandPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ assert(IsMasked == true &&
+ "Should only need to expand masked atomic max/min");
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Scratch1Reg = MI.getOperand(1).getReg();
+ unsigned Scratch2Reg = MI.getOperand(2).getReg();
+ unsigned AddrReg = MI.getOperand(3).getReg();
+ unsigned IncrReg = MI.getOperand(4).getReg();
+ unsigned MaskReg = MI.getOperand(5).getReg();
+ bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
+
+ //
+ // .loophead:
+ // lr.w destreg, (alignedaddr)
+ // and scratch2, destreg, mask
+ // mv scratch1, destreg
+ // [sext scratch2 if signed min/max]
+ // ifnochangeneeded scratch2, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
+ .addReg(DestReg)
+ .addImm(0);
+
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Max: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::Min: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+
+ // .loopifbody:
+ // xor scratch1, destreg, incr
+ // and scratch1, scratch1, mask
+ // xor scratch1, destreg, scratch1
+ insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+ MaskReg, Scratch1Reg);
+
+ // .looptail:
+ // sc.w scratch1, scratch1, (addr)
+ // bnez scratch1, loop
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+ .addReg(AddrReg)
+ .addReg(Scratch1Reg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(Scratch1Reg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+bool RISCVExpandPseudo::expandAtomicCmpXchg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopHeadMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned CmpValReg = MI.getOperand(3).getReg();
+ unsigned NewValReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+
+ if (!IsMasked) {
+ // .loophead:
+ // lr.w dest, (addr)
+ // bne dest, cmpval, done
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
+ .addReg(DestReg)
+ .addReg(CmpValReg)
+ .addMBB(DoneMBB);
+ // .looptail:
+ // sc.w scratch, newval, (addr)
+ // bnez scratch, loophead
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(NewValReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+ } else {
+ // .loophead:
+ // lr.w dest, (addr)
+ // and scratch, dest, mask
+ // bne scratch, cmpval, done
+ unsigned MaskReg = MI.getOperand(5).getReg();
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(CmpValReg)
+ .addMBB(DoneMBB);
+
+ // .looptail:
+ // xor scratch, dest, newval
+ // and scratch, scratch, mask
+ // xor scratch, dest, scratch
+ // sc.w scratch, scratch, (addr)
+ // bnez scratch, loophead
+ insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
+ MaskReg, ScratchReg);
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+ }
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
+ RISCV_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createRISCVExpandPseudoPass() { return new RISCVExpandPseudo(); }
+
+} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index a816028f9d8b..74417899c8da 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -148,8 +148,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
// Skip to before the restores of callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
// callee-saved register.
- MachineBasicBlock::iterator LastFrameDestroy = MBBI;
- std::advance(LastFrameDestroy, -MFI.getCalleeSavedInfo().size());
+ auto LastFrameDestroy = std::prev(MBBI, MFI.getCalleeSavedInfo().size());
uint64_t StackSize = MFI.getStackSize();
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 04441b9a9b15..aa80365feb83 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "RISCV.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCV.h"
#include "RISCVTargetMachine.h"
+#include "Utils/RISCVMatInt.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
@@ -56,20 +57,47 @@ public:
private:
void doPeepholeLoadStoreADDI();
- void doPeepholeBuildPairF64SplitF64();
};
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
doPeepholeLoadStoreADDI();
- doPeepholeBuildPairF64SplitF64();
}
-void RISCVDAGToDAGISel::Select(SDNode *Node) {
- unsigned Opcode = Node->getOpcode();
- MVT XLenVT = Subtarget->getXLenVT();
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
+ MVT XLenVT) {
+ RISCVMatInt::InstSeq Seq;
+ RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64, Seq);
+
+ SDNode *Result;
+ SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
+ for (RISCVMatInt::Inst &Inst : Seq) {
+ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
+ if (Inst.Opc == RISCV::LUI)
+ Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
+ else
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
+
+ // Only the first instruction has X0 as its source.
+ SrcReg = SDValue(Result, 0);
+ }
+
+ return Result;
+}
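For 32-bit constants the sequence produced via RISCVMatInt typically boils down to the classic lui/addi split. A hedged standalone sketch of that split only (splitHiLo is a hypothetical helper, not the RISCVMatInt implementation; it assumes two's-complement arithmetic):

#include <cassert>
#include <cstdint>

// Split a 32-bit constant into lui's hi20 and addi's sign-extended lo12 such
// that (Hi20 << 12) + Lo12 == Value modulo 2^32. Adding 0x800 before shifting
// compensates for lo12 being sign-extended by addi.
static void splitHiLo(uint32_t Value, uint32_t &Hi20, int32_t &Lo12) {
  Hi20 = (Value + 0x800) >> 12;
  Lo12 = (int32_t)(Value << 20) >> 20;
}

int main() {
  uint32_t Hi;
  int32_t Lo;
  splitHiLo(0x12345FFF, Hi, Lo);
  assert(Hi == 0x12346 && Lo == -1);
  assert((uint32_t)((Hi << 12) + Lo) == 0x12345FFF);
  return 0;
}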
+
+// Returns true if the Node is an ISD::AND with a constant argument. If so,
+// set Mask to that constant value.
+static bool isConstantMask(SDNode *Node, uint64_t &Mask) {
+ if (Node->getOpcode() == ISD::AND &&
+ Node->getOperand(1).getOpcode() == ISD::Constant) {
+ Mask = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ return true;
+ }
+ return false;
+}
- // If we have a custom node, we have already selected
+void RISCVDAGToDAGISel::Select(SDNode *Node) {
+ // If we have a custom node, we have already selected.
if (Node->isMachineOpcode()) {
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
Node->setNodeId(-1);
@@ -78,27 +106,58 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Instruction Selection not handled by the auto-generated tablegen selection
// should be handled here.
+ unsigned Opcode = Node->getOpcode();
+ MVT XLenVT = Subtarget->getXLenVT();
+ SDLoc DL(Node);
EVT VT = Node->getValueType(0);
- if (Opcode == ISD::Constant && VT == XLenVT) {
- auto *ConstNode = cast<ConstantSDNode>(Node);
- // Materialize zero constants as copies from X0. This allows the coalescer
- // to propagate these into other instructions.
- if (ConstNode->isNullValue()) {
+
+ switch (Opcode) {
+ case ISD::Constant: {
+ auto ConstNode = cast<ConstantSDNode>(Node);
+ if (VT == XLenVT && ConstNode->isNullValue()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());
return;
}
+ int64_t Imm = ConstNode->getSExtValue();
+ if (XLenVT == MVT::i64) {
+ ReplaceNode(Node, selectImm(CurDAG, SDLoc(Node), Imm, XLenVT));
+ return;
+ }
+ break;
}
- if (Opcode == ISD::FrameIndex) {
- SDLoc DL(Node);
+ case ISD::FrameIndex: {
SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- EVT VT = Node->getValueType(0);
SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
return;
}
+ case ISD::SRL: {
+ if (!Subtarget->is64Bit())
+ break;
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ uint64_t Mask;
+ // Match (srl (and val, mask), imm) where the result would be a
+ // zero-extended 32-bit integer, i.e. the mask is 0xffffffff or the result
+ // is equivalent to that (SimplifyDemandedBits may have removed lower bits
+ // from the mask that aren't necessary due to the right-shifting).
+ if (Op1.getOpcode() == ISD::Constant &&
+ isConstantMask(Op0.getNode(), Mask)) {
+ uint64_t ShAmt = cast<ConstantSDNode>(Op1.getNode())->getZExtValue();
+
+ if ((Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff) {
+ SDValue ShAmtVal =
+ CurDAG->getTargetConstant(ShAmt, SDLoc(Node), XLenVT);
+ CurDAG->SelectNodeTo(Node, RISCV::SRLIW, XLenVT, Op0.getOperand(0),
+ ShAmtVal);
+ return;
+ }
+ }
+ }
+ }
// Select the default instruction.
SelectCode(Node);
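The mask test in the ISD::SRL case above is easy to sanity-check on concrete values. A small hypothetical check, with a local stand-in for llvm::maskTrailingOnes rather than the LLVM helper itself:

#include <cassert>
#include <cstdint>

// N ones in the low bits, like llvm::maskTrailingOnes<uint64_t>(N).
static uint64_t lowOnes(unsigned N) { return N == 0 ? 0 : (~0ULL >> (64 - N)); }

int main() {
  // (srl (and x, 0xFFFFFFF0), 4): the four low bits are shifted out anyway,
  // so the AND still acts as a zero-extension from i32 and SRLIW applies.
  assert((0xFFFFFFF0ULL | lowOnes(4)) == 0xFFFFFFFFULL);
  // (srl (and x, 0xFFFF0000), 4): bits 4-15 are cleared yet still visible in
  // the result, so this is not equivalent to a 32-bit zero-extension.
  assert((0xFFFF0000ULL | lowOnes(4)) != 0xFFFFFFFFULL);
  return 0;
}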
@@ -216,43 +275,6 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
}
}
-// Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is
-// built of two i32 values, only to be split apart again. This must be done
-// here as a peephole optimisation as the DAG has not been fully legalized at
-// the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so
-// some nodes would not yet have been replaced with libcalls.
-void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() {
- SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
- ++Position;
-
- while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = &*--Position;
- // Skip dead nodes and any nodes other than SplitF64Pseudo.
- if (N->use_empty() || !N->isMachineOpcode() ||
- !(N->getMachineOpcode() == RISCV::SplitF64Pseudo))
- continue;
-
- // If the operand to SplitF64 is a BuildPairF64, the split operation is
- // redundant. Just use the operands to BuildPairF64 as the result.
- SDValue F64Val = N->getOperand(0);
- if (F64Val.isMachineOpcode() &&
- F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) {
- LLVM_DEBUG(
- dbgs() << "Removing redundant SplitF64Pseudo and replacing uses "
- "with BuildPairF64Pseudo operands:\n");
- LLVM_DEBUG(dbgs() << "N: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "F64Val: ");
- LLVM_DEBUG(F64Val->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
- SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
- SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)};
- CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
- }
- }
- CurDAG->RemoveDeadNodes();
-}
-
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 87796e5b1097..508dcbd009ed 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -80,6 +80,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ if (Subtarget.is64Bit()) {
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ }
+
if (!Subtarget.hasStdExtM()) {
setOperationAction(ISD::MUL, XLenVT, Expand);
setOperationAction(ISD::MULHS, XLenVT, Expand);
@@ -111,6 +118,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
ISD::SETGT, ISD::SETGE, ISD::SETNE};
+ ISD::NodeType FPOpToExtend[] = {
+ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
+
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
@@ -119,6 +129,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ for (auto Op : FPOpToExtend)
+ setOperationAction(Op, MVT::f32, Expand);
}
if (Subtarget.hasStdExtD()) {
@@ -131,16 +143,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ for (auto Op : FPOpToExtend)
+ setOperationAction(Op, MVT::f64, Expand);
}
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
- if (Subtarget.hasStdExtA())
+ if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
- else
+ setMinCmpXchgSizeInBits(32);
+ } else {
setMaxAtomicSizeInBitsSupported(0);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
@@ -160,6 +176,34 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}
+bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ default:
+ return false;
+ case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
+ case Intrinsic::riscv_masked_atomicrmw_add_i32:
+ case Intrinsic::riscv_masked_atomicrmw_sub_i32:
+ case Intrinsic::riscv_masked_atomicrmw_nand_i32:
+ case Intrinsic::riscv_masked_atomicrmw_max_i32:
+ case Intrinsic::riscv_masked_atomicrmw_min_i32:
+ case Intrinsic::riscv_masked_atomicrmw_umax_i32:
+ case Intrinsic::riscv_masked_atomicrmw_umin_i32:
+ case Intrinsic::riscv_masked_cmpxchg_i32:
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
+}
+
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
@@ -228,6 +272,10 @@ bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return TargetLowering::isZExtFree(Val, VT2);
}
+bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
+ return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
+}
+
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
@@ -283,9 +331,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VASTART:
return lowerVASTART(Op, DAG);
case ISD::FRAMEADDR:
- return LowerFRAMEADDR(Op, DAG);
+ return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
- return LowerRETURNADDR(Op, DAG);
+ return lowerRETURNADDR(Op, DAG);
}
}
@@ -298,7 +346,7 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
- if (isPositionIndependent() || Subtarget.is64Bit())
+ if (isPositionIndependent())
report_fatal_error("Unable to lowerGlobalAddress");
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
@@ -323,7 +371,7 @@ SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
const BlockAddress *BA = N->getBlockAddress();
int64_t Offset = N->getOffset();
- if (isPositionIndependent() || Subtarget.is64Bit())
+ if (isPositionIndependent())
report_fatal_error("Unable to lowerBlockAddress");
SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
@@ -357,26 +405,6 @@ SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
}
}
-SDValue RISCVTargetLowering::lowerExternalSymbol(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- ExternalSymbolSDNode *N = cast<ExternalSymbolSDNode>(Op);
- const char *Sym = N->getSymbol();
-
- // TODO: should also handle gp-relative loads.
-
- if (isPositionIndependent() || Subtarget.is64Bit())
- report_fatal_error("Unable to lowerExternalSymbol");
-
- SDValue GAHi = DAG.getTargetExternalSymbol(Sym, Ty, RISCVII::MO_HI);
- SDValue GALo = DAG.getTargetExternalSymbol(Sym, Ty, RISCVII::MO_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
- SDValue MNLo =
- SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
- return MNLo;
-}
-
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CondV = Op.getOperand(0);
SDValue TrueV = Op.getOperand(1);
@@ -432,7 +460,7 @@ SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
-SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op,
+SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
@@ -455,7 +483,7 @@ SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
-SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
+SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
@@ -472,7 +500,7 @@ SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
int Off = -XLenInBytes;
- SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(Off, DL, VT);
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
@@ -485,6 +513,84 @@ SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
+// Return true if the given node is a shift with a non-constant shift amount.
+static bool isVariableShift(SDValue Val) {
+ switch (Val.getOpcode()) {
+ default:
+ return false;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return Val.getOperand(1).getOpcode() != ISD::Constant;
+ }
+}
+
+// Returns true if the given node is an sdiv, udiv, or urem with non-constant
+// operands.
+static bool isVariableSDivUDivURem(SDValue Val) {
+ switch (Val.getOpcode()) {
+ default:
+ return false;
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::UREM:
+ return Val.getOperand(0).getOpcode() != ISD::Constant &&
+ Val.getOperand(1).getOpcode() != ISD::Constant;
+ }
+}
+
+SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA: {
+ assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only");
+ if (!DCI.isBeforeLegalize())
+ break;
+ SDValue RHS = N->getOperand(1);
+ if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant ||
+ (RHS->getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5))
+ break;
+ SDValue LHS = N->getOperand(0);
+ SDLoc DL(N);
+ SDValue NewRHS =
+ DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5)));
+ return DCI.CombineTo(
+ N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
+ }
+ case ISD::ANY_EXTEND: {
+ // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64,
+ // then instead sign-extend in order to increase the chance of being able
+ // to select the sllw/srlw/sraw/divw/divuw/remuw instructions.
+ SDValue Src = N->getOperand(0);
+ if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
+ break;
+ if (!isVariableShift(Src) &&
+ !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
+ break;
+ SDLoc DL(N);
+ return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src));
+ }
+ case RISCVISD::SplitF64: {
+ // If the input to SplitF64 is just BuildPairF64 then the operation is
+ // redundant. Instead, use BuildPairF64's operands directly.
+ SDValue Op0 = N->getOperand(0);
+ if (Op0->getOpcode() != RISCVISD::BuildPairF64)
+ break;
+ return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
@@ -807,10 +913,14 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- } else {
- State.addLoc(
- CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ return false;
}
+
+ if (ValVT == MVT::f32) {
+ LocVT = MVT::f32;
+ LocInfo = CCValAssign::Full;
+ }
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
@@ -859,6 +969,22 @@ void RISCVTargetLowering::analyzeOutputArgs(
}
}
+// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
+// values.
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ break;
+ }
+ return Val;
+}
+
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
@@ -866,21 +992,29 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
- EVT ValVT = VA.getValVT();
SDValue Val;
unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return Val;
+
+ return convertLocVTToValVT(DAG, Val, VA, DL);
+}
+
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ EVT LocVT = VA.getLocVT();
+
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
- case CCValAssign::Indirect:
break;
case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
break;
}
return Val;
@@ -995,7 +1129,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- assert(VA.getLocVT() == XLenVT && "Unhandled argument type");
SDValue ArgValue;
// Passing f64 on RV32D with a soft float ABI must be handled as a special
// case.
@@ -1282,13 +1415,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
- switch (VA.getLocInfo()) {
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), ArgValue);
- break;
- case CCValAssign::Indirect: {
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
@@ -1310,10 +1437,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
++i;
}
ArgValue = SpillSlot;
- break;
- }
- default:
- llvm_unreachable("Unknown loc info!");
+ } else {
+ ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
}
// Use local copy if it is a byval arg.
@@ -1415,6 +1540,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Glue the RetValue to the end of the call sequence
Chain = RetValue.getValue(1);
Glue = RetValue.getValue(2);
+
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
SDValue RetValue2 =
@@ -1425,15 +1551,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
RetValue2);
}
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- RetValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), RetValue);
- break;
- }
+ RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
InVals.push_back(RetValue);
}
@@ -1456,22 +1574,6 @@ bool RISCVTargetLowering::CanLowerReturn(
return true;
}
-static SDValue packIntoRegLoc(SelectionDAG &DAG, SDValue Val,
- const CCValAssign &VA, const SDLoc &DL) {
- EVT LocVT = VA.getLocVT();
-
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unexpected CCValAssign::LocInfo");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
- break;
- }
- return Val;
-}
-
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -1514,7 +1616,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
} else {
// Handle a 'normal' return.
- Val = packIntoRegLoc(DAG, Val, VA, DL);
+ Val = convertValVTToLocVT(DAG, Val, VA, DL);
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
// Guarantee that all emitted copies are stuck together.
@@ -1616,3 +1718,83 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
return Builder.CreateFence(AtomicOrdering::Acquire);
return nullptr;
}
+
+TargetLowering::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ if (Size == 8 || Size == 16)
+ return AtomicExpansionKind::MaskedIntrinsic;
+ return AtomicExpansionKind::None;
+}
+
+static Intrinsic::ID
+getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add_i32;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub_i32;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand_i32;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max_i32;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min_i32;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax_i32;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+ }
+}
+
+Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
+ IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+ Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
+ Type *Tys[] = {AlignedAddr->getType()};
+ Function *LrwOpScwLoop = Intrinsic::getDeclaration(
+ AI->getModule(),
+ getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
+
+ // Must pass the shift amount needed to sign extend the loaded value prior
+ // to performing a signed comparison for min/max. ShiftAmt is the number of
+ // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
+ // is the number of bits to left+right shift the value in order to
+ // sign-extend.
+ if (AI->getOperation() == AtomicRMWInst::Min ||
+ AI->getOperation() == AtomicRMWInst::Max) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ unsigned ValWidth =
+ DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
+ Value *SextShamt = Builder.CreateSub(
+ Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
+ return Builder.CreateCall(LrwOpScwLoop,
+ {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ }
+
+ return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+}
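The SextShamt arithmetic above (XLen - ValWidth - ShiftAmt) can be checked with concrete numbers. A hypothetical i8 example, with a local helper mirroring the sll+sra pair that insertSext emits (assumes two's complement and arithmetic right shift, which holds on the targets of interest):

#include <cassert>
#include <cstdint>

// Sign-extend a narrow field in place within a 32-bit word.
static int32_t sextInPlace(uint32_t Word, unsigned Shamt) {
  return (int32_t)(Word << Shamt) >> Shamt;
}

int main() {
  // i8 atomic min/max on RV32 with the byte at bit 16 of the aligned word:
  // ShiftAmt == 16, ValWidth == 8, XLen == 32.
  unsigned XLen = 32, ValWidth = 8, ShiftAmt = 16;
  unsigned SextShamt = XLen - ValWidth - ShiftAmt; // the value passed above
  assert(SextShamt == 8);

  uint32_t Word = 0x00800000; // target byte is 0x80, i.e. negative as an i8
  assert((uint32_t)sextInPlace(Word, SextShamt) == 0xFF800000u);
  return 0;
}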
+
+TargetLowering::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *CI) const {
+ unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
+ if (Size == 8 || Size == 16)
+ return AtomicExpansionKind::MaskedIntrinsic;
+ return AtomicExpansionKind::None;
+}
+
+Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
+ IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+ Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+ Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
+ Type *Tys[] = {AlignedAddr->getType()};
+ Function *MaskedCmpXchg = Intrinsic::getDeclaration(
+ CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys);
+ return Builder.CreateCall(MaskedCmpXchg,
+ {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 280adb29fd02..6970900bb062 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -43,6 +43,9 @@ public:
explicit RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI);
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
@@ -51,10 +54,13 @@ public:
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
@@ -107,15 +113,27 @@ private:
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
bool IsEligibleForTailCallOptimization(CCState &CCInfo,
CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;
+
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ virtual Value *emitMaskedAtomicRMWIntrinsic(
+ IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override;
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
+ virtual Value *
+ emitMaskedAtomicCmpXchgIntrinsic(IRBuilder<> &Builder, AtomicCmpXchgInst *CI,
+ Value *AlignedAddr, Value *CmpVal,
+ Value *NewVal, Value *Mask,
+ AtomicOrdering Ord) const override;
};
}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 529e048045c6..ebd676a6056e 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -45,11 +45,12 @@ def InstFormatCSS : InstFormat<10>;
def InstFormatCIW : InstFormat<11>;
def InstFormatCL : InstFormat<12>;
def InstFormatCS : InstFormat<13>;
-def InstFormatCB : InstFormat<14>;
-def InstFormatCJ : InstFormat<15>;
-def InstFormatOther : InstFormat<16>;
+def InstFormatCA : InstFormat<14>;
+def InstFormatCB : InstFormat<15>;
+def InstFormatCJ : InstFormat<16>;
+def InstFormatOther : InstFormat<17>;
-// The following opcode names and match those given in Table 19.1 in the
+// The following opcode names match those given in Table 19.1 in the
// RISC-V User-level ISA specification ("RISC-V base opcode map").
class RISCVOpcode<bits<7> val> {
bits<7> Value = val;
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index 6abcbd7cc8a1..bda8bbb558eb 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -118,6 +118,19 @@ class RVInst16CS<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
let Inst{1-0} = opcode;
}
+class RVInst16CA<bits<6> funct6, bits<2> funct2, bits<2> opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCA> {
+ bits<3> rs2;
+ bits<3> rs1;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rs1;
+ let Inst{6-5} = funct2;
+ let Inst{4-2} = rs2;
+ let Inst{1-0} = opcode;
+}
+
class RVInst16CB<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
string opcodestr, string argstr>
: RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCB> {
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 327e4a7d615f..76c74368ca11 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -362,9 +362,8 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
- const auto &STI = MF->getSubtarget<RISCVSubtarget>();
- if (TM.isPositionIndependent() || STI.is64Bit())
+ if (TM.isPositionIndependent())
report_fatal_error("Unable to insert indirect branch");
if (!isInt<32>(BrOffset))
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b51e4e70330d..d7cc13d4fabd 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -125,11 +125,6 @@ def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
}];
}
-def uimm12 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<12>;
- let DecoderMethod = "decodeUImmOperand<12>";
-}
-
// A 13-bit signed immediate where the least significant bit is zero.
def simm13_lsb0 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<13, "Lsb0">;
@@ -143,8 +138,7 @@ def simm13_lsb0 : Operand<OtherVT> {
}];
}
-def uimm20 : Operand<XLenVT> {
- let ParserMatchClass = UImmAsmOperand<20>;
+class UImm20Operand : Operand<XLenVT> {
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<20>";
let MCOperandPredicate = [{
@@ -155,9 +149,20 @@ def uimm20 : Operand<XLenVT> {
}];
}
+def uimm20_lui : UImm20Operand {
+ let ParserMatchClass = UImmAsmOperand<20, "LUI">;
+}
+def uimm20_auipc : UImm20Operand {
+ let ParserMatchClass = UImmAsmOperand<20, "AUIPC">;
+}
+
+def Simm21Lsb0JALAsmOperand : SImmAsmOperand<21, "Lsb0JAL"> {
+ let ParserMethod = "parseJALOffset";
+}
+
// A 21-bit signed immediate where the least significant bit is zero.
-def simm21_lsb0 : Operand<OtherVT> {
- let ParserMatchClass = SImmAsmOperand<21, "Lsb0">;
+def simm21_lsb0_jal : Operand<OtherVT> {
+ let ParserMatchClass = Simm21Lsb0JALAsmOperand;
let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<21>";
let MCOperandPredicate = [{
@@ -172,24 +177,42 @@ def BareSymbol : AsmOperandClass {
let Name = "BareSymbol";
let RenderMethod = "addImmOperands";
let DiagnosticType = "InvalidBareSymbol";
+ let ParserMethod = "parseBareSymbol";
}
// A bare symbol.
def bare_symbol : Operand<XLenVT> {
let ParserMatchClass = BareSymbol;
- let MCOperandPredicate = [{
- return MCOp.isBareSymbolRef();
- }];
+}
+
+def CSRSystemRegister : AsmOperandClass {
+ let Name = "CSRSystemRegister";
+ let ParserMethod = "parseCSRSystemRegister";
+ let DiagnosticType = "InvalidCSRSystemRegister";
+}
+
+def csr_sysreg : Operand<XLenVT> {
+ let ParserMatchClass = CSRSystemRegister;
+ let PrintMethod = "printCSRSystemRegister";
+ let DecoderMethod = "decodeUImmOperand<12>";
}
// A parameterized register class alternative to i32imm/i64imm from Target.td.
-def ixlenimm : Operand<XLenVT> {
- let ParserMatchClass = ImmXLenAsmOperand<"">;
+def ixlenimm : Operand<XLenVT>;
+
+def ixlenimm_li : Operand<XLenVT> {
+ let ParserMatchClass = ImmXLenAsmOperand<"", "LI">;
}
// Standalone (codegen-only) immleaf patterns.
def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
+// A mask value that won't affect significant shift bits.
+def immbottomxlenset : ImmLeaf<XLenVT, [{
+ if (Subtarget->is64Bit())
+ return countTrailingOnes<uint64_t>(Imm) >= 6;
+ return countTrailingOnes<uint64_t>(Imm) >= 5;
+}]>;
// Addressing modes.
// Necessary because a frameindex can't be matched directly in a pattern.
@@ -255,13 +278,13 @@ class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ir<bits<3> funct3, string opcodestr>
- : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins uimm12:$imm12, GPR:$rs1),
+ : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, GPR:$rs1),
opcodestr, "$rd, $imm12, $rs1">;
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class CSR_ii<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd),
- (ins uimm12:$imm12, uimm5:$rs1),
+ (ins csr_sysreg:$imm12, uimm5:$rs1),
opcodestr, "$rd, $imm12, $rs1">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -285,14 +308,14 @@ class Priv<string opcodestr, bits<7> funct7>
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, isReMaterializable = 1, mayLoad = 0, mayStore = 0 in {
-def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20:$imm20),
+def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20),
"lui", "$rd, $imm20">;
-def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20:$imm20),
+def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20_auipc:$imm20),
"auipc", "$rd, $imm20">;
let isCall = 1 in
-def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0:$imm20),
+def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
"jal", "$rd, $imm20">;
let isCall = 1 in
@@ -379,6 +402,15 @@ def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> {
let rd = 0;
let imm12 = 1;
}
+
+// This is a de facto standard (as set by GNU binutils) 32-bit unimplemented
+// instruction (i.e., it should always trap, if your implementation has invalid
+// instruction traps).
+def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", ""> {
+ let rs1 = 0;
+ let rd = 0;
+ let imm12 = 0b110000000000;
+}
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
def CSRRW : CSR_ir<0b001, "csrrw">;
@@ -467,7 +499,7 @@ def : InstAlias<"nop", (ADDI X0, X0, 0)>;
// expanded to real instructions immediately.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 32,
isCodeGenOnly = 0, isAsmParserOnly = 1 in
-def PseudoLI : Pseudo<(outs GPR:$rd), (ins ixlenimm:$imm), [],
+def PseudoLI : Pseudo<(outs GPR:$rd), (ins ixlenimm_li:$imm), [],
"li", "$rd, $imm">;
def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>;
@@ -516,8 +548,8 @@ def : InstAlias<"bleu $rs, $rt, $offset",
(BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>;
// "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction.
-def : InstAlias<"j $offset", (JAL X0, simm21_lsb0:$offset)>;
-def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0:$offset)>;
+def : InstAlias<"j $offset", (JAL X0, simm21_lsb0_jal:$offset)>;
+def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>;
def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0)>;
def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0)>;
def : InstAlias<"ret", (JALR X0, X1, 0), 2>;
@@ -538,18 +570,67 @@ def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, 0xC80, X0)>;
def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, 0xC81, X0)>;
} // Predicates = [IsRV32]
-def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, uimm12:$csr, X0)>;
-def : InstAlias<"csrw $csr, $rs", (CSRRW X0, uimm12:$csr, GPR:$rs)>;
-def : InstAlias<"csrs $csr, $rs", (CSRRS X0, uimm12:$csr, GPR:$rs)>;
-def : InstAlias<"csrc $csr, $rs", (CSRRC X0, uimm12:$csr, GPR:$rs)>;
+def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, csr_sysreg:$csr, X0)>;
+def : InstAlias<"csrw $csr, $rs", (CSRRW X0, csr_sysreg:$csr, GPR:$rs)>;
+def : InstAlias<"csrs $csr, $rs", (CSRRS X0, csr_sysreg:$csr, GPR:$rs)>;
+def : InstAlias<"csrc $csr, $rs", (CSRRC X0, csr_sysreg:$csr, GPR:$rs)>;
-def : InstAlias<"csrwi $csr, $imm", (CSRRWI X0, uimm12:$csr, uimm5:$imm)>;
-def : InstAlias<"csrsi $csr, $imm", (CSRRSI X0, uimm12:$csr, uimm5:$imm)>;
-def : InstAlias<"csrci $csr, $imm", (CSRRCI X0, uimm12:$csr, uimm5:$imm)>;
+def : InstAlias<"csrwi $csr, $imm", (CSRRWI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrsi $csr, $imm", (CSRRSI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrci $csr, $imm", (CSRRCI X0, csr_sysreg:$csr, uimm5:$imm)>;
+
+let EmitPriority = 0 in {
+def : InstAlias<"csrw $csr, $imm", (CSRRWI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrs $csr, $imm", (CSRRSI X0, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrc $csr, $imm", (CSRRCI X0, csr_sysreg:$csr, uimm5:$imm)>;
+
+def : InstAlias<"csrrw $rd, $csr, $imm", (CSRRWI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrrs $rd, $csr, $imm", (CSRRSI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+def : InstAlias<"csrrc $rd, $csr, $imm", (CSRRCI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>;
+}
def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>;
def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
+let EmitPriority = 0 in {
+def : InstAlias<"add $rd, $rs1, $imm12",
+ (ADDI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"and $rd, $rs1, $imm12",
+ (ANDI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"xor $rd, $rs1, $imm12",
+ (XORI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"or $rd, $rs1, $imm12",
+ (ORI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sll $rd, $rs1, $shamt",
+ (SLLI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : InstAlias<"srl $rd, $rs1, $shamt",
+ (SRLI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : InstAlias<"sra $rd, $rs1, $shamt",
+ (SRAI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
+let Predicates = [IsRV64] in {
+def : InstAlias<"addw $rd, $rs1, $imm12",
+ (ADDIW GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sllw $rd, $rs1, $shamt",
+ (SLLIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+def : InstAlias<"srlw $rd, $rs1, $shamt",
+ (SRLIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+def : InstAlias<"sraw $rd, $rs1, $shamt",
+ (SRAIW GPR:$rd, GPR:$rs1, uimm5:$shamt)>;
+} // Predicates = [IsRV64]
+def : InstAlias<"slt $rd, $rs1, $imm12",
+ (SLTI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+def : InstAlias<"sltu $rd, $rs1, $imm12",
+ (SLTIU GPR:$rd, GPR:$rs1, simm12:$imm12)>;
+}
+
+def : MnemonicAlias<"move", "mv">;
+
+// The SCALL and SBREAK instructions were renamed to ECALL and EBREAK in
+// version 2.1 of the user-level ISA. Like the GNU toolchain, we still accept
+// the old name for backwards compatibility.
+def : MnemonicAlias<"scall", "ecall">;
+def : MnemonicAlias<"sbreak", "ebreak">;
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//
@@ -560,7 +641,7 @@ def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
/// Generic pattern classes
-class PatGprGpr<SDPatternOperator OpNode, RVInstR Inst>
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst>
: Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
: Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>;
@@ -573,12 +654,37 @@ class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst>
def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
}]>;
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def sexti32 : PatFrags<(ops node:$src),
+ [(sext_inreg node:$src, i32),
+ (assertsexti32 node:$src)]>;
+def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def assertzexti5 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits() <= 5;
+}]>;
+def zexti32 : PatFrags<(ops node:$src),
+ [(and node:$src, 0xffffffff),
+ (assertzexti32 node:$src)]>;
+// Defines a legal mask for (assertzexti5 (and src, mask)) to be combinable
+// with a shiftw operation. The mask mustn't modify the lower 5 bits or the
+// upper 32 bits.
+def shiftwamt_mask : ImmLeaf<XLenVT, [{
+ return countTrailingOnes<uint64_t>(Imm) >= 5 && isUInt<32>(Imm);
+}]>;
+def shiftwamt : PatFrags<(ops node:$src),
+ [(assertzexti5 (and node:$src, shiftwamt_mask)),
+ (assertzexti5 node:$src)]>;
/// Immediates
def : Pat<(simm12:$imm), (ADDI X0, simm12:$imm)>;
def : Pat<(simm32hi20:$imm), (LUI (HI20 imm:$imm))>;
-def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>;
+def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>,
+ Requires<[IsRV32]>;
/// Simple arithmetic operations
@@ -591,13 +697,23 @@ def : PatGprGpr<and, AND>;
def : PatGprSimm12<and, ANDI>;
def : PatGprGpr<xor, XOR>;
def : PatGprSimm12<xor, XORI>;
-def : PatGprGpr<shl, SLL>;
def : PatGprUimmLog2XLen<shl, SLLI>;
-def : PatGprGpr<srl, SRL>;
def : PatGprUimmLog2XLen<srl, SRLI>;
-def : PatGprGpr<sra, SRA>;
def : PatGprUimmLog2XLen<sra, SRAI>;
+// Match both a plain shift and one where the shift amount is masked (this is
+// typically introduced when the legalizer promotes the shift amount and
+// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
+// ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
+class shiftop<SDPatternOperator operator>
+ : PatFrags<(ops node:$val, node:$count),
+ [(operator node:$val, node:$count),
+ (operator node:$val, (and node:$count, immbottomxlenset))]>;
+
+def : PatGprGpr<shiftop<shl>, SLL>;
+def : PatGprGpr<shiftop<srl>, SRL>;
+def : PatGprGpr<shiftop<sra>, SRA>;
+
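A small sketch of why the masked form matched by shiftop is safe to fold away, assuming RV32 (RV64 reads shamt[5:0] instead, hence the 6 in immbottomxlenset):

    #include <cstdint>
    // Base-ISA semantics on RV32: SLL/SRL/SRA only read shamt[4:0].
    static uint32_t sllRV32(uint32_t Val, uint32_t Amt) {
      return Val << (Amt & 0x1f);
    }
    // sllRV32(V, Amt) == sllRV32(V, Amt & Mask) for any Mask whose low five
    // bits are all ones, which is exactly the property immbottomxlenset checks.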
/// FrameIndex calculations
def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12),
@@ -614,7 +730,9 @@ def : PatGprSimm12<setult, SLTIU>;
// Define pattern expansions for setcc operations that aren't directly
// handled by a RISC-V instruction.
+def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;
def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;
def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;
def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
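A quick numeric check of the seteq/setne expansions above, sketched in C++ terms:

    #include <cstdint>
    // seteq a, b  ->  SLTIU (XOR a, b), 1 : the xor is zero exactly when a == b
    static uint64_t setEQ(uint64_t A, uint64_t B) { return (A ^ B) < 1; }
    // setne a, b  ->  SLTU x0, (XOR a, b) : any nonzero xor is above zero
    static uint64_t setNE(uint64_t A, uint64_t B) { return 0 < (A ^ B); }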
@@ -638,7 +756,7 @@ def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;
// Match `(brcond (CondOp ..), ..)` and lower to the appropriate RISC-V branch
// instruction.
class BccPat<PatFrag CondOp, RVInstB Inst>
- : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
+ : Pat<(brcond (XLenVT (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
(Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>;
def : BccPat<seteq, BEQ>;
@@ -649,7 +767,7 @@ def : BccPat<setult, BLTU>;
def : BccPat<setuge, BGEU>;
class BccSwapPat<PatFrag CondOp, RVInst InstBcc>
- : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
+ : Pat<(brcond (XLenVT (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
(InstBcc GPR:$rs2, GPR:$rs1, bb:$imm12)>;
// Condition codes that don't have matching RISC-V branch instructions, but
@@ -664,8 +782,8 @@ def : BccSwapPat<setule, BGEU>;
def : Pat<(brcond GPR:$cond, bb:$imm12), (BNE GPR:$cond, X0, bb:$imm12)>;
let isBarrier = 1, isBranch = 1, isTerminator = 1 in
-def PseudoBR : Pseudo<(outs), (ins simm21_lsb0:$imm20), [(br bb:$imm20)]>,
- PseudoInstExpansion<(JAL X0, simm21_lsb0:$imm20)>;
+def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
+ PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>;
let isCall = 1, Defs=[X1] in
let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
@@ -720,6 +838,11 @@ def : Pat<(Tail (iPTR tglobaladdr:$dst)),
def : Pat<(Tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in
+def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+ "lla", "$dst, $src">;
+
/// Loads
multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
@@ -737,7 +860,7 @@ defm : LdPat<sextloadi8, LB>;
defm : LdPat<extloadi8, LB>;
defm : LdPat<sextloadi16, LH>;
defm : LdPat<extloadi16, LH>;
-defm : LdPat<load, LW>;
+defm : LdPat<load, LW>, Requires<[IsRV32]>;
defm : LdPat<zextloadi8, LBU>;
defm : LdPat<zextloadi16, LHU>;
@@ -756,7 +879,7 @@ multiclass StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
defm : StPat<truncstorei8, SB, GPR>;
defm : StPat<truncstorei16, SH, GPR>;
-defm : StPat<store, SW, GPR>;
+defm : StPat<store, SW, GPR>, Requires<[IsRV32]>;
/// Fences
@@ -764,13 +887,13 @@ defm : StPat<store, SW, GPR>;
// Manual: Volume I.
// fence acquire -> fence r, rw
-def : Pat<(atomic_fence (i32 4), (imm)), (FENCE 0b10, 0b11)>;
+def : Pat<(atomic_fence (XLenVT 4), (imm)), (FENCE 0b10, 0b11)>;
// fence release -> fence rw, w
-def : Pat<(atomic_fence (i32 5), (imm)), (FENCE 0b11, 0b1)>;
+def : Pat<(atomic_fence (XLenVT 5), (imm)), (FENCE 0b11, 0b1)>;
// fence acq_rel -> fence.tso
-def : Pat<(atomic_fence (i32 6), (imm)), (FENCE_TSO)>;
+def : Pat<(atomic_fence (XLenVT 6), (imm)), (FENCE_TSO)>;
// fence seq_cst -> fence rw, rw
-def : Pat<(atomic_fence (i32 7), (imm)), (FENCE 0b11, 0b11)>;
+def : Pat<(atomic_fence (XLenVT 7), (imm)), (FENCE 0b11, 0b11)>;
// Lowering for atomic load and store is defined in RISCVInstrInfoA.td.
// Although these are lowered to fence+load/store instructions defined in the
@@ -788,6 +911,66 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(CallSeqEnd timm:$amt1, timm:$amt2)]>;
} // Defs = [X2], Uses = [X2]
+/// RV64 patterns
+
+let Predicates = [IsRV64] in {
+
+/// sext and zext
+
+def : Pat<(sext_inreg GPR:$rs1, i32), (ADDIW GPR:$rs1, 0)>;
+def : Pat<(and GPR:$rs1, 0xffffffff), (SRLI (SLLI GPR:$rs1, 32), 32)>;
+
+/// ALU operations
+
+def : Pat<(sext_inreg (add GPR:$rs1, GPR:$rs2), i32),
+ (ADDW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (add GPR:$rs1, simm12:$imm12), i32),
+ (ADDIW GPR:$rs1, simm12:$imm12)>;
+def : Pat<(sext_inreg (sub GPR:$rs1, GPR:$rs2), i32),
+ (SUBW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
+ (SLLIW GPR:$rs1, uimm5:$shamt)>;
+// (srl (zexti32 ...), uimm5:$shamt) is matched with custom code due to the
+// need to undo manipulation of the mask value performed by DAGCombine.
+def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
+ (SRAIW GPR:$rs1, uimm5:$shamt)>;
+
+// For variable-length shifts, we rely on assertzexti5 being inserted during
+// lowering (see RISCVTargetLowering::PerformDAGCombine). This enables us to
+// guarantee that selecting a 32-bit variable shift is legal (as the variable
+// shift is known to be <= 32). We must also be careful not to create
+// semantically incorrect patterns. For instance, selecting SRLW for
+// (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
+// is not guaranteed to be safe, as we don't know whether the upper 32 bits of
+// the result are used or not (in the case where rs2=0, this is a
+// sign-extension operation).
+
+def : Pat<(sext_inreg (shl GPR:$rs1, (shiftwamt GPR:$rs2)), i32),
+ (SLLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (shl GPR:$rs1, (shiftwamt GPR:$rs2))),
+ (SRLI (SLLI (SLLW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : Pat<(sext_inreg (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), i32),
+ (SRLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2))),
+ (SRLI (SLLI (SRLW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : Pat<(sra (sexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
+ (SRAW GPR:$rs1, GPR:$rs2)>;
+
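A numeric illustration of the SRLW caveat above, assuming the RV64 *W shift semantics (operate on the low 32 bits, then sign-extend the 32-bit result):

    #include <cstdint>
    static uint64_t srlw(uint64_t Rs1, uint64_t Rs2) {
      uint32_t Lo = static_cast<uint32_t>(Rs1) >> (Rs2 & 0x1f);
      return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(Lo)));
    }
    // For Rs1 = 0x0000000080000000 and Rs2 = 0, a plain 64-bit srl leaves the
    // value unchanged, but srlw() returns 0xffffffff80000000. SRLW is therefore
    // only selected when the upper 32 bits of the result are known not to
    // matter, or are explicitly re-zeroed as in the SRLI/SLLI wrappers above.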
+/// Loads
+
+defm : LdPat<sextloadi32, LW>;
+defm : LdPat<extloadi32, LW>;
+defm : LdPat<zextloadi32, LWU>;
+defm : LdPat<load, LD>;
+
+/// Stores
+
+defm : StPat<truncstorei32, SW, GPR>;
+defm : StPat<store, SD, GPR>;
+} // Predicates = [IsRV64]
+
//===----------------------------------------------------------------------===//
// Standard extensions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 379322060438..9cb1d2f0b627 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -44,6 +44,17 @@ multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
}
+multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
+ def : Pat<(StoreOp GPR:$rs1, StTy:$rs2), (Inst StTy:$rs2, GPR:$rs1, 0)>;
+ def : Pat<(StoreOp AddrFI:$rs1, StTy:$rs2), (Inst StTy:$rs2, AddrFI:$rs1, 0)>;
+ def : Pat<(StoreOp (add GPR:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
+ def : Pat<(StoreOp (add AddrFI:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
+ def : Pat<(StoreOp (IsOrAdd AddrFI:$rs1, simm12:$imm12), StTy:$rs2),
+ (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -91,7 +102,177 @@ defm : LdPat<atomic_load_8, LB>;
defm : LdPat<atomic_load_16, LH>;
defm : LdPat<atomic_load_32, LW>;
-defm : StPat<atomic_store_8, SB, GPR>;
-defm : StPat<atomic_store_16, SH, GPR>;
-defm : StPat<atomic_store_32, SW, GPR>;
-} // Predicates = [HasStdExtF]
+defm : AtomicStPat<atomic_store_8, SB, GPR>;
+defm : AtomicStPat<atomic_store_16, SH, GPR>;
+defm : AtomicStPat<atomic_store_32, SW, GPR>;
+
+/// AMOs
+
+multiclass AMOPat<string AtomicOp, string BaseInst> {
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst)>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst#"_AQ")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst#"_RL")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst#"_AQ_RL")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst#"_AQ_RL")>;
+}
+
+defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
+defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;
+defm : AMOPat<"atomic_load_and_32", "AMOAND_W">;
+defm : AMOPat<"atomic_load_or_32", "AMOOR_W">;
+defm : AMOPat<"atomic_load_xor_32", "AMOXOR_W">;
+defm : AMOPat<"atomic_load_max_32", "AMOMAX_W">;
+defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
+defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
+defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
+
+def : Pat<(atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr),
+ (AMOADD_W GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acquire GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_release GPR:$addr, GPR:$incr),
+ (AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+
+/// Pseudo AMOs
+
+class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$incr, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def PseudoAtomicLoadNand32 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(atomic_load_nand_32_acquire GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(atomic_load_nand_32_release GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
+
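The ordering immediates used above (2, 4, 5, 6, 7) mirror llvm::AtomicOrdering; a sketch of the assumed numbering, with AtomicOrdering.h remaining the authoritative definition:

    enum class AtomicOrdering : unsigned {
      NotAtomic = 0,
      Unordered = 1,
      Monotonic = 2,
      // 3 is unused (no consume ordering)
      Acquire = 4,
      Release = 5,
      AcquireRelease = 6,
      SequentiallyConsistent = 7
    };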
+class PseudoMaskedAMO
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOMinMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$sextshamt,
+ ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOUMinUMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
+
+class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ imm:$ordering),
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ imm:$ordering)>;
+
+def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
+ PseudoMaskedAtomicSwap32>;
+def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
+ PseudoMaskedAtomicLoadAdd32>;
+def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
+ PseudoMaskedAtomicLoadSub32>;
+def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
+ PseudoMaskedAtomicLoadNand32>;
+def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
+ PseudoMaskedAtomicLoadMax32>;
+def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
+ PseudoMaskedAtomicLoadMin32>;
+def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
+ PseudoMaskedAtomicLoadUMax32>;
+def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
+ PseudoMaskedAtomicLoadUMin32>;
+
+/// Compare and exchange
+
+class PseudoCmpXchg
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst> {
+ def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+ def : Pat<(!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+ def : Pat<(!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+ def : Pat<(!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+ def : Pat<(!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
+def PseudoCmpXchg32 : PseudoCmpXchg;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+
+def PseudoMaskedCmpXchg32
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
+ i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def : Pat<(int_riscv_masked_cmpxchg_i32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
+
+} // Predicates = [HasStdExtA]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 5d1c62c0b653..ad68b5a7dc97 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -167,7 +167,7 @@ def uimm10_lsb00nonzero : Operand<XLenVT>,
[{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<10>";
+ let DecoderMethod = "decodeUImmNonZeroOperand<10>";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -182,12 +182,12 @@ def simm10_lsb0000nonzero : Operand<XLenVT>,
[{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<10>";
+ let DecoderMethod = "decodeSImmNonZeroOperand<10>";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
return false;
- return isShiftedInt<6, 4>(Imm);
+ return isShiftedInt<6, 4>(Imm) && (Imm != 0);
}];
}
@@ -258,16 +258,13 @@ class Shift_right<bits<2> funct2, string OpcodeStr, RegisterClass cls,
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class CS_ALU<bits<2> funct2, string OpcodeStr, RegisterClass cls,
- bit RV64only>
- : RVInst16CS<0b100, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2),
+class CS_ALU<bits<6> funct6, bits<2> funct2, string OpcodeStr,
+ RegisterClass cls>
+ : RVInst16CA<funct6, funct2, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2),
OpcodeStr, "$rd, $rs2"> {
bits<3> rd;
let Constraints = "$rd = $rd_wb";
- let Inst{12} = RV64only;
- let Inst{11-10} = 0b11;
let Inst{9-7} = rd;
- let Inst{6-5} = funct2;
}
//===----------------------------------------------------------------------===//
@@ -411,14 +408,14 @@ def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:
let Inst{6-2} = imm{4-0};
}
-def C_SUB : CS_ALU<0b00, "c.sub", GPRC, 0>;
-def C_XOR : CS_ALU<0b01, "c.xor", GPRC, 0>;
-def C_OR : CS_ALU<0b10, "c.or" , GPRC, 0>;
-def C_AND : CS_ALU<0b11, "c.and", GPRC, 0>;
+def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>;
+def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>;
+def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>;
+def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>;
let Predicates = [HasStdExtC, IsRV64] in {
-def C_SUBW : CS_ALU<0b00, "c.subw", GPRC, 1>;
-def C_ADDW : CS_ALU<0b01, "c.addw", GPRC, 1>;
+def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>;
+def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -478,7 +475,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2),
"c.mv", "$rs1, $rs2">;
-let rs1 = 0, rs2 = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let rs1 = 0, rs2 = 0, hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
@@ -517,6 +514,13 @@ def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> {
let Inst{9-7} = imm{8-6};
}
+// The all-zeros pattern isn't a valid RISC-V instruction. It's used by GNU
+// binutils as a 16-bit instruction known to be unimplemented (i.e., trapping).
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> {
+ let Inst{15-0} = 0;
+}
+
} // Predicates = [HasStdExtC]
//===----------------------------------------------------------------------===//
@@ -625,6 +629,8 @@ def : CompressPat<(AND GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
} // Predicates = [HasStdExtC]
let Predicates = [HasStdExtC, IsRV64] in {
+def : CompressPat<(ADDIW GPRNoX0:$rd, X0, simm6:$imm),
+ (C_LI GPRNoX0:$rd, simm6:$imm)>;
def : CompressPat<(SUBW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
(C_SUBW GPRC:$rs1, GPRC:$rs2)>;
def : CompressPat<(ADDW GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
@@ -678,6 +684,7 @@ def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, X0),
def : CompressPat<(ADDI GPRNoX0:$rs1, GPRNoX0:$rs2, 0),
(C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>;
def : CompressPat<(EBREAK), (C_EBREAK)>;
+def : CompressPat<(UNIMP), (C_UNIMP)>;
def : CompressPat<(JALR X1, GPRNoX0:$rs1, 0),
(C_JALR GPRNoX0:$rs1)>;
def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs1, GPRNoX0:$rs2),
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 06b834d55ade..9f1cd50de595 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -212,13 +212,8 @@ let Predicates = [HasStdExtD] in {
def : Pat<(fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>;
def : Pat<(fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
-// FP->[u]int. Round-to-zero must be used
-def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_W_D FPR64:$rs1, 0b001)>;
-def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
-
-// [u]int->fp
-def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W GPR:$rs1)>;
-def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
+// [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
/// Float arithmetic operations
@@ -235,6 +230,22 @@ def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
def : PatFpr64Fpr64<fcopysign, FSGNJ_D>;
def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>;
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3),
+ (FMADD_D $rs1, $rs2, $rs3, 0b111)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)),
+ (FMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
+ (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
+ (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
// The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
// canonical NaN when giving a signaling NaN. This doesn't match the LLVM
// behaviour (see https://bugs.llvm.org/show_bug.cgi?id=27363). However, the
@@ -287,3 +298,13 @@ def SplitF64Pseudo
[(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64:$src))]>;
} // Predicates = [HasStdExtD]
+
+let Predicates = [HasStdExtD, IsRV32] in {
+// double->[u]int. Round-to-zero must be used.
+def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_W_D FPR64:$rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
+
+// [u]int->double.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W GPR:$rs1)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
+} // Predicates = [HasStdExtD, IsRV32]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 6d7c59becf24..03bdac45873d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -27,7 +27,7 @@ def FRMArg : AsmOperandClass {
def frmarg : Operand<XLenVT> {
let ParserMatchClass = FRMArg;
let PrintMethod = "printFRMArg";
- let DecoderMethod = "decodeUImmOperand<3>";
+ let DecoderMethod = "decodeFRMArg";
}
//===----------------------------------------------------------------------===//
@@ -252,13 +252,8 @@ let Predicates = [HasStdExtF] in {
def : Pat<(bitconvert GPR:$rs1), (FMV_W_X GPR:$rs1)>;
def : Pat<(bitconvert FPR32:$rs1), (FMV_X_W FPR32:$rs1)>;
-// FP->[u]int. Round-to-zero must be used
-def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
-
-// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+// [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
/// Float arithmetic operations
@@ -275,6 +270,22 @@ def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>;
def : PatFpr32Fpr32<fcopysign, FSGNJ_S>;
def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>;
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3),
+ (FMADD_S $rs1, $rs2, $rs3, 0b111)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
+ (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
+ (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
+ (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
// The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
// canonical NaN when given a signaling NaN. This doesn't match the LLVM
// behaviour (see https://bugs.llvm.org/show_bug.cgi?id=27363). However, the
@@ -313,3 +324,13 @@ defm : LdPat<load, FLW>;
defm : StPat<store, FSW, FPR32>;
} // Predicates = [HasStdExtF]
+
+let Predicates = [HasStdExtF, IsRV32] in {
+// float->[u]int. Round-to-zero must be used.
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+
+// [u]int->float. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV32]
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 2dd10ada4003..05dd3311ad54 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -49,3 +49,34 @@ def : PatGprGpr<udiv, DIVU>;
def : PatGprGpr<srem, REM>;
def : PatGprGpr<urem, REMU>;
} // Predicates = [HasStdExtM]
+
+let Predicates = [HasStdExtM, IsRV64] in {
+def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
+ (MULW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2)), i32),
+ (DIVW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2))),
+ (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
+def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
+ (DIVUW GPR:$rs1, GPR:$rs2)>;
+// It's cheaper to perform a divuw and zero-extend the result than to
+// zero-extend both inputs to a udiv.
+def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
+ (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+// Although the sexti32 operands may not have originated from an i32 srem,
+// this pattern is safe as it is impossible for two sign extended inputs to
+// produce a result where res[63:32]=0 and res[31]=1.
+def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
+ (REMW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
+ (sexti32 GPR:$rs2)), i32),
+ (REMW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
+ (REMUW GPR:$rs1, GPR:$rs2)>;
+// It's cheaper to perform a remuw and zero-extend the result than to
+// zero-extend both inputs to a urem.
+def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
+ (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+} // Predicates = [HasStdExtM, IsRV64]
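A numeric check of the divuw/remuw remark above, assuming the *W semantics of dividing the low 32 bits and sign-extending the 32-bit result (divide-by-zero handling omitted from this sketch):

    #include <cstdint>
    static uint64_t divuw(uint64_t Rs1, uint64_t Rs2) {
      uint32_t Q = static_cast<uint32_t>(Rs1) / static_cast<uint32_t>(Rs2);
      return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(Q)));
    }
    // divuw(0xffffffff, 1) == 0xffffffffffffffff, so the SRLI(SLLI(..., 32), 32)
    // wrapper in the pattern is what restores the zero-extended value
    // 0x00000000ffffffff that the original i64 udiv expects.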
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index b8fa8a97d41a..cea009c5447d 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -62,7 +62,7 @@ private:
MachineRegisterInfo *MRI;
std::set<MachineInstr *> DeadInstrs;
};
-}; // end anonymous namespace
+} // end anonymous namespace
char RISCVMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset",
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td
new file mode 100644
index 000000000000..f1b7984ffe6b
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -0,0 +1,352 @@
+//===- RISCVSystemOperands.td ----------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the symbolic operands permitted for various kinds of
+// RISC-V system instruction.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/TableGen/SearchableTable.td"
+
+//===----------------------------------------------------------------------===//
+// CSR (control and status register read/write) instruction options.
+//===----------------------------------------------------------------------===//
+
+class SysReg<string name, bits<12> op> {
+ string Name = name;
+ bits<12> Encoding = op;
+ // FIXME: add these additional fields when needed.
+ // Privilege Access: Read and Write = 0, 1, 2; Read-Only = 3.
+ // Privilege Mode: User = 0, System = 1 or Machine = 3.
+ // bits<2> ReadWrite = op{11 - 10};
+ // bits<2> XMode = op{9 - 8};
+ // Check Extra field name and what bits 7-6 correspond to.
+ // bits<2> Extra = op{7 - 6};
+ // Register number without the privilege bits.
+ // bits<6> Number = op{5 - 0};
+ code FeaturesRequired = [{ {} }];
+ bit isRV32Only = 0;
+}
+
+def SysRegsList : GenericTable {
+ let FilterClass = "SysReg";
+ // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
+ let Fields = [ "Name", "Encoding", "FeaturesRequired", "isRV32Only" ];
+
+ let PrimaryKey = [ "Encoding" ];
+ let PrimaryKeyName = "lookupSysRegByEncoding";
+}
+
+def lookupSysRegByName : SearchIndex {
+ let Table = SysRegsList;
+ let Key = [ "Name" ];
+}
+
+// The following CSR encodings match those given in Tables 2.2,
+// 2.3, 2.4 and 2.5 in the RISC-V Instruction Set Manual
+// Volume II: Privileged Architecture.
+
+//===--------------------------
+// User Trap Setup
+//===--------------------------
+def : SysReg<"ustatus", 0x000>;
+def : SysReg<"uie", 0x004>;
+def : SysReg<"utvec", 0x005>;
+
+//===--------------------------
+// User Trap Handling
+//===--------------------------
+def : SysReg<"uscratch", 0x040>;
+def : SysReg<"uepc", 0x041>;
+def : SysReg<"ucause", 0x042>;
+def : SysReg<"utval", 0x043>;
+def : SysReg<"uip", 0x044>;
+
+//===--------------------------
+// User Floating-Point CSRs
+//===--------------------------
+
+let FeaturesRequired = [{ {RISCV::FeatureStdExtF} }] in {
+def : SysReg<"fflags", 0x001>;
+def : SysReg<"frm", 0x002>;
+def : SysReg<"fcsr", 0x003>;
+}
+
+//===--------------------------
+// User Counter/Timers
+//===--------------------------
+def : SysReg<"cycle", 0xC00>;
+def : SysReg<"time", 0xC01>;
+def : SysReg<"instret", 0xC02>;
+
+def : SysReg<"hpmcounter3", 0xC03>;
+def : SysReg<"hpmcounter4", 0xC04>;
+def : SysReg<"hpmcounter5", 0xC05>;
+def : SysReg<"hpmcounter6", 0xC06>;
+def : SysReg<"hpmcounter7", 0xC07>;
+def : SysReg<"hpmcounter8", 0xC08>;
+def : SysReg<"hpmcounter9", 0xC09>;
+def : SysReg<"hpmcounter10", 0xC0A>;
+def : SysReg<"hpmcounter11", 0xC0B>;
+def : SysReg<"hpmcounter12", 0xC0C>;
+def : SysReg<"hpmcounter13", 0xC0D>;
+def : SysReg<"hpmcounter14", 0xC0E>;
+def : SysReg<"hpmcounter15", 0xC0F>;
+def : SysReg<"hpmcounter16", 0xC10>;
+def : SysReg<"hpmcounter17", 0xC11>;
+def : SysReg<"hpmcounter18", 0xC12>;
+def : SysReg<"hpmcounter19", 0xC13>;
+def : SysReg<"hpmcounter20", 0xC14>;
+def : SysReg<"hpmcounter21", 0xC15>;
+def : SysReg<"hpmcounter22", 0xC16>;
+def : SysReg<"hpmcounter23", 0xC17>;
+def : SysReg<"hpmcounter24", 0xC18>;
+def : SysReg<"hpmcounter25", 0xC19>;
+def : SysReg<"hpmcounter26", 0xC1A>;
+def : SysReg<"hpmcounter27", 0xC1B>;
+def : SysReg<"hpmcounter28", 0xC1C>;
+def : SysReg<"hpmcounter29", 0xC1D>;
+def : SysReg<"hpmcounter30", 0xC1E>;
+def : SysReg<"hpmcounter31", 0xC1F>;
+
+let isRV32Only = 1 in {
+def: SysReg<"cycleh", 0xC80>;
+def: SysReg<"timeh", 0xC81>;
+def: SysReg<"instreth", 0xC82>;
+
+def: SysReg<"hpmcounter3h", 0xC83>;
+def: SysReg<"hpmcounter4h", 0xC84>;
+def: SysReg<"hpmcounter5h", 0xC85>;
+def: SysReg<"hpmcounter6h", 0xC86>;
+def: SysReg<"hpmcounter7h", 0xC87>;
+def: SysReg<"hpmcounter8h", 0xC88>;
+def: SysReg<"hpmcounter9h", 0xC89>;
+def: SysReg<"hpmcounter10h", 0xC8A>;
+def: SysReg<"hpmcounter11h", 0xC8B>;
+def: SysReg<"hpmcounter12h", 0xC8C>;
+def: SysReg<"hpmcounter13h", 0xC8D>;
+def: SysReg<"hpmcounter14h", 0xC8E>;
+def: SysReg<"hpmcounter15h", 0xC8F>;
+def: SysReg<"hpmcounter16h", 0xC90>;
+def: SysReg<"hpmcounter17h", 0xC91>;
+def: SysReg<"hpmcounter18h", 0xC92>;
+def: SysReg<"hpmcounter19h", 0xC93>;
+def: SysReg<"hpmcounter20h", 0xC94>;
+def: SysReg<"hpmcounter21h", 0xC95>;
+def: SysReg<"hpmcounter22h", 0xC96>;
+def: SysReg<"hpmcounter23h", 0xC97>;
+def: SysReg<"hpmcounter24h", 0xC98>;
+def: SysReg<"hpmcounter25h", 0xC99>;
+def: SysReg<"hpmcounter26h", 0xC9A>;
+def: SysReg<"hpmcounter27h", 0xC9B>;
+def: SysReg<"hpmcounter28h", 0xC9C>;
+def: SysReg<"hpmcounter29h", 0xC9D>;
+def: SysReg<"hpmcounter30h", 0xC9E>;
+def: SysReg<"hpmcounter31h", 0xC9F>;
+}
+
+//===--------------------------
+// Supervisor Trap Setup
+//===--------------------------
+def : SysReg<"sstatus", 0x100>;
+def : SysReg<"sedeleg", 0x102>;
+def : SysReg<"sideleg", 0x103>;
+def : SysReg<"sie", 0x104>;
+def : SysReg<"stvec", 0x105>;
+def : SysReg<"scounteren", 0x106>;
+
+//===--------------------------
+// Supervisor Trap Handling
+//===--------------------------
+def : SysReg<"sscratch", 0x140>;
+def : SysReg<"sepc", 0x141>;
+def : SysReg<"scause", 0x142>;
+def : SysReg<"stval", 0x143>;
+def : SysReg<"sip", 0x144>;
+
+//===-------------------------------------
+// Supervisor Protection and Translation
+//===-------------------------------------
+def : SysReg<"satp", 0x180>;
+
+//===-----------------------------
+// Machine Information Registers
+//===-----------------------------
+
+def : SysReg<"mvendorid", 0xF11>;
+def : SysReg<"marchid", 0xF12>;
+def : SysReg<"mimpid", 0xF13>;
+def : SysReg<"mhartid", 0xF14>;
+
+//===-----------------------------
+// Machine Trap Setup
+//===-----------------------------
+def : SysReg<"mstatus", 0x300>;
+def : SysReg<"misa", 0x301>;
+def : SysReg<"medeleg", 0x302>;
+def : SysReg<"mideleg", 0x303>;
+def : SysReg<"mie", 0x304>;
+def : SysReg<"mtvec", 0x305>;
+def : SysReg<"mcounteren", 0x306>;
+
+//===-----------------------------
+// Machine Trap Handling
+//===-----------------------------
+def : SysReg<"mscratch", 0x340>;
+def : SysReg<"mepc", 0x341>;
+def : SysReg<"mcause", 0x342>;
+def : SysReg<"mtval", 0x343>;
+def : SysReg<"mip", 0x344>;
+
+//===----------------------------------
+// Machine Protection and Translation
+//===----------------------------------
+def : SysReg<"pmpcfg0", 0x3A0>;
+def : SysReg<"pmpcfg2", 0x3A2>;
+let isRV32Only = 1 in {
+def : SysReg<"pmpcfg1", 0x3A1>;
+def : SysReg<"pmpcfg3", 0x3A3>;
+}
+
+def : SysReg<"pmpaddr0", 0x3B0>;
+def : SysReg<"pmpaddr1", 0x3B1>;
+def : SysReg<"pmpaddr2", 0x3B2>;
+def : SysReg<"pmpaddr3", 0x3B3>;
+def : SysReg<"pmpaddr4", 0x3B4>;
+def : SysReg<"pmpaddr5", 0x3B5>;
+def : SysReg<"pmpaddr6", 0x3B6>;
+def : SysReg<"pmpaddr7", 0x3B7>;
+def : SysReg<"pmpaddr8", 0x3B8>;
+def : SysReg<"pmpaddr9", 0x3B9>;
+def : SysReg<"pmpaddr10", 0x3BA>;
+def : SysReg<"pmpaddr11", 0x3BB>;
+def : SysReg<"pmpaddr12", 0x3BC>;
+def : SysReg<"pmpaddr13", 0x3BD>;
+def : SysReg<"pmpaddr14", 0x3BE>;
+def : SysReg<"pmpaddr15", 0x3BF>;
+
+
+//===--------------------------
+// Machine Counter and Timers
+//===--------------------------
+def : SysReg<"mcycle", 0xB00>;
+def : SysReg<"minstret", 0xB02>;
+
+def : SysReg<"mhpmcounter3", 0xB03>;
+def : SysReg<"mhpmcounter4", 0xB04>;
+def : SysReg<"mhpmcounter5", 0xB05>;
+def : SysReg<"mhpmcounter6", 0xB06>;
+def : SysReg<"mhpmcounter7", 0xB07>;
+def : SysReg<"mhpmcounter8", 0xB08>;
+def : SysReg<"mhpmcounter9", 0xB09>;
+def : SysReg<"mhpmcounter10", 0xB0A>;
+def : SysReg<"mhpmcounter11", 0xB0B>;
+def : SysReg<"mhpmcounter12", 0xB0C>;
+def : SysReg<"mhpmcounter13", 0xB0D>;
+def : SysReg<"mhpmcounter14", 0xB0E>;
+def : SysReg<"mhpmcounter15", 0xB0F>;
+def : SysReg<"mhpmcounter16", 0xB10>;
+def : SysReg<"mhpmcounter17", 0xB11>;
+def : SysReg<"mhpmcounter18", 0xB12>;
+def : SysReg<"mhpmcounter19", 0xB13>;
+def : SysReg<"mhpmcounter20", 0xB14>;
+def : SysReg<"mhpmcounter21", 0xB15>;
+def : SysReg<"mhpmcounter22", 0xB16>;
+def : SysReg<"mhpmcounter23", 0xB17>;
+def : SysReg<"mhpmcounter24", 0xB18>;
+def : SysReg<"mhpmcounter25", 0xB19>;
+def : SysReg<"mhpmcounter26", 0xB1A>;
+def : SysReg<"mhpmcounter27", 0xB1B>;
+def : SysReg<"mhpmcounter28", 0xB1C>;
+def : SysReg<"mhpmcounter29", 0xB1D>;
+def : SysReg<"mhpmcounter30", 0xB1E>;
+def : SysReg<"mhpmcounter31", 0xB1F>;
+
+let isRV32Only = 1 in {
+def: SysReg<"mcycleh", 0xB80>;
+def: SysReg<"minstreth", 0xB82>;
+
+def: SysReg<"mhpmcounter3h", 0xB83>;
+def: SysReg<"mhpmcounter4h", 0xB84>;
+def: SysReg<"mhpmcounter5h", 0xB85>;
+def: SysReg<"mhpmcounter6h", 0xB86>;
+def: SysReg<"mhpmcounter7h", 0xB87>;
+def: SysReg<"mhpmcounter8h", 0xB88>;
+def: SysReg<"mhpmcounter9h", 0xB89>;
+def: SysReg<"mhpmcounter10h", 0xB8A>;
+def: SysReg<"mhpmcounter11h", 0xB8B>;
+def: SysReg<"mhpmcounter12h", 0xB8C>;
+def: SysReg<"mhpmcounter13h", 0xB8D>;
+def: SysReg<"mhpmcounter14h", 0xB8E>;
+def: SysReg<"mhpmcounter15h", 0xB8F>;
+def: SysReg<"mhpmcounter16h", 0xB90>;
+def: SysReg<"mhpmcounter17h", 0xB91>;
+def: SysReg<"mhpmcounter18h", 0xB92>;
+def: SysReg<"mhpmcounter19h", 0xB93>;
+def: SysReg<"mhpmcounter20h", 0xB94>;
+def: SysReg<"mhpmcounter21h", 0xB95>;
+def: SysReg<"mhpmcounter22h", 0xB96>;
+def: SysReg<"mhpmcounter23h", 0xB97>;
+def: SysReg<"mhpmcounter24h", 0xB98>;
+def: SysReg<"mhpmcounter25h", 0xB99>;
+def: SysReg<"mhpmcounter26h", 0xB9A>;
+def: SysReg<"mhpmcounter27h", 0xB9B>;
+def: SysReg<"mhpmcounter28h", 0xB9C>;
+def: SysReg<"mhpmcounter29h", 0xB9D>;
+def: SysReg<"mhpmcounter30h", 0xB9E>;
+def: SysReg<"mhpmcounter31h", 0xB9F>;
+}
+
+//===--------------------------
+// Machine Counter Setup
+//===--------------------------
+def : SysReg<"mhpmevent3", 0x323>;
+def : SysReg<"mhpmevent4", 0x324>;
+def : SysReg<"mhpmevent5", 0x325>;
+def : SysReg<"mhpmevent6", 0x326>;
+def : SysReg<"mhpmevent7", 0x327>;
+def : SysReg<"mhpmevent8", 0x328>;
+def : SysReg<"mhpmevent9", 0x329>;
+def : SysReg<"mhpmevent10", 0x32A>;
+def : SysReg<"mhpmevent11", 0x32B>;
+def : SysReg<"mhpmevent12", 0x32C>;
+def : SysReg<"mhpmevent13", 0x32D>;
+def : SysReg<"mhpmevent14", 0x32E>;
+def : SysReg<"mhpmevent15", 0x32F>;
+def : SysReg<"mhpmevent16", 0x330>;
+def : SysReg<"mhpmevent17", 0x331>;
+def : SysReg<"mhpmevent18", 0x332>;
+def : SysReg<"mhpmevent19", 0x333>;
+def : SysReg<"mhpmevent20", 0x334>;
+def : SysReg<"mhpmevent21", 0x335>;
+def : SysReg<"mhpmevent22", 0x336>;
+def : SysReg<"mhpmevent23", 0x337>;
+def : SysReg<"mhpmevent24", 0x338>;
+def : SysReg<"mhpmevent25", 0x339>;
+def : SysReg<"mhpmevent26", 0x33A>;
+def : SysReg<"mhpmevent27", 0x33B>;
+def : SysReg<"mhpmevent28", 0x33C>;
+def : SysReg<"mhpmevent29", 0x33D>;
+def : SysReg<"mhpmevent30", 0x33E>;
+def : SysReg<"mhpmevent31", 0x33F>;
+
+//===-----------------------------------------------
+// Debug/Trace Registers (shared with Debug Mode)
+//===-----------------------------------------------
+def : SysReg<"tselect", 0x7A0>;
+def : SysReg<"tdata1", 0x7A1>;
+def : SysReg<"tdata2", 0x7A2>;
+def : SysReg<"tdata3", 0x7A3>;
+
+//===-----------------------------------------------
+// Debug Mode Registers
+//===-----------------------------------------------
+def : SysReg<"dcsr", 0x7B0>;
+def : SysReg<"dpc", 0x7B1>;
+def : SysReg<"dscratch", 0x7B2>;
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index a2ebf5bf3e6b..8937ec200bd7 100644
--- a/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,6 +27,8 @@ using namespace llvm;
extern "C" void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+ auto PR = PassRegistry::getPassRegistry();
+ initializeRISCVExpandPseudoPass(*PR);
}
static std::string computeDataLayout(const Triple &TT) {
@@ -45,12 +47,6 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -59,7 +55,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<RISCVELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -78,6 +74,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
+ void addPreEmitPass2() override;
void addPreRegAlloc() override;
};
}
@@ -99,6 +96,13 @@ bool RISCVPassConfig::addInstSelector() {
void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+void RISCVPassConfig::addPreEmitPass2() {
+ // Schedule the expansion of AMOs at the last possible moment, avoiding the
+ // possibility for other passes to break the requirements for forward
+ // progress in the LR/SC block.
+ addPass(createRISCVExpandPseudoPass());
+}
+
void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVMergeBaseOffsetOptPass());
}
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
new file mode 100644
index 000000000000..964af1f74cec
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
@@ -0,0 +1,9 @@
+#include "RISCVBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace RISCVSysReg {
+#define GET_SysRegsList_IMPL
+#include "RISCVGenSystemOperands.inc"
+} // namespace RISCVSysReg
+} // namespace llvm
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index b278a2ed3903..372e0e80bbaf 100644
--- a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -14,9 +14,10 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
-#include "RISCVMCTargetDesc.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/SubtargetFeature.h"
namespace llvm {
@@ -38,9 +39,10 @@ enum {
InstFormatCIW = 11,
InstFormatCL = 12,
InstFormatCS = 13,
- InstFormatCB = 14,
- InstFormatCJ = 15,
- InstFormatOther = 16,
+ InstFormatCA = 14,
+ InstFormatCB = 15,
+ InstFormatCJ = 16,
+ InstFormatOther = 17,
InstFormatMask = 31
};
@@ -104,7 +106,53 @@ inline static RoundingMode stringToRoundingMode(StringRef Str) {
.Case("dyn", RISCVFPRndMode::DYN)
.Default(RISCVFPRndMode::Invalid);
}
+
+inline static bool isValidRoundingMode(unsigned Mode) {
+ switch (Mode) {
+ default:
+ return false;
+ case RISCVFPRndMode::RNE:
+ case RISCVFPRndMode::RTZ:
+ case RISCVFPRndMode::RDN:
+ case RISCVFPRndMode::RUP:
+ case RISCVFPRndMode::RMM:
+ case RISCVFPRndMode::DYN:
+ return true;
+ }
+}
} // namespace RISCVFPRndMode
+
+namespace RISCVSysReg {
+struct SysReg {
+ const char *Name;
+ unsigned Encoding;
+ // FIXME: add these additional fields when needed.
+ // Privilege Access: Read, Write, Read-Only.
+ // unsigned ReadWrite;
+ // Privilege Mode: User, System or Machine.
+ // unsigned Mode;
+ // Check field name.
+ // unsigned Extra;
+ // Register number without the privilege bits.
+ // unsigned Number;
+ FeatureBitset FeaturesRequired;
+ bool isRV32Only;
+
+ bool haveRequiredFeatures(FeatureBitset ActiveFeatures) const {
+ // Not in 32-bit mode.
+ if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit])
+ return false;
+ // No required feature associated with the system register.
+ if (FeaturesRequired.none())
+ return true;
+ return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ }
+};
+
+#define GET_SysRegsList_DECL
+#include "RISCVGenSystemOperands.inc"
+} // end namespace RISCVSysReg
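A rough usage sketch of the searchable-table accessors declared above; lookupSysRegByEncoding/lookupSysRegByName come from the generated RISCVGenSystemOperands.inc, and the helper name printCSRName is assumed for illustration:

    #include "RISCVBaseInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical printer helper: prefer the symbolic CSR name when the
    // register exists and its required features are enabled.
    static void printCSRName(unsigned Encoding, const llvm::MCSubtargetInfo &STI,
                             llvm::raw_ostream &OS) {
      const auto *Reg = llvm::RISCVSysReg::lookupSysRegByEncoding(Encoding);
      if (Reg && Reg->haveRequiredFeatures(STI.getFeatureBits()))
        OS << Reg->Name;      // e.g. "mstatus" for encoding 0x300
      else
        OS << Encoding;       // fall back to the raw immediate
    }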
+
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
new file mode 100644
index 000000000000..3dc298246bc5
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
@@ -0,0 +1,79 @@
+//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMatInt.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdint>
+
+namespace llvm {
+
+namespace RISCVMatInt {
+void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
+ if (isInt<32>(Val)) {
+ // Depending on the active bits in the immediate Value v, the following
+ // instruction sequences are emitted:
+ //
+ // v == 0 : ADDI
+ // v[0,12) != 0 && v[12,32) == 0 : ADDI
+ // v[0,12) == 0 && v[12,32) != 0 : LUI
+ // v[0,32) != 0 : LUI+ADDI(W)
+ int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
+ int64_t Lo12 = SignExtend64<12>(Val);
+
+ if (Hi20)
+ Res.push_back(Inst(RISCV::LUI, Hi20));
+
+ if (Lo12 || Hi20 == 0) {
+ unsigned AddiOpc = (Is64Bit && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
+ Res.push_back(Inst(AddiOpc, Lo12));
+ }
+ return;
+ }
+
+ assert(Is64Bit && "Can't emit >32-bit imm for non-RV64 target");
+
+ // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
+ // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
+ // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
+ // while the following ADDI instructions contribute up to 12 bits each.
+ //
+ // At first glance, implementing this seems possible by simply
+ // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
+ // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the
+ // fact that ADDI performs a sign extended addition, doing it like that would
+ // only be possible when at most 11 bits of the ADDI instructions are used.
+ // Using all 12 bits of the ADDI instructions, like done by GAS, actually
+ // requires that the constant is processed starting with the least significant
+ // bit.
+ //
+ // In the following, constants are processed from LSB to MSB but instruction
+ // emission is performed from MSB to LSB by recursively calling
+ // generateInstSeq. In each recursion, first the lowest 12 bits are removed
+ // from the constant and the optimal shift amount, which can be greater than
+ // 12 bits if the constant is sparse, is determined. Then, the shifted
+ // remaining constant is processed recursively and gets emitted as soon as it
+ // fits into 32 bits. The emission of the shifts and additions is subsequently
+ // performed when the recursion returns.
+
+ int64_t Lo12 = SignExtend64<12>(Val);
+ int64_t Hi52 = (Val + 0x800) >> 12;
+ int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
+ Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+
+ generateInstSeq(Hi52, Is64Bit, Res);
+
+ Res.push_back(Inst(RISCV::SLLI, ShiftAmount));
+ if (Lo12)
+ Res.push_back(Inst(RISCV::ADDI, Lo12));
+}
+} // namespace RISCVMatInt
+} // namespace llvm
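A quick worked example of the recursion above (illustration only, not part of the patch): for Val = 0x100000001, Lo12 is 1, the remaining bits shift down by 32, the inner value 1 fits in 32 bits and becomes a single ADDI, and the shift plus low part are re-emitted on the way back out, giving ADDI 1; SLLI 32; ADDI 1. The decomposition can be checked standalone, with a GCC/Clang builtin standing in for findFirstSet:

#include <cassert>
#include <cstdint>

int main() {
  int64_t Val = 0x100000001;                        // does not fit in 32 bits
  int64_t Lo12 = ((Val & 0xFFF) ^ 0x800) - 0x800;   // SignExtend64<12>(Val) == 1
  int64_t Hi52 = (Val + 0x800) >> 12;               // == 0x100000
  int Shift = 12 + __builtin_ctzll((uint64_t)Hi52); // findFirstSet -> 32
  int64_t Inner = Hi52 >> (Shift - 12);             // == 1, handled by one ADDI
  // Emitted sequence: ADDI 1; SLLI 32; ADDI 1 -- verified by the identity below.
  assert((Inner << Shift) + Lo12 == Val);
  return 0;
}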
diff --git a/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
new file mode 100644
index 000000000000..49d1d89adc7a
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
@@ -0,0 +1,36 @@
+//===- RISCVMatInt.h - Immediate materialisation ---------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MATINT_H
+#define LLVM_LIB_TARGET_RISCV_MATINT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MachineValueType.h"
+#include <cstdint>
+
+namespace llvm {
+
+namespace RISCVMatInt {
+struct Inst {
+ unsigned Opc;
+ int64_t Imm;
+
+ Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
+};
+using InstSeq = SmallVector<Inst, 8>;
+
+// Helper to generate an instruction sequence that will materialise the given
+// immediate value into a register. A sequence of instructions represented by
+// a simple struct is produced rather than directly emitting the instructions in
+// order to allow this helper to be used from both the MC layer and during
+// instruction selection.
+void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res);
+} // namespace RISCVMatInt
+} // namespace llvm
+#endif
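A hypothetical caller of this interface would look roughly like the sketch below; emitOne() is a placeholder for whatever emitter the MC layer or instruction selection supplies, so this is a usage illustration rather than code from the tree:

// Hypothetical use of RISCVMatInt::generateInstSeq (emitOne is a placeholder).
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(0x100000001, /*IsRV64=*/true, Seq);
for (const RISCVMatInt::Inst &Instr : Seq) {
  // Instr.Opc is one of RISCV::LUI / ADDI / ADDIW / SLLI and Instr.Imm is its
  // immediate operand; register operands are chosen by the caller.
  emitOne(Instr.Opc, Instr.Imm);
}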
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 35f52f7d279b..691421e533ea 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -78,6 +78,8 @@ class SparcAsmParser : public MCTargetAsmParser {
// Custom parse functions for Sparc specific operands.
OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
+ OperandMatchResultTy parseMembarTag(OperandVector &Operands);
+
OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
OperandMatchResultTy
@@ -256,6 +258,7 @@ public:
bool isMem() const override { return isMEMrr() || isMEMri(); }
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
+ bool isMembarTag() const { return Kind == k_Immediate; }
bool isIntReg() const {
return (Kind == k_Register && Reg.Kind == rk_IntReg);
@@ -366,6 +369,12 @@ public:
addExpr(Inst, Expr);
}
+ void addMembarTagOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCExpr *Expr = getImm();
+ addExpr(Inst, Expr);
+ }
+
static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
auto Op = make_unique<SparcOperand>(k_Token);
Op->Tok.Data = Str.data();
@@ -742,6 +751,52 @@ SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const MCExpr *EVal;
+ int64_t ImmVal = 0;
+
+ std::unique_ptr<SparcOperand> Mask;
+ if (parseSparcAsmOperand(Mask) == MatchOperand_Success) {
+ if (!Mask->isImm() || !Mask->getImm()->evaluateAsAbsolute(ImmVal) ||
+ ImmVal < 0 || ImmVal > 127) {
+ Error(S, "invalid membar mask number");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ while (getLexer().getKind() == AsmToken::Hash) {
+ SMLoc TagStart = getLexer().getLoc();
+ Parser.Lex(); // Eat the '#'.
+ unsigned MaskVal = StringSwitch<unsigned>(Parser.getTok().getString())
+ .Case("LoadLoad", 0x1)
+ .Case("StoreLoad", 0x2)
+ .Case("LoadStore", 0x4)
+ .Case("StoreStore", 0x8)
+ .Case("Lookaside", 0x10)
+ .Case("MemIssue", 0x20)
+ .Case("Sync", 0x40)
+ .Default(0);
+
+ Parser.Lex(); // Eat the identifier token.
+
+ if (!MaskVal) {
+ Error(TagStart, "unknown membar tag");
+ return MatchOperand_ParseFail;
+ }
+
+ ImmVal |= MaskVal;
+
+ if (getLexer().getKind() == AsmToken::Pipe)
+ Parser.Lex(); // Eat the '|'.
+ }
+
+ EVal = MCConstantExpr::create(ImmVal, getContext());
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(SparcOperand::CreateImm(EVal, S, E));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
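The tag names accepted by parseMembarTag() above correspond to the SPARC V9 membar mask bits, and several '#tag' tokens joined with '|' simply OR together into the final immediate, which the printer hunk further down renders back into the same names. A self-contained sketch of that accumulation (illustration only; the real parser consumes AsmTokens rather than C strings):

#include <cassert>
#include <cstring>

static unsigned membarTagBit(const char *Tag) {
  if (!strcmp(Tag, "LoadLoad"))   return 0x1;
  if (!strcmp(Tag, "StoreLoad"))  return 0x2;
  if (!strcmp(Tag, "LoadStore"))  return 0x4;
  if (!strcmp(Tag, "StoreStore")) return 0x8;
  if (!strcmp(Tag, "Lookaside"))  return 0x10;
  if (!strcmp(Tag, "MemIssue"))   return 0x20;
  if (!strcmp(Tag, "Sync"))       return 0x40;
  return 0; // unknown tag -> parse error in the real parser
}

int main() {
  // "membar #LoadLoad | #StoreStore" assembles to the immediate 0x9.
  unsigned Imm = membarTagBit("LoadLoad") | membarTagBit("StoreStore");
  assert(Imm == 0x9);
  return 0;
}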
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 3e30dae1537f..0045e63a824e 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -11,9 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
-#include "SparcRegisterInfo.h"
-#include "SparcSubtarget.h"
+#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index c1512cbdc44f..d152efae6d1f 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -195,3 +195,26 @@ bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
return true;
}
+
+void SparcInstPrinter::printMembarTag(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ static const char *const TagNames[] = {
+ "#LoadLoad", "#StoreLoad", "#LoadStore", "#StoreStore",
+ "#Lookaside", "#MemIssue", "#Sync"};
+
+ unsigned Imm = MI->getOperand(opNum).getImm();
+
+ if (Imm > 127) {
+ O << Imm;
+ return;
+ }
+
+ bool First = true;
+ for (unsigned i = 0; i < sizeof(TagNames) / sizeof(char *); i++) {
+ if (Imm & (1 << i)) {
+ O << (First ? "" : " | ") << TagNames[i];
+ First = false;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 6f06d1ddae32..89015eb137c2 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -49,6 +49,8 @@ public:
raw_ostream &OS);
bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &OS);
+ void printMembarTag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/LeonFeatures.td b/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
index a7dea068cb11..61e5f16e0a1e 100755
--- a/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
+++ b/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
@@ -58,3 +58,7 @@ def FixAllFDIVSQRT : SubtargetFeature<
"true",
"LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store"
>;
+
+def LeonCycleCounter
+ : SubtargetFeature<"leoncyclecounter", "HasLeonCycleCounter", "true",
+ "Use the Leon cycle counter register">;
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 5f5e2ef7d45a..d7f1e3a1ab1d 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -100,6 +100,20 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
}
}
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ return 4;
+ case FK_Data_1:
+ return 1;
+ case FK_Data_2:
+ return 2;
+ case FK_Data_8:
+ return 8;
+ }
+}
+
namespace {
class SparcAsmBackend : public MCAsmBackend {
protected:
@@ -290,13 +304,13 @@ namespace {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
unsigned Offset = Fixup.getOffset();
-
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the
// appropriate bitfields above.
- for (unsigned i = 0; i != 4; ++i) {
- unsigned Idx = Endian == support::little ? i : 3 - i;
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = Endian == support::little ? i : (NumBytes - 1) - i;
Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
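The rewritten loop above now touches only as many bytes as the fixup kind covers and indexes them back to front on a big-endian target. A standalone sketch of the same byte placement for a two-byte FK_Data_2 fixup (illustrative values; SPARC object files are big-endian):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t Data[4] = {0, 0, 0, 0};
  uint64_t Value = 0x1234;   // fixup value after adjustFixupValue
  unsigned NumBytes = 2;     // getFixupKindNumBytes(FK_Data_2)
  unsigned Offset = 1;
  bool IsLittle = false;     // big-endian, as on SPARC
  for (unsigned i = 0; i != NumBytes; ++i) {
    unsigned Idx = IsLittle ? i : (NumBytes - 1) - i;
    Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
  }
  assert(Data[1] == 0x12 && Data[2] == 0x34); // most significant byte first
  return 0;
}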
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index f736a37a266c..4ddb72643a91 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "SparcMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Object/ELF.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
index 3b503503abce..8bb418e39ab4 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetStreamer.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
-#define LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCTARGETSTREAMER_H
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCStreamer.h"
@@ -33,7 +33,6 @@ public:
SparcTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
void emitSparcRegisterIgnore(unsigned reg) override;
void emitSparcRegisterScratch(unsigned reg) override;
-
};
// This part is for ELF object output
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
index 2f9b57f76041..0412215be8ab 100644
--- a/contrib/llvm/lib/Target/Sparc/Sparc.td
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -49,6 +49,9 @@ def FeatureVIS3
def FeatureLeon
: SubtargetFeature<"leon", "IsLeon", "true",
"Enable LEON extensions">;
+def FeaturePWRPSR
+ : SubtargetFeature<"leonpwrpsr", "HasPWRPSR", "true",
+ "Enable the PWRPSR instruction">;
def FeatureHardQuad
: SubtargetFeature<"hard-quad-float", "HasHardQuad", "true",
@@ -159,7 +162,8 @@ def : Processor<"leon4", LEON4Itineraries,
// LEON 4 FT (GR740)
// TO DO: Place-holder: Processor specific features will be added *very* soon here.
def : Processor<"gr740", LEON4Itineraries,
- [FeatureLeon, UMACSMACSupport, LeonCASA]>;
+ [FeatureLeon, UMACSMACSupport, LeonCASA, LeonCycleCounter,
+ FeaturePWRPSR]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 19fb94534b25..5f0e359a3b00 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -14,10 +14,10 @@
#include "InstPrinter/SparcInstPrinter.h"
#include "MCTargetDesc/SparcMCExpr.h"
+#include "MCTargetDesc/SparcTargetStreamer.h"
#include "Sparc.h"
#include "SparcInstrInfo.h"
#include "SparcTargetMachine.h"
-#include "SparcTargetStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index b04c6b112682..ae2257618a55 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -780,6 +780,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
const unsigned StackOffset = 92;
bool hasStructRetAttr = false;
+ unsigned SRetArgSize = 0;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
i != e;
@@ -824,6 +825,11 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
hasStructRetAttr = true;
+ // sret only allowed on first argument
+ assert(Outs[realArgIdx].OrigArgIndex == 0);
+ PointerType *Ty = cast<PointerType>(CLI.getArgs()[0].Ty);
+ Type *ElementTy = Ty->getElementType();
+ SRetArgSize = DAG.getDataLayout().getTypeAllocSize(ElementTy);
continue;
}
@@ -846,12 +852,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (VA.getLocVT() == MVT::f64) {
// Move from the float value from float registers into the
// integer registers.
-
- // TODO: The f64 -> v2i32 conversion is super-inefficient for
- // constants: it sticks them in the constant pool, then loads
- // to a fp register, then stores to temp memory, then loads to
- // integer registers.
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg))
+ Arg = bitcastConstantFPToInt(C, dl, DAG);
+ else
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
}
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
@@ -932,7 +936,6 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}
- unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
// If the callee is a GlobalAddress node (quite common, every direct call is)
@@ -1032,51 +1035,6 @@ unsigned SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT,
report_fatal_error("Invalid register name global variable");
}
-// This functions returns true if CalleeName is a ABI function that returns
-// a long double (fp128).
-static bool isFP128ABICall(const char *CalleeName)
-{
- static const char *const ABICalls[] =
- { "_Q_add", "_Q_sub", "_Q_mul", "_Q_div",
- "_Q_sqrt", "_Q_neg",
- "_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq",
- "_Q_lltoq", "_Q_ulltoq",
- nullptr
- };
- for (const char * const *I = ABICalls; *I != nullptr; ++I)
- if (strcmp(CalleeName, *I) == 0)
- return true;
- return false;
-}
-
-unsigned
-SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
-{
- const Function *CalleeFn = nullptr;
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- CalleeFn = dyn_cast<Function>(G->getGlobal());
- } else if (ExternalSymbolSDNode *E =
- dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const Function &F = DAG.getMachineFunction().getFunction();
- const Module *M = F.getParent();
- const char *CalleeName = E->getSymbol();
- CalleeFn = M->getFunction(CalleeName);
- if (!CalleeFn && isFP128ABICall(CalleeName))
- return 16; // Return sizeof(fp128)
- }
-
- if (!CalleeFn)
- return 0;
-
- // It would be nice to check for the sret attribute on CalleeFn here,
- // but since it is not part of the function type, any check will misfire.
-
- PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
- Type *ElementTy = Ty->getElementType();
- return DAG.getDataLayout().getTypeAllocSize(ElementTy);
-}
-
-
// Fixup floating point arguments in the ... part of a varargs call.
//
// The SPARC v9 ABI requires that floating point arguments are treated the same
@@ -1587,9 +1545,6 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
-
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
@@ -1841,6 +1796,13 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMUL, MVT::f32, Promote);
}
+ // Custom combine bitcast between f64 and v2i32
+ if (!Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::BITCAST);
+
+ if (Subtarget->hasLeonCycleCounter())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setMinFunctionAlignment(2);
@@ -1863,8 +1825,6 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
- case SPISD::EH_SJLJ_SETJMP: return "SPISD::EH_SJLJ_SETJMP";
- case SPISD::EH_SJLJ_LONGJMP: return "SPISD::EH_SJLJ_LONGJMP";
case SPISD::Hi: return "SPISD::Hi";
case SPISD::Lo: return "SPISD::Lo";
case SPISD::FTOI: return "SPISD::FTOI";
@@ -1906,8 +1866,8 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
case SPISD::SELECT_ICC:
case SPISD::SELECT_XCC:
case SPISD::SELECT_FCC:
- DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
- DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -2537,20 +2497,6 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
-SDValue SparcTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const {
- SDLoc DL(Op);
- return DAG.getNode(SPISD::EH_SJLJ_SETJMP, DL,
- DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1));
-
-}
-
-SDValue SparcTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const {
- SDLoc DL(Op);
- return DAG.getNode(SPISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1));
-}
-
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
MachineFunction &MF = DAG.getMachineFunction();
@@ -2666,7 +2612,8 @@ static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
}
static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
- const SparcSubtarget *Subtarget) {
+ const SparcSubtarget *Subtarget,
+ bool AlwaysFlush = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -2676,17 +2623,11 @@ static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
unsigned stackBias = Subtarget->getStackPointerBias();
SDValue FrameAddr;
-
- if (depth == 0) {
- FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
- if (Subtarget->is64Bit())
- FrameAddr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
- DAG.getIntPtrConstant(stackBias, dl));
- return FrameAddr;
- }
+ SDValue Chain;
// flush first to make sure the windowed registers' values are in stack
- SDValue Chain = getFLUSHW(Op, DAG);
+ Chain = (depth || AlwaysFlush) ? getFLUSHW(Op, DAG) : DAG.getEntryNode();
+
FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
unsigned Offset = (Subtarget->is64Bit()) ? (stackBias + 112) : 56;
@@ -2735,7 +2676,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
}
// Need frame address to find return address of the caller.
- SDValue FrameAddr = getFRAMEADDR(depth - 1, Op, DAG, Subtarget);
+ SDValue FrameAddr = getFRAMEADDR(depth - 1, Op, DAG, Subtarget, true);
unsigned Offset = (Subtarget->is64Bit()) ? 120 : 60;
SDValue Ptr = DAG.getNode(ISD::ADD,
@@ -3085,8 +3026,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
hasHardQuad);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
hasHardQuad);
- case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG, *this);
- case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG, *this);
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
@@ -3120,6 +3059,40 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
+SDValue SparcTargetLowering::bitcastConstantFPToInt(ConstantFPSDNode *C,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ APInt V = C->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(V.zextOrTrunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(V.lshr(32).zextOrTrunc(32), DL, MVT::i32);
+ if (DAG.getDataLayout().isLittleEndian())
+ std::swap(Lo, Hi);
+ return DAG.getBuildVector(MVT::v2i32, DL, {Hi, Lo});
+}
+
+SDValue SparcTargetLowering::PerformBITCASTCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDLoc dl(N);
+ SDValue Src = N->getOperand(0);
+
+ if (isa<ConstantFPSDNode>(Src) && N->getSimpleValueType(0) == MVT::v2i32 &&
+ Src.getSimpleValueType() == MVT::f64)
+ return bitcastConstantFPToInt(cast<ConstantFPSDNode>(Src), dl, DCI.DAG);
+
+ return SDValue();
+}
+
+SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BITCAST:
+ return PerformBITCASTCombine(N, DCI);
+ }
+ return SDValue();
+}
+
MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
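The new bitcastConstantFPToInt() splits an f64 constant into its two 32-bit halves directly, so constant operands no longer take the constant-pool/load/store round trip described in the removed TODO. A standalone check of the split for the constant 1.0 (on big-endian SPARC no swap happens, so the resulting v2i32 is {0x3FF00000, 0x00000000}):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double D = 1.0;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));  // bitcastToAPInt() equivalent
  uint32_t Lo = uint32_t(Bits);          // V.zextOrTrunc(32)
  uint32_t Hi = uint32_t(Bits >> 32);    // V.lshr(32).zextOrTrunc(32)
  assert(Hi == 0x3FF00000 && Lo == 0);   // {Hi, Lo} build vector on big-endian
  return 0;
}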
@@ -3135,13 +3108,6 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case SP::SELECT_CC_DFP_FCC:
case SP::SELECT_CC_QFP_FCC:
return expandSelectCC(MI, BB, SP::FBCOND);
- case SP::EH_SJLJ_SETJMP32ri:
- case SP::EH_SJLJ_SETJMP32rr:
- return emitEHSjLjSetJmp(MI, BB);
- case SP::EH_SJLJ_LONGJMP32rr:
- case SP::EH_SJLJ_LONGJMP32ri:
- return emitEHSjLjLongJmp(MI, BB);
-
}
}
@@ -3201,205 +3167,6 @@ SparcTargetLowering::expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
return SinkMBB;
}
-MachineBasicBlock *
-SparcTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineInstrBuilder MIB;
-
- MVT PVT = getPointerTy(MF->getDataLayout());
- unsigned RegSize = PVT.getStoreSize();
- assert(PVT == MVT::i32 && "Invalid Pointer Size!");
-
- unsigned Buf = MI.getOperand(0).getReg();
- unsigned JmpLoc = MRI.createVirtualRegister(&SP::IntRegsRegClass);
-
- // TO DO: If we do 64-bit handling, this perhaps should be FLUSHW, not TA 3
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::TRAPri), SP::G0).addImm(3).addImm(SPCC::ICC_A);
-
- // Instruction to restore FP
- const unsigned FP = SP::I6;
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(FP)
- .addReg(Buf)
- .addImm(0);
-
- // Instruction to load jmp location
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(JmpLoc, RegState::Define)
- .addReg(Buf)
- .addImm(RegSize);
-
- // Instruction to restore SP
- const unsigned SP = SP::O6;
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(SP)
- .addReg(Buf)
- .addImm(2 * RegSize);
-
- // Instruction to restore I7
- MIB = BuildMI(*MBB, MI, DL, TII->get(SP::LDri))
- .addReg(SP::I7)
- .addReg(Buf, RegState::Kill)
- .addImm(3 * RegSize);
-
- // Jump to JmpLoc
- BuildMI(*MBB, MI, DL, TII->get(SP::JMPLrr)).addReg(SP::G0).addReg(JmpLoc, RegState::Kill).addReg(SP::G0);
-
- MI.eraseFromParent();
- return MBB;
-}
-
-MachineBasicBlock *
-SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineInstrBuilder MIB;
-
- MVT PVT = getPointerTy(MF->getDataLayout());
- unsigned RegSize = PVT.getStoreSize();
- assert(PVT == MVT::i32 && "Invalid Pointer Size!");
-
- unsigned DstReg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
- (void)TRI;
- unsigned mainDstReg = MRI.createVirtualRegister(RC);
- unsigned restoreDstReg = MRI.createVirtualRegister(RC);
-
- // For v = setjmp(buf), we generate
- //
- // thisMBB:
- // buf[0] = FP
- // buf[RegSize] = restoreMBB <-- takes address of restoreMBB
- // buf[RegSize * 2] = O6
- // buf[RegSize * 3] = I7
- // Ensure restoreMBB remains in the relocations list (done using a bn instruction)
- // b mainMBB
- //
- // mainMBB:
- // v_main = 0
- // b sinkMBB
- //
- // restoreMBB:
- // v_restore = 1
- // --fall through--
- //
- // sinkMBB:
- // v = phi(main, restore)
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator It = ++MBB->getIterator();
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
-
- MF->insert(It, mainMBB);
- MF->insert(It, restoreMBB);
- MF->insert(It, sinkMBB);
- restoreMBB->setHasAddressTaken();
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)),
- MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- unsigned LabelReg = MRI.createVirtualRegister(&SP::IntRegsRegClass);
- unsigned LabelReg2 = MRI.createVirtualRegister(&SP::IntRegsRegClass);
- unsigned BufReg = MI.getOperand(1).getReg();
-
- // Instruction to store FP
- const unsigned FP = SP::I6;
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(0)
- .addReg(FP);
-
- // Instructions to store jmp location
- MIB = BuildMI(thisMBB, DL, TII->get(SP::SETHIi))
- .addReg(LabelReg, RegState::Define)
- .addMBB(restoreMBB, SparcMCExpr::VK_Sparc_HI);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::ORri))
- .addReg(LabelReg2, RegState::Define)
- .addReg(LabelReg, RegState::Kill)
- .addMBB(restoreMBB, SparcMCExpr::VK_Sparc_LO);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(RegSize)
- .addReg(LabelReg2, RegState::Kill);
-
- // Instruction to store SP
- const unsigned SP = SP::O6;
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(2 * RegSize)
- .addReg(SP);
-
- // Instruction to store I7
- MIB = BuildMI(thisMBB, DL, TII->get(SP::STri))
- .addReg(BufReg)
- .addImm(3 * RegSize)
- .addReg(SP::I7);
-
-
- // FIX ME: This next instruction ensures that the restoreMBB block address remains
- // valid through optimization passes and serves no other purpose. The ICC_N ensures
- // that the branch is never taken. This commented-out code here was an alternative
- // attempt to achieve this which brought myriad problems.
- //MIB = BuildMI(thisMBB, DL, TII->get(SP::EH_SjLj_Setup)).addMBB(restoreMBB, SparcMCExpr::VK_Sparc_None);
- MIB = BuildMI(thisMBB, DL, TII->get(SP::BCOND))
- .addMBB(restoreMBB)
- .addImm(SPCC::ICC_N);
-
- MIB = BuildMI(thisMBB, DL, TII->get(SP::BCOND))
- .addMBB(mainMBB)
- .addImm(SPCC::ICC_A);
-
- thisMBB->addSuccessor(mainMBB);
- thisMBB->addSuccessor(restoreMBB);
-
-
- // mainMBB:
- MIB = BuildMI(mainMBB, DL, TII->get(SP::ORrr))
- .addReg(mainDstReg, RegState::Define)
- .addReg(SP::G0)
- .addReg(SP::G0);
- MIB = BuildMI(mainMBB, DL, TII->get(SP::BCOND)).addMBB(sinkMBB).addImm(SPCC::ICC_A);
-
- mainMBB->addSuccessor(sinkMBB);
-
-
- // restoreMBB:
- MIB = BuildMI(restoreMBB, DL, TII->get(SP::ORri))
- .addReg(restoreDstReg, RegState::Define)
- .addReg(SP::G0)
- .addImm(1);
- //MIB = BuildMI(restoreMBB, DL, TII->get(SP::BCOND)).addMBB(sinkMBB).addImm(SPCC::ICC_A);
- restoreMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- MIB = BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(SP::PHI), DstReg)
- .addReg(mainDstReg).addMBB(mainMBB)
- .addReg(restoreDstReg).addMBB(restoreMBB);
-
- MI.eraseFromParent();
- return sinkMBB;
-}
-
//===----------------------------------------------------------------------===//
// Sparc Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -3494,23 +3261,23 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
else
return std::make_pair(0U, &SP::IntRegsRegClass);
case 'f':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
- else if (VT == MVT::f64)
+ else if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &SP::LowDFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::LowQFPRegsRegClass);
- llvm_unreachable("Unknown ValueType for f-register-type!");
- break;
+ // This will generate an error message
+ return std::make_pair(0U, nullptr);
case 'e':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
- else if (VT == MVT::f64)
+ else if (VT == MVT::f64 || VT == MVT::i64 )
return std::make_pair(0U, &SP::DFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::QFPRegsRegClass);
- llvm_unreachable("Unknown ValueType for e-register-type!");
- break;
+ // This will generate an error message
+ return std::make_pair(0U, nullptr);
}
} else if (!Constraint.empty() && Constraint.size() <= 5
&& Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
@@ -3587,7 +3354,16 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
getLibcallName(libCall),
1));
return;
-
+ case ISD::READCYCLECOUNTER: {
+ assert(Subtarget->hasLeonCycleCounter());
+ SDValue Lo = DAG.getCopyFromReg(N->getOperand(0), dl, SP::ASR23, MVT::i32);
+ SDValue Hi = DAG.getCopyFromReg(Lo, dl, SP::G0, MVT::i32);
+ SDValue Ops[] = { Lo, Hi };
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops);
+ Results.push_back(Pair);
+ Results.push_back(N->getOperand(0));
+ return;
+ }
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// Custom lower only if it involves f128 or i64.
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index 0cbbda787881..718851db25bf 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -33,9 +33,6 @@ namespace llvm {
SELECT_XCC, // Select between two values using the current XCC flags.
SELECT_FCC, // Select between two values using the current FCC flags.
- EH_SJLJ_SETJMP, // builtin setjmp operation
- EH_SJLJ_LONGJMP, // builtin longjmp operation
-
Hi, Lo, // Hi/Lo operations, typically on a global address.
FTOI, // FP to Int within a FP register.
@@ -171,12 +168,6 @@ namespace llvm {
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const ;
- SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI) const ;
-
- unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
@@ -192,6 +183,13 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue PerformBITCASTCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue bitcastConstantFPToInt(ConstantFPSDNode *C, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
@@ -213,10 +211,6 @@ namespace llvm {
MachineBasicBlock *expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
- MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
- MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index f6518c936ebc..0b94c6b614eb 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -239,7 +239,7 @@ let Predicates = [Is64Bit] in {
let DecoderMethod = "DecodeLoadInt" in
defm LDX : Load<"ldx", 0b001011, load, I64Regs, i64>;
-let mayLoad = 1, isCodeGenOnly = 1, isAsmParserOnly = 1 in
+let mayLoad = 1, isAsmParserOnly = 1 in
def TLS_LDXrr : F3_1<3, 0b001011,
(outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
"ldx [$addr], $dst, $sym",
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
index 352090ed92c1..35987390d7ba 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -470,10 +470,15 @@ def : InstAlias<"wr $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>;
def : InstAlias<"wr $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>;
def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
+def : InstAlias<"pwr $rs2, %psr", (PWRPSRrr G0, IntRegs:$rs2), 0>;
+def : InstAlias<"pwr $simm13, %psr", (PWRPSRri G0, i32imm:$simm13), 0>;
// flush -> flush %g0
def : InstAlias<"flush", (FLUSH), 0>;
+// unimp -> unimp 0
+def : InstAlias<"unimp", (UNIMP 0), 0>;
+
def : MnemonicAlias<"iflush", "flush">;
def : MnemonicAlias<"stub", "stb">;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5b7fb3c485e8..558b37aeebcb 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -56,6 +56,11 @@ def HasHardQuad : Predicate<"Subtarget->hasHardQuad()">;
// instruction
def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
+// HasPWRPSR - This is true when the target processor supports partial
+// writes to the PSR register that only affect the ET field.
+def HasPWRPSR : Predicate<"Subtarget->hasPWRPSR()">,
+ AssemblerPredicate<"FeaturePWRPSR">;
+
// HasUMAC_SMAC - This is true when the target processor supports the
// UMAC and SMAC instructions
def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">;
@@ -89,10 +94,22 @@ def HI22 : SDNodeXForm<imm, [{
MVT::i32);
}]>;
+// Return the complement of a HI22 immediate value.
+def HI22_not : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(unsigned)N->getZExtValue() >> 10, SDLoc(N),
+ MVT::i32);
+}]>;
+
def SETHIimm : PatLeaf<(imm), [{
return isShiftedUInt<22, 10>(N->getZExtValue());
}], HI22>;
+// The N->hasOneUse() prevents the immediate from being instantiated in both
+// normal and complement form.
+def SETHIimm_not : PatLeaf<(i32 imm), [{
+ return N->hasOneUse() && isShiftedUInt<22, 10>(~(unsigned)N->getZExtValue());
+}], HI22_not>;
+
// Addressing modes.
def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
@@ -121,6 +138,16 @@ def MEMri : Operand<iPTR> {
def TLSSym : Operand<iPTR>;
+def SparcMembarTagAsmOperand : AsmOperandClass {
+ let Name = "MembarTag";
+ let ParserMethod = "parseMembarTag";
+}
+
+def MembarTag : Operand<i32> {
+ let PrintMethod = "printMembarTag";
+ let ParserMatchClass = SparcMembarTagAsmOperand;
+}
+
// Branch targets have OtherVT type.
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
@@ -169,9 +196,6 @@ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDTSPtlsld :
SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
-def SDTSPeh_sjlj_setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
-def SDTSPeh_sjlj_longjmp: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-
def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
@@ -190,13 +214,6 @@ def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
-def SPsjlj_setjmp: SDNode<"SPISD::EH_SJLJ_SETJMP",
- SDTSPeh_sjlj_setjmp,
- [SDNPHasChain, SDNPSideEffect]>;
-def SPsjlj_longjmp: SDNode<"SPISD::EH_SJLJ_LONGJMP",
- SDTSPeh_sjlj_longjmp,
- [SDNPHasChain, SDNPSideEffect]>;
-
// These are target-independent nodes, but have target-specific formats.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -473,27 +490,6 @@ let usesCustomInserter = 1, Uses = [FCC0] in {
[(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>;
}
-let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
- let Defs = [WIM] in
- def EH_SJLJ_SETJMP32ri : Pseudo<(outs IntRegs:$dst), (ins MEMri:$buf),
- "#EH_SJLJ_SETJMP32",
- [(set i32:$dst, (SPsjlj_setjmp ADDRri:$buf))]>,
- Requires<[Is32Bit]>;
- def EH_SJLJ_SETJMP32rr : Pseudo<(outs IntRegs:$dst), (ins MEMrr:$buf),
- "#EH_SJLJ_SETJMP32",
- [(set i32:$dst, (SPsjlj_setjmp ADDRrr:$buf))]>,
- Requires<[Is32Bit]>;
- let isTerminator = 1 in
- def EH_SJLJ_LONGJMP32ri : Pseudo<(outs), (ins MEMri:$buf),
- "#EH_SJLJ_LONGJMP32",
- [(SPsjlj_longjmp ADDRri:$buf)]>,
- Requires<[Is32Bit]>;
- def EH_SJLJ_LONGJMP32rr : Pseudo<(outs), (ins MEMrr:$buf),
- "#EH_SJLJ_LONGJMP32",
- [(SPsjlj_longjmp ADDRrr:$buf)]>,
- Requires<[Is32Bit]>;
-}
-
// Section B.1 - Load Integer Instructions, p. 90
let DecoderMethod = "DecodeLoadInt" in {
defm LDSB : LoadA<"ldsb", 0b001001, 0b011001, sextloadi8, IntRegs, i32>;
@@ -680,6 +676,12 @@ def XNORri : F3_2<2, 0b000111,
(outs IntRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13),
"xnor $rs1, $simm13, $rd", []>;
+def : Pat<(and IntRegs:$rs1, SETHIimm_not:$rs2),
+ (ANDNrr i32:$rs1, (SETHIi SETHIimm_not:$rs2))>;
+
+def : Pat<(or IntRegs:$rs1, SETHIimm_not:$rs2),
+ (ORNrr i32:$rs1, (SETHIi SETHIimm_not:$rs2))>;
+
let Defs = [ICC] in {
defm ANDCC : F3_12np<"andcc", 0b010001>;
defm ANDNCC : F3_12np<"andncc", 0b010101>;
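The SETHIimm_not/HI22_not patterns above fold an AND or OR with an immediate whose complement fits a SETHI (the complement's low ten bits are zero) into a SETHI of the complement followed by ANDN or ORN. A standalone arithmetic check of the AND case (illustrative register value; SETHI deposits its 22-bit immediate into bits 31..10):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Imm = 0xFFFFF3FF;              // ~Imm == 0xC00 fits SETHI
  uint32_t SethiImm = ~Imm >> 10;         // HI22_not transform -> 3
  uint32_t Materialised = SethiImm << 10; // what SETHI actually produces
  uint32_t Reg = 0xDEADBEEF;
  assert((Reg & ~Materialised) == (Reg & Imm)); // ANDN equals AND with Imm
  return 0;
}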
@@ -1316,7 +1318,7 @@ let Defs = [FCC0], rd = 0, isCodeGenOnly = 1 in {
//===----------------------------------------------------------------------===//
// Instructions for Thread Local Storage(TLS).
//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1, isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in {
def TLS_ADDrr : F3_1<2, 0b000000,
(outs IntRegs:$rd),
(ins IntRegs:$rs1, IntRegs:$rs2, TLSSym:$sym),
@@ -1511,7 +1513,7 @@ def : Pat<(ctpop i32:$src),
(POPCrr (SRLri $src, 0))>;
let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
- def MEMBARi : F3_2<2, 0b101000, (outs), (ins simm13Op:$simm13),
+ def MEMBARi : F3_2<2, 0b101000, (outs), (ins MembarTag:$simm13),
"membar $simm13", []>;
// The CAS instruction, unlike other instructions, only comes in a
@@ -1569,6 +1571,17 @@ let Predicates = [HasUMAC_SMAC], Defs = [Y, ASR18], Uses = [Y, ASR18] in {
[], IIC_smac_umac>;
}
+// The partial write WRPSR instruction has a non-zero destination
+// register value to distinguish it from the standard WRPSR encoding.
+let Predicates = [HasPWRPSR], Defs = [PSR], rd=1 in {
+ def PWRPSRrr : F3_1<2, 0b110001,
+ (outs), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "pwr $rs1, $rs2, %psr", []>;
+ def PWRPSRri : F3_2<2, 0b110001,
+ (outs), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "pwr $rs1, $simm13, %psr", []>;
+}
+
let Defs = [ICC] in {
defm TADDCC : F3_12np<"taddcc", 0b100000>;
defm TSUBCC : F3_12np<"tsubcc", 0b100001>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index b9647eaa3d51..33caa66154ff 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -95,6 +95,10 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ // Reserve ASR1-ASR31
+ for (unsigned n = 0; n < 31; n++)
+ Reserved.set(SP::ASR1 + n);
+
return Reserved;
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 2a279dad5ae2..8dd2569d10de 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -35,8 +35,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index 40c5683f8495..5301fc30a006 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -44,9 +44,11 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
// Leon features
HasLeonCasa = false;
HasUmacSmac = false;
+ HasPWRPSR = false;
InsertNOPLoad = false;
FixAllFDIVSQRT = false;
DetectRoundChange = false;
+ HasLeonCycleCounter = false;
// Determine default and user specified characteristics
std::string CPUName = CPU;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index 588a6765bcdf..24ea41a266e7 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -47,9 +47,11 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
// LEON features
bool HasUmacSmac;
bool HasLeonCasa;
+ bool HasPWRPSR;
bool InsertNOPLoad;
bool FixAllFDIVSQRT;
bool DetectRoundChange;
+ bool HasLeonCycleCounter;
SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
@@ -92,9 +94,11 @@ public:
// Leon options
bool hasUmacSmac() const { return HasUmacSmac; }
bool hasLeonCasa() const { return HasLeonCasa; }
+ bool hasPWRPSR() const { return HasPWRPSR; }
bool insertNOPLoad() const { return InsertNOPLoad; }
bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
bool detectRoundChange() const { return DetectRoundChange; }
+ bool hasLeonCycleCounter() const { return HasLeonCycleCounter; }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 07f9e7250bd9..5b467235f809 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -70,11 +70,16 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
// pic32 PIC_ Medium GOT < 2^32 bytes
//
// All code models require that the text segment is smaller than 2GB.
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool Is64Bit,
- bool JIT) {
- if (CM)
+static CodeModel::Model
+getEffectiveSparcCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
+ bool Is64Bit, bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (Is64Bit) {
if (JIT)
return CodeModel::Large;
@@ -88,11 +93,11 @@ SparcTargetMachine::SparcTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Optional<Reloc::Model> RM,
Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT, bool is64bit)
- : LLVMTargetMachine(
- T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), is64bit, JIT),
- OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options,
+ getEffectiveRelocModel(RM),
+ getEffectiveSparcCodeModel(
+ CM, getEffectiveRelocModel(RM), is64bit, JIT),
+ OL),
TLOF(make_unique<SparcELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit), is64Bit(is64bit) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
index b0d76abeba7d..d1eb1d329a4c 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -40,10 +40,6 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
/// Sparc 32-bit target machine
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index bde067d6c129..91959b4151b3 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "InstPrinter/SystemZInstPrinter.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -243,6 +244,11 @@ public:
return Kind == KindImmTLS;
}
+ const ImmTLSOp getImmTLS() const {
+ assert(Kind == KindImmTLS && "Not a TLS immediate");
+ return ImmTLS;
+ }
+
// Memory operands.
bool isMem() const override {
return Kind == KindMem;
@@ -270,6 +276,11 @@ public:
return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x100);
}
+ const MemOp& getMem() const {
+ assert(Kind == KindMem && "Not a Mem operand");
+ return Mem;
+ }
+
// Override MCParsedAsmOperand.
SMLoc getStartLoc() const override { return StartLoc; }
SMLoc getEndLoc() const override { return EndLoc; }
@@ -623,8 +634,61 @@ static struct InsnMatchEntry InsnMatchTable[] = {
{ MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }
};
+static void printMCExpr(const MCExpr *E, raw_ostream &OS) {
+ if (!E)
+ return;
+ if (auto *CE = dyn_cast<MCConstantExpr>(E))
+ OS << *CE;
+ else if (auto *UE = dyn_cast<MCUnaryExpr>(E))
+ OS << *UE;
+ else if (auto *BE = dyn_cast<MCBinaryExpr>(E))
+ OS << *BE;
+ else if (auto *SRE = dyn_cast<MCSymbolRefExpr>(E))
+ OS << *SRE;
+ else
+ OS << *E;
+}
+
void SystemZOperand::print(raw_ostream &OS) const {
- llvm_unreachable("Not implemented");
+ switch (Kind) {
+ case KindToken:
+ OS << "Token:" << getToken();
+ break;
+ case KindReg:
+ OS << "Reg:" << SystemZInstPrinter::getRegisterName(getReg());
+ break;
+ case KindImm:
+ OS << "Imm:";
+ printMCExpr(getImm(), OS);
+ break;
+ case KindImmTLS:
+ OS << "ImmTLS:";
+ printMCExpr(getImmTLS().Imm, OS);
+ if (getImmTLS().Sym) {
+ OS << ", ";
+ printMCExpr(getImmTLS().Sym, OS);
+ }
+ break;
+ case KindMem: {
+ const MemOp &Op = getMem();
+ OS << "Mem:" << *cast<MCConstantExpr>(Op.Disp);
+ if (Op.Base) {
+ OS << "(";
+ if (Op.MemKind == BDLMem)
+ OS << *cast<MCConstantExpr>(Op.Length.Imm) << ",";
+ else if (Op.MemKind == BDRMem)
+ OS << SystemZInstPrinter::getRegisterName(Op.Length.Reg) << ",";
+ if (Op.Index)
+ OS << SystemZInstPrinter::getRegisterName(Op.Index) << ",";
+ OS << SystemZInstPrinter::getRegisterName(Op.Base);
+ OS << ")";
+ }
+ break;
+ }
+ case KindInvalid:
+ break;
+ }
}
// Parse one register of the form %<prefix><number>.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index bd99fabb48c9..e2de721be568 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -647,7 +647,7 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
}
// Force static initialization.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 9edd1fc36406..668a77ac014f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -294,11 +294,10 @@ bool SystemZElimCompare::convertToLoadAndTest(
return false;
// Rebuild to get the CC operand in the right place.
- MachineInstr *BuiltMI =
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode));
+ auto MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode));
for (const auto &MO : MI.operands())
- BuiltMI->addOperand(MO);
- BuiltMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB.add(MO);
+ MIB.setMemRefs(MI.memoperands());
MI.eraseFromParent();
return true;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index b9e5788cf018..8726b56bc94f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -49,14 +49,14 @@ getNumDecoderSlots(SUnit *SU) const {
if (!SC->isValid())
return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
- if (SC->BeginGroup) {
- if (!SC->EndGroup)
- return 2; // Cracked instruction
- else
- return 3; // Expanded/group-alone instruction
- }
-
- return 1; // Normal instruction
+ assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
+ "Only cracked instruction can have 2 uops.");
+ assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
+ "Expanded instructions always group alone.");
+ assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
+ "Expanded instructions fill the group(s).");
+
+ return SC->NumMicroOps;
}
unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
@@ -139,16 +139,21 @@ void SystemZHazardRecognizer::nextGroup() {
LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
LLVM_DEBUG(CurGroupDbg = "";);
- GrpCount++;
+ int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
+ assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
+ "Current decoder group bad.");
// Reset counter for next group.
CurrGroupSize = 0;
CurrGroupHas4RegOps = false;
- // Decrease counters for execution units by one.
+ GrpCount += ((unsigned) NumGroups);
+
+ // Decrease counters for execution units.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
- if (ProcResourceCounters[i] > 0)
- ProcResourceCounters[i]--;
+ ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
+ ? (ProcResourceCounters[i] - NumGroups)
+ : 0);
// Clear CriticalResourceIdx if it is now below the threshold.
if (CriticalResourceIdx != UINT_MAX &&
@@ -323,13 +328,13 @@ EmitInstruction(SUnit *SU) {
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
- unsigned GroupLim =
- ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
- assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
+ unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
+ assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
+ && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == GroupLim || SC->EndGroup)
+ if (CurrGroupSize >= GroupLim || SC->EndGroup)
nextGroup();
}
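With getNumDecoderSlots() now returning the scheduling-class micro-op count, an expanded instruction can account for several whole decode groups at once, and nextGroup() advances the group counter and the per-unit resource counters by that many groups. A small arithmetic check of that accounting (illustrative numbers only):

#include <cassert>

int main() {
  int CurrGroupSize = 6; // decoder slots taken by an expanded instruction
  int NumGroups = (CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1;
  assert(NumGroups == 2); // two full three-slot decode groups completed
  int ProcResourceCounter = 1; // outstanding uses of one execution unit
  ProcResourceCounter =
      (ProcResourceCounter > NumGroups) ? ProcResourceCounter - NumGroups : 0;
  assert(ProcResourceCounter == 0); // decremented per group, floored at zero
  return 0;
}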
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 5425f1d16e5e..5bc2ab0ef2d8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -71,19 +71,19 @@ struct SystemZAddressingMode {
// True if the address can (and must) include ADJDYNALLOC.
bool isDynAlloc() { return Form == FormBDXDynAlloc; }
- void dump() {
+ void dump(const llvm::SelectionDAG *DAG) {
errs() << "SystemZAddressingMode " << this << '\n';
errs() << " Base ";
if (Base.getNode())
- Base.getNode()->dump();
+ Base.getNode()->dump(DAG);
else
errs() << "null\n";
if (hasIndexField()) {
errs() << " Index ";
if (Index.getNode())
- Index.getNode()->dump();
+ Index.getNode()->dump(DAG);
else
errs() << "null\n";
}
@@ -589,7 +589,7 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
if (AM.isDynAlloc() && !AM.IncludesDynAlloc)
return false;
- LLVM_DEBUG(AM.dump());
+ LLVM_DEBUG(AM.dump(CurDAG));
return true;
}
@@ -728,8 +728,7 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
// The inner check covers all cases but is more expensive.
uint64_t Used = allOnes(Op.getValueSizeInBits());
if (Used != (AndMask | InsertMask)) {
- KnownBits Known;
- CurDAG->computeKnownBits(Op.getOperand(0), Known);
+ KnownBits Known = CurDAG->computeKnownBits(Op.getOperand(0));
if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue()))
return false;
}
@@ -787,8 +786,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
// If some bits of Input are already known zeros, those bits will have
// been removed from the mask. See if adding them back in makes the
// mask suitable.
- KnownBits Known;
- CurDAG->computeKnownBits(Input, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Input);
Mask |= Known.Zero.getZExtValue();
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -811,8 +809,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
// If some bits of Input are already known ones, those bits will have
// been removed from the mask. See if adding them back in makes the
// mask suitable.
- KnownBits Known;
- CurDAG->computeKnownBits(Input, Known);
+ KnownBits Known = CurDAG->computeKnownBits(Input);
Mask &= ~Known.One.getZExtValue();
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -1147,7 +1144,7 @@ bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
return false;
auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
- if (!Load || !Load->hasOneUse())
+ if (!Load || !Load->hasNUsesOfValue(1, 0))
return false;
if (Load->getMemoryVT().getSizeInBits() !=
Load->getValueType(0).getSizeInBits())
@@ -1308,7 +1305,7 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
return false;
case SystemZISD::SSUBO:
NegateOperand = true;
- /* fall through */
+ LLVM_FALLTHROUGH;
case SystemZISD::SADDO:
if (MemVT == MVT::i32)
NewOpc = SystemZ::ASI;
@@ -1319,7 +1316,7 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
break;
case SystemZISD::USUBO:
NegateOperand = true;
- /* fall through */
+ LLVM_FALLTHROUGH;
case SystemZISD::UADDO:
if (MemVT == MVT::i32)
NewOpc = SystemZ::ALSI;
@@ -1354,11 +1351,8 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
SDValue Ops[] = { Base, Disp, Operand, InputChain };
MachineSDNode *Result =
CurDAG->getMachineNode(NewOpc, DL, MVT::i32, MVT::Other, Ops);
-
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
- MemOp[0] = StoreNode->getMemOperand();
- MemOp[1] = LoadNode->getMemOperand();
- Result->setMemRefs(MemOp, MemOp + 2);
+ CurDAG->setNodeMemRefs(
+ Result, {StoreNode->getMemOperand(), LoadNode->getMemOperand()});
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e76fa71dacd7..2a825c1316f3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -452,29 +452,29 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f64, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
- setOperationAction(ISD::FMAXNAN, MVT::f128, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
- setOperationAction(ISD::FMINNAN, MVT::f128, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
}
// We have fused multiply-addition for f32 and f64 but not f128.
@@ -523,10 +523,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::SDIV);
+ setTargetDAGCombine(ISD::UDIV);
+ setTargetDAGCombine(ISD::SREM);
+ setTargetDAGCombine(ISD::UREM);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -2213,8 +2219,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
if (!Mask)
return;
- KnownBits Known;
- DAG.computeKnownBits(C.Op0.getOperand(0), Known);
+ KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
return;
@@ -2912,12 +2917,12 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
DAG.getConstant(32, DL, MVT::i64));
}
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
- return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
DL, MVT::f32, Out64);
}
if (InVT == MVT::f32 && ResVT == MVT::i32) {
SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
- SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
MVT::f64, SDValue(U64, 0), In);
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
if (Subtarget.hasHighWord())
@@ -3160,10 +3165,9 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
// Get the known-zero masks for each operand.
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
- KnownBits Known[2];
- DAG.computeKnownBits(Ops[0], Known[0]);
- DAG.computeKnownBits(Ops[1], Known[1]);
+ SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
+ KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
+ DAG.computeKnownBits(Ops[1])};
// See if the upper 32 bits of one operand and the lower 32 bits of the
// other are known zero. They are the low and high operands respectively.
@@ -3346,8 +3350,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
}
// Get the known-zero mask for the operand.
- KnownBits Known;
- DAG.computeKnownBits(Op, Known);
+ KnownBits Known = DAG.computeKnownBits(Op);
unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
if (NumSignificantBits == 0)
return DAG.getConstant(0, DL, VT);
@@ -4475,6 +4478,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Constants with undefs to get a full vector constant and use that
// as the starting point.
SDValue Result;
+ SDValue ReplicatedVal;
if (NumConstants > 0) {
for (unsigned I = 0; I < NumElements; ++I)
if (!Constants[I].getNode())
@@ -4485,17 +4489,21 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// avoid a false dependency on any previous contents of the vector
// register.
- // Use a VLREP if at least one element is a load.
- unsigned LoadElIdx = UINT_MAX;
+ // Use a VLREP if at least one element is a load. Make sure to replicate
+ // the load with the most elements having its value.
+ std::map<const SDNode*, unsigned> UseCounts;
+ SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
if (Elems[I].getOpcode() == ISD::LOAD &&
cast<LoadSDNode>(Elems[I])->isUnindexed()) {
- LoadElIdx = I;
- break;
+ SDNode *Ld = Elems[I].getNode();
+ UseCounts[Ld]++;
+ if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
+ LoadMaxUses = Ld;
}
- if (LoadElIdx != UINT_MAX) {
- Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
- Done[LoadElIdx] = true;
+ if (LoadMaxUses != nullptr) {
+ ReplicatedVal = SDValue(LoadMaxUses, 0);
+ Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
} else {
// Try to use VLVGP.
unsigned I1 = NumElements / 2 - 1;
@@ -4516,7 +4524,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// Use VLVGx to insert the other elements.
for (unsigned I = 0; I < NumElements; ++I)
- if (!Done[I] && !Elems[I].isUndef())
+ if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
DAG.getConstant(I, DL, MVT::i32));
return Result;
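[Illustrative sketch, not part of this patch] The hunk above makes buildVector() replicate the load that supplies the most elements rather than the first load it finds. A minimal standalone C++ sketch of that counting logic, with hypothetical names:

#include <map>
#include <vector>

// Return the value that occurs most often in Elems (first maximum wins),
// mirroring how the new code tracks UseCounts per load node and keeps the
// running maximum in LoadMaxUses.
int pickMostFrequent(const std::vector<int> &Elems) {
  std::map<int, unsigned> UseCounts;
  int Best = 0;
  bool HaveBest = false;
  for (int E : Elems) {
    UseCounts[E]++;
    if (!HaveBest || UseCounts[Best] < UseCounts[E]) {
      Best = E;
      HaveBest = true;
    }
  }
  return Best; // 0 if Elems is empty
}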
@@ -5359,6 +5367,46 @@ SDValue SystemZTargetLowering::combineMERGE(
return SDValue();
}
+SDValue SystemZTargetLowering::combineLOAD(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT LdVT = N->getValueType(0);
+ if (LdVT.isVector() || LdVT.isInteger())
+ return SDValue();
+ // Transform a scalar load that is REPLICATEd as well as having other
+ // use(s) to the form where the other use(s) use the first element of the
+ // REPLICATE instead of the load. Otherwise instruction selection will not
+ // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
+ // point loads.
+
+ SDValue Replicate;
+ SmallVector<SDNode*, 8> OtherUses;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == SystemZISD::REPLICATE) {
+ if (Replicate)
+ return SDValue(); // Should never happen
+ Replicate = SDValue(*UI, 0);
+ }
+ else if (UI.getUse().getResNo() == 0)
+ OtherUses.push_back(*UI);
+ }
+ if (!Replicate || OtherUses.empty())
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
+ Replicate, DAG.getConstant(0, DL, MVT::i32));
+ // Update uses of the loaded Value while preserving old chains.
+ for (SDNode *U : OtherUses) {
+ SmallVector<SDValue, 8> Ops;
+ for (SDValue Op : U->ops())
+ Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
+ DAG.UpdateNodeOperands(U, Ops);
+ }
+ return SDValue(N, 0);
+}
+
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
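[Illustrative sketch, not part of this patch] The new combineLOAD() above rewrites the scalar uses of a load that is also REPLICATEd so that they read element 0 of the replicated vector, letting instruction selection emit a single VLREP. A hypothetical source-level pattern that would benefit, written as a standalone sketch using GNU vector extensions:

typedef double v2f64 __attribute__((vector_size(16))); // GNU vector extension

double splatAndAdd(const double *P, v2f64 *Out) {
  double S = *P;      // one scalar FP load ...
  v2f64 V = {S, S};   // ... replicated into both vector lanes (a VLREP candidate)
  *Out = V;
  return S + 1.0;     // ... and also used as a scalar; the combine redirects this
                      // use to an EXTRACT_VECTOR_ELT of lane 0 of the replicate
}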
@@ -5394,8 +5442,7 @@ SDValue SystemZTargetLowering::combineSTORE(
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
SDValue Ops[] = {
- N->getOperand(0), BSwapOp, N->getOperand(2),
- DAG.getValueType(Op1.getValueType())
+ N->getOperand(0), BSwapOp, N->getOperand(2)
};
return
@@ -5436,7 +5483,7 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
// (fpround (extract_vector_elt X 0))
// (fpround (extract_vector_elt X 1)) ->
// (extract_vector_elt (VROUND X) 0)
- // (extract_vector_elt (VROUND X) 1)
+ // (extract_vector_elt (VROUND X) 2)
//
// This is a special case since the target doesn't really support v2f32s.
SelectionDAG &DAG = DCI.DAG;
@@ -5478,6 +5525,53 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
return SDValue();
}
+SDValue SystemZTargetLowering::combineFP_EXTEND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // (fpextend (extract_vector_elt X 0))
+ // (fpextend (extract_vector_elt X 2)) ->
+ // (extract_vector_elt (VEXTEND X) 0)
+ // (extract_vector_elt (VEXTEND X) 1)
+ //
+ // This is a special case since the target doesn't really support v2f32s.
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f64 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v4f32 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
+ SDValue OtherExtend = SDValue(*U->use_begin(), 0);
+ if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
+ OtherExtend.getOperand(0) == SDValue(U, 0) &&
+ OtherExtend.getValueType() == MVT::f64) {
+ SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+ MVT::v2f64, Vec);
+ DCI.AddToWorklist(VExtend.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
+ VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
+ VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
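[Illustrative sketch, not part of this patch] combineFP_EXTEND() above matches a pair of scalar extends of lanes 0 and 2 of a v4f32 and replaces them with one wide vector extend whose v2f64 result supplies lanes 0 and 1. A standalone, hypothetical example of the source-level pair it looks for:

struct Float4 { float v[4]; };

void extendPair(const Float4 &F, double &Lo, double &Hi) {
  Lo = (double)F.v[0]; // lane 0 of the f32 vector -> lane 0 of the f64 result
  Hi = (double)F.v[2]; // lane 2 of the f32 vector -> lane 1 of the f64 result
}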
@@ -5492,13 +5586,14 @@ SDValue SystemZTargetLowering::combineBSWAP(
// Create the byte-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
- LD->getBasePtr(), // Ptr
- DAG.getValueType(N->getValueType(0)) // VT
+ LD->getBasePtr() // Ptr
};
+ EVT LoadVT = N->getValueType(0);
+ if (LoadVT == MVT::i16)
+ LoadVT = MVT::i32;
SDValue BSLoad =
DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
- DAG.getVTList(N->getValueType(0) == MVT::i64 ?
- MVT::i64 : MVT::i32, MVT::Other),
+ DAG.getVTList(LoadVT, MVT::Other),
Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
@@ -5664,6 +5759,23 @@ SDValue SystemZTargetLowering::combineGET_CCMASK(
return Select->getOperand(4);
}
+SDValue SystemZTargetLowering::combineIntDIVREM(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // In the case where the divisor is a vector of constants a cheaper
+ // sequence of instructions can replace the divide. BuildSDIV is called to
+ // do this during DAG combining, but it only succeeds when it can build a
+ // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
+ // since it is not Legal but Custom it can only happen before
+ // legalization. Therefore we must scalarize this early before Combine
+ // 1. For widened vectors, this is already the result of type legalization.
+ if (VT.isVector() && isTypeLegal(VT) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
+ return DAG.UnrollVectorOp(N);
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
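[Illustrative sketch, not part of this patch] combineIntDIVREM() above unrolls a legal-typed vector division or remainder whose divisor is a constant build-vector into per-lane scalar operations, so that later combining can strength-reduce each lane. A standalone sketch of what that unrolling amounts to:

#include <array>

// Vector form: V / {3, 5, 7, 9}. The unrolled form is one scalar divide per
// lane, each of which can later become a multiply/shift sequence.
std::array<int, 4> divideByConstants(const std::array<int, 4> &V) {
  std::array<int, 4> R = { V[0] / 3, V[1] / 5, V[2] / 7, V[3] / 9 };
  return R;
}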
@@ -5673,14 +5785,20 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
case SystemZISD::MERGE_HIGH:
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
+ case ISD::LOAD: return combineLOAD(N, DCI);
case ISD::STORE: return combineSTORE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: return combineIntDIVREM(N, DCI);
}
return SDValue();
@@ -5791,10 +5909,10 @@ static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
unsigned OpNo) {
APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
- unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
- KnownBits LHSKnown(SrcBitWidth), RHSKnown(SrcBitWidth);
- DAG.computeKnownBits(Op.getOperand(OpNo), LHSKnown, Src0DemE, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(OpNo + 1), RHSKnown, Src1DemE, Depth + 1);
+ KnownBits LHSKnown =
+ DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
+ KnownBits RHSKnown =
+ DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
Known.One = LHSKnown.One & RHSKnown.One;
}
@@ -5860,9 +5978,8 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::s390_vuplf: {
SDValue SrcOp = Op.getOperand(1);
unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
- Known = KnownBits(SrcBitWidth);
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
- DAG.computeKnownBits(SrcOp, Known, SrcDemE, Depth + 1);
+ Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
if (IsLogical) {
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(SrcBitWidth);
@@ -5881,7 +5998,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case SystemZISD::REPLICATE: {
SDValue SrcOp = Op.getOperand(0);
- DAG.computeKnownBits(SrcOp, Known, Depth + 1);
+ Known = DAG.computeKnownBits(SrcOp, Depth + 1);
if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
break;
@@ -6852,7 +6969,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
.addImm(ThisLength)
.add(SrcBase)
.addImm(SrcDisp)
- ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ .setMemRefs(MI.memoperands());
DestDisp += ThisLength;
SrcDisp += ThisLength;
Length -= ThisLength;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 267e31a85216..622da32e418d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -337,18 +337,8 @@ enum NodeType : unsigned {
// Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
ATOMIC_CMP_SWAP_128,
- // Byte swapping load.
- //
- // Operand 0: the address to load from
- // Operand 1: the type of load (i16, i32, i64)
- LRV,
-
- // Byte swapping store.
- //
- // Operand 0: the value to store
- // Operand 1: the address to store to
- // Operand 2: the type of store (i16, i32, i64)
- STRV,
+ // Byte swapping load/store. Same operands as regular load/store.
+ LRV, STRV,
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
@@ -389,7 +379,7 @@ public:
// want to clobber the upper 32 bits of a GPR unnecessarily.
return MVT::i32;
}
- TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
// Widen subvectors to the full width rather than promoting integer
// elements. This is better because:
@@ -597,14 +587,17 @@ private:
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 4e47752ed122..1374ee91fa29 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -97,7 +97,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
- (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
@@ -110,7 +110,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
- (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
// the low half as-is.
@@ -187,7 +187,7 @@ def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
let Predicates = [FeatureNoVectorEnhancements1] in {
def : Pat<(f32 (fpround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fpround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
}
@@ -446,13 +446,13 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- FP32:$src1, subreg_r32), FP32:$src2)>;
+ FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fpextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index e3f9a9645d13..1e904a86ea79 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2900,7 +2900,7 @@ multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode,
}
multiclass UnaryExtraVRRaSPairGeneric<string mnemonic, bits<16> opcode> {
- let M4 = 0 in
+ let M4 = 0, Defs = [CC] in
def "" : InstVRRa<opcode, (outs VR128:$V1),
(ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $M3, $M5", []>;
@@ -3472,7 +3472,9 @@ multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
class BinaryVRRbSPairGeneric<string mnemonic, bits<16> opcode>
: InstVRRb<opcode, (outs VR128:$V1),
(ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []> {
+ let Defs = [CC];
+}
// Declare a pair of instructions, one which sets CC and one which doesn't.
// The CC-setting form ends with "S" and sets the low bit of M5.
@@ -3496,9 +3498,10 @@ multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode,
}
multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : InstVRRb<opcode, (outs VR128:$V1),
- (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ let Defs = [CC] in
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
imm32zx4:$M4, 0)>;
@@ -4185,9 +4188,10 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
}
multiclass TernaryOptVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : InstVRRb<opcode, (outs VR128:$V1),
- (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
- mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ let Defs = [CC] in
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
imm32zx4:$M4, 0)>;
@@ -4385,7 +4389,8 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
}
multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
+ let Defs = [CC] in
+ def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
VR128:$V4, imm32zx4:$M5, 0)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index f0f9211efd5d..b03b4edaa4ab 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -880,10 +880,10 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SystemZ::FP128BitRegClass.contains(SrcReg)) {
unsigned SrcRegHi =
RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
unsigned SrcRegLo =
RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
.addReg(SrcRegHi, getKillRegState(KillSrc))
@@ -894,10 +894,10 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SystemZ::VR128BitRegClass.contains(SrcReg)) {
unsigned DestRegHi =
RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
unsigned DestRegLo =
RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
- SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ SystemZ::subreg_h64, &SystemZ::VR128BitRegClass);
if (DestRegHi != SrcReg)
copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index bb5b7aae883b..8d3b1011d0a7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -756,16 +756,15 @@ def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>;
def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
-// Byte-swapping loads. Unlike normal loads, these instructions are
-// allowed to access storage more than once.
-def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
-def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>;
-def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;
-
-// Likewise byte-swapping stores.
-def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
-def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>;
-def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
+// Byte-swapping loads.
+def LRVH : UnaryRXY<"lrvh", 0xE31F, z_loadbswap16, GR32, 2>;
+def LRV : UnaryRXY<"lrv", 0xE31E, z_loadbswap32, GR32, 4>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, z_loadbswap64, GR64, 8>;
+
+// Byte-swapping stores.
+def STRVH : StoreRXY<"strvh", 0xE33F, z_storebswap16, GR32, 2>;
+def STRV : StoreRXY<"strv", 0xE33E, z_storebswap32, GR32, 4>;
+def STRVG : StoreRXY<"strvg", 0xE32F, z_storebswap64, GR64, 8>;
// Byte-swapping memory-to-memory moves.
let mayLoad = 1, mayStore = 1 in
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 92b86575235a..6c97b85277c3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -151,13 +151,13 @@ let Predicates = [FeatureVector] in {
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
- def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezf32 bdxaddr12only:$addr),
(VLLEZF bdxaddr12only:$addr)>;
- def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezf64 bdxaddr12only:$addr),
(VLLEZG bdxaddr12only:$addr)>;
let Predicates = [FeatureVectorEnhancements1] in {
def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>;
- def : Pat<(v4f32 (z_vllezlf32 bdxaddr12only:$addr)),
+ def : Pat<(z_vllezlf32 bdxaddr12only:$addr),
(VLLEZLF bdxaddr12only:$addr)>;
}
@@ -1031,7 +1031,7 @@ let Predicates = [FeatureVector] in {
// Maximum.
multiclass VectorMax<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, fmaxnum, tr, 4>;
- def : FPMinMax<insn, fmaxnan, tr, 1>;
+ def : FPMinMax<insn, fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
@@ -1055,7 +1055,7 @@ let Predicates = [FeatureVector] in {
// Minimum.
multiclass VectorMin<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, fminnum, tr, 4>;
- def : FPMinMax<insn, fminnan, tr, 1>;
+ def : FPMinMax<insn, fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
@@ -1405,8 +1405,8 @@ multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
(vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
subreg), 0)>;
}
-defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>;
-defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
+defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_h32>;
+defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_h64>;
// Match v2f64 insertions. The AddedComplexity counters the 3 added by
// TableGen for the base register operand in VLVG-based integer insertions
@@ -1414,10 +1414,10 @@ defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
let AddedComplexity = 4 in {
def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
(VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
- subreg_r64), VR128:$vec, 1)>;
+ subreg_h64), VR128:$vec, 1)>;
def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
(VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
- subreg_r64), 0)>;
+ subreg_h64), 0)>;
}
// We extract floating-point element X by replicating (for elements other
@@ -1426,14 +1426,14 @@ let AddedComplexity = 4 in {
// extractions and ensures that this version is strictly better.
let AddedComplexity = 4 in {
def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)),
- (EXTRACT_SUBREG VR128:$vec, subreg_r32)>;
+ (EXTRACT_SUBREG VR128:$vec, subreg_h32)>;
def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)),
- (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>;
+ (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_h32)>;
def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
- (EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
+ (EXTRACT_SUBREG VR128:$vec, subreg_h64)>;
def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
- (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
+ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_h64)>;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
index 5103867e2d9a..626675bfb70c 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -127,14 +127,6 @@ def SDT_ZIPM : SDTypeProfile<1, 1,
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
-def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
- [SDTCisInt<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
-def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
- [SDTCisInt<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
def SDT_ZTBegin : SDTypeProfile<1, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
@@ -283,9 +275,9 @@ def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>;
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
-def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
+def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
+def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>;
@@ -429,16 +421,28 @@ def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
// Pattern fragments
//===----------------------------------------------------------------------===//
-def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
-def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
-def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
+def z_loadbswap16 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_loadbswap32 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_loadbswap64 : PatFrag<(ops node:$addr), (z_loadbswap node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
-def z_strvh : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i16)>;
-def z_strv : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i32)>;
-def z_strvg : PatFrag<(ops node:$src, node:$addr),
- (z_storebswap node:$src, node:$addr, i64)>;
+def z_storebswap16 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_storebswap32 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_storebswap64 : PatFrag<(ops node:$src, node:$addr),
+ (z_storebswap node:$src, node:$addr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
// Fragments including CC as an implicit source.
def z_br_ccmask
@@ -556,7 +560,6 @@ class NonvolatileLoad<SDPatternOperator load>
auto *Load = cast<LoadSDNode>(N);
return !Load->isVolatile();
}]>;
-def nonvolatile_load : NonvolatileLoad<load>;
def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
@@ -567,7 +570,6 @@ class NonvolatileStore<SDPatternOperator store>
auto *Store = cast<StoreSDNode>(N);
return !Store->isVolatile();
}]>;
-def nonvolatile_store : NonvolatileStore<store>;
def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
@@ -743,37 +745,37 @@ class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
def z_vllezi32 : z_vllez<i32, load, 1>;
-def z_vllezi64 : PatFrag<(ops node:$addr),
- (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
+def z_vllezi64 : PatFrags<(ops node:$addr),
+ [(z_vector_insert (z_vzero),
+ (i64 (load node:$addr)), (i32 0)),
+ (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
// We use high merges to form a v4f32 from four f32s. Propagating zero
// into all elements but index 1 gives this expression.
def z_vllezf32 : PatFrag<(ops node:$addr),
- (bitconvert
- (z_merge_high
- (v2i64
- (z_unpackl_high
- (v4i32
- (bitconvert
- (v4f32 (scalar_to_vector
- (f32 (load node:$addr)))))))),
- (v2i64 (z_vzero))))>;
+ (z_merge_high
+ (v2i64
+ (z_unpackl_high
+ (v4i32
+ (bitconvert
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))))))),
+ (v2i64 (z_vzero)))>;
def z_vllezf64 : PatFrag<(ops node:$addr),
(z_merge_high
- (scalar_to_vector (f64 (load node:$addr))),
+ (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
(z_vzero))>;
// Similarly for the high element of a zeroed vector.
def z_vllezli32 : z_vllez<i32, load, 0>;
def z_vllezlf32 : PatFrag<(ops node:$addr),
- (bitconvert
- (z_merge_high
- (v2i64
- (bitconvert
- (z_merge_high
- (v4f32 (scalar_to_vector
- (f32 (load node:$addr)))),
- (v4f32 (z_vzero))))),
- (v2i64 (z_vzero))))>;
+ (z_merge_high
+ (v2i64
+ (bitconvert
+ (z_merge_high
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))),
+ (v4f32 (z_vzero))))),
+ (v2i64 (z_vzero)))>;
// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 76ed6f80ba55..e9f9188048da 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -63,6 +63,10 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
+ VirtReg, Order, Hints, MF, VRM, Matrix);
+
if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
SmallVector<unsigned, 8> Worklist;
SmallSet<unsigned, 4> DoneRegs;
@@ -84,8 +88,18 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
getRC32(TrueMO, VRM, MRI));
if (RC && RC != &SystemZ::GRX32BitRegClass) {
+ // Pass the registers of RC as hints while making sure that if
+ // any of these registers are copy hints, hint them first.
+ SmallSet<unsigned, 4> CopyHints;
+ CopyHints.insert(Hints.begin(), Hints.end());
+ Hints.clear();
+ for (MCPhysReg Reg : Order)
+ if (CopyHints.count(Reg) &&
+ RC->contains(Reg) && !MRI->isReserved(Reg))
+ Hints.push_back(Reg);
for (MCPhysReg Reg : Order)
- if (RC->contains(Reg) && !MRI->isReserved(Reg))
+ if (!CopyHints.count(Reg) &&
+ RC->contains(Reg) && !MRI->isReserved(Reg))
Hints.push_back(Reg);
// Return true to make these hints the only regs available to
// RA. This may mean extra spilling but since the alternative is
@@ -102,8 +116,7 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
}
- return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
- VRM, Matrix);
+ return BaseImplRetVal;
}
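[Illustrative sketch, not part of this patch] The getRegAllocationHints() change above keeps the target-provided allocation order but moves registers that are already copy hints to the front of the hint list. A standalone sketch of that two-pass ordering, with hypothetical names:

#include <set>
#include <vector>

std::vector<unsigned> orderHints(const std::vector<unsigned> &Order,
                                 const std::set<unsigned> &CopyHints,
                                 const std::set<unsigned> &Allowed) {
  std::vector<unsigned> Hints;
  // First pass: allowed registers that are also copy hints, in allocation order.
  for (unsigned Reg : Order)
    if (CopyHints.count(Reg) && Allowed.count(Reg))
      Hints.push_back(Reg);
  // Second pass: the remaining allowed registers, in allocation order.
  for (unsigned Reg : Order)
    if (!CopyHints.count(Reg) && Allowed.count(Reg))
      Hints.push_back(Reg);
  return Hints;
}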
const MCPhysReg *
@@ -270,25 +283,30 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
// Check that the two virtual registers are local to MBB.
MachineBasicBlock *MBB = MI->getParent();
- if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) ||
- LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB))
+ MachineInstr *FirstMI_GR128 =
+ LIS.getInstructionFromIndex(IntGR128.beginIndex());
+ MachineInstr *FirstMI_GRNar =
+ LIS.getInstructionFromIndex(IntGRNar.beginIndex());
+ MachineInstr *LastMI_GR128 = LIS.getInstructionFromIndex(IntGR128.endIndex());
+ MachineInstr *LastMI_GRNar = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+ if ((!FirstMI_GR128 || FirstMI_GR128->getParent() != MBB) ||
+ (!FirstMI_GRNar || FirstMI_GRNar->getParent() != MBB) ||
+ (!LastMI_GR128 || LastMI_GR128->getParent() != MBB) ||
+ (!LastMI_GRNar || LastMI_GRNar->getParent() != MBB))
return false;
- // Find the first and last MIs of the registers.
- MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+ MachineBasicBlock::iterator MII = nullptr, MEE = nullptr;
if (WideOpNo == 1) {
- FirstMI = LIS.getInstructionFromIndex(IntGR128.beginIndex());
- LastMI = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+ MII = FirstMI_GR128;
+ MEE = LastMI_GRNar;
} else {
- FirstMI = LIS.getInstructionFromIndex(IntGRNar.beginIndex());
- LastMI = LIS.getInstructionFromIndex(IntGR128.endIndex());
+ MII = FirstMI_GRNar;
+ MEE = LastMI_GR128;
}
- assert (FirstMI && LastMI && "No instruction from index?");
// Check if coalescing seems safe by finding the set of clobbered physreg
// pairs in the region.
BitVector PhysClobbered(getNumRegs());
- MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI;
MEE++;
for (; MII != MEE; ++MII) {
for (const MachineOperand &MO : MII->operands())
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 94781659a50a..9fd2e4ae4f00 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -57,8 +57,6 @@ public:
const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
// Override TargetRegisterInfo.h.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 79ba7534f92c..cea88c088b86 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -25,11 +25,8 @@ def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
def subreg_l64 : SubRegIndex<64, 0>;
def subreg_h64 : SubRegIndex<64, 64>;
-def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits.
-def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits.
def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
-def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>;
}
// Define a register class that contains values of types TYPES and an
@@ -188,7 +185,7 @@ class FPR32<bits<16> num, string n> : SystemZReg<n> {
class FPR64<bits<16> num, string n, FPR32 high>
: SystemZRegWithSubregs<n, [high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_r32];
+ let SubRegIndices = [subreg_h32];
}
// 8 pairs of FPR64s, with a one-register gap inbetween.
@@ -231,7 +228,7 @@ defm FP128 : SystemZRegClass<"FP128", [f128], 128,
class VR128<bits<16> num, string n, FPR64 high>
: SystemZRegWithSubregs<n, [high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_r64];
+ let SubRegIndices = [subreg_h64];
}
// Full vector registers.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
index 385a94b5d6a9..83bf97e6841a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
@@ -12,11 +12,13 @@
// These resources are used to express decoder grouping rules. The number of
// decoder slots needed by an instructions is normally one, but there are
// exceptions.
-def NormalGr : SchedWrite;
-def Cracked : SchedWrite;
-def GroupAlone : SchedWrite;
-def BeginGroup : SchedWrite;
-def EndGroup : SchedWrite;
+def NormalGr : SchedWrite;
+def Cracked : SchedWrite;
+def GroupAlone : SchedWrite;
+def GroupAlone2 : SchedWrite;
+def GroupAlone3 : SchedWrite;
+def BeginGroup : SchedWrite;
+def EndGroup : SchedWrite;
// A SchedWrite added to other SchedWrites to make LSU latency parameterizable.
def LSULatency : SchedWrite;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index 5d32232107af..74e1dad87908 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -48,6 +48,16 @@ def : WriteRes<GroupAlone, []> {
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -131,7 +141,7 @@ def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXa2, FXb2, GroupAlone],
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -474,13 +484,13 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXa2, LSU, GroupAlone],
//===----------------------------------------------------------------------===//
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "D$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
(instregex "DSG(F)?$")>;
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "DL(G)?$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
// Shifts
@@ -490,7 +500,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -597,9 +607,9 @@ def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone],
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
-def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone],
+def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, GroupAlone3],
(instregex "CDSG$")>;
// Compare and swap and store
@@ -620,7 +630,7 @@ def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -643,21 +653,21 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD],
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
-def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone],
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone], (instregex "(M|D)P$")>;
-def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone], (instregex "SRP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "(M|D)P$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
@@ -674,7 +684,7 @@ def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
// Load/store access multiple (not modeled precisely)
def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
//===----------------------------------------------------------------------===//
// Program mask and addressing mode
@@ -704,7 +714,7 @@ def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
//===----------------------------------------------------------------------===//
// Transaction begin
-def : InstRW<[WLat9, LSU2, FXb5, GroupAlone], (instregex "TBEGIN(C)?$")>;
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
// Transaction end
def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
@@ -813,9 +823,9 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked],
@@ -899,7 +909,7 @@ def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
// Test Data Class
def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
-def : InstRW<[WLat10, LSU2, VecDF4, GroupAlone], (instregex "TCXB$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
//===----------------------------------------------------------------------===//
// FP: Floating-point control register instructions
@@ -941,7 +951,7 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
@@ -1054,9 +1064,9 @@ def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>;
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)TR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>;
// Convert to fixed / logical
def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
@@ -1068,19 +1078,19 @@ def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
// Convert from / to packed
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
@@ -1129,7 +1139,7 @@ def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
-def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1137,7 +1147,7 @@ def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1200,7 +1210,7 @@ def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
//===----------------------------------------------------------------------===//
@@ -1414,7 +1424,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STCT(L|G)$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
@@ -1458,8 +1468,8 @@ def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
// System: Memory-move Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone], (instregex "MVC(K|P|S)$")>;
-def : InstRW<[WLat1, FXa, LSU5, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
@@ -1491,8 +1501,8 @@ def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
-def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index 515f968e5091..1962fdf3a1d1 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -48,6 +48,16 @@ def : WriteRes<GroupAlone, []> {
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -132,7 +142,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXa2, FXb2, GroupAlone],
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -483,13 +493,14 @@ def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "D$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
(instregex "DSG(F)?$")>;
def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
-def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone], (instregex "DL(G)?$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2],
+ (instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
// Shifts
@@ -499,7 +510,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -606,10 +617,10 @@ def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone],
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3,
- GroupAlone], (instregex "CDSG$")>;
+ GroupAlone3], (instregex "CDSG$")>;
// Compare and swap and store
def : InstRW<[WLat30, MCD], (instregex "CSST$")>;
@@ -629,7 +640,7 @@ def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -662,21 +673,21 @@ def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>;
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
-def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone],
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone], (instregex "(M|D)P$")>;
-def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone], (instregex "SRP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "(M|D)P$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
@@ -693,7 +704,7 @@ def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
// Load/store access multiple (not modeled precisely)
def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
//===----------------------------------------------------------------------===//
// Program mask and addressing mode
@@ -723,7 +734,7 @@ def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
//===----------------------------------------------------------------------===//
// Transaction begin
-def : InstRW<[WLat9, LSU2, FXb5, GroupAlone], (instregex "TBEGIN(C)?$")>;
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
// Transaction end
def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
@@ -832,9 +843,9 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked],
@@ -918,7 +929,7 @@ def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
// Test Data Class
def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
-def : InstRW<[WLat10, LSU2, VecDF4, GroupAlone], (instregex "TCXB$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
//===----------------------------------------------------------------------===//
// FP: Floating-point control register instructions
@@ -960,7 +971,7 @@ def : InstRW<[WLat8, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat10, WLat10, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
@@ -1071,9 +1082,9 @@ def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CD(F|G)TR(A)?$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)TR(A)?$")>;
def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat30, FXb, VecDF4, GroupAlone], (instregex "CXL(F|G)TR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)TR$")>;
// Convert to fixed / logical
def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
@@ -1085,19 +1096,19 @@ def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
// Convert from / to packed
def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
-def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
@@ -1146,7 +1157,7 @@ def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
-def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1154,7 +1165,7 @@ def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
-def : InstRW<[WLat11, FXb, VecDF4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1218,7 +1229,7 @@ def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
@@ -1469,7 +1480,7 @@ def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "VLIP$")>;
-def : InstRW<[WLat6, VecDFX, LSU, GroupAlone], (instregex "VPKZ$")>;
+def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>;
def : InstRW<[WLat1, VecDFX, FXb, LSU, Cracked], (instregex "VUPKZ$")>;
def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVB(G)?$")>;
def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], (instregex "VCVD(G)?$")>;
@@ -1489,7 +1500,7 @@ def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>;
-def : InstRW<[WLat20, GroupAlone], (instregex "LPSW(E)?$")>;
+def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>;
def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>;
@@ -1502,7 +1513,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
-def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "STCT(L|G)$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
@@ -1547,8 +1558,8 @@ def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
// System: Memory-move Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone], (instregex "MVC(K|P|S)$")>;
-def : InstRW<[WLat1, FXa, LSU5, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
@@ -1580,8 +1591,8 @@ def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
-def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 3012b565d5ef..7535739f813a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -39,15 +39,21 @@ let NumMicroOps = 1 in {
def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
def : WriteRes<EndGroup, []> { let EndGroup = 1; }
}
-def : WriteRes<Cracked, []> {
- let NumMicroOps = 2;
- let BeginGroup = 1;
-}
def : WriteRes<GroupAlone, []> {
let NumMicroOps = 3;
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -114,7 +120,7 @@ def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, LSU, EndGroup], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCT(G|H)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXU3, LSU, GroupAlone],
+def : InstRW<[WLat1, FXU3, LSU, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -439,14 +445,14 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone],
// Division and remainder
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "D$")>;
-def : InstRW<[WLat30, FPU4, FXU4, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3],
(instregex "DSG(F)?$")>;
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DL(G)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
@@ -457,7 +463,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -560,7 +566,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone],
(instregex "CDSG$")>;
@@ -604,12 +610,12 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")>
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
-def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone],
+def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone2],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone], (instregex "CVDG$")>;
-def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone3], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
def : InstRW<[WLat10, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
@@ -701,13 +707,13 @@ def : InstRW<[], (instregex "Insn.*")>;
// Load zero
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LZ(DR|ER)$")>;
-def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LZXR$")>;
+def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LZXR$")>;
// Load
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LER$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "LD(R|R32|GR)$")>;
def : InstRW<[WLat3, FXU, NormalGr], (instregex "LGDR$")>;
-def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>;
+def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>;
// Load and Test
def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
@@ -747,10 +753,10 @@ def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CX(F|G)BR(A?)$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -874,7 +880,7 @@ def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat10, FXU, FPU4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -986,11 +992,11 @@ def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>;
def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXFTR(A)?$")>;
-def : InstRW<[WLat30, FXU, DFU4, GroupAlone], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>;
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat9, FXU, DFU4, GroupAlone], (instregex "CXLFTR$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXLGTR$")>;
+def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>;
// Convert to fixed / logical
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>;
@@ -1002,9 +1008,9 @@ def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat8, FXU2, DFU4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat12, FXU2, DFU4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Perform floating-point operation
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>;
@@ -1051,7 +1057,7 @@ def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>;
-def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1059,7 +1065,7 @@ def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>;
-def : InstRW<[WLat7, FXU, DFU4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 892f493570d1..a21d2c4cef70 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -39,15 +39,21 @@ let NumMicroOps = 1 in {
def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
def : WriteRes<EndGroup, []> { let EndGroup = 1; }
}
-def : WriteRes<Cracked, []> {
- let NumMicroOps = 2;
- let BeginGroup = 1;
-}
def : WriteRes<GroupAlone, []> {
let NumMicroOps = 3;
let BeginGroup = 1;
let EndGroup = 1;
}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
// Incoming latency removed from the register operand which is used together
// with a memory operand by the instruction.
@@ -119,7 +125,7 @@ def : InstRW<[WLat1, LSU, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
def : InstRW<[WLat1, FXU, EndGroup], (instregex "BRCT(G)?$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BRCTH$")>;
def : InstRW<[WLat1, FXU, LSU, GroupAlone], (instregex "BCT(G)?(R)?$")>;
-def : InstRW<[WLat1, FXU3, LSU, GroupAlone],
+def : InstRW<[WLat1, FXU3, LSU, GroupAlone2],
(instregex "B(R)?X(H|L).*$")>;
// Compare and branch
@@ -450,14 +456,14 @@ def : InstRW<[WLat7LSU, RegReadAdv, FXU2, LSU, GroupAlone],
// Division and remainder
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DR$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "D$")>;
-def : InstRW<[WLat30, FPU4, FXU4, GroupAlone], (instregex "DSG(F)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU4, GroupAlone3], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU3, GroupAlone3],
(instregex "DSG(F)?$")>;
-def : InstRW<[WLat30, FPU4, FXU5, GroupAlone], (instregex "DL(G)?R$")>;
-def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone],
+def : InstRW<[WLat30, FPU4, FXU5, GroupAlone3], (instregex "DL(G)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FPU4, LSU, FXU4, GroupAlone3],
(instregex "DL(G)?$")>;
//===----------------------------------------------------------------------===//
@@ -468,7 +474,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRL(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SRA(G|K)?$")>;
def : InstRW<[WLat1, FXU, NormalGr], (instregex "SLA(G|K)?$")>;
-def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU4, LSU, GroupAlone2],
(instregex "S(L|R)D(A|L)$")>;
// Rotate
@@ -572,7 +578,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, FXU2, LSU, GroupAlone],
(instregex "CS(G|Y)?$")>;
// Compare double and swap
-def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone],
+def : InstRW<[WLat5LSU, WLat5LSU, FXU5, LSU, GroupAlone2],
(instregex "CDS(Y)?$")>;
def : InstRW<[WLat12, WLat12, FXU6, LSU2, GroupAlone],
(instregex "CDSG$")>;
@@ -595,7 +601,7 @@ def : InstRW<[WLat2LSU, WLat2LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[WLat1, LSU, GroupAlone], (instregex "TR$")>;
-def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone],
+def : InstRW<[WLat30, WLat30, WLat30, FXU3, LSU2, GroupAlone2],
(instregex "TRT$")>;
def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
@@ -617,11 +623,11 @@ def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(KIMD|KLMD|KMAC|PCC)$")>
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone],
+def : InstRW<[WLat30, RegReadAdv, FXU, DFU2, LSU2, GroupAlone2],
(instregex "CVBG$")>;
def : InstRW<[WLat20, RegReadAdv, FXU, DFU, LSU, GroupAlone],
(instregex "CVB(Y)?$")>;
-def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXU3, DFU4, LSU, GroupAlone3], (instregex "CVDG$")>;
def : InstRW<[WLat1, FXU2, DFU, LSU, GroupAlone], (instregex "CVD(Y)?$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
@@ -785,10 +791,10 @@ def : InstRW<[WLat10, FPU4, GroupAlone], (instregex "LX(E|D)BR$")>;
// Convert from fixed / logical
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)BR(A)?$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CX(F|G)BR(A?)$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)BR(A?)$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CEL(F|G)BR$")>;
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "CDL(F|G)BR$")>;
-def : InstRW<[WLat11, FXU, FPU4, GroupAlone], (instregex "CXL(F|G)BR$")>;
+def : InstRW<[WLat11, FXU, FPU4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
// Convert to fixed / logical
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -912,7 +918,7 @@ def : InstRW<[WLat9, FPU4, GroupAlone], (instregex "LX(E|D)R$")>;
// Convert from fixed
def : InstRW<[WLat8, FXU, FPU, GroupAlone], (instregex "C(E|D)(F|G)R$")>;
-def : InstRW<[WLat10, FXU, FPU4, GroupAlone], (instregex "CX(F|G)R$")>;
+def : InstRW<[WLat10, FXU, FPU4, GroupAlone2], (instregex "CX(F|G)R$")>;
// Convert to fixed
def : InstRW<[WLat12, WLat12, FXU, FPU, GroupAlone],
@@ -1024,11 +1030,11 @@ def : InstRW<[WLat6, DFU4, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDFTR$")>;
def : InstRW<[WLat30, FXU, DFU, GroupAlone], (instregex "CDGTR(A)?$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXFTR(A)?$")>;
-def : InstRW<[WLat30, FXU, DFU4, GroupAlone], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXU, DFU4, GroupAlone2], (instregex "CXGTR(A)?$")>;
def : InstRW<[WLat9, FXU, DFU, GroupAlone], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[WLat9, FXU, DFU4, GroupAlone], (instregex "CXLFTR$")>;
-def : InstRW<[WLat5, FXU, DFU4, GroupAlone], (instregex "CXLGTR$")>;
+def : InstRW<[WLat9, FXU, DFU4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat5, FXU, DFU4, GroupAlone2], (instregex "CXLGTR$")>;
// Convert to fixed / logical
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "CFDTR(A)?$")>;
@@ -1040,13 +1046,13 @@ def : InstRW<[WLat7, WLat7, FXU, DFU2, GroupAlone], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "CD(S|U)TR$")>;
-def : InstRW<[WLat8, FXU2, DFU4, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat8, FXU2, DFU4, GroupAlone2], (instregex "CX(S|U)TR$")>;
def : InstRW<[WLat7, FXU, DFU, GroupAlone], (instregex "C(S|U)DTR$")>;
-def : InstRW<[WLat12, FXU2, DFU4, GroupAlone], (instregex "C(S|U)XTR$")>;
+def : InstRW<[WLat12, FXU2, DFU4, GroupAlone2], (instregex "C(S|U)XTR$")>;
// Convert from / to zoned
def : InstRW<[WLat4LSU, LSU, DFU2, GroupAlone], (instregex "CDZT$")>;
-def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[WLat11LSU, LSU2, DFU4, GroupAlone3], (instregex "CXZT$")>;
def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZDT$")>;
def : InstRW<[WLat1, FXU, LSU, DFU2, GroupAlone], (instregex "CZXT$")>;
@@ -1095,7 +1101,7 @@ def : InstRW<[WLat10, WLat10, DFU4, GroupAlone], (instregex "QAXTR$")>;
// Reround
def : InstRW<[WLat11, WLat11, FXU, DFU, GroupAlone], (instregex "RRDTR$")>;
-def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone], (instregex "RRXTR$")>;
+def : InstRW<[WLat30, WLat30, FXU, DFU4, GroupAlone2], (instregex "RRXTR$")>;
// Shift significand left/right
def : InstRW<[WLat7LSU, LSU, DFU, GroupAlone], (instregex "S(L|R)DT$")>;
@@ -1103,7 +1109,7 @@ def : InstRW<[WLat11LSU, LSU, DFU4, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
def : InstRW<[WLat5, FXU, DFU, GroupAlone], (instregex "IEDTR$")>;
-def : InstRW<[WLat7, FXU, DFU4, GroupAlone], (instregex "IEXTR$")>;
+def : InstRW<[WLat7, FXU, DFU4, GroupAlone2], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
// DFP: Comparisons
@@ -1223,7 +1229,7 @@ def : InstRW<[WLat30, MCD], (instregex "SCKPF$")>;
def : InstRW<[WLat30, MCD], (instregex "SCKC$")>;
def : InstRW<[WLat30, MCD], (instregex "SPT$")>;
def : InstRW<[WLat9, FXU, LSU2, GroupAlone], (instregex "STCK(F)?$")>;
-def : InstRW<[WLat20, LSU4, FXU2, GroupAlone], (instregex "STCKE$")>;
+def : InstRW<[WLat20, LSU4, FXU2, GroupAlone2], (instregex "STCKE$")>;
def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
def : InstRW<[WLat30, MCD], (instregex "STPT$")>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 9cd09b0f911e..fb030a207bc7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -19,6 +19,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "SystemZGenSubtargetInfo.inc"
+static cl::opt<bool> UseSubRegLiveness(
+ "systemz-subreg-liveness",
+ cl::desc("Enable subregister liveness tracking for SystemZ (experimental)"),
+ cl::Hidden);
+
// Pin the vtable to this file.
void SystemZSubtarget::anchor() {}
@@ -54,6 +59,11 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
+
+bool SystemZSubtarget::enableSubRegLiveness() const {
+ return UseSubRegLiveness;
+}
+
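// The option above is a plain cl::opt<bool> with no cl::init, so it should
// default to false; subregister liveness tracking stays off unless the hidden
// flag is passed explicitly, e.g. "llc -systemz-subreg-liveness file.ll"
// (illustrative invocation, assuming the usual cl::opt handling in llc).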
bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
CodeModel::Model CM) const {
// PC32DBL accesses require the low bit to be clear. Note that a zero
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 8285b4277d11..cb6b21a1d465 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -102,6 +102,9 @@ public:
// Always enable the early if-conversion pass.
bool enableEarlyIfConversion() const override { return true; }
+ // Enable tracking of subregister liveness in register allocator.
+ bool enableSubRegLiveness() const override;
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f3620dcf3b92..9596a2b6388d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -128,10 +128,16 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
// in range of LARL. However, the JIT environment has no equivalent
// of copy relocs, so locally-binding data symbols might not be in
// the range of LARL. We need the Medium model in that case.
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- Reloc::Model RM, bool JIT) {
- if (CM)
+static CodeModel::Model
+getEffectiveSystemZCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
+ bool JIT) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
+ if (*CM == CodeModel::Kernel)
+ report_fatal_error("Target does not support the kernel CodeModel");
return *CM;
+ }
if (JIT)
return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
return CodeModel::Small;
@@ -146,7 +152,8 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL),
+ getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
+ OL),
TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index c5cdc22f2099..129610fe095b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -328,6 +328,25 @@ bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
}
+// Return the bit size for the scalar type or vector element
+// type. getScalarSizeInBits() returns 0 for a pointer type.
+static unsigned getScalarSizeInBits(Type *Ty) {
+ unsigned Size =
+ (Ty->isPtrOrPtrVectorTy() ? 64U : Ty->getScalarSizeInBits());
+ assert(Size > 0 && "Element must have non-zero size.");
+ return Size;
+}
+
+// getNumberOfParts() calls getTypeLegalizationCost() which splits the vector
+// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of
+// 3.
+static unsigned getNumVectorRegs(Type *Ty) {
+ assert(Ty->isVectorTy() && "Expected vector type");
+ unsigned WideBits = getScalarSizeInBits(Ty) * Ty->getVectorNumElements();
+ assert(WideBits > 0 && "Could not compute size of vector");
+ return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+}
+
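A standalone sketch of the rounding this helper performs (plain integers and a made-up function name, independent of the LLVM types above): <6 x i64> occupies 384 bits and therefore three 128-bit vector registers, which is exactly the case the comment contrasts with getNumberOfParts().

#include <cassert>

// Same ceiling computation as getNumVectorRegs(), on plain integers.
unsigned numVectorRegsFor(unsigned ScalarBits, unsigned NumElts) {
  unsigned WideBits = ScalarBits * NumElts;
  assert(WideBits > 0 && "Could not compute size of vector");
  return (WideBits % 128U) ? (WideBits / 128U) + 1 : (WideBits / 128U);
}

int main() {
  assert(numVectorRegsFor(64, 6) == 3);  // <6 x i64>: 384 bits -> 3 registers
  assert(numVectorRegsFor(32, 3) == 1);  // <3 x i32>:  96 bits -> 1 register
  assert(numVectorRegsFor(8, 32) == 2);  // <32 x i8>: 256 bits -> 2 registers
  return 0;
}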
int SystemZTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
@@ -343,44 +362,59 @@ int SystemZTTIImpl::getArithmeticInstrCost(
unsigned ScalarBits = Ty->getScalarSizeInBits();
- // Div with a constant which is a power of 2 will be converted by
- // DAGCombiner to use shifts. With vector shift-element instructions, a
- // vector sdiv costs about as much as a scalar one.
- const unsigned SDivCostEstimate = 4;
- bool SDivPow2 = false;
- bool UDivPow2 = false;
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
- Args.size() == 2) {
- const ConstantInt *CI = nullptr;
+ // There are three cases of division and remainder: Dividing with a register
+ // needs a divide instruction. A divisor which is a power of two constant
+ // can be implemented with a sequence of shifts. Any other constant needs a
+ // multiply and shifts.
+ const unsigned DivInstrCost = 20;
+ const unsigned DivMulSeqCost = 10;
+ const unsigned SDivPow2Cost = 4;
+
+ bool SignedDivRem =
+ Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+ bool UnsignedDivRem =
+ Opcode == Instruction::UDiv || Opcode == Instruction::URem;
+
+ // Check for a constant divisor.
+ bool DivRemConst = false;
+ bool DivRemConstPow2 = false;
+ if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) {
if (const Constant *C = dyn_cast<Constant>(Args[1])) {
- if (C->getType()->isVectorTy())
- CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+ const ConstantInt *CVal =
+ (C->getType()->isVectorTy()
+ ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
+ : dyn_cast<const ConstantInt>(C));
+ if (CVal != nullptr &&
+ (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2()))
+ DivRemConstPow2 = true;
else
- CI = dyn_cast<const ConstantInt>(C);
- }
- if (CI != nullptr &&
- (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
- if (Opcode == Instruction::SDiv)
- SDivPow2 = true;
- else
- UDivPow2 = true;
+ DivRemConst = true;
}
}
if (Ty->isVectorTy()) {
- assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
+ assert(ST->hasVector() &&
+ "getArithmeticInstrCost() called with vector type.");
unsigned VF = Ty->getVectorNumElements();
- unsigned NumVectors = getNumberOfParts(Ty);
+ unsigned NumVectors = getNumVectorRegs(Ty);
// These vector operations are custom handled, but are still supported
// with one instruction per vector, regardless of element size.
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
- Opcode == Instruction::AShr || UDivPow2) {
+ Opcode == Instruction::AShr) {
return NumVectors;
}
- if (SDivPow2)
- return (NumVectors * SDivCostEstimate);
+ if (DivRemConstPow2)
+ return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
+ if (DivRemConst)
+ return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+ if ((SignedDivRem || UnsignedDivRem) && VF > 4)
+ // Temporary hack: disable high vectorization factors with integer
+ // division/remainder, which will get scalarized and handled with
+ // GR128 registers. The MI scheduler is not clever enough to avoid
+ // spilling yet.
+ return 1000;
// These FP operations are supported with a single vector instruction for
// double (base implementation assumes float generally costs 2). For
@@ -395,7 +429,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
return NumVectors;
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ unsigned ScalarCost =
+ getArithmeticInstrCost(Opcode, Ty->getScalarType());
unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
// FIXME: VF 2 for these FP operations are currently just as
// expensive as for VF 4.
@@ -432,30 +467,22 @@ int SystemZTTIImpl::getArithmeticInstrCost(
if (Opcode == Instruction::FRem)
return LIBCALL_COST;
- if (Opcode == Instruction::LShr || Opcode == Instruction::AShr)
- return (ScalarBits >= 32 ? 1 : 2 /*ext*/);
-
// Or requires one instruction, although it has custom handling for i64.
if (Opcode == Instruction::Or)
return 1;
- if (Opcode == Instruction::Xor && ScalarBits == 1)
- // 2 * ipm sequences ; xor ; shift ; compare
- return 7;
-
- if (UDivPow2)
- return 1;
- if (SDivPow2)
- return SDivCostEstimate;
-
- // An extra extension for narrow types is needed.
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
- // sext of op(s) for narrow types
- return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
+ if (Opcode == Instruction::Xor && ScalarBits == 1) {
+ if (ST->hasLoadStoreOnCond2())
+ return 5; // 2 * (li 0; loc 1); xor
+ return 7; // 2 * ipm sequences ; xor ; shift ; compare
+ }
- if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
- // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
- return (ScalarBits < 32 ? 4 : 2);
+ if (DivRemConstPow2)
+ return (SignedDivRem ? SDivPow2Cost : 1);
+ if (DivRemConst)
+ return DivMulSeqCost;
+ if (SignedDivRem || UnsignedDivRem)
+ return DivInstrCost;
}
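A minimal sketch of the scalar division/remainder cost selection above, reusing the same constants; the enum and helper are hypothetical and only restate the three returns for a register divisor, a power-of-two constant, and any other constant.

// Hypothetical helper mirroring the scalar DivRem cost returns above.
enum class DivisorKind { Register, PowerOfTwoConstant, OtherConstant };

unsigned scalarDivRemCost(bool IsSigned, DivisorKind Divisor) {
  const unsigned DivInstrCost = 20;  // divide instruction needed
  const unsigned DivMulSeqCost = 10; // multiply-and-shift sequence
  const unsigned SDivPow2Cost = 4;   // signed power of two: shift sequence
  switch (Divisor) {
  case DivisorKind::PowerOfTwoConstant:
    return IsSigned ? SDivPow2Cost : 1; // unsigned power of two is one shift
  case DivisorKind::OtherConstant:
    return DivMulSeqCost;
  case DivisorKind::Register:
    return DivInstrCost;
  }
  return DivInstrCost;
}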
// Fallback to the default implementation.
@@ -463,12 +490,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo, Args);
}
-
int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
assert (Tp->isVectorTy());
assert (ST->hasVector() && "getShuffleCost() called.");
- unsigned NumVectors = getNumberOfParts(Tp);
+ unsigned NumVectors = getNumVectorRegs(Tp);
// TODO: Since fp32 is expanded, the shuffle cost should always be 0.
@@ -523,7 +549,7 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
// TODO: Since fp32 is expanded, the extract cost should always be 0.
- unsigned NumParts = getNumberOfParts(SrcTy);
+ unsigned NumParts = getNumVectorRegs(SrcTy);
if (NumParts <= 2)
// Up to 2 vector registers can be truncated efficiently with pack or
// permute. The latter requires an immediate mask to be loaded, which
@@ -566,7 +592,7 @@ getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
// The bitmask will be truncated.
PackCost = getVectorTruncCost(SrcTy, DstTy);
else if (SrcScalarBits < DstScalarBits) {
- unsigned DstNumParts = getNumberOfParts(DstTy);
+ unsigned DstNumParts = getNumVectorRegs(DstTy);
// Each vector select needs its part of the bitmask unpacked.
PackCost = Log2Diff * DstNumParts;
// Extra cost for moving part of mask before unpacking.
@@ -602,6 +628,25 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
return nullptr;
}
+// Get the cost of converting a boolean vector to a vector with same width
+// and element size as Dst, plus the cost of zero extending if needed.
+unsigned SystemZTTIImpl::
+getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+ const Instruction *I) {
+ assert (Dst->isVectorTy());
+ unsigned VF = Dst->getVectorNumElements();
+ unsigned Cost = 0;
+ // If we know the widths of the compared operands, get any cost of
+ // converting it to match Dst. Otherwise assume same widths.
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
+ // One 'vn' per dst vector with an immediate mask.
+ Cost += getNumVectorRegs(Dst);
+ return Cost;
+}
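// Illustrative reading of the helper above: for "zext <4 x i1> %c to <4 x i32>"
// where %c compares two <4 x double> vectors, the i64-wide compare mask first
// pays getVectorBitmaskConversionCost() to be narrowed to i32 lanes, and the
// ZExt then adds one 'vn' with an immediate mask per destination vector
// register (here getNumVectorRegs(<4 x i32>) == 1).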
+
int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) {
unsigned DstScalarBits = Dst->getScalarSizeInBits();
@@ -611,8 +656,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
assert (Dst->isVectorTy());
unsigned VF = Src->getVectorNumElements();
- unsigned NumDstVectors = getNumberOfParts(Dst);
- unsigned NumSrcVectors = getNumberOfParts(Src);
+ unsigned NumDstVectors = getNumVectorRegs(Dst);
+ unsigned NumSrcVectors = getNumVectorRegs(Src);
if (Opcode == Instruction::Trunc) {
if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
@@ -633,19 +678,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
}
- else if (SrcScalarBits == 1) {
- // This should be extension of a compare i1 result.
- // If we know what the widths of the compared operands, get the
- // cost of converting it to Dst. Otherwise assume same widths.
- unsigned Cost = 0;
- Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
- if (CmpOpTy != nullptr)
- Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
- if (Opcode == Instruction::ZExt)
- // One 'vn' per dst vector with an immediate mask.
- Cost += NumDstVectors;
- return Cost;
- }
+ else if (SrcScalarBits == 1)
+ return getBoolVecToIntConversionCost(Opcode, Dst, I);
}
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
@@ -654,8 +688,13 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// (seems to miss on differentiating on scalar/vector types).
// Only 64 bit vector conversions are natively supported.
- if (SrcScalarBits == 64 && DstScalarBits == 64)
- return NumDstVectors;
+ if (DstScalarBits == 64) {
+ if (SrcScalarBits == 64)
+ return NumDstVectors;
+
+ if (SrcScalarBits == 1)
+ return getBoolVecToIntConversionCost(Opcode, Dst, I) + NumDstVectors;
+ }
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. Base implementation does not
@@ -672,7 +711,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
(Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
NeedsExtracts = false;
- TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+ TotCost += getScalarizationOverhead(Src, false, NeedsExtracts);
+ TotCost += getScalarizationOverhead(Dst, NeedsInserts, false);
// FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
@@ -702,11 +742,18 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
else { // Scalar
assert (!Dst->isVectorTy());
- if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
- return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+ if (SrcScalarBits >= 32 ||
+ (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+ return 1;
+ return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+ }
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
Src->isIntegerTy(1)) {
+ if (ST->hasLoadStoreOnCond2())
+ return 2; // li 0; loc 1
+
// This should be extension of a compare i1 result, which is done with
// ipm and a varying sequence of instructions.
unsigned Cost = 0;
@@ -718,7 +765,6 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
// If operands of an fp-type was compared, this costs +1.
Cost++;
-
return Cost;
}
}
@@ -726,8 +772,20 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
}
-int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- const Instruction *I) {
+// Scalar i8 / i16 operations will typically be made after first extending
+// the operands to i32.
+static unsigned getOperandsExtensionCost(const Instruction *I) {
+ unsigned ExtCost = 0;
+ for (Value *Op : I->operands())
+ // A load of i8 or i16 sign/zero extends to i32.
+ if (!isa<LoadInst>(Op) && !isa<ConstantInt>(Op))
+ ExtCost++;
+
+ return ExtCost;
+}
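// Worked example for the helper above (illustrative): an i16 icmp starts at
// cost 1 (see getCmpSelInstrCost below) and pays one extra unit per operand
// that is neither a load nor a constant, since i8/i16 loads already
// sign/zero-extend to i32:
//   icmp slt i16 %loaded, 42   -> 1 + 0
//   icmp slt i16 %a, %b        -> 1 + 2 (both register operands need extending)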
+
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy, const Instruction *I) {
if (ValTy->isVectorTy()) {
assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
unsigned VF = ValTy->getVectorNumElements();
@@ -759,7 +817,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT
// Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
// floats. FIXME: <2 x float> generates same code as <4 x float>.
unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
- unsigned NumVecs_cmp = getNumberOfParts(ValTy);
+ unsigned NumVecs_cmp = getNumVectorRegs(ValTy);
unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
return Cost;
@@ -775,20 +833,30 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT
PackCost =
getVectorBitmaskConversionCost(CmpOpTy, ValTy);
- return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+ return getNumVectorRegs(ValTy) /*vsel*/ + PackCost;
}
}
else { // Scalar
switch (Opcode) {
case Instruction::ICmp: {
+ // A loaded value compared with 0 with multiple users becomes Load and
+ // Test. The load is then not foldable, so return 0 cost for the ICmp.
+ unsigned ScalarBits = ValTy->getScalarSizeInBits();
+ if (I != nullptr && ScalarBits >= 32)
+ if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
+ C->getZExtValue() == 0)
+ return 0;
+
unsigned Cost = 1;
if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
- Cost += 2; // extend both operands
+ Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
return Cost;
}
case Instruction::Select:
if (ValTy->isFloatingPointTy())
- return 4; // No load on condition for FP, so this costs a conditional jump.
+ return 4; // No load on condition for FP - costs a conditional jump.
return 1; // Load On Condition.
}
}
@@ -804,7 +872,7 @@ getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return ((Index % 2 == 0) ? 1 : 0);
if (Opcode == Instruction::ExtractElement) {
- int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+ int Cost = ((getScalarSizeInBits(Val) == 1) ? 2 /*+test-under-mask*/ : 1);
// Give a slight penalty for moving out of vector pipeline to FXU unit.
if (Index == 0 && Val->isIntOrIntVectorTy())
@@ -816,58 +884,147 @@ getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
+// Check if a load may be folded as a memory operand in its user.
+bool SystemZTTIImpl::
+isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) {
+ if (!Ld->hasOneUse())
+ return false;
+ FoldedValue = Ld;
+ const Instruction *UserI = cast<Instruction>(*Ld->user_begin());
+ unsigned LoadedBits = getScalarSizeInBits(Ld->getType());
+ unsigned TruncBits = 0;
+ unsigned SExtBits = 0;
+ unsigned ZExtBits = 0;
+ if (UserI->hasOneUse()) {
+ unsigned UserBits = UserI->getType()->getScalarSizeInBits();
+ if (isa<TruncInst>(UserI))
+ TruncBits = UserBits;
+ else if (isa<SExtInst>(UserI))
+ SExtBits = UserBits;
+ else if (isa<ZExtInst>(UserI))
+ ZExtBits = UserBits;
+ }
+ if (TruncBits || SExtBits || ZExtBits) {
+ FoldedValue = UserI;
+ UserI = cast<Instruction>(*UserI->user_begin());
+ // Load (single use) -> trunc/extend (single use) -> UserI
+ }
+ if ((UserI->getOpcode() == Instruction::Sub ||
+ UserI->getOpcode() == Instruction::SDiv ||
+ UserI->getOpcode() == Instruction::UDiv) &&
+ UserI->getOperand(1) != FoldedValue)
+ return false; // Not commutative, only RHS foldable.
+ // LoadOrTruncBits holds the number of effectively loaded bits, but 0 if an
+ // extension was made of the load.
+ unsigned LoadOrTruncBits =
+ ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits));
+ switch (UserI->getOpcode()) {
+ case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64
+ case Instruction::Sub:
+ case Instruction::ICmp:
+ if (LoadedBits == 32 && ZExtBits == 64)
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64
+ if (UserI->getOpcode() != Instruction::ICmp) {
+ if (LoadedBits == 16 &&
+ (SExtBits == 32 ||
+ (SExtBits == 64 && ST->hasMiscellaneousExtensions2())))
+ return true;
+ if (LoadOrTruncBits == 16)
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+ case Instruction::SDiv:// SE: 32->64
+ if (LoadedBits == 32 && SExtBits == 64)
+ return true;
+ LLVM_FALLTHROUGH;
+ case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // This also makes sense for float operations, but disabled for now due
+ // to regressions.
+ // case Instruction::FCmp:
+ // case Instruction::FAdd:
+ // case Instruction::FSub:
+ // case Instruction::FMul:
+ // case Instruction::FDiv:
+
+ // All possible extensions of memory checked above.
+
+ // Comparison between memory and immediate.
+ if (UserI->getOpcode() == Instruction::ICmp)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1)))
+ if (isUInt<16>(CI->getZExtValue()))
+ return true;
+ return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
+ break;
+ }
+ return false;
+}
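// Illustrative walk-through of the function above: for the chain
//   %v = load i32, i32* %p          ; single use
//   %e = sext i32 %v to i64         ; single use
//   %r = sdiv i64 %x, %e
// LoadedBits is 32 and SExtBits is 64, the folded value is operand 1 of the
// non-commutative sdiv, and the SDiv case accepts a 32->64 sign extension, so
// isFoldableLoad() returns true (the divide is expected to use the memory
// operand directly). With the operands of the sdiv swapped it returns false.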
+
+static bool isBswapIntrinsicCall(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ if (auto *CI = dyn_cast<CallInst>(I))
+ if (auto *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::bswap)
+ return true;
+ return false;
+}
+
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
- if (!Src->isVectorTy() && Opcode == Instruction::Load &&
- I != nullptr && I->hasOneUse()) {
- const Instruction *UserI = cast<Instruction>(*I->user_begin());
- unsigned Bits = Src->getScalarSizeInBits();
- bool FoldsLoad = false;
- switch (UserI->getOpcode()) {
- case Instruction::ICmp:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // This also makes sense for float operations, but disabled for now due
- // to regressions.
- // case Instruction::FCmp:
- // case Instruction::FAdd:
- // case Instruction::FSub:
- // case Instruction::FMul:
- // case Instruction::FDiv:
- FoldsLoad = (Bits == 32 || Bits == 64);
- break;
+ if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
+ // Store the load or its truncated or extended value in FoldedValue.
+ const Instruction *FoldedValue = nullptr;
+ if (isFoldableLoad(cast<LoadInst>(I), FoldedValue)) {
+ const Instruction *UserI = cast<Instruction>(*FoldedValue->user_begin());
+ assert (UserI->getNumOperands() == 2 && "Expected a binop.");
+
+ // UserI can't fold two loads, so in that case return 0 cost only
+ // half of the time.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (UserI->getOperand(i) == FoldedValue)
+ continue;
+
+ if (Instruction *OtherOp = dyn_cast<Instruction>(UserI->getOperand(i))){
+ LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp);
+ if (!OtherLoad &&
+ (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) ||
+ isa<ZExtInst>(OtherOp)))
+ OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0));
+ if (OtherLoad && isFoldableLoad(OtherLoad, FoldedValue/*dummy*/))
+ return i == 0; // Both operands foldable.
+ }
}
- if (FoldsLoad) {
- assert (UserI->getNumOperands() == 2 &&
- "Expected to only handle binops.");
-
- // UserI can't fold two loads, so in that case return 0 cost only
- // half of the time.
- for (unsigned i = 0; i < 2; ++i) {
- if (UserI->getOperand(i) == I)
- continue;
- if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
- if (LI->hasOneUse())
- return i == 0;
- }
- }
+ return 0; // Only I is foldable in user.
+ }
+ }
+ unsigned NumOps =
+ (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
+
+ // Store/Load reversed saves one instruction.
+ if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
+ if (Opcode == Instruction::Load && I->hasOneUse()) {
+ const Instruction *LdUser = cast<Instruction>(*I->user_begin());
+ // In case of load -> bswap -> store, return normal cost for the load.
+ if (isBswapIntrinsicCall(LdUser) &&
+ (!LdUser->hasOneUse() || !isa<StoreInst>(*LdUser->user_begin())))
return 0;
- }
+ }
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ const Value *StoredVal = SI->getValueOperand();
+ if (StoredVal->hasOneUse() && isBswapIntrinsicCall(StoredVal))
+ return 0;
+ }
}
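// Illustrative summary of the byte-swap special case above: a load whose only
// user is a bswap that is not merely stored costs 0, on the assumption that a
// reversed load can perform the swap; a store of a single-use bswap result
// likewise costs 0. For a load -> bswap -> store chain only the store side is
// free, so the pair is not double-counted.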
- unsigned NumOps = getNumberOfParts(Src);
-
if (Src->getScalarSizeInBits() == 128)
// 128 bit scalars are held in a pair of two 64 bit registers.
NumOps *= 2;
@@ -875,34 +1032,94 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return NumOps;
}
+// The generic implementation of getInterleavedMemoryOpCost() is based on
+// adding costs of the memory operations plus all the extracts and inserts
+// needed for using / defining the vector operands. The SystemZ version does
+// roughly the same but bases the computations on vector permutations
+// instead.
int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
- unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
- (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
- assert (WideBits > 0 && "Could not compute size of vector");
- int NumWideParts =
- ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+ // Return the ceiling of dividing A by B.
+ auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
+
+ unsigned NumElts = VecTy->getVectorNumElements();
+ assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
+ unsigned VF = NumElts / Factor;
+ unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy));
+ unsigned NumVectorMemOps = getNumVectorRegs(VecTy);
+ unsigned NumPermutes = 0;
+
+ if (Opcode == Instruction::Load) {
+ // Loading interleave groups may have gaps, which may mean fewer
+ // loads. Find out how many vectors will be loaded in total, and how
+ // many of them each value will be in.
+ BitVector UsedInsts(NumVectorMemOps, false);
+ std::vector<BitVector> ValueVecs(Factor, BitVector(NumVectorMemOps, false));
+ for (unsigned Index : Indices)
+ for (unsigned Elt = 0; Elt < VF; ++Elt) {
+ unsigned Vec = (Index + Elt * Factor) / NumEltsPerVecReg;
+ UsedInsts.set(Vec);
+ ValueVecs[Index].set(Vec);
+ }
+ NumVectorMemOps = UsedInsts.count();
+
+ for (unsigned Index : Indices) {
+ // Estimate that each loaded source vector containing this Index
+ // requires one operation, except that vperm can handle two input
+ // registers the first time for each dst vector.
+ unsigned NumSrcVecs = ValueVecs[Index].count();
+ unsigned NumDstVecs = ceil(VF * getScalarSizeInBits(VecTy), 128U);
+ assert (NumSrcVecs >= NumDstVecs && "Expected at least as many sources");
+ NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs);
+ }
+ } else {
+ // Estimate the permutes for each stored vector as the smaller of the
+ // number of elements and the number of source vectors. Subtract one per
+ // dst vector for vperm (see above).
+ unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
+ unsigned NumDstVecs = NumVectorMemOps;
+ assert (NumSrcVecs > 1 && "Expected at least two source vectors.");
+ NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs;
+ }
- // How many source vectors are handled to produce a vectorized operand?
- int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
- int NumSrcParts =
- ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+ // Cost of load/store operations and the permutations needed.
+ return NumVectorMemOps + NumPermutes;
+}
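
To make the new model concrete, a worked example (illustrative numbers, not taken from the patch): loading an interleave group with Factor = 2 from an <8 x i32> vector gives VF = 4, four elements per 128-bit register and NumVectorMemOps = 2. Each of the two indices has its elements spread across both loaded registers (NumSrcVecs = 2) and fits into one destination register (NumDstVecs = 1), so each index costs max(1, 2 - 1) = 1 permute. The returned cost is 2 memory operations + 2 permutes = 4.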
- // A Load group may have gaps.
- unsigned NumOperands =
- ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
+ if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
+ return getNumVectorRegs(RetTy); // VPERM
+ return -1;
+}
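
For instance, under this model a bswap whose result is an <8 x i32> vector occupies two 128-bit vector registers and is costed as 2 (one VPERM per register); scalar bswaps and all other intrinsics fall through to the base implementation via the overloads below.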
- // Each needed permute takes two vectors as input.
- if (NumSrcParts > 1)
- NumSrcParts--;
- int NumPermutes = NumSrcParts * NumOperands;
+int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args,
+ FastMathFlags FMF, unsigned VF) {
+ int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+ if (Cost != -1)
+ return Cost;
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+}
- // Cost of load/store operations and the permutations needed.
- return NumWideParts + NumPermutes;
+int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys,
+ FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
+ int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+ if (Cost != -1)
+ return Cost;
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys,
+ FMF, ScalarizationCostPassed);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 4b11a6f0a837..e79bee1ea3a8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -37,6 +37,8 @@ public:
/// \name Scalar TTI Implementations
/// @{
+ unsigned getInliningThresholdMultiplier() { return 3; }
+
int getIntImmCost(const APInt &Imm, Type *Ty);
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
@@ -78,11 +80,14 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+ unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+ const Instruction *I);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
@@ -90,7 +95,16 @@ public:
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
+
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1);
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
/// @}
};
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 6bcf60fafc3e..bb937923b47e 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -45,6 +45,9 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
Mang = new Mangler();
InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(), *Ctx,
TM.getCodeModel() == CodeModel::Large);
+
+ // Reset various EH DWARF encodings.
+ PersonalityEncoding = LSDAEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 092f5ea4104b..39d5705b2a53 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -40,12 +40,7 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
RequireStructuredCFG(false), DefaultOptions(Options), Options(Options) {
}
-TargetMachine::~TargetMachine() {
- delete AsmInfo;
- delete MRI;
- delete MII;
- delete STI;
-}
+TargetMachine::~TargetMachine() = default;
bool TargetMachine::isPositionIndependent() const {
return getRelocationModel() == Reloc::PIC_;
@@ -141,6 +136,15 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
if (GV && GV->hasDLLImportStorageClass())
return false;
+ // On MinGW, variables that haven't been declared with DLLImport may still
+ // end up automatically imported by the linker. To make this feasible,
+ // don't assume the variables to be DSO local unless we actually know
+ // that for sure. This only has to be done for variables; for functions
+ // the linker can insert thunks for calling functions from another DLL.
+ if (TT.isWindowsGNUEnvironment() && GV && GV->isDeclarationForLinker() &&
+ isa<GlobalVariable>(GV))
+ return false;
+
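
A small illustration of the case being carved out here (assumed example, not part of the change): a global that is only declared in the current module may end up auto-imported from a DLL by the MinGW linker, so its address cannot be assumed DSO-local, while calls to undefined functions can still be redirected through linker-generated thunks.

    // Illustration only: on MinGW, 'x' may be auto-imported from a DLL,
    // so its address must not be treated as DSO-local.
    extern int x;
    int load_x() { return x; }    // may read through an import pointer
    void callee();                // declared but not defined here
    void call_it() { callee(); }  // fine: the linker can insert a thunk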
// Every other GV is local on COFF.
// Make an exception for windows OS in the triple: Some firmware builds use
// *-win32-macho triples. This (accidentally?) produced windows relocations
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index 37d398d580f8..bae45ae28c45 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -115,6 +115,15 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
case LLVMRelocDynamicNoPic:
RM = Reloc::DynamicNoPIC;
break;
+ case LLVMRelocROPI:
+ RM = Reloc::ROPI;
+ break;
+ case LLVMRelocRWPI:
+ RM = Reloc::RWPI;
+ break;
+ case LLVMRelocROPI_RWPI:
+ RM = Reloc::ROPI_RWPI;
+ break;
default:
break;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 2d92b93ca704..0a5908f43790 100644
--- a/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -18,13 +18,14 @@
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
@@ -34,27 +35,10 @@ using namespace llvm;
namespace {
-// We store register types as SimpleValueType to retain SIMD layout
-// information, but must also be able to supply them as the (unnamed)
-// register enum from WebAssemblyRegisterInfo.td/.inc.
-static unsigned MVTToWasmReg(MVT::SimpleValueType Type) {
- switch(Type) {
- case MVT::i32: return WebAssembly::I32_0;
- case MVT::i64: return WebAssembly::I64_0;
- case MVT::f32: return WebAssembly::F32_0;
- case MVT::f64: return WebAssembly::F64_0;
- case MVT::v16i8: return WebAssembly::V128_0;
- case MVT::v8i16: return WebAssembly::V128_0;
- case MVT::v4i32: return WebAssembly::V128_0;
- case MVT::v4f32: return WebAssembly::V128_0;
- default: return MVT::INVALID_SIMPLE_VALUE_TYPE;
- }
-}
-
/// WebAssemblyOperand - Instances of this class represent the operands in a
/// parsed WASM machine instruction.
struct WebAssemblyOperand : public MCParsedAsmOperand {
- enum KindTy { Token, Local, Stack, Integer, Float, Symbol } Kind;
+ enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
SMLoc StartLoc, EndLoc;
@@ -62,19 +46,6 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
StringRef Tok;
};
- struct RegOp {
- // This is a (virtual) local or stack register represented as 0..
- unsigned RegNo;
- // In most targets, the register number also encodes the type, but for
- // wasm we have to track that seperately since we have an unbounded
- // number of registers.
- // This has the unfortunate side effect that we supply a different value
- // to the table-gen matcher at different times in the process (when it
- // calls getReg() or addRegOperands().
- // TODO: While this works, it feels brittle. and would be nice to clean up.
- MVT::SimpleValueType Type;
- };
-
struct IntOp {
int64_t Val;
};
@@ -87,37 +58,45 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
const MCExpr *Exp;
};
+ struct BrLOp {
+ std::vector<unsigned> List;
+ };
+
union {
struct TokOp Tok;
- struct RegOp Reg;
struct IntOp Int;
struct FltOp Flt;
struct SymOp Sym;
+ struct BrLOp BrL;
};
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
- : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
- WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, RegOp R)
- : Kind(K), StartLoc(Start), EndLoc(End), Reg(R) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
- : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
- : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
- : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
+ : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
+ WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
+ : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
+
+ ~WebAssemblyOperand() {
+ if (isBrList())
+ BrL.~BrLOp();
+ }
bool isToken() const override { return Kind == Token; }
- bool isImm() const override { return Kind == Integer ||
- Kind == Float ||
- Kind == Symbol; }
- bool isReg() const override { return Kind == Local || Kind == Stack; }
+ bool isImm() const override {
+ return Kind == Integer || Kind == Float || Kind == Symbol;
+ }
bool isMem() const override { return false; }
+ bool isReg() const override { return false; }
+ bool isBrList() const { return Kind == BrList; }
unsigned getReg() const override {
- assert(isReg());
- // This is called from the tablegen matcher (MatchInstructionImpl)
- // where it expects to match the type of register, see RegOp above.
- return MVTToWasmReg(Reg.Type);
+ llvm_unreachable("Assembly inspects a register operand");
+ return 0;
}
StringRef getToken() const {
@@ -128,19 +107,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
SMLoc getStartLoc() const override { return StartLoc; }
SMLoc getEndLoc() const override { return EndLoc; }
- void addRegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- assert(isReg() && "Not a register operand!");
- // This is called from the tablegen matcher (MatchInstructionImpl)
- // where it expects to output the actual register index, see RegOp above.
- unsigned R = Reg.RegNo;
- if (Kind == Stack) {
- // A stack register is represented as a large negative number.
- // See WebAssemblyRegNumbering::runOnMachineFunction and
- // getWARegStackId for why this | is needed.
- R |= INT32_MIN;
- }
- Inst.addOperand(MCOperand::createReg(R));
+ void addRegOperands(MCInst &, unsigned) const {
+ // Required by the assembly matcher.
+ llvm_unreachable("Assembly matcher creates register operands");
}
void addImmOperands(MCInst &Inst, unsigned N) const {
@@ -155,17 +124,17 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
llvm_unreachable("Should be immediate or symbol!");
}
+ void addBrListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && isBrList() && "Invalid BrList!");
+ for (auto Br : BrL.List)
+ Inst.addOperand(MCOperand::createImm(Br));
+ }
+
void print(raw_ostream &OS) const override {
switch (Kind) {
case Token:
OS << "Tok:" << Tok.Tok;
break;
- case Local:
- OS << "Loc:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type);
- break;
- case Stack:
- OS << "Stk:" << Reg.RegNo << ":" << static_cast<int>(Reg.Type);
- break;
case Integer:
OS << "Int:" << Int.Val;
break;
@@ -175,6 +144,9 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
case Symbol:
OS << "Sym:" << Sym.Exp;
break;
+ case BrList:
+ OS << "BrList:" << BrL.List.size();
+ break;
}
}
};
@@ -182,352 +154,526 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
class WebAssemblyAsmParser final : public MCTargetAsmParser {
MCAsmParser &Parser;
MCAsmLexer &Lexer;
- // These are for the current function being parsed:
- // These are vectors since register assignments are so far non-sparse.
- // Replace by map if necessary.
- std::vector<MVT::SimpleValueType> LocalTypes;
- std::vector<MVT::SimpleValueType> StackTypes;
- MCSymbol *LastLabel;
+
+ // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
+ std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+
+ // The order of labels, directives and instructions in a .s file has no
+ // syntactical enforcement. This class is a callback from the actual parser,
+ // and yet we have to feed data to the streamer in a very particular
+ // order to ensure a correct binary encoding that matches the regular backend
+ // (the streamer does not enforce this). This "state machine" enum helps
+ // guarantee that correct order.
+ enum ParserState {
+ FileStart,
+ Label,
+ FunctionStart,
+ FunctionLocals,
+ Instructions,
+ } CurrentState = FileStart;
+
+ // For ensuring blocks are properly nested.
+ enum NestingType {
+ Function,
+ Block,
+ Loop,
+ Try,
+ If,
+ Else,
+ Undefined,
+ };
+ std::vector<NestingType> NestingStack;
+
+ // We track this to see if a .functype following a label is the same,
+ // as this is how we recognize the start of a function.
+ MCSymbol *LastLabel = nullptr;
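
As an example of the ordering these members enforce (an illustration based on the directive handling further down, not new behavior): a function in a .s file is recognized when a label is immediately followed by a .functype for the same symbol; an optional .local directive is only accepted while still in the FunctionStart state, the first instruction moves the parser to the Instructions state, and end_function must pop the Function entry pushed when the function started.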
public:
- WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
- const MCInstrInfo &mii, const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, sti, mii), Parser(Parser),
- Lexer(Parser.getLexer()), LastLabel(nullptr) {
+ WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
+ Lexer(Parser.getLexer()) {
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
#define GET_ASSEMBLER_HEADER
#include "WebAssemblyGenAsmMatcher.inc"
// TODO: This is required to be implemented, but appears unused.
- bool ParseRegister(unsigned &/*RegNo*/, SMLoc &/*StartLoc*/,
- SMLoc &/*EndLoc*/) override {
+ bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
+ SMLoc & /*EndLoc*/) override {
llvm_unreachable("ParseRegister is not implemented.");
}
- bool Error(const StringRef &msg, const AsmToken &tok) {
- return Parser.Error(tok.getLoc(), msg + tok.getString());
+ bool error(const Twine &Msg, const AsmToken &Tok) {
+ return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
+ }
+
+ bool error(const Twine &Msg) {
+ return Parser.Error(Lexer.getTok().getLoc(), Msg);
+ }
+
+ void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
+ Signatures.push_back(std::move(Sig));
+ }
+
+ std::pair<StringRef, StringRef> nestingString(NestingType NT) {
+ switch (NT) {
+ case Function:
+ return {"function", "end_function"};
+ case Block:
+ return {"block", "end_block"};
+ case Loop:
+ return {"loop", "end_loop"};
+ case Try:
+ return {"try", "end_try"};
+ case If:
+ return {"if", "end_if"};
+ case Else:
+ return {"else", "end_if"};
+ default:
+ llvm_unreachable("unknown NestingType");
+ }
+ }
+
+ void push(NestingType NT) { NestingStack.push_back(NT); }
+
+ bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
+ if (NestingStack.empty())
+ return error(Twine("End of block construct with no start: ") + Ins);
+ auto Top = NestingStack.back();
+ if (Top != NT1 && Top != NT2)
+ return error(Twine("Block construct type mismatch, expected: ") +
+ nestingString(Top).second + ", instead got: " + Ins);
+ NestingStack.pop_back();
+ return false;
+ }
+
+ bool ensureEmptyNestingStack() {
+ auto err = !NestingStack.empty();
+ while (!NestingStack.empty()) {
+ error(Twine("Unmatched block construct(s) at function end: ") +
+ nestingString(NestingStack.back()).first);
+ NestingStack.pop_back();
+ }
+ return err;
}
- bool IsNext(AsmToken::TokenKind Kind) {
- auto ok = Lexer.is(Kind);
- if (ok) Parser.Lex();
- return ok;
+ bool isNext(AsmToken::TokenKind Kind) {
+ auto Ok = Lexer.is(Kind);
+ if (Ok)
+ Parser.Lex();
+ return Ok;
}
- bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
- if (!IsNext(Kind))
- return Error(std::string("Expected ") + KindName + ", instead got: ",
+ bool expect(AsmToken::TokenKind Kind, const char *KindName) {
+ if (!isNext(Kind))
+ return error(std::string("Expected ") + KindName + ", instead got: ",
Lexer.getTok());
return false;
}
- MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
- // Derive type from .param .local decls, or the instruction itself.
- return StringSwitch<MVT::SimpleValueType>(RegType)
- .Case("i32", MVT::i32)
- .Case("i64", MVT::i64)
- .Case("f32", MVT::f32)
- .Case("f64", MVT::f64)
- .Case("i8x16", MVT::v16i8)
- .Case("i16x8", MVT::v8i16)
- .Case("i32x4", MVT::v4i32)
- .Case("f32x4", MVT::v4f32)
- .Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ StringRef expectIdent() {
+ if (!Lexer.is(AsmToken::Identifier)) {
+ error("Expected identifier, got: ", Lexer.getTok());
+ return StringRef();
+ }
+ auto Name = Lexer.getTok().getString();
+ Parser.Lex();
+ return Name;
}
- MVT::SimpleValueType &GetType(
- std::vector<MVT::SimpleValueType> &Types, size_t i) {
- Types.resize(std::max(i + 1, Types.size()), MVT::INVALID_SIMPLE_VALUE_TYPE);
- return Types[i];
+ Optional<wasm::ValType> parseType(const StringRef &Type) {
+ // FIXME: can't use StringSwitch because wasm::ValType doesn't have a
+ // "invalid" value.
+ if (Type == "i32")
+ return wasm::ValType::I32;
+ if (Type == "i64")
+ return wasm::ValType::I64;
+ if (Type == "f32")
+ return wasm::ValType::F32;
+ if (Type == "f64")
+ return wasm::ValType::F64;
+ if (Type == "v128" || Type == "i8x16" || Type == "i16x8" ||
+ Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
+ Type == "f64x2")
+ return wasm::ValType::V128;
+ return Optional<wasm::ValType>();
}
- bool ParseReg(OperandVector &Operands, StringRef TypePrefix) {
- if (Lexer.is(AsmToken::Integer)) {
- auto &Local = Lexer.getTok();
- // This is a reference to a local, turn it into a virtual register.
- auto LocalNo = static_cast<unsigned>(Local.getIntVal());
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Local, Local.getLoc(),
- Local.getEndLoc(),
- WebAssemblyOperand::RegOp{LocalNo,
- GetType(LocalTypes, LocalNo)}));
- Parser.Lex();
- } else if (Lexer.is(AsmToken::Identifier)) {
- auto &StackRegTok = Lexer.getTok();
- // These are push/pop/drop pseudo stack registers, which we turn
- // into virtual registers also. The stackify pass will later turn them
- // back into implicit stack references if possible.
- auto StackReg = StackRegTok.getString();
- auto StackOp = StackReg.take_while([](char c) { return isalpha(c); });
- auto Reg = StackReg.drop_front(StackOp.size());
- unsigned long long ParsedRegNo = 0;
- if (!Reg.empty() && getAsUnsignedInteger(Reg, 10, ParsedRegNo))
- return Error("Cannot parse stack register index: ", StackRegTok);
- unsigned RegNo = static_cast<unsigned>(ParsedRegNo);
- if (StackOp == "push") {
- // This defines a result, record register type.
- auto RegType = ParseRegType(TypePrefix);
- GetType(StackTypes, RegNo) = RegType;
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Stack,
- StackRegTok.getLoc(),
- StackRegTok.getEndLoc(),
- WebAssemblyOperand::RegOp{RegNo, RegType}));
- } else if (StackOp == "pop") {
- // This uses a previously defined stack value.
- auto RegType = GetType(StackTypes, RegNo);
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Stack,
- StackRegTok.getLoc(),
- StackRegTok.getEndLoc(),
- WebAssemblyOperand::RegOp{RegNo, RegType}));
- } else if (StackOp == "drop") {
- // This operand will be dropped, since it is part of an instruction
- // whose result is void.
- } else {
- return Error("Unknown stack register prefix: ", StackRegTok);
- }
+ WebAssembly::ExprType parseBlockType(StringRef ID) {
+ return StringSwitch<WebAssembly::ExprType>(ID)
+ .Case("i32", WebAssembly::ExprType::I32)
+ .Case("i64", WebAssembly::ExprType::I64)
+ .Case("f32", WebAssembly::ExprType::F32)
+ .Case("f64", WebAssembly::ExprType::F64)
+ .Case("v128", WebAssembly::ExprType::V128)
+ .Case("except_ref", WebAssembly::ExprType::ExceptRef)
+ .Case("void", WebAssembly::ExprType::Void)
+ .Default(WebAssembly::ExprType::Invalid);
+ }
+
+ bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
+ while (Lexer.is(AsmToken::Identifier)) {
+ auto Type = parseType(Lexer.getTok().getString());
+ if (!Type)
+ return true;
+ Types.push_back(Type.getValue());
Parser.Lex();
- } else {
- return Error(
- "Expected identifier/integer following $, instead got: ",
- Lexer.getTok());
+ if (!isNext(AsmToken::Comma))
+ break;
}
- IsNext(AsmToken::Equal);
return false;
}
- void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
+ void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
auto &Int = Lexer.getTok();
int64_t Val = Int.getIntVal();
- if (IsNegative) Val = -Val;
+ if (IsNegative)
+ Val = -Val;
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, Int.getLoc(),
- Int.getEndLoc(), WebAssemblyOperand::IntOp{Val}));
+ WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
+ WebAssemblyOperand::IntOp{Val}));
Parser.Lex();
}
- bool ParseOperandStartingWithInteger(bool IsNegative,
- OperandVector &Operands,
- StringRef InstType) {
- ParseSingleInteger(IsNegative, Operands);
- if (Lexer.is(AsmToken::LParen)) {
- // Parse load/store operands of the form: offset($reg)align
- auto &LParen = Lexer.getTok();
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token,
- LParen.getLoc(),
- LParen.getEndLoc(),
- WebAssemblyOperand::TokOp{
- LParen.getString()}));
- Parser.Lex();
- if (Expect(AsmToken::Dollar, "register")) return true;
- if (ParseReg(Operands, InstType)) return true;
- auto &RParen = Lexer.getTok();
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token,
- RParen.getLoc(),
- RParen.getEndLoc(),
- WebAssemblyOperand::TokOp{
- RParen.getString()}));
- if (Expect(AsmToken::RParen, ")")) return true;
- if (Lexer.is(AsmToken::Integer)) {
- ParseSingleInteger(false, Operands);
+ bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
+ StringRef InstName) {
+ parseSingleInteger(IsNegative, Operands);
+ // FIXME: there is probably a cleaner way to do this.
+ auto IsLoadStore = InstName.startswith("load") ||
+ InstName.startswith("store") ||
+ InstName.startswith("atomic_load") ||
+ InstName.startswith("atomic_store");
+ if (IsLoadStore) {
+ // Parse load/store operands of the form: offset align
+ auto &Offset = Lexer.getTok();
+ if (Offset.is(AsmToken::Integer)) {
+ parseSingleInteger(false, Operands);
} else {
// Alignment not specified.
// FIXME: correctly derive a default from the instruction.
+ // We can't just call WebAssembly::GetDefaultP2Align since we don't have
+ // an opcode until after the assembly matcher.
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, RParen.getLoc(),
- RParen.getEndLoc(), WebAssemblyOperand::IntOp{0}));
+ WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
+ WebAssemblyOperand::IntOp{0}));
}
}
return false;
}
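
Concretely (an illustration of the flow above, not a syntax reference): when a load or store mnemonic is followed by two integers, the first becomes the offset operand and the second the alignment operand; if only one integer is present, the offset is recorded and a placeholder alignment operand of 0 is pushed instead.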
- bool ParseInstruction(ParseInstructionInfo &/*Info*/, StringRef Name,
+ void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
+ WebAssembly::ExprType BT) {
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Integer, NameLoc, NameLoc,
+ WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
+ }
+
+ bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override {
- Operands.push_back(
- make_unique<WebAssemblyOperand>(WebAssemblyOperand::Token, NameLoc,
- SMLoc::getFromPointer(
- NameLoc.getPointer() + Name.size()),
- WebAssemblyOperand::TokOp{
- StringRef(NameLoc.getPointer(),
- Name.size())}));
+ // Note: Name does NOT point into the source code, but to a local copy, so
+ // use NameLoc instead.
+ Name = StringRef(NameLoc.getPointer(), Name.size());
+
+ // WebAssembly has instructions with / in them, which AsmLexer parses
+ // as separate tokens, so if we find such tokens immediately adjacent (no
+ // whitespace), expand the name to include them:
+ for (;;) {
+ auto &Sep = Lexer.getTok();
+ if (Sep.getLoc().getPointer() != Name.end() ||
+ Sep.getKind() != AsmToken::Slash)
+ break;
+ // Extend name with /
+ Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
+ Parser.Lex();
+ // We must now find another identifier, or error.
+ auto &Id = Lexer.getTok();
+ if (Id.getKind() != AsmToken::Identifier ||
+ Id.getLoc().getPointer() != Name.end())
+ return error("Incomplete instruction name: ", Id);
+ Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
+ Parser.Lex();
+ }
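
For example, a mnemonic such as i32.wrap/i64 (an assumed instance of the slash-containing names of that era) reaches this code as the identifier i32.wrap, a slash token and the identifier i64; because the tokens are directly adjacent, the loop above glues them back into a single operand name.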
+
+ // Now construct the name as first operand.
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
+ WebAssemblyOperand::TokOp{Name}));
auto NamePair = Name.split('.');
// If no '.', there is no type prefix.
- if (NamePair.second.empty()) std::swap(NamePair.first, NamePair.second);
+ auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second;
+
+ // If this instruction is part of a control flow structure, ensure
+ // proper nesting.
+ bool ExpectBlockType = false;
+ if (BaseName == "block") {
+ push(Block);
+ ExpectBlockType = true;
+ } else if (BaseName == "loop") {
+ push(Loop);
+ ExpectBlockType = true;
+ } else if (BaseName == "try") {
+ push(Try);
+ ExpectBlockType = true;
+ } else if (BaseName == "if") {
+ push(If);
+ ExpectBlockType = true;
+ } else if (BaseName == "else") {
+ if (pop(BaseName, If))
+ return true;
+ push(Else);
+ } else if (BaseName == "catch") {
+ if (pop(BaseName, Try))
+ return true;
+ push(Try);
+ } else if (BaseName == "catch_all") {
+ if (pop(BaseName, Try))
+ return true;
+ push(Try);
+ } else if (BaseName == "end_if") {
+ if (pop(BaseName, If, Else))
+ return true;
+ } else if (BaseName == "end_try") {
+ if (pop(BaseName, Try))
+ return true;
+ } else if (BaseName == "end_loop") {
+ if (pop(BaseName, Loop))
+ return true;
+ } else if (BaseName == "end_block") {
+ if (pop(BaseName, Block))
+ return true;
+ } else if (BaseName == "end_function") {
+ if (pop(BaseName, Function) || ensureEmptyNestingStack())
+ return true;
+ }
+
while (Lexer.isNot(AsmToken::EndOfStatement)) {
auto &Tok = Lexer.getTok();
switch (Tok.getKind()) {
- case AsmToken::Dollar: {
- Parser.Lex();
- if (ParseReg(Operands, NamePair.first)) return true;
- break;
- }
case AsmToken::Identifier: {
auto &Id = Lexer.getTok();
- const MCExpr *Val;
- SMLoc End;
- if (Parser.parsePrimaryExpr(Val, End))
- return Error("Cannot parse symbol: ", Lexer.getTok());
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Symbol, Id.getLoc(),
- Id.getEndLoc(), WebAssemblyOperand::SymOp{Val}));
+ if (ExpectBlockType) {
+ // Assume this identifier is a block_type.
+ auto BT = parseBlockType(Id.getString());
+ if (BT == WebAssembly::ExprType::Invalid)
+ return error("Unknown block type: ", Id);
+ addBlockTypeOperand(Operands, NameLoc, BT);
+ Parser.Lex();
+ } else {
+ // Assume this identifier is a label.
+ const MCExpr *Val;
+ SMLoc End;
+ if (Parser.parsePrimaryExpr(Val, End))
+ return error("Cannot parse symbol: ", Lexer.getTok());
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
+ WebAssemblyOperand::SymOp{Val}));
+ }
break;
}
case AsmToken::Minus:
Parser.Lex();
if (Lexer.isNot(AsmToken::Integer))
- return Error("Expected integer instead got: ", Lexer.getTok());
- if (ParseOperandStartingWithInteger(true, Operands, NamePair.first))
+ return error("Expected integer instead got: ", Lexer.getTok());
+ if (parseOperandStartingWithInteger(true, Operands, BaseName))
return true;
break;
case AsmToken::Integer:
- if (ParseOperandStartingWithInteger(false, Operands, NamePair.first))
+ if (parseOperandStartingWithInteger(false, Operands, BaseName))
return true;
break;
case AsmToken::Real: {
double Val;
if (Tok.getString().getAsDouble(Val, false))
- return Error("Cannot parse real: ", Tok);
+ return error("Cannot parse real: ", Tok);
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Float, Tok.getLoc(),
- Tok.getEndLoc(), WebAssemblyOperand::FltOp{Val}));
+ WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
+ WebAssemblyOperand::FltOp{Val}));
+ Parser.Lex();
+ break;
+ }
+ case AsmToken::LCurly: {
Parser.Lex();
+ auto Op = make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
+ if (!Lexer.is(AsmToken::RCurly))
+ for (;;) {
+ Op->BrL.List.push_back(Lexer.getTok().getIntVal());
+ expect(AsmToken::Integer, "integer");
+ if (!isNext(AsmToken::Comma))
+ break;
+ }
+ expect(AsmToken::RCurly, "}");
+ Operands.push_back(std::move(Op));
break;
}
default:
- return Error("Unexpected token in operand: ", Tok);
+ return error("Unexpected token in operand: ", Tok);
}
if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Expect(AsmToken::Comma, ",")) return true;
- }
- }
- Parser.Lex();
- // Call instructions are vararg, but the tablegen matcher doesn't seem to
- // support that, so for now we strip these extra operands.
- // This is problematic if these arguments are not simple $pop stack
- // registers, since e.g. a local register would get lost, so we check for
- // this. This can be the case when using -disable-wasm-explicit-locals
- // which currently s2wasm requires.
- // TODO: Instead, we can move this code to MatchAndEmitInstruction below and
- // actually generate get_local instructions on the fly.
- // Or even better, improve the matcher to support vararg?
- auto IsIndirect = NamePair.second == "call_indirect";
- if (IsIndirect || NamePair.second == "call") {
- // Figure out number of fixed operands from the instruction.
- size_t CallOperands = 1; // The name token.
- if (!IsIndirect) CallOperands++; // The function index.
- if (!NamePair.first.empty()) CallOperands++; // The result register.
- if (Operands.size() > CallOperands) {
- // Ensure operands we drop are all $pop.
- for (size_t I = CallOperands; I < Operands.size(); I++) {
- auto Operand =
- reinterpret_cast<WebAssemblyOperand *>(Operands[I].get());
- if (Operand->Kind != WebAssemblyOperand::Stack)
- Parser.Error(NameLoc,
- "Call instruction has non-stack arguments, if this code was "
- "generated with -disable-wasm-explicit-locals please remove it");
- }
- // Drop unneeded operands.
- Operands.resize(CallOperands);
+ if (expect(AsmToken::Comma, ","))
+ return true;
}
}
- // Block instructions require a signature index, but these are missing in
- // assembly, so we add a dummy one explicitly (since we have no control
- // over signature tables here, we assume these will be regenerated when
- // the wasm module is generated).
- if (NamePair.second == "block" || NamePair.second == "loop") {
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, NameLoc,
- NameLoc, WebAssemblyOperand::IntOp{-1}));
- }
- // These don't specify the type, which has to derived from the local index.
- if (NamePair.second == "get_local" || NamePair.second == "tee_local") {
- if (Operands.size() >= 3 && Operands[1]->isReg() &&
- Operands[2]->isImm()) {
- auto Op1 = reinterpret_cast<WebAssemblyOperand *>(Operands[1].get());
- auto Op2 = reinterpret_cast<WebAssemblyOperand *>(Operands[2].get());
- auto Type = GetType(LocalTypes, static_cast<size_t>(Op2->Int.Val));
- Op1->Reg.Type = Type;
- GetType(StackTypes, Op1->Reg.RegNo) = Type;
- }
+ if (ExpectBlockType && Operands.size() == 1) {
+ // Support blocks with no operands as default to void.
+ addBlockTypeOperand(Operands, NameLoc, WebAssembly::ExprType::Void);
}
+ Parser.Lex();
return false;
}
void onLabelParsed(MCSymbol *Symbol) override {
LastLabel = Symbol;
+ CurrentState = Label;
}
+ bool parseSignature(wasm::WasmSignature *Signature) {
+ if (expect(AsmToken::LParen, "("))
+ return true;
+ if (parseRegTypeList(Signature->Params))
+ return true;
+ if (expect(AsmToken::RParen, ")"))
+ return true;
+ if (expect(AsmToken::MinusGreater, "->"))
+ return true;
+ if (expect(AsmToken::LParen, "("))
+ return true;
+ if (parseRegTypeList(Signature->Returns))
+ return true;
+ if (expect(AsmToken::RParen, ")"))
+ return true;
+ return false;
+ }
+
+ // This function processes wasm-specific directives streamed to
+ // WebAssemblyTargetStreamer, all others go to the generic parser
+ // (see WasmAsmParser).
bool ParseDirective(AsmToken DirectiveID) override {
+ // This function has a really weird return value behavior that is different
+ // from all the other parsing functions:
+ // - return true && no tokens consumed -> don't know this directive / let
+ // the generic parser handle it.
+ // - return true && tokens consumed -> a parsing error occurred.
+ // - return false -> processed this directive successfully.
assert(DirectiveID.getKind() == AsmToken::Identifier);
auto &Out = getStreamer();
- auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
- *Out.getTargetStreamer());
- // TODO: we're just parsing the subset of directives we're interested in,
- // and ignoring ones we don't recognise. We should ideally verify
- // all directives here.
- if (DirectiveID.getString() == ".type") {
- // This could be the start of a function, check if followed by
- // "label,@function"
- if (!(IsNext(AsmToken::Identifier) &&
- IsNext(AsmToken::Comma) &&
- IsNext(AsmToken::At) &&
- Lexer.is(AsmToken::Identifier)))
- return Error("Expected label,@type declaration, got: ", Lexer.getTok());
- if (Lexer.getTok().getString() == "function") {
- // Track locals from start of function.
- LocalTypes.clear();
- StackTypes.clear();
- }
- Parser.Lex();
- //Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
- } else if (DirectiveID.getString() == ".param" ||
- DirectiveID.getString() == ".local") {
- // Track the number of locals, needed for correct virtual register
- // assignment elsewhere.
- // Also output a directive to the streamer.
- std::vector<MVT> Params;
- std::vector<MVT> Locals;
- while (Lexer.is(AsmToken::Identifier)) {
- auto RegType = ParseRegType(Lexer.getTok().getString());
- if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE) return true;
- LocalTypes.push_back(RegType);
- if (DirectiveID.getString() == ".param") {
- Params.push_back(RegType);
- } else {
- Locals.push_back(RegType);
- }
- Parser.Lex();
- if (!IsNext(AsmToken::Comma)) break;
+ auto &TOut =
+ reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
+
+ // TODO: any time we return an error, at least one token must have been
+ // consumed, otherwise this will not signal an error to the caller.
+ if (DirectiveID.getString() == ".globaltype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ if (expect(AsmToken::Comma, ","))
+ return true;
+ auto TypeTok = Lexer.getTok();
+ auto TypeName = expectIdent();
+ if (TypeName.empty())
+ return true;
+ auto Type = parseType(TypeName);
+ if (!Type)
+ return error("Unknown type in .globaltype directive: ", TypeTok);
+ // Now set this symbol with the correct type.
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ WasmSym->setGlobalType(
+ wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
+ // And emit the directive again.
+ TOut.emitGlobalType(WasmSym);
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".functype") {
+ // This code has to send things to the streamer similar to
+ // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
+ // TODO: would be good to factor this into a common function, but the
+ // assembler and backend really don't share any common code, and this code
+ // parses the locals separately.
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ if (CurrentState == Label && WasmSym == LastLabel) {
+ // This .functype indicates a start of a function.
+ if (ensureEmptyNestingStack())
+ return true;
+ CurrentState = FunctionStart;
+ push(Function);
}
- assert(LastLabel);
- TOut.emitParam(LastLabel, Params);
+ auto Signature = make_unique<wasm::WasmSignature>();
+ if (parseSignature(Signature.get()))
+ return true;
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ TOut.emitFunctionType(WasmSym);
+ // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".eventtype") {
+ auto SymName = expectIdent();
+ if (SymName.empty())
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(
+ TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ auto Signature = make_unique<wasm::WasmSignature>();
+ if (parseRegTypeList(Signature->Params))
+ return true;
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
+ TOut.emitEventType(WasmSym);
+ // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".local") {
+ if (CurrentState != FunctionStart)
+ return error(".local directive should follow the start of a function",
+ Lexer.getTok());
+ SmallVector<wasm::ValType, 4> Locals;
+ if (parseRegTypeList(Locals))
+ return true;
TOut.emitLocal(Locals);
- } else {
- // For now, ignore anydirective we don't recognize:
- while (Lexer.isNot(AsmToken::EndOfStatement)) Parser.Lex();
+ CurrentState = FunctionLocals;
+ return expect(AsmToken::EndOfStatement, "EOL");
}
- return Expect(AsmToken::EndOfStatement, "EOL");
+
+ return true; // We didn't process this directive.
}
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &/*Opcode*/,
- OperandVector &Operands,
- MCStreamer &Out, uint64_t &ErrorInfo,
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override {
MCInst Inst;
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success: {
+ if (CurrentState == FunctionStart) {
+ // This is the first instruction in a function, but we haven't seen
+ // a .local directive yet. The streamer requires locals to be encoded
+ // as a prelude to the instructions, so emit an empty list of locals
+ // here.
+ auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
+ *Out.getTargetStreamer());
+ TOut.emitLocal(SmallVector<wasm::ValType, 0>());
+ }
+ CurrentState = Instructions;
Out.EmitInstruction(Inst, getSTI());
return false;
}
case Match_MissingFeature:
- return Parser.Error(IDLoc,
- "instruction requires a WASM feature not currently enabled");
+ return Parser.Error(
+ IDLoc, "instruction requires a WASM feature not currently enabled");
case Match_MnemonicFail:
return Parser.Error(IDLoc, "invalid instruction");
case Match_NearMisses:
@@ -547,6 +693,8 @@ public:
}
llvm_unreachable("Implement any new match types added!");
}
+
+ void onEndOfFile() override { ensureEmptyNestingStack(); }
};
} // end anonymous namespace
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 2f0960271e30..6acc9b20eed2 100644
--- a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -16,7 +16,6 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -37,6 +36,8 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
#include "WebAssemblyGenDisassemblerTables.inc"
namespace {
+static constexpr int WebAssemblyInstructionTableSize = 256;
+
class WebAssemblyDisassembler final : public MCDisassembler {
std::unique_ptr<const MCInstrInfo> MCII;
@@ -75,31 +76,43 @@ static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
return V;
}
-static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, bool Signed) {
+static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
+ bool Signed = false) {
unsigned N = 0;
const char *Error = nullptr;
- auto Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
- Bytes.data() + Bytes.size(), &Error)
- : static_cast<int64_t>(
- decodeULEB128(Bytes.data() + Size, &N,
- Bytes.data() + Bytes.size(), &Error));
+ Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
+ Bytes.data() + Bytes.size(), &Error)
+ : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
+ Bytes.data() + Bytes.size(),
+ &Error));
if (Error)
return false;
Size += N;
+ return true;
+}
+
+static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, bool Signed) {
+ int64_t Val;
+ if (!nextLEB(Val, Bytes, Size, Signed))
+ return false;
MI.addOperand(MCOperand::createImm(Val));
return true;
}
template <typename T>
-bool parseFPImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
+bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
if (Size + sizeof(T) > Bytes.size())
return false;
T Val;
memcpy(&Val, Bytes.data() + Size, sizeof(T));
support::endian::byte_swap<T, support::endianness::little>(Val);
Size += sizeof(T);
- MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
+ if (std::is_floating_point<T>::value) {
+ MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
+ } else {
+ MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
+ }
return true;
}
@@ -108,7 +121,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
raw_ostream & /*OS*/, raw_ostream &CS) const {
CommentStream = &CS;
Size = 0;
- auto Opc = nextByte(Bytes, Size);
+ int Opc = nextByte(Bytes, Size);
if (Opc < 0)
return MCDisassembler::Fail;
const auto *WasmInst = &InstructionTable0[Opc];
@@ -124,10 +137,12 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
}
if (!WasmInst)
return MCDisassembler::Fail;
- Opc = nextByte(Bytes, Size);
- if (Opc < 0)
+ int64_t PrefixedOpc;
+ if (!nextLEB(PrefixedOpc, Bytes, Size))
return MCDisassembler::Fail;
- WasmInst += Opc;
+ if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
+ return MCDisassembler::Fail;
+ WasmInst += PrefixedOpc;
}
if (WasmInst->ET == ET_Unused)
return MCDisassembler::Fail;
@@ -136,7 +151,8 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MI.setOpcode(WasmInst->Opcode);
// Parse any operands.
for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
- switch (WasmInst->Operands[OPI]) {
+ auto OT = OperandTable[WasmInst->OperandStart + OPI];
+ switch (OT) {
// ULEB operands:
case WebAssembly::OPERAND_BASIC_BLOCK:
case WebAssembly::OPERAND_LOCAL:
@@ -152,32 +168,68 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
}
// SLEB operands:
case WebAssembly::OPERAND_I32IMM:
- case WebAssembly::OPERAND_I64IMM:
- case WebAssembly::OPERAND_SIGNATURE: {
+ case WebAssembly::OPERAND_I64IMM: {
if (!parseLEBImmediate(MI, Size, Bytes, true))
return MCDisassembler::Fail;
break;
}
+ // block_type operands (uint8_t).
+ case WebAssembly::OPERAND_SIGNATURE: {
+ if (!parseImmediate<uint8_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
// FP operands.
case WebAssembly::OPERAND_F32IMM: {
- if (!parseFPImmediate<float>(MI, Size, Bytes))
+ if (!parseImmediate<float>(MI, Size, Bytes))
return MCDisassembler::Fail;
break;
}
case WebAssembly::OPERAND_F64IMM: {
- if (!parseFPImmediate<double>(MI, Size, Bytes))
+ if (!parseImmediate<double>(MI, Size, Bytes))
return MCDisassembler::Fail;
break;
}
- case MCOI::OPERAND_REGISTER: {
- // These are NOT actually in the instruction stream, but MC is going to
- // expect operands to be present for them!
- // FIXME: can MC re-generate register assignments or do we have to
- // do this? Since this function decodes a single instruction, we don't
- // have the proper context for tracking an operand stack here.
- MI.addOperand(MCOperand::createReg(0));
+ // Vector lane operands (not LEB encoded).
+ case WebAssembly::OPERAND_VEC_I8IMM: {
+ if (!parseImmediate<uint8_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I16IMM: {
+ if (!parseImmediate<uint16_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I32IMM: {
+ if (!parseImmediate<uint32_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_VEC_I64IMM: {
+ if (!parseImmediate<uint64_t>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_BRLIST: {
+ int64_t TargetTableLen;
+ if (!nextLEB(TargetTableLen, Bytes, Size, false))
+ return MCDisassembler::Fail;
+ for (int64_t I = 0; I < TargetTableLen; I++) {
+ if (!parseLEBImmediate(MI, Size, Bytes, false))
+ return MCDisassembler::Fail;
+ }
+ // Default case.
+ if (!parseLEBImmediate(MI, Size, Bytes, false))
+ return MCDisassembler::Fail;
break;
}
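
For example (an illustrative decode, not from the patch): a br_table whose branch table is {1, 2} with default target 0 is read here as the ULEB table length 2, the two ULEB targets 1 and 2, and finally the ULEB default target 0, each of which is appended as an immediate operand of the MCInst.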
+ case MCOI::OPERAND_REGISTER:
+ // The tablegen header currently does not have any register operands since
+ // we use only the stack (_S) instructions.
+ // If you hit this, it probably means a bad instruction definition in
+ // tablegen.
+ llvm_unreachable("Register operand in WebAssemblyDisassembler");
default:
llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 10fa798ac8d7..15532d7ff1a6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -35,12 +35,12 @@ using namespace llvm;
WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI), ControlFlowCounter(0) {}
+ : MCInstPrinter(MAI, MII, MRI) {}
void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
assert(RegNo != WebAssemblyFunctionInfo::UnusedReg);
- // Note that there's an implicit get_local/set_local here!
+ // Note that there's an implicit local.get/local.set here!
OS << "$" << RegNo;
}
@@ -57,9 +57,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
// we have an extra flags operand which is not currently printed, for
 // compatibility reasons.
- if (i != 0 &&
- (MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID ||
- i != Desc.getNumOperands()))
+ if (i != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
+ MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) ||
+ i != Desc.getNumOperands()))
OS << ", ";
printOperand(MI, i, OS);
}
@@ -70,25 +70,76 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
if (CommentStream) {
// Observe any effects on the control flow stack, for use in annotating
// control flow label references.
- switch (MI->getOpcode()) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
default:
break;
- case WebAssembly::LOOP: {
+
+ case WebAssembly::LOOP:
+ case WebAssembly::LOOP_S:
printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':');
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true));
break;
- }
+
case WebAssembly::BLOCK:
+ case WebAssembly::BLOCK_S:
ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
break;
+
+ case WebAssembly::TRY:
+ case WebAssembly::TRY_S:
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+ EHPadStack.push_back(EHPadStackCounter++);
+ LastSeenEHInst = TRY;
+ break;
+
case WebAssembly::END_LOOP:
- // Have to guard against an empty stack, in case of mismatched pairs
- // in assembly parsing.
- if (!ControlFlowStack.empty()) ControlFlowStack.pop_back();
+ case WebAssembly::END_LOOP_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ ControlFlowStack.pop_back();
+ }
break;
+
case WebAssembly::END_BLOCK:
- if (!ControlFlowStack.empty()) printAnnotation(
- OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ case WebAssembly::END_BLOCK_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ }
+ break;
+
+ case WebAssembly::END_TRY:
+ case WebAssembly::END_TRY_S:
+ if (ControlFlowStack.empty()) {
+ printAnnotation(OS, "End marker mismatch!");
+ } else {
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ LastSeenEHInst = END_TRY;
+ }
+ break;
+
+ case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I32_S:
+ case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_I64_S:
+ case WebAssembly::CATCH_ALL:
+ case WebAssembly::CATCH_ALL_S:
+ // There can be multiple catch instructions for one try instruction, so we
+ // print a label only for the first 'catch'.
+ if (LastSeenEHInst != CATCH) {
+ if (EHPadStack.empty()) {
+ printAnnotation(OS, "try-catch mismatch!");
+ } else {
+ printAnnotation(OS,
+ "catch" + utostr(EHPadStack.pop_back_val()) + ':');
+ }
+ }
+ LastSeenEHInst = CATCH;
break;
}
@@ -96,34 +147,61 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
unsigned NumFixedOperands = Desc.NumOperands;
SmallSet<uint64_t, 8> Printed;
for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- if (!(i < NumFixedOperands
- ? (Desc.OpInfo[i].OperandType ==
- WebAssembly::OPERAND_BASIC_BLOCK)
- : (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel)))
- continue;
+ // See if this operand denotes a basic block target.
+ if (i < NumFixedOperands) {
+ // A non-variable_ops operand, check its type.
+ if (Desc.OpInfo[i].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
+ continue;
+ } else {
+ // A variable_ops operand, which currently can be immediates (used in
+ // br_table) which are basic block targets, or for call instructions
+ // when using -wasm-keep-registers (in which case they are registers,
+ // and should not be processed).
+ if (!MI->getOperand(i).isImm())
+ continue;
+ }
uint64_t Depth = MI->getOperand(i).getImm();
if (!Printed.insert(Depth).second)
continue;
- const auto &Pair = ControlFlowStack.rbegin()[Depth];
- printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? "up" : "down") +
- " to label" + utostr(Pair.first));
+
+ if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) {
+ if (Depth > EHPadStack.size()) {
+ printAnnotation(OS, "Invalid depth argument!");
+ } else if (Depth == EHPadStack.size()) {
+ // This can happen when a rethrow instruction breaks out of all nests
+ // and throws up to the current function's caller.
+ printAnnotation(OS, utostr(Depth) + ": " + "to caller");
+ } else {
+ uint64_t CatchNo = EHPadStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " + "down to catch" +
+ utostr(CatchNo));
+ }
+
+ } else {
+ if (Depth >= ControlFlowStack.size()) {
+ printAnnotation(OS, "Invalid depth argument!");
+ } else {
+ const auto &Pair = ControlFlowStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " +
+ (Pair.second ? "up" : "down") + " to label" +
+ utostr(Pair.first));
+ }
+ }
}
}
}
static std::string toString(const APFloat &FP) {
// Print NaNs with custom payloads specially.
- if (FP.isNaN() &&
- !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
+ if (FP.isNaN() && !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
!FP.bitwiseIsEqual(
APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) {
APInt AI = FP.bitcastToAPInt();
- return
- std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
- utohexstr(AI.getZExtValue() &
- (AI.getBitWidth() == 32 ? INT64_C(0x007fffff) :
- INT64_C(0x000fffffffffffff)),
- /*LowerCase=*/true);
+ return std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
+ utohexstr(AI.getZExtValue() &
+ (AI.getBitWidth() == 32 ? INT64_C(0x007fffff)
+ : INT64_C(0x000fffffffffffff)),
+ /*LowerCase=*/true);
}
// Use C99's hexadecimal floating-point representation.
@@ -141,9 +219,6 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
- assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
- MII.get(MI->getOpcode()).TSFlags == 0) &&
- "WebAssembly variable_ops register ops don't use TSFlags");
unsigned WAReg = Op.getReg();
if (int(WAReg) >= 0)
printRegName(O, WAReg);
@@ -157,23 +232,9 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
O << '=';
} else if (Op.isImm()) {
- const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- assert((OpNo < Desc.getNumOperands() ||
- (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate)) &&
- "WebAssemblyII::VariableOpIsImmediate should be set for "
- "variable_ops immediate ops");
- (void)Desc;
- // TODO: (MII.get(MI->getOpcode()).TSFlags &
- // WebAssemblyII::VariableOpImmediateIsLabel)
- // can tell us whether this is an immediate referencing a label in the
- // control flow stack, and it may be nice to pretty-print.
O << Op.getImm();
} else if (Op.isFPImm()) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- assert(OpNo < Desc.getNumOperands() &&
- "Unexpected floating-point immediate as a non-fixed operand");
- assert(Desc.TSFlags == 0 &&
- "WebAssembly variable_ops floating point ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[OpNo];
if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
// TODO: MC converts all floating point immediate operands to double.
@@ -184,78 +245,66 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << ::toString(APFloat(Op.getFPImm()));
}
} else {
- assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
- (MII.get(MI->getOpcode()).TSFlags &
- WebAssemblyII::VariableOpIsImmediate)) &&
- "WebAssemblyII::VariableOpIsImmediate should be set for "
- "variable_ops expr ops");
assert(Op.isExpr() && "unknown operand kind in printOperand");
Op.getExpr()->print(O, &MAI);
}
}
-void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(
- const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+void WebAssemblyInstPrinter::printBrList(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << "{";
+ for (unsigned I = OpNo, E = MI->getNumOperands(); I != E; ++I) {
+ if (I != OpNo)
+ O << ", ";
+ O << MI->getOperand(I).getImm();
+ }
+ O << "}";
+}
+
+void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
int64_t Imm = MI->getOperand(OpNo).getImm();
if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode()))
return;
O << ":p2align=" << Imm;
}
-void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(
- const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- int64_t Imm = MI->getOperand(OpNo).getImm();
- switch (WebAssembly::ExprType(Imm)) {
- case WebAssembly::ExprType::Void: break;
- case WebAssembly::ExprType::I32: O << "i32"; break;
- case WebAssembly::ExprType::I64: O << "i64"; break;
- case WebAssembly::ExprType::F32: O << "f32"; break;
- case WebAssembly::ExprType::F64: O << "f64"; break;
- case WebAssembly::ExprType::I8x16: O << "i8x16"; break;
- case WebAssembly::ExprType::I16x8: O << "i16x8"; break;
- case WebAssembly::ExprType::I32x4: O << "i32x4"; break;
- case WebAssembly::ExprType::F32x4: O << "f32x4"; break;
- case WebAssembly::ExprType::B8x16: O << "b8x16"; break;
- case WebAssembly::ExprType::B16x8: O << "b16x8"; break;
- case WebAssembly::ExprType::B32x4: O << "b32x4"; break;
- case WebAssembly::ExprType::ExceptRef: O << "except_ref"; break;
- }
+void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ auto Imm = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ if (Imm != wasm::WASM_TYPE_NORESULT)
+ O << WebAssembly::anyTypeToString(Imm);
}
-const char *llvm::WebAssembly::TypeToString(MVT Ty) {
- switch (Ty.SimpleTy) {
- case MVT::i32:
+// We have various enums representing a subset of these types; use this
+// function to convert any of them to text.
+const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
+ switch (Ty) {
+ case wasm::WASM_TYPE_I32:
return "i32";
- case MVT::i64:
+ case wasm::WASM_TYPE_I64:
return "i64";
- case MVT::f32:
+ case wasm::WASM_TYPE_F32:
return "f32";
- case MVT::f64:
+ case wasm::WASM_TYPE_F64:
return "f64";
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- case MVT::v4f32:
+ case wasm::WASM_TYPE_V128:
return "v128";
- case MVT::ExceptRef:
+ case wasm::WASM_TYPE_FUNCREF:
+ return "funcref";
+ case wasm::WASM_TYPE_FUNC:
+ return "func";
+ case wasm::WASM_TYPE_EXCEPT_REF:
return "except_ref";
+ case wasm::WASM_TYPE_NORESULT:
+ return "void";
default:
- llvm_unreachable("unsupported type");
+ return "invalid_type";
}
}
-const char *llvm::WebAssembly::TypeToString(wasm::ValType Type) {
- switch (Type) {
- case wasm::ValType::I32:
- return "i32";
- case wasm::ValType::I64:
- return "i64";
- case wasm::ValType::F32:
- return "f32";
- case wasm::ValType::F64:
- return "f64";
- case wasm::ValType::EXCEPT_REF:
- return "except_ref";
- }
- llvm_unreachable("unsupported type");
+const char *llvm::WebAssembly::typeToString(wasm::ValType Ty) {
+ return anyTypeToString(static_cast<unsigned>(Ty));
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index f5b890a7615e..5ad45c7d5c7f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -25,8 +25,13 @@ namespace llvm {
class MCSubtargetInfo;
class WebAssemblyInstPrinter final : public MCInstPrinter {
- uint64_t ControlFlowCounter;
- SmallVector<std::pair<uint64_t, bool>, 0> ControlFlowStack;
+ uint64_t ControlFlowCounter = 0;
+ uint64_t EHPadStackCounter = 0;
+ SmallVector<std::pair<uint64_t, bool>, 4> ControlFlowStack;
+ SmallVector<uint64_t, 4> EHPadStack;
+
+ enum EHInstKind { TRY, CATCH, END_TRY };
+ EHInstKind LastSeenEHInst = END_TRY;
public:
WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
@@ -38,6 +43,7 @@ public:
// Used by tblgen code.
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBrList(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O);
void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo,
@@ -50,8 +56,8 @@ public:
namespace WebAssembly {
-const char *TypeToString(MVT Ty);
-const char *TypeToString(wasm::ValType Type);
+const char *typeToString(wasm::ValType Ty);
+const char *anyTypeToString(unsigned Ty);
} // end namespace WebAssembly
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 244c2189b455..0726dd481174 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -73,13 +73,13 @@ public:
const MCFixupKindInfo &
WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[WebAssembly::NumTargetFixupKinds] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // WebAssemblyFixupKinds.h.
- //
- // Name Offset (bits) Size (bits) Flags
- { "fixup_code_sleb128_i32", 0, 5*8, 0 },
- { "fixup_code_sleb128_i64", 0, 10*8, 0 },
- { "fixup_code_uleb128_i32", 0, 5*8, 0 },
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // WebAssemblyFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_code_sleb128_i32", 0, 5 * 8, 0},
+ {"fixup_code_sleb128_i64", 0, 10 * 8, 0},
+ {"fixup_code_uleb128_i32", 0, 5 * 8, 0},
};
if (Kind < FirstTargetFixupKind)
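
The reformatted table above only works because its rows stay in the exact order of
the fixup_* enumerators: once Kind is known to be a target fixup, getFixupKindInfo
indexes the table by Kind - FirstTargetFixupKind (generic kinds below that fall back
to the base MCAsmBackend). A self-contained sketch of that invariant with stand-in
types; FixupKindInfo here is a local struct, not the real MC declaration.

#include <cassert>

// Illustrative stand-ins for the MC types.
struct FixupKindInfo {
  const char *Name;
  unsigned Offset; // bits
  unsigned Size;   // bits
  unsigned Flags;
};

enum Fixups : unsigned {
  FirstTargetFixupKind = 0,
  fixup_code_sleb128_i32 = FirstTargetFixupKind,
  fixup_code_sleb128_i64,
  fixup_code_uleb128_i32,
  NumTargetFixupKinds,
};

// The table is indexed by (Kind - FirstTargetFixupKind), which is why its rows
// must appear in exactly the order the enumerators are declared.
const FixupKindInfo &getFixupKindInfo(unsigned Kind) {
  static const FixupKindInfo Infos[NumTargetFixupKinds] = {
      {"fixup_code_sleb128_i32", 0, 5 * 8, 0},
      {"fixup_code_sleb128_i64", 0, 10 * 8, 0},
      {"fixup_code_uleb128_i32", 0, 5 * 8, 0},
  };
  assert(Kind - FirstTargetFixupKind < NumTargetFixupKinds);
  return Infos[Kind - FirstTargetFixupKind];
}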
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
index b0af63c924bd..c2fac5f93a2f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -15,11 +15,9 @@
namespace llvm {
namespace WebAssembly {
enum Fixups {
- fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
- fixup_code_sleb128_i64, // 64-bit signed
- fixup_code_uleb128_i32, // 32-bit unsigned
-
- fixup_code_global_index, // 32-bit unsigned
+ fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
+ fixup_code_sleb128_i64, // 64-bit signed
+ fixup_code_uleb128_i32, // 32-bit unsigned
// Marker
LastTargetFixupKind,
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 94ca94e1e18c..065a4dc94ca6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -67,13 +67,16 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
OS << uint8_t(Binary);
} else {
assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet");
- OS << uint8_t(Binary >> 8)
- << uint8_t(Binary);
+ OS << uint8_t(Binary >> 8);
+ encodeULEB128(uint8_t(Binary), OS);
}
// For br_table instructions, encode the size of the table. In the MCInst,
- // there's an index operand, one operand for each table entry, and the
- // default operand.
+ // there's an index operand (if not a stack instruction), one operand for
+ // each table entry, and the default operand.
+ if (MI.getOpcode() == WebAssembly::BR_TABLE_I32_S ||
+ MI.getOpcode() == WebAssembly::BR_TABLE_I64_S)
+ encodeULEB128(MI.getNumOperands() - 1, OS);
if (MI.getOpcode() == WebAssembly::BR_TABLE_I32 ||
MI.getOpcode() == WebAssembly::BR_TABLE_I64)
encodeULEB128(MI.getNumOperands() - 2, OS);
@@ -83,36 +86,47 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
/* nothing to encode */
+
} else if (MO.isImm()) {
if (i < Desc.getNumOperands()) {
- assert(Desc.TSFlags == 0 &&
- "WebAssembly non-variable_ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[i];
LLVM_DEBUG(dbgs() << "Encoding immediate: type="
<< int(Info.OperandType) << "\n");
- if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ switch (Info.OperandType) {
+ case WebAssembly::OPERAND_I32IMM:
encodeSLEB128(int32_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_OFFSET32) {
+ break;
+ case WebAssembly::OPERAND_OFFSET32:
encodeULEB128(uint32_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ break;
+ case WebAssembly::OPERAND_I64IMM:
encodeSLEB128(int64_t(MO.getImm()), OS);
- } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
- llvm_unreachable("wasm globals should only be accessed symbolicly");
- } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) {
+ break;
+ case WebAssembly::OPERAND_SIGNATURE:
OS << uint8_t(MO.getImm());
- } else {
+ break;
+ case WebAssembly::OPERAND_VEC_I8IMM:
+ support::endian::write<uint8_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I16IMM:
+ support::endian::write<uint16_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I32IMM:
+ support::endian::write<uint32_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_VEC_I64IMM:
+ support::endian::write<uint64_t>(OS, MO.getImm(), support::little);
+ break;
+ case WebAssembly::OPERAND_GLOBAL:
+ llvm_unreachable("wasm globals should only be accessed symbolicly");
+ default:
encodeULEB128(uint64_t(MO.getImm()), OS);
}
} else {
- assert(Desc.TSFlags == (WebAssemblyII::VariableOpIsImmediate |
- WebAssemblyII::VariableOpImmediateIsLabel));
encodeULEB128(uint64_t(MO.getImm()), OS);
}
+
} else if (MO.isFPImm()) {
- assert(i < Desc.getNumOperands() &&
- "Unexpected floating-point immediate as a non-fixed operand");
- assert(Desc.TSFlags == 0 &&
- "WebAssembly variable_ops floating point ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[i];
if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
// TODO: MC converts all floating point immediate operands to double.
@@ -124,27 +138,31 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
double d = MO.getFPImm();
support::endian::write<double>(OS, d, support::little);
}
+
} else if (MO.isExpr()) {
const MCOperandInfo &Info = Desc.OpInfo[i];
llvm::MCFixupKind FixupKind;
size_t PaddedSize = 5;
- if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ switch (Info.OperandType) {
+ case WebAssembly::OPERAND_I32IMM:
FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i32);
- } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ break;
+ case WebAssembly::OPERAND_I64IMM:
FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i64);
PaddedSize = 10;
- } else if (Info.OperandType == WebAssembly::OPERAND_FUNCTION32 ||
- Info.OperandType == WebAssembly::OPERAND_OFFSET32 ||
- Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
+ break;
+ case WebAssembly::OPERAND_FUNCTION32:
+ case WebAssembly::OPERAND_OFFSET32:
+ case WebAssembly::OPERAND_TYPEINDEX:
+ case WebAssembly::OPERAND_GLOBAL:
+ case WebAssembly::OPERAND_EVENT:
FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32);
- } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
- FixupKind = MCFixupKind(WebAssembly::fixup_code_global_index);
- } else {
+ break;
+ default:
llvm_unreachable("unexpected symbolic operand kind");
}
- Fixups.push_back(MCFixup::create(
- OS.tell() - Start, MO.getExpr(),
- FixupKind, MI.getLoc()));
+ Fixups.push_back(MCFixup::create(OS.tell() - Start, MO.getExpr(),
+ FixupKind, MI.getLoc()));
++MCNumFixups;
encodeULEB128(0, OS, PaddedSize);
} else {
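
The code emitter above leans entirely on LEB128: unsigned immediates, offsets, and
br_table sizes are written as ULEB128, and signed i32/i64 immediates as SLEB128, via
encodeULEB128/encodeSLEB128 from llvm/Support/LEB128.h. A minimal sketch of the two
encodings, writing into a byte vector instead of a raw_ostream (assumes the usual
arithmetic right shift for signed values):

#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 payload bits per byte, high bit set while more follow.
void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

// Signed LEB128: shift arithmetically and stop once the remaining value is all
// sign bits and the sign bit of the last emitted byte already matches.
void encodeSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // arithmetic shift preserves the sign
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

// Example: encodeULEB128(624485, Out) appends 0xE5 0x8E 0x26.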
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index baf8a0c96c0a..390f367c2978 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -90,6 +90,10 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
return new WebAssemblyTargetAsmStreamer(S, OS);
}
+static MCTargetStreamer *createNullTargetStreamer(MCStreamer &S) {
+ return new WebAssemblyTargetNullStreamer(S);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeWebAssemblyTargetMC() {
for (Target *T :
@@ -120,16 +124,31 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() {
createObjectTargetStreamer);
// Register the asm target streamer.
TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+ // Register the null target streamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createNullTargetStreamer);
}
}
wasm::ValType WebAssembly::toValType(const MVT &Ty) {
switch (Ty.SimpleTy) {
- case MVT::i32: return wasm::ValType::I32;
- case MVT::i64: return wasm::ValType::I64;
- case MVT::f32: return wasm::ValType::F32;
- case MVT::f64: return wasm::ValType::F64;
- case MVT::ExceptRef: return wasm::ValType::EXCEPT_REF;
- default: llvm_unreachable("unexpected type");
+ case MVT::i32:
+ return wasm::ValType::I32;
+ case MVT::i64:
+ return wasm::ValType::I64;
+ case MVT::f32:
+ return wasm::ValType::F32;
+ case MVT::f64:
+ return wasm::ValType::F64;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return wasm::ValType::V128;
+ case MVT::ExceptRef:
+ return wasm::ValType::EXCEPT_REF;
+ default:
+ llvm_unreachable("unexpected type");
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index c1c8d243e920..a01517fb90c3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -59,6 +59,14 @@ enum OperandType {
OPERAND_F32IMM,
/// 64-bit floating-point immediates.
OPERAND_F64IMM,
+ /// 8-bit vector lane immediate
+ OPERAND_VEC_I8IMM,
+ /// 16-bit vector lane immediate
+ OPERAND_VEC_I16IMM,
+ /// 32-bit vector lane immediate
+ OPERAND_VEC_I32IMM,
+ /// 64-bit vector lane immediate
+ OPERAND_VEC_I64IMM,
/// 32-bit unsigned function indices.
OPERAND_FUNCTION32,
/// 32-bit unsigned memory offsets.
@@ -69,17 +77,24 @@ enum OperandType {
OPERAND_SIGNATURE,
/// type signature immediate for call_indirect.
OPERAND_TYPEINDEX,
+ /// Event index.
+ OPERAND_EVENT,
+ /// A list of branch targets for br_list.
+ OPERAND_BRLIST,
};
} // end namespace WebAssembly
namespace WebAssemblyII {
-enum {
- // For variadic instructions, this flag indicates whether an operand
- // in the variable_ops range is an immediate value.
- VariableOpIsImmediate = (1 << 0),
- // For immediate values in the variable_ops range, this flag indicates
- // whether the value represents a control-flow label.
- VariableOpImmediateIsLabel = (1 << 1)
+
+/// Target Operand Flag enum.
+enum TOF {
+ MO_NO_FLAG = 0,
+
+ // Flags to indicate the type of the symbol being referenced
+ MO_SYMBOL_FUNCTION = 0x1,
+ MO_SYMBOL_GLOBAL = 0x2,
+ MO_SYMBOL_EVENT = 0x4,
+ MO_SYMBOL_MASK = 0x7,
};
} // end namespace WebAssemblyII
@@ -149,6 +164,10 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S:
return 0;
case WebAssembly::LOAD16_S_I32:
case WebAssembly::LOAD16_S_I32_S:
@@ -194,6 +213,10 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S:
return 1;
case WebAssembly::LOAD_I32:
case WebAssembly::LOAD_I32_S:
@@ -241,6 +264,14 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW_XCHG_I32_S:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32_S:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64_S:
+ case WebAssembly::ATOMIC_NOTIFY:
+ case WebAssembly::ATOMIC_NOTIFY_S:
+ case WebAssembly::ATOMIC_WAIT_I32:
+ case WebAssembly::ATOMIC_WAIT_I32_S:
return 2;
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_I64_S:
@@ -266,7 +297,36 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::ATOMIC_RMW_XOR_I64_S:
case WebAssembly::ATOMIC_RMW_XCHG_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I64_S:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S:
+ case WebAssembly::ATOMIC_WAIT_I64:
+ case WebAssembly::ATOMIC_WAIT_I64_S:
return 3;
+ case WebAssembly::LOAD_v16i8:
+ case WebAssembly::LOAD_v16i8_S:
+ case WebAssembly::LOAD_v8i16:
+ case WebAssembly::LOAD_v8i16_S:
+ case WebAssembly::LOAD_v4i32:
+ case WebAssembly::LOAD_v4i32_S:
+ case WebAssembly::LOAD_v2i64:
+ case WebAssembly::LOAD_v2i64_S:
+ case WebAssembly::LOAD_v4f32:
+ case WebAssembly::LOAD_v4f32_S:
+ case WebAssembly::LOAD_v2f64:
+ case WebAssembly::LOAD_v2f64_S:
+ case WebAssembly::STORE_v16i8:
+ case WebAssembly::STORE_v16i8_S:
+ case WebAssembly::STORE_v8i16:
+ case WebAssembly::STORE_v8i16_S:
+ case WebAssembly::STORE_v4i32:
+ case WebAssembly::STORE_v4i32_S:
+ case WebAssembly::STORE_v2i64:
+ case WebAssembly::STORE_v2i64_S:
+ case WebAssembly::STORE_v4f32:
+ case WebAssembly::STORE_v4f32_S:
+ case WebAssembly::STORE_v2f64:
+ case WebAssembly::STORE_v2f64_S:
+ return 4;
default:
llvm_unreachable("Only loads and stores have p2align values");
}
@@ -282,19 +342,14 @@ static const unsigned StoreP2AlignOperandNo = 0;
/// This is used to indicate block signatures.
enum class ExprType : unsigned {
- Void = 0x40,
- I32 = 0x7F,
- I64 = 0x7E,
- F32 = 0x7D,
- F64 = 0x7C,
- I8x16 = 0x7B,
- I16x8 = 0x7A,
- I32x4 = 0x79,
- F32x4 = 0x78,
- B8x16 = 0x77,
- B16x8 = 0x76,
- B32x4 = 0x75,
- ExceptRef = 0x68
+ Void = 0x40,
+ I32 = 0x7F,
+ I64 = 0x7E,
+ F32 = 0x7D,
+ F64 = 0x7C,
+ V128 = 0x7B,
+ ExceptRef = 0x68,
+ Invalid = 0x00
};
/// Instruction opcodes emitted via means other than CodeGen.
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 5272e188e1d0..50143fb0ece3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -39,70 +39,80 @@ WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
WebAssemblyTargetWasmStreamer::WebAssemblyTargetWasmStreamer(MCStreamer &S)
: WebAssemblyTargetStreamer(S) {}
-static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
+static void printTypes(formatted_raw_ostream &OS,
+ ArrayRef<wasm::ValType> Types) {
bool First = true;
- for (MVT Type : Types) {
+ for (auto Type : Types) {
if (First)
First = false;
else
OS << ", ";
- OS << WebAssembly::TypeToString(Type);
+ OS << WebAssembly::typeToString(Type);
}
OS << '\n';
}
-void WebAssemblyTargetAsmStreamer::emitParam(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
+void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
if (!Types.empty()) {
- OS << "\t.param \t";
-
- // FIXME: Currently this applies to the "current" function; it may
- // be cleaner to specify an explicit symbol as part of the directive.
-
- PrintTypes(OS, Types);
+ OS << "\t.local \t";
+ printTypes(OS, Types);
}
}
-void WebAssemblyTargetAsmStreamer::emitResult(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- if (!Types.empty()) {
- OS << "\t.result \t";
+void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
- // FIXME: Currently this applies to the "current" function; it may
- // be cleaner to specify an explicit symbol as part of the directive.
+void WebAssemblyTargetAsmStreamer::emitSignature(
+ const wasm::WasmSignature *Sig) {
+ OS << "(";
+ emitParamList(Sig);
+ OS << ") -> (";
+ emitReturnList(Sig);
+ OS << ")";
+}
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitParamList(
+ const wasm::WasmSignature *Sig) {
+ auto &Params = Sig->Params;
+ for (auto &Ty : Params) {
+ if (&Ty != &Params[0])
+ OS << ", ";
+ OS << WebAssembly::typeToString(Ty);
}
}
-void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
- if (!Types.empty()) {
- OS << "\t.local \t";
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitReturnList(
+ const wasm::WasmSignature *Sig) {
+ auto &Returns = Sig->Returns;
+ for (auto &Ty : Returns) {
+ if (&Ty != &Returns[0])
+ OS << ", ";
+ OS << WebAssembly::typeToString(Ty);
}
}
-void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
+void WebAssemblyTargetAsmStreamer::emitFunctionType(const MCSymbolWasm *Sym) {
+ assert(Sym->isFunction());
+ OS << "\t.functype\t" << Sym->getName() << " ";
+ emitSignature(Sym->getSignature());
+ OS << "\n";
+}
-void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType(
- MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
- OS << "\t.functype\t" << Symbol->getName();
- if (Results.empty())
- OS << ", void";
- else {
- assert(Results.size() == 1);
- OS << ", " << WebAssembly::TypeToString(Results.front());
- }
- for (auto Ty : Params)
- OS << ", " << WebAssembly::TypeToString(Ty);
- OS << '\n';
+void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) {
+ assert(Sym->isGlobal());
+ OS << "\t.globaltype\t" << Sym->getName() << ", "
+ << WebAssembly::typeToString(
+ static_cast<wasm::ValType>(Sym->getGlobalType().Type))
+ << '\n';
}
-void WebAssemblyTargetAsmStreamer::emitGlobalImport(StringRef name) {
- OS << "\t.import_global\t" << name << '\n';
+void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) {
+ assert(Sym->isEvent());
+ OS << "\t.eventtype\t" << Sym->getName() << " ";
+ emitParamList(Sym->getSignature());
+ OS << "\n";
}
-void WebAssemblyTargetAsmStreamer::emitImportModule(MCSymbolWasm *Sym,
+void WebAssemblyTargetAsmStreamer::emitImportModule(const MCSymbolWasm *Sym,
StringRef ModuleName) {
OS << "\t.import_module\t" << Sym->getName() << ", " << ModuleName << '\n';
}
@@ -111,27 +121,9 @@ void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
OS << "\t.indidx \t" << *Value << '\n';
}
-void WebAssemblyTargetWasmStreamer::emitParam(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- SmallVector<wasm::ValType, 4> Params;
- for (MVT Ty : Types)
- Params.push_back(WebAssembly::toValType(Ty));
-
- cast<MCSymbolWasm>(Symbol)->setParams(std::move(Params));
-}
-
-void WebAssemblyTargetWasmStreamer::emitResult(MCSymbol *Symbol,
- ArrayRef<MVT> Types) {
- SmallVector<wasm::ValType, 4> Returns;
- for (MVT Ty : Types)
- Returns.push_back(WebAssembly::toValType(Ty));
-
- cast<MCSymbolWasm>(Symbol)->setReturns(std::move(Returns));
-}
-
-void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<MVT> Types) {
- SmallVector<std::pair<MVT, uint32_t>, 4> Grouped;
- for (MVT Type : Types) {
+void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
+ SmallVector<std::pair<wasm::ValType, uint32_t>, 4> Grouped;
+ for (auto Type : Types) {
if (Grouped.empty() || Grouped.back().first != Type)
Grouped.push_back(std::make_pair(Type, 1));
else
@@ -141,7 +133,7 @@ void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<MVT> Types) {
Streamer.EmitULEB128IntValue(Grouped.size());
for (auto Pair : Grouped) {
Streamer.EmitULEB128IntValue(Pair.second);
- emitValueType(WebAssembly::toValType(Pair.first));
+ emitValueType(Pair.first);
}
}
@@ -152,34 +144,3 @@ void WebAssemblyTargetWasmStreamer::emitEndFunc() {
void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) {
llvm_unreachable(".indidx encoding not yet implemented");
}
-
-void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType(
- MCSymbol *Symbol, SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) {
- MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Symbol);
- if (WasmSym->isFunction()) {
- // Symbol already has its arguments and result set.
- return;
- }
-
- SmallVector<wasm::ValType, 4> ValParams;
- for (MVT Ty : Params)
- ValParams.push_back(WebAssembly::toValType(Ty));
-
- SmallVector<wasm::ValType, 1> ValResults;
- for (MVT Ty : Results)
- ValResults.push_back(WebAssembly::toValType(Ty));
-
- WasmSym->setParams(std::move(ValParams));
- WasmSym->setReturns(std::move(ValResults));
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
-}
-
-void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) {
- llvm_unreachable(".global_import is not needed for direct wasm output");
-}
-
-void WebAssemblyTargetWasmStreamer::emitImportModule(MCSymbolWasm *Sym,
- StringRef ModuleName) {
- Sym->setModuleName(ModuleName);
-}
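
The Wasm streamer's emitLocal above run-length-groups consecutive identical local
types before writing them, because the binary format declares a function's locals as
(count, type) runs rather than one entry per local. A small stand-alone sketch of
that grouping, with plain ints standing in for wasm::ValType:

#include <cstdint>
#include <utility>
#include <vector>

// Group consecutive equal local types into (type, count) runs, the shape the
// wasm binary format expects for a function's local declarations.
std::vector<std::pair<int, uint32_t>>
groupLocals(const std::vector<int> &Types) {
  std::vector<std::pair<int, uint32_t>> Grouped;
  for (int Type : Types) {
    if (Grouped.empty() || Grouped.back().first != Type)
      Grouped.emplace_back(Type, 1);
    else
      ++Grouped.back().second;
  }
  return Grouped;
}

// For {i32, i32, f64, f64, f64} this yields {(i32, 2), (f64, 3)}; the streamer
// then emits the number of runs, followed by each run's count and type byte,
// using ULEB128 for the counts.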
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index cafcb04ccd11..3073938118b4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -31,24 +31,21 @@ class WebAssemblyTargetStreamer : public MCTargetStreamer {
public:
explicit WebAssemblyTargetStreamer(MCStreamer &S);
- /// .param
- virtual void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
- /// .result
- virtual void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
/// .local
- virtual void emitLocal(ArrayRef<MVT> Types) = 0;
+ virtual void emitLocal(ArrayRef<wasm::ValType> Types) = 0;
/// .endfunc
virtual void emitEndFunc() = 0;
/// .functype
- virtual void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) = 0;
+ virtual void emitFunctionType(const MCSymbolWasm *Sym) = 0;
/// .indidx
virtual void emitIndIdx(const MCExpr *Value) = 0;
- /// .import_global
- virtual void emitGlobalImport(StringRef name) = 0;
+ /// .globaltype
+ virtual void emitGlobalType(const MCSymbolWasm *Sym) = 0;
+ /// .eventtype
+ virtual void emitEventType(const MCSymbolWasm *Sym) = 0;
/// .import_module
- virtual void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) = 0;
+ virtual void emitImportModule(const MCSymbolWasm *Sym,
+ StringRef ModuleName) = 0;
protected:
void emitValueType(wasm::ValType Type);
@@ -57,20 +54,20 @@ protected:
/// This part is for ascii assembly output
class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
formatted_raw_ostream &OS;
+ void emitSignature(const wasm::WasmSignature *Sig);
+ void emitParamList(const wasm::WasmSignature *Sig);
+ void emitReturnList(const wasm::WasmSignature *Sig);
public:
WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitLocal(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<wasm::ValType> Types) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) override;
+ void emitFunctionType(const MCSymbolWasm *Sym) override;
void emitIndIdx(const MCExpr *Value) override;
- void emitGlobalImport(StringRef name) override;
- void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) override;
+ void emitGlobalType(const MCSymbolWasm *Sym) override;
+ void emitEventType(const MCSymbolWasm *Sym) override;
+ void emitImportModule(const MCSymbolWasm *Sym, StringRef ModuleName) override;
};
/// This part is for Wasm object output
@@ -78,16 +75,29 @@ class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer {
public:
explicit WebAssemblyTargetWasmStreamer(MCStreamer &S);
- void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
- void emitLocal(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<wasm::ValType> Types) override;
void emitEndFunc() override;
- void emitIndirectFunctionType(MCSymbol *Symbol,
- SmallVectorImpl<MVT> &Params,
- SmallVectorImpl<MVT> &Results) override;
+ void emitFunctionType(const MCSymbolWasm *Sym) override {}
void emitIndIdx(const MCExpr *Value) override;
- void emitGlobalImport(StringRef name) override;
- void emitImportModule(MCSymbolWasm *Sym, StringRef ModuleName) override;
+ void emitGlobalType(const MCSymbolWasm *Sym) override {}
+ void emitEventType(const MCSymbolWasm *Sym) override {}
+ void emitImportModule(const MCSymbolWasm *Sym,
+ StringRef ModuleName) override {}
+};
+
+/// This part is for null output
+class WebAssemblyTargetNullStreamer final : public WebAssemblyTargetStreamer {
+public:
+ explicit WebAssemblyTargetNullStreamer(MCStreamer &S)
+ : WebAssemblyTargetStreamer(S) {}
+
+ void emitLocal(ArrayRef<wasm::ValType>) override {}
+ void emitEndFunc() override {}
+ void emitFunctionType(const MCSymbolWasm *) override {}
+ void emitIndIdx(const MCExpr *) override {}
+ void emitGlobalType(const MCSymbolWasm *) override {}
+ void emitEventType(const MCSymbolWasm *) override {}
+ void emitImportModule(const MCSymbolWasm *, StringRef) override {}
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 4fb12d40b01b..763e30be8e02 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -81,16 +81,23 @@ static const MCSection *GetFixupSection(const MCExpr *Expr) {
return nullptr;
}
-unsigned
-WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
- const MCFixup &Fixup) const {
+static bool IsGlobalType(const MCValue &Target) {
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_GLOBAL;
+}
+
+static bool IsEventType(const MCValue &Target) {
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_EVENT;
+}
+
+unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
+ const MCFixup &Fixup) const {
// WebAssembly functions are not allocated in the data address space. To
// resolve a pointer to a function, we must use a special relocation type.
bool IsFunction = IsFunctionExpr(Fixup.getValue());
switch (unsigned(Fixup.getKind())) {
- case WebAssembly::fixup_code_global_index:
- return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
case WebAssembly::fixup_code_sleb128_i32:
if (IsFunction)
return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB;
@@ -98,10 +105,14 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
case WebAssembly::fixup_code_sleb128_i64:
llvm_unreachable("fixup_sleb128_i64 not implemented yet");
case WebAssembly::fixup_code_uleb128_i32:
+ if (IsGlobalType(Target))
+ return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
if (IsFunctionType(Target))
return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB;
if (IsFunction)
return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
+ if (IsEventType(Target))
+ return wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB;
return wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB;
case FK_Data_4:
if (IsFunction)
diff --git a/contrib/llvm/lib/Target/WebAssembly/README.txt b/contrib/llvm/lib/Target/WebAssembly/README.txt
index ef0099f07efb..a154b4bf7ea8 100644
--- a/contrib/llvm/lib/Target/WebAssembly/README.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/README.txt
@@ -94,10 +94,10 @@ WebAssemblyTargetLowering.
//===---------------------------------------------------------------------===//
Instead of the OptimizeReturned pass, which should consider preserving the
-"returned" attribute through to MachineInstrs and extending the StoreResults
-pass to do this optimization on calls too. That would also let the
-WebAssemblyPeephole pass clean up dead defs for such calls, as it does for
-stores.
+"returned" attribute through to MachineInstrs and extending the
+MemIntrinsicResults pass to do this optimization on calls too. That would also
+let the WebAssemblyPeephole pass clean up dead defs for such calls, as it does
+for stores.
//===---------------------------------------------------------------------===//
@@ -120,8 +120,8 @@ code like this:
It could be done with a smaller encoding like this:
i32.const $push5=, 0
- tee_local $push6=, $4=, $pop5
- copy_local $3=, $pop6
+ local.tee $push6=, $4=, $pop5
+ local.copy $3=, $pop6
//===---------------------------------------------------------------------===//
@@ -180,11 +180,11 @@ floating-point constants.
//===---------------------------------------------------------------------===//
The function @dynamic_alloca_redzone in test/CodeGen/WebAssembly/userstack.ll
-ends up with a tee_local in its prolog which has an unused result, requiring
+ends up with a local.tee in its prolog which has an unused result, requiring
an extra drop:
- get_global $push8=, 0
- tee_local $push9=, 1, $pop8
+ global.get $push8=, 0
+ local.tee $push9=, 1, $pop8
drop $pop9
[...]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
index 05b7b21fb597..45145c0a6527 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -39,10 +39,11 @@ FunctionPass *createWebAssemblyArgumentMove();
FunctionPass *createWebAssemblySetP2AlignOperands();
// Late passes.
+FunctionPass *createWebAssemblyEHRestoreStackPointer();
FunctionPass *createWebAssemblyReplacePhysRegs();
FunctionPass *createWebAssemblyPrepareForLiveIntervals();
FunctionPass *createWebAssemblyOptimizeLiveIntervals();
-FunctionPass *createWebAssemblyStoreResults();
+FunctionPass *createWebAssemblyMemIntrinsicResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
FunctionPass *createWebAssemblyExplicitLocals();
@@ -63,10 +64,11 @@ void initializeFixFunctionBitcastsPass(PassRegistry &);
void initializeOptimizeReturnedPass(PassRegistry &);
void initializeWebAssemblyArgumentMovePass(PassRegistry &);
void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &);
+void initializeWebAssemblyEHRestoreStackPointerPass(PassRegistry &);
void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &);
void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &);
void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &);
-void initializeWebAssemblyStoreResultsPass(PassRegistry &);
+void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &);
void initializeWebAssemblyRegStackifyPass(PassRegistry &);
void initializeWebAssemblyRegColoringPass(PassRegistry &);
void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
index 2f301da8e422..6b218f8aa880 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -23,8 +23,15 @@ include "llvm/Target/Target.td"
// WebAssembly Subtarget features.
//===----------------------------------------------------------------------===//
-def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "true",
+def FeatureSIMD128 : SubtargetFeature<"simd128", "SIMDLevel", "SIMD128",
"Enable 128-bit SIMD">;
+
+def FeatureUnimplementedSIMD128 :
+ SubtargetFeature<"unimplemented-simd128",
+ "SIMDLevel", "UnimplementedSIMD128",
+ "Enable 128-bit SIMD not yet implemented in engines",
+ [FeatureSIMD128]>;
+
def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true",
"Enable Atomics">;
def FeatureNontrappingFPToInt :
@@ -71,7 +78,8 @@ def : ProcessorModel<"generic", NoSchedModel, []>;
// Latest and greatest experimental version of WebAssembly. Bugs included!
def : ProcessorModel<"bleeding-edge", NoSchedModel,
- [FeatureSIMD128, FeatureAtomics]>;
+ [FeatureSIMD128, FeatureAtomics,
+ FeatureNontrappingFPToInt, FeatureSignExt]>;
//===----------------------------------------------------------------------===//
// Target Declaration
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index 4af9cd150bf7..e49e2b67f435 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -24,10 +24,10 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-add-missing-prototypes"
@@ -60,16 +60,17 @@ ModulePass *llvm::createWebAssemblyAddMissingPrototypes() {
}
bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
- LLVM_DEBUG(dbgs() << "runnning AddMissingPrototypes\n");
+ LLVM_DEBUG(dbgs() << "********** Add Missing Prototypes **********\n");
- std::vector<std::pair<Function*, Function*>> Replacements;
+ std::vector<std::pair<Function *, Function *>> Replacements;
// Find all the prototype-less function declarations
for (Function &F : M) {
if (!F.isDeclaration() || !F.hasFnAttribute("no-prototype"))
continue;
- LLVM_DEBUG(dbgs() << "Found no-prototype function: " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Found no-prototype function: " << F.getName()
+ << "\n");
// When clang emits prototype-less C functions it uses (...), i.e. varargs
// functions that take no arguments (have no sentinel). When we see a
@@ -83,23 +84,29 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
"Functions with 'no-prototype' attribute should not have params: " +
F.getName());
-
// Create a function prototype based on the first call site (first bitcast)
// that we find.
FunctionType *NewType = nullptr;
- Function* NewF = nullptr;
+ Function *NewF = nullptr;
for (Use &U : F.uses()) {
LLVM_DEBUG(dbgs() << "prototype-less use: " << F.getName() << "\n");
- if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) {
- FunctionType *DestType =
- cast<FunctionType>(BC->getDestTy()->getPointerElementType());
-
- // Create a new function with the correct type
- NewType = DestType;
- NewF = Function::Create(NewType, F.getLinkage(), F.getName());
- NewF->setAttributes(F.getAttributes());
- NewF->removeFnAttr("no-prototype");
- break;
+ if (auto *BC = dyn_cast<BitCastOperator>(U.getUser())) {
+ if (auto *DestType = dyn_cast<FunctionType>(
+ BC->getDestTy()->getPointerElementType())) {
+ if (!NewType) {
+ // Create a new function with the correct type
+ NewType = DestType;
+ NewF = Function::Create(NewType, F.getLinkage(), F.getName());
+ NewF->setAttributes(F.getAttributes());
+ NewF->removeFnAttr("no-prototype");
+ } else {
+ if (NewType != DestType) {
+ report_fatal_error("Prototypeless function used with "
+ "conflicting signatures: " +
+ F.getName());
+ }
+ }
+ }
}
}
@@ -110,32 +117,42 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
continue;
}
- for (Use &U : F.uses()) {
- if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) {
- FunctionType *DestType =
- cast<FunctionType>(BC->getDestTy()->getPointerElementType());
- if (NewType != DestType) {
- report_fatal_error(
- "Prototypeless function used with conflicting signatures: " +
- F.getName());
- }
- BC->replaceAllUsesWith(NewF);
- Replacements.emplace_back(&F, NewF);
- } else {
- dbgs() << *U.getUser()->getType() << "\n";
+ SmallVector<Instruction *, 4> DeadInsts;
+
+ for (Use &US : F.uses()) {
+ User *U = US.getUser();
+ if (auto *BC = dyn_cast<BitCastOperator>(U)) {
+ if (auto *Inst = dyn_cast<BitCastInst>(U)) {
+ // Replace with a new bitcast
+ IRBuilder<> Builder(Inst);
+ Value *NewCast = Builder.CreatePointerCast(NewF, BC->getDestTy());
+ Inst->replaceAllUsesWith(NewCast);
+ DeadInsts.push_back(Inst);
+ } else if (auto *Const = dyn_cast<ConstantExpr>(U)) {
+ Constant *NewConst =
+ ConstantExpr::getPointerCast(NewF, BC->getDestTy());
+ Const->replaceAllUsesWith(NewConst);
+ } else {
+ dbgs() << *U->getType() << "\n";
#ifndef NDEBUG
- U.getUser()->dump();
+ U->dump();
#endif
- report_fatal_error(
- "unexpected use of prototypeless function: " + F.getName() + "\n");
+ report_fatal_error("unexpected use of prototypeless function: " +
+ F.getName() + "\n");
+ }
}
}
+
+ for (auto I : DeadInsts)
+ I->eraseFromParent();
+ Replacements.emplace_back(&F, NewF);
}
+
// Finally replace the old function declarations with the new ones
for (auto &Pair : Replacements) {
- Function* Old = Pair.first;
- Function* New = Pair.second;
+ Function *Old = Pair.first;
+ Function *New = Pair.second;
Old->eraseFromParent();
M.getFunctionList().push_back(New);
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 1f280e1d13fc..c4f03dfa7f9e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -50,7 +50,7 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
const TargetRegisterClass *TRC = MRI->getRegClass(RegNo);
for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16,
- MVT::v4i32, MVT::v4f32})
+ MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64})
if (TRI->isTypeLegalForClass(*TRC, T))
return T;
LLVM_DEBUG(errs() << "Unknown type for register number: " << RegNo);
@@ -78,24 +78,45 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
//===----------------------------------------------------------------------===//
void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ for (auto &It : OutContext.getSymbols()) {
+ // Emit a .globaltype and .eventtype declaration.
+ auto Sym = cast<MCSymbolWasm>(It.getValue());
+ if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_GLOBAL)
+ getTargetStreamer()->emitGlobalType(Sym);
+ else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_EVENT)
+ getTargetStreamer()->emitEventType(Sym);
+ }
+
for (const auto &F : M) {
// Emit function type info for all undefined functions
if (F.isDeclarationForLinker() && !F.isIntrinsic()) {
SmallVector<MVT, 4> Results;
SmallVector<MVT, 4> Params;
- ComputeSignatureVTs(F, TM, Params, Results);
- MCSymbol *Sym = getSymbol(&F);
- getTargetStreamer()->emitIndirectFunctionType(Sym, Params, Results);
+ ComputeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
+ auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ if (!Sym->getSignature()) {
+ auto Signature = SignatureFromMVTs(Results, Params);
+ Sym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ }
+ // FIXME: this was originally intended for post-linking and was only used
+ // for imports that were only called indirectly (i.e. s2wasm could not
+ // infer the type from a call). With object files it applies to all
+ // imports. So fix the names and the tests, or rethink how import
+ // declarations work in asm files.
+ getTargetStreamer()->emitFunctionType(Sym);
if (TM.getTargetTriple().isOSBinFormatWasm() &&
F.hasFnAttribute("wasm-import-module")) {
- MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
- StringRef Name = F.getFnAttribute("wasm-import-module")
- .getValueAsString();
- getTargetStreamer()->emitImportModule(WasmSym, Name);
+ StringRef Name =
+ F.getFnAttribute("wasm-import-module").getValueAsString();
+ Sym->setModuleName(Name);
+ getTargetStreamer()->emitImportModule(Sym, Name);
}
}
}
+
for (const auto &G : M.globals()) {
if (!G.hasInitializer() && G.hasExternalLinkage()) {
if (G.getValueType()->isSized()) {
@@ -137,10 +158,18 @@ void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
}
void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
- getTargetStreamer()->emitParam(CurrentFnSym, MFI->getParams());
-
- SmallVector<MVT, 4> ResultVTs;
const Function &F = MF->getFunction();
+ SmallVector<MVT, 1> ResultVTs;
+ SmallVector<MVT, 4> ParamVTs;
+ ComputeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
+ auto Signature = SignatureFromMVTs(ResultVTs, ParamVTs);
+ auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
+ WasmSym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+
+ // FIXME: clean up how params and results are emitted (use signatures)
+ getTargetStreamer()->emitFunctionType(WasmSym);
// Emit the function index.
if (MDNode *Idx = F.getMetadata("wasm.index")) {
@@ -150,16 +179,9 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
cast<ConstantAsMetadata>(Idx->getOperand(0))->getValue()));
}
- ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs);
-
- // If the return type needs to be legalized it will get converted into
- // passing a pointer.
- if (ResultVTs.size() == 1)
- getTargetStreamer()->emitResult(CurrentFnSym, ResultVTs);
- else
- getTargetStreamer()->emitResult(CurrentFnSym, ArrayRef<MVT>());
-
- getTargetStreamer()->emitLocal(MFI->getLocals());
+ SmallVector<wasm::ValType, 16> Locals;
+ ValTypesFromMVTs(MFI->getLocals(), Locals);
+ getTargetStreamer()->emitLocal(Locals);
AsmPrinter::EmitFunctionBodyStart();
}
@@ -168,42 +190,63 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
switch (MI->getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_i32:
+ case WebAssembly::ARGUMENT_i32_S:
+ case WebAssembly::ARGUMENT_i64:
+ case WebAssembly::ARGUMENT_i64_S:
+ case WebAssembly::ARGUMENT_f32:
+ case WebAssembly::ARGUMENT_f32_S:
+ case WebAssembly::ARGUMENT_f64:
+ case WebAssembly::ARGUMENT_f64_S:
case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v16i8_S:
case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v8i16_S:
case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4i32_S:
+ case WebAssembly::ARGUMENT_v2i64:
+ case WebAssembly::ARGUMENT_v2i64_S:
case WebAssembly::ARGUMENT_v4f32:
+ case WebAssembly::ARGUMENT_v4f32_S:
+ case WebAssembly::ARGUMENT_v2f64:
+ case WebAssembly::ARGUMENT_v2f64_S:
// These represent values which are live into the function entry, so there's
// no instruction to emit.
break;
case WebAssembly::FALLTHROUGH_RETURN_I32:
+ case WebAssembly::FALLTHROUGH_RETURN_I32_S:
case WebAssembly::FALLTHROUGH_RETURN_I64:
+ case WebAssembly::FALLTHROUGH_RETURN_I64_S:
case WebAssembly::FALLTHROUGH_RETURN_F32:
+ case WebAssembly::FALLTHROUGH_RETURN_F32_S:
case WebAssembly::FALLTHROUGH_RETURN_F64:
+ case WebAssembly::FALLTHROUGH_RETURN_F64_S:
case WebAssembly::FALLTHROUGH_RETURN_v16i8:
+ case WebAssembly::FALLTHROUGH_RETURN_v16i8_S:
case WebAssembly::FALLTHROUGH_RETURN_v8i16:
+ case WebAssembly::FALLTHROUGH_RETURN_v8i16_S:
case WebAssembly::FALLTHROUGH_RETURN_v4i32:
- case WebAssembly::FALLTHROUGH_RETURN_v4f32: {
+ case WebAssembly::FALLTHROUGH_RETURN_v4i32_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v2i64:
+ case WebAssembly::FALLTHROUGH_RETURN_v2i64_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v4f32:
+ case WebAssembly::FALLTHROUGH_RETURN_v4f32_S:
+ case WebAssembly::FALLTHROUGH_RETURN_v2f64:
+ case WebAssembly::FALLTHROUGH_RETURN_v2f64_S: {
// These instructions represent the implicit return at the end of a
- // function body. The operand is always a pop.
- assert(MFI->isVRegStackified(MI->getOperand(0).getReg()));
-
+ // function body. Always pops one value off the stack.
if (isVerbose()) {
- OutStreamer->AddComment("fallthrough-return: $pop" +
- Twine(MFI->getWARegStackId(
- MFI->getWAReg(MI->getOperand(0).getReg()))));
+ OutStreamer->AddComment("fallthrough-return-value");
OutStreamer->AddBlankLine();
}
break;
}
case WebAssembly::FALLTHROUGH_RETURN_VOID:
+ case WebAssembly::FALLTHROUGH_RETURN_VOID_S:
// This instruction represents the implicit return at the end of a
// function body with no return value.
if (isVerbose()) {
- OutStreamer->AddComment("fallthrough-return");
+ OutStreamer->AddComment("fallthrough-return-void");
OutStreamer->AddBlankLine();
}
break;
@@ -244,6 +287,9 @@ bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
OS << MO.getImm();
return false;
case MachineOperand::MO_Register:
+ // FIXME: the only opcode that still contains registers, as required by
+ // MachineInstr::getDebugVariable().
+ assert(MI->getOpcode() == WebAssembly::INLINEASM);
OS << regToString(MO);
return false;
case MachineOperand::MO_GlobalAddress:
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index 23817b4e5126..f6cb5610bad3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -25,18 +25,23 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
const WebAssemblySubtarget *Subtarget;
const MachineRegisterInfo *MRI;
WebAssemblyFunctionInfo *MFI;
+ // TODO: Do the uniquing of Signatures here instead of ObjectFileWriter?
+ std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
public:
explicit WebAssemblyAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)),
- Subtarget(nullptr), MRI(nullptr), MFI(nullptr) {}
+ : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr), MRI(nullptr),
+ MFI(nullptr) {}
StringRef getPassName() const override {
return "WebAssembly Assembly Printer";
}
const WebAssemblySubtarget &getSubtarget() const { return *Subtarget; }
+ void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
+ Signatures.push_back(std::move(Sig));
+ }
//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 267a51433cd1..fc827e9d5780 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -11,14 +11,15 @@
/// This file implements a CFG sorting pass.
///
/// This pass reorders the blocks in a function to put them into topological
-/// order, ignoring loop backedges, and without any loop being interrupted
-/// by a block not dominated by the loop header, with special care to keep the
-/// order as similar as possible to the original order.
+/// order, ignoring loop backedges, and without any loop or exception being
+/// interrupted by a block not dominated by its header, with special care
+/// to keep the order as similar as possible to the original order.
///
////===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PriorityQueue.h"
@@ -35,6 +36,73 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-cfg-sort"
namespace {
+
+// Wrapper for loops and exceptions
+class Region {
+public:
+ virtual ~Region() = default;
+ virtual MachineBasicBlock *getHeader() const = 0;
+ virtual bool contains(const MachineBasicBlock *MBB) const = 0;
+ virtual unsigned getNumBlocks() const = 0;
+ using block_iterator = typename ArrayRef<MachineBasicBlock *>::const_iterator;
+ virtual iterator_range<block_iterator> blocks() const = 0;
+ virtual bool isLoop() const = 0;
+};
+
+template <typename T> class ConcreteRegion : public Region {
+ const T *Region;
+
+public:
+ ConcreteRegion(const T *Region) : Region(Region) {}
+ MachineBasicBlock *getHeader() const override { return Region->getHeader(); }
+ bool contains(const MachineBasicBlock *MBB) const override {
+ return Region->contains(MBB);
+ }
+ unsigned getNumBlocks() const override { return Region->getNumBlocks(); }
+ iterator_range<block_iterator> blocks() const override {
+ return Region->blocks();
+ }
+ bool isLoop() const override { return false; }
+};
+
+template <> bool ConcreteRegion<MachineLoop>::isLoop() const { return true; }
+
+// This class holds information about nested Regions; this is analogous to what
+// LoopInfo is for loops.
+class RegionInfo {
+ const MachineLoopInfo &MLI;
+ const WebAssemblyExceptionInfo &WEI;
+ std::vector<const Region *> Regions;
+ DenseMap<const MachineLoop *, std::unique_ptr<Region>> LoopMap;
+ DenseMap<const WebAssemblyException *, std::unique_ptr<Region>> ExceptionMap;
+
+public:
+ RegionInfo(const MachineLoopInfo &MLI, const WebAssemblyExceptionInfo &WEI)
+ : MLI(MLI), WEI(WEI) {}
+
+ // Returns the smallest loop or exception that contains MBB
+ const Region *getRegionFor(const MachineBasicBlock *MBB) {
+ const auto *ML = MLI.getLoopFor(MBB);
+ const auto *WE = WEI.getExceptionFor(MBB);
+ if (!ML && !WE)
+ return nullptr;
+ if ((ML && !WE) || (ML && WE && ML->getNumBlocks() < WE->getNumBlocks())) {
+ // If the smallest region containing MBB is a loop
+ if (LoopMap.count(ML))
+ return LoopMap[ML].get();
+ LoopMap[ML] = llvm::make_unique<ConcreteRegion<MachineLoop>>(ML);
+ return LoopMap[ML].get();
+ } else {
+ // If the smallest region containing MBB is an exception
+ if (ExceptionMap.count(WE))
+ return ExceptionMap[WE].get();
+ ExceptionMap[WE] =
+ llvm::make_unique<ConcreteRegion<WebAssemblyException>>(WE);
+ return ExceptionMap[WE].get();
+ }
+ }
+};
+
class WebAssemblyCFGSort final : public MachineFunctionPass {
StringRef getPassName() const override { return "WebAssembly CFG Sort"; }
@@ -44,6 +112,8 @@ class WebAssemblyCFGSort final : public MachineFunctionPass {
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<WebAssemblyExceptionInfo>();
+ AU.addPreserved<WebAssemblyExceptionInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
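
The Region/ConcreteRegion pair introduced above is a small type-erasure wrapper:
MachineLoop and WebAssemblyException share no base class, but both expose
getHeader/contains/getNumBlocks/blocks, so a templated adapter lets the sorter treat
them uniformly, with only the loop specialization reporting isLoop(). A stripped-down
sketch of the pattern, using made-up Loop/Exception stand-ins rather than the real
machine-level classes:

// Two unrelated classes with a compatible interface.
struct Loop {
  int header() const { return 1; }
  unsigned numBlocks() const { return 4; }
};
struct Exception {
  int header() const { return 7; }
  unsigned numBlocks() const { return 2; }
};

// Abstract interface the sorter works against.
class Region {
public:
  virtual ~Region() = default;
  virtual int getHeader() const = 0;
  virtual unsigned getNumBlocks() const = 0;
  virtual bool isLoop() const = 0;
};

// Templated adapter forwarding to whichever concrete class it wraps.
template <typename T> class ConcreteRegion : public Region {
  const T *R;

public:
  explicit ConcreteRegion(const T *R) : R(R) {}
  int getHeader() const override { return R->header(); }
  unsigned getNumBlocks() const override { return R->numBlocks(); }
  bool isLoop() const override { return false; }
};

// Only the loop specialization reports true, mirroring the patch.
template <> bool ConcreteRegion<Loop>::isLoop() const { return true; }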
@@ -81,10 +151,48 @@ static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
}
namespace {
+// EH pads are selected first regardless of the block comparison order.
+// When only one of the BBs is an EH pad, we give a higher priority to it, to
+// prevent common mismatches between possibly throwing calls and ehpads they
+// unwind to, as in the example below:
+//
+// bb0:
+// call @foo // If this throws, unwind to bb2
+// bb1:
+// call @bar // If this throws, unwind to bb3
+// bb2 (ehpad):
+// handler_bb2
+// bb3 (ehpad):
+// handler_bb3
+// continuing code
+//
+// Because this pass tries to preserve the original BB order, this order will
+// not change. But this will result in this try-catch structure in CFGStackify,
+// resulting in a mismatch:
+// try
+// try
+// call @foo
+// call @bar // This should unwind to bb3, not bb2!
+// catch
+// handler_bb2
+// end
+// catch
+// handler_bb3
+// end
+// continuing code
+//
+// If we give a higher priority to an EH pad whenever it is ready in this
+// example, when both bb1 and bb2 are ready, we would pick up bb2 first.
+
/// Sort blocks by their number.
struct CompareBlockNumbers {
bool operator()(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
+ if (A->isEHPad() && !B->isEHPad())
+ return false;
+ if (!A->isEHPad() && B->isEHPad())
+ return true;
+
return A->getNumber() > B->getNumber();
}
};
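
CompareBlockNumbers above is written against std::priority_queue semantics: the queue
pops the element the comparator ranks greatest, so returning false when A is an EH
pad (and B is not) keeps EH pads on top, and the final Number comparison makes
lower-numbered blocks pop first among non-pads. A self-contained sketch of the same
comparator shape; Block here is a toy stand-in for MachineBasicBlock.

#include <cstdio>
#include <queue>
#include <vector>

struct Block {
  int Number;
  bool IsEHPad;
};

// Pops EH pads first, then the remaining blocks in increasing number order.
struct PreferEHPadsThenLowNumbers {
  bool operator()(const Block &A, const Block &B) const {
    if (A.IsEHPad && !B.IsEHPad)
      return false; // A is not "less", so it stays at the top
    if (!A.IsEHPad && B.IsEHPad)
      return true;
    return A.Number > B.Number; // lower numbers rank higher
  }
};

int main() {
  std::priority_queue<Block, std::vector<Block>, PreferEHPadsThenLowNumbers> Q;
  Q.push({0, false});
  Q.push({2, true}); // EH pad
  Q.push({1, false});
  while (!Q.empty()) {
    std::printf("bb%d\n", Q.top().Number); // prints bb2, bb0, bb1
    Q.pop();
  }
}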
@@ -92,29 +200,36 @@ struct CompareBlockNumbers {
struct CompareBlockNumbersBackwards {
bool operator()(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
+ // We give a higher priority to an EH pad
+ if (A->isEHPad() && !B->isEHPad())
+ return false;
+ if (!A->isEHPad() && B->isEHPad())
+ return true;
+
return A->getNumber() < B->getNumber();
}
};
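An aside, not part of the patch: a standalone sketch of how a comparator shaped like the two above behaves in a priority queue (std::priority_queue here standing in for llvm::PriorityQueue; Block and CompareBlocks are hypothetical stand-ins for MachineBasicBlock and the real comparators). Returning false whenever A is the EH pad and B is not means an EH pad is never ranked below an ordinary block, so it is popped first whichever way the block-number tiebreak points.

    #include <cstdio>
    #include <queue>
    #include <vector>

    // Hypothetical stand-in for MachineBasicBlock.
    struct Block {
      int Number;
      bool IsEHPad;
    };

    // Same shape as CompareBlockNumbers above: EH pads are never considered
    // lower-priority than ordinary blocks; otherwise order by block number,
    // with lower numbers popped first.
    struct CompareBlocks {
      bool operator()(const Block *A, const Block *B) const {
        if (A->IsEHPad && !B->IsEHPad)
          return false;
        if (!A->IsEHPad && B->IsEHPad)
          return true;
        return A->Number > B->Number;
      }
    };

    int main() {
      Block B0{0, false}, B1{1, false}, B2{2, true};
      std::priority_queue<Block *, std::vector<Block *>, CompareBlocks> Q;
      Q.push(&B1);
      Q.push(&B2);
      Q.push(&B0);
      while (!Q.empty()) {
        std::printf("bb%d\n", Q.top()->Number); // bb2 (the EH pad) first, then bb0, bb1
        Q.pop();
      }
      return 0;
    }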
-/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated
-/// by the loop header among the loop's blocks.
+/// Bookkeeping for a region to help ensure that we don't mix blocks not
+/// dominated by its header among its blocks.
struct Entry {
- const MachineLoop *Loop;
+ const Region *TheRegion;
unsigned NumBlocksLeft;
/// List of blocks not dominated by Loop's header that are deferred until
/// after all of Loop's blocks have been seen.
std::vector<MachineBasicBlock *> Deferred;
- explicit Entry(const MachineLoop *L)
- : Loop(L), NumBlocksLeft(L->getNumBlocks()) {}
+ explicit Entry(const class Region *R)
+ : TheRegion(R), NumBlocksLeft(R->getNumBlocks()) {}
};
} // end anonymous namespace
-/// Sort the blocks, taking special care to make sure that loops are not
+/// Sort the blocks, taking special care to make sure that regions are not
/// interrupted by blocks not dominated by their header.
/// TODO: There are many opportunities for improving the heuristics here.
/// Explore them.
static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const WebAssemblyExceptionInfo &WEI,
const MachineDominatorTree &MDT) {
// Prepare for a topological sort: Record the number of predecessors each
// block has, ignoring loop backedges.
@@ -131,35 +246,39 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
}
// Topological sort the CFG, with additional constraints:
- // - Between a loop header and the last block in the loop, there can be
- // no blocks not dominated by the loop header.
+ // - Between a region header and the last block in the region, there can be
+ // no blocks not dominated by its header.
// - It's desirable to preserve the original block order when possible.
// We use two ready lists; Preferred and Ready. Preferred has recently
// processed successors, to help preserve block sequences from the original
- // order. Ready has the remaining ready blocks.
+ // order. Ready has the remaining ready blocks. EH blocks are picked first
+ // from both queues.
PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
CompareBlockNumbers>
Preferred;
PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
CompareBlockNumbersBackwards>
Ready;
- SmallVector<Entry, 4> Loops;
+
+ RegionInfo SUI(MLI, WEI);
+ SmallVector<Entry, 4> Entries;
for (MachineBasicBlock *MBB = &MF.front();;) {
- const MachineLoop *L = MLI.getLoopFor(MBB);
- if (L) {
- // If MBB is a loop header, add it to the active loop list. We can't put
- // any blocks that it doesn't dominate until we see the end of the loop.
- if (L->getHeader() == MBB)
- Loops.push_back(Entry(L));
- // For each active loop the block is in, decrement the count. If MBB is
- // the last block in an active loop, take it off the list and pick up any
- // blocks deferred because the header didn't dominate them.
- for (Entry &E : Loops)
- if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0)
+ const Region *R = SUI.getRegionFor(MBB);
+ if (R) {
+ // If MBB is a region header, add it to the active region list. We can't
+ // put any blocks that it doesn't dominate until we see the end of the
+ // region.
+ if (R->getHeader() == MBB)
+ Entries.push_back(Entry(R));
+ // For each active region the block is in, decrement the count. If MBB is
+ // the last block in an active region, take it off the list and pick up
+ // any blocks deferred because the header didn't dominate them.
+ for (Entry &E : Entries)
+ if (E.TheRegion->contains(MBB) && --E.NumBlocksLeft == 0)
for (auto DeferredBlock : E.Deferred)
Ready.push(DeferredBlock);
- while (!Loops.empty() && Loops.back().NumBlocksLeft == 0)
- Loops.pop_back();
+ while (!Entries.empty() && Entries.back().NumBlocksLeft == 0)
+ Entries.pop_back();
}
// The main topological sort logic.
for (MachineBasicBlock *Succ : MBB->successors()) {
@@ -177,19 +296,19 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
while (!Preferred.empty()) {
Next = Preferred.top();
Preferred.pop();
- // If X isn't dominated by the top active loop header, defer it until that
- // loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
+ // If X isn't dominated by the top active region header, defer it until
+ // that region is done.
+ if (!Entries.empty() &&
+ !MDT.dominates(Entries.back().TheRegion->getHeader(), Next)) {
+ Entries.back().Deferred.push_back(Next);
Next = nullptr;
continue;
}
// If Next was originally ordered before MBB, and it isn't because it was
// loop-rotated above the header, it's not preferred.
if (Next->getNumber() < MBB->getNumber() &&
- (!L || !L->contains(Next) ||
- L->getHeader()->getNumber() < Next->getNumber())) {
+ (!R || !R->contains(Next) ||
+ R->getHeader()->getNumber() < Next->getNumber())) {
Ready.push(Next);
Next = nullptr;
continue;
@@ -207,11 +326,11 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (;;) {
Next = Ready.top();
Ready.pop();
- // If Next isn't dominated by the top active loop header, defer it until
- // that loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
+ // If Next isn't dominated by the top active region header, defer it
+ // until that region is done.
+ if (!Entries.empty() &&
+ !MDT.dominates(Entries.back().TheRegion->getHeader(), Next)) {
+ Entries.back().Deferred.push_back(Next);
continue;
}
break;
@@ -222,11 +341,11 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
MaybeUpdateTerminator(MBB);
MBB = Next;
}
- assert(Loops.empty() && "Active loop list not finished");
+ assert(Entries.empty() && "Active sort region list not finished");
MF.RenumberBlocks();
#ifndef NDEBUG
- SmallSetVector<MachineLoop *, 8> OnStack;
+ SmallSetVector<const Region *, 8> OnStack;
// Insert a sentinel representing the degenerate loop that starts at the
// function entry block and includes the entire function as a "loop" that
@@ -235,29 +354,39 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (auto &MBB : MF) {
assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
+ const Region *Region = SUI.getRegionFor(&MBB);
+
+ if (Region && &MBB == Region->getHeader()) {
+ if (Region->isLoop()) {
+ // Loop header. The loop predecessor should be sorted above, and the
+ // other predecessors should be backedges below.
+ for (auto Pred : MBB.predecessors())
+ assert(
+ (Pred->getNumber() < MBB.getNumber() || Region->contains(Pred)) &&
+ "Loop header predecessors must be loop predecessors or "
+ "backedges");
+ } else {
+ // Not a loop header. All predecessors should be sorted above.
+ for (auto Pred : MBB.predecessors())
+ assert(Pred->getNumber() < MBB.getNumber() &&
+ "Non-loop-header predecessors should be topologically sorted");
+ }
+ assert(OnStack.insert(Region) &&
+ "Regions should be declared at most once.");
- MachineLoop *Loop = MLI.getLoopFor(&MBB);
- if (Loop && &MBB == Loop->getHeader()) {
- // Loop header. The loop predecessor should be sorted above, and the other
- // predecessors should be backedges below.
- for (auto Pred : MBB.predecessors())
- assert(
- (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
- "Loop header predecessors must be loop predecessors or backedges");
- assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
} else {
// Not a loop header. All predecessors should be sorted above.
for (auto Pred : MBB.predecessors())
assert(Pred->getNumber() < MBB.getNumber() &&
"Non-loop-header predecessors should be topologically sorted");
- assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
- "Blocks must be nested in their loops");
+ assert(OnStack.count(SUI.getRegionFor(&MBB)) &&
+ "Blocks must be nested in their regions");
}
while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back()))
OnStack.pop_back();
}
assert(OnStack.pop_back_val() == nullptr &&
- "The function entry block shouldn't actually be a loop header");
+ "The function entry block shouldn't actually be a region header");
assert(OnStack.empty() &&
"Control flow stack pushes and pops should be balanced.");
#endif
@@ -269,12 +398,13 @@ bool WebAssemblyCFGSort::runOnMachineFunction(MachineFunction &MF) {
<< MF.getName() << '\n');
const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
auto &MDT = getAnalysis<MachineDominatorTree>();
// Liveness is not tracked for VALUE_STACK physreg.
MF.getRegInfo().invalidateLiveness();
- // Sort the blocks, with contiguous loops.
- SortBlocks(MF, MLI, MDT);
+ // Sort the blocks, with contiguous sort regions.
+ SortBlocks(MF, MLI, WEI, MDT);
return true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 70ce40cefed7..f8f5f4040c86 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -10,16 +10,21 @@
/// \file
/// This file implements a CFG stacking pass.
///
-/// This pass inserts BLOCK and LOOP markers to mark the start of scopes, since
-/// scope boundaries serve as the labels for WebAssembly's control transfers.
+/// This pass inserts BLOCK, LOOP, and TRY markers to mark the start of scopes,
+/// since scope boundaries serve as the labels for WebAssembly's control
+/// transfers.
///
/// This is sufficient to convert arbitrary CFGs into a form that works on
/// WebAssembly, provided that all loops are single-entry.
///
+/// When exceptions are used, this pass also fixes mismatches in unwind
+/// destinations created while transforming the CFG into wasm's structured format.
+///
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
@@ -29,6 +34,8 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -40,26 +47,57 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
StringRef getPassName() const override { return "WebAssembly CFG Stackify"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<WebAssemblyExceptionInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
+ // For each block whose label represents the end of a scope, record the block
+ // which holds the beginning of the scope. This will allow us to quickly skip
+ // over scoped regions when walking blocks.
+ SmallVector<MachineBasicBlock *, 8> ScopeTops;
+
+ void placeMarkers(MachineFunction &MF);
+ void placeBlockMarker(MachineBasicBlock &MBB);
+ void placeLoopMarker(MachineBasicBlock &MBB);
+ void placeTryMarker(MachineBasicBlock &MBB);
+ void rewriteDepthImmediates(MachineFunction &MF);
+ void fixEndsAtEndOfFunction(MachineFunction &MF);
+
+ // For each BLOCK|LOOP|TRY, the corresponding END_(BLOCK|LOOP|TRY).
+ DenseMap<const MachineInstr *, MachineInstr *> BeginToEnd;
+ // For each END_(BLOCK|LOOP|TRY), the corresponding BLOCK|LOOP|TRY.
+ DenseMap<const MachineInstr *, MachineInstr *> EndToBegin;
+ // <TRY marker, EH pad> map
+ DenseMap<const MachineInstr *, MachineBasicBlock *> TryToEHPad;
+ // <EH pad, TRY marker> map
+ DenseMap<const MachineBasicBlock *, MachineInstr *> EHPadToTry;
+ // <LOOP|TRY marker, Loop/exception bottom BB> map
+ DenseMap<const MachineInstr *, MachineBasicBlock *> BeginToBottom;
+
+ // Helper functions to register scope information created by marker
+ // instructions.
+ void registerScope(MachineInstr *Begin, MachineInstr *End);
+ void registerTryScope(MachineInstr *Begin, MachineInstr *End,
+ MachineBasicBlock *EHPad);
+
+ MachineBasicBlock *getBottom(const MachineInstr *Begin);
+
public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyCFGStackify() : MachineFunctionPass(ID) {}
+ ~WebAssemblyCFGStackify() override { releaseMemory(); }
+ void releaseMemory() override;
};
} // end anonymous namespace
char WebAssemblyCFGStackify::ID = 0;
INITIALIZE_PASS(WebAssemblyCFGStackify, DEBUG_TYPE,
- "Insert BLOCK and LOOP markers for WebAssembly scopes",
- false, false)
+ "Insert BLOCK and LOOP markers for WebAssembly scopes", false,
+ false)
FunctionPass *llvm::createWebAssemblyCFGStackify() {
return new WebAssemblyCFGStackify();
@@ -73,34 +111,121 @@ FunctionPass *llvm::createWebAssemblyCFGStackify() {
static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
MachineBasicBlock *MBB) {
for (MachineInstr &MI : Pred->terminators())
- for (MachineOperand &MO : MI.explicit_operands())
- if (MO.isMBB() && MO.getMBB() == MBB)
- return true;
+ // Even if a rethrow takes a BB argument, it is not a branch
+ if (!WebAssembly::isRethrow(MI))
+ for (MachineOperand &MO : MI.explicit_operands())
+ if (MO.isMBB() && MO.getMBB() == MBB)
+ return true;
return false;
}
+// Returns an iterator to the earliest position possible within the MBB,
+// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
+// contains instructions that should go before the marker, and AfterSet contains
+// ones that should go after the marker. In this function, AfterSet is only
+// used for sanity checking.
+static MachineBasicBlock::iterator
+GetEarliestInsertPos(MachineBasicBlock *MBB,
+ const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
+ const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+ auto InsertPos = MBB->end();
+ while (InsertPos != MBB->begin()) {
+ if (BeforeSet.count(&*std::prev(InsertPos))) {
+#ifndef NDEBUG
+ // Sanity check
+ for (auto Pos = InsertPos, E = MBB->begin(); Pos != E; --Pos)
+ assert(!AfterSet.count(&*std::prev(Pos)));
+#endif
+ break;
+ }
+ --InsertPos;
+ }
+ return InsertPos;
+}
+
+// Returns an iterator to the latest position possible within the MBB,
+// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
+// contains instructions that should go before the marker, and AfterSet contains
+// ones that should go after the marker. In this function, BeforeSet is only
+// used for sanity checking.
+static MachineBasicBlock::iterator
+GetLatestInsertPos(MachineBasicBlock *MBB,
+ const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
+ const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
+ auto InsertPos = MBB->begin();
+ while (InsertPos != MBB->end()) {
+ if (AfterSet.count(&*InsertPos)) {
+#ifndef NDEBUG
+ // Sanity check
+ for (auto Pos = InsertPos, E = MBB->end(); Pos != E; ++Pos)
+ assert(!BeforeSet.count(&*Pos));
+#endif
+ break;
+ }
+ ++InsertPos;
+ }
+ return InsertPos;
+}
+
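An aside, not part of the patch: a toy illustration of the contract the two helpers above implement, using ints in a std::vector in place of instructions in an MBB (earliestPos and latestPos are illustrative names, not the pass's functions). The earliest legal slot is just past the last element that must precede the marker; the latest legal slot is right before the first element that must follow it.

    #include <cstdio>
    #include <set>
    #include <vector>

    using Iter = std::vector<int>::iterator;

    // Earliest legal slot: just past the last element that must stay before the
    // marker (mirrors the search direction of GetEarliestInsertPos above).
    Iter earliestPos(std::vector<int> &Seq, const std::set<int> &Before) {
      Iter Pos = Seq.end();
      while (Pos != Seq.begin() && !Before.count(*std::prev(Pos)))
        --Pos;
      return Pos;
    }

    // Latest legal slot: right before the first element that must stay after the
    // marker (mirrors the search direction of GetLatestInsertPos above).
    Iter latestPos(std::vector<int> &Seq, const std::set<int> &After) {
      Iter Pos = Seq.begin();
      while (Pos != Seq.end() && !After.count(*Pos))
        ++Pos;
      return Pos;
    }

    int main() {
      std::vector<int> Seq{10, 20, 30, 40, 50};
      std::set<int> Before{20};    // 20 must precede the marker
      std::set<int> After{40, 50}; // 40 and 50 must follow it
      std::printf("earliest index: %td\n", earliestPos(Seq, Before) - Seq.begin()); // 2
      std::printf("latest index:   %td\n", latestPos(Seq, After) - Seq.begin());    // 3
      return 0;
    }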
+void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin,
+ MachineInstr *End) {
+ BeginToEnd[Begin] = End;
+ EndToBegin[End] = Begin;
+}
+
+void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin,
+ MachineInstr *End,
+ MachineBasicBlock *EHPad) {
+ registerScope(Begin, End);
+ TryToEHPad[Begin] = EHPad;
+ EHPadToTry[EHPad] = Begin;
+}
+
+// Given a LOOP/TRY marker, returns its bottom BB. Use cached information if any
+// to prevent recomputation.
+MachineBasicBlock *
+WebAssemblyCFGStackify::getBottom(const MachineInstr *Begin) {
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+ if (BeginToBottom.count(Begin))
+ return BeginToBottom[Begin];
+ if (Begin->getOpcode() == WebAssembly::LOOP) {
+ MachineLoop *L = MLI.getLoopFor(Begin->getParent());
+ assert(L);
+ BeginToBottom[Begin] = WebAssembly::getBottom(L);
+ } else if (Begin->getOpcode() == WebAssembly::TRY) {
+ WebAssemblyException *WE = WEI.getExceptionFor(TryToEHPad[Begin]);
+ assert(WE);
+ BeginToBottom[Begin] = WebAssembly::getBottom(WE);
+ } else
+ assert(false);
+ return BeginToBottom[Begin];
+}
+
/// Insert a BLOCK marker for branches to MBB (if needed).
-static void PlaceBlockMarker(
- MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- DenseMap<const MachineInstr *, MachineInstr *> &BlockTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops,
- const WebAssemblyInstrInfo &TII,
- const MachineLoopInfo &MLI,
- MachineDominatorTree &MDT,
- WebAssemblyFunctionInfo &MFI) {
+void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
+ // This should have been handled in placeTryMarker.
+ if (MBB.isEHPad())
+ return;
+
+ MachineFunction &MF = *MBB.getParent();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
// First compute the nearest common dominator of all forward non-fallthrough
// predecessors so that we minimize the time that the BLOCK is on the stack,
// which reduces overall stack height.
MachineBasicBlock *Header = nullptr;
bool IsBranchedTo = false;
int MBBNumber = MBB.getNumber();
- for (MachineBasicBlock *Pred : MBB.predecessors())
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
if (Pred->getNumber() < MBBNumber) {
Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
if (ExplicitlyBranchesTo(Pred, &MBB))
IsBranchedTo = true;
}
+ }
if (!Header)
return;
if (!IsBranchedTo)
@@ -125,43 +250,93 @@ static void PlaceBlockMarker(
}
// Decide where in Header to put the BLOCK.
- MachineBasicBlock::iterator InsertPos;
- MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
- if (HeaderLoop &&
- MBB.getNumber() > WebAssembly::getBottom(HeaderLoop)->getNumber()) {
- // Header is the header of a loop that does not lexically contain MBB, so
- // the BLOCK needs to be above the LOOP, after any END constructs.
- InsertPos = Header->begin();
- while (InsertPos->getOpcode() == WebAssembly::END_BLOCK ||
- InsertPos->getOpcode() == WebAssembly::END_LOOP)
- ++InsertPos;
- } else {
- // Otherwise, insert the BLOCK as late in Header as we can, but before the
- // beginning of the local expression tree and any nested BLOCKs.
- InsertPos = Header->getFirstTerminator();
- while (InsertPos != Header->begin() &&
- WebAssembly::isChild(*std::prev(InsertPos), MFI) &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
- std::prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
- --InsertPos;
+
+ // Instructions that should go before the BLOCK.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ // Instructions that should go after the BLOCK.
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : *Header) {
+ // If there is a previously placed LOOP/TRY marker and the bottom block of
+ // the loop/exception is above MBB, it should be after the BLOCK, because
+ // the loop/exception is nested in this block. Otherwise it should be before
+ // the BLOCK.
+ if (MI.getOpcode() == WebAssembly::LOOP ||
+ MI.getOpcode() == WebAssembly::TRY) {
+ if (MBB.getNumber() > getBottom(&MI)->getNumber())
+ AfterSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ BeforeSet.insert(&MI);
+#endif
+ }
+
+ // All previously inserted BLOCK markers should be after the BLOCK because
+ // they are all nested blocks.
+ if (MI.getOpcode() == WebAssembly::BLOCK)
+ AfterSet.insert(&MI);
+
+#ifndef NDEBUG
+ // All END_(BLOCK|LOOP|TRY) markers should be before the BLOCK.
+ if (MI.getOpcode() == WebAssembly::END_BLOCK ||
+ MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY)
+ BeforeSet.insert(&MI);
+#endif
+
+ // Terminators should go after the BLOCK.
+ if (MI.isTerminator())
+ AfterSet.insert(&MI);
+ }
+
+ // Local expression tree should go after the BLOCK.
+ for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E;
+ --I) {
+ if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+ continue;
+ if (WebAssembly::isChild(*std::prev(I), MFI))
+ AfterSet.insert(&*std::prev(I));
+ else
+ break;
}
// Add the BLOCK.
+ auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
MachineInstr *Begin =
BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
TII.get(WebAssembly::BLOCK))
.addImm(int64_t(WebAssembly::ExprType::Void));
+ // Decide where in Header to put the END_BLOCK.
+ BeforeSet.clear();
+ AfterSet.clear();
+ for (auto &MI : MBB) {
+#ifndef NDEBUG
+ // END_BLOCK should precede existing LOOP and TRY markers.
+ if (MI.getOpcode() == WebAssembly::LOOP ||
+ MI.getOpcode() == WebAssembly::TRY)
+ AfterSet.insert(&MI);
+#endif
+
+ // If there is a previously placed END_LOOP marker and the header of the
+ // loop is above this block's header, the END_LOOP should be placed after
+ // the BLOCK, because the loop contains this block. Otherwise the END_LOOP
+ // should be placed before the BLOCK. The same for END_TRY.
+ if (MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY) {
+ if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+ }
+
// Mark the end of the block.
- InsertPos = MBB.begin();
- while (InsertPos != MBB.end() &&
- InsertPos->getOpcode() == WebAssembly::END_LOOP &&
- LoopTops[&*InsertPos]->getParent()->getNumber() >= Header->getNumber())
- ++InsertPos;
+ InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *End = BuildMI(MBB, InsertPos, MBB.findPrevDebugLoc(InsertPos),
TII.get(WebAssembly::END_BLOCK));
- BlockTops[End] = Begin;
+ registerScope(Begin, End);
// Track the farthest-spanning scope that ends at this point.
int Number = MBB.getNumber();
@@ -171,11 +346,11 @@ static void PlaceBlockMarker(
}
/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
-static void PlaceLoopMarker(
- MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops,
- const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
+void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
MachineLoop *Loop = MLI.getLoopFor(&MBB);
if (!Loop || Loop->getHeader() != &MBB)
return;
@@ -193,22 +368,43 @@ static void PlaceLoopMarker(
}
MachineBasicBlock *AfterLoop = &*Iter;
- // Mark the beginning of the loop (after the end of any existing loop that
- // ends here).
- auto InsertPos = MBB.begin();
- while (InsertPos != MBB.end() &&
- InsertPos->getOpcode() == WebAssembly::END_LOOP)
- ++InsertPos;
+ // Decide where in Header to put the LOOP.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : MBB) {
+ // LOOP marker should be after any existing loop that ends here. Otherwise
+ // we assume the instruction belongs to the loop.
+ if (MI.getOpcode() == WebAssembly::END_LOOP)
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+
+ // Mark the beginning of the loop.
+ auto InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos),
TII.get(WebAssembly::LOOP))
.addImm(int64_t(WebAssembly::ExprType::Void));
- // Mark the end of the loop (using arbitrary debug location that branched
- // to the loop end as its location).
+ // Decide where in Header to put the END_LOOP.
+ BeforeSet.clear();
+ AfterSet.clear();
+#ifndef NDEBUG
+ for (const auto &MI : MBB)
+ // Existing END_LOOP markers belong to parent loops of this loop
+ if (MI.getOpcode() == WebAssembly::END_LOOP)
+ AfterSet.insert(&MI);
+#endif
+
+ // Mark the end of the loop (using arbitrary debug location that branched to
+ // the loop end as its location).
+ InsertPos = GetEarliestInsertPos(AfterLoop, BeforeSet, AfterSet);
DebugLoc EndDL = (*AfterLoop->pred_rbegin())->findBranchDebugLoc();
- MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), EndDL,
- TII.get(WebAssembly::END_LOOP));
- LoopTops[End] = Begin;
+ MachineInstr *End =
+ BuildMI(*AfterLoop, InsertPos, EndDL, TII.get(WebAssembly::END_LOOP));
+ registerScope(Begin, End);
assert((!ScopeTops[AfterLoop->getNumber()] ||
ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
@@ -217,6 +413,183 @@ static void PlaceLoopMarker(
ScopeTops[AfterLoop->getNumber()] = &MBB;
}
+void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
+ if (!MBB.isEHPad())
+ return;
+
+ // A catch_all terminate pad is grouped together with its catch terminate pad
+ // and does not need separate TRY and END_TRY markers.
+ if (WebAssembly::isCatchAllTerminatePad(MBB))
+ return;
+
+ MachineFunction &MF = *MBB.getParent();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+ // Compute the nearest common dominator of all unwind predecessors
+ MachineBasicBlock *Header = nullptr;
+ int MBBNumber = MBB.getNumber();
+ for (auto *Pred : MBB.predecessors()) {
+ if (Pred->getNumber() < MBBNumber) {
+ Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
+ assert(!ExplicitlyBranchesTo(Pred, &MBB) &&
+ "Explicit branch to an EH pad!");
+ }
+ }
+ if (!Header)
+ return;
+
+ // If this try is at the bottom of the function, insert a dummy block at the
+ // end.
+ WebAssemblyException *WE = WEI.getExceptionFor(&MBB);
+ assert(WE);
+ MachineBasicBlock *Bottom = WebAssembly::getBottom(WE);
+
+ auto Iter = std::next(MachineFunction::iterator(Bottom));
+ if (Iter == MF.end()) {
+ MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
+ // Give it a fake predecessor so that AsmPrinter prints its label.
+ Label->addSuccessor(Label);
+ MF.push_back(Label);
+ Iter = std::next(MachineFunction::iterator(Bottom));
+ }
+ MachineBasicBlock *AfterTry = &*Iter;
+
+ assert(AfterTry != &MF.front());
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(AfterTry));
+
+ // If the nearest common dominator is inside a more deeply nested context,
+ // walk out to the nearest scope which isn't more deeply nested.
+ for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
+ if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
+ if (ScopeTop->getNumber() > Header->getNumber()) {
+ // Skip over an intervening scope.
+ I = std::next(MachineFunction::iterator(ScopeTop));
+ } else {
+ // We found a scope level at an appropriate depth.
+ Header = ScopeTop;
+ break;
+ }
+ }
+ }
+
+ // Decide where in Header to put the TRY.
+
+ // Instructions that should go before the TRY.
+ SmallPtrSet<const MachineInstr *, 4> BeforeSet;
+ // Instructions that should go after the TRY.
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ for (const auto &MI : *Header) {
+ // If there is a previously placed LOOP marker and the bottom block of
+ // the loop is above MBB, the LOOP should be after the TRY, because the
+ // loop is nested in this try. Otherwise it should be before the TRY.
+ if (MI.getOpcode() == WebAssembly::LOOP) {
+ if (MBB.getNumber() > Bottom->getNumber())
+ AfterSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ BeforeSet.insert(&MI);
+#endif
+ }
+
+ // All previously inserted TRY markers should be after the TRY because they
+ // are all nested tries.
+ if (MI.getOpcode() == WebAssembly::TRY)
+ AfterSet.insert(&MI);
+
+#ifndef NDEBUG
+ // All END_(LOOP/TRY) markers should be before the TRY.
+ if (MI.getOpcode() == WebAssembly::END_LOOP ||
+ MI.getOpcode() == WebAssembly::END_TRY)
+ BeforeSet.insert(&MI);
+#endif
+
+ // Terminators should go after the TRY.
+ if (MI.isTerminator())
+ AfterSet.insert(&MI);
+ }
+
+ // Local expression tree should go after the TRY.
+ for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E;
+ --I) {
+ if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+ continue;
+ if (WebAssembly::isChild(*std::prev(I), MFI))
+ AfterSet.insert(&*std::prev(I));
+ else
+ break;
+ }
+
+ // If Header unwinds to MBB (= Header contains 'invoke'), the try block should
+ // contain the call within it. So the call should go after the TRY. The
+ // exception is when the header's terminator is a rethrow instruction, in
+ // which case that instruction, not a call instruction before it, is the one
+ // that will throw.
+ if (MBB.isPredecessor(Header)) {
+ auto TermPos = Header->getFirstTerminator();
+ if (TermPos == Header->end() || !WebAssembly::isRethrow(*TermPos)) {
+ for (const auto &MI : reverse(*Header)) {
+ if (MI.isCall()) {
+ AfterSet.insert(&MI);
+ break;
+ }
+ }
+ }
+ }
+
+ // Add the TRY.
+ auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
+ MachineInstr *Begin =
+ BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
+ TII.get(WebAssembly::TRY))
+ .addImm(int64_t(WebAssembly::ExprType::Void));
+
+ // Decide where in Header to put the END_TRY.
+ BeforeSet.clear();
+ AfterSet.clear();
+ for (const auto &MI : *AfterTry) {
+#ifndef NDEBUG
+ // END_TRY should precede existing LOOP markers.
+ if (MI.getOpcode() == WebAssembly::LOOP)
+ AfterSet.insert(&MI);
+
+ // All END_TRY markers placed earlier belong to exceptions that contain
+ // this one.
+ if (MI.getOpcode() == WebAssembly::END_TRY)
+ AfterSet.insert(&MI);
+#endif
+
+ // If there is a previously placed END_LOOP marker and its header is after
+ // where TRY marker is, this loop is contained within the 'catch' part, so
+ // the END_TRY marker should go after that. Otherwise, the whole try-catch
+ // is contained within this loop, so the END_TRY should go before that.
+ if (MI.getOpcode() == WebAssembly::END_LOOP) {
+ if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+ BeforeSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ AfterSet.insert(&MI);
+#endif
+ }
+ }
+
+ // Mark the end of the TRY.
+ InsertPos = GetEarliestInsertPos(AfterTry, BeforeSet, AfterSet);
+ MachineInstr *End =
+ BuildMI(*AfterTry, InsertPos, Bottom->findBranchDebugLoc(),
+ TII.get(WebAssembly::END_TRY));
+ registerTryScope(Begin, End, &MBB);
+
+ // Track the farthest-spanning scope that ends at this point.
+ int Number = AfterTry->getNumber();
+ if (!ScopeTops[Number] ||
+ ScopeTops[Number]->getNumber() > Header->getNumber())
+ ScopeTops[Number] = Header;
+}
+
static unsigned
GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
const MachineBasicBlock *MBB) {
@@ -237,11 +610,8 @@ GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
/// that end at the function end need to have a return type signature that
/// matches the function signature, even though it's unreachable. This function
/// checks for such cases and fixes up the signatures.
-static void FixEndsAtEndOfFunction(
- MachineFunction &MF,
- const WebAssemblyFunctionInfo &MFI,
- DenseMap<const MachineInstr *, MachineInstr *> &BlockTops,
- DenseMap<const MachineInstr *, MachineInstr *> &LoopTops) {
+void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
+ const auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
assert(MFI.getResults().size() <= 1);
if (MFI.getResults().empty())
@@ -249,16 +619,31 @@ static void FixEndsAtEndOfFunction(
WebAssembly::ExprType retType;
switch (MFI.getResults().front().SimpleTy) {
- case MVT::i32: retType = WebAssembly::ExprType::I32; break;
- case MVT::i64: retType = WebAssembly::ExprType::I64; break;
- case MVT::f32: retType = WebAssembly::ExprType::F32; break;
- case MVT::f64: retType = WebAssembly::ExprType::F64; break;
- case MVT::v16i8: retType = WebAssembly::ExprType::I8x16; break;
- case MVT::v8i16: retType = WebAssembly::ExprType::I16x8; break;
- case MVT::v4i32: retType = WebAssembly::ExprType::I32x4; break;
- case MVT::v4f32: retType = WebAssembly::ExprType::F32x4; break;
- case MVT::ExceptRef: retType = WebAssembly::ExprType::ExceptRef; break;
- default: llvm_unreachable("unexpected return type");
+ case MVT::i32:
+ retType = WebAssembly::ExprType::I32;
+ break;
+ case MVT::i64:
+ retType = WebAssembly::ExprType::I64;
+ break;
+ case MVT::f32:
+ retType = WebAssembly::ExprType::F32;
+ break;
+ case MVT::f64:
+ retType = WebAssembly::ExprType::F64;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ retType = WebAssembly::ExprType::V128;
+ break;
+ case MVT::ExceptRef:
+ retType = WebAssembly::ExprType::ExceptRef;
+ break;
+ default:
+ llvm_unreachable("unexpected return type");
}
for (MachineBasicBlock &MBB : reverse(MF)) {
@@ -266,11 +651,11 @@ static void FixEndsAtEndOfFunction(
if (MI.isPosition() || MI.isDebugInstr())
continue;
if (MI.getOpcode() == WebAssembly::END_BLOCK) {
- BlockTops[&MI]->getOperand(0).setImm(int32_t(retType));
+ EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
continue;
}
if (MI.getOpcode() == WebAssembly::END_LOOP) {
- LoopTops[&MI]->getOperand(0).setImm(int32_t(retType));
+ EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
continue;
}
// Something other than an `end`. We're done.
@@ -281,60 +666,108 @@ static void FixEndsAtEndOfFunction(
// WebAssembly functions end with an end instruction, as if the function body
// were a block.
-static void AppendEndToFunction(
- MachineFunction &MF,
- const WebAssemblyInstrInfo &TII) {
+static void AppendEndToFunction(MachineFunction &MF,
+ const WebAssemblyInstrInfo &TII) {
BuildMI(MF.back(), MF.back().end(),
MF.back().findPrevDebugLoc(MF.back().end()),
TII.get(WebAssembly::END_FUNCTION));
}
-/// Insert LOOP and BLOCK markers at appropriate places.
-static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
- const WebAssemblyInstrInfo &TII,
- MachineDominatorTree &MDT,
- WebAssemblyFunctionInfo &MFI) {
- // For each block whose label represents the end of a scope, record the block
- // which holds the beginning of the scope. This will allow us to quickly skip
- // over scoped regions when walking blocks. We allocate one more than the
- // number of blocks in the function to accommodate for the possible fake block
- // we may insert at the end.
- SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
-
- // For each LOOP_END, the corresponding LOOP.
- DenseMap<const MachineInstr *, MachineInstr *> LoopTops;
-
- // For each END_BLOCK, the corresponding BLOCK.
- DenseMap<const MachineInstr *, MachineInstr *> BlockTops;
-
- for (auto &MBB : MF) {
- // Place the LOOP for MBB if MBB is the header of a loop.
- PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
-
- // Place the BLOCK for MBB if MBB is branched to from above.
- PlaceBlockMarker(MBB, MF, ScopeTops, BlockTops, LoopTops, TII, MLI, MDT, MFI);
- }
+/// Insert LOOP/TRY/BLOCK markers at appropriate places.
+void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) {
+ const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
+ // We allocate one more than the number of blocks in the function to
+ // accommodate the possible fake block we may insert at the end.
+ ScopeTops.resize(MF.getNumBlockIDs() + 1);
+ // Place the LOOP for MBB if MBB is the header of a loop.
+ for (auto &MBB : MF)
+ placeLoopMarker(MBB);
+ // Place the TRY for MBB if MBB is the EH pad of an exception.
+ if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn())
+ for (auto &MBB : MF)
+ placeTryMarker(MBB);
+ // Place the BLOCK for MBB if MBB is branched to from above.
+ for (auto &MBB : MF)
+ placeBlockMarker(MBB);
+}
+void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
// Now rewrite references to basic blocks to be depth immediates.
+ // We need two stacks: one for normal scopes and the other for EH pad scopes.
+ // The EH pad stack is used to rewrite depths in rethrow instructions.
SmallVector<const MachineBasicBlock *, 8> Stack;
+ SmallVector<const MachineBasicBlock *, 8> EHPadStack;
for (auto &MBB : reverse(MF)) {
- for (auto &MI : reverse(MBB)) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
+ MachineInstr &MI = *I;
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
- assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <= MBB.getNumber() &&
- "Block should be balanced");
+ assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
+ MBB.getNumber() &&
+ "Block/try should be balanced");
Stack.pop_back();
break;
+
+ case WebAssembly::TRY:
+ assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
+ MBB.getNumber() &&
+ "Block/try marker should be balanced");
+ Stack.pop_back();
+ EHPadStack.pop_back();
+ break;
+
+ case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_ALL:
+ // Currently the only case in which there is more than one catch for a try
+ // is the catch terminate pad, which has the form
+ // try
+ // catch
+ // call @__clang_call_terminate
+ // unreachable
+ // catch_all
+ // call @std::terminate
+ // unreachable
+ // end
+ // So we shouldn't push the current BB for the second catch_all block
+ // here.
+ if (!WebAssembly::isCatchAllTerminatePad(MBB))
+ EHPadStack.push_back(&MBB);
+ break;
+
case WebAssembly::LOOP:
assert(Stack.back() == &MBB && "Loop top should be balanced");
Stack.pop_back();
break;
+
case WebAssembly::END_BLOCK:
+ case WebAssembly::END_TRY:
Stack.push_back(&MBB);
break;
+
case WebAssembly::END_LOOP:
- Stack.push_back(LoopTops[&MI]->getParent());
+ Stack.push_back(EndToBegin[&MI]->getParent());
+ break;
+
+ case WebAssembly::RETHROW: {
+ // Rewrite MBB operands to be depth immediates.
+ unsigned EHPadDepth = GetDepth(EHPadStack, MI.getOperand(0).getMBB());
+ MI.RemoveOperand(0);
+ MI.addOperand(MF, MachineOperand::CreateImm(EHPadDepth));
break;
+ }
+
+ case WebAssembly::RETHROW_TO_CALLER: {
+ MachineInstr *Rethrow =
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(WebAssembly::RETHROW))
+ .addImm(EHPadStack.size());
+ MI.eraseFromParent();
+ I = MachineBasicBlock::reverse_iterator(Rethrow);
+ break;
+ }
+
default:
if (MI.isTerminator()) {
// Rewrite MBB operands to be depth immediates.
@@ -352,13 +785,15 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
}
}
assert(Stack.empty() && "Control flow should be balanced");
+}
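An aside, not part of the patch: a rough sketch of what the depth immediates computed above mean, assuming the scope stack holds the currently open scopes from outermost to innermost (depthOf is a hypothetical helper, not the pass's GetDepth). A branch to the innermost enclosing scope gets depth 0, the next one out depth 1, and so on.

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Depth of Target counted from the innermost (topmost) entry of the scope
    // stack; assumes Target is somewhere on the stack.
    unsigned depthOf(const std::vector<int> &ScopeStack, int Target) {
      unsigned Depth = 0;
      for (auto I = ScopeStack.rbegin(), E = ScopeStack.rend(); I != E; ++I, ++Depth)
        if (*I == Target)
          return Depth;
      assert(false && "branch destination should be in scope");
      return 0;
    }

    int main() {
      // Scopes currently open, outermost first: block %outer, loop %mid, try %inner.
      std::vector<int> ScopeStack{1, 2, 3};
      std::printf("br to %%inner -> depth %u\n", depthOf(ScopeStack, 3)); // 0
      std::printf("br to %%outer -> depth %u\n", depthOf(ScopeStack, 1)); // 2
      return 0;
    }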
- // Fix up block/loop signatures at the end of the function to conform to
- // WebAssembly's rules.
- FixEndsAtEndOfFunction(MF, MFI, BlockTops, LoopTops);
-
- // Add an end instruction at the end of the function body.
- AppendEndToFunction(MF, TII);
+void WebAssemblyCFGStackify::releaseMemory() {
+ ScopeTops.clear();
+ BeginToEnd.clear();
+ EndToBegin.clear();
+ TryToEHPad.clear();
+ EHPadToTry.clear();
+ BeginToBottom.clear();
}
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
@@ -366,15 +801,27 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
"********** Function: "
<< MF.getName() << '\n');
- const auto &MLI = getAnalysis<MachineLoopInfo>();
- auto &MDT = getAnalysis<MachineDominatorTree>();
+ releaseMemory();
+
// Liveness is not tracked for VALUE_STACK physreg.
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MF.getRegInfo().invalidateLiveness();
- // Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
- PlaceMarkers(MF, MLI, TII, MDT, MFI);
+ // Place the BLOCK/LOOP/TRY markers to indicate the beginnings of scopes.
+ placeMarkers(MF);
+
+ // Convert MBB operands in terminators to relative depth immediates.
+ rewriteDepthImmediates(MF);
+
+ // Fix up block/loop/try signatures at the end of the function to conform to
+ // WebAssembly's rules.
+ fixEndsAtEndOfFunction(MF);
+
+ // Add an end instruction at the end of the function body.
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ if (!MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple()
+ .isOSBinFormatELF())
+ AppendEndToFunction(MF, TII);
return true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index c1820bf66bc0..aaa6d286598f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -64,16 +64,30 @@ FunctionPass *llvm::createWebAssemblyCallIndirectFixup() {
static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
switch (MI.getOpcode()) {
using namespace WebAssembly;
- case PCALL_INDIRECT_VOID: return CALL_INDIRECT_VOID;
- case PCALL_INDIRECT_I32: return CALL_INDIRECT_I32;
- case PCALL_INDIRECT_I64: return CALL_INDIRECT_I64;
- case PCALL_INDIRECT_F32: return CALL_INDIRECT_F32;
- case PCALL_INDIRECT_F64: return CALL_INDIRECT_F64;
- case PCALL_INDIRECT_v16i8: return CALL_INDIRECT_v16i8;
- case PCALL_INDIRECT_v8i16: return CALL_INDIRECT_v8i16;
- case PCALL_INDIRECT_v4i32: return CALL_INDIRECT_v4i32;
- case PCALL_INDIRECT_v4f32: return CALL_INDIRECT_v4f32;
- default: return INSTRUCTION_LIST_END;
+ case PCALL_INDIRECT_VOID:
+ return CALL_INDIRECT_VOID;
+ case PCALL_INDIRECT_I32:
+ return CALL_INDIRECT_I32;
+ case PCALL_INDIRECT_I64:
+ return CALL_INDIRECT_I64;
+ case PCALL_INDIRECT_F32:
+ return CALL_INDIRECT_F32;
+ case PCALL_INDIRECT_F64:
+ return CALL_INDIRECT_F64;
+ case PCALL_INDIRECT_v16i8:
+ return CALL_INDIRECT_v16i8;
+ case PCALL_INDIRECT_v8i16:
+ return CALL_INDIRECT_v8i16;
+ case PCALL_INDIRECT_v4i32:
+ return CALL_INDIRECT_v4i32;
+ case PCALL_INDIRECT_v2i64:
+ return CALL_INDIRECT_v2i64;
+ case PCALL_INDIRECT_v4f32:
+ return CALL_INDIRECT_v4f32;
+ case PCALL_INDIRECT_v2f64:
+ return CALL_INDIRECT_v2f64;
+ default:
+ return INSTRUCTION_LIST_END;
}
}
@@ -84,7 +98,7 @@ static bool IsPseudoCallIndirect(const MachineInstr &MI) {
bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Fixing up CALL_INDIRECTs **********\n"
- << MF.getName() << '\n');
+ << "********** Function: " << MF.getName() << '\n');
bool Changed = false;
const WebAssemblyInstrInfo *TII =
@@ -110,10 +124,8 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
Ops.push_back(MachineOperand::CreateImm(0));
for (const MachineOperand &MO :
- make_range(MI.operands_begin() +
- MI.getDesc().getNumDefs() + 1,
- MI.operands_begin() +
- MI.getNumExplicitOperands()))
+ make_range(MI.operands_begin() + MI.getDesc().getNumDefs() + 1,
+ MI.operands_begin() + MI.getNumExplicitOperands()))
Ops.push_back(MO);
Ops.push_back(MI.getOperand(MI.getDesc().getNumDefs()));
@@ -133,4 +145,3 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
new file mode 100644
index 000000000000..8ecc159951ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -0,0 +1,46 @@
+//===-- WebAssemblyDebugValueManager.cpp - WebAssembly DebugValue Manager -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the manager for MachineInstr DebugValues.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyDebugValueManager.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+using namespace llvm;
+
+WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(
+ MachineInstr *Instr) {
+ Instr->collectDebugValues(DbgValues);
+}
+
+void WebAssemblyDebugValueManager::move(MachineInstr *Insert) {
+ MachineBasicBlock *MBB = Insert->getParent();
+ for (MachineInstr *DBI : reverse(DbgValues))
+ MBB->splice(Insert, DBI->getParent(), DBI);
+}
+
+void WebAssemblyDebugValueManager::updateReg(unsigned Reg) {
+ for (auto *DBI : DbgValues)
+ DBI->getOperand(0).setReg(Reg);
+}
+
+void WebAssemblyDebugValueManager::clone(MachineInstr *Insert,
+ unsigned NewReg) {
+ MachineBasicBlock *MBB = Insert->getParent();
+ MachineFunction *MF = MBB->getParent();
+ for (MachineInstr *DBI : reverse(DbgValues)) {
+ MachineInstr *Clone = MF->CloneMachineInstr(DBI);
+ Clone->getOperand(0).setReg(NewReg);
+ MBB->insert(Insert, Clone);
+ }
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
new file mode 100644
index 000000000000..73f317214058
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
@@ -0,0 +1,38 @@
+// WebAssemblyDebugValueManager.h - WebAssembly DebugValue Manager -*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the WebAssembly-specific
+/// manager for DebugValues associated with a specific MachineInstr.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class MachineInstr;
+
+class WebAssemblyDebugValueManager {
+ SmallVector<MachineInstr *, 2> DbgValues;
+
+public:
+ WebAssemblyDebugValueManager(MachineInstr *Instr);
+
+ void move(MachineInstr *Insert);
+ void updateReg(unsigned Reg);
+ void clone(MachineInstr *Insert, unsigned NewReg);
+};
+
+} // end namespace llvm
+
+#endif
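An aside, not code from this commit: a hedged sketch of the call pattern a pass would presumably use with the new helper when it moves a def in front of some Insert instruction and wants the attached DBG_VALUEs to follow. It assumes the in-tree headers and the MachineBasicBlock::splice overload that accepts instruction pointers via the bundle-iterator conversion.

    #include "WebAssemblyDebugValueManager.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    // Illustrative only: reposition Def in front of Insert and keep the
    // DBG_VALUEs that refer to Def adjacent to it.
    static void moveDefWithDebugValues(MachineInstr *Def, MachineInstr *Insert,
                                       MachineBasicBlock &MBB) {
      WebAssemblyDebugValueManager DefDIs(Def); // gathers DBG_VALUEs tied to Def
      MBB.splice(Insert, Def->getParent(), Def); // move the def itself
      DefDIs.move(Insert);                       // re-splice the DBG_VALUEs before Insert
    }

The other two entry points in the header cover the remaining cases: updateReg retargets the collected DBG_VALUEs when the def's register is renamed, and clone duplicates them next to a cloned def.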
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
new file mode 100644
index 000000000000..c86260ba408c
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
@@ -0,0 +1,87 @@
+//===-- WebAssemblyEHRestoreStackPointer.cpp - __stack_pointer restoration ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// After the stack is unwound due to a thrown exception, the __stack_pointer
+/// global can point to an invalid address. This pass inserts instructions that
+/// restore the __stack_pointer global.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-eh-restore-stack-pointer"
+
+namespace {
+class WebAssemblyEHRestoreStackPointer final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyEHRestoreStackPointer() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "WebAssembly Restore Stack Pointer for Exception Handling";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyEHRestoreStackPointer::ID = 0;
+INITIALIZE_PASS(WebAssemblyEHRestoreStackPointer, DEBUG_TYPE,
+ "Restore Stack Pointer for Exception Handling", true, false)
+
+FunctionPass *llvm::createWebAssemblyEHRestoreStackPointer() {
+ return new WebAssemblyEHRestoreStackPointer();
+}
+
+bool WebAssemblyEHRestoreStackPointer::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** EH Restore Stack Pointer **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
+ if (!FrameLowering->needsPrologForEH(MF))
+ return false;
+ bool Changed = false;
+
+ for (auto &MBB : MF) {
+ if (!MBB.isEHPad())
+ continue;
+ Changed = true;
+
+ // Insert __stack_pointer restoring instructions at the beginning of each EH
+ // pad, after the catch instruction. (Catch instructions may have been
+ // reordered, and catch_all instructions have not been inserted yet, but
+ // those cases are handled in LateEHPrepare).
+ //
+ // Here it is safe to assume that SP32 holds the latest value of
+ // __stack_pointer, because the only exception for this case is when a
+ // function uses the red zone, but that only happens with leaf functions,
+ // and we don't restore __stack_pointer in leaf functions anyway.
+ auto InsertPos = MBB.begin();
+ if (WebAssembly::isCatch(*MBB.begin()))
+ InsertPos++;
+ FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
+ MBB.begin()->getDebugLoc());
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index 84683d48a90a..6b3a3e765786 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyExceptionInfo.h"
-#include "WebAssemblyUtilities.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -32,7 +32,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
INITIALIZE_PASS_END(WebAssemblyExceptionInfo, DEBUG_TYPE,
"WebAssembly Exception Information", true, true)
-bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &F) {
+bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** Exception Info Calculation **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
releaseMemory();
auto &MDT = getAnalysis<MachineDominatorTree>();
auto &MDF = getAnalysis<MachineDominanceFrontier>();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 8619cbdcb5ee..27aabe6ba0bd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -11,7 +11,7 @@
/// This file converts any remaining registers into WebAssembly locals.
///
/// After register stackification and register coloring, convert non-stackified
-/// registers into locals, inserting explicit get_local and set_local
+/// registers into locals, inserting explicit local.get and local.set
/// instructions.
///
//===----------------------------------------------------------------------===//
@@ -31,12 +31,14 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-explicit-locals"
-// A command-line option to disable this pass. Note that this produces output
-// which is not valid WebAssembly, though it may be more convenient for writing
-// LLVM unit tests with.
-static cl::opt<bool> DisableWebAssemblyExplicitLocals(
- "disable-wasm-explicit-locals", cl::ReallyHidden,
- cl::desc("WebAssembly: Disable emission of get_local/set_local."),
+// A command-line option to disable this pass, and keep implicit locals
+// for the purpose of testing with lit/llc ONLY.
+// This produces output which is not valid WebAssembly, and is not supported
+// by assemblers/disassemblers and other MC based tools.
+static cl::opt<bool> WasmDisableExplicitLocals(
+ "wasm-disable-explicit-locals", cl::Hidden,
+ cl::desc("WebAssembly: output implicit locals in"
+ " instruction output for test purposes only."),
cl::init(false));
namespace {
@@ -94,54 +96,54 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) {
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate get_local opcode for the given register class.
+/// Get the appropriate local.get opcode for the given register class.
static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::GET_LOCAL_I32;
+ return WebAssembly::LOCAL_GET_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::GET_LOCAL_I64;
+ return WebAssembly::LOCAL_GET_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::GET_LOCAL_F32;
+ return WebAssembly::LOCAL_GET_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::GET_LOCAL_F64;
+ return WebAssembly::LOCAL_GET_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::GET_LOCAL_V128;
+ return WebAssembly::LOCAL_GET_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::GET_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_GET_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate set_local opcode for the given register class.
+/// Get the appropriate local.set opcode for the given register class.
static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::SET_LOCAL_I32;
+ return WebAssembly::LOCAL_SET_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::SET_LOCAL_I64;
+ return WebAssembly::LOCAL_SET_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::SET_LOCAL_F32;
+ return WebAssembly::LOCAL_SET_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::SET_LOCAL_F64;
+ return WebAssembly::LOCAL_SET_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::SET_LOCAL_V128;
+ return WebAssembly::LOCAL_SET_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::SET_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_SET_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
-/// Get the appropriate tee_local opcode for the given register class.
+/// Get the appropriate local.tee opcode for the given register class.
static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::TEE_LOCAL_I32;
+ return WebAssembly::LOCAL_TEE_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::TEE_LOCAL_I64;
+ return WebAssembly::LOCAL_TEE_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::TEE_LOCAL_F32;
+ return WebAssembly::LOCAL_TEE_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::TEE_LOCAL_F64;
+ return WebAssembly::LOCAL_TEE_F64;
if (RC == &WebAssembly::V128RegClass)
- return WebAssembly::TEE_LOCAL_V128;
+ return WebAssembly::LOCAL_TEE_V128;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::TEE_LOCAL_EXCEPT_REF;
+ return WebAssembly::LOCAL_TEE_EXCEPT_REF;
llvm_unreachable("Unexpected register class");
}
@@ -155,6 +157,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
return MVT::f32;
if (RC == &WebAssembly::F64RegClass)
return MVT::f64;
+ if (RC == &WebAssembly::V128RegClass)
+ return MVT::v16i8;
if (RC == &WebAssembly::EXCEPT_REFRegClass)
return MVT::ExceptRef;
llvm_unreachable("unrecognized register class");
@@ -162,7 +166,7 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
/// Given a MachineOperand of a stackified vreg, return the instruction at the
/// start of the expression tree.
-static MachineInstr *FindStartOfTree(MachineOperand &MO,
+static MachineInstr *findStartOfTree(MachineOperand &MO,
MachineRegisterInfo &MRI,
WebAssemblyFunctionInfo &MFI) {
unsigned Reg = MO.getReg();
@@ -173,7 +177,7 @@ static MachineInstr *FindStartOfTree(MachineOperand &MO,
for (MachineOperand &DefMO : Def->explicit_uses()) {
if (!DefMO.isReg())
continue;
- return FindStartOfTree(DefMO, MRI, MFI);
+ return findStartOfTree(DefMO, MRI, MFI);
}
// If there were no stackified uses, we've reached the start.
@@ -186,7 +190,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
<< MF.getName() << '\n');
// Disable this pass if directed to do so.
- if (DisableWebAssemblyExplicitLocals)
+ if (WasmDisableExplicitLocals)
return false;
bool Changed = false;
@@ -206,19 +210,19 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
break;
unsigned Reg = MI.getOperand(0).getReg();
assert(!MFI.isVRegStackified(Reg));
- Reg2Local[Reg] = MI.getOperand(1).getImm();
+ Reg2Local[Reg] = static_cast<unsigned>(MI.getOperand(1).getImm());
MI.eraseFromParent();
Changed = true;
}
// Start assigning local numbers after the last parameter.
- unsigned CurLocal = MFI.getParams().size();
+ unsigned CurLocal = static_cast<unsigned>(MFI.getParams().size());
// Precompute the set of registers that are unused, so that we can insert
// drops to their defs.
BitVector UseEmpty(MRI.getNumVirtRegs());
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i)
- UseEmpty[i] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(i));
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I)
+ UseEmpty[I] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(I));
// Visit each instruction in the function.
for (MachineBasicBlock &MBB : MF) {
@@ -229,8 +233,8 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugInstr() || MI.isLabel())
continue;
- // Replace tee instructions with tee_local. The difference is that tee
- // instructins have two defs, while tee_local instructions have one def
+ // Replace tee instructions with local.tee. The difference is that tee
+ // instructions have two defs, while local.tee instructions have one def
// and an index of a local to write to.
if (WebAssembly::isTee(MI)) {
assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
@@ -249,7 +253,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
MFI.stackifyVReg(NewReg);
}
- // Replace the TEE with a TEE_LOCAL.
+ // Replace the TEE with a LOCAL_TEE.
unsigned LocalId =
getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg());
unsigned Opc = getTeeLocalOpcode(RC);
@@ -263,7 +267,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // Insert set_locals for any defs that aren't stackified yet. Currently
+ // Insert local.sets for any defs that aren't stackified yet. Currently
// we handle at most one def.
assert(MI.getDesc().getNumDefs() <= 1);
if (MI.getDesc().getNumDefs() == 1) {
@@ -292,15 +296,16 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
.addReg(NewReg);
}
MI.getOperand(0).setReg(NewReg);
- // This register operand is now being used by the inserted drop
- // instruction, so make it undead.
+ // This register operand of the original instruction is now being used
+ // by the inserted drop or local.set instruction, so make it not dead
+ // yet.
MI.getOperand(0).setIsDead(false);
MFI.stackifyVReg(NewReg);
Changed = true;
}
}
- // Insert get_locals for any uses that aren't stackified yet.
+ // Insert local.gets for any uses that aren't stackified yet.
MachineInstr *InsertPt = &MI;
for (MachineOperand &MO : reverse(MI.explicit_uses())) {
if (!MO.isReg())
@@ -314,15 +319,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (MO.isDef()) {
assert(MI.getOpcode() == TargetOpcode::INLINEASM);
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
- MRI.removeRegOperandFromUseList(&MO);
- MO = MachineOperand::CreateImm(LocalId);
+ // If this register operand is tied to another operand, we can't
+ // change it to an immediate. Untie it first.
+ MI.untieRegOperand(MI.getOperandNo(&MO));
+ MO.ChangeToImmediate(LocalId);
continue;
}
// If we see a stackified register, prepare to insert subsequent
- // get_locals before the start of its tree.
+ // local.gets before the start of its tree.
if (MFI.isVRegStackified(OldReg)) {
- InsertPt = FindStartOfTree(MO, MRI, MFI);
+ InsertPt = findStartOfTree(MO, MRI, MFI);
continue;
}
@@ -330,12 +337,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// indices as immediates.
if (MI.getOpcode() == TargetOpcode::INLINEASM) {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
- MRI.removeRegOperandFromUseList(&MO);
- MO = MachineOperand::CreateImm(LocalId);
+ // Untie it first if this reg operand is tied to another operand.
+ MI.untieRegOperand(MI.getOperandNo(&MO));
+ MO.ChangeToImmediate(LocalId);
continue;
}
- // Insert a get_local.
+ // Insert a local.get.
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
unsigned NewReg = MRI.createVirtualRegister(RC);
@@ -361,13 +369,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Define the locals.
// TODO: Sort the locals for better compression.
MFI.setNumLocals(CurLocal - MFI.getParams().size());
- for (size_t i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- auto I = Reg2Local.find(Reg);
- if (I == Reg2Local.end() || I->second < MFI.getParams().size())
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ auto RL = Reg2Local.find(Reg);
+ if (RL == Reg2Local.end() || RL->second < MFI.getParams().size())
continue;
- MFI.setLocal(I->second - MFI.getParams().size(),
+ MFI.setLocal(RL->second - MFI.getParams().size(),
typeForRegClass(MRI.getRegClass(Reg)));
Changed = true;
}
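The hunks above track the WebAssembly spec's renaming of get_local/set_local/tee_local to local.get/local.set/local.tee. They also rely on a getLocalId helper that is not shown in this excerpt; a minimal sketch of the lazy local-index assignment the hunks assume (look the vreg up in Reg2Local and hand out CurLocal for vregs seen for the first time) could look like:

  // Return the local index for Reg, assigning a fresh index (and bumping
  // CurLocal) the first time a virtual register is seen. Sketch only; the
  // real helper lives elsewhere in WebAssemblyExplicitLocals.cpp.
  static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local,
                             unsigned &CurLocal, unsigned Reg) {
    auto P = Reg2Local.insert(std::make_pair(Reg, CurLocal));
    if (P.second)
      ++CurLocal;
    return P.first->second;
  }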
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 566ef68c027d..3856700cca94 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -37,7 +37,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "wasm-fastisel"
@@ -114,8 +117,8 @@ private:
// Utility helper routines
MVT::SimpleValueType getSimpleType(Type *Ty) {
EVT VT = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
- return VT.isSimple() ? VT.getSimpleVT().SimpleTy :
- MVT::INVALID_SIMPLE_VALUE_TYPE;
+ return VT.isSimple() ? VT.getSimpleVT().SimpleTy
+ : MVT::INVALID_SIMPLE_VALUE_TYPE;
}
MVT::SimpleValueType getLegalType(MVT::SimpleValueType VT) {
switch (VT) {
@@ -138,6 +141,11 @@ private:
if (Subtarget->hasSIMD128())
return VT;
break;
+ case MVT::v2i64:
+ case MVT::v2f64:
+ if (Subtarget->hasUnimplementedSIMD128())
+ return VT;
+ break;
default:
break;
}
@@ -153,11 +161,9 @@ private:
MVT::SimpleValueType From);
unsigned signExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
- unsigned zeroExtend(unsigned Reg, const Value *V,
- MVT::SimpleValueType From,
+ unsigned zeroExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
- unsigned signExtend(unsigned Reg, const Value *V,
- MVT::SimpleValueType From,
+ unsigned signExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
unsigned getRegForUnsignedValue(const Value *V);
unsigned getRegForSignedValue(const Value *V);
@@ -374,14 +380,12 @@ void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) {
if (Addr.isRegBase()) {
unsigned Reg = Addr.getReg();
if (Reg == 0) {
- Reg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::CONST_I64 :
- WebAssembly::CONST_I32;
+ Reg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
+ : WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), Reg)
- .addImm(0);
+ .addImm(0);
Addr.setReg(Reg);
}
}
@@ -419,9 +423,10 @@ unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
return getRegForValue(ICmp->getOperand(0));
}
- if (BinaryOperator::isNot(V) && V->getType()->isIntegerTy(32)) {
+ Value *NotV;
+ if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) {
Not = true;
- return getRegForValue(BinaryOperator::getNotArgument(V));
+ return getRegForValue(NotV);
}
Not = false;
@@ -438,13 +443,12 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
switch (From) {
case MVT::i1:
- // If the value is naturally an i1, we don't need to mask it.
- // TODO: Recursively examine selects, phis, and, or, xor, constants.
- if (From == MVT::i1 && V != nullptr) {
- if (isa<CmpInst>(V) ||
- (isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr()))
- return copyValue(Reg);
- }
+ // If the value is naturally an i1, we don't need to mask it. We only know
+ // if a value is naturally an i1 if it is definitely lowered by FastISel,
+ // not a DAG ISel fallback.
+ if (V != nullptr && isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr())
+ return copyValue(Reg);
+ break;
case MVT::i8:
case MVT::i16:
break;
@@ -457,13 +461,13 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
- .addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
+ .addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
unsigned Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::AND_I32), Result)
- .addReg(Reg)
- .addReg(Imm);
+ .addReg(Reg)
+ .addReg(Imm);
return Result;
}
@@ -487,19 +491,19 @@ unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V,
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
- .addImm(32 - MVT(From).getSizeInBits());
+ .addImm(32 - MVT(From).getSizeInBits());
unsigned Left = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHL_I32), Left)
- .addReg(Reg)
- .addReg(Imm);
+ .addReg(Reg)
+ .addReg(Imm);
unsigned Right = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHR_S_I32), Right)
- .addReg(Left)
- .addReg(Imm);
+ .addReg(Left)
+ .addReg(Imm);
return Right;
}
@@ -562,8 +566,7 @@ unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) {
unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V,
bool IsSigned) {
- return IsSigned ? getRegForSignedValue(V) :
- getRegForUnsignedValue(V);
+ return IsSigned ? getRegForSignedValue(V) : getRegForUnsignedValue(V);
}
unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
@@ -572,15 +575,15 @@ unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
unsigned NotReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::EQZ_I32), NotReg)
- .addReg(Reg);
+ .addReg(Reg);
return NotReg;
}
unsigned WebAssemblyFastISel::copyValue(unsigned Reg) {
unsigned ResultReg = createResultReg(MRI.getRegClass(Reg));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(WebAssembly::COPY), ResultReg)
- .addReg(Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(WebAssembly::COPY),
+ ResultReg)
+ .addReg(Reg);
return ResultReg;
}
@@ -589,12 +592,11 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::COPY_I64 :
- WebAssembly::COPY_I32;
+ unsigned ResultReg =
+ createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc =
+ Subtarget->hasAddr64() ? WebAssembly::COPY_I64 : WebAssembly::COPY_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(SI->second);
return ResultReg;
@@ -605,14 +607,13 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
- unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ?
- &WebAssembly::I64RegClass :
- &WebAssembly::I32RegClass);
- unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::CONST_I64 :
- WebAssembly::CONST_I32;
+ unsigned ResultReg =
+ createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
+ : &WebAssembly::I32RegClass);
+ unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
+ : WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addGlobalAddress(GV);
+ .addGlobalAddress(GV);
return ResultReg;
}
@@ -651,19 +652,19 @@ bool WebAssemblyFastISel::fastLowerArguments() {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- Opc = WebAssembly::ARGUMENT_I32;
+ Opc = WebAssembly::ARGUMENT_i32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
- Opc = WebAssembly::ARGUMENT_I64;
+ Opc = WebAssembly::ARGUMENT_i64;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
- Opc = WebAssembly::ARGUMENT_F32;
+ Opc = WebAssembly::ARGUMENT_f32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
- Opc = WebAssembly::ARGUMENT_F64;
+ Opc = WebAssembly::ARGUMENT_f64;
RC = &WebAssembly::F64RegClass;
break;
case MVT::v16i8:
@@ -678,12 +679,20 @@ bool WebAssemblyFastISel::fastLowerArguments() {
Opc = WebAssembly::ARGUMENT_v4i32;
RC = &WebAssembly::V128RegClass;
break;
+ case MVT::v2i64:
+ Opc = WebAssembly::ARGUMENT_v2i64;
+ RC = &WebAssembly::V128RegClass;
+ break;
case MVT::v4f32:
Opc = WebAssembly::ARGUMENT_v4f32;
RC = &WebAssembly::V128RegClass;
break;
+ case MVT::v2f64:
+ Opc = WebAssembly::ARGUMENT_v2f64;
+ RC = &WebAssembly::V128RegClass;
+ break;
case MVT::ExceptRef:
- Opc = WebAssembly::ARGUMENT_EXCEPT_REF;
+ Opc = WebAssembly::ARGUMENT_ExceptRef;
RC = &WebAssembly::EXCEPT_REFRegClass;
break;
default:
@@ -691,7 +700,7 @@ bool WebAssemblyFastISel::fastLowerArguments() {
}
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(i);
+ .addImm(i);
updateValueMap(&Arg, ResultReg);
++i;
@@ -710,7 +719,8 @@ bool WebAssemblyFastISel::fastLowerArguments() {
}
if (!F->getReturnType()->isVoidTy()) {
- MVT::SimpleValueType RetTy = getLegalType(getSimpleType(F->getReturnType()));
+ MVT::SimpleValueType RetTy =
+ getLegalType(getSimpleType(F->getReturnType()));
if (RetTy == MVT::INVALID_SIMPLE_VALUE_TYPE) {
MFI->clearParamsAndResults();
return false;
@@ -768,23 +778,33 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
ResultReg = createResultReg(&WebAssembly::F64RegClass);
break;
case MVT::v16i8:
- Opc =
- IsDirect ? WebAssembly::CALL_v16i8 : WebAssembly::PCALL_INDIRECT_v16i8;
+ Opc = IsDirect ? WebAssembly::CALL_v16i8
+ : WebAssembly::PCALL_INDIRECT_v16i8;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v8i16:
- Opc =
- IsDirect ? WebAssembly::CALL_v8i16 : WebAssembly::PCALL_INDIRECT_v8i16;
+ Opc = IsDirect ? WebAssembly::CALL_v8i16
+ : WebAssembly::PCALL_INDIRECT_v8i16;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4i32:
- Opc =
- IsDirect ? WebAssembly::CALL_v4i32 : WebAssembly::PCALL_INDIRECT_v4i32;
+ Opc = IsDirect ? WebAssembly::CALL_v4i32
+ : WebAssembly::PCALL_INDIRECT_v4i32;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v2i64:
+ Opc = IsDirect ? WebAssembly::CALL_v2i64
+ : WebAssembly::PCALL_INDIRECT_v2i64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4f32:
- Opc =
- IsDirect ? WebAssembly::CALL_v4f32 : WebAssembly::PCALL_INDIRECT_v4f32;
+ Opc = IsDirect ? WebAssembly::CALL_v4f32
+ : WebAssembly::PCALL_INDIRECT_v4f32;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v2f64:
+ Opc = IsDirect ? WebAssembly::CALL_v2f64
+ : WebAssembly::PCALL_INDIRECT_v2f64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::ExceptRef:
@@ -853,11 +873,11 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
const SelectInst *Select = cast<SelectInst>(I);
bool Not;
- unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
+ unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
if (CondReg == 0)
return false;
- unsigned TrueReg = getRegForValue(Select->getTrueValue());
+ unsigned TrueReg = getRegForValue(Select->getTrueValue());
if (TrueReg == 0)
return false;
@@ -900,9 +920,9 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(TrueReg)
- .addReg(FalseReg)
- .addReg(CondReg);
+ .addReg(TrueReg)
+ .addReg(FalseReg)
+ .addReg(CondReg);
updateValueMap(Select, ResultReg);
return true;
@@ -1002,7 +1022,8 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
Opc = I32 ? WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64;
isSigned = true;
break;
- default: return false;
+ default:
+ return false;
}
unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), isSigned);
@@ -1210,7 +1231,8 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
case MVT::f64:
Opc = WebAssembly::STORE_F64;
break;
- default: return false;
+ default:
+ return false;
}
materializeLoadStoreOperands(Addr);
@@ -1275,8 +1297,10 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
unsigned Opc;
switch (getSimpleType(RV->getType())) {
- case MVT::i1: case MVT::i8:
- case MVT::i16: case MVT::i32:
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
Opc = WebAssembly::RETURN_I32;
break;
case MVT::i64:
@@ -1297,13 +1321,20 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
case MVT::v4i32:
Opc = WebAssembly::RETURN_v4i32;
break;
+ case MVT::v2i64:
+ Opc = WebAssembly::RETURN_v2i64;
+ break;
case MVT::v4f32:
Opc = WebAssembly::RETURN_v4f32;
break;
+ case MVT::v2f64:
+ Opc = WebAssembly::RETURN_v2f64;
+ break;
case MVT::ExceptRef:
Opc = WebAssembly::RETURN_EXCEPT_REF;
break;
- default: return false;
+ default:
+ return false;
}
unsigned Reg;
@@ -1333,19 +1364,32 @@ bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
if (selectCall(I))
return true;
break;
- case Instruction::Select: return selectSelect(I);
- case Instruction::Trunc: return selectTrunc(I);
- case Instruction::ZExt: return selectZExt(I);
- case Instruction::SExt: return selectSExt(I);
- case Instruction::ICmp: return selectICmp(I);
- case Instruction::FCmp: return selectFCmp(I);
- case Instruction::BitCast: return selectBitCast(I);
- case Instruction::Load: return selectLoad(I);
- case Instruction::Store: return selectStore(I);
- case Instruction::Br: return selectBr(I);
- case Instruction::Ret: return selectRet(I);
- case Instruction::Unreachable: return selectUnreachable(I);
- default: break;
+ case Instruction::Select:
+ return selectSelect(I);
+ case Instruction::Trunc:
+ return selectTrunc(I);
+ case Instruction::ZExt:
+ return selectZExt(I);
+ case Instruction::SExt:
+ return selectSExt(I);
+ case Instruction::ICmp:
+ return selectICmp(I);
+ case Instruction::FCmp:
+ return selectFCmp(I);
+ case Instruction::BitCast:
+ return selectBitCast(I);
+ case Instruction::Load:
+ return selectLoad(I);
+ case Instruction::Store:
+ return selectStore(I);
+ case Instruction::Br:
+ return selectBr(I);
+ case Instruction::Ret:
+ return selectRet(I);
+ case Instruction::Unreachable:
+ return selectUnreachable(I);
+ default:
+ break;
}
// Fall back to target-independent instruction selection.
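The zeroExtendToI32 hunk above computes its AND mask as ~(~uint64_t(0) << bits). A small standalone check of that expression (the helper name here is made up for illustration, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // Mask for zero-extending a sub-32-bit value to i32: 0xFF for i8,
  // 0xFFFF for i16. Only meaningful for widths strictly below 64.
  static uint64_t zextMask(unsigned FromBits) {
    return ~(~uint64_t(0) << FromBits);
  }

  int main() {
    assert(zextMask(1) == 0x1u);
    assert(zextMask(8) == 0xFFu);
    assert(zextMask(16) == 0xFFFFu);
    return 0;
  }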
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index d5e47ee82513..1a416520f97d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -36,10 +36,10 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-fix-function-bitcasts"
-static cl::opt<bool> TemporaryWorkarounds(
- "wasm-temporary-workarounds",
- cl::desc("Apply certain temporary workarounds"),
- cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ TemporaryWorkarounds("wasm-temporary-workarounds",
+ cl::desc("Apply certain temporary workarounds"),
+ cl::init(true), cl::Hidden);
namespace {
class FixFunctionBitcasts final : public ModulePass {
@@ -103,14 +103,29 @@ static void FindUses(Value *V, Function &F,
// - Return value is not needed: drop it
// - Return value needed but not present: supply an undef
//
-// For now, return nullptr without creating a wrapper if the wrapper cannot
-// be generated due to incompatible types.
+// If all the argument types are trivially castable to one another (i.e.
+// I32 vs pointer type) then we don't create a wrapper at all (return nullptr
+// instead).
+//
+// If there is a type mismatch that we know would result in an invalid wasm
+// module then generate a wrapper that contains unreachable (i.e. abort at
+// runtime). Such programs are deep into undefined behaviour territory,
+// but we choose to fail at runtime rather than generate an invalid module
+// or fail at compile time. The reason we delay the error is that we want
+// to support CMake, which expects to be able to compile and link programs
+// that refer to functions with entirely incorrect signatures (this is how
+// CMake detects the existence of a function in a toolchain).
+//
+// For bitcasts that involve struct types we don't know at this stage if they
+// would be equivalent at the wasm level and so we can't know if we need to
+// generate a wrapper.
static Function *CreateWrapper(Function *F, FunctionType *Ty) {
Module *M = F->getParent();
- Function *Wrapper =
- Function::Create(Ty, Function::PrivateLinkage, "bitcast", M);
+ Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage,
+ F->getName() + "_bitcast", M);
BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
+ const DataLayout &DL = BB->getModule()->getDataLayout();
// Determine what arguments to pass.
SmallVector<Value *, 4> Args;
@@ -118,38 +133,103 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
Function::arg_iterator AE = Wrapper->arg_end();
FunctionType::param_iterator PI = F->getFunctionType()->param_begin();
FunctionType::param_iterator PE = F->getFunctionType()->param_end();
+ bool TypeMismatch = false;
+ bool WrapperNeeded = false;
+
+ Type *ExpectedRtnType = F->getFunctionType()->getReturnType();
+ Type *RtnType = Ty->getReturnType();
+
+ if ((F->getFunctionType()->getNumParams() != Ty->getNumParams()) ||
+ (F->getFunctionType()->isVarArg() != Ty->isVarArg()) ||
+ (ExpectedRtnType != RtnType))
+ WrapperNeeded = true;
+
for (; AI != AE && PI != PE; ++AI, ++PI) {
- if (AI->getType() != *PI) {
- Wrapper->eraseFromParent();
- return nullptr;
+ Type *ArgType = AI->getType();
+ Type *ParamType = *PI;
+
+ if (ArgType == ParamType) {
+ Args.push_back(&*AI);
+ } else {
+ if (CastInst::isBitOrNoopPointerCastable(ArgType, ParamType, DL)) {
+ Instruction *PtrCast =
+ CastInst::CreateBitOrPointerCast(AI, ParamType, "cast");
+ BB->getInstList().push_back(PtrCast);
+ Args.push_back(PtrCast);
+ } else if (ArgType->isStructTy() || ParamType->isStructTy()) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: struct param type in bitcast: "
+ << F->getName() << "\n");
+ WrapperNeeded = false;
+ } else {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: arg type mismatch calling: "
+ << F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Arg[" << Args.size() << "] Expected: "
+ << *ParamType << " Got: " << *ArgType << "\n");
+ TypeMismatch = true;
+ break;
+ }
}
- Args.push_back(&*AI);
}
- for (; PI != PE; ++PI)
- Args.push_back(UndefValue::get(*PI));
- if (F->isVarArg())
- for (; AI != AE; ++AI)
- Args.push_back(&*AI);
- CallInst *Call = CallInst::Create(F, Args, "", BB);
-
- // Determine what value to return.
- if (Ty->getReturnType()->isVoidTy())
- ReturnInst::Create(M->getContext(), BB);
- else if (F->getFunctionType()->getReturnType()->isVoidTy())
- ReturnInst::Create(M->getContext(), UndefValue::get(Ty->getReturnType()),
- BB);
- else if (F->getFunctionType()->getReturnType() == Ty->getReturnType())
- ReturnInst::Create(M->getContext(), Call, BB);
- else {
+ if (WrapperNeeded && !TypeMismatch) {
+ for (; PI != PE; ++PI)
+ Args.push_back(UndefValue::get(*PI));
+ if (F->isVarArg())
+ for (; AI != AE; ++AI)
+ Args.push_back(&*AI);
+
+ CallInst *Call = CallInst::Create(F, Args, "", BB);
+
+ Type *ExpectedRtnType = F->getFunctionType()->getReturnType();
+ Type *RtnType = Ty->getReturnType();
+ // Determine what value to return.
+ if (RtnType->isVoidTy()) {
+ ReturnInst::Create(M->getContext(), BB);
+ } else if (ExpectedRtnType->isVoidTy()) {
+ LLVM_DEBUG(dbgs() << "Creating dummy return: " << *RtnType << "\n");
+ ReturnInst::Create(M->getContext(), UndefValue::get(RtnType), BB);
+ } else if (RtnType == ExpectedRtnType) {
+ ReturnInst::Create(M->getContext(), Call, BB);
+ } else if (CastInst::isBitOrNoopPointerCastable(ExpectedRtnType, RtnType,
+ DL)) {
+ Instruction *Cast =
+ CastInst::CreateBitOrPointerCast(Call, RtnType, "cast");
+ BB->getInstList().push_back(Cast);
+ ReturnInst::Create(M->getContext(), Cast, BB);
+ } else if (RtnType->isStructTy() || ExpectedRtnType->isStructTy()) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: struct return type in bitcast: "
+ << F->getName() << "\n");
+ WrapperNeeded = false;
+ } else {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: return type mismatch calling: "
+ << F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Expected: " << *ExpectedRtnType
+ << " Got: " << *RtnType << "\n");
+ TypeMismatch = true;
+ }
+ }
+
+ if (TypeMismatch) {
+ // Create a new wrapper that simply contains `unreachable`.
+ Wrapper->eraseFromParent();
+ Wrapper = Function::Create(Ty, Function::PrivateLinkage,
+ F->getName() + "_bitcast_invalid", M);
+ BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
+ new UnreachableInst(M->getContext(), BB);
+ Wrapper->setName(F->getName() + "_bitcast_invalid");
+ } else if (!WrapperNeeded) {
+ LLVM_DEBUG(dbgs() << "CreateWrapper: no wrapper needed: " << F->getName()
+ << "\n");
Wrapper->eraseFromParent();
return nullptr;
}
-
+ LLVM_DEBUG(dbgs() << "CreateWrapper: " << F->getName() << "\n");
return Wrapper;
}
bool FixFunctionBitcasts::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n");
+
Function *Main = nullptr;
CallInst *CallMain = nullptr;
SmallVector<std::pair<Use *, Function *>, 0> Uses;
@@ -166,19 +246,17 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") {
Main = &F;
LLVMContext &C = M.getContext();
- Type *MainArgTys[] = {
- PointerType::get(Type::getInt8PtrTy(C), 0),
- Type::getInt32Ty(C)
- };
+ Type *MainArgTys[] = {Type::getInt32Ty(C),
+ PointerType::get(Type::getInt8PtrTy(C), 0)};
FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
/*isVarArg=*/false);
if (F.getFunctionType() != MainTy) {
- Value *Args[] = {
- UndefValue::get(MainArgTys[0]),
- UndefValue::get(MainArgTys[1])
- };
- Value *Casted = ConstantExpr::getBitCast(Main,
- PointerType::get(MainTy, 0));
+ LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: "
+ << *F.getFunctionType() << "\n");
+ Value *Args[] = {UndefValue::get(MainArgTys[0]),
+ UndefValue::get(MainArgTys[1])};
+ Value *Casted =
+ ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
CallMain = CallInst::Create(Casted, Args, "call_main");
Use *UseMain = &CallMain->getOperandUse(2);
Uses.push_back(std::make_pair(UseMain, &F));
@@ -200,11 +278,6 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!Ty)
continue;
- // Bitcasted vararg functions occur in Emscripten's implementation of
- // EM_ASM, so suppress wrappers for them for now.
- if (TemporaryWorkarounds && (Ty->isVarArg() || F->isVarArg()))
- continue;
-
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
Pair.first->second = CreateWrapper(F, Ty);
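For reference, the reordered MainArgTys in the hunk above correspond to the standard C prototype of main (an i32 argc followed by an i8** argv); the previous order did not match any standard signature:

  // The signature the pass now synthesizes for `main`: argc first, then argv.
  int main(int argc, char **argv);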
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index bea027be7711..108f2879a071 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements a pass that transforms irreducible control flow
-/// into reducible control flow. Irreducible control flow means multiple-entry
+/// This file implements a pass that transforms irreducible control flow into
+/// reducible control flow. Irreducible control flow means multiple-entry
/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
/// due to being unnatural.
///
@@ -17,12 +17,36 @@
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
-/// TODO: The transformation implemented here handles all irreducible control
-/// flow, without exponential code-size expansion, though it does so by creating
-/// inefficient code in many cases. Ideally, we should add other
-/// transformations, including code-duplicating cases, which can be more
-/// efficient in common cases, and they can fall back to this conservative
-/// implementation as needed.
+/// The big picture: Ignoring natural loops (seeing them monolithically), we
+/// find all the blocks which can return to themselves ("loopers"). Loopers
+/// reachable from the non-loopers are loop entries: if there are 2 or more,
+/// then we have irreducible control flow. We fix that as follows: a new block
+/// is created that can dispatch to each of the loop entries, based on the
+/// value of a label "helper" variable, and we replace direct branches to the
+/// entries with assignments to the label variable and a branch to the dispatch
+/// block. Then the dispatch block is the single entry in a new natural loop.
+///
+/// This is similar to what the Relooper [1] does, both identify looping code
+/// that requires multiple entries, and resolve it in a similar way. In
+/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
+/// also that like the Relooper, we implement a "minimal" intervention: we only
+/// use the "label" helper for the blocks we absolutely must and no others. We
+/// also prioritize code size and do not perform node splitting (i.e. we don't
+/// duplicate code in order to resolve irreducibility).
+///
+/// The difference between this code and the Relooper is that the Relooper also
+/// generates ifs and loops and works in a recursive manner, knowing at each
+/// point what the entries are, and recursively breaks down the problem. Here
+/// we just want to resolve irreducible control flow, and we also want to use
+/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
+/// identify natural loops, etc., and we start with the whole CFG and must
+/// identify both the looping code and its entries.
+///
+/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
+/// Proceedings of the ACM international conference companion on Object oriented
+/// programming systems languages and applications companion (SPLASH '11). ACM,
+/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
+/// http://doi.acm.org/10.1145/2048147.2048224
///
//===----------------------------------------------------------------------===//
@@ -46,141 +70,203 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
namespace {
-class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
- StringRef getPassName() const override {
- return "WebAssembly Fix Irreducible Control Flow";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- bool VisitLoop(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop);
-
-public:
- static char ID; // Pass identification, replacement for typeid
- WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
-};
-} // end anonymous namespace
-
-char WebAssemblyFixIrreducibleControlFlow::ID = 0;
-INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
- "Removes irreducible control flow", false, false)
-
-FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
- return new WebAssemblyFixIrreducibleControlFlow();
-}
-
-namespace {
-
-/// A utility for walking the blocks of a loop, handling a nested inner
-/// loop as a monolithic conceptual block.
-class MetaBlock {
- MachineBasicBlock *Block;
- SmallVector<MachineBasicBlock *, 2> Preds;
- SmallVector<MachineBasicBlock *, 2> Succs;
+class LoopFixer {
public:
- explicit MetaBlock(MachineBasicBlock *MBB)
- : Block(MBB), Preds(MBB->pred_begin(), MBB->pred_end()),
- Succs(MBB->succ_begin(), MBB->succ_end()) {}
-
- explicit MetaBlock(MachineLoop *Loop) : Block(Loop->getHeader()) {
- Loop->getExitBlocks(Succs);
- for (MachineBasicBlock *Pred : Block->predecessors())
- if (!Loop->contains(Pred))
- Preds.push_back(Pred);
+ LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
+ : MF(MF), MLI(MLI), Loop(Loop) {}
+
+ // Run the fixer on the given inputs. Returns whether changes were made.
+ bool run();
+
+private:
+ MachineFunction &MF;
+ MachineLoopInfo &MLI;
+ MachineLoop *Loop;
+
+ MachineBasicBlock *Header;
+ SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;
+
+ using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+ DenseMap<MachineBasicBlock *, BlockSet> Reachable;
+
+ // The worklist contains pairs of recent additions, (a, b), where we just
+ // added a link a => b.
+ using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
+ SmallVector<BlockPair, 4> WorkList;
+
+ // Get a canonical block to represent a block or a loop: the block, or if in
+ // an inner loop, the loop header; if it is in an outer loop scope, we can
+ // ignore it. We need to call this on all blocks we work on.
+ MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
+ MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
+ if (InnerLoop == Loop) {
+ return MBB;
+ } else {
+ // This is either in an outer or an inner loop, and not in ours.
+ if (!LoopBlocks.count(MBB)) {
+ // It's in outer code, ignore it.
+ return nullptr;
+ }
+ assert(InnerLoop);
+ // It's in an inner loop, canonicalize it to the header of that loop.
+ return InnerLoop->getHeader();
+ }
}
- MachineBasicBlock *getBlock() const { return Block; }
-
- const SmallVectorImpl<MachineBasicBlock *> &predecessors() const {
- return Preds;
- }
- const SmallVectorImpl<MachineBasicBlock *> &successors() const {
- return Succs;
+ // A successor can additionally be ignored if it's a branch back to a
+ // natural loop top: when we are in the scope of a loop, we only care
+ // about internal irreducibility and can ignore the loop we are in. We need
+ // to call this on all blocks in a context where they are a successor.
+ MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
+ if (Loop && MBB == Loop->getHeader()) {
+ // Ignore branches going to the loop's natural header.
+ return nullptr;
+ }
+ return canonicalize(MBB);
}
- bool operator==(const MetaBlock &MBB) { return Block == MBB.Block; }
- bool operator!=(const MetaBlock &MBB) { return Block != MBB.Block; }
+ // Potentially insert a new reachable edge, and if so, note it as further
+ // work.
+ void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ assert(MBB == canonicalize(MBB));
+ assert(Succ);
+ // Succ may not be interesting as a successor.
+ Succ = canonicalizeSuccessor(Succ);
+ if (!Succ)
+ return;
+ if (Reachable[MBB].insert(Succ).second) {
+ // For there to be further work, it means that we have
+ // X => MBB => Succ
+ // for some other X, and in that case X => Succ would be a new edge for
+ // us to discover later. However, if we don't care about MBB as a
+ // successor, then we don't care about that anyhow.
+ if (canonicalizeSuccessor(MBB)) {
+ WorkList.emplace_back(MBB, Succ);
+ }
+ }
+ }
};
-class SuccessorList final : public MetaBlock {
- size_t Index;
- size_t Num;
+bool LoopFixer::run() {
+ Header = Loop ? Loop->getHeader() : &*MF.begin();
-public:
- explicit SuccessorList(MachineBasicBlock *MBB)
- : MetaBlock(MBB), Index(0), Num(successors().size()) {}
+ // Identify all the blocks in this loop scope.
+ if (Loop) {
+ for (auto *MBB : Loop->getBlocks()) {
+ LoopBlocks.insert(MBB);
+ }
+ } else {
+ for (auto &MBB : MF) {
+ LoopBlocks.insert(&MBB);
+ }
+ }
- explicit SuccessorList(MachineLoop *Loop)
- : MetaBlock(Loop), Index(0), Num(successors().size()) {}
+ // Compute which (canonicalized) blocks each block can reach.
- bool HasNext() const { return Index != Num; }
+ // Add all the initial work.
+ for (auto *MBB : LoopBlocks) {
+ MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
- MachineBasicBlock *Next() {
- assert(HasNext());
- return successors()[Index++];
+ if (InnerLoop == Loop) {
+ for (auto *Succ : MBB->successors()) {
+ maybeInsert(MBB, Succ);
+ }
+ } else {
+ // It can't be in an outer loop - we loop on LoopBlocks - and so it must
+ // be an inner loop.
+ assert(InnerLoop);
+ // Check if we are the canonical block for this loop.
+ if (canonicalize(MBB) != MBB) {
+ continue;
+ }
+ // The successors are those of the loop.
+ SmallVector<MachineBasicBlock *, 2> ExitBlocks;
+ InnerLoop->getExitBlocks(ExitBlocks);
+ for (auto *Succ : ExitBlocks) {
+ maybeInsert(MBB, Succ);
+ }
+ }
}
-};
-} // end anonymous namespace
+ // Do work until we are all done.
+ while (!WorkList.empty()) {
+ MachineBasicBlock *MBB;
+ MachineBasicBlock *Succ;
+ std::tie(MBB, Succ) = WorkList.pop_back_val();
+ // The worklist item is an edge we just added, so it must have valid blocks
+ // (and not something canonicalized to nullptr).
+ assert(MBB);
+ assert(Succ);
+ // The successor in that pair must also be a valid successor.
+ assert(MBB == canonicalizeSuccessor(MBB));
+ // We recently added MBB => Succ, and that means we may have enabled
+ // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
+ // is correct for both a block and a block representing a loop, as the loop
+ // is natural and so the predecessors are all predecessors of the loop
+ // header, which is the block we have here.
+ for (auto *Pred : MBB->predecessors()) {
+ // Canonicalize, make sure it's relevant, and check it's not the same
+ // block (an update to the block itself doesn't help compute that same
+ // block).
+ Pred = canonicalize(Pred);
+ if (Pred && Pred != MBB) {
+ maybeInsert(Pred, Succ);
+ }
+ }
+ }
-bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
- MachineLoopInfo &MLI,
- MachineLoop *Loop) {
- MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin();
- SetVector<MachineBasicBlock *> RewriteSuccs;
-
- // DFS through Loop's body, looking for irreducible control flow. Loop is
- // natural, and we stay in its body, and we treat any nested loops
- // monolithically, so any cycles we encounter indicate irreducibility.
- SmallPtrSet<MachineBasicBlock *, 8> OnStack;
- SmallPtrSet<MachineBasicBlock *, 8> Visited;
- SmallVector<SuccessorList, 4> LoopWorklist;
- LoopWorklist.push_back(SuccessorList(Header));
- OnStack.insert(Header);
- Visited.insert(Header);
- while (!LoopWorklist.empty()) {
- SuccessorList &Top = LoopWorklist.back();
- if (Top.HasNext()) {
- MachineBasicBlock *Next = Top.Next();
- if (Next == Header || (Loop && !Loop->contains(Next)))
- continue;
- if (LLVM_LIKELY(OnStack.insert(Next).second)) {
- if (!Visited.insert(Next).second) {
- OnStack.erase(Next);
- continue;
- }
- MachineLoop *InnerLoop = MLI.getLoopFor(Next);
- if (InnerLoop != Loop)
- LoopWorklist.push_back(SuccessorList(InnerLoop));
- else
- LoopWorklist.push_back(SuccessorList(Next));
- } else {
- RewriteSuccs.insert(Top.getBlock());
+ // It's now trivial to identify the loopers.
+ SmallPtrSet<MachineBasicBlock *, 4> Loopers;
+ for (auto MBB : LoopBlocks) {
+ if (Reachable[MBB].count(MBB)) {
+ Loopers.insert(MBB);
+ }
+ }
+ // The header cannot be a looper. At the toplevel, LLVM does not allow the
+ // entry to be in a loop, and in a natural loop we should ignore the header.
+ assert(Loopers.count(Header) == 0);
+
+ // Find the entries, loopers reachable from non-loopers.
+ SmallPtrSet<MachineBasicBlock *, 4> Entries;
+ SmallVector<MachineBasicBlock *, 4> SortedEntries;
+ for (auto *Looper : Loopers) {
+ for (auto *Pred : Looper->predecessors()) {
+ Pred = canonicalize(Pred);
+ if (Pred && !Loopers.count(Pred)) {
+ Entries.insert(Looper);
+ SortedEntries.push_back(Looper);
+ break;
}
- continue;
}
- OnStack.erase(Top.getBlock());
- LoopWorklist.pop_back();
}
- // Most likely, we didn't find any irreducible control flow.
- if (LLVM_LIKELY(RewriteSuccs.empty()))
+ // Check if we found irreducible control flow.
+ if (LLVM_LIKELY(Entries.size() <= 1))
return false;
- LLVM_DEBUG(dbgs() << "Irreducible control flow detected!\n");
+ // Sort the entries to ensure a deterministic build.
+ llvm::sort(SortedEntries,
+ [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ auto ANum = A->getNumber();
+ auto BNum = B->getNumber();
+ return ANum < BNum;
+ });
+
+#ifndef NDEBUG
+ for (auto Block : SortedEntries)
+ assert(Block->getNumber() != -1);
+ if (SortedEntries.size() > 1) {
+ for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1;
+ I != E; ++I) {
+ auto ANum = (*I)->getNumber();
+ auto BNum = (*(std::next(I)))->getNumber();
+ assert(ANum != BNum);
+ }
+ }
+#endif
- // Ok. We have irreducible control flow! Create a dispatch block which will
- // contains a jump table to any block in the problematic set of blocks.
+ // Create a dispatch block which will contain a jump table to the entries.
MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
MF.insert(MF.end(), Dispatch);
MLI.changeLoopFor(Dispatch, Loop);
@@ -196,43 +282,43 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MIB.addReg(Reg);
- // Collect all the blocks which need to have their successors rewritten,
- // add the successors to the jump table, and remember their index.
+ // Compute the indices in the superheader, one for each bad block, and
+ // add them as successors.
DenseMap<MachineBasicBlock *, unsigned> Indices;
- SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
- RewriteSuccs.end());
- while (!SuccWorklist.empty()) {
- MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
+ for (auto *MBB : SortedEntries) {
auto Pair = Indices.insert(std::make_pair(MBB, 0));
- if (!Pair.second)
+ if (!Pair.second) {
continue;
+ }
unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
- LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index
- << "\n");
-
Pair.first->second = Index;
- for (auto Pred : MBB->predecessors())
- RewriteSuccs.insert(Pred);
MIB.addMBB(MBB);
Dispatch->addSuccessor(MBB);
+ }
- MetaBlock Meta(MBB);
- for (auto *Succ : Meta.successors())
- if (Succ != Header && (!Loop || Loop->contains(Succ)))
- SuccWorklist.push_back(Succ);
+ // Rewrite the problematic successors for every block that wants to reach the
+ // bad blocks. For simplicity, we just introduce a new block for every edge
+ // we need to rewrite. (Fancier things are possible.)
+
+ SmallVector<MachineBasicBlock *, 4> AllPreds;
+ for (auto *MBB : SortedEntries) {
+ for (auto *Pred : MBB->predecessors()) {
+ if (Pred != Dispatch) {
+ AllPreds.push_back(Pred);
+ }
+ }
}
- // Rewrite the problematic successors for every block in RewriteSuccs.
- // For simplicity, we just introduce a new block for every edge we need to
- // rewrite. Fancier things are possible.
- for (MachineBasicBlock *MBB : RewriteSuccs) {
+ for (MachineBasicBlock *MBB : AllPreds) {
DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
for (auto *Succ : MBB->successors()) {
- if (!Indices.count(Succ))
+ if (!Entries.count(Succ)) {
continue;
+ }
+ // This is a successor we need to rewrite.
MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
: MF.end(),
@@ -266,6 +352,55 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
return true;
}
+class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly Fix Irreducible Control Flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
+ // Visit the function body, which is identified as a null loop.
+ if (LoopFixer(MF, MLI, nullptr).run()) {
+ return true;
+ }
+
+ // Visit all the loops.
+ SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
+ while (!Worklist.empty()) {
+ MachineLoop *Loop = Worklist.pop_back_val();
+ Worklist.append(Loop->begin(), Loop->end());
+ if (LoopFixer(MF, MLI, Loop).run()) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyFixIrreducibleControlFlow::ID = 0;
+INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
+ "Removes irreducible control flow", false, false)
+
+FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
+ return new WebAssemblyFixIrreducibleControlFlow();
+}
+
bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
@@ -275,24 +410,19 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
bool Changed = false;
auto &MLI = getAnalysis<MachineLoopInfo>();
- // Visit the function body, which is identified as a null loop.
- Changed |= VisitLoop(MF, MLI, nullptr);
-
- // Visit all the loops.
- SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
- while (!Worklist.empty()) {
- MachineLoop *CurLoop = Worklist.pop_back_val();
- Worklist.append(CurLoop->begin(), CurLoop->end());
- Changed |= VisitLoop(MF, MLI, CurLoop);
- }
-
- // If we made any changes, completely recompute everything.
- if (LLVM_UNLIKELY(Changed)) {
- LLVM_DEBUG(dbgs() << "Recomputing dominators and loops.\n");
+ // When we modify something, bail out and recompute MLI, then start again, as
+ // we create a new natural loop when we resolve irreducible control flow, and
+ // other loops may become nested in it, etc. In practice this is not an issue
+ // because irreducible control flow is rare; only a few iterations of this
+ // loop are needed here.
+ while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
+ // We rewrote part of the function; recompute MLI and start again.
+ LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
MF.getRegInfo().invalidateLiveness();
MF.RenumberBlocks();
getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
MLI.runOnMachineFunction(MF);
+ Changed = true;
}
return Changed;
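As a rough illustration of the rewrite described in this file's header comment, two irreducible loop entries A and B become reachable only through one dispatch header driven by a label variable. A conceptual C++ sketch (not the MIR the pass emits; all names are illustrative):

  // After the fix: the dispatch header is the single entry of a natural loop,
  // and the original entries are selected by the label variable (br_table).
  static int fixedLoop(bool EnterAtA, int N) {
    int Label = EnterAtA ? 0 : 1; // set on each rewritten edge into the loop
    int Acc = 0;
    while (N-- > 0) {             // the new single-entry natural loop
      switch (Label) {            // the dispatch block's jump table
      case 0: Acc += 1; Label = 1; break; // former entry A, then go to B
      case 1: Acc += 2; Label = 0; break; // former entry B, then go to A
      }
    }
    return Acc;
  }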
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 052c94e9d6a9..2d5aff28d27b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -42,8 +43,7 @@ using namespace llvm;
/// require stricter alignment than the stack pointer itself. Because we need
/// to shift the stack pointer by some unknown amount to force the alignment,
/// we need to record the value of the stack pointer on entry to the function.
-bool WebAssemblyFrameLowering::hasBP(
- const MachineFunction &MF) const {
+bool WebAssemblyFrameLowering::hasBP(const MachineFunction &MF) const {
const auto *RegInfo =
MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
return RegInfo->needsStackRealignment(MF);
@@ -78,36 +78,60 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame(
return !MF.getFrameInfo().hasVarSizedObjects();
}
+// Returns true if this function needs a local user-space stack pointer for its
+// local frame (not for exception handling).
+bool WebAssemblyFrameLowering::needsSPForLocalFrame(
+ const MachineFunction &MF) const {
+ auto &MFI = MF.getFrameInfo();
+ return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF);
+}
+
+// In function with EH pads, we need to make a copy of the value of
+// __stack_pointer global in SP32 register, in order to use it when restoring
+// __stack_pointer after an exception is caught.
+bool WebAssemblyFrameLowering::needsPrologForEH(
+ const MachineFunction &MF) const {
+ auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType();
+ return EHType == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn() && MF.getFrameInfo().hasCalls();
+}
/// Returns true if this function needs a local user-space stack pointer.
/// Unlike a machine stack pointer, the wasm user stack pointer is a global
/// variable, so it is loaded into a register in the prolog.
-bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF,
- const MachineFrameInfo &MFI) const {
- return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF);
+bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF) const {
+ return needsSPForLocalFrame(MF) || needsPrologForEH(MF);
}
/// Returns true if the local user-space stack pointer needs to be written back
-/// to memory by this function (this is not meaningful if needsSP is false). If
-/// false, the stack red zone can be used and only a local SP is needed.
+/// to __stack_pointer global by this function (this is not meaningful if
+/// needsSP is false). If false, the stack red zone can be used and only a local
+/// SP is needed.
bool WebAssemblyFrameLowering::needsSPWriteback(
- const MachineFunction &MF, const MachineFrameInfo &MFI) const {
- assert(needsSP(MF, MFI));
- return MFI.getStackSize() > RedZoneSize || MFI.hasCalls() ||
- MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
+ const MachineFunction &MF) const {
+ auto &MFI = MF.getFrameInfo();
+ assert(needsSP(MF));
+ // When we don't need a local stack pointer for its local frame but only to
+ // support EH, we don't need to write SP back in the epilog, because we don't
+ // bump down the stack pointer in the prolog. We need to write SP back in the
+ // epilog only if
+ // 1. We need SP not only for EH support but also because we actually use
+ // stack or we have a frame address taken.
+ // 2. We cannot use the red zone.
+ bool CanUseRedZone = MFI.getStackSize() <= RedZoneSize && !MFI.hasCalls() &&
+ !MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
+ return needsSPForLocalFrame(MF) && !CanUseRedZone;
}
-static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &InsertAddr,
- MachineBasicBlock::iterator &InsertStore,
- const DebugLoc &DL) {
+void WebAssemblyFrameLowering::writeSPToGlobal(
+ unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const {
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32))
- .addExternalSymbol(SPSymbol)
+ BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32))
+ .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL)
.addReg(SrcReg);
}
@@ -119,9 +143,9 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
"Call frame pseudos should only be used for dynamic stack adjustment");
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
- needsSPWriteback(MF, MF.getFrameInfo())) {
+ needsSPWriteback(MF)) {
DebugLoc DL = I->getDebugLoc();
- writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL);
+ writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL);
}
return MBB.erase(I);
}
@@ -133,7 +157,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
assert(MFI.getCalleeSavedInfo().empty() &&
"WebAssembly should not have callee-saved registers");
- if (!needsSP(MF, MFI)) return;
+ if (!needsSP(MF))
+ return;
uint64_t StackSize = MFI.getStackSize();
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -152,8 +177,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg)
- .addExternalSymbol(SPSymbol);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg)
+ .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL);
bool HasBP = hasBP(MF);
if (HasBP) {
@@ -177,7 +202,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC);
unsigned Alignment = MFI.getMaxAlignment();
assert((1u << countTrailingZeros(Alignment)) == Alignment &&
- "Alignment must be a power of 2");
+ "Alignment must be a power of 2");
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
.addImm((int)~(Alignment - 1));
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
@@ -189,20 +214,19 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
// Unlike most conventional targets (where FP points to the saved FP),
// FP points to the bottom of the fixed-size locals, so we can use positive
// offsets in load/store instructions.
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY),
- WebAssembly::FP32)
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
.addReg(WebAssembly::SP32);
}
- if (StackSize && needsSPWriteback(MF, MFI)) {
- writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
+ if (StackSize && needsSPWriteback(MF)) {
+ writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL);
}
}
void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- auto &MFI = MF.getFrameInfo();
- uint64_t StackSize = MFI.getStackSize();
- if (!needsSP(MF, MFI) || !needsSPWriteback(MF, MFI)) return;
+ uint64_t StackSize = MF.getFrameInfo().getStackSize();
+ if (!needsSP(MF) || !needsSPWriteback(MF))
+ return;
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.getFirstTerminator();
@@ -214,7 +238,6 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
// Restore the stack pointer. If we had fixed-size locals, add the offset
// subtracted in the prolog.
unsigned SPReg = 0;
- MachineBasicBlock::iterator InsertAddr = InsertPt;
if (hasBP(MF)) {
auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
SPReg = FI->getBasePointerVreg();
@@ -222,9 +245,8 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
- InsertAddr =
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
- .addImm(StackSize);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(StackSize);
// In the epilog we don't need to write the result back to the SP32 physreg
// because it won't be used again. We can use a stackified register instead.
SPReg = MRI.createVirtualRegister(PtrRC);
@@ -235,5 +257,5 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
}
- writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL);
+ writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
}
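
Illustration (not from the commit; helper name is assumed): the over-alignment path in emitPrologue builds a CONST_I32 of ~(Alignment - 1) and ANDs it into SP32, which simply rounds the stack pointer down to a multiple of Alignment when Alignment is a power of two:

    #include <cassert>
    #include <cstdint>

    // Rounds SP down to a multiple of Alignment, mirroring the
    // CONST_I32 ~(Alignment - 1) / AND_I32 sequence in emitPrologue.
    uint32_t alignDown(uint32_t SP, uint32_t Alignment) {
      assert((Alignment & (Alignment - 1)) == 0 && "Alignment must be a power of 2");
      return SP & ~(Alignment - 1);
    }
    // alignDown(0xFFF7, 16) == 0xFFF0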
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index fe23e418a3f1..c6fa8261b03f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -22,9 +22,10 @@ namespace llvm {
class MachineFrameInfo;
class WebAssemblyFrameLowering final : public TargetFrameLowering {
- public:
+public:
/// Size of the red zone for the user stack (leaf functions can use this much
- /// space below the stack pointer without writing it back to memory).
+ /// space below the stack pointer without writing it back to __stack_pointer
+ /// global).
// TODO: (ABI) Revisit and decide how large it should be.
static const size_t RedZoneSize = 128;
@@ -34,9 +35,9 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering {
/*TransientStackAlignment=*/16,
/*StackRealignable=*/true) {}
- MachineBasicBlock::iterator eliminateCallFramePseudoInstr(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
/// These methods insert prolog and epilog code into the function.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
@@ -45,13 +46,21 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering {
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- private:
+ bool needsPrologForEH(const MachineFunction &MF) const;
+
+ /// Write SP back to __stack_pointer global.
+ void writeSPToGlobal(unsigned SrcReg, MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &InsertStore,
+ const DebugLoc &DL) const;
+
+private:
bool hasBP(const MachineFunction &MF) const;
- bool needsSP(const MachineFunction &MF, const MachineFrameInfo &MFI) const;
- bool needsSPWriteback(const MachineFunction &MF,
- const MachineFrameInfo &MFI) const;
+ bool needsSPForLocalFrame(const MachineFunction &MF) const;
+ bool needsSP(const MachineFunction &MF) const;
+ bool needsSPWriteback(const MachineFunction &MF) const;
};
-} // end namespace llvm
+} // end namespace llvm
#endif
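
A minimal sketch of the red-zone rule described in the RedZoneSize comment above (an assumed reading for illustration, not the commit's needsSPWriteback implementation): a leaf function whose frame fits in the 128-byte red zone can adjust SP without writing it back to the __stack_pointer global.

    #include <cstdint>

    // Assumed illustrative helper: when a frame can stay in the red zone and
    // therefore skip writing SP back to the __stack_pointer global.
    static bool mayStayInRedZone(uint64_t StackSize, bool HasCalls) {
      constexpr uint64_t RedZoneSize = 128; // as declared above
      return !HasCalls && StackSize <= RedZoneSize;
    }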
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index c12550feabbb..e987d7f7f43a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -21,5 +21,10 @@ HANDLE_NODETYPE(ARGUMENT)
HANDLE_NODETYPE(Wrapper)
HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
+HANDLE_NODETYPE(SHUFFLE)
+HANDLE_NODETYPE(VEC_SHL)
+HANDLE_NODETYPE(VEC_SHR_S)
+HANDLE_NODETYPE(VEC_SHR_U)
+HANDLE_NODETYPE(THROW)
// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
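
The new SHUFFLE node type carries the two input vectors plus sixteen byte-index operands built by LowerVECTOR_SHUFFLE in the WebAssemblyISelLowering.cpp hunk below. A standalone illustration of that index expansion (assumed helper, not the commit's code):

    #include <cstdint>
    #include <vector>

    // Each lane index in the shuffle mask expands to LaneBytes consecutive
    // byte indices; undef lanes (-1 in the mask) are lowered to byte 0.
    std::vector<uint32_t> expandShuffleMask(const std::vector<int> &Mask,
                                            unsigned LaneBytes) {
      std::vector<uint32_t> Bytes;
      for (int Lane : Mask)
        for (unsigned J = 0; J < LaneBytes; ++J)
          Bytes.push_back(Lane == -1 ? 0 : uint32_t(Lane) * LaneBytes + J);
      return Bytes;
    }
    // Mask {2, 0, 3, 1} with LaneBytes = 4 (a v4i32 shuffle) yields
    // {8,9,10,11, 0,1,2,3, 12,13,14,15, 4,5,6,7}.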
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index fdf3a30a5c0e..0a7464cedc90 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -48,6 +48,10 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
ForCodeSize = MF.getFunction().hasFnAttribute(Attribute::OptimizeForSize) ||
MF.getFunction().hasFnAttribute(Attribute::MinSize);
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 283e703e1f6c..003848e34227 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -21,8 +21,10 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
@@ -42,6 +44,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Booleans always contain 0 or 1.
setBooleanContents(ZeroOrOneBooleanContent);
+ // Except in SIMD vectors
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// WebAssembly does not produce floating-point exceptions on normal floating
// point operations.
setHasFloatingPointExceptions(false);
@@ -60,6 +64,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
+ addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
+ }
}
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -77,7 +85,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
- for (auto T : {MVT::f32, MVT::f64}) {
+ for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
// Don't expand the floating-point types to constant pools.
setOperationAction(ISD::ConstantFP, T, Legal);
// Expand floating-point comparisons.
@@ -85,17 +93,17 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
setCondCodeAction(CC, T, Expand);
// Expand floating-point library function operators.
- for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
- ISD::FMA})
+ for (auto Op :
+ {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
setOperationAction(Op, T, Expand);
// Note supported floating-point library function operators that otherwise
// default to expand.
for (auto Op :
{ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
setOperationAction(Op, T, Legal);
- // Support minnan and maxnan, which otherwise default to expand.
- setOperationAction(ISD::FMINNAN, T, Legal);
- setOperationAction(ISD::FMAXNAN, T, Legal);
+ // Support minimum and maximum, which otherwise default to expand.
+ setOperationAction(ISD::FMINIMUM, T, Legal);
+ setOperationAction(ISD::FMAXIMUM, T, Legal);
// WebAssembly currently has no builtin f16 support.
setOperationAction(ISD::FP16_TO_FP, T, Expand);
setOperationAction(ISD::FP_TO_FP16, T, Expand);
@@ -103,24 +111,75 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTruncStoreAction(T, MVT::f16, Expand);
}
- for (auto T : {MVT::i32, MVT::i64}) {
- // Expand unavailable integer operations.
- for (auto Op :
- {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
- ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
- ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
- ISD::SUBE}) {
+ // Support saturating add for i8x16 and i16x8
+ if (Subtarget->hasSIMD128())
+ for (auto T : {MVT::v16i8, MVT::v8i16})
+ for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
+ setOperationAction(Op, T, Legal);
+
+ // Expand unavailable integer operations.
+ for (auto Op :
+ {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
+ ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
+ ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
+ for (auto T : {MVT::i32, MVT::i64}) {
setOperationAction(Op, T, Expand);
}
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
+ setOperationAction(Op, T, Expand);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(Op, MVT::v2i64, Expand);
+ }
+ }
+ }
+
+ // There is no i64x2.mul instruction
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+
+ // We have custom shuffle lowering to expose the shuffle mask
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+ setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ }
+ }
+
+ // Custom lowering since wasm shifts must have a scalar shift amount
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ setOperationAction(Op, T, Custom);
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ setOperationAction(Op, MVT::v2i64, Custom);
}
+ // There are no select instructions for vectors
+ if (Subtarget->hasSIMD128())
+ for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ setOperationAction(Op, T, Expand);
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto T : {MVT::v2i64, MVT::v2f64})
+ setOperationAction(Op, T, Expand);
+ }
+
// As a special case, these operators use the type to mean the type to
// sign-extend from.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget->hasSignExt()) {
+ // Sign extends are legal only when extending a vector extract
+ auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
for (auto T : {MVT::i8, MVT::i16, MVT::i32})
- setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
}
+ for (auto T : MVT::integer_vector_valuetypes())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
// Dynamic stack allocation: use the default expansion.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -142,21 +201,72 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// - Floating-point extending loads.
// - Floating-point truncating stores.
// - i1 extending loads.
+ // - extending/truncating SIMD loads/stores
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto T : MVT::integer_valuetypes())
for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
setLoadExtAction(Ext, T, MVT::i1, Promote);
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
+ MVT::v2f64}) {
+ for (auto MemT : MVT::vector_valuetypes()) {
+ if (MVT(T) != MemT) {
+ setTruncStoreAction(T, MemT, Expand);
+ for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
+ setLoadExtAction(Ext, T, MemT, Expand);
+ }
+ }
+ }
+ }
+
+ // Expand additional SIMD ops that V8 hasn't implemented yet
+ if (Subtarget->hasSIMD128() && !Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ }
+
+ // Custom lower lane accesses to expand out variable indices
+ if (Subtarget->hasSIMD128()) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ for (auto T : {MVT::v2i64, MVT::v2f64}) {
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ }
+ }
+ }
// Trap lowers to wasm unreachable
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// Exception handling intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setMaxAtomicSizeInBitsSupported(64);
}
+TargetLowering::AtomicExpansionKind
+WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ // We have wasm instructions for these
+ switch (AI->getOperation()) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ case AtomicRMWInst::Xchg:
+ return AtomicExpansionKind::None;
+ default:
+ break;
+ }
+ return AtomicExpansionKind::CmpXChg;
+}
+
FastISel *WebAssemblyTargetLowering::createFastISel(
FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
return WebAssembly::createFastISel(FuncInfo, LibInfo);
@@ -171,7 +281,8 @@ bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
EVT VT) const {
unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
- if (BitWidth > 1 && BitWidth < 8) BitWidth = 8;
+ if (BitWidth > 1 && BitWidth < 8)
+ BitWidth = 8;
if (BitWidth > 64) {
// The shift will be lowered to a libcall, and compiler-rt libcalls expect
@@ -190,17 +301,11 @@ MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
-static MachineBasicBlock *
-LowerFPToInt(
- MachineInstr &MI,
- DebugLoc DL,
- MachineBasicBlock *BB,
- const TargetInstrInfo &TII,
- bool IsUnsigned,
- bool Int64,
- bool Float64,
- unsigned LoweredOpcode
-) {
+static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *BB,
+ const TargetInstrInfo &TII,
+ bool IsUnsigned, bool Int64,
+ bool Float64, unsigned LoweredOpcode) {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned OutReg = MI.getOperand(0).getReg();
@@ -232,8 +337,7 @@ LowerFPToInt(
// Transfer the remainder of BB and its successor edges to DoneMBB.
DoneMBB->splice(DoneMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(TrueMBB);
@@ -255,45 +359,33 @@ LowerFPToInt(
if (IsUnsigned) {
Tmp0 = InReg;
} else {
- BuildMI(BB, DL, TII.get(Abs), Tmp0)
- .addReg(InReg);
+ BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
}
BuildMI(BB, DL, TII.get(FConst), Tmp1)
.addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
- BuildMI(BB, DL, TII.get(LT), CmpReg)
- .addReg(Tmp0)
- .addReg(Tmp1);
+ BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
// For unsigned numbers, we have to do a separate comparison with zero.
if (IsUnsigned) {
Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
- unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ unsigned SecondCmpReg =
+ MRI.createVirtualRegister(&WebAssembly::I32RegClass);
unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(BB, DL, TII.get(FConst), Tmp1)
.addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
- BuildMI(BB, DL, TII.get(GE), SecondCmpReg)
- .addReg(Tmp0)
- .addReg(Tmp1);
- BuildMI(BB, DL, TII.get(And), AndReg)
- .addReg(CmpReg)
- .addReg(SecondCmpReg);
+ BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
+ BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
CmpReg = AndReg;
}
- BuildMI(BB, DL, TII.get(Eqz), EqzReg)
- .addReg(CmpReg);
+ BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
// Create the CFG diamond to select between doing the conversion or using
// the substitute value.
- BuildMI(BB, DL, TII.get(WebAssembly::BR_IF))
- .addMBB(TrueMBB)
- .addReg(EqzReg);
- BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg)
- .addReg(InReg);
- BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR))
- .addMBB(DoneMBB);
- BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg)
- .addImm(Substitute);
+ BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
+ BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
+ BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
+ BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
.addReg(FalseReg)
.addMBB(FalseMBB)
@@ -303,16 +395,14 @@ LowerFPToInt(
return DoneMBB;
}
-MachineBasicBlock *
-WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
- MachineInstr &MI,
- MachineBasicBlock *BB
-) const {
+MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unexpected instr type to insert");
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case WebAssembly::FP_TO_SINT_I32_F32:
return LowerFPToInt(MI, DL, BB, TII, false, false, false,
WebAssembly::I32_TRUNC_S_F32);
@@ -337,17 +427,17 @@ WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
case WebAssembly::FP_TO_UINT_I64_F64:
return LowerFPToInt(MI, DL, BB, TII, true, true, true,
WebAssembly::I64_TRUNC_U_F64);
- llvm_unreachable("Unexpected instruction to emit with custom inserter");
+ llvm_unreachable("Unexpected instruction to emit with custom inserter");
}
}
-const char *WebAssemblyTargetLowering::getTargetNodeName(
- unsigned Opcode) const {
+const char *
+WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
- case WebAssemblyISD::FIRST_NUMBER:
- break;
-#define HANDLE_NODETYPE(NODE) \
- case WebAssemblyISD::NODE: \
+ case WebAssemblyISD::FIRST_NUMBER:
+ break;
+#define HANDLE_NODETYPE(NODE) \
+ case WebAssemblyISD::NODE: \
return "WebAssemblyISD::" #NODE;
#include "WebAssemblyISD.def"
#undef HANDLE_NODETYPE
@@ -362,21 +452,21 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
// WebAssembly register class.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- case 'r':
- assert(VT != MVT::iPTR && "Pointer MVT not expected here");
- if (Subtarget->hasSIMD128() && VT.isVector()) {
- if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, &WebAssembly::V128RegClass);
- }
- if (VT.isInteger() && !VT.isVector()) {
- if (VT.getSizeInBits() <= 32)
- return std::make_pair(0U, &WebAssembly::I32RegClass);
- if (VT.getSizeInBits() <= 64)
- return std::make_pair(0U, &WebAssembly::I64RegClass);
- }
- break;
- default:
- break;
+ case 'r':
+ assert(VT != MVT::iPTR && "Pointer MVT not expected here");
+ if (Subtarget->hasSIMD128() && VT.isVector()) {
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &WebAssembly::V128RegClass);
+ }
+ if (VT.isInteger() && !VT.isVector()) {
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &WebAssembly::I32RegClass);
+ if (VT.getSizeInBits() <= 64)
+ return std::make_pair(0U, &WebAssembly::I64RegClass);
+ }
+ break;
+ default:
+ break;
}
}
@@ -395,16 +485,17 @@ bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM,
- Type *Ty,
- unsigned AS,
+ Type *Ty, unsigned AS,
Instruction *I) const {
// WebAssembly offsets are added as unsigned without wrapping. The
// isLegalAddressingMode gives us no way to determine if wrapping could be
// happening, so we approximate this by accepting only non-negative offsets.
- if (AM.BaseOffs < 0) return false;
+ if (AM.BaseOffs < 0)
+ return false;
// WebAssembly has no scale register operands.
- if (AM.Scale != 0) return false;
+ if (AM.Scale != 0)
+ return false;
// Everything else is legal.
return true;
@@ -418,7 +509,8 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
// for the kinds of things that LLVM uses this for (merging adjacent stores
// of constants, etc.), WebAssembly implementations will either want the
// unaligned access or they'll split anyway.
- if (Fast) *Fast = true;
+ if (Fast)
+ *Fast = true;
return true;
}
@@ -438,6 +530,46 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
return TargetLowering::getSetCCResultType(DL, C, VT);
}
+bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::wasm_atomic_notify:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ // atomic.notify instruction does not really load the memory specified with
+ // this argument, but MachineMemOperand should either be load or store, so
+ // we set this to a load.
+ // FIXME Volatile isn't really correct, but currently all LLVM atomic
+ // instructions are treated as volatiles in the backend, so we should be
+ // consistent. The same applies for wasm_atomic_wait intrinsics too.
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::wasm_atomic_wait_i32:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::wasm_atomic_wait_i64:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i64;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 8;
+ Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
+ return true;
+ default:
+ return false;
+ }
+}
+
//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//
@@ -465,8 +597,9 @@ static bool CallingConvSupported(CallingConv::ID CallConv) {
CallConv == CallingConv::CXX_FAST_TLS;
}
-SDValue WebAssemblyTargetLowering::LowerCall(
- CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const {
+SDValue
+WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc DL = CLI.DL;
SDValue Chain = CLI.Chain;
@@ -568,9 +701,9 @@ SDValue WebAssemblyTargetLowering::LowerCall(
FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
DAG.getConstant(Offset, DL, PtrVT));
- Chains.push_back(DAG.getStore(
- Chain, DL, Arg, Add,
- MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
+ Chains.push_back(
+ DAG.getStore(Chain, DL, Arg, Add,
+ MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
}
if (!Chains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
@@ -588,7 +721,8 @@ SDValue WebAssemblyTargetLowering::LowerCall(
Ops.append(OutVals.begin(),
IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
// Add a pointer to the vararg buffer.
- if (IsVarArg) Ops.push_back(FINode);
+ if (IsVarArg)
+ Ops.push_back(FINode);
SmallVector<EVT, 8> InTys;
for (const auto &In : Ins) {
@@ -682,11 +816,10 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
// Ignore In.getOrigAlign() because all our arguments are passed in
// registers.
- InVals.push_back(
- In.Used
- ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
- DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
- : DAG.getUNDEF(In.VT));
+ InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
+ DAG.getTargetConstant(InVals.size(),
+ DL, MVT::i32))
+ : DAG.getUNDEF(In.VT));
// Record the number and types of arguments.
MFI->addParam(In.VT);
@@ -706,12 +839,18 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
MFI->addParam(PtrVT);
}
- // Record the number and types of results.
+ // Record the number and types of arguments and results.
SmallVector<MVT, 4> Params;
SmallVector<MVT, 4> Results;
- ComputeSignatureVTs(MF.getFunction(), DAG.getTarget(), Params, Results);
+ ComputeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
+ DAG.getTarget(), Params, Results);
for (MVT VT : Results)
MFI->addResult(VT);
+ // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
+ // the param logic here with ComputeSignatureVTs
+ assert(MFI->getParams().size() == Params.size() &&
+ std::equal(MFI->getParams().begin(), MFI->getParams().end(),
+ Params.begin()));
return Chain;
}
@@ -724,34 +863,47 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
switch (Op.getOpcode()) {
- default:
- llvm_unreachable("unimplemented operation lowering");
- return SDValue();
- case ISD::FrameIndex:
- return LowerFrameIndex(Op, DAG);
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
- case ISD::ExternalSymbol:
- return LowerExternalSymbol(Op, DAG);
- case ISD::JumpTable:
- return LowerJumpTable(Op, DAG);
- case ISD::BR_JT:
- return LowerBR_JT(Op, DAG);
- case ISD::VASTART:
- return LowerVASTART(Op, DAG);
- case ISD::BlockAddress:
- case ISD::BRIND:
- fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
- return SDValue();
- case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
- fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
- return SDValue();
- case ISD::FRAMEADDR:
- return LowerFRAMEADDR(Op, DAG);
- case ISD::CopyToReg:
- return LowerCopyToReg(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN:
- return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ default:
+ llvm_unreachable("unimplemented operation lowering");
+ return SDValue();
+ case ISD::FrameIndex:
+ return LowerFrameIndex(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol:
+ return LowerExternalSymbol(Op, DAG);
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ case ISD::BlockAddress:
+ case ISD::BRIND:
+ fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
+ return SDValue();
+ case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
+ fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
+ return SDValue();
+ case ISD::FRAMEADDR:
+ return LowerFRAMEADDR(Op, DAG);
+ case ISD::CopyToReg:
+ return LowerCopyToReg(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerAccessVectorElement(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG:
+ return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return LowerShift(Op, DAG);
}
}
@@ -763,21 +915,20 @@ SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
// the FI to some LEA-like instruction, but since we don't have that, we
// need to insert some kind of instruction that can take an FI operand and
// produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
- // copy_local between Op and its FI operand.
+ // local.copy between Op and its FI operand.
SDValue Chain = Op.getOperand(0);
SDLoc DL(Op);
unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
EVT VT = Src.getValueType();
- SDValue Copy(
- DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
- : WebAssembly::COPY_I64,
- DL, VT, Src),
- 0);
+ SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
+ : WebAssembly::COPY_I64,
+ DL, VT, Src),
+ 0);
return Op.getNode()->getNumValues() == 1
? DAG.getCopyToReg(Chain, DL, Reg, Copy)
- : DAG.getCopyToReg(Chain, DL, Reg, Copy, Op.getNumOperands() == 4
- ? Op.getOperand(3)
- : SDValue());
+ : DAG.getCopyToReg(Chain, DL, Reg, Copy,
+ Op.getNumOperands() == 4 ? Op.getOperand(3)
+ : SDValue());
}
return SDValue();
}
@@ -817,8 +968,9 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
}
-SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
- SDValue Op, SelectionDAG &DAG) const {
+SDValue
+WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
const auto *ES = cast<ExternalSymbolSDNode>(Op);
EVT VT = Op.getValueType();
@@ -829,9 +981,10 @@ SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
// we don't know anything about the symbol other than its name, because all
// external symbols used in target-independent SelectionDAG code are for
// functions.
- return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
- DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
- /*TargetFlags=*/0x1));
+ return DAG.getNode(
+ WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
+ WebAssemblyII::MO_SYMBOL_FUNCTION));
}
SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
@@ -860,7 +1013,8 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
// Add an operand for each case.
- for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB));
+ for (auto MBB : MBBs)
+ Ops.push_back(DAG.getBasicBlock(MBB));
// TODO: For now, we just pick something arbitrary for a default case for now.
// We really want to sniff out the guard and put in the real default case (and
@@ -893,10 +1047,181 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
default:
return {}; // Don't custom lower most intrinsics.
- case Intrinsic::wasm_lsda:
- // TODO For now, just return 0 not to crash
- return DAG.getConstant(0, DL, Op.getValueType());
+ case Intrinsic::wasm_lsda: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ auto &Context = MF.getMMI().getContext();
+ MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
+ Twine(MF.getFunctionNumber()));
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getMCSymbol(S, PtrVT));
+ }
+ }
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ SDLoc DL(Op);
+
+ switch (IntNo) {
+ default:
+ return {}; // Don't custom lower most intrinsics.
+
+ case Intrinsic::wasm_throw: {
+ int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
+ switch (Tag) {
+ case CPP_EXCEPTION: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ const char *SymName = MF.createExternalSymbolName("__cpp_exception");
+ SDValue SymNode =
+ DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+ DAG.getTargetExternalSymbol(
+ SymName, PtrVT, WebAssemblyII::MO_SYMBOL_EVENT));
+ return DAG.getNode(WebAssemblyISD::THROW, DL,
+ MVT::Other, // outchain type
+ {
+ Op.getOperand(0), // inchain
+ SymNode, // exception symbol
+ Op.getOperand(3) // thrown value
+ });
+ }
+ default:
+ llvm_unreachable("Invalid tag!");
+ }
+ break;
+ }
+ }
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If sign extension operations are disabled, allow sext_inreg only if operand
+ // is a vector extract. SIMD does not depend on sign extension operations, but
+ // allowing sext_inreg in this context lets us have simple patterns to select
+ // extract_lane_s instructions. Expanding sext_inreg everywhere would be
+ // simpler in this file, but would necessitate large and brittle patterns to
+ // undo the expansion and select extract_lane_s instructions.
+ assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
+ if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+ return Op;
+ // Otherwise expand
+ return SDValue();
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
+ MVT VecType = Op.getOperand(0).getSimpleValueType();
+ assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
+ size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
+
+ // Space for two vector args and sixteen mask indices
+ SDValue Ops[18];
+ size_t OpIdx = 0;
+ Ops[OpIdx++] = Op.getOperand(0);
+ Ops[OpIdx++] = Op.getOperand(1);
+
+ // Expand mask indices to byte indices and materialize them as operands
+ for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
+ for (size_t J = 0; J < LaneBytes; ++J) {
+ // Lower undefs (represented by -1 in mask) to zero
+ uint64_t ByteIndex =
+ Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
+ Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
+ }
+ }
+
+ return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Allow constant lane indices, expand variable lane indices
+ SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
+ if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
+ return Op;
+ else
+ // Perform default expansion
+ return SDValue();
+}
+
+static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
+ EVT LaneT = Op.getSimpleValueType().getVectorElementType();
+ // 32-bit and 64-bit unrolled shifts will have proper semantics
+ if (LaneT.bitsGE(MVT::i32))
+ return DAG.UnrollVectorOp(Op.getNode());
+ // Otherwise mask the shift value to get proper semantics from 32-bit shift
+ SDLoc DL(Op);
+ SDValue ShiftVal = Op.getOperand(1);
+ uint64_t MaskVal = LaneT.getSizeInBits() - 1;
+ SDValue MaskedShiftVal = DAG.getNode(
+ ISD::AND, // mask opcode
+ DL, ShiftVal.getValueType(), // masked value type
+ ShiftVal, // original shift value operand
+ DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
+ );
+
+ return DAG.UnrollVectorOp(
+ DAG.getNode(Op.getOpcode(), // original shift opcode
+ DL, Op.getValueType(), // original return type
+ Op.getOperand(0), // original vector operand,
+ MaskedShiftVal // new masked shift value operand
+ )
+ .getNode());
+}
+
+SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ // Only manually lower vector shifts
+ assert(Op.getSimpleValueType().isVector());
+
+ // Expand all vector shifts until V8 fixes its implementation
+ // TODO: remove this once V8 is fixed
+ if (!Subtarget->hasUnimplementedSIMD128())
+ return UnrollVectorShift(Op, DAG);
+
+ // Unroll non-splat vector shifts
+ BuildVectorSDNode *ShiftVec;
+ SDValue SplatVal;
+ if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
+ !(SplatVal = ShiftVec->getSplatValue()))
+ return UnrollVectorShift(Op, DAG);
+
+ // All splats except i64x2 const splats are handled by patterns
+ ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
+ if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
+ return Op;
+
+ // i64x2 const splats are custom lowered to avoid unnecessary wraps
+ unsigned Opcode;
+ switch (Op.getOpcode()) {
+ case ISD::SHL:
+ Opcode = WebAssemblyISD::VEC_SHL;
+ break;
+ case ISD::SRA:
+ Opcode = WebAssemblyISD::VEC_SHR_S;
+ break;
+ case ISD::SRL:
+ Opcode = WebAssemblyISD::VEC_SHR_U;
+ break;
+ default:
+ llvm_unreachable("unexpected opcode");
}
+ APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
+ return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstant(Shift, DL, MVT::i32));
}
//===----------------------------------------------------------------------===//
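
Illustration of the masking in UnrollVectorShift above (assumed helper, not part of the commit): lanes narrower than 32 bits are shifted as 32-bit values after unrolling, so the shift amount is first reduced modulo the lane width (MaskVal = lane bits - 1) to keep per-lane behaviour.

    #include <cstdint>

    // An i8 lane shifted by 9: masking with (8 - 1) gives the per-lane result.
    uint8_t shiftI8LaneLeft(uint8_t Lane, uint32_t Amount) {
      uint32_t Masked = Amount & (8 - 1);              // 9 & 7 == 1
      return uint8_t((uint32_t(Lane) << Masked) & 0xFF);
    }
    // shiftI8LaneLeft(1, 9) == 2; the unmasked 32-bit shift (1 << 9) truncated
    // to 8 bits would give 0 instead.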
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 79819493ac6a..59f4230ed889 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -29,21 +29,22 @@ enum NodeType : unsigned {
#undef HANDLE_NODETYPE
};
-} // end namespace WebAssemblyISD
+} // end namespace WebAssemblyISD
class WebAssemblySubtarget;
class WebAssemblyTargetMachine;
class WebAssemblyTargetLowering final : public TargetLowering {
- public:
+public:
WebAssemblyTargetLowering(const TargetMachine &TM,
const WebAssemblySubtarget &STI);
- private:
+private:
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
/// right decision when generating code for different targets.
const WebAssemblySubtarget *Subtarget;
+ AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
@@ -52,9 +53,9 @@ class WebAssemblyTargetLowering final : public TargetLowering {
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
- std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
- const TargetRegisterInfo *TRI, StringRef Constraint,
- MVT VT) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -66,6 +67,9 @@ class WebAssemblyTargetLowering final : public TargetLowering {
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
@@ -94,13 +98,18 @@ class WebAssemblyTargetLowering final : public TargetLowering {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
};
namespace WebAssembly {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
-} // end namespace WebAssembly
+} // end namespace WebAssembly
-} // end namespace llvm
+} // end namespace llvm
#endif
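
The shouldExpandAtomicRMWInIR override added above returns AtomicExpansionKind::CmpXChg for read-modify-write operations WebAssembly has no instruction for; LLVM's atomic-expansion pass then rewrites them into a compare-exchange retry loop, which selects to the atomic cmpxchg instructions defined in the WebAssemblyInstrAtomics.td changes below. A rough C++ equivalent of that loop for an atomic nand (illustration only, assumed names):

    #include <atomic>
    #include <cstdint>

    uint32_t atomicNand(std::atomic<uint32_t> &Addr, uint32_t Val) {
      uint32_t Expected = Addr.load();
      // compare_exchange_weak refreshes Expected with the current value on
      // failure, so the loop retries with up-to-date data.
      while (!Addr.compare_exchange_weak(Expected, ~(Expected & Val)))
        ;
      return Expected; // the old value, as atomicrmw returns
    }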
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index d879932b3232..5fb8ef90bc43 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -16,10 +16,16 @@
// Atomic loads
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> inst = -1> {
+ defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+ inst>,
+ Requires<[HasAtomics]>;
+}
+
defm ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>;
defm ATOMIC_LOAD_I64 : WebAssemblyLoad<I64, "i64.atomic.load", 0xfe11>;
-} // Defs = [ARGUMENTS]
// Select loads with no constant offset.
let Predicates = [HasAtomics] in {
@@ -54,13 +60,11 @@ def : LoadPatExternSymOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
// Extending loads. Note that there are only zero-extending atomic loads, no
// sign-extending loads.
-let Defs = [ARGUMENTS] in {
defm ATOMIC_LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load8_u", 0xfe12>;
defm ATOMIC_LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load16_u", 0xfe13>;
defm ATOMIC_LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load8_u", 0xfe14>;
defm ATOMIC_LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load16_u", 0xfe15>;
defm ATOMIC_LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load32_u", 0xfe16>;
-} // Defs = [ARGUMENTS]
// Fragments for extending loads. These are different from regular loads because
// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and
@@ -110,7 +114,7 @@ def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatNoOffset<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatNoOffset<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i32
+// 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
// Zero-extending loads with constant offset
def : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, ATOMIC_LOAD8_U_I32>;
@@ -192,10 +196,8 @@ def : LoadPatExternSymOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
// Atomic stores
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
defm ATOMIC_STORE_I32 : WebAssemblyStore<I32, "i32.atomic.store", 0xfe17>;
defm ATOMIC_STORE_I64 : WebAssemblyStore<I64, "i64.atomic.store", 0xfe18>;
-} // Defs = [ARGUMENTS]
// We need an 'atomic' version of store patterns because store and atomic_store
// nodes have different operand orders:
@@ -255,13 +257,11 @@ def : AStorePatExternSymOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
} // Predicates = [HasAtomics]
// Truncating stores.
-let Defs = [ARGUMENTS] in {
defm ATOMIC_STORE8_I32 : WebAssemblyStore<I32, "i32.atomic.store8", 0xfe19>;
defm ATOMIC_STORE16_I32 : WebAssemblyStore<I32, "i32.atomic.store16", 0xfe1a>;
defm ATOMIC_STORE8_I64 : WebAssemblyStore<I64, "i64.atomic.store8", 0xfe1b>;
defm ATOMIC_STORE16_I64 : WebAssemblyStore<I64, "i64.atomic.store16", 0xfe1c>;
defm ATOMIC_STORE32_I64 : WebAssemblyStore<I64, "i64.atomic.store32", 0xfe1d>;
-} // Defs = [ARGUMENTS]
// Fragments for truncating stores.
@@ -333,8 +333,6 @@ def : AStorePatExternSymOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
// Atomic binary read-modify-writes
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
defm "" : I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
@@ -346,83 +344,82 @@ multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0xfe1e>;
defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0xfe1f>;
defm ATOMIC_RMW8_U_ADD_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.add", 0xfe20>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.add_u", 0xfe20>;
defm ATOMIC_RMW16_U_ADD_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.add", 0xfe21>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.add_u", 0xfe21>;
defm ATOMIC_RMW8_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.add", 0xfe22>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.add_u", 0xfe22>;
defm ATOMIC_RMW16_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.add", 0xfe23>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.add_u", 0xfe23>;
defm ATOMIC_RMW32_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.add", 0xfe24>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.add_u", 0xfe24>;
defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0xfe25>;
defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0xfe26>;
defm ATOMIC_RMW8_U_SUB_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.sub", 0xfe27>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.sub_u", 0xfe27>;
defm ATOMIC_RMW16_U_SUB_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.sub", 0xfe28>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.sub_u", 0xfe28>;
defm ATOMIC_RMW8_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.sub", 0xfe29>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.sub_u", 0xfe29>;
defm ATOMIC_RMW16_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.sub", 0xfe2a>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.sub_u", 0xfe2a>;
defm ATOMIC_RMW32_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.sub", 0xfe2b>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.sub_u", 0xfe2b>;
defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0xfe2c>;
defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0xfe2d>;
defm ATOMIC_RMW8_U_AND_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.and", 0xfe2e>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.and_u", 0xfe2e>;
defm ATOMIC_RMW16_U_AND_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.and", 0xfe2f>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.and_u", 0xfe2f>;
defm ATOMIC_RMW8_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.and", 0xfe30>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.and_u", 0xfe30>;
defm ATOMIC_RMW16_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.and", 0xfe31>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.and_u", 0xfe31>;
defm ATOMIC_RMW32_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.and", 0xfe32>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.and_u", 0xfe32>;
defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0xfe33>;
defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0xfe34>;
defm ATOMIC_RMW8_U_OR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.or", 0xfe35>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.or_u", 0xfe35>;
defm ATOMIC_RMW16_U_OR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.or", 0xfe36>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.or_u", 0xfe36>;
defm ATOMIC_RMW8_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.or", 0xfe37>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.or_u", 0xfe37>;
defm ATOMIC_RMW16_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.or", 0xfe38>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.or_u", 0xfe38>;
defm ATOMIC_RMW32_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.or", 0xfe39>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.or_u", 0xfe39>;
defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0xfe3a>;
defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0xfe3b>;
defm ATOMIC_RMW8_U_XOR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xor", 0xfe3c>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xor_u", 0xfe3c>;
defm ATOMIC_RMW16_U_XOR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xor", 0xfe3d>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xor_u", 0xfe3d>;
defm ATOMIC_RMW8_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xor", 0xfe3e>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xor_u", 0xfe3e>;
defm ATOMIC_RMW16_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xor", 0xfe3f>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xor_u", 0xfe3f>;
defm ATOMIC_RMW32_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xor", 0xfe40>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xor_u", 0xfe40>;
defm ATOMIC_RMW_XCHG_I32 :
WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0xfe41>;
defm ATOMIC_RMW_XCHG_I64 :
WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0xfe42>;
defm ATOMIC_RMW8_U_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xchg", 0xfe43>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xchg_u", 0xfe43>;
defm ATOMIC_RMW16_U_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xchg", 0xfe44>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xchg_u", 0xfe44>;
defm ATOMIC_RMW8_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xchg", 0xfe45>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xchg_u", 0xfe45>;
defm ATOMIC_RMW16_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xchg", 0xfe46>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xchg_u", 0xfe46>;
defm ATOMIC_RMW32_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xchg", 0xfe47>;
-}
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0xfe47>;
// Select binary RMWs with no constant offset.
class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
@@ -533,7 +530,7 @@ class sext_bin_rmw_8_64<PatFrag kind> :
PatFrag<(ops node:$addr, node:$val),
(anyext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
class sext_bin_rmw_16_64<PatFrag kind> : sext_bin_rmw_8_64<kind>;
-// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_s/i32
+// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s
// Patterns for various addressing modes for truncating-extending binary RMWs.
multiclass BinRMWTruncExtPattern<
@@ -655,3 +652,368 @@ defm : BinRMWTruncExtPattern<
ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32,
ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>;
} // Predicates = [HasAtomics]
+
+//===----------------------------------------------------------------------===//
+// Atomic ternary read-modify-writes
+//===----------------------------------------------------------------------===//
+
+// TODO LLVM IR's cmpxchg instruction returns a pair of {loaded value, success
+// flag}. When we use the success flag or both values, we can't make use of i64
+// truncate/extend versions of instructions for now, which is suboptimal.
+// Consider adding a pass after instruction selection that optimizes this case
+// if it is frequent.
+
+multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
+ defm "" : I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
+ rc:$new),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"),
+ !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+}
+
+defm ATOMIC_RMW_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw.cmpxchg", 0xfe48>;
+defm ATOMIC_RMW_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw.cmpxchg", 0xfe49>;
+defm ATOMIC_RMW8_U_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw8.cmpxchg_u", 0xfe4a>;
+defm ATOMIC_RMW16_U_CMPXCHG_I32 :
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw16.cmpxchg_u", 0xfe4b>;
+defm ATOMIC_RMW8_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw8.cmpxchg_u", 0xfe4c>;
+defm ATOMIC_RMW16_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw16.cmpxchg_u", 0xfe4d>;
+defm ATOMIC_RMW32_U_CMPXCHG_I64 :
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0xfe4e>;
+
+// Select ternary RMWs with no constant offset.
+class TerRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)),
+ (inst 0, 0, I32:$addr, ty:$exp, ty:$new)>;
+
+// Select ternary RMWs with a constant offset.
+
+// Pattern with address + immediate offset
+class TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
+ Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
+ (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+class TerRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ ty:$exp, ty:$new)),
+ (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+class TerRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ ty:$exp, ty:$new)),
+ (inst 0, texternalsym:$off, I32:$addr, ty:$exp, ty:$new)>;
+
+// Select ternary RMWs with just a constant offset.
+class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
+ (inst 0, imm:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+class TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+class TerRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
+ Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$exp, ty:$new)),
+ (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+
+// Patterns for various addressing modes.
+multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
+ NI inst_64> {
+ def : TerRMWPatNoOffset<i32, rmw_32, inst_32>;
+ def : TerRMWPatNoOffset<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+ def : TerRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+ def : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+ def : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
+
+ def : TerRMWPatGlobalAddr<i32, rmw_32, inst_32>;
+ def : TerRMWPatGlobalAddr<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatExternalSym<i32, rmw_32, inst_32>;
+ def : TerRMWPatExternalSym<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
+ def : TerRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
+ ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>;
+} // Predicates = [HasAtomics]
+
+// Truncating & zero-extending ternary RMW patterns.
+// DAG legalization & optimization before instruction selection may introduce
+// additional nodes such as anyext or assertzext depending on operand types.
+class zext_ter_rmw_8_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (and (i32 (kind node:$addr, node:$exp, node:$new)), 255)>;
+class zext_ter_rmw_16_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (and (i32 (kind node:$addr, node:$exp, node:$new)), 65535)>;
+class zext_ter_rmw_8_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (zext (i32 (assertzext (i32 (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))))>;
+class zext_ter_rmw_16_64<PatFrag kind> : zext_ter_rmw_8_64<kind>;
+class zext_ter_rmw_32_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (zext (i32 (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))>;
+
+// Truncating & sign-extending ternary RMW patterns.
+// We match subword RMWs (for 32-bit) and anyext RMWs (for 64-bit) and select a
+// zext RMW; the next instruction will be sext_inreg which is selected by
+// itself.
+class sext_ter_rmw_8_32<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (kind node:$addr, node:$exp, node:$new)>;
+class sext_ter_rmw_16_32<PatFrag kind> : sext_ter_rmw_8_32<kind>;
+class sext_ter_rmw_8_64<PatFrag kind> :
+ PatFrag<(ops node:$addr, node:$exp, node:$new),
+ (anyext (i32 (assertzext (i32
+ (kind node:$addr,
+ (i32 (trunc (i64 node:$exp))),
+ (i32 (trunc (i64 node:$new))))))))>;
+class sext_ter_rmw_16_64<PatFrag kind> : sext_ter_rmw_8_64<kind>;
+// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s
+
+// Patterns for various addressing modes for truncating-extending ternary RMWs.
+multiclass TerRMWTruncExtPattern<
+ PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+ NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
+ // Truncating-extending ternary RMWs with no constant offset
+ def : TerRMWPatNoOffset<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatNoOffset<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatNoOffset<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatNoOffset<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatNoOffset<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatNoOffset<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatNoOffset<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ // Truncating-extending ternary RMWs with a constant offset
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+ def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+
+ def : TerRMWPatGlobalAddr<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddr<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatGlobalAddr<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatGlobalAddr<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddr<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatExternalSym<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternalSym<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatExternalSym<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatExternalSym<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternalSym<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternalSym<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternalSym<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ // Truncating-extending ternary RMWs with just a constant offset
+ def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : TerRMWTruncExtPattern<
+ atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
+ ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
+ ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
+ ATOMIC_RMW32_U_CMPXCHG_I64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic wait / notify
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in {
+defm ATOMIC_NOTIFY :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+ "atomic.notify \t${off}, ${p2align}", 0xfe00>;
+let mayLoad = 1 in {
+defm ATOMIC_WAIT_I32 :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp, I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i32.atomic.wait \t${off}, ${p2align}", 0xfe01>;
+defm ATOMIC_WAIT_I64 :
+ I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp, I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i64.atomic.wait \t${off}, ${p2align}", 0xfe02>;
+} // mayLoad = 1
+} // hasSideEffects = 1
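+
+// As a rough usage sketch (assuming the usual int_wasm_* -> llvm.wasm.* name
+// mapping), IR such as
+//   %res = call i32 @llvm.wasm.atomic.wait.i32(i32* %p, i32 %exp, i64 %timeout)
+//   %woken = call i32 @llvm.wasm.atomic.notify(i32* %p, i32 %count)
+// is selected as ATOMIC_WAIT_I32 (i32.atomic.wait) and ATOMIC_NOTIFY
+// (atomic.notify) through the patterns below.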
+
+let Predicates = [HasAtomics] in {
+// Select notifys with no constant offset.
+class NotifyPatNoOffset<Intrinsic kind> :
+ Pat<(i32 (kind I32:$addr, I32:$count)),
+ (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
+def : NotifyPatNoOffset<int_wasm_atomic_notify>;
+
+// Select notifys with a constant offset.
+
+// Pattern with address + immediate offset
+class NotifyPatImmOff<Intrinsic kind, PatFrag operand> :
+ Pat<(i32 (kind (operand I32:$addr, imm:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
+def : NotifyPatImmOff<int_wasm_atomic_notify, regPlusImm>;
+def : NotifyPatImmOff<int_wasm_atomic_notify, or_is_add>;
+
+class NotifyPatGlobalAddr<Intrinsic kind> :
+ Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ I32:$count)),
+ (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>;
+def : NotifyPatGlobalAddr<int_wasm_atomic_notify>;
+
+class NotifyPatExternalSym<Intrinsic kind> :
+ Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ I32:$count)),
+ (ATOMIC_NOTIFY 0, texternalsym:$off, I32:$addr, I32:$count)>;
+def : NotifyPatExternalSym<int_wasm_atomic_notify>;
+
+// Select notifys with just a constant offset.
+class NotifyPatOffsetOnly<Intrinsic kind> :
+ Pat<(i32 (kind imm:$off, I32:$count)),
+ (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatOffsetOnly<int_wasm_atomic_notify>;
+
+class NotifyPatGlobalAddrOffOnly<Intrinsic kind> :
+ Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatGlobalAddrOffOnly<int_wasm_atomic_notify>;
+
+class NotifyPatExternSymOffOnly<Intrinsic kind> :
+ Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, texternalsym:$off, (CONST_I32 0), I32:$count)>;
+def : NotifyPatExternSymOffOnly<int_wasm_atomic_notify>;
+
+// Select waits with no constant offset.
+class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
+ (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with a constant offset.
+
+// Pattern with address + immediate offset
+class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
+ Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
+ (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddr<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ ty:$exp, I64:$timeout)),
+ (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddr<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddr<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatExternalSym<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+ ty:$exp, I64:$timeout)),
+ (inst 0, texternalsym:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatExternalSym<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatExternalSym<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with just a constant offset.
+class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
+ (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
+ (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatExternSymOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), ty:$exp,
+ I64:$timeout)),
+ (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatExternSymOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatExternSymOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+} // Predicates = [HasAtomics]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 34262752430c..07839b790114 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -15,8 +15,6 @@
// TODO: addr64: These currently assume the callee address is 32-bit.
// FIXME: add $type to first call_indirect asmstr (and maybe $flags)
-let Defs = [ARGUMENTS] in {
-
// Call sequence markers. These have an immediate which represents the amount of
// stack space to allocate or free, which is used for varargs lowering.
let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in {
@@ -52,34 +50,35 @@ multiclass CALL<WebAssemblyRegClass vt, string prefix> {
}
multiclass SIMD_CALL<ValueType vt, string prefix> {
- defm CALL_#vt : SIMD_I<(outs V128:$dst), (ins function32_op:$callee,
- variable_ops),
- (outs), (ins function32_op:$callee),
- [(set (vt V128:$dst),
- (WebAssemblycall1 (i32 imm:$callee)))],
- !strconcat(prefix, "call\t$dst, $callee"),
- !strconcat(prefix, "call\t$callee"),
- 0x10>;
+
+ defm CALL_#vt : I<(outs V128:$dst), (ins function32_op:$callee, variable_ops),
+ (outs), (ins function32_op:$callee),
+ [(set (vt V128:$dst),
+ (WebAssemblycall1 (i32 imm:$callee)))],
+ !strconcat(prefix, "call\t$dst, $callee"),
+ !strconcat(prefix, "call\t$callee"),
+ 0x10>,
+ Requires<[HasSIMD128]>;
let isCodeGenOnly = 1 in {
- defm PCALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
- (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(set (vt V128:$dst),
- (WebAssemblycall1 I32:$callee))],
- "PSEUDO CALL INDIRECT\t$callee",
- "PSEUDO CALL INDIRECT\t$callee">;
+ defm PCALL_INDIRECT_#vt : I<(outs V128:$dst),
+ (ins I32:$callee, variable_ops),
+ (outs), (ins I32:$callee),
+ [(set (vt V128:$dst),
+ (WebAssemblycall1 I32:$callee))],
+ "PSEUDO CALL INDIRECT\t$callee",
+ "PSEUDO CALL INDIRECT\t$callee">,
+ Requires<[HasSIMD128]>;
} // isCodeGenOnly = 1
- defm CALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
- (ins TypeIndex:$type, i32imm:$flags,
- variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
- !strconcat(prefix,
- "call_indirect\t$dst"),
- !strconcat(prefix, "call_indirect\t$type"),
- 0x11>;
+ defm CALL_INDIRECT_#vt : I<(outs V128:$dst),
+ (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+ (outs), (ins TypeIndex:$type, i32imm:$flags),
+ [],
+ !strconcat(prefix, "call_indirect\t$dst"),
+ !strconcat(prefix, "call_indirect\t$type"),
+ 0x11>,
+ Requires<[HasSIMD128]>;
}
let Uses = [SP32, SP64], isCall = 1 in {
@@ -88,10 +87,12 @@ let Uses = [SP32, SP64], isCall = 1 in {
defm "" : CALL<F32, "f32.">;
defm "" : CALL<F64, "f64.">;
defm "" : CALL<EXCEPT_REF, "except_ref.">;
- defm "" : SIMD_CALL<v16i8, "i8x16.">;
- defm "" : SIMD_CALL<v8i16, "i16x8.">;
- defm "" : SIMD_CALL<v4i32, "i32x4.">;
- defm "" : SIMD_CALL<v4f32, "f32x4.">;
+ defm "" : SIMD_CALL<v16i8, "v128.">;
+ defm "" : SIMD_CALL<v8i16, "v128.">;
+ defm "" : SIMD_CALL<v4i32, "v128.">;
+ defm "" : SIMD_CALL<v2i64, "v128.">;
+ defm "" : SIMD_CALL<v4f32, "v128.">;
+ defm "" : SIMD_CALL<v2f64, "v128.">;
defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
(outs), (ins function32_op:$callee),
@@ -115,8 +116,6 @@ let Uses = [SP32, SP64], isCall = 1 in {
0x11>;
} // Uses = [SP32,SP64], isCall = 1
-} // Defs = [ARGUMENTS]
-
// Patterns for matching a direct call to a global address.
def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_I32 tglobaladdr:$callee)>;
@@ -132,8 +131,12 @@ def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v2i64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(ExceptRef
(WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_EXCEPT_REF tglobaladdr:$callee)>;
@@ -155,8 +158,12 @@ def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v2i64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(ExceptRef
(WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_EXCEPT_REF texternalsym:$callee)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index d90244b90662..7eb6cbf4d249 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -12,8 +12,6 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
// The condition operand is a boolean value which WebAssembly represents as i32.
defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond),
@@ -30,47 +28,37 @@ defm BR : NRI<(outs), (ins bb_op:$dst),
} // isBarrier = 1
} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
-} // Defs = [ARGUMENTS]
-
def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
(BR_IF bb_op:$dst, I32:$cond)>;
def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
(BR_UNLESS bb_op:$dst, I32:$cond)>;
-let Defs = [ARGUMENTS] in {
+// A list of branch targets enclosed in {} and separated by commas.
+// Used by br_table only.
+def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; }
+let OperandNamespace = "WebAssembly" in {
+let OperandType = "OPERAND_BRLIST" in {
+def brlist : Operand<i32> {
+ let ParserMatchClass = BrListAsmOperand;
+ let PrintMethod = "printBrList";
+}
+} // OPERAND_BRLIST
+} // OperandNamespace = "WebAssembly"
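+
+// As an illustrative sketch (the operand values here are hypothetical), the
+// stack form of br_table prints its targets through printBrList, e.g.
+//   br_table {0, 1, 2, 0}
+// where each entry is a relative branch depth and the final entry is the
+// default target.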
// TODO: SelectionDAG's lowering insists on using a pointer as the index for
// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
// currently.
-// Set TSFlags{0} to 1 to indicate that the variable_ops are immediates.
-// Set TSFlags{1} to 1 to indicate that the immediates represent labels.
-// FIXME: this can't inherit from I<> since there is no way to inherit from a
-// multiclass and still have the let statements.
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-def BR_TABLE_I32 : NI<(outs), (ins I32:$index, variable_ops),
- [(WebAssemblybr_table I32:$index)], 0,
- "br_table \t$index", 0x0e> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I32_S : NI<(outs), (ins I32:$index),
- [], 1,
- "br_table \t$index", 0x0e> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I64 : NI<(outs), (ins I64:$index, variable_ops),
- [(WebAssemblybr_table I64:$index)], 0,
- "br_table \t$index"> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
-def BR_TABLE_I64_S : NI<(outs), (ins I64:$index),
- [], 1,
- "br_table \t$index"> {
- let TSFlags{0} = 1;
- let TSFlags{1} = 1;
-}
+defm BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops),
+ (outs), (ins brlist:$brl),
+ [(WebAssemblybr_table I32:$index)],
+ "br_table \t$index", "br_table \t$brl",
+ 0x0e>;
+defm BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
+ (outs), (ins brlist:$brl),
+ [(WebAssemblybr_table I64:$index)],
+ "br_table \t$index", "br_table \t$brl",
+ 0x0e>;
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
// This is technically a control-flow instruction, since all it affects is the
@@ -81,13 +69,19 @@ defm NOP : NRI<(outs), (ins), [], "nop", 0x01>;
// These use/clobber VALUE_STACK to prevent them from being moved into the
// middle of an expression tree.
let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
-defm BLOCK : NRI<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
-defm LOOP : NRI<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+defm BLOCK : NRI<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
+defm LOOP : NRI<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+
+defm IF : I<(outs), (ins Signature:$sig, I32:$cond),
+ (outs), (ins Signature:$sig),
+ [], "if \t$sig, $cond", "if \t$sig", 0x04>;
+defm ELSE : NRI<(outs), (ins), [], "else", 0x05>;
-// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode in
-// wasm.
+// END_BLOCK, END_LOOP, END_IF and END_FUNCTION are represented with the same
+// opcode in wasm.
defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>;
defm END_LOOP : NRI<(outs), (ins), [], "end_loop", 0x0b>;
+defm END_IF : NRI<(outs), (ins), [], "end_if", 0x0b>;
let isTerminator = 1, isBarrier = 1 in
defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
@@ -103,14 +97,16 @@ multiclass RETURN<WebAssemblyRegClass vt> {
}
multiclass SIMD_RETURN<ValueType vt> {
- defm RETURN_#vt : SIMD_I<(outs), (ins V128:$val), (outs), (ins),
- [(WebAssemblyreturn (vt V128:$val))],
- "return \t$val", "return", 0x0f>;
+ defm RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
+ [(WebAssemblyreturn (vt V128:$val))],
+ "return \t$val", "return", 0x0f>,
+ Requires<[HasSIMD128]>;
// Equivalent to RETURN_#vt, for use at the end of a function when wasm
// semantics return by falling off the end of the block.
let isCodeGenOnly = 1 in
- defm FALLTHROUGH_RETURN_#vt : SIMD_I<(outs), (ins V128:$val), (outs), (ins),
- []>;
+ defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
+ []>,
+ Requires<[HasSIMD128]>;
}
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
@@ -124,7 +120,9 @@ let isReturn = 1 in {
defm "": SIMD_RETURN<v16i8>;
defm "": SIMD_RETURN<v8i16>;
defm "": SIMD_RETURN<v4i32>;
+ defm "": SIMD_RETURN<v2i64>;
defm "": SIMD_RETURN<v4f32>;
+ defm "": SIMD_RETURN<v2f64>;
defm RETURN_VOID : NRI<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>;
@@ -144,14 +142,16 @@ let Predicates = [HasExceptionHandling] in {
// Throwing an exception: throw / rethrow
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-defm THROW_I32 : I<(outs), (ins i32imm:$tag, I32:$val),
- (outs), (ins i32imm:$tag),
- [(int_wasm_throw imm:$tag, I32:$val)],
+defm THROW_I32 : I<(outs), (ins event_op:$tag, I32:$val),
+ (outs), (ins event_op:$tag),
+ [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
+ I32:$val)],
"throw \t$tag, $val", "throw \t$tag",
0x08>;
-defm THROW_I64 : I<(outs), (ins i32imm:$tag, I64:$val),
- (outs), (ins i32imm:$tag),
- [(int_wasm_throw imm:$tag, I64:$val)],
+defm THROW_I64 : I<(outs), (ins event_op:$tag, I64:$val),
+ (outs), (ins event_op:$tag),
+ [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
+ I64:$val)],
"throw \t$tag, $val", "throw \t$tag",
0x08>;
defm RETHROW : NRI<(outs), (ins bb_op:$dst), [], "rethrow \t$dst", 0x09>;
@@ -168,7 +168,7 @@ defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
// Catching an exception: catch / catch_all
-let hasCtrlDep = 1 in {
+let hasCtrlDep = 1, hasSideEffects = 1 in {
defm CATCH_I32 : I<(outs I32:$dst), (ins i32imm:$tag),
(outs), (ins i32imm:$tag),
[(set I32:$dst, (int_wasm_catch imm:$tag))],
@@ -181,14 +181,10 @@ defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x05>;
}
// Pseudo instructions: cleanupret / catchret
-// They are not return instructions in wasm, but setting 'isReturn' to true as
-// in X86 is necessary for computing EH scope membership.
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
- isCodeGenOnly = 1, isReturn = 1 in {
+ isCodeGenOnly = 1, isEHScopeReturn = 1 in {
defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "", 0>;
defm CATCHRET : NRI<(outs), (ins bb_op:$dst, bb_op:$from),
[(catchret bb:$dst, bb:$from)], "", 0>;
}
}
-
-} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index c89c1b549816..e128656a142c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -13,19 +13,17 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
defm I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins),
[(set I32:$dst, (trunc I64:$src))],
- "i32.wrap/i64\t$dst, $src", "i32.wrap/i64", 0xa7>;
+ "i32.wrap_i64\t$dst, $src", "i32.wrap_i64", 0xa7>;
defm I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src), (outs), (ins),
[(set I64:$dst, (sext I32:$src))],
- "i64.extend_s/i32\t$dst, $src", "i64.extend_s/i32",
+ "i64.extend_i32_s\t$dst, $src", "i64.extend_i32_s",
0xac>;
defm I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), (outs), (ins),
[(set I64:$dst, (zext I32:$src))],
- "i64.extend_u/i32\t$dst, $src", "i64.extend_u/i32",
+ "i64.extend_i32_u\t$dst, $src", "i64.extend_i32_u",
0xad>;
let Predicates = [HasSignExt] in {
@@ -51,58 +49,72 @@ defm I64_EXTEND32_S_I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
0xc4>;
} // Predicates = [HasSignExt]
-} // defs = [ARGUMENTS]
-
// Expand a "don't care" extend into zero-extend (chosen over sign-extend
// somewhat arbitrarily, although it favors popular hardware architectures
// and is conceptually a simpler operation).
def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>;
-let Defs = [ARGUMENTS] in {
-
// Conversion from floating point to integer instructions which don't trap on
// overflow or invalid.
defm I32_TRUNC_S_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (fp_to_sint F32:$src))],
- "i32.trunc_s:sat/f32\t$dst, $src",
- "i32.trunc_s:sat/f32", 0xfc00>,
+ "i32.trunc_sat_f32_s\t$dst, $src",
+ "i32.trunc_sat_f32_s", 0xfc00>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_U_SAT_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (fp_to_uint F32:$src))],
- "i32.trunc_u:sat/f32\t$dst, $src",
- "i32.trunc_u:sat/f32", 0xfc01>,
+ "i32.trunc_sat_f32_u\t$dst, $src",
+ "i32.trunc_sat_f32_u", 0xfc01>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_S_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
[(set I64:$dst, (fp_to_sint F32:$src))],
- "i64.trunc_s:sat/f32\t$dst, $src",
- "i64.trunc_s:sat/f32", 0xfc04>,
+ "i64.trunc_sat_f32_s\t$dst, $src",
+ "i64.trunc_sat_f32_s", 0xfc04>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_U_SAT_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
[(set I64:$dst, (fp_to_uint F32:$src))],
- "i64.trunc_u:sat/f32\t$dst, $src",
- "i64.trunc_u:sat/f32", 0xfc05>,
+ "i64.trunc_sat_f32_u\t$dst, $src",
+ "i64.trunc_sat_f32_u", 0xfc05>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_S_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
[(set I32:$dst, (fp_to_sint F64:$src))],
- "i32.trunc_s:sat/f64\t$dst, $src",
- "i32.trunc_s:sat/f64", 0xfc02>,
+ "i32.trunc_sat_f64_s\t$dst, $src",
+ "i32.trunc_sat_f64_s", 0xfc02>,
Requires<[HasNontrappingFPToInt]>;
defm I32_TRUNC_U_SAT_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
[(set I32:$dst, (fp_to_uint F64:$src))],
- "i32.trunc_u:sat/f64\t$dst, $src",
- "i32.trunc_u:sat/f64", 0xfc03>,
+ "i32.trunc_sat_f64_u\t$dst, $src",
+ "i32.trunc_sat_f64_u", 0xfc03>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_S_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (fp_to_sint F64:$src))],
- "i64.trunc_s:sat/f64\t$dst, $src",
- "i64.trunc_s:sat/f64", 0xfc06>,
+ "i64.trunc_sat_f64_s\t$dst, $src",
+ "i64.trunc_sat_f64_s", 0xfc06>,
Requires<[HasNontrappingFPToInt]>;
defm I64_TRUNC_U_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (fp_to_uint F64:$src))],
- "i64.trunc_u:sat/f64\t$dst, $src",
- "i64.trunc_u:sat/f64", 0xfc07>,
+ "i64.trunc_sat_f64_u\t$dst, $src",
+ "i64.trunc_sat_f64_u", 0xfc07>,
Requires<[HasNontrappingFPToInt]>;
+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(int_wasm_trunc_saturate_signed F32:$src),
+ (I32_TRUNC_S_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src),
+ (I32_TRUNC_U_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F64:$src),
+ (I32_TRUNC_S_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src),
+ (I32_TRUNC_U_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F32:$src),
+ (I64_TRUNC_S_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src),
+ (I64_TRUNC_U_SAT_F32 F32:$src)>;
+def : Pat<(int_wasm_trunc_saturate_signed F64:$src),
+ (I64_TRUNC_S_SAT_F64 F64:$src)>;
+def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src),
+ (I64_TRUNC_U_SAT_F64 F64:$src)>;
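+
+// As an illustrative sketch (assuming the usual intrinsic name mangling), a
+// call such as
+//   %r = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float %x)
+// is selected as I32_TRUNC_S_SAT_F32 and emitted as i32.trunc_sat_f32_s.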
+
// Conversion from floating point to integer pseudo-instructions which don't
// trap on overflow or invalid.
let usesCustomInserter = 1, isCodeGenOnly = 1 in {
@@ -135,88 +147,86 @@ defm FP_TO_UINT_I64_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
// Conversion from floating point to integer traps on overflow and invalid.
let hasSideEffects = 1 in {
defm I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
- [], "i32.trunc_s/f32\t$dst, $src", "i32.trunc_s/f32",
+ [], "i32.trunc_f32_s\t$dst, $src", "i32.trunc_f32_s",
0xa8>;
defm I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
- [], "i32.trunc_u/f32\t$dst, $src", "i32.trunc_u/f32",
+ [], "i32.trunc_f32_u\t$dst, $src", "i32.trunc_f32_u",
0xa9>;
defm I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
- [], "i64.trunc_s/f32\t$dst, $src", "i64.trunc_s/f32",
+ [], "i64.trunc_f32_s\t$dst, $src", "i64.trunc_f32_s",
0xae>;
defm I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src), (outs), (ins),
- [], "i64.trunc_u/f32\t$dst, $src", "i64.trunc_u/f32",
+ [], "i64.trunc_f32_u\t$dst, $src", "i64.trunc_f32_u",
0xaf>;
defm I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
- [], "i32.trunc_s/f64\t$dst, $src", "i32.trunc_s/f64",
+ [], "i32.trunc_f64_s\t$dst, $src", "i32.trunc_f64_s",
0xaa>;
defm I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src), (outs), (ins),
- [], "i32.trunc_u/f64\t$dst, $src", "i32.trunc_u/f64",
+ [], "i32.trunc_f64_u\t$dst, $src", "i32.trunc_f64_u",
0xab>;
defm I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
- [], "i64.trunc_s/f64\t$dst, $src", "i64.trunc_s/f64",
+ [], "i64.trunc_f64_s\t$dst, $src", "i64.trunc_f64_s",
0xb0>;
defm I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
- [], "i64.trunc_u/f64\t$dst, $src", "i64.trunc_u/f64",
+ [], "i64.trunc_f64_u\t$dst, $src", "i64.trunc_f64_u",
0xb1>;
} // hasSideEffects = 1
defm F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (sint_to_fp I32:$src))],
- "f32.convert_s/i32\t$dst, $src", "f32.convert_s/i32",
+ "f32.convert_i32_s\t$dst, $src", "f32.convert_i32_s",
0xb2>;
defm F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (uint_to_fp I32:$src))],
- "f32.convert_u/i32\t$dst, $src", "f32.convert_u/i32",
+ "f32.convert_i32_u\t$dst, $src", "f32.convert_i32_u",
0xb3>;
defm F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src), (outs), (ins),
[(set F64:$dst, (sint_to_fp I32:$src))],
- "f64.convert_s/i32\t$dst, $src", "f64.convert_s/i32",
+ "f64.convert_i32_s\t$dst, $src", "f64.convert_i32_s",
0xb7>;
defm F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src), (outs), (ins),
[(set F64:$dst, (uint_to_fp I32:$src))],
- "f64.convert_u/i32\t$dst, $src", "f64.convert_u/i32",
+ "f64.convert_i32_u\t$dst, $src", "f64.convert_i32_u",
0xb8>;
defm F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src), (outs), (ins),
[(set F32:$dst, (sint_to_fp I64:$src))],
- "f32.convert_s/i64\t$dst, $src", "f32.convert_s/i64",
+ "f32.convert_i64_s\t$dst, $src", "f32.convert_i64_s",
0xb4>;
defm F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src), (outs), (ins),
[(set F32:$dst, (uint_to_fp I64:$src))],
- "f32.convert_u/i64\t$dst, $src", "f32.convert_u/i64",
+ "f32.convert_i64_u\t$dst, $src", "f32.convert_i64_u",
0xb5>;
defm F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (sint_to_fp I64:$src))],
- "f64.convert_s/i64\t$dst, $src", "f64.convert_s/i64",
+ "f64.convert_i64_s\t$dst, $src", "f64.convert_i64_s",
0xb9>;
defm F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (uint_to_fp I64:$src))],
- "f64.convert_u/i64\t$dst, $src", "f64.convert_u/i64",
+ "f64.convert_i64_u\t$dst, $src", "f64.convert_i64_u",
0xba>;
defm F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src), (outs), (ins),
[(set F64:$dst, (fpextend F32:$src))],
- "f64.promote/f32\t$dst, $src", "f64.promote/f32",
+ "f64.promote_f32\t$dst, $src", "f64.promote_f32",
0xbb>;
defm F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src), (outs), (ins),
[(set F32:$dst, (fpround F64:$src))],
- "f32.demote/f64\t$dst, $src", "f32.demote/f64",
+ "f32.demote_f64\t$dst, $src", "f32.demote_f64",
0xb6>;
defm I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src), (outs), (ins),
[(set I32:$dst, (bitconvert F32:$src))],
- "i32.reinterpret/f32\t$dst, $src",
- "i32.reinterpret/f32", 0xbc>;
+ "i32.reinterpret_f32\t$dst, $src",
+ "i32.reinterpret_f32", 0xbc>;
defm F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins),
[(set F32:$dst, (bitconvert I32:$src))],
- "f32.reinterpret/i32\t$dst, $src",
- "f32.reinterpret/i32", 0xbe>;
+ "f32.reinterpret_i32\t$dst, $src",
+ "f32.reinterpret_i32", 0xbe>;
defm I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins),
[(set I64:$dst, (bitconvert F64:$src))],
- "i64.reinterpret/f64\t$dst, $src",
- "i64.reinterpret/f64", 0xbd>;
+ "i64.reinterpret_f64\t$dst, $src",
+ "i64.reinterpret_f64", 0xbd>;
defm F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src), (outs), (ins),
[(set F64:$dst, (bitconvert I64:$src))],
- "f64.reinterpret/i64\t$dst, $src",
- "f64.reinterpret/i64", 0xbf>;
-
-} // Defs = [ARGUMENTS]
+ "f64.reinterpret_i64\t$dst, $src",
+ "f64.reinterpret_i64", 0xbf>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
index 41b39f69e51c..a251d60b89ee 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
@@ -12,8 +12,6 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
-
defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
(ins EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond),
(outs), (ins),
@@ -23,8 +21,6 @@ defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
"except_ref.select\t$dst, $lhs, $rhs, $cond",
"except_ref.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
def : Pat<(select (i32 (setne I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
(SELECT_EXCEPT_REF EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 8db75d38942b..c5290f00b431 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -12,7 +12,38 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst,
+ bits<32> f64Inst> {
+ defm _F32 : I<(outs F32:$dst), (ins F32:$src), (outs), (ins),
+ [(set F32:$dst, (node F32:$src))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs F64:$dst), (ins F64:$src), (outs), (ins),
+ [(set F64:$dst, (node F64:$src))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("f64.", name), f64Inst>;
+}
+multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst,
+ bits<32> f64Inst> {
+ defm _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
+ [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
+ [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f64.", name), f64Inst>;
+}
+multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> {
+ defm _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f32.", name), f32Inst>;
+ defm _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("f64.", name), f64Inst>;
+}
let isCommutable = 1 in
defm ADD : BinaryFP<fadd, "add ", 0x92, 0xa0>;
@@ -27,8 +58,8 @@ defm NEG : UnaryFP<fneg, "neg ", 0x8c, 0x9a>;
defm COPYSIGN : BinaryFP<fcopysign, "copysign", 0x98, 0xa6>;
let isCommutable = 1 in {
-defm MIN : BinaryFP<fminnan, "min ", 0x96, 0xa4>;
-defm MAX : BinaryFP<fmaxnan, "max ", 0x97, 0xa5>;
+defm MIN : BinaryFP<fminimum, "min ", 0x96, 0xa4>;
+defm MAX : BinaryFP<fmaximum, "max ", 0x97, 0xa5>;
} // isCommutable = 1
defm CEIL : UnaryFP<fceil, "ceil", 0x8d, 0x9b>;
@@ -36,8 +67,6 @@ defm FLOOR : UnaryFP<ffloor, "floor", 0x8e, 0x9c>;
defm TRUNC : UnaryFP<ftrunc, "trunc", 0x8f, 0x9d>;
defm NEAREST : UnaryFP<fnearbyint, "nearest", 0x90, 0x9e>;
-} // Defs = [ARGUMENTS]
-
// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
def : Pat<(fcopysign F64:$lhs, F32:$rhs),
(COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
@@ -48,8 +77,6 @@ def : Pat<(fcopysign F32:$lhs, F64:$rhs),
def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
-let Defs = [ARGUMENTS] in {
-
let isCommutable = 1 in {
defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>;
defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>;
@@ -59,8 +86,6 @@ defm LE : ComparisonFP<SETOLE, "le ", 0x5f, 0x65>;
defm GT : ComparisonFP<SETOGT, "gt ", 0x5e, 0x64>;
defm GE : ComparisonFP<SETOGE, "ge ", 0x60, 0x66>;
-} // Defs = [ARGUMENTS]
-
// Don't care floating-point comparisons, supported via other comparisons.
def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>;
def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>;
@@ -75,8 +100,6 @@ def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>;
def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>;
def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>;
-let Defs = [ARGUMENTS] in {
-
defm SELECT_F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs, I32:$cond),
(outs), (ins),
[(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))],
@@ -86,8 +109,6 @@ defm SELECT_F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs, I32:$cond),
[(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))],
"f64.select\t$dst, $lhs, $rhs, $cond", "f64.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
@@ -101,3 +122,10 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs),
(SELECT_F32 F32:$rhs, F32:$lhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs),
(SELECT_F64 F64:$rhs, F64:$lhs, I32:$cond)>;
+
+// The legalizer inserts an unnecessary `and 1` to make the input conform
+// to getBooleanContents, which we can lower away.
+def : Pat<(select (i32 (and I32:$cond, 1)), F32:$lhs, F32:$rhs),
+ (SELECT_F32 F32:$lhs, F32:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (and I32:$cond, 1)), F64:$lhs, F64:$rhs),
+ (SELECT_F64 F64:$lhs, F64:$rhs, I32:$cond)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 403152c80660..15a9714a55a1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -15,21 +15,24 @@
// WebAssembly Instruction Format.
// We instantiate 2 of these for every actual instruction (register based
// and stack based), see below.
-class WebAssemblyInst<bits<32> inst, string asmstr, bit stack> : Instruction {
- field bits<32> Inst = inst; // Instruction encoding.
- field bit StackBased = stack;
+class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,
+ Instruction {
+ bits<32> Inst = inst; // Instruction encoding.
+ string StackBased = stack;
+ string BaseName = NAME;
let Namespace = "WebAssembly";
let Pattern = [];
let AsmString = asmstr;
}
// Normal instructions. Default instantiation of a WebAssemblyInst.
-class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
- bits<32> inst = -1>
+class NI<dag oops, dag iops, list<dag> pattern, string stack,
+ string asmstr = "", bits<32> inst = -1>
: WebAssemblyInst<inst, asmstr, stack> {
dag OutOperandList = oops;
dag InOperandList = iops;
let Pattern = pattern;
+ let Defs = [ARGUMENTS];
}
// Generates both register and stack based versions of one actual instruction.
@@ -37,10 +40,10 @@ class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
// based version of this instruction, as well as the corresponding asmstr.
// The register versions have virtual-register operands which correspond to wasm
// locals or stack locations. Each use and def of the register corresponds to an
-// implicit get_local / set_local or access of stack operands in wasm. These
+// implicit local.get / local.set or access of stack operands in wasm. These
// instructions are used for ISel and all MI passes. The stack versions of the
// instructions do not have register operands (they implicitly operate on the
-// stack), and get_locals and set_locals are explicit. The register instructions
+// stack), and local.gets and local.sets are explicit. The register instructions
// are converted to their corresponding stack instructions before lowering to
// MC.
// Every instruction should want to be based on this multi-class to guarantee
@@ -48,8 +51,10 @@ class NI<dag oops, dag iops, list<dag> pattern, bit stack, string asmstr = "",
multiclass I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "",
bits<32> inst = -1> {
- def "" : NI<oops_r, iops_r, pattern_r, 0, asmstr_r, inst>;
- def _S : NI<oops_s, iops_s, [], 1, asmstr_s, inst>;
+ let isCodeGenOnly = 1 in
+ def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst>;
+ let BaseName = NAME in
+ def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst>;
}
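+// For example (an illustrative sketch): the integer add, defined via BinaryInt
+// in WebAssemblyInstrInteger.td, expands through this multiclass into a
+// register-based ADD_I32 (isCodeGenOnly; used by ISel and the MI passes) and a
+// stack-based ADD_I32_S. Both share the same BaseName, which the
+// getStackOpcode InstrMapping in WebAssemblyInstrInfo.td keys on.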
// For instructions that have no register ops, so both sets are the same.
@@ -57,111 +62,3 @@ multiclass NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
bits<32> inst = -1> {
defm "": I<oops, iops, oops, iops, pattern, asmstr, asmstr, inst>;
}
-
-multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> inst = -1> {
- defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- inst>,
- Requires<[HasSIMD128]>;
-}
-
-multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> inst = -1> {
- defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- inst>,
- Requires<[HasAtomics]>;
-}
-
-// Unary and binary instructions, for the local types that WebAssembly supports.
-multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst,
- bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins),
- [(set I32:$dst, (node I32:$src))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $src")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
- [(set I64:$dst, (node I64:$src))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $src")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst,
- bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
- [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
- [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst,
- bits<32> f64Inst> {
- defm _F32 : I<(outs F32:$dst), (ins F32:$src), (outs), (ins),
- [(set F32:$dst, (node F32:$src))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $src")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs F64:$dst), (ins F64:$src), (outs), (ins),
- [(set F64:$dst, (node F64:$src))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $src")),
- !strconcat("f64.", name), f64Inst>;
-}
-multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst,
- bits<32> f64Inst> {
- defm _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
- [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
- [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f64.", name), f64Inst>;
-}
-multiclass SIMDBinary<SDNode node, SDNode fnode, string name> {
- defm _I8x16 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v16i8 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i8x16.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i8x16.", name)>;
- defm _I16x8 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v8i16 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i16x8.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i16x8.", name)>;
- defm _I32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v4i32 V128:$dst), (node V128:$lhs, V128:$rhs))],
- !strconcat("i32x4.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32x4.", name)>;
- defm _F32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
- (outs), (ins),
- [(set (v4f32 V128:$dst), (fnode V128:$lhs, V128:$rhs))],
- !strconcat("f32x4.",
- !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32x4.", name)>;
-}
-multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> {
- defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i32.", name), i32Inst>;
- defm _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("i64.", name), i64Inst>;
-}
-multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> {
- defm _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f32.", name), f32Inst>;
- defm _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs), (outs), (ins),
- [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
- !strconcat("f64.", name), f64Inst>;
-}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index cd49bd1682ad..5efff32d6167 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -70,6 +70,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
CopyOpcode = WebAssembly::COPY_F32;
else if (RC == &WebAssembly::F64RegClass)
CopyOpcode = WebAssembly::COPY_F64;
+ else if (RC == &WebAssembly::V128RegClass)
+ CopyOpcode = WebAssembly::COPY_V128;
else
llvm_unreachable("Unexpected register class");
@@ -77,10 +79,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, KillSrc ? RegState::Kill : 0);
}
-MachineInstr *
-WebAssemblyInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
- unsigned OpIdx1,
- unsigned OpIdx2) const {
+MachineInstr *WebAssemblyInstrInfo::commuteInstructionImpl(
+ MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const {
// If the operands are stackified, we can't reorder them.
WebAssemblyFunctionInfo &MFI =
*MI.getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>();
@@ -165,12 +165,9 @@ unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB,
return Count;
}
-unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL,
- int *BytesAdded) const {
+unsigned WebAssemblyInstrInfo::insertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
assert(!BytesAdded && "code size not handled");
if (Cond.empty()) {
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index aeb282a7febb..e3d795f2aab1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -20,6 +20,9 @@ def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">;
def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;
def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
AssemblerPredicate<"FeatureSIMD128", "simd128">;
+def HasUnimplementedSIMD128 :
+ Predicate<"Subtarget->hasUnimplementedSIMD128()">,
+ AssemblerPredicate<"FeatureUnimplementedSIMD128", "unimplemented-simd128">;
def HasAtomics : Predicate<"Subtarget->hasAtomics()">,
AssemblerPredicate<"FeatureAtomics", "atomics">;
def HasNontrappingFPToInt :
@@ -64,6 +67,7 @@ def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyThrow : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Nodes.
@@ -90,6 +94,8 @@ def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
SDT_WebAssemblyReturn, [SDNPHasChain]>;
def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
SDT_WebAssemblyWrapper>;
+def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow,
+ [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
@@ -118,6 +124,18 @@ def f32imm_op : Operand<f32>;
let OperandType = "OPERAND_F64IMM" in
def f64imm_op : Operand<f64>;
+let OperandType = "OPERAND_VEC_I8IMM" in
+def vec_i8imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I16IMM" in
+def vec_i16imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I32IMM" in
+def vec_i32imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_VEC_I64IMM" in
+def vec_i64imm_op : Operand<i64>;
+
let OperandType = "OPERAND_FUNCTION32" in
def function32_op : Operand<i32>;
@@ -128,6 +146,10 @@ let OperandType = "OPERAND_P2ALIGN" in {
def P2Align : Operand<i32> {
let PrintMethod = "printWebAssemblyP2AlignOperand";
}
+
+let OperandType = "OPERAND_EVENT" in
+def event_op : Operand<i32>;
+
} // OperandType = "OPERAND_P2ALIGN"
let OperandType = "OPERAND_SIGNATURE" in {
@@ -142,6 +164,19 @@ def TypeIndex : Operand<i32>;
} // OperandNamespace = "WebAssembly"
//===----------------------------------------------------------------------===//
+// WebAssembly Register to Stack instruction mapping
+//===----------------------------------------------------------------------===//
+
+class StackRel;
+def getStackOpcode : InstrMapping {
+ let FilterClass = "StackRel";
+ let RowFields = ["BaseName"];
+ let ColFields = ["StackBased"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
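+
+// As a usage sketch (assuming the usual TableGen InstrMapping output), this
+// generates a lookup function in WebAssemblyGenInstrInfo.inc roughly of the
+// form
+//   int WebAssembly::getStackOpcode(uint16_t Opcode);
+// so that, for instance, passing the register-based ADD_I32 yields ADD_I32_S,
+// which the MC lowering uses when emitting the stack-machine form.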
+
+//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
//===----------------------------------------------------------------------===//
@@ -151,74 +186,62 @@ include "WebAssemblyInstrFormats.td"
// Additional instructions.
//===----------------------------------------------------------------------===//
-multiclass ARGUMENT<WebAssemblyRegClass vt> {
- let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
- defm ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno),
- (outs), (ins i32imm:$argno),
- [(set vt:$res, (WebAssemblyargument timm:$argno))]>;
+multiclass ARGUMENT<WebAssemblyRegClass reg, ValueType vt> {
+ let hasSideEffects = 1, isCodeGenOnly = 1,
+ Defs = []<Register>, Uses = [ARGUMENTS] in
+ defm ARGUMENT_#vt :
+ I<(outs reg:$res), (ins i32imm:$argno), (outs), (ins i32imm:$argno),
+ [(set (vt reg:$res), (WebAssemblyargument timm:$argno))]>;
}
-multiclass SIMD_ARGUMENT<ValueType vt> {
- let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
- defm ARGUMENT_#vt : SIMD_I<(outs V128:$res), (ins i32imm:$argno),
- (outs), (ins i32imm:$argno),
- [(set (vt V128:$res),
- (WebAssemblyargument timm:$argno))]>;
-}
-defm "": ARGUMENT<I32>;
-defm "": ARGUMENT<I64>;
-defm "": ARGUMENT<F32>;
-defm "": ARGUMENT<F64>;
-defm "": ARGUMENT<EXCEPT_REF>;
-defm "": SIMD_ARGUMENT<v16i8>;
-defm "": SIMD_ARGUMENT<v8i16>;
-defm "": SIMD_ARGUMENT<v4i32>;
-defm "": SIMD_ARGUMENT<v4f32>;
-
-let Defs = [ARGUMENTS] in {
-
-// get_local and set_local are not generated by instruction selection; they
+defm "": ARGUMENT<I32, i32>;
+defm "": ARGUMENT<I64, i64>;
+defm "": ARGUMENT<F32, f32>;
+defm "": ARGUMENT<F64, f64>;
+defm "": ARGUMENT<EXCEPT_REF, ExceptRef>;
+
+// local.get and local.set are not generated by instruction selection; they
// are implied by virtual register uses and defs.
multiclass LOCAL<WebAssemblyRegClass vt> {
let hasSideEffects = 0 in {
- // COPY is not an actual instruction in wasm, but since we allow get_local and
- // set_local to be implicit during most of codegen, we can have a COPY which
- // is actually a no-op because all the work is done in the implied get_local
- // and set_local. COPYs are eliminated (and replaced with
- // get_local/set_local) in the ExplicitLocals pass.
+ // COPY is not an actual instruction in wasm, but since we allow local.get and
+ // local.set to be implicit during most of codegen, we can have a COPY which
+ // is actually a no-op because all the work is done in the implied local.get
+ // and local.set. COPYs are eliminated (and replaced with
+ // local.get/local.set) in the ExplicitLocals pass.
let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in
defm COPY_#vt : I<(outs vt:$res), (ins vt:$src), (outs), (ins), [],
- "copy_local\t$res, $src", "copy_local">;
+ "local.copy\t$res, $src", "local.copy">;
// TEE is similar to COPY, but writes two copies of its result. Typically
// this would be used to stackify one result and write the other result to a
// local.
let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in
defm TEE_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), (outs), (ins), [],
- "tee_local\t$res, $also, $src", "tee_local">;
+ "local.tee\t$res, $also, $src", "local.tee">;
- // This is the actual get_local instruction in wasm. These are made explicit
+ // This is the actual local.get instruction in wasm. These are made explicit
// by the ExplicitLocals pass. It has mayLoad because it reads from a wasm
// local, which is a side effect not otherwise modeled in LLVM.
let mayLoad = 1, isAsCheapAsAMove = 1 in
- defm GET_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local),
+ defm LOCAL_GET_#vt : I<(outs vt:$res), (ins local_op:$local),
(outs), (ins local_op:$local), [],
- "get_local\t$res, $local", "get_local\t$local", 0x20>;
+ "local.get\t$res, $local", "local.get\t$local", 0x20>;
- // This is the actual set_local instruction in wasm. These are made explicit
+ // This is the actual local.set instruction in wasm. These are made explicit
// by the ExplicitLocals pass. It has mayStore because it writes to a wasm
// local, which is a side effect not otherwise modeled in LLVM.
let mayStore = 1, isAsCheapAsAMove = 1 in
- defm SET_LOCAL_#vt : I<(outs), (ins local_op:$local, vt:$src),
+ defm LOCAL_SET_#vt : I<(outs), (ins local_op:$local, vt:$src),
(outs), (ins local_op:$local), [],
- "set_local\t$local, $src", "set_local\t$local", 0x21>;
+ "local.set\t$local, $src", "local.set\t$local", 0x21>;
- // This is the actual tee_local instruction in wasm. TEEs are turned into
- // TEE_LOCALs by the ExplicitLocals pass. It has mayStore for the same reason
- // as SET_LOCAL.
+ // This is the actual local.tee instruction in wasm. TEEs are turned into
+ // LOCAL_TEEs by the ExplicitLocals pass. It has mayStore for the same reason
+ // as LOCAL_SET.
let mayStore = 1, isAsCheapAsAMove = 1 in
- defm TEE_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src),
+ defm LOCAL_TEE_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src),
(outs), (ins local_op:$local), [],
- "tee_local\t$res, $local, $src", "tee_local\t$local",
+ "local.tee\t$res, $local, $src", "local.tee\t$local",
0x22>;
// Unused values must be dropped in some contexts.
@@ -226,15 +249,15 @@ let hasSideEffects = 0 in {
"drop\t$src", "drop", 0x1a>;
let mayLoad = 1 in
- defm GET_GLOBAL_#vt : I<(outs vt:$res), (ins global_op:$local),
+ defm GLOBAL_GET_#vt : I<(outs vt:$res), (ins global_op:$local),
(outs), (ins global_op:$local), [],
- "get_global\t$res, $local", "get_global\t$local",
+ "global.get\t$res, $local", "global.get\t$local",
0x23>;
let mayStore = 1 in
- defm SET_GLOBAL_#vt : I<(outs), (ins global_op:$local, vt:$src),
+ defm GLOBAL_SET_#vt : I<(outs), (ins global_op:$local, vt:$src),
(outs), (ins global_op:$local), [],
- "set_global\t$local, $src", "set_global\t$local",
+ "global.set\t$local, $src", "global.set\t$local",
0x24>;
} // hasSideEffects = 0
@@ -265,12 +288,12 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
"f64.const\t$res, $imm", "f64.const\t$imm", 0x44>;
} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
-} // Defs = [ARGUMENTS]
-
def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
(CONST_I32 tglobaladdr:$addr)>;
def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
(CONST_I32 texternalsym:$addr)>;
+def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>;
+def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>;
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index f9f21fd1d754..d5b63d643697 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -12,7 +12,38 @@
///
//===----------------------------------------------------------------------===//
-let Defs = [ARGUMENTS] in {
+multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst,
+ bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$src), (outs), (ins),
+ [(set I32:$dst, (node I32:$src))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I64:$dst), (ins I64:$src), (outs), (ins),
+ [(set I64:$dst, (node I64:$src))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $src")),
+ !strconcat("i64.", name), i64Inst>;
+}
+multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst,
+ bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
+ [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
+ [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i64.", name), i64Inst>;
+}
+multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> {
+ defm _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i32.", name), i32Inst>;
+ defm _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs), (outs), (ins),
+ [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ !strconcat("i64.", name), i64Inst>;
+}
// The spaces after the names are for aesthetic purposes only, to make
// operands line up vertically after tab expansion.
@@ -63,16 +94,12 @@ defm EQZ_I64 : I<(outs I32:$dst), (ins I64:$src), (outs), (ins),
[(set I32:$dst, (setcc I64:$src, 0, SETEQ))],
"i64.eqz \t$dst, $src", "i64.eqz", 0x50>;
-} // Defs = [ARGUMENTS]
-
// Optimize away an explicit mask on a rotate count.
def : Pat<(rotl I32:$lhs, (and I32:$rhs, 31)), (ROTL_I32 I32:$lhs, I32:$rhs)>;
def : Pat<(rotr I32:$lhs, (and I32:$rhs, 31)), (ROTR_I32 I32:$lhs, I32:$rhs)>;
def : Pat<(rotl I64:$lhs, (and I64:$rhs, 63)), (ROTL_I64 I64:$lhs, I64:$rhs)>;
def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>;
-let Defs = [ARGUMENTS] in {
-
defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond),
(outs), (ins),
[(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
@@ -82,8 +109,6 @@ defm SELECT_I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs, I32:$cond),
[(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))],
"i64.select\t$dst, $lhs, $rhs, $cond", "i64.select", 0x1b>;
-} // Defs = [ARGUMENTS]
-
// ISD::SELECT requires its operand to conform to getBooleanContents, but
// WebAssembly's select interprets any non-zero value as true, so we can fold
// a setne with 0 into a select.
@@ -97,3 +122,10 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
(SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
(SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>;
+
+// The legalizer inserts an unnecessary `and 1` to make the input conform
+// to getBooleanContents, which we can lower away.
+def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
+ (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
+ (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;
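
The select patterns just above are sound only because, as the comment notes, the legalizer emits the `and $cond, 1` mask exclusively in front of conditions that are already 0 or 1. A minimal standalone sketch of that equivalence, using an illustrative helper rather than any LLVM API:

#include <cassert>
#include <cstdint>

// WebAssembly-style select: any non-zero condition picks the first operand.
static int32_t wasmSelect(int32_t a, int32_t b, int32_t cond) {
  return cond != 0 ? a : b;
}

int main() {
  for (int32_t cond : {0, 1}) {    // boolean values produced by a setcc
    int32_t masked = cond & 1;     // the `and 1` the legalizer inserts
    assert(wasmSelect(7, 9, masked) == wasmSelect(7, 9, cond));
  }
}
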
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 8a49325af2bd..518f81c61dc4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -33,10 +33,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- KnownBits Known0;
- CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
- KnownBits Known1;
- CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+ KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
return (~Known0.Zero & ~Known1.Zero) == 0;
}]>;
@@ -53,15 +51,14 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),
// We don't need a regPlusES because external symbols never have constant
// offsets folded into them, so we can just use add.
-let Defs = [ARGUMENTS] in {
-
// Defines atomic and non-atomic loads, regular and extending.
multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
+ let mayLoad = 1 in
defm "": I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off),
[], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"),
- !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+ !strconcat(Name, "\t${off}${p2align}"), Opcode>;
}
// Basic load.
@@ -72,8 +69,6 @@ defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>;
defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>;
defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>;
-} // Defs = [ARGUMENTS]
-
// Select loads with no constant offset.
class LoadPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind I32:$addr)), (inst 0, 0, I32:$addr)>;
@@ -143,8 +138,6 @@ def : LoadPatExternSymOffOnly<i64, load, LOAD_I64>;
def : LoadPatExternSymOffOnly<f32, load, LOAD_F32>;
def : LoadPatExternSymOffOnly<f64, load, LOAD_F64>;
-let Defs = [ARGUMENTS] in {
-
// Extending load.
defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>;
defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>;
@@ -157,8 +150,6 @@ defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>;
defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>;
defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>;
-} // Defs = [ARGUMENTS]
-
// Select extending loads with no constant offset.
def : LoadPatNoOffset<i32, sextloadi8, LOAD8_S_I32>;
def : LoadPatNoOffset<i32, zextloadi8, LOAD8_U_I32>;
@@ -302,17 +293,15 @@ def : LoadPatExternSymOffOnly<i64, extloadi8, LOAD8_U_I64>;
def : LoadPatExternSymOffOnly<i64, extloadi16, LOAD16_U_I64>;
def : LoadPatExternSymOffOnly<i64, extloadi32, LOAD32_U_I64>;
-
-let Defs = [ARGUMENTS] in {
-
// Defines atomic and non-atomic stores, regular and truncating
multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
+ let mayStore = 1 in
defm "" : I<(outs),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
(outs),
(ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(Name, "\t${off}(${addr})${p2align}, $val"),
- !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+ !strconcat(Name, "\t${off}${p2align}"), Opcode>;
}
// Basic store.
// Note: WebAssembly inverts SelectionDAG's usual operand order.
@@ -321,8 +310,6 @@ defm STORE_I64 : WebAssemblyStore<I64, "i64.store", 0x37>;
defm STORE_F32 : WebAssemblyStore<F32, "f32.store", 0x38>;
defm STORE_F64 : WebAssemblyStore<F64, "f64.store", 0x39>;
-} // Defs = [ARGUMENTS]
-
// Select stores with no constant offset.
class StorePatNoOffset<ValueType ty, PatFrag node, NI inst> :
Pat<(node ty:$val, I32:$addr), (inst 0, 0, I32:$addr, ty:$val)>;
@@ -387,9 +374,6 @@ def : StorePatExternSymOffOnly<i64, store, STORE_I64>;
def : StorePatExternSymOffOnly<f32, store, STORE_F32>;
def : StorePatExternSymOffOnly<f64, store, STORE_F64>;
-
-let Defs = [ARGUMENTS] in {
-
// Truncating store.
defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>;
defm STORE16_I32 : WebAssemblyStore<I32, "i32.store16", 0x3b>;
@@ -397,8 +381,6 @@ defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>;
defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
-} // Defs = [ARGUMENTS]
-
// Select truncating stores with no constant offset.
def : StorePatNoOffset<i32, truncstorei8, STORE8_I32>;
def : StorePatNoOffset<i32, truncstorei16, STORE16_I32>;
@@ -446,8 +428,6 @@ def : StorePatExternSymOffOnly<i64, truncstorei8, STORE8_I64>;
def : StorePatExternSymOffOnly<i64, truncstorei16, STORE16_I64>;
def : StorePatExternSymOffOnly<i64, truncstorei32, STORE32_I64>;
-let Defs = [ARGUMENTS] in {
-
// Current memory size.
defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
(outs), (ins i32imm:$flags),
@@ -456,44 +436,13 @@ defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
"memory.size\t$dst, $flags", "memory.size\t$flags",
0x3f>,
Requires<[HasAddr32]>;
-defm MEM_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
- (outs), (ins i32imm:$flags),
- [(set I32:$dst, (int_wasm_mem_size (i32 imm:$flags)))],
- "mem.size\t$dst, $flags", "mem.size\t$flags", 0x3f>,
- Requires<[HasAddr32]>;
-defm CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
- (outs), (ins i32imm:$flags),
- [],
- "current_memory\t$dst",
- "current_memory\t$flags", 0x3f>,
- Requires<[HasAddr32]>;
// Grow memory.
defm MEMORY_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags, I32:$delta),
+ (outs), (ins i32imm:$flags),
[(set I32:$dst,
(int_wasm_memory_grow (i32 imm:$flags),
I32:$delta))],
"memory.grow\t$dst, $flags, $delta",
- "memory.grow\t$flags, $delta", 0x3f>,
+ "memory.grow\t$flags", 0x40>,
Requires<[HasAddr32]>;
-defm MEM_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags),
- [(set I32:$dst,
- (int_wasm_mem_grow (i32 imm:$flags), I32:$delta))],
- "mem.grow\t$dst, $flags, $delta", "mem.grow\t$flags",
- 0x3f>,
- Requires<[HasAddr32]>;
-defm GROW_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
- (outs), (ins i32imm:$flags),
- [],
- "grow_memory\t$dst, $delta", "grow_memory\t$flags",
- 0x40>,
- Requires<[HasAddr32]>;
-
-} // Defs = [ARGUMENTS]
-
-def : Pat<(int_wasm_current_memory),
- (CURRENT_MEMORY_I32 0)>;
-def : Pat<(int_wasm_grow_memory I32:$delta),
- (GROW_MEMORY_I32 0, $delta)>;
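
The surviving memory.size/memory.grow definitions above are what the int_wasm_memory_size/int_wasm_memory_grow intrinsics select into. A small usage sketch from C++, assuming a wasm32 Clang target where the __builtin_wasm_memory_size/__builtin_wasm_memory_grow builtins (which expand to those intrinsics) are available:

#include <cstddef>

// Grow linear memory 0 by `pages` 64 KiB pages.
// memory.grow returns the previous size in pages, or (size_t)-1 on failure.
// Returns the new total size in pages, or 0 if the request failed.
std::size_t grow_heap_by_pages(std::size_t pages) {
  std::size_t result = __builtin_wasm_memory_grow(0, pages);   // memory.grow
  if (result == static_cast<std::size_t>(-1))
    return 0;
  return __builtin_wasm_memory_size(0);                        // memory.size
}
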
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 7d1edccdeb3c..587515c5b299 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -12,8 +12,796 @@
///
//===----------------------------------------------------------------------===//
+// Instructions requiring HasSIMD128 and the simd128 prefix byte
+multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> simdop = -1> {
+ defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+ !or(0xfd00, !and(0xff, simdop))>,
+ Requires<[HasSIMD128]>;
+}
+
+defm "" : ARGUMENT<V128, v16i8>;
+defm "" : ARGUMENT<V128, v8i16>;
+defm "" : ARGUMENT<V128, v4i32>;
+defm "" : ARGUMENT<V128, v2i64>;
+defm "" : ARGUMENT<V128, v4f32>;
+defm "" : ARGUMENT<V128, v2f64>;
+
+// Constrained immediate argument types
+foreach SIZE = [8, 16] in
+def ImmI#SIZE : ImmLeaf<i32,
+ "return ((uint64_t)Imm & ((1UL << "#SIZE#") - 1)) == (uint64_t)Imm;"
+>;
+foreach SIZE = [2, 4, 8, 16, 32] in
+def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
+
+//===----------------------------------------------------------------------===//
+// Load and store
+//===----------------------------------------------------------------------===//
+
+// Load: v128.load
+multiclass SIMDLoad<ValueType vec_t> {
+ let mayLoad = 1 in
+ defm LOAD_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$align, offset32_op:$off), [],
+ "v128.load\t$dst, ${off}(${addr})$align",
+ "v128.load\t$off$align", 0>;
+}
+
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+defm "" : SIMDLoad<vec_t>;
+
+// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
+def : LoadPatNoOffset<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatImmOff<vec_t, load, regPlusImm, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatImmOff<vec_t, load, or_is_add, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatGlobalAddr<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatExternalSym<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatOffsetOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatGlobalAddrOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+def : LoadPatExternSymOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
+}
+
+// Store: v128.store
+multiclass SIMDStore<ValueType vec_t> {
+ let mayStore = 1 in
+ defm STORE_#vec_t :
+ SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
+ (outs), (ins P2Align:$align, offset32_op:$off), [],
+ "v128.store\t${off}(${addr})$align, $vec",
+ "v128.store\t$off$align", 1>;
+}
+
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+defm "" : SIMDStore<vec_t>;
+
+// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
+def : StorePatNoOffset<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatImmOff<vec_t, store, regPlusImm, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatImmOff<vec_t, store, or_is_add, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatGlobalAddr<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatExternalSym<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatOffsetOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatGlobalAddrOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Constructing SIMD values
+//===----------------------------------------------------------------------===//
+
+// Constant: v128.const
+multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
+ let isMoveImm = 1, isReMaterializable = 1,
+ Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+ defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
+ [(set V128:$dst, (vec_t pat))],
+ "v128.const\t$dst, "#args,
+ "v128.const\t"#args, 2>;
+}
+
+defm "" : ConstVec<v16i8,
+ (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
+ vec_i8imm_op:$i2, vec_i8imm_op:$i3,
+ vec_i8imm_op:$i4, vec_i8imm_op:$i5,
+ vec_i8imm_op:$i6, vec_i8imm_op:$i7,
+ vec_i8imm_op:$i8, vec_i8imm_op:$i9,
+ vec_i8imm_op:$iA, vec_i8imm_op:$iB,
+ vec_i8imm_op:$iC, vec_i8imm_op:$iD,
+ vec_i8imm_op:$iE, vec_i8imm_op:$iF),
+ (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
+ ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
+ ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
+ ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
+ !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
+ "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
+defm "" : ConstVec<v8i16,
+ (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
+ vec_i16imm_op:$i2, vec_i16imm_op:$i3,
+ vec_i16imm_op:$i4, vec_i16imm_op:$i5,
+ vec_i16imm_op:$i6, vec_i16imm_op:$i7),
+ (build_vector
+ ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
+ ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
+ "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
+defm "" : ConstVec<v4i32,
+ (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
+ vec_i32imm_op:$i2, vec_i32imm_op:$i3),
+ (build_vector (i32 imm:$i0), (i32 imm:$i1),
+ (i32 imm:$i2), (i32 imm:$i3)),
+ "$i0, $i1, $i2, $i3">;
+defm "" : ConstVec<v2i64,
+ (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
+ (build_vector (i64 imm:$i0), (i64 imm:$i1)),
+ "$i0, $i1">;
+defm "" : ConstVec<v4f32,
+ (ins f32imm_op:$i0, f32imm_op:$i1,
+ f32imm_op:$i2, f32imm_op:$i3),
+ (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
+ (f32 fpimm:$i2), (f32 fpimm:$i3)),
+ "$i0, $i1, $i2, $i3">;
+defm "" : ConstVec<v2f64,
+ (ins f64imm_op:$i0, f64imm_op:$i1),
+ (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
+ "$i0, $i1">;
+
+// Shuffle lanes: shuffle
+defm SHUFFLE :
+ SIMD_I<(outs V128:$dst),
+ (ins V128:$x, V128:$y,
+ vec_i8imm_op:$m0, vec_i8imm_op:$m1,
+ vec_i8imm_op:$m2, vec_i8imm_op:$m3,
+ vec_i8imm_op:$m4, vec_i8imm_op:$m5,
+ vec_i8imm_op:$m6, vec_i8imm_op:$m7,
+ vec_i8imm_op:$m8, vec_i8imm_op:$m9,
+ vec_i8imm_op:$mA, vec_i8imm_op:$mB,
+ vec_i8imm_op:$mC, vec_i8imm_op:$mD,
+ vec_i8imm_op:$mE, vec_i8imm_op:$mF),
+ (outs),
+ (ins
+ vec_i8imm_op:$m0, vec_i8imm_op:$m1,
+ vec_i8imm_op:$m2, vec_i8imm_op:$m3,
+ vec_i8imm_op:$m4, vec_i8imm_op:$m5,
+ vec_i8imm_op:$m6, vec_i8imm_op:$m7,
+ vec_i8imm_op:$m8, vec_i8imm_op:$m9,
+ vec_i8imm_op:$mA, vec_i8imm_op:$mB,
+ vec_i8imm_op:$mC, vec_i8imm_op:$mD,
+ vec_i8imm_op:$mE, vec_i8imm_op:$mF),
+ [],
+ "v8x16.shuffle\t$dst, $x, $y, "#
+ "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
+ "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
+ "v8x16.shuffle\t"#
+ "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
+ "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
+ 3>;
+
+// Shuffles after custom lowering
+def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
+def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
+ (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
+ (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
+ (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
+ (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
+ (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
+ (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
+ (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
+ (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
+ (vec_t (SHUFFLE (vec_t V128:$x), (vec_t V128:$y),
+ (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
+ (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
+ (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
+ (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
+ (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
+ (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
+ (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
+ (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
+}
+
+// Create vector with identical lanes: splat
+def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
+def splat4 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x)>;
+def splat8 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x)>;
+def splat16 : PatFrag<(ops node:$x), (build_vector
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x,
+ node:$x, node:$x, node:$x, node:$x)>;
+
+multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
+ PatFrag splat_pat, bits<32> simdop> {
+ // Prefer splats over v128.const for const splats (65 is lowest that works)
+ let AddedComplexity = 65 in
+ defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
+ [(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
+ vec#".splat\t$dst, $x", vec#".splat", simdop>;
+}
+
+defm "" : Splat<v16i8, "i8x16", I32, splat16, 4>;
+defm "" : Splat<v8i16, "i16x8", I32, splat8, 8>;
+defm "" : Splat<v4i32, "i32x4", I32, splat4, 12>;
+defm "" : Splat<v2i64, "i64x2", I64, splat2, 15>;
+defm "" : Splat<v4f32, "f32x4", F32, splat4, 18>;
+defm "" : Splat<v2f64, "f64x2", F64, splat2, 21>;
+
+//===----------------------------------------------------------------------===//
+// Accessing lanes
+//===----------------------------------------------------------------------===//
+
+// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
+multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t,
+ WebAssemblyRegClass reg_t, bits<32> simdop,
+ string suffix = "", SDNode extract = vector_extract> {
+ defm EXTRACT_LANE_#vec_t#suffix :
+ SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))],
+ vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
+ vec#".extract_lane"#suffix#"\t$idx", simdop>;
+}
+
+multiclass ExtractPat<ValueType lane_t, int mask> {
+ def _s : PatFrag<(ops node:$vec, node:$idx),
+ (i32 (sext_inreg
+ (i32 (vector_extract
+ node:$vec,
+ node:$idx
+ )),
+ lane_t
+ ))>;
+ def _u : PatFrag<(ops node:$vec, node:$idx),
+ (i32 (and
+ (i32 (vector_extract
+ node:$vec,
+ node:$idx
+ )),
+ (i32 mask)
+ ))>;
+}
+
+defm extract_i8x16 : ExtractPat<i8, 0xff>;
+defm extract_i16x8 : ExtractPat<i16, 0xffff>;
+
+multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
+ defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign,
+ !cast<PatFrag>("extract_i8x16"#sign)>;
+ defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign,
+ !cast<PatFrag>("extract_i16x8"#sign)>;
+}
+
+defm "" : ExtractLaneExtended<"_s", 5>;
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm "" : ExtractLaneExtended<"_u", 6>;
+defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
+defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>;
+defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>;
+defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>;
+
+// It would be more conventional to use unsigned extracts, but V8
+// doesn't implement them yet.
+def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
+ (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>;
+def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
+ (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>;
+
+// Lower undef lane indices to zero
+def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
+def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
+def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
+def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
+def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
+ (EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
+def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
+ (EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
+def : Pat<(vector_extract (v4i32 V128:$vec), undef),
+ (EXTRACT_LANE_v4i32 V128:$vec, 0)>;
+def : Pat<(vector_extract (v2i64 V128:$vec), undef),
+ (EXTRACT_LANE_v2i64 V128:$vec, 0)>;
+def : Pat<(vector_extract (v4f32 V128:$vec), undef),
+ (EXTRACT_LANE_v4f32 V128:$vec, 0)>;
+def : Pat<(vector_extract (v2f64 V128:$vec), undef),
+ (EXTRACT_LANE_v2f64 V128:$vec, 0)>;
+
+// Replace lane value: replace_lane
+multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
+ WebAssemblyRegClass reg_t, ValueType lane_t,
+ bits<32> simdop> {
+ defm REPLACE_LANE_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, reg_t:$x),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set V128:$dst, (vector_insert
+ (vec_t V128:$vec), (lane_t reg_t:$x), (i32 imm_t:$idx)))],
+ vec#".replace_lane\t$dst, $vec, $idx, $x",
+ vec#".replace_lane\t$idx", simdop>;
+}
+
+defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 7>;
+defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 11>;
+defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 14>;
+defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 17>;
+defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 20>;
+defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 23>;
+
+// Lower undef lane indices to zero
+def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v16i8 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v8i16 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
+ (REPLACE_LANE_v4i32 V128:$vec, 0, I32:$x)>;
+def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
+ (REPLACE_LANE_v2i64 V128:$vec, 0, I64:$x)>;
+def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
+ (REPLACE_LANE_v4f32 V128:$vec, 0, F32:$x)>;
+def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
+ (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;
+
+// Arbitrary other BUILD_VECTOR patterns
+def : Pat<(v16i8 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
+ (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7),
+ (i32 I32:$x8), (i32 I32:$x9), (i32 I32:$x10), (i32 I32:$x11),
+ (i32 I32:$x12), (i32 I32:$x13), (i32 I32:$x14), (i32 I32:$x15)
+ )),
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (REPLACE_LANE_v16i8
+ (v16i8 (SPLAT_v16i8 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ )),
+ 4, I32:$x4
+ )),
+ 5, I32:$x5
+ )),
+ 6, I32:$x6
+ )),
+ 7, I32:$x7
+ )),
+ 8, I32:$x8
+ )),
+ 9, I32:$x9
+ )),
+ 10, I32:$x10
+ )),
+ 11, I32:$x11
+ )),
+ 12, I32:$x12
+ )),
+ 13, I32:$x13
+ )),
+ 14, I32:$x14
+ )),
+ 15, I32:$x15
+ ))>;
+def : Pat<(v8i16 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
+ (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7)
+ )),
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (REPLACE_LANE_v8i16
+ (v8i16 (SPLAT_v8i16 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ )),
+ 4, I32:$x4
+ )),
+ 5, I32:$x5
+ )),
+ 6, I32:$x6
+ )),
+ 7, I32:$x7
+ ))>;
+def : Pat<(v4i32 (build_vector
+ (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3)
+ )),
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (REPLACE_LANE_v4i32
+ (v4i32 (SPLAT_v4i32 (i32 I32:$x0))),
+ 1, I32:$x1
+ )),
+ 2, I32:$x2
+ )),
+ 3, I32:$x3
+ ))>;
+def : Pat<(v2i64 (build_vector (i64 I64:$x0), (i64 I64:$x1))),
+ (v2i64 (REPLACE_LANE_v2i64
+ (v2i64 (SPLAT_v2i64 (i64 I64:$x0))), 1, I64:$x1))>;
+def : Pat<(v4f32 (build_vector
+ (f32 F32:$x0), (f32 F32:$x1), (f32 F32:$x2), (f32 F32:$x3)
+ )),
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (REPLACE_LANE_v4f32
+ (v4f32 (SPLAT_v4f32 (f32 F32:$x0))),
+ 1, F32:$x1
+ )),
+ 2, F32:$x2
+ )),
+ 3, F32:$x3
+ ))>;
+def : Pat<(v2f64 (build_vector (f64 F64:$x0), (f64 F64:$x1))),
+ (v2f64 (REPLACE_LANE_v2f64
+ (v2f64 (SPLAT_v2f64 (f64 F64:$x0))), 1, F64:$x1))>;
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDCondition<ValueType vec_t, ValueType out_t, string vec,
+ string name, CondCode cond, bits<32> simdop> {
+ defm _#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
+ [(set (out_t V128:$dst),
+ (setcc (vec_t V128:$lhs), (vec_t V128:$rhs), cond)
+ )],
+ vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, simdop>;
+}
+
+multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> {
+ defm "" : SIMDCondition<v16i8, v16i8, "i8x16", name, cond, baseInst>;
+ defm "" : SIMDCondition<v8i16, v8i16, "i16x8", name, cond,
+ !add(baseInst, 10)>;
+ defm "" : SIMDCondition<v4i32, v4i32, "i32x4", name, cond,
+ !add(baseInst, 20)>;
+}
+
+multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
+ defm "" : SIMDCondition<v4f32, v4i32, "f32x4", name, cond, baseInst>;
+ defm "" : SIMDCondition<v2f64, v2i64, "f64x2", name, cond,
+ !add(baseInst, 6)>;
+}
+
+// Equality: eq
+let isCommutable = 1 in {
+defm EQ : SIMDConditionInt<"eq", SETEQ, 24>;
+defm EQ : SIMDConditionFP<"eq", SETOEQ, 64>;
+} // isCommutable = 1
+
+// Non-equality: ne
let isCommutable = 1 in {
-defm ADD : SIMDBinary<add, fadd, "add ">;
-defm MUL: SIMDBinary<mul, fmul, "mul ">;
+defm NE : SIMDConditionInt<"ne", SETNE, 25>;
+defm NE : SIMDConditionFP<"ne", SETUNE, 65>;
} // isCommutable = 1
-defm SUB: SIMDBinary<sub, fsub, "sub ">;
+
+// Less than: lt_s / lt_u / lt
+defm LT_S : SIMDConditionInt<"lt_s", SETLT, 26>;
+defm LT_U : SIMDConditionInt<"lt_u", SETULT, 27>;
+defm LT : SIMDConditionFP<"lt", SETOLT, 66>;
+
+// Greater than: gt_s / gt_u / gt
+defm GT_S : SIMDConditionInt<"gt_s", SETGT, 28>;
+defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 29>;
+defm GT : SIMDConditionFP<"gt", SETOGT, 67>;
+
+// Less than or equal: le_s / le_u / le
+defm LE_S : SIMDConditionInt<"le_s", SETLE, 30>;
+defm LE_U : SIMDConditionInt<"le_u", SETULE, 31>;
+defm LE : SIMDConditionFP<"le", SETOLE, 68>;
+
+// Greater than or equal: ge_s / ge_u / ge
+defm GE_S : SIMDConditionInt<"ge_s", SETGE, 32>;
+defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>;
+defm GE : SIMDConditionFP<"ge", SETOGE, 69>;
+
+// Lower float comparisons that don't care about NaN to standard WebAssembly
+// float comparisons. These instructions are generated in the target-independent
+// expansion of unordered comparisons and ordered ne.
+def : Pat<(v4i32 (seteq (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (v4i32 (EQ_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+def : Pat<(v4i32 (setne (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (v4i32 (NE_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+def : Pat<(v2i64 (seteq (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (v2i64 (EQ_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+def : Pat<(v2i64 (setne (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (v2i64 (NE_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+
+//===----------------------------------------------------------------------===//
+// Bitwise operations
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinary<ValueType vec_t, string vec, SDNode node, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (node (vec_t V128:$lhs), (vec_t V128:$rhs))
+ )],
+ vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name,
+ simdop>;
+}
+
+multiclass SIMDBitwise<SDNode node, string name, bits<32> simdop> {
+ defm "" : SIMDBinary<v16i8, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v8i16, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v4i32, "v128", node, name, simdop>;
+ defm "" : SIMDBinary<v2i64, "v128", node, name, simdop>;
+}
+
+multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (vec_t (node (vec_t V128:$vec)))
+ )],
+ vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
+}
+
+// Bitwise logic: v128.not
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
+defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 76>;
+
+// Bitwise logic: v128.and / v128.or / v128.xor
+let isCommutable = 1 in {
+defm AND : SIMDBitwise<and, "and", 77>;
+defm OR : SIMDBitwise<or, "or", 78>;
+defm XOR : SIMDBitwise<xor, "xor", 79>;
+} // isCommutable = 1
+
+// Bitwise select: v128.bitselect
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+ defm BITSELECT_#vec_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (vec_t (int_wasm_bitselect
+ (vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c)
+ ))
+ )],
+ "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 80>;
+
+// Bitselect is equivalent to (c & v1) | (~c & v2)
+foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
+ def : Pat<(vec_t (or (and (vec_t V128:$c), (vec_t V128:$v1)),
+ (and (vnot V128:$c), (vec_t V128:$v2)))),
+ (!cast<Instruction>("BITSELECT_"#vec_t)
+ V128:$v1, V128:$v2, V128:$c)>;
+
+//===----------------------------------------------------------------------===//
+// Integer unary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>;
+ defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 17)>;
+ defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 34)>;
+ defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 51)>;
+}
+
+multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
+ bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
+ [(set I32:$dst, (i32 (op (vec_t V128:$vec))))],
+ vec#"."#name#"\t$dst, $vec", vec#"."#name, simdop>;
+}
+
+multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> {
+ defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>;
+ defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 17)>;
+ defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 34)>;
+ defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 51)>;
+}
+
+// Integer vector negation
+def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
+
+// Integer negation: neg
+defm NEG : SIMDUnaryInt<ivneg, "neg", 81>;
+
+// Any lane true: any_true
+defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 82>;
+
+// All lanes true: all_true
+defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 83>;
+
+//===----------------------------------------------------------------------===//
+// Bit shifts
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec,
+ string name, bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
+ (outs), (ins),
+ [(set (vec_t V128:$dst),
+ (node V128:$vec, (vec_t shift_vec)))],
+ vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
+}
+
+multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>;
+ defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name,
+ !add(baseInst, 17)>;
+ defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name,
+ !add(baseInst, 34)>;
+ defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))),
+ name, !add(baseInst, 51)>;
+}
+
+// Left shift by scalar: shl
+defm SHL : SIMDShiftInt<shl, "shl", 84>;
+
+// Right shift by scalar: shr_s / shr_u
+defm SHR_S : SIMDShiftInt<sra, "shr_s", 85>;
+defm SHR_U : SIMDShiftInt<srl, "shr_u", 86>;
+
+// Truncate i64 shift operands to i32s
+foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in
+def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
+ (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;
+
+// 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping
+def wasm_shift_t : SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]
+>;
+def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
+def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
+def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;
+foreach shifts = [[wasm_shl, SHL_v2i64],
+ [wasm_shr_s, SHR_S_v2i64],
+ [wasm_shr_u, SHR_U_v2i64]] in
+def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), I32:$x)),
+ (v2i64 (shifts[1] (v2i64 V128:$vec), I32:$x))>;
+
+//===----------------------------------------------------------------------===//
+// Integer binary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
+ defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 17)>;
+}
+
+multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
+ defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 34)>;
+}
+
+multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
+ defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 51)>;
+}
+
+// Integer addition: add / add_saturate_s / add_saturate_u
+let isCommutable = 1 in {
+defm ADD : SIMDBinaryInt<add, "add", 87>;
+defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 88>;
+defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 89>;
+} // isCommutable = 1
+
+// Integer subtraction: sub / sub_saturate_s / sub_saturate_u
+defm SUB : SIMDBinaryInt<sub, "sub", 90>;
+defm SUB_SAT_S :
+ SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 91>;
+defm SUB_SAT_U :
+ SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 92>;
+
+// Integer multiplication: mul
+defm MUL : SIMDBinaryIntNoI64x2<mul, "mul", 93>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point unary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
+ defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 11)>;
+}
+
+// Absolute value: abs
+defm ABS : SIMDUnaryFP<fabs, "abs", 149>;
+
+// Negation: neg
+defm NEG : SIMDUnaryFP<fneg, "neg", 150>;
+
+// Square root: sqrt
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point binary arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
+ defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 11)>;
+}
+
+// Addition: add
+let isCommutable = 1 in
+defm ADD : SIMDBinaryFP<fadd, "add", 154>;
+
+// Subtraction: sub
+defm SUB : SIMDBinaryFP<fsub, "sub", 155>;
+
+// Multiplication: mul
+let isCommutable = 1 in
+defm MUL : SIMDBinaryFP<fmul, "mul", 156>;
+
+// Division: div
+let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+defm DIV : SIMDBinaryFP<fdiv, "div", 157>;
+
+// NaN-propagating minimum: min
+defm MIN : SIMDBinaryFP<fminimum, "min", 158>;
+
+// NaN-propagating maximum: max
+defm MAX : SIMDBinaryFP<fmaximum, "max", 159>;
+
+//===----------------------------------------------------------------------===//
+// Conversions
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
+ string name, bits<32> simdop> {
+ defm op#_#vec_t#_#arg_t :
+ SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec_t V128:$dst), (vec_t (op (arg_t V128:$vec))))],
+ name#"\t$dst, $vec", name, simdop>;
+}
+
+// Integer to floating point: convert
+defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 175>;
+defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 176>;
+defm "" : SIMDConvert<v2f64, v2i64, sint_to_fp, "f64x2.convert_i64x2_s", 177>;
+defm "" : SIMDConvert<v2f64, v2i64, uint_to_fp, "f64x2.convert_i64x2_u", 178>;
+
+// Floating point to integer with saturation: trunc_sat
+defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 171>;
+defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 172>;
+defm "" : SIMDConvert<v2i64, v2f64, fp_to_sint, "i64x2.trunc_sat_f64x2_s", 173>;
+defm "" : SIMDConvert<v2i64, v2f64, fp_to_uint, "i64x2.trunc_sat_f64x2_u", 174>;
+
+// Lower llvm.wasm.trunc.saturate.* to saturating instructions
+def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
+ (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
+def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
+ (fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
+def : Pat<(v2i64 (int_wasm_trunc_saturate_signed (v2f64 V128:$src))),
+ (fp_to_sint_v2i64_v2f64 (v2f64 V128:$src))>;
+def : Pat<(v2i64 (int_wasm_trunc_saturate_unsigned (v2f64 V128:$src))),
+ (fp_to_uint_v2i64_v2f64 (v2f64 V128:$src))>;
+
+// Bitcasts are nops
+// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
+foreach t1 = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+foreach t2 = !foldl(
+ []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ acc, cur, !if(!eq(!cast<string>(t1), !cast<string>(cur)),
+ acc, !listconcat(acc, [cur])
+ )
+) in
+def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;
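
All of the SIMD definitions above funnel through the SIMD_I multiclass, whose !or(0xfd00, !and(0xff, simdop)) expression prepends the 0xfd SIMD prefix byte to each per-instruction sub-opcode. A minimal sketch of that arithmetic (the function name is illustrative, not part of the LLVM sources):

#include <cstdint>

// Mirrors !or(0xfd00, !and(0xff, simdop)) from the SIMD_I multiclass.
constexpr uint32_t encodeSimdOpcode(uint32_t simdop) {
  return 0xfd00u | (simdop & 0xffu);
}

static_assert(encodeSimdOpcode(0)  == 0xfd00, "v128.load");
static_assert(encodeSimdOpcode(4)  == 0xfd04, "i8x16.splat");
static_assert(encodeSimdOpcode(80) == 0xfd50, "v128.bitselect");
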
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index e42dcbc0a8ac..ad838dfb574a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -31,6 +31,7 @@ class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
+ bool removeUnnecessaryUnreachables(MachineFunction &MF);
bool replaceFuncletReturns(MachineFunction &MF);
bool hoistCatches(MachineFunction &MF);
bool addCatchAlls(MachineFunction &MF);
@@ -47,7 +48,7 @@ public:
char WebAssemblyLateEHPrepare::ID = 0;
INITIALIZE_PASS(WebAssemblyLateEHPrepare, DEBUG_TYPE,
- "WebAssembly Exception Preparation", false, false)
+ "WebAssembly Late Exception Preparation", false, false)
FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
return new WebAssemblyLateEHPrepare();
@@ -59,7 +60,7 @@ FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
// possible search paths should be the same.
// Returns nullptr in case it does not find any EH pad in the search, or finds
// multiple different EH pads.
-MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) {
+static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
MachineFunction *MF = MI->getParent()->getParent();
SmallVector<MachineBasicBlock *, 2> WL;
SmallPtrSet<MachineBasicBlock *, 2> Visited;
@@ -83,29 +84,35 @@ MachineBasicBlock *GetMatchingEHPad(MachineInstr *MI) {
return EHPad;
}
-// Erases the given BB and all its children from the function. If other BBs have
-// this BB as a successor, the successor relationships will be deleted as well.
-static void EraseBBAndChildren(MachineBasicBlock *MBB) {
- SmallVector<MachineBasicBlock *, 8> WL;
- WL.push_back(MBB);
+// Erase each of the specified BBs if it has no remaining predecessors, and
+// also erase all of its dead children.
+template <typename Container>
+static void eraseDeadBBsAndChildren(const Container &MBBs) {
+ SmallVector<MachineBasicBlock *, 8> WL(MBBs.begin(), MBBs.end());
while (!WL.empty()) {
MachineBasicBlock *MBB = WL.pop_back_val();
- for (auto *Pred : MBB->predecessors())
- Pred->removeSuccessor(MBB);
- for (auto *Succ : MBB->successors()) {
- WL.push_back(Succ);
+ if (!MBB->pred_empty())
+ continue;
+ SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+ WL.append(MBB->succ_begin(), MBB->succ_end());
+ for (auto *Succ : Succs)
MBB->removeSuccessor(Succ);
- }
MBB->eraseFromParent();
}
}
bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** Late EH Prepare **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
if (MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() !=
ExceptionHandling::Wasm)
return false;
bool Changed = false;
+ Changed |= removeUnnecessaryUnreachables(MF);
Changed |= addRethrows(MF);
if (!MF.getFunction().hasPersonalityFn())
return Changed;
@@ -118,6 +125,31 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
+ MachineFunction &MF) {
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (!WebAssembly::isThrow(MI))
+ continue;
+ Changed = true;
+
+ // The instruction after the throw should be an unreachable or a branch to
+ // another BB that should eventually lead to an unreachable. Delete it
+ // because throw itself is a terminator, and also delete successors if
+ // any.
+ MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end());
+ SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
+ MBB.succ_end());
+ for (auto *Succ : Succs)
+ MBB.removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(Succs);
+ }
+ }
+
+ return Changed;
+}
+
bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
bool Changed = false;
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -179,7 +211,7 @@ bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
Catches.push_back(&MI);
for (auto *Catch : Catches) {
- MachineBasicBlock *EHPad = GetMatchingEHPad(Catch);
+ MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
assert(EHPad && "No matching EH pad for catch");
if (EHPad->begin() == Catch)
continue;
@@ -238,14 +270,18 @@ bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
TII.get(WebAssembly::RETHROW_TO_CALLER));
- // Becasue __cxa_rethrow does not return, the instruction after the
+ // Because __cxa_rethrow does not return, the instruction after the
// rethrow should be an unreachable or a branch to another BB that should
// eventually lead to an unreachable. Delete it because rethrow itself is
// a terminator, and also delete non-EH pad successors if any.
MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
+ SmallVector<MachineBasicBlock *, 8> NonPadSuccessors;
for (auto *Succ : MBB.successors())
if (!Succ->isEHPad())
- EraseBBAndChildren(Succ);
+ NonPadSuccessors.push_back(Succ);
+ for (auto *Succ : NonPadSuccessors)
+ MBB.removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(NonPadSuccessors);
}
return Changed;
}
@@ -255,7 +291,7 @@ bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
// %exn = catch 0
// call @__clang_call_terminate(%exn)
// unreachable
-// (There can be set_local and get_locals before the call if we didn't run
+// (There can be local.set and local.gets before the call if we didn't run
// RegStackify)
// But code transformations can change or add more control flow, so the call to
// __clang_call_terminate() function may not be in the original EH pad anymore.
@@ -277,7 +313,7 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
bool Changed = false;
for (auto *Call : ClangCallTerminateCalls) {
- MachineBasicBlock *EHPad = GetMatchingEHPad(Call);
+ MachineBasicBlock *EHPad = getMatchingEHPad(Call);
assert(EHPad && "No matching EH pad for catch");
// If it is already the form we want, skip it
@@ -294,7 +330,7 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
// This runs after hoistCatches(), so catch instruction should be at the top
assert(WebAssembly::isCatch(*Catch));
// Takes the result register of the catch instruction as argument. There may
- // have been some other set_local/get_locals in between, but at this point
+ // have been some other local.set/local.gets in between, but at this point
// we don't care.
Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
@@ -302,8 +338,11 @@ bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
TII.get(WebAssembly::UNREACHABLE));
EHPad->erase(InsertPos, EHPad->end());
- for (auto *Succ : EHPad->successors())
- EraseBBAndChildren(Succ);
+ SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(),
+ EHPad->succ_end());
+ for (auto *Succ : Succs)
+ EHPad->removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(Succs);
}
return Changed;
}
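
The eraseDeadBBsAndChildren helper introduced above only erases a block once no predecessors remain, then re-queues its successors. A toy, self-contained sketch of the same worklist pattern over a plain adjacency map (illustrative types only, not the MachineBasicBlock API):

#include <map>
#include <set>
#include <vector>

using Graph = std::map<int, std::set<int>>;   // node -> successors

void eraseDeadNodesAndChildren(Graph &G, std::vector<int> Seeds) {
  auto hasPred = [&](int N) {
    for (const auto &KV : G)
      if (KV.second.count(N))
        return true;
    return false;
  };
  std::vector<int> WL(Seeds);
  while (!WL.empty()) {
    int N = WL.back();
    WL.pop_back();
    if (!G.count(N) || hasPred(N))
      continue;                 // still reachable, or already erased
    for (int Succ : G[N])
      WL.push_back(Succ);       // children may now be dead too
    G.erase(N);
  }
}
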
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 5fb97e38939a..c9a3527d3fbd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -78,30 +78,102 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *Def = MRI.getVRegDef(Cond);
switch (Def->getOpcode()) {
using namespace WebAssembly;
- case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break;
- case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break;
- case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break;
- case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break;
- case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break;
- case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break;
- case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break;
- case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break;
- case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break;
- case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break;
- case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break;
- case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break;
- case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break;
- case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break;
- case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break;
- case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break;
- case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break;
- case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break;
- case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break;
- case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break;
- case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break;
- case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break;
- case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break;
- case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break;
+ case EQ_I32:
+ Def->setDesc(TII.get(NE_I32));
+ Inverted = true;
+ break;
+ case NE_I32:
+ Def->setDesc(TII.get(EQ_I32));
+ Inverted = true;
+ break;
+ case GT_S_I32:
+ Def->setDesc(TII.get(LE_S_I32));
+ Inverted = true;
+ break;
+ case GE_S_I32:
+ Def->setDesc(TII.get(LT_S_I32));
+ Inverted = true;
+ break;
+ case LT_S_I32:
+ Def->setDesc(TII.get(GE_S_I32));
+ Inverted = true;
+ break;
+ case LE_S_I32:
+ Def->setDesc(TII.get(GT_S_I32));
+ Inverted = true;
+ break;
+ case GT_U_I32:
+ Def->setDesc(TII.get(LE_U_I32));
+ Inverted = true;
+ break;
+ case GE_U_I32:
+ Def->setDesc(TII.get(LT_U_I32));
+ Inverted = true;
+ break;
+ case LT_U_I32:
+ Def->setDesc(TII.get(GE_U_I32));
+ Inverted = true;
+ break;
+ case LE_U_I32:
+ Def->setDesc(TII.get(GT_U_I32));
+ Inverted = true;
+ break;
+ case EQ_I64:
+ Def->setDesc(TII.get(NE_I64));
+ Inverted = true;
+ break;
+ case NE_I64:
+ Def->setDesc(TII.get(EQ_I64));
+ Inverted = true;
+ break;
+ case GT_S_I64:
+ Def->setDesc(TII.get(LE_S_I64));
+ Inverted = true;
+ break;
+ case GE_S_I64:
+ Def->setDesc(TII.get(LT_S_I64));
+ Inverted = true;
+ break;
+ case LT_S_I64:
+ Def->setDesc(TII.get(GE_S_I64));
+ Inverted = true;
+ break;
+ case LE_S_I64:
+ Def->setDesc(TII.get(GT_S_I64));
+ Inverted = true;
+ break;
+ case GT_U_I64:
+ Def->setDesc(TII.get(LE_U_I64));
+ Inverted = true;
+ break;
+ case GE_U_I64:
+ Def->setDesc(TII.get(LT_U_I64));
+ Inverted = true;
+ break;
+ case LT_U_I64:
+ Def->setDesc(TII.get(GE_U_I64));
+ Inverted = true;
+ break;
+ case LE_U_I64:
+ Def->setDesc(TII.get(GT_U_I64));
+ Inverted = true;
+ break;
+ case EQ_F32:
+ Def->setDesc(TII.get(NE_F32));
+ Inverted = true;
+ break;
+ case NE_F32:
+ Def->setDesc(TII.get(EQ_F32));
+ Inverted = true;
+ break;
+ case EQ_F64:
+ Def->setDesc(TII.get(NE_F64));
+ Inverted = true;
+ break;
+ case NE_F64:
+ Def->setDesc(TII.get(EQ_F64));
+ Inverted = true;
+ break;
case EQZ_I32: {
// Invert an eqz by replacing it with its operand.
Cond = Def->getOperand(1).getReg();
@@ -109,7 +181,8 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
Inverted = true;
break;
}
- default: break;
+ default:
+ break;
}
}
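
The reflowed switch above still encodes the same fact: every integer comparison has an exact single-instruction inverse, while for floats only eq/ne are inverted, since the ordered inequalities have no complement that behaves correctly for NaN inputs. A condensed sketch of that inversion table, with a toy enum in place of the WebAssembly opcode names:

enum class Cmp { Eq, Ne, LtS, LeS, GtS, GeS, LtU, LeU, GtU, GeU };

// Returns the comparison whose result is the logical negation of C.
constexpr Cmp invert(Cmp C) {
  switch (C) {
  case Cmp::Eq:  return Cmp::Ne;
  case Cmp::Ne:  return Cmp::Eq;
  case Cmp::LtS: return Cmp::GeS;
  case Cmp::LeS: return Cmp::GtS;
  case Cmp::GtS: return Cmp::LeS;
  case Cmp::GeS: return Cmp::LtS;
  case Cmp::LtU: return Cmp::GeU;
  case Cmp::LeU: return Cmp::GtU;
  case Cmp::GtU: return Cmp::LeU;
  case Cmp::GeU: return Cmp::LtU;
  }
  return C;   // unreachable; keeps compilers quiet
}

static_assert(invert(invert(Cmp::LtU)) == Cmp::LtU, "inversion is an involution");
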
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index e9cb7c10113b..0491f71cea7f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -50,24 +50,21 @@
///
 /// In detail, this pass does the following things:
///
-/// 1) Create three global variables: __THREW__, __threwValue, and __tempRet0.
-/// __tempRet0 will be set within __cxa_find_matching_catch() function in
-/// JS library, and __THREW__ and __threwValue will be set in invoke wrappers
+/// 1) Assumes the existence of global variables: __THREW__ and __threwValue.
+/// __THREW__ and __threwValue will be set in invoke wrappers
/// in JS glue code. For what invoke wrappers are, refer to 3). These
/// variables are used for both exceptions and setjmp/longjmps.
/// __THREW__ indicates whether an exception or a longjmp occurred or not. 0
/// means nothing occurred, 1 means an exception occurred, and other numbers
/// mean a longjmp occurred. In the case of longjmp, __threwValue variable
/// indicates the corresponding setjmp buffer the longjmp corresponds to.
-/// In exception handling, __tempRet0 indicates the type of an exception
-/// caught, and in setjmp/longjmp, it means the second argument to longjmp
-/// function.
///
/// * Exception handling
///
-/// 2) Create setThrew and setTempRet0 functions.
-/// The global variables created in 1) will exist in wasm address space,
-/// but their values should be set in JS code, so we provide these functions
+/// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions
+/// at link time.
+/// The global variables in 1) will exist in wasm address space,
+/// but their values should be set in JS code, so these functions serve
/// as interfaces to JS glue code. These functions are equivalent to the
/// following JS functions, which actually exist in asm.js version of JS
/// library.
@@ -78,10 +75,12 @@
/// __threwValue = value;
/// }
/// }
+///
+/// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
///
-/// function setTempRet0(value) {
-/// __tempRet0 = value;
-/// }
+/// In exception handling, getTempRet0 indicates the type of an exception
+/// caught, and in setjmp/longjmp, it means the second argument to longjmp
+/// function.
///
/// 3) Lower
/// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
@@ -118,11 +117,10 @@
/// ... use %val ...
/// into
/// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...)
-/// %val = {%fmc, __tempRet0}
+/// %val = {%fmc, getTempRet0()}
/// ... use %val ...
/// Here N is a number calculated based on the number of clauses.
-/// Global variable __tempRet0 is set within __cxa_find_matching_catch() in
-/// JS glue code.
+/// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
///
/// 5) Lower
/// resume {%a, %b}
@@ -138,7 +136,17 @@
///
/// * Setjmp / Longjmp handling
///
-/// 7) In the function entry that calls setjmp, initialize setjmpTable and
+/// In case calls to longjmp() exist
+///
+/// 1) Lower
+/// longjmp(buf, value)
+/// into
+/// emscripten_longjmp_jmpbuf(buf, value)
+/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
+///
+/// In case calls to setjmp() exist
+///
+/// 2) In the function entry that calls setjmp, initialize setjmpTable and
/// setjmpTableSize as follows:
/// setjmpTableSize = 4;
/// setjmpTable = (int *) malloc(40);
@@ -146,27 +154,22 @@
/// setjmpTable and setjmpTableSize are used in saveSetjmp() function in JS
/// code.
///
-/// 8) Lower
+/// 3) Lower
/// setjmp(buf)
/// into
/// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
-/// setjmpTableSize = __tempRet0;
+/// setjmpTableSize = getTempRet0();
/// For each dynamic setjmp call, setjmpTable stores its ID (a number which
/// is incrementally assigned from 0) and its label (a unique number that
/// represents each callsite of setjmp). When we need more entries in
/// setjmpTable, it is reallocated in saveSetjmp() in JS code and it will
/// return the new table address, and assign the new table size in
-/// __tempRet0. saveSetjmp also stores the setjmp's ID into the buffer buf.
-/// A BB with setjmp is split into two after setjmp call in order to make the
-/// post-setjmp BB the possible destination of longjmp BB.
+/// setTempRet0(). saveSetjmp also stores the setjmp's ID into the buffer
+/// buf. A BB with setjmp is split into two after setjmp call in order to
+/// make the post-setjmp BB the possible destination of longjmp BB.
///
-/// 9) Lower
-/// longjmp(buf, value)
-/// into
-/// emscripten_longjmp_jmpbuf(buf, value)
-/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
///
-/// 10) Lower every call that might longjmp into
+/// 4) Lower every call that might longjmp into
/// __THREW__ = 0;
/// call @__invoke_SIG(func, arg1, arg2)
/// %__THREW__.val = __THREW__;
@@ -176,32 +179,32 @@
/// setjmpTableSize);
/// if (%label == 0)
/// emscripten_longjmp(%__THREW__.val, __threwValue);
-/// __tempRet0 = __threwValue;
+/// setTempRet0(__threwValue);
/// } else {
/// %label = -1;
/// }
-/// longjmp_result = __tempRet0;
+/// longjmp_result = getTempRet0();
/// switch label {
/// label 1: goto post-setjmp BB 1
/// label 2: goto post-setjmp BB 2
/// ...
/// default: goto split next BB
/// }
-/// testSetjmp examines setjmpTable to see if there is a matching setjmp
-/// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
-/// will be the address of matching jmp_buf buffer and __threwValue be the
-/// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is
-/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
-/// each setjmp callsite. Label 0 means this longjmp buffer does not
-/// correspond to one of the setjmp callsites in this function, so in this
-/// case we just chain the longjmp to the caller. (Here we call
-/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
-/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
-/// emscripten_longjmp takes an int. Both of them will eventually be lowered
-/// to emscripten_longjmp in s2wasm, but here we need two signatures - we
-/// can't translate an int value to a jmp_buf.)
-/// Label -1 means no longjmp occurred. Otherwise we jump to the right
-/// post-setjmp BB based on the label.
+/// testSetjmp examines setjmpTable to see if there is a matching setjmp
+/// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
+/// will be the address of matching jmp_buf buffer and __threwValue be the
+/// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is
+/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
+/// each setjmp callsite. Label 0 means this longjmp buffer does not
+/// correspond to one of the setjmp callsites in this function, so in this
+/// case we just chain the longjmp to the caller. (Here we call
+/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
+/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
+/// emscripten_longjmp takes an int. Both of them will eventually be lowered
+/// to emscripten_longjmp in s2wasm, but here we need two signatures - we
+/// can't translate an int value to a jmp_buf.)
+/// Label -1 means no longjmp occurred. Otherwise we jump to the right
+/// post-setjmp BB based on the label.
///
///===----------------------------------------------------------------------===//
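
As a concrete (if simplified) picture of step 3) of the setjmp handling described above, a lowered setjmp call site behaves roughly like the following C++ sketch; saveSetjmp and getTempRet0 are the library/JS-glue functions this pass assumes exist at link time, and the surrounding variables are placeholders:

extern "C" int *saveSetjmp(void *buf, int label, int *table, int size);
extern "C" int getTempRet0();

static int *setjmpTable;     // per-function table initialized in the entry
static int setjmpTableSize;  // current capacity of setjmpTable

static void lowerOneSetjmp(void *buf) {
  // setjmp(buf) becomes a saveSetjmp call; a reallocated table comes back as
  // the return value, and its new size is reported through getTempRet0().
  setjmpTable = saveSetjmp(buf, /*label=*/1, setjmpTable, setjmpTableSize);
  setjmpTableSize = getTempRet0();
}
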
@@ -239,7 +242,8 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
GlobalVariable *ThrewGV;
GlobalVariable *ThrewValueGV;
- GlobalVariable *TempRet0GV;
+ Function *GetTempRet0Func;
+ Function *SetTempRet0Func;
Function *ResumeF;
Function *EHTypeIDF;
Function *EmLongjmpF;
@@ -272,9 +276,6 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); }
bool canLongjmp(Module &M, const Value *Callee) const;
- void createSetThrewFunction(Module &M);
- void createSetTempRet0Function(Module &M);
-
void rebuildSSA(Function &F);
public:
@@ -282,9 +283,10 @@ public:
WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
: ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj),
- ThrewGV(nullptr), ThrewValueGV(nullptr), TempRet0GV(nullptr),
- ResumeF(nullptr), EHTypeIDF(nullptr), EmLongjmpF(nullptr),
- EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr), TestSetjmpF(nullptr) {
+ ThrewGV(nullptr), ThrewValueGV(nullptr), GetTempRet0Func(nullptr),
+ SetTempRet0Func(nullptr), ResumeF(nullptr), EHTypeIDF(nullptr),
+ EmLongjmpF(nullptr), EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr),
+ TestSetjmpF(nullptr) {
EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end());
}
bool runOnModule(Module &M) override;
@@ -333,13 +335,15 @@ static bool canThrow(const Value *V) {
return true;
}
-static GlobalVariable *createGlobalVariableI32(Module &M, IRBuilder<> &IRB,
- const char *Name) {
+// Get a global variable with the given name. If it doesn't exist, declare it,
+// which will generate an import, assuming that it will exist at link time.
+static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB,
+ const char *Name) {
if (M.getNamedGlobal(Name))
report_fatal_error(Twine("variable name is reserved: ") + Name);
return new GlobalVariable(M, IRB.getInt32Ty(), false,
- GlobalValue::WeakODRLinkage, IRB.getInt32(0), Name);
+ GlobalValue::ExternalLinkage, nullptr, Name);
}
// Simple function name mangler.
@@ -508,7 +512,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
Function *ThrowF = M.getFunction("__cxa_throw");
Function *TerminateF = M.getFunction("__clang_call_terminate");
if (Callee == BeginCatchF || Callee == EndCatchF ||
- Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF)
+ Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF ||
+ Callee == GetTempRet0Func || Callee == SetTempRet0Func)
return false;
// Otherwise we don't know
@@ -521,11 +526,11 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
// %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
// if (%label == 0)
// emscripten_longjmp(%__THREW__.val, threwValue);
-// __tempRet0 = threwValue;
+// setTempRet0(threwValue);
// } else {
// %label = -1;
// }
-// %longjmp_result = __tempRet0;
+// %longjmp_result = getTempRet0();
//
// As output parameters. returns %label, %longjmp_result, and the BB the last
// instruction (%longjmp_result = ...) is in.
@@ -569,15 +574,15 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
IRB.CreateUnreachable();
- // __tempRet0 = threwValue;
+ // setTempRet0(threwValue);
IRB.SetInsertPoint(EndBB2);
- IRB.CreateStore(ThrewValue, TempRet0GV);
+ IRB.CreateCall(SetTempRet0Func, ThrewValue);
IRB.CreateBr(EndBB1);
IRB.SetInsertPoint(ElseBB1);
IRB.CreateBr(EndBB1);
- // longjmp_result = __tempRet0;
+ // longjmp_result = getTempRet0();
IRB.SetInsertPoint(EndBB1);
PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label");
LabelPHI->addIncoming(ThenLabel, EndBB2);
@@ -587,68 +592,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
// Output parameter assignment
Label = LabelPHI;
EndBB = EndBB1;
- LongjmpResult = IRB.CreateLoad(TempRet0GV, "longjmp_result");
-}
-
-// Create setThrew function
-// function setThrew(threw, value) {
-// if (__THREW__ == 0) {
-// __THREW__ = threw;
-// __threwValue = value;
-// }
-// }
-void WebAssemblyLowerEmscriptenEHSjLj::createSetThrewFunction(Module &M) {
- LLVMContext &C = M.getContext();
- IRBuilder<> IRB(C);
-
- if (M.getNamedGlobal("setThrew"))
- report_fatal_error("setThrew already exists");
-
- Type *Params[] = {IRB.getInt32Ty(), IRB.getInt32Ty()};
- FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false);
- Function *F =
- Function::Create(FTy, GlobalValue::WeakODRLinkage, "setThrew", &M);
- Argument *Arg1 = &*(F->arg_begin());
- Argument *Arg2 = &*std::next(F->arg_begin());
- Arg1->setName("threw");
- Arg2->setName("value");
- BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
- BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", F);
- BasicBlock *EndBB = BasicBlock::Create(C, "if.end", F);
-
- IRB.SetInsertPoint(EntryBB);
- Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val");
- Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(0), "cmp");
- IRB.CreateCondBr(Cmp, ThenBB, EndBB);
-
- IRB.SetInsertPoint(ThenBB);
- IRB.CreateStore(Arg1, ThrewGV);
- IRB.CreateStore(Arg2, ThrewValueGV);
- IRB.CreateBr(EndBB);
-
- IRB.SetInsertPoint(EndBB);
- IRB.CreateRetVoid();
-}
-
-// Create setTempRet0 function
-// function setTempRet0(value) {
-// __tempRet0 = value;
-// }
-void WebAssemblyLowerEmscriptenEHSjLj::createSetTempRet0Function(Module &M) {
- LLVMContext &C = M.getContext();
- IRBuilder<> IRB(C);
-
- if (M.getNamedGlobal("setTempRet0"))
- report_fatal_error("setTempRet0 already exists");
- Type *Params[] = {IRB.getInt32Ty()};
- FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false);
- Function *F =
- Function::Create(FTy, GlobalValue::WeakODRLinkage, "setTempRet0", &M);
- F->arg_begin()->setName("value");
- BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
- IRB.SetInsertPoint(EntryBB);
- IRB.CreateStore(&*F->arg_begin(), TempRet0GV);
- IRB.CreateRetVoid();
+ LongjmpResult = IRB.CreateCall(GetTempRet0Func, None, "longjmp_result");
}
void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
@@ -679,6 +623,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
}
bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
+
LLVMContext &C = M.getContext();
IRBuilder<> IRB(C);
@@ -688,11 +634,19 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed);
- // Create global variables __THREW__, threwValue, and __tempRet0, which are
- // used in common for both exception handling and setjmp/longjmp handling
- ThrewGV = createGlobalVariableI32(M, IRB, "__THREW__");
- ThrewValueGV = createGlobalVariableI32(M, IRB, "__threwValue");
- TempRet0GV = createGlobalVariableI32(M, IRB, "__tempRet0");
+ // Declare (or get) the global variables __THREW__ and __threwValue, and the
+ // getTempRet0/setTempRet0 functions, which are used in common for both
+ // exception handling and setjmp/longjmp handling
+ ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__");
+ ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue");
+ GetTempRet0Func =
+ Function::Create(FunctionType::get(IRB.getInt32Ty(), false),
+ GlobalValue::ExternalLinkage, "getTempRet0", &M);
+ SetTempRet0Func = Function::Create(
+ FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false),
+ GlobalValue::ExternalLinkage, "setTempRet0", &M);
+ GetTempRet0Func->setDoesNotThrow();
+ SetTempRet0Func->setDoesNotThrow();
bool Changed = false;
@@ -721,22 +675,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
if (DoSjLj) {
Changed = true; // We have setjmp or longjmp somewhere
- // Register saveSetjmp function
- FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0),
- IRB.getInt32Ty(), Type::getInt32PtrTy(C),
- IRB.getInt32Ty()};
- FunctionType *FTy =
- FunctionType::get(Type::getInt32PtrTy(C), Params, false);
- SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- SaveSetjmpFName, &M);
-
- // Register testSetjmp function
- Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
- FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
- TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- TestSetjmpFName, &M);
-
if (LongjmpF) {
// Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is
// defined in JS code
@@ -746,27 +684,43 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF);
}
- FTy = FunctionType::get(IRB.getVoidTy(),
- {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
- EmLongjmpF =
- Function::Create(FTy, GlobalValue::ExternalLinkage, EmLongjmpFName, &M);
-
- // Only traverse functions that uses setjmp in order not to insert
- // unnecessary prep / cleanup code in every function
- SmallPtrSet<Function *, 8> SetjmpUsers;
- for (User *U : SetjmpF->users()) {
- auto *UI = cast<Instruction>(U);
- SetjmpUsers.insert(UI->getFunction());
+
+ if (SetjmpF) {
+ // Register saveSetjmp function
+ FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
+ SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0),
+ IRB.getInt32Ty(), Type::getInt32PtrTy(C),
+ IRB.getInt32Ty()};
+ FunctionType *FTy =
+ FunctionType::get(Type::getInt32PtrTy(C), Params, false);
+ SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ SaveSetjmpFName, &M);
+
+ // Register testSetjmp function
+ Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
+ FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
+ TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ TestSetjmpFName, &M);
+
+ FTy = FunctionType::get(IRB.getVoidTy(),
+ {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
+ EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ EmLongjmpFName, &M);
+
+ // Only traverse functions that use setjmp in order not to insert
+ // unnecessary prep / cleanup code in every function
+ SmallPtrSet<Function *, 8> SetjmpUsers;
+ for (User *U : SetjmpF->users()) {
+ auto *UI = cast<Instruction>(U);
+ SetjmpUsers.insert(UI->getFunction());
+ }
+ for (Function *F : SetjmpUsers)
+ runSjLjOnFunction(*F);
}
- for (Function *F : SetjmpUsers)
- runSjLjOnFunction(*F);
}
if (!Changed) {
// Delete unused global variables and functions
- ThrewGV->eraseFromParent();
- ThrewValueGV->eraseFromParent();
- TempRet0GV->eraseFromParent();
if (ResumeF)
ResumeF->eraseFromParent();
if (EHTypeIDF)
@@ -780,12 +734,6 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
return false;
}
- // If we have made any changes while doing exception handling or
- // setjmp/longjmp handling, we have to create these functions for JavaScript
- // to call.
- createSetThrewFunction(M);
- createSetTempRet0Function(M);
-
return true;
}
@@ -908,8 +856,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc");
Value *Undef = UndefValue::get(LPI->getType());
Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0");
- Value *TempRet0 =
- IRB.CreateLoad(TempRet0GV, TempRet0GV->getName() + ".val");
+ Value *TempRet0 = IRB.CreateCall(GetTempRet0Func, None, "tempret0");
Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1");
LPI->replaceAllUsesWith(Pair1);
@@ -990,7 +937,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Instruction *NewSetjmpTable =
IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
Instruction *NewSetjmpTableSize =
- IRB.CreateLoad(TempRet0GV, "setjmpTableSize");
+ IRB.CreateCall(GetTempRet0Func, None, "setjmpTableSize");
SetjmpTableInsts.push_back(NewSetjmpTable);
SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
ToErase.push_back(CI);
@@ -1098,7 +1045,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// Free setjmpTable buffer before each return instruction
for (BasicBlock &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (isa<ReturnInst>(TI))
CallInst::CreateFree(SetjmpTable, TI);
}
@@ -1112,7 +1059,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// ...
// somebb:
// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
- // setjmpTableSize = __tempRet0;
+ // setjmpTableSize = getTempRet0();
// So we need to make sure the SSA for these variables is valid so that every
// saveSetjmp and testSetjmp call has the correct arguments.
SSAUpdater SetjmpTableSSA;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index ee708d637b25..84c877cb8d02 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -18,15 +18,15 @@
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Pass.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-lower-global-dtors"
@@ -59,6 +59,8 @@ ModulePass *llvm::createWebAssemblyLowerGlobalDtors() {
}
bool LowerGlobalDtors::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Lower Global Destructors **********\n");
+
GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
if (!GV)
return false;
@@ -77,18 +79,20 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
// Collect the contents of @llvm.global_dtors, collated by priority and
// associated symbol.
- std::map<uint16_t, MapVector<Constant *, std::vector<Constant *> > > DtorFuncs;
+ std::map<uint16_t, MapVector<Constant *, std::vector<Constant *>>> DtorFuncs;
for (Value *O : InitList->operands()) {
ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
- if (!CS) continue; // Malformed.
+ if (!CS)
+ continue; // Malformed.
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
- if (!Priority) continue; // Malformed.
+ if (!Priority)
+ continue; // Malformed.
uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX);
Constant *DtorFunc = CS->getOperand(1);
if (DtorFunc->isNullValue())
- break; // Found a null terminator, skip the rest.
+ break; // Found a null terminator, skip the rest.
Constant *Associated = CS->getOperand(2);
Associated = cast<Constant>(Associated->stripPointerCastsNoFollowAliases());
@@ -101,31 +105,23 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
// extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
LLVMContext &C = M.getContext();
PointerType *VoidStar = Type::getInt8PtrTy(C);
- Type *AtExitFuncArgs[] = { VoidStar };
- FunctionType *AtExitFuncTy = FunctionType::get(
- Type::getVoidTy(C),
- AtExitFuncArgs,
- /*isVarArg=*/false);
-
- Type *AtExitArgs[] = {
- PointerType::get(AtExitFuncTy, 0),
- VoidStar,
- VoidStar
- };
- FunctionType *AtExitTy = FunctionType::get(
- Type::getInt32Ty(C),
- AtExitArgs,
- /*isVarArg=*/false);
+ Type *AtExitFuncArgs[] = {VoidStar};
+ FunctionType *AtExitFuncTy =
+ FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
+ /*isVarArg=*/false);
+
+ Type *AtExitArgs[] = {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar};
+ FunctionType *AtExitTy = FunctionType::get(Type::getInt32Ty(C), AtExitArgs,
+ /*isVarArg=*/false);
Constant *AtExit = M.getOrInsertFunction("__cxa_atexit", AtExitTy);
// Declare __dso_local.
Constant *DsoHandle = M.getNamedValue("__dso_handle");
if (!DsoHandle) {
Type *DsoHandleTy = Type::getInt8Ty(C);
- GlobalVariable *Handle =
- new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true,
- GlobalVariable::ExternalWeakLinkage,
- nullptr, "__dso_handle");
+ GlobalVariable *Handle = new GlobalVariable(
+ M, DsoHandleTy, /*isConstant=*/true,
+ GlobalVariable::ExternalWeakLinkage, nullptr, "__dso_handle");
Handle->setVisibility(GlobalVariable::HiddenVisibility);
DsoHandle = Handle;
}
@@ -139,13 +135,13 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
Constant *Associated = AssociatedAndMore.first;
Function *CallDtors = Function::Create(
- AtExitFuncTy, Function::PrivateLinkage,
- "call_dtors" +
- (Priority != UINT16_MAX ?
- (Twine(".") + Twine(Priority)) : Twine()) +
- (!Associated->isNullValue() ?
- (Twine(".") + Associated->getName()) : Twine()),
- &M);
+ AtExitFuncTy, Function::PrivateLinkage,
+ "call_dtors" +
+ (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
+ : Twine()) +
+ (!Associated->isNullValue() ? (Twine(".") + Associated->getName())
+ : Twine()),
+ &M);
BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors);
for (auto Dtor : AssociatedAndMore.second)
@@ -155,29 +151,29 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
/*isVarArg=*/false);
Function *RegisterCallDtors = Function::Create(
- VoidVoid, Function::PrivateLinkage,
- "register_call_dtors" +
- (Priority != UINT16_MAX ?
- (Twine(".") + Twine(Priority)) : Twine()) +
- (!Associated->isNullValue() ?
- (Twine(".") + Associated->getName()) : Twine()),
- &M);
+ VoidVoid, Function::PrivateLinkage,
+ "register_call_dtors" +
+ (Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
+ : Twine()) +
+ (!Associated->isNullValue() ? (Twine(".") + Associated->getName())
+ : Twine()),
+ &M);
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors);
BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors);
BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors);
Value *Null = ConstantPointerNull::get(VoidStar);
- Value *Args[] = { CallDtors, Null, DsoHandle };
+ Value *Args[] = {CallDtors, Null, DsoHandle};
Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB);
Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res,
Constant::getNullValue(Res->getType()));
BranchInst::Create(FailBB, RetBB, Cmp, EntryBB);
// If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave.
- // This should be very rare, because if the process is running out of memory
- // before main has even started, something is wrong.
- CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap),
- "", FailBB);
+ // This should be very rare, because if the process is running out of
+ // memory before main has even started, something is wrong.
+ CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), "",
+ FailBB);
new UnreachableInst(C, FailBB);
ReturnInst::Create(C, RetBB);
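
For each (priority, associated-symbol) group the pass synthesizes a call_dtors.* helper and a register_call_dtors.* wrapper; their behavior corresponds roughly to the following C++ sketch (dtor_a/dtor_b are placeholder destructors, not names the pass emits):

extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
extern "C" char __dso_handle;

static void dtor_a() {}  // stand-ins for entries collected from
static void dtor_b() {}  // @llvm.global_dtors

static void call_dtors(void *) {      // corresponds to "call_dtors.<priority>"
  dtor_a();
  dtor_b();
}

static void register_call_dtors() {   // "register_call_dtors.<priority>"
  if (__cxa_atexit(call_dtors, nullptr, &__dso_handle) != 0)
    __builtin_trap();  // registration failed (OOM); trap rather than misbehave
}
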
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index d85db14fc679..fa862fbaa634 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -30,6 +30,21 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+// Defines llvm::WebAssembly::getStackOpcode to convert register instructions to
+// stack instructions
+#define GET_INSTRMAP_INFO 1
+#include "WebAssemblyGenInstrInfo.inc"
+
+// This disables the removal of registers when lowering into MC, as required
+// by some current tests.
+static cl::opt<bool>
+ WasmKeepRegisters("wasm-keep-registers", cl::Hidden,
+ cl::desc("WebAssembly: output stack registers in"
+ " instruction output for test purposes only."),
+ cl::init(false));
+
+static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
+
MCSymbol *
WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const GlobalValue *Global = MO.getGlobal();
@@ -40,35 +55,13 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const TargetMachine &TM = MF.getTarget();
const Function &CurrentFunc = MF.getFunction();
- SmallVector<wasm::ValType, 4> Returns;
- SmallVector<wasm::ValType, 4> Params;
-
- wasm::ValType iPTR =
- MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() ?
- wasm::ValType::I64 :
- wasm::ValType::I32;
-
- SmallVector<MVT, 4> ResultMVTs;
- ComputeLegalValueVTs(CurrentFunc, TM, FuncTy->getReturnType(), ResultMVTs);
- // WebAssembly can't currently handle returning tuples.
- if (ResultMVTs.size() <= 1)
- for (MVT ResultMVT : ResultMVTs)
- Returns.push_back(WebAssembly::toValType(ResultMVT));
- else
- Params.push_back(iPTR);
-
- for (Type *Ty : FuncTy->params()) {
- SmallVector<MVT, 4> ParamMVTs;
- ComputeLegalValueVTs(CurrentFunc, TM, Ty, ParamMVTs);
- for (MVT ParamMVT : ParamMVTs)
- Params.push_back(WebAssembly::toValType(ParamMVT));
- }
-
- if (FuncTy->isVarArg())
- Params.push_back(iPTR);
+ SmallVector<MVT, 1> ResultMVTs;
+ SmallVector<MVT, 4> ParamMVTs;
+ ComputeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
+ auto Signature = SignatureFromMVTs(ResultMVTs, ParamMVTs);
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
}
@@ -82,10 +75,10 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
cast<MCSymbolWasm>(Printer.GetExternalSymbolSymbol(Name));
const WebAssemblySubtarget &Subtarget = Printer.getSubtarget();
- // __stack_pointer is a global variable; all other external symbols used by
- // CodeGen are functions. It's OK to hardcode knowledge of specific symbols
- // here; this method is precisely there for fetching the signatures of known
- // Clang-provided symbols.
+ // Except for the two exceptions (__stack_pointer and __cpp_exception), all
+ // other external symbols used by CodeGen are functions. It's OK to hardcode
+ // knowledge of specific symbols here; this method is precisely there for
+ // fetching the signatures of known Clang-provided symbols.
if (strcmp(Name, "__stack_pointer") == 0) {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(wasm::WasmGlobalType{
@@ -97,27 +90,55 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
SmallVector<wasm::ValType, 4> Returns;
SmallVector<wasm::ValType, 4> Params;
- GetSignature(Subtarget, Name, Returns, Params);
+ if (strcmp(Name, "__cpp_exception") == 0) {
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
+ // We can't confirm its signature index for now because there can be
+ // imported exceptions. Set it to be 0 for now.
+ WasmSym->setEventType(
+ {wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION, /* SigIndex */ 0});
+ // We may have multiple C++ compilation units to be linked together, each of
+ // which defines the exception symbol. To resolve them, we declare them as
+ // weak.
+ WasmSym->setWeak(true);
+ WasmSym->setExternal(true);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ // All C++ exceptions are assumed to have a single i32 (for wasm32) or i64
+ // (for wasm64) param type and void return type. The reason is, all C++
+ // exception values are pointers, and to share the type section with
+ // functions, exceptions are assumed to have void return type.
+ Params.push_back(Subtarget.hasAddr64() ? wasm::ValType::I64
+ : wasm::ValType::I32);
+ } else { // Function symbols
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ GetLibcallSignature(Subtarget, Name, Returns, Params);
+ }
+ auto Signature =
+ make_unique<wasm::WasmSignature>(std::move(Returns), std::move(Params));
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
return WasmSym;
}
MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
int64_t Offset,
- bool IsFunc) const {
+ bool IsFunc, bool IsGlob,
+ bool IsEvent) const {
MCSymbolRefExpr::VariantKind VK =
IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
- : MCSymbolRefExpr::VK_None;
+ : IsGlob ? MCSymbolRefExpr::VK_WebAssembly_GLOBAL
+ : IsEvent ? MCSymbolRefExpr::VK_WebAssembly_EVENT
+ : MCSymbolRefExpr::VK_None;
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
if (Offset != 0) {
if (IsFunc)
report_fatal_error("Function addresses with offsets not supported");
+ if (IsGlob)
+ report_fatal_error("Global indexes with offsets not supported");
+ if (IsEvent)
+ report_fatal_error("Event indexes with offsets not supported");
Expr =
MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
}
@@ -135,6 +156,8 @@ static wasm::ValType getType(const TargetRegisterClass *RC) {
return wasm::ValType::F32;
if (RC == &WebAssembly::F64RegClass)
return wasm::ValType::F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return wasm::ValType::V128;
llvm_unreachable("Unexpected register class");
}
@@ -187,8 +210,10 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
Params.pop_back();
MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
- WasmSym->setReturns(std::move(Returns));
- WasmSym->setParams(std::move(Params));
+ auto Signature = make_unique<wasm::WasmSignature>(std::move(Returns),
+ std::move(Params));
+ WasmSym->setSignature(Signature.get());
+ Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
const MCExpr *Expr = MCSymbolRefExpr::create(
@@ -212,21 +237,68 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
break;
}
case MachineOperand::MO_GlobalAddress:
- assert(MO.getTargetFlags() == 0 &&
+ assert(MO.getTargetFlags() == WebAssemblyII::MO_NO_FLAG &&
"WebAssembly does not use target flags on GlobalAddresses");
MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(),
- MO.getGlobal()->getValueType()->isFunctionTy());
+ MO.getGlobal()->getValueType()->isFunctionTy(),
+ false, false);
break;
case MachineOperand::MO_ExternalSymbol:
// The target flag indicates whether this is a symbol for a
// variable or a function.
- assert((MO.getTargetFlags() & -2) == 0 &&
- "WebAssembly uses only one target flag bit on ExternalSymbols");
- MCOp = LowerSymbolOperand(GetExternalSymbolSymbol(MO), /*Offset=*/0,
- MO.getTargetFlags() & 1);
+ assert((MO.getTargetFlags() & ~WebAssemblyII::MO_SYMBOL_MASK) == 0 &&
+ "WebAssembly uses only symbol flags on ExternalSymbols");
+ MCOp = LowerSymbolOperand(
+ GetExternalSymbolSymbol(MO), /*Offset=*/0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_FUNCTION) != 0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_GLOBAL) != 0,
+ (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_EVENT) != 0);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ // This is currently used only for LSDA symbols (GCC_except_table),
+ // because global addresses or other external symbols are handled above.
+ assert(MO.getTargetFlags() == 0 &&
+ "WebAssembly does not use target flags on MCSymbol");
+ MCOp = LowerSymbolOperand(MO.getMCSymbol(), /*Offset=*/0, false, false,
+ false);
break;
}
OutMI.addOperand(MCOp);
}
+
+ if (!WasmKeepRegisters)
+ removeRegisterOperands(MI, OutMI);
+}
+
+static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI) {
+ // Remove all uses of stackified registers to bring the instruction format
+ // into its final stack form used throughout MC, and transition opcodes to
+ // their _S variant.
+ // We do this separately from the above code that still may need these
+ // registers for e.g. call_indirect signatures.
+ // See comments in lib/Target/WebAssembly/WebAssemblyInstrFormats.td for
+ // details.
+ // TODO: the code above creates new registers which are then removed here.
+ // That code could be slightly simplified by not doing that, though maybe
+ // it is simpler conceptually to keep the code above in "register mode"
+ // until this transition point.
+ // FIXME: we are not processing inline assembly, which contains register
+ // operands, because it is used by later target generic code.
+ if (MI->isDebugInstr() || MI->isLabel() || MI->isInlineAsm())
+ return;
+
+ // Transform to _S instruction.
+ auto RegOpcode = OutMI.getOpcode();
+ auto StackOpcode = WebAssembly::getStackOpcode(RegOpcode);
+ assert(StackOpcode != -1 && "Failed to stackify instruction");
+ OutMI.setOpcode(StackOpcode);
+
+ // Remove register operands.
+ for (auto I = OutMI.getNumOperands(); I; --I) {
+ auto &MO = OutMI.getOperand(I - 1);
+ if (MO.isReg()) {
+ OutMI.erase(&MO);
+ }
+ }
}
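
Conceptually, removeRegisterOperands turns each register-form MCInst into its stack (_S) form: the opcode is remapped via getStackOpcode and every register operand is dropped, since from this point on operands live on the wasm value stack (passing -wasm-keep-registers, added above, suppresses this so tests can still see registers). A self-contained toy sketch of the same erase-register-operands idea, not using the MCInst API:

#include <string>
#include <vector>

struct Operand { bool IsReg; long Value; };
struct Inst { std::string Opcode; std::vector<Operand> Ops; };

// Switch to the "_S" (stack) opcode and erase all register operands;
// immediates and symbol operands are kept.
void toStackForm(Inst &I) {
  I.Opcode += "_S";
  for (auto It = I.Ops.begin(); It != I.Ops.end();) {
    if (It->IsReg)
      It = I.Ops.erase(It);
    else
      ++It;
  }
}
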
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index 41b4313bb38c..fa7a0ea61b3b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -33,8 +33,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
- MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset,
- bool IsFunc) const;
+ MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset, bool IsFunc,
+ bool IsGlob, bool IsEvent) const;
public:
WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index e511e574050f..0157af0f8510 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -43,20 +43,38 @@ void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
}
}
-void llvm::ComputeSignatureVTs(const Function &F, const TargetMachine &TM,
+void llvm::ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+ const TargetMachine &TM,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) {
- ComputeLegalValueVTs(F, TM, F.getReturnType(), Results);
+ ComputeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
+ MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
if (Results.size() > 1) {
// WebAssembly currently can't lower returns of multiple values without
// demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So
// replace multiple return values with a pointer parameter.
Results.clear();
- Params.push_back(
- MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits()));
+ Params.push_back(PtrVT);
}
- for (auto &Arg : F.args())
- ComputeLegalValueVTs(F, TM, Arg.getType(), Params);
+ for (auto *Param : Ty->params())
+ ComputeLegalValueVTs(F, TM, Param, Params);
+ if (Ty->isVarArg())
+ Params.push_back(PtrVT);
+}
+
+void llvm::ValTypesFromMVTs(const ArrayRef<MVT> &In,
+ SmallVectorImpl<wasm::ValType> &Out) {
+ for (MVT Ty : In)
+ Out.push_back(WebAssembly::toValType(Ty));
+}
+
+std::unique_ptr<wasm::WasmSignature>
+llvm::SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+ const SmallVectorImpl<MVT> &Params) {
+ auto Sig = make_unique<wasm::WasmSignature>();
+ ValTypesFromMVTs(Results, Sig->Returns);
+ ValTypesFromMVTs(Params, Sig->Params);
+ return Sig;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index a60b10fc5309..4be4beb85d04 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -17,7 +17,9 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCSymbolWasm.h"
namespace llvm {
@@ -50,7 +52,7 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
// overaligned values on the user stack.
unsigned BasePtrVreg = -1U;
- public:
+public:
explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
~WebAssemblyFunctionInfo() override;
@@ -60,7 +62,10 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
void addResult(MVT VT) { Results.push_back(VT); }
const std::vector<MVT> &getResults() const { return Results; }
- void clearParamsAndResults() { Params.clear(); Results.clear(); }
+ void clearParamsAndResults() {
+ Params.clear();
+ Results.clear();
+ }
void setNumLocals(size_t NumLocals) { Locals.resize(NumLocals, MVT::i32); }
void setLocal(size_t i, MVT VT) { Locals[i] = VT; }
@@ -115,13 +120,22 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
}
};
-void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
- Type *Ty, SmallVectorImpl<MVT> &ValueVTs);
+void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
+ SmallVectorImpl<MVT> &ValueVTs);
-void ComputeSignatureVTs(const Function &F, const TargetMachine &TM,
- SmallVectorImpl<MVT> &Params,
+// Compute the signature for a given FunctionType (Ty). Note that it's not the
+// signature for F (F is just used to get various context).
+void ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+ const TargetMachine &TM, SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results);
+void ValTypesFromMVTs(const ArrayRef<MVT> &In,
+ SmallVectorImpl<wasm::ValType> &Out);
+
+std::unique_ptr<wasm::WasmSignature>
+SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+ const SmallVectorImpl<MVT> &Params);
+
} // end namespace llvm
#endif
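
The rules ComputeSignatureVTs encodes — demote a multi-value return to a leading sret-style pointer parameter, and append a pointer parameter for varargs — can be sketched without the LLVM types as follows (a standalone illustration under those assumptions, not the actual API):

#include <vector>

enum class VT { I32, I64, F32, F64 };

struct Sig {
  std::vector<VT> Params;
  std::vector<VT> Results;
};

Sig computeSignature(std::vector<VT> Results, std::vector<VT> Params,
                     bool IsVarArg, VT PtrVT) {
  Sig S;
  if (Results.size() > 1) {
    // Multi-value returns can't be lowered here, so demote them to an
    // sret-style pointer parameter placed before the regular parameters.
    Results.clear();
    Params.insert(Params.begin(), PtrVT);
  }
  if (IsVarArg)
    Params.push_back(PtrVT); // pointer to the caller-allocated vararg buffer
  S.Params = std::move(Params);
  S.Results = std::move(Results);
  return S;
}
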
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index 893e8484c4c6..c4b5e96db0c7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -1,4 +1,4 @@
-//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//== WebAssemblyMemIntrinsicResults.cpp - Optimize memory intrinsic results ==//
//
// The LLVM Compiler Infrastructure
//
@@ -8,19 +8,22 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements an optimization pass using store result values.
+/// This file implements an optimization pass using memory intrinsic results.
///
-/// WebAssembly's store instructions return the stored value. This is to enable
-/// an optimization wherein uses of the stored value can be replaced by uses of
-/// the store's result value, making the stored value register more likely to
-/// be single-use, thus more likely to be useful to register stackifying, and
-/// potentially also exposing the store to register stackifying. These both can
-/// reduce get_local/set_local traffic.
+/// Calls to memory intrinsics (memcpy, memmove, memset) return the destination
+/// address. They are in the form of
+/// %dst_new = call @memcpy %dst, %src, %len
+/// where %dst and %dst_new registers contain the same value.
///
-/// This pass also performs this optimization for memcpy, memmove, and memset
-/// calls, since the LLVM intrinsics for these return void so they can't use the
-/// returned attribute and consequently aren't handled by the OptimizeReturned
-/// pass.
+/// This is to enable an optimization wherein uses of the %dst register used in
+/// the parameter can be replaced by uses of the %dst_new register used in the
+/// result, making the %dst register more likely to be single-use, thus more
+/// likely to be useful to register stackifying, and potentially also exposing
+/// the call instruction itself to register stackifying. These both can reduce
+/// local.get/local.set traffic.
+///
+/// The LLVM intrinsics for these return void so they can't use the returned
+/// attribute and consequently aren't handled by the OptimizeReturned pass.
///
//===----------------------------------------------------------------------===//
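
At the source level the property being exploited is simply that memcpy (and memmove/memset) returns its destination, so later uses of the destination can be redirected to the call's result; a minimal C++ illustration of the rewrite the pass performs on machine instructions (assumes n >= 1):

#include <cstddef>
#include <cstring>

int sumEnds(char *dst, const char *src, std::size_t n) {
  // Using the returned pointer below leaves the original `dst` register
  // single-use, which is what makes it attractive to register stackification.
  char *dstNew = static_cast<char *>(std::memcpy(dst, src, n));
  return dstNew[0] + dstNew[n - 1];
}
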
@@ -38,15 +41,17 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define DEBUG_TYPE "wasm-store-results"
+#define DEBUG_TYPE "wasm-mem-intrinsic-results"
namespace {
-class WebAssemblyStoreResults final : public MachineFunctionPass {
+class WebAssemblyMemIntrinsicResults final : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+ WebAssemblyMemIntrinsicResults() : MachineFunctionPass(ID) {}
- StringRef getPassName() const override { return "WebAssembly Store Results"; }
+ StringRef getPassName() const override {
+ return "WebAssembly Memory Intrinsic Results";
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -67,12 +72,13 @@ private:
};
} // end anonymous namespace
-char WebAssemblyStoreResults::ID = 0;
-INITIALIZE_PASS(WebAssemblyStoreResults, DEBUG_TYPE,
- "Optimize store result values for WebAssembly", false, false)
+char WebAssemblyMemIntrinsicResults::ID = 0;
+INITIALIZE_PASS(WebAssemblyMemIntrinsicResults, DEBUG_TYPE,
+ "Optimize memory intrinsic result values for WebAssembly",
+ false, false)
-FunctionPass *llvm::createWebAssemblyStoreResults() {
- return new WebAssemblyStoreResults();
+FunctionPass *llvm::createWebAssemblyMemIntrinsicResults() {
+ return new WebAssemblyMemIntrinsicResults();
}
// Replace uses of FromReg with ToReg if they are dominated by MI.
@@ -91,7 +97,8 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
SmallVector<SlotIndex, 4> Indices;
- for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end(); I != E;) {
+ for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end();
+ I != E;) {
MachineOperand &O = *I++;
MachineInstr *Where = O.getParent();
@@ -132,9 +139,9 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
// If we replaced all dominated uses, FromReg is now killed at MI.
if (!FromLI->liveAt(FromIdx.getDeadSlot()))
- MI.addRegisterKilled(FromReg,
- MBB.getParent()->getSubtarget<WebAssemblySubtarget>()
- .getRegisterInfo());
+ MI.addRegisterKilled(FromReg, MBB.getParent()
+ ->getSubtarget<WebAssemblySubtarget>()
+ .getRegisterInfo());
}
return Changed;
@@ -142,8 +149,7 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
const MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT,
- LiveIntervals &LIS,
+ MachineDominatorTree &MDT, LiveIntervals &LIS,
const WebAssemblyTargetLowering &TLI,
const TargetLibraryInfo &LibInfo) {
MachineOperand &Op1 = MI.getOperand(1);
@@ -164,14 +170,14 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
unsigned FromReg = MI.getOperand(2).getReg();
unsigned ToReg = MI.getOperand(0).getReg();
if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
- report_fatal_error("Store results: call to builtin function with wrong "
- "signature, from/to mismatch");
+ report_fatal_error("Memory Intrinsic results: call to builtin function "
+ "with wrong signature, from/to mismatch");
return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
}
-bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG({
- dbgs() << "********** Store Results **********\n"
+ dbgs() << "********** Memory Intrinsic Results **********\n"
<< "********** Function: " << MF.getName() << '\n';
});
@@ -186,7 +192,8 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
// We don't preserve SSA form.
MRI.leaveSSA();
- assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking");
+ assert(MRI.tracksLiveness() &&
+ "MemIntrinsicResults expects liveness tracking");
for (auto &MBB : MF) {
LLVM_DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 04ac22a589ea..3d0a15244ee0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -65,7 +65,8 @@ FunctionPass *llvm::createWebAssemblyOptimizeLiveIntervals() {
return new WebAssemblyOptimizeLiveIntervals();
}
-bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
+ MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Optimize LiveIntervals **********\n"
"********** Function: "
<< MF.getName() << '\n');
@@ -76,11 +77,10 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF)
// We don't preserve SSA form.
MRI.leaveSSA();
- assert(MRI.tracksLiveness() &&
- "OptimizeLiveIntervals expects liveness");
+ assert(MRI.tracksLiveness() && "OptimizeLiveIntervals expects liveness");
// Split multiple-VN LiveIntervals into multiple LiveIntervals.
- SmallVector<LiveInterval*, 4> SplitLIs;
+ SmallVector<LiveInterval *, 4> SplitLIs;
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI.reg_nodbg_empty(Reg))
@@ -94,7 +94,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF)
// instructions to satisfy LiveIntervals' requirement that all uses be
// dominated by defs. Now that LiveIntervals has computed which of these
// defs are actually needed and which are dead, remove the dead ones.
- for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE; ) {
+ for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE;) {
MachineInstr *MI = &*MII++;
if (MI->isImplicitDef() && MI->getOperand(0).isDead()) {
LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg());
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 113ee2532bce..2c018d0785a7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -74,6 +74,10 @@ void OptimizeReturned::visitCallSite(CallSite CS) {
}
bool OptimizeReturned::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "********** Optimize returned Attributes **********\n"
+ "********** Function: "
+ << F.getName() << '\n');
+
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
visit(F);
return true;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index a54484407805..2dfd85953f14 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -192,11 +192,21 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32,
WebAssembly::COPY_V128);
break;
+ case WebAssembly::RETURN_v2i64:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64,
+ WebAssembly::COPY_V128);
+ break;
case WebAssembly::RETURN_v4f32:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32,
WebAssembly::COPY_V128);
break;
+ case WebAssembly::RETURN_v2f64:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64,
+ WebAssembly::COPY_V128);
+ break;
case WebAssembly::RETURN_VOID:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID,
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index e44e7057e233..0be0ba657830 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -70,7 +70,8 @@ static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
return false;
}
-bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &MF) {
+bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
+ MachineFunction &MF) {
LLVM_DEBUG({
dbgs() << "********** Prepare For LiveIntervals **********\n"
<< "********** Function: " << MF.getName() << '\n';
@@ -112,7 +113,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &M
// Move ARGUMENT_* instructions to the top of the entry block, so that their
// liveness reflects the fact that these really are live-in values.
- for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) {
+ for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) {
MachineInstr &MI = *MII++;
if (WebAssembly::isArgument(MI)) {
MI.removeFromParent();
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index d69a27937105..d97b13a8d699 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -118,16 +118,15 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// registers), by weight next, and then by position.
// TODO: Investigate more intelligent sorting heuristics. For starters, we
// should try to coalesce adjacent live intervals before non-adjacent ones.
- llvm::sort(SortedIntervals.begin(), SortedIntervals.end(),
- [MRI](LiveInterval *LHS, LiveInterval *RHS) {
- if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
- return MRI->isLiveIn(LHS->reg);
- if (LHS->weight != RHS->weight)
- return LHS->weight > RHS->weight;
- if (LHS->empty() || RHS->empty())
- return !LHS->empty() && RHS->empty();
- return *LHS < *RHS;
- });
+ llvm::sort(SortedIntervals, [MRI](LiveInterval *LHS, LiveInterval *RHS) {
+ if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
+ return MRI->isLiveIn(LHS->reg);
+ if (LHS->weight != RHS->weight)
+ return LHS->weight > RHS->weight;
+ if (LHS->empty() || RHS->empty())
+ return !LHS->empty() && RHS->empty();
+ return *LHS < *RHS;
+ });
LLVM_DEBUG(dbgs() << "Coloring register intervals:\n");
SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 9f5d5bd87831..1eb32ed64494 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -22,9 +22,11 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "WebAssembly.h"
+#include "WebAssemblyDebugValueManager.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -97,11 +99,11 @@ static void ImposeStackOrdering(MachineInstr *MI) {
static void ConvertImplicitDefToConstZero(MachineInstr *MI,
MachineRegisterInfo &MRI,
const TargetInstrInfo *TII,
- MachineFunction &MF) {
+ MachineFunction &MF,
+ LiveIntervals &LIS) {
assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
- const auto *RegClass =
- MRI.getRegClass(MI->getOperand(0).getReg());
+ const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
if (RegClass == &WebAssembly::I32RegClass) {
MI->setDesc(TII->get(WebAssembly::CONST_I32));
MI->addOperand(MachineOperand::CreateImm(0));
@@ -118,6 +120,14 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI,
ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
Type::getDoubleTy(MF.getFunction().getContext())));
MI->addOperand(MachineOperand::CreateFPImm(Val));
+ } else if (RegClass == &WebAssembly::V128RegClass) {
+ unsigned TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32));
+ MI->addOperand(MachineOperand::CreateReg(TempReg, false));
+ MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(WebAssembly::CONST_I32), TempReg)
+ .addImm(0);
+ LIS.InsertMachineInstrInMaps(*Const);
} else {
llvm_unreachable("Unexpected reg class");
}
@@ -172,29 +182,24 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
// Check for stores.
if (MI.mayStore()) {
Write = true;
-
- // Check for stores to __stack_pointer.
- for (auto MMO : MI.memoperands()) {
- const MachinePointerInfo &MPI = MMO->getPointerInfo();
- if (MPI.V.is<const PseudoSourceValue *>()) {
- auto PSV = MPI.V.get<const PseudoSourceValue *>();
- if (const ExternalSymbolPseudoSourceValue *EPSV =
- dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
- if (StringRef(EPSV->getSymbol()) == "__stack_pointer") {
- StackPointer = true;
- }
- }
- }
} else if (MI.hasOrderedMemoryRef()) {
switch (MI.getOpcode()) {
- case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
- case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
- case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
- case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
- case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
- case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
- case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
- case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ case WebAssembly::DIV_S_I32:
+ case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32:
+ case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32:
+ case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32:
+ case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32:
+ case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64:
+ case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32:
+ case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64:
+ case WebAssembly::I64_TRUNC_U_F64:
// These instructions have hasUnmodeledSideEffects() returning true
// because they trap on overflow and invalid so they can't be arbitrarily
// moved, however hasOrderedMemoryRef() interprets this plus their lack
@@ -214,14 +219,22 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
// Check for side effects.
if (MI.hasUnmodeledSideEffects()) {
switch (MI.getOpcode()) {
- case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
- case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
- case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
- case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
- case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
- case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
- case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
- case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ case WebAssembly::DIV_S_I32:
+ case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32:
+ case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32:
+ case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32:
+ case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32:
+ case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64:
+ case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32:
+ case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64:
+ case WebAssembly::I64_TRUNC_U_F64:
// These instructions have hasUnmodeledSideEffects() returning true
// because they trap on overflow and invalid so they can't be arbitrarily
// moved, however in the specific case of register stackifying, it is safe
@@ -233,22 +246,15 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
}
}
+ // Check for writes to __stack_pointer global.
+ if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
+ strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
+ StackPointer = true;
+
// Analyze calls.
if (MI.isCall()) {
- switch (MI.getOpcode()) {
- case WebAssembly::CALL_VOID:
- case WebAssembly::CALL_INDIRECT_VOID:
- QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
- break;
- case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
- case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
- case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
- case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
- QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
- break;
- default:
- llvm_unreachable("unexpected call opcode");
- }
+ unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI);
+ QueryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
}
}
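
As a hedged illustration of the call handling above (a toy, not LLVM's API): getCalleeOpNo collapses the old per-opcode switch because void calls carry the callee in operand 0, while value-returning calls put their result in operand 0 and the callee in operand 1.

#include <cassert>

// Toy stand-ins for the call opcodes; the real values come from the generated
// WebAssembly instruction tables.
enum ToyOpcode { CALL_VOID, CALL_INDIRECT_VOID, CALL_I32, CALL_INDIRECT_F64 };

static unsigned toyCalleeOpNo(ToyOpcode Op) {
  switch (Op) {
  case CALL_VOID:
  case CALL_INDIRECT_VOID:
    return 0; // no result operand, the callee comes first
  default:
    return 1; // operand 0 is the result, the callee follows
  }
}

int main() {
  assert(toyCalleeOpNo(CALL_VOID) == 0);
  assert(toyCalleeOpNo(CALL_I32) == 1);
  assert(toyCalleeOpNo(CALL_INDIRECT_F64) == 1);
  return 0;
}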
@@ -263,8 +269,7 @@ static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
// LiveIntervals to handle complex cases.
static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
const MachineRegisterInfo &MRI,
- const LiveIntervals &LIS)
-{
+ const LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
return Def;
@@ -280,17 +285,16 @@ static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
// to handle complex cases.
-static bool HasOneUse(unsigned Reg, MachineInstr *Def,
- MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
- LiveIntervals &LIS) {
+static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
+ MachineDominatorTree &MDT, LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
if (MRI.hasOneUse(Reg))
return true;
bool HasOne = false;
const LiveInterval &LI = LIS.getInterval(Reg);
- const VNInfo *DefVNI = LI.getVNInfoAt(
- LIS.getInstructionIndex(*Def).getRegSlot());
+ const VNInfo *DefVNI =
+ LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
assert(DefVNI);
for (auto &I : MRI.use_nodbg_operands(Reg)) {
const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
@@ -403,7 +407,6 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
if (UseVNI != OneUseVNI)
continue;
- const MachineInstr *OneUseInst = OneUse.getParent();
if (UseInst == OneUseInst) {
// Another use in the same instruction. We need to ensure that the one
// selected use happens "before" it.
@@ -415,8 +418,8 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
// Actually, dominating is over-conservative. Test that the use would
// happen after the one selected use in the stack evaluation order.
//
- // This is needed as a consequence of using implicit get_locals for
- // uses and implicit set_locals for defs.
+ // This is needed as a consequence of using implicit local.gets for
+ // uses and implicit local.sets for defs.
if (UseInst->getDesc().getNumDefs() == 0)
return false;
const MachineOperand &MO = UseInst->getOperand(0);
@@ -426,8 +429,8 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
!MFI.isVRegStackified(DefReg))
return false;
- assert(MRI.hasOneUse(DefReg));
- const MachineOperand &NewUse = *MRI.use_begin(DefReg);
+ assert(MRI.hasOneNonDBGUse(DefReg));
+ const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
const MachineInstr *NewUseInst = NewUse.getParent();
if (NewUseInst == OneUseInst) {
if (&OneUse > &NewUse)
@@ -459,22 +462,23 @@ static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
// Shrink LI to its uses, cleaning up LI.
static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
if (LIS.shrinkToUses(&LI)) {
- SmallVector<LiveInterval*, 4> SplitLIs;
+ SmallVector<LiveInterval *, 4> SplitLIs;
LIS.splitSeparateComponents(LI, SplitLIs);
}
}
/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
-static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
- MachineInstr *Def,
- MachineBasicBlock &MBB,
+static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
+ MachineInstr *Def, MachineBasicBlock &MBB,
MachineInstr *Insert, LiveIntervals &LIS,
WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI) {
LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());
+ WebAssemblyDebugValueManager DefDIs(Def);
MBB.splice(Insert, &MBB, Def);
+ DefDIs.move(Insert);
LIS.handleMove(*Def);
if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
@@ -499,6 +503,8 @@ static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
MFI.stackifyVReg(NewReg);
+ DefDIs.updateReg(NewReg);
+
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
}
@@ -516,6 +522,8 @@ static MachineInstr *RematerializeCheapDef(
LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
+ WebAssemblyDebugValueManager DefDIs(&Def);
+
unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
Op.setReg(NewReg);
@@ -536,6 +544,7 @@ static MachineInstr *RematerializeCheapDef(
}
// If that was the last use of the original, delete the original.
+ // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
if (IsDead) {
LLVM_DEBUG(dbgs() << " - Deleting original\n");
SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
@@ -543,6 +552,11 @@ static MachineInstr *RematerializeCheapDef(
LIS.removeInterval(Reg);
LIS.RemoveMachineInstrFromMaps(Def);
Def.eraseFromParent();
+
+ DefDIs.move(&*Insert);
+ DefDIs.updateReg(NewReg);
+ } else {
+ DefDIs.clone(&*Insert, NewReg);
}
return Clone;
@@ -566,7 +580,7 @@ static MachineInstr *RematerializeCheapDef(
/// INST ..., Reg, ...
/// INST ..., Reg, ...
///
-/// with DefReg and TeeReg stackified. This eliminates a get_local from the
+/// with DefReg and TeeReg stackified. This eliminates a local.get from the
/// resulting code.
static MachineInstr *MoveAndTeeForMultiUse(
unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
@@ -574,6 +588,8 @@ static MachineInstr *MoveAndTeeForMultiUse(
MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
+ WebAssemblyDebugValueManager DefDIs(Def);
+
// Move Def into place.
MBB.splice(Insert, &MBB, Def);
LIS.handleMove(*Def);
@@ -592,6 +608,8 @@ static MachineInstr *MoveAndTeeForMultiUse(
SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
+ DefDIs.move(Insert);
+
// Tell LiveIntervals we moved the original vreg def from Def to Tee.
LiveInterval &LI = LIS.getInterval(Reg);
LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
@@ -608,6 +626,9 @@ static MachineInstr *MoveAndTeeForMultiUse(
ImposeStackOrdering(Def);
ImposeStackOrdering(Tee);
+ DefDIs.clone(Tee, DefReg);
+ DefDIs.clone(Insert, TeeReg);
+
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
return Def;
@@ -672,8 +693,8 @@ public:
/// operand in the tree that we haven't visited yet. Moving a definition of
/// Reg to a point in the tree after that would change its value.
///
- /// This is needed as a consequence of using implicit get_locals for
- /// uses and implicit set_locals for defs.
+ /// This is needed as a consequence of using implicit local.gets for
+ /// uses and implicit local.sets for defs.
bool IsOnStack(unsigned Reg) const {
for (const RangeTy &Range : Worklist)
for (const MachineOperand &MO : Range)
@@ -687,9 +708,9 @@ public:
/// tried for the current instruction and didn't work.
class CommutingState {
/// There are effectively three states: the initial state where we haven't
- /// started commuting anything and we don't know anything yet, the tenative
+ /// started commuting anything and we don't know anything yet, the tentative
/// state where we've commuted the operands of the current instruction and are
- /// revisting it, and the declined state where we've reverted the operands
+ /// revisiting it, and the declined state where we've reverted the operands
/// back to their original order and will no longer commute it further.
bool TentativelyCommuting;
bool Declined;
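
The comment above describes a small state machine; here is a hedged, standalone analogue (plain C++, invented names, no LLVM types) of the three states — initial, tentatively commuted, and declined — to make the transitions concrete:

#include <cassert>
#include <utility>

// Minimal stand-in for an instruction whose first two operands may commute.
struct ToyInstr { int Ops[2]; };

class ToyCommutingState {
  bool TentativelyCommuting = false;
  bool Declined = false;

public:
  // Initial state: swap the operands once and revisit the instruction.
  void maybeCommute(ToyInstr &MI) {
    if (Declined || TentativelyCommuting)
      return;
    std::swap(MI.Ops[0], MI.Ops[1]);
    TentativelyCommuting = true;
  }

  // Revisiting did not help: revert the operands and stop commuting for good.
  void decline(ToyInstr &MI) {
    if (!TentativelyCommuting)
      return;
    std::swap(MI.Ops[0], MI.Ops[1]);
    TentativelyCommuting = false;
    Declined = true;
  }
};

int main() {
  ToyInstr MI{{1, 2}};
  ToyCommutingState S;
  S.maybeCommute(MI); // tentative state: operands swapped
  assert(MI.Ops[0] == 2);
  S.decline(MI);      // declined state: original order restored
  assert(MI.Ops[0] == 1);
  S.maybeCommute(MI); // no further commuting once declined
  assert(MI.Ops[0] == 1);
  return 0;
}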
@@ -831,7 +852,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// to a constant 0 so that the def is explicit, and the push/pop
// correspondence is maintained.
if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
- ConvertImplicitDefToConstZero(Insert, MRI, TII, MF);
+ ConvertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
// We stackified an operand. Add the defining instruction's operands to
// the worklist stack now to continue to build an ever deeper tree.
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index b6481ac2d4ae..1f0870865b06 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 29f42b96b249..a7c3d177724d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -63,6 +63,6 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32, I32_0)>;
def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64, I64_0)>;
def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
-def V128 : WebAssemblyRegClass<[v4f32, v4i32, v16i8, v8i16], 128, (add V128_0)>;
+def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128,
+ (add V128_0)>;
def EXCEPT_REF : WebAssemblyRegClass<[ExceptRef], 0, (add EXCEPT_REF_0)>;
-
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index f432b367d156..e5a3e47a3bcd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -54,8 +54,8 @@ private:
char WebAssemblyReplacePhysRegs::ID = 0;
INITIALIZE_PASS(WebAssemblyReplacePhysRegs, DEBUG_TYPE,
- "Replace physical registers with virtual registers",
- false, false)
+ "Replace physical registers with virtual registers", false,
+ false)
FunctionPass *llvm::createWebAssemblyReplacePhysRegs() {
return new WebAssemblyReplacePhysRegs();
@@ -86,7 +86,7 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
// Replace explicit uses of the physical register with a virtual register.
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg);
unsigned VReg = WebAssembly::NoRegister;
- for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E; ) {
+ for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E;) {
MachineOperand &MO = *I++;
if (!MO.isImplicit()) {
if (VReg == WebAssembly::NoRegister)
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index fe8a5e4c06f1..6cf81a9d77b3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -88,7 +88,6 @@ enum RuntimeLibcallSignature {
unsupported
};
-
struct RuntimeLibcallSignatureTable {
std::vector<RuntimeLibcallSignature> Table;
@@ -486,18 +485,17 @@ struct StaticLibcallNameMap {
} // end anonymous namespace
-
-
-void llvm::GetSignature(const WebAssemblySubtarget &Subtarget,
- RTLIB::Libcall LC, SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params) {
+void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
assert(Rets.empty());
assert(Params.empty());
wasm::ValType iPTR =
Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
- auto& Table = RuntimeLibcallSignatures->Table;
+ auto &Table = RuntimeLibcallSignatures->Table;
switch (Table[LC]) {
case func:
break;
@@ -834,11 +832,12 @@ void llvm::GetSignature(const WebAssemblySubtarget &Subtarget,
static ManagedStatic<StaticLibcallNameMap> LibcallNameMap;
// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused
// other than here, just roll its logic into this version.
-void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, const char *Name,
- SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params) {
- auto& Map = LibcallNameMap->Map;
+void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ const char *Name,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
+ auto &Map = LibcallNameMap->Map;
auto val = Map.find(Name);
assert(val != Map.end() && "unexpected runtime library name");
- return GetSignature(Subtarget, val->second, Rets, Params);
+ return GetLibcallSignature(Subtarget, val->second, Rets, Params);
}
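
A hedged, standalone analogue of the renamed entry points (std::map and std::vector stand in for the LLVM containers; the libcall names and signatures shown are illustrative): the name-taking flavor only resolves the runtime-library name to the enum value and forwards to the enum-taking flavor.

#include <cassert>
#include <map>
#include <string>
#include <vector>

enum class ToyValType { I32, I64, F32, F64 };
enum class ToyLibcall { MemCpy, FPToSInt64 };

// Enum-taking flavor: fill in the return and parameter types for the libcall.
static void toyGetLibcallSignature(ToyLibcall LC, std::vector<ToyValType> &Rets,
                                   std::vector<ToyValType> &Params) {
  assert(Rets.empty() && Params.empty());
  switch (LC) {
  case ToyLibcall::MemCpy: // void *memcpy(void *, const void *, size_t), wasm32
    Rets = {ToyValType::I32};
    Params = {ToyValType::I32, ToyValType::I32, ToyValType::I32};
    break;
  case ToyLibcall::FPToSInt64: // i64 __fixdfdi(f64)
    Rets = {ToyValType::I64};
    Params = {ToyValType::F64};
    break;
  }
}

// Name-taking flavor: look the name up, then forward to the enum flavor.
static void toyGetLibcallSignature(const std::string &Name,
                                   std::vector<ToyValType> &Rets,
                                   std::vector<ToyValType> &Params) {
  static const std::map<std::string, ToyLibcall> Map = {
      {"memcpy", ToyLibcall::MemCpy}, {"__fixdfdi", ToyLibcall::FPToSInt64}};
  auto It = Map.find(Name);
  assert(It != Map.end() && "unexpected runtime library name");
  toyGetLibcallSignature(It->second, Rets, Params);
}

int main() {
  std::vector<ToyValType> Rets, Params;
  toyGetLibcallSignature("memcpy", Rets, Params);
  assert(Rets.size() == 1 && Params.size() == 3);
  return 0;
}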
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
index 2ba65ff5b716..7fa70bea96de 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -23,14 +23,15 @@ namespace llvm {
class WebAssemblySubtarget;
-extern void GetSignature(const WebAssemblySubtarget &Subtarget,
- RTLIB::Libcall LC,
- SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params);
-
-extern void GetSignature(const WebAssemblySubtarget &Subtarget,
- const char *Name, SmallVectorImpl<wasm::ValType> &Rets,
- SmallVectorImpl<wasm::ValType> &Params);
+extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
+
+extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+ const char *Name,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index 14221993603a..c95af88c6f43 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -60,8 +60,7 @@ static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
assert(MI.hasOneMemOperand() &&
"Load and store instructions have exactly one mem operand");
assert((*MI.memoperands_begin())->getSize() ==
- (UINT64_C(1)
- << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
+ (UINT64_C(1) << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
"Default p2align value should be natural");
assert(MI.getDesc().OpInfo[OperandNo].OperandType ==
WebAssembly::OPERAND_P2ALIGN &&
@@ -69,8 +68,8 @@ static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment());
// WebAssembly does not currently support supernatural alignment.
- P2Align = std::min(
- P2Align, uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode())));
+ P2Align = std::min(P2Align,
+ uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode())));
MI.getOperand(OperandNo).setImm(P2Align);
}
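
A hedged, standalone sketch of the computation above: take log2 of the memory operand's alignment and clamp it at the instruction's natural (default) p2align, since WebAssembly does not encode supernatural alignment.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Integer log2 of a power-of-two alignment (stand-in for llvm::Log2_64).
static uint64_t log2u64(uint64_t V) {
  uint64_t R = 0;
  while (V >>= 1)
    ++R;
  return R;
}

// Clamp the encoded p2align to the natural alignment of the access.
static uint64_t computeP2Align(uint64_t AlignBytes, uint64_t DefaultP2Align) {
  return std::min(log2u64(AlignBytes), DefaultP2Align);
}

int main() {
  // A 4-byte-aligned i32 load: natural p2align is 2, so the operand stays 2.
  assert(computeP2Align(4, 2) == 2);
  // A 1-byte-aligned i32 load: the encoded value drops to 0.
  assert(computeP2Align(1, 2) == 0);
  // A 16-byte-aligned i32 load is still capped at its natural alignment.
  assert(computeP2Align(16, 2) == 2);
  return 0;
}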
@@ -90,6 +89,12 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::LOAD_I64:
case WebAssembly::LOAD_F32:
case WebAssembly::LOAD_F64:
+ case WebAssembly::LOAD_v16i8:
+ case WebAssembly::LOAD_v8i16:
+ case WebAssembly::LOAD_v4i32:
+ case WebAssembly::LOAD_v2i64:
+ case WebAssembly::LOAD_v4f32:
+ case WebAssembly::LOAD_v2f64:
case WebAssembly::LOAD8_S_I32:
case WebAssembly::LOAD8_U_I32:
case WebAssembly::LOAD16_S_I32:
@@ -119,6 +124,8 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
@@ -131,6 +138,8 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW_ADD_I32:
case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
case WebAssembly::ATOMIC_RMW_SUB_I32:
@@ -143,18 +152,30 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I32:
case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
+ case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
case WebAssembly::ATOMIC_RMW_ADD_I64:
case WebAssembly::ATOMIC_RMW_SUB_I64:
case WebAssembly::ATOMIC_RMW_AND_I64:
case WebAssembly::ATOMIC_RMW_OR_I64:
case WebAssembly::ATOMIC_RMW_XOR_I64:
case WebAssembly::ATOMIC_RMW_XCHG_I64:
+ case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
+ case WebAssembly::ATOMIC_NOTIFY:
+ case WebAssembly::ATOMIC_WAIT_I32:
+ case WebAssembly::ATOMIC_WAIT_I64:
RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
break;
case WebAssembly::STORE_I32:
case WebAssembly::STORE_I64:
case WebAssembly::STORE_F32:
case WebAssembly::STORE_F64:
+ case WebAssembly::STORE_v16i8:
+ case WebAssembly::STORE_v8i16:
+ case WebAssembly::STORE_v4i32:
+ case WebAssembly::STORE_v2i64:
+ case WebAssembly::STORE_v4f32:
+ case WebAssembly::STORE_v2f64:
case WebAssembly::STORE8_I32:
case WebAssembly::STORE16_I32:
case WebAssembly::STORE8_I64:
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index d6af0fb219d7..98133e2153a0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -40,10 +40,9 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
const std::string &CPU,
const std::string &FS,
const TargetMachine &TM)
- : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false),
- HasAtomics(false), HasNontrappingFPToInt(false), HasSignExt(false),
- HasExceptionHandling(false), CPUString(CPU), TargetTriple(TT),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
+ : WebAssemblyGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
+ TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableMachineScheduler() const {
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index b170dbff3b32..0a0c04609ac4 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -29,11 +29,16 @@
namespace llvm {
class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
- bool HasSIMD128;
- bool HasAtomics;
- bool HasNontrappingFPToInt;
- bool HasSignExt;
- bool HasExceptionHandling;
+ enum SIMDEnum {
+ NoSIMD,
+ SIMD128,
+ UnimplementedSIMD128,
+ } SIMDLevel = NoSIMD;
+
+ bool HasAtomics = false;
+ bool HasNontrappingFPToInt = false;
+ bool HasSignExt = false;
+ bool HasExceptionHandling = false;
/// String name of used CPU.
std::string CPUString;
@@ -77,7 +82,10 @@ public:
// Predicates used by WebAssemblyInstrInfo.td.
bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
- bool hasSIMD128() const { return HasSIMD128; }
+ bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
+ bool hasUnimplementedSIMD128() const {
+ return SIMDLevel >= UnimplementedSIMD128;
+ }
bool hasAtomics() const { return HasAtomics; }
bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
bool hasSignExt() const { return HasSignExt; }
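
A hedged, standalone sketch of the feature-level pattern above: one ordered enum replaces the old HasSIMD128 flag, so hasSIMD128() and hasUnimplementedSIMD128() become simple >= comparisons against increasing levels.

#include <cassert>

// Ordered SIMD support levels: each level implies everything below it.
enum ToySIMDEnum { NoSIMD, SIMD128, UnimplementedSIMD128 };

struct ToySubtarget {
  ToySIMDEnum SIMDLevel = NoSIMD;
  bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
  bool hasUnimplementedSIMD128() const {
    return SIMDLevel >= UnimplementedSIMD128;
  }
};

int main() {
  ToySubtarget ST;
  assert(!ST.hasSIMD128());

  ST.SIMDLevel = SIMD128;
  assert(ST.hasSIMD128() && !ST.hasUnimplementedSIMD128());

  // Enabling the experimental level also implies the stable SIMD128 level.
  ST.SIMDLevel = UnimplementedSIMD128;
  assert(ST.hasSIMD128() && ST.hasUnimplementedSIMD128());
  return 0;
}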
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 7c10f022cbbc..3bf8dd40892c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -58,10 +58,11 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
initializeOptimizeReturnedPass(PR);
initializeWebAssemblyArgumentMovePass(PR);
initializeWebAssemblySetP2AlignOperandsPass(PR);
+ initializeWebAssemblyEHRestoreStackPointerPass(PR);
initializeWebAssemblyReplacePhysRegsPass(PR);
initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
- initializeWebAssemblyStoreResultsPass(PR);
+ initializeWebAssemblyMemIntrinsicResultsPass(PR);
initializeWebAssemblyRegStackifyPass(PR);
initializeWebAssemblyRegColoringPass(PR);
initializeWebAssemblyExplicitLocalsPass(PR);
@@ -81,8 +82,12 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
//===----------------------------------------------------------------------===//
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::PIC_;
+ if (!RM.hasValue()) {
+ // Default to static relocation model. This should always be more optimal
+ // than PIC since the static linker can determine all global addresses and
+ // assume direct function calls.
+ return Reloc::Static;
+ }
return *RM;
}
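
A hedged, standalone sketch of the defaulting logic above, using std::optional in place of llvm::Optional: when no relocation model is requested, fall back to static rather than PIC.

#include <cassert>
#include <optional>

enum class ToyRelocModel { Static, PIC_, DynamicNoPIC };

// With no explicit request, default to the static relocation model, letting
// the static linker resolve all global addresses and direct calls.
static ToyRelocModel getEffectiveRelocModel(std::optional<ToyRelocModel> RM) {
  return RM.value_or(ToyRelocModel::Static);
}

int main() {
  assert(getEffectiveRelocModel(std::nullopt) == ToyRelocModel::Static);
  assert(getEffectiveRelocModel(ToyRelocModel::PIC_) == ToyRelocModel::PIC_);
  return 0;
}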
@@ -96,7 +101,7 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
: "e-m:e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- CM ? *CM : CodeModel::Large, OL),
+ getEffectiveCodeModel(CM, CodeModel::Large), OL),
TLOF(new WebAssemblyTargetObjectFile()) {
// WebAssembly type-checks instructions, but a noreturn function with a return
// type that doesn't match the context will cause a check failure. So we lower
@@ -149,7 +154,7 @@ class StripThreadLocal final : public ModulePass {
// pass just converts all GlobalVariables to NotThreadLocal
static char ID;
- public:
+public:
StripThreadLocal() : ModulePass(ID) {}
bool runOnModule(Module &M) override {
for (auto &GV : M.globals())
@@ -280,6 +285,9 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
void WebAssemblyPassConfig::addPreEmitPass() {
TargetPassConfig::addPreEmitPass();
+ // Restore __stack_pointer global after an exception is thrown.
+ addPass(createWebAssemblyEHRestoreStackPointer());
+
// Now that we have a prologue and epilogue and all frame indices are
// rewritten, eliminate SP and FP. This allows them to be stackified,
// colored, and numbered with the rest of the registers.
@@ -290,6 +298,12 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// order of the arguments.
addPass(createWebAssemblyCallIndirectFixup());
+ // Eliminate multiple-entry loops.
+ addPass(createWebAssemblyFixIrreducibleControlFlow());
+
+ // Do various transformations for exception handling.
+ addPass(createWebAssemblyLateEHPrepare());
+
if (getOptLevel() != CodeGenOpt::None) {
// LiveIntervals isn't commonly run this late. Re-establish preconditions.
addPass(createWebAssemblyPrepareForLiveIntervals());
@@ -297,13 +311,14 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Depend on LiveIntervals and perform some optimizations on it.
addPass(createWebAssemblyOptimizeLiveIntervals());
- // Prepare store instructions for register stackifying.
- addPass(createWebAssemblyStoreResults());
+ // Prepare memory intrinsic calls for register stackifying.
+ addPass(createWebAssemblyMemIntrinsicResults());
// Mark registers as representing wasm's value stack. This is a key
// code-compression technique in WebAssembly. We run this pass (and
- // StoreResults above) very late, so that it sees as much code as possible,
- // including code emitted by PEI and expanded by late tail duplication.
+ // MemIntrinsicResults above) very late, so that it sees as much code as
+ // possible, including code emitted by PEI and expanded by late tail
+ // duplication.
addPass(createWebAssemblyRegStackify());
// Run the register coloring pass to reduce the total number of registers.
@@ -312,17 +327,9 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyRegColoring());
}
- // Eliminate multiple-entry loops. Do this before inserting explicit get_local
- // and set_local operators because we create a new variable that we want
- // converted into a local.
- addPass(createWebAssemblyFixIrreducibleControlFlow());
-
- // Insert explicit get_local and set_local operators.
+ // Insert explicit local.get and local.set operators.
addPass(createWebAssemblyExplicitLocals());
- // Do various transformations for exception handling
- addPass(createWebAssemblyLateEHPrepare());
-
// Sort the blocks of the CFG into topological order, a prerequisite for
// BLOCK and LOOP markers.
addPass(createWebAssemblyCFGSort());
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 5944cea5abd1..ada6fb9a96d7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -27,14 +27,26 @@ const char *const WebAssembly::PersonalityWrapperFn =
bool WebAssembly::isArgument(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_i32:
+ case WebAssembly::ARGUMENT_i32_S:
+ case WebAssembly::ARGUMENT_i64:
+ case WebAssembly::ARGUMENT_i64_S:
+ case WebAssembly::ARGUMENT_f32:
+ case WebAssembly::ARGUMENT_f32_S:
+ case WebAssembly::ARGUMENT_f64:
+ case WebAssembly::ARGUMENT_f64_S:
case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v16i8_S:
case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v8i16_S:
case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4i32_S:
+ case WebAssembly::ARGUMENT_v2i64:
+ case WebAssembly::ARGUMENT_v2i64_S:
case WebAssembly::ARGUMENT_v4f32:
+ case WebAssembly::ARGUMENT_v4f32_S:
+ case WebAssembly::ARGUMENT_v2f64:
+ case WebAssembly::ARGUMENT_v2f64_S:
return true;
default:
return false;
@@ -44,9 +56,15 @@ bool WebAssembly::isArgument(const MachineInstr &MI) {
bool WebAssembly::isCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::COPY_I32:
+ case WebAssembly::COPY_I32_S:
case WebAssembly::COPY_I64:
+ case WebAssembly::COPY_I64_S:
case WebAssembly::COPY_F32:
+ case WebAssembly::COPY_F32_S:
case WebAssembly::COPY_F64:
+ case WebAssembly::COPY_F64_S:
+ case WebAssembly::COPY_V128:
+ case WebAssembly::COPY_V128_S:
return true;
default:
return false;
@@ -56,9 +74,15 @@ bool WebAssembly::isCopy(const MachineInstr &MI) {
bool WebAssembly::isTee(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::TEE_I32:
+ case WebAssembly::TEE_I32_S:
case WebAssembly::TEE_I64:
+ case WebAssembly::TEE_I64_S:
case WebAssembly::TEE_F32:
+ case WebAssembly::TEE_F32_S:
case WebAssembly::TEE_F64:
+ case WebAssembly::TEE_F64_S:
+ case WebAssembly::TEE_V128:
+ case WebAssembly::TEE_V128_S:
return true;
default:
return false;
@@ -81,15 +105,29 @@ bool WebAssembly::isChild(const MachineInstr &MI,
bool WebAssembly::isCallDirect(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_VOID_S:
case WebAssembly::CALL_I32:
+ case WebAssembly::CALL_I32_S:
case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_I64_S:
case WebAssembly::CALL_F32:
+ case WebAssembly::CALL_F32_S:
case WebAssembly::CALL_F64:
+ case WebAssembly::CALL_F64_S:
case WebAssembly::CALL_v16i8:
+ case WebAssembly::CALL_v16i8_S:
case WebAssembly::CALL_v8i16:
+ case WebAssembly::CALL_v8i16_S:
case WebAssembly::CALL_v4i32:
+ case WebAssembly::CALL_v4i32_S:
+ case WebAssembly::CALL_v2i64:
+ case WebAssembly::CALL_v2i64_S:
case WebAssembly::CALL_v4f32:
+ case WebAssembly::CALL_v4f32_S:
+ case WebAssembly::CALL_v2f64:
+ case WebAssembly::CALL_v2f64_S:
case WebAssembly::CALL_EXCEPT_REF:
+ case WebAssembly::CALL_EXCEPT_REF_S:
return true;
default:
return false;
@@ -99,15 +137,29 @@ bool WebAssembly::isCallDirect(const MachineInstr &MI) {
bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID_S:
case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I32_S:
case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_I64_S:
case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F32_S:
case WebAssembly::CALL_INDIRECT_F64:
+ case WebAssembly::CALL_INDIRECT_F64_S:
case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v16i8_S:
case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v8i16_S:
case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4i32_S:
+ case WebAssembly::CALL_INDIRECT_v2i64:
+ case WebAssembly::CALL_INDIRECT_v2i64_S:
case WebAssembly::CALL_INDIRECT_v4f32:
+ case WebAssembly::CALL_INDIRECT_v4f32_S:
+ case WebAssembly::CALL_INDIRECT_v2f64:
+ case WebAssembly::CALL_INDIRECT_v2f64_S:
case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
+ case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
return true;
default:
return false;
@@ -117,18 +169,54 @@ bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_VOID_S:
case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID_S:
return 0;
case WebAssembly::CALL_I32:
+ case WebAssembly::CALL_I32_S:
case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_I64_S:
case WebAssembly::CALL_F32:
+ case WebAssembly::CALL_F32_S:
case WebAssembly::CALL_F64:
+ case WebAssembly::CALL_F64_S:
+ case WebAssembly::CALL_v16i8:
+ case WebAssembly::CALL_v16i8_S:
+ case WebAssembly::CALL_v8i16:
+ case WebAssembly::CALL_v8i16_S:
+ case WebAssembly::CALL_v4i32:
+ case WebAssembly::CALL_v4i32_S:
+ case WebAssembly::CALL_v2i64:
+ case WebAssembly::CALL_v2i64_S:
+ case WebAssembly::CALL_v4f32:
+ case WebAssembly::CALL_v4f32_S:
+ case WebAssembly::CALL_v2f64:
+ case WebAssembly::CALL_v2f64_S:
case WebAssembly::CALL_EXCEPT_REF:
+ case WebAssembly::CALL_EXCEPT_REF_S:
case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I32_S:
case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_I64_S:
case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F32_S:
case WebAssembly::CALL_INDIRECT_F64:
+ case WebAssembly::CALL_INDIRECT_F64_S:
+ case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v16i8_S:
+ case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v8i16_S:
+ case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4i32_S:
+ case WebAssembly::CALL_INDIRECT_v2i64:
+ case WebAssembly::CALL_INDIRECT_v2i64_S:
+ case WebAssembly::CALL_INDIRECT_v4f32:
+ case WebAssembly::CALL_INDIRECT_v4f32_S:
+ case WebAssembly::CALL_INDIRECT_v2f64:
+ case WebAssembly::CALL_INDIRECT_v2f64_S:
case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
+ case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
return 1;
default:
llvm_unreachable("Not a call instruction");
@@ -138,11 +226,17 @@ unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
bool WebAssembly::isMarker(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
+ case WebAssembly::BLOCK_S:
case WebAssembly::END_BLOCK:
+ case WebAssembly::END_BLOCK_S:
case WebAssembly::LOOP:
+ case WebAssembly::LOOP_S:
case WebAssembly::END_LOOP:
+ case WebAssembly::END_LOOP_S:
case WebAssembly::TRY:
+ case WebAssembly::TRY_S:
case WebAssembly::END_TRY:
+ case WebAssembly::END_TRY_S:
return true;
default:
return false;
@@ -152,7 +246,9 @@ bool WebAssembly::isMarker(const MachineInstr &MI) {
bool WebAssembly::isThrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::THROW_I32:
+ case WebAssembly::THROW_I32_S:
case WebAssembly::THROW_I64:
+ case WebAssembly::THROW_I64_S:
return true;
default:
return false;
@@ -162,7 +258,9 @@ bool WebAssembly::isThrow(const MachineInstr &MI) {
bool WebAssembly::isRethrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::RETHROW:
+ case WebAssembly::RETHROW_S:
case WebAssembly::RETHROW_TO_CALLER:
+ case WebAssembly::RETHROW_TO_CALLER_S:
return true;
default:
return false;
@@ -172,8 +270,11 @@ bool WebAssembly::isRethrow(const MachineInstr &MI) {
bool WebAssembly::isCatch(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::CATCH_I32:
+ case WebAssembly::CATCH_I32_S:
case WebAssembly::CATCH_I64:
+ case WebAssembly::CATCH_I64_S:
case WebAssembly::CATCH_ALL:
+ case WebAssembly::CATCH_ALL_S:
return true;
default:
return false;
@@ -183,8 +284,11 @@ bool WebAssembly::isCatch(const MachineInstr &MI) {
bool WebAssembly::mayThrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case WebAssembly::THROW_I32:
+ case WebAssembly::THROW_I32_S:
case WebAssembly::THROW_I64:
+ case WebAssembly::THROW_I64_S:
case WebAssembly::RETHROW:
+ case WebAssembly::RETHROW_S:
return true;
}
if (isCallIndirect(MI))
@@ -212,7 +316,9 @@ bool WebAssembly::isCatchTerminatePad(const MachineBasicBlock &MBB) {
bool SeenCatch = false;
for (auto &MI : MBB) {
if (MI.getOpcode() == WebAssembly::CATCH_I32 ||
- MI.getOpcode() == WebAssembly::CATCH_I64)
+ MI.getOpcode() == WebAssembly::CATCH_I64 ||
+ MI.getOpcode() == WebAssembly::CATCH_I32_S ||
+ MI.getOpcode() == WebAssembly::CATCH_I64_S)
SeenCatch = true;
if (SeenCatch && MI.isCall()) {
const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
@@ -229,7 +335,8 @@ bool WebAssembly::isCatchAllTerminatePad(const MachineBasicBlock &MBB) {
return false;
bool SeenCatchAll = false;
for (auto &MI : MBB) {
- if (MI.getOpcode() == WebAssembly::CATCH_ALL)
+ if (MI.getOpcode() == WebAssembly::CATCH_ALL ||
+ MI.getOpcode() == WebAssembly::CATCH_ALL_S)
SeenCatchAll = true;
if (SeenCatchAll && MI.isCall()) {
const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8b7b250e1a09..899b50d0f78f 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -804,8 +804,8 @@ private:
return Parser.Error(L, Msg, Range);
}
- std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
- Error(Loc, Msg);
+ std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
+ Error(Loc, Msg, R);
return nullptr;
}
@@ -835,7 +835,10 @@ private:
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End);
- std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc MemStart);
+ std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
+ const MCExpr *&Disp,
+ const SMLoc &StartLoc,
+ SMLoc &EndLoc);
bool ParseIntelMemoryOperandSize(unsigned &Size);
std::unique_ptr<X86Operand>
@@ -852,6 +855,7 @@ private:
bool parseDirectiveFPOSetFrame(SMLoc L);
bool parseDirectiveFPOPushReg(SMLoc L);
bool parseDirectiveFPOStackAlloc(SMLoc L);
+ bool parseDirectiveFPOStackAlign(SMLoc L);
bool parseDirectiveFPOEndPrologue(SMLoc L);
bool parseDirectiveFPOEndProc(SMLoc L);
bool parseDirectiveFPOData(SMLoc L);
@@ -1010,8 +1014,7 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
// and then only in non-64-bit modes.
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
(Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
- BaseReg != X86::SI && BaseReg != X86::DI)) &&
- BaseReg != X86::DX) {
+ BaseReg != X86::SI && BaseReg != X86::DI))) {
ErrMsg = "invalid 16-bit base register";
return true;
}
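
A hedged, standalone sketch of the rule tightened above: in 16-bit addressing only BX, BP, SI, and DI are legal base registers, 16-bit bases are never legal in 64-bit mode, and the old DX carve-out is gone (the (%dx) form is handled separately as a special operand later in the parser).

#include <cassert>
#include <string>

// Toy mirror of the 16-bit base-register rule; register names are strings
// here only for the sake of a self-contained example.
static bool isValid16BitBase(const std::string &Reg, bool Is64BitMode) {
  if (Is64BitMode)
    return false;
  return Reg == "bx" || Reg == "bp" || Reg == "si" || Reg == "di";
}

int main() {
  assert(isValid16BitBase("bx", /*Is64BitMode=*/false));
  assert(!isValid16BitBase("dx", /*Is64BitMode=*/false)); // no DX exception now
  assert(!isValid16BitBase("bx", /*Is64BitMode=*/true));
  return 0;
}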
@@ -1102,10 +1105,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == X86::RIZ || RegNo == X86::RIP ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
- X86II::isX86_64ExtendedReg(RegNo))
- return Error(StartLoc, "register %"
- + Tok.getString() + " is only available in 64-bit mode",
+ X86II::isX86_64ExtendedReg(RegNo)) {
+ StringRef RegName = Tok.getString();
+ Parser.Lex(); // Eat register name.
+ return Error(StartLoc,
+ "register %" + RegName + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
+ }
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@@ -1935,49 +1941,61 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
- default:
- // Parse a memory operand with no segment register.
- return ParseMemOperand(0, Parser.getTok().getLoc());
- case AsmToken::Percent: {
- // Read the register.
- unsigned RegNo;
- SMLoc Start, End;
- if (ParseRegister(RegNo, Start, End)) return nullptr;
- if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
- Error(Start, "%eiz and %riz can only be used as index registers",
- SMRange(Start, End));
- return nullptr;
- }
- if (RegNo == X86::RIP) {
- Error(Start, "%rip can only be used as a base register",
- SMRange(Start, End));
- return nullptr;
- }
-
- // If this is a segment register followed by a ':', then this is the start
- // of a memory reference, otherwise this is a normal register reference.
- if (getLexer().isNot(AsmToken::Colon))
- return X86Operand::CreateReg(RegNo, Start, End);
-
- if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
- return ErrorOperand(Start, "invalid segment register");
-
- getParser().Lex(); // Eat the colon.
- return ParseMemOperand(RegNo, Start);
- }
case AsmToken::Dollar: {
- // $42 -> immediate.
+ // $42 or $ID -> immediate.
SMLoc Start = Parser.getTok().getLoc(), End;
Parser.Lex();
const MCExpr *Val;
- if (getParser().parseExpression(Val, End))
+ // This is an immediate, so we should not parse a register. Do a precheck
+ // for '%' to supersede intra-register parse errors.
+ SMLoc L = Parser.getTok().getLoc();
+ if (check(getLexer().is(AsmToken::Percent), L,
+ "expected immediate expression") ||
+ getParser().parseExpression(Val, End) ||
+ check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
return nullptr;
return X86Operand::CreateImm(Val, Start, End);
}
- case AsmToken::LCurly:{
+ case AsmToken::LCurly: {
SMLoc Start = Parser.getTok().getLoc();
return ParseRoundingModeOp(Start);
}
+ default: {
+ // This is a memory operand or a register. We have some parsing complications
+ // as a '(' may be part of an immediate expression or the addressing mode
+ // block. This is complicated by the fact that an assembler-level variable
+ // may refer either to a register or an immediate expression.
+
+ SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
+ const MCExpr *Expr = nullptr;
+ unsigned Reg = 0;
+ if (getLexer().isNot(AsmToken::LParen)) {
+ // No '(' so this is either a displacement expression or a register.
+ if (Parser.parseExpression(Expr, EndLoc))
+ return nullptr;
+ if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
+ // Segment Register. Reset Expr and copy value to register.
+ Expr = nullptr;
+ Reg = RE->getRegNo();
+
+ // Sanity check register.
+ if (Reg == X86::EIZ || Reg == X86::RIZ)
+ return ErrorOperand(
+ Loc, "%eiz and %riz can only be used as index registers",
+ SMRange(Loc, EndLoc));
+ if (Reg == X86::RIP)
+ return ErrorOperand(Loc, "%rip can only be used as a base register",
+ SMRange(Loc, EndLoc));
+ // Return registers that are not segment prefixes immediately.
+ if (!Parser.parseOptionalToken(AsmToken::Colon))
+ return X86Operand::CreateReg(Reg, Loc, EndLoc);
+ if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
+ return ErrorOperand(Loc, "invalid segment register");
+ }
+ }
+ // This is a Memory operand.
+ return ParseMemOperand(Reg, Expr, Loc, EndLoc);
+ }
}
}
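
To make the rewritten ParseATTOperand flow concrete, a hedged classification of example operands (the examples are assumptions, not tests from this patch): '$...' is an immediate, '{...}' is a rounding-mode operand, and everything else — registers, bare displacements, and full addressing expressions — now funnels through the default case into ParseMemOperand.

#include <cassert>
#include <string>

enum class OperandPath { Immediate, RoundingMode, MemOrReg };

// Very rough classifier of which ParseATTOperand branch an operand enters,
// keyed only off its first character.
static OperandPath classify(const std::string &Text) {
  if (!Text.empty() && Text[0] == '$')
    return OperandPath::Immediate;    // "$42", "$sym"
  if (!Text.empty() && Text[0] == '{')
    return OperandPath::RoundingMode; // "{rn-sae}"
  return OperandPath::MemOrReg;       // "%eax", "4(%ebp)", "(%eax,%ebx,2)", "foo"
}

int main() {
  assert(classify("$42") == OperandPath::Immediate);
  assert(classify("{rn-sae}") == OperandPath::RoundingMode);
  assert(classify("%eax") == OperandPath::MemOrReg); // register, resolved later
  assert(classify("8(%ebx,%esi,4)") == OperandPath::MemOrReg);
  return 0;
}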
@@ -2086,199 +2104,201 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
return false;
}
-/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
-/// has already been parsed if present.
+/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
+/// has already been parsed if present. disp may be provided as well.
std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
- SMLoc MemStart) {
-
+ const MCExpr *&Disp,
+ const SMLoc &StartLoc,
+ SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
- // We have to disambiguate a parenthesized expression "(4+5)" from the start
- // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
- // only way to do this without lookahead is to eat the '(' and see what is
- // after it.
- const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
- if (getLexer().isNot(AsmToken::LParen)) {
- SMLoc ExprEnd;
- if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
- // Disp may be a variable, handle register values.
- if (auto *RE = dyn_cast<X86MCExpr>(Disp))
- return X86Operand::CreateReg(RE->getRegNo(), MemStart, ExprEnd);
-
- // After parsing the base expression we could either have a parenthesized
- // memory address or not. If not, return now. If so, eat the (.
- if (getLexer().isNot(AsmToken::LParen)) {
- // Unless we have a segment register, treat this as an immediate.
- if (SegReg == 0)
- return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
- MemStart, ExprEnd);
+ SMLoc Loc;
+ // Based on the values passed in, we are in one of these cases (the current
+ // position is marked with (*)):
+
+ // 1. seg : * disp (base-index-scale-expr)
+ // 2. seg : *(disp) (base-index-scale-expr)
+ // 3. seg : *(base-index-scale-expr)
+ // 4. disp *(base-index-scale-expr)
+ // 5. *(disp) (base-index-scale-expr)
+ // 6. *(base-index-scale-expr)
+ // 7. disp *
+ // 8. *(disp)
+
+ // If we do not have a displacement yet, check if we're in cases 4 or 6 by
+ // checking if the first object after the parenthesis is a register (or an
+ // identifier referring to a register) and parse the displacement or default
+ // to 0 as appropriate.
+ auto isAtMemOperand = [this]() {
+ if (this->getLexer().isNot(AsmToken::LParen))
+ return false;
+ AsmToken Buf[2];
+ StringRef Id;
+ auto TokCount = this->getLexer().peekTokens(Buf, true);
+ if (TokCount == 0)
+ return false;
+ switch (Buf[0].getKind()) {
+ case AsmToken::Percent:
+ case AsmToken::Comma:
+ return true;
+ // The cases below are effectively doing a peekIdentifier.
+ case AsmToken::At:
+ case AsmToken::Dollar:
+ if ((TokCount > 1) &&
+ (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
+ (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
+ Id = StringRef(Buf[0].getLoc().getPointer(),
+ Buf[1].getIdentifier().size() + 1);
+ break;
+ case AsmToken::Identifier:
+ case AsmToken::String:
+ Id = Buf[0].getIdentifier();
+ break;
+ default:
+ return false;
}
+ // We have an ID. Check if it is bound to a register.
+ if (!Id.empty()) {
+ MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
+ if (Sym->isVariable()) {
+ auto V = Sym->getVariableValue(/*SetUsed*/ false);
+ return isa<X86MCExpr>(V);
+ }
+ }
+ return false;
+ };
- // Eat the '('.
- Parser.Lex();
- } else {
- // Okay, we have a '('. We don't know if this is an expression or not, but
- // so we have to eat the ( to see beyond it.
- SMLoc LParenLoc = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the '('.
-
- if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
- // Nothing to do here, fall into the code below with the '(' part of the
- // memory operand consumed.
- } else {
- SMLoc ExprEnd;
- getLexer().UnLex(AsmToken(AsmToken::LParen, "("));
-
- // It must be either an parenthesized expression, or an expression that
- // begins from a parenthesized expression, parse it now. Example: (1+2) or
- // (1+2)+3
- if (getParser().parseExpression(Disp, ExprEnd))
+ if (!Disp) {
+ // Parse immediate if we're not at a mem operand yet.
+ if (!isAtMemOperand()) {
+ if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
return nullptr;
+ assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
+ } else {
+ // Disp is implicitly zero if we haven't parsed it yet.
+ Disp = MCConstantExpr::create(0, Parser.getContext());
+ }
+ }
- // After parsing the base expression we could either have a parenthesized
- // memory address or not. If not, return now. If so, eat the (.
- if (getLexer().isNot(AsmToken::LParen)) {
- // Unless we have a segment register, treat this as an immediate.
- if (SegReg == 0)
- return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
- ExprEnd);
- return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
- MemStart, ExprEnd);
- }
+ // We are now either at the end of the operand or at the '(' at the start of a
+ // base-index-scale-expr.
- // Eat the '('.
- Parser.Lex();
- }
+ if (!parseOptionalToken(AsmToken::LParen)) {
+ if (SegReg == 0)
+ return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
+ return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
+ StartLoc, EndLoc);
}
- // If we reached here, then we just ate the ( of the memory operand. Process
+ // If we reached here, then we have just eaten the '('. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
- SMLoc IndexLoc, BaseLoc;
-
- if (getLexer().is(AsmToken::Percent)) {
- SMLoc StartLoc, EndLoc;
- BaseLoc = Parser.getTok().getLoc();
- if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
- if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
- Error(StartLoc, "eiz and riz can only be used as index registers",
- SMRange(StartLoc, EndLoc));
+ SMLoc BaseLoc = getLexer().getLoc();
+ const MCExpr *E;
+ StringRef ErrMsg;
+
+ // Parse BaseReg if one is provided.
+ if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
+ if (Parser.parseExpression(E, EndLoc) ||
+ check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
return nullptr;
- }
- }
- if (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
- IndexLoc = Parser.getTok().getLoc();
+ // Sanity check register.
+ BaseReg = cast<X86MCExpr>(E)->getRegNo();
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
+ return ErrorOperand(BaseLoc,
+ "eiz and riz can only be used as index registers",
+ SMRange(BaseLoc, EndLoc));
+ }
+ if (parseOptionalToken(AsmToken::Comma)) {
// Following the comma we should have either an index register, or a scale
// value. We don't support the latter form, but we want to parse it
// correctly.
//
- // Not that even though it would be completely consistent to support syntax
- // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
- if (getLexer().is(AsmToken::Percent)) {
- SMLoc L;
- if (ParseRegister(IndexReg, L, L))
+ // Even though it would be completely consistent to support syntax like
+ // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
return nullptr;
- if (BaseReg == X86::RIP) {
- Error(IndexLoc, "%rip as base register can not have an index register");
- return nullptr;
- }
- if (IndexReg == X86::RIP) {
- Error(IndexLoc, "%rip is not allowed as an index register");
- return nullptr;
- }
-
- if (getLexer().isNot(AsmToken::RParen)) {
- // Parse the scale amount:
- // ::= ',' [scale-expression]
- if (parseToken(AsmToken::Comma, "expected comma in scale expression"))
- return nullptr;
- if (getLexer().isNot(AsmToken::RParen)) {
- SMLoc Loc = Parser.getTok().getLoc();
-
- int64_t ScaleVal;
- if (getParser().parseAbsoluteExpression(ScaleVal)){
- Error(Loc, "expected scale expression");
- return nullptr;
- }
-
- // Validate the scale amount.
- if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
- ScaleVal != 1) {
- Error(Loc, "scale factor in 16-bit address must be 1");
- return nullptr;
- }
- if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
- ScaleVal != 8) {
- Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
- return nullptr;
+ if (!isa<X86MCExpr>(E)) {
+ // We've parsed an unexpected Scale Value instead of an index
+ // register. Interpret it as an absolute.
+ int64_t ScaleVal;
+ if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
+ return ErrorOperand(Loc, "expected absolute expression");
+ if (ScaleVal != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
+ } else { // IndexReg Found.
+ IndexReg = cast<X86MCExpr>(E)->getRegNo();
+
+ if (BaseReg == X86::RIP)
+ return ErrorOperand(
+ Loc, "%rip as base register can not have an index register");
+ if (IndexReg == X86::RIP)
+ return ErrorOperand(Loc, "%rip is not allowed as an index register");
+
+ if (parseOptionalToken(AsmToken::Comma)) {
+ // Parse the scale amount:
+ // ::= ',' [scale-expression]
+
+ // A scale amount without an index is ignored.
+ if (getLexer().isNot(AsmToken::RParen)) {
+ int64_t ScaleVal;
+ if (Parser.parseTokenLoc(Loc) ||
+ Parser.parseAbsoluteExpression(ScaleVal))
+ return ErrorOperand(Loc, "expected scale expression");
+ Scale = (unsigned)ScaleVal;
+ // Validate the scale amount.
+ if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
+ Scale != 1)
+ return ErrorOperand(Loc,
+ "scale factor in 16-bit address must be 1");
+ if (checkScale(Scale, ErrMsg))
+ return ErrorOperand(Loc, ErrMsg);
}
- Scale = (unsigned)ScaleVal;
}
}
- } else if (getLexer().isNot(AsmToken::RParen)) {
- // A scale amount without an index is ignored.
- // index.
- SMLoc Loc = Parser.getTok().getLoc();
-
- int64_t Value;
- if (getParser().parseAbsoluteExpression(Value))
- return nullptr;
-
- if (Value != 1)
- Warning(Loc, "scale factor without index register is ignored");
- Scale = 1;
}
}
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
- SMLoc MemEnd = Parser.getTok().getEndLoc();
if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
return nullptr;
- // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
- // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
- // documented form in various unofficial manuals, so a lot of code uses it.
- if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 &&
- SegReg == 0 && isa<MCConstantExpr>(Disp) &&
- cast<MCConstantExpr>(Disp)->getValue() == 0)
+ // This is to support the otherwise illegal operand (%dx), found in examples
+ // in various unofficial manuals (e.g. "out[s]?[bwl]? %al, (%dx)"), which must
+ // now be supported. Mark such DX variants separately and fix them only in
+ // special cases.
+ if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
+ isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
- StringRef ErrMsg;
if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
- ErrMsg)) {
- Error(BaseLoc, ErrMsg);
- return nullptr;
- }
+ ErrMsg))
+ return ErrorOperand(BaseLoc, ErrMsg);
if (SegReg || BaseReg || IndexReg)
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
- IndexReg, Scale, MemStart, MemEnd);
- return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
+ IndexReg, Scale, StartLoc, EndLoc);
+ return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
}
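
A hedged, standalone illustration of the seg:disp(base,index,scale) decomposition the rewritten ParseMemOperand produces; the example operands are assumptions chosen for illustration, not test cases from the patch.

#include <cassert>
#include <string>

// The five components an AT&T memory operand decomposes into.
struct ToyMemOperand {
  std::string Seg, Base, Index;
  long Disp = 0;
  unsigned Scale = 1;
};

int main() {
  // %es:8(%ebx,%esi,4) -> seg=es, disp=8, base=ebx, index=esi, scale=4
  ToyMemOperand M1{"es", "ebx", "esi", 8, 4};
  assert(M1.Scale == 4 && M1.Disp == 8);

  // (,%eax,2) -> no segment, no base, displacement defaults to 0
  ToyMemOperand M2{"", "", "eax", 0, 2};
  assert(M2.Base.empty() && M2.Disp == 0);

  // A bare displacement such as "label" keeps base/index empty and scale 1
  // (symbolic displacements are not modeled in this toy struct).
  ToyMemOperand M3{"", "", "", 0, 1};
  assert(M3.Index.empty() && M3.Scale == 1);
  return 0;
}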
// Parse either a standard primary expression or a register.
bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
- if (Parser.parsePrimaryExpr(Res, EndLoc)) {
+ // See if this is a register first.
+ if (getTok().is(AsmToken::Percent) ||
+ (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
+ MatchRegisterName(Parser.getTok().getString()))) {
SMLoc StartLoc = Parser.getTok().getLoc();
- // Normal Expression parse fails, check if it could be a register.
unsigned RegNo;
- bool TryRegParse =
- getTok().is(AsmToken::Percent) ||
- (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier));
- if (!TryRegParse || ParseRegister(RegNo, StartLoc, EndLoc))
+ if (ParseRegister(RegNo, StartLoc, EndLoc))
return true;
- // Clear previous parse error and return correct expression.
- Parser.clearPendingErrors();
Res = X86MCExpr::create(RegNo, Parser.getContext());
return false;
}
-
- return false;
+ return Parser.parsePrimaryExpr(Res, EndLoc);
}
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -3282,7 +3302,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
- getParser().setParsingInlineAsm(false);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "prefix")
Parser.Lex();
@@ -3295,7 +3314,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
- getParser().setParsingInlineAsm(true);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();
@@ -3315,6 +3333,8 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveFPOPushReg(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_stackalloc")
return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
+ else if (IDVal == ".cv_fpo_stackalign")
+ return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endprologue")
return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_endproc")
@@ -3429,6 +3449,16 @@ bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
return getTargetStreamer().emitFPOStackAlloc(Offset, L);
}
+// .cv_fpo_stackalign 8
+bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ int64_t Offset;
+ if (Parser.parseIntToken(Offset, "expected offset") ||
+ Parser.parseEOL("unexpected tokens"))
+ return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
+ return getTargetStreamer().emitFPOStackAlign(Offset, L);
+}
+
// .cv_fpo_endprologue
bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
MCAsmParser &Parser = getParser();
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 1ac304f3be03..54d550b60652 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -983,45 +983,18 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
insn->opcode == 0xE3)
attrMask ^= ATTR_ADSIZE;
- /*
- * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix
- * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes
- */
-
- if ((insn->mode == MODE_64BIT) && insn->hasOpSize) {
- switch (insn->opcode) {
- case 0xE8:
- case 0xE9:
- // Take care of psubsb and other mmx instructions.
- if (insn->opcodeType == ONEBYTE) {
- attrMask ^= ATTR_OPSIZE;
- insn->immediateSize = 4;
- insn->displacementSize = 4;
- }
- break;
- case 0x82:
- case 0x83:
- case 0x84:
- case 0x85:
- case 0x86:
- case 0x87:
- case 0x88:
- case 0x89:
- case 0x8A:
- case 0x8B:
- case 0x8C:
- case 0x8D:
- case 0x8E:
- case 0x8F:
- // Take care of lea and three byte ops.
- if (insn->opcodeType == TWOBYTE) {
- attrMask ^= ATTR_OPSIZE;
- insn->immediateSize = 4;
- insn->displacementSize = 4;
- }
- break;
- }
- }
+ // If we're in 16-bit mode and this is one of the relative jumps and opsize
+ // prefix isn't present, we need to force the opsize attribute since the
+ // prefix is inverted relative to 32-bit mode.
+ if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
+ insn->opcodeType == ONEBYTE &&
+ (insn->opcode == 0xE8 || insn->opcode == 0xE9))
+ attrMask |= ATTR_OPSIZE;
+
+ if (insn->mode == MODE_16BIT && !insn->hasOpSize &&
+ insn->opcodeType == TWOBYTE &&
+ insn->opcode >= 0x80 && insn->opcode <= 0x8F)
+ attrMask |= ATTR_OPSIZE;
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
@@ -1420,7 +1393,7 @@ static int readModRM(struct InternalInstruction* insn) {
break;
case 0x1:
insn->displacementSize = 1;
- /* FALLTHROUGH */
+ LLVM_FALLTHROUGH;
case 0x2:
insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
switch (rm & 7) {
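A minimal standalone sketch, not part of this change, of the 16-bit-mode condition the rewritten hunk encodes: relative CALL/JMP (one-byte 0xE8/0xE9) and Jcc rel (two-byte 0x80..0x8F) have ATTR_OPSIZE forced when no 0x66 prefix is present, since the prefix meaning is inverted relative to 32-bit mode. The opcode-map kind is passed as booleans here instead of the decoder's internal enum.

// Sketch only: mirrors the decoder's new 16-bit-mode special case.
static bool forcesOpSizeAttr16Bit(bool HasOpSize, bool IsOneByteMap,
                                  bool IsTwoByteMap, unsigned Opcode) {
  if (HasOpSize)
    return false;
  if (IsOneByteMap && (Opcode == 0xE8 || Opcode == 0xE9))
    return true; // CALL rel / JMP rel
  if (IsTwoByteMap && Opcode >= 0x80 && Opcode <= 0x8F)
    return true; // Jcc rel
  return false;
}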
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 0e4c4398e49d..64e6fb9f0375 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -540,7 +540,6 @@ protected:
unsigned InstrOffset = 0;
unsigned StackAdjust = 0;
unsigned StackSize = 0;
- unsigned PrevStackSize = 0;
unsigned NumDefCFAOffsets = 0;
for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
@@ -588,7 +587,6 @@ protected:
// L0:
// .cfi_def_cfa_offset 80
//
- PrevStackSize = StackSize;
StackSize = std::abs(Inst.getOffset()) / StackDivide;
++NumDefCFAOffsets;
break;
@@ -635,16 +633,6 @@ protected:
CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
} else {
- // If the amount of the stack allocation is the size of a register, then
- // we "push" the RAX/EAX register onto the stack instead of adjusting the
- // stack pointer with a SUB instruction. We don't support the push of the
- // RAX/EAX register with compact unwind. So we check for that situation
- // here.
- if ((NumDefCFAOffsets == SavedRegIdx + 1 &&
- StackSize - PrevStackSize == 1) ||
- (Instrs.size() == 1 && NumDefCFAOffsets == 1 && StackSize == 2))
- return CU::UNWIND_MODE_DWARF;
-
SubtractInstrIdx += InstrOffset;
++StackAdjust;
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 497e29fe628e..c85ce9bbd5a4 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -231,6 +231,11 @@ namespace X86II {
/// to be an absolute symbol in range [0,128), so we can use the @ABS8
/// symbol modifier.
MO_ABS8,
+
+ /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// stub symbols on Windows.
+ MO_COFFSTUB,
};
enum : uint64_t {
@@ -261,12 +266,12 @@ namespace X86II {
RawFrmSrc = 4,
/// RawFrmDst - This form is for instructions that use the destination index
- /// register DI/EDI/ESI.
+ /// register DI/EDI/RDI.
RawFrmDst = 5,
- /// RawFrmSrc - This form is for instructions that use the source index
- /// register SI/ESI/ERI with a possible segment override, and also the
- /// destination index register DI/ESI/RDI.
+ /// RawFrmDstSrc - This form is for instructions that use the source index
+ /// register SI/ESI/RSI with a possible segment override, and also the
+ /// destination index register DI/EDI/RDI.
RawFrmDstSrc = 6,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index f1d15e66918b..ea4aaf14223d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
#include "X86GenRegisterInfo.inc"
#define GET_INSTRINFO_MC_DESC
-#define GET_GENINSTRINFO_MC_HELPERS
+#define GET_INSTRINFO_MC_HELPERS
#include "X86GenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
@@ -81,120 +81,193 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
codeview::RegisterId CVReg;
MCPhysReg Reg;
} RegMap[] = {
- { codeview::RegisterId::CVRegAL, X86::AL},
- { codeview::RegisterId::CVRegCL, X86::CL},
- { codeview::RegisterId::CVRegDL, X86::DL},
- { codeview::RegisterId::CVRegBL, X86::BL},
- { codeview::RegisterId::CVRegAH, X86::AH},
- { codeview::RegisterId::CVRegCH, X86::CH},
- { codeview::RegisterId::CVRegDH, X86::DH},
- { codeview::RegisterId::CVRegBH, X86::BH},
- { codeview::RegisterId::CVRegAX, X86::AX},
- { codeview::RegisterId::CVRegCX, X86::CX},
- { codeview::RegisterId::CVRegDX, X86::DX},
- { codeview::RegisterId::CVRegBX, X86::BX},
- { codeview::RegisterId::CVRegSP, X86::SP},
- { codeview::RegisterId::CVRegBP, X86::BP},
- { codeview::RegisterId::CVRegSI, X86::SI},
- { codeview::RegisterId::CVRegDI, X86::DI},
- { codeview::RegisterId::CVRegEAX, X86::EAX},
- { codeview::RegisterId::CVRegECX, X86::ECX},
- { codeview::RegisterId::CVRegEDX, X86::EDX},
- { codeview::RegisterId::CVRegEBX, X86::EBX},
- { codeview::RegisterId::CVRegESP, X86::ESP},
- { codeview::RegisterId::CVRegEBP, X86::EBP},
- { codeview::RegisterId::CVRegESI, X86::ESI},
- { codeview::RegisterId::CVRegEDI, X86::EDI},
-
- { codeview::RegisterId::CVRegEFLAGS, X86::EFLAGS},
-
- { codeview::RegisterId::CVRegST0, X86::FP0},
- { codeview::RegisterId::CVRegST1, X86::FP1},
- { codeview::RegisterId::CVRegST2, X86::FP2},
- { codeview::RegisterId::CVRegST3, X86::FP3},
- { codeview::RegisterId::CVRegST4, X86::FP4},
- { codeview::RegisterId::CVRegST5, X86::FP5},
- { codeview::RegisterId::CVRegST6, X86::FP6},
- { codeview::RegisterId::CVRegST7, X86::FP7},
-
- { codeview::RegisterId::CVRegXMM0, X86::XMM0},
- { codeview::RegisterId::CVRegXMM1, X86::XMM1},
- { codeview::RegisterId::CVRegXMM2, X86::XMM2},
- { codeview::RegisterId::CVRegXMM3, X86::XMM3},
- { codeview::RegisterId::CVRegXMM4, X86::XMM4},
- { codeview::RegisterId::CVRegXMM5, X86::XMM5},
- { codeview::RegisterId::CVRegXMM6, X86::XMM6},
- { codeview::RegisterId::CVRegXMM7, X86::XMM7},
-
- { codeview::RegisterId::CVRegXMM8, X86::XMM8},
- { codeview::RegisterId::CVRegXMM9, X86::XMM9},
- { codeview::RegisterId::CVRegXMM10, X86::XMM10},
- { codeview::RegisterId::CVRegXMM11, X86::XMM11},
- { codeview::RegisterId::CVRegXMM12, X86::XMM12},
- { codeview::RegisterId::CVRegXMM13, X86::XMM13},
- { codeview::RegisterId::CVRegXMM14, X86::XMM14},
- { codeview::RegisterId::CVRegXMM15, X86::XMM15},
-
- { codeview::RegisterId::CVRegSIL, X86::SIL},
- { codeview::RegisterId::CVRegDIL, X86::DIL},
- { codeview::RegisterId::CVRegBPL, X86::BPL},
- { codeview::RegisterId::CVRegSPL, X86::SPL},
- { codeview::RegisterId::CVRegRAX, X86::RAX},
- { codeview::RegisterId::CVRegRBX, X86::RBX},
- { codeview::RegisterId::CVRegRCX, X86::RCX},
- { codeview::RegisterId::CVRegRDX, X86::RDX},
- { codeview::RegisterId::CVRegRSI, X86::RSI},
- { codeview::RegisterId::CVRegRDI, X86::RDI},
- { codeview::RegisterId::CVRegRBP, X86::RBP},
- { codeview::RegisterId::CVRegRSP, X86::RSP},
- { codeview::RegisterId::CVRegR8, X86::R8},
- { codeview::RegisterId::CVRegR9, X86::R9},
- { codeview::RegisterId::CVRegR10, X86::R10},
- { codeview::RegisterId::CVRegR11, X86::R11},
- { codeview::RegisterId::CVRegR12, X86::R12},
- { codeview::RegisterId::CVRegR13, X86::R13},
- { codeview::RegisterId::CVRegR14, X86::R14},
- { codeview::RegisterId::CVRegR15, X86::R15},
- { codeview::RegisterId::CVRegR8B, X86::R8B},
- { codeview::RegisterId::CVRegR9B, X86::R9B},
- { codeview::RegisterId::CVRegR10B, X86::R10B},
- { codeview::RegisterId::CVRegR11B, X86::R11B},
- { codeview::RegisterId::CVRegR12B, X86::R12B},
- { codeview::RegisterId::CVRegR13B, X86::R13B},
- { codeview::RegisterId::CVRegR14B, X86::R14B},
- { codeview::RegisterId::CVRegR15B, X86::R15B},
- { codeview::RegisterId::CVRegR8W, X86::R8W},
- { codeview::RegisterId::CVRegR9W, X86::R9W},
- { codeview::RegisterId::CVRegR10W, X86::R10W},
- { codeview::RegisterId::CVRegR11W, X86::R11W},
- { codeview::RegisterId::CVRegR12W, X86::R12W},
- { codeview::RegisterId::CVRegR13W, X86::R13W},
- { codeview::RegisterId::CVRegR14W, X86::R14W},
- { codeview::RegisterId::CVRegR15W, X86::R15W},
- { codeview::RegisterId::CVRegR8D, X86::R8D},
- { codeview::RegisterId::CVRegR9D, X86::R9D},
- { codeview::RegisterId::CVRegR10D, X86::R10D},
- { codeview::RegisterId::CVRegR11D, X86::R11D},
- { codeview::RegisterId::CVRegR12D, X86::R12D},
- { codeview::RegisterId::CVRegR13D, X86::R13D},
- { codeview::RegisterId::CVRegR14D, X86::R14D},
- { codeview::RegisterId::CVRegR15D, X86::R15D},
- { codeview::RegisterId::CVRegAMD64_YMM0, X86::YMM0},
- { codeview::RegisterId::CVRegAMD64_YMM1, X86::YMM1},
- { codeview::RegisterId::CVRegAMD64_YMM2, X86::YMM2},
- { codeview::RegisterId::CVRegAMD64_YMM3, X86::YMM3},
- { codeview::RegisterId::CVRegAMD64_YMM4, X86::YMM4},
- { codeview::RegisterId::CVRegAMD64_YMM5, X86::YMM5},
- { codeview::RegisterId::CVRegAMD64_YMM6, X86::YMM6},
- { codeview::RegisterId::CVRegAMD64_YMM7, X86::YMM7},
- { codeview::RegisterId::CVRegAMD64_YMM8, X86::YMM8},
- { codeview::RegisterId::CVRegAMD64_YMM9, X86::YMM9},
- { codeview::RegisterId::CVRegAMD64_YMM10, X86::YMM10},
- { codeview::RegisterId::CVRegAMD64_YMM11, X86::YMM11},
- { codeview::RegisterId::CVRegAMD64_YMM12, X86::YMM12},
- { codeview::RegisterId::CVRegAMD64_YMM13, X86::YMM13},
- { codeview::RegisterId::CVRegAMD64_YMM14, X86::YMM14},
- { codeview::RegisterId::CVRegAMD64_YMM15, X86::YMM15},
+ {codeview::RegisterId::AL, X86::AL},
+ {codeview::RegisterId::CL, X86::CL},
+ {codeview::RegisterId::DL, X86::DL},
+ {codeview::RegisterId::BL, X86::BL},
+ {codeview::RegisterId::AH, X86::AH},
+ {codeview::RegisterId::CH, X86::CH},
+ {codeview::RegisterId::DH, X86::DH},
+ {codeview::RegisterId::BH, X86::BH},
+ {codeview::RegisterId::AX, X86::AX},
+ {codeview::RegisterId::CX, X86::CX},
+ {codeview::RegisterId::DX, X86::DX},
+ {codeview::RegisterId::BX, X86::BX},
+ {codeview::RegisterId::SP, X86::SP},
+ {codeview::RegisterId::BP, X86::BP},
+ {codeview::RegisterId::SI, X86::SI},
+ {codeview::RegisterId::DI, X86::DI},
+ {codeview::RegisterId::EAX, X86::EAX},
+ {codeview::RegisterId::ECX, X86::ECX},
+ {codeview::RegisterId::EDX, X86::EDX},
+ {codeview::RegisterId::EBX, X86::EBX},
+ {codeview::RegisterId::ESP, X86::ESP},
+ {codeview::RegisterId::EBP, X86::EBP},
+ {codeview::RegisterId::ESI, X86::ESI},
+ {codeview::RegisterId::EDI, X86::EDI},
+
+ {codeview::RegisterId::EFLAGS, X86::EFLAGS},
+
+ {codeview::RegisterId::ST0, X86::FP0},
+ {codeview::RegisterId::ST1, X86::FP1},
+ {codeview::RegisterId::ST2, X86::FP2},
+ {codeview::RegisterId::ST3, X86::FP3},
+ {codeview::RegisterId::ST4, X86::FP4},
+ {codeview::RegisterId::ST5, X86::FP5},
+ {codeview::RegisterId::ST6, X86::FP6},
+ {codeview::RegisterId::ST7, X86::FP7},
+
+ {codeview::RegisterId::XMM0, X86::XMM0},
+ {codeview::RegisterId::XMM1, X86::XMM1},
+ {codeview::RegisterId::XMM2, X86::XMM2},
+ {codeview::RegisterId::XMM3, X86::XMM3},
+ {codeview::RegisterId::XMM4, X86::XMM4},
+ {codeview::RegisterId::XMM5, X86::XMM5},
+ {codeview::RegisterId::XMM6, X86::XMM6},
+ {codeview::RegisterId::XMM7, X86::XMM7},
+
+ {codeview::RegisterId::XMM8, X86::XMM8},
+ {codeview::RegisterId::XMM9, X86::XMM9},
+ {codeview::RegisterId::XMM10, X86::XMM10},
+ {codeview::RegisterId::XMM11, X86::XMM11},
+ {codeview::RegisterId::XMM12, X86::XMM12},
+ {codeview::RegisterId::XMM13, X86::XMM13},
+ {codeview::RegisterId::XMM14, X86::XMM14},
+ {codeview::RegisterId::XMM15, X86::XMM15},
+
+ {codeview::RegisterId::SIL, X86::SIL},
+ {codeview::RegisterId::DIL, X86::DIL},
+ {codeview::RegisterId::BPL, X86::BPL},
+ {codeview::RegisterId::SPL, X86::SPL},
+ {codeview::RegisterId::RAX, X86::RAX},
+ {codeview::RegisterId::RBX, X86::RBX},
+ {codeview::RegisterId::RCX, X86::RCX},
+ {codeview::RegisterId::RDX, X86::RDX},
+ {codeview::RegisterId::RSI, X86::RSI},
+ {codeview::RegisterId::RDI, X86::RDI},
+ {codeview::RegisterId::RBP, X86::RBP},
+ {codeview::RegisterId::RSP, X86::RSP},
+ {codeview::RegisterId::R8, X86::R8},
+ {codeview::RegisterId::R9, X86::R9},
+ {codeview::RegisterId::R10, X86::R10},
+ {codeview::RegisterId::R11, X86::R11},
+ {codeview::RegisterId::R12, X86::R12},
+ {codeview::RegisterId::R13, X86::R13},
+ {codeview::RegisterId::R14, X86::R14},
+ {codeview::RegisterId::R15, X86::R15},
+ {codeview::RegisterId::R8B, X86::R8B},
+ {codeview::RegisterId::R9B, X86::R9B},
+ {codeview::RegisterId::R10B, X86::R10B},
+ {codeview::RegisterId::R11B, X86::R11B},
+ {codeview::RegisterId::R12B, X86::R12B},
+ {codeview::RegisterId::R13B, X86::R13B},
+ {codeview::RegisterId::R14B, X86::R14B},
+ {codeview::RegisterId::R15B, X86::R15B},
+ {codeview::RegisterId::R8W, X86::R8W},
+ {codeview::RegisterId::R9W, X86::R9W},
+ {codeview::RegisterId::R10W, X86::R10W},
+ {codeview::RegisterId::R11W, X86::R11W},
+ {codeview::RegisterId::R12W, X86::R12W},
+ {codeview::RegisterId::R13W, X86::R13W},
+ {codeview::RegisterId::R14W, X86::R14W},
+ {codeview::RegisterId::R15W, X86::R15W},
+ {codeview::RegisterId::R8D, X86::R8D},
+ {codeview::RegisterId::R9D, X86::R9D},
+ {codeview::RegisterId::R10D, X86::R10D},
+ {codeview::RegisterId::R11D, X86::R11D},
+ {codeview::RegisterId::R12D, X86::R12D},
+ {codeview::RegisterId::R13D, X86::R13D},
+ {codeview::RegisterId::R14D, X86::R14D},
+ {codeview::RegisterId::R15D, X86::R15D},
+ {codeview::RegisterId::AMD64_YMM0, X86::YMM0},
+ {codeview::RegisterId::AMD64_YMM1, X86::YMM1},
+ {codeview::RegisterId::AMD64_YMM2, X86::YMM2},
+ {codeview::RegisterId::AMD64_YMM3, X86::YMM3},
+ {codeview::RegisterId::AMD64_YMM4, X86::YMM4},
+ {codeview::RegisterId::AMD64_YMM5, X86::YMM5},
+ {codeview::RegisterId::AMD64_YMM6, X86::YMM6},
+ {codeview::RegisterId::AMD64_YMM7, X86::YMM7},
+ {codeview::RegisterId::AMD64_YMM8, X86::YMM8},
+ {codeview::RegisterId::AMD64_YMM9, X86::YMM9},
+ {codeview::RegisterId::AMD64_YMM10, X86::YMM10},
+ {codeview::RegisterId::AMD64_YMM11, X86::YMM11},
+ {codeview::RegisterId::AMD64_YMM12, X86::YMM12},
+ {codeview::RegisterId::AMD64_YMM13, X86::YMM13},
+ {codeview::RegisterId::AMD64_YMM14, X86::YMM14},
+ {codeview::RegisterId::AMD64_YMM15, X86::YMM15},
+ {codeview::RegisterId::AMD64_YMM16, X86::YMM16},
+ {codeview::RegisterId::AMD64_YMM17, X86::YMM17},
+ {codeview::RegisterId::AMD64_YMM18, X86::YMM18},
+ {codeview::RegisterId::AMD64_YMM19, X86::YMM19},
+ {codeview::RegisterId::AMD64_YMM20, X86::YMM20},
+ {codeview::RegisterId::AMD64_YMM21, X86::YMM21},
+ {codeview::RegisterId::AMD64_YMM22, X86::YMM22},
+ {codeview::RegisterId::AMD64_YMM23, X86::YMM23},
+ {codeview::RegisterId::AMD64_YMM24, X86::YMM24},
+ {codeview::RegisterId::AMD64_YMM25, X86::YMM25},
+ {codeview::RegisterId::AMD64_YMM26, X86::YMM26},
+ {codeview::RegisterId::AMD64_YMM27, X86::YMM27},
+ {codeview::RegisterId::AMD64_YMM28, X86::YMM28},
+ {codeview::RegisterId::AMD64_YMM29, X86::YMM29},
+ {codeview::RegisterId::AMD64_YMM30, X86::YMM30},
+ {codeview::RegisterId::AMD64_YMM31, X86::YMM31},
+ {codeview::RegisterId::AMD64_ZMM0, X86::ZMM0},
+ {codeview::RegisterId::AMD64_ZMM1, X86::ZMM1},
+ {codeview::RegisterId::AMD64_ZMM2, X86::ZMM2},
+ {codeview::RegisterId::AMD64_ZMM3, X86::ZMM3},
+ {codeview::RegisterId::AMD64_ZMM4, X86::ZMM4},
+ {codeview::RegisterId::AMD64_ZMM5, X86::ZMM5},
+ {codeview::RegisterId::AMD64_ZMM6, X86::ZMM6},
+ {codeview::RegisterId::AMD64_ZMM7, X86::ZMM7},
+ {codeview::RegisterId::AMD64_ZMM8, X86::ZMM8},
+ {codeview::RegisterId::AMD64_ZMM9, X86::ZMM9},
+ {codeview::RegisterId::AMD64_ZMM10, X86::ZMM10},
+ {codeview::RegisterId::AMD64_ZMM11, X86::ZMM11},
+ {codeview::RegisterId::AMD64_ZMM12, X86::ZMM12},
+ {codeview::RegisterId::AMD64_ZMM13, X86::ZMM13},
+ {codeview::RegisterId::AMD64_ZMM14, X86::ZMM14},
+ {codeview::RegisterId::AMD64_ZMM15, X86::ZMM15},
+ {codeview::RegisterId::AMD64_ZMM16, X86::ZMM16},
+ {codeview::RegisterId::AMD64_ZMM17, X86::ZMM17},
+ {codeview::RegisterId::AMD64_ZMM18, X86::ZMM18},
+ {codeview::RegisterId::AMD64_ZMM19, X86::ZMM19},
+ {codeview::RegisterId::AMD64_ZMM20, X86::ZMM20},
+ {codeview::RegisterId::AMD64_ZMM21, X86::ZMM21},
+ {codeview::RegisterId::AMD64_ZMM22, X86::ZMM22},
+ {codeview::RegisterId::AMD64_ZMM23, X86::ZMM23},
+ {codeview::RegisterId::AMD64_ZMM24, X86::ZMM24},
+ {codeview::RegisterId::AMD64_ZMM25, X86::ZMM25},
+ {codeview::RegisterId::AMD64_ZMM26, X86::ZMM26},
+ {codeview::RegisterId::AMD64_ZMM27, X86::ZMM27},
+ {codeview::RegisterId::AMD64_ZMM28, X86::ZMM28},
+ {codeview::RegisterId::AMD64_ZMM29, X86::ZMM29},
+ {codeview::RegisterId::AMD64_ZMM30, X86::ZMM30},
+ {codeview::RegisterId::AMD64_ZMM31, X86::ZMM31},
+ {codeview::RegisterId::AMD64_K0, X86::K0},
+ {codeview::RegisterId::AMD64_K1, X86::K1},
+ {codeview::RegisterId::AMD64_K2, X86::K2},
+ {codeview::RegisterId::AMD64_K3, X86::K3},
+ {codeview::RegisterId::AMD64_K4, X86::K4},
+ {codeview::RegisterId::AMD64_K5, X86::K5},
+ {codeview::RegisterId::AMD64_K6, X86::K6},
+ {codeview::RegisterId::AMD64_K7, X86::K7},
+ {codeview::RegisterId::AMD64_XMM16, X86::XMM16},
+ {codeview::RegisterId::AMD64_XMM17, X86::XMM17},
+ {codeview::RegisterId::AMD64_XMM18, X86::XMM18},
+ {codeview::RegisterId::AMD64_XMM19, X86::XMM19},
+ {codeview::RegisterId::AMD64_XMM20, X86::XMM20},
+ {codeview::RegisterId::AMD64_XMM21, X86::XMM21},
+ {codeview::RegisterId::AMD64_XMM22, X86::XMM22},
+ {codeview::RegisterId::AMD64_XMM23, X86::XMM23},
+ {codeview::RegisterId::AMD64_XMM24, X86::XMM24},
+ {codeview::RegisterId::AMD64_XMM25, X86::XMM25},
+ {codeview::RegisterId::AMD64_XMM26, X86::XMM26},
+ {codeview::RegisterId::AMD64_XMM27, X86::XMM27},
+ {codeview::RegisterId::AMD64_XMM28, X86::XMM28},
+ {codeview::RegisterId::AMD64_XMM29, X86::XMM29},
+ {codeview::RegisterId::AMD64_XMM30, X86::XMM30},
+ {codeview::RegisterId::AMD64_XMM31, X86::XMM31},
+
};
for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
@@ -307,83 +380,19 @@ class X86MCInstrAnalysis : public MCInstrAnalysis {
public:
X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {}
- bool isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const override;
+#define GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS
+#include "X86GenSubtargetInfo.inc"
+
bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
APInt &Mask) const override;
+ std::vector<std::pair<uint64_t, uint64_t>>
+ findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotSectionVA,
+ const Triple &TargetTriple) const override;
};
-bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI,
- const MCInst &Inst) const {
- if (STI.getCPU() == "btver2") {
- // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and
- // Jaguar pipeline", subsection 8 "Dependency-breaking instructions".
- switch (Inst.getOpcode()) {
- default:
- return false;
- case X86::SUB32rr:
- case X86::SUB64rr:
- case X86::SBB32rr:
- case X86::SBB64rr:
- case X86::XOR32rr:
- case X86::XOR64rr:
- case X86::XORPSrr:
- case X86::XORPDrr:
- case X86::VXORPSrr:
- case X86::VXORPDrr:
- case X86::ANDNPSrr:
- case X86::VANDNPSrr:
- case X86::ANDNPDrr:
- case X86::VANDNPDrr:
- case X86::PXORrr:
- case X86::VPXORrr:
- case X86::PANDNrr:
- case X86::VPANDNrr:
- case X86::PSUBBrr:
- case X86::PSUBWrr:
- case X86::PSUBDrr:
- case X86::PSUBQrr:
- case X86::VPSUBBrr:
- case X86::VPSUBWrr:
- case X86::VPSUBDrr:
- case X86::VPSUBQrr:
- case X86::PCMPEQBrr:
- case X86::PCMPEQWrr:
- case X86::PCMPEQDrr:
- case X86::PCMPEQQrr:
- case X86::VPCMPEQBrr:
- case X86::VPCMPEQWrr:
- case X86::VPCMPEQDrr:
- case X86::VPCMPEQQrr:
- case X86::PCMPGTBrr:
- case X86::PCMPGTWrr:
- case X86::PCMPGTDrr:
- case X86::PCMPGTQrr:
- case X86::VPCMPGTBrr:
- case X86::VPCMPGTWrr:
- case X86::VPCMPGTDrr:
- case X86::VPCMPGTQrr:
- case X86::MMX_PXORirr:
- case X86::MMX_PANDNirr:
- case X86::MMX_PSUBBirr:
- case X86::MMX_PSUBDirr:
- case X86::MMX_PSUBQirr:
- case X86::MMX_PSUBWirr:
- case X86::MMX_PCMPGTBirr:
- case X86::MMX_PCMPGTDirr:
- case X86::MMX_PCMPGTWirr:
- case X86::MMX_PCMPEQBirr:
- case X86::MMX_PCMPEQDirr:
- case X86::MMX_PCMPEQWirr:
- return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
- case X86::CMP32rr:
- case X86::CMP64rr:
- return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg();
- }
- }
-
- return false;
-}
+#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
+#include "X86GenSubtargetInfo.inc"
bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
const MCInst &Inst,
@@ -437,6 +446,64 @@ bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
return Mask.getBoolValue();
}
+static std::vector<std::pair<uint64_t, uint64_t>>
+findX86PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA) {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 6 < End; ) {
+ // Recognize a jmp.
+ if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0xa3) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // address of the base of the .got.plt section plus the immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(
+ std::make_pair(PltSectionVA + Byte, GotPltSectionVA + Imm));
+ Byte += 6;
+ } else if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0x25) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(std::make_pair(PltSectionVA + Byte, Imm));
+ Byte += 6;
+ } else
+ Byte++;
+ }
+ return Result;
+}
+
+static std::vector<std::pair<uint64_t, uint64_t>>
+findX86_64PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents) {
+ // Do a lightweight parsing of PLT entries.
+ std::vector<std::pair<uint64_t, uint64_t>> Result;
+ for (uint64_t Byte = 0, End = PltContents.size(); Byte + 6 < End; ) {
+ // Recognize a jmp.
+ if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0x25) {
+ // The jmp instruction at the beginning of each PLT entry jumps to the
+ // address of the next instruction plus the immediate.
+ uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
+ Result.push_back(
+ std::make_pair(PltSectionVA + Byte, PltSectionVA + Byte + 6 + Imm));
+ Byte += 6;
+ } else
+ Byte++;
+ }
+ return Result;
+}
+
+std::vector<std::pair<uint64_t, uint64_t>> X86MCInstrAnalysis::findPltEntries(
+ uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
+ uint64_t GotPltSectionVA, const Triple &TargetTriple) const {
+ switch (TargetTriple.getArch()) {
+ case Triple::x86:
+ return findX86PltEntries(PltSectionVA, PltContents, GotPltSectionVA);
+ case Triple::x86_64:
+ return findX86_64PltEntries(PltSectionVA, PltContents);
+ default:
+ return {};
+ }
+}
+
} // end of namespace X86_MC
} // end of namespace llvm
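Not part of the commit: a self-contained sketch of the arithmetic findX86_64PltEntries() performs for each entry. An x86-64 PLT slot that begins with the bytes ff 25 imm32 is a RIP-relative jmp, so the referenced GOT address is the address of the next instruction (entry start + 6) plus the little-endian immediate.

#include <cstdint>
#include <cstring>

// Sketch only: compute the GOT address an x86-64 PLT entry jumps through.
// Assumes a little-endian host and at least 6 readable bytes at EntryBytes.
static uint64_t gotTargetOfPltEntry(uint64_t EntryVA, const uint8_t *EntryBytes) {
  uint32_t Imm;
  std::memcpy(&Imm, EntryBytes + 2, sizeof(Imm)); // bytes 2..5: disp32
  return EntryVA + 6 + Imm;
}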
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 595c26d31e3f..4e9f5ba60d2e 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -134,7 +134,7 @@ unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned,
// Defines symbolic names for the X86 instructions.
//
#define GET_INSTRINFO_ENUM
-#define GET_GENINSTRINFO_MC_DECL
+#define GET_INSTRINFO_MC_HELPER_DECLS
#include "X86GenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
index 8d38cd32b82c..10a282dd2962 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
@@ -26,6 +26,7 @@ public:
virtual bool emitFPOData(const MCSymbol *ProcSym, SMLoc L = {}) = 0;
virtual bool emitFPOPushReg(unsigned Reg, SMLoc L = {}) = 0;
virtual bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L = {}) = 0;
+ virtual bool emitFPOStackAlign(unsigned Align, SMLoc L = {}) = 0;
virtual bool emitFPOSetFrame(unsigned Reg, SMLoc L = {}) = 0;
};
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index a5e115e5ff4d..2aec695b2dbf 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -79,7 +79,8 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case FK_SecRel_4:
return COFF::IMAGE_REL_AMD64_SECREL;
default:
- llvm_unreachable("unsupported relocation type");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
+ return COFF::IMAGE_REL_AMD64_ADDR32;
}
} else if (getMachine() == COFF::IMAGE_FILE_MACHINE_I386) {
switch (FixupKind) {
@@ -100,7 +101,8 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
case FK_SecRel_4:
return COFF::IMAGE_REL_I386_SECREL;
default:
- llvm_unreachable("unsupported relocation type");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
+ return COFF::IMAGE_REL_I386_DIR32;
}
} else
llvm_unreachable("Unsupported COFF machine type.");
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index 093dab4f2f96..bee9b7046338 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -38,6 +38,7 @@ public:
bool emitFPOData(const MCSymbol *ProcSym, SMLoc L) override;
bool emitFPOPushReg(unsigned Reg, SMLoc L) override;
bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) override;
+ bool emitFPOStackAlign(unsigned Align, SMLoc L) override;
bool emitFPOSetFrame(unsigned Reg, SMLoc L) override;
};
@@ -47,6 +48,7 @@ struct FPOInstruction {
enum Operation {
PushReg,
StackAlloc,
+ StackAlign,
SetFrame,
} Op;
unsigned RegOrOffset;
@@ -90,6 +92,7 @@ public:
bool emitFPOData(const MCSymbol *ProcSym, SMLoc L) override;
bool emitFPOPushReg(unsigned Reg, SMLoc L) override;
bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) override;
+ bool emitFPOStackAlign(unsigned Align, SMLoc L) override;
bool emitFPOSetFrame(unsigned Reg, SMLoc L) override;
};
} // end namespace
@@ -133,6 +136,11 @@ bool X86WinCOFFAsmTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc,
return false;
}
+bool X86WinCOFFAsmTargetStreamer::emitFPOStackAlign(unsigned Align, SMLoc L) {
+ OS << "\t.cv_fpo_stackalign\t" << Align << '\n';
+ return false;
+}
+
bool X86WinCOFFAsmTargetStreamer::emitFPOSetFrame(unsigned Reg, SMLoc L) {
OS << "\t.cv_fpo_setframe\t";
InstPrinter.printRegName(OS, Reg);
@@ -226,6 +234,24 @@ bool X86WinCOFFTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) {
return false;
}
+bool X86WinCOFFTargetStreamer::emitFPOStackAlign(unsigned Align, SMLoc L) {
+ if (checkInFPOPrologue(L))
+ return true;
+ if (!llvm::any_of(CurFPOData->Instructions, [](const FPOInstruction &Inst) {
+ return Inst.Op == FPOInstruction::SetFrame;
+ })) {
+ getContext().reportError(
+ L, "a frame register must be established before aligning the stack");
+ return true;
+ }
+ FPOInstruction Inst;
+ Inst.Label = emitFPOLabel();
+ Inst.Op = FPOInstruction::StackAlign;
+ Inst.RegOrOffset = Align;
+ CurFPOData->Instructions.push_back(Inst);
+ return false;
+}
+
bool X86WinCOFFTargetStreamer::emitFPOEndPrologue(SMLoc L) {
if (checkInFPOPrologue(L))
return true;
@@ -250,6 +276,8 @@ struct FPOStateMachine {
unsigned CurOffset = 0;
unsigned LocalSize = 0;
unsigned SavedRegSize = 0;
+ unsigned StackOffsetBeforeAlign = 0;
+ unsigned StackAlign = 0;
unsigned Flags = 0; // FIXME: Set HasSEH / HasEH.
SmallString<128> FrameFunc;
@@ -291,24 +319,39 @@ void FPOStateMachine::emitFrameDataRecord(MCStreamer &OS, MCSymbol *Label) {
FrameFunc.clear();
raw_svector_ostream FuncOS(FrameFunc);
const MCRegisterInfo *MRI = OS.getContext().getRegisterInfo();
+ assert((StackAlign == 0 || FrameReg != 0) &&
+ "cannot align stack without frame reg");
+ StringRef CFAVar = StackAlign == 0 ? "$T0" : "$T1";
+
if (FrameReg) {
// CFA is FrameReg + FrameRegOff.
- FuncOS << "$T0 " << printFPOReg(MRI, FrameReg) << " " << FrameRegOff
+ FuncOS << CFAVar << ' ' << printFPOReg(MRI, FrameReg) << ' ' << FrameRegOff
<< " + = ";
+
+ // Assign $T0, the VFRAME register, the value of ESP after it is aligned.
+ // Starting from the CFA, we subtract the size of all pushed registers, and
+ // align the result. While we don't store any CSRs in this area, $T0 is used
+ // by S_DEFRANGE_FRAMEPOINTER_REL records to find local variables.
+ if (StackAlign) {
+ FuncOS << "$T0 " << CFAVar << ' ' << StackOffsetBeforeAlign << " - "
+ << StackAlign << " @ = ";
+ }
} else {
// The address of return address is ESP + CurOffset, but we use .raSearch to
// match MSVC. This seems to ask the debugger to subtract some combination
// of LocalSize and SavedRegSize from ESP and grovel around in that memory
// to find the address of a plausible return address.
- FuncOS << "$T0 .raSearch = ";
+ FuncOS << CFAVar << " .raSearch = ";
}
// Caller's $eip should be dereferenced CFA, and $esp should be CFA plus 4.
- FuncOS << "$eip $T0 ^ = $esp $T0 4 + = ";
+ FuncOS << "$eip " << CFAVar << " ^ = ";
+ FuncOS << "$esp " << CFAVar << " 4 + = ";
// Each saved register is stored at an unchanging negative CFA offset.
for (RegSaveOffset RO : RegSaveOffsets)
- FuncOS << printFPOReg(MRI, RO.Reg) << " $T0 " << RO.Offset << " - ^ = ";
+ FuncOS << printFPOReg(MRI, RO.Reg) << ' ' << CFAVar << ' ' << RO.Offset
+ << " - ^ = ";
// Add it to the CV string table.
CodeViewContext &CVCtx = OS.getContext().getCVContext();
@@ -380,6 +423,10 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) {
FSM.FrameReg = Inst.RegOrOffset;
FSM.FrameRegOff = FSM.CurOffset;
break;
+ case FPOInstruction::StackAlign:
+ FSM.StackOffsetBeforeAlign = FSM.CurOffset;
+ FSM.StackAlign = Inst.RegOrOffset;
+ break;
case FPOInstruction::StackAlloc:
FSM.CurOffset += Inst.RegOrOffset;
FSM.LocalSize += Inst.RegOrOffset;
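Not part of the commit: a standalone sketch of how the FrameFunc CodeView program reads once a .cv_fpo_stackalign is recorded. $T1 names the CFA and $T0 the aligned virtual frame pointer; the frame register, offsets, and alignment below are illustrative values, not taken from any particular function.

#include <string>

// Sketch only: compose the postfix FrameFunc string for an aligned frame.
static std::string buildAlignedFrameFunc(unsigned FrameRegOff,
                                         unsigned PushedRegSize,
                                         unsigned StackAlign) {
  std::string S;
  // CFA = frame register + its recorded offset.
  S += "$T1 $ebp " + std::to_string(FrameRegOff) + " + = ";
  // VFRAME = align(CFA - size of pushed registers, StackAlign).
  S += "$T0 $T1 " + std::to_string(PushedRegSize) + " - " +
       std::to_string(StackAlign) + " @ = ";
  // Caller EIP is at [CFA]; caller ESP is CFA + 4.
  S += "$eip $T1 ^ = $esp $T1 4 + = ";
  return S;
}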
diff --git a/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp b/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
index 9a39455f9dd5..ab2cebcb58ee 100644
--- a/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
+++ b/contrib/llvm/lib/Target/X86/ShadowCallStack.cpp
@@ -31,10 +31,6 @@
using namespace llvm;
-namespace llvm {
-void initializeShadowCallStackPass(PassRegistry &);
-}
-
namespace {
class ShadowCallStack : public MachineFunctionPass {
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index fe567f4cece8..bed940d0d0e9 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -304,12 +304,12 @@ void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
}
}
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {
uint64_t M = RawMask[i];
- if (M == (uint64_t)SM_SentinelUndef) {
- ShuffleMask.push_back(M);
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
// For 256/512-bit vectors the base of the shuffle is the 128-bit
@@ -336,7 +336,7 @@ void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
}
}
-void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
@@ -354,12 +354,12 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
// 6 - Most significant bit of source byte replicated in all bit positions.
// 7 - Invert most significant bit of source byte and replicate in all bit positions.
for (int i = 0, e = RawMask.size(); i < e; ++i) {
- uint64_t M = RawMask[i];
- if (M == (uint64_t)SM_SentinelUndef) {
- ShuffleMask.push_back(M);
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
+ uint64_t M = RawMask[i];
uint64_t PermuteOp = (M >> 5) & 0x7;
if (PermuteOp == 4) {
ShuffleMask.push_back(SM_SentinelZero);
@@ -490,7 +490,7 @@ void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
}
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
unsigned VecSize = NumElts * ScalarBits;
unsigned NumLanes = VecSize / 128;
@@ -500,6 +500,10 @@ void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
uint64_t M = RawMask[i];
M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
@@ -508,7 +512,7 @@ void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
}
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
unsigned VecSize = NumElts * ScalarBits;
unsigned NumLanes = VecSize / 128;
@@ -518,6 +522,11 @@ void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
assert((NumElts == RawMask.size()) && "Unexpected mask size");
for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+
// VPERMIL2 Operation.
// Bits[3] - Match Bit.
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
@@ -548,19 +557,29 @@ void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
}
}
-void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
uint64_t EltMaskSize = RawMask.size() - 1;
- for (auto M : RawMask) {
+ for (int i = 0, e = RawMask.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
M &= EltMaskSize;
ShuffleMask.push_back((int)M);
}
}
-void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask) {
uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
- for (auto M : RawMask) {
+ for (int i = 0, e = RawMask.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t M = RawMask[i];
M &= EltMaskSize;
ShuffleMask.push_back((int)M);
}
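Not part of the commit: a minimal illustration of the new decode contract shared by these functions. An element flagged in the undef mask becomes the undef sentinel instead of being interpreted; only defined elements are masked into the valid index range. A std::vector<bool> stands in for APInt here.

#include <cstdint>
#include <vector>

// Sketch only: VPERMV-style decode with per-element undef tracking.
static std::vector<int> decodeWithUndefs(const std::vector<uint64_t> &RawMask,
                                         const std::vector<bool> &UndefElts) {
  const int SentinelUndef = -1;              // stand-in for SM_SentinelUndef
  uint64_t EltMaskSize = RawMask.size() - 1; // assumes power-of-two mask size
  std::vector<int> ShuffleMask;
  for (size_t i = 0; i != RawMask.size(); ++i)
    ShuffleMask.push_back(UndefElts[i] ? SentinelUndef
                                       : int(RawMask[i] & EltMaskSize));
  return ShuffleMask;
}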
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 6d13bd58a127..85cde14a3241 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
#define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
//===----------------------------------------------------------------------===//
@@ -108,7 +109,7 @@ void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
/// Decode a PSHUFB mask from a raw array of constants such as from
/// BUILD_VECTOR.
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a BLEND immediate mask into a shuffle mask.
@@ -131,7 +132,7 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
/// BUILD_VECTOR.
/// This can only decode basic masks (permutes + zeros), not any of the other
/// operations that VPPERM can perform.
-void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a zero extension instruction as a shuffle mask.
@@ -156,20 +157,20 @@ void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
- ArrayRef<uint64_t> RawMask,
+ ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
-void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
-void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 73bb0f2af285..1c8813815b86 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -75,6 +75,9 @@ FunctionPass *createX86OptimizeLEAs();
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();
+/// Return a pass that folds conditional branch jumps.
+FunctionPass *createX86CondBrFolding();
+
/// Return a pass that avoids creating store forward block issues in the hardware.
FunctionPass *createX86AvoidStoreForwardingBlocks();
@@ -112,8 +115,6 @@ FunctionPass *createX86FixupBWInsts();
/// to another, when profitable.
FunctionPass *createX86DomainReassignmentPass();
-void initializeFixupBWInstPassPass(PassRegistry &);
-
/// This pass replaces EVEX encoding of AVX-512 instructions by VEX
/// encoding when possible in order to reduce code size.
FunctionPass *createX86EvexToVexInsts();
@@ -121,14 +122,33 @@ FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86RetpolineThunksPass();
+/// This pass ensures instructions featuring a memory operand
+/// have distinctive <LineNumber, Discriminator> (with respect to each other).
+FunctionPass *createX86DiscriminateMemOpsPass();
+
+/// This pass applies profiling information to insert cache prefetches.
+FunctionPass *createX86InsertPrefetchPass();
+
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
-void initializeEvexToVexInstPassPass(PassRegistry &);
-
FunctionPass *createX86SpeculativeLoadHardeningPass();
+void initializeEvexToVexInstPassPass(PassRegistry &);
+void initializeFixupBWInstPassPass(PassRegistry &);
+void initializeFixupLEAPassPass(PassRegistry &);
+void initializeShadowCallStackPass(PassRegistry &);
+void initializeWinEHStatePassPass(PassRegistry &);
+void initializeX86AvoidSFBPassPass(PassRegistry &);
+void initializeX86CallFrameOptimizationPass(PassRegistry &);
+void initializeX86CmovConverterPassPass(PassRegistry &);
+void initializeX86CondBrFoldingPassPass(PassRegistry &);
+void initializeX86DomainReassignmentPass(PassRegistry &);
+void initializeX86ExecutionDomainFixPass(PassRegistry &);
+void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
+
} // End llvm namespace
#endif
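Not part of the commit: the initializer declarations added above are normally invoked once from the target's LLVMInitializeX86Target() against the global PassRegistry so the machine passes can be referenced by name from pass-management tooling. A hedged sketch, assuming X86.h is on the include path:

#include "X86.h"
#include "llvm/PassRegistry.h"

// Sketch only: register a couple of the newly declared passes; the full
// target initializer would call every initialize*Pass() listed above.
static void registerSomeX86Passes() {
  llvm::PassRegistry &PR = *llvm::PassRegistry::getPassRegistry();
  llvm::initializeX86CondBrFoldingPassPass(PR);
  llvm::initializeX86SpeculativeLoadHardeningPassPass(PR);
}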
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 63c2dc4da6cc..6b1749fc7500 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -59,10 +59,7 @@ def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
"Support xsaves instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
- "Enable SSE instructions",
- // SSE codegen depends on cmovs, and all
- // SSE1+ processors support them.
- [FeatureCMOV]>;
+ "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -91,17 +88,19 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
[Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
-// without disabling 64-bit mode.
+// without disabling 64-bit mode. Nothing should imply this feature bit. It
+// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
- "Support 64-bit instructions",
- [FeatureCMOV]>;
+ "Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
- "64-bit with cmpxchg16b",
- [Feature64Bit]>;
+ "64-bit with cmpxchg16b">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow">;
+def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
+ "true",
+ "PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
@@ -362,17 +361,30 @@ def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
-// Enable mitigation of some aspects of speculative execution related
-// vulnerabilities by removing speculatable indirect branches. This disables
-// jump-table formation, rewrites explicit `indirectbr` instructions into
-// `switch` instructions, and uses a special construct called a "retpoline" to
-// prevent speculation of the remaining indirect branches (indirect calls and
-// tail calls).
+// Lower indirect calls using a special construct called a `retpoline` to
+// mitigate potential Spectre v2 attacks against them.
+def FeatureRetpolineIndirectCalls
+ : SubtargetFeature<
+ "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
+ "Remove speculation of indirect calls from the generated code.">;
+
+// Lower indirect branches and switches either using conditional branch trees
+// or using a special construct called a `retpoline` to mitigate potential
+// Spectre v2 attacks against them.
+def FeatureRetpolineIndirectBranches
+ : SubtargetFeature<
+ "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
+ "Remove speculation of indirect branches from the generated code.">;
+
+// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
+// `retpoline-indirect-branches` above.
def FeatureRetpoline
- : SubtargetFeature<"retpoline", "UseRetpoline", "true",
+ : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
- "lowering them with a speculation blocking construct.">;
+ "lowering them with a speculation blocking construct.",
+ [FeatureRetpolineIndirectCalls,
+ FeatureRetpolineIndirectBranches]>;
// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
@@ -380,8 +392,10 @@ def FeatureRetpoline
def FeatureRetpolineExternalThunk
: SubtargetFeature<
"retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
- "Enable retpoline, but with an externally provided thunk.",
- [FeatureRetpoline]>;
+ "When lowering an indirect call or branch using a `retpoline`, rely "
+ "on the specified user provided thunk rather than emitting one "
+ "ourselves. Only has effect when combined with some other retpoline "
+ "feature.", [FeatureRetpolineIndirectCalls]>;
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
@@ -389,6 +403,25 @@ def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction">;
+def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
+ "Indicates that the BEXTR instruction is implemented as a single uop "
+ "with good throughput.">;
+
+// Combine vector math operations with shuffles into horizontal math
+// instructions if a CPU implements horizontal operations (introduced with
+// SSE3) with better latency/throughput than the alternative sequence.
+def FeatureFastHorizontalOps
+ : SubtargetFeature<
+ "fast-hops", "HasFastHorizontalOps", "true",
+ "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
+ "normal vector instructions with shuffles", [FeatureSSE3]>;
+
+// Merge branches using three-way conditional code.
+def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
+ "ThreewayBranchProfitable", "true",
+ "Merge branches to a three-way "
+ "conditional branch">;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -416,6 +449,7 @@ include "X86SchedHaswell.td"
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
+include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
@@ -430,22 +464,6 @@ def ProcIntelGLP : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
"Intel Goldmont Plus processors">;
def ProcIntelTRM : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
"Intel Tremont processors">;
-def ProcIntelHSW : SubtargetFeature<"haswell", "X86ProcFamily",
- "IntelHaswell", "Intel Haswell processors">;
-def ProcIntelBDW : SubtargetFeature<"broadwell", "X86ProcFamily",
- "IntelBroadwell", "Intel Broadwell processors">;
-def ProcIntelSKL : SubtargetFeature<"skylake", "X86ProcFamily",
- "IntelSkylake", "Intel Skylake processors">;
-def ProcIntelKNL : SubtargetFeature<"knl", "X86ProcFamily",
- "IntelKNL", "Intel Knights Landing processors">;
-def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily",
- "IntelSKX", "Intel Skylake Server processors">;
-def ProcIntelCNL : SubtargetFeature<"cannonlake", "X86ProcFamily",
- "IntelCannonlake", "Intel Cannonlake processors">;
-def ProcIntelICL : SubtargetFeature<"icelake-client", "X86ProcFamily",
- "IntelIcelakeClient", "Intel Icelake processors">;
-def ProcIntelICX : SubtargetFeature<"icelake-server", "X86ProcFamily",
- "IntelIcelakeServer", "Intel Icelake Server processors">;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
@@ -466,7 +484,7 @@ def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
foreach P = ["pentium3", "pentium3m"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -481,12 +499,12 @@ foreach P = ["pentium3", "pentium3m"] in {
def : ProcessorModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+ FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcessorModel<P, GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+ FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
// Intel Quark.
@@ -495,19 +513,21 @@ def : Proc<"lakemont", []>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B
]>;
@@ -515,10 +535,12 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [
def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -526,10 +548,12 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE41,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -540,10 +564,12 @@ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
ProcIntelAtom,
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeatureLEAForSP,
@@ -560,15 +586,16 @@ def : BonnellProc<"atom">; // Pin the generic name to the baseline.
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
ProcIntelSLM,
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeaturePOPCNT,
FeaturePCLMUL,
- FeatureAES,
FeatureSlowDivide64,
FeatureSlowTwoMemOps,
FeaturePRFCHW,
@@ -594,10 +621,12 @@ class ProcModel<string Name, SchedMachineModel Model,
def GLMFeatures : ProcessorFeatures<[], [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureMOVBE,
FeaturePOPCNT,
@@ -653,10 +682,12 @@ def : TremontProc<"tremont">;
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
FeatureLAHFSAHF,
@@ -669,13 +700,14 @@ def : NehalemProc<"corei7">;
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
- FeatureAES,
FeaturePCLMUL,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -686,13 +718,14 @@ def : WestmereProc<"westmere">;
// rather than a superset.
def SNBFeatures : ProcessorFeatures<[], [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePOPCNT,
- FeatureAES,
FeatureSlowDivide64,
FeaturePCLMUL,
FeatureXSAVE,
@@ -702,6 +735,7 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureFastScalarFSQRT,
FeatureFastSHLDRotate,
FeatureSlowIncDec,
+ FeatureMergeToThreeWayBranch,
FeatureMacroFusion
]>;
@@ -741,7 +775,6 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
HSWFeatures.Value, [
- ProcIntelHSW,
FeaturePOPCNTFalseDeps,
FeatureLZCNTFalseDeps
]>;
@@ -755,15 +788,14 @@ def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
]>;
class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
BDWFeatures.Value, [
- ProcIntelBDW,
FeaturePOPCNTFalseDeps,
FeatureLZCNTFalseDeps
]>;
def : BroadwellProc<"broadwell">;
def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
+ FeatureAES,
FeatureMPX,
- FeatureRTM,
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT,
@@ -772,14 +804,32 @@ def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
SKLFeatures.Value, [
- ProcIntelSKL,
FeatureHasFastGather,
FeaturePOPCNTFalseDeps,
FeatureSGX
]>;
def : SkylakeClientProc<"skylake">;
-def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
+def KNLFeatures : ProcessorFeatures<[], [
+ FeatureX87,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeaturePOPCNT,
+ FeatureSlowDivide64,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureLAHFSAHF,
+ FeatureSlow3OpsLEA,
+ FeatureSlowIncDec,
+ FeatureAES,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
FeatureAVX512,
FeatureERI,
FeatureCDI,
@@ -798,19 +848,19 @@ def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
// FIXME: define KNL model
class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
- ProcIntelKNL,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite,
- FeatureHasFastGather
+ FeatureHasFastGather,
+ FeatureSlowPMADDWD
]>;
def : KnightsLandingProc<"knl">;
class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
- ProcIntelKNL,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite,
FeatureHasFastGather,
+ FeatureSlowPMADDWD,
FeatureVPOPCNTDQ
]>;
def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
@@ -827,13 +877,23 @@ def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
SKXFeatures.Value, [
- ProcIntelSKX,
FeatureHasFastGather,
FeaturePOPCNTFalseDeps
]>;
def : SkylakeServerProc<"skylake-avx512">;
def : SkylakeServerProc<"skx">; // Legacy alias.
+def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [
+ FeatureVNNI
+]>;
+
+class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
+ CLXFeatures.Value, [
+ FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps
+]>;
+def : CascadelakeProc<"cascadelake">;
+
def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
FeatureAVX512,
FeatureCDI,
@@ -849,7 +909,6 @@ def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
CNLFeatures.Value, [
- ProcIntelCNL,
FeatureHasFastGather
]>;
def : CannonlakeProc<"cannonlake">;
@@ -868,14 +927,12 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,
ICLFeatures.Value, [
- ProcIntelICL,
FeatureHasFastGather
]>;
def : IcelakeClientProc<"icelake-client">;
class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
ICLFeatures.Value, [
- ProcIntelICX,
FeaturePCONFIG,
FeatureWBNOINVD,
FeatureHasFastGather
@@ -889,39 +946,43 @@ def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
foreach P = ["athlon", "athlon-tbird"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
FeatureNOPL, FeatureSlowSHLD]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD]>;
+ FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
+ FeatureCMOV]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
+ FeatureCMOV, Feature64Bit]>;
}
foreach P = ["amdfam10", "barcelona"] in {
def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD, FeatureLAHFSAHF]>;
+ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
}
// Bobcat
def : Proc<"btver1", [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureSSE4A,
FeatureFXSR,
FeatureNOPL,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureLZCNT,
@@ -934,11 +995,13 @@ def : Proc<"btver1", [
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
FeatureNOPL,
FeatureSSE4A,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureAES,
@@ -954,14 +1017,18 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeatureSlowSHLD,
FeatureLAHFSAHF,
FeatureFast15ByteNOP,
- FeatureFastPartialYMMorZMMWrite
+ FeatureFastBEXTR,
+ FeatureFastPartialYMMorZMMWrite,
+ FeatureFastHorizontalOps
]>;
// Bulldozer
-def : Proc<"bdver1", [
+def : ProcessorModel<"bdver1", BdVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -981,10 +1048,12 @@ def : Proc<"bdver1", [
FeatureMacroFusion
]>;
// Piledriver
-def : Proc<"bdver2", [
+def : ProcessorModel<"bdver2", BdVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1005,14 +1074,17 @@ def : Proc<"bdver2", [
FeatureSlowSHLD,
FeatureLAHFSAHF,
FeatureFast11ByteNOP,
+ FeatureFastBEXTR,
FeatureMacroFusion
]>;
// Steamroller
def : Proc<"bdver3", [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1035,18 +1107,21 @@ def : Proc<"bdver3", [
FeatureFSGSBase,
FeatureLAHFSAHF,
FeatureFast11ByteNOP,
+ FeatureFastBEXTR,
FeatureMacroFusion
]>;
// Excavator
def : Proc<"bdver4", [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX2,
FeatureFXSR,
FeatureNOPL,
FeatureXOP,
FeatureFMA4,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
@@ -1064,6 +1139,7 @@ def : Proc<"bdver4", [
FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF,
+ FeatureFastBEXTR,
FeatureFast11ByteNOP,
FeatureMWAITX,
FeatureMacroFusion
@@ -1078,6 +1154,8 @@ def: ProcessorModel<"znver1", Znver1Model, [
FeatureBMI2,
FeatureCLFLUSHOPT,
FeatureCLZERO,
+ FeatureCMOV,
+ Feature64Bit,
FeatureCMPXCHG16B,
FeatureF16C,
FeatureFMA,
@@ -1087,6 +1165,7 @@ def: ProcessorModel<"znver1", Znver1Model, [
FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
+ FeatureFastBEXTR,
FeatureFast15ByteNOP,
FeatureMacroFusion,
FeatureMMX,
@@ -1112,7 +1191,7 @@ def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE1, FeatureFXSR]>;
+ FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1126,6 +1205,7 @@ def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE2,
FeatureFXSR,
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 7d8f7b9dfe46..36cef98a1ef5 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -88,19 +88,19 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void X86AsmPrinter::EmitFunctionBodyStart() {
if (EmitFPOData) {
- X86TargetStreamer *XTS =
- static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
- unsigned ParamsSize =
- MF->getInfo<X86MachineFunctionInfo>()->getArgumentStackSize();
- XTS->emitFPOProc(CurrentFnSym, ParamsSize);
+ if (auto *XTS =
+ static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()))
+ XTS->emitFPOProc(
+ CurrentFnSym,
+ MF->getInfo<X86MachineFunctionInfo>()->getArgumentStackSize());
}
}
void X86AsmPrinter::EmitFunctionBodyEnd() {
if (EmitFPOData) {
- X86TargetStreamer *XTS =
- static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
- XTS->emitFPOEndProc();
+ if (auto *XTS =
+ static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()))
+ XTS->emitFPOEndProc();
}
}
@@ -129,6 +129,9 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
GVSym =
P.OutContext.getOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+ else if (MO.getTargetFlags() == X86II::MO_COFFSTUB)
+ GVSym =
+ P.OutContext.getOrCreateSymbol(Twine(".refptr.") + GVSym->getName());
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
@@ -161,6 +164,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
break;
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
+ case X86II::MO_COFFSTUB:
// These affect the name of the symbol, not any suffix.
break;
case X86II::MO_GOT_ABSOLUTE_ADDRESS:
@@ -568,9 +572,9 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// Emitting an Elf_Prop for the CET properties.
OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_X86_FEATURE_1_AND, 4);
- OutStreamer->EmitIntValue(WordSize, 4); // data size
- OutStreamer->EmitIntValue(FeatureFlagsAnd, WordSize); // data
- EmitAlignment(WordSize == 4 ? 2 : 3); // padding
+ OutStreamer->EmitIntValue(4, 4); // data size
+ OutStreamer->EmitIntValue(FeatureFlagsAnd, 4); // data
+ EmitAlignment(WordSize == 4 ? 2 : 3); // padding
OutStreamer->endSection(Nt);
OutStreamer->SwitchSection(Cur);
@@ -583,21 +587,28 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TT.isOSBinFormatCOFF()) {
// Emit an absolute @feat.00 symbol. This appears to be some kind of
// compiler features bitfield read by link.exe.
+ MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
+ OutStreamer->BeginCOFFSymbolDef(S);
+ OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
+ OutStreamer->EndCOFFSymbolDef();
+ int64_t Feat00Flags = 0;
+
if (TT.getArch() == Triple::x86) {
- MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
- OutStreamer->BeginCOFFSymbolDef(S);
- OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
- OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
- OutStreamer->EndCOFFSymbolDef();
// According to the PE-COFF spec, the LSB of this value marks the object
// for "registered SEH". This means that all SEH handler entry points
// must be registered in .sxdata. Use of any unregistered handlers will
// cause the process to terminate immediately. LLVM does not know how to
// register any SEH handlers, so its object files should be safe.
- OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
- OutStreamer->EmitAssignment(
- S, MCConstantExpr::create(int64_t(1), MMI->getContext()));
+ Feat00Flags |= 1;
}
+
+ if (M.getModuleFlag("cfguardtable"))
+ Feat00Flags |= 0x800; // Object is CFG-aware.
+
+ OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
+ OutStreamer->EmitAssignment(
+ S, MCConstantExpr::create(Feat00Flags, MMI->getContext()));
}
OutStreamer->EmitSyntaxDirective();
@@ -663,7 +674,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
emitNonLazyStubs(MMI, *OutStreamer);
// Emit stack and fault map information.
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
FM.serializeToFaultMapSection();
// This flag tells the linker that no global symbols contain code that fall
@@ -684,12 +695,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (TT.isOSBinFormatCOFF()) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
return;
}
if (TT.isOSBinFormatELF()) {
- SM.serializeToStackMapSection();
+ emitStackMaps(SM);
FM.serializeToFaultMapSection();
return;
}
diff --git a/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index ab2cbfc33e17..627a6cb14514 100644
--- a/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -52,10 +52,6 @@ using namespace llvm;
#define DEBUG_TYPE "x86-avoid-SFB"
-namespace llvm {
-void initializeX86AvoidSFBPassPass(PassRegistry &);
-} // end namespace llvm
-
static cl::opt<bool> DisableX86AvoidStoreForwardBlocks(
"x86-disable-avoid-SFB", cl::Hidden,
cl::desc("X86: Disable Store Forwarding Blocks fixup."), cl::init(false));
@@ -590,7 +586,7 @@ void X86AvoidSFBPass::breakBlockedCopies(
StDisp2 += OverlapDelta;
Size2 -= OverlapDelta;
}
- Size1 = std::abs(std::abs(LdDisp2) - std::abs(LdDisp1));
+ Size1 = LdDisp2 - LdDisp1;
// Build a copy for the point until the current blocking store's
// displacement.
@@ -645,21 +641,22 @@ removeRedundantBlockingStores(DisplacementSizeMap &BlockingStoresDispSizeMap) {
if (BlockingStoresDispSizeMap.size() <= 1)
return;
- int64_t PrevDisp = BlockingStoresDispSizeMap.begin()->first;
- unsigned PrevSize = BlockingStoresDispSizeMap.begin()->second;
- SmallVector<int64_t, 2> ForRemoval;
- for (auto DispSizePair = std::next(BlockingStoresDispSizeMap.begin());
- DispSizePair != BlockingStoresDispSizeMap.end(); ++DispSizePair) {
- int64_t CurrDisp = DispSizePair->first;
- unsigned CurrSize = DispSizePair->second;
- if (CurrDisp + CurrSize <= PrevDisp + PrevSize) {
- ForRemoval.push_back(PrevDisp);
+ SmallVector<std::pair<int64_t, unsigned>, 0> DispSizeStack;
+ for (auto DispSizePair : BlockingStoresDispSizeMap) {
+ int64_t CurrDisp = DispSizePair.first;
+ unsigned CurrSize = DispSizePair.second;
+ while (DispSizeStack.size()) {
+ int64_t PrevDisp = DispSizeStack.back().first;
+ unsigned PrevSize = DispSizeStack.back().second;
+ if (CurrDisp + CurrSize > PrevDisp + PrevSize)
+ break;
+ DispSizeStack.pop_back();
}
- PrevDisp = CurrDisp;
- PrevSize = CurrSize;
+ DispSizeStack.push_back(DispSizePair);
}
- for (auto Disp : ForRemoval)
- BlockingStoresDispSizeMap.erase(Disp);
+ BlockingStoresDispSizeMap.clear();
+ for (auto Disp : DispSizeStack)
+ BlockingStoresDispSizeMap.insert(Disp);
}
bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index dea95f56f4d5..903d24c9984a 100644
--- a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -56,10 +56,6 @@ static cl::opt<bool>
cl::desc("Avoid optimizing x86 call frames for size"),
cl::init(false), cl::Hidden);
-namespace llvm {
-void initializeX86CallFrameOptimizationPass(PassRegistry &);
-}
-
namespace {
class X86CallFrameOptimization : public MachineFunctionPass {
diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.cpp b/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
index 96ea64dc8c48..1dc83b76595d 100644
--- a/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -65,10 +65,8 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
- if (SplitVTs.size() != 1) {
- // TODO: support struct/array split
- return false;
- }
+ if (OrigArg.Ty->isVoidTy())
+ return true;
EVT VT = SplitVTs[0];
unsigned NumParts = TLI.getNumRegisters(Context, VT);
@@ -185,27 +183,36 @@ protected:
} // end anonymous namespace
-bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
-
+bool X86CallLowering::lowerReturn(
+ MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const {
+ assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+ "Return value without a vreg");
auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
- if (VReg) {
+ if (!VRegs.empty()) {
MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto &DL = MF.getDataLayout();
- const Function &F = MF.getFunction();
+ LLVMContext &Ctx = Val->getType()->getContext();
+ const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
- ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+ SmallVector<EVT, 4> SplitEVTs;
+ ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+ assert(VRegs.size() == SplitEVTs.size() &&
+ "For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> SplitArgs;
- if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) {
- MIRBuilder.buildUnmerge(Regs, VReg);
- }))
- return false;
+ for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+ ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ if (!splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs) {
+ MIRBuilder.buildUnmerge(Regs, VRegs[i]);
+ }))
+ return false;
+ }
OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.h b/contrib/llvm/lib/Target/X86/X86CallLowering.h
index 6c9dc1565dad..f5f8f9a3ef6d 100644
--- a/contrib/llvm/lib/Target/X86/X86CallLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86CallLowering.h
@@ -29,8 +29,8 @@ class X86CallLowering : public CallLowering {
public:
X86CallLowering(const X86TargetLowering &TLI);
- bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
- unsigned VReg) const override;
+ bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<unsigned> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index fcc9a296de93..fe49c9ffbd95 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -590,9 +590,11 @@ def CC_X86_64_HHVM_C : CallingConv<[
// Calling convention used on Win64
def CC_X86_Win64_C : CallingConv<[
- // FIXME: Handle byval stuff.
// FIXME: Handle varargs.
+ // Byval aggregates are passed by pointer
+ CCIfByVal<CCPassIndirect<i64>>,
+
// Promote i1/v1i1 arguments to i8.
CCIfType<[i1, v1i1], CCPromoteToType<i8>>,
diff --git a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
index 1c5f110d8c60..c3e76fd2a856 100644
--- a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -81,12 +81,6 @@ STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");
-namespace llvm {
-
-void initializeX86CmovConverterPassPass(PassRegistry &);
-
-} // end namespace llvm
-
// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
EnableCmovConverter("x86-cmov-converter",
diff --git a/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp b/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp
new file mode 100644
index 000000000000..7ce443c4656a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CondBrFolding.cpp
@@ -0,0 +1,585 @@
+//===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines a pass that optimizes conditional branches on x86 by
+// taking advantage of the three-way conditional code generated by compare
+// instructions.
+// Currently, it tries to hoist EQ and NE conditional branches to a dominating
+// conditional branch where the same EQ/NE condition code is computed. An
+// example:
+// bb_0:
+// cmp %0, 19
+// jg bb_1
+// jmp bb_2
+// bb_1:
+// cmp %0, 40
+// jg bb_3
+// jmp bb_4
+// bb_4:
+// cmp %0, 20
+// je bb_5
+// jmp bb_6
+// Here we could combine the two compares in bb_0 and bb_4 and have the
+// following code:
+// bb_0:
+// cmp %0, 20
+// jg bb_1
+// jl bb_2
+// jmp bb_5
+// bb_1:
+// cmp %0, 40
+// jg bb_3
+// jmp bb_6
+// For the case of %0 == 20 (bb_5), we eliminate two jumps, and the control
+// height for bb_6 is also reduced. bb_4 is gone after the optimization.
+//
+// There are plenty of these code patterns, especially from switch-case
+// lowering, where we generate a compare against "pivot-1" for the inner nodes
+// of the binary search tree; a sketch of such a source pattern is shown below.
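+//
+// As an illustrative sketch only (not taken from a test case), a switch over
+// a few sparse case values such as
+//
+//   int classify(int v) {
+//     switch (v) {
+//     case 20: return 1;
+//     case 40: return 2;
+//     default: return 0;
+//     }
+//   }
+//
+// tends to be lowered into a compare chain like the bb_0/bb_1/bb_4 example
+// above, which this pass can then fold.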
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-condbr-folding"
+
+STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");
+
+namespace {
+class X86CondBrFoldingPass : public MachineFunctionPass {
+public:
+ X86CondBrFoldingPass() : MachineFunctionPass(ID) {
+ initializeX86CondBrFoldingPassPass(*PassRegistry::getPassRegistry());
+ }
+ StringRef getPassName() const override { return "X86 CondBr Folding"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ }
+
+public:
+ static char ID;
+};
+} // namespace
+
+char X86CondBrFoldingPass::ID = 0;
+INITIALIZE_PASS(X86CondBrFoldingPass, "X86CondBrFolding", "X86CondBrFolding",
+                false, false)
+
+FunctionPass *llvm::createX86CondBrFolding() {
+ return new X86CondBrFoldingPass();
+}
+
+namespace {
+// A class that stores the auxiliary information for each MBB.
+struct TargetMBBInfo {
+ MachineBasicBlock *TBB;
+ MachineBasicBlock *FBB;
+ MachineInstr *BrInstr;
+ MachineInstr *CmpInstr;
+ X86::CondCode BranchCode;
+ unsigned SrcReg;
+ int CmpValue;
+ bool Modified;
+ bool CmpBrOnly;
+};
+
+// A class that optimizes conditional branches by hoisting and merging CondCodes.
+class X86CondBrFolding {
+public:
+ X86CondBrFolding(const X86InstrInfo *TII,
+ const MachineBranchProbabilityInfo *MBPI,
+ MachineFunction &MF)
+ : TII(TII), MBPI(MBPI), MF(MF) {}
+ bool optimize();
+
+private:
+ const X86InstrInfo *TII;
+ const MachineBranchProbabilityInfo *MBPI;
+ MachineFunction &MF;
+ std::vector<std::unique_ptr<TargetMBBInfo>> MBBInfos;
+ SmallVector<MachineBasicBlock *, 4> RemoveList;
+
+ void optimizeCondBr(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ void fixBranchProb(MachineBasicBlock *NextMBB, MachineBasicBlock *RootMBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ void replaceBrDest(MachineBasicBlock *MBB, MachineBasicBlock *OrigDest,
+ MachineBasicBlock *NewDest);
+ void fixupModifiedCond(MachineBasicBlock *MBB);
+ std::unique_ptr<TargetMBBInfo> analyzeMBB(MachineBasicBlock &MBB);
+ static bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ int &CmpValue);
+ bool findPath(MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &BranchPath);
+ TargetMBBInfo *getMBBInfo(MachineBasicBlock *MBB) const {
+ return MBBInfos[MBB->getNumber()].get();
+ }
+};
+} // namespace
+
+// Find a valid path along which we can reuse the CondCode.
+// The resulting path (if the return value is true) is stored in BranchPath.
+// Return value:
+//  false: no valid path is found.
+//  true: a valid path is found and the target MBB can be reached.
+bool X86CondBrFolding::findPath(
+ MachineBasicBlock *MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ assert(MBBInfo && "Expecting a candidate MBB");
+ int CmpValue = MBBInfo->CmpValue;
+
+ MachineBasicBlock *PredMBB = *MBB->pred_begin();
+ MachineBasicBlock *SaveMBB = MBB;
+ while (PredMBB) {
+ TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
+ if (!PredMBBInfo || PredMBBInfo->SrcReg != MBBInfo->SrcReg)
+ return false;
+
+ assert(SaveMBB == PredMBBInfo->TBB || SaveMBB == PredMBBInfo->FBB);
+ bool IsFalseBranch = (SaveMBB == PredMBBInfo->FBB);
+
+ X86::CondCode CC = PredMBBInfo->BranchCode;
+ assert(CC == X86::COND_L || CC == X86::COND_G || CC == X86::COND_E);
+ int PredCmpValue = PredMBBInfo->CmpValue;
+ bool ValueCmpTrue = ((CmpValue < PredCmpValue && CC == X86::COND_L) ||
+ (CmpValue > PredCmpValue && CC == X86::COND_G) ||
+ (CmpValue == PredCmpValue && CC == X86::COND_E));
+ // Check if both the result of value compare and the branch target match.
+ if (!(ValueCmpTrue ^ IsFalseBranch)) {
+ LLVM_DEBUG(dbgs() << "Dead BB detected!\n");
+ return false;
+ }
+
+ BranchPath.push_back(PredMBB);
+ // These are the conditions on which we could combine the compares.
+ if ((CmpValue == PredCmpValue) ||
+ (CmpValue == PredCmpValue - 1 && CC == X86::COND_L) ||
+ (CmpValue == PredCmpValue + 1 && CC == X86::COND_G))
+ return true;
+
+ // If PredMBB has more than one predecessor, or is not a pure cmp-and-br
+ // block, bail out.
+ if (PredMBB->pred_size() != 1 || !PredMBBInfo->CmpBrOnly)
+ return false;
+
+ SaveMBB = PredMBB;
+ PredMBB = *PredMBB->pred_begin();
+ }
+ return false;
+}
+
+// Fix up any PHI node in the successor of MBB.
+static void fixPHIsInSucc(MachineBasicBlock *MBB, MachineBasicBlock *OldMBB,
+ MachineBasicBlock *NewMBB) {
+ if (NewMBB == OldMBB)
+ return;
+ for (auto MI = MBB->instr_begin(), ME = MBB->instr_end();
+ MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == OldMBB)
+ MO.setMBB(NewMBB);
+ }
+}
+
+// Utility function to set branch probability for edge MBB->SuccMBB.
+static inline bool setBranchProb(MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccMBB,
+ BranchProbability Prob) {
+ auto MBBI = std::find(MBB->succ_begin(), MBB->succ_end(), SuccMBB);
+ if (MBBI == MBB->succ_end())
+ return false;
+ MBB->setSuccProbability(MBBI, Prob);
+ return true;
+}
+
+// Utility function to find the unconditional br instruction in MBB.
+static inline MachineBasicBlock::iterator
+findUncondBrI(MachineBasicBlock *MBB) {
+ return std::find_if(MBB->begin(), MBB->end(), [](MachineInstr &MI) -> bool {
+ return MI.getOpcode() == X86::JMP_1;
+ });
+}
+
+// Replace MBB's original successor, OrigDest, with NewDest.
+// Also update the MBBInfo for MBB.
+void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
+ MachineBasicBlock *OrigDest,
+ MachineBasicBlock *NewDest) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ MachineInstr *BrMI;
+ if (MBBInfo->TBB == OrigDest) {
+ BrMI = MBBInfo->BrInstr;
+ unsigned JNCC = GetCondBranchFromCond(MBBInfo->BranchCode);
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(JNCC))
+ .addMBB(NewDest);
+ MBBInfo->TBB = NewDest;
+ MBBInfo->BrInstr = MIB.getInstr();
+ } else { // Should be the unconditional jump stmt.
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
+ BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
+ .addMBB(NewDest);
+ MBBInfo->FBB = NewDest;
+ BrMI = &*UncondBrI;
+ }
+ fixPHIsInSucc(NewDest, OrigDest, MBB);
+ BrMI->eraseFromParent();
+ MBB->addSuccessor(NewDest);
+ setBranchProb(MBB, NewDest, MBPI->getEdgeProbability(MBB, OrigDest));
+ MBB->removeSuccessor(OrigDest);
+}
+
+// Change the CondCode and BrInstr according to MBBInfo.
+void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
+ if (!MBBInfo->Modified)
+ return;
+
+ MachineInstr *BrMI = MBBInfo->BrInstr;
+ X86::CondCode CC = MBBInfo->BranchCode;
+ MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
+ TII->get(GetCondBranchFromCond(CC)))
+ .addMBB(MBBInfo->TBB);
+ BrMI->eraseFromParent();
+ MBBInfo->BrInstr = MIB.getInstr();
+
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
+ BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
+ .addMBB(MBBInfo->FBB);
+ MBB->erase(UncondBrI);
+ MBBInfo->Modified = false;
+}
+
+//
+// Apply the transformation:
+// RootMBB -1-> ... PredMBB -3-> MBB -5-> TargetMBB
+// \-2-> \-4-> \-6-> FalseMBB
+// ==>
+// RootMBB -1-> ... PredMBB -7-> FalseMBB
+// TargetMBB <-8-/ \-2-> \-4->
+//
+// Note that PredMBB and RootMBB could be the same.
+// And in the case of dead TargetMBB, we will not have TargetMBB and edge 8.
+//
+// There is some special handling when RootMBB is COND_E, in which case we
+// directly short-cycle the branch instruction.
+//
+void X86CondBrFolding::optimizeCondBr(
+ MachineBasicBlock &MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
+
+ X86::CondCode CC;
+ TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
+ assert(MBBInfo && "Expecting a candidate MBB");
+ MachineBasicBlock *TargetMBB = MBBInfo->TBB;
+ BranchProbability TargetProb = MBPI->getEdgeProbability(&MBB, MBBInfo->TBB);
+
+ // Forward the jump from MBB's predecessor to MBB's false target.
+ MachineBasicBlock *PredMBB = BranchPath.front();
+ TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
+ assert(PredMBBInfo && "Expecting a candidate MBB");
+ if (PredMBBInfo->Modified)
+ fixupModifiedCond(PredMBB);
+ CC = PredMBBInfo->BranchCode;
+ // Don't do this if the depth of BranchPath is 1 and PredMBB uses COND_E.
+ // We will short-cycle it directly in that case.
+ if (!(CC == X86::COND_E && BranchPath.size() == 1))
+ replaceBrDest(PredMBB, &MBB, MBBInfo->FBB);
+
+ MachineBasicBlock *RootMBB = BranchPath.back();
+ TargetMBBInfo *RootMBBInfo = getMBBInfo(RootMBB);
+ assert(RootMBBInfo && "Expecting a candidate MBB");
+ if (RootMBBInfo->Modified)
+ fixupModifiedCond(RootMBB);
+ CC = RootMBBInfo->BranchCode;
+
+ if (CC != X86::COND_E) {
+ MachineBasicBlock::iterator UncondBrI = findUncondBrI(RootMBB);
+ // RootMBB: Cond jump to the original not-taken MBB.
+ X86::CondCode NewCC;
+ switch (CC) {
+ case X86::COND_L:
+ NewCC = X86::COND_G;
+ break;
+ case X86::COND_G:
+ NewCC = X86::COND_L;
+ break;
+ default:
+ llvm_unreachable("unexpected condtional code.");
+ }
+ BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
+ TII->get(GetCondBranchFromCond(NewCC)))
+ .addMBB(RootMBBInfo->FBB);
+
+ // RootMBB: Jump to TargetMBB
+ BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
+ TII->get(X86::JMP_1))
+ .addMBB(TargetMBB);
+ RootMBB->addSuccessor(TargetMBB);
+ fixPHIsInSucc(TargetMBB, &MBB, RootMBB);
+ RootMBB->erase(UncondBrI);
+ } else {
+ replaceBrDest(RootMBB, RootMBBInfo->TBB, TargetMBB);
+ }
+
+ // Fix RootMBB's CmpValue to match MBB's CmpValue for TargetMBB. Don't set the
+ // immediate directly; move MBB's compare here, as the opcode might differ.
+ if (RootMBBInfo->CmpValue != MBBInfo->CmpValue) {
+ MachineInstr *NewCmp = MBBInfo->CmpInstr;
+ NewCmp->removeFromParent();
+ RootMBB->insert(RootMBBInfo->CmpInstr, NewCmp);
+ RootMBBInfo->CmpInstr->eraseFromParent();
+ }
+
+ // Fix branch Probabilities.
+ auto fixBranchProb = [&](MachineBasicBlock *NextMBB) {
+ BranchProbability Prob;
+ for (auto &I : BranchPath) {
+ MachineBasicBlock *ThisMBB = I;
+ if (!ThisMBB->hasSuccessorProbabilities() ||
+ !ThisMBB->isSuccessor(NextMBB))
+ break;
+ Prob = MBPI->getEdgeProbability(ThisMBB, NextMBB);
+ if (Prob.isUnknown())
+ break;
+ TargetProb = Prob * TargetProb;
+ Prob = Prob - TargetProb;
+ setBranchProb(ThisMBB, NextMBB, Prob);
+ if (ThisMBB == RootMBB) {
+ setBranchProb(ThisMBB, TargetMBB, TargetProb);
+ }
+ ThisMBB->normalizeSuccProbs();
+ if (ThisMBB == RootMBB)
+ break;
+ NextMBB = ThisMBB;
+ }
+ return true;
+ };
+ if (CC != X86::COND_E && !TargetProb.isUnknown())
+ fixBranchProb(MBBInfo->FBB);
+
+ if (CC != X86::COND_E)
+ RemoveList.push_back(&MBB);
+
+ // Invalidate MBBInfo just in case.
+ MBBInfos[MBB.getNumber()] = nullptr;
+ MBBInfos[RootMBB->getNumber()] = nullptr;
+
+ LLVM_DEBUG(dbgs() << "After optimization:\nRootMBB is: " << *RootMBB << "\n");
+ if (BranchPath.size() > 1)
+ LLVM_DEBUG(dbgs() << "PredMBB is: " << *(BranchPath[0]) << "\n");
+}
+
+// Driver function for optimization: find the valid candidate and apply
+// the transformation.
+bool X86CondBrFolding::optimize() {
+ bool Changed = false;
+ LLVM_DEBUG(dbgs() << "***** X86CondBr Folding on Function: " << MF.getName()
+ << " *****\n");
+ // Setup data structures.
+ MBBInfos.resize(MF.getNumBlockIDs());
+ for (auto &MBB : MF)
+ MBBInfos[MBB.getNumber()] = analyzeMBB(MBB);
+
+ for (auto &MBB : MF) {
+ TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
+ if (!MBBInfo || !MBBInfo->CmpBrOnly)
+ continue;
+ if (MBB.pred_size() != 1)
+ continue;
+ LLVM_DEBUG(dbgs() << "Work on MBB." << MBB.getNumber()
+ << " CmpValue: " << MBBInfo->CmpValue << "\n");
+ SmallVector<MachineBasicBlock *, 4> BranchPath;
+ if (!findPath(&MBB, BranchPath))
+ continue;
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Found one path (len=" << BranchPath.size() << "):\n");
+ int Index = 1;
+ LLVM_DEBUG(dbgs() << "Target MBB is: " << MBB << "\n");
+ for (auto I = BranchPath.rbegin(); I != BranchPath.rend(); ++I, ++Index) {
+ MachineBasicBlock *PMBB = *I;
+ TargetMBBInfo *PMBBInfo = getMBBInfo(PMBB);
+ LLVM_DEBUG(dbgs() << "Path MBB (" << Index << " of " << BranchPath.size()
+ << ") is " << *PMBB);
+ LLVM_DEBUG(dbgs() << "CC=" << PMBBInfo->BranchCode
+ << " Val=" << PMBBInfo->CmpValue
+ << " CmpBrOnly=" << PMBBInfo->CmpBrOnly << "\n\n");
+ }
+#endif
+ optimizeCondBr(MBB, BranchPath);
+ Changed = true;
+ }
+ NumFixedCondBrs += RemoveList.size();
+ for (auto MBBI : RemoveList) {
+ while (!MBBI->succ_empty())
+ MBBI->removeSuccessor(MBBI->succ_end() - 1);
+
+ MBBI->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+// Analyze instructions that generate CondCode and extract information.
+bool X86CondBrFolding::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ int &CmpValue) {
+ unsigned SrcRegIndex = 0;
+ unsigned ValueIndex = 0;
+ switch (MI.getOpcode()) {
+ // TODO: handle test instructions.
+ default:
+ return false;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ SrcRegIndex = 0;
+ ValueIndex = 1;
+ break;
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ SrcRegIndex = 1;
+ ValueIndex = 2;
+ break;
+ }
+ SrcReg = MI.getOperand(SrcRegIndex).getReg();
+ if (!MI.getOperand(ValueIndex).isImm())
+ return false;
+ CmpValue = MI.getOperand(ValueIndex).getImm();
+ return true;
+}
+
+// Analyze a candidate MBB and extract all the information needed.
+// A valid candidate will have two successors.
+// It should also end with a sequence of
+//     CmpInstr,
+//     CondBr,
+//     UnCondBr.
+// Return TargetMBBInfo if MBB is a valid candidate and nullptr otherwise.
+std::unique_ptr<TargetMBBInfo>
+X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
+ MachineBasicBlock *TBB;
+ MachineBasicBlock *FBB;
+ MachineInstr *BrInstr;
+ MachineInstr *CmpInstr;
+ X86::CondCode CC;
+ unsigned SrcReg;
+ int CmpValue;
+ bool Modified;
+ bool CmpBrOnly;
+
+ if (MBB.succ_size() != 2)
+ return nullptr;
+
+ CmpBrOnly = true;
+ FBB = TBB = nullptr;
+ CmpInstr = nullptr;
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (I->getOpcode() == X86::JMP_1) {
+ if (FBB)
+ return nullptr;
+ FBB = I->getOperand(0).getMBB();
+ continue;
+ }
+ if (I->isBranch()) {
+ if (TBB)
+ return nullptr;
+ CC = X86::getCondFromBranchOpc(I->getOpcode());
+ switch (CC) {
+ default:
+ return nullptr;
+ case X86::COND_E:
+ case X86::COND_L:
+ case X86::COND_G:
+ case X86::COND_NE:
+ case X86::COND_LE:
+ case X86::COND_GE:
+ break;
+ }
+ TBB = I->getOperand(0).getMBB();
+ BrInstr = &*I;
+ continue;
+ }
+ if (analyzeCompare(*I, SrcReg, CmpValue)) {
+ if (CmpInstr)
+ return nullptr;
+ CmpInstr = &*I;
+ continue;
+ }
+ CmpBrOnly = false;
+ break;
+ }
+
+ if (!TBB || !FBB || !CmpInstr)
+ return nullptr;
+
+ // Simplify CondCode. Note this is only to simplify the findPath logic
+ // and will not change the instruction here.
+ switch (CC) {
+ case X86::COND_NE:
+ CC = X86::COND_E;
+ std::swap(TBB, FBB);
+ Modified = true;
+ break;
+ case X86::COND_LE:
+ if (CmpValue == INT_MAX)
+ return nullptr;
+ CC = X86::COND_L;
+ CmpValue += 1;
+ Modified = true;
+ break;
+ case X86::COND_GE:
+ if (CmpValue == INT_MIN)
+ return nullptr;
+ CC = X86::COND_G;
+ CmpValue -= 1;
+ Modified = true;
+ break;
+ default:
+ Modified = false;
+ break;
+ }
+ return llvm::make_unique<TargetMBBInfo>(TargetMBBInfo{
+ TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly});
+}
+
+bool X86CondBrFoldingPass::runOnMachineFunction(MachineFunction &MF) {
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ if (!ST.threewayBranchProfitable())
+ return false;
+ const X86InstrInfo *TII = ST.getInstrInfo();
+ const MachineBranchProbabilityInfo *MBPI =
+ &getAnalysis<MachineBranchProbabilityInfo>();
+
+ X86CondBrFolding CondBr(TII, MBPI, MF);
+ return CondBr.optimize();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
new file mode 100644
index 000000000000..3654bf04f4e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -0,0 +1,156 @@
+//===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass aids profile-driven cache prefetch insertion by ensuring all
+/// instructions that have a memory operand are distinguishable from each other.
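+///
+/// As an illustrative sketch (the names below are made up), a statement with
+/// two memory operands on a single source line, e.g.
+///     sum = a[i] + b[i];
+/// would otherwise yield two loads with identical debug locations; this pass
+/// bumps the DWARF discriminator of one of them so that a sample profile can
+/// refer to each memory access individually.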
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-discriminate-memops"
+
+namespace {
+
+using Location = std::pair<StringRef, unsigned>;
+
+Location diToLocation(const DILocation *Loc) {
+ return std::make_pair(Loc->getFilename(), Loc->getLine());
+}
+
+/// Ensure each instruction having a memory operand has a distinct <LineNumber,
+/// Discriminator> pair.
+void updateDebugInfo(MachineInstr *MI, const DILocation *Loc) {
+ DebugLoc DL(Loc);
+ MI->setDebugLoc(DL);
+}
+
+class X86DiscriminateMemOps : public MachineFunctionPass {
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override {
+ return "X86 Discriminate Memory Operands";
+ }
+
+public:
+ static char ID;
+
+ /// Default construct and initialize the pass.
+ X86DiscriminateMemOps();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char X86DiscriminateMemOps::ID = 0;
+
+/// Default construct and initialize the pass.
+X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}
+
+bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
+ DISubprogram *FDI = MF.getFunction().getSubprogram();
+ if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
+ return false;
+
+ // Have a default DILocation, if we find instructions with memops that don't
+ // have any debug info.
+ const DILocation *ReferenceDI =
+ DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI);
+
+ DenseMap<Location, unsigned> MemOpDiscriminators;
+ MemOpDiscriminators[diToLocation(ReferenceDI)] = 0;
+
+ // Figure out the largest discriminator issued for each Location. When we
+ // issue new discriminators, we can thus avoid issuing discriminators
+ // belonging to instructions that don't have memops. This isn't a requirement
+ // for the goals of this pass; however, it avoids unnecessary ambiguity.
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ const auto &DI = MI.getDebugLoc();
+ if (!DI)
+ continue;
+ Location Loc = diToLocation(DI);
+ MemOpDiscriminators[Loc] =
+ std::max(MemOpDiscriminators[Loc], DI->getBaseDiscriminator());
+ }
+ }
+
+ // Keep track of the discriminators seen at each Location. If an instruction's
+ // DebugInfo has a Location and discriminator we've already seen, replace its
+ // discriminator with a new one, to guarantee uniqueness.
+ DenseMap<Location, DenseSet<unsigned>> Seen;
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0)
+ continue;
+ const DILocation *DI = MI.getDebugLoc();
+ if (!DI) {
+ DI = ReferenceDI;
+ }
+ Location L = diToLocation(DI);
+ DenseSet<unsigned> &Set = Seen[L];
+ const std::pair<DenseSet<unsigned>::iterator, bool> TryInsert =
+ Set.insert(DI->getBaseDiscriminator());
+ if (!TryInsert.second) {
+ unsigned BF, DF, CI = 0;
+ DILocation::decodeDiscriminator(DI->getDiscriminator(), BF, DF, CI);
+ Optional<unsigned> EncodedDiscriminator = DILocation::encodeDiscriminator(
+ MemOpDiscriminators[L] + 1, DF, CI);
+
+ if (!EncodedDiscriminator) {
+ // FIXME(mtrofin): The assumption is that this scenario is infrequent/OK
+ // not to support. If evidence points otherwise, we can explore synthesizing
+ // unique DIs by adding fake line numbers, or by constructing 64-bit
+ // discriminators.
+ LLVM_DEBUG(dbgs() << "Unable to create a unique discriminator "
+ "for instruction with memory operand in: "
+ << DI->getFilename() << " Line: " << DI->getLine()
+ << " Column: " << DI->getColumn()
+ << ". This is likely due to a large macro expansion. \n");
+ continue;
+ }
+ // Since we were able to encode, bump the MemOpDiscriminators.
+ ++MemOpDiscriminators[L];
+ DI = DI->cloneWithDiscriminator(EncodedDiscriminator.getValue());
+ updateDebugInfo(&MI, DI);
+ Changed = true;
+ std::pair<DenseSet<unsigned>::iterator, bool> MustInsert =
+ Set.insert(DI->getBaseDiscriminator());
+ (void)MustInsert; // Silence warning in release build.
+ assert(MustInsert.second && "New discriminator shouldn't be present in set");
+ }
+
+ // Bump the reference DI to avoid cramming discriminators on line 0.
+ // FIXME(mtrofin): pin ReferenceDI on blocks or first instruction with DI
+ // in a block. It's more consistent than just relying on the last memop
+ // instruction we happened to see.
+ ReferenceDI = DI;
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createX86DiscriminateMemOpsPass() {
+ return new X86DiscriminateMemOps();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 62588e9509d3..d9ebbb506ca4 100644
--- a/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -31,10 +31,6 @@
using namespace llvm;
-namespace llvm {
-void initializeX86DomainReassignmentPass(PassRegistry &);
-}
-
#define DEBUG_TYPE "x86-domain-reassignment"
STATISTIC(NumClosuresConverted, "Number of closures converted by the pass");
@@ -736,7 +732,10 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
STI = &MF.getSubtarget<X86Subtarget>();
// GPR->K is the only transformation currently supported, bail out early if no
// AVX512.
- if (!STI->hasAVX512())
+ // TODO: We're also bailing out if AVX512BW isn't supported since we use VK32
+ // and VK64 for GR32/GR64, but those aren't legal classes on KNL. If the
+ // register coalescer doesn't clean it up and we generate a spill, we will
+ // crash.
+ if (!STI->hasAVX512() || !STI->hasBWI())
return false;
MRI = &MF.getRegInfo();
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index d082b42eefa9..9dd3f2652543 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1278,7 +1278,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
- unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
RetRegs.push_back(RetReg);
@@ -2900,23 +2900,15 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
- bool UseIncDec = false;
- if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
- UseIncDec = true;
-
unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
@@ -2938,9 +2930,11 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
};
- if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
+ if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
+ CondOpc == X86::SETOr) {
+ // We can use INC/DEC.
ResultReg = createResultReg(TLI.getRegClassFor(VT));
- bool IsDec = BaseOpc == X86ISD::DEC;
+ bool IsDec = BaseOpc == ISD::SUB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
@@ -3222,8 +3216,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
return false;
- // Functions using retpoline should use SDISel for calls.
- if (Subtarget->useRetpoline())
+ // Functions using retpoline for indirect calls need to use SDISel.
+ if (Subtarget->useRetpolineIndirectCalls())
return false;
// Handle only C, fastcc, and webkit_js calling conventions for now.
@@ -3734,9 +3728,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
- // TODO: Support this properly.
- if (Subtarget->hasAVX512())
- return 0;
VT = MVT::i8;
LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
@@ -3744,7 +3735,7 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
case MVT::i32: Opc = X86::MOV32ri; break;
case MVT::i64: {
if (isUInt<32>(Imm))
- Opc = X86::MOV32ri;
+ Opc = X86::MOV32ri64;
else if (isInt<32>(Imm))
Opc = X86::MOV64ri32;
else
@@ -3752,14 +3743,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
break;
}
}
- if (VT == MVT::i64 && Opc == X86::MOV32ri) {
- unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
- unsigned ResultReg = createResultReg(&X86::GR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
- .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
- return ResultReg;
- }
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
@@ -4009,7 +3992,8 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
}
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
- MI->eraseFromParent();
+ MachineBasicBlock::iterator I(MI);
+ removeDeadCode(I, std::next(I));
return true;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index d9bf60c2c9fb..ed297e678203 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -288,7 +288,7 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
for (unsigned i = 1; i < NumArgs; ++i)
MIB.add(MI->getOperand(i));
- MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands());
return MIB;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index f3f7f6a37360..a346085a52cb 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -25,10 +25,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace llvm {
-void initializeFixupLEAPassPass(PassRegistry &);
-}
-
#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"
@@ -43,8 +39,8 @@ class FixupLEAPass : public MachineFunctionPass {
/// Loop over all of the instructions in the basic block
/// replacing applicable instructions with LEA instructions,
/// where appropriate.
- bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
-
+ bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI,
+ bool IsSlowLEA, bool IsSlow3OpsLEA);
/// Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
@@ -62,10 +58,9 @@ class FixupLEAPass : public MachineFunctionPass {
MachineFunction::iterator MFI);
/// Given a LEA instruction which is unprofitable
- /// on Silvermont try to replace it with an equivalent ADD instruction
- void processInstructionForSLM(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI);
-
+ /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
+ void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
/// Given a LEA instruction which is unprofitable
/// on SNB+ try to replace it with other instructions.
@@ -197,8 +192,11 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
+ bool IsSlowLEA = ST.slowLEA();
+ bool IsSlow3OpsLEA = ST.slow3OpsLEA();
+
OptIncDec = !ST.slowIncDec() || Func.getFunction().optForMinSize();
- OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA();
+ OptLEA = ST.LEAusesAG() || IsSlowLEA || IsSlow3OpsLEA;
if (!OptLEA && !OptIncDec)
return false;
@@ -209,7 +207,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
- processBasicBlock(Func, I);
+ processBasicBlock(Func, I, IsSlowLEA, IsSlow3OpsLEA);
LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
return true;
@@ -278,14 +276,16 @@ static inline bool isLEA(const int Opcode) {
}
static inline bool isInefficientLEAReg(unsigned int Reg) {
- return Reg == X86::EBP || Reg == X86::RBP || Reg == X86::R13;
+ return Reg == X86::EBP || Reg == X86::RBP ||
+ Reg == X86::R13D || Reg == X86::R13;
}
static inline bool isRegOperand(const MachineOperand &Op) {
return Op.isReg() && Op.getReg() != X86::NoRegister;
}
-/// hasIneffecientLEARegs - LEA that uses base and index registers
-/// where the base is EBP, RBP, or R13
+
+/// Returns true if this LEA uses base and index registers, and the base register
+/// is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
@@ -333,13 +333,12 @@ static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) {
unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg();
unsigned DstReg = LEA.getOperand(0).getReg();
- unsigned AddrDispOp = 1 + X86::AddrDisp;
+ const MachineOperand &AddrDisp = LEA.getOperand(1 + X86::AddrDisp);
return SrcReg == DstReg &&
LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
- LEA.getOperand(AddrDispOp).isImm() &&
- (LEA.getOperand(AddrDispOp).getImm() == 1 ||
- LEA.getOperand(AddrDispOp).getImm() == -1);
+ AddrDisp.isImm() &&
+ (AddrDisp.getImm() == 1 || AddrDisp.getImm() == -1);
}
bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
@@ -351,7 +350,7 @@ bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) {
int NewOpcode;
- bool isINC = MI.getOperand(4).getImm() == 1;
+ bool isINC = MI.getOperand(1 + X86::AddrDisp).getImm() == 1;
switch (Opcode) {
case X86::LEA16r:
NewOpcode = isINC ? X86::INC16r : X86::DEC16r;
@@ -368,7 +367,7 @@ bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
MachineInstr *NewMI =
BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode))
.add(MI.getOperand(0))
- .add(MI.getOperand(1));
+ .add(MI.getOperand(1 + X86::AddrBaseReg));
MFI->erase(I);
I = static_cast<MachineBasicBlock::iterator>(NewMI);
return true;
@@ -414,21 +413,29 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p,
}
}
-void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) {
+void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) {
MachineInstr &MI = *I;
const int Opcode = MI.getOpcode();
if (!isLEA(Opcode))
return;
- if (MI.getOperand(5).getReg() != 0 || !MI.getOperand(4).isImm() ||
+
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
+ const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
+ const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
+
+ if (Segment.getReg() != 0 || !Offset.isImm() ||
!TII->isSafeToClobberEFLAGS(*MFI, I))
return;
- const unsigned DstR = MI.getOperand(0).getReg();
- const unsigned SrcR1 = MI.getOperand(1).getReg();
- const unsigned SrcR2 = MI.getOperand(3).getReg();
+ const unsigned DstR = Dst.getReg();
+ const unsigned SrcR1 = Base.getReg();
+ const unsigned SrcR2 = Index.getReg();
if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
return;
- if (MI.getOperand(2).getImm() > 1)
+ if (Scale.getImm() > 1)
return;
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
@@ -436,19 +443,19 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
// Make ADD instruction for two registers writing to LEA's destination
if (SrcR1 != 0 && SrcR2 != 0) {
const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
- const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 3 : 1);
+ const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
NewMI =
BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
LLVM_DEBUG(NewMI->dump(););
}
// Make ADD instruction for immediate
- if (MI.getOperand(4).getImm() != 0) {
+ if (Offset.getImm() != 0) {
const MCInstrDesc &ADDri =
- TII->get(getADDriFromLEA(Opcode, MI.getOperand(4)));
- const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3);
+ TII->get(getADDriFromLEA(Opcode, Offset));
+ const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
.add(SrcR)
- .addImm(MI.getOperand(4).getImm());
+ .addImm(Offset.getImm());
LLVM_DEBUG(NewMI->dump(););
}
if (NewMI) {
@@ -465,12 +472,12 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
if (!isLEA(LEAOpcode))
return nullptr;
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Base = MI.getOperand(1);
- const MachineOperand &Scale = MI.getOperand(2);
- const MachineOperand &Index = MI.getOperand(3);
- const MachineOperand &Offset = MI.getOperand(4);
- const MachineOperand &Segment = MI.getOperand(5);
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
+ const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
+ const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
if (!(TII->isThreeOperandsLEA(MI) ||
hasInefficientLEABaseReg(Base, Index)) ||
@@ -570,26 +577,28 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
}
bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
- MachineFunction::iterator MFI) {
-
+ MachineFunction::iterator MFI,
+ bool IsSlowLEA, bool IsSlow3OpsLEA) {
for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
if (OptIncDec)
if (fixupIncDec(I, MFI))
continue;
if (OptLEA) {
- if (MF.getSubtarget<X86Subtarget>().slowLEA())
- processInstructionForSLM(I, MFI);
-
- else {
- if (MF.getSubtarget<X86Subtarget>().slow3OpsLEA()) {
- if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
- MFI->erase(I);
- I = NewMI;
- }
- } else
- processInstruction(I, MFI);
+ if (IsSlowLEA) {
+ processInstructionForSlowLEA(I, MFI);
+ continue;
}
+
+ if (IsSlow3OpsLEA) {
+ if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
+ MFI->erase(I);
+ I = NewMI;
+ }
+ continue;
+ }
+
+ processInstruction(I, MFI);
}
}
return false;
diff --git a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index d2f2f21542a9..778aa505b2d9 100644
--- a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -1053,7 +1053,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
MIB.addReg(CondReg);
- MIB->setMemRefs(SetCCI.memoperands_begin(), SetCCI.memoperands_end());
+ MIB.setMemRefs(SetCCI.memoperands());
SetCCI.eraseFromParent();
return;
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index e207c343fac8..984db12201ed 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -185,7 +185,8 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
}
for (auto CS : AvailableRegs)
- if (!Uses.count(CS) && CS != X86::RIP)
+ if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP &&
+ CS != X86::ESP)
return CS;
}
}
@@ -765,7 +766,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
// FIXME: Add retpoline support and remove this.
- if (Is64Bit && IsLargeCodeModel && STI.useRetpoline())
+ if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
@@ -1103,15 +1104,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
NumBytes = alignTo(NumBytes, MaxAlign);
- // Get the offset of the stack slot for the EBP register, which is
- // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- // Update the frame offset adjustment.
- if (!IsFunclet)
- MFI.setOffsetAdjustment(-NumBytes);
- else
- assert(MFI.getOffsetAdjustment() == -(int)NumBytes &&
- "should calculate same local variable offset for funclets");
-
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(MachineFramePtr, RegState::Kill)
@@ -1167,6 +1159,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
+ // Update the offset adjustment, which is mainly used by codeview to translate
+ // from ESP to VFRAME relative local variable offsets.
+ if (!IsFunclet) {
+ if (HasFP && TRI->needsStackRealignment(MF))
+ MFI.setOffsetAdjustment(-NumBytes);
+ else
+ MFI.setOffsetAdjustment(-StackSize);
+ }
+
// For EH funclets, only allocate enough space for outgoing calls. Save the
// NumBytes value that we would've used for the parent frame.
unsigned ParentFrameNumBytes = NumBytes;
@@ -1208,6 +1209,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
+ .addImm(MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
// If there is an SUB32ri of ESP immediately before this instruction, merge
@@ -1983,6 +1991,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
}
X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
+ MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
// Assign slots for XMMs.
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -2262,9 +2271,15 @@ void X86FrameLowering::adjustForSegmentedStacks(
// Do not generate a prologue for leaf functions with a stack of size zero.
// For non-leaf functions we have to allow for the possibility that the
- // call is to a non-split function, as in PR37807.
- if (StackSize == 0 && !MFI.hasTailCall())
+ // call is to a non-split function, as in PR37807. This function could also
+ // take the address of a non-split function. When the linker tries to adjust
+ // its non-existent prologue, it would fail with an error. Mark the object
+ // file so that such failures are not errors. See this Go language bug-report
+ // https://go-review.googlesource.com/c/go/+/148819/
+ if (StackSize == 0 && !MFI.hasTailCall()) {
+ MF.getMMI().setHasNosplitStack(true);
return;
+ }
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@@ -2437,7 +2452,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
// is laid out within 2^31 bytes of each function body, but this seems
// to be sufficient for JIT.
// FIXME: Add retpoline support and remove the error here..
- if (STI.useRetpoline())
+ if (STI.useRetpolineIndirectCalls())
report_fatal_error("Emitting morestack calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
@@ -2463,8 +2478,8 @@ void X86FrameLowering::adjustForSegmentedStacks(
allocMBB->addSuccessor(&PrologueMBB);
- checkMBB->addSuccessor(allocMBB);
- checkMBB->addSuccessor(&PrologueMBB);
+ checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
+ checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
#ifdef EXPENSIVE_CHECKS
MF.verify();
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index a28d4eac8393..5ac153244df9 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -41,6 +41,10 @@ using namespace llvm;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
+ cl::desc("Enable setting constant bits to reduce size of mask immediates"),
+ cl::Hidden);
+
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
@@ -161,6 +165,9 @@ namespace {
/// If true, selector should try to optimize for minimum code size.
bool OptForMinSize;
+ /// Disable direct TLS access through segment registers.
+ bool IndirectTlsSegRefs;
+
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false),
@@ -173,6 +180,8 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
Subtarget = &MF.getSubtarget<X86Subtarget>();
+ IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
+ "indirect-tls-seg-refs");
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -235,12 +244,6 @@ namespace {
return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
}
- // Try to fold a vector load. This makes sure the load isn't non-temporal.
- bool tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
- SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp,
- SDValue &Segment);
-
/// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
@@ -443,6 +446,9 @@ namespace {
switch (StoreSize) {
default: llvm_unreachable("Unsupported store size");
+ case 4:
+ case 8:
+ return false;
case 16:
return Subtarget->hasSSE41();
case 32:
@@ -453,15 +459,23 @@ namespace {
}
bool foldLoadStoreIntoMemOperand(SDNode *Node);
- bool matchBEXTRFromAnd(SDNode *Node);
+ MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
+ bool matchBitExtract(SDNode *Node);
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
+ bool tryShiftAmountMod(SDNode *N);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node,
SDValue &InFlag);
+
+ bool tryOptimizeRem8Extend(SDNode *N);
+
+ bool onlyUsesZeroFlag(SDValue Flags) const;
+ bool hasNoSignFlagUses(SDValue Flags) const;
+ bool hasNoCarryFlagUses(SDValue Flags) const;
};
}
@@ -512,12 +526,18 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (N.getOpcode() != ISD::LOAD)
return true;
+ // Don't fold non-temporal loads if we have an instruction for them.
+ if (useNonTemporalLoad(cast<LoadSDNode>(N)))
+ return false;
+
// If N is a load, do additional profitability checks.
if (U == Root) {
switch (U->getOpcode()) {
default: break;
case X86ISD::ADD:
+ case X86ISD::ADC:
case X86ISD::SUB:
+ case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::XOR:
case X86ISD::OR:
@@ -724,7 +744,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
- !Subtarget->useRetpoline() &&
+ !Subtarget->useRetpolineIndirectCalls() &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||
@@ -827,24 +847,144 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
}
+// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
+bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
+ Opc != X86::MOVSX64rr8)
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+
+ // We need to be extracting the low byte of an extend.
+ if (!N0.isMachineOpcode() ||
+ N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
+ N0.getConstantOperandVal(1) != X86::sub_8bit)
+ return false;
+
+ // We're looking for either a movsx or movzx to match the original opcode.
+ unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
+ : X86::MOVSX32rr8_NOREX;
+ SDValue N00 = N0.getOperand(0);
+ if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
+ return false;
+
+ if (Opc == X86::MOVSX64rr8) {
+ // We had a sign extend from 8 to 64 bits; we still need to go from 32
+ // to 64.
+ MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
+ MVT::i64, N00);
+ ReplaceUses(N, Extend);
+ } else {
+ // Ok we can drop this extend and just use the original extend.
+ ReplaceUses(N, N00.getNode());
+ }
+
+ return true;
+}
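
A quick value-level sanity check of the peephole above (a standalone C++ sketch, not part of this change, and not using the LLVM APIs): once an 8-bit unsigned divrem result has been zero-extended into a 32-bit register, re-extracting its low byte and zero-extending again cannot change the value, which is why the outer MOVZX can be dropped. The MOVSX case is analogous.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 1; b < 256; ++b) {
      // The quotient of an 8-bit unsigned division already fits in 8 bits.
      uint32_t q = a / b;
      // Re-extracting the low byte and zero-extending it again is a no-op.
      assert(static_cast<uint32_t>(static_cast<uint8_t>(q)) == q);
    }
  return 0;
}
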
void X86DAGToDAGISel::PostprocessISelDAG() {
// Skip peepholes at -O0.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
- // Attempt to remove vectors moves that were inserted to zero upper bits.
-
- SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
- ++Position;
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+ bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
- if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
+ if (tryOptimizeRem8Extend(N)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Look for a TESTrr+ANDrr pattern where both operands of the test are
+ // the same. Rewrite to remove the AND.
+ unsigned Opc = N->getMachineOpcode();
+ if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
+ Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
+ N->getOperand(0) == N->getOperand(1) &&
+ N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->getOperand(0).isMachineOpcode()) {
+ SDValue And = N->getOperand(0);
+ unsigned N0Opc = And.getMachineOpcode();
+ if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
+ N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
+ MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
+ MVT::i32,
+ And.getOperand(0),
+ And.getOperand(1));
+ ReplaceUses(N, Test);
+ MadeChange = true;
+ continue;
+ }
+ if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
+ N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
+ unsigned NewOpc;
+ switch (N0Opc) {
+ case X86::AND8rm: NewOpc = X86::TEST8mr; break;
+ case X86::AND16rm: NewOpc = X86::TEST16mr; break;
+ case X86::AND32rm: NewOpc = X86::TEST32mr; break;
+ case X86::AND64rm: NewOpc = X86::TEST64mr; break;
+ }
+
+ // Need to swap the memory and register operand.
+ SDValue Ops[] = { And.getOperand(1),
+ And.getOperand(2),
+ And.getOperand(3),
+ And.getOperand(4),
+ And.getOperand(5),
+ And.getOperand(0),
+ And.getOperand(6) /* Chain */ };
+ MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
+ MVT::i32, MVT::Other, Ops);
+ ReplaceUses(N, Test);
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
+ // used. We're doing this late so we can prefer to fold the AND into masked
+ // comparisons. Doing that can be better for the live range of the mask
+ // register.
+ if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
+ Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
+ N->getOperand(0) == N->getOperand(1) &&
+ N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->getOperand(0).isMachineOpcode() &&
+ onlyUsesZeroFlag(SDValue(N, 0))) {
+ SDValue And = N->getOperand(0);
+ unsigned N0Opc = And.getMachineOpcode();
+ // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
+ // KAND instructions and KTEST use the same ISA feature.
+ if (N0Opc == X86::KANDBrr ||
+ (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
+ N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
+ unsigned NewOpc;
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
+ case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
+ case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
+ case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
+ }
+ MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
+ MVT::i32,
+ And.getOperand(0),
+ And.getOperand(1));
+ ReplaceUses(N, KTest);
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ // Attempt to remove vectors moves that were inserted to zero upper bits.
+ if (Opc != TargetOpcode::SUBREG_TO_REG)
continue;
unsigned SubRegIdx = N->getConstantOperandVal(2);
@@ -881,14 +1021,22 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
continue;
+ // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
+ // the SHA instructions which use a legacy encoding.
+ uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
+ if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
+ (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
+ (TSFlags & X86II::EncodingMask) != X86II::XOP)
+ continue;
+
// Producing instruction is another vector instruction. We can drop the
// move.
CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
-
- // If the move is now dead, delete it.
- if (Move.getNode()->use_empty())
- CurDAG->RemoveDeadNode(Move.getNode());
+ MadeChange = true;
}
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
}
@@ -964,6 +1112,7 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
+ !IndirectTlsSegRefs &&
(Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetFuchsia()))
switch (N->getPointerInfo().getAddrSpace()) {
@@ -1291,8 +1440,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
}
APInt MaskedHighBits =
APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
- KnownBits Known;
- DAG.computeKnownBits(X, Known);
+ KnownBits Known = DAG.computeKnownBits(X);
if (MaskedHighBits != Known.Zero) return true;
// We've identified a pattern that can be transformed into a single shift
@@ -1327,6 +1475,64 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
return false;
}
+// Transform "(X >> SHIFT) & (MASK << C1)" to
+// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
+// matched to a BEXTR later. Returns false if the simplification is performed.
+static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM,
+ const X86Subtarget &Subtarget) {
+ if (Shift.getOpcode() != ISD::SRL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)) ||
+ !Shift.hasOneUse() || !N.hasOneUse())
+ return true;
+
+ // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
+ if (!Subtarget.hasTBM() &&
+ !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
+ return true;
+
+ // We need to ensure that mask is a continuous run of bits.
+ if (!isShiftedMask_64(Mask)) return true;
+
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+
+ // The amount of shift we're trying to fit into the addressing mode is taken
+ // from the trailing zeros of the mask.
+ unsigned AMShiftAmt = countTrailingZeros(Mask);
+
+ // There is nothing we can do here unless the mask is removing some bits.
+ // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
+ if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+
+ MVT VT = N.getSimpleValueType();
+ SDLoc DL(N);
+ SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
+ SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
+ SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ insertDAGNode(DAG, N, NewSRLAmt);
+ insertDAGNode(DAG, N, NewSRL);
+ insertDAGNode(DAG, N, NewMask);
+ insertDAGNode(DAG, N, NewAnd);
+ insertDAGNode(DAG, N, NewSHLAmt);
+ insertDAGNode(DAG, N, NewSHL);
+ DAG.ReplaceAllUsesWith(N, NewSHL);
+
+ AM.Scale = 1 << AMShiftAmt;
+ AM.IndexReg = NewAnd;
+ return false;
+}
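
A standalone check (not part of the commit) of the algebraic identity that foldMaskedShiftToBEXTR relies on: for a contiguous mask with C1 trailing zero bits, masking after a shift by SHIFT equals shifting right by SHIFT + C1, masking with the narrowed mask, and shifting left by C1. The values are chosen so every shift count stays below 64.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t X = 0x0123456789ABCDEFULL;
  for (unsigned Shift = 0; Shift < 16; ++Shift)
    for (unsigned C1 = 1; C1 <= 3; ++C1) {        // addressing-mode scales 2/4/8
      const uint64_t Mask = 0xFFULL << C1;        // contiguous run with C1 trailing zeros
      const uint64_t Before = (X >> Shift) & Mask;
      const uint64_t After = ((X >> (Shift + C1)) & (Mask >> C1)) << C1;
      assert(Before == After);
    }
  return 0;
}
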
+
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
SDLoc dl(N);
@@ -1607,6 +1813,11 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// a scale on the outside of the mask.
if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
return false;
+
+ // Try to fold the mask and shift into BEXTR and scale.
+ if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+ return false;
+
break;
}
}
@@ -2039,20 +2250,6 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
-bool X86DAGToDAGISel::tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
- SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp,
- SDValue &Segment) {
- if (!ISD::isNON_EXTLoad(N.getNode()) ||
- useNonTemporalLoad(cast<LoadSDNode>(N)) ||
- !IsProfitableToFold(N, P, Root) ||
- !IsLegalToFold(N, P, Root, OptLevel))
- return false;
-
- return selectAddr(N.getNode(),
- N.getOperand(1), Base, Scale, Index, Disp, Segment);
-}
-
/// Return an SDNode that returns the value of the global base register.
/// Output instructions required to initialize the global base register,
/// if necessary.
@@ -2077,18 +2274,30 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
CR->getSignedMax().slt(1ull << Width);
}
-/// Test whether the given X86ISD::CMP node has any uses which require the SF
-/// or OF bits to be accurate.
-static bool hasNoSignedComparisonUses(SDNode *N) {
+static X86::CondCode getCondFromOpc(unsigned Opc) {
+ X86::CondCode CC = X86::COND_INVALID;
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromBranchOpc(Opc);
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromSETOpc(Opc);
+ if (CC == X86::COND_INVALID)
+ CC = X86::getCondFromCMovOpc(Opc);
+
+ return CC;
+}
+
+/// Test whether the given X86ISD::CMP node has any users that use a flag
+/// other than ZF.
+bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
// Examine each user of the node.
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end(); UI != UE; ++UI) {
- // Only examine CopyToReg uses.
- if (UI->getOpcode() != ISD::CopyToReg)
- return false;
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ // Only check things that use the flags.
+ if (UI.getUse().getResNo() != Flags.getResNo())
+ continue;
// Only examine CopyToReg uses that copy to EFLAGS.
- if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
- X86::EFLAGS)
+ if (UI->getOpcode() != ISD::CopyToReg ||
+ cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
// Examine each user of the CopyToReg use.
for (SDNode::use_iterator FlagUI = UI->use_begin(),
@@ -2097,105 +2306,52 @@ static bool hasNoSignedComparisonUses(SDNode *N) {
if (FlagUI.getUse().getResNo() != 1) continue;
// Anything unusual: assume conservatively.
if (!FlagUI->isMachineOpcode()) return false;
- // Examine the opcode of the user.
- switch (FlagUI->getMachineOpcode()) {
- // These comparisons don't treat the most significant bit specially.
- case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
- case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
- case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
- case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
- case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
- case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
- case X86::CMOVA16rr: case X86::CMOVA16rm:
- case X86::CMOVA32rr: case X86::CMOVA32rm:
- case X86::CMOVA64rr: case X86::CMOVA64rm:
- case X86::CMOVAE16rr: case X86::CMOVAE16rm:
- case X86::CMOVAE32rr: case X86::CMOVAE32rm:
- case X86::CMOVAE64rr: case X86::CMOVAE64rm:
- case X86::CMOVB16rr: case X86::CMOVB16rm:
- case X86::CMOVB32rr: case X86::CMOVB32rm:
- case X86::CMOVB64rr: case X86::CMOVB64rm:
- case X86::CMOVBE16rr: case X86::CMOVBE16rm:
- case X86::CMOVBE32rr: case X86::CMOVBE32rm:
- case X86::CMOVBE64rr: case X86::CMOVBE64rm:
- case X86::CMOVE16rr: case X86::CMOVE16rm:
- case X86::CMOVE32rr: case X86::CMOVE32rm:
- case X86::CMOVE64rr: case X86::CMOVE64rm:
- case X86::CMOVNE16rr: case X86::CMOVNE16rm:
- case X86::CMOVNE32rr: case X86::CMOVNE32rm:
- case X86::CMOVNE64rr: case X86::CMOVNE64rm:
- case X86::CMOVNP16rr: case X86::CMOVNP16rm:
- case X86::CMOVNP32rr: case X86::CMOVNP32rm:
- case X86::CMOVNP64rr: case X86::CMOVNP64rm:
- case X86::CMOVP16rr: case X86::CMOVP16rm:
- case X86::CMOVP32rr: case X86::CMOVP32rm:
- case X86::CMOVP64rr: case X86::CMOVP64rm:
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ switch (CC) {
+ // Comparisons which only use the zero flag.
+ case X86::COND_E: case X86::COND_NE:
continue;
// Anything else: assume conservatively.
- default: return false;
+ default:
+ return false;
}
}
}
return true;
}
-/// Test whether the given node which sets flags has any uses which require the
-/// CF flag to be accurate.
-static bool hasNoCarryFlagUses(SDNode *N) {
+/// Test whether the given X86ISD::CMP node has any uses which require the SF
+/// flag to be accurate.
+bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
// Examine each user of the node.
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
- ++UI) {
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
// Only check things that use the flags.
- if (UI.getUse().getResNo() != 1)
+ if (UI.getUse().getResNo() != Flags.getResNo())
continue;
- // Only examine CopyToReg uses.
- if (UI->getOpcode() != ISD::CopyToReg)
- return false;
// Only examine CopyToReg uses that copy to EFLAGS.
- if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
+ if (UI->getOpcode() != ISD::CopyToReg ||
+ cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
// Examine each user of the CopyToReg use.
- for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
- FlagUI != FlagUE; ++FlagUI) {
+ for (SDNode::use_iterator FlagUI = UI->use_begin(),
+ FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
// Only examine the Flag result.
- if (FlagUI.getUse().getResNo() != 1)
- continue;
+ if (FlagUI.getUse().getResNo() != 1) continue;
// Anything unusual: assume conservatively.
- if (!FlagUI->isMachineOpcode())
- return false;
- // Examine the opcode of the user.
- switch (FlagUI->getMachineOpcode()) {
- // Comparisons which don't examine the CF flag.
- case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
- case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
- case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
- case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
- case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
- case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
- case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
- case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
- case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
- case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
- case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
- case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
- case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
- case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
- case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
- case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
- case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
- case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
- case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
- case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
- case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
- case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
- case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
- case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
- case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
- case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
- case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
- case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
- case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
- case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
+ if (!FlagUI->isMachineOpcode()) return false;
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ switch (CC) {
+ // Comparisons which don't examine the SF flag.
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_E: case X86::COND_NE:
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_P: case X86::COND_NP:
continue;
// Anything else: assume conservatively.
default:
@@ -2206,23 +2362,96 @@ static bool hasNoCarryFlagUses(SDNode *N) {
return true;
}
+static bool mayUseCarryFlag(X86::CondCode CC) {
+ switch (CC) {
+ // Comparisons which don't examine the CF flag.
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_E: case X86::COND_NE:
+ case X86::COND_S: case X86::COND_NS:
+ case X86::COND_P: case X86::COND_NP:
+ case X86::COND_L: case X86::COND_GE:
+ case X86::COND_G: case X86::COND_LE:
+ return false;
+ // Anything else: assume conservatively.
+ default:
+ return true;
+ }
+}
+
+/// Test whether the given node which sets flags has any uses which require the
+/// CF flag to be accurate.
+ bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
+ // Examine each user of the node.
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ // Only check things that use the flags.
+ if (UI.getUse().getResNo() != Flags.getResNo())
+ continue;
+
+ unsigned UIOpc = UI->getOpcode();
+
+ if (UIOpc == ISD::CopyToReg) {
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
+ return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
+ FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1)
+ continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode())
+ return false;
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ if (mayUseCarryFlag(CC))
+ return false;
+ }
+
+ // This CopyToReg is ok. Move on to the next user.
+ continue;
+ }
+
+ // This might be an unselected node. So look for the pre-isel opcodes that
+ // use flags.
+ unsigned CCOpNo;
+ switch (UIOpc) {
+ default:
+ // Something unusual. Be conservative.
+ return false;
+ case X86ISD::SETCC: CCOpNo = 0; break;
+ case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
+ case X86ISD::CMOV: CCOpNo = 2; break;
+ case X86ISD::BRCOND: CCOpNo = 2; break;
+ }
+
+ X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
+ if (mayUseCarryFlag(CC))
+ return false;
+ }
+ return true;
+}
+
/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
SDValue StoredVal, SelectionDAG *CurDAG,
+ unsigned LoadOpNo,
LoadSDNode *&LoadNode,
SDValue &InputChain) {
- // is the stored value result 0 of the load?
+ // Is the stored value result 0 of the operation?
if (StoredVal.getResNo() != 0) return false;
- // are there other uses of the loaded value than the inc or dec?
+ // Are there other uses of the operation other than the store?
if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
- // is the store non-extending and non-indexed?
+ // Is the store non-extending and non-indexed?
if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
return false;
- SDValue Load = StoredVal->getOperand(0);
+ SDValue Load = StoredVal->getOperand(LoadOpNo);
// Is the stored value a non-extending and non-indexed load?
if (!ISD::isNormalLoad(Load.getNode())) return false;
@@ -2351,26 +2580,37 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
MemVT != MVT::i8)
return false;
+
+ bool IsCommutable = false;
switch (Opc) {
default:
return false;
- case X86ISD::INC:
- case X86ISD::DEC:
- case X86ISD::ADD:
- case X86ISD::ADC:
case X86ISD::SUB:
case X86ISD::SBB:
+ break;
+ case X86ISD::ADD:
+ case X86ISD::ADC:
case X86ISD::AND:
case X86ISD::OR:
case X86ISD::XOR:
+ IsCommutable = true;
break;
}
+ unsigned LoadOpNo = 0;
LoadSDNode *LoadNode = nullptr;
SDValue InputChain;
- if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
- InputChain))
- return false;
+ if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
+ LoadNode, InputChain)) {
+ if (!IsCommutable)
+ return false;
+
+ // This operation is commutable, try the other operand.
+ LoadOpNo = 1;
+ if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
+ LoadNode, InputChain))
+ return false;
+ }
SDValue Base, Scale, Index, Disp, Segment;
if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
@@ -2395,20 +2635,27 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
MachineSDNode *Result;
switch (Opc) {
- case X86ISD::INC:
- case X86ISD::DEC: {
- unsigned NewOpc =
- Opc == X86ISD::INC
- ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
- : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
- const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
- Result =
- CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
- break;
- }
case X86ISD::ADD:
- case X86ISD::ADC:
case X86ISD::SUB:
+ // Try to match inc/dec.
+ if (!Subtarget->slowIncDec() ||
+ CurDAG->getMachineFunction().getFunction().optForSize()) {
+ bool IsOne = isOneConstant(StoredVal.getOperand(1));
+ bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
+ // ADD/SUB by 1/-1 can use INC/DEC when the carry flag is not used.
+ if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
+ unsigned NewOpc =
+ ((Opc == X86ISD::ADD) == IsOne)
+ ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
+ : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
+ MVT::Other, Ops);
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
+ case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::OR:
@@ -2488,7 +2735,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
};
unsigned NewOpc = SelectRegOpcode(Opc);
- SDValue Operand = StoredVal->getOperand(1);
+ SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
// See if the operand is a constant that we can fold into an immediate
// operand.
@@ -2503,7 +2750,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
(-OperandV).getMinSignedBits() <= 8) ||
(MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
(-OperandV).getMinSignedBits() <= 32)) &&
- hasNoCarryFlagUses(StoredVal.getNode())) {
+ hasNoCarryFlagUses(StoredVal.getValue(1))) {
OperandV = -OperandV;
Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
}
@@ -2541,10 +2788,9 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
llvm_unreachable("Invalid opcode!");
}
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
- MemOp[0] = StoreNode->getMemOperand();
- MemOp[1] = LoadNode->getMemOperand();
- Result->setMemRefs(MemOp, MemOp + 2);
+ MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
+ LoadNode->getMemOperand()};
+ CurDAG->setNodeMemRefs(Result, MemOps);
// Update Load Chain uses as well.
ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
@@ -2554,39 +2800,273 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
return true;
}
+// See if this is an X & Mask that we can match to BEXTR/BZHI.
+// Where Mask is one of the following patterns:
+// a) x & (1 << nbits) - 1
+// b) x & ~(-1 << nbits)
+// c) x & (-1 >> (32 - y))
+// d) x << (32 - y) >> (32 - y)
+bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
+ assert(
+ (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
+ "Should be either an and-mask, or right-shift after clearing high bits.");
+
+ // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
+ if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
+ return false;
+
+ MVT NVT = Node->getSimpleValueType(0);
+
+ // Only supported for 32 and 64 bits.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ return false;
+
+ unsigned Size = NVT.getSizeInBits();
+
+ SDValue NBits;
+
+ // If we have BMI2's BZHI, we are ok with multi-use patterns.
+ // Else, if we only have BMI1's BEXTR, we require one-use.
+ const bool CanHaveExtraUses = Subtarget->hasBMI2();
+ auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
+ return CanHaveExtraUses ||
+ Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
+ };
+ auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
+ auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
+
+ // a) x & ((1 << nbits) + (-1))
+ auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ // Match `add`. Must only have one use!
+ if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
+ return false;
+ // We should be adding all-ones constant (i.e. subtracting one.)
+ if (!isAllOnesConstant(Mask->getOperand(1)))
+ return false;
+ // Match `1 << nbits`. Must only have one use!
+ SDValue M0 = Mask->getOperand(0);
+ if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
+ return false;
+ if (!isOneConstant(M0->getOperand(0)))
+ return false;
+ NBits = M0->getOperand(1);
+ return true;
+ };
+
+ // b) x & ~(-1 << nbits)
+ auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ // Match `~()`. Must only have one use!
+ if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
+ return false;
+ // Match `-1 << nbits`. Must only have one use!
+ SDValue M0 = Mask->getOperand(0);
+ if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
+ return false;
+ if (!isAllOnesConstant(M0->getOperand(0)))
+ return false;
+ NBits = M0->getOperand(1);
+ return true;
+ };
+
+ // Match potentially-truncated (bitwidth - y)
+ auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
+ // Skip over a truncate of the shift amount.
+ if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
+ ShiftAmt = ShiftAmt.getOperand(0);
+ // The trunc should have been the only user of the real shift amount.
+ if (!checkOneUse(ShiftAmt))
+ return false;
+ }
+ // Match the shift amount as: (bitwidth - y). It should go away, too.
+ if (ShiftAmt.getOpcode() != ISD::SUB)
+ return false;
+ auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
+ if (!V0 || V0->getZExtValue() != Size)
+ return false;
+ NBits = ShiftAmt.getOperand(1);
+ return true;
+ };
+
+ // c) x & (-1 >> (32 - y))
+ auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
+ // Match `l>>`. Must only have one use!
+ if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
+ return false;
+ // We should be shifting all-ones constant.
+ if (!isAllOnesConstant(Mask.getOperand(0)))
+ return false;
+ SDValue M1 = Mask.getOperand(1);
+ // The shift amount should not be used externally.
+ if (!checkOneUse(M1))
+ return false;
+ return matchShiftAmt(M1);
+ };
+
+ SDValue X;
+
+ // d) x << (32 - y) >> (32 - y)
+ auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
+ &X](SDNode *Node) -> bool {
+ if (Node->getOpcode() != ISD::SRL)
+ return false;
+ SDValue N0 = Node->getOperand(0);
+ if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
+ return false;
+ SDValue N1 = Node->getOperand(1);
+ SDValue N01 = N0->getOperand(1);
+ // Both of the shifts must be by the exact same value.
+ // There should not be any uses of the shift amount outside of the pattern.
+ if (N1 != N01 || !checkTwoUse(N1))
+ return false;
+ if (!matchShiftAmt(N1))
+ return false;
+ X = N0->getOperand(0);
+ return true;
+ };
+
+ auto matchLowBitMask = [&matchPatternA, &matchPatternB,
+ &matchPatternC](SDValue Mask) -> bool {
+ // FIXME: pattern c.
+ return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
+ };
+
+ if (Node->getOpcode() == ISD::AND) {
+ X = Node->getOperand(0);
+ SDValue Mask = Node->getOperand(1);
+
+ if (matchLowBitMask(Mask)) {
+ // Great.
+ } else {
+ std::swap(X, Mask);
+ if (!matchLowBitMask(Mask))
+ return false;
+ }
+ } else if (!matchPatternD(Node))
+ return false;
+
+ SDLoc DL(Node);
+
+ // If we do *NOT* have BMI2, let's find out whether 'X' is *logically*
+ // shifted (potentially with a one-use trunc in between), and if so look
+ // past the one-use truncation.
+ MVT XVT = NVT;
+ if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
+ X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
+ assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
+ X = X.getOperand(0);
+ XVT = X.getSimpleValueType();
+ assert(XVT == MVT::i64 && "Expected truncation from i64");
+ }
+
+ SDValue OrigNBits = NBits;
+ if (NBits.getValueType() != XVT) {
+ // Truncate the shift amount.
+ NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
+ insertDAGNode(*CurDAG, OrigNBits, NBits);
+
+ // Insert 8-bit NBits into lowest 8 bits of XVT-sized (32 or 64-bit)
+ // register. All the other bits are undefined, we do not care about them.
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, XVT), 0);
+ insertDAGNode(*CurDAG, OrigNBits, ImplDef);
+ NBits =
+ CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, XVT, ImplDef, NBits);
+ insertDAGNode(*CurDAG, OrigNBits, NBits);
+ }
+
+ if (Subtarget->hasBMI2()) {
+ // Great, just emit the BZHI.
+ SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
+ ReplaceNode(Node, Extract.getNode());
+ SelectCode(Extract.getNode());
+ return true;
+ }
+
+ // Else, emitting BEXTR requires one more step.
+ // The 'control' of BEXTR has the pattern of:
+ // [15...8 bit][ 7...0 bit] location
+ // [ bit count][ shift] name
+ // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
+
+ // Shift NBits left by 8 bits, thus producing 'control'.
+ // This makes the low 8 bits zero.
+ SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
+ SDValue Control = CurDAG->getNode(ISD::SHL, DL, XVT, NBits, C8);
+ insertDAGNode(*CurDAG, OrigNBits, Control);
+
+ // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
+ if (X.getOpcode() == ISD::SRL) {
+ SDValue ShiftAmt = X.getOperand(1);
+ X = X.getOperand(0);
+
+ assert(ShiftAmt.getValueType() == MVT::i8 &&
+ "Expected shift amount to be i8");
+
+ // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
+ SDValue OrigShiftAmt = ShiftAmt;
+ ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, XVT, ShiftAmt);
+ insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
+
+ // And now 'or' these low 8 bits of the shift amount into the 'control'.
+ Control = CurDAG->getNode(ISD::OR, DL, XVT, Control, ShiftAmt);
+ insertDAGNode(*CurDAG, OrigNBits, Control);
+ }
+
+ // And finally, form the BEXTR itself.
+ SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
+
+ // The 'X' was originally truncated. Do that now.
+ if (XVT != NVT) {
+ insertDAGNode(*CurDAG, OrigNBits, Extract);
+ Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
+ }
+
+ ReplaceNode(Node, Extract.getNode());
+ SelectCode(Extract.getNode());
+
+ return true;
+}
+
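
A standalone sketch (not part of the commit, plain C++ rather than the LLVM API) checking that the low-bit-mask shapes (a)-(d) matched above all select the same low n bits for 0 < n < 32, which is the value BZHI (or a BEXTR control with start 0 and length n) produces for such n. Keeping n strictly below the width avoids shifting by the full operand size.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x = 0xDEADBEEF;
  for (uint32_t n = 1; n < 32; ++n) {
    const uint32_t a = x & ((1u << n) - 1);          // pattern a
    const uint32_t b = x & ~(~0u << n);              // pattern b
    const uint32_t c = x & (~0u >> (32 - n));        // pattern c
    const uint32_t d = (x << (32 - n)) >> (32 - n);  // pattern d
    assert(a == b && b == c && c == d);
  }
  return 0;
}
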
// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
-bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
+MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
SDLoc dl(Node);
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
- return false;
+ // If we have TBM we can use an immediate for the control. If we have BMI
+ // we should only do this if the BEXTR instruction is implemented well.
+ // Otherwise moving the control into a register makes this more costly.
+ // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
+ // hoisting the move immediate would make it worthwhile with a less optimal
+ // BEXTR?
+ if (!Subtarget->hasTBM() &&
+ !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
+ return nullptr;
// Must have a shift right.
if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
- return false;
+ return nullptr;
// Shift can't have additional users.
if (!N0->hasOneUse())
- return false;
+ return nullptr;
// Only supported for 32 and 64 bits.
if (NVT != MVT::i32 && NVT != MVT::i64)
- return false;
+ return nullptr;
// Shift amount and RHS of and must be constant.
ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (!MaskCst || !ShiftCst)
- return false;
+ return nullptr;
// And RHS must be a mask.
uint64_t Mask = MaskCst->getZExtValue();
if (!isMask_64(Mask))
- return false;
+ return nullptr;
uint64_t Shift = ShiftCst->getZExtValue();
uint64_t MaskSize = countPopulation(Mask);
@@ -2594,20 +3074,41 @@ bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
// Don't interfere with something that can be handled by extracting AH.
// TODO: If we are able to fold a load, BEXTR might still be better than AH.
if (Shift == 8 && MaskSize == 8)
- return false;
+ return nullptr;
// Make sure we are only using bits that were in the original value, not
// shifted in.
if (Shift + MaskSize > NVT.getSizeInBits())
- return false;
+ return nullptr;
+
+ SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
+ unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
+ unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
+
+ // BMI requires the immediate to be placed in a register.
+ if (!Subtarget->hasTBM()) {
+ ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
+ MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
+ New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
+ }
- // Create a BEXTR node and run it through selection.
- SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
- SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
- N0->getOperand(0), C);
- ReplaceNode(Node, New.getNode());
- SelectCode(New.getNode());
- return true;
+ MachineSDNode *NewNode;
+ SDValue Input = N0->getOperand(0);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+ NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ // Update the chain.
+ ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
+ } else {
+ NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
+ }
+
+ return NewNode;
}
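
A standalone model (not part of the commit; the bextr() helper below is a hypothetical stand-in for the hardware instruction) of the control word this routine builds as Shift | (MaskSize << 8). BEXTR takes the start bit from bits 7:0 of the control and the length from bits 15:8, so the selected instruction computes the same value as the original shift-and-mask, provided Shift + MaskSize stays within the operand width.

#include <cassert>
#include <cstdint>

// Value-level model of BEXTR: (src >> start) & ((1 << len) - 1),
// with start = control[7:0] and len = control[15:8].
static uint64_t bextr(uint64_t Src, uint64_t Control) {
  const unsigned Start = Control & 0xFF;
  const unsigned Len = (Control >> 8) & 0xFF;
  return (Src >> Start) & ((1ULL << Len) - 1);
}

int main() {
  const uint64_t Src = 0x0123456789ABCDEFULL;
  const unsigned Shift = 12, MaskSize = 20;           // Shift + MaskSize <= 64
  const uint64_t Control = Shift | (MaskSize << 8);   // as built above
  const uint64_t Mask = (1ULL << MaskSize) - 1;       // isMask_64, popcount == MaskSize
  assert(bextr(Src, Control) == ((Src >> Shift) & Mask));
  return 0;
}
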
// Emit a PCMISTR(I/M) instruction.
@@ -2620,23 +3121,17 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
- // If there is a load, it will be behind a bitcast. We don't need to check
- // alignment on this load.
+ // Try to fold a load. No need to check alignment.
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (MayFoldLoad && N1->getOpcode() == ISD::BITCAST && N1->hasOneUse() &&
- tryFoldVecLoad(Node, N1.getNode(), N1.getOperand(0), Tmp0, Tmp1, Tmp2,
- Tmp3, Tmp4)) {
- SDValue Load = N1.getOperand(0);
+ if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
- Load.getOperand(0) };
+ N1.getOperand(0) };
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Update the chain.
- ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(Load)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
return CNode;
}
@@ -2659,24 +3154,18 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
- // If there is a load, it will be behind a bitcast. We don't need to check
- // alignment on this load.
+ // Try to fold a load. No need to check alignment.
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (MayFoldLoad && N2->getOpcode() == ISD::BITCAST && N2->hasOneUse() &&
- tryFoldVecLoad(Node, N2.getNode(), N2.getOperand(0), Tmp0, Tmp1, Tmp2,
- Tmp3, Tmp4)) {
- SDValue Load = N2.getOperand(0);
+ if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
- Load.getOperand(0), InFlag };
+ N2.getOperand(0), InFlag };
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
InFlag = SDValue(CNode, 3);
// Update the chain.
- ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
+ ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(Load)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
return CNode;
}
@@ -2687,6 +3176,93 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
return CNode;
}
+bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Only handle scalar shifts.
+ if (VT.isVector())
+ return false;
+
+ // Narrower shifts only mask to 5 bits in hardware.
+ unsigned Size = VT == MVT::i64 ? 64 : 32;
+
+ SDValue OrigShiftAmt = N->getOperand(1);
+ SDValue ShiftAmt = OrigShiftAmt;
+ SDLoc DL(N);
+
+ // Skip over a truncate of the shift amount.
+ if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
+ ShiftAmt = ShiftAmt->getOperand(0);
+
+ // This function is called after X86DAGToDAGISel::matchBitExtract(),
+ // so we are not afraid that we might mess up BZHI/BEXTR pattern.
+
+ SDValue NewShiftAmt;
+ if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+ SDValue Add0 = ShiftAmt->getOperand(0);
+ SDValue Add1 = ShiftAmt->getOperand(1);
+ // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+ // to avoid the ADD/SUB.
+ if (isa<ConstantSDNode>(Add1) &&
+ cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
+ NewShiftAmt = Add0;
+ // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+ // generate a NEG instead of a SUB of a constant.
+ } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(Add0) &&
+ cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
+ cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
+ // Insert a negate op.
+ // TODO: This isn't guaranteed to replace the sub if there is a logic cone
+ // that uses it that's not a shift.
+ EVT SubVT = ShiftAmt.getValueType();
+ SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
+ SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
+ NewShiftAmt = Neg;
+
+ // Insert these operands into a valid topological order so they can
+ // get selected independently.
+ insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
+ insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
+ } else
+ return false;
+ } else
+ return false;
+
+ if (NewShiftAmt.getValueType() != MVT::i8) {
+ // Need to truncate the shift amount.
+ NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
+ // Add to a correct topological ordering.
+ insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+ }
+
+ // Insert a new mask to keep the shift amount legal. This should be removed
+ // by isel patterns.
+ NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
+ CurDAG->getConstant(Size - 1, DL, MVT::i8));
+ // Place in a correct topological ordering.
+ insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+
+ SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+ NewShiftAmt);
+ if (UpdatedNode != N) {
+ // If we found an existing node, we should replace ourselves with that node
+ // and wait for it to be selected after its other users.
+ ReplaceNode(N, UpdatedNode);
+ return true;
+ }
+
+ // If the original shift amount is now dead, delete it so that we don't run
+ // it through isel.
+ if (OrigShiftAmt.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
+
+ // Now that we've optimized the shift amount, defer to normal isel to get
+ // load folding and legacy vs BMI2 selection without repeating it here.
+ SelectCode(N);
+ return true;
+}
+
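
A standalone illustration (not part of the commit) of the masking fact tryShiftAmountMod exploits: x86 32-bit shifts use only the low 5 bits of the count (64-bit shifts use the low 6), so adding or subtracting a multiple of the width to the count does not change the result. The shl32() helper below models that hardware masking explicitly with an & 31.

#include <cassert>
#include <cstdint>

// Models a 32-bit x86 shift, which masks the count to 5 bits in hardware.
static uint32_t shl32(uint32_t x, uint32_t amt) { return x << (amt & 31); }

int main() {
  const uint32_t x = 0xCAFEF00D;
  for (uint32_t y = 0; y < 32; ++y) {
    assert(shl32(x, y + 32) == shl32(x, y));              // count = y + N, N % 32 == 0
    assert(shl32(x, 32 - y) == shl32(x, (0u - y) & 31));  // count = N - y becomes NEG plus mask
  }
  return 0;
}
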
/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
@@ -2795,9 +3371,18 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, getGlobalBaseReg());
return;
- case X86ISD::SELECT:
- case X86ISD::SHRUNKBLEND: {
- // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
+ case ISD::BITCAST:
+ // Just drop all 128/256/512-bit bitcasts.
+ if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
+ NVT == MVT::f128) {
+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ break;
+
+ case X86ISD::BLENDV: {
+ // BLENDV selects like a regular VSELECT.
SDValue VSelect = CurDAG->getNode(
ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2));
@@ -2807,10 +3392,25 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}
+ case ISD::SRL:
+ if (matchBitExtract(Node))
+ return;
+ LLVM_FALLTHROUGH;
+ case ISD::SRA:
+ case ISD::SHL:
+ if (tryShiftAmountMod(Node))
+ return;
+ break;
+
case ISD::AND:
- if (matchBEXTRFromAnd(Node))
+ if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ if (matchBitExtract(Node))
return;
- if (shrinkAndImmediate(Node))
+ if (AndImmShrink && shrinkAndImmediate(Node))
return;
LLVM_FALLTHROUGH;
@@ -2898,45 +3498,85 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
getI8Imm(ShlVal, dl));
return;
}
- case X86ISD::UMUL8:
- case X86ISD::SMUL8: {
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- unsigned Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
-
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
- N0, SDValue()).getValue(1);
-
- SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
- SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
-
- ReplaceNode(Node, CNode);
- return;
- }
-
+ case X86ISD::SMUL:
+ // i16/i32/i64 are handled with isel patterns.
+ if (NVT != MVT::i8)
+ break;
+ LLVM_FALLTHROUGH;
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- unsigned LoReg, Opc;
+ unsigned LoReg, ROpc, MOpc;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
- // MVT::i8 is handled by X86ISD::UMUL8.
- case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
- case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
- case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ case MVT::i8:
+ LoReg = X86::AL;
+ ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
+ MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX;
+ ROpc = X86::MUL16r;
+ MOpc = X86::MUL16m;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX;
+ ROpc = X86::MUL32r;
+ MOpc = X86::MUL32m;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX;
+ ROpc = X86::MUL64r;
+ MOpc = X86::MUL64m;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!FoldedLoad) {
+ FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (FoldedLoad)
+ std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
- SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ MachineSDNode *CNode;
+ if (FoldedLoad) {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
+
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
+ } else {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+
+ CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
+ }
- ReplaceNode(Node, CNode);
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
+ CurDAG->RemoveDeadNode(Node);
return;
}
@@ -2947,14 +3587,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned Opc, MOpc;
bool isSigned = Opcode == ISD::SMUL_LOHI;
- bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
- case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
- MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
- case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
- MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
} else {
switch (NVT.SimpleTy) {
@@ -2975,12 +3612,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case X86::MUL64r:
SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
break;
- case X86::MULX32rr:
- SrcReg = X86::EDX; LoReg = HiReg = 0;
- break;
- case X86::MULX64rr:
- SrcReg = X86::RDX; LoReg = HiReg = 0;
- break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2994,68 +3625,43 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
- SDValue ResHi, ResLo;
-
if (foldedLoad) {
SDValue Chain;
MachineSDNode *CNode = nullptr;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
- CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
- ResHi = SDValue(CNode, 0);
- ResLo = SDValue(CNode, 1);
- Chain = SDValue(CNode, 2);
- InFlag = SDValue(CNode, 3);
- } else {
- SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
- CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
- Chain = SDValue(CNode, 0);
- InFlag = SDValue(CNode, 1);
- }
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), Chain);
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
SDValue Ops[] = { N1, InFlag };
- if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
- SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
- ResHi = SDValue(CNode, 0);
- ResLo = SDValue(CNode, 1);
- InFlag = SDValue(CNode, 2);
- } else {
- SDVTList VTs = CurDAG->getVTList(MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
- InFlag = SDValue(CNode, 0);
- }
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ InFlag = SDValue(CNode, 0);
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- if (!ResLo.getNode()) {
- assert(LoReg && "Register for low half is not defined!");
- ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
- InFlag);
- InFlag = ResLo.getValue(2);
- }
+ assert(LoReg && "Register for low half is not defined!");
+ SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
+ NVT, InFlag);
+ InFlag = ResLo.getValue(2);
ReplaceUses(SDValue(Node, 0), ResLo);
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- if (!ResHi.getNode()) {
- assert(HiReg && "Register for high half is not defined!");
- ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
- InFlag);
- InFlag = ResHi.getValue(2);
- }
+ assert(HiReg && "Register for high half is not defined!");
+ SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
+ NVT, InFlag);
+ InFlag = ResHi.getValue(2);
ReplaceUses(SDValue(Node, 1), ResHi);
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -3066,15 +3672,12 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
case ISD::SDIVREM:
- case ISD::UDIVREM:
- case X86ISD::SDIVREM8_SEXT_HREG:
- case X86ISD::UDIVREM8_ZEXT_HREG: {
+ case ISD::UDIVREM: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
unsigned Opc, MOpc;
- bool isSigned = (Opcode == ISD::SDIVREM ||
- Opcode == X86ISD::SDIVREM8_SEXT_HREG);
+ bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
@@ -3124,20 +3727,22 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
+ MachineSDNode *Move;
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
- Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
- MVT::Other, Ops), 0);
- Chain = Move.getValue(1);
+ Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
+ MVT::Other, Ops);
+ Chain = SDValue(Move, 1);
ReplaceUses(N0.getValue(1), Chain);
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
} else {
- Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
+ Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
Chain = CurDAG->getEntryNode();
}
- Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
+ SDValue());
InFlag = Chain.getValue(1);
} else {
InFlag =
@@ -3188,9 +3793,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
// Record the mem-refs
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
- CNode->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
InFlag =
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
@@ -3213,13 +3816,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Result(RNode, 0);
InFlag = SDValue(RNode, 1);
- if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
- Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
- assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
- } else {
- Result =
- CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
- }
+ Result =
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
+
ReplaceUses(SDValue(Node, 1), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -3250,8 +3849,31 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- hasNoSignedComparisonUses(Node))
+ // Optimizations for TEST compares.
+ if (!isNullConstant(N1))
+ break;
+
+ // Save the original VT of the compare.
+ MVT CmpVT = N0.getSimpleValueType();
+
+    // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
+ // by a test instruction. The test should be removed later by
+ // analyzeCompare if we are using only the zero flag.
+ // TODO: Should we check the users and use the BEXTR flags directly?
+ if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
+ unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
+ : X86::TEST32rr;
+ SDValue BEXTR = SDValue(NewNode, 0);
+ NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ }
+
+ // We can peek through truncates, but we need to be careful below.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
N0 = N0.getOperand(0);
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
@@ -3259,33 +3881,75 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Look past the truncate if CMP is the only use of it.
if (N0.getOpcode() == ISD::AND &&
N0.getNode()->hasOneUse() &&
- N0.getValueType() != MVT::i8 &&
- X86::isZeroNode(N1)) {
+ N0.getValueType() != MVT::i8) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C) break;
uint64_t Mask = C->getZExtValue();
+ // Check if we can replace AND+IMM64 with a shift. This is possible for
+      // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
+ // flag.
+ if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
+ onlyUsesZeroFlag(SDValue(Node, 0))) {
+ if (isMask_64(~Mask)) {
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64,
+ N0.getOperand(0), Imm), 0);
+ MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
+ MVT::i32, Shift, Shift);
+ ReplaceNode(Node, Test);
+ return;
+ }
+ if (isMask_64(Mask)) {
+ unsigned LeadingZeros = countLeadingZeros(Mask);
+ SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
+ SDValue Shift =
+ SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64,
+ N0.getOperand(0), Imm), 0);
+ MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
+ MVT::i32, Shift, Shift);
+ ReplaceNode(Node, Test);
+ return;
+ }
+ }
+
MVT VT;
int SubRegOp;
- unsigned Op;
+ unsigned ROpc, MOpc;
+
+      // For each of these checks we need to be careful if the sign flag is
+      // being used. It is only safe to use the sign flag in two cases:
+      // either the sign bit in the shrunken mask is zero or the final test
+      // size is equal to the original compare size.
if (isUInt<8>(Mask) &&
- (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
+ (!(Mask & 0x80) || CmpVT == MVT::i8 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, convert "testl %eax, $8" to "testb %al, $8"
VT = MVT::i8;
SubRegOp = X86::sub_8bit;
- Op = X86::TEST8ri;
+ ROpc = X86::TEST8ri;
+ MOpc = X86::TEST8mi;
} else if (OptForMinSize && isUInt<16>(Mask) &&
- (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
+ (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, "testl %eax, $32776" to "testw %ax, $32776".
// NOTE: We only want to form TESTW instructions if optimizing for
// min size. Otherwise we only save one byte and possibly get a length
// changing prefix penalty in the decoders.
VT = MVT::i16;
SubRegOp = X86::sub_16bit;
- Op = X86::TEST16ri;
+ ROpc = X86::TEST16ri;
+ MOpc = X86::TEST16mi;
} else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
- (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
+ ((!(Mask & 0x80000000) &&
+ // Without minsize 16-bit Cmps can get here so we need to
+ // be sure we calculate the correct sign flag if needed.
+ (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
+ CmpVT == MVT::i32 ||
+ hasNoSignFlagUses(SDValue(Node, 0)))) {
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
// NOTE: We only want to run that transform if N0 is 32 or 64 bits.
       // Otherwise, we find ourselves in a position where we have to do
@@ -3293,21 +3957,37 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// they had a good reason not to and do not promote here.
VT = MVT::i32;
SubRegOp = X86::sub_32bit;
- Op = X86::TEST32ri;
+ ROpc = X86::TEST32ri;
+ MOpc = X86::TEST32mi;
} else {
// No eligible transformation was found.
break;
}
+ // FIXME: We should be able to fold loads here.
+
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
SDValue Reg = N0.getOperand(0);
- // Extract the subregister if necessary.
- if (N0.getValueType() != VT)
- Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
-
// Emit a testl or testw.
- SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
+ MachineSDNode *NewNode;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
+ Reg.getOperand(0) };
+ NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
+ // Update the chain.
+ ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(NewNode,
+ {cast<LoadSDNode>(Reg)->getMemOperand()});
+ } else {
+ // Extract the subregister if necessary.
+ if (N0.getValueType() != VT)
+ Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
+
+ NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
+ }
// Replace CMP with TEST.
ReplaceNode(Node, NewNode);
return;
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 67a127fe0a2b..b6a692ee187d 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19,7 +19,6 @@
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
-#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -196,6 +195,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
+ // Funnel shifts.
+ for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
+ setOperationAction(ShiftOp , MVT::i16 , Custom);
+ setOperationAction(ShiftOp , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ShiftOp , MVT::i64 , Custom);
+ }
+
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
@@ -533,6 +540,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ // These might be better off as horizontal vector ops.
+ setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
+
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
@@ -543,15 +554,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
- // Expand FP immediates into loads from the stack, except for the special
- // cases we handle.
- addLegalFPImmediate(APFloat(+0.0)); // xorpd
- addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (UseX87 && X86ScalarSSEf32) {
+ } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
- addRegisterClass(MVT::f64, &X86::RFP64RegClass);
+ if (UseX87)
+ addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
@@ -559,10 +567,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
- setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ if (UseX87)
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ if (UseX87)
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
@@ -570,17 +580,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- // Special cases we handle for FP constants.
- addLegalFPImmediate(APFloat(+0.0f)); // xorps
- addLegalFPImmediate(APFloat(+0.0)); // FLD0
- addLegalFPImmediate(APFloat(+1.0)); // FLD1
- addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
-
- // Always expand sin/cos functions even though x87 has an instruction.
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ if (UseX87) {
+ // Always expand sin/cos functions even though x87 has an instruction.
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ }
} else if (UseX87) {
// f32 and f64 in x87.
// Set up the FP register classes.
@@ -596,14 +601,27 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
- addLegalFPImmediate(APFloat(+0.0)); // FLD0
- addLegalFPImmediate(APFloat(+1.0)); // FLD1
- addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- addLegalFPImmediate(APFloat(+0.0f)); // FLD0
- addLegalFPImmediate(APFloat(+1.0f)); // FLD1
- addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
- addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // Expand FP32 immediates into loads from the stack, save special cases.
+ if (isTypeLegal(MVT::f32)) {
+ if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ } else // SSE immediates.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ }
+ // Expand FP64 immediates into loads from the stack, save special cases.
+ if (isTypeLegal(MVT::f64)) {
+ if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ } else // SSE immediates.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
// We don't support FMA.
@@ -613,7 +631,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Long double always uses X87, except f128 in MMX.
if (UseX87) {
if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
- addRegisterClass(MVT::f128, &X86::VR128RegClass);
+ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
@@ -778,11 +797,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
+ for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
+ MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SREM, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ setOperationAction(ISD::UREM, VT, Custom);
+ }
+
+ setOperationAction(ISD::MUL, MVT::v2i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i8, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i8, Custom);
+
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
@@ -799,6 +833,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
+ setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
+
+ if (!ExperimentalVectorWideningLegalization) {
+ // Use widening instead of promotion.
+ for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16 }) {
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
+ }
+ }
+
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -813,7 +867,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::ABS, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -834,9 +888,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// scalars) and extend in-register to a legal 128-bit vector type. For sext
// loads these must work with a single scalar load.
for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
@@ -857,21 +908,36 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
- // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
- for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
- setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
- }
-
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
+
+ // Custom legalize these to avoid over promotion or custom promotion.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+
+    // By marking FP_TO_SINT v8i16 as Custom, we trick type legalization into
+ // promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
+ // split again based on the input type, this will cause an AssertSExt i16 to
+ // be emitted instead of an AssertZExt. This will allow packssdw followed by
+ // packuswb to be used to truncate to v8i8. This is necessary since packusdw
+ // isn't available until sse4.1.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -887,6 +953,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
+ // We want to legalize this to an f64 load rather than an i64 load on
+ // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
+ // store.
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v8i8, Custom);
+
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
@@ -897,6 +975,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
+ if (ExperimentalVectorWideningLegalization) {
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ } else {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
+ }
+
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
@@ -907,7 +998,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
- setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+
+ // With AVX512, expanding (and promoting the shifts) is better.
+ if (!Subtarget.hasAVX512())
+ setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -919,6 +1013,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+
+ // These might be better off as horizontal vector ops.
+ setOperationAction(ISD::ADD, MVT::i16, Custom);
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ setOperationAction(ISD::SUB, MVT::i16, Custom);
+ setOperationAction(ISD::SUB, MVT::i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
@@ -953,17 +1053,22 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
- for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ if (!ExperimentalVectorWideningLegalization) {
+ // Avoid narrow result types when widening. The legal types are listed
+ // in the next loop.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+ }
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
- setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
+ if (!ExperimentalVectorWideningLegalization)
+ setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
@@ -1039,12 +1144,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRA, VT, Custom);
}
+ if (ExperimentalVectorWideningLegalization) {
+ // These types need custom splitting if their input is a 128-bit vector.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ }
+
setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
- setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
+
+    // With BWI, expanding (and promoting the shifts) is better.
+ if (!Subtarget.hasBWI())
+ setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1061,9 +1180,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
+      // TODO - remove this once 256-bit X86ISD::ANDNP is correctly split.
+ setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);
+
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
@@ -1086,19 +1207,28 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
-
+ setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
+ setOperationAction(ISD::ABS, MVT::v4i64, Custom);
setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
@@ -1107,11 +1237,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
}
- if (HasInt256) {
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
+ for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+ }
+ if (HasInt256) {
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
@@ -1156,15 +1287,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (HasInt256)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
- // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
- setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
- }
-
if (HasInt256) {
// Custom legalize 2x32 to get a little better code.
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
@@ -1224,6 +1346,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -1307,6 +1433,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ if (ExperimentalVectorWideningLegalization) {
+ // Need to custom widen this if we don't have AVX512BW.
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
+ }
+
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
@@ -1315,12 +1448,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, VT, Legal);
}
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
-
// Without BWI we need to use custom lowering to handle MVT::v64i8 input.
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
+ for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+ }
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
@@ -1330,11 +1462,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
- setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
@@ -1347,7 +1482,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1358,13 +1492,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
- // Need to promote to 64-bit even though we have 32-bit masked instructions
- // because the IR optimizers rearrange bitcasts around logic ops leaving
- // too many variations to handle if we don't promote them.
- setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
- setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
- setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
-
if (Subtarget.hasDQI()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
@@ -1378,7 +1505,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
setOperationAction(ISD::CTLZ, VT, Legal);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
@@ -1407,16 +1533,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
- for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
- setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
- }
-
// Need to custom split v32i16/v64i8 bitcasts.
if (!Subtarget.hasBWI()) {
setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
}
+
+ if (Subtarget.hasVBMI2()) {
+ for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
+ }
+ }
}// has AVX-512
// This block controls legalization for operations that don't have
@@ -1468,7 +1596,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasCDI()) {
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::CTLZ, VT, Legal);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
@@ -1490,6 +1617,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1550,6 +1681,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
@@ -1563,17 +1695,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
- setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
- setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
+ // The condition codes aren't legal in SSE/AVX and under AVX512 we use
+ // setcc all the way to isel and prefer SETGT in some isel patterns.
+ setCondCodeAction(ISD::SETLT, VT, Custom);
+ setCondCodeAction(ISD::SETLE, VT, Custom);
}
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
@@ -1584,6 +1720,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v64i8, MVT::v32i16 })
setOperationAction(ISD::CTPOP, VT, Legal);
}
+
+ if (Subtarget.hasVBMI2()) {
+ setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
+ setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
+ }
}
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
@@ -1630,6 +1771,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
+
+ if (Subtarget.hasVBMI2()) {
+ // TODO: Make these legal even without VLX?
+ for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
+ MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
+ }
+ }
}
// We want to custom lower some of our intrinsics.
@@ -1731,8 +1881,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
- setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1787,13 +1935,13 @@ SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
}
TargetLoweringBase::LegalizeTypeAction
-X86TargetLowering::getPreferredVectorAction(EVT VT) const {
+X86TargetLowering::getPreferredVectorAction(MVT VT) const {
if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return TypeSplitVector;
if (ExperimentalVectorWideningLegalization &&
VT.getVectorNumElements() != 1 &&
- VT.getVectorElementType().getSimpleVT() != MVT::i1)
+ VT.getVectorElementType() != MVT::i1)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
@@ -1926,7 +2074,8 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
if (Subtarget.hasSSE2())
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
- if (Subtarget.hasSSE1())
+ // If we have SSE1 registers we should be able to use them.
+ if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
return MVT::v4f32;
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
@@ -3138,7 +3287,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
// If value is passed via pointer - do a load.
- if (VA.getLocInfo() == CCValAssign::Indirect)
+ if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
ArgValue =
DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
@@ -3621,13 +3770,29 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg = DAG.getBitcast(RegVT, Arg);
break;
case CCValAssign::Indirect: {
- // Store the argument.
- SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
- int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- Chain = DAG.getStore(
- Chain, dl, Arg, SpillSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- Arg = SpillSlot;
+ if (isByVal) {
+ // Memcpy the argument to a temporary stack slot to prevent
+ // the caller from seeing any modifications the callee may make
+ // as guaranteed by the `byval` attribute.
+ int FrameIdx = MF.getFrameInfo().CreateStackObject(
+ Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
+ false);
+ SDValue StackSlot =
+ DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
+ Chain =
+ CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
+        // From now on, treat this as a regular pointer.
+ Arg = StackSlot;
+ isByVal = false;
+ } else {
+ // Store the argument.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ Chain = DAG.getStore(
+ Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ Arg = SpillSlot;
+ }
break;
}
}
@@ -4405,6 +4570,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
case X86ISD::VPERMV3:
return true;
// 'Faux' Target Shuffles.
+ case ISD::OR:
case ISD::AND:
case X86ISD::ANDNP:
return true;
@@ -4686,6 +4852,14 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+ // If we are using XMM registers in the ABI and the condition of the select is
+ // a floating-point compare and we have blendv or conditional move, then it is
+ // cheaper to select instead of doing a cross-register move and creating a
+ // load that depends on the compare result.
+ return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
+}
+
bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
// TODO: It might be a win to ease or lift this restriction, but the generic
// folds in DAGCombiner conflict with vector folds for an AVX512 target.
@@ -4695,6 +4869,31 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
return true;
}
+bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+ // TODO: We handle scalars using custom code, but generic combining could make
+ // that unnecessary.
+ APInt MulC;
+ if (!ISD::isConstantSplatVector(C.getNode(), MulC))
+ return false;
+
+ // If vector multiply is legal, assume that's faster than shl + add/sub.
+  // TODO: Multiply is a complex op with higher latency and lower throughput in
+ // most implementations, so this check could be loosened based on type
+ // and/or a CPU attribute.
+ if (isOperationLegal(ISD::MUL, VT))
+ return false;
+
+ // shl+add, shl+sub, shl+add+neg
+ return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
+ (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
+}
+
+bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+ bool IsSigned) const {
+  // f80 FP_TO_UINT is more efficient using Strict code if FCMOV is available.
+ return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
+}
+
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
@@ -4709,6 +4908,18 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
return (Index % ResVT.getVectorNumElements()) == 0;
}
+bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+ // If the vector op is not supported, try to convert to scalar.
+ EVT VecVT = VecOp.getValueType();
+ if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
+ return true;
+
+ // If the vector op is supported, but the scalar op is not, the transform may
+ // not be worthwhile.
+ EVT ScalarVT = VecVT.getScalarType();
+ return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
+}
+
bool X86TargetLowering::isCheapToSpeculateCttz() const {
// Speculate cttz only if we can directly use TZCNT.
return Subtarget.hasBMI();
@@ -4721,7 +4932,11 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
EVT BitcastVT) const {
- if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
+ if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
+ BitcastVT.getVectorElementType() == MVT::i1)
+ return false;
+
+ if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
return false;
return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
@@ -4763,17 +4978,14 @@ bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (VT != MVT::i32 && VT != MVT::i64)
return false;
- // A mask and compare against constant is ok for an 'andn' too
- // even though the BMI instruction doesn't have an immediate form.
-
- return true;
+ return !isa<ConstantSDNode>(Y);
}
bool X86TargetLowering::hasAndNot(SDValue Y) const {
EVT VT = Y.getValueType();
- if (!VT.isVector()) // x86 can't form 'andn' with an immediate.
- return !isa<ConstantSDNode>(Y) && hasAndNotCompare(Y);
+ if (!VT.isVector())
+ return hasAndNotCompare(Y);
// Vector.
@@ -4800,6 +5012,12 @@ bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
return true;
}
+bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
+ // Any legal vector type can be splatted more efficiently than
+ // loading/spilling from memory.
+ return isTypeLegal(VT);
+}
+
MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
MVT VT = MVT::getIntegerVT(NumBits);
if (isTypeLegal(VT))
@@ -5408,24 +5626,29 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
return DAG.getBitcast(VT, Vec);
}
-static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
+static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
SelectionDAG &DAG) {
EVT InVT = In.getValueType();
- assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");
-
- if (VT.is128BitVector() && InVT.is128BitVector())
- return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
- : DAG.getZeroExtendVectorInReg(In, DL, VT);
+ assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
// For 256-bit vectors, we only need the lower (128-bit) input half.
// For 512-bit vectors, we only need the lower input half or quarter.
- if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
- int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
+ if (InVT.getSizeInBits() > 128) {
+ assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
+ "Expected VTs to be the same size!");
+ unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
In = extractSubVector(In, 0, DAG, DL,
- std::max(128, (int)VT.getSizeInBits() / Scale));
+ std::max(128U, VT.getSizeInBits() / Scale));
+ InVT = In.getValueType();
}
- return DAG.getNode(Opc, DL, VT, In);
+ if (VT.getVectorNumElements() == InVT.getVectorNumElements())
+ return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ DL, VT, In);
+
+ return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG,
+ DL, VT, In);
}
/// Returns a vector_shuffle node for an unpackl operation.
@@ -5463,19 +5686,6 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
-static SDValue peekThroughBitcasts(SDValue V) {
- while (V.getNode() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- return V;
-}
-
-static SDValue peekThroughOneUseBitcasts(SDValue V) {
- while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
- V.getOperand(0).hasOneUse())
- V = V.getOperand(0);
- return V;
-}
-
// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
@@ -5496,10 +5706,10 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
Ptr = Ptr->getOperand(0);
auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
- if (!CNode || CNode->isMachineConstantPoolEntry())
+ if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
return nullptr;
- return dyn_cast<Constant>(CNode->getConstVal());
+ return CNode->getConstVal();
}
// Extract raw constant bits from constant pools.
@@ -5632,15 +5842,34 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
+ APInt UndefSrcElts(NumSrcElts, 0);
+ SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ const SDValue &Src = Op.getOperand(i);
+ if (Src.isUndef()) {
+ UndefSrcElts.setBit(i);
+ continue;
+ }
+ auto *Cst = cast<ConstantFPSDNode>(Src);
+ APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
+ SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
+ }
+ return CastBitData(UndefSrcElts, SrcEltBits);
+ }
// Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
- if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
+ unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
+ if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
return false;
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
- unsigned NumSrcElts = CstTy->getVectorNumElements();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
@@ -5685,19 +5914,107 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ // Extract constant bits from a subvector's source.
+ if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isa<ConstantSDNode>(Op.getOperand(1))) {
+ // TODO - support extract_subvector through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts, EltBits, AllowWholeUndefs,
+ AllowPartialUndefs)) {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumSubElts = VT.getVectorNumElements();
+ unsigned BaseIdx = Op.getConstantOperandVal(1);
+ UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
+ if ((BaseIdx + NumSubElts) != NumSrcElts)
+ EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
+ if (BaseIdx != 0)
+ EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
+ return true;
+ }
+ }
+
+ // Extract constant bits from shuffle node sources.
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) {
+ // TODO - support shuffle through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ ArrayRef<int> Mask = SVN->getMask();
+ if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
+ llvm::any_of(Mask, [](int M) { return M < 0; }))
+ return false;
+
+ APInt UndefElts0, UndefElts1;
+ SmallVector<APInt, 32> EltBits0, EltBits1;
+ if (isAnyInRange(Mask, 0, NumElts) &&
+ !getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts0, EltBits0, AllowWholeUndefs,
+ AllowPartialUndefs))
+ return false;
+ if (isAnyInRange(Mask, NumElts, 2 * NumElts) &&
+ !getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
+ UndefElts1, EltBits1, AllowWholeUndefs,
+ AllowPartialUndefs))
+ return false;
+
+ UndefElts = APInt::getNullValue(NumElts);
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ EltBits.push_back(APInt::getNullValue(EltSizeInBits));
+ } else if (M < (int)NumElts) {
+ if (UndefElts0[M])
+ UndefElts.setBit(i);
+ EltBits.push_back(EltBits0[M]);
+ } else {
+ if (UndefElts1[M - NumElts])
+ UndefElts.setBit(i);
+ EltBits.push_back(EltBits1[M - NumElts]);
+ }
+ }
+ return true;
+ }
+
return false;
}
-static bool getTargetShuffleMaskIndices(SDValue MaskNode,
- unsigned MaskEltSizeInBits,
- SmallVectorImpl<uint64_t> &RawMask) {
+static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
APInt UndefElts;
- SmallVector<APInt, 64> EltBits;
+ SmallVector<APInt, 16> EltBits;
+ if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
+ UndefElts, EltBits, true, false)) {
+ int SplatIndex = -1;
+ for (int i = 0, e = EltBits.size(); i != e; ++i) {
+ if (UndefElts[i])
+ continue;
+ if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) {
+ SplatIndex = -1;
+ break;
+ }
+ SplatIndex = i;
+ }
+ if (0 <= SplatIndex) {
+ SplatVal = EltBits[SplatIndex];
+ return true;
+ }
+ }
+
+ return false;
+}
+static bool getTargetShuffleMaskIndices(SDValue MaskNode,
+ unsigned MaskEltSizeInBits,
+ SmallVectorImpl<uint64_t> &RawMask,
+ APInt &UndefElts) {
// Extract the raw target constant bits.
- // FIXME: We currently don't support UNDEF bits or mask entries.
+ SmallVector<APInt, 64> EltBits;
if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
- EltBits, /* AllowWholeUndefs */ false,
+ EltBits, /* AllowWholeUndefs */ true,
/* AllowPartialUndefs */ false))
return false;
@@ -5726,6 +6043,31 @@ static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
}
}
+// Split the demanded elts of a PACKSS/PACKUS node between its operands.
+static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
+ APInt &DemandedLHS, APInt &DemandedRHS) {
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumElts = DemandedElts.getBitWidth();
+ int NumInnerElts = NumElts / 2;
+ int NumEltsPerLane = NumElts / NumLanes;
+ int NumInnerEltsPerLane = NumInnerElts / NumLanes;
+
+ DemandedLHS = APInt::getNullValue(NumInnerElts);
+ DemandedRHS = APInt::getNullValue(NumInnerElts);
+
+ // Map DemandedElts to the packed operands.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) {
+ int OuterIdx = (Lane * NumEltsPerLane) + Elt;
+ int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt;
+ if (DemandedElts[OuterIdx])
+ DemandedLHS.setBit(InnerIdx);
+ if (DemandedElts[OuterIdx + NumInnerEltsPerLane])
+ DemandedRHS.setBit(InnerIdx);
+ }
+ }
+}
+
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
/// operands in \p Ops, and returns true.
@@ -5737,6 +6079,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<SDValue> &Ops,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
+ unsigned MaskEltSize = VT.getScalarSizeInBits();
+ SmallVector<uint64_t, 32> RawMask;
+ APInt RawUndefs;
SDValue ImmN;
assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
@@ -5744,26 +6089,26 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
IsUnary = false;
bool IsFakeUnary = false;
- switch(N->getOpcode()) {
+ switch (N->getOpcode()) {
case X86ISD::BLENDI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeBLENDMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPMask(NumElems, VT.getScalarSizeInBits(),
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ DecodeSHUFPMask(NumElems, MaskEltSize,
cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::INSERTPS:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
@@ -5773,8 +6118,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
isa<ConstantSDNode>(N->getOperand(2))) {
int BitLen = N->getConstantOperandVal(1);
int BitIdx = N->getConstantOperandVal(2);
- DecodeEXTRQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx,
- Mask);
+ DecodeEXTRQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
IsUnary = true;
}
break;
@@ -5785,21 +6129,20 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
isa<ConstantSDNode>(N->getOperand(3))) {
int BitLen = N->getConstantOperandVal(2);
int BitIdx = N->getConstantOperandVal(3);
- DecodeINSERTQIMask(NumElems, VT.getScalarSizeInBits(), BitLen, BitIdx,
- Mask);
+ DecodeINSERTQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
}
break;
case X86ISD::UNPCKH:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- DecodeUNPCKHMask(NumElems, VT.getScalarSizeInBits(), Mask);
+ DecodeUNPCKHMask(NumElems, MaskEltSize, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- DecodeUNPCKLMask(NumElems, VT.getScalarSizeInBits(), Mask);
+ DecodeUNPCKLMask(NumElems, MaskEltSize, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
@@ -5818,7 +6161,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePALIGNRMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -5844,21 +6187,21 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFMask(NumElems, VT.getScalarSizeInBits(),
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ DecodePSHUFMask(NumElems, MaskEltSize,
cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSHUFHWMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSHUFLWMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = true;
@@ -5891,14 +6234,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMILPMask(NumElems, VT.getScalarSizeInBits(), RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMILPMask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -5909,20 +6247,15 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
- DecodePSHUFBMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodePSHUFBMask(C, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
+ DecodePSHUFBMask(RawMask, RawUndefs, Mask);
break;
}
return false;
}
case X86ISD::VPERMI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeVPERMMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
@@ -5935,7 +6268,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::VPERM2X128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
DecodeVPERM2X128Mask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -5943,10 +6276,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::SHUF128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands()-1);
- decodeVSHUF64x2FamilyMask(NumElems, VT.getScalarSizeInBits(),
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getOperand(N->getNumOperands() - 1);
+ decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
@@ -5968,19 +6300,14 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
SDValue MaskNode = N->getOperand(2);
SDValue CtrlNode = N->getOperand(3);
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
unsigned CtrlImm = CtrlOp->getZExtValue();
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMIL2PMask(NumElems, VT.getScalarSizeInBits(), CtrlImm,
- RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs,
+ Mask);
break;
}
}
@@ -5991,13 +6318,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
SDValue MaskNode = N->getOperand(2);
- SmallVector<uint64_t, 32> RawMask;
- if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
- DecodeVPPERMMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPPERMMask(C, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
+ DecodeVPPERMMask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6008,14 +6330,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
// Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
Ops.push_back(N->getOperand(1));
SDValue MaskNode = N->getOperand(0);
- SmallVector<uint64_t, 32> RawMask;
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
- DecodeVPERMVMask(RawMask, Mask);
- break;
- }
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMVMask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMVMask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6028,9 +6345,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMV3Mask(C, MaskEltSize, Mask);
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+ RawUndefs)) {
+ DecodeVPERMV3Mask(RawMask, RawUndefs, Mask);
break;
}
return false;
@@ -6147,6 +6464,12 @@ static bool setTargetShuffleZeroElements(SDValue N,
return true;
}
+// Forward declaration (for getFauxShuffleMask recursive check).
+static bool resolveTargetShuffleInputs(SDValue Op,
+ SmallVectorImpl<SDValue> &Inputs,
+ SmallVectorImpl<int> &Mask,
+ const SelectionDAG &DAG);
+
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements to the
// destination value type.
@@ -6200,6 +6523,78 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
Ops.push_back(IsAndN ? N1 : N0);
return true;
}
+ case ISD::OR: {
+ // Handle the OR(SHUFFLE,SHUFFLE) case, where for each element one source
+ // is known zero and the other supplies a valid shuffle index.
+ SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+ SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
+ if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
+ return false;
+ SmallVector<int, 64> SrcMask0, SrcMask1;
+ SmallVector<SDValue, 2> SrcInputs0, SrcInputs1;
+ if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) ||
+ !resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG))
+ return false;
+ int MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
+ SmallVector<int, 64> Mask0, Mask1;
+ scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
+ scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
+ for (int i = 0; i != MaskSize; ++i) {
+ if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
+ Mask.push_back(SM_SentinelUndef);
+ else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
+ Mask.push_back(SM_SentinelZero);
+ else if (Mask1[i] == SM_SentinelZero)
+ Mask.push_back(Mask0[i]);
+ else if (Mask0[i] == SM_SentinelZero)
+ Mask.push_back(Mask1[i] + (MaskSize * SrcInputs0.size()));
+ else
+ return false;
+ }
+ for (SDValue &Op : SrcInputs0)
+ Ops.push_back(Op);
+ for (SDValue &Op : SrcInputs1)
+ Ops.push_back(Op);
+ return true;
+ }
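// Editorial sketch (not part of the patch): the per-element merge performed by
// the OR case above, over plain int masks that have already been scaled to a
// common size. An OR of two shuffles is itself a shuffle only if, for every
// element, at least one side is known zero (or both are undef). The sentinel
// values mirror SM_SentinelUndef/SM_SentinelZero; names are illustrative only.
#include <vector>

enum { SentinelUndef = -1, SentinelZero = -2 };

static bool mergeOrShuffleMasks(const std::vector<int> &Mask0,
                                const std::vector<int> &Mask1,
                                int NumInputs0,   // Number of sources behind Mask0.
                                std::vector<int> &Merged) {
  int Size = (int)Mask0.size();
  Merged.clear();
  for (int i = 0; i != Size; ++i) {
    if (Mask0[i] == SentinelUndef && Mask1[i] == SentinelUndef)
      Merged.push_back(SentinelUndef);
    else if (Mask0[i] == SentinelZero && Mask1[i] == SentinelZero)
      Merged.push_back(SentinelZero);
    else if (Mask1[i] == SentinelZero)
      Merged.push_back(Mask0[i]);                    // OR with zero keeps the LHS element.
    else if (Mask0[i] == SentinelZero)
      Merged.push_back(Mask1[i] + Size * NumInputs0); // Rebase past the LHS inputs.
    else
      return false;                                  // Both sides contribute bits.
  }
  return true;
}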
+ case ISD::INSERT_SUBVECTOR: {
+ // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1))) where
+ // SRC0/SRC1 are both of the same value type VT.
+ // TODO - add peekThroughOneUseBitcasts support.
+ SDValue Src = N.getOperand(0);
+ SDValue Sub = N.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ if (!isa<ConstantSDNode>(N.getOperand(2)) ||
+ !N->isOnlyUserOf(Sub.getNode()))
+ return false;
+ SmallVector<int, 64> SubMask;
+ SmallVector<SDValue, 2> SubInputs;
+ if (!resolveTargetShuffleInputs(Sub, SubInputs, SubMask, DAG) ||
+ SubMask.size() != NumSubElts)
+ return false;
+ Ops.push_back(Src);
+ for (SDValue &SubInput : SubInputs) {
+ if (SubInput.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ SubInput.getOperand(0).getValueType() != VT ||
+ !isa<ConstantSDNode>(SubInput.getOperand(1)))
+ return false;
+ Ops.push_back(SubInput.getOperand(0));
+ }
+ int InsertIdx = N.getConstantOperandVal(2);
+ for (int i = 0; i != (int)NumElts; ++i)
+ Mask.push_back(i);
+ for (int i = 0; i != (int)NumSubElts; ++i) {
+ int M = SubMask[i];
+ if (0 <= M) {
+ int InputIdx = M / NumSubElts;
+ int ExtractIdx = SubInputs[InputIdx].getConstantOperandVal(1);
+ M = (NumElts * (1 + InputIdx)) + ExtractIdx + (M % NumSubElts);
+ }
+ Mask[i + InsertIdx] = M;
+ }
+ return true;
+ }
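// Editorial sketch (not part of the patch): how the INSERT_SUBVECTOR case
// composes its mask. Start from the identity mask over the destination, then
// overwrite NumSubElts entries at InsertIdx with the sub-shuffle's indices,
// rebased to refer to the extra source operands appended after the base
// vector. ExtractIdx holds each sub-input's extract offset; names are
// illustrative only.
#include <vector>

static std::vector<int> composeInsertSubvectorMask(int NumElts, int NumSubElts,
                                                   int InsertIdx,
                                                   const std::vector<int> &SubMask,
                                                   const std::vector<int> &ExtractIdx) {
  std::vector<int> Mask;
  for (int i = 0; i != NumElts; ++i)
    Mask.push_back(i);                        // Identity: keep the base vector.
  for (int i = 0; i != NumSubElts; ++i) {
    int M = SubMask[i];
    if (0 <= M) {
      int InputIdx = M / NumSubElts;          // Which sub-shuffle input it uses.
      // Operand 0 is the base vector, so sub-input InputIdx starts at
      // NumElts * (1 + InputIdx); the extract offset is then re-added.
      M = NumElts * (1 + InputIdx) + ExtractIdx[InputIdx] + (M % NumSubElts);
    }
    Mask[i + InsertIdx] = M;                  // Negative sentinels pass through.
  }
  return Mask;
}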
case ISD::SCALAR_TO_VECTOR: {
// Match against a scalar_to_vector of an extract from a vector,
// for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
@@ -6334,14 +6729,14 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return true;
}
case ISD::ZERO_EXTEND_VECTOR_INREG:
- case X86ISD::VZEXT: {
+ case ISD::ZERO_EXTEND: {
// TODO - add support for VPMOVZX with smaller input vector types.
SDValue Src = N.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (NumSizeInBits != SrcVT.getSizeInBits())
break;
- DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), VT.getScalarSizeInBits(),
- VT.getVectorNumElements(), Mask);
+ DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
+ Mask);
Ops.push_back(Src);
return true;
}
@@ -6586,6 +6981,26 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ // If this is a splat of a pair of elements, use MOVDDUP (unless the target
+ // has XOP; in that case defer lowering to potentially use VPERMIL2PS).
+ // Because we're creating a less complicated build vector here, we may enable
+ // further folding of the MOVDDUP via shuffle transforms.
+ if (Subtarget.hasSSE3() && !Subtarget.hasXOP() &&
+ Op.getOperand(0) == Op.getOperand(2) &&
+ Op.getOperand(1) == Op.getOperand(3) &&
+ Op.getOperand(0) != Op.getOperand(1)) {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ // Create a new build vector with the first 2 elements followed by undef
+ // padding, bitcast to v2f64, duplicate, and bitcast back.
+ SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
+ DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+ SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
+ SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
+ return DAG.getBitcast(VT, Dup);
+ }
+
// Find all zeroable elements.
std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
@@ -7059,9 +7474,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
}
}
- // We need a splat of a single value to use broadcast, and it doesn't
- // make any sense if the value is only in one element of the vector.
- if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumUndefElts = UndefElements.count();
+ if (!Ld || (NumElts - NumUndefElts) <= 1) {
APInt SplatValue, Undef;
unsigned SplatBitSize;
bool HasUndef;
@@ -7137,7 +7552,17 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
}
}
}
- return SDValue();
+
+ // If we are moving a scalar into a vector (Ld must be set and all elements
+ // but 1 are undef) and that operation is not obviously supported by
+ // vmovd/vmovq/vmovss/vmovsd, then keep trying to form a broadcast.
+ // That's better than general shuffling and may eliminate a load to GPR and
+ // move from scalar to vector register.
+ if (!Ld || NumElts - NumUndefElts != 1)
+ return SDValue();
+ unsigned ScalarSize = Ld.getValueSizeInBits();
+ if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64)))
+ return SDValue();
}
bool ConstSplatVal =
@@ -7434,13 +7859,14 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
return DstVec;
}
-/// Return true if \p N implements a horizontal binop and return the
-/// operands for the horizontal binop into V0 and V1.
-///
/// This is a helper function of LowerToHorizontalOp().
/// This function checks that the build_vector \p N in input implements a
-/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
-/// operation to match.
+/// 128-bit partial horizontal operation on a 256-bit vector, but that operation
+/// may not match the layout of an x86 256-bit horizontal instruction.
+/// In other words, if this returns true, then some extraction/insertion will
+/// be required to produce a valid horizontal instruction.
+///
+/// Parameter \p Opcode defines the kind of horizontal operation to match.
/// For example, if \p Opcode is equal to ISD::ADD, then this function
/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
/// is equal to ISD::SUB, then this function checks if this is a horizontal
@@ -7448,12 +7874,17 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
///
/// This function only analyzes elements of \p N whose indices are
/// in range [BaseIdx, LastIdx).
-static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
- SelectionDAG &DAG,
- unsigned BaseIdx, unsigned LastIdx,
- SDValue &V0, SDValue &V1) {
+///
+/// TODO: This function was originally used to match both real and fake partial
+/// horizontal operations, but the index-matching logic is incorrect for that.
+/// See the corrected implementation in isHopBuildVector(). Can we reduce this
+/// code because it is only used for partial h-op matching now?
+static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
+ SelectionDAG &DAG,
+ unsigned BaseIdx, unsigned LastIdx,
+ SDValue &V0, SDValue &V1) {
EVT VT = N->getValueType(0);
-
+ assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops");
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
@@ -7623,7 +8054,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
// adding/subtracting two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
// subtracting/adding two integer/float elements.
- unsigned Opc[2] {0, 0};
+ unsigned Opc[2] = {0, 0};
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -7794,17 +8225,158 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
+static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
+ unsigned &HOpcode, SDValue &V0, SDValue &V1) {
+ // Initialize outputs to known values.
+ MVT VT = BV->getSimpleValueType(0);
+ HOpcode = ISD::DELETED_NODE;
+ V0 = DAG.getUNDEF(VT);
+ V1 = DAG.getUNDEF(VT);
+
+ // x86 256-bit horizontal ops are defined in a non-obvious way. Each 128-bit
+ // half of the result is calculated independently from the 128-bit halves of
+ // the inputs, so that makes the index-checking logic below more complicated.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned GenericOpcode = ISD::DELETED_NODE;
+ unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1;
+ unsigned NumEltsIn128Bits = NumElts / Num128BitChunks;
+ unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
+ for (unsigned i = 0; i != Num128BitChunks; ++i) {
+ for (unsigned j = 0; j != NumEltsIn128Bits; ++j) {
+ // Ignore undef elements.
+ SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j);
+ if (Op.isUndef())
+ continue;
+
+ // If there's an opcode mismatch, we're done.
+ if (HOpcode != ISD::DELETED_NODE && Op.getOpcode() != GenericOpcode)
+ return false;
+
+ // Initialize horizontal opcode.
+ if (HOpcode == ISD::DELETED_NODE) {
+ GenericOpcode = Op.getOpcode();
+ switch (GenericOpcode) {
+ case ISD::ADD: HOpcode = X86ISD::HADD; break;
+ case ISD::SUB: HOpcode = X86ISD::HSUB; break;
+ case ISD::FADD: HOpcode = X86ISD::FHADD; break;
+ case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
+ default: return false;
+ }
+ }
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Op0.getOperand(0) != Op1.getOperand(0) ||
+ !isa<ConstantSDNode>(Op0.getOperand(1)) ||
+ !isa<ConstantSDNode>(Op1.getOperand(1)) || !Op.hasOneUse())
+ return false;
+
+ // The source vector is chosen based on which 64-bit half of the
+ // destination vector is being calculated.
+ if (j < NumEltsIn64Bits) {
+ if (V0.isUndef())
+ V0 = Op0.getOperand(0);
+ } else {
+ if (V1.isUndef())
+ V1 = Op0.getOperand(0);
+ }
+
+ SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
+ if (SourceVec != Op0.getOperand(0))
+ return false;
+
+ // op (extract_vector_elt A, I), (extract_vector_elt A, I+1)
+ unsigned ExtIndex0 = Op0.getConstantOperandVal(1);
+ unsigned ExtIndex1 = Op1.getConstantOperandVal(1);
+ unsigned ExpectedIndex = i * NumEltsIn128Bits +
+ (j % NumEltsIn64Bits) * 2;
+ if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
+ continue;
+
+ // If this is not a commutative op, this does not match.
+ if (GenericOpcode != ISD::ADD && GenericOpcode != ISD::FADD)
+ return false;
+
+ // Addition is commutative, so try swapping the extract indexes.
+ // op (extract_vector_elt A, I+1), (extract_vector_elt A, I)
+ if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1)
+ continue;
+
+ // Extract indexes do not match horizontal requirement.
+ return false;
+ }
+ }
+ // We matched. Opcode and operands are returned by reference as arguments.
+ return true;
+}
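// Editorial sketch (not part of the patch): the expected-index rule checked
// above. For an x86 horizontal add/sub, result element j of 128-bit chunk i is
// computed from a consecutive source pair starting at an even offset within
// that chunk; this helper computes that expected first extract index. Names
// are illustrative only.
static unsigned expectedHopExtractIndex(unsigned Chunk, unsigned EltInChunk,
                                        unsigned NumEltsIn128Bits) {
  unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
  return Chunk * NumEltsIn128Bits + (EltInChunk % NumEltsIn64Bits) * 2;
}
// For example, with v8i32 (two 128-bit chunks of 4 elements), result element 5
// has Chunk=1, EltInChunk=1, so it must be Src[6] op Src[7] of the first
// source (EltInChunk < NumEltsIn64Bits selects V0).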
+
+static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
+ SelectionDAG &DAG, unsigned HOpcode,
+ SDValue V0, SDValue V1) {
+ // If either input vector is not the same size as the build vector,
+ // extract/insert the low bits to the correct size.
+ // This is free (examples: zmm --> xmm, xmm --> ymm).
+ MVT VT = BV->getSimpleValueType(0);
+ unsigned Width = VT.getSizeInBits();
+ if (V0.getValueSizeInBits() > Width)
+ V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width);
+ else if (V0.getValueSizeInBits() < Width)
+ V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width);
+
+ if (V1.getValueSizeInBits() > Width)
+ V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width);
+ else if (V1.getValueSizeInBits() < Width)
+ V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
+
+ return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
+}
+
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ // We need at least 2 non-undef elements to make this worthwhile by default.
+ unsigned NumNonUndefs = 0;
+ for (const SDValue &V : BV->op_values())
+ if (!V.isUndef())
+ ++NumNonUndefs;
+
+ if (NumNonUndefs < 2)
+ return SDValue();
+
+ // There are 4 sets of horizontal math operations distinguished by type:
+ // int/FP at 128-bit/256-bit. Each type was introduced with a different
+ // subtarget feature. Try to match those "native" patterns first.
MVT VT = BV->getSimpleValueType(0);
+ unsigned HOpcode;
+ SDValue V0, V1;
+ if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ if ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())
+ if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
+ return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+
+ // Try harder to match 256-bit ops by using extract/concat.
+ if (!Subtarget.hasAVX() || !VT.is256BitVector())
+ return SDValue();
+
+ // Count the number of UNDEF operands in the input build_vector.
unsigned NumElts = VT.getVectorNumElements();
+ unsigned Half = NumElts / 2;
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
- unsigned Half = NumElts/2;
-
- // Count the number of UNDEF operands in the build_vector in input.
for (unsigned i = 0, e = Half; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsLO++;
@@ -7813,96 +8385,61 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
if (BV->getOperand(i)->isUndef())
NumUndefsHI++;
- // Early exit if this is either a build_vector of all UNDEFs or all the
- // operands but one are UNDEF.
- if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
- return SDValue();
-
SDLoc DL(BV);
SDValue InVec0, InVec1;
- if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
- // Try to match an SSE3 float HADD/HSUB.
- if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
- } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
- // Try to match an SSSE3 integer HADD/HSUB.
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
- return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
- }
-
- if (!Subtarget.hasAVX())
- return SDValue();
-
- if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
- // Try to match an AVX horizontal add/sub of packed single/double
- // precision floating point values from 256-bit vectors.
- SDValue InVec2, InVec3;
- if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
- if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
- return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
- } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
- // Try to match an AVX2 horizontal add/sub of signed integers.
+ if (VT == MVT::v8i32 || VT == MVT::v16i16) {
SDValue InVec2, InVec3;
unsigned X86Opcode;
bool CanFold = true;
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
+ if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2,
+ InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
- isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
- ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
- ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
+ else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0,
+ InVec1) &&
+ isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2,
+ InVec3) &&
+ ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
+ ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HSUB;
else
CanFold = false;
if (CanFold) {
- // Fold this build_vector into a single horizontal add/sub.
- // Do this only if the target has AVX2.
- if (Subtarget.hasAVX2())
- return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
-
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
- // Convert this build_vector into a pair of horizontal binop followed by
- // a concat vector.
+ // Convert this build_vector into a pair of horizontal binops followed by
+ // a concat vector. We must adjust the outputs from the partial horizontal
+ // matching calls above to account for undefined vector halves.
+ SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
+ SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
+ assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
- return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
- isUndefLO, isUndefHI);
+ return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
+ isUndefHI);
}
}
- if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
- VT == MVT::v16i16) && Subtarget.hasAVX()) {
+ if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
+ VT == MVT::v16i16) {
unsigned X86Opcode;
- if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
+ if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HADD;
- else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::HSUB;
- else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::FHADD;
- else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
+ else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0,
+ InVec1))
X86Opcode = X86ISD::FHSUB;
else
return SDValue();
@@ -8370,9 +8907,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If we are inserting one variable into a vector of non-zero constants, try
// to avoid loading each constant element as a scalar. Load the constants as a
// vector and then insert the variable scalar element. If insertion is not
- // supported, we assume that we will fall back to a shuffle to get the scalar
- // blended with the constants. Insertion into a zero vector is handled as a
- // special-case somewhere below here.
+ // supported, fall back to a shuffle to get the scalar blended with the
+ // constants. Insertion into a zero vector is handled as a special case
+ // further below.
if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
(isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
@@ -8410,7 +8947,21 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+ unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue();
+ unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();
+ if (InsertC < NumEltsInLow128Bits)
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);
+
+ // There's no good way to insert into the high elements of a >128-bit
+ // vector, so use shuffles to avoid an extract/insert sequence.
+ assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");
+ assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
+ SmallVector<int, 8> ShuffleMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShuffleMask.push_back(i == InsertC ? NumElts : i);
+ SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
+ return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);
}
// Special case for single non-zero, non-undef, element.
@@ -9097,6 +9648,28 @@ static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
return TargetMask;
}
+// Attempt to create a shuffle mask from a VSELECT condition mask.
+static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
+ SDValue Cond) {
+ if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
+ return false;
+
+ unsigned Size = Cond.getValueType().getVectorNumElements();
+ Mask.resize(Size, SM_SentinelUndef);
+
+ for (int i = 0; i != (int)Size; ++i) {
+ SDValue CondElt = Cond.getOperand(i);
+ Mask[i] = i;
+ // Arbitrarily choose from the 2nd operand if the select condition element
+ // is undef.
+ // TODO: Can we do better by matching patterns such as even/odd?
+ if (CondElt.isUndef() || isNullConstant(CondElt))
+ Mask[i] += Size;
+ }
+
+ return true;
+}
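// Editorial sketch (not part of the patch): the VSELECT-condition-to-shuffle
// translation above. Element i selects operand 1 when the condition element is
// non-zero and operand 2 when it is zero, so the mask entry is either i or
// i + Size; an undef condition element arbitrarily picks operand 2, as in the
// code above. The plain int representation (-1 for undef) and names are
// illustrative only.
#include <vector>

static std::vector<int> maskFromSelectCondition(const std::vector<int> &CondElts) {
  int Size = (int)CondElts.size();
  std::vector<int> Mask(Size);
  for (int i = 0; i != Size; ++i) {
    Mask[i] = i;                       // Default: take element i of operand 1.
    if (CondElts[i] == 0 || CondElts[i] == -1)
      Mask[i] += Size;                 // Zero or undef condition: operand 2.
  }
  return Mask;
}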
+
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
@@ -9664,11 +10237,7 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);
V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
- // We have to cast V2 around.
- MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
- V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
- DAG.getBitcast(MaskVT, V1Mask),
- DAG.getBitcast(MaskVT, V2)));
+ V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2);
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
@@ -9762,7 +10331,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
-
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
@@ -9794,7 +10362,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
-
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
@@ -9808,6 +10375,20 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
+ // Use PBLENDW for lower/upper lanes and then blend lanes.
+ // TODO - we should allow 2 PBLENDW here and leave shuffle combine to
+ // merge to VSELECT where useful.
+ uint64_t LoMask = BlendMask & 0xFF;
+ uint64_t HiMask = (BlendMask >> 8) & 0xFF;
+ if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
+ SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(LoMask, DL, MVT::i8));
+ SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(HiMask, DL, MVT::i8));
+ return DAG.getVectorShuffle(
+ MVT::v16i16, DL, Lo, Hi,
+ {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
+ }
LLVM_FALLTHROUGH;
}
case MVT::v16i8:
@@ -9815,6 +10396,11 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
+ // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
+ return Masked;
+
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
@@ -9822,11 +10408,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
- // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
- if (SDValue Masked =
- lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
- return Masked;
-
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
@@ -9834,6 +10415,15 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// type.
MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ // x86 allows load folding with blendvb from the 2nd source operand. But
+ // we are still using LLVM select here (see comment below), so that's V1.
+ // If V2 can be load-folded and V1 cannot be load-folded, then commute to
+ // allow that load-folding possibility.
+ if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(V1, V2);
+ }
+
// Compute the VSELECT mask. Note that VSELECT is really confusing in the
// mix of LLVM's code generator and the x86 backend. We tell the code
// generator that boolean values in the elements of an x86 vector register
@@ -9884,7 +10474,8 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool ImmBlends = false) {
// We build up the blend mask while checking whether a blend is a viable way
// to reduce the shuffle.
SmallVector<int, 32> BlendMask(Mask.size(), -1);
@@ -9904,10 +10495,168 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
PermuteMask[i] = Mask[i] % Size;
}
+ // If we are restricted to immediate blends, bail if the blend mask can't be
+ // widened to i16.
+ unsigned EltSize = VT.getScalarSizeInBits();
+ if (ImmBlends && EltSize == 8 && !canWidenShuffleElements(BlendMask))
+ return SDValue();
+
SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
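// Editorial sketch (not part of the patch): the blend+permute decomposition
// used by the function above. First build a blend that, for every referenced
// element, keeps that element at its original position (only possible if the
// two inputs never both contribute to the same slot), then apply a single
// single-input permute that moves the blended elements to their final slots.
// Names are illustrative only; -1 means "don't care".
#include <vector>

static bool blendThenPermuteMasks(const std::vector<int> &Mask, int Size,
                                  std::vector<int> &BlendMask,
                                  std::vector<int> &PermuteMask) {
  BlendMask.assign(Size, -1);
  PermuteMask.assign(Size, -1);
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;
    int Pos = M % Size;                        // Position within either input.
    if (BlendMask[Pos] != -1 && BlendMask[Pos] != M)
      return false;                            // Both inputs want this slot.
    BlendMask[Pos] = M;                        // Blend keeps the element in place.
    PermuteMask[i] = Pos;                      // Permute then moves it to slot i.
  }
  return true;
}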
+/// Try to lower as an unpack of elements from two inputs followed by
+/// a single-input permutation.
+///
+/// This matches the pattern where we can unpack elements from two inputs and
+/// then reduce the shuffle to a single-input (wider) permutation.
+static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ int NumElts = Mask.size();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumLaneElts = NumElts / NumLanes;
+ int NumHalfLaneElts = NumLaneElts / 2;
+
+ bool MatchLo = true, MatchHi = true;
+ SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+
+ // Determine UNPCKL/UNPCKH type and operand order.
+ for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
+ for (int Elt = 0; Elt != NumLaneElts; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+
+ SDValue &Op = Ops[Elt & 1];
+ if (M < NumElts && (Op.isUndef() || Op == V1))
+ Op = V1;
+ else if (NumElts <= M && (Op.isUndef() || Op == V2))
+ Op = V2;
+ else
+ return SDValue();
+
+ int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts;
+ MatchLo &= isUndefOrInRange(M, Lo, Mid) ||
+ isUndefOrInRange(M, NumElts + Lo, NumElts + Mid);
+ MatchHi &= isUndefOrInRange(M, Mid, Hi) ||
+ isUndefOrInRange(M, NumElts + Mid, NumElts + Hi);
+ if (!MatchLo && !MatchHi)
+ return SDValue();
+ }
+ }
+ assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI");
+
+ // Now check that each pair of elts comes from the same unpack pair
+ // and set the permute mask based on each pair.
+ // TODO - Investigate cases where we permute individual elements.
+ SmallVector<int, 32> PermuteMask(NumElts, -1);
+ for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
+ for (int Elt = 0; Elt != NumLaneElts; Elt += 2) {
+ int M0 = Mask[Lane + Elt + 0];
+ int M1 = Mask[Lane + Elt + 1];
+ if (0 <= M0 && 0 <= M1 &&
+ (M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts))
+ return SDValue();
+ if (0 <= M0)
+ PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts));
+ if (0 <= M1)
+ PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1;
+ }
+ }
+
+ unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops);
+ return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask);
+}
+
+/// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
+/// permuting the elements of the result in place.
+static SDValue lowerVectorShuffleAsByteRotateAndPermute(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+ if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget.hasAVX2()) ||
+ (VT.is512BitVector() && !Subtarget.hasBWI()))
+ return SDValue();
+
+ // We don't currently support lane crossing permutes.
+ if (is128BitLaneCrossingShuffleMask(VT, Mask))
+ return SDValue();
+
+ int Scale = VT.getScalarSizeInBits() / 8;
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumElts = VT.getVectorNumElements();
+ int NumEltsPerLane = NumElts / NumLanes;
+
+ // Determine range of mask elts.
+ bool Blend1 = true;
+ bool Blend2 = true;
+ std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN);
+ std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN);
+ for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
+ for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+ if (M < NumElts) {
+ Blend1 &= (M == (Lane + Elt));
+ assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
+ M = M % NumEltsPerLane;
+ Range1.first = std::min(Range1.first, M);
+ Range1.second = std::max(Range1.second, M);
+ } else {
+ M -= NumElts;
+ Blend2 &= (M == (Lane + Elt));
+ assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
+ M = M % NumEltsPerLane;
+ Range2.first = std::min(Range2.first, M);
+ Range2.second = std::max(Range2.second, M);
+ }
+ }
+ }
+
+ // Bail if we don't need both elements.
+ // TODO - it might be worth doing this for unary shuffles if the permute
+ // can be widened.
+ if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) ||
+ !(0 <= Range2.first && Range2.second < NumEltsPerLane))
+ return SDValue();
+
+ if (VT.getSizeInBits() > 128 && (Blend1 || Blend2))
+ return SDValue();
+
+ // Rotate the 2 ops so we can access both ranges, then permute the result.
+ auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) {
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ SDValue Rotate = DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
+ DAG.getBitcast(ByteVT, Lo),
+ DAG.getConstant(Scale * RotAmt, DL, MVT::i8)));
+ SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
+ for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
+ for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
+ int M = Mask[Lane + Elt];
+ if (M < 0)
+ continue;
+ if (M < NumElts)
+ PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);
+ else
+ PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);
+ }
+ }
+ return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask);
+ };
+
+ // Check if the ranges are small enough to rotate from either direction.
+ if (Range2.second < Range1.first)
+ return RotateAndPermute(V1, V2, Range1.first, 0);
+ if (Range1.second < Range2.first)
+ return RotateAndPermute(V2, V1, Range2.first, NumElts);
+ return SDValue();
+}
+
/// Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
@@ -9915,11 +10664,9 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
/// operations. It will try to pick the best arrangement of shuffles and
/// blends.
-static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
- MVT VT, SDValue V1,
- SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
// Shuffle the input elements into the desired positions in V1 and V2 and
// blend them together.
SmallVector<int, 32> V1Mask(Mask.size(), -1);
@@ -9934,15 +10681,27 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
BlendMask[i] = i + Size;
}
- // Try to lower with the simpler initial blend strategy unless one of the
- // input shuffles would be a no-op. We prefer to shuffle inputs as the
- // shuffle may be able to fold with a load or other benefit. However, when
- // we'll have to do 2x as many shuffles in order to achieve this, blending
- // first is a better strategy.
- if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask))
+ // Try to lower with the simpler initial blend/unpack/rotate strategies unless
+ // one of the input shuffles would be a no-op. We prefer to shuffle inputs as
+ // the shuffle may be able to fold with a load or other benefit. However, when
+ // we'll have to do 2x as many shuffles in order to achieve this, a 2-input
+ // pre-shuffle first is a better strategy.
+ if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
+ // Only prefer immediate blends to unpack/rotate.
+ if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
+ DL, VT, V1, V2, Mask, DAG, true))
+ return BlendPerm;
+ if (SDValue UnpackPerm =
+ lowerVectorShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG))
+ return UnpackPerm;
+ if (SDValue RotatePerm = lowerVectorShuffleAsByteRotateAndPermute(
+ DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return RotatePerm;
+ // Unpack/rotate failed - try again with variable blends.
if (SDValue BlendPerm =
lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
return BlendPerm;
+ }
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
@@ -10452,7 +11211,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
- InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG);
+ InputV = getExtendInVec(/*Signed*/false, DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
@@ -10930,7 +11689,8 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
continue;
}
case ISD::CONCAT_VECTORS: {
- int OperandSize = Mask.size() / V.getNumOperands();
+ int OperandSize =
+ V.getOperand(0).getSimpleValueType().getVectorNumElements();
V = V.getOperand(BroadcastIdx / OperandSize);
BroadcastIdx %= OperandSize;
continue;
@@ -10989,7 +11749,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
SDValue BC = peekThroughBitcasts(V);
// Also check the simpler case, where we can directly reuse the scalar.
- if (V.getOpcode() == ISD::BUILD_VECTOR ||
+ if ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
@@ -11204,10 +11964,9 @@ static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsPermuteAndUnpack(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() &&
"This routine only supports integer vectors.");
assert(VT.is128BitVector() &&
@@ -11276,6 +12035,12 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
return Unpack;
+ // If we're shuffling with a zero vector then we're better off not doing
+ // VECTOR_SHUFFLE(UNPCK()) as we lose track of those zero elements.
+ if (ISD::isBuildVectorAllZeros(V1.getNode()) ||
+ ISD::isBuildVectorAllZeros(V2.getNode()))
+ return SDValue();
+
// If none of the unpack-rooted lowerings worked (or were profitable) try an
// initial unpack.
if (NumLoInputs == 0 || NumHiInputs == 0) {
@@ -11475,7 +12240,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
@@ -11785,11 +12550,11 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
- DL, MVT::v4i32, V1, V2, Mask, DAG))
+ DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Unpack;
}
@@ -12321,47 +13086,48 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
/// blend if only one input is used.
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
- bool &V2InUse) {
- SDValue V1Mask[16];
- SDValue V2Mask[16];
+ const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
+ assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
+ "Lane crossing shuffle masks not supported");
+
+ int NumBytes = VT.getSizeInBits() / 8;
+ int Size = Mask.size();
+ int Scale = NumBytes / Size;
+
+ SmallVector<SDValue, 64> V1Mask(NumBytes, DAG.getUNDEF(MVT::i8));
+ SmallVector<SDValue, 64> V2Mask(NumBytes, DAG.getUNDEF(MVT::i8));
V1InUse = false;
V2InUse = false;
- int Size = Mask.size();
- int Scale = 16 / Size;
- for (int i = 0; i < 16; ++i) {
- if (Mask[i / Scale] < 0) {
- V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
- } else {
- const int ZeroMask = 0x80;
- int V1Idx = Mask[i / Scale] < Size ? Mask[i / Scale] * Scale + i % Scale
- : ZeroMask;
- int V2Idx = Mask[i / Scale] < Size
- ? ZeroMask
- : (Mask[i / Scale] - Size) * Scale + i % Scale;
- if (Zeroable[i / Scale])
- V1Idx = V2Idx = ZeroMask;
- V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
- V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
- V1InUse |= (ZeroMask != V1Idx);
- V2InUse |= (ZeroMask != V2Idx);
- }
+ for (int i = 0; i < NumBytes; ++i) {
+ int M = Mask[i / Scale];
+ if (M < 0)
+ continue;
+
+ const int ZeroMask = 0x80;
+ int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
+ int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
+ if (Zeroable[i / Scale])
+ V1Idx = V2Idx = ZeroMask;
+
+ V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
+ V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
+ V1InUse |= (ZeroMask != V1Idx);
+ V2InUse |= (ZeroMask != V2Idx);
}
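// Editorial sketch (not part of the patch): the PSHUFB control construction in
// the loop above. Each result byte i maps to mask element i/Scale; a byte
// sourced from V1 gets its real byte index in the V1 control and 0x80 (the
// "write zero" flag) in the V2 control, and vice versa; zeroable elements zero
// both sides and undef elements are left undef. Names and the plain vector
// types are illustrative only.
#include <vector>

static void buildPshufbControls(const std::vector<int> &Mask,
                                const std::vector<bool> &Zeroable,
                                int NumBytes, std::vector<int> &V1Ctl,
                                std::vector<int> &V2Ctl) {
  const int ZeroMask = 0x80;                     // PSHUFB "write zero" flag.
  int Size = (int)Mask.size();
  int Scale = NumBytes / Size;                   // Bytes per mask element.
  V1Ctl.assign(NumBytes, -1);                    // -1 models an undef byte.
  V2Ctl.assign(NumBytes, -1);
  for (int i = 0; i < NumBytes; ++i) {
    int M = Mask[i / Scale];
    if (M < 0)
      continue;                                  // Undef element: leave undef.
    int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask;
    int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;
    if (Zeroable[i / Scale])
      V1Idx = V2Idx = ZeroMask;                  // Known-zero element.
    V1Ctl[i] = V1Idx;
    V2Ctl[i] = V2Idx;
  }
}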
+ MVT ShufVT = MVT::getVectorVT(MVT::i8, NumBytes);
if (V1InUse)
- V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getBitcast(MVT::v16i8, V1),
- DAG.getBuildVector(MVT::v16i8, DL, V1Mask));
+ V1 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V1),
+ DAG.getBuildVector(ShufVT, DL, V1Mask));
if (V2InUse)
- V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- DAG.getBitcast(MVT::v16i8, V2),
- DAG.getBuildVector(MVT::v16i8, DL, V2Mask));
+ V2 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V2),
+ DAG.getBuildVector(ShufVT, DL, V2Mask));
// If we need shuffled inputs from both, blend the two.
SDValue V;
if (V1InUse && V2InUse)
- V = DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ V = DAG.getNode(ISD::OR, DL, ShufVT, V1, V2);
else
V = V1InUse ? V1 : V2;
@@ -12484,8 +13250,8 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return BitBlend;
// Try to lower by permuting the inputs into an unpack instruction.
- if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
- V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+ DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
@@ -12499,7 +13265,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// We can always bit-blend if we have to so the fallback strategy is to
// decompose into single-input permutes and blends.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Check whether a compaction lowering can be done by dropping even
@@ -12632,6 +13398,10 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+ return V;
+
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
// Notably, this handles splat and partial-splat shuffles more efficiently.
// However, it only makes sense if the pre-duplication shuffle simplifies
@@ -12769,12 +13539,18 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
- DL, MVT::v16i8, V1, V2, Mask, DAG))
+ DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Unpack;
// If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
if (Subtarget.hasVBMI() && Subtarget.hasVLX())
return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
+
+ // Use PALIGNR+Permute if possible - permute might become PSHUFB but the
+ // PALIGNR will be cheaper than the second PSHUFB+OR.
+ if (SDValue V = lowerVectorShuffleAsByteRotateAndPermute(
+ DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ return V;
}
return PSHUFB;
@@ -12830,7 +13606,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Handle multi-input cases by blending single-input shuffles.
if (NumV2Elements > 0)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// The fallback path for single-input shuffles widens this into two v8i16
// vectors with unpacks, shuffles those, and then pulls them back together
@@ -13043,6 +13819,7 @@ static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
"shuffles as it could then recurse on itself.");
@@ -13069,7 +13846,7 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
};
if (DoBothBroadcast())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
- DAG);
+ Subtarget, DAG);
// If the inputs all stem from a single 128-bit lane of each input, then we
// split them rather than blending because the split will decompose to
@@ -13087,7 +13864,62 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
// Otherwise, just fall back to decomposed shuffles and a blend. This requires
// that the decomposed single-input shuffles don't end up here.
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
+ Subtarget, DAG);
+}
+
+/// Lower a vector shuffle crossing multiple 128-bit lanes as
+/// a lane permutation followed by a per-lane permutation.
+///
+/// This is mainly for cases where we can have non-repeating permutes
+/// in each lane.
+///
+/// TODO: This is very similar to lowerVectorShuffleByMerging128BitLanes,
+/// we should investigate merging them.
+static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumEltsPerLane = NumElts / NumLanes;
+
+ SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
+ SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+ SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+
+ // Ensure that each lane comes from a single source lane.
+ int SrcLane = M / NumEltsPerLane;
+ int DstLane = i / NumEltsPerLane;
+ if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane))
+ return SDValue();
+ SrcLaneMask[DstLane] = SrcLane;
+
+ LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
+ PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
+ }
+
+ // If we're only shuffling a single lowest lane and the rest are identity
+ // then don't bother.
+ // TODO - isShuffleMaskInputInPlace could be extended to something like this.
+ int NumIdentityLanes = 0;
+ bool OnlyShuffleLowestLane = true;
+ for (int i = 0; i != NumLanes; ++i) {
+ if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane,
+ i * NumEltsPerLane))
+ NumIdentityLanes++;
+ else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes)
+ OnlyShuffleLowestLane = false;
+ }
+ if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
+ return SDValue();
+
+ SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask);
+ return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
}
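// A minimal standalone sketch (hypothetical names, not part of this patch or
// of LLVM's API) of the mask split performed by the routine above: a
// cross-lane shuffle mask is decomposed into a lane-granular permute
// (LaneMask) followed by an in-lane permute (PermMask), assuming each
// destination lane reads from a single source lane; undef (negative) mask
// entries are ignored here for brevity.
#include <array>
#include <cassert>

int main() {
  constexpr int NumElts = 4, NumEltsPerLane = 2;     // e.g. a v4f64 shuffle
  std::array<int, NumElts> Mask = {2, 3, 1, 0};      // crosses 128-bit lanes
  std::array<int, NumElts> LaneMask{}, PermMask{};
  std::array<int, 2> SrcLane = {-1, -1};

  for (int i = 0; i != NumElts; ++i) {
    int Src = Mask[i] / NumEltsPerLane, Dst = i / NumEltsPerLane;
    assert(SrcLane[Dst] < 0 || SrcLane[Dst] == Src); // one source lane per lane
    SrcLane[Dst] = Src;
    LaneMask[i] = Src * NumEltsPerLane + i % NumEltsPerLane;
    PermMask[i] = Dst * NumEltsPerLane + Mask[i] % NumEltsPerLane;
  }

  // Composing the two permutes reproduces the original mask:
  // result[i] = V[LaneMask[PermMask[i]]] == V[Mask[i]].
  for (int i = 0; i != NumElts; ++i)
    assert(LaneMask[PermMask[i]] == Mask[i]);
  // Here LaneMask = {2,3,0,1} (swap the 128-bit lanes) and
  // PermMask = {0,1,3,2} (identity in lane 0, swap within lane 1).
}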
/// Lower a vector shuffle crossing multiple 128-bit lanes as
@@ -13248,79 +14080,174 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
/// Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
-/// This will only succeed when the result of fixing the 128-bit lanes results
-/// in a single-input non-lane-crossing shuffle with a repeating shuffle mask in
-/// each 128-bit lanes. This handles many cases where we can quickly blend away
-/// the lane crosses early and then use simpler shuffles within each lane.
+/// This attempts to create a repeated lane shuffle where each lane uses one
+/// or two of the lanes of the inputs. The lanes of the input vectors are
+/// shuffled in one or two independent shuffles to get the lanes into the
+/// position needed by the final shuffle.
///
-/// FIXME: It might be worthwhile at some point to support this without
-/// requiring the 128-bit lane-relative shuffles to be repeating, but currently
-/// in x86 only floating point has interesting non-repeating shuffles, and even
-/// those are still *marginally* more expensive.
+/// FIXME: This should be generalized to 512-bit shuffles.
static SDValue lowerVectorShuffleByMerging128BitLanes(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!V2.isUndef() && "This is only useful with multiple inputs.");
+ if (is128BitLaneRepeatedShuffleMask(VT, Mask))
+ return SDValue();
+
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
int NumLanes = Size / LaneSize;
- assert(NumLanes > 1 && "Only handles 256-bit and wider shuffles.");
+ assert(NumLanes == 2 && "Only handles 256-bit shuffles.");
+
+ SmallVector<int, 16> RepeatMask(LaneSize, -1);
+ int LaneSrcs[2][2] = { { -1, -1 }, { -1 , -1 } };
+
+ // First pass will try to fill in the RepeatMask from lanes that need two
+ // sources.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Srcs[2] = { -1, -1 };
+ SmallVector<int, 16> InLaneMask(LaneSize, -1);
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = Mask[(Lane * LaneSize) + i];
+ if (M < 0)
+ continue;
+ // Determine which of the 4 possible input lanes (2 from each source)
+ // this element comes from. Assign that as one of the sources for this
+ // lane. We can assign up to 2 sources for this lane. If we run out of
+ // sources we can't do anything.
+ int LaneSrc = M / LaneSize;
+ int Src;
+ if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
+ Src = 0;
+ else if (Srcs[1] < 0 || Srcs[1] == LaneSrc)
+ Src = 1;
+ else
+ return SDValue();
- // See if we can build a hypothetical 128-bit lane-fixing shuffle mask. Also
- // check whether the in-128-bit lane shuffles share a repeating pattern.
- SmallVector<int, 4> Lanes((unsigned)NumLanes, -1);
- SmallVector<int, 4> InLaneMask((unsigned)LaneSize, -1);
- for (int i = 0; i < Size; ++i) {
- if (Mask[i] < 0)
+ Srcs[Src] = LaneSrc;
+ InLaneMask[i] = (M % LaneSize) + Src * Size;
+ }
+
+ // If this lane has two sources, see if it fits with the repeat mask so far.
+ if (Srcs[1] < 0)
continue;
- int j = i / LaneSize;
+ LaneSrcs[Lane][0] = Srcs[0];
+ LaneSrcs[Lane][1] = Srcs[1];
- if (Lanes[j] < 0) {
- // First entry we've seen for this lane.
- Lanes[j] = Mask[i] / LaneSize;
- } else if (Lanes[j] != Mask[i] / LaneSize) {
- // This doesn't match the lane selected previously!
- return SDValue();
+ auto MatchMasks = [](ArrayRef<int> M1, ArrayRef<int> M2) {
+ assert(M1.size() == M2.size() && "Unexpected mask size");
+ for (int i = 0, e = M1.size(); i != e; ++i)
+ if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i])
+ return false;
+ return true;
+ };
+
+ auto MergeMasks = [](ArrayRef<int> Mask, MutableArrayRef<int> MergedMask) {
+ assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
+ for (int i = 0, e = MergedMask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ assert((MergedMask[i] < 0 || MergedMask[i] == M) &&
+ "Unexpected mask element");
+ MergedMask[i] = M;
+ }
+ };
+
+ if (MatchMasks(InLaneMask, RepeatMask)) {
+ // Merge this lane mask into the final repeat mask.
+ MergeMasks(InLaneMask, RepeatMask);
+ continue;
}
- // Check that within each lane we have a consistent shuffle mask.
- int k = i % LaneSize;
- if (InLaneMask[k] < 0) {
- InLaneMask[k] = Mask[i] % LaneSize;
- } else if (InLaneMask[k] != Mask[i] % LaneSize) {
- // This doesn't fit a repeating in-lane mask.
- return SDValue();
+ // Didn't find a match. Swap the operands and try again.
+ std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]);
+ ShuffleVectorSDNode::commuteMask(InLaneMask);
+
+ if (MatchMasks(InLaneMask, RepeatMask)) {
+ // Merge this lane mask into the final repeat mask.
+ MergeMasks(InLaneMask, RepeatMask);
+ continue;
}
+
+ // Couldn't find a match with the operands in either order.
+ return SDValue();
}
- // First shuffle the lanes into place.
- MVT LaneVT = MVT::getVectorVT(VT.isFloatingPoint() ? MVT::f64 : MVT::i64,
- VT.getSizeInBits() / 64);
- SmallVector<int, 8> LaneMask((unsigned)NumLanes * 2, -1);
- for (int i = 0; i < NumLanes; ++i)
- if (Lanes[i] >= 0) {
- LaneMask[2 * i + 0] = 2*Lanes[i] + 0;
- LaneMask[2 * i + 1] = 2*Lanes[i] + 1;
+ // Now handle any lanes with only one source.
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ // If this lane has already been processed, skip it.
+ if (LaneSrcs[Lane][0] >= 0)
+ continue;
+
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = Mask[(Lane * LaneSize) + i];
+ if (M < 0)
+ continue;
+
+ // If RepeatMask isn't defined yet we can define it ourselves.
+ if (RepeatMask[i] < 0)
+ RepeatMask[i] = M % LaneSize;
+
+ if (RepeatMask[i] < Size) {
+ if (RepeatMask[i] != M % LaneSize)
+ return SDValue();
+ LaneSrcs[Lane][0] = M / LaneSize;
+ } else {
+ if (RepeatMask[i] != ((M % LaneSize) + Size))
+ return SDValue();
+ LaneSrcs[Lane][1] = M / LaneSize;
+ }
}
- V1 = DAG.getBitcast(LaneVT, V1);
- V2 = DAG.getBitcast(LaneVT, V2);
- SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);
+ if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0)
+ return SDValue();
+ }
+
+ SmallVector<int, 16> NewMask(Size, -1);
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Src = LaneSrcs[Lane][0];
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = -1;
+ if (Src >= 0)
+ M = Src * LaneSize + i;
+ NewMask[Lane * LaneSize + i] = M;
+ }
+ }
+ SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+ // Ensure we didn't get back the shuffle we started with.
+ // FIXME: This is a hack to make up for some splat handling code in
+ // getVectorShuffle.
+ if (isa<ShuffleVectorSDNode>(NewV1) &&
+ cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
+ return SDValue();
- // Cast it back to the type we actually want.
- LaneShuffle = DAG.getBitcast(VT, LaneShuffle);
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ int Src = LaneSrcs[Lane][1];
+ for (int i = 0; i != LaneSize; ++i) {
+ int M = -1;
+ if (Src >= 0)
+ M = Src * LaneSize + i;
+ NewMask[Lane * LaneSize + i] = M;
+ }
+ }
+ SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+ // Ensure we didn't get back the shuffle we started with.
+ // FIXME: This is a hack to make up for some splat handling code in
+ // getVectorShuffle.
+ if (isa<ShuffleVectorSDNode>(NewV2) &&
+ cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
+ return SDValue();
- // Now do a simple shuffle that isn't lane crossing.
- SmallVector<int, 8> NewMask((unsigned)Size, -1);
- for (int i = 0; i < Size; ++i)
- if (Mask[i] >= 0)
- NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;
- assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&
- "Must not introduce lane crosses at this point!");
+ for (int i = 0; i != Size; ++i) {
+ NewMask[i] = RepeatMask[i % LaneSize];
+ if (NewMask[i] < 0)
+ continue;
- return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
+ NewMask[i] += (i / LaneSize) * LaneSize;
+ }
+ return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
}
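// Standalone sketch (hypothetical values, not part of this patch or of LLVM's
// API) of the two-step lowering built above for a v8i32-style shuffle: first
// shuffle whole 128-bit lanes of the inputs into place (NewV1/NewV2), then
// apply a single mask whose pattern repeats in every lane (the RepeatMask).
#include <array>
#include <cassert>

using V8 = std::array<int, 8>;

// Concatenated-operand shuffle: M[i] in 0..7 reads A, 8..15 reads B.
static V8 shuf(const V8 &A, const V8 &B, const V8 &M) {
  V8 R{};
  for (int i = 0; i != 8; ++i)
    R[i] = M[i] < 8 ? A[M[i]] : B[M[i] - 8];
  return R;
}

int main() {
  V8 V1 = {10, 11, 12, 13, 14, 15, 16, 17};
  V8 V2 = {20, 21, 22, 23, 24, 25, 26, 27};
  // Cross-lane mask: lane 0 interleaves V1/V2 lane 0, lane 1 interleaves
  // V2 lane 1 with V1 lane 1, so the per-lane sources differ.
  V8 Mask = {0, 8, 1, 9, 12, 4, 13, 5};

  // Step 1: move the needed 128-bit lanes into position.
  V8 NewV1 = shuf(V1, V2, {0, 1, 2, 3, 12, 13, 14, 15}); // V1 lane0 | V2 lane1
  V8 NewV2 = shuf(V1, V2, {8, 9, 10, 11, 4, 5, 6, 7});   // V2 lane0 | V1 lane1
  // Step 2: one shuffle whose RepeatMask {0,8,1,9} repeats in each lane.
  V8 Result = shuf(NewV1, NewV2, {0, 8, 1, 9, 4, 12, 5, 13});

  assert(Result == shuf(V1, V2, Mask)); // same result as the original shuffle
}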
/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
@@ -13731,6 +14658,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
DAG, Subtarget);
@@ -13765,6 +14697,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Result;
+
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
@@ -13775,10 +14708,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 4-lane 64-bit integer shuffles.
@@ -13872,7 +14806,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit floating point shuffles.
@@ -13961,17 +14895,18 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// vpunpckhwd instrs than vblend.
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
- Mask, DAG))
+ Mask, Subtarget, DAG))
return V;
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit integer shuffles.
@@ -14000,8 +14935,8 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// vpunpcklwd and vpunpckhwd instrs.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
!Subtarget.hasAVX512())
- if (SDValue V =
- lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG))
+ if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2,
+ Mask, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
@@ -14084,7 +15019,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
- Mask, DAG);
+ Mask, Subtarget, DAG);
}
/// Handle lowering of 16-lane 16-bit integer shuffles.
@@ -14146,9 +15081,14 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (V2.isUndef()) {
// There are no generalized cross-lane shuffle operations available on i16
// element types.
- if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
+ if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
Mask, DAG, Subtarget);
+ }
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
@@ -14174,8 +15114,14 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Result;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 32-lane 8-bit integer shuffles.
@@ -14236,9 +15182,14 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are no generalized cross-lane shuffle operations available on i8
// element types.
- if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+ if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
DAG, Subtarget);
+ }
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
@@ -14254,8 +15205,14 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Result;
+ // Try to permute the lanes and then use a per-lane permute.
+ if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
+ return V;
+
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
+ Subtarget, DAG);
}
/// High-level routine to lower various 256-bit x86 vector shuffles.
@@ -14757,6 +15714,11 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
return V;
+ // Use dedicated pack instructions for masks that match their pattern.
+ if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
+ Subtarget))
+ return V;
+
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -14845,6 +15807,39 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
}
+// Determine if this shuffle can be implemented with a KSHIFT instruction.
+// Returns the shift amount if possible or -1 if not. This is a simplified
+// version of matchVectorShuffleAsShift.
+static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
+ int MaskOffset, const APInt &Zeroable) {
+ int Size = Mask.size();
+
+ auto CheckZeros = [&](int Shift, bool Left) {
+ for (int j = 0; j < Shift; ++j)
+ if (!Zeroable[j + (Left ? 0 : (Size - Shift))])
+ return false;
+
+ return true;
+ };
+
+ auto MatchShift = [&](int Shift, bool Left) {
+ unsigned Pos = Left ? Shift : 0;
+ unsigned Low = Left ? 0 : Shift;
+ unsigned Len = Size - Shift;
+ return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset);
+ };
+
+ for (int Shift = 1; Shift != Size; ++Shift)
+ for (bool Left : {true, false})
+ if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) {
+ Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR;
+ return Shift;
+ }
+
+ return -1;
+}
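// Standalone sketch (hypothetical names, not part of this patch) of what the
// matcher above recognizes for an 8-element mask: a KSHIFTR/KSHIFTL of the
// whole mask register is just a shuffle that slides every element by Shift
// positions, provided the vacated positions are zeroable.
#include <array>
#include <cassert>

int main() {
  constexpr int Size = 8;
  // Elements 2..7 of the source move to positions 0..5; positions 6 and 7
  // must be zeroable. This is KSHIFTR by 2.
  std::array<int, Size> Mask = {2, 3, 4, 5, 6, 7, -1, -1};
  std::array<bool, Size> Zeroable = {false, false, false, false,
                                     false, false, true,  true};

  int Shift = 2;
  bool Right = true;
  // CheckZeros: the Shift positions at the open end must be zeroable.
  for (int j = 0; j != Shift; ++j)
    assert(Zeroable[j + (Right ? Size - Shift : 0)]);
  // MatchShift: the remaining positions form the sequence Low, Low+1, ...
  int Pos = Right ? 0 : Shift, Low = Right ? Shift : 0;
  for (int i = 0; i != Size - Shift; ++i)
    assert(Mask[Pos + i] < 0 || Mask[Pos + i] == Low + i);
  // The mirrored pattern {Z, Z, 0, 1, 2, 3, 4, 5} would match KSHIFTL by 2.
}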
+
+
// Lower vXi1 vector shuffles.
// There is no dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to SIMD
@@ -14854,6 +15849,9 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ assert(Subtarget.hasAVX512() &&
+ "Cannot lower 512-bit vectors w/o basic ISA!");
+
unsigned NumElts = Mask.size();
// Try to recognize shuffles that are just padding a subvector with zeros.
@@ -14880,9 +15878,21 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Extract, DAG.getIntPtrConstant(0, DL));
}
+ // Try to match KSHIFTs.
+ // TODO: Support narrower than legal shifts by widening and extracting.
+ if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) {
+ unsigned Offset = 0;
+ for (SDValue V : { V1, V2 }) {
+ unsigned Opcode;
+ int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
+ if (ShiftAmt >= 0)
+ return DAG.getNode(Opcode, DL, VT, V,
+ DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ Offset += NumElts; // Increment for next iteration.
+ }
+ }
+
- assert(Subtarget.hasAVX512() &&
- "Cannot lower 512-bit vectors w/o basic ISA!");
MVT ExtVT;
switch (VT.SimpleTy) {
default:
@@ -15069,6 +16079,14 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
canWidenShuffleElements(ZeroableMask, WidenedMask)) {
+ // Shuffle mask widening should not interfere with a broadcast opportunity
+ // by obfuscating the operands with bitcasts.
+ // TODO: Avoid lowering directly from this top-level function: make this
+ // a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
+ if (SDValue Broadcast =
+ lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return Broadcast;
+
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
: MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
@@ -15135,34 +16153,27 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
- return SDValue();
- auto *CondBV = cast<BuildVectorSDNode>(Cond);
-
// Only non-legal VSELECTs reach this lowering, convert those into generic
// shuffles and re-use the shuffle lowering path for blends.
SmallVector<int, 32> Mask;
- for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) {
- SDValue CondElt = CondBV->getOperand(i);
- int M = i;
- // We can't map undef to undef here. They have different meanings. Treat
- // as the same as zero.
- if (CondElt.isUndef() || isNullConstant(CondElt))
- M += Size;
- Mask.push_back(M);
- }
- return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask);
+ if (createShuffleMaskFromVSELECT(Mask, Cond))
+ return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask);
+
+ return SDValue();
}
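// Standalone sketch (hypothetical, not part of this patch) of the mapping a
// helper like createShuffleMaskFromVSELECT is expected to perform: a VSELECT
// whose condition is a build vector of constants is just a blend, so it can
// be expressed as a shuffle of LHS and RHS. A true lane keeps element i of
// LHS; a false or undef lane takes element i of RHS (undef must be treated as
// false here, not mapped to an undef shuffle element).
#include <array>
#include <cassert>

int main() {
  constexpr int Size = 4;
  // Condition lanes: true, false, true, false.
  std::array<bool, Size> Cond = {true, false, true, false};
  std::array<int, Size> ShuffleMask{};
  for (int i = 0; i != Size; ++i)
    ShuffleMask[i] = Cond[i] ? i : i + Size; // i -> LHS, i + Size -> RHS

  std::array<int, Size> Expected = {0, 5, 2, 7};
  assert(ShuffleMask == Expected);
}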
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
// A vselect where all conditions and data are constants can be optimized into
// a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
- ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
+ if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
return SDValue();
// Try to lower this to a blend-style vector shuffle. This can handle all
@@ -15172,7 +16183,9 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
// If this VSELECT has a vector of i1 as a mask, it will be directly matched
// with patterns on the mask registers on AVX-512.
- if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
+ MVT CondVT = Cond.getSimpleValueType();
+ unsigned CondEltSize = Cond.getScalarValueSizeInBits();
+ if (CondEltSize == 1)
return Op;
// Variable blends are only legal from SSE4.1 onward.
@@ -15181,24 +16194,32 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned NumElts = VT.getVectorNumElements();
// If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
// into an i1 condition so that we can use the mask-based 512-bit blend
// instructions.
if (VT.getSizeInBits() == 512) {
- SDValue Cond = Op.getOperand(0);
- // The vNi1 condition case should be handled above as it can be trivially
- // lowered.
- assert(Cond.getValueType().getScalarSizeInBits() ==
- VT.getScalarSizeInBits() &&
- "Should have a size-matched integer condition!");
// Build a mask by testing the condition against zero.
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond,
- getZeroVector(VT, Subtarget, DAG, dl),
+ DAG.getConstant(0, dl, CondVT),
ISD::SETNE);
// Now return a new VSELECT using the mask.
- return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
+ return DAG.getSelect(dl, VT, Mask, LHS, RHS);
+ }
+
+ // SEXT/TRUNC cases where the mask doesn't match the destination size.
+ if (CondEltSize != EltSize) {
+ // If we don't have a sign splat, rely on the expansion.
+ if (CondEltSize != DAG.ComputeNumSignBits(Cond))
+ return SDValue();
+
+ MVT NewCondSVT = MVT::getIntegerVT(EltSize);
+ MVT NewCondVT = MVT::getVectorVT(NewCondSVT, NumElts);
+ Cond = DAG.getSExtOrTrunc(Cond, dl, NewCondVT);
+ return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS);
}
// Only some types will be legal on some subtargets. If we can emit a legal
@@ -15219,10 +16240,10 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
case MVT::v8i16:
case MVT::v16i16: {
// Bitcast everything to the vXi8 type and use a vXi8 vselect.
- MVT CastVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
- SDValue Cond = DAG.getBitcast(CastVT, Op->getOperand(0));
- SDValue LHS = DAG.getBitcast(CastVT, Op->getOperand(1));
- SDValue RHS = DAG.getBitcast(CastVT, Op->getOperand(2));
+ MVT CastVT = MVT::getVectorVT(MVT::i8, NumElts * 2);
+ Cond = DAG.getBitcast(CastVT, Cond);
+ LHS = DAG.getBitcast(CastVT, LHS);
+ RHS = DAG.getBitcast(CastVT, RHS);
SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS);
return DAG.getBitcast(VT, Select);
}
@@ -15298,34 +16319,25 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0) // the operation is legal
+ return Op;
- // If the kshift instructions of the correct width aren't natively supported
- // then we need to promote the vector to the native size to get the correct
- // zeroing behavior.
- if (VecVT.getVectorNumElements() < 16) {
- VecVT = MVT::v16i1;
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
- DAG.getUNDEF(VecVT), Vec,
+ // Extend to natively supported kshift.
+ unsigned NumElems = VecVT.getVectorNumElements();
+ MVT WideVecVT = VecVT;
+ if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
+ WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
+ DAG.getUNDEF(WideVecVT), Vec,
DAG.getIntPtrConstant(0, dl));
}
- // Extracts from element 0 are always allowed.
- if (IdxVal != 0) {
- // Use kshiftr instruction to move to the lower element.
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
- }
-
- // Shrink to v16i1 since that's always legal.
- if (VecVT.getVectorNumElements() > 16) {
- VecVT = MVT::v16i1;
- Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
- DAG.getIntPtrConstant(0, dl));
- }
+ // Use kshiftr instruction to move to the lower element.
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
- // Convert to a bitcast+aext/trunc.
- MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
- return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ DAG.getIntPtrConstant(0, dl));
}
SDValue
@@ -15793,7 +16805,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
- if (isPositionIndependent() && !Subtarget.is64Bit()) {
+ if (OpFlag) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
@@ -16173,6 +17185,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
+/// TODO: Can this be moved to general expansion code?
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getSimpleValueType();
@@ -16182,8 +17195,8 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
- // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away
+ // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
+ // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's optimized away
// during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits - 1, dl, MVT::i8));
@@ -16193,10 +17206,10 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
@@ -16220,6 +17233,56 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({ Lo, Hi }, dl);
}
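// Standalone sketch (hypothetical names, not part of this patch) of the
// SHL_PARTS lowering above on plain integers: a 64-bit shift-left of a value
// split into two i32 halves uses a funnel shift for the high half. The amount
// is masked to 0..31 for the plain shift, matching the AND inserted above; the
// amt >= 32 case is resolved by the select/CMOV emitted later in the function
// and is modelled here directly.
#include <cassert>
#include <cstdint>

// fshl(Hi, Lo, Amt) for i32: top 32 bits of (Hi:Lo) << Amt, Amt already 0..31.
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  return Amt == 0 ? Hi : (Hi << Amt) | (Lo >> (32 - Amt));
}

static uint64_t shl_parts(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  unsigned Safe = Amt & 31;
  uint32_t Tmp2 = fshl32(Hi, Lo, Safe);   // candidate high half
  uint32_t Tmp3 = Lo << Safe;             // candidate low half
  uint32_t OutLo = (Amt & 32) ? 0 : Tmp3;
  uint32_t OutHi = (Amt & 32) ? Tmp3 : Tmp2;
  return ((uint64_t)OutHi << 32) | OutLo;
}

int main() {
  uint64_t X = 0x0123456789abcdefULL;
  for (unsigned Amt = 0; Amt != 64; ++Amt)
    assert(shl_parts((uint32_t)X, (uint32_t)(X >> 32), Amt) == X << Amt);
}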
+static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
+ "Unexpected funnel shift opcode!");
+
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+
+ bool IsFSHR = Op.getOpcode() == ISD::FSHR;
+
+ if (VT.isVector()) {
+ assert(Subtarget.hasVBMI2() && "Expected VBMI2");
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ APInt APIntShiftAmt;
+ if (isConstantSplat(Amt, APIntShiftAmt)) {
+ uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
+ return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
+ Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ }
+
+ return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ Op0, Op1, Amt);
+ }
+
+ assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ "Unexpected funnel shift type!");
+
+ // Expand slow SHLD/SHRD cases if we are not optimizing for size.
+ bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ if (!OptForSize && Subtarget.isSHLDSlow())
+ return SDValue();
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
+ if (VT == MVT::i16)
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
+ DAG.getConstant(15, DL, Amt.getValueType()));
+
+ unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD);
+ return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt);
+}
+
// Try to use a packed vector operation to handle i64 on 32-bit targets when
// AVX512DQ is enabled.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
@@ -16271,9 +17334,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
// Legal.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(VT))
return Op;
- if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit()) {
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit())
return Op;
- }
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
@@ -16331,7 +17393,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
Chain = Result.getValue(1);
SDValue InFlag = Result.getValue(2);
- // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // FIXME: Currently the FST is glued to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
@@ -16412,13 +17474,11 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
SDValue Result;
if (Subtarget.hasSSE3()) {
- // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
+ // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
- SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
- SDValue Shuffle = DAG.getVectorShuffle(MVT::v4i32, dl, S2F, S2F, {2,3,0,1});
- Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
- DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
+ SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
@@ -16910,33 +17970,43 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
+ // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
+ if (InVT == MVT::v8i8) {
+ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ return SDValue();
+
+ In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
+ MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
+ // FIXME: This should be ANY_EXTEND_VECTOR_INREG for ANY_EXTEND input.
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, VT, In);
+ }
+
if (Subtarget.hasInt256())
- return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
+ return Op;
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
- // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpmovzxwd for 4 lower elements v8i16 -> v4i32.
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
// Concat upper and lower parts.
//
// v4i32 -> v4i64
- // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpmovzxdq for 4 lower elements v4i32 -> v2i64.
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
- SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+
+ SDValue OpLo = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, HalfVT, In);
+
+ SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
- SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
-
- MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
-
- OpLo = DAG.getBitcast(HVT, OpLo);
- OpHi = DAG.getBitcast(HVT, OpHi);
+ OpHi = DAG.getBitcast(HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
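// A worked example (hypothetical values, not part of this patch) of the split
// used above for a v8i16 -> v8i32 zero extension without AVX2:
//   In                 = {a, b, c, d, e, f, g, h}        (eight i16)
//   OpLo               = zero_extend_vector_inreg(In)
//                      = {a, b, c, d}                    (four i32)
//   OpHi               = vpunpckhwd(In, 0)
//                      = {e, 0, f, 0, g, 0, h, 0}        (eight i16)
//                      = {e, f, g, h} after the bitcast  (four i32)
//   concat(OpLo, OpHi) = {a, b, c, d, e, f, g, h}        (eight i32)
// The zero word lands in the upper half of each i32 because of the
// little-endian lane layout. For ANY_EXTEND the unpack partner can be undef
// instead of zero, since the upper bits are don't-care.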
@@ -16965,7 +18035,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
- // For all vectors, but vXi8 we can just emit a sign_extend a shift. This
+ // For all vectors, but vXi8 we can just emit a sign_extend and a shift. This
// avoids a constant pool load.
if (VT.getVectorElementType() != MVT::i8) {
SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);
@@ -16995,7 +18065,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
}
SDValue One = DAG.getConstant(1, DL, WideVT);
- SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, DL);
+ SDValue Zero = DAG.getConstant(0, DL, WideVT);
SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);
@@ -17035,9 +18105,10 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
const X86Subtarget &Subtarget) {
assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) &&
"Unexpected PACK opcode");
+ assert(DstVT.isVector() && "VT not a vector?");
// Requires SSE2 but AVX512 has fast vector truncate.
- if (!Subtarget.hasSSE2() || Subtarget.hasAVX512() || !DstVT.isVector())
+ if (!Subtarget.hasSSE2())
return SDValue();
EVT SrcVT = In.getValueType();
@@ -17203,10 +18274,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
}
// If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
if (Subtarget.hasDQI())
- return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
- In, ISD::SETGT);
- return DAG.getSetCC(DL, VT, In, getZeroVector(InVT, Subtarget, DAG, DL),
- ISD::SETNE);
+ return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
+ return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@@ -17219,20 +18288,22 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
+ // If called by the legalizer just return.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+ return SDValue();
+
if (VT.getVectorElementType() == MVT::i1)
return LowerTruncateVecI1(Op, DAG, Subtarget);
// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (Subtarget.hasAVX512()) {
- // word to byte only under BWI
- if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) { // v16i16 -> v16i8
- // Make sure we're allowed to promote 512-bits.
- if (Subtarget.canExtendTo512DQ())
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
- } else {
+ // word to byte only under BWI. Otherwise we have to promote to v16i32
+ // and then truncate that. But we should only do that if we haven't been
+ // asked to avoid 512-bit vectors. The actual promotion to v16i32 will be
+ // handled by isel patterns.
+ if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
+ Subtarget.canExtendTo512DQ())
return Op;
- }
}
unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16);
@@ -17241,8 +18312,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// Truncate with PACKUS if we are truncating a vector with leading zero bits
// that extend all the way to the packed/truncated value.
// Pre-SSE41 we can only use PACKUSWB.
- KnownBits Known;
- DAG.computeKnownBits(In, Known);
+ KnownBits Known = DAG.computeKnownBits(In);
if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
if (SDValue V =
truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget))
@@ -17320,6 +18390,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBitcast(MVT::v8i16, res);
}
+ if (VT == MVT::v16i8 && InVT == MVT::v16i16) {
+ // Use an AND to zero upper bits for PACKUS.
+ In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT));
+
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
+ DAG.getIntPtrConstant(8, DL));
+ return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
+ }
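+ // A worked example (hypothetical values) of the AND+PACKUS step above:
+ // PACKUSWB treats each i16 lane as signed and saturates it to [0, 255], so
+ // masking with 255 first makes saturation a no-op and the pack acts as a
+ // plain element-wise truncation:
+ //   In       = {0x1234, 0x00ff, 0xff80, 0x0001, ...}
+ //   In & 255 = {0x0034, 0x00ff, 0x0080, 0x0001, ...}
+ //   PACKUS(lo half, hi half) -> {0x34, 0xff, 0x80, 0x01, ...} == trunc(In)
+ // Without the AND, 0x1234 would saturate to 0xff and 0xff80 (negative) to 0.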
+
// Handle truncation of V256 to V128 using shuffles.
assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
@@ -17405,6 +18486,98 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
In, DAG.getUNDEF(SVT)));
}
+/// Horizontal vector math instructions may be slower than normal math with
+/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
+/// implementation, and likely shuffle complexity of the alternate sequence.
+static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ bool IsOptimizingSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool HasFastHOps = Subtarget.hasFastHorizontalOps();
+ return !IsSingleSource || IsOptimizingSize || HasFastHOps;
+}
+
+/// Depending on uarch and/or optimizing for size, we might prefer to use a
+/// vector operation in place of the typical scalar operation.
+static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // If both operands have other uses, this is probably not profitable.
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ if (!LHS.hasOneUse() && !RHS.hasOneUse())
+ return Op;
+
+ // FP horizontal add/sub were added with SSE3. Integer with SSSE3.
+ bool IsFP = Op.getSimpleValueType().isFloatingPoint();
+ if (IsFP && !Subtarget.hasSSE3())
+ return Op;
+ if (!IsFP && !Subtarget.hasSSSE3())
+ return Op;
+
+ // Defer forming the minimal horizontal op if the vector source has more than
+ // the 2 extract element uses that we're matching here. In that case, we might
+ // form a horizontal op that includes more than 1 add/sub op.
+ if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ LHS.getOperand(0) != RHS.getOperand(0) ||
+ !LHS.getOperand(0)->hasNUsesOfValue(2, 0))
+ return Op;
+
+ if (!isa<ConstantSDNode>(LHS.getOperand(1)) ||
+ !isa<ConstantSDNode>(RHS.getOperand(1)) ||
+ !shouldUseHorizontalOp(true, DAG, Subtarget))
+ return Op;
+
+ // Allow commuted 'hadd' ops.
+ // TODO: Allow commuted (f)sub by negating the result of (F)HSUB?
+ unsigned HOpcode;
+ switch (Op.getOpcode()) {
+ case ISD::ADD: HOpcode = X86ISD::HADD; break;
+ case ISD::SUB: HOpcode = X86ISD::HSUB; break;
+ case ISD::FADD: HOpcode = X86ISD::FHADD; break;
+ case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
+ default:
+ llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
+ }
+ unsigned LExtIndex = LHS.getConstantOperandVal(1);
+ unsigned RExtIndex = RHS.getConstantOperandVal(1);
+ if (LExtIndex == 1 && RExtIndex == 0 &&
+ (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
+ std::swap(LExtIndex, RExtIndex);
+
+ // TODO: This can be extended to handle other adjacent extract pairs.
+ if (LExtIndex != 0 || RExtIndex != 1)
+ return Op;
+
+ SDValue X = LHS.getOperand(0);
+ EVT VecVT = X.getValueType();
+ unsigned BitWidth = VecVT.getSizeInBits();
+ assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
+ "Not expecting illegal vector widths here");
+
+ // Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
+ // equivalent, so extract the 256/512-bit source op to 128-bit.
+ // This is free: ymm/zmm -> xmm.
+ SDLoc DL(Op);
+ if (BitWidth == 256 || BitWidth == 512)
+ X = extract128BitVector(X, 0, DAG, DL);
+
+ // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
+ // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
+ // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
+ SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
+ DAG.getIntPtrConstant(0, DL));
+}
+
+/// Depending on uarch and/or optimizing for size, we might prefer to use a
+/// vector operation in place of the typical scalar operation.
+static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
+ "Only expecting float/double");
+ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+}
+
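// Standalone sketch (hypothetical, not part of this patch) of the
// scalar-to-horizontal rewrite above, using HADDPS semantics on a 4-element
// vector: haddps(X, X) puts X[0] + X[1] into element 0, so
//   add (extractelt X, 0), (extractelt X, 1) --> extractelt (hadd X, X), 0.
#include <array>
#include <cassert>

static std::array<float, 4> haddps(const std::array<float, 4> &A,
                                   const std::array<float, 4> &B) {
  // Element order of HADDPS: {A0+A1, A2+A3, B0+B1, B2+B3}.
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}

int main() {
  std::array<float, 4> X = {1.0f, 2.0f, 3.0f, 4.0f};
  float Scalar = X[0] + X[1];         // the original scalar add
  float Horizontal = haddps(X, X)[0]; // the rewritten form
  assert(Scalar == Horizontal);
}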
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
@@ -17424,43 +18597,36 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);
+ assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
+ VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
+ VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
+ "Unexpected type in LowerFABSorFNEG");
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
- MVT LogicVT;
- MVT EltVT;
-
- if (VT.isVector()) {
- LogicVT = VT;
- EltVT = VT.getVectorElementType();
- } else if (IsF128) {
- // SSE instructions are used for optimized f128 logical operations.
- LogicVT = MVT::f128;
- EltVT = VT;
- } else {
- // There are no scalar bitwise logical SSE/AVX instructions, so we
- // generate a 16-byte vector constant and logic op even for the scalar case.
- // Using a 16-byte mask allows folding the load of the mask with
- // the logic op, so it can save (~4 bytes) on code size.
+ // There are no scalar bitwise logical SSE/AVX instructions, so we
+ // generate a 16-byte vector constant and logic op even for the scalar case.
+ // Using a 16-byte mask allows folding the load of the mask with
+ // the logic op, so it can save (~4 bytes) on code size.
+ bool IsFakeVector = !VT.isVector() && !IsF128;
+ MVT LogicVT = VT;
+ if (IsFakeVector)
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
- EltVT = VT;
- }
- unsigned EltBits = EltVT.getSizeInBits();
+ unsigned EltBits = VT.getScalarSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
- APInt MaskElt =
- IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
- const fltSemantics &Sem =
- EltVT == MVT::f64 ? APFloat::IEEEdouble() :
- (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
+ APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
+ APInt::getSignMask(EltBits);
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
- unsigned LogicOp =
- IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
+ unsigned LogicOp = IsFABS ? X86ISD::FAND :
+ IsFNABS ? X86ISD::FOR :
+ X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (VT.isVector() || IsF128)
@@ -17496,10 +18662,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
"Unexpected type in LowerFCOPYSIGN");
- MVT EltVT = VT.getScalarType();
- const fltSemantics &Sem =
- EltVT == MVT::f64 ? APFloat::IEEEdouble()
- : (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
// Perform all scalar logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE.
@@ -17516,7 +18679,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue SignMask = DAG.getConstantFP(
APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
SDValue MagMask = DAG.getConstantFP(
- APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
+ APFloat(Sem, APInt::getSignedMaxValue(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
if (IsFakeVector)
@@ -17527,7 +18690,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// TODO: If we had general constant folding for FP logic ops, this check
// wouldn't be necessary.
SDValue MagBits;
- if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Mag)) {
+ if (ConstantFPSDNode *Op0CN = isConstOrConstSplatFP(Mag)) {
APFloat APF = Op0CN->getValueAPF();
APF.clearSign();
MagBits = DAG.getConstantFP(APF, dl, LogicVT);
@@ -17572,7 +18735,8 @@ static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
// Check whether an OR'd tree is PTEST-able.
static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ SDValue &X86CC) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget.hasSSE41())
@@ -17658,9 +18822,10 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
- SDValue Res = DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
- VecIns.back(), VecIns.back());
- return getSETCC(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, Res, DL, DAG);
+ X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
+ DL, MVT::i8);
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
+ VecIns.back(), VecIns.back());
}
/// return true if \c Op has a use that doesn't just read flags.
@@ -17684,8 +18849,8 @@ static bool hasNonFlagsUse(SDValue Op) {
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
-SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
- SelectionDAG &DAG) const {
+static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
@@ -17728,159 +18893,26 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
unsigned Opcode = 0;
unsigned NumOperands = 0;
- // Truncate operations may prevent the merge of the SETCC instruction
- // and the arithmetic instruction before it. Attempt to truncate the operands
- // of the arithmetic instruction and use a reduced bit-width instruction.
- bool NeedTruncation = false;
SDValue ArithOp = Op;
- if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
- SDValue Arith = Op->getOperand(0);
- // Both the trunc and the arithmetic op need to have one user each.
- if (Arith->hasOneUse())
- switch (Arith.getOpcode()) {
- default: break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR: {
- NeedTruncation = true;
- ArithOp = Arith;
- }
- }
- }
-
- // Sometimes flags can be set either with an AND or with an SRL/SHL
- // instruction. SRL/SHL variant should be preferred for masks longer than this
- // number of bits.
- const int ShiftToAndMaxMaskWidth = 32;
- const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
- case ISD::ADD:
- // We only want to rewrite this as a target-specific node with attached
- // flags if there is a reasonable chance of either using that to do custom
- // instructions selection that can fold some of the memory operands, or if
- // only the flags are used. If there are other uses, leave the node alone
- // and emit a test instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg &&
- UI->getOpcode() != ISD::SETCC &&
- UI->getOpcode() != ISD::STORE)
- goto default_case;
-
- if (auto *C = dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->isOne() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
-
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->isAllOnesValue() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
- }
-
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
- case ISD::SHL:
- case ISD::SRL:
- // If we have a constant logical shift that's only used in a comparison
- // against zero turn it into an equivalent AND. This allows turning it into
- // a TEST instruction later.
- if (ZeroCheck && Op->hasOneUse() &&
- isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
- EVT VT = Op.getValueType();
- unsigned BitWidth = VT.getSizeInBits();
- unsigned ShAmt = Op->getConstantOperandVal(1);
- if (ShAmt >= BitWidth) // Avoid undefined shifts.
- break;
- APInt Mask = ArithOp.getOpcode() == ISD::SRL
- ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
- : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
- if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
- break;
- Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
- DAG.getConstant(Mask, dl, VT));
- }
- break;
-
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better. However, AND should be
- // preferred if the instruction can be combined into ANDN.
- if (!hasNonFlagsUse(Op)) {
- SDValue Op0 = ArithOp->getOperand(0);
- SDValue Op1 = ArithOp->getOperand(1);
- EVT VT = ArithOp.getValueType();
- bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
- bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
- bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
-
- // If we cannot select an ANDN instruction, check if we can replace
- // AND+IMM64 with a shift before giving up. This is possible for masks
- // like 0xFF000000 or 0x00FFFFFF and if we care only about the zero flag.
- if (!isProperAndn) {
- if (!ZeroCheck)
- break;
-
- assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
- auto *CN = dyn_cast<ConstantSDNode>(Op1);
- if (!CN)
- break;
-
- const APInt &Mask = CN->getAPIntValue();
- if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
- break; // Prefer TEST instruction.
-
- unsigned BitWidth = Mask.getBitWidth();
- unsigned LeadingOnes = Mask.countLeadingOnes();
- unsigned TrailingZeros = Mask.countTrailingZeros();
-
- if (LeadingOnes + TrailingZeros == BitWidth) {
- assert(TrailingZeros < VT.getSizeInBits() &&
- "Shift amount should be less than the type width");
- MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
- SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
- Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
- break;
- }
-
- unsigned LeadingZeros = Mask.countLeadingZeros();
- unsigned TrailingOnes = Mask.countTrailingOnes();
-
- if (LeadingZeros + TrailingOnes == BitWidth) {
- assert(LeadingZeros < VT.getSizeInBits() &&
- "Shift amount should be less than the type width");
- MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
- SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
- Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
- break;
- }
+ // because a TEST instruction will be better.
+ if (!hasNonFlagsUse(Op))
+ break;
- break;
- }
- }
LLVM_FALLTHROUGH;
+ case ISD::ADD:
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
- // Similar to ISD::ADD above, check if the uses will preclude useful
- // lowering of the target-specific node.
+ // Transform to an x86-specific ALU node with flags if there is a chance of
+ // using an RMW op or only the flags are used. Otherwise, leave
+ // the node alone and emit a 'test' instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
@@ -17891,6 +18923,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
+ case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
@@ -17901,8 +18934,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
case X86ISD::ADD:
case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
@@ -17912,36 +18943,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
}
- // If we found that truncation is beneficial, perform the truncation and
- // update 'Op'.
- if (NeedTruncation) {
- EVT VT = Op.getValueType();
- SDValue WideVal = Op->getOperand(0);
- EVT WideVT = WideVal.getValueType();
- unsigned ConvertedOp = 0;
- // Use a target machine opcode to prevent further DAGCombine
- // optimizations that may separate the arithmetic operations
- // from the setcc node.
- switch (WideVal.getOpcode()) {
- default: break;
- case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
- case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
- case ISD::AND: ConvertedOp = X86ISD::AND; break;
- case ISD::OR: ConvertedOp = X86ISD::OR; break;
- case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
- }
-
- if (ConvertedOp) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
- SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
- SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- Op = DAG.getNode(ConvertedOp, dl, VTs, V0, V1);
- }
- }
- }
-
if (Opcode == 0) {
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
@@ -17960,17 +18961,17 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
const SDLoc &dl, SelectionDAG &DAG) const {
if (isNullConstant(Op1))
- return EmitTest(Op0, X86CC, dl, DAG);
-
- assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
- "Unexpected comparison operation for MVT::i1 operands");
+ return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
// Only promote the compare up to I32 if it is a 16 bit operation
// with an immediate. 16 bit immediates are to be avoided.
- if ((Op0.getValueType() == MVT::i16 &&
- (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1))) &&
+ if (Op0.getValueType() == MVT::i16 &&
+ ((isa<ConstantSDNode>(Op0) &&
+ !cast<ConstantSDNode>(Op0)->getAPIntValue().isSignedIntN(8)) ||
+ (isa<ConstantSDNode>(Op1) &&
+ !cast<ConstantSDNode>(Op1)->getAPIntValue().isSignedIntN(8))) &&
!DAG.getMachineFunction().getFunction().optForMinSize() &&
!Subtarget.isAtom()) {
unsigned ExtendOp =
@@ -17983,6 +18984,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
return SDValue(Sub.getNode(), 1);
}
+ assert(Op0.getValueType().isFloatingPoint() && "Unexpected VT!");
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
@@ -18103,39 +19105,11 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
-/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
-/// according to equal/not-equal condition code \p CC.
-static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG) {
- // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i32 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- // Also promote i16 to i32 for performance / code size reason.
- if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
- Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
-
- // See if we can use the 32-bit instruction instead of the 64-bit one for a
- // shorter encoding. Since the former takes the modulo 32 of BitNo and the
- // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
- // known to be zero.
- if (Src.getValueType() == MVT::i64 &&
- DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
- Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
-
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (Src.getValueType() != BitNo.getValueType())
- BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
-
- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
- X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- return getSETCC(Cond, BT, dl , DAG);
-}
-
/// Result of 'and' is compared against zero. Change to a BT node if possible.
+/// Returns the BT node and the condition code needed to use it.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG) {
+ const SDLoc &dl, SelectionDAG &DAG,
+ SDValue &X86CC) {
assert(And.getOpcode() == ISD::AND && "Expected AND node!");
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
@@ -18144,7 +19118,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
if (Op1.getOpcode() == ISD::TRUNCATE)
Op1 = Op1.getOperand(0);
- SDValue LHS, RHS;
+ SDValue Src, BitNo;
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
@@ -18154,13 +19128,12 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
- KnownBits Known;
- DAG.computeKnownBits(Op0, Known);
+ KnownBits Known = DAG.computeKnownBits(Op0);
if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
return SDValue();
}
- LHS = Op1;
- RHS = Op0.getOperand(1);
+ Src = Op1;
+ BitNo = Op0.getOperand(1);
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
@@ -18168,24 +19141,49 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
SDValue AndLHS = Op0;
if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
- LHS = AndLHS.getOperand(0);
- RHS = AndLHS.getOperand(1);
+ Src = AndLHS.getOperand(0);
+ BitNo = AndLHS.getOperand(1);
} else {
// Use BT if the immediate can't be encoded in a TEST instruction or we
      // are optimizing for size and the immediate won't fit in a byte.
bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
isPowerOf2_64(AndRHSVal)) {
- LHS = AndLHS;
- RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, LHS.getValueType());
+ Src = AndLHS;
+ BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
+ Src.getValueType());
}
}
}
- if (LHS.getNode())
- return getBitTestCondition(LHS, RHS, CC, dl, DAG);
+ // No patterns found, give up.
+ if (!Src.getNode())
+ return SDValue();
- return SDValue();
+ // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reasons.
+ if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
+ Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
+
+ // See if we can use the 32-bit instruction instead of the 64-bit one for a
+ // shorter encoding. Since the former takes the modulo 32 of BitNo and the
+ // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
+ // known to be zero.
+ if (Src.getValueType() == MVT::i64 &&
+ DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (Src.getValueType() != BitNo.getValueType())
+ BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
+
+ X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
+ dl, MVT::i8);
+ return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}
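The BT lowering above rests on a simple scalar identity: bit N of Src equals the carry flag produced by BT, so (X & (1 << N)) == 0 maps to COND_AE and ((X >> N) & 1) != 0 maps to COND_B, with the bit index taken modulo the operand width. A minimal sketch of that identity in plain C++ (btCarry is a made-up helper that only models the flag, not LLVM code):

#include <cassert>
#include <cstdint>

// Model of BT r32, r/m32: copy bit (BitNo mod 32) of Src into CF.
static bool btCarry(uint32_t Src, uint32_t BitNo) {
  return (Src >> (BitNo & 31)) & 1u;
}

int main() {
  uint32_t X = 0x00400020u;
  for (uint32_t N = 0; N < 32; ++N) {
    // (X & (1 << N)) == 0  <=>  CF clear after BT  (COND_AE).
    assert(((X & (1u << N)) == 0) == !btCarry(X, N));
    // ((X >> N) & 1) != 0  <=>  CF set after BT    (COND_B).
    assert((((X >> N) & 1u) != 0) == btCarry(X, N));
  }
  return 0;
}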
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
@@ -18292,34 +19290,32 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
}
-/// Try to turn a VSETULT into a VSETULE by modifying its second
-/// operand \p Op1. If non-trivial (for example because it's not constant)
-/// return an empty value.
-static SDValue ChangeVSETULTtoVSETULE(const SDLoc &dl, SDValue Op1,
- SelectionDAG &DAG) {
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1.getNode());
+/// Given a simple buildvector constant, return a new vector constant with each
+/// element decremented. If decrementing would result in underflow or this
+/// is not a simple vector constant, return an empty value.
+static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
if (!BV)
return SDValue();
- MVT VT = Op1.getSimpleValueType();
- MVT EVT = VT.getVectorElementType();
- unsigned n = VT.getVectorNumElements();
- SmallVector<SDValue, 8> ULTOp1;
-
- for (unsigned i = 0; i < n; ++i) {
- ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
- if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT)
+ MVT VT = V.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> NewVecC;
+ SDLoc DL(V);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
return SDValue();
// Avoid underflow.
- APInt Val = Elt->getAPIntValue();
- if (Val == 0)
+ if (Elt->getAPIntValue().isNullValue())
return SDValue();
- ULTOp1.push_back(DAG.getConstant(Val - 1, dl, EVT));
+ NewVecC.push_back(DAG.getConstant(Elt->getAPIntValue() - 1, DL, EltVT));
}
- return DAG.getBuildVector(VT, dl, ULTOp1);
+ return DAG.getBuildVector(VT, DL, NewVecC);
}
/// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
@@ -18348,7 +19344,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
- SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG);
+ SDValue ULEOp1 = decrementVectorConstant(Op1, DAG);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
@@ -18362,9 +19358,9 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
break;
}
- SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1);
+ SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
- getZeroVector(VT, Subtarget, DAG, dl));
+ DAG.getConstant(0, dl, VT));
}
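The saturating-subtract rewrite works because usubsat(x, y) is zero exactly when x <=u y, and a strict unsigned less-than against a non-zero constant can be restated as a less-or-equal against the decremented constant. A small exhaustive check of that identity on i8 values (plain C++, illustrative only):

#include <cassert>
#include <cstdint>

// PSUBUS semantics: unsigned subtraction that clamps at zero.
static uint8_t usubsat(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 1; C < 256; ++C)
      // X <u C  <=>  X <=u C-1  <=>  usubsat(X, C-1) == 0   (needs C != 0).
      assert((X < C) == (usubsat(uint8_t(X), uint8_t(C - 1)) == 0));
  return 0;
}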
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
@@ -18527,13 +19523,26 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
!(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
- // Special case: Use min/max operations for unsigned compares. We only want
- // to do this for unsigned compares if we need to flip signs or if it allows
- // use to avoid an invert.
+ // Special case: Use min/max operations for unsigned compares.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (ISD::isUnsignedIntSetCC(Cond) &&
(FlipSigns || ISD::isTrueWhenEqual(Cond)) &&
TLI.isOperationLegal(ISD::UMIN, VT)) {
+ // If we have a constant operand, increment/decrement it and change the
+ // condition to avoid an invert.
+ // TODO: This could be extended to handle a non-splat constant by checking
+ // that each element of the constant is not the max/null value.
+ APInt C;
+ if (Cond == ISD::SETUGT && isConstantSplat(Op1, C) && !C.isMaxValue()) {
+ // X > C --> X >= (C+1) --> X == umax(X, C+1)
+ Op1 = DAG.getConstant(C + 1, dl, VT);
+ Cond = ISD::SETUGE;
+ }
+ if (Cond == ISD::SETULT && isConstantSplat(Op1, C) && !C.isNullValue()) {
+ // X < C --> X <= (C-1) --> X == umin(X, C-1)
+ Op1 = DAG.getConstant(C - 1, dl, VT);
+ Cond = ISD::SETULE;
+ }
bool Invert = false;
unsigned Opc;
switch (Cond) {
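The constant adjustment in the block above is the usual trick for avoiding an inverted result: a strict unsigned compare against a constant becomes a non-strict compare against the constant moved by one, which UMIN/UMAX then expresses directly. A quick exhaustive check of the scalar identity (illustrative only):

#include <algorithm>
#include <cassert>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    // X >u C  -->  X >=u C+1  -->  X == umax(X, C+1)   (requires C != max).
    for (unsigned C = 0; C < 255; ++C)
      assert((X > C) == (X == std::max(X, C + 1)));
    // X <u C  -->  X <=u C-1  -->  X == umin(X, C-1)   (requires C != 0).
    for (unsigned C = 1; C < 256; ++C)
      assert((X < C) == (X == std::min(X, C - 1)));
  }
  return 0;
}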
@@ -18577,23 +19586,21 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
assert(Subtarget.hasSSE2() && "Don't know how to lower!");
- // First cast everything to the right type.
- Op0 = DAG.getBitcast(MVT::v4i32, Op0);
- Op1 = DAG.getBitcast(MVT::v4i32, Op1);
-
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
SDValue SB;
if (FlipSigns) {
- SB = DAG.getConstant(0x80000000U, dl, MVT::v4i32);
+ SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64);
} else {
- SDValue Sign = DAG.getConstant(0x80000000U, dl, MVT::i32);
- SDValue Zero = DAG.getConstant(0x00000000U, dl, MVT::i32);
- SB = DAG.getBuildVector(MVT::v4i32, dl, {Sign, Zero, Sign, Zero});
+ SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64);
}
- Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
- Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
+ Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB);
+ Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB);
+
+ // Cast everything to the right type.
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
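The PCMPGTQ emulation named in the comment splits each 64-bit lane into 32-bit halves: the high halves decide the result unless they are equal, in which case the low halves, compared unsigned, do. A scalar model of that recombination, checked against a plain 64-bit signed compare (illustrative C++ assuming two's complement, not the vector code):

#include <cassert>
#include <cstdint>

// (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)): high halves signed, low halves
// unsigned, which is why only the sign bits need flipping for unsigned input.
static bool sgt64(int64_t A, int64_t B) {
  int32_t HiA = int32_t(uint64_t(A) >> 32), HiB = int32_t(uint64_t(B) >> 32);
  uint32_t LoA = uint32_t(A), LoB = uint32_t(B);
  return (HiA > HiB) || (HiA == HiB && LoA > LoB);
}

int main() {
  const int64_t Vals[] = {0, 1, -1, 42, -42, INT64_MIN, INT64_MAX,
                          0x100000000LL, -0x100000000LL};
  for (int64_t A : Vals)
    for (int64_t B : Vals)
      assert(sgt64(A, B) == (A > B));
  return 0;
}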
@@ -18658,10 +19665,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
return Result;
}
-// Try to select this as a KTEST+SETCC if possible.
-static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+// Try to select this as a KORTEST+SETCC if possible.
+static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ SDValue &X86CC) {
// Only support equality comparisons.
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return SDValue();
@@ -18677,12 +19685,12 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
!(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
return SDValue();
- X86::CondCode X86CC;
+ X86::CondCode X86Cond;
if (isNullConstant(Op1)) {
- X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ X86Cond = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
} else if (isAllOnesConstant(Op1)) {
// C flag is set for all ones.
- X86CC = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
+ X86Cond = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
} else
return SDValue();
@@ -18694,70 +19702,87 @@ static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
RHS = Op0.getOperand(1);
}
- SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
- return getSETCC(X86CC, KORTEST, dl, DAG);
+ X86CC = DAG.getConstant(X86Cond, dl, MVT::i8);
+ return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
}
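EmitKORTEST relies on KORTEST setting ZF when the OR of the two mask registers is zero and CF when it is all ones, so a compare against 0 uses COND_E/COND_NE and a compare against all-ones uses COND_B/COND_AE. A scalar model of that flag behaviour for a 16-bit mask (illustrative only; the kortest lambda is invented for the sketch):

#include <cassert>
#include <cstdint>

int main() {
  auto kortest = [](uint16_t A, uint16_t B, bool &ZF, bool &CF) {
    uint16_t Or = A | B;
    ZF = (Or == 0);      // all-zero mask
    CF = (Or == 0xFFFF); // all-ones mask
  };
  bool ZF, CF;
  kortest(0x0000, 0x0000, ZF, CF); assert(ZF && !CF);
  kortest(0x00FF, 0xFF00, ZF, CF); assert(!ZF && CF);
  kortest(0x0001, 0x0000, ZF, CF); assert(!ZF && !CF);
  return 0;
}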
-SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-
- MVT VT = Op.getSimpleValueType();
-
- if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
-
- assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDLoc dl(Op);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
+/// Emit flags for the given setcc condition and operands. Also returns the
+/// corresponding X86 condition code constant in X86CC.
+SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
+ ISD::CondCode CC, const SDLoc &dl,
+ SelectionDAG &DAG,
+ SDValue &X86CC) const {
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue NewSetCC = LowerAndToBT(Op0, CC, dl, DAG))
- return NewSetCC;
+ if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
+ return BT;
}
// Try to use PTEST for a tree ORs equality compared with 0.
// TODO: We could do AND tree with all 1s as well by using the C flag.
if (Op0.getOpcode() == ISD::OR && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue NewSetCC = LowerVectorAllZeroTest(Op0, CC, Subtarget, DAG))
- return NewSetCC;
+ if (SDValue PTEST = LowerVectorAllZeroTest(Op0, CC, Subtarget, DAG, X86CC))
+ return PTEST;
}
- // Try to lower using KTEST.
- if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget))
- return NewSetCC;
+ // Try to lower using KORTEST.
+ if (SDValue KORTEST = EmitKORTEST(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
+ return KORTEST;
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
-
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
- if (!Invert)
- return Op0;
- CCode = X86::GetOppositeBranchCondition(CCode);
- return getSETCC(CCode, Op0.getOperand(1), dl, DAG);
+ X86CC = Op0.getOperand(0);
+ if (Invert) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ X86CC = DAG.getConstant(CCode, dl, MVT::i8);
+ }
+
+ return Op0.getOperand(1);
}
}
bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
- X86::CondCode X86CC = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
- if (X86CC == X86::COND_INVALID)
+ X86::CondCode CondCode = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
+ if (CondCode == X86::COND_INVALID)
return SDValue();
- SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
+ SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- return getSETCC(X86CC, EFLAGS, dl, DAG);
+ X86CC = DAG.getConstant(CondCode, dl, MVT::i8);
+ return EFLAGS;
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDLoc dl(Op);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ SDValue X86CC;
+ SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
+ if (!EFLAGS)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
}
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
@@ -18781,6 +19806,70 @@ SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const
return getSETCC(CC, Cmp.getValue(1), DL, DAG);
}
+// This function returns three things: the arithmetic computation itself
+// (Value), an EFLAGS result (Overflow), and a condition code (Cond). The
+// flag and the condition code define the case in which the arithmetic
+// computation overflows.
+static std::pair<SDValue, SDValue>
+getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getResNo() == 0 && "Unexpected result number!");
+ SDValue Value, Overflow;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ unsigned BaseOp = 0;
+ SDLoc DL(Op);
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown ovf instruction!");
+ case ISD::SADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO:
+ BaseOp = X86ISD::UMUL;
+ Cond = X86::COND_O;
+ break;
+ }
+
+ if (BaseOp) {
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ Value = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+ Overflow = Value.getValue(1);
+ }
+
+ return std::make_pair(Value, Overflow);
+}
+
+static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
+ // looks for this combo and may remove the "setcc" instruction if the "setcc"
+ // has only one use.
+ SDLoc DL(Op);
+ X86::CondCode Cond;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG);
+
+ SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);
+}
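The mapping encoded in getX86XALUOOp pairs each overflowing operation with the flag that reports it: unsigned add/sub overflow shows up in CF (COND_B), signed and multiply overflow in OF (COND_O). A scalar sketch of the two add cases in portable C++ (helper names are invented for the example; two's complement assumed):

#include <cassert>
#include <cstdint>

// UADDO: carry out of an unsigned add; the wrapped sum drops below an input.
static bool uaddo(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;
  return Sum < A;
}

// SADDO: signed overflow; the operands agree in sign but the sum does not.
static bool saddo(int32_t A, int32_t B, int32_t &Sum) {
  Sum = int32_t(uint32_t(A) + uint32_t(B));
  return ((A ^ Sum) & (B ^ Sum)) < 0;
}

int main() {
  uint32_t U; int32_t S;
  assert(uaddo(0xFFFFFFFFu, 1u, U) && U == 0u);
  assert(!uaddo(10u, 20u, U) && U == 30u);
  assert(saddo(INT32_MAX, 1, S));
  assert(!saddo(100, 200, S) && S == 300);
  return 0;
}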
+
+/// Return true if opcode is an X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
@@ -18789,12 +19878,8 @@ static bool isX86LogicalCmp(SDValue Op) {
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
- Opc == X86ISD::SBB || Opc == X86ISD::SMUL ||
- Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
- Opc == X86ISD::XOR || Opc == X86ISD::AND))
- return true;
-
- if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
+ Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
return false;
@@ -18845,7 +19930,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// of 3 logic instructions for size savings and potentially speed.
// Unfortunately, there is no scalar form of VBLENDV.
- // If either operand is a constant, don't try this. We can expect to
+ // If either operand is a +0.0 constant, don't try this. We can expect to
// optimize away at least one of the logic instructions later in that
// case, so that sequence would be faster than a variable blend.
@@ -18853,13 +19938,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// uses XMM0 as the selection register. That may need just as many
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
-
- if (Subtarget.hasAVX() &&
- !isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
-
+ if (Subtarget.hasAVX() && !isNullFPConstant(Op1) &&
+ !isNullFPConstant(Op2)) {
// Convert to vectors, do a VSELECT, and convert back to scalar.
// All of the conversions should be optimized away.
-
MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
@@ -18919,16 +20001,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (VT == MVT::v4i1 || VT == MVT::v2i1) {
- SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
- Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), Op1, zeroConst);
- Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), Op2, zeroConst);
- SDValue newSelect = DAG.getSelect(DL, MVT::v8i1, Cond, Op1, Op2);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
- }
-
if (Cond.getOpcode() == ISD::SETCC) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
@@ -18963,22 +20035,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x == 0), 0, -1) -> neg & sbb
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
- SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
- SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- SDValue(Neg.getNode(), 1));
- return Res;
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ Zero = DAG.getConstant(0, DL, Op.getValueType());
+ return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
SDValue Res = // Res = 0 or -1.
- DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
+ DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
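Both branches above lean on the same SBB idiom: after a compare that leaves the carry flag equal to the select condition, subtract-with-borrow of a register from itself (0 - 0 - CF) produces 0 or all-ones without a branch. A scalar model (plain C++, two's complement assumed; selectZeroOrAllOnes is a made-up name for the sketch):

#include <cassert>
#include <cstdint>

// After "cmp 0, x" the carry flag is set exactly when x != 0, so
// "sbb r, r" yields -1 for nonzero x and 0 otherwise.
static int32_t selectZeroOrAllOnes(uint32_t X) {
  uint32_t CF = (0u < X) ? 1u : 0u; // borrow out of 0 - X
  return int32_t(0u - 0u - CF);     // the SBB result
}

int main() {
  assert(selectZeroOrAllOnes(0) == 0);
  assert(selectZeroOrAllOnes(1) == -1);
  assert(selectZeroOrAllOnes(0xFFFFFFFFu) == -1);
  return 0;
}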
@@ -19055,34 +20126,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
- ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
- Cond.getOperand(0).getValueType() != MVT::i8)) {
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- unsigned X86Opcode;
- unsigned X86Cond;
- SDVTList VTs;
- switch (CondOpcode) {
- case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
- case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
- case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
- case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
- case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
- case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
- default: llvm_unreachable("unexpected overflowing operator");
- }
- if (CondOpcode == ISD::UMULO)
- VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
- MVT::i32);
- else
- VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
-
- SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
-
- if (CondOpcode == ISD::UMULO)
- Cond = X86Op.getValue(2);
- else
- Cond = X86Op.getValue(1);
+ CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
+ SDValue Value;
+ X86::CondCode X86Cond;
+ std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
CC = DAG.getConstant(X86Cond, DL, MVT::i8);
AddTest = false;
@@ -19096,9 +20143,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- if (SDValue NewSetCC = LowerAndToBT(Cond, ISD::SETNE, DL, DAG)) {
- CC = NewSetCC.getOperand(0);
- Cond = NewSetCC.getOperand(1);
+ SDValue BTCC;
+ if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) {
+ CC = BTCC;
+ Cond = BT;
AddTest = false;
}
}
@@ -19106,7 +20154,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (AddTest) {
CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8);
- Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
+ Cond = EmitCmp(Cond, DAG.getConstant(0, DL, Cond.getValueType()),
+ X86::COND_NE, DL, DAG);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
@@ -19171,12 +20220,12 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
- // Extend VT if the scalar type is v8/v16 and BWI is not supported.
+ // Extend VT if the scalar type is i8/i16 and BWI is not supported.
MVT ExtVT = VT;
if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) {
// If v16i32 is to be avoided, we'll need to split and concatenate.
if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
- return SplitAndExtendv16i1(ISD::SIGN_EXTEND, VT, In, dl, DAG);
+ return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG);
ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
}
@@ -19195,10 +20244,10 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
MVT WideEltVT = WideVT.getVectorElementType();
if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) ||
(Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) {
- V = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, In);
+ V = DAG.getNode(Op.getOpcode(), dl, WideVT, In);
} else {
- SDValue NegOne = getOnesVector(WideVT, DAG, dl);
- SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, dl);
+ SDValue NegOne = DAG.getConstant(-1, dl, WideVT);
+ SDValue Zero = DAG.getConstant(0, dl, WideVT);
V = DAG.getSelect(dl, WideVT, In, NegOne, Zero);
}
@@ -19238,7 +20287,6 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
SDValue In = Op->getOperand(0);
MVT VT = Op->getSimpleValueType(0);
MVT InVT = In.getSimpleValueType();
- assert(VT.getSizeInBits() == InVT.getSizeInBits());
MVT SVT = VT.getVectorElementType();
MVT InSVT = InVT.getVectorElementType();
@@ -19249,70 +20297,100 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
- !(VT.is256BitVector() && Subtarget.hasInt256()) &&
+ !(VT.is256BitVector() && Subtarget.hasAVX()) &&
!(VT.is512BitVector() && Subtarget.hasAVX512()))
return SDValue();
SDLoc dl(Op);
+ unsigned Opc = Op.getOpcode();
+ unsigned NumElts = VT.getVectorNumElements();
// For 256-bit vectors, we only need the lower (128-bit) half of the input.
// For 512-bit vectors, we need 128-bits or 256-bits.
- if (VT.getSizeInBits() > 128) {
+ if (InVT.getSizeInBits() > 128) {
// Input needs to be at least the same number of elements as output, and
// at least 128-bits.
- int InSize = InSVT.getSizeInBits() * VT.getVectorNumElements();
+ int InSize = InSVT.getSizeInBits() * NumElts;
In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
+ InVT = In.getSimpleValueType();
}
- assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
- InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
-
- // SSE41 targets can use the pmovsx* instructions directly for 128-bit results,
+ // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results,
// so are legal and shouldn't occur here. AVX2/AVX512 pmovsx* instructions still
// need to be handled here for 256/512-bit results.
if (Subtarget.hasInt256()) {
assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
- unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
- X86ISD::VSEXT : X86ISD::VZEXT;
+
+ if (InVT.getVectorNumElements() != NumElts)
+ return DAG.getNode(Op.getOpcode(), dl, VT, In);
+
+ // FIXME: Apparently we create inreg operations that could be regular
+ // extends.
+ unsigned ExtOpc =
+ Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, VT, In);
}
+ // pre-AVX2 256-bit extensions need to be split into 128-bit instructions.
+ if (Subtarget.hasAVX()) {
+ assert(VT.is256BitVector() && "256-bit vector expected");
+ int HalfNumElts = NumElts / 2;
+ MVT HalfVT = MVT::getVectorVT(SVT, HalfNumElts);
+
+ unsigned NumSrcElts = InVT.getVectorNumElements();
+ SmallVector<int, 16> HiMask(NumSrcElts, SM_SentinelUndef);
+ for (int i = 0; i != HalfNumElts; ++i)
+ HiMask[i] = HalfNumElts + i;
+
+ SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In);
+ SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask);
+ Hi = DAG.getNode(Opc, dl, HalfVT, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ }
+
// We should only get here for sign extend.
- assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
- "Unexpected opcode!");
+ assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
+ assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");
// pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
SDValue Curr = In;
- MVT CurrVT = InVT;
+ SDValue SignExt = Curr;
// As SRAI is only available on i16/i32 types, we expand only up to i32
// and handle i64 separately.
- while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) {
- Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
- MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
- CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
- Curr = DAG.getBitcast(CurrVT, Curr);
- }
+ if (InVT != MVT::v4i32) {
+ MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
- SDValue SignExt = Curr;
- if (CurrVT != InVT) {
- unsigned SignExtShift =
- CurrVT.getScalarSizeInBits() - InSVT.getSizeInBits();
- SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
+ unsigned DestWidth = DestVT.getScalarSizeInBits();
+ unsigned Scale = DestWidth / InSVT.getSizeInBits();
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned DestElts = DestVT.getVectorNumElements();
+
+ // Build a shuffle mask that takes each input element and places it in the
+ // MSBs of the new element size.
+ SmallVector<int, 16> Mask(InNumElts, SM_SentinelUndef);
+ for (unsigned i = 0; i != DestElts; ++i)
+ Mask[i * Scale + (Scale - 1)] = i;
+
+ Curr = DAG.getVectorShuffle(InVT, dl, In, In, Mask);
+ Curr = DAG.getBitcast(DestVT, Curr);
+
+ unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
+ SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
}
- if (CurrVT == VT)
- return SignExt;
-
- if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
- SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
- DAG.getConstant(31, dl, MVT::i8));
- SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
- return DAG.getBitcast(VT, Ext);
+ if (VT == MVT::v2i64) {
+ assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");
+ SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
+ SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
+ SignExt = DAG.getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
+ SignExt = DAG.getBitcast(VT, SignExt);
}
- return SDValue();
+ return SignExt;
}
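The pre-SSE4.1 path above shuffles each narrow element into the most significant bits of the wider lane and then arithmetic-shifts it back down, which is exactly scalar sign extension done with shifts. A scaled-down scalar illustration (plain C++, assuming two's complement and an arithmetic right shift on signed values):

#include <cassert>
#include <cstdint>

// Sign-extend i8 -> i32 the way the vector lowering does: place the byte in
// the top of the lane, then shift it back with an arithmetic shift (VSRAI).
static int32_t sext8to32(uint8_t V) {
  int32_t InMSBs = int32_t(uint32_t(V) << 24); // byte now occupies bits 31..24
  return InMSBs >> 24;                         // arithmetic shift fills the sign
}

int main() {
  assert(sext8to32(0x7F) == 127);
  assert(sext8to32(0x80) == -128);
  assert(sext8to32(0xFF) == -1);
  return 0;
}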
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
@@ -19337,38 +20415,40 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
+ // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
+ if (InVT == MVT::v8i8) {
+ if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+ return SDValue();
+
+ In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
+ MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, VT, In);
+ }
+
if (Subtarget.hasInt256())
- return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+ return Op;
// Optimize vectors in AVX mode
// Sign extend v8i16 to v8i32 and
// v4i32 to v4i64
//
// Divide input vector into two parts
- // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
+ // for v4i32 the high shuffle mask will be {2, 3, -1, -1}
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
// concat the vectors to original VT
- unsigned NumElems = InVT.getVectorNumElements();
- SDValue Undef = DAG.getUNDEF(InVT);
-
- SmallVector<int,8> ShufMask1(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask1[i] = i;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
- SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask1);
+ SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);
- SmallVector<int,8> ShufMask2(NumElems, -1);
+ unsigned NumElems = InVT.getVectorNumElements();
+ SmallVector<int,8> ShufMask(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask2[i] = i + NumElems/2;
+ ShufMask[i] = i + NumElems/2;
- SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask2);
-
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
-
- OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
- OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
+ SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
+ OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
@@ -19379,19 +20459,47 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(St);
SDValue StoredVal = St->getValue();
- // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
- assert(StoredVal.getValueType().isVector() &&
- StoredVal.getValueType().getVectorElementType() == MVT::i1 &&
- StoredVal.getValueType().getVectorNumElements() <= 8 &&
- "Unexpected VT");
- assert(!St->isTruncatingStore() && "Expected non-truncating store");
- assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
- "Expected AVX512F without AVX512DQI");
+ // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
+ if (StoredVal.getValueType().isVector() &&
+ StoredVal.getValueType().getVectorElementType() == MVT::i1) {
+ assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
+ "Unexpected VT");
+ assert(!St->isTruncatingStore() && "Expected non-truncating store");
+ assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
+ "Expected AVX512F without AVX512DQI");
+
+ StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+ DAG.getUNDEF(MVT::v16i1), StoredVal,
+ DAG.getIntPtrConstant(0, dl));
+ StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
+ StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
+
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
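Without AVX512DQ the short mask vector is widened to v16i1, bitcast to i16 and truncated to i8 before being stored, i.e. the i1 elements end up packed into the low bits of a byte. A scalar sketch of that packing (illustrative only; packMask is a made-up helper):

#include <cassert>
#include <cstdint>

// Pack eight i1 elements into a byte, element 0 in bit 0, as the v8i1 store
// lowering effectively does via the v16i1-to-i16 bitcast.
static uint8_t packMask(const bool Bits[8]) {
  uint8_t Out = 0;
  for (int I = 0; I < 8; ++I)
    Out |= uint8_t(Bits[I] ? 1u << I : 0u);
  return Out;
}

int main() {
  bool Mask[8] = {true, false, true, true, false, false, false, true};
  assert(packMask(Mask) == 0x8D); // 0b10001101
  return 0;
}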
+
+ if (St->isTruncatingStore())
+ return SDValue();
- StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
- DAG.getUNDEF(MVT::v8i1), StoredVal,
+ MVT StoreVT = StoredVal.getSimpleValueType();
+ assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
+ "Unexpected VT");
+ if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
+ TargetLowering::TypeWidenVector)
+ return SDValue();
+
+ // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
+ // and store it.
+ MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
+ StoreVT.getVectorNumElements() * 2);
+ StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
+ DAG.getUNDEF(StoreVT));
+ MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
+ MVT CastVT = MVT::getVectorVT(StVT, 2);
+ StoredVal = DAG.getBitcast(CastVT, StoredVal);
+ StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
DAG.getIntPtrConstant(0, dl));
- StoredVal = DAG.getBitcast(MVT::i8, StoredVal);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
@@ -19400,7 +20508,7 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
// may emit an illegal shuffle but the expansion is still better than scalar
-// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
+// code. We generate sext/sext_invec for SEXTLOADs if it's available, otherwise
// we'll emit a shuffle and an arithmetic shift.
// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
@@ -19408,16 +20516,16 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
- assert(RegVT.isVector() && "We only custom lower vector sext loads.");
+ assert(RegVT.isVector() && "We only custom lower vector loads.");
assert(RegVT.isInteger() &&
- "We only custom lower integer vector sext loads.");
+ "We only custom lower integer vector loads.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
- if (RegVT.isVector() && RegVT.getVectorElementType() == MVT::i1) {
+ if (RegVT.getVectorElementType() == MVT::i1) {
assert(EVT(RegVT) == MemVT && "Expected non-extending load");
assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
@@ -19429,12 +20537,12 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Replace chain users with the new chain.
assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewLd.getValue(1));
- SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
- DAG.getBitcast(MVT::v8i1, NewLd),
- DAG.getIntPtrConstant(0, dl));
- return DAG.getMergeValues({Extract, NewLd.getValue(1)}, dl);
+ SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
+ DAG.getBitcast(MVT::v16i1, Val),
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
}
// Nothing useful we can do without SSE2 shuffles.
@@ -19490,10 +20598,10 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
// Finally, do a normal sign-extend to the desired register.
- return DAG.getSExtOrTrunc(Load, dl, RegVT);
+ SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT);
+ return DAG.getMergeValues({SExt, Load.getValue(1)}, dl);
}
// All sizes must be a power of two.
@@ -19521,26 +20629,26 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
"Can only lower sext loads with a single scalar load!");
- unsigned loadRegZize = RegSz;
+ unsigned loadRegSize = RegSz;
if (Ext == ISD::SEXTLOAD && RegSz >= 256)
- loadRegZize = 128;
+ loadRegSize = 128;
// If we don't have BWI we won't be able to create the shuffle needed for
// v8i8->v8i64.
if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
MemVT == MVT::v8i8)
- loadRegZize = 128;
+ loadRegSize = 128;
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(
- *DAG.getContext(), SclrLoadTy, loadRegZize / SclrLoadTy.getSizeInBits());
+ *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
// memory. In practice, we ''widen'' MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegZize / MemVT.getScalarSizeInBits());
+ loadRegSize / MemVT.getScalarSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
@@ -19551,15 +20659,20 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
- SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl,
+ unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8;
+ SDValue Increment = DAG.getConstant(OffsetInc, dl,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
+ unsigned Offset = 0;
for (unsigned i = 0; i < NumLoads; ++i) {
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset);
+
// Perform a single load.
SDValue ScalarLoad =
- DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
- Ld->getAlignment(), Ld->getMemOperand()->getFlags());
+ DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo().getWithOffset(Offset),
+ NewAlign, Ld->getMemOperand()->getFlags());
Chains.push_back(ScalarLoad.getValue(1));
// Create the first element type using SCALAR_TO_VECTOR in order to avoid
// another round of DAGCombining.
@@ -19570,6 +20683,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
ScalarLoad, DAG.getIntPtrConstant(i, dl));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ Offset += OffsetInc;
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
@@ -19580,28 +20694,14 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
- // If we have SSE4.1, we can directly emit a VSEXT node.
- if (Subtarget.hasSSE41()) {
- SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Sext;
- }
-
- // Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest
- // lanes.
- assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) &&
- "We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!");
-
- SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Shuff;
+ SDValue Sext = getExtendInVec(/*Signed*/true, dl, RegVT, SlicedVec, DAG);
+ return DAG.getMergeValues({Sext, TF}, dl);
}
if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
MemVT == MVT::v8i8) {
- SDValue Sext = getExtendInVec(X86ISD::VZEXT, dl, RegVT, SlicedVec, DAG);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Sext;
+ SDValue Sext = getExtendInVec(/*Signed*/false, dl, RegVT, SlicedVec, DAG);
+ return DAG.getMergeValues({Sext, TF}, dl);
}
// Redistribute the loaded elements into the different locations.
@@ -19614,8 +20714,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
// Bitcast to the requested type.
Shuff = DAG.getBitcast(RegVT, Shuff);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
- return Shuff;
+ return DAG.getMergeValues({Shuff, TF}, dl);
}
/// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
@@ -19712,49 +20811,13 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
CondOpcode = Cond.getOpcode();
if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
- ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
- Cond.getOperand(0).getValueType() != MVT::i8)) {
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- unsigned X86Opcode;
- unsigned X86Cond;
- SDVTList VTs;
- // Keep this in sync with LowerXALUO, otherwise we might create redundant
- // instructions that can't be removed afterwards (i.e. X86ISD::ADD and
- // X86ISD::INC).
- switch (CondOpcode) {
- case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
- case ISD::SADDO:
- if (isOneConstant(RHS)) {
- X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
- break;
- }
- X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
- case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
- case ISD::SSUBO:
- if (isOneConstant(RHS)) {
- X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
- break;
- }
- X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
- case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
- case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
- default: llvm_unreachable("unexpected overflowing operator");
- }
- if (Inverted)
- X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
- if (CondOpcode == ISD::UMULO)
- VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
- MVT::i32);
- else
- VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
-
- SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
+ CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
+ SDValue Value;
+ X86::CondCode X86Cond;
+ std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
- if (CondOpcode == ISD::UMULO)
- Cond = X86Op.getValue(2);
- else
- Cond = X86Op.getValue(1);
+ if (Inverted)
+ X86Cond = X86::GetOppositeBranchCondition(X86Cond);
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
addTest = false;
@@ -19855,34 +20918,17 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
// For FCMP_UNE, we can emit
- // two branches instead of an explicit AND instruction with a
- // separate test. However, we only do this if this block doesn't
- // have a fall-through edge, because this requires an explicit
- // jmp when the condition is false.
- if (Op.getNode()->hasOneUse()) {
- SDNode *User = *Op.getNode()->use_begin();
- // Look for an unconditional branch following this conditional branch.
- // We need this because we need to reverse the successors in order
- // to implement FCMP_UNE.
- if (User->getOpcode() == ISD::BR) {
- SDValue FalseBB = User->getOperand(1);
- SDNode *NewBR =
- DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
- assert(NewBR == User);
- (void)NewBR;
-
- SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- Cond.getOperand(0), Cond.getOperand(1));
- Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
- Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, CC, Cmp);
- CC = DAG.getConstant(X86::COND_NP, dl, MVT::i8);
- Cond = Cmp;
- addTest = false;
- Dest = FalseBB;
- }
- }
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_P, dl, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
}
}
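On x86 an unordered floating-point compare sets the parity flag, so FCMP_UNE splits into one branch on "not equal" (COND_NE) and one on "parity set" (COND_P), both targeting the true destination. The predicate being implemented, spelled out in portable C++ (illustrative; note that C++ operator!= is already true for NaN operands, the two terms are written out only to mirror the two branches):

#include <cassert>
#include <cmath>

// FCMP_UNE: unordered (a NaN is present) OR not equal.
static bool fcmpUNE(double A, double B) {
  return std::isunordered(A, B) || A != B;
}

int main() {
  assert(fcmpUNE(1.0, 2.0));          // taken via the COND_NE branch
  assert(!fcmpUNE(1.0, 1.0));         // neither branch taken
  assert(fcmpUNE(std::nan(""), 1.0)); // taken via the COND_P branch
  return 0;
}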
@@ -19894,9 +20940,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- if (SDValue NewSetCC = LowerAndToBT(Cond, ISD::SETNE, dl, DAG)) {
- CC = NewSetCC.getOperand(0);
- Cond = NewSetCC.getOperand(1);
+ SDValue BTCC;
+ if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, dl, DAG, BTCC)) {
+ CC = BTCC;
+ Cond = BT;
addTest = false;
}
}
@@ -19905,7 +20952,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
- Cond = EmitTest(Cond, X86Cond, dl, DAG);
+ Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()),
+ X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -20141,6 +21189,25 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// Helper to get immediate/variable SSE shift opcode from other shift opcodes.
+static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable) {
+ switch (Opc) {
+ case ISD::SHL:
+ case X86ISD::VSHL:
+ case X86ISD::VSHLI:
+ return IsVariable ? X86ISD::VSHL : X86ISD::VSHLI;
+ case ISD::SRL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRLI:
+ return IsVariable ? X86ISD::VSRL : X86ISD::VSRLI;
+ case ISD::SRA:
+ case X86ISD::VSRA:
+ case X86ISD::VSRAI:
+ return IsVariable ? X86ISD::VSRA : X86ISD::VSRAI;
+ }
+ llvm_unreachable("Unknown target vector shift node");
+}
+
/// Handle vector element shifts where the shift amount is a constant.
/// Takes immediate version of shift as input.
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
@@ -20236,46 +21303,57 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
CShAmt->getZExtValue(), DAG);
- // Change opcode to non-immediate version
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
- case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
- case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
- }
+ // Change opcode to non-immediate version.
+ Opc = getTargetVShiftUniformOpcode(Opc, true);
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- // +=================+============+=======================================+
- // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
- // +=================+============+=======================================+
- // | i64 | Yes, No | Use ShAmt as lowest elt |
- // | i32 | Yes | zero-extend in-reg |
- // | (i32 zext(i16)) | Yes | zero-extend in-reg |
- // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
- // +=================+============+=======================================+
+ // +====================+============+=======================================+
+ // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
+ // +====================+============+=======================================+
+ // | i64 | Yes, No | Use ShAmt as lowest elt |
+ // | i32 | Yes | zero-extend in-reg |
+ // | (i32 zext(i16/i8)) | Yes | zero-extend in-reg |
+ // | (i32 zext(i16/i8)) | No | byte-shift-in-reg |
+ // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud) |
+ // +====================+============+=======================================+
if (SVT == MVT::i64)
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
- else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
+ else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
ShAmt = ShAmt.getOperand(0);
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
- ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
+ MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
+ if (Subtarget.hasSSE41())
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
+ MVT::v2i64, ShAmt);
+ else {
+ SDValue ByteShift = DAG.getConstant(
+ (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
+ ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
+ ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
+ ByteShift);
+ ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
+ ByteShift);
+ }
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
- ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
+ MVT::v2i64, ShAmt);
} else {
- SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT),
- DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
+ SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
+ DAG.getUNDEF(SVT)};
ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
- MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+ MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
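For targets without SSE4.1 the shift amount is zero-extended inside the vector register by shifting the whole 128-bit value left and then right by (128 - element bits) / 8 bytes. The same idea scaled down to a 64-bit scalar, purely as an illustration:

#include <cassert>
#include <cstdint>

int main() {
  // A 16-bit shift amount sitting in the low element of a "register" full of
  // junk; shifting left then right by (width - 16) bits clears everything
  // above it, which is what the VSHLDQ/VSRLDQ pair does byte-wise on an XMM.
  uint64_t Reg = 0xDEADBEEF0000F00Dull;
  uint64_t ZExtAmt = (Reg << 48) >> 48;
  assert(ZExtAmt == 0xF00Dull);
  return 0;
}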
@@ -20292,11 +21370,7 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
if (X86::isZeroNode(Mask))
return DAG.getConstant(0, dl, MaskVT);
- if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
- // Mask should be extended
- Mask = DAG.getNode(ISD::ANY_EXTEND, dl,
- MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
- }
+ assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!");
if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
assert(MaskVT == MVT::v64i1 && "Expected v64i1 mask!");
@@ -20340,24 +21414,6 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- switch (Op.getOpcode()) {
- default: break;
- case X86ISD::CMPM:
- case X86ISD::CMPM_RND:
- case X86ISD::VPSHUFBITQMB:
- case X86ISD::VFPCLASS:
- return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
- case ISD::TRUNCATE:
- case X86ISD::VTRUNC:
- case X86ISD::VTRUNCS:
- case X86ISD::VTRUNCUS:
- case X86ISD::CVTPS2PH:
- // We can't use ISD::VSELECT here because it is not always "Legal"
- // for the destination type. For example vpmovqb require only AVX512
- // and vselect that can operate on byte element type require BWI
- OpcodeSelect = X86ISD::SELECT;
- break;
- }
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
@@ -20383,7 +21439,9 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDLoc dl(Op);
assert(Mask.getValueType() == MVT::i8 && "Unexpect type");
- SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
+ SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
+ DAG.getBitcast(MVT::v8i1, Mask),
+ DAG.getIntPtrConstant(0, dl));
if (Op.getOpcode() == X86ISD::FSETCCM ||
Op.getOpcode() == X86ISD::FSETCCM_RND ||
Op.getOpcode() == X86ISD::VFPCLASSS)
@@ -20486,13 +21544,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
}
- case INTR_TYPE_2OP:
- case INTR_TYPE_2OP_IMM8: {
+ case INTR_TYPE_2OP: {
SDValue Src2 = Op.getOperand(2);
- if (IntrData->Type == INTR_TYPE_2OP_IMM8)
- Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
-
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -20724,38 +21778,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Swap Src1 and Src2 in the node creation
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
}
- case FMA_OP_MASKZ:
- case FMA_OP_MASK: {
- SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
- SDValue Src3 = Op.getOperand(3);
- SDValue Mask = Op.getOperand(4);
- MVT VT = Op.getSimpleValueType();
- SDValue PassThru = SDValue();
-
- // set PassThru element
- if (IntrData->Type == FMA_OP_MASKZ)
- PassThru = getZeroVector(VT, Subtarget, DAG, dl);
- else
- PassThru = Src1;
-
- // We specify 2 possible opcodes for intrinsics with rounding modes.
- // First, we check if the intrinsic may have non-default rounding mode,
- // (IntrData->Opc1 != 0), then we check the rounding mode operand.
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd))
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src1, Src2, Src3, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
- dl, Op.getValueType(),
- Src1, Src2, Src3),
- Mask, PassThru, Subtarget, DAG);
- }
case IFMA_OP:
// NOTE: We need to swizzle the operands to pass the multiply operands
// first.
@@ -20766,7 +21788,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// does not change the value. Set it to 0 since it can change.
return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
DAG.getIntPtrConstant(0, dl));
- case CVTPD2PS_MASK: {
+ case CVTPD2PS_RND_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
@@ -20790,13 +21812,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getIntPtrConstant(0, dl)),
Mask, PassThru, Subtarget, DAG);
}
- case FPCLASS: {
- // FPclass intrinsics
- SDValue Src1 = Op.getOperand(1);
- MVT MaskVT = Op.getSimpleValueType();
- SDValue Imm = Op.getOperand(2);
- return DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm);
- }
case FPCLASSS: {
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
@@ -20811,32 +21826,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
FPclassMask, DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(MVT::i8, Ins);
}
- case CMP_MASK: {
- // Comparison intrinsics with masks.
- // Example of transformation:
- // (i8 (int_x86_avx512_mask_pcmpeq_q_128
- // (v2i64 %a), (v2i64 %b), (i8 %mask))) ->
- // (i8 (bitcast
- // (v8i1 (insert_subvector zero,
- // (v2i1 (and (PCMPEQM %a, %b),
- // (extract_subvector
- // (v8i1 (bitcast %mask)), 0))), 0))))
- MVT VT = Op.getOperand(1).getSimpleValueType();
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
- SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
- MVT BitcastVT = MVT::getVectorVT(MVT::i1,
- Mask.getSimpleValueType().getSizeInBits());
- SDValue Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2));
- SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, SDValue(),
- Subtarget, DAG);
- // Need to fill with zeros to ensure the bitcast will produce zeroes
- // for the upper bits in the v2i1/v4i1 case.
- SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
- DAG.getConstant(0, dl, BitcastVT),
- CmpMask, DAG.getIntPtrConstant(0, dl));
- return DAG.getBitcast(Op.getValueType(), Res);
- }
case CMP_MASK_CC: {
MVT MaskVT = Op.getSimpleValueType();
@@ -21007,6 +21996,59 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), RoundingMode);
}
+ // ADC/ADCX/SBB
+ case ADX: {
+ SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+ SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32);
+
+ SDValue Res;
+ // If the carry in is zero, then we should just use ADD/SUB instead of
+ // ADC/SBB.
+ if (isNullConstant(Op.getOperand(1))) {
+ Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2),
+ Op.getOperand(3));
+ } else {
+ SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1),
+ DAG.getConstant(-1, dl, MVT::i8));
+ Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2),
+ Op.getOperand(3), GenCF.getValue(1));
+ }
+ SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
+ SDValue Results[] = { SetCC, Res };
+ return DAG.getMergeValues(Results, dl);
+ }
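// A standalone plain-C++ sketch (illustrative only, not SelectionDAG code) of
// the addcarry semantics the ADX case above lowers: the returned flag is the
// carry out of a 32-bit add that also consumes a carry in. Names here are
// hypothetical.
#include <cassert>
#include <cstdint>

// Model of an _addcarry_u32-style helper: store the 32-bit sum, return carry.
static unsigned char addCarry32(unsigned char CarryIn, uint32_t A, uint32_t B,
                                uint32_t *Out) {
  // Widen to 64 bits so the carry-out is simply bit 32 of the exact sum,
  // mirroring how the lowering reads CF (X86::COND_B) after ADC.
  uint64_t Sum = uint64_t(A) + uint64_t(B) + (CarryIn ? 1 : 0);
  *Out = uint32_t(Sum);
  return (unsigned char)(Sum >> 32);
}

int main() {
  uint32_t Out;
  // A constant-zero carry-in behaves like a plain ADD, which is why the code
  // above selects Opc1 (ADD/SUB) for isNullConstant carry operands.
  assert(addCarry32(0, 0xFFFFFFFFu, 1, &Out) == 1 && Out == 0);
  // A non-zero carry-in is first converted back into CF (by adding -1 to the
  // i8 carry value) and then consumed by ADC; the scalar model just adds it.
  assert(addCarry32(1, 0xFFFFFFFEu, 1, &Out) == 1 && Out == 0);
  assert(addCarry32(1, 2, 3, &Out) == 0 && Out == 6);
  return 0;
}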
+ case CVTPD2PS_MASK:
+ case CVTPD2I_MASK:
+ case TRUNCATE_TO_REG: {
+ SDValue Src = Op.getOperand(1);
+ SDValue PassThru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+
+ if (isAllOnesConstant(Mask))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
+
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
+ Mask);
+ }
+ case CVTPS2PH_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Rnd = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+
+ if (isAllOnesConstant(Mask))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd);
+
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd,
+ PassThru, Mask);
+
+ }
default:
break;
}
@@ -21018,6 +22060,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
+ case Intrinsic::x86_avx512_ktestc_b:
+ case Intrinsic::x86_avx512_ktestc_w:
+ case Intrinsic::x86_avx512_ktestc_d:
+ case Intrinsic::x86_avx512_ktestc_q:
+ case Intrinsic::x86_avx512_ktestz_b:
+ case Intrinsic::x86_avx512_ktestz_w:
+ case Intrinsic::x86_avx512_ktestz_d:
+ case Intrinsic::x86_avx512_ktestz_q:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
@@ -21036,15 +22086,30 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
- bool IsTestPacked = false;
+ unsigned TestOpc = X86ISD::PTEST;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx512_ktestc_b:
+ case Intrinsic::x86_avx512_ktestc_w:
+ case Intrinsic::x86_avx512_ktestc_d:
+ case Intrinsic::x86_avx512_ktestc_q:
+ // CF = 1
+ TestOpc = X86ISD::KTEST;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_avx512_ktestz_b:
+ case Intrinsic::x86_avx512_ktestz_w:
+ case Intrinsic::x86_avx512_ktestz_d:
+ case Intrinsic::x86_avx512_ktestz_q:
+ TestOpc = X86ISD::KTEST;
+ X86CC = X86::COND_E;
+ break;
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
@@ -21055,7 +22120,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
@@ -21066,7 +22131,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
@@ -21077,7 +22142,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
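// A plain-C++ sketch (illustrative only) of the flag-to-boolean mapping these
// ptest/vtest/ktest intrinsics lower to: ZF is set when A & B is zero and CF
// when ~A & B is zero (consult the instruction reference for the exact
// operand roles of each form). testz returns ZF (COND_E), testc returns CF
// (COND_B), and testnzc returns !ZF && !CF (COND_A).
#include <cstdint>

constexpr bool testZF(uint64_t A, uint64_t B) { return (A & B) == 0; }
constexpr bool testCF(uint64_t A, uint64_t B) { return (~A & B) == 0; }

static_assert(testZF(0b0011, 0b1100), "disjoint masks set ZF (the *testz case)");
static_assert(testCF(0b1111, 0b0101), "B contained in A sets CF (the *testc case)");
static_assert(!testZF(0b0110, 0b1100) && !testCF(0b0110, 0b1100),
              "partial overlap leaves both flags clear (the *testnzc case)");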
@@ -21196,14 +22260,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
}
- case Intrinsic::x86_seh_recoverfp: {
+ case Intrinsic::eh_recoverfp: {
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
- "llvm.x86.seh.recoverfp must take a function as the first argument");
+ "llvm.eh.recoverfp must take a function as the first argument");
return recoverFramePointer(DAG, Fn, IncomingFPOp);
}
@@ -21251,25 +22315,31 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- MVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getSimpleValueType().getVectorNumElements());
+ unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
+ VT.getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
+
+ // We support two versions of the gather intrinsics. One with scalar mask and
+ // one with vXi1 mask. Convert scalar to vXi1 if necessary.
+ if (Mask.getValueType() != MaskVT)
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
- if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode()))
+ if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
- SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain};
+ SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, dl);
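// A plain-C++ sketch (illustrative only, hypothetical names) of the masked
// gather semantics getGatherNode wires up: active lanes load through the
// index vector, inactive lanes keep the pass-through value, and the lane
// count is the minimum of the index and data widths (MinElts above). Scale
// is modeled as simple element indexing.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <std::size_t N>
std::array<int32_t, N> gatherLanes(const int32_t *Base,
                                   const std::array<int32_t, N> &Index,
                                   const std::array<bool, N> &Mask,
                                   const std::array<int32_t, N> &PassThru) {
  std::array<int32_t, N> Result{};
  for (std::size_t I = 0; I != N; ++I)
    // The lowering substitutes a zero vector for an undef source to break
    // the false register dependency; the per-lane semantics stay the same.
    Result[I] = Mask[I] ? Base[Index[I]] : PassThru[I];
  return Result;
}

int main() {
  int32_t Mem[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  auto R = gatherLanes<4>(Mem, {7, 0, 3, 5}, {true, false, true, true},
                          {-1, -2, -3, -4});
  assert(R[0] == 17 && R[1] == -2 && R[2] == 13 && R[3] == 15);
  return 0;
}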
@@ -21287,12 +22357,17 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- MVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getSimpleValueType().getVectorNumElements());
+ unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
+ Src.getSimpleValueType().getVectorNumElements());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
+
+ // We support two versions of the scatter intrinsics. One with scalar mask and
+ // one with vXi1 mask. Convert scalar to vXi1 if necessary.
+ if (Mask.getValueType() != MaskVT)
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
- SDValue Ops[] = {Base, Scale, Index, Disp, Segment, VMask, Src, Chain};
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
@@ -21433,39 +22508,39 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
}
SDValue Chain = HI.getValue(1);
+ SDValue TSC;
+ if (Subtarget.is64Bit()) {
+ // The EDX register is loaded with the high-order 32 bits of the MSR, and
+ // the EAX register is loaded with the low-order 32 bits.
+ TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+ DAG.getConstant(32, DL, MVT::i8));
+ TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC);
+ } else {
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI });
+ }
+
if (Opcode == X86ISD::RDTSCP_DAG) {
- assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->getNumOperands() == 2 && "Unexpected number of operands!");
// Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
// the ECX register. Add 'ecx' explicitly to the chain.
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
HI.getValue(2));
- // Explicitly store the content of ECX at the location passed in input
- // to the 'rdtscp' intrinsic.
- Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
- MachinePointerInfo());
- }
- if (Subtarget.is64Bit()) {
- // The EDX register is loaded with the high-order 32 bits of the MSR, and
- // the EAX register is loaded with the low-order 32 bits.
- SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
- DAG.getConstant(32, DL, MVT::i8));
- Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
- Results.push_back(Chain);
+ Results.push_back(TSC);
+ Results.push_back(ecx);
+ Results.push_back(ecx.getValue(1));
return;
}
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- SDValue Ops[] = { LO, HI };
- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
- Results.push_back(Pair);
+ Results.push_back(TSC);
Results.push_back(Chain);
}
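// The EDX:EAX merge performed above, as a plain-C++ sketch (illustrative
// only): on 64-bit targets the two 32-bit halves are combined with a
// shift-and-or, while 32-bit targets keep them as a value pair instead.
#include <cstdint>

constexpr uint64_t combineTSC(uint32_t Lo, uint32_t Hi) {
  // EDX holds the high-order 32 bits of the counter, EAX the low-order bits.
  return (uint64_t(Hi) << 32) | Lo;
}

static_assert(combineTSC(0xDDCCBBAAu, 0x00000001u) == 0x1DDCCBBAAull,
              "the high half lands in bits 63:32");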
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- SmallVector<SDValue, 2> Results;
+ SmallVector<SDValue, 3> Results;
SDLoc DL(Op);
getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
@@ -21529,7 +22604,7 @@ EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ SDValue Ops[] = { Chain, Val, Ptr, Mask };
return SignedSat ?
DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
@@ -21689,20 +22764,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
- // ADC/ADCX/SBB
- case ADX: {
- SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
- SDVTList VTs = DAG.getVTList(Op.getOperand(3).getValueType(), MVT::i32);
- SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
- DAG.getConstant(-1, dl, MVT::i8));
- SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
- Op.getOperand(4), GenCF.getValue(1));
- SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
- Op.getOperand(5), MachinePointerInfo());
- SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
- SDValue Results[] = { SetCC, Store };
- return DAG.getMergeValues(Results, dl);
- }
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
@@ -22255,11 +23316,10 @@ static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
// we just take the hi result (by masking the lo result to zero before the
// add).
SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
- SDValue Zero = getZeroVector(CurrVT, Subtarget, DAG, DL);
+ SDValue Zero = DAG.getConstant(0, DL, CurrVT);
- SDValue NibbleMask = DAG.getConstant(0xF, DL, CurrVT);
SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
- SDValue Lo = DAG.getNode(ISD::AND, DL, CurrVT, Op0, NibbleMask);
+ SDValue Lo = Op0;
SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
SDValue HiZ;
if (CurrVT.is512BitVector()) {
@@ -22377,38 +23437,23 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
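// A plain-C++ sketch (illustrative only) of the classic cttz identities the
// (removed) vector path of LowerCTTZ below relied on: x & -x isolates the
// lowest set bit, and cttz(x) == popcount((x & -x) - 1), which conveniently
// also returns the bit width for x == 0.
#include <cstdint>

constexpr unsigned popcount32(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1)
    ++N;
  return N;
}

constexpr unsigned cttz32(uint32_t X) {
  // (X & -X) keeps only the lowest set bit; subtracting 1 turns it into a
  // mask of exactly cttz(X) ones, which popcount then counts.
  return popcount32((X & (0u - X)) - 1u);
}

static_assert(cttz32(0x00000001u) == 0, "bit 0 set");
static_assert(cttz32(0x00500000u) == 20, "lowest set bit at position 20");
static_assert(cttz32(0u) == 32, "zero input yields the bit width");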
-static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumBits = VT.getScalarSizeInBits();
+ SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
- if (VT.isVector()) {
- SDValue N0 = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, VT);
-
- // lsb(x) = (x & -x)
- SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0,
- DAG.getNode(ISD::SUB, dl, VT, Zero, N0));
-
- // cttz_undef(x) = (width - 1) - ctlz(lsb)
- if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
- SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT);
- return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne,
- DAG.getNode(ISD::CTLZ, dl, VT, LSB));
- }
-
- // cttz(x) = ctpop(lsb - 1)
- SDValue One = DAG.getConstant(1, dl, VT);
- return DAG.getNode(ISD::CTPOP, dl, VT,
- DAG.getNode(ISD::SUB, dl, VT, LSB, One));
- }
+ // Decompose 256-bit ops into smaller 128-bit ops.
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return Lower256IntUnary(Op, DAG);
- assert(Op.getOpcode() == ISD::CTTZ &&
+ assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
"Only scalar CTTZ requires custom lowering");
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
- Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0));
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0);
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
@@ -22422,7 +23467,7 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
/// Break a 256-bit integer operation into two new 128-bit ones and then
/// concatenate the result back.
-static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
+static SDValue split256IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && VT.isInteger() &&
@@ -22451,7 +23496,7 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
/// Break a 512-bit integer operation into two new 256-bit ones and then
/// concatenate the result back.
-static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
+static SDValue split512IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is512BitVector() && VT.isInteger() &&
@@ -22478,18 +23523,46 @@ static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
-static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
+static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
+ if (VT == MVT::i16 || VT == MVT::i32)
+ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
+
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
}
-static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getScalarType() == MVT::i1) {
+ SDLoc dl(Op);
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Expected saturated arithmetic opcode");
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
+ DAG.getNOT(dl, Op.getOperand(1), VT));
+ }
+ }
+
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return split256IntArith(Op, DAG);
+}
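// A plain-C++ check (illustrative only) of the 1-bit identities used above:
// modular add/sub on i1 is XOR, saturating add clamps to 1 (OR), and
// saturating sub clamps to 0 (AND NOT). The loop covers all four inputs.
#include <algorithm>
#include <cassert>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      assert(((A + B) & 1) == (A ^ B));        // add/sub mod 2 == xor
      assert(std::min(A + B, 1) == (A | B));   // uaddsat/saddsat == or
      assert(std::max(A - B, 0) == (A & ~B));  // usubsat/ssubsat == andnot
    }
  return 0;
}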
+
+static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
// Since X86 does not have CMOV for 8-bit integer, we don't convert
@@ -22503,10 +23576,23 @@ static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
}
- assert(Op.getSimpleValueType().is256BitVector() &&
- Op.getSimpleValueType().isInteger() &&
- "Only handle AVX 256-bit vector integer operation");
- return Lower256IntUnary(Op, DAG);
+ // ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X).
+ if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+ return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Sub, Src);
+ }
+
+ if (VT.is256BitVector() && !Subtarget.hasInt256()) {
+ assert(VT.isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntUnary(Op, DAG);
+ }
+
+ // Default to expand.
+ return SDValue();
}
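// A scalar plain-C++ model (illustrative only) of the ABS(vXi64 X) -->
// BLENDV(X, 0-X, X) pattern above: BLENDV picks the second source for lanes
// whose mask lane has its sign bit set, so using X itself as the mask selects
// the negated value exactly for negative lanes.
#include <cstdint>

constexpr int64_t blendvLane(int64_t IfClear, int64_t IfSet, int64_t Mask) {
  // Only the sign bit of the mask lane matters to VBLENDVPD/PBLENDVB.
  return Mask < 0 ? IfSet : IfClear;
}

constexpr int64_t absViaBlendv(int64_t X) {
  return blendvLane(/*IfClear=*/X, /*IfSet=*/0 - X, /*Mask=*/X);
}

static_assert(absViaBlendv(42) == 42, "positive lanes pass through");
static_assert(absViaBlendv(-7) == 7, "negative lanes take 0 - X");
static_assert(absViaBlendv(0) == 0, "zero is unchanged");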
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
@@ -22514,7 +23600,7 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
// For AVX1 cases, split to use legal ops (everything but v4i64).
if (VT.getScalarType() != MVT::i64 && VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
@@ -22556,9 +23642,9 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
- // Decompose 256-bit ops into smaller 128-bit ops.
+ // Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
@@ -22566,53 +23652,49 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16
// vector pairs, multiply and truncate.
if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
- if (Subtarget.hasInt256()) {
- // For 512-bit vectors, split into 256-bit vectors to allow the
- // sign-extension to occur.
- if (VT == MVT::v64i8)
- return Lower512IntArith(Op, DAG);
-
- // For 256-bit vectors, split into 128-bit vectors to allow the
- // sign-extension to occur. We don't need this on AVX512BW as we can
- // safely sign-extend to v32i16.
- if (VT == MVT::v32i8 && !Subtarget.hasBWI())
- return Lower256IntArith(Op, DAG);
+ unsigned NumElts = VT.getVectorNumElements();
+ if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
+ (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
return DAG.getNode(
ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::MUL, dl, ExVT,
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, A),
- DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, B)));
+ DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, A),
+ DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, B)));
}
- assert(VT == MVT::v16i8 &&
- "Pre-AVX2 support only supports v16i8 multiplication");
- MVT ExVT = MVT::v8i16;
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- // Extract the lo parts and sign extend to i16
+ // Extract the lo/hi parts and any-extend to i16.
// We're going to mask off the low byte of each result element of the
// pmullw, so it doesn't matter what's in the high byte of each 16-bit
// element.
- const int LoShufMask[] = {0, -1, 1, -1, 2, -1, 3, -1,
- 4, -1, 5, -1, 6, -1, 7, -1};
- SDValue ALo = DAG.getVectorShuffle(VT, dl, A, A, LoShufMask);
- SDValue BLo = DAG.getVectorShuffle(VT, dl, B, B, LoShufMask);
- ALo = DAG.getBitcast(ExVT, ALo);
- BLo = DAG.getBitcast(ExVT, BLo);
-
- // Extract the hi parts and sign extend to i16
- // We're going to mask off the low byte of each result element of the
- // pmullw, so it doesn't matter what's in the high byte of each 16-bit
- // element.
- const int HiShufMask[] = {8, -1, 9, -1, 10, -1, 11, -1,
- 12, -1, 13, -1, 14, -1, 15, -1};
- SDValue AHi = DAG.getVectorShuffle(VT, dl, A, A, HiShufMask);
- SDValue BHi = DAG.getVectorShuffle(VT, dl, B, B, HiShufMask);
- AHi = DAG.getBitcast(ExVT, AHi);
- BHi = DAG.getBitcast(ExVT, BHi);
-
- // Multiply, mask the lower 8bits of the lo/hi results and pack
+ SDValue Undef = DAG.getUNDEF(VT);
+ SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
+ SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));
+
+ SDValue BLo, BHi;
+ if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
+ // If the RHS is a constant, manually unpackl/unpackh.
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (unsigned i = 0; i != NumElts; i += 16) {
+ for (unsigned j = 0; j != 8; ++j) {
+ LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
+ MVT::i16));
+ HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
+ MVT::i16));
+ }
+ }
+
+ BLo = DAG.getBuildVector(ExVT, dl, LoOps);
+ BHi = DAG.getBuildVector(ExVT, dl, HiOps);
+ } else {
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
+ }
+
+ // Multiply, mask the lower 8bits of the lo/hi results and pack.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
@@ -22661,9 +23743,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
//
// Hi = psllqi(AloBhi + AhiBlo, 32);
// return AloBlo + Hi;
- KnownBits AKnown, BKnown;
- DAG.computeKnownBits(A, AKnown);
- DAG.computeKnownBits(B, BKnown);
+ KnownBits AKnown = DAG.computeKnownBits(A);
+ KnownBits BKnown = DAG.computeKnownBits(B);
APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
bool ALoIsZero = LowerBitsMask.isSubsetOf(AKnown.Zero);
@@ -22673,7 +23754,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero);
bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero);
- SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
// Only multiply lo/hi halves that aren't known to be zero.
SDValue AloBlo = Zero;
@@ -22702,10 +23783,79 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ bool IsSigned = Op->getOpcode() == ISD::MULHS;
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
- // Decompose 256-bit ops into smaller 128-bit ops.
+ // Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
+ assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
+ (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
+ (VT == MVT::v16i32 && Subtarget.hasAVX512()));
+
+ // PMULxD operations multiply each even value (starting at 0) of LHS with
+ // the related value of RHS and produce a widened result.
+ // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ //
+ // In other words, to have all the results, we need to perform two PMULxD:
+ // 1. one with the even values.
+ // 2. one with the odd values.
+ // To achieve #2, we need to place the odd values at an even position.
+ //
+ // Place the odd value at an even position (basically, shift all values 1
+ // step to the left):
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
+ 9, -1, 11, -1, 13, -1, 15, -1};
+ // <a|b|c|d> => <b|undef|d|undef>
+ SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A,
+ makeArrayRef(&Mask[0], NumElts));
+ // <e|f|g|h> => <f|undef|h|undef>
+ SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B,
+ makeArrayRef(&Mask[0], NumElts));
+
+ // Emit two multiplies, one for the lower 2 ints and one for the higher 2
+ // ints.
+ MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
+ unsigned Opcode =
+ (IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ;
+ // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
+ DAG.getBitcast(MulVT, A),
+ DAG.getBitcast(MulVT, B)));
+ // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
+ // => <2 x i64> <bf|dh>
+ SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
+ DAG.getBitcast(MulVT, Odd0),
+ DAG.getBitcast(MulVT, Odd1)));
+
+ // Shuffle it back into the right order.
+ SmallVector<int, 16> ShufMask(NumElts);
+ for (int i = 0; i != (int)NumElts; ++i)
+ ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
+
+ SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);
+
+ // If we have a signed multiply but no PMULDQ, fix up the result of an
+ // unsigned multiply.
+ if (IsSigned && !Subtarget.hasSSE41()) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
+ SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);
+
+ SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
+ }
+
+ return Res;
+ }
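// A per-lane plain-C++ check (illustrative only) of the signed-high-multiply
// fixup used above when PMULDQ is unavailable: the high 32 bits of a signed
// product equal the unsigned high bits minus B where A is negative and minus
// A where B is negative, exactly what the two SETGT/AND terms compute.
#include <cassert>
#include <cstdint>

static int32_t mulhs(int32_t A, int32_t B) {
  return int32_t((int64_t(A) * int64_t(B)) >> 32);
}

static int32_t mulhsViaUnsigned(int32_t A, int32_t B) {
  // Unsigned widening multiply, as PMULUDQ produces for each even/odd lane.
  uint64_t Wide = uint64_t(uint32_t(A)) * uint64_t(uint32_t(B));
  uint32_t HiU = uint32_t(Wide >> 32);
  // Fixup terms: subtract the other operand wherever an input is negative.
  uint32_t Fix = (A < 0 ? uint32_t(B) : 0u) + (B < 0 ? uint32_t(A) : 0u);
  return int32_t(HiU - Fix);
}

int main() {
  const int32_t Vals[] = {0, 1, -1, 7, -13, 100000, -100000,
                          INT32_MAX, INT32_MIN};
  for (int32_t A : Vals)
    for (int32_t B : Vals)
      assert(mulhs(A, B) == mulhsViaUnsigned(A, B));
  return 0;
}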
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
@@ -22714,123 +23864,141 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
// logical shift down the upper half and pack back to i8.
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
// With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
// and then ashr/lshr the upper bits down to the lower bits before multiply.
- unsigned Opcode = Op.getOpcode();
- unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA);
- unsigned ExAVX = (ISD::MULHU == Opcode ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+ unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
+ (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
+ SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
+ SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
+ Mul = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+ }
- // For 512-bit vectors, split into 256-bit vectors to allow the
+ // For signed 512-bit vectors, split into 256-bit vectors to allow the
// sign-extension to occur.
- if (VT == MVT::v64i8)
- return Lower512IntArith(Op, DAG);
+ if (VT == MVT::v64i8 && IsSigned)
+ return split512IntArith(Op, DAG);
- // AVX2 implementations - extend xmm subvectors to ymm.
- if (Subtarget.hasInt256()) {
- unsigned NumElems = VT.getVectorNumElements();
+ // Signed AVX2 implementation - extend xmm subvectors to ymm.
+ if (VT == MVT::v32i8 && IsSigned) {
SDValue Lo = DAG.getIntPtrConstant(0, dl);
- SDValue Hi = DAG.getIntPtrConstant(NumElems / 2, dl);
-
- if (VT == MVT::v32i8) {
- if (Subtarget.canExtendTo512BW()) {
- SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v32i16, A);
- SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v32i16, B);
- SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v32i16, ExA, ExB);
- Mul = DAG.getNode(ISD::SRL, dl, MVT::v32i16, Mul,
- DAG.getConstant(8, dl, MVT::v32i16));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
- }
- SDValue ALo = extract128BitVector(A, 0, DAG, dl);
- SDValue BLo = extract128BitVector(B, 0, DAG, dl);
- SDValue AHi = extract128BitVector(A, NumElems / 2, DAG, dl);
- SDValue BHi = extract128BitVector(B, NumElems / 2, DAG, dl);
- ALo = DAG.getNode(ExAVX, dl, MVT::v16i16, ALo);
- BLo = DAG.getNode(ExAVX, dl, MVT::v16i16, BLo);
- AHi = DAG.getNode(ExAVX, dl, MVT::v16i16, AHi);
- BHi = DAG.getNode(ExAVX, dl, MVT::v16i16, BHi);
- Lo = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
- DAG.getNode(ISD::MUL, dl, MVT::v16i16, ALo, BLo),
- DAG.getConstant(8, dl, MVT::v16i16));
- Hi = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
- DAG.getNode(ISD::MUL, dl, MVT::v16i16, AHi, BHi),
- DAG.getConstant(8, dl, MVT::v16i16));
- // The ymm variant of PACKUS treats the 128-bit lanes separately, so before
- // using PACKUS we need to permute the inputs to the correct lo/hi xmm lane.
- const int LoMask[] = {0, 1, 2, 3, 4, 5, 6, 7,
- 16, 17, 18, 19, 20, 21, 22, 23};
- const int HiMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31};
- return DAG.getNode(X86ISD::PACKUS, dl, VT,
- DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, LoMask),
- DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
- }
-
- assert(VT == MVT::v16i8 && "Unexpected VT");
-
- SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v16i16, A);
- SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v16i16, B);
- SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
- Mul = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
- DAG.getConstant(8, dl, MVT::v16i16));
- // If we have BWI we can use truncate instruction.
- if (Subtarget.hasBWI())
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Lo);
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Hi);
- return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
- }
-
- assert(VT == MVT::v16i8 &&
- "Pre-AVX2 support only supports v16i8 multiplication");
- MVT ExVT = MVT::v8i16;
- unsigned ExSSE41 = ISD::MULHU == Opcode ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG;
+ SDValue Hi = DAG.getIntPtrConstant(NumElts / 2, dl);
+
+ MVT ExVT = MVT::v16i16;
+ SDValue ALo = extract128BitVector(A, 0, DAG, dl);
+ SDValue BLo = extract128BitVector(B, 0, DAG, dl);
+ SDValue AHi = extract128BitVector(A, NumElts / 2, DAG, dl);
+ SDValue BHi = extract128BitVector(B, NumElts / 2, DAG, dl);
+ ALo = DAG.getNode(ExAVX, dl, ExVT, ALo);
+ BLo = DAG.getNode(ExAVX, dl, ExVT, BLo);
+ AHi = DAG.getNode(ExAVX, dl, ExVT, AHi);
+ BHi = DAG.getNode(ExAVX, dl, ExVT, BHi);
+ Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
+ Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
+ Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG);
+ Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG);
+
+ // Bitcast back to VT and then pack all the even elements from Lo and Hi.
+ // Shuffle lowering should turn this into PACKUS+PERMQ
+ Lo = DAG.getBitcast(VT, Lo);
+ Hi = DAG.getBitcast(VT, Hi);
+ return DAG.getVectorShuffle(VT, dl, Lo, Hi,
+ { 0, 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 26, 28, 30,
+ 32, 34, 36, 38, 40, 42, 44, 46,
+ 48, 50, 52, 54, 56, 58, 60, 62});
+ }
+
+ // For signed v16i8 and all unsigned vXi8 we will unpack the low and high
+ // half of each 128-bit lane to widen to a vXi16 type. Do the multiplies,
+ // shift the results and pack the half lane results back together.
+
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+
+ static const int PSHUFDMask[] = { 8, 9, 10, 11, 12, 13, 14, 15,
+ -1, -1, -1, -1, -1, -1, -1, -1};
// Extract the lo parts and zero/sign extend to i16.
- SDValue ALo, BLo;
- if (Subtarget.hasSSE41()) {
- ALo = DAG.getNode(ExSSE41, dl, ExVT, A);
- BLo = DAG.getNode(ExSSE41, dl, ExVT, B);
+ // Only use SSE4.1 instructions for signed v16i8 where using unpack requires
+ // shifts to sign extend. Using unpack for unsigned only requires an xor to
+ // create zeros and a copy due to tied register constraints pre-AVX. But using
+ // zero_extend_vector_inreg would require an additional pshufd for the high
+ // part.
+
+ SDValue ALo, AHi;
+ if (IsSigned && VT == MVT::v16i8 && Subtarget.hasSSE41()) {
+ ALo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, A);
+
+ AHi = DAG.getVectorShuffle(VT, dl, A, A, PSHUFDMask);
+ AHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, AHi);
+ } else if (IsSigned) {
+ ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), A));
+ AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), A));
+
+ ALo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, ALo, 8, DAG);
+ AHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, AHi, 8, DAG);
} else {
- const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
- -1, 4, -1, 5, -1, 6, -1, 7};
- ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- ALo = DAG.getBitcast(ExVT, ALo);
- BLo = DAG.getBitcast(ExVT, BLo);
- ALo = DAG.getNode(ExShift, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
- BLo = DAG.getNode(ExShift, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
- }
-
- // Extract the hi parts and zero/sign extend to i16.
- SDValue AHi, BHi;
- if (Subtarget.hasSSE41()) {
- const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
- -1, -1, -1, -1, -1, -1, -1, -1};
- AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(ExSSE41, dl, ExVT, AHi);
- BHi = DAG.getNode(ExSSE41, dl, ExVT, BHi);
+ ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A,
+ DAG.getConstant(0, dl, VT)));
+ AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A,
+ DAG.getConstant(0, dl, VT)));
+ }
+
+ SDValue BLo, BHi;
+ if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
+ // If the RHS is a constant, manually unpackl/unpackh and extend.
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (unsigned i = 0; i != NumElts; i += 16) {
+ for (unsigned j = 0; j != 8; ++j) {
+ SDValue LoOp = B.getOperand(i + j);
+ SDValue HiOp = B.getOperand(i + j + 8);
+
+ if (IsSigned) {
+ LoOp = DAG.getSExtOrTrunc(LoOp, dl, MVT::i16);
+ HiOp = DAG.getSExtOrTrunc(HiOp, dl, MVT::i16);
+ } else {
+ LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
+ HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
+ }
+
+ LoOps.push_back(LoOp);
+ HiOps.push_back(HiOp);
+ }
+ }
+
+ BLo = DAG.getBuildVector(ExVT, dl, LoOps);
+ BHi = DAG.getBuildVector(ExVT, dl, HiOps);
+ } else if (IsSigned && VT == MVT::v16i8 && Subtarget.hasSSE41()) {
+ BLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, B);
+
+ BHi = DAG.getVectorShuffle(VT, dl, B, B, PSHUFDMask);
+ BHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, ExVT, BHi);
+ } else if (IsSigned) {
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), B));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), B));
+
+ BLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BLo, 8, DAG);
+ BHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BHi, 8, DAG);
} else {
- const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
- -1, 12, -1, 13, -1, 14, -1, 15};
- AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
- BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getBitcast(ExVT, AHi);
- BHi = DAG.getBitcast(ExVT, BHi);
- AHi = DAG.getNode(ExShift, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
- BHi = DAG.getNode(ExShift, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
+ BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B,
+ DAG.getConstant(0, dl, VT)));
+ BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B,
+ DAG.getConstant(0, dl, VT)));
}
// Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and
- // pack back to v16i8.
+ // pack back to vXi8.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
- RLo = DAG.getNode(ISD::SRL, dl, ExVT, RLo, DAG.getConstant(8, dl, ExVT));
- RHi = DAG.getNode(ISD::SRL, dl, ExVT, RHi, DAG.getConstant(8, dl, ExVT));
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG);
+ RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);
+
+ // Bitcast back to VT and then pack all the even elements from Lo and Hi.
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
@@ -22890,105 +24058,6 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
return DAG.getBitcast(VT, CallInfo.first);
}
-static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- MVT VT = Op0.getSimpleValueType();
- SDLoc dl(Op);
-
- // Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget.hasInt256()) {
- unsigned Opcode = Op.getOpcode();
- unsigned NumElems = VT.getVectorNumElements();
- MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), NumElems / 2);
- SDValue Lo0 = extract128BitVector(Op0, 0, DAG, dl);
- SDValue Lo1 = extract128BitVector(Op1, 0, DAG, dl);
- SDValue Hi0 = extract128BitVector(Op0, NumElems / 2, DAG, dl);
- SDValue Hi1 = extract128BitVector(Op1, NumElems / 2, DAG, dl);
- SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Lo0, Lo1);
- SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Hi0, Hi1);
- SDValue Ops[] = {
- DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(0), Hi.getValue(0)),
- DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(1), Hi.getValue(1))
- };
- return DAG.getMergeValues(Ops, dl);
- }
-
- assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
- (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
- (VT == MVT::v16i32 && Subtarget.hasAVX512()));
-
- int NumElts = VT.getVectorNumElements();
-
- // PMULxD operations multiply each even value (starting at 0) of LHS with
- // the related value of RHS and produce a widen result.
- // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
- // => <2 x i64> <ae|cg>
- //
- // In other word, to have all the results, we need to perform two PMULxD:
- // 1. one with the even values.
- // 2. one with the odd values.
- // To achieve #2, with need to place the odd values at an even position.
- //
- // Place the odd value at an even position (basically, shift all values 1
- // step to the left):
- const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1};
- // <a|b|c|d> => <b|undef|d|undef>
- SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0,
- makeArrayRef(&Mask[0], NumElts));
- // <e|f|g|h> => <f|undef|h|undef>
- SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1,
- makeArrayRef(&Mask[0], NumElts));
-
- // Emit two multiplies, one for the lower 2 ints and one for the higher 2
- // ints.
- MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
- bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
- unsigned Opcode =
- (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
- // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
- // => <2 x i64> <ae|cg>
- SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
- DAG.getBitcast(MulVT, Op0),
- DAG.getBitcast(MulVT, Op1)));
- // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
- // => <2 x i64> <bf|dh>
- SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
- DAG.getBitcast(MulVT, Odd0),
- DAG.getBitcast(MulVT, Odd1)));
-
- // Shuffle it back into the right order.
- SmallVector<int, 16> HighMask(NumElts);
- SmallVector<int, 16> LowMask(NumElts);
- for (int i = 0; i != NumElts; ++i) {
- HighMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
- LowMask[i] = (i / 2) * 2 + ((i % 2) * NumElts);
- }
-
- SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
-
- // If we have a signed multiply but no PMULDQ fix up the high parts of a
- // unsigned multiply.
- if (IsSigned && !Subtarget.hasSSE41()) {
- SDValue ShAmt = DAG.getConstant(
- 31, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
- SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
- SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
-
- SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
- Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
- }
-
- // The first result of MUL_LOHI is actually the low value, followed by the
- // high value.
- SDValue Ops[] = {Lows, Highs};
- return DAG.getMergeValues(Ops, dl);
-}
-
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
@@ -23042,9 +24111,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
-
- unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
- (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
+ unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);
auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
@@ -23055,8 +24122,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
"Unsupported PCMPGT op");
- return DAG.getNode(X86ISD::PCMPGT, dl, VT,
- getZeroVector(VT, Subtarget, DAG, dl), R);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
}
if (ShiftAmt >= 32) {
@@ -23071,7 +24137,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{9, 1, 11, 3, 13, 5, 15, 7});
} else {
- // SRA upper i32, SHL whole i64 and select lower i32.
+ // SRA upper i32, SRL whole i64 and select lower i32.
SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt, DAG);
SDValue Lower =
@@ -23087,199 +24153,123 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
};
// Optimize shl/srl/sra with constant shift amount.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
- uint64_t ShiftAmt = ShiftConst->getZExtValue();
-
- if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
- return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
-
- // i64 SRA needs to be performed as partial shifts.
- if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
- (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
- Op.getOpcode() == ISD::SRA)
- return ArithmeticShiftRight64(ShiftAmt);
-
- if (VT == MVT::v16i8 ||
- (Subtarget.hasInt256() && VT == MVT::v32i8) ||
- VT == MVT::v64i8) {
- unsigned NumElts = VT.getVectorNumElements();
- MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
-
- // Simple i8 add case
- if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
- return DAG.getNode(ISD::ADD, dl, VT, R, R);
-
- // ashr(R, 7) === cmp_slt(R, 0)
- if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
- if (VT.is512BitVector()) {
- assert(VT == MVT::v64i8 && "Unexpected element type!");
- SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R,
- ISD::SETGT);
- return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
- }
- return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
- }
+ APInt APIntShiftAmt;
+ if (!isConstantSplat(Amt, APIntShiftAmt))
+ return SDValue();
+ uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
- // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
- if (VT == MVT::v16i8 && Subtarget.hasXOP())
- return SDValue();
+ if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+ return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
- if (Op.getOpcode() == ISD::SHL) {
- // Make a large shift.
- SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
- R, ShiftAmt, DAG);
- SHL = DAG.getBitcast(VT, SHL);
- // Zero out the rightmost bits.
- return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
- }
- if (Op.getOpcode() == ISD::SRL) {
- // Make a large shift.
- SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT,
- R, ShiftAmt, DAG);
- SRL = DAG.getBitcast(VT, SRL);
- // Zero out the leftmost bits.
- return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
- }
- if (Op.getOpcode() == ISD::SRA) {
- // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
- SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
-
- SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
- Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
- Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
- return Res;
- }
- llvm_unreachable("Unknown shift opcode.");
- }
- }
- }
+ // i64 SRA needs to be performed as partial shifts.
+ if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
+ (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
+ Op.getOpcode() == ISD::SRA)
+ return ArithmeticShiftRight64(ShiftAmt);
- // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
- // TODO: Replace constant extraction with getTargetConstantBitsFromNode.
- if (!Subtarget.hasXOP() &&
- (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
- (Subtarget.hasAVX512() && VT == MVT::v8i64))) {
+ if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
+ VT == MVT::v64i8) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- // AVX1 targets maybe extracting a 128-bit vector from a 256-bit constant.
- unsigned SubVectorScale = 1;
- if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- SubVectorScale =
- Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
- Amt = Amt.getOperand(0);
- }
+ // Simple i8 add case
+ if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
+ return DAG.getNode(ISD::ADD, dl, VT, R, R);
- // Peek through any splat that was introduced for i64 shift vectorization.
- int SplatIndex = -1;
- if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
- if (SVN->isSplat()) {
- SplatIndex = SVN->getSplatIndex();
- Amt = Amt.getOperand(0);
- assert(SplatIndex < (int)VT.getVectorNumElements() &&
- "Splat shuffle referencing second operand");
+ // ashr(R, 7) === cmp_slt(R, 0)
+ if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
+ SDValue Zeros = DAG.getConstant(0, dl, VT);
+ if (VT.is512BitVector()) {
+ assert(VT == MVT::v64i8 && "Unexpected element type!");
+ SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
}
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
- if (Amt.getOpcode() != ISD::BITCAST ||
- Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+ // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
+ if (VT == MVT::v16i8 && Subtarget.hasXOP())
return SDValue();
- Amt = Amt.getOperand(0);
- unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
- (SubVectorScale * VT.getVectorNumElements());
- unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
- uint64_t ShiftAmt = 0;
- unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
- for (unsigned i = 0; i != Ratio; ++i) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
- if (!C)
- return SDValue();
- // 6 == Log2(64)
- ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
- }
-
- // Check remaining shift amounts (if not a splat).
- if (SplatIndex < 0) {
- for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
- uint64_t ShAmt = 0;
- for (unsigned j = 0; j != Ratio; ++j) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
- if (!C)
- return SDValue();
- // 6 == Log2(64)
- ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
- }
- if (ShAmt != ShiftAmt)
- return SDValue();
- }
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
+ ShiftAmt, DAG);
+ SHL = DAG.getBitcast(VT, SHL);
+ // Zero out the rightmost bits.
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
+ ShiftAmt, DAG);
+ SRL = DAG.getBitcast(VT, SRL);
+ // Zero out the leftmost bits.
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+
+ SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
}
-
- if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
- return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
-
- if (Op.getOpcode() == ISD::SRA)
- return ArithmeticShiftRight64(ShiftAmt);
+ llvm_unreachable("Unknown shift opcode.");
}
return SDValue();
}
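// A plain-C++ check (illustrative only, assuming the usual two's-complement
// arithmetic right shift) of the vXi8 shift tricks above, modeled on one
// 16-bit lane holding two bytes: the wide shift leaks bits from the
// neighbouring byte, the AND masks strip exactly those leaked bits, and
// arithmetic shift is rebuilt from a logical shift via
// ashr(x, amt) == ((x >> amt) ^ m) - m with m = 0x80 >> amt.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Amt = 0; Amt != 8; ++Amt) {
    uint8_t ShlMask = uint8_t(0xFFu << Amt); // zero out the rightmost bits
    uint8_t SrlMask = uint8_t(0xFFu >> Amt); // zero out the leftmost bits
    uint8_t SraMask = uint8_t(0x80u >> Amt); // sign bit after the shift
    for (unsigned Lo = 0; Lo != 256; ++Lo) {
      for (unsigned Hi = 0; Hi != 256; ++Hi) {
        uint16_t W = uint16_t((Hi << 8) | Lo);
        // SHL as one wide 16-bit shift plus a per-byte mask.
        uint16_t S = uint16_t(W << Amt);
        assert(uint8_t((S & 0xFF) & ShlMask) == uint8_t(Lo << Amt));
        assert(uint8_t((S >> 8) & ShlMask) == uint8_t(Hi << Amt));
        // SRL as one wide 16-bit shift plus a per-byte mask.
        uint16_t R = uint16_t(W >> Amt);
        assert(uint8_t((R & 0xFF) & SrlMask) == uint8_t(Lo >> Amt));
        assert(uint8_t((R >> 8) & SrlMask) == uint8_t(Hi >> Amt));
        // SRA rebuilt from SRL: xor with, then subtract, the shifted sign bit.
        uint8_t L = uint8_t(uint8_t(Lo) >> Amt);
        assert(int8_t(uint8_t((L ^ SraMask) - SraMask)) ==
               int8_t(int8_t(Lo) >> Amt));
      }
    }
  }
  return 0;
}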
-// Determine if V is a splat value, and return the scalar.
-static SDValue IsSplatValue(MVT VT, SDValue V, const SDLoc &dl,
- SelectionDAG &DAG, const X86Subtarget &Subtarget,
- unsigned Opcode) {
- V = peekThroughEXTRACT_SUBVECTORs(V);
-
- // Check if this is a splat build_vector node.
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V)) {
- SDValue SplatAmt = BV->getSplatValue();
- if (SplatAmt && SplatAmt.isUndef())
- return SDValue();
- return SplatAmt;
- }
-
- // Check for SUB(SPLAT_BV, SPLAT) cases from rotate patterns.
- if (V.getOpcode() == ISD::SUB &&
- !SupportedVectorVarShift(VT, Subtarget, Opcode)) {
- SDValue LHS = peekThroughEXTRACT_SUBVECTORs(V.getOperand(0));
- SDValue RHS = peekThroughEXTRACT_SUBVECTORs(V.getOperand(1));
+// If V is a splat value, return the source vector and splat index.
+static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) {
+ V = peekThroughEXTRACT_SUBVECTORs(V);
- // Ensure that the corresponding splat BV element is not UNDEF.
- BitVector UndefElts;
- BuildVectorSDNode *BV0 = dyn_cast<BuildVectorSDNode>(LHS);
- ShuffleVectorSDNode *SVN1 = dyn_cast<ShuffleVectorSDNode>(RHS);
- if (BV0 && SVN1 && BV0->getSplatValue(&UndefElts) && SVN1->isSplat()) {
- unsigned SplatIdx = (unsigned)SVN1->getSplatIndex();
- if (!UndefElts[SplatIdx])
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getVectorElementType(), V,
- DAG.getIntPtrConstant(SplatIdx, dl));
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+ switch (Opcode) {
+ default: {
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (DAG.isSplatValue(V, DemandedElts, UndefElts)) {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return DAG.getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+ return V;
}
+ break;
}
-
- // Check if this is a shuffle node doing a splat.
- ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V);
- if (!SVN || !SVN->isSplat())
- return SDValue();
-
- unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
- SDValue InVec = V.getOperand(0);
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- assert((SplatIdx < VT.getVectorNumElements()) &&
- "Unexpected shuffle index found!");
- return InVec.getOperand(SplatIdx);
- } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2)))
- if (C->getZExtValue() == SplatIdx)
- return InVec.getOperand(1);
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+ // getTargetVShiftNode currently struggles without the splat source.
+ auto *SVN = cast<ShuffleVectorSDNode>(V);
+ if (!SVN->isSplat())
+ break;
+ int Idx = SVN->getSplatIndex();
+ int NumElts = V.getValueType().getVectorNumElements();
+ SplatIdx = Idx % NumElts;
+ return V.getOperand(Idx / NumElts);
}
+ }
+
+ return SDValue();
+}
- // Avoid introducing an extract element from a shuffle.
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getVectorElementType(), InVec,
- DAG.getIntPtrConstant(SplatIdx, dl));
+static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ int SplatIdx;
+ if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ SrcVector.getValueType().getScalarType(), SrcVector,
+ DAG.getIntPtrConstant(SplatIdx, dl));
+ return SDValue();
}
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
@@ -23289,17 +24279,11 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
+ unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
+ unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
- unsigned X86OpcI = (Opcode == ISD::SHL) ? X86ISD::VSHLI :
- (Opcode == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
-
- unsigned X86OpcV = (Opcode == ISD::SHL) ? X86ISD::VSHL :
- (Opcode == ISD::SRL) ? X86ISD::VSRL : X86ISD::VSRA;
-
- Amt = peekThroughEXTRACT_SUBVECTORs(Amt);
-
- if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
- if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG, Subtarget, Opcode)) {
+ if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
+ if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
@@ -23309,6 +24293,50 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
+
+ // vXi8 shifts - shift as v8i16 + mask result.
+ if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
+ (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
+ VT == MVT::v64i8) &&
+ !Subtarget.hasXOP()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
+ unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
+ unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ // Create the mask using vXi16 shifts. For shift-rights we need to move
+ // the upper byte down before splatting the vXi8 mask.
+ SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
+ BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
+ BaseShAmt, Subtarget, DAG);
+ if (Opcode != ISD::SHL)
+ BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
+ 8, DAG);
+ BitMask = DAG.getBitcast(VT, BitMask);
+ BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
+ SmallVector<int, 64>(NumElts, 0));
+
+ SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
+ DAG.getBitcast(ExtVT, R), BaseShAmt,
+ Subtarget, DAG);
+ Res = DAG.getBitcast(VT, Res);
+ Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);
+
+ if (Opcode == ISD::SRA) {
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
+ // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
+ SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
+ SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
+ BaseShAmt, Subtarget, DAG);
+ SignMask = DAG.getBitcast(VT, SignMask);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
+ }
+ return Res;
+ }
+ }
}
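The vXi8 block above performs the shift in vXi16 lanes and masks off the bits that leak across byte boundaries, then rebuilds the arithmetic case with ashr(x, s) == sub(xor(lshr(x, s), m), m), where m is the shifted sign bit. A small scalar sketch of both steps, assuming plain C++ (names illustrative):

    #include <cstdint>

    // One vXi16 lane holds two packed bytes [hi:lo]. A 16-bit logical right
    // shift (PSRLW) leaks bits of 'hi' into 'lo'; AND-ing with the per-byte
    // mask (0xFF >> s, the BitMask built above) restores the byte-wise result.
    uint16_t lshrBytesInI16Lane(uint16_t Lane, unsigned S) {
      uint16_t Shifted = static_cast<uint16_t>(Lane >> S);
      uint16_t Mask = static_cast<uint16_t>(0xFFu >> S);
      Mask = static_cast<uint16_t>(Mask | (Mask << 8));  // splat to both bytes
      return static_cast<uint16_t>(Shifted & Mask);
    }

    // The SRA case then recovers the sign from the logical shift:
    //   ashr(x, s) == ((lshr(x, s) ^ m) - m)   with m = 0x80 >> s (the SignMask).
    int8_t ashrFromLshr(uint8_t X, unsigned S) {
      uint8_t L = static_cast<uint8_t>(X >> S);          // logical shift
      uint8_t M = static_cast<uint8_t>(0x80u >> S);
      return static_cast<int8_t>(static_cast<uint8_t>((L ^ M) - M));
    }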
// Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
@@ -23379,7 +24407,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
// AVX2 can more effectively perform this as a zext/trunc to/from v8i32.
if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, VT);
SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));
SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));
Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG);
@@ -23401,8 +24429,13 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
+ unsigned Opc = Op.getOpcode();
+ unsigned X86OpcV = getTargetVShiftUniformOpcode(Opc, true);
+ unsigned X86OpcI = getTargetVShiftUniformOpcode(Opc, false);
+
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
@@ -23412,31 +24445,31 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
- if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
+ if (SupportedVectorVarShift(VT, Subtarget, Opc))
return Op;
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v8i16 || VT == MVT::v16i8)) {
- if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
+ if (Opc == ISD::SRL || Opc == ISD::SRA) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
}
- if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL)
+ if (Opc == ISD::SHL || Opc == ISD::SRL)
return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
- if (Op.getOpcode() == ISD::SRA)
+ if (Opc == ISD::SRA)
return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
}
// 2i64 vector logical shifts can efficiently avoid scalarization - do the
// shifts per-lane and then shuffle the partial results back together.
- if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
+ if (VT == MVT::v2i64 && Opc != ISD::SRA) {
// Splat the shift amounts so the scalar shifts above will catch it.
SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
- SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0);
- SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1);
+ SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
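The v2i64 block above avoids scalarising a non-uniform logical shift by splatting each amount lane, shifting the whole vector twice, and recombining lane 0 of the first result with lane 1 of the second (the {0, 3} shuffle). A tiny sketch of the recombination, with illustrative names:

    #include <array>
    #include <cstdint>

    // shift(x, <a0, a1>) == shuffle(shift(x, <a0, a0>), shift(x, <a1, a1>), {0, 3})
    std::array<uint64_t, 2> shiftPerLane(std::array<uint64_t, 2> X,
                                         std::array<uint64_t, 2> Amt) {
      std::array<uint64_t, 2> R0{X[0] >> Amt[0], X[1] >> Amt[0]};  // splat amt[0]
      std::array<uint64_t, 2> R1{X[0] >> Amt[1], X[1] >> Amt[1]};  // splat amt[1]
      return {R0[0], R1[1]};  // {0, 3} keeps each lane shifted by its own amount
    }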
@@ -23444,7 +24477,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// M = lshr(SIGN_MASK, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
- Op.getOpcode() == ISD::SRA) {
+ Opc == ISD::SRA) {
SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
@@ -23489,36 +24522,34 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// Only perform this blend if we can perform it without loading a mask.
if (ShuffleMask.size() == NumElts && Amt1 && Amt2 &&
- isa<ConstantSDNode>(Amt1) && isa<ConstantSDNode>(Amt2) &&
(VT != MVT::v16i16 ||
is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&
- (VT == MVT::v4i32 || Subtarget.hasSSE41() ||
- Op.getOpcode() != ISD::SHL || canWidenShuffleElements(ShuffleMask))) {
- SDValue Splat1 =
- DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
- SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
- SDValue Splat2 =
- DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
- SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
- return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
+ (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL ||
+ canWidenShuffleElements(ShuffleMask))) {
+ auto *Cst1 = dyn_cast<ConstantSDNode>(Amt1);
+ auto *Cst2 = dyn_cast<ConstantSDNode>(Amt2);
+ if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) &&
+ Cst2->getAPIntValue().ult(EltSizeInBits)) {
+ SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
+ Cst1->getZExtValue(), DAG);
+ SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,
+ Cst2->getZExtValue(), DAG);
+ return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);
+ }
}
}
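The rewritten blend path now also checks that the two constant amounts are in range before emitting the two immediate shifts that get recombined by the shuffle mask. The underlying trick, sketched for a <1, 1, 3, 3> amount vector (illustrative values):

    #include <array>
    #include <cstdint>

    // Shift the whole vector by each of the two constants, then blend lanes
    // according to which constant each lane used (the ShuffleMask above).
    std::array<uint32_t, 4> twoConstShift(std::array<uint32_t, 4> X) {
      std::array<uint32_t, 4> S1, S3;
      for (int i = 0; i != 4; ++i) {
        S1[i] = X[i] << 1;
        S3[i] = X[i] << 3;
      }
      return {S1[0], S1[1], S3[2], S3[3]};   // per-lane blend, e.g. PBLENDW
    }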
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
- if (Op.getOpcode() == ISD::SHL)
+ if (Opc == ISD::SHL)
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
- // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we
+ // Constant ISD::SRL can be performed efficiently on vXi16 vectors as we
// can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt).
- // TODO: Improve support for the shift by zero special case.
- if (Op.getOpcode() == ISD::SRL && ConstantAmt &&
- ((Subtarget.hasSSE41() && VT == MVT::v8i16) ||
- DAG.isKnownNeverZero(Amt)) &&
- (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- ((VT == MVT::v32i8 || VT == MVT::v16i16) && Subtarget.hasInt256()))) {
- SDValue EltBits = DAG.getConstant(VT.getScalarSizeInBits(), dl, VT);
+ if (Opc == ISD::SRL && ConstantAmt &&
+ (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) {
+ SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
@@ -23528,13 +24559,36 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ // Constant ISD::SRA can be performed efficiently on vXi16 vectors as we
+ // can replace with ISD::MULHS, creating scale factor from (NumEltBits - Amt).
+ // TODO: Special case handling for shift by 0/1, really we can afford either
+ // of these cases in pre-SSE41/XOP/AVX512 but not both.
+ if (Opc == ISD::SRA && ConstantAmt &&
+ (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) &&
+ ((Subtarget.hasSSE41() && !Subtarget.hasXOP() &&
+ !Subtarget.hasAVX512()) ||
+ DAG.isKnownNeverZero(Amt))) {
+ SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);
+ SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);
+ if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) {
+ SDValue Amt0 =
+ DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ);
+ SDValue Amt1 =
+ DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ);
+ SDValue Sra1 =
+ getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG);
+ SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale);
+ Res = DAG.getSelect(dl, VT, Amt0, R, Res);
+ return DAG.getSelect(dl, VT, Amt1, Sra1, Res);
+ }
+ }
+
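Both of the constant vXi16 paths above turn a right shift into a high-half multiply: for 0 < s < 16, mulhu(x, 1 << (16 - s)) == lshr(x, s) and mulhs(x, 1 << (16 - s)) == ashr(x, s). The signed case cannot represent 1 << 15 as a positive i16, which is why shifts by 0 and 1 are caught by the SETCC/select guards in the SRA block. A short scalar check, assuming plain C++ and the usual arithmetic behaviour of >> on signed values:

    #include <cassert>
    #include <cstdint>

    // High halves of a 16x16->32 multiply (MULHU / MULHS).
    uint16_t mulhu16(uint16_t A, uint16_t B) {
      return static_cast<uint16_t>((uint32_t(A) * uint32_t(B)) >> 16);
    }
    int16_t mulhs16(int16_t A, int16_t B) {
      return static_cast<int16_t>((int32_t(A) * int32_t(B)) >> 16);
    }

    void checkScaleIdentity() {
      for (unsigned S = 1; S < 16; ++S) {
        uint16_t Scale = static_cast<uint16_t>(1u << (16 - S));
        assert(mulhu16(0xBEEF, Scale) == (0xBEEFu >> S));
        if (S >= 2)  // 1 << 15 is not a positive i16: hence the shift-by-1 select
          assert(mulhs16(int16_t(-12345), int16_t(Scale)) ==
                 int16_t(-12345) >> S);
      }
    }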
// v4i32 Non Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
// and shift using the SSE2 variable shifts.
// The separate results can then be blended together.
if (VT == MVT::v4i32) {
- unsigned Opc = Op.getOpcode();
SDValue Amt0, Amt1, Amt2, Amt3;
if (ConstantAmt) {
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
@@ -23542,26 +24596,12 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
} else {
- // ISD::SHL is handled above but we include it here for completeness.
- switch (Opc) {
- default:
- llvm_unreachable("Unknown target vector shift node");
- case ISD::SHL:
- Opc = X86ISD::VSHL;
- break;
- case ISD::SRL:
- Opc = X86ISD::VSRL;
- break;
- case ISD::SRA:
- Opc = X86ISD::VSRA;
- break;
- }
// The SSE2 shifts use the lower i64 as the same shift amount for
// all lanes and the upper i64 is ignored. On AVX we're better off
// just zero-extending, but for SSE just duplicating the top 16-bits is
// cheaper and has the same effect for out of range values.
if (Subtarget.hasAVX()) {
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, VT);
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
@@ -23581,10 +24621,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
}
- SDValue R0 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt0));
- SDValue R1 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt1));
- SDValue R2 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt2));
- SDValue R3 = DAG.getNode(Opc, dl, VT, R, DAG.getBitcast(VT, Amt3));
+ unsigned ShOpc = ConstantAmt ? Opc : X86OpcV;
+ SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0));
+ SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1));
+ SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2));
+ SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3));
// Merge the shifted lane results optimally with/without PBLENDW.
// TODO - ideally shuffle combining would handle this.
@@ -23611,19 +24652,66 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
"Unexpected vector type");
MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32;
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
- unsigned ExtOpc =
- Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
+ DAG.getNode(Opc, dl, ExtVT, R, Amt));
+ }
+
+ // Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors as we
+ // extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI.
+ if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) &&
+ (VT == MVT::v16i8 || VT == MVT::v64i8 ||
+ (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
+ !Subtarget.hasXOP()) {
+ int NumElts = VT.getVectorNumElements();
+ SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8);
+
+ // Extend constant shift amount to vXi16 (it doesn't matter if the type
+ // isn't legal).
+ MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
+ Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT);
+ Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt);
+ Amt = DAG.getNode(ISD::SHL, dl, ExVT, DAG.getConstant(1, dl, ExVT), Amt);
+ assert(ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()) &&
+ "Constant build vector expected");
+
+ if (VT == MVT::v16i8 && Subtarget.hasInt256()) {
+ R = Opc == ISD::SRA ? DAG.getSExtOrTrunc(R, dl, ExVT)
+ : DAG.getZExtOrTrunc(R, dl, ExVT);
+ R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt);
+ R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8);
+ return DAG.getZExtOrTrunc(R, dl, VT);
+ }
+
+ SmallVector<SDValue, 16> LoAmt, HiAmt;
+ for (int i = 0; i != NumElts; i += 16) {
+ for (int j = 0; j != 8; ++j) {
+ LoAmt.push_back(Amt.getOperand(i + j));
+ HiAmt.push_back(Amt.getOperand(i + j + 8));
+ }
+ }
+
+ MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt);
+ SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt);
+
+ SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R));
+ SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R));
+ LoR = DAG.getNode(X86OpcI, dl, VT16, LoR, Cst8);
+ HiR = DAG.getNode(X86OpcI, dl, VT16, HiR, Cst8);
+ LoR = DAG.getNode(ISD::MUL, dl, VT16, LoR, LoA);
+ HiR = DAG.getNode(ISD::MUL, dl, VT16, HiR, HiA);
+ LoR = DAG.getNode(X86ISD::VSRLI, dl, VT16, LoR, Cst8);
+ HiR = DAG.getNode(X86ISD::VSRLI, dl, VT16, HiR, Cst8);
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
}
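This hunk lowers constant (possibly non-uniform) vXi8 right shifts by extending each byte into an i16 lane, multiplying by 1 << (8 - s), and reading the result byte back out with a logical shift by 8 before PACKUS. The scalar form of the identity, valid for 0 <= s < 8 (plain C++, illustrative names):

    #include <cstdint>

    //   lshr(x, s) == lowbyte((zext16(x) * (1 << (8 - s))) >> 8)
    //   ashr(x, s) == lowbyte((sext16(x) * (1 << (8 - s))) >> 8)   (logical >> 8)
    uint8_t lshr8ViaMul(uint8_t X, unsigned S) {
      uint16_t Scale = static_cast<uint16_t>(1u << (8 - S));   // the Amt built above
      uint16_t Prod = static_cast<uint16_t>(uint16_t(X) * Scale);
      return static_cast<uint8_t>(Prod >> 8);
    }
    int8_t ashr8ViaMul(int8_t X, unsigned S) {
      uint16_t Scale = static_cast<uint16_t>(1u << (8 - S));
      uint16_t Prod = static_cast<uint16_t>(uint16_t(int16_t(X)) * Scale);
      return static_cast<int8_t>(static_cast<uint8_t>(Prod >> 8));
    }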
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
- unsigned ShiftOpcode = Op->getOpcode();
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (VT.is512BitVector()) {
@@ -23648,7 +24736,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
- SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
+ SDValue Z = DAG.getConstant(0, dl, SelVT);
SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
return DAG.getSelect(dl, SelVT, C, V0, V1);
};
@@ -23657,49 +24745,46 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
Amt = DAG.getBitcast(ExtVT, Amt);
- Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT));
+ Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG);
Amt = DAG.getBitcast(VT, Amt);
- if (Op->getOpcode() == ISD::SHL || Op->getOpcode() == ISD::SRL) {
+ if (Opc == ISD::SHL || Opc == ISD::SRL) {
// r = VSELECT(r, shift(r, 4), a);
- SDValue M =
- DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
return R;
}
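The SHL/SRL ladder above implements a fully variable byte shift without per-lane shift instructions: the 3-bit amount is moved into the sign-bit position (the shift left by 5), and the vector is conditionally shifted by 4, 2 and 1, selecting on one amount bit per step. Scalar equivalent of one lane, with illustrative names:

    #include <cstdint>

    // Variable shift decomposed into conditional shifts by 4, 2 and 1,
    // selected by the bits of the 3-bit amount - the select is what
    // PBLENDVB / PCMPGT-against-zero perform per lane in the vector code.
    uint8_t varShl8(uint8_t R, uint8_t Amt) {
      if (Amt & 4) R = static_cast<uint8_t>(R << 4);
      if (Amt & 2) R = static_cast<uint8_t>(R << 2);
      if (Amt & 1) R = static_cast<uint8_t>(R << 1);
      return R;   // == R << (Amt & 7)
    }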
- if (Op->getOpcode() == ISD::SRA) {
+ if (Opc == ISD::SRA) {
// For SRA we need to unpack each byte to the higher byte of a i16 vector
// so we can correctly sign extend. We don't care what happens to the
// lower byte.
- SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), Amt);
- SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), Amt);
- SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), R);
- SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), R);
+ SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt);
+ SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R);
+ SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
// r = VSELECT(r, shift(r, 4), a);
- SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(4, dl, ExtVT));
- SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(4, dl, ExtVT));
+ SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG);
+ SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
@@ -23708,10 +24793,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 2), a);
- MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(2, dl, ExtVT));
- MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(2, dl, ExtVT));
+ MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 2, DAG);
+ MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 2, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
@@ -23720,45 +24803,38 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 1), a);
- MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
- DAG.getConstant(1, dl, ExtVT));
- MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
- DAG.getConstant(1, dl, ExtVT));
+ MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 1, DAG);
+ MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 1, DAG);
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// Logical shift the result back to the lower byte, leaving a zero upper
- // byte
- // meaning that we can safely pack with PACKUSWB.
- RLo =
- DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT));
- RHi =
- DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT));
+ // byte meaning that we can safely pack with PACKUSWB.
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG);
+ RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
}
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
- SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
- SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
- SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
- SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Z, R);
- SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Z, R);
+ SDValue Z = DAG.getConstant(0, dl, VT);
+ SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z);
+ SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z);
+ SDValue RLo = getUnpackl(DAG, dl, VT, Z, R);
+ SDValue RHi = getUnpackh(DAG, dl, VT, Z, R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
- SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
- SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
- Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
- Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
+ SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo);
+ SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi);
+ Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG);
+ Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG);
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
if (VT == MVT::v8i16) {
- unsigned ShiftOpcode = Op->getOpcode();
-
// If we have a constant shift amount, the non-SSE41 path is best as
// avoiding bitcasts make it easier to constant fold and reduce to PBLENDW.
bool UseSSE41 = Subtarget.hasSSE41() &&
@@ -23778,7 +24854,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// set all bits of the lanes to true and VSELECT uses that in
// its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue C =
- DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT));
+ getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG);
return DAG.getSelect(dl, VT, C, V0, V1);
};
@@ -23788,42 +24864,42 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// bytes for PBLENDVB.
Amt = DAG.getNode(
ISD::OR, dl, VT,
- DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(4, dl, VT)),
- DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT)));
+ getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG),
+ getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG));
} else {
- Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT));
+ Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG);
}
// r = VSELECT(r, shift(r, 8), a);
- SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(8, dl, VT));
+ SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 4), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG);
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
- M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
+ M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG);
R = SignBitSelect(Amt, M, R);
return R;
}
- // Decompose 256-bit shifts into smaller 128-bit shifts.
+ // Decompose 256-bit shifts into 128-bit shifts.
if (VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
return SDValue();
}
@@ -23838,20 +24914,31 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ int NumElts = VT.getVectorNumElements();
+
+ // Check for constant splat rotation amount.
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ int CstSplatIndex = -1;
+ if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits))
+ for (int i = 0; i != NumElts; ++i)
+ if (!UndefElts[i]) {
+ if (CstSplatIndex < 0 || EltBits[i] == EltBits[CstSplatIndex]) {
+ CstSplatIndex = i;
+ continue;
+ }
+ CstSplatIndex = -1;
+ break;
+ }
+ // AVX512 implicitly uses modulo rotation amounts.
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
- APInt UndefElts;
- SmallVector<APInt, 16> EltBits;
- if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) {
- if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) {
- return EltBits[0] == V;
- })) {
- unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
- uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits);
- return DAG.getNode(Op, DL, VT, R,
- DAG.getConstant(RotateAmt, DL, MVT::i8));
- }
+ if (0 <= CstSplatIndex) {
+ unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
+ uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
+ return DAG.getNode(Op, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
}
// Else, fall-back on VPROLV/VPRORV.
@@ -23862,20 +24949,17 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
+ // XOP implicitly uses modulo rotation amounts.
if (Subtarget.hasXOP()) {
- // Split 256-bit integers.
if (VT.is256BitVector())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
- uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < EltSizeInBits && "Rotation out of range");
- return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
- DAG.getConstant(RotateAmt, DL, MVT::i8));
- }
+ if (0 <= CstSplatIndex) {
+ uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
+ return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
}
// Use general rotate by variable (per-element).
@@ -23884,7 +24968,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Split 256-bit integers on pre-AVX2 targets.
if (VT.is256BitVector() && !Subtarget.hasAVX2())
- return Lower256IntArith(Op, DAG);
+ return split256IntArith(Op, DAG);
assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
@@ -23892,44 +24976,19 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
"Only vXi32/vXi16/vXi8 vector rotates supported");
  // Rotate by a uniform constant - expand back to shifts.
- // TODO - legalizers should be able to handle this.
- if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
- if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
- uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < EltSizeInBits && "Rotation out of range");
- if (RotateAmt == 0)
- return R;
-
- SDValue AmtR = DAG.getConstant(EltSizeInBits - RotateAmt, DL, VT);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
- }
+ if (0 <= CstSplatIndex)
+ return SDValue();
- // Rotate by splat - expand back to shifts.
- // TODO - legalizers should be able to handle this.
- if ((EltSizeInBits >= 16 || Subtarget.hasBWI()) &&
- IsSplatValue(VT, Amt, DL, DAG, Subtarget, Opcode)) {
- SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
- AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
+ bool IsSplatAmt = DAG.isSplatValue(Amt);
// v16i8/v32i8: Split rotation into rot4/rot2/rot1 stages and select by
// the amount bit.
- if (EltSizeInBits == 8) {
- if (Subtarget.hasBWI()) {
- SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
- AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
- SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
- return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
- }
+ if (EltSizeInBits == 8 && !IsSplatAmt) {
+ if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
+ return SDValue();
- MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
+ // We don't need ModuloAmt here as we just peek at individual bits.
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
@@ -23943,7 +25002,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
- SDValue Z = getZeroVector(SelVT, Subtarget, DAG, DL);
+ SDValue Z = DAG.getConstant(0, DL, SelVT);
SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
return DAG.getSelect(DL, SelVT, C, V0, V1);
};
@@ -23984,14 +25043,17 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return SignBitSelect(VT, Amt, M, R);
}
+ // ISD::ROT* uses modulo rotate amounts.
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
+ DAG.getConstant(EltSizeInBits - 1, DL, VT));
+
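Masking the amount with EltSizeInBits - 1 makes the ISD::ROT* modulo semantics explicit before the shift-based fallback below, which relies on rotl(x, s) == (x << s) | (x >> (w - s)). A guarded scalar version for one 16-bit lane (the s == 0 case is special-cased only because a full-width shift is not defined in scalar C++):

    #include <cstdint>

    uint16_t rotl16(uint16_t X, unsigned Amt) {
      unsigned S = Amt & 15;                       // modulo rotate amount
      if (S == 0)
        return X;
      return static_cast<uint16_t>((X << S) | (X >> (16 - S)));
    }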
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
SupportedVectorVarShift(VT, Subtarget, ISD::SRL);
- // Best to fallback for all supported variable shifts.
- // AVX2 - best to fallback for non-constants as well.
- // TODO - legalizers should be able to handle this.
- if (LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
+ // Fallback for splats + all supported variable shifts.
+ // Fallback for non-constants AVX2 vXi16 as well.
+ if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
@@ -24032,78 +25094,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7}));
}
-static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
- // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
- // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
- // looks for this combo and may remove the "setcc" instruction if the "setcc"
- // has only one use.
- SDNode *N = Op.getNode();
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- unsigned BaseOp = 0;
- X86::CondCode Cond;
- SDLoc DL(Op);
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Unknown ovf instruction!");
- case ISD::SADDO:
- // A subtract of one will be selected as a INC. Note that INC doesn't
- // set CF, so we can't do this for UADDO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::INC;
- Cond = X86::COND_O;
- break;
- }
- BaseOp = X86ISD::ADD;
- Cond = X86::COND_O;
- break;
- case ISD::UADDO:
- BaseOp = X86ISD::ADD;
- Cond = X86::COND_B;
- break;
- case ISD::SSUBO:
- // A subtract of one will be selected as a DEC. Note that DEC doesn't
- // set CF, so we can't do this for USUBO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::DEC;
- Cond = X86::COND_O;
- break;
- }
- BaseOp = X86ISD::SUB;
- Cond = X86::COND_O;
- break;
- case ISD::USUBO:
- BaseOp = X86ISD::SUB;
- Cond = X86::COND_B;
- break;
- case ISD::SMULO:
- BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
- Cond = X86::COND_O;
- break;
- case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
- if (N->getValueType(0) == MVT::i8) {
- BaseOp = X86ISD::UMUL8;
- Cond = X86::COND_O;
- break;
- }
- SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
- MVT::i32);
- SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
-
- SDValue SetCC = getSETCC(X86::COND_O, SDValue(Sum.getNode(), 2), DL, DAG);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
- }
- }
-
- // Also sets EFLAGS.
- SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
- SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
-
- SDValue SetCC = getSETCC(Cond, SDValue(Sum.getNode(), 1), DL, DAG);
-
- return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
-}
-
/// Returns true if the operand type is exactly twice the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
@@ -24246,7 +25236,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
+ SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, dl, MVT::i8), // Scale
@@ -24256,7 +25246,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
Zero,
Chain
};
- SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
+ SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
@@ -24369,40 +25359,32 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- if (DstVT != MVT::f64)
+ if (DstVT != MVT::f64 && DstVT != MVT::i64 &&
+ !(DstVT == MVT::x86mmx && SrcVT.isVector()))
// This conversion needs to be expanded.
return SDValue();
- SmallVector<SDValue, 16> Elts;
SDLoc dl(Op);
- unsigned NumElts;
- MVT SVT;
if (SrcVT.isVector()) {
- NumElts = SrcVT.getVectorNumElements();
- SVT = SrcVT.getVectorElementType();
-
// Widen the vector in input in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Src,
- DAG.getIntPtrConstant(i, dl)));
+ MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
+ SrcVT.getVectorNumElements() * 2);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
+ DAG.getUNDEF(SrcVT));
} else {
assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
- Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
- DAG.getIntPtrConstant(0, dl)));
- Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
- DAG.getIntPtrConstant(1, dl)));
- NumElts = 2;
- SVT = MVT::i32;
- }
- // Explicitly mark the extra elements as Undef.
- Elts.append(NumElts, DAG.getUNDEF(SVT));
-
- EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
- SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
- SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
+ Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
+ }
+
+ MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
+ Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);
+
+ if (DstVT == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
DAG.getIntPtrConstant(0, dl));
}
@@ -24445,7 +25427,7 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// PSADBW instruction horizontally add all bytes and leave the result in i64
// chunks, thus directly computes the pop count for v2i64 and v4i64.
if (EltVT == MVT::i64) {
- SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
return DAG.getBitcast(VT, V);
@@ -24457,13 +25439,13 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// this is that it lines up the results of two PSADBW instructions to be
// two v2i64 vectors which concatenated are the 4 population counts. We can
// then use PACKUSWB to shrink and concatenate them into a v4i32 again.
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
+ SDValue Zeros = DAG.getConstant(0, DL, VT);
SDValue V32 = DAG.getBitcast(VT, V);
- SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros);
- SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros);
+ SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
+ SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);
// Do the horizontal sums into two v2i64s.
- Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
+ Zeros = DAG.getConstant(0, DL, ByteVecVT);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, Low), Zeros);
@@ -24498,7 +25480,9 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
- unsigned VecSize = VT.getSizeInBits();
+ int NumElts = VT.getVectorNumElements();
+ (void)EltVT;
+ assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported.");
// Implement a lookup table in register by using an algorithm based on:
// http://wm.ite.pl/articles/sse-popcount.html
@@ -24510,109 +25494,30 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
// masked out higher ones) for each byte. PSHUFB is used separately with both
// to index the in-register table. Next, both are added and the result is a
// i8 vector where each element contains the pop count for input byte.
- //
- // To obtain the pop count for elements != i8, we follow up with the same
- // approach and use additional tricks as described below.
- //
const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
/* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
/* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};
- int NumByteElts = VecSize / 8;
- MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
- SDValue In = DAG.getBitcast(ByteVecVT, Op);
SmallVector<SDValue, 64> LUTVec;
- for (int i = 0; i < NumByteElts; ++i)
+ for (int i = 0; i < NumElts; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
- SDValue InRegLUT = DAG.getBuildVector(ByteVecVT, DL, LUTVec);
- SDValue M0F = DAG.getConstant(0x0F, DL, ByteVecVT);
+ SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec);
+ SDValue M0F = DAG.getConstant(0x0F, DL, VT);
// High nibbles
- SDValue FourV = DAG.getConstant(4, DL, ByteVecVT);
- SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV);
+ SDValue FourV = DAG.getConstant(4, DL, VT);
+ SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV);
// Low nibbles
- SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F);
+ SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F);
// The input vector is used as the shuffle mask that index elements into the
// LUT. After counting low and high nibbles, add the vector to obtain the
// final pop count per i8 element.
- SDValue HighPopCnt =
- DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles);
- SDValue LowPopCnt =
- DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles);
- SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt);
-
- if (EltVT == MVT::i8)
- return PopCnt;
-
- return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG);
-}
-
-static SDValue LowerVectorCTPOPBitmath(SDValue Op, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- assert(VT.is128BitVector() &&
- "Only 128-bit vector bitmath lowering supported.");
-
- int VecSize = VT.getSizeInBits();
- MVT EltVT = VT.getVectorElementType();
- int Len = EltVT.getSizeInBits();
-
- // This is the vectorized version of the "best" algorithm from
- // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
- // with a minor tweak to use a series of adds + shifts instead of vector
- // multiplications. Implemented for all integer vector types. We only use
- // this when we don't have SSSE3 which allows a LUT-based lowering that is
- // much faster, even faster than using native popcnt instructions.
-
- auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) {
- MVT VT = V.getSimpleValueType();
- SDValue ShifterV = DAG.getConstant(Shifter, DL, VT);
- return DAG.getNode(OpCode, DL, VT, V, ShifterV);
- };
- auto GetMask = [&](SDValue V, APInt Mask) {
- MVT VT = V.getSimpleValueType();
- SDValue MaskV = DAG.getConstant(Mask, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, V, MaskV);
- };
-
- // We don't want to incur the implicit masks required to SRL vNi8 vectors on
- // x86, so set the SRL type to have elements at least i16 wide. This is
- // correct because all of our SRLs are followed immediately by a mask anyways
- // that handles any bits that sneak into the high bits of the byte elements.
- MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16);
-
- SDValue V = Op;
-
- // v = v - ((v >> 1) & 0x55555555...)
- SDValue Srl =
- DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1));
- SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55)));
- V = DAG.getNode(ISD::SUB, DL, VT, V, And);
-
- // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
- SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33)));
- Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2));
- SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33)));
- V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS);
-
- // v = (v + (v >> 4)) & 0x0F0F0F0F...
- Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4));
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl);
- V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F)));
-
- // At this point, V contains the byte-wise population count, and we are
- // merely doing a horizontal sum if necessary to get the wider element
- // counts.
- if (EltVT == MVT::i8)
- return V;
-
- return LowerHorizontalByteSum(
- DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget,
- DAG);
+ SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles);
+ SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles);
+ return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt);
}
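The rewritten LowerVectorCTPOPInRegLUT now only produces per-byte pop counts: the in-register table holds the pop count of every 4-bit value and is indexed once for each nibble of a byte (the two PSHUFBs). Scalar rendering of the same lookup, in plain C++:

    #include <cstdint>

    // Pop count of one byte via the 16-entry nibble LUT used above.
    uint8_t popcnt8Lut(uint8_t X) {
      static const uint8_t LUT[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                      1, 2, 2, 3, 2, 3, 3, 4};
      uint8_t Lo = X & 0x0F;        // low nibble   (AND with M0F)
      uint8_t Hi = X >> 4;          // high nibble  (SRL by FourV)
      return static_cast<uint8_t>(LUT[Lo] + LUT[Hi]);  // two PSHUFBs + ADD
    }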
// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
@@ -24638,12 +25543,6 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
}
}
- if (!Subtarget.hasSSSE3()) {
- // We can't use the fast LUT approach, so fall back on vectorized bitmath.
- assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
- return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
- }
-
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
@@ -24652,6 +25551,18 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is512BitVector() && !Subtarget.hasBWI())
return Lower512IntUnary(Op, DAG);
+ // For element types greater than i8, do vXi8 pop counts and a bytesum.
+ if (VT.getScalarType() != MVT::i8) {
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ SDValue ByteOp = DAG.getBitcast(ByteVT, Op0);
+ SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp);
+ return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG);
+ }
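For element types wider than i8 the lowering now bitcasts to bytes, takes the vXi8 pop count, and lets LowerHorizontalByteSum fold the byte counts back into each original lane (PSADBW against zero for i64, unpack plus PSADBW for i32). The reduction it reconstructs, stated for one 32-bit lane (any per-byte popcount will do; names illustrative):

    #include <cstdint>

    uint8_t popcnt8(uint8_t X) {               // any per-byte pop count
      uint8_t C = 0;
      for (; X; X = static_cast<uint8_t>(X & (X - 1)))
        ++C;
      return C;
    }

    // ctpop of a 32-bit lane == sum of the pop counts of its four bytes,
    // which is exactly what the horizontal byte sum rebuilds per lane.
    uint32_t popcnt32ViaBytes(uint32_t X) {
      return popcnt8(X & 0xFF) + popcnt8((X >> 8) & 0xFF) +
             popcnt8((X >> 16) & 0xFF) + popcnt8((X >> 24) & 0xFF);
    }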
+
+ // We can't use the fast LUT approach, so fall back on LegalizeDAG.
+ if (!Subtarget.hasSSSE3())
+ return SDValue();
+
return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
@@ -24759,8 +25670,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
}
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- bool AllowIncDec = true) {
+ const X86Subtarget &Subtarget) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
@@ -24784,25 +25694,6 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
- if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
- // Convert to inc/dec if they aren't slow or we are optimizing for size.
- if (AllowIncDec && (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- if ((NewOpc == X86ISD::LADD && C->isOne()) ||
- (NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
- (NewOpc == X86ISD::LADD && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- }
- }
-
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
@@ -25120,8 +26011,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
// VLX the vector should be widened to 512 bit
unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
- SDValue Src0 = N->getSrc0();
- Src0 = ExtendToType(Src0, WideDataVT, DAG);
+ SDValue PassThru = ExtendToType(N->getPassThru(), WideDataVT, DAG);
// Mask element has to be i1.
assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
@@ -25131,7 +26021,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
- N->getBasePtr(), Mask, Src0,
+ N->getBasePtr(), Mask, PassThru,
N->getMemoryVT(), N->getMemOperand(),
N->getExtensionType(),
N->isExpandingLoad());
@@ -25194,7 +26084,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
MVT VT = Op.getSimpleValueType();
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
- SDValue Src0 = N->getValue();
+ SDValue PassThru = N->getPassThru();
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
@@ -25219,12 +26109,12 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
- Src0 = ExtendToType(Src0, VT, DAG);
+ PassThru = ExtendToType(PassThru, VT, DAG);
Index = ExtendToType(Index, IndexVT, DAG);
Mask = ExtendToType(Mask, MaskVT, DAG, true);
}
- SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index,
+ SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
N->getScale() };
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(),
@@ -25308,6 +26198,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
+ case ISD::FSHL:
+ case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
@@ -25322,6 +26214,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
+ case ISD::FADD:
+ case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -25354,12 +26248,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG);
+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
- case ISD::UMUL_LOHI:
- case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
case ISD::ROTL:
case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
@@ -25376,12 +26268,16 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
- case ISD::SUB: return LowerADD_SUB(Op, DAG);
+ case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget);
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
- case ISD::ABS: return LowerABS(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -25421,32 +26317,70 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::MUL: {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "Unexpected VT");
+ if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger &&
+ VT.getVectorNumElements() == 2) {
+ // Promote to a pattern that will be turned into PMULUDQ.
+ SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
+ N->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
+ N->getOperand(1));
+ SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul));
+ } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+ VT.getVectorElementType() == MVT::i8) {
+ // Pre-promote these to vXi16 to avoid op legalization thinking all 16
+ // elements are needed.
+ MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ unsigned NumConcats = 16 / VT.getVectorNumElements();
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
+ Results.push_back(Res);
+ }
+ return;
+ }
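The new ISD::MUL case promotes a v2i32 multiply to PMULUDQ, which multiplies only the low 32 bits of each 64-bit lane, so whatever ANY_EXTEND leaves in the upper halves is ignored and truncating the 64-bit product back to i32 yields the correctly wrapped result. A scalar check of why the truncation is safe (illustrative names):

    #include <cstdint>

    uint32_t mul32ViaPmuludq(uint32_t A, uint32_t B) {
      // PMULUDQ semantics per lane: full 64-bit product of the low 32 bits.
      uint64_t Prod = static_cast<uint64_t>(A) * static_cast<uint64_t>(B);
      return static_cast<uint32_t>(Prod);   // ISD::TRUNCATE back to i32
    }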
+ case ISD::UADDSAT:
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT:
+ case X86ISD::VPMADDWD:
case X86ISD::AVG: {
- // Legalize types for X86ISD::AVG by expanding vectors.
+ // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
+ // X86ISD::AVG/VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- auto InVT = N->getValueType(0);
- assert(InVT.getSizeInBits() < 128);
- assert(128 % InVT.getSizeInBits() == 0);
+ EVT VT = N->getValueType(0);
+ EVT InVT = N->getOperand(0).getValueType();
+ assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
+ "Expected a VT that divides into 128 bits.");
unsigned NumConcat = 128 / InVT.getSizeInBits();
- EVT RegVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(),
- NumConcat * InVT.getVectorNumElements());
+ EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(),
+ NumConcat * InVT.getVectorNumElements());
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ NumConcat * VT.getVectorNumElements());
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = N->getOperand(0);
- SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
Ops[0] = N->getOperand(1);
- SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
- SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
- if (getTypeAction(*DAG.getContext(), InVT) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
@@ -25456,7 +26390,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// setCC result type is v2i1 because type legalzation will end up with
// a v4i1 setcc plus an extend.
assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
- if (N->getOperand(0).getValueType() != MVT::v2f32)
+ if (N->getOperand(0).getValueType() != MVT::v2f32 ||
+ getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector)
return;
SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
@@ -25465,9 +26400,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
N->getOperand(1), UNDEF);
SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
N->getOperand(2));
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
}
@@ -25489,13 +26423,198 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
- case ISD::UREM:
+ case ISD::UREM: {
+ EVT VT = N->getValueType(0);
+ if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) {
+ // If this RHS is a constant splat vector we can widen this and let
+ // division/remainder by constant optimize it.
+ // TODO: Can we do something for non-splat?
+ APInt SplatVal;
+ if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) {
+ unsigned NumConcats = 128 / VT.getSizeInBits();
+ SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT));
+ Ops0[0] = N->getOperand(0);
+ EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0);
+ SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT);
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1);
+ Results.push_back(Res);
+ }
+ return;
+ }
+
+ if (VT == MVT::v2i32) {
+ // Legalize v2i32 div/rem by unrolling. Otherwise we promote to the
+ // v2i64 and unroll later. But then we create i64 scalar ops which
+ // might be slow in 64-bit mode or require a libcall in 32-bit mode.
+ Results.push_back(DAG.UnrollVectorOp(N));
+ return;
+ }
+
+ if (VT.isVector())
+ return;
+
+ LLVM_FALLTHROUGH;
+ }
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);
return;
}
+ case ISD::TRUNCATE: {
+ MVT VT = N->getSimpleValueType(0);
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+
+ // The generic legalizer will try to widen the input type to the same
+ // number of elements as the widened result type. But this isn't always
+ // the best thing so do some custom legalization to avoid some cases.
+ MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+
+ unsigned InBits = InVT.getSizeInBits();
+ if (128 % InBits == 0) {
+ // 128 bit and smaller inputs should avoid truncate all together and
+ // just use a build_vector that will become a shuffle.
+ // TODO: Widen and use a shuffle directly?
+ MVT InEltVT = InVT.getSimpleVT().getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ // Use the original element count so we don't do more scalar opts than
+ // necessary.
+ unsigned MinElts = VT.getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In,
+ DAG.getIntPtrConstant(i, dl));
+ Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val);
+ }
+ Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops));
+ return;
+ }
+ // With AVX512 there are some cases that can use a target specific
+ // truncate node to go from 256/512 to less than 128 with zeros in the
+ // upper elements of the 128 bit result.
+ if (Subtarget.hasAVX512() && isTypeLegal(InVT)) {
+ // We can use VTRUNC directly if for 256 bits with VLX or for any 512.
+ if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) {
+ Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
+ return;
+ }
+ // There's one case we can widen to 512 bits and use VTRUNC.
+ if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) {
+ In = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i64, In,
+ DAG.getUNDEF(MVT::v4i64));
+ Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In));
+ return;
+ }
+ }
+ return;
+ }
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (ExperimentalVectorWideningLegalization)
+ return;
+
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+ if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
+ (InVT == MVT::v16i16 || InVT == MVT::v32i8)) {
+ // Custom split this so we can extend i8/i16->i32 invec. This is better
+ // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
+ // sra, followed by an extend from i32 to i64 using pcmpgt. By custom
+ // splitting we allow the sra from the extend to i32 to be shared by the
+ // split.
+ EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(),
+ InVT.getVectorNumElements() / 2);
+ MVT ExtendVT = MVT::getVectorVT(MVT::i32,
+ VT.getVectorNumElements());
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT,
+ In, DAG.getIntPtrConstant(0, dl));
+ In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In);
+
+ // Fill a vector with sign bits for each element.
+ SDValue Zero = DAG.getConstant(0, dl, ExtendVT);
+ SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Create an unpackl and unpackh to interleave the sign bits then bitcast
+ // to vXi64.
+ SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ return;
+ }
+ return;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ if (!ExperimentalVectorWideningLegalization)
+ return;
+
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+ if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
+ (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+ // Custom split this so we can extend i8/i16->i32 invec. This is better
+ // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
+ // sra, followed by an extend from i32 to i64 using pcmpgt. By custom
+ // splitting we allow the sra from the extend to i32 to be shared by the
+ // split.
+ In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
+
+ // Fill a vector with sign bits for each element.
+ SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
+ SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT);
+
+ // Create an unpackl and unpackh to interleave the sign bits then bitcast
+ // to v2i64.
+ SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
+ {0, 4, 1, 5});
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo);
+ SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits,
+ {2, 6, 3, 7});
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ return;
+ }
+
+ if ((VT == MVT::v16i32 || VT == MVT::v8i64) && InVT.is128BitVector()) {
+ // Perform custom splitting instead of the two stage extend we would get
+ // by default.
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ assert(isTypeLegal(LoVT) && "Split VT not legal?");
+
+ bool IsSigned = N->getOpcode() == ISD::SIGN_EXTEND;
+
+ SDValue Lo = getExtendInVec(IsSigned, dl, LoVT, In, DAG);
+
+ // We need to shift the input over by half the number of elements.
+ unsigned NumElts = InVT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef);
+ for (unsigned i = 0; i != HalfNumElts; ++i)
+ ShufMask[i] = i + HalfNumElts;
+
+ SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
+ Hi = getExtendInVec(IsSigned, dl, HiVT, Hi, DAG);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ }
+ return;
+ }
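The sign-bit trick used by the custom split above can be checked in scalar form: interleaving a 32-bit value with a lane that is all-ones exactly when the value is negative reproduces the i64 sign extension. A minimal sketch (plain C++, hypothetical helper name):

#include <cassert>
#include <cstdint>

// Rebuild sign_extend(i32 -> i64) from the low 32 bits plus a "sign bits"
// word (0 or 0xFFFFFFFF), mirroring the setcc + unpack sequence above.
int64_t sextViaSignBits(int32_t X) {
  uint32_t SignBits = (0 > X) ? 0xFFFFFFFFu : 0u;                // setcc(0, X, SETGT)
  uint64_t Interleaved = (uint64_t)(uint32_t)X | ((uint64_t)SignBits << 32);  // unpack lo/hi
  return (int64_t)Interleaved;
}

// Quick check against the native sign extension.
int main() {
  for (int32_t X : {0, 1, -1, 42, -12345, INT32_MIN, INT32_MAX})
    assert(sextViaSignBits(X) == (int64_t)X);
}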
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
@@ -25503,38 +26622,90 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
+ // Promote these manually to avoid over-promotion to v2i64. Type
+ // legalization will revisit the v2i32 operation for more cleanup.
+ if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
+ getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
+ // AVX512DQ provides instructions that produce a v2i64 result.
+ if (Subtarget.hasDQI())
+ return;
+
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
+ Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, MVT::v2i32, Res,
+ DAG.getValueType(VT.getVectorElementType()));
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ Results.push_back(Res);
+ return;
+ }
+
+ if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+
+ // Try to create a 128 bit vector, but don't exceed a 32 bit element.
+ unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
+ MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
+ VT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
+
+ // Preserve what we know about the size of the original result, except
+ // when the result is v2i32, since we can't widen the assert.
+ if (PromoteVT != MVT::v2i32)
+ Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, PromoteVT, Res,
+ DAG.getValueType(VT.getVectorElementType()));
+
+ // Truncate back to the original width.
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+
+ // Now widen to 128 bits.
+ unsigned NumConcats = 128 / VT.getSizeInBits();
+ MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
+ VT.getVectorNumElements() * NumConcats);
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
+ Results.push_back(Res);
+ return;
+ }
+
if (VT == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
+ bool Widenv2i32 =
+ getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector;
if (Src.getValueType() == MVT::v2f64) {
- MVT ResVT = MVT::v4i32;
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
- // Widen to 512-bits.
- ResVT = MVT::v8i32;
+ // If v2i32 is widened, we can defer to the generic legalizer.
+ if (Widenv2i32)
+ return;
+ // Custom widen by doubling to a legal vector width. Isel will
+ // further widen to v8f64.
Opc = ISD::FP_TO_UINT;
- Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
- DAG.getUNDEF(MVT::v8f64),
- Src, DAG.getIntPtrConstant(0, dl));
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64,
+ Src, DAG.getUNDEF(MVT::v2f64));
}
- SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
- bool WidenType = getTypeAction(*DAG.getContext(),
- MVT::v2i32) == TypeWidenVector;
- ResVT = WidenType ? MVT::v4i32 : MVT::v2i32;
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Res,
- DAG.getIntPtrConstant(0, dl));
+ SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
+ if (!Widenv2i32)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
}
- if (SrcVT == MVT::v2f32) {
+ if (SrcVT == MVT::v2f32 &&
+ getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
: ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
@@ -25610,7 +26781,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_ROUND: {
- if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
+ if (!isTypeLegal(N->getOperand(0).getValueType()))
return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
@@ -25780,29 +26951,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
if (SrcVT != MVT::f64 ||
- (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
+ (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) ||
+ getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector)
return;
unsigned NumElts = DstVT.getVectorNumElements();
EVT SVT = DstVT.getVectorElementType();
EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
- SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- MVT::v2f64, N->getOperand(0));
- SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded);
-
- if (getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector) {
- // If we are legalizing vectors by widening, we already have the desired
- // legal vector type, just return it.
- Results.push_back(ToVecInt);
- return;
- }
-
- SmallVector<SDValue, 8> Elts;
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
- ToVecInt, DAG.getIntPtrConstant(i, dl)));
-
- Results.push_back(DAG.getBuildVector(DstVT, dl, Elts));
+ SDValue Res;
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0));
+ Res = DAG.getBitcast(WiderVT, Res);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
return;
}
case ISD::MGATHER: {
@@ -25814,9 +26975,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
- Gather->getValue(),
- DAG.getUNDEF(MVT::v2f32));
+ SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+ Gather->getPassThru(),
+ DAG.getUNDEF(MVT::v2f32));
if (!Subtarget.hasVLX()) {
// We need to widen the mask, but the instruction will only use 2
// of its elements. So we can use undef.
@@ -25824,8 +26985,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
@@ -25838,9 +26999,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Index = Gather->getIndex();
SDValue Mask = Gather->getMask();
assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
- SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
- Gather->getValue(),
- DAG.getUNDEF(MVT::v2i32));
+ SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
+ Gather->getPassThru(),
+ DAG.getUNDEF(MVT::v2i32));
// If the index is v2i64 we can use it directly.
if (Index.getValueType() == MVT::v2i64 &&
(Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
@@ -25851,8 +27012,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getUNDEF(MVT::v2i1));
Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
}
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
@@ -25864,28 +27025,56 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Chain);
return;
}
- EVT IndexVT = Index.getValueType();
- EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
- IndexVT.getScalarType(), 4);
- // Otherwise we need to custom widen everything to avoid promotion.
- Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
- DAG.getUNDEF(IndexVT));
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
- DAG.getConstant(0, dl, MVT::v2i1));
- SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
- Index, Gather->getScale() };
- SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
- Gather->getMemoryVT(), dl, Ops,
- Gather->getMemOperand());
- SDValue Chain = Res.getValue(1);
- if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
- DAG.getIntPtrConstant(0, dl));
- Results.push_back(Res);
- Results.push_back(Chain);
- return;
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
+ EVT IndexVT = Index.getValueType();
+ EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ IndexVT.getScalarType(), 4);
+ // Otherwise we need to custom widen everything to avoid promotion.
+ Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
+ DAG.getUNDEF(IndexVT));
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
+ DAG.getConstant(0, dl, MVT::v2i1));
+ SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
+ Gather->getBasePtr(), Index, Gather->getScale() };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
+ Gather->getMemoryVT(), dl, Ops,
+ Gather->getMemOperand());
+ SDValue Chain = Res.getValue(1);
+ if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
}
- break;
+ return;
+ }
+ case ISD::LOAD: {
+ // Use an f64/i64 load and a scalar_to_vector for v2f32/v2i32 loads. This
+ // avoids scalarizing in 32-bit mode. In 64-bit mode this avoids an int->fp
+ // cast, since type legalization will try to use an i64 load.
+ MVT VT = N->getSimpleValueType(0);
+ assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
+ if (!ISD::isNON_EXTLoad(N))
+ return;
+ auto *Ld = cast<LoadSDNode>(N);
+ MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
+ SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(),
+ Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ SDValue Chain = Res.getValue(1);
+ MVT WideVT = MVT::getVectorVT(LdVT, 2);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
+ MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ Res = DAG.getBitcast(CastVT, Res);
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
}
}
}
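The single f64/i64 load in the new ISD::LOAD case is equivalent, on a little-endian target, to loading the two 32-bit elements separately and then widening. A scalar sketch of that equivalence using memcpy as the "bitcast" (hypothetical helper, little-endian assumed):

#include <cassert>
#include <cstdint>
#include <cstring>

void checkWideLoad(const uint32_t Mem[2]) {
  uint64_t Wide;
  std::memcpy(&Wide, Mem, sizeof(Wide));        // one 64-bit load
  uint32_t Lanes[2];
  std::memcpy(Lanes, &Wide, sizeof(Lanes));     // reinterpret as two 32-bit lanes
  assert(Lanes[0] == Mem[0] && Lanes[1] == Mem[1]);
}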
@@ -25943,9 +27132,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
- case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
- case X86ISD::ADDUS: return "X86ISD::ADDUS";
- case X86ISD::SUBUS: return "X86ISD::SUBUS";
+ case X86ISD::BLENDV: return "X86ISD::BLENDV";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
@@ -25988,15 +27175,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
- case X86ISD::LINC: return "X86ISD::LINC";
- case X86ISD::LDEC: return "X86ISD::LDEC";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
- case X86ISD::VZEXT: return "X86ISD::VZEXT";
- case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
+ case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC";
+ case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS";
+ case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS";
case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
@@ -26005,6 +27191,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
+ case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
@@ -26029,16 +27216,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
- case X86ISD::SMUL8: return "X86ISD::SMUL8";
- case X86ISD::UMUL8: return "X86ISD::UMUL8";
- case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
- case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
- case X86ISD::INC: return "X86ISD::INC";
- case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
+ case X86ISD::BZHI: return "X86ISD::BZHI";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
@@ -26136,7 +27318,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
case X86ISD::EXPAND: return "X86ISD::EXPAND";
- case X86ISD::SELECT: return "X86ISD::SELECT";
case X86ISD::SELECTS: return "X86ISD::SELECTS";
case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
case X86ISD::RCP14: return "X86ISD::RCP14";
@@ -26162,16 +27343,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
- case X86ISD::ADDS: return "X86ISD::ADDS";
- case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI";
case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
+ case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI";
+ case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI";
case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND";
case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND";
+ case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI";
+ case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI";
case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND";
case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND";
case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
@@ -26182,12 +27365,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
+ case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
+ case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI";
+ case X86ISD::MCVTP2UI: return "X86ISD::MCVTP2UI";
case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND";
+ case X86ISD::CVTS2SI: return "X86ISD::CVTS2SI";
+ case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI";
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
@@ -26321,6 +27509,10 @@ bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<32>(Imm);
}
+bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
+ return isInt<32>(Imm);
+}
+
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
@@ -26434,7 +27626,7 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
// If the subtarget is using retpolines, we need to not generate jump tables.
- if (Subtarget.useRetpoline())
+ if (Subtarget.useRetpolineIndirectBranches())
return false;
// Otherwise, fallback on the generic logic.
@@ -26633,8 +27825,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Memory Reference
assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 1> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
// Machine Information
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -26732,7 +27924,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
@@ -26757,7 +27949,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, 16)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
@@ -26785,7 +27977,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.addReg(NextOffsetReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
@@ -26804,7 +27996,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
@@ -26841,7 +28033,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addDisp(Disp, 8)
.add(Segment)
.addReg(NextAddrReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .setMemRefs(MMOs);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
@@ -26981,19 +28173,17 @@ static bool isCMOVPseudo(MachineInstr &MI) {
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
- case X86::CMOV_V2F64:
- case X86::CMOV_V2I64:
- case X86::CMOV_V4F32:
- case X86::CMOV_V4F64:
- case X86::CMOV_V4I64:
- case X86::CMOV_V16F32:
- case X86::CMOV_V8F32:
- case X86::CMOV_V8F64:
- case X86::CMOV_V8I64:
- case X86::CMOV_V8I1:
- case X86::CMOV_V16I1:
- case X86::CMOV_V32I1:
- case X86::CMOV_V64I1:
+ case X86::CMOV_VR128:
+ case X86::CMOV_VR128X:
+ case X86::CMOV_VR256:
+ case X86::CMOV_VR256X:
+ case X86::CMOV_VR512:
+ case X86::CMOV_VK2:
+ case X86::CMOV_VK4:
+ case X86::CMOV_VK8:
+ case X86::CMOV_VK16:
+ case X86::CMOV_VK32:
+ case X86::CMOV_VK64:
return true;
default:
@@ -27815,8 +29005,8 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineInstrBuilder MIB;
// Memory Reference.
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
// Initialize a register with zero.
MVT PVT = getPointerTy(MF->getDataLayout());
@@ -27845,7 +29035,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MIB.add(MI.getOperand(MemOpndSlot + i));
}
MIB.addReg(SSPCopyReg);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
}
MachineBasicBlock *
@@ -27861,8 +29051,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
unsigned DstReg;
unsigned MemOpndSlot = 0;
@@ -27956,7 +29146,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MIB.addReg(LabelReg);
else
MIB.addMBB(restoreMBB);
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
emitSetJmpShadowStackFix(MI, thisMBB);
@@ -28017,8 +29207,8 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
MVT PVT = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
@@ -28100,12 +29290,16 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineInstrBuilder MIB =
BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
- MIB.addDisp(MI.getOperand(i), SPPOffset);
+ MIB.addDisp(MO, SPPOffset);
+ else if (MO.isReg()) // Don't add the whole operand; we don't want to
+ // preserve kill flags.
+ MIB.addReg(MO.getReg());
else
- MIB.add(MI.getOperand(i));
+ MIB.add(MO);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Subtract the current SSP from the previous SSP.
unsigned SspSubReg = MRI.createVirtualRegister(PtrRC);
@@ -28189,8 +29383,8 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+ SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
+ MI.memoperands_end());
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
@@ -28221,19 +29415,29 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
// Reload FP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.add(MI.getOperand(i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) // Don't add the whole operand; we don't want to
+ // preserve kill flags.
+ MIB.addReg(MO.getReg());
+ else
+ MIB.add(MO);
+ }
+ MIB.setMemRefs(MMOs);
// Reload IP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
- MIB.addDisp(MI.getOperand(i), LabelOffset);
+ MIB.addDisp(MO, LabelOffset);
+ else if (MO.isReg()) // Don't add the whole operand; we don't want to
+ // preserve kill flags.
+ MIB.addReg(MO.getReg());
else
- MIB.add(MI.getOperand(i));
+ MIB.add(MO);
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Reload SP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
@@ -28241,9 +29445,10 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
else
- MIB.add(MI.getOperand(i));
+ MIB.add(MI.getOperand(i)); // We can preserve the kill flags here; it's
+ // the last instruction of the expansion.
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
// Jump
BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
@@ -28562,26 +29767,23 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_FR32:
case X86::CMOV_FR64:
- case X86::CMOV_F128:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
- case X86::CMOV_V2F64:
- case X86::CMOV_V2I64:
- case X86::CMOV_V4F32:
- case X86::CMOV_V4F64:
- case X86::CMOV_V4I64:
- case X86::CMOV_V16F32:
- case X86::CMOV_V8F32:
- case X86::CMOV_V8F64:
- case X86::CMOV_V8I64:
- case X86::CMOV_V8I1:
- case X86::CMOV_V16I1:
- case X86::CMOV_V32I1:
- case X86::CMOV_V64I1:
+ case X86::CMOV_VR128:
+ case X86::CMOV_VR128X:
+ case X86::CMOV_VR256:
+ case X86::CMOV_VR256X:
+ case X86::CMOV_VR512:
+ case X86::CMOV_VK2:
+ case X86::CMOV_VK4:
+ case X86::CMOV_VK8:
+ case X86::CMOV_VK16:
+ case X86::CMOV_VK32:
+ case X86::CMOV_VK64:
return EmitLoweredSelect(MI, BB);
case X86::RDFLAGS32:
@@ -28890,11 +30092,12 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
EVT SrcVT = Src.getValueType();
APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
Op.getConstantOperandVal(1));
- DAG.computeKnownBits(Src, Known, DemandedElt, Depth + 1);
+ Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
Known = Known.zextOrTrunc(BitWidth);
Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
break;
}
+ case X86ISD::VSRAI:
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -28903,72 +30106,62 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
}
- DAG.computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned ShAmt = ShiftImm->getZExtValue();
if (Opc == X86ISD::VSHLI) {
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits are known zero.
Known.Zero.setLowBits(ShAmt);
- } else {
+ } else if (Opc == X86ISD::VSRLI) {
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits are known zero.
Known.Zero.setHighBits(ShAmt);
+ } else {
+ Known.Zero.ashrInPlace(ShAmt);
+ Known.One.ashrInPlace(ShAmt);
}
}
break;
}
case X86ISD::PACKUS: {
// PACKUS is just a truncation if the upper half is zero.
- // TODO: Add DemandedElts support.
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
+
+ Known.One = APInt::getAllOnesValue(BitWidth * 2);
+ Known.Zero = APInt::getAllOnesValue(BitWidth * 2);
+
KnownBits Known2;
- DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
- DAG.computeKnownBits(Op.getOperand(1), Known2, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ if (!!DemandedLHS) {
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ if (!!DemandedRHS) {
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+
if (Known.countMinLeadingZeros() < BitWidth)
Known.resetAll();
Known = Known.trunc(BitWidth);
break;
}
- case X86ISD::VZEXT: {
- // TODO: Add DemandedElts support.
- SDValue N0 = Op.getOperand(0);
- unsigned NumElts = VT.getVectorNumElements();
-
- EVT SrcVT = N0.getValueType();
- unsigned InNumElts = SrcVT.getVectorNumElements();
- unsigned InBitWidth = SrcVT.getScalarSizeInBits();
- assert(InNumElts >= NumElts && "Illegal VZEXT input");
-
- Known = KnownBits(InBitWidth);
- APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
- DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1);
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBitWidth);
- break;
- }
case X86ISD::CMOV: {
- DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
+ Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
- KnownBits Known2;
- DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
}
- case X86ISD::UDIVREM8_ZEXT_HREG:
- // TODO: Support more than just the zero extended bits?
- if (Op.getResNo() != 1)
- break;
- // The remainder is zero extended.
- Known.Zero.setBitsFrom(8);
- break;
}
// Handle target shuffles.
@@ -29013,8 +30206,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) {
if (!DemandedOps[i])
continue;
- KnownBits Known2;
- DAG.computeKnownBits(Ops[i], Known2, DemandedOps[i], Depth + 1);
+ KnownBits Known2 =
+ DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
@@ -29033,14 +30226,6 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
return VTBits;
- case X86ISD::VSEXT: {
- // TODO: Add DemandedElts support.
- SDValue Src = Op.getOperand(0);
- unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
- Tmp += VTBits - Src.getScalarValueSizeInBits();
- return Tmp;
- }
-
case X86ISD::VTRUNC: {
// TODO: Add DemandedElts support.
SDValue Src = Op.getOperand(0);
@@ -29054,10 +30239,16 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
case X86ISD::PACKSS: {
// PACKSS is just a truncation if the sign bits extend to the packed size.
- // TODO: Add DemandedElts support.
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS,
+ DemandedRHS);
+
unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits();
- unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
- unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
+ if (!!DemandedLHS)
+ Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ if (!!DemandedRHS)
+ Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
unsigned Tmp = std::min(Tmp0, Tmp1);
if (Tmp > (SrcBits - VTBits))
return Tmp - (SrcBits - VTBits);
@@ -29099,12 +30290,6 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
return std::min(Tmp0, Tmp1);
}
- case X86ISD::SDIVREM8_SEXT_HREG:
- // TODO: Support more than just the sign extended bits?
- if (Op.getResNo() != 1)
- break;
- // The remainder is sign extended.
- return VTBits - 7;
}
// Fallback case.
@@ -29117,21 +30302,6 @@ SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
return N;
}
-/// Returns true (and the GlobalValue and the offset) if the node is a
-/// GlobalAddress + offset.
-bool X86TargetLowering::isGAPlusOffset(SDNode *N,
- const GlobalValue* &GA,
- int64_t &Offset) const {
- if (N->getOpcode() == X86ISD::Wrapper) {
- if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
- GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
- Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
- return true;
- }
- }
- return TargetLowering::isGAPlusOffset(N, GA, Offset);
-}
-
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
@@ -29170,10 +30340,12 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
MVT::getIntegerVT(MaskEltSize);
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
- if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits()) {
+ if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
- Shuffle = unsigned(X86ISD::VZEXT);
- } else
+
+ if (SrcVT.getVectorNumElements() == NumDstElts)
+ Shuffle = unsigned(ISD::ZERO_EXTEND);
+ else
Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
@@ -29430,9 +30602,10 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- // Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
- // TODO add support for 256/512-bit types.
- if ((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) {
+ // Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
+ if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) ||
+ ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) ||
+ ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) {
if (matchVectorShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG,
Subtarget)) {
DstVT = MaskVT;
@@ -29622,7 +30795,8 @@ static bool matchBinaryPermuteVectorShuffle(
/// instruction but should only be used to replace chains over a certain depth.
static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ArrayRef<int> BaseMask, int Depth,
- bool HasVariableMask, SelectionDAG &DAG,
+ bool HasVariableMask,
+ bool AllowVariableMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
@@ -29835,7 +31009,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Depth threshold above which we can efficiently use variable mask shuffles.
int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
- bool AllowVariableMask = (Depth >= VariableShuffleDepth) || HasVariableMask;
+ AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
@@ -30169,7 +31343,8 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
static SDValue combineX86ShufflesRecursively(
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
- bool HasVariableMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
const unsigned MaxRecursionDepth = 8;
@@ -30195,30 +31370,36 @@ static SDValue combineX86ShufflesRecursively(
if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
return SDValue();
- assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
+ // TODO - Add support for more than 2 inputs.
+ if (2 < OpInputs.size())
+ return SDValue();
+
SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
- int InputIdx0 = -1, InputIdx1 = -1;
- for (int i = 0, e = Ops.size(); i < e; ++i) {
- SDValue BC = peekThroughBitcasts(Ops[i]);
- if (Input0 && BC == peekThroughBitcasts(Input0))
- InputIdx0 = i;
- if (Input1 && BC == peekThroughBitcasts(Input1))
- InputIdx1 = i;
- }
+ auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
+ if (!Input)
+ return -1;
+ // Attempt to find an existing match.
+ SDValue InputBC = peekThroughBitcasts(Input);
+ for (int i = 0, e = Ops.size(); i < e; ++i)
+ if (InputBC == peekThroughBitcasts(Ops[i]))
+ return i;
+ // Match failed - should we replace an existing Op?
+ if (InsertionPoint >= 0) {
+ Ops[InsertionPoint] = Input;
+ return InsertionPoint;
+ }
+ // Add to the end of the Ops list.
+ Ops.push_back(Input);
+ return Ops.size() - 1;
+ };
- if (Input0 && InputIdx0 < 0) {
- InputIdx0 = SrcOpIndex;
- Ops[SrcOpIndex] = Input0;
- }
- if (Input1 && InputIdx1 < 0) {
- InputIdx1 = Ops.size();
- Ops.push_back(Input1);
- }
+ int InputIdx0 = AddOp(Input0, SrcOpIndex);
+ int InputIdx1 = AddOp(Input1, -1);
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
@@ -30324,18 +31505,23 @@ static SDValue combineX86ShufflesRecursively(
CombinedNodes.push_back(Op.getNode());
// See if we can recurse into each shuffle source op (if it's a target
- // shuffle). The source op should only be combined if it either has a
- // single use (i.e. current Op) or all its users have already been combined.
+ // shuffle). The source op should generally only be combined if it either has
+ // a single use (i.e. the current Op) or all its users have already been combined;
+ // if not, we can still combine it, but should prevent generation of variable
+ // shuffles to avoid constant pool bloat.
// Don't recurse if we already have more source ops than we can combine in
// the remaining recursion depth.
if (Ops.size() < (MaxRecursionDepth - Depth)) {
- for (int i = 0, e = Ops.size(); i < e; ++i)
+ for (int i = 0, e = Ops.size(); i < e; ++i) {
+ bool AllowVar = false;
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
- if (SDValue Res = combineX86ShufflesRecursively(
- Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
- DAG, Subtarget))
- return Res;
+ AllowVar = AllowVariableMask;
+ if (SDValue Res = combineX86ShufflesRecursively(
+ Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
+ AllowVar, DAG, Subtarget))
+ return Res;
+ }
}
// Attempt to constant fold all of the constant source ops.
@@ -30365,8 +31551,8 @@ static SDValue combineX86ShufflesRecursively(
}
// Finally, try to combine into a single shuffle instruction.
- return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
- Subtarget);
+ return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
+ AllowVariableMask, DAG, Subtarget);
}
/// Get the PSHUF-style mask from PSHUF node.
@@ -30545,74 +31731,6 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
return V;
}
-/// Search for a combinable shuffle across a chain ending in pshuflw or
-/// pshufhw.
-///
-/// We walk up the chain, skipping shuffles of the other half and looking
-/// through shuffles which switch halves trying to find a shuffle of the same
-/// pair of dwords.
-static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
- assert(
- (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
- "Called with something other than an x86 128-bit half shuffle!");
- SDLoc DL(N);
- unsigned CombineOpcode = N.getOpcode();
-
- // Walk up a single-use chain looking for a combinable shuffle.
- SDValue V = N.getOperand(0);
- for (; V.hasOneUse(); V = V.getOperand(0)) {
- switch (V.getOpcode()) {
- default:
- return false; // Nothing combined!
-
- case ISD::BITCAST:
- // Skip bitcasts as we always know the type for the target specific
- // instructions.
- continue;
-
- case X86ISD::PSHUFLW:
- case X86ISD::PSHUFHW:
- if (V.getOpcode() == CombineOpcode)
- break;
-
- // Other-half shuffles are no-ops.
- continue;
- }
- // Break out of the loop if we break out of the switch.
- break;
- }
-
- if (!V.hasOneUse())
- // We fell out of the loop without finding a viable combining instruction.
- return false;
-
- // Combine away the bottom node as its shuffle will be accumulated into
- // a preceding shuffle.
- DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
-
- // Record the old value.
- SDValue Old = V;
-
- // Merge this node's mask and our incoming mask (adjusted to account for all
- // the pshufd instructions encountered).
- SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
- for (int &M : Mask)
- M = VMask[M];
- V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
- getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
-
- // Check that the shuffles didn't cancel each other out. If not, we need to
- // combine to the new one.
- if (Old != V)
- // Replace the combinable shuffle with the combined one, updating all users
- // so that we re-evaluate the chain here.
- DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
-
- return true;
-}
-
/// Try to combine x86 target specific shuffles.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -30667,7 +31785,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
@@ -30679,40 +31797,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
- case X86ISD::UNPCKL: {
- // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
- // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
- // moves upper half elements into the lower half part. For example:
- //
- // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1,
- // undef:v16i8
- // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2
- //
- // will be combined to:
- //
- // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1
-
- // This is only for 128-bit vectors. From SSE4.1 onward this combine may not
- // happen due to advanced instructions.
- if (!VT.is128BitVector())
- return SDValue();
-
- auto Op0 = N.getOperand(0);
- auto Op1 = N.getOperand(1);
- if (Op0.isUndef() && Op1.getOpcode() == ISD::VECTOR_SHUFFLE) {
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
-
- unsigned NumElts = VT.getVectorNumElements();
- SmallVector<int, 8> ExpectedMask(NumElts, -1);
- std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2,
- NumElts / 2);
-
- auto ShufOp = Op1.getOperand(0);
- if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask))
- return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp);
- }
- return SDValue();
- }
case X86ISD::MOVSD:
case X86ISD::MOVSS: {
SDValue N0 = N.getOperand(0);
@@ -30844,9 +31928,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
case X86ISD::PSHUFHW:
assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
- if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
- return SDValue(); // We combined away this shuffle, so we're done.
-
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
@@ -31286,13 +32367,404 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// a particular chain.
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
+
+ // Simplify source operands based on shuffle mask.
+ // TODO - merge this into combineX86ShufflesRecursively.
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI))
+ return SDValue(N, 0);
+ }
+
+ // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
+ // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
+ // FIXME: This can probably go away once we default to widening legalization.
+ if (Subtarget.hasSSE41() && VT == MVT::v4i32 &&
+ N->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N->getOperand(0).getOpcode() == ISD::BITCAST &&
+ N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) {
+ SDValue BC = N->getOperand(0);
+ SDValue MULUDQ = BC.getOperand(0);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVOp->getMask();
+ if (BC.hasOneUse() && MULUDQ.hasOneUse() &&
+ Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) {
+ SDValue Op0 = MULUDQ.getOperand(0);
+ SDValue Op1 = MULUDQ.getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
+ Op0.getOperand(0).getValueType() == MVT::v4i32) {
+ ShuffleVectorSDNode *SVOp0 =
+ cast<ShuffleVectorSDNode>(Op0.getOperand(0));
+ ArrayRef<int> Mask2 = SVOp0->getMask();
+ if (Mask2[0] == 0 && Mask2[1] == -1 &&
+ Mask2[2] == 1 && Mask2[3] == -1) {
+ Op0 = SVOp0->getOperand(0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
+ Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask);
+ return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
+ }
+ }
+ if (Op1.getOpcode() == ISD::BITCAST &&
+ Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
+ Op1.getOperand(0).getValueType() == MVT::v4i32) {
+ ShuffleVectorSDNode *SVOp1 =
+ cast<ShuffleVectorSDNode>(Op1.getOperand(0));
+ ArrayRef<int> Mask2 = SVOp1->getMask();
+ if (Mask2[0] == 0 && Mask2[1] == -1 &&
+ Mask2[2] == 1 && Mask2[3] == -1) {
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask);
+ Op1 = SVOp1->getOperand(0);
+ return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
+ }
+ }
+ }
}
return SDValue();
}
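The PMULUDQ-to-pmulld rewrite above is justified because the truncating shuffle only keeps the low 32 bits of each 64-bit product, and multiplication is congruent modulo 2^32. A scalar sketch of that claim (hypothetical values, plain C++):

#include <cassert>
#include <cstdint>

uint32_t lowBitsOfWideMul(uint32_t A, uint32_t B) {
  uint64_t Wide = (uint64_t)A * (uint64_t)B;    // what PMULUDQ computes per lane
  return (uint32_t)Wide;                        // the truncating shuffle keeps this
}

int main() {
  for (uint32_t A : {0u, 1u, 7u, 0xFFFFFFFFu, 0x80000001u})
    for (uint32_t B : {0u, 3u, 0x10001u, 0xFFFFFFFFu})
      assert(lowBitsOfWideMul(A, B) == A * B);  // pmulld-style 32-bit multiply
}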
+bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
+ TargetLoweringOpt &TLO, unsigned Depth) const {
+ int NumElts = DemandedElts.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
+
+ // Handle special case opcodes.
+ switch (Opc) {
+ case X86ISD::VSHL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRA: {
+ // We only need the bottom 64-bits of the (128-bit) shift amount.
+ SDValue Amt = Op.getOperand(1);
+ MVT AmtVT = Amt.getSimpleValueType();
+ assert(AmtVT.is128BitVector() && "Unexpected value type");
+ APInt AmtUndef, AmtZero;
+ unsigned NumAmtElts = AmtVT.getVectorNumElements();
+ APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
+ if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
+ Depth + 1))
+ return true;
+ LLVM_FALLTHROUGH;
+ }
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI: {
+ SDValue Src = Op.getOperand(0);
+ APInt SrcUndef;
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+ // TODO convert SrcUndef to KnownUndef.
+ break;
+ }
+ case X86ISD::CVTSI2P:
+ case X86ISD::CVTUI2P: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ APInt SrcUndef, SrcZero;
+ APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS: {
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::VBROADCAST: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ if (!SrcVT.isVector())
+ return false;
+ // Don't bother broadcasting if we just need the 0'th element.
+ if (DemandedElts == 1) {
+ if (Src.getValueType() != VT)
+ Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
+ SDLoc(Op));
+ return TLO.CombineTo(Op, Src);
+ }
+ APInt SrcUndef, SrcZero;
+ APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PSHUFB: {
+ // TODO - simplify other variable shuffle masks.
+ SDValue Mask = Op.getOperand(1);
+ APInt MaskUndef, MaskZero;
+ if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ }
+
+ // Simplify target shuffles.
+ if (!isTargetShuffle(Opc) || !VT.isSimple())
+ return false;
+
+ // Get target shuffle mask.
+ bool IsUnary;
+ SmallVector<int, 64> OpMask;
+ SmallVector<SDValue, 2> OpInputs;
+ if (!getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, OpInputs,
+ OpMask, IsUnary))
+ return false;
+
+ // Shuffle inputs must be the same type as the result.
+ if (llvm::any_of(OpInputs,
+ [VT](SDValue V) { return VT != V.getValueType(); }))
+ return false;
+
+ // Clear known elts that might have been set above.
+ KnownZero.clearAllBits();
+ KnownUndef.clearAllBits();
+
+ // Check if shuffle mask can be simplified to undef/zero/identity.
+ int NumSrcs = OpInputs.size();
+ for (int i = 0; i != NumElts; ++i) {
+ int &M = OpMask[i];
+ if (!DemandedElts[i])
+ M = SM_SentinelUndef;
+ else if (0 <= M && OpInputs[M / NumElts].isUndef())
+ M = SM_SentinelUndef;
+ }
+
+ if (isUndefInRange(OpMask, 0, NumElts)) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+ if (isUndefOrZeroInRange(OpMask, 0, NumElts)) {
+ KnownZero.setAllBits();
+ return TLO.CombineTo(
+ Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
+ }
+ for (int Src = 0; Src != NumSrcs; ++Src)
+ if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts))
+ return TLO.CombineTo(Op, OpInputs[Src]);
+
+ // Attempt to simplify inputs.
+ for (int Src = 0; Src != NumSrcs; ++Src) {
+ int Lo = Src * NumElts;
+ APInt SrcElts = APInt::getNullValue(NumElts);
+ for (int i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ int M = OpMask[i] - Lo;
+ if (0 <= M && M < NumElts)
+ SrcElts.setBit(M);
+ }
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+ }
+
+ // Extract known zero/undef elements.
+ // TODO - Propagate input undef/zero elts.
+ for (int i = 0; i != NumElts; ++i) {
+ if (OpMask[i] == SM_SentinelUndef)
+ KnownUndef.setBit(i);
+ if (OpMask[i] == SM_SentinelZero)
+ KnownZero.setBit(i);
+ }
+
+ return false;
+}
+
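The PACKSS/PACKUS handling above relies on mapping each demanded output element back to one element of one input, which is what getPackDemandedElts computes. For reference, the per-lane mapping looks roughly like this (a sketch with a hypothetical helper, 16-bit-to-8-bit packs assumed):

#include <utility>

// For a pack of two vXi16 inputs into one v(2X)i8 output, each 128-bit lane
// takes its first 8 bytes from the LHS lane and its last 8 from the RHS lane.
// Returns {which input (0 = LHS, 1 = RHS), element index within that input}.
std::pair<int, int> packSourceOfOutputElt(int OutIdx) {
  int Lane = OutIdx / 16;          // 128-bit lane of the result
  int InLane = OutIdx % 16;        // byte position within that lane
  int Input = InLane < 8 ? 0 : 1;  // low half from LHS, high half from RHS
  int Elt = Lane * 8 + (InLane % 8);
  return {Input, Elt};
}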
+bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
+ unsigned Opc = Op.getOpcode();
+ switch(Opc) {
+ case X86ISD::PMULDQ:
+ case X86ISD::PMULUDQ: {
+ // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
+ KnownBits KnownOp;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // FIXME: Can we bound this better?
+ APInt DemandedMask = APInt::getLowBitsSet(64, 32);
+ if (SimplifyDemandedBits(LHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(RHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::VSHLI: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (Op0.getOpcode() == X86ISD::VSRLI &&
+ OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
+ if (auto *Shift2Imm = dyn_cast<ConstantSDNode>(Op0.getOperand(1))) {
+ if (Shift2Imm->getAPIntValue().ult(BitWidth)) {
+ int Diff = ShAmt - Shift2Imm->getZExtValue();
+ if (Diff == 0)
+ return TLO.CombineTo(Op, Op0.getOperand(0));
+
+ unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
+ SDValue NewShift = TLO.DAG.getNode(
+ NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
+ TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
+ return TLO.CombineTo(Op, NewShift);
+ }
+ }
+ }
+
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
+
+ // Low bits known zero.
+ Known.Zero.setLowBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::VSRLI: {
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+ OriginalDemandedElts, Known, TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ // High bits known zero.
+ Known.Zero.setHighBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::VSRAI: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
+
+ // If we just want the sign bit then we don't need to shift it.
+ if (OriginalDemandedBits.isSignMask())
+ return TLO.CombineTo(Op, Op0);
+
+ // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
+ if (Op0.getOpcode() == X86ISD::VSHLI && Op1 == Op0.getOperand(1)) {
+ SDValue Op00 = Op0.getOperand(0);
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts);
+ if (ShAmt < NumSignBits)
+ return TLO.CombineTo(Op, Op00);
+ }
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ if (OriginalDemandedBits.countLeadingZeros() < ShAmt)
+ DemandedMask.setSignBit();
+
+ if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
+ TLO, Depth + 1))
+ return true;
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (Known.Zero[BitWidth - ShAmt - 1] ||
+ OriginalDemandedBits.countLeadingZeros() >= ShAmt)
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1));
+
+ // High bits are known one.
+ if (Known.One[BitWidth - ShAmt - 1])
+ Known.One.setHighBits(ShAmt);
+ }
+ break;
+ }
+ case X86ISD::MOVMSK: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+
+ // If we don't need the sign bits at all just return zero.
+ if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ // Only demand the vector elements of the sign bits we need.
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
+
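+ // MOVMSK only defines the low NumElts bits of the result; the upper bits
+ // are known zero.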
+ Known.Zero = KnownZero.zextOrSelf(BitWidth);
+ Known.Zero.setHighBits(BitWidth - NumElts);
+
+ // MOVMSK only uses the MSB from each vector element.
+ KnownBits KnownSrc;
+ if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), DemandedElts,
+ KnownSrc, TLO, Depth + 1))
+ return true;
+
+ if (KnownSrc.One[SrcBits - 1])
+ Known.One.setLowBits(NumElts);
+ else if (KnownSrc.Zero[SrcBits - 1])
+ Known.Zero.setLowBits(NumElts);
+ return false;
+ }
+ }
+
+ return TargetLowering::SimplifyDemandedBitsForTargetNode(
+ Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
+}
+
/// Check if a vector extract from a target-specific shuffle of a load can be
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
@@ -31344,9 +32816,13 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (Idx == SM_SentinelUndef)
return DAG.getUNDEF(EltVT);
+ // Bail if any mask element is SM_SentinelZero - getVectorShuffle below
+ // won't handle it.
+ if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; }))
+ return SDValue();
+
assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
- SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0]
- : ShuffleOps[1];
+ SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1];
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses =
@@ -31407,9 +32883,18 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
if (!VT.isScalarInteger() || !VecVT.isSimple())
return SDValue();
+ // If the input is a truncate from v16i8 or v32i8, go ahead and use a
+ // movmskb even with avx512. This will be better than truncating to vXi1 and
+ // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
+ // vpcmpeqb/vpcmpgtb.
+ bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ (N0.getOperand(0).getValueType() == MVT::v16i8 ||
+ N0.getOperand(0).getValueType() == MVT::v32i8 ||
+ N0.getOperand(0).getValueType() == MVT::v64i8);
+
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
- if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
@@ -31423,23 +32908,19 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
- MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
switch (VecVT.getSimpleVT().SimpleTy) {
default:
return SDValue();
case MVT::v2i1:
SExtVT = MVT::v2i64;
- FPCastVT = MVT::v2f64;
break;
case MVT::v4i1:
SExtVT = MVT::v4i32;
- FPCastVT = MVT::v4f32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
N0->getOperand(0).getValueType().is256BitVector()) {
SExtVT = MVT::v4i64;
- FPCastVT = MVT::v4f64;
}
break;
case MVT::v8i1:
@@ -31453,7 +32934,6 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
(N0->getOperand(0).getValueType().is256BitVector() ||
N0->getOperand(0).getValueType().is512BitVector())) {
SExtVT = MVT::v8i32;
- FPCastVT = MVT::v8f32;
}
break;
case MVT::v16i1:
@@ -31466,26 +32946,37 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
case MVT::v32i1:
SExtVT = MVT::v32i8;
break;
+ case MVT::v64i1:
+ // If we have AVX512F but not AVX512BW, and the input is truncated from
+ // v64i8 (checked earlier), split the input and make two pmovmskbs.
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) {
+ SExtVT = MVT::v64i8;
+ break;
+ }
+ return SDValue();
};
SDLoc DL(BitCast);
- SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT);
+ SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0);
- if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
+ if (SExtVT == MVT::v64i8) {
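+ // Split into two v32i8 halves, MOVMSK each half, and merge the two 32-bit
+ // results into an i64.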
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
+ Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
+ Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+ DAG.getConstant(32, DL, MVT::i8));
+ V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+ } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
V = getPMOVMSKB(DL, V, DAG, Subtarget);
- return DAG.getZExtOrTrunc(V, DL, VT);
+ } else {
+ if (SExtVT == MVT::v8i16)
+ V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
+ DAG.getUNDEF(MVT::v8i16));
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
-
- if (SExtVT == MVT::v8i16) {
- assert(16 == DAG.ComputeNumSignBits(V) && "Expected all/none bit vector");
- V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
- DAG.getUNDEF(MVT::v8i16));
- } else
- assert(SExtVT.getScalarType() != MVT::i16 &&
- "Vectors of i16 must be packed");
- if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
- V = DAG.getBitcast(FPCastVT, V);
- V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
return DAG.getZExtOrTrunc(V, DL, VT);
}
@@ -31806,65 +33297,6 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Match a binop + shuffle pyramid that represents a horizontal reduction over
-// the elements of a vector.
-// Returns the vector that is being reduced on, or SDValue() if a reduction
-// was not matched.
-static SDValue matchBinOpReduction(SDNode *Extract, unsigned &BinOp,
- ArrayRef<ISD::NodeType> CandidateBinOps) {
- // The pattern must end in an extract from index 0.
- if ((Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ||
- !isNullConstant(Extract->getOperand(1)))
- return SDValue();
-
- SDValue Op = Extract->getOperand(0);
- unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
-
- // Match against one of the candidate binary ops.
- if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
- return Op.getOpcode() == unsigned(BinOp);
- }))
- return SDValue();
-
- // At each stage, we're looking for something that looks like:
- // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
- // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
- // i32 undef, i32 undef, i32 undef, i32 undef>
- // %a = binop <8 x i32> %op, %s
- // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
- // we expect something like:
- // <4,5,6,7,u,u,u,u>
- // <2,3,u,u,u,u,u,u>
- // <1,u,u,u,u,u,u,u>
- unsigned CandidateBinOp = Op.getOpcode();
- for (unsigned i = 0; i < Stages; ++i) {
- if (Op.getOpcode() != CandidateBinOp)
- return SDValue();
-
- ShuffleVectorSDNode *Shuffle =
- dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0).getNode());
- if (Shuffle) {
- Op = Op.getOperand(1);
- } else {
- Shuffle = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1).getNode());
- Op = Op.getOperand(0);
- }
-
- // The first operand of the shuffle should be the same as the other operand
- // of the binop.
- if (!Shuffle || Shuffle->getOperand(0) != Op)
- return SDValue();
-
- // Verify the shuffle has the expected (at this stage of the pyramid) mask.
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
- if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
- return SDValue();
- }
-
- BinOp = CandidateBinOp;
- return Op;
-}
-
// Given a select, detect the following pattern:
// 1: %2 = zext <N x i8> %0 to <N x i32>
// 2: %3 = zext <N x i8> %1 to <N x i32>
@@ -31979,8 +33411,8 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
return SDValue();
// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
- unsigned BinOp;
- SDValue Src = matchBinOpReduction(
+ ISD::NodeType BinOp;
+ SDValue Src = DAG.matchBinOpReduction(
Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
if (!Src)
return SDValue();
@@ -32027,7 +33459,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
// ready for the PHMINPOS.
if (ExtractVT == MVT::i8) {
SDValue Upper = DAG.getVectorShuffle(
- SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL),
+ SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8),
{1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
}
@@ -32059,8 +33491,8 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
return SDValue();
// Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
- unsigned BinOp = 0;
- SDValue Match = matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
+ ISD::NodeType BinOp;
+ SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
if (!Match)
return SDValue();
@@ -32142,8 +33574,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
return SDValue();
// Match shuffle + add pyramid.
- unsigned BinOp = 0;
- SDValue Root = matchBinOpReduction(Extract, BinOp, {ISD::ADD});
+ ISD::NodeType BinOp;
+ SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});
// The operand is expected to be zero extended from i8
// (verified in detectZextAbsDiff).
@@ -32238,6 +33670,15 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
scaleShuffleMask<int>(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
+ // Simplify Mask based on demanded element.
+ int ExtractIdx = (int)N->getConstantOperandVal(1);
+ int Scale = Mask.size() / NumSrcElts;
+ int Lo = Scale * ExtractIdx;
+ int Hi = Scale * (ExtractIdx + 1);
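+ // Mask elements outside the extracted element's source range cannot
+ // affect the result, so mark them undef.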
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i)
+ if (i < Lo || Hi <= i)
+ Mask[i] = SM_SentinelUndef;
+
SmallVector<int, 16> WidenedMask;
while (Mask.size() > NumSrcElts &&
canWidenShuffleElements(Mask, WidenedMask))
@@ -32532,11 +33973,14 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element.
-static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
+/// This function will also call SimplifyDemandedBits on already-created
+/// BLENDV nodes to perform additional simplifications.
+static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
- if (N->getOpcode() != ISD::VSELECT ||
+ if ((N->getOpcode() != ISD::VSELECT &&
+ N->getOpcode() != X86ISD::BLENDV) ||
ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
@@ -32578,7 +34022,9 @@ static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
// TODO: Add other opcodes eventually lowered into BLEND.
for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
UI != UE; ++UI)
- if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0)
+ if ((UI->getOpcode() != ISD::VSELECT &&
+ UI->getOpcode() != X86ISD::BLENDV) ||
+ UI.getOperandNo() != 0)
return SDValue();
APInt DemandedMask(APInt::getSignMask(BitWidth));
@@ -32594,9 +34040,13 @@ static SDValue combineVSelectToShrunkBlend(SDNode *N, SelectionDAG &DAG,
// optimizations as we messed with the actual expectation for the vector
// boolean values.
for (SDNode *U : Cond->uses()) {
- SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U), U->getValueType(0),
+ if (U->getOpcode() == X86ISD::BLENDV)
+ continue;
+
+ SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
Cond, U->getOperand(1), U->getOperand(2));
DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
+ DCI.AddToWorklist(U);
}
DCI.CommitTargetLoweringOpt(TLO);
return SDValue(N, 0);
@@ -32608,9 +34058,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
- // Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
+
+ // Try simplification again because we use this function to optimize
+ // BLENDV nodes that are not handled by the generic combiner.
+ if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS))
+ return V;
+
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -32618,18 +34073,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Convert vselects with constant condition into shuffles.
if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
DCI.isBeforeLegalizeOps()) {
- SmallVector<int, 64> Mask(VT.getVectorNumElements(), -1);
- for (int i = 0, Size = Mask.size(); i != Size; ++i) {
- SDValue CondElt = Cond->getOperand(i);
- Mask[i] = i;
- // Arbitrarily choose from the 2nd operand if the select condition element
- // is undef.
- // TODO: Can we do better by matching patterns such as even/odd?
- if (CondElt.isUndef() || isNullConstant(CondElt))
- Mask[i] += Size;
- }
-
- return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
+ SmallVector<int, 64> Mask;
+ if (createShuffleMaskFromVSELECT(Mask, Cond))
+ return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
}
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
@@ -32814,7 +34260,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Since SKX these selects have a proper lowering.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
- VT.getVectorNumElements() > 4 &&
+ (ExperimentalVectorWideningLegalization ||
+ VT.getVectorNumElements() > 4) &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
@@ -32855,15 +34302,13 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
- // Early exit check
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
// Match VSELECTs into subs with unsigned saturation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
- // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
- ((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
- (Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ // psubus is available in SSE2 for i8 and i16 vectors.
+ Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
+ isPowerOf2_32(VT.getVectorNumElements()) &&
+ (VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16)) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
@@ -32877,37 +34322,31 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
if (Other.getNode() && Other->getNumOperands() == 2 &&
- DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
+ Other->getOperand(0) == Cond.getOperand(0)) {
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
- };
-
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> subus x, y
// x > y ? x-y : 0 --> subus x, y
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
- Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ Other->getOpcode() == ISD::SUB && OpRHS == CondRHS)
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
- if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
+ if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
if (isa<BuildVectorSDNode>(CondRHS)) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x+-C : 0 --> subus x, C
- auto MatchSUBUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ // TODO: Handle build_vectors with undef elements.
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
};
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchSUBUS)) {
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) {
OpRHS = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), OpRHS);
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
// Another special case: If C was a sign bit, the sub has been
@@ -32915,24 +34354,82 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> subus x, C
- if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode())
+ if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
OpRHSConst->getAPIntValue().isSignMask()) {
- OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
+ }
}
+ }
+ }
+ }
+
+ // Match VSELECTs into add with unsigned saturation.
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // paddus is available in SSE2 for i8 and i16 vectors.
+ Subtarget.hasSSE2() && VT.getVectorNumElements() >= 2 &&
+ isPowerOf2_32(VT.getVectorNumElements()) &&
+ (VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ SDValue CondLHS = Cond->getOperand(0);
+ SDValue CondRHS = Cond->getOperand(1);
+
+ // Check if one of the arms of the VSELECT is vector with all bits set.
+ // If it's on the left side invert the predicate to simplify logic below.
+ SDValue Other;
+ if (ISD::isBuildVectorAllOnes(LHS.getNode())) {
+ Other = RHS;
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (ISD::isBuildVectorAllOnes(RHS.getNode())) {
+ Other = LHS;
+ }
+
+ if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Canonicalize condition operands.
+ if (CC == ISD::SETUGE) {
+ std::swap(CondLHS, CondRHS);
+ CC = ISD::SETULE;
+ }
+
+ // We can test against either of the addition operands.
+ // x <= x+y ? x+y : ~0 --> addus x, y
+ // x+y >= x ? x+y : ~0 --> addus x, y
+ if (CC == ISD::SETULE && Other == CondRHS &&
+ (OpLHS == CondLHS || OpRHS == CondLHS))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+
+ if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
+ CondLHS == OpLHS) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > ~C ? x+C : ~0 --> addus x, C
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return Cond->getAPIntValue() == ~Op->getAPIntValue();
+ };
+ if (CC == ISD::SETULE &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+ }
}
}
+ // Early exit check
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
return V;
- if (SDValue V = combineVSelectToShrunkBlend(N, DAG, DCI, Subtarget))
+ if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget))
return V;
// Custom action for SELECT MMX
@@ -33014,16 +34511,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
/*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
AN->getMemOperand());
- // If the comparision uses the CF flag we can't use INC/DEC instructions.
- bool NeedCF = false;
- switch (CC) {
- default: break;
- case X86::COND_A: case X86::COND_AE:
- case X86::COND_B: case X86::COND_BE:
- NeedCF = true;
- break;
- }
- auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
+ auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
@@ -33453,10 +34941,13 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
SDValue Add = TrueOp;
SDValue Const = FalseOp;
// Canonicalize the condition code for easier matching and output.
- if (CC == X86::COND_E) {
+ if (CC == X86::COND_E)
std::swap(Add, Const);
- CC = X86::COND_NE;
- }
+
+ // We might have replaced the constant in the cmov with the LHS of the
+ // compare. If so change it to the RHS of the compare.
+ if (Const == Cond.getOperand(0))
+ Const = Cond.getOperand(1);
// Ok, now make sure that Add is (add (cttz X), C2) and Const is a constant.
if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD &&
@@ -33468,7 +34959,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// This should constant fold.
SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));
SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),
- DAG.getConstant(CC, DL, MVT::i8), Cond);
+ DAG.getConstant(X86::COND_NE, DL, MVT::i8),
+ Cond);
return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));
}
}
@@ -33490,40 +34982,8 @@ static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
for (unsigned i = 0; i < 2; i++) {
SDValue Opd = N->getOperand(i);
- // DAG.ComputeNumSignBits return 1 for ISD::ANY_EXTEND, so we need to
- // compute signbits for it separately.
- if (Opd.getOpcode() == ISD::ANY_EXTEND) {
- // For anyextend, it is safe to assume an appropriate number of leading
- // sign/zero bits.
- if (Opd.getOperand(0).getValueType().getVectorElementType() == MVT::i8)
- SignBits[i] = 25;
- else if (Opd.getOperand(0).getValueType().getVectorElementType() ==
- MVT::i16)
- SignBits[i] = 17;
- else
- return false;
- IsPositive[i] = true;
- } else if (Opd.getOpcode() == ISD::BUILD_VECTOR) {
- // All the operands of BUILD_VECTOR need to be int constant.
- // Find the smallest value range which all the operands belong to.
- SignBits[i] = 32;
- IsPositive[i] = true;
- for (const SDValue &SubOp : Opd.getNode()->op_values()) {
- if (SubOp.isUndef())
- continue;
- auto *CN = dyn_cast<ConstantSDNode>(SubOp);
- if (!CN)
- return false;
- APInt IntVal = CN->getAPIntValue();
- if (IntVal.isNegative())
- IsPositive[i] = false;
- SignBits[i] = std::min(SignBits[i], IntVal.getNumSignBits());
- }
- } else {
- SignBits[i] = DAG.ComputeNumSignBits(Opd);
- if (Opd.getOpcode() == ISD::ZERO_EXTEND)
- IsPositive[i] = true;
- }
+ SignBits[i] = DAG.ComputeNumSignBits(Opd);
+ IsPositive[i] = DAG.SignBitIsZero(Opd);
}
bool AllPositive = IsPositive[0] && IsPositive[1];
@@ -33608,90 +35068,90 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
- if (NumElts >= OpsVT.getVectorNumElements()) {
+ if (ExperimentalVectorWideningLegalization ||
+ NumElts >= OpsVT.getVectorNumElements()) {
// Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
// lower part is needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
- if (Mode == MULU8 || Mode == MULS8) {
+ if (Mode == MULU8 || Mode == MULS8)
return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
DL, VT, MulLo);
- } else {
- MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
- // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
- // the higher part is also needed.
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- ReducedVT, NewN0, NewN1);
-
- // Repack the lower part and higher part result of mul into a wider
- // result.
- // Generate shuffle functioning as punpcklwd.
- SmallVector<int, 16> ShuffleMask(NumElts);
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i;
- ShuffleMask[2 * i + 1] = i + NumElts;
- }
- SDValue ResLo =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResLo = DAG.getBitcast(ResVT, ResLo);
- // Generate shuffle functioning as punpckhwd.
- for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
- ShuffleMask[2 * i] = i + NumElts / 2;
- ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
- }
- SDValue ResHi =
- DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
- ResHi = DAG.getBitcast(ResVT, ResHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
- }
- } else {
- // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
- // to legalize the mul explicitly because implicit legalization for type
- // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
- // instructions which will not exist when we explicitly legalize it by
- // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
- // <4 x i16> undef).
- //
- // Legalize the operands of mul.
- // FIXME: We may be able to handle non-concatenated vectors by insertion.
- unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
- if ((RegSize % ReducedSizeInBits) != 0)
- return SDValue();
- SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
- DAG.getUNDEF(ReducedVT));
- Ops[0] = NewN0;
- NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
- Ops[0] = NewN1;
- NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
-
- if (Mode == MULU8 || Mode == MULS8) {
- // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
- // part is needed.
- SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
-
- // convert the type of mul result to VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG,
- DL, ResVT, Mul);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- } else {
- // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
- // MULU16/MULS16, both parts are needed.
- SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
- SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
- OpsVT, NewN0, NewN1);
-
- // Repack the lower part and higher part result of mul into a wider
- // result. Make sure the type of mul result is VT.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
- SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
- Res = DAG.getBitcast(ResVT, Res);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getIntPtrConstant(0, DL));
- }
+ MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
+ // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
+ // the higher part is also needed.
+ SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
+ ReducedVT, NewN0, NewN1);
+
+ // Repack the lower part and higher part result of mul into a wider
+ // result.
+ // Generate shuffle functioning as punpcklwd.
+ SmallVector<int, 16> ShuffleMask(NumElts);
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i;
+ ShuffleMask[2 * i + 1] = i + NumElts;
+ }
+ SDValue ResLo =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResLo = DAG.getBitcast(ResVT, ResLo);
+ // Generate shuffle functioning as punpckhwd.
+ for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+ ShuffleMask[2 * i] = i + NumElts / 2;
+ ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
+ }
+ SDValue ResHi =
+ DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+ ResHi = DAG.getBitcast(ResVT, ResHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
+ }
+
+ // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
+ // to legalize the mul explicitly because implicit legalization for type
+ // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
+ // instructions which will not exist when we explicitly legalize it by
+ // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
+ // <4 x i16> undef).
+ //
+ // Legalize the operands of mul.
+ // FIXME: We may be able to handle non-concatenated vectors by insertion.
+ unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
+ if ((RegSize % ReducedSizeInBits) != 0)
+ return SDValue();
+
+ SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
+ DAG.getUNDEF(ReducedVT));
+ Ops[0] = NewN0;
+ NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
+ Ops[0] = NewN1;
+ NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
+
+ if (Mode == MULU8 || Mode == MULS8) {
+ // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
+ // part is needed.
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+
+ // Convert the type of the mul result to VT.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
+ SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
+ : ISD::SIGN_EXTEND_VECTOR_INREG,
+ DL, ResVT, Mul);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
}
+
+ // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
+ // MULU16/MULS16, both parts are needed.
+ SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+ SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
+ OpsVT, NewN0, NewN1);
+
+ // Repack the lower part and higher part result of mul into a wider
+ // result. Make sure the type of mul result is VT.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
+ SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
+ Res = DAG.getBitcast(ResVT, Res);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
}
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
@@ -33781,13 +35241,13 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
}
// If the upper 17 bits of each element are zero then we can use PMADDWD,
-// which is always at least as quick as PMULLD, expect on KNL.
+// which is always at least as quick as PMULLD, except on KNL.
static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2())
return SDValue();
- if (Subtarget.getProcFamily() == X86Subtarget::IntelKNL)
+ if (Subtarget.isPMADDWDSlow())
return SDValue();
EVT VT = N->getValueType(0);
@@ -33797,12 +35257,24 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Make sure the vXi16 type is legal. This covers the AVX512 without BWI case.
+ // Also allow v2i32 if it will be widened.
MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
- if (!DAG.getTargetLoweringInfo().isTypeLegal(WVT))
+ if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) ||
+ DAG.getTargetLoweringInfo().isTypeLegal(WVT)))
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
+ // If we are zero extending in two steps without SSE4.1, it's better to reduce
+ // the vmul width instead.
+ if (!Subtarget.hasSSE41() &&
+ (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
+ (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() <= 8))
+ return SDValue();
+
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (!DAG.MaskedValueIsZero(N1, Mask17) ||
!DAG.MaskedValueIsZero(N0, Mask17))
@@ -33828,7 +35300,8 @@ static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
// Only support vXi64 vectors.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 ||
- !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ VT.getVectorNumElements() < 2 ||
+ !isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -33929,10 +35402,12 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
(SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
if (isPowerOf2_64(MulAmt2) &&
- !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+ !(SignMulAmt >= 0 && N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::ADD))
// If the second multiplier is pow2, issue it first. We want the multiply by
// 3, 5, or 9 to be folded into the addressing mode unless the lone use
- // is an add.
+ // is an add. Only do this for positive multiply amounts since the
+ // negate would prevent it from being used as an address mode anyway.
std::swap(MulAmt1, MulAmt2);
if (isPowerOf2_64(MulAmt1))
@@ -34197,6 +35672,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
"Unexpected PACKSS/PACKUS input type");
+ bool IsSigned = (X86ISD::PACKSS == Opcode);
+
// Constant Folding.
APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1;
@@ -34209,7 +35686,6 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
unsigned NumSrcElts = NumDstElts / 2;
unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
- bool IsSigned = (X86ISD::PACKSS == Opcode);
APInt Undefs(NumDstElts, 0);
SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
@@ -34253,16 +35729,58 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
}
+ // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
+ // truncate to create a larger truncate.
+ if (Subtarget.hasAVX512() &&
+ N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
+ N0.getOperand(0).getValueType() == MVT::v8i32) {
+ if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
+ (!IsSigned &&
+ DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
+ if (Subtarget.hasVLX())
+ return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ // Widen input to v16i32 so we can truncate that.
+ SDLoc dl(N);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32,
+ N0.getOperand(0), DAG.getUNDEF(MVT::v8i32));
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat);
+ }
+ }
+
// Attempt to combine as shuffle.
SDValue Op(N, 0);
if (SDValue Res =
combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
return SDValue();
}
+static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
+ X86ISD::VSRL == N->getOpcode()) &&
+ "Unexpected shift opcode");
+ EVT VT = N->getValueType(0);
+
+ // Shift zero -> zero.
+ if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -34277,13 +35795,14 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
+ assert(N1.getValueType() == MVT::i8 && "Unexpected shift amount type");
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
- APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
- if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
+ unsigned ShiftVal = cast<ConstantSDNode>(N1)->getZExtValue();
+ if (ShiftVal >= NumBitsPerElt) {
if (LogicalShift)
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ return DAG.getConstant(0, SDLoc(N), VT);
else
ShiftVal = NumBitsPerElt - 1;
}
@@ -34294,30 +35813,25 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
// Shift zero -> zero.
if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
-
- // fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
- // This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
- // TODO - support other sra opcodes as needed.
- if (Opcode == X86ISD::VSRLI && (ShiftVal + 1) == NumBitsPerElt &&
- N0.getOpcode() == X86ISD::VSRAI)
- return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
-
- // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
- if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSHLI &&
- N1 == N0.getOperand(1)) {
- SDValue N00 = N0.getOperand(0);
- unsigned NumSignBits = DAG.ComputeNumSignBits(N00);
- if (ShiftVal.ult(NumSignBits))
- return N00;
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // Fold (VSRAI (VSRAI X, C1), C2) --> (VSRAI X, (C1 + C2)) with (C1 + C2)
+ // clamped to (NumBitsPerElt - 1).
+ if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSRAI) {
+ unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ unsigned NewShiftVal = ShiftVal + ShiftVal2;
+ if (NewShiftVal >= NumBitsPerElt)
+ NewShiftVal = NumBitsPerElt - 1;
+ return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8));
}
// We can decode 'whole byte' logical bit shifts as shuffles.
- if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
+ if (LogicalShift && (ShiftVal % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -34328,18 +35842,22 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
- unsigned ShiftImm = ShiftVal.getZExtValue();
for (APInt &Elt : EltBits) {
if (X86ISD::VSHLI == Opcode)
- Elt <<= ShiftImm;
+ Elt <<= ShiftVal;
else if (X86ISD::VSRAI == Opcode)
- Elt.ashrInPlace(ShiftImm);
+ Elt.ashrInPlace(ShiftVal);
else
- Elt.lshrInPlace(ShiftImm);
+ Elt.lshrInPlace(ShiftVal);
}
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -34356,7 +35874,8 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res =
combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
return SDValue();
@@ -34468,42 +35987,31 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Try to match (and (xor X, -1), Y) logic pattern for (andnp X, Y) combines.
-static bool matchANDXORWithAllOnesAsANDNP(SDNode *N, SDValue &X, SDValue &Y) {
- if (N->getOpcode() != ISD::AND)
- return false;
+/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
+static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::AND);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ MVT VT = N->getSimpleValueType(0);
+ if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
+ return SDValue();
+
+ SDValue X, Y;
+ SDValue N0 = peekThroughBitcasts(N->getOperand(0));
+ SDValue N1 = peekThroughBitcasts(N->getOperand(1));
if (N0.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) {
X = N0.getOperand(0);
Y = N1;
- return true;
- }
- if (N1.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) {
+ } else if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) {
X = N1.getOperand(0);
Y = N0;
- return true;
- }
-
- return false;
-}
-
-/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
-static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::AND);
-
- EVT VT = N->getValueType(0);
- if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64)
+ } else
return SDValue();
- SDValue X, Y;
- if (matchANDXORWithAllOnesAsANDNP(N, X, Y))
- return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
-
- return SDValue();
+ X = DAG.getBitcast(VT, X);
+ Y = DAG.getBitcast(VT, Y);
+ return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
}
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
@@ -34512,8 +36020,8 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
// some of the transition sequences.
// Even with AVX-512 this is still useful for removing casts around logical
// operations on vXi1 mask types.
-static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
assert(VT.isVector() && "Expected vector type");
@@ -34628,6 +36136,10 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
!SplatVal.isMask())
return SDValue();
+ // Don't prevent creation of ANDN.
+ if (isBitwiseNot(Op0))
+ return SDValue();
+
if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
@@ -34761,6 +36273,73 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
return SDValue();
}
+// Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
+// Turn it into series of XORs and a setnp.
+static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+
+ // We only support 64-bit and 32-bit. 64-bit requires special handling
+ // unless the 64-bit popcnt instruction is legal.
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT))
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // LHS needs to be a single use CTPOP.
+ if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse())
+ return SDValue();
+
+ // RHS needs to be 1.
+ if (!isOneConstant(N1))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue X = N0.getOperand(0);
+
+ // If this is 64-bit, it's always best to xor the two 32-bit pieces together
+ // even if we have popcnt.
+ if (VT == MVT::i64) {
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(32, DL, MVT::i8)));
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+ X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi);
+ // Generate a 32-bit parity idiom. This will bring us back here if we need
+ // to expand it too.
+ SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32,
+ DAG.getNode(ISD::CTPOP, DL, MVT::i32, X),
+ DAG.getConstant(1, DL, MVT::i32));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Parity);
+ }
+ assert(VT == MVT::i32 && "Unexpected VT!");
+
+ // Xor the high and low 16-bits together using a 32-bit operation.
+ SDValue Hi16 = DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(16, DL, MVT::i8));
+ X = DAG.getNode(ISD::XOR, DL, VT, X, Hi16);
+
+ // Finally, xor the low 2 bytes together and use an 8-bit flag-setting xor.
+ // This should allow an h-reg to be used to save a shift.
+ // FIXME: We only get an h-reg in 32-bit mode.
+ SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
+ DAG.getNode(ISD::SRL, DL, VT, X,
+ DAG.getConstant(8, DL, MVT::i8)));
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
+ SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32);
+ SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);
+
+ // Copy the inverse of the parity flag into a register with setcc.
+ SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
+ // Zero extend to original type.
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp);
+}
+
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -34788,6 +36367,10 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
}
+ // This must be done before legalization has expanded the ctpop.
+ if (SDValue V = combineParity(N, DAG, Subtarget))
+ return V;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -34811,7 +36394,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -34848,7 +36431,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue Shuffle = combineX86ShufflesRecursively(
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
N->getOperand(0).getOperand(1));
}
@@ -34978,7 +36561,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasSSE41())
return SDValue();
- MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+ MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
Y = DAG.getBitcast(BlendVT, Y);
@@ -35122,11 +36705,21 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
+ // Attempt to recursively combine an OR of shuffles.
+ if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ SDValue Op(N, 0);
+ if (SDValue Res = combineX86ShufflesRecursively(
+ {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+ return Res;
+ }
+
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ unsigned Bits = VT.getScalarSizeInBits();
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
@@ -35149,6 +36742,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
SDValue ShAmt1 = N1.getOperand(1);
if (ShAmt1.getValueType() != MVT::i8)
return SDValue();
+
+ // Peek through any modulo shift masks.
+ SDValue ShMsk0;
+ if (ShAmt0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt0.getOperand(1)) &&
+ ShAmt0.getConstantOperandVal(1) == (Bits - 1)) {
+ ShMsk0 = ShAmt0;
+ ShAmt0 = ShAmt0.getOperand(0);
+ }
+ SDValue ShMsk1;
+ if (ShAmt1.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt1.getOperand(1)) &&
+ ShAmt1.getConstantOperandVal(1) == (Bits - 1)) {
+ ShMsk1 = ShAmt1;
+ ShAmt1 = ShAmt1.getOperand(0);
+ }
+
if (ShAmt0.getOpcode() == ISD::TRUNCATE)
ShAmt0 = ShAmt0.getOperand(0);
if (ShAmt1.getOpcode() == ISD::TRUNCATE)
@@ -35163,27 +36773,29 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
+ std::swap(ShMsk0, ShMsk1);
}
// OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
// OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
- unsigned Bits = VT.getSizeInBits();
+ // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> SHLD( X, Y, C )
+ // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> SHRD( X, Y, C )
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
- if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ if (auto *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
- if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
- return DAG.getNode(Opc, DL, VT,
- Op0, Op1,
- DAG.getNode(ISD::TRUNCATE, DL,
- MVT::i8, ShAmt0));
- }
- } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
- ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if ((SumC->getAPIntValue() == Bits ||
+ (SumC->getAPIntValue() == 0 && ShMsk1)) &&
+ ShAmt1Op1 == ShAmt0)
+ return DAG.getNode(Opc, DL, VT, Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ }
+ } else if (auto *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ auto *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
@@ -35191,12 +36803,13 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
MVT::i8, ShAmt0));
} else if (ShAmt1.getOpcode() == ISD::XOR) {
SDValue Mask = ShAmt1.getOperand(1);
- if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
+ if (auto *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
ShAmt1Op0 = ShAmt1Op0.getOperand(0);
- if (MaskC->getSExtValue() == (Bits - 1) && ShAmt1Op0 == ShAmt0) {
+ if (MaskC->getSExtValue() == (Bits - 1) &&
+ (ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) {
if (Op1.getOpcode() == InnerShift &&
isa<ConstantSDNode>(Op1.getOperand(1)) &&
Op1.getConstantOperandVal(1) == 1) {
@@ -35207,7 +36820,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
Op1.getOperand(0) == Op1.getOperand(1)) {
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
}
}
}
@@ -35478,6 +37091,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
}
if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
+ !Subtarget.hasAVX512() &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (auto USatVal = detectSSatPattern(In, VT, true)) {
@@ -35514,7 +37128,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
EVT ScalarVT = VT.getVectorElementType();
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
- isPowerOf2_32(NumElems)))
+ NumElems >= 2 && isPowerOf2_32(NumElems)))
return SDValue();
// InScalarVT is the intermediate type in AVG pattern and it should be greater
@@ -35752,8 +37366,8 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
Alignment, ML->getMemOperand()->getFlags());
// Insert the loaded element into the appropriate place in the vector.
- SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, ML->getSrc0(),
- Load, VecIndex);
+ SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ ML->getPassThru(), Load, VecIndex);
return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
@@ -35776,7 +37390,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
if (LoadFirstElt && LoadLastElt) {
SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMemOperand());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getSrc0());
+ SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
+ ML->getPassThru());
return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
}
@@ -35786,7 +37401,7 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
// Don't try this if the pass-through operand is already undefined. That would
// cause an infinite loop because that's what we're about to create.
- if (ML->getSrc0().isUndef())
+ if (ML->getPassThru().isUndef())
return SDValue();
// The new masked load has an undef pass-through operand. The select uses the
@@ -35795,7 +37410,8 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
ML->getMask(), DAG.getUNDEF(VT),
ML->getMemoryVT(), ML->getMemOperand(),
ML->getExtensionType());
- SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getSrc0());
+ SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
+ ML->getPassThru());
return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
@@ -35842,9 +37458,9 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
LdVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
- // Convert Src0 value.
- SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0());
- if (!Mld->getSrc0().isUndef()) {
+ // Convert PassThru value.
+ SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru());
+ if (!Mld->getPassThru().isUndef()) {
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
@@ -35852,7 +37468,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
// Can't shuffle using an illegal type.
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
"WideVecVT should be legal");
- WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
+ WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru,
DAG.getUNDEF(WideVecVT), ShuffleVec);
}
@@ -35885,10 +37501,10 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
}
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
- Mld->getBasePtr(), NewMask, WideSrc0,
+ Mld->getBasePtr(), NewMask, WidePassThru,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
- SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG);
+ SDValue NewVec = getExtendInVec(/*Signed*/true, dl, VT, WideLd, DAG);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
@@ -35920,31 +37536,25 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
}
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
-
if (Mst->isCompressingStore())
return SDValue();
+ EVT VT = Mst->getValue().getValueType();
if (!Mst->isTruncatingStore()) {
if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
return ScalarStore;
- // If the mask is checking (0 > X), we're creating a vector with all-zeros
- // or all-ones elements based on the sign bits of X. AVX1 masked store only
- // cares about the sign bit of each mask element, so eliminate the compare:
- // mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
- // Note that by waiting to match an x86-specific PCMPGT node, we're
- // eliminating potentially more complex matching of a setcc node which has
- // a full range of predicates.
+ // If the mask value has been legalized to a non-boolean vector, try to
+ // simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
- if (Mask.getOpcode() == X86ISD::PCMPGT &&
- ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
- assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
- "Unexpected type for PCMPGT");
- return DAG.getMaskedStore(
- Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
- Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
+ if (Mask.getScalarValueSizeInBits() != 1) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ return SDValue(N, 0);
}
// TODO: AVX512 targets should also be able to simplify something like the
@@ -35955,7 +37565,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
}
// Resolve truncating stores.
- EVT VT = Mst->getValue().getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
@@ -36043,6 +37652,18 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Convert a store of vXi1 into a store of iX and a bitcast.
+ if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1) {
+
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
+ StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
+
// If this is a store of a scalar_to_vector to v1i1, just use a scalar store.
// This will avoid a copy to k-register.
if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
@@ -36269,7 +37890,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget.is64Bit() || F64IsLegal) {
- MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
+ MVT LdVT = (Subtarget.is64Bit() &&
+ (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getMemOperand());
@@ -36343,10 +37965,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
-/// Note that the binary operation should have the property that if one of the
-/// operands is UNDEF then the result is UNDEF.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
- // Look for the following pattern: if
+ // If either operand is undef, bail out. The binop should be simplified.
+ if (LHS.isUndef() || RHS.isUndef())
+ return false;
+
+ // Look for the following pattern:
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
// and
@@ -36361,25 +37985,15 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
return false;
MVT VT = LHS.getSimpleValueType();
-
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
- // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
- // operate independently on 128-bit lanes.
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts / NumLanes;
- assert((NumLaneElts % 2 == 0) &&
- "Vector type should have an even number of elements in each lane");
- unsigned HalfLaneElts = NumLaneElts/2;
-
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
- // If LHS is not a shuffle then pretend it is the shuffle
+ // If LHS is not a shuffle, then pretend it is the identity shuffle:
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
- // NOTE: in what follows a default initialized SDValue represents an UNDEF of
- // type VT.
+ // NOTE: A default initialized SDValue represents an UNDEF of type VT.
+ unsigned NumElts = VT.getVectorNumElements();
SDValue A, B;
SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
@@ -36388,10 +38002,9 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
if (!LHS.getOperand(1).isUndef())
B = LHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
- std::copy(Mask.begin(), Mask.end(), LMask.begin());
+ llvm::copy(Mask, LMask.begin());
} else {
- if (!LHS.isUndef())
- A = LHS;
+ A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
@@ -36406,45 +38019,51 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
if (!RHS.getOperand(1).isUndef())
D = RHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
- std::copy(Mask.begin(), Mask.end(), RMask.begin());
+ llvm::copy(Mask, RMask.begin());
} else {
- if (!RHS.isUndef())
- C = RHS;
+ C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
+ // If A and B occur in reverse order in RHS, then canonicalize by commuting
+ // RHS operands and shuffle mask.
+ if (A != C) {
+ std::swap(C, D);
+ ShuffleVectorSDNode::commuteMask(RMask);
+ }
// Check that the shuffles are both shuffling the same vectors.
- if (!(A == C && B == D) && !(A == D && B == C))
- return false;
-
- // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
- if (!A.getNode() && !B.getNode())
+ if (!(A == C && B == D))
return false;
- // If A and B occur in reverse order in RHS, then "swap" them (which means
- // rewriting the mask).
- if (A != C)
- ShuffleVectorSDNode::commuteMask(RMask);
-
- // At this point LHS and RHS are equivalent to
- // LHS = VECTOR_SHUFFLE A, B, LMask
- // RHS = VECTOR_SHUFFLE A, B, RMask
+ // LHS and RHS are now:
+ // LHS = shuffle A, B, LMask
+ // RHS = shuffle A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0; i != NumLaneElts; ++i) {
- int LIdx = LMask[i+l], RIdx = RMask[i+l];
-
- // Ignore any UNDEF components.
+ // AVX defines horizontal add/sub to operate independently on 128-bit lanes,
+ // so we just repeat the inner loop if this is a 256-bit op.
+ unsigned Num128BitChunks = VT.getSizeInBits() / 128;
+ unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
+ assert((NumEltsPer128BitChunk % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
+ for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
+ // Ignore undefined components.
+ int LIdx = LMask[i + j], RIdx = RMask[i + j];
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
- // Check that successive elements are being operated on. If not, this is
+ // The low half of the 128-bit result must choose from A.
+ // The high half of the 128-bit result must choose from B,
+ // unless B is undef. In that case, we are always choosing from A.
+ unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
+ unsigned Src = B.getNode() ? i >= NumEltsPer64BitChunk : 0;
+
+ // Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- unsigned Src = (i/HalfLaneElts); // each lane is split between srcs
- int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
+ int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j;
if (!(LIdx == Index && RIdx == Index + 1) &&
!(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
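A minimal standalone sketch (illustrative only, not part of the patch) of the index arithmetic the rewritten loop performs for a 128-bit v4f32 HADD, assuming LHS = shuffle(A, B, <0,2,4,6>) and RHS = shuffle(A, B, <1,3,5,7>):

#include <cassert>

int main() {
  // Model of the horizontal-op mask check for one 128-bit chunk of v4f32.
  const unsigned NumElts = 4, NumEltsPer128BitChunk = 4;
  const unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
  int LMask[] = {0, 2, 4, 6}; // even elements of A and B
  int RMask[] = {1, 3, 5, 7}; // odd elements of A and B
  for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
    for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
      unsigned Src = i >= NumEltsPer64BitChunk; // low half from A, high from B
      int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j;
      // Successive elements must be operated on: LIdx == Index, RIdx == Index + 1.
      assert(LMask[i + j] == Index && RMask[i + j] == Index + 1);
    }
  }
  return 0;
}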
@@ -36463,21 +38082,24 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
bool IsFadd = N->getOpcode() == ISD::FADD;
+ auto HorizOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode");
// Try to synthesize horizontal add/sub from adds/subs of shuffles.
if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
- isHorizontalBinOp(LHS, RHS, IsFadd)) {
- auto NewOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
- return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
- }
+ isHorizontalBinOp(LHS, RHS, IsFadd) &&
+ shouldUseHorizontalOp(LHS == RHS, DAG, Subtarget))
+ return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
+
return SDValue();
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
+/// TODO: This overlaps with the generic combiner's visitTRUNCATE. Remove
+/// anything that is guaranteed to be transformed by DAGCombiner.
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
@@ -36489,34 +38111,20 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
- auto IsRepeatedOpOrFreeTruncation = [VT](SDValue Op0, SDValue Op1) {
+ auto IsFreeTruncation = [VT](SDValue Op) {
unsigned TruncSizeInBits = VT.getScalarSizeInBits();
- // Repeated operand, so we are only trading one output truncation for
- // one input truncation.
- if (Op0 == Op1)
- return true;
-
- // See if either operand has been extended from a smaller/equal size to
+ // See if this has been extended from a smaller/equal size to
// the truncation size, allowing a truncation to combine with the extend.
- unsigned Opcode0 = Op0.getOpcode();
- if ((Opcode0 == ISD::ANY_EXTEND || Opcode0 == ISD::SIGN_EXTEND ||
- Opcode0 == ISD::ZERO_EXTEND) &&
- Op0.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
- return true;
-
- unsigned Opcode1 = Op1.getOpcode();
- if ((Opcode1 == ISD::ANY_EXTEND || Opcode1 == ISD::SIGN_EXTEND ||
- Opcode1 == ISD::ZERO_EXTEND) &&
- Op1.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
+ unsigned Opcode = Op.getOpcode();
+ if ((Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND ||
+ Opcode == ISD::ZERO_EXTEND) &&
+ Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
return true;
- // See if either operand is a single use constant which can be constant
- // folded.
- SDValue BC0 = peekThroughOneUseBitcasts(Op0);
- SDValue BC1 = peekThroughOneUseBitcasts(Op1);
- return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
- ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
+ // See if this is a single use constant which can be constant folded.
+ SDValue BC = peekThroughOneUseBitcasts(Op);
+ return ISD::isBuildVectorOfConstantSDNodes(BC.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
@@ -36526,7 +38134,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
};
// Don't combine if the operation has other uses.
- if (!N->isOnlyUserOf(Src.getNode()))
+ if (!Src.hasOneUse())
return SDValue();
// Only support vector truncation for now.
@@ -36544,7 +38152,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
- IsRepeatedOpOrFreeTruncation(Op0, Op1))
+ (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -36557,11 +38165,20 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
- // TODO: ISD::SUB should be here but interferes with combineSubToSubus.
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
- IsRepeatedOpOrFreeTruncation(Op0, Op1))
+ (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
+ return TruncateArithmetic(Op0, Op1);
+ break;
+ }
+ case ISD::SUB: {
+ // TODO: For ISD::SUB, we are conservative and require both sides to be freely
+ // truncatable to avoid interfering with combineSubToSubus.
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ if (TLI.isOperationLegal(Opcode, VT) &&
+ (Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1))))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -36701,8 +38318,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// Use PACKUS if the input has zero-bits that extend all the way to the
// packed/truncated value. e.g. masks, zext_in_reg, etc.
- KnownBits Known;
- DAG.computeKnownBits(In, Known);
+ KnownBits Known = DAG.computeKnownBits(In);
unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))
return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);
@@ -36733,9 +38349,11 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
if (!Subtarget.hasSSE2())
return SDValue();
- // Only handle vXi16 types that are at least 128-bits.
+ // Only handle vXi16 types that are at least 128-bits unless they will be
+ // widened.
if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 ||
- VT.getVectorNumElements() < 8)
+ (!ExperimentalVectorWideningLegalization &&
+ VT.getVectorNumElements() < 8))
return SDValue();
// Input type should be vXi32.
@@ -36951,29 +38569,72 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
/// Returns the negated value if the node \p N flips sign of FP value.
///
-/// FP-negation node may have different forms: FNEG(x) or FXOR (x, 0x80000000).
+/// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000)
+/// or FSUB(0, x)
/// AVX512F does not have FXOR, so FNEG is lowered as
/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
/// In this case we go though all bitcasts.
-static SDValue isFNEG(SDNode *N) {
+/// This also recognizes splat of a negated value and returns the splat of that
+/// value.
+static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
- if (Op.getOpcode() != X86ISD::FXOR && Op.getOpcode() != ISD::XOR)
+ auto VT = Op->getValueType(0);
+ if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
+ // For a VECTOR_SHUFFLE(VEC1, VEC2), if VEC2 is undef, then the negate
+ // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
+ if (!SVOp->getOperand(1).isUndef())
+ return SDValue();
+ if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
+ return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
+ SVOp->getMask());
+ return SDValue();
+ }
+ unsigned Opc = Op.getOpcode();
+ if (Opc == ISD::INSERT_VECTOR_ELT) {
+ // Negate of INSERT_VECTOR_ELT(UNDEF, V, INDEX) is INSERT_VECTOR_ELT(UNDEF,
+ // -V, INDEX).
+ SDValue InsVector = Op.getOperand(0);
+ SDValue InsVal = Op.getOperand(1);
+ if (!InsVector.isUndef())
+ return SDValue();
+ if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
+ NegInsVal, Op.getOperand(2));
+ return SDValue();
+ }
+
+ if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
return SDValue();
SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
if (!Op1.getValueType().isFloatingPoint())
return SDValue();
- // Extract constant bits and see if they are all sign bit masks.
+ SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+
+ // For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
+ // masks. For FSUB, we have to check if constant bits of Op0 are sign bit
+ // masks and hence we swap the operands.
+ if (Opc == ISD::FSUB)
+ std::swap(Op0, Op1);
+
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
+ // Extract constant bits and see if they are all sign bit masks. Ignore the
+ // undef elements.
if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
- UndefElts, EltBits, false, false))
- if (llvm::all_of(EltBits, [](APInt &I) { return I.isSignMask(); }))
- return peekThroughBitcasts(Op.getOperand(0));
+ UndefElts, EltBits,
+ /* AllowWholeUndefs */ true,
+ /* AllowPartialUndefs */ false)) {
+ for (unsigned I = 0, E = EltBits.size(); I < E; I++)
+ if (!UndefElts[I] && !EltBits[I].isSignMask())
+ return SDValue();
+
+ return peekThroughBitcasts(Op0);
+ }
return SDValue();
}
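A tiny standalone sketch (illustrative only, not part of the patch) of why the new FSUB form is only accepted when the first operand's constant bits are the sign mask (-0.0): a plain 0.0 - x is not a negation for x == +0.0.

#include <cassert>
#include <cmath>

int main() {
  float X = 0.0f;
  assert(std::signbit(-0.0f - X));  // -0.0 - (+0.0) == -0.0, i.e. fneg(+0.0)
  assert(!std::signbit(0.0f - X));  // +0.0 - (+0.0) == +0.0, not a negation
  return 0;
}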
@@ -36982,8 +38643,9 @@ static SDValue isFNEG(SDNode *N) {
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OrigVT = N->getValueType(0);
- SDValue Arg = isFNEG(N);
- assert(Arg.getNode() && "N is expected to be an FNEG node");
+ SDValue Arg = isFNEG(DAG, N);
+ if (!Arg)
+ return SDValue();
EVT VT = Arg.getValueType();
EVT SVT = VT.getScalarType();
@@ -37033,25 +38695,27 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
// If we have integer vector types available, use the integer opcodes.
- if (VT.isVector() && Subtarget.hasSSE2()) {
- SDLoc dl(N);
+ if (!VT.isVector() || !Subtarget.hasSSE2())
+ return SDValue();
- MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+ SDLoc dl(N);
- SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
- SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
- unsigned IntOpcode;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected FP logic op");
- case X86ISD::FOR: IntOpcode = ISD::OR; break;
- case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
- case X86ISD::FAND: IntOpcode = ISD::AND; break;
- case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
- }
- SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
- return DAG.getBitcast(VT, IntOp);
+ unsigned IntBits = VT.getScalarSizeInBits();
+ MVT IntSVT = MVT::getIntegerVT(IntBits);
+ MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits);
+
+ SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
+ SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
+ unsigned IntOpcode;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected FP logic op");
+ case X86ISD::FOR: IntOpcode = ISD::OR; break;
+ case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+ case X86ISD::FAND: IntOpcode = ISD::AND; break;
+ case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
}
- return SDValue();
+ SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+ return DAG.getBitcast(VT, IntOp);
}
@@ -37098,9 +38762,7 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
- if (isFNEG(N))
- return combineFneg(N, DAG, Subtarget);
- return SDValue();
+ return combineFneg(N, DAG, Subtarget);
}
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
@@ -37112,8 +38774,6 @@ static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
unsigned NumBits = VT.getSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
// TODO - Constant Folding.
if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
@@ -37127,12 +38787,9 @@ static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
}
// Only bottom 16-bits of the control bits are required.
- KnownBits Known;
APInt DemandedMask(APInt::getLowBitsSet(NumBits, 16));
- if (TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(Op1, DemandedMask, DCI))
return SDValue(N, 0);
- }
return SDValue();
}
@@ -37233,9 +38890,8 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
return N->getOperand(0);
- if (isFNEG(N))
- if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
- return NewVal;
+ if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
+ return NewVal;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
@@ -37320,26 +38976,47 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
}
+static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ MVT VT = N->getSimpleValueType(0);
+
// ANDNP(0, x) -> x
if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
return N->getOperand(1);
// ANDNP(x, 0) -> 0
if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
- return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
+ return DAG.getConstant(0, SDLoc(N), VT);
- EVT VT = N->getValueType(0);
+ // Turn ANDNP back to AND if input is inverted.
+ if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N->getOperand(0).getOperand(1).getNode())) {
+ return DAG.getNode(ISD::AND, SDLoc(N), VT,
+ N->getOperand(0).getOperand(0), N->getOperand(1));
+ }
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -37502,36 +39179,6 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
}
-/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) ->
-/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y)
-/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
-/// extends from AH (which we otherwise need to do contortions to access).
-static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
- SDValue N0 = N->getOperand(0);
- auto OpcodeN = N->getOpcode();
- auto OpcodeN0 = N0.getOpcode();
- if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
- (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
- return SDValue();
-
- EVT VT = N->getValueType(0);
- EVT InVT = N0.getValueType();
- if (N0.getResNo() != 1 || InVT != MVT::i8 ||
- !(VT == MVT::i32 || VT == MVT::i64))
- return SDValue();
-
- SDVTList NodeTys = DAG.getVTList(MVT::i8, MVT::i32);
- auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
- : X86ISD::UDIVREM8_ZEXT_HREG;
- SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
- N0.getOperand(1));
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
- // If this was a 64-bit extend, complete it.
- if (VT == MVT::i64)
- return DAG.getNode(OpcodeN, SDLoc(N), VT, R.getValue(1));
- return R.getValue(1);
-}
-
// If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant
// operands and the result of CMOV is not used anywhere else - promote CMOV
// itself instead of promoting its result. This could be beneficial, because:
@@ -37685,6 +39332,9 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ if (ExperimentalVectorWideningLegalization)
+ return SDValue();
+
unsigned Opcode = N->getOpcode();
if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
return SDValue();
@@ -37699,17 +39349,33 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
EVT InSVT = InVT.getScalarType();
+ // FIXME: Generic DAGCombiner previously had a bug that would cause a
+ // sign_extend of setcc to sometimes return the original node, tricking it
+ // into thinking CombineTo was used and preventing the target combines from
+ // running.
+ // Early out here to avoid regressions like this:
+ // (v4i32 (sext (v4i1 (setcc (v4i16)))))
+ // Becomes
+ // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef))))
+ // Type legalized to
+ // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32)))))))
+ // Leading to a packssdw+pmovsxwd
+ // We could write a DAG combine to fix this, but really we shouldn't be
+ // creating sext_invec that's forcing v8i16 into the DAG.
+ if (N0.getOpcode() == ISD::SETCC)
+ return SDValue();
+
// Input type must be a vector and we must be extending legal integer types.
- if (!VT.isVector())
+ if (!VT.isVector() || VT.getVectorNumElements() < 2)
return SDValue();
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
- // On AVX2+ targets, if the input/output types are both legal then we will be
- // able to use SIGN_EXTEND/ZERO_EXTEND directly.
- if (Subtarget.hasInt256() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ // If the input/output types are both legal then we have at least AVX1 and
+ // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
@@ -37737,16 +39403,16 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, DL));
}
- // If target-size is 128-bits (or 256-bits on AVX2 target), then convert to
+ // If target-size is 128-bits (or 256-bits on AVX target), then convert to
// ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT.
// Also use this if we don't have SSE41 to allow the legalizer do its job.
if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is256BitVector() && Subtarget.hasAVX()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs())) {
SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
- return Opcode == ISD::SIGN_EXTEND
- ? DAG.getSignExtendVectorInReg(ExOp, DL, VT)
- : DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
+ Opcode = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+ return DAG.getNode(Opcode, DL, VT, ExOp);
}
auto SplitAndExtendInReg = [&](unsigned SplitSize) {
@@ -37755,22 +39421,23 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
+ unsigned IROpc = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
- SrcVec = Opcode == ISD::SIGN_EXTEND
- ? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
- : DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
+ SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec);
Opnds.push_back(SrcVec);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
};
- // On pre-AVX2 targets, split into 128-bit nodes of
+ // On pre-AVX targets, split into 128-bit nodes of
// ISD::*_EXTEND_VECTOR_INREG.
- if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128))
+ if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128))
return SplitAndExtendInReg(128);
// On pre-AVX512 targets, split into 256-bit nodes of
@@ -37832,9 +39499,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
SDLoc DL(N);
- if (SDValue DivRem8 = getDivRem8(N, DAG))
- return DivRem8;
-
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;
@@ -37861,7 +39525,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
return V;
if (VT.isVector())
- if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget))
+ if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
@@ -37920,7 +39584,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
SDValue C = N->getOperand(2);
auto invertIfNegative = [&DAG](SDValue &V) {
- if (SDValue NegVal = isFNEG(V.getNode())) {
+ if (SDValue NegVal = isFNEG(DAG, V.getNode())) {
V = DAG.getBitcast(V.getValueType(), NegVal);
return true;
}
@@ -37928,7 +39592,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
// new extract from the FNEG input.
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(V.getOperand(1))) {
- if (SDValue NegVal = isFNEG(V.getOperand(0).getNode())) {
+ if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) {
NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal);
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
NegVal, V.getOperand(1));
@@ -37961,7 +39625,7 @@ static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SDValue NegVal = isFNEG(N->getOperand(2).getNode());
+ SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode());
if (!NegVal)
return SDValue();
@@ -38032,12 +39696,9 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
return V;
if (VT.isVector())
- if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget))
+ if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
return R;
- if (SDValue DivRem8 = getDivRem8(N, DAG))
- return DivRem8;
-
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
@@ -38079,12 +39740,15 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
return SDValue();
// TODO: Use PXOR + PTEST for SSE4.1 or later?
- // TODO: Add support for AVX-512.
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX2())) {
- EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
+ (OpSize == 256 && Subtarget.hasAVX2()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs())) {
+ EVT VecVT = OpSize == 512 ? MVT::v16i32 :
+ OpSize == 256 ? MVT::v32i8 :
+ MVT::v16i8;
+ EVT CmpVT = OpSize == 512 ? MVT::v16i1 : VecVT;
SDValue Cmp;
if (IsOrXorXorCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
@@ -38095,14 +39759,18 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
- SDValue Cmp1 = DAG.getSetCC(DL, VecVT, A, B, ISD::SETEQ);
- SDValue Cmp2 = DAG.getSetCC(DL, VecVT, C, D, ISD::SETEQ);
- Cmp = DAG.getNode(ISD::AND, DL, VecVT, Cmp1, Cmp2);
+ SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+ SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ);
+ Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2);
} else {
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
- Cmp = DAG.getSetCC(DL, VecVT, VecX, VecY, ISD::SETEQ);
+ Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
}
+ // For 512-bits we want to emit a setcc that will lower to kortest.
+ if (OpSize == 512)
+ return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp),
+ DAG.getConstant(0xFFFF, DL, MVT::i16), CC);
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
@@ -38181,7 +39849,9 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
// NOTE: The element count check is to ignore operand types that need to
// go through type promotion to a 128-bit vector.
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
- VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() > 4 &&
+ VT.getVectorElementType() == MVT::i1 &&
+ (ExperimentalVectorWideningLegalization ||
+ VT.getVectorNumElements() > 4) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) {
SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
@@ -38202,10 +39872,11 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = N->getSimpleValueType(0);
// Perform constant folding.
if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
- assert(N->getValueType(0) == MVT::i32 && "Unexpected result type");
+ assert(VT == MVT::i32 && "Unexpected result type");
APInt Imm(32, 0);
for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
SDValue In = Src.getOperand(Idx);
@@ -38213,20 +39884,53 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
cast<ConstantSDNode>(In)->getAPIntValue().isNegative())
Imm.setBit(Idx);
}
- return DAG.getConstant(Imm, SDLoc(N), N->getValueType(0));
+ return DAG.getConstant(Imm, SDLoc(N), VT);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
+ // Look through int->fp bitcasts that don't change the element width.
+ if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse() &&
+ SrcVT.isFloatingPoint() &&
+ Src.getOperand(0).getValueType() ==
+ EVT(SrcVT).changeVectorElementTypeToInteger())
+ Src = Src.getOperand(0);
- // MOVMSK only uses the MSB from each vector element.
- KnownBits Known;
- APInt DemandedMask(APInt::getSignMask(SrcVT.getScalarSizeInBits()));
- if (TLI.SimplifyDemandedBits(Src, DemandedMask, Known, TLO)) {
- DCI.AddToWorklist(Src.getNode());
- DCI.CommitTargetLoweringOpt(TLO);
+ // Simplify the inputs.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
+
+ // Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
+ // Only do this when the setcc input and output types are the same and the
+ // setcc and the 'and' node have a single use.
+ // FIXME: Support 256-bits with AVX1. The movmsk is split, but the and isn't.
+ APInt SplatVal;
+ if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
+ Src.getOperand(0).getValueType() == Src.getValueType() &&
+ cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETNE &&
+ ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
+ Src.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue And = Src.getOperand(0);
+ if (And.hasOneUse() &&
+ ISD::isConstantSplatVector(And.getOperand(1).getNode(), SplatVal) &&
+ SplatVal.isPowerOf2()) {
+ MVT VT = Src.getSimpleValueType();
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned ShAmt = BitWidth - SplatVal.logBase2() - 1;
+ SDLoc DL(And);
+ SDValue X = And.getOperand(0);
+ // If the element type is i8, we need to bitcast to i16 to use a legal
+ // shift. If we wait until lowering we end up with an extra AND to stop bits
+ // from crossing the 8-bit elements, but we don't care about that here.
+ if (VT.getVectorElementType() == MVT::i8) {
+ VT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
+ X = DAG.getBitcast(VT, X);
+ }
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
+ DAG.getConstant(ShAmt, DL, VT));
+ SDValue Cast = DAG.getBitcast(SrcVT, Shl);
+ return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), N->getValueType(0), Cast);
+ }
}
return SDValue();
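A standalone sketch (illustrative only, not part of the patch) of the per-lane identity behind the new MOVMSK fold, assuming 16-bit lanes and mask bit C = 3: shifting bit C up to the MSB reproduces exactly the bit that the setne-against-zero lane would contribute to the mask.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned BitWidth = 16, C = 3;
  for (uint32_t X = 0; X < 0x10000; ++X) {
    // The setcc lane is all-ones (MSB set) iff bit C of X is set.
    bool SetccMSB = (X & (1u << C)) != 0;
    // The replacement shifts bit C into the MSB; MOVMSK only reads the MSB.
    uint16_t Shifted = (uint16_t)(X << (BitWidth - 1 - C));
    bool ShiftMSB = (Shifted >> (BitWidth - 1)) & 1;
    assert(SetccMSB == ShiftMSB);
  }
  return 0;
}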
@@ -38296,16 +40000,10 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
// With AVX2 we only demand the upper bit of the mask.
if (!Subtarget.hasAVX512()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
SDValue Mask = N->getOperand(2);
- KnownBits Known;
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
- if (TLI.SimplifyDemandedBits(Mask, DemandedMask, Known, TLO)) {
- DCI.AddToWorklist(Mask.getNode());
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
return SDValue(N, 0);
- }
}
return SDValue();
@@ -38396,7 +40094,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
- // UINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
+ // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
@@ -38460,7 +40158,8 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
- if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
+ Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT LdVT = Ld->getValueType(0);
@@ -38485,6 +40184,159 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static bool needCarryOrOverflowFlag(SDValue Flags) {
+ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
+
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ X86::CondCode CC;
+ switch (User->getOpcode()) {
+ default:
+ // Be conservative.
+ return true;
+ case X86ISD::SETCC:
+ case X86ISD::SETCC_CARRY:
+ CC = (X86::CondCode)User->getConstantOperandVal(0);
+ break;
+ case X86ISD::BRCOND:
+ CC = (X86::CondCode)User->getConstantOperandVal(2);
+ break;
+ case X86ISD::CMOV:
+ CC = (X86::CondCode)User->getConstantOperandVal(2);
+ break;
+ }
+
+ switch (CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool onlyZeroFlagUsed(SDValue Flags) {
+ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
+
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ unsigned CCOpNo;
+ switch (User->getOpcode()) {
+ default:
+ // Be conservative.
+ return false;
+ case X86ISD::SETCC: CCOpNo = 0; break;
+ case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
+ case X86ISD::BRCOND: CCOpNo = 2; break;
+ case X86ISD::CMOV: CCOpNo = 2; break;
+ }
+
+ X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return false;
+ }
+
+ return true;
+}
+
+static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
+ // Only handle test patterns.
+ if (!isNullConstant(N->getOperand(1)))
+ return SDValue();
+
+ // If we have a CMP of a truncated binop, see if we can make a smaller binop
+ // and use its flags directly.
+ // TODO: Maybe we should try promoting compares that only use the zero flag
+ // first if we can prove the upper bits with computeKnownBits?
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ EVT VT = Op.getValueType();
+
+ // If we have a constant logical shift that's only used in a comparison
+ // against zero turn it into an equivalent AND. This allows turning it into
+ // a TEST instruction later.
+ if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
+ Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
+ onlyZeroFlagUsed(SDValue(N, 0))) {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ unsigned ShAmt = Op.getConstantOperandVal(1);
+ if (ShAmt < BitWidth) { // Avoid undefined shifts.
+ APInt Mask = Op.getOpcode() == ISD::SRL
+ ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
+ : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
+ if (Mask.isSignedIntN(32)) {
+ Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
+ DAG.getConstant(Mask, dl, VT));
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, VT));
+ }
+ }
+ }
+
+
+ // Look for a truncate with a single use.
+ if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse())
+ return SDValue();
+
+ Op = Op.getOperand(0);
+
+ // Arithmetic op can only have one use.
+ if (!Op.hasOneUse())
+ return SDValue();
+
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ default: return SDValue();
+ case ISD::AND:
+ // Skip and with constant. We have special handling for and with immediate
+ // during isel to generate test instructions.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ NewOpc = X86ISD::AND;
+ break;
+ case ISD::OR: NewOpc = X86ISD::OR; break;
+ case ISD::XOR: NewOpc = X86ISD::XOR; break;
+ case ISD::ADD:
+ // If the carry or overflow flag is used, we can't truncate.
+ if (needCarryOrOverflowFlag(SDValue(N, 0)))
+ return SDValue();
+ NewOpc = X86ISD::ADD;
+ break;
+ case ISD::SUB:
+ // If the carry or overflow flag is used, we can't truncate.
+ if (needCarryOrOverflowFlag(SDValue(N, 0)))
+ return SDValue();
+ NewOpc = X86ISD::SUB;
+ break;
+ }
+
+ // We found an op we can narrow. Truncate its inputs.
+ SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));
+
+ // Use a X86 specific opcode to avoid DAG combine messing with it.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1);
+
+ // For AND, keep a CMP so that we can match the test pattern.
+ if (NewOpc == X86ISD::AND)
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, VT));
+
+ // Return the flags.
+ return Op.getValue(1);
+}
+
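A standalone sketch (illustrative only, not part of the patch) of the property combineCMP relies on: flags that depend only on the low bits (such as ZF, the only ones left after the carry/overflow/signed-order checks above) are identical for the wide op and for the narrowed op on truncated operands.

#include <cassert>
#include <cstdint>
#include <random>

int main() {
  std::mt19937_64 Rng(0);
  for (int I = 0; I < 100000; ++I) {
    uint64_t X = Rng(), Y = Rng();
    // The low 32 bits of an add depend only on the low 32 bits of its
    // operands, so "trunc(X + Y) == 0" and "trunc(X) + trunc(Y) == 0" agree.
    bool WideZero = (uint32_t)(X + Y) == 0;
    bool NarrowZero = (uint32_t)((uint32_t)X + (uint32_t)Y) == 0;
    assert(WideZero == NarrowZero);
  }
  return 0;
}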
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
MVT VT = N->getSimpleValueType(0);
@@ -38531,21 +40383,6 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
-/// which is more useful than 0/1 in some cases.
-static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
- SDLoc DL(N);
- // "Condition code B" is also known as "the carry flag" (CF).
- SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
- SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
- MVT VT = N->getSimpleValueType(0);
- if (VT == MVT::i8)
- return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
-
- assert(VT == MVT::i1 && "Unexpected type for SETCC node");
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
-}
-
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
@@ -38616,13 +40453,11 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
}
if (CC == X86::COND_B) {
- // X + SETB Z --> X + (mask SBB Z, Z)
- // X - SETB Z --> X - (mask SBB Z, Z)
- // TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY?
- SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG);
- if (SBB.getValueSizeInBits() != VT.getSizeInBits())
- SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
- return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ // X + SETB Z --> adc X, 0
+ // X - SETB Z --> sbb X, 0
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), Y.getOperand(1));
}
if (CC == X86::COND_A) {
@@ -38640,10 +40475,9 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG);
- if (SBB.getValueSizeInBits() != VT.getSizeInBits())
- SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
- return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), NewEFLAGS);
}
}
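A minimal sketch (illustrative only, not part of the patch) of the identity used above: adding a materialized carry bit to X is the same as an add-with-carry of zero, so no SETCC_CARRY + AND mask is needed.

#include <cassert>
#include <cstdint>

// Scalar model of "adc X, 0" with a given carry-in.
static uint32_t adc(uint32_t A, uint32_t B, bool CarryIn) {
  return A + B + (CarryIn ? 1u : 0u);
}

int main() {
  uint32_t Xs[] = {0u, 1u, 12345u, 0xFFFFFFFFu};
  for (uint32_t X : Xs)
    for (int CF = 0; CF <= 1; ++CF) {
      uint32_t SetB = CF ? 1u : 0u;      // what SETB would materialize
      assert(X + SetB == adc(X, 0, CF != 0));
    }
  return 0;
}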
@@ -38713,23 +40547,23 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasSSE2())
return SDValue();
- SDValue MulOp = N->getOperand(0);
- SDValue Phi = N->getOperand(1);
-
- if (MulOp.getOpcode() != ISD::MUL)
- std::swap(MulOp, Phi);
- if (MulOp.getOpcode() != ISD::MUL)
- return SDValue();
-
- ShrinkMode Mode;
- if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
- return SDValue();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// If the vector size is less than 128, or greater than the supported RegSize,
// do not use PMADD.
- if (VT.getVectorNumElements() < 8)
+ if (!VT.isVector() || VT.getVectorNumElements() < 8)
+ return SDValue();
+
+ if (Op0.getOpcode() != ISD::MUL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ ShrinkMode Mode;
+ if (!canReduceVMulWidth(Op0.getNode(), DAG, Mode) || Mode == MULU16)
return SDValue();
SDLoc DL(N);
@@ -38738,22 +40572,34 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements() / 2);
- // Shrink the operands of mul.
- SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
-
// Madd vector size is half of the original vector size
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
};
- SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
- PMADDWDBuilder);
- // Fill the rest of the output with 0
- SDValue Zero = getZeroVector(Madd.getSimpleValueType(), Subtarget, DAG, DL);
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
- return DAG.getNode(ISD::ADD, DL, VT, Concat, Phi);
+
+ auto BuildPMADDWD = [&](SDValue Mul) {
+ // Shrink the operands of mul.
+ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1));
+
+ SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
+ PMADDWDBuilder);
+ // Fill the rest of the output with 0
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd,
+ DAG.getConstant(0, DL, MAddVT));
+ };
+
+ Op0 = BuildPMADDWD(Op0);
+
+ // It's possible that Op1 is also a mul we can reduce.
+ if (Op1.getOpcode() == ISD::MUL &&
+ canReduceVMulWidth(Op1.getNode(), DAG, Mode) && Mode != MULU16) {
+ Op1 = BuildPMADDWD(Op1);
+ }
+
+ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
}
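A scalar model (illustrative only, not part of the patch) of why routing the shrunk multiplies through PMADDWD is safe inside a reduction add: pairing the 32-bit products early, as PMADDWD does, leaves the final sum unchanged when the operands fit in i16.

#include <cassert>
#include <cstdint>

// One VPMADDWD output lane: two adjacent signed i16 products, summed in i32.
static int32_t PmaddwdLane(int16_t A0, int16_t A1, int16_t B0, int16_t B1) {
  return (int32_t)A0 * B0 + (int32_t)A1 * B1;
}

int main() {
  int16_t A[8] = {1, -2, 3, 4, -5, 6, 7, 8};
  int16_t B[8] = {8, 7, -6, 5, 4, 3, 2, -1};
  int64_t WideSum = 0;
  for (int I = 0; I < 8; ++I)
    WideSum += (int32_t)A[I] * (int32_t)B[I];
  int64_t MaddSum = 0;
  for (int I = 0; I < 8; I += 2)
    MaddSum += PmaddwdLane(A[I], A[I + 1], B[I], B[I + 1]);
  assert(WideSum == MaddSum);
  return 0;
}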
static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
@@ -38786,45 +40632,53 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
// We know N is a reduction add, which means one of its operands is a phi.
// To match SAD, we need the other operand to be a vector select.
- SDValue SelectOp, Phi;
- if (Op0.getOpcode() == ISD::VSELECT) {
- SelectOp = Op0;
- Phi = Op1;
- } else if (Op1.getOpcode() == ISD::VSELECT) {
- SelectOp = Op1;
- Phi = Op0;
- } else
- return SDValue();
+ if (Op0.getOpcode() != ISD::VSELECT)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) {
+ // SAD pattern detected. Now build a SAD instruction and an addition for
+ // reduction. Note that the number of elements of the result of SAD is less
+ // than the number of elements of its input. Therefore, we can only update
+ // part of the elements in the reduction vector.
+ SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget);
+
+ // The output of PSADBW is a vector of i64.
+ // We need to turn the vector of i64 into a vector of i32.
+ // If the reduction vector is at least as wide as the psadbw result, just
+ // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
+ // anyway.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
+ if (VT.getSizeInBits() >= ResVT.getSizeInBits())
+ Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
+ else
+ Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
+
+ if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
+ // Fill the upper elements with zero to match the add width.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ return Sad;
+ };
// Check whether we have an abs-diff pattern feeding into the select.
- if(!detectZextAbsDiff(SelectOp, Op0, Op1))
- return SDValue();
-
- // SAD pattern detected. Now build a SAD instruction and an addition for
- // reduction. Note that the number of elements of the result of SAD is less
- // than the number of elements of its input. Therefore, we could only update
- // part of elements in the reduction vector.
- SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget);
-
- // The output of PSADBW is a vector of i64.
- // We need to turn the vector of i64 into a vector of i32.
- // If the reduction vector is at least as wide as the psadbw result, just
- // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
- // anyway.
- MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
- if (VT.getSizeInBits() >= ResVT.getSizeInBits())
- Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
- else
- Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
+ SDValue SadOp0, SadOp1;
+ if (!detectZextAbsDiff(Op0, SadOp0, SadOp1))
+ return SDValue();
- if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
- // Fill the upper elements with zero to match the add width.
- SDValue Zero = DAG.getConstant(0, DL, VT);
- Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
- DAG.getIntPtrConstant(0, DL));
+ Op0 = BuildPSADBW(SadOp0, SadOp1);
+
+ // It's possible we have a sad on the other side too.
+ if (Op1.getOpcode() == ISD::VSELECT &&
+ detectZextAbsDiff(Op1, SadOp0, SadOp1)) {
+ Op1 = BuildPSADBW(SadOp0, SadOp1);
}
- return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
+ return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
}
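A scalar model (illustrative only, not part of the patch) of one PSADBW lane, and of why the high half of each i64 result can be treated as zero when the reduction type is narrower: the lane value is bounded by 8 * 255.

#include <cassert>
#include <cstdint>

// One PSADBW output lane: sum of absolute differences of eight unsigned
// bytes, zero-extended into an i64 lane.
static uint64_t PsadbwLane(const uint8_t *A, const uint8_t *B) {
  uint64_t Sum = 0;
  for (int I = 0; I < 8; ++I)
    Sum += A[I] > B[I] ? A[I] - B[I] : B[I] - A[I];
  return Sum;
}

int main() {
  uint8_t A[8] = {10, 200, 30, 40, 5, 60, 70, 255};
  uint8_t B[8] = {20, 100, 30, 45, 50, 60, 0, 1};
  assert(PsadbwLane(A, B) == 484u);      // 10+100+0+5+45+0+70+254
  assert(PsadbwLane(A, B) <= 8u * 255u); // always fits in the low 32 bits
  return 0;
}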
/// Convert vector increment or decrement to sub/add with an all-ones constant:
@@ -38843,10 +40697,8 @@ static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
- SDNode *N1 = N->getOperand(1).getNode();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(N1, SplatVal) ||
- !SplatVal.isOneValue())
+ if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue())
return SDValue();
SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
@@ -38963,6 +40815,39 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
PMADDBuilder);
}
+// Try to turn (add (umax X, C), -C) into (psubus X, C)
+static SDValue combineAddToSUBUS(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasSSE2())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // psubus is available in SSE2 for i8 and i16 vectors.
+ if (!VT.isVector() || VT.getVectorNumElements() < 2 ||
+ !isPowerOf2_32(VT.getVectorNumElements()) ||
+ !(VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16))
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::UMAX)
+ return SDValue();
+
+ // The add should have a constant that is the negative of the max.
+ // TODO: Handle build_vectors with undef elements.
+ auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+ return Max->getAPIntValue() == (-Op->getAPIntValue());
+ };
+ if (!ISD::matchBinaryPredicate(Op0.getOperand(1), Op1, MatchUSUBSAT))
+ return SDValue();
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, Op0.getOperand(0),
+ Op0.getOperand(1));
+}
+
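A standalone check (illustrative only, not part of the patch) of the per-element identity behind combineAddToSUBUS for unsigned i8 elements: because umax(X, C) >= C, adding the two's complement of C never wraps, and the result is exactly the unsigned saturating subtract.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C) {
      uint8_t Add = (uint8_t)(std::max(X, C) + (uint8_t)(0 - C)); // umax + (-C)
      uint8_t USubSat = X >= C ? (uint8_t)(X - C) : 0;            // usubsat X, C
      assert(Add == USubSat);
    }
  return 0;
}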
// Attempt to turn this pattern into PMADDWD.
// (mul (add (zext (build_vector)), (zext (build_vector))),
// (add (zext (build_vector)), (zext (build_vector)))
@@ -39105,7 +40990,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
- Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true)) {
+ Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true) &&
+ shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
@@ -39117,6 +41003,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineIncDecVector(N, DAG))
return V;
+ if (SDValue V = combineAddToSUBUS(N, DAG, Subtarget))
+ return V;
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -39162,23 +41051,22 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
} else
return SDValue();
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
+ auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
};
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
// special preprocessing in some cases.
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
- { SubusLHS, SubusRHS }, SUBUSBuilder);
+ { SubusLHS, SubusRHS }, USUBSATBuilder);
// Special preprocessing case can be only applied
// if the value was zero extended from 16 bit,
// so we require first 16 bits to be zeros for 32 bit
// values, or first 48 bits for 64 bit values.
- KnownBits Known;
- DAG.computeKnownBits(SubusLHS, Known);
+ KnownBits Known = DAG.computeKnownBits(SubusLHS);
unsigned NumZeros = Known.countMinLeadingZeros();
if ((VT == MVT::v8i64 && NumZeros < 48) || NumZeros < 16)
return SDValue();
@@ -39203,7 +41091,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
SDValue Psubus =
SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
- { NewSubusLHS, NewSubusRHS }, SUBUSBuilder);
+ { NewSubusLHS, NewSubusRHS }, USUBSATBuilder);
// Zero-extend the result; it may be used somewhere as a 32-bit value.
// If not, the zext and the following trunc will be shrunk away.
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);
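A standalone sketch of why the narrowing above is safe, assuming (as the comment requires) that the LHS is known zero-extended from 16 bits; usubsat32/usubsat16 are hypothetical helpers, not LLVM APIs:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t usubsat32(uint32_t X, uint32_t Y) { return X > Y ? X - Y : 0; }
static uint16_t usubsat16(uint16_t X, uint16_t Y) { return X > Y ? X - Y : 0; }

int main() {
  for (uint32_t X = 0; X < 0x10000; X += 0x123)   // any LHS that fits in 16 bits
    for (uint32_t Y : {0u, 0x123u, 0xFFFFu, 0x1FFFFu, 0xFFFFFFFFu}) {
      uint32_t Clamped = std::min(Y, X);          // NewSubusRHS = umin(RHS, LHS)
      // The clamped RHS also fits in 16 bits, so a 16-bit psubus is exact.
      assert(usubsat32(X, Y) == usubsat16((uint16_t)X, (uint16_t)Clamped));
    }
  return 0;
}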
@@ -39236,7 +41124,8 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
- Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false)) {
+ Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false) &&
+ shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
@@ -39255,98 +41144,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
return combineAddOrSubToADCOrSBB(N, DAG);
}
-static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- SDLoc DL(N);
- unsigned Opcode = N->getOpcode();
- MVT VT = N->getSimpleValueType(0);
- MVT SVT = VT.getVectorElementType();
- unsigned NumElts = VT.getVectorNumElements();
- unsigned EltSizeInBits = SVT.getSizeInBits();
-
- SDValue Op = N->getOperand(0);
- MVT OpVT = Op.getSimpleValueType();
- MVT OpEltVT = OpVT.getVectorElementType();
- unsigned OpEltSizeInBits = OpEltVT.getSizeInBits();
- unsigned InputBits = OpEltSizeInBits * NumElts;
-
- // Perform any constant folding.
- // FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
- APInt UndefElts;
- SmallVector<APInt, 64> EltBits;
- if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) {
- APInt Undefs(NumElts, 0);
- SmallVector<APInt, 4> Vals(NumElts, APInt(EltSizeInBits, 0));
- bool IsZEXT =
- (Opcode == X86ISD::VZEXT) || (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (UndefElts[i]) {
- Undefs.setBit(i);
- continue;
- }
- Vals[i] = IsZEXT ? EltBits[i].zextOrTrunc(EltSizeInBits)
- : EltBits[i].sextOrTrunc(EltSizeInBits);
- }
- return getConstVector(Vals, Undefs, VT, DAG, DL);
- }
-
- // (vzext (bitcast (vzext (x)) -> (vzext x)
- // TODO: (vsext (bitcast (vsext (x)) -> (vsext x)
- SDValue V = peekThroughBitcasts(Op);
- if (Opcode == X86ISD::VZEXT && V != Op && V.getOpcode() == X86ISD::VZEXT) {
- MVT InnerVT = V.getSimpleValueType();
- MVT InnerEltVT = InnerVT.getVectorElementType();
-
- // If the element sizes match exactly, we can just do one larger vzext. This
- // is always an exact type match as vzext operates on integer types.
- if (OpEltVT == InnerEltVT) {
- assert(OpVT == InnerVT && "Types must match for vzext!");
- return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0));
- }
-
- // The only other way we can combine them is if only a single element of the
- // inner vzext is used in the input to the outer vzext.
- if (InnerEltVT.getSizeInBits() < InputBits)
- return SDValue();
-
- // In this case, the inner vzext is completely dead because we're going to
- // only look at bits inside of the low element. Just do the outer vzext on
- // a bitcast of the input to the inner.
- return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V));
- }
-
- // Check if we can bypass extracting and re-inserting an element of an input
- // vector. Essentially:
- // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
- // TODO: Add X86ISD::VSEXT support
- if (Opcode == X86ISD::VZEXT &&
- V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
- SDValue ExtractedV = V.getOperand(0);
- SDValue OrigV = ExtractedV.getOperand(0);
- if (isNullConstant(ExtractedV.getOperand(1))) {
- MVT OrigVT = OrigV.getSimpleValueType();
- // Extract a subvector if necessary...
- if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
- int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
- OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
- OrigVT.getVectorNumElements() / Ratio);
- OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
- DAG.getIntPtrConstant(0, DL));
- }
- Op = DAG.getBitcast(OpVT, OrigV);
- return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
- }
- }
-
- return SDValue();
-}
-
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
@@ -39354,9 +41151,9 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
- return getOnesVector(VT, DAG, DL);
+ return DAG.getConstant(-1, DL, VT);
if (N->getOpcode() == X86ISD::PCMPGT)
- return getZeroVector(VT, Subtarget, DAG, DL);
+ return DAG.getConstant(0, DL, VT);
}
return SDValue();
@@ -39487,11 +41284,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
return Ld;
}
}
- // If lower/upper loads are the same and the only users of the load, then
- // lower to a VBROADCASTF128/VBROADCASTI128/etc.
+ // If lower/upper loads are the same and there's no other use of the lower
+ // load, then splat the loaded value with a broadcast.
if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2)))
- if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
- SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode()))
+ if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse())
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
// If this is subv_broadcast insert into both halves, use a larger
@@ -39528,6 +41324,39 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ // For AVX1 only, if we are extracting from a 256-bit and+not (which will
+ // eventually get combined/lowered into ANDNP) with a concatenated operand,
+ // split the 'and' into 128-bit ops to avoid the concatenate and extract.
+ // We let generic combining take over from there to simplify the
+ // insert/extract and 'not'.
+ // This pattern emerges during AVX1 legalization. We handle it before lowering
+ // to avoid complications like splitting constant vector loads.
+
+ // Capture the original wide type in the likely case that we need to bitcast
+ // back to this type.
+ EVT VT = N->getValueType(0);
+ EVT WideVecVT = N->getOperand(0).getValueType();
+ SDValue WideVec = peekThroughBitcasts(N->getOperand(0));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
+ TLI.isTypeLegal(WideVecVT) &&
+ WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) {
+ auto isConcatenatedNot = [] (SDValue V) {
+ V = peekThroughBitcasts(V);
+ if (!isBitwiseNot(V))
+ return false;
+ SDValue NotOp = V->getOperand(0);
+ return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS;
+ };
+ if (isConcatenatedNot(WideVec.getOperand(0)) ||
+ isConcatenatedNot(WideVec.getOperand(1))) {
+ // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1
+ SDValue Concat = split256IntArith(WideVec, DAG);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
+ DAG.getBitcast(WideVecVT, Concat), N->getOperand(1));
+ }
+ }
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -39565,13 +41394,32 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0));
}
}
- if ((InOpcode == X86ISD::VZEXT || InOpcode == X86ISD::VSEXT) &&
+ if ((InOpcode == ISD::ZERO_EXTEND || InOpcode == ISD::SIGN_EXTEND) &&
OpVT.is128BitVector() &&
InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
- unsigned ExtOp = InOpcode == X86ISD::VZEXT ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG;
+ unsigned ExtOp =
+ InOpcode == ISD::ZERO_EXTEND ? ISD::ZERO_EXTEND_VECTOR_INREG
+ : ISD::SIGN_EXTEND_VECTOR_INREG;
return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
}
+ if ((InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ OpVT.is128BitVector() &&
+ InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
+ return DAG.getNode(InOpcode, SDLoc(N), OpVT, InVec.getOperand(0));
+ }
+ if (InOpcode == ISD::BITCAST) {
+ // TODO - do this for target shuffles in general.
+ SDValue InVecBC = peekThroughOneUseBitcasts(InVec);
+ if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) {
+ SDLoc DL(N);
+ SDValue SubPSHUFB =
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
+ extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL),
+ extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL));
+ return DAG.getBitcast(OpVT, SubPSHUFB);
+ }
+ }
}
return SDValue();
@@ -39591,6 +41439,15 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
Src.getOperand(0));
+ // Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
+ if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
+ Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+ if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
+ if (C->isNullValue())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
+ Src.getOperand(0), Src.getOperand(1));
+
return SDValue();
}
@@ -39600,23 +41457,28 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ // Canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);
+
+ // Multiply by zero.
+ if (ISD::isBuildVectorAllZeros(RHS.getNode()))
+ return RHS;
+
+ // Aggressively peek through ops to get at the demanded low bits.
+ APInt DemandedMask = APInt::getLowBitsSet(64, 32);
+ SDValue DemandedLHS = DAG.GetDemandedBits(LHS, DemandedMask);
+ SDValue DemandedRHS = DAG.GetDemandedBits(RHS, DemandedMask);
+ if (DemandedLHS || DemandedRHS)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ DemandedLHS ? DemandedLHS : LHS,
+ DemandedRHS ? DemandedRHS : RHS);
+
+ // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- APInt DemandedMask(APInt::getLowBitsSet(64, 32));
-
- // PMULQDQ/PMULUDQ only uses lower 32 bits from each vector element.
- KnownBits LHSKnown;
- if (TLI.SimplifyDemandedBits(LHS, DemandedMask, LHSKnown, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
return SDValue(N, 0);
- }
-
- KnownBits RHSKnown;
- if (TLI.SimplifyDemandedBits(RHS, DemandedMask, RHSKnown, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
- return SDValue(N, 0);
- }
return SDValue();
}
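A scalar model of the instruction semantics the combine above relies on (a sketch for one 64-bit lane; the helper names are hypothetical): PMULUDQ/PMULDQ read only the low 32 bits of each 64-bit element, which is why a demanded-bits mask of the low 32 bits is sufficient.

#include <cstdint>

// One 64-bit lane of PMULUDQ: only the low 32 bits of each source matter.
static uint64_t pmuludq_lane(uint64_t A, uint64_t B) {
  return (uint64_t)(uint32_t)A * (uint64_t)(uint32_t)B;
}

// One 64-bit lane of PMULDQ: the low 32 bits of each source, sign-extended.
static int64_t pmuldq_lane(uint64_t A, uint64_t B) {
  return (int64_t)(int32_t)(uint32_t)A * (int64_t)(int32_t)(uint32_t)B;
}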
@@ -39638,9 +41500,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return combineExtractSubvector(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
- case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
+ case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
+ case X86ISD::CMP: return combineCMP(N, DAG);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
case X86ISD::SBB: return combineSBB(N, DAG);
@@ -39656,7 +41519,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, Subtarget);
- case ISD::MSTORE: return combineMaskedStore(N, DAG, Subtarget);
+ case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
@@ -39672,6 +41535,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
+ case X86ISD::CVTSI2P:
+ case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
@@ -39682,14 +41547,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::PACKSS:
case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
+ case X86ISD::VSHL:
+ case X86ISD::VSRA:
+ case X86ISD::VSRL:
+ return combineVectorShiftVar(N, DAG, DCI, Subtarget);
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
return combineVectorShiftImm(N, DAG, DCI, Subtarget);
- case ISD::SIGN_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG:
- case X86ISD::VSEXT:
- case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
case X86ISD::PINSRB:
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
@@ -39751,10 +41616,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
-/// Return true if the target has native support for the specified value type
-/// and it is 'desirable' to use the type for the given node type. e.g. On x86
-/// i16 is legal, but undesirable since i16 instruction encodings are longer and
-/// some i16 instructions are slow.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
@@ -39763,26 +41624,37 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
return false;
- if (VT != MVT::i16)
- return true;
-
- switch (Opc) {
- default:
- return true;
- case ISD::LOAD:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- case ISD::SHL:
- case ISD::SRL:
- case ISD::SUB:
- case ISD::ADD:
- case ISD::MUL:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
+ // 8-bit multiply is probably not much cheaper than 32-bit multiply, and
+ // we have specializations to turn 32-bit multiply into LEA or other ops.
+ // Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
+ // check for a constant operand to the multiply.
+ if (Opc == ISD::MUL && VT == MVT::i8)
return false;
+
+ // i16 instruction encodings are longer and some i16 instructions are slow,
+ // so those are not desirable.
+ if (VT == MVT::i16) {
+ switch (Opc) {
+ default:
+ break;
+ case ISD::LOAD:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return false;
+ }
}
+
+ // Any legal type not explicitly accounted for above here is desirable.
+ return true;
}
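A hedged illustration of the multiply-by-constant reasoning above (an example, not from the patch; exact codegen depends on the subtarget): doing the math at 32 bits keeps the low 8 bits identical while letting the backend use LEA-style address arithmetic instead of an 8-bit multiply.

#include <cstdint>

// x * 9 == x + x * 8, which a 32-bit LEA can compute in one instruction.
uint8_t mul9(uint8_t X) {
  return (uint8_t)((uint32_t)X * 9); // same low 8 bits as an i8 multiply
}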
SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
@@ -39801,12 +41673,16 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
}
-/// This method query the target whether it is beneficial for dag combiner to
-/// promote the specified node. If true, it should return the desired promotion
-/// type by reference.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
- if (VT != MVT::i16)
+ bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
+ isa<ConstantSDNode>(Op.getOperand(1));
+
+ // i16 is legal, but undesirable since i16 instruction encodings are longer
+ // and some i16 instructions are slow.
+ // 8-bit multiply-by-constant can usually be expanded to something cheaper
+ // using LEA and/or other ALU ops.
+ if (VT != MVT::i16 && !Is8BitMulByConstant)
return false;
auto IsFoldableRMW = [](SDValue Load, SDValue Op) {
@@ -39820,6 +41696,19 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
return Ld->getBasePtr() == St->getBasePtr();
};
+ auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
+ if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
+ return false;
+ if (!Op.hasOneUse())
+ return false;
+ SDNode *User = *Op->use_begin();
+ if (User->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+ auto *Ld = cast<AtomicSDNode>(Load);
+ auto *St = cast<AtomicSDNode>(User);
+ return Ld->getBasePtr() == St->getBasePtr();
+ };
+
bool Commute = false;
switch (Op.getOpcode()) {
default: return false;
@@ -39854,6 +41743,9 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
+ if (IsFoldableAtomicRMW(N0, Op) ||
+ (Commute && IsFoldableAtomicRMW(N1, Op)))
+ return false;
}
}
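A source-level shape that IsFoldableAtomicRMW appears to target (an assumption about the motivating case, not spelled out in the patch): a relaxed 16-bit atomic load, an arithmetic op, and a store back to the same address, which is cheaper to keep at i16 so it can be selected as a single memory-destination instruction.

#include <atomic>

// ATOMIC_LOAD feeding an ADD feeding an ATOMIC_STORE with the same base
// pointer, the pattern the lambda above recognizes.
void bump(std::atomic<unsigned short> &Counter) {
  unsigned short V = Counter.load(std::memory_order_relaxed);
  Counter.store((unsigned short)(V + 1), std::memory_order_relaxed);
}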
@@ -40593,44 +42485,33 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (!Res.second) {
// Map st(0) -> st(7) -> ST0
if (Constraint.size() == 7 && Constraint[0] == '{' &&
- tolower(Constraint[1]) == 's' &&
- tolower(Constraint[2]) == 't' &&
+ tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
Constraint[3] == '(' &&
(Constraint[4] >= '0' && Constraint[4] <= '7') &&
- Constraint[5] == ')' &&
- Constraint[6] == '}') {
-
- Res.first = X86::FP0+Constraint[4]-'0';
- Res.second = &X86::RFP80RegClass;
- return Res;
+ Constraint[5] == ')' && Constraint[6] == '}') {
+ // st(7) is not allocatable and thus not a member of RFP80. Return
+ // singleton class in cases where we have a reference to it.
+ if (Constraint[4] == '7')
+ return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
+ return std::make_pair(X86::FP0 + Constraint[4] - '0',
+ &X86::RFP80RegClass);
}
// GCC allows "st(0)" to be called just plain "st".
- if (StringRef("{st}").equals_lower(Constraint)) {
- Res.first = X86::FP0;
- Res.second = &X86::RFP80RegClass;
- return Res;
- }
+ if (StringRef("{st}").equals_lower(Constraint))
+ return std::make_pair(X86::FP0, &X86::RFP80RegClass);
// flags -> EFLAGS
- if (StringRef("{flags}").equals_lower(Constraint)) {
- Res.first = X86::EFLAGS;
- Res.second = &X86::CCRRegClass;
- return Res;
- }
+ if (StringRef("{flags}").equals_lower(Constraint))
+ return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
// 'A' means [ER]AX + [ER]DX.
if (Constraint == "A") {
- if (Subtarget.is64Bit()) {
- Res.first = X86::RAX;
- Res.second = &X86::GR64_ADRegClass;
- } else {
- assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
- "Expecting 64, 32 or 16 bit subtarget");
- Res.first = X86::EAX;
- Res.second = &X86::GR32_ADRegClass;
- }
- return Res;
+ if (Subtarget.is64Bit())
+ return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
+ assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
+ "Expecting 64, 32 or 16 bit subtarget");
+ return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
}
return Res;
}
@@ -40640,18 +42521,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
(isFRClass(*Res.second) || isGRClass(*Res.second)) &&
TRI->getEncodingValue(Res.first) >= 8) {
// Register requires REX prefix, but we're in 32-bit mode.
- Res.first = 0;
- Res.second = nullptr;
- return Res;
+ return std::make_pair(0, nullptr);
}
// Make sure it isn't a register that requires AVX512.
if (!Subtarget.hasAVX512() && isFRClass(*Res.second) &&
TRI->getEncodingValue(Res.first) & 0x10) {
// Register requires EVEX prefix.
- Res.first = 0;
- Res.second = nullptr;
- return Res;
+ return std::make_pair(0, nullptr);
}
// Otherwise, check to see if this is a register class of the wrong value
@@ -40679,14 +42556,36 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
: Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
: Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
- : &X86::GR64RegClass;
- if (RC->contains(DestReg))
- Res = std::make_pair(DestReg, RC);
- } else {
- // No register found/type mismatch.
- Res.first = 0;
- Res.second = nullptr;
+ : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr)
+ : nullptr;
+ if (Size == 64 && !is64Bit) {
+ // Model GCC's behavior here and select a fixed pair of 32-bit
+ // registers.
+ switch (Res.first) {
+ case X86::EAX:
+ return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
+ case X86::EDX:
+ return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
+ case X86::ECX:
+ return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
+ case X86::EBX:
+ return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
+ case X86::ESI:
+ return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
+ case X86::EDI:
+ return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
+ case X86::EBP:
+ return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
+ default:
+ return std::make_pair(0, nullptr);
+ }
+ }
+ if (RC && RC->contains(DestReg))
+ return std::make_pair(DestReg, RC);
+ return Res;
}
+ // No register found/type mismatch.
+ return std::make_pair(0, nullptr);
} else if (isFRClass(*Class)) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index ff5006d208e5..910acd80e8b8 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -98,7 +98,7 @@ namespace llvm {
SETCC,
/// X86 Select
- SELECT, SELECTS,
+ SELECTS,
// Same as SETCC except it's materialized with a sbb and the value is all
// ones or all zeros.
@@ -203,8 +203,9 @@ namespace llvm {
/// Dynamic (non-constant condition) vector blend where only the sign bits
/// of the condition elements are used. This is used to enforce that the
- /// condition mask is not valid for generic VSELECT optimizations.
- SHRUNKBLEND,
+ /// condition mask is not valid for generic VSELECT optimizations. This
+ /// can also be used to implement the intrinsics.
+ BLENDV,
/// Combined add and sub on an FP vector.
ADDSUB,
@@ -226,14 +227,6 @@ namespace llvm {
SCALEF,
SCALEFS,
- // Integer add/sub with unsigned saturation.
- ADDUS,
- SUBUS,
-
- // Integer add/sub with signed saturation.
- ADDS,
- SUBS,
-
// Unsigned Integer average.
AVG,
@@ -295,22 +288,27 @@ namespace llvm {
// Vector move to low scalar and zero higher vector elements.
VZEXT_MOVL,
- // Vector integer zero-extend.
- VZEXT,
- // Vector integer signed-extend.
- VSEXT,
-
// Vector integer truncate.
VTRUNC,
// Vector integer truncate with unsigned/signed saturation.
VTRUNCUS, VTRUNCS,
+ // Masked version of the above. Used when less than a 128-bit result is
+ // produced since the mask only applies to the lower elements and can't
+ // be represented by a select.
+ // SRC, PASSTHRU, MASK
+ VMTRUNC, VMTRUNCUS, VMTRUNCS,
+
// Vector FP extend.
VFPEXT, VFPEXT_RND, VFPEXTS_RND,
// Vector FP round.
VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
+ // Masked version of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ VMFPROUND,
+
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -349,21 +347,14 @@ namespace llvm {
CMPM_RND,
// Arithmetic operations with FLAGS results.
- ADD, SUB, ADC, SBB, SMUL,
- INC, DEC, OR, XOR, AND,
+ ADD, SUB, ADC, SBB, SMUL, UMUL,
+ OR, XOR, AND,
// Bit field extract.
BEXTR,
- // LOW, HI, FLAGS = umul LHS, RHS.
- UMUL,
-
- // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
- SMUL8, UMUL8,
-
- // 8-bit divrem that zero-extend the high result (AH).
- UDIVREM8_ZEXT_HREG,
- SDIVREM8_SEXT_HREG,
+ // Zero High Bits Starting with Specified Bit Position.
+ BZHI,
// X86-specific multiply by immediate.
MUL_IMM,
@@ -513,16 +504,20 @@ namespace llvm {
// Vector float/double to signed/unsigned integer.
CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
// Scalar float/double to signed/unsigned integer.
- CVTS2SI_RND, CVTS2UI_RND,
+ CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
// Vector float/double to signed/unsigned integer with truncation.
CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
// Scalar float/double to signed/unsigned integer with truncation.
- CVTTS2SI_RND, CVTTS2UI_RND,
+ CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
// Vector signed/unsigned integer to float/double.
CVTSI2P, CVTUI2P,
+ // Masked versions of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
+
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
@@ -570,6 +565,10 @@ namespace llvm {
// Conversions between float and half-float.
CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
+ // Masked version of above.
+ // SRC, RND, PASSTHRU, MASK
+ MCVTPS2PH,
+
// Galois Field Arithmetic Instructions
GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
@@ -588,7 +587,7 @@ namespace llvm {
/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
- LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
+ LADD, LSUB, LOR, LXOR, LAND,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
@@ -833,6 +832,8 @@ namespace llvm {
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
+ bool shouldSplatInsEltVarIndex(EVT VT) const override;
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
@@ -866,10 +867,21 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth) const override;
- SDValue unwrapAddress(SDValue N) const override;
+ bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &KnownUndef,
+ APInt &KnownZero,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const override;
- bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
- int64_t &Offset) const override;
+ bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const override;
+
+ SDValue unwrapAddress(SDValue N) const override;
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
@@ -932,6 +944,8 @@ namespace llvm {
/// the immediate into a register.
bool isLegalAddImmediate(int64_t Imm) const override;
+ bool isLegalStoreImmediate(int64_t Imm) const override;
+
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
@@ -1030,13 +1044,25 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
+
bool convertSelectOfConstantsToMath(EVT VT) const override;
+ bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+
+ bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+ bool IsSigned) const override;
+
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
+ /// Scalar ops always have equal or better analysis/performance/power than
+ /// the vector equivalent, so this always makes sense if the scalar op is
+ /// supported.
+ bool shouldScalarizeBinop(SDValue) const override;
+
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
unsigned AddrSpace) const override {
// If we can replace more than 2 scalar stores, there will be a reduction
@@ -1095,7 +1121,7 @@ namespace llvm {
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
/// Customize the preferred legalization strategy for certain types.
- LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
+ LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
@@ -1347,11 +1373,6 @@ namespace llvm {
MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- /// Emit nodes that will be selected as "test Op0,Op0", or something
- /// equivalent, for use with the given x86 condition code.
- SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
- SelectionDAG &DAG) const;
-
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
@@ -1360,6 +1381,13 @@ namespace llvm {
/// Convert a comparison if required by the subtarget.
SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+ /// Emit flags for the given setcc condition and operands. Also returns the
+ /// corresponding X86 condition code constant in X86CC.
+ SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
+ ISD::CondCode CC, const SDLoc &dl,
+ SelectionDAG &DAG,
+ SDValue &X86CC) const;
+
/// Check if replacement of SQRT with RSQRT should be disabled.
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
@@ -1407,9 +1435,9 @@ namespace llvm {
MachineMemOperand *MMO)
: MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
- const SDValue &getBasePtr() const { return getOperand(1); }
- const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getValue() const { return getOperand(3); }
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
@@ -1480,7 +1508,6 @@ namespace llvm {
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
const SDValue &getMask() const { return getOperand(2); }
- const SDValue &getValue() const { return getOperand(1); }
const SDValue &getScale() const { return getOperand(5); }
static bool classof(const SDNode *N) {
@@ -1496,6 +1523,8 @@ namespace llvm {
: X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
MMO) {}
+ const SDValue &getPassThru() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MGATHER;
}
@@ -1508,6 +1537,8 @@ namespace llvm {
: X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
MMO) {}
+ const SDValue &getValue() const { return getOperand(1); }
+
static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MSCATTER;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp
new file mode 100644
index 000000000000..30b46a09ef0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -0,0 +1,253 @@
+//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts cache prefetch instructions based on a profile. The pass
+// assumes DiscriminateMemOps ran immediately before, to ensure debug info
+// matches the one used at profile generation time. The profile is encoded in
+// afdo format (text or binary). It contains prefetch hint recommendations.
+// Each recommendation is made in terms of debug info locations, a type (i.e.
+// nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a
+// memory operand (see X86DiscriminateMemOps). The prefetch will be made for
+// a location at that memory operand + the delta specified in the
+// recommendation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+using namespace llvm;
+using namespace sampleprof;
+
+static cl::opt<std::string>
+ PrefetchHintsFile("prefetch-hints-file",
+ cl::desc("Path to the prefetch hints profile."),
+ cl::Hidden);
+namespace {
+
+class X86InsertPrefetch : public MachineFunctionPass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool doInitialization(Module &) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ struct PrefetchInfo {
+ unsigned InstructionID;
+ int64_t Delta;
+ };
+ typedef SmallVectorImpl<PrefetchInfo> Prefetches;
+ bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI,
+ Prefetches &prefetches) const;
+
+public:
+ static char ID;
+ X86InsertPrefetch(const std::string &PrefetchHintsFilename);
+ StringRef getPassName() const override {
+ return "X86 Insert Cache Prefetches";
+ }
+
+private:
+ std::string Filename;
+ std::unique_ptr<SampleProfileReader> Reader;
+};
+
+using PrefetchHints = SampleRecord::CallTargetMap;
+
+// Return any prefetching hints for the specified MachineInstruction. The hints
+// are returned as pairs (name, delta).
+ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
+ const MachineInstr &MI) {
+ if (const auto &Loc = MI.getDebugLoc())
+ if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
+ return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
+ Loc->getBaseDiscriminator());
+ return std::error_code();
+}
+
+// The prefetch instruction can't take memory operands involving vector
+// registers.
+bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
+ unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
+ unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
+ return (BaseReg == 0 ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
+ (IndexReg == 0 ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg));
+}
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char X86InsertPrefetch::ID = 0;
+
+X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename)
+ : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {}
+
+/// Return true if the provided MachineInstruction has cache prefetch hints. In
+/// that case, the prefetch hints are stored, in order, in the Prefetches
+/// vector.
+bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
+ const MachineInstr &MI,
+ Prefetches &Prefetches) const {
+ assert(Prefetches.empty() &&
+ "Expected caller passed empty PrefetchInfo vector.");
+ static const std::pair<const StringRef, unsigned> HintTypes[] = {
+ {"_nta_", X86::PREFETCHNTA},
+ {"_t0_", X86::PREFETCHT0},
+ {"_t1_", X86::PREFETCHT1},
+ {"_t2_", X86::PREFETCHT2},
+ };
+ static const char *SerializedPrefetchPrefix = "__prefetch";
+
+ const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
+ if (!T)
+ return false;
+ int16_t max_index = -1;
+ // Convert serialized prefetch hints into PrefetchInfo objects, and populate
+ // the Prefetches vector.
+ for (const auto &S_V : *T) {
+ StringRef Name = S_V.getKey();
+ if (Name.consume_front(SerializedPrefetchPrefix)) {
+ int64_t D = static_cast<int64_t>(S_V.second);
+ unsigned IID = 0;
+ for (const auto &HintType : HintTypes) {
+ if (Name.startswith(HintType.first)) {
+ Name = Name.drop_front(HintType.first.size());
+ IID = HintType.second;
+ break;
+ }
+ }
+ if (IID == 0)
+ return false;
+ uint8_t index = 0;
+ Name.consumeInteger(10, index);
+
+ if (index >= Prefetches.size())
+ Prefetches.resize(index + 1);
+ Prefetches[index] = {IID, D};
+ max_index = std::max(max_index, static_cast<int16_t>(index));
+ }
+ }
+ assert(max_index + 1 >= 0 &&
+ "Possible overflow: max_index + 1 should be positive.");
+ assert(static_cast<size_t>(max_index + 1) == Prefetches.size() &&
+ "The number of prefetch hints received should match the number of "
+ "PrefetchInfo objects returned");
+ return !Prefetches.empty();
+}
+
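To make the name decoding in findPrefetchInfo concrete, a hypothetical pair of profile entries (the on-disk profile syntax is not shown in this patch, only the call-target names it yields):

// Hypothetical call-target entries and how findPrefetchInfo decodes them:
//   "__prefetch_nta_0" with value 64  -> Prefetches[0] = {X86::PREFETCHNTA, 64}
//   "__prefetch_t2_1"  with value 128 -> Prefetches[1] = {X86::PREFETCHT2, 128}
// The trailing integer is the slot in the Prefetches vector; the sample value
// becomes the Delta added to the matched memory operand's displacement.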
+bool X86InsertPrefetch::doInitialization(Module &M) {
+ if (Filename.empty())
+ return false;
+
+ LLVMContext &Ctx = M.getContext();
+ ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
+ SampleProfileReader::create(Filename, Ctx);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg,
+ DiagnosticSeverity::DS_Warning));
+ return false;
+ }
+ Reader = std::move(ReaderOrErr.get());
+ Reader->read();
+ return true;
+}
+
+void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+}
+
+bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
+ if (!Reader)
+ return false;
+ const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction());
+ if (!Samples)
+ return false;
+
+ bool Changed = false;
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<PrefetchInfo, 4> Prefetches;
+ for (auto &MBB : MF) {
+ for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) {
+ auto Current = MI;
+ ++MI;
+
+ int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags);
+ if (Offset < 0)
+ continue;
+ unsigned Bias = X86II::getOperandBias(Current->getDesc());
+ int MemOpOffset = Offset + Bias;
+ // FIXME(mtrofin): ORE message when the recommendation cannot be taken.
+ if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset))
+ continue;
+ Prefetches.clear();
+ if (!findPrefetchInfo(Samples, *Current, Prefetches))
+ continue;
+ assert(!Prefetches.empty() &&
+ "The Prefetches vector should contain at least a value if "
+ "findPrefetchInfo returned true.");
+ for (auto &PrefInfo : Prefetches) {
+ unsigned PFetchInstrID = PrefInfo.InstructionID;
+ int64_t Delta = PrefInfo.Delta;
+ const MCInstrDesc &Desc = TII->get(PFetchInstrID);
+ MachineInstr *PFetch =
+ MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, PFetch);
+
+ assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
+ X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
+ X86::AddrSegmentReg == 4 &&
+ "Unexpected change in X86 operand offset order.");
+
+ // This assumes X86::AddrBaseReg = 0, X86::AddrScaleAmt = 1, etc.
+ // FIXME(mtrofin): consider adding a:
+ // MachineInstrBuilder::set(unsigned offset, op).
+ MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg())
+ .addImm(
+ Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm())
+ .addReg(
+ Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg())
+ .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() +
+ Delta)
+ .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg)
+ .getReg());
+
+ if (!Current->memoperands_empty()) {
+ MachineMemOperand *CurrentOp = *(Current->memoperands_begin());
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize()));
+ }
+
+ // Insert before Current. This is because Current may clobber some of
+ // the registers used to describe the input memory operand.
+ MBB.insert(Current, PFetch);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createX86InsertPrefetchPass() {
+ return new X86InsertPrefetch(PrefetchHintsFile);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
index 46dc6bf7661a..49e9e924887a 100644
--- a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -38,7 +38,7 @@ multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
@@ -51,7 +51,7 @@ multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index 2d95061a8213..7423cb85acd2 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -66,21 +66,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load patterns
- // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
- // due to load promotion during legalization
- PatFrag LdFrag = !cast<PatFrag>("load" #
- !if (!eq (TypeVariantName, "i"),
- !if (!eq (Size, 128), "v2i64",
- !if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512), "v8i64",
- VTName))), VTName));
-
- PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
- !if (!eq (TypeVariantName, "i"),
- !if (!eq (Size, 128), "v2i64",
- !if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512), "v8i64",
- VTName))), VTName));
+ PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
+
+ PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
@@ -107,10 +95,6 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
- // A vector tye of the same width with element type i64. This is used to
- // create patterns for logic ops.
- ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
-
// A vector type of the same width with element type i32. This is used to
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
@@ -518,13 +502,13 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm)),
(vinsert_for_mask:$src3 (To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<From.EltSize, From.CD8TupleForm>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -547,7 +531,7 @@ multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1),
- (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rm")
To.RC:$src1, addr:$src2,
@@ -680,9 +664,7 @@ let Predicates = p in {
(vselect Cast.KRCWM:$mask,
(bitconvert
(vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT
- (bitconvert
- (From.LdFrag addr:$src2))),
+ (From.VT (From.LdFrag addr:$src2)),
(iPTR imm))),
Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rmkz")
@@ -783,7 +765,7 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
EVEX_4V, EVEX_CD8<32, CD8VT1>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
//===----------------------------------------------------------------------===//
@@ -1374,7 +1356,7 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.VT (_Src.LdFrag addr:$src))))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
@@ -1389,7 +1371,7 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(null_frag),
(_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.VT (_Src.LdFrag addr:$src))))>,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
@@ -1442,11 +1424,11 @@ defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
-def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
+def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
-def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
+def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
-def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
+def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
(VBROADCASTI64X4rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
@@ -1474,9 +1456,9 @@ def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
@@ -1506,11 +1488,11 @@ def : Pat<(vselect VK8WM:$mask,
VR512:$src0),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
(bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
VR512:$src0),
(VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1527,9 +1509,9 @@ def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
@@ -1591,11 +1573,11 @@ def : Pat<(vselect VK4WM:$mask,
VR256X:$src0),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
(bc_v4i64 (v8i32 immAllZerosV))),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
VR256X:$src0),
(VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
@@ -1641,11 +1623,11 @@ def : Pat<(vselect VK8WM:$mask,
VR512:$src0),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
(bc_v8i64 (v16i32 immAllZerosV))),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
VR512:$src0),
(VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1741,8 +1723,8 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
- (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src3)))), 1>,
+ EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1758,7 +1740,7 @@ multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
(_.VT (X86VPermt2 _.RC:$src2,
IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
@@ -1859,8 +1841,8 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
- (bitconvert (_.LdFrag addr:$src3)))), 1>,
- EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.LdFrag addr:$src3))), 1>,
+ EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
@@ -1874,7 +1856,7 @@ multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
(_.VT (X86VPermt2 _.RC:$src1,
IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
@@ -1955,19 +1937,19 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
}
@@ -1980,7 +1962,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
@@ -1988,7 +1970,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
@@ -1996,7 +1978,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2067,7 +2049,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
@@ -2094,7 +2076,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
@@ -2123,7 +2105,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
(_.ScalarLdFrag addr:$src2),
imm:$cc))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2149,8 +2131,8 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src2))))]>,
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
@@ -2165,9 +2147,8 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ (_.VT (_.LdFrag addr:$src2)))))]>,
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
@@ -2180,7 +2161,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
@@ -2192,7 +2173,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))]>,
EVEX_4V, EVEX_K, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
@@ -2291,9 +2272,9 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
[(set _.KRC:$dst, (_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
cond)))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
@@ -2316,10 +2297,9 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
- (_.VT (bitconvert
- (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
cond))))]>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -2333,7 +2313,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
@@ -2348,17 +2328,17 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"), []>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
- def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
+ def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi")
_.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
def : Pat<(and _.KRCWM:$mask,
- (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
+ (_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmik")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
@@ -2380,7 +2360,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond)))]>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, AVX512ICC:$cc),
@@ -2393,7 +2373,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond))))]>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
@@ -2403,7 +2383,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
@@ -2411,7 +2391,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
@@ -2544,9 +2524,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ (_.VT (_.LdFrag addr:$src2)),
imm:$cc)>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2557,7 +2537,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc)>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
@@ -2573,7 +2553,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
@@ -2582,7 +2562,7 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
@@ -2694,7 +2674,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
@@ -2702,7 +2682,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(OpNode _.ScalarIntMemCPat:$src1,
(i32 imm:$src2))))]>,
- EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2732,17 +2712,17 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode
- (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
- (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2))))]>,
- EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
@@ -2752,7 +2732,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2)))]>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
@@ -2762,7 +2742,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))]>,
- EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2907,8 +2887,15 @@ let Predicates = [HasDQI] in {
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
+ def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
+ (KMOVWkm addr:$src)>;
}
+def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
+ SDTCVecEltisVT<1, i1>,
+ SDTCisPtrTy<2>]>>;
+
let Predicates = [HasAVX512] in {
multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
@@ -2916,6 +2903,12 @@ let Predicates = [HasAVX512] in {
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
+
+ def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
+
+ def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
+ (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
}
defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
@@ -3353,7 +3346,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
!if(NoRMPattern, [],
[(set _.RC:$dst,
- (_.VT (bitconvert (ld_frag addr:$src))))]),
+ (_.VT (ld_frag addr:$src)))]),
_.ExeDomain>, EVEX, Sched<[Sched.RM]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
@@ -3372,7 +3365,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT
(vselect _.KRCWM:$mask,
- (_.VT (bitconvert (ld_frag addr:$src1))),
+ (_.VT (ld_frag addr:$src1)),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K, Sched<[Sched.RM]>;
}
@@ -3381,7 +3374,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
"${dst} {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
- (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
+ (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
_.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
@@ -3474,7 +3467,7 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
NotMemoryFoldable;
- def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
+ def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
(!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
_.KRCWM:$mask, _.RC:$src)>;
@@ -3681,6 +3674,20 @@ let Predicates = [HasBWI, NoVLX] in {
}
let Predicates = [HasAVX512] in {
+ // 512-bit load.
+ def : Pat<(alignedloadv16i32 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(alignedloadv32i16 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(alignedloadv64i8 addr:$src),
+ (VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(loadv16i32 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+ def : Pat<(loadv32i16 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+ def : Pat<(loadv64i8 addr:$src),
+ (VMOVDQU64Zrm addr:$src)>;
+
// 512-bit store.
def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
@@ -3697,6 +3704,20 @@ let Predicates = [HasAVX512] in {
}
let Predicates = [HasVLX] in {
+ // 128-bit load.
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (VMOVDQU64Z128rm addr:$src)>;
+
// 128-bit store.
def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
@@ -3711,6 +3732,20 @@ let Predicates = [HasVLX] in {
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
+ // 256-bit load.
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(alignedloadv16i16 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(alignedloadv32i8 addr:$src),
+ (VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+ def : Pat<(loadv16i16 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+ def : Pat<(loadv32i8 addr:$src),
+ (VMOVDQU64Z256rm addr:$src)>;
+
// 256-bit store.
def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
@@ -4029,10 +4064,10 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
-def : Pat<(masked_store addr:$dst, Mask,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4044,10 +4079,10 @@ multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
-def : Pat<(masked_store addr:$dst, Mask,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4064,16 +4099,16 @@ multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
SubRegIndex subreg> {
// AVX512F pattern.
-def : Pat<(masked_store addr:$dst, Mask512,
+def : Pat<(masked_store
(_.info512.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (iPTR 0)))),
+ (iPTR 0))), addr:$dst, Mask512),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// AVX512VL pattern.
-def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
+def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
@@ -4421,8 +4456,6 @@ let Predicates = [HasAVX512] in {
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
@@ -4497,7 +4530,7 @@ let Predicates = [HasAVX512] in {
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
@@ -4593,6 +4626,12 @@ let Predicates = [HasAVX512], AddedComplexity = 400 in {
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v8i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v16i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v32i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
+ def : Pat<(v64i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZrm addr:$src)>;
}
let Predicates = [HasVLX], AddedComplexity = 400 in {
@@ -4609,6 +4648,12 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v8i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v16i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
+ def : Pat<(v32i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
@@ -4623,6 +4668,12 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAZ128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -4641,10 +4692,9 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))>,
+ (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4659,7 +4709,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4773,9 +4823,9 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
- (bitconvert (_Src.LdFrag addr:$src2))))>,
+ (_Src.LdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
@@ -4786,20 +4836,20 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
(_Brdct.VT (X86VBroadcast
(_Brdct.ScalarLdFrag addr:$src2))))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SchedWriteVecALU, 0>;
-defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
+defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
SchedWriteVecALU, HasBWI, 1>;
-defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
+defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
SchedWriteVecALU, HasBWI, 0>;
-defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
+defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
SchedWriteVecALU, HasBWI, 1>;
-defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
+defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
SchedWritePMULLD, HasAVX512, 1>, T8PD;
@@ -4859,7 +4909,7 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_Src.VT (X86VBroadcast
(_Src.ScalarLdFrag addr:$src2))))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
@@ -4878,9 +4928,9 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
- (bitconvert (_Src.LdFrag addr:$src2))))>,
+ (_Src.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
@@ -5046,95 +5096,356 @@ let Predicates = [HasAVX512, NoVLX] in {
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
-// OpNodeMsk is the OpNode to use when element size is important. OpNode will
-// be set to null_frag for 32-bit elements.
-multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, bit IsCommutable = 0> {
- let hasSideEffects = 0 in
- defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
- "$src2, $src1", "$src1, $src2",
- (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
- (bitconvert (_.VT _.RC:$src2)))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- _.RC:$src2)))),
- IsCommutable>, AVX512BIBase, EVEX_4V,
- Sched<[sched]>;
+defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
+ SchedWriteVecLogic, HasAVX512, 1>;
+defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
+ SchedWriteVecLogic, HasAVX512>;
- let hasSideEffects = 0, mayLoad = 1 in
- defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
- "$src2, $src1", "$src1, $src2",
- (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
- (bitconvert (_.LdFrag addr:$src2)))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))))>,
- AVX512BIBase, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
+ (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
+ (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
+ (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
+ (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
+ (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
+ (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
+ (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
+ (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
+
+ def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPORQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPORQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
+ (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
+ (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1,
+ (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(or VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(xor VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128X:$src1,
+ (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;
+
+ def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
+ (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
+ (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
+ (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
+ (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
+ (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
+ (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
+ (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
+ (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
+
+ def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPORQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPORQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
+ (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
+ (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1,
+ (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;
+
+ def : Pat<(and VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(or VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(xor VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256X:$src1,
+ (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
}
-// OpNodeMsk is the OpNode to use where element size is important. So use
-// for all of the broadcast patterns.
-multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
- bit IsCommutable = 0> :
- avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
- IsCommutable> {
- defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
- (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert
- (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))),
- (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
- (bitconvert
- (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))))>,
- AVX512BIBase, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+let Predicates = [HasAVX512] in {
+ def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
+ (VPANDQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
+ (VPANDQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
+ (VPORQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
+ (VPORQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
+ (VPXORQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
+ (VPXORQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
+ (VPANDNQZrr VR512:$src1, VR512:$src2)>;
+ def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
+ (VPANDNQZrr VR512:$src1, VR512:$src2)>;
+
+ def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPANDQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPANDQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPORQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPORQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPXORQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPXORQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
+ (VPANDNQZrm VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
+ (VPANDNQZrm VR512:$src1, addr:$src2)>;
+
+ def : Pat<(and VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPORDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPXORDZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1,
+ (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
+ (VPANDNDZrmb VR512:$src1, addr:$src2)>;
+
+ def : Pat<(and VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(or VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPORQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(xor VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPXORQZrmb VR512:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR512:$src1,
+ (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
+ (VPANDNQZrmb VR512:$src1, addr:$src2)>;
+}
+
+// Patterns to catch vselect with different type than logic op.
+multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo IntInfo> {
+ // Masked register-register logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, _.RC:$src2)>;
+
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
+ _.RC:$src2)>;
+
+ // Masked register-memory logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
+ (load addr:$src2)))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
+ (load addr:$src2)))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
+ addr:$src2)>;
}
-multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
- SDNode OpNodeMsk, X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo VTInfo,
- bit IsCommutable = 0> {
- let Predicates = [HasAVX512] in
- defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
- VTInfo.info512, IsCommutable>, EVEX_V512;
+multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo IntInfo> {
+ // Register-broadcast logical operations.
+ def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (IntInfo.VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+}
- let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
- VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
- VTInfo.info128, IsCommutable>, EVEX_V128;
- }
+multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo SelectInfo,
+ AVX512VLVectorVTInfo IntInfo> {
+let Predicates = [HasVLX] in {
+ defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
+ IntInfo.info128>;
+ defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
+ IntInfo.info256>;
+}
+let Predicates = [HasAVX512] in {
+ defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
+ IntInfo.info512>;
+}
}
-multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
- SDNode OpNode, X86SchedWriteWidths sched,
- bit IsCommutable = 0> {
- defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
- avx512vl_i64_info, IsCommutable>,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
- avx512vl_i32_info, IsCommutable>,
- EVEX_CD8<32, CD8VF>;
-}
-
-defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
- SchedWriteVecLogic, 1>;
-defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
- SchedWriteVecLogic, 1>;
-defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
- SchedWriteVecLogic, 1>;
-defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
- SchedWriteVecLogic>;
+multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo SelectInfo,
+ AVX512VLVectorVTInfo IntInfo> {
+let Predicates = [HasVLX] in {
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
+ SelectInfo.info128, IntInfo.info128>;
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
+ SelectInfo.info256, IntInfo.info256>;
+}
+let Predicates = [HasAVX512] in {
+ defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
+ SelectInfo.info512, IntInfo.info512>;
+}
+}
+
+multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
+ // i64 vselect with i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
+ avx512vl_i8_info>;
+
+ // i32 vselect with i64/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
+ avx512vl_i8_info>;
+
+ // f32 vselect with i64/i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
+ avx512vl_i8_info>;
+
+ // f64 vselect with i64/i32/i16/i8 logic op
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i64_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i16_info>;
+ defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
+ avx512vl_i8_info>;
+
+ defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
+ avx512vl_f32_info,
+ avx512vl_i32_info>;
+ defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
+ avx512vl_f64_info,
+ avx512vl_i64_info>;
+}
+
+defm : avx512_logical_lowering_types<"VPAND", and>;
+defm : avx512_logical_lowering_types<"VPOR", or>;
+defm : avx512_logical_lowering_types<"VPXOR", xor>;
+defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -5157,7 +5468,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
@@ -5171,7 +5482,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
@@ -5202,7 +5513,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -5217,7 +5528,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5285,7 +5596,7 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
@@ -5320,7 +5631,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
@@ -5328,7 +5639,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
EVEX_4V, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
@@ -5439,73 +5750,6 @@ defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-// Patterns catch floating point selects with bitcasted integer logic ops.
-multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
- X86VectorVTInfo _, Predicate prd> {
-let Predicates = [prd] in {
- // Masked register-register logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
- _.RC:$src2)>;
- // Masked register-memory logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1,
- (load addr:$src2)))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
- addr:$src2)>;
- // Register-broadcast logical operations.
- def : Pat<(_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))))),
- (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert
- (_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (bitconvert
- (_.i64VT (OpNode _.RC:$src1,
- (bitconvert (_.VT
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
- _.RC:$src1, addr:$src2)>;
-}
-}
-
-multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
- defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
- defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
- defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
-}
-
-defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
-defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
-defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
-defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
-
let Predicates = [HasVLX,HasDQI] in {
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
@@ -5563,7 +5807,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
@@ -5571,7 +5815,7 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5588,7 +5832,7 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5635,18 +5879,15 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV)>,
+ (OpNode (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)>,
EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (bitconvert
- (_.i64VT (and _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2))))),
+ (OpNode (and _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Patterns for compare with 0 that just use the same source twice.
@@ -5671,13 +5912,13 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
(_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _, string Name> {
- def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ def : Pat<(_.KVT (OpNode (and _.RC:$src1, _.RC:$src2),
_.ImmAllZerosV)),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(Name # "Zrr")
@@ -5688,7 +5929,7 @@ multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
_.KRC))>;
def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
+ (OpNode (and _.RC:$src1, _.RC:$src2),
_.ImmAllZerosV))),
(COPY_TO_REGCLASS
(!cast<Instruction>(Name # "Zrrk")
@@ -5765,7 +6006,7 @@ multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
v16i8x_info, NAME#"B">, EVEX_V128;
}
- let Predicates = [HasAVX512, NoVLX] in {
+ let Predicates = [HasBWI, NoVLX] in {
defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
@@ -5791,6 +6032,125 @@ defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
SchedWriteVecLogic>, T8XS;
+
+multiclass avx512_vptest_lowering_pats<string InstrStr, PatFrag OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo AndInfo> {
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr # "rr") _.RC:$src1, _.RC:$src2)>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
+ (!cast<Instruction>(InstrStr # "rrk") _.KRC:$mask, _.RC:$src1,
+ _.RC:$src2)>;
+
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1,
+ (AndInfo.LdFrag addr:$src2)))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr # "rm") _.RC:$src1, addr:$src2)>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1,
+ (AndInfo.LdFrag addr:$src2)))),
+ _.ImmAllZerosV))),
+ (!cast<Instruction>(InstrStr # "rmk") _.KRC:$mask, _.RC:$src1,
+ addr:$src2)>;
+}
+
+// Patterns to use 512-bit instructions when 128/256 are not available.
+multiclass avx512_vptest_lowering_wide_pats<string InstrStr, PatFrag OpNode,
+ X86VectorVTInfo _,
+ X86VectorVTInfo AndInfo,
+ X86VectorVTInfo ExtendInfo> {
+ def : Pat<(_.KVT (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (_.KVT (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstrStr#"rr")
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src1, _.SubRegIdx),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src2, _.SubRegIdx)),
+ _.KRC))>;
+
+ def : Pat<(_.KVT (and _.KRC:$mask,
+ (OpNode (bitconvert
+ (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(InstrStr#"rrk")
+ (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src1, _.SubRegIdx),
+ (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+ _.RC:$src2, _.SubRegIdx)),
+ _.KRC)>;
+}
+
+multiclass avx512_vptest_lowering_sizes<string InstrStr, PatFrag OpNode,
+ Predicate prd,
+ AVX512VLVectorVTInfo CmpInfo,
+ AVX512VLVectorVTInfo AndInfo> {
+let Predicates = [prd, HasVLX] in {
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z128", OpNode,
+ CmpInfo.info128, AndInfo.info128>;
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z256", OpNode,
+ CmpInfo.info256, AndInfo.info256>;
+}
+let Predicates = [prd] in {
+ defm : avx512_vptest_lowering_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info512, AndInfo.info512>;
+}
+
+let Predicates = [prd, NoVLX] in {
+ defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info128, AndInfo.info128,
+ CmpInfo.info512>;
+ defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
+ CmpInfo.info256, AndInfo.info256,
+ CmpInfo.info512>;
+}
+}
+
+multiclass avx512_vptest_lowering_types<string InstrStr, PatFrag OpNode> {
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i32_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
+ avx512vl_i8_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i32_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
+ avx512vl_i16_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
+ avx512vl_i32_info, avx512vl_i64_info>;
+
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i8_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i16_info>;
+ defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
+ avx512vl_i64_info, avx512vl_i32_info>;
+}
+
+defm : avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem>;
+defm : avx512_vptest_lowering_types<"VPTESTNM", X86pcmpeqm>;
+
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
@@ -5807,7 +6167,7 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
(i8 imm:$src2)))>,
Sched<[sched.Folded]>;
}
@@ -5826,7 +6186,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, ValueType SrcVT,
- PatFrag bc_frag, X86VectorVTInfo _> {
+ X86VectorVTInfo _> {
// src2 is always 128-bit
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5837,26 +6197,26 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
+ (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
AVX512BIBase,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, ValueType SrcVT,
- PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo VTInfo,
Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
- bc_frag, VTInfo.info512>, EVEX_V512,
+ VTInfo.info512>, EVEX_V512,
EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
- bc_frag, VTInfo.info256>, EVEX_V256,
+ VTInfo.info256>, EVEX_V256,
EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
- bc_frag, VTInfo.info128>, EVEX_V128,
+ VTInfo.info128>, EVEX_V128,
EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
}
}
@@ -5866,12 +6226,12 @@ multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
X86SchedWriteWidths sched,
bit NotEVEX2VEXConvertibleQ = 0> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
- bc_v4i32, avx512vl_i32_info, HasAVX512>;
+ avx512vl_i32_info, HasAVX512>;
let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
- bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
+ avx512vl_i64_info, HasAVX512>, VEX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
- bc_v2i64, avx512vl_i16_info, HasBWI>;
+ avx512vl_i16_info, HasBWI>;
}
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
@@ -5991,9 +6351,9 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
- (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
+ (_.VT (_.LdFrag addr:$src2))))>,
AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6007,7 +6367,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))>,
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -6091,7 +6451,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
_.RC:$src2)>;
- def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
+ def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
@@ -6099,7 +6459,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
+ (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
@@ -6108,7 +6468,7 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
+ (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
@@ -6333,9 +6693,9 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode
_.RC:$src1,
- (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
+ (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
@@ -6345,7 +6705,7 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
(Ctrl.VT (X86VBroadcast
(Ctrl.ScalarLdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
@@ -6448,7 +6808,7 @@ multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
(OpNode _.RC:$src1,
(_.VT (bitconvert
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
@@ -6524,7 +6884,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6532,7 +6892,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
_.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6598,7 +6958,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6607,7 +6967,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode _.RC:$src2,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6675,7 +7035,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
                          // Pattern is in 312 order so that the load is in a different place from the
                          // 213 and 231 patterns. This helps tablegen's duplicate pattern detection.
@@ -6685,7 +7045,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6750,7 +7110,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
@@ -6767,7 +7127,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
@@ -7069,7 +7429,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -7078,7 +7438,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src2,
(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
_.RC:$src1)>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
@@ -7120,7 +7480,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
let isCodeGenOnly = 1 in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
@@ -7139,7 +7499,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2),
(i32 FROUND_CURRENT)))]>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}//isCodeGenOnly = 1
}
@@ -7246,26 +7606,26 @@ def : Pat<(f64 (uint_to_fp GR64:$src)),
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
+ SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr,
bit CodeGenOnly = 1> {
let Predicates = [HasAVX512] in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
+ [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
EVEX, VEX_LIG, Sched<[sched]>;
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
- [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
+ [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC,
Sched<[sched]>;
let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
- (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
- (i32 FROUND_CURRENT)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
@@ -7276,9 +7636,10 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
+ SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
string aliasStr> :
- avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
+ avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, OpNodeRnd, sched, asm, aliasStr, 0> {
let Predicates = [HasAVX512] in {
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
@@ -7287,52 +7648,31 @@ multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
}
// Convert float/double to signed/unsigned int 32/64
-defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
- X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
+defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
- X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
+defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
- X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
- X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
- X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
+defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
- X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
+defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
- X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
+defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
- X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-// The SSE version of these instructions are disabled for AVX512.
-// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
-let Predicates = [HasAVX512] in {
- def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
- (VCVTSS2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
- (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
- def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
- (VCVTSS2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
- (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
- (VCVTSD2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
- (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
- (VCVTSD2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
- (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
-} // HasAVX512
-
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
@@ -7420,8 +7760,9 @@ def : Pat<(v2f64 (X86Movsd
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeRnd, X86FoldableSchedWrite sched,
- string aliasStr, bit CodeGenOnly = 1>{
+ SDNode OpNodeInt, SDNode OpNodeRnd,
+ X86FoldableSchedWrite sched, string aliasStr,
+ bit CodeGenOnly = 1>{
let Predicates = [HasAVX512] in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
@@ -7431,13 +7772,12 @@ let Predicates = [HasAVX512] in {
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
- EVEX, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
- (i32 FROUND_CURRENT)))]>,
+ [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
EVEX, VEX_LIG, Sched<[sched]>;
def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
@@ -7448,10 +7788,9 @@ let Predicates = [HasAVX512] in {
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd
- (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
- (i32 FROUND_CURRENT)))]>,
- EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ [(set _DstRC.RC:$dst,
+ (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
@@ -7463,9 +7802,10 @@ let Predicates = [HasAVX512] in {
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeRnd, X86FoldableSchedWrite sched,
+ SDNode OpNodeInt, SDNode OpNodeRnd,
+ X86FoldableSchedWrite sched,
string aliasStr> :
- avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
+ avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeInt, OpNodeRnd, sched,
aliasStr, 0> {
let Predicates = [HasAVX512] in {
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
@@ -7475,49 +7815,30 @@ let Predicates = [HasAVX512] in {
}
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
- XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
- VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+ "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
- XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
- fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
- VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+ "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
- XS, EVEX_CD8<32, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
- XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+ "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
- XD, EVEX_CD8<64, CD8VT1>;
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
- fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
- XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-
-let Predicates = [HasAVX512] in {
- def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
- (VCVTTSS2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
- (VCVTTSS2SIZrm_Int ssmem:$src)>;
- def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
- (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
- (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
- (VCVTTSD2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
- (VCVTTSD2SIZrm_Int sdmem:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
- (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
- (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
-} // HasAVX512
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+ "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
@@ -7540,7 +7861,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(_Src.VT _Src.ScalarIntMemCPat:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -7551,7 +7872,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -7648,26 +7969,53 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
- string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
+ string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+ RegisterClass MaskRC = _.KRCWM> {
- defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
+ defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src),
+ (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
+ (ins MaskRC:$mask, _Src.RC:$src),
+ OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+ (vselect MaskRC:$mask,
+ (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
EVEX, Sched<[sched]>;
- defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
+ defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins MemOp:$src),
+ (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
+ (ins MaskRC:$mask, MemOp:$src),
+ OpcodeStr#Alias, "$src", "$src",
(_.VT (OpNode (_Src.VT
- (bitconvert (_Src.LdFrag addr:$src)))))>,
+ (_Src.LdFrag addr:$src)))),
+ (vselect MaskRC:$mask,
+ (_.VT (OpNode (_Src.VT
+ (_Src.LdFrag addr:$src)))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
EVEX, Sched<[sched.Folded]>;
- defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.ScalarMemOp:$src), OpcodeStr,
+ defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.ScalarMemOp:$src),
+ (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
+ (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
+ OpcodeStr,
"${src}"##Broadcast, "${src}"##Broadcast,
(_.VT (OpNode (_Src.VT
(X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
- ))>, EVEX, EVEX_B,
- Sched<[sched.Folded]>;
+ )),
+ (vselect MaskRC:$mask,
+ (_.VT
+ (OpNode
+ (_Src.VT
+ (X86VBroadcast
+ (_Src.ScalarLdFrag addr:$src))))),
+ _.RC:$src0),
+ vselect, "$src0 = $dst">,
+ EVEX, EVEX_B, Sched<[sched.Folded]>;
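+
+  // The masked forms are written out through AVX512_maskable_common so that
+  // the mask register class can be overridden via MaskRC; e.g. the
+  // v2f64->v4f32 Z128 instantiations below pass VK2WM rather than the
+  // destination's natural KRCWM (and null_frag for OpNode, with masking
+  // recovered by the separate X86vmfpround / X86mcvt* patterns).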
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -7718,7 +8066,8 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -7752,6 +8101,35 @@ let Predicates = [HasVLX] in {
(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDZ256rm addr:$src)>;
+
+ // Special patterns to allow use of X86vmfpround for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(X86vfpround (v2f64 VR128X:$src)),
+ (VCVTPD2PSZ128rr VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(X86vfpround (loadv2f64 addr:$src)),
+ (VCVTPD2PSZ128rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
+ (VCVTPD2PSZ128rmb addr:$src)>;
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4f32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
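+
+  // Usage sketch, assuming the usual clang lowering of the masked intrinsics:
+  // _mm_mask_cvtpd_ps(src0, k, a) reaches isel as (X86vmfpround a, src0, k)
+  // and selects VCVTPD2PSZ128rrk above, while _mm_maskz_cvtpd_ps(k, a)
+  // selects the corresponding rrkz form.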
}
// Convert Signed/Unsigned Doubleword to Double
@@ -7836,7 +8214,8 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -7865,8 +8244,9 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
- sched.XMM, "{1to2}", "{x}">, EVEX_V128;
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
@@ -8149,6 +8529,122 @@ let Predicates = [HasVLX] in {
(VCVTTPD2UDQZ256rr VR256X:$src)>;
def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
(VCVTTPD2UDQZ256rm addr:$src)>;
+
+ // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
+ (VCVTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
+ (VCVTPD2DQZ128rm addr:$src)>;
+ def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2DQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86mcvttp2si for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
+ (VCVTTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
+ (VCVTTPD2DQZ128rm addr:$src)>;
+ def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTTPD2DQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
+ (VCVTPD2UDQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
+ (VCVTPD2UDQZ128rm addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2UDQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
+ (VCVTTPD2UDQZ128rr VR128X:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
+ (VCVTTPD2UDQZ128rm addr:$src)>;
+ def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTTPD2UDQZ128rmb addr:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
@@ -8365,8 +8861,7 @@ multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
(X86cvtph2ps (_src.VT
- (bitconvert
- (ld_frag addr:$src))))>,
+ (ld_frag addr:$src)))>,
T8PD, Sched<[sched.Folded]>;
}
@@ -8381,17 +8876,17 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
}
let Predicates = [HasAVX512] in
- defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
WriteCvtPH2PSZ>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
+ load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
+ load, WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
@@ -8406,12 +8901,28 @@ let Predicates = [HasVLX] in {
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
- defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
- (ins _src.RC:$src1, i32u8imm:$src2),
- "vcvtps2ph", "$src2, $src1", "$src1, $src2",
- (X86cvtps2ph (_src.VT _src.RC:$src1),
- (i32 imm:$src2)), 0, 0>,
- AVX512AIi8Base, Sched<[RR]>;
+let ExeDomain = GenericDomain in {
+ def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
+ Sched<[RR]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ _dest.RC:$src0, _src.KRCWM:$mask))]>,
+ Sched<[RR]>, EVEX_K;
+ def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
+ (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
+ [(set _dest.RC:$dst,
+ (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
+ _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
+ Sched<[RR]>, EVEX_KZ;
let hasSideEffects = 0, mayStore = 1 in {
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
@@ -8423,6 +8934,7 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
EVEX_K, Sched<[MR]>, NotMemoryFoldable;
}
}
+}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
SchedWrite Sched> {
@@ -8483,7 +8995,7 @@ let Predicates = [HasVLX] in {
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
-// Unordered/Ordered scalar fp compare with Sea and set EFLAGS
+// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
@@ -8549,7 +9061,7 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2)>, EVEX_4V,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8578,13 +9090,13 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
- EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8638,7 +9150,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8674,7 +9186,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.VT
(bitconvert (_.LdFrag addr:$src))),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
@@ -8682,7 +9194,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -8749,13 +9261,13 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(fsqrt (_.VT
(bitconvert (_.LdFrag addr:$src))))>, EVEX,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(fsqrt (_.VT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
- EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8810,7 +9322,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
(X86fsqrtRnds (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -8828,7 +9340,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8881,7 +9393,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
_.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -8893,7 +9405,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9000,14 +9512,47 @@ defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
// Integer truncate and extend operations
//-------------------------------------------------
+// PatFrags that contain a select and a truncate op. They take operands in the
+// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
+// either to the multiclasses.
+def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (trunc node:$src), node:$src0)>;
+def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (X86vtruncs node:$src), node:$src0)>;
+def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
+ (vselect node:$mask,
+ (X86vtruncus node:$src), node:$src0)>;
+
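+// Illustrative equivalence (a sketch, not an additional pattern): both
+//   (select_trunc V, Src0, K)   i.e. (vselect K, (trunc V), Src0)
+// and
+//   (X86vmtrunc V, Src0, K)
+// describe the same masked truncate, so a multiclass parameterized on
+// MaskNode can be instantiated with either kind of node.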
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
- let ExeDomain = DestInfo.ExeDomain in
- defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
- (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
- (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
- EVEX, T8XS, Sched<[sched]>;
+ let ExeDomain = DestInfo.ExeDomain in {
+ def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst|$dst, $src}",
+ [(set DestInfo.RC:$dst,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
+ EVEX, Sched<[sched]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ [(set DestInfo.RC:$dst,
+ (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
+ (DestInfo.VT DestInfo.RC:$src0),
+ SrcInfo.KRCWM:$mask))]>,
+ EVEX, EVEX_K, Sched<[sched]>;
+ def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ [(set DestInfo.RC:$dst,
+ (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
+ EVEX, EVEX_KZ, Sched<[sched]>;
+ }
let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
@@ -9031,14 +9576,18 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
(!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
addr:$dst, SrcInfo.RC:$src)>;
- def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
- (SrcInfo.VT SrcInfo.RC:$src)),
+ def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
+ SrcInfo.KRCWM:$mask),
(!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
- SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
+ SDNode OpNode256, SDNode OpNode512,
+ SDPatternOperator MaskNode128,
+ SDPatternOperator MaskNode256,
+ SDPatternOperator MaskNode512,
+ X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTSrcInfo,
X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
@@ -9047,118 +9596,167 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
PatFrag mtruncFrag, Predicate prd = HasAVX512>{
let Predicates = [HasVLX, prd] in {
- defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
+ defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
truncFrag, mtruncFrag, NAME>, EVEX_V128;
- defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
+ defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
truncFrag, mtruncFrag, NAME>, EVEX_V256;
}
let Predicates = [prd] in
- defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
+ defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
VTSrcInfo.info512, DestInfoZ, x86memopZ>,
avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
+ InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
avx512vl_i64_info, v16i8x_info, v16i8x_info,
v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
+ InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i64_info, v8i16x_info, v8i16x_info,
v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
+ InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i64_info, v4i32x_info, v4i32x_info,
v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
+ InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i32_info, v16i8x_info, v16i8x_info,
v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
- defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
+ InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i32_info, v8i16x_info, v8i16x_info,
v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
- PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
+ PatFrag MaskedStoreNode, SDNode InVecNode,
+ SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
- sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
+ InVecMaskNode, MaskNode, MaskNode, sched,
+ avx512vl_i16_info, v16i8x_info, v16i8x_info,
v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
-defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
-
-defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256,
- truncstorevi16, masked_truncstorevi16, X86vtrunc>;
-defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256,
- truncstore_s_vi16, masked_truncstore_s_vi16>;
-defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
- truncstore_us_vi16, masked_truncstore_us_vi16>;
-
-defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256,
- truncstorevi32, masked_truncstorevi32, X86vtrunc>;
-defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256,
- truncstore_s_vi32, masked_truncstore_s_vi32>;
-defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
- truncstore_us_vi32, masked_truncstore_us_vi32>;
-
-defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
-
-defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
- truncstorevi16, masked_truncstorevi16, X86vtrunc>;
-defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256,
- truncstore_s_vi16, masked_truncstore_s_vi16>;
-defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256,
- truncstore_us_vi16, masked_truncstore_us_vi16>;
-
-defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
- truncstorevi8, masked_truncstorevi8, X86vtrunc>;
-defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256,
- truncstore_s_vi8, masked_truncstore_s_vi8>;
-defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8>;
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
+ WriteShuffle256, truncstorevi16,
+ masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi16,
+ masked_truncstore_s_vi16, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi16, masked_truncstore_us_vi16,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
+ WriteShuffle256, truncstorevi32,
+ masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
+defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi32,
+ masked_truncstore_s_vi32, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi32, masked_truncstore_us_vi32,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
+defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
+ WriteShuffle256, truncstorevi16,
+ masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
+defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi16,
+ masked_truncstore_s_vi16, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi16, masked_truncstore_us_vi16,
+ X86vtruncus, X86vmtruncus>;
+
+defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
+ WriteShuffle256, truncstorevi8,
+ masked_truncstorevi8, X86vtrunc,
+ X86vmtrunc>;
+defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
+ WriteShuffle256, truncstore_s_vi8,
+ masked_truncstore_s_vi8, X86vtruncs,
+ X86vmtruncs>;
+defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
+ select_truncus, WriteShuffle256,
+ truncstore_us_vi8, masked_truncstore_us_vi8,
+ X86vtruncus, X86vmtruncus>;
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
@@ -9177,6 +9775,44 @@ def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
VR256X:$src, sub_ymm))), sub_xmm))>;
}
+// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
+multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
+ X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
+ def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.RC:$src0,
+ SrcInfo.KRCWM:$mask)),
+ (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
+ SrcInfo.KRCWM:$mask,
+ SrcInfo.RC:$src)>;
+
+ def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
+ DestInfo.ImmAllZerosV,
+ SrcInfo.KRCWM:$mask)),
+ (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
+ SrcInfo.RC:$src)>;
+}
+
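+// Expansion sketch: an instantiation such as
+//   defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
+// produces patterns mapping (v8i16 (X86vmtrunc (v8i32 V), Src0, K)) onto
+// VPMOVDWZ256rrk, and the zero-masked form onto VPMOVDWZ256rrkz.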
+let Predicates = [HasVLX] in {
+defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
+defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
+defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
+}
+
+let Predicates = [HasAVX512] in {
+defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
+
+defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
+defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
+
+defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
+defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
+defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
+}
+
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
@@ -9221,7 +9857,7 @@ multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
- v16i8x_info, i64mem, LdFrag, OpNode>,
+ v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
@@ -9240,12 +9876,12 @@ multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
- v16i8x_info, i32mem, LdFrag, OpNode>,
+ v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
- v16i8x_info, i64mem, LdFrag, OpNode>,
+ v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
}
}
@@ -9278,7 +9914,7 @@ multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
- v8i16x_info, i64mem, LdFrag, OpNode>,
+ v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512] in {
@@ -9308,23 +9944,107 @@ multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
}
}
-defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
+
+defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
+
-defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
+// Patterns that we also need any extend versions of. aext_vector_inreg
+// is currently legalized to zext_vector_inreg.
+multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
+ // 256-bit patterns
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ }
+
+ let Predicates = [HasVLX] in {
+ def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ }
+
+ // 512-bit patterns
+ let Predicates = [HasBWI] in {
+ def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
+ def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
+ }
+}
+
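+// Sketch of why an any-extend can reuse these zero-extend patterns, assuming
+// the aext multiclass below is instantiated with the VPMOVZX* opcode prefix:
+// in (v16i16 (anyext (loadv16i8 addr))) the high bits of each element are
+// undefined, so selecting the zero-extending OpcPrefix#BWZ256rm form is
+// always a correct choice.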
+multiclass AVX512_pmovx_patterns_aext<string OpcPrefix, SDNode ExtOp> :
+ AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v16i16 (ExtOp (v16i8 VR128X:$src))),
+ (!cast<I>(OpcPrefix#BWZ256rr) VR128X:$src)>;
+ }
+
+ let Predicates = [HasVLX] in {
+ def : Pat<(v8i32 (ExtOp (v8i16 VR128X:$src))),
+ (!cast<I>(OpcPrefix#WDZ256rr) VR128X:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (v4i32 VR128X:$src))),
+ (!cast<I>(OpcPrefix#DQZ256rr) VR128X:$src)>;
+ }
+
+ // 512-bit patterns
+ let Predicates = [HasBWI] in {
+ def : Pat<(v32i16 (ExtOp (v32i8 VR256X:$src))),
+ (!cast<I>(OpcPrefix#BWZrr) VR256X:$src)>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (ExtOp (v16i8 VR128X:$src))),
+ (!cast<I>(OpcPrefix#BDZrr) VR128X:$src)>;
+ def : Pat<(v16i32 (ExtOp (v16i16 VR256X:$src))),
+ (!cast<I>(OpcPrefix#WDZrr) VR256X:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (v8i16 VR128X:$src))),
+ (!cast<I>(OpcPrefix#WQZrr) VR128X:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (v8i32 VR256X:$src))),
+ (!cast<I>(OpcPrefix#DQZrr) VR256X:$src)>;
+ }
+}
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
- SDNode InVecOp> {
+ SDNode InVecOp> :
+ AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9335,7 +10055,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
@@ -9345,7 +10065,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
@@ -9354,7 +10074,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9365,7 +10085,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
@@ -9374,7 +10094,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -9385,87 +10105,73 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
- // 256-bit patterns
- let Predicates = [HasVLX, HasBWI] in {
- def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- }
let Predicates = [HasVLX] in {
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
}
// 512-bit patterns
- let Predicates = [HasBWI] in {
- def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
- }
let Predicates = [HasAVX512] in {
- def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
-
- def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
- def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ }
+}
- def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
-
- def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
+defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
+defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
+defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>;
- def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
- }
+// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
+// ext+trunc aggressively, making it impossible to legalize the DAG to this
+// pattern directly.
+let Predicates = [HasAVX512, NoBWI] in {
+def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
+ (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
+def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
+ (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
+def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
+ (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
}
-defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
-defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
@@ -9651,6 +10357,10 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
+
+// Also need a pattern for anyextend.
+def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
+ (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
@@ -9724,6 +10434,19 @@ let Predicates = [HasDQI, NoBWI] in {
(VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
(VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+
+ def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
+ (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+ def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
+ (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
+}
+
+let Predicates = [HasDQI, NoBWI, HasVLX] in {
+ def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
+ (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
+
+ def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
+ (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
@@ -9753,8 +10476,7 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
}
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
- def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
- (_.VT _.RC:$src)),
+ def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix##mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
@@ -9798,7 +10520,7 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
(_.VT (X86expand (_.VT (bitconvert
(_.LdFrag addr:$src1)))))>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
@@ -9860,14 +10582,14 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9923,7 +10645,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 imm:$src3))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -9931,7 +10653,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9955,7 +10677,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9974,7 +10696,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -9996,7 +10718,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10360,9 +11082,9 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(_.VT
(bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1,
- (bitconvert (_.LdFrag addr:$src2)),
+ (CastInfo.LdFrag addr:$src2),
(i8 imm:$src3)))))>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
@@ -10374,7 +11096,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(X86Shuf128 _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(i8 imm:$src3)))))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10450,7 +11172,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
(_.VT (X86VAlign _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)),
(i8 imm:$src3)))>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<"VPALIGNRrmi">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -10460,7 +11182,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
(X86VAlign _.RC:$src1,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10526,7 +11248,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
- (bitconvert (To.LdFrag addr:$src2)),
+ (From.LdFrag addr:$src2),
imm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
@@ -10536,7 +11258,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1,
- (bitconvert (To.LdFrag addr:$src2)),
+ (From.LdFrag addr:$src2),
imm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
@@ -10824,6 +11546,8 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(VMOVDDUPZ128rm addr:$src)>;
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+ (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
@@ -10954,7 +11678,7 @@ multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -11027,7 +11751,7 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
@@ -11067,7 +11791,7 @@ multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
(OpNode (_src.VT _src.RC:$src1),
(_src.VT (bitconvert
(_src.LdFrag addr:$src2))))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
@@ -11169,7 +11893,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
@@ -11179,7 +11903,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
@@ -11343,19 +12067,68 @@ defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
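+// The patterns below all match (xor X, all-ones), i.e. a vector NOT; VPTERNLOGQ
+// with truth-table immediate 15 (0x0f) and all three sources tied to $src
+// computes ~$src.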
let Predicates = [HasAVX512] in {
- def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
+ def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))),
+ (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
+ def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
let Predicates = [HasAVX512, NoVLX] in {
- def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
+ def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (i8 15)), sub_xmm)>;
+ def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
+
+ def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+ (EXTRACT_SUBREG
+ (VPTERNLOGQZrri
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (i8 15)), sub_ymm)>;
+ def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
@@ -11365,9 +12138,22 @@ let Predicates = [HasAVX512, NoVLX] in {
}
let Predicates = [HasVLX] in {
- def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
+ def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+ (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
+ def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
+
+ def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+ (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
+ def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
@@ -11395,7 +12181,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
@@ -11405,7 +12191,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
(TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
}
@@ -11448,7 +12234,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_NO_EXC))>,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -11458,7 +12244,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -11680,9 +12466,9 @@ multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
+ (VTI.VT (VTI.LdFrag addr:$src3))))>,
AVX512FMA3Base,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -11698,7 +12484,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
@@ -11783,10 +12569,9 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (bitconvert
- (VTI.LdFrag addr:$src3)))))>,
+ (VTI.VT (VTI.LdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
@@ -11795,7 +12580,7 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(VTI.VT (X86VBroadcast
(VTI.ScalarLdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
- T8PD, Sched<[sched.Folded, ReadAfterLd]>;
+ T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
@@ -11840,9 +12625,9 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
- (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
+ (VTI.VT (VTI.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
@@ -11890,7 +12675,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
(i8 imm:$src3))>, EVEX_B,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index c444fa761960..cb5a4e5b5d41 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -46,11 +46,11 @@ def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
// (and possibly third) value from a register.
// This is used for instructions that put the memory operands before other
// uses.
-class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Register reads (implicit or explicit).
- ReadAfterLd, ReadAfterLd]>;
+ Sched.ReadAfterFold, Sched.ReadAfterFold]>;
// Extra precision multiplication
@@ -63,18 +63,18 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>, Sched<[WriteIMul]>;
+ (implicit EFLAGS)]>, Sched<[WriteIMul8]>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- []>, OpSize16, Sched<[WriteIMul]>;
+ []>, OpSize16, Sched<[WriteIMul16]>;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>,
- OpSize32, Sched<[WriteIMul]>;
+ OpSize32, Sched<[WriteIMul32]>;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
@@ -89,20 +89,20 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>, SchedLoadReg<WriteIMul.Folded>;
+ (implicit EFLAGS)]>, SchedLoadReg<WriteIMul8>;
// AX,DX = AX*[mem16]
let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
+ "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
+ "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
+ "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
Requires<[In64BitMode]>;
}
@@ -110,15 +110,15 @@ let hasSideEffects = 0 in {
// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>,
- Sched<[WriteIMul]>;
+ Sched<[WriteIMul8]>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize16, Sched<[WriteIMul]>;
+ OpSize16, Sched<[WriteIMul16]>;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>,
- OpSize32, Sched<[WriteIMul]>;
+ OpSize32, Sched<[WriteIMul32]>;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>,
@@ -128,19 +128,19 @@ let mayLoad = 1 in {
// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>, SchedLoadReg<WriteIMul.Folded>;
+ "imul{b}\t$src", []>, SchedLoadReg<WriteIMul8>;
// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
+ "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
+ "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>,
+ "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
Requires<[In64BitMode]>;
}
} // hasSideEffects
@@ -156,18 +156,18 @@ def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, GR16:$src2))]>,
- Sched<[WriteIMul]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg]>, TB, OpSize16;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, GR32:$src2))]>,
- Sched<[WriteIMul]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg]>, TB, OpSize32;
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))]>,
- Sched<[WriteIMul64]>, TB;
+ Sched<[WriteIMul64Reg]>, TB;
} // isCommutable
// Register-Memory Signed Integer Multiply
@@ -176,19 +176,19 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>,
- Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg.Folded, WriteIMul16Reg.ReadAfterFold]>, TB, OpSize16;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>,
- Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg.Folded, WriteIMul32Reg.ReadAfterFold]>, TB, OpSize32;
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>,
- Sched<[WriteIMul64.Folded, ReadAfterLd]>, TB;
+ Sched<[WriteIMul64Reg.Folded, WriteIMul64Reg.ReadAfterFold]>, TB;
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
@@ -201,37 +201,37 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, imm:$src2))]>,
- Sched<[WriteIMul]>, OpSize16;
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- Sched<[WriteIMul]>, OpSize16;
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, imm:$src2))]>,
- Sched<[WriteIMul]>, OpSize32;
+ Sched<[WriteIMul32Imm]>, OpSize32;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, i32immSExt8:$src2))]>,
- Sched<[WriteIMul]>, OpSize32;
+ Sched<[WriteIMul32Imm]>, OpSize32;
def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
- Sched<[WriteIMul64]>;
+ Sched<[WriteIMul64Imm]>;
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))]>,
- Sched<[WriteIMul64]>;
+ Sched<[WriteIMul64Imm]>;
// Memory-Integer Signed Integer Multiply
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
@@ -239,41 +239,41 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize16;
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1),
i16immSExt8:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize16;
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize32;
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1),
i32immSExt8:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize32;
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
i64immSExt32:$src2))]>,
- Sched<[WriteIMul64.Folded]>;
+ Sched<[WriteIMul64Imm.Folded]>;
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
i64immSExt8:$src2))]>,
- Sched<[WriteIMul64.Folded]>;
+ Sched<[WriteIMul64Imm.Folded]>;
} // Defs = [EFLAGS]
// unsigned division/remainder
@@ -295,17 +295,17 @@ def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>, SchedLoadReg<WriteDiv8.Folded>;
+ "div{b}\t$src", []>, SchedLoadReg<WriteDiv8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16.Folded>;
+ "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>, SchedLoadReg<WriteDiv32.Folded>, OpSize32;
+ "div{l}\t$src", []>, SchedLoadReg<WriteDiv32>, OpSize32;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>, SchedLoadReg<WriteDiv64.Folded>,
+ "div{q}\t$src", []>, SchedLoadReg<WriteDiv64>,
Requires<[In64BitMode]>;
}
@@ -327,16 +327,16 @@ def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
let mayLoad = 1 in {
let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8.Folded>;
+ "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16.Folded>;
+ "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32.Folded>;
+ "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64.Folded>,
+ "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64>,
Requires<[In64BitMode]>;
}
} // hasSideEffects = 0
@@ -422,22 +422,35 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
} // SchedRW
} // CodeSize
+def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86add_flag node:$lhs, node:$rhs), [{
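+ // Only match when the carry-flag result is unused, since INC does not update CF.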
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86sub_flag node:$lhs, node:$rhs), [{
+ // Only use DEC if the result is used.
+ return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
@@ -474,16 +487,18 @@ let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
@@ -691,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -806,8 +821,8 @@ class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.RegClass:$src))]>,
- Sched<[WriteALULd, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault, ReadAfterLd]>;
+ Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>;
// BinOpMI - Instructions like "add [mem], imm".
class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -839,7 +854,7 @@ class BinOpMI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMI<opcode, mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.ImmOperator:$src))]>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALU.Folded]>;
// BinOpMI8 - Instructions like "add [mem], imm8".
class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
@@ -872,7 +887,7 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
: BinOpMI8<mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.Imm8Operator:$src))]>,
- Sched<[WriteALULd]>;
+ Sched<[WriteALU.Folded]>;
// BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def EFLAGS.
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -913,8 +928,8 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR in {
- def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
@@ -931,9 +946,9 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
- def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
-
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
@@ -1176,6 +1191,30 @@ let isCompare = 1 in {
defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
}
+// Patterns to recognize loads on the LHS of an ADC. We can't make X86adc_flag
+// commutable since it has EFLAGS as an input.
+def : Pat<(X86adc_flag (loadi8 addr:$src2), GR8:$src1, EFLAGS),
+ (ADC8rm GR8:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi16 addr:$src2), GR16:$src1, EFLAGS),
+ (ADC16rm GR16:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi32 addr:$src2), GR32:$src1, EFLAGS),
+ (ADC32rm GR32:$src1, addr:$src2)>;
+def : Pat<(X86adc_flag (loadi64 addr:$src2), GR64:$src1, EFLAGS),
+ (ADC64rm GR64:$src1, addr:$src2)>;
+
+// Patterns to recognize RMW ADC with loads in operand 1.
+def : Pat<(store (X86adc_flag GR8:$src, (loadi8 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (X86adc_flag GR16:$src, (loadi16 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (X86adc_flag GR32:$src, (loadi32 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC32mr addr:$dst, GR32:$src)>;
+def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS),
+ addr:$dst),
+ (ADC64mr addr:$dst, GR64:$src)>;
//===----------------------------------------------------------------------===//
// Semantically, test instructions are similar to AND, except they don't
@@ -1188,16 +1227,21 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
let isCompare = 1 in {
let Defs = [EFLAGS] in {
let isCommutable = 1 in {
- def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>;
- def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>;
- def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>;
- def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>;
+ // Avoid selecting these and instead use a test+and. Post processing will
+ // combine them. This gives a bunch of other patterns that start with
+ // an 'and' a chance to match.
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , null_frag>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, null_frag>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, null_frag>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, null_frag>;
} // isCommutable
- def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , X86testpat>;
- def TEST16mr : BinOpMR_F<0x84, "test", Xi16, X86testpat>;
- def TEST32mr : BinOpMR_F<0x84, "test", Xi32, X86testpat>;
- def TEST64mr : BinOpMR_F<0x84, "test", Xi64, X86testpat>;
+ let hasSideEffects = 0, mayLoad = 1 in {
+ def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , null_frag>;
+ def TEST16mr : BinOpMR_F<0x84, "test", Xi16, null_frag>;
+ def TEST32mr : BinOpMR_F<0x84, "test", Xi32, null_frag>;
+ def TEST64mr : BinOpMR_F<0x84, "test", Xi64, null_frag>;
+ }
def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
@@ -1235,7 +1279,7 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
(X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>,
- Sched<[WriteALULd, ReadAfterLd]>;
+ Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
// Complexity is reduced to give and with immediate a chance to match first.
@@ -1275,7 +1319,7 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2] in {
let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul>;
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
}
@@ -1283,22 +1327,18 @@ let Predicates = [HasBMI2] in {
//===----------------------------------------------------------------------===//
// ADCX and ADOX Instructions
//
+// We don't have patterns for these as there is no advantage over ADC for
+// most code.
let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
- Constraints = "$src1 = $dst", AddedComplexity = 10 in {
- let SchedRW = [WriteADC] in {
+ Constraints = "$src1 = $dst", hasSideEffects = 0 in {
+ let SchedRW = [WriteADC], isCommutable = 1 in {
def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
- "adcx{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86adc_flag GR32:$src1, GR32:$src2, EFLAGS))]>, T8PD;
+ "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
def ADCX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
- "adcx{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86adc_flag GR64:$src1, GR64:$src2, EFLAGS))]>, T8PD;
+ "adcx{q}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
- // We don't have patterns for ADOX yet.
- let hasSideEffects = 0 in {
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"adox{l}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
@@ -1306,26 +1346,17 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
- } // hasSideEffects = 0
} // SchedRW
- let mayLoad = 1, SchedRW = [WriteADCLd, ReadAfterLd] in {
+ let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
- "adcx{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86adc_flag GR32:$src1, (loadi32 addr:$src2), EFLAGS))]>,
- T8PD;
+ "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
def ADCX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
- "adcx{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86adc_flag GR64:$src1, (loadi64 addr:$src2), EFLAGS))]>,
- T8PD;
+ "adcx{q}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
- // We don't have patterns for ADOX yet.
- let hasSideEffects = 0 in {
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"adox{l}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
@@ -1333,6 +1364,5 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
- } // hasSideEffects = 0
- } // mayLoad = 1, SchedRW = [WriteADCLd]
+ } // mayLoad, SchedRW
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index eda4ba5ae6f0..f5494fc0b13f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -38,7 +38,7 @@ multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- SchedRW = [Sched.Folded, ReadAfterLd] in {
+ SchedRW = [Sched.Folded, Sched.ReadAfterFold] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 373f85020372..394dca8e7817 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -37,11 +37,6 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP, SSP],
def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
"", []>;
-// 64-bit large code model PIC base construction.
-let hasSideEffects = 0, mayLoad = 1, isNotDuplicable = 1, SchedRW = [WriteJump] in
- def MOVGOT64r : PseudoI<(outs GR64:$reg),
- (ins GR64:$scratch, i64i32imm_pcrel:$got), []>;
-
// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
@@ -148,7 +143,7 @@ def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
// These instructions XOR the frame pointer into a GPR. They are used in some
// stack protection schemes. These are post-RA pseudos because we only know the
// frame register after register allocation.
-let Constraints = "$src = $dst", isPseudo = 1, Defs = [EFLAGS] in {
+let Constraints = "$src = $dst", isMoveImm = 1, isPseudo = 1, Defs = [EFLAGS] in {
def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
"xorl\t$$FP, $src", []>,
Requires<[NotLP64]>, Sched<[WriteALU]>;
@@ -178,7 +173,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
}
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
- isCodeGenOnly = 1, isReturn = 1 in {
+ isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1 in {
def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>;
// CATCHRET needs a custom inserter for SEH.
@@ -240,6 +235,8 @@ let isPseudo = 1, SchedRW = [WriteSystem] in {
"#SEH_SaveXMM $reg, $dst", []>;
def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
"#SEH_StackAlloc $size", []>;
+ def SEH_StackAlign : I<0, Pseudo, (outs), (ins i32imm:$align),
+ "#SEH_StackAlign $align", []>;
def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
"#SEH_SetFrame $reg, $offset", []>;
def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
@@ -273,7 +270,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>;
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1, AddedComplexity = 10 in
+ isPseudo = 1, isMoveImm = 1, AddedComplexity = 10 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)]>, Sched<[WriteZero]>;
@@ -319,16 +316,14 @@ def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
// that would make it more difficult to rematerialize.
let isReMaterializable = 1, isAsCheapAsAMove = 1,
isPseudo = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
-def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>;
+def MOV32ri64 : I<0, Pseudo, (outs GR64:$dst), (ins i64i32imm:$src), "", []>;
// This 64-bit pseudo-move can be used for both a 64-bit constant that is
// actually the zero-extension of a 32-bit constant and for labels in the
// x86-64 small code model.
def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;
-let AddedComplexity = 1 in
-def : Pat<(i64 mov64imm32:$src),
- (SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;
+def : Pat<(i64 mov64imm32:$src), (MOV32ri64 mov64imm32:$src)>;
// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
@@ -367,95 +362,109 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
(SETBr)>;
-// (add OP, SETB) -> (adc OP, 0)
-def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
- (ADC8ri GR8:$op, 0)>;
-def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
- (ADC32ri8 GR32:$op, 0)>;
-def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
- (ADC64ri8 GR64:$op, 0)>;
-
-// (sub OP, SETB) -> (sbb OP, 0)
-def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB8ri GR8:$op, 0)>;
-def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB32ri8 GR32:$op, 0)>;
-def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
- (SBB64ri8 GR64:$op, 0)>;
-
-// (sub OP, SETCC_CARRY) -> (adc OP, 0)
-def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC8ri GR8:$op, 0)>;
-def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC32ri8 GR32:$op, 0)>;
-def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
- (ADC64ri8 GR64:$op, 0)>;
+// Patterns to give priority when both inputs are zero so that we don't use
+// an immediate for the RHS.
+// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
+def : Pat<(X86sbb_flag (i8 0), (i8 0), EFLAGS),
+ (SBB8rr (EXTRACT_SUBREG (MOV32r0), sub_8bit),
+ (EXTRACT_SUBREG (MOV32r0), sub_8bit))>;
+def : Pat<(X86sbb_flag (i16 0), (i16 0), EFLAGS),
+ (SBB16rr (EXTRACT_SUBREG (MOV32r0), sub_16bit),
+ (EXTRACT_SUBREG (MOV32r0), sub_16bit))>;
+def : Pat<(X86sbb_flag (i32 0), (i32 0), EFLAGS),
+ (SBB32rr (MOV32r0), (MOV32r0))>;
+def : Pat<(X86sbb_flag (i64 0), (i64 0), EFLAGS),
+ (SBB64rr (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit),
+ (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit))>;
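// Informal sketch of the trick: "sbb r, r" computes r - r - CF, so with both
// inputs zeroed the result is 0 when CF is clear and -1 (all ones) when CF is
// set, which materializes the carry bit without spending an immediate on the
// RHS.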
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
let SchedRW = [WriteMicrocoded] in {
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP,
- Requires<[Not64BitMode]>;
-def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize16,
- Requires<[Not64BitMode]>;
-def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP, OpSize32,
- Requires<[Not64BitMode]>;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins),
+ "{rep;movsb (%esi), %es:(%edi)|rep movsb es:[edi], [esi]}",
+ [(X86rep_movs i8)]>, REP, AdSize32,
+ Requires<[NotLP64]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsw (%esi), %es:(%edi)|rep movsw es:[edi], [esi]}",
+ [(X86rep_movs i16)]>, REP, AdSize32, OpSize16,
+ Requires<[NotLP64]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsl (%esi), %es:(%edi)|rep movsd es:[edi], [esi]}",
+ [(X86rep_movs i32)]>, REP, AdSize32, OpSize32,
+ Requires<[NotLP64]>;
+def REP_MOVSQ_32 : RI<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsq (%esi), %es:(%edi)|rep movsq es:[edi], [esi]}",
+ [(X86rep_movs i64)]>, REP, AdSize32,
+ Requires<[NotLP64, In64BitMode]>;
}
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
-def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP,
- Requires<[In64BitMode]>;
-def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize16,
- Requires<[In64BitMode]>;
-def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP, OpSize32,
- Requires<[In64BitMode]>;
-def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP,
- Requires<[In64BitMode]>;
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins),
+ "{rep;movsb (%rsi), %es:(%rdi)|rep movsb es:[rdi], [rsi]}",
+ [(X86rep_movs i8)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsw (%rsi), %es:(%rdi)|rep movsw es:[rdi], [rsi]}",
+ [(X86rep_movs i16)]>, REP, AdSize64, OpSize16,
+ Requires<[IsLP64]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsl (%rsi), %es:(%rdi)|rep movsdi es:[rdi], [rsi]}",
+ [(X86rep_movs i32)]>, REP, AdSize64, OpSize32,
+ Requires<[IsLP64]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins),
+ "{rep;movsq (%rsi), %es:(%rdi)|rep movsq es:[rdi], [rsi]}",
+ [(X86rep_movs i64)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
}
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
let Uses = [AL,ECX,EDI] in
- def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP,
- Requires<[Not64BitMode]>;
+ def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins),
+ "{rep;stosb %al, %es:(%edi)|rep stosb es:[edi], al}",
+ [(X86rep_stos i8)]>, REP, AdSize32,
+ Requires<[NotLP64]>;
let Uses = [AX,ECX,EDI] in
- def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize16,
- Requires<[Not64BitMode]>;
+ def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosw %ax, %es:(%edi)|rep stosw es:[edi], ax}",
+ [(X86rep_stos i16)]>, REP, AdSize32, OpSize16,
+ Requires<[NotLP64]>;
let Uses = [EAX,ECX,EDI] in
- def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP, OpSize32,
- Requires<[Not64BitMode]>;
+ def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosl %eax, %es:(%edi)|rep stosd es:[edi], eax}",
+ [(X86rep_stos i32)]>, REP, AdSize32, OpSize32,
+ Requires<[NotLP64]>;
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSQ_32 : RI<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosq %rax, %es:(%edi)|rep stosq es:[edi], rax}",
+ [(X86rep_stos i64)]>, REP, AdSize32,
+ Requires<[NotLP64, In64BitMode]>;
}
let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
let Uses = [AL,RCX,RDI] in
- def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP,
- Requires<[In64BitMode]>;
+ def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins),
+ "{rep;stosb %al, %es:(%rdi)|rep stosb es:[rdi], al}",
+ [(X86rep_stos i8)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
let Uses = [AX,RCX,RDI] in
- def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize16,
- Requires<[In64BitMode]>;
+ def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosw %ax, %es:(%rdi)|rep stosw es:[rdi], ax}",
+ [(X86rep_stos i16)]>, REP, AdSize64, OpSize16,
+ Requires<[IsLP64]>;
let Uses = [RAX,RCX,RDI] in
- def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP, OpSize32,
- Requires<[In64BitMode]>;
+ def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosl %eax, %es:(%rdi)|rep stosd es:[rdi], eax}",
+ [(X86rep_stos i32)]>, REP, AdSize64, OpSize32,
+ Requires<[IsLP64]>;
let Uses = [RAX,RCX,RDI] in
- def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP,
- Requires<[In64BitMode]>;
+ def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins),
+ "{rep;stosq %rax, %es:(%rdi)|rep stosq es:[rdi], rax}",
+ [(X86rep_stos i64)]>, REP, AdSize64,
+ Requires<[IsLP64]>;
}
} // SchedRW
@@ -567,22 +576,84 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
- defm _F128 : CMOVrr_PSEUDO<VR128, f128>;
- defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
- defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
- defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
- defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;
- defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;
- defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;
- defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;
- defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;
- defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;
- defm _V8I1 : CMOVrr_PSEUDO<VK8, v8i1>;
- defm _V16I1 : CMOVrr_PSEUDO<VK16, v16i1>;
- defm _V32I1 : CMOVrr_PSEUDO<VK32, v32i1>;
- defm _V64I1 : CMOVrr_PSEUDO<VK64, v64i1>;
+ let Predicates = [NoVLX] in {
+ defm _VR128 : CMOVrr_PSEUDO<VR128, v2i64>;
+ defm _VR256 : CMOVrr_PSEUDO<VR256, v4i64>;
+ }
+ let Predicates = [HasVLX] in {
+ defm _VR128X : CMOVrr_PSEUDO<VR128X, v2i64>;
+ defm _VR256X : CMOVrr_PSEUDO<VR256X, v4i64>;
+ }
+ defm _VR512 : CMOVrr_PSEUDO<VR512, v8i64>;
+ defm _VK2 : CMOVrr_PSEUDO<VK2, v2i1>;
+ defm _VK4 : CMOVrr_PSEUDO<VK4, v4i1>;
+ defm _VK8 : CMOVrr_PSEUDO<VK8, v8i1>;
+ defm _VK16 : CMOVrr_PSEUDO<VK16, v16i1>;
+ defm _VK32 : CMOVrr_PSEUDO<VK32, v32i1>;
+ defm _VK64 : CMOVrr_PSEUDO<VK64, v64i1>;
} // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS]
+def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+
+let Predicates = [NoVLX] in {
+ def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+ def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>;
+
+ def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+ def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>;
+}
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+ def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>;
+
+ def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+ def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>;
+}
+
+def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>;
+
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -591,12 +662,11 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
// Memory barriers
-// TODO: Get this to fold the constant into the instruction.
let isCodeGenOnly = 1, Defs = [EFLAGS] in
-def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "or{l}\t{$zero, $dst|$dst, $zero}", []>,
- Requires<[Not64BitMode]>, OpSize32, LOCK,
- Sched<[WriteALULd, WriteRMW]>;
+def OR32mi8Locked : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$zero),
+ "or{l}\t{$zero, $dst|$dst, $zero}", []>,
+ Requires<[Not64BitMode]>, OpSize32, LOCK,
+ Sched<[WriteALURMW]>;
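// Sketch of the intended use: on targets without MFENCE a full memory fence
// can be lowered to this locked OR of zero against the top of the stack,
//   lock orl $0, (%esp)
// which orders surrounding memory accesses like a fence while leaving the
// stored value unchanged.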
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
@@ -610,7 +680,7 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
Format ImmMod, SDNode Op, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteALURMW] in {
def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
@@ -706,53 +776,64 @@ defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
-multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
- string frag, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
-def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat(mnemonic, "{b}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_8") addr:$dst))]>,
- LOCK;
-def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat(mnemonic, "{w}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_16") addr:$dst))]>,
- OpSize16, LOCK;
-def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat(mnemonic, "{l}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_32") addr:$dst))]>,
- OpSize32, LOCK;
-def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat(mnemonic, "{q}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_64") addr:$dst))]>,
- LOCK;
-}
-}
+def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_add node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-multiclass unary_atomic_intrin<SDNode atomic_op> {
- def _8 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
- }]>;
- def _16 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
- }]>;
- def _32 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
- }]>;
- def _64 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
- }]>;
-}
+def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_sub node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-defm X86lock_inc : unary_atomic_intrin<X86lock_inc>;
-defm X86lock_dec : unary_atomic_intrin<X86lock_dec>;
+let Predicates = [UseIncDec] in {
+ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALURMW] in {
+ def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "inc{b}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "inc{w}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "inc{l}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "inc{q}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+
+ def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "dec{b}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "dec{w}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "dec{l}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "dec{q}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+ }
-defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">;
-defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">;
+ // Additional patterns for -1 constant.
+ def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
+}
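// Note on the no-carry-flag-uses predicate: INC and DEC leave CF untouched
// while ADD and SUB redefine it, so the substitution is only safe when no
// EFLAGS consumer reads CF. When it applies, an atomic increment whose flag
// result feeds only ZF/SF-style tests can be selected as, e.g.,
//   lock incl (%rdi)
// instead of "lock addl $1, (%rdi)", saving the immediate byte.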
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
@@ -766,7 +847,7 @@ let isCodeGenOnly = 1, usesCustomInserter = 1 in {
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag> {
-let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
!strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
@@ -787,7 +868,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteCMPXCHGRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
}
@@ -811,7 +892,7 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
- SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1, isPseudo = 1,
+ SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1,
Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {
def LCMPXCHG8B_SAVE_EBX :
I<0, Pseudo, (outs GR32:$dst),
@@ -823,14 +904,14 @@ def LCMPXCHG8B_SAVE_EBX :
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
X86cas16, i128mem>, REX_W;
}
// Same as LCMPXCHG8B_SAVE_RBX but for the 16 Bytes variant.
let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
- Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW],
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW],
isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
usesCustomInserter = 1 in {
def LCMPXCHG16B_SAVE_RBX :
@@ -847,7 +928,7 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", X86cas>;
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
string frag> {
let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW] in {
+ SchedRW = [WriteALURMW] in {
def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
(ins GR8:$val, i8mem:$ptr),
!strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
@@ -887,42 +968,38 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add">, TB, LOCK;
* extremely late to prevent them from being accidentally reordered in the backend
* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
*/
-multiclass RELEASE_BINOP_MI<SDNode op> {
- def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#BINOP "#NAME#"8mi PSEUDO!",
- [(atomic_store_8 addr:$dst, (op
- (atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
- def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),
- "#BINOP "#NAME#"8mr PSEUDO!",
- [(atomic_store_8 addr:$dst, (op
- (atomic_load_8 addr:$dst), GR8:$src))]>;
- // NAME#16 is not generated as 16-bit arithmetic instructions are considered
- // costly and avoided as far as possible by this backend anyway
- def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#BINOP "#NAME#"32mi PSEUDO!",
- [(atomic_store_32 addr:$dst, (op
- (atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
- def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
- "#BINOP "#NAME#"32mr PSEUDO!",
- [(atomic_store_32 addr:$dst, (op
- (atomic_load_32 addr:$dst), GR32:$src))]>;
- def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#BINOP "#NAME#"64mi32 PSEUDO!",
- [(atomic_store_64 addr:$dst, (op
- (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
- def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
- "#BINOP "#NAME#"64mr PSEUDO!",
- [(atomic_store_64 addr:$dst, (op
- (atomic_load_64 addr:$dst), GR64:$src))]>;
-}
-let Defs = [EFLAGS], SchedRW = [WriteMicrocoded] in {
- defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
- defm RELEASE_AND : RELEASE_BINOP_MI<and>;
- defm RELEASE_OR : RELEASE_BINOP_MI<or>;
- defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
- // Note: we don't deal with sub, because substractions of constants are
- // optimized into additions before this code can run.
+multiclass RELEASE_BINOP_MI<string Name, SDNode op> {
+ def : Pat<(atomic_store_8 addr:$dst,
+ (op (atomic_load_8 addr:$dst), (i8 imm:$src))),
+ (!cast<Instruction>(Name#"8mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_16 addr:$dst,
+ (op (atomic_load_16 addr:$dst), (i16 imm:$src))),
+ (!cast<Instruction>(Name#"16mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_32 addr:$dst,
+ (op (atomic_load_32 addr:$dst), (i32 imm:$src))),
+ (!cast<Instruction>(Name#"32mi") addr:$dst, imm:$src)>;
+ def : Pat<(atomic_store_64 addr:$dst,
+ (op (atomic_load_64 addr:$dst), (i64immSExt32:$src))),
+ (!cast<Instruction>(Name#"64mi32") addr:$dst, (i64immSExt32:$src))>;
+
+ def : Pat<(atomic_store_8 addr:$dst,
+ (op (atomic_load_8 addr:$dst), (i8 GR8:$src))),
+ (!cast<Instruction>(Name#"8mr") addr:$dst, GR8:$src)>;
+ def : Pat<(atomic_store_16 addr:$dst,
+ (op (atomic_load_16 addr:$dst), (i16 GR16:$src))),
+ (!cast<Instruction>(Name#"16mr") addr:$dst, GR16:$src)>;
+ def : Pat<(atomic_store_32 addr:$dst,
+ (op (atomic_load_32 addr:$dst), (i32 GR32:$src))),
+ (!cast<Instruction>(Name#"32mr") addr:$dst, GR32:$src)>;
+ def : Pat<(atomic_store_64 addr:$dst,
+ (op (atomic_load_64 addr:$dst), (i64 GR64:$src))),
+ (!cast<Instruction>(Name#"64mr") addr:$dst, GR64:$src)>;
}
+defm : RELEASE_BINOP_MI<"ADD", add>;
+defm : RELEASE_BINOP_MI<"AND", and>;
+defm : RELEASE_BINOP_MI<"OR", or>;
+defm : RELEASE_BINOP_MI<"XOR", xor>;
+defm : RELEASE_BINOP_MI<"SUB", sub>;
// Same as above, but for floating-point.
// FIXME: imm version.
@@ -947,91 +1024,64 @@ defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
// FIXME: Add fsub, fmul, fdiv, ...
}
-multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
- def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
- "#UNOP "#NAME#"8m PSEUDO!",
- [(atomic_store_8 addr:$dst, dag8)]>;
- def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
- "#UNOP "#NAME#"16m PSEUDO!",
- [(atomic_store_16 addr:$dst, dag16)]>;
- def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
- "#UNOP "#NAME#"32m PSEUDO!",
- [(atomic_store_32 addr:$dst, dag32)]>;
- def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
- "#UNOP "#NAME#"64m PSEUDO!",
- [(atomic_store_64 addr:$dst, dag64)]>;
+multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
+ dag dag64> {
+ def : Pat<(atomic_store_8 addr:$dst, dag8),
+ (!cast<Instruction>(Name#8m) addr:$dst)>;
+ def : Pat<(atomic_store_16 addr:$dst, dag16),
+ (!cast<Instruction>(Name#16m) addr:$dst)>;
+ def : Pat<(atomic_store_32 addr:$dst, dag32),
+ (!cast<Instruction>(Name#32m) addr:$dst)>;
+ def : Pat<(atomic_store_64 addr:$dst, dag64),
+ (!cast<Instruction>(Name#64m) addr:$dst)>;
}
-let Defs = [EFLAGS], Predicates = [UseIncDec], SchedRW = [WriteMicrocoded] in {
- defm RELEASE_INC : RELEASE_UNOP<
+let Predicates = [UseIncDec] in {
+ defm : RELEASE_UNOP<"INC",
(add (atomic_load_8 addr:$dst), (i8 1)),
(add (atomic_load_16 addr:$dst), (i16 1)),
(add (atomic_load_32 addr:$dst), (i32 1)),
(add (atomic_load_64 addr:$dst), (i64 1))>;
- defm RELEASE_DEC : RELEASE_UNOP<
+ defm : RELEASE_UNOP<"DEC",
(add (atomic_load_8 addr:$dst), (i8 -1)),
(add (atomic_load_16 addr:$dst), (i16 -1)),
(add (atomic_load_32 addr:$dst), (i32 -1)),
(add (atomic_load_64 addr:$dst), (i64 -1))>;
}
-/*
-TODO: These don't work because the type inference of TableGen fails.
-TODO: find a way to fix it.
-let Defs = [EFLAGS] in {
- defm RELEASE_NEG : RELEASE_UNOP<
- (ineg (atomic_load_8 addr:$dst)),
- (ineg (atomic_load_16 addr:$dst)),
- (ineg (atomic_load_32 addr:$dst)),
- (ineg (atomic_load_64 addr:$dst))>;
-}
-// NOT doesn't set flags.
-defm RELEASE_NOT : RELEASE_UNOP<
- (not (atomic_load_8 addr:$dst)),
- (not (atomic_load_16 addr:$dst)),
- (not (atomic_load_32 addr:$dst)),
- (not (atomic_load_64 addr:$dst))>;
-*/
-let SchedRW = [WriteMicrocoded] in {
-def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#RELEASE_MOV8mi PSEUDO!",
- [(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
-def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
- "#RELEASE_MOV16mi PSEUDO!",
- [(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
-def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#RELEASE_MOV32mi PSEUDO!",
- [(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
-def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#RELEASE_MOV64mi32 PSEUDO!",
- [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
-
-def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
- "#RELEASE_MOV8mr PSEUDO!",
- [(atomic_store_8 addr:$dst, GR8 :$src)]>;
-def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
- "#RELEASE_MOV16mr PSEUDO!",
- [(atomic_store_16 addr:$dst, GR16:$src)]>;
-def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
- "#RELEASE_MOV32mr PSEUDO!",
- [(atomic_store_32 addr:$dst, GR32:$src)]>;
-def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
- "#RELEASE_MOV64mr PSEUDO!",
- [(atomic_store_64 addr:$dst, GR64:$src)]>;
-
-def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
- "#ACQUIRE_MOV8rm PSEUDO!",
- [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
-def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
- "#ACQUIRE_MOV16rm PSEUDO!",
- [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
-def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
- "#ACQUIRE_MOV32rm PSEUDO!",
- [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
-def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
- "#ACQUIRE_MOV64rm PSEUDO!",
- [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
-} // SchedRW
+defm : RELEASE_UNOP<"NEG",
+ (ineg (i8 (atomic_load_8 addr:$dst))),
+ (ineg (i16 (atomic_load_16 addr:$dst))),
+ (ineg (i32 (atomic_load_32 addr:$dst))),
+ (ineg (i64 (atomic_load_64 addr:$dst)))>;
+defm : RELEASE_UNOP<"NOT",
+ (not (i8 (atomic_load_8 addr:$dst))),
+ (not (i16 (atomic_load_16 addr:$dst))),
+ (not (i32 (atomic_load_32 addr:$dst))),
+ (not (i64 (atomic_load_64 addr:$dst)))>;
+
+def : Pat<(atomic_store_8 addr:$dst, (i8 imm:$src)),
+ (MOV8mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_16 addr:$dst, (i16 imm:$src)),
+ (MOV16mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 imm:$src)),
+ (MOV32mi addr:$dst, imm:$src)>;
+def : Pat<(atomic_store_64 addr:$dst, (i64immSExt32:$src)),
+ (MOV64mi32 addr:$dst, i64immSExt32:$src)>;
+
+def : Pat<(atomic_store_8 addr:$dst, GR8:$src),
+ (MOV8mr addr:$dst, GR8:$src)>;
+def : Pat<(atomic_store_16 addr:$dst, GR16:$src),
+ (MOV16mr addr:$dst, GR16:$src)>;
+def : Pat<(atomic_store_32 addr:$dst, GR32:$src),
+ (MOV32mr addr:$dst, GR32:$src)>;
+def : Pat<(atomic_store_64 addr:$dst, GR64:$src),
+ (MOV64mr addr:$dst, GR64:$src)>;
+
+def : Pat<(i8 (atomic_load_8 addr:$src)), (MOV8rm addr:$src)>;
+def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
+def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
+def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
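// These plain MOV selections lean on the x86-TSO memory model: naturally
// aligned loads and stores up to 64 bits are single-copy atomic, ordinary
// loads already have acquire semantics, and ordinary stores have release
// semantics, so e.g. a release store of an i32 can be just
//   movl %esi, (%rdi)
// with no extra fencing for the access itself.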
//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
@@ -1041,12 +1091,12 @@ def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
// binary size compared to a regular MOV, but it introduces an unnecessary
// load, so is not suitable for regular or optsize functions.
let Predicates = [OptForMinSize] in {
-def : Pat<(store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
-def : Pat<(store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
-def : Pat<(store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
-def : Pat<(store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
-def : Pat<(store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
-def : Pat<(store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
+def : Pat<(nonvolatile_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
+def : Pat<(nonvolatile_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
}
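// Rough size comparison for the 32-bit case (register-indirect address):
//   movl $0, (%rax)    C7 00 00 00 00 00    6 bytes
//   andl $0, (%rax)    83 20 00             3 bytes
// The AND/OR forms with a sign-extended 8-bit immediate trade an extra
// load-modify-store for a smaller encoding, which only pays off at minsize.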
// In kernel code model, we can get the address of a label
@@ -1128,14 +1178,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, NotUseRetpoline]>;
+ Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[Not64BitMode, IsNotPIC, NotUseRetpoline]>;
+ Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@@ -1147,21 +1197,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpoline]>;
+ Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpoline]>;
+ Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, UseRetpoline]>;
+ Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, UseRetpoline]>;
+ Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
@@ -1226,7 +1276,8 @@ defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
// i1 stored in one byte in zero-extended form.
// Upper bits cleanup should be executed before Store.
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
@@ -1237,9 +1288,11 @@ def : Pat<(zextloadi64i1 addr:$src),
// defined, avoiding partial-register updates.
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
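// Loose rationale for the 32-bit MOVZX plus EXTRACT_SUBREG forms above:
// writing only a 16-bit register merges with the stale upper bits and creates
// a partial-register dependence, whereas
//   movzbl (%rdi), %eax
// defines all of EAX, after which the 16-bit value is simply read as AX.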
@@ -1271,6 +1324,15 @@ def : Pat<(i64 (anyext GR16:$src)),
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>;
+// If this is an anyext of the remainder of an 8-bit sdivrem, use a MOVSX
+// instead of a MOVZX. The sdivrem lowering will emit a MOVSX to move
+// %ah to the lower byte of a register. By using a MOVSX here we allow a
+// post-isel peephole to merge the two MOVSX instructions into one.
+def anyext_sdiv : PatFrag<(ops node:$lhs), (anyext node:$lhs),[{
+ return (N->getOperand(0).getOpcode() == ISD::SDIVREM &&
+ N->getOperand(0).getResNo() == 1);
+}]>;
+def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;
// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
@@ -1305,17 +1367,15 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- KnownBits Known0;
- CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
- KnownBits Known1;
- CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+ KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+ KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
return (~Known0.Zero & ~Known1.Zero) == 0;
}]>;
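// Worked example of the predicate above: in (or (shl x, 8), (and y, 255)) the
// operands can never have a set bit in common, so OR and ADD produce the same
// result and the ADD form (which can become LEA or a three-address add) is
// usable.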
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
// Try this before the selecting to OR.
-let AddedComplexity = 5, SchedRW = [WriteALU] in {
+let SchedRW = [WriteALU] in {
let isConvertibleToThreeAddress = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
@@ -1375,8 +1435,7 @@ def ADD64ri32_DB : I<0, Pseudo,
def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- KnownBits Known;
- CurDAG->computeKnownBits(N->getOperand(1), Known);
+ KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1));
// If all possible ones in the RHS are set in the LHS then there can't be
// a borrow and we can use xor.
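    // For instance, with LHS == 0xFF and an RHS known to fit in the low four
    // bits, every bit the RHS could set is already set in the LHS, so
    // 0xFF - r == 0xFF ^ r for all such r and the subtract can become an xor.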
@@ -1973,6 +2032,15 @@ let Predicates = [UseIncDec] in {
def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+ def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>;
+ def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>;
+ def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>;
+ def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>;
+ def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>;
+ def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>;
+ def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>;
+ def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>;
}
// or reg/reg.
@@ -2081,23 +2149,3 @@ def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
let Predicates = [HasMOVBE] in {
def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
}
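// Quick arithmetic check: rotating a 16-bit value left by 8 moves the low
// byte into the high byte and vice versa, e.g. 0x1234 -> 0x3412, so
//   rolw $8, %ax
// byte-swaps AX without needing a 16-bit bswap encoding.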
-
-// These patterns are selected by some custom code in X86ISelDAGToDAG.cpp that
-// custom combines and+srl into BEXTR. We use these patterns to avoid a bunch
-// of manual code for folding loads.
-let Predicates = [HasBMI, NoTBM] in {
- def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
- (BEXTR32rr GR32:$src1, (MOV32ri imm:$src2))>;
- def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
- (BEXTR32rm addr:$src1, (MOV32ri imm:$src2))>;
- def : Pat<(X86bextr GR64:$src1, mov64imm32:$src2),
- (BEXTR64rr GR64:$src1,
- (SUBREG_TO_REG (i64 0),
- (MOV32ri64 mov64imm32:$src2),
- sub_32bit))>;
- def : Pat<(X86bextr (loadi64 addr:$src1), mov64imm32:$src2),
- (BEXTR64rm addr:$src1,
- (SUBREG_TO_REG (i64 0),
- (MOV32ri64 mov64imm32:$src2),
- sub_32bit))>;
-} // HasBMI, NoTBM
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
index 650bce74dcf2..a7c7aaab2285 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -222,11 +222,13 @@ let isCall = 1 in
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
- Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
+ Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
+ Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
- Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
+ Requires<[Not64BitMode,FavorMemIndirectCall,
+ NotUseRetpolineIndirectCalls]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
@@ -320,11 +322,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,NotUseRetpoline]>;
+ Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
- NotUseRetpoline]>;
+ NotUseRetpolineIndirectCalls]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@@ -379,11 +381,11 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
- Requires<[Not64BitMode,UseRetpoline]>;
+ Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
def RETPOLINE_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,UseRetpoline]>;
+ Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
// Retpoline variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
index 421792c5599f..c24d6d5b8df1 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -163,6 +163,26 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
[(set GR64:$dst, (sextloadi64i32 addr:$src))]>,
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
+// These instructions exist as a consequence of operand size prefix having
+// control of the destination size, but not the input size. Only support them
+// for the disassembler.
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
+def MOVSX16rr32: I<0x63, MRMSrcReg, (outs GR16:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALU]>, OpSize16, Requires<[In64BitMode]>;
+def MOVSX32rr32: I<0x63, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALU]>, OpSize32, Requires<[In64BitMode]>;
+let mayLoad = 1 in {
+def MOVSX16rm32: I<0x63, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALULd]>, OpSize16, Requires<[In64BitMode]>;
+def MOVSX32rm32: I<0x63, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteALULd]>, OpSize32, Requires<[In64BitMode]>;
+} // mayLoad = 1
+} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
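+// Concrete sketch of the encodings involved: opcode 0x63 always reads a
+// 32-bit source in 64-bit mode, while the 0x66 operand-size prefix (or its
+// absence) selects a 16-bit or 32-bit destination, so byte sequences such as
+// "66 63 c8" are legal encodings no compiler emits but a disassembler must
+// still decode.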
+
// movzbq and movzwq encodings for the disassembler
let hasSideEffects = 0 in {
def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
index a559f62c8f38..1a8e529431af 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -51,7 +51,7 @@ multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -71,7 +71,7 @@ multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -93,7 +93,7 @@ multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
@@ -192,7 +192,7 @@ multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
@@ -212,7 +212,7 @@ multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
@@ -234,7 +234,7 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
@@ -279,7 +279,7 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinscis
@@ -402,19 +402,19 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
@@ -438,19 +438,19 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
- VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
+ VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -477,19 +477,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
- Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold, sched.XMM.ReadAfterFold]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
- Sched<[sched.XMM.Folded, ReadAfterLd,
+ Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
- ReadAfterLd]>;
+ sched.XMM.ReadAfterFold]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
@@ -504,19 +504,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
- Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold, sched.YMM.ReadAfterFold]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
- Sched<[sched.YMM.Folded, ReadAfterLd,
+ Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256::$src3
- ReadAfterLd]>;
+ sched.YMM.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index cc81a919ec99..5912a3199613 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -592,10 +592,13 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
+let Predicates = [FPStackf32, HasCMov] in
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
+let Predicates = [FPStackf64, HasCMov] in
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
+let Predicates = [HasCMov] in
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 739275907978..11a27ba90586 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -75,7 +75,7 @@ def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, i8>,
SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisInt<3>]>>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -103,23 +103,22 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86vzext : SDNode<"X86ISD::VZEXT",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<1, 0>]>>;
-
-def X86vsext : SDNode<"X86ISD::VSEXT",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<1, 0>]>>;
-
def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<0, 1>]>;
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisOpSmallerThanOp<0, 1>]>;
+def SDTVmtrunc : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisOpSmallerThanOp<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>;
def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>;
def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>;
def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
+def X86vmtrunc : SDNode<"X86ISD::VMTRUNC", SDTVmtrunc>;
+def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
+def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
@@ -144,6 +143,14 @@ def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND",
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
+def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>>;
+
def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVT<2, i8>, SDTCisInt<0>]>;
@@ -182,7 +189,7 @@ def X86phminpos: SDNode<"X86ISD::PHMINPOS",
def X86vshiftuniform : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>, SDTCisInt<0>,
- SDTCisInt<1>]>;
+ SDTCisInt<2>]>;
def X86vshl : SDNode<"X86ISD::VSHL", X86vshiftuniform>;
def X86vsrl : SDNode<"X86ISD::VSRL", X86vshiftuniform>;
@@ -237,10 +244,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
-def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
-def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
-def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
-def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
@@ -535,6 +538,8 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisFP<1>,
SDTCisVT<2, i32>]>;
+def SDTSFloatToInt: SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisFP<1>,
+ SDTCisVec<1>]>;
def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
SDTCisVec<1>, SDTCisVT<2, i32>]>;
@@ -548,11 +553,15 @@ def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
def X86SintToFpRnd : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND", SDTintToFPRound>;
def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>;
+def X86cvtts2Int : SDNode<"X86ISD::CVTTS2SI", SDTSFloatToInt>;
+def X86cvtts2UInt : SDNode<"X86ISD::CVTTS2UI", SDTSFloatToInt>;
def X86cvtts2IntRnd : SDNode<"X86ISD::CVTTS2SI_RND", SDTSFloatToIntRnd>;
def X86cvtts2UIntRnd : SDNode<"X86ISD::CVTTS2UI_RND", SDTSFloatToIntRnd>;
-def X86cvts2si : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
-def X86cvts2usi : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
+def X86cvts2si : SDNode<"X86ISD::CVTS2SI", SDTSFloatToInt>;
+def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>;
+def X86cvts2siRnd : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
+def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
// Vector with rounding mode
@@ -581,6 +590,19 @@ def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
+def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisFP<1>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>;
+
+def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>;
+def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
+def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
+def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
+
+
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>]> >;
@@ -594,6 +616,13 @@ def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
SDTCisVT<2, i32>]> >;
+def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
+ SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisVT<2, i32>,
+ SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisSameNumEltsAs<1, 4>]> >;
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
@@ -641,28 +670,29 @@ def sdmem : Operand<v2f64> {
// SSE pattern fragments
//===----------------------------------------------------------------------===//
-// Vector load wrappers to prevent folding of non-temporal aligned loads on
-// supporting targets.
-def vecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return !useNonTemporalLoad(cast<LoadSDNode>(N));
-}]>;
-
// 128-bit load pattern fragments
-// NOTE: all 128-bit integer vector loads are promoted to v2i64
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vecload node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vecload node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vecload node:$ptr))>;
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
+def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
// 256-bit load pattern fragments
-// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vecload node:$ptr))>;
-def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vecload node:$ptr))>;
-def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vecload node:$ptr))>;
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
+def loadv16i16 : PatFrag<(ops node:$ptr), (v16i16 (load node:$ptr))>;
+def loadv32i8 : PatFrag<(ops node:$ptr), (v32i8 (load node:$ptr))>;
// 512-bit load pattern fragments
-def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vecload node:$ptr))>;
-def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vecload node:$ptr))>;
-def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vecload node:$ptr))>;
+def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
+def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
+def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
+def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
+def loadv32i16 : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
+def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
// 128-/256-/512-bit extload pattern fragments
def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
@@ -676,46 +706,63 @@ def alignedstore : PatFrag<(ops node:$val, node:$ptr),
return St->getAlignment() >= St->getMemoryVT().getStoreSize();
}]>;
-// Like 'load', but always requires 128-bit vector alignment.
-def alignedvecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+// Like 'load', but always requires vector size alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
auto *Ld = cast<LoadSDNode>(N);
- return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize() &&
- !useNonTemporalLoad(cast<LoadSDNode>(N));
+ return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
}]>;
// 128-bit aligned load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
- (v4f32 (alignedvecload node:$ptr))>;
+ (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
- (v2f64 (alignedvecload node:$ptr))>;
+ (v2f64 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
- (v2i64 (alignedvecload node:$ptr))>;
+ (v2i64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+ (v4i32 (alignedload node:$ptr))>;
+def alignedloadv8i16 : PatFrag<(ops node:$ptr),
+ (v8i16 (alignedload node:$ptr))>;
+def alignedloadv16i8 : PatFrag<(ops node:$ptr),
+ (v16i8 (alignedload node:$ptr))>;
// 256-bit aligned load pattern fragments
// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def alignedloadv8f32 : PatFrag<(ops node:$ptr),
- (v8f32 (alignedvecload node:$ptr))>;
-def alignedloadv4f64 : PatFrag<(ops node:$ptr),
- (v4f64 (alignedvecload node:$ptr))>;
-def alignedloadv4i64 : PatFrag<(ops node:$ptr),
- (v4i64 (alignedvecload node:$ptr))>;
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload node:$ptr))>;
+def alignedloadv8i32 : PatFrag<(ops node:$ptr),
+ (v8i32 (alignedload node:$ptr))>;
+def alignedloadv16i16 : PatFrag<(ops node:$ptr),
+ (v16i16 (alignedload node:$ptr))>;
+def alignedloadv32i8 : PatFrag<(ops node:$ptr),
+ (v32i8 (alignedload node:$ptr))>;
// 512-bit aligned load pattern fragments
def alignedloadv16f32 : PatFrag<(ops node:$ptr),
- (v16f32 (alignedvecload node:$ptr))>;
+ (v16f32 (alignedload node:$ptr))>;
def alignedloadv8f64 : PatFrag<(ops node:$ptr),
- (v8f64 (alignedvecload node:$ptr))>;
+ (v8f64 (alignedload node:$ptr))>;
def alignedloadv8i64 : PatFrag<(ops node:$ptr),
- (v8i64 (alignedvecload node:$ptr))>;
-
-// Like 'vecload', but uses special alignment checks suitable for use in
+ (v8i64 (alignedload node:$ptr))>;
+def alignedloadv16i32 : PatFrag<(ops node:$ptr),
+ (v16i32 (alignedload node:$ptr))>;
+def alignedloadv32i16 : PatFrag<(ops node:$ptr),
+ (v32i16 (alignedload node:$ptr))>;
+def alignedloadv64i8 : PatFrag<(ops node:$ptr),
+ (v64i8 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others. If the subtarget
// allows unaligned accesses, match any load, though this may require
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
-def memop : PatFrag<(ops node:$ptr), (vecload node:$ptr), [{
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
auto *Ld = cast<LoadSDNode>(N);
return Subtarget->hasSSEUnalignedMem() ||
Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
@@ -726,6 +773,9 @@ def memop : PatFrag<(ops node:$ptr), (vecload node:$ptr), [{
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
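A minimal standalone restatement of the alignment rule that the memop fragment above encodes; the helper name and parameters are illustrative and not part of the patch. A memory operand is foldable either because the subtarget reports fast unaligned SSE memory or because the load is naturally aligned for the memory VT.

  // Sketch of the memop predicate: fold when unaligned access is cheap or
  // when the load's alignment covers the memory VT's store size.
  static bool sseMemopMatches(bool HasSSEUnalignedMem, unsigned LoadAlignBytes,
                              unsigned MemVTStoreSizeBytes) {
    return HasSSEUnalignedMem || LoadAlignBytes >= MemVTStoreSizeBytes;
  }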
def X86masked_gather : SDNode<"X86ISD::MGATHER",
SDTypeProfile<2, 3, [SDTCisVec<0>,
@@ -828,6 +878,7 @@ def bc_v4f64 : PatFrag<(ops node:$in), (v4f64 (bitconvert node:$in))>;
// 512-bit bitconvert pattern fragments
def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>;
+def bc_v32i16 : PatFrag<(ops node:$in), (v32i16 (bitconvert node:$in))>;
def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index e56452362168..ab14ee7fadf2 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -411,8 +411,13 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasLoadFromStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -441,8 +446,13 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- const MachineMemOperand *Dummy;
- return hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ SmallVector<const MachineMemOperand *, 1> Accesses;
+ if (hasStoreToStackSlot(MI, Accesses)) {
+ FrameIndex =
+ cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
+ ->getFrameIndex();
+ return 1;
+ }
}
return 0;
}
@@ -708,7 +718,7 @@ bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
}
/// Check whether the shift count for a machine operand is non-zero.
-inline static unsigned getTruncatedShiftCount(MachineInstr &MI,
+inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
unsigned ShiftAmtOperandIdx) {
// The shift count is six bits with the REX.W prefix and five bits without.
unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
@@ -729,8 +739,7 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, unsigned &NewSrc,
- bool &isKill, bool &isUndef,
- MachineOperand &ImplicitOp,
+ bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
@@ -747,7 +756,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
if (Opc != X86::LEA64_32r) {
NewSrc = SrcReg;
isKill = Src.isKill();
- isUndef = Src.isUndef();
+ assert(!Src.isUndef() && "Undef op doesn't need optimization");
if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
!MF.getRegInfo().constrainRegClass(NewSrc, RC))
@@ -764,7 +773,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
isKill = Src.isKill();
- isUndef = Src.isUndef();
+ assert(!Src.isUndef() && "Undef op doesn't need optimization");
} else {
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
@@ -776,7 +785,6 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// Which is obviously going to be dead after we're done with it.
isKill = true;
- isUndef = false;
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
@@ -786,88 +794,99 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
return true;
}
-/// Helper for convertToThreeAddress when 16-bit LEA is disabled, use 32-bit
-/// LEA to form 3-address code by promoting to a 32-bit superregister and then
-/// truncating back down to a 16-bit subregister.
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
LiveVariables *LV) const {
- MachineBasicBlock::iterator MBBI = MI.getIterator();
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
- bool isDead = MI.getOperand(0).isDead();
- bool isKill = MI.getOperand(1).isKill();
-
+ // We handle 8-bit adds and various 16-bit opcodes in the switch below.
+ bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- unsigned Opc, leaInReg;
- if (Subtarget.is64Bit()) {
- Opc = X86::LEA64_32r;
- leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
- } else {
- Opc = X86::LEA32r;
- leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
- }
+ assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
+ *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
+ "Unexpected type for LEA transform");
+
+ // TODO: For a 32-bit target, we need to adjust the LEA variables with
+ // something like this:
+ // Opcode = X86::LEA32r;
+ // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ // OutRegLEA =
+ // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
+ // : RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ if (!Subtarget.is64Bit())
+ return nullptr;
+
+ unsigned Opcode = X86::LEA64_32r;
+ unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+ // we will be shifting and then extracting the lower 8/16-bits.
// This has the potential to cause partial register stall. e.g.
// movw (%rbp,%rcx,2), %dx
// leal -65(%rdx), %esi
// But testing has shown this *does* help performance in 64-bit mode (at
// least on modern x86 machines).
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
+ unsigned Dest = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ bool IsDead = MI.getOperand(0).isDead();
+ bool IsKill = MI.getOperand(1).isKill();
+ unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
+ assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
+ BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
MachineInstr *InsMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(leaInReg, RegState::Define, X86::sub_16bit)
- .addReg(Src, getKillRegState(isKill));
+ .addReg(InRegLEA, RegState::Define, SubReg)
+ .addReg(Src, getKillRegState(IsKill));
MachineInstrBuilder MIB =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opc), leaOutReg);
+ BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
case X86::SHL16ri: {
unsigned ShAmt = MI.getOperand(2).getImm();
MIB.addReg(0).addImm(1ULL << ShAmt)
- .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
+ .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
break;
}
case X86::INC16r:
- addRegOffset(MIB, leaInReg, true, 1);
+ addRegOffset(MIB, InRegLEA, true, 1);
break;
case X86::DEC16r:
- addRegOffset(MIB, leaInReg, true, -1);
+ addRegOffset(MIB, InRegLEA, true, -1);
break;
+ case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- addRegOffset(MIB, leaInReg, true, MI.getOperand(2).getImm());
+ addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
+ case X86::ADD8rr:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
unsigned Src2 = MI.getOperand(2).getReg();
- bool isKill2 = MI.getOperand(2).isKill();
- unsigned leaInReg2 = 0;
+ bool IsKill2 = MI.getOperand(2).isKill();
+ assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
+ unsigned InRegLEA2 = 0;
MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
- // ADD16rr killed %reg1028, %reg1028
+ // ADD8rr/ADD16rr killed %reg1028, %reg1028
// just a single insert_subreg.
- addRegReg(MIB, leaInReg, true, leaInReg, false);
+ addRegReg(MIB, InRegLEA, true, InRegLEA, false);
} else {
if (Subtarget.is64Bit())
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+ InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
else
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+ InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ // we will be shifting and then extracting the lower 8/16-bits.
+ BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
- .addReg(Src2, getKillRegState(isKill2));
- addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ .addReg(InRegLEA2, RegState::Define, SubReg)
+ .addReg(Src2, getKillRegState(IsKill2));
+ addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
}
- if (LV && isKill2 && InsMI2)
+ if (LV && IsKill2 && InsMI2)
LV->replaceKillInstruction(Src2, MI, *InsMI2);
break;
}
@@ -876,16 +895,16 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+ .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
+ .addReg(OutRegLEA, RegState::Kill, SubReg);
if (LV) {
- // Update live variables
- LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
- LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
- if (isKill)
+ // Update live variables.
+ LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
+ LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
+ if (IsKill)
LV->replaceKillInstruction(Src, MI, *InsMI);
- if (isDead)
+ if (IsDead)
LV->replaceKillInstruction(Dest, MI, *ExtMI);
}
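One step of reasoning the widening transform above depends on, restated as a tiny standalone C++ sketch (the helper is illustrative, not from the patch): addition is congruent modulo 2^N, so the low 8/16 bits of the 32-bit LEA result depend only on the low 8/16 bits of its operands, and the undefined upper bits of the IMPLICIT_DEF'd inputs cannot leak into the sub-register that is copied back out.

  #include <cstdint>
  // The low byte of a 32-bit add equals the 8-bit add of the low bytes,
  // regardless of what sits in the upper bits of the inputs.
  static uint8_t add8ViaWideAdd(uint32_t A, uint32_t B) {
    return static_cast<uint8_t>(A + B); // == uint8_t(uint8_t(A) + uint8_t(B))
  }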
@@ -916,12 +935,18 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
const MachineOperand &Dest = MI.getOperand(0);
const MachineOperand &Src = MI.getOperand(1);
+ // Ideally, operations with undef should be folded before we get here, but we
+ // can't guarantee it. Bail out because optimizing undefs is a waste of time.
+ // Without this, we have to forward undef state to new register operands to
+ // avoid machine verifier errors.
+ if (Src.isUndef())
+ return nullptr;
+ if (MI.getNumOperands() > 2)
+ if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
+ return nullptr;
+
MachineInstr *NewMI = nullptr;
- // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
- // we have better subtarget support, enable the 16-bit LEA generation here.
- // 16-bit LEA is also slow on Core2.
- bool DisableLEA16 = true;
- bool is64Bit = Subtarget.is64Bit();
+ bool Is64Bit = Subtarget.is64Bit();
unsigned MIOpc = MI.getOpcode();
switch (MIOpc) {
@@ -951,14 +976,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
// LEA can't handle ESP.
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
@@ -966,7 +991,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addReg(SrcReg, getKillRegState(isKill))
.addImm(0)
.addReg(0);
if (ImplicitOp.getReg() != 0)
@@ -978,37 +1003,26 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHL16ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
-
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .add(Dest)
- .addReg(0)
- .addImm(1ULL << ShAmt)
- .add(Src)
- .addImm(0)
- .addReg(0);
- break;
+ if (!isTruncatedShiftCountForLEA(ShAmt))
+ return nullptr;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
}
case X86::INC64r:
case X86::INC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- bool isKill, isUndef;
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
+ (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
+ ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1016,30 +1030,23 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::INC16r:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), 1);
- break;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::DEC64r:
case X86::DEC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
+ ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg, getUndefRegState(isUndef) |
- getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1048,13 +1055,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::DEC16r:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), -1);
- break;
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::ADD64rr:
case X86::ADD64rr_DB:
case X86::ADD32rr:
@@ -1064,21 +1065,21 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
Opc = X86::LEA64r;
else
- Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
const MachineOperand &Src2 = MI.getOperand(2);
- bool isKill2, isUndef2;
+ bool isKill2;
unsigned SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
- SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
+ SrcReg2, isKill2, ImplicitOp2, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
@@ -1088,36 +1089,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MIB.add(ImplicitOp2);
NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
-
- // Preserve undefness of the operands.
- NewMI->getOperand(1).setIsUndef(isUndef);
- NewMI->getOperand(3).setIsUndef(isUndef2);
-
if (LV && Src2.isKill())
LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
break;
}
+ case X86::ADD8rr:
case X86::ADD16rr:
- case X86::ADD16rr_DB: {
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Src2 = MI.getOperand(2).getReg();
- bool isKill2 = MI.getOperand(2).isKill();
- NewMI = addRegReg(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest),
- Src.getReg(), Src.isKill(), Src2, isKill2);
-
- // Preserve undefness of the operands.
- bool isUndef = MI.getOperand(1).isUndef();
- bool isUndef2 = MI.getOperand(2).isUndef();
- NewMI->getOperand(1).setIsUndef(isUndef);
- NewMI->getOperand(3).setIsUndef(isUndef2);
-
- if (LV && isKill2)
- LV->replaceKillInstruction(Src2, MI, *NewMI);
- break;
- }
+ case X86::ADD16rr_DB:
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
@@ -1132,38 +1111,30 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32ri_DB:
case X86::ADD32ri8_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill, isUndef;
+ bool isKill;
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, isUndef, ImplicitOp, LV))
+ SrcReg, isKill, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
- .addReg(SrcReg, getUndefRegState(isUndef) |
- getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
NewMI = addOffset(MIB, MI.getOperand(2));
break;
}
+ case X86::ADD8ri:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
- : nullptr;
- assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src),
- MI.getOperand(2));
- break;
-
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::VMOVDQU8Z128rmk:
case X86::VMOVDQU8Z256rmk:
case X86::VMOVDQU8Zrmk:
@@ -2540,7 +2511,7 @@ void X86InstrInfo::replaceBranchWithTailCall(
// call. This way they still appear live across the call.
LivePhysRegs LiveRegs(getRegisterInfo());
LiveRegs.addLiveOuts(MBB);
- SmallVector<std::pair<unsigned, const MachineOperand *>, 8> Clobbers;
+ SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
LiveRegs.stepForward(*MIB, Clobbers);
for (const auto &C : Clobbers) {
MIB.addReg(C.first, RegState::Implicit);
@@ -2630,6 +2601,11 @@ bool X86InstrInfo::AnalyzeBranchImpl(
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
+ // In practice we should never have an undef eflags operand; if we do,
+ // abort here, as we are not prepared to preserve the flag.

+ if (I->getOperand(1).isUndef())
+ return true;
+
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
@@ -3112,9 +3088,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
report_fatal_error("Cannot emit physreg copy instruction");
}
-bool X86InstrInfo::isCopyInstr(const MachineInstr &MI,
- const MachineOperand *&Src,
- const MachineOperand *&Dest) const {
+bool X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
+ const MachineOperand *&Src,
+ const MachineOperand *&Dest) const {
if (MI.isMoveReg()) {
Dest = &MI.getOperand(0);
Src = &MI.getOperand(1);
@@ -3242,9 +3218,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
}
-bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool X86InstrInfo::getMemOperandWithOffset(
+ MachineInstr &MemOp, MachineOperand *&BaseOp, int64_t &Offset,
+ const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
if (MemRefBegin < 0)
@@ -3252,11 +3228,10 @@ bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
MemRefBegin += X86II::getOperandBias(Desc);
- MachineOperand &BaseMO = MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
- if (!BaseMO.isReg()) // Can be an MO_FrameIndex
+ BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
+ if (!BaseOp->isReg()) // Can be an MO_FrameIndex
return false;
- BaseReg = BaseMO.getReg();
if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
return false;
@@ -3272,6 +3247,8 @@ bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
Offset = DispMO.getImm();
+ assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
+ "operands of type register.");
return true;
}
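A hedged caller sketch for the renamed hook (the wrapper function and includes are assumptions for illustration, not part of the patch): the new signature hands back the base operand itself rather than just a register number, and only succeeds for a plain [base + disp] form with scale 1 and no index register.

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
  #include "llvm/Support/Debug.h"
  using namespace llvm;

  static void dumpSimpleAddress(const TargetInstrInfo &TII, MachineInstr &MemMI,
                                const TargetRegisterInfo *TRI) {
    MachineOperand *BaseOp = nullptr;
    int64_t Offset = 0;
    if (TII.getMemOperandWithOffset(MemMI, BaseOp, Offset, TRI))
      dbgs() << "base reg " << BaseOp->getReg() << ", offset " << Offset << "\n";
  }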
@@ -3303,29 +3280,25 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
(Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
- DebugLoc DL = MBB.findDebugLoc(MI);
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
.addReg(SrcReg, getKillRegState(isKill));
}
-void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
+void X86InstrInfo::storeRegToAddr(
+ MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
+ ArrayRef<MachineMemOperand *> MMOs,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = MMOBegin != MMOEnd &&
- (*MMOBegin)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.add(Addr[i]);
MIB.addReg(SrcReg, getKillRegState(isKill));
- (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
NewMIs.push_back(MIB);
}
@@ -3341,26 +3314,23 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
(Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
- DebugLoc DL = MBB.findDebugLoc(MI);
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
}
-void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
+void X86InstrInfo::loadRegFromAddr(
+ MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
+ ArrayRef<MachineMemOperand *> MMOs,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = MMOBegin != MMOEnd &&
- (*MMOBegin)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
MIB.add(Addr[i]);
- (*MIB).setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(MMOs);
NewMIs.push_back(MIB);
}
@@ -3451,9 +3421,10 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// This function can be extended later on.
/// SrcReg, SrcRegs: register operands for FlagI.
/// ImmValue: immediate for FlagI if it takes an immediate.
-inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
- unsigned SrcReg2, int ImmMask,
- int ImmValue, MachineInstr &OI) {
+inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
+ unsigned SrcReg, unsigned SrcReg2,
+ int ImmMask, int ImmValue,
+ const MachineInstr &OI) {
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
(FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
(FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
@@ -3484,7 +3455,9 @@ inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
/// Check whether the definition can be converted
/// to remove a comparison against zero.
-inline static bool isDefConvertible(MachineInstr &MI) {
+inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
+ NoSignFlag = false;
+
switch (MI.getOpcode()) {
default: return false;
@@ -3549,8 +3522,6 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
case X86::ANDN32rr: case X86::ANDN32rm:
case X86::ANDN64rr: case X86::ANDN64rm:
- case X86::BEXTR32rr: case X86::BEXTR64rr:
- case X86::BEXTR32rm: case X86::BEXTR64rm:
case X86::BLSI32rr: case X86::BLSI32rm:
case X86::BLSI64rr: case X86::BLSI64rm:
case X86::BLSMSK32rr:case X86::BLSMSK32rm:
@@ -3568,8 +3539,6 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::TZCNT16rr: case X86::TZCNT16rm:
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
- case X86::BEXTRI32ri: case X86::BEXTRI32mi:
- case X86::BEXTRI64ri: case X86::BEXTRI64mi:
case X86::BLCFILL32rr: case X86::BLCFILL32rm:
case X86::BLCFILL64rr: case X86::BLCFILL64rm:
case X86::BLCI32rr: case X86::BLCI32rm:
@@ -3584,12 +3553,23 @@ inline static bool isDefConvertible(MachineInstr &MI) {
case X86::BLSFILL64rr: case X86::BLSFILL64rm:
case X86::BLSIC32rr: case X86::BLSIC32rm:
case X86::BLSIC64rr: case X86::BLSIC64rm:
+ case X86::T1MSKC32rr: case X86::T1MSKC32rm:
+ case X86::T1MSKC64rr: case X86::T1MSKC64rm:
+ case X86::TZMSK32rr: case X86::TZMSK32rm:
+ case X86::TZMSK64rr: case X86::TZMSK64rm:
+ return true;
+ case X86::BEXTR32rr: case X86::BEXTR64rr:
+ case X86::BEXTR32rm: case X86::BEXTR64rm:
+ case X86::BEXTRI32ri: case X86::BEXTRI32mi:
+ case X86::BEXTRI64ri: case X86::BEXTRI64mi:
+ // BEXTR doesn't update the sign flag so we can't use it.
+ NoSignFlag = true;
return true;
}
}
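A condensed restatement of how this hunk cooperates with the optimizeCompareInstr change further down (the helper is illustrative, not part of the patch): isDefConvertible now reports NoSignFlag for the BEXTR/BEXTRI family, and a compare against zero may only be removed for an EFLAGS user that reads SF when the defining instruction actually produces SF.

  // True when eliminating the compare is still allowed for this user.
  static bool mayDropCmpForUser(bool NoSignFlag, bool UserReadsSignFlag) {
    return !(UserReadsSignFlag && NoSignFlag);
  }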
/// Check whether the use can be converted to remove a comparison against zero.
-static X86::CondCode isUseDefConvertible(MachineInstr &MI) {
+static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
case X86::LZCNT16rr: case X86::LZCNT16rm:
@@ -3604,12 +3584,12 @@ static X86::CondCode isUseDefConvertible(MachineInstr &MI) {
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
return X86::COND_B;
- case X86::BSF16rr:
- case X86::BSF16rm:
- case X86::BSF32rr:
- case X86::BSF32rm:
- case X86::BSF64rr:
- case X86::BSF64rm:
+ case X86::BSF16rr: case X86::BSF16rm:
+ case X86::BSF32rr: case X86::BSF32rm:
+ case X86::BSF64rr: case X86::BSF64rm:
+ case X86::BSR16rr: case X86::BSR16rm:
+ case X86::BSR32rr: case X86::BSR32rm:
+ case X86::BSR64rr: case X86::BSR64rm:
return X86::COND_E;
}
}
@@ -3687,8 +3667,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// instruction we can eliminate the compare iff the use sets EFLAGS in the
// right way.
bool ShouldUpdateCC = false;
+ bool NoSignFlag = false;
X86::CondCode NewCC = X86::COND_INVALID;
- if (IsCmpZero && !isDefConvertible(*MI)) {
+ if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
// Scan forward from the use until we hit the use we're looking for or the
// compare instruction.
for (MachineBasicBlock::iterator J = MI;; ++J) {
@@ -3807,6 +3788,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
case X86::COND_O: case X86::COND_NO:
// CF and OF are used, we can't perform this optimization.
return false;
+ case X86::COND_S: case X86::COND_NS:
+ // If SF is used, but the instruction doesn't update the SF, then we
+ // can't do the optimization.
+ if (NoSignFlag)
+ return false;
+ break;
}
// If we're updating the condition code check if we have to reverse the
@@ -4267,9 +4254,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::VMOVUPSZ256mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
- case X86::MOV32ri64:
+ case X86::MOV32ri64: {
+ unsigned Reg = MIB->getOperand(0).getReg();
+ unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
MI.setDesc(get(X86::MOV32ri));
+ MIB->getOperand(0).setReg(Reg32);
+ MIB.addReg(Reg, RegState::ImplicitDefine);
return true;
+ }
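A brief note on why this expansion is sound, with a trivial standalone sketch (the function is illustrative, not part of the patch): an x86-64 write to a 32-bit register zero-extends into the containing 64-bit register, so emitting MOV32ri to the sub_32bit register while marking the full register as implicitly defined yields the same 64-bit value the MOV32ri64 pseudo promised.

  #include <cstdint>
  static uint64_t mov32ri64Result(uint32_t Imm) {
    return static_cast<uint64_t>(Imm); // upper 32 bits are zero, as on hardware
  }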
// KNL does not recognize dependency-breaking idioms for mask registers,
// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
@@ -5353,6 +5345,54 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
+static SmallVector<MachineMemOperand *, 2>
+extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
+ SmallVector<MachineMemOperand *, 2> LoadMMOs;
+
+ for (MachineMemOperand *MMO : MMOs) {
+ if (!MMO->isLoad())
+ continue;
+
+ if (!MMO->isStore()) {
+ // Reuse the MMO.
+ LoadMMOs.push_back(MMO);
+ } else {
+ // Clone the MMO and unset the store flag.
+ LoadMMOs.push_back(MF.getMachineMemOperand(
+ MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOStore,
+ MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering()));
+ }
+ }
+
+ return LoadMMOs;
+}
+
+static SmallVector<MachineMemOperand *, 2>
+extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
+ SmallVector<MachineMemOperand *, 2> StoreMMOs;
+
+ for (MachineMemOperand *MMO : MMOs) {
+ if (!MMO->isStore())
+ continue;
+
+ if (!MMO->isLoad()) {
+ // Reuse the MMO.
+ StoreMMOs.push_back(MMO);
+ } else {
+ // Clone the MMO and unset the load flag.
+ StoreMMOs.push_back(MF.getMachineMemOperand(
+ MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOLoad,
+ MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering()));
+ }
+ }
+
+ return StoreMMOs;
+}
+
bool X86InstrInfo::unfoldMemoryOperand(
MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
@@ -5397,9 +5437,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
// Emit the load instruction.
if (UnfoldLoad) {
- std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
+ auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
+ loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs);
if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
@@ -5464,9 +5503,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
// Emit the store instruction.
if (UnfoldStore) {
const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
- std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
+ auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs);
}
return true;
@@ -5511,26 +5549,21 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
SDNode *Load = nullptr;
if (FoldedLoad) {
EVT VT = *TRI.legalclasstypes_begin(*RC);
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
- if (!(*MMOs.first) &&
- RC == &X86::VR128RegClass &&
+ auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
+ if (MMOs.empty() && RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned load.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = (*MMOs.first) &&
- (*MMOs.first)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
VT, MVT::Other, AddrOps);
NewNodes.push_back(Load);
// Preserve memory reference information.
- cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs);
}
// Emit the data processing instruction.
@@ -5580,27 +5613,22 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.pop_back();
AddrOps.push_back(SDValue(NewNode, 0));
AddrOps.push_back(Chain);
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
- if (!(*MMOs.first) &&
- RC == &X86::VR128RegClass &&
+ auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
+ if (MMOs.empty() && RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned store.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = (*MMOs.first) &&
- (*MMOs.first)->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
SDNode *Store =
DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
dl, MVT::Other, AddrOps);
NewNodes.push_back(Store);
// Preserve memory reference information.
- cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
+ DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs);
}
return true;
@@ -6511,6 +6539,19 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
// All domains are valid.
return 0xe;
+ case X86::MOVHLPSrr:
+ // We can swap domains when both inputs are the same register.
+ // FIXME: This doesn't catch all the cases we would like. If the input
+ // register isn't KILLed by the instruction, the two-address instruction
+ // pass puts a COPY on one input. The other input uses the original
+ // register. This prevents the same physical register from being used by
+ // both inputs.
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+ MI.getOperand(0).getSubReg() == 0 &&
+ MI.getOperand(1).getSubReg() == 0 &&
+ MI.getOperand(2).getSubReg() == 0)
+ return 0x6;
+ return 0;
}
return 0;
}
@@ -6617,6 +6658,20 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
MI.setDesc(get(table[Domain - 1]));
return true;
}
+ case X86::UNPCKHPDrr:
+ case X86::MOVHLPSrr:
+ // We just need to commute the instruction which will switch the domains.
+ if (Domain != dom && Domain != 3 &&
+ MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+ MI.getOperand(0).getSubReg() == 0 &&
+ MI.getOperand(1).getSubReg() == 0 &&
+ MI.getOperand(2).getSubReg() == 0) {
+ commuteInstruction(MI, false);
+ return true;
+ }
+ // We must always return true for MOVHLPSrr.
+ if (Opcode == X86::MOVHLPSrr)
+ return true;
}
return false;
}
@@ -7339,7 +7394,8 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
{MO_TLVP, "x86-tlvp"},
{MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
- {MO_SECREL, "x86-secrel"}};
+ {MO_SECREL, "x86-secrel"},
+ {MO_COFFSTUB, "x86-coffstub"}};
return makeArrayRef(TargetFlags);
}
@@ -7396,12 +7452,28 @@ namespace {
.addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
.addReg(0);
} else if (TM->getCodeModel() == CodeModel::Large) {
- // Loading the GOT in the large code model requires math with labels,
- // so we use a pseudo instruction and expand it during MC emission.
- unsigned Scratch = RegInfo.createVirtualRegister(&X86::GR64RegClass);
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVGOT64r), PC)
- .addReg(Scratch, RegState::Undef | RegState::Define)
- .addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+ // In the large code model, we are aiming for this code, though the
+ // register allocation may vary:
+ // leaq .LN$pb(%rip), %rax
+ // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
+ // addq %rcx, %rax
+ // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
+ unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ unsigned GOTReg =
+ RegInfo.createVirtualRegister(&X86::GR64RegClass);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addSym(MF.getPICBaseSymbol())
+ .addReg(0);
+ std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
+ .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_PIC_BASE_OFFSET);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
+ .addReg(PBReg, RegState::Kill)
+ .addReg(GOTReg, RegState::Kill);
} else {
llvm_unreachable("unexpected code model");
}
@@ -7736,3 +7808,6 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
return It;
}
+
+#define GET_INSTRINFO_HELPERS
+#include "X86GenInstrInfo.inc"
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index b1ceb767cce4..159cb50afc5c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -117,6 +117,7 @@ inline static bool isGlobalStubReference(unsigned char TargetFlag) {
case X86II::MO_GOT: // normal GOT reference.
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
+ case X86II::MO_COFFSTUB: // COFF .refptr stub.
return true;
default:
return false;
@@ -257,7 +258,7 @@ public:
/// operand to the LEA instruction.
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc,
- bool &isKill, bool &isUndef, MachineOperand &ImplicitOp,
+ bool &isKill, MachineOperand &ImplicitOp,
LiveVariables *LV) const;
/// convertToThreeAddress - This method must be implemented by targets that
@@ -326,9 +327,9 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
bool analyzeBranchPredicate(MachineBasicBlock &MBB,
TargetInstrInfo::MachineBranchPredicate &MBP,
bool AllowModify = false) const override;
@@ -348,8 +349,6 @@ public:
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
- bool isCopyInstr(const MachineInstr &MI, const MachineOperand *&Src,
- const MachineOperand *&Dest) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned SrcReg,
bool isKill, int FrameIndex,
@@ -359,8 +358,7 @@ public:
void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
+ ArrayRef<MachineMemOperand *> MMOs,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -371,8 +369,7 @@ public:
void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
const TargetRegisterClass *RC,
- MachineInstr::mmo_iterator MMOBegin,
- MachineInstr::mmo_iterator MMOEnd,
+ ArrayRef<MachineMemOperand *> MMOs,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
bool expandPostRAPseudo(MachineInstr &MI) const override;
@@ -561,6 +558,9 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
+#define GET_INSTRINFO_HELPER_DECLS
+#include "X86GenInstrInfo.inc"
+
protected:
/// Commutes the operands in the given instruction by changing the operands
/// order and/or changing the instruction's opcode and/or the immediate value
@@ -577,7 +577,16 @@ protected:
unsigned CommuteOpIdx1,
unsigned CommuteOpIdx2) const override;
+ /// If the specified machine instruction is an instruction that moves/copies
+ /// a value from one register to another, return true along with the
+ /// @Source machine operand and the @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
+ /// This is a helper for convertToThreeAddress for 8- and 16-bit instructions.
+ /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
+ /// super-register and then truncating back down to an 8/16-bit sub-register.
MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineInstr &MI,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index bc7afd32d494..e53f83baa3c6 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -17,10 +17,6 @@
// X86 specific DAG Nodes.
//
-def SDTIntShiftDOp: SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisInt<3>]>;
-
def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
@@ -257,8 +253,6 @@ def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
-def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
-def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
@@ -282,15 +276,10 @@ def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
-def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
+def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
+
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
@@ -955,8 +944,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
-def UseRetpoline : Predicate<"Subtarget->useRetpoline()">;
-def NotUseRetpoline : Predicate<"!Subtarget->useRetpoline()">;
+def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
+def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -1210,12 +1199,12 @@ def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
-let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in {
+let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in {
def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>,
OpSize16;
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>,
OpSize32, Requires<[Not64BitMode]>;
-} // mayStore, mayLoad, WriteRMW
+} // mayStore, mayLoad, SchedRW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
@@ -1243,7 +1232,7 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
Requires<[Not64BitMode]>;
} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>,
OpSize16;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
@@ -1302,7 +1291,7 @@ def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
OpSize32, Requires<[In64BitMode]>;
let mayStore = 1, SchedRW = [WriteStore] in {
@@ -1314,7 +1303,7 @@ def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
OpSize32, Requires<[In64BitMode]>;
} // mayLoad, mayStore, SchedRW
@@ -1491,7 +1480,7 @@ def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, imm:$src)]>;
@@ -1505,7 +1494,7 @@ def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)]>;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isMoveImm = 1 in {
def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
"movabs{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, relocImm:$src)]>;
@@ -1771,7 +1760,7 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
// only for now. These instructions are also slow on modern CPUs so that's
// another reason to avoid generating them.
-let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteBitTestRegLd] in {
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[]>, OpSize16, TB, NotMemoryFoldable;
@@ -1799,7 +1788,7 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
// Note that these instructions aren't slow because that only applies when the
// other operand is in a register. When it's an immediate, bt is still fast.
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTestImmLd] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1),
@@ -1818,7 +1807,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // SchedRW
let hasSideEffects = 0 in {
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1830,7 +1819,7 @@ def BTC64rr : RI<0xBB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1842,7 +1831,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1851,7 +1840,7 @@ def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
@@ -1861,7 +1850,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1873,7 +1862,7 @@ def BTR64rr : RI<0xB3, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1885,7 +1874,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1896,7 +1885,7 @@ def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1908,7 +1897,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1920,7 +1909,7 @@ def BTS64rr : RI<0xAB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2
NotMemoryFoldable;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1932,7 +1921,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1941,7 +1930,7 @@ def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$sr
"bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
@@ -1994,7 +1983,7 @@ multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag>
defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap">, NotMemoryFoldable;
// Swap between registers.
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteXCHG] in {
let Constraints = "$src1 = $dst1, $src2 = $dst2", hasSideEffects = 0 in {
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst1, GR8:$dst2),
(ins GR8:$src1, GR8:$src2),
@@ -2027,7 +2016,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
} // SchedRW
let hasSideEffects = 0, Constraints = "$src1 = $dst1, $src2 = $dst2",
- Defs = [EFLAGS], SchedRW = [WriteALU] in {
+ Defs = [EFLAGS], SchedRW = [WriteXCHG] in {
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst1, GR8:$dst2),
(ins GR8:$src1, GR8:$src2),
"xadd{b}\t{$src2, $src1|$src1, $src2}", []>, TB;
@@ -2061,7 +2050,7 @@ def XADD64rm : RI<0xC1, MRMSrcMem, (outs GR64:$dst),
}
-let SchedRW = [WriteALU], hasSideEffects = 0 in {
+let SchedRW = [WriteCMPXCHG], hasSideEffects = 0 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB,
@@ -2080,7 +2069,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
NotMemoryFoldable;
} // SchedRW, hasSideEffects
-let SchedRW = [WriteALULd, WriteRMW], mayLoad = 1, mayStore = 1,
+let SchedRW = [WriteCMPXCHGRMW], mayLoad = 1, mayStore = 1,
hasSideEffects = 0 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -2368,11 +2357,11 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
let hasSideEffects = 0 in {
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[WriteALU]>;
+ T8PS, VEX_4V, Sched<[WriteBLS]>;
let mayLoad = 1 in
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[WriteALULd]>;
+ T8PS, VEX_4V, Sched<[WriteBLS.Folded]>;
}
}
@@ -2389,6 +2378,16 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
// Pattern fragments to auto generate BMI instructions.
//===----------------------------------------------------------------------===//
+def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86or_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86xor_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
let Predicates = [HasBMI] in {
// FIXME: patterns for the load versions are not implemented
def : Pat<(and GR32:$src, (add GR32:$src, -1)),
@@ -2405,6 +2404,14 @@ let Predicates = [HasBMI] in {
(BLSI32rr GR32:$src)>;
def : Pat<(and GR64:$src, (ineg GR64:$src)),
(BLSI64rr GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+ // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSMSK64rr GR64:$src)>;
}
multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -2423,7 +2430,7 @@ multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ Sched.ReadAfterFold]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -2449,14 +2456,14 @@ multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ Sched.ReadAfterFold]>;
}
let Predicates = [HasBMI2], Defs = [EFLAGS] in {
defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
- int_x86_bmi_bzhi_32, loadi32, WriteBZHI>;
+ X86bzhi, loadi32, WriteBZHI>;
defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
- int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W;
+ X86bzhi, loadi64, WriteBZHI>, VEX_W;
}
def CountTrailingOnes : SDNodeXForm<imm, [{
@@ -2497,84 +2504,6 @@ let Predicates = [HasBMI2, NoTBM] in {
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
}
-let Predicates = [HasBMI2] in {
- multiclass _bmi_bzhi_pattern<dag regpattern, dag mempattern, RegisterClass RC,
- ValueType VT, Instruction DstInst,
- Instruction DstMemInst> {
- def : Pat<regpattern,
- (DstInst RC:$src,
- (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
- def : Pat<mempattern,
- (DstMemInst addr:$src,
- (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
- }
-
- multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT,
- Instruction DstInst, X86MemOperand x86memop,
- Instruction DstMemInst> {
- // x & ((1 << y) - 1)
- defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)),
- (and (x86memop addr:$src),
- (add (shl 1, GR8:$lz), -1)),
- RC, VT, DstInst, DstMemInst>;
-
- // x & ~(-1 << y)
- defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)),
- (and (x86memop addr:$src),
- (xor (shl -1, GR8:$lz), -1)),
- RC, VT, DstInst, DstMemInst>;
-
- // x & (-1 >> (bitwidth - y))
- defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))),
- (and (x86memop addr:$src),
- (srl -1, (sub bitwidth, GR8:$lz))),
- RC, VT, DstInst, DstMemInst>;
-
- // x << (bitwidth - y) >> (bitwidth - y)
- defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
- (sub bitwidth, GR8:$lz)),
- (srl (shl (x86memop addr:$src),
- (sub bitwidth, GR8:$lz)),
- (sub bitwidth, GR8:$lz)),
- RC, VT, DstInst, DstMemInst>;
- }
-
- defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
- defm : bmi_bzhi_patterns<GR64, 64, i64, BZHI64rr, loadi64, BZHI64rm>;
-
- // x & (-1 >> (32 - y))
- def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
- (BZHI32rr GR32:$src, GR32:$lz)>;
- def : Pat<(and (loadi32 addr:$src), (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
- (BZHI32rm addr:$src, GR32:$lz)>;
-
- // x & (-1 >> (64 - y))
- def : Pat<(and GR64:$src, (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
- def : Pat<(and (loadi64 addr:$src), (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
-
- // x << (32 - y) >> (32 - y)
- def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))),
- (i8 (trunc (sub 32, GR32:$lz)))),
- (BZHI32rr GR32:$src, GR32:$lz)>;
- def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))),
- (i8 (trunc (sub 32, GR32:$lz)))),
- (BZHI32rm addr:$src, GR32:$lz)>;
-
- // x << (64 - y) >> (64 - y)
- def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))),
- (i8 (trunc (sub 64, GR32:$lz)))),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
- def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))),
- (i8 (trunc (sub 64, GR32:$lz)))),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
-} // HasBMI2
-
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {
@@ -2585,7 +2514,7 @@ multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>,
- VEX_4V, Sched<[WriteALULd, ReadAfterLd]>;
+ VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
let Predicates = [HasBMI2] in {
@@ -2881,6 +2810,45 @@ let Predicates = [HasTBM] in {
(TZMSK32rr GR32:$src)>;
def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
(TZMSK64rr GR64:$src)>;
+
+ // Patterns to match flag producing ops.
+ // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+ // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
+ (BLCI64rr GR64:$src)>;
+
+ // Extra patterns because opt can optimize the above patterns to this.
+ def : Pat<(or_flag_nocf GR32:$src, (sub -2, GR32:$src)),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
+ (BLCI64rr GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCMSK64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCS32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCS64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSFILL32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSFILL64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+ (BLSIC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+ (BLSIC64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+ (T1MSKC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+ (T1MSKC64rr GR64:$src)>;
} // HasTBM
//===----------------------------------------------------------------------===//
@@ -2976,6 +2944,8 @@ def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popf", "popfq", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popfd", "popfl", "att">;
+def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In64BitMode]>;
// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
// all modes. However: "push (addr)" and "push $42" should default to
@@ -2988,6 +2958,8 @@ def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>;
def : MnemonicAlias<"pushf", "pushfq", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"pushfd", "pushfl", "att">;
+def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popad", "popal", "intel">, Requires<[Not64BitMode]>;
def : MnemonicAlias<"pushad", "pushal", "intel">, Requires<[Not64BitMode]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index aefeffedfc1a..8f3357170576 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -47,7 +47,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -64,7 +64,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -105,7 +105,7 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
[(set VR64:$dst,
(IntId64 VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -122,7 +122,7 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -553,7 +553,7 @@ let Predicates = [HasMMX, HasSSE1] in {
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
- Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 6a9b20998210..e2bcd18ce660 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -35,7 +35,7 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -57,7 +57,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -81,7 +81,7 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -103,7 +103,7 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rm, d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -265,8 +265,6 @@ let Predicates = [UseAVX] in {
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
@@ -349,8 +347,6 @@ let Predicates = [UseSSE2] in {
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
}
@@ -593,8 +589,21 @@ let Predicates = [HasAVX, NoVLX] in {
// available and changing the domain is beneficial.
def : Pat<(alignedloadv4i64 addr:$src),
(VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv16i16 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv32i8 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
def : Pat<(loadv4i64 addr:$src),
(VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv16i16 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv32i8 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+
def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
@@ -619,8 +628,20 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [UseSSE1] in {
def : Pat<(alignedloadv2i64 addr:$src),
(MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
(MOVUPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (MOVUPSrm addr:$src)>;
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, VR128:$src)>;
@@ -652,7 +673,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
[], SSEPackedSingle>, PS,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
@@ -660,7 +681,7 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode pdnode,
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
SSEPackedDouble>, PD,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
@@ -820,19 +841,6 @@ let Constraints = "$src1 = $dst" in {
Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}
-// TODO: This is largely to trick fastisel into ignoring the pattern.
-def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2),
- (X86Unpckh node:$src1, node:$src2), [{
- return N->getOperand(0) == N->getOperand(1);
-}]>;
-
-let Predicates = [UseSSE2] in {
- // TODO: This is a hack pattern to allow lowering to emit unpckh instead of
- // movhlps for sse2 without changing a bunch of tests.
- def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)),
- (MOVHLPSrr VR128:$src, VR128:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//
@@ -858,7 +866,7 @@ let hasSideEffects = 0 in {
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
[(set RC:$dst, (DstTy (sint_to_fp
- (SrcTy (bitconvert (ld_frag addr:$src))))))], d>,
+ (SrcTy (ld_frag addr:$src)))))], d>,
Sched<[sched.Folded]>;
}
}
@@ -874,7 +882,7 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}
@@ -1001,18 +1009,17 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
-// FIXME: We probably want to match the rm form only when optimizing for
-// size, to avoid false depenendecies (see sse_fp_unop_s for details)
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
- string asm, X86FoldableSchedWrite sched> {
+ ValueType DstVT, ValueType SrcVT, SDNode OpNode,
+ Operand memop, ComplexPattern mem_cpat, string asm,
+ X86FoldableSchedWrite sched> {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))]>,
+ [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
Sched<[sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))]>,
+ [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
Sched<[sched.Folded]>;
}
@@ -1032,21 +1039,21 @@ let hasSideEffects = 0 in {
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let Predicates = [UseAVX] in {
-defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
- int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
+ X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
WriteCvtSD2I>, XD, VEX, VEX_LIG;
-defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
+defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
+ X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
}
-defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
-defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
@@ -1078,60 +1085,60 @@ let isCodeGenOnly = 1 in {
// Aliases for intrinsics
let isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in {
-defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
WriteCvtSS2I>, XS, VEX;
-defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
+ X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>,
XS, VEX, VEX_W;
-defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSS2I>, XD, VEX;
-defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
+ X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSS2I>,
XD, VEX, VEX_W;
}
-defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
WriteCvtSS2I>, XS;
-defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
+ X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>, XS, REX_W;
-defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSD2I>, XD;
-defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
- int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
+ X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSD2I>, XD, REX_W;
} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
-defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, VEX, VEX_LIG;
-defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG;
}
-defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS;
-defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
WriteCvtSS2I>, XS, REX_W;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64,
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64,
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PSY>,
PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64,
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
PS, Requires<[UseSSE2]>;
@@ -1186,7 +1193,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XD, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
def : Pat<(f32 (fpround FR64:$src)),
@@ -1217,7 +1224,7 @@ def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1231,7 +1238,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))]>,
XD, Requires<[UseSSE2]>,
- Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
@@ -1248,7 +1255,7 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
XS, VEX_4V, VEX_LIG, VEX_WIG,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>,
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
Requires<[UseAVX, OptForSize]>;
}
@@ -1295,7 +1302,7 @@ def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1307,7 +1314,7 @@ def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[]>, XS, Requires<[UseSSE2]>,
- Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+ Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
}
} // isCodeGenOnly = 1
@@ -1690,7 +1697,7 @@ let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1700,7 +1707,7 @@ def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
@@ -1714,7 +1721,7 @@ let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1826,7 +1833,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -1836,7 +1843,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>,
- Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
}
}
@@ -1878,7 +1885,7 @@ let mayLoad = 1 in
(ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
mem_cpat:$src, imm:$cc))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let isCodeGenOnly = 1 in {
@@ -1920,7 +1927,7 @@ let mayLoad = 1 in
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1938,7 +1945,7 @@ let mayLoad = 1 in
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
mem_cpat:$src2))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Defs = [EFLAGS] in {
@@ -2003,7 +2010,7 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -2013,7 +2020,7 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
let mayLoad = 1 in
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- asm_alt, [], d>, Sched<[sched.Folded, ReadAfterLd]>,
+ asm_alt, [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
@@ -2109,7 +2116,7 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -2165,58 +2172,58 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))], d>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX] in {
-defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
+defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
-defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
+defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
-defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
+defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
-defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
+defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
-defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
+defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
+defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
+defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
-defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
+defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
- defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memop,
VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memop,
VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
- defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memop,
VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
- defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memop,
VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX1Only] in {
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (loadv8i32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (loadv8i32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
@@ -2253,6 +2260,16 @@ let Predicates = [HasAVX] in {
SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
+
+ // Also support integer VTs to avoid a int->fp bitcast in the DAG.
+ def : Pat<(X86movmsk (v4i32 VR128:$src)),
+ (VMOVMSKPSrr VR128:$src)>;
+ def : Pat<(X86movmsk (v2i64 VR128:$src)),
+ (VMOVMSKPDrr VR128:$src)>;
+ def : Pat<(X86movmsk (v8i32 VR256:$src)),
+ (VMOVMSKPSYrr VR256:$src)>;
+ def : Pat<(X86movmsk (v4i64 VR256:$src)),
+ (VMOVMSKPDYrr VR256:$src)>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
@@ -2260,6 +2277,14 @@ defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
SSEPackedDouble>, PD;
+let Predicates = [UseSSE2] in {
+ // Also support integer VTs to avoid a int->fp bitcast in the DAG.
+ def : Pat<(X86movmsk (v4i32 VR128:$src)),
+ (MOVMSKPSrr VR128:$src)>;
+ def : Pat<(X86movmsk (v2i64 VR128:$src)),
+ (MOVMSKPDrr VR128:$src)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
@@ -2284,9 +2309,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
@@ -2296,16 +2320,16 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
Predicate prd> {
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
- VR128, loadv2i64, i128mem, sched.XMM,
+ VR128, load, i128mem, sched.XMM,
IsCommutable, 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
- memopv2i64, i128mem, sched.XMM, IsCommutable, 1>;
+ memop, i128mem, sched.XMM, IsCommutable, 1>;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
- OpVT256, VR256, loadv4i64, i256mem, sched.YMM,
+ OpVT256, VR256, load, i256mem, sched.YMM,
IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}
@@ -2365,24 +2389,136 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
+let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
+ (VPANDYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
+ (VPORYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
+ (VPXORYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
+ (VPANDNYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPANDYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPORYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPXORYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
+ (VPANDNYrm VR256:$src1, addr:$src2)>;
+}
+
// If only AVX1 is supported, we need to handle integer operations with
// floating point instructions since the integer versions aren't available.
let Predicates = [HasAVX1Only] in {
+ def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
(VANDPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
(VORPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
(VXORPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v32i8 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
(VANDNPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv32i8 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv16i16 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(and VR256:$src1, (loadv8i32 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
(VANDPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(or VR256:$src1, (loadv32i8 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv16i16 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(or VR256:$src1, (loadv8i32 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
(VORPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR256:$src1, (loadv32i8 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv16i16 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(xor VR256:$src1, (loadv8i32 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
(VXORPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR256:$src1, (loadv32i8 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv16i16 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR256:$src1, (loadv8i32 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
(VANDNPSYrm VR256:$src1, addr:$src2)>;
}
@@ -2480,6 +2616,122 @@ let Predicates = [UseSSE2] in {
FR64)>;
}
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
+ (VPANDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
+ (VPORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
+ (VPXORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
+ (VPANDNrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(and VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPANDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPXORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128:$src1, (loadv16i8 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (loadv8i16 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (loadv4i32 addr:$src2)),
+ (VPANDNrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
+ (PANDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
+ (PORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
+ (PXORrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(v16i8 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i16 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86andnp VR128:$src1, VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(and VR128:$src1, (memopv16i8 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (memopv8i16 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(and VR128:$src1, (memopv4i32 addr:$src2)),
+ (PANDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(or VR128:$src1, (memopv16i8 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (memopv8i16 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(or VR128:$src1, (memopv4i32 addr:$src2)),
+ (PORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(xor VR128:$src1, (memopv16i8 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (memopv8i16 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+ def : Pat<(xor VR128:$src1, (memopv4i32 addr:$src2)),
+ (PXORrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(X86andnp VR128:$src1, (memopv16i8 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (memopv8i16 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86andnp VR128:$src1, (memopv4i32 addr:$src2)),
+ (PANDNrm VR128:$src1, addr:$src2)>;
+}
+
// Patterns for packed operations when we don't have integer type available.
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
(ANDPSrr VR128:$src1, VR128:$src2)>;
@@ -2713,7 +2965,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
[(set RC:$dst, (OpNode (load addr:$src1)))], d>,
- Sched<[sched.Folded, ReadAfterLd]>,
+ Sched<[sched.Folded]>,
Requires<[target, OptForSize]>;
let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
@@ -2723,7 +2975,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -2777,7 +3029,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], d>, Sched<[sched.Folded, ReadAfterLd]>;
+ [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -2787,7 +3039,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, intmemop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -3306,6 +3558,19 @@ def : InstAlias<"movdqu.s\t{$src, $dst|$dst, $src}",
let Predicates = [HasAVX, NoVLX] in {
// Additional patterns for other integer sizes.
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(alignedloadv8i16 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(alignedloadv16i8 addr:$src),
+ (VMOVDQArm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+ def : Pat<(loadv8i16 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+ def : Pat<(loadv16i8 addr:$src),
+ (VMOVDQUrm addr:$src)>;
+
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
(VMOVDQAmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
@@ -3345,8 +3610,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // ExeDomain = SSEPackedInt
@@ -3358,13 +3623,13 @@ defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
SchedWriteVecALU, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
SchedWriteVecALU, 1, NoVLX>;
-defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
+defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
+defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8,
+defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16,
+defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
@@ -3380,13 +3645,13 @@ defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
SchedWriteVecALU, 0, NoVLX>;
-defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
+defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
+defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
@@ -3405,28 +3670,28 @@ defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecIMul.XMM, 0>,
+ load, i128mem, SchedWriteVecIMul.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
- VR256, loadv4i64, i256mem, SchedWriteVecIMul.YMM,
+ VR256, load, i256mem, SchedWriteVecIMul.YMM,
0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecIMul.XMM>;
+ memop, i128mem, SchedWriteVecIMul.XMM>;
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
- loadv2i64, i128mem, SchedWritePSADBW.XMM, 0>,
+ load, i128mem, SchedWritePSADBW.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
- loadv4i64, i256mem, SchedWritePSADBW.YMM, 0>,
+ load, i256mem, SchedWritePSADBW.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
- memopv2i64, i128mem, SchedWritePSADBW.XMM>;
+ memop, i128mem, SchedWritePSADBW.XMM>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
@@ -3453,8 +3718,8 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (SrcVT (bitconvert (ld_frag addr:$src2))))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (SrcVT (ld_frag addr:$src2)))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!if(Is2Addr,
@@ -3473,16 +3738,16 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
- DstVT128, SrcVT, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
- DstVT256, SrcVT, loadv2i64, 0>, VEX_4V, VEX_L,
+ DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L,
VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
- memopv2i64>;
+ memop>;
}
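Two substitutions repeat throughout these integer multiclasses. First, the memory operand is matched with the width-only load/memop fragments, the element type now coming from the surrounding (SrcVT ...) or result-type cast rather than from an explicit bitconvert of a loadv2i64/loadv4i64/memopv2i64 node. Second, the folded-load alternatives take their read-advance from the scheduling class itself, sched.ReadAfterFold, instead of the single global ReadAfterLd token. A minimal sketch of the wrapper the latter presumably comes from (field names assumed, presumably in X86Schedule.td):

// Hedged sketch of the scheduling wrapper implied by 'sched.ReadAfterFold'.
class X86FoldableSchedWrite : SchedWrite {
  // SchedWrite to use when the load is folded into the instruction.
  SchedWrite Folded;
  // Read-advance applied to the register operands of the folded form.
  SchedRead ReadAfterFold;
}

Width bundles such as SchedWriteVecALU or SchedWriteVecTest then expose one such wrapper per vector width, which is why later hunks can also write SchedWriteVecTest.XMM.ReadAfterFold and WritePCmpIStrM.ReadAfterFold directly.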
multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
@@ -3582,7 +3847,7 @@ let Predicates = [HasAVX, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
+ (vt128 (OpNode (load addr:$src1),
(i8 imm:$src2))))]>, VEX,
Sched<[sched.XMM.Folded]>, VEX_WIG;
}
@@ -3600,7 +3865,7 @@ let Predicates = [HasAVX2, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
+ (vt256 (OpNode (load addr:$src1),
(i8 imm:$src2))))]>, VEX, VEX_L,
Sched<[sched.YMM.Folded]>, VEX_WIG;
}
@@ -3618,7 +3883,7 @@ let Predicates = [UseSSE2] in {
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (vt128 (OpNode (memop addr:$src1),
(i8 imm:$src2))))]>,
Sched<[sched.XMM.Folded]>;
}
@@ -3658,8 +3923,8 @@ multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OutVT (OpNode (ArgVT RC:$src1),
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
@@ -3683,53 +3948,53 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OutVT (OpNode (ArgVT RC:$src1),
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt
@@ -3754,89 +4019,88 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1,
- (bitconvert (ld_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>,
+ i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
- i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>,
+ i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128,
- i128mem, SchedWriteShuffle.XMM, memopv2i64>;
+ i128mem, SchedWriteShuffle.XMM, memop>;
}
} // ExeDomain = SSEPackedInt
@@ -3864,7 +4128,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
imm:$src3))]>,
- Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
// Extract
@@ -4155,7 +4419,7 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIrm addr:$src)>;
@@ -4180,7 +4444,7 @@ let Predicates = [UseSSE2] in {
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(MOVDI2PDIrm addr:$src)>;
@@ -4335,30 +4599,30 @@ defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movshdup (load addr:$src))),
(VMOVSHDUPrm addr:$src)>;
def : Pat<(v4i32 (X86Movsldup VR128:$src)),
(VMOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movsldup (load addr:$src))),
(VMOVSLDUPrm addr:$src)>;
def : Pat<(v8i32 (X86Movshdup VR256:$src)),
(VMOVSHDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movshdup (load addr:$src))),
(VMOVSHDUPYrm addr:$src)>;
def : Pat<(v8i32 (X86Movsldup VR256:$src)),
(VMOVSLDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movsldup (load addr:$src))),
(VMOVSLDUPYrm addr:$src)>;
}
let Predicates = [UseSSE3] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(MOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movshdup (memop addr:$src))),
(MOVSHDUPrm addr:$src)>;
def : Pat<(v4i32 (X86Movsldup VR128:$src)),
(MOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movsldup (memop addr:$src))),
(MOVSLDUPrm addr:$src)>;
}
@@ -4405,12 +4669,16 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+ def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}
let Predicates = [UseSSE3] in {
// No need for aligned memory as this only loads 64-bits.
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ (MOVDDUPrm addr:$src)>;
}
//===---------------------------------------------------------------------===//
@@ -4453,7 +4721,7 @@ multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
@@ -4504,7 +4772,7 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode,
@@ -4522,7 +4790,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
@@ -4580,7 +4848,7 @@ multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (vt (OpNode (bitconvert (ld_frag addr:$src)))))]>,
+ (vt (OpNode (ld_frag addr:$src))))]>,
Sched<[sched.XMM.Folded]>;
}
@@ -4597,19 +4865,19 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
(ins i256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))]>,
+ (vt (OpNode (load addr:$src))))]>,
Sched<[sched.YMM.Folded]>;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
- loadv2i64>, VEX, VEX_WIG;
+ load>, VEX, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
@@ -4623,11 +4891,11 @@ let Predicates = [HasAVX2, NoVLX] in {
}
defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU,
- memopv2i64>;
+ memop>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -4652,9 +4920,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (DstVT (OpNode (OpVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (DstVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -4675,9 +4942,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (ld_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -4693,83 +4959,83 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId256 VR256:$src1, (load addr:$src2)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
- VR128, loadv2i64, i128mem,
+ VR128, load, i128mem,
SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
- v16i8, VR128, loadv2i64, i128mem,
+ v16i8, VR128, load, i128mem,
SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}
defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
- VR128, loadv2i64, i128mem,
+ VR128, load, i128mem,
SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
- loadv2i64, i128mem,
+ load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
}
}
let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
- v32i8, VR256, loadv4i64, i256mem,
+ v32i8, VR256, load, i256mem,
SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
- loadv4i64, i256mem,
+ load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
- VR256, loadv4i64, i256mem,
+ VR256, load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
- loadv4i64, i256mem,
+ load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
@@ -4790,33 +5056,33 @@ let isCommutable = 0 in {
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePHAdd.XMM>;
+ memop, i128mem, SchedWritePHAdd.XMM>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, memopv2i64>;
+ SchedWriteVecALU.XMM, memop>;
defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVarShuffle.XMM>;
+ memop, i128mem, SchedWriteVarShuffle.XMM>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, memopv2i64>;
+ SchedWritePHAdd.XMM, memop>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, memopv2i64>;
+ SchedWritePHAdd.XMM, memop>;
defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
- v16i8, VR128, memopv2i64, i128mem,
+ v16i8, VR128, memop, i128mem,
SchedWriteVecIMul.XMM>;
}
defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
- VR128, memopv2i64, i128mem, SchedWriteVecIMul.XMM>;
+ VR128, memop, i128mem, SchedWriteVecIMul.XMM>;
}
//===---------------------------------------------------------------------===//
@@ -4843,20 +5109,20 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (VT (X86PAlignr RC:$src1,
- (bitconvert (memop_frag addr:$src2)),
+ (memop_frag addr:$src2),
(i8 imm:$src3))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
- defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64, i128mem,
+ defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
- defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64, i256mem,
+ defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
- defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64, i128mem,
+ defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
SchedWriteShuffle.XMM>;
//===---------------------------------------------------------------------===//
@@ -4936,34 +5202,72 @@ defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
-// AVX2 Patterns
-multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtOp> {
+// Patterns that we also need for any_extend.
+// Any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg.
+multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> {
// Register-Register patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
}
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),
+
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
+ (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
+ (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
+ }
+
+ // AVX2 Register-Memory patterns
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ }
+
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+ }
+}
+
+// AVX2 Patterns
+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
+ SDNode ExtOp, SDNode InVecOp> :
+ SS41I_pmovx_avx2_patterns_base<OpcPrefix, ExtOp> {
+
+ // Register-Register patterns
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),
+ def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
- (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))),
+ def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
(!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
-
- def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
- (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
}
// Simple Register-Memory patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
}
- let Predicates = [HasAVX, NoVLX] in {
+ let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
@@ -4979,60 +5283,39 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
}
// AVX2 Register-Memory patterns
- let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- }
- let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
}
-defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
-defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
+defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
+defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
+defm : SS41I_pmovx_avx2_patterns_base<"VPMOVZX", anyext>;
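In the YMM extension patterns rewritten here, the X86-specific X86vsext/X86vzext nodes give way to the generic sext/zext nodes, while the cases that consume only the low lanes of the 128-bit source go through the separate InVecOp parameter (sext_invec/zext_invec). The zero-extend patterns are reused for anyext because, as the comment above says, any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg. A rough sketch of the in-register extend nodes these patterns assume (SDTExtVecInReg is a placeholder name, not the real profile):

// Hedged sketch; the real declarations live alongside the other generic SDNodes.
// SDTExtVecInReg stands in for a profile whose result is a wider vector built
// from the low elements of the single vector operand.
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtVecInReg>;
def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtVecInReg>;

For example, (v4i64 (sext_invec (v16i8 VR128:$src))) sign-extends the low four bytes of $src and, per the pattern above, still selects VPMOVSXBQYrr just as the old X86vsext form did.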
// SSE4.1/AVX patterns.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
@@ -5082,7 +5365,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -5092,7 +5375,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
@@ -5101,7 +5384,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -5112,7 +5395,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
@@ -5121,7 +5404,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -5132,7 +5415,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}
}
@@ -5298,8 +5581,8 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
- imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoBWI] in
@@ -5324,8 +5607,8 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
- imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5350,8 +5633,8 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
- imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>,
+ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5383,7 +5666,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
(X86insertps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
- Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedSingle in {
@@ -5446,7 +5729,7 @@ let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
@@ -5461,7 +5744,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
@@ -5479,7 +5762,7 @@ let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in {
(outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, hasSideEffects = 0
let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
@@ -5494,7 +5777,7 @@ let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in {
(outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, hasSideEffects = 0
}
@@ -5522,7 +5805,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
@@ -5545,7 +5828,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
@@ -5846,7 +6129,7 @@ def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>,
+ Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
VEX, VEX_WIG;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -5856,7 +6139,7 @@ def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.YMM.Folded, ReadAfterLd]>,
+ Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
VEX, VEX_L, VEX_WIG;
}
@@ -5868,7 +6151,7 @@ def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
}
// The bit test instructions below are AVX only
@@ -5882,7 +6165,7 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
- Sched<[sched.Folded, ReadAfterLd]>, VEX;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
@@ -5950,7 +6233,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
(ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (v8i16 (OpNode (v8i16 (bitconvert (ld_frag addr:$src))))))]>,
+ (v8i16 (OpNode (ld_frag addr:$src))))]>,
Sched<[Sched.Folded]>;
}
@@ -5958,10 +6241,10 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
// model, although the naming is misleading.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
- X86phminpos, loadv2i64,
+ X86phminpos, load,
WritePHMINPOS>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
- X86phminpos, memopv2i64,
+ X86phminpos, memop,
WritePHMINPOS>;
/// SS48I_binop_rm - Simple SSE41 binary operator.
@@ -5983,118 +6266,118 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecIMul.XMM, 0>,
+ load, i128mem, SchedWriteVecIMul.XMM, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecIMul.YMM, 0>,
+ load, i256mem, SchedWriteVecIMul.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecIMul.XMM, 1>;
+ memop, i128mem, SchedWriteVecIMul.XMM, 1>;
}
let Predicates = [HasAVX, NoVLX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
- loadv2i64, i128mem, SchedWritePMULLD.XMM, 0>,
+ load, i128mem, SchedWritePMULLD.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX] in
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
- loadv4i64, i256mem, SchedWritePMULLD.YMM, 0>,
+ load, i256mem, SchedWritePMULLD.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
- memopv2i64, i128mem, SchedWritePMULLD.XMM, 1>;
+ memop, i128mem, SchedWritePMULLD.XMM, 1>;
defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>;
+ memop, i128mem, SchedWriteVecALU.XMM, 1>;
}
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
@@ -6120,9 +6403,8 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
@@ -6148,9 +6430,8 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def BlendCommuteImm2 : SDNodeXForm<imm, [{
@@ -6171,28 +6452,28 @@ def BlendCommuteImm8 : SDNodeXForm<imm, [{
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, loadv2i64, i128mem, 0,
+ VR128, load, i128mem, 0,
SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, loadv4f32, f128mem, 0,
+ VR128, load, f128mem, 0,
SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, loadv2f64, f128mem, 0,
+ VR128, load, f128mem, 0,
SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, loadv8f32, i256mem, 0,
+ VR256, load, i256mem, 0,
SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, loadv4i64, i256mem, 0,
+ VR256, load, i256mem, 0,
SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -6200,17 +6481,17 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem, 1,
+ VR128, memop, i128mem, 1,
SchedWriteMPSAD.XMM>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv4f32, f128mem, 1,
+ VR128, memop, f128mem, 1,
SchedWriteDPPS.XMM>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv2f64, f128mem, 1,
+ VR128, memop, f128mem, 1,
SchedWriteDPPD.XMM>;
}
@@ -6238,56 +6519,54 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Pattern to commute if load is in first source.
- def : Pat<(OpVT (OpNode (bitconvert (memop_frag addr:$src2)),
- RC:$src1, imm:$src3)),
+ def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>;
}
let Predicates = [HasAVX] in {
defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
- VR128, loadv4f32, f128mem, 0, SSEPackedSingle,
+ VR128, load, f128mem, 0, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>,
VEX_4V, VEX_WIG;
defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
- VR256, loadv8f32, f256mem, 0, SSEPackedSingle,
+ VR256, load, f256mem, 0, SSEPackedSingle,
SchedWriteFBlend.YMM, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
- VR128, loadv2f64, f128mem, 0, SSEPackedDouble,
+ VR128, load, f128mem, 0, SSEPackedDouble,
SchedWriteFBlend.XMM, BlendCommuteImm2>,
VEX_4V, VEX_WIG;
defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
- VR256, loadv4f64, f256mem, 0, SSEPackedDouble,
+ VR256, load, f256mem, 0, SSEPackedDouble,
SchedWriteFBlend.YMM, BlendCommuteImm4>,
VEX_4V, VEX_L, VEX_WIG;
defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
- VR128, loadv2i64, i128mem, 0, SSEPackedInt,
+ VR128, load, i128mem, 0, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>,
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2] in {
defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
- VR256, loadv4i64, i256mem, 0, SSEPackedInt,
+ VR256, load, i256mem, 0, SSEPackedInt,
SchedWriteBlend.YMM, BlendCommuteImm8>,
VEX_4V, VEX_L, VEX_WIG;
}
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
- VR128, memopv4f32, f128mem, 1, SSEPackedSingle,
+ VR128, memop, f128mem, 1, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>;
defm BLENDPD : SS41I_blend_rmi<0x0D, "blendpd", X86Blendi, v2f64,
- VR128, memopv2f64, f128mem, 1, SSEPackedDouble,
+ VR128, memop, f128mem, 1, SSEPackedDouble,
SchedWriteFBlend.XMM, BlendCommuteImm2>;
defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
- VR128, memopv2i64, i128mem, 1, SSEPackedInt,
+ VR128, memop, i128mem, 1, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>;
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -6321,20 +6600,20 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ (IntId RC:$src1, (mem_frag addr:$src2),
RC:$src3))], SSEPackedInt>, TAPD, VEX_4V,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
- loadv2f64, int_x86_sse41_blendvpd,
+ load, int_x86_sse41_blendvpd,
SchedWriteFVarBlend.XMM>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
loadv4f64, int_x86_avx_blendv_pd_256,
@@ -6342,20 +6621,20 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
- loadv4f32, int_x86_sse41_blendvps,
+ load, int_x86_sse41_blendvps,
SchedWriteFVarBlend.XMM>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
loadv8f32, int_x86_avx_blendv_ps_256,
SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- loadv2i64, int_x86_sse41_pblendvb,
+ load, int_x86_sse41_pblendvb,
SchedWriteVarBlend.XMM>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- loadv4i64, int_x86_avx2_pblendvb,
+ load, int_x86_avx2_pblendvb,
SchedWriteVarBlend.YMM>, VEX_L;
}
@@ -6486,18 +6765,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (mem_frag addr:$src2)), XMM0))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (mem_frag addr:$src2), XMM0))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memop, f128mem,
int_x86_sse41_blendvpd, SchedWriteFVarBlend.XMM>;
let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memop, f128mem,
int_x86_sse41_blendvps, SchedWriteFVarBlend.XMM>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memop, i128mem,
int_x86_sse41_pblendvb, SchedWriteVarBlend.XMM>;
// Aliases with the implicit xmm0 argument
@@ -6553,6 +6832,12 @@ let Predicates = [HasAVX2, NoVLX] in {
(VMOVNTDQAYrm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v8i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v16i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v32i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
@@ -6562,6 +6847,12 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVNTDQArm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQArm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
}
let Predicates = [UseSSE41] in {
@@ -6571,6 +6862,12 @@ let Predicates = [UseSSE41] in {
(MOVNTDQArm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(MOVNTDQArm addr:$src)>;
+ def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
+ def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
+ def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
}
} // AddedComplexity
@@ -6598,22 +6895,22 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>,
+ load, i128mem, SchedWriteVecALU.XMM, 0>,
VEX_4V, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>,
+ load, i256mem, SchedWriteVecALU.YMM, 0>,
VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
- memopv2i64, i128mem, SchedWriteVecALU.XMM>;
+ memop, i128mem, SchedWriteVecALU.XMM>;
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
@@ -6628,7 +6925,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
def rm :SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrM.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>;
}
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
@@ -6646,7 +6943,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
def rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrM.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>;
}
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
@@ -6664,7 +6961,7 @@ multiclass SS42AI_pcmpistri<string asm> {
def rm : SS42AI<0x63, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
- []>, Sched<[WritePCmpIStrI.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>;
}
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
@@ -6682,7 +6979,7 @@ multiclass SS42AI_pcmpestri<string asm> {
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, u8imm:$src5),
!strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
- []>, Sched<[WritePCmpEStrI.Folded, ReadAfterLd]>;
+ []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>;
}
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
@@ -6712,7 +7009,7 @@ class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
- Sched<[WriteCRC32.Folded, ReadAfterLd]>;
+ Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
@@ -6764,10 +7061,10 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
[!if(UsesXMM0,
(set VR128:$dst, (IntId VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
+ (memop addr:$src2), XMM0)),
(set VR128:$dst, (IntId VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8,
- Sched<[sched.Folded, ReadAfterLd]>;
+ (memop addr:$src2))))]>, T8,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
@@ -6783,9 +7080,10 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (memop addr:$src2),
(i8 imm:$src3)))]>, TA,
- Sched<[SchedWriteVecIMul.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecIMul.XMM.Folded,
+ SchedWriteVecIMul.XMM.ReadAfterFold]>;
defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte,
SchedWriteVecIMul.XMM>;
@@ -6828,46 +7126,46 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, MemOp:$src2), "",
[(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>,
- Sched<[WriteAESDecEnc.Folded, ReadAfterLd]>;
+ Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>;
}
}
// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, loadv2i64>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
}
let Predicates = [NoVLX, HasVAES] in {
defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesenc_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesenclast_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesdec_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast_256, loadv4i64, 0, VR256,
+ int_x86_aesni_aesdeclast_256, load, 0, VR256,
i256mem>, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
- int_x86_aesni_aesenc, memopv2i64, 1>;
+ int_x86_aesni_aesenc, memop, 1>;
defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
- int_x86_aesni_aesenclast, memopv2i64, 1>;
+ int_x86_aesni_aesenclast, memop, 1>;
defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
- int_x86_aesni_aesdec, memopv2i64, 1>;
+ int_x86_aesni_aesdec, memop, 1>;
defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
- int_x86_aesni_aesdeclast, memopv2i64, 1>;
+ int_x86_aesni_aesdeclast, memop, 1>;
}
// Perform the AES InvMixColumn Transformation
@@ -6881,7 +7179,7 @@ let Predicates = [HasAVX, HasAES] in {
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -6892,7 +7190,7 @@ def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memop addr:$src1)))]>,
Sched<[WriteAESIMC.Folded]>;
// AES Round Key Generation Assist
@@ -6907,7 +7205,7 @@ let Predicates = [HasAVX, HasAES] in {
(ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
@@ -6920,7 +7218,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>;
//===----------------------------------------------------------------------===//
@@ -6948,12 +7246,12 @@ let Predicates = [NoAVX, HasPCLMUL] in {
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
+ (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
imm:$src3))]>,
- Sched<[WriteCLMul.Folded, ReadAfterLd]>;
+ Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
- def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1,
+ def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
(i8 imm:$src3)),
(PCLMULQDQrm VR128:$src1, addr:$src2,
(PCLMULCommuteImm imm:$src3))>;
@@ -6986,7 +7284,7 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst,
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
- Sched<[WriteCLMul.Folded, ReadAfterLd]>;
+ Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
// We can commute a load in the first operand by swapping the sources and
// rotating the immediate.
@@ -6996,11 +7294,11 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
}
let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
-defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, loadv2i64,
+defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
int_x86_pclmulqdq>, VEX_4V, VEX_WIG;
let Predicates = [NoVLX, HasVPCLMULQDQ] in
-defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, loadv4i64,
+defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
@@ -7156,11 +7454,11 @@ def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTI128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI128 addr:$src)>;
}
@@ -7174,11 +7472,11 @@ def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
let Predicates = [HasAVX1Only] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTF128 addr:$src)>;
}
@@ -7194,7 +7492,7 @@ let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, u8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
// To create a 256-bit all ones value, we should produce VCMPTRUEPS
@@ -7211,7 +7509,7 @@ multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
(!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
- (From (bitconvert (memop_frag addr:$src2))),
+ (From (memop_frag addr:$src2)),
(iPTR imm)),
(!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -7224,9 +7522,9 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [HasAVX1Only] in {
defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>;
+ defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
+ defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>;
}
//===----------------------------------------------------------------------===//
@@ -7315,7 +7613,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
- X86MemOperand x86memop_i, PatFrag i_frag,
+ X86MemOperand x86memop_i,
ValueType f_vt, ValueType i_vt,
X86FoldableSchedWrite sched,
X86FoldableSchedWrite varsched> {
@@ -7329,8 +7627,8 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
- (i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
- Sched<[varsched.Folded, ReadAfterLd]>;
+ (i_vt (load addr:$src2)))))]>, VEX_4V,
+ Sched<[varsched.Folded, sched.ReadAfterFold]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
@@ -7348,18 +7646,18 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- loadv2i64, v4f32, v4i32, SchedWriteFShuffle.XMM,
+ v4f32, v4i32, SchedWriteFShuffle.XMM,
SchedWriteFVarShuffle.XMM>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- loadv4i64, v8f32, v8i32, SchedWriteFShuffle.YMM,
+ v8f32, v8i32, SchedWriteFShuffle.YMM,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- loadv2i64, v2f64, v2i64, SchedWriteFShuffle.XMM,
+ v2f64, v2i64, SchedWriteFShuffle.XMM,
SchedWriteFVarShuffle.XMM>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- loadv4i64, v4f64, v4i64, SchedWriteFShuffle.YMM,
+ v4f64, v4i64, SchedWriteFShuffle.YMM,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
@@ -7380,7 +7678,7 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L,
- Sched<[WriteFShuffle256Ld, ReadAfterLd]>;
+ Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
// Immediate transform to help with commuting.
@@ -7440,8 +7738,7 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set RC:$dst, (X86cvtph2ps (bc_v8i16
- (loadv2i64 addr:$src))))]>,
+ [(set RC:$dst, (X86cvtph2ps (loadv8i16 addr:$src)))]>,
T8PD, VEX, Sched<[sched.Folded]>;
}
@@ -7515,7 +7812,7 @@ let Predicates = [HasF16C, NoVLX] in {
/// AVX2_blend_rmi - AVX2 blend with 8-bit immediate
multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, X86FoldableSchedWrite sched,
- RegisterClass RC, PatFrag memop_frag,
+ RegisterClass RC,
X86MemOperand x86memop, SDNodeXForm commuteXForm> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
@@ -7529,22 +7826,20 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
- Sched<[sched.Folded, ReadAfterLd]>, VEX_4V;
+ (OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
// Pattern to commute if load is in first source.
- def : Pat<(OpVT (OpNode (bitconvert (memop_frag addr:$src2)),
- RC:$src1, imm:$src3)),
+ def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>;
}
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
- SchedWriteBlend.XMM, VR128, loadv2i64, i128mem,
+ SchedWriteBlend.XMM, VR128, i128mem,
BlendCommuteImm4>;
defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
- SchedWriteBlend.YMM, VR256, loadv4i64, i256mem,
+ SchedWriteBlend.YMM, VR256, i256mem,
BlendCommuteImm8>, VEX_L;
// For insertion into the zero index (low half) of a 256-bit vector, it is
@@ -7743,6 +8038,8 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDDUPrr VR128:$src)>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(VMOVDDUPrm addr:$src)>;
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+ (VMOVDDUPrm addr:$src)>;
}
let Predicates = [HasAVX1Only] in {
@@ -7778,7 +8075,7 @@ let Predicates = [HasAVX1Only] in {
// VPERM - Permute instructions
//
-multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+multiclass avx2_perm<bits<8> opc, string OpcodeStr,
ValueType OpVT, X86FoldableSchedWrite Sched,
X86MemOperand memOp> {
let Predicates = [HasAVX2, NoVLX] in {
@@ -7795,16 +8092,14 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1,
- (bitconvert (mem_frag addr:$src2)))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
+ (load addr:$src2))))]>,
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L;
}
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteVarShuffle256,
- i256mem>;
+defm VPERMD : avx2_perm<0x36, "vpermd", v8i32, WriteVarShuffle256, i256mem>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFVarShuffle256,
- f256mem>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", v8f32, WriteFVarShuffle256, f256mem>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
ValueType OpVT, X86FoldableSchedWrite Sched,
@@ -7824,7 +8119,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
(i8 imm:$src2))))]>,
- Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
+ Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
}
}
@@ -7849,7 +8144,7 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 imm:$src3)))]>,
- Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
@@ -7869,14 +8164,14 @@ let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i128mem:$src2, u8imm:$src3),
"vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX2, NoVLX] in {
defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv4i32>;
+ defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
+ defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv16i8>;
}
//===----------------------------------------------------------------------===//
@@ -7941,7 +8236,7 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
// masked store
- def: Pat<(X86mstore addr:$ptr, (MaskVT RC:$mask), (VT RC:$src)),
+ def: Pat<(X86mstore (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
(!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
// masked load
def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), undef)),
@@ -8035,8 +8330,9 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1,
- (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
- VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded, ReadAfterLd]>;
+ (vt128 (load addr:$src2)))))]>,
+ VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded,
+ SchedWriteVarVecShift.XMM.ReadAfterFold]>;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -8048,8 +8344,9 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1,
- (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, ReadAfterLd]>;
+ (vt256 (load addr:$src2)))))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
+ SchedWriteVarVecShift.YMM.ReadAfterFold]>;
}
let Predicates = [HasAVX2, NoVLX] in {
@@ -8061,13 +8358,11 @@ let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
(VPSRAVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsrav VR128:$src1,
- (bitconvert (loadv2i64 addr:$src2)))),
+ def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
(VPSRAVDrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
(VPSRAVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86vsrav VR256:$src1,
- (bitconvert (loadv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))),
(VPSRAVDYrm VR256:$src1, addr:$src2)>;
}
@@ -8132,51 +8427,6 @@ let Predicates = [UseAVX2] in {
}
//===----------------------------------------------------------------------===//
-// Extra selection patterns for f128, f128mem
-
-// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
-def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-def : Pat<(store (f128 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, (COPY_TO_REGCLASS (f128 VR128:$src), VR128))>;
-
-def : Pat<(alignedloadf128 addr:$src),
- (COPY_TO_REGCLASS (MOVAPSrm addr:$src), VR128)>;
-def : Pat<(loadf128 addr:$src),
- (COPY_TO_REGCLASS (MOVUPSrm addr:$src), VR128)>;
-
-// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
-def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (ANDPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (ANDPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (ORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (ORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
- (COPY_TO_REGCLASS
- (XORPSrm (COPY_TO_REGCLASS VR128:$src1, VR128), f128mem:$src2),
- VR128)>;
-
-def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
- (COPY_TO_REGCLASS
- (XORPSrr (COPY_TO_REGCLASS VR128:$src1, VR128),
- (COPY_TO_REGCLASS VR128:$src2, VR128)), VR128)>;
-
-//===----------------------------------------------------------------------===//
// GFNI instructions
//===----------------------------------------------------------------------===//
@@ -8194,8 +8444,8 @@ multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1,
- (bitconvert (MemOpFrag addr:$src2)))))]>,
- Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>, T8PD;
+ (MemOpFrag addr:$src2))))]>,
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD;
}
}
@@ -8212,9 +8462,9 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1,
- (bitconvert (MemOpFrag addr:$src2)),
+ (MemOpFrag addr:$src2),
imm:$src3)))], SSEPackedInt>,
- Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>;
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
}
}
@@ -8222,24 +8472,24 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
- VR128, loadv2i64, i128mem, 1>;
+ VR128, load, i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
defm V##NAME : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128,
- loadv2i64, i128mem>, VEX_4V, VEX_W;
+ load, i128mem>, VEX_4V, VEX_W;
defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256,
- loadv4i64, i256mem>, VEX_4V, VEX_L, VEX_W;
+ load, i256mem>, VEX_4V, VEX_L, VEX_W;
}
}
// GF2P8MULB
let Constraints = "$src1 = $dst",
Predicates = [HasGFNI, UseSSE2] in
-defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memopv2i64,
+defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memop,
i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
- defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, loadv2i64,
+ defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, load,
i128mem>, VEX_4V;
- defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, loadv4i64,
+ defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, load,
i256mem>, VEX_4V, VEX_L;
}
// GF2P8AFFINEINVQB, GF2P8AFFINEQB
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 023137634df1..7cd63a6dd820 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -16,7 +16,7 @@
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (shl GR8:$src1, CL))]>;
@@ -29,7 +29,7 @@ def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t{%cl, $dst|$dst, cl}",
[(set GR64:$dst, (shl GR64:$src1, CL))]>;
-} // Uses = [CL]
+} // Uses = [CL], SchedRW
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
@@ -64,11 +64,9 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
} // hasSideEffects = 0
} // Constraints = "$src = $dst", SchedRW
-
-let SchedRW = [WriteShiftLd, WriteRMW] in {
// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
// using CL?
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t{%cl, $dst|$dst, cl}",
[(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -85,6 +83,8 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
[(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src),
"shl{b}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -120,7 +120,7 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
} // SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (srl GR8:$src1, CL))]>;
@@ -166,8 +166,7 @@ def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, cl}",
[(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -184,6 +183,8 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
[(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src),
"shr{b}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -219,7 +220,7 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
} // SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (sra GR8:$src1, CL))]>;
@@ -268,8 +269,7 @@ def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -286,6 +286,8 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
[(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src),
"sar{b}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -325,9 +327,9 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in {
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{%cl, $dst|$dst, cl}", []>;
def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
@@ -357,7 +359,7 @@ def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
} // Uses = [EFLAGS]
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in {
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{%cl, $dst|$dst, cl}", []>;
def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
@@ -389,7 +391,7 @@ def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
} // Constraints = "$src = $dst"
-let SchedRW = [WriteShiftLd, WriteRMW], mayStore = 1 in {
+let SchedRW = [WriteRotateLd, WriteRMW], mayStore = 1 in {
let Uses = [EFLAGS] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", []>;
@@ -428,7 +430,7 @@ def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt),
Requires<[In64BitMode]>;
} // Uses = [EFLAGS]
-let Uses = [CL, EFLAGS] in {
+let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t{%cl, $dst|$dst, cl}", []>;
def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
@@ -452,9 +454,9 @@ def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
} // SchedRW
} // hasSideEffects = 0
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
// FIXME: provide shorter instructions when imm8 == 1
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (rotl GR8:$src1, CL))]>;
@@ -498,8 +500,7 @@ def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, cl}",
[(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -514,6 +515,8 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
[(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteRotateLd, WriteRMW] in {
def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1),
"rol{b}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
@@ -548,8 +551,8 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
Requires<[In64BitMode]>;
} // SchedRW
-let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
-let Uses = [CL] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in {
+let Uses = [CL], SchedRW = [WriteRotateCL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, cl}",
[(set GR8:$dst, (rotr GR8:$src1, CL))]>;
@@ -595,8 +598,7 @@ def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotl GR64:$src1, (i8 63)))]>;
} // Constraints = "$src = $dst", SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
-let Uses = [CL] in {
+let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
@@ -611,6 +613,8 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
[(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>,
Requires<[In64BitMode]>;
}
+
+let SchedRW = [WriteRotateLd, WriteRMW] in {
def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src),
"ror{b}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -822,6 +826,8 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{
return getI8Imm(64 - N->getZExtValue(), SDLoc(N));
}]>;
+// NOTE: We use WriteShift for these rotates as they avoid the stalls
+// of many of the older x86 rotate instructions.
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
@@ -844,12 +850,12 @@ let hasSideEffects = 0 in {
def rm : I<0xF7, MRMSrcMem4VOp3,
(outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX, Sched<[WriteShiftLd,
+ VEX, Sched<[WriteShift.Folded,
// x86memop:$src1
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src2
- ReadAfterLd]>;
+ WriteShift.ReadAfterFold]>;
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
index 322bdb74e2de..c417dc99b84d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -13,126 +13,42 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// No op bitconverts
-//===----------------------------------------------------------------------===//
-
-// Bitcasts between 128-bit vector types. Return the original type since
-// no instruction is needed for the conversion
-def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
-
-// Bitcasts between 256-bit vector types. Return the original type since
-// no instruction is needed for the conversion
-def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
-def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
-
-// Bitcasts between 512-bit vector types. Return the original type since
-// no instruction is needed for the conversion.
-def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
-def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
-
-
-//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
-// A vector extract of the first f32/f64 position is a subregister copy
-def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
- (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
-def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
- (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
+let Predicates = [NoAVX512] in {
+ // A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
+ def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
+}
+
+let Predicates = [HasAVX512] in {
+ // A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X)>;
+ def : Pat<(f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X)>;
+}
-// Implicitly promote a 32-bit scalar to a vector.
-def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (COPY_TO_REGCLASS FR32:$src, VR128)>;
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (COPY_TO_REGCLASS FR64:$src, VR128)>;
+let Predicates = [NoVLX] in {
+ // Implicitly promote a 32-bit scalar to a vector.
+ def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (COPY_TO_REGCLASS FR32:$src, VR128)>;
+ // Implicitly promote a 64-bit scalar to a vector.
+ def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (COPY_TO_REGCLASS FR64:$src, VR128)>;
+}
+let Predicates = [HasVLX] in {
+ // Implicitly promote a 32-bit scalar to a vector.
+ def : Pat<(v4f32 (scalar_to_vector FR32X:$src)),
+ (COPY_TO_REGCLASS FR32X:$src, VR128X)>;
+ // Implicitly promote a 64-bit scalar to a vector.
+ def : Pat<(v2f64 (scalar_to_vector FR64X:$src)),
+ (COPY_TO_REGCLASS FR64X:$src, VR128X)>;
+}
//===----------------------------------------------------------------------===//
// Subvector tricks
@@ -509,3 +425,85 @@ let Predicates = [HasBWI, HasVLX] in {
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}
+
+//===----------------------------------------------------------------------===//
+// Extra selection patterns for f128, f128mem
+
+// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
+let Predicates = [NoAVX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (MOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (MOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(alignedstore (f128 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f128 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+}
+
+let Predicates = [HasVLX] in {
+def : Pat<(alignedstore (f128 VR128X:$src), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
+def : Pat<(store (f128 VR128X:$src), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
+
+def : Pat<(alignedloadf128 addr:$src),
+ (VMOVAPSZ128rm addr:$src)>;
+def : Pat<(loadf128 addr:$src),
+ (VMOVUPSZ128rm addr:$src)>;
+}
+
+let Predicates = [UseSSE1] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128:$src1, (memopf128 addr:$src2))),
+ (ANDPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, (memopf128 addr:$src2))),
+ (ORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
+ (ORPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, (memopf128 addr:$src2))),
+ (XORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPSrr VR128:$src1, VR128:$src2)>;
+}
+
+let Predicates = [HasAVX] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128:$src1, (loadf128 addr:$src2))),
+ (VANDPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128:$src1, VR128:$src2)),
+ (VANDPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, (loadf128 addr:$src2))),
+ (VORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128:$src1, VR128:$src2)),
+ (VORPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, (loadf128 addr:$src2))),
+ (VXORPSrm VR128:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
+ (VXORPSrr VR128:$src1, VR128:$src2)>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
index ff3e3be48a24..9d810a675e3b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -11,32 +11,32 @@
//
//===----------------------------------------------------------------------===//
-multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
+multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
+ Sched<[SchedWritePHAdd.XMM.Folded, SchedWritePHAdd.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
- defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
- defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
- defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
- defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
- defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
- defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
- defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
- defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
- defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
- defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
- defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
- defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
- defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
- defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
- defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd>;
}
// Scalar load 2 addr operand instructions
@@ -48,47 +48,47 @@ multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int mem_cpat:$src))]>, XOP,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
- PatFrag memop, X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
- PatFrag memop, X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched> {
def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
- Sched<[sched.Folded, ReadAfterLd]>;
+ [(set VR256:$dst, (Int (load addr:$src)))]>, XOP, VEX_L,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
- defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
+ defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps,
SchedWriteFRnd.XMM>;
- defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
+ defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
- defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
+ defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd,
SchedWriteFRnd.XMM>;
- defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
SchedWriteFRnd.YMM>;
}
@@ -105,15 +105,15 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
+ (vt128 (load addr:$src2)))))]>,
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
+ (vt128 (OpNode (vt128 (load addr:$src1)),
(vt128 VR128:$src2))))]>,
- XOP, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
@@ -150,8 +150,8 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
- XOP, Sched<[sched.Folded, ReadAfterLd]>;
+ (vt128 (OpNode (vt128 (load addr:$src1)), imm:$src2)))]>,
+ XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
@@ -181,8 +181,8 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ (Int VR128:$src1, (load addr:$src2),
+ VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
@@ -260,9 +260,9 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ (vt128 (load addr:$src2)),
imm:$cc)))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
@@ -274,12 +274,12 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
+ []>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>,
NotMemoryFoldable;
}
}
- def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)),
+ def : Pat<(OpNode (load addr:$src2),
(vt128 VR128:$src1), imm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>;
@@ -310,21 +310,21 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ (vt128 (load addr:$src3)))))]>,
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)),
(vt128 VR128:$src3))))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd,
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
@@ -350,25 +350,26 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
Sched<[sched]>;
+ // FIXME: This pattern can't match.
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
- XOP_4V, Sched<[sched.Folded, ReadAfterLd,
+ XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC::$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
@@ -385,6 +386,48 @@ let ExeDomain = SSEPackedInt in {
SchedWriteShuffle.YMM>, VEX_L;
}
+let Predicates = [HasXOP] in {
+ def : Pat<(v16i8 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v8i16 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v16i8 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v8i16 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+ def : Pat<(or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, (bc_v4i32 (loadv2i64 addr:$src2)))),
+ (VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+ def : Pat<(v32i8 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(v16i16 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(v8i32 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v32i8 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v16i16 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+ def : Pat<(or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ (VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
+}
+
multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
X86MemOperand intmemop, X86MemOperand fpmemop,
ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
@@ -401,10 +444,9 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
- (VT (X86vpermil2 RC:$src1, RC:$src2,
- (bitconvert (IntLdFrag addr:$src3)),
+ (VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
(i8 imm:$src4))))]>, VEX_W,
- Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -412,11 +454,11 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
- Sched<[sched.Folded, ReadAfterLd,
+ Sched<[sched.Folded, sched.ReadAfterFold,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
- ReadAfterLd]>;
+ sched.ReadAfterFold]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
@@ -437,10 +479,10 @@ let ExeDomain = SSEPackedDouble in {
let ExeDomain = SSEPackedSingle in {
defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
- v4f32, loadv4f32, loadv2i64,
+ v4f32, loadv4f32, loadv4i32,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
- v8f32, loadv8f32, loadv4i64,
+ v8f32, loadv8f32, loadv8i32,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 36d36cb11d72..c20336387b2d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -89,6 +89,8 @@ private:
MachineFunction &MF) const;
bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -114,8 +116,10 @@ private:
bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectShift(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
- bool selectSDiv(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF) const;
+ bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
// emit insert subreg instruction and insert it before MachineInstr &I
bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
@@ -362,11 +366,14 @@ bool X86InstructionSelector::select(MachineInstr &I,
return selectAnyext(I, MRI, MF);
case TargetOpcode::G_ICMP:
return selectCmp(I, MRI, MF);
+ case TargetOpcode::G_FCMP:
+ return selectFCmp(I, MRI, MF);
case TargetOpcode::G_UADDE:
return selectUadde(I, MRI, MF);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(I, MRI, MF, CoverageInfo);
case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
return selectMergeValues(I, MRI, MF, CoverageInfo);
case TargetOpcode::G_EXTRACT:
return selectExtract(I, MRI, MF);
@@ -382,7 +389,12 @@ bool X86InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_LSHR:
return selectShift(I, MRI, MF);
case TargetOpcode::G_SDIV:
- return selectSDiv(I, MRI, MF);
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM:
+ return selectDivRem(I, MRI, MF);
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ return selectIntrinsicWSideEffects(I, MRI, MF);
}
return false;
@@ -967,6 +979,98 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
return true;
}
+bool X86InstructionSelector::selectFCmp(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");
+
+ unsigned LhsReg = I.getOperand(2).getReg();
+ unsigned RhsReg = I.getOperand(3).getReg();
+ CmpInst::Predicate Predicate =
+ (CmpInst::Predicate)I.getOperand(1).getPredicate();
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static const uint16_t SETFOpcTable[2][3] = {
+ {X86::SETEr, X86::SETNPr, X86::AND8rr},
+ {X86::SETNEr, X86::SETPr, X86::OR8rr}};
+ const uint16_t *SETFOpc = nullptr;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_OEQ:
+ SETFOpc = &SETFOpcTable[0][0];
+ break;
+ case CmpInst::FCMP_UNE:
+ SETFOpc = &SETFOpcTable[1][0];
+ break;
+ }
+
+ // Compute the opcode for the CMP instruction.
+ unsigned OpCmp;
+ LLT Ty = MRI.getType(LhsReg);
+ switch (Ty.getSizeInBits()) {
+ default:
+ return false;
+ case 32:
+ OpCmp = X86::UCOMISSrr;
+ break;
+ case 64:
+ OpCmp = X86::UCOMISDrr;
+ break;
+ }
+
+ unsigned ResultReg = I.getOperand(0).getReg();
+ RBI.constrainGenericRegister(
+ ResultReg,
+ *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
+ if (SETFOpc) {
+ MachineInstr &CmpInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
+ .addReg(LhsReg)
+ .addReg(RhsReg);
+
+ unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
+ unsigned FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
+ MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[0]), FlagReg1);
+ MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[1]), FlagReg2);
+ MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SETFOpc[2]), ResultReg)
+ .addReg(FlagReg1)
+ .addReg(FlagReg2);
+ constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return true;
+ }
+
+ X86::CondCode CC;
+ bool SwapArgs;
+ std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+ unsigned Opc = X86::getSETFromCond(CC);
+
+ if (SwapArgs)
+ std::swap(LhsReg, RhsReg);
+
+ // Emit a compare of LHS/RHS.
+ MachineInstr &CmpInst =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
+ .addReg(LhsReg)
+ .addReg(RhsReg);
+
+ MachineInstr &Set =
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc), ResultReg);
+ constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
bool X86InstructionSelector::selectUadde(MachineInstr &I,
MachineRegisterInfo &MRI,
MachineFunction &MF) const {
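The selectFCmp addition above cannot express FCMP_OEQ or FCMP_UNE with a single SETcc because UCOMISS/UCOMISD fold "unordered" into the zero flag: ZF is set for equal or unordered operands, while PF is set only when the comparison is unordered. A minimal, self-contained C++ sketch of that flag logic (plain scalar code for illustration, not part of the patch):

// Flag semantics selectFCmp relies on after UCOMISS a, b:
//   ZF == 1  iff  a == b or either operand is NaN   ("equal or unordered")
//   PF == 1  iff  either operand is NaN             ("unordered")
// Hence OEQ needs SETE & SETNP, and UNE needs SETNE | SETP, as encoded in
// SETFOpcTable above.
static inline bool fcmpOEQ(float A, float B) {
  bool ZF = !(A < B) && !(A > B); // equal or unordered
  bool PF = (A != A) || (B != B); // unordered (a NaN is involved)
  return ZF && !PF;               // SETEr, SETNPr, AND8rr
}
static inline bool fcmpUNE(float A, float B) {
  bool ZF = !(A < B) && !(A > B);
  bool PF = (A != A) || (B != B);
  return !ZF || PF;               // SETNEr, SETPr, OR8rr
}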
@@ -1246,7 +1350,8 @@ bool X86InstructionSelector::selectUnmergeValues(
bool X86InstructionSelector::selectMergeValues(
MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF,
CodeGenCoverage &CoverageInfo) const {
- assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES) &&
+ assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
+ I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
"unexpected instruction");
// Split to inserts.
@@ -1485,23 +1590,33 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
return true;
}
-bool X86InstructionSelector::selectSDiv(MachineInstr &I,
- MachineRegisterInfo &MRI,
- MachineFunction &MF) const {
-
- assert(I.getOpcode() == TargetOpcode::G_SDIV && "unexpected instruction");
+bool X86InstructionSelector::selectDivRem(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ // The implementation of this function is taken from X86FastISel.
+ assert((I.getOpcode() == TargetOpcode::G_SDIV ||
+ I.getOpcode() == TargetOpcode::G_SREM ||
+ I.getOpcode() == TargetOpcode::G_UDIV ||
+ I.getOpcode() == TargetOpcode::G_UREM) &&
+ "unexpected instruction");
const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned DividentReg = I.getOperand(1).getReg();
- const unsigned DiviserReg = I.getOperand(2).getReg();
+ const unsigned Op1Reg = I.getOperand(1).getReg();
+ const unsigned Op2Reg = I.getOperand(2).getReg();
const LLT RegTy = MRI.getType(DstReg);
- assert(RegTy == MRI.getType(DividentReg) &&
- RegTy == MRI.getType(DiviserReg) &&
+ assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
"Arguments and return value types must match");
const RegisterBank &RegRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ if (RegRB.getID() != X86::GPRRegBankID)
+ return false;
+ const static unsigned NumTypes = 4; // i8, i16, i32, i64
+ const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
+ const static bool S = true; // IsSigned
+ const static bool U = false; // !IsSigned
+ const static unsigned Copy = TargetOpcode::COPY;
// For the X86 IDIV instruction, in most cases the dividend
// (numerator) must be in a specific register pair highreg:lowreg,
// producing the quotient in lowreg and the remainder in highreg.
@@ -1510,56 +1625,182 @@ bool X86InstructionSelector::selectSDiv(MachineInstr &I,
// exception is i8, where the dividend is defined as a single register rather
// than a register pair, and we therefore directly sign-extend the dividend
// into lowreg, instead of copying, and ignore the highreg.
- const static struct SDivEntry {
+ const static struct DivRemEntry {
+ // The following portion depends only on the data type.
unsigned SizeInBits;
- unsigned QuotientReg;
- unsigned DividentRegUpper;
- unsigned DividentRegLower;
- unsigned OpSignExtend;
- unsigned OpCopy;
- unsigned OpDiv;
- } OpTable[] = {
- {8, X86::AL, X86::NoRegister, X86::AX, 0, X86::MOVSX16rr8,
- X86::IDIV8r}, // i8
- {16, X86::AX, X86::DX, X86::AX, X86::CWD, TargetOpcode::COPY,
- X86::IDIV16r}, // i16
- {32, X86::EAX, X86::EDX, X86::EAX, X86::CDQ, TargetOpcode::COPY,
- X86::IDIV32r}, // i32
- {64, X86::RAX, X86::RDX, X86::RAX, X86::CQO, TargetOpcode::COPY,
- X86::IDIV64r} // i64
+ unsigned LowInReg; // low part of the register pair
+ unsigned HighInReg; // high part of the register pair
+ // The following portion depends on both the data type and the operation.
+ struct DivRemResult {
+ unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
+ unsigned OpSignExtend; // Opcode for sign-extending lowreg into
+ // highreg, or copying a zero into highreg.
+ unsigned OpCopy; // Opcode for copying dividend into lowreg, or
+ // zero/sign-extending into lowreg for i8.
+ unsigned DivRemResultReg; // Register containing the desired result.
+ bool IsOpSigned; // Whether to use signed or unsigned form.
+ } ResultTable[NumOps];
+ } OpTable[NumTypes] = {
+ {8,
+ X86::AX,
+ 0,
+ {
+ {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
+ {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
+ {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U}, // UDiv
+ {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U}, // URem
+ }}, // i8
+ {16,
+ X86::AX,
+ X86::DX,
+ {
+ {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
+ {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
+ }}, // i16
+ {32,
+ X86::EAX,
+ X86::EDX,
+ {
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
+ }}, // i32
+ {64,
+ X86::RAX,
+ X86::RDX,
+ {
+ {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S}, // SDiv
+ {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem
+ {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
+ {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
+ }}, // i64
};
- if (RegRB.getID() != X86::GPRRegBankID)
+ auto OpEntryIt = std::find_if(std::begin(OpTable), std::end(OpTable),
+ [RegTy](const DivRemEntry &El) {
+ return El.SizeInBits == RegTy.getSizeInBits();
+ });
+ if (OpEntryIt == std::end(OpTable))
return false;
- auto SDivEntryIt = std::find_if(
- std::begin(OpTable), std::end(OpTable), [RegTy](const SDivEntry &El) {
- return El.SizeInBits == RegTy.getSizeInBits();
- });
+ unsigned OpIndex;
+ switch (I.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected div/rem opcode");
+ case TargetOpcode::G_SDIV:
+ OpIndex = 0;
+ break;
+ case TargetOpcode::G_SREM:
+ OpIndex = 1;
+ break;
+ case TargetOpcode::G_UDIV:
+ OpIndex = 2;
+ break;
+ case TargetOpcode::G_UREM:
+ OpIndex = 3;
+ break;
+ }
- if (SDivEntryIt == std::end(OpTable))
- return false;
+ const DivRemEntry &TypeEntry = *OpEntryIt;
+ const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
const TargetRegisterClass *RegRC = getRegClass(RegTy, RegRB);
- if (!RBI.constrainGenericRegister(DividentReg, *RegRC, MRI) ||
- !RBI.constrainGenericRegister(DiviserReg, *RegRC, MRI) ||
+ if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
+ !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
return false;
}
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SDivEntryIt->OpCopy),
- SDivEntryIt->DividentRegLower)
- .addReg(DividentReg);
- if (SDivEntryIt->DividentRegUpper != X86::NoRegister)
+ // Move op1 into low-order input register.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
+ TypeEntry.LowInReg)
+ .addReg(Op1Reg);
+ // Zero-extend or sign-extend into high-order input register.
+ if (OpEntry.OpSignExtend) {
+ if (OpEntry.IsOpSigned)
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(OpEntry.OpSignExtend));
+ else {
+ unsigned Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
+ Zero32);
+
+ // Copy the zero into the appropriate sub/super/identical physical
+ // register. Unfortunately the operations needed are not uniform enough
+ // to fit neatly into the table above.
+ if (RegTy.getSizeInBits() == 16) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
+ TypeEntry.HighInReg)
+ .addReg(Zero32, 0, X86::sub_16bit);
+ } else if (RegTy.getSizeInBits() == 32) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
+ TypeEntry.HighInReg)
+ .addReg(Zero32);
+ } else if (RegTy.getSizeInBits() == 64) {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
+ .addImm(0)
+ .addReg(Zero32)
+ .addImm(X86::sub_32bit);
+ }
+ }
+ }
+ // Generate the DIV/IDIV instruction.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
+ .addReg(Op2Reg);
+ // For i8 remainder, we can't reference ah directly, as we'll end
+ // up with bogus copies like %r9b = COPY %ah. Reference ax
+ // instead to prevent ah references in a rex instruction.
+ //
+ // The current assumption of the fast register allocator is that isel
+ // won't generate explicit references to the GR8_NOREX registers. If
+ // the allocator and/or the backend get enhanced to be more robust in
+ // that regard, this can be, and should be, removed.
+ if ((I.getOpcode() == Instruction::SRem ||
+ I.getOpcode() == Instruction::URem) &&
+ OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
+ unsigned SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
+ unsigned ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
+ .addReg(X86::AX);
+
+ // Shift AX right by 8 bits instead of using AH.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
+ ResultSuperReg)
+ .addReg(SourceSuperReg)
+ .addImm(8);
+
+ // Now reference the 8-bit subreg of the result.
BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(SDivEntryIt->OpSignExtend));
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SDivEntryIt->OpDiv))
- .addReg(DiviserReg);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
- DstReg)
- .addReg(SDivEntryIt->QuotientReg);
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(DstReg)
+ .addImm(0)
+ .addReg(ResultSuperReg)
+ .addImm(X86::sub_8bit);
+ } else {
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
+ DstReg)
+ .addReg(OpEntry.DivRemResultReg);
+ }
+ I.eraseFromParent();
+ return true;
+}
+
+bool X86InstructionSelector::selectIntrinsicWSideEffects(
+ MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const {
+
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+ "unexpected instruction");
+
+ if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
+ return false;
+
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP));
I.eraseFromParent();
return true;
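The selectDivRem table above encodes the x86 convention that DIV/IDIV divide the double-width value HighInReg:LowInReg by the explicit operand, leaving the quotient in LowInReg and the remainder in HighInReg (i8 instead uses AX as the dividend and AL/AH for the results). A minimal sketch of the 32-bit signed case in plain C++ (an illustration of the convention, not code from the patch):

#include <cstdint>

struct DivRem32 { int32_t Quotient, Remainder; };

// Conceptually what the selector emits for a 32-bit G_SDIV/G_SREM:
//   COPY    eax <- Dividend       (OpCopy)
//   CDQ                           (OpSignExtend: edx:eax = sext(eax))
//   IDIV32r Divisor               (OpDivRem)
//   COPY    result <- eax or edx  (DivRemResultReg: EAX for SDiv, EDX for SRem)
DivRem32 sdivrem32(int32_t Dividend, int32_t Divisor) {
  int64_t Wide = static_cast<int64_t>(Dividend); // the CDQ widening step
  return {static_cast<int32_t>(Wide / Divisor),  // quotient lands in EAX
          static_cast<int32_t>(Wide % Divisor)}; // remainder lands in EDX
}

The unsigned forms zero the high register instead of sign-extending, which is why the i16/i32/i64 UDiv/URem rows carry MOV32r0 as their OpSignExtend entry and need the sub/super-register copy handled after the table.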
diff --git a/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index 6c7fb9c339ac..28940754a203 100644
--- a/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -463,7 +463,7 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
// {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
// Imm variable sets the offset amount. The result of the
// function is stored inside ShuffleMask vector and it built as described in
-// the begin of the description. AlignDirection is a boolean that indecat the
+// the begin of the description. AlignDirection is a boolean that indicates the
// direction of the alignment. (false - align to the "right" side while true -
// align to the "left" side)
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 2dd60a1b8b5a..151e1b9136c4 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -20,18 +20,18 @@
namespace llvm {
enum IntrinsicType : uint16_t {
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS,
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
- INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP_IMM8,
- CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
- CVTPD2PS, CVTPD2PS_MASK,
+ INTR_TYPE_3OP_IMM8,
+ CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
+ CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK,
- FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_SCALAR,
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG,
+ TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2I_MASK,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, GATHER_AVX2,
@@ -64,11 +64,6 @@ struct IntrinsicData {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithChain[] = {
- X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
-
X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
@@ -120,6 +115,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
@@ -229,6 +249,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
+
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
@@ -270,9 +315,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
-
- X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
- X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
};
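The comment above the IntrinsicsWithChain table insists on alphabetical order so that the numeric intrinsic IDs, which follow the sorted intrinsic names, stay ascending and lookup helpers such as getIntrinsicWithChain can binary-search the array. A self-contained sketch of that lookup pattern (the struct and field names here are hypothetical stand-ins, not the header's actual layout):

#include <algorithm>
#include <cstdint>

// Hypothetical stand-in for IntrinsicData; only the sorted Id field matters here.
struct Entry {
  unsigned Id;
  uint16_t Type;
  uint16_t Opc0, Opc1;
};

// Binary search over a table kept sorted by Id, in the spirit of
// getIntrinsicWithChain / getIntrinsicWithoutChain.
static const Entry *lookupEntry(const Entry *Begin, const Entry *End,
                                unsigned IntNo) {
  const Entry *It = std::lower_bound(
      Begin, End, IntNo,
      [](const Entry &E, unsigned N) { return E.Id < N; });
  return (It != End && It->Id == IntNo) ? It : nullptr;
}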
@@ -294,6 +336,8 @@ static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
+ X86_INTRINSIC_DATA(addcarry_32, ADX, X86ISD::ADC, X86ISD::ADD),
+ X86_INTRINSIC_DATA(addcarry_64, ADX, X86ISD::ADC, X86ISD::ADD),
X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
@@ -325,10 +369,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -369,10 +409,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -384,14 +420,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
@@ -402,12 +438,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
- X86_INTRINSIC_DATA(avx512_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_fpclass_ps_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_b, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_d, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_q, INTR_TYPE_2OP, X86ISD::KADD, 0),
+ X86_INTRINSIC_DATA(avx512_kadd_w, INTR_TYPE_2OP, X86ISD::KADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
@@ -467,13 +507,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2I_MASK,
+ X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK,
- X86ISD::VFPROUND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK,
ISD::FP_ROUND, X86ISD::VFPROUND_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
@@ -481,8 +521,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2I_MASK,
+ X86ISD::CVTP2UI, X86ISD::MCVTP2UI),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_512, INTR_TYPE_1OP_MASK,
@@ -531,8 +571,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VFPROUNDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::VFPEXTS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2I_MASK,
+ X86ISD::CVTTP2SI, X86ISD::MCVTTP2SI),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
@@ -541,8 +581,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2I_MASK,
+ X86ISD::CVTTP2UI, X86ISD::MCVTTP2UI),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
@@ -677,144 +717,114 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMULS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, TRUNCATE_TO_REG,
+ ISD::TRUNCATE, X86ISD::VMTRUNC),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_128, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_256, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_512, INTR_TYPE_2OP_MASK,
- X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
@@ -871,38 +881,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
- X86ISD::CVTPS2PH, 0),
-
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
-
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_128, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_256, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
- X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, CVTPS2PH_MASK,
+ X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
@@ -921,25 +905,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
X86ISD::VFIXUPIMMS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
-
X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
@@ -967,30 +932,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_d_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prol_q_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_128, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_256, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_d_512, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_128, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_256, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_prolv_q_512, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_d_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_128, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_256, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_pror_q_512, INTR_TYPE_2OP_IMM8, X86ISD::VROTRI, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_128, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_256, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_d_512, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_128, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_256, INTR_TYPE_2OP, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_prorv_q_512, INTR_TYPE_2OP, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_128, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_256, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
+ X86_INTRINSIC_DATA(avx512_pmultishift_qb_512, INTR_TYPE_2OP, X86ISD::MULTISHIFT, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
@@ -1068,14 +1012,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_sub_ps_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
- X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_f32, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_f64, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_pd_512, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
@@ -1124,26 +1068,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_128 , IFMA_OP, X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_256 , IFMA_OP, X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_512 , IFMA_OP, X86ISD::VPMADD52L, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_d_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_q_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshld_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHLD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_d_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_q_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_128, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_256, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
- X86_INTRINSIC_DATA(avx512_vpshrd_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_128, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_256, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+ X86_INTRINSIC_DATA(avx512_vpshufbitqmb_512, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
+ X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
+ X86_INTRINSIC_DATA(bmi_bzhi_64, INTR_TYPE_2OP, X86ISD::BZHI, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
@@ -1151,6 +1082,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_comile_ss, COMI, X86ISD::COMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse_cvtss2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvtss2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
+ X86_INTRINSIC_DATA(sse_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
@@ -1174,8 +1109,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_cvtpd2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvtsd2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
@@ -1184,10 +1123,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(sse2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1209,10 +1144,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
@@ -1241,6 +1172,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB),
+ X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
@@ -1280,14 +1213,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2ps_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
- X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
- X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, ISD::ROTL, 0),
- X86_INTRINSIC_DATA(xop_vprotwi, INTR_TYPE_2OP, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(xop_vpshab, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshad, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
X86_INTRINSIC_DATA(xop_vpshaq, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
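For orientation: each X86_INTRINSIC_DATA row above binds an intrinsic to a lowering kind plus up to two SelectionDAG opcodes (the second typically a rounding or masked variant, 0 when unused). A minimal sketch of that table shape, assuming simplified field and enum names rather than the exact declarations in X86IntrinsicsInfo.h:

    // Simplified, illustrative mirror of the lowering-table entry shape;
    // names here are assumptions, not copied from X86IntrinsicsInfo.h.
    #include <cstdint>

    enum LoweringKind : uint16_t { INTR_TYPE_1OP_K, INTR_TYPE_2OP_K, CVTPS2PH_MASK_K };

    struct IntrinsicRow {
      uint16_t     IntrinsicID; // Intrinsic::x86_* enumerator value
      LoweringKind Kind;        // selects the generic lowering path
      uint16_t     Opcode0;     // primary ISD/X86ISD node
      uint16_t     Opcode1;     // secondary node (rounding/masked form), 0 if none
    };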
diff --git a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index d372cada8de8..4a49fa68dd06 100644
--- a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -133,7 +133,8 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
// Shifts and SDIV
- getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR, G_SDIV})
+ getActionDefinitionsBuilder(
+ {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
.legalFor({s8, s16, s32})
.clampScalar(0, s8, s32);
}
@@ -219,13 +220,27 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0);
+ getActionDefinitionsBuilder(G_FPTOSI)
+ .legalForCartesianProduct({s32, s64})
+ .clampScalar(1, s32, s64)
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(1);
+
// Comparison
setAction({G_ICMP, 1, s64}, Legal);
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalForCartesianProduct({s8}, {s32, s64})
+ .clampScalar(0, s8, s8)
+ .clampScalar(1, s32, s64)
+ .widenScalarToNextPow2(1);
+
// Shifts and SDIV
- getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR, G_SDIV})
- .legalFor({s8, s16, s32, s64})
- .clampScalar(0, s8, s64);
+ getActionDefinitionsBuilder(
+ {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
+ .legalFor({s8, s16, s32, s64})
+ .clampScalar(0, s8, s64);
// Merge/Unmerge
setAction({G_MERGE_VALUES, s128}, Legal);
@@ -256,7 +271,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE1() {
// Merge/Unmerge
for (const auto &Ty : {v4s32, v2s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
setAction({G_MERGE_VALUES, 1, s64}, Legal);
@@ -292,17 +307,20 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
setAction({G_FPEXT, s64}, Legal);
setAction({G_FPEXT, 1, s32}, Legal);
+ setAction({G_FPTRUNC, s32}, Legal);
+ setAction({G_FPTRUNC, 1, s64}, Legal);
+
// Constants
setAction({TargetOpcode::G_FCONSTANT, s64}, Legal);
// Merge/Unmerge
for (const auto &Ty :
{v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty : {v16s8, v8s16, v4s32, v2s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
@@ -349,12 +367,12 @@ void X86LegalizerInfo::setLegalizerInfoAVX() {
// Merge/Unmerge
for (const auto &Ty :
{v32s8, v64s8, v16s16, v32s16, v8s32, v16s32, v4s64, v8s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty :
{v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
@@ -382,11 +400,11 @@ void X86LegalizerInfo::setLegalizerInfoAVX2() {
// Merge/Unmerge
for (const auto &Ty : {v64s8, v32s16, v16s32, v8s64}) {
- setAction({G_MERGE_VALUES, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, Ty}, Legal);
setAction({G_UNMERGE_VALUES, 1, Ty}, Legal);
}
for (const auto &Ty : {v32s8, v16s16, v8s32, v4s64}) {
- setAction({G_MERGE_VALUES, 1, Ty}, Legal);
+ setAction({G_CONCAT_VECTORS, 1, Ty}, Legal);
setAction({G_UNMERGE_VALUES, Ty}, Legal);
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index d38c7b497965..2816f8c62bfb 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -132,6 +132,9 @@ MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
// Handle dllimport linkage.
Name += "__imp_";
break;
+ case X86II::MO_COFFSTUB:
+ Name += ".refptr.";
+ break;
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
Suffix = "$non_lazy_ptr";
@@ -160,6 +163,17 @@ MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
default:
break;
+ case X86II::MO_COFFSTUB: {
+ MachineModuleInfoCOFF &MMICOFF =
+ MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
+ MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
+ if (!StubSym.getPointer()) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::StubValueTy(
+ AsmPrinter.getSymbol(MO.getGlobal()), true);
+ }
+ break;
+ }
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -191,6 +205,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// These affect the name of the symbol, not any suffix.
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DLLIMPORT:
+ case X86II::MO_COFFSTUB:
break;
case X86II::MO_TLVP:
@@ -512,7 +527,7 @@ ReSimplify:
}
case X86::CLEANUPRET: {
- // Replace CATCHRET with the appropriate RET.
+ // Replace CLEANUPRET with the appropriate RET.
OutMI = MCInst();
OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
break;
@@ -584,54 +599,6 @@ ReSimplify:
case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
- // Atomic load and store require a separate pseudo-inst because Acquire
- // implies mayStore and Release implies mayLoad; fix these to regular MOV
- // instructions here
- case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
- case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
- case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
- case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
- case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
- case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
- case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
- case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
- case X86::RELEASE_MOV8mi: OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
- case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
- case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
- case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
- case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
- case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
- case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
- case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
- case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
- case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
- case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
- case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
- case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
- case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
- case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
- case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
- case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
- case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
- case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
- case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
- case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
- case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
- case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
- case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
- case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
- case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
- case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
- case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify;
- case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
- case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
- case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
- case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify;
- case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
- case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
- case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
- case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify;
-
// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for
// now.
@@ -946,7 +913,7 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
break;
case MachineOperand::MO_Register:
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpoline())
+ if (Subtarget->useRetpolineIndirectCalls())
report_fatal_error("Lowering register statepoints with retpoline not "
"yet implemented.");
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
@@ -1103,7 +1070,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpoline())
+ if (Subtarget->useRetpolineIndirectCalls())
report_fatal_error(
"Lowering patchpoint with retpoline not yet implemented.");
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
@@ -1412,7 +1379,7 @@ PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
static const Constant *getConstantFromPool(const MachineInstr &MI,
const MachineOperand &Op) {
- if (!Op.isCPI())
+ if (!Op.isCPI() || Op.getOffset() != 0)
return nullptr;
ArrayRef<MachineConstantPoolEntry> Constants =
@@ -1424,7 +1391,7 @@ static const Constant *getConstantFromPool(const MachineInstr &MI,
if (ConstantEntry.isMachineConstantPoolEntry())
return nullptr;
- auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal);
+ const Constant *C = ConstantEntry.Val.ConstVal;
assert((!C || ConstantEntry.getType() == C->getType()) &&
"Expected a constant of the same type!");
return C;
@@ -1515,27 +1482,35 @@ static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
return Comment;
}
+static void printConstant(const APInt &Val, raw_ostream &CS) {
+ if (Val.getBitWidth() <= 64) {
+ CS << Val.getZExtValue();
+ } else {
+ // print multi-word constant as (w0,w1)
+ CS << "(";
+ for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
+ if (i > 0)
+ CS << ",";
+ CS << Val.getRawData()[i];
+ }
+ CS << ")";
+ }
+}
+
+static void printConstant(const APFloat &Flt, raw_ostream &CS) {
+ SmallString<32> Str;
+ // Force scientific notation to distinguish from integers.
+ Flt.toString(Str, 0, 0);
+ CS << Str;
+}
+
static void printConstant(const Constant *COp, raw_ostream &CS) {
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
- if (CI->getBitWidth() <= 64) {
- CS << CI->getZExtValue();
- } else {
- // print multi-word constant as (w0,w1)
- const auto &Val = CI->getValue();
- CS << "(";
- for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
- if (i > 0)
- CS << ",";
- CS << Val.getRawData()[i];
- }
- CS << ")";
- }
+ printConstant(CI->getValue(), CS);
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
- SmallString<32> Str;
- CF->getValueAPF().toString(Str);
- CS << Str;
+ printConstant(CF->getValueAPF(), CS);
} else {
CS << "?";
}
@@ -1558,6 +1533,9 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
case X86::SEH_StackAlloc:
XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
break;
+ case X86::SEH_StackAlign:
+ XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
+ break;
case X86::SEH_SetFrame:
assert(MI->getOperand(1).getImm() == 0 &&
".cv_fpo_setframe takes no offset");
@@ -1617,6 +1595,18 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
}
}
+static unsigned getRegisterWidth(const MCOperandInfo &Info) {
+ if (Info.RegClass == X86::VR128RegClassID ||
+ Info.RegClass == X86::VR128XRegClassID)
+ return 128;
+ if (Info.RegClass == X86::VR256RegClassID ||
+ Info.RegClass == X86::VR256XRegClassID)
+ return 256;
+ if (Info.RegClass == X86::VR512RegClassID)
+ return 512;
+ llvm_unreachable("Unknown register class!");
+}
+
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI =
@@ -1720,41 +1710,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- case X86::MOVGOT64r: {
- // Materializes the GOT for the 64-bit large code model.
- MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
-
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned ScratchReg = MI->getOperand(1).getReg();
- MCSymbol *GOTSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
-
- // .LtmpN: leaq .LtmpN(%rip), %dst
- const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
- EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
- .addReg(DstReg) // dest
- .addReg(X86::RIP) // base
- .addImm(1) // scale
- .addReg(0) // index
- .addExpr(DotExpr) // disp
- .addReg(0)); // seg
-
- // movq $_GLOBAL_OFFSET_TABLE_ - .LtmpN, %scratch
- const MCExpr *GOTSymExpr = MCSymbolRefExpr::create(GOTSym, OutContext);
- const MCExpr *GOTDiffExpr =
- MCBinaryExpr::createSub(GOTSymExpr, DotExpr, OutContext);
- EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
- .addReg(ScratchReg) // dest
- .addExpr(GOTDiffExpr)); // disp
-
- // addq %scratch, %dst
- EmitAndCountInstruction(MCInstBuilder(X86::ADD64rr)
- .addReg(DstReg) // dest
- .addReg(DstReg) // dest
- .addReg(ScratchReg)); // src
- return;
- }
-
case X86::ADD32ri: {
// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
@@ -1835,6 +1790,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::SEH_SaveReg:
case X86::SEH_SaveXMM:
case X86::SEH_StackAlloc:
+ case X86::SEH_StackAlign:
case X86::SEH_SetFrame:
case X86::SEH_PushFrame:
case X86::SEH_EndPrologue:
@@ -1901,8 +1857,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 64> Mask;
- DecodePSHUFBMask(C, Mask);
+ DecodePSHUFBMask(C, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
!EnablePrintSchedInfo);
@@ -1973,8 +1930,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPERMILPMask(C, ElSize, Mask);
+ DecodeVPERMILPMask(C, ElSize, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
!EnablePrintSchedInfo);
@@ -2004,8 +1962,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
+ DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
!EnablePrintSchedInfo);
@@ -2021,8 +1980,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
- DecodeVPPERMMask(C, Mask);
+ DecodeVPPERMMask(C, Width, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
!EnablePrintSchedInfo);
@@ -2129,11 +2089,11 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (i != 0 || l != 0)
CS << ",";
if (CDS->getElementType()->isIntegerTy())
- CS << CDS->getElementAsInteger(i);
- else if (CDS->getElementType()->isFloatTy())
- CS << CDS->getElementAsFloat(i);
- else if (CDS->getElementType()->isDoubleTy())
- CS << CDS->getElementAsDouble(i);
+ printConstant(CDS->getElementAsAPInt(i), CS);
+ else if (CDS->getElementType()->isHalfTy() ||
+ CDS->getElementType()->isFloatTy() ||
+ CDS->getElementType()->isDoubleTy())
+ printConstant(CDS->getElementAsAPFloat(i), CS);
else
CS << "?";
}
@@ -2155,6 +2115,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
break;
+ case X86::MOVDDUPrm:
+ case X86::VMOVDDUPrm:
+ case X86::VMOVDDUPZ128rm:
case X86::VBROADCASTSSrm:
case X86::VBROADCASTSSYrm:
case X86::VBROADCASTSSZ128m:
@@ -2191,6 +2154,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
int NumElts;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
+ case X86::MOVDDUPrm: NumElts = 2; break;
+ case X86::VMOVDDUPrm: NumElts = 2; break;
+ case X86::VMOVDDUPZ128rm: NumElts = 2; break;
case X86::VBROADCASTSSrm: NumElts = 4; break;
case X86::VBROADCASTSSYrm: NumElts = 8; break;
case X86::VBROADCASTSSZ128m: NumElts = 4; break;
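As an aside on the shuffle-comment hunks above: the new Width argument, computed by getRegisterWidth from the destination operand's register class, lets the Decode*Mask helpers size the printed mask by the destination register rather than by the constant-pool entry. A small self-contained sketch of that idea, with an illustrative helper name rather than LLVM's:

    // Illustrative only: clamp a decoded mask to the lane count implied by the
    // destination register width, so an over-wide constant does not add lanes.
    #include <vector>

    std::vector<int> clampMaskToWidth(std::vector<int> Mask,
                                      unsigned RegBits, unsigned EltBits) {
      const unsigned NumLanes = RegBits / EltBits; // e.g. 128 / 8 = 16 for an xmm PSHUFB
      if (Mask.size() > NumLanes)
        Mask.resize(NumLanes);
      return Mask;
    }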
diff --git a/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp b/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
index df3abb17014d..5c09597d0442 100644
--- a/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -79,53 +79,46 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::TEST8ri:
case X86::TEST16ri:
case X86::TEST32ri:
- case X86::TEST32i32:
- case X86::TEST64i32:
case X86::TEST64ri32:
case X86::TEST8mr:
case X86::TEST16mr:
case X86::TEST32mr:
case X86::TEST64mr:
- case X86::AND16i16:
case X86::AND16ri:
case X86::AND16ri8:
case X86::AND16rm:
case X86::AND16rr:
- case X86::AND32i32:
case X86::AND32ri:
case X86::AND32ri8:
case X86::AND32rm:
case X86::AND32rr:
- case X86::AND64i32:
case X86::AND64ri32:
case X86::AND64ri8:
case X86::AND64rm:
case X86::AND64rr:
- case X86::AND8i8:
case X86::AND8ri:
case X86::AND8rm:
case X86::AND8rr:
return true;
- case X86::CMP16i16:
case X86::CMP16ri:
case X86::CMP16ri8:
case X86::CMP16rm:
case X86::CMP16rr:
- case X86::CMP32i32:
+ case X86::CMP16mr:
case X86::CMP32ri:
case X86::CMP32ri8:
case X86::CMP32rm:
case X86::CMP32rr:
- case X86::CMP64i32:
+ case X86::CMP32mr:
case X86::CMP64ri32:
case X86::CMP64ri8:
case X86::CMP64rm:
case X86::CMP64rr:
- case X86::CMP8i8:
+ case X86::CMP64mr:
case X86::CMP8ri:
case X86::CMP8rm:
case X86::CMP8rr:
- case X86::ADD16i16:
+ case X86::CMP8mr:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri8_DB:
@@ -133,7 +126,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD16rm:
case X86::ADD16rr:
case X86::ADD16rr_DB:
- case X86::ADD32i32:
case X86::ADD32ri:
case X86::ADD32ri8:
case X86::ADD32ri8_DB:
@@ -141,7 +133,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD32rm:
case X86::ADD32rr:
case X86::ADD32rr_DB:
- case X86::ADD64i32:
case X86::ADD64ri32:
case X86::ADD64ri32_DB:
case X86::ADD64ri8:
@@ -149,28 +140,21 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD64rm:
case X86::ADD64rr:
case X86::ADD64rr_DB:
- case X86::ADD8i8:
- case X86::ADD8mi:
- case X86::ADD8mr:
case X86::ADD8ri:
case X86::ADD8rm:
case X86::ADD8rr:
- case X86::SUB16i16:
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB16rm:
case X86::SUB16rr:
- case X86::SUB32i32:
case X86::SUB32ri:
case X86::SUB32ri8:
case X86::SUB32rm:
case X86::SUB32rr:
- case X86::SUB64i32:
case X86::SUB64ri32:
case X86::SUB64ri8:
case X86::SUB64rm:
case X86::SUB64rr:
- case X86::SUB8i8:
case X86::SUB8ri:
case X86::SUB8rm:
case X86::SUB8rr:
diff --git a/contrib/llvm/lib/Target/X86/X86MacroFusion.h b/contrib/llvm/lib/Target/X86/X86MacroFusion.h
index 13fa2d78a018..97ef1d6d3b61 100644
--- a/contrib/llvm/lib/Target/X86/X86MacroFusion.h
+++ b/contrib/llvm/lib/Target/X86/X86MacroFusion.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_X86_X86MACROFUSION_H
+#define LLVM_LIB_TARGET_X86_X86MACROFUSION_H
+
#include "llvm/CodeGen/MachineScheduler.h"
namespace llvm {
@@ -23,3 +26,5 @@ std::unique_ptr<ScheduleDAGMutation>
createX86MacroFusionDAGMutation();
} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 42db51b3cf01..b56d02b6bfb6 100644
--- a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -510,12 +510,16 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
MemOpNo += X86II::getOperandBias(Desc);
+ // Do not call chooseBestLEA if there was no matching LEA
+ auto Insns = LEAs.find(getMemOpKey(MI, MemOpNo));
+ if (Insns == LEAs.end())
+ continue;
+
// Get the best LEA instruction to replace address calculation.
MachineInstr *DefMI;
int64_t AddrDispShift;
int Dist;
- if (!chooseBestLEA(LEAs[getMemOpKey(MI, MemOpNo)], MI, DefMI, AddrDispShift,
- Dist))
+ if (!chooseBestLEA(Insns->second, MI, DefMI, AddrDispShift, Dist))
continue;
// If LEA occurs before current instruction, we can freely replace
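A note on the X86OptimizeLEAs hunk above: replacing the LEAs[getMemOpKey(...)] subscript with a find() plus early continue both skips memory operands with no matching LEA and avoids inserting an empty entry into the map as a side effect. The same pattern in a generic, self-contained form (std::map stand-ins, not the pass's real types):

    #include <map>
    #include <string>
    #include <vector>

    using Key     = std::string;      // stand-in for the memory-operand key
    using LEAList = std::vector<int>; // stand-in for the candidate-LEA list

    // Return nullptr when there is nothing to choose from; indexing with
    // operator[] would instead default-construct and insert an empty list.
    const LEAList *findCandidates(const std::map<Key, LEAList> &LEAs, const Key &K) {
      auto It = LEAs.find(K);
      return It == LEAs.end() ? nullptr : &It->second;
    }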
diff --git a/contrib/llvm/lib/Target/X86/X86PfmCounters.td b/contrib/llvm/lib/Target/X86/X86PfmCounters.td
index 093fbafa3fba..a1a4210b5ebf 100644
--- a/contrib/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/contrib/llvm/lib/Target/X86/X86PfmCounters.td
@@ -11,67 +11,216 @@
//
//===----------------------------------------------------------------------===//
-let SchedModel = SandyBridgeModel in {
-def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;
-def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;
-def SBPort23Counter : PfmIssueCounter<SBPort23,
- ["uops_dispatched_port:port_2",
- "uops_dispatched_port:port_3"]>;
-def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;
-def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;
-}
-
-let SchedModel = HaswellModel in {
-def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def HWPort0Counter : PfmIssueCounter<HWPort0, ["uops_dispatched_port:port_0"]>;
-def HWPort1Counter : PfmIssueCounter<HWPort1, ["uops_dispatched_port:port_1"]>;
-def HWPort2Counter : PfmIssueCounter<HWPort2, ["uops_dispatched_port:port_2"]>;
-def HWPort3Counter : PfmIssueCounter<HWPort3, ["uops_dispatched_port:port_3"]>;
-def HWPort4Counter : PfmIssueCounter<HWPort4, ["uops_dispatched_port:port_4"]>;
-def HWPort5Counter : PfmIssueCounter<HWPort5, ["uops_dispatched_port:port_5"]>;
-def HWPort6Counter : PfmIssueCounter<HWPort6, ["uops_dispatched_port:port_6"]>;
-def HWPort7Counter : PfmIssueCounter<HWPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = BroadwellModel in {
-def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def BWPort0Counter : PfmIssueCounter<BWPort0, ["uops_executed_port:port_0"]>;
-def BWPort1Counter : PfmIssueCounter<BWPort1, ["uops_executed_port:port_1"]>;
-def BWPort2Counter : PfmIssueCounter<BWPort2, ["uops_executed_port:port_2"]>;
-def BWPort3Counter : PfmIssueCounter<BWPort3, ["uops_executed_port:port_3"]>;
-def BWPort4Counter : PfmIssueCounter<BWPort4, ["uops_executed_port:port_4"]>;
-def BWPort5Counter : PfmIssueCounter<BWPort5, ["uops_executed_port:port_5"]>;
-def BWPort6Counter : PfmIssueCounter<BWPort6, ["uops_executed_port:port_6"]>;
-def BWPort7Counter : PfmIssueCounter<BWPort7, ["uops_executed_port:port_7"]>;
-}
-
-let SchedModel = SkylakeClientModel in {
-def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SKLPort0Counter : PfmIssueCounter<SKLPort0, ["uops_dispatched_port:port_0"]>;
-def SKLPort1Counter : PfmIssueCounter<SKLPort1, ["uops_dispatched_port:port_1"]>;
-def SKLPort2Counter : PfmIssueCounter<SKLPort2, ["uops_dispatched_port:port_2"]>;
-def SKLPort3Counter : PfmIssueCounter<SKLPort3, ["uops_dispatched_port:port_3"]>;
-def SKLPort4Counter : PfmIssueCounter<SKLPort4, ["uops_dispatched_port:port_4"]>;
-def SKLPort5Counter : PfmIssueCounter<SKLPort5, ["uops_dispatched_port:port_5"]>;
-def SKLPort6Counter : PfmIssueCounter<SKLPort6, ["uops_dispatched_port:port_6"]>;
-def SKLPort7Counter : PfmIssueCounter<SKLPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = SkylakeServerModel in {
-def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
-def SKXPort0Counter : PfmIssueCounter<SKXPort0, ["uops_dispatched_port:port_0"]>;
-def SKXPort1Counter : PfmIssueCounter<SKXPort1, ["uops_dispatched_port:port_1"]>;
-def SKXPort2Counter : PfmIssueCounter<SKXPort2, ["uops_dispatched_port:port_2"]>;
-def SKXPort3Counter : PfmIssueCounter<SKXPort3, ["uops_dispatched_port:port_3"]>;
-def SKXPort4Counter : PfmIssueCounter<SKXPort4, ["uops_dispatched_port:port_4"]>;
-def SKXPort5Counter : PfmIssueCounter<SKXPort5, ["uops_dispatched_port:port_5"]>;
-def SKXPort6Counter : PfmIssueCounter<SKXPort6, ["uops_dispatched_port:port_6"]>;
-def SKXPort7Counter : PfmIssueCounter<SKXPort7, ["uops_dispatched_port:port_7"]>;
-}
-
-let SchedModel = BtVer2Model in {
-def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">;
-def JFPU0Counter : PfmIssueCounter<JFPU0, ["dispatched_fpu:pipe0"]>;
-def JFPU1Counter : PfmIssueCounter<JFPU1, ["dispatched_fpu:pipe1"]>;
+def UnhaltedCoreCyclesPfmCounter : PfmCounter<"unhalted_core_cycles">;
+def UopsIssuedPfmCounter : PfmCounter<"uops_issued:any">;
+
+// No default counters on X86.
+def DefaultPfmCounters : ProcPfmCounters {}
+def : PfmCountersDefaultBinding<DefaultPfmCounters>;
+
+// Intel X86 Counters.
+def PentiumPfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"uops_retired">;
+}
+def : PfmCountersBinding<"pentiumpro", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium2", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium3", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium3m", PentiumPfmCounters>;
+def : PfmCountersBinding<"pentium-m", PentiumPfmCounters>;
+
+def CorePfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"yonah", CorePfmCounters>;
+def : PfmCountersBinding<"prescott", CorePfmCounters>;
+def : PfmCountersBinding<"core2", CorePfmCounters>;
+def : PfmCountersBinding<"penryn", CorePfmCounters>;
+def : PfmCountersBinding<"nehalem", CorePfmCounters>;
+def : PfmCountersBinding<"corei7", CorePfmCounters>;
+def : PfmCountersBinding<"westmere", CorePfmCounters>;
+
+def AtomPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"bonnell", AtomPfmCounters>;
+def : PfmCountersBinding<"atom", AtomPfmCounters>;
+
+def SLMPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:any">;
+}
+def : PfmCountersBinding<"silvermont", SLMPfmCounters>;
+def : PfmCountersBinding<"goldmont", SLMPfmCounters>;
+def : PfmCountersBinding<"goldmont-plus", SLMPfmCounters>;
+def : PfmCountersBinding<"tremont", SLMPfmCounters>;
+
+def KnightPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = PfmCounter<"uops_retired:all">;
+}
+def : PfmCountersBinding<"knl", KnightPfmCounters>;
+def : PfmCountersBinding<"knm", KnightPfmCounters>;
+
+def SandyBridgePfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SBPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SBPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SBPort23", "uops_dispatched_port:port_2 + uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5">
+ ];
+}
+def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>;
+def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>;
+
+def HaswellPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"HWPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"HWPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"HWPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"HWPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"HWPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"HWPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"HWPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"HWPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
+
+def BroadwellPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"BWPort0", "uops_executed_port:port_0">,
+ PfmIssueCounter<"BWPort1", "uops_executed_port:port_1">,
+ PfmIssueCounter<"BWPort2", "uops_executed_port:port_2">,
+ PfmIssueCounter<"BWPort3", "uops_executed_port:port_3">,
+ PfmIssueCounter<"BWPort4", "uops_executed_port:port_4">,
+ PfmIssueCounter<"BWPort5", "uops_executed_port:port_5">,
+ PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">,
+ PfmIssueCounter<"BWPort7", "uops_executed_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>;
+
+def SkylakeClientPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SKLPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SKLPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SKLPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"SKLPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SKLPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SKLPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>;
+
+def SkylakeServerPfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"SKXPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"SKXPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"SKXPort2", "uops_dispatched_port:port_2">,
+ PfmIssueCounter<"SKXPort3", "uops_dispatched_port:port_3">,
+ PfmIssueCounter<"SKXPort4", "uops_dispatched_port:port_4">,
+ PfmIssueCounter<"SKXPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7">
+ ];
+}
+def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"cannonlake", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"icelake-client", SkylakeServerPfmCounters>;
+def : PfmCountersBinding<"icelake-server", SkylakeServerPfmCounters>;
+
+// AMD X86 Counters.
+// Set basic counters for AMD cpus that we know libpfm4 supports.
+def DefaultAMDPfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+}
+def : PfmCountersBinding<"athlon", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-tbird", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-4", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-xp", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-mp", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"k8", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"opteron", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon64", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon-fx", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"k8-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"opteron-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"athlon64-sse3", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"amdfam10", DefaultAMDPfmCounters>;
+def : PfmCountersBinding<"barcelona", DefaultAMDPfmCounters>;
+
+def BdVer2PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"PdFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">,
+ PfmIssueCounter<"PdFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">,
+ PfmIssueCounter<"PdFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">,
+ PfmIssueCounter<"PdFPU3", "dispatched_fpu_ops:ops_pipe3 + dispatched_fpu_ops:ops_dual_pipe3">
+ ];
+}
+def : PfmCountersBinding<"bdver1", BdVer2PfmCounters>;
+def : PfmCountersBinding<"bdver2", BdVer2PfmCounters>;
+
+def BdVer3PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"SrFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">,
+ PfmIssueCounter<"SrFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">,
+ PfmIssueCounter<"SrFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">
+ ];
+}
+def : PfmCountersBinding<"bdver3", BdVer3PfmCounters>;
+def : PfmCountersBinding<"bdver4", BdVer3PfmCounters>;
+
+def BtVer1PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"BtFPU0", "dispatched_fpu:pipe0">,
+ PfmIssueCounter<"BtFPU1", "dispatched_fpu:pipe1">
+ ];
+}
+def : PfmCountersBinding<"btver1", BtVer1PfmCounters>;
+
+def BtVer2PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"JFPU0", "dispatched_fpu:pipe0">,
+ PfmIssueCounter<"JFPU1", "dispatched_fpu:pipe1">
+ ];
+}
+def : PfmCountersBinding<"btver2", BtVer2PfmCounters>;
+
+def ZnVer1PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cycles_not_in_halt">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"ZnFPU0", "fpu_pipe_assignment:total0">,
+ PfmIssueCounter<"ZnFPU1", "fpu_pipe_assignment:total1">,
+ PfmIssueCounter<"ZnFPU2", "fpu_pipe_assignment:total2">,
+ PfmIssueCounter<"ZnFPU3", "fpu_pipe_assignment:total3">,
+ PfmIssueCounter<"ZnDivider", "div_op_count">
+ ];
}
+def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index 246d6d5a58d0..355291916ee8 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -194,19 +194,40 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCONSTANT:
// Instruction having only floating-point operands (all scalars in VECRReg)
getInstrPartialMappingIdxs(MI, MRI, /* isFP */ true, OpRegBankIdx);
break;
- case TargetOpcode::G_SITOFP: {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_FPTOSI: {
// Some of the floating-point instructions have mixed GPR and FP operands:
// fine-tune the computed mapping.
auto &Op0 = MI.getOperand(0);
auto &Op1 = MI.getOperand(1);
const LLT Ty0 = MRI.getType(Op0.getReg());
const LLT Ty1 = MRI.getType(Op1.getReg());
- OpRegBankIdx[0] = getPartialMappingIdx(Ty0, /* isFP */ true);
- OpRegBankIdx[1] = getPartialMappingIdx(Ty1, /* isFP */ false);
+
+ bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP;
+ bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI;
+ OpRegBankIdx[0] = getPartialMappingIdx(Ty0, /* isFP */ FirstArgIsFP);
+ OpRegBankIdx[1] = getPartialMappingIdx(Ty1, /* isFP */ SecondArgIsFP);
+ break;
+ }
+ case TargetOpcode::G_FCMP: {
+ LLT Ty1 = MRI.getType(MI.getOperand(2).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+ (void)Ty2;
+ assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() &&
+ "Mismatched operand sizes for G_FCMP");
+
+ unsigned Size = Ty1.getSizeInBits();
+ (void)Size;
+ assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+ auto FpRegBank = getPartialMappingIdx(Ty1, /* isFP */ true);
+ OpRegBankIdx = {PMI_GPR8,
+ /* Predicate */ PMI_None, FpRegBank, FpRegBank};
break;
}
case TargetOpcode::G_TRUNC:
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index ee9e7891f9f6..aa20273f89ab 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -436,11 +436,12 @@ def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
-def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESP)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
- R8, R9, R11, RIP)>;
+ R8, R9, R11, RIP, RSP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
- R8, R9, R10, R11, RIP)>;
+ R8, R9, R10, R11,
+ RIP, RSP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
@@ -499,6 +500,16 @@ def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
+// Classes to support the 64-bit assembler constraint tied to a fixed
+// register in 32-bit mode. The second register is always the next in
+// the list. Wrap around causes an error.
+def GR32_DC : RegisterClass<"X86", [i32], 32, (add EDX, ECX)>;
+def GR32_CB : RegisterClass<"X86", [i32], 32, (add ECX, EBX)>;
+def GR32_BSI : RegisterClass<"X86", [i32], 32, (add EBX, ESI)>;
+def GR32_SIDI : RegisterClass<"X86", [i32], 32, (add ESI, EDI)>;
+def GR32_DIBP : RegisterClass<"X86", [i32], 32, (add EDI, EBP)>;
+def GR32_BPSP : RegisterClass<"X86", [i32], 32, (add EBP, ESP)>;
+
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
@@ -511,10 +522,16 @@ def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
+
def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
+// st(7) may not be allocatable.
+def RFP80_7 : RegisterClass<"X86",[f80], 32, (add FP7)> {
+ let isAllocatable = 0;
+}
+
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
diff --git a/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
index 250deb3523b4..08994cccb21e 100644
--- a/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RetpolineThunks.cpp
@@ -74,7 +74,7 @@ private:
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
- void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
+ void populateThunk(MachineFunction &MF, unsigned Reg);
};
} // end anonymous namespace
@@ -115,7 +115,9 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
// FIXME: It's a little silly to look at every function just to enumerate
// the subtargets, but eventually we'll want to look at them for indirect
// calls, so maybe this is OK.
- if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
+ if ((!STI->useRetpolineIndirectCalls() &&
+ !STI->useRetpolineIndirectBranches()) ||
+ STI->useRetpolineExternalThunk())
return false;
// Otherwise, we need to insert the thunk.
@@ -234,25 +236,33 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
}
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
- Optional<unsigned> Reg) {
+ unsigned Reg) {
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MCSymbol *TargetSym = MF.getContext().createTempSymbol();
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
- BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget);
- Entry->addSuccessor(CallTarget);
+ Entry->addLiveIn(Reg);
+ BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
+
+ // The MIR verifier thinks that the CALL in the entry block will fall through
+ // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
+ // the successor, but the MIR verifier doesn't know how to cope with that.
Entry->addSuccessor(CaptureSpec);
- CallTarget->setHasAddressTaken();
// In the capture loop for speculation, we want to stop the processor from
// speculating as fast as possible. On Intel processors, the PAUSE instruction
@@ -268,7 +278,10 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
CaptureSpec->setHasAddressTaken();
CaptureSpec->addSuccessor(CaptureSpec);
+ CallTarget->addLiveIn(Reg);
+ CallTarget->setHasAddressTaken();
CallTarget->setAlignment(4);
- insertRegReturnAddrClobber(*CallTarget, *Reg);
+ insertRegReturnAddrClobber(*CallTarget, Reg);
+ CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
diff --git a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
index 6334d9e89a60..971a50196e45 100755
--- a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -72,10 +72,16 @@ def BWDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def BWFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/5/6 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/5/6 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 6>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -108,22 +114,47 @@ def : WriteRes<WriteRMW, [BWPort237,BWPort4]>;
// Arithmetic.
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteADC, [BWPort06], 1>; // Integer ALU + flags op.
-defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
-defm : BWWriteResPair<WriteIMul64, [BWPort1], 3>; // Integer 64-bit multiplication.
-defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
-defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>;
+// Integer multiplication.
+defm : BWWriteResPair<WriteIMul8, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul16, [BWPort1,BWPort06,BWPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
+defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
+defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+// TODO: Why isn't the BWDivider used consistently?
+defm : X86WriteRes<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10], 1>;
+defm : X86WriteRes<WriteDiv16, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+
+defm : X86WriteRes<WriteIDiv8, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv32, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv64, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+
+defm : X86WriteRes<WriteCMPXCHG,[BWPort06, BWPort0156], 5, [2, 3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[BWPort23, BWPort06, BWPort0156, BWPort237, BWPort4], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteXCHG, [BWPort0156], 2, [3], 3>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
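For readers skimming the reworked multiply/divide entries above: each record spells out the ports consumed, the latency, how many cycles each listed port is held (ResourceCycles), and the total micro-op count. The *ResPair multiclasses used throughout these models also define the folded-load sibling of a class (the Ld-suffixed form), which is why memory forms such as WriteDiv32Ld now get their own explicit X86WriteRes entries when their numbers differ. An annotated sketch with a hypothetical class name:

    defm : X86WriteRes<WriteMyOp,              // scheduling class (hypothetical)
                       [BWPort1, BWPort0156],  // ports consumed
                       4,                      // latency in cycles
                       [1, 2],                 // cycles each listed port is held
                       3>;                     // total micro-ops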
@@ -136,8 +167,14 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
-def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
+
+defm : X86WriteRes<WriteLAHFSAHF, [BWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [BWPort06], 1, [1], 1>; // Bit Test instrs
+defm : X86WriteRes<WriteBitTestImmLd, [BWPort06,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [BWPort0156,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [BWPort06], 1, [1], 1>; // Bit Test + Set instrs
+defm : X86WriteRes<WriteBitTestSetImmLd, [BWPort06,BWPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [BWPort0156,BWPort23], 5, [1,1], 2>;
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
@@ -147,7 +184,10 @@ defm : BWWriteResPair<WriteTZCNT, [BWPort1], 3>;
defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
// Integer shifts and rotates.
-defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
+defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
+defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
+defm : BWWriteResPair<WriteRotate, [BWPort06], 2, [2], 2>;
+defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
@@ -155,9 +195,10 @@ defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
-defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
+defm : BWWriteResPair<WriteBLS, [BWPort15], 1>;
+defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
@@ -582,7 +623,7 @@ def BWWriteResGroup3 : SchedWriteRes<[BWPort5]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[BWWriteResGroup3], (instrs MMX_MOVQ2DQrr)>;
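Much of the rest of this patch swaps instregex for instrs in InstRW bindings. Both attach opcodes to a SchedWriteRes group, but instregex matches instruction names against a regular expression while the scheduler tables are generated, whereas instrs references opcodes directly, so a stale or misspelled name becomes a hard TableGen error rather than a pattern that silently matches nothing, and TableGen avoids scanning every instruction name. A sketch with a hypothetical group (only one of the two bindings would be kept in a real model):

    def MyWriteGroup : SchedWriteRes<[BWPort5]> {
      let Latency = 1;
      let NumMicroOps = 1;
      let ResourceCycles = [1];
    }
    // Name-pattern form: binds every opcode whose name matches the regex.
    def: InstRW<[MyWriteGroup], (instregex "MMX_MOVQ2DQrr")>;
    // Direct form preferred after this change: binds the opcode itself.
    def: InstRW<[MyWriteGroup], (instrs MMX_MOVQ2DQrr)>;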
def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> {
let Latency = 1;
@@ -610,10 +651,7 @@ def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr")>;
def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> {
let Latency = 1;
@@ -627,19 +665,19 @@ def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup9], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+def: InstRW<[BWWriteResGroup9], (instrs SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
- "ST_FP(32|64|80)m")>;
+def: InstRW<[BWWriteResGroup10], (instrs FBSTPm)>;
+def: InstRW<[BWWriteResGroup10], (instregex "ST_FP(32|64|80)m")>;
def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
let Latency = 2;
@@ -648,16 +686,6 @@ def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
}
def: InstRW<[BWWriteResGroup12], (instrs FDECSTP)>;
-def BWWriteResGroup13 : SchedWriteRes<[BWPort06]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[BWWriteResGroup13], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri")>;
-
def BWWriteResGroup14 : SchedWriteRes<[BWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -688,7 +716,7 @@ def BWWriteResGroup17 : SchedWriteRes<[BWPort01,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup17], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[BWWriteResGroup17], (instrs MMX_MOVDQ2Qrr)>;
def BWWriteResGroup18 : SchedWriteRes<[BWPort237,BWPort0156]> {
let Latency = 2;
@@ -702,11 +730,10 @@ def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup20], (instrs CWD)>;
-def: InstRW<[BWWriteResGroup20], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[BWWriteResGroup20], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[BWWriteResGroup20], (instregex "ADC8ri",
"SBB8ri",
"SET(A|BE)r")>;
@@ -729,53 +756,35 @@ def BWWriteResGroup25 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup25], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[BWWriteResGroup25], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr")>;
def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr",
- "PDEP(32|64)rr",
- "PEXT(32|64)rr",
+def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
-def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rri, IMUL16rri8)>;
-
def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr",
- "VPBROADCASTWrr")>;
-
-def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[BWWriteResGroup30], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[BWWriteResGroup28], (instrs VPBROADCASTBrr,
+ VPBROADCASTWrr)>;
def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup33], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[BWWriteResGroup33], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 3;
@@ -789,21 +798,8 @@ def BWWriteResGroup35 : SchedWriteRes<[BWPort06,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[BWWriteResGroup35], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
-
-def BWWriteResGroup36 : SchedWriteRes<[BWPort06,BWPort0156]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[BWWriteResGroup36], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
+def: InstRW<[BWWriteResGroup35], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def BWWriteResGroup37 : SchedWriteRes<[BWPort4,BWPort6,BWPort237,BWPort0156]> {
let Latency = 3;
@@ -835,7 +831,7 @@ def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup40], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[BWWriteResGroup40], (instrs VCVTPS2PDYrr)>;
def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
let Latency = 4;
@@ -849,9 +845,8 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup42], (instrs IMUL64r, MUL64r, MULX64rr)>;
-def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPI2PDirr",
- "MMX_CVT(T?)PD2PIirr",
+def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDirr)>;
+def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIirr",
"MMX_CVT(T?)PS2PIirr",
"(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
@@ -861,13 +856,6 @@ def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPI2PDirr",
"(V?)CVTSI2SSrr",
"(V?)CVT(T?)PD2DQrr")>;
-def BWWriteResGroup42_16 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[BWWriteResGroup42_16], (instrs IMUL16r, MUL16r)>;
-
def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -890,10 +878,10 @@ def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> {
}
def: InstRW<[BWWriteResGroup45], (instrs FNCLEX)>;
-def BWWriteResGroup46 : SchedWriteRes<[BWPort015,BWPort0156]> {
- let Latency = 4;
+def BWWriteResGroup46 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[BWWriteResGroup46], (instrs VZEROUPPER)>;
@@ -910,17 +898,14 @@ def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
- "VBROADCASTSSrm",
- "(V?)MOVDDUPrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)")>;
+def: InstRW<[BWWriteResGroup49], (instrs VBROADCASTSSrm,
+ VMOVDDUPrm, MOVDDUPrm,
+ VMOVSHDUPrm, MOVSHDUPrm,
+ VMOVSLDUPrm, MOVSLDUPrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm)>;
def BWWriteResGroup50 : SchedWriteRes<[BWPort1,BWPort5]> {
let Latency = 5;
@@ -936,13 +921,6 @@ def BWWriteResGroup51 : SchedWriteRes<[BWPort1,BWPort6,BWPort06]> {
}
def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>;
-def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -957,13 +935,6 @@ def BWWriteResGroup55 : SchedWriteRes<[BWPort06,BWPort0156]> {
}
def: InstRW<[BWWriteResGroup55], (instrs XSETBV)>;
-def BWWriteResGroup56 : SchedWriteRes<[BWPort06,BWPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[BWWriteResGroup56], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def BWWriteResGroup57 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -976,50 +947,44 @@ def BWWriteResGroup58 : SchedWriteRes<[BWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[BWWriteResGroup58], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[BWWriteResGroup58], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup59], (instregex "(V?)CVTPS2PDrm",
- "(V?)CVTSS2SDrm",
- "VPSLLVQrm",
- "VPSRLVQrm")>;
+def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
+ CVTSS2SDrm, VCVTSS2SDrm,
+ VPSLLVQrm,
+ VPSRLVQrm)>;
def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup60], (instregex "VCVTDQ2PDYrr",
- "VCVTPD2PSYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[BWWriteResGroup60], (instrs VCVTDQ2PDYrr,
+ VCVTPD2PSYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup62], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def BWWriteResGroup63 : SchedWriteRes<[BWPort23,BWPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup63], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[BWWriteResGroup62], (instrs FARJMP64)>;
+def: InstRW<[BWWriteResGroup62], (instregex "JMP(16|32|64)m")>;
def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 6;
@@ -1027,9 +992,6 @@ def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup64], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> {
@@ -1037,9 +999,9 @@ def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup65], (instregex "VINSERTF128rm",
- "VINSERTI128rm",
- "VPBLENDDrmi")>;
+def: InstRW<[BWWriteResGroup65], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 6;
@@ -1061,15 +1023,9 @@ def BWWriteResGroup69 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[BWWriteResGroup69], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup70 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 6;
@@ -1091,8 +1047,8 @@ def BWWriteResGroup73 : SchedWriteRes<[BWPort0,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup73], (instregex "VPSLLVQYrm",
- "VPSRLVQYrm")>;
+def: InstRW<[BWWriteResGroup73], (instrs VPSLLVQYrm,
+ VPSRLVQYrm)>;
def BWWriteResGroup74 : SchedWriteRes<[BWPort1,BWPort23]> {
let Latency = 7;
@@ -1106,16 +1062,16 @@ def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup77], (instregex "VPBLENDDYrmi")>;
+def: InstRW<[BWWriteResGroup77], (instrs VPBLENDDYrmi)>;
def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 7;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[BWWriteResGroup79], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
let Latency = 7;
@@ -1144,10 +1100,8 @@ def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 7;
@@ -1161,8 +1115,8 @@ def BWWriteResGroup89 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup89], (instrs FARCALL64)>;
def BWWriteResGroup90 : SchedWriteRes<[BWPort6,BWPort06,BWPort15,BWPort0156]> {
let Latency = 7;
@@ -1176,54 +1130,31 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm",
- "PDEP(32|64)rm",
- "PEXT(32|64)rm",
- "(V?)CVTDQ2PSrm")>;
-
-def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup91_16], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort06, BWPort0156, BWPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>;
+def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSirm,
+ CVTDQ2PSrm,
+ VCVTDQ2PSrm)>;
+def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>;
def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup92], (instregex "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVSXWQYrm",
- "VPMOVZXWDYrm")>;
+def: InstRW<[BWWriteResGroup92], (instrs VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVSXWQYrm,
+ VPMOVZXWDYrm)>;
def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[BWWriteResGroup97], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
-
-def BWWriteResGroup98 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[BWWriteResGroup98], (instregex "ROR(8|16|32|64)mCL")>;
+def: InstRW<[BWWriteResGroup97], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def BWWriteResGroup99 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 8;
@@ -1238,8 +1169,8 @@ def BWWriteResGroup100 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPo
let ResourceCycles = [1,1,1,2,1];
}
def : SchedAlias<WriteADCRMW, BWWriteResGroup100>;
-def: InstRW<[BWWriteResGroup100], (instregex "CMPXCHG(8|16|32|64)rm",
- "ROL(8|16|32|64)mCL",
+def: InstRW<[BWWriteResGroup100], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1250,9 +1181,9 @@ def BWWriteResGroup101 : SchedWriteRes<[BWPort1,BWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VCVTPS2DQYrm",
- "VCVTTPS2DQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup101], (instrs VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def BWWriteResGroup105 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 9;
@@ -1270,18 +1201,18 @@ def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup106], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[BWWriteResGroup106], (instrs VCVTPS2PDYrm)>;
def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup107], (instrs IMUL64m, MUL64m, MULX64rm)>;
-def: InstRW<[BWWriteResGroup107], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVTPI2PDirm",
- "MMX_CVT(T?)PD2PIirm",
+def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPI2PDirm)>;
+def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIirm",
"(V?)CVTDQ2PDrm",
"(V?)CVTSD2SSrm")>;
@@ -1298,7 +1229,7 @@ def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,3];
}
-def: InstRW<[BWWriteResGroup112], (instregex "RDRAND(16|32|64)r")>;
+def: InstRW<[BWWriteResGroup112], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
def BWWriteResGroup113 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> {
let Latency = 9;
@@ -1329,13 +1260,6 @@ def BWWriteResGroup120 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
}
def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>;
-def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;
@@ -1348,15 +1272,15 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m",
- "VPCMPGTQYrm")>;
+def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m")>;
+def: InstRW<[BWWriteResGroup123], (instrs VPCMPGTQYrm)>;
def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[BWWriteResGroup128], (instrs VCVTDQ2PDYrm)>;
def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
let Latency = 11;
@@ -1371,7 +1295,7 @@ def BWWriteResGroup132 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
let NumMicroOps = 9;
let ResourceCycles = [1,4,1,3];
}
-def: InstRW<[BWWriteResGroup132], (instregex "RCL8rCL")>;
+def: InstRW<[BWWriteResGroup132], (instrs RCL8rCL)>;
def BWWriteResGroup133 : SchedWriteRes<[BWPort06,BWPort0156]> {
let Latency = 11;
@@ -1414,7 +1338,7 @@ def BWWriteResGroup145 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
let NumMicroOps = 10;
let ResourceCycles = [2,3,1,4];
}
-def: InstRW<[BWWriteResGroup145], (instregex "RCR8rCL")>;
+def: InstRW<[BWWriteResGroup145], (instrs RCR8rCL)>;
def BWWriteResGroup146 : SchedWriteRes<[BWPort0,BWPort1,BWPort6,BWPort0156]> {
let Latency = 14;
@@ -1451,10 +1375,10 @@ def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPo
}
def: InstRW<[BWWriteResGroup153], (instrs CMPXCHG8B)>;
-def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def BWWriteResGroup154 : SchedWriteRes<[BWPort5,BWPort6]> {
+ let Latency = 8;
+ let NumMicroOps = 20;
+ let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>;
@@ -1513,7 +1437,7 @@ def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let NumMicroOps = 18;
let ResourceCycles = [1,1,16];
}
-def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>;
+def: InstRW<[BWWriteResGroup172], (instrs POPF64)>;
def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let Latency = 23;
@@ -1608,13 +1532,6 @@ def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPor
def: InstRW<[BWWriteResGroup186], (instrs XSAVE)>;
def: InstRW<[BWWriteResGroup186], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
-def BWWriteResGroup190 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
- let Latency = 34;
- let NumMicroOps = 8;
- let ResourceCycles = [2,2,2,1,1];
-}
-def: InstRW<[BWWriteResGroup190], (instregex "DIV(8|16|32|64)m")>;
-
def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort0156]> {
let Latency = 34;
let NumMicroOps = 23;
@@ -1623,13 +1540,6 @@ def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort
def: InstRW<[BWWriteResGroup191], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
-def BWWriteResGroup193 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
- let Latency = 35;
- let NumMicroOps = 8;
- let ResourceCycles = [2,2,2,1,1];
-}
-def: InstRW<[BWWriteResGroup193], (instregex "IDIV(8|16|32|64)m")>;
-
def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
@@ -1673,13 +1583,6 @@ def BWWriteResGroup200 : SchedWriteRes<[BWPort5,BWPort01,BWPort0156]> {
}
def: InstRW<[BWWriteResGroup200], (instrs FNINIT)>;
-def BWWriteResGroup201 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156]> {
- let Latency = 80;
- let NumMicroOps = 32;
- let ResourceCycles = [7,7,3,3,1,11];
-}
-def: InstRW<[BWWriteResGroup201], (instregex "DIV(16|32|64)r")>;
-
def BWWriteResGroup202 : SchedWriteRes<[BWPort0,BWPort1,BWPort4,BWPort5,BWPort6,BWPort237,BWPort06,BWPort0156]> {
let Latency = 115;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
index 876c3e4162cf..06a32fb0b1cd 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -77,10 +77,16 @@ def HWDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def HWFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -121,16 +127,32 @@ def : WriteRes<WriteZero, []>;
// Arithmetic.
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>;
-defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
-defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>;
+
+// Integer multiplication.
+defm : HWWriteResPair<WriteIMul8, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul16, [HWPort1,HWPort06,HWPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
+defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
+defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
+defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
-
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+defm : X86WriteRes<WriteCMPXCHG,[HWPort06, HWPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[HWPort23,HWPort06,HWPort0156,HWPort237,HWPort4], 9, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>;
// Integer shifts and rotates.
-defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
+defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
+defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
+defm : HWWriteResPair<WriteRotate, [HWPort06], 2, [2], 2>;
+defm : HWWriteResPair<WriteRotateCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [HWPort1], 3, [1], 1>;
@@ -149,8 +171,14 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
-def : WriteRes<WriteBitTest,[HWPort06]>;
+
+defm : X86WriteRes<WriteLAHFSAHF, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [HWPort06,HWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [], 1, [], 10>;
+defm : X86WriteRes<WriteBitTestSet, [HWPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [HWPort06,HWPort23], 6, [1,1], 3>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [], 1, [], 11>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -164,18 +192,29 @@ defm : HWWriteResPair<WriteLZCNT, [HWPort1], 3>;
defm : HWWriteResPair<WriteTZCNT, [HWPort1], 3>;
defm : HWWriteResPair<WritePOPCNT, [HWPort1], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : HWWriteResPair<WriteBEXTR, [HWPort06,HWPort15], 2, [1,1], 2>;
-defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
-
-defm : HWWriteResPair<WriteDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv8, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv16, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv32, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
-defm : HWWriteResPair<WriteIDiv64, [HWPort0, HWDivider], 25, [1,10], 1, 4>;
+defm : HWWriteResPair<WriteBLS, [HWPort15], 1>;
+defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>;
+
+// TODO: Why isn't the HWDivider used?
+defm : X86WriteRes<WriteDiv8, [HWPort0,HWPort1,HWPort5,HWPort6], 22, [], 9>;
+defm : X86WriteRes<WriteDiv16, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156], 98, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv8Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv16Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [HWPort0,HWPort1,HWPort5,HWPort6], 23, [], 9>;
+defm : X86WriteRes<WriteIDiv16, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156], 112, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv16Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv32Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteIDiv64Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [HWPort01], 1, [1], 1>;
@@ -614,35 +653,12 @@ def : InstRW<[HWWritePopA], (instregex "POPA(16|32)")>;
//-- Arithmetic instructions --//
-// DIV.
-// r8.
-def HWWriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
- let Latency = 22;
- let NumMicroOps = 9;
-}
-def : InstRW<[HWWriteDiv8], (instregex "DIV8r")>;
-
-// IDIV.
-// r8.
-def HWWriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
- let Latency = 23;
- let NumMicroOps = 9;
-}
-def : InstRW<[HWWriteIDiv8], (instregex "IDIV8r")>;
-
-// BT.
-// m,r.
-def HWWriteBTmr : SchedWriteRes<[]> {
- let NumMicroOps = 10;
-}
-def : InstRW<[HWWriteBTmr], (instregex "BT(16|32|64)mr")>;
-
// BTR BTS BTC.
// m,r.
def HWWriteBTRSCmr : SchedWriteRes<[]> {
let NumMicroOps = 11;
}
-def : InstRW<[HWWriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestSetRegRMW, HWWriteBTRSCmr>;
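The SchedAlias above replaces the removed per-instruction regex binding: it redirects the named scheduling class to this model's SchedWriteRes, so every instruction already tagged with WriteBitTestSetRegRMW picks up HWWriteBTRSCmr's micro-op count without listing opcodes in an InstRW override. A sketch of the same pattern with hypothetical names:

    // Hypothetical class and alias; neither name exists in this file.
    def HWWriteMyRMW : SchedWriteRes<[]> {
      let NumMicroOps = 10;
    }
    def : SchedAlias<WriteMyRMW, HWWriteMyRMW>;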
//-- Control transfer instructions --//
@@ -704,14 +720,14 @@ def HWWriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
let NumMicroOps = 17;
let ResourceCycles = [1, 16];
}
-def : InstRW<[HWWriteRDRAND], (instregex "RDRAND(16|32|64)r")>;
+def : InstRW<[HWWriteRDRAND], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
//=== Floating Point x87 Instructions ===//
//-- Move instructions --//
// FLD.
// m80.
-def : InstRW<[HWWriteP01], (instregex "LD_Frr")>;
+def : InstRW<[HWWriteP01], (instrs LD_Frr)>;
// FBLD.
// m80.
@@ -719,7 +735,7 @@ def HWWriteFBLD : SchedWriteRes<[]> {
let Latency = 47;
let NumMicroOps = 43;
}
-def : InstRW<[HWWriteFBLD], (instregex "FBLDm")>;
+def : InstRW<[HWWriteFBLD], (instrs FBLDm)>;
// FST(P).
// r.
@@ -732,13 +748,13 @@ def : InstRW<[HWWriteP01], (instregex "FFREE")>;
def HWWriteFNSAVE : SchedWriteRes<[]> {
let NumMicroOps = 147;
}
-def : InstRW<[HWWriteFNSAVE], (instregex "FSAVEm")>;
+def : InstRW<[HWWriteFNSAVE], (instrs FSAVEm)>;
// FRSTOR.
def HWWriteFRSTOR : SchedWriteRes<[]> {
let NumMicroOps = 90;
}
-def : InstRW<[HWWriteFRSTOR], (instregex "FRSTORm")>;
+def : InstRW<[HWWriteFRSTOR], (instrs FRSTORm)>;
//-- Arithmetic instructions --//
@@ -812,8 +828,8 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
+def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>;
+def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm",
"(V?)MOVSLDUPrm",
"VPBROADCAST(D|Q)rm")>;
@@ -822,14 +838,14 @@ def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
+def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm)>;
def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
"VPBROADCAST(D|Q)Yrm")>;
def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
@@ -837,11 +853,8 @@ def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>;
def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
@@ -849,9 +862,8 @@ def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[HWWriteResGroup1], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[HWWriteResGroup1], (instregex "ST_FP(32|64|80)m")>;
def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> {
let Latency = 1;
@@ -874,7 +886,7 @@ def HWWriteResGroup4 : SchedWriteRes<[HWPort5]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[HWWriteResGroup4], (instrs MMX_MOVQ2DQrr)>;
def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> {
let Latency = 1;
@@ -902,10 +914,7 @@ def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def HWWriteResGroup9 : SchedWriteRes<[HWPort015]> {
let Latency = 1;
@@ -920,12 +929,12 @@ def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[HWWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def HWWriteResGroup11 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 6;
@@ -939,40 +948,23 @@ def HWWriteResGroup11_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm",
- "VPSLLVQrm",
- "VPSRLVQrm")>;
+def: InstRW<[HWWriteResGroup11_1], (instrs VPSLLVQrm, VPSRLVQrm)>;
+def: InstRW<[HWWriteResGroup11_1], (instregex "(V?)CVTSS2SDrm")>;
def HWWriteResGroup11_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLVQYrm",
- "VPSRLVQYrm")>;
+def: InstRW<[HWWriteResGroup11_2], (instrs VPSLLVQYrm, VPSRLVQYrm)>;
def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPI2PSirm",
- "PDEP(32|64)rm",
- "PEXT(32|64)rm")>;
-
-def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup12_1], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def HWWriteResGroup12_2 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>;
+def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSirm)>;
+def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 6;
@@ -991,24 +983,17 @@ def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup13_1], (instregex "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+def: InstRW<[HWWriteResGroup13_1], (instrs VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup14], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def HWWriteResGroup15 : SchedWriteRes<[HWPort23,HWPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup15], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[HWWriteResGroup14], (instrs FARJMP64)>;
+def: InstRW<[HWWriteResGroup14], (instregex "JMP(16|32|64)m")>;
def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> {
let Latency = 6;
@@ -1016,9 +1001,6 @@ def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
@@ -1026,16 +1008,16 @@ def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup17], (instregex "VINSERTF128rm",
- "VINSERTI128rm",
- "VPBLENDDrmi")>;
+def: InstRW<[HWWriteResGroup17], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup17_2], (instregex "VPBLENDDYrmi")>;
+def: InstRW<[HWWriteResGroup17_2], (instrs VPBLENDDYrmi)>;
def HWWriteResGroup18 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 6;
@@ -1078,25 +1060,18 @@ def HWWriteResGroup24 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup24], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[HWWriteResGroup24], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)rmr")>;
def HWWriteResGroup25 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let Latency = 7;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[HWWriteResGroup25], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[HWWriteResGroup25], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup26 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 7;
@@ -1113,16 +1088,6 @@ def HWWriteResGroup28 : SchedWriteRes<[HWPort01]> {
}
def: InstRW<[HWWriteResGroup28], (instrs FDECSTP)>;
-def HWWriteResGroup29 : SchedWriteRes<[HWPort06]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[HWWriteResGroup29], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri")>;
-
def HWWriteResGroup30 : SchedWriteRes<[HWPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1153,7 +1118,7 @@ def HWWriteResGroup33 : SchedWriteRes<[HWPort01,HWPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup33], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[HWWriteResGroup33], (instrs MMX_MOVDQ2Qrr)>;
def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 2;
@@ -1168,9 +1133,9 @@ def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup36_2], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[HWWriteResGroup36_2], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def HWWriteResGroup37 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 7;
@@ -1214,10 +1179,8 @@ def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup47 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 8;
@@ -1231,26 +1194,18 @@ def HWWriteResGroup48 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[HWWriteResGroup48], (instrs FARCALL64)>;
def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr",
- "PDEP(32|64)rr",
- "PEXT(32|64)rr",
+def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
-def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup50_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -1272,38 +1227,29 @@ def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VCVTDQ2PSYrm",
- "VCVTPS2DQYrm",
- "VCVTTPS2DQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[HWWriteResGroup52_1], (instrs VCVTDQ2PSYrm,
+ VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup53_1], (instregex "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVZXWDYrm")>;
-
-def HWWriteResGroup54 : SchedWriteRes<[HWPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[HWWriteResGroup54], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[HWWriteResGroup53_1], (instrs VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
def HWWriteResGroup57 : SchedWriteRes<[HWPort5,HWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[HWWriteResGroup57], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[HWWriteResGroup57], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def HWWriteResGroup58 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 3;
@@ -1317,21 +1263,8 @@ def HWWriteResGroup59 : SchedWriteRes<[HWPort06,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[HWWriteResGroup59], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
-
-def HWWriteResGroup60 : SchedWriteRes<[HWPort06,HWPort0156]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[HWWriteResGroup60], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
+def: InstRW<[HWWriteResGroup59], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def HWWriteResGroup61 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> {
let Latency = 4;
@@ -1353,17 +1286,8 @@ def HWWriteResGroup66 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]>
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[HWWriteResGroup66], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
-
-def HWWriteResGroup67 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[HWWriteResGroup67], (instregex "ROR(8|16|32|64)mCL")>;
+def: InstRW<[HWWriteResGroup66], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def HWWriteResGroup68 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 9;
@@ -1377,8 +1301,8 @@ def HWWriteResGroup69 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPor
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,2,1];
}
-def: InstRW<[HWWriteResGroup69], (instregex "CMPXCHG(8|16|32|64)rm",
- "ROL(8|16|32|64)mCL",
+def: InstRW<[HWWriteResGroup69], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1397,7 +1321,7 @@ def HWWriteResGroup71 : SchedWriteRes<[HWPort0,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup71], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[HWWriteResGroup71], (instrs VCVTPS2PDYrr)>;
def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> {
let Latency = 4;
@@ -1411,30 +1335,18 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup73], (instregex "MMX_CVTPI2PDirr",
- "MMX_CVT(T?)PD2PIirr",
- "MMX_CVT(T?)PS2PIirr",
- "(V?)CVTDQ2PDrr",
+def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPI2PDirr,
+ MMX_CVTPD2PIirr,
+ MMX_CVTPS2PIirr,
+ MMX_CVTTPD2PIirr,
+ MMX_CVTTPS2PIirr)>;
+def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTDQ2PDrr",
"(V?)CVTPD2PSrr",
"(V?)CVTSD2SSrr",
"(V?)CVTSI(64)?2SDrr",
"(V?)CVTSI2SSrr",
"(V?)CVT(T?)PD2DQrr")>;
-def HWWriteResGroup74 : SchedWriteRes<[HWPort1,HWPort6]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup74], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def HWWriteResGroup74_16 : SchedWriteRes<[HWPort1, HWPort06, HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[HWWriteResGroup74_16], (instrs IMUL16r, MUL16r)>;
-
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
@@ -1458,32 +1370,29 @@ def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup77], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[HWWriteResGroup77], (instrs VCVTPS2PDYrm)>;
def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup78], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm",
- "(V?)CVTDQ2PDrm")>;
+def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm,
+ CVTDQ2PDrm,
+ VCVTDQ2PDrm)>;
def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup78_1], (instregex "MMX_CVTPI2PDirm",
- "(V?)CVTSD2SSrm")>;
-
-def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup79], (instrs IMUL64m, MUL64m, MULX64rm)>;
+def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDirm,
+ CVTSD2SSrm,
+ VCVTSD2SSrm)>;
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
let Latency = 9;
@@ -1499,10 +1408,10 @@ def HWWriteResGroup81 : SchedWriteRes<[HWPort0156]> {
}
def: InstRW<[HWWriteResGroup81], (instrs FNCLEX)>;
-def HWWriteResGroup82 : SchedWriteRes<[HWPort015,HWPort0156]> {
- let Latency = 4;
+def HWWriteResGroup82 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[HWWriteResGroup82], (instrs VZEROUPPER)>;
@@ -1548,8 +1457,8 @@ def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m",
- "VPCMPGTQYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m")>;
+def: InstRW<[HWWriteResGroup91_3], (instrs VPCMPGTQYrm)>;
def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 5;
@@ -1565,13 +1474,6 @@ def HWWriteResGroup94 : SchedWriteRes<[HWPort1,HWPort6,HWPort06]> {
}
def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>;
-def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 4;
@@ -1579,13 +1481,6 @@ def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
}
def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>;
-def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort23,HWPort06,HWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup98], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -1600,21 +1495,15 @@ def HWWriteResGroup100 : SchedWriteRes<[HWPort06,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>;
-def HWWriteResGroup101 : SchedWriteRes<[HWPort06,HWPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[HWWriteResGroup101], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup102], (instregex "VCVTDQ2PDYrr",
- "VCVTPD2PSYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[HWWriteResGroup102], (instrs VCVTDQ2PDYrr,
+ VCVTPD2PSYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 13;
@@ -1628,7 +1517,7 @@ def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup104], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[HWWriteResGroup104], (instrs VCVTDQ2PDYrm)>;
def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> {
let Latency = 6;
@@ -1678,7 +1567,7 @@ def HWWriteResGroup130 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let NumMicroOps = 9;
let ResourceCycles = [1,4,1,3];
}
-def: InstRW<[HWWriteResGroup130], (instregex "RCL8rCL")>;
+def: InstRW<[HWWriteResGroup130], (instrs RCL8rCL)>;
def HWWriteResGroup131 : SchedWriteRes<[HWPort06,HWPort0156]> {
let Latency = 11;
@@ -1706,14 +1595,14 @@ def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let NumMicroOps = 10;
let ResourceCycles = [2,3,1,4];
}
-def: InstRW<[HWWriteResGroup142], (instregex "RCR8rCL")>;
+def: InstRW<[HWWriteResGroup142], (instrs RCR8rCL)>;
def HWWriteResGroup143 : SchedWriteRes<[HWPort23,HWPort0156]> {
let Latency = 19;
let NumMicroOps = 15;
let ResourceCycles = [1,14];
}
-def: InstRW<[HWWriteResGroup143], (instregex "POPF16")>;
+def: InstRW<[HWWriteResGroup143], (instrs POPF16)>;
def HWWriteResGroup144 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort06,HWPort0156]> {
let Latency = 21;
@@ -1722,10 +1611,10 @@ def HWWriteResGroup144 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort2
}
def: InstRW<[HWWriteResGroup144], (instrs INSB, INSL, INSW)>;
-def HWWriteResGroup145 : SchedWriteRes<[HWPort5]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def HWWriteResGroup145 : SchedWriteRes<[HWPort5, HWPort6]> {
+ let Latency = 8;
+ let NumMicroOps = 20;
+ let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup145], (instrs VZEROALL)>;
@@ -1879,20 +1768,6 @@ def HWWriteResGroup180 : SchedWriteRes<[HWPort5,HWPort01,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup180], (instrs FNINIT)>;
-def HWWriteResGroup181 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156]> {
- let Latency = 98;
- let NumMicroOps = 32;
- let ResourceCycles = [7,7,3,3,1,11];
-}
-def: InstRW<[HWWriteResGroup181], (instregex "DIV(16|32|64)r")>;
-
-def HWWriteResGroup182 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156]> {
- let Latency = 112;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[HWWriteResGroup182], (instregex "IDIV(16|32|64)r")>;
-
def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,HWPort237,HWPort06,HWPort0156]> {
let Latency = 115;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedPredicates.td b/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
index 27aaeb193583..1c7f24375f61 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedPredicates.td
@@ -19,11 +19,16 @@
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
-// A predicate used to check if an instruction is a LEA, and if it uses all
-// three source operands: base, index, and offset.
-def IsThreeOperandsLEAPredicate: CheckAll<[
- CheckOpcode<[LEA32r, LEA64r, LEA64_32r, LEA16r]>,
+// A predicate used to identify VPERM instructions that have bits 3 and 7 of their mask set.
+// On some processors, these VPERM instructions are zero-idioms.
+def ZeroIdiomVPERMPredicate : CheckAll<[
+ ZeroIdiomPredicate,
+ CheckImmOperand<3, 0x88>
+]>;
+// A predicate used to check if a LEA instruction uses all three source
+// operands: base, index, and offset.
+def IsThreeOperandsLEAPredicate: CheckAll<[
// isRegOperand(Base)
CheckIsRegOperand<1>,
CheckNot<CheckInvalidRegOperand<1>>,
@@ -42,8 +47,17 @@ def IsThreeOperandsLEAPredicate: CheckAll<[
]>
]>;
+def LEACases : MCOpcodeSwitchCase<
+ [LEA32r, LEA64r, LEA64_32r, LEA16r],
+ MCReturnStatement<IsThreeOperandsLEAPredicate>
+>;
+
+// Used to generate the body of a TII member function.
+def IsThreeOperandsLEABody :
+ MCOpcodeSwitchStatement<[LEACases], MCReturnStatement<FalsePred>>;
+
// This predicate evaluates to true only if the input machine instruction is a
// 3-operands LEA. Tablegen automatically generates a new method for it in
// X86GenInstrInfo.
def IsThreeOperandsLEAFn :
- TIIPredicate<"X86", "isThreeOperandsLEA", IsThreeOperandsLEAPredicate>;
+ TIIPredicate<"isThreeOperandsLEA", IsThreeOperandsLEABody>;
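As an aside on the new predicates: in ZeroIdiomVPERMPredicate the 0x88 immediate sets bits 3 and 7, which zero the low and high 128-bit lane of VPERM2F128/VPERM2I128, so the result is all zeros regardless of the source registers. For the LEA predicate, TableGen now emits the opcode switch (LEACases) plus the operand checks (IsThreeOperandsLEABody) as an isThreeOperandsLEA helper in X86GenInstrInfo. The hand-written C++ sketch below only illustrates what that check amounts to; the function name is invented, the operand indices assume the usual LEA operand layout (0 = dst, 1 = base, 2 = scale, 3 = index, 4 = displacement), and it is not the generated code.

#include "llvm/MC/MCInst.h"

// Sketch of the three-operand LEA test. The caller is assumed to have already
// matched one of LEA16r/LEA32r/LEA64r/LEA64_32r, which is what the LEACases
// opcode switch expresses in the generated function.
static bool looksLikeThreeOperandsLEA(const llvm::MCInst &MI) {
  const llvm::MCOperand &Base  = MI.getOperand(1);
  const llvm::MCOperand &Index = MI.getOperand(3);
  const llvm::MCOperand &Disp  = MI.getOperand(4);
  // Base and index must both be real (non-zero) registers...
  bool HasBase  = Base.isReg()  && Base.getReg()  != 0;
  bool HasIndex = Index.isReg() && Index.getReg() != 0;
  // ...and the displacement must contribute something: a non-zero immediate,
  // or a non-immediate operand such as a symbol reference.
  bool HasDisp  = !Disp.isImm() || Disp.getImm() != 0;
  return HasBase && HasIndex && HasDisp;
}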
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 6b7bbdea860a..9dbf0976989f 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -67,10 +67,16 @@ def SBDivider : ProcResource<1>;
// FP division and sqrt on port 0.
def SBFPDivider : ProcResource<1>;
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
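The ReadAfterLd and ReadAfterVec*Ld ReadAdvance values above mean that the consuming instruction does not actually read that register operand until 5, 6 or 7 cycles after it issues, so a producer feeding the operand appears correspondingly cheaper. A hypothetical helper, not an LLVM API, that captures the arithmetic these entries imply:

// Hypothetical helper, not part of LLVM: the latency a dependent instruction
// observes on an operand tagged with a ReadAdvance is the producer's write
// latency minus the advance, clamped at zero.
static unsigned observedLatency(unsigned ProducerLatency, unsigned ReadAdvance) {
  return ProducerLatency > ReadAdvance ? ProducerLatency - ReadAdvance : 0;
}
// Example: a 3-cycle producer feeding the register operand of a folded-load
// consumer whose read is tagged ReadAfterVecXLd (advance 6 here) is observed
// as 0 cycles, because the consumer's load part already hides that latency.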
@@ -109,11 +115,25 @@ def : WriteRes<WriteZero, []>;
// Arithmetic.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;
-defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
-defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul8, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul16, [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
+defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
+defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
+defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
-defm : X86WriteRes<WriteBSWAP64, [SBPort1,SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteBSWAP64, [SBPort1, SBPort05], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCMPXCHG, [SBPort05, SBPort015], 5, [1,3], 4>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SBPort015, SBPort5, SBPort23, SBPort4], 8, [1, 2, 2, 1], 6>;
defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
@@ -124,15 +144,17 @@ defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
-
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;
-defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
+defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
+defm : SBWriteResPair<WriteShiftCL, [SBPort05], 3, [3], 3>;
+defm : SBWriteResPair<WriteRotate, [SBPort05], 2, [2], 2>;
+defm : SBWriteResPair<WriteRotateCL, [SBPort05], 3, [3], 3>;
+
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
@@ -144,8 +166,14 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
-def : WriteRes<WriteBitTest,[SBPort05]>;
+
+defm : X86WriteRes<WriteLAHFSAHF, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SBPort05,SBPort23], 6, [1,1], 2>;
+//defm : X86WriteRes<WriteBitTestRegLd, [SBPort05,SBPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SBPort05,SBPort23], 6, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SBPort05,SBPort23,SBPort5,SBPort015], 8, [1,1,1,1], 5>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -159,10 +187,11 @@ defm : SBWriteResPair<WriteLZCNT, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteTZCNT, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WritePOPCNT, [SBPort1], 3, [1], 1, 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
// NOTE: These don't exist on Sandy Bridge. Ports are guesses.
defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
-defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
+defm : SBWriteResPair<WriteBLS, [SBPort015], 1>;
+defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SBPort5], 1, [1], 1>;
@@ -577,21 +606,21 @@ def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
- "MMX_PADDQirr",
- "MMX_PALIGNRrri",
- "MMX_PSIGN(B|D|W)rr")>;
+def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
+ MMX_PABSDrr,
+ MMX_PABSWrr,
+ MMX_PADDQirr,
+ MMX_PALIGNRrri,
+ MMX_PSIGNBrr,
+ MMX_PSIGNDrr,
+ MMX_PSIGNWrr)>;
def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SBWriteResGroup9], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SBWriteResGroup9], (instregex "SET(A|BE)r")>;
def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
let Latency = 2;
@@ -608,10 +637,7 @@ def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup12], (instregex "(V?)COMISDrr",
- "(V?)COMISSrr",
- "(V?)UCOMISDrr",
- "(V?)UCOMISSrr")>;
+def: InstRW<[SBWriteResGroup12], (instregex "(V?)(U?)COMI(SD|SS)rr")>;
def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
let Latency = 2;
@@ -626,22 +652,15 @@ def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ,
+ MMX_MOVDQ2Qrr)>;
def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup21], (instregex "PUSHFS64")>;
-
-def SBWriteResGroup21_16i : SchedWriteRes<[SBPort1, SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup21_16i], (instrs IMUL16rri, IMUL16rri8)>;
+def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;
def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
let Latency = 3;
@@ -650,25 +669,13 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
}
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
-def SBWriteResGroup23_2 : SchedWriteRes<[SBPort05]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup23_2], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> {
+def SBWriteResGroup23 : SchedWriteRes<[SBPort05]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
-def: InstRW<[SBWriteResGroup25], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
+def: InstRW<[SBWriteResGroup23], (instregex "RCL(8|16|32|64)r1",
+ "RCR(8|16|32|64)r1")>;
def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
@@ -684,33 +691,12 @@ def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
}
def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
-def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup27], (instrs IMUL64r, MUL64r)>;
-
-def SBWriteResGroup27_1 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup27_1], (instrs IMUL32r, MUL32r)>;
-
-def SBWriteResGroup27_2 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup27_2], (instrs IMUL16r, MUL16r)>;
-
def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
+def: InstRW<[SBWriteResGroup29], (instrs MOV64sr)>;
def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
let Latency = 4;
@@ -724,7 +710,6 @@ def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup30], (instregex "(V?)PCMPGTQrr")>;
def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
let Latency = 5;
@@ -734,6 +719,14 @@ def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm(8|16|32)",
"MOVZX(16|32|64)rm(8|16)")>;
+def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+ let ResourceCycles = [8];
+}
+def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)r(i|CL)",
+ "RCR(8|16|32|64)r(i|CL)")>;
+
def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
let Latency = 5;
let NumMicroOps = 2;
@@ -753,8 +746,8 @@ def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m",
- "PUSHGS64")>;
+def: InstRW<[SBWriteResGroup35_2], (instrs PUSHGS64)>;
+def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>;
def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
let Latency = 5;
@@ -779,13 +772,6 @@ def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
}
def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;
-def SBWriteResGroup42 : SchedWriteRes<[SBPort05,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let Latency = 3;
let NumMicroOps = 4;
@@ -820,9 +806,9 @@ def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm",
- "POP(16|32|64)r",
- "VBROADCASTSSrm",
+def: InstRW<[SBWriteResGroup48], (instrs MMX_MOVD64from64rm,
+ VBROADCASTSSrm)>;
+def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
"(V?)MOV64toPQIrm",
"(V?)MOVDDUPrm",
"(V?)MOVDI2PDIrm",
@@ -837,23 +823,20 @@ def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup49], (instregex "MOV16sm")>;
-
-def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort05]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup50], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SBWriteResGroup49], (instrs MOV16sm)>;
def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABS(B|D|W)rm",
- "MMX_PALIGNRrmi",
- "MMX_PSIGN(B|D|W)rm")>;
+def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm,
+ MMX_PABSDrm,
+ MMX_PABSWrm,
+ MMX_PALIGNRrmi,
+ MMX_PSIGNBrm,
+ MMX_PSIGNDrm,
+ MMX_PSIGNWrm)>;
def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 6;
@@ -875,11 +858,11 @@ def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm)>;
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 7;
@@ -893,14 +876,14 @@ def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
+def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQirm)>;
def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 7;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SBWriteResGroup62], (instregex "VER(R|W)m")>;
+def: InstRW<[SBWriteResGroup62], (instrs VERRm, VERWm)>;
def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
@@ -944,15 +927,9 @@ def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
-def: InstRW<[SBWriteResGroup69], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 8;
@@ -961,12 +938,12 @@ def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;
-def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2, 1];
}
-def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>;
def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 8;
@@ -990,10 +967,8 @@ def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
let NumMicroOps = 5;
let ResourceCycles = [1,2,2];
}
-def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 8;
@@ -1015,36 +990,7 @@ def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)SD2SI(64)?rm",
- "CVT(T?)SS2SI(64)?rm")>;
-
-def SBWriteResGroup93_1 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_1], (instrs IMUL64m, MUL64m)>;
-
-def SBWriteResGroup93_2 : SchedWriteRes<[SBPort1,SBPort23,SBPort05,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_2], (instrs IMUL32m, MUL32m)>;
-
-def SBWriteResGroup93_3 : SchedWriteRes<[SBPort1,SBPort05,SBPort015,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SBWriteResGroup93_3], (instrs IMUL16m, MUL16m)>;
-
-def SBWriteResGroup93_4 : SchedWriteRes<[SBPort1,SBPort015,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_4], (instrs IMUL16rmi, IMUL16rmi8)>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)(SD|SS)2SI(64)?rm")>;
def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
let Latency = 9;
@@ -1092,10 +1038,7 @@ def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort05,SBPort
let NumMicroOps = 6;
let ResourceCycles = [1,1,2,1,1];
}
-def: InstRW<[SBWriteResGroup100], (instregex "BT(16|32|64)mr",
- "BTC(16|32|64)mr",
- "BTR(16|32|64)mr",
- "BTS(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestRegLd, SBWriteResGroup100>; // TODO - this is incorrect - no RMW
def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 10;
@@ -1119,6 +1062,14 @@ def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup106], (instregex "FICOM(P?)(16|32)m")>;
+def SBWriteResGroup108 : SchedWriteRes<[SBPort05,SBPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 11;
+ let ResourceCycles = [7,4];
+}
+def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m",
+ "RCR(8|16|32|64)m")>;
+
def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 12;
let NumMicroOps = 2;
@@ -1154,6 +1105,71 @@ def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
+def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> {
+ let Latency = 9;
+ let NumMicroOps = 20;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;
+
+def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 4;
+ let ResourceCycles = [];
+}
+def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;
+
def: InstRW<[WriteZero], (instrs CLC)>;
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SBWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def SBWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[SBWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def SBWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+ VXORPDrr)>;
+
+def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def SBWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [SBWriteResGroup30]>
+]>;
+def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
+
} // SchedModel
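The zero-idiom entries above key off ZeroIdiomPredicate, which is simply CheckSameRegOperand<1, 2>: the dependency-breaking form is recognized when both source registers are the same, as in the pxor and vxorpd examples given for llvm-exegesis. A hand-written equivalent of that check, shown only as a sketch (the function name is invented):

#include "llvm/MC/MCInst.h"

// Equivalent of ZeroIdiomPredicate (CheckSameRegOperand<1, 2>) for a
// two-source instruction such as XOR32rr or PXORrr, where operand 0 is the
// destination and operands 1 and 2 are the sources.
static bool isZeroIdiomForm(const llvm::MCInst &MI) {
  const llvm::MCOperand &Src1 = MI.getOperand(1);
  const llvm::MCOperand &Src2 = MI.getOperand(2);
  return Src1.isReg() && Src2.isReg() && Src1.getReg() == Src2.getReg();
}
// When the predicate matches, variants like SBWriteZeroIdiom pick
// SBWriteZeroLatency, so the result is modeled as ready immediately.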
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index bda088e1512f..2c9eb7516085 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -71,10 +71,16 @@ def SKLPortAny : ProcResGroup<[SKLPort0, SKLPort1, SKLPort2, SKLPort3, SKLPort4,
let BufferSize=60;
}
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -107,24 +113,47 @@ def : WriteRes<WriteRMW, [SKLPort237,SKLPort4]>;
// Arithmetic.
defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op.
-defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
-defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : SKLWriteResPair<WriteIMul8, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul16, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
+defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
-
-defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv32, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
-defm : SKLWriteResPair<WriteIDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteCMPXCHG,[SKLPort06, SKLPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SKLPort23,SKLPort06,SKLPort0156,SKLPort237,SKLPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [SKLPort0156], 2, [3], 3>;
+
+// TODO: Why isn't the SKLDivider used?
+defm : SKLWriteResPair<WriteDiv8, [SKLPort0,SKLDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteDiv16, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv16Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [SKLPort0,SKLPort23,SKLDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [SKLPort0,SKLDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [SKLPort0,SKLPort5,SKLPort23,SKLPort0156], 28, [2,4,1,1], 8>;
defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
defm : SKLWriteResPair<WriteCMOV, [SKLPort06], 1, [1], 1>; // Conditional move.
@@ -135,8 +164,14 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
-def : WriteRes<WriteBitTest,[SKLPort06]>; //
+
+defm : X86WriteRes<WriteLAHFSAHF, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SKLPort06,SKLPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SKLPort0156,SKLPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SKLPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SKLPort06,SKLPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SKLPort0156,SKLPort23], 5, [1,1], 2>;
// Bit counts.
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
@@ -146,7 +181,10 @@ defm : SKLWriteResPair<WriteTZCNT, [SKLPort1], 3>;
defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>;
// Integer shifts and rotates.
-defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
+defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
+defm : SKLWriteResPair<WriteShiftCL, [SKLPort06], 3, [3], 3>;
+defm : SKLWriteResPair<WriteRotate, [SKLPort06], 2, [2], 2>;
+defm : SKLWriteResPair<WriteRotateCL, [SKLPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKLPort1], 3, [1], 1>;
@@ -154,9 +192,10 @@ defm : X86WriteRes<WriteSHDrrcl,[SKLPort1,SKLPort06,SKLPort0156], 6, [1, 2, 1],
defm : X86WriteRes<WriteSHDmri, [SKLPort1,SKLPort23,SKLPort237,SKLPort0156], 9, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156], 11, [1, 1, 1, 2, 1], 6>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
-defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
+defm : SKLWriteResPair<WriteBLS, [SKLPort15], 1>;
+defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>;
@@ -612,10 +651,7 @@ def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> {
let Latency = 1;
@@ -632,47 +668,42 @@ def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[SKLWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[SKLWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP(32|64|80)m")>;
def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SKLWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP)>;
-def: InstRW<[SKLWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP,
+ MMX_MOVDQ2Qrr)>;
def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup15], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SKLWriteResGroup15], (instregex "SET(A|BE)r")>;
def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> {
let Latency = 2;
@@ -702,11 +733,10 @@ def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup23], (instrs CWD)>;
-def: InstRW<[SKLWriteResGroup23], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SKLWriteResGroup23], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[SKLWriteResGroup23], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[SKLWriteResGroup23], (instregex "ADC8ri",
"SBB8ri")>;
def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
@@ -728,10 +758,9 @@ def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[SKLWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
let Latency = 3;
@@ -741,21 +770,13 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
-def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup29_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
- "VPBROADCASTBrr",
- "VPBROADCASTWrr",
+ "VPBROADCAST(B|W)rr",
"(V?)PCMPGTQ(Y?)rr")>;
def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
@@ -765,26 +786,6 @@ def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup32], (instrs FNSTSW16r)>;
-def SKLWriteResGroup33 : SchedWriteRes<[SKLPort06]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKLWriteResGroup33], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SKLWriteResGroup34 : SchedWriteRes<[SKLPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKLWriteResGroup34], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
-
def SKLWriteResGroup35 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -805,9 +806,9 @@ def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKLWriteResGroup39], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[SKLWriteResGroup39], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def SKLWriteResGroup40 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
let Latency = 3;
@@ -828,10 +829,8 @@ def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SKLWriteResGroup42], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
+def: InstRW<[SKLWriteResGroup42], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> {
let Latency = 3;
@@ -876,20 +875,6 @@ def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> {
def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr",
"(V?)CVT(T?)PS2DQ(Y?)rr")>;
-def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup51], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKLWriteResGroup51_16], (instrs IMUL16r, MUL16r)>;
-
def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -912,10 +897,10 @@ def SKLWriteResGroup55 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup55], (instrs PAUSE)>;
-def SKLWriteResGroup56 : SchedWriteRes<[SKLPort015,SKLPort0156]> {
- let Latency = 4;
+def SKLWriteResGroup56 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[SKLWriteResGroup56], (instrs VZEROUPPER)>;
@@ -931,11 +916,8 @@ def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>; // TODO: Should this be SKLWriteResGroup67?
def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
@@ -943,8 +925,9 @@ def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup59], (instregex "MMX_CVTPI2PDirr",
- "(V?)CVTDQ2PDrr")>;
+def: InstRW<[SKLWriteResGroup59], (instrs MMX_CVTPI2PDirr,
+ CVTDQ2PDrr,
+ VCVTDQ2PDrr)>;
def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> {
let Latency = 5;
@@ -969,13 +952,6 @@ def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> {
}
def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>;
-def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -983,13 +959,6 @@ def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup63], (instrs XSETBV)>;
-def SKLWriteResGroup64 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[SKLWriteResGroup64], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SKLWriteResGroup65 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -1002,44 +971,44 @@ def SKLWriteResGroup67 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup67], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[SKLWriteResGroup67], (instrs VBROADCASTSSrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm)>;
+def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
+ "(V?)MOVSLDUPrm")>;
def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup68], (instregex "MMX_CVTPI2PSirr")>;
+def: InstRW<[SKLWriteResGroup68], (instrs MMX_CVTPI2PSirr)>;
def SKLWriteResGroup69 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDSBirm",
- "MMX_PADDSWirm",
- "MMX_PADDUSBirm",
- "MMX_PADDUSWirm",
- "MMX_PAVGBirm",
- "MMX_PAVGWirm",
- "MMX_PCMPEQBirm",
- "MMX_PCMPEQDirm",
- "MMX_PCMPEQWirm",
- "MMX_PCMPGTBirm",
- "MMX_PCMPGTDirm",
- "MMX_PCMPGTWirm",
- "MMX_PMAXSWirm",
- "MMX_PMAXUBirm",
- "MMX_PMINSWirm",
- "MMX_PMINUBirm",
- "MMX_PSUBSBirm",
- "MMX_PSUBSWirm",
- "MMX_PSUBUSBirm",
- "MMX_PSUBUSWirm")>;
+def: InstRW<[SKLWriteResGroup69], (instrs MMX_PADDSBirm,
+ MMX_PADDSWirm,
+ MMX_PADDUSBirm,
+ MMX_PADDUSWirm,
+ MMX_PAVGBirm,
+ MMX_PAVGWirm,
+ MMX_PCMPEQBirm,
+ MMX_PCMPEQDirm,
+ MMX_PCMPEQWirm,
+ MMX_PCMPGTBirm,
+ MMX_PCMPGTDirm,
+ MMX_PCMPGTWirm,
+ MMX_PMAXSWirm,
+ MMX_PMAXUBirm,
+ MMX_PMINSWirm,
+ MMX_PMINUBirm,
+ MMX_PSUBSBirm,
+ MMX_PSUBSWirm,
+ MMX_PSUBUSBirm,
+ MMX_PSUBUSWirm)>;
def SKLWriteResGroup70 : SchedWriteRes<[SKLPort0,SKLPort01]> {
let Latency = 6;
@@ -1054,15 +1023,8 @@ def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup72], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup74], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SKLWriteResGroup72], (instrs FARJMP64)>;
+def: InstRW<[SKLWriteResGroup72], (instregex "JMP(16|32|64)m")>;
def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> {
let Latency = 6;
@@ -1070,9 +1032,6 @@ def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup75], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def SKLWriteResGroup76 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
@@ -1102,15 +1061,9 @@ def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[SKLWriteResGroup82], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup82], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 6;
@@ -1132,23 +1085,23 @@ def SKLWriteResGroup85 : SchedWriteRes<[SKLPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup85], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[SKLWriteResGroup85], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[SKLWriteResGroup85], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SKLWriteResGroup86], (instrs VCVTDQ2PDYrr)>;
def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 6;
@@ -1167,19 +1120,21 @@ def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2PSYrr",
- "VCVTPS2PDYrr",
- "VCVT(T?)PD2DQYrr")>;
+def: InstRW<[SKLWriteResGroup89], (instrs VCVTPD2PSYrr,
+ VCVTPS2PDYrr,
+ VCVTPD2DQYrr,
+ VCVTTPD2DQYrr)>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup91], (instregex "(V?)INSERTF128rm",
- "(V?)INSERTI128rm",
- "(V?)PADD(B|D|Q|W)rm",
- "(V?)PBLENDDrmi",
+def: InstRW<[SKLWriteResGroup91], (instrs VINSERTF128rm,
+ VINSERTI128rm,
+ VPBLENDDrmi)>;
+def: InstRW<[SKLWriteResGroup91, ReadAfterVecXLd],
+ (instregex "(V?)PADD(B|D|Q|W)rm",
"(V?)PSUB(B|D|Q|W)rm")>;
def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
@@ -1187,9 +1142,9 @@ def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[SKLWriteResGroup92], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def SKLWriteResGroup94 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
let Latency = 7;
@@ -1225,10 +1180,8 @@ def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 7;
@@ -1242,8 +1195,8 @@ def SKLWriteResGroup102 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[SKLWriteResGroup102], (instrs FARCALL64)>;
def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 7;
@@ -1260,39 +1213,26 @@ def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> {
def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
-def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup107_16], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort06, SKLPort0156, SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SKLWriteResGroup107_16_2], (instrs IMUL16m, MUL16m)>;
-
def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup108], (instregex "FCOM(P?)(32|64)m",
- "VPBROADCASTBYrm",
- "VPBROADCASTWYrm",
- "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "FCOM(P?)(32|64)m")>;
+def: InstRW<[SKLWriteResGroup108], (instrs VPBROADCASTBYrm,
+ VPBROADCASTWYrm,
+ VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup110], (instregex "VPADD(B|D|Q|W)Yrm",
- "VPBLENDDYrmi",
+def: InstRW<[SKLWriteResGroup110], (instrs VPBLENDDYrmi)>;
+def: InstRW<[SKLWriteResGroup110, ReadAfterVecYLd],
+ (instregex "VPADD(B|D|Q|W)Yrm",
"VPSUB(B|D|Q|W)Yrm")>;
def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
@@ -1302,22 +1242,13 @@ def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKLWriteResGroup115 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKLWriteResGroup115], (instregex "ROR(8|16|32|64)mCL")>;
-
def SKLWriteResGroup116 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 8;
@@ -1325,6 +1256,7 @@ def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let ResourceCycles = [1,1,1,3];
}
def: InstRW<[SKLWriteResGroup117], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1335,25 +1267,25 @@ def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
let ResourceCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKLWriteResGroup119>;
-def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG(8|16|32|64)rm")>;
def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup120], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SKLWriteResGroup120], (instrs MMX_CVTPI2PSirm)>;
def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup121], (instregex "(V?)PCMPGTQrm",
- "VPMOVSXBWYrm",
- "VPMOVSXDQYrm",
- "VPMOVSXWDYrm",
- "VPMOVZXWDYrm")>;
+def: InstRW<[SKLWriteResGroup121], (instrs PCMPGTQrm,
+ VPCMPGTQrm,
+ VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 9;
@@ -1363,13 +1295,6 @@ def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> {
def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup127], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1392,8 +1317,8 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
- "ILD_F(16|32|64)m",
- "VPCMPGTQYrm")>;
+ "ILD_F(16|32|64)m")>;
+def: InstRW<[SKLWriteResGroup133], (instrs VPCMPGTQYrm)>;
def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 10;
@@ -1410,7 +1335,7 @@ def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup138], (instregex "MMX_CVTPI2PDirm")>;
+def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDirm)>;
def SKLWriteResGroup139 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 10;
@@ -1424,15 +1349,8 @@ def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
-def: InstRW<[SKLWriteResGroup140], (instregex "VPHADDSWYrm",
- "VPHSUBSWYrm")>;
-
-def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup142], (instrs IMUL32m, MUL32m, MULX32rm)>;
+def: InstRW<[SKLWriteResGroup140], (instrs VPHADDSWYrm,
+ VPHSUBSWYrm)>;
def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 10;
@@ -1460,9 +1378,10 @@ def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm",
- "VCVTPS2PDYrm",
- "VCVT(T?)PS2DQYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instrs VCVTDQ2PSYrm,
+ VCVTPS2PDYrm,
+ VCVTPS2DQYrm,
+ VCVTTPS2DQYrm)>;
def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 11;
@@ -1493,9 +1412,11 @@ def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm")>;
+def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm)>;
def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
let Latency = 11;
@@ -1510,7 +1431,7 @@ def SKLWriteResGroup155 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort015
let NumMicroOps = 9;
let ResourceCycles = [1,5,1,2];
}
-def: InstRW<[SKLWriteResGroup155], (instregex "RCL8rCL")>;
+def: InstRW<[SKLWriteResGroup155], (instrs RCL8rCL)>;
def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 11;
@@ -1538,7 +1459,7 @@ def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup163], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SKLWriteResGroup163], (instrs VCVTDQ2PDYrm)>;
def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
let Latency = 14;
@@ -1567,7 +1488,7 @@ def SKLWriteResGroup170 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort015
let NumMicroOps = 10;
let ResourceCycles = [2,4,1,3];
}
-def: InstRW<[SKLWriteResGroup170], (instregex "RCR8rCL")>;
+def: InstRW<[SKLWriteResGroup170], (instrs RCR8rCL)>;
def SKLWriteResGroup171 : SchedWriteRes<[SKLPort0]> {
let Latency = 15;
@@ -1723,13 +1644,6 @@ def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup206], (instregex "DIVR_F(32|64)m")>;
-def SKLWriteResGroup207 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> {
- let Latency = 28;
- let NumMicroOps = 8;
- let ResourceCycles = [2,4,1,1];
-}
-def: InstRW<[SKLWriteResGroup207], (instregex "IDIV(8|16|32|64)m")>;
-
def SKLWriteResGroup208 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 30;
let NumMicroOps = 3;
@@ -1824,20 +1738,6 @@ def SKLWriteResGroup220 : SchedWriteRes<[SKLPort5,SKLPort05,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup220], (instrs FNINIT)>;
-def SKLWriteResGroup221 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
- let Latency = 76;
- let NumMicroOps = 32;
- let ResourceCycles = [7,2,8,3,1,11];
-}
-def: InstRW<[SKLWriteResGroup221], (instregex "DIV(16|32|64)r")>;
-
-def SKLWriteResGroup222 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
- let Latency = 102;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[SKLWriteResGroup222], (instregex "IDIV(16|32|64)r")>;
-
def SKLWriteResGroup223 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 9d5f8555c505..ec8e4db02d8a 100755
--- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -71,10 +71,16 @@ def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
let BufferSize=60;
}
-// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -107,24 +113,48 @@ def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>;
// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op.
-defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
-defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
+defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
+defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
-
-defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv32, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
-defm : SKXWriteResPair<WriteIDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteCMPXCHG,[SKXPort06, SKXPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[SKXPort23,SKXPort06,SKXPort0156,SKXPort237,SKXPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>;
+
+// TODO: Why isn't the SKXDivider used?
+defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv16Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [SKXPort0,SKXPort23,SKXDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [SKXPort0,SKXPort5,SKXPort23,SKXPort0156], 28, [2,4,1,1], 8>;
defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
@@ -135,11 +165,19 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
-def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
-def : WriteRes<WriteBitTest,[SKXPort06]>; //
+defm : X86WriteRes<WriteLAHFSAHF, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SKXPort06,SKXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SKXPort0156,SKXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [SKXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SKXPort06,SKXPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;
// Integer shifts and rotates.
-defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
+defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
+defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
+defm : SKXWriteResPair<WriteRotate, [SKXPort06], 2, [2], 2>;
+defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>;
@@ -154,9 +192,10 @@ defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>;
defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
-defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
+defm : SKXWriteResPair<WriteBLS, [SKXPort15], 1>;
+defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
@@ -625,10 +664,7 @@ def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr",
- "BLSI(32|64)rr",
- "BLSMSK(32|64)rr",
- "BLSR(32|64)rr")>;
+def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
let Latency = 1;
@@ -655,48 +691,43 @@ def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE,
- CMC, STC)>;
-def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m",
- "SIDT64m",
- "SMSW16m",
- "STRm",
- "SYSCALL")>;
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
- "KMOV(B|D|Q|W)mk",
- "ST_FP(32|64|80)m",
- "VMPTRSTm")>;
+def: InstRW<[SKXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[SKXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
+ "ST_FP(32|64|80)m")>;
def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
+def: InstRW<[SKXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP)>;
-def: InstRW<[SKXWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>;
+def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
+ MMX_MOVDQ2Qrr)>;
def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup15], (instregex "ROL(8|16|32|64)r1",
- "ROL(8|16|32|64)ri",
- "ROR(8|16|32|64)r1",
- "ROR(8|16|32|64)ri",
- "SET(A|BE)r")>;
+def: InstRW<[SKXWriteResGroup15], (instregex "SET(A|BE)r")>;
def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
let Latency = 2;
@@ -726,11 +757,10 @@ def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup23], (instrs CWD)>;
-def: InstRW<[SKXWriteResGroup23], (instrs JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[SKXWriteResGroup23], (instregex "ADC8i8",
- "ADC8ri",
- "SBB8i8",
+def: InstRW<[SKXWriteResGroup23], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8)>;
+def: InstRW<[SKXWriteResGroup23], (instregex "ADC8ri",
"SBB8ri")>;
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
@@ -752,10 +782,9 @@ def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r,
+def: InstRW<[SKXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
-def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr",
- "PUSH64i8")>;
+def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> {
let Latency = 2;
@@ -781,39 +810,26 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
-def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup31_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
-
def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
+def: InstRW<[SKXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
"KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
"KSHIFTR(B|D|Q|W)ri",
- "KUNPCKBWrr",
- "KUNPCKDQrr",
- "KUNPCKWDrr",
+ "KUNPCK(BW|DQ|WD)rr",
"VALIGND(Z|Z128|Z256)rri",
"VALIGNQ(Z|Z128|Z256)rri",
"VCMPPD(Z|Z128|Z256)rri",
"VCMPPS(Z|Z128|Z256)rri",
- "VCMPSDZrr",
- "VCMPSSZrr",
+ "VCMP(SD|SS)Zrr",
"VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
- "VFPCLASSPD(Z|Z128|Z256)rr",
- "VFPCLASSPS(Z|Z128|Z256)rr",
- "VFPCLASSSDZrr",
- "VFPCLASSSSZrr",
- "VPBROADCASTBrr",
- "VPBROADCASTWrr",
+ "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
+ "VFPCLASS(SD|SS)Zrr",
+ "VPBROADCAST(B|W)rr",
"VPCMPB(Z|Z128|Z256)rri",
"VPCMPD(Z|Z128|Z256)rri",
"VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
@@ -823,7 +839,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
"VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
"VPCMPW(Z|Z128|Z256)rri",
"VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr",
- "VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
"VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
@@ -833,26 +848,6 @@ def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup34], (instrs FNSTSW16r)>;
-def SKXWriteResGroup35 : SchedWriteRes<[SKXPort06]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKXWriteResGroup35], (instregex "ROL(8|16|32|64)rCL",
- "ROR(8|16|32|64)rCL",
- "SAR(8|16|32|64)rCL",
- "SHL(8|16|32|64)rCL",
- "SHR(8|16|32|64)rCL")>;
-
-def SKXWriteResGroup36 : SchedWriteRes<[SKXPort0156]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SKXWriteResGroup36], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr,
- XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr,
- XCHG16ar, XCHG32ar, XCHG64ar)>;
-
def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -872,9 +867,9 @@ def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup41], (instregex "MMX_PACKSSDWirr",
- "MMX_PACKSSWBirr",
- "MMX_PACKUSWBirr")>;
+def: InstRW<[SKXWriteResGroup41], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
let Latency = 3;
@@ -895,10 +890,8 @@ def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r1",
- "RCL(8|16|32|64)ri",
- "RCR(8|16|32|64)r1",
- "RCR(8|16|32|64)ri")>;
+def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
let Latency = 3;
@@ -1000,20 +993,6 @@ def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
"VPMOVUSWB(Z|Z128|Z256)rr",
"VPMOVWB(Z|Z128|Z256)rr")>;
-def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup52], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKXWriteResGroup52_16], (instrs IMUL16r, MUL16r)>;
-
def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -1030,10 +1009,10 @@ def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup55], (instrs FNCLEX)>;
-def SKXWriteResGroup56 : SchedWriteRes<[SKXPort015,SKXPort0156]> {
- let Latency = 4;
+def SKXWriteResGroup56 : SchedWriteRes<[]> {
+ let Latency = 0;
let NumMicroOps = 4;
- let ResourceCycles = [1,3];
+ let ResourceCycles = [];
}
def: InstRW<[SKXWriteResGroup56], (instrs VZEROUPPER)>;
@@ -1049,11 +1028,8 @@ def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16",
- "MOVSX(16|32|64)rm32",
- "MOVSX(16|32|64)rm8",
- "MOVZX(16|32|64)rm16",
- "MOVZX(16|32|64)rm8",
+def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
"(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71?
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
@@ -1104,13 +1080,6 @@ def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> {
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;
-def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup64], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 3;
@@ -1150,13 +1119,6 @@ def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>;
-def SKXWriteResGroup68 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[SKXWriteResGroup68], (instregex "CMPXCHG(8|16|32|64)rr")>;
-
def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
@@ -1169,19 +1131,21 @@ def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup71], (instregex "VBROADCASTSSrm",
- "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm",
- "VPBROADCASTDrm",
- "VPBROADCASTQrm")>;
+def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm,
+ VMOVSHDUPrm,
+ VMOVSLDUPrm,
+ MOVSHDUPrm,
+ MOVSLDUPrm)>;
def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup72], (instregex "MMX_CVTPI2PSirr",
- "VCOMPRESSPD(Z|Z128|Z256)rr",
+def: InstRW<[SKXWriteResGroup72], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
"VCOMPRESSPS(Z|Z128|Z256)rr",
"VPCOMPRESSD(Z|Z128|Z256)rr",
"VPCOMPRESSQ(Z|Z128|Z256)rr",
@@ -1192,41 +1156,34 @@ def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDSBirm",
- "MMX_PADDSWirm",
- "MMX_PADDUSBirm",
- "MMX_PADDUSWirm",
- "MMX_PAVGBirm",
- "MMX_PAVGWirm",
- "MMX_PCMPEQBirm",
- "MMX_PCMPEQDirm",
- "MMX_PCMPEQWirm",
- "MMX_PCMPGTBirm",
- "MMX_PCMPGTDirm",
- "MMX_PCMPGTWirm",
- "MMX_PMAXSWirm",
- "MMX_PMAXUBirm",
- "MMX_PMINSWirm",
- "MMX_PMINUBirm",
- "MMX_PSUBSBirm",
- "MMX_PSUBSWirm",
- "MMX_PSUBUSBirm",
- "MMX_PSUBUSWirm")>;
+def: InstRW<[SKXWriteResGroup73], (instrs MMX_PADDSBirm,
+ MMX_PADDSWirm,
+ MMX_PADDUSBirm,
+ MMX_PADDUSWirm,
+ MMX_PAVGBirm,
+ MMX_PAVGWirm,
+ MMX_PCMPEQBirm,
+ MMX_PCMPEQDirm,
+ MMX_PCMPEQWirm,
+ MMX_PCMPGTBirm,
+ MMX_PCMPGTDirm,
+ MMX_PCMPGTWirm,
+ MMX_PMAXSWirm,
+ MMX_PMAXUBirm,
+ MMX_PMINSWirm,
+ MMX_PMINUBirm,
+ MMX_PSUBSBirm,
+ MMX_PSUBSWirm,
+ MMX_PSUBUSBirm,
+ MMX_PSUBUSWirm)>;
def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup76], (instregex "FARJMP64",
- "JMP(16|32|64)m")>;
-
-def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup78], (instregex "BT(16|32|64)mi8")>;
+def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64)>;
+def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
let Latency = 6;
@@ -1234,9 +1191,6 @@ def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm",
- "BLSI(32|64)rm",
- "BLSMSK(32|64)rm",
- "BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
@@ -1244,8 +1198,8 @@ def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)",
- "VMOVDI2PDIZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
let Latency = 6;
@@ -1276,15 +1230,9 @@ def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
-def: InstRW<[SKXWriteResGroup86], (instregex "BTC(16|32|64)mi8",
- "BTR(16|32|64)mi8",
- "BTS(16|32|64)mi8",
- "SAR(8|16|32|64)m1",
- "SAR(8|16|32|64)mi",
- "SHL(8|16|32|64)m1",
- "SHL(8|16|32|64)mi",
- "SHR(8|16|32|64)m1",
- "SHR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 6;
@@ -1306,23 +1254,23 @@ def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m",
- "VBROADCASTF128",
- "VBROADCASTI128",
- "VBROADCASTSDYrm",
- "VBROADCASTSSYrm",
- "VMOVDDUPYrm",
- "VMOVSHDUPYrm",
- "VMOVSLDUPYrm",
- "VPBROADCASTDYrm",
- "VPBROADCASTQYrm")>;
+def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[SKXWriteResGroup89], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SKXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
@@ -1389,12 +1337,14 @@ def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
+def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
+ VPBLENDDrmi)>;
+def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
+ (instregex "VBLENDMPDZ128rm(b?)",
"VBLENDMPSZ128rm(b?)",
"VBROADCASTI32X2Z128m(b?)",
"VBROADCASTSSZ128m(b?)",
- "VINSERTF128rm",
- "VINSERTI128rm",
+ "VINSERT(F|I)128rm",
"VMOVAPDZ128rm(b?)",
"VMOVAPSZ128rm(b?)",
"VMOVDDUPZ128rm(b?)",
@@ -1404,14 +1354,12 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)",
"VMOVDQU32Z128rm(b?)",
"VMOVDQU64Z128rm(b?)",
"VMOVDQU8Z128rm(b?)",
- "VMOVNTDQAZ128rm(b?)",
"VMOVSHDUPZ128rm(b?)",
"VMOVSLDUPZ128rm(b?)",
"VMOVUPDZ128rm(b?)",
"VMOVUPSZ128rm(b?)",
"VPADD(B|D|Q|W)Z128rm(b?)",
"(V?)PADD(B|D|Q|W)rm",
- "VPBLENDDrmi",
"VPBLENDM(B|D|Q|W)Z128rm(b?)",
"VPBROADCASTDZ128m(b?)",
"VPBROADCASTQZ128m(b?)",
@@ -1425,9 +1373,9 @@ def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
-def: InstRW<[SKXWriteResGroup96], (instregex "MMX_PACKSSDWirm",
- "MMX_PACKSSWBirm",
- "MMX_PACKUSWBirm")>;
+def: InstRW<[SKXWriteResGroup96], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
@@ -1495,10 +1443,8 @@ def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m1",
- "ROL(8|16|32|64)mi",
- "ROR(8|16|32|64)m1",
- "ROR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 7;
@@ -1512,8 +1458,8 @@ def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
-def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m",
- "FARCALL64")>;
+def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64)>;
def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
@@ -1567,20 +1513,6 @@ def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> {
def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
-def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup118_16_1], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>;
-
-def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort06, SKXPort0156, SKXPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SKXWriteResGroup118_16_2], (instrs IMUL16m, MUL16m)>;
-
def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -1588,20 +1520,23 @@ def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
"VFPCLASSSDZrm(b?)",
- "VPBROADCASTBYrm",
"VPBROADCASTB(Z|Z256)m(b?)",
- "VPBROADCASTWYrm",
- "VPBROADCASTW(Z|Z256)m(b?)",
- "VPMOVSXBDYrm",
- "VPMOVSXBQYrm",
- "VPMOVSXWQYrm")>;
+ "VPBROADCASTW(Z|Z256)m(b?)")>;
+def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
+ VPBROADCASTWYrm,
+ VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPD(Z|Z256)rm(b?)",
+def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
+ VPBLENDDYrmi)>;
+def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
+ (instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VBLENDMPS(Z|Z256)rm(b?)",
"VBROADCASTF32X2Z256m(b?)",
"VBROADCASTF32X2Zm(b?)",
@@ -1638,14 +1573,12 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VMOVDQU32(Z|Z256)rm(b?)",
"VMOVDQU64(Z|Z256)rm(b?)",
"VMOVDQU8(Z|Z256)rm(b?)",
- "VMOVNTDQAZ256rm(b?)",
"VMOVSHDUP(Z|Z256)rm(b?)",
"VMOVSLDUP(Z|Z256)rm(b?)",
"VMOVUPD(Z|Z256)rm(b?)",
"VMOVUPS(Z|Z256)rm(b?)",
"VPADD(B|D|Q|W)Yrm",
"VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
- "VPBLENDDYrmi",
"VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
"VPBROADCASTD(Z|Z256)m(b?)",
"VPBROADCASTQ(Z|Z256)m(b?)",
@@ -1661,22 +1594,13 @@ def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def SKXWriteResGroup126 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,3];
-}
-def: InstRW<[SKXWriteResGroup126], (instregex "ROR(8|16|32|64)mCL")>;
-
def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m1",
- "RCL(8|16|32|64)mi",
- "RCR(8|16|32|64)m1",
- "RCR(8|16|32|64)mi")>;
+def: InstRW<[SKXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
let Latency = 8;
@@ -1684,6 +1608,7 @@ def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let ResourceCycles = [1,1,1,3];
}
def: InstRW<[SKXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
"SAR(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
@@ -1694,7 +1619,6 @@ def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
let ResourceCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>;
-def: InstRW<[SKXWriteResGroup130], (instregex "CMPXCHG(8|16|32|64)rm")>;
def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
let Latency = 8;
@@ -1734,19 +1658,20 @@ def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup135], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SKXWriteResGroup135], (instrs MMX_CVTPI2PSirm)>;
def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
- "VALIGNQZ128rm(b?)i",
- "VCMPPDZ128rm(b?)i",
- "VCMPPSZ128rm(b?)i",
- "VCMPSDZrm",
- "VCMPSSZrm",
+def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
+def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
+ "VCMP(PD|PS)Z128rm(b?)i",
+ "VCMP(SD|SS)Zrm",
"VFPCLASSSSZrm(b?)",
"VPCMPBZ128rmi(b?)",
"VPCMPDZ128rmi(b?)",
@@ -1770,18 +1695,14 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
"VPMINUQZ128rm(b?)",
"VPMOVSXBDZ128rm(b?)",
"VPMOVSXBQZ128rm(b?)",
- "VPMOVSXBWYrm",
"VPMOVSXBWZ128rm(b?)",
- "VPMOVSXDQYrm",
"VPMOVSXDQZ128rm(b?)",
- "VPMOVSXWDYrm",
"VPMOVSXWDZ128rm(b?)",
"VPMOVSXWQZ128rm(b?)",
"VPMOVZXBDZ128rm(b?)",
"VPMOVZXBQZ128rm(b?)",
"VPMOVZXBWZ128rm(b?)",
"VPMOVZXDQZ128rm(b?)",
- "VPMOVZXWDYrm",
"VPMOVZXWDZ128rm(b?)",
"VPMOVZXWQZ128rm(b?)",
"VPTESTMBZ128rm(b?)",
@@ -1801,13 +1722,6 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup142], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1829,6 +1743,7 @@ def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
+def: InstRW<[SKXWriteResGroup148], (instrs VPCMPGTQYrm)>;
def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VALIGND(Z|Z256)rm(b?)i",
@@ -1843,7 +1758,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPCMPEQW(Z|Z256)rm(b?)",
"VPCMPGTB(Z|Z256)rm(b?)",
"VPCMPGTD(Z|Z256)rm(b?)",
- "VPCMPGTQYrm",
"VPCMPGTQ(Z|Z256)rm(b?)",
"VPCMPGTW(Z|Z256)rm(b?)",
"VPCMPQ(Z|Z256)rmi(b?)",
@@ -1914,15 +1828,8 @@ def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
-def: InstRW<[SKXWriteResGroup154], (instregex "VPHADDSWYrm",
- "VPHSUBSWYrm")>;
-
-def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup156], (instrs IMUL32m, MUL32m, MULX32rm)>;
+def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm,
+ VPHSUBSWYrm)>;
def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 10;
@@ -1950,11 +1857,10 @@ def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PD(Z|Z256)rm(b?)",
- "VCVTDQ2PSYrm",
- "VCVTDQ2PS(Z|Z256)rm(b?)",
+def: InstRW<[SKXWriteResGroup161], (instrs VCVTDQ2PSYrm,
+ VCVTPS2PDYrm)>;
+def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
"VCVTPH2PS(Z|Z256)rm(b?)",
- "VCVTPS2PDYrm",
"VCVTPS2PD(Z|Z256)rm(b?)",
"VCVTQQ2PD(Z|Z256)rm(b?)",
"VCVTQQ2PSZ256rm(b?)",
@@ -1965,8 +1871,7 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PD(Z|Z256)rm(b?)",
"VCVT(T?)PS2QQZ256rm(b?)",
"VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
"VCVT(T?)PS2UQQZ256rm(b?)",
- "VCVTUDQ2PD(Z|Z256)rm(b?)",
- "VCVTUDQ2PS(Z|Z256)rm(b?)",
+ "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
"VCVTUQQ2PD(Z|Z256)rm(b?)",
"VCVTUQQ2PSZ256rm(b?)")>;
@@ -2000,9 +1905,11 @@ def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup166], (instregex "CVTPD2PSrm",
- "CVT(T?)PD2DQrm",
- "MMX_CVT(T?)PD2PIirm")>;
+def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm)>;
def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 11;
@@ -2024,7 +1931,7 @@ def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort015
let NumMicroOps = 9;
let ResourceCycles = [1,5,1,2];
}
-def: InstRW<[SKXWriteResGroup170], (instregex "RCL8rCL")>;
+def: InstRW<[SKXWriteResGroup170], (instrs RCL8rCL)>;
def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
let Latency = 11;
@@ -2091,7 +1998,7 @@ def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup181], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SKXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 13;
@@ -2151,7 +2058,7 @@ def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort015
let NumMicroOps = 10;
let ResourceCycles = [2,4,1,3];
}
-def: InstRW<[SKXWriteResGroup190], (instregex "RCR8rCL")>;
+def: InstRW<[SKXWriteResGroup190], (instrs RCR8rCL)>;
def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> {
let Latency = 15;
@@ -2181,10 +2088,10 @@ def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
}
def: InstRW<[SKXWriteResGroup199], (instrs CMPXCHG8B)>;
-def SKXWriteResGroup200 : SchedWriteRes<[SKXPort0156]> {
- let Latency = 16;
- let NumMicroOps = 16;
- let ResourceCycles = [16];
+def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> {
+ let Latency = 12;
+ let NumMicroOps = 34;
+ let ResourceCycles = [1, 4, 5];
}
def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>;
@@ -2408,13 +2315,6 @@ def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01
def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm,
VPGATHERDDZ256rm)>;
-def SKXWriteResGroup241 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort0156]> {
- let Latency = 28;
- let NumMicroOps = 8;
- let ResourceCycles = [2,4,1,1];
-}
-def: InstRW<[SKXWriteResGroup241], (instregex "IDIV(8|16|32|64)m")>;
-
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
@@ -2547,20 +2447,6 @@ def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup263], (instrs FNINIT)>;
-def SKXWriteResGroup264 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
- let Latency = 76;
- let NumMicroOps = 32;
- let ResourceCycles = [7,2,8,3,1,11];
-}
-def: InstRW<[SKXWriteResGroup264], (instregex "DIV(16|32|64)r")>;
-
-def SKXWriteResGroup265 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> {
- let Latency = 102;
- let NumMicroOps = 66;
- let ResourceCycles = [4,2,4,8,14,34];
-}
-def: InstRW<[SKXWriteResGroup265], (instregex "IDIV(16|32|64)r")>;
-
def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
diff --git a/contrib/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm/lib/Target/X86/X86Schedule.td
index ef9ce94706df..25aa83f96d3a 100644
--- a/contrib/llvm/lib/Target/X86/X86Schedule.td
+++ b/contrib/llvm/lib/Target/X86/X86Schedule.td
@@ -14,6 +14,9 @@
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;
+def ReadAfterVecLd : SchedRead;
+def ReadAfterVecXLd : SchedRead;
+def ReadAfterVecYLd : SchedRead;
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
@@ -37,15 +40,19 @@ multiclass X86WriteRes<SchedWrite SchedRW,
class X86FoldableSchedWrite : SchedWrite {
// The SchedWrite to use when a load is folded into the instruction.
SchedWrite Folded;
+  // The SchedRead to tag register operands that don't need to be ready
+ // until the folded load has completed.
+ SchedRead ReadAfterFold;
}
// Multiclass that produces a linked pair of SchedWrites.
-multiclass X86SchedWritePair {
+multiclass X86SchedWritePair<SchedRead ReadAfter = ReadAfterLd> {
// Register-Memory operation.
def Ld : SchedWrite;
// Register-Register operation.
def NAME : X86FoldableSchedWrite {
let Folded = !cast<SchedWrite>(NAME#"Ld");
+ let ReadAfterFold = ReadAfter;
}
}
@@ -107,19 +114,33 @@ def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
+def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
-def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
-def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
-defm WriteIMul : X86SchedWritePair; // Integer multiplication.
-defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
-def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+def WriteALURMW : WriteSequence<[WriteALULd, WriteRMW]>;
+def WriteADCRMW : WriteSequence<[WriteADCLd, WriteRMW]>;
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+// Integer multiplication
+defm WriteIMul8 : X86SchedWritePair; // Integer 8-bit multiplication.
+defm WriteIMul16 : X86SchedWritePair; // Integer 16-bit multiplication.
+defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate.
+defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register.
+defm WriteIMul32 : X86SchedWritePair; // Integer 32-bit multiplication.
+defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate.
+defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register.
+defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
+defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
+defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
+def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
+defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap.
+def WriteCMPXCHGRMW : SchedWrite; // Compare and set, compare and swap.
+def WriteXCHG : SchedWrite; // Compare+Exchange - TODO RMW support.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
@@ -142,18 +163,32 @@ def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
-def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
+
+def WriteBitTest : SchedWrite; // Bit Test
+def WriteBitTestImmLd : SchedWrite;
+def WriteBitTestRegLd : SchedWrite;
+
+def WriteBitTestSet : SchedWrite; // Bit Test + Set
+def WriteBitTestSetImmLd : SchedWrite;
+def WriteBitTestSetRegLd : SchedWrite;
+def WriteBitTestSetImmRMW : WriteSequence<[WriteBitTestSetImmLd, WriteRMW]>;
+def WriteBitTestSetRegRMW : WriteSequence<[WriteBitTestSetRegLd, WriteRMW]>;
// Integer shifts and rotates.
-defm WriteShift : X86SchedWritePair;
+defm WriteShift : X86SchedWritePair;
+defm WriteShiftCL : X86SchedWritePair;
+defm WriteRotate : X86SchedWritePair;
+defm WriteRotateCL : X86SchedWritePair;
+
// Double shift instructions.
def WriteSHDrri : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri : SchedWrite;
def WriteSHDmrcl : SchedWrite;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
+defm WriteBLS : X86SchedWritePair;
defm WriteBZHI : X86SchedWritePair;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
@@ -185,98 +220,98 @@ def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
-defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
-defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
-defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
-defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
-defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
-defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
-defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
-defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
-defm WriteFCmp : X86SchedWritePair; // Floating point compare.
-defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
-defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
-defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
-defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
-defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
-defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
-defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
-defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
-defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
-defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
-defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
-defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (YMM).
-defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
-defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
-defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
-defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
-defm WriteFDiv : X86SchedWritePair; // Floating point division.
-defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
-defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
-defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
-defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
-defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
-defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
-defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
-defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
-defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
-defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
-defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
-defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
-defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
-defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
-defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
-defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
-defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
-defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
-defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
-defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
-defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
-defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
-defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
-defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
-defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
-defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
-defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
-defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
-defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
-defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
-defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
-defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
-defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
-defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
-defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
-defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
-defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
-defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
-defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
-defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
-defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
-defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
-defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
-defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
-defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
-defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
-defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
-defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
-defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
-defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
-defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
-defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
-defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
-defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (YMZMM).
+defm WriteFAdd : X86SchedWritePair<ReadAfterVecLd>; // Floating point add/sub.
+defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
+defm WriteFAddY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (YMM).
+defm WriteFAddZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (ZMM).
+defm WriteFAdd64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double add/sub.
+defm WriteFAdd64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double add/sub (XMM).
+defm WriteFAdd64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (YMM).
+defm WriteFAdd64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (ZMM).
+defm WriteFCmp : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare.
+defm WriteFCmpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point compare (XMM).
+defm WriteFCmpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (YMM).
+defm WriteFCmpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (ZMM).
+defm WriteFCmp64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double compare.
+defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
+defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
+defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
+defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags.
+defm WriteFMul : X86SchedWritePair<ReadAfterVecLd>; // Floating point multiplication.
+defm WriteFMulX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
+defm WriteFMulY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
+defm WriteFMulZ  : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (ZMM).
+defm WriteFMul64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double multiplication.
+defm WriteFMul64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double multiplication (XMM).
+defm WriteFMul64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (YMM).
+defm WriteFMul64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (ZMM).
+defm WriteFDiv : X86SchedWritePair<ReadAfterVecLd>; // Floating point division.
+defm WriteFDivX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point division (XMM).
+defm WriteFDivY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (YMM).
+defm WriteFDivZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (ZMM).
+defm WriteFDiv64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double division.
+defm WriteFDiv64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double division (XMM).
+defm WriteFDiv64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (YMM).
+defm WriteFDiv64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (ZMM).
+defm WriteFSqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point square root.
+defm WriteFSqrtX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point square root (XMM).
+defm WriteFSqrtY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (YMM).
+defm WriteFSqrtZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (ZMM).
+defm WriteFSqrt64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double square root.
+defm WriteFSqrt64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double square root (XMM).
+defm WriteFSqrt64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (YMM).
+defm WriteFSqrt64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (ZMM).
+defm WriteFSqrt80 : X86SchedWritePair<ReadAfterVecLd>; // Floating point long double square root.
+defm WriteFRcp : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal estimate.
+defm WriteFRcpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal estimate (XMM).
+defm WriteFRcpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (YMM).
+defm WriteFRcpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (ZMM).
+defm WriteFRsqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal square root estimate.
+defm WriteFRsqrtX: X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal square root estimate (XMM).
+defm WriteFRsqrtY: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (YMM).
+defm WriteFRsqrtZ: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (ZMM).
+defm WriteFMA : X86SchedWritePair<ReadAfterVecLd>; // Fused Multiply Add.
+defm WriteFMAX : X86SchedWritePair<ReadAfterVecXLd>; // Fused Multiply Add (XMM).
+defm WriteFMAY : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (YMM).
+defm WriteFMAZ : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (ZMM).
+defm WriteDPPD : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double dot product.
+defm WriteDPPS : X86SchedWritePair<ReadAfterVecXLd>; // Floating point single dot product.
+defm WriteDPPSY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (YMM).
+defm WriteDPPSZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (ZMM).
+defm WriteFSign : X86SchedWritePair<ReadAfterVecLd>; // Floating point fabs/fchs.
+defm WriteFRnd : X86SchedWritePair<ReadAfterVecXLd>; // Floating point rounding.
+defm WriteFRndY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (YMM).
+defm WriteFRndZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (ZMM).
+defm WriteFLogic : X86SchedWritePair<ReadAfterVecXLd>; // Floating point and/or/xor logicals.
+defm WriteFLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (YMM).
+defm WriteFLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (ZMM).
+defm WriteFTest : X86SchedWritePair<ReadAfterVecXLd>; // Floating point TEST instructions.
+defm WriteFTestY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (YMM).
+defm WriteFTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (ZMM).
+defm WriteFShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector shuffles.
+defm WriteFShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (YMM).
+defm WriteFShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (ZMM).
+defm WriteFVarShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector variable shuffles.
+defm WriteFVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (YMM).
+defm WriteFVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (ZMM).
+defm WriteFBlend : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector blends.
+defm WriteFBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (YMM).
+defm WriteFBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (ZMM).
+defm WriteFVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Fp vector variable blends.
+defm WriteFVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (YMM).
+defm WriteFVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
-defm WriteFHAdd : X86SchedWritePair;
-defm WriteFHAddY : X86SchedWritePair;
-defm WriteFHAddZ : X86SchedWritePair;
-defm WritePHAdd : X86SchedWritePair;
-defm WritePHAddX : X86SchedWritePair;
-defm WritePHAddY : X86SchedWritePair;
-defm WritePHAddZ : X86SchedWritePair;
+defm WriteFHAdd : X86SchedWritePair<ReadAfterVecXLd>;
+defm WriteFHAddY : X86SchedWritePair<ReadAfterVecYLd>;
+defm WriteFHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
+defm WritePHAdd : X86SchedWritePair<ReadAfterVecLd>;
+defm WritePHAddX : X86SchedWritePair<ReadAfterVecXLd>;
+defm WritePHAddY : X86SchedWritePair<ReadAfterVecYLd>;
+defm WritePHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
@@ -299,54 +334,54 @@ def WriteVecMoveY : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
-defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
-defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
-defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
-defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
-defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
-defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
-defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
-defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
-defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
-defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
-defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
-defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
-defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
-defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
-defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
-defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
-defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
-defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
-defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
-defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
-defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
-defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
-defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
-defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
-defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
-defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
-defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
-defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
-defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
-defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
-defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
-defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
-defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
-defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
-defm WriteBlend : X86SchedWritePair; // Vector blends.
-defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
-defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
-defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
-defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
-defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
-defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
-defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
-defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
-defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
-defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
-defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
-defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
-defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
+defm WriteVecALU : X86SchedWritePair<ReadAfterVecLd>; // Vector integer ALU op, no logicals.
+defm WriteVecALUX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer ALU op, no logicals (XMM).
+defm WriteVecALUY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (YMM).
+defm WriteVecALUZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (ZMM).
+defm WriteVecLogic : X86SchedWritePair<ReadAfterVecLd>; // Vector integer and/or/xor logicals.
+defm WriteVecLogicX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer and/or/xor logicals (XMM).
+defm WriteVecLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (YMM).
+defm WriteVecLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (ZMM).
+defm WriteVecTest : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer TEST instructions.
+defm WriteVecTestY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (YMM).
+defm WriteVecTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (ZMM).
+defm WriteVecShift : X86SchedWritePair<ReadAfterVecLd>; // Vector integer shifts (default).
+defm WriteVecShiftX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer shifts (XMM).
+defm WriteVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (YMM).
+defm WriteVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (ZMM).
+defm WriteVecShiftImm : X86SchedWritePair<ReadAfterVecLd>; // Vector integer immediate shifts (default).
+defm WriteVecShiftImmX: X86SchedWritePair<ReadAfterVecXLd>; // Vector integer immediate shifts (XMM).
+defm WriteVecShiftImmY: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (YMM).
+defm WriteVecShiftImmZ: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (ZMM).
+defm WriteVecIMul : X86SchedWritePair<ReadAfterVecLd>; // Vector integer multiply (default).
+defm WriteVecIMulX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer multiply (XMM).
+defm WriteVecIMulY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (YMM).
+defm WriteVecIMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (ZMM).
+defm WritePMULLD : X86SchedWritePair<ReadAfterVecXLd>; // Vector PMULLD.
+defm WritePMULLDY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (YMM).
+defm WritePMULLDZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (ZMM).
+defm WriteShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector shuffles.
+defm WriteShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector shuffles (XMM).
+defm WriteShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (YMM).
+defm WriteShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (ZMM).
+defm WriteVarShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector variable shuffles.
+defm WriteVarShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable shuffles (XMM).
+defm WriteVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (YMM).
+defm WriteVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (ZMM).
+defm WriteBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector blends.
+defm WriteBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (YMM).
+defm WriteBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (ZMM).
+defm WriteVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable blends.
+defm WriteVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (YMM).
+defm WriteVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (ZMM).
+defm WritePSADBW : X86SchedWritePair<ReadAfterVecLd>; // Vector PSADBW.
+defm WritePSADBWX : X86SchedWritePair<ReadAfterVecXLd>; // Vector PSADBW (XMM).
+defm WritePSADBWY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (YMM).
+defm WritePSADBWZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (ZMM).
+defm WriteMPSAD : X86SchedWritePair<ReadAfterVecXLd>; // Vector MPSAD.
+defm WriteMPSADY : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (YMM).
+defm WriteMPSADZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (ZMM).
+defm WritePHMINPOS : X86SchedWritePair<ReadAfterVecXLd>; // Vector PHMINPOS.
// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
@@ -360,39 +395,39 @@ def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
-defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
-defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
-defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
-defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
-
-defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
-defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
-defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
-defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
-
-defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
-defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
-defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
-defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
-
-defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
-defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
-defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
-defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
-
-defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
-defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
-defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
-defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
-
-defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
-defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
-defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
-defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
-
-defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
-defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
-defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
+defm WriteCvtSD2I : X86SchedWritePair<ReadAfterVecLd>; // Double -> Integer.
+defm WriteCvtPD2I : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Integer (XMM).
+defm WriteCvtPD2IY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (YMM).
+defm WriteCvtPD2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (ZMM).
+
+defm WriteCvtSS2I : X86SchedWritePair<ReadAfterVecLd>; // Float -> Integer.
+defm WriteCvtPS2I : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Integer (XMM).
+defm WriteCvtPS2IY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (YMM).
+defm WriteCvtPS2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (ZMM).
+
+defm WriteCvtI2SD : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Double.
+defm WriteCvtI2PD : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Double (XMM).
+defm WriteCvtI2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (YMM).
+defm WriteCvtI2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (ZMM).
+
+defm WriteCvtI2SS : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Float.
+defm WriteCvtI2PS : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Float (XMM).
+defm WriteCvtI2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (YMM).
+defm WriteCvtI2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (ZMM).
+
+defm WriteCvtSS2SD : X86SchedWritePair<ReadAfterVecLd>; // Float -> Double size conversion.
+defm WriteCvtPS2PD : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Double size conversion (XMM).
+defm WriteCvtPS2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (YMM).
+defm WriteCvtPS2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (ZMM).
+
+defm WriteCvtSD2SS : X86SchedWritePair<ReadAfterVecLd>; // Double -> Float size conversion.
+defm WriteCvtPD2PS : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Float size conversion (XMM).
+defm WriteCvtPD2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (YMM).
+defm WriteCvtPD2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (ZMM).
+
+defm WriteCvtPH2PS : X86SchedWritePair<ReadAfterVecXLd>; // Half -> Float size conversion.
+defm WriteCvtPH2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (YMM).
+defm WriteCvtPH2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
@@ -402,25 +437,25 @@ def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
-defm WriteCRC32 : X86SchedWritePair;
+defm WriteCRC32 : X86SchedWritePair<ReadAfterLd>;
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
-defm WritePCmpIStrM : X86SchedWritePair;
+defm WritePCmpIStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Mask
-defm WritePCmpEStrM : X86SchedWritePair;
+defm WritePCmpEStrM : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Implicit Length Strings, Return Index
-defm WritePCmpIStrI : X86SchedWritePair;
+defm WritePCmpIStrI : X86SchedWritePair<ReadAfterVecXLd>;
// Packed Compare Explicit Length Strings, Return Index
-defm WritePCmpEStrI : X86SchedWritePair;
+defm WritePCmpEStrI : X86SchedWritePair<ReadAfterVecXLd>;
// AES instructions.
-defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
-defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
-defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
+defm WriteAESDecEnc : X86SchedWritePair<ReadAfterVecXLd>; // Decryption, encryption.
+defm WriteAESIMC : X86SchedWritePair<ReadAfterVecXLd>; // InvMixColumn.
+defm WriteAESKeyGen : X86SchedWritePair<ReadAfterVecXLd>; // Key Generation.
// Carry-less multiplication instructions.
-defm WriteCLMul : X86SchedWritePair;
+defm WriteCLMul : X86SchedWritePair<ReadAfterVecXLd>;
// EMMS/FEMMS
def WriteEMMS : SchedWrite;
@@ -433,13 +468,13 @@ def WriteSTMXCSR : SchedWrite;
def WriteSystem : SchedWrite;
// AVX2.
-defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
-defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
-defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
-defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
-defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
-defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
-defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
+defm WriteFShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width vector shuffles.
+defm WriteFVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width variable shuffles.
+defm WriteShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector shuffles.
+defm WriteVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector variable shuffles.
+defm WriteVarVecShift : X86SchedWritePair<ReadAfterVecXLd>; // Variable vector shifts.
+defm WriteVarVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (YMM).
+defm WriteVarVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
index a7f461c456bd..1589ff2ef402 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -43,6 +43,9 @@ def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+def : ReadAdvance<ReadAfterVecLd, 3>;
+def : ReadAdvance<ReadAfterVecXLd, 3>;
+def : ReadAdvance<ReadAfterVecYLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
@@ -78,11 +81,24 @@ def : WriteRes<WriteRMW, [AtomPort0]>;
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
-defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul8, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
+defm : AtomWriteResPair<WriteIMul16, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul32, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul32Imm, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteIMul32Reg, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
+defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+
+defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
+defm : AtomWriteResPair<WriteCMPXCHG, [AtomPort01], [AtomPort01], 15, 15, [15]>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [AtomPort01, AtomPort0], 1, [1, 1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
@@ -108,32 +124,16 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
-def : WriteRes<WriteBitTest,[AtomPort01]>;
-
-defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteRes<WriteBitTest, [AtomPort1], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [AtomPort0], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [AtomPort01], 9, [9], 1>;
+defm : X86WriteRes<WriteBitTestSet, [AtomPort1], 1, [1], 1>;
+//defm : X86WriteRes<WriteBitTestSetImmLd, [AtomPort1], 1, [1], 1>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [AtomPort1], 1, [1], 1>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;
-def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
- let Latency = 8;
- let ResourceCycles = [8];
-}
-def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
-
-def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
- let Latency = 6;
- let ResourceCycles = [6];
-}
-def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
-
-def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
- let Latency = 14;
- let ResourceCycles = [14];
-}
-def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
- IMUL64rmi8, IMUL64rmi32)>;
-
// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
@@ -141,15 +141,19 @@ defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
+defm : X86WriteResPairUnsupported<WriteBLS>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteShiftCL, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteRotate, [AtomPort0], [AtomPort0]>;
+defm : AtomWriteResPair<WriteRotateCL, [AtomPort0], [AtomPort0]>;
defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
@@ -498,20 +502,13 @@ def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
-def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
- let Latency = 5;
- let ResourceCycles = [5];
-}
-def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
-
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
-def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
- "BT(C|R|S)?(16|32|64)(rr|ri8)")>;
+def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r")>;
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
@@ -563,16 +560,14 @@ def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
SCASB, SCASL, SCASQ, SCASW)>;
-def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
- "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
- "XADD(8|16|32|64)rr",
- "XCHG(8|16|32|64)(ar|rr)",
+def : InstRW<[AtomWrite01_2], (instregex "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
"MMX_P(ADD|SUB)Qirr",
"MOV(S|Z)X16rr8",
"MOV(UPS|UPD|DQU)mr",
"MASKMOVDQU(64)?",
"P(ADD|SUB)Qrr")>;
+def : SchedAlias<WriteBitTestSetImmRMW, AtomWrite01_2>;
def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
let Latency = 3;
@@ -616,8 +611,7 @@ def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8)>;
-def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
- "IST_F(P)?(16|32|64)?m",
+def : InstRW<[AtomWrite01_6], (instregex "IST_F(P)?(16|32|64)?m",
"MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
@@ -639,8 +633,7 @@ def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
let Latency = 9;
let ResourceCycles = [9];
}
-def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
- POPA16, POPA32,
+def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
PUSHF16, PUSHF32, PUSHF64,
SHLD64mrCL, SHRD64mrCL,
SHLD64mri8, SHRD64mri8,
@@ -663,7 +656,7 @@ def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let ResourceCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
-def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
+def : SchedAlias<WriteBitTestSetRegRMW, AtomWrite01_11>;
def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
let Latency = 13;
@@ -677,12 +670,6 @@ def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
-def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
- let Latency = 15;
- let ResourceCycles = [15];
-}
-def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>;
-
def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
let Latency = 17;
let ResourceCycles = [17];
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td
new file mode 100644
index 000000000000..5798e1b2671b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -0,0 +1,1282 @@
+//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for AMD bdver2 (Piledriver) to support
+// instruction scheduling and other instruction cost heuristics.
+// Based on:
+// * AMD Software Optimization Guide for AMD Family 15h Processors.
+// https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf
+// * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog
+// http://www.agner.org/optimize/microarchitecture.pdf
+// * https://www.realworldtech.com/bulldozer/
+// Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2.
+//
+//===----------------------------------------------------------------------===//
+
+def BdVer2Model : SchedMachineModel {
+ let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired.
+ let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed.
+ let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer.
+ let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency.
+ let HighLatency = 25; // FIXME: any better choice?
+ let MispredictPenalty = 20; // Minimum branch misprediction penalty.
+
+ let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
+
+ // FIXME: Incomplete. This flag is set to allow the scheduler to assign
+ // a default model to unrecognized opcodes.
+ let CompleteModel = 0;
+} // SchedMachineModel
+
+let SchedModel = BdVer2Model in {
+
+
+//===----------------------------------------------------------------------===//
+// Pipes
+//===----------------------------------------------------------------------===//
+
+// There are a total of eight pipes.
+
+//===----------------------------------------------------------------------===//
+// Integer execution pipes
+//
+
+// Two EX (ALU) pipes.
+def PdEX0 : ProcResource<1>; // ALU, Integer Pipe0
+def PdEX1 : ProcResource<1>; // ALU, Integer Pipe1
+def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>;
+
+// Two AGLU pipes, identical.
+def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23]
+
+//===----------------------------------------------------------------------===//
+// Floating point execution pipes
+//
+
+// Four FPU pipes.
+
+def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0
+def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1
+def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2
+def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3
+
+// FPU grouping
+def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>;
+def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>;
+
+
+//===----------------------------------------------------------------------===//
+// RCU
+//===----------------------------------------------------------------------===//
+
+// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle.
+// On the other hand, the RCU reorder buffer size for Piledriver does not
+// seem to be specified in any trustworthy source.
+// But as per https://www.realworldtech.com/bulldozer/6/, Bulldozer had an
+// RCU reorder buffer size of 128, so that is a good guess for now.
+def PdRCU : RetireControlUnit<128, 4>;
+
+
+//===----------------------------------------------------------------------===//
+// Pipelines
+//===----------------------------------------------------------------------===//
+
+// There are a total of two pipelines, each with its own scheduler.
+
+//===----------------------------------------------------------------------===//
+// Integer Pipeline Scheduling
+//
+
+// There is one Integer Scheduler per core.
+
+// The integer physical register file has 96 64-bit registers.
+def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>;
+
+// Unified Integer, Memory Scheduler has 40 entries.
+def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> {
+ // Up to 4 IPC can be decoded, issued, retired.
+ let BufferSize = 40;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FPU Pipeline Scheduling
+//
+
+// The FPU unit is shared between the two cores.
+
+// The FP physical register file has 160 128-bit registers.
+// Operations on 256-bit data types are cracked into two COPs.
+def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
+
+// The unified FP scheduler has 64 entries.
+def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> {
+ // Up to 4 IPC can be decoded, issued, retired.
+ let BufferSize = 64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Functional units
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Load-Store Units
+//
+
+let Super = PdAGLU01 in
+def PdLoad : ProcResource<2> {
+ // For Piledriver, the load queue is 40 entries deep.
+ let BufferSize = 40;
+}
+
+def PdLoadQueue : LoadQueue<PdLoad>;
+
+let Super = PdAGLU01 in
+def PdStore : ProcResource<1> {
+ // For Piledriver, the store queue is 24 entries deep.
+ let BufferSize = 24;
+}
+
+def PdStoreQueue : StoreQueue<PdStore>;
+
+//===----------------------------------------------------------------------===//
+// Integer Execution Units
+//
+
+def PdDiv : ProcResource<1>; // PdEX0; unpipelined integer division
+def PdCount : ProcResource<1>; // PdEX0; POPCNT, LZCOUNT
+
+def PdMul : ProcResource<1>; // PdEX1; integer multiplication
+def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches
+
+//===----------------------------------------------------------------------===//
+// Floating-Point Units
+//
+
+// Two FMAC/FPFMA units.
+def PdFPFMA : ProcResource<2>; // PdFPU0, PdFPU1
+
+// One 128-bit integer multiply-accumulate unit.
+def PdFPMMA : ProcResource<1>; // PdFPU0
+
+// One fp conversion unit.
+def PdFPCVT : ProcResource<1>; // PdFPU0
+
+// One unit for shuffles, packs, permutes, shifts.
+def PdFPXBR : ProcResource<1>; // PdFPU1
+
+// Two 128-bit packed integer units.
+def PdFPMAL : ProcResource<2>; // PdFPU2, PdFPU3
+
+// One FP store unit.
+def PdFPSTO : ProcResource<1>; // PdFPU3
+
+
+//===----------------------------------------------------------------------===//
+// Basic helper classes.
+//===----------------------------------------------------------------------===//
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when dispatched by the schedulers.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass PdWriteRes<SchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1> {
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+}
+
+multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat,
+ list<int> Res, int UOps,
+ int LoadLat, int LoadRes, int LoadUOps> {
+ defm : PdWriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+
+ defm : PdWriteRes<SchedRW.Folded,
+ !listconcat([PdLoad], ExePorts),
+ !add(Lat, LoadLat),
+ !if(!and(!empty(Res), !eq(LoadRes, 1)),
+ [],
+ !listconcat([LoadRes], Res)),
+ !add(UOps, LoadUOps)>;
+}
+
+multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/4, /*LoadRes*/1, LoadUOps>;
+}
+
+multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/5, /*LoadRes*/1, LoadUOps>;
+}
+
+multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat,
+ list<int> Res, int UOps = 2,
+ int LoadUOps = 0> {
+ defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ /*LoadLat*/5, /*LoadRes*/2, LoadUOps>;
+}
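
As a rough editorial illustration (not part of the commit), an instantiation
such as defm : PdWriteResExPair<WriteALU, [PdEX01]>; with the defaults above
(Lat = 1, Res = [], UOps = 1) should expand into roughly the two records
below; the second is the folded-load variant (WriteALULd, i.e. WriteALU.Folded)
with the 4-cycle load latency and a PdLoad resource prepended.

// Editorial sketch of the expansion, assuming the defaults above.
def : WriteRes<WriteALU, [PdEX01]> {
  let Latency = 1;
  let ResourceCycles = [];
  let NumMicroOps = 1;
}
def : WriteRes<WriteALULd, [PdLoad, PdEX01]> { // WriteALU.Folded
  let Latency = 5;         // Lat (1) + LoadLat (4)
  let ResourceCycles = []; // Res is empty and LoadRes == 1, so left empty
  let NumMicroOps = 1;     // UOps (1) + LoadUOps (0)
}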
+
+//===----------------------------------------------------------------------===//
+// Here be dragons.
+//===----------------------------------------------------------------------===//
+
+// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers
+// needn't be available until 4 cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 5>;
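
For a concrete feel of what these ReadAdvance entries buy (an editorial
sketch, assuming ADDPSrm is scheduled through WriteFAddX.Folded and reads its
register operand via ReadAfterVecXLd; WriteFMulX is defined with latency 5
further below):

// Editorial timing sketch, not part of the model:
//   mulps %xmm1, %xmm0   // MULPSrr -> WriteFMulX, %xmm0 ready 5 cycles later
//   addps (%rdi), %xmm0  // ADDPSrm -> WriteFAddX.Folded, 5-cycle folded load
// The %xmm0 read goes through ReadAfterVecXLd (advance 5), so the dependency
// on MULPSrr is shortened by 5 cycles: %xmm0 is not needed until the folded
// load completes, and the two instructions can issue back to back without a
// register-dependency stall.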
+
+// A folded store needs a cycle on the PdStore for the store data.
+def : WriteRes<WriteRMW, [PdStore]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; }
+def : WriteRes<WriteStore, [PdStore]>;
+def : WriteRes<WriteStoreNT, [PdStore]>;
+def : WriteRes<WriteMove, [PdEX01]>;
+
+// Load/store MXCSR.
+// FIXME: These are copied and pasted from WriteLoad/Store.
+def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
+def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; }
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteZero, [/*No ExePorts*/]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteJump, [PdEX1, PdBranch]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Special case scheduling classes.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteSystem, [PdEX01]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [PdEX01]> { let Latency = 100; }
+def : WriteRes<WriteFence, [PdStore]>;
+
+def PdWriteXLAT : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+}
+def : InstRW<[PdWriteXLAT], (instrs XLAT)>;
+
+def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
+ let Latency = 184;
+ let NumMicroOps = 45;
+}
+def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
+ "LSL(16|32|64)rr")>;
+
+// Nops don't have dependencies, so there's no actual latency, but we set this
+// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
+def : WriteRes<WriteNop, [PdEX01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteALU, [PdEX01]>;
+
+def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
+
+def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBMI1],
+ (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr,
+ BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr,
+ BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr,
+ BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
+ TZMSK32rr, TZMSK64rr)>;
+
+def PdWriteBMI1m : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBMI1m],
+ (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm,
+ BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm,
+ BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm,
+ BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm,
+ TZMSK32rm, TZMSK64rm)>;
+
+defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;
+
+defm : PdWriteRes<WriteBSWAP32, [PdEX1]>;
+defm : PdWriteRes<WriteBSWAP64, [PdEX1]>;
+defm : PdWriteRes<WriteCMPXCHG, [PdEX1], 3, [], 5>;
+defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [], 2>;
+defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [], 2>;
+
+def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
+
+def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 5;
+}
+def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
+
+def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
+ (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
+
+def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 18;
+}
+def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
+
+def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
+ let Latency = 3;
+ let NumMicroOps = 22;
+}
+def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
+
+def PdWriteXCHG16rr : SchedWriteRes<[PdEX1]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteXCHG16rr], (instrs XCHG16rr)>;
+
+def PdWriteXADD : SchedWriteRes<[PdEX1]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;
+
+def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
+
+defm : PdWriteResExPair<WriteIMul8, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul16, [PdEX1, PdMul], 4, [], 2>;
+defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [], 2>;
+defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul32, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [], 1, 1>;
+defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4>;
+defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 4]>;
+defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4], 1, 1>;
+defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
+defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
+
+defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
+defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
+defm : PdWriteResExPair<WriteDiv32, [PdEX1, PdDiv], 14, [1, 14], 2>;
+defm : PdWriteResExPair<WriteDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
+
+defm : PdWriteResExPair<WriteIDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
+defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17], 2>;
+defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
+defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
+
+defm : PdWriteResExPair<WriteCRC32, [PdEX01], 3, [4], 3>;
+
+def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
+ let Latency = 5;
+ let ResourceCycles = [4];
+ let NumMicroOps = 5;
+}
+def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
+
+def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
+ let Latency = 6;
+ let ResourceCycles = [4];
+ let NumMicroOps = 7;
+}
+def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
+
+def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let ResourceCycles = [4];
+ let NumMicroOps = 11;
+}
+def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
+
+defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.
+defm : PdWriteResExPair<WriteCMOV2, [PdEX01], 1, [], 1, 1>; // Conditional (CF + ZF flag) move.
+
+def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm,
+ CMOVGE16rm, CMOVGE32rm, CMOVGE64rm,
+ CMOVL16rm, CMOVL32rm, CMOVL64rm,
+ CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>;
+
+defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.
+
+def : WriteRes<WriteSETCC, [PdEX01]>; // Setcc.
+def : WriteRes<WriteSETCCStore, [PdEX01, PdStore]>;
+
+def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteSETGEmSETGmSETLEmSETLm], (instrs SETGEm, SETGm,
+ SETLEm, SETLm)>;
+
+defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [], 2>;
+
+def WriteLAHF : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteLAHF], (instrs LAHF)>;
+
+def WriteSAHF : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteSAHF], (instrs SAHF)>;
+
+defm : PdWriteRes<WriteBitTest, [PdEX01], 1, [1], 1>;
+defm : PdWriteRes<WriteBitTestImmLd, [PdEX01, PdLoad], 5, [1, 1], 1>;
+defm : PdWriteRes<WriteBitTestRegLd, [PdEX01, PdLoad], 5, [1, 1], 7>;
+defm : PdWriteRes<WriteBitTestSet, [PdEX01], 2, [1], 2>;
+defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
+defm : PdWriteRes<WriteBitTestSetImmRMW, [PdEX01, PdLoad], 6, [1, 1], 4>;
+defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;
+defm : PdWriteRes<WriteBitTestSetRegRMW, [PdEX01, PdLoad], 6, [1, 1], 10>;
+
+// This is for simple LEAs with one or two input operands.
+// FIXME: SAGU 3-operand LEA
+def : WriteRes<WriteLEA, [PdEX01]> { let NumMicroOps = 2; }
+
+// Bit counts.
+defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [4], 6, 2>;
+defm : PdWriteResExPair<WriteBSR, [PdEX01], 4, [4], 7, 2>;
+defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4>;
+defm : PdWriteResExPair<WriteLZCNT, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteTZCNT, [PdEX01], 2, [2], 2>;
+
+// BMI1 BEXTR/BLS, BMI2 BZHI
+defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteBLS, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteBZHI, [PdEX01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResExPair<WriteShift, [PdEX01]>;
+defm : PdWriteResExPair<WriteShiftCL, [PdEX01]>;
+defm : PdWriteResExPair<WriteRotate, [PdEX01]>;
+defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;
+
+def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 12;
+ let NumMicroOps = 26;
+}
+def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;
+
+def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 12;
+ let NumMicroOps = 23;
+}
+def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;
+
+def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 11;
+ let NumMicroOps = 24;
+}
+def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;
+
+def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let NumMicroOps = 22;
+}
+def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;
+
+def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 10;
+ let NumMicroOps = 19;
+}
+def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
+
+def PdWriteRCL32rCLRCL64rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 17;
+}
+def : InstRW<[PdWriteRCL32rCLRCL64rCL], (instrs RCL32rCL, RCL64rCL)>;
+
+def PdWriteRCR64rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCR64rCL], (instrs RCR64rCL)>;
+
+def PdWriteRCR32rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCR32rCL], (instrs RCR32rCL)>;
+
+def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 7;
+ let NumMicroOps = 15;
+}
+def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
+
+
+def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 9;
+ let NumMicroOps = 20;
+}
+def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;
+
+def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 11;
+ let NumMicroOps = 21;
+}
+def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;
+
+def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 8;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;
+
+def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
+ let Latency = 13;
+ let NumMicroOps = 25;
+}
+def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
+
+// SHLD/SHRD.
+defm : PdWriteRes<WriteSHDrri, [PdEX01], 4, [6], 6>;
+defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 4, [8], 7>;
+
+def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
+ let Latency = 3;
+ let ResourceCycles = [6];
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteSHLD32rri8SHRD16rri8], (instrs SHLD32rri8, SHRD16rri8)>;
+
+def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
+ let Latency = 4;
+ let ResourceCycles = [8];
+ let NumMicroOps = 7;
+}
+def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
+ SHLD32rrCL,
+ SHRD32rrCL)>;
+
+defm : PdWriteRes<WriteSHDmri, [PdLoad, PdEX01], 4, [1, 22], 8>;
+defm : PdWriteRes<WriteSHDmrcl, [PdLoad, PdEX01], 4, [1, 22], 8>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;
+
+defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5>;
+defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5>;
+defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [], 2>;
+
+defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [1, 1, 2]>;
+defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteFStore, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU1, PdFPSTO]>;
+defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU1, PdFPSTO], 1, [], 4>;
+
+def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;
+
+def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>;
+
+defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
+defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;
+
+defm : PdWriteRes<WriteFMaskedStore, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 4], 18>;
+defm : PdWriteRes<WriteFMaskedStoreY, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 4], 34>;
+
+defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
+defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA]>;
+defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+
+defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;
+
+defm : PdWriteResXMMPair<WriteFAdd, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFAddZ>;
+
+defm : PdWriteResXMMPair<WriteFAdd64, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
+
+defm : PdWriteResXMMPair<WriteFCmp, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFCmpZ>;
+
+defm : PdWriteResXMMPair<WriteFCmp64, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
+
+defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+
+def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
+ let Latency = 6;
+}
+def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>;
+
+def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>;
+def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;
+
+defm : PdWriteResXMMPair<WriteFMul, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMulZ>;
+
+defm : PdWriteResXMMPair<WriteFMul64, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMul64Z>;
+
+defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 1]>;
+defm : X86WriteResPairUnsupported<WriteFMAZ>;
+
+
+defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 3], 15, 2>;
+
+defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 3], 16, 2>;
+defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 6], /*or 29*/ 25, 4>;
+defm : X86WriteResPairUnsupported<WriteDPPSZ>;
+
+def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 17;
+}
+def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;
+
+defm : PdWriteResXMMPair<WriteFRcp, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
+
+defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
+
+defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 38]>;
+defm : X86WriteResPairUnsupported<WriteFDivZ>;
+
+defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 19]>;
+defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 38]>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
+
+defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 21]>;
+defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 21]>;
+defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 42]>;
+defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
+
+defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 27]>;
+defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 27]>;
+defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 54]>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
+
+defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 35]>;
+defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA]>;
+
+defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
+defm : X86WriteResPairUnsupported<WriteFRndZ>;
+
+def PdWriteVFRCZ : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVFRCZ], (instrs VFRCZPDrr, VFRCZPSrr,
+ VFRCZSDrr, VFRCZSSrr)>;
+
+def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
+ VFRCZSDrm, VFRCZSSrm)>;
+
+def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 10;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 4;
+}
+def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;
+
+def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 15;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;
+
+defm : PdWriteResXMMPair<WriteFLogic, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
+defm : X86WriteResPairUnsupported<WriteFLogicZ>;
+
+defm : PdWriteResXMMPair<WriteFTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : X86WriteResPairUnsupported<WriteFTestZ>;
+
+defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
+
+def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;
+
+defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU01, PdFPFMA], 3, [1, 4]>;
+defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 6], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : X86WriteResPairUnsupported<WriteFBlendZ>;
+
+defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 4]>;
+defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 6], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
+
+defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [], 2>;
+defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
+
+def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 2;
+}
+def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;
+
+def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;
+
+def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;
+
+def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
+ let Latency = 8; // 4 + 4
+ let NumMicroOps = 10;
+}
+def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Conversions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+
+defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+
+defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
+
+def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;
+
+// FIXME: f+3 ST, LD+STC latency
+defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU1, PdFPSTO], 4, [], 2>;
+// FIXME: The .Folded version is one NumMicroOp *fewer*.
+
+defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
+
+defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU1, PdFPSTO], 4, [], 2>;
+// FIXME: The .Folded version is one NumMicroOp *fewer*.
+
+def WriteCVTSI642SDrr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 13;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteCVTSI642SDrr], (instrs CVTSI642SDrr, CVTSI642SSrr)>;
+
+defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU1, PdFPSTO], 4>;
+
+defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
+
+defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU1, PdFPSTO], 4>;
+
+defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
+
+def WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
+ MMX_CVTPI2PDirr)>;
+
+def WriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
+
+defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU1, PdFPSTO], 8, [], 2, 1>;
+defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
+
+defm : PdWriteRes<WriteCvtPS2PH, [PdFPU1, PdFPSTO], 8, [], 2>;
+defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
+
+defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU1, PdFPSTO, PdStore], 4, [], 3>;
+defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU1, PdFPSTO, PdFPFMA, PdStore], 4, [2, 1, 1, 1], 4>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector integer operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [], 2>;
+
+defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5>;
+
+defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [1, 1, 2]>;
+defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU1, PdFPSTO]>;
+defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU1, PdFPSTO], 1, [], 4>;
+
+def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+ let NumMicroOps = 8;
+}
+def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;
+
+defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
+defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;
+
+defm : PdWriteRes<WriteVecMaskedStore, [PdStore, PdFPU01, PdFPMAL], 6, [1, 1, 4]>;
+defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+
+defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
+
+defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 10>;
+defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 10, [], 2>;
+
+defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecALUY>;
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
+
+defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3>;
+defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3>;
+defm : X86WriteResPairUnsupported<WriteVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
+
+defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
+defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
+
+defm : PdWriteResXMMPair<WriteVecIMul, [PdFPU0, PdFPMMA], 4>;
+defm : PdWriteResXMMPair<WriteVecIMulX, [PdFPU0, PdFPMMA], 4>;
+defm : X86WriteResPairUnsupported<WriteVecIMulY>;
+defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
+
+defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL], 5, [2, 1, 2, 1]>;
+defm : X86WriteResPairUnsupported<WritePMULLDY>;
+defm : X86WriteResPairUnsupported<WritePMULLDZ>;
+
+def JWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> {
+ let Latency = 4;
+ let ResourceCycles = [2, 1, 2, 1];
+}
+def : InstRW<[JWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
+ VPMACSSDQLrr)>;
+
+defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 2], 9>;
+defm : X86WriteResPairUnsupported<WriteMPSADY>;
+defm : X86WriteResPairUnsupported<WriteMPSADZ>;
+
+defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [], 2>;
+defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [], 2>;
+defm : X86WriteResPairUnsupported<WritePSADBWY>;
+defm : X86WriteResPairUnsupported<WritePSADBWZ>;
+
+defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;
+
+defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 1]>;
+defm : X86WriteResPairUnsupported<WriteShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 4]>;
+defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 4]>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
+
+defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteBlendY>;
+defm : X86WriteResPairUnsupported<WriteBlendZ>;
+
+defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 4]>;
+defm : X86WriteResPairUnsupported<WriteVarBlendY>;
+defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
+
+defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WriteVecLogicY>;
+defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
+
+defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : X86WriteResPairUnsupported<WriteVecTestZ>;
+
+defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
+defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;
+
+defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector insert/extract operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [], 2>;
+defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [], 2>;
+
+defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [], 2>;
+
+def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 3;
+}
+def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE42 String instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 14, [1, 2, 1], 7, 1>;
+defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 6, [1, 2, 1], 7, 2>;
+
+defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 15, [1, 2, 6, 4, 1, 1], 27, 1>;
+defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 2, 6, 4, 1, 1], 27, 1>;
+
+////////////////////////////////////////////////////////////////////////////////
+// MOVMSK Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
+
+defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
+// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;
+
+defm : PdWriteRes<WriteMMXMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
+
+////////////////////////////////////////////////////////////////////////////////
+// AES Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteAESIMC, [PdFPU0, PdFPMMA], 5>;
+defm : PdWriteResXMMPair<WriteAESKeyGen, [PdFPU0, PdFPMMA], 5>;
+defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [], 3, 1>;
+defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [2, 1], 8, 2>;
+defm : X86WriteResPairUnsupported<WriteFHAddZ>;
+
+defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [], 3, 1>;
+defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
+defm : X86WriteResPairUnsupported<WritePHAddZ>;
+
+def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr,
+ PHADDWrr, PHSUBWrr,
+ PHADDSWrr, PHSUBSWrr,
+ VPHADDDrr, VPHSUBDrr,
+ VPHADDWrr, VPHSUBWrr,
+ VPHADDSWrr, VPHSUBSWrr)>;
+
+def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
+ PHADDWrm, PHSUBWrm,
+ PHADDSWrm, PHSUBSWrm,
+ VPHADDDrm, VPHSUBDrm,
+ VPHADDWrm, VPHSUBWrm,
+ VPHADDSWrm, VPHSUBSWrm)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Carry-less multiplication instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [], 5, 1>;
+
+def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
+ let Latency = 13;
+ let NumMicroOps = 6;
+}
+def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE4A instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 4];
+}
+def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// AVX instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 2, 4];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm)>;
+
+def PdWriteVZEROALL : SchedWriteRes<[]> {
+ let Latency = 90;
+ let NumMicroOps = 32;
+}
+def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>;
+
+def PdWriteVZEROUPPER : SchedWriteRes<[]> {
+ let Latency = 46;
+ let NumMicroOps = 16;
+}
+def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>;
+
+///////////////////////////////////////////////////////////////////////////////
+// SchedWriteVariant definitions.
+///////////////////////////////////////////////////////////////////////////////
+
+def PdWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def PdWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
+]>;
+def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def PdWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
+]>;
+def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
+ XORPDrr, VXORPDrr,
+ ANDNPSrr, VANDNPSrr,
+ ANDNPDrr, VANDNPDrr)>;
+
+// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have a latency of 1.
+
+def PdWriteVZeroIdiomLogic : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
+
+def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
+ PANDNrr, VPANDNrr)>;
+
+def PdWriteVZeroIdiomALU : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
+ MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PCMPGTBirr,
+ MMX_PCMPGTDirr,
+ MMX_PCMPGTWirr)>;
+
+def PdWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [PdWriteZeroLatency]>,
+ SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
+]>;
+def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+// VPCMPGTQ, but not PCMPGTQ!
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
+ MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
+ MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // xmm int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr
+ // But not PCMPEQQrr.
+ ], ZeroIdiomPredicate>,
+
+ // AVX
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr
+ // But not VPCMPEQQrr.
+ ], ZeroIdiomPredicate>
+]>;
+
+
+} // SchedModel
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 719e71cd25e5..33a6b01546d7 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -48,12 +48,22 @@ def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
-def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
+def JIntegerPRF : RegisterFile<64, [GR64, CCR], [1, 1], [1, 0],
+ 0, // Max moves that can be eliminated per cycle.
+ 1>; // Restrict move elimination to zero regs.
// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
-def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
+
+// The PRF in the floating point unit can eliminate a move from an MMX or SSE
+// register that is known to be zero (i.e. it has been zeroed using a zero-idiom
+// dependency breaking instruction, or via VZEROALL).
+// Reference: Section 21.8 "AMD Bobcat and Jaguar pipeline: Dependency-breaking
+// instructions" - Agner Fog's "microarchitecture.pdf"
+def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2], [1, 1, 0],
+ 0, // Max moves that can be eliminated per cycle.
+ 1>; // Restrict move elimination to zero regs.
// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
@@ -93,6 +103,12 @@ def JVALU : ProcResGroup<[JVALU0, JVALU1]>;
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available until 5
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 5>;
+def : ReadAdvance<ReadAfterVecYLd, 5>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
@@ -100,7 +116,8 @@ def : ReadAdvance<ReadAfterLd, 3>;
// folded loads.
multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [], int UOps = 1> {
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -113,13 +130,14 @@ multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 3);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [], int UOps = 1> {
+ int Lat, list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -132,13 +150,14 @@ multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [2], int UOps = 2> {
+ int Lat, list<int> Res = [2], int UOps = 2,
+ int LoadUOps = 0> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -151,12 +170,13 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !listconcat([2], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
-// A folded store needs a cycle on the SAGU for the store data.
-def : WriteRes<WriteRMW, [JSAGU]>;
+// A folded store needs a cycle on the SAGU for the store data;
+// most RMW instructions don't need an extra uop.
+defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
@@ -164,12 +184,24 @@ def : WriteRes<WriteRMW, [JSAGU]>;
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
-defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
-defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
-defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHG,[JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[JALU01, JSAGU, JLAGU], 4, [1, 1, 1], 2>;
+defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>;
+
+defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 2>;
+defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
@@ -188,27 +220,37 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
-def : WriteRes<WriteBitTest,[JALU01]>;
+
+defm : X86WriteRes<WriteBitTest, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [JALU01,JLAGU], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [JALU01,JLAGU], 4, [1,1], 5>;
+defm : X86WriteRes<WriteBitTestSet, [JALU01], 1, [1], 2>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [JALU01,JLAGU], 4, [1,1], 4>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [JALU01,JLAGU], 4, [1,1], 8>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
// Bit counts.
-defm : JWriteResIntPair<WriteBSF, [JALU01], 5, [4], 8>;
-defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [4], 8>;
+defm : JWriteResIntPair<WriteBSF, [JALU01], 4, [8], 7>;
+defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [8], 8>;
defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>;
-defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>;
+defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2], 2>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
+defm : JWriteResIntPair<WriteBLS, [JALU01], 2, [2], 2>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
+defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
+defm : JWriteResIntPair<WriteShiftCL, [JALU01], 1>;
+defm : JWriteResIntPair<WriteRotate, [JALU01], 1>;
+defm : JWriteResIntPair<WriteRotateCL, [JALU01], 1>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
@@ -269,8 +311,8 @@ defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
-defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
+defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 2, 2], 1>;
+defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
@@ -364,21 +406,21 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
-defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
-defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
+defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [4, 4], 3>;
+defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [6, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
-defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
+defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
+defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPU0, JFPA, JALU0], 7, [1,1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
+defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPU0, JFPA, JALU0], 7, [1,1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
@@ -423,8 +465,8 @@ defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1],
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
-defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
-defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
+defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 2, 2], 1>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
@@ -462,26 +504,26 @@ defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
-defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
+defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2], 3>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
-defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
+defm : JWriteResFpuPair<WritePHMINPOS, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
-defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 1], 1>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
-defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
+defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [4, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
@@ -507,8 +549,8 @@ defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPA, JALU0], 7, [1, 2, 1, 1], 3>;
-defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2, 1, 1], 3>;
+defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPU0, JFPA, JALU0], 7, [2, 2, 1, 1, 1], 3>;
+defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPU0, JFPA, JALU0], 8, [2, 2, 1, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
@@ -527,7 +569,7 @@ def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
-defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;
+defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU01, JVALU, JFPU0, JVIMUL], 3, [1,1,1,1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
@@ -559,13 +601,17 @@ def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
+def JWriteVecExtractF128: SchedWriteRes<[JFPU01, JFPX]>;
+def : InstRW<[JWriteVecExtractF128], (instrs VEXTRACTF128rr)>;
+
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
- VBROADCASTSSYrm)>;
+def : InstRW<[JWriteVBROADCASTYLd], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VBROADCASTF128)>;
def JWriteJVZEROALL: SchedWriteRes<[]> {
let Latency = 90;
@@ -587,6 +633,10 @@ def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
+def JWriteZeroIdiomYmm : SchedWriteRes<[JFPU01, JFPX]> {
+ let NumMicroOps = 2;
+}
+
// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
@@ -598,54 +648,73 @@ def JWriteZeroLatency : SchedWriteRes<[]> {
def JWriteZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
+ SchedVar<NoSchedPred, [WriteALU]>
]>;
def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
XOR32rr, XOR64rr)>;
def JWriteFZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
+ SchedVar<NoSchedPred, [WriteFLogic]>
]>;
def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
ANDNPSrr, VANDNPSrr,
ANDNPDrr, VANDNPDrr)>;
+def JWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroIdiomYmm]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[JWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+ VANDNPSYrr, VANDNPDYrr)>;
+
def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
+ SchedVar<NoSchedPred, [WriteVecLogic]>
]>;
def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
PANDNrr, VPANDNrr)>;
def JWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
+ SchedVar<NoSchedPred, [WriteVecALU]>
]>;
def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr,
+ MMX_PSUBUSBirr, MMX_PSUBUSWirr,
MMX_PCMPGTBirr, MMX_PCMPGTDirr,
MMX_PCMPGTWirr)>;
def JWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
+ SchedVar<NoSchedPred, [WriteVecALUX]>
]>;
def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PSUBDrr, VPSUBDrr,
PSUBQrr, VPSUBQrr,
PSUBWrr, VPSUBWrr,
+ PSUBSBrr, VPSUBSBrr,
+ PSUBSWrr, VPSUBSWrr,
+ PSUBUSBrr, VPSUBUSBrr,
+ PSUBUSWrr, VPSUBUSWrr,
PCMPGTBrr, VPCMPGTBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTQrr, VPCMPGTQrr,
PCMPGTWrr, VPCMPGTWrr)>;
+def JWriteVPERM2F128 : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomVPERMPredicate>, [JWriteZeroIdiomYmm]>,
+ SchedVar<NoSchedPred, [WriteFShuffle256]>
+]>;
+def : InstRW<[JWriteVPERM2F128], (instrs VPERM2F128rr)>;
+
// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
let Latency = 2;
@@ -666,8 +735,8 @@ def JSlowLEAPredicate : MCSchedPredicate<
>;
def JWriteLEA : SchedWriteVariant<[
- SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
- SchedVar<MCSchedPredicate<TruePred>, [WriteLEA]>
+ SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
+ SchedVar<NoSchedPred, [WriteLEA]>
]>;
def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
@@ -679,4 +748,91 @@ def JSlowLEA16r : SchedWriteRes<[JALU01]> {
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr,
+ MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr,
+ MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr,
+ MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // xmm int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr
+ ], ZeroIdiomPredicate>,
+
+ DepBreakingClass<[ VPERM2F128rr ], ZeroIdiomVPERMPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsOptimizableRegisterMove<[
+ InstructionEquivalenceClass<[
+ // GPR variants.
+ MOV32rr, MOV64rr,
+
+ // MMX variants.
+ MMX_MOVQ64rr,
+
+ // SSE variants.
+ MOVAPSrr, MOVUPSrr,
+ MOVAPDrr, MOVUPDrr,
+ MOVDQArr, MOVDQUrr,
+
+ // AVX variants.
+ VMOVAPSrr, VMOVUPSrr,
+ VMOVAPDrr, VMOVUPDrr,
+ VMOVDQArr, VMOVDQUrr
+ ], TruePred >
+]>;
+
} // SchedModel
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
index b1e843013707..fcaff7cf810f 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -49,6 +49,9 @@ def SLMFPDivider : ProcResource<1>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
+def : ReadAdvance<ReadAfterVecLd, 3>;
+def : ReadAdvance<ReadAfterVecXLd, 3>;
+def : ReadAdvance<ReadAfterVecYLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
@@ -95,13 +98,28 @@ def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+
+defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1, 2], 2>;
+defm : X86WriteRes<WriteXCHG, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteShiftCL, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteRotate, [SLM_IEC_RSV0], 1>;
+defm : SLMWriteResPair<WriteRotateCL, [SLM_IEC_RSV0], 1>;
defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
@@ -119,8 +137,13 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
// FIXME Latency and NumMicrOps?
let ResourceCycles = [2,1];
}
-def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
-def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
+defm : X86WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestSet, [SLM_IEC_RSV01], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -134,8 +157,9 @@ defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
+defm : X86WriteResPairUnsupported<WriteBLS>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 7184b850a195..a866f843106b 100644
--- a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -87,9 +87,14 @@ def ZnMultiplier : ProcResource<1>;
// Integer division issued on ALU2.
def ZnDivider : ProcResource<1>;
-// 4 Cycles load-to use Latency is captured
+// 4 Cycles integer load-to-use Latency is captured
def : ReadAdvance<ReadAfterLd, 4>;
+// 8 Cycles vector load-to-use Latency is captured
+def : ReadAdvance<ReadAfterVecLd, 8>;
+def : ReadAdvance<ReadAfterVecXLd, 8>;
+def : ReadAdvance<ReadAfterVecYLd, 8>;
+
// The Integer PRF for Zen is 168 entries, and it holds the architectural and
// speculative version of the 64-bit integer registers.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
@@ -177,13 +182,28 @@ def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
-defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+
+defm : ZnWriteResPair<WriteIMul8, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16Imm, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16Reg, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32Imm, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32Reg, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+//defm : ZnWriteResPair<WriteIMul64Imm, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+//defm : ZnWriteResPair<WriteIMul64Reg, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [ZnALU], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[ZnALU,ZnAGU], 8, [1,1], 5>;
+defm : X86WriteRes<WriteXCHG, [ZnALU], 1, [2], 2>;
-defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteShiftCL, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteRotate, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteRotateCL, [ZnALU], 1>;
defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteSHDrrcl>;
@@ -198,7 +218,13 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
-def : WriteRes<WriteBitTest,[ZnALU]>;
+
+defm : X86WriteRes<WriteBitTest, [ZnALU], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [ZnALU], 2, [1], 2>;
+//defm : X86WriteRes<WriteBitTestSetImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
+//defm : X86WriteRes<WriteBitTestSetRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
// Bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
@@ -210,9 +236,10 @@ defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
-// BMI1 BEXTR, BMI2 BZHI
+// BMI1 BEXTR/BLS, BMI2 BZHI
defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
+//defm : ZnWriteResPair<WriteBLS, [ZnALU], 2>;
+defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
// IDIV
defm : ZnWriteResPair<WriteDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
@@ -492,21 +519,13 @@ def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>;
//-- Move instructions --//
// MOV.
// r16,m.
-def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;
+def : InstRW<[WriteALULd, ReadAfterLd], (instrs MOV16rm)>;
// MOVSX, MOVZX.
// r,m.
def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
// XCHG.
-// r,r.
-def ZnWriteXCHG : SchedWriteRes<[ZnALU]> {
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-
-def : InstRW<[ZnWriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
-
// r,m.
def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 5;
@@ -522,7 +541,7 @@ def ZnWritePop16r : SchedWriteRes<[ZnAGU]>{
let Latency = 5;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWritePop16r], (instregex "POP16rmm")>;
+def : InstRW<[ZnWritePop16r], (instrs POP16rmm)>;
def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
@@ -582,45 +601,51 @@ def : InstRW<[WriteALULd],
def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteMul16], (instrs IMUL16r, MUL16r)>;
-def : InstRW<[ZnWriteMul16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16, ZnWriteMul16>;
+def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right?
+def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right?
+def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
// m16.
def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16m, MUL16m)>;
+def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>;
// r32.
def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteMul32], (instrs IMUL32r, MUL32r)>;
-def : InstRW<[ZnWriteMul32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul32], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32, ZnWriteMul32>;
+def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right?
+def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right?
+def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
// m32.
def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32m, MUL32m)>;
+def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>;
// r64.
def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 4;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteMul64], (instrs IMUL64r, MUL64r)>;
-def : InstRW<[ZnWriteMul64], (instrs IMUL64rr, IMUL64rri8, IMUL64rri32)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul64], (instrs IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64, ZnWriteMul64>;
+def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right?
+def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right?
+def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
// m64.
def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 9;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64m, MUL64m)>;
+def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
// MULX.
// r32,r32,r32.
@@ -696,31 +721,21 @@ def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 6;
}
-// BT.
-// m,i.
-def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
-
// BTR BTS BTC.
-// r,r,i.
-def ZnWriteBTRSC : SchedWriteRes<[ZnALU]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-def : InstRW<[ZnWriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
-
// m,r,i.
def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
let Latency = 6;
let NumMicroOps = 2;
}
// m,r,i.
-def : InstRW<[ZnWriteBTRSCm], (instregex "BT(R|S|C)(16|32|64)m(r|i8)")>;
+def : SchedAlias<WriteBitTestSetImmRMW, ZnWriteBTRSCm>;
+def : SchedAlias<WriteBitTestSetRegRMW, ZnWriteBTRSCm>;
// BLSI BLSMSK BLSR.
// r,r.
-def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>;
+def : SchedAlias<WriteBLS, ZnWriteALULat2>;
// r,m.
-def : InstRW<[ZnWriteALULat2Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
+def : SchedAlias<WriteBLSLd, ZnWriteALULat2Ld>;
// CLD STD.
def : InstRW<[WriteALU], (instrs STD, CLD)>;
@@ -750,13 +765,6 @@ def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
//-- Misc instructions --//
-// CMPXCHG.
-def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> {
- let Latency = 8;
- let NumMicroOps = 5;
-}
-def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
-
// CMPXCHG8B.
def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> {
let NumMicroOps = 18;
@@ -782,10 +790,10 @@ def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
// RDRAND.
-def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
+def : InstRW<[WriteMicrocoded], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
// XGETBV.
-def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
+def : InstRW<[WriteMicrocoded], (instrs XGETBV)>;
//-- String instructions --//
// CMPS.
@@ -807,6 +815,8 @@ def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
// XADD.
+def ZnXADD : SchedWriteRes<[ZnALU]>;
+def : InstRW<[ZnXADD], (instregex "XADD(8|16|32|64)rr")>;
def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
//=== Floating Point x87 Instructions ===//
@@ -821,16 +831,16 @@ def ZnWriteSTr: SchedWriteRes<[ZnFPU23]> {
// LD_F.
// r.
-def : InstRW<[ZnWriteFLDr], (instregex "LD_Frr")>;
+def : InstRW<[ZnWriteFLDr], (instrs LD_Frr)>;
// m.
def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> {
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteLD_F80m], (instregex "LD_F80m")>;
+def : InstRW<[ZnWriteLD_F80m], (instrs LD_F80m)>;
// FBLD.
-def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
+def : InstRW<[WriteMicrocoded], (instrs FBLDm)>;
// FST(P).
// r.
@@ -840,11 +850,11 @@ def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;
def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> {
let Latency = 5;
}
-def : InstRW<[ZnWriteST_FP80m], (instregex "ST_FP80m")>;
+def : InstRW<[ZnWriteST_FP80m], (instrs ST_FP80m)>;
// FBSTP.
// m80.
-def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
+def : InstRW<[WriteMicrocoded], (instrs FBSTPm)>;
def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>;
@@ -901,10 +911,10 @@ def : InstRW<[ZnWriteFPU3], (instrs FINCSTP, FDECSTP)>;
def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;
// FNSAVE.
-def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
+def : InstRW<[WriteMicrocoded], (instrs FSAVEm)>;
// FRSTOR.
-def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
+def : InstRW<[WriteMicrocoded], (instrs FRSTORm)>;
//-- Arithmetic instructions --//
@@ -1401,46 +1411,46 @@ def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
// SHA1MSG2
// x,x.
def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]> ;
-def : InstRW<[ZnWriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
+def : InstRW<[ZnWriteSHA1MSG2r], (instrs SHA1MSG2rr)>;
// x,m.
def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
+def : InstRW<[ZnWriteSHA1MSG2Ld], (instrs SHA1MSG2rm)>;
// SHA1NEXTE
// x,x.
def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]> ;
-def : InstRW<[ZnWriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
+def : InstRW<[ZnWriteSHA1NEXTEr], (instrs SHA1NEXTErr)>;
// x,m.
def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
+def : InstRW<[ZnWriteSHA1NEXTELd], (instrs SHA1NEXTErm)>;
// SHA1RNDS4
// x,x.
def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> {
let Latency = 6;
}
-def : InstRW<[ZnWriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
+def : InstRW<[ZnWriteSHA1RNDS4r], (instrs SHA1RNDS4rri)>;
// x,m.
def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 13;
}
-def : InstRW<[ZnWriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
+def : InstRW<[ZnWriteSHA1RNDS4Ld], (instrs SHA1RNDS4rmi)>;
// SHA256RNDS2
// x,x.
def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> {
let Latency = 4;
}
-def : InstRW<[ZnWriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
+def : InstRW<[ZnWriteSHA256RNDS2r], (instrs SHA256RNDS2rr)>;
// x,m.
def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
let Latency = 11;
}
-def : InstRW<[ZnWriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
+def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>;
//-- Arithmetic instructions --//
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index e131f1a1e4bd..008a9ec2ba3c 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -170,10 +170,11 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
InFlag = Chain.getValue(1);
}
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ bool Use64BitRegs = Subtarget.isTarget64BitLP64();
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
@@ -249,20 +250,21 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
if (Repeats.BytesLeft() > 0 &&
DAG.getMachineFunction().getFunction().optForMinSize()) {
- // When agressively optimizing for size, avoid generating the code to
+ // When aggressively optimizing for size, avoid generating the code to
// handle BytesLeft.
Repeats.AVT = MVT::i8;
}
}
+ bool Use64BitRegs = Subtarget.isTarget64BitLP64();
SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
+ Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RSI : X86::ESI,
Src, InFlag);
InFlag = Chain.getValue(1);
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index c7ddf93f8e85..720be8afa62c 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -112,11 +112,10 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
return true;
}
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+void DecodePSHUFBMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask) {
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
// The shuffle mask requires a byte vector.
@@ -125,7 +124,7 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / 8;
assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
"Unexpected number of vector elements.");
@@ -151,12 +150,10 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
@@ -166,7 +163,7 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
unsigned NumEltsPerLane = 128 / ElSize;
assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
"Unexpected number of vector elements.");
@@ -189,11 +186,13 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
}
void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
+ unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
(void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
+ assert((MaskTySize == 128 || MaskTySize == 256) &&
+ Width >= MaskTySize && "Unexpected vector size.");
// The shuffle mask requires elements the same size as the target.
APInt UndefElts;
@@ -201,7 +200,7 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
unsigned NumEltsPerLane = 128 / ElSize;
assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected number of vector elements.");
@@ -242,9 +241,12 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
}
}
-void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- assert(C->getType()->getPrimitiveSizeInBits() == 128 &&
- "Unexpected vector size.");
+void DecodeVPPERMMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ (void)MaskTySize;
+ assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");
// The shuffle mask requires a byte vector.
APInt UndefElts;
@@ -252,7 +254,7 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / 8;
assert(NumElts == 16 && "Unexpected number of vector elements.");
for (unsigned i = 0; i != NumElts; ++i) {
@@ -291,12 +293,10 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
"Unexpected vector element size.");
@@ -307,7 +307,7 @@ void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
for (unsigned i = 0; i != NumElts; ++i) {
if (UndefElts[i]) {
@@ -319,12 +319,10 @@ void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
}
}
-void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- (void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ assert((Width == 128 || Width == 256 || Width == 512) &&
+ C->getType()->getPrimitiveSizeInBits() >= Width &&
"Unexpected vector size.");
assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
"Unexpected vector element size.");
@@ -335,7 +333,7 @@ void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- unsigned NumElts = RawMask.size();
+ unsigned NumElts = Width / ElSize;
for (unsigned i = 0; i != NumElts; ++i) {
if (UndefElts[i]) {
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
index b703cbbd2b29..b08c31935d28 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -26,25 +26,28 @@ class Constant;
class MVT;
/// Decode a PSHUFB mask from an IR-level vector constant.
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSHUFBMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILP2 variable mask from an IR-level vector constant.
void DecodeVPERMIL2PMask(const Constant *C, unsigned MatchImm, unsigned ElSize,
+ unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPPERM variable mask from an IR-level vector constant.
-void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPPERMMask(const Constant *C, unsigned Width,
+ SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
+void DecodeVPERMVMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 078fe1598f13..a729161a1beb 100644
--- a/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -61,7 +61,7 @@
using namespace llvm;
-#define PASS_KEY "x86-speculative-load-hardening"
+#define PASS_KEY "x86-slh"
#define DEBUG_TYPE PASS_KEY
STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
@@ -75,6 +75,11 @@ STATISTIC(NumCallsOrJumpsHardened,
STATISTIC(NumInstsInserted, "Number of instructions inserted");
STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
+static cl::opt<bool> EnableSpeculativeLoadHardening(
+ "x86-speculative-load-hardening",
+ cl::desc("Force enable speculative load hardening"), cl::init(false),
+ cl::Hidden);
+
static cl::opt<bool> HardenEdgesWithLFENCE(
PASS_KEY "-lfence",
cl::desc(
@@ -114,12 +119,6 @@ static cl::opt<bool> HardenIndirectCallsAndJumps(
"mitigate Spectre v1.2 style attacks."),
cl::init(true), cl::Hidden);
-namespace llvm {
-
-void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
-
-} // end namespace llvm
-
namespace {
class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
@@ -179,6 +178,9 @@ private:
void unfoldCallAndJumpLoads(MachineFunction &MF);
+ SmallVector<MachineInstr *, 16>
+ tracePredStateThroughIndirectBranches(MachineFunction &MF);
+
void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
unsigned saveEFLAGS(MachineBasicBlock &MBB,
@@ -401,6 +403,12 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
<< " **********\n");
+ // Only run if this pass is force-enabled or we detect the relevant function
+ // attribute requesting SLH.
+ if (!EnableSpeculativeLoadHardening &&
+ !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+ return false;
+
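For context, the attribute checked above is an ordinary LLVM function attribute, so a frontend or earlier pass can opt a single function into hardening without the force-enable flag. A minimal sketch of that (the helper name is illustrative, not part of this change):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Hedged sketch: mark one IR function so the gate above
// (hasFnAttribute(Attribute::SpeculativeLoadHardening)) fires for it.
void requestSLH(llvm::Function &F) {
  F.addFnAttr(llvm::Attribute::SpeculativeLoadHardening);
}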
Subtarget = &MF.getSubtarget<X86Subtarget>();
MRI = &MF.getRegInfo();
TII = Subtarget->getInstrInfo();
@@ -522,11 +530,16 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
}
}
- // If we are going to harden calls and jumps we need to unfold their memory
- // operands.
- if (HardenIndirectCallsAndJumps)
+ if (HardenIndirectCallsAndJumps) {
+ // If we are going to harden calls and jumps we need to unfold their memory
+ // operands.
unfoldCallAndJumpLoads(MF);
+ // Then we trace predicate state through the indirect branches.
+ auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
+ CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
+ }
+
// Now that we have the predicate state available at the start of each block
// in the CFG, trace it through each block, hardening vulnerable instructions
// as we go.
@@ -809,7 +822,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
"split above!");
// Sort and unique the codes to minimize them.
- llvm::sort(UncondCodeSeq.begin(), UncondCodeSeq.end());
+ llvm::sort(UncondCodeSeq);
UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
UncondCodeSeq.end());
@@ -925,6 +938,265 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
}
}
+/// Trace the predicate state through indirect branches, instrumenting them to
+/// poison the state if a target is reached that does not match the expected
+/// target.
+///
+/// This is designed to mitigate Spectre variant 1 attacks where an indirect
+/// branch is trained to predict a particular target and then mispredicts that
+/// target in a way that can leak data. Despite using an indirect branch, this
+/// is really a variant 1 style attack: it does not steer execution to an
+/// arbitrary or attacker controlled address, and it does not require any
+/// special code executing next to the victim. This attack can also be mitigated
+/// through retpolines, but those require either replacing indirect branches
+/// with conditional direct branches or lowering them through a device that
+/// blocks speculation. This mitigation can replace these retpoline-style
+/// mitigations for jump tables and other indirect branches within a function
+/// when variant 2 isn't a risk, while still allowing limited speculation. Indirect
+/// calls, however, cannot be mitigated through this technique without changing
+/// the ABI in a fundamental way.
+SmallVector<MachineInstr *, 16>
+X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
+ MachineFunction &MF) {
+ // We use the SSAUpdater to insert PHI nodes for the target addresses of
+ // indirect branches. We don't actually need the full power of the SSA updater
+ // in this particular case as we always have immediately available values, but
+ // this avoids us having to re-implement the PHI construction logic.
+ MachineSSAUpdater TargetAddrSSA(MF);
+ TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
+
+ // Track which blocks were terminated with an indirect branch.
+ SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
+
+ // We need to know what blocks end up reached via indirect branches. We
+ // expect this to be a subset of those whose address is taken and so track it
+ // directly via the CFG.
+ SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
+
+ // Walk all the blocks which end in an indirect branch and make the
+ // target address available.
+ for (MachineBasicBlock &MBB : MF) {
+ // Find the last terminator.
+ auto MII = MBB.instr_rbegin();
+ while (MII != MBB.instr_rend() && MII->isDebugInstr())
+ ++MII;
+ if (MII == MBB.instr_rend())
+ continue;
+ MachineInstr &TI = *MII;
+ if (!TI.isTerminator() || !TI.isBranch())
+ // No terminator or non-branch terminator.
+ continue;
+
+ unsigned TargetReg;
+
+ switch (TI.getOpcode()) {
+ default:
+ // Direct branch or conditional branch (leading to fallthrough).
+ continue;
+
+ case X86::FARJMP16m:
+ case X86::FARJMP32m:
+ case X86::FARJMP64:
+ // We cannot mitigate far jumps or calls, but we also don't expect them
+ // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
+ continue;
+
+ case X86::JMP16m:
+ case X86::JMP16m_NT:
+ case X86::JMP32m:
+ case X86::JMP32m_NT:
+ case X86::JMP64m:
+ case X86::JMP64m_NT:
+ // Mostly as documentation.
+ report_fatal_error("Memory operand jumps should have been unfolded!");
+
+ case X86::JMP16r:
+ report_fatal_error(
+ "Support for 16-bit indirect branches is not implemented.");
+ case X86::JMP32r:
+ report_fatal_error(
+ "Support for 32-bit indirect branches is not implemented.");
+
+ case X86::JMP64r:
+ TargetReg = TI.getOperand(0).getReg();
+ }
+
+ // We have definitely found an indirect branch. Verify that there are no
+ // preceding conditional branches as we don't yet support that.
+ if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
+ return !OtherTI.isDebugInstr() && &OtherTI != &TI;
+ })) {
+ LLVM_DEBUG({
+ dbgs() << "ERROR: Found other terminators in a block with an indirect "
+ "branch! This is not yet supported! Terminator sequence:\n";
+ for (MachineInstr &MI : MBB.terminators()) {
+ MI.dump();
+ dbgs() << '\n';
+ }
+ });
+ report_fatal_error("Unimplemented terminator sequence!");
+ }
+
+ // Make the target register an available value for this block.
+ TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
+ IndirectTerminatedMBBs.insert(&MBB);
+
+ // Add all the successors to our target candidates.
+ for (MachineBasicBlock *Succ : MBB.successors())
+ IndirectTargetMBBs.insert(Succ);
+ }
+
+ // Keep track of the cmov instructions we insert so we can return them.
+ SmallVector<MachineInstr *, 16> CMovs;
+
+ // If we didn't find any indirect branches with targets, nothing to do here.
+ if (IndirectTargetMBBs.empty())
+ return CMovs;
+
+ // We found indirect branches and targets that need to be instrumented to
+ // harden loads within them. Walk the blocks of the function (to get a stable
+ // ordering) and instrument each target of an indirect branch.
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip the blocks that aren't candidate targets.
+ if (!IndirectTargetMBBs.count(&MBB))
+ continue;
+
+ // We don't expect EH pads to ever be reached via an indirect branch. If
+ // this is desired for some reason, we could simply skip them here rather
+ // than asserting.
+ assert(!MBB.isEHPad() &&
+ "Unexpected EH pad as target of an indirect branch!");
+
+ // We should never end up threading EFLAGS into a block to harden
+ // conditional jumps as there would be an additional successor via the
+ // indirect branch. As a consequence, all such edges would be split before
+ // reaching here, and the inserted block will handle the EFLAGS-based
+ // hardening.
+ assert(!MBB.isLiveIn(X86::EFLAGS) &&
+ "Cannot check within a block that already has live-in EFLAGS!");
+
+ // We can't handle having non-indirect edges into this block unless this is
+ // the only successor and we can synthesize the necessary target address.
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ // If we've already handled this by extracting the target directly,
+ // nothing to do.
+ if (IndirectTerminatedMBBs.count(Pred))
+ continue;
+
+ // Otherwise, we have to be the only successor. We generally expect this
+ // to be true as conditional branches should have had a critical edge
+ // split already. We don't however need to worry about EH pad successors
+ // as they'll happily ignore the target and their hardening strategy is
+ // resilient to all ways in which they could be reached speculatively.
+ if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
+ return Succ->isEHPad() || Succ == &MBB;
+ })) {
+ LLVM_DEBUG({
+ dbgs() << "ERROR: Found conditional entry to target of indirect "
+ "branch!\n";
+ Pred->dump();
+ MBB.dump();
+ });
+ report_fatal_error("Cannot harden a conditional entry to a target of "
+ "an indirect branch!");
+ }
+
+ // Now we need to compute the address of this block and install it as a
+ // synthetic target in the predecessor. We do this at the bottom of the
+ // predecessor.
+ auto InsertPt = Pred->getFirstTerminator();
+ unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // Directly materialize it into an immediate.
+ auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
+ TII->get(X86::MOV64ri32), TargetReg)
+ .addMBB(&MBB);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
+ dbgs() << "\n");
+ } else {
+ auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
+ TargetReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addMBB(&MBB)
+ .addReg(/*Segment*/ 0);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
+ dbgs() << "\n");
+ }
+ // And make this available.
+ TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
+ }
+
+ // Materialize the needed SSA value of the target. Note that we need the
+ // middle of the block as this block might at the bottom have an indirect
+ // branch back to itself. We can do this here because at this point, every
+ // predecessor of this block has an available value. This is basically just
+ // automating the construction of a PHI node for this target.
+ unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
+
+ // Insert a comparison of the incoming target register with this block's
+ // address. This also requires us to mark the block as having its address
+ // taken explicitly.
+ MBB.setHasAddressTaken();
+ auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // Check directly against a relocated immediate when we can.
+ auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
+ .addReg(TargetReg, RegState::Kill)
+ .addMBB(&MBB);
+ ++NumInstsInserted;
+ (void)CheckI;
+ LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+ } else {
+ // Otherwise compute the address into a register first.
+ unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+ auto AddrI =
+ BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addMBB(&MBB)
+ .addReg(/*Segment*/ 0);
+ ++NumInstsInserted;
+ (void)AddrI;
+ LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
+ auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
+ .addReg(TargetReg, RegState::Kill)
+ .addReg(AddrReg, RegState::Kill);
+ ++NumInstsInserted;
+ (void)CheckI;
+ LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+ }
+
+ // Now cmov over the predicate if the comparison wasn't equal.
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+ auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+ unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ auto CMovI =
+ BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
+ .addReg(PS->InitialReg)
+ .addReg(PS->PoisonReg);
+ CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+ ++NumInstsInserted;
+ LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+ CMovs.push_back(&*CMovI);
+
+ // And put the new value into the available values for SSA form of our
+ // predicate state.
+ PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
+ }
+
+ // Return all the newly inserted cmov instructions of the predicate state.
+ return CMovs;
+}
+
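Conceptually, each block reachable through an indirect branch now re-derives its own address and compares it with the address the branch actually used, folding any mismatch into the predicate state. A minimal C++ model of that check, with illustrative names (the pass itself emits the LEA/CMP/CMOV machine instruction sequence shown above):

#include <cstdint>

// Hedged model: Target is the value the JMP64r jumped through (threaded to
// this block by MachineSSAUpdater), Here is the address of the block being
// entered, and State is the predicate state (zero on the architectural path,
// all-ones once misspeculation has been detected).
inline uint64_t checkIndirectTarget(uint64_t State, const void *Target,
                                    const void *Here) {
  // Architecturally Target == Here, so State passes through unchanged; under
  // misspeculation the compare fails and State becomes all-ones, which later
  // poisons every hardened load address.
  return Target == Here ? State : ~uint64_t(0);
}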
/// Returns true if the instruction has no behavior (specified or otherwise)
/// that is based on the value of any of its register operands
///
@@ -1498,13 +1770,6 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
// pass specifically so that we have the complete set of instructions for
// which we will do post-load hardening and can defer it in certain
// circumstances.
- //
- // FIXME: This could probably be made even more effective by doing it
- // across the entire function. Rather than just walking the flat list
- // backwards here, we could walk the function in PO and each block bottom
- // up, allowing us to in some cases sink hardening across block blocks. As
- // long as the in-block predicate state is used at the eventual hardening
- // site, this remains safe.
for (MachineInstr &MI : MBB) {
if (HardenLoads) {
// We cannot both require hardening the def of a load and its address.
@@ -1586,8 +1851,8 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
}
// Otherwise we have a call. We need to handle transferring the predicate
- // state into a call and recovering it after the call returns unless this
- // is a tail call.
+ // state into a call and recovering it after the call returns (unless this
+ // is a tail call).
assert(MI.isCall() && "Should only reach here for calls!");
tracePredStateThroughCall(MI);
}
@@ -2109,21 +2374,10 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
DebugLoc Loc = MI.getDebugLoc();
auto InsertPt = MI.getIterator();
- if (FenceCallAndRet) {
- // Simply forcibly block speculation of loads out of the function by using
- // an LFENCE. This is potentially a heavy-weight mitigation strategy, but
- // should be secure, is simple from an ABI perspective, and the cost can be
- // minimized through inlining.
- //
- // FIXME: We should investigate ways to establish a strong data-dependency
- // on the return. However, poisoning the stack pointer is unlikely to work
- // because the return is *predicted* rather than relying on the load of the
- // return address to actually resolve.
- BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE));
- ++NumInstsInserted;
- ++NumLFENCEsInserted;
+ if (FenceCallAndRet)
+ // No need to fence here as we'll fence at the return site itself. That
+ // handles more cases than we can handle here.
return;
- }
// Take our predicate state, shift it to the high 17 bits (so that we keep
// pointers canonical) and merge it into RSP. This will allow the caller to
@@ -2141,31 +2395,168 @@ void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
///
/// For tail calls, this is all we need to do.
///
-/// For calls where we might return to control flow, we further need to extract
-/// the predicate state built up within that function from the high bits of the
-/// stack pointer, and make that the newly available predicate state.
+/// For calls where we might return and resume the control flow, we need to
+/// extract the predicate state from the high bits of the stack pointer after
+/// control returns from the called function.
+///
+/// We also need to verify that we intended to return to this location in the
+/// code. An attacker might arrange for the processor to mispredict the return
+/// to some valid but incorrect return address in the program rather than the
+/// correct one. See the paper on this attack, called "ret2spec" by the
+/// researchers, here:
+/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
+///
+/// The way we verify that we returned to the correct location is by preserving
+/// the expected return address across the call. One technique involves taking
+/// advantage of the red-zone to load the return address from `-8(%rsp)` where it
+/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
+/// directly save the address into a register that will be preserved across the
+/// call. We compare this intended return address against the address
+/// immediately following the call (the observed return address). If these
+/// mismatch, we have detected misspeculation and can poison our predicate
+/// state.
void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
auto InsertPt = MI.getIterator();
DebugLoc Loc = MI.getDebugLoc();
+ if (FenceCallAndRet) {
+ if (MI.isReturn())
+ // Tail call; we don't return to this function.
+ // FIXME: We should also handle noreturn calls.
+ return;
+
+ // We don't need to fence before the call because the called function should
+ // fence in its entry. However, we do need to fence after the call returns.
+ // Fencing before the return doesn't correctly handle cases where the return
+ // itself is mispredicted.
+ BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
+ ++NumInstsInserted;
+ ++NumLFENCEsInserted;
+ return;
+ }
+
// First, we transfer the predicate state into the called function by merging
// it into the stack pointer. This will kill the current def of the state.
unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
// If this call is also a return, it is a tail call and we don't need anything
- // else to handle it so just continue.
- // FIXME: We should also handle noreturn calls.
- if (MI.isReturn())
+ // else to handle it so just return. Also, if there are no further
+ // instructions and no successors, this call does not return so we can also
+ // bail.
+ if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
return;
- // We need to step past the call and recover the predicate state from SP after
- // the return, and make this new state available.
+ // Create a symbol to track the return address and attach it to the call
+ // machine instruction. We will lower extra symbols attached to call
+ // instructions as labels immediately following the call.
+ MCSymbol *RetSymbol =
+ MF.getContext().createTempSymbol("slh_ret_addr",
+ /*AlwaysAddSuffix*/ true);
+ MI.setPostInstrSymbol(MF, RetSymbol);
+
+ const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
+ unsigned ExpectedRetAddrReg = 0;
+
+ // If we have no red zones, or if the function returns twice (as setjmp does,
+ // possibly without using the `ret` instruction), we need to save the expected
+ // return address prior to the call.
+ if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone) ||
+ MF.exposesReturnsTwice()) {
+ // If we don't have red zones, we need to compute the expected return
+ // address prior to the call and store it in a register that lives across
+ // the call.
+ //
+ // In some ways, this is doubly satisfying as a mitigation because it will
+ // also successfully detect stack smashing bugs in some cases (typically,
+ // when a callee-saved register is used and the callee doesn't push it onto
+ // the stack). But that isn't our primary goal, so we only use it as
+ // a fallback.
+ //
+ // FIXME: It isn't clear that this is reliable in the face of
+ // rematerialization in the register allocator. We somehow need to force
+ // that to not occur for this particular instruction, and instead to spill
+ // or otherwise preserve the value computed *prior* to the call.
+ //
+ // FIXME: It is even less clear why MachineCSE can't just fold this when we
+ // end up having to use identical instructions both before and after the
+ // call to feed the comparison.
+ ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
+ .addSym(RetSymbol);
+ } else {
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addSym(RetSymbol)
+ .addReg(/*Segment*/ 0);
+ }
+ }
+
+ // Step past the call to handle when it returns.
++InsertPt;
+
+ // If we didn't pre-compute the expected return address into a register, then
+ // red zones are enabled and the return address is still available on the
+ // stack immediately after the call, so as the very first instruction after
+ // the call we load it into a register.
+ if (!ExpectedRetAddrReg) {
+ ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
+ .addReg(/*Base*/ X86::RSP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
+ // the return address is 8 bytes past it.
+ .addReg(/*Segment*/ 0);
+ }
+
+ // Now we extract the callee's predicate state from the stack pointer.
unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
- PS->SSA.AddAvailableValue(&MBB, NewStateReg);
+
+ // Test the expected return address against our actual address. If we can
+ // form this basic block's address as an immediate, this is easy. Otherwise
+ // we compute it.
+ if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+ !Subtarget->isPositionIndependent()) {
+ // FIXME: Could we fold this with the load? It would require careful EFLAGS
+ // management.
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
+ .addReg(ExpectedRetAddrReg, RegState::Kill)
+ .addSym(RetSymbol);
+ } else {
+ unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
+ .addReg(/*Base*/ X86::RIP)
+ .addImm(/*Scale*/ 1)
+ .addReg(/*Index*/ 0)
+ .addSym(RetSymbol)
+ .addReg(/*Segment*/ 0);
+ BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
+ .addReg(ExpectedRetAddrReg, RegState::Kill)
+ .addReg(ActualRetAddrReg, RegState::Kill);
+ }
+
+ // Now conditionally update the predicate state we just extracted if we ended
+ // up at a different return address than expected.
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+ auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+
+ unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+ auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
+ .addReg(NewStateReg, RegState::Kill)
+ .addReg(PS->PoisonReg);
+ CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+ ++NumInstsInserted;
+ LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+
+ PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
}
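In short, the call site records where control should resume and distrusts the predicate state it recovers from RSP unless execution really did land there. A minimal C++ model of that comparison, with illustrative names (the pass itself emits the MOV/LEA/CMP/CMOV sequence built above):

#include <cstdint>

// Hedged model: Expected is the return address captured against the
// slh_ret_addr label (kept in a register when there is no red zone, otherwise
// reloaded from -8(%rsp) after the call), Actual is the address execution
// actually resumed at, and CalleeState is the state recovered from RSP.
inline uint64_t checkReturnAddress(uint64_t CalleeState, const void *Expected,
                                   const void *Actual) {
  // A ret2spec-style misprediction resumes at the wrong address, so the
  // mismatch poisons the state before any dependent load is hardened.
  return Expected == Actual ? CalleeState : ~uint64_t(0);
}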
/// An attacker may speculatively store over a value that is then speculatively
@@ -2237,9 +2628,9 @@ void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
++NumCallsOrJumpsHardened;
}
-INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
"X86 speculative load hardener", false, false)
-INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
+INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
"X86 speculative load hardener", false, false)
FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 7e84323dda4c..0c9ce8802e1b 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -77,6 +77,8 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
if (isTargetELF()) {
switch (TM.getCodeModel()) {
// 64-bit small code model is simple: All rip-relative.
+ case CodeModel::Tiny:
+ llvm_unreachable("Tiny codesize model not supported on X86");
case CodeModel::Small:
case CodeModel::Kernel:
return X86II::MO_NO_FLAG;
@@ -139,8 +141,11 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
if (TM.shouldAssumeDSOLocal(M, GV))
return classifyLocalReference(GV);
- if (isTargetCOFF())
- return X86II::MO_DLLIMPORT;
+ if (isTargetCOFF()) {
+ if (GV->hasDLLImportStorageClass())
+ return X86II::MO_DLLIMPORT;
+ return X86II::MO_COFFSTUB;
+ }
if (is64Bit()) {
// ELF supports a large, truly PIC code model with non-PC relative GOT
@@ -220,14 +225,22 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUName.empty())
CPUName = "generic";
- // Make sure 64-bit features are available in 64-bit mode. (But make sure
- // SSE2 can be turned off explicitly.)
std::string FullFS = FS;
if (In64BitMode) {
+ // SSE2 should default to enabled in 64-bit mode, but can be turned off
+ // explicitly.
if (!FullFS.empty())
- FullFS = "+64bit,+sse2," + FullFS;
+ FullFS = "+sse2," + FullFS;
else
- FullFS = "+64bit,+sse2";
+ FullFS = "+sse2";
+
+ // If no CPU was specified, enable the 64bit feature to satisfy the later check.
+ if (CPUName == "generic") {
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
}
// LAHF/SAHF are always supported in non-64-bit mode.
@@ -262,8 +275,9 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel << ", 64bit "
<< HasX86_64 << "\n");
- assert((!In64BitMode || HasX86_64) &&
- "64-bit code requested on a subtarget that doesn't support it!");
+ if (In64BitMode && !HasX86_64)
+ report_fatal_error("64-bit code requested on a subtarget that doesn't "
+ "support it!");
// Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 85e8256a6e94..b1103f823e7f 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -52,21 +52,15 @@ enum Style {
class X86Subtarget final : public X86GenSubtargetInfo {
public:
+ // NOTE: Do not add anything new to this list. Coarse, CPU name based flags
+ // are not a good idea. We should be migrating away from these.
enum X86ProcFamilyEnum {
Others,
IntelAtom,
IntelSLM,
IntelGLM,
IntelGLP,
- IntelTRM,
- IntelHaswell,
- IntelBroadwell,
- IntelSkylake,
- IntelKNL,
- IntelSKX,
- IntelCannonlake,
- IntelIcelakeClient,
- IntelIcelakeServer,
+ IntelTRM
};
protected:
@@ -229,6 +223,9 @@ protected:
// PMULUDQ.
bool IsPMULLDSlow = false;
+ /// True if the PMADDWD instruction is slow compared to PMULLD.
+ bool IsPMADDWDSlow = false;
+
/// True if unaligned memory accesses of 16-bytes are slow.
bool IsUAMem16Slow = false;
@@ -385,9 +382,23 @@ protected:
/// Processor supports PCONFIG instruction
bool HasPCONFIG = false;
+ /// Processor has a single uop BEXTR implementation.
+ bool HasFastBEXTR = false;
+
+ /// Try harder to combine to horizontal vector ops if they are fast.
+ bool HasFastHorizontalOps = false;
+
/// Use a retpoline thunk rather than indirect calls to block speculative
/// execution.
- bool UseRetpoline = false;
+ bool UseRetpolineIndirectCalls = false;
+
+ /// Use a retpoline thunk or remove any indirect branch to block speculative
+ /// execution.
+ bool UseRetpolineIndirectBranches = false;
+
+ /// Deprecated flag, query `UseRetpolineIndirectCalls` and
+ /// `UseRetpolineIndirectBranches` instead.
+ bool DeprecatedUseRetpoline = false;
/// When using a retpoline thunk, call an externally provided thunk rather
/// than emitting one inside the compiler.
@@ -408,6 +419,9 @@ protected:
/// Indicates target prefers 256 bit instructions.
bool Prefer256Bit = false;
+ /// Threeway branch is profitable in this subtarget.
+ bool ThreewayBranchProfitable = false;
+
/// What processor and OS we're targeting.
Triple TargetTriple;
@@ -534,7 +548,9 @@ public:
bool hasX87() const { return HasX87; }
bool hasNOPL() const { return HasNOPL; }
- bool hasCMov() const { return HasCMov; }
+ // SSE codegen depends on cmovs, and all SSE1+ processors support them.
+ // All 64-bit processors support cmov.
+ bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
@@ -599,6 +615,7 @@ public:
bool hasPTWRITE() const { return HasPTWRITE; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isPMULLDSlow() const { return IsPMULLDSlow; }
+ bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
int getGatherOverhead() const { return GatherOverhead; }
@@ -619,6 +636,8 @@ public:
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasFastBEXTR() const { return HasFastBEXTR; }
+ bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
@@ -648,8 +667,12 @@ public:
bool hasWAITPKG() const { return HasWAITPKG; }
bool hasPCONFIG() const { return HasPCONFIG; }
bool hasSGX() const { return HasSGX; }
+ bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
bool hasINVPCID() const { return HasINVPCID; }
- bool useRetpoline() const { return UseRetpoline; }
+ bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
+ bool useRetpolineIndirectBranches() const {
+ return UseRetpolineIndirectBranches;
+ }
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
@@ -804,7 +827,9 @@ public:
/// If we are using retpolines, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
- bool enableIndirectBrExpand() const override { return useRetpoline(); }
+ bool enableIndirectBrExpand() const override {
+ return useRetpolineIndirectBranches();
+ }
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 374bf3daaf9b..afcb49dc2263 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -54,23 +54,10 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
-static cl::opt<bool> EnableSpeculativeLoadHardening(
- "x86-speculative-load-hardening",
- cl::desc("Enable speculative load hardening"), cl::init(false), cl::Hidden);
-
-namespace llvm {
-
-void initializeWinEHStatePassPass(PassRegistry &);
-void initializeFixupLEAPassPass(PassRegistry &);
-void initializeShadowCallStackPass(PassRegistry &);
-void initializeX86CallFrameOptimizationPass(PassRegistry &);
-void initializeX86CmovConverterPassPass(PassRegistry &);
-void initializeX86ExecutionDomainFixPass(PassRegistry &);
-void initializeX86DomainReassignmentPass(PassRegistry &);
-void initializeX86AvoidSFBPassPass(PassRegistry &);
-void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
-
-} // end namespace llvm
+static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding",
+ cl::desc("Enable the conditional branch "
+ "folding pass"),
+ cl::init(false), cl::Hidden);
extern "C" void LLVMInitializeX86Target() {
// Register the target.
@@ -89,7 +76,9 @@ extern "C" void LLVMInitializeX86Target() {
initializeX86ExecutionDomainFixPass(PR);
initializeX86DomainReassignmentPass(PR);
initializeX86AvoidSFBPassPass(PR);
+ initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
+ initializeX86CondBrFoldingPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -201,10 +190,13 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
- bool JIT, bool Is64Bit) {
- if (CM)
+static CodeModel::Model getEffectiveX86CodeModel(Optional<CodeModel::Model> CM,
+ bool JIT, bool Is64Bit) {
+ if (CM) {
+ if (*CM == CodeModel::Tiny)
+ report_fatal_error("Target does not support the tiny CodeModel");
return *CM;
+ }
if (JIT)
return Is64Bit ? CodeModel::Large : CodeModel::Small;
return CodeModel::Small;
@@ -221,7 +213,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, JIT, RM),
- getEffectiveCodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL),
+ getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64),
+ OL),
TLOF(createTLOF(getTargetTriple())) {
// Windows stack unwinder gets confused when execution flow "falls through"
// after a call to 'noreturn' function.
@@ -292,13 +285,14 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
}
}
- // Extract required-vector-width attribute.
+ // Extract min-legal-vector-width attribute.
unsigned RequiredVectorWidth = UINT32_MAX;
- if (F.hasFnAttribute("required-vector-width")) {
- StringRef Val = F.getFnAttribute("required-vector-width").getValueAsString();
+ if (F.hasFnAttribute("min-legal-vector-width")) {
+ StringRef Val =
+ F.getFnAttribute("min-legal-vector-width").getValueAsString();
unsigned Width;
if (!Val.getAsInteger(0, Width)) {
- Key += ",required-vector-width=";
+ Key += ",min-legal-vector-width=";
Key += Val;
RequiredVectorWidth = Width;
}
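For reference, the attribute read here is a plain string function attribute whose value is parsed with getAsInteger, so a caller could set it roughly like this (helper name is illustrative):

#include <string>
#include "llvm/IR/Function.h"

// Hedged sketch: tag a function with the string attribute consumed above,
// e.g. setMinLegalVectorWidth(F, 256) to request 256-bit legalization.
void setMinLegalVectorWidth(llvm::Function &F, unsigned Bits) {
  F.addFnAttr("min-legal-vector-width", std::to_string(Bits));
}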
@@ -449,6 +443,8 @@ bool X86PassConfig::addGlobalInstructionSelect() {
}
bool X86PassConfig::addILPOpts() {
+ if (EnableCondBrFoldingPass)
+ addPass(createX86CondBrFolding());
addPass(&EarlyIfConverterID);
if (EnableMachineCombinerPass)
addPass(&MachineCombinerID);
@@ -473,9 +469,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86AvoidStoreForwardingBlocks());
}
- if (EnableSpeculativeLoadHardening)
- addPass(createX86SpeculativeLoadHardeningPass());
-
+ addPass(createX86SpeculativeLoadHardeningPass());
addPass(createX86FlagsCopyLoweringPass());
addPass(createX86WinAllocaExpander());
}
@@ -508,6 +502,8 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86FixupLEAs());
addPass(createX86EvexToVexInsts());
}
+ addPass(createX86DiscriminateMemOpsPass());
+ addPass(createX86InsertPrefetchPass());
}
void X86PassConfig::addPreEmitPass2() {
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
index 5b21cd82b5b1..f5b45da0c3dc 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -53,10 +53,6 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 865462622627..36929a4f5439 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -290,11 +290,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
{ ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb.
-
- { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
- { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
- { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
- { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -308,11 +303,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v2i64, 1 },
{ ISD::SRA, MVT::v4i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
-
- { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
- { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
- { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
- { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -328,15 +318,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
-
- { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
- { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
- { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
- { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
- { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
- { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
- { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
- { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -354,7 +335,81 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split.
{ ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split.
{ ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
+ };
+
+ // XOP has faster vXi8 shifts.
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2() && !ST->hasXOP()) {
+ if (const auto *Entry =
+ CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512BWConstCostTable[] = {
+ { ISD::SDIV, MVT::v64i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v64i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v64i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v64i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
+ { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
+ { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasBWI()) {
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512ConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
+ { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
+ { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
+ { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasAVX512()) {
+ if (const auto *Entry =
+ CostTableLookup(AVX512ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+ static const CostTblEntry AVX2ConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v32i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v32i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
+ { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
+ { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
+ { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
+ { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
+ { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
+ { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasAVX2()) {
+ if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry SSE2ConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i8, 28+2 }, // 4*ext+4*pmulhw sequence + split.
+ { ISD::SREM, MVT::v32i8, 32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
+ { ISD::SDIV, MVT::v16i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::SREM, MVT::v16i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i8, 28+2 }, // 4*ext+4*pmulhw sequence + split.
+ { ISD::UREM, MVT::v32i8, 32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
+ { ISD::UDIV, MVT::v16i8, 14 }, // 2*ext+2*pmulhw sequence
+ { ISD::UREM, MVT::v16i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
{ ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split.
{ ISD::SREM, MVT::v16i16, 16+2 }, // 2*pmulhw+mul+sub sequence + split.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
@@ -373,7 +428,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::UREM, MVT::v4i32, 20 }, // pmuludq+mul+sub sequence
};
- if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
ST->hasSSE2()) {
// pmuldq sequence.
if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
@@ -385,12 +441,8 @@ int X86TTIImpl::getArithmeticInstrCost(
if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41())
return LT.first * 20;
- // XOP has faster vXi8 shifts.
- if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) ||
- !ST->hasXOP())
- if (const auto *Entry =
- CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
- return LT.first * Entry->Cost;
+ if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {
@@ -560,9 +612,18 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for XOP lowering tricks.
- if (ST->hasXOP())
- if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
+ if (ST->hasXOP()) {
+ // If the right shift is constant then we'll fold the negation so
+ // it's as cheap as a left shift.
+ int ShiftISD = ISD;
+ if ((ShiftISD == ISD::SRL || ShiftISD == ISD::SRA) &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+ ShiftISD = ISD::SHL;
+ if (const auto *Entry =
+ CostTableLookup(XOPShiftCostTable, ShiftISD, LT.second))
return LT.first * Entry->Cost;
+ }
static const CostTblEntry SSE2UniformShiftCostTable[] = {
// Uniform splats are cheaper for the following instructions.
@@ -771,6 +832,12 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
+
+ { ISD::FADD, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
+ { ISD::FADD, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
+
+ { ISD::FSUB, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
+ { ISD::FSUB, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
};
if (ST->hasSSE2())
@@ -780,6 +847,20 @@ int X86TTIImpl::getArithmeticInstrCost(
static const CostTblEntry SSE1CostTable[] = {
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
+
+ { ISD::FADD, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::FADD, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
+
+ { ISD::FSUB, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::FSUB, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
+
+ { ISD::ADD, MVT::i8, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::ADD, MVT::i16, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::ADD, MVT::i32, 1 }, // Pentium III from http://www.agner.org/
+
+ { ISD::SUB, MVT::i8, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::SUB, MVT::i16, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::SUB, MVT::i32, 1 }, // Pentium III from http://www.agner.org/
};
if (ST->hasSSE1())
@@ -810,12 +891,30 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ // Treat Transpose as 2-op shuffles - there's no difference in lowering.
+ if (Kind == TTI::SK_Transpose)
+ Kind = TTI::SK_PermuteTwoSrc;
+
// For Broadcasts we are splatting the first element from the first input
// register, so only need to reference that input and all the output
// registers are the same.
if (Kind == TTI::SK_Broadcast)
LT.first = 1;
+ // Subvector extractions are free if they start at the beginning of a
+ // vector and cheap if the subvectors are aligned.
+ if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) {
+ int NumElts = LT.second.getVectorNumElements();
+ if ((Index % NumElts) == 0)
+ return 0;
+ std::pair<int, MVT> SubLT = TLI->getTypeLegalizationCost(DL, SubTp);
+ if (SubLT.second.isVector()) {
+ int NumSubElts = SubLT.second.getVectorNumElements();
+ if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
+ return SubLT.first;
+ }
+ }
+
// We are going to permute multiple sources and the result will be in multiple
// destinations. Providing an accurate cost only for splits where the element
// type remains the same.
@@ -853,15 +952,15 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb
+ {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
+ {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
- { TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
- { TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1} // vpermt2b
};
if (ST->hasVBMI())
@@ -870,25 +969,25 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v32i16, 1 }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v64i8, 1 }, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
- { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, 2 }, // pshufb + vshufi64x2
+ {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw
+ {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw
+ {TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2
- { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw
- { TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc
+ {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3}, // vpermw + zext/trunc
- { TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w + trunc
- { TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w + trunc
+ {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3}, // zext + vpermt2w + trunc
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3} // zext + vpermt2w + trunc
};
if (ST->hasBWI())
@@ -897,42 +996,42 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v8f64, 1 }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v16f32, 1 }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v8i64, 1 }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v16i32, 1 }, // vpbroadcastd
-
- { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
-
- { TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
-
- { TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d
- { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd
- { TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps
- { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q
- { TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d
+ {TTI::SK_Broadcast, MVT::v8f64, 1}, // vbroadcastpd
+ {TTI::SK_Broadcast, MVT::v16f32, 1}, // vbroadcastps
+ {TTI::SK_Broadcast, MVT::v8i64, 1}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd
+
+ {TTI::SK_Reverse, MVT::v8f64, 1}, // vpermpd
+ {TTI::SK_Reverse, MVT::v16f32, 1}, // vpermps
+ {TTI::SK_Reverse, MVT::v8i64, 1}, // vpermq
+ {TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd
+
+ {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v16f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v4f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v8i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v16i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
+
+ {TTI::SK_PermuteTwoSrc, MVT::v8f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v16f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v8i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v16i32, 1}, // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 1}, // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1}, // vpermt2pd
+ {TTI::SK_PermuteTwoSrc, MVT::v4f32, 1}, // vpermt2ps
+ {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1}, // vpermt2q
+ {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1} // vpermt2d
};
if (ST->hasAVX512())
@@ -940,40 +1039,40 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, 1 }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v8f32, 1 }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v4i64, 1 }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v8i32, 1 }, // vpbroadcastd
- { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32i8, 1 }, // vpbroadcastb
-
- { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
-
- { TTI::SK_Select, MVT::v16i16, 1 }, // vpblendvb
- { TTI::SK_Select, MVT::v32i8, 1 }, // vpblendvb
-
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2*vpshufb
+ {TTI::SK_Broadcast, MVT::v4f64, 1}, // vbroadcastpd
+ {TTI::SK_Broadcast, MVT::v8f32, 1}, // vbroadcastps
+ {TTI::SK_Broadcast, MVT::v4i64, 1}, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v8i32, 1}, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32i8, 1}, // vpbroadcastb
+
+ {TTI::SK_Reverse, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_Reverse, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_Reverse, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_Reverse, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
+ {TTI::SK_Reverse, MVT::v32i8, 2}, // vperm2i128 + pshufb
+
+ {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
+ {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, // vpermq
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb
// + vpblendvb
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vperm2i128 + 2*vpshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vperm2i128 + 2*vpshufb
// + vpblendvb
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vpermpd + vblendpd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 3 }, // 2*vpermps + vblendps
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vpermq + vpblendd
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 3 }, // 2*vpermd + vpblendd
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 7 }, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 7 }, // 2*vperm2i128 + 4*vpshufb
- // + vpblendvb
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vpermpd + vblendpd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3}, // 2*vpermps + vblendps
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vpermq + vpblendd
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, // 2*vpermd + vpblendd
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
+ // + vpblendvb
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, // 2*vperm2i128 + 4*vpshufb
+ // + vpblendvb
};
if (ST->hasAVX2())
@@ -981,21 +1080,21 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry XOPShuffleTbl[] = {
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vpermil2pd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 2 }, // vperm2f128 + vpermil2ps
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vpermil2pd
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 2 }, // vperm2f128 + vpermil2ps
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vextractf128 + 2*vpperm
- // + vinsertf128
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vextractf128 + 2*vpperm
- // + vinsertf128
-
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 9 }, // 2*vextractf128 + 6*vpperm
- // + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpperm
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 9 }, // 2*vextractf128 + 6*vpperm
- // + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 }, // vpperm
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vpermil2pd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2}, // vperm2f128 + vpermil2ps
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vpermil2pd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2}, // vperm2f128 + vpermil2ps
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vextractf128 + 2*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vextractf128 + 2*vpperm
+ // + vinsertf128
+
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpperm
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9}, // 2*vextractf128 + 6*vpperm
+ // + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}, // vpperm
};
if (ST->hasXOP())
@@ -1003,46 +1102,46 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd + vinsertf128
- { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128
-
- { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
- { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
-
- { TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd
- { TTI::SK_Select, MVT::v4f64, 1 }, // vblendpd
- { TTI::SK_Select, MVT::v8i32, 1 }, // vblendps
- { TTI::SK_Select, MVT::v8f32, 1 }, // vblendps
- { TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor
- { TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor
-
- { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vshufpd
- { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vshufpd
- { TTI::SK_PermuteSingleSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteSingleSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8 }, // vextractf128 + 4*pshufb
+ {TTI::SK_Broadcast, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8f32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8i32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v32i8, 2}, // vpshufb + vinsertf128
+
+ {TTI::SK_Reverse, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8f32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8i32, 2}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ {TTI::SK_Reverse, MVT::v32i8, 4}, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ {TTI::SK_Select, MVT::v4i64, 1}, // vblendpd
+ {TTI::SK_Select, MVT::v4f64, 1}, // vblendpd
+ {TTI::SK_Select, MVT::v8i32, 1}, // vblendps
+ {TTI::SK_Select, MVT::v8f32, 1}, // vblendps
+ {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
+ {TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vshufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb
// + 2*por + vinsertf128
- { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8 }, // vextractf128 + 4*pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 + 4*pshufb
// + 2*por + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd
- { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd
- { TTI::SK_PermuteTwoSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteTwoSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps
- { TTI::SK_PermuteTwoSrc, MVT::v16i16, 15 }, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
- { TTI::SK_PermuteTwoSrc, MVT::v32i8, 15 }, // 2*vextractf128 + 8*pshufb
- // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, // 2*vperm2f128 + vshufpd
+ {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
};
if (ST->hasAVX())
@@ -1050,12 +1149,12 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSE41ShuffleTbl[] = {
- { TTI::SK_Select, MVT::v2i64, 1 }, // pblendw
- { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Select, MVT::v4i32, 1 }, // pblendw
- { TTI::SK_Select, MVT::v4f32, 1 }, // blendps
- { TTI::SK_Select, MVT::v8i16, 1 }, // pblendw
- { TTI::SK_Select, MVT::v16i8, 1 } // pblendvb
+ {TTI::SK_Select, MVT::v2i64, 1}, // pblendw
+ {TTI::SK_Select, MVT::v2f64, 1}, // movsd
+ {TTI::SK_Select, MVT::v4i32, 1}, // pblendw
+ {TTI::SK_Select, MVT::v4f32, 1}, // blendps
+ {TTI::SK_Select, MVT::v8i16, 1}, // pblendw
+ {TTI::SK_Select, MVT::v16i8, 1} // pblendvb
};
if (ST->hasSSE41())
@@ -1063,20 +1162,20 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Broadcast, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
- { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
- { TTI::SK_Select, MVT::v8i16, 3 }, // 2*pshufb + por
- { TTI::SK_Select, MVT::v16i8, 3 }, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
- { TTI::SK_PermuteTwoSrc, MVT::v8i16, 3 }, // 2*pshufb + por
- { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 }, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
};
if (ST->hasSSSE3())
@@ -1084,29 +1183,29 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSE2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
- { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd
-
- { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
- { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
-
- { TTI::SK_Select, MVT::v2i64, 1 }, // movsd
- { TTI::SK_Select, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Select, MVT::v4i32, 2 }, // 2*shufps
- { TTI::SK_Select, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Select, MVT::v16i8, 3 }, // pand + pandn + por
-
- { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_PermuteSingleSrc, MVT::v8i16, 5 }, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_Broadcast, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
+
+ {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ {TTI::SK_Select, MVT::v2i64, 1}, // movsd
+ {TTI::SK_Select, MVT::v2f64, 1}, // movsd
+ {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
+ {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
+ {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
+
+ {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
+ {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd
+ {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
+ {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
// + pshufd/unpck
{ TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + 2*packus
@@ -1145,6 +1244,27 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// FIXME: Need a better design of the cost table to handle non-simple types of
// potential massive combinations (elem_num x src_type x dst_type).
+ static const TypeConversionCostTblEntry AVX512BWConversionTbl[] {
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
+
+ // Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
+
+ // Mask zero extend is a load + broadcast.
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
+ };
+
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
@@ -1208,8 +1328,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
@@ -1231,12 +1349,16 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
+
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
@@ -1328,13 +1450,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 },
// The generic code to compute the scalar overhead is currently broken.
// Workaround this limitation by estimating the scalarization overhead
// here. We have roughly 10 instructions per scalar element.
// Multiply that by the vector width.
// FIXME: remove that when PR19268 is fixed.
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 10 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 20 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
@@ -1387,6 +1509,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
};
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1408,11 +1531,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
+
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
@@ -1465,43 +1590,51 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
- if (ST->hasDQI())
- if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
- return Entry->Cost;
+ MVT SimpleSrcTy = SrcTy.getSimpleVT();
+ MVT SimpleDstTy = DstTy.getSimpleVT();
- if (ST->hasAVX512())
- if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
- return Entry->Cost;
+ // Make sure that neither type is going to be split before using the
+ // AVX512 tables. This handles -mprefer-vector-width=256
+ // with -min-legal-vector-width<=256
+ if (TLI->getTypeAction(SimpleSrcTy) != TargetLowering::TypeSplitVector &&
+ TLI->getTypeAction(SimpleDstTy) != TargetLowering::TypeSplitVector) {
+ if (ST->hasBWI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512BWConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+
+ if (ST->hasDQI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+
+ if (ST->hasAVX512())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+ }
if (ST->hasAVX2()) {
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasAVX()) {
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasSSE41()) {
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
if (ST->hasSSE2()) {
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
- DstTy.getSimpleVT(),
- SrcTy.getSimpleVT()))
+ SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
@@ -1629,6 +1762,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v16i32, 14 },
{ ISD::CTTZ, MVT::v32i16, 12 },
{ ISD::CTTZ, MVT::v64i8, 9 },
+ { ISD::SADDSAT, MVT::v32i16, 1 },
+ { ISD::SADDSAT, MVT::v64i8, 1 },
+ { ISD::SSUBSAT, MVT::v32i16, 1 },
+ { ISD::SSUBSAT, MVT::v64i8, 1 },
+ { ISD::UADDSAT, MVT::v32i16, 1 },
+ { ISD::UADDSAT, MVT::v64i8, 1 },
+ { ISD::USUBSAT, MVT::v32i16, 1 },
+ { ISD::USUBSAT, MVT::v64i8, 1 },
};
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::BITREVERSE, MVT::v8i64, 36 },
@@ -1639,6 +1780,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTPOP, MVT::v16i32, 24 },
{ ISD::CTTZ, MVT::v8i64, 20 },
{ ISD::CTTZ, MVT::v16i32, 28 },
+ { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
+ { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq
};
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
@@ -1674,6 +1819,15 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v8i32, 14 },
{ ISD::CTTZ, MVT::v16i16, 12 },
{ ISD::CTTZ, MVT::v32i8, 9 },
+ { ISD::SADDSAT, MVT::v16i16, 1 },
+ { ISD::SADDSAT, MVT::v32i8, 1 },
+ { ISD::SSUBSAT, MVT::v16i16, 1 },
+ { ISD::SSUBSAT, MVT::v32i8, 1 },
+ { ISD::UADDSAT, MVT::v16i16, 1 },
+ { ISD::UADDSAT, MVT::v32i8, 1 },
+ { ISD::USUBSAT, MVT::v16i16, 1 },
+ { ISD::USUBSAT, MVT::v32i8, 1 },
+ { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
@@ -1701,6 +1855,15 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert
{ ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert
{ ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
@@ -1721,6 +1884,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
};
static const CostTblEntry SSE42CostTbl[] = {
+ { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
@@ -1765,6 +1929,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v4i32, 18 },
{ ISD::CTTZ, MVT::v8i16, 16 },
{ ISD::CTTZ, MVT::v16i8, 13 },
+ { ISD::SADDSAT, MVT::v8i16, 1 },
+ { ISD::SADDSAT, MVT::v16i8, 1 },
+ { ISD::SSUBSAT, MVT::v8i16, 1 },
+ { ISD::SSUBSAT, MVT::v16i8, 1 },
+ { ISD::UADDSAT, MVT::v8i16, 1 },
+ { ISD::UADDSAT, MVT::v16i8, 1 },
+ { ISD::USUBSAT, MVT::v8i16, 1 },
+ { ISD::USUBSAT, MVT::v16i8, 1 },
{ ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
};
@@ -1800,76 +1972,180 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::cttz:
ISD = ISD::CTTZ;
break;
+ case Intrinsic::sadd_sat:
+ ISD = ISD::SADDSAT;
+ break;
+ case Intrinsic::ssub_sat:
+ ISD = ISD::SSUBSAT;
+ break;
+ case Intrinsic::uadd_sat:
+ ISD = ISD::UADDSAT;
+ break;
+ case Intrinsic::usub_sat:
+ ISD = ISD::USUBSAT;
+ break;
case Intrinsic::sqrt:
ISD = ISD::FSQRT;
break;
}
- // Legalize the type.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
- MVT MTy = LT.second;
+ if (ISD != ISD::DELETED_NODE) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ MVT MTy = LT.second;
- // Attempt to lookup cost.
- if (ST->isGLM())
- if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ // Attempt to lookup cost.
+ if (ST->isGLM())
+ if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->isSLM())
- if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasCDI())
- if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasCDI())
+ if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX512())
- if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasXOP())
- if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasXOP())
+ if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSSE3())
- if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->is64Bit())
- if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF) {
+ static const CostTblEntry AVX512CostTbl[] = {
+ { ISD::ROTL, MVT::v8i64, 1 },
+ { ISD::ROTL, MVT::v4i64, 1 },
+ { ISD::ROTL, MVT::v2i64, 1 },
+ { ISD::ROTL, MVT::v16i32, 1 },
+ { ISD::ROTL, MVT::v8i32, 1 },
+ { ISD::ROTL, MVT::v4i32, 1 },
+ { ISD::ROTR, MVT::v8i64, 1 },
+ { ISD::ROTR, MVT::v4i64, 1 },
+ { ISD::ROTR, MVT::v2i64, 1 },
+ { ISD::ROTR, MVT::v16i32, 1 },
+ { ISD::ROTR, MVT::v8i32, 1 },
+ { ISD::ROTR, MVT::v4i32, 1 }
+ };
+ // XOP: ROTL = VPROT(X,Y), ROTR = VPROT(X,SUB(0,Y))
+ static const CostTblEntry XOPCostTbl[] = {
+ { ISD::ROTL, MVT::v4i64, 4 },
+ { ISD::ROTL, MVT::v8i32, 4 },
+ { ISD::ROTL, MVT::v16i16, 4 },
+ { ISD::ROTL, MVT::v32i8, 4 },
+ { ISD::ROTL, MVT::v2i64, 1 },
+ { ISD::ROTL, MVT::v4i32, 1 },
+ { ISD::ROTL, MVT::v8i16, 1 },
+ { ISD::ROTL, MVT::v16i8, 1 },
+ { ISD::ROTR, MVT::v4i64, 6 },
+ { ISD::ROTR, MVT::v8i32, 6 },
+ { ISD::ROTR, MVT::v16i16, 6 },
+ { ISD::ROTR, MVT::v32i8, 6 },
+ { ISD::ROTR, MVT::v2i64, 2 },
+ { ISD::ROTR, MVT::v4i32, 2 },
+ { ISD::ROTR, MVT::v8i16, 2 },
+ { ISD::ROTR, MVT::v16i8, 2 }
+ };
+ static const CostTblEntry X64CostTbl[] = { // 64-bit targets
+ { ISD::ROTL, MVT::i64, 1 },
+ { ISD::ROTR, MVT::i64, 1 },
+ { ISD::FSHL, MVT::i64, 4 }
+ };
+ static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+ { ISD::ROTL, MVT::i32, 1 },
+ { ISD::ROTL, MVT::i16, 1 },
+ { ISD::ROTL, MVT::i8, 1 },
+ { ISD::ROTR, MVT::i32, 1 },
+ { ISD::ROTR, MVT::i16, 1 },
+ { ISD::ROTR, MVT::i8, 1 },
+ { ISD::FSHL, MVT::i32, 4 },
+ { ISD::FSHL, MVT::i16, 4 },
+ { ISD::FSHL, MVT::i8, 4 }
+ };
+
+ unsigned ISD = ISD::DELETED_NODE;
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::fshl:
+ ISD = ISD::FSHL;
+ if (Args[0] == Args[1])
+ ISD = ISD::ROTL;
+ break;
+ case Intrinsic::fshr:
+ // FSHR has the same costs, so don't duplicate.
+ ISD = ISD::FSHL;
+ if (Args[0] == Args[1])
+ ISD = ISD::ROTR;
+ break;
+ }
+
+ if (ISD != ISD::DELETED_NODE) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ MVT MTy = LT.second;
+
+ // Attempt to lookup cost.
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasXOP())
+ if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
+
return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
}
@@ -2341,11 +2617,15 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return TTI::TCC_Free;
ImmIdx = 1;
break;
- case Instruction::Mul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
+ // Division by constant is typically expanded later into a different
+ // instruction sequence. This completely changes the constants.
+ // Report them as "free" to stop ConstantHoist from marking them as opaque.
+ return TTI::TCC_Free;
+ case Instruction::Mul:
case Instruction::Or:
case Instruction::Xor:
ImmIdx = 1;
@@ -2690,6 +2970,9 @@ X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
Options.LoadSizes.push_back(4);
Options.LoadSizes.push_back(2);
Options.LoadSizes.push_back(1);
+ // All GPR and vector loads can be unaligned. SIMD compare requires integer
+ // vectors (SSE2/AVX2).
+ Options.AllowOverlappingLoads = true;
return Options;
}();
return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
@@ -2718,7 +3001,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
// We currently support only fully-interleaved groups, with no gaps.
// TODO: Support also strided loads (interleaved-groups with gaps).
@@ -2827,7 +3117,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
+
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
// VecTy for interleave memop is <VF*Factor x Elt>.
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -2945,7 +3242,9 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
Type *EltTy = VecTy->getVectorElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
@@ -2957,11 +3256,14 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
};
if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
if (ST->hasAVX2())
return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 3df899038820..1637592c81f8 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -101,13 +101,19 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace);
+ unsigned Alignment, unsigned AddressSpace,
+ bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
int getIntImmCost(int64_t);
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index dde9c734f492..185deda97c1f 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -34,10 +34,6 @@ using namespace llvm;
#define DEBUG_TYPE "winehstate"
-namespace llvm {
-void initializeWinEHStatePassPass(PassRegistry &);
-}
-
namespace {
const int OverdefinedState = INT_MIN;
@@ -369,7 +365,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
// Insert an unlink before all returns.
for (BasicBlock &BB : *F) {
- TerminatorInst *T = BB.getTerminator();
+ Instruction *T = BB.getTerminator();
if (!isa<ReturnInst>(T))
continue;
Builder.SetInsertPoint(T);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index b87c149a36dc..fff8a66d0e75 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -151,7 +151,7 @@ static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
Offset,
FramePtr));
}
- llvm::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
+ llvm::sort(SpillList, CompareSSIOffset);
}
/// Creates an ordered list of EH info register 'spills'.
@@ -170,7 +170,7 @@ static void GetEHSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
SpillList.push_back(
StackSlotInfo(EHSlot[0], MFI.getObjectOffset(EHSlot[1]),
TL->getExceptionSelectorRegister(PersonalityFn)));
- llvm::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
+ llvm::sort(SpillList, CompareSSIOffset);
}
static MachineMemOperand *getFrameIndexMMO(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 1a56d1fd6e2f..1688c38efc1d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -150,11 +150,10 @@ void XCoreDAGToDAGISel::Select(SDNode *N) {
SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
MVT::Other, CPIdx,
CurDAG->getEntryNode());
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] =
+ MachineMemOperand *MemOp =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
MachineMemOperand::MOLoad, 4, 4);
- cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(node), {MemOp});
ReplaceNode(N, node);
return;
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 99e76144cba3..75d7ae7048a1 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -403,8 +403,7 @@ SDValue XCoreTargetLowering::lowerLoadWordFromAlignedBasePlusOffset(
static bool isWordAligned(SDValue Value, SelectionDAG &DAG)
{
- KnownBits Known;
- DAG.computeKnownBits(Value, Known);
+ KnownBits Known = DAG.computeKnownBits(Value);
return Known.countMinTrailingZeros() >= 2;
}
@@ -1649,10 +1648,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
@@ -1672,10 +1670,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
@@ -1688,10 +1685,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
- KnownBits Known;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.computeKnownBits(N2, Known);
+ KnownBits Known = DAG.computeKnownBits(N2);
if ((Known.Zero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 1c93ba8fa14c..7455cd997ad6 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -129,7 +129,7 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
do {
SmallVector<WeakTrackingVH, 8> WUsers(CE->user_begin(), CE->user_end());
- llvm::sort(WUsers.begin(), WUsers.end());
+ llvm::sort(WUsers);
WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end());
while (!WUsers.empty())
if (WeakTrackingVH WU = WUsers.pop_back_val()) {
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index 9451a05d8d58..2e9fd98ed34f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -33,8 +33,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool enableMultipleCopyHints() const override { return true; }
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 38925bfd51b0..2aa9932e2465 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -31,7 +31,8 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
+static CodeModel::Model
+getEffectiveXCoreCodeModel(Optional<CodeModel::Model> CM) {
if (CM) {
if (*CM != CodeModel::Small && *CM != CodeModel::Large)
report_fatal_error("Target only supports CodeModel Small or Large");
@@ -51,7 +52,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OL),
+ getEffectiveXCoreCodeModel(CM), OL),
TLOF(llvm::make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
diff --git a/contrib/llvm/lib/Testing/Support/SupportHelpers.cpp b/contrib/llvm/lib/Testing/Support/SupportHelpers.cpp
new file mode 100644
index 000000000000..5f53b2330b20
--- /dev/null
+++ b/contrib/llvm/lib/Testing/Support/SupportHelpers.cpp
@@ -0,0 +1,53 @@
+
+#include "llvm/Testing/Support/SupportHelpers.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::unittest;
+
+static std::pair<bool, SmallString<128>> findSrcDirMap(StringRef Argv0) {
+ SmallString<128> BaseDir = llvm::sys::path::parent_path(Argv0);
+
+ llvm::sys::fs::make_absolute(BaseDir);
+
+ SmallString<128> PathInSameDir = BaseDir;
+ llvm::sys::path::append(PathInSameDir, "llvm.srcdir.txt");
+
+ if (llvm::sys::fs::is_regular_file(PathInSameDir))
+ return std::make_pair(true, std::move(PathInSameDir));
+
+ SmallString<128> PathInParentDir = llvm::sys::path::parent_path(BaseDir);
+
+ llvm::sys::path::append(PathInParentDir, "llvm.srcdir.txt");
+ if (llvm::sys::fs::is_regular_file(PathInParentDir))
+ return std::make_pair(true, std::move(PathInParentDir));
+
+ return std::pair<bool, SmallString<128>>(false, {});
+}
+
+SmallString<128> llvm::unittest::getInputFileDirectory(const char *Argv0) {
+ bool Found = false;
+ SmallString<128> InputFilePath;
+ std::tie(Found, InputFilePath) = findSrcDirMap(Argv0);
+
+ EXPECT_TRUE(Found) << "Unit test source directory file does not exist.";
+
+ auto File = MemoryBuffer::getFile(InputFilePath);
+
+ EXPECT_TRUE(static_cast<bool>(File))
+ << "Could not open unit test source directory file.";
+
+ InputFilePath.clear();
+ InputFilePath.append((*File)->getBuffer().trim());
+ llvm::sys::path::append(InputFilePath, "Inputs");
+ llvm::sys::path::native(InputFilePath);
+ return InputFilePath;
+}
diff --git a/contrib/llvm/lib/TextAPI/ELF/ELFStub.cpp b/contrib/llvm/lib/TextAPI/ELF/ELFStub.cpp
new file mode 100644
index 000000000000..248a078a2404
--- /dev/null
+++ b/contrib/llvm/lib/TextAPI/ELF/ELFStub.cpp
@@ -0,0 +1,29 @@
+//===- ELFStub.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-----------------------------------------------------------------------===/
+
+#include "llvm/TextAPI/ELF/ELFStub.h"
+
+using namespace llvm;
+using namespace llvm::elfabi;
+
+ELFStub::ELFStub(ELFStub const &Stub) {
+ TbeVersion = Stub.TbeVersion;
+ Arch = Stub.Arch;
+ SoName = Stub.SoName;
+ NeededLibs = Stub.NeededLibs;
+ Symbols = Stub.Symbols;
+}
+
+ELFStub::ELFStub(ELFStub &&Stub) {
+ TbeVersion = std::move(Stub.TbeVersion);
+ Arch = std::move(Stub.Arch);
+ SoName = std::move(Stub.SoName);
+ NeededLibs = std::move(Stub.NeededLibs);
+ Symbols = std::move(Stub.Symbols);
+}
diff --git a/contrib/llvm/lib/TextAPI/ELF/TBEHandler.cpp b/contrib/llvm/lib/TextAPI/ELF/TBEHandler.cpp
new file mode 100644
index 000000000000..b621829d9358
--- /dev/null
+++ b/contrib/llvm/lib/TextAPI/ELF/TBEHandler.cpp
@@ -0,0 +1,161 @@
+//===- TBEHandler.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-----------------------------------------------------------------------===/
+
+#include "llvm/TextAPI/ELF/TBEHandler.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/TextAPI/ELF/ELFStub.h"
+
+using namespace llvm;
+using namespace llvm::elfabi;
+
+LLVM_YAML_STRONG_TYPEDEF(ELFArch, ELFArchMapper)
+
+namespace llvm {
+namespace yaml {
+
+/// YAML traits for ELFSymbolType.
+template <> struct ScalarEnumerationTraits<ELFSymbolType> {
+ static void enumeration(IO &IO, ELFSymbolType &SymbolType) {
+ IO.enumCase(SymbolType, "NoType", ELFSymbolType::NoType);
+ IO.enumCase(SymbolType, "Func", ELFSymbolType::Func);
+ IO.enumCase(SymbolType, "Object", ELFSymbolType::Object);
+ IO.enumCase(SymbolType, "TLS", ELFSymbolType::TLS);
+ IO.enumCase(SymbolType, "Unknown", ELFSymbolType::Unknown);
+ // Treat other symbol types as noise, and map to Unknown.
+ if (!IO.outputting() && IO.matchEnumFallback())
+ SymbolType = ELFSymbolType::Unknown;
+ }
+};
+
+/// YAML traits for ELFArch.
+template <> struct ScalarTraits<ELFArchMapper> {
+ static void output(const ELFArchMapper &Value, void *,
+ llvm::raw_ostream &Out) {
+ // Map from integer to architecture string.
+ switch (Value) {
+ case (ELFArch)ELF::EM_X86_64:
+ Out << "x86_64";
+ break;
+ case (ELFArch)ELF::EM_AARCH64:
+ Out << "AArch64";
+ break;
+ case (ELFArch)ELF::EM_NONE:
+ default:
+ Out << "Unknown";
+ }
+ }
+
+ static StringRef input(StringRef Scalar, void *, ELFArchMapper &Value) {
+ // Map from architecture string to integer.
+ Value = StringSwitch<ELFArch>(Scalar)
+ .Case("x86_64", ELF::EM_X86_64)
+ .Case("AArch64", ELF::EM_AARCH64)
+ .Case("Unknown", ELF::EM_NONE)
+ .Default(ELF::EM_NONE);
+
+ // Returning empty StringRef indicates successful parse.
+ return StringRef();
+ }
+
+ // Don't place quotation marks around architecture value.
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
+/// YAML traits for TbeVersion.
+template <> struct ScalarTraits<VersionTuple> {
+ static void output(const VersionTuple &Value, void *,
+ llvm::raw_ostream &Out) {
+ Out << Value.getAsString();
+ }
+
+ static StringRef input(StringRef Scalar, void *, VersionTuple &Value) {
+ if (Value.tryParse(Scalar))
+ return StringRef("Can't parse version: invalid version format.");
+
+ if (Value > TBEVersionCurrent)
+ return StringRef("Unsupported TBE version.");
+
+ // Returning empty StringRef indicates successful parse.
+ return StringRef();
+ }
+
+ // Don't place quotation marks around version value.
+ static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
+/// YAML traits for ELFSymbol.
+template <> struct MappingTraits<ELFSymbol> {
+ static void mapping(IO &IO, ELFSymbol &Symbol) {
+ IO.mapRequired("Type", Symbol.Type);
+ // The need for symbol size depends on the symbol type.
+ if (Symbol.Type == ELFSymbolType::NoType) {
+ IO.mapOptional("Size", Symbol.Size, (uint64_t)0);
+ } else if (Symbol.Type == ELFSymbolType::Func) {
+ Symbol.Size = 0;
+ } else {
+ IO.mapRequired("Size", Symbol.Size);
+ }
+ IO.mapOptional("Undefined", Symbol.Undefined, false);
+ IO.mapOptional("Weak", Symbol.Weak, false);
+ IO.mapOptional("Warning", Symbol.Warning);
+ }
+
+ // Compacts symbol information into a single line.
+ static const bool flow = true;
+};
+
+/// YAML traits for set of ELFSymbols.
+template <> struct CustomMappingTraits<std::set<ELFSymbol>> {
+ static void inputOne(IO &IO, StringRef Key, std::set<ELFSymbol> &Set) {
+ ELFSymbol Sym(Key.str());
+ IO.mapRequired(Key.str().c_str(), Sym);
+ Set.insert(Sym);
+ }
+
+ static void output(IO &IO, std::set<ELFSymbol> &Set) {
+ for (auto &Sym : Set)
+ IO.mapRequired(Sym.Name.c_str(), const_cast<ELFSymbol &>(Sym));
+ }
+};
+
+/// YAML traits for ELFStub objects.
+template <> struct MappingTraits<ELFStub> {
+ static void mapping(IO &IO, ELFStub &Stub) {
+ if (!IO.mapTag("!tapi-tbe", true))
+ IO.setError("Not a .tbe YAML file.");
+ IO.mapRequired("TbeVersion", Stub.TbeVersion);
+ IO.mapOptional("SoName", Stub.SoName);
+ IO.mapRequired("Arch", (ELFArchMapper &)Stub.Arch);
+ IO.mapOptional("NeededLibs", Stub.NeededLibs);
+ IO.mapRequired("Symbols", Stub.Symbols);
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+Expected<std::unique_ptr<ELFStub>> elfabi::readTBEFromBuffer(StringRef Buf) {
+ yaml::Input YamlIn(Buf);
+ std::unique_ptr<ELFStub> Stub(new ELFStub());
+ YamlIn >> *Stub;
+ if (std::error_code Err = YamlIn.error())
+ return createStringError(Err, "YAML failed reading as TBE");
+
+ return std::move(Stub);
+}
+
+Error elfabi::writeTBEToOutputStream(raw_ostream &OS, const ELFStub &Stub) {
+ yaml::Output YamlOut(OS, NULL, /*WrapColumn =*/0);
+
+ YamlOut << const_cast<ELFStub &>(Stub);
+ return Error::success();
+}
diff --git a/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index f8de7ca73924..c5a28d4f1c08 100644
--- a/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -96,7 +96,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
// Handle when no input or output is specified
if (Args.hasArgNoClaim(OPT_INPUT) ||
(!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) {
- Table.PrintHelp(outs(), ArgsArr[0], "dlltool", false);
+ Table.PrintHelp(outs(), "llvm-dlltool [options] file...", "llvm-dlltool",
+ false);
llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm, arm64\n";
return 1;
}
diff --git a/contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index d636dca7a2c7..64f4fe423f25 100644
--- a/contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/contrib/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -123,7 +123,7 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
// Handle /help
if (Args.hasArg(OPT_help)) {
- Table.PrintHelp(outs(), ArgsArr[0], "LLVM Lib");
+ Table.PrintHelp(outs(), "llvm-lib [options] file...", "LLVM Lib");
return 0;
}
diff --git a/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index b622d018478a..c795866ec0f2 100644
--- a/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -16,7 +16,7 @@
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "AggressiveInstCombineInternal.h"
#include "llvm-c/Initialization.h"
-#include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Transforms/AggressiveInstCombine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -59,6 +59,99 @@ public:
};
} // namespace
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+ if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
+ return false;
+
+ // As with the one-use checks below, this is not strictly necessary, but we
+ // are being cautious to avoid potential perf regressions on targets that
+ // do not actually have a rotate instruction (where the funnel shift would be
+ // expanded back into math/shift/logic ops).
+ if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
+ return false;
+
+ // Match V to funnel shift left/right and capture the source operand and
+ // shift amount in X and Y.
+ auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
+ Value *L0, *L1, *R0, *R1;
+ unsigned Width = V->getType()->getScalarSizeInBits();
+ auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
+
+ // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
+ auto RotL = m_OneUse(
+ m_c_Or(m_Shl(m_Value(L0), m_Value(L1)), m_LShr(m_Value(R0), Sub)));
+ if (RotL.match(V) && L0 == R0 && L1 == R1) {
+ X = L0;
+ Y = L1;
+ return Intrinsic::fshl;
+ }
+
+ // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
+ auto RotR = m_OneUse(
+ m_c_Or(m_LShr(m_Value(L0), m_Value(L1)), m_Shl(m_Value(R0), Sub)));
+ if (RotR.match(V) && L0 == R0 && L1 == R1) {
+ X = L0;
+ Y = L1;
+ return Intrinsic::fshr;
+ }
+
+ return Intrinsic::not_intrinsic;
+ };
+
+ // One phi operand must be a rotate operation, and the other phi operand must
+ // be the source value of that rotate operation:
+ // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+ PHINode &Phi = cast<PHINode>(I);
+ Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
+ Value *RotSrc, *RotAmt;
+ Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
+ if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
+ IID = matchRotate(P1, RotSrc, RotAmt);
+ if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+ return false;
+ assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
+ "Pattern must match funnel shift left or right");
+ }
+
+ // The incoming block with our source operand must be the "guard" block.
+ // That must contain a cmp+branch to avoid the rotate when the shift amount
+ // is equal to 0. The other incoming block is the block with the rotate.
+ BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
+ BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+ Instruction *TermI = GuardBB->getTerminator();
+ BasicBlock *TrueBB, *FalseBB;
+ ICmpInst::Predicate Pred;
+ if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), TrueBB,
+ FalseBB)))
+ return false;
+
+ BasicBlock *PhiBB = Phi.getParent();
+ if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+ return false;
+
+ // We matched a variation of this IR pattern:
+ // GuardBB:
+ // %cmp = icmp eq i32 %RotAmt, 0
+ // br i1 %cmp, label %PhiBB, label %RotBB
+ // RotBB:
+ // %sub = sub i32 32, %RotAmt
+ // %shr = lshr i32 %X, %sub
+ // %shl = shl i32 %X, %RotAmt
+ // %rot = or i32 %shr, %shl
+ // br label %PhiBB
+ // PhiBB:
+ // %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+ // -->
+ // llvm.fshl.i32(i32 %X, i32 %RotAmt)
+ IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+ Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
+ Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
+ return true;
+}
+
/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
/// of 'and' ops, then we also need to capture the fact that we saw an
@@ -69,9 +162,9 @@ struct MaskOps {
bool MatchAndChain;
bool FoundAnd1;
- MaskOps(unsigned BitWidth, bool MatchAnds) :
- Root(nullptr), Mask(APInt::getNullValue(BitWidth)),
- MatchAndChain(MatchAnds), FoundAnd1(false) {}
+ MaskOps(unsigned BitWidth, bool MatchAnds)
+ : Root(nullptr), Mask(APInt::getNullValue(BitWidth)),
+ MatchAndChain(MatchAnds), FoundAnd1(false) {}
};
/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
@@ -152,8 +245,8 @@ static bool foldAnyOrAllBitsSet(Instruction &I) {
IRBuilder<> Builder(&I);
Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
Value *And = Builder.CreateAnd(MOps.Root, Mask);
- Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask) :
- Builder.CreateIsNotNull(And);
+ Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
+ : Builder.CreateIsNotNull(And);
Value *Zext = Builder.CreateZExt(Cmp, I.getType());
I.replaceAllUsesWith(Zext);
return true;
@@ -174,8 +267,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
// Also, we want to avoid matching partial patterns.
// TODO: It would be more efficient if we removed dead instructions
// iteratively in this loop rather than waiting until the end.
- for (Instruction &I : make_range(BB.rbegin(), BB.rend()))
+ for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
+ MadeChange |= foldGuardedRotateToFunnelShift(I);
+ }
}
// We're done with transforms, so remove dead instructions.
diff --git a/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
index 199374cdabf3..f3c8bde9f8ff 100644
--- a/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
+++ b/contrib/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -13,6 +13,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TRANSFORMS_AGGRESSIVEINSTCOMBINE_COMBINEINTERNAL_H
+#define LLVM_LIB_TRANSFORMS_AGGRESSIVEINSTCOMBINE_COMBINEINTERNAL_H
+
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -119,3 +122,5 @@ private:
void ReduceExpressionDag(Type *SclTy);
};
} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index dfe05c4b2a5e..58f952b54f3a 100644
--- a/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -157,7 +157,7 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
SmallPtrSet<Instruction *, 8> Terminators;
for (BasicBlock &B : *F) {
auto *TI = B.getTerminator();
- if (TI->getNumSuccessors() == 0 && !TI->isExceptional() &&
+ if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
!isa<UnreachableInst>(TI))
Terminators.insert(TI);
}
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index cf63b678b618..4cb0a52961cc 100644
--- a/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -49,7 +49,7 @@ public:
BlockToIndexMapping(Function &F) {
for (BasicBlock &BB : F)
V.push_back(&BB);
- llvm::sort(V.begin(), V.end());
+ llvm::sort(V);
}
size_t blockToIndex(BasicBlock *BB) const {
@@ -546,7 +546,8 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
} else {
// For all other values, the spill is placed immediately after
// the definition.
- assert(!isa<TerminatorInst>(E.def()) && "unexpected terminator");
+ assert(!cast<Instruction>(E.def())->isTerminator() &&
+ "unexpected terminator");
InsertPt = cast<Instruction>(E.def())->getNextNode();
}
@@ -600,7 +601,7 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
}
// Sets the unwind edge of an instruction to a particular successor.
-static void setUnwindEdgeTo(TerminatorInst *TI, BasicBlock *Succ) {
+static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
if (auto *II = dyn_cast<InvokeInst>(TI))
II->setUnwindDest(Succ);
else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 49acc5e93a39..9eeceb217ba8 100644
--- a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -459,7 +459,7 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
DenseMap<Value *, Value *> ResolvedValues;
Instruction *I = InitialInst;
- while (isa<TerminatorInst>(I)) {
+ while (I->isTerminator()) {
if (isa<ReturnInst>(I)) {
if (I != InitialInst)
ReplaceInstWithInst(InitialInst, I->clone());
@@ -538,43 +538,92 @@ static void handleNoSuspendCoroutine(CoroBeginInst *CoroBegin, Type *FrameTy) {
CoroBegin->eraseFromParent();
}
-// look for a very simple pattern
-// coro.save
-// no other calls
-// resume or destroy call
-// coro.suspend
-//
-// If there are other calls between coro.save and coro.suspend, they can
-// potentially resume or destroy the coroutine, so it is unsafe to eliminate a
-// suspend point.
-static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
- CoroBeginInst *CoroBegin) {
- auto *Save = Suspend->getCoroSave();
- auto *BB = Suspend->getParent();
- if (BB != Save->getParent())
- return false;
+// SimplifySuspendPoint needs to check that there are no calls between
+// coro_save and coro_suspend, since any of those calls may potentially resume
+// the coroutine, in which case we cannot eliminate the suspend point.
+static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
+ for (Instruction *I = From; I != To; I = I->getNextNode()) {
+ // Assume that no intrinsic can resume the coroutine.
+ if (isa<IntrinsicInst>(I))
+ continue;
- CallSite SingleCallSite;
+ if (CallSite(I))
+ return true;
+ }
+ return false;
+}
- // Check that we have only one CallSite.
- for (Instruction *I = Save->getNextNode(); I != Suspend;
- I = I->getNextNode()) {
- if (isa<CoroFrameInst>(I))
- continue;
- if (isa<CoroSubFnInst>(I))
- continue;
- if (CallSite CS = CallSite(I)) {
- if (SingleCallSite)
- return false;
- else
- SingleCallSite = CS;
- }
+static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
+ SmallPtrSet<BasicBlock *, 8> Set;
+ SmallVector<BasicBlock *, 8> Worklist;
+
+ Set.insert(SaveBB);
+ Worklist.push_back(ResDesBB);
+
+ // Accumulate all blocks between SaveBB and ResDesBB. Because the coro.save
+ // intrinsic returns a token consumed by the suspend instruction, all blocks
+ // in between must eventually reach SaveBB when walking backwards from ResDesBB.
+ while (!Worklist.empty()) {
+ auto *BB = Worklist.pop_back_val();
+ Set.insert(BB);
+ for (auto *Pred : predecessors(BB))
+ if (Set.count(Pred) == 0)
+ Worklist.push_back(Pred);
}
- auto *CallInstr = SingleCallSite.getInstruction();
- if (!CallInstr)
+
+ // SaveBB and ResDesBB are checked separately in hasCallsBetween.
+ Set.erase(SaveBB);
+ Set.erase(ResDesBB);
+
+ for (auto *BB : Set)
+ if (hasCallsInBlockBetween(BB->getFirstNonPHI(), nullptr))
+ return true;
+
+ return false;
+}
+
+static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
+ auto *SaveBB = Save->getParent();
+ auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
+
+ if (SaveBB == ResumeOrDestroyBB)
+ return hasCallsInBlockBetween(Save->getNextNode(), ResumeOrDestroy);
+
+ // Any calls from Save to the end of the block?
+ if (hasCallsInBlockBetween(Save->getNextNode(), nullptr))
+ return true;
+
+ // Any calls from the beginning of the block up to ResumeOrDestroy?
+ if (hasCallsInBlockBetween(ResumeOrDestroyBB->getFirstNonPHI(),
+ ResumeOrDestroy))
+ return true;
+
+ // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
+ if (hasCallsInBlocksBetween(SaveBB, ResumeOrDestroyBB))
+ return true;
+
+ return false;
+}
+
+// If a SuspendIntrin is preceded by Resume or Destroy, we can eliminate the
+// suspend point and replace it with normal control flow.
+static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
+ CoroBeginInst *CoroBegin) {
+ Instruction *Prev = Suspend->getPrevNode();
+ if (!Prev) {
+ auto *Pred = Suspend->getParent()->getSinglePredecessor();
+ if (!Pred)
+ return false;
+ Prev = Pred->getTerminator();
+ }
+
+ CallSite CS{Prev};
+ if (!CS)
return false;
- auto *Callee = SingleCallSite.getCalledValue()->stripPointerCasts();
+ auto *CallInstr = CS.getInstruction();
+
+ auto *Callee = CS.getCalledValue()->stripPointerCasts();
// See if the callsite is for resumption or destruction of the coroutine.
auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
@@ -585,6 +634,13 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
if (SubFn->getFrame() != CoroBegin)
return false;
+ // See if the transformation is safe. Specifically, see if there are any
+ // calls in between Save and CallInstr. They can potentially resume the
+ // coroutine, rendering this optimization unsafe.
+ auto *Save = Suspend->getCoroSave();
+ if (hasCallsBetween(Save, CallInstr))
+ return false;
+
// Replace llvm.coro.suspend with the value that results in resumption over
// the resume or cleanup path.
Suspend->replaceAllUsesWith(SubFn->getRawIndex());
@@ -592,8 +648,20 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
Save->eraseFromParent();
// No longer need a call to coro.resume or coro.destroy.
+ if (auto *Invoke = dyn_cast<InvokeInst>(CallInstr)) {
+ BranchInst::Create(Invoke->getNormalDest(), Invoke);
+ }
+
+ // Grab the CalledValue from CS before erasing the CallInstr.
+ auto *CalledValue = CS.getCalledValue();
CallInstr->eraseFromParent();
+ // If it has no remaining users, remove it. Usually it is a bitcast of SubFn.
+ if (CalledValue != SubFn && CalledValue->user_empty())
+ if (auto *I = dyn_cast<Instruction>(CalledValue))
+ I->eraseFromParent();
+
+ // Now we are good to remove SubFn.
if (SubFn->user_empty())
SubFn->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 731faeb5dce4..cf84f916e24b 100644
--- a/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Coroutines.h"
+#include "llvm-c/Transforms/Coroutines.h"
#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/SmallVector.h"
@@ -344,3 +345,19 @@ void coro::Shape::buildFrom(Function &F) {
for (CoroSaveInst *CoroSave : UnusedCoroSaves)
CoroSave->eraseFromParent();
}
+
+void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCoroEarlyPass());
+}
+
+void LLVMAddCoroSplitPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCoroSplitPass());
+}
+
+void LLVMAddCoroElidePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCoroElidePass());
+}
+
+void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCoroCleanupPass());
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 3b735ddd192e..07138718ce2c 100644
--- a/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -150,7 +150,7 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) {
// declarations.
if (Callee && !Callee->isDeclaration() &&
CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
- return InlineCost::getAlways();
+ return InlineCost::getAlways("always inliner");
- return InlineCost::getNever();
+ return InlineCost::getNever("always inliner");
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index f2c2b55b1c5b..4663de0b049e 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -49,6 +49,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -213,7 +214,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
// Create the new function body and insert it into the module.
- Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName());
NF->copyAttributesFrom(F);
// Patch the pointer to LLVM function in debug info descriptor.
@@ -808,6 +810,21 @@ static bool canPaddingBeAccessed(Argument *arg) {
return false;
}
+static bool areFunctionArgsABICompatible(
+ const Function &F, const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<Argument *> &ArgsToPromote,
+ SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
+ for (const Use &U : F.uses()) {
+ CallSite CS(U.getUser());
+ const Function *Caller = CS.getCaller();
+ const Function *Callee = CS.getCalledFunction();
+ if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
+ !TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform))
+ return false;
+ }
+ return true;
+}
+
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
@@ -816,7 +833,8 @@ static Function *
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
unsigned MaxElements,
Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
- ReplaceCallSite) {
+ ReplaceCallSite,
+ const TargetTransformInfo &TTI) {
// Don't perform argument promotion for naked functions; otherwise we can end
// up removing parameters that are seemingly 'not used' as they are referred
// to in the assembly.
@@ -845,7 +863,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// Second check: make sure that all callers are direct callers. We can't
// transform functions that have indirect callers. Also see if the function
- // is self-recursive.
+ // is self-recursive and check that target features are compatible.
bool isSelfRecursive = false;
for (Use &U : F->uses()) {
CallSite CS(U.getUser());
@@ -954,6 +972,10 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
return nullptr;
+ if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote,
+ ByValArgsToTransform))
+ return nullptr;
+
return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
}
@@ -979,7 +1001,9 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
return FAM.getResult<AAManager>(F);
};
- Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None);
+ const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
+ Function *NewF =
+ promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
if (!NewF)
continue;
LocalChange = true;
@@ -1017,6 +1041,7 @@ struct ArgPromotion : public CallGraphSCCPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
getAAResultsAnalysisUsage(AU);
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -1042,6 +1067,7 @@ INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -1078,8 +1104,10 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
};
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF);
if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
- {ReplaceCallSite})) {
+ {ReplaceCallSite}, TTI)) {
LocalChange = true;
// Update the call graph for the newly promoted function.
diff --git a/contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index d642445b35de..de62cfc0c1db 100644
--- a/contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -345,6 +345,9 @@ private:
void visitInst(Instruction &I,
DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
+ // Simply bail if this instruction has no users.
+ if (I.use_empty())
+ return;
auto RegI = CVPLatticeKey(&I, IPOGrouping::Register);
ChangedValues[RegI] = getOverdefinedVal();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index e0b1037053f0..81f3634eaf28 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -40,7 +40,7 @@ using namespace llvm;
#define DEBUG_TYPE "constmerge"
-STATISTIC(NumMerged, "Number of global constants merged");
+STATISTIC(NumIdenticalMerged, "Number of identical global constants merged");
/// Find values that are marked as llvm.used.
static void FindUsedValues(GlobalVariable *LLVMUsed,
@@ -91,6 +91,37 @@ static unsigned getAlignment(GlobalVariable *GV) {
return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
+enum class CanMerge { No, Yes };
+static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) {
+ if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr())
+ return CanMerge::No;
+ if (hasMetadataOtherThanDebugLoc(Old))
+ return CanMerge::No;
+ assert(!hasMetadataOtherThanDebugLoc(New));
+ if (!Old->hasGlobalUnnamedAddr())
+ New->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+ return CanMerge::Yes;
+}
+
+static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
+ Constant *NewConstant = New;
+
+ LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @"
+ << New->getName() << "\n");
+
+ // Bump the alignment if necessary.
+ if (Old->getAlignment() || New->getAlignment())
+ New->setAlignment(std::max(getAlignment(Old), getAlignment(New)));
+
+ copyDebugLocMetadata(Old, New);
+ Old->replaceAllUsesWith(NewConstant);
+
+ // Delete the global value from the module.
+ assert(Old->hasLocalLinkage() &&
+ "Refusing to delete an externally visible global variable.");
+ Old->eraseFromParent();
+}
+
static bool mergeConstants(Module &M) {
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -100,17 +131,18 @@ static bool mergeConstants(Module &M) {
// Map unique constants to globals.
DenseMap<Constant *, GlobalVariable *> CMap;
- // Replacements - This vector contains a list of replacements to perform.
- SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
+ SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32>
+ SameContentReplacements;
- bool MadeChange = false;
+ size_t ChangesMade = 0;
+ size_t OldChangesMade = 0;
// Iterate constant merging while we are still making progress. Merging two
// constants together may allow us to merge other constants together if the
// second level constants have initializers which point to the globals that
// were just merged.
while (true) {
- // First: Find the canonical constants others will be merged with.
+ // Find the canonical constants others will be merged with.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
GlobalVariable *GV = &*GVI++;
@@ -119,6 +151,7 @@ static bool mergeConstants(Module &M) {
GV->removeDeadConstantUsers();
if (GV->use_empty() && GV->hasLocalLinkage()) {
GV->eraseFromParent();
+ ++ChangesMade;
continue;
}
@@ -148,12 +181,16 @@ static bool mergeConstants(Module &M) {
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
// it cannot be replace, but can be the canonical constant we merge with.
- if (!Slot || IsBetterCanonical(*GV, *Slot))
+ bool FirstConstantFound = !Slot;
+ if (FirstConstantFound || IsBetterCanonical(*GV, *Slot)) {
Slot = GV;
+ LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV->getName()
+ << (FirstConstantFound ? "\n" : " (updated)\n"));
+ }
}
- // Second: identify all globals that can be merged together, filling in
- // the Replacements vector. We cannot do the replacement in this pass
+ // Identify all globals that can be merged together, filling in the
+ // SameContentReplacements vector. We cannot do the replacement in this pass
// because doing so may cause initializers of other globals to be rewritten,
// invalidating the Constant* pointers in CMap.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
@@ -174,54 +211,43 @@ static bool mergeConstants(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- GlobalVariable *Slot = CMap[Init];
-
- if (!Slot || Slot == GV)
+ auto Found = CMap.find(Init);
+ if (Found == CMap.end())
continue;
- if (!Slot->hasGlobalUnnamedAddr() && !GV->hasGlobalUnnamedAddr())
+ GlobalVariable *Slot = Found->second;
+ if (Slot == GV)
continue;
- if (hasMetadataOtherThanDebugLoc(GV))
+ if (makeMergeable(GV, Slot) == CanMerge::No)
continue;
- if (!GV->hasGlobalUnnamedAddr())
- Slot->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
-
// Make all uses of the duplicate constant use the canonical version.
- Replacements.push_back(std::make_pair(GV, Slot));
+ LLVM_DEBUG(dbgs() << "Will replace: @" << GV->getName() << " -> @"
+ << Slot->getName() << "\n");
+ SameContentReplacements.push_back(std::make_pair(GV, Slot));
}
- if (Replacements.empty())
- return MadeChange;
- CMap.clear();
-
// Now that we have figured out which replacements must be made, do them all
// now. This avoid invalidating the pointers in CMap, which are unneeded
// now.
- for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
- // Bump the alignment if necessary.
- if (Replacements[i].first->getAlignment() ||
- Replacements[i].second->getAlignment()) {
- Replacements[i].second->setAlignment(
- std::max(getAlignment(Replacements[i].first),
- getAlignment(Replacements[i].second)));
- }
-
- copyDebugLocMetadata(Replacements[i].first, Replacements[i].second);
-
- // Eliminate any uses of the dead global.
- Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
-
- // Delete the global value from the module.
- assert(Replacements[i].first->hasLocalLinkage() &&
- "Refusing to delete an externally visible global variable.");
- Replacements[i].first->eraseFromParent();
+ for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) {
+ GlobalVariable *Old = SameContentReplacements[i].first;
+ GlobalVariable *New = SameContentReplacements[i].second;
+ replace(M, Old, New);
+ ++ChangesMade;
+ ++NumIdenticalMerged;
}
- NumMerged += Replacements.size();
- Replacements.clear();
+ if (ChangesMade == OldChangesMade)
+ break;
+ OldChangesMade = ChangesMade;
+
+ SameContentReplacements.clear();
+ CMap.clear();
}
+
+ return ChangesMade;
}
PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) {
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index cd2bd734eb26..cb30e8f46a54 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -165,7 +164,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
unsigned NumArgs = Params.size();
// Create the new function body and insert it into the module...
- Function *NF = Function::Create(NFTy, Fn.getLinkage());
+ Function *NF = Function::Create(NFTy, Fn.getLinkage(), Fn.getAddressSpace());
NF->copyAttributesFrom(&Fn);
NF->setComdat(Fn.getComdat());
Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
@@ -289,16 +288,21 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
return false;
SmallVector<unsigned, 8> UnusedArgs;
+ bool Changed = false;
+
for (Argument &Arg : Fn.args()) {
- if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+ if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) {
+ if (Arg.isUsedByMetadata()) {
+ Arg.replaceAllUsesWith(UndefValue::get(Arg.getType()));
+ Changed = true;
+ }
UnusedArgs.push_back(Arg.getArgNo());
+ }
}
if (UnusedArgs.empty())
return false;
- bool Changed = false;
-
for (Use &U : Fn.uses()) {
CallSite CS(U.getUser());
if (!CS || !CS.isCallee(&U))
@@ -859,7 +863,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
return false;
// Create the new function body and insert it into the module...
- Function *NF = Function::Create(NFTy, F->getLinkage());
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace());
NF->copyAttributesFrom(F);
NF->setComdat(F->getComdat());
NF->setAttributes(NewPAL);
@@ -949,16 +953,16 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
ArgAttrVec.clear();
Instruction *New = NewCS.getInstruction();
- if (!Call->use_empty()) {
+ if (!Call->use_empty() || Call->isUsedByMetadata()) {
if (New->getType() == Call->getType()) {
// Return type not changed? Just replace users then.
Call->replaceAllUsesWith(New);
New->takeName(Call);
} else if (New->getType()->isVoidTy()) {
- // Our return value has uses, but they will get removed later on.
- // Replace by null for now.
+ // If the return value is dead, replace any uses of it with undef
+ // (any non-debug value uses will get removed later on).
if (!Call->getType()->isX86_MMXTy())
- Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
} else {
assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
@@ -1018,10 +1022,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
I2->takeName(&*I);
++I2;
} else {
- // If this argument is dead, replace any uses of it with null constants
- // (these are guaranteed to become unused later on).
+ // If this argument is dead, replace any uses of it with undef
+ // (any non-debug value uses will get removed later on).
if (!I->getType()->isX86_MMXTy())
- I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
}
// If we change the return value of the function we must rewrite any return
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index d45a88323910..a744d7f2d2d9 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -135,6 +135,7 @@ namespace {
llvm::Value *Declaration;
if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ CurI->getAddressSpace(),
CurI->getName(), &M);
} else {
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 37273f975417..4dc1529ddbf5 100644
--- a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -58,6 +58,7 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
.Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
.Case("sanitize_memory", Attribute::SanitizeMemory)
.Case("sanitize_thread", Attribute::SanitizeThread)
+ .Case("speculative_load_hardening", Attribute::SpeculativeLoadHardening)
.Case("ssp", Attribute::StackProtect)
.Case("sspreq", Attribute::StackProtectReq)
.Case("sspstrong", Attribute::StackProtectStrong)
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 010b0a29807d..4e2a82b56eec 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
@@ -66,6 +67,7 @@ using namespace llvm;
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
+STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
@@ -113,27 +115,30 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
if (AliasAnalysis::onlyReadsMemory(MRB))
return MAK_ReadOnly;
- // Conservatively assume it writes to memory.
+ if (AliasAnalysis::doesNotReadMemory(MRB))
+ return MAK_WriteOnly;
+
+ // Conservatively assume it reads and writes to memory.
return MAK_MayWrite;
}
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
+ bool WritesMemory = false;
for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
Instruction *I = &*II;
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
- CallSite CS(cast<Value>(I));
- if (CS) {
+ if (auto *Call = dyn_cast<CallBase>(I)) {
// Ignore calls to functions in the same SCC, as long as the call sites
// don't have operand bundles. Calls with operand bundles are allowed to
// have memory effects not described by the memory effects of the call
// target.
- if (!CS.hasOperandBundles() && CS.getCalledFunction() &&
- SCCNodes.count(CS.getCalledFunction()))
+ if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
+ SCCNodes.count(Call->getCalledFunction()))
continue;
- FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);
+ FunctionModRefBehavior MRB = AAR.getModRefBehavior(Call);
ModRefInfo MRI = createModRefInfo(MRB);
// If the call doesn't access memory, we're done.
@@ -141,9 +146,9 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
continue;
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
- // The call could access any memory. If that includes writes, give up.
+ // The call could access any memory. If that includes writes, note it.
if (isModSet(MRI))
- return MAK_MayWrite;
+ WritesMemory = true;
// If it reads, note it.
if (isRefSet(MRI))
ReadsMemory = true;
@@ -152,7 +157,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
- for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ for (CallSite::arg_iterator CI = Call->arg_begin(), CE = Call->arg_end();
CI != CE; ++CI) {
Value *Arg = *CI;
if (!Arg->getType()->isPtrOrPtrVectorTy())
@@ -160,7 +165,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
AAMDNodes AAInfo;
I->getAAMetadata(AAInfo);
- MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);
+ MemoryLocation Loc(Arg, LocationSize::unknown(), AAInfo);
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
@@ -168,8 +173,8 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
continue;
if (isModSet(MRI))
- // Writes non-local memory. Give up.
- return MAK_MayWrite;
+ // Writes non-local memory.
+ WritesMemory = true;
if (isRefSet(MRI))
// Ok, it reads non-local memory.
ReadsMemory = true;
@@ -198,14 +203,21 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Any remaining instructions need to be taken seriously! Check if they
// read or write memory.
- if (I->mayWriteToMemory())
- // Writes memory. Just give up.
- return MAK_MayWrite;
+ //
+ // If this instruction may write to memory, remember that.
+ WritesMemory |= I->mayWriteToMemory();
// If this instruction may read memory, remember that.
ReadsMemory |= I->mayReadFromMemory();
}
+ if (WritesMemory) {
+ if (!ReadsMemory)
+ return MAK_WriteOnly;
+ else
+ return MAK_MayWrite;
+ }
+
return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
@@ -220,6 +232,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
+ bool WritesMemory = false;
for (Function *F : SCCNodes) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
@@ -234,6 +247,9 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
case MAK_ReadOnly:
ReadsMemory = true;
break;
+ case MAK_WriteOnly:
+ WritesMemory = true;
+ break;
case MAK_ReadNone:
// Nothing to do!
break;
@@ -243,6 +259,9 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
bool MadeChange = false;
+
+ assert(!(ReadsMemory && WritesMemory) &&
+ "Function marked read-only and write-only");
for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
// Already perfect!
@@ -252,16 +271,32 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// No change.
continue;
+ if (F->doesNotReadMemory() && WritesMemory)
+ continue;
+
MadeChange = true;
// Clear out any existing attributes.
F->removeFnAttr(Attribute::ReadOnly);
F->removeFnAttr(Attribute::ReadNone);
+ F->removeFnAttr(Attribute::WriteOnly);
+
+ if (!WritesMemory && !ReadsMemory) {
+ // Clear out any "access range attributes" if readnone was deduced.
+ F->removeFnAttr(Attribute::ArgMemOnly);
+ F->removeFnAttr(Attribute::InaccessibleMemOnly);
+ F->removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+ }
// Add in the new attribute.
- F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+ if (WritesMemory && !ReadsMemory)
+ F->addFnAttr(Attribute::WriteOnly);
+ else
+ F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
- if (ReadsMemory)
+ if (WritesMemory && !ReadsMemory)
+ ++NumWriteOnly;
+ else if (ReadsMemory)
++NumReadOnly;
else
++NumReadNone;
@@ -1272,13 +1307,14 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// If all of the calls in F are identifiable and are to norecurse functions, F
// is norecurse. This check also detects self-recursion as F is not currently
// marked norecurse, so any called from F to F will not be marked norecurse.
- for (Instruction &I : instructions(*F))
- if (auto CS = CallSite(&I)) {
- Function *Callee = CS.getCalledFunction();
- if (!Callee || Callee == F || !Callee->doesNotRecurse())
- // Function calls a potentially recursive function.
- return false;
- }
+ for (auto &BB : *F)
+ for (auto &I : BB.instructionsWithoutDebug())
+ if (auto CS = CallSite(&I)) {
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee || Callee == F || !Callee->doesNotRecurse())
+ // Function calls a potentially recursive function.
+ return false;
+ }
// Every call was to a non-recursive function other than this function, and
// we have no indirect recursion as the SCC size is one. This function cannot
@@ -1286,6 +1322,31 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
return setDoesNotRecurse(*F);
}
+template <typename AARGetterT>
+static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+ bool HasUnknownCall) {
+ bool Changed = false;
+
+ // Bail if the SCC only contains optnone functions.
+ if (SCCNodes.empty())
+ return Changed;
+
+ Changed |= addArgumentReturnedAttrs(SCCNodes);
+ Changed |= addReadAttrs(SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(SCCNodes);
+
+ // If we have no external nodes participating in the SCC, we can deduce some
+ // more precise attributes as well.
+ if (!HasUnknownCall) {
+ Changed |= addNoAliasAttrs(SCCNodes);
+ Changed |= addNonNullAttrs(SCCNodes);
+ Changed |= inferAttrsFromFunctionBodies(SCCNodes);
+ Changed |= addNoRecurseAttrs(SCCNodes);
+ }
+
+ return Changed;
+}
+
PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG,
@@ -1328,21 +1389,10 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
SCCNodes.insert(&F);
}
- bool Changed = false;
- Changed |= addArgumentReturnedAttrs(SCCNodes);
- Changed |= addReadAttrs(SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(SCCNodes);
-
- // If we have no external nodes participating in the SCC, we can deduce some
- // more precise attributes as well.
- if (!HasUnknownCall) {
- Changed |= addNoAliasAttrs(SCCNodes);
- Changed |= addNonNullAttrs(SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(SCCNodes);
- Changed |= addNoRecurseAttrs(SCCNodes);
- }
+ if (deriveAttrsInPostOrder(SCCNodes, AARGetter, HasUnknownCall))
+ return PreservedAnalyses::none();
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ return PreservedAnalyses::all();
}
namespace {
@@ -1382,7 +1432,6 @@ Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
- bool Changed = false;
// Fill SCCNodes with the elements of the SCC. Used for quickly looking up
// whether a given CallGraphNode is in this SCC. Also track whether there are
@@ -1403,24 +1452,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
SCCNodes.insert(F);
}
- // Skip it if the SCC only contains optnone functions.
- if (SCCNodes.empty())
- return Changed;
-
- Changed |= addArgumentReturnedAttrs(SCCNodes);
- Changed |= addReadAttrs(SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(SCCNodes);
-
- // If we have no external nodes participating in the SCC, we can deduce some
- // more precise attributes as well.
- if (!ExternalNode) {
- Changed |= addNoAliasAttrs(SCCNodes);
- Changed |= addNonNullAttrs(SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(SCCNodes);
- Changed |= addNoRecurseAttrs(SCCNodes);
- }
-
- return Changed;
+ return deriveAttrsInPostOrder(SCCNodes, AARGetter, ExternalNode);
}
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index ed97d342f348..1223a23512ed 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -60,8 +60,17 @@ using namespace llvm;
#define DEBUG_TYPE "function-import"
-STATISTIC(NumImportedFunctions, "Number of functions imported");
-STATISTIC(NumImportedGlobalVars, "Number of global variables imported");
+STATISTIC(NumImportedFunctionsThinLink,
+ "Number of functions thin link decided to import");
+STATISTIC(NumImportedHotFunctionsThinLink,
+ "Number of hot functions thin link decided to import");
+STATISTIC(NumImportedCriticalFunctionsThinLink,
+ "Number of critical functions thin link decided to import");
+STATISTIC(NumImportedGlobalVarsThinLink,
+ "Number of global variables thin link decided to import");
+STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
+STATISTIC(NumImportedGlobalVars,
+ "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
@@ -107,6 +116,10 @@ static cl::opt<float> ImportColdMultiplier(
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
cl::desc("Print imported functions"));
+static cl::opt<bool> PrintImportFailures(
+ "print-import-failures", cl::init(false), cl::Hidden,
+ cl::desc("Print information for functions rejected for importing"));
+
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
cl::desc("Compute dead symbols"));
@@ -163,13 +176,18 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex &Index,
ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
- unsigned Threshold, StringRef CallerModulePath) {
+ unsigned Threshold, StringRef CallerModulePath,
+ FunctionImporter::ImportFailureReason &Reason,
+ GlobalValue::GUID GUID) {
+ Reason = FunctionImporter::ImportFailureReason::None;
auto It = llvm::find_if(
CalleeSummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
auto *GVSummary = SummaryPtr.get();
- if (!Index.isGlobalValueLive(GVSummary))
+ if (!Index.isGlobalValueLive(GVSummary)) {
+ Reason = FunctionImporter::ImportFailureReason::NotLive;
return false;
+ }
// For SamplePGO, in computeImportForFunction the OriginalId
// may have been used to locate the callee summary list (See
@@ -184,11 +202,15 @@ selectCallee(const ModuleSummaryIndex &Index,
// When this happens, the logic for SamplePGO kicks in and
// the static variable in 2) will be found, which needs to be
// filtered out.
- if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind)
+ if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind) {
+ Reason = FunctionImporter::ImportFailureReason::GlobalVar;
return false;
- if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
+ }
+ if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) {
+ Reason = FunctionImporter::ImportFailureReason::InterposableLinkage;
// There is no point in importing these, we can't inline them
return false;
+ }
auto *Summary = cast<FunctionSummary>(GVSummary->getBaseObject());
@@ -204,14 +226,29 @@ selectCallee(const ModuleSummaryIndex &Index,
// a local in another module.
if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
CalleeSummaryList.size() > 1 &&
- Summary->modulePath() != CallerModulePath)
+ Summary->modulePath() != CallerModulePath) {
+ Reason =
+ FunctionImporter::ImportFailureReason::LocalLinkageNotInModule;
return false;
+ }
- if (Summary->instCount() > Threshold)
+ if (Summary->instCount() > Threshold) {
+ Reason = FunctionImporter::ImportFailureReason::TooLarge;
return false;
+ }
- if (Summary->notEligibleToImport())
+ // Skip if it isn't legal to import (e.g. may reference unpromotable
+ // locals).
+ if (Summary->notEligibleToImport()) {
+ Reason = FunctionImporter::ImportFailureReason::NotEligible;
return false;
+ }
+
+ // Don't bother importing if we can't inline it anyway.
+ if (Summary->fflags().NoInline) {
+ Reason = FunctionImporter::ImportFailureReason::NoInline;
+ return false;
+ }
return true;
});
@@ -256,12 +293,25 @@ static void computeImportForReferencedGlobals(
LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
+ // If this is a local variable, make sure we import the copy
+ // in the caller's module. The only time a local variable can
+ // share an entry in the index is if there is a local with the same name
+ // in another module that had the same source file name (in a different
+ // directory), where each was compiled in its own directory so there
+ // was no distinguishing path.
+ auto LocalNotInModule = [&](const GlobalValueSummary *RefSummary) -> bool {
+ return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
+ RefSummary->modulePath() != Summary.modulePath();
+ };
+
for (auto &RefSummary : VI.getSummaryList())
- if (RefSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind &&
- !RefSummary->notEligibleToImport() &&
- !GlobalValue::isInterposableLinkage(RefSummary->linkage()) &&
- RefSummary->refs().empty()) {
- ImportList[RefSummary->modulePath()].insert(VI.getGUID());
+ if (isa<GlobalVarSummary>(RefSummary.get()) &&
+ canImportGlobalVar(RefSummary.get()) &&
+ !LocalNotInModule(RefSummary.get())) {
+ auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID());
+ // Only update stat if we haven't already imported this variable.
+ if (ILI.second)
+ NumImportedGlobalVarsThinLink++;
if (ExportLists)
(*ExportLists)[RefSummary->modulePath()].insert(VI.getGUID());
break;
@@ -269,6 +319,29 @@ static void computeImportForReferencedGlobals(
}
}
+static const char *
+getFailureName(FunctionImporter::ImportFailureReason Reason) {
+ switch (Reason) {
+ case FunctionImporter::ImportFailureReason::None:
+ return "None";
+ case FunctionImporter::ImportFailureReason::GlobalVar:
+ return "GlobalVar";
+ case FunctionImporter::ImportFailureReason::NotLive:
+ return "NotLive";
+ case FunctionImporter::ImportFailureReason::TooLarge:
+ return "TooLarge";
+ case FunctionImporter::ImportFailureReason::InterposableLinkage:
+ return "InterposableLinkage";
+ case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
+ return "LocalLinkageNotInModule";
+ case FunctionImporter::ImportFailureReason::NotEligible:
+ return "NotEligible";
+ case FunctionImporter::ImportFailureReason::NoInline:
+ return "NoInline";
+ }
+ llvm_unreachable("invalid reason");
+}
+
/// Compute the list of functions to import for a given caller. Mark these
/// imported functions and the symbols they reference in their source module as
/// exported from their source module.
@@ -315,11 +388,17 @@ static void computeImportForFunction(
const auto NewThreshold =
Threshold * GetBonusMultiplier(Edge.second.getHotness());
- auto IT = ImportThresholds.insert(
- std::make_pair(VI.getGUID(), std::make_pair(NewThreshold, nullptr)));
+ auto IT = ImportThresholds.insert(std::make_pair(
+ VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
bool PreviouslyVisited = !IT.second;
- auto &ProcessedThreshold = IT.first->second.first;
- auto &CalleeSummary = IT.first->second.second;
+ auto &ProcessedThreshold = std::get<0>(IT.first->second);
+ auto &CalleeSummary = std::get<1>(IT.first->second);
+ auto &FailureInfo = std::get<2>(IT.first->second);
+
+ bool IsHotCallsite =
+ Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
+ bool IsCriticalCallsite =
+ Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;
const FunctionSummary *ResolvedCalleeSummary = nullptr;
if (CalleeSummary) {
@@ -344,16 +423,37 @@ static void computeImportForFunction(
LLVM_DEBUG(
dbgs() << "ignored! Target was already rejected with Threshold "
<< ProcessedThreshold << "\n");
+ if (PrintImportFailures) {
+ assert(FailureInfo &&
+ "Expected FailureInfo for previously rejected candidate");
+ FailureInfo->Attempts++;
+ }
continue;
}
+ FunctionImporter::ImportFailureReason Reason;
CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
- Summary.modulePath());
+ Summary.modulePath(), Reason, VI.getGUID());
if (!CalleeSummary) {
// Update with new larger threshold if this was a retry (otherwise
- // we would have already inserted with NewThreshold above).
- if (PreviouslyVisited)
+ // we would have already inserted with NewThreshold above). Also
+ // update failure info if requested.
+ if (PreviouslyVisited) {
ProcessedThreshold = NewThreshold;
+ if (PrintImportFailures) {
+ assert(FailureInfo &&
+ "Expected FailureInfo for previously rejected candidate");
+ FailureInfo->Reason = Reason;
+ FailureInfo->Attempts++;
+ FailureInfo->MaxHotness =
+ std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
+ }
+ } else if (PrintImportFailures) {
+ assert(!FailureInfo &&
+ "Expected no FailureInfo for newly rejected candidate");
+ FailureInfo = llvm::make_unique<FunctionImporter::ImportFailureInfo>(
+ VI, Edge.second.getHotness(), Reason, 1);
+ }
LLVM_DEBUG(
dbgs() << "ignored! No qualifying callee with summary found.\n");
continue;
@@ -371,6 +471,13 @@ static void computeImportForFunction(
// We previously decided to import this GUID definition if it was already
// inserted in the set of imports from the exporting module.
bool PreviouslyImported = !ILI.second;
+ if (!PreviouslyImported) {
+ NumImportedFunctionsThinLink++;
+ if (IsHotCallsite)
+ NumImportedHotFunctionsThinLink++;
+ if (IsCriticalCallsite)
+ NumImportedCriticalFunctionsThinLink++;
+ }
// Make exports in the source module.
if (ExportLists) {
@@ -404,8 +511,6 @@ static void computeImportForFunction(
return Threshold * ImportInstrFactor;
};
- bool IsHotCallsite =
- Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
ImportCount++;
@@ -420,7 +525,7 @@ static void computeImportForFunction(
/// another module (that may require promotion).
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
- FunctionImporter::ImportMapTy &ImportList,
+ StringRef ModName, FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
@@ -460,6 +565,30 @@ static void ComputeImportForModule(
Worklist, ImportList, ExportLists,
ImportThresholds);
}
+
+ // Print stats about functions considered but rejected for importing
+ // when requested.
+ if (PrintImportFailures) {
+ dbgs() << "Missed imports into module " << ModName << "\n";
+ for (auto &I : ImportThresholds) {
+ auto &ProcessedThreshold = std::get<0>(I.second);
+ auto &CalleeSummary = std::get<1>(I.second);
+ auto &FailureInfo = std::get<2>(I.second);
+ if (CalleeSummary)
+ continue; // We are going to import.
+ assert(FailureInfo);
+ FunctionSummary *FS = nullptr;
+ if (!FailureInfo->VI.getSummaryList().empty())
+ FS = dyn_cast<FunctionSummary>(
+ FailureInfo->VI.getSummaryList()[0]->getBaseObject());
+ dbgs() << FailureInfo->VI
+ << ": Reason = " << getFailureName(FailureInfo->Reason)
+ << ", Threshold = " << ProcessedThreshold
+ << ", Size = " << (FS ? (int)FS->instCount() : -1)
+ << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
+ << ", Attempts = " << FailureInfo->Attempts << "\n";
+ }
+ }
}
#ifndef NDEBUG
@@ -497,7 +626,8 @@ void llvm::ComputeCrossModuleImport(
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
LLVM_DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
- ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
+ ComputeImportForModule(DefinedGVSummaries.second, Index,
+ DefinedGVSummaries.first(), ImportList,
&ExportLists);
}
@@ -568,7 +698,7 @@ void llvm::ComputeCrossModuleImportForModule(
// Compute the import list for this module.
LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
- ComputeImportForModule(FunctionSummaryMap, Index, ImportList);
+ ComputeImportForModule(FunctionSummaryMap, Index, ModulePath, ImportList);
#ifndef NDEBUG
dumpImportListForModule(Index, ModulePath, ImportList);
@@ -647,29 +777,38 @@ void llvm::computeDeadSymbols(
VI = updateValueInfoForIndirectCalls(Index, VI);
if (!VI)
return;
- for (auto &S : VI.getSummaryList())
- if (S->isLive())
- return;
+
+ // We need to make sure all variants of the symbol are scanned, since an
+ // alias can make one (but not all) of them alive.
+ if (llvm::all_of(VI.getSummaryList(),
+ [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
+ return S->isLive();
+ }))
+ return;
// We only keep live symbols that are known to be non-prevailing if any are
- // available_externally. Those symbols are discarded later in the
- // EliminateAvailableExternally pass and setting them to not-live breaks
- // downstreams users of liveness information (PR36483).
+ // available_externally, linkonce_odr, or weak_odr. Those symbols are
+ // discarded later in the EliminateAvailableExternally pass, and setting them
+ // to not-live could break downstream users of liveness information (PR36483)
+ // or limit optimization opportunities.
if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
- bool AvailableExternally = false;
+ bool KeepAliveLinkage = false;
bool Interposable = false;
for (auto &S : VI.getSummaryList()) {
- if (S->linkage() == GlobalValue::AvailableExternallyLinkage)
- AvailableExternally = true;
+ if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
+ S->linkage() == GlobalValue::WeakODRLinkage ||
+ S->linkage() == GlobalValue::LinkOnceODRLinkage)
+ KeepAliveLinkage = true;
else if (GlobalValue::isInterposableLinkage(S->linkage()))
Interposable = true;
}
- if (!AvailableExternally)
+ if (!KeepAliveLinkage)
return;
if (Interposable)
- report_fatal_error("Interposable and available_externally symbol");
+ report_fatal_error(
+ "Interposable and available_externally/linkonce_odr/weak_odr symbol");
}
for (auto &S : VI.getSummaryList())
@@ -700,6 +839,25 @@ void llvm::computeDeadSymbols(
NumLiveSymbols += LiveSymbols;
}
+// Compute dead symbols and propagate constants in combined index.
+void llvm::computeDeadSymbolsWithConstProp(
+ ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
+ function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
+ bool ImportEnabled) {
+ computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+ if (ImportEnabled) {
+ Index.propagateConstants(GUIDPreservedSymbols);
+ } else {
+ // If import is disabled we should drop read-only attribute
+ // from all summaries to prevent internalization.
+ for (auto &P : Index)
+ for (auto &S : P.second.SummaryList)
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get()))
+ GVS->setReadOnly(false);
+ }
+}
+
/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
void llvm::gatherImportedSummariesForModule(
@@ -758,7 +916,8 @@ bool llvm::convertToDeclaration(GlobalValue &GV) {
if (GV.getValueType()->isFunctionTy())
NewGV =
Function::Create(cast<FunctionType>(GV.getValueType()),
- GlobalValue::ExternalLinkage, "", GV.getParent());
+ GlobalValue::ExternalLinkage, GV.getAddressSpace(),
+ "", GV.getParent());
else
NewGV =
new GlobalVariable(*GV.getParent(), GV.getValueType(),
@@ -773,8 +932,8 @@ bool llvm::convertToDeclaration(GlobalValue &GV) {
return true;
}
-/// Fixup WeakForLinker linkages in \p TheModule based on summary analysis.
-void llvm::thinLTOResolveWeakForLinkerModule(
+/// Fixup prevailing symbol linkages in \p TheModule based on summary analysis.
+void llvm::thinLTOResolvePrevailingInModule(
Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
auto updateLinkage = [&](GlobalValue &GV) {
// See if the global summary analysis computed a new resolved linkage.
@@ -791,13 +950,15 @@ void llvm::thinLTOResolveWeakForLinkerModule(
// as we need access to the resolution vectors for each input file in
// order to find which symbols have been redefined.
// We may consider reorganizing this code and moving the linkage recording
- // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex.
+ // somewhere else, e.g. in thinLTOResolvePrevailingInIndex.
if (NewLinkage == GlobalValue::WeakAnyLinkage) {
GV.setLinkage(NewLinkage);
return;
}
- if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
+ if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
+ // In case it was dead and already converted to a declaration.
+ GV.isDeclaration())
return;
// Check for a non-prevailing def that has interposable linkage
// (e.g. non-odr weak or linkonce). In that case we can't simply
@@ -808,7 +969,7 @@ void llvm::thinLTOResolveWeakForLinkerModule(
GlobalValue::isInterposableLinkage(GV.getLinkage())) {
if (!convertToDeclaration(GV))
// FIXME: Change this to collect replaced GVs and later erase
- // them from the parent module once thinLTOResolveWeakForLinkerGUID is
+ // them from the parent module once thinLTOResolvePrevailingGUID is
// changed to enable this for aliases.
llvm_unreachable("Expected GV to be converted");
} else {
@@ -894,6 +1055,18 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
return NewFn;
}
+// Internalize values that we marked with a specific attribute
+// in processGlobalForThinLTO.
+static void internalizeImmutableGVs(Module &M) {
+ for (auto &GV : M.globals())
+ // Skip GVs which have been converted to declarations
+ // by dropDeadSymbols.
+ if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
+ GV.setLinkage(GlobalValue::InternalLinkage);
+ GV.setVisibility(GlobalValue::DefaultVisibility);
+ }
+}
+
// Automatically import functions in Module \p DestModule based on the summaries
// index.
Expected<bool> FunctionImporter::importFunctions(
@@ -1017,6 +1190,8 @@ Expected<bool> FunctionImporter::importFunctions(
NumImportedModules++;
}
+ internalizeImmutableGVs(DestModule);
+
NumImportedFunctions += (ImportedCount - ImportedGVCount);
NumImportedGlobalVars += ImportedGVCount;
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index ada9eb80e680..34de87433367 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
@@ -75,13 +76,17 @@ ModulePass *llvm::createGlobalDCEPass() {
return new GlobalDCELegacyPass();
}
-/// Returns true if F contains only a single "ret" instruction.
+/// Returns true if F is effectively empty.
static bool isEmptyFunction(Function *F) {
BasicBlock &Entry = F->getEntryBlock();
- if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front()))
- return false;
- ReturnInst &RI = cast<ReturnInst>(Entry.front());
- return RI.getReturnValue() == nullptr;
+ for (auto &I : Entry) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (auto *RI = dyn_cast<ReturnInst>(&I))
+ return !RI->getReturnValue();
+ break;
+ }
+ return false;
}
/// Compute the set of GlobalValue that depends from V.
@@ -165,7 +170,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Functions with external linkage are needed if they have a body.
// Externally visible & appending globals are needed, if they have an
// initializer.
- if (!GO.isDeclaration() && !GO.hasAvailableExternallyLinkage())
+ if (!GO.isDeclaration())
if (!GO.isDiscardableIfUnused())
MarkLive(GO);
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 1761d7faff57..3005aafd06b1 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1710,19 +1710,25 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
}
}
- new StoreInst(StoreVal, NewGV, false, 0,
- SI->getOrdering(), SI->getSyncScopeID(), SI);
+ StoreInst *NSI =
+ new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(),
+ SI->getSyncScopeID(), SI);
+ NSI->setDebugLoc(SI->getDebugLoc());
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
LI->getOrdering(), LI->getSyncScopeID(), LI);
- Value *NSI;
+ Instruction *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
else
NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
NSI->takeName(LI);
+ // Since LI is split into two instructions, NLI and NSI both inherit the
+ // same DebugLoc
+ NLI->setDebugLoc(LI->getDebugLoc());
+ NSI->setDebugLoc(LI->getDebugLoc());
LI->replaceAllUsesWith(NSI);
}
UI->eraseFromParent();
@@ -2107,6 +2113,13 @@ static bool hasChangeableCC(Function *F) {
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false;
+ // Don't break the invariant that the inalloca parameter is the only parameter
+ // passed in memory.
+ // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
+ // alloca it uses to the entry block if possible.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ return false;
+
// FIXME: Change CC for the whole chain of musttail calls when possible.
//
// Can't change CC of the function that either has musttail calls, or is a
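The TryToShrinkGlobalToBoolean hunk above follows the rule that when one instruction is rewritten into two, both replacements inherit the original DebugLoc so the debugger still maps them to the same source line. A tiny standalone model of that rule follows; Inst and splitLoad are invented for illustration and are not LLVM types.

#include <iostream>
#include <string>
#include <utility>

struct Inst {
  std::string Name;
  int DebugLine; // 0 means "no source location"
};

// Rewrite one load into a narrow load plus a widening instruction; both new
// instructions carry the original instruction's location.
static std::pair<Inst, Inst> splitLoad(const Inst &OldLoad) {
  Inst NarrowLoad{OldLoad.Name + ".b", OldLoad.DebugLine};
  Inst Widen{OldLoad.Name + ".zext", OldLoad.DebugLine};
  return {NarrowLoad, Widen};
}

int main() {
  Inst LI{"ld", 42};
  std::pair<Inst, Inst> Split = splitLoad(LI);
  std::cout << Split.first.Name << " keeps line " << Split.first.DebugLine << "\n"
            << Split.second.Name << " keeps line " << Split.second.DebugLine << "\n";
}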
diff --git a/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
new file mode 100644
index 000000000000..924a7d5fbd9c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -0,0 +1,643 @@
+//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Outline cold regions to a separate function.
+// TODO: Update BFI and BPI
+// TODO: Add all the outlined functions to a separate section.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+
+#define DEBUG_TYPE "hotcoldsplit"
+
+STATISTIC(NumColdRegionsFound, "Number of cold regions found.");
+STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<int>
+ MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
+ cl::desc("Code size threshold for outlining within a "
+ "single BB (as a multiple of TCC_Basic)"));
+
+namespace {
+
+struct PostDomTree : PostDomTreeBase<BasicBlock> {
+ PostDomTree(Function &F) { recalculate(F); }
+};
+
+/// A sequence of basic blocks.
+///
+/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
+using BlockSequence = SmallVector<BasicBlock *, 0>;
+
+// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
+// this function unless you modify the MBB version as well.
+//
+/// A block with no successors that does not return probably ends in unreachable and is cold.
+/// Also consider a block that ends in an indirect branch to be a return block,
+/// since many targets use plain indirect branches to return.
+bool blockEndsInUnreachable(const BasicBlock &BB) {
+ if (!succ_empty(&BB))
+ return false;
+ if (BB.empty())
+ return true;
+ const Instruction *I = BB.getTerminator();
+ return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
+}
+
+bool unlikelyExecuted(BasicBlock &BB) {
+ // Exception handling blocks are unlikely executed.
+ if (BB.isEHPad())
+ return true;
+
+ // The block is cold if it calls/invokes a cold function.
+ for (Instruction &I : BB)
+ if (auto CS = CallSite(&I))
+ if (CS.hasFnAttr(Attribute::Cold))
+ return true;
+
+ // The block is cold if it has an unreachable terminator, unless it's
+ // preceded by a call to a (possibly warm) noreturn function (e.g. longjmp).
+ if (blockEndsInUnreachable(BB)) {
+ if (auto *CI =
+ dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
+ if (CI->hasFnAttr(Attribute::NoReturn))
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+/// Check whether it's safe to outline \p BB.
+static bool mayExtractBlock(const BasicBlock &BB) {
+ return !BB.hasAddressTaken() && !BB.isEHPad();
+}
+
+/// Check whether \p Region is profitable to outline.
+static bool isProfitableToOutline(const BlockSequence &Region,
+ TargetTransformInfo &TTI) {
+ if (Region.size() > 1)
+ return true;
+
+ int Cost = 0;
+ const BasicBlock &BB = *Region[0];
+ for (const Instruction &I : BB) {
+ if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
+ continue;
+
+ Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+ if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
+ return true;
+ }
+ return false;
+}
+
+/// Mark \p F cold. Return true if it's changed.
+static bool markEntireFunctionCold(Function &F) {
+ assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
+ bool Changed = false;
+ if (!F.hasFnAttribute(Attribute::MinSize)) {
+ F.addFnAttr(Attribute::MinSize);
+ Changed = true;
+ }
+ // TODO: Move this function into a cold section.
+ return Changed;
+}
+
+class HotColdSplitting {
+public:
+ HotColdSplitting(ProfileSummaryInfo *ProfSI,
+ function_ref<BlockFrequencyInfo *(Function &)> GBFI,
+ function_ref<TargetTransformInfo &(Function &)> GTTI,
+ std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
+ : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
+ bool run(Module &M);
+
+private:
+ bool shouldOutlineFrom(const Function &F) const;
+ bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
+ BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDomTree &PDT,
+ OptimizationRemarkEmitter &ORE);
+ Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
+ BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE, unsigned Count);
+ SmallPtrSet<const Function *, 2> OutlinedFunctions;
+ ProfileSummaryInfo *PSI;
+ function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
+ std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
+};
+
+class HotColdSplittingLegacyPass : public ModulePass {
+public:
+ static char ID;
+ HotColdSplittingLegacyPass() : ModulePass(ID) {
+ initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+} // end anonymous namespace
+
+// Returns false if the function should not be considered for hot-cold split
+// optimization.
+bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
+ // Do not try to outline again from an already outlined cold function.
+ if (OutlinedFunctions.count(&F))
+ return false;
+
+ if (F.size() <= 2)
+ return false;
+
+ // TODO: Consider only skipping functions marked `optnone` or `cold`.
+
+ if (F.hasAddressTaken())
+ return false;
+
+ if (F.hasFnAttribute(Attribute::AlwaysInline))
+ return false;
+
+ if (F.hasFnAttribute(Attribute::NoInline))
+ return false;
+
+ if (F.getCallingConv() == CallingConv::Cold)
+ return false;
+
+ if (PSI->isFunctionEntryCold(&F))
+ return false;
+ return true;
+}
+
+Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
+ DominatorTree &DT,
+ BlockFrequencyInfo *BFI,
+ TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE,
+ unsigned Count) {
+ assert(!Region.empty());
+
+ // TODO: Pass BFI and BPI to update profile information.
+ CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
+ /* BPI */ nullptr, /* AllowVarArgs */ false,
+ /* AllowAlloca */ false,
+ /* Suffix */ "cold." + std::to_string(Count));
+
+ SetVector<Value *> Inputs, Outputs, Sinks;
+ CE.findInputsOutputs(Inputs, Outputs, Sinks);
+
+ // Do not extract regions that have live exit variables.
+ if (Outputs.size() > 0) {
+ LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
+ return nullptr;
+ }
+
+ // TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
+ Function *OrigF = Region[0]->getParent();
+ if (Function *OutF = CE.extractCodeRegion()) {
+ User *U = *OutF->user_begin();
+ CallInst *CI = cast<CallInst>(U);
+ CallSite CS(CI);
+ NumColdRegionsOutlined++;
+ if (TTI.useColdCCForColdCall(*OutF)) {
+ OutF->setCallingConv(CallingConv::Cold);
+ CS.setCallingConv(CallingConv::Cold);
+ }
+ CI->setIsNoInline();
+
+ // Try to make the outlined code as small as possible on the assumption
+ // that it's cold.
+ markEntireFunctionCold(*OutF);
+
+ LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "HotColdSplit",
+ &*Region[0]->begin())
+ << ore::NV("Original", OrigF) << " split cold code into "
+ << ore::NV("Split", OutF);
+ });
+ return OutF;
+ }
+
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
+ &*Region[0]->begin())
+ << "Failed to extract region at block "
+ << ore::NV("Block", Region.front());
+ });
+ return nullptr;
+}
+
+/// A pair of (basic block, score).
+using BlockTy = std::pair<BasicBlock *, unsigned>;
+
+namespace {
+/// A maximal outlining region. This contains all blocks post-dominated by a
+/// sink block, the sink block itself, and all blocks dominated by the sink.
+class OutliningRegion {
+ /// A list of (block, score) pairs. A block's score is non-zero iff it's a
+ /// viable sub-region entry point. Blocks with higher scores are better entry
+ /// points (i.e. they are more distant ancestors of the sink block).
+ SmallVector<BlockTy, 0> Blocks = {};
+
+ /// The suggested entry point into the region. If the region has multiple
+ /// entry points, not all blocks within the region may be reachable from this
+ /// entry point.
+ BasicBlock *SuggestedEntryPoint = nullptr;
+
+ /// Whether the entire function is cold.
+ bool EntireFunctionCold = false;
+
+ /// Whether or not \p BB could be the entry point of an extracted region.
+ static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
+
+ /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
+ static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
+ return isViableEntryPoint(BB) ? Score : 0;
+ }
+
+ /// These scores should be lower than the score for predecessor blocks,
+ /// because regions starting at predecessor blocks are typically larger.
+ static constexpr unsigned ScoreForSuccBlock = 1;
+ static constexpr unsigned ScoreForSinkBlock = 1;
+
+ OutliningRegion(const OutliningRegion &) = delete;
+ OutliningRegion &operator=(const OutliningRegion &) = delete;
+
+public:
+ OutliningRegion() = default;
+ OutliningRegion(OutliningRegion &&) = default;
+ OutliningRegion &operator=(OutliningRegion &&) = default;
+
+ static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
+ const PostDomTree &PDT) {
+ OutliningRegion ColdRegion;
+
+ SmallPtrSet<BasicBlock *, 4> RegionBlocks;
+
+ auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
+ RegionBlocks.insert(BB);
+ ColdRegion.Blocks.emplace_back(BB, Score);
+ assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
+ };
+
+ // The ancestor farthest-away from SinkBB, and also post-dominated by it.
+ unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
+ ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
+ unsigned BestScore = SinkScore;
+
+ // Visit SinkBB's ancestors using inverse DFS.
+ auto PredIt = ++idf_begin(&SinkBB);
+ auto PredEnd = idf_end(&SinkBB);
+ while (PredIt != PredEnd) {
+ BasicBlock &PredBB = **PredIt;
+ bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);
+
+ // If the predecessor is cold and has no predecessors, the entire
+ // function must be cold.
+ if (SinkPostDom && pred_empty(&PredBB)) {
+ ColdRegion.EntireFunctionCold = true;
+ return ColdRegion;
+ }
+
+ // If SinkBB does not post-dominate a predecessor, do not mark the
+ // predecessor (or any of its predecessors) cold.
+ if (!SinkPostDom || !mayExtractBlock(PredBB)) {
+ PredIt.skipChildren();
+ continue;
+ }
+
+ // Keep track of the post-dominated ancestor farthest away from the sink.
+ // The path length is always >= 2, ensuring that predecessor blocks are
+ // considered as entry points before the sink block.
+ unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
+ if (PredScore > BestScore) {
+ ColdRegion.SuggestedEntryPoint = &PredBB;
+ BestScore = PredScore;
+ }
+
+ addBlockToRegion(&PredBB, PredScore);
+ ++PredIt;
+ }
+
+ // Add SinkBB to the cold region. It's considered as an entry point before
+ // any sink-successor blocks.
+ addBlockToRegion(&SinkBB, SinkScore);
+
+ // Find all successors of SinkBB dominated by SinkBB using DFS.
+ auto SuccIt = ++df_begin(&SinkBB);
+ auto SuccEnd = df_end(&SinkBB);
+ while (SuccIt != SuccEnd) {
+ BasicBlock &SuccBB = **SuccIt;
+ bool SinkDom = DT.dominates(&SinkBB, &SuccBB);
+
+ // Don't allow the backwards & forwards DFSes to mark the same block.
+ bool DuplicateBlock = RegionBlocks.count(&SuccBB);
+
+ // If SinkBB does not dominate a successor, do not mark the successor (or
+ // any of its successors) cold.
+ if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
+ SuccIt.skipChildren();
+ continue;
+ }
+
+ unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
+ if (SuccScore > BestScore) {
+ ColdRegion.SuggestedEntryPoint = &SuccBB;
+ BestScore = SuccScore;
+ }
+
+ addBlockToRegion(&SuccBB, SuccScore);
+ ++SuccIt;
+ }
+
+ return ColdRegion;
+ }
+
+ /// Whether this region has nothing to extract.
+ bool empty() const { return !SuggestedEntryPoint; }
+
+ /// The blocks in this region.
+ ArrayRef<std::pair<BasicBlock *, unsigned>> blocks() const { return Blocks; }
+
+ /// Whether the entire function containing this region is cold.
+ bool isEntireFunctionCold() const { return EntireFunctionCold; }
+
+ /// Remove a sub-region from this region and return it as a block sequence.
+ BlockSequence takeSingleEntrySubRegion(DominatorTree &DT) {
+ assert(!empty() && !isEntireFunctionCold() && "Nothing to extract");
+
+ // Remove blocks dominated by the suggested entry point from this region.
+ // During the removal, identify the next best entry point into the region.
+ // Ensure that the first extracted block is the suggested entry point.
+ BlockSequence SubRegion = {SuggestedEntryPoint};
+ BasicBlock *NextEntryPoint = nullptr;
+ unsigned NextScore = 0;
+ auto RegionEndIt = Blocks.end();
+ auto RegionStartIt = remove_if(Blocks, [&](const BlockTy &Block) {
+ BasicBlock *BB = Block.first;
+ unsigned Score = Block.second;
+ bool InSubRegion =
+ BB == SuggestedEntryPoint || DT.dominates(SuggestedEntryPoint, BB);
+ if (!InSubRegion && Score > NextScore) {
+ NextEntryPoint = BB;
+ NextScore = Score;
+ }
+ if (InSubRegion && BB != SuggestedEntryPoint)
+ SubRegion.push_back(BB);
+ return InSubRegion;
+ });
+ Blocks.erase(RegionStartIt, RegionEndIt);
+
+ // Update the suggested entry point.
+ SuggestedEntryPoint = NextEntryPoint;
+
+ return SubRegion;
+ }
+};
+} // namespace
+
+bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
+ BlockFrequencyInfo *BFI,
+ TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDomTree &PDT,
+ OptimizationRemarkEmitter &ORE) {
+ bool Changed = false;
+
+ // The set of cold blocks.
+ SmallPtrSet<BasicBlock *, 4> ColdBlocks;
+
+ // The worklist of non-intersecting regions left to outline.
+ SmallVector<OutliningRegion, 2> OutliningWorklist;
+
+ // Set up an RPO traversal. Experimentally, this performs better (outlines
+ // more) than a PO traversal, because we prevent region overlap by keeping
+ // the first region to contain a block.
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+
+ // Find all cold regions.
+ for (BasicBlock *BB : RPOT) {
+ // Skip blocks which can't be outlined.
+ if (!mayExtractBlock(*BB))
+ continue;
+
+ // This block is already part of some outlining region.
+ if (ColdBlocks.count(BB))
+ continue;
+
+ bool Cold = PSI.isColdBlock(BB, BFI) ||
+ (EnableStaticAnalyis && unlikelyExecuted(*BB));
+ if (!Cold)
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "Found a cold block:\n";
+ BB->dump();
+ });
+
+ auto Region = OutliningRegion::create(*BB, DT, PDT);
+ if (Region.empty())
+ continue;
+
+ if (Region.isEntireFunctionCold()) {
+ LLVM_DEBUG(dbgs() << "Entire function is cold\n");
+ return markEntireFunctionCold(F);
+ }
+
+ // If this outlining region intersects with another, drop the new region.
+ //
+ // TODO: It's theoretically possible to outline more by only keeping the
+ // largest region which contains a block, but the extra bookkeeping to do
+ // this is tricky/expensive.
+ bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
+ return !ColdBlocks.insert(Block.first).second;
+ });
+ if (RegionsOverlap)
+ continue;
+
+ OutliningWorklist.emplace_back(std::move(Region));
+ ++NumColdRegionsFound;
+ }
+
+ // Outline single-entry cold regions, splitting up larger regions as needed.
+ unsigned OutlinedFunctionID = 1;
+ while (!OutliningWorklist.empty()) {
+ OutliningRegion Region = OutliningWorklist.pop_back_val();
+ assert(!Region.empty() && "Empty outlining region in worklist");
+ do {
+ BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
+ if (!isProfitableToOutline(SubRegion, TTI)) {
+ LLVM_DEBUG({
+ dbgs() << "Skipping outlining; not profitable to outline\n";
+ SubRegion[0]->dump();
+ });
+ continue;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
+ for (BasicBlock *BB : SubRegion)
+ BB->dump();
+ });
+
+ Function *Outlined =
+ extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
+ if (Outlined) {
+ ++OutlinedFunctionID;
+ OutlinedFunctions.insert(Outlined);
+ Changed = true;
+ }
+ } while (!Region.empty());
+ }
+
+ return Changed;
+}
+
+bool HotColdSplitting::run(Module &M) {
+ bool Changed = false;
+ OutlinedFunctions.clear();
+ for (auto &F : M) {
+ if (!shouldOutlineFrom(F)) {
+ LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
+ continue;
+ }
+ LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
+ DominatorTree DT(F);
+ PostDomTree PDT(F);
+ PDT.recalculate(F);
+ BlockFrequencyInfo *BFI = GetBFI(F);
+ TargetTransformInfo &TTI = GetTTI(F);
+ OptimizationRemarkEmitter &ORE = (*GetORE)(F);
+ Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
+ }
+ return Changed;
+}
+
+bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto GTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+ auto GBFI = [this](Function &F) {
+ return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+ };
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
+ [&ORE](Function &F) -> OptimizationRemarkEmitter & {
+ ORE.reset(new OptimizationRemarkEmitter(&F));
+ return *ORE.get();
+ };
+
+ return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
+}
+
+PreservedAnalyses
+HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&FAM](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+
+ auto GBFI = [&FAM](Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ std::function<TargetTransformInfo &(Function &)> GTTI =
+ [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
+ [&ORE](Function &F) -> OptimizationRemarkEmitter & {
+ ORE.reset(new OptimizationRemarkEmitter(&F));
+ return *ORE.get();
+ };
+
+ ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+ if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+char HotColdSplittingLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
+ "Hot Cold Splitting", false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
+ "Hot Cold Splitting", false, false)
+
+ModulePass *llvm::createHotColdSplittingPass() {
+ return new HotColdSplittingLegacyPass();
+}
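The new pass keeps its outlining regions disjoint by visiting blocks in RPO order and letting the first region that claims a block win; any later candidate touching an already-claimed block is dropped. Below is a small standalone sketch of that bookkeeping; selectDisjointRegions and the string block names are illustrative stand-ins, not the pass's actual data structures.

#include <iostream>
#include <set>
#include <string>
#include <vector>

using Block = std::string;
using Region = std::vector<Block>;

static std::vector<Region>
selectDisjointRegions(const std::vector<Region> &Candidates) {
  std::set<Block> Claimed;
  std::vector<Region> Worklist;
  for (const Region &R : Candidates) {
    bool Overlaps = false;
    for (const Block &B : R) {
      // A failed insert means a region seen earlier (in RPO) owns this block.
      if (!Claimed.insert(B).second) {
        Overlaps = true;
        break;
      }
    }
    if (!Overlaps)
      Worklist.push_back(R);
  }
  return Worklist;
}

int main() {
  std::vector<Region> Candidates = {
      {"bb3", "bb4"}, // kept
      {"bb4", "bb5"}, // dropped: bb4 already claimed
      {"bb7"}};       // kept
  for (const Region &R : selectDisjointRegions(Candidates))
    std::cout << "outline region starting at " << R.front() << "\n";
}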
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index dce9ee076bc5..973382e2b097 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -34,6 +34,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeGlobalDCELegacyPassPass(Registry);
initializeGlobalOptLegacyPassPass(Registry);
initializeGlobalSplitPass(Registry);
+ initializeHotColdSplittingLegacyPassPass(Registry);
initializeIPCPPass(Registry);
initializeAlwaysInlinerLegacyPassPass(Registry);
initializeSimpleInlinerPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 3da0c2e83eb8..66a6f80f31e4 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -64,6 +64,7 @@
#include <algorithm>
#include <cassert>
#include <functional>
+#include <sstream>
#include <tuple>
#include <utility>
#include <vector>
@@ -112,6 +113,14 @@ static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
"printing of statistics for each inlined function")),
cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
+/// Flag to add inline messages as callsite attributes 'inline-remark'.
+static cl::opt<bool>
+ InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable adding inline-remark attribute to"
+ " callsites processed by inliner but decided"
+ " to be not inlined"));
+
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -263,7 +272,7 @@ static void mergeInlinedArrayAllocas(
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
-static bool InlineCallIfPossible(
+static InlineResult InlineCallIfPossible(
CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
@@ -275,8 +284,9 @@ static bool InlineCallIfPossible(
// Try to inline the function. Get the list of static allocas that were
// inlined.
- if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
- return false;
+ InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime);
+ if (!IR)
+ return IR;
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
ImportedFunctionsStats.recordInline(*Caller, *Callee);
@@ -286,7 +296,7 @@ static bool InlineCallIfPossible(
if (!DisableInlinedAllocaMerging)
mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);
- return true;
+ return IR; // success
}
/// Return true if inlining of CS can block the caller from being
@@ -301,6 +311,11 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// For now we only handle local or inline functions.
if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
return false;
+ // If the cost of inlining CS is non-positive, it is not going to prevent the
+ // caller from being inlined into its callers and hence we don't need to
+ // defer.
+ if (IC.getCost() <= 0)
+ return false;
// Try to detect the case where the current inlining candidate caller (call
// it B) is a static or linkonce-ODR function and is an inlining candidate
// elsewhere, and the current candidate callee (call it C) is large enough
@@ -320,25 +335,31 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
TotalSecondaryCost = 0;
// The candidate cost to be imposed upon the current function.
int CandidateCost = IC.getCost() - 1;
- // This bool tracks what happens if we do NOT inline C into B.
- bool callerWillBeRemoved = Caller->hasLocalLinkage();
+ // If the caller has local linkage and can be inlined into all its callers, we
+ // can apply a huge negative bonus to TotalSecondaryCost.
+ bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
// This bool tracks what happens if we DO inline C into B.
bool inliningPreventsSomeOuterInline = false;
for (User *U : Caller->users()) {
+ // If the caller will not be removed (either because it does not have
+ // local linkage or because the LastCallToStaticBonus has already been
+ // applied), then we can exit the loop early.
+ if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
+ return false;
CallSite CS2(U);
// If this isn't a call to Caller (it could be some other sort
// of reference) skip it. Such references will prevent the caller
// from being removed.
if (!CS2 || CS2.getCalledFunction() != Caller) {
- callerWillBeRemoved = false;
+ ApplyLastCallBonus = false;
continue;
}
InlineCost IC2 = GetInlineCost(CS2);
++NumCallerCallersAnalyzed;
if (!IC2) {
- callerWillBeRemoved = false;
+ ApplyLastCallBonus = false;
continue;
}
if (IC2.isAlways())
@@ -356,7 +377,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (callerWillBeRemoved && !Caller->hasOneUse())
+ if (ApplyLastCallBonus)
TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
@@ -365,6 +386,33 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
return false;
}
+static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
+ const ore::NV &Arg) {
+ return R << Arg.Val;
+}
+
+template <class RemarkT>
+RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
+ using namespace ore;
+ if (IC.isAlways()) {
+ R << "(cost=always)";
+ } else if (IC.isNever()) {
+ R << "(cost=never)";
+ } else {
+ R << "(cost=" << ore::NV("Cost", IC.getCost())
+ << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
+ }
+ if (const char *Reason = IC.getReason())
+ R << ": " << ore::NV("Reason", Reason);
+ return R;
+}
+
+static std::string inlineCostStr(const InlineCost &IC) {
+ std::stringstream Remark;
+ Remark << IC;
+ return Remark.str();
+}
+
/// Return the cost only if the inliner should attempt to inline at the given
/// CallSite. If we return the cost, we will emit an optimisation remark later
/// using that cost, so we won't do so from this function.
@@ -379,35 +427,32 @@ shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
Function *Caller = CS.getCaller();
if (IC.isAlways()) {
- LLVM_DEBUG(dbgs() << " Inlining: cost=always"
+ LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
<< ", Call: " << *CS.getInstruction() << "\n");
return IC;
}
if (IC.isNever()) {
- LLVM_DEBUG(dbgs() << " NOT Inlining: cost=never"
+ LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
<< ", Call: " << *CS.getInstruction() << "\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
<< NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller)
- << " because it should never be inlined (cost=never)";
+ << NV("Caller", Caller) << " because it should never be inlined "
+ << IC;
});
- return None;
+ return IC;
}
if (!IC) {
- LLVM_DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
- << ", thres=" << IC.getThreshold()
+ LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
<< ", Call: " << *CS.getInstruction() << "\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
<< NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because too costly to inline (cost="
- << NV("Cost", IC.getCost())
- << ", threshold=" << NV("Threshold", IC.getThreshold()) << ")";
+ << NV("Caller", Caller) << " because too costly to inline " << IC;
});
- return None;
+ return IC;
}
int TotalSecondaryCost = 0;
@@ -428,8 +473,7 @@ shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
return None;
}
- LLVM_DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
- << ", thres=" << IC.getThreshold()
+ LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
<< ", Call: " << *CS.getInstruction() << '\n');
return IC;
}
@@ -461,6 +505,26 @@ bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
return inlineCalls(SCC);
}
+static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc,
+ const BasicBlock *Block, const Function &Callee,
+ const Function &Caller, const InlineCost &IC) {
+ ORE.emit([&]() {
+ bool AlwaysInline = IC.isAlways();
+ StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
+ return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
+ << ore::NV("Callee", &Callee) << " inlined into "
+ << ore::NV("Caller", &Caller) << " with " << IC;
+ });
+}
+
+static void setInlineRemark(CallSite &CS, StringRef message) {
+ if (!InlineRemarkAttribute)
+ return;
+
+ Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message);
+ CS.addAttribute(AttributeList::FunctionIndex, attr);
+}
+
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
@@ -510,6 +574,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (Callee->isDeclaration()) {
using namespace ore;
+ setInlineRemark(CS, "unavailable definition");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
@@ -573,8 +638,10 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// infinitely inline.
InlineHistoryID = CallSites[CSi].second;
if (InlineHistoryID != -1 &&
- InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(CS, "recursive");
continue;
+ }
}
// FIXME for new PM: because of the old PM we currently generate ORE and
@@ -585,8 +652,17 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
// If the policy determines that we should inline this function,
// delete the call instead.
- if (!OIC)
+ if (!OIC.hasValue()) {
+ setInlineRemark(CS, "deferred");
+ continue;
+ }
+
+ if (!OIC.getValue()) {
+ // shouldInline() call returned a negative inline cost that explains
+ // why this callsite should not be inlined.
+ setInlineRemark(CS, inlineCostStr(*OIC));
continue;
+ }
// If this call site is dead and it is to a readonly function, we should
// just delete the call instead of trying to inline it, regardless of
@@ -595,6 +671,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (IsTriviallyDead) {
LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
+ setInlineRemark(CS, "trivially dead");
CG[Caller]->removeCallEdgeFor(CS);
Instr->eraseFromParent();
++NumCallsDeleted;
@@ -606,34 +683,22 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// Attempt to inline the function.
using namespace ore;
- if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, AARGetter,
- ImportedFunctionsStats)) {
+ InlineResult IR = InlineCallIfPossible(
+ CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
+ InsertLifetime, AARGetter, ImportedFunctionsStats);
+ if (!IR) {
+ setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
Block)
<< NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller);
+ << NV("Caller", Caller) << ": " << NV("Reason", IR.message);
});
continue;
}
++NumInlined;
- ORE.emit([&]() {
- bool AlwaysInline = OIC->isAlways();
- StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
- OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
- R << NV("Callee", Callee) << " inlined into ";
- R << NV("Caller", Caller);
- if (AlwaysInline)
- R << " with cost=always";
- else {
- R << " with cost=" << NV("Cost", OIC->getCost());
- R << " (threshold=" << NV("Threshold", OIC->getThreshold());
- R << ")";
- }
- return R;
- });
+ emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC);
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -692,7 +757,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
- PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
@@ -865,6 +930,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Calls.push_back({CS, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
+ setInlineRemark(CS, "unavailable definition");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
@@ -908,8 +974,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ setInlineRemark(Calls[i].first, "optnone attribute");
continue;
+ }
LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
@@ -953,8 +1021,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Function &Callee = *CS.getCalledFunction();
if (InlineHistoryID != -1 &&
- InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
+ InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(CS, "recursive");
continue;
+ }
// Check if this inlining may repeat breaking an SCC apart that has
// already been split once before. In that case, inlining here may
@@ -966,13 +1036,23 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
+ setInlineRemark(CS, "recursive SCC split");
continue;
}
Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
// Check whether we want to inline this callsite.
- if (!OIC)
+ if (!OIC.hasValue()) {
+ setInlineRemark(CS, "deferred");
+ continue;
+ }
+
+ if (!OIC.getValue()) {
+ // shouldInline() call returned a negative inline cost that explains
+ // why this callsite should not be inlined.
+ setInlineRemark(CS, inlineCostStr(*OIC));
continue;
+ }
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
@@ -987,32 +1067,22 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
using namespace ore;
- if (!InlineFunction(CS, IFI)) {
+ InlineResult IR = InlineFunction(CS, IFI);
+ if (!IR) {
+ setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
<< NV("Callee", &Callee) << " will not be inlined into "
- << NV("Caller", &F);
+ << NV("Caller", &F) << ": " << NV("Reason", IR.message);
});
continue;
}
DidInline = true;
InlinedCallees.insert(&Callee);
- ORE.emit([&]() {
- bool AlwaysInline = OIC->isAlways();
- StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
- OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
- R << NV("Callee", &Callee) << " inlined into ";
- R << NV("Caller", &F);
- if (AlwaysInline)
- R << " with cost=always";
- else {
- R << " with cost=" << NV("Cost", OIC->getCost());
- R << " (threshold=" << NV("Threshold", OIC->getThreshold());
- R << ")";
- }
- return R;
- });
+ ++NumInlined;
+
+ emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC);
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
@@ -1099,10 +1169,19 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// SCC splits and merges. To avoid this, we capture the originating caller
// node and the SCC containing the call edge. This is a slight over
// approximation of the possible inlining decisions that must be avoided,
- // but is relatively efficient to store.
+ // but is relatively efficient to store. We use C != OldC to know when
+ // a new SCC is generated and the original SCC may be generated via merge
+ // in later iterations.
+ //
+ // It is also possible that even if no new SCC is generated
+ // (i.e., C == OldC), the original SCC could be split and then merged
+ // into the same one as itself, and the original SCC will be added into
+ // UR.CWorklist again; we want to catch such cases too.
+ //
// FIXME: This seems like a very heavyweight way of retaining the inline
// history, we should look for a more efficient way of tracking it.
- if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
+ if ((C != OldC || UR.CWorklist.count(OldC)) &&
+ llvm::any_of(InlinedCallees, [&](Function *Callee) {
return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
})) {
LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
@@ -1138,6 +1217,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// And delete the actual function from the module.
M.getFunctionList().erase(DeadF);
+ ++NumDeleted;
}
if (!Changed)
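The inliner changes above format an InlineCost once and reuse the text for debug output, optimization remarks, and the optional "inline-remark" callsite attribute. A standalone sketch of that formatting helper follows; CostDecision and costStr are invented stand-ins for InlineCost and inlineCostStr.

#include <iostream>
#include <sstream>
#include <string>

struct CostDecision {
  bool Always;
  bool Never;
  int Cost;
  int Threshold;
  const char *Reason; // may be null
};

static std::ostream &operator<<(std::ostream &OS, const CostDecision &IC) {
  if (IC.Always)
    OS << "(cost=always)";
  else if (IC.Never)
    OS << "(cost=never)";
  else
    OS << "(cost=" << IC.Cost << ", threshold=" << IC.Threshold << ")";
  if (IC.Reason)
    OS << ": " << IC.Reason;
  return OS;
}

// Capture the same rendering into a string, e.g. for a callsite attribute.
static std::string costStr(const CostDecision &IC) {
  std::stringstream SS;
  SS << IC;
  return SS.str();
}

int main() {
  CostDecision TooCostly{false, false, 120, 75, "cost over threshold"};
  std::cout << "remark attribute value: " << costStr(TooCostly) << "\n";
}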
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 8c86f7cb806a..733235d45a09 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -104,8 +104,8 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
bool ShouldExtractLoop = false;
// Extract the loop if the entry block doesn't branch to the loop header.
- TerminatorInst *EntryTI =
- L->getHeader()->getParent()->getEntryBlock().getTerminator();
+ Instruction *EntryTI =
+ L->getHeader()->getParent()->getEntryBlock().getTerminator();
if (!isa<BranchInst>(EntryTI) ||
!cast<BranchInst>(EntryTI)->isUnconditional() ||
EntryTI->getSuccessor(0) != L->getHeader()) {
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 4f7571884707..87c65db09517 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -989,6 +989,7 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
if (F->isDSOLocal()) {
Function *RealF = Function::Create(F->getFunctionType(),
GlobalValue::ExternalLinkage,
+ F->getAddressSpace(),
Name + ".cfi", &M);
RealF->setVisibility(GlobalVariable::HiddenVisibility);
replaceDirectCalls(F, RealF);
@@ -1000,13 +1001,13 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
if (F->isDeclarationForLinker() && !isDefinition) {
// Declaration of an external function.
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
- Name + ".cfi_jt", &M);
+ F->getAddressSpace(), Name + ".cfi_jt", &M);
FDecl->setVisibility(GlobalValue::HiddenVisibility);
} else if (isDefinition) {
F->setName(Name + ".cfi");
F->setLinkage(GlobalValue::ExternalLinkage);
FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
- Name, &M);
+ F->getAddressSpace(), Name, &M);
FDecl->setVisibility(Visibility);
Visibility = GlobalValue::HiddenVisibility;
@@ -1016,7 +1017,8 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
for (auto &U : F->uses()) {
if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
Function *AliasDecl = Function::Create(
- F->getFunctionType(), GlobalValue::ExternalLinkage, "", &M);
+ F->getFunctionType(), GlobalValue::ExternalLinkage,
+ F->getAddressSpace(), "", &M);
AliasDecl->takeName(A);
A->replaceAllUsesWith(AliasDecl);
ToErase.push_back(A);
@@ -1191,7 +1193,9 @@ void LowerTypeTestsModule::moveInitializerToModuleConstructor(
WeakInitializerFn = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()),
/* IsVarArg */ false),
- GlobalValue::InternalLinkage, "__cfi_global_var_init", &M);
+ GlobalValue::InternalLinkage,
+ M.getDataLayout().getProgramAddressSpace(),
+ "__cfi_global_var_init", &M);
BasicBlock *BB =
BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn);
ReturnInst::Create(M.getContext(), BB);
@@ -1234,7 +1238,8 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
// placeholder first.
Function *PlaceholderFn =
Function::Create(cast<FunctionType>(F->getValueType()),
- GlobalValue::ExternalWeakLinkage, "", &M);
+ GlobalValue::ExternalWeakLinkage,
+ F->getAddressSpace(), "", &M);
replaceCfiUses(F, PlaceholderFn, IsDefinition);
Constant *Target = ConstantExpr::getSelect(
@@ -1424,7 +1429,9 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
Function *JumpTableFn =
Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()),
/* IsVarArg */ false),
- GlobalValue::PrivateLinkage, ".cfi.jumptable", &M);
+ GlobalValue::PrivateLinkage,
+ M.getDataLayout().getProgramAddressSpace(),
+ ".cfi.jumptable", &M);
ArrayType *JumpTableType =
ArrayType::get(getJumpTableEntryType(), Functions.size());
auto JumpTable =
@@ -1695,6 +1702,13 @@ bool LowerTypeTestsModule::lower() {
!ExportSummary && !ImportSummary)
return false;
+ // If only some of the modules were split, we cannot correctly handle
+ // code that contains type tests.
+ if (TypeTestFunc && !TypeTestFunc->use_empty() &&
+ ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+ (ImportSummary && ImportSummary->partiallySplitLTOUnits())))
+ report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test");
+
if (ImportSummary) {
if (TypeTestFunc) {
for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
@@ -1813,7 +1827,8 @@ bool LowerTypeTestsModule::lower() {
if (!F)
F = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
- GlobalVariable::ExternalLinkage, FunctionName, &M);
+ GlobalVariable::ExternalLinkage,
+ M.getDataLayout().getProgramAddressSpace(), FunctionName, &M);
// If the function is available_externally, remove its definition so
// that it is handled the same way as a declaration. Later we will try
@@ -1997,7 +2012,7 @@ bool LowerTypeTestsModule::lower() {
}
Sets.emplace_back(I, MaxUniqueId);
}
- llvm::sort(Sets.begin(), Sets.end(),
+ llvm::sort(Sets,
[](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
return S1.second < S2.second;
@@ -2022,12 +2037,12 @@ bool LowerTypeTestsModule::lower() {
// Order type identifiers by unique ID for determinism. This ordering is
// stable as there is a one-to-one mapping between metadata and unique IDs.
- llvm::sort(TypeIds.begin(), TypeIds.end(), [&](Metadata *M1, Metadata *M2) {
+ llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
});
// Same for the branch funnels.
- llvm::sort(ICallBranchFunnels.begin(), ICallBranchFunnels.end(),
+ llvm::sort(ICallBranchFunnels,
[&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
return F1->UniqueId < F2->UniqueId;
});
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 3bebb96c6d35..11efe95b10d4 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -136,6 +136,7 @@ using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
static cl::opt<unsigned> NumFunctionsForSanityCheck(
@@ -165,6 +166,11 @@ static cl::opt<bool>
cl::desc("Preserve debug info in thunk when mergefunc "
"transformations are made."));
+static cl::opt<bool>
+ MergeFunctionsAliases("mergefunc-use-aliases", cl::Hidden,
+ cl::init(false),
+ cl::desc("Allow mergefunc to create aliases"));
+
namespace {
class FunctionNode {
@@ -272,6 +278,13 @@ private:
/// delete G.
void writeThunk(Function *F, Function *G);
+ // Replace G with an alias to F (deleting function G)
+ void writeAlias(Function *F, Function *G);
+
+ // Replace G with an alias to F if possible, or a thunk to F if
+ // profitable. Returns false if neither is the case.
+ bool writeThunkOrAlias(Function *F, Function *G);
+
/// Replace function F with function G in the function tree.
void replaceFunctionInTree(const FunctionNode &FN, Function *G);
@@ -284,7 +297,7 @@ private:
// modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
// dangling iterators into FnTree. The invariant that preserves this is that
// there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
- ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
+ DenseMap<AssertingVH<Function>, FnTreeType::iterator> FNodesInTree;
};
} // end anonymous namespace
@@ -425,6 +438,7 @@ bool MergeFunctions::runOnModule(Module &M) {
} while (!Deferred.empty());
FnTree.clear();
+ FNodesInTree.clear();
GlobalNumbers.clear();
return Changed;
@@ -460,7 +474,7 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
NewPAL.getRetAttributes(),
NewArgAttrs));
- remove(CS.getInstruction()->getParent()->getParent());
+ remove(CS.getInstruction()->getFunction());
U->set(BitcastNew);
}
}
@@ -608,7 +622,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
LLVM_DEBUG(BI->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
}
- } else if (dyn_cast<TerminatorInst>(BI) == GEntryBlock->getTerminator()) {
+ } else if (BI->isTerminator() && &*BI == GEntryBlock->getTerminator()) {
LLVM_DEBUG(dbgs() << " Will Include Terminator: ");
LLVM_DEBUG(BI->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
@@ -679,8 +693,8 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
GEntryBlock->getTerminator()->eraseFromParent();
BB = GEntryBlock;
} else {
- NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
- G->getParent());
+ NewG = Function::Create(G->getFunctionType(), G->getLinkage(),
+ G->getAddressSpace(), "", G->getParent());
BB = BasicBlock::Create(F->getContext(), "", NewG);
}
@@ -734,27 +748,76 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
++NumThunksWritten;
}
+// Whether this function may be replaced by an alias
+static bool canCreateAliasFor(Function *F) {
+ if (!MergeFunctionsAliases || !F->hasGlobalUnnamedAddr())
+ return false;
+
+ // We should only see linkages supported by aliases here
+ assert(F->hasLocalLinkage() || F->hasExternalLinkage()
+ || F->hasWeakLinkage() || F->hasLinkOnceLinkage());
+ return true;
+}
+
+// Replace G with an alias to F (deleting function G)
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ PointerType *PtrType = G->getType();
+ auto *GA = GlobalAlias::create(
+ PtrType->getElementType(), PtrType->getAddressSpace(),
+ G->getLinkage(), "", BitcastF, G->getParent());
+
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ removeUsers(G);
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+
+ LLVM_DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+ ++NumAliasesWritten;
+}
+
+// Replace G with an alias to F if possible, or a thunk to F if
+// profitable. Returns false if neither is the case.
+bool MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+ if (canCreateAliasFor(G)) {
+ writeAlias(F, G);
+ return true;
+ }
+ if (isThunkProfitable(F)) {
+ writeThunk(F, G);
+ return true;
+ }
+ return false;
+}
+
// Merge two equivalent functions. Upon completion, Function G is deleted.
void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
if (F->isInterposable()) {
assert(G->isInterposable());
- if (!isThunkProfitable(F)) {
+ // Both writeThunkOrAlias() calls below must succeed, either because we can
+ // create aliases for G and NewF, or because a thunk for F is profitable.
+ // F here has the same signature as NewF below, so that's what we check.
+ if (!isThunkProfitable(F) && (!canCreateAliasFor(F) || !canCreateAliasFor(G))) {
return;
}
// Make them both thunks to the same internal function.
- Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
- F->getParent());
- H->copyAttributesFrom(F);
- H->takeName(F);
+ Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
+ F->getAddressSpace(), "", F->getParent());
+ NewF->copyAttributesFrom(F);
+ NewF->takeName(F);
removeUsers(F);
- F->replaceAllUsesWith(H);
+ F->replaceAllUsesWith(NewF);
- unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+ unsigned MaxAlignment = std::max(G->getAlignment(), NewF->getAlignment());
- writeThunk(F, G);
- writeThunk(F, H);
+ writeThunkOrAlias(F, G);
+ writeThunkOrAlias(F, NewF);
F->setAlignment(MaxAlignment);
F->setLinkage(GlobalValue::PrivateLinkage);
@@ -770,6 +833,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
GlobalNumbers.erase(G);
// If G's address is not significant, replace it entirely.
Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ removeUsers(G);
G->replaceAllUsesWith(BitcastF);
} else {
// Redirect direct callers of G to F. (See note on MergeFunctionsPDI
@@ -781,18 +845,15 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// If G was internal then we may have replaced all uses of G with F. If so,
// stop here and delete G. There's no need for a thunk. (See note on
// MergeFunctionsPDI above).
- if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
+ if (G->isDiscardableIfUnused() && G->use_empty() && !MergeFunctionsPDI) {
G->eraseFromParent();
++NumFunctionsMerged;
return;
}
- if (!isThunkProfitable(F)) {
- return;
+ if (writeThunkOrAlias(F, G)) {
+ ++NumFunctionsMerged;
}
-
- writeThunk(F, G);
- ++NumFunctionsMerged;
}
}
@@ -816,6 +877,24 @@ void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
FN.replaceBy(G);
}
+// Ordering for functions that are equal under FunctionComparator
+static bool isFuncOrderCorrect(const Function *F, const Function *G) {
+ if (F->isInterposable() != G->isInterposable()) {
+ // Strong before weak, because the weak function may call the strong
+ // one, but not the other way around.
+ return !F->isInterposable();
+ }
+ if (F->hasLocalLinkage() != G->hasLocalLinkage()) {
+ // External before local, because we definitely have to keep the external
+ // function, but may be able to drop the local one.
+ return !F->hasLocalLinkage();
+ }
+ // Impose a total order (by name) on the replacement of functions. This is
+ // important when operating on more than one module independently to prevent
+ // cycles of thunks calling each other when the modules are linked together.
+ return F->getName() <= G->getName();
+}
+
// Insert a ComparableFunction into the FnTree, or merge it away if equal to one
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
@@ -832,14 +911,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
const FunctionNode &OldF = *Result.first;
- // Impose a total order (by name) on the replacement of functions. This is
- // important when operating on more than one module independently to prevent
- // cycles of thunks calling each other when the modules are linked together.
- //
- // First of all, we process strong functions before weak functions.
- if ((OldF.getFunc()->isInterposable() && !NewFunction->isInterposable()) ||
- (OldF.getFunc()->isInterposable() == NewFunction->isInterposable() &&
- OldF.getFunc()->getName() > NewFunction->getName())) {
+ if (!isFuncOrderCorrect(OldF.getFunc(), NewFunction)) {
// Swap the two functions.
Function *F = OldF.getFunc();
replaceFunctionInTree(*Result.first, NewFunction);
@@ -882,7 +954,7 @@ void MergeFunctions::removeUsers(Value *V) {
for (User *U : V->users()) {
if (Instruction *I = dyn_cast<Instruction>(U)) {
- remove(I->getParent()->getParent());
+ remove(I->getFunction());
} else if (isa<GlobalValue>(U)) {
// do nothing
} else if (Constant *C = dyn_cast<Constant>(U)) {
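MergeFunctions now funnels its tie-breaking into isFuncOrderCorrect: strong before weak, external before local, then by name, so modules processed independently pick the same winner and cannot end up with thunks calling each other in a cycle once linked. A standalone sketch of that predicate follows; the Fn struct is an illustrative stand-in for llvm::Function.

#include <cassert>
#include <string>

struct Fn {
  std::string Name;
  bool Interposable;
  bool LocalLinkage;
};

static bool isFuncOrderCorrect(const Fn &F, const Fn &G) {
  if (F.Interposable != G.Interposable)
    return !F.Interposable; // strong before weak
  if (F.LocalLinkage != G.LocalLinkage)
    return !F.LocalLinkage; // external before local
  return F.Name <= G.Name;  // deterministic tie-break by name
}

int main() {
  Fn Strong{"foo", false, false}, Weak{"foo.weak", true, false};
  Fn External{"bar", false, false}, Local{"bar.local", false, true};
  assert(isFuncOrderCorrect(Strong, Weak));
  assert(!isFuncOrderCorrect(Weak, Strong));
  assert(isFuncOrderCorrect(External, Local));
}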
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 4907e4b30519..da214a1d3b44 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -359,7 +359,7 @@ struct PartialInlinerLegacyPass : public ModulePass {
TargetTransformInfoWrapperPass *TTIWP =
&getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&ACT](Function &F) -> AssumptionCache & {
@@ -403,7 +403,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
BasicBlock *Dom = BlockList.front();
- return BlockList.size() > 1 && pred_size(Dom) == 1;
+ return BlockList.size() > 1 && Dom->hasNPredecessors(1);
};
auto IsSingleExit =
@@ -468,7 +468,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
// Only consider regions with predecessor blocks that are considered
// not-cold (default: part of the top 99.99% of all block counters)
// AND greater than our minimum block execution count (default: 100).
- if (PSI->isColdBB(thisBB, BFI) ||
+ if (PSI->isColdBlock(thisBB, BFI) ||
BBProfileCount(thisBB) < MinBlockCounterExecution)
continue;
for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
@@ -556,7 +556,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
};
auto IsReturnBlock = [](BasicBlock *BB) {
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
return isa<ReturnInst>(TI);
};
@@ -834,42 +834,37 @@ bool PartialInlinerImpl::shouldPartialInline(
int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
int InlineCost = 0;
const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- switch (I->getOpcode()) {
+ for (Instruction &I : BB->instructionsWithoutDebug()) {
+ // Skip free instructions.
+ switch (I.getOpcode()) {
case Instruction::BitCast:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::Alloca:
+ case Instruction::PHI:
continue;
case Instruction::GetElementPtr:
- if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
continue;
break;
default:
break;
}
- IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
- if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
- IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
- continue;
- }
+ if (I.isLifetimeStartOrEnd())
+ continue;
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
InlineCost += getCallsiteCost(CallSite(CI), DL);
continue;
}
- if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
InlineCost += getCallsiteCost(CallSite(II), DL);
continue;
}
- if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
continue;
}
@@ -1251,7 +1246,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (PSI->isFunctionEntryCold(F))
return {false, nullptr};
- if (F->user_begin() == F->user_end())
+ if (empty(F->users()))
return {false, nullptr};
OptimizationRemarkEmitter ORE(F);
@@ -1357,7 +1352,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
return false;
}
- assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
+ assert(empty(Cloner.OrigFunc->users()) &&
"F's users should all be replaced!");
std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
@@ -1461,9 +1456,7 @@ bool PartialInlinerImpl::run(Module &M) {
std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
if (Result.second)
Worklist.push_back(Result.second);
- if (Result.first) {
- Changed = true;
- }
+ Changed |= Result.first;
}
return Changed;
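Editor's note on the PartialInlining hunks above: computeBBInlineCost now iterates instructionsWithoutDebug() and treats bitcasts, allocas, PHIs, zero-index GEPs and lifetime markers as free, while switches pay per successor. A rough standalone sketch of the same accounting (assumed instruction categories and cost constant, not the LLVM cost model):

#include <iostream>
#include <vector>

// Simplified instruction kinds for illustration only.
enum class Kind { Free, Call, Switch, Other };
constexpr int InstrCost = 5;  // assumed per-instruction cost, standing in for InlineConstants::InstrCost

struct Inst {
  Kind K;
  int NumSwitchCases = 0;  // only meaningful for Kind::Switch
};

// Count everything except "free" instructions; switches pay one edge per case plus the default.
static int blockInlineCost(const std::vector<Inst> &Block) {
  int Cost = 0;
  for (const Inst &I : Block) {
    switch (I.K) {
    case Kind::Free:
      continue;                                    // casts, allocas, PHIs, lifetime markers
    case Kind::Switch:
      Cost += (I.NumSwitchCases + 1) * InstrCost;  // mirrors (getNumCases() + 1) * InstrCost
      continue;
    case Kind::Call:  // the real code charges a call-site cost here instead
    case Kind::Other:
      Cost += InstrCost;
      break;
    }
  }
  return Cost;
}

int main() {
  std::vector<Inst> BB = {{Kind::Free}, {Kind::Other}, {Kind::Switch, 3}};
  std::cout << blockInlineCost(BB) << "\n"; // 5 + 4*5 = 25
  return 0;
}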
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 5ced6481996a..9764944dc332 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -104,6 +104,10 @@ static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
+cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden,
+ cl::desc("Enable hot-cold splitting pass"));
+
+
static cl::opt<bool> RunPGOInstrGen(
"profile-generate", cl::init(false), cl::Hidden,
cl::desc("Enable PGO instrumentation."));
@@ -152,6 +156,10 @@ static cl::opt<bool> EnableGVNSink(
"enable-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN sinking pass (default = off)"));
+static cl::opt<bool>
+ EnableCHR("enable-chr", cl::init(true), cl::Hidden,
+ cl::desc("Enable control height reduction optimization (CHR)"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -367,13 +375,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (EnableLoopInterchange) {
- // FIXME: These are function passes and break the loop pass pipeline.
+ if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops
- MPM.add(createCFGSimplificationPass());
- }
- if (!DisableUnrollLoops)
- MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
+
+ MPM.add(createSimpleLoopUnrollPass(OptLevel,
+ DisableUnrollLoops)); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
@@ -411,6 +417,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Clean up after everything.
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
+
+ if (EnableCHR && OptLevel >= 3 &&
+ (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
+ MPM.add(createControlHeightReductionLegacyPass());
}
void PassManagerBuilder::populateModulePassManager(
@@ -452,12 +462,14 @@ void PassManagerBuilder::populateModulePassManager(
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
- // Rename anon globals to be able to export them in the summary.
- // This has to be done after we add the extensions to the pass manager
- // as there could be passes (e.g. Adddress sanitizer) which introduce
- // new unnamed globals.
- if (PrepareForLTO || PrepareForThinLTO)
+ if (PrepareForLTO || PrepareForThinLTO) {
+ MPM.add(createCanonicalizeAliasesPass());
+ // Rename anon globals to be able to export them in the summary.
+ // This has to be done after we add the extensions to the pass manager
+ // as there could be passes (e.g. Address sanitizer) which introduce
+ // new unnamed globals.
MPM.add(createNameAnonGlobalPass());
+ }
return;
}
@@ -575,6 +587,7 @@ void PassManagerBuilder::populateModulePassManager(
// Ensure we perform any last passes, but do so before renaming anonymous
// globals in case the passes add any.
addExtensionsToPM(EP_OptimizerLast, MPM);
+ MPM.add(createCanonicalizeAliasesPass());
// Rename anon globals to be able to export them in the summary.
MPM.add(createNameAnonGlobalPass());
return;
@@ -627,7 +640,7 @@ void PassManagerBuilder::populateModulePassManager(
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
MPM.add(createLoopDistributePass());
- MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops, !LoopVectorize));
// Eliminate loads by forwarding stores from the previous iteration to loads
// of the current iteration.
@@ -672,16 +685,17 @@ void PassManagerBuilder::populateModulePassManager(
addExtensionsToPM(EP_Peephole, MPM);
addInstructionCombiningPass(MPM);
- if (!DisableUnrollLoops) {
- if (EnableUnrollAndJam) {
- // Unroll and Jam. We do this before unroll but need to be in a separate
- // loop pass manager in order for the outer loop to be processed by
- // unroll and jam before the inner loop is unrolled.
- MPM.add(createLoopUnrollAndJamPass(OptLevel));
- }
+ if (EnableUnrollAndJam && !DisableUnrollLoops) {
+ // Unroll and Jam. We do this before unroll but need to be in a separate
+ // loop pass manager in order for the outer loop to be processed by
+ // unroll and jam before the inner loop is unrolled.
+ MPM.add(createLoopUnrollAndJamPass(OptLevel));
+ }
- MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
+ MPM.add(createLoopUnrollPass(OptLevel,
+ DisableUnrollLoops)); // Unroll small loops
+ if (!DisableUnrollLoops) {
// LoopUnroll may generate some redundancy to clean up.
addInstructionCombiningPass(MPM);
@@ -690,7 +704,9 @@ void PassManagerBuilder::populateModulePassManager(
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
MPM.add(createLICMPass());
- }
+ }
+
+ MPM.add(createWarnMissedTransformationsPass());
// After vectorization and unrolling, assume intrinsics may tell us more
// about pointer alignments.
@@ -722,18 +738,29 @@ void PassManagerBuilder::populateModulePassManager(
// flattening of blocks.
MPM.add(createDivRemPairsPass());
+ if (EnableHotColdSplit)
+ MPM.add(createHotColdSplittingPass());
+
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
MPM.add(createCFGSimplificationPass());
addExtensionsToPM(EP_OptimizerLast, MPM);
- // Rename anon globals to be able to handle them in the summary
- if (PrepareForLTO)
+ if (PrepareForLTO) {
+ MPM.add(createCanonicalizeAliasesPass());
+ // Rename anon globals to be able to handle them in the summary
MPM.add(createNameAnonGlobalPass());
+ }
}
void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
+ // Load sample profile before running the LTO optimization pipeline.
+ if (!PGOSampleUse.empty()) {
+ PM.add(createPruneEHPass());
+ PM.add(createSampleProfileLoaderPass(PGOSampleUse));
+ }
+
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
PM.add(createGlobalDCEPass());
@@ -851,12 +878,13 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());
- if (!DisableUnrollLoops)
- PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
- PM.add(createLoopVectorizePass(true, LoopVectorize));
+ PM.add(createSimpleLoopUnrollPass(OptLevel,
+ DisableUnrollLoops)); // Unroll small loops
+ PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- PM.add(createLoopUnrollPass(OptLevel));
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops));
+
+ PM.add(createWarnMissedTransformationsPass());
// Now that we've optimized loops (in particular loop induction variables),
// we may have exposed more scalar opportunities. Run parts of the scalar
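Editor's note on the PassManagerBuilder hunks above: they follow one pattern — a hidden cl::opt gates whether an extra pass (hot/cold splitting, CHR) is appended, and the unrolling passes are now always added with the "disabled" flag passed in rather than being skipped outright. A standalone sketch of that flag-gated pipeline-building style (simplified types and pass names, not the LLVM pass manager):

#include <iostream>
#include <string>
#include <vector>

// A "pass" here is just a named step; the real pipeline adds Pass objects.
using Pipeline = std::vector<std::string>;

// Assumed option values; in LLVM these come from cl::opt flags.
struct Options {
  bool EnableHotColdSplit = false;
  bool EnableCHR = true;
  bool DisableUnrollLoops = false;
  int OptLevel = 2;
  bool HaveProfile = false;
};

static void populate(Pipeline &PM, const Options &Opt) {
  // Unrolling is always added; the pass itself honours the disable flag,
  // mirroring createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops).
  PM.push_back(Opt.DisableUnrollLoops ? "loop-unroll(disabled)" : "loop-unroll");

  // CHR only at -O3 with profile data, as in the hunk above.
  if (Opt.EnableCHR && Opt.OptLevel >= 3 && Opt.HaveProfile)
    PM.push_back("chr");

  // Optional late pass gated by its own flag.
  if (Opt.EnableHotColdSplit)
    PM.push_back("hot-cold-split");
}

int main() {
  Options Opt;
  Opt.EnableHotColdSplit = true;
  Opt.OptLevel = 3;
  Opt.HaveProfile = true;
  Pipeline PM;
  populate(PM, Opt);
  for (const auto &P : PM)
    std::cout << P << "\n";
  return 0;
}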
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 2be654258aa8..ae586c017471 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -107,7 +107,7 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
continue;
for (const BasicBlock &BB : *F) {
- const TerminatorInst *TI = BB.getTerminator();
+ const Instruction *TI = BB.getTerminator();
if (CheckUnwind && TI->mayThrow()) {
SCCMightUnwind = true;
} else if (CheckReturn && isa<ReturnInst>(TI)) {
@@ -255,7 +255,7 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) {
}
if (TokenInst) {
- if (!isa<TerminatorInst>(TokenInst))
+ if (!TokenInst->isTerminator())
changeToUnreachable(TokenInst->getNextNode(), /*UseLLVMTrap=*/false);
} else {
// Get the list of successors of this block.
diff --git a/contrib/llvm/lib/Transforms/IPO/SCCP.cpp b/contrib/llvm/lib/Transforms/IPO/SCCP.cpp
index cc53c4b8c46f..d2c34abfc132 100644
--- a/contrib/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -1,4 +1,6 @@
#include "llvm/Transforms/IPO/SCCP.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar/SCCP.h"
@@ -8,9 +10,22 @@ using namespace llvm;
PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
const DataLayout &DL = M.getDataLayout();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
- if (!runIPSCCP(M, DL, &TLI))
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn {
+ DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ return {
+ make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
+ &DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)};
+ };
+
+ if (!runIPSCCP(M, DL, &TLI, getAnalysis))
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ return PA;
}
namespace {
@@ -34,10 +49,25 @@ public:
const DataLayout &DL = M.getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- return runIPSCCP(M, DL, TLI);
+
+ auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn {
+ DominatorTree &DT =
+ this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ return {
+ make_unique<PredicateInfo>(
+ F, DT,
+ this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ F)),
+ nullptr, // We cannot preserve the DT or PDT with the legacy pass
+ nullptr}; // manager, so set them to nullptr.
+ };
+
+ return runIPSCCP(M, DL, TLI, getAnalysis);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -49,6 +79,7 @@ char IPSCCPLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(IPSCCPLegacyPass, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IPSCCPLegacyPass, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index dcd24595f7ea..9f123c2b875e 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -96,6 +96,13 @@ static cl::opt<std::string> SampleProfileFile(
"sample-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+// The named file contains a set of transformations that may have been applied
+// to the symbol names between the program from which the sample data was
+// collected and the current program's symbols.
+static cl::opt<std::string> SampleProfileRemappingFile(
+ "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
+
static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
"sample-profile-max-propagate-iterations", cl::init(100),
cl::desc("Maximum number of iterations to go through when propagating "
@@ -116,6 +123,12 @@ static cl::opt<bool> NoWarnSampleUnused(
cl::desc("Use this option to turn off/on warnings about function with "
"samples but without debug information to use those samples. "));
+static cl::opt<bool> ProfileSampleAccurate(
+ "profile-sample-accurate", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+ "callsites and functions as having 0 samples. Otherwise, treat "
+ "un-sampled callsites and functions conservatively as unknown. "));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -183,12 +196,12 @@ private:
class SampleProfileLoader {
public:
SampleProfileLoader(
- StringRef Name, bool IsThinLTOPreLink,
+ StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
: GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
- IsThinLTOPreLink(IsThinLTOPreLink) {}
+ RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -205,6 +218,7 @@ protected:
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
+ mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,
@@ -282,6 +296,9 @@ protected:
/// Name of the profile file to load.
std::string Filename;
+ /// Name of the profile remapping file to load.
+ std::string RemappingFilename;
+
/// Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid = false;
@@ -311,13 +328,14 @@ public:
SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
bool IsThinLTOPreLink = false)
- : ModulePass(ID), SampleLoader(Name, IsThinLTOPreLink,
- [&](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- },
- [&](Function &F) -> TargetTransformInfo & {
- return TTIWP->getTTI(F);
- }) {
+ : ModulePass(ID),
+ SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
+ [&](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ },
+ [&](Function &F) -> TargetTransformInfo & {
+ return TTIWP->getTTI(F);
+ }) {
initializeSampleProfileLoaderLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -527,10 +545,10 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
if (!FS)
return std::error_code();
- // Ignore all intrinsics and branch instructions.
- // Branch instruction usually contains debug info from sources outside of
+ // Ignore all intrinsics, phi nodes and branch instructions.
+ // Branch and phi-node instructions usually contain debug info from sources outside of
// the residing basic block, thus we ignore them during annotation.
- if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
+ if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
return std::error_code();
// If a direct call/invoke instruction is inlined in profile
@@ -643,8 +661,6 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
if (FS == nullptr)
return nullptr;
- std::string CalleeGUID;
- CalleeName = getRepInFormat(CalleeName, Reader->getFormat(), CalleeGUID);
return FS->findFunctionSamplesAt(LineLocation(FunctionSamples::getOffset(DIL),
DIL->getBaseDiscriminator()),
CalleeName);
@@ -683,10 +699,12 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
Sum += NameFS.second.getEntrySamples();
R.push_back(&NameFS.second);
}
- llvm::sort(R.begin(), R.end(),
- [](const FunctionSamples *L, const FunctionSamples *R) {
- return L->getEntrySamples() > R->getEntrySamples();
- });
+ llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
+ if (L->getEntrySamples() != R->getEntrySamples())
+ return L->getEntrySamples() > R->getEntrySamples();
+ return FunctionSamples::getGUID(L->getName()) <
+ FunctionSamples::getGUID(R->getName());
+ });
}
return R;
}
@@ -702,12 +720,14 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
- SmallVector<std::pair<LineLocation, StringRef>, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL)
return Samples;
- return Samples->findFunctionSamples(DIL);
+ auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
+ if (it.second)
+ it.first->second = Samples->findFunctionSamples(DIL);
+ return it.first->second;
}
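Editor's note on the hunk just above: findFunctionSamples now caches results per debug location — try_emplace inserts a null placeholder, and only on first insertion is the expensive lookup performed. A standalone sketch of that memoization idiom (std::map standing in for DenseMap, an invented expensive lookup):

#include <iostream>
#include <map>
#include <string>

struct Samples { int Count; };

// Pretend this walks the inline stack and is expensive.
static const Samples *expensiveLookup(const std::string &Loc) {
  static Samples S{42};
  std::cout << "computed for " << Loc << "\n";
  return &S;
}

static std::map<std::string, const Samples *> Cache;

static const Samples *findSamples(const std::string &Loc) {
  // try_emplace-style: insert a placeholder, fill it only if newly inserted.
  auto It = Cache.try_emplace(Loc, nullptr);
  if (It.second)
    It.first->second = expensiveLookup(Loc);
  return It.first->second;
}

int main() {
  findSamples("foo.c:10");  // computes
  findSamples("foo.c:10");  // served from the cache, no recompute
  return 0;
}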
bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
@@ -760,7 +780,6 @@ bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
- bool isCompact = (Reader->getFormat() == SPF_Compact_Binary);
while (true) {
bool LocalChanged = false;
SmallVector<Instruction *, 10> CIS;
@@ -792,19 +811,16 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
if (IsThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
- PSI->getOrCompHotCountThreshold(),
- isCompact);
+ PSI->getOrCompHotCountThreshold());
continue;
}
- auto CalleeFunctionName = FS->getName();
+ auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
// If it is a recursive call, we do not inline it as it could bloat
// the code exponentially. There is a way to better handle this, e.g.
// clone the caller first, and inline the cloned caller if it is
// recursive. As llvm does not inline recursive calls, we will
// simply ignore it instead of handling it explicitly.
- std::string FGUID;
- auto Fname = getRepInFormat(F.getName(), Reader->getFormat(), FGUID);
- if (CalleeFunctionName == Fname)
+ if (CalleeFunctionName == F.getName())
continue;
const char *Reason = "Callee function not available";
@@ -834,8 +850,7 @@ bool SampleProfileLoader::inlineHotFunctions(
LocalChanged = true;
} else if (IsThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
- InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold(),
- isCompact);
+ InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
}
}
if (LocalChanged) {
@@ -1177,14 +1192,13 @@ static SmallVector<InstrProfValueData, 2> SortCallTargets(
const SampleRecord::CallTargetMap &M) {
SmallVector<InstrProfValueData, 2> R;
for (auto I = M.begin(); I != M.end(); ++I)
- R.push_back({Function::getGUID(I->getKey()), I->getValue()});
- llvm::sort(R.begin(), R.end(),
- [](const InstrProfValueData &L, const InstrProfValueData &R) {
- if (L.Count == R.Count)
- return L.Value > R.Value;
- else
- return L.Count > R.Count;
- });
+ R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
+ llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
+ if (L.Count == R.Count)
+ return L.Value > R.Value;
+ else
+ return L.Count > R.Count;
+ });
return R;
}
@@ -1292,7 +1306,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
}
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
continue;
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
@@ -1519,12 +1533,28 @@ bool SampleProfileLoader::doInitialization(Module &M) {
return false;
}
Reader = std::move(ReaderOrErr.get());
+ Reader->collectFuncsToUse(M);
ProfileIsValid = (Reader->read() == sampleprof_error::success);
+
+ if (!RemappingFilename.empty()) {
+ // Apply profile remappings to the loaded profile data if requested.
+ // For now, we only support remapping symbols encoded using the Itanium
+ // C++ ABI's name mangling scheme.
+ ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
+ RemappingFilename, Ctx, std::move(Reader));
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile remapping file: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+ Reader = std::move(ReaderOrErr.get());
+ ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ }
return true;
}
ModulePass *llvm::createSampleProfileLoaderPass() {
- return new SampleProfileLoaderLegacyPass(SampleProfileFile);
+ return new SampleProfileLoaderLegacyPass();
}
ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
@@ -1533,6 +1563,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI) {
+ FunctionSamples::GUIDToFuncNameMapper Mapper(M);
if (!ProfileIsValid)
return false;
@@ -1577,15 +1608,25 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
ACT = &getAnalysis<AssumptionCacheTracker>();
TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
return SampleLoader.runOnModule(M, nullptr, PSI);
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
- // Initialize the entry count to -1, which will be treated conservatively
- // by getEntryCount as the same as unknown (None). If we have samples this
- // will be overwritten in emitAnnotations.
- F.setEntryCount(ProfileCount(-1, Function::PCT_Real));
+
+ DILocation2SampleMap.clear();
+ // By default the entry count is initialized to -1, which will be treated
+ // conservatively by getEntryCount as the same as unknown (None). This is
+ // to avoid newly added code to be treated as cold. If we have samples
+ // this will be overwritten in emitAnnotations.
+ // If ProfileSampleAccurate is true or F has profile-sample-accurate
+ // attribute, initialize the entry count to 0 so callsites or functions
+ // unsampled will be treated as cold.
+ uint64_t initialEntryCount =
+ (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
+ ? 0
+ : -1;
+ F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
auto &FAM =
@@ -1616,6 +1657,8 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
SampleProfileLoader SampleLoader(
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
+ ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
+ : ProfileRemappingFileName,
IsThinLTOPreLink, GetAssumptionCache, GetTTI);
SampleLoader.doInitialization(M);
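Editor's note on the SampleProfile runOnFunction hunk above: the initial entry count now depends on whether the profile is declared accurate — 0 (unsampled code really is cold) versus -1 (unknown, so getEntryCount treats it as None). A standalone sketch of just that decision (invented flag and attribute query):

#include <cstdint>
#include <iostream>

// Assumed inputs: the global -profile-sample-accurate flag and a per-function
// "profile-sample-accurate" attribute, as described in the hunk above.
static uint64_t initialEntryCount(bool ProfileSampleAccurate, bool FnHasAccurateAttr) {
  // Accurate profile: unsampled functions/callsites are genuinely cold -> 0.
  // Otherwise: unknown, encoded as -1.
  return (ProfileSampleAccurate || FnHasAccurateAttr) ? 0 : static_cast<uint64_t>(-1);
}

int main() {
  std::cout << initialEntryCount(false, false) << "\n"; // 18446744073709551615, i.e. -1 / unknown
  std::cout << initialEntryCount(true, false) << "\n";  // 0, i.e. treat unsampled code as cold
  return 0;
}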
diff --git a/contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index 3c5ad37bced1..ba4efb3ff60d 100644
--- a/contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
@@ -46,7 +47,7 @@ using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "synthetic-counts-propagation"
/// Initial synthetic count assigned to functions.
-static cl::opt<int>
+cl::opt<int>
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
cl::ZeroOrMore,
cl::desc("Initial value of synthetic entry count."));
@@ -98,13 +99,15 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
ModuleAnalysisManager &MAM) {
FunctionAnalysisManager &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- DenseMap<Function *, uint64_t> Counts;
+ DenseMap<Function *, Scaled64> Counts;
// Set initial entry counts.
- initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; });
+ initializeCounts(
+ M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); });
- // Compute the relative block frequency for a call edge. Use scaled numbers
- // and not integers since the relative block frequency could be less than 1.
- auto GetCallSiteRelFreq = [&](const CallGraphNode::CallRecord &Edge) {
+ // Edge includes information about the source. Hence ignore the first
+ // parameter.
+ auto GetCallSiteProfCount = [&](const CallGraphNode *,
+ const CallGraphNode::CallRecord &Edge) {
Optional<Scaled64> Res = None;
if (!Edge.first)
return Res;
@@ -112,29 +115,33 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
CallSite CS(cast<Instruction>(Edge.first));
Function *Caller = CS.getCaller();
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
+
+ // Now compute the callsite count from relative frequency and
+ // entry count:
BasicBlock *CSBB = CS.getInstruction()->getParent();
Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
- Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0);
- BBFreq /= EntryFreq;
- return Optional<Scaled64>(BBFreq);
+ Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
+ BBCount /= EntryFreq;
+ BBCount *= Counts[Caller];
+ return Optional<Scaled64>(BBCount);
};
CallGraph CG(M);
// Propagate the entry counts on the callgraph.
SyntheticCountsUtils<const CallGraph *>::propagate(
- &CG, GetCallSiteRelFreq,
- [&](const CallGraphNode *N) { return Counts[N->getFunction()]; },
- [&](const CallGraphNode *N, uint64_t New) {
+ &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) {
auto F = N->getFunction();
if (!F || F->isDeclaration())
return;
+
Counts[F] += New;
});
// Set the counts as metadata.
- for (auto Entry : Counts)
- Entry.first->setEntryCount(
- ProfileCount(Entry.second, Function::PCT_Synthetic));
+ for (auto Entry : Counts) {
+ Entry.first->setEntryCount(ProfileCount(
+ Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic));
+ }
return PreservedAnalyses::all();
}
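Editor's note on the SyntheticCountsPropagation hunks above: the pass switches from propagating relative frequencies to propagating scaled counts, where each call edge contributes callerCount * blockFreq / entryFreq. A standalone numeric sketch of that formula (double standing in for ScaledNumber):

#include <cassert>
#include <map>
#include <string>

// Edge contribution as in GetCallSiteProfCount above:
// count(call site) = count(caller) * freq(block containing the call) / entryFreq(caller).
static double callSiteCount(double CallerCount, double BlockFreq, double EntryFreq) {
  return CallerCount * (BlockFreq / EntryFreq);
}

int main() {
  // Assumed initial synthetic counts for two functions.
  std::map<std::string, double> Counts = {{"caller", 10.0}, {"callee", 10.0}};
  // A call site sitting in a block that runs 3x per entry of the caller.
  Counts["callee"] += callSiteCount(Counts["caller"], /*BlockFreq=*/3000, /*EntryFreq=*/1000);
  assert(Counts["callee"] == 40.0);  // initial 10 plus 10*3 propagated along the edge
  return 0;
}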
diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 8fe7ae1282cc..510ecb516dc2 100644
--- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -154,7 +154,8 @@ void simplifyExternals(Module &M) {
continue;
Function *NewF =
- Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
+ Function::Create(EmptyFT, GlobalValue::ExternalLinkage,
+ F.getAddressSpace(), "", &M);
NewF->setVisibility(F.getVisibility());
NewF->takeName(&F);
F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
@@ -237,7 +238,7 @@ void splitAndWriteThinLTOBitcode(
// sound because the virtual constant propagation optimizations effectively
// inline all implementations of the virtual function into each call site,
// rather than using function attributes to perform local optimization.
- std::set<const Function *> EligibleVirtualFns;
+ DenseSet<const Function *> EligibleVirtualFns;
// If any member of a comdat lives in MergedM, put all members of that
// comdat in MergedM to keep the comdat together.
DenseSet<const Comdat *> MergedMComdats;
@@ -417,8 +418,18 @@ void splitAndWriteThinLTOBitcode(
}
}
-// Returns whether this module needs to be split because it uses type metadata.
+// Returns whether this module needs to be split because splitting is
+// enabled and it uses type metadata.
bool requiresSplit(Module &M) {
+ // First check if the LTO Unit splitting has been enabled.
+ bool EnableSplitLTOUnit = false;
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("EnableSplitLTOUnit")))
+ EnableSplitLTOUnit = MD->getZExtValue();
+ if (!EnableSplitLTOUnit)
+ return false;
+
+ // Module only needs to be split if it contains type metadata.
for (auto &GO : M.global_objects()) {
if (GO.hasMetadata(LLVMContext::MD_type))
return true;
@@ -430,7 +441,7 @@ bool requiresSplit(Module &M) {
void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter,
Module &M, const ModuleSummaryIndex *Index) {
- // See if this module has any type metadata. If so, we need to split it.
+ // Split module if splitting is enabled and it contains any type metadata.
if (requiresSplit(M))
return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
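Editor's note on the ThinLTOBitcodeWriter hunks above: requiresSplit now needs two things — the EnableSplitLTOUnit module flag and type metadata on some global. A standalone sketch of that two-step gate (plain values standing in for the module flag and the metadata scan):

#include <iostream>
#include <optional>
#include <vector>

struct GlobalObject { bool HasTypeMetadata = false; };

// Mirrors the shape of requiresSplit(): an absent or zero module flag means
// "do not split", regardless of type metadata.
static bool requiresSplit(std::optional<int> EnableSplitLTOUnitFlag,
                          const std::vector<GlobalObject> &Globals) {
  if (!EnableSplitLTOUnitFlag.value_or(0))
    return false;
  for (const GlobalObject &GO : Globals)
    if (GO.HasTypeMetadata)
      return true;
  return false;
}

int main() {
  std::vector<GlobalObject> Globals = {{false}, {true}};
  std::cout << requiresSplit(std::nullopt, Globals) << "\n"; // 0: flag missing
  std::cout << requiresSplit(1, Globals) << "\n";            // 1: flag set and type metadata present
  return 0;
}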
diff --git a/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index d65da2504db4..48bd0cda759d 100644
--- a/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -58,6 +58,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
@@ -406,6 +407,7 @@ void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
struct DevirtModule {
Module &M;
function_ref<AAResults &(Function &)> AARGetter;
+ function_ref<DominatorTree &(Function &)> LookupDomTree;
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
@@ -433,10 +435,12 @@ struct DevirtModule {
DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
+ function_ref<DominatorTree &(Function &)> LookupDomTree,
ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
- : M(M), AARGetter(AARGetter), ExportSummary(ExportSummary),
- ImportSummary(ImportSummary), Int8Ty(Type::getInt8Ty(M.getContext())),
+ : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
+ ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+ Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
Int64Ty(Type::getInt64Ty(M.getContext())),
@@ -533,9 +537,10 @@ struct DevirtModule {
// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
- static bool runForTesting(
- Module &M, function_ref<AAResults &(Function &)> AARGetter,
- function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
+ static bool
+ runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
+ function_ref<DominatorTree &(Function &)> LookupDomTree);
};
struct WholeProgramDevirt : public ModulePass {
@@ -572,17 +577,23 @@ struct WholeProgramDevirt : public ModulePass {
return *ORE;
};
+ auto LookupDomTree = [this](Function &F) -> DominatorTree & {
+ return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ };
+
if (UseCommandLine)
- return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter);
+ return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter,
+ LookupDomTree);
- return DevirtModule(M, LegacyAARGetter(*this), OREGetter, ExportSummary,
- ImportSummary)
+ return DevirtModule(M, LegacyAARGetter(*this), OREGetter, LookupDomTree,
+ ExportSummary, ImportSummary)
.run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
}
};
@@ -592,6 +603,7 @@ INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
"Whole program devirtualization", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
"Whole program devirtualization", false, false)
char WholeProgramDevirt::ID = 0;
@@ -611,7 +623,11 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
};
- if (!DevirtModule(M, AARGetter, OREGetter, ExportSummary, ImportSummary)
+ auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
+ ImportSummary)
.run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -619,7 +635,8 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
bool DevirtModule::runForTesting(
Module &M, function_ref<AAResults &(Function &)> AARGetter,
- function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
ModuleSummaryIndex Summary(/*HaveGVs=*/false);
// Handle the command-line summary arguments. This code is for testing
@@ -637,7 +654,7 @@ bool DevirtModule::runForTesting(
bool Changed =
DevirtModule(
- M, AARGetter, OREGetter,
+ M, AARGetter, OREGetter, LookupDomTree,
ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
.run();
@@ -665,7 +682,7 @@ void DevirtModule::buildTypeIdentifierMap(
for (GlobalVariable &GV : M.globals()) {
Types.clear();
GV.getMetadata(LLVMContext::MD_type, Types);
- if (Types.empty())
+ if (GV.isDeclaration() || Types.empty())
continue;
VTableBits *&BitsPtr = GVToBits[&GV];
@@ -755,7 +772,8 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
auto Apply = [&](CallSiteInfo &CSInfo) {
for (auto &&VCallSite : CSInfo.CallSites) {
if (RemarksEnabled)
- VCallSite.emitRemark("single-impl", TheFn->getName(), OREGetter);
+ VCallSite.emitRemark("single-impl",
+ TheFn->stripPointerCasts()->getName(), OREGetter);
VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
TheFn, VCallSite.CS.getCalledValue()->getType()));
// This use is no longer unsafe.
@@ -846,10 +864,13 @@ void DevirtModule::tryICallBranchFunnel(
Function *JT;
if (isa<MDString>(Slot.TypeID)) {
JT = Function::Create(FT, Function::ExternalLinkage,
+ M.getDataLayout().getProgramAddressSpace(),
getGlobalName(Slot, {}, "branch_funnel"), &M);
JT->setVisibility(GlobalValue::HiddenVisibility);
} else {
- JT = Function::Create(FT, Function::InternalLinkage, "branch_funnel", &M);
+ JT = Function::Create(FT, Function::InternalLinkage,
+ M.getDataLayout().getProgramAddressSpace(),
+ "branch_funnel", &M);
}
JT->addAttribute(1, Attribute::Nest);
@@ -891,7 +912,8 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
continue;
if (RemarksEnabled)
- VCallSite.emitRemark("branch-funnel", JT->getName(), OREGetter);
+ VCallSite.emitRemark("branch-funnel",
+ JT->stripPointerCasts()->getName(), OREGetter);
// Pass the address of the vtable in the nest register, which is r10 on
// x86_64.
@@ -1323,15 +1345,14 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
bool DevirtModule::areRemarksEnabled() {
const auto &FL = M.getFunctionList();
- if (FL.empty())
- return false;
- const Function &Fn = FL.front();
-
- const auto &BBL = Fn.getBasicBlockList();
- if (BBL.empty())
- return false;
- auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
- return DI.isEnabled();
+ for (const Function &Fn : FL) {
+ const auto &BBL = Fn.getBasicBlockList();
+ if (BBL.empty())
+ continue;
+ auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
+ return DI.isEnabled();
+ }
+ return false;
}
void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
@@ -1341,7 +1362,7 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
// points to a member of the type identifier %md. Group calls by (type ID,
// offset) pair (effectively the identity of the virtual function) and store
// to CallSlots.
- DenseSet<Value *> SeenPtrs;
+ DenseSet<CallSite> SeenCallSites;
for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
I != E;) {
auto CI = dyn_cast<CallInst>(I->getUser());
@@ -1352,19 +1373,22 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
// Search for virtual calls based on %p and add them to DevirtCalls.
SmallVector<DevirtCallSite, 1> DevirtCalls;
SmallVector<CallInst *, 1> Assumes;
- findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
+ auto &DT = LookupDomTree(*CI->getFunction());
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
- // If we found any, add them to CallSlots. Only do this if we haven't seen
- // the vtable pointer before, as it may have been CSE'd with pointers from
- // other call sites, and we don't want to process call sites multiple times.
+ // If we found any, add them to CallSlots.
if (!Assumes.empty()) {
Metadata *TypeId =
cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
- if (SeenPtrs.insert(Ptr).second) {
- for (DevirtCallSite Call : DevirtCalls) {
+ for (DevirtCallSite Call : DevirtCalls) {
+ // Only add this CallSite if we haven't seen it before. The vtable
+ // pointer may have been CSE'd with pointers from other call sites,
+ // and we don't want to process call sites multiple times. We can't
+ // just skip the vtable Ptr if it has been seen before, however, since
+ // it may be shared by type tests that dominate different calls.
+ if (SeenCallSites.insert(Call.CS).second)
CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS, nullptr);
- }
}
}
@@ -1398,8 +1422,9 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
SmallVector<Instruction *, 1> LoadedPtrs;
SmallVector<Instruction *, 1> Preds;
bool HasNonCallUses = false;
+ auto &DT = LookupDomTree(*CI->getFunction());
findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
- HasNonCallUses, CI);
+ HasNonCallUses, CI, DT);
// Start by generating "pessimistic" code that explicitly loads the function
// pointer from the vtable and performs the type check. If possible, we will
@@ -1538,6 +1563,17 @@ bool DevirtModule::run() {
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
+ // If only some of the modules were split, we cannot correctly handle
+ // code that contains type tests or type checked loads.
+ if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+ (ImportSummary && ImportSummary->partiallySplitLTOUnits())) {
+ if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
+ (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
+ report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test "
+ "or llvm.type.checked.load");
+ return false;
+ }
+
// Normally if there are no users of the devirtualization intrinsics in the
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
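Editor's note on the scanTypeTestUsers hunk above: deduplication now keys on the call site rather than the vtable pointer, because a CSE'd pointer can feed several type tests that dominate different calls. A standalone sketch of the corrected dedup (invented call-site and slot types):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Stand-ins: a call site is just an id; a slot collects call sites to devirtualize.
using CallSiteId = int;
struct TypeTest { std::string VTablePtr; std::vector<CallSiteId> DevirtCalls; };

int main() {
  // Two type tests on the *same* (CSE'd) vtable pointer, guarding different calls.
  std::vector<TypeTest> Tests = {{"%p", {1}}, {"%p", {2}}};

  std::set<CallSiteId> SeenCallSites;  // key on the call site, not the pointer
  std::map<std::string, std::vector<CallSiteId>> CallSlots;

  for (const TypeTest &T : Tests)
    for (CallSiteId CS : T.DevirtCalls)
      if (SeenCallSites.insert(CS).second)  // each call site processed once
        CallSlots["slot"].push_back(CS);

  // 2: both calls are kept; keying on the shared %p would have dropped one.
  std::cout << CallSlots["slot"].size() << "\n";
  return 0;
}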
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 83054588a9aa..6e196bfdbd25 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -186,8 +186,6 @@ namespace {
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
- Value *performFactorization(Instruction *I);
-
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
@@ -197,7 +195,6 @@ namespace {
Value *createFSub(Value *Opnd0, Value *Opnd1);
Value *createFAdd(Value *Opnd0, Value *Opnd1);
Value *createFMul(Value *Opnd0, Value *Opnd1);
- Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
@@ -427,89 +424,6 @@ unsigned FAddend::drillAddendDownOneStep
return BreakNum;
}
-// Try to perform following optimization on the input instruction I. Return the
-// simplified expression if was successful; otherwise, return 0.
-//
-// Instruction "I" is Simplified into
-// -------------------------------------------------------
-// (x * y) +/- (x * z) x * (y +/- z)
-// (y / x) +/- (z / x) (y +/- z) / x
-Value *FAddCombine::performFactorization(Instruction *I) {
- assert((I->getOpcode() == Instruction::FAdd ||
- I->getOpcode() == Instruction::FSub) && "Expect add/sub");
-
- Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
- Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
-
- if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
- return nullptr;
-
- bool isMpy = false;
- if (I0->getOpcode() == Instruction::FMul)
- isMpy = true;
- else if (I0->getOpcode() != Instruction::FDiv)
- return nullptr;
-
- Value *Opnd0_0 = I0->getOperand(0);
- Value *Opnd0_1 = I0->getOperand(1);
- Value *Opnd1_0 = I1->getOperand(0);
- Value *Opnd1_1 = I1->getOperand(1);
-
- // Input Instr I Factor AddSub0 AddSub1
- // ----------------------------------------------
- // (x*y) +/- (x*z) x y z
- // (y/x) +/- (z/x) x y z
- Value *Factor = nullptr;
- Value *AddSub0 = nullptr, *AddSub1 = nullptr;
-
- if (isMpy) {
- if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
- Factor = Opnd0_0;
- else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
- Factor = Opnd0_1;
-
- if (Factor) {
- AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
- AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
- }
- } else if (Opnd0_1 == Opnd1_1) {
- Factor = Opnd0_1;
- AddSub0 = Opnd0_0;
- AddSub1 = Opnd1_0;
- }
-
- if (!Factor)
- return nullptr;
-
- FastMathFlags Flags;
- Flags.setFast();
- if (I0) Flags &= I->getFastMathFlags();
- if (I1) Flags &= I->getFastMathFlags();
-
- // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
- Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
- createFAdd(AddSub0, AddSub1) :
- createFSub(AddSub0, AddSub1);
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
- const APFloat &F = CFP->getValueAPF();
- if (!F.isNormal())
- return nullptr;
- } else if (Instruction *II = dyn_cast<Instruction>(NewAddSub))
- II->setFastMathFlags(Flags);
-
- if (isMpy) {
- Value *RI = createFMul(Factor, NewAddSub);
- if (Instruction *II = dyn_cast<Instruction>(RI))
- II->setFastMathFlags(Flags);
- return RI;
- }
-
- Value *RI = createFDiv(NewAddSub, Factor);
- if (Instruction *II = dyn_cast<Instruction>(RI))
- II->setFastMathFlags(Flags);
- return RI;
-}
-
Value *FAddCombine::simplify(Instruction *I) {
assert(I->hasAllowReassoc() && I->hasNoSignedZeros() &&
"Expected 'reassoc'+'nsz' instruction");
@@ -594,8 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- // step 6: Try factorization as the last resort,
- return performFactorization(I);
+ return nullptr;
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
@@ -772,13 +685,6 @@ Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
return V;
}
-Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
- Value *V = Builder.CreateFDiv(Opnd0, Opnd1);
- if (Instruction *I = dyn_cast<Instruction>(V))
- createInstPostProc(I);
- return V;
-}
-
void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) {
NewInstr->setDebugLoc(Instr->getDebugLoc());
@@ -1135,7 +1041,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// (A*B)+(A*C) -> A*(B+C) etc
@@ -1285,77 +1191,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- // Check for (add (sext x), y), see if we can merge this into an
- // integer add followed by a sext.
- if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
- // (add (sext x), cst) --> (sext (add x, cst'))
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- if (LHSConv->hasOneUse()) {
- Constant *CI =
- ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
- if (ConstantExpr::getSExt(CI, Ty) == RHSC &&
- willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
- // Insert the new, smaller add.
- Value *NewAdd =
- Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
- return new SExtInst(NewAdd, Ty);
- }
- }
- }
-
- // (add (sext x), (sext y)) --> (sext (add int x, y))
- if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of sexts), and if the
- // integer add will not overflow.
- if (LHSConv->getOperand(0)->getType() ==
- RHSConv->getOperand(0)->getType() &&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- willNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), I)) {
- // Insert the new integer add.
- Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), "addconv");
- return new SExtInst(NewAdd, Ty);
- }
- }
- }
-
- // Check for (add (zext x), y), see if we can merge this into an
- // integer add followed by a zext.
- if (auto *LHSConv = dyn_cast<ZExtInst>(LHS)) {
- // (add (zext x), cst) --> (zext (add x, cst'))
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- if (LHSConv->hasOneUse()) {
- Constant *CI =
- ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
- if (ConstantExpr::getZExt(CI, Ty) == RHSC &&
- willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) {
- // Insert the new, smaller add.
- Value *NewAdd =
- Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
- return new ZExtInst(NewAdd, Ty);
- }
- }
- }
-
- // (add (zext x), (zext y)) --> (zext (add int x, y))
- if (auto *RHSConv = dyn_cast<ZExtInst>(RHS)) {
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of zexts), and if the
- // integer add will not overflow.
- if (LHSConv->getOperand(0)->getType() ==
- RHSConv->getOperand(0)->getType() &&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- willNotOverflowUnsignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), I)) {
- // Insert the new integer add.
- Value *NewAdd = Builder.CreateNUWAdd(
- LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv");
- return new ZExtInst(NewAdd, Ty);
- }
- }
- }
+ if (Instruction *Ext = narrowMathIfNoOverflow(I))
+ return Ext;
// (add (xor A, B) (and A, B)) --> (or A, B)
// (add (and A, B) (xor A, B)) --> (or A, B)
@@ -1391,6 +1228,45 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
+/// Factor a common operand out of fadd/fsub of fmul/fdiv.
+static Instruction *factorizeFAddFSub(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert((I.getOpcode() == Instruction::FAdd ||
+ I.getOpcode() == Instruction::FSub) && "Expecting fadd/fsub");
+ assert(I.hasAllowReassoc() && I.hasNoSignedZeros() &&
+ "FP factorization requires FMF");
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *X, *Y, *Z;
+ bool IsFMul;
+ if ((match(Op0, m_OneUse(m_FMul(m_Value(X), m_Value(Z)))) &&
+ match(Op1, m_OneUse(m_c_FMul(m_Value(Y), m_Specific(Z))))) ||
+ (match(Op0, m_OneUse(m_FMul(m_Value(Z), m_Value(X)))) &&
+ match(Op1, m_OneUse(m_c_FMul(m_Value(Y), m_Specific(Z))))))
+ IsFMul = true;
+ else if (match(Op0, m_OneUse(m_FDiv(m_Value(X), m_Value(Z)))) &&
+ match(Op1, m_OneUse(m_FDiv(m_Value(Y), m_Specific(Z)))))
+ IsFMul = false;
+ else
+ return nullptr;
+
+ // (X * Z) + (Y * Z) --> (X + Y) * Z
+ // (X * Z) - (Y * Z) --> (X - Y) * Z
+ // (X / Z) + (Y / Z) --> (X + Y) / Z
+ // (X / Z) - (Y / Z) --> (X - Y) / Z
+ bool IsFAdd = I.getOpcode() == Instruction::FAdd;
+ Value *XY = IsFAdd ? Builder.CreateFAddFMF(X, Y, &I)
+ : Builder.CreateFSubFMF(X, Y, &I);
+
+ // Bail out if we just created a denormal constant.
+ // TODO: This is copied from a previous implementation. Is it necessary?
+ const APFloat *C;
+ if (match(XY, m_APFloat(C)) && !C->isNormal())
+ return nullptr;
+
+ return IsFMul ? BinaryOperator::CreateFMulFMF(XY, Z, &I)
+ : BinaryOperator::CreateFDivFMF(XY, Z, &I);
+}
+
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Value *V = SimplifyFAddInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
@@ -1400,7 +1276,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
@@ -1478,6 +1354,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
+ if (Instruction *F = factorizeFAddFSub(I, Builder))
+ return F;
if (Value *V = FAddCombine(Builder).simplify(&I))
return replaceInstUsesWith(I, V);
}
@@ -1577,7 +1455,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// (A*B)-(A*C) -> A*(B-C) etc
@@ -1771,19 +1649,51 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// X - A*-B -> X + A*B
// X - -A*B -> X + A*B
Value *A, *B;
- Constant *CI;
if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B)))))
return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B));
- // X - A*CI -> X + A*-CI
+ // X - A*C -> X + A*-C
// No need to handle commuted multiply because multiply handling will
// ensure constant will be move to the right hand side.
- if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) {
- Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI));
+ if (match(Op1, m_Mul(m_Value(A), m_Constant(C))) && !isa<ConstantExpr>(C)) {
+ Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(C));
return BinaryOperator::CreateAdd(Op0, NewMul);
}
}
+ {
+ // ~A - Min/Max(~A, O) -> Max/Min(A, ~O) - A
+ // ~A - Min/Max(O, ~A) -> Max/Min(A, ~O) - A
+ // Min/Max(~A, O) - ~A -> A - Max/Min(A, ~O)
+ // Min/Max(O, ~A) - ~A -> A - Max/Min(A, ~O)
+ // So long as O here is freely invertible, this will be neutral or a win.
+ Value *LHS, *RHS, *A;
+ Value *NotA = Op0, *MinMax = Op1;
+ SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
+ if (!SelectPatternResult::isMinOrMax(SPF)) {
+ NotA = Op1;
+ MinMax = Op0;
+ SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
+ }
+ if (SelectPatternResult::isMinOrMax(SPF) &&
+ match(NotA, m_Not(m_Value(A))) && (NotA == LHS || NotA == RHS)) {
+ if (NotA == LHS)
+ std::swap(LHS, RHS);
+ // LHS is now O above and expected to have at least 2 uses (the min/max)
+ // NotA is expected to have 2 uses from the min/max and 1 from the sub.
+ if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
+ !NotA->hasNUsesOrMore(4)) {
+ // Note: We don't generate the inverse max/min, just create the not of
+ // it and let other folds do the rest.
+ Value *Not = Builder.CreateNot(MinMax);
+ if (NotA == Op0)
+ return BinaryOperator::CreateSub(Not, A);
+ else
+ return BinaryOperator::CreateSub(A, Not);
+ }
+ }
+ }
+
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
Value *LHSOp, *RHSOp;
@@ -1819,6 +1729,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return SelectInst::Create(Cmp, Neg, A);
}
+ if (Instruction *Ext = narrowMathIfNoOverflow(I))
+ return Ext;
+
bool Changed = false;
if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
@@ -1838,7 +1751,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// Subtraction from -0.0 is the canonical form of fneg.
@@ -1847,13 +1760,27 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP()))
return BinaryOperator::CreateFNegFMF(Op1, &I);
+ Value *X, *Y;
+ Constant *C;
+
+ // Fold negation into constant operand. This is limited with one-use because
+ // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
+ // -(X * C) --> X * (-C)
+ if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
+ return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
+ // -(X / C) --> X / (-C)
+ if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
+ return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
+ // -(C / X) --> (-C) / X
+ if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
+ return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);
+
// If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
// Canonicalize to fadd to make analysis easier.
// This can also help codegen because fadd is commutative.
// Note that if this fsub was really an fneg, the fadd with -0.0 will get
// killed later. We still limit that particular transform with 'hasOneUse'
// because an fneg is assumed better/cheaper than a generic fsub.
- Value *X, *Y;
if (I.hasNoSignedZeros() || CannotBeNegativeZero(Op0, SQ.TLI)) {
if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
Value *NewSub = Builder.CreateFSubFMF(Y, X, &I);
@@ -1869,7 +1796,6 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
// X - C --> X + (-C)
// But don't transform constant expressions because there's an inverse fold
// for X + (-Y) --> X - Y.
- Constant *C;
if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);
@@ -1879,21 +1805,46 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
// Similar to above, but look through a cast of the negated value:
// X - (fptrunc(-Y)) --> X + fptrunc(Y)
- if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y)))))) {
- Value *TruncY = Builder.CreateFPTrunc(Y, I.getType());
- return BinaryOperator::CreateFAddFMF(Op0, TruncY, &I);
- }
+ Type *Ty = I.getType();
+ if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y))))))
+ return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPTrunc(Y, Ty), &I);
+
// X - (fpext(-Y)) --> X + fpext(Y)
- if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y)))))) {
- Value *ExtY = Builder.CreateFPExt(Y, I.getType());
- return BinaryOperator::CreateFAddFMF(Op0, ExtY, &I);
- }
+ if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y))))))
+ return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I);
- // Handle specials cases for FSub with selects feeding the operation
+ // Handle special cases for FSub with selects feeding the operation
if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
return replaceInstUsesWith(I, V);
if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
+ // (Y - X) - Y --> -X
+ if (match(Op0, m_FSub(m_Specific(Op1), m_Value(X))))
+ return BinaryOperator::CreateFNegFMF(X, &I);
+
+ // Y - (X + Y) --> -X
+ // Y - (Y + X) --> -X
+ if (match(Op1, m_c_FAdd(m_Specific(Op0), m_Value(X))))
+ return BinaryOperator::CreateFNegFMF(X, &I);
+
+ // (X * C) - X --> X * (C - 1.0)
+ if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) {
+ Constant *CSubOne = ConstantExpr::getFSub(C, ConstantFP::get(Ty, 1.0));
+ return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
+ }
+ // X - (X * C) --> X * (1.0 - C)
+ if (match(Op1, m_FMul(m_Specific(Op0), m_Constant(C)))) {
+ Constant *OneSubC = ConstantExpr::getFSub(ConstantFP::get(Ty, 1.0), C);
+ return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
+ }
+
+ if (Instruction *F = factorizeFAddFSub(I, Builder))
+ return F;
+
+ // TODO: This performs reassociative folds for FP ops. Some fraction of the
+ // functionality has been subsumed by simple pattern matching here and in
+ // InstSimplify. We should let a dedicated reassociation pass handle more
+ // complex pattern matching and remove this from InstCombine.
if (Value *V = FAddCombine(Builder).simplify(&I))
return replaceInstUsesWith(I, V);
}
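As a rough illustration of the new FSub folds added above, here is a small IR sketch (function names and the expected results are illustrative only, not part of this commit) that InstCombine should now reduce, e.g. via opt -instcombine:

  define float @fneg_of_fmul_const(float %x) {
    %mul = fmul float %x, 4.0
    %neg = fsub float -0.0, %mul      ; canonical fneg of (x * 4.0)
    ret float %neg                    ; expected: fmul float %x, -4.0
  }
  define float @fsub_of_fadd(float %x, float %y) {
    %add = fadd float %x, %y
    %sub = fsub reassoc nsz float %y, %add   ; y - (x + y)
    ret float %sub                           ; expected: fneg of %x (fsub from -0.0)
  }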
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3d758e2fe7c9..404c2ad7e6e7 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -53,11 +53,11 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC) {
/// operands into either a constant true or false, or a brand new ICmp
/// instruction. The sign is passed in to determine which kind of predicate to
/// use in the new icmp instruction.
-static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS,
InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate NewPred;
- if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
- return NewConstant;
+ if (Constant *TorF = getPredForICmpCode(Code, Sign, LHS->getType(), NewPred))
+ return TorF;
return Builder.CreateICmp(NewPred, LHS, RHS);
}
@@ -898,6 +898,130 @@ Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
}
+/// General pattern:
+/// X & Y
+///
+/// Where Y is checking that all the high bits (covered by a mask 4294967168)
+/// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0
+/// Pattern can be one of:
+/// %t = add i32 %arg, 128
+/// %r = icmp ult i32 %t, 256
+/// Or
+/// %t0 = shl i32 %arg, 24
+/// %t1 = ashr i32 %t0, 24
+/// %r = icmp eq i32 %t1, %arg
+/// Or
+/// %t0 = trunc i32 %arg to i8
+/// %t1 = sext i8 %t0 to i32
+/// %r = icmp eq i32 %t1, %arg
+/// This pattern is a signed truncation check.
+///
+/// And X is checking that some bit in that same mask is zero.
+/// I.e. can be one of:
+/// %r = icmp sgt i32 %arg, -1
+/// Or
+/// %t = and i32 %arg, 2147483648
+/// %r = icmp eq i32 %t, 0
+///
+/// Since we are checking that all the bits in that mask are the same,
+/// and a particular bit is zero, what we are really checking is that all the
+/// masked bits are zero.
+/// So this should be transformed to:
+/// %r = icmp ult i32 %arg, 128
+static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
+ Instruction &CxtI,
+ InstCombiner::BuilderTy &Builder) {
+ assert(CxtI.getOpcode() == Instruction::And);
+
+ // Match icmp ult (add %arg, C01), C1 (C1 == C01 << 1; powers of two)
+ auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X,
+ APInt &SignBitMask) -> bool {
+ CmpInst::Predicate Pred;
+ const APInt *I01, *I1; // powers of two; I1 == I01 << 1
+ if (!(match(ICmp,
+ m_ICmp(Pred, m_Add(m_Value(X), m_Power2(I01)), m_Power2(I1))) &&
+ Pred == ICmpInst::ICMP_ULT && I1->ugt(*I01) && I01->shl(1) == *I1))
+ return false;
+ // Which bit is the new sign bit as per the 'signed truncation' pattern?
+ SignBitMask = *I01;
+ return true;
+ };
+
+ // One icmp needs to be 'signed truncation check'.
+ // We need to match this first, else we will mismatch commutative cases.
+ Value *X1;
+ APInt HighestBit;
+ ICmpInst *OtherICmp;
+ if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit))
+ OtherICmp = ICmp0;
+ else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit))
+ OtherICmp = ICmp1;
+ else
+ return nullptr;
+
+ assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)");
+
+ // Try to match/decompose into: icmp eq (X & Mask), 0
+ auto tryToDecompose = [](ICmpInst *ICmp, Value *&X,
+ APInt &UnsetBitsMask) -> bool {
+ CmpInst::Predicate Pred = ICmp->getPredicate();
+ // Can it be decomposed into icmp eq (X & Mask), 0 ?
+ if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
+ Pred, X, UnsetBitsMask,
+ /*LookThruTrunc=*/false) &&
+ Pred == ICmpInst::ICMP_EQ)
+ return true;
+ // Is it icmp eq (X & Mask), 0 already?
+ const APInt *Mask;
+ if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) &&
+ Pred == ICmpInst::ICMP_EQ) {
+ UnsetBitsMask = *Mask;
+ return true;
+ }
+ return false;
+ };
+
+ // And the other icmp needs to be decomposable into a bit test.
+ Value *X0;
+ APInt UnsetBitsMask;
+ if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
+ return nullptr;
+
+ assert(!UnsetBitsMask.isNullValue() && "empty mask makes no sense.");
+
+ // Are they working on the same value?
+ Value *X;
+ if (X1 == X0) {
+ // Ok as is.
+ X = X1;
+ } else if (match(X0, m_Trunc(m_Specific(X1)))) {
+ UnsetBitsMask = UnsetBitsMask.zext(X1->getType()->getScalarSizeInBits());
+ X = X1;
+ } else
+ return nullptr;
+
+ // So which bits should be uniform as per the 'signed truncation check'?
+ // (all the bits starting with (i.e. including) HighestBit)
+ APInt SignBitsMask = ~(HighestBit - 1U);
+
+ // UnsetBitsMask must have some common bits with SignBitsMask.
+ if (!UnsetBitsMask.intersects(SignBitsMask))
+ return nullptr;
+
+ // Does UnsetBitsMask contain any bits outside of SignBitsMask?
+ if (!UnsetBitsMask.isSubsetOf(SignBitsMask)) {
+ APInt OtherHighestBit = (~UnsetBitsMask) + 1U;
+ if (!OtherHighestBit.isPowerOf2())
+ return nullptr;
+ HighestBit = APIntOps::umin(HighestBit, OtherHighestBit);
+ }
+ // Else, if it does not, then all is ok as-is.
+
+ // %r = icmp ult %X, SignBit
+ return Builder.CreateICmpULT(X, ConstantInt::get(X->getType(), HighestBit),
+ CxtI.getName() + ".simplified");
+}
+
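An illustrative input for the signed-truncation-check fold documented above (names invented for this note); the two compares and the 'and' should collapse to a single unsigned compare:

  define i1 @signed_trunc_check(i32 %arg) {
    %t = add i32 %arg, 128
    %uniform = icmp ult i32 %t, 256   ; high bits of %arg are all equal
    %nonneg = icmp sgt i32 %arg, -1   ; sign bit of %arg is clear
    %r = and i1 %uniform, %nonneg
    ret i1 %r                         ; expected: icmp ult i32 %arg, 128
  }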
/// Fold (icmp)&(icmp) if possible.
Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction &CxtI) {
@@ -909,7 +1033,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
- if (PredicatesFoldable(PredL, PredR)) {
+ if (predicatesFoldable(PredL, PredR)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
@@ -917,8 +1041,8 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
LHS->getOperand(1) == RHS->getOperand(1)) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
- bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ bool IsSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
}
}
@@ -937,6 +1061,9 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder))
return V;
+ if (Value *V = foldSignedTruncationCheck(LHS, RHS, CxtI, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1004,7 +1131,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
// We can't fold (ugt x, C) & (sgt x, C2).
- if (!PredicatesFoldable(PredL, PredR))
+ if (!predicatesFoldable(PredL, PredR))
return nullptr;
// Ensure that the larger constant is on the RHS.
@@ -1408,7 +1535,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// See if we can simplify any instructions used by the instruction whose sole
@@ -1635,10 +1762,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return nullptr;
}
-/// Given an OR instruction, check to see if this is a bswap idiom. If so,
-/// insert the new intrinsic and return it.
-Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+Instruction *InstCombiner::matchBSwap(BinaryOperator &Or) {
+ assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'");
+ Value *Op0 = Or.getOperand(0), *Op1 = Or.getOperand(1);
// Look through zero extends.
if (Instruction *Ext = dyn_cast<ZExtInst>(Op0))
@@ -1674,7 +1800,7 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
return nullptr;
SmallVector<Instruction*, 4> Insts;
- if (!recognizeBSwapOrBitReverseIdiom(&I, true, false, Insts))
+ if (!recognizeBSwapOrBitReverseIdiom(&Or, true, false, Insts))
return nullptr;
Instruction *LastInst = Insts.pop_back_val();
LastInst->removeFromParent();
@@ -1684,6 +1810,57 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
return LastInst;
}
+/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
+static Instruction *matchRotate(Instruction &Or) {
+ // TODO: Can we reduce the code duplication between this and the related
+ // rotate matching code under visitSelect and visitTrunc?
+ unsigned Width = Or.getType()->getScalarSizeInBits();
+ if (!isPowerOf2_32(Width))
+ return nullptr;
+
+ // First, find an or'd pair of opposite shifts with the same shifted operand:
+ // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
+ Value *Or0 = Or.getOperand(0), *Or1 = Or.getOperand(1);
+ Value *ShVal, *ShAmt0, *ShAmt1;
+ if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
+ !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
+ return nullptr;
+
+ auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+ auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+ if (ShiftOpcode0 == ShiftOpcode1)
+ return nullptr;
+
+ // Match the shift amount operands for a rotate pattern. This always matches
+ // a subtraction on the R operand.
+ auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+ // The shift amount may be masked with negation:
+ // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+ Value *X;
+ unsigned Mask = Width - 1;
+ if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+ match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+ return X;
+
+ return nullptr;
+ };
+
+ Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+ bool SubIsOnLHS = false;
+ if (!ShAmt) {
+ ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+ SubIsOnLHS = true;
+ }
+ if (!ShAmt)
+ return nullptr;
+
+ bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
+ (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+ Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+ Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
+ return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
+}
+
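A sketch of the UB-safe rotate idiom that matchRotate is meant to recognize (illustrative only); the masked opposite shifts should become a funnel-shift intrinsic:

  define i32 @rotl(i32 %x, i32 %amt) {
    %lowbits = and i32 %amt, 31
    %shl = shl i32 %x, %lowbits
    %neg = sub i32 0, %amt
    %lowbits2 = and i32 %neg, 31
    %lshr = lshr i32 %x, %lowbits2
    %r = or i32 %shl, %lshr
    ret i32 %r      ; expected: call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)
  }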
/// If all elements of two constant vectors are 0/-1 and inverses, return true.
static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
unsigned NumElts = C1->getType()->getVectorNumElements();
@@ -1704,14 +1881,33 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
/// We have an expression of the form (A & C) | (B & D). If A is a scalar or
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
/// B, it can be used as the condition operand of a select instruction.
-static Value *getSelectCondition(Value *A, Value *B,
- InstCombiner::BuilderTy &Builder) {
- // If these are scalars or vectors of i1, A can be used directly.
+Value *InstCombiner::getSelectCondition(Value *A, Value *B) {
+ // Step 1: We may have peeked through bitcasts in the caller.
+ // Exit immediately if we don't have (vector) integer types.
Type *Ty = A->getType();
- if (match(A, m_Not(m_Specific(B))) && Ty->isIntOrIntVectorTy(1))
- return A;
+ if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy())
+ return nullptr;
+
+ // Step 2: We need 0 or all-1's bitmasks.
+ if (ComputeNumSignBits(A) != Ty->getScalarSizeInBits())
+ return nullptr;
+
+ // Step 3: If B is the 'not' value of A, we have our answer.
+ if (match(A, m_Not(m_Specific(B)))) {
+ // If these are scalars or vectors of i1, A can be used directly.
+ if (Ty->isIntOrIntVectorTy(1))
+ return A;
+ return Builder.CreateTrunc(A, CmpInst::makeCmpResultType(Ty));
+ }
- // If A and B are sign-extended, look through the sexts to find the booleans.
+ // If both operands are constants, see if the constants are inverse bitmasks.
+ Constant *AConst, *BConst;
+ if (match(A, m_Constant(AConst)) && match(B, m_Constant(BConst)))
+ if (AConst == ConstantExpr::getNot(BConst))
+ return Builder.CreateZExtOrTrunc(A, CmpInst::makeCmpResultType(Ty));
+
+ // Look for more complex patterns. The 'not' op may be hidden behind various
+ // casts. Look through sexts and bitcasts to find the booleans.
Value *Cond;
Value *NotB;
if (match(A, m_SExt(m_Value(Cond))) &&
@@ -1727,36 +1923,29 @@ static Value *getSelectCondition(Value *A, Value *B,
if (!Ty->isVectorTy())
return nullptr;
- // If both operands are constants, see if the constants are inverse bitmasks.
- Constant *AC, *BC;
- if (match(A, m_Constant(AC)) && match(B, m_Constant(BC)) &&
- areInverseVectorBitmasks(AC, BC)) {
- return Builder.CreateZExtOrTrunc(AC, CmpInst::makeCmpResultType(Ty));
- }
-
// If both operands are xor'd with constants using the same sexted boolean
// operand, see if the constants are inverse bitmasks.
- if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) &&
- match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) &&
+ // TODO: Use ConstantExpr::getNot()?
+ if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AConst)))) &&
+ match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BConst)))) &&
Cond->getType()->isIntOrIntVectorTy(1) &&
- areInverseVectorBitmasks(AC, BC)) {
- AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
- return Builder.CreateXor(Cond, AC);
+ areInverseVectorBitmasks(AConst, BConst)) {
+ AConst = ConstantExpr::getTrunc(AConst, CmpInst::makeCmpResultType(Ty));
+ return Builder.CreateXor(Cond, AConst);
}
return nullptr;
}
/// We have an expression of the form (A & C) | (B & D). Try to simplify this
/// to "A' ? C : D", where A' is a boolean or vector of booleans.
-static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
- InstCombiner::BuilderTy &Builder) {
+Value *InstCombiner::matchSelectFromAndOr(Value *A, Value *C, Value *B,
+ Value *D) {
// The potential condition of the select may be bitcasted. In that case, look
// through its bitcast and the corresponding bitcast of the 'not' condition.
Type *OrigType = A->getType();
A = peekThroughBitcast(A, true);
B = peekThroughBitcast(B, true);
-
- if (Value *Cond = getSelectCondition(A, B, Builder)) {
+ if (Value *Cond = getSelectCondition(A, B)) {
// ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
// The bitcasts will either all exist or all not exist. The builder will
// not create unnecessary casts if the types already match.
@@ -1838,7 +2027,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
}
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
- if (PredicatesFoldable(PredL, PredR)) {
+ if (predicatesFoldable(PredL, PredR)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
@@ -1846,8 +2035,8 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
LHS->getOperand(1) == RHS->getOperand(1)) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
- bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ bool IsSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
}
}
@@ -1928,7 +2117,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return nullptr;
// We can't fold (ugt x, C) | (sgt x, C2).
- if (!PredicatesFoldable(PredL, PredR))
+ if (!predicatesFoldable(PredL, PredR))
return nullptr;
// Ensure that the larger constant is on the RHS.
@@ -2007,7 +2196,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// See if we can simplify any instructions used by the instruction whose sole
@@ -2029,37 +2218,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
return FoldedLogic;
- // Given an OR instruction, check to see if this is a bswap.
- if (Instruction *BSwap = MatchBSwap(I))
+ if (Instruction *BSwap = matchBSwap(I))
return BSwap;
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- {
- Value *A;
- const APInt *C;
- // (X^C)|Y -> (X|Y)^C iff Y&C == 0
- if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) &&
- MaskedValueIsZero(Op1, *C, 0, &I)) {
- Value *NOr = Builder.CreateOr(A, Op1);
- NOr->takeName(Op0);
- return BinaryOperator::CreateXor(NOr,
- ConstantInt::get(NOr->getType(), *C));
- }
+ if (Instruction *Rotate = matchRotate(I))
+ return Rotate;
- // Y|(X^C) -> (X|Y)^C iff Y&C == 0
- if (match(Op1, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) &&
- MaskedValueIsZero(Op0, *C, 0, &I)) {
- Value *NOr = Builder.CreateOr(A, Op0);
- NOr->takeName(Op0);
- return BinaryOperator::CreateXor(NOr,
- ConstantInt::get(NOr->getType(), *C));
- }
+ Value *X, *Y;
+ const APInt *CV;
+ if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
+ !CV->isAllOnesValue() && MaskedValueIsZero(Y, *CV, 0, &I)) {
+ // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
+ // The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
+ Value *Or = Builder.CreateOr(X, Y);
+ return BinaryOperator::CreateXor(Or, ConstantInt::get(I.getType(), *CV));
}
- Value *A, *B;
-
// (A & C)|(B & D)
- Value *C = nullptr, *D = nullptr;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *A, *B, *C, *D;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
ConstantInt *C1 = dyn_cast<ConstantInt>(C);
@@ -2122,21 +2299,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// 'or' that it is replacing.
if (Op0->hasOneUse() || Op1->hasOneUse()) {
// (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
- if (Value *V = matchSelectFromAndOr(A, C, B, D, Builder))
+ if (Value *V = matchSelectFromAndOr(A, C, B, D))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(A, C, D, B, Builder))
+ if (Value *V = matchSelectFromAndOr(A, C, D, B))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(C, A, B, D, Builder))
+ if (Value *V = matchSelectFromAndOr(C, A, B, D))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(C, A, D, B, Builder))
+ if (Value *V = matchSelectFromAndOr(C, A, D, B))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(B, D, A, C, Builder))
+ if (Value *V = matchSelectFromAndOr(B, D, A, C))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(B, D, C, A, Builder))
+ if (Value *V = matchSelectFromAndOr(B, D, C, A))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(D, B, A, C, Builder))
+ if (Value *V = matchSelectFromAndOr(D, B, A, C))
return replaceInstUsesWith(I, V);
- if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder))
+ if (Value *V = matchSelectFromAndOr(D, B, C, A))
return replaceInstUsesWith(I, V);
}
}
@@ -2251,12 +2428,12 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// be simplified by a later pass either, so we try swapping the inner/outer
// ORs in the hopes that we'll be able to simplify it this way.
// (X|C) | V --> (X|V) | C
- ConstantInt *C1;
+ ConstantInt *CI;
if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
- match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) {
Value *Inner = Builder.CreateOr(A, Op1);
Inner->takeName(Op0);
- return BinaryOperator::CreateOr(Inner, C1);
+ return BinaryOperator::CreateOr(Inner, CI);
}
// Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
@@ -2339,7 +2516,7 @@ static Instruction *foldXorToXor(BinaryOperator &I,
}
Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
- if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
@@ -2348,8 +2525,8 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
- bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ bool IsSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
}
}
@@ -2360,7 +2537,8 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
- LHS0->getType() == RHS0->getType()) {
+ LHS0->getType() == RHS0->getType() &&
+ LHS0->getType()->isIntOrIntVectorTy()) {
// (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
// (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
@@ -2452,6 +2630,32 @@ static Instruction *visitMaskedMerge(BinaryOperator &I,
return nullptr;
}
+// Transform
+// ~(x ^ y)
+// into:
+// (~x) ^ y
+// or into
+// x ^ (~y)
+static Instruction *sinkNotIntoXor(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X, *Y;
+ // FIXME: one-use check is not needed in general, but currently we are unable
+ // to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182)
+ if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
+ return nullptr;
+
+ // We only want to do the transform if it is free to do.
+ if (IsFreeToInvert(X, X->hasOneUse())) {
+ // Ok, good.
+ } else if (IsFreeToInvert(Y, Y->hasOneUse())) {
+ std::swap(X, Y);
+ } else
+ return nullptr;
+
+ Value *NotX = Builder.CreateNot(X, X->getName() + ".not");
+ return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan");
+}
+
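A minimal sketch of the sinkNotIntoXor transform, assuming one xor operand is free to invert (here a compare); names are illustrative:

  define i1 @not_of_xor(i1 %a, i32 %p, i32 %q) {
    %cmp = icmp eq i32 %p, %q         ; a compare is free to invert
    %xor = xor i1 %a, %cmp
    %not = xor i1 %xor, true          ; ~(%a ^ %cmp)
    ret i1 %not                       ; expected: %a ^ (icmp ne i32 %p, %q)
  }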
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -2463,7 +2667,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *NewXor = foldXorToXor(I, Builder))
@@ -2481,9 +2685,15 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
- // A^B --> A|B iff A and B have no bits set in common.
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (haveNoCommonBitsSet(Op0, Op1, DL, &AC, &I, &DT))
+
+ // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
+ // This is a special case in haveNoCommonBitsSet, but the computeKnownBits
+ // calls in there are unnecessary as SimplifyDemandedInstructionBits should
+ // have already taken care of those cases.
+ Value *M;
+ if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
+ m_c_And(m_Deferred(M), m_Value()))))
return BinaryOperator::CreateOr(Op0, Op1);
// Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
@@ -2528,8 +2738,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
// ~(X - Y) --> ~X + Y
- if (match(NotVal, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))))
- return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
+ if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
+ if (isa<Constant>(X) || NotVal->hasOneUse())
+ return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
// ~(~X >>s Y) --> (X >>s Y)
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
@@ -2539,19 +2750,36 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// the 'not' by inverting the constant and using the opposite shift type.
// Canonicalization rules ensure that only a negative constant uses 'ashr',
// but we must check that in case that transform has not fired yet.
+
+ // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
Constant *C;
if (match(NotVal, m_AShr(m_Constant(C), m_Value(Y))) &&
- match(C, m_Negative())) {
- // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
- Constant *NotC = ConstantExpr::getNot(C);
- return BinaryOperator::CreateLShr(NotC, Y);
- }
+ match(C, m_Negative()))
+ return BinaryOperator::CreateLShr(ConstantExpr::getNot(C), Y);
+ // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
if (match(NotVal, m_LShr(m_Constant(C), m_Value(Y))) &&
- match(C, m_NonNegative())) {
- // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
- Constant *NotC = ConstantExpr::getNot(C);
- return BinaryOperator::CreateAShr(NotC, Y);
+ match(C, m_NonNegative()))
+ return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
+
+ // ~(X + C) --> -(C + 1) - X
+ if (match(Op0, m_Add(m_Value(X), m_Constant(C))))
+ return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X);
+ }
+
+ // Use DeMorgan and reassociation to eliminate a 'not' op.
+ Constant *C1;
+ if (match(Op1, m_Constant(C1))) {
+ Constant *C2;
+ if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
+ // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
+ Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
+ return BinaryOperator::CreateXor(And, ConstantExpr::getNot(C1));
+ }
+ if (match(Op0, m_OneUse(m_And(m_Not(m_Value(X)), m_Constant(C2))))) {
+ // (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1
+ Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
+ return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
}
}
@@ -2567,28 +2795,15 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (match(Op1, m_APInt(RHSC))) {
Value *X;
const APInt *C;
- if (match(Op0, m_Sub(m_APInt(C), m_Value(X)))) {
- // ~(c-X) == X-c-1 == X+(-c-1)
- if (RHSC->isAllOnesValue()) {
- Constant *NewC = ConstantInt::get(I.getType(), -(*C) - 1);
- return BinaryOperator::CreateAdd(X, NewC);
- }
- if (RHSC->isSignMask()) {
- // (C - X) ^ signmask -> (C + signmask - X)
- Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
- return BinaryOperator::CreateSub(NewC, X);
- }
- } else if (match(Op0, m_Add(m_Value(X), m_APInt(C)))) {
- // ~(X-c) --> (-c-1)-X
- if (RHSC->isAllOnesValue()) {
- Constant *NewC = ConstantInt::get(I.getType(), -(*C) - 1);
- return BinaryOperator::CreateSub(NewC, X);
- }
- if (RHSC->isSignMask()) {
- // (X + C) ^ signmask -> (X + C + signmask)
- Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
- return BinaryOperator::CreateAdd(X, NewC);
- }
+ if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X)))) {
+ // (C - X) ^ signmask -> (C + signmask - X)
+ Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
+ return BinaryOperator::CreateSub(NewC, X);
+ }
+ if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C)))) {
+ // (X + C) ^ signmask -> (X + C + signmask)
+ Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
+ return BinaryOperator::CreateAdd(X, NewC);
}
// (X|C1)^C2 -> X^(C1^C2) iff X&~C1 == 0
@@ -2635,82 +2850,52 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
return FoldedLogic;
- {
- Value *A, *B;
- if (match(Op1, m_OneUse(m_Or(m_Value(A), m_Value(B))))) {
- if (A == Op0) { // A^(A|B) == A^(B|A)
- cast<BinaryOperator>(Op1)->swapOperands();
- std::swap(A, B);
- }
- if (B == Op0) { // A^(B|A) == (B|A)^A
- I.swapOperands(); // Simplified below.
- std::swap(Op0, Op1);
- }
- } else if (match(Op1, m_OneUse(m_And(m_Value(A), m_Value(B))))) {
- if (A == Op0) { // A^(A&B) -> A^(B&A)
- cast<BinaryOperator>(Op1)->swapOperands();
- std::swap(A, B);
- }
- if (B == Op0) { // A^(B&A) -> (B&A)^A
- I.swapOperands(); // Simplified below.
- std::swap(Op0, Op1);
- }
- }
- }
-
- {
- Value *A, *B;
- if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B))))) {
- if (A == Op1) // (B|A)^B == (A|B)^B
- std::swap(A, B);
- if (B == Op1) // (A|B)^B == A & ~B
- return BinaryOperator::CreateAnd(A, Builder.CreateNot(Op1));
- } else if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B))))) {
- if (A == Op1) // (A&B)^A -> (B&A)^A
- std::swap(A, B);
- const APInt *C;
- if (B == Op1 && // (B&A)^A == ~B & A
- !match(Op1, m_APInt(C))) { // Canonical form is (B&C)^C
- return BinaryOperator::CreateAnd(Builder.CreateNot(A), Op1);
- }
- }
- }
-
- {
- Value *A, *B, *C, *D;
- // (A ^ C)^(A | B) -> ((~A) & B) ^ C
- if (match(Op0, m_Xor(m_Value(D), m_Value(C))) &&
- match(Op1, m_Or(m_Value(A), m_Value(B)))) {
- if (D == A)
- return BinaryOperator::CreateXor(
- Builder.CreateAnd(Builder.CreateNot(A), B), C);
- if (D == B)
- return BinaryOperator::CreateXor(
- Builder.CreateAnd(Builder.CreateNot(B), A), C);
- }
- // (A | B)^(A ^ C) -> ((~A) & B) ^ C
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_Xor(m_Value(D), m_Value(C)))) {
- if (D == A)
- return BinaryOperator::CreateXor(
- Builder.CreateAnd(Builder.CreateNot(A), B), C);
- if (D == B)
- return BinaryOperator::CreateXor(
- Builder.CreateAnd(Builder.CreateNot(B), A), C);
- }
- // (A & B) ^ (A ^ B) -> (A | B)
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateOr(A, B);
- // (A ^ B) ^ (A & B) -> (A | B)
- if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
- match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateOr(A, B);
- }
+ // Y ^ (X | Y) --> X & ~Y
+ // Y ^ (Y | X) --> X & ~Y
+ if (match(Op1, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op0));
+ // (X | Y) ^ Y --> X & ~Y
+ // (Y | X) ^ Y --> X & ~Y
+ if (match(Op0, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op1));
+
+ // Y ^ (X & Y) --> ~X & Y
+ // Y ^ (Y & X) --> ~X & Y
+ if (match(Op1, m_OneUse(m_c_And(m_Value(X), m_Specific(Op0)))))
+ return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(X));
+ // (X & Y) ^ Y --> ~X & Y
+ // (Y & X) ^ Y --> ~X & Y
+ // Canonical form is (X & C) ^ C; don't touch that.
+ // TODO: A 'not' op is better for analysis and codegen, but demanded bits must
+ // be fixed to prefer that (otherwise we get infinite looping).
+ if (!match(Op1, m_Constant()) &&
+ match(Op0, m_OneUse(m_c_And(m_Value(X), m_Specific(Op1)))))
+ return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(X));
+
+ Value *A, *B, *C;
+ // (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants.
+ if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
+ m_OneUse(m_c_Or(m_Deferred(A), m_Value(C))))))
+ return BinaryOperator::CreateXor(
+ Builder.CreateAnd(Builder.CreateNot(A), C), B);
+
+ // (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants.
+ if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
+ m_OneUse(m_c_Or(m_Deferred(B), m_Value(C))))))
+ return BinaryOperator::CreateXor(
+ Builder.CreateAnd(Builder.CreateNot(B), C), A);
+
+ // (A & B) ^ (A ^ B) -> (A | B)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
+ // (A ^ B) ^ (A & B) -> (A | B)
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
// (A & ~B) ^ ~A -> ~(A & B)
// (~B & A) ^ ~A -> ~(A & B)
- Value *A, *B;
if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Not(m_Specific(A))))
return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
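One illustrative case of the rewritten xor-of-or folds above (names invented for this note):

  define i32 @xor_with_or(i32 %x, i32 %y) {
    %or = or i32 %x, %y
    %r = xor i32 %y, %or              ; Y ^ (X | Y)
    ret i32 %r                        ; expected: %x & ~%y
  }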
@@ -2759,23 +2944,41 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// %res = select i1 %cmp2, i32 %x, i32 %noty
//
// Same is applicable for smin/umax/umin.
- {
+ if (match(Op1, m_AllOnes()) && Op0->hasOneUse()) {
Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor;
- if (Op0->hasOneUse() && SelectPatternResult::isMinOrMax(SPF) &&
- match(Op1, m_AllOnes())) {
-
- Value *X;
- if (match(RHS, m_Not(m_Value(X))))
- std::swap(RHS, LHS);
-
- if (match(LHS, m_Not(m_Value(X)))) {
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ // It's possible we get here before the not has been simplified, so make
+ // sure the input to the not isn't freely invertible.
+ if (match(LHS, m_Not(m_Value(X))) && !IsFreeToInvert(X, X->hasOneUse())) {
Value *NotY = Builder.CreateNot(RHS);
return SelectInst::Create(
Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
}
+
+ // It's possible we get here before the not has been simplified, so make
+ // sure the input to the not isn't freely invertible.
+ if (match(RHS, m_Not(m_Value(Y))) && !IsFreeToInvert(Y, Y->hasOneUse())) {
+ Value *NotX = Builder.CreateNot(LHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
+ }
+
+ // If both sides are freely invertible, then we can get rid of the xor
+ // completely.
+ if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
+ IsFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
+ Value *NotLHS = Builder.CreateNot(LHS);
+ Value *NotRHS = Builder.CreateNot(RHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
+ NotLHS, NotRHS);
+ }
}
}
+ if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
+ return NewXor;
+
return nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cbfbd8a53993..aeb25d530d71 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -136,6 +136,14 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
if (Size > 8 || (Size&(Size-1)))
return nullptr; // If not 1/2/4/8 bytes, exit.
+ // If it is an atomic and the alignment is less than the size, then we would
+ // introduce an unaligned memory access, which CodeGen would later transform
+ // into a libcall. That is not an evident performance gain, so disable the
+ // transform for now.
+ if (isa<AtomicMemTransferInst>(MI))
+ if (CopyDstAlign < Size || CopySrcAlign < Size)
+ return nullptr;
+
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
@@ -174,6 +182,9 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
if (LoopMemParallelMD)
L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
+ MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
+ if (AccessGroupMD)
+ L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
StoreInst *S = Builder.CreateStore(L, Dest);
// Alignment from the mem intrinsic will be better, so use it.
@@ -182,6 +193,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
if (LoopMemParallelMD)
S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
+ if (AccessGroupMD)
+ S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
// non-atomics can be volatile
@@ -215,6 +228,18 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
Alignment = MI->getDestAlignment();
assert(Len && "0-sized memory setting should be removed already.");
+ // Alignment 0 is identity for alignment 1 for memset, but not store.
+ if (Alignment == 0)
+ Alignment = 1;
+
+ // If it is an atomic and the alignment is less than the size, then we would
+ // introduce an unaligned memory access, which CodeGen would later transform
+ // into a libcall. That is not an evident performance gain, so disable the
+ // transform for now.
+ if (isa<AtomicMemSetInst>(MI))
+ if (Alignment < Len)
+ return nullptr;
+
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
@@ -224,9 +249,6 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
- // Alignment 0 is identity for alignment 1 for memset, but not store.
- if (Alignment == 0) Alignment = 1;
-
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
@@ -648,7 +670,7 @@ static Value *simplifyX86round(IntrinsicInst &II,
}
Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
- Value *Res = Builder.CreateIntrinsic(ID, {Src}, &II);
+ Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
if (!IsScalar) {
if (auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
@@ -675,7 +697,8 @@ static Value *simplifyX86round(IntrinsicInst &II,
return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
}
-static Value *simplifyX86movmsk(const IntrinsicInst &II) {
+static Value *simplifyX86movmsk(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
Value *Arg = II.getArgOperand(0);
Type *ResTy = II.getType();
Type *ArgTy = Arg->getType();
@@ -688,29 +711,46 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II) {
if (!ArgTy->isVectorTy())
return nullptr;
- auto *C = dyn_cast<Constant>(Arg);
- if (!C)
- return nullptr;
+ if (auto *C = dyn_cast<Constant>(Arg)) {
+ // Extract signbits of the vector input and pack into integer result.
+ APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
+ for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
+ auto *COp = C->getAggregateElement(I);
+ if (!COp)
+ return nullptr;
+ if (isa<UndefValue>(COp))
+ continue;
- // Extract signbits of the vector input and pack into integer result.
- APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
- for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
- auto *COp = C->getAggregateElement(I);
- if (!COp)
- return nullptr;
- if (isa<UndefValue>(COp))
- continue;
+ auto *CInt = dyn_cast<ConstantInt>(COp);
+ auto *CFp = dyn_cast<ConstantFP>(COp);
+ if (!CInt && !CFp)
+ return nullptr;
- auto *CInt = dyn_cast<ConstantInt>(COp);
- auto *CFp = dyn_cast<ConstantFP>(COp);
- if (!CInt && !CFp)
- return nullptr;
+ if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
+ Result.setBit(I);
+ }
+ return Constant::getIntegerValue(ResTy, Result);
+ }
- if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
- Result.setBit(I);
+ // Look for a sign-extended boolean source vector as the argument to this
+ // movmsk. If the argument is bitcast, look through that, but make sure the
+ // source of that bitcast is still a vector with the same number of elements.
+ // TODO: We can also convert a bitcast with wider elements, but that requires
+ // duplicating the bool source sign bits to match the number of elements
+ // expected by the movmsk call.
+ Arg = peekThroughBitcast(Arg);
+ Value *X;
+ if (Arg->getType()->isVectorTy() &&
+ Arg->getType()->getVectorNumElements() == ArgTy->getVectorNumElements() &&
+ match(Arg, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
+ // call iM movmsk(sext <N x i1> X) --> zext (bitcast <N x i1> X to iN) to iM
+ unsigned NumElts = X->getType()->getVectorNumElements();
+ Type *ScalarTy = Type::getIntNTy(Arg->getContext(), NumElts);
+ Value *BC = Builder.CreateBitCast(X, ScalarTy);
+ return Builder.CreateZExtOrTrunc(BC, ResTy);
}
- return Constant::getIntegerValue(ResTy, Result);
+ return nullptr;
}
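An illustrative movmsk input that the new sign-extended-boolean path should handle (the SSE2 variant is used here only as an example):

  declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
  define i32 @movmsk_of_sext(<16 x i1> %b) {
    %sext = sext <16 x i1> %b to <16 x i8>
    %r = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %sext)
    ret i32 %r      ; expected: zext (bitcast <16 x i1> %b to i16) to i32
  }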
static Value *simplifyX86insertps(const IntrinsicInst &II,
@@ -1133,82 +1173,6 @@ static Value *simplifyX86vpcom(const IntrinsicInst &II,
return nullptr;
}
-static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
- Value *Arg0 = II.getArgOperand(0);
- Value *Arg1 = II.getArgOperand(1);
-
- // fmin(x, x) -> x
- if (Arg0 == Arg1)
- return Arg0;
-
- const auto *C1 = dyn_cast<ConstantFP>(Arg1);
-
- // fmin(x, nan) -> x
- if (C1 && C1->isNaN())
- return Arg0;
-
- // This is the value because if undef were NaN, we would return the other
- // value and cannot return a NaN unless both operands are.
- //
- // fmin(undef, x) -> x
- if (isa<UndefValue>(Arg0))
- return Arg1;
-
- // fmin(x, undef) -> x
- if (isa<UndefValue>(Arg1))
- return Arg0;
-
- Value *X = nullptr;
- Value *Y = nullptr;
- if (II.getIntrinsicID() == Intrinsic::minnum) {
- // fmin(x, fmin(x, y)) -> fmin(x, y)
- // fmin(y, fmin(x, y)) -> fmin(x, y)
- if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
- if (Arg0 == X || Arg0 == Y)
- return Arg1;
- }
-
- // fmin(fmin(x, y), x) -> fmin(x, y)
- // fmin(fmin(x, y), y) -> fmin(x, y)
- if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
- if (Arg1 == X || Arg1 == Y)
- return Arg0;
- }
-
- // TODO: fmin(nnan x, inf) -> x
- // TODO: fmin(nnan ninf x, flt_max) -> x
- if (C1 && C1->isInfinity()) {
- // fmin(x, -inf) -> -inf
- if (C1->isNegative())
- return Arg1;
- }
- } else {
- assert(II.getIntrinsicID() == Intrinsic::maxnum);
- // fmax(x, fmax(x, y)) -> fmax(x, y)
- // fmax(y, fmax(x, y)) -> fmax(x, y)
- if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
- if (Arg0 == X || Arg0 == Y)
- return Arg1;
- }
-
- // fmax(fmax(x, y), x) -> fmax(x, y)
- // fmax(fmax(x, y), y) -> fmax(x, y)
- if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
- if (Arg1 == X || Arg1 == Y)
- return Arg0;
- }
-
- // TODO: fmax(nnan x, -inf) -> x
- // TODO: fmax(nnan ninf x, -flt_max) -> x
- if (C1 && C1->isInfinity()) {
- // fmax(x, inf) -> inf
- if (!C1->isNegative())
- return Arg1;
- }
- }
- return nullptr;
-}
-
static bool maskIsAllOneOrUndef(Value *Mask) {
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
@@ -1852,6 +1816,17 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
return nullptr;
}
+static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
+ assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap");
+ Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
+ if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
+ Call.setArgOperand(0, Arg1);
+ Call.setArgOperand(1, Arg0);
+ return &Call;
+ }
+ return nullptr;
+}
+
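The helper above factors out an existing canonicalization; a sketch of its effect on one of the intrinsics that now use it (uadd.sat chosen only as an example):

  declare i32 @llvm.uadd.sat.i32(i32, i32)
  define i32 @canon_const_to_rhs(i32 %x) {
    %r = call i32 @llvm.uadd.sat.i32(i32 42, i32 %x)
    ret i32 %r      ; expected: call i32 @llvm.uadd.sat.i32(i32 %x, i32 42)
  }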
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallSite to do the heavy
/// lifting.
@@ -2005,18 +1980,49 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return I;
break;
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ const APInt *SA;
+ if (match(II->getArgOperand(2), m_APInt(SA))) {
+ Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+ unsigned BitWidth = SA->getBitWidth();
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift");
+ // Normalize to funnel shift left.
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ // fshl(X, 0, C) -> shl X, C
+ // fshl(X, undef, C) -> shl X, C
+ if (match(Op1, m_Zero()) || match(Op1, m_Undef()))
+ return BinaryOperator::CreateShl(
+ Op0, ConstantInt::get(II->getType(), ShiftAmt));
+
+ // fshl(0, X, C) -> lshr X, (BW-C)
+ // fshl(undef, X, C) -> lshr X, (BW-C)
+ if (match(Op0, m_Zero()) || match(Op0, m_Undef()))
+ return BinaryOperator::CreateLShr(
+ Op1, ConstantInt::get(II->getType(), BitWidth - ShiftAmt));
+ }
+
+ // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
+ // so only the low bits of the shift amount are demanded if the bitwidth is
+ // a power-of-2.
+ unsigned BitWidth = II->getType()->getScalarSizeInBits();
+ if (!isPowerOf2_32(BitWidth))
+ break;
+ APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
+ KnownBits Op2Known(BitWidth);
+ if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
+ return &CI;
+ break;
+ }
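A sketch of the new funnel-shift folds with a constant shift amount (illustrative only):

  declare i32 @llvm.fshl.i32(i32, i32, i32)
  define i32 @fshl_zero_op1(i32 %x) {
    %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 5)
    ret i32 %r      ; expected: shl i32 %x, 5
  }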
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
- if (isa<Constant>(II->getArgOperand(0)) &&
- !isa<Constant>(II->getArgOperand(1))) {
- // Canonicalize constants into the RHS.
- Value *LHS = II->getArgOperand(0);
- II->setArgOperand(0, II->getArgOperand(1));
- II->setArgOperand(1, LHS);
- return II;
- }
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
LLVM_FALLTHROUGH;
case Intrinsic::usub_with_overflow:
@@ -2034,34 +2040,164 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::minnum:
- case Intrinsic::maxnum: {
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
- // Canonicalize constants to the RHS.
- if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
- II->setArgOperand(0, Arg1);
- II->setArgOperand(1, Arg0);
- return II;
+ Intrinsic::ID IID = II->getIntrinsicID();
+
+ // Make use of known overflow information.
+ OverflowResult OR;
+ switch (IID) {
+ default:
+ llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::uadd_sat:
+ OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II);
+ if (OR == OverflowResult::NeverOverflows)
+ return BinaryOperator::CreateNUWAdd(Arg0, Arg1);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return replaceInstUsesWith(*II,
+ ConstantInt::getAllOnesValue(II->getType()));
+ break;
+ case Intrinsic::usub_sat:
+ OR = computeOverflowForUnsignedSub(Arg0, Arg1, II);
+ if (OR == OverflowResult::NeverOverflows)
+ return BinaryOperator::CreateNUWSub(Arg0, Arg1);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return replaceInstUsesWith(*II,
+ ConstantInt::getNullValue(II->getType()));
+ break;
+ case Intrinsic::sadd_sat:
+ if (willNotOverflowSignedAdd(Arg0, Arg1, *II))
+ return BinaryOperator::CreateNSWAdd(Arg0, Arg1);
+ break;
+ case Intrinsic::ssub_sat:
+ if (willNotOverflowSignedSub(Arg0, Arg1, *II))
+ return BinaryOperator::CreateNSWSub(Arg0, Arg1);
+ break;
}
- // FIXME: Simplifications should be in instsimplify.
- if (Value *V = simplifyMinnumMaxnum(*II))
- return replaceInstUsesWith(*II, V);
+ // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
+ Constant *C;
+ if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
+ C->isNotMinSignedValue()) {
+ Value *NegVal = ConstantExpr::getNeg(C);
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(
+ Intrinsic::sadd_sat, Arg0, NegVal));
+ }
+
+ // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
+ // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
+ // if Val and Val2 have the same sign
+ if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
+ Value *X;
+ const APInt *Val, *Val2;
+ APInt NewVal;
+ bool IsUnsigned =
+ IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
+ if (Other->getIntrinsicID() == II->getIntrinsicID() &&
+ match(Arg1, m_APInt(Val)) &&
+ match(Other->getArgOperand(0), m_Value(X)) &&
+ match(Other->getArgOperand(1), m_APInt(Val2))) {
+ if (IsUnsigned)
+ NewVal = Val->uadd_sat(*Val2);
+ else if (Val->isNonNegative() == Val2->isNonNegative()) {
+ bool Overflow;
+ NewVal = Val->sadd_ov(*Val2, Overflow);
+ if (Overflow) {
+ // Both adds together may add more than SignedMaxValue
+ // without saturating the final result.
+ break;
+ }
+ } else {
+ // Cannot fold saturated addition with different signs.
+ break;
+ }
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(
+ IID, X, ConstantInt::get(II->getType(), NewVal)));
+ }
+ }
+ break;
+ }
+
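One illustrative case of the new saturating-arithmetic folds, assuming the constant operand is not the minimum signed value:

  declare i8 @llvm.ssub.sat.i8(i8, i8)
  define i8 @ssub_sat_const(i8 %x) {
    %r = call i8 @llvm.ssub.sat.i8(i8 %x, i8 10)
    ret i8 %r       ; expected: call i8 @llvm.sadd.sat.i8(i8 %x, i8 -10)
  }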
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum: {
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ Intrinsic::ID IID = II->getIntrinsicID();
Value *X, *Y;
if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
(Arg0->hasOneUse() || Arg1->hasOneUse())) {
// If both operands are negated, invert the call and negate the result:
- // minnum(-X, -Y) --> -(maxnum(X, Y))
- // maxnum(-X, -Y) --> -(minnum(X, Y))
- Intrinsic::ID NewIID = II->getIntrinsicID() == Intrinsic::maxnum ?
- Intrinsic::minnum : Intrinsic::maxnum;
- Value *NewCall = Builder.CreateIntrinsic(NewIID, { X, Y }, II);
+ // min(-X, -Y) --> -(max(X, Y))
+ // max(-X, -Y) --> -(min(X, Y))
+ Intrinsic::ID NewIID;
+ switch (IID) {
+ case Intrinsic::maxnum:
+ NewIID = Intrinsic::minnum;
+ break;
+ case Intrinsic::minnum:
+ NewIID = Intrinsic::maxnum;
+ break;
+ case Intrinsic::maximum:
+ NewIID = Intrinsic::minimum;
+ break;
+ case Intrinsic::minimum:
+ NewIID = Intrinsic::maximum;
+ break;
+ default:
+ llvm_unreachable("unexpected intrinsic ID");
+ }
+ Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
Instruction *FNeg = BinaryOperator::CreateFNeg(NewCall);
FNeg->copyIRFlags(II);
return FNeg;
}
+
+ // m(m(X, C2), C1) -> m(X, C)
+ const APFloat *C1, *C2;
+ if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
+ if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
+ ((match(M->getArgOperand(0), m_Value(X)) &&
+ match(M->getArgOperand(1), m_APFloat(C2))) ||
+ (match(M->getArgOperand(1), m_Value(X)) &&
+ match(M->getArgOperand(0), m_APFloat(C2))))) {
+ APFloat Res(0.0);
+ switch (IID) {
+ case Intrinsic::maxnum:
+ Res = maxnum(*C1, *C2);
+ break;
+ case Intrinsic::minnum:
+ Res = minnum(*C1, *C2);
+ break;
+ case Intrinsic::maximum:
+ Res = maximum(*C1, *C2);
+ break;
+ case Intrinsic::minimum:
+ Res = minimum(*C1, *C2);
+ break;
+ default:
+ llvm_unreachable("unexpected intrinsic ID");
+ }
+ Instruction *NewCall = Builder.CreateBinaryIntrinsic(
+ IID, X, ConstantFP::get(Arg0->getType(), Res));
+ NewCall->copyIRFlags(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
+ }
+
break;
}
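A sketch of the new m(m(X, C2), C1) constant folding for the min/max intrinsics (minnum chosen only as an example):

  declare float @llvm.minnum.f32(float, float)
  define float @minnum_chain(float %x) {
    %m1 = call float @llvm.minnum.f32(float %x, float 4.0)
    %m2 = call float @llvm.minnum.f32(float %m1, float 2.0)
    ret float %m2   ; expected: call float @llvm.minnum.f32(float %x, float 2.0)
  }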
case Intrinsic::fmuladd: {
@@ -2079,17 +2215,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
LLVM_FALLTHROUGH;
}
case Intrinsic::fma: {
- Value *Src0 = II->getArgOperand(0);
- Value *Src1 = II->getArgOperand(1);
-
- // Canonicalize constant multiply operand to Src1.
- if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
- II->setArgOperand(0, Src1);
- II->setArgOperand(1, Src0);
- std::swap(Src0, Src1);
- }
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
// fma fneg(x), fneg(y), z -> fma x, y, z
+ Value *Src0 = II->getArgOperand(0);
+ Value *Src1 = II->getArgOperand(1);
Value *X, *Y;
if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
II->setArgOperand(0, X);
@@ -2135,24 +2266,33 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *ExtSrc;
if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
- Value *NarrowII = Builder.CreateIntrinsic(II->getIntrinsicID(),
- { ExtSrc }, II);
+ Value *NarrowII =
+ Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), ExtSrc, II);
return new FPExtInst(NarrowII, II->getType());
}
break;
}
case Intrinsic::cos:
case Intrinsic::amdgcn_cos: {
- Value *SrcSrc;
+ Value *X;
Value *Src = II->getArgOperand(0);
- if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
- match(Src, m_FAbs(m_Value(SrcSrc)))) {
+ if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
// cos(-x) -> cos(x)
// cos(fabs(x)) -> cos(x)
- II->setArgOperand(0, SrcSrc);
+ II->setArgOperand(0, X);
return II;
}
-
+ break;
+ }
+ case Intrinsic::sin: {
+ Value *X;
+ if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
+ // sin(-x) --> -sin(x)
+ Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
+ Instruction *FNeg = BinaryOperator::CreateFNeg(NewSin);
+ FNeg->copyFastMathFlags(II);
+ return FNeg;
+ }
break;
}
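An illustrative input for the new sin(-x) fold (the fneg is written in its canonical fsub form):

  declare float @llvm.sin.f32(float)
  define float @sin_of_fneg(float %x) {
    %neg = fsub float -0.0, %x
    %r = call float @llvm.sin.f32(float %neg)
    ret float %r    ; expected: sin of %x, with the result negated
  }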
case Intrinsic::ppc_altivec_lvx:
@@ -2382,7 +2522,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_avx2_pmovmskb:
- if (Value *V = simplifyX86movmsk(*II))
+ if (Value *V = simplifyX86movmsk(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -2922,16 +3062,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx_blendv_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx2_pblendvb: {
- // Convert blendv* to vector selects if the mask is constant.
- // This optimization is convoluted because the intrinsic is defined as
- // getting a vector of floats or doubles for the ps and pd versions.
- // FIXME: That should be changed.
-
+ // fold (blend A, A, Mask) -> A
Value *Op0 = II->getArgOperand(0);
Value *Op1 = II->getArgOperand(1);
Value *Mask = II->getArgOperand(2);
-
- // fold (blend A, A, Mask) -> A
if (Op0 == Op1)
return replaceInstUsesWith(CI, Op0);
@@ -2944,6 +3078,33 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
}
+
+ // Convert to a vector select if we can bypass casts and find a boolean
+ // vector condition value.
+ Value *BoolVec;
+ Mask = peekThroughBitcast(Mask);
+ if (match(Mask, m_SExt(m_Value(BoolVec))) &&
+ BoolVec->getType()->isVectorTy() &&
+ BoolVec->getType()->getScalarSizeInBits() == 1) {
+ assert(Mask->getType()->getPrimitiveSizeInBits() ==
+ II->getType()->getPrimitiveSizeInBits() &&
+ "Not expecting mask and operands with different sizes");
+
+ unsigned NumMaskElts = Mask->getType()->getVectorNumElements();
+ unsigned NumOperandElts = II->getType()->getVectorNumElements();
+ if (NumMaskElts == NumOperandElts)
+ return SelectInst::Create(BoolVec, Op1, Op0);
+
+ // If the mask has fewer elements than the operands, each mask bit maps to
+ // multiple elements of the operands. Bitcast back and forth.
+ if (NumMaskElts < NumOperandElts) {
+ Value *CastOp0 = Builder.CreateBitCast(Op0, Mask->getType());
+ Value *CastOp1 = Builder.CreateBitCast(Op1, Mask->getType());
+ Value *Sel = Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
+ return new BitCastInst(Sel, II->getType());
+ }
+ }
+
break;
}
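An illustrative blendv input for the new sign-extended-boolean path (SSE4.1 pblendvb used only as an example):

  declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
  define <16 x i8> @blendv_of_sext_mask(<16 x i8> %a, <16 x i8> %b, <16 x i1> %c) {
    %mask = sext <16 x i1> %c to <16 x i8>
    %r = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %mask)
    ret <16 x i8> %r   ; expected: select <16 x i1> %c, <16 x i8> %b, <16 x i8> %a
  }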
@@ -3275,6 +3436,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, FCmp);
}
+ if (Mask == (N_ZERO | P_ZERO)) {
+ // Equivalent of == 0.
+ Value *FCmp = Builder.CreateFCmpOEQ(
+ Src0, ConstantFP::get(Src0->getType(), 0.0));
+
+ FCmp->takeName(II);
+ return replaceInstUsesWith(*II, FCmp);
+ }
+
+ // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
+ if (((Mask & S_NAN) || (Mask & Q_NAN)) && isKnownNeverNaN(Src0, &TLI)) {
+ II->setArgOperand(1, ConstantInt::get(Src1->getType(),
+ Mask & ~(S_NAN | Q_NAN)));
+ return II;
+ }
+
const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
if (!CVal) {
if (isa<UndefValue>(Src0))
@@ -3384,22 +3561,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
- // TODO: Also emit sub if only width is constant.
- if (!CWidth && COffset && Offset == 0) {
- Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
- Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
- ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
-
- Value *Shl = Builder.CreateShl(Src, ShiftVal);
- Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
- : Builder.CreateLShr(Shl, ShiftVal);
- RightShift->takeName(II);
- return replaceInstUsesWith(*II, RightShift);
- }
-
if (!CWidth || !COffset)
break;
+ // The case of Width == 0 is handled above, which makes this transformation
+ // safe. If Width == 0, then the ashr and lshr instructions become a poison
+ // value since the shift amount would be equal to the bit size.
+ assert(Width != 0);
+
// TODO: This allows folding to undef when the hardware has specific
// behavior?
if (Offset + Width < IntSize) {
@@ -3603,6 +3772,38 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
+ Type *Ty = SrcLHS->getType();
+ if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
+ // Promote to next legal integer type.
+ unsigned Width = CmpType->getBitWidth();
+ unsigned NewWidth = Width;
+
+ // Don't do anything for i1 comparisons.
+ if (Width == 1)
+ break;
+
+ if (Width <= 16)
+ NewWidth = 16;
+ else if (Width <= 32)
+ NewWidth = 32;
+ else if (Width <= 64)
+ NewWidth = 64;
+ else if (Width > 64)
+ break; // Can't handle this.
+
+ if (Width != NewWidth) {
+ IntegerType *CmpTy = Builder.getIntNTy(NewWidth);
+ if (CmpInst::isSigned(SrcPred)) {
+ SrcLHS = Builder.CreateSExt(SrcLHS, CmpTy);
+ SrcRHS = Builder.CreateSExt(SrcRHS, CmpTy);
+ } else {
+ SrcLHS = Builder.CreateZExt(SrcLHS, CmpTy);
+ SrcRHS = Builder.CreateZExt(SrcRHS, CmpTy);
+ }
+ }
+ } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
+ break;
+
Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
SrcLHS->getType());
Value *Args[] = { SrcLHS, SrcRHS,
@@ -3661,7 +3862,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
BasicBlock::iterator BI(II);
- TerminatorInst *TI = II->getParent()->getTerminator();
+ Instruction *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
if (isa<AllocaInst>(BI)) {
@@ -3788,8 +3989,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
// isKnownNonNull -> nonnull attribute
- if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
+ if (!II->hasRetAttr(Attribute::NonNull) &&
+ isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT)) {
II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ return II;
+ }
}
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
@@ -3889,7 +4093,11 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
replaceInstUsesWith(*From, With);
};
- LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
+ auto InstCombineErase = [this](Instruction *I) {
+ eraseInstFromFunction(*I);
+ };
+ LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
+ InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index fd59c3a7c0c3..1201ac196ec0 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -492,12 +492,19 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) {
}
/// Rotate left/right may occur in a wider type than necessary because of type
-/// promotion rules. Try to narrow all of the component instructions.
+/// promotion rules. Try to narrow the inputs and convert to funnel shift.
Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
assert((isa<VectorType>(Trunc.getSrcTy()) ||
shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
"Don't narrow to an illegal scalar type");
+ // Bail out on strange types. It is possible to handle some of these patterns
+ // even with non-power-of-2 sizes, but it is not a likely scenario.
+ Type *DestTy = Trunc.getType();
+ unsigned NarrowWidth = DestTy->getScalarSizeInBits();
+ if (!isPowerOf2_32(NarrowWidth))
+ return nullptr;
+
// First, find an or'd pair of opposite shifts with the same shifted operand:
// trunc (or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1))
Value *Or0, *Or1;
@@ -514,22 +521,38 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
if (ShiftOpcode0 == ShiftOpcode1)
return nullptr;
- // The shift amounts must add up to the narrow bit width.
- Value *ShAmt;
- bool SubIsOnLHS;
- Type *DestTy = Trunc.getType();
- unsigned NarrowWidth = DestTy->getScalarSizeInBits();
- if (match(ShAmt0,
- m_OneUse(m_Sub(m_SpecificInt(NarrowWidth), m_Specific(ShAmt1))))) {
- ShAmt = ShAmt1;
- SubIsOnLHS = true;
- } else if (match(ShAmt1, m_OneUse(m_Sub(m_SpecificInt(NarrowWidth),
- m_Specific(ShAmt0))))) {
- ShAmt = ShAmt0;
- SubIsOnLHS = false;
- } else {
+ // Match the shift amount operands for a rotate pattern. This always matches
+ // a subtraction on the R operand.
+ auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+ // The shift amounts may add up to the narrow bit width:
+ // (shl ShVal, L) | (lshr ShVal, Width - L)
+ if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
+ return L;
+
+ // The shift amount may be masked with negation:
+ // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+ Value *X;
+ unsigned Mask = Width - 1;
+ if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+ match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+ return X;
+
+ // Same as above, but the shift amount may be extended after masking:
+ if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+ match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+ return X;
+
return nullptr;
+ };
+
+ Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
+ bool SubIsOnLHS = false;
+ if (!ShAmt) {
+ ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
+ SubIsOnLHS = true;
}
+ if (!ShAmt)
+ return nullptr;
// The shifted value must have high zeros in the wide type. Typically, this
// will be a zext, but it could also be the result of an 'and' or 'shift'.
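For illustration (not part of the patch), the masked-negation form matched above is exactly what the portable C rotate idiom produces once integer promotion widens the arithmetic:

    #include <stdint.h>
    // Rotate-left of a narrow value; the shifts happen in 'int', producing the
    // wide rotate that narrowRotate() can now shrink to an 8-bit funnel shift.
    uint8_t rotl8(uint8_t x, unsigned n) {
      return (uint8_t)((x << (n & 7)) | (x >> ((-n) & 7)));
    }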
@@ -540,23 +563,15 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
// We have an unnecessarily wide rotate!
// trunc (or (lshr ShVal, ShAmt), (shl ShVal, BitWidth - ShAmt))
- // Narrow it down to eliminate the zext/trunc:
- // or (lshr trunc(ShVal), ShAmt0'), (shl trunc(ShVal), ShAmt1')
+ // Narrow the inputs and convert to funnel shift intrinsic:
+ // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
- Value *NegShAmt = Builder.CreateNeg(NarrowShAmt);
-
- // Mask both shift amounts to ensure there's no UB from oversized shifts.
- Constant *MaskC = ConstantInt::get(DestTy, NarrowWidth - 1);
- Value *MaskedShAmt = Builder.CreateAnd(NarrowShAmt, MaskC);
- Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, MaskC);
-
- // Truncate the original value and use narrow ops.
Value *X = Builder.CreateTrunc(ShVal, DestTy);
- Value *NarrowShAmt0 = SubIsOnLHS ? MaskedNegShAmt : MaskedShAmt;
- Value *NarrowShAmt1 = SubIsOnLHS ? MaskedShAmt : MaskedNegShAmt;
- Value *NarrowSh0 = Builder.CreateBinOp(ShiftOpcode0, X, NarrowShAmt0);
- Value *NarrowSh1 = Builder.CreateBinOp(ShiftOpcode1, X, NarrowShAmt1);
- return BinaryOperator::CreateOr(NarrowSh0, NarrowSh1);
+ bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
+ (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+ Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+ Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
+ return IntrinsicInst::Create(F, { X, X, NarrowShAmt });
}
/// Try to narrow the width of math or bitwise logic instructions by pulling a
@@ -706,12 +721,35 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (SimplifyDemandedInstructionBits(CI))
return &CI;
- // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
if (DestTy->getScalarSizeInBits() == 1) {
- Constant *One = ConstantInt::get(SrcTy, 1);
- Src = Builder.CreateAnd(Src, One);
Value *Zero = Constant::getNullValue(Src->getType());
- return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+ if (DestTy->isIntegerTy()) {
+ // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
+ // TODO: We canonicalize to more instructions here because we are probably
+ // lacking equivalent analysis for trunc relative to icmp. There may also
+ // be codegen concerns. If those trunc limitations were removed, we could
+ // remove this transform.
+ Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
+
+ // For vectors, we do not canonicalize all truncs to icmp, so optimize
+ // patterns that would be covered within visitICmpInst.
+ Value *X;
+ const APInt *C;
+ if (match(Src, m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
+ // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
+ APInt MaskC = APInt(SrcTy->getScalarSizeInBits(), 1).shl(*C);
+ Value *And = Builder.CreateAnd(X, ConstantInt::get(SrcTy, MaskC));
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
+ if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_APInt(C)),
+ m_Deferred(X))))) {
+ // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
+ APInt MaskC = APInt(SrcTy->getScalarSizeInBits(), 1).shl(*C) | 1;
+ Value *And = Builder.CreateAnd(X, ConstantInt::get(SrcTy, MaskC));
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
}
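A worked instance of the lshr fold (values picked for illustration; the patch applies it to vectors, lane-wise, but the arithmetic is easier to see on a scalar): with a 32-bit source and C = 3, the mask constant is 1 << 3 = 8.

    // trunc (lshr X, 3) to i1  -->  icmp ne (and X, 8), 0
    bool bit3(unsigned x)        { return (x >> 3) & 1; }
    bool bit3_folded(unsigned x) { return (x & 0x8u) != 0; }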
// FIXME: Maybe combine the next two transforms to handle the no cast case
@@ -1061,12 +1099,9 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
- // Attempt to extend the entire input expression tree to the destination
- // type. Only do this if the dest type is a simple type, don't convert the
- // expression tree to something weird like i93 unless the source is also
- // strange.
+ // Try to extend the entire expression tree to the wide destination type.
unsigned BitsToClear;
- if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
+ if (shouldChangeType(SrcTy, DestTy) &&
canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
"Can't clear more bits than in SrcTy");
@@ -1343,12 +1378,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
return replaceInstUsesWith(CI, ZExt);
}
- // Attempt to extend the entire input expression tree to the destination
- // type. Only do this if the dest type is a simple type, don't convert the
- // expression tree to something weird like i93 unless the source is also
- // strange.
- if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
- canEvaluateSExtd(Src, DestTy)) {
+ // Try to extend the entire expression tree to the wide destination type.
+ if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
LLVM_DEBUG(
dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1589,8 +1620,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
}
// (fptrunc (fneg x)) -> (fneg (fptrunc x))
- if (BinaryOperator::isFNeg(OpI)) {
- Value *InnerTrunc = Builder.CreateFPTrunc(OpI->getOperand(1), Ty);
+ Value *X;
+ if (match(OpI, m_FNeg(m_Value(X)))) {
+ Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
return BinaryOperator::CreateFNegFMF(InnerTrunc, OpI);
}
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e1bae11b40d1..b5bbb09935e2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -522,11 +522,9 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
}
// Otherwise, there is an index. The computation we will do will be modulo
- // the pointer size, so get it.
- uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
- Offset &= PtrSizeMask;
- VariableScale &= PtrSizeMask;
+ // the pointer size.
+ Offset = SignExtend64(Offset, IntPtrWidth);
+ VariableScale = SignExtend64(VariableScale, IntPtrWidth);
// To do this transformation, any constant index must be a multiple of the
// variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
@@ -909,7 +907,8 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
// If all indices are the same, just compare the base pointers.
- if (IndicesTheSame)
+ Type *BaseType = GEPLHS->getOperand(0)->getType();
+ if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));
// If we're comparing GEPs with two base pointers that only differ in type
@@ -976,7 +975,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (NumDifferences == 0) // SAME GEP?
return replaceInstUsesWith(I, // No comparison is needed here.
- Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond)));
+ ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1 && GEPsInBounds) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -1079,19 +1078,20 @@ Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
}
-/// Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
+/// Fold "icmp pred (X+C), X".
+Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C,
ICmpInst::Predicate Pred) {
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
+ assert(!!C && "C should not be zero!");
// (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
// (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
// (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
- Value *R =
- ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+ Constant *R = ConstantInt::get(X->getType(),
+ APInt::getMaxValue(C.getBitWidth()) - C);
return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
}
@@ -1099,11 +1099,10 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+2) >u X --> X <u (0-2) --> X <u 254
// (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
- return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+ return new ICmpInst(ICmpInst::ICMP_ULT, X,
+ ConstantInt::get(X->getType(), -C));
- unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
- ConstantInt *SMax = ConstantInt::get(X->getContext(),
- APInt::getSignedMaxValue(BitWidth));
+ APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());
// (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127
// (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125
@@ -1112,7 +1111,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
// (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
- return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+ return new ICmpInst(ICmpInst::ICMP_SGT, X,
+ ConstantInt::get(X->getType(), SMax - C));
// (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
// (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
@@ -1122,8 +1122,8 @@ Instruction *InstCombiner::foldICmpAddOpConst(Value *X, ConstantInt *CI,
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
- Constant *C = Builder.getInt(CI->getValue() - 1);
- return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
+ return new ICmpInst(ICmpInst::ICMP_SLT, X,
+ ConstantInt::get(X->getType(), SMax - (C - 1)));
}
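Concretely (unsigned, 32-bit, constants chosen for illustration): adding a nonzero constant and comparing against the original operand is a wraparound test, so it folds to a compare against a constant.

    // (X+1) <u X  -->  X >u (MAXUINT-1), i.e. X == UINT_MAX
    bool wraps(unsigned x)        { return x + 1 < x; }
    bool wraps_folded(unsigned x) { return x > 0xFFFFFFFEu; }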
/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
@@ -1333,17 +1333,12 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
return nullptr;
}
-// Fold icmp Pred X, C.
+/// Fold icmp Pred X, C.
+/// TODO: This code structure does not make sense. The saturating add fold
+/// should be moved to some other helper and extended as noted below (it is also
+/// possible that code has been made unnecessary - do we canonicalize IR to
+/// overflow/saturating intrinsics or not?).
Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
- CmpInst::Predicate Pred = Cmp.getPredicate();
- Value *X = Cmp.getOperand(0);
-
- const APInt *C;
- if (!match(Cmp.getOperand(1), m_APInt(C)))
- return nullptr;
-
- Value *A = nullptr, *B = nullptr;
-
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic functions. The source performs an addition
// in wider type and explicitly checks for overflow using comparisons against
@@ -1355,37 +1350,62 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
//
// sum = a + b
// if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
- {
- ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
- if (Pred == ICmpInst::ICMP_UGT &&
- match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
- if (Instruction *Res = processUGT_ADDCST_ADD(
- Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
- return Res;
- }
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
+ Value *A, *B;
+ ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this))
+ return Res;
+
+ return nullptr;
+}
- // FIXME: Use m_APInt to allow folds for splat constants.
- ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
- if (!CI)
+/// Canonicalize icmp instructions based on dominating conditions.
+Instruction *InstCombiner::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
+ // This is a cheap/incomplete check for dominance - just match a single
+ // predecessor with a conditional branch.
+ BasicBlock *CmpBB = Cmp.getParent();
+ BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+ if (!DomBB)
return nullptr;
- // Canonicalize icmp instructions based on dominating conditions.
- BasicBlock *Parent = Cmp.getParent();
- BasicBlock *Dom = Parent->getSinglePredecessor();
- auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
- ICmpInst::Predicate Pred2;
+ Value *DomCond;
BasicBlock *TrueBB, *FalseBB;
- ConstantInt *CI2;
- if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
- TrueBB, FalseBB)) &&
- TrueBB != FalseBB) {
- ConstantRange CR =
- ConstantRange::makeAllowedICmpRegion(Pred, CI->getValue());
+ if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+ return nullptr;
+
+ assert((TrueBB == CmpBB || FalseBB == CmpBB) &&
+ "Predecessor block does not point to successor?");
+
+ // The branch should get simplified. Don't bother simplifying this condition.
+ if (TrueBB == FalseBB)
+ return nullptr;
+
+ // Try to simplify this compare to T/F based on the dominating condition.
+ Optional<bool> Imp = isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB);
+ if (Imp)
+ return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp));
+
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
+ ICmpInst::Predicate DomPred;
+ const APInt *C, *DomC;
+ if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) &&
+ match(Y, m_APInt(C))) {
+ // We have 2 compares of a variable with constants. Calculate the constant
+ // ranges of those compares to see if we can transform the 2nd compare:
+ // DomBB:
+ // DomCond = icmp DomPred X, DomC
+ // br DomCond, CmpBB, FalseBB
+ // CmpBB:
+ // Cmp = icmp Pred X, C
+ ConstantRange CR = ConstantRange::makeAllowedICmpRegion(Pred, *C);
ConstantRange DominatingCR =
- (Parent == TrueBB)
- ? ConstantRange::makeExactICmpRegion(Pred2, CI2->getValue())
- : ConstantRange::makeExactICmpRegion(
- CmpInst::getInversePredicate(Pred2), CI2->getValue());
+ (CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC)
+ : ConstantRange::makeExactICmpRegion(
+ CmpInst::getInversePredicate(DomPred), *DomC);
ConstantRange Intersection = DominatingCR.intersectWith(CR);
ConstantRange Difference = DominatingCR.difference(CR);
if (Intersection.isEmptySet())
@@ -1393,23 +1413,20 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
if (Difference.isEmptySet())
return replaceInstUsesWith(Cmp, Builder.getTrue());
- // If this is a normal comparison, it demands all bits. If it is a sign
- // bit comparison, it only demands the sign bit.
- bool UnusedBit;
- bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);
-
// Canonicalizing a sign bit comparison that gets used in a branch,
// pessimizes codegen by generating branch on zero instruction instead
// of a test and branch. So we avoid canonicalizing in such situations
// because test and branch instruction has better branch displacement
// than compare and branch instruction.
+ bool UnusedBit;
+ bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit);
if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
return nullptr;
- if (auto *AI = Intersection.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
- if (auto *AD = Difference.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
+ if (const APInt *EqC = Intersection.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC));
+ if (const APInt *NeC = Difference.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC));
}
return nullptr;
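A source-level sketch (names are illustrative) of the two outcomes handled above: the dominating range either decides the second compare outright or collapses it to an equality test.

    int f(int x) {
      if (x < 3)            // on this edge x is in [INT_MIN, 2]
        return x < 10;      // difference of the ranges is empty -> 'true'
      return 0;
    }
    int g(int x) {
      if (x < 3)
        return x > 1;       // only x == 2 survives -> 'x == 2'
      return 0;
    }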
@@ -1498,16 +1515,25 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
}
}
- // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
- // iff -C is a power of 2
- if (Pred == ICmpInst::ICMP_UGT && *XorC == ~C && (C + 1).isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
-
- // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
- // iff -C is a power of 2
- if (Pred == ICmpInst::ICMP_ULT && *XorC == -C && C.isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
-
+ // Mask constant magic can eliminate an 'xor' with unsigned compares.
+ if (Pred == ICmpInst::ICMP_UGT) {
+ // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
+ if (*XorC == ~C && (C + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
+ // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
+ if (*XorC == C && (C + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
+ }
+ if (Pred == ICmpInst::ICMP_ULT) {
+ // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
+ if (*XorC == -C && C.isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), ~C));
+ // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
+ if (*XorC == C && (-C).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), ~C));
+ }
return nullptr;
}
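As a concrete instance (C = 7, so C + 1 = 8 is a power of 2): xor by 7 only permutes values within each aligned block of 8, so it cannot move a value across the 'u> 7' boundary.

    // (X ^ 7) >u 7  -->  X >u 7
    bool above(unsigned x)        { return (x ^ 7u) > 7u; }
    bool above_folded(unsigned x) { return x > 7u; }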
@@ -1598,6 +1624,13 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
BinaryOperator *And,
const APInt &C1) {
+ // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
+ // TODO: We canonicalize to the longer form for scalars because we have
+ // better analysis/folds for icmp, and codegen may be better with icmp.
+ if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
+ C1.isNullValue() && match(And->getOperand(1), m_One()))
+ return new TruncInst(And->getOperand(0), Cmp.getType());
+
const APInt *C2;
if (!match(And->getOperand(1), m_APInt(C2)))
return nullptr;
@@ -2336,13 +2369,19 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
Type *Ty = Add->getType();
CmpInst::Predicate Pred = Cmp.getPredicate();
+ if (!Add->hasOneUse())
+ return nullptr;
+
// If the add does not wrap, we can always adjust the compare by subtracting
- // the constants. Equality comparisons are handled elsewhere. SGE/SLE are
- // canonicalized to SGT/SLT.
- if (Add->hasNoSignedWrap() &&
- (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) {
+ // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
+ // are canonicalized to SGT/SLT/UGT/ULT.
+ if ((Add->hasNoSignedWrap() &&
+ (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
+ (Add->hasNoUnsignedWrap() &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
bool Overflow;
- APInt NewC = C.ssub_ov(*C2, Overflow);
+ APInt NewC =
+ Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
// If there is overflow, the result must be true or false.
// TODO: Can we assert there is no overflow because InstSimplify always
// handles those cases?
@@ -2366,9 +2405,6 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
}
- if (!Add->hasOneUse())
- return nullptr;
-
// X+C <u C2 -> (X & -C2) == C
// iff C & (C2-1) == 0
// C2 is a power of 2
@@ -2729,6 +2765,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
// Handle icmp {eq|ne} <intrinsic>, Constant.
Type *Ty = II->getType();
+ unsigned BitWidth = C.getBitWidth();
switch (II->getIntrinsicID()) {
case Intrinsic::bswap:
Worklist.Add(II);
@@ -2737,21 +2774,39 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
return &Cmp;
case Intrinsic::ctlz:
- case Intrinsic::cttz:
+ case Intrinsic::cttz: {
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
- if (C == C.getBitWidth()) {
+ if (C == BitWidth) {
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
return &Cmp;
}
+
+ // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
+    // and Mask1 has the low C+1 bits set. Similar for ctlz, but for high bits.
+ // Limit to one use to ensure we don't increase instruction count.
+ unsigned Num = C.getLimitedValue(BitWidth);
+ if (Num != BitWidth && II->hasOneUse()) {
+ bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz;
+ APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1)
+ : APInt::getHighBitsSet(BitWidth, Num + 1);
+ APInt Mask2 = IsTrailing
+ ? APInt::getOneBitSet(BitWidth, Num)
+ : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+ Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask1));
+ Cmp.setOperand(1, ConstantInt::get(Ty, Mask2));
+ Worklist.Add(II);
+ return &Cmp;
+ }
break;
+ }
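A worked example of the new cttz case (assuming a 32-bit value with at least one bit set; constants chosen for illustration): for C = 2, Mask1 is the low three bits and Mask2 is bit 2, so the count is 2 exactly when the low bits are '100'.

    // cttz(A) == 2  -->  (A & 0b111) == 0b100
    bool ctzIs2(unsigned a)        { return __builtin_ctz(a) == 2; }  // a != 0
    bool ctzIs2_folded(unsigned a) { return (a & 0x7u) == 0x4u; }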
case Intrinsic::ctpop: {
// popcount(A) == 0 -> A == 0 and likewise for !=
// popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
bool IsZero = C.isNullValue();
- if (IsZero || C == C.getBitWidth()) {
+ if (IsZero || C == BitWidth) {
Worklist.Add(II);
Cmp.setOperand(0, II->getArgOperand(0));
auto *NewOp =
@@ -2870,15 +2925,25 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
/// In this case, we are looking for comparisons that look like
/// a check for a lossy truncation.
/// Folds:
-/// x & (-1 >> y) SrcPred x to x DstPred (-1 >> y)
+/// icmp SrcPred (x & Mask), x to icmp DstPred x, Mask
+/// Where Mask is some pattern that produces all-ones in low bits:
+/// (-1 >> y)
+/// ((-1 << y) >> y) <- non-canonical, has extra uses
+/// ~(-1 << y)
+/// ((1 << y) + (-1)) <- non-canonical, has extra uses
/// The Mask can be a constant, too.
/// For some predicates, the operands are commutative.
/// For others, x can only be on a specific side.
static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
InstCombiner::BuilderTy &Builder) {
ICmpInst::Predicate SrcPred;
- Value *X, *M;
- auto m_Mask = m_CombineOr(m_LShr(m_AllOnes(), m_Value()), m_LowBitMask());
+ Value *X, *M, *Y;
+ auto m_VariableMask = m_CombineOr(
+ m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
+ m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
+ m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
+ m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
+ auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
if (!match(&I, m_c_ICmp(SrcPred,
m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
m_Deferred(X))))
@@ -3042,6 +3107,18 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
return nullptr;
const CmpInst::Predicate Pred = I.getPredicate();
+ Value *X;
+
+ // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
+ // (Op1 + X) <u Op1 --> ~Op1 <u X
+ // Op0 >u (Op0 + X) --> X >u ~Op0
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
+ Pred == ICmpInst::ICMP_ULT)
+ return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
+ if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
+ Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
+
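A scalar illustration of the new fold (function names invented for the example): the sum wraps exactly when X exceeds what A can still absorb, which is ~A = UINT_MAX - A.

    // (A + X) <u A  -->  ~A <u X
    bool addWraps(unsigned a, unsigned x)        { return a + x < a; }
    bool addWraps_folded(unsigned a, unsigned x) { return ~a < x; }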
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
if (BO0 && isa<OverflowingBinaryOperator>(BO0))
NoOp0WrapProblem =
@@ -4606,6 +4683,83 @@ static Instruction *canonicalizeICmpBool(ICmpInst &I,
}
}
+// Transform pattern like:
+// (1 << Y) u<= X or ~(-1 << Y) u< X or ((1 << Y)+(-1)) u< X
+// (1 << Y) u> X or ~(-1 << Y) u>= X or ((1 << Y)+(-1)) u>= X
+// Into:
+// (X l>> Y) != 0
+// (X l>> Y) == 0
+static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ ICmpInst::Predicate Pred, NewPred;
+ Value *X, *Y;
+ if (match(&Cmp,
+ m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
+ // We want X to be the icmp's second operand, so swap predicate if it isn't.
+ if (Cmp.getOperand(0) == X)
+ Pred = Cmp.getSwappedPredicate();
+
+ switch (Pred) {
+ case ICmpInst::ICMP_ULE:
+ NewPred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_UGT:
+ NewPred = ICmpInst::ICMP_EQ;
+ break;
+ default:
+ return nullptr;
+ }
+ } else if (match(&Cmp, m_c_ICmp(Pred,
+ m_OneUse(m_CombineOr(
+ m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
+ m_Add(m_Shl(m_One(), m_Value(Y)),
+ m_AllOnes()))),
+ m_Value(X)))) {
+    // The variant with 'add' is not canonical (the variant with 'not' is);
+    // we only get it here because it has extra uses and can't be canonicalized.
+
+ // We want X to be the icmp's second operand, so swap predicate if it isn't.
+ if (Cmp.getOperand(0) == X)
+ Pred = Cmp.getSwappedPredicate();
+
+ switch (Pred) {
+ case ICmpInst::ICMP_ULT:
+ NewPred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_UGE:
+ NewPred = ICmpInst::ICMP_EQ;
+ break;
+ default:
+ return nullptr;
+ }
+ } else
+ return nullptr;
+
+ Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
+ Constant *Zero = Constant::getNullValue(NewX->getType());
+ return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
+}
+
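In source form (sketch; assumes y < 32 so the shifts are defined), the pattern asks whether X has any bit set at or above position Y:

    // (1 << Y) u<= X  -->  (X l>> Y) != 0
    bool anyBitAtOrAbove(unsigned x, unsigned y)        { return (1u << y) <= x; }
    bool anyBitAtOrAbove_folded(unsigned x, unsigned y) { return (x >> y) != 0; }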
+static Instruction *foldVectorCmp(CmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ // If both arguments of the cmp are shuffles that use the same mask and
+ // shuffle within a single vector, move the shuffle after the cmp.
+ Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
+ Value *V1, *V2;
+ Constant *M;
+ if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(M))) &&
+ match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(M))) &&
+ V1->getType() == V2->getType() &&
+ (LHS->hasOneUse() || RHS->hasOneUse())) {
+ // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
+ CmpInst::Predicate P = Cmp.getPredicate();
+ Value *NewCmp = isa<ICmpInst>(Cmp) ? Builder.CreateICmp(P, V1, V2)
+ : Builder.CreateFCmp(P, V1, V2);
+ return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
+ }
+ return nullptr;
+}
+
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4653,6 +4807,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpWithConstant(I))
return Res;
+ if (Instruction *Res = foldICmpWithDominatingICmp(I))
+ return Res;
+
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
@@ -4865,16 +5022,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return ExtractValueInst::Create(ACXI, 1);
{
- Value *X; ConstantInt *Cst;
+ Value *X;
+ const APInt *C;
// icmp X+Cst, X
- if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return foldICmpAddOpConst(X, Cst, I.getPredicate());
+ if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
+ return foldICmpAddOpConst(X, *C, I.getPredicate());
// icmp X, X+Cst
- if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return foldICmpAddOpConst(X, Cst, I.getSwappedPredicate());
+ if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
+ return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
}
+ if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
+ return Res;
+
+ if (I.getType()->isVectorTy())
+ if (Instruction *Res = foldVectorCmp(I, Builder))
+ return Res;
+
return Changed ? &I : nullptr;
}
@@ -5117,6 +5282,117 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
}
+/// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
+static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC) {
+ // When C is not 0.0 and infinities are not allowed:
+ // (C / X) < 0.0 is a sign-bit test of X
+ // (C / X) < 0.0 --> X < 0.0 (if C is positive)
+ // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate)
+ //
+ // Proof:
+ // Multiply (C / X) < 0.0 by X * X / C.
+ // - X is non zero, if it is the flag 'ninf' is violated.
+ // - C defines the sign of X * X * C. Thus it also defines whether to swap
+ // the predicate. C is also non zero by definition.
+ //
+ // Thus X * X / C is non zero and the transformation is valid. [qed]
+
+ FCmpInst::Predicate Pred = I.getPredicate();
+
+ // Check that predicates are valid.
+ if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
+ (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+ return nullptr;
+
+ // Check that RHS operand is zero.
+ if (!match(RHSC, m_AnyZeroFP()))
+ return nullptr;
+
+ // Check fastmath flags ('ninf').
+ if (!LHSI->hasNoInfs() || !I.hasNoInfs())
+ return nullptr;
+
+ // Check the properties of the dividend. It must not be zero to avoid a
+ // division by zero (see Proof).
+ const APFloat *C;
+ if (!match(LHSI->getOperand(0), m_APFloat(C)))
+ return nullptr;
+
+ if (C->isZero())
+ return nullptr;
+
+ // Get swapped predicate if necessary.
+ if (C->isNegative())
+ Pred = I.getSwappedPredicate();
+
+ return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
+}
+
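Source-level illustration (requires the 'ninf' fast-math flag, e.g. -ffinite-math-only; the constants are only examples):

    // ( 2.0 / X) < 0.0  -->  X < 0.0   (positive dividend)
    // (-2.0 / X) < 0.0  -->  X > 0.0   (negative dividend, predicate swapped)
    bool negRecip(float x)     { return  2.0f / x < 0.0f; }
    bool negRecipNegC(float x) { return -2.0f / x < 0.0f; }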
+/// Optimize fabs(X) compared with zero.
+static Instruction *foldFabsWithFcmpZero(FCmpInst &I) {
+ Value *X;
+ if (!match(I.getOperand(0), m_Intrinsic<Intrinsic::fabs>(m_Value(X))) ||
+ !match(I.getOperand(1), m_PosZeroFP()))
+ return nullptr;
+
+ auto replacePredAndOp0 = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
+ I->setPredicate(P);
+ I->setOperand(0, X);
+ return I;
+ };
+
+ switch (I.getPredicate()) {
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OLT:
+ // fabs(X) >= 0.0 --> true
+ // fabs(X) < 0.0 --> false
+ llvm_unreachable("fcmp should have simplified");
+
+ case FCmpInst::FCMP_OGT:
+ // fabs(X) > 0.0 --> X != 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X);
+
+ case FCmpInst::FCMP_UGT:
+ // fabs(X) u> 0.0 --> X u!= 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X);
+
+ case FCmpInst::FCMP_OLE:
+ // fabs(X) <= 0.0 --> X == 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X);
+
+ case FCmpInst::FCMP_ULE:
+ // fabs(X) u<= 0.0 --> X u== 0.0
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X);
+
+ case FCmpInst::FCMP_OGE:
+ // fabs(X) >= 0.0 --> !isnan(X)
+ assert(!I.hasNoNaNs() && "fcmp should have simplified");
+ return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X);
+
+ case FCmpInst::FCMP_ULT:
+ // fabs(X) u< 0.0 --> isnan(X)
+ assert(!I.hasNoNaNs() && "fcmp should have simplified");
+ return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X);
+
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ORD:
+ case FCmpInst::FCMP_UNO:
+ // Look through the fabs() because it doesn't change anything but the sign.
+ // fabs(X) == 0.0 --> X == 0.0,
+ // fabs(X) != 0.0 --> X != 0.0
+ // isnan(fabs(X)) --> isnan(X)
+  //      !isnan(fabs(X)) --> !isnan(X)
+ return replacePredAndOp0(&I, I.getPredicate(), X);
+
+ default:
+ return nullptr;
+ }
+}
+
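Two of the cases above in source form (sketch; std::fabs lowers to llvm.fabs, so the call disappears from the compare):

    #include <cmath>
    bool gt0(float x) { return std::fabs(x) >  0.0f; }  // --> x != 0.0f
    bool le0(float x) { return std::fabs(x) <= 0.0f; }  // --> x == 0.0f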
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
bool Changed = false;
@@ -5161,11 +5437,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
// then canonicalize the operand to 0.0.
if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
- if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0)) {
+ if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI)) {
I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
return &I;
}
- if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1)) {
+ if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI)) {
I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
return &I;
}
@@ -5186,128 +5462,93 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return nullptr;
}
- // Handle fcmp with constant RHS
- if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
- switch (LHSI->getOpcode()) {
- case Instruction::FPExt: {
- // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless
- FPExtInst *LHSExt = cast<FPExtInst>(LHSI);
- ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC);
- if (!RHSF)
- break;
-
- const fltSemantics *Sem;
- // FIXME: This shouldn't be here.
- if (LHSExt->getSrcTy()->isHalfTy())
- Sem = &APFloat::IEEEhalf();
- else if (LHSExt->getSrcTy()->isFloatTy())
- Sem = &APFloat::IEEEsingle();
- else if (LHSExt->getSrcTy()->isDoubleTy())
- Sem = &APFloat::IEEEdouble();
- else if (LHSExt->getSrcTy()->isFP128Ty())
- Sem = &APFloat::IEEEquad();
- else if (LHSExt->getSrcTy()->isX86_FP80Ty())
- Sem = &APFloat::x87DoubleExtended();
- else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
- Sem = &APFloat::PPCDoubleDouble();
- else
- break;
-
- bool Lossy;
- APFloat F = RHSF->getValueAPF();
- F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
-
- // Avoid lossy conversions and denormals. Zero is a special case
- // that's OK to convert.
- APFloat Fabs = F;
- Fabs.clearSign();
- if (!Lossy &&
- ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
- APFloat::cmpLessThan) || Fabs.isZero()))
-
- return new FCmpInst(Pred, LHSExt->getOperand(0),
- ConstantFP::get(RHSC->getContext(), F));
- break;
- }
- case Instruction::PHI:
- // Only fold fcmp into the PHI if the phi and fcmp are in the same
- // block. If in the same block, we're encouraging jump threading. If
- // not, we are just pessimizing the code by making an i1 phi.
- if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
- return NV;
- break;
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
+ // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
+ // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
+ if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) {
+ I.setOperand(1, ConstantFP::getNullValue(Op1->getType()));
+ return &I;
+ }
+
+ // Handle fcmp with instruction LHS and constant RHS.
+ Instruction *LHSI;
+ Constant *RHSC;
+ if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) {
+ switch (LHSI->getOpcode()) {
+ case Instruction::PHI:
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
return NV;
- break;
- case Instruction::FSub: {
- // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
- Value *Op;
- if (match(LHSI, m_FNeg(m_Value(Op))))
- return new FCmpInst(I.getSwappedPredicate(), Op,
- ConstantExpr::getFNeg(RHSC));
- break;
- }
- case Instruction::Load:
- if (GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
- return Res;
- }
- break;
- case Instruction::Call: {
- if (!RHSC->isNullValue())
- break;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::FDiv:
+ if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC))
+ return NV;
+ break;
+ case Instruction::Load:
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
+ if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ break;
+ }
+ }
- CallInst *CI = cast<CallInst>(LHSI);
- Intrinsic::ID IID = getIntrinsicForCallSite(CI, &TLI);
- if (IID != Intrinsic::fabs)
- break;
+ if (Instruction *R = foldFabsWithFcmpZero(I))
+ return R;
- // Various optimization for fabs compared with zero.
- switch (Pred) {
- default:
- break;
- // fabs(x) < 0 --> false
- case FCmpInst::FCMP_OLT:
- llvm_unreachable("handled by SimplifyFCmpInst");
- // fabs(x) > 0 --> x != 0
- case FCmpInst::FCMP_OGT:
- return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC);
- // fabs(x) <= 0 --> x == 0
- case FCmpInst::FCMP_OLE:
- return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC);
- // fabs(x) >= 0 --> !isnan(x)
- case FCmpInst::FCMP_OGE:
- return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC);
- // fabs(x) == 0 --> x == 0
- // fabs(x) != 0 --> x != 0
- case FCmpInst::FCMP_OEQ:
- case FCmpInst::FCMP_UEQ:
- case FCmpInst::FCMP_ONE:
- case FCmpInst::FCMP_UNE:
- return new FCmpInst(Pred, CI->getArgOperand(0), RHSC);
- }
- }
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X)))) {
+ // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
+ if (match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
+
+ // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
+ Constant *C;
+ if (match(Op1, m_Constant(C))) {
+ Constant *NegC = ConstantExpr::getFNeg(C);
+ return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I);
+ }
+ }
+
+ if (match(Op0, m_FPExt(m_Value(X)))) {
+ // fcmp (fpext X), (fpext Y) -> fcmp X, Y
+ if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
+ return new FCmpInst(Pred, X, Y, "", &I);
+
+ // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
+ const APFloat *C;
+ if (match(Op1, m_APFloat(C))) {
+ const fltSemantics &FPSem =
+ X->getType()->getScalarType()->getFltSemantics();
+ bool Lossy;
+ APFloat TruncC = *C;
+ TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);
+
+ // Avoid lossy conversions and denormals.
+ // Zero is a special case that's OK to convert.
+ APFloat Fabs = TruncC;
+ Fabs.clearSign();
+ if (!Lossy &&
+ ((Fabs.compare(APFloat::getSmallestNormalized(FPSem)) !=
+ APFloat::cmpLessThan) || Fabs.isZero())) {
+ Constant *NewC = ConstantFP::get(X->getType(), TruncC);
+ return new FCmpInst(Pred, X, NewC, "", &I);
}
+ }
}
- // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y
- Value *X, *Y;
- if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
- return new FCmpInst(I.getSwappedPredicate(), X, Y);
-
- // fcmp (fpext x), (fpext y) -> fcmp x, y
- if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0))
- if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1))
- if (LHSExt->getSrcTy() == RHSExt->getSrcTy())
- return new FCmpInst(Pred, LHSExt->getOperand(0), RHSExt->getOperand(0));
+ if (I.getType()->isVectorTy())
+ if (Instruction *Res = foldVectorCmp(I, Builder))
+ return Res;
return Changed ? &I : nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 58ef3d41415c..2de41bd5bef5 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -20,7 +20,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -33,6 +32,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
@@ -41,11 +41,14 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#define DEBUG_TYPE "instcombine"
+using namespace llvm::PatternMatch;
+
namespace llvm {
class APInt;
@@ -79,8 +82,8 @@ class User;
/// 5 -> Other instructions
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
- if (isa<CastInst>(V) || BinaryOperator::isNeg(V) ||
- BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V))
+ if (isa<CastInst>(V) || match(V, m_Neg(m_Value())) ||
+ match(V, m_Not(m_Value())) || match(V, m_FNeg(m_Value())))
return 4;
return 5;
}
@@ -138,7 +141,7 @@ static inline Constant *SubOne(Constant *C) {
/// uses of V and only keep uses of ~V.
static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
// ~(~(X)) -> X.
- if (BinaryOperator::isNot(V))
+ if (match(V, m_Not(m_Value())))
return true;
// Constants can be considered to be not'ed values.
@@ -175,6 +178,10 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
if (isa<Constant>(BO->getOperand(0)) || isa<Constant>(BO->getOperand(1)))
return WillInvertAllUses;
+ // Selects with invertible operands are freely invertible
+ if (match(V, m_Select(m_Value(), m_Not(m_Value()), m_Not(m_Value()))))
+ return WillInvertAllUses;
+
return false;
}
@@ -496,6 +503,12 @@ private:
OverflowResult::NeverOverflows;
}
+ bool willNotOverflowAdd(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI, bool IsSigned) const {
+ return IsSigned ? willNotOverflowSignedAdd(LHS, RHS, CxtI)
+ : willNotOverflowUnsignedAdd(LHS, RHS, CxtI);
+ }
+
bool willNotOverflowSignedSub(const Value *LHS, const Value *RHS,
const Instruction &CxtI) const {
return computeOverflowForSignedSub(LHS, RHS, &CxtI) ==
@@ -508,6 +521,12 @@ private:
OverflowResult::NeverOverflows;
}
+ bool willNotOverflowSub(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI, bool IsSigned) const {
+ return IsSigned ? willNotOverflowSignedSub(LHS, RHS, CxtI)
+ : willNotOverflowUnsignedSub(LHS, RHS, CxtI);
+ }
+
bool willNotOverflowSignedMul(const Value *LHS, const Value *RHS,
const Instruction &CxtI) const {
return computeOverflowForSignedMul(LHS, RHS, &CxtI) ==
@@ -520,12 +539,29 @@ private:
OverflowResult::NeverOverflows;
}
+ bool willNotOverflowMul(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI, bool IsSigned) const {
+ return IsSigned ? willNotOverflowSignedMul(LHS, RHS, CxtI)
+ : willNotOverflowUnsignedMul(LHS, RHS, CxtI);
+ }
+
+ bool willNotOverflow(BinaryOperator::BinaryOps Opcode, const Value *LHS,
+ const Value *RHS, const Instruction &CxtI,
+ bool IsSigned) const {
+ switch (Opcode) {
+ case Instruction::Add: return willNotOverflowAdd(LHS, RHS, CxtI, IsSigned);
+ case Instruction::Sub: return willNotOverflowSub(LHS, RHS, CxtI, IsSigned);
+ case Instruction::Mul: return willNotOverflowMul(LHS, RHS, CxtI, IsSigned);
+ default: llvm_unreachable("Unexpected opcode for overflow query");
+ }
+ }
+
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
- Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
Instruction *narrowBinOp(TruncInst &Trunc);
Instruction *narrowMaskedBinOp(BinaryOperator &And);
+ Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
Instruction *narrowRotate(TruncInst &Trunc);
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
@@ -553,6 +589,9 @@ private:
Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
bool JoinedByAnd, Instruction &CxtI);
+ Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D);
+ Value *getSelectCondition(Value *A, Value *B);
+
public:
/// Inserts an instruction \p New before instruction \p Old
///
@@ -763,13 +802,14 @@ private:
Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DmaskIdx = -1);
+ int DmaskIdx = -1,
+ int TFCIdx = -1);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0);
/// Canonicalize the position of binops relative to shufflevector.
- Instruction *foldShuffledBinop(BinaryOperator &Inst);
+ Instruction *foldVectorBinop(BinaryOperator &Inst);
/// Given a binary operator, cast instruction, or select which has a PHI node
/// as operand #0, see if we can fold the instruction into the PHI (which is
@@ -813,11 +853,12 @@ private:
ConstantInt *AndCst = nullptr);
Instruction *foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
Constant *RHSC);
- Instruction *foldICmpAddOpConst(Value *X, ConstantInt *CI,
+ Instruction *foldICmpAddOpConst(Value *X, const APInt &C,
ICmpInst::Predicate Pred);
Instruction *foldICmpWithCastAndCast(ICmpInst &ICI);
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
+ Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
@@ -880,8 +921,11 @@ private:
Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
bool isSigned, bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
- Instruction *MatchBSwap(BinaryOperator &I);
- bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ bool mergeStoreIntoSuccessor(StoreInst &SI);
+
+ /// Given an 'or' instruction, check to see if it is part of a bswap idiom.
+ /// If so, return the equivalent bswap intrinsic.
+ Instruction *matchBSwap(BinaryOperator &Or);
Instruction *SimplifyAnyMemTransfer(AnyMemTransferInst *MI);
Instruction *SimplifyAnyMemSet(AnyMemSetInst *MI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 62769f077b47..76ab614090fa 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -19,6 +19,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
@@ -115,13 +116,10 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
}
// Lifetime intrinsics can be handled by the caller.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- assert(II->use_empty() && "Lifetime markers have no result to use!");
- ToDelete.push_back(II);
- continue;
- }
+ if (I->isLifetimeStartOrEnd()) {
+ assert(I->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(I);
+ continue;
}
// If this is isn't our memcpy/memmove, reject it as something we can't
@@ -197,30 +195,32 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
- New->setAlignment(AI.getAlignment());
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It(New);
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
- ++It;
-
- // Now that I is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
- Value *NullIdx = Constant::getNullValue(IdxTy);
- Value *Idx[2] = {NullIdx, NullIdx};
- Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
- IC.InsertNewInstBefore(GEP, *It);
-
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return IC.replaceInstUsesWith(AI, GEP);
+ if (C->getValue().getActiveBits() <= 64) {
+ Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It(New);
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+
+ // Now that I is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = {NullIdx, NullIdx};
+ Instruction *GEP =
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
+ IC.InsertNewInstBefore(GEP, *It);
+
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return IC.replaceInstUsesWith(AI, GEP);
+ }
}
if (isa<UndefValue>(AI.getArraySize()))
@@ -490,6 +490,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_mem_parallel_loop_access:
+ case LLVMContext::MD_access_group:
// All of these directly apply.
NewLoad->setMetadata(ID, N);
break;
@@ -549,10 +550,10 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_mem_parallel_loop_access:
+ case LLVMContext::MD_access_group:
// All of these directly apply.
NewStore->setMetadata(ID, N);
break;
-
case LLVMContext::MD_invariant_load:
case LLVMContext::MD_nonnull:
case LLVMContext::MD_range:
@@ -1024,7 +1025,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (Value *AvailableVal = FindAvailableLoadedValue(
&LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
if (IsLoadCSE)
- combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
+ combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
return replaceInstUsesWith(
LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
@@ -1496,64 +1497,45 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (isa<UndefValue>(Val))
return eraseInstFromFunction(SI);
- // If this store is the last instruction in the basic block (possibly
- // excepting debug info instructions), and if the block ends with an
- // unconditional branch, try to move it to the successor block.
+ // If this store is the second-to-last instruction in the basic block
+ // (excluding debug info and bitcasts of pointers) and if the block ends with
+ // an unconditional branch, try to move the store to the successor block.
BBI = SI.getIterator();
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
+
if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
if (BI->isUnconditional())
- if (SimplifyStoreAtEndOfBlock(SI))
- return nullptr; // xform done!
+ mergeStoreIntoSuccessor(SI);
return nullptr;
}
-/// SimplifyStoreAtEndOfBlock - Turn things like:
+/// Try to transform:
/// if () { *P = v1; } else { *P = v2 }
-/// into a phi node with a store in the successor.
-///
-/// Simplify things like:
+/// or:
/// *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
-///
-bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
assert(SI.isUnordered() &&
- "this code has not been auditted for volatile or ordered store case");
+ "This code has not been audited for volatile or ordered store case.");
+ // Check if the successor block has exactly 2 incoming edges.
BasicBlock *StoreBB = SI.getParent();
-
- // Check to see if the successor block has exactly two incoming edges. If
- // so, see if the other predecessor contains a store to the same location.
- // if so, insert a PHI node (if needed) and move the stores down.
BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
- // Determine whether Dest has exactly two predecessors and, if so, compute
- // the other predecessor.
- pred_iterator PI = pred_begin(DestBB);
- BasicBlock *P = *PI;
- BasicBlock *OtherBB = nullptr;
-
- if (P != StoreBB)
- OtherBB = P;
-
- if (++PI == pred_end(DestBB))
+ if (!DestBB->hasNPredecessors(2))
return false;
- P = *PI;
- if (P != StoreBB) {
- if (OtherBB)
- return false;
- OtherBB = P;
- }
- if (++PI != pred_end(DestBB))
- return false;
+ // Capture the other block (the block that doesn't contain our store).
+ pred_iterator PredIter = pred_begin(DestBB);
+ if (*PredIter == StoreBB)
+ ++PredIter;
+ BasicBlock *OtherBB = *PredIter;
- // Bail out if all the relevant blocks aren't distinct (this can happen,
- // for example, if SI is in an infinite loop)
+ // Bail out if all of the relevant blocks aren't distinct. This can happen,
+ // for example, if SI is in an infinite loop.
if (StoreBB == DestBB || OtherBB == DestBB)
return false;
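
A rough C++ picture of the store-sinking fold documented above, assuming the simple if/else case (function and variable names are illustrative): the two stores collapse into one PHI of the stored value followed by a single store in the common successor.

#include <cassert>

static void before(bool c, int *p, int v1, int v2) {
  if (c)
    *p = v1; // store in the 'then' block
  else
    *p = v2; // store in the 'else' block
}

static void after(bool c, int *p, int v1, int v2) {
  int merged = c ? v1 : v2; // the "storemerge" PHI
  *p = merged;              // the single store in the successor block
}

int main() {
  int a = 0, b = 0;
  before(true, &a, 10, 20);
  after(true, &b, 10, 20);
  assert(a == b);
  return 0;
}
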
@@ -1564,7 +1546,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
// If the other block ends in an unconditional branch, check for the 'if then
- // else' case. there is an instruction before the branch.
+ // else' case. There is an instruction before the branch.
StoreInst *OtherStore = nullptr;
if (OtherBr->isUnconditional()) {
--BBI;
@@ -1589,7 +1571,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
- // if/then triangle. See if there is a store to the same ptr as SI that
+ // if/then triangle. See if there is a store to the same ptr as SI that
// lives in OtherBB.
for (;; --BBI) {
// Check to see if we find the matching store.
@@ -1600,15 +1582,14 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
break;
}
// If we find something that may be using or overwriting the stored
- // value, or if we run out of instructions, we can't do the xform.
+ // value, or if we run out of instructions, we can't do the transform.
if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
BBI->mayWriteToMemory() || BBI == OtherBB->begin())
return false;
}
- // In order to eliminate the store in OtherBr, we have to
- // make sure nothing reads or overwrites the stored value in
- // StoreBB.
+ // In order to eliminate the store in OtherBr, we have to make sure nothing
+ // reads or overwrites the stored value in StoreBB.
for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
// FIXME: This should really be AA driven.
if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
@@ -1618,24 +1599,24 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
+ // The debug locations of the original instructions might differ. Merge them.
+ DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
+ OtherStore->getDebugLoc());
if (MergedVal != SI.getOperand(0)) {
PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
PN->addIncoming(SI.getOperand(0), SI.getParent());
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
+ PN->setDebugLoc(MergedLoc);
}
- // Advance to a place where it is safe to insert the new store and
- // insert it.
+ // Advance to a place where it is safe to insert the new store and insert it.
BBI = DestBB->getFirstInsertionPt();
StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
- SI.isVolatile(),
- SI.getAlignment(),
- SI.getOrdering(),
- SI.getSyncScopeID());
+ SI.isVolatile(), SI.getAlignment(),
+ SI.getOrdering(), SI.getSyncScopeID());
InsertNewInstBefore(NewSI, *BBI);
- // The debug locations of the original instructions might differ; merge them.
- NewSI->applyMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc());
+ NewSI->setDebugLoc(MergedLoc);
// If the two stores had AA tags, merge them.
AAMDNodes AATags;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 63761d427235..7e99f3e4e500 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -133,7 +133,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -171,14 +171,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
// Replace X*(2^C) with X << C, where C is either a scalar or a vector.
if (Constant *NewCst = getLogBase2(NewOp->getType(), C1)) {
- unsigned Width = NewCst->getType()->getPrimitiveSizeInBits();
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
if (I.hasNoUnsignedWrap())
Shl->setHasNoUnsignedWrap();
if (I.hasNoSignedWrap()) {
const APInt *V;
- if (match(NewCst, m_APInt(V)) && *V != Width - 1)
+ if (match(NewCst, m_APInt(V)) && *V != V->getBitWidth() - 1)
Shl->setHasNoSignedWrap();
}
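
A minimal check of the power-of-two fold above, assuming 32-bit unsigned values (the concrete numbers are illustrative): X * (2^C) is the same value as X << C, which is why the multiply is rewritten as a shift.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 37;
  assert(x * 8u == (x << 3)); // 8 == 2^3, so the mul becomes a shl
  return 0;
}
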
@@ -245,6 +244,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return NewMul;
}
+ // -X * Y --> -(X * Y)
+ // X * -Y --> -(X * Y)
+ if (match(&I, m_c_Mul(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))
+ return BinaryOperator::CreateNeg(Builder.CreateMul(X, Y));
+
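
A small sanity check of the new negation fold above, using unsigned (wrapping) arithmetic as a stand-in for two's-complement integers; the values are illustrative.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 123, y = 456;
  assert((0u - x) * y == 0u - (x * y)); // -X * Y == -(X * Y)
  return 0;
}
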
// (X / Y) * Y = X - (X % Y)
// (X / Y) * -Y = (X % Y) - X
{
@@ -323,77 +327,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1)
return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op0);
- // Check for (mul (sext x), y), see if we can merge this into an
- // integer mul followed by a sext.
- if (SExtInst *Op0Conv = dyn_cast<SExtInst>(Op0)) {
- // (mul (sext x), cst) --> (sext (mul x, cst'))
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- if (Op0Conv->hasOneUse()) {
- Constant *CI =
- ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
- if (ConstantExpr::getSExt(CI, I.getType()) == Op1C &&
- willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
- // Insert the new, smaller mul.
- Value *NewMul =
- Builder.CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
- return new SExtInst(NewMul, I.getType());
- }
- }
- }
-
- // (mul (sext x), (sext y)) --> (sext (mul int x, y))
- if (SExtInst *Op1Conv = dyn_cast<SExtInst>(Op1)) {
- // Only do this if x/y have the same type, if at last one of them has a
- // single use (so we don't increase the number of sexts), and if the
- // integer mul will not overflow.
- if (Op0Conv->getOperand(0)->getType() ==
- Op1Conv->getOperand(0)->getType() &&
- (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
- willNotOverflowSignedMul(Op0Conv->getOperand(0),
- Op1Conv->getOperand(0), I)) {
- // Insert the new integer mul.
- Value *NewMul = Builder.CreateNSWMul(
- Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
- return new SExtInst(NewMul, I.getType());
- }
- }
- }
-
- // Check for (mul (zext x), y), see if we can merge this into an
- // integer mul followed by a zext.
- if (auto *Op0Conv = dyn_cast<ZExtInst>(Op0)) {
- // (mul (zext x), cst) --> (zext (mul x, cst'))
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- if (Op0Conv->hasOneUse()) {
- Constant *CI =
- ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
- if (ConstantExpr::getZExt(CI, I.getType()) == Op1C &&
- willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) {
- // Insert the new, smaller mul.
- Value *NewMul =
- Builder.CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
- return new ZExtInst(NewMul, I.getType());
- }
- }
- }
-
- // (mul (zext x), (zext y)) --> (zext (mul int x, y))
- if (auto *Op1Conv = dyn_cast<ZExtInst>(Op1)) {
- // Only do this if x/y have the same type, if at last one of them has a
- // single use (so we don't increase the number of zexts), and if the
- // integer mul will not overflow.
- if (Op0Conv->getOperand(0)->getType() ==
- Op1Conv->getOperand(0)->getType() &&
- (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
- willNotOverflowUnsignedMul(Op0Conv->getOperand(0),
- Op1Conv->getOperand(0), I)) {
- // Insert the new integer mul.
- Value *NewMul = Builder.CreateNUWMul(
- Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
- return new ZExtInst(NewMul, I.getType());
- }
- }
- }
+ if (Instruction *Ext = narrowMathIfNoOverflow(I))
+ return Ext;
bool Changed = false;
if (!I.hasNoSignedWrap() && willNotOverflowSignedMul(Op0, Op1, I)) {
@@ -418,7 +353,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (SimplifyAssociativeOrCommutative(I))
return &I;
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
@@ -503,7 +438,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
match(Op0, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) &&
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
Value *XY = Builder.CreateFMulFMF(X, Y, &I);
- Value *Sqrt = Builder.CreateIntrinsic(Intrinsic::sqrt, { XY }, &I);
+ Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I);
return replaceInstUsesWith(I, Sqrt);
}
@@ -933,7 +868,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer div common cases
@@ -1027,7 +962,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer div common cases
@@ -1175,7 +1110,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *R = foldFDivConstantDivisor(I))
@@ -1227,7 +1162,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
IRBuilder<>::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(I.getFastMathFlags());
AttributeList Attrs = CallSite(Op0).getCalledFunction()->getAttributes();
- Value *Res = emitUnaryFloatFnCall(X, TLI.getName(LibFunc_tan), B, Attrs);
+ Value *Res = emitUnaryFloatFnCall(X, &TLI, LibFunc_tan, LibFunc_tanf,
+ LibFunc_tanl, B, Attrs);
if (IsCot)
Res = B.CreateFDiv(ConstantFP::get(I.getType(), 1.0), Res);
return replaceInstUsesWith(I, Res);
@@ -1304,7 +1240,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *common = commonIRemTransforms(I))
@@ -1351,7 +1287,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer rem common cases
@@ -1425,7 +1361,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
return nullptr;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index e54a1dd05a24..7603cf4d7958 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -211,20 +211,20 @@ Instruction *InstCombiner::FoldIntegerTypedPHI(PHINode &PN) {
}
// If it requires a conversion for every PHI operand, do not do it.
- if (std::all_of(AvailablePtrVals.begin(), AvailablePtrVals.end(),
- [&](Value *V) {
- return (V->getType() != IntToPtr->getType()) ||
- isa<IntToPtrInst>(V);
- }))
+ if (all_of(AvailablePtrVals, [&](Value *V) {
+ return (V->getType() != IntToPtr->getType()) || isa<IntToPtrInst>(V);
+ }))
return nullptr;
// If any of the operand that requires casting is a terminator
// instruction, do not do it.
- if (std::any_of(AvailablePtrVals.begin(), AvailablePtrVals.end(),
- [&](Value *V) {
- return (V->getType() != IntToPtr->getType()) &&
- isa<TerminatorInst>(V);
- }))
+ if (any_of(AvailablePtrVals, [&](Value *V) {
+ if (V->getType() == IntToPtr->getType())
+ return false;
+
+ auto *Inst = dyn_cast<Instruction>(V);
+ return Inst && Inst->isTerminator();
+ }))
return nullptr;
PHINode *NewPtrPHI = PHINode::Create(
@@ -608,6 +608,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_access_group,
};
for (unsigned ID : KnownIDs)
@@ -616,7 +617,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// Add all operands to the new PHI and combine TBAA metadata.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i));
- combineMetadata(NewLI, LI, KnownIDs);
+ combineMetadata(NewLI, LI, KnownIDs, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
@@ -649,7 +650,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
// We cannot create a new instruction after the PHI if the terminator is an
// EHPad because there is no valid insertion point.
- if (TerminatorInst *TI = Phi.getParent()->getTerminator())
+ if (Instruction *TI = Phi.getParent()->getTerminator())
if (TI->isEHPad())
return nullptr;
@@ -723,7 +724,7 @@ Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// We cannot create a new instruction after the PHI if the terminator is an
// EHPad because there is no valid insertion point.
- if (TerminatorInst *TI = PN.getParent()->getTerminator())
+ if (Instruction *TI = PN.getParent()->getTerminator())
if (TI->isEHPad())
return nullptr;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 796b4021d273..faf58a08976d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -54,34 +54,62 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder,
return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
}
-/// Fold
-/// %A = icmp eq/ne i8 %x, 0
-/// %B = op i8 %x, %z
-/// %C = select i1 %A, i8 %B, i8 %y
-/// To
-/// %C = select i1 %A, i8 %z, i8 %y
-/// OP: binop with an identity constant
-/// TODO: support for non-commutative and FP opcodes
-static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) {
-
- Value *Cond = Sel.getCondition();
- Value *X, *Z;
+/// Replace a select operand based on an equality comparison with the identity
+/// constant of a binop.
+static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
+ const TargetLibraryInfo &TLI) {
+ // The select condition must be an equality compare with a constant operand.
+ Value *X;
Constant *C;
CmpInst::Predicate Pred;
- if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) ||
- !ICmpInst::isEquality(Pred))
+ if (!match(Sel.getCondition(), m_Cmp(Pred, m_Value(X), m_Constant(C))))
return nullptr;
- bool IsEq = Pred == ICmpInst::ICMP_EQ;
- auto *BO =
- dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue());
- // TODO: support for undefs
- if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) &&
- ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) {
- Sel.setOperand(IsEq ? 1 : 2, Z);
- return &Sel;
+ bool IsEq;
+ if (ICmpInst::isEquality(Pred))
+ IsEq = Pred == ICmpInst::ICMP_EQ;
+ else if (Pred == FCmpInst::FCMP_OEQ)
+ IsEq = true;
+ else if (Pred == FCmpInst::FCMP_UNE)
+ IsEq = false;
+ else
+ return nullptr;
+
+ // A select operand must be a binop.
+ BinaryOperator *BO;
+ if (!match(Sel.getOperand(IsEq ? 1 : 2), m_BinOp(BO)))
+ return nullptr;
+
+ // The compare constant must be the identity constant for that binop.
+ // If this a floating-point compare with 0.0, any zero constant will do.
+ Type *Ty = BO->getType();
+ Constant *IdC = ConstantExpr::getBinOpIdentity(BO->getOpcode(), Ty, true);
+ if (IdC != C) {
+ if (!IdC || !CmpInst::isFPPredicate(Pred))
+ return nullptr;
+ if (!match(IdC, m_AnyZeroFP()) || !match(C, m_AnyZeroFP()))
+ return nullptr;
}
- return nullptr;
+
+ // Last, match the compare variable operand with a binop operand.
+ Value *Y;
+ if (!BO->isCommutative() && !match(BO, m_BinOp(m_Value(Y), m_Specific(X))))
+ return nullptr;
+ if (!match(BO, m_c_BinOp(m_Value(Y), m_Specific(X))))
+ return nullptr;
+
+ // +0.0 compares equal to -0.0, and so it does not behave as required for this
+ // transform. Bail out if we can not exclude that possibility.
+ if (isa<FPMathOperator>(BO))
+ if (!BO->hasNoSignedZeros() && !CannotBeNegativeZero(Y, &TLI))
+ return nullptr;
+
+ // BO = binop Y, X
+ // S = { select (cmp eq X, C), BO, ? } or { select (cmp ne X, C), ?, BO }
+ // =>
+ // S = { select (cmp eq X, C), Y, ? } or { select (cmp ne X, C), ?, Y }
+ Sel.setOperand(IsEq ? 1 : 2, Y);
+ return &Sel;
}
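
A source-level sketch of the identity-constant fold above, assuming an integer add (names and values are illustrative, not from the patch): when the condition proves x == 0, the selected arm x + y is just y, so the binop operand can be replaced directly.

#include <cassert>

static int before(int x, int y, int z) { return x == 0 ? x + y : z; }
static int after(int x, int y, int z) { return x == 0 ? y : z; } // fold applied

int main() {
  for (int x : {0, 5})
    assert(before(x, 7, 9) == after(x, 7, 9));
  return 0;
}
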
/// This folds:
@@ -343,13 +371,24 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
return nullptr;
}
+ // If the select condition is a vector, the operands of the original select's
+ // operands also must be vectors. This may not be the case for getelementptr
+ // for example.
+ if (SI.getCondition()->getType()->isVectorTy() &&
+ (!OtherOpT->getType()->isVectorTy() ||
+ !OtherOpF->getType()->isVectorTy()))
+ return nullptr;
+
// If we reach here, they do have operations in common.
Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
SI.getName() + ".v", &SI);
Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
if (auto *BO = dyn_cast<BinaryOperator>(TI)) {
- return BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
+ BinaryOperator *NewBO = BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
+ NewBO->copyIRFlags(TI);
+ NewBO->andIRFlags(FI);
+ return NewBO;
}
if (auto *TGEP = dyn_cast<GetElementPtrInst>(TI)) {
auto *FGEP = cast<GetElementPtrInst>(FI);
@@ -670,17 +709,18 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
match(Count, m_Trunc(m_Value(V))))
Count = V;
+ // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
+ // input to the cttz/ctlz is used as LHS for the compare instruction.
+ if (!match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) &&
+ !match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS))))
+ return nullptr;
+
+ IntrinsicInst *II = cast<IntrinsicInst>(Count);
+
// Check if the value propagated on zero is a constant number equal to the
// sizeof in bits of 'Count'.
unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
- if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
- return nullptr;
-
- // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
- // input to the cttz/ctlz is used as LHS for the compare instruction.
- if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
- match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
- IntrinsicInst *II = cast<IntrinsicInst>(Count);
+ if (match(ValueOnZero, m_SpecificInt(SizeOfInBits))) {
// Explicitly clear the 'undef_on_zero' flag.
IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
NewI->setArgOperand(1, ConstantInt::getFalse(NewI->getContext()));
@@ -688,6 +728,12 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
}
+ // If the ValueOnZero is not the bitwidth, we can at least make use of the
+ // fact that the cttz/ctlz result will not be used if the input is zero, so
+ // it's okay to relax it to undef for that case.
+ if (II->hasOneUse() && !match(II->getArgOperand(1), m_One()))
+ II->setArgOperand(1, ConstantInt::getTrue(II->getContext()));
+
return nullptr;
}
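
An illustration of the cttz/ctlz fold above, assuming a 32-bit cttz (the helper below is a portable stand-in, not an LLVM API): a count-trailing-zeros that is defined to return the bit width for a zero input already computes "x == 0 ? 32 : cttz(x)", so the compare and select around it are redundant.

#include <cassert>
#include <cstdint>

static unsigned cttz32(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !((x >> n) & 1u))
    ++n;
  return n; // 32 for x == 0, matching cttz with is_zero_undef = false
}

int main() {
  for (uint32_t x : {0u, 1u, 8u, 0x80000000u})
    assert((x == 0 ? 32u : cttz32(x)) == cttz32(x));
  return 0;
}
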
@@ -1054,11 +1100,13 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
if (C == A || C == B) {
// MAX(MAX(A, B), B) -> MAX(A, B)
// MIN(MIN(a, b), a) -> MIN(a, b)
+ // TODO: This could be done in instsimplify.
if (SPF1 == SPF2 && SelectPatternResult::isMinOrMax(SPF1))
return replaceInstUsesWith(Outer, Inner);
// MAX(MIN(a, b), a) -> a
// MIN(MAX(a, b), a) -> a
+ // TODO: This could be done in instsimplify.
if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
(SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
(SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
@@ -1071,6 +1119,7 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
if (match(B, m_APInt(CB)) && match(C, m_APInt(CC))) {
// MIN(MIN(A, 23), 97) -> MIN(A, 23)
// MAX(MAX(A, 97), 23) -> MAX(A, 97)
+ // TODO: This could be done in instsimplify.
if ((SPF1 == SPF_UMIN && CB->ule(*CC)) ||
(SPF1 == SPF_SMIN && CB->sle(*CC)) ||
(SPF1 == SPF_UMAX && CB->uge(*CC)) ||
@@ -1091,6 +1140,7 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
// ABS(ABS(X)) -> ABS(X)
// NABS(NABS(X)) -> NABS(X)
+ // TODO: This could be done in instsimplify.
if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) {
return replaceInstUsesWith(Outer, Inner);
}
@@ -1503,6 +1553,60 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp);
}
+/// Try to reduce a rotate pattern that includes a compare and select into a
+/// funnel shift intrinsic. Example:
+/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
+/// --> call llvm.fshl.i32(a, a, b)
+static Instruction *foldSelectRotate(SelectInst &Sel) {
+ // The false value of the select must be a rotate of the true value.
+ Value *Or0, *Or1;
+ if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
+ return nullptr;
+
+ Value *TVal = Sel.getTrueValue();
+ Value *SA0, *SA1;
+ if (!match(Or0, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA0)))) ||
+ !match(Or1, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA1)))))
+ return nullptr;
+
+ auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+ auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+ if (ShiftOpcode0 == ShiftOpcode1)
+ return nullptr;
+
+ // We have one of these patterns so far:
+ // select ?, TVal, (or (lshr TVal, SA0), (shl TVal, SA1))
+ // select ?, TVal, (or (shl TVal, SA0), (lshr TVal, SA1))
+ // This must be a power-of-2 rotate for a bitmasking transform to be valid.
+ unsigned Width = Sel.getType()->getScalarSizeInBits();
+ if (!isPowerOf2_32(Width))
+ return nullptr;
+
+ // Check the shift amounts to see if they are an opposite pair.
+ Value *ShAmt;
+ if (match(SA1, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA0)))))
+ ShAmt = SA0;
+ else if (match(SA0, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA1)))))
+ ShAmt = SA1;
+ else
+ return nullptr;
+
+ // Finally, see if the select is filtering out a shift-by-zero.
+ Value *Cond = Sel.getCondition();
+ ICmpInst::Predicate Pred;
+ if (!match(Cond, m_OneUse(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()))) ||
+ Pred != ICmpInst::ICMP_EQ)
+ return nullptr;
+
+ // This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
+ // Convert to funnel shift intrinsic.
+ bool IsFshl = (ShAmt == SA0 && ShiftOpcode0 == BinaryOperator::Shl) ||
+ (ShAmt == SA1 && ShiftOpcode1 == BinaryOperator::Shl);
+ Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+ Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());
+ return IntrinsicInst::Create(F, { TVal, TVal, ShAmt });
+}
+
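
The guarded-rotate source pattern quoted in the comment above, next to a branch-free form with llvm.fshl's modulo-width semantics; the 32-bit width and function names are illustrative. Both produce the same result for every shift amount, which is why the compare and select can be replaced by the funnel-shift intrinsic.

#include <cassert>
#include <cstdint>

static uint32_t rotl_guarded(uint32_t a, uint32_t b) {
  return b == 0 ? a : ((a >> (32 - b)) | (a << b)); // guards against a shift by 32
}

static uint32_t rotl_fshl(uint32_t a, uint32_t b) {
  b &= 31; // shift amount taken modulo the bit width, like llvm.fshl
  return (a << b) | (a >> ((32 - b) & 31));
}

int main() {
  for (uint32_t b = 0; b < 32; ++b)
    assert(rotl_guarded(0xDEADBEEFu, b) == rotl_fshl(0xDEADBEEFu, b));
  return 0;
}
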
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -1617,31 +1721,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// See if we are selecting two values based on a comparison of the two values.
if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
- // Transform (X == Y) ? X : Y -> Y
- if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return replaceInstUsesWith(SI, FalseVal);
- }
- // Transform (X une Y) ? X : Y -> X
- if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return replaceInstUsesWith(SI, TrueVal);
- }
-
// Canonicalize to use ordered comparisons by swapping the select
// operands.
//
@@ -1660,31 +1739,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// NOTE: if we wanted to, this is where to detect MIN/MAX
} else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
- // Transform (X == Y) ? Y : X -> X
- if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return replaceInstUsesWith(SI, FalseVal);
- }
- // Transform (X une Y) ? Y : X -> Y
- if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
- // This is not safe in general for floating point:
- // consider X== -0, Y== +0.
- // It becomes safe if either operand is a nonzero constant.
- ConstantFP *CFPt, *CFPf;
- if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
- !CFPt->getValueAPF().isZero()) ||
- ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
- !CFPf->getValueAPF().isZero()))
- return replaceInstUsesWith(SI, TrueVal);
- }
-
// Canonicalize to use ordered comparisons by swapping the select
// operands.
//
@@ -1717,7 +1771,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) ||
(X == TrueVal && Pred == FCmpInst::FCMP_OGT &&
match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(X))))) {
- Value *Fabs = Builder.CreateIntrinsic(Intrinsic::fabs, { X }, FCI);
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
return replaceInstUsesWith(SI, Fabs);
}
// With nsz:
@@ -1730,7 +1784,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
(Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE)) ||
(X == TrueVal && match(FalseVal, m_FNeg(m_Specific(X))) &&
(Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE)))) {
- Value *Fabs = Builder.CreateIntrinsic(Intrinsic::fabs, { X }, FCI);
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
return replaceInstUsesWith(SI, Fabs);
}
}
@@ -1759,10 +1813,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *FoldI = foldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
- Value *LHS, *RHS, *LHS2, *RHS2;
+ Value *LHS, *RHS;
Instruction::CastOps CastOp;
SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp);
auto SPF = SPR.Flavor;
+ if (SPF) {
+ Value *LHS2, *RHS2;
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)
+ if (Instruction *R = foldSPFofSPF(cast<Instruction>(LHS), SPF2, LHS2,
+ RHS2, SI, SPF, RHS))
+ return R;
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)
+ if (Instruction *R = foldSPFofSPF(cast<Instruction>(RHS), SPF2, LHS2,
+ RHS2, SI, SPF, LHS))
+ return R;
+ // TODO.
+ // ABS(-X) -> ABS(X)
+ }
if (SelectPatternResult::isMinOrMax(SPF)) {
// Canonicalize so that
@@ -1797,39 +1864,40 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// MAX(~a, ~b) -> ~MIN(a, b)
+ // MAX(~a, C) -> ~MIN(a, ~C)
// MIN(~a, ~b) -> ~MAX(a, b)
- Value *A, *B;
- if (match(LHS, m_Not(m_Value(A))) && match(RHS, m_Not(m_Value(B))) &&
- (LHS->getNumUses() <= 2 || RHS->getNumUses() <= 2)) {
- CmpInst::Predicate InvertedPred = getInverseMinMaxPred(SPF);
- Value *InvertedCmp = Builder.CreateICmp(InvertedPred, A, B);
- Value *NewSel = Builder.CreateSelect(InvertedCmp, A, B);
- return BinaryOperator::CreateNot(NewSel);
- }
+ // MIN(~a, C) -> ~MAX(a, ~C)
+ auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
+ Value *A;
+ if (match(X, m_Not(m_Value(A))) && !X->hasNUsesOrMore(3) &&
+ !IsFreeToInvert(A, A->hasOneUse()) &&
+ // Passing false to only consider m_Not and constants.
+ IsFreeToInvert(Y, false)) {
+ Value *B = Builder.CreateNot(Y);
+ Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF),
+ A, B);
+ // Copy the profile metadata.
+ if (MDNode *MD = SI.getMetadata(LLVMContext::MD_prof)) {
+ cast<SelectInst>(NewMinMax)->setMetadata(LLVMContext::MD_prof, MD);
+ // Swap the metadata if the operands are swapped.
+ if (X == SI.getFalseValue() && Y == SI.getTrueValue())
+ cast<SelectInst>(NewMinMax)->swapProfMetadata();
+ }
- if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
+ return BinaryOperator::CreateNot(NewMinMax);
+ }
+
+ return nullptr;
+ };
+
+ if (Instruction *I = moveNotAfterMinMax(LHS, RHS))
+ return I;
+ if (Instruction *I = moveNotAfterMinMax(RHS, LHS))
return I;
- }
- if (SPF) {
- // MAX(MAX(a, b), a) -> MAX(a, b)
- // MIN(MIN(a, b), a) -> MIN(a, b)
- // MAX(MIN(a, b), a) -> a
- // MIN(MAX(a, b), a) -> a
- // ABS(ABS(a)) -> ABS(a)
- // NABS(NABS(a)) -> NABS(a)
- if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)
- if (Instruction *R = foldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
- SI, SPF, RHS))
- return R;
- if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)
- if (Instruction *R = foldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
- SI, SPF, LHS))
- return R;
+ if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
+ return I;
}
-
- // TODO.
- // ABS(-X) -> ABS(X)
}
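
A quick check of the De Morgan-style identities behind the not-hoisting fold above, using unsigned 32-bit values (the constants are illustrative): max(~a, ~b) == ~min(a, b) and min(~a, ~b) == ~max(a, b).

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0x1234, b = 0xBEEF;
  assert(std::max(~a, ~b) == ~std::min(a, b)); // MAX(~a, ~b) -> ~MIN(a, b)
  assert(std::min(~a, ~b) == ~std::max(a, b)); // MIN(~a, ~b) -> ~MAX(a, b)
  return 0;
}
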
// See if we can fold the select into a phi node if the condition is a select.
@@ -1934,10 +2002,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
}
- if (BinaryOperator::isNot(CondVal)) {
- SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
+ Value *NotCond;
+ if (match(CondVal, m_Not(m_Value(NotCond)))) {
+ SI.setOperand(0, NotCond);
SI.setOperand(1, FalseVal);
SI.setOperand(2, TrueVal);
+ SI.swapProfMetadata();
return &SI;
}
@@ -1952,24 +2022,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
}
- // See if we can determine the result of this select based on a dominating
- // condition.
- BasicBlock *Parent = SI.getParent();
- if (BasicBlock *Dom = Parent->getSinglePredecessor()) {
- auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
- if (PBI && PBI->isConditional() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
- (PBI->getSuccessor(0) == Parent || PBI->getSuccessor(1) == Parent)) {
- bool CondIsTrue = PBI->getSuccessor(0) == Parent;
- Optional<bool> Implication = isImpliedCondition(
- PBI->getCondition(), SI.getCondition(), DL, CondIsTrue);
- if (Implication) {
- Value *V = *Implication ? TrueVal : FalseVal;
- return replaceInstUsesWith(SI, V);
- }
- }
- }
-
// If we can compute the condition, there's no need for a select.
// Like the above fold, we are attempting to reduce compile-time cost by
// putting this fold here with limitations rather than in InstSimplify.
@@ -1991,8 +2043,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *Select = foldSelectCmpXchg(SI))
return Select;
- if (Instruction *Select = foldSelectBinOpIdentity(SI))
+ if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI))
return Select;
+ if (Instruction *Rot = foldSelectRotate(SI))
+ return Rot;
+
return nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 1ca75f3989d4..c562d45a9e2b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -593,7 +593,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *V = commonShiftTransforms(I))
@@ -697,7 +697,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *R = commonShiftTransforms(I))
@@ -725,9 +725,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
Value *X;
const APInt *ShOp1;
- if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1)))) {
- unsigned ShlAmt = ShOp1->getZExtValue();
- if (ShlAmt < ShAmt) {
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) {
+ if (ShOp1->ult(ShAmt)) {
+ unsigned ShlAmt = ShOp1->getZExtValue();
Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
// (X <<nuw C1) >>u C2 --> X >>u (C2 - C1)
@@ -740,7 +740,8 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
}
- if (ShlAmt > ShAmt) {
+ if (ShOp1->ugt(ShAmt)) {
+ unsigned ShlAmt = ShOp1->getZExtValue();
Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
// (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
@@ -753,7 +754,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
}
- assert(ShlAmt == ShAmt);
+ assert(*ShOp1 == ShAmt);
// (X << C) >>u C --> X & (-1 >>u C)
APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
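
A worked instance of the equal-shift-amount case above, assuming 8-bit values and an illustrative shift of 3: (x << c) >> c keeps only the low (8 - c) bits, which is exactly x & (0xFF >> c).

#include <cassert>
#include <cstdint>

int main() {
  const unsigned c = 3;
  for (unsigned i = 0; i < 256; ++i) {
    uint8_t x = static_cast<uint8_t>(i);
    uint8_t shifted = static_cast<uint8_t>(static_cast<uint8_t>(x << c) >> c);
    assert(shifted == (x & (0xFFu >> c))); // (X << C) >>u C --> X & (-1 >>u C)
  }
  return 0;
}
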
@@ -825,7 +826,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
- if (Instruction *X = foldShuffledBinop(I))
+ if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *R = commonShiftTransforms(I))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 425f5ce384be..9bf87d024607 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -314,11 +314,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.One = std::move(IKnownOne);
break;
}
- case Instruction::Select:
- // If this is a select as part of a min/max pattern, don't simplify any
- // further in case we break the structure.
+ case Instruction::Select: {
Value *LHS, *RHS;
- if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
+ SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
+ if (SPF == SPF_UMAX) {
+ // UMax(A, C) == A if ...
+ // The lowest non-zero bit of DemandedMask is higher than the highest
+ // non-zero bit of C.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
+ return LHS;
+ } else if (SPF == SPF_UMIN) {
+ // UMin(A, C) == A if ...
+ // The lowest non-zero bit of DemandedMask is higher than the highest
+ // non-one bit of C.
+ // This comes from using DeMorgans on the above umax example.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(RHS, m_APInt(C)) &&
+ CTZ >= C->getBitWidth() - C->countLeadingOnes())
+ return LHS;
+ }
+
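
A concrete instance of the new umax rule above, with illustrative values: for DemandedMask = 0xF0 and C = 7, the lowest demanded bit (bit 4) is above the highest set bit of C (bit 2), so every demanded bit of umax(A, 7) equals the corresponding bit of A.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t DemandedMask = 0xF0;
  for (uint32_t a = 0; a < 1024; ++a)
    assert((std::max(a, 7u) & DemandedMask) == (a & DemandedMask));
  return 0;
}
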
+ // If this is a select as part of any other min/max pattern, don't simplify
+ // any further in case we break the structure.
+ if (SPF != SPF_UNKNOWN)
return nullptr;
if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
@@ -336,6 +357,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.One = RHSKnown.One & LHSKnown.One;
Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
break;
+ }
case Instruction::ZExt:
case Instruction::Trunc: {
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
@@ -668,6 +690,30 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
+ case Intrinsic::fshr:
+ case Intrinsic::fshl: {
+ const APInt *SA;
+ if (!match(I->getOperand(2), m_APInt(SA)))
+ break;
+
+ // Normalize to funnel shift left. APInt shifts of BitWidth are well-
+ // defined, so no need to special-case zero shifts here.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
+ APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
+ if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
+ return I;
+
+ Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
+ RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
+ Known.One = LHSKnown.One.shl(ShiftAmt) |
+ RHSKnown.One.lshr(BitWidth - ShiftAmt);
+ break;
+ }
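
A reference model for the fshr-to-fshl normalization above, assuming 32-bit operands and a shift amount in [1, 31] (the zero case is covered by APInt's well-defined full-width shifts, as the comment notes): a funnel shift right by s equals a funnel shift left by 32 - s.

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t a, uint32_t b, uint32_t s) { // s in [1, 31]
  return (a << s) | (b >> (32 - s));
}

static uint32_t fshr32(uint32_t a, uint32_t b, uint32_t s) { // s in [1, 31]
  return (b >> s) | (a << (32 - s));
}

int main() {
  for (uint32_t s = 1; s < 32; ++s)
    assert(fshr32(0x12345678u, 0x9ABCDEF0u, s) ==
           fshl32(0x12345678u, 0x9ABCDEF0u, 32 - s));
  return 0;
}
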
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
@@ -923,11 +969,24 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DMaskIdx) {
+ int DMaskIdx,
+ int TFCIdx) {
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;
+ // Need to change to new instruction format
+ ConstantInt *TFC = nullptr;
+ bool TFELWEEnabled = false;
+ if (TFCIdx > 0) {
+ TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
+ TFELWEEnabled = TFC->getZExtValue() & 0x1 // TFE
+ || TFC->getZExtValue() & 0x2; // LWE
+ }
+
+ if (TFELWEEnabled)
+ return nullptr; // TFE not yet supported
+
ConstantInt *NewDMask = nullptr;
if (DMaskIdx < 0) {
@@ -1052,8 +1111,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts = 0;
- // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
- if (Constant *C = dyn_cast<Constant>(V)) {
+ if (auto *C = dyn_cast<Constant>(V)) {
// Check if this is identity. If so, return 0 since we are not simplifying
// anything.
if (DemandedElts.isAllOnesValue())
@@ -1061,7 +1119,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
-
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0; i != VWidth; ++i) {
if (!DemandedElts[i]) { // If not demanded, set to undef.
@@ -1109,9 +1166,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (!I) return nullptr; // Only analyze instructions.
bool MadeChange = false;
+ auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
+ APInt Demanded, APInt &Undef) {
+ auto *II = dyn_cast<IntrinsicInst>(Inst);
+ Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
+ if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
+ if (II)
+ II->setArgOperand(OpNum, V);
+ else
+ Inst->setOperand(OpNum, V);
+ MadeChange = true;
+ }
+ };
+
APInt UndefElts2(VWidth, 0);
APInt UndefElts3(VWidth, 0);
- Value *TmpV;
switch (I->getOpcode()) {
default: break;
@@ -1122,9 +1191,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (!Idx) {
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
break;
}
@@ -1134,9 +1201,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt PreInsertDemandedElts = DemandedElts;
if (IdxNo < VWidth)
PreInsertDemandedElts.clearBit(IdxNo);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), PreInsertDemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+
+ simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);
// If this is inserting an element that isn't demanded, remove this
// insertelement.
@@ -1169,14 +1235,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
APInt LHSUndefElts(LHSVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- LHSUndefElts, Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
APInt RHSUndefElts(LHSVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- RHSUndefElts, Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);
bool NewUndefElts = false;
unsigned LHSIdx = -1u, LHSValIdx = -1u;
@@ -1260,32 +1322,43 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
case Instruction::Select: {
- APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
- if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
+ // If this is a vector select, try to transform the select condition based
+ // on the current demanded elements.
+ SelectInst *Sel = cast<SelectInst>(I);
+ if (Sel->getCondition()->getType()->isVectorTy()) {
+ // TODO: We are not doing anything with UndefElts based on this call.
+ // It is overwritten below based on the other select operands. If an
+ // element of the select condition is known undef, then we are free to
+ // choose the output value from either arm of the select. If we know that
+ // one of those values is undef, then the output can be undef.
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+ }
+
+ // Next, see if we can transform the arms of the select.
+ APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
+ if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
for (unsigned i = 0; i < VWidth; i++) {
+ // isNullValue() always returns false when called on a ConstantExpr.
+ // Skip constant expressions to avoid propagating incorrect information.
Constant *CElt = CV->getAggregateElement(i);
- // Method isNullValue always returns false when called on a
- // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
- // to avoid propagating incorrect information.
if (isa<ConstantExpr>(CElt))
continue;
+ // TODO: If a select condition element is undef, we can demand from
+ // either side. If one side is known undef, choosing that side would
+ // propagate undef.
if (CElt->isNullValue())
- LeftDemanded.clearBit(i);
+ DemandedLHS.clearBit(i);
else
- RightDemanded.clearBit(i);
+ DemandedRHS.clearBit(i);
}
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
- TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
+ simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);
- // Output elements are undefined if both are undefined.
- UndefElts &= UndefElts2;
+ // Output elements are undefined if the element from each arm is undefined.
+ // TODO: This can be improved. See comment in select condition handling.
+ UndefElts = UndefElts2 & UndefElts3;
break;
}
case Instruction::BitCast: {
@@ -1323,12 +1396,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) {
- I->setOperand(0, TmpV);
- MadeChange = true;
- }
+ simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);
if (VWidth == InVWidth) {
UndefElts = UndefElts2;
@@ -1353,29 +1421,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
break;
}
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // div/rem demand all inputs, because they don't want divide by zero.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
- // Output elements are undefined if both are undefined. Consider things
- // like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
- break;
case Instruction::FPTrunc:
case Instruction::FPExt:
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
break;
case Instruction::Call: {
@@ -1395,9 +1443,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only the lower element is used.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// Only the lower element is undefined. The high elements are zero.
UndefElts = UndefElts[0];
@@ -1406,9 +1452,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Unary scalar-as-vector operations that work column-wise.
case Intrinsic::x86_sse_rcp_ss:
case Intrinsic::x86_sse_rsqrt_ss:
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1428,9 +1472,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_sse2_cmp_sd: {
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1440,9 +1482,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
// Lower element is undefined if both lower elements are undefined.
// Consider things like undef&0. The result is known zero, not undef.
@@ -1459,9 +1499,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Don't use the low element of operand 0.
APInt DemandedElts2 = DemandedElts;
DemandedElts2.clearBit(0);
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts2, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1471,9 +1509,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
// Take the high undef elements from operand 0 and take the lower element
// from operand 1.
@@ -1497,9 +1533,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_avx512_mask_sub_sd_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 0, DemandedElts, UndefElts);
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
@@ -1509,12 +1543,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Only lower element is used for operand 1 and 2.
DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
- UndefElts3, Depth + 1);
- if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts2);
+ simplifyAndSetOp(II, 2, DemandedElts, UndefElts3);
// Lower element is undefined if all three lower elements are undefined.
// Consider things like undef&0. The result is known zero, not undef.
@@ -1559,14 +1589,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
// Demand elements from the operand.
- auto *Op = II->getArgOperand(OpNum);
APInt OpUndefElts(InnerVWidth, 0);
- TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts,
- Depth + 1);
- if (TmpV) {
- II->setArgOperand(OpNum, TmpV);
- MadeChange = true;
- }
+ simplifyAndSetOp(II, OpNum, OpDemandedElts, OpUndefElts);
// Pack the operand's UNDEF elements, one lane at a time.
OpUndefElts = OpUndefElts.zext(VWidth);
@@ -1594,10 +1618,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// PERMV
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps: {
- Value *Op1 = II->getArgOperand(1);
- TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
- Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ simplifyAndSetOp(II, 1, DemandedElts, UndefElts);
break;
}
@@ -1611,16 +1632,40 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format:
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format:
return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
- return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
+ return simplifyAMDGCNMemoryIntrinsicDemanded(
+ II, DemandedElts, 0, II->getNumArgOperands() - 2);
break;
}
- }
+ } // switch on IntrinsicID
break;
+ } // case Call
+ } // switch on Opcode
+
+ // TODO: We bail completely on integer div/rem and shifts because they have
+ // UB/poison potential, but that should be refined.
+ BinaryOperator *BO;
+ if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
+ simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
+ simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
+
+ // Any change to an instruction with potential poison must clear those flags
+ // because we can not guarantee those constraints now. Other analysis may
+ // determine that it is safe to re-apply the flags.
+ if (MadeChange)
+ BO->dropPoisonGeneratingFlags();
+
+ // Output elements are undefined if both are undefined. Consider things
+ // like undef & 0. The result is known zero, not undef.
+ UndefElts &= UndefElts2;
}
- }
+
return MadeChange ? I : nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1c2de6352fa5..0ad1fc0e791f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -46,40 +46,34 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
/// Return true if the value is cheaper to scalarize than it is to leave as a
-/// vector operation. isConstant indicates whether we're extracting one known
-/// element. If false we're extracting a variable index.
-static bool cheapToScalarize(Value *V, bool isConstant) {
- if (Constant *C = dyn_cast<Constant>(V)) {
- if (isConstant) return true;
+/// vector operation. IsConstantExtractIndex indicates whether we are extracting
+/// one known element from a vector constant.
+///
+/// FIXME: It's possible to create more instructions than previously existed.
+static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
+ // If we can pick a scalar constant value out of a vector, that is free.
+ if (auto *C = dyn_cast<Constant>(V))
+ return IsConstantExtractIndex || C->getSplatValue();
+
+ // An insertelement to the same constant index as our extract will simplify
+ // to the scalar inserted element. An insertelement to a different constant
+ // index is irrelevant to our extract.
+ if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
+ return IsConstantExtractIndex;
+
+ if (match(V, m_OneUse(m_Load(m_Value()))))
+ return true;
- // If all elts are the same, we can extract it and use any of the values.
- if (Constant *Op0 = C->getAggregateElement(0U)) {
- for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
- ++i)
- if (C->getAggregateElement(i) != Op0)
- return false;
+ Value *V0, *V1;
+ if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
+ if (cheapToScalarize(V0, IsConstantExtractIndex) ||
+ cheapToScalarize(V1, IsConstantExtractIndex))
return true;
- }
- }
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return false;
- // Insert element gets simplified to the inserted element or is deleted if
- // this is constant idx extract element and its a constant idx insertelt.
- if (I->getOpcode() == Instruction::InsertElement && isConstant &&
- isa<ConstantInt>(I->getOperand(2)))
- return true;
- if (I->getOpcode() == Instruction::Load && I->hasOneUse())
- return true;
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
- if (BO->hasOneUse() &&
- (cheapToScalarize(BO->getOperand(0), isConstant) ||
- cheapToScalarize(BO->getOperand(1), isConstant)))
- return true;
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- if (CI->hasOneUse() &&
- (cheapToScalarize(CI->getOperand(0), isConstant) ||
- cheapToScalarize(CI->getOperand(1), isConstant)))
+ CmpInst::Predicate UnusedPred;
+ if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
+ if (cheapToScalarize(V0, IsConstantExtractIndex) ||
+ cheapToScalarize(V1, IsConstantExtractIndex))
return true;
return false;
@@ -166,92 +160,176 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
return &EI;
}
+static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
+ InstCombiner::BuilderTy &Builder,
+ bool IsBigEndian) {
+ Value *X;
+ uint64_t ExtIndexC;
+ if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
+ !X->getType()->isVectorTy() ||
+ !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
+ return nullptr;
+
+ // If this extractelement is using a bitcast from a vector of the same number
+ // of elements, see if we can find the source element from the source vector:
+ // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
+ Type *SrcTy = X->getType();
+ Type *DestTy = Ext.getType();
+ unsigned NumSrcElts = SrcTy->getVectorNumElements();
+ unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
+ if (NumSrcElts == NumElts)
+ if (Value *Elt = findScalarElement(X, ExtIndexC))
+ return new BitCastInst(Elt, DestTy);
+
+ // If the source elements are wider than the destination, try to shift and
+ // truncate a subset of scalar bits of an insert op.
+ if (NumSrcElts < NumElts) {
+ Value *Scalar;
+ uint64_t InsIndexC;
+ if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
+ m_ConstantInt(InsIndexC))))
+ return nullptr;
+
+ // The extract must be from the subset of vector elements that we inserted
+ // into. Example: if we inserted element 1 of a <2 x i64> and we are
+ // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
+ // of elements 4-7 of the bitcasted vector.
+ unsigned NarrowingRatio = NumElts / NumSrcElts;
+ if (ExtIndexC / NarrowingRatio != InsIndexC)
+ return nullptr;
+
+ // We are extracting part of the original scalar. How that scalar is
+ // inserted into the vector depends on the endian-ness. Example:
+ // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
+ // +--+--+--+--+--+--+--+--+
+ // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
+ // extelt <4 x i16> V', 3: | |S2|S3|
+ // +--+--+--+--+--+--+--+--+
+ // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
+ // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
+ // In this example, we must right-shift little-endian. Big-endian is just a
+ // truncate.
+ unsigned Chunk = ExtIndexC % NarrowingRatio;
+ if (IsBigEndian)
+ Chunk = NarrowingRatio - 1 - Chunk;
+
+ // Bail out if this is an FP vector to FP vector sequence. That would take
+ // more instructions than we started with unless there is no shift, and it
+ // may not be handled as well in the backend.
+ bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
+ bool NeedDestBitcast = DestTy->isFloatingPointTy();
+ if (NeedSrcBitcast && NeedDestBitcast)
+ return nullptr;
+
+ unsigned SrcWidth = SrcTy->getScalarSizeInBits();
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ unsigned ShAmt = Chunk * DestWidth;
+
+ // TODO: This limitation is more strict than necessary. We could sum the
+ // number of new instructions and subtract the number eliminated to know if
+ // we can proceed.
+ if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
+ if (NeedSrcBitcast || NeedDestBitcast)
+ return nullptr;
+
+ if (NeedSrcBitcast) {
+ Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
+ Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
+ }
+
+ if (ShAmt) {
+ // Bail out if we could end with more instructions than we started with.
+ if (!Ext.getVectorOperand()->hasOneUse())
+ return nullptr;
+ Scalar = Builder.CreateLShr(Scalar, ShAmt);
+ }
+
+ if (NeedDestBitcast) {
+ Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
+ return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
+ }
+ return new TruncInst(Scalar, DestTy);
+ }
+
+ return nullptr;
+}
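
A hand-written sketch of the narrowing case handled by foldBitcastExtElt(), mirroring the byte diagram in the comment above (little-endian target assumed; names are illustrative):

    define i16 @ext_of_ins(<2 x i32> %v, i32 %s) {
      %ins = insertelement <2 x i32> %v, i32 %s, i32 1
      %bc = bitcast <2 x i32> %ins to <4 x i16>
      %ext = extractelement <4 x i16> %bc, i32 3
      ret i16 %ext
    }
    ; on a little-endian target this should reduce to roughly:
    ;   %sh = lshr i32 %s, 16
    ;   %ext = trunc i32 %sh to i16
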
+
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
- if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(),
- EI.getIndexOperand(),
+ Value *SrcVec = EI.getVectorOperand();
+ Value *Index = EI.getIndexOperand();
+ if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
SQ.getWithInstruction(&EI)))
return replaceInstUsesWith(EI, V);
- // If vector val is constant with all elements the same, replace EI with
- // that element. We handle a known element # below.
- if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
- if (cheapToScalarize(C, false))
- return replaceInstUsesWith(EI, C->getAggregateElement(0U));
-
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
- if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+ auto *IndexC = dyn_cast<ConstantInt>(Index);
+ if (IndexC) {
+ unsigned NumElts = EI.getVectorOperandType()->getNumElements();
// InstSimplify should handle cases where the index is invalid.
- if (!IdxC->getValue().ule(VectorWidth))
+ if (!IndexC->getValue().ule(NumElts))
return nullptr;
- unsigned IndexVal = IdxC->getZExtValue();
-
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
// property.
- if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
- APInt UndefElts(VectorWidth, 0);
- APInt DemandedMask(VectorWidth, 0);
- DemandedMask.setBit(IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ if (SrcVec->hasOneUse() && NumElts != 1) {
+ APInt UndefElts(NumElts, 0);
+ APInt DemandedElts(NumElts, 0);
+ DemandedElts.setBit(IndexC->getZExtValue());
+ if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts,
UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
}
- // If this extractelement is directly using a bitcast from a vector of
- // the same number of elements, see if we can find the source element from
- // it. In this case, we will end up needing to bitcast the scalars.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
- if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
- if (VT->getNumElements() == VectorWidth)
- if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal))
- return new BitCastInst(Elt, EI.getType());
- }
+ if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
+ return I;
// If there's a vector PHI feeding a scalar use through this extractelement
// instruction, try to scalarize the PHI.
- if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
- Instruction *scalarPHI = scalarizePHI(EI, PN);
- if (scalarPHI)
- return scalarPHI;
- }
+ if (auto *Phi = dyn_cast<PHINode>(SrcVec))
+ if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
+ return ScalarPHI;
}
- if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
- // Push extractelement into predecessor operation if legal and
- // profitable to do so.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (I->hasOneUse() &&
- cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
- Value *newEI0 =
- Builder.CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
- EI.getName()+".lhs");
- Value *newEI1 =
- Builder.CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
- EI.getName()+".rhs");
- return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(),
- newEI0, newEI1, BO);
- }
- } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ BinaryOperator *BO;
+ if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
+ // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
+ Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
+ Value *E0 = Builder.CreateExtractElement(X, Index);
+ Value *E1 = Builder.CreateExtractElement(Y, Index);
+ return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
+ }
+
+ Value *X, *Y;
+ CmpInst::Predicate Pred;
+ if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
+ cheapToScalarize(SrcVec, IndexC)) {
+ // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
+ Value *E0 = Builder.CreateExtractElement(X, Index);
+ Value *E1 = Builder.CreateExtractElement(Y, Index);
+ return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
+ }
+
+ if (auto *I = dyn_cast<Instruction>(SrcVec)) {
+ if (auto *IE = dyn_cast<InsertElementInst>(I)) {
// Extracting the inserted element?
- if (IE->getOperand(2) == EI.getOperand(1))
+ if (IE->getOperand(2) == Index)
return replaceInstUsesWith(EI, IE->getOperand(1));
// If the inserted and extracted elements are constants, they must not
// be the same value, extract from the pre-inserted value instead.
- if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
- Worklist.AddValue(EI.getOperand(0));
+ if (isa<Constant>(IE->getOperand(2)) && IndexC) {
+ Worklist.AddValue(SrcVec);
EI.setOperand(0, IE->getOperand(0));
return &EI;
}
- } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+ } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
- if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+ if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
Value *Src;
unsigned LHSWidth =
@@ -270,13 +348,12 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
ConstantInt::get(Int32Ty,
SrcIdx, false));
}
- } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ } else if (auto *CI = dyn_cast<CastInst>(I)) {
// Canonicalize extractelement(cast) -> cast(extractelement).
// Bitcasts can change the number of vector elements, and they cost
// nothing.
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
- Value *EE = Builder.CreateExtractElement(CI->getOperand(0),
- EI.getIndexOperand());
+ Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
@@ -791,43 +868,62 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
replaceInstUsesWith(IE, VecOp);
- // If the inserted element was extracted from some other vector, and if the
- // indexes are constant, try to turn this into a shufflevector operation.
- if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
- if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
- unsigned NumInsertVectorElts = IE.getType()->getNumElements();
- unsigned NumExtractVectorElts =
- EI->getOperand(0)->getType()->getVectorNumElements();
- unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
- unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
- if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
- return replaceInstUsesWith(IE, VecOp);
-
- if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
- return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
- // If we are extracting a value from a vector, then inserting it right
- // back into the same place, just use the input vector.
- if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
- return replaceInstUsesWith(IE, VecOp);
-
- // If this insertelement isn't used by some other insertelement, turn it
- // (and any insertelements it points to), into one big shuffle.
- if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
- SmallVector<Constant*, 16> Mask;
- ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
-
- // The proposed shuffle may be trivial, in which case we shouldn't
- // perform the combine.
- if (LR.first != &IE && LR.second != &IE) {
- // We now have a shuffle of LHS, RHS, Mask.
- if (LR.second == nullptr)
- LR.second = UndefValue::get(LR.first->getType());
- return new ShuffleVectorInst(LR.first, LR.second,
- ConstantVector::get(Mask));
- }
+ // If the inserted element was extracted from some other vector and both
+ // indexes are constant, try to turn this into a shuffle.
+ uint64_t InsertedIdx, ExtractedIdx;
+ Value *ExtVecOp;
+ if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
+ match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
+ m_ConstantInt(ExtractedIdx)))) {
+ unsigned NumInsertVectorElts = IE.getType()->getNumElements();
+ unsigned NumExtractVectorElts = ExtVecOp->getType()->getVectorNumElements();
+ if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
+ return replaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
+ return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)
+ return replaceInstUsesWith(IE, VecOp);
+
+ // TODO: Looking at the user(s) to determine if this insert is a
+ // fold-to-shuffle opportunity does not match the usual instcombine
+ // constraints. We should decide if the transform is worthy based only
+ // on this instruction and its operands, but that may not work currently.
+ //
+ // Here, we are trying to avoid creating shuffles before reaching
+ // the end of a chain of extract-insert pairs. This is complicated because
+ // we do not generally form arbitrary shuffle masks in instcombine
+ // (because those may codegen poorly), but collectShuffleElements() does
+ // exactly that.
+ //
+ // The rules for determining what is an acceptable target-independent
+ // shuffle mask are fuzzy because they evolve based on the backend's
+ // capabilities and real-world impact.
+ auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
+ if (!Insert.hasOneUse())
+ return true;
+ auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
+ if (!InsertUser)
+ return true;
+ return false;
+ };
+
+ // Try to form a shuffle from a chain of extract-insert ops.
+ if (isShuffleRootCandidate(IE)) {
+ SmallVector<Constant*, 16> Mask;
+ ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
+
+ // The proposed shuffle may be trivial, in which case we shouldn't
+ // perform the combine.
+ if (LR.first != &IE && LR.second != &IE) {
+ // We now have a shuffle of LHS, RHS, Mask.
+ if (LR.second == nullptr)
+ LR.second = UndefValue::get(LR.first->getType());
+ return new ShuffleVectorInst(LR.first, LR.second,
+ ConstantVector::get(Mask));
}
}
}
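
For reference, the extract/insert chain that collectShuffleElements() is meant to turn into one shuffle looks roughly like this (illustrative values; the exact operand order and mask of the result may differ):

    define <4 x float> @collect(<4 x float> %a, <4 x float> %b) {
      %a0 = extractelement <4 x float> %a, i32 0
      %b0 = extractelement <4 x float> %b, i32 0
      %v0 = insertelement <4 x float> undef, float %a0, i32 0
      %v1 = insertelement <4 x float> %v0, float %b0, i32 1
      ret <4 x float> %v1
    }
    ; expected to become a single shuffle selecting a[0] and b[0], e.g.:
    ;   %v1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
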
@@ -857,7 +953,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
/// Return true if we can evaluate the specified expression tree if the vector
/// elements were shuffled in a different order.
-static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
+static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
unsigned Depth = 5) {
// We can always reorder the elements of a constant.
if (isa<Constant>(V))
@@ -904,8 +1000,15 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::GetElementPtr: {
+ // Bail out if we would create longer vector ops. We could allow creating
+ // longer vector ops, but that may result in more expensive codegen. We
+ // would also need to limit the transform to avoid undefined behavior for
+ // integer div/rem.
+ Type *ITy = I->getType();
+ if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
+ return false;
for (Value *Operand : I->operands()) {
- if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
+ if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
return false;
}
return true;
@@ -925,7 +1028,7 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
SeenOnce = true;
}
}
- return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
+ return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
}
}
return false;
@@ -1009,12 +1112,12 @@ static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
llvm_unreachable("failed to rebuild vector instructions");
}
-Value *
-InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
+static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
// Mask.size() does not need to be equal to the number of vector elements.
assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
Type *EltTy = V->getType()->getScalarType();
+ Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
if (isa<UndefValue>(V))
return UndefValue::get(VectorType::get(EltTy, Mask.size()));
@@ -1025,9 +1128,9 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
SmallVector<Constant *, 16> MaskValues;
for (int i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] == -1)
- MaskValues.push_back(UndefValue::get(Builder.getInt32Ty()));
+ MaskValues.push_back(UndefValue::get(I32Ty));
else
- MaskValues.push_back(Builder.getInt32(Mask[i]));
+ MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
}
return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
ConstantVector::get(MaskValues));
@@ -1069,7 +1172,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
SmallVector<Value*, 8> NewOps;
bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ Value *V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
NewOps.push_back(V);
NeedsRebuild |= (V != I->getOperand(i));
}
@@ -1096,11 +1199,11 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
// If element is not in Mask, no need to handle the operand 1 (element to
// be inserted). Just evaluate values in operand 0 according to Mask.
if (!Found)
- return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
+ return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
- Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
+ Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
return InsertElementInst::Create(V, I->getOperand(1),
- Builder.getInt32(Index), "", I);
+ ConstantInt::get(I32Ty, Index), "", I);
}
}
llvm_unreachable("failed to reorder elements of vector instruction!");
@@ -1350,12 +1453,144 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
return NewBO;
}
+/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
+/// narrowing (concatenating with undef and extracting back to the original
+/// length). This allows replacing the wide select with a narrow select.
+static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ // This must be a narrowing identity shuffle. It extracts the 1st N elements
+ // of the 1st vector operand of a shuffle.
+ if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
+ return nullptr;
+
+ // The vector being shuffled must be a vector select that we can eliminate.
+ // TODO: The one-use requirement could be eased if X and/or Y are constants.
+ Value *Cond, *X, *Y;
+ if (!match(Shuf.getOperand(0),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
+ return nullptr;
+
+ // We need a narrow condition value. It must be extended with undef elements
+ // and have the same number of elements as this shuffle.
+ unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
+ Value *NarrowCond;
+ if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
+ m_Constant()))) ||
+ NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
+ !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
+ return nullptr;
+
+ // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask -->
+ // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
+ Value *Undef = UndefValue::get(X->getType());
+ Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
+ Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
+ return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
+}
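
A minimal sketch of the shuffle-select-shuffle pattern matched by narrowVectorSelect(), assuming the one-use constraints hold (names are illustrative):

    define <2 x i32> @narrow_sel(<2 x i1> %cond, <4 x i32> %x, <4 x i32> %y) {
      %widecond = shufflevector <2 x i1> %cond, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
      %sel = select <4 x i1> %widecond, <4 x i32> %x, <4 x i32> %y
      %narrow = shufflevector <4 x i32> %sel, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
      ret <2 x i32> %narrow
    }
    ; expected to become roughly:
    ;   %xn = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    ;   %yn = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    ;   %narrow = select <2 x i1> %cond, <2 x i32> %xn, <2 x i32> %yn
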
+
+/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
+static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
+ Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
+ if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
+ return nullptr;
+
+ Value *X, *Y;
+ Constant *Mask;
+ if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
+ return nullptr;
+
+ // We are extracting a subvector from a shuffle. Remove excess elements from
+ // the 1st shuffle mask to eliminate the extract.
+ //
+ // This transform is conservatively limited to identity extracts because we do
+ // not allow arbitrary shuffle mask creation as a target-independent transform
+ // (because we can't guarantee that will lower efficiently).
+ //
+ // If the extracting shuffle has an undef mask element, it transfers to the
+ // new shuffle mask. Otherwise, copy the original mask element. Example:
+ // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
+ // shuf X, Y, <C0, undef, C2, undef>
+ unsigned NumElts = Shuf.getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewMask(NumElts);
+ assert(NumElts < Mask->getType()->getVectorNumElements() &&
+ "Identity with extract must have less elements than its inputs");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
+ Constant *MaskElt = Mask->getAggregateElement(i);
+ NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt;
+ }
+ return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
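
For example, extracting the low half of a wider shuffle should now fold to a single narrower shuffle by trimming the inner mask (a sketch with illustrative values):

    define <4 x i32> @extract_of_shuffle(<8 x i32> %x, <8 x i32> %y) {
      %wide = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
      %narrow = shufflevector <8 x i32> %wide, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
      ret <4 x i32> %narrow
    }
    ; expected to become roughly:
    ;   %narrow = shufflevector <8 x i32> %x, <8 x i32> %y, <4 x i32> <i32 0, i32 8, i32 1, i32 9>
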
+
+/// Try to replace a shuffle with an insertelement.
+static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
+ Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
+ SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+
+ // The shuffle must not change vector sizes.
+ // TODO: This restriction could be removed if the insert has only one use
+ // (because the transform would require a new length-changing shuffle).
+ int NumElts = Mask.size();
+ if (NumElts != (int)(V0->getType()->getVectorNumElements()))
+ return nullptr;
+
+ // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
+ auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
+ // We need an insertelement with a constant index.
+ if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
+ m_ConstantInt(IndexC))))
+ return false;
+
+ // Test the shuffle mask to see if it splices the inserted scalar into the
+ // operand 1 vector of the shuffle.
+ int NewInsIndex = -1;
+ for (int i = 0; i != NumElts; ++i) {
+ // Ignore undef mask elements.
+ if (Mask[i] == -1)
+ continue;
+
+ // The shuffle takes elements of operand 1 without lane changes.
+ if (Mask[i] == NumElts + i)
+ continue;
+
+ // The shuffle must choose the inserted scalar exactly once.
+ if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
+ return false;
+
+ // The shuffle is placing the inserted scalar into element i.
+ NewInsIndex = i;
+ }
+
+ assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
+
+ // Index is updated to the potentially translated insertion lane.
+ IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
+ return true;
+ };
+
+ // If the shuffle is unnecessary, insert the scalar operand directly into
+ // operand 1 of the shuffle. Example:
+ // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
+ Value *Scalar;
+ ConstantInt *IndexC;
+ if (isShufflingScalarIntoOp1(Scalar, IndexC))
+ return InsertElementInst::Create(V1, Scalar, IndexC);
+
+ // Try again after commuting shuffle. Example:
+ // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
+ // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
+ std::swap(V0, V1);
+ ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
+ if (isShufflingScalarIntoOp1(Scalar, IndexC))
+ return InsertElementInst::Create(V1, Scalar, IndexC);
+
+ return nullptr;
+}
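
The first example from the comment above, written out as IR (a sketch; the inserted scalar is the only element of the insert that the mask uses):

    define <4 x i32> @shuf_of_insert(i32 %s, <4 x i32> %v1) {
      %ins = insertelement <4 x i32> undef, i32 %s, i32 1
      %r = shufflevector <4 x i32> %ins, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
      ret <4 x i32> %r
    }
    ; expected to become roughly:
    ;   %r = insertelement <4 x i32> %v1, i32 %s, i32 0
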
+
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
- SmallVector<int, 16> Mask = SVI.getShuffleMask();
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
-
if (auto *V = SimplifyShuffleVectorInst(
LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
return replaceInstUsesWith(SVI, V);
@@ -1363,9 +1598,10 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
return I;
- bool MadeChange = false;
- unsigned VWidth = SVI.getType()->getVectorNumElements();
+ if (Instruction *I = narrowVectorSelect(SVI, Builder))
+ return I;
+ unsigned VWidth = SVI.getType()->getVectorNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
@@ -1374,18 +1610,22 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return &SVI;
}
+ if (Instruction *I = foldIdentityExtractShuffle(SVI))
+ return I;
+
+ // This transform has the potential to lose undef knowledge, so it is
+ // intentionally placed after SimplifyDemandedVectorElts().
+ if (Instruction *I = foldShuffleWithInsert(SVI))
+ return I;
+
+ SmallVector<int, 16> Mask = SVI.getShuffleMask();
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
unsigned LHSWidth = LHS->getType()->getVectorNumElements();
+ bool MadeChange = false;
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
- if (isa<UndefValue>(LHS) && LHS == RHS) {
- // shuffle(undef,undef,mask) -> undef.
- Value *Result = (VWidth == LHSWidth)
- ? LHS : UndefValue::get(SVI.getType());
- return replaceInstUsesWith(SVI, Result);
- }
-
// Remap any references to RHS to use LHS.
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
@@ -1421,8 +1661,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isRHSID) return replaceInstUsesWith(SVI, RHS);
}
- if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
- Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
+ if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
+ Value *V = evaluateInDifferentElementOrder(LHS, Mask);
return replaceInstUsesWith(SVI, V);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index cff0d5447290..be7d43bbcf2c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -57,7 +57,6 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -97,6 +96,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -120,6 +120,10 @@ DEBUG_COUNTER(VisitCounter, "instcombine-visit",
"Controls which instructions are visited");
static cl::opt<bool>
+EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
+ cl::init(true));
+
+static cl::opt<bool>
EnableExpensiveCombines("expensive-combines",
cl::desc("Enable expensive instruction combines"));
@@ -179,7 +183,10 @@ bool InstCombiner::shouldChangeType(unsigned FromWidth,
/// a fundamental type in IR, and there are many specialized optimizations for
/// i1 types.
bool InstCombiner::shouldChangeType(Type *From, Type *To) const {
- assert(From->isIntegerTy() && To->isIntegerTy());
+ // TODO: This could be extended to allow vectors. Datalayout changes might be
+ // needed to properly support that.
+ if (!From->isIntegerTy() || !To->isIntegerTy())
+ return false;
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
@@ -747,8 +754,9 @@ Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
/// constant zero (which is the 'negate' form).
Value *InstCombiner::dyn_castNegVal(Value *V) const {
- if (BinaryOperator::isNeg(V))
- return BinaryOperator::getNegArgument(V);
+ Value *NegV;
+ if (match(V, m_Neg(m_Value(NegV))))
+ return NegV;
// Constants can be considered to be negated values if they can be folded.
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
@@ -1351,22 +1359,46 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
} while (true);
}
-Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
+Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
if (!Inst.getType()->isVectorTy()) return nullptr;
+ BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
+ unsigned NumElts = cast<VectorType>(Inst.getType())->getNumElements();
+ Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
+ assert(cast<VectorType>(LHS->getType())->getNumElements() == NumElts);
+ assert(cast<VectorType>(RHS->getType())->getNumElements() == NumElts);
+
+ // If both operands of the binop are vector concatenations, then perform the
+ // narrow binop on each pair of the source operands followed by concatenation
+ // of the results.
+ Value *L0, *L1, *R0, *R1;
+ Constant *Mask;
+ if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
+ match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
+ LHS->hasOneUse() && RHS->hasOneUse() &&
+ cast<ShuffleVectorInst>(LHS)->isConcat()) {
+ // This transform does not have the speculative execution constraint as
+ // below because the shuffle is a concatenation. The new binops are
+ // operating on exactly the same elements as the existing binop.
+ // TODO: We could ease the mask requirement to allow different undef lanes,
+ // but that requires an analysis of the binop-with-undef output value.
+ Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
+ if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
+ BO->copyIRFlags(&Inst);
+ Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
+ if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
+ BO->copyIRFlags(&Inst);
+ return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
+ }
+
// It may not be safe to reorder shuffles and things like div, urem, etc.
// because we may trap when executing those ops on unknown vector elements.
// See PR20059.
if (!isSafeToSpeculativelyExecute(&Inst))
return nullptr;
- unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
- Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
- assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
- assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
-
auto createBinOpShuffle = [&](Value *X, Value *Y, Constant *M) {
- Value *XY = Builder.CreateBinOp(Inst.getOpcode(), X, Y);
+ Value *XY = Builder.CreateBinOp(Opcode, X, Y);
if (auto *BO = dyn_cast<BinaryOperator>(XY))
BO->copyIRFlags(&Inst);
return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
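
A rough sketch of the new concatenation case in foldVectorBinop(): when both operands are one-use concatenations with the same mask, the binop is done on the narrow halves and the results are re-concatenated (illustrative names):

    define <4 x i32> @concat_add(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
      %lhs = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
      %rhs = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
      %r = add <4 x i32> %lhs, %rhs
      ret <4 x i32> %r
    }
    ; expected to become roughly:
    ;   %lo = add <2 x i32> %a, %c
    ;   %hi = add <2 x i32> %b, %d
    ;   %r = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
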
@@ -1375,7 +1407,6 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
// If both arguments of the binary operation are shuffles that use the same
// mask and shuffle within a single vector, move the shuffle after the binop.
Value *V1, *V2;
- Constant *Mask;
if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))) &&
match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(Mask))) &&
V1->getType() == V2->getType() &&
@@ -1393,42 +1424,69 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
if (match(&Inst, m_c_BinOp(
m_OneUse(m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))),
m_Constant(C))) &&
- V1->getType() == Inst.getType()) {
+ V1->getType()->getVectorNumElements() <= NumElts) {
+ assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
+ "Shuffle should not change scalar type");
+
// Find constant NewC that has property:
// shuffle(NewC, ShMask) = C
// If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
// reorder is not possible. A 1-to-1 mapping is not required. Example:
// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
+ bool ConstOp1 = isa<Constant>(RHS);
SmallVector<int, 16> ShMask;
ShuffleVectorInst::getShuffleMask(Mask, ShMask);
- SmallVector<Constant *, 16>
- NewVecC(VWidth, UndefValue::get(C->getType()->getScalarType()));
+ unsigned SrcVecNumElts = V1->getType()->getVectorNumElements();
+ UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
+ SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
bool MayChange = true;
- for (unsigned I = 0; I < VWidth; ++I) {
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant *CElt = C->getAggregateElement(I);
if (ShMask[I] >= 0) {
- assert(ShMask[I] < (int)VWidth);
- Constant *CElt = C->getAggregateElement(I);
+ assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
Constant *NewCElt = NewVecC[ShMask[I]];
- if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt)) {
+ // Bail out if:
+ // 1. The constant vector contains a constant expression.
+ // 2. The shuffle needs an element of the constant vector that can't
+ // be mapped to a new constant vector.
+ // 3. This is a widening shuffle that copies elements of V1 into the
+ // extended elements (extending with undef is allowed).
+ if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
+ I >= SrcVecNumElts) {
MayChange = false;
break;
}
NewVecC[ShMask[I]] = CElt;
}
+ // If this is a widening shuffle, we must be able to extend with undef
+ // elements. If the original binop does not produce an undef in the high
+ // lanes, then this transform is not safe.
+ // TODO: We could shuffle those non-undef constant values into the
+ // result by using a constant vector (rather than an undef vector)
+ // as operand 1 of the new binop, but that might be too aggressive
+ // for target-independent shuffle creation.
+ if (I >= SrcVecNumElts) {
+ Constant *MaybeUndef =
+ ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
+ : ConstantExpr::get(Opcode, CElt, UndefScalar);
+ if (!isa<UndefValue>(MaybeUndef)) {
+ MayChange = false;
+ break;
+ }
+ }
}
if (MayChange) {
Constant *NewC = ConstantVector::get(NewVecC);
// It may not be safe to execute a binop on a vector with undef elements
// because the entire instruction can be folded to undef or create poison
// that did not exist in the original code.
- bool ConstOp1 = isa<Constant>(Inst.getOperand(1));
if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
- NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1);
+ NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
// Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
// Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
- Value *NewLHS = isa<Constant>(LHS) ? NewC : V1;
- Value *NewRHS = isa<Constant>(LHS) ? V1 : NewC;
+ Value *NewLHS = ConstOp1 ? V1 : NewC;
+ Value *NewRHS = ConstOp1 ? NewC : V1;
return createBinOpShuffle(NewLHS, NewRHS, Mask);
}
}
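
The ShMask example from the comment above corresponds to IR roughly like this; the same-width case continues to move the binop ahead of the shuffle (illustrative):

    define <4 x i32> @shuffle_then_add(<4 x i32> %v) {
      %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
      %r = add <4 x i32> %s, <i32 5, i32 5, i32 6, i32 6>
      ret <4 x i32> %r
    }
    ; expected to become roughly:
    ;   %a = add <4 x i32> %v, <i32 undef, i32 5, i32 6, i32 undef>
    ;   %r = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
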
@@ -1436,6 +1494,62 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
return nullptr;
}
+/// Try to narrow the width of a binop if at least 1 operand is an extend
+/// of a value. This requires a potentially expensive known bits check to make
+/// sure the narrow op does not overflow.
+Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
+ // We need at least one extended operand.
+ Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
+
+ // If this is a sub, we swap the operands since we always want an extension
+ // on the RHS. The LHS can be an extension or a constant.
+ if (BO.getOpcode() == Instruction::Sub)
+ std::swap(Op0, Op1);
+
+ Value *X;
+ bool IsSext = match(Op0, m_SExt(m_Value(X)));
+ if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
+ return nullptr;
+
+ // If both operands are the same extension from the same source type and we
+ // can eliminate at least one (hasOneUse), this might work.
+ CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
+ Value *Y;
+ if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
+ cast<Operator>(Op1)->getOpcode() == CastOpc &&
+ (Op0->hasOneUse() || Op1->hasOneUse()))) {
+ // If that did not match, see if we have a suitable constant operand.
+ // Truncating and extending must produce the same constant.
+ Constant *WideC;
+ if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
+ return nullptr;
+ Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
+ if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
+ return nullptr;
+ Y = NarrowC;
+ }
+
+ // Swap back now that we found our operands.
+ if (BO.getOpcode() == Instruction::Sub)
+ std::swap(X, Y);
+
+ // Both operands have narrow versions. Last step: the math must not overflow
+ // in the narrow width.
+ if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
+ return nullptr;
+
+ // bo (ext X), (ext Y) --> ext (bo X, Y)
+ // bo (ext X), C --> ext (bo X, C')
+ Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
+ if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
+ if (IsSext)
+ NewBinOp->setHasNoSignedWrap();
+ else
+ NewBinOp->setHasNoUnsignedWrap();
+ }
+ return CastInst::Create(CastOpc, NarrowBO, BO.getType());
+}
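
A sketch of the kind of input narrowMathIfNoOverflow() targets: both operands of the wide add are zexts from the same narrow type, and known bits prove the narrow add cannot wrap (illustrative; the wrap flag on the narrow op depends on the extension kind):

    define i64 @narrow_add(i32 %a) {
      %x = lshr i32 %a, 7
      %y = lshr i32 %a, 9
      %xz = zext i32 %x to i64
      %yz = zext i32 %y to i64
      %r = add i64 %xz, %yz
      ret i64 %r
    }
    ; the i32 add of %x and %y cannot overflow, so this should become roughly:
    ;   %n = add nuw i32 %x, %y
    ;   %r = zext i32 %n to i64
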
+
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
Type *GEPType = GEP.getType();
@@ -1963,9 +2077,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) ||
(GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) {
- GEP.setOperand(0, SrcOp);
- GEP.setSourceElementType(SrcEltType);
- return &GEP;
+
+ // Create a new GEP here, as using `setOperand()` followed by
+ // `setSourceElementType()` won't actually update the type of the
+ // existing GEP Value, which causes issues if this Value is accessed when
+ // constructing an AddrSpaceCastInst.
+ Value *NGEP =
+ GEP.isInBounds()
+ ? Builder.CreateInBoundsGEP(nullptr, SrcOp, {Ops[1], Ops[2]})
+ : Builder.CreateGEP(nullptr, SrcOp, {Ops[1], Ops[2]});
+ NGEP->takeName(&GEP);
+
+ // Preserve GEP address space to satisfy users
+ if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+ return new AddrSpaceCastInst(NGEP, GEPType);
+
+ return replaceInstUsesWith(GEP, NGEP);
}
// See if we can simplify:
@@ -2137,14 +2264,21 @@ static bool isAllocSiteRemovable(Instruction *AI,
}
Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
- // If we have a malloc call which is only used in any amount of comparisons
- // to null and free calls, delete the calls and replace the comparisons with
- // true or false as appropriate.
+ // If we have a malloc call which is only used in any amount of comparisons to
+ // null and free calls, delete the calls and replace the comparisons with true
+ // or false as appropriate.
+
+ // This is based on the principle that we can substitute our own allocation
+ // function (which will never return null) rather than knowledge of the
+ // specific function being called. In some sense this can change the permitted
+ // outputs of a program (when we convert a malloc to an alloca, the fact that
+ // the allocation is now on the stack is potentially visible, for example),
+ // but we believe it happens in a permissible manner.
SmallVector<WeakTrackingVH, 64> Users;
// If we are removing an alloca with a dbg.declare, insert dbg.value calls
// before each store.
- TinyPtrVector<DbgInfoIntrinsic *> DIIs;
+ TinyPtrVector<DbgVariableIntrinsic *> DIIs;
std::unique_ptr<DIBuilder> DIB;
if (isa<AllocaInst>(MI)) {
DIIs = FindDbgAddrUses(&MI);
@@ -2215,14 +2349,14 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
/// The move is performed only if the block containing the call to free
/// will be removed, i.e.:
/// 1. it has only one predecessor P, and P has two successors
-/// 2. it contains the call and an unconditional branch
+/// 2. it contains the call, noops, and an unconditional branch
/// 3. its successor is the same as its predecessor's successor
///
/// The profitability is out-of concern here and this function should
/// be called only if the caller knows this transformation would be
/// profitable (e.g., for code size).
-static Instruction *
-tryToMoveFreeBeforeNullTest(CallInst &FI) {
+static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
+ const DataLayout &DL) {
Value *Op = FI.getArgOperand(0);
BasicBlock *FreeInstrBB = FI.getParent();
BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
@@ -2235,20 +2369,34 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) {
return nullptr;
// Validate constraint #2: Does this block contains only the call to
- // free and an unconditional branch?
- // FIXME: We could check if we can speculate everything in the
- // predecessor block
- if (FreeInstrBB->size() != 2)
- return nullptr;
+ // free, noops, and an unconditional branch?
BasicBlock *SuccBB;
- if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
+ Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
+ if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
return nullptr;
+ // If there are only 2 instructions in the block, at this point,
+ // they are the call to free and the unconditional branch.
+ // If there are more than 2 instructions, check that they are noops,
+ // i.e., they won't hurt the performance of the generated code.
+ if (FreeInstrBB->size() != 2) {
+ for (const Instruction &Inst : *FreeInstrBB) {
+ if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
+ continue;
+ auto *Cast = dyn_cast<CastInst>(&Inst);
+ if (!Cast || !Cast->isNoopCast(DL))
+ return nullptr;
+ }
+ }
// Validate the rest of constraint #1 by matching on the pred branch.
- TerminatorInst *TI = PredBB->getTerminator();
+ Instruction *TI = PredBB->getTerminator();
BasicBlock *TrueBB, *FalseBB;
ICmpInst::Predicate Pred;
- if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
+ if (!match(TI, m_Br(m_ICmp(Pred,
+ m_CombineOr(m_Specific(Op),
+ m_Specific(Op->stripPointerCasts())),
+ m_Zero()),
+ TrueBB, FalseBB)))
return nullptr;
if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
return nullptr;
@@ -2259,7 +2407,17 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) {
assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
"Broken CFG: missing edge from predecessor to successor");
- FI.moveBefore(TI);
+ // At this point, we know that everything in FreeInstrBB can be moved
+ // before TI.
+ for (BasicBlock::iterator It = FreeInstrBB->begin(), End = FreeInstrBB->end();
+ It != End;) {
+ Instruction &Instr = *It++;
+ if (&Instr == FreeInstrBBTerminator)
+ break;
+ Instr.moveBefore(TI);
+ }
+ assert(FreeInstrBB->size() == 1 &&
+ "Only the branch instruction should remain");
return &FI;
}
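
For reference, the transform (still gated on MinimizeSize in visitFree below) has roughly this shape; a later CFG cleanup is expected to remove the emptied block (illustrative IR):

    define void @maybe_free(i8* %p) minsize {
    entry:
      %isnull = icmp eq i8* %p, null
      br i1 %isnull, label %exit, label %dofree
    dofree:
      call void @free(i8* %p)
      br label %exit
    exit:
      ret void
    }
    declare void @free(i8*)
    ; expected to become roughly: the call to @free is moved into %entry,
    ; ahead of the (now redundant) null check.
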
@@ -2286,7 +2444,7 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
// into
// free(foo);
if (MinimizeSize)
- if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
+ if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
return I;
return nullptr;
@@ -2379,9 +2537,11 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
// Shrink the condition operand if the new type is smaller than the old type.
- // This may produce a non-standard type for the switch, but that's ok because
- // the backend should extend back to a legal type for the target.
- if (NewWidth > 0 && NewWidth < Known.getBitWidth()) {
+ // But do not shrink to a non-standard type, because backend can't generate
+ // good code for that yet.
+ // TODO: We can make it aggressive again after fixing PR39569.
+ if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
+ shouldChangeType(Known.getBitWidth(), NewWidth)) {
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder.SetInsertPoint(&SI);
Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
@@ -2902,7 +3062,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
- isa<TerminatorInst>(I))
+ I->isTerminator())
return false;
// Do not sink alloca instructions out of the entry block.
@@ -2934,7 +3094,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// Also sink all related debug uses from the source basic block. Otherwise we
// get debug use before the def.
- SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
findDbgUsers(DbgUsers, I);
for (auto *DII : DbgUsers) {
if (DII->getParent() == SrcBlock) {
@@ -3000,7 +3160,7 @@ bool InstCombiner::run() {
}
// See if we can trivially sink this instruction to a successor basic block.
- if (I->hasOneUse()) {
+ if (EnableCodeSinking && I->hasOneUse()) {
BasicBlock *BB = I->getParent();
Instruction *UserInst = cast<Instruction>(*I->user_begin());
BasicBlock *UserParent;
@@ -3183,7 +3343,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
// Recursively visit successors. If this is a branch or switch on a
// constant, only visit the reachable successor.
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
@@ -3198,7 +3358,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
}
}
- for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock *SuccBB : successors(TI))
Worklist.push_back(SuccBB);
} while (!Worklist.empty());
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6af44354225c..f1558c75cb90 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -109,6 +109,7 @@ static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
+static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000;
static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
@@ -344,10 +345,14 @@ static cl::opt<uint32_t> ClForceExperiment(
cl::init(0));
static cl::opt<bool>
- ClUsePrivateAliasForGlobals("asan-use-private-alias",
- cl::desc("Use private aliases for global"
- " variables"),
- cl::Hidden, cl::init(false));
+ ClUsePrivateAlias("asan-use-private-alias",
+ cl::desc("Use private aliases for global variables"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+ ClUseOdrIndicator("asan-use-odr-indicator",
+ cl::desc("Use odr indicators to improve ODR reporting"),
+ cl::Hidden, cl::init(false));
static cl::opt<bool>
ClUseGlobalsGC("asan-globals-live-support",
@@ -436,8 +441,11 @@ public:
for (auto MDN : Globals->operands()) {
// Metadata node contains the global and the fields of "Entry".
assert(MDN->getNumOperands() == 5);
- auto *GV = mdconst::extract_or_null<GlobalVariable>(MDN->getOperand(0));
+ auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
// The optimizer may optimize away a global entirely.
+ if (!V) continue;
+ auto *StrippedV = V->stripPointerCasts();
+ auto *GV = dyn_cast<GlobalVariable>(StrippedV);
if (!GV) continue;
// We can already have an entry for GV if it was merged with another
// global.
@@ -538,11 +546,14 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
Mapping.Offset = kPPC64_ShadowOffset64;
else if (IsSystemZ)
Mapping.Offset = kSystemZ_ShadowOffset64;
- else if (IsFreeBSD)
+ else if (IsFreeBSD && !IsMIPS64)
Mapping.Offset = kFreeBSD_ShadowOffset64;
- else if (IsNetBSD)
- Mapping.Offset = kNetBSD_ShadowOffset64;
- else if (IsPS4CPU)
+ else if (IsNetBSD) {
+ if (IsKasan)
+ Mapping.Offset = kNetBSDKasan_ShadowOffset64;
+ else
+ Mapping.Offset = kNetBSD_ShadowOffset64;
+ } else if (IsPS4CPU)
Mapping.Offset = kPS4CPU_ShadowOffset64;
else if (IsLinux && IsX86_64) {
if (IsKasan)
@@ -731,9 +742,12 @@ public:
explicit AddressSanitizerModule(bool CompileKernel = false,
bool Recover = false,
- bool UseGlobalsGC = true)
- : ModulePass(ID),
- UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+ bool UseGlobalsGC = true,
+ bool UseOdrIndicator = false)
+ : ModulePass(ID), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+ // Enable aliases as they should have no downside with ODR indicators.
+ UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias),
+ UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator),
// Not a typo: ClWithComdat is almost completely pointless without
// ClUseGlobalsGC (because then it only works on modules without
// globals, which are rare); it is a prerequisite for ClUseGlobalsGC;
@@ -742,11 +756,10 @@ public:
// ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to
// do globals-gc.
UseCtorComdat(UseGlobalsGC && ClWithComdat) {
- this->Recover = ClRecover.getNumOccurrences() > 0 ?
- ClRecover : Recover;
- this->CompileKernel = ClEnableKasan.getNumOccurrences() > 0 ?
- ClEnableKasan : CompileKernel;
- }
+ this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+ this->CompileKernel =
+ ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+ }
bool runOnModule(Module &M) override;
StringRef getPassName() const override { return "AddressSanitizerModule"; }
@@ -790,6 +803,8 @@ private:
bool CompileKernel;
bool Recover;
bool UseGlobalsGC;
+ bool UsePrivateAlias;
+ bool UseOdrIndicator;
bool UseCtorComdat;
Type *IntptrTy;
LLVMContext *C;
@@ -990,7 +1005,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
if (ID == Intrinsic::localescape) LocalEscapeCall = &II;
if (!ASan.UseAfterScope)
return;
- if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
+ if (!II.isLifetimeStartOrEnd())
return;
// Found lifetime intrinsic, add ASan instrumentation if necessary.
ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
@@ -1089,9 +1104,11 @@ INITIALIZE_PASS(
ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
bool Recover,
- bool UseGlobalsGC) {
+ bool UseGlobalsGC,
+ bool UseOdrIndicator) {
assert(!CompileKernel || Recover);
- return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC);
+ return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC,
+ UseOdrIndicator);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -1100,25 +1117,11 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
return Res;
}
-// Create a constant for Str so that we can pass it to the run-time lib.
-static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
- bool AllowMerging) {
- Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- // We use private linkage for module-local strings. If they can be merged
- // with another one, we set the unnamed_addr attribute.
- GlobalVariable *GV =
- new GlobalVariable(M, StrConst->getType(), true,
- GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix);
- if (AllowMerging) GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
- return GV;
-}
-
/// Create a global describing a source location.
static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
LocationMetadata MD) {
Constant *LocData[] = {
- createPrivateGlobalForString(M, MD.Filename, true),
+ createPrivateGlobalForString(M, MD.Filename, true, kAsanGenPrefix),
ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo),
ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo),
};
@@ -1132,6 +1135,10 @@ static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
/// Check if \p G has been created by a trusted compiler pass.
static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
+ // Do not instrument @llvm.global_ctors, @llvm.used, etc.
+ if (G->getName().startswith("llvm."))
+ return true;
+
// Do not instrument asan globals.
if (G->getName().startswith(kAsanGenPrefix) ||
G->getName().startswith(kSanCovGenPrefix) ||
@@ -1379,7 +1386,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
} else {
IRBuilder<> IRB(I);
Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
- TerminatorInst *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+ Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
InsertBefore = ThenTerm;
}
@@ -1532,8 +1539,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *TagCheck =
IRB.CreateICmpEQ(Tag, ConstantInt::get(IntptrTy, kMyriadDDRTag));
- TerminatorInst *TagCheckTerm = SplitBlockAndInsertIfThen(
- TagCheck, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ Instruction *TagCheckTerm =
+ SplitBlockAndInsertIfThen(TagCheck, InsertBefore, false,
+ MDBuilder(*C).createBranchWeights(1, 100000));
assert(cast<BranchInst>(TagCheckTerm)->isUnconditional());
IRB.SetInsertPoint(TagCheckTerm);
InsertBefore = TagCheckTerm;
@@ -1549,12 +1557,12 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t Granularity = 1ULL << Mapping.Scale;
- TerminatorInst *CrashTerm = nullptr;
+ Instruction *CrashTerm = nullptr;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
// We use branch weights for the slow path check, to indicate that the slow
// path is rarely taken. This seems to be the case for SPEC benchmarks.
- TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen(
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
assert(cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
@@ -1653,14 +1661,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
- // Touch only those globals that will not be defined in other modules.
- // Don't handle ODR linkage types and COMDATs since other modules may be built
- // without ASan.
- if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
- G->getLinkage() != GlobalVariable::PrivateLinkage &&
- G->getLinkage() != GlobalVariable::InternalLinkage)
- return false;
- if (G->hasComdat()) return false;
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
// - Need to poison all copies, not just the main thread's one.
@@ -1668,6 +1668,33 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// For now, just ignore this Global if the alignment is large.
if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false;
+ // For non-COFF targets, only instrument globals known to be defined by this
+ // TU.
+ // FIXME: We can instrument comdat globals on ELF if we are using the
+ // GC-friendly metadata scheme.
+ if (!TargetTriple.isOSBinFormatCOFF()) {
+ if (!G->hasExactDefinition() || G->hasComdat())
+ return false;
+ } else {
+ // On COFF, don't instrument non-ODR linkages.
+ if (G->isInterposable())
+ return false;
+ }
+
+ // If a comdat is present, it must have a selection kind that implies ODR
+ // semantics: no duplicates, any, or exact match.
+ if (Comdat *C = G->getComdat()) {
+ switch (C->getSelectionKind()) {
+ case Comdat::Any:
+ case Comdat::ExactMatch:
+ case Comdat::NoDuplicates:
+ break;
+ case Comdat::Largest:
+ case Comdat::SameSize:
+ return false;
+ }
+ }
+
if (G->hasSection()) {
StringRef Section = G->getSection();
@@ -2082,7 +2109,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
// We shouldn't merge same module names, as this string serves as unique
// module ID in runtime.
GlobalVariable *ModuleName = createPrivateGlobalForString(
- M, M.getModuleIdentifier(), /*AllowMerging*/ false);
+ M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
@@ -2094,7 +2121,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
// if it's available, otherwise just write the name of global variable).
GlobalVariable *Name = createPrivateGlobalForString(
M, MD.Name.empty() ? NameForGlobal : MD.Name,
- /*AllowMerging*/ true);
+ /*AllowMerging*/ true, kAsanGenPrefix);
Type *Ty = G->getValueType();
uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
@@ -2121,7 +2148,12 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer,
"", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
+ NewGlobal->setComdat(G->getComdat());
NewGlobal->setAlignment(MinRZ);
+ // Don't fold globals with redzones. The ODR violation detector and redzone
+ // poisoning implicitly create a dependence on the global's address, so it
+ // is no longer valid for it to be marked unnamed_addr.
+ NewGlobal->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
// Move null-terminated C strings to "__asan_cstring" section on Darwin.
if (TargetTriple.isOSBinFormatMachO() && !G->hasSection() &&
@@ -2162,12 +2194,18 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
bool CanUsePrivateAliases =
TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO() ||
TargetTriple.isOSBinFormatWasm();
- if (CanUsePrivateAliases && ClUsePrivateAliasForGlobals) {
+ if (CanUsePrivateAliases && UsePrivateAlias) {
// Create local alias for NewGlobal to avoid crash on ODR between
// instrumented and non-instrumented libraries.
- auto *GA = GlobalAlias::create(GlobalValue::InternalLinkage,
- NameForGlobal + M.getName(), NewGlobal);
+ InstrumentedGlobal =
+ GlobalAlias::create(GlobalValue::PrivateLinkage, "", NewGlobal);
+ }
+ // ODR violations should not happen for local linkage.
+ if (NewGlobal->hasLocalLinkage()) {
+ ODRIndicator = ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1),
+ IRB.getInt8PtrTy());
+ } else if (UseOdrIndicator) {
// With local aliases, we need to provide another externally visible
// symbol __odr_asan_XXX to detect ODR violation.
auto *ODRIndicatorSym =
@@ -2181,7 +2219,6 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass());
ODRIndicatorSym->setAlignment(1);
ODRIndicator = ODRIndicatorSym;
- InstrumentedGlobal = GA;
}
Constant *Initializer = ConstantStruct::get(
@@ -2996,7 +3033,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), DescriptionString,
- /*AllowMerging*/ true);
+ /*AllowMerging*/ true, kAsanGenPrefix);
Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
IRB.CreateStore(Description, BasePlus1);
// Write the PC to redzone[2].
@@ -3054,7 +3091,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
// <This is not a fake stack; unpoison the redzones>
Value *Cmp =
IRBRet.CreateICmpNE(FakeStack, Constant::getNullValue(IntptrTy));
- TerminatorInst *ThenTerm, *ElseTerm;
+ Instruction *ThenTerm, *ElseTerm;
SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm);
IRBuilder<> IRBPoison(ThenTerm);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
index cc9b149d0b6a..e178ef386e68 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
+++ b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
@@ -119,7 +119,7 @@ public:
static const uint32_t CriticalEdgeMultiplier = 1000;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
uint64_t BBWeight =
(BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
uint64_t Weight = 2;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index 9606b3da2475..cdcd01726906 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -88,11 +88,10 @@ void CGProfilePass::addModuleFlags(
std::vector<Metadata *> Nodes;
for (auto E : Counts) {
- SmallVector<Metadata *, 3> Vals;
- Vals.push_back(ValueAsMetadata::get(E.first.first));
- Vals.push_back(ValueAsMetadata::get(E.first.second));
- Vals.push_back(MDB.createConstant(
- ConstantInt::get(Type::getInt64Ty(Context), E.second)));
+ Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
+ ValueAsMetadata::get(E.first.second),
+ MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(Context), E.second))};
Nodes.push_back(MDNode::get(Context, Vals));
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
new file mode 100644
index 000000000000..1ada0b713092
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -0,0 +1,2074 @@
+//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
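+
+// A minimal sketch of the effect (hypothetical code, not from a real profile):
+// two back-to-back biased checks such as
+//
+//   if (a) { foo(); }   // taken ~99% of the time
+//   if (b) { bar(); }   // taken ~99% of the time
+//
+// become, on the hot path, a single merged branch:
+//
+//   if (a & b) { foo(); bar(); }   // hot clone; the inner branches fold away
+//   else       { /* original code with both branches kept */ }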
+
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "chr"
+
+#define CHR_DEBUG(X) LLVM_DEBUG(X)
+
+static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
+ cl::desc("Apply CHR for all functions"));
+
+static cl::opt<double> CHRBiasThreshold(
+ "chr-bias-threshold", cl::init(0.99), cl::Hidden,
+ cl::desc("CHR considers a branch bias greater than this ratio as biased"));
+
+static cl::opt<unsigned> CHRMergeThreshold(
+ "chr-merge-threshold", cl::init(2), cl::Hidden,
+ cl::desc("CHR merges a group of N branches/selects where N >= this value"));
+
+static cl::opt<std::string> CHRModuleList(
+ "chr-module-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
+
+static cl::opt<std::string> CHRFunctionList(
+ "chr-function-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
+
+static StringSet<> CHRModules;
+static StringSet<> CHRFunctions;
+
+static void parseCHRFilterFiles() {
+ if (!CHRModuleList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRModules.insert(Line);
+ }
+ }
+ if (!CHRFunctionList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRFunctions.insert(Line);
+ }
+ }
+}
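+
+// The filter files are plain text with one entry per line; e.g. a
+// chr-function-list file could contain mangled names such as
+//
+//   _Z3foov
+//   _Z7computei
+//
+// (hypothetical names). Blank lines and surrounding whitespace are ignored.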
+
+namespace {
+class ControlHeightReductionLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ControlHeightReductionLegacyPass() : FunctionPass(ID) {
+ initializeControlHeightReductionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ parseCHRFilterFiles();
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<RegionInfoPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char ControlHeightReductionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
+INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+
+FunctionPass *llvm::createControlHeightReductionLegacyPass() {
+ return new ControlHeightReductionLegacyPass();
+}
+
+namespace {
+
+struct CHRStats {
+ CHRStats() : NumBranches(0), NumBranchesDelta(0),
+ WeightedNumBranchesDelta(0) {}
+ void print(raw_ostream &OS) const {
+ OS << "CHRStats: NumBranches " << NumBranches
+ << " NumBranchesDelta " << NumBranchesDelta
+ << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
+ }
+ uint64_t NumBranches; // The original number of conditional branches /
+ // selects
+ uint64_t NumBranchesDelta; // The decrease of the number of conditional
+ // branches / selects in the hot paths due to CHR.
+ uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
+ // count at the scope entry.
+};
+
+// RegInfo - some properties of a Region.
+struct RegInfo {
+ RegInfo() : R(nullptr), HasBranch(false) {}
+ RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
+ Region *R;
+ bool HasBranch;
+ SmallVector<SelectInst *, 8> Selects;
+};
+
+typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
+
+// CHRScope - a sequence of regions to CHR together. It corresponds to a
+// sequence of conditional blocks. It can have subscopes which correspond to
+// nested conditional blocks. Nested CHRScopes form a tree.
+class CHRScope {
+ public:
+ CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
+ assert(RI.R && "Null RegionIn");
+ RegInfos.push_back(RI);
+ }
+
+ Region *getParentRegion() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ Region *Parent = RegInfos[0].R->getParent();
+ assert(Parent && "Unexpected to call this on the top-level region");
+ return Parent;
+ }
+
+ BasicBlock *getEntryBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.front().R->getEntry();
+ }
+
+ BasicBlock *getExitBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.back().R->getExit();
+ }
+
+ bool appendable(CHRScope *Next) {
+ // The next scope is appendable only if this scope is directly connected to
+ // it (which implies it post-dominates this scope) and this scope dominates
+ // it (no edge to the next scope outside this scope).
+ BasicBlock *NextEntry = Next->getEntryBlock();
+ if (getExitBlock() != NextEntry)
+ // Not directly connected.
+ return false;
+ Region *LastRegion = RegInfos.back().R;
+ for (BasicBlock *Pred : predecessors(NextEntry))
+ if (!LastRegion->contains(Pred))
+ // There's an edge going into the entry of the next scope from outside
+ // of this scope.
+ return false;
+ return true;
+ }
+
+ void append(CHRScope *Next) {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
+ assert(getParentRegion() == Next->getParentRegion() &&
+ "Must be siblings");
+ assert(getExitBlock() == Next->getEntryBlock() &&
+ "Must be adjacent");
+ for (RegInfo &RI : Next->RegInfos)
+ RegInfos.push_back(RI);
+ for (CHRScope *Sub : Next->Subs)
+ Subs.push_back(Sub);
+ }
+
+ void addSub(CHRScope *SubIn) {
+#ifndef NDEBUG
+ bool IsChild = false;
+ for (RegInfo &RI : RegInfos)
+ if (RI.R == SubIn->getParentRegion()) {
+ IsChild = true;
+ break;
+ }
+ assert(IsChild && "Must be a child");
+#endif
+ Subs.push_back(SubIn);
+ }
+
+ // Split this scope at the boundary region into two: the regions at and after
+ // the boundary go to the new tail scope, which is returned.
+ CHRScope *split(Region *Boundary) {
+ assert(Boundary && "Boundary null");
+ assert(RegInfos.begin()->R != Boundary &&
+ "Can't be split at beginning");
+ auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Boundary](const RegInfo& RI) {
+ return Boundary == RI.R;
+ });
+ if (BoundaryIt == RegInfos.end())
+ return nullptr;
+ SmallVector<RegInfo, 8> TailRegInfos;
+ SmallVector<CHRScope *, 8> TailSubs;
+ TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
+ RegInfos.resize(BoundaryIt - RegInfos.begin());
+ DenseSet<Region *> TailRegionSet;
+ for (RegInfo &RI : TailRegInfos)
+ TailRegionSet.insert(RI.R);
+ for (auto It = Subs.begin(); It != Subs.end(); ) {
+ CHRScope *Sub = *It;
+ assert(Sub && "null Sub");
+ Region *Parent = Sub->getParentRegion();
+ if (TailRegionSet.count(Parent)) {
+ TailSubs.push_back(Sub);
+ It = Subs.erase(It);
+ } else {
+ assert(std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Parent](const RegInfo& RI) {
+ return Parent == RI.R;
+ }) != RegInfos.end() &&
+ "Must be in head");
+ ++It;
+ }
+ }
+ assert(HoistStopMap.empty() && "MapHoistStops must be empty");
+ return new CHRScope(TailRegInfos, TailSubs);
+ }
+
+ bool contains(Instruction *I) const {
+ BasicBlock *Parent = I->getParent();
+ for (const RegInfo &RI : RegInfos)
+ if (RI.R->contains(Parent))
+ return true;
+ return false;
+ }
+
+ void print(raw_ostream &OS) const;
+
+ SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
+ SmallVector<CHRScope *, 8> Subs; // Subscopes.
+
+ // The instruction at which to insert the CHR conditional branch (and hoist
+ // the dependent condition values).
+ Instruction *BranchInsertPoint;
+
+ // True-biased and false-biased regions (conditional blocks),
+ // respectively. Used only for the outermost scope and includes regions in
+ // subscopes. The rest are unbiased.
+ DenseSet<Region *> TrueBiasedRegions;
+ DenseSet<Region *> FalseBiasedRegions;
+ // Among the biased regions, the regions that get CHRed.
+ SmallVector<RegInfo, 8> CHRRegions;
+
+ // True-biased and false-biased selects, respectively. Used only for the
+ // outermost scope and includes ones in subscopes.
+ DenseSet<SelectInst *> TrueBiasedSelects;
+ DenseSet<SelectInst *> FalseBiasedSelects;
+
+ // Map from one of the above regions to the instructions to stop
+ // hoisting instructions at through use-def chains.
+ HoistStopMapTy HoistStopMap;
+
+ private:
+ CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
+ SmallVector<CHRScope *, 8> &SubsIn)
+ : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
+};
+
+class CHR {
+ public:
+ CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
+ ProfileSummaryInfo &PSIin, RegionInfo &RIin,
+ OptimizationRemarkEmitter &OREin)
+ : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
+
+ ~CHR() {
+ for (CHRScope *Scope : Scopes) {
+ delete Scope;
+ }
+ }
+
+ bool run();
+
+ private:
+ // See the comments in CHR::run() for the high level flow of the algorithm and
+ // what the following functions do.
+
+ void findScopes(SmallVectorImpl<CHRScope *> &Output) {
+ Region *R = RI.getTopLevelRegion();
+ CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
+ if (Scope) {
+ Output.push_back(Scope);
+ }
+ }
+ CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes);
+ CHRScope *findScope(Region *R);
+ void checkScopeHoistable(CHRScope *Scope);
+
+ void splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables);
+
+ void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
+ void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
+ void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
+ void cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap);
+ BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap);
+ void fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount);
+ void fixupBranch(Region *R,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void fixupSelect(SelectInst* SI,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition);
+
+ Function &F;
+ BlockFrequencyInfo &BFI;
+ DominatorTree &DT;
+ ProfileSummaryInfo &PSI;
+ RegionInfo &RI;
+ OptimizationRemarkEmitter &ORE;
+ CHRStats Stats;
+
+ // All the true-biased regions in the function
+ DenseSet<Region *> TrueBiasedRegionsGlobal;
+ // All the false-biased regions in the function
+ DenseSet<Region *> FalseBiasedRegionsGlobal;
+ // All the true-biased selects in the function
+ DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
+ // All the false-biased selects in the function
+ DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
+ // A map from biased regions to their branch bias
+ DenseMap<Region *, BranchProbability> BranchBiasMap;
+ // A map from biased selects to their branch bias
+ DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
+ // All the scopes.
+ DenseSet<CHRScope *> Scopes;
+};
+
+} // end anonymous namespace
+
+static inline
+raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
+ const CHRStats &Stats) {
+ Stats.print(OS);
+ return OS;
+}
+
+static inline
+raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
+ Scope.print(OS);
+ return OS;
+}
+
+static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
+ if (ForceCHR)
+ return true;
+
+ if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
+ if (CHRModules.count(F.getParent()->getName()))
+ return true;
+ return CHRFunctions.count(F.getName());
+ }
+
+ assert(PSI.hasProfileSummary() && "Empty PSI?");
+ return PSI.isFunctionEntryHot(&F);
+}
+
+static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
+ CHRStats *Stats) {
+ StringRef FuncName = F.getName();
+ StringRef ModuleName = F.getParent()->getName();
+ (void)(FuncName); // Unused in release build.
+ (void)(ModuleName); // Unused in release build.
+ CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
+ << FuncName);
+ if (Stats)
+ CHR_DEBUG(dbgs() << " " << *Stats);
+ CHR_DEBUG(dbgs() << "\n");
+ CHR_DEBUG(F.dump());
+}
+
+void CHRScope::print(raw_ostream &OS) const {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ OS << "CHRScope[";
+ OS << RegInfos.size() << ", Regions[";
+ for (const RegInfo &RI : RegInfos) {
+ OS << RI.R->getNameStr();
+ if (RI.HasBranch)
+ OS << " B";
+ if (RI.Selects.size() > 0)
+ OS << " S" << RI.Selects.size();
+ OS << ", ";
+ }
+ if (RegInfos[0].R->getParent()) {
+ OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
+ } else {
+ // top level region
+ OS << "]";
+ }
+ OS << ", Subs[";
+ for (CHRScope *Sub : Subs) {
+ OS << *Sub << ", ";
+ }
+ OS << "]]";
+}
+
+// Return true if the given instruction type can be hoisted by CHR.
+static bool isHoistableInstructionType(Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
+ isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+ isa<InsertValueInst>(I);
+}
+
+// Return true if the given instruction can be hoisted by CHR.
+static bool isHoistable(Instruction *I, DominatorTree &DT) {
+ if (!isHoistableInstructionType(I))
+ return false;
+ return isSafeToSpeculativelyExecute(I, nullptr, &DT);
+}
+
+// Recursively traverse the use-def chains of the given value and return a set
+// of the unhoistable base values defined within the scope (excluding the
+// first-region entry block) or the (hoistable or unhoistable) base values that
+// are defined outside (including the first-region entry block) of the
+// scope. The returned set doesn't include constants.
+static std::set<Value *> getBaseValues(Value *V,
+ DominatorTree &DT) {
+ std::set<Value *> Result;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // We don't stop at a block that's not in the Scope because we would miss some
+ // instructions that are based on the same base values if we stop there.
+ if (!isHoistable(I, DT)) {
+ Result.insert(I);
+ return Result;
+ }
+ // I is hoistable above the Scope.
+ for (Value *Op : I->operands()) {
+ std::set<Value *> OpResult = getBaseValues(Op, DT);
+ Result.insert(OpResult.begin(), OpResult.end());
+ }
+ return Result;
+ }
+ if (isa<Argument>(V)) {
+ Result.insert(V);
+ return Result;
+ }
+ // We don't include other values such as constants because those won't give
+ // any chance of folding conditions (e.g. two bit checks merged into one
+ // check) after CHR.
+ return Result; // empty
+}
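+
+// E.g. for a branch condition computed as `((x >> 3) & 1) != 0`, where `x` is
+// a function argument (hypothetical, for illustration), the shift, the mask,
+// and the compare are all hoistable, so the returned base set is {x}; two
+// conditions whose base sets both contain `x` then have a chance of folding
+// together after CHR.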
+
+// Return true if V is already hoisted or can be hoisted (along with its
+// operands) above the insert point. When it returns true and HoistStops is
+// non-null, the instructions to stop hoisting at through the use-def chains are
+// inserted into HoistStops.
+static bool
+checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables,
+ DenseSet<Instruction *> *HoistStops) {
+ assert(InsertPoint && "Null InsertPoint");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
+ assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
+ if (Unhoistables.count(I)) {
+ // Don't hoist if they are not to be hoisted.
+ return false;
+ }
+ if (DT.dominates(I, InsertPoint)) {
+ // We are already above the insert point. Stop here.
+ if (HoistStops)
+ HoistStops->insert(I);
+ return true;
+ }
+ // We aren't above the insert point; check whether we can hoist it above the
+ // insert point.
+ if (isHoistable(I, DT)) {
+ // Check operands first.
+ DenseSet<Instruction *> OpsHoistStops;
+ bool AllOpsHoisted = true;
+ for (Value *Op : I->operands()) {
+ if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
+ AllOpsHoisted = false;
+ break;
+ }
+ }
+ if (AllOpsHoisted) {
+ CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
+ if (HoistStops)
+ HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+ return true;
+ }
+ }
+ return false;
+ }
+ // Non-instructions are considered hoistable.
+ return true;
+}
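+
+// Small illustration with hypothetical values: for V = `and i1 %a, %b` below
+// the insert point, where %a is defined above the insert point and %b is a
+// hoistable icmp of a function argument, both operands check out, so this
+// returns true and records %a as a hoist stop; hoistValue would later move %b
+// and the `and` above the insert point, stopping at %a.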
+
+// Returns true and sets the true and false branch probabilities from the
+// MD_prof metadata if it's well-formed.
+static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
+ BranchProbability &FalseProb) {
+ if (!MD) return false;
+ MDString *MDName = cast<MDString>(MD->getOperand(0));
+ if (MDName->getString() != "branch_weights" ||
+ MD->getNumOperands() != 3)
+ return false;
+ ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
+ ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
+ if (!TrueWeight || !FalseWeight)
+ return false;
+ uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
+ uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
+ uint64_t SumWt = TrueWt + FalseWt;
+
+ assert(SumWt >= TrueWt && SumWt >= FalseWt &&
+ "Overflow calculating branch probabilities.");
+
+ TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
+ FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
+ return true;
+}
+
+static BranchProbability getCHRBiasThreshold() {
+ return BranchProbability::getBranchProbability(
+ static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
+}
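+
+// E.g. hypothetical branch weights of 999:1 give TrueProb = 999/1000 = 0.999
+// and FalseProb = 0.001; with the default chr-bias-threshold of 0.99
+// (990000/1000000 here), such a branch is classified as true-biased below.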
+
+// A helper for checkBiasedBranch and checkBiasedSelect. If TrueProb >=
+// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
+// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
+// false.
+template <typename K, typename S, typename M>
+static bool checkBias(K *Key, BranchProbability TrueProb,
+ BranchProbability FalseProb, S &TrueSet, S &FalseSet,
+ M &BiasMap) {
+ BranchProbability Threshold = getCHRBiasThreshold();
+ if (TrueProb >= Threshold) {
+ TrueSet.insert(Key);
+ BiasMap[Key] = TrueProb;
+ return true;
+ } else if (FalseProb >= Threshold) {
+ FalseSet.insert(Key);
+ BiasMap[Key] = FalseProb;
+ return true;
+ }
+ return false;
+}
+
+// Returns true and inserts the region into the appropriate biased set and the
+// bias map if the branch of the region is biased.
+static bool checkBiasedBranch(BranchInst *BI, Region *R,
+ DenseSet<Region *> &TrueBiasedRegionsGlobal,
+ DenseSet<Region *> &FalseBiasedRegionsGlobal,
+ DenseMap<Region *, BranchProbability> &BranchBiasMap) {
+ if (!BI->isConditional())
+ return false;
+ BranchProbability ThenProb, ElseProb;
+ if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
+ ThenProb, ElseProb))
+ return false;
+ BasicBlock *IfThen = BI->getSuccessor(0);
+ BasicBlock *IfElse = BI->getSuccessor(1);
+ assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
+ IfThen != IfElse &&
+ "Invariant from findScopes");
+ if (IfThen == R->getExit()) {
+ // Swap them so that IfThen/ThenProb means going into the conditional code
+ // and IfElse/ElseProb means skipping it.
+ std::swap(IfThen, IfElse);
+ std::swap(ThenProb, ElseProb);
+ }
+ CHR_DEBUG(dbgs() << "BI " << *BI << " ");
+ CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
+ CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
+ return checkBias(R, ThenProb, ElseProb,
+ TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+}
+
+// Returns true and inserts the select into the appropriate biased set and the
+// bias map if the select is biased.
+static bool checkBiasedSelect(
+ SelectInst *SI, Region *R,
+ DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
+ DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
+ DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
+ BranchProbability TrueProb, FalseProb;
+ if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
+ TrueProb, FalseProb))
+ return false;
+ CHR_DEBUG(dbgs() << "SI " << *SI << " ");
+ CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
+ CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
+ return checkBias(SI, TrueProb, FalseProb,
+ TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
+ SelectBiasMap);
+}
+
+// Returns the instruction at which to hoist the dependent condition values and
+// insert the CHR branch for a region. This is the first select in the entry
+// block, if any, otherwise the terminator branch of the entry block.
+static Instruction* getBranchInsertPoint(RegInfo &RI) {
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ // The hoist point is by default the terminator of the entry block, which is
+ // the same as the branch instruction if RI.HasBranch is true.
+ Instruction *HoistPoint = EntryBB->getTerminator();
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ // Pick the first select in Selects in the entry block. Note Selects is
+ // sorted in the instruction order within a block (asserted below).
+ HoistPoint = SI;
+ break;
+ }
+ }
+ assert(HoistPoint && "Null HoistPoint");
+#ifndef NDEBUG
+ // Check that HoistPoint is the first one in Selects in the entry block,
+ // if any.
+ DenseSet<Instruction *> EntryBlockSelectSet;
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ EntryBlockSelectSet.insert(SI);
+ }
+ }
+ for (Instruction &I : *EntryBB) {
+ if (EntryBlockSelectSet.count(&I) > 0) {
+ assert(&I == HoistPoint &&
+ "HoistPoint must be the first one in Selects");
+ break;
+ }
+ }
+#endif
+ return HoistPoint;
+}
+
+// Find a CHR scope in the given region.
+CHRScope * CHR::findScope(Region *R) {
+ CHRScope *Result = nullptr;
+ BasicBlock *Entry = R->getEntry();
+ BasicBlock *Exit = R->getExit(); // null if top level.
+ assert(Entry && "Entry must not be null");
+ assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
+ "Only top level region has a null exit");
+ if (Entry)
+ CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Entry null\n");
+ if (Exit)
+ CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Exit null\n");
+ // Exclude cases where Entry is part of a subregion (hence it doesn't belong
+ // to this region).
+ bool EntryInSubregion = RI.getRegionFor(Entry) != R;
+ if (EntryInSubregion)
+ return nullptr;
+ // Exclude loops
+ for (BasicBlock *Pred : predecessors(Entry))
+ if (R->contains(Pred))
+ return nullptr;
+ if (Exit) {
+ // Try to find an if-then block (check if R is an if-then).
+ // if (cond) {
+ // ...
+ // }
+ auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
+ if (BI)
+ CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "BI null\n");
+ if (BI && BI->isConditional()) {
+ BasicBlock *S0 = BI->getSuccessor(0);
+ BasicBlock *S1 = BI->getSuccessor(1);
+ CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
+ CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
+ if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
+ RegInfo RI(R);
+ RI.HasBranch = checkBiasedBranch(
+ BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ CHR_DEBUG(dbgs() << "Found a region with a branch\n");
+ ++Stats.NumBranches;
+ if (!RI.HasBranch) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
+ << "Branch not biased";
+ });
+ }
+ }
+ }
+ }
+ {
+ // Try to look for selects in the direct child blocks (as opposed to in
+ // subregions) of R.
+ // ...
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // ...
+ // a = cond ? b : c;
+ // ...
+ SmallVector<SelectInst *, 8> Selects;
+ for (RegionNode *E : R->elements()) {
+ if (E->isSubRegion())
+ continue;
+ // This returns the basic block of E if E is a direct child of R (not a
+ // subregion).
+ BasicBlock *BB = E->getEntry();
+ // Need to push in the order to make it easier to find the first Select
+ // later.
+ for (Instruction &I : *BB) {
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
+ Selects.push_back(SI);
+ ++Stats.NumBranches;
+ }
+ }
+ }
+ if (Selects.size() > 0) {
+ auto AddSelects = [&](RegInfo &RI) {
+ for (auto *SI : Selects)
+ if (checkBiasedSelect(SI, RI.R,
+ TrueBiasedSelectsGlobal,
+ FalseBiasedSelectsGlobal,
+ SelectBiasMap))
+ RI.Selects.push_back(SI);
+ else
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
+ << "Select not biased";
+ });
+ };
+ if (!Result) {
+ CHR_DEBUG(dbgs() << "Found a select-only region\n");
+ RegInfo RI(R);
+ AddSelects(RI);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ } else {
+ CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
+ AddSelects(Result->RegInfos[0]);
+ }
+ }
+ }
+
+ if (Result) {
+ checkScopeHoistable(Result);
+ }
+ return Result;
+}
+
+// Check whether the branch and the selects in the region can be hoisted above
+// the CHR branch insert point (the most dominating of them, either the branch
+// at the end of the first block or the first select in the first block). If
+// the branch can't be hoisted, drop the selects in the first block.
+//
+// For example, for the following scope/region with selects, we want to insert
+// the merged branch right before the first select in the first/entry block by
+// hoisting c1, c2, c3, and c4.
+//
+// // Branch insert point here.
+// a = c1 ? b : c; // Select 1
+// d = c2 ? e : f; // Select 2
+// if (c3) { // Branch
+// ...
+// c4 = foo() // A call.
+// g = c4 ? h : i; // Select 3
+// }
+//
+// But suppose we can't hoist c4 because it's dependent on the preceding
+// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
+// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
+void CHR::checkScopeHoistable(CHRScope *Scope) {
+ RegInfo &RI = Scope->RegInfos[0];
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ auto *Branch = RI.HasBranch ?
+ cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
+ SmallVector<SelectInst *, 8> &Selects = RI.Selects;
+ if (RI.HasBranch || !Selects.empty()) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ // Avoid a data dependence from a select or a branch to a(nother)
+ // select. Note that no instruction can data-depend on a branch (a branch
+ // instruction doesn't produce a value).
+ DenseSet<Instruction *> Unhoistables;
+ // Initialize Unhoistables with the selects.
+ for (SelectInst *SI : Selects) {
+ Unhoistables.insert(SI);
+ }
+ // Remove Selects that can't be hoisted.
+ for (auto it = Selects.begin(); it != Selects.end(); ) {
+ SelectInst *SI = *it;
+ if (SI == InsertPoint) {
+ ++it;
+ continue;
+ }
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr);
+ if (!IsHoistable) {
+ CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "DropUnhoistableSelect", SI)
+ << "Dropped unhoistable select";
+ });
+ it = Selects.erase(it);
+ // Since we are dropping the select here, we also drop it from
+ // Unhoistables.
+ Unhoistables.erase(SI);
+ } else
+ ++it;
+ }
+ // Update InsertPoint after potentially removing selects.
+ InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ if (RI.HasBranch && InsertPoint != Branch) {
+ bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr);
+ if (!IsHoistable) {
+ // If the branch isn't hoistable, drop the selects in the entry
+ // block, preferring the branch, which makes the branch the hoist
+ // point.
+ assert(InsertPoint != Branch && "Branch must not be the hoist point");
+ CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
+ CHR_DEBUG(
+ for (SelectInst *SI : Selects) {
+ dbgs() << "SI " << *SI << "\n";
+ });
+ for (SelectInst *SI : Selects) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "DropSelectUnhoistableBranch", SI)
+ << "Dropped select due to unhoistable branch";
+ });
+ }
+ Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
+ [EntryBB](SelectInst *SI) {
+ return SI->getParent() == EntryBB;
+ }), Selects.end());
+ Unhoistables.clear();
+ InsertPoint = Branch;
+ }
+ }
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+#ifndef NDEBUG
+ if (RI.HasBranch) {
+ assert(!DT.dominates(Branch, InsertPoint) &&
+ "Branch can't be already above the hoist point");
+ assert(checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr) &&
+ "checkHoistValue for branch");
+ }
+ for (auto *SI : Selects) {
+ assert(!DT.dominates(SI, InsertPoint) &&
+ "SI can't be already above the hoist point");
+ assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, nullptr) &&
+ "checkHoistValue for selects");
+ }
+ CHR_DEBUG(dbgs() << "Result\n");
+ if (RI.HasBranch) {
+ CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
+ }
+ for (auto *SI : Selects) {
+ CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
+ }
+#endif
+ }
+}
+
+// Traverse the region tree, find all nested scopes and merge them if possible.
+CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes) {
+ CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
+ CHRScope *Result = findScope(R);
+ // Visit subscopes.
+ CHRScope *ConsecutiveSubscope = nullptr;
+ SmallVector<CHRScope *, 8> Subscopes;
+ for (auto It = R->begin(); It != R->end(); ++It) {
+ const std::unique_ptr<Region> &SubR = *It;
+ auto NextIt = std::next(It);
+ Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
+ CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
+ << "\n");
+ CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
+ if (SubCHRScope) {
+ CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
+ } else {
+ CHR_DEBUG(dbgs() << "Subregion Scope null\n");
+ }
+ if (SubCHRScope) {
+ if (!ConsecutiveSubscope)
+ ConsecutiveSubscope = SubCHRScope;
+ else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ ConsecutiveSubscope = SubCHRScope;
+ } else
+ ConsecutiveSubscope->append(SubCHRScope);
+ } else {
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ ConsecutiveSubscope = nullptr;
+ }
+ }
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ for (CHRScope *Sub : Subscopes) {
+ if (Result) {
+ // Combine it with the parent.
+ Result->addSub(Sub);
+ } else {
+ // Push Subscopes as they won't be combined with the parent.
+ Scopes.push_back(Sub);
+ }
+ }
+ return Result;
+}
+
+static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
+ DenseSet<Value *> ConditionValues;
+ if (RI.HasBranch) {
+ auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
+ ConditionValues.insert(BI->getCondition());
+ }
+ for (SelectInst *SI : RI.Selects) {
+ ConditionValues.insert(SI->getCondition());
+ }
+ return ConditionValues;
+}
+
+// Determine whether to split a scope depending on the sets of the branch
+// condition values of the previous region and the current region. We split
+// (return true) if 1) the condition values of the inner/lower scope can't be
+// hoisted up to the outer/upper scope, or 2) the two sets of condition values
+// have an empty intersection (because the combined branch conditions probably
+// won't lead to a simpler combined condition).
+static bool shouldSplit(Instruction *InsertPoint,
+ DenseSet<Value *> &PrevConditionValues,
+ DenseSet<Value *> &ConditionValues,
+ DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables) {
+ CHR_DEBUG(
+ dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
+ for (Value *V : PrevConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ assert(InsertPoint && "Null InsertPoint");
+ // If any of Bases isn't hoistable to the hoist point, split.
+ for (Value *V : ConditionValues) {
+ if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
+ CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
+ return true; // Not hoistable, split.
+ }
+ }
+ // If PrevConditionValues or ConditionValues is empty, don't split to avoid
+ // unnecessary splits at scopes with no branch/selects. If
+ // PrevConditionValues and ConditionValues don't intersect at all, split.
+ if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
+ // Use std::set as DenseSet doesn't work with set_intersection.
+ std::set<Value *> PrevBases, Bases;
+ for (Value *V : PrevConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT);
+ PrevBases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ for (Value *V : ConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT);
+ Bases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ CHR_DEBUG(
+ dbgs() << "PrevBases ";
+ for (Value *V : PrevBases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " Bases ";
+ for (Value *V : Bases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ std::set<Value *> Intersection;
+ std::set_intersection(PrevBases.begin(), PrevBases.end(),
+ Bases.begin(), Bases.end(),
+ std::inserter(Intersection, Intersection.begin()));
+ if (Intersection.empty()) {
+ // Empty intersection, split.
+ CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
+ return true;
+ }
+ }
+ CHR_DEBUG(dbgs() << "No split\n");
+ return false; // Don't split.
+}
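+
+// E.g. with hypothetical conditions: if the previous region tests `x & 1` and
+// the current one tests `x & 2`, both reduce to the base set {x}, the
+// intersection is non-empty, and the regions stay in one scope; if the current
+// one instead tests `y & 4`, the base sets {x} and {y} don't intersect and the
+// scope is split (assuming the values are hoistable in both cases).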
+
+static void getSelectsInScope(CHRScope *Scope,
+ DenseSet<Instruction *> &Output) {
+ for (RegInfo &RI : Scope->RegInfos)
+ for (SelectInst *SI : RI.Selects)
+ Output.insert(SI);
+ for (CHRScope *Sub : Scope->Subs)
+ getSelectsInScope(Sub, Output);
+}
+
+void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(!Scope->BranchInsertPoint &&
+ "BranchInsertPoint must not be set");
+ DenseSet<Instruction *> Unhoistables;
+ getSelectsInScope(Scope, Unhoistables);
+ splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
+ }
+#ifndef NDEBUG
+ for (CHRScope *Scope : Output) {
+ assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
+ }
+#endif
+}
+
+SmallVector<CHRScope *, 8> CHR::splitScope(
+ CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables) {
+ if (Outer) {
+ assert(OuterConditionValues && "Null OuterConditionValues");
+ assert(OuterInsertPoint && "Null OuterInsertPoint");
+ }
+ bool PrevSplitFromOuter = true;
+ DenseSet<Value *> PrevConditionValues;
+ Instruction *PrevInsertPoint = nullptr;
+ SmallVector<CHRScope *, 8> Splits;
+ SmallVector<bool, 8> SplitsSplitFromOuter;
+ SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
+ SmallVector<Instruction *, 8> SplitsInsertPoints;
+ SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
+ for (RegInfo &RI : RegInfos) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
+ CHR_DEBUG(
+ dbgs() << "ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ if (RI.R == RegInfos[0].R) {
+ // First iteration. Check to see if we should split from the outer.
+ if (Outer) {
+ CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
+ CHR_DEBUG(dbgs() << "Should split from outer at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
+ ConditionValues, DT, Unhoistables)) {
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "SplitScopeFromOuter",
+ RI.R->getEntry()->getTerminator())
+ << "Split scope from outer due to unhoistable branch/select "
+ << "and/or lack of common condition values";
+ });
+ } else {
+ // Not splitting from the outer. Use the outer bases and insert
+ // point. Union the bases.
+ PrevSplitFromOuter = false;
+ PrevConditionValues = *OuterConditionValues;
+ PrevConditionValues.insert(ConditionValues.begin(),
+ ConditionValues.end());
+ PrevInsertPoint = OuterInsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Outer null\n");
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Should split from prev at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
+ DT, Unhoistables)) {
+ CHRScope *Tail = Scope->split(RI.R);
+ Scopes.insert(Tail);
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ Scope = Tail;
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ PrevSplitFromOuter = true;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "SplitScopeFromPrev",
+ RI.R->getEntry()->getTerminator())
+ << "Split scope from previous due to unhoistable branch/select "
+ << "and/or lack of common condition values";
+ });
+ } else {
+ // Not splitting. Union the bases. Keep the hoist point.
+ PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
+ }
+ }
+ }
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ assert(PrevInsertPoint && "Null PrevInsertPoint");
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ assert(Splits.size() == SplitsConditionValues.size() &&
+ Splits.size() == SplitsSplitFromOuter.size() &&
+ Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
+ Instruction *SplitInsertPoint = SplitsInsertPoints[I];
+ SmallVector<CHRScope *, 8> NewSubs;
+ DenseSet<Instruction *> SplitUnhoistables;
+ getSelectsInScope(Split, SplitUnhoistables);
+ for (CHRScope *Sub : Split->Subs) {
+ SmallVector<CHRScope *, 8> SubSplits = splitScope(
+ Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
+ SplitUnhoistables);
+ NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
+ }
+ Split->Subs = NewSubs;
+ }
+ SmallVector<CHRScope *, 8> Result;
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ if (SplitsSplitFromOuter[I]) {
+ // Split from the outer.
+ Output.push_back(Split);
+ Split->BranchInsertPoint = SplitsInsertPoints[I];
+ CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
+ << "\n");
+ } else {
+ // Connected to the outer.
+ Result.push_back(Split);
+ }
+ }
+ if (!Outer)
+ assert(Result.empty() &&
+ "If no outer (top-level), must return no nested ones");
+ return Result;
+}
+
+void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
+ for (CHRScope *Scope : Scopes) {
+ assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
+ classifyBiasedScopes(Scope, Scope);
+ CHR_DEBUG(
+ dbgs() << "classifyBiasedScopes " << *Scope << "\n";
+ dbgs() << "TrueBiasedRegions ";
+ for (Region *R : Scope->TrueBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedRegions ";
+ for (Region *R : Scope->FalseBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "TrueBiasedSelects ";
+ for (SelectInst *SI : Scope->TrueBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedSelects ";
+ for (SelectInst *SI : Scope->FalseBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";);
+ }
+}
+
+void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
+ for (RegInfo &RI : Scope->RegInfos) {
+ if (RI.HasBranch) {
+ Region *R = RI.R;
+ if (TrueBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->TrueBiasedRegions.insert(R);
+ else if (FalseBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->FalseBiasedRegions.insert(R);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ for (SelectInst *SI : RI.Selects) {
+ if (TrueBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->TrueBiasedSelects.insert(SI);
+ else if (FalseBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->FalseBiasedSelects.insert(SI);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs) {
+ classifyBiasedScopes(Sub, OutermostScope);
+ }
+}
+
+static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
+ unsigned NumBiased = Scope->TrueBiasedRegions.size() +
+ Scope->FalseBiasedRegions.size() +
+ Scope->TrueBiasedSelects.size() +
+ Scope->FalseBiasedSelects.size();
+ return NumBiased >= CHRMergeThreshold;
+}
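+
+// E.g. with the default chr-merge-threshold of 2, one biased branch plus one
+// biased select in the scope is already enough to keep it.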
+
+void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ // Filter out scopes with fewer than CHRMergeThreshold biased branches or selects.
+ if (!hasAtLeastTwoBiasedBranches(Scope)) {
+ CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
+ << Scope->TrueBiasedRegions.size()
+ << " falsy-regions " << Scope->FalseBiasedRegions.size()
+ << " true-selects " << Scope->TrueBiasedSelects.size()
+ << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(
+ DEBUG_TYPE,
+ "DropScopeWithOneBranchOrSelect",
+ Scope->RegInfos[0].R->getEntry()->getTerminator())
+ << "Drop scope with < "
+ << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
+ << " biased branch(es) or select(s)";
+ });
+ continue;
+ }
+ Output.push_back(Scope);
+ }
+}
+
+void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
+ "Empty");
+ setCHRRegions(Scope, Scope);
+ Output.push_back(Scope);
+ CHR_DEBUG(
+ dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
+ for (auto pair : Scope->HoistStopMap) {
+ Region *R = pair.first;
+ dbgs() << "Region " << R->getNameStr() << "\n";
+ for (Instruction *I : pair.second) {
+ dbgs() << "HoistStop " << *I << "\n";
+ }
+ }
+ dbgs() << "CHRRegions" << "\n";
+ for (RegInfo &RI : Scope->CHRRegions) {
+ dbgs() << RI.R->getNameStr() << "\n";
+ });
+ }
+}
+
+void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
+ DenseSet<Instruction *> Unhoistables;
+ // Put the biased selects in Unhoistables because they should stay where they
+ // are and be constant-folded after CHR (in case one biased select or a
+ // branch can depend on another biased select).
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (SelectInst *SI : RI.Selects) {
+ Unhoistables.insert(SI);
+ }
+ }
+ Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
+ for (RegInfo &RI : Scope->RegInfos) {
+ Region *R = RI.R;
+ DenseSet<Instruction *> HoistStops;
+ bool IsHoisted = false;
+ if (RI.HasBranch) {
+ assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
+ OutermostScope->FalseBiasedRegions.count(R) > 0) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ // Note checkHoistValue fills in HoistStops.
+ bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
+ OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
+ "Must be true or false biased");
+ // Note checkHoistValue fills in HoistStops.
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ if (IsHoisted) {
+ OutermostScope->CHRRegions.push_back(RI);
+ OutermostScope->HoistStopMap[R] = HoistStops;
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs)
+ setCHRRegions(Sub, OutermostScope);
+}
+
+bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
+ return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
+}
+
+void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ Output.resize(Input.size());
+ llvm::copy(Input, Output.begin());
+ std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
+}
+
+// Hoist V (along with the operands it depends on) to just before the hoist
+// point, unless it's already hoisted, is one of the region's hoist stops, or
+// is a trivial phi inserted by a previous CHR scope.
+static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
+ HoistStopMapTy &HoistStopMap,
+ DenseSet<Instruction *> &HoistedSet,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ auto IT = HoistStopMap.find(R);
+ assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
+ DenseSet<Instruction *> &HoistStops = IT->second;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I == HoistPoint)
+ return;
+ if (HoistStops.count(I))
+ return;
+ if (auto *PN = dyn_cast<PHINode>(I))
+ if (TrivialPHIs.count(PN))
+ // The trivial phi inserted by the previous CHR scope could replace a
+ // non-phi in HoistStops. Note that since this phi is at the exit of a
+ // previous CHR scope, which dominates this scope, it's safe to stop
+ // hoisting there.
+ return;
+ if (HoistedSet.count(I))
+ // Already hoisted, return.
+ return;
+ assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+ for (Value *Op : I->operands()) {
+ hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
+ }
+ I->moveBefore(HoistPoint);
+ HoistedSet.insert(I);
+ CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
+ }
+}
+
+// Hoist the dependent condition values of the branches and the selects in the
+// scope to the insert point.
+static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<Instruction *> HoistedSet;
+ for (const RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs);
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs);
+ }
+ }
+}
+
+// Negate the predicate of an ICmp if it's used only by branches or selects,
+// compensating by swapping the successors of those branches and the operands
+// of those selects. Returns true on success.
+static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
+ Instruction *ExcludedUser,
+ CHRScope *Scope) {
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
+ continue;
+ if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
+ continue;
+ return false;
+ }
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (auto *BI = dyn_cast<BranchInst>(U)) {
+ assert(BI->isConditional() && "Must be conditional");
+ BI->swapSuccessors();
+ // Don't need to swap this in terms of
+ // TrueBiasedRegions/FalseBiasedRegions because true-biased/false-biased
+ // mean whether the branch is likely to go into the if-then rather than
+ // successor0/successor1, and because we can tell which edge is the then or
+ // the else one by comparing the destination to the region exit block.
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(U)) {
+ // Swap operands
+ Value *TrueValue = SI->getTrueValue();
+ Value *FalseValue = SI->getFalseValue();
+ SI->setTrueValue(FalseValue);
+ SI->setFalseValue(TrueValue);
+ SI->swapProfMetadata();
+ if (Scope->TrueBiasedSelects.count(SI)) {
+ assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->FalseBiasedSelects.insert(SI);
+ } else if (Scope->FalseBiasedSelects.count(SI)) {
+ assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->TrueBiasedSelects.insert(SI);
+ }
+ continue;
+ }
+ llvm_unreachable("Must be a branch or a select");
+ }
+ ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
+ return true;
+}
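+
+// E.g. with hypothetical IR: for `%c = icmp eq i32 %x, 0` feeding
+// `br i1 %c, label %then, label %else`, the predicate becomes `ne` and the
+// branch successors are swapped, so overall behavior (and likewise any select
+// on %c, whose true/false values get swapped) is unchanged.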
+
+// A helper for transformScopes. Insert a trivial phi at the scope exit block
+// for a value that's defined in the scope but used outside it (meaning it's
+// alive at the exit block).
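+// For example (illustrative): if %v is defined inside the scope and used by an
+// instruction outside it, a phi such as
+//   %v.phi = phi i32 [ %v, %pred0 ], [ %v, %pred1 ]
+// is created at the exit block and the outside use is rewritten to %v.phi.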
+static void insertTrivialPHIs(CHRScope *Scope,
+ BasicBlock *EntryBlock, BasicBlock *ExitBlock,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<BasicBlock *> BlocksInScopeSet;
+ SmallVector<BasicBlock *, 8> BlocksInScopeVec;
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ BlocksInScopeSet.insert(BB);
+ BlocksInScopeVec.push_back(BB);
+ }
+ }
+ CHR_DEBUG(
+ dbgs() << "Inserting redundant phis\n";
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ dbgs() << "BlockInScope " << BB->getName() << "\n";
+ });
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ for (Instruction &I : *BB) {
+ SmallVector<Instruction *, 8> Users;
+ for (User *U : I.users()) {
+ if (auto *UI = dyn_cast<Instruction>(U)) {
+ if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
+ // Unless there's already a phi for I at the exit block.
+ !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
+ Users.push_back(UI);
+ } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
+ // There's a loop backedge from a block that's dominated by this
+ // scope to the entry block.
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs()
+ << "Used at entry block (for a back edge) by a phi user "
+ << *UI << "\n");
+ Users.push_back(UI);
+ }
+ }
+ }
+ if (Users.size() > 0) {
+ // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
+ // ExitBlock. Replace I with the new phi in UI unless UI is another
+ // phi at ExitBlock.
+ unsigned PredCount = std::distance(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
+ &ExitBlock->front());
+ for (BasicBlock *Pred : predecessors(ExitBlock)) {
+ PN->addIncoming(&I, Pred);
+ }
+ TrivialPHIs.insert(PN);
+ CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
+ for (Instruction *UI : Users) {
+ for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
+ if (UI->getOperand(J) == &I) {
+ UI->setOperand(J, PN);
+ }
+ }
+ CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
+ }
+ }
+ }
+ }
+}
+
+// Assert that all the CHR regions of the scope have a biased branch or select.
+static void LLVM_ATTRIBUTE_UNUSED
+assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
+#ifndef NDEBUG
+ auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
+ if (Scope->TrueBiasedRegions.count(RI.R) ||
+ Scope->FalseBiasedRegions.count(RI.R))
+ return true;
+ for (SelectInst *SI : RI.Selects)
+ if (Scope->TrueBiasedSelects.count(SI) ||
+ Scope->FalseBiasedSelects.count(SI))
+ return true;
+ return false;
+ };
+ for (RegInfo &RI : Scope->CHRRegions) {
+ assert(HasBiasedBranchOrSelect(RI, Scope) &&
+ "Must have biased branch or select");
+ }
+#endif
+}
+
+// Assert that all the condition values of the biased branches and selects have
+// been hoisted to the pre-entry block or outside of the scope.
+static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
+ CHRScope *Scope, BasicBlock *PreEntryBlock) {
+ CHR_DEBUG(dbgs() << "Biased regions condition values \n");
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ Value *V = BI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ (void)(I); // Unused in release build.
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ Value *V = SI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ (void)(I); // Unused in release build.
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ }
+}
+
+void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
+ CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
+
+ assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
+ Region *FirstRegion = Scope->RegInfos[0].R;
+ BasicBlock *EntryBlock = FirstRegion->getEntry();
+ Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
+ BasicBlock *ExitBlock = LastRegion->getExit();
+ Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
+
+ if (ExitBlock) {
+ // Insert a trivial phi at the exit block (where the CHR hot path and the
+ // cold path merge) for a value that's defined in the scope but used
+ // outside it (meaning it's alive at the exit block). We will add the
+ // incoming values for the CHR cold paths to it below. Without this, we'd
+ // miss updating phis for such values unless there happens to already be a
+ // phi for that value there.
+ insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+ }
+
+ // Split the entry block of the first region. The new block becomes the new
+ // entry block of the first region. The old entry block becomes the block to
+ // insert the CHR branch into. Note that DT gets updated through the split.
+ // Because we update the entry of the first region after the split, and
+ // because a Region only points to its entry and exit blocks rather than
+ // keeping all its blocks in a list or set, the block membership and the
+ // entry/exit blocks of the region remain valid after the split.
+ CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
+ << " at " << *Scope->BranchInsertPoint << "\n");
+ BasicBlock *NewEntryBlock =
+ SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ FirstRegion->replaceEntryRecursive(NewEntryBlock);
+ BasicBlock *PreEntryBlock = EntryBlock;
+
+ ValueToValueMapTy VMap;
+ // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
+ // hot path (originals) and a cold path (clones) and update the PHIs at the
+ // exit block.
+ cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
+
+ // Replace the old (placeholder) branch with the new (merged) conditional
+ // branch.
+ BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
+ NewEntryBlock, VMap);
+
+#ifndef NDEBUG
+ assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
+#endif
+
+ // Hoist the conditional values of the branches/selects.
+ hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
+
+#ifndef NDEBUG
+ assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
+#endif
+
+ // Create the combined branch condition and constant-fold the branches/selects
+ // in the hot path.
+ fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
+ ProfileCount ? ProfileCount.getValue() : 0);
+}
+
+// A helper for transformScopes. Clone the blocks in the scope (excluding the
+// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
+// at the exit block.
+void CHR::cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap) {
+ // Clone all the blocks. The original blocks will be the hot-path
+ // CHR-optimized code and the cloned blocks will be the original unoptimized
+ // code. This is so that the block pointers from the
+ // CHRScope/Region/RegionInfo stay valid, continuing to point to the hot-path
+ // code to which CHR should apply.
+ SmallVector<BasicBlock*, 8> NewBlocks;
+ for (RegInfo &RI : Scope->RegInfos)
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ assert(BB != PreEntryBlock && "Don't copy the pre-entry block");
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
+ NewBlocks.push_back(NewBB);
+ VMap[BB] = NewBB;
+ }
+
+ // Place the cloned blocks right after the original blocks (right before the
+ // exit block, if any).
+ if (ExitBlock)
+ F.getBasicBlockList().splice(ExitBlock->getIterator(),
+ F.getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F.end());
+
+ // Update the cloned blocks/instructions to refer to themselves.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (Instruction &I : *NewBlocks[i])
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Add the cloned blocks to the PHIs of the exit block. ExitBlock is null for
+ // the top-level region, in which case we don't need to add PHIs. The trivial
+ // PHIs inserted above will be updated here.
+ if (ExitBlock)
+ for (PHINode &PN : ExitBlock->phis())
+ for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
+ ++I) {
+ BasicBlock *Pred = PN.getIncomingBlock(I);
+ if (LastRegion->contains(Pred)) {
+ Value *V = PN.getIncomingValue(I);
+ auto It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
+ PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
+ }
+ }
+}
+
+// A helper for transformScope. Replace the old (placeholder) branch with the
+// new (merged) conditional branch.
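+// For example (illustrative): the unconditional branch created by SplitBlock is
+// replaced with "br i1 true, label %entry.split, label %entry.split.nonchr",
+// where the true (hot) successor is the original code, the false (cold)
+// successor is its clone, and the i1 true condition is a placeholder.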
+BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap) {
+ BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
+ "SplitBlock did not work correctly!");
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ assert(VMap.find(NewEntryBlock) != VMap.end() &&
+ "NewEntryBlock must have been copied");
+ OldBR->dropAllReferences();
+ OldBR->eraseFromParent();
+ // The constant-true condition is a placeholder. It will be replaced later in
+ // fixupBranchesAndSelects().
+ BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
+ cast<BasicBlock>(VMap[NewEntryBlock]),
+ ConstantInt::getTrue(F.getContext()));
+ PreEntryBlock->getInstList().push_back(NewBR);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ return NewBR;
+}
+
+// A helper for transformScopes. Create the combined branch condition and
+// constant-fold the branches/selects in the hot path.
+void CHR::fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount) {
+ Value *MergedCondition = ConstantInt::getTrue(F.getContext());
+ BranchProbability CHRBranchBias(1, 1);
+ uint64_t NumCHRedBranches = 0;
+ IRBuilder<> IRB(PreEntryBlock->getTerminator());
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ if (RI.HasBranch) {
+ fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ }
+ Stats.NumBranchesDelta += NumCHRedBranches - 1;
+ Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE,
+ "CHR",
+ // Refer to the hot (original) path
+ MergedBR->getSuccessor(0)->getTerminator())
+ << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
+ << " branches or selects";
+ });
+ MergedBR->setCondition(MergedCondition);
+ SmallVector<uint32_t, 2> Weights;
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
+ MDBuilder MDB(F.getContext());
+ MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
+ << "\n");
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a branch in the hot path.
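+// For example (illustrative): for a true-biased branch on %c, %c is AND-ed into
+// the merged condition at the pre-entry block and the branch itself is folded
+// to a constant condition that keeps the hot (biased) successor, so the hot
+// path no longer tests %c at run time.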
+void CHR::fixupBranch(Region *R, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = BranchBiasMap[R];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ BasicBlock *IfThen = BI->getSuccessor(1);
+ BasicBlock *IfElse = BI->getSuccessor(0);
+ BasicBlock *RegionExitBlock = R->getExit();
+ assert(RegionExitBlock && "Null ExitBlock");
+ assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
+ IfThen != IfElse && "Invariant from findScopes");
+ if (IfThen == RegionExitBlock) {
+ // Swap them so that IfThen means going into it and IfElse means skipping
+ // it.
+ std::swap(IfThen, IfElse);
+ }
+ CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
+ << " IfElse " << IfElse->getName() << "\n");
+ Value *Cond = BI->getCondition();
+ BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
+ bool ConditionTrue = HotTarget == BI->getSuccessor(0);
+ addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
+ MergedCondition);
+ // Constant-fold the branch at ClonedEntryBlock.
+ assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
+ "The successor shouldn't change");
+ Value *NewCondition = ConditionTrue ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ BI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a select in the hot path.
+void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ assert((IsTrueBiased ||
+ Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
+ assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = SelectBiasMap[SI];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ Value *Cond = SI->getCondition();
+ addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
+ MergedCondition);
+ Value *NewCondition = IsTrueBiased ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ SI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
+// condition.
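+// For example (illustrative): with a true-biased condition %c1 and a
+// false-biased condition %c2, the merged condition is built roughly as
+//   %m1 = and i1 true, %c1
+//   %n2 = xor i1 true, %c2   ; skipped if the icmp %c2 is negated in place
+//   %m2 = and i1 %m1, %n2
+// and %m2 becomes the condition of the merged CHR branch.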
+void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition) {
+ if (IsTrueBiased) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ } else {
+ // If Cond is an icmp and all of its users except for BranchOrSelect are
+ // branches or selects, negate the icmp predicate and swap the branch
+ // targets (or select operands) to avoid inserting an Xor to negate Cond.
+ bool Done = false;
+ if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
+ if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ Done = true;
+ }
+ if (!Done) {
+ Value *Negate = IRB.CreateXor(
+ ConstantInt::getTrue(F.getContext()), Cond);
+ MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
+ }
+ }
+}
+
+void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
+ unsigned I = 0;
+ DenseSet<PHINode *> TrivialPHIs;
+ for (CHRScope *Scope : CHRScopes) {
+ transformScopes(Scope, TrivialPHIs);
+ CHR_DEBUG(
+ std::ostringstream oss;
+ oss << " after transformScopes " << I++;
+ dumpIR(F, oss.str().c_str(), nullptr));
+ (void)I;
+ }
+}
+
+static void LLVM_ATTRIBUTE_UNUSED
+dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
+ dbgs() << Label << " " << Scopes.size() << "\n";
+ for (CHRScope *Scope : Scopes) {
+ dbgs() << *Scope << "\n";
+ }
+}
+
+bool CHR::run() {
+ if (!shouldApply(F, PSI))
+ return false;
+
+ CHR_DEBUG(dumpIR(F, "before", nullptr));
+
+ bool Changed = false;
+ {
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Recursively traverse the region tree and find regions that have biased
+ // branches and/or selects and create scopes.
+ SmallVector<CHRScope *, 8> AllScopes;
+ findScopes(AllScopes);
+ CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
+
+ // Split the scopes if 1) the condition values of the biased
+ // branches/selects of the inner/lower scope can't be hoisted up to the
+ // outermost/uppermost scope entry, or 2) the condition values of the biased
+ // branches/selects in a scope (including subscopes) don't share at least
+ // one common value.
+ SmallVector<CHRScope *, 8> SplitScopes;
+ splitScopes(AllScopes, SplitScopes);
+ CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
+
+ // After splitting, set the biased regions and selects of a scope (a tree
+ // root) that include those of the subscopes.
+ classifyBiasedScopes(SplitScopes);
+ CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
+
+ // Filter out the scopes that have only one biased region or select (CHR
+ // isn't useful in such a case).
+ SmallVector<CHRScope *, 8> FilteredScopes;
+ filterScopes(SplitScopes, FilteredScopes);
+ CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
+
+ // Set the regions to be CHR'ed and their hoist stops for each scope.
+ SmallVector<CHRScope *, 8> SetScopes;
+ setCHRRegions(FilteredScopes, SetScopes);
+ CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
+
+ // Sort CHRScopes by the depth so that outer CHRScopes come before inner
+ // ones. We need to apply CHR from outer to inner so that we apply CHR only
+ // to the hot path, rather than both hot and cold paths.
+ SmallVector<CHRScope *, 8> SortedScopes;
+ sortScopes(SetScopes, SortedScopes);
+ CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
+
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Apply the CHR transformation.
+ if (!SortedScopes.empty()) {
+ transformScopes(SortedScopes);
+ Changed = true;
+ }
+ }
+
+ if (Changed) {
+ CHR_DEBUG(dumpIR(F, "after", &Stats));
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
+ << ore::NV("Function", &F) << " "
+ << "Reduced the number of branches in hot paths by "
+ << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
+ << " (static) and "
+ << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
+ << " (weighted by PGO count)";
+ });
+ }
+
+ return Changed;
+}
+
+bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
+ BlockFrequencyInfo &BFI =
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ProfileSummaryInfo &PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
+ std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
+ llvm::make_unique<OptimizationRemarkEmitter>(&F);
+ return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
+}
+
+namespace llvm {
+
+ControlHeightReductionPass::ControlHeightReductionPass() {
+ parseCHRFilterFiles();
+}
+
+PreservedAnalyses ControlHeightReductionPass::run(
+ Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index f03fcc9c4e2c..4c3c6c9added 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -645,8 +645,8 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
GlobalValue::LinkageTypes NewFLink,
FunctionType *NewFT) {
FunctionType *FT = F->getFunctionType();
- Function *NewF = Function::Create(NewFT, NewFLink, NewFName,
- F->getParent());
+ Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
+ NewFName, F->getParent());
NewF->copyAttributesFrom(F);
NewF->removeAttributes(
AttributeList::ReturnIndex,
@@ -819,7 +819,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// easily identify cases of mismatching ABIs.
if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
FunctionType *NewFT = getArgsFunctionType(FT);
- Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
+ Function *NewF = Function::Create(NewFT, F.getLinkage(),
+ F.getAddressSpace(), "", &M);
NewF->copyAttributesFrom(&F);
NewF->removeAttributes(
AttributeList::ReturnIndex,
@@ -924,7 +925,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
Instruction *Next = Inst->getNextNode();
// DFSanVisitor may delete Inst, so keep track of whether it was a
// terminator.
- bool IsTerminator = isa<TerminatorInst>(Inst);
+ bool IsTerminator = Inst->isTerminator();
if (!DFSF.SkipInsts.count(Inst))
DFSanVisitor(DFSF).visit(Inst);
if (IsTerminator)
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
index 33f220a893df..db438e78ded9 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -144,21 +144,6 @@ OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
return Options;
}
-// Create a constant for Str so that we can pass it to the run-time lib.
-static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
- bool AllowMerging) {
- Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- // We use private linkage for module-local strings. If they can be merged
- // with another one, we set the unnamed_addr attribute.
- GlobalVariable *GV =
- new GlobalVariable(M, StrConst->getType(), true,
- GlobalValue::PrivateLinkage, StrConst, "");
- if (AllowMerging)
- GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
- return GV;
-}
-
/// EfficiencySanitizer: instrument each module to find performance issues.
class EfficiencySanitizer : public ModulePass {
public:
@@ -902,7 +887,7 @@ bool EfficiencySanitizer::instrumentFastpathWorkingSet(
Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
// The AND and CMP will be turned into a TEST instruction by the compiler.
Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
- TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
+ Instruction *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
// FIXME: do I need to call SetCurrentDebugLocation?
IRB.SetInsertPoint(CmpTerm);
// We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 132e8089fe3b..9af64ed332cd 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -21,9 +21,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/UniqueVector.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
@@ -36,6 +36,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
@@ -96,30 +97,25 @@ private:
// profiling runtime to emit .gcda files when run.
bool emitProfileArcs();
+ bool isFunctionInstrumented(const Function &F);
+ std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
+ static bool doesFilenameMatchARegex(StringRef Filename,
+ std::vector<Regex> &Regexes);
+
// Get pointers to the functions in the runtime library.
Constant *getStartFileFunc();
- Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
Constant *getSummaryInfoFunc();
Constant *getEndFileFunc();
- // Create or retrieve an i32 state value that is used to represent the
- // pred block number for certain non-trivial edges.
- GlobalVariable *getEdgeStateValue();
-
- // Produce a table of pointers to counters, by predecessor and successor
- // block number.
- GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs);
-
// Add the function to write out all our counters to the global destructor
// list.
Function *
insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
Function *insertFlush(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
- void insertIndirectCounterIncrement();
+
+ void AddFlushBeforeForkAndExec();
enum class GCovFileType { GCNO, GCDA };
std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
@@ -135,6 +131,9 @@ private:
const TargetLibraryInfo *TLI;
LLVMContext *Ctx;
SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
+ std::vector<Regex> FilterRe;
+ std::vector<Regex> ExcludeRe;
+ StringMap<bool> InstrumentedFiles;
};
class GCOVProfilerLegacyPass : public ModulePass {
@@ -181,6 +180,21 @@ static StringRef getFunctionName(const DISubprogram *SP) {
return SP->getName();
}
+/// Extract a filename for a DISubprogram.
+///
+/// Prefer relative paths in the coverage notes. Clang also may split
+/// up absolute paths into a directory and filename component. When
+/// the relative path doesn't exist, reconstruct the absolute path.
+static SmallString<128> getFilename(const DISubprogram *SP) {
+ SmallString<128> Path;
+ StringRef RelPath = SP->getFilename();
+ if (sys::fs::exists(RelPath))
+ Path = RelPath;
+ else
+ sys::path::append(Path, SP->getDirectory(), SP->getFilename());
+ return Path;
+}
+
namespace {
class GCOVRecord {
protected:
@@ -257,7 +271,7 @@ namespace {
}
private:
- StringRef Filename;
+ std::string Filename;
SmallVector<uint32_t, 32> Lines;
};
@@ -287,11 +301,10 @@ namespace {
write(Len);
write(Number);
- llvm::sort(
- SortedLinesByFile.begin(), SortedLinesByFile.end(),
- [](StringMapEntry<GCOVLines> *LHS, StringMapEntry<GCOVLines> *RHS) {
- return LHS->getKey() < RHS->getKey();
- });
+ llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
+ StringMapEntry<GCOVLines> *RHS) {
+ return LHS->getKey() < RHS->getKey();
+ });
for (auto &I : SortedLinesByFile)
I->getValue().writeOut();
write(0);
@@ -379,8 +392,9 @@ namespace {
void writeOut() {
writeBytes(FunctionTag, 4);
+ SmallString<128> Filename = getFilename(SP);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
- 1 + lengthOfGCOVString(SP->getFilename()) + 1;
+ 1 + lengthOfGCOVString(Filename) + 1;
if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
@@ -389,7 +403,7 @@ namespace {
if (UseCfgChecksum)
write(CfgChecksum);
writeGCOVString(getFunctionName(SP));
- writeGCOVString(SP->getFilename());
+ writeGCOVString(Filename);
write(SP->getLine());
// Emit count of blocks.
@@ -434,6 +448,72 @@ namespace {
};
}
+// RegexesStr is a string containing different regexes separated by semicolons.
+// For example "foo\..*$;bar\..*$".
+std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
+ std::vector<Regex> Regexes;
+ while (!RegexesStr.empty()) {
+ std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
+ if (!HeadTail.first.empty()) {
+ Regex Re(HeadTail.first);
+ std::string Err;
+ if (!Re.isValid(Err)) {
+ Ctx->emitError(Twine("Regex ") + HeadTail.first +
+ " is not valid: " + Err);
+ }
+ Regexes.emplace_back(std::move(Re));
+ }
+ RegexesStr = HeadTail.second;
+ }
+ return Regexes;
+}
+
+bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
+ std::vector<Regex> &Regexes) {
+ for (Regex &Re : Regexes) {
+ if (Re.match(Filename)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
+ if (FilterRe.empty() && ExcludeRe.empty()) {
+ return true;
+ }
+ SmallString<128> Filename = getFilename(F.getSubprogram());
+ auto It = InstrumentedFiles.find(Filename);
+ if (It != InstrumentedFiles.end()) {
+ return It->second;
+ }
+
+ SmallString<256> RealPath;
+ StringRef RealFilename;
+
+ // Path can be
+ // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
+ // such a case we must get the real_path.
+ if (sys::fs::real_path(Filename, RealPath)) {
+ // real_path can fail with path like "foo.c".
+ RealFilename = Filename;
+ } else {
+ RealFilename = RealPath;
+ }
+
+ bool ShouldInstrument;
+ if (FilterRe.empty()) {
+ ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
+ } else if (ExcludeRe.empty()) {
+ ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
+ } else {
+ ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
+ !doesFilenameMatchARegex(RealFilename, ExcludeRe);
+ }
+ InstrumentedFiles[Filename] = ShouldInstrument;
+ return ShouldInstrument;
+}
+
std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
GCovFileType OutputType) {
bool Notes = OutputType == GCovFileType::GCNO;
@@ -481,6 +561,11 @@ bool GCOVProfiler::runOnModule(Module &M, const TargetLibraryInfo &TLI) {
this->TLI = &TLI;
Ctx = &M.getContext();
+ AddFlushBeforeForkAndExec();
+
+ FilterRe = createRegexesFromString(Options.Filter);
+ ExcludeRe = createRegexesFromString(Options.Exclude);
+
if (Options.EmitNotes) emitProfileNotes();
if (Options.EmitData) return emitProfileArcs();
return false;
@@ -537,6 +622,38 @@ static bool shouldKeepInEntry(BasicBlock::iterator It) {
return false;
}
+void GCOVProfiler::AddFlushBeforeForkAndExec() {
+ SmallVector<Instruction *, 2> ForkAndExecs;
+ for (auto &F : M->functions()) {
+ for (auto &I : instructions(F)) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (Function *Callee = CI->getCalledFunction()) {
+ LibFunc LF;
+ if (TLI->getLibFunc(*Callee, LF) &&
+ (LF == LibFunc_fork || LF == LibFunc_execl ||
+ LF == LibFunc_execle || LF == LibFunc_execlp ||
+ LF == LibFunc_execv || LF == LibFunc_execvp ||
+ LF == LibFunc_execve || LF == LibFunc_execvpe ||
+ LF == LibFunc_execvP)) {
+ ForkAndExecs.push_back(&I);
+ }
+ }
+ }
+ }
+ }
+
+ // We need to split the block after the fork/exec call
+ // because otherwise the counters for the lines after the call
+ // would be the same as for the lines before it.
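+ // For example (illustrative): a call to fork() becomes
+ //   __gcov_flush();
+ //   fork();   // the call now starts a new basic block
+ // so the lines after the call get counters of their own.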
+ for (auto I : ForkAndExecs) {
+ IRBuilder<> Builder(I);
+ FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
+ Constant *GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
+ Builder.CreateCall(GCOVFlush);
+ I->getParent()->splitBasicBlock(I);
+ }
+}
+
void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -566,7 +683,8 @@ void GCOVProfiler::emitProfileNotes() {
for (auto &F : M->functions()) {
DISubprogram *SP = F.getSubprogram();
if (!SP) continue;
- if (!functionHasLines(F)) continue;
+ if (!functionHasLines(F) || !isFunctionInstrumented(F))
+ continue;
// TODO: Functions using scope-based EH are currently not supported.
if (isUsingScopeBasedEH(F)) continue;
@@ -583,9 +701,15 @@ void GCOVProfiler::emitProfileNotes() {
Options.ExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
+ // Add the function line number to the lines of the entry block
+ // to have a counter for the function definition.
+ uint32_t Line = SP->getLine();
+ auto Filename = getFilename(SP);
+ Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
+
for (auto &BB : F) {
GCOVBlock &Block = Func.getBlock(&BB);
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (int successors = TI->getNumSuccessors()) {
for (int i = 0; i != successors; ++i) {
Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
@@ -594,7 +718,6 @@ void GCOVProfiler::emitProfileNotes() {
Block.addEdge(Func.getReturnBlock());
}
- uint32_t Line = 0;
for (auto &I : BB) {
// Debug intrinsic locations correspond to the location of the
// declaration, not necessarily any statements or expressions.
@@ -605,16 +728,18 @@ void GCOVProfiler::emitProfileNotes() {
continue;
// Artificial lines such as calls to the global constructors.
- if (Loc.getLine() == 0) continue;
+ if (Loc.getLine() == 0 || Loc.isImplicitCode())
+ continue;
if (Line == Loc.getLine()) continue;
Line = Loc.getLine();
if (SP != getDISubprogram(Loc.getScope()))
continue;
- GCOVLines &Lines = Block.getFile(SP->getFilename());
+ GCOVLines &Lines = Block.getFile(Filename);
Lines.addLine(Loc.getLine());
}
+ Line = 0;
}
EdgeDestinations += Func.getEdgeDestinations();
}
@@ -639,24 +764,28 @@ bool GCOVProfiler::emitProfileArcs() {
if (!CU_Nodes) return false;
bool Result = false;
- bool InsertIndCounterIncrCode = false;
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
for (auto &F : M->functions()) {
DISubprogram *SP = F.getSubprogram();
if (!SP) continue;
- if (!functionHasLines(F)) continue;
+ if (!functionHasLines(F) || !isFunctionInstrumented(F))
+ continue;
// TODO: Functions using scope-based EH are currently not supported.
if (isUsingScopeBasedEH(F)) continue;
if (!Result) Result = true;
+ DenseMap<std::pair<BasicBlock *, BasicBlock *>, unsigned> EdgeToCounter;
unsigned Edges = 0;
for (auto &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
- if (isa<ReturnInst>(TI))
- ++Edges;
- else
- Edges += TI->getNumSuccessors();
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI)) {
+ EdgeToCounter[{&BB, nullptr}] = Edges++;
+ } else {
+ for (BasicBlock *Succ : successors(TI)) {
+ EdgeToCounter[{&BB, Succ}] = Edges++;
+ }
+ }
}
ArrayType *CounterTy =
@@ -668,63 +797,42 @@ bool GCOVProfiler::emitProfileArcs() {
"__llvm_gcov_ctr");
CountersBySP.push_back(std::make_pair(Counters, SP));
- UniqueVector<BasicBlock *> ComplexEdgePreds;
- UniqueVector<BasicBlock *> ComplexEdgeSuccs;
-
- unsigned Edge = 0;
+ // If a BB has several predecessors, use a PHINode to select
+ // the correct counter.
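+ // For example (illustrative): a block with two predecessors gets a phi like
+ //   %ctr = phi i64* [ %ctr.edge2, %pred0 ], [ %ctr.edge5, %pred1 ]
+ // where each incoming value is a GEP into __llvm_gcov_ctr for that edge's
+ // counter, followed by a single load/add/store through %ctr.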
for (auto &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
- int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
- if (Successors) {
- if (Successors == 1) {
- IRBuilder<> Builder(&*BB.getFirstInsertionPt());
- Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
- Edge);
- Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count, Builder.getInt64(1));
- Builder.CreateStore(Count, Counter);
- } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- IRBuilder<> Builder(BI);
- Value *Sel = Builder.CreateSelect(BI->getCondition(),
- Builder.getInt64(Edge),
- Builder.getInt64(Edge + 1));
- Value *Counter = Builder.CreateInBoundsGEP(
- Counters->getValueType(), Counters, {Builder.getInt64(0), Sel});
+ const unsigned EdgeCount =
+ std::distance(pred_begin(&BB), pred_end(&BB));
+ if (EdgeCount) {
+ // The phi node must be at the beginning of the BB.
+ IRBuilder<> BuilderForPhi(&*BB.begin());
+ Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+ PHINode *Phi = BuilderForPhi.CreatePHI(Int64PtrTy, EdgeCount);
+ for (BasicBlock *Pred : predecessors(&BB)) {
+ auto It = EdgeToCounter.find({Pred, &BB});
+ assert(It != EdgeToCounter.end());
+ const unsigned Edge = It->second;
+ Value *EdgeCounter =
+ BuilderForPhi.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
+ Phi->addIncoming(EdgeCounter, Pred);
+ }
+
+ // Skip phis, landingpads.
+ IRBuilder<> Builder(&*BB.getFirstInsertionPt());
+ Value *Count = Builder.CreateLoad(Phi);
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, Phi);
+
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI)) {
+ auto It = EdgeToCounter.find({&BB, nullptr});
+ assert(It != EdgeToCounter.end());
+ const unsigned Edge = It->second;
+ Value *Counter =
+ Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
Value *Count = Builder.CreateLoad(Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
- } else {
- ComplexEdgePreds.insert(&BB);
- for (int i = 0; i != Successors; ++i)
- ComplexEdgeSuccs.insert(TI->getSuccessor(i));
}
-
- Edge += Successors;
- }
- }
-
- if (!ComplexEdgePreds.empty()) {
- GlobalVariable *EdgeTable =
- buildEdgeLookupTable(&F, Counters,
- ComplexEdgePreds, ComplexEdgeSuccs);
- GlobalVariable *EdgeState = getEdgeStateValue();
-
- for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(&*ComplexEdgePreds[i + 1]->getFirstInsertionPt());
- Builder.CreateStore(Builder.getInt32(i), EdgeState);
- }
-
- for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
- // Call runtime to perform increment.
- IRBuilder<> Builder(&*ComplexEdgeSuccs[i + 1]->getFirstInsertionPt());
- Value *CounterPtrArray =
- Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
- i * ComplexEdgePreds.size());
-
- // Build code to increment the counter.
- InsertIndCounterIncrCode = true;
- Builder.CreateCall(getIncrementIndirectCounterFunc(),
- {EdgeState, CounterPtrArray});
}
}
}
@@ -763,60 +871,9 @@ bool GCOVProfiler::emitProfileArcs() {
appendToGlobalCtors(*M, F, 0);
}
- if (InsertIndCounterIncrCode)
- insertIndirectCounterIncrement();
-
return Result;
}
-// All edges with successors that aren't branches are "complex", because it
-// requires complex logic to pick which counter to update.
-GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
- Function *F,
- GlobalVariable *Counters,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs) {
- // TODO: support invoke, threads. We rely on the fact that nothing can modify
- // the whole-Module pred edge# between the time we set it and the time we next
- // read it. Threads and invoke make this untrue.
-
- // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
- size_t TableSize = Succs.size() * Preds.size();
- Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
- ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize);
-
- std::unique_ptr<Constant * []> EdgeTable(new Constant *[TableSize]);
- Constant *NullValue = Constant::getNullValue(Int64PtrTy);
- for (size_t i = 0; i != TableSize; ++i)
- EdgeTable[i] = NullValue;
-
- unsigned Edge = 0;
- for (BasicBlock &BB : *F) {
- TerminatorInst *TI = BB.getTerminator();
- int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
- if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
- for (int i = 0; i != Successors; ++i) {
- BasicBlock *Succ = TI->getSuccessor(i);
- IRBuilder<> Builder(Succ);
- Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
- Edge + i);
- EdgeTable[((Succs.idFor(Succ) - 1) * Preds.size()) +
- (Preds.idFor(&BB) - 1)] = cast<Constant>(Counter);
- }
- }
- Edge += Successors;
- }
-
- GlobalVariable *EdgeTableGV =
- new GlobalVariable(
- *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
- ConstantArray::get(EdgeTableTy,
- makeArrayRef(&EdgeTable[0],TableSize)),
- "__llvm_gcda_edge_table");
- EdgeTableGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- return EdgeTableGV;
-}
-
Constant *GCOVProfiler::getStartFileFunc() {
Type *Args[] = {
Type::getInt8PtrTy(*Ctx), // const char *orig_filename
@@ -832,17 +889,6 @@ Constant *GCOVProfiler::getStartFileFunc() {
}
-Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
- Type *Int64Ty = Type::getInt64Ty(*Ctx);
- Type *Args[] = {
- Int32Ty->getPointerTo(), // uint32_t *predecessor
- Int64Ty->getPointerTo()->getPointerTo() // uint64_t **counters
- };
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
- return M->getOrInsertFunction("__llvm_gcov_indirect_counter_increment", FTy);
-}
-
Constant *GCOVProfiler::getEmitFunctionFunc() {
Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
@@ -886,19 +932,6 @@ Constant *GCOVProfiler::getEndFileFunc() {
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
}
-GlobalVariable *GCOVProfiler::getEdgeStateValue() {
- GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred");
- if (!GV) {
- GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false,
- GlobalValue::InternalLinkage,
- ConstantInt::get(Type::getInt32Ty(*Ctx),
- 0xffffffff),
- "__llvm_gcov_global_state_pred");
- GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- }
- return GV;
-}
-
Function *GCOVProfiler::insertCounterWriteout(
ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
@@ -1122,57 +1155,6 @@ Function *GCOVProfiler::insertCounterWriteout(
return WriteoutF;
}
-void GCOVProfiler::insertIndirectCounterIncrement() {
- Function *Fn =
- cast<Function>(GCOVProfiler::getIncrementIndirectCounterFunc());
- Fn->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- Fn->setLinkage(GlobalValue::InternalLinkage);
- Fn->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- Fn->addFnAttr(Attribute::NoRedZone);
-
- // Create basic blocks for function.
- BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn);
- IRBuilder<> Builder(BB);
-
- BasicBlock *PredNotNegOne = BasicBlock::Create(*Ctx, "", Fn);
- BasicBlock *CounterEnd = BasicBlock::Create(*Ctx, "", Fn);
- BasicBlock *Exit = BasicBlock::Create(*Ctx, "exit", Fn);
-
- // uint32_t pred = *predecessor;
- // if (pred == 0xffffffff) return;
- Argument *Arg = &*Fn->arg_begin();
- Arg->setName("predecessor");
- Value *Pred = Builder.CreateLoad(Arg, "pred");
- Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff));
- BranchInst::Create(Exit, PredNotNegOne, Cond, BB);
-
- Builder.SetInsertPoint(PredNotNegOne);
-
- // uint64_t *counter = counters[pred];
- // if (!counter) return;
- Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
- Arg = &*std::next(Fn->arg_begin());
- Arg->setName("counters");
- Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred);
- Value *Counter = Builder.CreateLoad(GEP, "counter");
- Cond = Builder.CreateICmpEQ(Counter,
- Constant::getNullValue(
- Builder.getInt64Ty()->getPointerTo()));
- Builder.CreateCondBr(Cond, Exit, CounterEnd);
-
- // ++*counter;
- Builder.SetInsertPoint(CounterEnd);
- Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter),
- Builder.getInt64(1));
- Builder.CreateStore(Add, Counter);
- Builder.CreateBr(Exit);
-
- // Fill in the exit block.
- Builder.SetInsertPoint(Exit);
- Builder.CreateRetVoid();
-}
-
Function *GCOVProfiler::
insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index d62598bb5d4f..d04c2b76288f 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -44,6 +44,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <sstream>
using namespace llvm;
@@ -63,6 +64,8 @@ static const uint64_t kDynamicShadowSentinel =
std::numeric_limits<uint64_t>::max();
static const unsigned kPointerTagShift = 56;
+static const unsigned kShadowBaseAlignment = 32;
+
static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
"hwasan-memory-access-callback-prefix",
cl::desc("Prefix for memory access callbacks"), cl::Hidden,
@@ -127,6 +130,32 @@ static cl::opt<unsigned long long> ClMappingOffset(
cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"), cl::Hidden,
cl::init(0));
+static cl::opt<bool>
+ ClWithIfunc("hwasan-with-ifunc",
+ cl::desc("Access dynamic shadow through an ifunc global on "
+ "platforms that support this"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClWithTls(
+ "hwasan-with-tls",
+ cl::desc("Access dynamic shadow through a thread-local pointer on "
+ "platforms that support this"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClRecordStackHistory("hwasan-record-stack-history",
+ cl::desc("Record stack frames with tagged allocations "
+ "in a thread-local ring buffer"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool>
+ ClCreateFrameDescriptions("hwasan-create-frame-descriptions",
+ cl::desc("create static frame descriptions"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
+ cl::desc("instrument memory intrinsics"),
+ cl::Hidden, cl::init(true));
namespace {
/// An instrumentation pass implementing detection of addressability bugs
@@ -150,13 +179,14 @@ public:
void initializeCallbacks(Module &M);
- void maybeInsertDynamicShadowAtFunctionEntry(Function &F);
+ Value *getDynamicShadowNonTls(IRBuilder<> &IRB);
void untagPointerOperand(Instruction *I, Value *Addr);
Value *memToShadow(Value *Shadow, Type *Ty, IRBuilder<> &IRB);
void instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore);
+ void instrumentMemIntrinsic(MemIntrinsic *MI);
bool instrumentMemAccess(Instruction *I);
Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
uint64_t *TypeSize, unsigned *Alignment,
@@ -167,26 +197,53 @@ public:
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
bool instrumentStack(SmallVectorImpl<AllocaInst *> &Allocas,
- SmallVectorImpl<Instruction *> &RetVec);
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
Value *getNextTagWithCall(IRBuilder<> &IRB);
Value *getStackBaseTag(IRBuilder<> &IRB);
Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
unsigned AllocaNo);
Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
+ Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
+ Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
+
private:
LLVMContext *C;
+ std::string CurModuleUniqueId;
Triple TargetTriple;
+ Function *HWAsanMemmove, *HWAsanMemcpy, *HWAsanMemset;
+
+ // Frame description is a way to pass names/sizes of local variables
+ // to the run-time w/o adding extra executable code in every function.
+ // We do this by creating a separate section with {PC,Descr} pairs and passing
+ // the section beg/end to __hwasan_init_frames() at module init time.
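+ // For example (illustrative): a function with locals "int buf[4]; char c;"
+ // would get the description string "16 buf; 1 c; " paired with the function
+ // address in the __hwasan_frames section.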
+ std::string createFrameString(ArrayRef<AllocaInst*> Allocas);
+ void createFrameGlobal(Function &F, const std::string &FrameString);
+ // Get the section name for frame descriptions. Currently ELF-only.
+ const char *getFrameSection() { return "__hwasan_frames"; }
+ const char *getFrameSectionBeg() { return "__start___hwasan_frames"; }
+ const char *getFrameSectionEnd() { return "__stop___hwasan_frames"; }
+ GlobalVariable *createFrameSectionBound(Module &M, Type *Ty,
+ const char *Name) {
+ auto GV = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, Name);
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ return GV;
+ }
/// This struct defines the shadow mapping using the rule:
/// shadow = (mem >> Scale) + Offset.
/// If InGlobal is true, then
/// extern char __hwasan_shadow[];
/// shadow = (mem >> Scale) + &__hwasan_shadow
+ /// If InTls is true, then
+ /// extern char *__hwasan_tls;
+ /// shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
struct ShadowMapping {
int Scale;
uint64_t Offset;
bool InGlobal;
+ bool InTls;
void init(Triple &TargetTriple);
unsigned getAllocaAlignment() const { return 1U << Scale; }
@@ -194,6 +251,7 @@ private:
ShadowMapping Mapping;
Type *IntptrTy;
+ Type *Int8PtrTy;
Type *Int8Ty;
bool CompileKernel;
@@ -206,10 +264,12 @@ private:
Function *HwasanTagMemoryFunc;
Function *HwasanGenerateTagFunc;
+ Function *HwasanThreadEnterFunc;
Constant *ShadowGlobal;
Value *LocalDynamicShadow = nullptr;
+ GlobalValue *ThreadPtrGlobal = nullptr;
};
} // end anonymous namespace
@@ -243,8 +303,10 @@ bool HWAddressSanitizer::doInitialization(Module &M) {
Mapping.init(TargetTriple);
C = &(M.getContext());
+ CurModuleUniqueId = getUniqueModuleId(&M);
IRBuilder<> IRB(*C);
IntptrTy = IRB.getIntPtrTy(DL);
+ Int8PtrTy = IRB.getInt8PtrTy();
Int8Ty = IRB.getInt8Ty();
HwasanCtorFunction = nullptr;
@@ -254,8 +316,38 @@ bool HWAddressSanitizer::doInitialization(Module &M) {
kHwasanInitName,
/*InitArgTypes=*/{},
/*InitArgs=*/{});
- appendToGlobalCtors(M, HwasanCtorFunction, 0);
+ Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+ HwasanCtorFunction->setComdat(CtorComdat);
+ appendToGlobalCtors(M, HwasanCtorFunction, 0, HwasanCtorFunction);
+
+ // Create a zero-length global in __hwasan_frames so that the linker will
+ // always create start and stop symbols.
+ //
+ // N.B. If we ever start creating associated metadata in this pass this
+ // global will need to be associated with the ctor.
+ Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
+ auto GV =
+ new GlobalVariable(M, Int8Arr0Ty, /*isConstantGlobal*/ true,
+ GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(Int8Arr0Ty), "__hwasan");
+ GV->setSection(getFrameSection());
+ GV->setComdat(CtorComdat);
+ appendToCompilerUsed(M, GV);
+
+ IRBuilder<> IRBCtor(HwasanCtorFunction->getEntryBlock().getTerminator());
+ IRBCtor.CreateCall(
+ declareSanitizerInitFunction(M, "__hwasan_init_frames",
+ {Int8PtrTy, Int8PtrTy}),
+ {createFrameSectionBound(M, Int8Ty, getFrameSectionBeg()),
+ createFrameSectionBound(M, Int8Ty, getFrameSectionEnd())});
}
+
+ if (!TargetTriple.isAndroid())
+ appendToCompilerUsed(
+ M, ThreadPtrGlobal = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr,
+ "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel));
+
return true;
}
@@ -281,21 +373,35 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
}
HwasanTagMemoryFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__hwasan_tag_memory", IRB.getVoidTy(), IntptrTy, Int8Ty, IntptrTy));
+ "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy));
HwasanGenerateTagFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty));
if (Mapping.InGlobal)
ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
ArrayType::get(IRB.getInt8Ty(), 0));
+
+ const std::string MemIntrinCallbackPrefix =
+ CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
+ HWAsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
+ HWAsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
+ HWAsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
+
+ HwasanThreadEnterFunc = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy()));
}
-void HWAddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
+Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
// Generate code only when dynamic addressing is needed.
if (Mapping.Offset != kDynamicShadowSentinel)
- return;
+ return nullptr;
- IRBuilder<> IRB(&F.front().front());
if (Mapping.InGlobal) {
// An empty inline asm with input reg == output reg.
// An opaque pointer-to-int cast, basically.
@@ -303,11 +409,12 @@ void HWAddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
FunctionType::get(IntptrTy, {ShadowGlobal->getType()}, false),
StringRef(""), StringRef("=r,0"),
/*hasSideEffects=*/false);
- LocalDynamicShadow = IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
+ return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
} else {
- Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
- kHwasanShadowMemoryDynamicAddress, IntptrTy);
- LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress);
+ Value *GlobalDynamicAddress =
+ IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
+ kHwasanShadowMemoryDynamicAddress, IntptrTy);
+ return IRB.CreateLoad(GlobalDynamicAddress);
}
}
@@ -421,8 +528,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
IRB.getInt8Ty());
Value *AddrLong = untagPointer(IRB, PtrLong);
Value *ShadowLong = memToShadow(AddrLong, PtrLong->getType(), IRB);
- Value *MemTag =
- IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, IRB.getInt8PtrTy()));
+ Value *MemTag = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, Int8PtrTy));
Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
@@ -433,7 +539,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
}
- TerminatorInst *CheckTerm =
+ Instruction *CheckTerm =
SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, !Recover,
MDBuilder(*C).createBranchWeights(1, 100000));
@@ -464,12 +570,36 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
IRB.CreateCall(Asm, PtrLong);
}
+void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ IRBuilder<> IRB(MI);
+ if (isa<MemTransferInst>(MI)) {
+ IRB.CreateCall(
+ isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ } else if (isa<MemSetInst>(MI)) {
+ IRB.CreateCall(
+ HWAsanMemset,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ }
+ MI->eraseFromParent();
+}
+
bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
LLVM_DEBUG(dbgs() << "Instrumenting: " << *I << "\n");
bool IsWrite = false;
unsigned Alignment = 0;
uint64_t TypeSize = 0;
Value *MaybeMask = nullptr;
+
+ if (ClInstrumentMemIntrinsics && isa<MemIntrinsic>(I)) {
+ instrumentMemIntrinsic(cast<MemIntrinsic>(I));
+ return true;
+ }
+
Value *Addr =
isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);
@@ -521,13 +651,13 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
if (ClInstrumentWithCalls) {
IRB.CreateCall(HwasanTagMemoryFunc,
- {IRB.CreatePointerCast(AI, IntptrTy), JustTag,
+ {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
ConstantInt::get(IntptrTy, Size)});
} else {
size_t ShadowSize = Size >> Mapping.Scale;
Value *ShadowPtr = IRB.CreateIntToPtr(
memToShadow(IRB.CreatePointerCast(AI, IntptrTy), AI->getType(), IRB),
- IRB.getInt8PtrTy());
+ Int8PtrTy);
// If this memset is not inlined, it will be intercepted in the hwasan
// runtime library. That's OK, because the interceptor skips the checks if
// the address is in the shadow region.
@@ -557,7 +687,7 @@ Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
if (ClGenerateTagsWithCalls)
- return nullptr;
+ return getNextTagWithCall(IRB);
// FIXME: use addressofreturnaddress (but implement it in aarch64 backend
// first).
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
@@ -625,15 +755,141 @@ Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
return UntaggedPtrLong;
}
-bool HWAddressSanitizer::instrumentStack(
- SmallVectorImpl<AllocaInst *> &Allocas,
- SmallVectorImpl<Instruction *> &RetVec) {
- Function *F = Allocas[0]->getParent()->getParent();
- Instruction *InsertPt = &*F->getEntryBlock().begin();
- IRBuilder<> IRB(InsertPt);
+Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
+ // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
+ // in Bionic's libc/private/bionic_tls.h.
+ Function *ThreadPointerFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
+ Value *SlotPtr = IRB.CreatePointerCast(
+ IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x30),
+ Ty->getPointerTo(0));
+ return SlotPtr;
+ }
+ if (ThreadPtrGlobal)
+ return ThreadPtrGlobal;
+
+
+ return nullptr;
+}
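
On AArch64 Android the per-thread hwasan state lives in a fixed TLS slot 0x30 bytes past the thread pointer (TLS_SLOT_SANITIZER in bionic_tls.h), which is what the CreateConstGEP1_32(..., 0x30) above computes. A hedged, source-level sketch of that address computation (not code from the pass):

    #include <cstdint>

    // Sketch: the address the emitted IR reads on AArch64 Android.
    static uintptr_t *hwasanThreadSlot() {
    #if defined(__aarch64__)
      // __builtin_thread_pointer() yields TPIDR_EL0; the sanitizer slot is at +0x30.
      char *tp = static_cast<char *>(__builtin_thread_pointer());
      return reinterpret_cast<uintptr_t *>(tp + 0x30);
    #else
      return nullptr; // other configurations use ThreadPtrGlobal or have no slot
    #endif
    }
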
+
+// Creates a string with a description of the stack frame (set of Allocas).
+// The string is intended to be human readable.
+// The current form is: Size1 Name1; Size2 Name2; ...
+std::string
+HWAddressSanitizer::createFrameString(ArrayRef<AllocaInst *> Allocas) {
+ std::ostringstream Descr;
+ for (auto AI : Allocas)
+ Descr << getAllocaSizeInBytes(*AI) << " " << AI->getName().str() << "; ";
+ return Descr.str();
+}
- Value *StackTag = getStackBaseTag(IRB);
+// Creates a global in the frame section which consists of two pointers:
+// the function PC and the frame string constant.
+void HWAddressSanitizer::createFrameGlobal(Function &F,
+ const std::string &FrameString) {
+ Module &M = *F.getParent();
+ auto DescrGV = createPrivateGlobalForString(M, FrameString, true);
+ auto PtrPairTy = StructType::get(F.getType(), DescrGV->getType());
+ auto GV = new GlobalVariable(
+ M, PtrPairTy, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
+ ConstantStruct::get(PtrPairTy, (Constant *)&F, (Constant *)DescrGV),
+ "__hwasan");
+ GV->setSection(getFrameSection());
+ appendToCompilerUsed(M, GV);
+ // Put GV into F's Comdat so that if F is deleted GV can be deleted too.
+ if (auto Comdat =
+ GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
+ GV->setComdat(Comdat);
+}
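
Taken together, createFrameString() and createFrameGlobal() record, for each instrumented function, a (PC, description) pair in the hwasan frame section, where the description lists each alloca's size and name. A small self-contained sketch of the string format; the alloca names and sizes are made up for illustration:

    #include <sstream>
    #include <string>
    #include <utility>
    #include <vector>

    // Mirrors the "Size1 Name1; Size2 Name2; " format produced above.
    std::string buildFrameString(
        const std::vector<std::pair<unsigned, std::string>> &Allocas) {
      std::ostringstream Descr;
      for (const auto &A : Allocas)
        Descr << A.first << " " << A.second << "; ";
      return Descr.str();
    }
    // buildFrameString({{4, "x"}, {256, "buf"}}) == "4 x; 256 buf; "
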
+
+Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
+ bool WithFrameRecord) {
+ if (!Mapping.InTls)
+ return getDynamicShadowNonTls(IRB);
+
+ Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
+ assert(SlotPtr);
+
+ Instruction *ThreadLong = IRB.CreateLoad(SlotPtr);
+
+ Function *F = IRB.GetInsertBlock()->getParent();
+ if (F->getFnAttribute("hwasan-abi").getValueAsString() == "interceptor") {
+ Value *ThreadLongEqZero =
+ IRB.CreateICmpEQ(ThreadLong, ConstantInt::get(IntptrTy, 0));
+ auto *Br = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ ThreadLongEqZero, cast<Instruction>(ThreadLongEqZero)->getNextNode(),
+ false, MDBuilder(*C).createBranchWeights(1, 100000)));
+
+ IRB.SetInsertPoint(Br);
+ // FIXME: This should call a new runtime function with a custom calling
+ // convention to avoid needing to spill all arguments here.
+ IRB.CreateCall(HwasanThreadEnterFunc);
+ LoadInst *ReloadThreadLong = IRB.CreateLoad(SlotPtr);
+
+ IRB.SetInsertPoint(&*Br->getSuccessor(0)->begin());
+ PHINode *ThreadLongPhi = IRB.CreatePHI(IntptrTy, 2);
+ ThreadLongPhi->addIncoming(ThreadLong, ThreadLong->getParent());
+ ThreadLongPhi->addIncoming(ReloadThreadLong, ReloadThreadLong->getParent());
+ ThreadLong = ThreadLongPhi;
+ }
+
+ // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
+ Value *ThreadLongMaybeUntagged =
+ TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
+
+ if (WithFrameRecord) {
+ // Prepare ring buffer data.
+ auto PC = IRB.CreatePtrToInt(F, IntptrTy);
+ auto GetStackPointerFn =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress);
+ Value *SP = IRB.CreatePtrToInt(
+ IRB.CreateCall(GetStackPointerFn,
+ {Constant::getNullValue(IRB.getInt32Ty())}),
+ IntptrTy);
+ // Mix SP and PC. TODO: also add the tag to the mix.
+ // Assumptions:
+ // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
+ // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
+ // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
+ // 0xSSSSPPPPPPPPPPPP
+ SP = IRB.CreateShl(SP, 44);
+
+ // Store data to ring buffer.
+ Value *RecordPtr =
+ IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
+ IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
+
+ // Update the ring buffer. Top byte of ThreadLong defines the size of the
+ // buffer in pages, it must be a power of two, and the start of the buffer
+ // must be aligned by twice that much. Therefore wrap around of the ring
+ // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
+ // The use of AShr instead of LShr is due to
+ // https://bugs.llvm.org/show_bug.cgi?id=39030
+ // Runtime library makes sure not to use the highest bit.
+ Value *WrapMask = IRB.CreateXor(
+ IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
+ ConstantInt::get(IntptrTy, (uint64_t)-1));
+ Value *ThreadLongNew = IRB.CreateAnd(
+ IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
+ IRB.CreateStore(ThreadLongNew, SlotPtr);
+ }
+ // Get shadow base address by aligning RecordPtr up.
+ // Note: this is not correct if the pointer is already aligned.
+ // Runtime library will make sure this never happens.
+ Value *ShadowBase = IRB.CreateAdd(
+ IRB.CreateOr(
+ ThreadLongMaybeUntagged,
+ ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
+ ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
+ return ShadowBase;
+}
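
The arithmetic in emitPrologue() is easier to follow outside of IRBuilder calls. The sketch below redoes it with plain integers: the frame record packs the PC into the low bits and the stack pointer's interesting bits into the top 16, the ring-buffer pointer wraps via a mask derived from the top byte of ThreadLong, and the shadow base is ThreadLong rounded up to the next 2^kShadowBaseAlignment boundary (the alignment is a parameter here because only its use, not its value, appears in this hunk). Plain >> stands in for the AShr; the two agree because the runtime keeps the top bit clear, as the comment above notes.

    #include <cstdint>

    // PC | (SP << 44): PC keeps its 48 meaningful bits, SP contributes the top 16.
    static uint64_t makeFrameRecord(uint64_t PC, uint64_t SP) {
      return PC | (SP << 44);
    }

    // Advance by one 8-byte record; the top byte of ThreadLong is the buffer size
    // in pages, so the wrap mask is ~((ThreadLong >> 56) << 12).
    static uint64_t advanceRingBuffer(uint64_t ThreadLong) {
      uint64_t WrapMask = ~((ThreadLong >> 56) << 12);
      return (ThreadLong + 8) & WrapMask;
    }

    // Round ThreadLong up to the next 2^Align boundary; the runtime guarantees the
    // value is never already aligned, so the unconditional +1 is safe.
    static uint64_t shadowBase(uint64_t ThreadLong, unsigned Align) {
      return (ThreadLong | ((1ULL << Align) - 1)) + 1;
    }
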
+
+bool HWAddressSanitizer::instrumentStack(
+ SmallVectorImpl<AllocaInst *> &Allocas,
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
// alloca addresses using that. Unfortunately, offsets are not known yet
// (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
@@ -641,7 +897,7 @@ bool HWAddressSanitizer::instrumentStack(
// This generates one extra instruction per alloca use.
for (unsigned N = 0; N < Allocas.size(); ++N) {
auto *AI = Allocas[N];
- IRB.SetInsertPoint(AI->getNextNode());
+ IRBuilder<> IRB(AI->getNextNode());
// Replace uses of the alloca with tagged address.
Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
@@ -696,12 +952,6 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
- initializeCallbacks(*F.getParent());
-
- assert(!LocalDynamicShadow);
- maybeInsertDynamicShadowAtFunctionEntry(F);
-
- bool Changed = false;
SmallVector<Instruction*, 16> ToInstrument;
SmallVector<AllocaInst*, 8> AllocasToInstrument;
SmallVector<Instruction*, 8> RetVec;
@@ -734,8 +984,28 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
}
}
- if (!AllocasToInstrument.empty())
- Changed |= instrumentStack(AllocasToInstrument, RetVec);
+ if (AllocasToInstrument.empty() && ToInstrument.empty())
+ return false;
+
+ if (ClCreateFrameDescriptions && !AllocasToInstrument.empty())
+ createFrameGlobal(F, createFrameString(AllocasToInstrument));
+
+ initializeCallbacks(*F.getParent());
+
+ assert(!LocalDynamicShadow);
+
+ Instruction *InsertPt = &*F.getEntryBlock().begin();
+ IRBuilder<> EntryIRB(InsertPt);
+ LocalDynamicShadow = emitPrologue(EntryIRB,
+ /*WithFrameRecord*/ ClRecordStackHistory &&
+ !AllocasToInstrument.empty());
+
+ bool Changed = false;
+ if (!AllocasToInstrument.empty()) {
+ Value *StackTag =
+ ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
+ Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag);
+ }
for (auto Inst : ToInstrument)
Changed |= instrumentMemAccess(Inst);
@@ -746,18 +1016,26 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
}
void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) {
- const bool IsAndroid = TargetTriple.isAndroid();
- const bool IsAndroidWithIfuncSupport =
- IsAndroid && !TargetTriple.isAndroidVersionLT(21);
-
Scale = kDefaultShadowScale;
-
- if (ClEnableKhwasan || ClInstrumentWithCalls || !IsAndroidWithIfuncSupport)
+ if (ClMappingOffset.getNumOccurrences() > 0) {
+ InGlobal = false;
+ InTls = false;
+ Offset = ClMappingOffset;
+ } else if (ClEnableKhwasan || ClInstrumentWithCalls) {
+ InGlobal = false;
+ InTls = false;
Offset = 0;
- else
+ } else if (ClWithIfunc) {
+ InGlobal = true;
+ InTls = false;
Offset = kDynamicShadowSentinel;
- if (ClMappingOffset.getNumOccurrences() > 0)
- Offset = ClMappingOffset;
-
- InGlobal = IsAndroidWithIfuncSupport;
+ } else if (ClWithTls) {
+ InGlobal = false;
+ InTls = true;
+ Offset = kDynamicShadowSentinel;
+ } else {
+ InGlobal = false;
+ InTls = false;
+ Offset = kDynamicShadowSentinel;
+ }
}
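
The rewritten ShadowMapping::init() resolves the shadow placement from the flags in a fixed precedence order: an explicitly requested mapping offset (ClMappingOffset) wins, then kernel or instrument-with-calls mode forces a zero static offset, then the ifunc-based global, then the TLS slot, and finally the generic dynamic-shadow fallback. A condensed sketch of that decision (parameter names abbreviated; the real cl::opt spellings are defined earlier in the pass):

    struct MappingChoice { bool InGlobal; bool InTls; unsigned long long Offset; };

    // Mirrors ShadowMapping::init(); Sentinel stands for kDynamicShadowSentinel,
    // i.e. "resolve the shadow base at run time".
    MappingChoice chooseMapping(bool ExplicitOffsetGiven,
                                unsigned long long ExplicitOffset, bool Kernel,
                                bool WithCalls, bool WithIfunc, bool WithTls,
                                unsigned long long Sentinel) {
      if (ExplicitOffsetGiven) return {false, false, ExplicitOffset};
      if (Kernel || WithCalls) return {false, false, 0};
      if (WithIfunc)           return {true,  false, Sentinel};
      if (WithTls)             return {false, true,  Sentinel};
      return {false, false, Sentinel};
    }
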
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 27fb0e4393af..58436c8560ad 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -19,7 +19,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
-#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Attributes.h"
@@ -41,8 +41,8 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
@@ -269,7 +269,8 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst)
- << "Cannot promote indirect call: target not found";
+ << "Cannot promote indirect call: target with md5sum "
+ << ore::NV("target md5sum", Target) << " not found";
});
break;
}
@@ -351,7 +352,7 @@ uint32_t ICallPromotionFunc::tryToPromote(
bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {
bool Changed = false;
ICallPromotionAnalysis ICallAnalysis;
- for (auto &I : findIndirectCallSites(F)) {
+ for (auto &I : findIndirectCalls(F)) {
uint32_t NumVals, NumCandidates;
uint64_t TotalCount;
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
@@ -426,7 +427,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// Command-line option has the priority for InLTO.
return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 4d5dfb0aa66b..15b94388cbe5 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -96,6 +96,11 @@ cl::opt<double> NumCountersPerValueSite(
// is usually smaller than 2.
cl::init(1.0));
+cl::opt<bool> AtomicCounterUpdateAll(
+ "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
+ cl::desc("Make all profile counter updates atomic (for testing only)"),
+ cl::init(false));
+
cl::opt<bool> AtomicCounterUpdatePromoted(
"atomic-counter-update-promoted", cl::ZeroOrMore,
cl::desc("Do counter update using atomic fetch add "
@@ -597,12 +602,17 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
- Value *Load = Builder.CreateLoad(Addr, "pgocount");
- auto *Count = Builder.CreateAdd(Load, Inc->getStep());
- auto *Store = Builder.CreateStore(Count, Addr);
- Inc->replaceAllUsesWith(Store);
- if (isCounterPromotionEnabled())
- PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
+
+ if (Options.Atomic || AtomicCounterUpdateAll) {
+ Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
+ AtomicOrdering::Monotonic);
+ } else {
+ Value *Load = Builder.CreateLoad(Addr, "pgocount");
+ auto *Count = Builder.CreateAdd(Load, Inc->getStep());
+ auto *Store = Builder.CreateStore(Count, Addr);
+ if (isCounterPromotionEnabled())
+ PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
+ }
Inc->eraseFromParent();
}
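
With the new -instrprof-atomic-counter-update-all flag (or Options.Atomic) the counter bump is emitted as a single monotonic atomicrmw add rather than the load/add/store sequence, which also takes it out of the counter-promotion path. At the source level the two lowerings correspond roughly to:

    #include <atomic>
    #include <cstdint>

    // Non-atomic lowering: plain load/add/store, eligible for promotion out of loops.
    inline void bumpCounterPlain(uint64_t &Counter, uint64_t Step) {
      Counter = Counter + Step;
    }

    // Atomic lowering: a relaxed fetch-add, matching AtomicOrdering::Monotonic.
    inline void bumpCounterAtomic(std::atomic<uint64_t> &Counter, uint64_t Step) {
      Counter.fetch_add(Step, std::memory_order_relaxed);
    }
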
@@ -691,6 +701,7 @@ static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
// Use linker script magic to get data/cnts/name start/end.
if (Triple(M.getTargetTriple()).isOSLinux() ||
Triple(M.getTargetTriple()).isOSFreeBSD() ||
+ Triple(M.getTargetTriple()).isOSNetBSD() ||
Triple(M.getTargetTriple()).isOSFuchsia() ||
Triple(M.getTargetTriple()).isPS4CPU())
return false;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 8e9eea96ced7..c3e323613c70 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -14,7 +14,9 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm-c/Initialization.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
@@ -53,21 +55,65 @@ BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB,
return IP;
}
+// Create a constant for Str so that we can pass it to the run-time lib.
+GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str,
+ bool AllowMerging,
+ const char *NamePrefix) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ // We use private linkage for module-local strings. If they can be merged
+ // with another one, we set the unnamed_addr attribute.
+ GlobalVariable *GV =
+ new GlobalVariable(M, StrConst->getType(), true,
+ GlobalValue::PrivateLinkage, StrConst, NamePrefix);
+ if (AllowMerging)
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
+}
+
+Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T,
+ const std::string &ModuleId) {
+ if (auto Comdat = F.getComdat()) return Comdat;
+ assert(F.hasName());
+ Module *M = F.getParent();
+ std::string Name = F.getName();
+
+ // Make a unique comdat name for internal linkage things on ELF. On COFF, the
+ // name of the comdat group identifies the leader symbol of the comdat group.
+ // The linkage of the leader symbol is considered during comdat resolution,
+ // and internal symbols with the same name from different objects will not be
+ // merged.
+ if (T.isOSBinFormatELF() && F.hasLocalLinkage()) {
+ if (ModuleId.empty())
+ return nullptr;
+ Name += ModuleId;
+ }
+
+ // Make a new comdat for the function. Use the "no duplicates" selection kind
+ // for non-weak symbols if the object file format supports it.
+ Comdat *C = M->getOrInsertComdat(Name);
+ if (T.isOSBinFormatCOFF() && !F.isWeakForLinker())
+ C->setSelectionKind(Comdat::NoDuplicates);
+ F.setComdat(C);
+ return C;
+}
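
createPrivateGlobalForString() and GetOrCreateFunctionComdat() are the shared helpers the sanitizer passes use for per-function metadata that should be discarded together with its function (createFrameGlobal() earlier in this diff is one caller). A hedged usage sketch, assuming both declarations live in llvm/Transforms/Instrumentation.h and that the caller already has a Triple and a module-unique id; the "func_descr" prefix is illustrative:

    #include <string>

    #include "llvm/ADT/Triple.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Instrumentation.h"

    // Attach a private description string to F's comdat so that if the linker
    // drops F, the metadata global goes away with it.
    void emitFunctionDescription(llvm::Function &F, llvm::Triple &TT,
                                 const std::string &ModuleUniqueId) {
      llvm::Module &M = *F.getParent();
      llvm::GlobalVariable *Descr = llvm::createPrivateGlobalForString(
          M, F.getName(), /*AllowMerging=*/true, /*NamePrefix=*/"func_descr");
      if (llvm::Comdat *C = llvm::GetOrCreateFunctionComdat(F, TT, ModuleUniqueId))
        Descr->setComdat(C);
    }
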
+
/// initializeInstrumentation - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerPass(Registry);
initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingLegacyPassPass(Registry);
+ initializeControlHeightReductionLegacyPassPass(Registry);
initializeGCOVProfilerLegacyPassPass(Registry);
initializePGOInstrumentationGenLegacyPassPass(Registry);
initializePGOInstrumentationUseLegacyPassPass(Registry);
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
initializePGOMemOPSizeOptLegacyPassPass(Registry);
initializeInstrProfilingLegacyPassPass(Registry);
- initializeMemorySanitizerPass(Registry);
+ initializeMemorySanitizerLegacyPassPass(Registry);
initializeHWAddressSanitizerPass(Registry);
- initializeThreadSanitizerPass(Registry);
+ initializeThreadSanitizerLegacyPassPass(Registry);
initializeSanitizerCoverageModulePass(Registry);
initializeDataFlowSanitizerPass(Registry);
initializeEfficiencySanitizerPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4bcef6972786..e6573af2077d 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -89,9 +89,58 @@
/// implementation ignores the load aspect of CAS/RMW, always returning a clean
/// value. It implements the store part as a simple atomic store by storing a
/// clean shadow.
-//
+///
+/// Instrumenting inline assembly.
+///
+/// For inline assembly code LLVM has little idea about which memory locations
+/// become initialized depending on the arguments. It may be possible to figure
+/// out which arguments are meant to point to inputs and outputs, but the
+/// actual semantics may only be visible at runtime. In the Linux kernel it's
+/// also possible that the arguments only indicate the offset for a base taken
+/// from a segment register, so it's dangerous to treat any asm() arguments as
+/// pointers. We take a conservative approach generating calls to
+/// __msan_instrument_asm_store(ptr, size)
+/// , which defers the memory unpoisoning to the runtime library.
+/// The latter can perform more complex address checks to figure out whether
+/// it's safe to touch the shadow memory.
+/// Like with atomic operations, we call __msan_instrument_asm_store() before
+/// the assembly call, so that changes to the shadow memory will be seen by
+/// other threads together with main memory initialization.
+///
+/// KernelMemorySanitizer (KMSAN) implementation.
+///
+/// The major differences between KMSAN and MSan instrumentation are:
+/// - KMSAN always tracks the origins and implies msan-keep-going=true;
+/// - KMSAN allocates shadow and origin memory for each page separately, so
+/// there are no explicit accesses to shadow and origin in the
+/// instrumentation.
+/// Shadow and origin values for a particular X-byte memory location
+/// (X=1,2,4,8) are accessed through pointers obtained via the
+/// __msan_metadata_ptr_for_load_X(ptr)
+/// __msan_metadata_ptr_for_store_X(ptr)
+/// functions. The corresponding functions check that the X-byte accesses
+/// are possible and return the pointers to shadow and origin memory.
+/// Arbitrary sized accesses are handled with:
+/// __msan_metadata_ptr_for_load_n(ptr, size)
+/// __msan_metadata_ptr_for_store_n(ptr, size);
+/// - TLS variables are stored in a single per-task struct. A call to a
+/// function __msan_get_context_state() returning a pointer to that struct
+/// is inserted into every instrumented function before the entry block;
+/// - __msan_warning() takes a 32-bit origin parameter;
+/// - local variables are poisoned with __msan_poison_alloca() upon function
+/// entry and unpoisoned with __msan_unpoison_alloca() before leaving the
+/// function;
+/// - the pass doesn't declare any global variables or add global constructors
+/// to the translation unit.
+///
+/// Also, KMSAN currently ignores uninitialized memory passed into inline asm
+/// calls, making sure we're on the safe side wrt. possible false positives.
+///
+/// KernelMemorySanitizer only supports X86_64 at the moment.
+///
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DepthFirstIterator.h"
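
The KMSAN scheme documented in the comment block above replaces direct shadow/origin address arithmetic with runtime callbacks that hand back a shadow/origin pointer pair per access; the declarations the pass actually builds appear later in createKernelApi(). The C-level shape of that interface is roughly the sketch below, where the struct name and by-value return are assumptions (in IR the return type is literally { i8*, i32* }):

    #include <cstdint>

    // Pair handed back by the KMSAN metadata helpers: where to read or write the
    // shadow and the origin for a given kernel address.
    struct MetadataPtrs {
      char *shadow;
      uint32_t *origin;
    };

    extern "C" {
    // Fixed-size accesses, X in {1, 2, 4, 8}.
    MetadataPtrs __msan_metadata_ptr_for_load_4(void *addr);
    MetadataPtrs __msan_metadata_ptr_for_store_4(void *addr);
    // Arbitrary-size accesses.
    MetadataPtrs __msan_metadata_ptr_for_load_n(void *addr, uintptr_t size);
    MetadataPtrs __msan_metadata_ptr_for_store_n(void *addr, uintptr_t size);
    }
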
@@ -101,7 +150,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -139,6 +187,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
@@ -206,10 +255,13 @@ static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
// passed into an assembly call. Note that this may cause false positives.
// Because it's impossible to figure out the array sizes, we can only unpoison
// the first sizeof(type) bytes for each type* pointer.
+// The instrumentation is only enabled in KMSAN builds, and only if
+// -msan-handle-asm-conservative is on. This is done because we may want to
+// quickly disable assembly instrumentation when it breaks.
static cl::opt<bool> ClHandleAsmConservative(
"msan-handle-asm-conservative",
cl::desc("conservative handling of inline assembly"), cl::Hidden,
- cl::init(false));
+ cl::init(true));
// This flag controls whether we check the shadow of the address
// operand of load or store. Such bugs are very rare, since load from
@@ -233,6 +285,11 @@ static cl::opt<int> ClInstrumentationWithCallThreshold(
"inline checks (-1 means never use callbacks)."),
cl::Hidden, cl::init(3500));
+static cl::opt<bool>
+ ClEnableKmsan("msan-kernel",
+ cl::desc("Enable KernelMemorySanitizer instrumentation"),
+ cl::Hidden, cl::init(false));
+
// This is an experiment to enable handling of cases where shadow is a non-zero
// compile-time constant. For some unexplainable reason they were silently
// ignored in the instrumentation.
@@ -264,7 +321,6 @@ static cl::opt<unsigned long long> ClOriginBase("msan-origin-base",
cl::desc("Define custom MSan OriginBase"),
cl::Hidden, cl::init(0));
-static const char *const kMsanModuleCtorName = "msan.module_ctor";
static const char *const kMsanInitName = "__msan_init";
namespace {
@@ -390,29 +446,35 @@ static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
namespace {
-/// An instrumentation pass implementing detection of uninitialized
-/// reads.
+/// Instrument functions of a module to detect uninitialized reads.
///
-/// MemorySanitizer: instrument the code in module to find
-/// uninitialized reads.
-class MemorySanitizer : public FunctionPass {
+/// Instantiating MemorySanitizer inserts the msan runtime library API function
+/// declarations into the module if they don't exist already. Instantiating
+/// ensures the __msan_init function is in the list of global constructors for
+/// the module.
+class MemorySanitizer {
public:
- // Pass identification, replacement for typeid.
- static char ID;
-
- MemorySanitizer(int TrackOrigins = 0, bool Recover = false)
- : FunctionPass(ID),
- TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
- Recover(Recover || ClKeepGoing) {}
-
- StringRef getPassName() const override { return "MemorySanitizer"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ MemorySanitizer(Module &M, int TrackOrigins = 0, bool Recover = false,
+ bool EnableKmsan = false) {
+ this->CompileKernel =
+ ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : EnableKmsan;
+ if (ClTrackOrigins.getNumOccurrences() > 0)
+ this->TrackOrigins = ClTrackOrigins;
+ else
+ this->TrackOrigins = this->CompileKernel ? 2 : TrackOrigins;
+ this->Recover = ClKeepGoing.getNumOccurrences() > 0
+ ? ClKeepGoing
+ : (this->CompileKernel | Recover);
+ initializeModule(M);
}
- bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
+ // MSan cannot be moved or copied because of MapParams.
+ MemorySanitizer(MemorySanitizer &&) = delete;
+ MemorySanitizer &operator=(MemorySanitizer &&) = delete;
+ MemorySanitizer(const MemorySanitizer &) = delete;
+ MemorySanitizer &operator=(const MemorySanitizer &) = delete;
+
+ bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
private:
friend struct MemorySanitizerVisitor;
@@ -421,9 +483,13 @@ private:
friend struct VarArgAArch64Helper;
friend struct VarArgPowerPC64Helper;
+ void initializeModule(Module &M);
void initializeCallbacks(Module &M);
+ void createKernelApi(Module &M);
void createUserspaceApi(Module &M);
+ /// True if we're compiling the Linux kernel.
+ bool CompileKernel;
/// Track origins (allocation points) of uninitialized values.
int TrackOrigins;
bool Recover;
@@ -432,29 +498,39 @@ private:
Type *IntptrTy;
Type *OriginTy;
+ // XxxTLS variables represent the per-thread state in MSan and per-task state
+ // in KMSAN.
+ // For the userspace these point to thread-local globals. In the kernel land
+ // they point to the members of a per-task struct obtained via a call to
+ // __msan_get_context_state().
+
/// Thread-local shadow storage for function parameters.
- GlobalVariable *ParamTLS;
+ Value *ParamTLS;
/// Thread-local origin storage for function parameters.
- GlobalVariable *ParamOriginTLS;
+ Value *ParamOriginTLS;
/// Thread-local shadow storage for function return value.
- GlobalVariable *RetvalTLS;
+ Value *RetvalTLS;
/// Thread-local origin storage for function return value.
- GlobalVariable *RetvalOriginTLS;
+ Value *RetvalOriginTLS;
/// Thread-local shadow storage for in-register va_arg function
/// parameters (x86_64-specific).
- GlobalVariable *VAArgTLS;
+ Value *VAArgTLS;
+
+ /// Thread-local shadow storage for in-register va_arg function
+ /// parameters (x86_64-specific).
+ Value *VAArgOriginTLS;
/// Thread-local shadow storage for va_arg overflow area
/// (x86_64-specific).
- GlobalVariable *VAArgOverflowSizeTLS;
+ Value *VAArgOverflowSizeTLS;
/// Thread-local space used to pass origin value to the UMR reporting
/// function.
- GlobalVariable *OriginTLS;
+ Value *OriginTLS;
/// Are the instrumentation callbacks set up?
bool CallbacksInitialized = false;
@@ -480,6 +556,22 @@ private:
/// MSan runtime replacements for memmove, memcpy and memset.
Value *MemmoveFn, *MemcpyFn, *MemsetFn;
+ /// KMSAN callback for task-local function argument shadow.
+ Value *MsanGetContextStateFn;
+
+ /// Functions for poisoning/unpoisoning local variables
+ Value *MsanPoisonAllocaFn, *MsanUnpoisonAllocaFn;
+
+ /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
+ /// pointers.
+ Value *MsanMetadataPtrForLoadN, *MsanMetadataPtrForStoreN;
+ Value *MsanMetadataPtrForLoad_1_8[4];
+ Value *MsanMetadataPtrForStore_1_8[4];
+ Value *MsanInstrumentAsmStoreFn;
+
+ /// Helper to choose between different MsanMetadataPtrXxx().
+ Value *getKmsanShadowOriginAccessFn(bool isStore, int size);
+
/// Memory map parameters used in application-to-shadow calculation.
const MemoryMapParams *MapParams;
@@ -494,24 +586,61 @@ private:
/// An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+};
+
+/// A legacy function pass for msan instrumentation.
+///
+/// Instruments functions to detect uninitialized reads.
+struct MemorySanitizerLegacyPass : public FunctionPass {
+ // Pass identification, replacement for typeid.
+ static char ID;
- Function *MsanCtorFunction;
+ MemorySanitizerLegacyPass(int TrackOrigins = 0, bool Recover = false,
+ bool EnableKmsan = false)
+ : FunctionPass(ID), TrackOrigins(TrackOrigins), Recover(Recover),
+ EnableKmsan(EnableKmsan) {}
+ StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ return MSan->sanitizeFunction(
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
+ }
+ bool doInitialization(Module &M) override;
+
+ Optional<MemorySanitizer> MSan;
+ int TrackOrigins;
+ bool Recover;
+ bool EnableKmsan;
};
} // end anonymous namespace
-char MemorySanitizer::ID = 0;
+PreservedAnalyses MemorySanitizerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ MemorySanitizer Msan(*F.getParent(), TrackOrigins, Recover, EnableKmsan);
+ if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
-INITIALIZE_PASS_BEGIN(
- MemorySanitizer, "msan",
- "MemorySanitizer: detects uninitialized reads.", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- MemorySanitizer, "msan",
- "MemorySanitizer: detects uninitialized reads.", false, false)
+char MemorySanitizerLegacyPass::ID = 0;
-FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins, bool Recover) {
- return new MemorySanitizer(TrackOrigins, Recover);
+INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
+ "MemorySanitizer: detects uninitialized reads.", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
+ "MemorySanitizer: detects uninitialized reads.", false,
+ false)
+
+FunctionPass *llvm::createMemorySanitizerLegacyPassPass(int TrackOrigins,
+ bool Recover,
+ bool CompileKernel) {
+ return new MemorySanitizerLegacyPass(TrackOrigins, Recover, CompileKernel);
}
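
After this split the same MemorySanitizer implementation is reachable from both pass managers: MemorySanitizerPass::run() above serves the new pass manager, and MemorySanitizerLegacyPass sits behind the createMemorySanitizerLegacyPassPass() factory for the legacy one. A hedged sketch of creating the legacy pass from C++, assuming the factory is declared in the MemorySanitizer.h header added at the top of this file:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"

    void addMsanLegacy(llvm::legacy::FunctionPassManager &FPM) {
      // TrackOrigins=1, Recover=false, CompileKernel=false: classic userspace MSan
      // with one level of origin tracking.
      FPM.add(llvm::createMemorySanitizerLegacyPassPass(/*TrackOrigins=*/1,
                                                        /*Recover=*/false,
                                                        /*CompileKernel=*/false));
    }
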
/// Create a non-const global initialized with the given string.
@@ -526,6 +655,76 @@ static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
GlobalValue::PrivateLinkage, StrConst, "");
}
+/// Create KMSAN API callbacks.
+void MemorySanitizer::createKernelApi(Module &M) {
+ IRBuilder<> IRB(*C);
+
+ // These will be initialized in insertKmsanPrologue().
+ RetvalTLS = nullptr;
+ RetvalOriginTLS = nullptr;
+ ParamTLS = nullptr;
+ ParamOriginTLS = nullptr;
+ VAArgTLS = nullptr;
+ VAArgOriginTLS = nullptr;
+ VAArgOverflowSizeTLS = nullptr;
+ // OriginTLS is unused in the kernel.
+ OriginTLS = nullptr;
+
+ // __msan_warning() in the kernel takes an origin.
+ WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
+ IRB.getInt32Ty());
+ // Requests the per-task context state (kmsan_context_state*) from the
+ // runtime library.
+ MsanGetContextStateFn = M.getOrInsertFunction(
+ "__msan_get_context_state",
+ PointerType::get(
+ StructType::get(ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(),
+ kParamTLSSize / 8), /* va_arg_origin */
+ IRB.getInt64Ty(),
+ ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
+ OriginTy),
+ 0));
+
+ Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
+ PointerType::get(IRB.getInt32Ty(), 0));
+
+ for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
+ std::string name_load =
+ "__msan_metadata_ptr_for_load_" + std::to_string(size);
+ std::string name_store =
+ "__msan_metadata_ptr_for_store_" + std::to_string(size);
+ MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
+ name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+ MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
+ name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+ }
+
+ MsanMetadataPtrForLoadN = M.getOrInsertFunction(
+ "__msan_metadata_ptr_for_load_n", RetTy,
+ PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
+ MsanMetadataPtrForStoreN = M.getOrInsertFunction(
+ "__msan_metadata_ptr_for_store_n", RetTy,
+ PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
+
+ // Functions for poisoning and unpoisoning memory.
+ MsanPoisonAllocaFn =
+ M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
+ MsanUnpoisonAllocaFn = M.getOrInsertFunction(
+ "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
+}
+
+static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
+ return M.getOrInsertGlobal(Name, Ty, [&] {
+ return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, Name, nullptr,
+ GlobalVariable::InitialExecTLSModel);
+ });
+}
+
/// Insert declarations for userspace-specific functions and globals.
void MemorySanitizer::createUserspaceApi(Module &M) {
IRBuilder<> IRB(*C);
@@ -537,36 +736,31 @@ void MemorySanitizer::createUserspaceApi(Module &M) {
WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
// Create the global TLS variables.
- RetvalTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8), false,
- GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
-
- RetvalOriginTLS = new GlobalVariable(
- M, OriginTy, false, GlobalVariable::ExternalLinkage, nullptr,
- "__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
-
- ParamTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
- GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
-
- ParamOriginTLS = new GlobalVariable(
- M, ArrayType::get(OriginTy, kParamTLSSize / 4), false,
- GlobalVariable::ExternalLinkage, nullptr, "__msan_param_origin_tls",
- nullptr, GlobalVariable::InitialExecTLSModel);
-
- VAArgTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
- GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
- VAArgOverflowSizeTLS = new GlobalVariable(
- M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
- "__msan_va_arg_overflow_size_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
- OriginTLS = new GlobalVariable(
- M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
- "__msan_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
+ RetvalTLS =
+ getOrInsertGlobal(M, "__msan_retval_tls",
+ ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
+
+ RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
+
+ ParamTLS =
+ getOrInsertGlobal(M, "__msan_param_tls",
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
+
+ ParamOriginTLS =
+ getOrInsertGlobal(M, "__msan_param_origin_tls",
+ ArrayType::get(OriginTy, kParamTLSSize / 4));
+
+ VAArgTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_tls",
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
+
+ VAArgOriginTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
+ ArrayType::get(OriginTy, kParamTLSSize / 4));
+
+ VAArgOverflowSizeTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
+ OriginTLS = getOrInsertGlobal(M, "__msan_origin_tls", IRB.getInt32Ty());
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
@@ -615,14 +809,37 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
- createUserspaceApi(M);
+ MsanInstrumentAsmStoreFn =
+ M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
+ PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
+
+ if (CompileKernel) {
+ createKernelApi(M);
+ } else {
+ createUserspaceApi(M);
+ }
CallbacksInitialized = true;
}
+Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
+ Value **Fns =
+ isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
+ switch (size) {
+ case 1:
+ return Fns[0];
+ case 2:
+ return Fns[1];
+ case 4:
+ return Fns[2];
+ case 8:
+ return Fns[3];
+ default:
+ return nullptr;
+ }
+}
+
/// Module-level initialization.
-///
-/// inserts a call to __msan_init to the module's constructor list.
-bool MemorySanitizer::doInitialization(Module &M) {
+void MemorySanitizer::initializeModule(Module &M) {
auto &DL = M.getDataLayout();
bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
@@ -695,27 +912,27 @@ bool MemorySanitizer::doInitialization(Module &M) {
ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
- std::tie(MsanCtorFunction, std::ignore) =
- createSanitizerCtorAndInitFunctions(M, kMsanModuleCtorName, kMsanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
- if (ClWithComdat) {
- Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
- MsanCtorFunction->setComdat(MsanCtorComdat);
- appendToGlobalCtors(M, MsanCtorFunction, 0, MsanCtorFunction);
- } else {
- appendToGlobalCtors(M, MsanCtorFunction, 0);
- }
-
-
- if (TrackOrigins)
- new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(TrackOrigins), "__msan_track_origins");
-
- if (Recover)
- new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(Recover), "__msan_keep_going");
+ if (!CompileKernel) {
+ getOrCreateInitFunction(M, kMsanInitName);
+
+ if (TrackOrigins)
+ M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
+ return new GlobalVariable(
+ M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(TrackOrigins), "__msan_track_origins");
+ });
+
+ if (Recover)
+ M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
+ return new GlobalVariable(M, IRB.getInt32Ty(), true,
+ GlobalValue::WeakODRLinkage,
+ IRB.getInt32(Recover), "__msan_keep_going");
+ });
+}
+}
+bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
+ MSan.emplace(M, TrackOrigins, Recover, EnableKmsan);
return true;
}
@@ -796,8 +1013,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
SmallVector<StoreInst *, 16> StoreList;
- MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
- : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
+ MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
+ const TargetLibraryInfo &TLI)
+ : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
InsertChecks = SanitizeFunction;
PropagateShadow = SanitizeFunction;
@@ -806,10 +1024,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// FIXME: Consider using SpecialCaseList to specify a list of functions that
// must always return fully initialized values. For now, we hardcode "main".
CheckReturnValue = SanitizeFunction && (F.getName() == "main");
- TLI = &MS.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
MS.initializeCallbacks(*F.getParent());
- ActualFnStart = &F.getEntryBlock();
+ if (MS.CompileKernel)
+ ActualFnStart = insertKmsanPrologue(F);
+ else
+ ActualFnStart = &F.getEntryBlock();
LLVM_DEBUG(if (!InsertChecks) dbgs()
<< "MemorySanitizer is not inserting checks into '"
@@ -883,7 +1103,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned TypeSizeInBits =
DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
- if (AsCall && SizeIndex < kNumberOfAccessSizes) {
+ if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
Value *ConvertedShadow2 = IRB.CreateZExt(
ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
@@ -932,10 +1152,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
if (!Origin)
Origin = (Value *)IRB.getInt32(0);
- if (MS.TrackOrigins) {
- IRB.CreateStore(Origin, MS.OriginTLS);
+ if (MS.CompileKernel) {
+ IRB.CreateCall(MS.WarningFn, Origin);
+ } else {
+ if (MS.TrackOrigins) {
+ IRB.CreateStore(Origin, MS.OriginTLS);
+ }
+ IRB.CreateCall(MS.WarningFn, {});
}
- IRB.CreateCall(MS.WarningFn, {});
IRB.CreateCall(MS.EmptyAsm, {});
// FIXME: Insert UnreachableInst if !MS.Recover?
// This may invalidate some of the following checks and needs to be done
@@ -961,7 +1185,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
- if (AsCall && SizeIndex < kNumberOfAccessSizes) {
+ if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
Value *Fn = MS.MaybeWarningFn[SizeIndex];
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
@@ -991,6 +1215,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
LLVM_DEBUG(dbgs() << "DONE:\n" << F);
}
+ BasicBlock *insertKmsanPrologue(Function &F) {
+ BasicBlock *ret =
+ SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
+ Constant *Zero = IRB.getInt32(0);
+ MS.ParamTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(0)}, "param_shadow");
+ MS.RetvalTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(1)}, "retval_shadow");
+ MS.VAArgTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(2)}, "va_arg_shadow");
+ MS.VAArgOriginTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(3)}, "va_arg_origin");
+ MS.VAArgOverflowSizeTLS = IRB.CreateGEP(
+ ContextState, {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
+ MS.ParamOriginTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(5)}, "param_origin");
+ MS.RetvalOriginTLS =
+ IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(6)}, "retval_origin");
+ return ret;
+ }
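
insertKmsanPrologue() fetches the per-task state once per function and then indexes its fields with the GEPs above. The struct the pass expects back from __msan_get_context_state() therefore has roughly the shape below; the field names are illustrative and the array sizes are placeholders (kParamTLSSize/kRetvalTLSSize are defined elsewhere in this file), but the field order matches GEP indices 0 through 6:

    #include <cstdint>

    constexpr unsigned kParamTLS = 800;  // placeholder for kParamTLSSize
    constexpr unsigned kRetvalTLS = 800; // placeholder for kRetvalTLSSize

    // Assumed mirror of the state returned by __msan_get_context_state().
    struct kmsan_context_state_sketch {
      uint64_t param_shadow[kParamTLS / 8];   // GEP index 0 -> param_shadow
      uint64_t retval_shadow[kRetvalTLS / 8]; // GEP index 1 -> retval_shadow
      uint64_t va_arg_shadow[kParamTLS / 8];  // GEP index 2 -> va_arg_shadow
      uint64_t va_arg_origin[kParamTLS / 8];  // GEP index 3 -> va_arg_origin
      uint64_t va_arg_overflow_size;          // GEP index 4
      uint32_t param_origin[kParamTLS / 4];   // GEP index 5 -> param_origin
      uint32_t retval_origin;                 // GEP index 6 -> retval_origin
      uint32_t origin;                        // last field, unused by the prologue
    };
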
+
/// Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
// In the presence of unreachable blocks, we may see Phi nodes with
@@ -1139,12 +1386,40 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return std::make_pair(ShadowPtr, OriginPtr);
}
+ std::pair<Value *, Value *>
+ getShadowOriginPtrKernel(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
+ unsigned Alignment, bool isStore) {
+ Value *ShadowOriginPtrs;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ int Size = DL.getTypeStoreSize(ShadowTy);
+
+ Value *Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
+ Value *AddrCast =
+ IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
+ if (Getter) {
+ ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
+ } else {
+ Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+ ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
+ : MS.MsanMetadataPtrForLoadN,
+ {AddrCast, SizeVal});
+ }
+ Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
+ ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
+ Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
+
+ return std::make_pair(ShadowPtr, OriginPtr);
+ }
+
std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
Type *ShadowTy,
unsigned Alignment,
bool isStore) {
- std::pair<Value *, Value *> ret =
- getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
+ std::pair<Value *, Value *> ret;
+ if (MS.CompileKernel)
+ ret = getShadowOriginPtrKernel(Addr, IRB, ShadowTy, Alignment, isStore);
+ else
+ ret = getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
return ret;
}
@@ -1163,7 +1438,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Compute the origin address for a given function argument.
Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
int ArgOffset) {
- if (!MS.TrackOrigins) return nullptr;
+ if (!MS.TrackOrigins)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
if (ArgOffset)
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
@@ -1303,6 +1579,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
/*isStore*/ true)
.first;
+ // TODO(glider): need to copy origins.
if (Overflow) {
// ParamTLS overflow.
EntryIRB.CreateMemSet(
@@ -2850,6 +3127,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorComparePackedIntrinsic(I);
break;
+ case Intrinsic::is_constant:
+ // The result of llvm.is.constant() is always defined.
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ break;
+
default:
if (!handleUnknownIntrinsic(I))
visitInstruction(I);
@@ -2868,7 +3151,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// outputs as clean. Note that any side effects of the inline asm that are
// not immediately visible in its constraints are not handled.
if (Call->isInlineAsm()) {
- if (ClHandleAsmConservative)
+ if (ClHandleAsmConservative && MS.CompileKernel)
visitAsmInstruction(I);
else
visitInstruction(I);
@@ -2921,12 +3204,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ArgOffset + Size > kParamTLSSize) break;
unsigned ParamAlignment = CS.getParamAlignment(i);
unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
- Value *AShadowPtr = getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
- Alignment, /*isStore*/ false)
- .first;
+ Value *AShadowPtr =
+ getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
+ /*isStore*/ false)
+ .first;
Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
Alignment, Size);
+ // TODO(glider): need to copy origins.
} else {
Size = DL.getTypeAllocSize(A->getType());
if (ArgOffset + Size > kParamTLSSize) break;
@@ -2945,8 +3230,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
LLVM_DEBUG(dbgs() << " done with call args\n");
- FunctionType *FT =
- cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
+ FunctionType *FT = CS.getFunctionType();
if (FT->isVarArg()) {
VAHelper->visitCallSite(CS, IRB);
}
@@ -3033,40 +3317,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
"_msphi_o"));
}
- void visitAllocaInst(AllocaInst &I) {
- setShadow(&I, getCleanShadow(&I));
- setOrigin(&I, getCleanOrigin());
- IRBuilder<> IRB(I.getNextNode());
- const DataLayout &DL = F.getParent()->getDataLayout();
- uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
- Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
- if (I.isArrayAllocation())
- Len = IRB.CreateMul(Len, I.getArraySize());
+ Value *getLocalVarDescription(AllocaInst &I) {
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ // We create a string with a description of the stack allocation and
+ // pass it into __msan_set_alloca_origin.
+ // It will be printed by the run-time if stack-originated UMR is found.
+ // The first 4 bytes of the string are set to '----' and will be replaced
+ // by __msan_va_arg_overflow_size_tls at the first call.
+ StackDescription << "----" << I.getName() << "@" << F.getName();
+ return createPrivateNonConstGlobalForString(*F.getParent(),
+ StackDescription.str());
+ }
+
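
getLocalVarDescription() produces the string handed to the poisoning callbacks; per the comment, its first four bytes are a "----" placeholder that the runtime rewrites. A tiny sketch of the format (the names are illustrative, not runtime output):

    #include <string>

    // Format: "----<alloca name>@<function name>".
    std::string localVarDescription(const std::string &AllocaName,
                                    const std::string &FnName) {
      return "----" + AllocaName + "@" + FnName;
    }
    // localVarDescription("buf", "main") == "----buf@main"
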
+ void instrumentAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
if (PoisonStack && ClPoisonStackWithCall) {
IRB.CreateCall(MS.MsanPoisonStackFn,
{IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
} else {
- Value *ShadowBase = getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(),
- I.getAlignment(), /*isStore*/ true)
- .first;
+ Value *ShadowBase, *OriginBase;
+ std::tie(ShadowBase, OriginBase) =
+ getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), 1, /*isStore*/ true);
Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());
}
if (PoisonStack && MS.TrackOrigins) {
- SmallString<2048> StackDescriptionStorage;
- raw_svector_ostream StackDescription(StackDescriptionStorage);
- // We create a string with a description of the stack allocation and
- // pass it into __msan_set_alloca_origin.
- // It will be printed by the run-time if stack-originated UMR is found.
- // The first 4 bytes of the string are set to '----' and will be replaced
- // by __msan_va_arg_overflow_size_tls at the first call.
- StackDescription << "----" << I.getName() << "@" << F.getName();
- Value *Descr =
- createPrivateNonConstGlobalForString(*F.getParent(),
- StackDescription.str());
-
+ Value *Descr = getLocalVarDescription(I);
IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
{IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
@@ -3074,6 +3352,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
+ void instrumentAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+ Value *Descr = getLocalVarDescription(I);
+ if (PoisonStack) {
+ IRB.CreateCall(MS.MsanPoisonAllocaFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
+ } else {
+ IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
+ }
+ }
+
+ void visitAllocaInst(AllocaInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ IRBuilder<> IRB(I.getNextNode());
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
+ Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
+ if (I.isArrayAllocation())
+ Len = IRB.CreateMul(Len, I.getArraySize());
+
+ if (MS.CompileKernel)
+ instrumentAllocaKmsan(I, IRB, Len);
+ else
+ instrumentAllocaUserspace(I, IRB, Len);
+ }
+
void visitSelectInst(SelectInst& I) {
IRBuilder<> IRB(&I);
// a = select b, c, d
@@ -3196,37 +3502,95 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Nothing to do here.
}
+ void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
+ const DataLayout &DL, bool isOutput) {
+ // For each assembly argument, we check its value for being initialized.
+ // If the argument is a pointer, we assume it points to a single element
+ // of the corresponding type (or to a 8-byte word, if the type is unsized).
+ // Each such pointer is instrumented with a call to the runtime library.
+ Type *OpType = Operand->getType();
+ // Check the operand value itself.
+ insertShadowCheck(Operand, &I);
+ if (!OpType->isPointerTy() || !isOutput) {
+ assert(!isOutput);
+ return;
+ }
+ Type *ElType = OpType->getPointerElementType();
+ if (!ElType->isSized())
+ return;
+ int Size = DL.getTypeStoreSize(ElType);
+ Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
+ Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+ IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
+ }
+
+ /// Get the number of output arguments returned by pointers.
+ int getNumOutputArgs(InlineAsm *IA, CallInst *CI) {
+ int NumRetOutputs = 0;
+ int NumOutputs = 0;
+ Type *RetTy = dyn_cast<Value>(CI)->getType();
+ if (!RetTy->isVoidTy()) {
+ // Register outputs are returned via the CallInst return value.
+ StructType *ST = dyn_cast_or_null<StructType>(RetTy);
+ if (ST)
+ NumRetOutputs = ST->getNumElements();
+ else
+ NumRetOutputs = 1;
+ }
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ for (size_t i = 0, n = Constraints.size(); i < n; i++) {
+ InlineAsm::ConstraintInfo Info = Constraints[i];
+ switch (Info.Type) {
+ case InlineAsm::isOutput:
+ NumOutputs++;
+ break;
+ default:
+ break;
+ }
+ }
+ return NumOutputs - NumRetOutputs;
+ }
+
void visitAsmInstruction(Instruction &I) {
// Conservative inline assembly handling: check for poisoned shadow of
// asm() arguments, then unpoison the result and all the memory locations
// pointed to by those arguments.
+ // An inline asm() statement in C++ contains lists of input and output
+ // arguments used by the assembly code. These are mapped to operands of the
+ // CallInst as follows:
+ // - nR register outputs ("=r") are returned by value in a single structure
+ // (SSA value of the CallInst);
+ // - nO other outputs ("=m" and others) are returned by pointer as first
+ // nO operands of the CallInst;
+ // - nI inputs ("r", "m" and others) are passed to CallInst as the
+ // remaining nI operands.
+ // The total number of asm() arguments in the source is nR+nO+nI, and the
+ // corresponding CallInst has nO+nI+1 operands (the last operand is the
+ // function to be called).
+ const DataLayout &DL = F.getParent()->getDataLayout();
CallInst *CI = dyn_cast<CallInst>(&I);
-
- for (size_t i = 0, n = CI->getNumOperands(); i < n; i++) {
+ IRBuilder<> IRB(&I);
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ int OutputArgs = getNumOutputArgs(IA, CI);
+ // The last operand of a CallInst is the function itself.
+ int NumOperands = CI->getNumOperands() - 1;
+
+ // Check input arguments. Doing so before unpoisoning output arguments, so
+ // that we won't overwrite uninit values before checking them.
+ for (int i = OutputArgs; i < NumOperands; i++) {
Value *Operand = CI->getOperand(i);
- if (Operand->getType()->isSized())
- insertShadowCheck(Operand, &I);
+ instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
}
- setShadow(&I, getCleanShadow(&I));
- setOrigin(&I, getCleanOrigin());
- IRBuilder<> IRB(&I);
- IRB.SetInsertPoint(I.getNextNode());
- for (size_t i = 0, n = CI->getNumOperands(); i < n; i++) {
+ // Unpoison output arguments. This must happen before the actual InlineAsm
+ // call, so that the shadow for memory published in the asm() statement
+ // remains valid.
+ for (int i = 0; i < OutputArgs; i++) {
Value *Operand = CI->getOperand(i);
- Type *OpType = Operand->getType();
- if (!OpType->isPointerTy())
- continue;
- Type *ElType = OpType->getPointerElementType();
- if (!ElType->isSized())
- continue;
- Value *ShadowPtr, *OriginPtr;
- std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
- Operand, IRB, ElType, /*Alignment*/ 1, /*isStore*/ true);
- Value *CShadow = getCleanShadow(ElType);
- IRB.CreateStore(
- CShadow,
- IRB.CreatePointerCast(ShadowPtr, CShadow->getType()->getPointerTo()));
+ instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
}
+
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
}
void visitInstruction(Instruction &I) {
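
getNumOutputArgs() depends on the operand layout spelled out in the comment above: register outputs come back as the CallInst's return value, pointer ("=m"-style) outputs occupy the first operands, inputs follow, and the callee is the final operand. A hypothetical statement that exercises all three kinds:

    // Two register outputs, one memory output, two inputs; the asm body is
    // irrelevant to the operand mapping.
    void asmOperandDemo(int a, int b, int *table) {
      int lo = 0, hi = 0;
      __asm__ volatile("nop"
                       : "=r"(lo), "=r"(hi), // nR = 2, returned as an {i32, i32} struct
                         "=m"(table[0])      // nO = 1, passed as CallInst operand 0
                       : "r"(a), "m"(b));    // nI = 2, operands 1 and 2
      (void)lo;
      (void)hi;
    }
    // getNumOutputArgs() reports 1 here: three output constraints minus the two
    // register outputs recovered from the return value.
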
@@ -3249,12 +3613,16 @@ struct VarArgAMD64Helper : public VarArgHelper {
// An unfortunate workaround for asymmetric lowering of va_arg stuff.
// See a comment in visitCallSite for more details.
static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
- static const unsigned AMD64FpEndOffset = 176;
+ static const unsigned AMD64FpEndOffsetSSE = 176;
+ // If SSE is disabled, fp_offset in va_list is zero.
+ static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
+ unsigned AMD64FpEndOffset;
Function &F;
MemorySanitizer &MS;
MemorySanitizerVisitor &MSV;
Value *VAArgTLSCopy = nullptr;
+ Value *VAArgTLSOriginCopy = nullptr;
Value *VAArgOverflowSize = nullptr;
SmallVector<CallInst*, 16> VAStartInstrumentationList;
@@ -3262,7 +3630,18 @@ struct VarArgAMD64Helper : public VarArgHelper {
enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
- MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV) {
+ AMD64FpEndOffset = AMD64FpEndOffsetSSE;
+ for (const auto &Attr : F.getAttributes().getFnAttributes()) {
+ if (Attr.isStringAttribute() &&
+ (Attr.getKindAsString() == "target-features")) {
+ if (Attr.getValueAsString().contains("-sse"))
+ AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
+ break;
+ }
+ }
+ }
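
The offsets this helper tracks mirror the x86_64 SysV va_list register save area: six 8-byte GP register slots occupy the first 48 bytes (AMD64GpEndOffset) and eight 16-byte SSE slots occupy the next 128, ending at 176 (AMD64FpEndOffsetSSE); when the target features contain "-sse" the FP area is empty and everything past 48 spills to the overflow area. A layout sketch with assumed field names:

    #include <cstdint>

    // Assumed shape of the register save area whose shadow the pass copies into
    // VAArgTLSCopy; the offsets match AMD64GpEndOffset / AMD64FpEndOffsetSSE.
    struct RegSaveAreaSketch {
      uint64_t gp[6];           // bytes  0..47: rdi, rsi, rdx, rcx, r8, r9
      unsigned char xmm[8][16]; // bytes 48..175: xmm0..xmm7
    };
    static_assert(sizeof(RegSaveAreaSketch) == 176,
                  "total size equals AMD64FpEndOffsetSSE");
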
ArgKind classifyArgument(Value* arg) {
// A very rough approximation of X86_64 argument classification rules.
@@ -3304,9 +3683,14 @@ struct VarArgAMD64Helper : public VarArgHelper {
assert(A->getType()->isPointerTy());
Type *RealTy = A->getType()->getPointerElementType();
uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
- Value *ShadowBase =
- getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
+ Value *ShadowBase = getShadowPtrForVAArgument(
+ RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
+ Value *OriginBase = nullptr;
+ if (MS.TrackOrigins)
+ OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
OverflowOffset += alignTo(ArgSize, 8);
+ if (!ShadowBase)
+ continue;
Value *ShadowPtr, *OriginPtr;
std::tie(ShadowPtr, OriginPtr) =
MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
@@ -3314,20 +3698,31 @@ struct VarArgAMD64Helper : public VarArgHelper {
IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
kShadowTLSAlignment, ArgSize);
+ if (MS.TrackOrigins)
+ IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
+ kShadowTLSAlignment, ArgSize);
} else {
ArgKind AK = classifyArgument(A);
if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
AK = AK_Memory;
if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
AK = AK_Memory;
- Value *ShadowBase;
+ Value *ShadowBase, *OriginBase = nullptr;
switch (AK) {
case AK_GeneralPurpose:
- ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset);
+ ShadowBase =
+ getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
GpOffset += 8;
break;
case AK_FloatingPoint:
- ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset);
+ ShadowBase =
+ getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
FpOffset += 16;
break;
case AK_Memory:
@@ -3335,15 +3730,27 @@ struct VarArgAMD64Helper : public VarArgHelper {
continue;
uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
ShadowBase =
- getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
OverflowOffset += alignTo(ArgSize, 8);
}
// Take fixed arguments into account for GpOffset and FpOffset,
// but don't actually store shadows for them.
+ // TODO(glider): don't call get*PtrForVAArgument() for them.
if (IsFixed)
continue;
- IRB.CreateAlignedStore(MSV.getShadow(A), ShadowBase,
- kShadowTLSAlignment);
+ if (!ShadowBase)
+ continue;
+ Value *Shadow = MSV.getShadow(A);
+ IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
+ if (MS.TrackOrigins) {
+ Value *Origin = MSV.getOrigin(A);
+ unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+ MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
+ std::max(kShadowTLSAlignment, kMinOriginAlignment));
+ }
}
}
Constant *OverflowSize =
@@ -3353,11 +3760,25 @@ struct VarArgAMD64Helper : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- int ArgOffset) {
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
- "_msarg");
+ "_msarg_va_s");
+ }
+
+ /// Compute the origin address for a given va_arg.
+ Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
+ // getOriginPtrForVAArgument() is always called after
+ // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
+ // overflow.
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+ "_msarg_va_o");
}
void unpoisonVAListTagForInst(IntrinsicInst &I) {
@@ -3402,6 +3823,10 @@ struct VarArgAMD64Helper : public VarArgHelper {
VAArgOverflowSize);
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ if (MS.TrackOrigins) {
+ VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSOriginCopy, 8, MS.VAArgOriginTLS, 8, CopySize);
+ }
}
// Instrument va_start.
@@ -3423,6 +3848,9 @@ struct VarArgAMD64Helper : public VarArgHelper {
Alignment, /*isStore*/ true);
IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
AMD64FpEndOffset);
+ if (MS.TrackOrigins)
+ IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
+ Alignment, AMD64FpEndOffset);
Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, 8)),
@@ -3436,6 +3864,12 @@ struct VarArgAMD64Helper : public VarArgHelper {
AMD64FpEndOffset);
IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
VAArgOverflowSize);
+ if (MS.TrackOrigins) {
+ SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
+ AMD64FpEndOffset);
+ IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
+ VAArgOverflowSize);
+ }
}
}
};
@@ -3469,9 +3903,11 @@ struct VarArgMIPS64Helper : public VarArgHelper {
if (ArgSize < 8)
VAArgOffset += (8 - ArgSize);
}
- Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset);
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
VAArgOffset += ArgSize;
VAArgOffset = alignTo(VAArgOffset, 8);
+ if (!Base)
+ continue;
IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
@@ -3483,7 +3919,10 @@ struct VarArgMIPS64Helper : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- int ArgOffset) {
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
@@ -3614,11 +4053,11 @@ struct VarArgAArch64Helper : public VarArgHelper {
Value *Base;
switch (AK) {
case AK_GeneralPurpose:
- Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
GrOffset += 8;
break;
case AK_FloatingPoint:
- Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
VrOffset += 16;
break;
case AK_Memory:
@@ -3627,7 +4066,8 @@ struct VarArgAArch64Helper : public VarArgHelper {
if (IsFixed)
continue;
uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
- Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
+ alignTo(ArgSize, 8));
OverflowOffset += alignTo(ArgSize, 8);
break;
}
@@ -3635,6 +4075,8 @@ struct VarArgAArch64Helper : public VarArgHelper {
// bother to actually store a shadow.
if (IsFixed)
continue;
+ if (!Base)
+ continue;
IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
Constant *OverflowSize =
@@ -3644,7 +4086,10 @@ struct VarArgAArch64Helper : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- int ArgOffset) {
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
@@ -3849,14 +4294,17 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
ArgAlign = 8;
VAArgOffset = alignTo(VAArgOffset, ArgAlign);
if (!IsFixed) {
- Value *Base = getShadowPtrForVAArgument(RealTy, IRB,
- VAArgOffset - VAArgBase);
- Value *AShadowPtr, *AOriginPtr;
- std::tie(AShadowPtr, AOriginPtr) = MSV.getShadowOriginPtr(
- A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment, /*isStore*/ false);
-
- IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
- kShadowTLSAlignment, ArgSize);
+ Value *Base = getShadowPtrForVAArgument(
+ RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
+ if (Base) {
+ Value *AShadowPtr, *AOriginPtr;
+ std::tie(AShadowPtr, AOriginPtr) =
+ MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
+ kShadowTLSAlignment, /*isStore*/ false);
+
+ IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
+ kShadowTLSAlignment, ArgSize);
+ }
}
VAArgOffset += alignTo(ArgSize, 8);
} else {
@@ -3884,8 +4332,9 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
}
if (!IsFixed) {
Base = getShadowPtrForVAArgument(A->getType(), IRB,
- VAArgOffset - VAArgBase);
- IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ VAArgOffset - VAArgBase, ArgSize);
+ if (Base)
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
VAArgOffset += ArgSize;
VAArgOffset = alignTo(VAArgOffset, 8);
@@ -3903,7 +4352,10 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
- int ArgOffset) {
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
@@ -4005,10 +4457,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
return new VarArgNoOpHelper(Func, Msan, Visitor);
}
-bool MemorySanitizer::runOnFunction(Function &F) {
- if (&F == MsanCtorFunction)
- return false;
- MemorySanitizerVisitor Visitor(F, *this);
+bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
+ MemorySanitizerVisitor Visitor(F, *this, TLI);
// Clear out readonly/readnone attributes.
AttrBuilder B;
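
The recurring change across the VarArg*Helper classes above is a bounds check before any shadow is written into the __msan_va_arg_tls buffer: getShadowPtrForVAArgument() now returns nullptr when the argument would not fit, and every caller skips the store in that case. A minimal standalone sketch of that guard follows (the plain array and the offsets are illustrative stand-ins, not the pass's real TLS state):

#include <cstdint>
#include <cstdio>

// Illustrative only: MSan's vararg shadow TLS buffer is kParamTLSSize bytes;
// here it is modeled as an ordinary static array.
constexpr unsigned kParamTLSSize = 800;
static uint8_t VAArgTLS[kParamTLSSize];

// Mirrors the new getShadowPtrForVAArgument() contract: hand out nullptr when
// the argument's shadow would not fit, so the caller drops the store instead
// of writing past the end of the buffer.
uint8_t *getShadowPtrForVAArg(unsigned ArgOffset, unsigned ArgSize) {
  if (ArgOffset + ArgSize > kParamTLSSize)
    return nullptr;
  return &VAArgTLS[ArgOffset];
}

int main() {
  // An argument whose 16 bytes of shadow start at offset 792 would overflow,
  // so the helper refuses and the shadow store is skipped.
  if (uint8_t *P = getShadowPtrForVAArg(792, 16))
    std::printf("storing shadow at offset %td\n", P - VAArgTLS);
  else
    std::printf("argument shadow skipped: would overflow __msan_va_arg_tls\n");
}
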
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 307b7eaa2196..f043325f5bba 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -63,7 +63,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Attributes.h"
@@ -141,6 +141,11 @@ static cl::opt<std::string>
cl::value_desc("filename"),
cl::desc("Specify the path of profile data file. This is"
"mainly for test purpose."));
+static cl::opt<std::string> PGOTestProfileRemappingFile(
+ "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
+ cl::value_desc("filename"),
+ cl::desc("Specify the path of profile remapping file. This is mainly for "
+ "test purpose."));
// Command line option to disable value profiling. The default is false:
// i.e. value profiling is enabled by default. This is for debug purpose.
@@ -539,7 +544,7 @@ public:
MIVisitor.countMemIntrinsics(Func);
NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
- ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func);
+ ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
FuncName = getPGOFuncName(F);
@@ -581,7 +586,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
std::vector<char> Indexes;
JamCRC JC;
for (auto &BB : F) {
- const TerminatorInst *TI = BB.getTerminator();
+ const Instruction *TI = BB.getTerminator();
for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
auto BI = findBBInfo(Succ);
@@ -693,7 +698,7 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
// Instrument the SrcBB if it has a single successor,
// otherwise, the DestBB if this is not a critical edge.
- TerminatorInst *TI = SrcBB->getTerminator();
+ Instruction *TI = SrcBB->getTerminator();
if (TI->getNumSuccessors() <= 1)
return SrcBB;
if (!E->IsCritical)
@@ -749,12 +754,12 @@ static void instrumentOneFunc(
if (DisableValueProfiling)
return;
- unsigned NumIndirectCallSites = 0;
+ unsigned NumIndirectCalls = 0;
for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) {
CallSite CS(I);
Value *Callee = CS.getCalledValue();
LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = "
- << NumIndirectCallSites << "\n");
+ << NumIndirectCalls << "\n");
IRBuilder<> Builder(I);
assert(Builder.GetInsertPoint() != I->getParent()->end() &&
"Cannot get the Instrumentation point");
@@ -764,9 +769,9 @@ static void instrumentOneFunc(
Builder.getInt64(FuncInfo.FunctionHash),
Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
Builder.getInt32(IPVK_IndirectCallTarget),
- Builder.getInt32(NumIndirectCallSites++)});
+ Builder.getInt32(NumIndirectCalls++)});
}
- NumOfPGOICall += NumIndirectCallSites;
+ NumOfPGOICall += NumIndirectCalls;
// Now instrument memop intrinsic calls.
FuncInfo.MIVisitor.instrumentMemIntrinsics(
@@ -854,7 +859,7 @@ public:
FreqAttr(FFA_Normal) {}
// Read counts for the instrumented BB from profile.
- bool readCounters(IndexedInstrProfReader *PGOReader);
+ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
// Populate the counts for all BBs.
void populateCounters();
@@ -899,6 +904,7 @@ public:
FuncInfo.dumpInfo(Str);
}
+ uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
private:
Function &F;
Module *M;
@@ -1008,7 +1014,7 @@ void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
// Read the profile from ProfileFileName and assign the value to the
// instrumented BB and the edges. This function also updates ProgramMaxCount.
// Return true if the profile is successfully read, and false on errors.
-bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
auto &Ctx = M->getContext();
Expected<InstrProfRecord> Result =
PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
@@ -1048,6 +1054,7 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
ValueSum += CountFromProfile[I];
}
+ AllZeros = (ValueSum == 0);
LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
@@ -1162,7 +1169,7 @@ void PGOUseFunc::setBranchWeights() {
// Generate MD_prof metadata for every branch instruction.
LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n");
for (auto &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (TI->getNumSuccessors() < 2)
continue;
if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
@@ -1208,7 +1215,7 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() {
// to become an irreducible loop header after the indirectbr tail
// duplication.
if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
const UseBBInfo &BBCountInfo = getBBInfo(&BB);
setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
}
@@ -1429,13 +1436,14 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
}
static bool annotateAllFunctions(
- Module &M, StringRef ProfileFileName,
+ Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
auto &Ctx = M.getContext();
// Read the counter array from file.
- auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName);
+ auto ReaderOrErr =
+ IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
if (Error E = ReaderOrErr.takeError()) {
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
Ctx.diagnose(
@@ -1471,8 +1479,15 @@ static bool annotateAllFunctions(
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
- if (!Func.readCounters(PGOReader.get()))
+ bool AllZeros = false;
+ if (!Func.readCounters(PGOReader.get(), AllZeros))
continue;
+ if (AllZeros) {
+ F.setEntryCount(ProfileCount(0, Function::PCT_Real));
+ if (Func.getProgramMaxCount() != 0)
+ ColdFunctions.push_back(&F);
+ continue;
+ }
Func.populateCounters();
Func.setBranchWeights();
Func.annotateValueSites();
@@ -1529,10 +1544,14 @@ static bool annotateAllFunctions(
return true;
}
-PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename)
- : ProfileFileName(std::move(Filename)) {
+PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
+ std::string RemappingFilename)
+ : ProfileFileName(std::move(Filename)),
+ ProfileRemappingFileName(std::move(RemappingFilename)) {
if (!PGOTestProfileFile.empty())
ProfileFileName = PGOTestProfileFile;
+ if (!PGOTestProfileRemappingFile.empty())
+ ProfileRemappingFileName = PGOTestProfileRemappingFile;
}
PreservedAnalyses PGOInstrumentationUse::run(Module &M,
@@ -1547,7 +1566,8 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
return &FAM.getResult<BlockFrequencyAnalysis>(F);
};
- if (!annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI))
+ if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
+ LookupBPI, LookupBFI))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -1564,7 +1584,7 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI);
+ return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI);
}
static std::string getSimpleNodeName(const BasicBlock *Node) {
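
The new AllZeros out-parameter of readCounters() lets annotateAllFunctions() treat functions whose counters are all zero specially: they receive a real entry count of 0 and, if the program as a whole did run (getProgramMaxCount() != 0), are queued as cold. A rough standalone sketch of that decision, with invented stand-in types rather than the pass's real data structures:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Invented stand-in for a function's profile record, just to show the control
// flow enabled by readCounters(PGOReader, AllZeros).
struct FuncProfile {
  std::string Name;
  std::vector<uint64_t> Counters;
};

int main() {
  uint64_t ProgramMaxCount = 1000; // assume something in the program executed
  std::vector<FuncProfile> Funcs = {
      {"hot", {120, 880}}, {"never_called", {0, 0}}};

  for (const auto &F : Funcs) {
    uint64_t Sum = 0;
    for (uint64_t C : F.Counters)
      Sum += C;
    bool AllZeros = (Sum == 0);
    if (AllZeros) {
      // Corresponds to F.setEntryCount(ProfileCount(0, PCT_Real)) plus
      // ColdFunctions.push_back(&F) when the whole program has nonzero counts.
      std::cout << F.Name << ": entry count 0"
                << (ProgramMaxCount != 0 ? ", marked cold" : "") << "\n";
      continue;
    }
    std::cout << F.Name << ": annotate branch weights from counters\n";
  }
}
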
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index a4dd48c8dd6a..0ba8d5765e8c 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
@@ -211,8 +212,8 @@ private:
bool IsLeafFunc = true);
Function *CreateInitCallsForSections(Module &M, const char *InitFunctionName,
Type *Ty, const char *Section);
- std::pair<GlobalVariable *, GlobalVariable *>
- CreateSecStartEnd(Module &M, const char *Section, Type *Ty);
+ std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
+ Type *Ty);
void SetNoSanitizeMetadata(Instruction *I) {
I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
@@ -234,6 +235,7 @@ private:
Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
*Int16Ty, *Int8Ty, *Int8PtrTy;
Module *CurModule;
+ std::string CurModuleUniqueId;
Triple TargetTriple;
LLVMContext *C;
const DataLayout *DL;
@@ -249,7 +251,7 @@ private:
} // namespace
-std::pair<GlobalVariable *, GlobalVariable *>
+std::pair<Value *, Value *>
SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
Type *Ty) {
GlobalVariable *SecStart =
@@ -260,22 +262,28 @@ SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
nullptr, getSectionEnd(Section));
SecEnd->setVisibility(GlobalValue::HiddenVisibility);
-
- return std::make_pair(SecStart, SecEnd);
+ IRBuilder<> IRB(M.getContext());
+ Value *SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty);
+ if (!TargetTriple.isOSBinFormatCOFF())
+ return std::make_pair(IRB.CreatePointerCast(SecStart, Ty), SecEndPtr);
+
+ // Account for the fact that on windows-msvc __start_* symbols actually
+ // point to a uint64_t before the start of the array.
+ auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
+ auto GEP = IRB.CreateGEP(SecStartI8Ptr,
+ ConstantInt::get(IntptrTy, sizeof(uint64_t)));
+ return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
}
-
Function *SanitizerCoverageModule::CreateInitCallsForSections(
Module &M, const char *InitFunctionName, Type *Ty,
const char *Section) {
- IRBuilder<> IRB(M.getContext());
auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
auto SecStart = SecStartEnd.first;
auto SecEnd = SecStartEnd.second;
Function *CtorFunc;
std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty},
- {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)});
+ M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
if (TargetTriple.supportsCOMDAT()) {
// Use comdat to dedup CtorFunc.
@@ -284,6 +292,17 @@ Function *SanitizerCoverageModule::CreateInitCallsForSections(
} else {
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
}
+
+ if (TargetTriple.isOSBinFormatCOFF()) {
+ // In COFF files, if the constructors are set as COMDAT (they are because
+ // COFF supports COMDAT) and the linker flag /OPT:REF (strip unreferenced
+ // functions and data) is used, the constructors get stripped. To prevent
+ // this, give the constructors weak ODR linkage and ensure the linker knows
+ // to include the sancov constructor. This way the linker can deduplicate
+ // the constructors but always leave one copy.
+ CtorFunc->setLinkage(GlobalValue::WeakODRLinkage);
+ appendToUsed(M, CtorFunc);
+ }
return CtorFunc;
}
@@ -293,6 +312,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
C = &(M.getContext());
DL = &M.getDataLayout();
CurModule = &M;
+ CurModuleUniqueId = getUniqueModuleId(CurModule);
TargetTriple = Triple(M.getTargetTriple());
FunctionGuardArray = nullptr;
Function8bitCounterArray = nullptr;
@@ -397,9 +417,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
Function *InitFunction = declareSanitizerInitFunction(
M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
- IRBCtor.CreateCall(InitFunction,
- {IRB.CreatePointerCast(SecStartEnd.first, IntptrPtrTy),
- IRB.CreatePointerCast(SecStartEnd.second, IntptrPtrTy)});
+ IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
}
// We don't reference these arrays directly in any of our runtime functions,
// so we need to prevent them from being dead stripped.
@@ -549,11 +567,19 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
auto Array = new GlobalVariable(
*CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
Constant::getNullValue(ArrayTy), "__sancov_gen_");
- if (auto Comdat = F.getComdat())
- Array->setComdat(Comdat);
+
+ if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
+ if (auto Comdat =
+ GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
+ Array->setComdat(Comdat);
Array->setSection(getSectionName(Section));
Array->setAlignment(Ty->isPointerTy() ? DL->getPointerSize()
: Ty->getPrimitiveSizeInBits() / 8);
+ GlobalsToAppendToUsed.push_back(Array);
+ GlobalsToAppendToCompilerUsed.push_back(Array);
+ MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
+ Array->addMetadata(LLVMContext::MD_associated, *MD);
+
return Array;
}
@@ -587,24 +613,16 @@ SanitizerCoverageModule::CreatePCArray(Function &F,
void SanitizerCoverageModule::CreateFunctionLocalArrays(
Function &F, ArrayRef<BasicBlock *> AllBlocks) {
- if (Options.TracePCGuard) {
+ if (Options.TracePCGuard)
FunctionGuardArray = CreateFunctionLocalArrayInSection(
AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName);
- GlobalsToAppendToUsed.push_back(FunctionGuardArray);
- }
- if (Options.Inline8bitCounters) {
+
+ if (Options.Inline8bitCounters)
Function8bitCounterArray = CreateFunctionLocalArrayInSection(
AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName);
- GlobalsToAppendToCompilerUsed.push_back(Function8bitCounterArray);
- MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
- Function8bitCounterArray->addMetadata(LLVMContext::MD_associated, *MD);
- }
- if (Options.PCTable) {
+
+ if (Options.PCTable)
FunctionPCsArray = CreatePCArray(F, AllBlocks);
- GlobalsToAppendToCompilerUsed.push_back(FunctionPCsArray);
- MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
- FunctionPCsArray->addMetadata(LLVMContext::MD_associated, *MD);
- }
}
bool SanitizerCoverageModule::InjectCoverage(Function &F,
@@ -806,8 +824,13 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
std::string
SanitizerCoverageModule::getSectionName(const std::string &Section) const {
- if (TargetTriple.getObjectFormat() == Triple::COFF)
- return ".SCOV$M";
+ if (TargetTriple.isOSBinFormatCOFF()) {
+ if (Section == SanCovCountersSectionName)
+ return ".SCOV$CM";
+ if (Section == SanCovPCsSectionName)
+ return ".SCOVP$M";
+ return ".SCOV$GM"; // For SanCovGuardsSectionName.
+ }
if (TargetTriple.isOSBinFormatMachO())
return "__DATA,__" + Section;
return "__" + Section;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index fa1e5a157a0f..077364e15c4f 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -19,6 +19,7 @@
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -86,15 +87,16 @@ static const char *const kTsanInitName = "__tsan_init";
namespace {
/// ThreadSanitizer: instrument the code in module to find races.
-struct ThreadSanitizer : public FunctionPass {
- ThreadSanitizer() : FunctionPass(ID) {}
- StringRef getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
- static char ID; // Pass identification, replacement for typeid.
-
- private:
+///
+/// Instantiating ThreadSanitizer inserts the tsan runtime library API function
+/// declarations into the module if they don't exist already. Instantiating
+/// ensures the __tsan_init function is in the list of global constructors for
+/// the module.
+struct ThreadSanitizer {
+ ThreadSanitizer(Module &M);
+ bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
+
+private:
void initializeCallbacks(Module &M);
bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
bool instrumentAtomic(Instruction *I, const DataLayout &DL);
@@ -130,27 +132,55 @@ struct ThreadSanitizer : public FunctionPass {
Function *MemmoveFn, *MemcpyFn, *MemsetFn;
Function *TsanCtorFunction;
};
+
+struct ThreadSanitizerLegacyPass : FunctionPass {
+ ThreadSanitizerLegacyPass() : FunctionPass(ID) {}
+ StringRef getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+ bool doInitialization(Module &M) override;
+ static char ID; // Pass identification, replacement for typeid.
+private:
+ Optional<ThreadSanitizer> TSan;
+};
} // namespace
-char ThreadSanitizer::ID = 0;
-INITIALIZE_PASS_BEGIN(
- ThreadSanitizer, "tsan",
- "ThreadSanitizer: detects data races.",
- false, false)
+PreservedAnalyses ThreadSanitizerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ ThreadSanitizer TSan(*F.getParent());
+ if (TSan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+char ThreadSanitizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan",
+ "ThreadSanitizer: detects data races.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- ThreadSanitizer, "tsan",
- "ThreadSanitizer: detects data races.",
- false, false)
+INITIALIZE_PASS_END(ThreadSanitizerLegacyPass, "tsan",
+ "ThreadSanitizer: detects data races.", false, false)
-StringRef ThreadSanitizer::getPassName() const { return "ThreadSanitizer"; }
+StringRef ThreadSanitizerLegacyPass::getPassName() const {
+ return "ThreadSanitizerLegacyPass";
+}
-void ThreadSanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
+void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
-FunctionPass *llvm::createThreadSanitizerPass() {
- return new ThreadSanitizer();
+bool ThreadSanitizerLegacyPass::doInitialization(Module &M) {
+ TSan.emplace(M);
+ return true;
+}
+
+bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TSan->sanitizeFunction(F, TLI);
+ return true;
+}
+
+FunctionPass *llvm::createThreadSanitizerLegacyPassPass() {
+ return new ThreadSanitizerLegacyPass();
}
void ThreadSanitizer::initializeCallbacks(Module &M) {
@@ -252,16 +282,16 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
IRB.getInt32Ty(), IntptrTy));
}
-bool ThreadSanitizer::doInitialization(Module &M) {
+ThreadSanitizer::ThreadSanitizer(Module &M) {
const DataLayout &DL = M.getDataLayout();
IntptrTy = DL.getIntPtrType(M.getContext());
- std::tie(TsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
- /*InitArgs=*/{});
-
- appendToGlobalCtors(M, TsanCtorFunction, 0);
-
- return true;
+ std::tie(TsanCtorFunction, std::ignore) =
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, Function *) { appendToGlobalCtors(M, Ctor, 0); });
}
static bool isVtableAccess(Instruction *I) {
@@ -402,7 +432,8 @@ void ThreadSanitizer::InsertRuntimeIgnores(Function &F) {
}
}
-bool ThreadSanitizer::runOnFunction(Function &F) {
+bool ThreadSanitizer::sanitizeFunction(Function &F,
+ const TargetLibraryInfo &TLI) {
// This is required to prevent instrumenting call to __tsan_init from within
// the module constructor.
if (&F == TsanCtorFunction)
@@ -416,8 +447,6 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
bool HasCalls = false;
bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
const DataLayout &DL = F.getParent()->getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// Traverse all instructions, collect loads/stores/returns, check for calls.
for (auto &BB : F) {
@@ -428,7 +457,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
LocalLoadsAndStores.push_back(&Inst);
else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
- maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
+ maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
if (isa<MemIntrinsic>(Inst))
MemIntrinCalls.push_back(&Inst);
HasCalls = true;
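
With the split above, module-level setup (declaring the runtime callbacks, registering the ctor) moves into the ThreadSanitizer constructor, and the legacy pass merely holds an Optional<ThreadSanitizer> that it emplaces in doInitialization(). A stripped-down sketch of that wrapping pattern, using std::optional and invented placeholder Module/Function types:

#include <iostream>
#include <optional>
#include <string>

struct Module { std::string Name; };
struct Function { std::string Name; };

// Stand-in for ThreadSanitizer: construction performs the per-module setup,
// sanitizeFunction() does the per-function work.
struct Sanitizer {
  explicit Sanitizer(Module &M) {
    std::cout << "module setup for " << M.Name << "\n";
  }
  bool sanitizeFunction(Function &F) {
    std::cout << "instrumenting " << F.Name << "\n";
    return true;
  }
};

// Stand-in for ThreadSanitizerLegacyPass: construction is delayed until
// doInitialization(), when the Module first becomes available.
struct LegacyPass {
  std::optional<Sanitizer> TSan;
  bool doInitialization(Module &M) { TSan.emplace(M); return true; }
  bool runOnFunction(Function &F) { return TSan->sanitizeFunction(F); }
};

int main() {
  Module M{"m.ll"};
  Function F1{"foo"}, F2{"bar"};
  LegacyPass P;
  P.doInitialization(M); // module-level setup happens exactly once
  P.runOnFunction(F1);
  P.runOnFunction(F2);
}
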
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index ba4924c9cb2d..7f6b157304a3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -26,6 +26,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
@@ -74,27 +75,27 @@ public:
switch (kind) {
case ARCRuntimeEntryPointKind::AutoreleaseRV:
- return getI8XRetI8XEntryPoint(AutoreleaseRV,
- "objc_autoreleaseReturnValue", true);
+ return getIntrinsicEntryPoint(AutoreleaseRV,
+ Intrinsic::objc_autoreleaseReturnValue);
case ARCRuntimeEntryPointKind::Release:
- return getVoidRetI8XEntryPoint(Release, "objc_release");
+ return getIntrinsicEntryPoint(Release, Intrinsic::objc_release);
case ARCRuntimeEntryPointKind::Retain:
- return getI8XRetI8XEntryPoint(Retain, "objc_retain", true);
+ return getIntrinsicEntryPoint(Retain, Intrinsic::objc_retain);
case ARCRuntimeEntryPointKind::RetainBlock:
- return getI8XRetI8XEntryPoint(RetainBlock, "objc_retainBlock", false);
+ return getIntrinsicEntryPoint(RetainBlock, Intrinsic::objc_retainBlock);
case ARCRuntimeEntryPointKind::Autorelease:
- return getI8XRetI8XEntryPoint(Autorelease, "objc_autorelease", true);
+ return getIntrinsicEntryPoint(Autorelease, Intrinsic::objc_autorelease);
case ARCRuntimeEntryPointKind::StoreStrong:
- return getI8XRetI8XXI8XEntryPoint(StoreStrong, "objc_storeStrong");
+ return getIntrinsicEntryPoint(StoreStrong, Intrinsic::objc_storeStrong);
case ARCRuntimeEntryPointKind::RetainRV:
- return getI8XRetI8XEntryPoint(RetainRV,
- "objc_retainAutoreleasedReturnValue", true);
+ return getIntrinsicEntryPoint(RetainRV,
+ Intrinsic::objc_retainAutoreleasedReturnValue);
case ARCRuntimeEntryPointKind::RetainAutorelease:
- return getI8XRetI8XEntryPoint(RetainAutorelease, "objc_retainAutorelease",
- true);
+ return getIntrinsicEntryPoint(RetainAutorelease,
+ Intrinsic::objc_retainAutorelease);
case ARCRuntimeEntryPointKind::RetainAutoreleaseRV:
- return getI8XRetI8XEntryPoint(RetainAutoreleaseRV,
- "objc_retainAutoreleaseReturnValue", true);
+ return getIntrinsicEntryPoint(RetainAutoreleaseRV,
+ Intrinsic::objc_retainAutoreleaseReturnValue);
}
llvm_unreachable("Switch should be a covered switch.");
@@ -131,54 +132,11 @@ private:
/// Declaration for objc_retainAutoreleaseReturnValue().
Constant *RetainAutoreleaseRV = nullptr;
- Constant *getVoidRetI8XEntryPoint(Constant *&Decl, StringRef Name) {
+ Constant *getIntrinsicEntryPoint(Constant *&Decl, Intrinsic::ID IntID) {
if (Decl)
return Decl;
- LLVMContext &C = TheModule->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttributeList Attr = AttributeList().addAttribute(
- C, AttributeList::FunctionIndex, Attribute::NoUnwind);
- FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
- return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
- }
-
- Constant *getI8XRetI8XEntryPoint(Constant *&Decl, StringRef Name,
- bool NoUnwind = false) {
- if (Decl)
- return Decl;
-
- LLVMContext &C = TheModule->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *Fty = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeList Attr = AttributeList();
-
- if (NoUnwind)
- Attr = Attr.addAttribute(C, AttributeList::FunctionIndex,
- Attribute::NoUnwind);
-
- return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
- }
-
- Constant *getI8XRetI8XXI8XEntryPoint(Constant *&Decl, StringRef Name) {
- if (Decl)
- return Decl;
-
- LLVMContext &C = TheModule->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = { I8XX, I8X };
-
- AttributeList Attr = AttributeList().addAttribute(
- C, AttributeList::FunctionIndex, Attribute::NoUnwind);
- Attr = Attr.addParamAttribute(C, 0, Attribute::NoCapture);
-
- FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
-
- return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
+ return Decl = Intrinsic::getDeclaration(TheModule, IntID);
}
};
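
ARCRuntimeEntryPoints still creates each runtime declaration lazily and caches it in a member; the rewrite only changes how the declaration is obtained (Intrinsic::getDeclaration() instead of hand-built function types). The caching idiom itself, sketched with invented types:

#include <iostream>

struct Decl { const char *Name; };

// One cached declaration, created on first use and reused afterwards -- the
// same `if (Decl) return Decl; return Decl = ...;` shape as the header above.
struct EntryPoints {
  Decl *Retain = nullptr;

  Decl *getRetain() {
    if (Retain)
      return Retain;
    return Retain = create("objc_retain");
  }

private:
  Decl *create(const char *Name) {
    std::cout << "creating declaration for " << Name << "\n";
    return new Decl{Name}; // leaked intentionally; this is only a sketch
  }
};

int main() {
  EntryPoints EP;
  EP.getRetain(); // creates the declaration
  EP.getRetain(); // reuses the cached one; no second "creating" line
}
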
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 464805051c65..4bd5fd1acd4c 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -45,18 +45,15 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
default: break;
}
- ImmutableCallSite CS(Inst);
- assert(CS && "Only calls can alter reference counts!");
+ const auto *Call = cast<CallBase>(Inst);
// See if AliasAnalysis can help us with the call.
- FunctionModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ FunctionModRefBehavior MRB = PA.getAA()->getModRefBehavior(Call);
if (AliasAnalysis::onlyReadsMemory(MRB))
return false;
if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
const DataLayout &DL = Inst->getModule()->getDataLayout();
- for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I) {
- const Value *Op = *I;
+ for (const Value *Op : Call->args()) {
if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
PA.related(Ptr, Op, DL))
return true;
@@ -266,13 +263,10 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
for (const BasicBlock *BB : Visited) {
if (BB == StartBB)
continue;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
- const BasicBlock *Succ = *SI;
+ for (const BasicBlock *Succ : successors(BB))
if (Succ != StartBB && !Visited.count(Succ)) {
DependingInsts.insert(reinterpret_cast<Instruction *>(-1));
return;
}
- }
}
}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 1dbe72c7569f..751c8f30e814 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -58,7 +58,7 @@ static inline void EraseInstruction(Instruction *CI) {
// Replace the return value with the argument.
assert((IsForwarding(GetBasicARCInstKind(CI)) ||
(IsNoopOnNull(GetBasicARCInstKind(CI)) &&
- isa<ConstantPointerNull>(OldArg))) &&
+ IsNullOrUndef(OldArg->stripPointerCasts()))) &&
"Can't delete non-forwarding instruction with users!");
CI->replaceAllUsesWith(OldArg);
}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 1f1ea9f58739..abe2871c0b8f 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -522,7 +522,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
TailOkForStoreStrongs = false;
return true;
case ARCInstKind::IntrinsicUser:
- // Remove calls to @clang.arc.use(...).
+ // Remove calls to @llvm.objc.clang.arc.use(...).
Inst->eraseFromParent();
return true;
default:
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 21e2848030fc..9a02174556fc 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -600,6 +600,17 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
}
}
+ // Track PHIs which are equivalent to our Arg.
+ SmallDenseSet<const Value*, 2> EquivalentArgs;
+ EquivalentArgs.insert(Arg);
+
+ // Add PHIs that are equivalent to Arg to ArgUsers.
+ if (const PHINode *PN = dyn_cast<PHINode>(Arg)) {
+ SmallVector<const Value *, 2> ArgUsers;
+ getEquivalentPHIs(*PN, ArgUsers);
+ EquivalentArgs.insert(ArgUsers.begin(), ArgUsers.end());
+ }
+
// Check for being preceded by an objc_autoreleaseReturnValue on the same
// pointer. In this case, we can delete the pair.
BasicBlock::iterator I = RetainRV->getIterator(),
@@ -609,7 +620,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
--I;
while (I != Begin && IsNoopInstruction(&*I));
if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV &&
- GetArgRCIdentityRoot(&*I) == Arg) {
+ EquivalentArgs.count(GetArgRCIdentityRoot(&*I))) {
Changed = true;
++NumPeeps;
@@ -914,8 +925,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
GetRCIdentityRoot(PN->getIncomingValue(i));
if (IsNullOrUndef(Incoming))
HasNull = true;
- else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
- .getNumSuccessors() != 1) {
+ else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
+ 1) {
HasCriticalEdges = true;
break;
}
@@ -1084,18 +1095,15 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
"Unknown top down sequence state.");
const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
bool NotAllSeqEqualButKnownSafe = false;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
+ for (const BasicBlock *Succ : successors(BB)) {
// If VisitBottomUp has pointer information for this successor, take
// what we know about it.
const DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
+ BBStates.find(Succ);
assert(BBI != BBStates.end());
const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
const Sequence SuccSSeq = SuccS.GetSeq();
@@ -1414,21 +1422,20 @@ ComputePostOrders(Function &F,
BasicBlock *EntryBB = &F.getEntryBlock();
BBState &MyStates = BBStates[EntryBB];
MyStates.SetAsEntry();
- TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
+ Instruction *EntryTI = EntryBB->getTerminator();
SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
Visited.insert(EntryBB);
OnStack.insert(EntryBB);
do {
dfs_next_succ:
BasicBlock *CurrBB = SuccStack.back().first;
- TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
- succ_iterator SE(TI, false);
+ succ_iterator SE(CurrBB->getTerminator(), false);
while (SuccStack.back().second != SE) {
BasicBlock *SuccBB = *SuccStack.back().second++;
if (Visited.insert(SuccBB).second) {
- TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
- SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
+ SuccStack.push_back(
+ std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
BBStates[CurrBB].addSucc(SuccBB);
BBState &SuccStates = BBStates[SuccBB];
SuccStates.addPred(CurrBB);
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index ce09a477b5f5..b0602d96798c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -30,9 +30,10 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -102,7 +103,7 @@ struct BlockInfoType {
BasicBlock *BB = nullptr;
/// Cache of BB->getTerminator().
- TerminatorInst *Terminator = nullptr;
+ Instruction *Terminator = nullptr;
/// Post-order numbering of reverse control flow graph.
unsigned PostOrder;
@@ -115,7 +116,7 @@ class AggressiveDeadCodeElimination {
// ADCE does not use DominatorTree per se, but it updates it to preserve the
// analysis.
- DominatorTree &DT;
+ DominatorTree *DT;
PostDominatorTree &PDT;
/// Mapping of blocks to associated information, an element in BlockInfoVec.
@@ -190,7 +191,7 @@ class AggressiveDeadCodeElimination {
void makeUnconditional(BasicBlock *BB, BasicBlock *Target);
public:
- AggressiveDeadCodeElimination(Function &F, DominatorTree &DT,
+ AggressiveDeadCodeElimination(Function &F, DominatorTree *DT,
PostDominatorTree &PDT)
: F(F), DT(DT), PDT(PDT) {}
@@ -205,7 +206,7 @@ bool AggressiveDeadCodeElimination::performDeadCodeElimination() {
return removeDeadInstructions();
}
-static bool isUnconditionalBranch(TerminatorInst *Term) {
+static bool isUnconditionalBranch(Instruction *Term) {
auto *BR = dyn_cast<BranchInst>(Term);
return BR && BR->isUnconditional();
}
@@ -276,7 +277,7 @@ void AggressiveDeadCodeElimination::initialize() {
// treat all edges to a block already seen as loop back edges
// and mark the branch live if there is a back edge.
for (auto *BB: depth_first_ext(&F.getEntryBlock(), State)) {
- TerminatorInst *Term = BB->getTerminator();
+ Instruction *Term = BB->getTerminator();
if (isLive(Term))
continue;
@@ -330,7 +331,7 @@ bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
return false;
return true;
}
- if (!isa<TerminatorInst>(I))
+ if (!I.isTerminator())
return false;
if (RemoveControlFlowFlag && (isa<BranchInst>(I) || isa<SwitchInst>(I)))
return false;
@@ -507,7 +508,7 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
if (isLive(&I))
continue;
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
// Check if the scope of this variable location is alive.
if (AliveScopes.count(DII->getDebugLoc()->getScope()))
continue;
@@ -614,8 +615,8 @@ void AggressiveDeadCodeElimination::updateDeadRegions() {
}
}
- DT.applyUpdates(DeletedEdges);
- PDT.applyUpdates(DeletedEdges);
+ DomTreeUpdater(DT, &PDT, DomTreeUpdater::UpdateStrategy::Eager)
+ .applyUpdates(DeletedEdges);
NumBranchesRemoved += 1;
}
@@ -642,7 +643,7 @@ void AggressiveDeadCodeElimination::computeReversePostOrder() {
void AggressiveDeadCodeElimination::makeUnconditional(BasicBlock *BB,
BasicBlock *Target) {
- TerminatorInst *PredTerm = BB->getTerminator();
+ Instruction *PredTerm = BB->getTerminator();
// Collect the live debug info scopes attached to this instruction.
if (const DILocation *DL = PredTerm->getDebugLoc())
collectLiveScopes(*DL);
@@ -671,7 +672,9 @@ void AggressiveDeadCodeElimination::makeUnconditional(BasicBlock *BB,
//
//===----------------------------------------------------------------------===//
PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) {
- auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ // ADCE does not need DominatorTree, but requires DominatorTree here
+ // to update the analysis if it is already available.
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
auto &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
if (!AggressiveDeadCodeElimination(F, DT, PDT).performDeadCodeElimination())
return PreservedAnalyses::all();
@@ -697,15 +700,16 @@ struct ADCELegacyPass : public FunctionPass {
if (skipFunction(F))
return false;
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ // ADCE does not need DominatorTree, but requires DominatorTree here
+ // to update the analysis if it is already available.
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
return AggressiveDeadCodeElimination(F, DT, PDT)
.performDeadCodeElimination();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- // We require DominatorTree here only to update and thus preserve it.
- AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
if (!RemoveControlFlowFlag)
AU.setPreservesCFG();
@@ -723,7 +727,6 @@ char ADCELegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ADCELegacyPass, "adce",
"Aggressive Dead Code Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
false, false)
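
The ADCE change makes DominatorTree an optional input: the pass asks only for a cached tree and routes the deleted CFG edges through a DomTreeUpdater, which applies them to whichever trees are actually present. A small sketch of that "update it only if it exists" shape, with invented stand-in types:

#include <cstdio>
#include <utility>
#include <vector>

// Invented stand-ins: the point is only that one batch of edge deletions is
// applied to every tree that was actually computed, which is what lets ADCE
// accept a possibly-null DominatorTree.
struct Tree {
  const char *Name;
  void erase(int From, int To) {
    std::printf("%s: delete edge %d->%d\n", Name, From, To);
  }
};

struct Updater {
  Tree *DT;  // may be null: only updated if someone already computed it
  Tree *PDT; // required by the pass
  void applyDeletions(const std::vector<std::pair<int, int>> &Edges) {
    for (const auto &E : Edges) {
      if (DT)
        DT->erase(E.first, E.second);
      if (PDT)
        PDT->erase(E.first, E.second);
    }
  }
};

int main() {
  Tree PDT{"PostDomTree"};
  Updater U{nullptr, &PDT}; // no cached DominatorTree available
  U.applyDeletions({{3, 5}, {4, 5}});
}
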
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
index 3a8ef073cb48..d3c9b9a270aa 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -38,7 +38,8 @@ STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
/// instruction may need to be cleared of assumptions that can no longer be
/// guaranteed correct.
static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
- assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?");
+ assert(I->getType()->isIntOrIntVectorTy() &&
+ "Trivializing a non-integer value?");
// Initialize the worklist with eligible direct users.
SmallVector<Instruction *, 16> WorkList;
@@ -46,13 +47,13 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// If all bits of a user are demanded, then we know that nothing below that
// in the def-use chain needs to be changed.
auto *J = dyn_cast<Instruction>(JU);
- if (J && J->getType()->isSized() &&
+ if (J && J->getType()->isIntOrIntVectorTy() &&
!DB.getDemandedBits(J).isAllOnesValue())
WorkList.push_back(J);
- // Note that we need to check for unsized types above before asking for
+ // Note that we need to check for non-int types above before asking for
// demanded bits. Normally, the only way to reach an instruction with an
- // unsized type is via an instruction that has side effects (or otherwise
+ // non-int type is via an instruction that has side effects (or otherwise
// will demand its input bits). However, if we have a readnone function
// that returns an unsized type (e.g., void), we must avoid asking for the
// demanded bits of the function call's return value. A void-returning
@@ -78,7 +79,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// If all bits of a user are demanded, then we know that nothing below
// that in the def-use chain needs to be changed.
auto *K = dyn_cast<Instruction>(KU);
- if (K && !Visited.count(K) && K->getType()->isSized() &&
+ if (K && !Visited.count(K) && K->getType()->isIntOrIntVectorTy() &&
!DB.getDemandedBits(K).isAllOnesValue())
WorkList.push_back(K);
}
@@ -95,30 +96,41 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
if (I.mayHaveSideEffects() && I.use_empty())
continue;
- if (I.getType()->isIntegerTy() &&
- !DB.getDemandedBits(&I).getBoolValue()) {
- // For live instructions that have all dead bits, first make them dead by
- // replacing all uses with something else. Then, if they don't need to
- // remain live (because they have side effects, etc.) we can remove them.
- LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+ // Remove instructions that are dead, either because they were not reached
+ // during analysis or have no demanded bits.
+ if (DB.isInstructionDead(&I) ||
+ (I.getType()->isIntOrIntVectorTy() &&
+ DB.getDemandedBits(&I).isNullValue() &&
+ wouldInstructionBeTriviallyDead(&I))) {
+ salvageDebugInfo(I);
+ Worklist.push_back(&I);
+ I.dropAllReferences();
+ Changed = true;
+ continue;
+ }
+
+ for (Use &U : I.operands()) {
+ // DemandedBits only detects dead integer uses.
+ if (!U->getType()->isIntOrIntVectorTy())
+ continue;
+
+ if (!isa<Instruction>(U) && !isa<Argument>(U))
+ continue;
+
+ if (!DB.isUseDead(&U))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << U << " (all bits dead)\n");
clearAssumptionsOfUsers(&I, DB);
// FIXME: In theory we could substitute undef here instead of zero.
// This should be reconsidered once we settle on the semantics of
// undef, poison, etc.
- Value *Zero = ConstantInt::get(I.getType(), 0);
+ U.set(ConstantInt::get(U->getType(), 0));
++NumSimplified;
- I.replaceNonMetadataUsesWith(Zero);
Changed = true;
}
- if (!DB.isInstructionDead(&I))
- continue;
-
- salvageDebugInfo(I);
- Worklist.push_back(&I);
- I.dropAllReferences();
- Changed = true;
}
for (Instruction *&I : Worklist) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 5ebfbf8a879b..a806d6faed60 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -149,14 +149,14 @@ static void recordCondition(CallSite CS, BasicBlock *From, BasicBlock *To,
/// Record ICmp conditions relevant to any argument in CS following Pred's
/// single predecessors. If there are conflicting conditions along a path, like
-/// x == 1 and x == 0, the first condition will be used.
+/// x == 1 and x == 0, the first condition will be used. We stop once we reach
+/// an edge to StopAt.
static void recordConditions(CallSite CS, BasicBlock *Pred,
- ConditionsTy &Conditions) {
- recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
+ ConditionsTy &Conditions, BasicBlock *StopAt) {
BasicBlock *From = Pred;
BasicBlock *To = Pred;
SmallPtrSet<BasicBlock *, 4> Visited;
- while (!Visited.count(From->getSinglePredecessor()) &&
+ while (To != StopAt && !Visited.count(From->getSinglePredecessor()) &&
(From = From->getSinglePredecessor())) {
recordCondition(CS, From, To, Conditions);
Visited.insert(From);
@@ -197,7 +197,7 @@ static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
isa<IndirectBrInst>(Preds[1]->getTerminator()))
return false;
- // BasicBlock::canSplitPredecessors is more agressive, so checking for
+ // BasicBlock::canSplitPredecessors is more aggressive, so checking for
// BasicBlock::isEHPad as well.
if (!CallSiteBB->canSplitPredecessors() || CallSiteBB->isEHPad())
return false;
@@ -248,7 +248,7 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
ReturnInst* RI = dyn_cast<ReturnInst>(&*II);
assert(RI && "`musttail` call must be followed by `ret` instruction");
- TerminatorInst *TI = SplitBB->getTerminator();
+ Instruction *TI = SplitBB->getTerminator();
Value *V = NewCI;
if (BCI)
V = cloneInstForMustTail(BCI, TI, V);
@@ -302,7 +302,7 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
static void splitCallSite(
CallSite CS,
const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds,
- DominatorTree *DT) {
+ DomTreeUpdater &DTU) {
Instruction *Instr = CS.getInstruction();
BasicBlock *TailBB = Instr->getParent();
bool IsMustTailCall = CS.isMustTailCall();
@@ -312,8 +312,10 @@ static void splitCallSite(
// `musttail` calls must be followed by optional `bitcast`, and `ret`. The
// split blocks will be terminated right after that so there're no users for
// this phi in a `TailBB`.
- if (!IsMustTailCall && !Instr->use_empty())
+ if (!IsMustTailCall && !Instr->use_empty()) {
CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call");
+ CallPN->setDebugLoc(Instr->getDebugLoc());
+ }
LLVM_DEBUG(dbgs() << "split call-site : " << *Instr << " into \n");
@@ -325,7 +327,7 @@ static void splitCallSite(
BasicBlock *PredBB = Preds[i].first;
BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i],
- DT);
+ DTU);
assert(SplitBlock && "Unexpected new basic block split.");
Instruction *NewCI =
@@ -363,11 +365,13 @@ static void splitCallSite(
// attempting removal.
SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB)));
assert(Splits.size() == 2 && "Expected exactly 2 splits!");
- for (unsigned i = 0; i < Splits.size(); i++)
+ for (unsigned i = 0; i < Splits.size(); i++) {
Splits[i]->getTerminator()->eraseFromParent();
+ DTU.deleteEdge(Splits[i], TailBB);
+ }
// Erase the tail block once done with musttail patching
- TailBB->eraseFromParent();
+ DTU.deleteBB(TailBB);
return;
}
@@ -394,6 +398,7 @@ static void splitCallSite(
if (isa<PHINode>(CurrentI))
continue;
PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size());
+ NewPN->setDebugLoc(CurrentI->getDebugLoc());
for (auto &Mapping : ValueToValueMaps)
NewPN->addIncoming(Mapping[CurrentI],
cast<Instruction>(Mapping[CurrentI])->getParent());
@@ -435,49 +440,73 @@ static bool isPredicatedOnPHI(CallSite CS) {
return false;
}
-static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) {
+using PredsWithCondsTy = SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2>;
+
+// Check if any of the arguments in CS are predicated on a PHI node and return
+// the set of predecessors we should use for splitting.
+static PredsWithCondsTy shouldSplitOnPHIPredicatedArgument(CallSite CS) {
if (!isPredicatedOnPHI(CS))
- return false;
+ return {};
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
- SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS = {
- {Preds[0], {}}, {Preds[1], {}}};
- splitCallSite(CS, PredsCS, DT);
- return true;
+ return {{Preds[0], {}}, {Preds[1], {}}};
}
-static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) {
+// Checks if any of the arguments in CS are predicated in a predecessor and
+// returns a list of predecessors with the conditions that hold on their edges
+// to CS.
+static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallSite CS,
+ DomTreeUpdater &DTU) {
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
if (Preds[0] == Preds[1])
- return false;
+ return {};
+
+ // We can stop recording conditions once we reach the immediate dominator
+ // for the block containing the call site. Conditions in predecessors of
+ // that node will be the same for all paths to the call site, and splitting
+ // is not beneficial.
+ assert(DTU.hasDomTree() && "We need a DTU with a valid DT!");
+ auto *CSDTNode = DTU.getDomTree().getNode(CS.getInstruction()->getParent());
+ BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
ConditionsTy Conditions;
- recordConditions(CS, Pred, Conditions);
+ // Record condition on edge BB(CS) <- Pred
+ recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
+ // Record conditions following Pred's single predecessors.
+ recordConditions(CS, Pred, Conditions, StopAt);
PredsCS.push_back({Pred, Conditions});
}
- if (std::all_of(PredsCS.begin(), PredsCS.end(),
- [](const std::pair<BasicBlock *, ConditionsTy> &P) {
- return P.second.empty();
- }))
- return false;
+ if (all_of(PredsCS, [](const std::pair<BasicBlock *, ConditionsTy> &P) {
+ return P.second.empty();
+ }))
+ return {};
- splitCallSite(CS, PredsCS, DT);
- return true;
+ return PredsCS;
}
static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
- DominatorTree *DT) {
+ DomTreeUpdater &DTU) {
+ // Check if we can split the call site.
if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
return false;
- return tryToSplitOnPredicatedArgument(CS, DT) ||
- tryToSplitOnPHIPredicatedArgument(CS, DT);
+
+ auto PredsWithConds = shouldSplitOnPredicatedArgument(CS, DTU);
+ if (PredsWithConds.empty())
+ PredsWithConds = shouldSplitOnPHIPredicatedArgument(CS);
+ if (PredsWithConds.empty())
+ return false;
+
+ splitCallSite(CS, PredsWithConds, DTU);
+ return true;
}
static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
- TargetTransformInfo &TTI, DominatorTree *DT) {
+ TargetTransformInfo &TTI, DominatorTree &DT) {
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
bool Changed = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
BasicBlock &BB = *BI++;
@@ -501,7 +530,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
// Check if such path is possible before attempting the splitting.
bool IsMustTail = CS.isMustTailCall();
- Changed |= tryToSplitCallSite(CS, TTI, DT);
+ Changed |= tryToSplitCallSite(CS, TTI, DTU);
// There're no interesting instructions after this. The call site
// itself might have been erased on splitting.
@@ -522,6 +551,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@@ -532,9 +562,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- return doCallSiteSplitting(F, TLI, TTI,
- DTWP ? &DTWP->getDomTree() : nullptr);
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ return doCallSiteSplitting(F, TLI, TTI, DT);
}
};
} // namespace
@@ -544,6 +573,7 @@ INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
FunctionPass *llvm::createCallSiteSplittingPass() {
@@ -554,7 +584,7 @@ PreservedAnalyses CallSiteSplittingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
if (!doCallSiteSplitting(F, TLI, TTI, DT))
return PreservedAnalyses::all();
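The CallSiteSplitting hunks above move the pass from patching a raw DominatorTree to routing every CFG edit through a DomTreeUpdater. A minimal sketch of that pattern, modeled on the musttail cleanup in the hunk (the helper and its standalone shape are illustrative, not part of the patch; it assumes each Pred already ends in the copied ret ahead of the old branch, as the musttail patching guarantees):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/DomTreeUpdater.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    // Erase TailBB after its split predecessors have been re-terminated,
    // queuing dominator-tree updates instead of mutating DT directly.
    static void eraseTailBlock(ArrayRef<BasicBlock *> Splits, BasicBlock *TailBB,
                               DominatorTree &DT) {
      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
      for (BasicBlock *Pred : Splits) {
        Pred->getTerminator()->eraseFromParent(); // CFG edge Pred -> TailBB is gone
        DTU.deleteEdge(Pred, TailBB);             // queue the matching DT update
      }
      DTU.deleteBB(TailBB); // deferred until the pending updates are flushed
      DTU.flush();          // apply everything queued above
    }

With the Lazy strategy the updates batch up, so repeated edge deletions inside a loop stay cheap and the tree is only rebuilt when someone actually queries it.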
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 55759e8b1661..beac0d967a98 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -82,6 +82,16 @@ static cl::opt<bool> ConstHoistWithBlockFrequency(
"chance to execute const materialization more frequently than "
"without hoisting."));
+static cl::opt<bool> ConstHoistGEP(
+ "consthoist-gep", cl::init(false), cl::Hidden,
+ cl::desc("Try hoisting constant gep expressions"));
+
+static cl::opt<unsigned>
+MinNumOfDependentToRebase("consthoist-min-num-to-rebase",
+ cl::desc("Do not rebase if number of dependent constants of a Base is less "
+ "than this number."),
+ cl::init(0), cl::Hidden);
+
namespace {
/// The constant hoisting pass.
@@ -340,7 +350,7 @@ SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
///
/// The operand at index Idx is not necessarily the constant integer itself. It
/// could also be a cast instruction or a constant expression that uses the
-// constant integer.
+/// constant integer.
void ConstantHoistingPass::collectConstantCandidates(
ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
ConstantInt *ConstInt) {
@@ -358,12 +368,13 @@ void ConstantHoistingPass::collectConstantCandidates(
if (Cost > TargetTransformInfo::TCC_Basic) {
ConstCandMapType::iterator Itr;
bool Inserted;
- std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(ConstInt, 0));
+ ConstPtrUnionType Cand = ConstInt;
+ std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0));
if (Inserted) {
- ConstCandVec.push_back(ConstantCandidate(ConstInt));
- Itr->second = ConstCandVec.size() - 1;
+ ConstIntCandVec.push_back(ConstantCandidate(ConstInt));
+ Itr->second = ConstIntCandVec.size() - 1;
}
- ConstCandVec[Itr->second].addUser(Inst, Idx, Cost);
+ ConstIntCandVec[Itr->second].addUser(Inst, Idx, Cost);
LLVM_DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx))) dbgs()
<< "Collect constant " << *ConstInt << " from " << *Inst
<< " with cost " << Cost << '\n';
@@ -374,6 +385,48 @@ void ConstantHoistingPass::collectConstantCandidates(
}
}
+/// Record constant GEP expression for instruction Inst at operand index Idx.
+void ConstantHoistingPass::collectConstantCandidates(
+ ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
+ ConstantExpr *ConstExpr) {
+ // TODO: Handle vector GEPs
+ if (ConstExpr->getType()->isVectorTy())
+ return;
+
+ GlobalVariable *BaseGV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
+ if (!BaseGV)
+ return;
+
+ // Get offset from the base GV.
+ PointerType *GVPtrTy = dyn_cast<PointerType>(BaseGV->getType());
+ IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace());
+ APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true);
+ auto *GEPO = cast<GEPOperator>(ConstExpr);
+ if (!GEPO->accumulateConstantOffset(*DL, Offset))
+ return;
+
+ if (!Offset.isIntN(32))
+ return;
+
+ // A constant GEP expression that has a GlobalVariable as its base pointer is
+ // usually lowered to a load from the constant pool. Such an operation is
+ // unlikely to be cheaper than computing it as <Base + Offset>, which can be
+ // lowered to an ADD instruction or folded into a Load/Store instruction.
+ int Cost = TTI->getIntImmCost(Instruction::Add, 1, Offset, PtrIntTy);
+ ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
+ ConstCandMapType::iterator Itr;
+ bool Inserted;
+ ConstPtrUnionType Cand = ConstExpr;
+ std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0));
+ if (Inserted) {
+ ExprCandVec.push_back(ConstantCandidate(
+ ConstantInt::get(Type::getInt32Ty(*Ctx), Offset.getLimitedValue()),
+ ConstExpr));
+ Itr->second = ExprCandVec.size() - 1;
+ }
+ ExprCandVec[Itr->second].addUser(Inst, Idx, Cost);
+}
+
/// Check the operand for instruction Inst at index Idx.
void ConstantHoistingPass::collectConstantCandidates(
ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) {
@@ -402,6 +455,10 @@ void ConstantHoistingPass::collectConstantCandidates(
// Visit constant expressions that have constant integers.
if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+ // Handle constant gep expressions.
+ if (ConstHoistGEP && ConstExpr->isGEPWithNoNotionalOverIndexing())
+ collectConstantCandidates(ConstCandMap, Inst, Idx, ConstExpr);
+
// Only visit constant cast expressions.
if (!ConstExpr->isCast())
return;
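The new ConstantExpr overload above folds a constant GEP into <BaseGV + byte offset> before costing it. The offset is obtained roughly the way this standalone helper would (a sketch; the helper name is made up and it only assumes the expression is a GEP off a GlobalVariable):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // Returns true and fills BaseGV/Offset when CE is a constant GEP whose
    // byte offset from a global variable can be computed at compile time.
    static bool getGEPBaseAndOffset(ConstantExpr *CE, const DataLayout &DL,
                                    GlobalVariable *&BaseGV, APInt &Offset) {
      auto *GEPO = dyn_cast<GEPOperator>(CE);
      if (!GEPO)
        return false;
      BaseGV = dyn_cast<GlobalVariable>(GEPO->getPointerOperand());
      if (!BaseGV)
        return false;
      unsigned AS = BaseGV->getType()->getPointerAddressSpace();
      Offset = APInt(DL.getIndexSizeInBits(AS), 0); // width must match the index size
      return GEPO->accumulateConstantOffset(DL, Offset);
    }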
@@ -544,7 +601,8 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
/// Find the base constant within the given range and rebase all other
/// constants with respect to the base constant.
void ConstantHoistingPass::findAndMakeBaseConstant(
- ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
+ ConstCandVecType::iterator S, ConstCandVecType::iterator E,
+ SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec) {
auto MaxCostItr = S;
unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
@@ -552,26 +610,37 @@ void ConstantHoistingPass::findAndMakeBaseConstant(
if (NumUses <= 1)
return;
+ ConstantInt *ConstInt = MaxCostItr->ConstInt;
+ ConstantExpr *ConstExpr = MaxCostItr->ConstExpr;
ConstantInfo ConstInfo;
- ConstInfo.BaseConstant = MaxCostItr->ConstInt;
- Type *Ty = ConstInfo.BaseConstant->getType();
+ ConstInfo.BaseInt = ConstInt;
+ ConstInfo.BaseExpr = ConstExpr;
+ Type *Ty = ConstInt->getType();
// Rebase the constants with respect to the base constant.
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
- APInt Diff = ConstCand->ConstInt->getValue() -
- ConstInfo.BaseConstant->getValue();
+ APInt Diff = ConstCand->ConstInt->getValue() - ConstInt->getValue();
Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff);
+ Type *ConstTy =
+ ConstCand->ConstExpr ? ConstCand->ConstExpr->getType() : nullptr;
ConstInfo.RebasedConstants.push_back(
- RebasedConstantInfo(std::move(ConstCand->Uses), Offset));
+ RebasedConstantInfo(std::move(ConstCand->Uses), Offset, ConstTy));
}
- ConstantVec.push_back(std::move(ConstInfo));
+ ConstInfoVec.push_back(std::move(ConstInfo));
}
/// Finds and combines constant candidates that can be easily
/// rematerialized with an add from a common base constant.
-void ConstantHoistingPass::findBaseConstants() {
+void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {
+ // If BaseGV is nullptr, find base among candidate constant integers;
+ // Otherwise find base among constant GEPs that share the same BaseGV.
+ ConstCandVecType &ConstCandVec = BaseGV ?
+ ConstGEPCandMap[BaseGV] : ConstIntCandVec;
+ ConstInfoVecType &ConstInfoVec = BaseGV ?
+ ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
+
// Sort the constants by value and type. This invalidates the mapping!
- llvm::sort(ConstCandVec.begin(), ConstCandVec.end(),
+ std::stable_sort(ConstCandVec.begin(), ConstCandVec.end(),
[](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
return LHS.ConstInt->getType()->getBitWidth() <
@@ -585,20 +654,40 @@ void ConstantHoistingPass::findBaseConstants() {
for (auto CC = std::next(ConstCandVec.begin()), E = ConstCandVec.end();
CC != E; ++CC) {
if (MinValItr->ConstInt->getType() == CC->ConstInt->getType()) {
+ Type *MemUseValTy = nullptr;
+ for (auto &U : CC->Uses) {
+ auto *UI = U.Inst;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
+ MemUseValTy = LI->getType();
+ break;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
+ // Make sure the constant is used as pointer operand of the StoreInst.
+ if (SI->getPointerOperand() == SI->getOperand(U.OpndIdx)) {
+ MemUseValTy = SI->getValueOperand()->getType();
+ break;
+ }
+ }
+ }
+
// Check if the constant is in range of an add with immediate.
APInt Diff = CC->ConstInt->getValue() - MinValItr->ConstInt->getValue();
if ((Diff.getBitWidth() <= 64) &&
- TTI->isLegalAddImmediate(Diff.getSExtValue()))
+ TTI->isLegalAddImmediate(Diff.getSExtValue()) &&
+ // Check if Diff can be used as an offset in the addressing mode of the
+ // user memory instruction.
+ (!MemUseValTy || TTI->isLegalAddressingMode(MemUseValTy,
+ /*BaseGV*/nullptr, /*BaseOffset*/Diff.getSExtValue(),
+ /*HasBaseReg*/true, /*Scale*/0)))
continue;
}
// We either have now a different constant type or the constant is not in
// range of an add with immediate anymore.
- findAndMakeBaseConstant(MinValItr, CC);
+ findAndMakeBaseConstant(MinValItr, CC, ConstInfoVec);
// Start a new base constant search.
MinValItr = CC;
}
// Finalize the last base constant search.
- findAndMakeBaseConstant(MinValItr, ConstCandVec.end());
+ findAndMakeBaseConstant(MinValItr, ConstCandVec.end(), ConstInfoVec);
}
/// Updates the operand at Idx in instruction Inst with the result of
@@ -633,12 +722,28 @@ static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {
/// users.
void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
Constant *Offset,
+ Type *Ty,
const ConstantUser &ConstUser) {
Instruction *Mat = Base;
+
+ // The same offset can be dereferenced to different types in a nested struct.
+ if (!Offset && Ty && Ty != Base->getType())
+ Offset = ConstantInt::get(Type::getInt32Ty(*Ctx), 0);
+
if (Offset) {
Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst,
ConstUser.OpndIdx);
- Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
+ if (Ty) {
+ // Constant being rebased is a ConstantExpr.
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx,
+ cast<PointerType>(Ty)->getAddressSpace());
+ Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt);
+ Mat = GetElementPtrInst::Create(Int8PtrTy->getElementType(), Base,
+ Offset, "mat_gep", InsertionPt);
+ Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt);
+ } else
+ // Constant being rebased is a ConstantInt.
+ Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
"const_mat", InsertionPt);
LLVM_DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)
@@ -682,6 +787,14 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
// Visit constant expression.
if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+ if (ConstExpr->isGEPWithNoNotionalOverIndexing()) {
+ // Operand is a ConstantGEP, replace it.
+ updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat);
+ return;
+ }
+
+ // Aside from constant GEPs, only constant cast expressions are collected.
+ assert(ConstExpr->isCast() && "ConstExpr should be a cast");
Instruction *ConstExprInst = ConstExpr->getAsInstruction();
ConstExprInst->setOperand(0, Mat);
ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
@@ -705,28 +818,22 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
/// Hoist and hide the base constant behind a bitcast and emit
/// materialization code for derived constants.
-bool ConstantHoistingPass::emitBaseConstants() {
+bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
bool MadeChange = false;
- for (auto const &ConstInfo : ConstantVec) {
- // Hoist and hide the base constant behind a bitcast.
+ SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec =
+ BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
+ for (auto const &ConstInfo : ConstInfoVec) {
SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
assert(!IPSet.empty() && "IPSet is empty");
unsigned UsesNum = 0;
unsigned ReBasesNum = 0;
+ unsigned NotRebasedNum = 0;
for (Instruction *IP : IPSet) {
- IntegerType *Ty = ConstInfo.BaseConstant->getType();
- Instruction *Base =
- new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
-
- Base->setDebugLoc(IP->getDebugLoc());
-
- LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant
- << ") to BB " << IP->getParent()->getName() << '\n'
- << *Base << '\n');
-
- // Emit materialization code for all rebased constants.
+ // First, collect constants depending on this IP of the base.
unsigned Uses = 0;
+ using RebasedUse = std::tuple<Constant *, Type *, ConstantUser>;
+ SmallVector<RebasedUse, 4> ToBeRebased;
for (auto const &RCI : ConstInfo.RebasedConstants) {
for (auto const &U : RCI.Uses) {
Uses++;
@@ -735,31 +842,64 @@ bool ConstantHoistingPass::emitBaseConstants() {
// If Base constant is to be inserted in multiple places,
// generate rebase for U using the Base dominating U.
if (IPSet.size() == 1 ||
- DT->dominates(Base->getParent(), OrigMatInsertBB)) {
- emitBaseConstants(Base, RCI.Offset, U);
- ReBasesNum++;
- }
-
- Base->setDebugLoc(DILocation::getMergedLocation(Base->getDebugLoc(), U.Inst->getDebugLoc()));
+ DT->dominates(IP->getParent(), OrigMatInsertBB))
+ ToBeRebased.push_back(RebasedUse(RCI.Offset, RCI.Ty, U));
}
}
UsesNum = Uses;
- // Use the same debug location as the last user of the constant.
+ // If only a few constants depend on this insertion point of the base, skip
+ // rebasing, assuming the base and the rebased constants cost the same to materialize.
+ if (ToBeRebased.size() < MinNumOfDependentToRebase) {
+ NotRebasedNum += ToBeRebased.size();
+ continue;
+ }
+
+ // Emit an instance of the base at this IP.
+ Instruction *Base = nullptr;
+ // Hoist and hide the base constant behind a bitcast.
+ if (ConstInfo.BaseExpr) {
+ assert(BaseGV && "A base constant expression must have a base GV");
+ Type *Ty = ConstInfo.BaseExpr->getType();
+ Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP);
+ } else {
+ IntegerType *Ty = ConstInfo.BaseInt->getType();
+ Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP);
+ }
+
+ Base->setDebugLoc(IP->getDebugLoc());
+
+ LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseInt
+ << ") to BB " << IP->getParent()->getName() << '\n'
+ << *Base << '\n');
+
+ // Emit materialization code for rebased constants depending on this IP.
+ for (auto const &R : ToBeRebased) {
+ Constant *Off = std::get<0>(R);
+ Type *Ty = std::get<1>(R);
+ ConstantUser U = std::get<2>(R);
+ emitBaseConstants(Base, Off, Ty, U);
+ ReBasesNum++;
+ // Use the same debug location as the last user of the constant.
+ Base->setDebugLoc(DILocation::getMergedLocation(
+ Base->getDebugLoc(), U.Inst->getDebugLoc()));
+ }
assert(!Base->use_empty() && "The use list is empty!?");
assert(isa<Instruction>(Base->user_back()) &&
"All uses should be instructions.");
}
(void)UsesNum;
(void)ReBasesNum;
+ (void)NotRebasedNum;
// Expect all uses are rebased after rebase is done.
- assert(UsesNum == ReBasesNum && "Not all uses are rebased");
+ assert(UsesNum == (ReBasesNum + NotRebasedNum) &&
+ "Not all uses are rebased");
NumConstantsHoisted++;
// Base constant is also included in ConstInfo.RebasedConstants, so
// deduct 1 from ConstInfo.RebasedConstants.size().
- NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1;
+ NumConstantsRebased += ConstInfo.RebasedConstants.size() - 1;
MadeChange = true;
}
@@ -781,25 +921,29 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
this->TTI = &TTI;
this->DT = &DT;
this->BFI = BFI;
+ this->DL = &Fn.getParent()->getDataLayout();
+ this->Ctx = &Fn.getContext();
this->Entry = &Entry;
// Collect all constant candidates.
collectConstantCandidates(Fn);
- // There are no constant candidates to worry about.
- if (ConstCandVec.empty())
- return false;
-
// Combine constants that can be easily materialized with an add from a common
// base constant.
- findBaseConstants();
-
- // There are no constants to emit.
- if (ConstantVec.empty())
- return false;
+ if (!ConstIntCandVec.empty())
+ findBaseConstants(nullptr);
+ for (auto &MapEntry : ConstGEPCandMap)
+ if (!MapEntry.second.empty())
+ findBaseConstants(MapEntry.first);
// Finally hoist the base constant and emit materialization code for dependent
// constants.
- bool MadeChange = emitBaseConstants();
+ bool MadeChange = false;
+ if (!ConstIntInfoVec.empty())
+ MadeChange = emitBaseConstants(nullptr);
+ for (auto MapEntry : ConstGEPInfoMap)
+ if (!MapEntry.second.empty())
+ MadeChange |= emitBaseConstants(MapEntry.first);
+
// Cleanup dead instructions.
deleteDeadCastInst();
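For the GEP candidates, emitBaseConstants above materializes each rebased use as bitcast-to-i8*, byte GEP, bitcast back to the original pointer type. The same shape written with IRBuilder, for readability (function and value names are illustrative only):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Re-derive a pointer of type Ty as (i8*)Base + Offset, next to InsertPt.
    static Value *rebaseFromHoistedBase(Instruction *Base, Value *Offset,
                                        Type *Ty, Instruction *InsertPt) {
      IRBuilder<> B(InsertPt);
      unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
      Value *RawBase = B.CreateBitCast(Base, B.getInt8PtrTy(AS), "base_bitcast");
      Value *Mat = B.CreateGEP(B.getInt8Ty(), RawBase, Offset, "mat_gep");
      return B.CreateBitCast(Mat, Ty, "mat_bitcast");
    }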
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index 46915889ce7c..51032b0625f8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -18,21 +18,25 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
-#include <set>
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "constprop"
STATISTIC(NumInstKilled, "Number of instructions killed");
+DEBUG_COUNTER(CPCounter, "constprop-transform",
+ "Controls which instructions are killed");
namespace {
struct ConstantPropagation : public FunctionPass {
@@ -66,9 +70,15 @@ bool ConstantPropagation::runOnFunction(Function &F) {
return false;
// Initialize the worklist to all of the instructions ready to process...
- std::set<Instruction*> WorkList;
- for (Instruction &I: instructions(&F))
+ SmallPtrSet<Instruction *, 16> WorkList;
+ // The SmallVector mirrors WorkList so that we iterate in a stable order.
+ // We use two containers rather than one SetVector, since remove is
+ // linear-time, and we don't care enough to remove from Vec.
+ SmallVector<Instruction *, 16> WorkListVec;
+ for (Instruction &I : instructions(&F)) {
WorkList.insert(&I);
+ WorkListVec.push_back(&I);
+ }
bool Changed = false;
const DataLayout &DL = F.getParent()->getDataLayout();
@@ -76,29 +86,36 @@ bool ConstantPropagation::runOnFunction(Function &F) {
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
while (!WorkList.empty()) {
- Instruction *I = *WorkList.begin();
- WorkList.erase(WorkList.begin()); // Get an element from the worklist...
-
- if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
- // Add all of the users of this instruction to the worklist, they might
- // be constant propagatable now...
- for (User *U : I->users())
- WorkList.insert(cast<Instruction>(U));
-
- // Replace all of the uses of a variable with uses of the constant.
- I->replaceAllUsesWith(C);
-
- // Remove the dead instruction.
- WorkList.erase(I);
- if (isInstructionTriviallyDead(I, TLI)) {
- I->eraseFromParent();
- ++NumInstKilled;
+ SmallVector<Instruction*, 16> NewWorkListVec;
+ for (auto *I : WorkListVec) {
+ WorkList.erase(I); // Remove element from the worklist...
+
+ if (!I->use_empty()) // Don't muck with dead instructions...
+ if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
+ if (!DebugCounter::shouldExecute(CPCounter))
+ continue;
+
+ // Add all of the users of this instruction to the worklist, they might
+ // be constant propagatable now...
+ for (User *U : I->users()) {
+ // If user not in the set, then add it to the vector.
+ if (WorkList.insert(cast<Instruction>(U)).second)
+ NewWorkListVec.push_back(cast<Instruction>(U));
+ }
+
+ // Replace all of the uses of a variable with uses of the constant.
+ I->replaceAllUsesWith(C);
+
+ if (isInstructionTriviallyDead(I, TLI)) {
+ I->eraseFromParent();
+ ++NumInstKilled;
+ }
+
+ // We made a change to the function...
+ Changed = true;
}
-
- // We made a change to the function...
- Changed = true;
- }
+ }
+ WorkListVec = std::move(NewWorkListVec);
}
return Changed;
}
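The constprop rewrite above pairs a membership set with an iteration vector instead of a single std::set. The general pattern, in stand-alone C++ (names are illustrative; in the pass the containers are SmallPtrSet and SmallVector):

    #include <unordered_set>
    #include <utility>
    #include <vector>

    // Generic worklist: InSet answers "already queued?", Vec gives a stable
    // processing order; nothing is ever erased from the middle of the vector.
    template <typename T, typename VisitFn>
    void runWorklist(std::vector<T> Initial, VisitFn Visit) {
      std::unordered_set<T> InSet(Initial.begin(), Initial.end());
      std::vector<T> Vec = std::move(Initial);
      while (!Vec.empty()) {
        std::vector<T> Next;
        for (T Item : Vec) {
          InSet.erase(Item);             // Item is being processed now
          for (T NewItem : Visit(Item))  // Visit returns follow-up work
            if (InSet.insert(NewItem).second)
              Next.push_back(NewItem);   // queue each item at most once
        }
        Vec = std::move(Next);
      }
    }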
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 2f2d7f620a29..d0105701c73f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -28,6 +27,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -44,6 +44,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>
@@ -272,10 +273,11 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
/// information is sufficient to prove this comparison. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
-static bool processCmp(CmpInst *C, LazyValueInfo *LVI) {
- Value *Op0 = C->getOperand(0);
- Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
- if (!Op1) return false;
+static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+ Value *Op0 = Cmp->getOperand(0);
+ auto *C = dyn_cast<Constant>(Cmp->getOperand(1));
+ if (!C)
+ return false;
// As a policy choice, we choose not to waste compile time on anything where
// the comparison is testing local values. While LVI can sometimes reason
@@ -283,20 +285,18 @@ static bool processCmp(CmpInst *C, LazyValueInfo *LVI) {
// the block local query for uses from terminator instructions, but that's
// handled in the code for each terminator.
auto *I = dyn_cast<Instruction>(Op0);
- if (I && I->getParent() == C->getParent())
+ if (I && I->getParent() == Cmp->getParent())
return false;
LazyValueInfo::Tristate Result =
- LVI->getPredicateAt(C->getPredicate(), Op0, Op1, C);
- if (Result == LazyValueInfo::Unknown) return false;
+ LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp);
+ if (Result == LazyValueInfo::Unknown)
+ return false;
++NumCmps;
- if (Result == LazyValueInfo::True)
- C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
- else
- C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
- C->eraseFromParent();
-
+ Constant *TorF = ConstantInt::get(Type::getInt1Ty(Cmp->getContext()), Result);
+ Cmp->replaceAllUsesWith(TorF);
+ Cmp->eraseFromParent();
return true;
}
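The rewritten processCmp above builds the replacement boolean straight from the Tristate. That works because LazyValueInfo::False is defined as 0 and LazyValueInfo::True as 1, so once Unknown has been filtered out the enum value can feed ConstantInt::get directly. A sketch (the helper name is made up):

    #include <cassert>
    #include "llvm/Analysis/LazyValueInfo.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Fold a comparison whose outcome LVI has already proven.
    static Constant *foldProvenCmp(CmpInst *Cmp, LazyValueInfo::Tristate Result) {
      assert(Result != LazyValueInfo::Unknown && "caller must filter Unknown");
      return ConstantInt::get(Type::getInt1Ty(Cmp->getContext()), Result);
    }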
@@ -307,7 +307,9 @@ static bool processCmp(CmpInst *C, LazyValueInfo *LVI) {
/// that cannot fire no matter what the incoming edge can safely be removed. If
/// a case fires on every incoming edge then the entire switch can be removed
/// and replaced with a branch to the case destination.
-static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI, DominatorTree *DT) {
+static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
+ DominatorTree *DT) {
+ DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
Value *Cond = SI->getCondition();
BasicBlock *BB = SI->getParent();
@@ -372,7 +374,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI, DominatorTree *DT)
++NumDeadCases;
Changed = true;
if (--SuccessorsCount[Succ] == 0)
- DT->deleteEdge(BB, Succ);
+ DTU.deleteEdge(BB, Succ);
continue;
}
if (State == LazyValueInfo::True) {
@@ -389,15 +391,11 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI, DominatorTree *DT)
++CI;
}
- if (Changed) {
+ if (Changed)
// If the switch has been simplified to the point where it can be replaced
// by a branch then do so now.
- DeferredDominance DDT(*DT);
ConstantFoldTerminator(BB, /*DeleteDeadConditions = */ false,
- /*TLI = */ nullptr, &DDT);
- DDT.flush();
- }
-
+ /*TLI = */ nullptr, &DTU);
return Changed;
}
@@ -432,23 +430,21 @@ static bool willNotOverflow(IntrinsicInst *II, LazyValueInfo *LVI) {
}
static void processOverflowIntrinsic(IntrinsicInst *II) {
+ IRBuilder<> B(II);
Value *NewOp = nullptr;
switch (II->getIntrinsicID()) {
default:
llvm_unreachable("Unexpected instruction.");
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- NewOp = BinaryOperator::CreateAdd(II->getOperand(0), II->getOperand(1),
- II->getName(), II);
+ NewOp = B.CreateAdd(II->getOperand(0), II->getOperand(1), II->getName());
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
- NewOp = BinaryOperator::CreateSub(II->getOperand(0), II->getOperand(1),
- II->getName(), II);
+ NewOp = B.CreateSub(II->getOperand(0), II->getOperand(1), II->getName());
break;
}
++NumOverflows;
- IRBuilder<> B(II);
Value *NewI = B.CreateInsertValue(UndefValue::get(II->getType()), NewOp, 0);
NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(II->getContext()), 1);
II->replaceAllUsesWith(NewI);
@@ -530,17 +526,17 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
return false;
++NumUDivs;
+ IRBuilder<> B{Instr};
auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
- auto *LHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(0), TruncTy,
- Instr->getName() + ".lhs.trunc", Instr);
- auto *RHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(1), TruncTy,
- Instr->getName() + ".rhs.trunc", Instr);
- auto *BO =
- BinaryOperator::Create(Instr->getOpcode(), LHS, RHS, Instr->getName(), Instr);
- auto *Zext = CastInst::Create(Instruction::ZExt, BO, Instr->getType(),
- Instr->getName() + ".zext", Instr);
- if (BO->getOpcode() == Instruction::UDiv)
- BO->setIsExact(Instr->isExact());
+ auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
+ Instr->getName() + ".lhs.trunc");
+ auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
+ Instr->getName() + ".rhs.trunc");
+ auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
+ auto *Zext = B.CreateZExt(BO, Instr->getType(), Instr->getName() + ".zext");
+ if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
+ if (BinOp->getOpcode() == Instruction::UDiv)
+ BinOp->setIsExact(Instr->isExact());
Instr->replaceAllUsesWith(Zext);
Instr->eraseFromParent();
@@ -554,6 +550,7 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumSRems;
auto *BO = BinaryOperator::CreateURem(SDI->getOperand(0), SDI->getOperand(1),
SDI->getName(), SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
SDI->replaceAllUsesWith(BO);
SDI->eraseFromParent();
@@ -575,6 +572,7 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumSDivs;
auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1),
SDI->getName(), SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
BO->setIsExact(SDI->isExact());
SDI->replaceAllUsesWith(BO);
SDI->eraseFromParent();
@@ -597,6 +595,7 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumAShrs;
auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
SDI->getName(), SDI);
+ BO->setDebugLoc(SDI->getDebugLoc());
BO->setIsExact(SDI->isExact());
SDI->replaceAllUsesWith(BO);
SDI->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 6078967a0f94..4c964e6e888c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -31,6 +32,8 @@ using namespace llvm;
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
STATISTIC(DCEEliminated, "Number of insts removed");
+DEBUG_COUNTER(DCECounter, "dce-transform",
+ "Controls which instructions are eliminated");
namespace {
//===--------------------------------------------------------------------===//
@@ -50,6 +53,8 @@ namespace {
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
Instruction *Inst = &*DI++;
if (isInstructionTriviallyDead(Inst, TLI)) {
+ if (!DebugCounter::shouldExecute(DCECounter))
+ continue;
salvageDebugInfo(*Inst);
Inst->eraseFromParent();
Changed = true;
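DCE here (like constprop above and div-rem-pairs below) gains a DebugCounter, which makes individual transformations bisectable from the command line. The usage pattern, sketched outside any particular pass (pass and counter names are placeholders, and the flag spelling in the comment is the usual skip/count form):

    #include "llvm/Support/DebugCounter.h"
    using namespace llvm;

    DEBUG_COUNTER(MyCounter, "my-pass-transform",
                  "Controls which transformations in my-pass are applied");

    static bool maybeTransform(/* pass-specific state */) {
      // Counted on every candidate; with something like
      //   -debug-counter=my-pass-transform-skip=10,my-pass-transform-count=1
      // only the 11th candidate is actually rewritten.
      if (!DebugCounter::shouldExecute(MyCounter))
        return false;
      // ... perform the rewrite ...
      return true;
    }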
@@ -77,6 +82,9 @@ static bool DCEInstruction(Instruction *I,
SmallSetVector<Instruction *, 16> &WorkList,
const TargetLibraryInfo *TLI) {
if (isInstructionTriviallyDead(I, TLI)) {
+ if (!DebugCounter::shouldExecute(DCECounter))
+ return false;
+
salvageDebugInfo(*I);
// Null out all of the instruction's operands to see if any operand becomes
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 9a7405e98e7d..469930ca6a19 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -71,7 +71,7 @@ using namespace llvm;
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
-STATISTIC(NumFastOther , "Number of other instrs removed");
+STATISTIC(NumFastOther, "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
STATISTIC(NumModifiedStores, "Number of stores modified");
@@ -349,11 +349,14 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
InstOverlapIntervalsTy &IOL,
AliasAnalysis &AA,
const Function *F) {
- // If we don't know the sizes of either access, then we can't do a comparison.
- if (Later.Size == MemoryLocation::UnknownSize ||
- Earlier.Size == MemoryLocation::UnknownSize)
+ // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
+ // get imprecise values here, though (except for unknown sizes).
+ if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise())
return OW_Unknown;
+ const uint64_t LaterSize = Later.Size.getValue();
+ const uint64_t EarlierSize = Earlier.Size.getValue();
+
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -361,7 +364,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// the later store was larger than the earlier store.
if (P1 == P2 || AA.isMustAlias(P1, P2)) {
// Make sure that the Later size is >= the Earlier size.
- if (Later.Size >= Earlier.Size)
+ if (LaterSize >= EarlierSize)
return OW_Complete;
}
@@ -379,7 +382,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// If the "Later" store is to a recognizable object, get its size.
uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F);
if (ObjectSize != MemoryLocation::UnknownSize)
- if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
+ if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
@@ -410,8 +413,8 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
- Later.Size >= Earlier.Size &&
- uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+ LaterSize >= EarlierSize &&
+ uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
return OW_Complete;
// We may now overlap, although the overlap is not complete. There might also
@@ -420,21 +423,21 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// Note: The correctness of this logic depends on the fact that this function
// is not even called providing DepWrite when there are any intervening reads.
if (EnablePartialOverwriteTracking &&
- LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- int64_t(LaterOff + Later.Size) >= EarlierOff) {
+ LaterOff < int64_t(EarlierOff + EarlierSize) &&
+ int64_t(LaterOff + LaterSize) >= EarlierOff) {
// Insert our part of the overlap into the map.
auto &IM = IOL[DepWrite];
LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
- << ", " << int64_t(EarlierOff + Earlier.Size)
+ << ", " << int64_t(EarlierOff + EarlierSize)
<< ") Later [" << LaterOff << ", "
- << int64_t(LaterOff + Later.Size) << ")\n");
+ << int64_t(LaterOff + LaterSize) << ")\n");
// Make sure that we only insert non-overlapping intervals and combine
// adjacent intervals. The intervals are stored in the map with the ending
// offset as the key (in the half-open sense) and the starting offset as
// the value.
- int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + Later.Size;
+ int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
// Find any intervals ending at, or after, LaterIntStart which start
// before LaterIntEnd.
@@ -464,10 +467,10 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
ILI = IM.begin();
if (ILI->second <= EarlierOff &&
- ILI->first >= int64_t(EarlierOff + Earlier.Size)) {
+ ILI->first >= int64_t(EarlierOff + EarlierSize)) {
LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
<< EarlierOff << ", "
- << int64_t(EarlierOff + Earlier.Size)
+ << int64_t(EarlierOff + EarlierSize)
<< ") Composite Later [" << ILI->second << ", "
<< ILI->first << ")\n");
++NumCompletePartials;
@@ -478,13 +481,13 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// Check for an earlier store which writes to all the memory locations that
// the later store writes to.
if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
- int64_t(EarlierOff + Earlier.Size) > LaterOff &&
- uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) {
+ int64_t(EarlierOff + EarlierSize) > LaterOff &&
+ uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
<< EarlierOff << ", "
- << int64_t(EarlierOff + Earlier.Size)
+ << int64_t(EarlierOff + EarlierSize)
<< ") by a later store [" << LaterOff << ", "
- << int64_t(LaterOff + Later.Size) << ")\n");
+ << int64_t(LaterOff + LaterSize) << ")\n");
// TODO: Maybe come up with a better name?
return OW_PartialEarlierWithFullLater;
}
@@ -498,8 +501,8 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// In this case we may want to trim the size of earlier to avoid generating
// writes to addresses which will definitely be overwritten later
if (!EnablePartialOverwriteTracking &&
- (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
+ (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
+ int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
return OW_End;
// Finally, we also need to check if the later store overwrites the beginning
@@ -512,9 +515,8 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// of earlier to avoid generating writes to addresses which will definitely
// be overwritten later.
if (!EnablePartialOverwriteTracking &&
- (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
- assert(int64_t(LaterOff + Later.Size) <
- int64_t(EarlierOff + Earlier.Size) &&
+ (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
+ assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
"Expect to be handled as OW_Complete");
return OW_Begin;
}
@@ -641,7 +643,7 @@ static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *Pred = *I;
if (Pred == BB) continue;
- TerminatorInst *PredTI = Pred->getTerminator();
+ Instruction *PredTI = Pred->getTerminator();
if (PredTI->getNumSuccessors() != 1)
continue;
@@ -832,7 +834,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
continue;
}
- if (auto CS = CallSite(&*BBI)) {
+ if (auto *Call = dyn_cast<CallBase>(&*BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
if (isAllocLikeFn(&*BBI, TLI))
@@ -840,15 +842,15 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// If this call does not access memory, it can't be loading any of our
// pointers.
- if (AA->doesNotAccessMemory(CS))
+ if (AA->doesNotAccessMemory(Call))
continue;
// If the call might load from any of our allocas, then any store above
// the call is live.
DeadStackObjects.remove_if([&](Value *I) {
// See if the call site touches the value.
- return isRefSet(AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI,
- BB.getParent())));
+ return isRefSet(AA->getModRefInfo(
+ Call, I, getPointerSize(I, DL, *TLI, BB.getParent())));
});
// If all of the allocas were clobbered by the call then we're not going
@@ -1002,11 +1004,10 @@ static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
Instruction *EarlierWrite = OI.first;
MemoryLocation Loc = getLocForWrite(EarlierWrite);
assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
- assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
const Value *Ptr = Loc.Ptr->stripPointerCasts();
int64_t EarlierStart = 0;
- int64_t EarlierSize = int64_t(Loc.Size);
+ int64_t EarlierSize = int64_t(Loc.Size.getValue());
GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
OverlapIntervalsTy &IntervalMap = OI.second;
Changed |=
@@ -1203,8 +1204,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
"when partial-overwrite "
"tracking is enabled");
- int64_t EarlierSize = DepLoc.Size;
- int64_t LaterSize = Loc.Size;
+ // The overwrite result is known, so these must be known, too.
+ int64_t EarlierSize = DepLoc.Size.getValue();
+ int64_t LaterSize = Loc.Size.getValue();
bool IsOverwriteEnd = (OR == OW_End);
MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
InstWriteOffset, LaterSize, IsOverwriteEnd);
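The size handling above now goes through LocationSize (isPrecise()/getValue()), but the byte arithmetic behind a complete overwrite is unchanged. In isolation, the containment test the OW_Complete path relies on is just (stand-alone C++, names illustrative):

    #include <cstdint>

    // Does Later [LaterOff, LaterOff + LaterSize) completely cover
    // Earlier [EarlierOff, EarlierOff + EarlierSize)?
    static bool laterCoversEarlier(int64_t EarlierOff, uint64_t EarlierSize,
                                   int64_t LaterOff, uint64_t LaterSize) {
      return EarlierOff >= LaterOff && LaterSize >= EarlierSize &&
             uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
    }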
diff --git a/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index e1bc590c5c9a..ffcf34f1cf7a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
using namespace llvm;
@@ -29,6 +30,8 @@ using namespace llvm;
STATISTIC(NumPairs, "Number of div/rem pairs");
STATISTIC(NumHoisted, "Number of instructions hoisted");
STATISTIC(NumDecomposed, "Number of instructions decomposed");
+DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
+ "Controls transformations in div-rem-pairs pass");
/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). If they exist in different basic blocks, bring
@@ -93,6 +96,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (!DivDominates && !DT.dominates(RemInst, DivInst))
continue;
+ if (!DebugCounter::shouldExecute(DRPCounter))
+ continue;
+
if (HasDivRemOp) {
// The target has a single div/rem operation. Hoist the lower instruction
// to make the matched pair visible to the backend.
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 533d16e088c8..1f09979b3382 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -54,6 +55,7 @@
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
#include <cassert>
#include <deque>
#include <memory>
@@ -602,6 +604,8 @@ private:
void removeMSSA(Instruction *Inst) {
if (!MSSA)
return;
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
// Removing a store here can leave MemorySSA in an unoptimized state by
// creating MemoryPhis that have identical arguments and by creating
// MemoryUses whose defining access is not an actual clobber. We handle the
@@ -808,7 +812,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
- salvageDebugInfo(*Inst);
+ if (!salvageDebugInfo(*Inst))
+ replaceDbgUsesWithUndef(Inst);
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
@@ -863,7 +868,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
- if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
+ if (isGuard(Inst)) {
if (auto *CondI =
dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
if (SimpleValue::canHandle(CondI)) {
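EarlyCSE now asks isGuard() from Analysis/GuardUtils.h instead of pattern-matching the intrinsic inline. Semantically the predicate amounts to something like the following sketch (the real helper takes a const User* and uses the pattern matcher):

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    static bool looksLikeGuard(const Instruction *I) {
      if (auto *II = dyn_cast<IntrinsicInst>(I))
        return II->getIntrinsicID() == Intrinsic::experimental_guard;
      return false;
    }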
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 1e0a22cb14b3..9861948c8297 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -38,7 +38,6 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
@@ -48,6 +47,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -71,6 +71,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/VNCoercion.h"
#include <algorithm>
@@ -97,11 +98,16 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableMemDep("enable-gvn-memdep", cl::init(true));
// Maximum allowed recursion depth.
static cl::opt<uint32_t>
-MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
- cl::desc("Max recurse depth (default = 1000)"));
+MaxRecurseDepth("gvn-max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
+ cl::desc("Max recurse depth in GVN (default = 1000)"));
+
+static cl::opt<uint32_t> MaxNumDeps(
+ "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore,
+ cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
struct llvm::GVN::Expression {
uint32_t opcode;
@@ -392,18 +398,13 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
- } else if (AA->onlyReadsMemory(C)) {
+ } else if (MD && AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
auto ValNum = assignExpNewValueNum(exp);
if (ValNum.second) {
valueNumbering[C] = ValNum.first;
return ValNum.first;
}
- if (!MD) {
- uint32_t e = assignExpNewValueNum(exp).first;
- valueNumbering[C] = e;
- return e;
- }
MemDepResult local_dep = MD->getDependency(C);
@@ -436,7 +437,7 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
// Non-local case.
const MemoryDependenceResults::NonLocalDepInfo &deps =
- MD->getNonLocalCallDependency(CallSite(C));
+ MD->getNonLocalCallDependency(C);
// FIXME: Move the checking logic to MemDep!
CallInst* cdep = nullptr;
@@ -677,7 +678,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
// Optimistically assume that the block is fully available and check to see
// if we already know about this block in one lookup.
- std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+ std::pair<DenseMap<BasicBlock*, char>::iterator, bool> IV =
FullyAvailableBlocks.insert(std::make_pair(BB, 2));
// If the entry already existed for this block, return the precomputed value.
@@ -1074,15 +1075,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// because if the index is out of bounds we should deoptimize rather than
// access the array.
// Check that there is no guard in this block above our instruction.
- if (!IsSafeToSpeculativelyExecute) {
- auto It = FirstImplicitControlFlowInsts.find(TmpBB);
- if (It != FirstImplicitControlFlowInsts.end()) {
- assert(It->second->getParent() == TmpBB &&
- "Implicit control flow map broken?");
- if (OI->dominates(It->second, LI))
- return false;
- }
- }
+ if (!IsSafeToSpeculativelyExecute && ICF->isDominatedByICFIFromSameBlock(LI))
+ return false;
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1099,8 +1093,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
// Check that there is no implicit control flow in a block above.
- if (!IsSafeToSpeculativelyExecute &&
- FirstImplicitControlFlowInsts.count(TmpBB))
+ if (!IsSafeToSpeculativelyExecute && ICF->hasICF(TmpBB))
return false;
}
@@ -1322,7 +1315,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// dependencies, this load isn't worth worrying about. Optimizing
// it will be too expensive.
unsigned NumDeps = Deps.size();
- if (NumDeps > 100)
+ if (NumDeps > MaxNumDeps)
return false;
// If we had a phi translation failure, we'll have a single entry which is a
@@ -1451,37 +1444,6 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
return Changed;
}
-static void patchReplacementInstruction(Instruction *I, Value *Repl) {
- auto *ReplInst = dyn_cast<Instruction>(Repl);
- if (!ReplInst)
- return;
-
- // Patch the replacement so that it is not more restrictive than the value
- // being replaced.
- // Note that if 'I' is a load being replaced by some operation,
- // for example, by an arithmetic operation, then andIRFlags()
- // would just erase all math flags from the original arithmetic
- // operation, which is clearly not wanted and not needed.
- if (!isa<LoadInst>(I))
- ReplInst->andIRFlags(I);
-
- // FIXME: If both the original and replacement value are part of the
- // same control-flow region (meaning that the execution of one
- // guarantees the execution of the other), then we can combine the
- // noalias scopes here and do better than the general conservative
- // answer used in combineMetadata().
-
- // In general, GVN unifies expressions over different control-flow
- // regions, and so we need a conservative combination of the noalias
- // scopes.
- static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
-}
-
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
@@ -1683,10 +1645,12 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
}
void GVN::assignBlockRPONumber(Function &F) {
+ BlockRPONumber.clear();
uint32_t NextBlockNumber = 1;
ReversePostOrderTraversal<Function *> RPOT(&F);
for (BasicBlock *BB : RPOT)
BlockRPONumber[BB] = NextBlockNumber++;
+ InvalidBlockRPONumbers = false;
}
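assignBlockRPONumber above now clears and lazily recomputes the numbering (guarded by InvalidBlockRPONumbers). The numbering itself is a plain reverse-post-order walk: along forward edges a block is visited before its successors, so a predecessor numbered >= the current block marks a retreating edge, which performScalarPRE conservatively treats as a loop backedge. A stand-alone sketch of the walk:

    #include <cstdint>
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    static DenseMap<BasicBlock *, uint32_t> numberBlocksRPO(Function &F) {
      DenseMap<BasicBlock *, uint32_t> RPONumber;
      uint32_t Next = 1;
      ReversePostOrderTraversal<Function *> RPOT(&F);
      for (BasicBlock *BB : RPOT)
        RPONumber[BB] = Next++; // smaller numbers come earlier in RPO
      return RPONumber;
    }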
// Tries to replace instruction with const, using information from
@@ -1778,6 +1742,9 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
+ // Cached information for anything that uses LHS will be invalid.
+ if (MD)
+ MD->invalidateCachedPointerInfo(LHS);
}
// Now try to deduce additional equalities from this one. For example, if
@@ -1853,6 +1820,9 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
Root.getStart());
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
+ // Cached information for anything that uses NotCmp will be invalid.
+ if (MD)
+ MD->invalidateCachedPointerInfo(NotCmp);
}
}
// Ensure that any instruction in scope that gets the "A < B" value number
@@ -1975,7 +1945,7 @@ bool GVN::processInstruction(Instruction *I) {
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
- if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+ if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
addToLeaderTable(Num, I, I->getParent());
return false;
}
@@ -2020,20 +1990,22 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
TLI = &RunTLI;
VN.setAliasAnalysis(&RunAA);
MD = RunMD;
- OrderedInstructions OrderedInstrs(DT);
- OI = &OrderedInstrs;
+ ImplicitControlFlowTracking ImplicitCFT(DT);
+ ICF = &ImplicitCFT;
VN.setMemDep(MD);
ORE = RunORE;
+ InvalidBlockRPONumbers = true;
bool Changed = false;
bool ShouldContinue = true;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = &*FI++;
- bool removedBlock = MergeBlockIntoPredecessor(BB, DT, LI, MD);
+ bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, nullptr, MD);
if (removedBlock)
++NumGVNBlocks;
@@ -2052,7 +2024,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Fabricate val-num for dead-code in order to suppress assertion in
// performPRE().
assignValNumForDeadCode();
- assignBlockRPONumber(F);
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -2104,27 +2075,16 @@ bool GVN::processBlock(BasicBlock *BB) {
if (!AtStart)
--BI;
- bool InvalidateImplicitCF = false;
- const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
for (auto *I : InstrsToErase) {
assert(I->getParent() == BB && "Removing instruction from wrong block?");
LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
salvageDebugInfo(*I);
if (MD) MD->removeInstruction(I);
LLVM_DEBUG(verifyRemoved(I));
- if (MaybeFirstICF == I) {
- // We have erased the first ICF in block. The map needs to be updated.
- InvalidateImplicitCF = true;
- // Do not keep dangling pointer on the erased instruction.
- MaybeFirstICF = nullptr;
- }
+ ICF->removeInstruction(I);
I->eraseFromParent();
}
-
- OI->invalidateBlock(BB);
InstrsToErase.clear();
- if (InvalidateImplicitCF)
- fillImplicitControlFlowInfo(BB);
if (AtStart)
BI = BB->begin();
@@ -2184,7 +2144,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
}
bool GVN::performScalarPRE(Instruction *CurInst) {
- if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
+ if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
@@ -2197,6 +2157,16 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (isa<CmpInst>(CurInst))
return false;
+ // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
+ // sinking the addressing mode computation back to its uses. Extending the
+ // GEP's live range increases the register pressure, and therefore it can
+ // introduce unnecessary spills.
+ //
+ // This doesn't prevent Load PRE. PHI translation will make the GEP available
+ // to the load by moving it to the predecessor block if necessary.
+ if (isa<GetElementPtrInst>(CurInst))
+ return false;
+
// We don't currently value number ANY inline asm calls.
if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
if (CallI->isInlineAsm())
@@ -2215,6 +2185,10 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
BasicBlock *PREPred = nullptr;
BasicBlock *CurrentBlock = CurInst->getParent();
+ // Update the RPO numbers for this function.
+ if (InvalidBlockRPONumbers)
+ assignBlockRPONumber(*CurrentBlock->getParent());
+
SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
for (BasicBlock *P : predecessors(CurrentBlock)) {
// We're not interested in PRE where blocks with predecessors that are
@@ -2226,6 +2200,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// It is not safe to do PRE when P->CurrentBlock is a loop backedge, and
// when CurInst has operand defined in CurrentBlock (so it may be defined
// by phi in the loop header).
+ assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
+ "Invalid BlockRPONumber map.");
if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] &&
llvm::any_of(CurInst->operands(), [&](const Use &U) {
if (auto *Inst = dyn_cast<Instruction>(U.get()))
@@ -2268,13 +2244,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// is always executed. An instruction with implicit control flow could
// prevent us from doing it. If we cannot speculate the execution, then
// PRE should be prohibited.
- auto It = FirstImplicitControlFlowInsts.find(CurrentBlock);
- if (It != FirstImplicitControlFlowInsts.end()) {
- assert(It->second->getParent() == CurrentBlock &&
- "Implicit control flow map broken?");
- if (OI->dominates(It->second, CurInst))
- return false;
- }
+ if (ICF->isDominatedByICFIFromSameBlock(CurInst))
+ return false;
}
// Don't do PRE across indirect branch.
@@ -2335,14 +2306,10 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (MD)
MD->removeInstruction(CurInst);
LLVM_DEBUG(verifyRemoved(CurInst));
- bool InvalidateImplicitCF =
- FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
// FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
// some assertion failures.
- OI->invalidateBlock(CurrentBlock);
+ ICF->removeInstruction(CurInst);
CurInst->eraseFromParent();
- if (InvalidateImplicitCF)
- fillImplicitControlFlowInfo(CurrentBlock);
++NumGVNInstr;
return true;
@@ -2382,6 +2349,7 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
if (MD)
MD->invalidateCachedPredecessors();
+ InvalidBlockRPONumbers = true;
return BB;
}
@@ -2391,11 +2359,12 @@ bool GVN::splitCriticalEdges() {
if (toSplit.empty())
return false;
do {
- std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
+ std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
SplitCriticalEdge(Edge.first, Edge.second,
CriticalEdgeSplittingOptions(DT));
} while (!toSplit.empty());
if (MD) MD->invalidateCachedPredecessors();
+ InvalidBlockRPONumbers = true;
return true;
}
@@ -2411,8 +2380,6 @@ bool GVN::iterateOnFunction(Function &F) {
ReversePostOrderTraversal<Function *> RPOT(&F);
for (BasicBlock *BB : RPOT)
- fillImplicitControlFlowInfo(BB);
- for (BasicBlock *BB : RPOT)
Changed |= processBlock(BB);
return Changed;
@@ -2423,48 +2390,8 @@ void GVN::cleanupGlobalSets() {
LeaderTable.clear();
BlockRPONumber.clear();
TableAllocator.Reset();
- FirstImplicitControlFlowInsts.clear();
-}
-
-void
-GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
- // Make sure that all marked instructions are actually deleted by this point,
- // so that we don't need to care about omitting them.
- assert(InstrsToErase.empty() && "Filling before removed all marked insns?");
- auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) {
- // If a block's instruction doesn't always pass the control to its successor
- // instruction, mark the block as having implicit control flow. We use them
- // to avoid wrong assumptions of sort "if A is executed and B post-dominates
- // A, then B is also executed". This is not true is there is an implicit
- // control flow instruction (e.g. a guard) between them.
- //
- // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
- // for volatile stores and loads because they can trap. The discussion on
- // whether or not it is correct is still ongoing. We might want to get rid
- // of this logic in the future. Anyways, trapping instructions shouldn't
- // introduce implicit control flow, so we explicitly allow them here. This
- // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
- if (isGuaranteedToTransferExecutionToSuccessor(I))
- return false;
- if (isa<LoadInst>(I)) {
- assert(cast<LoadInst>(I)->isVolatile() &&
- "Non-volatile load should transfer execution to successor!");
- return false;
- }
- if (isa<StoreInst>(I)) {
- assert(cast<StoreInst>(I)->isVolatile() &&
- "Non-volatile store should transfer execution to successor!");
- return false;
- }
- return true;
- };
- FirstImplicitControlFlowInsts.erase(BB);
-
- for (auto &I : *BB)
- if (MayNotTransferExecutionToSuccessor(&I)) {
- FirstImplicitControlFlowInsts[BB] = &I;
- break;
- }
+ ICF->clear();
+ InvalidBlockRPONumbers = true;
}
/// Verify that the specified instruction does not occur in our
@@ -2554,6 +2481,8 @@ void GVN::addDeadBlock(BasicBlock *BB) {
PHINode &Phi = cast<PHINode>(*II);
Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
UndefValue::get(Phi.getType()));
+ if (MD)
+ MD->invalidateCachedPointerInfo(&Phi);
}
}
}
@@ -2613,8 +2542,8 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit GVNLegacyPass(bool NoLoads = false)
- : FunctionPass(ID), NoLoads(NoLoads) {
+ explicit GVNLegacyPass(bool NoMemDepAnalysis = !EnableMemDep)
+ : FunctionPass(ID), NoMemDepAnalysis(NoMemDepAnalysis) {
initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -2629,7 +2558,7 @@ public:
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
getAnalysis<AAResultsWrapperPass>().getAAResults(),
- NoLoads ? nullptr
+ NoMemDepAnalysis ? nullptr
: &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(),
LIWP ? &LIWP->getLoopInfo() : nullptr,
&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
@@ -2639,7 +2568,7 @@ public:
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (!NoLoads)
+ if (!NoMemDepAnalysis)
AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
@@ -2650,7 +2579,7 @@ public:
}
private:
- bool NoLoads;
+ bool NoMemDepAnalysis;
GVN Impl;
};
@@ -2667,6 +2596,6 @@ INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
// The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoLoads) {
- return new GVNLegacyPass(NoLoads);
+FunctionPass *llvm::createGVNPass(bool NoMemDepAnalysis) {
+ return new GVNLegacyPass(NoMemDepAnalysis);
}
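Two bookkeeping changes run through the GVN hunks above: implicit-control-flow information is now maintained incrementally through the ICF object, and the block RPO numbers are recomputed lazily whenever InvalidBlockRPONumbers has been set (after splitting critical edges, clearing global sets, and so on). A minimal sketch of that lazy-invalidation pattern, with simplified names that are not the pass's own:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    struct LazyRPONumbers {
      DenseMap<const BasicBlock *, unsigned> Number;
      bool Invalid = true;

      // Mutators only flip a flag; nothing is recomputed eagerly.
      void invalidate() { Invalid = true; }

      // Readers call this first; the traversal runs only when needed.
      void ensure(Function &F) {
        if (!Invalid)
          return;
        Number.clear();
        unsigned N = 0;
        ReversePostOrderTraversal<Function *> RPOT(&F);
        for (BasicBlock *BB : RPOT)
          Number[BB] = ++N;
        Invalid = false;
      }
    };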
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 6d2b25cf6013..76a42d7fe750 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -246,8 +246,8 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
+ LLVMContext::MD_invariant_group, LLVMContext::MD_access_group};
+ combineMetadata(ReplInst, I, KnownIDs, true);
}
// This pass hoists common computations across branches sharing common
@@ -365,7 +365,7 @@ private:
// Return true when a successor of BB dominates A.
bool successorDominate(const BasicBlock *BB, const BasicBlock *A) {
- for (const BasicBlock *Succ : BB->getTerminator()->successors())
+ for (const BasicBlock *Succ : successors(BB))
if (DT->dominates(Succ, A))
return true;
@@ -577,15 +577,15 @@ private:
// Returns the edge via which an instruction in BB will get the values from.
// Returns true when the values are flowing out to each edge.
- bool valueAnticipable(CHIArgs C, TerminatorInst *TI) const {
+ bool valueAnticipable(CHIArgs C, Instruction *TI) const {
if (TI->getNumSuccessors() > (unsigned)size(C))
return false; // Not enough args in this CHI.
for (auto CHI : C) {
BasicBlock *Dest = CHI.Dest;
// Find if all the edges have values flowing out of BB.
- bool Found = llvm::any_of(TI->successors(), [Dest](const BasicBlock *BB) {
- return BB == Dest; });
+ bool Found = llvm::any_of(
+ successors(TI), [Dest](const BasicBlock *BB) { return BB == Dest; });
if (!Found)
return false;
}
@@ -748,11 +748,9 @@ private:
// TODO: Remove fully-redundant expressions.
// Get instruction from the Map, assume that all the Instructions
// with same VNs have same rank (this is an approximation).
- llvm::sort(Ranks.begin(), Ranks.end(),
- [this, &Map](const VNType &r1, const VNType &r2) {
- return (rank(*Map.lookup(r1).begin()) <
- rank(*Map.lookup(r2).begin()));
- });
+ llvm::sort(Ranks, [this, &Map](const VNType &r1, const VNType &r2) {
+ return (rank(*Map.lookup(r1).begin()) < rank(*Map.lookup(r2).begin()));
+ });
// - Sort VNs according to their rank, and start with lowest ranked VN
// - Take a VN and for each instruction with same VN
@@ -784,6 +782,7 @@ private:
// which currently have dead terminators that are control
// dependence sources of a block which is in NewLiveBlocks.
IDFs.setDefiningBlocks(VNBlocks);
+ IDFBlocks.clear();
IDFs.calculate(IDFBlocks);
// Make a map of BB vs instructions to be hoisted.
@@ -792,7 +791,7 @@ private:
}
// Insert empty CHI node for this VN. This is used to factor out
// basic blocks where the ANTIC can potentially change.
- for (auto IDFB : IDFBlocks) { // TODO: Prune out useless CHI insertions.
+ for (auto IDFB : IDFBlocks) {
for (unsigned i = 0; i < V.size(); ++i) {
CHIArg C = {VN, nullptr, nullptr};
// Ignore spurious PDFs.
@@ -1100,7 +1099,7 @@ private:
break;
// Do not value number terminator instructions.
- if (isa<TerminatorInst>(&I1))
+ if (I1.isTerminator())
break;
if (auto *Load = dyn_cast<LoadInst>(&I1))
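Several of the hoisting and sinking hunks replace the iterator form llvm::sort(X.begin(), X.end(), ...) with the range form llvm::sort(X, ...). A small, self-contained illustration of the two spellings (the data is made up):

    #include "llvm/ADT/STLExtras.h"   // llvm::sort
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    void sortDemo() {
      SmallVector<int, 8> V = {3, 1, 2};
      // Old spelling: explicit iterator pair.
      llvm::sort(V.begin(), V.end(), [](int A, int B) { return A < B; });
      // New spelling: pass the whole range. In expensive-checks builds this
      // overload also shuffles the range first, which helps flush out
      // comparators that are not strict weak orderings.
      llvm::sort(V, [](int A, int B) { return A < B; });
    }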
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 8959038de596..1df5f5400c14 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -239,7 +239,7 @@ public:
SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
- llvm::sort(Ops.begin(), Ops.end());
+ llvm::sort(Ops);
for (auto &P : Ops) {
Blocks.push_back(P.first);
Values.push_back(P.second);
@@ -258,14 +258,14 @@ public:
/// Create a PHI from an array of incoming values and incoming blocks.
template <typename VArray, typename BArray>
ModelledPHI(const VArray &V, const BArray &B) {
- std::copy(V.begin(), V.end(), std::back_inserter(Values));
- std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ llvm::copy(V, std::back_inserter(Values));
+ llvm::copy(B, std::back_inserter(Blocks));
}
/// Create a PHI from [I[OpNum] for I in Insts].
template <typename BArray>
ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
- std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ llvm::copy(B, std::back_inserter(Blocks));
for (auto *I : Insts)
Values.push_back(I->getOperand(OpNum));
}
@@ -762,7 +762,7 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
}
if (Preds.size() < 2)
return 0;
- llvm::sort(Preds.begin(), Preds.end());
+ llvm::sort(Preds);
unsigned NumOrigPreds = Preds.size();
// We can only sink instructions through unconditional branches.
@@ -859,7 +859,7 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
// Update metadata and IR flags.
for (auto *I : Insts)
if (I != I0) {
- combineMetadataForCSE(I0, I);
+ combineMetadataForCSE(I0, I, true);
I0->andIRFlags(I);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 055fcbc8436f..efc204d4f74b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -44,6 +44,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/PostDominators.h"
@@ -63,22 +65,69 @@ using namespace llvm;
#define DEBUG_TYPE "guard-widening"
STATISTIC(GuardsEliminated, "Number of eliminated guards");
+STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
+
+static cl::opt<bool> WidenFrequentBranches(
+ "guard-widening-widen-frequent-branches", cl::Hidden,
+ cl::desc("Widen conditions of explicit branches into dominating guards in "
+ "case if their taken frequency exceeds threshold set by "
+ "guard-widening-frequent-branch-threshold option"),
+ cl::init(false));
+
+static cl::opt<unsigned> FrequentBranchThreshold(
+ "guard-widening-frequent-branch-threshold", cl::Hidden,
+ cl::desc("When WidenFrequentBranches is set to true, this option is used "
+ "to determine which branches are frequently taken. The criteria "
+ "that a branch is taken more often than "
+ "((FrequentBranchThreshold - 1) / FrequentBranchThreshold), then "
+ "it is considered frequently taken"),
+ cl::init(1000));
+
namespace {
+// Get the condition of \p I. It can either be a guard or a conditional branch.
+static Value *getCondition(Instruction *I) {
+ if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
+ assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+ "Bad guard intrinsic?");
+ return GI->getArgOperand(0);
+ }
+ return cast<BranchInst>(I)->getCondition();
+}
+
+// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
+// conditional branch.
+static void setCondition(Instruction *I, Value *NewCond) {
+ if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
+ assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+ "Bad guard intrinsic?");
+ GI->setArgOperand(0, NewCond);
+ return;
+ }
+ cast<BranchInst>(I)->setCondition(NewCond);
+}
+
+// Eliminates the guard instruction properly.
+static void eliminateGuard(Instruction *GuardInst) {
+ GuardInst->eraseFromParent();
+ ++GuardsEliminated;
+}
+
class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
LoopInfo &LI;
+ BranchProbabilityInfo *BPI;
/// Together, these describe the region of interest. This might be all of
/// the blocks within a function, or only a given loop's blocks and preheader.
DomTreeNode *Root;
std::function<bool(BasicBlock*)> BlockFilter;
- /// The set of guards whose conditions have been widened into dominating
- /// guards.
- SmallVector<Instruction *, 16> EliminatedGuards;
+ /// The set of guards and conditional branches whose conditions have been
+ /// widened into dominating guards.
+ SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;
/// The set of guards which have been widened to include conditions to other
/// guards.
@@ -91,19 +140,7 @@ class GuardWideningImpl {
bool eliminateGuardViaWidening(
Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
- GuardsPerBlock);
-
- // Get the condition from \p GuardInst.
- Value *getGuardCondition(Instruction *GuardInst);
-
- // Set the condition for \p GuardInst.
- void setGuardCondition(Instruction *GuardInst, Value *NewCond);
-
- // Whether or not the particular instruction is a guard.
- bool isGuard(const Instruction *I);
-
- // Eliminates the guard instruction properly.
- void eliminateGuard(Instruction *GuardInst);
+ GuardsPerBlock, bool InvertCondition = false);
/// Used to keep track of which widening potential is more effective.
enum WideningScore {
@@ -127,11 +164,13 @@ class GuardWideningImpl {
/// Compute the score for widening the condition in \p DominatedGuard
/// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
- /// \p DominatingGuardLoop).
+ /// \p DominatingGuardLoop). If \p InvertCond is set, then we widen the
+ /// inverted condition of the dominating guard.
WideningScore computeWideningScore(Instruction *DominatedGuard,
Loop *DominatedGuardLoop,
Instruction *DominatingGuard,
- Loop *DominatingGuardLoop);
+ Loop *DominatingGuardLoop,
+ bool InvertCond);
/// Helper to check if \p V can be hoisted to \p InsertPos.
bool isAvailableAt(Value *V, Instruction *InsertPos) {
@@ -147,13 +186,14 @@ class GuardWideningImpl {
void makeAvailableAt(Value *V, Instruction *InsertPos);
/// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
- /// to generate an expression computing the logical AND of \p Cond0 and \p
- /// Cond1. Return true if the expression computing the AND is only as
+ /// to generate an expression computing the logical AND of \p Cond0 and (\p
+ /// Cond1 XOR \p InvertCondition).
+ /// Return true if the expression computing the AND is only as
/// expensive as computing one of the two. If \p InsertPt is true then
/// actually generate the resulting expression, make it available at \p
/// InsertPt and return it in \p Result (else no change to the IR is made).
bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
- Value *&Result);
+ Value *&Result, bool InvertCondition);
/// Represents a range check of the form \c Base + \c Offset u< \c Length,
/// with the constraint that \c Length is not negative. \c CheckInst is the
@@ -214,25 +254,31 @@ class GuardWideningImpl {
/// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
/// computing only one of the two expressions?
- bool isWideningCondProfitable(Value *Cond0, Value *Cond1) {
+ bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) {
Value *ResultUnused;
- return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused);
+ return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused,
+ InvertCond);
}
- /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to
- /// whatever it is already checking).
- void widenGuard(Instruction *ToWiden, Value *NewCondition) {
+ /// If \p InvertCondition is false, Widen \p ToWiden to fail if
+ /// \p NewCondition is false, otherwise make it fail if \p NewCondition is
+ /// true (in addition to whatever it is already checking).
+ void widenGuard(Instruction *ToWiden, Value *NewCondition,
+ bool InvertCondition) {
Value *Result;
- widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result);
- setGuardCondition(ToWiden, Result);
+ widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result,
+ InvertCondition);
+ setCondition(ToWiden, Result);
}
public:
explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
- LoopInfo &LI, DomTreeNode *Root,
+ LoopInfo &LI, BranchProbabilityInfo *BPI,
+ DomTreeNode *Root,
std::function<bool(BasicBlock*)> BlockFilter)
- : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter) {}
+ : DT(DT), PDT(PDT), LI(LI), BPI(BPI), Root(Root), BlockFilter(BlockFilter)
+ {}
/// The entry point for this pass.
bool run();
@@ -242,6 +288,12 @@ public:
bool GuardWideningImpl::run() {
DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
bool Changed = false;
+ Optional<BranchProbability> LikelyTaken = None;
+ if (WidenFrequentBranches && BPI) {
+ unsigned Threshold = FrequentBranchThreshold;
+ assert(Threshold > 0 && "Zero threshold makes no sense!");
+ LikelyTaken = BranchProbability(Threshold - 1, Threshold);
+ }
for (auto DFI = df_begin(Root), DFE = df_end(Root);
DFI != DFE; ++DFI) {
@@ -257,12 +309,31 @@ bool GuardWideningImpl::run() {
for (auto *II : CurrentList)
Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
+ if (WidenFrequentBranches && BPI)
+ if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ if (BI->isConditional()) {
+ // If one of the branches of a conditional is likely taken, try to
+ // eliminate it.
+ if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken)
+ Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock);
+ else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken)
+ Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock,
+ /*InvertCondition*/true);
+ }
}
- assert(EliminatedGuards.empty() || Changed);
- for (auto *II : EliminatedGuards)
- if (!WidenedGuards.count(II))
- eliminateGuard(II);
+ assert(EliminatedGuardsAndBranches.empty() || Changed);
+ for (auto *I : EliminatedGuardsAndBranches)
+ if (!WidenedGuards.count(I)) {
+ assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
+ if (isGuard(I))
+ eliminateGuard(I);
+ else {
+ assert(isa<BranchInst>(I) &&
+ "Eliminated something other than guard or branch?");
+ ++CondBranchEliminated;
+ }
+ }
return Changed;
}
@@ -270,7 +341,13 @@ bool GuardWideningImpl::run() {
bool GuardWideningImpl::eliminateGuardViaWidening(
Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
- GuardsInBlock) {
+ GuardsInBlock, bool InvertCondition) {
+ // Ignore trivial true or false conditions. These instructions will be
+ // trivially eliminated by any cleanup pass. Do not erase them because other
+ // guards can possibly be widened into them.
+ if (isa<ConstantInt>(getCondition(GuardInst)))
+ return false;
+
Instruction *BestSoFar = nullptr;
auto BestScoreSoFar = WS_IllegalOrNegative;
auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
@@ -304,7 +381,7 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
assert((i == (e - 1)) == (GuardInst->getParent() == CurBB) && "Bad DFS?");
- if (i == (e - 1)) {
+ if (i == (e - 1) && CurBB->getTerminator() != GuardInst) {
// Corner case: make sure we're only looking at guards strictly dominating
// GuardInst when visiting GuardInst->getParent().
auto NewEnd = std::find(I, E, GuardInst);
@@ -314,9 +391,10 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
for (auto *Candidate : make_range(I, E)) {
auto Score =
- computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop);
- LLVM_DEBUG(dbgs() << "Score between " << *getGuardCondition(GuardInst)
- << " and " << *getGuardCondition(Candidate) << " is "
+ computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop,
+ InvertCondition);
+ LLVM_DEBUG(dbgs() << "Score between " << *getCondition(GuardInst)
+ << " and " << *getCondition(Candidate) << " is "
<< scoreTypeToString(Score) << "\n");
if (Score > BestScoreSoFar) {
BestScoreSoFar = Score;
@@ -336,41 +414,19 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
<< " with score " << scoreTypeToString(BestScoreSoFar)
<< "\n");
- widenGuard(BestSoFar, getGuardCondition(GuardInst));
- setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext()));
- EliminatedGuards.push_back(GuardInst);
+ widenGuard(BestSoFar, getCondition(GuardInst), InvertCondition);
+ auto NewGuardCondition = InvertCondition
+ ? ConstantInt::getFalse(GuardInst->getContext())
+ : ConstantInt::getTrue(GuardInst->getContext());
+ setCondition(GuardInst, NewGuardCondition);
+ EliminatedGuardsAndBranches.push_back(GuardInst);
WidenedGuards.insert(BestSoFar);
return true;
}
-Value *GuardWideningImpl::getGuardCondition(Instruction *GuardInst) {
- IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
- assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
- "Bad guard intrinsic?");
- return GI->getArgOperand(0);
-}
-
-void GuardWideningImpl::setGuardCondition(Instruction *GuardInst,
- Value *NewCond) {
- IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
- assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
- "Bad guard intrinsic?");
- GI->setArgOperand(0, NewCond);
-}
-
-bool GuardWideningImpl::isGuard(const Instruction* I) {
- using namespace llvm::PatternMatch;
- return match(I, m_Intrinsic<Intrinsic::experimental_guard>());
-}
-
-void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) {
- GuardInst->eraseFromParent();
- ++GuardsEliminated;
-}
-
GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
Instruction *DominatedGuard, Loop *DominatedGuardLoop,
- Instruction *DominatingGuard, Loop *DominatingGuardLoop) {
+ Instruction *DominatingGuard, Loop *DominatingGuardLoop, bool InvertCond) {
bool HoistingOutOfLoop = false;
if (DominatingGuardLoop != DominatedGuardLoop) {
@@ -383,7 +439,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
HoistingOutOfLoop = true;
}
- if (!isAvailableAt(getGuardCondition(DominatedGuard), DominatingGuard))
+ if (!isAvailableAt(getCondition(DominatedGuard), DominatingGuard))
return WS_IllegalOrNegative;
// If the guard was conditional executed, it may never be reached
@@ -394,8 +450,8 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
// here. TODO: evaluate cost model for spurious deopt
// NOTE: As written, this also lets us hoist right over another guard which
// is essentially just another spelling for control flow.
- if (isWideningCondProfitable(getGuardCondition(DominatedGuard),
- getGuardCondition(DominatingGuard)))
+ if (isWideningCondProfitable(getCondition(DominatedGuard),
+ getCondition(DominatingGuard), InvertCond))
return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
if (HoistingOutOfLoop)
@@ -416,8 +472,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
return false;
// TODO: diamond, triangle cases
if (!PDT) return true;
- return !PDT->dominates(DominatedGuard->getParent(),
- DominatingGuard->getParent());
+ return !PDT->dominates(DominatedBlock, DominatingBlock);
};
return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
@@ -459,7 +514,8 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) {
}
bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
- Instruction *InsertPt, Value *&Result) {
+ Instruction *InsertPt, Value *&Result,
+ bool InvertCondition) {
using namespace llvm::PatternMatch;
{
@@ -469,6 +525,8 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
ICmpInst::Predicate Pred0, Pred1;
if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
+ if (InvertCondition)
+ Pred1 = ICmpInst::getInversePredicate(Pred1);
ConstantRange CR0 =
ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
@@ -502,7 +560,9 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
{
SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
- if (parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
+ // TODO: Support InvertCondition case?
+ if (!InvertCondition &&
+ parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
combineRangeChecks(Checks, CombinedChecks)) {
if (InsertPt) {
Result = nullptr;
@@ -526,7 +586,8 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
if (InsertPt) {
makeAvailableAt(Cond0, InsertPt);
makeAvailableAt(Cond1, InsertPt);
-
+ if (InvertCondition)
+ Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
}
@@ -636,9 +697,8 @@ bool GuardWideningImpl::combineRangeChecks(
// CurrentChecks.size() will typically be 3 here, but so far there has been
// no need to hard-code that fact.
- llvm::sort(CurrentChecks.begin(), CurrentChecks.end(),
- [&](const GuardWideningImpl::RangeCheck &LHS,
- const GuardWideningImpl::RangeCheck &RHS) {
+ llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
+ const GuardWideningImpl::RangeCheck &RHS) {
return LHS.getOffsetValue().slt(RHS.getOffsetValue());
});
@@ -728,7 +788,10 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
+ BranchProbabilityInfo *BPI = nullptr;
+ if (WidenFrequentBranches)
+ BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
+ if (!GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run())
return PreservedAnalyses::all();
@@ -751,7 +814,10 @@ struct GuardWideningLegacyPass : public FunctionPass {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
+ BranchProbabilityInfo *BPI = nullptr;
+ if (WidenFrequentBranches)
+ BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ return GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run();
}
@@ -760,6 +826,8 @@ struct GuardWideningLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ if (WidenFrequentBranches)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
}
};
@@ -785,11 +853,16 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L->contains(BB);
};
- return GuardWideningImpl(DT, PDT, LI,
+ BranchProbabilityInfo *BPI = nullptr;
+ if (WidenFrequentBranches)
+ BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ return GuardWideningImpl(DT, PDT, LI, BPI,
DT.getNode(RootBB), BlockFilter).run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (WidenFrequentBranches)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.setPreservesCFG();
getLoopAnalysisUsage(AU);
AU.addPreserved<PostDominatorTreeWrapperPass>();
@@ -805,6 +878,8 @@ INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+if (WidenFrequentBranches)
+ INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
false, false)
@@ -814,6 +889,8 @@ INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+if (WidenFrequentBranches)
+ INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
"Widen guards (within a single loop, as a loop pass)",
false, false)
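The frequent-branch widening added above treats an edge as "likely" once its probability reaches (Threshold - 1) / Threshold, i.e. 999/1000 with the default setting. A short sketch of that comparison, assuming a BranchProbabilityInfo is available (the helper name is illustrative):

    #include "llvm/Analysis/BranchProbabilityInfo.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/Support/BranchProbability.h"
    using namespace llvm;

    static bool isFrequentlyTaken(const BasicBlock *BB, unsigned SuccIdx,
                                  const BranchProbabilityInfo &BPI,
                                  unsigned Threshold = 1000) {
      // With Threshold == 1000 an edge qualifies at probability >= 999/1000.
      BranchProbability Likely(Threshold - 1, Threshold);
      return BPI.getEdgeProbability(BB, SuccIdx) >= Likely;
    }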
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 8656e88b79cb..48d8e457ba7c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -134,26 +134,23 @@ class IndVarSimplify {
const TargetTransformInfo *TTI;
SmallVector<WeakTrackingVH, 16> DeadInsts;
- bool Changed = false;
bool isValidRewrite(Value *FromVal, Value *ToVal);
- void handleFloatingPointIV(Loop *L, PHINode *PH);
- void rewriteNonIntegerIVs(Loop *L);
+ bool handleFloatingPointIV(Loop *L, PHINode *PH);
+ bool rewriteNonIntegerIVs(Loop *L);
- void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+ bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
- void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- void rewriteFirstIterationLoopExitValues(Loop *L);
-
- Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar, SCEVExpander &Rewriter);
+ bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+ bool rewriteFirstIterationLoopExitValues(Loop *L);
+ bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
- void sinkUnusedInvariants(Loop *L);
+ bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
- Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
- Instruction *InsertPt, Type *Ty);
+ bool sinkUnusedInvariants(Loop *L);
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
@@ -284,7 +281,7 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
/// is converted into
/// for(int i = 0; i < 10000; ++i)
/// bar((double)i);
-void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
+bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
unsigned BackEdge = IncomingEdge^1;
@@ -293,12 +290,12 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
int64_t InitValue;
if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
- return;
+ return false;
// Check IV increment. Reject this PN if increment operation is not
// an add or increment value can not be represented by an integer.
auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
+ if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
@@ -306,15 +303,15 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
int64_t IncValue;
if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
- return;
+ return false;
// Check Incr uses. One user is PN and the other user is an exit condition
// used by the conditional terminator.
Value::user_iterator IncrUse = Incr->user_begin();
Instruction *U1 = cast<Instruction>(*IncrUse++);
- if (IncrUse == Incr->user_end()) return;
+ if (IncrUse == Incr->user_end()) return false;
Instruction *U2 = cast<Instruction>(*IncrUse++);
- if (IncrUse != Incr->user_end()) return;
+ if (IncrUse != Incr->user_end()) return false;
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
// only used by a branch, we can't transform it.
@@ -323,7 +320,7 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
Compare = dyn_cast<FCmpInst>(U2);
if (!Compare || !Compare->hasOneUse() ||
!isa<BranchInst>(Compare->user_back()))
- return;
+ return false;
BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
@@ -335,7 +332,7 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
if (!L->contains(TheBr->getParent()) ||
(L->contains(TheBr->getSuccessor(0)) &&
L->contains(TheBr->getSuccessor(1))))
- return;
+ return false;
// If it isn't a comparison with an integer-as-fp (the exit value), we can't
// transform it.
@@ -343,12 +340,12 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
int64_t ExitValue;
if (ExitValueVal == nullptr ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
- return;
+ return false;
// Find new predicate for integer comparison.
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
switch (Compare->getPredicate()) {
- default: return; // Unknown comparison.
+ default: return false; // Unknown comparison.
case CmpInst::FCMP_OEQ:
case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
case CmpInst::FCMP_ONE:
@@ -371,24 +368,24 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// The start/stride/exit values must all fit in signed i32.
if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
- return;
+ return false;
// If not actually striding (add x, 0.0), avoid touching the code.
if (IncValue == 0)
- return;
+ return false;
// Positive and negative strides have different safety conditions.
if (IncValue > 0) {
// If we have a positive stride, we require the init to be less than the
// exit value.
if (InitValue >= ExitValue)
- return;
+ return false;
uint32_t Range = uint32_t(ExitValue-InitValue);
// Check for infinite loop, either:
// while (i <= Exit) or until (i > Exit)
if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
- if (++Range == 0) return; // Range overflows.
+ if (++Range == 0) return false; // Range overflows.
}
unsigned Leftover = Range % uint32_t(IncValue);
@@ -398,23 +395,23 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
- return;
+ return false;
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
- return;
+ return false;
} else {
// If we have a negative stride, we require the init to be greater than the
// exit value.
if (InitValue <= ExitValue)
- return;
+ return false;
uint32_t Range = uint32_t(InitValue-ExitValue);
// Check for infinite loop, either:
// while (i >= Exit) or until (i < Exit)
if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
- if (++Range == 0) return; // Range overflows.
+ if (++Range == 0) return false; // Range overflows.
}
unsigned Leftover = Range % uint32_t(-IncValue);
@@ -424,12 +421,12 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
- return;
+ return false;
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
- return;
+ return false;
}
IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
@@ -475,10 +472,10 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
- Changed = true;
+ return true;
}
-void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
+bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
// First step. Check to see if there are any floating-point recurrences.
// If there are, change them into integer recurrences, permitting analysis by
// the SCEV routines.
@@ -488,15 +485,17 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
for (PHINode &PN : Header->phis())
PHIs.push_back(&PN);
+ bool Changed = false;
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
- handleFloatingPointIV(L, PN);
+ Changed |= handleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
// may not have been able to compute a trip count. Now that we've done some
// re-writing, the trip count may be computable.
if (Changed)
SE->forgetLoop(L);
+ return Changed;
}
namespace {
@@ -521,24 +520,34 @@ struct RewritePhi {
} // end anonymous namespace
-Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
- Loop *L, Instruction *InsertPt,
- Type *ResultTy) {
- // Before expanding S into an expensive LLVM expression, see if we can use an
- // already existing value as the expansion for S.
- if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
- if (ExistingValue->getType() == ResultTy)
- return ExistingValue;
-
- // We didn't find anything, fall back to using SCEVExpander.
- return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
-}
-
//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//
+bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
+ SmallPtrSet<const Instruction *, 8> Visited;
+ SmallVector<const Instruction *, 8> WorkList;
+ Visited.insert(I);
+ WorkList.push_back(I);
+ while (!WorkList.empty()) {
+ const Instruction *Curr = WorkList.pop_back_val();
+ // This use is outside the loop, nothing to do.
+ if (!L->contains(Curr))
+ continue;
+ // Do we assume it is a "hard" use which will not be eliminated easily?
+ if (Curr->mayHaveSideEffects())
+ return true;
+ // Otherwise, add all its users to worklist.
+ for (auto U : Curr->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (Visited.insert(UI).second)
+ WorkList.push_back(UI);
+ }
+ }
+ return false;
+}
+
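hasHardUserWithinLoop above is a plain visited-set worklist walk over the transitive users of an instruction. The same pattern on a toy graph, self-contained and with the loop-containment filter omitted for brevity:

    #include <unordered_set>
    #include <vector>

    struct Node {
      std::vector<Node *> Users;
      bool HasSideEffects = false;
    };

    // Returns true if Root or any transitive user of it has side effects.
    static bool anyHardUser(Node *Root) {
      std::unordered_set<Node *> Visited{Root};
      std::vector<Node *> WorkList{Root};
      while (!WorkList.empty()) {
        Node *Curr = WorkList.back();
        WorkList.pop_back();
        if (Curr->HasSideEffects)
          return true;
        for (Node *U : Curr->Users)
          if (Visited.insert(U).second)   // enqueue each node only once
            WorkList.push_back(U);
      }
      return false;
    }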
/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
@@ -549,7 +558,7 @@ Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
-void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Check a pre-condition.
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
"Indvars did not preserve LCSSA!");
@@ -610,48 +619,14 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
!isSafeToExpand(ExitValue, *SE))
continue;
- // Computing the value outside of the loop brings no benefit if :
- // - it is definitely used inside the loop in a way which can not be
- // optimized away.
- // - no use outside of the loop can take advantage of hoisting the
- // computation out of the loop
- if (ExitValue->getSCEVType()>=scMulExpr) {
- unsigned NumHardInternalUses = 0;
- unsigned NumSoftExternalUses = 0;
- unsigned NumUses = 0;
- for (auto IB = Inst->user_begin(), IE = Inst->user_end();
- IB != IE && NumUses <= 6; ++IB) {
- Instruction *UseInstr = cast<Instruction>(*IB);
- unsigned Opc = UseInstr->getOpcode();
- NumUses++;
- if (L->contains(UseInstr)) {
- if (Opc == Instruction::Call || Opc == Instruction::Ret)
- NumHardInternalUses++;
- } else {
- if (Opc == Instruction::PHI) {
- // Do not count the Phi as a use. LCSSA may have inserted
- // plenty of trivial ones.
- NumUses--;
- for (auto PB = UseInstr->user_begin(),
- PE = UseInstr->user_end();
- PB != PE && NumUses <= 6; ++PB, ++NumUses) {
- unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
- if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
- NumSoftExternalUses++;
- }
- continue;
- }
- if (Opc != Instruction::Call && Opc != Instruction::Ret)
- NumSoftExternalUses++;
- }
- }
- if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
- continue;
- }
+ // Computing the value outside of the loop brings no benefit if it is
+ // definitely used inside the loop in a way which can not be optimized
+ // away.
+ if (!isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
+ continue;
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
- Value *ExitVal =
- expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
+ Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
<< '\n'
@@ -662,6 +637,16 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
continue;
}
+#ifndef NDEBUG
+ // If we reuse an instruction from a loop which is neither L nor one of
+ // its containing loops, we end up breaking LCSSA form for this loop by
+ // creating a new use of its instruction.
+ if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
+ if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
+ if (EVL != L)
+ assert(EVL->contains(L) && "LCSSA breach detected!");
+#endif
+
// Collect all the candidate PHINodes to be rewritten.
RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
}
@@ -670,6 +655,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
+ bool Changed = false;
// Transformation.
for (const RewritePhi &Phi : RewritePhiSet) {
PHINode *PN = Phi.PN;
@@ -703,6 +689,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// The insertion point instruction may have been deleted; clear it out
// so that the rewriter doesn't trip over it later.
Rewriter.clearInsertPoint();
+ return Changed;
}
//===---------------------------------------------------------------------===//
@@ -714,7 +701,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
-void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
+bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
// Verify the input to the pass is already in LCSSA form.
assert(L->isLCSSAForm(*DT));
@@ -723,6 +710,7 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
auto *LoopHeader = L->getHeader();
assert(LoopHeader && "Invalid loop");
+ bool MadeAnyChanges = false;
for (auto *ExitBB : ExitBlocks) {
// If there are no more PHI nodes in this exit block, then no more
// values defined inside the loop are used on this path.
@@ -769,12 +757,14 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
if (PreheaderIdx != -1) {
assert(ExitVal->getParent() == LoopHeader &&
"ExitVal must be in loop header");
+ MadeAnyChanges = true;
PN.setIncomingValue(IncomingValIdx,
ExitVal->getIncomingValue(PreheaderIdx));
}
}
}
}
+ return MadeAnyChanges;
}
/// Check whether it is possible to delete the loop after rewriting exit
@@ -1024,6 +1014,8 @@ protected:
Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
bool widenLoopCompare(NarrowIVDefUse DU);
+ bool widenWithVariantLoadUse(NarrowIVDefUse DU);
+ void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
@@ -1368,6 +1360,146 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
return true;
}
+/// If the narrow use is an instruction whose two operands are the defining
+/// instruction of DU and a load instruction, then we have the following:
+/// if the load is hoisted outside the loop, then we do not reach this function
+/// as scalar evolution analysis works fine in widenIVUse with variables
+/// hoisted outside the loop and efficient code is subsequently generated by
+/// not emitting truncate instructions. But when the load is not hoisted
+/// (whether due to a limitation in alias analysis or a true legality constraint),
+/// then scalar evolution can not proceed with loop variant values and
+/// inefficient code is generated. This function handles the non-hoisted load
+/// special case by making the optimization generate the same type of code for
+/// hoisted and non-hoisted load (widen use and eliminate sign extend
+/// instruction). This special case is important especially when the induction
+/// variables are affecting addressing mode in code generation.
+bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ // Handle the common case of add<nsw/nuw>
+ const unsigned OpCode = NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
+ return false;
+
+ // The operand that is not defined by NarrowDef of DU. Let's call it the
+ // other operand.
+ unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
+ "bad DU");
+
+ const SCEV *ExtendOperExpr = nullptr;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(NarrowUse);
+ ExtendKind ExtKind = getExtendKind(NarrowDef);
+ if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return false;
+
+ // We are interested in the other operand being a load instruction.
+ // But, we should look into relaxing this restriction later on.
+ auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
+ if (I && I->getOpcode() != Instruction::Load)
+ return false;
+
+ // Verifying that Defining operand is an AddRec
+ const SCEV *Op1 = SE->getSCEV(WideDef);
+ const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
+ if (!AddRecOp1 || AddRecOp1->getLoop() != L)
+ return false;
+ // Verifying that other operand is an Extend.
+ if (ExtKind == SignExtended) {
+ if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
+ return false;
+ } else {
+ if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
+ return false;
+ }
+
+ if (ExtKind == SignExtended) {
+ for (Use &U : NarrowUse->uses()) {
+ SExtInst *User = dyn_cast<SExtInst>(U.getUser());
+ if (!User || User->getType() != WideType)
+ return false;
+ }
+ } else { // ExtKind == ZeroExtended
+ for (Use &U : NarrowUse->uses()) {
+ ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
+ if (!User || User->getType() != WideType)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/// Special Case for widening with variant Loads (see
+/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
+void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ ExtendKind ExtKind = getExtendKind(NarrowDef);
+
+ LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ // Generating a widening use instruction.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ ExtKind, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ ExtKind, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+
+ if (ExtKind == SignExtended)
+ ExtendKindMap[NarrowUse] = SignExtended;
+ else
+ ExtendKindMap[NarrowUse] = ZeroExtended;
+
+ // Update the Use.
+ if (ExtKind == SignExtended) {
+ for (Use &U : NarrowUse->uses()) {
+ SExtInst *User = dyn_cast<SExtInst>(U.getUser());
+ if (User && User->getType() == WideType) {
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
+ << *WideBO << "\n");
+ ++NumElimExt;
+ User->replaceAllUsesWith(WideBO);
+ DeadInsts.emplace_back(User);
+ }
+ }
+ } else { // ExtKind == ZeroExtended
+ for (Use &U : NarrowUse->uses()) {
+ ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
+ if (User && User->getType() == WideType) {
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
+ << *WideBO << "\n");
+ ++NumElimExt;
+ User->replaceAllUsesWith(WideBO);
+ DeadInsts.emplace_back(User);
+ }
+ }
+ }
+}
+
/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
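The variant-load widening added above targets loops where a narrow induction variable is combined with a value loaded inside the loop, and the result is then sign- or zero-extended before feeding address arithmetic. A rough source-level illustration (hypothetical code, not taken from the patch):

    // On an LP64 target, 'i' is a 32-bit IV but the subscript wants 64 bits.
    // If b[i] cannot be hoisted, SCEV sees a loop-variant operand and the
    // widener would otherwise insert a truncate; with this change the add is
    // widened and the sign extension of Idx disappears.
    long sum(const int *a, const int *b, int n) {
      long S = 0;
      for (int i = 0; i < n; ++i) {
        int Idx = i + b[i];   // narrow add of the IV and a loop-variant load
        S += a[Idx];          // implicit sign extension of Idx for addressing
      }
      return S;
    }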
@@ -1465,6 +1597,16 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
if (widenLoopCompare(DU))
return nullptr;
+ // We are here about to generate a truncate instruction that may hurt
+ // performance because the scalar evolution expression computed earlier
+ // in WideAddRec.first does not indicate a polynomial induction expression.
+ // In that case, look at the operands of the use instruction to determine
+ // if we can still widen the use instead of truncating its operand.
+ if (widenWithVariantLoadUse(DU)) {
+ widenWithVariantLoadUseCodegen(DU);
+ return nullptr;
+ }
+
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
@@ -1781,7 +1923,7 @@ public:
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
-void IndVarSimplify::simplifyAndExtend(Loop *L,
+bool IndVarSimplify::simplifyAndExtend(Loop *L,
SCEVExpander &Rewriter,
LoopInfo *LI) {
SmallVector<WideIVInfo, 8> WideIVs;
@@ -1798,6 +1940,7 @@ void IndVarSimplify::simplifyAndExtend(Loop *L,
// for all current phis, then determines whether any IVs can be
// widened. Widening adds new phis to LoopPhis, inducing another round of
// simplification on the wide IVs.
+ bool Changed = false;
while (!LoopPhis.empty()) {
// Evaluate as many IV expressions as possible before widening any IVs. This
// forces SCEV to set no-wrap flags before evaluating sign/zero
@@ -1827,6 +1970,7 @@ void IndVarSimplify::simplifyAndExtend(Loop *L,
}
}
}
+ return Changed;
}
//===----------------------------------------------------------------------===//
@@ -2193,11 +2337,9 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
-Value *IndVarSimplify::
-linearFunctionTestReplace(Loop *L,
- const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- SCEVExpander &Rewriter) {
+bool IndVarSimplify::
+linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter) {
assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
// Initialize CmpIndVar and IVCount to their preincremented values.
@@ -2320,8 +2462,7 @@ linearFunctionTestReplace(Loop *L,
DeadInsts.push_back(OrigCond);
++NumLFTR;
- Changed = true;
- return Cond;
+ return true;
}
//===----------------------------------------------------------------------===//
@@ -2331,13 +2472,14 @@ linearFunctionTestReplace(Loop *L,
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
+bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock) return;
+ if (!ExitBlock) return false;
BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) return;
+ if (!Preheader) return false;
+ bool MadeAnyChanges = false;
BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
BasicBlock::iterator I(Preheader->getTerminator());
while (I != Preheader->begin()) {
@@ -2407,10 +2549,13 @@ void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
Done = true;
}
+ MadeAnyChanges = true;
ToMove->moveBefore(*ExitBlock, InsertPt);
if (Done) break;
InsertPt = ToMove->getIterator();
}
+
+ return MadeAnyChanges;
}
//===----------------------------------------------------------------------===//
@@ -2421,6 +2566,7 @@ bool IndVarSimplify::run(Loop *L) {
// We need (and expect!) the incoming loop to be in LCSSA.
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
"LCSSA required to run indvars!");
+ bool Changed = false;
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -2436,7 +2582,7 @@ bool IndVarSimplify::run(Loop *L) {
// If there are any floating-point recurrences, attempt to
// transform them to use integer recurrences.
- rewriteNonIntegerIVs(L);
+ Changed |= rewriteNonIntegerIVs(L);
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
@@ -2453,7 +2599,7 @@ bool IndVarSimplify::run(Loop *L) {
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
Rewriter.disableCanonicalMode();
- simplifyAndExtend(L, Rewriter, LI);
+ Changed |= simplifyAndExtend(L, Rewriter, LI);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -2463,7 +2609,7 @@ bool IndVarSimplify::run(Loop *L) {
//
if (ReplaceExitValue != NeverRepl &&
!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- rewriteLoopExitValues(L, Rewriter);
+ Changed |= rewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
@@ -2484,8 +2630,8 @@ bool IndVarSimplify::run(Loop *L) {
// explicitly check any assumptions made by SCEV. Brittle.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
if (!AR || AR->getLoop()->getLoopPreheader())
- (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
- Rewriter);
+ Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Rewriter);
}
}
// Clear the rewriter cache, because values that are in the rewriter's cache
@@ -2498,18 +2644,18 @@ bool IndVarSimplify::run(Loop *L) {
while (!DeadInsts.empty())
if (Instruction *Inst =
dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
// The Rewriter may not be used from this point on.
// Loop-invariant instructions in the preheader that aren't used in the
// loop may be sunk below the loop to reduce register pressure.
- sinkUnusedInvariants(L);
+ Changed |= sinkUnusedInvariants(L);
// rewriteFirstIterationLoopExitValues does not rely on the computation of
// trip count and therefore can further simplify exit values in addition to
// rewriteLoopExitValues.
- rewriteFirstIterationLoopExitValues(L);
+ Changed |= rewriteFirstIterationLoopExitValues(L);
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
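A rough, hypothetical source-level sketch of the situation sinkUnusedInvariants handles, and of what its new bool return now reports to run() (the function and variable names here are invented for illustration, not taken from this change):

    // 'scaled' is loop-invariant and unused inside the loop, so the pass can
    // sink it from the preheader down to the exit block.
    int sum_then_scale(const int *a, int n, int k) {
      int scaled = k * 100;   // computed before the loop, only used after it
      int sum = 0;
      for (int i = 0; i < n; ++i)
        sum += a[i];
      return sum + scaled;    // sinking the multiply here reduces register
    }                         // pressure across the whole loop

With the hunks above, each such sink (and each of the other sub-transforms called from run()) now feeds the pass's Changed flag instead of being silently dropped.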
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index c5ed6d5c1b87..1c701bbee185 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -133,34 +133,16 @@ namespace {
/// taken by the containing loop's induction variable.
///
class InductiveRangeCheck {
- // Classifies a range check
- enum RangeCheckKind : unsigned {
- // Range check of the form "0 <= I".
- RANGE_CHECK_LOWER = 1,
-
- // Range check of the form "I < L" where L is known positive.
- RANGE_CHECK_UPPER = 2,
-
- // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER
- // conditions.
- RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER,
-
- // Unrecognized range check condition.
- RANGE_CHECK_UNKNOWN = (unsigned)-1
- };
-
- static StringRef rangeCheckKindToStr(RangeCheckKind);
const SCEV *Begin = nullptr;
const SCEV *Step = nullptr;
const SCEV *End = nullptr;
Use *CheckUse = nullptr;
- RangeCheckKind Kind = RANGE_CHECK_UNKNOWN;
bool IsSigned = true;
- static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
- ScalarEvolution &SE, Value *&Index,
- Value *&Length, bool &IsSigned);
+ static bool parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE,
+ Value *&Index, Value *&Length,
+ bool &IsSigned);
static void
extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse,
@@ -175,7 +157,6 @@ public:
void print(raw_ostream &OS) const {
OS << "InductiveRangeCheck:\n";
- OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n";
OS << " Begin: ";
Begin->print(OS);
OS << " Step: ";
@@ -283,32 +264,11 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IRCELegacyPass, "irce", "Inductive range check elimination",
false, false)
-StringRef InductiveRangeCheck::rangeCheckKindToStr(
- InductiveRangeCheck::RangeCheckKind RCK) {
- switch (RCK) {
- case InductiveRangeCheck::RANGE_CHECK_UNKNOWN:
- return "RANGE_CHECK_UNKNOWN";
-
- case InductiveRangeCheck::RANGE_CHECK_UPPER:
- return "RANGE_CHECK_UPPER";
-
- case InductiveRangeCheck::RANGE_CHECK_LOWER:
- return "RANGE_CHECK_LOWER";
-
- case InductiveRangeCheck::RANGE_CHECK_BOTH:
- return "RANGE_CHECK_BOTH";
- }
-
- llvm_unreachable("unknown range check type!");
-}
-
/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` cannot
-/// be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and set
-/// `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value being
-/// range checked, and set `Length` to the upper limit `Index` is being range
-/// checked with if (and only if) the range check type is stronger or equal to
-/// RANGE_CHECK_UPPER.
-InductiveRangeCheck::RangeCheckKind
+/// be interpreted as a range check, return false and set `Index` and `Length`
+/// to `nullptr`. Otherwise set `Index` to the value being range checked, and
+/// set `Length` to the upper limit `Index` is being range checked against.
+bool
InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
ScalarEvolution &SE, Value *&Index,
Value *&Length, bool &IsSigned) {
@@ -322,7 +282,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
switch (Pred) {
default:
- return RANGE_CHECK_UNKNOWN;
+ return false;
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
@@ -331,9 +291,9 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
IsSigned = true;
if (match(RHS, m_ConstantInt<0>())) {
Index = LHS;
- return RANGE_CHECK_LOWER;
+ return true; // Lower.
}
- return RANGE_CHECK_UNKNOWN;
+ return false;
case ICmpInst::ICMP_SLT:
std::swap(LHS, RHS);
@@ -342,15 +302,15 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
IsSigned = true;
if (match(RHS, m_ConstantInt<-1>())) {
Index = LHS;
- return RANGE_CHECK_LOWER;
+ return true; // Lower.
}
if (IsLoopInvariant(LHS)) {
Index = RHS;
Length = LHS;
- return RANGE_CHECK_UPPER;
+ return true; // Upper.
}
- return RANGE_CHECK_UNKNOWN;
+ return false;
case ICmpInst::ICMP_ULT:
std::swap(LHS, RHS);
@@ -360,9 +320,9 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
if (IsLoopInvariant(LHS)) {
Index = RHS;
Length = LHS;
- return RANGE_CHECK_BOTH;
+ return true; // Both lower and upper.
}
- return RANGE_CHECK_UNKNOWN;
+ return false;
}
llvm_unreachable("default clause returns!");
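For context, a hedged source-level example of the range-check idiom parseRangeCheckICmp recognizes (the function and parameter names below are invented for illustration):

    void copy_checked(int *dst, const int *src, int len, int base, int n) {
      for (int i = 0; i < n; ++i) {
        int j = i + base;        // affine in the induction variable
        if (j >= 0 && j < len)   // the "0 <= I" and "I < L" comparisons parsed above
          dst[j] = src[j];
        else
          return;                // bounds-check failure path
      }
    }

After this change the parser only answers whether the comparison is a recognizable check at all; the lower/upper/both classification that the removed RangeCheckKind enum carried is no longer tracked.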
@@ -391,8 +351,7 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
Value *Length = nullptr, *Index;
bool IsSigned;
- auto RCKind = parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned);
- if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ if (!parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned))
return;
const auto *IndexAddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Index));
@@ -408,7 +367,6 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
if (Length)
End = SE.getSCEV(Length);
else {
- assert(RCKind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!");
// So far we can only reach this point for Signed range check. This may
// change in future. In this case we will need to pick Unsigned max for the
// unsigned range check.
@@ -422,7 +380,6 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
IRC.Begin = IndexAddRec->getStart();
IRC.Step = IndexAddRec->getStepRecurrence(SE);
IRC.CheckUse = &ConditionUse;
- IRC.Kind = RCKind;
IRC.IsSigned = IsSigned;
Checks.push_back(IRC);
}
@@ -689,17 +646,6 @@ void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
PN->setIncomingBlock(i, ReplaceBy);
}
-static bool CannotBeMaxInLoop(const SCEV *BoundSCEV, Loop *L,
- ScalarEvolution &SE, bool Signed) {
- unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
- APInt::getMaxValue(BitWidth);
- auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
- SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV,
- SE.getConstant(Max));
-}
-
/// Given a loop with a decreasing induction variable, is it possible to
/// safely calculate the bounds of a new loop using the given Predicate.
static bool isSafeDecreasingBound(const SCEV *Start,
@@ -795,31 +741,6 @@ static bool isSafeIncreasingBound(const SCEV *Start,
SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
}
-static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L,
- ScalarEvolution &SE, bool Signed) {
- unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
- APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
- APInt::getMinValue(BitWidth);
- auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
- SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV,
- SE.getConstant(Min));
-}
-
-static bool isKnownNonNegativeInLoop(const SCEV *BoundSCEV, const Loop *L,
- ScalarEvolution &SE) {
- const SCEV *Zero = SE.getZero(BoundSCEV->getType());
- return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
- SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, BoundSCEV, Zero);
-}
-
-static bool isKnownNegativeInLoop(const SCEV *BoundSCEV, const Loop *L,
- ScalarEvolution &SE) {
- const SCEV *Zero = SE.getZero(BoundSCEV->getType());
- return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
- SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, BoundSCEV, Zero);
-}
-
Optional<LoopStructure>
LoopStructure::parseLoopStructure(ScalarEvolution &SE,
BranchProbabilityInfo *BPI, Loop &L,
@@ -977,12 +898,12 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
// ... ...
// } }
if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
- CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) {
+ cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) {
Pred = ICmpInst::ICMP_UGT;
RightSCEV = SE.getMinusSCEV(RightSCEV,
SE.getOne(RightSCEV->getType()));
DecreasedRightValueByOne = true;
- } else if (CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) {
+ } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) {
Pred = ICmpInst::ICMP_SGT;
RightSCEV = SE.getMinusSCEV(RightSCEV,
SE.getOne(RightSCEV->getType()));
@@ -1042,11 +963,11 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
// ... ...
// } }
if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) &&
- CannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
+ cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) {
Pred = ICmpInst::ICMP_ULT;
RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
IncreasedRightValueByOne = true;
- } else if (CannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
+ } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) {
Pred = ICmpInst::ICMP_SLT;
RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
IncreasedRightValueByOne = true;
@@ -1339,29 +1260,20 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// EnterLoopCond - is it okay to start executing this `LS'?
Value *EnterLoopCond = nullptr;
- if (Increasing)
- EnterLoopCond = IsSignedPredicate
- ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt)
- : B.CreateICmpULT(LS.IndVarStart, ExitSubloopAt);
- else
- EnterLoopCond = IsSignedPredicate
- ? B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt)
- : B.CreateICmpUGT(LS.IndVarStart, ExitSubloopAt);
+ auto Pred =
+ Increasing
+ ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
+ : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
+ EnterLoopCond = B.CreateICmp(Pred, LS.IndVarStart, ExitSubloopAt);
B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
PreheaderJump->eraseFromParent();
LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
B.SetInsertPoint(LS.LatchBr);
- Value *TakeBackedgeLoopCond = nullptr;
- if (Increasing)
- TakeBackedgeLoopCond = IsSignedPredicate
- ? B.CreateICmpSLT(LS.IndVarBase, ExitSubloopAt)
- : B.CreateICmpULT(LS.IndVarBase, ExitSubloopAt);
- else
- TakeBackedgeLoopCond = IsSignedPredicate
- ? B.CreateICmpSGT(LS.IndVarBase, ExitSubloopAt)
- : B.CreateICmpUGT(LS.IndVarBase, ExitSubloopAt);
+ Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, LS.IndVarBase,
+ ExitSubloopAt);
+
Value *CondForBranch = LS.LatchBrExitIdx == 1
? TakeBackedgeLoopCond
: B.CreateNot(TakeBackedgeLoopCond);
@@ -1373,15 +1285,7 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// IterationsLeft - are there any more iterations left, given the original
// upper bound on the induction variable? If not, we branch to the "real"
// exit.
- Value *IterationsLeft = nullptr;
- if (Increasing)
- IterationsLeft = IsSignedPredicate
- ? B.CreateICmpSLT(LS.IndVarBase, LS.LoopExitAt)
- : B.CreateICmpULT(LS.IndVarBase, LS.LoopExitAt);
- else
- IterationsLeft = IsSignedPredicate
- ? B.CreateICmpSGT(LS.IndVarBase, LS.LoopExitAt)
- : B.CreateICmpUGT(LS.IndVarBase, LS.LoopExitAt);
+ Value *IterationsLeft = B.CreateICmp(Pred, LS.IndVarBase, LS.LoopExitAt);
B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
BranchInst *BranchToContinuation =
@@ -1513,16 +1417,14 @@ bool LoopConstrainer::run() {
if (Increasing)
ExitPreLoopAtSCEV = *SR.LowLimit;
+ else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
+ IsSignedPredicate))
+ ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
else {
- if (CannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE,
- IsSignedPredicate))
- ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
- else {
- LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
- << "preloop exit limit. HighLimit = "
- << *(*SR.HighLimit) << "\n");
- return false;
- }
+ LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+ << "preloop exit limit. HighLimit = "
+ << *(*SR.HighLimit) << "\n");
+ return false;
}
if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) {
@@ -1542,16 +1444,14 @@ bool LoopConstrainer::run() {
if (Increasing)
ExitMainLoopAtSCEV = *SR.HighLimit;
+ else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
+ IsSignedPredicate))
+ ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
else {
- if (CannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE,
- IsSignedPredicate))
- ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
- else {
- LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
- << "mainloop exit limit. LowLimit = "
- << *(*SR.LowLimit) << "\n");
- return false;
- }
+ LLVM_DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+ << "mainloop exit limit. LowLimit = "
+ << *(*SR.LowLimit) << "\n");
+ return false;
}
if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 1d66472f93c8..48de56a02834 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -25,12 +25,12 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -65,6 +66,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
@@ -285,7 +287,7 @@ bool JumpThreading::runOnFunction(Function &F) {
auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DeferredDominance DDT(*DT);
+ DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = F.hasProfileData();
@@ -295,7 +297,7 @@ bool JumpThreading::runOnFunction(Function &F) {
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData,
+ bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, HasProfileData,
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -312,7 +314,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- DeferredDominance DDT(DT);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
@@ -322,7 +324,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData,
+ bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, HasProfileData,
std::move(BFI), std::move(BPI));
if (!Changed)
@@ -336,14 +338,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
LazyValueInfo *LVI_, AliasAnalysis *AA_,
- DeferredDominance *DDT_, bool HasProfileData_,
+ DomTreeUpdater *DTU_, bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
LVI = LVI_;
AA = AA_;
- DDT = DDT_;
+ DTU = DTU_;
BFI.reset();
BPI.reset();
// When profile data is available, we need to update edge weights after
@@ -360,7 +362,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// JumpThreading must not process blocks unreachable from entry. It's a
// waste of compute time and can potentially lead to hangs.
SmallPtrSet<BasicBlock *, 16> Unreachable;
- DominatorTree &DT = DDT->flush();
+ assert(DTU && "DTU isn't passed into JumpThreading before using it.");
+ assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
+ DominatorTree &DT = DTU->getDomTree();
for (auto &BB : F)
if (!DT.isReachableFromEntry(&BB))
Unreachable.insert(&BB);
@@ -379,7 +383,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Stop processing BB if it's the entry or is now deleted. The following
// routines attempt to eliminate BB, and locating a suitable replacement
// for the entry is non-trivial.
- if (&BB == &F.getEntryBlock() || DDT->pendingDeletedBB(&BB))
+ if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
continue;
if (pred_empty(&BB)) {
@@ -390,7 +394,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
<< '\n');
LoopHeaders.erase(&BB);
LVI->eraseBlock(&BB);
- DeleteDeadBlock(&BB, DDT);
+ DeleteDeadBlock(&BB, DTU);
Changed = true;
continue;
}
@@ -404,9 +408,9 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Don't alter Loop headers and latches to ensure another pass can
// detect and transform nested loops later.
!LoopHeaders.count(&BB) && !LoopHeaders.count(BI->getSuccessor(0)) &&
- TryToSimplifyUncondBranchFromEmptyBlock(&BB, DDT)) {
- // BB is valid for cleanup here because we passed in DDT. F remains
- // BB's parent until a DDT->flush() event.
+ TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
+ // BB is valid for cleanup here because we passed in DTU. F remains
+ // BB's parent until a DTU->getDomTree() event.
LVI->eraseBlock(&BB);
Changed = true;
}
@@ -415,7 +419,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
} while (Changed);
LoopHeaders.clear();
- DDT->flush();
+ // Flush only the Dominator Tree.
+ DTU->getDomTree();
LVI->enableDT();
return EverChanged;
}
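The DeferredDominance-to-DomTreeUpdater migration keeps the same deferred-update model. Below is a fragment (not a standalone program) sketching the lazy-update call pattern used throughout this file, assuming DT (a DominatorTree), LVI, and the blocks BB, PredBB, NewBB, SuccBB from the surrounding pass; every call shown also appears in the hunks of this diff:

    // Queue CFG edge updates; the dominator tree is only rebuilt on demand.
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
    DTU.applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
                      {DominatorTree::Insert, PredBB, NewBB},
                      {DominatorTree::Delete, PredBB, BB}});
    if (DTU.hasPendingDomTreeUpdates())   // decide whether LVI may rely on the tree
      LVI->disableDT();
    DominatorTree &Current = DTU.getDomTree();  // flushes all queued updates

DTU.deleteEdgeRelaxed(BB, Succ) is the drop-in replacement for the old DDT->deleteEdge(BB, Succ), and DTU.isBBPendingDeletion(BB) replaces DDT->pendingDeletedBB(BB).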
@@ -569,9 +574,11 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
/// BB in the result vector.
///
/// This returns true if there were any known values.
-bool JumpThreadingPass::ComputeValueKnownInPredecessors(
+bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, PredValueInfo &Result,
- ConstantPreference Preference, Instruction *CxtI) {
+ ConstantPreference Preference,
+ DenseSet<std::pair<Value *, BasicBlock *>> &RecursionSet,
+ Instruction *CxtI) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
@@ -579,10 +586,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (!RecursionSet.insert(std::make_pair(V, BB)).second)
return false;
- // An RAII help to remove this pair from the recursion set once the recursion
- // stack pops back out again.
- RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
-
// If V is a constant, then it is known in all predecessors.
if (Constant *KC = getKnownConstant(V, Preference)) {
for (BasicBlock *Pred : predecessors(BB))
@@ -609,7 +612,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -626,7 +629,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
/// If I is a PHI node, then we know the incoming values for any constants.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -652,7 +655,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Value *Source = CI->getOperand(0);
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
return false;
- ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
+ ComputeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,
+ RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -672,10 +676,10 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
I->getOpcode() == Instruction::And) {
PredValueInfoTy LHSVals, RHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
- ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(1), BB, RHSVals,
+ WantInteger, RecursionSet, CxtI);
if (LHSVals.empty() && RHSVals.empty())
return false;
@@ -710,8 +714,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,
+ WantInteger, RecursionSet, CxtI);
if (Result.empty())
return false;
@@ -728,8 +732,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
&& "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
// Try to use constant folding to simplify the binary operator.
for (const auto &LHSVal : LHSVals) {
@@ -759,7 +763,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -806,7 +810,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
if (!isa<Instruction>(CmpLHS) ||
cast<Instruction>(CmpLHS)->getParent() != BB) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -838,7 +842,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
if (!isa<Instruction>(AddLHS) ||
cast<Instruction>(AddLHS)->getParent() != BB) {
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -874,8 +878,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
PredValueInfoTy LHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
- WantInteger, CxtI);
+ ComputeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,
+ WantInteger, RecursionSet, CxtI);
for (const auto &LHSVal : LHSVals) {
Constant *V = LHSVal.first;
@@ -895,8 +899,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
PredValueInfoTy Conds;
if ((TrueVal || FalseVal) &&
- ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
- WantInteger, CxtI)) {
+ ComputeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,
+ WantInteger, RecursionSet, CxtI)) {
for (auto &C : Conds) {
Constant *Cond = C.first;
@@ -923,7 +927,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
}
// If all else fails, see if LVI can figure out a constant value for us.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -942,7 +946,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
/// Since we can pick an arbitrary destination, we pick the successor with the
/// fewest predecessors. This should reduce the in-degree of the others.
static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
- TerminatorInst *BBTerm = BB->getTerminator();
+ Instruction *BBTerm = BB->getTerminator();
unsigned MinSucc = 0;
BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);
// Compute the successor with the minimum number of predecessors.
@@ -974,7 +978,7 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) {
bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// If the block is trivially dead, just return and let the caller nuke it.
// This simplifies other transformations.
- if (DDT->pendingDeletedBB(BB) ||
+ if (DTU->isBBPendingDeletion(BB) ||
(pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))
return false;
@@ -983,15 +987,15 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
- const TerminatorInst *TI = SinglePred->getTerminator();
- if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
+ const Instruction *TI = SinglePred->getTerminator();
+ if (!TI->isExceptionalTerminator() && TI->getNumSuccessors() == 1 &&
SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
LVI->eraseBlock(SinglePred);
- MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT);
+ MergeBasicBlockIntoOnlyPred(BB, DTU);
// Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
// BB code within one basic block `BB`), we need to invalidate the LVI
@@ -1075,7 +1079,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
std::vector<DominatorTree::UpdateType> Updates;
// Fold the branch/switch.
- TerminatorInst *BBTerm = BB->getTerminator();
+ Instruction *BBTerm = BB->getTerminator();
Updates.reserve(BBTerm->getNumSuccessors());
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
@@ -1088,7 +1092,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return true;
}
@@ -1100,7 +1104,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< "' folding terminator: " << *BB->getTerminator()
<< '\n');
++NumFolds;
- ConstantFoldTerminator(BB, true, nullptr, DDT);
+ ConstantFoldTerminator(BB, true, nullptr, DTU);
return true;
}
@@ -1127,7 +1131,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// threading is concerned.
assert(CondBr->isConditional() && "Threading on unconditional terminator");
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -1156,7 +1160,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
ConstantInt::getFalse(CondCmp->getType());
ReplaceFoldableUses(CondCmp, CI);
}
- DDT->deleteEdge(BB, ToRemoveSucc);
+ DTU->deleteEdgeRelaxed(BB, ToRemoveSucc);
return true;
}
@@ -1167,6 +1171,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
}
}
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ TryToUnfoldSelect(SI, BB);
+
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
// we see one, check to see if it's partially redundant. If so, insert a PHI
@@ -1240,7 +1247,7 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
RemoveSucc->removePredecessor(BB);
BranchInst::Create(KeepSucc, BI);
BI->eraseFromParent();
- DDT->deleteEdge(BB, RemoveSucc);
+ DTU->deleteEdgeRelaxed(BB, RemoveSucc);
return true;
}
CurrentBB = CurrentPred;
@@ -1296,7 +1303,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
if (IsLoadCSE) {
LoadInst *NLoadI = cast<LoadInst>(AvailableVal);
- combineMetadataForCSE(NLoadI, LoadI);
+ combineMetadataForCSE(NLoadI, LoadI, false);
};
// If the returned value is the load itself, replace with an undef. This can
@@ -1486,7 +1493,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
}
for (LoadInst *PredLoadI : CSELoads) {
- combineMetadataForCSE(PredLoadI, LoadI);
+ combineMetadataForCSE(PredLoadI, LoadI, true);
}
LoadI->replaceAllUsesWith(PN);
@@ -1544,7 +1551,7 @@ FindMostPopularDest(BasicBlock *BB,
// successor list.
if (!SamePopularity.empty()) {
SamePopularity.push_back(MostPopularDest);
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
for (unsigned i = 0; ; ++i) {
assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
@@ -1664,10 +1671,10 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
}
// Finally update the terminator.
- TerminatorInst *Term = BB->getTerminator();
+ Instruction *Term = BB->getTerminator();
BranchInst::Create(OnlyDest, Term);
Term->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
// If the condition is now dead due to the removal of the old terminator,
// erase it.
@@ -1945,7 +1952,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
<< "' with cost: " << JumpThreadCost
<< ", across block:\n " << *BB << "\n");
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -1974,7 +1981,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
- for (; !isa<TerminatorInst>(BI); ++BI) {
+ for (; !BI->isTerminator(); ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
@@ -2001,7 +2008,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Update the terminator of PredBB to jump to NewBB instead of BB. This
// eliminates predecessors from BB, which requires us to simplify any PHI
// nodes in BB.
- TerminatorInst *PredTerm = PredBB->getTerminator();
+ Instruction *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
BB->removePredecessor(PredBB, true);
@@ -2009,7 +2016,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
}
// Enqueue required DT updates.
- DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
+ DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
{DominatorTree::Insert, PredBB, NewBB},
{DominatorTree::Delete, PredBB, BB}});
@@ -2105,12 +2112,12 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return NewBBs[0];
}
bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
assert(TI->getNumSuccessors() > 1 && "not a split");
MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
@@ -2378,12 +2385,78 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
++NumDupes;
return true;
}
+// Pred is a predecessor of BB with an unconditional branch to BB. SI is
+// a Select instruction in Pred. BB has other predecessors and SI is used in
+// a PHI node in BB. SI has no other use.
+// A new basic block, NewBB, is created and SI is converted into a compare and
+// conditional branch. SI is erased from its parent.
+void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
+ SelectInst *SI, PHINode *SIUse,
+ unsigned Idx) {
+ // Expand the select.
+ //
+ // Pred --
+ // | v
+ // | NewBB
+ // | |
+ // |-----
+ // v
+ // BB
+ BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
+ BB->getParent(), BB);
+ // Move the unconditional branch to NewBB.
+ PredTerm->removeFromParent();
+ NewBB->getInstList().insert(NewBB->end(), PredTerm);
+ // Create a conditional branch and update PHI nodes.
+ BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
+ SIUse->setIncomingValue(Idx, SI->getFalseValue());
+ SIUse->addIncoming(SI->getTrueValue(), NewBB);
+
+ // The select is now dead.
+ SI->eraseFromParent();
+ DTU->applyUpdates({{DominatorTree::Insert, NewBB, BB},
+ {DominatorTree::Insert, Pred, NewBB}});
+
+ // Update any other PHI nodes in BB.
+ for (BasicBlock::iterator BI = BB->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
+ if (Phi != SIUse)
+ Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
+}
+
+bool JumpThreadingPass::TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {
+ PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());
+
+ if (!CondPHI || CondPHI->getParent() != BB)
+ return false;
+
+ for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *Pred = CondPHI->getIncomingBlock(I);
+ SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));
+
+    // The second and third conditions can potentially be relaxed. Currently
+    // the conditions help to simplify the code and allow us to reuse existing
+    // code, developed for TryToUnfoldSelect(CmpInst *, BasicBlock *).
+ if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())
+ continue;
+
+ BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredTerm || !PredTerm->isUnconditional())
+ continue;
+
+ UnfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);
+ return true;
+ }
+ return false;
+}
+
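A hedged source-level sketch of the shape the new TryToUnfoldSelect(SwitchInst *, BasicBlock *) overload looks for (identifiers invented for illustration): the select lives in a predecessor that ends in an unconditional branch, and its only use is the phi feeding the switch condition.

    int classify(const int *a, int n, bool flag) {
      int sel;
      if (n > 0)
        sel = flag ? 1 : 2;   // select in the predecessor, single use below
      else
        sel = 0;
      switch (sel) {          // condition is a phi of {select, 0}
      case 1: return a[0];
      case 2: return a[n - 1];
      default: return -1;
      }
    }

Unfolding turns the select into a conditional branch through a new block, after which each arm reaches the switch with a known constant and the edge becomes threadable.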
/// TryToUnfoldSelect - Look for blocks of the form
/// bb1:
/// %a = select
@@ -2421,7 +2494,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
// Now check if one of the select values would allow us to constant fold the
// terminator in BB. We don't do the transform if both sides fold, those
// cases will be threaded in any case.
- if (DDT->pending())
+ if (DTU->hasPendingDomTreeUpdates())
LVI->disableDT();
else
LVI->enableDT();
@@ -2434,34 +2507,7 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
if ((LHSFolds != LazyValueInfo::Unknown ||
RHSFolds != LazyValueInfo::Unknown) &&
LHSFolds != RHSFolds) {
- // Expand the select.
- //
- // Pred --
- // | v
- // | NewBB
- // | |
- // |-----
- // v
- // BB
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
- BB->getParent(), BB);
- // Move the unconditional branch to NewBB.
- PredTerm->removeFromParent();
- NewBB->getInstList().insert(NewBB->end(), PredTerm);
- // Create a conditional branch and update PHI nodes.
- BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
- CondLHS->setIncomingValue(I, SI->getFalseValue());
- CondLHS->addIncoming(SI->getTrueValue(), NewBB);
- // The select is now dead.
- SI->eraseFromParent();
-
- DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB},
- {DominatorTree::Insert, Pred, NewBB}});
- // Update any other PHI nodes in BB.
- for (BasicBlock::iterator BI = BB->begin();
- PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
- if (Phi != CondLHS)
- Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
+ UnfoldSelectInstr(Pred, BB, SI, CondLHS, I);
return true;
}
}
@@ -2533,7 +2579,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
if (!SI)
continue;
// Expand the select.
- TerminatorInst *Term =
+ Instruction *Term =
SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
BasicBlock *SplitBB = SI->getParent();
BasicBlock *NewBB = Term->getParent();
@@ -2548,12 +2594,12 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
Updates.push_back({DominatorTree::Insert, BB, SplitBB});
Updates.push_back({DominatorTree::Insert, BB, NewBB});
Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});
- // BB's successors were moved to SplitBB, update DDT accordingly.
+ // BB's successors were moved to SplitBB, update DTU accordingly.
for (auto *Succ : successors(SplitBB)) {
Updates.push_back({DominatorTree::Delete, BB, Succ});
Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
}
- DDT->applyUpdates(Updates);
+ DTU->applyUpdates(Updates);
return true;
}
return false;
@@ -2603,9 +2649,8 @@ bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
for (auto &I : *BB)
- if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
- if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
- return true;
+ if (isGuard(&I) && ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
+ return true;
return false;
}
@@ -2651,28 +2696,16 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
// Duplicate all instructions before the guard and the guard itself to the
// branch where implication is not proved.
BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(
- BB, PredGuardedBlock, AfterGuard, GuardedMapping);
+ BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);
assert(GuardedBlock && "Could not create the guarded block?");
// Duplicate all instructions before the guard in the unguarded branch.
// Since we have successfully duplicated the guarded block and this block
// has fewer instructions, we expect it to succeed.
BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(
- BB, PredUnguardedBlock, Guard, UnguardedMapping);
+ BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);
assert(UnguardedBlock && "Could not create the unguarded block?");
LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
<< GuardedBlock->getName() << "\n");
- // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between
- // PredBB and BB. We need to perform two inserts and one delete for each of
- // the above calls to update Dominators.
- DDT->applyUpdates(
- {// Guarded block split.
- {DominatorTree::Delete, PredGuardedBlock, BB},
- {DominatorTree::Insert, PredGuardedBlock, GuardedBlock},
- {DominatorTree::Insert, GuardedBlock, BB},
- // Unguarded block split.
- {DominatorTree::Delete, PredUnguardedBlock, BB},
- {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock},
- {DominatorTree::Insert, UnguardedBlock, BB}});
// Some instructions before the guard may still have uses. For them, we need
// to create Phi nodes merging their copies in both guarded and unguarded
// branches. Those instructions that have no uses can be just removed.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index c4ea43a43249..d204654c3915 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -31,6 +31,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
@@ -38,16 +39,18 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -58,6 +61,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -65,6 +69,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -73,6 +78,8 @@ using namespace llvm;
#define DEBUG_TYPE "licm"
+STATISTIC(NumCreatedBlocks, "Number of blocks created");
+STATISTIC(NumClonedBranches, "Number of branches cloned");
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
@@ -84,51 +91,81 @@ static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable memory promotion in LICM pass"));
+static cl::opt<bool> ControlFlowHoisting(
+ "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
+ cl::desc("Enable control flow (and PHI) hoisting in LICM"));
+
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));
+// Default value of zero implies we use the regular alias set tracker mechanism
+// instead of the cross product using AA to identify aliasing of the memory
+// location we are interested in.
+static cl::opt<int>
+LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
+               cl::desc("How many instructions to cross product using AA"));
+
+// Experimental option to allow imprecision in LICM (use MemorySSA cap) in
+// pathological cases, in exchange for faster compile. This is to be removed
+// if MemorySSA starts to address the same issue. This flag applies only when
+// LICM uses MemorySSA instead of AliasSetTracker. When the flag is disabled
+// (default), LICM calls MemorySSAWalker's getClobberingMemoryAccess, which
+// gets perfect accuracy. When the flag is enabled, LICM calls MemorySSA's
+// getDefiningAccess, which may not be precise, since optimizeUses is capped.
+static cl::opt<bool> EnableLicmCap(
+ "enable-licm-cap", cl::init(false), cl::Hidden,
+ cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
+ "pathological cases, in exchange for faster compile"));
+
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop);
-static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE);
+static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
+ BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE, bool FreeInLoop);
+ const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
+ bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE,
const Instruction *CtxI = nullptr);
-static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo,
- AliasSetTracker *CurAST);
-static Instruction *
-CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
- const LoopInfo *LI,
- const LoopSafetyInfo *SafetyInfo);
+static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
+ AliasSetTracker *CurAST, Loop *CurLoop,
+ AliasAnalysis *AA);
+static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop);
+static Instruction *CloneInstructionInExitBlock(
+ Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
+ const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
+
+static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
+ AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
+
+static void moveInstructionBefore(Instruction &I, Instruction &Dest,
+ ICFLoopSafetyInfo &SafetyInfo);
namespace {
struct LoopInvariantCodeMotion {
+ using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;
bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE, bool DeleteAST);
- DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
- return LoopToAliasSetMap;
- }
+ ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
private:
- DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap;
+ ASTrackerMapTy LoopToAliasSetMap;
- AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
- AliasAnalysis *AA);
+ std::unique_ptr<AliasSetTracker>
+ collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
};
struct LegacyLICMPass : public LoopPass {
@@ -142,8 +179,6 @@ struct LegacyLICMPass : public LoopPass {
// If we have run LICM on a previous loop but now we are skipping
// (because we've hit the opt-bisect limit), we need to clear the
// loop alias information.
- for (auto &LTAS : LICM.getLoopToAliasSetMap())
- delete LTAS.second;
LICM.getLoopToAliasSetMap().clear();
return false;
}
@@ -173,8 +208,10 @@ struct LegacyLICMPass : public LoopPass {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (EnableMSSALoopDependency)
+ if (EnableMSSALoopDependency) {
AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
}
@@ -254,14 +291,22 @@ bool LoopInvariantCodeMotion::runOnLoop(
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
- AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);
+ std::unique_ptr<AliasSetTracker> CurAST;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (!MSSA) {
+ LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
+ CurAST = collectAliasInfoForLoop(L, LI, AA);
+ } else {
+ LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n");
+ MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ }
// Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader();
// Compute loop safety information.
- LoopSafetyInfo SafetyInfo;
- computeLoopSafetyInfo(&SafetyInfo, L);
+ ICFLoopSafetyInfo SafetyInfo(DT);
+ SafetyInfo.computeLoopSafetyInfo(L);
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
@@ -275,10 +320,10 @@ bool LoopInvariantCodeMotion::runOnLoop(
//
if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
- CurAST, &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
- CurAST, &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -307,27 +352,30 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool Promoted = false;
- // Loop over all of the alias sets in the tracker object.
- for (AliasSet &AS : *CurAST) {
- // We can promote this alias set if it has a store, if it is a "Must"
- // alias set, if the pointer is loop invariant, and if we are not
- // eliminating any volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue()))
- continue;
-
- assert(
- !AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
-
- SmallSetVector<Value *, 8> PointerMustAliases;
- for (const auto &ASI : AS)
- PointerMustAliases.insert(ASI.getValue());
-
- Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks,
- InsertPts, PIC, LI, DT, TLI, L,
- CurAST, &SafetyInfo, ORE);
+ if (CurAST.get()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSet &AS : *CurAST) {
+ // We can promote this alias set if it has a store, if it is a "Must"
+ // alias set, if the pointer is loop invariant, and if we are not
+ // eliminating any volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ !L->isLoopInvariant(AS.begin()->getValue()))
+ continue;
+
+ assert(
+ !AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+
+ SmallSetVector<Value *, 8> PointerMustAliases;
+ for (const auto &ASI : AS)
+ PointerMustAliases.insert(ASI.getValue());
+
+ Promoted |= promoteLoopAccessesToScalars(
+ PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
+ CurAST.get(), &SafetyInfo, ORE);
+ }
}
+ // FIXME: Promotion initially disabled when using MemorySSA.
// Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined
@@ -351,10 +399,11 @@ bool LoopInvariantCodeMotion::runOnLoop(
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
- if (L->getParentLoop() && !DeleteAST)
- LoopToAliasSetMap[L] = CurAST;
- else
- delete CurAST;
+ if (CurAST.get() && L->getParentLoop() && !DeleteAST)
+ LoopToAliasSetMap[L] = std::move(CurAST);
+
+ if (MSSAU.get() && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
if (Changed && SE)
SE->forgetLoopDispositions(L);
@@ -369,13 +418,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI,
TargetTransformInfo *TTI, Loop *CurLoop,
- AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+ AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
- "Unexpected input to sinkRegion");
+ CurLoop != nullptr && SafetyInfo != nullptr &&
+ "Unexpected input to sinkRegion.");
+ assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
+ "Either AliasSetTracker or MemorySSA should be initialized.");
// We want to visit children before parents. We will enque all the parents
// before their children in the worklist and process the worklist in reverse
@@ -399,8 +451,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
salvageDebugInfo(I);
++II;
- CurAST->deleteValue(&I);
- I.eraseFromParent();
+ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
Changed = true;
continue;
}
@@ -412,21 +463,252 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
//
bool FreeInLoop = false;
if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
- if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ !I.mayHaveSideEffects()) {
+ if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
if (!FreeInLoop) {
++II;
- CurAST->deleteValue(&I);
- I.eraseFromParent();
+ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
}
Changed = true;
}
}
}
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed;
}
+namespace {
+// This is a helper class for hoistRegion that lets it hoist control flow in
+// order to hoist phis. The way this works is that we initially
+// start hoisting to the loop preheader, and when we see a loop invariant branch
+// we make note of this. When we then come to hoist an instruction that's
+// conditional on such a branch we duplicate the branch and the relevant control
+// flow, then hoist the instruction into the block corresponding to its original
+// block in the duplicated control flow.
+class ControlFlowHoister {
+private:
+ // Information about the loop we are hoisting from
+ LoopInfo *LI;
+ DominatorTree *DT;
+ Loop *CurLoop;
+ MemorySSAUpdater *MSSAU;
+
+ // A map of blocks in the loop to the block their instructions will be hoisted
+ // to.
+ DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
+
+ // The branches that we can hoist, mapped to the block that marks a
+ // convergence point of their control flow.
+ DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
+
+public:
+ ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
+ MemorySSAUpdater *MSSAU)
+ : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
+
+ void registerPossiblyHoistableBranch(BranchInst *BI) {
+ // We can only hoist conditional branches with loop invariant operands.
+ if (!ControlFlowHoisting || !BI->isConditional() ||
+ !CurLoop->hasLoopInvariantOperands(BI))
+ return;
+
+ // The branch destinations need to be in the loop, and we don't gain
+ // anything by duplicating conditional branches with duplicate successors,
+ // as it's essentially the same as an unconditional branch.
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
+ TrueDest == FalseDest)
+ return;
+
+ // We can hoist BI if one branch destination is the successor of the other,
+ // or both have common successor which we check by seeing if the
+ // intersection of their successors is non-empty.
+ // TODO: This could be expanded to allowing branches where both ends
+ // eventually converge to a single block.
+ SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
+ TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
+ FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
+ BasicBlock *CommonSucc = nullptr;
+ if (TrueDestSucc.count(FalseDest)) {
+ CommonSucc = FalseDest;
+ } else if (FalseDestSucc.count(TrueDest)) {
+ CommonSucc = TrueDest;
+ } else {
+ set_intersect(TrueDestSucc, FalseDestSucc);
+ // If there's one common successor use that.
+ if (TrueDestSucc.size() == 1)
+ CommonSucc = *TrueDestSucc.begin();
+      // If there's more than one, pick whichever appears first in the block list
+      // (we can't use the value returned by TrueDestSucc.begin() as it's
+      // unpredictable which element gets returned).
+ else if (!TrueDestSucc.empty()) {
+ Function *F = TrueDest->getParent();
+ auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
+ auto It = std::find_if(F->begin(), F->end(), IsSucc);
+ assert(It != F->end() && "Could not find successor in function");
+ CommonSucc = &*It;
+ }
+ }
+ // The common successor has to be dominated by the branch, as otherwise
+ // there will be some other path to the successor that will not be
+ // controlled by this branch so any phi we hoist would be controlled by the
+ // wrong condition. This also takes care of avoiding hoisting of loop back
+ // edges.
+ // TODO: In some cases this could be relaxed if the successor is dominated
+ // by another block that's been hoisted and we can guarantee that the
+ // control flow has been replicated exactly.
+ if (CommonSucc && DT->dominates(BI, CommonSucc))
+ HoistableBranches[BI] = CommonSucc;
+ }
+
+ bool canHoistPHI(PHINode *PN) {
+ // The phi must have loop invariant operands.
+ if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
+ return false;
+ // We can hoist phis if the block they are in is the target of hoistable
+ // branches which cover all of the predecessors of the block.
+ SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
+ BasicBlock *BB = PN->getParent();
+ for (BasicBlock *PredBB : predecessors(BB))
+ PredecessorBlocks.insert(PredBB);
+    // If we have fewer predecessor blocks than predecessors then the phi will
+    // have more than one incoming value for the same block, which we can't
+    // handle.
+    // TODO: This could be handled by erasing some of the duplicate incoming
+    // values.
+ if (PredecessorBlocks.size() != pred_size(BB))
+ return false;
+ for (auto &Pair : HoistableBranches) {
+ if (Pair.second == BB) {
+ // Which blocks are predecessors via this branch depends on whether the
+ // branch is triangle-like or diamond-like.
+ if (Pair.first->getSuccessor(0) == BB) {
+ PredecessorBlocks.erase(Pair.first->getParent());
+ PredecessorBlocks.erase(Pair.first->getSuccessor(1));
+ } else if (Pair.first->getSuccessor(1) == BB) {
+ PredecessorBlocks.erase(Pair.first->getParent());
+ PredecessorBlocks.erase(Pair.first->getSuccessor(0));
+ } else {
+ PredecessorBlocks.erase(Pair.first->getSuccessor(0));
+ PredecessorBlocks.erase(Pair.first->getSuccessor(1));
+ }
+ }
+ }
+ // PredecessorBlocks will now be empty if for every predecessor of BB we
+ // found a hoistable branch source.
+ return PredecessorBlocks.empty();
+ }
+
+ BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
+ if (!ControlFlowHoisting)
+ return CurLoop->getLoopPreheader();
+ // If BB has already been hoisted, return the hoisted block.
+ if (HoistDestinationMap.count(BB))
+ return HoistDestinationMap[BB];
+
+ // Check if this block is conditional based on a pending branch
+ auto HasBBAsSuccessor =
+ [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
+ return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
+ Pair.first->getSuccessor(1) == BB);
+ };
+ auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
+ HasBBAsSuccessor);
+
+ // If not involved in a pending branch, hoist to preheader
+ BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
+ if (It == HoistableBranches.end()) {
+ LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
+ << " as hoist destination for " << BB->getName()
+ << "\n");
+ HoistDestinationMap[BB] = InitialPreheader;
+ return InitialPreheader;
+ }
+ BranchInst *BI = It->first;
+ assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
+ HoistableBranches.end() &&
+ "BB is expected to be the target of at most one branch");
+
+ LLVMContext &C = BB->getContext();
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+ BasicBlock *CommonSucc = HoistableBranches[BI];
+ BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
+
+ // Create hoisted versions of blocks that currently don't have them
+ auto CreateHoistedBlock = [&](BasicBlock *Orig) {
+ if (HoistDestinationMap.count(Orig))
+ return HoistDestinationMap[Orig];
+ BasicBlock *New =
+ BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
+ HoistDestinationMap[Orig] = New;
+ DT->addNewBlock(New, HoistTarget);
+ if (CurLoop->getParentLoop())
+ CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
+ ++NumCreatedBlocks;
+ LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
+ << " as hoist destination for " << Orig->getName()
+ << "\n");
+ return New;
+ };
+ BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
+ BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
+ BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
+
+ // Link up these blocks with branches.
+ if (!HoistCommonSucc->getTerminator()) {
+ // The new common successor we've generated will branch to whatever the
+ // hoist target branched to.
+ BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
+ assert(TargetSucc && "Expected hoist target to have a single successor");
+ HoistCommonSucc->moveBefore(TargetSucc);
+ BranchInst::Create(TargetSucc, HoistCommonSucc);
+ }
+ if (!HoistTrueDest->getTerminator()) {
+ HoistTrueDest->moveBefore(HoistCommonSucc);
+ BranchInst::Create(HoistCommonSucc, HoistTrueDest);
+ }
+ if (!HoistFalseDest->getTerminator()) {
+ HoistFalseDest->moveBefore(HoistCommonSucc);
+ BranchInst::Create(HoistCommonSucc, HoistFalseDest);
+ }
+
+ // If BI is being cloned to what was originally the preheader then
+ // HoistCommonSucc will now be the new preheader.
+ if (HoistTarget == InitialPreheader) {
+ // Phis in the loop header now need to use the new preheader.
+ InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
+ // The new preheader dominates the loop header.
+ DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
+ DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
+ DT->changeImmediateDominator(HeaderNode, PreheaderNode);
+ // The preheader hoist destination is now the new preheader, with the
+ // exception of the hoist destination of this branch.
+ for (auto &Pair : HoistDestinationMap)
+ if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
+ Pair.second = HoistCommonSucc;
+ }
+
+ // Now finally clone BI.
+ ReplaceInstWithInst(
+ HoistTarget->getTerminator(),
+ BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
+ ++NumClonedBranches;
+
+ assert(CurLoop->getLoopPreheader() &&
+ "Hoisting blocks should not have destroyed preheader");
+ return HoistDestinationMap[BB];
+ }
+};
+} // namespace
+
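As a rough, source-level illustration of the shape the ControlFlowHoister above looks for (a hypothetical example, not taken from the patch): the branch condition and both conditionally computed values are loop invariant and the two arms reconverge immediately, forming the diamond that registerPossiblyHoistableBranch accepts, so the branch, both destination blocks, and the converging phi can all be recreated in the preheader.

// Hypothetical C++ input with a hoistable "diamond" in the loop body.
int sum_scaled(const int *a, int n, bool flag, int x, int y) {
  int s = 0;
  for (int i = 0; i < n; ++i) {
    int scale;
    if (flag)        // loop-invariant condition
      scale = x * 7; // loop-invariant value on the true arm
    else
      scale = y * 9; // loop-invariant value on the false arm
    // 'scale' is a phi whose incoming values are all loop invariant, so with
    // control-flow hoisting the branch, both arms, and the phi move to the
    // preheader instead of being re-evaluated on every iteration.
    s += a[i] * scale;
  }
  return s;
}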
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -434,30 +716,34 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+ AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
- "Unexpected input to hoistRegion");
-
- // We want to visit parents before children. We will enque all the parents
- // before their children in the worklist and process the worklist in order.
- SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
-
+ CurLoop != nullptr && SafetyInfo != nullptr &&
+ "Unexpected input to hoistRegion.");
+ assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
+ "Either AliasSetTracker or MemorySSA should be initialized.");
+
+ ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
+
+ // Keep track of instructions that have been hoisted, as they may need to be
+ // re-hoisted if they end up not dominating all of their uses.
+ SmallVector<Instruction *, 16> HoistedInstructions;
+
+ // For PHI hoisting to work we need to hoist blocks before their successors.
+ // We can do this by iterating through the blocks in the loop in reverse
+ // post-order.
+ LoopBlocksRPO Worklist(CurLoop);
+ Worklist.perform(LI);
bool Changed = false;
- for (DomTreeNode *DTN : Worklist) {
- BasicBlock *BB = DTN->getBlock();
+ for (BasicBlock *BB : Worklist) {
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
if (inSubLoop(BB, CurLoop, LI))
continue;
- // Keep track of whether the prefix of instructions visited so far are such
- // that the next instruction visited is guaranteed to execute if the loop
- // is entered.
- bool IsMustExecute = CurLoop->getHeader() == BB;
-
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
Instruction &I = *II++;
// Try constant folding this instruction. If all the operands are
@@ -467,12 +753,12 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
&I, I.getModule()->getDataLayout(), TLI)) {
LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
<< '\n');
- CurAST->copyValue(&I, C);
+ if (CurAST)
+ CurAST->copyValue(&I, C);
+ // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
I.replaceAllUsesWith(C);
- if (isInstructionTriviallyDead(&I, TLI)) {
- CurAST->deleteValue(&I);
- I.eraseFromParent();
- }
+ if (isInstructionTriviallyDead(&I, TLI))
+ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
Changed = true;
continue;
}
@@ -480,14 +766,18 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// Try hoisting the instruction out to the preheader. We can only do
// this if all of the operands of the instruction are loop invariant and
// if it is safe to hoist the instruction.
- //
+ // TODO: It may be safe to hoist if we are hoisting to a conditional block
+ // and we have accurately duplicated the control flow from the loop header
+ // to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) &&
- (IsMustExecute ||
- isSafeToExecuteUnconditionally(
- I, DT, CurLoop, SafetyInfo, ORE,
- CurLoop->getLoopPreheader()->getTerminator()))) {
- Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE);
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ isSafeToExecuteUnconditionally(
+ I, DT, CurLoop, SafetyInfo, ORE,
+ CurLoop->getLoopPreheader()->getTerminator())) {
+ hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+ MSSAU, ORE);
+ HoistedInstructions.push_back(&I);
+ Changed = true;
continue;
}
@@ -500,24 +790,101 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
+ SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
ReciprocalDivisor->insertBefore(&I);
auto Product =
BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
Product->setFastMathFlags(I.getFastMathFlags());
+ SafetyInfo->insertInstructionTo(Product, I.getParent());
Product->insertAfter(&I);
I.replaceAllUsesWith(Product);
- I.eraseFromParent();
+ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
- hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
+ hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
+ SafetyInfo, MSSAU, ORE);
+ HoistedInstructions.push_back(ReciprocalDivisor);
Changed = true;
continue;
}
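A minimal sketch (hypothetical code, not part of the patch) of what the block above does under fast-math: the division by a loop-invariant divisor becomes a multiplication by a reciprocal, which is then hoisted like any other invariant computation.

// Conceptually, with reciprocal math allowed:
void scale_down(float *a, int n, float d) {
  for (int i = 0; i < n; ++i)
    a[i] = a[i] / d; // becomes: t = 1.0f / d (hoisted); a[i] = a[i] * t;
}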
- if (IsMustExecute)
- IsMustExecute = isGuaranteedToTransferExecutionToSuccessor(&I);
+ using namespace PatternMatch;
+ if (((I.use_empty() &&
+ match(&I, m_Intrinsic<Intrinsic::invariant_start>())) ||
+ isGuard(&I)) &&
+ CurLoop->hasLoopInvariantOperands(&I) &&
+ SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+ SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
+ hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+ MSSAU, ORE);
+ HoistedInstructions.push_back(&I);
+ Changed = true;
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ if (CFH.canHoistPHI(PN)) {
+ // Redirect incoming blocks first to ensure that we create hoisted
+ // versions of those blocks before we hoist the phi.
+ for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
+ PN->setIncomingBlock(
+ i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
+ hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+ MSSAU, ORE);
+ assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
+ Changed = true;
+ continue;
+ }
+ }
+
+ // Remember possibly hoistable branches so we can actually hoist them
+ // later if needed.
+ if (BranchInst *BI = dyn_cast<BranchInst>(&I))
+ CFH.registerPossiblyHoistableBranch(BI);
+ }
+ }
+
+ // If we hoisted instructions to a conditional block they may not dominate
+ // their uses that weren't hoisted (such as phis where some operands are not
+ // loop invariant). If so, make them unconditional by moving them to their
+ // immediate dominator. We iterate through the instructions in reverse order,
+ // which ensures that when we rehoist an instruction we also rehoist its
+ // operands, and we keep track of where in the block we are rehoisting to, to
+ // make sure that we rehoist instructions before the instructions that use
+ // them.
+ Instruction *HoistPoint = nullptr;
+ if (ControlFlowHoisting) {
+ for (Instruction *I : reverse(HoistedInstructions)) {
+ if (!llvm::all_of(I->uses(),
+ [&](Use &U) { return DT->dominates(I, U); })) {
+ BasicBlock *Dominator =
+ DT->getNode(I->getParent())->getIDom()->getBlock();
+ if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
+ if (HoistPoint)
+ assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
+ "New hoist point expected to dominate old hoist point");
+ HoistPoint = Dominator->getTerminator();
+ }
+ LLVM_DEBUG(dbgs() << "LICM rehoisting to "
+ << HoistPoint->getParent()->getName()
+ << ": " << *I << "\n");
+ moveInstructionBefore(*I, *HoistPoint, *SafetyInfo);
+ HoistPoint = I;
+ Changed = true;
+ }
}
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ // Now that we've finished hoisting, make sure that LI and DT are still
+ // valid.
+#ifndef NDEBUG
+ if (Changed) {
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
+ "Dominator tree verification failed");
+ LI->verify(*DT);
+ }
+#endif
return Changed;
}
@@ -575,13 +942,68 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
}
+namespace {
+/// Return true if-and-only-if we know how to (mechanically) both hoist and
+/// sink a given instruction out of a loop. Does not address legality
+/// concerns such as aliasing or speculation safety.
+bool isHoistableAndSinkableInst(Instruction &I) {
+ // Only these instructions are hoistable/sinkable.
+ return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
+ isa<CallInst>(I) || isa<FenceInst>(I) ||
+ isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
+ isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
+ isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
+}
+/// Return true if all of the alias sets within this AST are known not to
+/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
+bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
+ const Loop *L) {
+ if (CurAST) {
+ for (AliasSet &AS : *CurAST) {
+ if (!AS.isForwardingAliasSet() && AS.isMod()) {
+ return false;
+ }
+ }
+ return true;
+ } else { /*MSSAU*/
+ for (auto *BB : L->getBlocks())
+ if (MSSAU->getMemorySSA()->getBlockDefs(BB))
+ return false;
+ return true;
+ }
+}
+
+/// Return true if I is the only Instruction with a MemoryAccess in L.
+bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
+ const MemorySSAUpdater *MSSAU) {
+ for (auto *BB : L->getBlocks())
+ if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
+ int NotAPhi = 0;
+ for (const auto &Acc : *Accs) {
+ if (isa<MemoryPhi>(&Acc))
+ continue;
+ const auto *MUD = cast<MemoryUseOrDef>(&Acc);
+ if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
+ return false;
+ }
+ }
+ return true;
+}
+}
+
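To make the intent of isReadOnly concrete, here is an illustrative (hypothetical) loop in which every memory operation in the body only reads: the alias set tracker sees no Mod set and MemorySSA records no MemoryDefs, so a read-only call such as strlen is a candidate for hoisting or sinking.

#include <cstring>

// Every access in the loop body is a read, so the loop is "read only" in the
// sense checked above and the strlen call can be moved out of the loop.
int sum_plus_len(const int *a, int n, const char *s) {
  int total = 0;
  for (int i = 0; i < n; ++i)
    total += a[i] + static_cast<int>(std::strlen(s));
  return total;
}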
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
- LoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU,
+ bool TargetExecutesOncePerLoop,
OptimizationRemarkEmitter *ORE) {
- // SafetyInfo is nullptr if we are checking for sinking from preheader to
- // loop body.
- const bool SinkingToLoopBody = !SafetyInfo;
+ // If we don't understand the instruction, bail early.
+ if (!isHoistableAndSinkableInst(I))
+ return false;
+
+ MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
+
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@@ -594,23 +1016,20 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (LI->getMetadata(LLVMContext::MD_invariant_load))
return true;
- if (LI->isAtomic() && SinkingToLoopBody)
- return false; // Don't sink unordered atomic loads to loop body.
+ if (LI->isAtomic() && !TargetExecutesOncePerLoop)
+ return false; // Don't risk duplicating unordered loads
// This checks for an invariant.start dominating the load.
if (isLoadInvariantInLoop(LI, DT, CurLoop))
return true;
- // Don't hoist loads which have may-aliased stores in loop.
- uint64_t Size = 0;
- if (LI->getType()->isSized())
- Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());
-
- AAMDNodes AAInfo;
- LI->getAAMetadata(AAInfo);
-
- bool Invalidated =
- pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST);
+ bool Invalidated;
+ if (CurAST)
+ Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
+ CurLoop, AA);
+ else
+ Invalidated = pointerInvalidatedByLoopWithMSSA(
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -631,6 +1050,11 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (CI->mayThrow())
return false;
+ using namespace PatternMatch;
+ if (match(CI, m_Intrinsic<Intrinsic::assume>()))
+ // Assumes don't actually alias anything or throw
+ return true;
+
// Handle simple cases by querying alias analysis.
FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == FMRB_DoesNotAccessMemory)
@@ -640,23 +1064,26 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// its arguments with arbitrary offsets. If we can prove there are no
// writes to this memory in the loop, we can hoist or sink.
if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
+ // TODO: expand to writeable arguments
for (Value *Op : CI->arg_operands())
- if (Op->getType()->isPointerTy() &&
- pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize,
- AAMDNodes(), CurAST))
- return false;
+ if (Op->getType()->isPointerTy()) {
+ bool Invalidated;
+ if (CurAST)
+ Invalidated = pointerInvalidatedByLoop(
+ MemoryLocation(Op, LocationSize::unknown(), AAMDNodes()),
+ CurAST, CurLoop, AA);
+ else
+ Invalidated = pointerInvalidatedByLoopWithMSSA(
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop);
+ if (Invalidated)
+ return false;
+ }
return true;
}
+
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
- bool FoundMod = false;
- for (AliasSet &AS : *CurAST) {
- if (!AS.isForwardingAliasSet() && AS.isMod()) {
- FoundMod = true;
- break;
- }
- }
- if (!FoundMod)
+ if (isReadOnly(CurAST, MSSAU, CurLoop))
return true;
}
@@ -664,25 +1091,63 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// sink the call.
return false;
+ } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
+ // Fences alias (most) everything to provide ordering. For the moment,
+ // just give up if there are any other memory operations in the loop.
+ if (CurAST) {
+ auto Begin = CurAST->begin();
+ assert(Begin != CurAST->end() && "must contain FI");
+ if (std::next(Begin) != CurAST->end())
+ // Constant memory, for instance. TODO: handle this better.
+ return false;
+ auto *UniqueI = Begin->getUniqueInstruction();
+ if (!UniqueI)
+ // other memory op, give up
+ return false;
+ (void)FI; // suppress unused variable warning
+ assert(UniqueI == FI && "AS must contain FI");
+ return true;
+ } else // MSSAU
+ return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
+ } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ if (!SI->isUnordered())
+ return false; // Don't sink/hoist volatile or ordered atomic store!
+
+ // We can only hoist a store that we can prove writes a value which is not
+ // read or overwritten within the loop. For those cases, we fall back to
+ // load store promotion instead. TODO: We can extend this to cases where
+ // there is exactly one write to the location and that write dominates an
+ // arbitrary number of reads in the loop.
+ if (CurAST) {
+ auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
+
+ if (AS.isRef() || !AS.isMustAlias())
+ // Quick exit test, handled by the full path below as well.
+ return false;
+ auto *UniqueI = AS.getUniqueInstruction();
+ if (!UniqueI)
+ // other memory op, give up
+ return false;
+ assert(UniqueI == SI && "AS must contain SI");
+ return true;
+ } else { // MSSAU
+ if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
+ return true;
+ if (!EnableLicmCap) {
+ auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+ if (MSSA->isLiveOnEntryDef(Source) ||
+ !CurLoop->contains(Source->getBlock()))
+ return true;
+ }
+ return false;
+ }
}
- // Only these instructions are hoistable/sinkable.
- if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
- !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
- !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
- !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
- !isa<InsertValueInst>(I))
- return false;
-
- // If we are checking for sinking from preheader to loop body it will be
- // always safe as there is no speculative execution.
- if (SinkingToLoopBody)
- return true;
+ assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
- // TODO: Plumb the context instruction through to make hoisting and sinking
- // more powerful. Hoisting of loads already works due to the special casing
- // above.
- return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr);
+ // We've established mechanical ability and aliasing; it's up to the caller
+ // to check fault safety.
+ return true;
}
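The store case above is easier to see with a small (hypothetical) example: the store writes a loop-invariant value to a loop-invariant address that nothing in the loop reads or overwrites, so, assuming alias analysis can separate the two pointers, its clobbering access is live on entry and the store can be hoisted. If the flag were also read inside the loop, the code instead falls back to load/store promotion.

// Hypothetical input for the store-hoisting case described above; __restrict
// stands in for whatever lets alias analysis prove the pointers are distinct.
void mark_and_double(int *__restrict flag, int *__restrict a, int n) {
  for (int i = 0; i < n; ++i) {
    *flag = 1;       // same value every iteration, never read in the loop
    a[i] = a[i] * 2; // unrelated memory traffic
  }
}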
/// Returns true if a PHINode is a trivially replaceable with an
@@ -730,7 +1195,7 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop) {
- const auto &BlockColors = SafetyInfo->BlockColors;
+ const auto &BlockColors = SafetyInfo->getBlockColors();
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
@@ -759,13 +1224,12 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
return true;
}
-static Instruction *
-CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
- const LoopInfo *LI,
- const LoopSafetyInfo *SafetyInfo) {
+static Instruction *CloneInstructionInExitBlock(
+ Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
+ const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
Instruction *New;
if (auto *CI = dyn_cast<CallInst>(&I)) {
- const auto &BlockColors = SafetyInfo->BlockColors;
+ const auto &BlockColors = SafetyInfo->getBlockColors();
// Sinking call-sites need to be handled differently from other
// instructions. The cloned call-site needs a funclet bundle operand
@@ -798,6 +1262,21 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
if (!I.getName().empty())
New->setName(I.getName() + ".le");
+ MemoryAccess *OldMemAcc;
+ if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
+ // Create a new MemoryAccess and let MemorySSA set its defining access.
+ MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
+ New, nullptr, New->getParent(), MemorySSA::Beginning);
+ if (NewMemAcc) {
+ if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
+ MSSAU->insertDef(MemDef, /*RenameUses=*/true);
+ else {
+ auto *MemUse = cast<MemoryUse>(NewMemAcc);
+ MSSAU->insertUse(MemUse);
+ }
+ }
+ }
+
// Build LCSSA PHI nodes for any in-loop operands. Note that this is
// particularly cheap because we can rip off the PHI node that we're
// replacing for the number and blocks of the predecessors.
@@ -820,10 +1299,28 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
return New;
}
+static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
+ AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
+ if (AST)
+ AST->deleteValue(&I);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(&I);
+ SafetyInfo.removeInstruction(&I);
+ I.eraseFromParent();
+}
+
+static void moveInstructionBefore(Instruction &I, Instruction &Dest,
+ ICFLoopSafetyInfo &SafetyInfo) {
+ SafetyInfo.removeInstruction(&I);
+ SafetyInfo.insertInstructionTo(&I, Dest.getParent());
+ I.moveBefore(&Dest);
+}
+
static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
- const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) {
+ const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
+ MemorySSAUpdater *MSSAU) {
assert(isTriviallyReplaceablePHI(*TPN, *I) &&
"Expect only trivially replaceable PHI");
BasicBlock *ExitBlock = TPN->getParent();
@@ -832,8 +1329,8 @@ static Instruction *sinkThroughTriviallyReplaceablePHI(
if (It != SunkCopies.end())
New = It->second;
else
- New = SunkCopies[ExitBlock] =
- CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo);
+ New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
+ *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
return New;
}
@@ -845,7 +1342,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
// it requires updating BlockColors for all offspring blocks accordingly. By
// skipping such corner case, we can make updating BlockColors after splitting
// predecessor fairly simple.
- if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad())
+ if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
return false;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *BBPred = *PI;
@@ -857,7 +1354,8 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
LoopInfo *LI, const Loop *CurLoop,
- LoopSafetyInfo *SafetyInfo) {
+ LoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU) {
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
@@ -899,7 +1397,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
// LE:
// %p = phi [%p1, %LE.split], [%p2, %LE.split2]
//
- auto &BlockColors = SafetyInfo->BlockColors;
+ const auto &BlockColors = SafetyInfo->getBlockColors();
SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
while (!PredBBs.empty()) {
BasicBlock *PredBB = *PredBBs.begin();
@@ -907,18 +1405,15 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
"Expect all predecessors are in the loop");
if (PN->getBasicBlockIndex(PredBB) >= 0) {
BasicBlock *NewPred = SplitBlockPredecessors(
- ExitBB, PredBB, ".split.loop.exit", DT, LI, true);
+ ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
// Since we do not allow splitting EH-block with BlockColors in
// canSplitPredecessors(), we can simply assign predecessor's color to
// the new block.
- if (!BlockColors.empty()) {
+ if (!BlockColors.empty())
// Grab a reference to the ColorVector to be inserted before getting the
// reference to the vector we are copying because inserting the new
// element in BlockColors might cause the map to be reallocated.
- ColorVector &ColorsForNewBlock = BlockColors[NewPred];
- ColorVector &ColorsForOldBlock = BlockColors[PredBB];
- ColorsForNewBlock = ColorsForOldBlock;
- }
+ SafetyInfo->copyColors(NewPred, PredBB);
}
PredBBs.remove(PredBB);
}
@@ -930,8 +1425,9 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
+ const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
+ bool FreeInLoop) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -983,7 +1479,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// Split predecessors of the PHI so that we can make users trivially
// replaceable.
- splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo);
+ splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
// Should rebuild the iterators, as they may be invalidated by
// splitPredecessorsOfLoopExit().
@@ -1018,10 +1514,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
// The PHI must be trivially replaceable.
- Instruction *New = sinkThroughTriviallyReplaceablePHI(PN, &I, LI, SunkCopies,
- SafetyInfo, CurLoop);
+ Instruction *New = sinkThroughTriviallyReplaceablePHI(
+ PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New);
- PN->eraseFromParent();
+ eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
Changed = true;
}
return Changed;
@@ -1030,11 +1526,10 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
/// When an instruction is found to only use loop invariant operands and
/// is safe to hoist, this function is called to do the dirty work.
///
-static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE) {
- auto *Preheader = CurLoop->getLoopPreheader();
- LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
+static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
+ BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
+ LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
<< "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
@@ -1049,11 +1544,22 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
// time in isGuaranteedToExecute if we don't actually have anything to
// drop. It is a compile time optimization, not required for correctness.
- !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
+ !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
I.dropUnknownNonDebugMetadata();
- // Move the new node to the Preheader, before its terminator.
- I.moveBefore(Preheader->getTerminator());
+ if (isa<PHINode>(I))
+ // Move the new node to the end of the phi list in the destination block.
+ moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo);
+ else
+ // Move the new node to the destination block, before its terminator.
+ moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
+ if (MSSAU) {
+ // If the instruction we just moved is a load or store, update MemorySSA.
+ MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&I));
+ if (OldMemAcc)
+ MSSAU->moveToPlace(OldMemAcc, Dest, MemorySSA::End);
+ }
// Do not retain debug locations when we are moving instructions to different
// basic blocks, because we want to avoid jumpy line tables. Calls, however,
@@ -1068,7 +1574,6 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
else if (isa<CallInst>(I))
++NumMovedCalls;
++NumHoisted;
- return true;
}
/// Only sink or hoist an instruction if it is not a trapping instruction,
@@ -1084,7 +1589,7 @@ static bool isSafeToExecuteUnconditionally(Instruction &Inst,
return true;
bool GuaranteedToExecute =
- isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
+ SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
if (!GuaranteedToExecute) {
auto *LI = dyn_cast<LoadInst>(&Inst);
@@ -1113,6 +1618,7 @@ class LoopPromoter : public LoadAndStorePromoter {
int Alignment;
bool UnorderedAtomic;
AAMDNodes AATags;
+ ICFLoopSafetyInfo &SafetyInfo;
Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -1135,11 +1641,13 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- bool UnorderedAtomic, const AAMDNodes &AATags)
+ bool UnorderedAtomic, const AAMDNodes &AATags,
+ ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
LI(li), DL(std::move(dl)), Alignment(alignment),
- UnorderedAtomic(UnorderedAtomic), AATags(AATags) {}
+ UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo)
+ {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -1176,7 +1684,10 @@ public:
// Update alias analysis.
AST.copyValue(LI, V);
}
- void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); }
+ void instructionDeleted(Instruction *I) const override {
+ SafetyInfo.removeInstruction(I);
+ AST.deleteValue(I);
+ }
};
@@ -1214,7 +1725,7 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<BasicBlock *> &ExitBlocks,
SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
+ Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
@@ -1277,7 +1788,7 @@ bool llvm::promoteLoopAccessesToScalars(
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
bool IsKnownThreadLocalObject = false;
- if (SafetyInfo->MayThrow) {
+ if (SafetyInfo->anyBlockMayThrow()) {
// If a loop can throw, we have to insert a store along each unwind edge.
// That said, we can't actually make the unwind edge explicit. Therefore,
// we have to prove that the store is dead along the unwind edge. We do
@@ -1310,7 +1821,6 @@ bool llvm::promoteLoopAccessesToScalars(
// If there is a non-load/store instruction in the loop, we can't promote
// it.
if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
- assert(!Load->isVolatile() && "AST broken");
if (!Load->isUnordered())
return false;
@@ -1325,7 +1835,6 @@ bool llvm::promoteLoopAccessesToScalars(
// pointer.
if (UI->getOperand(1) != ASIV)
continue;
- assert(!Store->isVolatile() && "AST broken");
if (!Store->isUnordered())
return false;
@@ -1344,7 +1853,7 @@ bool llvm::promoteLoopAccessesToScalars(
if (!DereferenceableInPH || !SafeToInsertStore ||
(InstAlignment > Alignment)) {
- if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
+ if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
DereferenceableInPH = true;
SafeToInsertStore = true;
Alignment = std::max(Alignment, InstAlignment);
@@ -1435,7 +1944,7 @@ bool llvm::promoteLoopAccessesToScalars(
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, PIC, *CurAST, *LI, DL, Alignment,
- SawUnorderedAtomic, AATags);
+ SawUnorderedAtomic, AATags, *SafetyInfo);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
@@ -1455,7 +1964,7 @@ bool llvm::promoteLoopAccessesToScalars(
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
- PreheaderLoad->eraseFromParent();
+ eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr);
return true;
}
@@ -1466,10 +1975,10 @@ bool llvm::promoteLoopAccessesToScalars(
/// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
/// from scratch for every loop. Hook up with the helper functions when
/// available in the new pass manager to avoid redundant computation.
-AliasSetTracker *
+std::unique_ptr<AliasSetTracker>
LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
AliasAnalysis *AA) {
- AliasSetTracker *CurAST = nullptr;
+ std::unique_ptr<AliasSetTracker> CurAST;
SmallVector<Loop *, 4> RecomputeLoops;
for (Loop *InnerL : L->getSubLoops()) {
auto MapI = LoopToAliasSetMap.find(InnerL);
@@ -1480,35 +1989,30 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
RecomputeLoops.push_back(InnerL);
continue;
}
- AliasSetTracker *InnerAST = MapI->second;
+ std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
- if (CurAST != nullptr) {
+ if (CurAST) {
// What if InnerLoop was modified by other passes?
- CurAST->add(*InnerAST);
-
// Once we've incorporated the inner loop's AST into ours, we don't need
// the subloop's anymore.
- delete InnerAST;
+ CurAST->add(*InnerAST);
} else {
- CurAST = InnerAST;
+ CurAST = std::move(InnerAST);
}
LoopToAliasSetMap.erase(MapI);
}
- if (CurAST == nullptr)
- CurAST = new AliasSetTracker(*AA);
-
- auto mergeLoop = [&](Loop *L) {
- // Loop over the body of this loop, looking for calls, invokes, and stores.
- for (BasicBlock *BB : L->blocks())
- CurAST->add(*BB); // Incorporate the specified basic block
- };
+ if (!CurAST)
+ CurAST = make_unique<AliasSetTracker>(*AA);
// Add everything from the sub loops that are no longer directly available.
for (Loop *InnerL : RecomputeLoops)
- mergeLoop(InnerL);
+ for (BasicBlock *BB : InnerL->blocks())
+ CurAST->add(*BB);
- // And merge in this loop.
- mergeLoop(L);
+ // And merge in this loop (without anything from inner loops).
+ for (BasicBlock *BB : L->blocks())
+ if (LI->getLoopFor(BB) == L)
+ CurAST->add(*BB);
return CurAST;
}
@@ -1517,42 +2021,89 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
///
void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
Loop *L) {
- AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
- if (!AST)
+ auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
+ if (ASTIt == LICM.getLoopToAliasSetMap().end())
return;
- AST->copyValue(From, To);
+ ASTIt->second->copyValue(From, To);
}
/// Simple Analysis hook. Delete value V from alias set
///
void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
- AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
- if (!AST)
+ auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
+ if (ASTIt == LICM.getLoopToAliasSetMap().end())
return;
- AST->deleteValue(V);
+ ASTIt->second->deleteValue(V);
}
/// Simple Analysis hook. Delete value L from alias set map.
///
void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
- AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
- if (!AST)
+ if (!LICM.getLoopToAliasSetMap().count(L))
return;
- delete AST;
LICM.getLoopToAliasSetMap().erase(L);
}
-/// Return true if the body of this loop may store into the memory
-/// location pointed to by V.
-///
-static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo,
- AliasSetTracker *CurAST) {
- // Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
+static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
+ AliasSetTracker *CurAST, Loop *CurLoop,
+ AliasAnalysis *AA) {
+ // First check to see if any of the basic blocks in CurLoop invalidate MemLoc.
+ bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
+
+ if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
+ return isInvalidatedAccordingToAST;
+
+ // Check with a diagnostic analysis if we can refine the information above.
+ // This is to identify the limitations of using the AST.
+ // The alias set mechanism used by LICM has a major weakness in that it
+ // combines all things which may alias into a single set *before* asking
+ // modref questions. As a result, a single readonly call within a loop will
+ // collapse all loads and stores into a single alias set and report
+ // invalidation if the loop contains any store. For example, readonly calls
+ // with deopt states have this form and create a general alias set with all
+ // loads and stores. In order to get any LICM in loops containing possible
+ // deopt states, we need a more precise invalidation check that queries the
+ // mod ref info of each instruction within the loop against LI. This has a
+ // complexity of O(N^2), so currently it is used only as a diagnostic tool
+ // since the
+ // default value of LICMN2Threshold is zero.
+
+ // Don't look at nested loops.
+ if (CurLoop->begin() != CurLoop->end())
+ return true;
+
+ int N = 0;
+ for (BasicBlock *BB : CurLoop->getBlocks())
+ for (Instruction &I : *BB) {
+ if (N >= LICMN2Theshold) {
+ LLVM_DEBUG(dbgs() << "Aliasing N2 threshold exhausted for "
+ << *(MemLoc.Ptr) << "\n");
+ return true;
+ }
+ N++;
+ auto Res = AA->getModRefInfo(&I, MemLoc);
+ if (isModSet(Res)) {
+ LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
+ << *(MemLoc.Ptr) << "\n");
+ return true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
+ return false;
+}
+
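The weakness described above is roughly the following situation (a hypothetical sketch, not a guaranteed reproduction): a read-only call that may read through several pointers pulls the location of an otherwise invariant load into the same alias set as the loop's stores, so the set is reported as Mod and the AST path refuses to hoist the load; the per-instruction mod/ref walk enabled by a non-zero LICMN2Theshold is meant to diagnose exactly such cases.

// Hypothetical illustration of the alias-set limitation discussed above.
__attribute__((pure)) static int snapshot(const int *p, const int *q,
                                          const int *r) {
  return *p + *q + *r; // reads through several pointers, writes nothing
}

void fill(int *__restrict out, const int *p, const int *q, const int *r,
          int n) {
  for (int i = 0; i < n; ++i)
    out[i] = *p + snapshot(p, q, r); // *p is invariant, but the merged alias
                                     // set containing the out[i] stores makes
                                     // the AST treat it as invalidated
}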
+static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop) {
+ MemoryAccess *Source;
+ // See declaration of EnableLicmCap for usage details.
+ if (EnableLicmCap)
+ Source = MU->getDefiningAccess();
+ else
+ Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+ return !MSSA->isLiveOnEntryDef(Source) &&
+ CurLoop->contains(Source->getBlock());
}
/// Little predicate that returns true if the specified basic block is in
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 06083a4f5086..d797c9dc9e72 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -78,6 +78,18 @@ using namespace llvm;
#define LDIST_NAME "loop-distribute"
#define DEBUG_TYPE LDIST_NAME
+/// @{
+/// Metadata attribute names
+static const char *const LLVMLoopDistributeFollowupAll =
+ "llvm.loop.distribute.followup_all";
+static const char *const LLVMLoopDistributeFollowupCoincident =
+ "llvm.loop.distribute.followup_coincident";
+static const char *const LLVMLoopDistributeFollowupSequential =
+ "llvm.loop.distribute.followup_sequential";
+static const char *const LLVMLoopDistributeFollowupFallback =
+ "llvm.loop.distribute.followup_fallback";
+/// @}
+
static cl::opt<bool>
LDistVerify("loop-distribute-verify", cl::Hidden,
cl::desc("Turn on DominatorTree and LoopInfo verification "
@@ -186,7 +198,7 @@ public:
/// Returns the loop where this partition ends up after distribution.
/// If this partition is mapped to the original loop then use the block from
/// the loop.
- const Loop *getDistributedLoop() const {
+ Loop *getDistributedLoop() const {
return ClonedLoop ? ClonedLoop : OrigLoop;
}
@@ -443,6 +455,9 @@ public:
assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
"preheader not empty");
+ // Preserve the original loop ID for use after the transformation.
+ MDNode *OrigLoopID = L->getLoopID();
+
// Create a loop for each partition except the last. Clone the original
// loop before PH along with adding a preheader for the cloned loop. Then
// update PH to point to the newly added preheader.
@@ -457,9 +472,13 @@ public:
Part->getVMap()[ExitBlock] = TopPH;
Part->remapInstructions();
+ setNewLoopID(OrigLoopID, Part);
}
Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
+ // Also set a new loop ID for the last loop.
+ setNewLoopID(OrigLoopID, &PartitionContainer.back());
+
// Now go in forward order and update the immediate dominator for the
// preheaders with the exiting block of the previous loop. Dominance
// within the loop is updated in cloneLoopWithPreheader.
@@ -575,6 +594,19 @@ private:
}
}
}
+
+ /// Assign new LoopIDs for the partition's cloned loop.
+ void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
+ Optional<MDNode *> PartitionID = makeFollowupLoopID(
+ OrigLoopID,
+ {LLVMLoopDistributeFollowupAll,
+ Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
+ : LLVMLoopDistributeFollowupCoincident});
+ if (PartitionID.hasValue()) {
+ Loop *NewLoop = Part->getDistributedLoop();
+ NewLoop->setLoopID(PartitionID.getValue());
+ }
+ }
};
/// For each memory instruction, this class maintains difference of the
@@ -743,6 +775,9 @@ public:
return fail("TooManySCEVRuntimeChecks",
"too many SCEV run-time checks needed.\n");
+ if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
+ return fail("HeuristicDisabled", "distribution heuristic disabled");
+
LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
// We're done forming the partitions set up the reverse mapping from
// instructions to partitions.
@@ -762,6 +797,8 @@ public:
RtPtrChecking);
if (!Pred.isAlwaysTrue() || !Checks.empty()) {
+ MDNode *OrigLoopID = L->getLoopID();
+
LLVM_DEBUG(dbgs() << "\nPointers:\n");
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
@@ -769,6 +806,17 @@ public:
LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
LVer.versionLoop(DefsUsedOutside);
LVer.annotateLoopWithNoAlias();
+
+ // The unversioned loop will not be changed, so we inherit all attributes
+ // from the original loop, but remove the loop distribution metadata to
+ // avoid distributing it again.
+ MDNode *UnversionedLoopID =
+ makeFollowupLoopID(OrigLoopID,
+ {LLVMLoopDistributeFollowupAll,
+ LLVMLoopDistributeFollowupFallback},
+ "llvm.loop.distribute.", true)
+ .getValue();
+ LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
}
// Create identical copies of the original loop for each partition and hook
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index eb3188711858..fbffa1920a84 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -26,7 +26,7 @@
// Future floating point idioms to recognize in -ffast-math mode:
// fpowi
// Future integer operation idioms to recognize:
-// ctpop, ctlz, cttz
+// ctpop
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set. It
@@ -163,8 +163,9 @@ private:
void collectStores(BasicBlock *BB);
LegalStoreKind isLegalStore(StoreInst *SI);
+ enum class ForMemset { No, Yes };
bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
- bool ForMemset);
+ ForMemset For);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
@@ -186,9 +187,10 @@ private:
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
- bool recognizeAndInsertCTLZ();
- void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
- PHINode *CntPhi, Value *Var, Instruction *DefX,
+ bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
+ void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
+ Instruction *CntInst, PHINode *CntPhi,
+ Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop);
@@ -319,9 +321,9 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
// The following transforms hoist stores/memsets into the loop pre-header.
// Give up if the loop has instructions that may throw.
- LoopSafetyInfo SafetyInfo;
- computeLoopSafetyInfo(&SafetyInfo, CurLoop);
- if (SafetyInfo.MayThrow)
+ SimpleLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(CurLoop);
+ if (SafetyInfo.anyBlockMayThrow())
return MadeChange;
// Scan all the blocks in the loop that are not in subloops.
@@ -347,6 +349,9 @@ static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
+ // FIXME: This could check for UndefValue because it can be merged into any
+ // other valid pattern.
+
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
@@ -543,10 +548,10 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// optimized into a memset (memset_pattern). The latter most commonly happens
// with structs and hand-unrolled loops.
for (auto &SL : StoreRefsForMemset)
- MadeChange |= processLoopStores(SL.second, BECount, true);
+ MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
for (auto &SL : StoreRefsForMemsetPattern)
- MadeChange |= processLoopStores(SL.second, BECount, false);
+ MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
// Optimize the store into a memcpy, if it feeds a similarly strided load.
for (auto &SI : StoreRefsForMemcpy)
@@ -572,10 +577,9 @@ bool LoopIdiomRecognize::runOnLoopBlock(
return MadeChange;
}
-/// processLoopStores - See if this store(s) can be promoted to a memset.
+/// See if these stores can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
- const SCEV *BECount,
- bool ForMemset) {
+ const SCEV *BECount, ForMemset For) {
// Try to find consecutive stores that can be transformed into memsets.
SetVector<StoreInst *> Heads, Tails;
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
@@ -602,7 +606,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Value *FirstSplatValue = nullptr;
Constant *FirstPatternValue = nullptr;
- if (ForMemset)
+ if (For == ForMemset::Yes)
FirstSplatValue = isBytewiseValue(FirstStoredVal);
else
FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
@@ -635,7 +639,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Value *SecondSplatValue = nullptr;
Constant *SecondPatternValue = nullptr;
- if (ForMemset)
+ if (For == ForMemset::Yes)
SecondSplatValue = isBytewiseValue(SecondStoredVal);
else
SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
@@ -644,10 +648,14 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
"Expected either splat value or pattern value.");
if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
- if (ForMemset) {
+ if (For == ForMemset::Yes) {
+ if (isa<UndefValue>(FirstSplatValue))
+ FirstSplatValue = SecondSplatValue;
if (FirstSplatValue != SecondSplatValue)
continue;
} else {
+ if (isa<UndefValue>(FirstPatternValue))
+ FirstPatternValue = SecondPatternValue;
if (FirstPatternValue != SecondPatternValue)
continue;
}
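For context, this is the kind of consecutive-store loop that processLoopStores merges into a single memset (a hypothetical example, not from the patch); the change above additionally lets a store whose value is undef pair up with its neighbour instead of blocking the merge.

// Two adjacent byte stores of the same splat value per iteration; the pass
// recognizes the pair as one strided store and emits memset(p, 0, 2 * n).
struct Pair { char a, b; };

void clear_pairs(Pair *p, int n) {
  for (int i = 0; i < n; ++i) {
    p[i].a = 0;
    p[i].b = 0;
  }
}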
@@ -772,12 +780,13 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
- uint64_t AccessSize = MemoryLocation::UnknownSize;
+ LocationSize AccessSize = LocationSize::unknown();
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
+ AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
+ StoreSize);
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -1100,15 +1109,17 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
}
bool LoopIdiomRecognize::runOnNoncountableLoop() {
- return recognizePopcount() || recognizeAndInsertCTLZ();
+ return recognizePopcount() || recognizeAndInsertFFS();
}
/// Check if the given conditional branch is based on the comparison between
-/// a variable and zero, and if the variable is non-zero, the control yields to
-/// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparison is returned. This function will be called to see if the
-/// precondition and postcondition of the loop are in desirable form.
-static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
+/// a variable and zero, and if the variable is non-zero (or zero, when JmpOnZero
+/// is true), the control yields to the loop entry. If the branch matches the
+/// behavior, the variable involved in the comparison is returned. This function
+/// will be called to see if the precondition and postcondition of the loop are
+/// in desirable form.
+static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
+ bool JmpOnZero = false) {
if (!BI || !BI->isConditional())
return nullptr;
@@ -1120,9 +1131,14 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
if (!CmpZero || !CmpZero->isZero())
return nullptr;
+ BasicBlock *TrueSucc = BI->getSuccessor(0);
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ if (JmpOnZero)
+ std::swap(TrueSucc, FalseSucc);
+
ICmpInst::Predicate Pred = Cond->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) ||
- (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry))
+ if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
+ (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
return Cond->getOperand(0);
return nullptr;
@@ -1298,14 +1314,14 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
///
/// loop-exit:
/// \endcode
-static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
- Instruction *&CntInst, PHINode *&CntPhi,
- Instruction *&DefX) {
+static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
+ Intrinsic::ID &IntrinID, Value *&InitX,
+ Instruction *&CntInst, PHINode *&CntPhi,
+ Instruction *&DefX) {
BasicBlock *LoopEntry;
Value *VarX = nullptr;
DefX = nullptr;
- PhiX = nullptr;
CntInst = nullptr;
CntPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
@@ -1317,20 +1333,28 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
else
return false;
- // step 2: detect instructions corresponding to "x.next = x >> 1"
- if (!DefX || (DefX->getOpcode() != Instruction::AShr &&
- DefX->getOpcode() != Instruction::LShr))
+ // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
+ if (!DefX || !DefX->isShift())
return false;
+ IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
+ Intrinsic::ctlz;
ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
if (!Shft || !Shft->isOne())
return false;
VarX = DefX->getOperand(0);
// step 3: Check the recurrence of variable X
- PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
+ PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
if (!PhiX)
return false;
+ InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+
+ // Make sure the initial value can't be negative, otherwise the ashr in the
+ // loop might never reach zero, which would make the loop infinite.
+ if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
+ return false;
+
// step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
// TODO: We can skip the step. If loop trip count is known (CTLZ),
// then all uses of "cnt.next" could be optimized to the trip count
@@ -1362,17 +1386,25 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
return true;
}
-/// Recognize CTLZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ trip count).
-/// If CTLZ inserted as a new trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If a CTLZ / CTTZ is inserted as
+/// a new trip count, returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
return false;
- Instruction *CntInst, *DefX;
- PHINode *CntPhi, *PhiX;
- if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX))
+ Intrinsic::ID IntrinID;
+ Value *InitX;
+ Instruction *DefX = nullptr;
+ PHINode *CntPhi = nullptr;
+ Instruction *CntInst = nullptr;
+ // Help decide if the transformation is profitable. For the ShiftUntilZero
+ // idiom, this is always 6.
+ size_t IdiomCanonicalSize = 6;
+
+ if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
+ CntInst, CntPhi, DefX))
return false;
bool IsCntPhiUsedOutsideLoop = false;
@@ -1399,12 +1431,6 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
// It is safe to assume the Preheader exists, as it was checked in the
// parent function RunOnLoop.
BasicBlock *PH = CurLoop->getLoopPreheader();
- Value *InitX = PhiX->getIncomingValueForBlock(PH);
-
- // Make sure the initial value can't be negative otherwise the ashr in the
- // loop might never reach zero which would make the loop infinite.
- if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL))
- return false;
// If we are using the count instruction outside the loop, make sure we
// have a zero check as a precondition. Without the check the loop would run
@@ -1422,8 +1448,10 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
ZeroCheck = true;
}
- // Check if CTLZ intrinsic is profitable. Assume it is always profitable
- // if we delete the loop (the loop has only 6 instructions):
+ // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+ // profitable if we delete the loop.
+
+  // The loop has only 6 instructions:
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
// %shr = ashr %n.addr.0, 1
@@ -1434,12 +1462,12 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
const Value *Args[] =
{InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
: ConstantInt::getFalse(InitX->getContext())};
- if (CurLoop->getHeader()->size() != 6 &&
- TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) >
- TargetTransformInfo::TCC_Basic)
+ if (CurLoop->getHeader()->size() != IdiomCanonicalSize &&
+ TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
+ TargetTransformInfo::TCC_Basic)
return false;
- transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX,
+ transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
IsCntPhiUsedOutsideLoop);
return true;
@@ -1508,20 +1536,21 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
return CI;
}
-static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
- const DebugLoc &DL, bool ZeroCheck) {
+static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+ const DebugLoc &DL, bool ZeroCheck,
+ Intrinsic::ID IID) {
Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);
+ Value *Func = Intrinsic::getDeclaration(M, IID, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
return CI;
}
-/// Transform the following loop:
+/// Transform the following loop (Using CTLZ, CTTZ is similar):
/// loop:
/// CntPhi = PHI [Cnt0, CntInst]
/// PhiX = PHI [InitX, DefX]
@@ -1553,19 +1582,19 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
/// If LOOP_BODY is empty the loop will be deleted.
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
- BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
- Instruction *DefX, const DebugLoc &DL, bool ZeroCheck,
- bool IsCntPhiUsedOutsideLoop) {
+ Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
+ PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
- // Step 1: Insert the CTLZ instruction at the end of the preheader block
- // Count = BitWidth - CTLZ(InitX);
- // If there are uses of CntPhi create:
- // CountPrev = BitWidth - CTLZ(InitX >> 1);
+ // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
IRBuilder<> Builder(PreheaderBr);
Builder.SetCurrentDebugLocation(DL);
- Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
+ Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;
+ // Count = BitWidth - CTLZ(InitX);
+ // If there are uses of CntPhi create:
+ // CountPrev = BitWidth - CTLZ(InitX >> 1);
if (IsCntPhiUsedOutsideLoop) {
if (DefX->getOpcode() == Instruction::AShr)
InitXNext =
@@ -1573,29 +1602,30 @@ void LoopIdiomRecognize::transformLoopToCountable(
else if (DefX->getOpcode() == Instruction::LShr)
InitXNext =
Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
+ else if (DefX->getOpcode() == Instruction::Shl) // cttz
+ InitXNext =
+ Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
else
llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
- CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
+ FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
Count = Builder.CreateSub(
- ConstantInt::get(CTLZ->getType(),
- CTLZ->getType()->getIntegerBitWidth()),
- CTLZ);
+ ConstantInt::get(FFS->getType(),
+ FFS->getType()->getIntegerBitWidth()),
+ FFS);
if (IsCntPhiUsedOutsideLoop) {
CountPrev = Count;
Count = Builder.CreateAdd(
CountPrev,
ConstantInt::get(CountPrev->getType(), 1));
}
- if (IsCntPhiUsedOutsideLoop)
- NewCount = Builder.CreateZExtOrTrunc(CountPrev,
- cast<IntegerType>(CntInst->getType()));
- else
- NewCount = Builder.CreateZExtOrTrunc(Count,
- cast<IntegerType>(CntInst->getType()));
- // If the CTLZ counter's initial value is not zero, insert Add Inst.
+ NewCount = Builder.CreateZExtOrTrunc(
+ IsCntPhiUsedOutsideLoop ? CountPrev : Count,
+ cast<IntegerType>(CntInst->getType()));
+
+ // If the counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero())
@@ -1631,8 +1661,7 @@ void LoopIdiomRecognize::transformLoopToCountable(
LbCond->setOperand(1, ConstantInt::get(Ty, 0));
// Step 3: All the references to the original counter outside
- // the loop are replaced with the NewCount -- the value returned from
- // __builtin_ctlz(x).
+ // the loop are replaced with the NewCount
if (IsCntPhiUsedOutsideLoop)
CntPhi->replaceUsesOutsideBlock(NewCount, Body);
else
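The LoopIdiomRecognize changes above generalize the old CTLZ-only recognizer to both count-leading-zeros and count-trailing-zeros loops. A minimal C++ illustration (not taken from the patch; the function and variable names are made up) of the shift-until-zero shape that detectShiftUntilZeroIdiom() matches:

    unsigned shiftUntilZero(unsigned x) {
      unsigned cnt = 0;
      while (x != 0) {   // not countable as written
        x >>= 1;         // "x.next = x >> 1": lshr selects Intrinsic::ctlz
        ++cnt;           // "cnt.next = cnt + 1"
      }
      return cnt;        // becomes BitWidth - ctlz(x) after the transform
    }

The x <<= 1 variant selects Intrinsic::cttz instead, and the ashr form is only accepted when isKnownNonNegative() proves the initial value cannot go negative, as the new check after step 3 above requires.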
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 71859efbf4bd..6f7dc2429c09 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -22,8 +22,9 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
@@ -36,6 +37,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <utility>
@@ -47,8 +49,8 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
- AssumptionCache &AC,
- const TargetLibraryInfo &TLI) {
+ AssumptionCache &AC, const TargetLibraryInfo &TLI,
+ MemorySSAUpdater *MSSAU) {
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
SimplifyQuery SQ(DL, &TLI, &DT, &AC);
@@ -75,9 +77,12 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
// iterate.
LoopBlocksRPO RPOT(&L);
RPOT.perform(&LI);
+ MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
bool Changed = false;
for (;;) {
+ if (MSSAU && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
for (BasicBlock *BB : RPOT) {
for (Instruction &I : *BB) {
if (auto *PI = dyn_cast<PHINode>(&I))
@@ -129,6 +134,12 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
ToSimplify->insert(UserI);
}
+ if (MSSAU)
+ if (Instruction *SimpleI = dyn_cast_or_null<Instruction>(V))
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(&I))
+ if (MemoryAccess *ReplacementMA = MSSA->getMemoryAccess(SimpleI))
+ MA->replaceAllUsesWith(ReplacementMA);
+
assert(I.use_empty() && "Should always have replaced all uses!");
if (isInstructionTriviallyDead(&I, &TLI))
DeadInsts.push_back(&I);
@@ -141,9 +152,12 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
// iteration over all instructions in all the loop blocks.
if (!DeadInsts.empty()) {
Changed = true;
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU);
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
// If we never found a PHI that needs to be simplified in the next
// iteration, we're done.
if (Next->empty())
@@ -180,8 +194,15 @@ public:
*L->getHeader()->getParent());
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ MemorySSA *MSSA = nullptr;
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = MemorySSAUpdater(MSSA);
+ }
- return simplifyLoopInst(*L, DT, LI, AC, TLI);
+ return simplifyLoopInst(*L, DT, LI, AC, TLI,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -189,6 +210,10 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesCFG();
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
getLoopAnalysisUsage(AU);
}
};
@@ -198,7 +223,13 @@ public:
PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
- if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI))
+ Optional<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA) {
+ MSSAU = MemorySSAUpdater(AR.MSSA);
+ AR.MSSA->verifyMemorySSA();
+ }
+ if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
@@ -212,6 +243,7 @@ INITIALIZE_PASS_BEGIN(LoopInstSimplifyLegacyPass, "loop-instsimplify",
"Simplify instructions in loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopInstSimplifyLegacyPass, "loop-instsimplify",
"Simplify instructions in loops", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 2978165ed8a9..766e39b439a0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -17,9 +17,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -271,7 +271,7 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
return true;
}
-static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
+static LoopVector populateWorklist(Loop &L) {
LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() << " Loop: %"
<< L.getHeader()->getName() << '\n');
@@ -282,16 +282,15 @@ static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
// The current loop has multiple subloops in it hence it is not tightly
// nested.
// Discard all loops above it added into Worklist.
- if (Vec->size() != 1) {
- LoopList.clear();
- return;
- }
+ if (Vec->size() != 1)
+ return {};
+
LoopList.push_back(CurrentLoop);
CurrentLoop = Vec->front();
Vec = &CurrentLoop->getSubLoops();
}
LoopList.push_back(CurrentLoop);
- V.push_back(std::move(LoopList));
+ return LoopList;
}
static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
@@ -327,10 +326,8 @@ namespace {
class LoopInterchangeLegality {
public:
LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
- LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA,
OptimizationRemarkEmitter *ORE)
- : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
- PreserveLCSSA(PreserveLCSSA), ORE(ORE) {}
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
/// Check if the loops can be interchanged.
bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -342,29 +339,33 @@ public:
bool currentLimitations();
- bool hasInnerLoopReduction() { return InnerLoopHasReduction; }
+ const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
+ return OuterInnerReductions;
+ }
private:
bool tightlyNested(Loop *Outer, Loop *Inner);
- bool containsUnsafeInstructionsInHeader(BasicBlock *BB);
- bool areAllUsesReductions(Instruction *Ins, Loop *L);
- bool containsUnsafeInstructionsInLatch(BasicBlock *BB);
+ bool containsUnsafeInstructions(BasicBlock *BB);
+
+ /// Discover induction and reduction PHIs in the header of \p L. Induction
+ /// PHIs are added to \p Inductions, reductions are added to
+ /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
+ /// to be passed as \p InnerLoop.
bool findInductionAndReductions(Loop *L,
SmallVector<PHINode *, 8> &Inductions,
- SmallVector<PHINode *, 8> &Reductions);
+ Loop *InnerLoop);
Loop *OuterLoop;
Loop *InnerLoop;
ScalarEvolution *SE;
- LoopInfo *LI;
- DominatorTree *DT;
- bool PreserveLCSSA;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- bool InnerLoopHasReduction = false;
+  /// Set of reduction PHIs taking part in a reduction across the inner and
+ /// outer loop.
+ SmallPtrSet<PHINode *, 4> OuterInnerReductions;
};
/// LoopInterchangeProfitability checks if it is profitable to interchange the
@@ -398,10 +399,9 @@ public:
LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
BasicBlock *LoopNestExit,
- bool InnerLoopContainsReductions)
+ const LoopInterchangeLegality &LIL)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
- LoopExit(LoopNestExit),
- InnerLoopHasReduction(InnerLoopContainsReductions) {}
+ LoopExit(LoopNestExit), LIL(LIL) {}
/// Interchange OuterLoop and InnerLoop.
bool transform();
@@ -416,8 +416,6 @@ private:
bool adjustLoopLinks();
void adjustLoopPreheaders();
bool adjustLoopBranches();
- void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
- BasicBlock *NewPred);
Loop *OuterLoop;
Loop *InnerLoop;
@@ -428,41 +426,34 @@ private:
LoopInfo *LI;
DominatorTree *DT;
BasicBlock *LoopExit;
- bool InnerLoopHasReduction;
+
+ const LoopInterchangeLegality &LIL;
};
// Main LoopInterchange Pass.
-struct LoopInterchange : public FunctionPass {
+struct LoopInterchange : public LoopPass {
static char ID;
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr;
- bool PreserveLCSSA;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- LoopInterchange() : FunctionPass(ID) {
+ LoopInterchange() : LoopPass(ID) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DependenceAnalysisWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
+ getLoopAnalysisUsage(AU);
}
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L) || L->getParentLoop())
return false;
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
@@ -470,21 +461,8 @@ struct LoopInterchange : public FunctionPass {
DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
- // Build up a worklist of loop pairs to analyze.
- SmallVector<LoopVector, 8> Worklist;
-
- for (Loop *L : *LI)
- populateWorklist(*L, Worklist);
- LLVM_DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
- bool Changed = true;
- while (!Worklist.empty()) {
- LoopVector LoopList = Worklist.pop_back_val();
- Changed = processLoopList(LoopList, F);
- }
- return Changed;
+ return processLoopList(populateWorklist(*L));
}
bool isComputableLoopNest(LoopVector LoopList) {
@@ -512,7 +490,7 @@ struct LoopInterchange : public FunctionPass {
return LoopList.size() - 1;
}
- bool processLoopList(LoopVector LoopList, Function &F) {
+ bool processLoopList(LoopVector LoopList) {
bool Changed = false;
unsigned LoopNestDepth = LoopList.size();
if (LoopNestDepth < 2) {
@@ -580,8 +558,7 @@ struct LoopInterchange : public FunctionPass {
Loop *InnerLoop = LoopList[InnerLoopId];
Loop *OuterLoop = LoopList[OuterLoopId];
- LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
- PreserveLCSSA, ORE);
+ LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
return false;
@@ -600,8 +577,8 @@ struct LoopInterchange : public FunctionPass {
<< "Loop interchanged with enclosing loop.";
});
- LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
- LoopNestExit, LIL.hasInnerLoopReduction());
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
+ LIL);
LIT.transform();
LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
LoopsInterchanged++;
@@ -611,42 +588,12 @@ struct LoopInterchange : public FunctionPass {
} // end anonymous namespace
-bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
- return llvm::none_of(Ins->users(), [=](User *U) -> bool {
- auto *UserIns = dyn_cast<PHINode>(U);
- RecurrenceDescriptor RD;
- return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
+bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
+ return any_of(*BB, [](const Instruction &I) {
+ return I.mayHaveSideEffects() || I.mayReadFromMemory();
});
}
-bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader(
- BasicBlock *BB) {
- for (Instruction &I : *BB) {
- // Load corresponding to reduction PHI's are safe while concluding if
- // tightly nested.
- if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
- if (!areAllUsesReductions(L, InnerLoop))
- return true;
- } else if (I.mayHaveSideEffects() || I.mayReadFromMemory())
- return true;
- }
- return false;
-}
-
-bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
- BasicBlock *BB) {
- for (Instruction &I : *BB) {
- // Stores corresponding to reductions are safe while concluding if tightly
- // nested.
- if (StoreInst *L = dyn_cast<StoreInst>(&I)) {
- if (!isa<PHINode>(L->getOperand(0)))
- return true;
- } else if (I.mayHaveSideEffects() || I.mayReadFromMemory())
- return true;
- }
- return false;
-}
-
bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
@@ -662,15 +609,16 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
if (!OuterLoopHeaderBI)
return false;
- for (BasicBlock *Succ : OuterLoopHeaderBI->successors())
- if (Succ != InnerLoopPreHeader && Succ != OuterLoopLatch)
+ for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
+ if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
+ Succ != OuterLoopLatch)
return false;
LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
// We do not have any basic block in between now make sure the outer header
// and outer loop latch doesn't contain any unsafe instructions.
- if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
- containsUnsafeInstructionsInLatch(OuterLoopLatch))
+ if (containsUnsafeInstructions(OuterLoopHeader) ||
+ containsUnsafeInstructions(OuterLoopLatch))
return false;
LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
@@ -702,9 +650,36 @@ bool LoopInterchangeLegality::isLoopStructureUnderstood(
return true;
}
+// If SV is a LCSSA PHI node with a single incoming value, return the incoming
+// value.
+static Value *followLCSSA(Value *SV) {
+ PHINode *PHI = dyn_cast<PHINode>(SV);
+ if (!PHI)
+ return SV;
+
+ if (PHI->getNumIncomingValues() != 1)
+ return SV;
+ return followLCSSA(PHI->getIncomingValue(0));
+}
+
+// Check V's users to see if it is involved in a reduction in L.
+static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
+ for (Value *User : V->users()) {
+ if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+ if (PHI->getNumIncomingValues() == 1)
+ continue;
+ RecurrenceDescriptor RD;
+ if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
+ return PHI;
+ return nullptr;
+ }
+ }
+
+ return nullptr;
+}
+
bool LoopInterchangeLegality::findInductionAndReductions(
- Loop *L, SmallVector<PHINode *, 8> &Inductions,
- SmallVector<PHINode *, 8> &Reductions) {
+ Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
if (!L->getLoopLatch() || !L->getLoopPredecessor())
return false;
for (PHINode &PHI : L->getHeader()->phis()) {
@@ -712,12 +687,33 @@ bool LoopInterchangeLegality::findInductionAndReductions(
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
Inductions.push_back(&PHI);
- else if (RecurrenceDescriptor::isReductionPHI(&PHI, L, RD))
- Reductions.push_back(&PHI);
else {
- LLVM_DEBUG(
- dbgs() << "Failed to recognize PHI as an induction or reduction.\n");
- return false;
+ // PHIs in inner loops need to be part of a reduction in the outer loop,
+ // discovered when checking the PHIs of the outer loop earlier.
+ if (!InnerLoop) {
+ if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) {
+ LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
+ "across the outer loop.\n");
+ return false;
+ }
+ } else {
+ assert(PHI.getNumIncomingValues() == 2 &&
+ "Phis in loop header should have exactly 2 incoming values");
+ // Check if we have a PHI node in the outer loop that has a reduction
+ // result from the inner loop as an incoming value.
+ Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
+ PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
+ if (!InnerRedPhi ||
+ !llvm::any_of(InnerRedPhi->incoming_values(),
+ [&PHI](Value *V) { return V == &PHI; })) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Failed to recognize PHI as an induction or reduction.\n");
+ return false;
+ }
+ OuterInnerReductions.insert(&PHI);
+ OuterInnerReductions.insert(InnerRedPhi);
+ }
}
}
return true;
@@ -766,81 +762,64 @@ bool LoopInterchangeLegality::currentLimitations() {
PHINode *InnerInductionVar;
SmallVector<PHINode *, 8> Inductions;
- SmallVector<PHINode *, 8> Reductions;
- if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
+ if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
LLVM_DEBUG(
- dbgs() << "Only inner loops with induction or reduction PHI nodes "
+ dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with induction or reduction PHI nodes can be"
- " interchange currently.";
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with induction or reduction PHI nodes can be"
+ " interchanged currently.";
});
return true;
}
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
- LLVM_DEBUG(
- dbgs() << "We currently only support loops with 1 induction variable."
- << "Failed to interchange due to current limitation\n");
+ LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
+ << "supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with 1 induction variable can be "
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with 1 induction variable can be "
"interchanged currently.";
});
return true;
}
- if (Reductions.size() > 0)
- InnerLoopHasReduction = true;
- InnerInductionVar = Inductions.pop_back_val();
- Reductions.clear();
- if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
+ Inductions.clear();
+ if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
LLVM_DEBUG(
- dbgs() << "Only outer loops with induction or reduction PHI nodes "
+ dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with induction or reduction PHI nodes can be"
- " interchanged currently.";
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with induction or reduction PHI nodes can be"
+           " interchanged currently.";
});
return true;
}
- // Outer loop cannot have reduction because then loops will not be tightly
- // nested.
- if (!Reductions.empty()) {
- LLVM_DEBUG(dbgs() << "Outer loops with reductions are not supported "
- << "currently.\n");
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Outer loops with reductions cannot be interchangeed "
- "currently.";
- });
- return true;
- }
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
- LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
- << "supported currently.\n");
+ LLVM_DEBUG(
+ dbgs() << "We currently only support loops with 1 induction variable."
+ << "Failed to interchange due to current limitation\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with 1 induction variable can be "
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with 1 induction variable can be "
"interchanged currently.";
});
return true;
}
+ InnerInductionVar = Inductions.pop_back_val();
// TODO: Triangular loops are not handled for now.
if (!isLoopStructureUnderstood(InnerInductionVar)) {
@@ -1016,28 +995,6 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
return false;
}
- // Create unique Preheaders if we already do not have one.
- BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
- BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
-
- // Create a unique outer preheader -
- // 1) If OuterLoop preheader is not present.
- // 2) If OuterLoop Preheader is same as OuterLoop Header
- // 3) If OuterLoop Preheader is same as Header of the previous loop.
- // 4) If OuterLoop Preheader is Entry node.
- if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
- isa<PHINode>(OuterLoopPreHeader->begin()) ||
- !OuterLoopPreHeader->getUniquePredecessor()) {
- OuterLoopPreHeader =
- InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA);
- }
-
- if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
- InnerLoopPreHeader == OuterLoop->getHeader()) {
- InnerLoopPreHeader =
- InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA);
- }
-
// TODO: The loops could not be interchanged due to current limitations in the
// transform module.
if (currentLimitations()) {
@@ -1258,6 +1215,10 @@ void LoopInterchangeTransform::restructureLoops(
// outer loop.
NewOuter->addBlockEntry(OrigOuterPreHeader);
LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
+
+ // Tell SE that we move the loops around.
+ SE->forgetLoop(NewOuter);
+ SE->forgetLoop(NewInner);
}
bool LoopInterchangeTransform::transform() {
@@ -1319,9 +1280,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
FromBB->getTerminator()->getIterator());
}
-void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
- BasicBlock *OldPred,
- BasicBlock *NewPred) {
+static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
+ BasicBlock *NewPred) {
for (PHINode &PHI : CurrBlock->phis()) {
unsigned Num = PHI.getNumIncomingValues();
for (unsigned i = 0; i < Num; ++i) {
@@ -1336,7 +1296,7 @@ void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
BasicBlock *NewBB,
std::vector<DominatorTree::UpdateType> &DTUpdates) {
- assert(llvm::count_if(BI->successors(),
+ assert(llvm::count_if(successors(BI),
[OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&
"BI must jump to OldBB at most once.");
for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
@@ -1352,17 +1312,77 @@ static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
}
}
+// Move Lcssa PHIs to the right place.
+static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
+ BasicBlock *OuterLatch) {
+ SmallVector<PHINode *, 8> LcssaInnerExit;
+ for (PHINode &P : InnerExit->phis())
+ LcssaInnerExit.push_back(&P);
+
+ SmallVector<PHINode *, 8> LcssaInnerLatch;
+ for (PHINode &P : InnerLatch->phis())
+ LcssaInnerLatch.push_back(&P);
+
+ // Lcssa PHIs for values used outside the inner loop are in InnerExit.
+ // If a PHI node has users outside of InnerExit, it has a use outside the
+ // interchanged loop and we have to preserve it. We move these to
+ // InnerLatch, which will become the new exit block for the innermost
+ // loop after interchanging. For PHIs only used in InnerExit, we can just
+ // replace them with the incoming value.
+ for (PHINode *P : LcssaInnerExit) {
+ bool hasUsersOutside = false;
+ for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ auto *Usr = cast<Instruction>(U.getUser());
+ if (Usr->getParent() != InnerExit) {
+ hasUsersOutside = true;
+ continue;
+ }
+ U.set(P->getIncomingValueForBlock(InnerLatch));
+ }
+ if (hasUsersOutside)
+ P->moveBefore(InnerLatch->getFirstNonPHI());
+ else
+ P->eraseFromParent();
+ }
+
+ // If the inner loop latch contains LCSSA PHIs, those come from a child loop
+ // and we have to move them to the new inner latch.
+ for (PHINode *P : LcssaInnerLatch)
+ P->moveBefore(InnerExit->getFirstNonPHI());
+
+ // Now adjust the incoming blocks for the LCSSA PHIs.
+ // For PHIs moved from Inner's exit block, we need to replace Inner's latch
+ // with the new latch.
+ updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch);
+}
+
bool LoopInterchangeTransform::adjustLoopBranches() {
LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
std::vector<DominatorTree::UpdateType> DTUpdates;
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+
+ assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
+ InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
+ InnerLoopPreHeader && "Guaranteed by loop-simplify form");
+ // Ensure that both preheaders do not contain PHI nodes and have single
+ // predecessors. This allows us to move them easily. We use
+ // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
+ // preheaders do not satisfy those conditions.
+ if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
+ !OuterLoopPreHeader->getUniquePredecessor())
+ OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true);
+ if (InnerLoopPreHeader == OuterLoop->getHeader())
+ InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true);
+
// Adjust the loop preheader
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
- BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
- BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
BasicBlock *InnerLoopLatchPredecessor =
InnerLoopLatch->getUniquePredecessor();
@@ -1417,17 +1437,6 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
InnerLoopLatchSuccessor, DTUpdates);
- // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with
- // the value and remove this PHI node from inner loop.
- SmallVector<PHINode *, 8> LcssaVec;
- for (PHINode &P : InnerLoopLatchSuccessor->phis())
- LcssaVec.push_back(&P);
-
- for (PHINode *P : LcssaVec) {
- Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
- P->replaceAllUsesWith(Incoming);
- P->eraseFromParent();
- }
if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
@@ -1439,12 +1448,15 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
DTUpdates);
- updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
-
DT->applyUpdates(DTUpdates);
restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
OuterLoopPreHeader);
+ moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
+ // For PHIs in the exit block of the outer loop, outer's latch has been
+  // replaced by Inner's.
+ updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
+
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
@@ -1452,26 +1464,21 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
- for (PHINode *PHI : OuterLoopPHIs)
- PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
+ auto &OuterInnerReductions = LIL.getOuterInnerReductions();
+ (void)OuterInnerReductions;
- // Move the PHI nodes from the inner loop header to the outer loop header.
- // We have to deal with one kind of PHI nodes:
- // 1) PHI nodes that are part of inner loop-only reductions.
- // We only have to move the PHI node and update the incoming blocks.
+ // Now move the remaining reduction PHIs from outer to inner loop header and
+ // vice versa. The PHI nodes must be part of a reduction across the inner and
+  // outer loop, and all that remains to do is update the incoming blocks.
+ for (PHINode *PHI : OuterLoopPHIs) {
+ PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
+ assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
+ "Expected a reduction PHI node");
+ }
for (PHINode *PHI : InnerLoopPHIs) {
PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
- for (BasicBlock *InBB : PHI->blocks()) {
- if (InnerLoop->contains(InBB))
- continue;
-
- assert(!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) &&
- "Unexpected incoming PHI node, reductions in outer loop are not "
- "supported yet");
- PHI->replaceAllUsesWith(PHI->getIncomingValueForBlock(InBB));
- PHI->eraseFromParent();
- break;
- }
+ assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
+ "Expected a reduction PHI node");
}
// Update the incoming blocks for moved PHI nodes.
@@ -1514,13 +1521,8 @@ char LoopInterchange::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
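For the legality rework above, the OuterInnerReductions set records reduction PHIs that span both loops, which is the only PHI shape (besides inductions) the pass now accepts. An illustrative C++ loop nest of that shape (not from the patch; names are made up):

    // After lowering, Sum roughly becomes an outer-header reduction PHI fed by
    // the inner loop's reduction through a single-input LCSSA PHI, which
    // followLCSSA() looks through.
    int sumMatrix(const int *A, int N, int M) {
      int Sum = 0;
      for (int i = 0; i < N; ++i)
        for (int j = 0; j < M; ++j)
          Sum += A[i * M + j];
      return Sum;
    }

After interchanging, both reduction PHIs are simply moved between the headers and their incoming blocks updated, instead of the nest being rejected as it was previously whenever the outer loop carried a reduction.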
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 10f6fcdcfdb7..774ad7b945a0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -30,12 +30,26 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
if (DebugLogging)
dbgs() << "Starting Loop pass manager run.\n";
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(L, AR);
for (auto &Pass : Passes) {
if (DebugLogging)
dbgs() << "Running pass: " << Pass->name() << " on " << L;
+ // Check the PassInstrumentation's BeforePass callbacks before running the
+ // pass, skip its execution completely if asked to (callback returns false).
+ if (!PI.runBeforePass<Loop>(*Pass, L))
+ continue;
+
PreservedAnalyses PassPA = Pass->run(L, AM, AR, U);
+ // do not pass deleted Loop into the instrumentation
+ if (U.skipCurrentLoop())
+ PI.runAfterPassInvalidated<Loop>(*Pass);
+ else
+ PI.runAfterPass<Loop>(*Pass, L);
+
// If the loop was deleted, abort the run and return to the outer walk.
if (U.skipCurrentLoop()) {
PA.intersect(std::move(PassPA));
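The loop pass manager now consults PassInstrumentation around each pass. A hedged sketch of driving the new BeforePass hook from the callbacks side; it assumes the registerBeforePassCallback interface of this LLVM import and uses a hypothetical pass name:

    #include "llvm/ADT/Any.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/PassInstrumentation.h"
    using namespace llvm;

    // Returning false from a BeforePass callback asks the pass manager to skip
    // that pass execution, which is what runBeforePass<Loop> above checks for.
    void installLoopPassFilter(PassInstrumentationCallbacks &PIC) {
      PIC.registerBeforePassCallback([](StringRef PassID, Any /*IR*/) {
        return PassID != "LoopFullUnrollPass"; // hypothetical pass to skip
      });
    }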
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index cbb6594cf8f4..5983c804c0c1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -178,7 +178,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -196,6 +198,9 @@
#define DEBUG_TYPE "loop-predication"
+STATISTIC(TotalConsidered, "Number of guards considered");
+STATISTIC(TotalWidened, "Number of checks widened");
+
using namespace llvm;
static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
@@ -574,6 +579,8 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
LLVM_DEBUG(dbgs() << "Processing guard:\n");
LLVM_DEBUG(Guard->dump());
+ TotalConsidered++;
+
IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
// The guard condition is expected to be in form of:
@@ -615,6 +622,8 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
if (NumWidened == 0)
return false;
+ TotalWidened += NumWidened;
+
// Emit the new guard condition
Builder.SetInsertPoint(Guard);
Value *LastCheck = nullptr;
@@ -812,9 +821,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
SmallVector<IntrinsicInst *, 4> Guards;
for (const auto BB : L->blocks())
for (auto &I : *BB)
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::experimental_guard)
- Guards.push_back(II);
+ if (isGuard(&I))
+ Guards.push_back(cast<IntrinsicInst>(&I));
if (Guards.empty())
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index eeaad39dc1d1..fd22128f7fe6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,6 +15,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
@@ -40,12 +42,19 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
- bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ,
- false, Threshold, false);
+ Optional<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA)
+ MSSAU = MemorySSAUpdater(AR.MSSA);
+ bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ SQ, false, Threshold, false);
if (!Changed)
return PreservedAnalyses::all();
+ if (AR.MSSA && VerifyMemorySSA)
+ AR.MSSA->verifyMemorySSA();
+
return getLoopPassPreservedAnalyses();
}
@@ -68,6 +77,10 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
getLoopAnalysisUsage(AU);
}
@@ -84,8 +97,14 @@ public:
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
- return LoopRotation(L, LI, TTI, AC, DT, SE, SQ, false, MaxHeaderSize,
- false);
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = MemorySSAUpdater(MSSA);
+ }
+ return LoopRotation(L, LI, TTI, AC, DT, SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
+ false, MaxHeaderSize, false);
}
};
}
@@ -96,6 +115,7 @@ INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
false)
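Both the LoopInstSimplify and LoopRotation changes above thread an optional MemorySSAUpdater into the pass and verify the analysis when requested. A minimal sketch of the bookkeeping this enables when a simplified instruction is replaced (an assumed helper, not part of the patch, using only APIs the hunks themselves call):

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    // When an instruction is replaced by another value, redirect its
    // MemoryAccess (if any) to the replacement's access so MemorySSA stays
    // consistent; the dead instruction can then be cleaned up via
    // RecursivelyDeleteTriviallyDeadInstructions(..., MSSAU).
    static void replaceAndUpdateMSSA(Instruction &Old, Value *New,
                                     MemorySSAUpdater *MSSAU) {
      Old.replaceAllUsesWith(New);
      if (!MSSAU)
        return;
      MemorySSA &MSSA = *MSSAU->getMemorySSA();
      if (auto *NewI = dyn_cast_or_null<Instruction>(New))
        if (MemoryAccess *OldMA = MSSA.getMemoryAccess(&Old))
          if (MemoryAccess *NewMA = MSSA.getMemoryAccess(NewI))
            OldMA->replaceAllUsesWith(NewMA);
    }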
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 2b83d3dc5f1b..2e5927f9a068 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,9 +24,12 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -38,9 +41,527 @@ using namespace llvm;
#define DEBUG_TYPE "loop-simplifycfg"
-static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
- ScalarEvolution &SE) {
+static cl::opt<bool> EnableTermFolding("enable-loop-simplifycfg-term-folding",
+ cl::init(false));
+
+STATISTIC(NumTerminatorsFolded,
+ "Number of terminators folded to unconditional branches");
+STATISTIC(NumLoopBlocksDeleted,
+ "Number of loop blocks deleted");
+STATISTIC(NumLoopExitsDeleted,
+ "Number of loop exiting edges deleted");
+
+/// If \p BB is a switch or a conditional branch, but only one of its successors
+/// can be reached from this block at runtime, return this successor. Otherwise,
+/// return nullptr.
+static BasicBlock *getOnlyLiveSuccessor(BasicBlock *BB) {
+ Instruction *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional())
+ return nullptr;
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ return BI->getSuccessor(0);
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ if (!Cond)
+ return nullptr;
+ return Cond->isZero() ? BI->getSuccessor(1) : BI->getSuccessor(0);
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ auto *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ if (!CI)
+ return nullptr;
+ for (auto Case : SI->cases())
+ if (Case.getCaseValue() == CI)
+ return Case.getCaseSuccessor();
+ return SI->getDefaultDest();
+ }
+
+ return nullptr;
+}
+
+namespace {
+/// Helper class that can turn branches and switches with constant conditions
+/// into unconditional branches.
+class ConstantTerminatorFoldingImpl {
+private:
+ Loop &L;
+ LoopInfo &LI;
+ DominatorTree &DT;
+ ScalarEvolution &SE;
+ MemorySSAUpdater *MSSAU;
+
+ // Whether or not the current loop has irreducible CFG.
+ bool HasIrreducibleCFG = false;
+ // Whether or not the current loop will still exist after terminator constant
+  // folding is done. In theory, there are two ways this can happen:
+ // 1. Loop's latch(es) become unreachable from loop header;
+ // 2. Loop's header becomes unreachable from method entry.
+ // In practice, the second situation is impossible because we only modify the
+  // current loop and its preheader and do not affect the preheader's
+  // reachability from any other block. So this variable being set to true
+  // means that the loop's latch has become unreachable from the loop header.
+ bool DeleteCurrentLoop = false;
+
+ // The blocks of the original loop that will still be reachable from entry
+ // after the constant folding.
+ SmallPtrSet<BasicBlock *, 8> LiveLoopBlocks;
+ // The blocks of the original loop that will become unreachable from entry
+ // after the constant folding.
+ SmallVector<BasicBlock *, 8> DeadLoopBlocks;
+ // The exits of the original loop that will still be reachable from entry
+ // after the constant folding.
+ SmallPtrSet<BasicBlock *, 8> LiveExitBlocks;
+ // The exits of the original loop that will become unreachable from entry
+ // after the constant folding.
+ SmallVector<BasicBlock *, 8> DeadExitBlocks;
+ // The blocks that will still be a part of the current loop after folding.
+ SmallPtrSet<BasicBlock *, 8> BlocksInLoopAfterFolding;
+ // The blocks that have terminators with constant condition that can be
+ // folded. Note: fold candidates should be in L but not in any of its
+ // subloops to avoid complex LI updates.
+ SmallVector<BasicBlock *, 8> FoldCandidates;
+
+ void dump() const {
+ dbgs() << "Constant terminator folding for loop " << L << "\n";
+ dbgs() << "After terminator constant-folding, the loop will";
+ if (!DeleteCurrentLoop)
+ dbgs() << " not";
+ dbgs() << " be destroyed\n";
+ auto PrintOutVector = [&](const char *Message,
+ const SmallVectorImpl<BasicBlock *> &S) {
+ dbgs() << Message << "\n";
+ for (const BasicBlock *BB : S)
+ dbgs() << "\t" << BB->getName() << "\n";
+ };
+ auto PrintOutSet = [&](const char *Message,
+ const SmallPtrSetImpl<BasicBlock *> &S) {
+ dbgs() << Message << "\n";
+ for (const BasicBlock *BB : S)
+ dbgs() << "\t" << BB->getName() << "\n";
+ };
+ PrintOutVector("Blocks in which we can constant-fold terminator:",
+ FoldCandidates);
+ PrintOutSet("Live blocks from the original loop:", LiveLoopBlocks);
+ PrintOutVector("Dead blocks from the original loop:", DeadLoopBlocks);
+ PrintOutSet("Live exit blocks:", LiveExitBlocks);
+ PrintOutVector("Dead exit blocks:", DeadExitBlocks);
+ if (!DeleteCurrentLoop)
+ PrintOutSet("The following blocks will still be part of the loop:",
+ BlocksInLoopAfterFolding);
+ }
+
+ /// Whether or not the current loop has irreducible CFG.
+ bool hasIrreducibleCFG(LoopBlocksDFS &DFS) {
+ assert(DFS.isComplete() && "DFS is expected to be finished");
+ // Index of a basic block in RPO traversal.
+ DenseMap<const BasicBlock *, unsigned> RPO;
+ unsigned Current = 0;
+ for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I)
+ RPO[*I] = Current++;
+
+ for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ for (auto *Succ : successors(BB))
+ if (L.contains(Succ) && !LI.isLoopHeader(Succ) && RPO[BB] > RPO[Succ])
+          // If an edge goes from a block with a greater order number into a
+          // block with a lesser number, and it is not a loop backedge, then it
+          // can only be part of an irreducible non-loop cycle.
+ return true;
+ }
+ return false;
+ }
+
+ /// Fill all information about status of blocks and exits of the current loop
+ /// if constant folding of all branches will be done.
+ void analyze() {
+ LoopBlocksDFS DFS(&L);
+ DFS.perform(&LI);
+ assert(DFS.isComplete() && "DFS is expected to be finished");
+
+ // TODO: The algorithm below relies on both RPO and Postorder traversals.
+ // When the loop has only reducible CFG inside, then the invariant "all
+ // predecessors of X are processed before X in RPO" is preserved. However
+ // an irreducible loop can break this invariant (e.g. latch does not have to
+ // be the last block in the traversal in this case, and the algorithm relies
+ // on this). We can later decide to support such cases by altering the
+ // algorithms, but so far we just give up analyzing them.
+ if (hasIrreducibleCFG(DFS)) {
+ HasIrreducibleCFG = true;
+ return;
+ }
+
+ // Collect live and dead loop blocks and exits.
+ LiveLoopBlocks.insert(L.getHeader());
+ for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ // If a loop block wasn't marked as live so far, then it's dead.
+ if (!LiveLoopBlocks.count(BB)) {
+ DeadLoopBlocks.push_back(BB);
+ continue;
+ }
+
+ BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB);
+
+      // If a block has only one live successor, it's a candidate for constant
+ // folding. Only handle blocks from current loop: branches in child loops
+ // are skipped because if they can be folded, they should be folded during
+ // the processing of child loops.
+ if (TheOnlySucc && LI.getLoopFor(BB) == &L)
+ FoldCandidates.push_back(BB);
+
+ // Handle successors.
+ for (BasicBlock *Succ : successors(BB))
+ if (!TheOnlySucc || TheOnlySucc == Succ) {
+ if (L.contains(Succ))
+ LiveLoopBlocks.insert(Succ);
+ else
+ LiveExitBlocks.insert(Succ);
+ }
+ }
+
+ // Sanity check: amount of dead and live loop blocks should match the total
+ // number of blocks in loop.
+ assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() &&
+ "Malformed block sets?");
+
+ // Now, all exit blocks that are not marked as live are dead.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+ for (auto *ExitBlock : ExitBlocks)
+ if (!LiveExitBlocks.count(ExitBlock))
+ DeadExitBlocks.push_back(ExitBlock);
+
+ // Whether or not the edge From->To will still be present in graph after the
+ // folding.
+ auto IsEdgeLive = [&](BasicBlock *From, BasicBlock *To) {
+ if (!LiveLoopBlocks.count(From))
+ return false;
+ BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(From);
+ return !TheOnlySucc || TheOnlySucc == To;
+ };
+
+ // The loop will not be destroyed if its latch is live.
+ DeleteCurrentLoop = !IsEdgeLive(L.getLoopLatch(), L.getHeader());
+
+ // If we are going to delete the current loop completely, no extra analysis
+ // is needed.
+ if (DeleteCurrentLoop)
+ return;
+
+ // Otherwise, we should check which blocks will still be a part of the
+ // current loop after the transform.
+ BlocksInLoopAfterFolding.insert(L.getLoopLatch());
+ // If the loop is live, then we should compute what blocks are still in
+ // loop after all branch folding has been done. A block is in loop if
+ // it has a live edge to another block that is in the loop; by definition,
+ // latch is in the loop.
+ auto BlockIsInLoop = [&](BasicBlock *BB) {
+ return any_of(successors(BB), [&](BasicBlock *Succ) {
+ return BlocksInLoopAfterFolding.count(Succ) && IsEdgeLive(BB, Succ);
+ });
+ };
+ for (auto I = DFS.beginPostorder(), E = DFS.endPostorder(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ if (BlockIsInLoop(BB))
+ BlocksInLoopAfterFolding.insert(BB);
+ }
+
+ // Sanity check: header must be in loop.
+ assert(BlocksInLoopAfterFolding.count(L.getHeader()) &&
+ "Header not in loop?");
+ assert(BlocksInLoopAfterFolding.size() <= LiveLoopBlocks.size() &&
+ "All blocks that stay in loop should be live!");
+ }
+
+  /// We need to preserve static reachability of all loop exit blocks (this is
+  /// required by the loop pass manager). In order to do it, we use the
+  /// following trick:
+ ///
+ /// preheader:
+ /// <preheader code>
+ /// br label %loop_header
+ ///
+ /// loop_header:
+ /// ...
+ /// br i1 false, label %dead_exit, label %loop_block
+ /// ...
+ ///
+ /// We cannot simply remove edge from the loop to dead exit because in this
+ /// case dead_exit (and its successors) may become unreachable. To avoid that,
+ /// we insert the following fictive preheader:
+ ///
+ /// preheader:
+ /// <preheader code>
+ /// switch i32 0, label %preheader-split,
+ /// [i32 1, label %dead_exit_1],
+ /// [i32 2, label %dead_exit_2],
+ /// ...
+ /// [i32 N, label %dead_exit_N],
+ ///
+ /// preheader-split:
+ /// br label %loop_header
+ ///
+ /// loop_header:
+ /// ...
+ /// br i1 false, label %dead_exit_N, label %loop_block
+ /// ...
+ ///
+  /// Doing so, we preserve static reachability of all dead exits and can later
+ /// remove edges from the loop to these blocks.
+ void handleDeadExits() {
+ // If no dead exits, nothing to do.
+ if (DeadExitBlocks.empty())
+ return;
+
+ // Construct split preheader and the dummy switch to thread edges from it to
+ // dead exits.
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ BasicBlock *Preheader = L.getLoopPreheader();
+ BasicBlock *NewPreheader = Preheader->splitBasicBlock(
+ Preheader->getTerminator(),
+ Twine(Preheader->getName()).concat("-split"));
+ DTU.deleteEdge(Preheader, L.getHeader());
+ DTU.insertEdge(NewPreheader, L.getHeader());
+ DTU.insertEdge(Preheader, NewPreheader);
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SwitchInst *DummySwitch =
+ Builder.CreateSwitch(Builder.getInt32(0), NewPreheader);
+ Preheader->getTerminator()->eraseFromParent();
+
+ unsigned DummyIdx = 1;
+ for (BasicBlock *BB : DeadExitBlocks) {
+ SmallVector<Instruction *, 4> DeadPhis;
+ for (auto &PN : BB->phis())
+ DeadPhis.push_back(&PN);
+
+ // Eliminate all Phis from dead exits.
+ for (Instruction *PN : DeadPhis) {
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+ assert(DummyIdx != 0 && "Too many dead exits!");
+ DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB);
+ DTU.insertEdge(Preheader, BB);
+ ++NumLoopExitsDeleted;
+ }
+
+ assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
+ if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
+ OuterLoop->addBasicBlockToLoop(NewPreheader, LI);
+
+ // When we break dead edges, the outer loop may become unreachable from
+ // the current loop. We need to fix loop info accordingly. For this, we
+ // find the most nested loop that still contains L and remove L from all
+ // loops that are inside of it.
+ Loop *StillReachable = nullptr;
+ for (BasicBlock *BB : LiveExitBlocks) {
+ Loop *BBL = LI.getLoopFor(BB);
+ if (BBL && BBL->contains(L.getHeader()))
+ if (!StillReachable ||
+ BBL->getLoopDepth() > StillReachable->getLoopDepth())
+ StillReachable = BBL;
+ }
+
+ // Okay, our loop is no longer in the outer loop (and maybe not in some of
+ // its parents as well). Make the fixup.
+ if (StillReachable != OuterLoop) {
+ LI.changeLoopFor(NewPreheader, StillReachable);
+ for (Loop *NotContaining = OuterLoop; NotContaining != StillReachable;
+ NotContaining = NotContaining->getParentLoop()) {
+ NotContaining->removeBlockFromLoop(NewPreheader);
+ for (auto *BB : L.blocks())
+ NotContaining->removeBlockFromLoop(BB);
+ }
+ OuterLoop->removeChildLoop(&L);
+ if (StillReachable)
+ StillReachable->addChildLoop(&L);
+ else
+ LI.addTopLevelLoop(&L);
+ }
+ }
+ }
+
+ /// Delete loop blocks that have become unreachable after folding. Make all
+ /// relevant updates to DT and LI.
+ void deleteDeadLoopBlocks() {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ if (MSSAU) {
+ SmallPtrSet<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
+ DeadLoopBlocks.end());
+ MSSAU->removeBlocks(DeadLoopBlocksSet);
+ }
+ for (auto *BB : DeadLoopBlocks) {
+ assert(BB != L.getHeader() &&
+ "Header of the current loop cannot be dead!");
+ LLVM_DEBUG(dbgs() << "Deleting dead loop block " << BB->getName()
+ << "\n");
+ if (LI.isLoopHeader(BB)) {
+ assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
+ LI.erase(LI.getLoopFor(BB));
+ }
+ LI.removeBlock(BB);
+ DeleteDeadBlock(BB, &DTU);
+ ++NumLoopBlocksDeleted;
+ }
+ }
+
+ /// Constant-fold terminators of blocks accumulated in FoldCandidates into
+ /// unconditional branches.
+ void foldTerminators() {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+
+ for (BasicBlock *BB : FoldCandidates) {
+ assert(LI.getLoopFor(BB) == &L && "Should be a loop block!");
+ BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB);
+ assert(TheOnlySucc && "Should have one live successor!");
+
+ LLVM_DEBUG(dbgs() << "Replacing terminator of " << BB->getName()
+ << " with an unconditional branch to the block "
+ << TheOnlySucc->getName() << "\n");
+
+ SmallPtrSet<BasicBlock *, 2> DeadSuccessors;
+ // Remove all BB's successors except for the live one.
+ unsigned TheOnlySuccDuplicates = 0;
+ for (auto *Succ : successors(BB))
+ if (Succ != TheOnlySucc) {
+ DeadSuccessors.insert(Succ);
+ // If our successor lies in a different loop, we don't want to remove
+ // the one-input Phi because it is an LCSSA Phi.
+ bool PreserveLCSSAPhi = !L.contains(Succ);
+ Succ->removePredecessor(BB, PreserveLCSSAPhi);
+ if (MSSAU)
+ MSSAU->removeEdge(BB, Succ);
+ } else
+ ++TheOnlySuccDuplicates;
+
+ assert(TheOnlySuccDuplicates > 0 && "Should be!");
+ // If TheOnlySucc was BB's successor more than once, after transform it
+ // will be its successor only once. Remove redundant inputs from
+ // TheOnlySucc's Phis.
+ bool PreserveLCSSAPhi = !L.contains(TheOnlySucc);
+ for (unsigned Dup = 1; Dup < TheOnlySuccDuplicates; ++Dup)
+ TheOnlySucc->removePredecessor(BB, PreserveLCSSAPhi);
+ if (MSSAU && TheOnlySuccDuplicates > 1)
+ MSSAU->removeDuplicatePhiEdgesBetween(BB, TheOnlySucc);
+
+ IRBuilder<> Builder(BB->getContext());
+ Instruction *Term = BB->getTerminator();
+ Builder.SetInsertPoint(Term);
+ Builder.CreateBr(TheOnlySucc);
+ Term->eraseFromParent();
+
+ for (auto *DeadSucc : DeadSuccessors)
+ DTU.deleteEdge(BB, DeadSucc);
+
+ ++NumTerminatorsFolded;
+ }
+ }
+
+public:
+ ConstantTerminatorFoldingImpl(Loop &L, LoopInfo &LI, DominatorTree &DT,
+ ScalarEvolution &SE,
+ MemorySSAUpdater *MSSAU)
+ : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU) {}
+ bool run() {
+ assert(L.getLoopLatch() && "Should be single latch!");
+
+ // Collect all available information about status of blocks after constant
+ // folding.
+ analyze();
+
+ LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName()
+ << ": ");
+
+ if (HasIrreducibleCFG) {
+ LLVM_DEBUG(dbgs() << "Loops with irreducible CFG are not supported!\n");
+ return false;
+ }
+
+ // Nothing to constant-fold.
+ if (FoldCandidates.empty()) {
+ LLVM_DEBUG(
+ dbgs() << "No constant terminator folding candidates found in loop "
+ << L.getHeader()->getName() << "\n");
+ return false;
+ }
+
+ // TODO: Support deletion of the current loop.
+ if (DeleteCurrentLoop) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Give up constant terminator folding in loop "
+ << L.getHeader()->getName()
+ << ": we don't currently support deletion of the current loop.\n");
+ return false;
+ }
+
+ // TODO: Support blocks that are not dead, but are also no longer in the loop
+ // after the folding.
+ if (BlocksInLoopAfterFolding.size() + DeadLoopBlocks.size() !=
+ L.getNumBlocks()) {
+ LLVM_DEBUG(
+ dbgs() << "Give up constant terminator folding in loop "
+ << L.getHeader()->getName()
+ << ": we don't currently"
+ " support blocks that are not dead, but will stop "
+ "being a part of the loop after constant-folding.\n");
+ return false;
+ }
+
+ SE.forgetTopmostLoop(&L);
+ // Dump analysis results.
+ LLVM_DEBUG(dump());
+
+ LLVM_DEBUG(dbgs() << "Constant-folding " << FoldCandidates.size()
+ << " terminators in loop " << L.getHeader()->getName()
+ << "\n");
+
+ // Make the actual transforms.
+ handleDeadExits();
+ foldTerminators();
+
+ if (!DeadLoopBlocks.empty()) {
+ LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size()
+ << " dead blocks in loop " << L.getHeader()->getName()
+ << "\n");
+ deleteDeadLoopBlocks();
+ }
+
+#ifndef NDEBUG
+ // Make sure that we have preserved all data structures after the transform.
+ DT.verify();
+ assert(DT.isReachableFromEntry(L.getHeader()));
+ LI.verify(DT);
+#endif
+
+ return true;
+ }
+};
+} // namespace
+
+/// Turn branches and switches with known constant conditions into unconditional
+/// branches.
+static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
+ MemorySSAUpdater *MSSAU) {
+ if (!EnableTermFolding)
+ return false;
+
+ // To keep things simple, only process loops with a single latch. We
+ // canonicalize most loops to this form. We can support multi-latch if needed.
+ if (!L.getLoopLatch())
+ return false;
+
+ ConstantTerminatorFoldingImpl BranchFolder(L, LI, DT, SE, MSSAU);
+ return BranchFolder.run();
+}
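For illustration only (not part of this patch), a minimal LLVM IR sketch of the rewrite this folding enables; block and value names are invented. A conditional branch whose condition is known to be false can only take its false edge, so the terminator is replaced by an unconditional branch and the dead successor loses this in-loop predecessor:

  ; Before: %dead_exit can never be taken, %live_succ is the only live successor.
  bb:
    br i1 false, label %dead_exit, label %live_succ

  ; After foldTerminators: the terminator becomes an unconditional branch and
  ; %dead_exit no longer has %bb as a predecessor.
  bb:
    br label %live_succ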
+
+static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
+ LoopInfo &LI, MemorySSAUpdater *MSSAU) {
bool Changed = false;
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Copy blocks into a temporary array to avoid iterator invalidation issues
// as we remove them.
SmallVector<WeakTrackingVH, 16> Blocks(L.blocks());
@@ -57,19 +578,38 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
continue;
// Merge Succ into Pred and delete it.
- MergeBlockIntoPredecessor(Succ, &DT, &LI);
+ MergeBlockIntoPredecessor(Succ, &DTU, &LI, MSSAU);
- SE.forgetLoop(&L);
Changed = true;
}
return Changed;
}
+static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE, MemorySSAUpdater *MSSAU) {
+ bool Changed = false;
+
+ // Constant-fold terminators with known constant conditions.
+ Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU);
+
+ // Eliminate unconditional branches by merging blocks into their predecessors.
+ Changed |= mergeBlocksIntoPredecessors(L, DT, LI, MSSAU);
+
+ if (Changed)
+ SE.forgetTopmostLoop(&L);
+
+ return Changed;
+}
+
PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
- if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE))
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency && AR.MSSA)
+ MSSAU = MemorySSAUpdater(AR.MSSA);
+ if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
@@ -90,10 +630,22 @@ public:
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- return simplifyLoopCFG(*L, DT, LI, SE);
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = MemorySSAUpdater(MSSA);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
+ return simplifyLoopCFG(*L, DT, LI, SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
AU.addPreserved<DependenceAnalysisWrapperPass>();
getLoopAnalysisUsage(AU);
}
@@ -104,6 +656,7 @@ char LoopSimplifyCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplifyCFGLegacyPass, "loop-simplifycfg",
"Simplify loop CFG", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LoopSimplifyCFGLegacyPass, "loop-simplifycfg",
"Simplify loop CFG", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 7d62349d4719..2f7ad2126ed3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -202,17 +202,22 @@ static bool sinkInstruction(Loop &L, Instruction &I,
if (BBsToSinkInto.empty())
return false;
+ // Do not sink if any of the candidate blocks to sink into is non-cold.
+ if (BBsToSinkInto.size() > 1) {
+ for (auto *BB : BBsToSinkInto)
+ if (!LoopBlockNumber.count(BB))
+ return false;
+ }
+
// Copy the final BBs into a vector and sort them using the total ordering
// of the loop block numbers as iterating the set doesn't give a useful
// order. No need to stable sort as the block numbers are a total ordering.
SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(),
BBsToSinkInto.end());
- llvm::sort(SortedBBsToSinkInto.begin(), SortedBBsToSinkInto.end(),
- [&](BasicBlock *A, BasicBlock *B) {
- return LoopBlockNumber.find(A)->second <
- LoopBlockNumber.find(B)->second;
- });
+ llvm::sort(SortedBBsToSinkInto, [&](BasicBlock *A, BasicBlock *B) {
+ return LoopBlockNumber.find(A)->second < LoopBlockNumber.find(B)->second;
+ });
BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
// FIXME: Optimize the efficiency for cloned value replacement. The current
@@ -275,6 +280,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// Compute alias set.
for (BasicBlock *BB : L.blocks())
CurAST.add(*BB);
+ CurAST.add(*Preheader);
// Sort loop's basic blocks by frequency
SmallVector<BasicBlock *, 10> ColdLoopBBs;
@@ -298,7 +304,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// No need to check whether the instruction's operands are loop invariant.
assert(L.hasLoopInvariantOperands(I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
+ if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
continue;
if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
Changed = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fa83b48210bc..773ffb9df0a2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -155,6 +155,11 @@ static cl::opt<bool> FilterSameScaledReg(
cl::desc("Narrow LSR search space by filtering non-optimal formulae"
" with the same ScaledReg and Scale"));
+static cl::opt<unsigned> ComplexityLimit(
+ "lsr-complexity-limit", cl::Hidden,
+ cl::init(std::numeric_limits<uint16_t>::max()),
+ cl::desc("LSR search space complexity limit"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -1487,7 +1492,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
return Uniquifier.count(Key);
}
@@ -1511,7 +1516,7 @@ bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
if (!Uniquifier.insert(Key).second)
return false;
@@ -3638,32 +3643,62 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
- if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+ if (Base.BaseRegs.size() + (Base.Scale == 1) +
+ (Base.UnfoldedOffset != 0) <= 1)
return;
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
// processing the formula.
Base.unscale();
- Formula F = Base;
- F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
+ Formula NewBase = Base;
+ NewBase.BaseRegs.clear();
+ Type *CombinedIntegerType = nullptr;
for (const SCEV *BaseReg : Base.BaseRegs) {
if (SE.properlyDominates(BaseReg, L->getHeader()) &&
- !SE.hasComputableLoopEvolution(BaseReg, L))
+ !SE.hasComputableLoopEvolution(BaseReg, L)) {
+ if (!CombinedIntegerType)
+ CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
Ops.push_back(BaseReg);
+ }
else
- F.BaseRegs.push_back(BaseReg);
+ NewBase.BaseRegs.push_back(BaseReg);
}
- if (Ops.size() > 1) {
- const SCEV *Sum = SE.getAddExpr(Ops);
+
+ // If no register is relevant, we're done.
+ if (Ops.size() == 0)
+ return;
+
+ // Utility function for generating the required variants of the combined
+ // registers.
+ auto GenerateFormula = [&](const SCEV *Sum) {
+ Formula F = NewBase;
+
// TODO: If Sum is zero, it probably means ScalarEvolution missed an
// opportunity to fold something. For now, just ignore such cases
// rather than proceed with zero in a register.
- if (!Sum->isZero()) {
- F.BaseRegs.push_back(Sum);
- F.canonicalize(*L);
- (void)InsertFormula(LU, LUIdx, F);
- }
+ if (Sum->isZero())
+ return;
+
+ F.BaseRegs.push_back(Sum);
+ F.canonicalize(*L);
+ (void)InsertFormula(LU, LUIdx, F);
+ };
+
+ // If we collected at least two registers, generate a formula combining them.
+ if (Ops.size() > 1) {
+ SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
+ GenerateFormula(SE.getAddExpr(OpsCopy));
+ }
+
+ // If we have an unfolded offset, generate a formula combining it with the
+ // registers collected.
+ if (NewBase.UnfoldedOffset) {
+ assert(CombinedIntegerType && "Missing a type for the unfolded offset");
+ Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
+ true));
+ NewBase.UnfoldedOffset = 0;
+ GenerateFormula(SE.getAddExpr(Ops));
}
}
@@ -4238,7 +4273,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for
// uniquifying.
- llvm::sort(Key.begin(), Key.end());
+ llvm::sort(Key);
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
@@ -4281,9 +4316,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
});
}
-// This is a rough guess that seems to work fairly well.
-static const size_t ComplexityLimit = std::numeric_limits<uint16_t>::max();
-
/// Estimate the worst-case number of solutions the solver might have to
/// consider. It almost never considers this many solutions because it prunes the
/// search space, but the pruning isn't always sufficient.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 86c99aed4417..da46210b6fdd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -56,6 +56,20 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll-and-jam"
+/// @{
+/// Metadata attribute names
+static const char *const LLVMLoopUnrollAndJamFollowupAll =
+ "llvm.loop.unroll_and_jam.followup_all";
+static const char *const LLVMLoopUnrollAndJamFollowupInner =
+ "llvm.loop.unroll_and_jam.followup_inner";
+static const char *const LLVMLoopUnrollAndJamFollowupOuter =
+ "llvm.loop.unroll_and_jam.followup_outer";
+static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
+ "llvm.loop.unroll_and_jam.followup_remainder_inner";
+static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
+ "llvm.loop.unroll_and_jam.followup_remainder_outer";
+/// @}
+
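These followup attribute names are consumed through makeFollowupLoopID further down in this file. As a rough, hypothetical sketch (the exact metadata node layout here is an assumption for illustration, not taken from this patch), an outer-loop latch branch requesting unroll-and-jam and asking that the jammed inner loop be marked with llvm.loop.unroll.disable afterwards might look like:

  br i1 %cmp, label %outer.header, label %exit, !llvm.loop !0

  !0 = distinct !{!0, !1, !2}
  !1 = !{!"llvm.loop.unroll_and_jam.enable"}
  !2 = !{!"llvm.loop.unroll_and_jam.followup_inner", !3}
  !3 = !{!"llvm.loop.unroll.disable"}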
static cl::opt<bool>
AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
cl::desc("Allows loops to be unroll-and-jammed."));
@@ -112,11 +126,6 @@ static bool HasUnrollAndJamEnablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
}
-// Returns true if the loop has an unroll_and_jam(disable) pragma.
-static bool HasUnrollAndJamDisablePragma(const Loop *L) {
- return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.disable");
-}
-
// If loop has an unroll_and_jam_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0.
static unsigned UnrollAndJamCountPragmaValue(const Loop *L) {
@@ -149,7 +158,26 @@ static bool computeUnrollAndJamCount(
OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) {
- // Check for explicit Count from the "unroll-and-jam-count" option.
+ // First, use computeUnrollCount from the loop unroller to get a count for
+ // unrolling the outer loop; any loops that require explicit unrolling are
+ // left to the unroller. This uses UP.Threshold / UP.PartialThreshold /
+ // UP.MaxCount to come up with sensible loop values.
+ // We have already checked that the loop has no unroll.* pragmas.
+ unsigned MaxTripCount = 0;
+ bool UseUpperBound = false;
+ bool ExplicitUnroll = computeUnrollCount(
+ L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
+ OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
+ if (ExplicitUnroll || UseUpperBound) {
+ // If the user explicitly set the loop to be unrolled, don't unroll-and-jam
+ // it. Leave it for the unroller instead.
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
+ "computeUnrollCount\n");
+ UP.Count = 0;
+ return false;
+ }
+
+ // Override with any explicit Count from the "unroll-and-jam-count" option.
bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
if (UserUnrollCount) {
UP.Count = UnrollAndJamCount;
@@ -174,80 +202,76 @@ static bool computeUnrollAndJamCount(
return true;
}
- // Use computeUnrollCount from the loop unroller to get a sensible count
- // for the unrolling the outer loop. This uses UP.Threshold /
- // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
- // We have already checked that the loop has no unroll.* pragmas.
- unsigned MaxTripCount = 0;
- bool UseUpperBound = false;
- bool ExplicitUnroll = computeUnrollCount(
- L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
- OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
- if (ExplicitUnroll || UseUpperBound) {
- // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
- // for the unroller instead.
- UP.Count = 0;
- return false;
- }
-
bool PragmaEnableUnroll = HasUnrollAndJamEnablePragma(L);
- ExplicitUnroll = PragmaCount > 0 || PragmaEnableUnroll || UserUnrollCount;
+ bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
+ bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits.
- if (ExplicitUnroll && OuterTripCount != 0)
+ if (ExplicitUnrollAndJam)
UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold;
if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
UP.UnrollAndJamInnerLoopThreshold) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
+ "inner loop too large\n");
UP.Count = 0;
return false;
}
+ // We have a sensible limit for the outer loop, now adjust it for the inner
+ // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
+ // explicitly, we want to stick to it.
+ if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
+ while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
+ UP.UnrollAndJamInnerLoopThreshold)
+ UP.Count--;
+ }
+
+ // If we are explicitly unrolling and jamming, we are done. Otherwise there
+ // are a number of extra performance heuristics to check.
+ if (ExplicitUnrollAndJam)
+ return true;
+
// If the inner loop count is known and small, leave the entire loop nest to
// the unroller.
- if (!ExplicitUnroll && InnerTripCount &&
- InnerLoopSize * InnerTripCount < UP.Threshold) {
+ if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
+ "being left for the unroller\n");
UP.Count = 0;
return false;
}
- // We have a sensible limit for the outer loop, now adjust it for the inner
- // loop and UP.UnrollAndJamInnerLoopThreshold.
- while (UP.Count != 0 && UP.AllowRemainder &&
- getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
- UP.UnrollAndJamInnerLoopThreshold)
- UP.Count--;
-
- if (!ExplicitUnroll) {
- // Check for situations where UnJ is likely to be unprofitable. Including
- // subloops with more than 1 block.
- if (SubLoop->getBlocks().size() != 1) {
- UP.Count = 0;
- return false;
- }
+ // Check for situations where UnJ is likely to be unprofitable, including
+ // subloops with more than 1 block.
+ if (SubLoop->getBlocks().size() != 1) {
+ LLVM_DEBUG(
+ dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
+ UP.Count = 0;
+ return false;
+ }
- // Limit to loops where there is something to gain from unrolling and
- // jamming the loop. In this case, look for loads that are invariant in the
- // outer loop and can become shared.
- unsigned NumInvariant = 0;
- for (BasicBlock *BB : SubLoop->getBlocks()) {
- for (Instruction &I : *BB) {
- if (auto *Ld = dyn_cast<LoadInst>(&I)) {
- Value *V = Ld->getPointerOperand();
- const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
- if (SE.isLoopInvariant(LSCEV, L))
- NumInvariant++;
- }
+ // Limit to loops where there is something to gain from unrolling and
+ // jamming the loop. In this case, look for loads that are invariant in the
+ // outer loop and can become shared.
+ unsigned NumInvariant = 0;
+ for (BasicBlock *BB : SubLoop->getBlocks()) {
+ for (Instruction &I : *BB) {
+ if (auto *Ld = dyn_cast<LoadInst>(&I)) {
+ Value *V = Ld->getPointerOperand();
+ const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
+ if (SE.isLoopInvariant(LSCEV, L))
+ NumInvariant++;
}
}
- if (NumInvariant == 0) {
- UP.Count = 0;
- return false;
- }
+ }
+ if (NumInvariant == 0) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
+ UP.Count = 0;
+ return false;
}
- return ExplicitUnroll;
+ return false;
}
static LoopUnrollResult
@@ -284,13 +308,16 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
+ TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
+ if (EnableMode & TM_Disable)
+ return LoopUnrollResult::Unmodified;
+
// A loop with any unroll pragma (enabling/disabling/count/etc) is left for
// the unroller, so long as it does not explicitly have unroll_and_jam
// metadata. This means #pragma nounroll will disable unroll and jam as well
// as unrolling
- if (HasUnrollAndJamDisablePragma(L) ||
- (HasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
- !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam."))) {
+ if (HasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
+ !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
return LoopUnrollResult::Unmodified;
}
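For context, a hedged sketch (metadata layout assumed for illustration) of loop metadata that makes hasUnrollAndJamTransformation report TM_Disable, so the check above leaves the loop untouched:

  br i1 %cmp, label %header, label %exit, !llvm.loop !0

  !0 = distinct !{!0, !1}
  !1 = !{!"llvm.loop.unroll_and_jam.disable"}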
@@ -329,6 +356,19 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
+ // Save original loop IDs for after the transformation.
+ MDNode *OrigOuterLoopID = L->getLoopID();
+ MDNode *OrigSubLoopID = SubLoop->getLoopID();
+
+ // Assign the epilogue's loop ID before unrolling so that it is applied to
+ // every inner loop of the epilogue. The loop ID for the jammed inner loop is
+ // applied later.
+ Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
+ OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+ LLVMLoopUnrollAndJamFollowupRemainderInner});
+ if (NewInnerEpilogueLoopID.hasValue())
+ SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
+
// Find trip count and trip multiple
unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
@@ -344,9 +384,39 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
if (OuterTripCount && UP.Count > OuterTripCount)
UP.Count = OuterTripCount;
- LoopUnrollResult UnrollResult =
- UnrollAndJamLoop(L, UP.Count, OuterTripCount, OuterTripMultiple,
- UP.UnrollRemainder, LI, &SE, &DT, &AC, &ORE);
+ Loop *EpilogueOuterLoop = nullptr;
+ LoopUnrollResult UnrollResult = UnrollAndJamLoop(
+ L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
+ &SE, &DT, &AC, &ORE, &EpilogueOuterLoop);
+
+ // Assign new loop attributes.
+ if (EpilogueOuterLoop) {
+ Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
+ OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+ LLVMLoopUnrollAndJamFollowupRemainderOuter});
+ if (NewOuterEpilogueLoopID.hasValue())
+ EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
+ }
+
+ Optional<MDNode *> NewInnerLoopID =
+ makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+ LLVMLoopUnrollAndJamFollowupInner});
+ if (NewInnerLoopID.hasValue())
+ SubLoop->setLoopID(NewInnerLoopID.getValue());
+ else
+ SubLoop->setLoopID(OrigSubLoopID);
+
+ if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
+ Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
+ OrigOuterLoopID,
+ {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
+ if (NewOuterLoopID.hasValue()) {
+ L->setLoopID(NewOuterLoopID.getValue());
+
+ // Do not setLoopAlreadyUnrolled if a followup was given.
+ return UnrollResult;
+ }
+ }
// If the loop has an unroll count pragma or was unrolled by an explicitly set
// count, mark it as unrolled to prevent unrolling beyond the requested count.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index e955821effa0..38b80f48ed0e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -540,7 +540,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
}
}
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
@@ -661,11 +661,6 @@ static bool HasUnrollEnablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
}
-// Returns true if the loop has an unroll(disable) pragma.
-static bool HasUnrollDisablePragma(const Loop *L) {
- return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
-}
-
// Returns true if the loop has a runtime unroll(disable) pragma.
static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
@@ -713,12 +708,19 @@ static uint64_t getUnrolledLoopSize(
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
+// Unless IgnoreUser is true, this will also use metadata and command-line
+// options that are specific to the LoopUnroll pass (which, for instance, are
+// irrelevant for the LoopUnrollAndJam pass).
+// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
+// many LoopUnroll-specific options. The shared functionality should be
+// refactored into its own function.
bool llvm::computeUnrollCount(
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
+
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
@@ -801,7 +803,7 @@ bool llvm::computeUnrollCount(
}
}
- // 4th priority is loop peeling
+ // 4th priority is loop peeling.
computePeelCount(L, LoopSize, UP, TripCount, SE);
if (UP.PeelCount) {
UP.Runtime = false;
@@ -963,13 +965,15 @@ static LoopUnrollResult tryToUnrollLoop(
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
- Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,
- Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,
- Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {
+ bool OnlyWhenForced, Optional<unsigned> ProvidedCount,
+ Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
+ Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
+ Optional<bool> ProvidedAllowPeeling) {
LLVM_DEBUG(dbgs() << "Loop Unroll: F["
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
- if (HasUnrollDisablePragma(L))
+ TransformationMode TM = hasUnrollTransformation(L);
+ if (TM & TM_Disable)
return LoopUnrollResult::Unmodified;
if (!L->isLoopSimplifyForm()) {
LLVM_DEBUG(
@@ -977,6 +981,11 @@ static LoopUnrollResult tryToUnrollLoop(
return LoopUnrollResult::Unmodified;
}
+ // When automatic unrolling is disabled, do not unroll unless overridden for
+ // this loop.
+ if (OnlyWhenForced && !(TM & TM_Enable))
+ return LoopUnrollResult::Unmodified;
+
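Roughly speaking (the metadata layout below is an assumption for illustration, not part of the patch), a loop only passes this OnlyWhenForced gate when its metadata explicitly requests unrolling, which makes hasUnrollTransformation report TM_Enable:

  br i1 %cmp, label %header, label %exit, !llvm.loop !0

  !0 = distinct !{!0, !1}
  !1 = !{!"llvm.loop.unroll.enable"}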
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
@@ -1066,14 +1075,39 @@ static LoopUnrollResult tryToUnrollLoop(
if (TripCount && UP.Count > TripCount)
UP.Count = TripCount;
+ // Save loop properties before it is transformed.
+ MDNode *OrigLoopID = L->getLoopID();
+
// Unroll the loop.
+ Loop *RemainderLoop = nullptr;
LoopUnrollResult UnrollResult = UnrollLoop(
L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
- LI, &SE, &DT, &AC, &ORE, PreserveLCSSA);
+ LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
+ if (RemainderLoop) {
+ Optional<MDNode *> RemainderLoopID =
+ makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
+ LLVMLoopUnrollFollowupRemainder});
+ if (RemainderLoopID.hasValue())
+ RemainderLoop->setLoopID(RemainderLoopID.getValue());
+ }
+
+ if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
+ Optional<MDNode *> NewLoopID =
+ makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
+ LLVMLoopUnrollFollowupUnrolled});
+ if (NewLoopID.hasValue()) {
+ L->setLoopID(NewLoopID.getValue());
+
+ // Do not setLoopAlreadyUnrolled if loop attributes have been specified
+ // explicitly.
+ return UnrollResult;
+ }
+ }
+
// If the loop has an unroll count pragma or was unrolled by an explicitly set
// count, mark it as unrolled to prevent unrolling beyond the requested count.
// If the loop was peeled, we already "used up" the profile information
@@ -1092,6 +1126,12 @@ public:
static char ID; // Pass ID, replacement for typeid
int OptLevel;
+
+ /// If false, use a cost model to determine whether unrolling of a loop is
+ /// profitable. If true, only loops that explicitly request unrolling via
+ /// metadata are considered. All other loops are skipped.
+ bool OnlyWhenForced;
+
Optional<unsigned> ProvidedCount;
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
@@ -1099,15 +1139,16 @@ public:
Optional<bool> ProvidedUpperBound;
Optional<bool> ProvidedAllowPeeling;
- LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
+ LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
+ Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None,
Optional<bool> AllowPeeling = None)
- : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
- ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
- ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
- ProvidedAllowPeeling(AllowPeeling) {
+ : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
+ ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold),
+ ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
+ ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -1130,8 +1171,8 @@ public:
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
LoopUnrollResult Result = tryToUnrollLoop(
- L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount,
- ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
+ L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
+ ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
ProvidedUpperBound, ProvidedAllowPeeling);
if (Result == LoopUnrollResult::FullyUnrolled)
@@ -1161,14 +1202,16 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
- int AllowPartial, int Runtime, int UpperBound,
+Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
+ int Threshold, int Count, int AllowPartial,
+ int Runtime, int UpperBound,
int AllowPeeling) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
return new LoopUnroll(
- OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ OptLevel, OnlyWhenForced,
+ Threshold == -1 ? None : Optional<unsigned>(Threshold),
Count == -1 ? None : Optional<unsigned>(Count),
AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
Runtime == -1 ? None : Optional<bool>(Runtime),
@@ -1176,8 +1219,8 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
}
-Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
- return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) {
+ return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0);
}
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1207,7 +1250,8 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed =
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
- /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
+ /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
+ /*Count*/ None,
/*Threshold*/ None, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
/*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
@@ -1333,23 +1377,21 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
Loop *ParentL = L.getParentLoop();
#endif
- // The API here is quite complex to call, but there are only two interesting
- // states we support: partial and full (or "simple") unrolling. However, to
- // enable these things we actually pass "None" in for the optional to avoid
- // providing an explicit choice.
- Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
- AllowPeeling;
// Check if the profile summary indicates that the profiled application
// has a huge working set size, in which case we disable peeling to avoid
// bloating it further.
+ Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
if (PSI && PSI->hasHugeWorkingSetSize())
- AllowPeeling = false;
+ LocalAllowPeeling = false;
std::string LoopName = L.getName();
- LoopUnrollResult Result =
- tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
- /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
- /*Threshold*/ None, AllowPartialParam, RuntimeParam,
- UpperBoundParam, AllowPeeling);
+ // The API here is quite complex to call, and we allow selecting some flavors
+ // of unrolling at construction time (by setting UnrollOpts).
+ LoopUnrollResult Result = tryToUnrollLoop(
+ &L, DT, &LI, SE, TTI, AC, ORE,
+ /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
+ /*Count*/ None,
+ /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
+ UnrollOpts.AllowUpperBound, LocalAllowPeeling);
Changed |= Result != LoopUnrollResult::Unmodified;
// The parent must not be damaged by unrolling!
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 6aad077ff19e..4a089dfa7dbf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -28,18 +28,19 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
@@ -65,8 +66,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
@@ -180,11 +183,13 @@ namespace {
Loop *currentLoop = nullptr;
DominatorTree *DT = nullptr;
+ MemorySSA *MSSA = nullptr;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
BasicBlock *loopHeader = nullptr;
BasicBlock *loopPreheader = nullptr;
bool SanitizeMemory;
- LoopSafetyInfo SafetyInfo;
+ SimpleLoopSafetyInfo SafetyInfo;
// LoopBlocks contains all of the basic blocks of the loop, including the
// preheader of the loop, the body of the loop, and the exit blocks of the
@@ -214,8 +219,12 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
if (hasBranchDivergence)
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
getLoopAnalysisUsage(AU);
}
@@ -237,11 +246,11 @@ namespace {
bool TryTrivialLoopUnswitch(bool &Changed);
bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
- TerminatorInst *TI = nullptr);
+ Instruction *TI = nullptr);
void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
- BasicBlock *ExitBlock, TerminatorInst *TI);
+ BasicBlock *ExitBlock, Instruction *TI);
void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
- TerminatorInst *TI);
+ Instruction *TI);
void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val, bool isEqual);
@@ -249,8 +258,7 @@ namespace {
void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
- BranchInst *OldBranch,
- TerminatorInst *TI);
+ BranchInst *OldBranch, Instruction *TI);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
@@ -383,7 +391,8 @@ INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
@@ -515,20 +524,33 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPM_Ref;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ if (EnableMSSALoopDependency) {
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ assert(DT && "Cannot update MemorySSA without a valid DomTree.");
+ }
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
if (SanitizeMemory)
- computeLoopSafetyInfo(&SafetyInfo, L);
+ SafetyInfo.computeLoopSafetyInfo(L);
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
redoLoop = false;
Changed |= processCurrentLoop();
} while(redoLoop);
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
return Changed;
}
@@ -690,7 +712,7 @@ bool LoopUnswitch::processCurrentLoop() {
// loop.
for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end(); I != E; ++I) {
- TerminatorInst *TI = (*I)->getTerminator();
+ Instruction *TI = (*I)->getTerminator();
// Unswitching on a potentially uninitialized predicate is not
// MSan-friendly. Limit this to the cases when the original predicate is
@@ -699,7 +721,7 @@ bool LoopUnswitch::processCurrentLoop() {
// This is a workaround for the discrepancy between LLVM IR and MSan
// semantics. See PR28054 for more details.
if (SanitizeMemory &&
- !isGuaranteedToExecute(*TI, DT, currentLoop, &SafetyInfo))
+ !SafetyInfo.isGuaranteedToExecute(*TI, DT, currentLoop))
continue;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -853,7 +875,7 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
/// simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
- TerminatorInst *TI) {
+ Instruction *TI) {
// Check to see if it would be profitable to unswitch current loop.
if (!BranchesInfo.CostAllowsUnswitching()) {
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
@@ -864,7 +886,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
return false;
}
if (hasBranchDivergence &&
- getAnalysis<DivergenceAnalysis>().isDivergent(LoopCond)) {
+ getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName()
<< " at non-trivial condition '" << *Val
@@ -908,7 +930,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
BranchInst *OldBranch,
- TerminatorInst *TI) {
+ Instruction *TI) {
assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
assert(TrueDest != FalseDest && "Branch targets should be different");
// Insert a conditional branch on LIC to the two preheaders. The original
@@ -952,13 +974,16 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
}
-
DT->applyUpdates(Updates);
+
+ if (MSSAU)
+ MSSAU->applyUpdates(Updates, *DT);
}
// If either edge is critical, split it. This helps preserve LoopSimplify
// form for enclosing loops.
- auto Options = CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA();
+ auto Options =
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA();
SplitCriticalEdge(BI, 0, Options);
SplitCriticalEdge(BI, 1, Options);
}
@@ -970,7 +995,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
/// outside of the loop and updating loop info.
void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock,
- TerminatorInst *TI) {
+ Instruction *TI) {
LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function "
@@ -984,7 +1009,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
// conditional branch on Cond.
- BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI);
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI, MSSAU.get());
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we should
@@ -995,7 +1020,8 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI);
+ BasicBlock *NewExit =
+ SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get());
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
@@ -1015,6 +1041,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// particular value, rewrite the loop with this info. We know that this will
// at least eliminate the old branch.
RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
+
++NumTrivial;
}
@@ -1026,7 +1053,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
/// condition.
bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
BasicBlock *CurrentBB = currentLoop->getHeader();
- TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+ Instruction *CurrentTerm = CurrentBB->getTerminator();
LLVMContext &Context = CurrentBB->getContext();
// If loop header has only one reachable successor (currently via an
@@ -1190,7 +1217,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI,
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(),
/*PreserveLCSSA*/ true);
}
}
@@ -1199,7 +1226,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
/// Split it into loop versions and test the condition outside of either loop.
/// Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
- Loop *L, TerminatorInst *TI) {
+ Loop *L, Instruction *TI) {
Function *F = loopHeader->getParent();
LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
@@ -1216,7 +1243,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// First step, split the preheader and exit blocks, and add these blocks to
// the LoopBlocks list.
- BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, DT, LI);
+ BasicBlock *NewPreheader =
+ SplitEdge(loopPreheader, loopHeader, DT, LI, MSSAU.get());
LoopBlocks.push_back(NewPreheader);
// We want the loop to come after the preheader, but before the exit blocks.
@@ -1318,10 +1346,24 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
"Preheader splitting did not work correctly!");
+ if (MSSAU) {
+ // Update MemorySSA after cloning, and before splitting to unreachables,
+ // since that invalidates the 1:1 mapping of clones in VMap.
+ LoopBlocksRPO LBRPO(L);
+ LBRPO.perform(LI);
+ MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap);
+ }
+
// Emit the new branch that selects between the two versions of this loop.
EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
TI);
LPM->deleteSimpleAnalysisValue(OldBR, L);
+ if (MSSAU) {
+ // Update MemoryPhis in Exit blocks.
+ MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+ }
// The OldBr was replaced by a new one and removed (but not erased) by
// EmitPreheaderBranchOnCondition. It is no longer needed, so delete it.
@@ -1347,6 +1389,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
LICHandle && !isa<Constant>(LICHandle))
RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
}
/// Remove all instances of I from the worklist vector specified.
@@ -1485,7 +1530,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// and hooked up so as to preserve the loop structure, because
// trying to update it is complicated. So instead we preserve the
// loop structure and put the block on a dead code path.
- SplitEdge(Switch, SISucc, DT, LI);
+ SplitEdge(Switch, SISucc, DT, LI, MSSAU.get());
// Compute the successors instead of relying on the return value
// of SplitEdge, since it may have split the switch successor
// after PHI nodes.
@@ -1539,6 +1584,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Worklist.push_back(Use);
LPM->deleteSimpleAnalysisValue(I, L);
RemoveFromWorklist(I, Worklist);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
I->eraseFromParent();
++NumSimplify;
continue;
@@ -1578,6 +1625,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Move all of the successor contents from Succ to Pred.
Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
Succ->begin(), Succ->end());
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(Succ, Pred, BI);
LPM->deleteSimpleAnalysisValue(BI, L);
RemoveFromWorklist(BI, Worklist);
BI->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 06e86081e8a0..83861b98fbd8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -360,10 +360,11 @@ bool LoopVersioningLICM::legalLoopMemoryAccesses() {
bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
assert(I != nullptr && "Null instruction found!");
// Check function call safety
- if (isa<CallInst>(I) && !AA->doesNotAccessMemory(CallSite(I))) {
- LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
- return false;
- }
+ if (auto *Call = dyn_cast<CallBase>(I))
+ if (!AA->doesNotAccessMemory(Call)) {
+ LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
+ return false;
+ }
// Avoid loops with the possibility of a throw
if (I->mayThrow()) {
LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
@@ -594,6 +595,11 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipLoop(L))
return false;
+
+ // Do not do the transformation if disabled by metadata.
+ if (hasLICMVersioningTransformation(L) & TM_Disable)
+ return false;
+
// Get Analysis information.
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
@@ -628,6 +634,8 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Set Loop Versioning metaData for version loop.
addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData);
// Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
+ // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
+ // instructions, not loops.
addStringMetadataToLoop(LVer.getVersionedLoop(),
"llvm.mem.parallel_loop_access");
// Update version loop with aggressive aliasing assumption.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 070114a84cc5..4867b33d671f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -15,25 +15,19 @@
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;
-static cl::opt<uint32_t> PredicatePassBranchWeight(
- "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
- cl::desc("The probability of a guard failing is assumed to be the "
- "reciprocal of this value (default = 1 << 20)"));
-
namespace {
struct LowerGuardIntrinsicLegacyPass : public FunctionPass {
static char ID;
@@ -46,45 +40,6 @@ struct LowerGuardIntrinsicLegacyPass : public FunctionPass {
};
}
-static void MakeGuardControlFlowExplicit(Function *DeoptIntrinsic,
- CallInst *CI) {
- OperandBundleDef DeoptOB(*CI->getOperandBundle(LLVMContext::OB_deopt));
- SmallVector<Value *, 4> Args(std::next(CI->arg_begin()), CI->arg_end());
-
- auto *CheckBB = CI->getParent();
- auto *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(CI->getArgOperand(0), CI, true);
-
- auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
-
- // SplitBlockAndInsertIfThen inserts control flow that branches to
- // DeoptBlockTerm if the condition is true. We want the opposite.
- CheckBI->swapSuccessors();
-
- CheckBI->getSuccessor(0)->setName("guarded");
- CheckBI->getSuccessor(1)->setName("deopt");
-
- if (auto *MD = CI->getMetadata(LLVMContext::MD_make_implicit))
- CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD);
-
- MDBuilder MDB(CI->getContext());
- CheckBI->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(PredicatePassBranchWeight, 1));
-
- IRBuilder<> B(DeoptBlockTerm);
- auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, "");
-
- if (DeoptIntrinsic->getReturnType()->isVoidTy()) {
- B.CreateRetVoid();
- } else {
- DeoptCall->setName("deoptcall");
- B.CreateRet(DeoptCall);
- }
-
- DeoptCall->setCallingConv(CI->getCallingConv());
- DeoptBlockTerm->eraseFromParent();
-}
-
static bool lowerGuardIntrinsic(Function &F) {
// Check if we can cheaply rule out the possibility of not having any work to
// do.
@@ -95,10 +50,8 @@ static bool lowerGuardIntrinsic(Function &F) {
SmallVector<CallInst *, 8> ToLower;
for (auto &I : instructions(F))
- if (auto *CI = dyn_cast<CallInst>(&I))
- if (auto *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::experimental_guard)
- ToLower.push_back(CI);
+ if (isGuard(&I))
+ ToLower.push_back(cast<CallInst>(&I));
if (ToLower.empty())
return false;
@@ -108,7 +61,7 @@ static bool lowerGuardIntrinsic(Function &F) {
DeoptIntrinsic->setCallingConv(GuardDecl->getCallingConv());
for (auto *CI : ToLower) {
- MakeGuardControlFlowExplicit(DeoptIntrinsic, CI);
+ makeGuardControlFlowExplicit(DeoptIntrinsic, CI);
CI->eraseFromParent();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/contrib/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
new file mode 100644
index 000000000000..1ba3994eba0e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -0,0 +1,120 @@
+//===- MakeGuardsExplicit.cpp - Turn guard intrinsics into guard branches -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the @llvm.experimental.guard intrinsic to the new form of
+// guard represented as a widenable explicit branch to the deopt block. The
+// difference between this pass and LowerGuardIntrinsic is that after this pass
+// the guard, represented as an intrinsic:
+//
+// call void(i1, ...) @llvm.experimental.guard(i1 %old_cond) [ "deopt"() ]
+//
+// is transformed into a guard represented as a widenable explicit branch:
+//
+// %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+// br i1 (%old_cond & %widenable_cond), label %guarded, label %deopt
+//
+// Here:
+// - The semantics of @llvm.experimental.widenable.condition allows replacing
+// %widenable_cond with the construction (%widenable_cond & %any_other_cond)
+// without loss of correctness;
+// - %guarded is the lower part of old guard intrinsic's parent block split by
+// the intrinsic call;
+// - %deopt is a block containing a sole call to @llvm.experimental.deoptimize
+// intrinsic.
+//
+// Therefore, this branch preserves the property of widenability.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
+
+using namespace llvm;
+
+namespace {
+struct MakeGuardsExplicitLegacyPass : public FunctionPass {
+ static char ID;
+ MakeGuardsExplicitLegacyPass() : FunctionPass(ID) {
+ initializeMakeGuardsExplicitLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+}
+
+static void turnToExplicitForm(CallInst *Guard, Function *DeoptIntrinsic) {
+ // Replace the guard with an explicit branch (just like in GuardWidening).
+ BasicBlock *BB = Guard->getParent();
+ makeGuardControlFlowExplicit(DeoptIntrinsic, Guard);
+ BranchInst *ExplicitGuard = cast<BranchInst>(BB->getTerminator());
+ assert(ExplicitGuard->isConditional() && "Must be!");
+
+  // We want the guard to be expressed as explicit control flow, but still be
+  // widenable. For that, we add a Widenable Condition intrinsic call to the
+  // guard's condition.
+ IRBuilder<> B(ExplicitGuard);
+ auto *WidenableCondition =
+ B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
+ {}, {}, nullptr, "widenable_cond");
+ WidenableCondition->setCallingConv(Guard->getCallingConv());
+ auto *NewCond =
+ B.CreateAnd(ExplicitGuard->getCondition(), WidenableCondition);
+ NewCond->setName("exiplicit_guard_cond");
+ ExplicitGuard->setCondition(NewCond);
+ Guard->eraseFromParent();
+}
+
+static bool explicifyGuards(Function &F) {
+ // Check if we can cheaply rule out the possibility of not having any work to
+ // do.
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return false;
+
+ SmallVector<CallInst *, 8> GuardIntrinsics;
+ for (auto &I : instructions(F))
+ if (isGuard(&I))
+ GuardIntrinsics.push_back(cast<CallInst>(&I));
+
+ if (GuardIntrinsics.empty())
+ return false;
+
+ auto *DeoptIntrinsic = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::experimental_deoptimize, {F.getReturnType()});
+ DeoptIntrinsic->setCallingConv(GuardDecl->getCallingConv());
+
+ for (auto *Guard : GuardIntrinsics)
+ turnToExplicitForm(Guard, DeoptIntrinsic);
+
+ return true;
+}
+
+bool MakeGuardsExplicitLegacyPass::runOnFunction(Function &F) {
+ return explicifyGuards(F);
+}
+
+char MakeGuardsExplicitLegacyPass::ID = 0;
+INITIALIZE_PASS(MakeGuardsExplicitLegacyPass, "make-guards-explicit",
+ "Lower the guard intrinsic to explicit control flow form",
+ false, false)
+
+PreservedAnalyses MakeGuardsExplicitPass::run(Function &F,
+ FunctionAnalysisManager &) {
+ if (explicifyGuards(F))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
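The core of turnToExplicitForm() above is the conjunction of the original guard condition with a freshly created widenable condition. A reduced sketch of that step under the same headers (the helper name addWidenableCondition is illustrative, not part of the patch):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Given the conditional branch produced by makeGuardControlFlowExplicit(),
// AND a widenable condition into its predicate:
//   br i1 %cond ...  ==>  br i1 (%cond & %widenable_cond) ...
static void addWidenableCondition(BranchInst *ExplicitGuard) {
  IRBuilder<> B(ExplicitGuard);
  CallInst *WC = B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
                                   {}, {}, nullptr, "widenable_cond");
  Value *NewCond = B.CreateAnd(ExplicitGuard->getCondition(), WC);
  ExplicitGuard->setCondition(NewCond);
}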
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3b74421a47a0..ced923d6973d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -398,7 +398,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
MemsetRanges Ranges(DL);
BasicBlock::iterator BI(StartInst);
- for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+ for (++BI; !BI->isTerminator(); ++BI) {
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
// don't even allow readonly here because we don't want something like:
@@ -413,7 +413,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!NextStore->isSimple()) break;
// Check to see if this stored value is of the same byte-splattable value.
- if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
+ if (isa<UndefValue>(ByteVal) && StoredByte)
+ ByteVal = StoredByte;
+ if (ByteVal != StoredByte)
break;
// Check to see if this store is to a constant offset from the start ptr.
@@ -543,8 +546,8 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
// Memory locations of lifted instructions.
SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
- // Lifted callsites.
- SmallVector<ImmutableCallSite, 8> CallSites;
+ // Lifted calls.
+ SmallVector<const CallBase *, 8> Calls;
const MemoryLocation LoadLoc = MemoryLocation::get(LI);
@@ -562,10 +565,9 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
});
if (!NeedLift)
- NeedLift =
- llvm::any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
- return isModOrRefSet(AA.getModRefInfo(C, CS));
- });
+ NeedLift = llvm::any_of(Calls, [C, &AA](const CallBase *Call) {
+ return isModOrRefSet(AA.getModRefInfo(C, Call));
+ });
}
if (!NeedLift)
@@ -576,12 +578,12 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
// none of them may modify its source.
if (isModSet(AA.getModRefInfo(C, LoadLoc)))
return false;
- else if (auto CS = ImmutableCallSite(C)) {
+ else if (const auto *Call = dyn_cast<CallBase>(C)) {
// If we can't lift this before P, it's game over.
- if (isModOrRefSet(AA.getModRefInfo(P, CS)))
+ if (isModOrRefSet(AA.getModRefInfo(P, Call)))
return false;
- CallSites.push_back(CS);
+ Calls.push_back(Call);
} else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
// If we can't lift this before P, it's game over.
auto ML = MemoryLocation::get(C);
@@ -672,13 +674,11 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (UseMemMove)
M = Builder.CreateMemMove(
SI->getPointerOperand(), findStoreAlignment(DL, SI),
- LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
- SI->isVolatile());
+ LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
else
M = Builder.CreateMemCpy(
SI->getPointerOperand(), findStoreAlignment(DL, SI),
- LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
- SI->isVolatile());
+ LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
<< *M << "\n");
@@ -767,8 +767,8 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!Align)
Align = DL.getABITypeAlignment(T);
IRBuilder<> Builder(SI);
- auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
- Size, Align, SI->isVolatile());
+ auto *M =
+ Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, Align);
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
@@ -916,8 +916,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
continue;
}
if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
- if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
- IT->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (IT->isLifetimeStartOrEnd())
continue;
if (U != C && U != cpy)
@@ -942,10 +941,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = LookupAliasAnalysis();
- ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
+ ModRefInfo MR = AA.getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
// If necessary, perform additional analysis.
if (isModOrRefSet(MR))
- MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
+ MR = AA.callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), &DT);
if (isModOrRefSet(MR))
return false;
@@ -993,8 +992,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
// handled here, but combineMetadata doesn't support them yet
unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias,
- LLVMContext::MD_invariant_group};
- combineMetadata(C, cpy, KnownIDs);
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_access_group};
+ combineMetadata(C, cpy, KnownIDs, true);
// Remove the memcpy.
MD->removeInstruction(cpy);
@@ -1056,6 +1056,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
UseMemMove = true;
// If all checks passed, then we can transform M.
+ LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
+ << *MDep << '\n' << *M << '\n');
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
@@ -1141,6 +1143,21 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return true;
}
+/// Determine whether the instruction has undefined content for the given Size,
+/// either because it was freshly alloca'd or started its lifetime.
+static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
+ if (isa<AllocaInst>(I))
+ return true;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ if (LTSize->getZExtValue() >= Size->getZExtValue())
+ return true;
+
+ return false;
+}
+
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
@@ -1164,12 +1181,27 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
- ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+ // A known memset size is required.
ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
+ if (!MemSetSize)
+ return false;
+
// Make sure the memcpy doesn't read any more than what the memset wrote.
// Don't worry about sizes larger than i64.
- if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
- return false;
+ ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+ if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
+ // If the memcpy is larger than the memset, but the memory was undef prior
+ // to the memset, we can just ignore the tail. Technically we're only
+ // interested in the bytes from MemSetSize..CopySize here, but as we can't
+ // easily represent this location, we use the full 0..CopySize range.
+ MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+ if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ CopySize = MemSetSize;
+ else
+ return false;
+ }
IRBuilder<> Builder(MemCpy);
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
@@ -1249,19 +1281,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep);
} else if (SrcDepInfo.isDef()) {
- Instruction *I = SrcDepInfo.getInst();
- bool hasUndefContents = false;
-
- if (isa<AllocaInst>(I)) {
- hasUndefContents = true;
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
- if (LTSize->getZExtValue() >= CopySize->getZExtValue())
- hasUndefContents = true;
- }
-
- if (hasUndefContents) {
+ if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
MD->removeInstruction(M);
M->eraseFromParent();
++NumMemCpyInstr;
@@ -1320,7 +1340,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemoryLocation(ByValArg, ByValSize), true,
+ MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true,
CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
if (!DepInfo.isClobber())
return false;
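Several of the MemCpyOpt changes above replace a raw integer size with LocationSize::precise() when building memory locations for alias queries. A small sketch of the updated idiom (the helper name and parameters are illustrative, not from the patch):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instruction.h"
#include <cstdint>

using namespace llvm;

// Ask whether I may read or write exactly Bytes bytes starting at Ptr.
static bool mayTouchRange(AliasAnalysis &AA, const Instruction *I,
                          const Value *Ptr, uint64_t Bytes) {
  MemoryLocation Loc(Ptr, LocationSize::precise(Bytes));
  return isModOrRefSet(AA.getModRefInfo(I, Loc));
}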
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index ff0183a8ea2d..69fd8b163a07 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -41,6 +41,15 @@ namespace {
#define DEBUG_TYPE "mergeicmps"
+// Returns true if the instruction is a simple load or a simple store
+static bool isSimpleLoadOrStore(const Instruction *I) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isSimple();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isSimple();
+ return false;
+}
+
// A BCE atom.
struct BCEAtom {
BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
@@ -81,14 +90,15 @@ BCEAtom visitICmpLoadOperand(Value *const Val) {
LLVM_DEBUG(dbgs() << "used outside of block\n");
return {};
}
- if (LoadI->isVolatile()) {
- LLVM_DEBUG(dbgs() << "volatile\n");
+ // Do not optimize atomic loads to non-atomic memcmp
+ if (!LoadI->isSimple()) {
+ LLVM_DEBUG(dbgs() << "volatile or atomic\n");
return {};
}
Value *const Addr = LoadI->getOperand(0);
if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
LLVM_DEBUG(dbgs() << "GEP\n");
- if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
+ if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
LLVM_DEBUG(dbgs() << "used outside of block\n");
return {};
}
@@ -150,18 +160,19 @@ class BCECmpBlock {
// Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
// instructions in the block.
- bool canSplit() const;
+ bool canSplit(AliasAnalysis *AA) const;
// Return true if all the relevant instructions in the BCE-cmp-block can
// be sunk below this instruction. By doing this, we know we can separate the
// BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
// block.
- bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &) const;
+ bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &,
+ AliasAnalysis *AA) const;
// We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
// instructions. Split the old block and move all non-BCE-cmp-insts into the
// new parent block.
- void split(BasicBlock *NewParent) const;
+ void split(BasicBlock *NewParent, AliasAnalysis *AA) const;
// The basic block where this comparison happens.
BasicBlock *BB = nullptr;
@@ -179,12 +190,21 @@ private:
};
bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
- DenseSet<Instruction *> &BlockInsts) const {
+ DenseSet<Instruction *> &BlockInsts,
+ AliasAnalysis *AA) const {
// If this instruction has side effects and it is in the middle of the BCE cmp
// block instructions, then bail for now.
- // TODO: use alias analysis to tell whether there is real interference.
- if (Inst->mayHaveSideEffects())
- return false;
+ if (Inst->mayHaveSideEffects()) {
+ // Bail if this is not a simple load or store
+ if (!isSimpleLoadOrStore(Inst))
+ return false;
+ // Disallow stores that might alias the BCE operands
+ MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI);
+ MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI);
+ if (isModSet(AA->getModRefInfo(Inst, LLoc)) ||
+ isModSet(AA->getModRefInfo(Inst, RLoc)))
+ return false;
+ }
// Make sure this instruction does not use any of the BCE cmp block
// instructions as operand.
for (auto BI : BlockInsts) {
@@ -194,14 +214,15 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
return true;
}
-void BCECmpBlock::split(BasicBlock *NewParent) const {
+void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
DenseSet<Instruction *> BlockInsts(
{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
llvm::SmallVector<Instruction *, 4> OtherInsts;
for (Instruction &Inst : *BB) {
if (BlockInsts.count(&Inst))
continue;
- assert(canSinkBCECmpInst(&Inst, BlockInsts) && "Split unsplittable block");
+ assert(canSinkBCECmpInst(&Inst, BlockInsts, AA) &&
+ "Split unsplittable block");
// This is a non-BCE-cmp-block instruction. And it can be separated
// from the BCE-cmp-block instruction.
OtherInsts.push_back(&Inst);
@@ -213,12 +234,12 @@ void BCECmpBlock::split(BasicBlock *NewParent) const {
}
}
-bool BCECmpBlock::canSplit() const {
+bool BCECmpBlock::canSplit(AliasAnalysis *AA) const {
DenseSet<Instruction *> BlockInsts(
{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
for (Instruction &Inst : *BB) {
if (!BlockInsts.count(&Inst)) {
- if (!canSinkBCECmpInst(&Inst, BlockInsts))
+ if (!canSinkBCECmpInst(&Inst, BlockInsts, AA))
return false;
}
}
@@ -262,8 +283,9 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI,
if (!Lhs.Base()) return {};
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
if (!Rhs.Base()) return {};
+ const auto &DL = CmpI->getModule()->getDataLayout();
return BCECmpBlock(std::move(Lhs), std::move(Rhs),
- CmpI->getOperand(0)->getType()->getScalarSizeInBits());
+ DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
}
return {};
}
@@ -324,7 +346,8 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
// A chain of comparisons.
class BCECmpChain {
public:
- BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi);
+ BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis *AA);
int size() const { return Comparisons_.size(); }
@@ -332,7 +355,7 @@ class BCECmpChain {
void dump() const;
#endif // MERGEICMPS_DOT_ON
- bool simplify(const TargetLibraryInfo *const TLI);
+ bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
private:
static bool IsContiguous(const BCECmpBlock &First,
@@ -348,7 +371,7 @@ class BCECmpChain {
// null, the merged block will link to the phi block.
void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
BasicBlock *const NextBBInChain, PHINode &Phi,
- const TargetLibraryInfo *const TLI);
+ const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
PHINode &Phi_;
std::vector<BCECmpBlock> Comparisons_;
@@ -356,7 +379,8 @@ class BCECmpChain {
BasicBlock *EntryBlock_;
};
-BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
+BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis *AA)
: Phi_(Phi) {
assert(!Blocks.empty() && "a chain should have at least one block");
// Now look inside blocks to check for BCE comparisons.
@@ -388,7 +412,7 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
// and start anew.
//
// NOTE: we only handle block with single predecessor for now.
- if (Comparison.canSplit()) {
+ if (Comparison.canSplit(AA)) {
LLVM_DEBUG(dbgs()
<< "Split initial block '" << Comparison.BB->getName()
<< "' that does extra work besides compare\n");
@@ -442,10 +466,9 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
#endif // MERGEICMPS_DOT_ON
// Reorder blocks by LHS. We can do that without changing the
// semantics because we are only accessing dereferencable memory.
- llvm::sort(Comparisons_.begin(), Comparisons_.end(),
- [](const BCECmpBlock &a, const BCECmpBlock &b) {
- return a.Lhs() < b.Lhs();
- });
+ llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) {
+ return a.Lhs() < b.Lhs();
+ });
#ifdef MERGEICMPS_DOT_ON
errs() << "AFTER REORDERING:\n\n";
dump();
@@ -475,7 +498,8 @@ void BCECmpChain::dump() const {
}
#endif // MERGEICMPS_DOT_ON
-bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
+bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
+ AliasAnalysis *AA) {
// First pass to check if there is at least one merge. If not, we don't do
// anything and we keep analysis passes intact.
{
@@ -523,13 +547,13 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
// Merge all previous comparisons and start a new merge block.
mergeComparisons(
makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
- Comparisons_[I].BB, Phi_, TLI);
+ Comparisons_[I].BB, Phi_, TLI, AA);
NumMerged = 1;
}
}
mergeComparisons(makeArrayRef(Comparisons_)
.slice(Comparisons_.size() - NumMerged, NumMerged),
- nullptr, Phi_, TLI);
+ nullptr, Phi_, TLI, AA);
return true;
}
@@ -537,7 +561,8 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
BasicBlock *const NextBBInChain,
PHINode &Phi,
- const TargetLibraryInfo *const TLI) {
+ const TargetLibraryInfo *const TLI,
+ AliasAnalysis *AA) {
assert(!Comparisons.empty());
const auto &FirstComparison = *Comparisons.begin();
BasicBlock *const BB = FirstComparison.BB;
@@ -550,7 +575,7 @@ void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
[](const BCECmpBlock &B) { return B.RequireSplit; });
if (C != Comparisons.end())
- C->split(EntryBlock_);
+ C->split(EntryBlock_, AA);
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
const auto TotalSize =
@@ -666,7 +691,8 @@ std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
return Blocks;
}
-bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) {
+bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
+ AliasAnalysis *AA) {
LLVM_DEBUG(dbgs() << "processPhi()\n");
if (Phi.getNumIncomingValues() <= 1) {
LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
@@ -724,14 +750,14 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) {
const auto Blocks =
getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
if (Blocks.empty()) return false;
- BCECmpChain CmpChain(Blocks, Phi);
+ BCECmpChain CmpChain(Blocks, Phi, AA);
if (CmpChain.size() < 2) {
LLVM_DEBUG(dbgs() << "skip: only one compare block\n");
return false;
}
- return CmpChain.simplify(TLI);
+ return CmpChain.simplify(TLI, AA);
}
class MergeICmps : public FunctionPass {
@@ -746,7 +772,8 @@ class MergeICmps : public FunctionPass {
if (skipFunction(F)) return false;
const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto PA = runImpl(F, &TLI, &TTI);
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto PA = runImpl(F, &TLI, &TTI, AA);
return !PA.areAllPreserved();
}
@@ -754,14 +781,16 @@ class MergeICmps : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI);
+ const TargetTransformInfo *TTI, AliasAnalysis *AA);
};
PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ AliasAnalysis *AA) {
LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
// We only try merging comparisons if the target wants to expand memcmp later.
@@ -777,7 +806,7 @@ PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
// A Phi operation is always first in a basic block.
if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
- MadeChange |= processPhi(*Phi, TLI);
+ MadeChange |= processPhi(*Phi, TLI, AA);
}
if (MadeChange) return PreservedAnalyses::none();
@@ -791,6 +820,7 @@ INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
"Merge contiguous icmps into a memcmp", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
"Merge contiguous icmps into a memcmp", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 3464b759280f..ee21feca8d2c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -211,6 +211,7 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
&BB->front());
+ NewPN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
return NewPN;
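The one-line MergedLoadStoreMotion change gives the sink-created PHI a merged debug location rather than whatever location it would otherwise inherit. A sketch of the surrounding pattern (names are illustrative, not from the patch):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Build the PHI that selects between the values of two stores being sunk
// into their common successor Succ.
static PHINode *mergeStoredValues(BasicBlock *Succ, StoreInst *S0,
                                  StoreInst *S1) {
  Value *V0 = S0->getValueOperand();
  Value *V1 = S1->getValueOperand();
  PHINode *PN = PHINode::Create(V0->getType(), 2, "sunk.val", &Succ->front());
  // Merge both stores' locations rather than arbitrarily picking one.
  PN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
  PN->addIncoming(V0, S0->getParent());
  PN->addIncoming(V1, S1->getParent());
  return PN;
}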
diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 3e47e9441d15..7cbb0fe70f82 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -657,8 +657,8 @@ public:
TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
const DataLayout &DL)
: F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
- PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) {
- }
+ PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)),
+ SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false) {}
bool runGVN();
@@ -777,7 +777,7 @@ private:
// Reachability handling.
void updateReachableEdge(BasicBlock *, BasicBlock *);
- void processOutgoingEdges(TerminatorInst *, BasicBlock *);
+ void processOutgoingEdges(Instruction *, BasicBlock *);
Value *findConditionEquivalence(Value *) const;
// Elimination.
@@ -959,8 +959,7 @@ static bool isCopyOfAPHI(const Value *V) {
// order. The BlockInstRange numbers are generated in an RPO walk of the basic
// blocks.
void NewGVN::sortPHIOps(MutableArrayRef<ValPair> Ops) const {
- llvm::sort(Ops.begin(), Ops.end(),
- [&](const ValPair &P1, const ValPair &P2) {
+ llvm::sort(Ops, [&](const ValPair &P1, const ValPair &P2) {
return BlockInstRange.lookup(P1.second).first <
BlockInstRange.lookup(P2.second).first;
});
@@ -1087,9 +1086,13 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
CongruenceClass *CC = ValueToClass.lookup(V);
if (CC) {
if (CC->getLeader() && CC->getLeader() != I) {
- // Don't add temporary instructions to the user lists.
- if (!AllTempInstructions.count(I))
- addAdditionalUsers(V, I);
+ // If we simplified to something else, we need to communicate
+ // that we're users of the value we simplified to.
+ if (I != V) {
+ // Don't add temporary instructions to the user lists.
+ if (!AllTempInstructions.count(I))
+ addAdditionalUsers(V, I);
+ }
return createVariableOrConstant(CC->getLeader());
}
if (CC->getDefiningExpr()) {
@@ -1752,7 +1755,7 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
return true;
});
// If we are left with no operands, it's dead.
- if (Filtered.begin() == Filtered.end()) {
+ if (empty(Filtered)) {
// If it has undef at this point, it means there are no non-undef arguments,
// and thus, the value of the phi node must be undef.
if (HasUndef) {
@@ -2484,7 +2487,7 @@ Value *NewGVN::findConditionEquivalence(Value *Cond) const {
}
// Process the outgoing edges of a block for reachability.
-void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
// Evaluate reachability of terminator instruction.
BranchInst *BR;
if ((BR = dyn_cast<BranchInst>(TI)) && BR->isConditional()) {
@@ -2925,7 +2928,7 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
PHINodeUses.insert(UInst);
// Don't insert void terminators into the class. We don't value number
// them, and they just end up sitting in TOP.
- if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
+ if (I.isTerminator() && I.getType()->isVoidTy())
continue;
TOPClass->insert(&I);
ValueToClass[&I] = TOPClass;
@@ -3134,7 +3137,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
auto *Symbolized = createUnknownExpression(I);
performCongruenceFinding(I, Symbolized);
}
- processOutgoingEdges(dyn_cast<TerminatorInst>(I), I->getParent());
+ processOutgoingEdges(I, I->getParent());
}
}
@@ -3172,12 +3175,8 @@ bool NewGVN::singleReachablePHIPath(
auto FilteredPhiArgs =
make_filter_range(MP->operands(), ReachableOperandPred);
SmallVector<const Value *, 32> OperandList;
- std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(),
- std::back_inserter(OperandList));
- bool Okay = OperandList.size() == 1;
- if (!Okay)
- Okay =
- std::equal(OperandList.begin(), OperandList.end(), OperandList.begin());
+ llvm::copy(FilteredPhiArgs, std::back_inserter(OperandList));
+ bool Okay = is_splat(OperandList);
if (Okay)
return singleReachablePHIPath(Visited, cast<MemoryAccess>(OperandList[0]),
Second);
@@ -3272,8 +3271,7 @@ void NewGVN::verifyMemoryCongruency() const {
const MemoryDef *MD = cast<MemoryDef>(U);
return ValueToClass.lookup(MD->getMemoryInst());
});
- assert(std::equal(PhiOpClasses.begin(), PhiOpClasses.end(),
- PhiOpClasses.begin()) &&
+ assert(is_splat(PhiOpClasses) &&
"All MemoryPhi arguments should be in the same class");
}
}
@@ -3501,9 +3499,11 @@ bool NewGVN::runGVN() {
if (!ToErase->use_empty())
ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
- if (ToErase->getParent())
- ToErase->eraseFromParent();
+ assert(ToErase->getParent() &&
+ "BB containing ToErase deleted unexpectedly!");
+ ToErase->eraseFromParent();
}
+ Changed |= !InstructionsToErase.empty();
// Delete all unreachable blocks.
auto UnreachableBlockPred = [&](const BasicBlock &BB) {
@@ -3697,37 +3697,6 @@ void NewGVN::convertClassToLoadsAndStores(
}
}
-static void patchReplacementInstruction(Instruction *I, Value *Repl) {
- auto *ReplInst = dyn_cast<Instruction>(Repl);
- if (!ReplInst)
- return;
-
- // Patch the replacement so that it is not more restrictive than the value
- // being replaced.
- // Note that if 'I' is a load being replaced by some operation,
- // for example, by an arithmetic operation, then andIRFlags()
- // would just erase all math flags from the original arithmetic
- // operation, which is clearly not wanted and not needed.
- if (!isa<LoadInst>(I))
- ReplInst->andIRFlags(I);
-
- // FIXME: If both the original and replacement value are part of the
- // same control-flow region (meaning that the execution of one
- // guarantees the execution of the other), then we can combine the
- // noalias scopes here and do better than the general conservative
- // answer used in combineMetadata().
-
- // In general, GVN unifies expressions over different control-flow
- // regions, and so we need a conservative combination of the noalias
- // scopes.
- static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
-}
-
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
@@ -3988,7 +3957,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
convertClassToDFSOrdered(*CC, DFSOrderedSet, UseCounts, ProbablyDead);
// Sort the whole thing.
- llvm::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
+ llvm::sort(DFSOrderedSet);
for (auto &VD : DFSOrderedSet) {
int MemberDFSIn = VD.DFSIn;
int MemberDFSOut = VD.DFSOut;
@@ -4124,10 +4093,13 @@ bool NewGVN::eliminateInstructions(Function &F) {
// It's about to be alive again.
if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader))
ProbablyDead.erase(cast<Instruction>(DominatingLeader));
- // Copy instructions, however, are still dead because we use their
- // operand as the leader.
- if (LeaderUseCount == 0 && isSSACopy)
- ProbablyDead.insert(II);
+ // For copy instructions, we use their operand as a leader,
+ // which means we remove a user of the copy and it may become dead.
+ if (isSSACopy) {
+ unsigned &IIUseCount = UseCounts[II];
+ if (--IIUseCount == 0)
+ ProbablyDead.insert(II);
+ }
++LeaderUseCount;
AnythingReplaced = true;
}
@@ -4151,7 +4123,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// If we have possible dead stores to look at, try to eliminate them.
if (CC->getStoreCount() > 0) {
convertClassToLoadsAndStores(*CC, PossibleDeadStores);
- llvm::sort(PossibleDeadStores.begin(), PossibleDeadStores.end());
+ llvm::sort(PossibleDeadStores);
ValueDFSStack EliminationStack;
for (auto &VD : PossibleDeadStores) {
int MemberDFSIn = VD.DFSIn;
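Two of the NewGVN cleanups above swap an open-coded std::equal(begin, end, begin) for llvm::is_splat(), which reads as intended: the range is non-empty and every element equals the first. A tiny sketch (the function name is illustrative, not from the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// True when all reachable PHI operands resolve to the same value.
static bool allOperandsIdentical(ArrayRef<const Value *> Ops) {
  return is_splat(Ops); // false for an empty range, true for a single element
}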
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 1748815c5941..05ea9144f66c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -24,6 +25,8 @@ using namespace llvm;
#define DEBUG_TYPE "partially-inline-libcalls"
+DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
+ "Controls transformations in partially-inline-libcalls");
static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
BasicBlock &CurrBB, Function::iterator &BB,
@@ -33,6 +36,9 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
if (Call->onlyReadsMemory())
return false;
+ if (!DebugCounter::shouldExecute(PILCounter))
+ return false;
+
// Do the following transformation:
//
// (before)
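The new DebugCounter hook makes the sqrt partial inlining bisectable: each candidate consumes one counter event and is transformed only when the counter allows it. A generic sketch with a hypothetical counter name (my-transform is illustrative, not from the patch):

#include "llvm/Support/DebugCounter.h"

using namespace llvm;

// With -debug-counter=my-transform-skip=N,my-transform-count=M the pass
// skips the first N candidates and then transforms only the next M.
DEBUG_COUNTER(MyCounter, "my-transform",
              "Controls which candidates are actually transformed");

static bool shouldTransformCandidate() {
  // Each call consumes one event; returning false skips this candidate.
  return DebugCounter::shouldExecute(MyCounter);
}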
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 8f30bccf48f1..fd2eb85fd7bf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -105,7 +105,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
/// The output of the pass - gives a list of each backedge (described by
/// pointing at the branch) which need a poll inserted.
- std::vector<TerminatorInst *> PollLocations;
+ std::vector<Instruction *> PollLocations;
/// True unless we're running spp-no-calls in which case we need to disable
/// the call-dependent placement opts.
@@ -348,7 +348,7 @@ bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
// Safepoint insertion would involve creating a new basic block (as the
// target of the current backedge) which does the safepoint (of all live
// variables) and branches to the true header
- TerminatorInst *Term = Pred->getTerminator();
+ Instruction *Term = Pred->getTerminator();
LLVM_DEBUG(dbgs() << "[LSP] terminator instruction: " << *Term);
@@ -524,7 +524,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
};
// We need the order of list to be stable so that naming ends up stable
// when we split edges. This makes test cases much easier to write.
- llvm::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);
+ llvm::sort(PollLocations, OrderByBBName);
// We can sometimes end up with duplicate poll locations. This happens if
// a single loop is visited more than once. The fact this happens seems
@@ -535,7 +535,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
// Insert a poll at each point the analysis pass identified
// The poll location must be the terminator of a loop latch block.
- for (TerminatorInst *Term : PollLocations) {
+ for (Instruction *Term : PollLocations) {
// We are inserting a poll, the function is modified
Modified = true;
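PlaceSafepoints also picks up the range form of llvm::sort for the poll locations; the comparator ordering terminators by their parent block's name is unchanged. A sketch of the idiom (the helper name is illustrative, not from the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include <vector>

using namespace llvm;

// Keep the poll insertion order stable by sorting on block names.
static void orderPollsByBlockName(std::vector<Instruction *> &PollLocations) {
  llvm::sort(PollLocations, [](const Instruction *A, const Instruction *B) {
    return A->getParent()->getName() < B->getParent()->getName();
  });
}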
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 1df0a9c49fb1..cb893eab1654 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -63,6 +63,7 @@
using namespace llvm;
using namespace reassociate;
+using namespace PatternMatch;
#define DEBUG_TYPE "reassociate"
@@ -125,10 +126,10 @@ XorOpnd::XorOpnd(Value *V) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
const APInt *C;
- if (match(V0, PatternMatch::m_APInt(C)))
+ if (match(V0, m_APInt(C)))
std::swap(V0, V1);
- if (match(V1, PatternMatch::m_APInt(C))) {
+ if (match(V1, m_APInt(C))) {
ConstPart = *C;
SymbolicPart = V0;
isOr = (I->getOpcode() == Instruction::Or);
@@ -204,10 +205,10 @@ unsigned ReassociatePass::getRank(Value *V) {
for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i)
Rank = std::max(Rank, getRank(I->getOperand(i)));
- // If this is a not or neg instruction, do not count it for rank. This
+ // If this is a 'not' or 'neg' instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
- if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
- !BinaryOperator::isFNeg(I))
+ if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) &&
+ !match(I, m_FNeg(m_Value())))
++Rank;
LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank
@@ -573,8 +574,8 @@ static bool LinearizeExprTree(BinaryOperator *I,
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
- if ((Opcode == Instruction::Mul && BinaryOperator::isNeg(BO)) ||
- (Opcode == Instruction::FMul && BinaryOperator::isFNeg(BO))) {
+ if ((Opcode == Instruction::Mul && match(BO, m_Neg(m_Value()))) ||
+ (Opcode == Instruction::FMul && match(BO, m_FNeg(m_Value())))) {
LLVM_DEBUG(dbgs()
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
BO = LowerNegateToMultiply(BO);
@@ -788,13 +789,7 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
// Discard any debug info related to the expressions that have changed (we
// can leave debug info related to the root, since the result of the
// expression tree should be the same even after reassociation).
- SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
- findDbgUsers(DbgUsers, ExpressionChanged);
- for (auto *DII : DbgUsers) {
- Value *Undef = UndefValue::get(ExpressionChanged->getType());
- DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
- ValueAsMetadata::get(Undef)));
- }
+ replaceDbgUsesWithUndef(ExpressionChanged);
ExpressionChanged->moveBefore(I);
ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
@@ -854,7 +849,7 @@ static Value *NegateValue(Value *V, Instruction *BI,
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
for (User *U : V->users()) {
- if (!BinaryOperator::isNeg(U) && !BinaryOperator::isFNeg(U))
+ if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value())))
continue;
// We found one! Now we have to make sure that the definition dominates
@@ -899,7 +894,7 @@ static Value *NegateValue(Value *V, Instruction *BI,
/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
- if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub))
+ if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value())))
return false;
// Don't breakup X - undef.
@@ -1113,8 +1108,8 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
// First, check for X and ~X in the operand list.
assert(i < Ops.size());
- if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
- Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
+ Value *X;
+ if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^.
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX != i) {
if (Opcode == Instruction::And) // ...&X&~X = 0
@@ -1304,7 +1299,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
Value *V = Ops[i].Op;
const APInt *C;
// TODO: Support non-splat vectors.
- if (match(V, PatternMatch::m_APInt(C))) {
+ if (match(V, m_APInt(C))) {
ConstOpnd ^= *C;
} else {
XorOpnd O(V);
@@ -1460,27 +1455,22 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
}
// Check for X and -X or X and ~X in the operand list.
- if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isFNeg(TheOp) &&
- !BinaryOperator::isNot(TheOp))
+ Value *X;
+ if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) &&
+ !match(TheOp, m_FNeg(m_Value(X))))
continue;
- Value *X = nullptr;
- if (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp))
- X = BinaryOperator::getNegArgument(TheOp);
- else if (BinaryOperator::isNot(TheOp))
- X = BinaryOperator::getNotArgument(TheOp);
-
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX == i)
continue;
// Remove X and -X from the operand list.
if (Ops.size() == 2 &&
- (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp)))
+ (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value()))))
return Constant::getNullValue(X->getType());
// Remove X and ~X from the operand list.
- if (Ops.size() == 2 && BinaryOperator::isNot(TheOp))
+ if (Ops.size() == 2 && match(TheOp, m_Not(m_Value())))
return Constant::getAllOnesValue(X->getType());
Ops.erase(Ops.begin()+i);
@@ -1494,7 +1484,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
e -= 2; // Removed two elements.
// if X and ~X we append -1 to the operand list.
- if (BinaryOperator::isNot(TheOp)) {
+ if (match(TheOp, m_Not(m_Value()))) {
Value *V = Constant::getAllOnesValue(X->getType());
Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
e += 1;
@@ -2058,7 +2048,7 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
RedoInsts.insert(I);
MadeChange = true;
I = NI;
- } else if (BinaryOperator::isNeg(I)) {
+ } else if (match(I, m_Neg(m_Value()))) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
@@ -2082,7 +2072,7 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
RedoInsts.insert(I);
MadeChange = true;
I = NI;
- } else if (BinaryOperator::isFNeg(I)) {
+ } else if (match(I, m_FNeg(m_Value()))) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
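Throughout Reassociate the BinaryOperator::isNeg/isFNeg/isNot helpers are replaced with PatternMatch matchers, which also capture the negated or inverted operand that the old get*Argument() helpers returned. A compact sketch (the helper name is illustrative; m_FNeg matches the fsub -0.0, X form in this LLVM version):

#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace PatternMatch;

// Recognize a negation or bitwise-not and capture its operand in X.
static bool isNegNotOrFNeg(Instruction *I, Value *&X) {
  return match(I, m_Neg(m_Value(X))) ||   // sub 0, X
         match(I, m_FNeg(m_Value(X))) ||  // fsub -0.0, X
         match(I, m_Not(m_Value(X)));     // xor X, -1
}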
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 0de2bc72b522..42d7ed5bc534 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -28,7 +28,6 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -38,6 +37,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -65,6 +65,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
@@ -346,7 +347,7 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return llvm::any_of(ST->subtypes(), containsGCPtrType);
+ return llvm::any_of(ST->elements(), containsGCPtrType);
return false;
}
@@ -1824,7 +1825,7 @@ static void relocationViaAlloca(
}
}
- llvm::sort(Uses.begin(), Uses.end());
+ llvm::sort(Uses);
auto Last = std::unique(Uses.begin(), Uses.end());
Uses.erase(Last, Uses.end());
@@ -1850,13 +1851,13 @@ static void relocationViaAlloca(
StoreInst *Store = new StoreInst(Def, Alloca);
if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
- // InvokeInst is a TerminatorInst so the store need to be inserted
- // into its normal destination block.
+          // InvokeInst is a terminator, so the store needs to be inserted
+          // into its normal destination block.
BasicBlock *NormalDest = Invoke->getNormalDest();
Store->insertBefore(NormalDest->getFirstNonPHI());
} else {
assert(!Inst->isTerminator() &&
- "The only TerminatorInst that can produce a value is "
+ "The only terminator that can produce a value is "
"InvokeInst which is handled above.");
Store->insertAfter(Inst);
}
@@ -2534,9 +2535,10 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// Delete any unreachable statepoints so that we don't have unrewritten
// statepoints surviving this pass. This makes testing easier and the
// resulting IR less confusing to human readers.
- DeferredDominance DD(DT);
- bool MadeChange = removeUnreachableBlocks(F, nullptr, &DD);
- DD.flush();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ bool MadeChange = removeUnreachableBlocks(F, nullptr, &DTU);
+ // Flush the Dominator Tree.
+ DTU.getDomTree();
// Gather all the statepoints which need rewritten. Be careful to only
// consider those in reachable code since we need to ask dominance queries
@@ -2582,7 +2584,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// increase the liveset of any statepoint we move over. This is profitable
// as long as all statepoints are in rare blocks. If we had in-register
// lowering for live values this would be a much safer transform.
- auto getConditionInst = [](TerminatorInst *TI) -> Instruction* {
+ auto getConditionInst = [](Instruction *TI) -> Instruction * {
if (auto *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional())
return dyn_cast<Instruction>(BI->getCondition());
@@ -2590,7 +2592,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
return nullptr;
};
for (BasicBlock &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (auto *Cond = getConditionInst(TI))
// TODO: Handle more than just ICmps here. We should be able to move
// most instructions without side effects or memory access.
@@ -2673,7 +2675,7 @@ static SetVector<Value *> computeKillSet(BasicBlock *BB) {
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
/// sanity check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
- TerminatorInst *TI, bool TermOkay = false) {
+ Instruction *TI, bool TermOkay = false) {
for (Value *V : Live) {
if (auto *I = dyn_cast<Instruction>(V)) {
// The terminator can be a member of the LiveOut set. LLVM's definition
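RewriteStatepointsForGC drops the DeferredDominance helper in favor of a lazily updated DomTreeUpdater: utilities such as removeUnreachableBlocks() queue their CFG updates on it, and requesting the tree flushes them. A minimal sketch of that sequence (the function name is illustrative, not from the patch):

#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Remove unreachable blocks while keeping DT consistent via lazy updates.
static bool pruneUnreachable(Function &F, DominatorTree &DT) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool Changed = removeUnreachableBlocks(F, /*LVI=*/nullptr, &DTU);
  DTU.getDomTree(); // apply any queued updates before DT is used again
  return Changed;
}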
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 5e3ddeda2d49..2f6ed05c023b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -55,6 +55,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PredicateInfo.h"
#include <cassert>
#include <utility>
#include <vector>
@@ -246,7 +247,27 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
using Edge = std::pair<BasicBlock *, BasicBlock *>;
DenseSet<Edge> KnownFeasibleEdges;
+ DenseMap<Function *, AnalysisResultsForFn> AnalysisResults;
+ DenseMap<Value *, SmallPtrSet<User *, 2>> AdditionalUsers;
+
public:
+ void addAnalysis(Function &F, AnalysisResultsForFn A) {
+ AnalysisResults.insert({&F, std::move(A)});
+ }
+
+ const PredicateBase *getPredicateInfoFor(Instruction *I) {
+ auto A = AnalysisResults.find(I->getParent()->getParent());
+ if (A == AnalysisResults.end())
+ return nullptr;
+ return A->second.PredInfo->getPredicateInfoFor(I);
+ }
+
+ DomTreeUpdater getDTU(Function &F) {
+ auto A = AnalysisResults.find(&F);
+ assert(A != AnalysisResults.end() && "Need analysis results for function.");
+ return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy};
+ }
+
SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli)
: DL(DL), TLI(tli) {}
@@ -548,7 +569,7 @@ private:
// getFeasibleSuccessors - Return a vector of booleans to indicate which
// successors are reachable from a given terminator instruction.
- void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl<bool> &Succs);
+ void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs);
// OperandChangedState - This method is invoked on all of the users of an
// instruction that was just changed state somehow. Based on this
@@ -558,6 +579,26 @@ private:
visit(*I);
}
+ // Add U as additional user of V.
+ void addAdditionalUser(Value *V, User *U) {
+ auto Iter = AdditionalUsers.insert({V, {}});
+ Iter.first->second.insert(U);
+ }
+
+ // Mark I's users as changed, including AdditionalUsers.
+ void markUsersAsChanged(Value *I) {
+ for (User *U : I->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ OperandChangedState(UI);
+
+ auto Iter = AdditionalUsers.find(I);
+ if (Iter != AdditionalUsers.end()) {
+ for (User *U : Iter->second)
+ if (auto *UI = dyn_cast<Instruction>(U))
+ OperandChangedState(UI);
+ }
+ }
+
private:
friend class InstVisitor<SCCPSolver>;
@@ -569,7 +610,7 @@ private:
// Terminators
void visitReturnInst(ReturnInst &I);
- void visitTerminatorInst(TerminatorInst &TI);
+ void visitTerminator(Instruction &TI);
void visitCastInst(CastInst &I);
void visitSelectInst(SelectInst &I);
@@ -580,7 +621,7 @@ private:
void visitCatchSwitchInst(CatchSwitchInst &CPI) {
markOverdefined(&CPI);
- visitTerminatorInst(CPI);
+ visitTerminator(CPI);
}
// Instructions that cannot be folded away.
@@ -595,12 +636,12 @@ private:
void visitInvokeInst (InvokeInst &II) {
visitCallSite(&II);
- visitTerminatorInst(II);
+ visitTerminator(II);
}
void visitCallSite (CallSite CS);
- void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
- void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitResumeInst (ResumeInst &I) { /*returns void*/ }
+ void visitUnreachableInst(UnreachableInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
void visitInstruction(Instruction &I) {
@@ -615,7 +656,7 @@ private:
// getFeasibleSuccessors - Return a vector of booleans to indicate which
// successors are reachable from a given terminator instruction.
-void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
+void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
SmallVectorImpl<bool> &Succs) {
Succs.resize(TI.getNumSuccessors());
if (auto *BI = dyn_cast<BranchInst>(&TI)) {
@@ -640,7 +681,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
}
// Unwinding instructions successors are always executable.
- if (TI.isExceptional()) {
+ if (TI.isExceptionalTerminator()) {
Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -802,7 +843,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
}
}
-void SCCPSolver::visitTerminatorInst(TerminatorInst &TI) {
+void SCCPSolver::visitTerminator(Instruction &TI) {
SmallVector<bool, 16> SuccFeasible;
getFeasibleSuccessors(TI, SuccFeasible);
@@ -982,8 +1023,9 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// Handle ICmpInst instruction.
void SCCPSolver::visitCmpInst(CmpInst &I) {
- LatticeVal &IV = ValueState[&I];
- if (IV.isOverdefined()) return;
+  // Do not cache this lookup; getValueState calls later in the function
+  // might invalidate the reference.
+ if (ValueState[&I].isOverdefined()) return;
Value *Op1 = I.getOperand(0);
Value *Op2 = I.getOperand(1);
@@ -1011,7 +1053,8 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
}
// If operands are still unknown, wait for it to resolve.
- if (!V1State.isOverdefined() && !V2State.isOverdefined() && !IV.isConstant())
+ if (!V1State.isOverdefined() && !V2State.isOverdefined() &&
+ !ValueState[&I].isConstant())
return;
markOverdefined(&I);
@@ -1119,6 +1162,65 @@ void SCCPSolver::visitCallSite(CallSite CS) {
Function *F = CS.getCalledFunction();
Instruction *I = CS.getInstruction();
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
+ if (ValueState[I].isOverdefined())
+ return;
+
+ auto *PI = getPredicateInfoFor(I);
+ if (!PI)
+ return;
+
+ Value *CopyOf = I->getOperand(0);
+ auto *PBranch = dyn_cast<PredicateBranch>(PI);
+ if (!PBranch) {
+ mergeInValue(ValueState[I], I, getValueState(CopyOf));
+ return;
+ }
+
+ Value *Cond = PBranch->Condition;
+
+ // Everything below relies on the condition being a comparison.
+ auto *Cmp = dyn_cast<CmpInst>(Cond);
+ if (!Cmp) {
+ mergeInValue(ValueState[I], I, getValueState(CopyOf));
+ return;
+ }
+
+ Value *CmpOp0 = Cmp->getOperand(0);
+ Value *CmpOp1 = Cmp->getOperand(1);
+ if (CopyOf != CmpOp0 && CopyOf != CmpOp1) {
+ mergeInValue(ValueState[I], I, getValueState(CopyOf));
+ return;
+ }
+
+ if (CmpOp0 != CopyOf)
+ std::swap(CmpOp0, CmpOp1);
+
+ LatticeVal OriginalVal = getValueState(CopyOf);
+ LatticeVal EqVal = getValueState(CmpOp1);
+ LatticeVal &IV = ValueState[I];
+ if (PBranch->TrueEdge && Cmp->getPredicate() == CmpInst::ICMP_EQ) {
+ addAdditionalUser(CmpOp1, I);
+ if (OriginalVal.isConstant())
+ mergeInValue(IV, I, OriginalVal);
+ else
+ mergeInValue(IV, I, EqVal);
+ return;
+ }
+ if (!PBranch->TrueEdge && Cmp->getPredicate() == CmpInst::ICMP_NE) {
+ addAdditionalUser(CmpOp1, I);
+ if (OriginalVal.isConstant())
+ mergeInValue(IV, I, OriginalVal);
+ else
+ mergeInValue(IV, I, EqVal);
+ return;
+ }
+
+ return (void)mergeInValue(IV, I, getValueState(CopyOf));
+ }
+ }
+
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
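The block above keys off @llvm.ssa.copy calls: PredicateInfo inserts them to rename a value along the edges of a dominating branch or assume, and IPSCCP then refines the copy's lattice value from the branch condition. A minimal recognizer of such a copy (the helper name is illustrative, not from the patch):

#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// If I is an @llvm.ssa.copy call, return the value it renames.
static Value *getSSACopyOriginal(Instruction *I) {
  if (auto *II = dyn_cast<IntrinsicInst>(I))
    if (II->getIntrinsicID() == Intrinsic::ssa_copy)
      return II->getOperand(0);
  return nullptr;
}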
@@ -1134,6 +1236,8 @@ CallOverdefined:
SmallVector<Constant*, 8> Operands;
for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
AI != E; ++AI) {
+ if (AI->get()->getType()->isStructTy())
+ return markOverdefined(I); // Can't handle struct args.
LatticeVal State = getValueState(*AI);
if (State.isUnknown())
@@ -1238,9 +1342,7 @@ void SCCPSolver::Solve() {
// since all of its users will have already been marked as overdefined
// Update all of the users of this instruction's value.
//
- for (User *U : I->users())
- if (auto *UI = dyn_cast<Instruction>(U))
- OperandChangedState(UI);
+ markUsersAsChanged(I);
}
// Process the instruction work list.
@@ -1257,9 +1359,7 @@ void SCCPSolver::Solve() {
// Update all of the users of this instruction's value.
//
if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
- for (User *U : I->users())
- if (auto *UI = dyn_cast<Instruction>(U))
- OperandChangedState(UI);
+ markUsersAsChanged(I);
}
// Process the basic block work list.
@@ -1522,7 +1622,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Check to see if we have a branch or switch on an undefined value. If so
// we force the branch to go one way or the other to make the successor
// values live. It doesn't really matter which way we force it.
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (!BI->isConditional()) continue;
if (!getValueState(BI->getCondition()).isUnknown())
@@ -1694,7 +1794,7 @@ static bool runSCCP(Function &F, const DataLayout &DL,
// constants if we have found them to be of constant values.
for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
Instruction *Inst = &*BI++;
- if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
+ if (Inst->getType()->isVoidTy() || Inst->isTerminator())
continue;
if (tryToReplaceWithConstant(Solver, Inst)) {
@@ -1798,8 +1898,44 @@ static void findReturnsToZap(Function &F,
}
}
-bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+// Update the condition for terminators that are branching on indeterminate
+// values, forcing them to use a specific edge.
+static void forceIndeterminateEdge(Instruction* I, SCCPSolver &Solver) {
+ BasicBlock *Dest = nullptr;
+ Constant *C = nullptr;
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ if (!isa<ConstantInt>(SI->getCondition())) {
+ // Indeterminate switch; use first case value.
+ Dest = SI->case_begin()->getCaseSuccessor();
+ C = SI->case_begin()->getCaseValue();
+ }
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (!isa<ConstantInt>(BI->getCondition())) {
+ // Indeterminate branch; use false.
+ Dest = BI->getSuccessor(1);
+ C = ConstantInt::getFalse(BI->getContext());
+ }
+ } else if (IndirectBrInst *IBR = dyn_cast<IndirectBrInst>(I)) {
+ if (!isa<BlockAddress>(IBR->getAddress()->stripPointerCasts())) {
+ // Indeterminate indirectbr; use successor 0.
+ Dest = IBR->getSuccessor(0);
+ C = BlockAddress::get(IBR->getSuccessor(0));
+ }
+ } else {
+ llvm_unreachable("Unexpected terminator instruction");
+ }
+ if (C) {
+ assert(Solver.isEdgeFeasible(I->getParent(), Dest) &&
+ "Didn't find feasible edge?");
+ (void)Dest;
+
+ I->setOperand(0, C);
+ }
+}
+
+bool llvm::runIPSCCP(
+ Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ function_ref<AnalysisResultsForFn(Function &)> getAnalysis) {
SCCPSolver Solver(DL, TLI);
// Loop over all functions, marking arguments to those with their addresses
@@ -1808,6 +1944,8 @@ bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
if (F.isDeclaration())
continue;
+ Solver.addAnalysis(F, getAnalysis(F));
+
// Determine if we can track the function's return values. If so, add the
// function to the solver's set of return-tracked functions.
if (canTrackReturnsInterprocedurally(&F))
@@ -1856,12 +1994,13 @@ bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
// Iterate over all of the instructions in the module, replacing them with
// constants if we have found them to be of constant values.
- SmallVector<BasicBlock*, 512> BlocksToErase;
for (Function &F : M) {
if (F.isDeclaration())
continue;
+ SmallVector<BasicBlock *, 512> BlocksToErase;
+
if (Solver.isBlockExecutable(&F.front()))
for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;
++AI) {
@@ -1897,23 +2036,26 @@ bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
}
}
- // Change dead blocks to unreachable. We do it after replacing constants in
- // all executable blocks, because changeToUnreachable may remove PHI nodes
- // in executable blocks we found values for. The function's entry block is
- // not part of BlocksToErase, so we have to handle it separately.
- for (BasicBlock *BB : BlocksToErase)
+ DomTreeUpdater DTU = Solver.getDTU(F);
+ // Change dead blocks to unreachable. We do it after replacing constants
+ // in all executable blocks, because changeToUnreachable may remove PHI
+ // nodes in executable blocks we found values for. The function's entry
+ // block is not part of BlocksToErase, so we have to handle it separately.
+ for (BasicBlock *BB : BlocksToErase) {
NumInstRemoved +=
- changeToUnreachable(BB->getFirstNonPHI(), /*UseLLVMTrap=*/false);
+ changeToUnreachable(BB->getFirstNonPHI(), /*UseLLVMTrap=*/false,
+ /*PreserveLCSSA=*/false, &DTU);
+ }
if (!Solver.isBlockExecutable(&F.front()))
NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHI(),
- /*UseLLVMTrap=*/false);
+ /*UseLLVMTrap=*/false,
+ /*PreserveLCSSA=*/false, &DTU);
- // Now that all instructions in the function are constant folded, erase dead
- // blocks, because we can now use ConstantFoldTerminator to get rid of
- // in-edges.
- for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
+ // Now that all instructions in the function are constant folded,
+ // use ConstantFoldTerminator to get rid of in-edges, record DT updates and
+ // delete dead BBs.
+ for (BasicBlock *DeadBB : BlocksToErase) {
// If there are any PHI nodes in this successor, drop entries for BB now.
- BasicBlock *DeadBB = BlocksToErase[i];
for (Value::user_iterator UI = DeadBB->user_begin(),
UE = DeadBB->user_end();
UI != UE;) {
@@ -1925,41 +2067,34 @@ bool llvm::runIPSCCP(Module &M, const DataLayout &DL,
// Ignore blockaddress users; BasicBlock's dtor will handle them.
if (!I) continue;
- bool Folded = ConstantFoldTerminator(I->getParent());
- if (!Folded) {
- // If the branch can't be folded, we must have forced an edge
- // for an indeterminate value. Force the terminator to fold
- // to that edge.
- Constant *C;
- BasicBlock *Dest;
- if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
- Dest = SI->case_begin()->getCaseSuccessor();
- C = SI->case_begin()->getCaseValue();
- } else if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- Dest = BI->getSuccessor(1);
- C = ConstantInt::getFalse(BI->getContext());
- } else if (IndirectBrInst *IBR = dyn_cast<IndirectBrInst>(I)) {
- Dest = IBR->getSuccessor(0);
- C = BlockAddress::get(IBR->getSuccessor(0));
- } else {
- llvm_unreachable("Unexpected terminator instruction");
- }
- assert(Solver.isEdgeFeasible(I->getParent(), Dest) &&
- "Didn't find feasible edge?");
- (void)Dest;
-
- I->setOperand(0, C);
- Folded = ConstantFoldTerminator(I->getParent());
- }
+ // If we have forced an edge for an indeterminate value, then force the
+ // terminator to fold to that edge.
+ forceIndeterminateEdge(I, Solver);
+ bool Folded = ConstantFoldTerminator(I->getParent(),
+ /*DeleteDeadConditions=*/false,
+ /*TLI=*/nullptr, &DTU);
assert(Folded &&
"Expect TermInst on constantint or blockaddress to be folded");
(void) Folded;
}
+ // Mark dead BB for deletion.
+ DTU.deleteBB(DeadBB);
+ }
- // Finally, delete the basic block.
- F.getBasicBlockList().erase(DeadBB);
+ for (BasicBlock &BB : F) {
+ for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
+ Instruction *Inst = &*BI++;
+ if (Solver.getPredicateInfoFor(Inst)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
+ Value *Op = II->getOperand(0);
+ Inst->replaceAllUsesWith(Op);
+ Inst->eraseFromParent();
+ }
+ }
+ }
+ }
}
- BlocksToErase.clear();
}
// If we inferred constant or undef return values for a function, we replaced
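Note on the SCCP hunks above: the rewrite routes every CFG mutation (changeToUnreachable, ConstantFoldTerminator, deleteBB) through a DomTreeUpdater so dominator-tree maintenance is queued and applied in batches instead of being recomputed per edit. The following self-contained sketch uses toy types to illustrate that batching idea only; it is not the real DomTreeUpdater interface.

#include <cstdio>
#include <string>
#include <vector>

// Toy stand-ins for basic blocks and an updater that queues CFG edge
// changes and applies them in one pass, mirroring the batching idea.
struct Block { std::string Name; };

struct EdgeUpdate {
  enum Kind { Insert, Delete } K;
  Block *From, *To;
};

struct ToyTreeUpdater {
  std::vector<EdgeUpdate> Pending;
  void recordEdge(EdgeUpdate::Kind K, Block *From, Block *To) {
    Pending.push_back({K, From, To});      // queue, don't recompute yet
  }
  void flush() {
    for (const EdgeUpdate &U : Pending)    // apply everything at once
      std::printf("%s edge %s -> %s\n",
                  U.K == EdgeUpdate::Insert ? "insert" : "delete",
                  U.From->Name.c_str(), U.To->Name.c_str());
    Pending.clear();
  }
};

int main() {
  Block Entry{"entry"}, Dead{"dead"}, Exit{"exit"};
  ToyTreeUpdater DTU;
  // A pass deleting the dead block records the edge removals first
  // and applies them in a single batch afterwards.
  DTU.recordEdge(EdgeUpdate::Delete, &Entry, &Dead);
  DTU.recordEdge(EdgeUpdate::Delete, &Dead, &Exit);
  DTU.flush();
  return 0;
}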
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index bf482bf5272e..eab77cf4cda9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -913,8 +913,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
- if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
- II.getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II.isLifetimeStartOrEnd()) {
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
Length->getLimitedValue());
@@ -1060,7 +1059,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
- llvm::sort(Slices.begin(), Slices.end());
+ llvm::sort(Slices);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1211,7 +1210,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
- TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
+ Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
// If the value is produced by the terminator of the predecessor (an
@@ -1275,7 +1274,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
continue;
}
- TerminatorInst *TI = Pred->getTerminator();
+ Instruction *TI = Pred->getTerminator();
IRBuilderTy PredBuilder(TI);
LoadInst *Load = PredBuilder.CreateLoad(
@@ -1400,8 +1399,8 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
if (Ty == TargetTy)
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
- // Pointer size to use for the indices.
- unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
+ // Offset size to use for the indices.
+ unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1413,7 +1412,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
ElementTy = ArrayTy->getElementType();
- Indices.push_back(IRB.getIntN(PtrSize, 0));
+ Indices.push_back(IRB.getIntN(OffsetSize, 0));
} else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
ElementTy = VectorTy->getElementType();
Indices.push_back(IRB.getInt32(0));
@@ -1807,8 +1806,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
@@ -1906,7 +1904,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
"All non-integer types eliminated!");
return RHSTy->getNumElements() < LHSTy->getNumElements();
};
- llvm::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
+ llvm::sort(CandidateTys, RankVectorTypes);
CandidateTys.erase(
std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
CandidateTys.end());
@@ -2029,8 +2027,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else {
return false;
@@ -2377,7 +2374,7 @@ private:
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
- APInt(DL.getPointerTypeSizeInBits(PointerTy), Offset),
+ APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
@@ -2593,7 +2590,8 @@ private:
}
V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+ Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags);
Pass.DeadInsts.insert(&SI);
@@ -2662,7 +2660,8 @@ private:
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
- NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+ NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
if (AATags)
NewSI->setAAMetadata(AATags);
if (SI.isVolatile())
@@ -2899,8 +2898,8 @@ private:
unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
// Compute the relative offset for the other pointer within the transfer.
- unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
- APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
+ unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
+ APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
unsigned OtherAlign =
IsDest ? II.getSourceAlignment() : II.getDestAlignment();
OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,
@@ -3011,8 +3010,7 @@ private:
}
bool visitIntrinsicInst(IntrinsicInst &II) {
- assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
- II.getIntrinsicID() == Intrinsic::lifetime_end);
+ assert(II.isLifetimeStartOrEnd());
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getArgOperand(1) == OldPtr);
@@ -3164,7 +3162,12 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
/// value (as opposed to the user).
Use *U;
+ /// Used to calculate offsets, and hence alignment, of subobjects.
+ const DataLayout &DL;
+
public:
+ AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
+
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
@@ -3208,10 +3211,22 @@ private:
/// split operations.
Value *Ptr;
+ /// The base pointee type being GEPed into.
+ Type *BaseTy;
+
+ /// Known alignment of the base pointer.
+ unsigned BaseAlign;
+
+ /// To calculate offset of each component so we can correctly deduce
+ /// alignments.
+ const DataLayout &DL;
+
/// Initialize the splitter with an insertion point, Ptr and start with a
/// single zero GEP index.
- OpSplitter(Instruction *InsertionPoint, Value *Ptr)
- : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
+ OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ unsigned BaseAlign, const DataLayout &DL)
+ : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
+ BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}
public:
/// Generic recursive split emission routine.
@@ -3228,8 +3243,11 @@ private:
/// \param Agg The aggregate value being built up or stored, depending on
/// whether this is splitting a load or a store respectively.
void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
- if (Ty->isSingleValueType())
- return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
+ if (Ty->isSingleValueType()) {
+ unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
+ return static_cast<Derived *>(this)->emitFunc(
+ Ty, Agg, MinAlign(BaseAlign, Offset), Name);
+ }
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned OldSize = Indices.size();
@@ -3268,17 +3286,19 @@ private:
struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
AAMDNodes AATags;
- LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
- : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+ LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
+ DL), AATags(AATags) {}
/// Emit a leaf load of a single value. This is called at the leaves of the
/// recursive emission to actually load values.
- void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- LoadInst *Load = IRB.CreateLoad(GEP, Name + ".load");
+ LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load");
if (AATags)
Load->setAAMetadata(AATags);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3295,7 +3315,8 @@ private:
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
AAMDNodes AATags;
LI.getAAMetadata(AATags);
- LoadOpSplitter Splitter(&LI, *U, AATags);
+ LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
+ getAdjustedAlignment(&LI, 0, DL), DL);
Value *V = UndefValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
LI.replaceAllUsesWith(V);
@@ -3304,13 +3325,15 @@ private:
}
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
- StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
- : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+ StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
+ DL),
+ AATags(AATags) {}
AAMDNodes AATags;
-
/// Emit a leaf store of a single value. This is called at the leaves of the
/// recursive emission to actually produce stores.
- void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
//
@@ -3320,7 +3343,8 @@ private:
IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
Value *InBoundsGEP =
IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- StoreInst *Store = IRB.CreateStore(ExtractValue, InBoundsGEP);
+ StoreInst *Store =
+ IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
if (AATags)
Store->setAAMetadata(AATags);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
@@ -3338,7 +3362,8 @@ private:
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
AAMDNodes AATags;
SI.getAAMetadata(AATags);
- StoreOpSplitter Splitter(&SI, *U, AATags);
+ StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
+ getAdjustedAlignment(&SI, 0, DL), DL);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
SI.eraseFromParent();
return true;
@@ -3772,7 +3797,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
- PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
@@ -3828,7 +3854,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
- PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
@@ -4221,7 +4248,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
if (!IsSorted)
- llvm::sort(AS.begin(), AS.end());
+ llvm::sort(AS);
/// Describes the allocas introduced by rewritePartition in order to migrate
/// the debug info.
@@ -4254,7 +4281,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
- TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
+ TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
if (!DbgDeclares.empty()) {
auto *Var = DbgDeclares.front()->getVariable();
auto *Expr = DbgDeclares.front()->getExpression();
@@ -4306,7 +4333,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
// Remove any existing intrinsics describing the same alloca.
- for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
+ for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
OldDII->eraseFromParent();
DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
@@ -4356,7 +4383,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter;
+ AggLoadStoreRewriter AggRewriter(DL);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
@@ -4421,7 +4448,7 @@ bool SROA::deleteDeadInstructions(
// not be able to find it.
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
- for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(AI))
+ for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
OldDII->eraseFromParent();
}
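A recurring detail in the SROA hunks above is deriving the alignment of each split element as MinAlign(BaseAlign, Offset): the largest power of two that divides both the base pointer's alignment and the element's byte offset. A minimal, self-contained illustration of that computation follows; the helper name is mine, not SROA's.

#include <cassert>
#include <cstdint>

// Largest power of two dividing both values: the lowest set bit of A | B.
// This is the quantity the rewritten OpSplitter computes per element.
static uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (~(A | B) + 1);
}

int main() {
  // For a 16-byte aligned aggregate: the field at offset 4 is only
  // 4-aligned, the field at offset 8 is 8-aligned, offset 0 keeps 16.
  assert(minAlign(16, 4) == 4);
  assert(minAlign(16, 8) == 8);
  assert(minAlign(16, 0) == 16);
  return 0;
}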
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index 526487d3477e..976daf4c78c2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -25,7 +25,9 @@
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
using namespace llvm;
@@ -42,7 +44,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDCELegacyPassPass(Registry);
initializeDeadInstEliminationPass(Registry);
initializeDivRemPairsLegacyPassPass(Registry);
- initializeScalarizerPass(Registry);
+ initializeScalarizerLegacyPassPass(Registry);
initializeDSELegacyPassPass(Registry);
initializeGuardWideningLegacyPassPass(Registry);
initializeLoopGuardWideningLegacyPassPass(Registry);
@@ -50,6 +52,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeNewGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
+ initializeMakeGuardsExplicitLegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
initializeGVNSinkLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
@@ -72,6 +75,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopUnrollPass(Registry);
initializeLoopUnrollAndJamPass(Registry);
initializeLoopUnswitchPass(Registry);
+ initializeWarnMissedTransformationsLegacyPass(Registry);
initializeLoopVersioningLICMPass(Registry);
initializeLoopIdiomRecognizeLegacyPassPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
@@ -194,6 +198,10 @@ void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopUnswitchPass());
}
+void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerAtomicPass());
+}
+
void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createMemCpyOptPass());
}
@@ -274,3 +282,7 @@ void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLowerExpectIntrinsicPass());
}
+
+void LLVMAddUnifyFunctionExitNodesPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createUnifyFunctionExitNodesPass());
+}
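The Scalar.cpp hunk above adds C-API wrappers for two more passes. A rough usage sketch through the LLVM-C pass-manager interface might look like the following, assuming LLVMAddLowerAtomicPass is declared in llvm-c/Transforms/Scalar.h as this import suggests; treat it as an illustration rather than a vetted build recipe.

#include <llvm-c/Core.h>
#include <llvm-c/Transforms/Scalar.h>

int main() {
  // Build an empty module and a legacy pass manager through the C API.
  LLVMModuleRef M = LLVMModuleCreateWithName("demo");
  LLVMPassManagerRef PM = LLVMCreatePassManager();

  // The wrapper added by this change: schedule lower-atomic on the module.
  LLVMAddLowerAtomicPass(PM);

  LLVMRunPassManager(PM, M);   // run the scheduled passes over M

  LLVMDisposePassManager(PM);
  LLVMDisposeModule(M);
  return 0;
}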
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 34ed126155be..5eb3fdab6d5c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -38,6 +39,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Options.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -48,6 +50,13 @@ using namespace llvm;
#define DEBUG_TYPE "scalarizer"
+// This is disabled by default because having separate loads and stores
+// makes it more likely that the -combiner-alias-analysis limits will be
+// reached.
+static cl::opt<bool>
+ ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden,
+ cl::desc("Allow the scalarizer pass to scalarize loads and store"));
(The option description above reads "loads and store" in the upstream source; "loads and stores" is presumably intended.)
+
namespace {
// Used to store the scattered form of a vector.
@@ -151,17 +160,13 @@ struct VectorLayout {
uint64_t ElemSize = 0;
};
-class Scalarizer : public FunctionPass,
- public InstVisitor<Scalarizer, bool> {
+class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
- static char ID;
-
- Scalarizer() : FunctionPass(ID) {
- initializeScalarizerPass(*PassRegistry::getPassRegistry());
+ ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
+ : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
}
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
+ bool visit(Function &F);
// InstVisitor methods. They return true if the instruction was scalarized,
// false if nothing changed.
@@ -179,16 +184,6 @@ public:
bool visitStoreInst(StoreInst &SI);
bool visitCallInst(CallInst &ICI);
- static void registerOptions() {
- // This is disabled by default because having separate loads and stores
- // makes it more likely that the -combiner-alias-analysis limits will be
- // reached.
- OptionRegistry::registerOption<bool, Scalarizer,
- &Scalarizer::ScalarizeLoadStore>(
- "scalarize-load-store",
- "Allow the scalarizer pass to scalarize loads and store", false);
- }
-
private:
Scatterer scatter(Instruction *Point, Value *V);
void gather(Instruction *Op, const ValueVector &CV);
@@ -204,16 +199,28 @@ private:
ScatterMap Scattered;
GatherList Gathered;
+
unsigned ParallelLoopAccessMDKind;
- bool ScalarizeLoadStore;
};
-} // end anonymous namespace
+class ScalarizerLegacyPass : public FunctionPass {
+public:
+ static char ID;
-char Scalarizer::ID = 0;
+ ScalarizerLegacyPass() : FunctionPass(ID) {
+ initializeScalarizerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
-INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
- "Scalarize vector operations", false, false)
+ bool runOnFunction(Function &F) override;
+};
+
+} // end anonymous namespace
+
+char ScalarizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
+ "Scalarize vector operations", false, false)
+INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
+ "Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
ValueVector *cachePtr)
@@ -277,22 +284,31 @@ Value *Scatterer::operator[](unsigned I) {
return CV[I];
}
-bool Scalarizer::doInitialization(Module &M) {
- ParallelLoopAccessMDKind =
+bool ScalarizerLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ Module &M = *F.getParent();
+ unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
- ScalarizeLoadStore =
- M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>();
- return false;
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
+ return Impl.visit(F);
}
-bool Scalarizer::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
+FunctionPass *llvm::createScalarizerPass() {
+ return new ScalarizerLegacyPass();
+}
+
+bool ScalarizerVisitor::visit(Function &F) {
assert(Gathered.empty() && Scattered.empty());
- for (BasicBlock &BB : F) {
- for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
+
+ // To ensure we replace gathered components correctly we need to do an ordered
+ // traversal of the basic blocks in the function.
+ ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
+ for (BasicBlock *BB : RPOT) {
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
Instruction *I = &*II;
- bool Done = visit(I);
+ bool Done = InstVisitor::visit(I);
++II;
if (Done && I->getType()->isVoidTy())
I->eraseFromParent();
@@ -303,7 +319,7 @@ bool Scalarizer::runOnFunction(Function &F) {
// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
-Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
+Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
if (Argument *VArg = dyn_cast<Argument>(V)) {
// Put the scattered form of arguments in the entry block,
// so that it can be used everywhere.
@@ -327,7 +343,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
-void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
// Since we're not deleting Op yet, stub out its operands, so that it
// doesn't make anything live unnecessarily.
for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
@@ -356,19 +372,20 @@ void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
-bool Scalarizer::canTransferMetadata(unsigned Tag) {
+bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
return (Tag == LLVMContext::MD_tbaa
|| Tag == LLVMContext::MD_fpmath
|| Tag == LLVMContext::MD_tbaa_struct
|| Tag == LLVMContext::MD_invariant_load
|| Tag == LLVMContext::MD_alias_scope
|| Tag == LLVMContext::MD_noalias
- || Tag == ParallelLoopAccessMDKind);
+ || Tag == ParallelLoopAccessMDKind
+ || Tag == LLVMContext::MD_access_group);
}
// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
-void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV) {
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
Op->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned I = 0, E = CV.size(); I != E; ++I) {
@@ -384,7 +401,7 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
// Try to fill in Layout from Ty, returning true on success. Alignment is
// the alignment of the vector, or 0 if the ABI default should be used.
-bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
+bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
VectorLayout &Layout, const DataLayout &DL) {
// Make sure we're dealing with a vector.
Layout.VecTy = dyn_cast<VectorType>(Ty);
@@ -408,7 +425,7 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
-bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
+bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
VectorType *VT = dyn_cast<VectorType>(I.getType());
if (!VT)
return false;
@@ -441,7 +458,7 @@ static Function *getScalarIntrinsicDeclaration(Module *M,
/// If a call to a vector typed intrinsic function, split into a scalar call per
/// element if possible for the intrinsic.
-bool Scalarizer::splitCall(CallInst &CI) {
+bool ScalarizerVisitor::splitCall(CallInst &CI) {
VectorType *VT = dyn_cast<VectorType>(CI.getType());
if (!VT)
return false;
@@ -499,7 +516,7 @@ bool Scalarizer::splitCall(CallInst &CI) {
return true;
}
-bool Scalarizer::visitSelectInst(SelectInst &SI) {
+bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
VectorType *VT = dyn_cast<VectorType>(SI.getType());
if (!VT)
return false;
@@ -529,19 +546,19 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) {
return true;
}
-bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
+bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
return splitBinary(ICI, ICmpSplitter(ICI));
}
-bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
+bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
return splitBinary(FCI, FCmpSplitter(FCI));
}
-bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
+bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
return splitBinary(BO, BinarySplitter(BO));
}
-bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
if (!VT)
return false;
@@ -587,7 +604,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
return true;
}
-bool Scalarizer::visitCastInst(CastInst &CI) {
+bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
if (!VT)
return false;
@@ -605,7 +622,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) {
return true;
}
-bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
+bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
if (!DstVT || !SrcVT)
@@ -660,7 +677,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
return true;
}
-bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
VectorType *VT = dyn_cast<VectorType>(SVI.getType());
if (!VT)
return false;
@@ -684,7 +701,7 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return true;
}
-bool Scalarizer::visitPHINode(PHINode &PHI) {
+bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
VectorType *VT = dyn_cast<VectorType>(PHI.getType());
if (!VT)
return false;
@@ -709,7 +726,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) {
return true;
}
-bool Scalarizer::visitLoadInst(LoadInst &LI) {
+bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
if (!ScalarizeLoadStore)
return false;
if (!LI.isSimple())
@@ -733,7 +750,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
return true;
}
-bool Scalarizer::visitStoreInst(StoreInst &SI) {
+bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
if (!ScalarizeLoadStore)
return false;
if (!SI.isSimple())
@@ -760,13 +777,13 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
return true;
}
-bool Scalarizer::visitCallInst(CallInst &CI) {
+bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
return splitCall(CI);
}
// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
-bool Scalarizer::finish() {
+bool ScalarizerVisitor::finish() {
// The presence of data in Gathered or Scattered indicates changes
// made to the Function.
if (Gathered.empty() && Scattered.empty())
@@ -797,6 +814,11 @@ bool Scalarizer::finish() {
return true;
}
-FunctionPass *llvm::createScalarizerPass() {
- return new Scalarizer();
+PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) {
+ Module &M = *F.getParent();
+ unsigned ParallelLoopAccessMDKind =
+ M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
+ bool Changed = Impl.visit(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
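The rewritten ScalarizerVisitor above walks blocks in reverse post-order so the scattered form of a value is created before any block that uses it is visited. The following self-contained sketch shows how such an order can be computed on a toy acyclic CFG (plain DFS post-order, then reversed); it is only an illustration of the ordering, not LLVM's ReversePostOrderTraversal.

#include <algorithm>
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy CFG: block name -> successor names.
using CFG = std::map<std::string, std::vector<std::string>>;

static void postOrder(const CFG &G, const std::string &BB,
                      std::set<std::string> &Seen,
                      std::vector<std::string> &Out) {
  if (!Seen.insert(BB).second)
    return;
  for (const std::string &Succ : G.at(BB))
    postOrder(G, Succ, Seen, Out);
  Out.push_back(BB);                  // emit a block after its successors
}

int main() {
  // entry -> {then, else}; then -> merge; else -> merge.
  CFG G = {{"entry", {"then", "else"}},
           {"then", {"merge"}},
           {"else", {"merge"}},
           {"merge", {}}};
  std::set<std::string> Seen;
  std::vector<std::string> PO;
  postOrder(G, "entry", Seen, PO);
  std::reverse(PO.begin(), PO.end()); // reverse post-order: entry first,
                                      // merge last, so defs precede uses
  for (const std::string &BB : PO)
    std::printf("%s\n", BB.c_str());
  return 0;
}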
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5834b619046b..5a67178cef37 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -19,11 +19,14 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -59,7 +62,11 @@ using namespace llvm;
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
STATISTIC(NumTrivial, "Number of unswitches that are trivial");
+STATISTIC(
+ NumCostMultiplierSkipped,
+ "Number of unswitch candidates that had their cost multiplier skipped");
static cl::opt<bool> EnableNonTrivialUnswitch(
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
@@ -70,6 +77,22 @@ static cl::opt<int>
UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
cl::desc("The cost threshold for unswitching a loop."));
+static cl::opt<bool> EnableUnswitchCostMultiplier(
+ "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
+ cl::desc("Enable unswitch cost multiplier that prohibits exponential "
+ "explosion in nontrivial unswitch."));
+static cl::opt<int> UnswitchSiblingsToplevelDiv(
+ "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
+ cl::desc("Toplevel siblings divisor for cost multiplier."));
+static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
+ "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
+ cl::desc("Number of unswitch candidates that are ignored when calculating "
+ "cost multiplier."));
+static cl::opt<bool> UnswitchGuards(
+ "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
+ cl::desc("If enabled, simple loop unswitching will also consider "
+ "llvm.experimental.guard intrinsics as unswitch candidates."));
+
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
///
@@ -302,10 +325,11 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
formLCSSA(*OldContainingL, DT, &LI, nullptr);
// We shouldn't need to form dedicated exits because the exit introduced
- // here is the (just split by unswitching) preheader. As such, it is
- // necessarily dedicated.
- assert(OldContainingL->hasDedicatedExits() &&
- "Unexpected predecessor of hoisted loop preheader!");
+ // here is the (just split by unswitching) preheader. However, after trivial
+ // unswitching it is possible to get new non-dedicated exits out of parent
+ // loop so let's conservatively form dedicated exit blocks and figure out
+ // if we can optimize later.
+ formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA*/ true);
}
}
@@ -327,7 +351,8 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
- LoopInfo &LI, ScalarEvolution *SE) {
+ LoopInfo &LI, ScalarEvolution *SE,
+ MemorySSAUpdater *MSSAU) {
assert(BI.isConditional() && "Can only unswitch a conditional branch!");
LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
@@ -401,11 +426,14 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
SE->forgetTopmostLoop(&L);
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Split the preheader, so that we know that there is a safe place to insert
// the conditional branch. We will change the preheader to have a conditional
// branch on LoopCond.
BasicBlock *OldPH = L.getLoopPreheader();
- BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+ BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we are
@@ -417,9 +445,13 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
"A branch's parent isn't a predecessor!");
UnswitchedBB = LoopExitBB;
} else {
- UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
+ UnswitchedBB =
+ SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Actually move the invariant uses into the unswitched position. If possible,
// we do this by moving the instructions, but when doing partial unswitching
// we do it by building a new merge of the values in the unswitched position.
@@ -430,12 +462,17 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// its successors.
OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
BI);
+ if (MSSAU) {
+ // Temporarily clone the terminator, to make MSSA update cheaper by
+ // separating "insert edge" updates from "remove edge" ones.
+ ParentBB->getInstList().push_back(BI.clone());
+ } else {
+ // Create a new unconditional branch that will continue the loop as a new
+ // terminator.
+ BranchInst::Create(ContinueBB, ParentBB);
+ }
BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
-
- // Create a new unconditional branch that will continue the loop as a new
- // terminator.
- BranchInst::Create(ContinueBB, ParentBB);
} else {
// Only unswitching a subset of inputs to the condition, so we will need to
// build a new branch that merges the invariant inputs.
@@ -451,6 +488,32 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
*UnswitchedBB, *NewPH);
}
+ // Update the dominator tree with the added edge.
+ DT.insertEdge(OldPH, UnswitchedBB);
+
+ // After the dominator tree was updated with the added edge, update MemorySSA
+ // if available.
+ if (MSSAU) {
+ SmallVector<CFGUpdate, 1> Updates;
+ Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
+ MSSAU->applyInsertUpdates(Updates, DT);
+ }
+
+ // Finish updating dominator tree and memory ssa for full unswitch.
+ if (FullUnswitch) {
+ if (MSSAU) {
+ // Remove the cloned branch instruction.
+ ParentBB->getTerminator()->eraseFromParent();
+ // Create unconditional branch now.
+ BranchInst::Create(ContinueBB, ParentBB);
+ MSSAU->removeEdge(ParentBB, LoopExitBB);
+ }
+ DT.deleteEdge(ParentBB, LoopExitBB);
+ }
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Rewrite the relevant PHI nodes.
if (UnswitchedBB == LoopExitBB)
rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
@@ -458,13 +521,6 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
*ParentBB, *OldPH, FullUnswitch);
- // Now we need to update the dominator tree.
- SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
- DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
- if (FullUnswitch)
- DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB});
- DT.applyUpdates(DTUpdates);
-
// The constant we can replace all of our invariants with inside the loop
// body. If any of the invariants have a value other than this the loop won't
// be entered.
@@ -482,6 +538,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
if (FullUnswitch)
hoistLoopToNewParent(L, *NewPH, DT, LI);
+ LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
++NumTrivial;
++NumBranches;
return true;
@@ -514,7 +571,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
- LoopInfo &LI, ScalarEvolution *SE) {
+ LoopInfo &LI, ScalarEvolution *SE,
+ MemorySSAUpdater *MSSAU) {
LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
Value *LoopCond = SI.getCondition();
@@ -539,7 +597,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
else if (ExitCaseIndices.empty())
return false;
- LLVM_DEBUG(dbgs() << " unswitching trivial cases...\n");
+ LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
// We may need to invalidate SCEVs for the outermost loop reached by any of
// the exits.
@@ -603,7 +664,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// Split the preheader, so that we know that there is a safe place to insert
// the switch.
BasicBlock *OldPH = L.getLoopPreheader();
- BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+ BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
OldPH->getTerminator()->eraseFromParent();
// Now add the unswitched switch.
@@ -626,9 +687,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
} else {
auto *SplitBB =
- SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
- rewritePHINodesForExitAndUnswitchedBlocks(
- *DefaultExitBB, *SplitBB, *ParentBB, *OldPH, /*FullUnswitch*/ true);
+ SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
+ rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
+ *ParentBB, *OldPH,
+ /*FullUnswitch*/ true);
DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
}
}
@@ -652,9 +714,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
if (!SplitExitBB) {
// If this is the first time we see this, do the split and remember it.
- SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
- rewritePHINodesForExitAndUnswitchedBlocks(
- *ExitBB, *SplitExitBB, *ParentBB, *OldPH, /*FullUnswitch*/ true);
+ SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
+ rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
+ *ParentBB, *OldPH,
+ /*FullUnswitch*/ true);
}
// Update the case pair to point to the split block.
CasePair.second = SplitExitBB;
@@ -731,6 +794,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
}
DT.applyUpdates(DTUpdates);
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(DTUpdates, DT);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
// We may have changed the nesting relationship for this loop so hoist it to
@@ -739,6 +809,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
++NumTrivial;
++NumSwitches;
+ LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
return true;
}
@@ -755,7 +826,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
/// If `SE` is not null, it will be updated based on the potential loop SCEVs
/// invalidated by this.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
- LoopInfo &LI, ScalarEvolution *SE) {
+ LoopInfo &LI, ScalarEvolution *SE,
+ MemorySSAUpdater *MSSAU) {
bool Changed = false;
// If loop header has only one reachable successor we should keep looking for
@@ -780,7 +852,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
[](Instruction &I) { return I.mayHaveSideEffects(); }))
return Changed;
- TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+ Instruction *CurrentTerm = CurrentBB->getTerminator();
if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
// Don't bother trying to unswitch past a switch with a constant
@@ -789,7 +861,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
if (isa<Constant>(SI->getCondition()))
return Changed;
- if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE))
+ if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
// Couldn't unswitch this one so we're done.
return Changed;
@@ -821,7 +893,7 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
// Found a trivial condition candidate: non-foldable conditional branch. If
// we fail to unswitch this, we can't do anything else that is trivial.
- if (!unswitchTrivialBranch(L, *BI, DT, LI, SE))
+ if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
return Changed;
// Mark that we managed to unswitch something.
@@ -874,7 +946,7 @@ static BasicBlock *buildClonedLoopBlocks(
const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
ValueToValueMapTy &VMap,
SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
- DominatorTree &DT, LoopInfo &LI) {
+ DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
SmallVector<BasicBlock *, 4> NewBlocks;
NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
@@ -919,7 +991,7 @@ static BasicBlock *buildClonedLoopBlocks(
// place to merge the CFG, so split the exit first. This is always safe to
// do because there cannot be any non-loop predecessors of a loop exit in
// loop simplified form.
- auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
+ auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
// Rearrange the names to make it easier to write test cases by having the
// exit block carry the suffix rather than the merge block carrying the
@@ -1262,11 +1334,10 @@ static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
// matter as we're just trying to build up the map from inside-out; we use
// the map in a more stably ordered way below.
auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
- llvm::sort(OrderedClonedExitsInLoops.begin(), OrderedClonedExitsInLoops.end(),
- [&](BasicBlock *LHS, BasicBlock *RHS) {
- return ExitLoopMap.lookup(LHS)->getLoopDepth() <
- ExitLoopMap.lookup(RHS)->getLoopDepth();
- });
+ llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
+ return ExitLoopMap.lookup(LHS)->getLoopDepth() <
+ ExitLoopMap.lookup(RHS)->getLoopDepth();
+ });
// Populate the existing ExitLoopMap with everything reachable from each
// exit, starting from the inner most exit.
@@ -1351,7 +1422,7 @@ static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
static void
deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
- DominatorTree &DT) {
+ DominatorTree &DT, MemorySSAUpdater *MSSAU) {
// Find all the dead clones, and remove them from their successors.
SmallVector<BasicBlock *, 16> DeadBlocks;
for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
@@ -1363,6 +1434,13 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
DeadBlocks.push_back(ClonedBB);
}
+ // Remove all MemorySSA in the dead blocks
+ if (MSSAU) {
+ SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
+ DeadBlocks.end());
+ MSSAU->removeBlocks(DeadBlockSet);
+ }
+
// Drop any remaining references to break cycles.
for (BasicBlock *BB : DeadBlocks)
BB->dropAllReferences();
@@ -1371,21 +1449,33 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
BB->eraseFromParent();
}
-static void
-deleteDeadBlocksFromLoop(Loop &L,
- SmallVectorImpl<BasicBlock *> &ExitBlocks,
- DominatorTree &DT, LoopInfo &LI) {
- // Find all the dead blocks, and remove them from their successors.
- SmallVector<BasicBlock *, 16> DeadBlocks;
- for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
- if (!DT.isReachableFromEntry(BB)) {
- for (BasicBlock *SuccBB : successors(BB))
+static void deleteDeadBlocksFromLoop(Loop &L,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU) {
+ // Find all the dead blocks tied to this loop, and remove them from their
+ // successors.
+ SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+
+ // Start with loop/exit blocks and get a transitive closure of reachable dead
+ // blocks.
+ SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
+ ExitBlocks.end());
+ DeathCandidates.append(L.blocks().begin(), L.blocks().end());
+ while (!DeathCandidates.empty()) {
+ auto *BB = DeathCandidates.pop_back_val();
+ if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
+ for (BasicBlock *SuccBB : successors(BB)) {
SuccBB->removePredecessor(BB);
- DeadBlocks.push_back(BB);
+ DeathCandidates.push_back(SuccBB);
+ }
+ DeadBlockSet.insert(BB);
}
+ }
- SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
- DeadBlocks.end());
+ // Remove all MemorySSA in the dead blocks
+ if (MSSAU)
+ MSSAU->removeBlocks(DeadBlockSet);
// Filter out the dead blocks from the exit blocks list so that it can be
// used in the caller.
@@ -1394,7 +1484,7 @@ deleteDeadBlocksFromLoop(Loop &L,
// Walk from this loop up through its parents removing all of the dead blocks.
for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
- for (auto *BB : DeadBlocks)
+ for (auto *BB : DeadBlockSet)
ParentL->getBlocksSet().erase(BB);
llvm::erase_if(ParentL->getBlocksVector(),
[&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
@@ -1419,7 +1509,7 @@ deleteDeadBlocksFromLoop(Loop &L,
// Remove the loop mappings for the dead blocks and drop all the references
// from these blocks to others to handle cyclic references as we start
// deleting the blocks themselves.
- for (auto *BB : DeadBlocks) {
+ for (auto *BB : DeadBlockSet) {
// Check that the dominator tree has already been updated.
assert(!DT.getNode(BB) && "Should already have cleared domtree!");
LI.changeLoopFor(BB, nullptr);
@@ -1428,7 +1518,7 @@ deleteDeadBlocksFromLoop(Loop &L,
// Actually delete the blocks now that they've been fully unhooked from the
// IR.
- for (auto *BB : DeadBlocks)
+ for (auto *BB : DeadBlockSet)
BB->eraseFromParent();
}
@@ -1782,11 +1872,11 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
} while (!DomWorklist.empty());
}
-static bool unswitchNontrivialInvariants(
- Loop &L, TerminatorInst &TI, ArrayRef<Value *> Invariants,
- DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
- function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE) {
+static void unswitchNontrivialInvariants(
+ Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks, DominatorTree &DT, LoopInfo &LI,
+ AssumptionCache &AC, function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -1803,6 +1893,9 @@ static bool unswitchNontrivialInvariants(
assert(isa<Instruction>(BI->getCondition()) &&
"Partial unswitching requires an instruction as the condition!");
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Constant and BBs tracking the cloned and continuing successor. When we are
// unswitching the entire condition, this can just be trivially chosen to
// unswitch towards `true`. However, when we are unswitching a set of
@@ -1841,19 +1934,12 @@ static bool unswitchNontrivialInvariants(
// whatever reason).
assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
- SmallVector<BasicBlock *, 4> ExitBlocks;
- L.getUniqueExitBlocks(ExitBlocks);
-
- // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
- // don't know how to split those exit blocks.
- // FIXME: We should teach SplitBlock to handle this and remove this
- // restriction.
- for (auto *ExitBB : ExitBlocks)
- if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI()))
- return false;
-
// Compute the parent loop now before we start hacking on things.
Loop *ParentL = L.getParentLoop();
+ // Get blocks in RPO order for MSSA update, before changing the CFG.
+ LoopBlocksRPO LBRPO(&L);
+ if (MSSAU)
+ LBRPO.perform(&LI);
// Compute the outer-most loop containing one of our exit blocks. This is the
// furthest up our loopnest which can be mutated, which we will use below to
@@ -1903,7 +1989,7 @@ static bool unswitchNontrivialInvariants(
// between the unswitched versions, and we will have a new preheader for the
// original loop.
BasicBlock *SplitBB = L.getLoopPreheader();
- BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI);
+ BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
// Keep track of the dominator tree updates needed.
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
@@ -1916,7 +2002,7 @@ static bool unswitchNontrivialInvariants(
VMaps.emplace_back(new ValueToValueMapTy());
ClonedPHs[SuccBB] = buildClonedLoopBlocks(
L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
- DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI);
+ DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
}
// The stitching of the branched code back together depends on whether we're
@@ -1924,7 +2010,63 @@ static bool unswitchNontrivialInvariants(
// nuke the initial terminator placed in the split block.
SplitBB->getTerminator()->eraseFromParent();
if (FullUnswitch) {
- // First we need to unhook the successor relationship as we'll be replacing
+ // Splice the terminator from the original loop and rewrite its
+ // successors.
+ SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
+
+ // Keep a clone of the terminator for MSSA updates.
+ Instruction *NewTI = TI.clone();
+ ParentBB->getInstList().push_back(NewTI);
+
+ // First wire up the moved terminator to the preheaders.
+ if (BI) {
+ BasicBlock *ClonedPH = ClonedPHs.begin()->second;
+ BI->setSuccessor(ClonedSucc, ClonedPH);
+ BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
+ } else {
+ assert(SI && "Must either be a branch or switch!");
+
+ // Walk the cases and directly update their successors.
+ assert(SI->getDefaultDest() == RetainedSuccBB &&
+ "Not retaining default successor!");
+ SI->setDefaultDest(LoopPH);
+ for (auto &Case : SI->cases())
+ if (Case.getCaseSuccessor() == RetainedSuccBB)
+ Case.setSuccessor(LoopPH);
+ else
+ Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
+
+ // We need to use the set to populate domtree updates as even when there
+ // are multiple cases pointing at the same successor we only want to
+ // remove and insert one edge in the domtree.
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs)
+ DTUpdates.push_back(
+ {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
+ }
+
+ if (MSSAU) {
+ DT.applyUpdates(DTUpdates);
+ DTUpdates.clear();
+
+ // Remove all but one edge to the retained block and all unswitched
+ // blocks. This is to avoid having duplicate entries in the cloned Phis,
+ // when we know we only keep a single edge for each case.
+ MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs)
+ MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
+
+ for (auto &VMap : VMaps)
+ MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
+ /*IgnoreIncomingWithNoClones=*/true);
+ MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
+
+ // Remove all edges to unswitched blocks.
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs)
+ MSSAU->removeEdge(ParentBB, SuccBB);
+ }
+
+ // Now unhook the successor relationship as we'll be replacing
// the terminator with a direct branch. This is much simpler for branches
// than switches so we handle those first.
if (BI) {
@@ -1942,9 +2084,10 @@ static bool unswitchNontrivialInvariants(
// is a duplicate edge to the retained successor as the retained successor
// is always the default successor and as we'll replace this with a direct
// branch we no longer need the duplicate entries in the PHI nodes.
- assert(SI->getDefaultDest() == RetainedSuccBB &&
+ SwitchInst *NewSI = cast<SwitchInst>(NewTI);
+ assert(NewSI->getDefaultDest() == RetainedSuccBB &&
"Not retaining default successor!");
- for (auto &Case : SI->cases())
+ for (auto &Case : NewSI->cases())
Case.getCaseSuccessor()->removePredecessor(
ParentBB,
/*DontDeleteUselessPHIs*/ true);
@@ -1956,34 +2099,8 @@ static bool unswitchNontrivialInvariants(
DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
}
- // Now that we've unhooked the successor relationship, splice the terminator
- // from the original loop to the split.
- SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
-
- // Now wire up the terminator to the preheaders.
- if (BI) {
- BasicBlock *ClonedPH = ClonedPHs.begin()->second;
- BI->setSuccessor(ClonedSucc, ClonedPH);
- BI->setSuccessor(1 - ClonedSucc, LoopPH);
- DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
- } else {
- assert(SI && "Must either be a branch or switch!");
-
- // Walk the cases and directly update their successors.
- SI->setDefaultDest(LoopPH);
- for (auto &Case : SI->cases())
- if (Case.getCaseSuccessor() == RetainedSuccBB)
- Case.setSuccessor(LoopPH);
- else
- Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
-
- // We need to use the set to populate domtree updates as even when there
- // are multiple cases pointing at the same successor we only want to
- // remove and insert one edge in the domtree.
- for (BasicBlock *SuccBB : UnswitchedSuccBBs)
- DTUpdates.push_back(
- {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
- }
+ // After MSSAU update, remove the cloned terminator instruction NewTI.
+ ParentBB->getTerminator()->eraseFromParent();
// Create a new unconditional branch to the continuing block (as opposed to
// the one cloned).
@@ -2002,12 +2119,19 @@ static bool unswitchNontrivialInvariants(
// Apply the updates accumulated above to get an up-to-date dominator tree.
DT.applyUpdates(DTUpdates);
+ if (!FullUnswitch && MSSAU) {
+ // Update MSSA for partial unswitch, after DT update.
+ SmallVector<CFGUpdate, 1> Updates;
+ Updates.push_back(
+ {cfg::UpdateKind::Insert, SplitBB, ClonedPHs.begin()->second});
+ MSSAU->applyInsertUpdates(Updates, DT);
+ }
// Now that we have an accurate dominator tree, first delete the dead cloned
// blocks so that we can accurately build any cloned loops. It is important to
// not delete the blocks from the original loop yet because we still want to
// reference the original loop to understand the cloned loop's structure.
- deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT);
+ deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
// Build the cloned loop structure itself. This may be substantially
// different from the original structure due to the simplified CFG. This also
@@ -2019,10 +2143,17 @@ static bool unswitchNontrivialInvariants(
// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
- deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI);
+ deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
SmallVector<Loop *, 4> HoistedLoops;
bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// This transformation has a high risk of corrupting the dominator tree, and
// the below steps to rebuild loop structures will result in hard to debug
// errors in that case so verify that the dominator tree is sane first.
@@ -2038,6 +2169,18 @@ static bool unswitchNontrivialInvariants(
assert(UnswitchedSuccBBs.size() == 1 &&
"Only one possible unswitched block for a branch!");
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
+
+  // When considering multiple partially-unswitched invariants
+  // we can't simply replace them with constants in both branches.
+  //
+  // For 'AND' we infer that the true branch ("continue") means true
+  // for each invariant operand.
+  // For 'OR' we infer that the false branch ("continue") means false
+  // for each invariant operand.
+  // Consequently, in the multiple-partial case we don't replace the
+  // invariants in the unswitched branch.
+ bool ReplaceUnswitched = FullUnswitch || (Invariants.size() == 1);
+
ConstantInt *UnswitchedReplacement =
Direction ? ConstantInt::getTrue(BI->getContext())
: ConstantInt::getFalse(BI->getContext());
@@ -2057,7 +2200,8 @@ static bool unswitchNontrivialInvariants(
// unswitched if in the cloned blocks.
if (DT.dominates(LoopPH, UserI->getParent()))
U->set(ContinueReplacement);
- else if (DT.dominates(ClonedPH, UserI->getParent()))
+ else if (ReplaceUnswitched &&
+ DT.dominates(ClonedPH, UserI->getParent()))
U->set(UnswitchedReplacement);
}
}
@@ -2134,8 +2278,13 @@ static bool unswitchNontrivialInvariants(
SibLoops.push_back(UpdatedL);
UnswitchCB(IsStillLoop, SibLoops);
- ++NumBranches;
- return true;
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ if (BI)
+ ++NumBranches;
+ else
+ ++NumSwitches;
}
/// Recursively compute the cost of a dominator subtree based on the per-block
@@ -2171,19 +2320,208 @@ computeDomSubtreeCost(DomTreeNode &N,
return Cost;
}
+/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
+/// making the following replacement:
+///
+/// --code before guard--
+/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+/// --code after guard--
+///
+/// into
+///
+/// --code before guard--
+/// br i1 %cond, label %guarded, label %deopt
+///
+/// guarded:
+/// --code after guard--
+///
+/// deopt:
+/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+/// unreachable
+///
+/// It also makes all relevant DT and LI updates, so that all structures are in
+/// valid state after this transform.
+static BranchInst *
+turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
+ SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
+ LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
+ BasicBlock *CheckBB = GI->getParent();
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  // Remove all CheckBB's successors from DomTree. A block can appear among the
+  // successors more than once, but the corresponding DomTree update should be
+  // recorded only once.
+ SmallPtrSet<BasicBlock *, 4> Successors;
+ for (auto *Succ : successors(CheckBB))
+ if (Successors.insert(Succ).second)
+ DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
+
+ Instruction *DeoptBlockTerm =
+ SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
+ BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
+ // SplitBlockAndInsertIfThen inserts control flow that branches to
+ // DeoptBlockTerm if the condition is true. We want the opposite.
+ CheckBI->swapSuccessors();
+
+ BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
+ GuardedBlock->setName("guarded");
+ CheckBI->getSuccessor(1)->setName("deopt");
+ BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
+
+ // We now have a new exit block.
+ ExitBlocks.push_back(CheckBI->getSuccessor(1));
+
+ if (MSSAU)
+ MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
+
+ GI->moveBefore(DeoptBlockTerm);
+ GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
+
+ // Add new successors of CheckBB into DomTree.
+ for (auto *Succ : successors(CheckBB))
+ DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
+
+ // Now the blocks that used to be CheckBB's successors are GuardedBlock's
+ // successors.
+ for (auto *Succ : Successors)
+ DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
+
+ // Make proper changes to DT.
+ DT.applyUpdates(DTUpdates);
+ // Inform LI of a new loop block.
+ L.addBasicBlockToLoop(GuardedBlock, LI);
+
+ if (MSSAU) {
+ MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
+ MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::End);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+
+ ++NumGuards;
+ return CheckBI;
+}
+
+/// The cost multiplier is a way to limit potentially exponential behavior of
+/// loop-unswitch. The cost is multiplied in proportion to 2^(number of
+/// unswitch candidates available). It also accounts for the number of
+/// "sibling" loops, as a proxy for previous unswitches that already happened
+/// on this cluster of loops. The formula was deliberately kept simple, just
+/// enough to limit the worst-case behavior; even though it is no longer that
+/// simple, it is still not an attempt at a detailed heuristic size prediction.
+///
+/// TODO: Make a proper accounting of "explosion" effect for all kinds of
+/// unswitch candidates, making adequate predictions instead of wild guesses.
+/// That requires knowing not just the number of "remaining" candidates but
+/// also costs of unswitching for each of these candidates.
+static int calculateUnswitchCostMultiplier(
+ Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
+ ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
+ UnswitchCandidates) {
+
+  // Guards and other exiting conditions do not contribute to exponential
+  // explosion as long as they dominate the latch (otherwise there might be
+  // another path to the latch remaining that does not allow eliminating the
+  // loop copy on unswitch).
+ BasicBlock *Latch = L.getLoopLatch();
+ BasicBlock *CondBlock = TI.getParent();
+ if (DT.dominates(CondBlock, Latch) &&
+ (isGuard(&TI) ||
+ llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
+ return L.contains(SuccBB);
+ }) <= 1)) {
+ NumCostMultiplierSkipped++;
+ return 1;
+ }
+
+ auto *ParentL = L.getParentLoop();
+ int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
+ : std::distance(LI.begin(), LI.end()));
+  // Count the number of clones that all the candidates might cause during
+  // unswitching. A branch/guard counts as 1, a switch counts as log2 of its
+  // cases.
+ int UnswitchedClones = 0;
+ for (auto Candidate : UnswitchCandidates) {
+ Instruction *CI = Candidate.first;
+ BasicBlock *CondBlock = CI->getParent();
+ bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
+ if (isGuard(CI)) {
+ if (!SkipExitingSuccessors)
+ UnswitchedClones++;
+ continue;
+ }
+ int NonExitingSuccessors = llvm::count_if(
+ successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
+ return !SkipExitingSuccessors || L.contains(SuccBB);
+ });
+ UnswitchedClones += Log2_32(NonExitingSuccessors);
+ }
+
+ // Ignore up to the "unscaled candidates" number of unswitch candidates
+ // when calculating the power-of-two scaling of the cost. The main idea
+ // with this control is to allow a small number of unswitches to happen
+  // and rely more on the siblings multiplier (see below) when the number
+ // of candidates is small.
+ unsigned ClonesPower =
+ std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
+
+ // Allowing top-level loops to spread a bit more than nested ones.
+ int SiblingsMultiplier =
+ std::max((ParentL ? SiblingsCount
+ : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
+ 1);
+ // Compute the cost multiplier in a way that won't overflow by saturating
+ // at an upper bound.
+ int CostMultiplier;
+ if (ClonesPower > Log2_32(UnswitchThreshold) ||
+ SiblingsMultiplier > UnswitchThreshold)
+ CostMultiplier = UnswitchThreshold;
+ else
+ CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
+ (int)UnswitchThreshold);
+
+ LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
+ << " (siblings " << SiblingsMultiplier << " * clones "
+ << (1 << ClonesPower) << ")"
+ << " for unswitch candidate: " << TI << "\n");
+ return CostMultiplier;
+}
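To make the saturation logic above concrete, here is a small worked sketch of the same arithmetic with hypothetical inputs (the candidate counts and option values are assumptions made for illustration, not values taken from this patch):

    // Assume two unswitchable branches plus one 16-case switch, in a nest
    // whose parent loop has three sub-loops, with an assumed threshold of 50
    // and two "unscaled" candidates. Log2_32 is from llvm/Support/MathExtras.h.
    int UnswitchedClones = 1 + 1 + Log2_32(16);                    // == 6
    unsigned ClonesPower = std::max(UnswitchedClones - 2, 0);      // == 4
    int SiblingsMultiplier = std::max(3, 1);                       // == 3
    int Threshold = 50;                                            // assumed
    int CostMultiplier =
        (ClonesPower > Log2_32(Threshold) || SiblingsMultiplier > Threshold)
            ? Threshold
            : std::min(SiblingsMultiplier * (1 << ClonesPower), Threshold);
    // CostMultiplier == std::min(3 * 16, 50) == 48, so this candidate's cost
    // is scaled by 48 before being compared against the unswitch threshold.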
+
static bool
unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, TargetTransformInfo &TTI,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
- SmallVector<std::pair<TerminatorInst *, TinyPtrVector<Value *>>, 4>
+ SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
UnswitchCandidates;
+
+ // Whether or not we should also collect guards in the loop.
+ bool CollectGuards = false;
+ if (UnswitchGuards) {
+ auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (GuardDecl && !GuardDecl->use_empty())
+ CollectGuards = true;
+ }
+
for (auto *BB : L.blocks()) {
if (LI.getLoopFor(BB) != &L)
continue;
+ if (CollectGuards)
+ for (auto &I : *BB)
+ if (isGuard(&I)) {
+ auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
+ // TODO: Support AND, OR conditions and partial unswitching.
+ if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
+ UnswitchCandidates.push_back({&I, {Cond}});
+ }
+
if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
// We can only consider fully loop-invariant switch conditions as we need
// to completely eliminate the switch after unswitching.
@@ -2231,6 +2569,19 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
return false;
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L.getUniqueExitBlocks(ExitBlocks);
+
+ // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
+ // don't know how to split those exit blocks.
+ // FIXME: We should teach SplitBlock to handle this and remove this
+ // restriction.
+ for (auto *ExitBB : ExitBlocks)
+ if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI())) {
+      LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad in exit block\n");
+ return false;
+ }
+
LLVM_DEBUG(
dbgs() << "Considering " << UnswitchCandidates.size()
<< " non-trivial loop invariant conditions for unswitching.\n");
@@ -2288,7 +2639,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
// Given a terminator which might be unswitched, computes the non-duplicated
// cost for that terminator.
- auto ComputeUnswitchedCost = [&](TerminatorInst &TI, bool FullUnswitch) {
+ auto ComputeUnswitchedCost = [&](Instruction &TI, bool FullUnswitch) {
BasicBlock &BB = *TI.getParent();
SmallPtrSet<BasicBlock *, 4> Visited;
@@ -2335,22 +2686,40 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
// Now scale the cost by the number of unique successors minus one. We
// subtract one because there is already at least one copy of the entire
// loop. This is computing the new cost of unswitching a condition.
- assert(Visited.size() > 1 &&
+ // Note that guards always have 2 unique successors that are implicit and
+  // will be materialized if we decide to unswitch the guard.
+ int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
+ assert(SuccessorsCount > 1 &&
"Cannot unswitch a condition without multiple distinct successors!");
- return Cost * (Visited.size() - 1);
+ return Cost * (SuccessorsCount - 1);
};
- TerminatorInst *BestUnswitchTI = nullptr;
+ Instruction *BestUnswitchTI = nullptr;
int BestUnswitchCost;
ArrayRef<Value *> BestUnswitchInvariants;
for (auto &TerminatorAndInvariants : UnswitchCandidates) {
- TerminatorInst &TI = *TerminatorAndInvariants.first;
+ Instruction &TI = *TerminatorAndInvariants.first;
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
int CandidateCost = ComputeUnswitchedCost(
TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
Invariants[0] == BI->getCondition()));
- LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
- << " for unswitch candidate: " << TI << "\n");
+    // Calculate the cost multiplier, which is a tool to limit potentially
+ // exponential behavior of loop-unswitch.
+ if (EnableUnswitchCostMultiplier) {
+ int CostMultiplier =
+ calculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
+ assert(
+ (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
+ "cost multiplier needs to be in the range of 1..UnswitchThreshold");
+ CandidateCost *= CostMultiplier;
+ LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
+ << " (multiplier: " << CostMultiplier << ")"
+ << " for unswitch candidate: " << TI << "\n");
+ } else {
+ LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
+ << " for unswitch candidate: " << TI << "\n");
+ }
+
if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
BestUnswitchTI = &TI;
BestUnswitchCost = CandidateCost;
@@ -2364,11 +2733,17 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
return false;
}
- LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
+ // If the best candidate is a guard, turn it into a branch.
+ if (isGuard(BestUnswitchTI))
+ BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
+ ExitBlocks, DT, LI, MSSAU);
+
+ LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
<< BestUnswitchCost << ") terminator: " << *BestUnswitchTI
<< "\n");
- return unswitchNontrivialInvariants(
- L, *BestUnswitchTI, BestUnswitchInvariants, DT, LI, AC, UnswitchCB, SE);
+ unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
+ ExitBlocks, DT, LI, AC, UnswitchCB, SE, MSSAU);
+ return true;
}
/// Unswitch control flow predicated on loop invariant conditions.
@@ -2380,6 +2755,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
///
/// The `DT`, `LI`, `AC`, `TTI` parameters are required analyses that are also
/// updated based on the unswitch.
+/// The `MSSA` analysis is also updated if valid (i.e. its use is enabled).
///
/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
/// true, we will attempt to do non-trivial unswitching as well as trivial
@@ -2395,7 +2771,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, TargetTransformInfo &TTI,
bool NonTrivial,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
bool Changed = false;
@@ -2405,7 +2781,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
return false;
// Try trivial unswitch first before loop over other basic blocks in the loop.
- if (unswitchAllTrivialConditions(L, DT, LI, SE)) {
+ if (unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
// If we unswitched successfully we will want to clean up the loop before
// processing it further so just mark it as unswitched and return.
UnswitchCB(/*CurrentLoopValid*/ true, {});
@@ -2426,7 +2802,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
// Try to unswitch the best invariant condition. We prefer this full unswitch to
// a partial unswitch when possible below the threshold.
- if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE))
+ if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU))
return true;
// No other opportunities to unswitch.
@@ -2460,10 +2836,19 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
U.markLoopAsDeleted(L, LoopName);
};
+ Optional<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA) {
+ MSSAU = MemorySSAUpdater(AR.MSSA);
+ if (VerifyMemorySSA)
+ AR.MSSA->verifyMemorySSA();
+ }
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
- &AR.SE))
+ &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
return PreservedAnalyses::all();
+ if (AR.MSSA && VerifyMemorySSA)
+ AR.MSSA->verifyMemorySSA();
+
// Historically this pass has had issues with the dominator tree so verify it
// in asserts builds.
assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
@@ -2489,6 +2874,10 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (EnableMSSALoopDependency) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
getLoopAnalysisUsage(AU);
}
};
@@ -2508,6 +2897,12 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ MemorySSA *MSSA = nullptr;
+ Optional<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = MemorySSAUpdater(MSSA);
+ }
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
@@ -2527,7 +2922,14 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LPM.markLoopAsDeleted(*L);
};
- bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE);
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
+ bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE,
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
// If anything was unswitched, also clear any cached information about this
// loop.
@@ -2547,6 +2949,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
"Simple unswitch loops", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index ca6b93e0b4a9..c99da8f0737a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -72,18 +72,18 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
return false;
}
- if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst) || Inst->isEHPad() ||
+ if (Inst->isTerminator() || isa<PHINode>(Inst) || Inst->isEHPad() ||
Inst->mayThrow())
return false;
- if (auto CS = CallSite(Inst)) {
+ if (auto *Call = dyn_cast<CallBase>(Inst)) {
// Convergent operations cannot be made control-dependent on additional
// values.
- if (CS.hasFnAttr(Attribute::Convergent))
+ if (Call->hasFnAttr(Attribute::Convergent))
return false;
for (Instruction *S : Stores)
- if (isModSet(AA.getModRefInfo(S, CS)))
+ if (isModSet(AA.getModRefInfo(S, Call)))
return false;
}
@@ -104,7 +104,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
// It's never legal to sink an instruction into a block which terminates in an
// EH-pad.
- if (SuccToSinkTo->getTerminator()->isExceptional())
+ if (SuccToSinkTo->getTerminator()->isExceptionalTerminator())
return false;
// If the block has multiple predecessors, this would introduce computation
diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index 6743e19a7c92..c0f75ddddbe0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumSpeculatedInstructions,
STATISTIC(NumNewRedundantInstructions,
"Number of new, redundant instructions inserted");
-/// Check wether speculating the users of a PHI node around the PHI
+/// Check whether speculating the users of a PHI node around the PHI
/// will be safe.
///
/// This checks both that all of the users are safe and also that all of their
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 2061db13639a..b5089b006bdd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -640,12 +640,12 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
switch (C.CandidateKind) {
case Candidate::Add:
- case Candidate::Mul:
+ case Candidate::Mul: {
// C = Basis + Bump
- if (BinaryOperator::isNeg(Bump)) {
+ Value *NegBump;
+ if (match(Bump, m_Neg(m_Value(NegBump)))) {
// If Bump is a neg instruction, emit C = Basis - (-Bump).
- Reduced =
- Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump));
+ Reduced = Builder.CreateSub(Basis.Ins, NegBump);
// We only use the negative argument of Bump, and Bump itself may be
// trivially dead.
RecursivelyDeleteTriviallyDeadInstructions(Bump);
@@ -662,6 +662,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
Reduced = Builder.CreateAdd(Basis.Ins, Bump);
}
break;
+ }
case Candidate::GEP:
{
Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
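For readers unfamiliar with the PatternMatch idiom that the hunk above switches to, here is a minimal stand-alone sketch; it is illustrative only and not part of the patch:

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns X when Bump is the negation "sub 0, X", otherwise nullptr. This
    // mirrors the check that replaced the removed BinaryOperator::isNeg helper.
    static Value *matchNegatedValue(Value *Bump) {
      Value *X;
      if (match(Bump, m_Neg(m_Value(X))))
        return X;
      return nullptr;
    }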
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index d650264176aa..0db762d846f2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -13,7 +13,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
@@ -183,7 +184,7 @@ class StructurizeCFG : public RegionPass {
Function *Func;
Region *ParentRegion;
- DivergenceAnalysis *DA;
+ LegacyDivergenceAnalysis *DA;
DominatorTree *DT;
LoopInfo *LI;
@@ -269,7 +270,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
- AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -285,7 +286,7 @@ char StructurizeCFG::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
@@ -596,7 +597,8 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
/// Add the real PHI value as soon as everything is set up
void StructurizeCFG::setPhiValues() {
- SSAUpdater Updater;
+ SmallVector<PHINode *, 8> InsertedPhis;
+ SSAUpdater Updater(&InsertedPhis);
for (const auto &AddedPhi : AddedPhis) {
BasicBlock *To = AddedPhi.first;
const BBVector &From = AddedPhi.second;
@@ -632,11 +634,31 @@ void StructurizeCFG::setPhiValues() {
DeletedPhis.erase(To);
}
assert(DeletedPhis.empty());
+
+ // Simplify any phis inserted by the SSAUpdater if possible
+ bool Changed;
+ do {
+ Changed = false;
+
+ SimplifyQuery Q(Func->getParent()->getDataLayout());
+ Q.DT = DT;
+ for (size_t i = 0; i < InsertedPhis.size(); ++i) {
+ PHINode *Phi = InsertedPhis[i];
+ if (Value *V = SimplifyInstruction(Phi, Q)) {
+ Phi->replaceAllUsesWith(V);
+ Phi->eraseFromParent();
+ InsertedPhis[i] = InsertedPhis.back();
+ InsertedPhis.pop_back();
+ i--;
+ Changed = true;
+ }
+ }
+ } while (Changed);
}
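The clean-up pattern introduced above can be summarized in isolation as follows (a sketch assuming the function F, a DominatorTree pointer DT, and the updater's value bookkeeping already exist; the real code additionally iterates to a fixed point):

    // Collect every PHI the SSAUpdater materializes, then fold the trivial ones.
    SmallVector<PHINode *, 8> InsertedPhis;
    SSAUpdater Updater(&InsertedPhis);
    // ... Initialize / AddAvailableValue / GetValueInMiddleOfBlock calls ...

    SimplifyQuery Q(F.getParent()->getDataLayout());
    Q.DT = DT;
    for (PHINode *Phi : InsertedPhis)
      if (Value *V = SimplifyInstruction(Phi, Q)) {
        Phi->replaceAllUsesWith(V);
        Phi->eraseFromParent();
      }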
/// Remove phi values from all successors and then remove the terminator.
void StructurizeCFG::killTerminator(BasicBlock *BB) {
- TerminatorInst *Term = BB->getTerminator();
+ Instruction *Term = BB->getTerminator();
if (!Term)
return;
@@ -914,7 +936,7 @@ void StructurizeCFG::rebuildSSA() {
}
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
- const DivergenceAnalysis &DA) {
+ const LegacyDivergenceAnalysis &DA) {
for (auto E : R->elements()) {
if (!E->isSubRegion()) {
auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
@@ -962,7 +984,7 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
// but we shouldn't rely on metadata for correctness!
unsigned UniformMDKindID =
R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
- DA = &getAnalysis<DivergenceAnalysis>();
+ DA = &getAnalysis<LegacyDivergenceAnalysis>();
if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index f8cd6c17a5a6..0f6db21f73b6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -61,6 +61,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
@@ -68,6 +69,8 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -124,6 +127,12 @@ struct AllocaDerivedValueTracker {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(I);
+      // If the alloca-derived argument is passed byval, it is neither an escape
+      // point nor a use of the alloca. Calling with byval copies the contents
+ // of the alloca into argument registers or stack slots, which exist
+ // beyond the lifetime of the current frame.
+ if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U)))
+ continue;
bool IsNocapture =
CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
callUsesLocalStack(CS, IsNocapture);
@@ -488,12 +497,10 @@ static CallInst *findTRECandidate(Instruction *TI,
return CI;
}
-static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
- BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+static bool eliminateRecursiveTailCall(
+ CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set
@@ -566,7 +573,8 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
NewEntry->takeName(OldEntry);
OldEntry->setName("tailrecurse");
- BranchInst::Create(OldEntry, NewEntry);
+ BranchInst *BI = BranchInst::Create(OldEntry, NewEntry);
+ BI->setDebugLoc(CI->getDebugLoc());
// If this tail call is marked 'tail' and if there are any allocas in the
// entry block, move them up to the new entry block.
@@ -592,6 +600,10 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN);
}
+ // The entry block was changed from OldEntry to NewEntry.
+ // The forward DominatorTree needs to be recalculated when the EntryBB is
+ // changed. In this corner-case we recalculate the entire tree.
+ DTU.recalculate(*NewEntry->getParent());
}
// If this function has self recursive calls in the tail position where some
@@ -667,6 +679,7 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call.
+ DTU.insertEdge(BB, OldEntry);
++NumEliminated;
return true;
}
@@ -675,7 +688,7 @@ static bool foldReturnAndProcessPred(
BasicBlock *BB, ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail, SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI,
- AliasAnalysis *AA, OptimizationRemarkEmitter *ORE) {
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
bool Change = false;
// Make sure this block is a trivial return block.
@@ -689,7 +702,7 @@ static bool foldReturnAndProcessPred(
SmallVector<BranchInst*, 8> UncondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *Pred = *PI;
- TerminatorInst *PTI = Pred->getTerminator();
+ Instruction *PTI = Pred->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
if (BI->isUnconditional())
UncondBranchPreds.push_back(BI);
@@ -701,17 +714,17 @@ static bool foldReturnAndProcessPred(
if (CallInst *CI = findTRECandidate(BI, CannotTailCallElimCallsMarkedTail, TTI)){
LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
- ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
+ ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU);
// Cleanup: if all predecessors of BB have been eliminated by
// FoldReturnIntoUncondBranch, delete it. It is important to empty it,
// because the ret instruction in there is still using a value which
// eliminateRecursiveTailCall will attempt to remove.
if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- BB->eraseFromParent();
+ DTU.deleteBB(BB);
eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, AA, ORE);
+ ArgumentPHIs, AA, ORE, DTU);
++NumRetDuped;
Change = true;
}
@@ -720,24 +733,23 @@ static bool foldReturnAndProcessPred(
return Change;
}
-static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail,
- const TargetTransformInfo *TTI,
- AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+static bool processReturningBlock(
+ ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail, const TargetTransformInfo *TTI,
+ AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) {
CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI);
if (!CI)
return false;
return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, AA, ORE);
+ ArgumentPHIs, AA, ORE, DTU);
}
static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
AliasAnalysis *AA,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE,
+ DomTreeUpdater &DTU) {
if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
@@ -772,11 +784,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall,
- TTI, AA, ORE);
+ TTI, AA, ORE, DTU);
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = foldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- !CanTRETailMarkedCall, TTI, AA, ORE);
+ Change = foldReturnAndProcessPred(
+ BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall, TTI, AA, ORE, DTU);
MadeChange |= Change;
}
}
@@ -809,16 +821,27 @@ struct TailCallElim : public FunctionPass {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<PostDominatorTreeWrapperPass>();
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+ auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+    // There is no noticeable performance difference here between Lazy and Eager
+ // UpdateStrategy based on some test results. It is feasible to switch the
+ // UpdateStrategy to Lazy if we find it profitable later.
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
+
return eliminateTailRecursion(
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
&getAnalysis<AAResultsWrapperPass>().getAAResults(),
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU);
}
};
}
@@ -842,12 +865,19 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
AliasAnalysis &AA = AM.getResult<AAManager>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
- bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE);
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
+  // There is no noticeable performance difference here between Lazy and Eager
+ // UpdateStrategy based on some test results. It is feasible to switch the
+ // UpdateStrategy to Lazy if we find it profitable later.
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
+ bool Changed = eliminateTailRecursion(F, &TTI, &AA, &ORE, DTU);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
return PA;
}
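The DomTreeUpdater plumbing added above boils down to the following stand-alone pattern (the blocks, trees, and the rewired edge are assumed to exist; this is a sketch, not code from the patch):

    // Keep DT and PDT consistent while a transform rewires one CFG edge.
    DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
    // ... change the IR so that Pred now branches to NewSucc instead of OldSucc ...
    DTU.applyUpdates({{DominatorTree::Delete, Pred, OldSucc},
                      {DominatorTree::Insert, Pred, NewSucc}});
    // A block that became unreachable can be handed to the updater for deletion.
    DTU.deleteBB(DeadBB);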
diff --git a/contrib/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/contrib/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
new file mode 100644
index 000000000000..80f761e53774
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -0,0 +1,149 @@
+//===- LoopTransformWarning.cpp - ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit warnings if forced code transformations have not been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "transform-warning"
+
+/// Emit warnings for forced (i.e. user-defined) loop transformations which have
+/// still not been performed.
+static void warnAboutLeftoverTransformations(Loop *L,
+ OptimizationRemarkEmitter *ORE) {
+ if (hasUnrollTransformation(L) == TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Leftover unroll transformation\n");
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedUnrolling",
+ L->getStartLoc(), L->getHeader())
+ << "loop not unrolled: the optimizer was unable to perform the "
+ "requested transformation; the transformation might be disabled or "
+ "specified as part of an unsupported transformation ordering");
+ }
+
+ if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Leftover unroll-and-jam transformation\n");
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedUnrollAndJamming",
+ L->getStartLoc(), L->getHeader())
+ << "loop not unroll-and-jammed: the optimizer was unable to perform "
+ "the requested transformation; the transformation might be disabled "
+ "or specified as part of an unsupported transformation ordering");
+ }
+
+ if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
+ Optional<int> VectorizeWidth =
+ getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<int> InterleaveCount =
+ getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
+
+ if (VectorizeWidth.getValueOr(0) != 1)
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedVectorization",
+ L->getStartLoc(), L->getHeader())
+ << "loop not vectorized: the optimizer was unable to perform the "
+ "requested transformation; the transformation might be disabled "
+ "or specified as part of an unsupported transformation ordering");
+ else if (InterleaveCount.getValueOr(0) != 1)
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedInterleaving",
+ L->getStartLoc(), L->getHeader())
+ << "loop not interleaved: the optimizer was unable to perform the "
+ "requested transformation; the transformation might be disabled "
+ "or specified as part of an unsupported transformation ordering");
+ }
+
+ if (hasDistributeTransformation(L) == TM_ForcedByUser) {
+ LLVM_DEBUG(dbgs() << "Leftover distribute transformation\n");
+ ORE->emit(
+ DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+ "FailedRequestedDistribution",
+ L->getStartLoc(), L->getHeader())
+ << "loop not distributed: the optimizer was unable to perform the "
+ "requested transformation; the transformation might be disabled or "
+ "specified as part of an unsupported transformation ordering");
+ }
+}
+
+static void warnAboutLeftoverTransformations(Function *F, LoopInfo *LI,
+ OptimizationRemarkEmitter *ORE) {
+ for (auto *L : LI->getLoopsInPreorder())
+ warnAboutLeftoverTransformations(L, ORE);
+}
+
+// New pass manager boilerplate
+PreservedAnalyses
+WarnMissedTransformationsPass::run(Function &F, FunctionAnalysisManager &AM) {
+ // Do not warn about not applied transformations if optimizations are
+ // disabled.
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return PreservedAnalyses::all();
+
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+
+ warnAboutLeftoverTransformations(&F, &LI, &ORE);
+
+ return PreservedAnalyses::all();
+}
+
+// Legacy pass manager boilerplate
+namespace {
+class WarnMissedTransformationsLegacy : public FunctionPass {
+public:
+ static char ID;
+
+ explicit WarnMissedTransformationsLegacy() : FunctionPass(ID) {
+ initializeWarnMissedTransformationsLegacyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+
+ auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ warnAboutLeftoverTransformations(&F, &LI, &ORE);
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+
+ AU.setPreservesAll();
+ }
+};
+} // end anonymous namespace
+
+char WarnMissedTransformationsLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(WarnMissedTransformationsLegacy, "transform-warning",
+ "Warn about non-applied transformations", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(WarnMissedTransformationsLegacy, "transform-warning",
+ "Warn about non-applied transformations", false, false)
+
+Pass *llvm::createWarnMissedTransformationsPass() {
+ return new WarnMissedTransformationsLegacy();
+}
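For context, the kind of user request this pass reports on can be produced from source like the following (a hedged sketch; it assumes the front end records the pragma as a forced llvm.loop transformation, which is what hasVectorizeTransformation() checks for):

    // If the vectorizer cannot honour the forced request below, the
    // transform-warning pass emits a "loop not vectorized: ..." failure remark
    // pointing at this loop.
    void scale(float *A, int N) {
    #pragma clang loop vectorize(enable) interleave_count(4)
      for (int I = 0; I < N; ++I)
        A[I] *= 2.0f;
    }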
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index e3ef42362223..564537af0c2a 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -209,10 +209,18 @@ static bool addDiscriminators(Function &F) {
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
- I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
- LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
- << DIL->getColumn() << ":" << Discriminator << " " << I
- << "\n");
+ auto NewDIL = DIL->setBaseDiscriminator(Discriminator);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
+ << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " "
+ << I << "\n");
+ } else {
+ I.setDebugLoc(NewDIL.getValue());
+ LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn() << ":" << Discriminator << " " << I
+ << "\n");
+ }
Changed = true;
}
}
@@ -224,23 +232,31 @@ static bool addDiscriminators(Function &F) {
for (BasicBlock &B : F) {
LocationSet CallLocations;
for (auto &I : B.getInstList()) {
- CallInst *Current = dyn_cast<CallInst>(&I);
// We bypass intrinsic calls for the following two reasons:
        // 1) We want to avoid a non-deterministic assignment of
// discriminators.
// 2) We want to minimize the number of base discriminators used.
- if (!Current || isa<IntrinsicInst>(&I))
+ if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))
continue;
- DILocation *CurrentDIL = Current->getDebugLoc();
+ DILocation *CurrentDIL = I.getDebugLoc();
if (!CurrentDIL)
continue;
Location L =
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
unsigned Discriminator = ++LDM[L];
- Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
- Changed = true;
+ auto NewDIL = CurrentDIL->setBaseDiscriminator(Discriminator);
+ if (!NewDIL) {
+ LLVM_DEBUG(dbgs()
+ << "Could not encode discriminator: "
+ << CurrentDIL->getFilename() << ":"
+ << CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
+ << ":" << Discriminator << " " << I << "\n");
+ } else {
+ I.setDebugLoc(NewDIL.getValue());
+ Changed = true;
+ }
}
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 516a785dce1e..7da768252fc1 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -20,11 +20,13 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -37,6 +39,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -45,42 +48,58 @@
using namespace llvm;
-void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) {
- assert((pred_begin(BB) == pred_end(BB) ||
- // Can delete self loop.
- BB->getSinglePredecessor() == BB) && "Block is not dead!");
- TerminatorInst *BBTerm = BB->getTerminator();
- std::vector<DominatorTree::UpdateType> Updates;
+void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
+ SmallVector<BasicBlock *, 1> BBs = {BB};
+ DeleteDeadBlocks(BBs, DTU);
+}
- // Loop through all of our successors and make sure they know that one
- // of their predecessors is going away.
- if (DDT)
- Updates.reserve(BBTerm->getNumSuccessors());
- for (BasicBlock *Succ : BBTerm->successors()) {
- Succ->removePredecessor(BB);
- if (DDT)
- Updates.push_back({DominatorTree::Delete, BB, Succ});
- }
+void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
+ DomTreeUpdater *DTU) {
+#ifndef NDEBUG
+  // Make sure that all predecessors of each dead block are also dead.
+ SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
+ assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+ for (auto *BB : Dead)
+ for (BasicBlock *Pred : predecessors(BB))
+ assert(Dead.count(Pred) && "All predecessors must be dead!");
+#endif
+
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+ for (auto *BB : BBs) {
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ for (BasicBlock *Succ : successors(BB)) {
+ Succ->removePredecessor(BB);
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
- // Zap all the instructions in the block.
- while (!BB->empty()) {
- Instruction &I = BB->back();
- // If this instruction is used, replace uses with an arbitrary value.
- // Because control flow can't get here, we don't care what we replace the
- // value with. Note that since this block is unreachable, and all values
- // contained within it must dominate their uses, that all uses will
- // eventually be removed (they are themselves dead).
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- BB->getInstList().pop_back();
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, that all uses will
+ // eventually be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ BB->getInstList().pop_back();
+ }
+ new UnreachableInst(BB->getContext(), BB);
+ assert(BB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(BB->getTerminator()) &&
+ "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
}
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
- if (DDT) {
- DDT->applyUpdates(Updates);
- DDT->deleteBB(BB); // Deferred deletion of BB.
- } else {
- BB->eraseFromParent(); // Zap the block!
- }
+ for (BasicBlock *BB : BBs)
+ if (DTU)
+ DTU->deleteBB(BB);
+ else
+ BB->eraseFromParent();
}
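A minimal caller of the new batched interface might look like this (illustrative; collecting the dead blocks and obtaining DT/PDT is assumed to happen in the surrounding pass):

    // Delete a set of unreachable blocks while keeping both dominator trees
    // valid. DeadBBs must already satisfy the invariant asserted above: every
    // predecessor of a dead block is itself dead.
    SmallVector<BasicBlock *, 8> DeadBBs; // filled in by the caller
    DomTreeUpdater DTU(&DT, &PDT, DomTreeUpdater::UpdateStrategy::Lazy);
    DeleteDeadBlocks(DeadBBs, &DTU);
    DTU.flush(); // force the lazily queued updates if the trees are needed now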
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
@@ -115,12 +134,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
return Changed;
}
-bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
- LoopInfo *LI,
- MemoryDependenceResults *MemDep,
- DeferredDominance *DDT) {
- assert(!(DT && DDT) && "Cannot call with both DT and DDT.");
-
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ MemoryDependenceResults *MemDep) {
if (BB->hasAddressTaken())
return false;
@@ -131,7 +147,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Don't break self-loops.
if (PredBB == BB) return false;
// Don't break unwinding instructions.
- if (PredBB->getTerminator()->isExceptional())
+ if (PredBB->getTerminator()->isExceptionalTerminator())
return false;
// Can't merge if there are multiple distinct successors.
@@ -154,10 +170,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
FoldSingleEntryPHINodes(BB, MemDep);
}
- // Deferred DT update: Collect all the edges that exit BB. These
- // dominator edges will be redirected from Pred.
+ // DTU update: Collect all the edges that exit BB.
+ // These dominator edges will be redirected from Pred.
std::vector<DominatorTree::UpdateType> Updates;
- if (DDT) {
+ if (DTU) {
Updates.reserve(1 + (2 * succ_size(BB)));
Updates.push_back({DominatorTree::Delete, PredBB, BB});
for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
@@ -166,6 +182,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
}
}
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin()));
+
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -175,6 +194,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
// Move all definitions in the successor to the predecessor...
PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+ new UnreachableInst(BB->getContext(), BB);
// Eliminate duplicate dbg.values describing the entry PHI node post-splice.
for (auto Incoming : IncomingValues) {
@@ -195,28 +215,24 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
if (!PredBB->hasName())
PredBB->takeName(BB);
- // Finally, erase the old block and update dominator info.
- if (DT)
- if (DomTreeNode *DTN = DT->getNode(BB)) {
- DomTreeNode *PredDTN = DT->getNode(PredBB);
- SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
- for (DomTreeNode *DI : Children)
- DT->changeImmediateDominator(DI, PredDTN);
-
- DT->eraseNode(BB);
- }
-
if (LI)
LI->removeBlock(BB);
if (MemDep)
MemDep->invalidateCachedPredecessors();
- if (DDT) {
- DDT->deleteBB(BB); // Deferred deletion of BB.
- DDT->applyUpdates(Updates);
- } else {
- BB->eraseFromParent(); // Nuke BB.
+ // Finally, erase the old block and update dominator info.
+ if (DTU) {
+ assert(BB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(BB->getTerminator()) &&
+ "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(BB);
+ }
+
+ else {
+ BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
}
return true;
}
@@ -261,13 +277,14 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
}
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
- LoopInfo *LI) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
- TerminatorInst *LatchTerm = BB->getTerminator();
- if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI)
- .setPreserveLCSSA()))
+ Instruction *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(
+ LatchTerm, SuccNum,
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
@@ -277,14 +294,14 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, &Succ->front(), DT, LI);
+ return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
"Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), DT, LI);
+ return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU);
}
unsigned
@@ -292,7 +309,7 @@ llvm::SplitAllCriticalEdges(Function &F,
const CriticalEdgeSplittingOptions &Options) {
unsigned NumBroken = 0;
for (BasicBlock &BB : F) {
- TerminatorInst *TI = BB.getTerminator();
+ Instruction *TI = BB.getTerminator();
if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, Options))
@@ -302,7 +319,8 @@ llvm::SplitAllCriticalEdges(Function &F,
}
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
- DominatorTree *DT, LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU) {
BasicBlock::iterator SplitIt = SplitPt->getIterator();
while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
++SplitIt;
@@ -324,6 +342,11 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
DT->changeImmediateDominator(I, NewNode);
}
+ // Move MemoryAccesses that are still tracked in Old but now belong to New,
+ // and update accesses in successor blocks accordingly.
+ if (MSSAU)
+ MSSAU->moveAllAfterSpliceBlocks(Old, New, &*(New->begin()));
+
return New;
}
@@ -331,6 +354,7 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
bool PreserveLCSSA, bool &HasLoopExit) {
// Update dominator tree if available.
if (DT) {
@@ -343,6 +367,10 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
}
+ // Update MemoryPhis after split if MemorySSA is available
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(OldBB, NewBB, Preds);
+
// The rest of the logic is only relevant for updating the loop structures.
if (!LI)
return;
@@ -483,7 +511,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
const char *Suffix, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
// Do not attempt to split that which cannot be split.
if (!BB->canSplitPredecessors())
return nullptr;
@@ -495,7 +524,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
std::string NewName = std::string(Suffix) + ".split-lp";
SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT,
- LI, PreserveLCSSA);
+ LI, MSSAU, PreserveLCSSA);
return NewBBs[0];
}
@@ -529,7 +558,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCSSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,
+ UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, MSSAU, PreserveLCSSA,
HasLoopExit);
if (!Preds.empty()) {
@@ -545,6 +574,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
const char *Suffix1, const char *Suffix2,
SmallVectorImpl<BasicBlock *> &NewBBs,
DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
@@ -570,7 +600,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA,
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, MSSAU, PreserveLCSSA,
HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
@@ -606,7 +636,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
// Update DominatorTree, LoopInfo, and LCSSA analysis information.
HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI,
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, MSSAU,
PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
@@ -644,7 +674,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
}
ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
- BasicBlock *Pred) {
+ BasicBlock *Pred,
+ DomTreeUpdater *DTU) {
Instruction *UncondBranch = Pred->getTerminator();
// Clone the return and add it to the end of the predecessor.
Instruction *NewRet = RI->clone();
@@ -678,19 +709,24 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// longer branch to them.
BB->removePredecessor(Pred);
UncondBranch->eraseFromParent();
+
+ if (DTU)
+ DTU->deleteEdge(Pred, BB);
+
return cast<ReturnInst>(NewRet);
}
-TerminatorInst *
-llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
- bool Unreachable, MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI) {
+Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights,
+ DominatorTree *DT, LoopInfo *LI) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- TerminatorInst *HeadOldTerm = Head->getTerminator();
+ Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- TerminatorInst *CheckTerm;
+ Instruction *CheckTerm;
if (Unreachable)
CheckTerm = new UnreachableInst(C, ThenBlock);
else
@@ -725,12 +761,12 @@ llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
}
void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
- TerminatorInst **ThenTerm,
- TerminatorInst **ElseTerm,
+ Instruction **ThenTerm,
+ Instruction **ElseTerm,
MDNode *BranchWeights) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- TerminatorInst *HeadOldTerm = Head->getTerminator();
+ Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
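The BasicBlockUtils changes above thread an optional MemorySSAUpdater through the splitting helpers and move MergeBlockIntoPredecessor onto a DomTreeUpdater. A minimal caller sketch, assuming the updated entry point is MergeBlockIntoPredecessor(BasicBlock *, DomTreeUpdater *, ...) with the trailing parameters defaulted (the mergeWithUpdater helper itself is illustrative, not part of the commit):

#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Merge BB into its single predecessor while keeping the dominator tree
// consistent; with a lazy DomTreeUpdater the block deletion is deferred.
static bool mergeWithUpdater(BasicBlock *BB, DominatorTree &DT) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  return MergeBlockIntoPredecessor(BB, &DTU);
}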
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 3e30c27a9f33..fafc9aaba5c9 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -129,7 +130,7 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
}
BasicBlock *
-llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
const CriticalEdgeSplittingOptions &Options) {
if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
return nullptr;
@@ -198,6 +199,11 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If we have nothing to update, just return.
auto *DT = Options.DT;
auto *LI = Options.LI;
+ auto *MSSAU = Options.MSSAU;
+ if (MSSAU)
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
+
if (!DT && !LI)
return NewBB;
@@ -283,7 +289,7 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (!LoopPreds.empty()) {
assert(!DestBB->isEHPad() && "We don't split edges to EH pads!");
BasicBlock *NewExitBB = SplitBlockPredecessors(
- DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA);
+ DestBB, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
@@ -312,7 +318,7 @@ findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
BasicBlock *IBB = nullptr;
for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
BasicBlock *PredBB = PN->getIncomingBlock(Pred);
- TerminatorInst *PredTerm = PredBB->getTerminator();
+ Instruction *PredTerm = PredBB->getTerminator();
switch (PredTerm->getOpcode()) {
case Instruction::IndirectBr:
if (IBB)
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 245200362018..3466dedd3236 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -38,6 +38,7 @@ STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
+STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
static bool setDoesNotAccessMemory(Function &F) {
if (F.doesNotAccessMemory())
@@ -105,6 +106,14 @@ static bool setRetNonNull(Function &F) {
return true;
}
+static bool setReturnedArg(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::Returned))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::Returned);
+ ++NumReturnedArg;
+ return true;
+}
+
static bool setNonLazyBind(Function &F) {
if (F.hasFnAttribute(Attribute::NonLazyBind))
return false;
@@ -155,10 +164,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_strcpy:
- case LibFunc_stpcpy:
+ case LibFunc_strncpy:
case LibFunc_strcat:
case LibFunc_strncat:
- case LibFunc_strncpy:
+ Changed |= setReturnedArg(F, 0);
+ LLVM_FALLTHROUGH;
+ case LibFunc_stpcpy:
case LibFunc_stpncpy:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
@@ -270,9 +281,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
return Changed;
case LibFunc_memcpy:
+ case LibFunc_memmove:
+ Changed |= setReturnedArg(F, 0);
+ LLVM_FALLTHROUGH;
case LibFunc_mempcpy:
case LibFunc_memccpy:
- case LibFunc_memmove:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
@@ -741,6 +754,8 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
LibFunc DoubleFn, LibFunc FloatFn,
LibFunc LongDoubleFn) {
switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ return false;
case Type::FloatTyID:
return TLI->has(FloatFn);
case Type::DoubleTyID:
@@ -750,6 +765,24 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
}
}
+StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
+ assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+ "Cannot get name for unavailable function!");
+
+ switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ llvm_unreachable("No name for HalfTy!");
+ case Type::FloatTyID:
+ return TLI->getName(FloatFn);
+ case Type::DoubleTyID:
+ return TLI->getName(DoubleFn);
+ default:
+ return TLI->getName(LongDoubleFn);
+ }
+}
+
//- Emit LibCalls ------------------------------------------------------------//
Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
@@ -927,10 +960,10 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
}
}
-Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
- const AttributeList &Attrs) {
- SmallString<20> NameBuffer;
- appendTypeSuffix(Op, Name, NameBuffer);
+static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
+ IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
@@ -949,8 +982,29 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
return CI;
}
+Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ SmallString<20> NameBuffer;
+ appendTypeSuffix(Op, Name, NameBuffer);
+
+ return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+}
+
+Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, IRBuilder<> &B,
+ const AttributeList &Attrs) {
+ // Get the name of the function according to TLI.
+ StringRef Name = getUnaryFloatFn(TLI, Op->getType(),
+ DoubleFn, FloatFn, LongDoubleFn);
+
+ return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+}
+
Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
IRBuilder<> &B, const AttributeList &Attrs) {
+ assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
+
SmallString<20> NameBuffer;
appendTypeSuffix(Op1, Name, NameBuffer);
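With the BuildLibCalls changes, attribute inference now marks the destination argument of routines such as strcpy, strncpy, strcat, strncat, memcpy and memmove as 'returned'. A small driver sketch, relying only on the inferLibFuncAttributes(Function &, const TargetLibraryInfo &) entry point visible in the hunks above (annotateLibCalls is an illustrative helper name):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;

// Re-run library-call attribute inference over every known declaration,
// picking up the newly inferred 'returned' argument attributes.
static bool annotateLibCalls(Module &M, const TargetLibraryInfo &TLI) {
  bool Changed = false;
  for (Function &F : M)
    if (F.isDeclaration() && !F.isIntrinsic())
      Changed |= inferLibFuncAttributes(F, TLI);
  return Changed;
}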
diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 6d18d0614611..e58ddcf34667 100644
--- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -177,8 +177,8 @@ static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) {
InsertBefore = &*std::next(CS.getInstruction()->getIterator());
// Bitcast the return value to the correct type.
- auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
- RetTy, "", InsertBefore);
+ auto *Cast = CastInst::CreateBitOrPointerCast(CS.getInstruction(), RetTy, "",
+ InsertBefore);
if (RetBitCast)
*RetBitCast = Cast;
@@ -270,8 +270,8 @@ static Instruction *versionCallSite(CallSite CS, Value *Callee,
// Create an if-then-else structure. The original instruction is moved into
// the "else" block, and a clone of the original instruction is placed in the
// "then" block.
- TerminatorInst *ThenTerm = nullptr;
- TerminatorInst *ElseTerm = nullptr;
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
BranchWeights);
BasicBlock *ThenBlock = ThenTerm->getParent();
@@ -321,12 +321,14 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
const char **FailureReason) {
assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+ auto &DL = Callee->getParent()->getDataLayout();
+
// Check the return type. The callee's return value type must be bitcast
// compatible with the call site's type.
Type *CallRetTy = CS.getInstruction()->getType();
Type *FuncRetTy = Callee->getReturnType();
if (CallRetTy != FuncRetTy)
- if (!CastInst::isBitCastable(FuncRetTy, CallRetTy)) {
+ if (!CastInst::isBitOrNoopPointerCastable(FuncRetTy, CallRetTy, DL)) {
if (FailureReason)
*FailureReason = "Return type mismatch";
return false;
@@ -351,7 +353,7 @@ bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
Type *ActualTy = CS.getArgument(I)->getType();
if (FormalTy == ActualTy)
continue;
- if (!CastInst::isBitCastable(ActualTy, FormalTy)) {
+ if (!CastInst::isBitOrNoopPointerCastable(ActualTy, FormalTy, DL)) {
if (FailureReason)
*FailureReason = "Argument type mismatch";
return false;
@@ -391,21 +393,46 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
// to the correct type.
auto CalleeType = Callee->getFunctionType();
auto CalleeParamNum = CalleeType->getNumParams();
+
+ LLVMContext &Ctx = Callee->getContext();
+ const AttributeList &CallerPAL = CS.getAttributes();
+ // The new list of argument attributes.
+ SmallVector<AttributeSet, 4> NewArgAttrs;
+ bool AttributeChanged = false;
+
for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) {
auto *Arg = CS.getArgument(ArgNo);
Type *FormalTy = CalleeType->getParamType(ArgNo);
Type *ActualTy = Arg->getType();
if (FormalTy != ActualTy) {
- auto *Cast = CastInst::Create(Instruction::BitCast, Arg, FormalTy, "",
- CS.getInstruction());
+ auto *Cast = CastInst::CreateBitOrPointerCast(Arg, FormalTy, "",
+ CS.getInstruction());
CS.setArgument(ArgNo, Cast);
- }
+
+ // Remove any incompatible attributes for the argument.
+ AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
+ ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
+ NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
+ AttributeChanged = true;
+ } else
+ NewArgAttrs.push_back(CallerPAL.getParamAttributes(ArgNo));
}
// If the return type of the call site doesn't match that of the callee, cast
// the returned value to the appropriate type.
- if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy)
+ // Remove any incompatible return value attribute.
+ AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
+ if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) {
createRetBitCast(CS, CallSiteRetTy, RetBitCast);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy));
+ AttributeChanged = true;
+ }
+
+ // Set the new callsite attribute.
+ if (AttributeChanged)
+ CS.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(),
+ AttributeSet::get(Ctx, RAttrs),
+ NewArgAttrs));
return CS.getInstruction();
}
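The CallPromotionUtils changes relax the compatibility checks to isBitOrNoopPointerCastable and strip attributes that become invalid after promotion. A hedged usage sketch, using only the isLegalToPromote/promoteCall entry points shown in the hunks above (tryPromote is an illustrative wrapper):

#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
using namespace llvm;

// Promote an indirect call site to a direct call of DirectCallee, but only
// after the legality check; FailureReason explains a rejection.
static bool tryPromote(CallSite CS, Function *DirectCallee) {
  const char *FailureReason = nullptr;
  if (!isLegalToPromote(CS, DirectCallee, &FailureReason))
    return false; // e.g. "Return type mismatch" or "Argument type mismatch"
  promoteCall(CS, DirectCallee);
  return true;
}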
diff --git a/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
new file mode 100644
index 000000000000..cf41fd2e14c0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -0,0 +1,105 @@
+//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Currently this file implements partial alias canonicalization, to
+// flatten chains of aliases (this is also done by GlobalOpt, but that pass
+// is not run for -O0 compiles). E.g.
+// @a = alias i8, i8 *@b
+// @b = alias i8, i8 *@g
+//
+// will be converted to:
+// @a = alias i8, i8 *@g <-- @a is now an alias to base object @g
+// @b = alias i8, i8 *@g
+//
+// Eventually this file will implement full alias canonicalization, so that
+// all aliasees are private anonymous values. E.g.
+// @a = alias i8, i8 *@g
+// @g = global i8 0
+//
+// will be converted to:
+// @0 = private global
+// @a = alias i8, i8* @0
+// @g = alias i8, i8* @0
+//
+// This simplifies optimization and ThinLTO linking of the original symbols.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
+
+using namespace llvm;
+
+namespace {
+
+static Constant *canonicalizeAlias(Constant *C, bool &Changed) {
+ if (auto *GA = dyn_cast<GlobalAlias>(C)) {
+ auto *NewAliasee = canonicalizeAlias(GA->getAliasee(), Changed);
+ if (NewAliasee != GA->getAliasee()) {
+ GA->setAliasee(NewAliasee);
+ Changed = true;
+ }
+ return NewAliasee;
+ }
+
+ auto *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE)
+ return C;
+
+ std::vector<Constant *> Ops;
+ for (Use &U : CE->operands())
+ Ops.push_back(canonicalizeAlias(cast<Constant>(U), Changed));
+ return CE->getWithOperands(Ops);
+}
+
+/// Convert aliases to canonical form.
+static bool canonicalizeAliases(Module &M) {
+ bool Changed = false;
+ for (auto &GA : M.aliases())
+ canonicalizeAlias(&GA, Changed);
+ return Changed;
+}
+
+// Legacy pass that canonicalizes aliases.
+class CanonicalizeAliasesLegacyPass : public ModulePass {
+
+public:
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ /// Specify pass name for debug output
+ StringRef getPassName() const override { return "Canonicalize Aliases"; }
+
+ explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override { return canonicalizeAliases(M); }
+};
+char CanonicalizeAliasesLegacyPass::ID = 0;
+
+} // anonymous namespace
+
+PreservedAnalyses CanonicalizeAliasesPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!canonicalizeAliases(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
+ "Canonicalize aliases", false, false)
+INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
+ "Canonicalize aliases", false, false)
+
+namespace llvm {
+ModulePass *createCanonicalizeAliasesPass() {
+ return new CanonicalizeAliasesLegacyPass();
+}
+} // namespace llvm
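A short sketch of how the new pass could be scheduled: the legacy factory createCanonicalizeAliasesPass is defined at the end of the file above, and its declaration is assumed here to live in llvm/Transforms/Utils.h; the runCanonicalizeAliases wrapper is illustrative only.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;

// Flatten alias chains in M by running the legacy CanonicalizeAliases pass.
static void runCanonicalizeAliases(Module &M) {
  legacy::PassManager PM;
  PM.add(createCanonicalizeAliasesPass());
  PM.run(M);
}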
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 9ae60962a631..8f8c601f5f13 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -18,11 +18,11 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -32,6 +32,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <map>
using namespace llvm;
@@ -235,8 +236,8 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
ArgTypes, F->getFunctionType()->isVarArg());
// Create the new function...
- Function *NewF =
- Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent());
+ Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName(), F->getParent());
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
@@ -365,7 +366,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
// Finally, clone over the terminator.
- const TerminatorInst *OldTI = BB->getTerminator();
+ const Instruction *OldTI = BB->getTerminator();
bool TerminatorDone = false;
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
@@ -414,8 +415,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
CodeInfo->OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
- const TerminatorInst *TI = BB->getTerminator();
- for (const BasicBlock *Succ : TI->successors())
+ const Instruction *TI = BB->getTerminator();
+ for (const BasicBlock *Succ : successors(TI))
ToClone.push_back(Succ);
}
@@ -795,11 +796,12 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
/// Duplicate non-Phi instructions from the beginning of the block up to the
/// StopAt instruction into a split block between BB and its predecessor.
-BasicBlock *
-llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
- Instruction *StopAt,
- ValueToValueMapTy &ValueMapping,
- DominatorTree *DT) {
+BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
+ BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping, DomTreeUpdater &DTU) {
+
+ assert(count(successors(PredBB), BB) == 1 &&
+ "There must be a single edge between PredBB and BB!");
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
@@ -807,10 +809,16 @@ llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
- BasicBlock *NewBB = SplitEdge(PredBB, BB, DT);
+ BasicBlock *NewBB = SplitEdge(PredBB, BB);
NewBB->setName(PredBB->getName() + ".split");
Instruction *NewTerm = NewBB->getTerminator();
+ // FIXME: SplitEdge does not yet take a DTU, so we include the split edge
+ // in the update set here.
+ DTU.applyUpdates({{DominatorTree::Delete, PredBB, BB},
+ {DominatorTree::Insert, PredBB, NewBB},
+ {DominatorTree::Insert, NewBB, BB}});
+
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
// Stop once we see the terminator too. This covers the case where BB's
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index c7d68bab8170..659993aa5478 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -74,8 +74,9 @@ std::unique_ptr<Module> llvm::CloneModule(
// Loop over the functions in the module, making external functions as before
for (const Function &I : M) {
- Function *NF = Function::Create(cast<FunctionType>(I.getValueType()),
- I.getLinkage(), I.getName(), New.get());
+ Function *NF =
+ Function::Create(cast<FunctionType>(I.getValueType()), I.getLinkage(),
+ I.getAddressSpace(), I.getName(), New.get());
NF->copyAttributesFrom(&I);
VMap[&I] = NF;
}
@@ -91,8 +92,8 @@ std::unique_ptr<Module> llvm::CloneModule(
GlobalValue *GV;
if (I->getValueType()->isFunctionTy())
GV = Function::Create(cast<FunctionType>(I->getValueType()),
- GlobalValue::ExternalLinkage, I->getName(),
- New.get());
+ GlobalValue::ExternalLinkage,
+ I->getAddressSpace(), I->getName(), New.get());
else
GV = new GlobalVariable(
*New, I->getValueType(), false, GlobalValue::ExternalLinkage,
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index cb349e34606c..25d4ae583ecc 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -57,6 +57,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -167,14 +168,22 @@ static bool isBlockValidForExtraction(const BasicBlock &BB,
continue;
}
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (const Function *F = CI->getCalledFunction())
- if (F->getIntrinsicID() == Intrinsic::vastart) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (const Function *F = CI->getCalledFunction()) {
+ auto IID = F->getIntrinsicID();
+ if (IID == Intrinsic::vastart) {
if (AllowVarArgs)
continue;
else
return false;
}
+
+ // Currently, we miscompile outlined copies of eh_typeid_for. There are
+ // proposals for fixing this in llvm.org/PR39545.
+ if (IID == Intrinsic::eh_typeid_for)
+ return false;
+ }
+ }
}
return true;
@@ -228,19 +237,21 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI, bool AllowVarArgs,
- bool AllowAlloca)
+ bool AllowAlloca, std::string Suffix)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AllowVarArgs(AllowVarArgs),
- Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)) {}
+ Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
+ Suffix(Suffix) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI)
+ BranchProbabilityInfo *BPI, std::string Suffix)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AllowVarArgs(false),
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
/* AllowVarArgs */ false,
- /* AllowAlloca */ false)) {}
+ /* AllowAlloca */ false)),
+ Suffix(Suffix) {}
/// definedInRegion - Return true if the specified value is defined in the
/// extracted region.
@@ -321,8 +332,7 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
default: {
IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
- IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (IntrInst->isLifetimeStartOrEnd())
break;
return false;
}
@@ -520,10 +530,10 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
}
}
-/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
-/// region, we need to split the entry block of the region so that the PHI node
-/// is easier to deal with.
-void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
+/// of the region, we need to split the entry block of the region so that the
+/// PHI node is easier to deal with.
+void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
unsigned NumPredsFromRegion = 0;
unsigned NumPredsOutsideRegion = 0;
@@ -566,7 +576,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// changing them to branch to NewBB instead.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (Blocks.count(PN->getIncomingBlock(i))) {
- TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+ Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
TI->replaceUsesOfWith(OldPred, NewBB);
}
@@ -595,6 +605,56 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
}
}
+/// severSplitPHINodesOfExits - If PHI nodes in exit blocks have inputs from
+/// the outlined region, we split each such PHI in two: one with inputs from
+/// the region and the other with the remaining incoming blocks; the first
+/// PHIs are then placed inside the outlined region.
+void CodeExtractor::severSplitPHINodesOfExits(
+ const SmallPtrSetImpl<BasicBlock *> &Exits) {
+ for (BasicBlock *ExitBB : Exits) {
+ BasicBlock *NewBB = nullptr;
+
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ SmallVector<unsigned, 2> IncomingVals;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+ if (Blocks.count(PN.getIncomingBlock(i)))
+ IncomingVals.push_back(i);
+
+ // Do not process the PHI if there is at most one predecessor from the
+ // region; with exactly one such predecessor, only that incoming block is
+ // replaced with the codeRepl block, so it is safe to skip the PHI.
+ if (IncomingVals.size() <= 1)
+ continue;
+
+ // Create block for new PHIs and add it to the list of outlined if it
+ // wasn't done before.
+ if (!NewBB) {
+ NewBB = BasicBlock::Create(ExitBB->getContext(),
+ ExitBB->getName() + ".split",
+ ExitBB->getParent(), ExitBB);
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBB),
+ pred_end(ExitBB));
+ for (BasicBlock *PredBB : Preds)
+ if (Blocks.count(PredBB))
+ PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
+ BranchInst::Create(ExitBB, NewBB);
+ Blocks.insert(NewBB);
+ }
+
+ // Split this PHI.
+ PHINode *NewPN =
+ PHINode::Create(PN.getType(), IncomingVals.size(),
+ PN.getName() + ".ce", NewBB->getFirstNonPHI());
+ for (unsigned i : IncomingVals)
+ NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
+ for (unsigned i : reverse(IncomingVals))
+ PN.removeIncomingValue(i, false);
+ PN.addIncoming(NewPN, NewBB);
+ }
+ }
+}
+
void CodeExtractor::splitReturnBlocks() {
for (BasicBlock *Block : Blocks)
if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
@@ -669,11 +729,14 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
FunctionType::get(RetTy, paramTy,
AllowVarArgs && oldFunction->isVarArg());
+ std::string SuffixToUse =
+ Suffix.empty()
+ ? (header->getName().empty() ? "extracted" : header->getName().str())
+ : Suffix;
// Create the new function
- Function *newFunction = Function::Create(funcType,
- GlobalValue::InternalLinkage,
- oldFunction->getName() + "_" +
- header->getName(), M);
+ Function *newFunction = Function::Create(
+ funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
+ oldFunction->getName() + "." + SuffixToUse, M);
// If the old function is no-throw, so is the new one.
if (oldFunction->doesNotThrow())
newFunction->setDoesNotThrow();
@@ -754,6 +817,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::SanitizeMemory:
case Attribute::SanitizeThread:
case Attribute::SanitizeHWAddress:
+ case Attribute::SpeculativeLoadHardening:
case Attribute::StackProtect:
case Attribute::StackProtectReq:
case Attribute::StackProtectStrong:
@@ -778,7 +842,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
- TerminatorInst *TI = newFunction->begin()->getTerminator();
+ Instruction *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
@@ -808,10 +872,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
for (unsigned i = 0, e = Users.size(); i != e; ++i)
// The BasicBlock which contains the branch is not in the region
// modify the branch target to a new block
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
- if (!Blocks.count(TI->getParent()) &&
- TI->getParent()->getParent() == oldFunction)
- TI->replaceUsesOfWith(header, newHeader);
+ if (Instruction *I = dyn_cast<Instruction>(Users[i]))
+ if (I->isTerminator() && !Blocks.count(I->getParent()) &&
+ I->getParent()->getParent() == oldFunction)
+ I->replaceUsesOfWith(header, newHeader);
return newFunction;
}
@@ -819,9 +883,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
-void CodeExtractor::
-emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
- ValueSet &inputs, ValueSet &outputs) {
+CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
+ BasicBlock *codeReplacer,
+ ValueSet &inputs,
+ ValueSet &outputs) {
// Emit a call to the new function, passing in: *pointer to struct (if
// aggregating parameters), or plain inputs and allocated memory for outputs
std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
@@ -829,6 +894,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Module *M = newFunction->getParent();
LLVMContext &Context = M->getContext();
const DataLayout &DL = M->getDataLayout();
+ CallInst *call = nullptr;
// Add inputs as params, or to be filled into the struct
for (Value *input : inputs)
@@ -879,8 +945,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
// Emit the call to the function
- CallInst *call = CallInst::Create(newFunction, params,
- NumExitBlocks > 1 ? "targetBlock" : "");
+ call = CallInst::Create(newFunction, params,
+ NumExitBlocks > 1 ? "targetBlock" : "");
// Add debug location to the new call, if the original function has debug
// info. In that case, the terminator of the entry block of the extracted
// function contains the first debug location of the extracted function,
@@ -925,11 +991,17 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
auto *OutI = dyn_cast<Instruction>(outputs[i]);
if (!OutI)
continue;
+
// Find proper insertion point.
- Instruction *InsertPt = OutI->getNextNode();
- // Let's assume that there is no other guy interleave non-PHI in PHIs.
- if (isa<PHINode>(InsertPt))
- InsertPt = InsertPt->getParent()->getFirstNonPHI();
+ BasicBlock::iterator InsertPt;
+ // In case OutI is an invoke, we insert the store at the beginning of the
+ // 'normal destination' BB. Otherwise we insert the store right after OutI.
+ if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
+ InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
+ else if (auto *Phi = dyn_cast<PHINode>(OutI))
+ InsertPt = Phi->getParent()->getFirstInsertionPt();
+ else
+ InsertPt = std::next(OutI->getIterator());
assert(OAI != newFunction->arg_end() &&
"Number of output arguments should match "
@@ -939,13 +1011,13 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertPt);
- new StoreInst(outputs[i], GEP, InsertPt);
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), &*InsertPt);
+ new StoreInst(outputs[i], GEP, &*InsertPt);
// Since there should be only one struct argument aggregating
// all the output values, we shouldn't increment OAI, which always
// points to the struct argument, in this case.
} else {
- new StoreInst(outputs[i], &*OAI, InsertPt);
+ new StoreInst(outputs[i], &*OAI, &*InsertPt);
++OAI;
}
}
@@ -964,7 +1036,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
unsigned switchVal = 0;
for (BasicBlock *Block : Blocks) {
- TerminatorInst *TI = Block->getTerminator();
+ Instruction *TI = Block->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (!Blocks.count(TI->getSuccessor(i))) {
BasicBlock *OldTarget = TI->getSuccessor(i);
@@ -1046,6 +1118,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
+
+ return call;
}
void CodeExtractor::moveCodeToFunction(Function *newFunction) {
@@ -1070,7 +1144,7 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
// Update the branch weights for the exit block.
- TerminatorInst *TI = CodeReplacer->getTerminator();
+ Instruction *TI = CodeReplacer->getTerminator();
SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
// Block Frequency distribution with dummy node.
@@ -1107,6 +1181,71 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
+/// Scan the extraction region for lifetime markers which reference inputs.
+/// Erase these markers. Return the inputs which were referenced.
+///
+/// The extraction region is defined by a set of blocks (\p Blocks), and a set
+/// of allocas which will be moved from the caller function into the extracted
+/// function (\p SunkAllocas).
+static SetVector<Value *>
+eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
+ const SetVector<Value *> &SunkAllocas) {
+ SetVector<Value *> InputObjectsWithLifetime;
+ for (BasicBlock *BB : Blocks) {
+ for (auto It = BB->begin(), End = BB->end(); It != End;) {
+ auto *II = dyn_cast<IntrinsicInst>(&*It);
+ ++It;
+ if (!II || !II->isLifetimeStartOrEnd())
+ continue;
+
+ // Get the memory operand of the lifetime marker. If the underlying
+ // object is a sunk alloca, or is otherwise defined in the extraction
+ // region, the lifetime marker must not be erased.
+ Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+ if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
+ continue;
+
+ InputObjectsWithLifetime.insert(Mem);
+ II->eraseFromParent();
+ }
+ }
+ return InputObjectsWithLifetime;
+}
+
+/// Insert lifetime start/end markers surrounding the call to the new function
+/// for objects defined in the caller.
+static void insertLifetimeMarkersSurroundingCall(
+ Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
+ CallInst *TheCall) {
+ if (InputObjectsWithLifetime.empty())
+ return;
+
+ LLVMContext &Ctx = M->getContext();
+ auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
+ auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
+ auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
+ for (Value *Mem : InputObjectsWithLifetime) {
+ assert((!isa<Instruction>(Mem) ||
+ cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
+ "Input memory not defined in original function");
+ Value *MemAsI8Ptr = nullptr;
+ if (Mem->getType() == Int8PtrTy)
+ MemAsI8Ptr = Mem;
+ else
+ MemAsI8Ptr =
+ CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
+
+ auto StartMarker =
+ CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
+ StartMarker->insertBefore(TheCall);
+ auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
+ EndMarker->insertAfter(TheCall);
+ }
+}
+
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
@@ -1150,13 +1289,33 @@ Function *CodeExtractor::extractCodeRegion() {
}
}
- // If we have to split PHI nodes or the entry block, do so now.
- severSplitPHINodes(header);
-
// If we have any return instructions in the region, split those blocks so
// that the return is not in the region.
splitReturnBlocks();
+ // Calculate the exit blocks for the extracted region and the total exit
+ // weights for each of those blocks.
+ DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
+ SmallPtrSet<BasicBlock *, 1> ExitBlocks;
+ for (BasicBlock *Block : Blocks) {
+ for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
+ ++SI) {
+ if (!Blocks.count(*SI)) {
+ // Update the branch weight for this successor.
+ if (BFI) {
+ BlockFrequency &BF = ExitWeights[*SI];
+ BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
+ }
+ ExitBlocks.insert(*SI);
+ }
+ }
+ }
+ NumExitBlocks = ExitBlocks.size();
+
+ // If we have to split PHI nodes of the entry or exit blocks, do so now.
+ severSplitPHINodesOfEntry(header);
+ severSplitPHINodesOfExits(ExitBlocks);
+
// This takes the place of the original loop
BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
"codeRepl", oldFunction,
@@ -1201,30 +1360,17 @@ Function *CodeExtractor::extractCodeRegion() {
cast<Instruction>(II)->moveBefore(TI);
}
- // Calculate the exit blocks for the extracted region and the total exit
- // weights for each of those blocks.
- DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
- SmallPtrSet<BasicBlock *, 1> ExitBlocks;
- for (BasicBlock *Block : Blocks) {
- for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
- ++SI) {
- if (!Blocks.count(*SI)) {
- // Update the branch weight for this successor.
- if (BFI) {
- BlockFrequency &BF = ExitWeights[*SI];
- BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
- }
- ExitBlocks.insert(*SI);
- }
- }
- }
- NumExitBlocks = ExitBlocks.size();
+ // Collect objects which are inputs to the extraction region and also
+ // referenced by lifetime start/end markers within it. The effects of these
+ // markers must be replicated in the calling function to prevent the stack
+ // coloring pass from merging slots which store input objects.
+ ValueSet InputObjectsWithLifetime =
+ eraseLifetimeMarkersOnInputs(Blocks, SinkingCands);
// Construct new function based on inputs/outputs & add allocas for all defs.
- Function *newFunction = constructFunction(inputs, outputs, header,
- newFuncRoot,
- codeReplacer, oldFunction,
- oldFunction->getParent());
+ Function *newFunction =
+ constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
+ oldFunction, oldFunction->getParent());
// Update the entry count of the function.
if (BFI) {
@@ -1235,10 +1381,16 @@ Function *CodeExtractor::extractCodeRegion() {
BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
}
- emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+ CallInst *TheCall =
+ emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
moveCodeToFunction(newFunction);
+ // Replicate the effects of any lifetime start/end markers which referenced
+ // input objects in the extraction region by placing markers around the call.
+ insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
+ InputObjectsWithLifetime, TheCall);
+
// Propagate personality info to the new function if there is one.
if (oldFunction->hasPersonalityFn())
newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
@@ -1247,8 +1399,8 @@ Function *CodeExtractor::extractCodeRegion() {
if (BFI && NumExitBlocks > 1)
calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
- // Loop over all of the PHI nodes in the header block, and change any
- // references to the old incoming edge to be the new incoming edge.
+ // Loop over all of the PHI nodes in the header and exit blocks, and change
+ // any references to the old incoming edge to be the new incoming edge.
for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -1256,29 +1408,60 @@ Function *CodeExtractor::extractCodeRegion() {
PN->setIncomingBlock(i, newFuncRoot);
}
- // Look at all successors of the codeReplacer block. If any of these blocks
- // had PHI nodes in them, we need to update the "from" block to be the code
- // replacer, not the original block in the extracted region.
- std::vector<BasicBlock *> Succs(succ_begin(codeReplacer),
- succ_end(codeReplacer));
- for (unsigned i = 0, e = Succs.size(); i != e; ++i)
- for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- std::set<BasicBlock*> ProcessedPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (Blocks.count(PN->getIncomingBlock(i))) {
- if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
- PN->setIncomingBlock(i, codeReplacer);
- else {
- // There were multiple entries in the PHI for this block, now there
- // is only one, so remove the duplicated entries.
- PN->removeIncomingValue(i, false);
- --i; --e;
- }
- }
+ for (BasicBlock *ExitBB : ExitBlocks)
+ for (PHINode &PN : ExitBB->phis()) {
+ Value *IncomingCodeReplacerVal = nullptr;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ // Ignore incoming values from outside of the extracted region.
+ if (!Blocks.count(PN.getIncomingBlock(i)))
+ continue;
+
+ // Ensure that there is only one incoming value from codeReplacer.
+ if (!IncomingCodeReplacerVal) {
+ PN.setIncomingBlock(i, codeReplacer);
+ IncomingCodeReplacerVal = PN.getIncomingValue(i);
+ } else
+ assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
+ "PHI has two incompatbile incoming values from codeRepl");
+ }
+ }
+
+ // Erase debug info intrinsics. Variable updates within the new function are
+ // invisible to debuggers. This could be improved by defining a DISubprogram
+ // for the new function.
+ for (BasicBlock &BB : *newFunction) {
+ auto BlockIt = BB.begin();
+ // Remove debug info intrinsics from the new function.
+ while (BlockIt != BB.end()) {
+ Instruction *Inst = &*BlockIt;
+ ++BlockIt;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ Inst->eraseFromParent();
}
+ // Remove debug info intrinsics which refer to values in the new function
+ // from the old function.
+ SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
+ for (Instruction &I : BB)
+ findDbgUsers(DbgUsers, &I);
+ for (DbgVariableIntrinsic *DVI : DbgUsers)
+ DVI->eraseFromParent();
+ }
- LLVM_DEBUG(if (verifyFunction(*newFunction))
- report_fatal_error("verifyFunction failed!"));
+ // Mark the new function `noreturn` if applicable. Terminators which resume
+ // exception propagation are treated as returning instructions. This is to
+ // avoid inserting traps after calls to outlined functions which unwind.
+ bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
+ const Instruction *Term = BB.getTerminator();
+ return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
+ });
+ if (doesNotReturn)
+ newFunction->setDoesNotReturn();
+
+ LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
+ newFunction->dump();
+ report_fatal_error("verification of newFunction failed!");
+ });
+ LLVM_DEBUG(if (verifyFunction(*oldFunction))
+ report_fatal_error("verification of oldFunction failed!"));
return newFunction;
}
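The CodeExtractor changes add an optional Suffix to both constructors, return the emitted call from emitCallAndSwitchStatement, split exit-block PHIs, and replicate lifetime markers around the call. A minimal outlining sketch against the constructor signature shown above (the "outlined" suffix string and the outlineRegion helper are illustrative, not part of the commit):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

// Outline a single-entry region into a new function named <original>.outlined.
static Function *outlineRegion(ArrayRef<BasicBlock *> Region,
                               DominatorTree &DT) {
  CodeExtractor CE(Region, &DT, /*AggregateArgs=*/false, /*BFI=*/nullptr,
                   /*BPI=*/nullptr, /*AllowVarArgs=*/false,
                   /*AllowAlloca=*/false, /*Suffix=*/"outlined");
  return CE.isEligible() ? CE.extractCodeRegion() : nullptr;
}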
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index 9a0240144d08..4e7da7d0449f 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -22,11 +22,10 @@
#define DEBUG_TYPE "ctor_utils"
-namespace llvm {
+using namespace llvm;
-namespace {
/// Given a specified llvm.global_ctors list, remove the listed elements.
-void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
+static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
// Filter out the initializer elements to remove.
ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
SmallVector<Constant *, 10> CAList;
@@ -64,7 +63,7 @@ void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
/// Given a llvm.global_ctors list that we can understand,
/// return a list of the functions and null terminator as a vector.
-std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
+static std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
if (GV->getInitializer()->isNullValue())
return std::vector<Function *>();
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
@@ -79,7 +78,7 @@ std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
/// Find the llvm.global_ctors list, verifying that all initializers have an
/// init priority of 65535.
-GlobalVariable *findGlobalCtors(Module &M) {
+static GlobalVariable *findGlobalCtors(Module &M) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (!GV)
return nullptr;
@@ -112,12 +111,11 @@ GlobalVariable *findGlobalCtors(Module &M) {
return GV;
}
-} // namespace
/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
/// entries for which it returns true. Return true if anything changed.
-bool optimizeGlobalCtorsList(Module &M,
- function_ref<bool(Function *)> ShouldRemove) {
+bool llvm::optimizeGlobalCtorsList(
+ Module &M, function_ref<bool(Function *)> ShouldRemove) {
GlobalVariable *GlobalCtors = findGlobalCtors(M);
if (!GlobalCtors)
return false;
@@ -160,5 +158,3 @@ bool optimizeGlobalCtorsList(Module &M,
removeGlobalCtors(GlobalCtors, CtorsToRemove);
return true;
}
-
-} // End llvm namespace
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 56ff03c7f5e1..975b363859a9 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -90,7 +90,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
// careful if I is an invoke instruction, because we can't insert the store
// AFTER the terminator instruction.
BasicBlock::iterator InsertPt;
- if (!isa<TerminatorInst>(I)) {
+ if (!I.isTerminator()) {
InsertPt = ++I.getIterator();
for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index c9c96fbe5da0..762a374c135c 100644
--- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -37,7 +37,7 @@ IRBuilder<> *EscapeEnumerator::Next() {
// Branches and invokes do not escape, only unwind, resume, and return
// do.
- TerminatorInst *TI = CurBB->getTerminator();
+ Instruction *TI = CurBB->getTerminator();
if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
index 7fd9425efed3..e875cd686b00 100644
--- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -483,8 +483,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
}
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II->isLifetimeStartOrEnd()) {
LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
++CurInst;
continue;
@@ -578,7 +577,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
<< "Successfully evaluated function. Result: 0\n\n");
}
}
- } else if (isa<TerminatorInst>(CurInst)) {
+ } else if (CurInst->isTerminator()) {
LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n");
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 3c6c9c9a5df4..d9778f4a1fb7 100644
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -232,7 +232,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
return false;
- TerminatorInst *TBB = LastCondBlock->getTerminator();
+ Instruction *TBB = LastCondBlock->getTerminator();
BasicBlock *PS1 = TBB->getSuccessor(0);
BasicBlock *PS2 = TBB->getSuccessor(1);
BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
@@ -325,7 +325,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
BasicBlock *Block1,
BasicBlock *Block2) {
- TerminatorInst *PTI2 = Head2->getTerminator();
+ Instruction *PTI2 = Head2->getTerminator();
Instruction *PBI2 = &Head2->front();
bool eq1 = (Block1 == Head1);
@@ -421,7 +421,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
return false;
- TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PTI2 = SecondEntryBlock->getTerminator();
Instruction *PBI2 = &SecondEntryBlock->front();
if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1,
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 69203f9f2485..a717d9b72819 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -410,8 +410,6 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
switch (TyL->getTypeID()) {
default:
llvm_unreachable("Unknown type!");
- // Fall through in Release mode.
- LLVM_FALLTHROUGH;
case Type::IntegerTyID:
return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
cast<IntegerType>(TyR)->getBitWidth());
@@ -867,8 +865,8 @@ int FunctionComparator::compare() {
if (int Res = cmpBasicBlocks(BBL, BBR))
return Res;
- const TerminatorInst *TermL = BBL->getTerminator();
- const TerminatorInst *TermR = BBR->getTerminator();
+ const Instruction *TermL = BBL->getTerminator();
+ const Instruction *TermR = BBR->getTerminator();
assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
@@ -938,7 +936,7 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
for (auto &Inst : *BB) {
H.add(Inst.getOpcode());
}
- const TerminatorInst *Term = BB->getTerminator();
+ const Instruction *Term = BB->getTerminator();
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 479816a339d0..a9772e31da50 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -124,7 +124,6 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
return SGV->getLinkage();
switch (SGV->getLinkage()) {
- case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::ExternalLinkage:
// External and linkonce definitions are converted to available_externally
@@ -144,11 +143,13 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// An imported available_externally declaration stays that way.
return SGV->getLinkage();
+ case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::WeakAnyLinkage:
- // Can't import weak_any definitions correctly, or we might change the
- // program semantics, since the linker will pick the first weak_any
- // definition and importing would change the order they are seen by the
- // linker. The module linking caller needs to enforce this.
+ // Can't import linkonce_any/weak_any definitions correctly, or we might
+ // change the program semantics, since the linker will pick the first
+ // linkonce_any/weak_any definition and importing would change the order
+ // they are seen by the linker. The module linking caller needs to enforce
+ // this.
assert(!doImportAsDefinition(SGV));
// If imported as a declaration, it becomes external_weak.
return SGV->getLinkage();
@@ -202,10 +203,26 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
- // Check the summaries to see if the symbol gets resolved to a known local
- // definition.
+ ValueInfo VI;
if (GV.hasName()) {
- ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID());
+ VI = ImportIndex.getValueInfo(GV.getGUID());
+ // Set synthetic function entry counts.
+ if (VI && ImportIndex.hasSyntheticEntryCounts()) {
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ if (!F->isDeclaration()) {
+ for (auto &S : VI.getSummaryList()) {
+ FunctionSummary *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+ if (FS->modulePath() == M.getModuleIdentifier()) {
+ F->setEntryCount(Function::ProfileCount(FS->entryCount(),
+ Function::PCT_Synthetic));
+ break;
+ }
+ }
+ }
+ }
+ }
+ // Check the summaries to see if the symbol gets resolved to a known local
+ // definition.
if (VI && VI.isDSOLocal()) {
GV.setDSOLocal(true);
if (GV.hasDLLImportStorageClass())
@@ -213,6 +230,22 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
}
}
+ // Mark read-only variables which can be imported with specific attribute.
+ // We can't internalize them now because IRMover will fail to link variable
+ // definitions to their external declarations during ThinLTO import. We'll
+ // internalize read-only variables later, after import is finished.
+ // See internalizeImmutableGVs.
+ //
+ // If global value dead stripping is not enabled in summary then
+ // propagateConstants hasn't been run. We can't internalize GV
+ // in such case.
+ if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
+ const auto &SL = VI.getSummaryList();
+ auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
+ if (GVS && GVS->isReadOnly())
+ cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
+ }
+
bool DoPromote = false;
if (GV.hasLocalLinkage() &&
((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
@@ -230,7 +263,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
// Remove functions imported as available externally defs from comdats,
// as this is a declaration for the linker, and will be dropped eventually.
// It is illegal for comdats to contain declarations.
- auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
+ auto *GO = dyn_cast<GlobalObject>(&GV);
if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
// The IRMover should not have placed any imported declarations in
// a comdat, so the only declaration that should be in a comdat
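
The new processGlobalForThinLTO code above only tags importable read-only globals with a string attribute; the actual internalization is deferred until importing has finished (the internalizeImmutableGVs step the comment refers to). A minimal sketch of what that deferred step could look like, assuming GlobalVariable::hasAttribute accepts the same string kind used above; internalizeTaggedGlobals is an invented name and the in-tree pass does more bookkeeping than this.

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// After ThinLTO importing is done, globals tagged earlier can safely be given
// local linkage; doing it before import would break linking of definitions to
// their external declarations, as the comment above notes.
static void internalizeTaggedGlobals(Module &M) {
  for (GlobalVariable &GV : M.globals())
    if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize"))
      GV.setLinkage(GlobalValue::InternalLinkage);
}
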
diff --git a/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp
new file mode 100644
index 000000000000..08de0a4c53e9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -0,0 +1,64 @@
+//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform transformations related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/GuardUtils.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+static cl::opt<uint32_t> PredicatePassBranchWeight(
+ "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
+ cl::desc("The probability of a guard failing is assumed to be the "
+ "reciprocal of this value (default = 1 << 20)"));
+
+void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
+ CallInst *Guard) {
+ OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
+ SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end());
+
+ auto *CheckBB = Guard->getParent();
+ auto *DeoptBlockTerm =
+ SplitBlockAndInsertIfThen(Guard->getArgOperand(0), Guard, true);
+
+ auto *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
+
+ // SplitBlockAndInsertIfThen inserts control flow that branches to
+ // DeoptBlockTerm if the condition is true. We want the opposite.
+ CheckBI->swapSuccessors();
+
+ CheckBI->getSuccessor(0)->setName("guarded");
+ CheckBI->getSuccessor(1)->setName("deopt");
+
+ if (auto *MD = Guard->getMetadata(LLVMContext::MD_make_implicit))
+ CheckBI->setMetadata(LLVMContext::MD_make_implicit, MD);
+
+ MDBuilder MDB(Guard->getContext());
+ CheckBI->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(PredicatePassBranchWeight, 1));
+
+ IRBuilder<> B(DeoptBlockTerm);
+ auto *DeoptCall = B.CreateCall(DeoptIntrinsic, Args, {DeoptOB}, "");
+
+ if (DeoptIntrinsic->getReturnType()->isVoidTy()) {
+ B.CreateRetVoid();
+ } else {
+ DeoptCall->setName("deoptcall");
+ B.CreateRet(DeoptCall);
+ }
+
+ DeoptCall->setCallingConv(Guard->getCallingConv());
+ DeoptBlockTerm->eraseFromParent();
+}
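
GuardUtils.cpp is new in this import: makeGuardControlFlowExplicit rewrites an @llvm.experimental.guard call into an explicit conditional branch whose failing edge calls @llvm.experimental.deoptimize and returns. A hedged sketch of how a lowering pass might drive it follows; lowerGuardsIn and the collection loop are invented, the intrinsic IDs and the helper's signature come from the code above, and every guard is assumed to carry the usual "deopt" operand bundle.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;

static bool lowerGuardsIn(Function &F) {
  // Collect the guard calls first: the rewrite splits blocks, so the CFG must
  // not be mutated while it is being iterated.
  SmallVector<CallInst *, 8> Guards;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (auto *CI = dyn_cast<CallInst>(&I))
        if (CI->getCalledFunction() &&
            CI->getCalledFunction()->getIntrinsicID() ==
                Intrinsic::experimental_guard)
          Guards.push_back(CI);
  if (Guards.empty())
    return false;

  // The deoptimize intrinsic is instantiated with the caller's return type,
  // matching the ret/ret void emitted by makeGuardControlFlowExplicit.
  Function *DeoptIntrinsic = Intrinsic::getDeclaration(
      F.getParent(), Intrinsic::experimental_deoptimize, {F.getReturnType()});
  for (CallInst *Guard : Guards)
    makeGuardControlFlowExplicit(DeoptIntrinsic, Guard);
  return true;
}
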
diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 8382220fc9e1..02482c550321 100644
--- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -161,7 +161,7 @@ void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {
void ImportedFunctionsInliningStatistics::calculateRealInlines() {
// Removing duplicated Callers.
- llvm::sort(NonImportedCallers.begin(), NonImportedCallers.end());
+ llvm::sort(NonImportedCallers);
NonImportedCallers.erase(
std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
NonImportedCallers.end());
@@ -190,17 +190,14 @@ ImportedFunctionsInliningStatistics::getSortedNodes() {
for (const NodesMapTy::value_type& Node : NodesMap)
SortedNodes.push_back(&Node);
- llvm::sort(
- SortedNodes.begin(), SortedNodes.end(),
- [&](const SortedNodesTy::value_type &Lhs,
- const SortedNodesTy::value_type &Rhs) {
- if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
- return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
- if (Lhs->second->NumberOfRealInlines !=
- Rhs->second->NumberOfRealInlines)
- return Lhs->second->NumberOfRealInlines >
- Rhs->second->NumberOfRealInlines;
- return Lhs->first() < Rhs->first();
- });
+ llvm::sort(SortedNodes, [&](const SortedNodesTy::value_type &Lhs,
+ const SortedNodesTy::value_type &Rhs) {
+ if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
+ return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
+ if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines)
+ return Lhs->second->NumberOfRealInlines >
+ Rhs->second->NumberOfRealInlines;
+ return Lhs->first() < Rhs->first();
+ });
return SortedNodes;
}
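
Both hunks switch from the iterator-pair form of llvm::sort to the range form from STLExtras.h. Below is a simplified, self-contained model of such a wrapper; sortRange is an invented name, and the in-tree llvm::sort additionally shuffles the range when expensive checks are enabled so that comparators which are not strict weak orderings get caught.

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

// Range-based sort wrappers: callers pass the container once instead of a
// begin/end pair, which is all the two hunks above are taking advantage of.
template <typename Range, typename Compare>
void sortRange(Range &&R, Compare Cmp) {
  std::sort(std::begin(R), std::end(R), Cmp);
}

template <typename Range> void sortRange(Range &&R) {
  std::sort(std::begin(R), std::end(R));
}

int main() {
  std::vector<std::string> Callers = {"b", "a", "a", "c"};
  sortRange(Callers); // same shape as llvm::sort(NonImportedCallers)
  Callers.erase(std::unique(Callers.begin(), Callers.end()), Callers.end());
  return 0;
}
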
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index ddc6e07e2f59..623fe91a5a60 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -84,13 +85,15 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::init(true), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
-bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime) {
+llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime) {
return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
}
-bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime) {
+llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime) {
return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
}
@@ -768,14 +771,16 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
UnwindDest->removePredecessor(InvokeBB);
}
-/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
-/// that metadata should be propagated to all memory-accessing cloned
-/// instructions.
+/// When inlining a call site that has !llvm.mem.parallel_loop_access or
+/// llvm.access.group metadata, that metadata should be propagated to all
+/// memory-accessing cloned instructions.
static void PropagateParallelLoopAccessMetadata(CallSite CS,
ValueToValueMapTy &VMap) {
MDNode *M =
CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
- if (!M)
+ MDNode *CallAccessGroup =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_access_group);
+ if (!M && !CallAccessGroup)
return;
for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
@@ -787,11 +792,20 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS,
if (!NI)
continue;
- if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+ if (M) {
+ if (MDNode *PM =
+ NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
M = MDNode::concatenate(PM, M);
NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
- } else if (NI->mayReadOrWriteMemory()) {
- NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ } else if (NI->mayReadOrWriteMemory()) {
+ NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+ }
+ }
+
+ if (NI->mayReadOrWriteMemory()) {
+ MDNode *UnitedAccGroups = uniteAccessGroups(
+ NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
+ NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
}
}
}
@@ -985,22 +999,22 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
PtrArgs.push_back(CXI->getPointerOperand());
else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
PtrArgs.push_back(RMWI->getPointerOperand());
- else if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
+ else if (const auto *Call = dyn_cast<CallBase>(I)) {
// If we know that the call does not access memory, then we'll still
// know that about the inlined clone of this call site, and we don't
// need to add metadata.
- if (ICS.doesNotAccessMemory())
+ if (Call->doesNotAccessMemory())
continue;
IsFuncCall = true;
if (CalleeAAR) {
- FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS);
+ FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
if (MRB == FMRB_OnlyAccessesArgumentPointees ||
MRB == FMRB_OnlyReadsArgumentPointees)
IsArgMemOnlyCall = true;
}
- for (Value *Arg : ICS.args()) {
+ for (Value *Arg : Call->args()) {
// We need to check the underlying objects of all arguments, not just
// the pointer arguments, because we might be passing pointers as
// integers, etc.
@@ -1306,16 +1320,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// Check whether this Value is used by a lifetime intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
- for (User *U : V->users()) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
+ for (User *U : V->users())
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
+ if (II->isLifetimeStartOrEnd())
return true;
- }
- }
- }
return false;
}
@@ -1491,9 +1499,10 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
-bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR, bool InsertLifetime,
- Function *ForwardVarArgsTo) {
+llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ AAResults *CalleeAAR,
+ bool InsertLifetime,
+ Function *ForwardVarArgsTo) {
Instruction *TheCall = CS.getInstruction();
assert(TheCall->getParent() && TheCall->getFunction()
&& "Instruction not in function!");
@@ -1504,7 +1513,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Function *CalledFunc = CS.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration()) // call!
- return false;
+ return "external or indirect";
// The inliner does not know how to inline through calls with operand bundles
// in general ...
@@ -1518,7 +1527,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (Tag == LLVMContext::OB_funclet)
continue;
- return false;
+ return "unsupported operand bundle";
}
}
@@ -1537,7 +1546,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (!Caller->hasGC())
Caller->setGC(CalledFunc->getGC());
else if (CalledFunc->getGC() != Caller->getGC())
- return false;
+ return "incompatible GC";
}
// Get the personality function from the callee if it contains a landing pad.
@@ -1561,7 +1570,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// TODO: This isn't 100% true. Some personality functions are proper
// supersets of others and can be used in place of the other.
else if (CalledPersonality != CallerPersonality)
- return false;
+ return "incompatible personality";
}
// We need to figure out which funclet the callsite was in so that we may
@@ -1586,7 +1595,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// for catchpads.
for (const BasicBlock &CalledBB : *CalledFunc) {
if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
- return false;
+ return "catch in cleanup funclet";
}
}
} else if (isAsynchronousEHPersonality(Personality)) {
@@ -1594,7 +1603,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// funclet in the callee.
for (const BasicBlock &CalledBB : *CalledFunc) {
if (CalledBB.isEHPad())
- return false;
+ return "SEH in cleanup funclet";
}
}
}
@@ -2244,7 +2253,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Change the branch that used to go to AfterCallBB to branch to the first
// basic block of the inlined function.
//
- TerminatorInst *Br = OrigBB->getTerminator();
+ Instruction *Br = OrigBB->getTerminator();
assert(Br && Br->getOpcode() == Instruction::Br &&
"splitBasicBlock broken!");
Br->setOperand(0, &*FirstNewBlock);
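
InlineFunction now reports why inlining was refused instead of returning a bare bool, which is what makes early exits like return "incompatible GC"; above possible. Below is a simplified standalone model of such a result type; InlineResultModel and tryInline are invented names, and the in-tree llvm::InlineResult (declared in InlineCost.h) may differ in detail.

#include <cstdio>

// Success is the default; a failure implicitly converts from the reason
// string, so a refusing code path can simply return its message.
struct InlineResultModel {
  const char *Message = nullptr;                    // nullptr means success
  InlineResultModel() = default;                    // success
  InlineResultModel(const char *M) : Message(M) {}  // failure with a reason
  explicit operator bool() const { return Message == nullptr; }
};

static InlineResultModel tryInline(bool SameGC) {
  if (!SameGC)
    return "incompatible GC"; // mirrors the early returns in InlineFunction
  return {};
}

int main() {
  InlineResultModel IR = tryInline(false);
  if (!IR)
    std::printf("not inlining: %s\n", IR.Message);
  return 0;
}
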
diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 9832a6f24e1f..e1592c867636 100644
--- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -487,7 +487,7 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
MDNode *BranchWeights =
MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
- TerminatorInst *NewInst =
+ Instruction *NewInst =
SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
BasicBlock *CallBB = NewInst->getParent();
CallBB->setName("cdce.call");
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index ae3cb077a3af..499e611acb57 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -31,8 +31,10 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -47,6 +49,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -102,8 +105,8 @@ STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
/// DeleteDeadConditions is true.
bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
const TargetLibraryInfo *TLI,
- DeferredDominance *DDT) {
- TerminatorInst *T = BB->getTerminator();
+ DomTreeUpdater *DTU) {
+ Instruction *T = BB->getTerminator();
IRBuilder<> Builder(T);
// Branch - See if we are conditional jumping on constant
@@ -125,8 +128,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Replace the conditional branch with an unconditional one.
Builder.CreateBr(Destination);
BI->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, OldDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, OldDest);
return true;
}
@@ -201,8 +204,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
DefaultDest->removePredecessor(ParentBB);
i = SI->removeCase(i);
e = SI->case_end();
- if (DDT)
- DDT->deleteEdge(ParentBB, DefaultDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(ParentBB, DefaultDest);
continue;
}
@@ -229,17 +232,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Builder.CreateBr(TheOnlyDest);
BasicBlock *BB = SI->getParent();
std::vector <DominatorTree::UpdateType> Updates;
- if (DDT)
+ if (DTU)
Updates.reserve(SI->getNumSuccessors() - 1);
// Remove entries from PHI nodes which we no longer branch to...
- for (BasicBlock *Succ : SI->successors()) {
+ for (BasicBlock *Succ : successors(SI)) {
// Found case matching a constant operand?
if (Succ == TheOnlyDest) {
TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
} else {
Succ->removePredecessor(BB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Succ});
}
}
@@ -249,8 +252,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
SI->eraseFromParent();
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return true;
}
@@ -297,7 +300,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
BasicBlock *TheOnlyDest = BA->getBasicBlock();
std::vector <DominatorTree::UpdateType> Updates;
- if (DDT)
+ if (DTU)
Updates.reserve(IBI->getNumDestinations() - 1);
// Insert the new branch.
@@ -310,7 +313,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BasicBlock *ParentBB = IBI->getParent();
BasicBlock *DestBB = IBI->getDestination(i);
DestBB->removePredecessor(ParentBB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, ParentBB, DestBB});
}
}
@@ -327,8 +330,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
new UnreachableInst(BB->getContext(), BB);
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return true;
}
}
@@ -352,7 +355,7 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI) {
- if (isa<TerminatorInst>(I))
+ if (I->isTerminator())
return false;
// We don't want the landingpad-like instructions removed by anything this
@@ -390,8 +393,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return true;
// Lifetime intrinsics are dead when their right-hand is undef.
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (II->isLifetimeStartOrEnd())
return isa<UndefValue>(II->getArgOperand(1));
// Assumptions are dead if their condition is trivially true. Guards on
@@ -425,22 +427,22 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
/// trivially dead instruction, delete it. If that makes any of its operands
/// trivially dead, delete them too, recursively. Return true if any
/// instructions were deleted.
-bool
-llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
- const TargetLibraryInfo *TLI) {
+bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
+ Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<Instruction*, 16> DeadInsts;
DeadInsts.push_back(I);
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU);
return true;
}
void llvm::RecursivelyDeleteTriviallyDeadInstructions(
- SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI) {
+ SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI,
+ MemorySSAUpdater *MSSAU) {
// Process the dead instruction list until empty.
while (!DeadInsts.empty()) {
Instruction &I = *DeadInsts.pop_back_val();
@@ -467,11 +469,24 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(
if (isInstructionTriviallyDead(OpI, TLI))
DeadInsts.push_back(OpI);
}
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(&I);
I.eraseFromParent();
}
}
+bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, I);
+ for (auto *DII : DbgUsers) {
+ Value *Undef = UndefValue::get(I->getType());
+ DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+ ValueAsMetadata::get(Undef)));
+ }
+ return !DbgUsers.empty();
+}
+
/// areAllUsesEqual - Check whether the uses of a value are all the same.
/// This is similar to Instruction::hasOneUse() except this will also return
/// true when there are no uses or multiple uses that all refer to the same
@@ -626,7 +641,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the and to 0.
void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU) {
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
@@ -649,17 +664,16 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
// of the block.
if (PhiIt != OldPhiIt) PhiIt = &BB->front();
}
- if (DDT)
- DDT->deleteEdge(Pred, BB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(Pred, BB);
}
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
-/// predecessor is known to have one successor (DestBB!). Eliminate the edge
+/// predecessor is known to have one successor (DestBB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
- DeferredDominance *DDT) {
- assert(!(DT && DDT) && "Cannot call with both DT and DDT.");
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
+ DomTreeUpdater *DTU) {
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
@@ -677,11 +691,11 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
if (PredBB == &DestBB->getParent()->getEntryBlock())
ReplaceEntryBB = true;
- // Deferred DT update: Collect all the edges that enter PredBB. These
- // dominator edges will be redirected to DestBB.
- std::vector <DominatorTree::UpdateType> Updates;
- if (DDT && !ReplaceEntryBB) {
- Updates.reserve(1 + (2 * pred_size(PredBB)));
+ // DTU updates: Collect all the edges that enter
+ // PredBB. These dominator edges will be redirected to DestBB.
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+
+ if (DTU) {
Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) {
Updates.push_back({DominatorTree::Delete, *I, PredBB});
@@ -708,33 +722,32 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT,
// Splice all the instructions from PredBB to DestBB.
PredBB->getTerminator()->eraseFromParent();
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+ new UnreachableInst(PredBB->getContext(), PredBB);
// If the PredBB is the entry block of the function, move DestBB up to
// become the entry block after we erase PredBB.
if (ReplaceEntryBB)
DestBB->moveAfter(PredBB);
- if (DT) {
- // For some irreducible CFG we end up having forward-unreachable blocks
- // so check if getNode returns a valid node before updating the domtree.
- if (DomTreeNode *DTN = DT->getNode(PredBB)) {
- BasicBlock *PredBBIDom = DTN->getIDom()->getBlock();
- DT->changeImmediateDominator(DestBB, PredBBIDom);
- DT->eraseNode(PredBB);
+ if (DTU) {
+ assert(PredBB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(PredBB->getTerminator()) &&
+ "The successor list of PredBB isn't empty before "
+ "applying corresponding DTU updates.");
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(PredBB);
+ // Recalculation of DomTree is needed when updating a forward DomTree and
+ // the Entry BB is replaced.
+ if (ReplaceEntryBB && DTU->hasDomTree()) {
+ // The entry block was removed and there is no external interface for
+ // the dominator tree to be notified of this change. In this corner-case
+ // we recalculate the entire tree.
+ DTU->recalculate(*(DestBB->getParent()));
}
}
- if (DDT) {
- DDT->deleteBB(PredBB); // Deferred deletion of BB.
- if (ReplaceEntryBB)
- // The entry block was removed and there is no external interface for the
- // dominator tree to be notified of this change. In this corner-case we
- // recalculate the entire tree.
- DDT->recalculate(*(DestBB->getParent()));
- else
- DDT->applyUpdates(Updates);
- } else {
- PredBB->eraseFromParent(); // Nuke BB.
+ else {
+ PredBB->eraseFromParent(); // Nuke BB if DTU is nullptr.
}
}
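
Across these Local.cpp hunks the DeferredDominance parameter is replaced by DomTreeUpdater, and the hunks converge on one protocol for removing a block: detach it from its successors, leave it with a lone UnreachableInst so its successor list matches the pending updates, apply the edge deletions with duplicates forced out, and only then let the updater delete the block eagerly or lazily. A hedged caller-side sketch of that protocol, using only calls that appear in the hunks above; removeBlock itself is an invented helper.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void removeBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
  SmallVector<DominatorTree::UpdateType, 4> Updates;
  for (BasicBlock *Succ : successors(BB)) {
    Succ->removePredecessor(BB);
    Updates.push_back({DominatorTree::Delete, BB, Succ});
  }
  // Clear BB's successor list so it agrees with the updates before they are
  // applied; the block keeps a terminator so the IR stays well formed.
  BB->getTerminator()->eraseFromParent();
  new UnreachableInst(BB->getContext(), BB);
  if (DTU) {
    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
    DTU->deleteBB(BB); // eager or deferred, depending on the update strategy
  } else {
    BB->eraseFromParent();
  }
}
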
@@ -945,7 +958,7 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,
/// eliminate BB by rewriting all the predecessors to branch to the successor
/// block and return true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU) {
assert(BB != &BB->getParent()->getEntryBlock() &&
"TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -986,9 +999,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
- std::vector<DominatorTree::UpdateType> Updates;
- if (DDT) {
- Updates.reserve(1 + (2 * pred_size(BB)));
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
+ if (DTU) {
Updates.push_back({DominatorTree::Delete, BB, Succ});
// All predecessors of BB will be moved to Succ.
for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
@@ -1044,9 +1056,16 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
BB->replaceAllUsesWith(Succ);
if (!Succ->hasName()) Succ->takeName(BB);
- if (DDT) {
- DDT->deleteBB(BB); // Deferred deletion of the old basic block.
- DDT->applyUpdates(Updates);
+ // Clear the successor list of BB to match updates applying to DTU later.
+ if (BB->getTerminator())
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+
+ if (DTU) {
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Delete the old basic block.
}
@@ -1237,7 +1256,7 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
/// alloc size of the value when doing the comparison. E.g. an i1 value will be
/// identified as covering an n-bit fragment, if the store size of i1 is at
/// least n bits.
-static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) {
+static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
const DataLayout &DL = DII->getModule()->getDataLayout();
uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy);
if (auto FragmentSize = DII->getFragmentSizeInBits())
@@ -1255,7 +1274,7 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) {
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
StoreInst *SI, DIBuilder &Builder) {
assert(DII->isAddressOfVariable());
auto *DIVar = DII->getVariable();
@@ -1278,33 +1297,6 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
return;
}
- // If an argument is zero extended then use argument directly. The ZExt
- // may be zapped by an optimization pass in future.
- Argument *ExtendedArg = nullptr;
- if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
- if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
- if (ExtendedArg) {
- // If this DII was already describing only a fragment of a variable, ensure
- // that fragment is appropriately narrowed here.
- // But if a fragment wasn't used, describe the value as the original
- // argument (rather than the zext or sext) so that it remains described even
- // if the sext/zext is optimized away. This widens the variable description,
- // leaving it up to the consumer to know how the smaller value may be
- // represented in a larger register.
- if (auto Fragment = DIExpr->getFragmentInfo()) {
- unsigned FragmentOffset = Fragment->OffsetInBits;
- SmallVector<uint64_t, 3> Ops(DIExpr->elements_begin(),
- DIExpr->elements_end() - 3);
- Ops.push_back(dwarf::DW_OP_LLVM_fragment);
- Ops.push_back(FragmentOffset);
- const DataLayout &DL = DII->getModule()->getDataLayout();
- Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
- DIExpr = Builder.createExpression(Ops);
- }
- DV = ExtendedArg;
- }
if (!LdStHasDebugValue(DIVar, DIExpr, SI))
Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
SI);
@@ -1312,7 +1304,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
LoadInst *LI, DIBuilder &Builder) {
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1341,7 +1333,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
-void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,
+void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
PHINode *APN, DIBuilder &Builder) {
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1443,7 +1435,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
// Map existing PHI nodes to their dbg.values.
ValueToValueMapTy DbgValueMap;
for (auto &I : *BB) {
- if (auto DbgII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) {
if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
DbgValueMap.insert({Loc, DbgII});
}
@@ -1464,7 +1456,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
for (auto VI : PHI->operand_values()) {
auto V = DbgValueMap.find(VI);
if (V != DbgValueMap.end()) {
- auto *DbgII = cast<DbgInfoIntrinsic>(V->second);
+ auto *DbgII = cast<DbgVariableIntrinsic>(V->second);
Instruction *NewDbgII = DbgII->clone();
NewDbgII->setOperand(0, PhiMAV);
auto InsertionPt = Parent->getFirstInsertionPt();
@@ -1478,7 +1470,7 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
/// Finds all intrinsics declaring local variables as living in the memory that
/// 'V' points to. This may include a mix of dbg.declare and
/// dbg.addr intrinsics.
-TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
+TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
if (!V->isUsedByMetadata())
@@ -1490,9 +1482,9 @@ TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
if (!MDV)
return {};
- TinyPtrVector<DbgInfoIntrinsic *> Declares;
+ TinyPtrVector<DbgVariableIntrinsic *> Declares;
for (User *U : MDV->users()) {
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(U))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(U))
if (DII->isAddressOfVariable())
Declares.push_back(DII);
}
@@ -1512,7 +1504,7 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}
-void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
+void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
@@ -1521,7 +1513,7 @@ void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers,
if (auto *L = LocalAsMetadata::getIfExists(V))
if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
for (User *U : MDV->users())
- if (DbgInfoIntrinsic *DII = dyn_cast<DbgInfoIntrinsic>(U))
+ if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U))
DbgUsers.push_back(DII);
}
@@ -1529,7 +1521,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
bool DerefBefore, int Offset, bool DerefAfter) {
auto DbgAddrs = FindDbgAddrUses(Address);
- for (DbgInfoIntrinsic *DII : DbgAddrs) {
+ for (DbgVariableIntrinsic *DII : DbgAddrs) {
DebugLoc Loc = DII->getDebugLoc();
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1597,7 +1589,7 @@ static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) {
}
bool llvm::salvageDebugInfo(Instruction &I) {
- SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
findDbgUsers(DbgUsers, &I);
if (DbgUsers.empty())
return false;
@@ -1607,7 +1599,7 @@ bool llvm::salvageDebugInfo(Instruction &I) {
auto &Ctx = I.getContext();
auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
- auto doSalvage = [&](DbgInfoIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
+ auto doSalvage = [&](DbgVariableIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
auto *DIExpr = DII->getExpression();
if (!Ops.empty()) {
// Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
@@ -1621,13 +1613,13 @@ bool llvm::salvageDebugInfo(Instruction &I) {
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
};
- auto applyOffset = [&](DbgInfoIntrinsic *DII, uint64_t Offset) {
+ auto applyOffset = [&](DbgVariableIntrinsic *DII, uint64_t Offset) {
SmallVector<uint64_t, 8> Ops;
DIExpression::appendOffset(Ops, Offset);
doSalvage(DII, Ops);
};
- auto applyOps = [&](DbgInfoIntrinsic *DII,
+ auto applyOps = [&](DbgVariableIntrinsic *DII,
std::initializer_list<uint64_t> Opcodes) {
SmallVector<uint64_t, 8> Ops(Opcodes);
doSalvage(DII, Ops);
@@ -1726,16 +1718,16 @@ using DbgValReplacement = Optional<DIExpression *>;
/// changes are made.
static bool rewriteDebugUsers(
Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
- function_ref<DbgValReplacement(DbgInfoIntrinsic &DII)> RewriteExpr) {
+ function_ref<DbgValReplacement(DbgVariableIntrinsic &DII)> RewriteExpr) {
// Find debug users of From.
- SmallVector<DbgInfoIntrinsic *, 1> Users;
+ SmallVector<DbgVariableIntrinsic *, 1> Users;
findDbgUsers(Users, &From);
if (Users.empty())
return false;
// Prevent use-before-def of To.
bool Changed = false;
- SmallPtrSet<DbgInfoIntrinsic *, 1> DeleteOrSalvage;
+ SmallPtrSet<DbgVariableIntrinsic *, 1> DeleteOrSalvage;
if (isa<Instruction>(&To)) {
bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
@@ -1824,7 +1816,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
Type *FromTy = From.getType();
Type *ToTy = To.getType();
- auto Identity = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement {
+ auto Identity = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
return DII.getExpression();
};
@@ -1848,7 +1840,7 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
// The width of the result has shrunk. Use sign/zero extension to describe
// the source variable's high bits.
- auto SignOrZeroExt = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement {
+ auto SignOrZeroExt = [&](DbgVariableIntrinsic &DII) -> DbgValReplacement {
DILocalVariable *Var = DII.getVariable();
// Without knowing signedness, sign/zero extension isn't possible.
@@ -1902,17 +1894,17 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
}
unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
- bool PreserveLCSSA, DeferredDominance *DDT) {
+ bool PreserveLCSSA, DomTreeUpdater *DTU) {
BasicBlock *BB = I->getParent();
std::vector <DominatorTree::UpdateType> Updates;
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
- if (DDT)
+ if (DTU)
Updates.reserve(BB->getTerminator()->getNumSuccessors());
for (BasicBlock *Successor : successors(BB)) {
Successor->removePredecessor(BB, PreserveLCSSA);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
// Insert a call to llvm.trap right before this. This turns the undefined
@@ -1923,7 +1915,8 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
CallTrap->setDebugLoc(I->getDebugLoc());
}
- new UnreachableInst(I->getContext(), I);
+ auto *UI = new UnreachableInst(I->getContext(), I);
+ UI->setDebugLoc(I->getDebugLoc());
// All instructions after this are dead.
unsigned NumInstrsRemoved = 0;
@@ -1934,13 +1927,13 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
BB->getInstList().erase(BBI++);
++NumInstrsRemoved;
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU)
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
return NumInstrsRemoved;
}
/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
+static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
@@ -1950,6 +1943,7 @@ static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
NewCall->setDebugLoc(II->getDebugLoc());
+ NewCall->copyMetadata(*II);
II->replaceAllUsesWith(NewCall);
// Follow the call by a branch to the normal destination.
@@ -1961,8 +1955,8 @@ static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {
BasicBlock *UnwindDestBB = II->getUnwindDest();
UnwindDestBB->removePredecessor(BB);
II->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDestBB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -2003,8 +1997,8 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
}
static bool markAliveBlocks(Function &F,
- SmallPtrSetImpl<BasicBlock*> &Reachable,
- DeferredDominance *DDT = nullptr) {
+ SmallPtrSetImpl<BasicBlock *> &Reachable,
+ DomTreeUpdater *DTU = nullptr) {
SmallVector<BasicBlock*, 128> Worklist;
BasicBlock *BB = &F.front();
Worklist.push_back(BB);
@@ -2029,7 +2023,7 @@ static bool markAliveBlocks(Function &F,
if (IntrinsicID == Intrinsic::assume) {
if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI, false, false, DDT);
+ changeToUnreachable(CI, false, false, DTU);
Changed = true;
break;
}
@@ -2046,7 +2040,7 @@ static bool markAliveBlocks(Function &F,
if (match(CI->getArgOperand(0), m_Zero()))
if (!isa<UnreachableInst>(CI->getNextNode())) {
changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false,
- false, DDT);
+ false, DTU);
Changed = true;
break;
}
@@ -2054,7 +2048,7 @@ static bool markAliveBlocks(Function &F,
} else if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(CI->getFunction())) ||
isa<UndefValue>(Callee)) {
- changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DDT);
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DTU);
Changed = true;
break;
}
@@ -2064,7 +2058,7 @@ static bool markAliveBlocks(Function &F,
// though.
if (!isa<UnreachableInst>(CI->getNextNode())) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI->getNextNode(), false, false, DDT);
+ changeToUnreachable(CI->getNextNode(), false, false, DTU);
Changed = true;
}
break;
@@ -2083,21 +2077,21 @@ static bool markAliveBlocks(Function &F,
(isa<ConstantPointerNull>(Ptr) &&
!NullPointerIsDefined(SI->getFunction(),
SI->getPointerAddressSpace()))) {
- changeToUnreachable(SI, true, false, DDT);
+ changeToUnreachable(SI, true, false, DTU);
Changed = true;
break;
}
}
}
- TerminatorInst *Terminator = BB->getTerminator();
+ Instruction *Terminator = BB->getTerminator();
if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
// Turn invokes that call 'nounwind' functions into ordinary calls.
Value *Callee = II->getCalledValue();
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(BB->getParent())) ||
isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true, false, DDT);
+ changeToUnreachable(II, true, false, DTU);
Changed = true;
} else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
if (II->use_empty() && II->onlyReadsMemory()) {
@@ -2107,10 +2101,10 @@ static bool markAliveBlocks(Function &F,
BranchInst::Create(NormalDestBB, II);
UnwindDestBB->removePredecessor(II->getParent());
II->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDestBB);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
} else
- changeToCall(II, DDT);
+ changeToCall(II, DTU);
Changed = true;
}
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
@@ -2156,7 +2150,7 @@ static bool markAliveBlocks(Function &F,
}
}
- Changed |= ConstantFoldTerminator(BB, true, nullptr, DDT);
+ Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU);
for (BasicBlock *Successor : successors(BB))
if (Reachable.insert(Successor).second)
Worklist.push_back(Successor);
@@ -2164,15 +2158,15 @@ static bool markAliveBlocks(Function &F,
return Changed;
}
-void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
- TerminatorInst *TI = BB->getTerminator();
+void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
+ Instruction *TI = BB->getTerminator();
if (auto *II = dyn_cast<InvokeInst>(TI)) {
- changeToCall(II, DDT);
+ changeToCall(II, DTU);
return;
}
- TerminatorInst *NewTI;
+ Instruction *NewTI;
BasicBlock *UnwindDest;
if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
@@ -2196,8 +2190,8 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
UnwindDest->removePredecessor(BB);
TI->replaceAllUsesWith(NewTI);
TI->eraseFromParent();
- if (DDT)
- DDT->deleteEdge(BB, UnwindDest);
+ if (DTU)
+ DTU->deleteEdgeRelaxed(BB, UnwindDest);
}
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
@@ -2205,9 +2199,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {
/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo
/// after modifying the CFG.
bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
- DeferredDominance *DDT) {
+ DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
SmallPtrSet<BasicBlock*, 16> Reachable;
- bool Changed = markAliveBlocks(F, Reachable, DDT);
+ bool Changed = markAliveBlocks(F, Reachable, DTU);
// If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
@@ -2216,45 +2211,68 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
assert(Reachable.size() < F.size());
NumRemoved += F.size()-Reachable.size();
- // Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references. Update DDT and LVI if available.
- std::vector <DominatorTree::UpdateType> Updates;
+ SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
auto *BB = &*I;
if (Reachable.count(BB))
continue;
+ DeadBlockSet.insert(BB);
+ }
+
+ if (MSSAU)
+ MSSAU->removeBlocks(DeadBlockSet);
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references. Update DTU and LVI if available.
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (auto *BB : DeadBlockSet) {
for (BasicBlock *Successor : successors(BB)) {
- if (Reachable.count(Successor))
+ if (!DeadBlockSet.count(Successor))
Successor->removePredecessor(BB);
- if (DDT)
+ if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Successor});
}
if (LVI)
LVI->eraseBlock(BB);
BB->dropAllReferences();
}
-
for (Function::iterator I = ++F.begin(); I != F.end();) {
auto *BB = &*I;
if (Reachable.count(BB)) {
++I;
continue;
}
- if (DDT) {
- DDT->deleteBB(BB); // deferred deletion of BB.
+ if (DTU) {
+ // Remove the terminator of BB to clear the successor list of BB.
+ if (BB->getTerminator())
+ BB->getInstList().pop_back();
+ new UnreachableInst(BB->getContext(), BB);
+ assert(succ_empty(BB) && "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
++I;
} else {
I = F.getBasicBlockList().erase(I);
}
}
- if (DDT)
- DDT->applyUpdates(Updates);
+ if (DTU) {
+ DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ bool Deleted = false;
+ for (auto *BB : DeadBlockSet) {
+ if (DTU->isBBPendingDeletion(BB))
+ --NumRemoved;
+ else
+ Deleted = true;
+ DTU->deleteBB(BB);
+ }
+ if (!Deleted)
+ return false;
+ }
return true;
}
void llvm::combineMetadata(Instruction *K, const Instruction *J,
- ArrayRef<unsigned> KnownIDs) {
+ ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
K->dropUnknownNonDebugMetadata(KnownIDs);
K->getAllMetadataOtherThanDebugLoc(Metadata);
@@ -2279,8 +2297,20 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
case LLVMContext::MD_mem_parallel_loop_access:
K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
break;
+ case LLVMContext::MD_access_group:
+ K->setMetadata(LLVMContext::MD_access_group,
+ intersectAccessGroups(K, J));
+ break;
case LLVMContext::MD_range:
- K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
+
+ // If K does move, use most generic range. Otherwise keep the range of
+ // K.
+ if (DoesKMove)
+ // FIXME: If K does move, we should drop the range info and nonnull.
+ // Currently this function is used with DoesKMove in passes
+ // doing hoisting/sinking and the current behavior of using the
+ // most generic range is correct in those cases.
+ K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
break;
case LLVMContext::MD_fpmath:
K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
@@ -2290,8 +2320,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind, JMD);
break;
case LLVMContext::MD_nonnull:
- // Only set the !nonnull if it is present in both instructions.
- K->setMetadata(Kind, JMD);
+ // If K does move, keep nonnull if it is present in both instructions.
+ if (DoesKMove)
+ K->setMetadata(Kind, JMD);
break;
case LLVMContext::MD_invariant_group:
// Preserve !invariant.group in K.
@@ -2318,15 +2349,49 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_invariant_group, JMD);
}
-void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) {
+void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
+ bool KDominatesJ) {
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
LLVMContext::MD_invariant_group, LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null};
- combineMetadata(K, J, KnownIDs);
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_access_group};
+ combineMetadata(K, J, KnownIDs, KDominatesJ);
+}
+
+void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
+ auto *ReplInst = dyn_cast<Instruction>(Repl);
+ if (!ReplInst)
+ return;
+
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ // Note that if 'I' is a load being replaced by some operation,
+ // for example, by an arithmetic operation, then andIRFlags()
+ // would just erase all math flags from the original arithmetic
+ // operation, which is clearly not wanted and not needed.
+ if (!isa<LoadInst>(I))
+ ReplInst->andIRFlags(I);
+
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ static const unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
+ LLVMContext::MD_access_group};
+ combineMetadata(ReplInst, I, KnownIDs, false);
}
template <typename RootType, typename DominatesFn>
@@ -2454,6 +2519,54 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
}
}
+void llvm::dropDebugUsers(Instruction &I) {
+ SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ findDbgUsers(DbgUsers, &I);
+ for (auto *DII : DbgUsers)
+ DII->eraseFromParent();
+}
+
+void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
+ BasicBlock *BB) {
+ // Since we are moving the instructions out of its basic block, we do not
+ // retain their original debug locations (DILocations) and debug intrinsic
+ // instructions (dbg.values).
+ //
+ // Doing so would degrade the debugging experience and adversely affect the
+ // accuracy of profiling information.
+ //
+ // Currently, when hoisting the instructions, we take the following actions:
+ // - Remove their dbg.values.
+ // - Set their debug locations to the values from the insertion point.
+ //
+ // As per PR39141 (comment #8), the more fundamental reason why the dbg.values
+ // need to be deleted, is because there will not be any instructions with a
+ // DILocation in either branch left after performing the transformation. We
+ // can only insert a dbg.value after the two branches are joined again.
+ //
+ // See PR38762, PR39243 for more details.
+ //
+ // TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to
+ // encode predicated DIExpressions that yield different results on different
+ // code paths.
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
+ Instruction *I = &*II;
+ I->dropUnknownNonDebugMetadata();
+ if (I->isUsedByMetadata())
+ dropDebugUsers(*I);
+ if (isa<DbgVariableIntrinsic>(I)) {
+ // Remove DbgInfo Intrinsics.
+ II = I->eraseFromParent();
+ continue;
+ }
+ I->setDebugLoc(InsertPt->getDebugLoc());
+ ++II;
+ }
+ DomBlock->getInstList().splice(InsertPt->getIterator(), BB->getInstList(),
+ BB->begin(),
+ BB->getTerminator()->getIterator());
+}
+
namespace {
/// A potential constituent of a bitreverse or bswap expression. See
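
Local.cpp also threads a DoesKMove flag through combineMetadata and adds the combineMetadataForCSE/patchReplacementInstruction pair, so callers must now state whether the kept instruction dominates the one being dropped. A hedged caller-side sketch using the signatures added above; replaceRedundantLoad and the dominance assumption are invented for illustration.

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Earlier is the load we keep (K) and it dominates the redundant load Later
// (J), so the CSE-style helper is told KDominatesJ = true before Later's uses
// are forwarded and the instruction is erased.
static void replaceRedundantLoad(LoadInst *Earlier, LoadInst *Later) {
  combineMetadataForCSE(Earlier, Later, /*KDominatesJ=*/true);
  Later->replaceAllUsesWith(Earlier);
  Later->eraseFromParent();
}
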
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 6e92e679f999..41f14a834617 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -20,13 +20,15 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -35,6 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -53,6 +56,7 @@ class LoopRotate {
AssumptionCache *AC;
DominatorTree *DT;
ScalarEvolution *SE;
+ MemorySSAUpdater *MSSAU;
const SimplifyQuery &SQ;
bool RotationOnly;
bool IsUtilMode;
@@ -60,10 +64,11 @@ class LoopRotate {
public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ,
- bool RotationOnly, bool IsUtilMode)
+ DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
- SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {}
+ MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
+ IsUtilMode(IsUtilMode) {}
bool processLoop(Loop *L);
private:
@@ -268,6 +273,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
SE->forgetTopmostLoop(L);
LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
@@ -298,18 +305,18 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
- TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+ Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
// Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
using DbgIntrinsicHash =
std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
- auto makeHash = [](DbgInfoIntrinsic *D) -> DbgIntrinsicHash {
+ auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
};
SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
I != E; ++I) {
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&*I))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
DbgIntrinsics.insert(makeHash(DII));
else
break;
@@ -325,7 +332,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// something that might trap, but isn't safe to hoist something that reads
// memory (without proving that the loop doesn't write).
if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
- !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) &&
+ !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
!isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
continue;
@@ -339,7 +346,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
// Avoid inserting the same intrinsic twice.
- if (auto *DII = dyn_cast<DbgInfoIntrinsic>(C))
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
if (DbgIntrinsics.count(makeHash(DII))) {
C->deleteValue();
continue;
@@ -374,8 +381,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Along with all the other instructions, we just cloned OrigHeader's
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
- TerminatorInst *TI = OrigHeader->getTerminator();
- for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock *SuccBB : successors(OrigHeader))
for (BasicBlock::iterator BI = SuccBB->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
@@ -385,6 +391,12 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
+ // Update MemorySSA before the rewrite call below changes the 1:1
+ // instruction:cloned_instruction_or_value mapping in ValueMap.
+ if (MSSAU) {
+ ValueMap[OrigHeader] = OrigPreheader;
+ MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, ValueMap);
+ }
SmallVector<PHINode*, 2> InsertedPHIs;
// If there were any uses of instructions in the duplicated block outside the
@@ -411,6 +423,12 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
DT->applyUpdates(Updates);
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(Updates, *DT);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
}
// At this point, we've finished our major CFG changes. As part of cloning
@@ -433,7 +451,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Split the edge to form a real preheader.
BasicBlock *NewPH = SplitCriticalEdge(
OrigPreheader, NewHeader,
- CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
NewPH->setName(NewHeader->getName() + ".lr.ph");
// Preserve canonical loop form, which means that 'Exit' should have only
@@ -452,7 +470,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
SplitLatchEdge |= L->getLoopLatch() == ExitPred;
BasicBlock *ExitSplit = SplitCriticalEdge(
ExitPred, Exit,
- CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
ExitSplit->moveBefore(Exit);
}
assert(SplitLatchEdge &&
@@ -467,16 +485,27 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With our CFG finalized, update DomTree if it is available.
if (DT) DT->deleteEdge(OrigPreheader, Exit);
+
+ // Update MSSA too, if available.
+ if (MSSAU)
+ MSSAU->removeEdge(OrigPreheader, Exit);
}
assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// Now that the CFG and DomTree are in a consistent state again, try to merge
// the OrigHeader block into OrigLatch. This will succeed if they are
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
- MergeBlockIntoPredecessor(OrigHeader, DT, LI);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
@@ -585,9 +614,14 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
<< LastExit->getName() << "\n");
// Hoist the instructions from Latch into LastExit.
+ Instruction *FirstLatchInst = &*(Latch->begin());
LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
Latch->begin(), Jmp->getIterator());
+ // Update MemorySSA
+ if (MSSAU)
+ MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst);
+
unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
BasicBlock *Header = Jmp->getSuccessor(0);
assert(Header == L->getHeader() && "expected a backward branch");
@@ -603,6 +637,10 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
if (DT)
DT->eraseNode(Latch);
Latch->eraseFromParent();
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
return true;
}
@@ -635,11 +673,16 @@ bool LoopRotate::processLoop(Loop *L) {
/// The utility to convert a loop into a loop with bottom test.
bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
AssumptionCache *AC, DominatorTree *DT,
- ScalarEvolution *SE, const SimplifyQuery &SQ,
- bool RotationOnly = true,
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ const SimplifyQuery &SQ, bool RotationOnly = true,
unsigned Threshold = unsigned(-1),
bool IsUtilMode = true) {
- LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, SQ, RotationOnly, IsUtilMode);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
+ IsUtilMode);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
return LR.processLoop(L);
}
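
[Illustrative sketch, not part of this diff: it shows how a caller might thread a MemorySSAUpdater through the LoopRotation utility whose signature is extended above. The Optional-based plumbing, the include paths, and the surrounding pass state (MSSA, Threshold) are assumptions; only the LoopRotation parameter list comes from this change.]

    // Hypothetical call site: keep MemorySSA in sync during rotation when the
    // analysis is available, or pass nullptr to preserve the old behaviour.
    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"

    static bool rotateWithOptionalMSSA(Loop *L, LoopInfo *LI,
                                       const TargetTransformInfo *TTI,
                                       AssumptionCache *AC, DominatorTree *DT,
                                       ScalarEvolution *SE, MemorySSA *MSSA,
                                       const SimplifyQuery &SQ,
                                       unsigned Threshold) {
      Optional<MemorySSAUpdater> MSSAU;
      if (MSSA)
        MSSAU = MemorySSAUpdater(MSSA); // updater wraps the analysis result
      return LoopRotation(L, LI, TTI, AC, DT, SE,
                          MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
                          /*RotationOnly=*/true, Threshold,
                          /*IsUtilMode=*/false);
    }
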
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 970494eb4704..380f4fca54d9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -137,7 +137,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
- LI, PreserveLCSSA);
+ LI, nullptr, PreserveLCSSA);
if (!PreheaderBB)
return nullptr;
@@ -251,7 +251,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
SE->forgetLoop(L);
BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
- DT, LI, PreserveLCSSA);
+ DT, LI, nullptr, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -435,7 +435,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
MDNode *LoopMD = nullptr;
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
- TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ Instruction *TI = BackedgeBlocks[i]->getTerminator();
if (!LoopMD)
LoopMD = TI->getMetadata(LoopMDKind);
TI->setMetadata(LoopMDKind, nullptr);
@@ -488,7 +488,7 @@ ReprocessLoop:
<< P->getName() << "\n");
// Zap the dead pred's terminator and replace it with unreachable.
- TerminatorInst *TI = P->getTerminator();
+ Instruction *TI = P->getTerminator();
changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
Changed = true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 04b8c1417e0a..da7ed2bd1652 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -54,10 +54,10 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
static cl::opt<bool>
UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
cl::desc("Verify domtree after unrolling"),
-#ifdef NDEBUG
- cl::init(false)
-#else
+#ifdef EXPENSIVE_CHECKS
cl::init(true)
+#else
+ cl::init(false)
#endif
);
@@ -275,8 +275,7 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
// inserted code, doing constant propagation and dead code elimination as we
// go.
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- const std::vector<BasicBlock *> &NewLoopBlocks = L->getBlocks();
- for (BasicBlock *BB : NewLoopBlocks) {
+ for (BasicBlock *BB : L->getBlocks()) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
@@ -330,12 +329,15 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
+///
+/// If RemainderLoop is non-null, it will receive the remainder loop (if
+/// required and not fully unrolled).
LoopUnrollResult llvm::UnrollLoop(
Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
+ OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
@@ -469,7 +471,7 @@ LoopUnrollResult llvm::UnrollLoop(
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
EpilogProfitability, UnrollRemainder, LI, SE,
- DT, AC, PreserveLCSSA)) {
+ DT, AC, PreserveLCSSA, RemainderLoop)) {
if (Force)
RuntimeTripCount = false;
else {
@@ -596,8 +598,15 @@ LoopUnrollResult llvm::UnrollLoop(
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
- if (const DILocation *DIL = I.getDebugLoc())
- I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+ if (const DILocation *DIL = I.getDebugLoc()) {
+ auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ if (NewDIL)
+ I.setDebugLoc(NewDIL.getValue());
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
@@ -782,7 +791,7 @@ LoopUnrollResult llvm::UnrollLoop(
// there is no such latch.
NewIDom = Latches.back();
for (BasicBlock *IterLatch : Latches) {
- TerminatorInst *Term = IterLatch->getTerminator();
+ Instruction *Term = IterLatch->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
NewIDom = IterLatch;
break;
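
[Illustrative sketch, not part of this diff: with the new trailing Loop **RemainderLoop out-parameter on UnrollLoop, a caller can retrieve the runtime remainder loop instead of losing track of it. The unroll factors and the LI/SE/DT/AC/ORE/Worklist names below are assumed stand-ins for the calling pass's state.]

    // Hypothetical call site: request runtime unrolling and, if a remainder
    // loop had to be created, queue it for later processing.
    Loop *RemainderLoop = nullptr;
    LoopUnrollResult Result = UnrollLoop(
        L, /*Count*/ 4, TripCount, /*Force*/ false, /*AllowRuntime*/ true,
        /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ false,
        /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, /*PeelCount*/ 0,
        /*UnrollRemainder*/ false, LI, SE, DT, AC, &ORE, PreserveLCSSA,
        &RemainderLoop);
    if (Result != LoopUnrollResult::Unmodified && RemainderLoop)
      Worklist.push_back(RemainderLoop); // revisit the remainder separately
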
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index b919f73c3817..e26762639c13 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -72,7 +72,7 @@ static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
for (BasicBlock *BB : ForeBlocks) {
if (BB == SubLoopPreHeader)
continue;
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (!ForeBlocks.count(TI->getSuccessor(i)))
return false;
@@ -167,12 +167,14 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
isSafeToUnrollAndJam should be used prior to calling this to make sure the
   unrolling will be valid. Checking profitability is also advisable.
+
+ If EpilogueLoop is non-null, it receives the epilogue loop (if it was
+ necessary to create one and not fully unrolled).
*/
-LoopUnrollResult
-llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {
+LoopUnrollResult llvm::UnrollAndJamLoop(
+ Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
+ bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
// When we enter here we should have already checked that it is safe
BasicBlock *Header = L->getHeader();
@@ -181,7 +183,7 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
// Don't enter the unroll code if there is nothing to do.
if (TripCount == 0 && Count < 2) {
- LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
return LoopUnrollResult::Unmodified;
}
@@ -196,7 +198,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
if (TripMultiple == 1 || TripMultiple % Count != 0) {
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
/*UseEpilogRemainder*/ true,
- UnrollRemainder, LI, SE, DT, AC, true)) {
+ UnrollRemainder, LI, SE, DT, AC, true,
+ EpilogueLoop)) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
"generated when assuming runtime trip count\n");
return LoopUnrollResult::Unmodified;
@@ -297,8 +300,15 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
- if (const DILocation *DIL = I.getDebugLoc())
- I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+ if (const DILocation *DIL = I.getDebugLoc()) {
+ auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ if (NewDIL)
+ I.setDebugLoc(NewDIL.getValue());
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
// Copy all blocks
for (unsigned It = 1; It != Count; ++It) {
@@ -619,16 +629,28 @@ static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
if (auto D = DI.depends(Src, Dst, true)) {
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
- if (D->isConfused())
+ if (D->isConfused()) {
+ LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
if (!InnerLoop) {
- if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT)
+ if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) {
+ LLVM_DEBUG(dbgs() << " > dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
} else {
assert(LoopDepth + 1 <= D->getLevels());
if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT &&
- D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT)
+ D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) {
+ LLVM_DEBUG(dbgs() << " < > dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
return false;
+ }
}
}
}
@@ -716,38 +738,45 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
if (SubLoopLatch != SubLoopExit)
return false;
- if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken())
+ if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
return false;
+ }
// Split blocks into Fore/SubLoop/Aft based on dominators
BasicBlockSet SubLoopBlocks;
BasicBlockSet ForeBlocks;
BasicBlockSet AftBlocks;
if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks,
- AftBlocks, &DT))
+ AftBlocks, &DT)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
return false;
+ }
// Aft blocks may need to move instructions to fore blocks, which becomes more
// difficult if there are multiple (potentially conditionally executed)
// blocks. For now we just exclude loops with multiple aft blocks.
- if (AftBlocks.size() != 1)
+ if (AftBlocks.size() != 1) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
+ "multiple blocks after the loop\n");
return false;
+ }
- // Check inner loop IV is consistent between all iterations
- const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch);
- if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) ||
- !SubLoopBECountSC->getType()->isIntegerTy())
- return false;
- ScalarEvolution::LoopDisposition LD =
- SE.getLoopDisposition(SubLoopBECountSC, L);
- if (LD != ScalarEvolution::LoopInvariant)
+ // Check inner loop backedge count is consistent on all iterations of the
+ // outer loop
+ if (!hasIterationCountInvariantInParent(SubLoop, SE)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
+ "not consistent on each iteration\n");
return false;
+ }
// Check the loop safety info for exceptions.
- LoopSafetyInfo LSI;
- computeLoopSafetyInfo(&LSI, L);
- if (LSI.MayThrow)
+ SimpleLoopSafetyInfo LSI;
+ LSI.computeLoopSafetyInfo(L);
+ if (LSI.anyBlockMayThrow()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
return false;
+ }
// We've ruled out the easy stuff and now need to check that there are no
// interdependencies which may prevent us from moving the:
@@ -772,14 +801,19 @@ bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
}
// Keep going
return true;
- }))
+ })) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
+ "instructions after subloop to before it\n");
return false;
+ }
// Check for memory dependencies which prohibit the unrolling we are doing.
// Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
// there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
- if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI))
+ if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
return false;
+ }
return true;
}
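
[Illustrative sketch, not part of this diff: UnrollAndJamLoop gains the same kind of out-parameter, Loop **EpilogueLoop, so the caller can see the epilogue loop created for a runtime trip count. Everything here other than the UnrollAndJamLoop parameter list is an assumed stand-in for the caller's state, and AddLoopToWorklist is hypothetical.]

    // Hypothetical call site for the extended UnrollAndJamLoop interface.
    Loop *EpilogueLoop = nullptr;
    LoopUnrollResult UJResult =
        UnrollAndJamLoop(L, /*Count*/ 2, TripCount, TripMultiple,
                         /*UnrollRemainder*/ false, LI, SE, DT, AC, &ORE,
                         &EpilogueLoop);
    if (EpilogueLoop)
      AddLoopToWorklist(EpilogueLoop); // hypothetical caller bookkeeping
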
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 78afe748e596..151a285af4e9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -615,11 +615,17 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// the original loop body.
if (Iter == 0)
DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+#endif
}
- updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
+ auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
+ updateBranchWeights(InsertBot, LatchBRCopy, Iter,
PeelCount, ExitWeight);
+ // Remove Loop metadata from the latch branch instruction
+ // because it is not the Loop's latch branch anymore.
+ LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
InsertTop = InsertBot;
InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0057b4ba7ce1..00d2fd2fdbac 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -70,6 +70,17 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *PreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA) {
+ // Loop structure should be the following:
+ // Preheader
+ // PrologHeader
+ // ...
+ // PrologLatch
+ // PrologExit
+ // NewPreheader
+ // Header
+ // ...
+ // Latch
+ // LatchExit
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
@@ -83,14 +94,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
for (PHINode &PN : Succ->phis()) {
// Add a new PHI node to the prolog end block and add the
// appropriate incoming values.
+ // TODO: This code assumes that the PrologExit (or the LatchExit block for
+  // the prolog loop) contains only one predecessor from the loop, i.e. the
+ // PrologLatch. When supporting multiple-exiting block loops, we can have
+ // two or more blocks that have the LatchExit as the target in the
+ // original loop.
PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
PrologExit->getFirstNonPHI());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(&PN)) {
+ // Succ is loop header.
NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
PreHeader);
} else {
+ // Succ is LatchExit.
NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
}
@@ -124,7 +142,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
PrologExitPreds.push_back(PredBB);
SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
- PreserveLCSSA);
+ nullptr, PreserveLCSSA);
}
// Create a branch around the original loop, which is taken if there are no
@@ -143,7 +161,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
- PreserveLCSSA);
+ nullptr, PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
InsertPt->eraseFromParent();
@@ -257,7 +275,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
assert(Exit && "Loop must have a single exit block only");
// Split the epilogue exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
- SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
+ SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
@@ -267,7 +285,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Split the main loop exit to maintain canonicalization guarantees.
SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
- SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
+ SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
PreserveLCSSA);
}
@@ -380,6 +398,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
}
if (CreateRemainderLoop) {
Loop *NewLoop = NewLoops[L];
+ MDNode *LoopID = NewLoop->getLoopID();
assert(NewLoop && "L should have been cloned");
// Only add loop metadata if the loop is not going to be completely
@@ -387,6 +406,16 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
if (UnrollRemainder)
return NewLoop;
+ Optional<MDNode *> NewLoopID = makeFollowupLoopID(
+ LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
+ if (NewLoopID.hasValue()) {
+ NewLoop->setLoopID(NewLoopID.getValue());
+
+ // Do not setLoopAlreadyUnrolled if loop attributes have been defined
+ // explicitly.
+ return NewLoop;
+ }
+
// Add unroll disable metadata to disable future unrolling for this loop.
NewLoop->setLoopAlreadyUnrolled();
return NewLoop;
@@ -525,10 +554,10 @@ static bool canProfitablyUnrollMultiExitLoop(
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
- bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC,
- bool PreserveLCSSA) {
+ bool UnrollRemainder, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, bool PreserveLCSSA,
+ Loop **ResultLoop) {
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
LLVM_DEBUG(L->dump());
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -545,13 +574,27 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
BasicBlock *Header = L->getHeader();
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+
+ if (!LatchBR || LatchBR->isUnconditional()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ LLVM_DEBUG(
+ dbgs()
+ << "Loop latch not terminated by a conditional branch.\n");
+ return false;
+ }
+
unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
- // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be an exit block out of the loop. This needs
- // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- assert(!L->contains(LatchExit) &&
- "one of the loop latch successors should be the exit block!");
+
+ if (L->contains(LatchExit)) {
+ // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+ // targets of the Latch be an exit block out of the loop.
+ LLVM_DEBUG(
+ dbgs()
+ << "One of the loop latch successors must be the exit block.\n");
+ return false;
+ }
+
// These are exit blocks other than the target of the latch exiting block.
SmallVector<BasicBlock *, 4> OtherExits;
bool isMultiExitUnrollingEnabled =
@@ -636,8 +679,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
NewPreHeader->setName(PreHeader->getName() + ".new");
// Split LatchExit to create phi nodes from branch above.
SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
- NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
- DT, LI, PreserveLCSSA);
+ NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
+ nullptr, PreserveLCSSA);
// NewExit gets its DebugLoc from LatchExit, which is not part of the
// original Loop.
// Fix this by setting Loop's DebugLoc to NewExit.
@@ -762,10 +805,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Now the loop blocks are cloned and the other exiting blocks from the
// remainder are connected to the original Loop's exit blocks. The remaining
// work is to update the phi nodes in the original loop, and take in the
- // values from the cloned region. Also update the dominator info for
- // OtherExits and their immediate successors, since we have new edges into
- // OtherExits.
- SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
+ // values from the cloned region.
for (auto *BB : OtherExits) {
for (auto &II : *BB) {
@@ -800,27 +840,30 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
"Breaks the definition of dedicated exits!");
}
#endif
- // Update the dominator info because the immediate dominator is no longer the
- // header of the original Loop. BB has edges both from L and remainder code.
- // Since the preheader determines which loop is run (L or directly jump to
- // the remainder code), we set the immediate dominator as the preheader.
- if (DT) {
- DT->changeImmediateDominator(BB, PreHeader);
- // Also update the IDom for immediate successors of BB. If the current
- // IDom is the header, update the IDom to be the preheader because that is
- // the nearest common dominator of all predecessors of SuccBB. We need to
- // check for IDom being the header because successors of exit blocks can
- // have edges from outside the loop, and we should not incorrectly update
- // the IDom in that case.
- for (BasicBlock *SuccBB: successors(BB))
- if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
- if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
- assert(!SuccBB->getSinglePredecessor() &&
- "BB should be the IDom then!");
- DT->changeImmediateDominator(SuccBB, PreHeader);
- }
- }
+ }
+
+ // Update the immediate dominator of the exit blocks and blocks that are
+ // reachable from the exit blocks. This is needed because we now have paths
+ // from both the original loop and the remainder code reaching the exit
+  // blocks. While the IDom of these exit blocks was previously inside the
+  // original loop, the IDom is now the preheader (which decides whether the
+  // original loop or the remainder code should run).
+ if (DT && !L->getExitingBlock()) {
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ // NB! We have to examine the dom children of all loop blocks, not just
+ // those which are the IDom of the exit blocks. This is because blocks
+ // reachable from the exit blocks can have their IDom as the nearest common
+ // dominator of the exit blocks.
+ for (auto *BB : L->blocks()) {
+ auto *DomNodeBB = DT->getNode(BB);
+ for (auto *DomChild : DomNodeBB->getChildren()) {
+ auto *DomChildBB = DomChild->getBlock();
+ if (!L->contains(LI->getLoopFor(DomChildBB)))
+ ChildrenToUpdate.push_back(DomChildBB);
+ }
}
+ for (auto *BB : ChildrenToUpdate)
+ DT->changeImmediateDominator(BB, PreHeader);
}
// Loop structure should be the following:
@@ -884,6 +927,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// of its parent loops, so the Scalar Evolution pass needs to be run again.
SE->forgetTopmostLoop(L);
+ // Verify that the Dom Tree is correct.
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+ if (DT)
+ assert(DT->verify(DominatorTree::VerificationLevel::Full));
+#endif
+
// Canonicalize to LoopSimplifyForm both original and remainder loops. We
// cannot rely on the LoopUnrollPass to do this because it only does
// canonicalization for parent/subloops and not the sibling loops.
@@ -897,16 +946,20 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
}
+ auto UnrollResult = LoopUnrollResult::Unmodified;
if (remainderLoop && UnrollRemainder) {
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
- UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
- /*Force*/ false, /*AllowRuntime*/ false,
- /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
- /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
- /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
- /*ORE*/ nullptr, PreserveLCSSA);
+ UnrollResult =
+ UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
+ /*Force*/ false, /*AllowRuntime*/ false,
+ /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+ /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+ /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
+ /*ORE*/ nullptr, PreserveLCSSA);
}
+ if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
+ *ResultLoop = remainderLoop;
NumRuntimeUnrolled++;
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 46af120a428b..a93d1aeb62ef 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -26,8 +26,11 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
@@ -41,1104 +44,7 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-utils"
-bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
- SmallPtrSetImpl<Instruction *> &Set) {
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
- if (!Set.count(dyn_cast<Instruction>(*Use)))
- return false;
- return true;
-}
-
-bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
- switch (Kind) {
- default:
- break;
- case RK_IntegerAdd:
- case RK_IntegerMult:
- case RK_IntegerOr:
- case RK_IntegerAnd:
- case RK_IntegerXor:
- case RK_IntegerMinMax:
- return true;
- }
- return false;
-}
-
-bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
- return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
-}
-
-bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
- switch (Kind) {
- default:
- break;
- case RK_IntegerAdd:
- case RK_IntegerMult:
- case RK_FloatAdd:
- case RK_FloatMult:
- return true;
- }
- return false;
-}
-
-/// Determines if Phi may have been type-promoted. If Phi has a single user
-/// that ANDs the Phi with a type mask, return the user. RT is updated to
-/// account for the narrower bit width represented by the mask, and the AND
-/// instruction is added to CI.
-static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
- SmallPtrSetImpl<Instruction *> &Visited,
- SmallPtrSetImpl<Instruction *> &CI) {
- if (!Phi->hasOneUse())
- return Phi;
-
- const APInt *M = nullptr;
- Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
-
- // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
- // with a new integer type of the corresponding bit width.
- if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
- int32_t Bits = (*M + 1).exactLogBase2();
- if (Bits > 0) {
- RT = IntegerType::get(Phi->getContext(), Bits);
- Visited.insert(Phi);
- CI.insert(J);
- return J;
- }
- }
- return Phi;
-}
-
-/// Compute the minimal bit width needed to represent a reduction whose exit
-/// instruction is given by Exit.
-static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
- DemandedBits *DB,
- AssumptionCache *AC,
- DominatorTree *DT) {
- bool IsSigned = false;
- const DataLayout &DL = Exit->getModule()->getDataLayout();
- uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
-
- if (DB) {
- // Use the demanded bits analysis to determine the bits that are live out
- // of the exit instruction, rounding up to the nearest power of two. If the
- // use of demanded bits results in a smaller bit width, we know the value
- // must be positive (i.e., IsSigned = false), because if this were not the
- // case, the sign bit would have been demanded.
- auto Mask = DB->getDemandedBits(Exit);
- MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
- }
-
- if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
- // If demanded bits wasn't able to limit the bit width, we can try to use
- // value tracking instead. This can be the case, for example, if the value
- // may be negative.
- auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
- auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
- MaxBitWidth = NumTypeBits - NumSignBits;
- KnownBits Bits = computeKnownBits(Exit, DL);
- if (!Bits.isNonNegative()) {
- // If the value is not known to be non-negative, we set IsSigned to true,
- // meaning that we will use sext instructions instead of zext
- // instructions to restore the original type.
- IsSigned = true;
- if (!Bits.isNegative())
- // If the value is not known to be negative, we don't known what the
- // upper bit is, and therefore, we don't know what kind of extend we
- // will need. In this case, just increase the bit width by one bit and
- // use sext.
- ++MaxBitWidth;
- }
- }
- if (!isPowerOf2_64(MaxBitWidth))
- MaxBitWidth = NextPowerOf2(MaxBitWidth);
-
- return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
- IsSigned);
-}
-
-/// Collect cast instructions that can be ignored in the vectorizer's cost
-/// model, given a reduction exit value and the minimal type in which the
-/// reduction can be represented.
-static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
- Type *RecurrenceType,
- SmallPtrSetImpl<Instruction *> &Casts) {
-
- SmallVector<Instruction *, 8> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- Worklist.push_back(Exit);
-
- while (!Worklist.empty()) {
- Instruction *Val = Worklist.pop_back_val();
- Visited.insert(Val);
- if (auto *Cast = dyn_cast<CastInst>(Val))
- if (Cast->getSrcTy() == RecurrenceType) {
- // If the source type of a cast instruction is equal to the recurrence
- // type, it will be eliminated, and should be ignored in the vectorizer
- // cost model.
- Casts.insert(Cast);
- continue;
- }
-
- // Add all operands to the work list if they are loop-varying values that
- // we haven't yet visited.
- for (Value *O : cast<User>(Val)->operands())
- if (auto *I = dyn_cast<Instruction>(O))
- if (TheLoop->contains(I) && !Visited.count(I))
- Worklist.push_back(I);
- }
-}
-
-bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
- Loop *TheLoop, bool HasFunNoNaNAttr,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB,
- AssumptionCache *AC,
- DominatorTree *DT) {
- if (Phi->getNumIncomingValues() != 2)
- return false;
-
- // Reduction variables are only found in the loop header block.
- if (Phi->getParent() != TheLoop->getHeader())
- return false;
-
- // Obtain the reduction start value from the value that comes from the loop
- // preheader.
- Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
-
- // ExitInstruction is the single value which is used outside the loop.
- // We only allow for a single reduction value to be used outside the loop.
- // This includes users of the reduction, variables (which form a cycle
- // which ends in the phi node).
- Instruction *ExitInstruction = nullptr;
- // Indicates that we found a reduction operation in our scan.
- bool FoundReduxOp = false;
-
- // We start with the PHI node and scan for all of the users of this
- // instruction. All users must be instructions that can be used as reduction
- // variables (such as ADD). We must have a single out-of-block user. The cycle
- // must include the original PHI.
- bool FoundStartPHI = false;
-
- // To recognize min/max patterns formed by a icmp select sequence, we store
- // the number of instruction we saw from the recognized min/max pattern,
- // to make sure we only see exactly the two instructions.
- unsigned NumCmpSelectPatternInst = 0;
- InstDesc ReduxDesc(false, nullptr);
-
- // Data used for determining if the recurrence has been type-promoted.
- Type *RecurrenceType = Phi->getType();
- SmallPtrSet<Instruction *, 4> CastInsts;
- Instruction *Start = Phi;
- bool IsSigned = false;
-
- SmallPtrSet<Instruction *, 8> VisitedInsts;
- SmallVector<Instruction *, 8> Worklist;
-
- // Return early if the recurrence kind does not match the type of Phi. If the
- // recurrence kind is arithmetic, we attempt to look through AND operations
- // resulting from the type promotion performed by InstCombine. Vector
- // operations are not limited to the legal integer widths, so we may be able
- // to evaluate the reduction in the narrower width.
- if (RecurrenceType->isFloatingPointTy()) {
- if (!isFloatingPointRecurrenceKind(Kind))
- return false;
- } else {
- if (!isIntegerRecurrenceKind(Kind))
- return false;
- if (isArithmeticRecurrenceKind(Kind))
- Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
- }
-
- Worklist.push_back(Start);
- VisitedInsts.insert(Start);
-
- // A value in the reduction can be used:
- // - By the reduction:
- // - Reduction operation:
- // - One use of reduction value (safe).
- // - Multiple use of reduction value (not safe).
- // - PHI:
- // - All uses of the PHI must be the reduction (safe).
- // - Otherwise, not safe.
- // - By instructions outside of the loop (safe).
- // * One value may have several outside users, but all outside
- // uses must be of the same value.
- // - By an instruction that is not part of the reduction (not safe).
- // This is either:
- // * An instruction type other than PHI or the reduction operation.
- // * A PHI in the header other than the initial PHI.
- while (!Worklist.empty()) {
- Instruction *Cur = Worklist.back();
- Worklist.pop_back();
-
- // No Users.
- // If the instruction has no users then this is a broken chain and can't be
- // a reduction variable.
- if (Cur->use_empty())
- return false;
-
- bool IsAPhi = isa<PHINode>(Cur);
-
- // A header PHI use other than the original PHI.
- if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
- return false;
-
- // Reductions of instructions such as Div, and Sub is only possible if the
- // LHS is the reduction variable.
- if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
- !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
- !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
- return false;
-
- // Any reduction instruction must be of one of the allowed kinds. We ignore
- // the starting value (the Phi or an AND instruction if the Phi has been
- // type-promoted).
- if (Cur != Start) {
- ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
- if (!ReduxDesc.isRecurrence())
- return false;
- }
-
- // A reduction operation must only have one use of the reduction value.
- if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
- hasMultipleUsesOf(Cur, VisitedInsts))
- return false;
-
- // All inputs to a PHI node must be a reduction value.
- if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
- return false;
-
- if (Kind == RK_IntegerMinMax &&
- (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
- if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
-
- // Check whether we found a reduction operator.
- FoundReduxOp |= !IsAPhi && Cur != Start;
-
- // Process users of current instruction. Push non-PHI nodes after PHI nodes
- // onto the stack. This way we are going to have seen all inputs to PHI
- // nodes once we get to them.
- SmallVector<Instruction *, 8> NonPHIs;
- SmallVector<Instruction *, 8> PHIs;
- for (User *U : Cur->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- // Check if we found the exit user.
- BasicBlock *Parent = UI->getParent();
- if (!TheLoop->contains(Parent)) {
- // If we already know this instruction is used externally, move on to
- // the next user.
- if (ExitInstruction == Cur)
- continue;
-
- // Exit if you find multiple values used outside or if the header phi
- // node is being used. In this case the user uses the value of the
- // previous iteration, in which case we would loose "VF-1" iterations of
- // the reduction operation if we vectorize.
- if (ExitInstruction != nullptr || Cur == Phi)
- return false;
-
- // The instruction used by an outside user must be the last instruction
- // before we feed back to the reduction phi. Otherwise, we loose VF-1
- // operations on the value.
- if (!is_contained(Phi->operands(), Cur))
- return false;
-
- ExitInstruction = Cur;
- continue;
- }
-
- // Process instructions only once (termination). Each reduction cycle
- // value must only be used once, except by phi nodes and min/max
- // reductions which are represented as a cmp followed by a select.
- InstDesc IgnoredVal(false, nullptr);
- if (VisitedInsts.insert(UI).second) {
- if (isa<PHINode>(UI))
- PHIs.push_back(UI);
- else
- NonPHIs.push_back(UI);
- } else if (!isa<PHINode>(UI) &&
- ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
- !isa<SelectInst>(UI)) ||
- !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
- return false;
-
- // Remember that we completed the cycle.
- if (UI == Phi)
- FoundStartPHI = true;
- }
- Worklist.append(PHIs.begin(), PHIs.end());
- Worklist.append(NonPHIs.begin(), NonPHIs.end());
- }
-
- // This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
- NumCmpSelectPatternInst != 2)
- return false;
-
- if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
- return false;
-
- if (Start != Phi) {
- // If the starting value is not the same as the phi node, we speculatively
- // looked through an 'and' instruction when evaluating a potential
- // arithmetic reduction to determine if it may have been type-promoted.
- //
- // We now compute the minimal bit width that is required to represent the
- // reduction. If this is the same width that was indicated by the 'and', we
- // can represent the reduction in the smaller type. The 'and' instruction
- // will be eliminated since it will essentially be a cast instruction that
- // can be ignore in the cost model. If we compute a different type than we
- // did when evaluating the 'and', the 'and' will not be eliminated, and we
- // will end up with different kinds of operations in the recurrence
- // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
- // the case.
- //
- // The vectorizer relies on InstCombine to perform the actual
- // type-shrinking. It does this by inserting instructions to truncate the
- // exit value of the reduction to the width indicated by RecurrenceType and
- // then extend this value back to the original width. If IsSigned is false,
- // a 'zext' instruction will be generated; otherwise, a 'sext' will be
- // used.
- //
- // TODO: We should not rely on InstCombine to rewrite the reduction in the
- // smaller type. We should just generate a correctly typed expression
- // to begin with.
- Type *ComputedType;
- std::tie(ComputedType, IsSigned) =
- computeRecurrenceType(ExitInstruction, DB, AC, DT);
- if (ComputedType != RecurrenceType)
- return false;
-
- // The recurrence expression will be represented in a narrower type. If
- // there are any cast instructions that will be unnecessary, collect them
- // in CastInsts. Note that the 'and' instruction was already included in
- // this list.
- //
- // TODO: A better way to represent this may be to tag in some way all the
- // instructions that are a part of the reduction. The vectorizer cost
- // model could then apply the recurrence type to these instructions,
- // without needing a white list of instructions to ignore.
- collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
- }
-
- // We found a reduction var if we have reached the original phi node and we
- // only have a single instruction with out-of-loop users.
-
- // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
- // is saved as part of the RecurrenceDescriptor.
-
- // Save the description of this reduction variable.
- RecurrenceDescriptor RD(
- RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
- ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
- RedDes = RD;
-
- return true;
-}
-
-/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
-/// pattern corresponding to a min(X, Y) or max(X, Y).
-RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
-
- assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
- "Expect a select instruction");
- Instruction *Cmp = nullptr;
- SelectInst *Select = nullptr;
-
- // We must handle the select(cmp()) as a single instruction. Advance to the
- // select.
- if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
- if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
- return InstDesc(false, I);
- return InstDesc(Select, Prev.getMinMaxKind());
- }
-
- // Only handle single use cases for now.
- if (!(Select = dyn_cast<SelectInst>(I)))
- return InstDesc(false, I);
- if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
- !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
- return InstDesc(false, I);
- if (!Cmp->hasOneUse())
- return InstDesc(false, I);
-
- Value *CmpLeft;
- Value *CmpRight;
-
- // Look for a min/max pattern.
- if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_UIntMin);
- else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_UIntMax);
- else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_SIntMax);
- else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_SIntMin);
- else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMin);
- else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMax);
- else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMin);
- else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return InstDesc(Select, MRK_FloatMax);
-
- return InstDesc(false, I);
-}
-
-RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
- InstDesc &Prev, bool HasFunNoNaNAttr) {
- bool FP = I->getType()->isFloatingPointTy();
- Instruction *UAI = Prev.getUnsafeAlgebraInst();
- if (!UAI && FP && !I->isFast())
- UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
-
- switch (I->getOpcode()) {
- default:
- return InstDesc(false, I);
- case Instruction::PHI:
- return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
- case Instruction::Sub:
- case Instruction::Add:
- return InstDesc(Kind == RK_IntegerAdd, I);
- case Instruction::Mul:
- return InstDesc(Kind == RK_IntegerMult, I);
- case Instruction::And:
- return InstDesc(Kind == RK_IntegerAnd, I);
- case Instruction::Or:
- return InstDesc(Kind == RK_IntegerOr, I);
- case Instruction::Xor:
- return InstDesc(Kind == RK_IntegerXor, I);
- case Instruction::FMul:
- return InstDesc(Kind == RK_FloatMult, I, UAI);
- case Instruction::FSub:
- case Instruction::FAdd:
- return InstDesc(Kind == RK_FloatAdd, I, UAI);
- case Instruction::FCmp:
- case Instruction::ICmp:
- case Instruction::Select:
- if (Kind != RK_IntegerMinMax &&
- (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
- return InstDesc(false, I);
- return isMinMaxSelectCmpPattern(I, Prev);
- }
-}
-
-bool RecurrenceDescriptor::hasMultipleUsesOf(
- Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
- unsigned NumUses = 0;
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
- ++Use) {
- if (Insts.count(dyn_cast<Instruction>(*Use)))
- ++NumUses;
- if (NumUses > 1)
- return true;
- }
-
- return false;
-}
-bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB, AssumptionCache *AC,
- DominatorTree *DT) {
-
- BasicBlock *Header = TheLoop->getHeader();
- Function &F = *Header->getParent();
- bool HasFunNoNaNAttr =
- F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
-
- if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
- DB, AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
- return true;
- }
- if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
- AC, DT)) {
- LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi
- << "\n");
- return true;
- }
- // Not a reduction of known type.
- return false;
-}
-
-bool RecurrenceDescriptor::isFirstOrderRecurrence(
- PHINode *Phi, Loop *TheLoop,
- DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
-
- // Ensure the phi node is in the loop header and has two incoming values.
- if (Phi->getParent() != TheLoop->getHeader() ||
- Phi->getNumIncomingValues() != 2)
- return false;
-
- // Ensure the loop has a preheader and a single latch block. The loop
- // vectorizer will need the latch to set up the next iteration of the loop.
- auto *Preheader = TheLoop->getLoopPreheader();
- auto *Latch = TheLoop->getLoopLatch();
- if (!Preheader || !Latch)
- return false;
-
- // Ensure the phi node's incoming blocks are the loop preheader and latch.
- if (Phi->getBasicBlockIndex(Preheader) < 0 ||
- Phi->getBasicBlockIndex(Latch) < 0)
- return false;
-
- // Get the previous value. The previous value comes from the latch edge while
- // the initial value comes form the preheader edge.
- auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
- SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
- return false;
-
- // Ensure every user of the phi node is dominated by the previous value.
- // The dominance requirement ensures the loop vectorizer will not need to
- // vectorize the initial value prior to the first iteration of the loop.
- // TODO: Consider extending this sinking to handle other kinds of instructions
- // and expressions, beyond sinking a single cast past Previous.
- if (Phi->hasOneUse()) {
- auto *I = Phi->user_back();
- if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
- DT->dominates(Previous, I->user_back())) {
- if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
- SinkAfter[I] = Previous;
- return true;
- }
- }
-
- for (User *U : Phi->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- if (!DT->dominates(Previous, I))
- return false;
- }
-
- return true;
-}
-
-/// This function returns the identity element (or neutral element) for
-/// the operation K.
-Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
- Type *Tp) {
- switch (K) {
- case RK_IntegerXor:
- case RK_IntegerAdd:
- case RK_IntegerOr:
- // Adding, Xoring, Oring zero to a number does not change it.
- return ConstantInt::get(Tp, 0);
- case RK_IntegerMult:
- // Multiplying a number by 1 does not change it.
- return ConstantInt::get(Tp, 1);
- case RK_IntegerAnd:
- // AND-ing a number with an all-1 value does not change it.
- return ConstantInt::get(Tp, -1, true);
- case RK_FloatMult:
- // Multiplying a number by 1 does not change it.
- return ConstantFP::get(Tp, 1.0L);
- case RK_FloatAdd:
- // Adding zero to a number does not change it.
- return ConstantFP::get(Tp, 0.0L);
- default:
- llvm_unreachable("Unknown recurrence kind");
- }
-}
-
-/// This function translates the recurrence kind to an LLVM binary operator.
-unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
- switch (Kind) {
- case RK_IntegerAdd:
- return Instruction::Add;
- case RK_IntegerMult:
- return Instruction::Mul;
- case RK_IntegerOr:
- return Instruction::Or;
- case RK_IntegerAnd:
- return Instruction::And;
- case RK_IntegerXor:
- return Instruction::Xor;
- case RK_FloatMult:
- return Instruction::FMul;
- case RK_FloatAdd:
- return Instruction::FAdd;
- case RK_IntegerMinMax:
- return Instruction::ICmp;
- case RK_FloatMinMax:
- return Instruction::FCmp;
- default:
- llvm_unreachable("Unknown recurrence operation");
- }
-}
-
-Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
- MinMaxRecurrenceKind RK,
- Value *Left, Value *Right) {
- CmpInst::Predicate P = CmpInst::ICMP_NE;
- switch (RK) {
- default:
- llvm_unreachable("Unknown min/max recurrence kind");
- case MRK_UIntMin:
- P = CmpInst::ICMP_ULT;
- break;
- case MRK_UIntMax:
- P = CmpInst::ICMP_UGT;
- break;
- case MRK_SIntMin:
- P = CmpInst::ICMP_SLT;
- break;
- case MRK_SIntMax:
- P = CmpInst::ICMP_SGT;
- break;
- case MRK_FloatMin:
- P = CmpInst::FCMP_OLT;
- break;
- case MRK_FloatMax:
- P = CmpInst::FCMP_OGT;
- break;
- }
-
- // We only match FP sequences that are 'fast', so we can unconditionally
- // set it on any generated instructions.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- FastMathFlags FMF;
- FMF.setFast();
- Builder.setFastMathFlags(FMF);
-
- Value *Cmp;
- if (RK == MRK_FloatMin || RK == MRK_FloatMax)
- Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
- else
- Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
-
- Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
- return Select;
-}
-
-InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
- const SCEV *Step, BinaryOperator *BOp,
- SmallVectorImpl<Instruction *> *Casts)
- : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
- assert(IK != IK_NoInduction && "Not an induction");
-
- // Start value type should match the induction kind and the value
- // itself should not be null.
- assert(StartValue && "StartValue is null");
- assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
- "StartValue is not a pointer for pointer induction");
- assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
- "StartValue is not an integer for integer induction");
-
- // Check the Step Value. It should be non-zero integer value.
- assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
- "Step value is zero");
-
- assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
- "Step value should be constant for pointer induction");
- assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
- "StepValue is not an integer");
-
- assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
- "StepValue is not FP for FpInduction");
- assert((IK != IK_FpInduction || (InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub))) &&
- "Binary opcode should be specified for FP induction");
-
- if (Casts) {
- for (auto &Inst : *Casts) {
- RedundantCasts.push_back(Inst);
- }
- }
-}
-
-int InductionDescriptor::getConsecutiveDirection() const {
- ConstantInt *ConstStep = getConstIntStepValue();
- if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
- return ConstStep->getSExtValue();
- return 0;
-}
-
-ConstantInt *InductionDescriptor::getConstIntStepValue() const {
- if (isa<SCEVConstant>(Step))
- return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
- return nullptr;
-}
-
-Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
- ScalarEvolution *SE,
- const DataLayout& DL) const {
-
- SCEVExpander Exp(*SE, DL, "induction");
- assert(Index->getType() == Step->getType() &&
- "Index type does not match StepValue type");
- switch (IK) {
- case IK_IntInduction: {
- assert(Index->getType() == StartValue->getType() &&
- "Index type does not match StartValue type");
-
- // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
- // and calculate (Start + Index * Step) for all cases, without
- // special handling for "isOne" and "isMinusOne".
- // But in practice the resulting code is worse: we would mix SCEV
- // expressions with ADD/SUB operations and end up with redundant
- // intermediate values computed in different ways, which InstCombine
- // is unable to reduce.
-
- if (getConstIntStepValue() &&
- getConstIntStepValue()->isMinusOne())
- return B.CreateSub(StartValue, Index);
- if (getConstIntStepValue() &&
- getConstIntStepValue()->isOne())
- return B.CreateAdd(StartValue, Index);
- const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
- SE->getMulExpr(Step, SE->getSCEV(Index)));
- return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
- }
- case IK_PtrInduction: {
- assert(isa<SCEVConstant>(Step) &&
- "Expected constant step for pointer induction");
- const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
- Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
- return B.CreateGEP(nullptr, StartValue, Index);
- }
- case IK_FpInduction: {
- assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
- assert(InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub) &&
- "Original bin op should be defined for FP induction");
-
- Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
-
- // Floating point operations had to be 'fast' to enable the induction.
- FastMathFlags Flags;
- Flags.setFast();
-
- Value *MulExp = B.CreateFMul(StepValue, Index);
- if (isa<Instruction>(MulExp))
- // We have to check because MulExp may fold to a constant.
- cast<Instruction>(MulExp)->setFastMathFlags(Flags);
-
- Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
- MulExp, "induction");
- if (isa<Instruction>(BOp))
- cast<Instruction>(BOp)->setFastMathFlags(Flags);
-
- return BOp;
- }
- case IK_NoInduction:
- return nullptr;
- }
- llvm_unreachable("invalid enum");
-}
-
-bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
- ScalarEvolution *SE,
- InductionDescriptor &D) {
-
- // Here we only handle FP induction variables.
- assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
-
- if (TheLoop->getHeader() != Phi->getParent())
- return false;
-
- // The loop may have multiple entrances or multiple exits; we can analyze
- // this phi if it has a unique entry value and a unique backedge value.
- if (Phi->getNumIncomingValues() != 2)
- return false;
- Value *BEValue = nullptr, *StartValue = nullptr;
- if (TheLoop->contains(Phi->getIncomingBlock(0))) {
- BEValue = Phi->getIncomingValue(0);
- StartValue = Phi->getIncomingValue(1);
- } else {
- assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
- "Unexpected Phi node in the loop");
- BEValue = Phi->getIncomingValue(1);
- StartValue = Phi->getIncomingValue(0);
- }
-
- BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
- if (!BOp)
- return false;
-
- Value *Addend = nullptr;
- if (BOp->getOpcode() == Instruction::FAdd) {
- if (BOp->getOperand(0) == Phi)
- Addend = BOp->getOperand(1);
- else if (BOp->getOperand(1) == Phi)
- Addend = BOp->getOperand(0);
- } else if (BOp->getOpcode() == Instruction::FSub)
- if (BOp->getOperand(0) == Phi)
- Addend = BOp->getOperand(1);
-
- if (!Addend)
- return false;
-
- // The addend should be loop invariant
- if (auto *I = dyn_cast<Instruction>(Addend))
- if (TheLoop->contains(I))
- return false;
-
- // FP Step has unknown SCEV
- const SCEV *Step = SE->getUnknown(Addend);
- D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
- return true;
-}
-
-/// This function is called when we suspect that the update-chain of a phi node
-/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
-/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
-/// predicate P under which the SCEV expression for the phi can be the
-/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
-/// cast instructions that are involved in the update-chain of this induction.
-/// A caller that adds the required runtime predicate is then free to drop these
-/// cast instructions, and compute the phi using \p AR (instead of some scev
-/// expression with casts).
-///
-/// For example, without a predicate the scev expression can take the following
-/// form:
-/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
-///
-/// It corresponds to the following IR sequence:
-/// %for.body:
-/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
-/// %casted_phi = "ExtTrunc i64 %x"
-/// %add = add i64 %casted_phi, %step
-///
-/// where %x is given in \p PN,
-/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
-/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
-/// several forms, for example, such as:
-/// ExtTrunc1: %casted_phi = and %x, 2^n-1
-/// or:
-/// ExtTrunc2: %t = shl %x, m
-/// %casted_phi = ashr %t, m
-///
-/// If we are able to find such sequence, we return the instructions
-/// we found, namely %casted_phi and the instructions on its use-def chain up
-/// to the phi (not including the phi).
-static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
- const SCEVUnknown *PhiScev,
- const SCEVAddRecExpr *AR,
- SmallVectorImpl<Instruction *> &CastInsts) {
-
- assert(CastInsts.empty() && "CastInsts is expected to be empty.");
- auto *PN = cast<PHINode>(PhiScev->getValue());
- assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
- const Loop *L = AR->getLoop();
-
- // Find any cast instructions that participate in the def-use chain of
- // PhiScev in the loop.
- // FORNOW/TODO: We currently expect the def-use chain to include only
- // two-operand instructions, where one of the operands is an invariant.
- // createAddRecFromPHIWithCasts() currently does not support anything more
- // involved than that, so we keep the search simple. This can be
- // extended/generalized as needed.
-
- auto getDef = [&](const Value *Val) -> Value * {
- const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
- if (!BinOp)
- return nullptr;
- Value *Op0 = BinOp->getOperand(0);
- Value *Op1 = BinOp->getOperand(1);
- Value *Def = nullptr;
- if (L->isLoopInvariant(Op0))
- Def = Op1;
- else if (L->isLoopInvariant(Op1))
- Def = Op0;
- return Def;
- };
-
- // Look for the instruction that defines the induction via the
- // loop backedge.
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch)
- return false;
- Value *Val = PN->getIncomingValueForBlock(Latch);
- if (!Val)
- return false;
-
- // Follow the def-use chain until the induction phi is reached.
- // If on the way we encounter a Value that has the same SCEV Expr as the
- // phi node, we can consider the instructions we visit from that point
- // as part of the cast-sequence that can be ignored.
- bool InCastSequence = false;
- auto *Inst = dyn_cast<Instruction>(Val);
- while (Val != PN) {
- // If we encountered a phi node other than PN, or if we left the loop,
- // we bail out.
- if (!Inst || !L->contains(Inst)) {
- return false;
- }
- auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
- if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
- InCastSequence = true;
- if (InCastSequence) {
- // Only the last instruction in the cast sequence is expected to have
- // uses outside the induction def-use chain.
- if (!CastInsts.empty())
- if (!Inst->hasOneUse())
- return false;
- CastInsts.push_back(Inst);
- }
- Val = getDef(Val);
- if (!Val)
- return false;
- Inst = dyn_cast<Instruction>(Val);
- }
-
- return InCastSequence;
-}
-
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
- PredicatedScalarEvolution &PSE,
- InductionDescriptor &D,
- bool Assume) {
- Type *PhiTy = Phi->getType();
-
- // Handle integer and pointer induction variables.
- // FP inductions are also handled here, but without trying to form a
- // recurrent expression from the PHI node in place.
-
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
- !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
- return false;
-
- if (PhiTy->isFloatingPointTy())
- return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
-
- const SCEV *PhiScev = PSE.getSCEV(Phi);
- const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
-
- // We need this expression to be an AddRecExpr.
- if (Assume && !AR)
- AR = PSE.getAsAddRec(Phi);
-
- if (!AR) {
- LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return false;
- }
-
- // Record any Cast instructions that participate in the induction update
- const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
- // If we started from an UnknownSCEV, and managed to build an AddRecurrence
- // only after enabling Assume with PSCEV, this means we may have encountered
- // cast instructions that required adding a runtime check in order to
- // guarantee the correctness of the AddRecurrence representation of the
- // induction.
- if (PhiScev != AR && SymbolicPhi) {
- SmallVector<Instruction *, 2> Casts;
- if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
- return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
- }
-
- return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
-}
-
-bool InductionDescriptor::isInductionPHI(
- PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
- InductionDescriptor &D, const SCEV *Expr,
- SmallVectorImpl<Instruction *> *CastsToIgnore) {
- Type *PhiTy = Phi->getType();
- // We only handle integer and pointer induction variables.
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
- return false;
-
- // Check that the PHI is consecutive.
- const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
-
- if (!AR) {
- LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return false;
- }
-
- if (AR->getLoop() != TheLoop) {
- // FIXME: We should treat this as a uniform. Unfortunately, we
- // don't currently know how to handle uniform PHIs.
- LLVM_DEBUG(
- dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
- return false;
- }
-
- Value *StartValue =
- Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
- const SCEV *Step = AR->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
- // The stride may be a constant or a loop invariant integer value.
- const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
- if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
- return false;
-
- if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
- CastsToIgnore);
- return true;
- }
-
- assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
- // Pointer induction should be a constant.
- if (!ConstStep)
- return false;
-
- ConstantInt *CV = ConstStep->getValue();
- Type *PointerElementType = PhiTy->getPointerElementType();
- // The pointer stride cannot be determined if the pointer element type is not
- // sized.
- if (!PointerElementType->isSized())
- return false;
-
- const DataLayout &DL = Phi->getModule()->getDataLayout();
- int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
- if (!Size)
- return false;
-
- int64_t CVSize = CV->getSExtValue();
- if (CVSize % Size)
- return false;
- auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
- true /* signed */);
- D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
- return true;
-}
+static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA) {
@@ -1173,7 +79,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
return false;
auto *NewExitBB = SplitBlockPredecessors(
- BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);
+ BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
if (!NewExitBB)
LLVM_DEBUG(
@@ -1286,37 +192,231 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) {
/// If it has a value (e.g. {"llvm.distribute", 1}) return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
+Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name) {
- MDNode *LoopID = TheLoop->getLoopID();
- // Return none if LoopID is false.
- if (!LoopID)
+ MDNode *MD = findOptionMDForLoop(TheLoop, Name);
+ if (!MD)
return None;
+ switch (MD->getNumOperands()) {
+ case 1:
+ return nullptr;
+ case 2:
+ return &MD->getOperand(1);
+ default:
+ llvm_unreachable("loop metadata has 0 or 1 operand");
+ }
+}
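+// An illustrative sketch of the behavior: given loop metadata of the usual
+// shape
+//   !0 = distinct !{!0, !1, !2}
+//   !1 = !{!"llvm.loop.unroll.disable"}
+//   !2 = !{!"llvm.loop.unroll.count", i32 4}
+// querying "llvm.loop.unroll.count" yields a pointer to the i32 4 operand,
+// "llvm.loop.unroll.disable" yields nullptr (option present, no value), and
+// any other name yields None.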
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
+ StringRef Name) {
+ MDNode *MD = findOptionMDForLoop(TheLoop, Name);
+ if (!MD)
+ return None;
+ switch (MD->getNumOperands()) {
+ case 1:
+ // When the value is absent it is interpreted as 'attribute set'.
+ return true;
+ case 2:
+ return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+ }
+ llvm_unreachable("unexpected number of options");
+}
- // Iterate over LoopID operands and look for MDString Metadata
- for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (!MD)
- continue;
- MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- if (!S)
+static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
+ return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
+}
+
+llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
+ StringRef Name) {
+ const MDOperand *AttrMD =
+ findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
+ if (!AttrMD)
+ return None;
+
+ ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
+ if (!IntMD)
+ return None;
+
+ return IntMD->getSExtValue();
+}
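+// For example (same metadata shape as above), querying
+// "llvm.loop.unroll.count" through getOptionalIntLoopAttribute returns 4; a
+// missing option or a non-integer operand yields None.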
+
+Optional<MDNode *> llvm::makeFollowupLoopID(
+ MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
+ const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
+ if (!OrigLoopID) {
+ if (AlwaysNew)
+ return nullptr;
+ return None;
+ }
+
+ assert(OrigLoopID->getOperand(0) == OrigLoopID);
+
+ bool InheritAllAttrs = !InheritOptionsExceptPrefix;
+ bool InheritSomeAttrs =
+ InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
+ SmallVector<Metadata *, 8> MDs;
+ MDs.push_back(nullptr);
+
+ bool Changed = false;
+ if (InheritAllAttrs || InheritSomeAttrs) {
+ for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
+ MDNode *Op = cast<MDNode>(Existing.get());
+
+ auto InheritThisAttribute = [InheritSomeAttrs,
+ InheritOptionsExceptPrefix](MDNode *Op) {
+ if (!InheritSomeAttrs)
+ return false;
+
+ // Skip malformed attribute metadata nodes.
+ if (Op->getNumOperands() == 0)
+ return true;
+ Metadata *NameMD = Op->getOperand(0).get();
+ if (!isa<MDString>(NameMD))
+ return true;
+ StringRef AttrName = cast<MDString>(NameMD)->getString();
+
+ // Do not inherit excluded attributes.
+ return !AttrName.startswith(InheritOptionsExceptPrefix);
+ };
+
+ if (InheritThisAttribute(Op) || InheritAllAttrs)
+ MDs.push_back(Op);
+ else
+ Changed = true;
+ }
+ } else {
+ // Modified if we dropped at least one attribute.
+ Changed = OrigLoopID->getNumOperands() > 1;
+ }
+
+ bool HasAnyFollowup = false;
+ for (StringRef OptionName : FollowupOptions) {
+ MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
+ if (!FollowupNode)
continue;
- // Return true if MDString holds expected MetaData.
- if (Name.equals(S->getString()))
- switch (MD->getNumOperands()) {
- case 1:
- return nullptr;
- case 2:
- return &MD->getOperand(1);
- default:
- llvm_unreachable("loop metadata has 0 or 1 operand");
- }
+
+ HasAnyFollowup = true;
+ for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
+ MDs.push_back(Option.get());
+ Changed = true;
+ }
}
- return None;
+
+ // Attributes of the followup loop were not specified explicitly, so signal
+ // to the transformation pass to add suitable attributes.
+ if (!AlwaysNew && !HasAnyFollowup)
+ return None;
+
+ // If no attributes were added or removed, the previous loop ID can be reused.
+ if (!AlwaysNew && !Changed)
+ return OrigLoopID;
+
+ // No attributes is equivalent to having no !llvm.loop metadata at all.
+ if (MDs.size() == 1)
+ return nullptr;
+
+ // Build the new loop ID.
+ MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
+ FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
+ return FollowupLoopID;
+}
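+// An illustrative sketch (the option names are examples): an unroll pass
+// might call
+//   makeFollowupLoopID(OrigLoopID, {"llvm.loop.unroll.followup_unrolled"},
+//                      "llvm.loop.unroll.")
+// which keeps every original attribute not starting with "llvm.loop.unroll."
+// and appends the operands of the followup option, if present. A result of
+// None tells the caller to derive the new attributes itself; nullptr means
+// the transformed loop should carry no !llvm.loop metadata at all.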
+
+bool llvm::hasDisableAllTransformsHint(const Loop *L) {
+ return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);
+}
+
+TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
+ return TM_SuppressedByUser;
+
+ Optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
+ if (Count.hasValue())
+ return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
+ return TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
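+// A minimal usage sketch: callers typically honor the answer before running
+// their own cost model, e.g.
+//   if (hasUnrollTransformation(L) == TM_SuppressedByUser)
+//     return false; // the user disabled unrolling for this loop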
+
+TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
+ return TM_SuppressedByUser;
+
+ Optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
+ if (Count.hasValue())
+ return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+ Optional<bool> Enable =
+ getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
+
+ if (Enable == false)
+ return TM_SuppressedByUser;
+
+ Optional<int> VectorizeWidth =
+ getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+ Optional<int> InterleaveCount =
+ getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
+
+ if (Enable == true) {
+ // 'Forcing' vector width and interleave count to one effectively disables
+ // this transformation.
+ if (VectorizeWidth == 1 && InterleaveCount == 1)
+ return TM_SuppressedByUser;
+ return TM_ForcedByUser;
+ }
+
+ if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+ return TM_Disable;
+
+ if (VectorizeWidth == 1 && InterleaveCount == 1)
+ return TM_Disable;
+
+ if (VectorizeWidth > 1 || InterleaveCount > 1)
+ return TM_Enable;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
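+// Worked examples for the checks above (standard metadata spellings assumed):
+//   vectorize.width == 1 and interleave.count == 1 -> TM_Disable, or
+//     TM_SuppressedByUser when vectorize.enable is also set;
+//   vectorize.width > 1 or interleave.count > 1    -> TM_Enable;
+//   llvm.loop.isvectorized present                 -> TM_Disable.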
+
+TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
+ return TM_ForcedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
+}
+
+TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+ if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
+ return TM_SuppressedByUser;
+
+ if (hasDisableAllTransformsHint(L))
+ return TM_Disable;
+
+ return TM_Unspecified;
}
/// Does a BFS from a given node to all of its children inside a given loop.
@@ -1425,14 +525,19 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
// Remove the old branch.
Preheader->getTerminator()->eraseFromParent();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
if (DT) {
// Update the dominator tree by informing it about the new edge from the
// preheader to the exit.
- DT->insertEdge(Preheader, ExitBlock);
+ DTU.insertEdge(Preheader, ExitBlock);
// Inform the dominator tree about the removed edge.
- DT->deleteEdge(Preheader, L->getHeader());
+ DTU.deleteEdge(Preheader, L->getHeader());
}
+ // Use a set to de-duplicate and a vector to guarantee deterministic ordering.
+ llvm::SmallDenseSet<std::pair<DIVariable *, DIExpression *>, 4> DeadDebugSet;
+ llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;
+
// Given LCSSA form is satisfied, we should not have users of instructions
// within the dead loop outside of the loop. However, LCSSA doesn't take
// unreachable uses into account. We handle them here.
@@ -1457,8 +562,27 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
"Unexpected user in reachable block");
U.set(Undef);
}
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ continue;
+ auto Key = DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()});
+ if (Key != DeadDebugSet.end())
+ continue;
+ DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()});
+ DeadDebugInst.push_back(DVI);
}
+ // After the loop has been deleted, all the values defined and modified
+ // inside the loop become unavailable.
+ // Since the debug values in the loop have been deleted, insert an undef
+ // dbg.value at the point where the loop used to be; this truncates the range
+ // of any dbg.value defined before the loop, which is particularly important
+ // for constant values.
+ DIBuilder DIB(*ExitBlock->getModule());
+ for (auto *DVI : DeadDebugInst)
+ DIB.insertDbgValueIntrinsic(
+ UndefValue::get(Builder.getInt32Ty()), DVI->getVariable(),
+ DVI->getExpression(), DVI->getDebugLoc(), ExitBlock->getFirstNonPHI());
+
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
for (auto *Block : L->blocks())
@@ -1519,6 +643,28 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
return (FalseVal + (TrueVal / 2)) / TrueVal;
}
+bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
+ ScalarEvolution &SE) {
+ Loop *OuterL = InnerLoop->getParentLoop();
+ if (!OuterL)
+ return true;
+
+ // Get the backedge taken count for the inner loop
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
+ if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
+ !InnerLoopBECountSC->getType()->isIntegerTy())
+ return false;
+
+ // Check whether the count is invariant with respect to the outer loop.
+ ScalarEvolution::LoopDisposition LD =
+ SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
+ if (LD != ScalarEvolution::LoopInvariant)
+ return false;
+
+ return true;
+}
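+// Illustrative examples: for
+//   for (i = 0; i < n; ++i)
+//     for (j = 0; j < 16; ++j) ...
+// the inner backedge-taken count is invariant in the parent, so this returns
+// true; for
+//   for (i = 0; i < n; ++i)
+//     for (j = 0; j < i; ++j) ...
+// the inner count depends on i, so this returns false.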
+
/// Adds a 'fast' flag to floating point operations.
static Value *addFastMathFlag(Value *V) {
if (isa<FPMathOperator>(V)) {
@@ -1529,6 +675,51 @@ static Value *addFastMathFlag(Value *V) {
return V;
}
+Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
+ RecurrenceDescriptor::MinMaxRecurrenceKind RK,
+ Value *Left, Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max recurrence kind");
+ case RecurrenceDescriptor::MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case RecurrenceDescriptor::MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case RecurrenceDescriptor::MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case RecurrenceDescriptor::MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case RecurrenceDescriptor::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case RecurrenceDescriptor::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ // We only match FP sequences that are 'fast', so we can unconditionally
+ // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ FastMathFlags FMF;
+ FMF.setFast();
+ Builder.setFastMathFlags(FMF);
+
+ Value *Cmp;
+ if (RK == RecurrenceDescriptor::MRK_FloatMin ||
+ RK == RecurrenceDescriptor::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
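+// For MRK_SIntMin on scalars, for example, the generated sequence is
+//   %rdx.minmax.cmp    = icmp slt <ty> %Left, %Right
+//   %rdx.minmax.select = select i1 %rdx.minmax.cmp, <ty> %Left, <ty> %Right
+// (for vector operands the compare and select are element-wise).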
+
// Helper to generate an ordered reduction.
Value *
llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
@@ -1550,8 +741,7 @@ llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");
- Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, Result,
- Ext);
+ Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext);
}
if (!RedOps.empty())
@@ -1594,8 +784,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");
- TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
- Shuf);
+ TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
}
if (!RedOps.empty())
propagateIRFlags(TmpVec, RedOps);
@@ -1613,7 +802,7 @@ Value *llvm::createSimpleTargetReduction(
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
- std::function<Value*()> BuildFunc;
+ std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
// TODO: Support creating ordered reductions.
@@ -1739,3 +928,39 @@ void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
VecOp->andIRFlags(V);
}
}
+
+bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);
+}
+
+bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
+}
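+// Both queries require S to be available at loop entry and then ask whether
+// entering L is guarded by the corresponding compare; e.g. a guard of the
+// form
+//   if (len >= 0) { ... loop L using len ... }
+// can let isKnownNonNegativeInLoop(SE.getSCEV(len), L, SE) return true.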
+
+bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed) {
+ unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
+ APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
+ APInt::getMinValue(BitWidth);
+ auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, Predicate, S,
+ SE.getConstant(Min));
+}
+
+bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
+ bool Signed) {
+ unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
+ APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :
+ APInt::getMaxValue(BitWidth);
+ auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, Predicate, S,
+ SE.getConstant(Max));
+}
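+// These helpers prove that, on entry to L, S is strictly above the minimum
+// (respectively strictly below the maximum) value representable in its bit
+// width; with Signed == true that is S > INT_MIN (resp. S < INT_MAX), the
+// usual precondition for showing that S - 1 or S + 1 cannot wrap.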
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 03006ef3a2d3..661b4fa5bcb7 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -301,7 +301,7 @@ static void createMemMoveLoop(Instruction *InsertBefore,
// the appropriate conditional branches when the loop is built.
ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
SrcAddr, DstAddr, "compare_src_dst");
- TerminatorInst *ThenTerm, *ElseTerm;
+ Instruction *ThenTerm, *ElseTerm;
SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
&ElseTerm);
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index e99ecfef19cd..d019a44fc705 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -372,7 +372,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
Case.getCaseSuccessor()));
- llvm::sort(Cases.begin(), Cases.end(), CaseCmp());
+ llvm::sort(Cases, CaseCmp());
// Merge case into clusters
if (Cases.size() >= 2) {
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index ba4b7f3cc263..ae5e72ea4d30 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -174,6 +174,49 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
return std::make_pair(Ctor, InitFunction);
}
+std::pair<Function *, Function *>
+llvm::getOrCreateSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
+ function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+ StringRef VersionCheckName) {
+ assert(!CtorName.empty() && "Expected ctor function name");
+
+ if (Function *Ctor = M.getFunction(CtorName))
+ // FIXME: Sink this logic into the module, similar to the handling of
+ // globals. This will make moving to a concurrent model much easier.
+ if (Ctor->arg_size() == 0 ||
+ Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
+ return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
+
+ Function *Ctor, *InitFunction;
+ std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
+ M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
+ FunctionsCreatedCallback(Ctor, InitFunction);
+ return std::make_pair(Ctor, InitFunction);
+}
+
+Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
+ assert(!Name.empty() && "Expected init function name");
+ if (Function *F = M.getFunction(Name)) {
+ if (F->arg_size() != 0 ||
+ F->getReturnType() != Type::getVoidTy(M.getContext())) {
+ std::string Err;
+ raw_string_ostream Stream(Err);
+ Stream << "Sanitizer interface function defined with wrong type: " << *F;
+ report_fatal_error(Err);
+ }
+ return F;
+ }
+ Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ Name, AttributeList(), Type::getVoidTy(M.getContext())));
+ F->setLinkage(Function::ExternalLinkage);
+
+ appendToGlobalCtors(M, F, 0);
+
+ return F;
+}
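+// A hedged usage sketch: a sanitizer pass can fetch (or lazily create) its
+// module init hook with, e.g.,
+//   Function *Init = getOrCreateInitFunction(M, "__tsan_init");
+// a newly created function is declared as "void()" with external linkage and
+// registered in llvm.global_ctors at priority 0.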
+
void llvm::filterDeadComdatFunctions(
Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
// Build a map from the comdat to the number of entries in that comdat we
diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 2923977b791a..585ce6b4c118 100644
--- a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -35,7 +35,6 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/OrderedInstructions.h"
#include <algorithm>
#define DEBUG_TYPE "predicateinfo"
using namespace llvm;
@@ -523,7 +522,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (IF->user_begin() == IF->user_end())
+ if (empty(IF->users()))
CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
@@ -535,7 +534,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
"Should not have gotten here without it being an assume");
IRBuilder<> B(PAssume->AssumeInst);
Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (IF->user_begin() == IF->user_end())
+ if (empty(IF->users()))
CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PredicateMap.insert({PIC, ValInfo});
@@ -570,7 +569,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
auto Comparator = [&](const Value *A, const Value *B) {
return valueComesBefore(OI, A, B);
};
- llvm::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
+ llvm::sort(OpsToRename, Comparator);
ValueDFS_Compare Compare(OI);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 86e15bbd7f22..91e4f4254b3e 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -82,8 +82,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
@@ -116,7 +115,7 @@ struct AllocaInfo {
bool OnlyUsedInOneBlock;
Value *AllocaPointerVal;
- TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares;
+ TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
void clear() {
DefiningBlocks.clear();
@@ -263,7 +262,7 @@ struct PromoteMem2Reg {
/// For each alloca, we keep track of the dbg.declare intrinsic that
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
- SmallVector<TinyPtrVector<DbgInfoIntrinsic *>, 8> AllocaDbgDeclares;
+ SmallVector<TinyPtrVector<DbgVariableIntrinsic *>, 8> AllocaDbgDeclares;
/// The set of basic blocks the renamer has already visited.
SmallPtrSet<BasicBlock *, 16> Visited;
@@ -426,7 +425,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Record debuginfo for the store and remove the declaration's
// debuginfo.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
DII->eraseFromParent();
@@ -477,7 +476,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Sort the stores by their index, making it efficient to do a lookup with a
// binary search.
- llvm::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
+ llvm::sort(StoresByIndex, less_first());
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
@@ -527,7 +526,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
}
@@ -539,7 +538,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LBI.deleteValue(AI);
// The alloca's debuginfo can be removed as well.
- for (DbgInfoIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
DII->eraseFromParent();
LBI.deleteValue(DII);
}
@@ -638,10 +637,9 @@ void PromoteMem2Reg::run() {
SmallVector<BasicBlock *, 32> PHIBlocks;
IDF.calculate(PHIBlocks);
if (PHIBlocks.size() > 1)
- llvm::sort(PHIBlocks.begin(), PHIBlocks.end(),
- [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
- });
+ llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
unsigned CurrentVersion = 0;
for (BasicBlock *BB : PHIBlocks)
@@ -752,14 +750,18 @@ void PromoteMem2Reg::run() {
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
- llvm::sort(Preds.begin(), Preds.end());
+ auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ };
+ llvm::sort(Preds, CompareBBNumbers);
// Now we loop through all BB's which have entries in SomePHI and remove
// them from the Preds list.
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
// Do a log(n) search of the Preds list for the entry we want.
SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
- Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
+ Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i),
+ CompareBBNumbers);
assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
"PHI node has entry for a block which is not a predecessor!");
@@ -932,7 +934,7 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
- for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
+ for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[AllocaNo])
ConvertDebugDeclareToDebugValue(DII, APN, DIB);
// Get the next phi node.
@@ -951,7 +953,7 @@ NextIteration:
if (!Visited.insert(BB).second)
return;
- for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
+ for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) {
Instruction *I = &*II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -992,7 +994,7 @@ NextIteration:
// Record debuginfo for the store before removing it.
IncomingLocs[AllocaNo] = SI->getDebugLoc();
- for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
+ for (DbgVariableIntrinsic *DII : AllocaDbgDeclares[ai->second])
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
BB->getInstList().erase(SI);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c87b5c16ffce..03b73954321d 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -173,14 +173,15 @@ class SimplifyCFGOpt {
const DataLayout &DL;
SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
const SimplifyCFGOptions &Options;
+ bool Resimplify;
- Value *isValueEqualityComparison(TerminatorInst *TI);
+ Value *isValueEqualityComparison(Instruction *TI);
BasicBlock *GetValueEqualityComparisonCases(
- TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
- bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+ Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
+ bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
BasicBlock *Pred,
IRBuilder<> &Builder);
- bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+ bool FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder);
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
@@ -194,6 +195,9 @@ class SimplifyCFGOpt {
bool SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
bool SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
+ bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+ IRBuilder<> &Builder);
+
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
@@ -201,6 +205,13 @@ public:
: TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
bool run(BasicBlock *BB);
+ bool simplifyOnce(BasicBlock *BB);
+
+ // Helper to set Resimplify and return change indication.
+ bool requestResimplify() {
+ Resimplify = true;
+ return true;
+ }
};
} // end anonymous namespace
@@ -208,7 +219,7 @@ public:
/// Return true if it is safe to merge these two
/// terminator instructions together.
static bool
-SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2,
+SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
if (SI1 == SI2)
return false; // Can't merge with self!
@@ -315,7 +326,7 @@ static unsigned ComputeSpeculationCost(const User *I,
/// V plus its non-dominating operands. If that cost is greater than
/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- SmallPtrSetImpl<Instruction *> *AggressiveInsts,
+ SmallPtrSetImpl<Instruction *> &AggressiveInsts,
unsigned &CostRemaining,
const TargetTransformInfo &TTI,
unsigned Depth = 0) {
@@ -349,13 +360,8 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
return true;
- // If we aren't allowing aggressive promotion anymore, then don't consider
- // instructions in the 'if region'.
- if (!AggressiveInsts)
- return false;
-
// If we have seen this instruction before, don't count it again.
- if (AggressiveInsts->count(I))
+ if (AggressiveInsts.count(I))
return true;
// Okay, it looks like the instruction IS in the "condition". Check to
@@ -373,7 +379,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// is expected to be undone in CodeGenPrepare if the speculation has not
// enabled further IR optimizations.
if (Cost > CostRemaining &&
- (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0))
+ (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
return false;
// Avoid unsigned wrap.
@@ -386,7 +392,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
- AggressiveInsts->insert(I);
+ AggressiveInsts.insert(I);
return true;
}
@@ -664,7 +670,7 @@ private:
} // end anonymous namespace
-static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
+static void EraseTerminatorAndDCECond(Instruction *TI) {
Instruction *Cond = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
@@ -682,12 +688,12 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
/// Return true if the specified terminator checks
/// to see if a value is equal to constant integer value.
-Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
+Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
Value *CV = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
// Do not permit merging of large switch instructions into their
// predecessors unless there is only one predecessor.
- if (SI->getNumSuccessors() * pred_size(SI->getParent()) <= 128)
+ if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
CV = SI->getCondition();
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
@@ -710,7 +716,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
- TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
+ Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
for (auto Case : SI->cases())
@@ -800,7 +806,7 @@ static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
- TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
+ Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
if (!PredVal)
return false; // Not a value comparison in predecessor.
@@ -848,7 +854,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
<< "Through successor TI: " << *TI << "Leaving: " << *NI
<< "\n");
- EraseTerminatorInstAndDCECond(TI);
+ EraseTerminatorAndDCECond(TI);
return true;
}
@@ -930,7 +936,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
<< "Through successor TI: " << *TI << "Leaving: " << *NI
<< "\n");
- EraseTerminatorInstAndDCECond(TI);
+ EraseTerminatorAndDCECond(TI);
return true;
}
@@ -965,10 +971,10 @@ static inline bool HasBranchWeights(const Instruction *I) {
return false;
}
-/// Get Weights of a given TerminatorInst, the default weight is at the front
+/// Get the weights of a given terminator; the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
-static void GetBranchWeights(TerminatorInst *TI,
+static void GetBranchWeights(Instruction *TI,
SmallVectorImpl<uint64_t> &Weights) {
MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
assert(MD);
@@ -1002,7 +1008,7 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) {
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
/// on the same value. If so, and if safe to do so, fold them together.
-bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
+bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder) {
BasicBlock *BB = TI->getParent();
Value *CV = isValueEqualityComparison(TI); // CondVal
@@ -1014,7 +1020,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
BasicBlock *Pred = Preds.pop_back_val();
// See if the predecessor is a comparison with the same value.
- TerminatorInst *PTI = Pred->getTerminator();
+ Instruction *PTI = Pred->getTerminator();
Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
if (PCV == CV && TI != PTI) {
@@ -1191,7 +1197,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
setBranchWeights(NewSI, MDWeights);
}
- EraseTerminatorInstAndDCECond(PTI);
+ EraseTerminatorAndDCECond(PTI);
// Okay, last check. If BB is still a successor of PSI, then we must
// have an infinite loop case. If so, add an infinitely looping block
@@ -1270,7 +1276,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
- if (isa<TerminatorInst>(I1))
+ if (I1->isTerminator())
goto HoistTerminator;
// If we're going to hoist a call, make sure that the two instructions we're
@@ -1315,8 +1321,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_mem_parallel_loop_access};
- combineMetadata(I1, I2, KnownIDs);
+ LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group};
+ combineMetadata(I1, I2, KnownIDs, true);
// I1 and I2 are being combined into a single instruction. Its debug
// location is the merged locations of the original instructions.
@@ -1375,7 +1382,13 @@ HoistTerminator:
NT->takeName(I1);
}
+ // Ensure terminator gets a debug location, even an unknown one, in case
+ // it involves inlinable calls.
+ NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
+
+ // PHIs created below will adopt NT's merged DebugLoc.
IRBuilder<NoFolder> Builder(NT);
+
// Hoisting one of the terminators from our successor is a great thing.
// Unfortunately, the successors of the if/else blocks may have PHI nodes in
// them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
@@ -1407,7 +1420,7 @@ HoistTerminator:
for (BasicBlock *Succ : successors(BB1))
AddPredecessorToBlock(Succ, BIParent, BB1);
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -1582,7 +1595,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// However, as N-way merge for CallInst is rare, so we use simplified API
// instead of using complex API for N-way merge.
I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
- combineMetadataForCSE(I0, I);
+ combineMetadataForCSE(I0, I, true);
I0->andIRFlags(I);
}
@@ -1940,11 +1953,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
- // Keep a count of how many times instructions are used within CondBB when
- // they are candidates for sinking into CondBB. Specifically:
+ // Keep a count of how many times instructions are used within ThenBB when
+ // they are candidates for sinking into ThenBB. Specifically:
// - They are defined in BB, and
// - They have no side effects, and
- // - All of their uses are in CondBB.
+ // - All of their uses are in ThenBB.
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
@@ -1994,14 +2007,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
}
- // Consider any sink candidates which are only used in CondBB as costs for
+ // Consider any sink candidates which are only used in ThenBB as costs for
// speculation. Note, while we iterate over a DenseMap here, we are summing
// and so iteration order isn't significant.
for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
I = SinkCandidateUseCounts.begin(),
E = SinkCandidateUseCounts.end();
I != E; ++I)
- if (I->first->getNumUses() == I->second) {
+ if (I->first->hasNUses(I->second)) {
++SpeculationCost;
if (SpeculationCost > 1)
return false;
@@ -2241,7 +2254,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
// Loop over all of the edges from PredBB to BB, changing them to branch
// to EdgeBB instead.
- TerminatorInst *PredBBTI = PredBB->getTerminator();
+ Instruction *PredBBTI = PredBB->getTerminator();
for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
if (PredBBTI->getSuccessor(i) == BB) {
BB->removePredecessor(PredBB);
@@ -2249,7 +2262,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
}
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DL, AC) | true;
+ return FoldCondBranchOnPHI(BI, DL, AC) || true;
}
return false;
@@ -2304,9 +2317,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
continue;
}
- if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
MaxCostVal0, TTI) ||
- !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
MaxCostVal1, TTI))
return false;
}
@@ -2336,8 +2349,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
IfBlock1 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock1);
- for (BasicBlock::iterator I = IfBlock1->begin(); !isa<TerminatorInst>(I);
- ++I)
+ for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
@@ -2350,8 +2362,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
IfBlock2 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock2);
- for (BasicBlock::iterator I = IfBlock2->begin(); !isa<TerminatorInst>(I);
- ++I)
+ for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
@@ -2371,20 +2382,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
- if (IfBlock1) {
- for (auto &I : *IfBlock1)
- I.dropUnknownNonDebugMetadata();
- DomBlock->getInstList().splice(InsertPt->getIterator(),
- IfBlock1->getInstList(), IfBlock1->begin(),
- IfBlock1->getTerminator()->getIterator());
- }
- if (IfBlock2) {
- for (auto &I : *IfBlock2)
- I.dropUnknownNonDebugMetadata();
- DomBlock->getInstList().splice(InsertPt->getIterator(),
- IfBlock2->getInstList(), IfBlock2->begin(),
- IfBlock2->getTerminator()->getIterator());
- }
+ if (IfBlock1)
+ hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1);
+ if (IfBlock2)
+ hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
@@ -2400,7 +2401,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
// has been flattened. Change DomBlock to jump directly to our new block to
// avoid other simplifycfg's kicking in on the diamond.
- TerminatorInst *OldTI = DomBlock->getTerminator();
+ Instruction *OldTI = DomBlock->getTerminator();
Builder.SetInsertPoint(OldTI);
Builder.CreateBr(BB);
OldTI->eraseFromParent();
@@ -2434,7 +2435,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
TrueSucc->removePredecessor(BI->getParent());
FalseSucc->removePredecessor(BI->getParent());
Builder.CreateRetVoid();
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -2490,7 +2491,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
<< "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: "
<< *TrueSucc << "FALSEBLOCK: " << *FalseSucc);
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
return true;
}
@@ -2541,6 +2542,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
+ const unsigned PredCount = pred_size(BB);
+
Instruction *Cond = nullptr;
if (BI->isConditional())
Cond = dyn_cast<Instruction>(BI->getCondition());
@@ -2590,7 +2593,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// too many instructions and these involved instructions can be executed
// unconditionally. We denote all involved instructions except the condition
// as "bonus instructions", and only allow this transformation when the
- // number of the bonus instructions does not exceed a certain threshold.
+ // number of the bonus instructions we'll need to create when cloning into
+ // each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
for (auto I = BB->begin(); Cond != &*I; ++I) {
// Ignore dbg intrinsics.
@@ -2605,7 +2609,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// I is used in the same BB. Since BI uses Cond and doesn't have more slots
// to use any other instruction, User must be an instruction between next(I)
// and Cond.
- ++NumBonusInsts;
+
+ // Account for the cost of duplicating this instruction into each
+ // predecessor.
+ NumBonusInsts += PredCount;
// Early exits once we reach the limit.
if (NumBonusInsts > BonusInstThreshold)
return false;
@@ -2711,16 +2718,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
- Instruction *New = Cond->clone();
- RemapInstruction(New, VMap,
+ Instruction *CondInPred = Cond->clone();
+ RemapInstruction(CondInPred, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- PredBlock->getInstList().insert(PBI->getIterator(), New);
- New->takeName(Cond);
- Cond->setName(New->getName() + ".old");
+ PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
+ CondInPred->takeName(Cond);
+ Cond->setName(CondInPred->getName() + ".old");
if (BI->isConditional()) {
Instruction *NewCond = cast<Instruction>(
- Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond"));
+ Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond"));
PBI->setCondition(NewCond);
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
@@ -2784,7 +2791,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
Instruction *NotCond = cast<Instruction>(
Builder.CreateNot(PBI->getCondition(), "not.cond"));
MergedCond = cast<Instruction>(
- Builder.CreateBinOp(Instruction::And, NotCond, New, "and.cond"));
+ Builder.CreateBinOp(Instruction::And, NotCond, CondInPred,
+ "and.cond"));
if (PBI_C->isOne())
MergedCond = cast<Instruction>(Builder.CreateBinOp(
Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
@@ -2793,7 +2801,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
// is false: PBI_Cond and BI_Value
MergedCond = cast<Instruction>(Builder.CreateBinOp(
- Instruction::And, PBI->getCondition(), New, "and.cond"));
+ Instruction::And, PBI->getCondition(), CondInPred, "and.cond"));
if (PBI_C->isOne()) {
Instruction *NotCond = cast<Instruction>(
Builder.CreateNot(PBI->getCondition(), "not.cond"));
@@ -2807,7 +2815,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
}
// Change PBI from Conditional to Unconditional.
BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
- EraseTerminatorInstAndDCECond(PBI);
+ EraseTerminatorAndDCECond(PBI);
PBI = New_PBI;
}
@@ -2873,7 +2881,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
if (!AlternativeV)
break;
- assert(pred_size(Succ) == 2);
+ assert(Succ->hasNPredecessors(2));
auto PredI = pred_begin(Succ);
BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
@@ -2922,7 +2930,7 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
isa<StoreInst>(I))
++N;
// Free instructions.
- else if (isa<TerminatorInst>(I) || IsaBitcastOfPointerType(I))
+ else if (I.isTerminator() || IsaBitcastOfPointerType(I))
continue;
else
return false;
@@ -3402,7 +3410,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
-static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
BasicBlock *TrueBB, BasicBlock *FalseBB,
uint32_t TrueWeight,
uint32_t FalseWeight) {
@@ -3414,7 +3422,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
// Then remove the rest.
- for (BasicBlock *Succ : OldTerm->successors()) {
+ for (BasicBlock *Succ : successors(OldTerm)) {
// Make sure only to keep exactly one copy of each edge.
if (Succ == KeepEdge1)
KeepEdge1 = nullptr;
@@ -3457,7 +3465,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
Builder.CreateBr(FalseBB);
}
- EraseTerminatorInstAndDCECond(OldTerm);
+ EraseTerminatorAndDCECond(OldTerm);
return true;
}
@@ -3534,9 +3542,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
-static bool tryToSimplifyUncondBranchWithICmpInIt(
- ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
- const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) {
+bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
+ ICmpInst *ICI, IRBuilder<> &Builder) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -3571,7 +3578,7 @@ static bool tryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -3587,7 +3594,7 @@ static bool tryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -3701,7 +3708,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
BasicBlock *NewBB =
BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
// Remove the uncond branch added to the old block.
- TerminatorInst *OldTI = BB->getTerminator();
+ Instruction *OldTI = BB->getTerminator();
Builder.SetInsertPoint(OldTI);
if (TrueWhenEqual)
@@ -3745,7 +3752,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
}
// Erase the old branch instruction.
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
return true;
@@ -3861,9 +3868,9 @@ bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
}
// The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
return true;
}
@@ -3993,7 +4000,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {
if (UnwindDest == nullptr) {
removeUnwindEdge(PredBB);
} else {
- TerminatorInst *TI = PredBB->getTerminator();
+ Instruction *TI = PredBB->getTerminator();
TI->replaceUsesOfWith(BB, UnwindDest);
}
}
@@ -4062,7 +4069,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
SmallVector<BranchInst *, 8> CondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
- TerminatorInst *PTI = P->getTerminator();
+ Instruction *PTI = P->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
if (BI->isUnconditional())
UncondBranchPreds.push_back(P);
@@ -4083,9 +4090,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
// If we eliminated all predecessors of the block, delete the block now.
if (pred_empty(BB)) {
// We know there are no successors, so just nuke the block.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
}
return true;
@@ -4167,7 +4174,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- TerminatorInst *TI = Preds[i]->getTerminator();
+ Instruction *TI = Preds[i]->getTerminator();
IRBuilder<> Builder(TI);
if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional()) {
@@ -4179,10 +4186,10 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
} else {
if (BI->getSuccessor(0) == BB) {
Builder.CreateBr(BI->getSuccessor(1));
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
} else if (BI->getSuccessor(1) == BB) {
Builder.CreateBr(BI->getSuccessor(0));
- EraseTerminatorInstAndDCECond(BI);
+ EraseTerminatorAndDCECond(BI);
Changed = true;
}
}
@@ -4245,9 +4252,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// If this block is now dead, remove it.
if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
- BB->eraseFromParent();
if (LoopHeaders)
LoopHeaders->erase(BB);
+ BB->eraseFromParent();
return true;
}
@@ -4424,7 +4431,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
SplitBlock(&*NewDefault, &NewDefault->front());
auto *OldTI = NewDefault->getTerminator();
new UnreachableInst(SI->getContext(), OldTI);
- EraseTerminatorInstAndDCECond(OldTI);
+ EraseTerminatorAndDCECond(OldTI);
return true;
}
@@ -4635,12 +4642,12 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
SmallDenseMap<Value *, Constant *> ConstantPool;
ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
for (Instruction &I : CaseDest->instructionsWithoutDebug()) {
- if (TerminatorInst *T = dyn_cast<TerminatorInst>(&I)) {
+ if (I.isTerminator()) {
// If the terminator is a simple branch, continue to the next block.
- if (T->getNumSuccessors() != 1 || T->isExceptional())
+ if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
return false;
Pred = CaseDest;
- CaseDest = T->getSuccessor(0);
+ CaseDest = I.getSuccessor(0);
} else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
@@ -5031,6 +5038,9 @@ SwitchLookupTable::SwitchLookupTable(
GlobalVariable::PrivateLinkage, Initializer,
"switch.table." + FuncName);
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ // Set the alignment to that of the array elements. We will only be loading
+ // one value out of it.
+ Array->setAlignment(DL.getPrefTypeAlignment(ValueType));
Kind = ArrayKind;
}
@@ -5257,7 +5267,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Figure out the corresponding result for each case value and phi node in the
// common destination, as well as the min and max case values.
- assert(SI->case_begin() != SI->case_end());
+ assert(!empty(SI->cases()));
SwitchInst::CaseIt CI = SI->case_begin();
ConstantInt *MinCaseVal = CI->getCaseValue();
ConstantInt *MaxCaseVal = CI->getCaseValue();
@@ -5509,7 +5519,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
SmallVector<int64_t,4> Values;
for (auto &C : SI->cases())
Values.push_back(C.getCaseValue()->getValue().getSExtValue());
- llvm::sort(Values.begin(), Values.end());
+ llvm::sort(Values);
// If the switch is already dense, there's nothing useful to do here.
if (isSwitchDense(Values))
@@ -5583,33 +5593,33 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// If the block only contains the switch, see if we can fold the block
// away into any preds.
if (SI == &*BB->instructionsWithoutDebug().begin())
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Remove unreachable cases.
if (eliminateDeadSwitchCases(SI, Options.AC, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (switchToSelect(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// The conversion from switch to lookup tables results in difficult-to-analyze
// code and makes pruning branches much harder. This is a problem if the
@@ -5618,10 +5628,10 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
SwitchToLookupTable(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
if (ReduceSwitchRange(SI, Builder, DL, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5646,20 +5656,20 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (IBI->getNumDestinations() == 0) {
// If the indirectbr has no successors, change it to unreachable.
new UnreachableInst(IBI->getContext(), IBI);
- EraseTerminatorInstAndDCECond(IBI);
+ EraseTerminatorAndDCECond(IBI);
return true;
}
if (IBI->getNumDestinations() == 1) {
// If the indirectbr has one successor, change it to a direct branch.
BranchInst::Create(IBI->getDestination(0), IBI);
- EraseTerminatorInstAndDCECond(IBI);
+ EraseTerminatorAndDCECond(IBI);
return true;
}
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
return Changed;
}
@@ -5755,7 +5765,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// backedge, so we can eliminate BB.
bool NeedCanonicalLoop =
Options.NeedCanonicalLoop &&
- (LoopHeaders && pred_size(BB) > 1 &&
+ (LoopHeaders && BB->hasNPredecessorsOrMore(2) &&
(LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
@@ -5769,7 +5779,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, Options))
+ tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
return true;
}
@@ -5787,7 +5797,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5815,18 +5825,18 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
auto I = BB->instructionsWithoutDebug().begin();
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
} else if (&*I == cast<Instruction>(BI->getCondition())) {
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
}
@@ -5834,35 +5844,24 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SimplifyBranchOnICmpChain(BI, Builder, DL))
return true;
- // If this basic block has a single dominating predecessor block and the
- // dominating block's condition implies BI's condition, we know the direction
- // of the BI branch.
- if (BasicBlock *Dom = BB->getSinglePredecessor()) {
- auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
- if (PBI && PBI->isConditional() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
- assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
- bool CondIsTrue = PBI->getSuccessor(0) == BB;
- Optional<bool> Implication = isImpliedCondition(
- PBI->getCondition(), BI->getCondition(), DL, CondIsTrue);
- if (Implication) {
- // Turn this into a branch on constant.
- auto *OldCond = BI->getCondition();
- ConstantInt *CI = *Implication
- ? ConstantInt::getTrue(BB->getContext())
- : ConstantInt::getFalse(BB->getContext());
- BI->setCondition(CI);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- return simplifyCFG(BB, TTI, Options) | true;
- }
- }
+ // If this basic block has dominating predecessor blocks and the dominating
+ // blocks' conditions imply BI's condition, we know the direction of BI.
+ Optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
+ if (Imp) {
+ // Turn this into a branch on constant.
+ auto *OldCond = BI->getCondition();
+ ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
+ : ConstantInt::getFalse(BB->getContext());
+ BI->setCondition(TorF);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ return requestResimplify();
}
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -5871,24 +5870,24 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
if (HoistThenElseCodeToIf(BI, TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
- TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
// execute Successor #1 if it branches to Successor #0.
- TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
}
// If this is a branch on a phi node in the current block, thread control
@@ -5896,14 +5895,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, DL, Options.AC))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
// Look for diamond patterns.
if (MergeCondStores)
@@ -5911,7 +5910,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (mergeConditionalStores(PBI, BI, DL))
- return simplifyCFG(BB, TTI, Options) | true;
+ return requestResimplify();
return false;
}
@@ -5974,7 +5973,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
for (PHINode &PHI : BB->phis())
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
- TerminatorInst *T = PHI.getIncomingBlock(i)->getTerminator();
+ Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
IRBuilder<> Builder(T);
if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
BB->removePredecessor(PHI.getIncomingBlock(i));
@@ -5994,7 +5993,7 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
return false;
}
-bool SimplifyCFGOpt::run(BasicBlock *BB) {
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
bool Changed = false;
assert(BB && BB->getParent() && "Block not embedded in function!");
@@ -6068,6 +6067,21 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
return Changed;
}
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
+
+ // Repeatedly simplify BB as long as resimplification is requested.
+ do {
+ Resimplify = false;
+
+ // Perform one round of simplification. The Resimplify flag will be set if
+ // another iteration is requested.
+ Changed |= simplifyOnce(BB);
+ } while (Resimplify);
+
+ return Changed;
+}
+
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
const SimplifyCFGOptions &Options,
SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
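
The control flow of the new driver can be distilled into a small standalone sketch: simplifyOnce() performs one round and may request another by setting a flag, and run() loops until no further resimplification is requested. The names mirror the patch; the body of simplifyOnce() here is only a stand-in.

// Distilled sketch of the requestResimplify()/run() idiom used above.
// The work done per round is faked; only the looping structure is the point.
#include <cstdio>

class SimplifyCFGOptSketch {
  bool Resimplify = false;
  int RoundsLeft = 3; // stand-in for "more simplification opportunities exist"

  bool requestResimplify() {
    Resimplify = true;
    return true;
  }

  bool simplifyOnce() {
    // One round of simplification; ask for another round while work remains.
    if (RoundsLeft-- > 0)
      return requestResimplify();
    return false;
  }

public:
  bool run() {
    bool Changed = false;
    // Repeatedly simplify as long as resimplification is requested.
    do {
      Resimplify = false;
      Changed |= simplifyOnce();
    } while (Resimplify);
    return Changed;
  }
};

int main() {
  SimplifyCFGOptSketch S;
  std::printf("changed: %d\n", S.run());
  return 0;
}
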
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 65b23f4d94a1..7faf291e73d9 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -106,8 +106,9 @@ namespace {
/// Otherwise return null.
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
Value *IVSrc = nullptr;
- unsigned OperIdx = 0;
+ const unsigned OperIdx = 0;
const SCEV *FoldedExpr = nullptr;
+ bool MustDropExactFlag = false;
switch (UseInst->getOpcode()) {
default:
return nullptr;
@@ -140,6 +141,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
APInt::getOneBitSet(BitWidth, D->getZExtValue()));
}
FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ // We might have the 'exact' flag set at this point, which will no longer
+ // be correct after we make the replacement.
+ if (UseInst->isExact() &&
+ SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D)))
+ MustDropExactFlag = true;
}
// We have something that might fold its operand. Compare SCEVs.
if (!SE->isSCEVable(UseInst->getType()))
@@ -155,6 +161,9 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
UseInst->setOperand(OperIdx, IVSrc);
assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+ if (MustDropExactFlag)
+ UseInst->dropPoisonGeneratingFlags();
+
++NumElimOperand;
Changed = true;
if (IVOperand->use_empty())
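
A plain-integer analogy for why the 'exact' flag has to go, assuming nothing about the surrounding SCEV machinery: 'exact' promises the division leaves no remainder, and once the dividend is replaced by the folded value that promise can stop holding, at which point keeping the flag would make the result poison.

// Analogy only: models the remainder check that decides MustDropExactFlag.
#include <cstdio>

int main() {
  const long Divisor = 4;
  const long OriginalDividend = 12; // 'exact' was valid: 12 % 4 == 0
  const long FoldedDividend = 6;    // dividend after operand folding

  bool StillExact = (FoldedDividend % Divisor == 0);
  if (!StillExact)
    std::printf("remainder %ld -> the exact flag must be dropped\n",
                FoldedDividend % Divisor);
  (void)OriginalDividend;
  return 0;
}
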
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 15e035874002..1bb26caa2af2 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
@@ -22,6 +23,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -150,6 +152,32 @@ static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B,
return true;
}
+static bool isOnlyUsedInComparisonWithZero(Value *V) {
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
+static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
+ const DataLayout &DL) {
+ if (!isOnlyUsedInComparisonWithZero(CI))
+ return false;
+
+ if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL))
+ return false;
+
+ if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory))
+ return false;
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -322,6 +350,21 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
B, DL, TLI);
}
+ // strcmp to memcmp
+ if (!HasStr1 && HasStr2) {
+ if (canTransformToMemCmp(CI, Str1P, Len2, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
+ TLI);
+ } else if (HasStr1 && !HasStr2) {
+ if (canTransformToMemCmp(CI, Str2P, Len1, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
+ TLI);
+ }
+
return nullptr;
}
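
At the C level the new strcmp fold corresponds to the rewrite sketched below; the guard conditions mirror canTransformToMemCmp(): the call result is only compared against zero, the non-constant pointer is known dereferenceable for the literal's length including the terminating NUL, and the function is not built with MemorySanitizer. The buffer sizes in the sketch are chosen to satisfy that precondition.

// Source-level sketch of the strcmp(x, "lit") -> memcmp(x, "lit", len+1)
// rewrite; both sides are only meaningful when used as an equality test.
#include <cstdio>
#include <cstring>

static bool equalsAbcStrcmp(const char *S) { return std::strcmp(S, "abc") == 0; }
static bool equalsAbcMemcmp(const char *S) {
  // 4 == strlen("abc") + 1; S must be dereferenceable for all 4 bytes.
  return std::memcmp(S, "abc", 4) == 0;
}

int main() {
  char Buf1[8] = "abc";
  char Buf2[8] = "abx";
  std::printf("%d %d\n", equalsAbcStrcmp(Buf1), equalsAbcMemcmp(Buf1)); // 1 1
  std::printf("%d %d\n", equalsAbcStrcmp(Buf2), equalsAbcMemcmp(Buf2)); // 0 0
  return 0;
}
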
@@ -361,6 +404,26 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+
+ // strncmp to memcmp
+ if (!HasStr1 && HasStr2) {
+ Len2 = std::min(Len2, Length);
+ if (canTransformToMemCmp(CI, Str1P, Len2, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2), B, DL,
+ TLI);
+ } else if (HasStr1 && !HasStr2) {
+ Len1 = std::min(Len1, Length);
+ if (canTransformToMemCmp(CI, Str2P, Len1, DL))
+ return emitMemCmp(
+ Str1P, Str2P,
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1), B, DL,
+ TLI);
+ }
+
return nullptr;
}
@@ -735,8 +798,11 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
Bitfield.setBit((unsigned char)C);
Value *BitfieldC = B.getInt(Bitfield);
- // First check that the bit field access is within bounds.
+ // Adjust width of "C" to the bitfield width, then mask off the high bits.
Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
+ C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
+
+ // First check that the bit field access is within bounds.
Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
"memchr.bounds");
@@ -860,8 +926,7 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
}
/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
-static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
- const TargetLibraryInfo &TLI) {
+Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
// This has to be a memset of zeros (bzero).
auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
if (!FillValue || FillValue->getZExtValue() != 0)
@@ -881,7 +946,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
return nullptr;
LibFunc Func;
- if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
Func != LibFunc_malloc)
return nullptr;
@@ -896,18 +961,18 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
Malloc->getArgOperand(0), Malloc->getAttributes(),
- B, TLI);
+ B, *TLI);
if (!Calloc)
return nullptr;
Malloc->replaceAllUsesWith(Calloc);
- Malloc->eraseFromParent();
+ eraseFromParent(Malloc);
return Calloc;
}
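
The fold reads naturally at the source level: zero-filling a freshly malloc'ed block is exactly what calloc already does, so the pair collapses into a single call. A small sketch, assuming only the standard C allocation functions:

// memset(malloc(n), 0, n) -> calloc(1, n): both produce n zeroed bytes.
#include <cstdio>
#include <cstdlib>
#include <cstring>

int main() {
  const size_t N = 32;

  void *A = std::malloc(N);
  if (A) std::memset(A, 0, N);   // the original pattern

  void *B = std::calloc(1, N);   // the folded form

  bool BothZero = A && B &&
                  std::memcmp(A, B, N) == 0; // both buffers are all zeros
  std::printf("equivalent: %d\n", BothZero);
  std::free(A);
  std::free(B);
  return 0;
}
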
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
- if (auto *Calloc = foldMallocMemset(CI, B, *TLI))
+ if (auto *Calloc = foldMallocMemset(CI, B))
return Calloc;
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
@@ -927,6 +992,20 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
// Math Library Optimizations
//===----------------------------------------------------------------------===//
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Module *M = CI->getModule();
+ Value *V = CI->getArgOperand(0);
+ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+ CallInst *NewCall = B.CreateCall(F, V);
+ NewCall->takeName(CI);
+ return NewCall;
+}
+
/// Return a variant of Val with float type.
/// Currently this works in two cases: If Val is an FPExtension of a float
/// value to something bigger, simply return the operand.
@@ -949,104 +1028,75 @@ static Value *valueHasFloatPrecision(Value *Val) {
return nullptr;
}
-/// Shrink double -> float for unary functions like 'floor'.
-static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
- bool CheckRetType) {
- Function *Callee = CI->getCalledFunction();
- // We know this libcall has a valid prototype, but we don't know which.
+/// Shrink double -> float functions.
+static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isBinary, bool isPrecise = false) {
if (!CI->getType()->isDoubleTy())
return nullptr;
- if (CheckRetType) {
- // Check if all the uses for function like 'sin' are converted to float.
+ // If not all the uses of the function are converted to float, then bail out.
+ // This matters if the precision of the result is more important than the
+ // precision of the arguments.
+ if (isPrecise)
for (User *U : CI->users()) {
FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
if (!Cast || !Cast->getType()->isFloatTy())
return nullptr;
}
- }
- // If this is something like 'floor((double)floatval)', convert to floorf.
- Value *V = valueHasFloatPrecision(CI->getArgOperand(0));
- if (V == nullptr)
+ // If this is something like 'g((double) float)', convert to 'gf(float)'.
+ Value *V[2];
+ V[0] = valueHasFloatPrecision(CI->getArgOperand(0));
+ V[1] = isBinary ? valueHasFloatPrecision(CI->getArgOperand(1)) : nullptr;
+ if (!V[0] || (isBinary && !V[1]))
return nullptr;
// If call isn't an intrinsic, check that it isn't within a function with the
- // same name as the float version of this call.
+ // same name as the float version of this call; otherwise the result is an
+ // infinite loop. For example, from MinGW-w64:
//
- // e.g. inline float expf(float val) { return (float) exp((double) val); }
- //
- // A similar such definition exists in the MinGW-w64 math.h header file which
- // when compiled with -O2 -ffast-math causes the generation of infinite loops
- // where expf is called.
- if (!Callee->isIntrinsic()) {
- const Function *F = CI->getFunction();
- StringRef FName = F->getName();
- StringRef CalleeName = Callee->getName();
- if ((FName.size() == (CalleeName.size() + 1)) &&
- (FName.back() == 'f') &&
- FName.startswith(CalleeName))
+ // float expf(float val) { return (float) exp((double) val); }
+ Function *CalleeFn = CI->getCalledFunction();
+ StringRef CalleeNm = CalleeFn->getName();
+ AttributeList CalleeAt = CalleeFn->getAttributes();
+ if (CalleeFn && !CalleeFn->isIntrinsic()) {
+ const Function *Fn = CI->getFunction();
+ StringRef FnName = Fn->getName();
+ if (FnName.back() == 'f' &&
+ FnName.size() == (CalleeNm.size() + 1) &&
+ FnName.startswith(CalleeNm))
return nullptr;
}
- // Propagate fast-math flags from the existing call to the new call.
+ // Propagate the math semantics from the current function to the new function.
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
- // floor((double)floatval) -> (double)floorf(floatval)
- if (Callee->isIntrinsic()) {
+ // g((double) float) -> (double) gf(float)
+ Value *R;
+ if (CalleeFn->isIntrinsic()) {
Module *M = CI->getModule();
- Intrinsic::ID IID = Callee->getIntrinsicID();
- Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
- V = B.CreateCall(F, V);
- } else {
- // The call is a library call rather than an intrinsic.
- V = emitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ Intrinsic::ID IID = CalleeFn->getIntrinsicID();
+ Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
+ R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
}
+ else
+ R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt)
+ : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt);
- return B.CreateFPExt(V, B.getDoubleTy());
+ return B.CreateFPExt(R, B.getDoubleTy());
}
-// Replace a libcall \p CI with a call to intrinsic \p IID
-static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
- // Propagate fast-math flags from the existing call to the new call.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- Module *M = CI->getModule();
- Value *V = CI->getArgOperand(0);
- Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
- CallInst *NewCall = B.CreateCall(F, V);
- NewCall->takeName(CI);
- return NewCall;
+/// Shrink double -> float for unary functions.
+static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isPrecise = false) {
+ return optimizeDoubleFP(CI, B, false, isPrecise);
}
-/// Shrink double -> float for binary functions like 'fmin/fmax'.
-static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- // We know this libcall has a valid prototype, but we don't know which.
- if (!CI->getType()->isDoubleTy())
- return nullptr;
-
- // If this is something like 'fmin((double)floatval1, (double)floatval2)',
- // or fmin(1.0, (double)floatval), then we convert it to fminf.
- Value *V1 = valueHasFloatPrecision(CI->getArgOperand(0));
- if (V1 == nullptr)
- return nullptr;
- Value *V2 = valueHasFloatPrecision(CI->getArgOperand(1));
- if (V2 == nullptr)
- return nullptr;
-
- // Propagate fast-math flags from the existing call to the new call.
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(CI->getFastMathFlags());
-
- // fmin((double)floatval1, (double)floatval2)
- // -> (double)fminf(floatval1, floatval2)
- // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
- Value *V = emitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
- Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
+/// Shrink double -> float for binary functions.
+static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool isPrecise = false) {
+ return optimizeDoubleFP(CI, B, true, isPrecise);
}
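
A standalone illustration of the shrink, assuming a libm with the usual float variants: when the argument was widened from float, the float-named function can be called directly. For a function such as floor the result happens to be identical; for precision-sensitive calls the pass only shrinks when every use of the result is truncated back to float or when unsafe FP shrinking is enabled, since the reduced precision is otherwise observable.

// g((double)f) -> (double)gf(f): exact for floor, approximate for sin.
#include <cmath>
#include <cstdio>

int main() {
  float F = 123.75f;

  double WideFloor = std::floor(static_cast<double>(F));
  double NarrowFloor = static_cast<double>(std::floor(F)); // floorf path
  std::printf("floor: %g %g (identical)\n", WideFloor, NarrowFloor);

  float WideSin = static_cast<float>(std::sin(static_cast<double>(F)));
  float NarrowSin = std::sin(F); // sinf path: may differ in the last ulp
  std::printf("sin:   %.9g %.9g\n", WideSin, NarrowSin);
  return 0;
}
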
// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
@@ -1078,20 +1128,39 @@ Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) {
return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs");
}
-Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Ret = nullptr;
- StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
-
- // cos(-x) -> cos(x)
- Value *Op1 = CI->getArgOperand(0);
- if (BinaryOperator::isFNeg(Op1)) {
- BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
- return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
+ IRBuilder<> &B) {
+ if (!isa<FPMathOperator>(Call))
+ return nullptr;
+
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(Call->getFastMathFlags());
+
+ // TODO: Can this be shared to also handle LLVM intrinsics?
+ Value *X;
+ switch (Func) {
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ // sin(-X) --> -sin(X)
+ // tan(-X) --> -tan(X)
+ if (match(Call->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X)))))
+ return B.CreateFNeg(B.CreateCall(Call->getCalledFunction(), X));
+ break;
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
+ // cos(-X) --> cos(X)
+ if (match(Call->getArgOperand(0), m_FNeg(m_Value(X))))
+ return B.CreateCall(Call->getCalledFunction(), X, "cos");
+ break;
+ default:
+ break;
}
- return Ret;
+ return nullptr;
}
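
The reflections rest on the odd/even symmetry of the functions: sin and tan are odd, cos is even. A quick numeric check, noting that in the pass the rewrite is additionally guarded by the call's fast-math flags and, for the odd functions, by the fneg having a single use:

// sin(-x) == -sin(x), tan(-x) == -tan(x), cos(-x) == cos(x).
#include <cmath>
#include <cstdio>

int main() {
  double X = 0.7853981633974483; // pi/4
  std::printf("sin: %.17g %.17g\n", std::sin(-X), -std::sin(X));
  std::printf("tan: %.17g %.17g\n", std::tan(-X), -std::tan(X));
  std::printf("cos: %.17g %.17g\n", std::cos(-X), std::cos(X));
  return 0;
}
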
static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
@@ -1119,37 +1188,175 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
return InnerChain[Exp];
}
-/// Use square root in place of pow(x, +/-0.5).
-Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
- // TODO: There is some subset of 'fast' under which these transforms should
- // be allowed.
- if (!Pow->isFast())
- return nullptr;
-
- Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
+/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x).
+Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
+ Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+ Module *Mod = Pow->getModule();
Type *Ty = Pow->getType();
+ bool Ignored;
- const APFloat *ExpoF;
- if (!match(Expo, m_APFloat(ExpoF)) ||
- (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
+ // Evaluate special cases related to a nested function as the base.
+
+ // pow(exp(x), y) -> exp(x * y)
+ // pow(exp2(x), y) -> exp2(x * y)
+ // If exp{,2}() is used only once, it is better to fold the two
+ // transcendental math functions into one. If it is used again, exp{,2}()
+ // would still have to be called with the original argument, so both
+ // original transcendental functions would be kept. However, this
+ // transformation is only safe with fully relaxed math semantics, since,
+ // besides rounding differences, it changes overflow and underflow behavior
+ // quite dramatically. For example:
+ // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
+ // Whereas:
+ // exp(1000 * 0.001) = exp(1)
+ // TODO: Loosen the requirement for fully relaxed math semantics.
+ // TODO: Handle exp10() when more targets have it available.
+ CallInst *BaseFn = dyn_cast<CallInst>(Base);
+ if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
+ LibFunc LibFn;
+
+ Function *CalleeFn = BaseFn->getCalledFunction();
+ if (CalleeFn &&
+ TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) {
+ StringRef ExpName;
+ Intrinsic::ID ID;
+ Value *ExpFn;
+ LibFunc LibFnFloat;
+ LibFunc LibFnDouble;
+ LibFunc LibFnLongDouble;
+
+ switch (LibFn) {
+ default:
+ return nullptr;
+ case LibFunc_expf: case LibFunc_exp: case LibFunc_expl:
+ ExpName = TLI->getName(LibFunc_exp);
+ ID = Intrinsic::exp;
+ LibFnFloat = LibFunc_expf;
+ LibFnDouble = LibFunc_exp;
+ LibFnLongDouble = LibFunc_expl;
+ break;
+ case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l:
+ ExpName = TLI->getName(LibFunc_exp2);
+ ID = Intrinsic::exp2;
+ LibFnFloat = LibFunc_exp2f;
+ LibFnDouble = LibFunc_exp2;
+ LibFnLongDouble = LibFunc_exp2l;
+ break;
+ }
+
+ // Create new exp{,2}() with the product as its argument.
+ Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
+ ExpFn = BaseFn->doesNotAccessMemory()
+ ? B.CreateCall(Intrinsic::getDeclaration(Mod, ID, Ty),
+ FMul, ExpName)
+ : emitUnaryFloatFnCall(FMul, TLI, LibFnDouble, LibFnFloat,
+ LibFnLongDouble, B,
+ BaseFn->getAttributes());
+
+ // Since the new exp{,2}() is different from the original one, dead code
+ // elimination cannot be trusted to remove it, since it may have side
+ // effects (e.g., errno). Because the only consumer of the original
+ // exp{,2}() is this pow() call, it has to be explicitly erased.
+ BaseFn->replaceAllUsesWith(ExpFn);
+ eraseFromParent(BaseFn);
+
+ return ExpFn;
+ }
+ }
+
+ // Evaluate special cases related to a constant base.
+
+ const APFloat *BaseF;
+ if (!match(Pow->getArgOperand(0), m_APFloat(BaseF)))
return nullptr;
+ // pow(2.0 ** n, x) -> exp2(n * x)
+ if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+ APFloat BaseR = APFloat(1.0);
+ BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
+ BaseR = BaseR / *BaseF;
+ bool IsInteger = BaseF->isInteger(),
+ IsReciprocal = BaseR.isInteger();
+ const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
+ APSInt NI(64, false);
+ if ((IsInteger || IsReciprocal) &&
+ !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+ NI > 1 && NI.isPowerOf2()) {
+ double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
+ Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
+ if (Pow->doesNotAccessMemory())
+ return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2");
+ else
+ return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs);
+ }
+ }
+
+ // pow(10.0, x) -> exp10(x)
+ // TODO: There is no exp10() intrinsic yet, but some day there shall be one.
+ if (match(Base, m_SpecificFP(10.0)) &&
+ hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
+ LibFunc_exp10l, B, Attrs);
+
+ return nullptr;
+}
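
The constant-base case can be checked numerically. For a base that is a power of two, or the reciprocal of one, pow(b, x) equals exp2(log2(b) * x); the pass performs the rewrite only under the fast-math constraints noted above because rounding, overflow and underflow behavior all change. A small check:

// pow(8.0, x) ~= exp2(3 * x) and pow(0.125, x) ~= exp2(-3 * x).
#include <cmath>
#include <cstdio>

int main() {
  double X = 1.7;
  std::printf("%.17g %.17g\n", std::pow(8.0, X), std::exp2(3.0 * X));
  std::printf("%.17g %.17g\n", std::pow(0.125, X), std::exp2(-3.0 * X));
  return 0;
}
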
+
+static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
+ Module *M, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
// If errno is never set, then use the intrinsic for sqrt().
- if (Pow->hasFnAttr(Attribute::ReadNone)) {
- Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(),
- Intrinsic::sqrt, Ty);
- Sqrt = B.CreateCall(SqrtFn, Base);
+ if (NoErrno) {
+ Function *SqrtFn =
+ Intrinsic::getDeclaration(M, Intrinsic::sqrt, V->getType());
+ return B.CreateCall(SqrtFn, V, "sqrt");
}
+
// Otherwise, use the libcall for sqrt().
- else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
+ if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl))
// TODO: We also should check that the target can in fact lower the sqrt()
// libcall. We currently have no way to ask this question, so we ask if
// the target has a sqrt() libcall, which is not exactly the same.
- Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B,
- Pow->getCalledFunction()->getAttributes());
- else
+ return emitUnaryFloatFnCall(V, TLI, LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl, B, Attrs);
+
+ return nullptr;
+}
+
+/// Use square root in place of pow(x, +/-0.5).
+Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
+ Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
+ Module *Mod = Pow->getModule();
+ Type *Ty = Pow->getType();
+
+ const APFloat *ExpoF;
+ if (!match(Expo, m_APFloat(ExpoF)) ||
+ (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
return nullptr;
+ Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
+ if (!Sqrt)
+ return nullptr;
+
+ // Handle signed zero base by expanding to fabs(sqrt(x)).
+ if (!Pow->hasNoSignedZeros()) {
+ Function *FAbsFn = Intrinsic::getDeclaration(Mod, Intrinsic::fabs, Ty);
+ Sqrt = B.CreateCall(FAbsFn, Sqrt, "abs");
+ }
+
+ // Handle a non-finite base by expanding to
+ // (x == -infinity ? +infinity : sqrt(x)).
+ if (!Pow->hasNoInfs()) {
+ Value *PosInf = ConstantFP::getInfinity(Ty),
+ *NegInf = ConstantFP::getInfinity(Ty, true);
+ Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
+ Sqrt = B.CreateSelect(FCmp, PosInf, Sqrt);
+ }
+
// If the exponent is negative, then get the reciprocal.
if (ExpoF->isNegative())
Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");
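
The fabs() and -infinity special cases exist because pow() and sqrt() disagree exactly there: pow(-0.0, 0.5) is +0.0 while sqrt(-0.0) is -0.0, and pow(-inf, 0.5) is +inf while sqrt(-inf) is NaN. A small check of those edge cases (the pass skips the extra code when the call carries nsz or ninf flags):

// Why pow(x, 0.5) expands to (x == -inf ? +inf : fabs(sqrt(x))).
#include <cmath>
#include <cstdio>

static double powHalfExpansion(double X) {
  return X == -INFINITY ? INFINITY : std::fabs(std::sqrt(X));
}

int main() {
  const double Cases[] = {2.0, -0.0, -INFINITY};
  for (double X : Cases)
    std::printf("x=%-5g  pow(x,0.5)=%g  expansion=%g\n", X, std::pow(X, 0.5),
                powHalfExpansion(X));
  return 0;
}
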
@@ -1160,134 +1367,109 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
Function *Callee = Pow->getCalledFunction();
- AttributeList Attrs = Callee->getAttributes();
StringRef Name = Callee->getName();
- Module *Module = Pow->getModule();
Type *Ty = Pow->getType();
Value *Shrunk = nullptr;
bool Ignored;
- if (UnsafeFPShrink &&
- Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
- Shrunk = optimizeUnaryDoubleFP(Pow, B, true);
+ // Bail out if simplifying libcalls to pow() is disabled.
+ if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl))
+ return nullptr;
// Propagate the math semantics from the call to any created instructions.
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Pow->getFastMathFlags());
+ // Shrink pow() to powf() if the arguments are single precision,
+ // unless the result is expected to be double precision.
+ if (UnsafeFPShrink &&
+ Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+ Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
+
// Evaluate special cases related to the base.
// pow(1.0, x) -> 1.0
- if (match(Base, m_SpecificFP(1.0)))
+ if (match(Base, m_FPOne()))
return Base;
- // pow(2.0, x) -> exp2(x)
- if (match(Base, m_SpecificFP(2.0))) {
- Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty);
- return B.CreateCall(Exp2, Expo, "exp2");
- }
-
- // pow(10.0, x) -> exp10(x)
- if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base))
- // There's no exp10 intrinsic yet, but, maybe, some day there shall be one.
- if (BaseC->isExactlyValue(10.0) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
- return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs);
-
- // pow(exp(x), y) -> exp(x * y)
- // pow(exp2(x), y) -> exp2(x * y)
- // We enable these only with fast-math. Besides rounding differences, the
- // transformation changes overflow and underflow behavior quite dramatically.
- // Example: x = 1000, y = 0.001.
- // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- auto *BaseFn = dyn_cast<CallInst>(Base);
- if (BaseFn && BaseFn->isFast() && Pow->isFast()) {
- LibFunc LibFn;
- Function *CalleeFn = BaseFn->getCalledFunction();
- if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
- (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.setFastMathFlags(Pow->getFastMathFlags());
-
- Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul");
- return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B,
- CalleeFn->getAttributes());
- }
- }
+ if (Value *Exp = replacePowWithExp(Pow, B))
+ return Exp;
// Evaluate special cases related to the exponent.
- if (Value *Sqrt = replacePowWithSqrt(Pow, B))
- return Sqrt;
-
- ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo);
- if (!ExpoC)
- return Shrunk;
-
// pow(x, -1.0) -> 1.0 / x
- if (ExpoC->isExactlyValue(-1.0))
+ if (match(Expo, m_SpecificFP(-1.0)))
return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal");
// pow(x, 0.0) -> 1.0
- if (ExpoC->getValueAPF().isZero())
- return ConstantFP::get(Ty, 1.0);
+ if (match(Expo, m_SpecificFP(0.0)))
+ return ConstantFP::get(Ty, 1.0);
// pow(x, 1.0) -> x
- if (ExpoC->isExactlyValue(1.0))
+ if (match(Expo, m_FPOne()))
return Base;
// pow(x, 2.0) -> x * x
- if (ExpoC->isExactlyValue(2.0))
+ if (match(Expo, m_SpecificFP(2.0)))
return B.CreateFMul(Base, Base, "square");
- // FIXME: Correct the transforms and pull this into replacePowWithSqrt().
- if (ExpoC->isExactlyValue(0.5) &&
- hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) {
- // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
- // This is faster than calling pow(), and still handles -0.0 and
- // negative infinity correctly.
- // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
- Value *PosInf = ConstantFP::getInfinity(Ty);
- Value *NegInf = ConstantFP::getInfinity(Ty, true);
-
- // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is
- // an intrinsic, to match errno semantics.
- Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt),
- B, Attrs);
- Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty);
- Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs");
- Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf");
- Sqrt = B.CreateSelect(FCmp, PosInf, FAbs);
+ if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;
- }
- // pow(x, n) -> x * x * x * ....
- if (Pow->isFast()) {
- APFloat ExpoA = abs(ExpoC->getValueAPF());
- // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32.
- // This transformation applies to integer exponents only.
- if (!ExpoA.isInteger() ||
- ExpoA.compare
- (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan)
- return nullptr;
+ // pow(x, n) -> x * x * x * ...
+ const APFloat *ExpoF;
+ if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+ // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
+ // If the exponent is an integer+0.5 we generate a call to sqrt and an
+ // additional fmul.
+ // TODO: This whole transformation should be backend specific (e.g. some
+ // backends might prefer libcalls or the limit for the exponent might
+ // be different) and it should also consider optimizing for size.
+ APFloat LimF(ExpoF->getSemantics(), 33.0),
+ ExpoA(abs(*ExpoF));
+ if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
+ // This transformation applies to integer or integer+0.5 exponents only.
+ // For integer+0.5, we create a sqrt(Base) call.
+ Value *Sqrt = nullptr;
+ if (!ExpoA.isInteger()) {
+ APFloat Expo2 = ExpoA;
+ // To check if ExpoA is an integer + 0.5, we add it to itself. If there
+ // is no floating point exception and the result is an integer, then
+ // ExpoA == integer + 0.5
+ if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+ return nullptr;
+
+ if (!Expo2.isInteger())
+ return nullptr;
+
+ Sqrt =
+ getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+ Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+ }
- // We will memoize intermediate products of the Addition Chain.
- Value *InnerChain[33] = {nullptr};
- InnerChain[1] = Base;
- InnerChain[2] = B.CreateFMul(Base, Base, "square");
+ // We will memoize intermediate products of the Addition Chain.
+ Value *InnerChain[33] = {nullptr};
+ InnerChain[1] = Base;
+ InnerChain[2] = B.CreateFMul(Base, Base, "square");
- // We cannot readily convert a non-double type (like float) to a double.
- // So we first convert it to something which could be converted to double.
- ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
- Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+ // We cannot readily convert a non-double type (like float) to a double.
+ // So we first convert it to something which could be converted to double.
+ ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
+ Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
- // If the exponent is negative, then get the reciprocal.
- if (ExpoC->isNegative())
- FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
- return FMul;
+ // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
+ if (Sqrt)
+ FMul = B.CreateFMul(FMul, Sqrt);
+
+ // If the exponent is negative, then get the reciprocal.
+ if (ExpoF->isNegative())
+ FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
+
+ return FMul;
+ }
}
- return nullptr;
+ return Shrunk;
}
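
The exponent-expansion path can also be spelled out at the source level. An integer-plus-half exponent such as 3.5 becomes a chain of multiplies plus one sqrt(), and a negative exponent ends with a reciprocal; the pass bounds the absolute exponent below 33 so that at most seven multiplies are emitted, and it applies only to fast-math calls. A hand-expanded check:

// pow(x, 3.5) ~= x*x*x * sqrt(x); pow(x, -3.5) is its reciprocal.
#include <cmath>
#include <cstdio>

int main() {
  double X = 1.3;
  double Cube = X * X * X;
  double PosExpansion = Cube * std::sqrt(X);
  double NegExpansion = 1.0 / PosExpansion;
  std::printf("%.17g %.17g\n", std::pow(X, 3.5), PosExpansion);
  std::printf("%.17g %.17g\n", std::pow(X, -3.5), NegExpansion);
  return 0;
}
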
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
@@ -2285,11 +2467,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
if (CI->isStrictFP())
return nullptr;
+ if (Value *V = optimizeTrigReflections(CI, Func, Builder))
+ return V;
+
switch (Func) {
- case LibFunc_cosf:
- case LibFunc_cos:
- case LibFunc_cosl:
- return optimizeCos(CI, Builder);
case LibFunc_sinpif:
case LibFunc_sinpi:
case LibFunc_cospif:
@@ -2344,6 +2525,7 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
case LibFunc_exp:
case LibFunc_exp10:
case LibFunc_expm1:
+ case LibFunc_cos:
case LibFunc_sin:
case LibFunc_sinh:
case LibFunc_tanh:
@@ -2425,7 +2607,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) {
// If we were able to further simplify, remove the now redundant call.
SimplifiedCI->replaceAllUsesWith(V);
- SimplifiedCI->eraseFromParent();
+ eraseFromParent(SimplifiedCI);
return V;
}
}
@@ -2504,15 +2686,20 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibCallSimplifier::LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
OptimizationRemarkEmitter &ORE,
- function_ref<void(Instruction *, Value *)> Replacer)
+ function_ref<void(Instruction *, Value *)> Replacer,
+ function_ref<void(Instruction *)> Eraser)
: FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
- UnsafeFPShrink(false), Replacer(Replacer) {}
+ UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// Indirect through the replacer used in this instance.
Replacer(I, With);
}
+void LibCallSimplifier::eraseFromParent(Instruction *I) {
+ Eraser(I);
+}
+
// TODO:
// Additional cases that we need to add to this file:
//
diff --git a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
index f8d758c54983..5db4d2e4df9d 100644
--- a/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -181,14 +181,12 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
GVtoClusterMap.member_end()), I));
- llvm::sort(Sets.begin(), Sets.end(),
- [](const SortType &a, const SortType &b) {
- if (a.first == b.first)
- return a.second->getData()->getName() >
- b.second->getData()->getName();
- else
- return a.first > b.first;
- });
+ llvm::sort(Sets, [](const SortType &a, const SortType &b) {
+ if (a.first == b.first)
+ return a.second->getData()->getName() > b.second->getData()->getName();
+ else
+ return a.first > b.first;
+ });
for (auto &I : Sets) {
unsigned CurrentClusterID = BalancinQueue.top().first;
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
index afd842f59911..95416de07439 100644
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -26,6 +26,7 @@ using namespace llvm;
void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeAddDiscriminatorsLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
+ initializeCanonicalizeAliasesLegacyPassPass(Registry);
initializeInstNamerPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
initializeLibCallsShrinkWrapLegacyPassPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 5f3d127202ad..9ff18328c219 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -79,6 +79,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
@@ -205,12 +206,12 @@ private:
unsigned Alignment);
};
-class LoadStoreVectorizer : public FunctionPass {
+class LoadStoreVectorizerLegacyPass : public FunctionPass {
public:
static char ID;
- LoadStoreVectorizer() : FunctionPass(ID) {
- initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
+ LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
+ initializeLoadStoreVectorizerLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -230,30 +231,23 @@ public:
} // end anonymous namespace
-char LoadStoreVectorizer::ID = 0;
+char LoadStoreVectorizerLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
"Vectorize load and Store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
+INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
"Vectorize load and store instructions", false, false)
Pass *llvm::createLoadStoreVectorizerPass() {
- return new LoadStoreVectorizer();
+ return new LoadStoreVectorizerLegacyPass();
}
-// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
-// vectors of Instructions.
-static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
- SmallVector<Value *, 8> VL(IL.begin(), IL.end());
- propagateMetadata(I, VL);
-}
-
-bool LoadStoreVectorizer::runOnFunction(Function &F) {
+bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
// Don't vectorize when the attribute NoImplicitFloat is used.
if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
@@ -268,6 +262,30 @@ bool LoadStoreVectorizer::runOnFunction(Function &F) {
return V.run();
}
+PreservedAnalyses LoadStoreVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
+ // Don't vectorize when the attribute NoImplicitFloat is used.
+ if (F.hasFnAttribute(Attribute::NoImplicitFloat))
+ return PreservedAnalyses::all();
+
+ AliasAnalysis &AA = AM.getResult<AAManager>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+
+ Vectorizer V(F, AA, DT, SE, TTI);
+ bool Changed = V.run();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return Changed ? PA : PreservedAnalyses::all();
+}
+
+// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
+// vectors of Instructions.
+static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
+ SmallVector<Value *, 8> VL(IL.begin(), IL.end());
+ propagateMetadata(I, VL);
+}
+
// Vectorizer Implementation
bool Vectorizer::run() {
bool Changed = false;
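The hunks above port the pass to the new pass manager: the legacy wrapper keeps the createLoadStoreVectorizerPass() entry point, while LoadStoreVectorizerPass::run() pulls its analyses from a FunctionAnalysisManager and reports what it preserves. A minimal sketch of that new-pass-manager idiom, assuming the usual LLVM headers (ExamplePass and doWork are hypothetical names, not part of this diff):

#include "llvm/IR/PassManager.h"

using namespace llvm;

// Skeleton of the run() pattern used by LoadStoreVectorizerPass above: fetch
// analyses from the manager, transform, and report only what survives.
struct ExamplePass : PassInfoMixin<ExamplePass> {
  static bool doWork(Function &) { return false; } // hypothetical transform

  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    bool Changed = doWork(F);
    if (!Changed)
      return PreservedAnalyses::all(); // nothing touched, keep everything
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();     // IR changed, but the CFG did not
    return PA;
  }
};

A pass written this way is then scheduled with FunctionPassManager::addPass(), which is how a pass like LoadStoreVectorizerPass is typically added to a new-pass-manager pipeline.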
@@ -954,11 +972,6 @@ bool Vectorizer::vectorizeStoreChain(
// try again.
unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;
- if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
- auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
- vectorizeStoreChain(Chains.second, InstructionsProcessed);
- }
VectorType *VecTy;
VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
@@ -991,14 +1004,23 @@ bool Vectorizer::vectorizeStoreChain(
// If the store is going to be misaligned, don't vectorize it.
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
- if (S0->getPointerAddressSpace() != 0)
- return false;
+ if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
+ auto Chains = splitOddVectorElts(Chain, Sz);
+ return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
+ vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ }
unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
StackAdjustedAlignment,
DL, S0, nullptr, &DT);
- if (NewAlign < StackAdjustedAlignment)
- return false;
+ if (NewAlign != 0)
+ Alignment = NewAlign;
+ }
+
+ if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
+ auto Chains = splitOddVectorElts(Chain, Sz);
+ return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
+ vectorizeStoreChain(Chains.second, InstructionsProcessed);
}
BasicBlock::iterator First, Last;
@@ -1037,13 +1059,11 @@ bool Vectorizer::vectorizeStoreChain(
}
}
- // This cast is safe because Builder.CreateStore() always creates a bona fide
- // StoreInst.
- StoreInst *SI = cast<StoreInst>(
- Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
- VecTy->getPointerTo(AS))));
+ StoreInst *SI = Builder.CreateAlignedStore(
+ Vec,
+ Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)),
+ Alignment);
propagateMetadata(SI, Chain);
- SI->setAlignment(Alignment);
eraseInstructions(Chain);
++NumVectorInstructions;
@@ -1102,12 +1122,6 @@ bool Vectorizer::vectorizeLoadChain(
// try again.
unsigned EltSzInBytes = Sz / 8;
unsigned SzInBytes = EltSzInBytes * ChainSize;
- if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
- auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
- vectorizeLoadChain(Chains.second, InstructionsProcessed);
- }
-
VectorType *VecTy;
VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
if (VecLoadTy)
@@ -1132,18 +1146,27 @@ bool Vectorizer::vectorizeLoadChain(
// If the load is going to be misaligned, don't vectorize it.
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
- if (L0->getPointerAddressSpace() != 0)
- return false;
+ if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
+ auto Chains = splitOddVectorElts(Chain, Sz);
+ return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
+ vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ }
unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
StackAdjustedAlignment,
DL, L0, nullptr, &DT);
- if (NewAlign < StackAdjustedAlignment)
- return false;
+ if (NewAlign != 0)
+ Alignment = NewAlign;
Alignment = NewAlign;
}
+ if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
+ auto Chains = splitOddVectorElts(Chain, Sz);
+ return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
+ vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ }
+
LLVM_DEBUG({
dbgs() << "LSV: Loads to vectorize:\n";
for (Instruction *I : Chain)
@@ -1159,11 +1182,8 @@ bool Vectorizer::vectorizeLoadChain(
Value *Bitcast =
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
- // This cast is safe because Builder.CreateLoad always creates a bona fide
- // LoadInst.
- LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
+ LoadInst *LI = Builder.CreateAlignedLoad(Bitcast, Alignment);
propagateMetadata(LI, Chain);
- LI->setAlignment(Alignment);
if (VecLoadTy) {
SmallVector<Instruction *, 16> InstrsToErase;
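Another change visible in both chains above: the vectorized access is now created with an explicit alignment (CreateAlignedLoad / CreateAlignedStore, using the alignment possibly raised by getOrEnforceKnownAlignment) instead of creating the access and patching it afterwards with setAlignment(). A small sketch of that pattern, assuming the IRBuilder signatures used in this diff (the helper name and arguments are illustrative):

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Emit a vector load of VecTy through Ptr with the alignment baked into the
// builder call, mirroring the CreateAlignedLoad use in vectorizeLoadChain.
static LoadInst *emitAlignedVectorLoad(IRBuilder<> &Builder, Value *Ptr,
                                       VectorType *VecTy, unsigned Alignment,
                                       unsigned AddrSpace) {
  Value *Cast = Builder.CreateBitCast(Ptr, VecTy->getPointerTo(AddrSpace));
  return Builder.CreateAlignedLoad(Cast, Alignment, "wide.vec");
}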
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 697bc1b448d7..b44fe5a52a2f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -80,10 +80,11 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
return false;
}
-LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
+LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
+ bool InterleaveOnlyWhenForced,
OptimizationRemarkEmitter &ORE)
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
- Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
+ Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
// Populate values with existing loop metadata.
@@ -98,19 +99,19 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
// consider the loop to have been already vectorized because there's
// nothing more that we can do.
IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
- LLVM_DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
+ LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
}
-bool LoopVectorizeHints::allowVectorization(Function *F, Loop *L,
- bool AlwaysVectorize) const {
+bool LoopVectorizeHints::allowVectorization(
+ Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
if (getForce() == LoopVectorizeHints::FK_Disabled) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
emitRemarkWithHints();
return false;
}
- if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
+ if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
emitRemarkWithHints();
return false;
@@ -434,7 +435,7 @@ static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
SmallPtrSetImpl<Value *> &AllowedExit) {
- // Reduction and Induction instructions are allowed to have exit users. All
+ // Reductions, Inductions and non-header phis are allowed to have exit users. All
// other instructions must not have external users.
if (!AllowedExit.count(Inst))
// Check that all of the users of the loop are inside the BB.
@@ -516,6 +517,18 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
return false;
}
+ // Check whether we are able to set up outer loop induction.
+ if (!setupOuterLoopInductions()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Not vectorizing: Unsupported outer loop Phi(s).\n");
+ ORE->emit(createMissedAnalysis("UnsupportedPhi")
+ << "Unsupported outer loop Phi(s)");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
return Result;
}
@@ -561,7 +574,8 @@ void LoopVectorizationLegality::addInductionPhi(
// back into the PHI node may have external users.
// We can allow those uses, except if the SCEVs we have for them rely
// on predicates that only hold within the loop, since allowing the exit
- // currently means re-using this SCEV outside the loop.
+ // currently means re-using this SCEV outside the loop (see PR33706 for more
+ // details).
if (PSE.getUnionPredicate().isAlwaysTrue()) {
AllowedExit.insert(Phi);
AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
@@ -570,6 +584,32 @@ void LoopVectorizationLegality::addInductionPhi(
LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}
+bool LoopVectorizationLegality::setupOuterLoopInductions() {
+ BasicBlock *Header = TheLoop->getHeader();
+
+ // Returns true if a given Phi is a supported induction.
+ auto isSupportedPhi = [&](PHINode &Phi) -> bool {
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
+ ID.getKind() == InductionDescriptor::IK_IntInduction) {
+ addInductionPhi(&Phi, ID, AllowedExit);
+ return true;
+ } else {
+ // Bail out for any Phi in the outer loop header that is not a supported
+ // induction.
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Found unsupported PHI for outer loop vectorization.\n");
+ return false;
+ }
+ };
+
+ if (llvm::all_of(Header->phis(), isSupportedPhi))
+ return true;
+ else
+ return false;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();
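setupOuterLoopInductions above accepts the outer loop only when every header phi is an integer induction, using llvm::all_of over Header->phis(). A standalone sketch of that range idiom (the predicate here is illustrative):

#include "llvm/ADT/STLExtras.h"

#include <vector>

// Mirrors the llvm::all_of(Header->phis(), isSupportedPhi) check above:
// true only if every element satisfies the predicate.
static bool allEven(const std::vector<int> &Vals) {
  return llvm::all_of(Vals, [](int V) { return V % 2 == 0; });
}

Note that the trailing if/else in the hunk is behaviorally equivalent to returning the llvm::all_of result directly.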
@@ -597,14 +637,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// can convert it to select during if-conversion. No need to check if
// the PHIs in this block are induction or reduction variables.
if (BB != Header) {
- // Check that this instruction has no outside users or is an
- // identified reduction value with an outside user.
- if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit))
- continue;
- ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi)
- << "value could not be identified as "
- "an induction or reduction variable");
- return false;
+ // Non-header phi nodes that have outside uses can be vectorized. Add
+ // them to the list of allowed exits.
+ // Unsafe cyclic dependencies with header phis are identified during
+ // legalization for reduction, induction and first order
+ // recurrences.
+ continue;
}
// We only allow if-converted PHIs with exactly two incoming values.
@@ -625,6 +663,20 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
+ // TODO: Instead of recording the AllowedExit, it would be good to record the
+ // complementary set: NotAllowedExit. These include (but may not be
+ // limited to):
+ // 1. Reduction phis as they represent the one-before-last value, which
+ // is not available when vectorized
+ // 2. Induction phis and increment when SCEV predicates cannot be used
+ // outside the loop - see addInductionPhi
+ // 3. Non-Phis with outside uses when SCEV predicates cannot be used
+ // outside the loop - see call to hasOutsideLoopUser in the non-phi
+ // handling below
+ // 4. FirstOrderRecurrence phis that can possibly be handled by
+ // extraction.
+ // By recording these, we can then reason about ways to vectorize each
+ // of these NotAllowedExit.
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
addInductionPhi(Phi, ID, AllowedExit);
@@ -662,10 +714,30 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
!isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
- ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
- << "call instruction cannot be vectorized");
+ // If the call is a recognized math library call, it is likely that
+ // we can vectorize it given loosened floating-point constraints.
+ LibFunc Func;
+ bool IsMathLibCall =
+ TLI && CI->getCalledFunction() &&
+ CI->getType()->isFloatingPointTy() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->hasOptimizedCodeGen(Func);
+
+ if (IsMathLibCall) {
+ // TODO: Ideally, we should not use clang-specific language here,
+ // but it's hard to provide meaningful yet generic advice.
+ // Also, should this be guarded by allowExtraAnalysis() and/or be part
+ // of the returned info from isFunctionVectorizable()?
+ ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
+ << "library call cannot be vectorized. "
+ "Try compiling with -fno-math-errno, -ffast-math, "
+ "or similar flags");
+ } else {
+ ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
+ << "call instruction cannot be vectorized");
+ }
LLVM_DEBUG(
- dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
+ dbgs() << "LV: Found a non-intrinsic callsite.\n");
return false;
}
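The new diagnostic above separates calls to recognized math library functions from arbitrary calls, since such calls usually become vectorizable once errno handling is relaxed. A hedged illustration of the kind of loop the remark targets (whether it actually vectorizes depends on the target and math library):

#include <cmath>

// With default flags the sinf call may set errno, so the loop is reported as
// "library call cannot be vectorized"; -fno-math-errno or -ffast-math
// typically allows the vectorizer to use a vector math routine instead.
void applySin(float *Out, const float *In, int N) {
  for (int I = 0; I < N; ++I)
    Out[I] = std::sin(In[I]);
}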
@@ -717,6 +789,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
+ // We can safely vectorize loops where instructions within the loop are
+ // used outside the loop only if the SCEV predicates within the loop is
+ // same as outside the loop. Allowing the exit means reusing the SCEV
+ // outside the loop.
+ if (PSE.getUnionPredicate().isAlwaysTrue()) {
+ AllowedExit.insert(&I);
+ continue;
+ }
ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
<< "value cannot be used outside the loop");
return false;
@@ -730,6 +810,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
ORE->emit(createMissedAnalysis("NoInductionVariable")
<< "loop induction variable could not be identified");
return false;
+ } else if (!WidestIndTy) {
+ ORE->emit(createMissedAnalysis("NoIntegerInductionVariable")
+ << "integer loop induction variable could not be identified");
+ return false;
}
}
@@ -754,13 +838,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (!LAI->canVectorizeMemory())
return false;
- if (LAI->hasStoreToLoopInvariantAddress()) {
+ if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
- << "write to a loop invariant address could not be vectorized");
- LLVM_DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ << "write to a loop invariant address could not "
+ "be vectorized");
+ LLVM_DEBUG(
+ dbgs() << "LV: Non vectorizable stores to a uniform address\n");
return false;
}
-
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
PSE.addPredicate(LAI->getPSE().getUnionPredicate());
@@ -1069,4 +1154,59 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}
+bool LoopVectorizationLegality::canFoldTailByMasking() {
+
+ LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
+
+ if (!PrimaryInduction) {
+ ORE->emit(createMissedAnalysis("NoPrimaryInduction")
+ << "Missing a primary induction variable in the loop, which is "
+ << "needed in order to fold tail by masking as required.");
+ LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
+ << "masking.\n");
+ return false;
+ }
+
+ // TODO: handle reductions when tail is folded by masking.
+ if (!Reductions.empty()) {
+ ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
+ << "Cannot fold tail by masking in the presence of reductions.");
+ LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
+ << "masking.\n");
+ return false;
+ }
+
+ // TODO: handle outside users when tail is folded by masking.
+ for (auto *AE : AllowedExit) {
+ // Check that all users of allowed exit values are inside the loop.
+ for (User *U : AE->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (TheLoop->contains(UI))
+ continue;
+ ORE->emit(createMissedAnalysis("LiveOutFoldingTailByMasking")
+ << "Cannot fold tail by masking in the presence of live outs.");
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
+ << "outside user for : " << *UI << '\n');
+ return false;
+ }
+ }
+
+ // The list of pointers that we can safely read and write to remains empty.
+ SmallPtrSet<Value *, 8> SafePointers;
+
+ // Check and mark all blocks for predication, including those that ordinarily
+ // do not need predication such as the header block.
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ if (!blockCanBePredicated(BB, SafePointers)) {
+ ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
+ << "control flow cannot be substituted for a select");
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
+ return true;
+}
+
} // namespace llvm
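canFoldTailByMasking above also refuses to fold the tail when an allowed-exit value has a user outside the loop. The scan it performs follows a common LLVM pattern; a minimal standalone sketch (assuming the usual Loop and IR headers; the helper name is illustrative):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// True if every user of V is an instruction inside L: the "no live-outs"
// condition required before the tail can be folded by masking.
static bool allUsersInsideLoop(const Value *V, const Loop *L) {
  for (const User *U : V->users()) {
    auto *UI = dyn_cast<Instruction>(U);
    if (!UI || !L->contains(UI))
      return false;
  }
  return true;
}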
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1c7d0a63a5ca..c45dee590b84 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -58,6 +58,7 @@
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlanHCFGBuilder.h"
+#include "VPlanHCFGTransforms.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -151,6 +152,16 @@ using namespace llvm;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
+/// @{
+/// Metadata attribute names
+static const char *const LLVMLoopVectorizeFollowupAll =
+ "llvm.loop.vectorize.followup_all";
+static const char *const LLVMLoopVectorizeFollowupVectorized =
+ "llvm.loop.vectorize.followup_vectorized";
+static const char *const LLVMLoopVectorizeFollowupEpilogue =
+ "llvm.loop.vectorize.followup_epilogue";
+/// @}
+
STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
@@ -171,11 +182,11 @@ static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
-/// Maximum factor for an interleaved memory access.
-static cl::opt<unsigned> MaxInterleaveGroupFactor(
- "max-interleave-group-factor", cl::Hidden,
- cl::desc("Maximum factor for an interleaved access group (default = 8)"),
- cl::init(8));
+/// An interleave-group may need masking if it resides in a block that needs
+/// predication, or in order to mask away gaps.
+static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
+ "enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
+ cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
/// We don't interleave loops with a known constant trip count below this
/// number.
@@ -240,7 +251,7 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
cl::desc("The maximum interleave count to use when interleaving a scalar "
"reduction in a nested loop."));
-static cl::opt<bool> EnableVPlanNativePath(
+cl::opt<bool> EnableVPlanNativePath(
"enable-vplan-native-path", cl::init(false), cl::Hidden,
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));
@@ -265,10 +276,6 @@ static Type *ToVectorTy(Type *Scalar, unsigned VF) {
return VectorType::get(Scalar, VF);
}
-// FIXME: The following helper functions have multiple implementations
-// in the project. They can be effectively organized in a common Load/Store
-// utilities unit.
-
/// A helper function that returns the type of loaded or stored value.
static Type *getMemInstValueType(Value *I) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -278,25 +285,6 @@ static Type *getMemInstValueType(Value *I) {
return cast<StoreInst>(I)->getValueOperand()->getType();
}
-/// A helper function that returns the alignment of load or store instruction.
-static unsigned getMemInstAlignment(Value *I) {
- assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
- "Expected Load or Store instruction");
- if (auto *LI = dyn_cast<LoadInst>(I))
- return LI->getAlignment();
- return cast<StoreInst>(I)->getAlignment();
-}
-
-/// A helper function that returns the address space of the pointer operand of
-/// load or store instruction.
-static unsigned getMemInstAddressSpace(Value *I) {
- assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
- "Expected Load or Store instruction");
- if (auto *LI = dyn_cast<LoadInst>(I))
- return LI->getPointerAddressSpace();
- return cast<StoreInst>(I)->getPointerAddressSpace();
-}
-
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
/// element of the corresponding vector type at the given vectorization factor.
@@ -436,8 +424,10 @@ public:
/// Construct the vector value of a scalarized value \p V one lane at a time.
void packScalarIntoVectorValue(Value *V, const VPIteration &Instance);
- /// Try to vectorize the interleaved access group that \p Instr belongs to.
- void vectorizeInterleaveGroup(Instruction *Instr);
+ /// Try to vectorize the interleaved access group that \p Instr belongs to,
+ /// optionally masking the vector operations if \p BlockInMask is non-null.
+ void vectorizeInterleaveGroup(Instruction *Instr,
+ VectorParts *BlockInMask = nullptr);
/// Vectorize Load and Store instructions, optionally masking the vector
/// operations if \p BlockInMask is non-null.
@@ -448,6 +438,9 @@ public:
/// the instruction.
void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr);
+ /// Fix the non-induction PHIs in the OrigPHIsToFix vector.
+ void fixNonInductionPHIs(void);
+
protected:
friend class LoopVectorizationPlanner;
@@ -584,6 +577,16 @@ protected:
/// Emit bypass checks to check any memory assumptions we may have made.
void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
+ /// Compute the transformed value of Index at offset StartValue using step
+ /// StepValue.
+ /// For integer induction, returns StartValue + Index * StepValue.
+ /// For pointer induction, returns StartValue[Index * StepValue].
+ /// FIXME: The newly created binary instructions should contain nsw/nuw
+ /// flags, which can be found from the original scalar operations.
+ Value *emitTransformedIndex(IRBuilder<> &B, Value *Index, ScalarEvolution *SE,
+ const DataLayout &DL,
+ const InductionDescriptor &ID) const;
+
/// Add additional metadata to \p To that was not present on \p Orig.
///
/// Currently this is used to add the noalias annotations based on the
@@ -705,6 +708,10 @@ protected:
// Holds the end values for each induction variable. We save the end values
// so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues;
+
+ // Vector of original scalar PHIs whose corresponding widened PHIs need to be
+ // fixed up at the end of vector code generation.
+ SmallVector<PHINode *, 8> OrigPHIsToFix;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -752,8 +759,15 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr)) {
const DILocation *DIL = Inst->getDebugLoc();
if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
- !isa<DbgInfoIntrinsic>(Inst))
- B.SetCurrentDebugLocation(DIL->cloneWithDuplicationFactor(UF * VF));
+ !isa<DbgInfoIntrinsic>(Inst)) {
+ auto NewDIL = DIL->cloneWithDuplicationFactor(UF * VF);
+ if (NewDIL)
+ B.SetCurrentDebugLocation(NewDIL.getValue());
+ else
+ LLVM_DEBUG(dbgs()
+ << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ }
else
B.SetCurrentDebugLocation(DIL);
} else
@@ -801,367 +815,6 @@ void InnerLoopVectorizer::addMetadata(ArrayRef<Value *> To,
namespace llvm {
-/// The group of interleaved loads/stores sharing the same stride and
-/// close to each other.
-///
-/// Each member in this group has an index starting from 0, and the largest
-/// index should be less than interleaved factor, which is equal to the absolute
-/// value of the access's stride.
-///
-/// E.g. An interleaved load group of factor 4:
-/// for (unsigned i = 0; i < 1024; i+=4) {
-/// a = A[i]; // Member of index 0
-/// b = A[i+1]; // Member of index 1
-/// d = A[i+3]; // Member of index 3
-/// ...
-/// }
-///
-/// An interleaved store group of factor 4:
-/// for (unsigned i = 0; i < 1024; i+=4) {
-/// ...
-/// A[i] = a; // Member of index 0
-/// A[i+1] = b; // Member of index 1
-/// A[i+2] = c; // Member of index 2
-/// A[i+3] = d; // Member of index 3
-/// }
-///
-/// Note: the interleaved load group could have gaps (missing members), but
-/// the interleaved store group doesn't allow gaps.
-class InterleaveGroup {
-public:
- InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
- : Align(Align), InsertPos(Instr) {
- assert(Align && "The alignment should be non-zero");
-
- Factor = std::abs(Stride);
- assert(Factor > 1 && "Invalid interleave factor");
-
- Reverse = Stride < 0;
- Members[0] = Instr;
- }
-
- bool isReverse() const { return Reverse; }
- unsigned getFactor() const { return Factor; }
- unsigned getAlignment() const { return Align; }
- unsigned getNumMembers() const { return Members.size(); }
-
- /// Try to insert a new member \p Instr with index \p Index and
- /// alignment \p NewAlign. The index is related to the leader and it could be
- /// negative if it is the new leader.
- ///
- /// \returns false if the instruction doesn't belong to the group.
- bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
- assert(NewAlign && "The new member's alignment should be non-zero");
-
- int Key = Index + SmallestKey;
-
- // Skip if there is already a member with the same index.
- if (Members.count(Key))
- return false;
-
- if (Key > LargestKey) {
- // The largest index is always less than the interleave factor.
- if (Index >= static_cast<int>(Factor))
- return false;
-
- LargestKey = Key;
- } else if (Key < SmallestKey) {
- // The largest index is always less than the interleave factor.
- if (LargestKey - Key >= static_cast<int>(Factor))
- return false;
-
- SmallestKey = Key;
- }
-
- // It's always safe to select the minimum alignment.
- Align = std::min(Align, NewAlign);
- Members[Key] = Instr;
- return true;
- }
-
- /// Get the member with the given index \p Index
- ///
- /// \returns nullptr if contains no such member.
- Instruction *getMember(unsigned Index) const {
- int Key = SmallestKey + Index;
- if (!Members.count(Key))
- return nullptr;
-
- return Members.find(Key)->second;
- }
-
- /// Get the index for the given member. Unlike the key in the member
- /// map, the index starts from 0.
- unsigned getIndex(Instruction *Instr) const {
- for (auto I : Members)
- if (I.second == Instr)
- return I.first - SmallestKey;
-
- llvm_unreachable("InterleaveGroup contains no such member");
- }
-
- Instruction *getInsertPos() const { return InsertPos; }
- void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
-
- /// Add metadata (e.g. alias info) from the instructions in this group to \p
- /// NewInst.
- ///
- /// FIXME: this function currently does not add noalias metadata a'la
- /// addNewMedata. To do that we need to compute the intersection of the
- /// noalias info from all members.
- void addMetadata(Instruction *NewInst) const {
- SmallVector<Value *, 4> VL;
- std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
- [](std::pair<int, Instruction *> p) { return p.second; });
- propagateMetadata(NewInst, VL);
- }
-
-private:
- unsigned Factor; // Interleave Factor.
- bool Reverse;
- unsigned Align;
- DenseMap<int, Instruction *> Members;
- int SmallestKey = 0;
- int LargestKey = 0;
-
- // To avoid breaking dependences, vectorized instructions of an interleave
- // group should be inserted at either the first load or the last store in
- // program order.
- //
- // E.g. %even = load i32 // Insert Position
- // %add = add i32 %even // Use of %even
- // %odd = load i32
- //
- // store i32 %even
- // %odd = add i32 // Def of %odd
- // store i32 %odd // Insert Position
- Instruction *InsertPos;
-};
-} // end namespace llvm
-
-namespace {
-
-/// Drive the analysis of interleaved memory accesses in the loop.
-///
-/// Use this class to analyze interleaved accesses only when we can vectorize
-/// a loop. Otherwise it's meaningless to do analysis as the vectorization
-/// on interleaved accesses is unsafe.
-///
-/// The analysis collects interleave groups and records the relationships
-/// between the member and the group in a map.
-class InterleavedAccessInfo {
-public:
- InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
- DominatorTree *DT, LoopInfo *LI,
- const LoopAccessInfo *LAI)
- : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
-
- ~InterleavedAccessInfo() {
- SmallPtrSet<InterleaveGroup *, 4> DelSet;
- // Avoid releasing a pointer twice.
- for (auto &I : InterleaveGroupMap)
- DelSet.insert(I.second);
- for (auto *Ptr : DelSet)
- delete Ptr;
- }
-
- /// Analyze the interleaved accesses and collect them in interleave
- /// groups. Substitute symbolic strides using \p Strides.
- void analyzeInterleaving();
-
- /// Check if \p Instr belongs to any interleave group.
- bool isInterleaved(Instruction *Instr) const {
- return InterleaveGroupMap.count(Instr);
- }
-
- /// Get the interleave group that \p Instr belongs to.
- ///
- /// \returns nullptr if doesn't have such group.
- InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
- if (InterleaveGroupMap.count(Instr))
- return InterleaveGroupMap.find(Instr)->second;
- return nullptr;
- }
-
- /// Returns true if an interleaved group that may access memory
- /// out-of-bounds requires a scalar epilogue iteration for correctness.
- bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
-
-private:
- /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
- /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
- /// The interleaved access analysis can also add new predicates (for example
- /// by versioning strides of pointers).
- PredicatedScalarEvolution &PSE;
-
- Loop *TheLoop;
- DominatorTree *DT;
- LoopInfo *LI;
- const LoopAccessInfo *LAI;
-
- /// True if the loop may contain non-reversed interleaved groups with
- /// out-of-bounds accesses. We ensure we don't speculatively access memory
- /// out-of-bounds by executing at least one scalar epilogue iteration.
- bool RequiresScalarEpilogue = false;
-
- /// Holds the relationships between the members and the interleave group.
- DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
-
- /// Holds dependences among the memory accesses in the loop. It maps a source
- /// access to a set of dependent sink accesses.
- DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
-
- /// The descriptor for a strided memory access.
- struct StrideDescriptor {
- StrideDescriptor() = default;
- StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
- unsigned Align)
- : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
-
- // The access's stride. It is negative for a reverse access.
- int64_t Stride = 0;
-
- // The scalar expression of this access.
- const SCEV *Scev = nullptr;
-
- // The size of the memory object.
- uint64_t Size = 0;
-
- // The alignment of this access.
- unsigned Align = 0;
- };
-
- /// A type for holding instructions and their stride descriptors.
- using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
-
- /// Create a new interleave group with the given instruction \p Instr,
- /// stride \p Stride and alignment \p Align.
- ///
- /// \returns the newly created interleave group.
- InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
- unsigned Align) {
- assert(!InterleaveGroupMap.count(Instr) &&
- "Already in an interleaved access group");
- InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
- return InterleaveGroupMap[Instr];
- }
-
- /// Release the group and remove all the relationships.
- void releaseGroup(InterleaveGroup *Group) {
- for (unsigned i = 0; i < Group->getFactor(); i++)
- if (Instruction *Member = Group->getMember(i))
- InterleaveGroupMap.erase(Member);
-
- delete Group;
- }
-
- /// Collect all the accesses with a constant stride in program order.
- void collectConstStrideAccesses(
- MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides);
-
- /// Returns true if \p Stride is allowed in an interleaved group.
- static bool isStrided(int Stride) {
- unsigned Factor = std::abs(Stride);
- return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
- }
-
- /// Returns true if \p BB is a predicated block.
- bool isPredicated(BasicBlock *BB) const {
- return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
- }
-
- /// Returns true if LoopAccessInfo can be used for dependence queries.
- bool areDependencesValid() const {
- return LAI && LAI->getDepChecker().getDependences();
- }
-
- /// Returns true if memory accesses \p A and \p B can be reordered, if
- /// necessary, when constructing interleaved groups.
- ///
- /// \p A must precede \p B in program order. We return false if reordering is
- /// not necessary or is prevented because \p A and \p B may be dependent.
- bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
- StrideEntry *B) const {
- // Code motion for interleaved accesses can potentially hoist strided loads
- // and sink strided stores. The code below checks the legality of the
- // following two conditions:
- //
- // 1. Potentially moving a strided load (B) before any store (A) that
- // precedes B, or
- //
- // 2. Potentially moving a strided store (A) after any load or store (B)
- // that A precedes.
- //
- // It's legal to reorder A and B if we know there isn't a dependence from A
- // to B. Note that this determination is conservative since some
- // dependences could potentially be reordered safely.
-
- // A is potentially the source of a dependence.
- auto *Src = A->first;
- auto SrcDes = A->second;
-
- // B is potentially the sink of a dependence.
- auto *Sink = B->first;
- auto SinkDes = B->second;
-
- // Code motion for interleaved accesses can't violate WAR dependences.
- // Thus, reordering is legal if the source isn't a write.
- if (!Src->mayWriteToMemory())
- return true;
-
- // At least one of the accesses must be strided.
- if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
- return true;
-
- // If dependence information is not available from LoopAccessInfo,
- // conservatively assume the instructions can't be reordered.
- if (!areDependencesValid())
- return false;
-
- // If we know there is a dependence from source to sink, assume the
- // instructions can't be reordered. Otherwise, reordering is legal.
- return !Dependences.count(Src) || !Dependences.lookup(Src).count(Sink);
- }
-
- /// Collect the dependences from LoopAccessInfo.
- ///
- /// We process the dependences once during the interleaved access analysis to
- /// enable constant-time dependence queries.
- void collectDependences() {
- if (!areDependencesValid())
- return;
- auto *Deps = LAI->getDepChecker().getDependences();
- for (auto Dep : *Deps)
- Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
- }
-};
-
-} // end anonymous namespace
-
-static void emitMissedWarning(Function *F, Loop *L,
- const LoopVectorizeHints &LH,
- OptimizationRemarkEmitter *ORE) {
- LH.emitRemarkWithHints();
-
- if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
- if (LH.getWidth() != 1)
- ORE->emit(DiagnosticInfoOptimizationFailure(
- DEBUG_TYPE, "FailedRequestedVectorization",
- L->getStartLoc(), L->getHeader())
- << "loop not vectorized: "
- << "failed explicitly specified loop vectorization");
- else if (LH.getInterleave() != 1)
- ORE->emit(DiagnosticInfoOptimizationFailure(
- DEBUG_TYPE, "FailedRequestedInterleaving", L->getStartLoc(),
- L->getHeader())
- << "loop not interleaved: "
- << "failed explicitly specified loop interleaving");
- }
-}
-
-namespace llvm {
-
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
/// In many cases vectorization is not profitable. This can happen because of
@@ -1247,34 +900,55 @@ public:
/// vectorization factor \p VF.
bool isProfitableToScalarize(Instruction *I, unsigned VF) const {
assert(VF > 1 && "Profitable to scalarize relevant only for VF > 1.");
+
+ // Cost model is not run in the VPlan-native path - return conservative
+ // result until this changes.
+ if (EnableVPlanNativePath)
+ return false;
+
auto Scalars = InstsToScalarize.find(VF);
assert(Scalars != InstsToScalarize.end() &&
"VF not yet analyzed for scalarization profitability");
- return Scalars->second.count(I);
+ return Scalars->second.find(I) != Scalars->second.end();
}
/// Returns true if \p I is known to be uniform after vectorization.
bool isUniformAfterVectorization(Instruction *I, unsigned VF) const {
if (VF == 1)
return true;
- assert(Uniforms.count(VF) && "VF not yet analyzed for uniformity");
+
+ // Cost model is not run in the VPlan-native path - return conservative
+ // result until this changes.
+ if (EnableVPlanNativePath)
+ return false;
+
auto UniformsPerVF = Uniforms.find(VF);
- return UniformsPerVF->second.count(I);
+ assert(UniformsPerVF != Uniforms.end() &&
+ "VF not yet analyzed for uniformity");
+ return UniformsPerVF->second.find(I) != UniformsPerVF->second.end();
}
/// Returns true if \p I is known to be scalar after vectorization.
bool isScalarAfterVectorization(Instruction *I, unsigned VF) const {
if (VF == 1)
return true;
- assert(Scalars.count(VF) && "Scalar values are not calculated for VF");
+
+ // Cost model is not run in the VPlan-native path - return conservative
+ // result until this changes.
+ if (EnableVPlanNativePath)
+ return false;
+
auto ScalarsPerVF = Scalars.find(VF);
- return ScalarsPerVF->second.count(I);
+ assert(ScalarsPerVF != Scalars.end() &&
+ "Scalar values are not calculated for VF");
+ return ScalarsPerVF->second.find(I) != ScalarsPerVF->second.end();
}
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
/// for vectorization factor \p VF.
bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
- return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
+ return VF > 1 && MinBWs.find(I) != MinBWs.end() &&
+ !isProfitableToScalarize(I, VF) &&
!isScalarAfterVectorization(I, VF);
}
@@ -1298,7 +972,7 @@ public:
/// Save vectorization decision \p W and \p Cost taken by the cost model for
/// interleaving group \p Grp and vector width \p VF.
- void setWideningDecision(const InterleaveGroup *Grp, unsigned VF,
+ void setWideningDecision(const InterleaveGroup<Instruction> *Grp, unsigned VF,
InstWidening W, unsigned Cost) {
assert(VF >= 2 && "Expected VF >=2");
/// Broadcast this decision to all instructions inside the group.
@@ -1318,6 +992,12 @@ public:
/// through the cost modeling.
InstWidening getWideningDecision(Instruction *I, unsigned VF) {
assert(VF >= 2 && "Expected VF >=2");
+
+ // Cost model is not run in the VPlan-native path - return conservative
+ // result until this changes.
+ if (EnableVPlanNativePath)
+ return CM_GatherScatter;
+
std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
auto Itr = WideningDecisions.find(InstOnVF);
if (Itr == WideningDecisions.end())
@@ -1330,7 +1010,8 @@ public:
unsigned getWideningCost(Instruction *I, unsigned VF) {
assert(VF >= 2 && "Expected VF >=2");
std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
- assert(WideningDecisions.count(InstOnVF) && "The cost is not calculated");
+ assert(WideningDecisions.find(InstOnVF) != WideningDecisions.end() &&
+ "The cost is not calculated");
return WideningDecisions[InstOnVF].second;
}
@@ -1369,7 +1050,7 @@ public:
/// that may be vectorized as interleave, gather-scatter or scalarized.
void collectUniformsAndScalars(unsigned VF) {
// Do the analysis once.
- if (VF == 1 || Uniforms.count(VF))
+ if (VF == 1 || Uniforms.find(VF) != Uniforms.end())
return;
setCostBasedWideningDecision(VF);
collectLoopUniforms(VF);
@@ -1414,26 +1095,58 @@ public:
/// Returns true if \p I is an instruction that will be scalarized with
/// predication. Such instructions include conditional stores and
/// instructions that may divide by zero.
- bool isScalarWithPredication(Instruction *I);
+ /// If a non-zero VF has been calculated, we check if I will be scalarized
+ /// predication for that VF.
+ bool isScalarWithPredication(Instruction *I, unsigned VF = 1);
+
+ // Returns true if \p I is an instruction that will be predicated either
+ // through scalar predication or masked load/store or masked gather/scatter.
+ // Superset of instructions that return true for isScalarWithPredication.
+ bool isPredicatedInst(Instruction *I) {
+ if (!blockNeedsPredication(I->getParent()))
+ return false;
+ // Loads and stores that need some form of masked operation are predicated
+ // instructions.
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ return Legal->isMaskRequired(I);
+ return isScalarWithPredication(I);
+ }
/// Returns true if \p I is a memory instruction with consecutive memory
/// access that can be widened.
bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
+ /// Returns true if \p I is a memory instruction in an interleaved-group
+ /// of memory accesses that can be vectorized with wide vector loads/stores
+ /// and shuffles.
+ bool interleavedAccessCanBeWidened(Instruction *I, unsigned VF = 1);
+
/// Check if \p Instr belongs to any interleaved access group.
bool isAccessInterleaved(Instruction *Instr) {
return InterleaveInfo.isInterleaved(Instr);
}
/// Get the interleaved access group that \p Instr belongs to.
- const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
+ const InterleaveGroup<Instruction> *
+ getInterleavedAccessGroup(Instruction *Instr) {
return InterleaveInfo.getInterleaveGroup(Instr);
}
/// Returns true if an interleaved group requires a scalar iteration
- /// to handle accesses with gaps.
+ /// to handle accesses with gaps, and there is nothing preventing us from
+ /// creating a scalar epilogue.
bool requiresScalarEpilogue() const {
- return InterleaveInfo.requiresScalarEpilogue();
+ return IsScalarEpilogueAllowed && InterleaveInfo.requiresScalarEpilogue();
+ }
+
+ /// Returns true if a scalar epilogue is not allowed due to optsize.
+ bool isScalarEpilogueAllowed() const { return IsScalarEpilogueAllowed; }
+
+ /// Returns true if all loop blocks should be masked to fold tail loop.
+ bool foldTailByMasking() const { return FoldTailByMasking; }
+
+ bool blockNeedsPredication(BasicBlock *BB) {
+ return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
private:
@@ -1482,8 +1195,10 @@ private:
/// memory access.
unsigned getConsecutiveMemOpCost(Instruction *I, unsigned VF);
- /// The cost calculation for Load instruction \p I with uniform pointer -
- /// scalar load + broadcast.
+ /// The cost calculation for Load/Store instruction \p I with uniform pointer -
+ /// Load: scalar load + broadcast.
+ /// Store: scalar store + (loop invariant value stored? 0 : extract of last
+ /// element)
unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
/// Returns whether the instruction is a load or store and will be a emitted
@@ -1517,6 +1232,18 @@ private:
/// vectorization as a predicated block.
SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;
+ /// Records whether it is allowed to have the original scalar loop execute at
+ /// least once. This may be needed as a fallback loop in case runtime
+ /// aliasing/dependence checks fail, or to handle the tail/remainder
+ /// iterations when the trip count is unknown or doesn't divide by the VF,
+ /// or as a peel-loop to handle gaps in interleave-groups.
+ /// Under optsize and when the trip count is very small we don't allow any
+ /// iterations to execute in the scalar loop.
+ bool IsScalarEpilogueAllowed = true;
+
+ /// All blocks of loop are to be masked to fold tail of scalar iterations.
+ bool FoldTailByMasking = false;
+
/// A map holding scalar costs for different vectorization factors. The
/// presence of a cost for an instruction in the mapping indicates that the
/// instruction will be scalarized when vectorizing with the associated
@@ -1639,14 +1366,15 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp,
return false;
Function *Fn = OuterLp->getHeader()->getParent();
- if (!Hints.allowVectorization(Fn, OuterLp, false /*AlwaysVectorize*/)) {
+ if (!Hints.allowVectorization(Fn, OuterLp,
+ true /*VectorizeOnlyWhenForced*/)) {
LLVM_DEBUG(dbgs() << "LV: Loop hints prevent outer loop vectorization.\n");
return false;
}
if (!Hints.getWidth()) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n");
- emitMissedWarning(Fn, OuterLp, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -1654,7 +1382,7 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp,
// TODO: Interleave support is future work.
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for "
"outer loops.\n");
- emitMissedWarning(Fn, OuterLp, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -1695,10 +1423,11 @@ struct LoopVectorize : public FunctionPass {
LoopVectorizePass Impl;
- explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
+ explicit LoopVectorize(bool InterleaveOnlyWhenForced = false,
+ bool VectorizeOnlyWhenForced = false)
: FunctionPass(ID) {
- Impl.DisableUnrolling = NoUnrolling;
- Impl.AlwaysVectorize = AlwaysVectorize;
+ Impl.InterleaveOnlyWhenForced = InterleaveOnlyWhenForced;
+ Impl.VectorizeOnlyWhenForced = VectorizeOnlyWhenForced;
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -1737,8 +1466,16 @@ struct LoopVectorize : public FunctionPass {
AU.addRequired<LoopAccessLegacyAnalysis>();
AU.addRequired<DemandedBitsWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
+
+ // We currently do not preserve loopinfo/dominator analyses with outer loop
+ // vectorization. Until this is addressed, mark these analyses as preserved
+ // only for non-VPlan-native path.
+ // TODO: Preserve Loop and Dominator analyses for VPlan-native path.
+ if (!EnableVPlanNativePath) {
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
@@ -1950,7 +1687,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
? Builder.CreateSExtOrTrunc(Induction, IV->getType())
: Builder.CreateCast(Instruction::SIToFP, Induction,
IV->getType());
- ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL);
+ ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID);
ScalarIV->setName("offset.idx");
}
if (Trunc) {
@@ -2089,8 +1826,9 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
assert(!V->getType()->isVoidTy() && "Type does not produce a value");
- // If we have a stride that is replaced by one, do it here.
- if (Legal->hasStride(V))
+ // If we have a stride that is replaced by one, do it here. Defer this for
+ // the VPlan-native path until we start running Legal checks in that path.
+ if (!EnableVPlanNativePath && Legal->hasStride(V))
V = ConstantInt::get(V->getType(), 1);
// If we have a vector mapped to this value, return it.
@@ -2214,6 +1952,17 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
"reverse");
}
+// Return whether we allow using masked interleave-groups (for dealing with
+// strided loads/stores that reside in predicated blocks, or for dealing
+// with gaps).
+static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
+ // If an override option has been passed in for interleaved accesses, use it.
+ if (EnableMaskedInterleavedMemAccesses.getNumOccurrences() > 0)
+ return EnableMaskedInterleavedMemAccesses;
+
+ return TTI.enableMaskedInterleavedAccessVectorization();
+}
+
// Try to vectorize the interleave group that \p Instr belongs to.
//
// E.g. Translate following interleaved load group (factor = 3):
@@ -2242,8 +1991,10 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
-void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
- const InterleaveGroup *Group = Cost->getInterleavedAccessGroup(Instr);
+void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
+ VectorParts *BlockInMask) {
+ const InterleaveGroup<Instruction> *Group =
+ Cost->getInterleavedAccessGroup(Instr);
assert(Group && "Fail to get an interleaved access group.");
// Skip if current instruction is not the insert position.
@@ -2257,13 +2008,22 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Type *ScalarTy = getMemInstValueType(Instr);
unsigned InterleaveFactor = Group->getFactor();
Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
- Type *PtrTy = VecTy->getPointerTo(getMemInstAddressSpace(Instr));
+ Type *PtrTy = VecTy->getPointerTo(getLoadStoreAddressSpace(Instr));
// Prepare for the new pointers.
setDebugLocFromInst(Builder, Ptr);
SmallVector<Value *, 2> NewPtrs;
unsigned Index = Group->getIndex(Instr);
+ VectorParts Mask;
+ bool IsMaskForCondRequired = BlockInMask;
+ if (IsMaskForCondRequired) {
+ Mask = *BlockInMask;
+ // TODO: extend the masked interleaved-group support to reversed access.
+ assert(!Group->isReverse() && "Reversed masked interleave-group "
+ "not supported.");
+ }
+
// If the group is reverse, adjust the index to refer to the last vector lane
// instead of the first. We adjust the index from the first vector lane,
// rather than directly getting the pointer for lane VF - 1, because the
@@ -2302,13 +2062,39 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
setDebugLocFromInst(Builder, Instr);
Value *UndefVec = UndefValue::get(VecTy);
+ Value *MaskForGaps = nullptr;
+ if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {
+ MaskForGaps = createBitMaskForGaps(Builder, VF, *Group);
+ assert(MaskForGaps && "Mask for Gaps is required but it is null");
+ }
+
// Vectorize the interleaved load group.
if (isa<LoadInst>(Instr)) {
// For each unroll part, create a wide load for the group.
SmallVector<Value *, 2> NewLoads;
for (unsigned Part = 0; Part < UF; Part++) {
- auto *NewLoad = Builder.CreateAlignedLoad(
- NewPtrs[Part], Group->getAlignment(), "wide.vec");
+ Instruction *NewLoad;
+ if (IsMaskForCondRequired || MaskForGaps) {
+ assert(useMaskedInterleavedAccesses(*TTI) &&
+ "masked interleaved groups are not allowed.");
+ Value *GroupMask = MaskForGaps;
+ if (IsMaskForCondRequired) {
+ auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ Mask[Part], Undefs, RepMask, "interleaved.mask");
+ GroupMask = MaskForGaps
+ ? Builder.CreateBinOp(Instruction::And, ShuffledMask,
+ MaskForGaps)
+ : ShuffledMask;
+ }
+ NewLoad =
+ Builder.CreateMaskedLoad(NewPtrs[Part], Group->getAlignment(),
+ GroupMask, UndefVec, "wide.masked.vec");
+ }
+ else
+ NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part],
+ Group->getAlignment(), "wide.vec");
Group->addMetadata(NewLoad);
NewLoads.push_back(NewLoad);
}
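In the masked path above, the per-iteration block mask (one bit per scalar iteration) is widened to the interleaved vector width by replicating each lane InterleaveFactor times via createReplicatedMask, then optionally combined with the gap mask. A purely illustrative sketch of the index pattern that replication produces:

#include <vector>

// For VF = 4 and Factor = 3 this yields {0,0,0, 1,1,1, 2,2,2, 3,3,3}: each
// scalar-iteration mask bit is repeated once per member of the interleave
// group, mirroring the shuffle mask built by createReplicatedMask.
static std::vector<unsigned> replicatedMaskIndices(unsigned VF,
                                                   unsigned Factor) {
  std::vector<unsigned> Indices;
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    for (unsigned Member = 0; Member < Factor; ++Member)
      Indices.push_back(Lane);
  return Indices;
}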
@@ -2375,8 +2161,18 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
"interleaved.vec");
- Instruction *NewStoreInstr =
- Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
+ Instruction *NewStoreInstr;
+ if (IsMaskForCondRequired) {
+ auto *Undefs = UndefValue::get(Mask[Part]->getType());
+ auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ Mask[Part], Undefs, RepMask, "interleaved.mask");
+ NewStoreInstr = Builder.CreateMaskedStore(
+ IVec, NewPtrs[Part], Group->getAlignment(), ShuffledMask);
+ }
+ else
+ NewStoreInstr = Builder.CreateAlignedStore(IVec, NewPtrs[Part],
+ Group->getAlignment());
Group->addMetadata(NewStoreInstr);
}
@@ -2400,13 +2196,13 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Type *ScalarDataTy = getMemInstValueType(Instr);
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = getLoadStorePointerOperand(Instr);
- unsigned Alignment = getMemInstAlignment(Instr);
+ unsigned Alignment = getLoadStoreAlignment(Instr);
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
const DataLayout &DL = Instr->getModule()->getDataLayout();
if (!Alignment)
Alignment = DL.getABITypeAlignment(ScalarDataTy);
- unsigned AddressSpace = getMemInstAddressSpace(Instr);
+ unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
// Determine if the pointer operand of the access is either consecutive or
// reverse consecutive.
@@ -2594,6 +2390,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
if (TripCount)
return TripCount;
+ assert(L && "Create Trip Count for null loop.");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
// Find the loop boundaries.
ScalarEvolution *SE = PSE.getSE();
@@ -2602,6 +2399,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
"Invalid loop count");
Type *IdxTy = Legal->getWidestInductionType();
+ assert(IdxTy && "No type for induction");
// The exit count might have the type of i64 while the phi is i32. This can
// happen if we have an induction variable that is sign extended before the
@@ -2642,12 +2440,26 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
Value *TC = getOrCreateTripCount(L);
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ Type *Ty = TC->getType();
+ Constant *Step = ConstantInt::get(Ty, VF * UF);
+
+ // If the tail is to be folded by masking, round the number of iterations N
+ // up to a multiple of Step instead of rounding down. This is done by first
+ // adding Step-1 and then rounding down. Note that it's ok if this addition
+ // overflows: the vector induction variable will eventually wrap to zero given
+ // that it starts at zero and its Step is a power of two; the loop will then
+ // exit, with the last early-exit vector comparison also producing all-true.
+ if (Cost->foldTailByMasking()) {
+ assert(isPowerOf2_32(VF * UF) &&
+ "VF*UF must be a power of 2 when folding tail by masking");
+ TC = Builder.CreateAdd(TC, ConstantInt::get(Ty, VF * UF - 1), "n.rnd.up");
+ }
+
// Now we need to generate the expression for the part of the loop that the
// vectorized body will execute. This is equal to N - (N % Step) if scalar
// iterations are not required for correctness, or N - Step, otherwise. Step
// is equal to the vectorization factor (number of SIMD elements) times the
// unroll factor (number of SIMD instructions).
- Constant *Step = ConstantInt::get(TC->getType(), VF * UF);
Value *R = Builder.CreateURem(TC, Step, "n.mod.vf");
// If there is a non-reversed interleaved group that may speculatively access
@@ -2710,8 +2522,13 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
// of zero. In this case we will also jump to the scalar loop.
auto P = Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
: ICmpInst::ICMP_ULT;
- Value *CheckMinIters = Builder.CreateICmp(
- P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
+
+ // If tail is to be folded, vector loop takes care of all iterations.
+ Value *CheckMinIters = Builder.getFalse();
+ if (!Cost->foldTailByMasking())
+ CheckMinIters = Builder.CreateICmp(
+ P, Count, ConstantInt::get(Count->getType(), VF * UF),
+ "min.iters.check");
BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
// Update dominator tree immediately if the generated block is a
@@ -2740,6 +2557,8 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
if (C->isZero())
return;
+ assert(!Cost->foldTailByMasking() &&
+ "Cannot SCEV check stride or overflow when folding tail");
// Create a new block containing the stride check.
BB->setName("vector.scevcheck");
auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
@@ -2756,6 +2575,10 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
}
void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
+ // VPlan-native path does not do any analysis for runtime checks currently.
+ if (EnableVPlanNativePath)
+ return;
+
BasicBlock *BB = L->getLoopPreheader();
// Generate the code that checks in runtime if arrays overlap. We put the
@@ -2768,6 +2591,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
if (!MemRuntimeCheck)
return;
+ assert(!Cost->foldTailByMasking() && "Cannot check memory when folding tail");
// Create a new block containing the memory check.
BB->setName("vector.memcheck");
auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
@@ -2789,6 +2613,94 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
LVer->prepareNoAliasMetadata();
}
+Value *InnerLoopVectorizer::emitTransformedIndex(
+ IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout &DL,
+ const InductionDescriptor &ID) const {
+
+ SCEVExpander Exp(*SE, DL, "induction");
+ auto Step = ID.getStep();
+ auto StartValue = ID.getStartValue();
+ assert(Index->getType() == Step->getType() &&
+ "Index type does not match StepValue type");
+
+ // Note: the IR at this point is broken. We cannot use SE to create any new
+ // SCEV and then expand it, hoping that SCEV's simplification will give us
+ // more optimal code. Unfortunately, attempting to do so on invalid IR may
+ // lead to various SCEV crashes. So all we can do is use the builder and rely
+ // on InstCombine for future simplifications. Here we handle only some
+ // trivial cases.
+ auto CreateAdd = [&B](Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Types don't match!");
+ if (auto *CX = dyn_cast<ConstantInt>(X))
+ if (CX->isZero())
+ return Y;
+ if (auto *CY = dyn_cast<ConstantInt>(Y))
+ if (CY->isZero())
+ return X;
+ return B.CreateAdd(X, Y);
+ };
+
+ auto CreateMul = [&B](Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Types don't match!");
+ if (auto *CX = dyn_cast<ConstantInt>(X))
+ if (CX->isOne())
+ return Y;
+ if (auto *CY = dyn_cast<ConstantInt>(Y))
+ if (CY->isOne())
+ return X;
+ return B.CreateMul(X, Y);
+ };
+
+ switch (ID.getKind()) {
+ case InductionDescriptor::IK_IntInduction: {
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ auto *Offset = CreateMul(
+ Index, Exp.expandCodeFor(Step, Index->getType(), &*B.GetInsertPoint()));
+ return CreateAdd(StartValue, Offset);
+ }
+ case InductionDescriptor::IK_PtrInduction: {
+ assert(isa<SCEVConstant>(Step) &&
+ "Expected constant step for pointer induction");
+ return B.CreateGEP(
+ nullptr, StartValue,
+ CreateMul(Index, Exp.expandCodeFor(Step, Index->getType(),
+ &*B.GetInsertPoint())));
+ }
+ case InductionDescriptor::IK_FpInduction: {
+ assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+ auto InductionBinOp = ID.getInductionBinOp();
+ assert(InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub) &&
+ "Original bin op should be defined for FP induction");
+
+ Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
+
+ // Floating point operations had to be 'fast' to enable the induction.
+ FastMathFlags Flags;
+ Flags.setFast();
+
+ Value *MulExp = B.CreateFMul(StepValue, Index);
+ if (isa<Instruction>(MulExp))
+ // We have to check here because MulExp may be a constant.
+ cast<Instruction>(MulExp)->setFastMathFlags(Flags);
+
+ Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
+ "induction");
+ if (isa<Instruction>(BOp))
+ cast<Instruction>(BOp)->setFastMathFlags(Flags);
+
+ return BOp;
+ }
+ case InductionDescriptor::IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+}
+
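For orientation, an illustrative scalar restatement of what emitTransformedIndex produces for each induction kind (editor's sketch with placeholder names; only the integer case is shown as code, the pointer and FP cases as comments):
// IK_IntInduction: Start + Index * Step (folding *1 and +0 like the lambdas).
// IK_PtrInduction: gep Start, Index * Step   (Step is a constant).
// IK_FpInduction : Start fadd/fsub (Index * Step), with fast-math flags set.
int64_t transformedIntIndex(int64_t Start, int64_t Index, int64_t Step) {
  int64_t Offset = (Step == 1) ? Index : Index * Step; // CreateMul fold
  return (Offset == 0) ? Start : Start + Offset;       // CreateAdd fold
}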
BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
/*
In this function we generate a new loop. The new loop will contain
@@ -2825,6 +2737,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
BasicBlock *OldBasicBlock = OrigLoop->getHeader();
BasicBlock *VectorPH = OrigLoop->getLoopPreheader();
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ MDNode *OrigLoopID = OrigLoop->getLoopID();
assert(VectorPH && "Invalid loop structure");
assert(ExitBlock && "Must have an exit block");
@@ -2927,7 +2840,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd");
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
- EndValue = II.transform(B, CRD, PSE.getSE(), DL);
+ EndValue = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II);
EndValue->setName("ind.end");
}
@@ -2948,9 +2861,12 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
- Value *CmpN =
- CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
- CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
+ // If tail is to be folded, we know we don't need to run the remainder.
+ Value *CmpN = Builder.getTrue();
+ if (!Cost->foldTailByMasking())
+ CmpN =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+ CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
ReplaceInstWithInst(MiddleBlock->getTerminator(),
BranchInst::Create(ExitBlock, ScalarPH, CmpN));
@@ -2965,6 +2881,17 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
LoopVectorBody = VecBody;
LoopScalarBody = OldBasicBlock;
+ Optional<MDNode *> VectorizedLoopID =
+ makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
+ LLVMLoopVectorizeFollowupVectorized});
+ if (VectorizedLoopID.hasValue()) {
+ Lp->setLoopID(VectorizedLoopID.getValue());
+
+ // Do not setAlreadyVectorized if loop attributes have been defined
+ // explicitly.
+ return LoopVectorPreHeader;
+ }
+
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
if (MDNode *LID = OrigLoop->getLoopID())
@@ -3023,7 +2950,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
II.getStep()->getType())
: B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
CMO->setName("cast.cmo");
- Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
+ Value *Escape = emitTransformedIndex(B, CMO, PSE.getSE(), DL, II);
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
}
@@ -3109,6 +3036,10 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
!TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(RetTy, true, false);
+ // Some targets keep addresses scalar.
+ if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
+ return Cost;
+
if (CallInst *CI = dyn_cast<CallInst>(I)) {
SmallVector<const Value *, 4> Operands(CI->arg_operands());
Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
@@ -3212,7 +3143,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
continue;
for (unsigned Part = 0; Part < UF; ++Part) {
Value *I = getOrCreateVectorValue(KV.first, Part);
- if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
+ if (Erased.find(I) != Erased.end() || I->use_empty() ||
+ !isa<Instruction>(I))
continue;
Type *OriginalTy = I->getType();
Type *ScalarTruncatedTy =
@@ -3330,6 +3262,13 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
if (VF > 1)
truncateToMinimalBitwidths();
+ // Fix widened non-induction PHIs by setting up the PHI operands.
+ if (OrigPHIsToFix.size()) {
+ assert(EnableVPlanNativePath &&
+ "Unexpected non-induction PHIs for fixup in non VPlan-native path");
+ fixNonInductionPHIs();
+ }
+
// At this point every instruction in the original loop is widened to a
// vector form. Now we need to fix the recurrences in the loop. These PHI
// nodes are currently empty because we did not want to introduce cycles.
@@ -3666,8 +3605,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
ReducedPartRdx, "bin.rdx"));
else
- ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp(
- Builder, MinMaxKind, ReducedPartRdx, RdxPart);
+ ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
+ RdxPart);
}
if (VF > 1) {
@@ -3720,9 +3659,20 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
void InnerLoopVectorizer::fixLCSSAPHIs() {
for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
if (LCSSAPhi.getNumIncomingValues() == 1) {
- assert(OrigLoop->isLoopInvariant(LCSSAPhi.getIncomingValue(0)) &&
- "Incoming value isn't loop invariant");
- LCSSAPhi.addIncoming(LCSSAPhi.getIncomingValue(0), LoopMiddleBlock);
+ auto *IncomingValue = LCSSAPhi.getIncomingValue(0);
+ // Non-instruction incoming values will have only one value.
+ unsigned LastLane = 0;
+ if (isa<Instruction>(IncomingValue))
+ LastLane = Cost->isUniformAfterVectorization(
+ cast<Instruction>(IncomingValue), VF)
+ ? 0
+ : VF - 1;
+ // Can be a loop invariant incoming value or the last scalar value to be
+ // extracted from the vectorized loop.
+ Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
+ Value *lastIncomingValue =
+ getOrCreateScalarValue(IncomingValue, { UF - 1, LastLane });
+ LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
}
}
}
@@ -3791,12 +3741,62 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
} while (Changed);
}
+void InnerLoopVectorizer::fixNonInductionPHIs() {
+ for (PHINode *OrigPhi : OrigPHIsToFix) {
+ PHINode *NewPhi =
+ cast<PHINode>(VectorLoopValueMap.getVectorValue(OrigPhi, 0));
+ unsigned NumIncomingValues = OrigPhi->getNumIncomingValues();
+
+ SmallVector<BasicBlock *, 2> ScalarBBPredecessors(
+ predecessors(OrigPhi->getParent()));
+ SmallVector<BasicBlock *, 2> VectorBBPredecessors(
+ predecessors(NewPhi->getParent()));
+ assert(ScalarBBPredecessors.size() == VectorBBPredecessors.size() &&
+ "Scalar and Vector BB should have the same number of predecessors");
+
+ // The insertion point in Builder may be invalidated by the time we get
+ // here. Force the Builder insertion point to something valid so that we do
+ // not run into issues during insertion point restore in
+ // getOrCreateVectorValue calls below.
+ Builder.SetInsertPoint(NewPhi);
+
+ // The predecessor order is preserved and we can rely on mapping between
+ // scalar and vector block predecessors.
+ for (unsigned i = 0; i < NumIncomingValues; ++i) {
+ BasicBlock *NewPredBB = VectorBBPredecessors[i];
+
+ // When looking up the new scalar/vector values to fix up, use incoming
+ // values from original phi.
+ Value *ScIncV =
+ OrigPhi->getIncomingValueForBlock(ScalarBBPredecessors[i]);
+
+ // Scalar incoming value may need a broadcast
+ Value *NewIncV = getOrCreateVectorValue(ScIncV, 0);
+ NewPhi->addIncoming(NewIncV, NewPredBB);
+ }
+ }
+}
+
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
unsigned VF) {
+ PHINode *P = cast<PHINode>(PN);
+ if (EnableVPlanNativePath) {
+ // Currently we enter here in the VPlan-native path for non-induction
+ // PHIs where all control flow is uniform. We simply widen these PHIs.
+ // Create a vector phi with no operands - the vector phi operands will be
+ // set at the end of vector code generation.
+ Type *VecTy =
+ (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
+ Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
+ VectorLoopValueMap.setVectorValue(P, 0, VecPhi);
+ OrigPHIsToFix.push_back(P);
+
+ return;
+ }
+
assert(PN->getParent() == OrigLoop->getHeader() &&
"Non-header phis should have been handled elsewhere");
- PHINode *P = cast<PHINode>(PN);
// In order to support recurrences we need to be able to vectorize Phi nodes.
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
@@ -3846,7 +3846,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF);
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
- Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
+ Value *SclrGep =
+ emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
SclrGep->setName("next.gep");
VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep);
}
@@ -4151,6 +4152,10 @@ void InnerLoopVectorizer::updateAnalysis() {
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
+ // DT is not kept up-to-date for outer loop vectorization
+ if (EnableVPlanNativePath)
+ return;
+
// Update the dominator tree information.
assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
"Entry does not dominate exit.");
@@ -4167,7 +4172,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
// this check. Collecting Scalars for VF=1 does not make any sense.
- assert(VF >= 2 && !Scalars.count(VF) &&
+ assert(VF >= 2 && Scalars.find(VF) == Scalars.end() &&
"This function should not be visited twice for the same VF");
SmallSetVector<Instruction *, 8> Worklist;
@@ -4253,7 +4258,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
}
}
for (auto *I : ScalarPtrs)
- if (!PossibleNonScalarPtrs.count(I)) {
+ if (PossibleNonScalarPtrs.find(I) == PossibleNonScalarPtrs.end()) {
LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n");
Worklist.insert(I);
}
@@ -4279,8 +4284,9 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// Insert the forced scalars.
// FIXME: Currently widenPHIInstruction() often creates a dead vector
// induction variable when the PHI user is scalarized.
- if (ForcedScalars.count(VF))
- for (auto *I : ForcedScalars.find(VF)->second)
+ auto ForcedScalar = ForcedScalars.find(VF);
+ if (ForcedScalar != ForcedScalars.end())
+ for (auto *I : ForcedScalar->second)
Worklist.insert(I);
// Expand the worklist by looking through any bitcasts and getelementptr
@@ -4348,8 +4354,8 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
Scalars[VF].insert(Worklist.begin(), Worklist.end());
}
-bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) {
- if (!Legal->blockNeedsPredication(I->getParent()))
+bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigned VF) {
+ if (!blockNeedsPredication(I->getParent()))
return false;
switch(I->getOpcode()) {
default:
@@ -4360,6 +4366,14 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) {
return false;
auto *Ptr = getLoadStorePointerOperand(I);
auto *Ty = getMemInstValueType(I);
+ // We have already decided how to vectorize this instruction, get that
+ // result.
+ if (VF > 1) {
+ InstWidening WideningDecision = getWideningDecision(I, VF);
+ assert(WideningDecision != CM_Unknown &&
+ "Widening decision should be ready at this moment");
+ return WideningDecision == CM_Scalarize;
+ }
return isa<LoadInst>(I) ?
!(isLegalMaskedLoad(Ty, Ptr) || isLegalMaskedGather(Ty))
: !(isLegalMaskedStore(Ty, Ptr) || isLegalMaskedScatter(Ty));
@@ -4373,6 +4387,35 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) {
return false;
}
+bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
+ unsigned VF) {
+ assert(isAccessInterleaved(I) && "Expecting interleaved access.");
+ assert(getWideningDecision(I, VF) == CM_Unknown &&
+ "Decision should not be set yet.");
+ auto *Group = getInterleavedAccessGroup(I);
+ assert(Group && "Must have a group.");
+
+ // Check if masking is required.
+ // A Group may need masking for one of two reasons: it resides in a block that
+ // needs predication, or it was decided to use masking to deal with gaps.
+ bool PredicatedAccessRequiresMasking =
+ Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I);
+ bool AccessWithGapsRequiresMasking =
+ Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
+ if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking)
+ return true;
+
+ // If masked interleaving is required, we expect that the user/target had
+ // enabled it, because otherwise it either wouldn't have been created or
+ // it should have been invalidated by the CostModel.
+ assert(useMaskedInterleavedAccesses(TTI) &&
+ "Masked interleave-groups for predicated accesses are not enabled.");
+
+ auto *Ty = getMemInstValueType(I);
+ return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty)
+ : TTI.isLegalMaskedStore(Ty);
+}
+
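A compact restatement of the widening decision above (editor's sketch; the boolean parameters are placeholders for the conditions computed in the function):
// An interleave group can be widened unless it needs a mask (it sits in a
// predicated block, or its gaps must be masked because no scalar epilogue is
// allowed); in that case the target must support a masked load/store of the
// member value type.
bool canWidenInterleaveGroup(bool PredicatedAndMaskRequired,
                             bool GapsRequireMasking, bool TargetHasMaskedOp) {
  if (!PredicatedAndMaskRequired && !GapsRequireMasking)
    return true;
  return TargetHasMaskedOp;
}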
bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
unsigned VF) {
// Get and ensure we have a valid memory instruction.
@@ -4407,7 +4450,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// already does this check. Collecting Uniforms for VF=1 does not make any
// sense.
- assert(VF >= 2 && !Uniforms.count(VF) &&
+ assert(VF >= 2 && Uniforms.find(VF) == Uniforms.end() &&
"This function should not be visited twice for the same VF");
// Visit the list of Uniforms. If we'll not find any uniform value, we'll
@@ -4494,20 +4537,20 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// Add to the Worklist all consecutive and consecutive-like pointers that
// aren't also identified as possibly non-uniform.
for (auto *V : ConsecutiveLikePtrs)
- if (!PossibleNonUniformPtrs.count(V)) {
+ if (PossibleNonUniformPtrs.find(V) == PossibleNonUniformPtrs.end()) {
LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n");
Worklist.insert(V);
}
// Expand Worklist in topological order: whenever a new instruction
- // is added , its users should be either already inside Worklist, or
- // out of scope. It ensures a uniform instruction will only be used
- // by uniform instructions or out of scope instructions.
+ // is added, its users should already be inside Worklist. It ensures
+ // a uniform instruction will only be used by uniform instructions.
unsigned idx = 0;
while (idx != Worklist.size()) {
Instruction *I = Worklist[idx++];
for (auto OV : I->operand_values()) {
+ // isOutOfScope operands cannot be uniform instructions.
if (isOutOfScope(OV))
continue;
// First order recurrence Phi's should typically be considered
@@ -4520,7 +4563,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
auto *OI = cast<Instruction>(OV);
if (llvm::all_of(OI->users(), [&](User *U) -> bool {
auto *J = cast<Instruction>(U);
- return !TheLoop->contains(J) || Worklist.count(J) ||
+ return Worklist.count(J) ||
(OI == getLoadStorePointerOperand(J) &&
isUniformDecision(J, VF));
})) {
@@ -4578,318 +4621,6 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
Uniforms[VF].insert(Worklist.begin(), Worklist.end());
}
-void InterleavedAccessInfo::collectConstStrideAccesses(
- MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides) {
- auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-
- // Since it's desired that the load/store instructions be maintained in
- // "program order" for the interleaved access analysis, we have to visit the
- // blocks in the loop in reverse postorder (i.e., in a topological order).
- // Such an ordering will ensure that any load/store that may be executed
- // before a second load/store will precede the second load/store in
- // AccessStrideInfo.
- LoopBlocksDFS DFS(TheLoop);
- DFS.perform(LI);
- for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
- for (auto &I : *BB) {
- auto *LI = dyn_cast<LoadInst>(&I);
- auto *SI = dyn_cast<StoreInst>(&I);
- if (!LI && !SI)
- continue;
-
- Value *Ptr = getLoadStorePointerOperand(&I);
- // We don't check wrapping here because we don't know yet if Ptr will be
- // part of a full group or a group with gaps. Checking wrapping for all
- // pointers (even those that end up in groups with no gaps) will be overly
- // conservative. For full groups, wrapping should be ok since if we would
- // wrap around the address space we would do a memory access at nullptr
- // even without the transformation. The wrapping checks are therefore
- // deferred until after we've formed the interleaved groups.
- int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
- /*Assume=*/true, /*ShouldCheckWrap=*/false);
-
- const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
- PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
- uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
- // An alignment of 0 means target ABI alignment.
- unsigned Align = getMemInstAlignment(&I);
- if (!Align)
- Align = DL.getABITypeAlignment(PtrTy->getElementType());
-
- AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
- }
-}
-
-// Analyze interleaved accesses and collect them into interleaved load and
-// store groups.
-//
-// When generating code for an interleaved load group, we effectively hoist all
-// loads in the group to the location of the first load in program order. When
-// generating code for an interleaved store group, we sink all stores to the
-// location of the last store. This code motion can change the order of load
-// and store instructions and may break dependences.
-//
-// The code generation strategy mentioned above ensures that we won't violate
-// any write-after-read (WAR) dependences.
-//
-// E.g., for the WAR dependence: a = A[i]; // (1)
-// A[i] = b; // (2)
-//
-// The store group of (2) is always inserted at or below (2), and the load
-// group of (1) is always inserted at or above (1). Thus, the instructions will
-// never be reordered. All other dependences are checked to ensure the
-// correctness of the instruction reordering.
-//
-// The algorithm visits all memory accesses in the loop in bottom-up program
-// order. Program order is established by traversing the blocks in the loop in
-// reverse postorder when collecting the accesses.
-//
-// We visit the memory accesses in bottom-up order because it can simplify the
-// construction of store groups in the presence of write-after-write (WAW)
-// dependences.
-//
-// E.g., for the WAW dependence: A[i] = a; // (1)
-// A[i] = b; // (2)
-// A[i + 1] = c; // (3)
-//
-// We will first create a store group with (3) and (2). (1) can't be added to
-// this group because it and (2) are dependent. However, (1) can be grouped
-// with other accesses that may precede it in program order. Note that a
-// bottom-up order does not imply that WAW dependences should not be checked.
-void InterleavedAccessInfo::analyzeInterleaving() {
- LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
- const ValueToValueMap &Strides = LAI->getSymbolicStrides();
-
- // Holds all accesses with a constant stride.
- MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
- collectConstStrideAccesses(AccessStrideInfo, Strides);
-
- if (AccessStrideInfo.empty())
- return;
-
- // Collect the dependences in the loop.
- collectDependences();
-
- // Holds all interleaved store groups temporarily.
- SmallSetVector<InterleaveGroup *, 4> StoreGroups;
- // Holds all interleaved load groups temporarily.
- SmallSetVector<InterleaveGroup *, 4> LoadGroups;
-
- // Search in bottom-up program order for pairs of accesses (A and B) that can
- // form interleaved load or store groups. In the algorithm below, access A
- // precedes access B in program order. We initialize a group for B in the
- // outer loop of the algorithm, and then in the inner loop, we attempt to
- // insert each A into B's group if:
- //
- // 1. A and B have the same stride,
- // 2. A and B have the same memory object size, and
- // 3. A belongs in B's group according to its distance from B.
- //
- // Special care is taken to ensure group formation will not break any
- // dependences.
- for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
- BI != E; ++BI) {
- Instruction *B = BI->first;
- StrideDescriptor DesB = BI->second;
-
- // Initialize a group for B if it has an allowable stride. Even if we don't
- // create a group for B, we continue with the bottom-up algorithm to ensure
- // we don't break any of B's dependences.
- InterleaveGroup *Group = nullptr;
- if (isStrided(DesB.Stride)) {
- Group = getInterleaveGroup(B);
- if (!Group) {
- LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
- << '\n');
- Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
- }
- if (B->mayWriteToMemory())
- StoreGroups.insert(Group);
- else
- LoadGroups.insert(Group);
- }
-
- for (auto AI = std::next(BI); AI != E; ++AI) {
- Instruction *A = AI->first;
- StrideDescriptor DesA = AI->second;
-
- // Our code motion strategy implies that we can't have dependences
- // between accesses in an interleaved group and other accesses located
- // between the first and last member of the group. Note that this also
- // means that a group can't have more than one member at a given offset.
- // The accesses in a group can have dependences with other accesses, but
- // we must ensure we don't extend the boundaries of the group such that
- // we encompass those dependent accesses.
- //
- // For example, assume we have the sequence of accesses shown below in a
- // stride-2 loop:
- //
- // (1, 2) is a group | A[i] = a; // (1)
- // | A[i-1] = b; // (2) |
- // A[i-3] = c; // (3)
- // A[i] = d; // (4) | (2, 4) is not a group
- //
- // Because accesses (2) and (3) are dependent, we can group (2) with (1)
- // but not with (4). If we did, the dependent access (3) would be within
- // the boundaries of the (2, 4) group.
- if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
- // If a dependence exists and A is already in a group, we know that A
- // must be a store since A precedes B and WAR dependences are allowed.
- // Thus, A would be sunk below B. We release A's group to prevent this
- // illegal code motion. A will then be free to form another group with
- // instructions that precede it.
- if (isInterleaved(A)) {
- InterleaveGroup *StoreGroup = getInterleaveGroup(A);
- StoreGroups.remove(StoreGroup);
- releaseGroup(StoreGroup);
- }
-
- // If a dependence exists and A is not already in a group (or it was
- // and we just released it), B might be hoisted above A (if B is a
- // load) or another store might be sunk below A (if B is a store). In
- // either case, we can't add additional instructions to B's group. B
- // will only form a group with instructions that it precedes.
- break;
- }
-
- // At this point, we've checked for illegal code motion. If either A or B
- // isn't strided, there's nothing left to do.
- if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
- continue;
-
- // Ignore A if it's already in a group or isn't the same kind of memory
- // operation as B.
- // Note that mayReadFromMemory() isn't mutually exclusive to mayWriteToMemory
- // in the case of atomic loads. We shouldn't see those here, canVectorizeMemory()
- // should have returned false - except for the case we asked for optimization
- // remarks.
- if (isInterleaved(A) || (A->mayReadFromMemory() != B->mayReadFromMemory())
- || (A->mayWriteToMemory() != B->mayWriteToMemory()))
- continue;
-
- // Check rules 1 and 2. Ignore A if its stride or size is different from
- // that of B.
- if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
- continue;
-
- // Ignore A if the memory object of A and B don't belong to the same
- // address space
- if (getMemInstAddressSpace(A) != getMemInstAddressSpace(B))
- continue;
-
- // Calculate the distance from A to B.
- const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
- PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
- if (!DistToB)
- continue;
- int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
-
- // Check rule 3. Ignore A if its distance to B is not a multiple of the
- // size.
- if (DistanceToB % static_cast<int64_t>(DesB.Size))
- continue;
-
- // Ignore A if either A or B is in a predicated block. Although we
- // currently prevent group formation for predicated accesses, we may be
- // able to relax this limitation in the future once we handle more
- // complicated blocks.
- if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
- continue;
-
- // The index of A is the index of B plus A's distance to B in multiples
- // of the size.
- int IndexA =
- Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
-
- // Try to insert A into B's group.
- if (Group->insertMember(A, IndexA, DesA.Align)) {
- LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
- << " into the interleave group with" << *B
- << '\n');
- InterleaveGroupMap[A] = Group;
-
- // Set the first load in program order as the insert position.
- if (A->mayReadFromMemory())
- Group->setInsertPos(A);
- }
- } // Iteration over A accesses.
- } // Iteration over B accesses.
-
- // Remove interleaved store groups with gaps.
- for (InterleaveGroup *Group : StoreGroups)
- if (Group->getNumMembers() != Group->getFactor()) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved store group due "
- "to gaps.\n");
- releaseGroup(Group);
- }
- // Remove interleaved groups with gaps (currently only loads) whose memory
- // accesses may wrap around. We have to revisit the getPtrStride analysis,
- // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
- // not check wrapping (see documentation there).
- // FORNOW we use Assume=false;
- // TODO: Change to Assume=true but making sure we don't exceed the threshold
- // of runtime SCEV assumptions checks (thereby potentially failing to
- // vectorize altogether).
- // Additional optional optimizations:
- // TODO: If we are peeling the loop and we know that the first pointer doesn't
- // wrap then we can deduce that all pointers in the group don't wrap.
- // This means that we can forcefully peel the loop in order to only have to
- // check the first pointer for no-wrap. When we'll change to use Assume=true
- // we'll only need at most one runtime check per interleaved group.
- for (InterleaveGroup *Group : LoadGroups) {
- // Case 1: A full group. Can Skip the checks; For full groups, if the wide
- // load would wrap around the address space we would do a memory access at
- // nullptr even without the transformation.
- if (Group->getNumMembers() == Group->getFactor())
- continue;
-
- // Case 2: If first and last members of the group don't wrap this implies
- // that all the pointers in the group don't wrap.
- // So we check only group member 0 (which is always guaranteed to exist),
- // and group member Factor - 1; If the latter doesn't exist we rely on
- // peeling (if it is a non-reversed access -- see Case 3).
- Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
- if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "first group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
- continue;
- }
- Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
- if (LastMember) {
- Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
- if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "last group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
- }
- } else {
- // Case 3: A non-reversed interleaved load group with gaps: We need
- // to execute at least one scalar epilogue iteration. This will ensure
- // we don't speculatively access memory out-of-bounds. We only need
- // to look for a member at index factor - 1, since every group must have
- // a member at index zero.
- if (Group->isReverse()) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "a reverse access with gaps.\n");
- releaseGroup(Group);
- continue;
- }
- LLVM_DEBUG(
- dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
- RequiresScalarEpilogue = true;
- }
- }
-}
-
Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
// TODO: It may be useful to do since it's still likely to be dynamically
@@ -4919,39 +4650,78 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
return None;
}
+ if (!PSE.getUnionPredicate().getPredicates().empty()) {
+ ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
+ << "runtime SCEV checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz");
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Aborting. Runtime SCEV check is required with -Os/-Oz.\n");
+ return None;
+ }
+
+ // FIXME: Avoid specializing for stride==1 instead of bailing out.
+ if (!Legal->getLAI()->getSymbolicStrides().empty()) {
+ ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
+ << "runtime stride == 1 checks needed. Enable vectorization of "
+ "this loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz");
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Aborting. Runtime stride check is required with -Os/-Oz.\n");
+ return None;
+ }
+
// If we optimize the program for size, avoid creating the tail loop.
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
- // If we don't know the precise trip count, don't try to vectorize.
- if (TC < 2) {
- ORE->emit(
- createMissedAnalysis("UnknownLoopCountComplexCFG")
- << "unable to calculate the loop count due to complex control flow");
- LLVM_DEBUG(
- dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
+ if (TC == 1) {
+ ORE->emit(createMissedAnalysis("SingleIterationLoop")
+ << "loop trip count is one, irrelevant for vectorization");
+ LLVM_DEBUG(dbgs() << "LV: Aborting, single iteration (non) loop.\n");
return None;
}
+ // Record that scalar epilogue is not allowed.
+ LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n");
+
+ IsScalarEpilogueAllowed = !OptForSize;
+
+ // We don't create an epilogue when optimizing for size.
+ // Invalidate interleave groups that require an epilogue if we can't mask
+ // the interleave-group.
+ if (!useMaskedInterleavedAccesses(TTI))
+ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
+
unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC);
- if (TC % MaxVF != 0) {
- // If the trip count that we found modulo the vectorization factor is not
- // zero then we require a tail.
- // FIXME: look for a smaller MaxVF that does divide TC rather than give up.
- // FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a
- // smaller MaxVF that does not require a scalar epilog.
-
- ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
- << "cannot optimize for size and vectorize at the "
- "same time. Enable vectorization of this loop "
- "with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os/-Oz");
- LLVM_DEBUG(
- dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
+ if (TC > 0 && TC % MaxVF == 0) {
+ LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
+ return MaxVF;
+ }
+
+ // If we don't know the precise trip count, or if the trip count that we
+ // found modulo the vectorization factor is not zero, try to fold the tail
+ // by masking.
+ // FIXME: look for a smaller MaxVF that does divide TC rather than masking.
+ if (Legal->canFoldTailByMasking()) {
+ FoldTailByMasking = true;
+ return MaxVF;
+ }
+
+ if (TC == 0) {
+ ORE->emit(
+ createMissedAnalysis("UnknownLoopCountComplexCFG")
+ << "unable to calculate the loop count due to complex control flow");
return None;
}
- return MaxVF;
+ ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
+ << "cannot optimize for size and vectorize at the same time. "
+ "Enable vectorization of this loop with '#pragma clang loop "
+ "vectorize(enable)' when compiling with -Os/-Oz");
+ return None;
}
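The tail-handling policy introduced above, condensed into a sketch (editor's illustration; a return value of -1 stands in for None):
// Under -Os/-Oz: keep MaxVF if no tail remains, otherwise fold the tail by
// masking when that is legal; failing that, vectorization is abandoned.
int maxVFWithOptForSize(uint64_t TC, unsigned MaxVF, bool CanFoldTail,
                        bool &FoldTailByMasking) {
  if (TC > 0 && TC % MaxVF == 0)
    return MaxVF;               // trip count divides evenly: no tail at all
  if (CanFoldTail) {
    FoldTailByMasking = true;   // tail iterations handled by the block mask
    return MaxVF;
  }
  return -1;                    // unknown trip count or leftover tail: give up
}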
unsigned
@@ -5087,11 +4857,11 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
// For each instruction in the loop.
- for (Instruction &I : *BB) {
+ for (Instruction &I : BB->instructionsWithoutDebug()) {
Type *T = I.getType();
// Skip ignored values.
- if (ValuesToIgnore.count(&I))
+ if (ValuesToIgnore.find(&I) != ValuesToIgnore.end())
continue;
// Only examine Loads, Stores and PHINodes.
@@ -5189,6 +4959,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
// fit without causing spills. All of this is rounded down if necessary to be
// a power of two. We want power of two interleave count to simplify any
// addressing operations or alignment considerations.
+ // We also want power-of-two interleave counts to ensure that the induction
+ // variable of the vector loop wraps to zero when the tail is folded by
+ // masking; this currently happens under OptForSize, in which case IC is set
+ // to 1 above.
unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
R.MaxLocalUsers);
@@ -5314,7 +5087,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
using IntervalMap = DenseMap<Instruction *, unsigned>;
// Maps instruction to its index.
- DenseMap<unsigned, Instruction *> IdxToInstr;
+ SmallVector<Instruction *, 64> IdxToInstr;
// Marks the end of each interval.
IntervalMap EndPoint;
// Saves the list of instruction indices that are used in the loop.
@@ -5323,10 +5096,9 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
// defined outside the loop, such as arguments and constants.
SmallPtrSet<Value *, 8> LoopInvariants;
- unsigned Index = 0;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
- for (Instruction &I : *BB) {
- IdxToInstr[Index++] = &I;
+ for (Instruction &I : BB->instructionsWithoutDebug()) {
+ IdxToInstr.push_back(&I);
// Save the end location of each USE.
for (Value *U : I.operands()) {
@@ -5343,7 +5115,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
}
// Overwrite previous end points.
- EndPoint[Instr] = Index;
+ EndPoint[Instr] = IdxToInstr.size();
Ends.insert(Instr);
}
}
@@ -5380,7 +5152,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
return std::max<unsigned>(1, VF * TypeSize / WidestRegister);
};
- for (unsigned int i = 0; i < Index; ++i) {
+ for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
Instruction *I = IdxToInstr[i];
// Remove all of the instructions that end at this location.
@@ -5389,11 +5161,11 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
OpenIntervals.erase(ToRemove);
// Ignore instructions that are never used within the loop.
- if (!Ends.count(I))
+ if (Ends.find(I) == Ends.end())
continue;
// Skip ignored values.
- if (ValuesToIgnore.count(I))
+ if (ValuesToIgnore.find(I) != ValuesToIgnore.end())
continue;
// For each VF find the maximum usage of registers.
@@ -5407,7 +5179,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
unsigned RegUsage = 0;
for (auto Inst : OpenIntervals) {
// Skip ignored values for VF > 1.
- if (VecValuesToIgnore.count(Inst) ||
+ if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() ||
isScalarAfterVectorization(Inst, VFs[j]))
continue;
RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
@@ -5453,8 +5225,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
// from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
- assert(isScalarWithPredication(I) &&
- "Expecting a scalar emulated instruction");
+ assert(isPredicatedInst(I) && "Expecting a scalar emulated instruction");
return isa<LoadInst>(I) ||
(isa<StoreInst>(I) &&
NumPredStores > NumberOfStoresToPredicate);
@@ -5465,7 +5236,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
// instructions to scalarize, there's nothing to do. Collection may already
// have occurred if we have a user-selected VF and are now computing the
// expected cost for interleaving.
- if (VF < 2 || InstsToScalarize.count(VF))
+ if (VF < 2 || InstsToScalarize.find(VF) != InstsToScalarize.end())
return;
// Initialize a mapping for VF in InstsToScalalarize. If we find that it's
@@ -5477,7 +5248,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
// determine if it would be better to not if-convert the blocks they are in.
// If so, we also record the instructions to scalarize.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (!Legal->blockNeedsPredication(BB))
+ if (!blockNeedsPredication(BB))
continue;
for (Instruction &I : *BB)
if (isScalarWithPredication(&I)) {
@@ -5560,7 +5331,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
Instruction *I = Worklist.pop_back_val();
// If we've already analyzed the instruction, there's nothing to do.
- if (ScalarCosts.count(I))
+ if (ScalarCosts.find(I) != ScalarCosts.end())
continue;
// Compute the cost of the vector instruction. Note that this cost already
@@ -5619,8 +5390,8 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
// For each instruction in the old loop.
for (Instruction &I : BB->instructionsWithoutDebug()) {
// Skip ignored values.
- if (ValuesToIgnore.count(&I) ||
- (VF > 1 && VecValuesToIgnore.count(&I)))
+ if (ValuesToIgnore.find(&I) != ValuesToIgnore.end() ||
+ (VF > 1 && VecValuesToIgnore.find(&I) != VecValuesToIgnore.end()))
continue;
VectorizationCostTy C = getInstructionCost(&I, VF);
@@ -5642,7 +5413,7 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
// unconditionally executed. For the scalar case, we may not always execute
// the predicated block. Thus, scale the block's cost by the probability of
// executing it.
- if (VF == 1 && Legal->blockNeedsPredication(BB))
+ if (VF == 1 && blockNeedsPredication(BB))
BlockCost.first /= getReciprocalPredBlockProb();
Cost.first += BlockCost.first;
@@ -5689,11 +5460,12 @@ static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
unsigned VF) {
+ assert(VF > 1 && "Scalarization cost of instruction implies vectorization.");
Type *ValTy = getMemInstValueType(I);
auto SE = PSE.getSE();
- unsigned Alignment = getMemInstAlignment(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -5704,9 +5476,11 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// Get the cost of the scalar memory instruction and address computation.
unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+ // Don't pass *I here, since it is scalar but will actually be part of a
+ // vectorized loop where the user of it is a vectorized instruction.
Cost += VF *
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
- AS, I);
+ AS);
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
@@ -5715,7 +5489,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// If we have a predicated store, it may not be executed for each vector
// lane. Scale the cost by the probability of executing the predicated
// block.
- if (isScalarWithPredication(I)) {
+ if (isPredicatedInst(I)) {
Cost /= getReciprocalPredBlockProb();
if (useEmulatedMaskMemRefHack(I))
@@ -5731,9 +5505,9 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getMemInstAlignment(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
@@ -5752,22 +5526,30 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
unsigned VF) {
- LoadInst *LI = cast<LoadInst>(I);
- Type *ValTy = LI->getType();
+ Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = LI->getAlignment();
- unsigned AS = LI->getPointerAddressSpace();
+ unsigned Alignment = getLoadStoreAlignment(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
+ if (isa<LoadInst>(I)) {
+ return TTI.getAddressComputationCost(ValTy) +
+ TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
+ TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
+ }
+ StoreInst *SI = cast<StoreInst>(I);
+ bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
- TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
- TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
+ TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
+ (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost(
+ Instruction::ExtractElement,
+ VectorTy, VF - 1));
}
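The cost formula implemented above, restated as a sketch (editor's illustration; the parameters stand in for the corresponding TTI query results):
// Uniform-address load: scalar load plus a broadcast. Uniform-address store:
// scalar store, plus an extract of the last vector lane when the stored value
// is not loop-invariant.
unsigned uniformMemOpCost(bool IsLoad, bool StoreValIsInvariant,
                          unsigned AddrCost, unsigned ScalarMemCost,
                          unsigned BroadcastCost, unsigned ExtractCost) {
  if (IsLoad)
    return AddrCost + ScalarMemCost + BroadcastCost;
  return AddrCost + ScalarMemCost + (StoreValIsInvariant ? 0 : ExtractCost);
}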
unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned Alignment = getMemInstAlignment(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
return TTI.getAddressComputationCost(VectorTy) +
@@ -5779,7 +5561,7 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
Type *VectorTy = ToVectorTy(ValTy, VF);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
auto Group = getInterleavedAccessGroup(I);
assert(Group && "Fail to get an interleaved access group.");
@@ -5797,13 +5579,19 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
}
// Calculate the cost of the whole interleaved group.
- unsigned Cost = TTI.getInterleavedMemoryOpCost(I->getOpcode(), WideVecTy,
- Group->getFactor(), Indices,
- Group->getAlignment(), AS);
-
- if (Group->isReverse())
+ bool UseMaskForGaps =
+ Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed;
+ unsigned Cost = TTI.getInterleavedMemoryOpCost(
+ I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
+ Group->getAlignment(), AS, Legal->isMaskRequired(I), UseMaskForGaps);
+
+ if (Group->isReverse()) {
+ // TODO: Add support for reversed masked interleaved access.
+ assert(!Legal->isMaskRequired(I) &&
+ "Reverse masked interleaved access not supported.");
Cost += Group->getNumMembers() *
TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
+ }
return Cost;
}
@@ -5813,8 +5601,8 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
// moment.
if (VF == 1) {
Type *ValTy = getMemInstValueType(I);
- unsigned Alignment = getMemInstAlignment(I);
- unsigned AS = getMemInstAddressSpace(I);
+ unsigned Alignment = getLoadStoreAlignment(I);
+ unsigned AS = getLoadStoreAddressSpace(I);
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
@@ -5833,9 +5621,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return VectorizationCostTy(InstsToScalarize[VF][I], false);
// Forced scalars do not have any scalarization overhead.
- if (VF > 1 && ForcedScalars.count(VF) &&
- ForcedScalars.find(VF)->second.count(I))
- return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false);
+ auto ForcedScalar = ForcedScalars.find(VF);
+ if (VF > 1 && ForcedScalar != ForcedScalars.end()) {
+ auto InstSet = ForcedScalar->second;
+ if (InstSet.find(I) != InstSet.end())
+ return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false);
+ }
Type *VectorTy;
unsigned C = getInstructionCost(I, VF, VectorTy);
@@ -5856,10 +5647,22 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
if (!Ptr)
continue;
+ // TODO: We should generate better code and update the cost model for
+ // predicated uniform stores. Today they are treated like any other
+ // predicated store (see added test cases in
+ // invariant-store-vectorization.ll).
if (isa<StoreInst>(&I) && isScalarWithPredication(&I))
NumPredStores++;
- if (isa<LoadInst>(&I) && Legal->isUniform(Ptr)) {
- // Scalar load + broadcast
+
+ if (Legal->isUniform(Ptr) &&
+ // Conditional loads and stores should be scalarized and predicated.
+ // isScalarWithPredication cannot be used here since masked
+ // gather/scatters are not considered scalar with predication.
+ !Legal->blockNeedsPredication(I.getParent())) {
+ // TODO: Avoid replicating loads and stores instead of
+ // relying on instcombine to remove them.
+ // Load: Scalar load + broadcast
+ // Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract
unsigned Cost = getUniformMemOpCost(&I, VF);
setWideningDecision(&I, VF, CM_Scalarize, Cost);
continue;
@@ -5890,7 +5693,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
continue;
NumAccesses = Group->getNumMembers();
- InterleaveCost = getInterleaveGroupCost(&I, VF);
+ if (interleavedAccessCanBeWidened(&I, VF))
+ InterleaveCost = getInterleaveGroupCost(&I, VF);
}
unsigned GatherScatterCost =
@@ -6008,8 +5812,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
bool ScalarPredicatedBB = false;
BranchInst *BI = cast<BranchInst>(I);
if (VF > 1 && BI->isConditional() &&
- (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) ||
- PredicatedBBsAfterVectorization.count(BI->getSuccessor(1))))
+ (PredicatedBBsAfterVectorization.find(BI->getSuccessor(0)) !=
+ PredicatedBBsAfterVectorization.end() ||
+ PredicatedBBsAfterVectorization.find(BI->getSuccessor(1)) !=
+ PredicatedBBsAfterVectorization.end()))
ScalarPredicatedBB = true;
if (ScalarPredicatedBB) {
@@ -6032,9 +5838,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
auto *Phi = cast<PHINode>(I);
// First-order recurrences are replaced by vector shuffles inside the loop.
+ // NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
if (VF > 1 && Legal->isFirstOrderRecurrence(Phi))
return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- VectorTy, VF - 1, VectorTy);
+ VectorTy, VF - 1, VectorType::get(RetTy, 1));
// Phi nodes in non-header blocks (not inductions, reductions, etc.) are
// converted into select instructions. We require N - 1 selects per phi
@@ -6096,38 +5903,18 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return 0;
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
- TargetTransformInfo::OperandValueKind Op1VK =
- TargetTransformInfo::OK_AnyValue;
- TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_AnyValue;
- TargetTransformInfo::OperandValueProperties Op1VP =
- TargetTransformInfo::OP_None;
- TargetTransformInfo::OperandValueProperties Op2VP =
- TargetTransformInfo::OP_None;
Value *Op2 = I->getOperand(1);
-
- // Check for a splat or for a non uniform vector of constants.
- if (isa<ConstantInt>(Op2)) {
- ConstantInt *CInt = cast<ConstantInt>(Op2);
- if (CInt && CInt->getValue().isPowerOf2())
- Op2VP = TargetTransformInfo::OP_PowerOf2;
- Op2VK = TargetTransformInfo::OK_UniformConstantValue;
- } else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
- Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
- Constant *SplatValue = cast<Constant>(Op2)->getSplatValue();
- if (SplatValue) {
- ConstantInt *CInt = dyn_cast<ConstantInt>(SplatValue);
- if (CInt && CInt->getValue().isPowerOf2())
- Op2VP = TargetTransformInfo::OP_PowerOf2;
- Op2VK = TargetTransformInfo::OK_UniformConstantValue;
- }
- } else if (Legal->isUniform(Op2)) {
+ TargetTransformInfo::OperandValueProperties Op2VP;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TTI.getOperandInfo(Op2, Op2VP);
+ if (Op2VK == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
Op2VK = TargetTransformInfo::OK_UniformValue;
- }
+
SmallVector<const Value *, 4> Operands(I->operand_values());
unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
- return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
- Op2VK, Op1VP, Op2VP, Operands);
+ return N * TTI.getArithmeticInstrCost(
+ I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+ Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -6244,8 +6031,9 @@ INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
-Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
- return new LoopVectorize(NoUnrolling, AlwaysVectorize);
+Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
+ bool VectorizeOnlyWhenForced) {
+ return new LoopVectorize(InterleaveOnlyWhenForced, VectorizeOnlyWhenForced);
}
} // end namespace llvm
@@ -6323,6 +6111,16 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize.
return NoVectorization;
+ // Invalidate interleave groups if all blocks of the loop will be predicated.
+ if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
+ !useMaskedInterleavedAccesses(*TTI)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Invalidate all interleaved groups due to fold-tail by masking "
+ "which requires masked-interleaved support.\n");
+ CM.InterleaveInfo.reset();
+ }
+
if (UserVF) {
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
@@ -6379,6 +6177,7 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
DT, ILV.Builder, ILV.VectorLoopValueMap,
&ILV, CallbackILV};
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
+ State.TripCount = ILV.getOrCreateTripCount(nullptr);
//===------------------------------------------------===//
//
@@ -6415,7 +6214,8 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
PHINode *Ind = Induction.first;
auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
- return U == Ind || DeadInstructions.count(cast<Instruction>(U));
+ return U == Ind || DeadInstructions.find(cast<Instruction>(U)) !=
+ DeadInstructions.end();
}))
DeadInstructions.insert(IndUpdate);
@@ -6558,9 +6358,17 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
// load/store/gather/scatter. Initialize BlockMask to no-mask.
VPValue *BlockMask = nullptr;
- // Loop incoming mask is all-one.
- if (OrigLoop->getHeader() == BB)
+ if (OrigLoop->getHeader() == BB) {
+ if (!CM.blockNeedsPredication(BB))
+ return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
+
+ // Introduce the early-exit compare IV <= BTC to form header block mask.
+ // This is used instead of IV < TC because TC may wrap, unlike BTC.
+ VPValue *IV = Plan->getVPValue(Legal->getPrimaryInduction());
+ VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
+ BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC});
return BlockMaskCache[BB] = BlockMask;
+ }
// This is the block mask. We OR all incoming edges.
for (auto *Predecessor : predecessors(BB)) {
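Why the header mask compares against the backedge-taken count rather than the trip count: a minimal sketch (editor's illustration, assuming an i8 induction for emphasis):
// With 256 iterations an i8 trip count truncates to 0, so "IV < TC" would be
// all-false; BTC = 255 is representable and "IV <= BTC" keeps exactly the
// first 256 iterations' lanes active.
bool headerMaskLane(uint8_t LaneIV, uint8_t BTC) { return LaneIV <= BTC; }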
@@ -6580,8 +6388,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
}
VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
- VFRange &Range) {
- const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I);
+ VFRange &Range,
+ VPlanPtr &Plan) {
+ const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
if (!IG)
return nullptr;
@@ -6602,7 +6411,11 @@ VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
assert(I == IG->getInsertPos() &&
"Generating a recipe for an adjunct member of an interleave group");
- return new VPInterleaveRecipe(IG);
+ VPValue *Mask = nullptr;
+ if (Legal->isMaskRequired(I))
+ Mask = createBlockInMask(I->getParent(), Plan);
+
+ return new VPInterleaveRecipe(IG, Mask);
}
VPWidenMemoryInstructionRecipe *
@@ -6695,7 +6508,11 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(Instruction *I, VPlanPtr &Plan) {
bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
VFRange &Range) {
- if (CM.isScalarWithPredication(I))
+
+ bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
+
+ if (IsPredicated)
return false;
auto IsVectorizableOpcode = [](unsigned Opcode) {
@@ -6802,7 +6619,9 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
[&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
- bool IsPredicated = CM.isScalarWithPredication(I);
+ bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
+
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
// Find if I uses a predicated instruction. If so, it will use its scalar
@@ -6864,7 +6683,7 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
VPRecipeBase *Recipe = nullptr;
// Check if Instr should belong to an interleave memory recipe, or already
// does. In the latter case Instr is irrelevant.
- if ((Recipe = tryToInterleaveMemory(Instr, Range))) {
+ if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
VPBB->appendRecipe(Recipe);
return true;
}
@@ -6915,6 +6734,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
NeedDef.insert(Branch->getCondition());
}
+ // If the tail is to be folded by masking, the primary induction variable
+ // needs to be represented in VPlan for it to model early-exit masking.
+ if (CM.foldTailByMasking())
+ NeedDef.insert(Legal->getPrimaryInduction());
+
// Collect instructions from the original loop that will become trivially dead
// in the vectorized loop. We don't need to vectorize these instructions. For
// example, original induction update instructions can become dead because we
@@ -6976,18 +6800,21 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// First filter out irrelevant instructions, to ensure no recipes are
// built for them.
- if (isa<BranchInst>(Instr) || DeadInstructions.count(Instr))
+ if (isa<BranchInst>(Instr) ||
+ DeadInstructions.find(Instr) != DeadInstructions.end())
continue;
// I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
// member of the IG, do not construct any Recipe for it.
- const InterleaveGroup *IG = CM.getInterleavedAccessGroup(Instr);
+ const InterleaveGroup<Instruction> *IG =
+ CM.getInterleavedAccessGroup(Instr);
if (IG && Instr != IG->getInsertPos() &&
Range.Start >= 2 && // Query is illegal for VF == 1
CM.getWideningDecision(Instr, Range.Start) ==
LoopVectorizationCostModel::CM_Interleave) {
- if (SinkAfterInverse.count(Instr))
- Ingredients.push_back(SinkAfterInverse.find(Instr)->second);
+ auto SinkCandidate = SinkAfterInverse.find(Instr);
+ if (SinkCandidate != SinkAfterInverse.end())
+ Ingredients.push_back(SinkCandidate->second);
continue;
}
@@ -7070,6 +6897,13 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
+ SmallPtrSet<Instruction *, 1> DeadInstructions;
+ VPlanHCFGTransforms::VPInstructionsToVPRecipes(
+ Plan, Legal->getInductionVars(), DeadInstructions);
+
+ for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
+ Plan->addVF(VF);
+
return Plan;
}
@@ -7082,6 +6916,10 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
O << " +\n"
<< Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
IG->getInsertPos()->printAsOperand(O, false);
+ if (User) {
+ O << ", ";
+ User->getOperand(0)->printAsOperand(O);
+ }
O << "\\l\"";
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *I = IG->getMember(i))
@@ -7144,7 +6982,15 @@ void VPBlendRecipe::execute(VPTransformState &State) {
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
- State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
+ if (!User)
+ return State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
+
+ // Last (and currently only) operand is a mask.
+ InnerLoopVectorizer::VectorParts MaskValues(State.UF);
+ VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ MaskValues[Part] = State.get(Mask, Part);
+ State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), &MaskValues);
}
void VPReplicateRecipe::execute(VPTransformState &State) {
@@ -7271,11 +7117,26 @@ static bool processLoopInVPlanNativePath(
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
// Plan how to best vectorize, return the best VF and its cost.
- LVP.planInVPlanNativePath(OptForSize, UserVF);
+ VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
- // Returning false. We are currently not generating vector code in the VPlan
- // native path.
- return false;
+ // If we are stress testing VPlan builds, do not attempt to generate vector
+ // code.
+ if (VPlanBuildStressTest)
+ return false;
+
+ LVP.setBestPlan(VF.Width, 1);
+
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, UserVF, 1, LVL,
+ &CM);
+ LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
+ << L->getHeader()->getParent()->getName() << "\"\n");
+ LVP.executePlan(LB, DT);
+
+ // Mark the loop as already vectorized to avoid vectorizing again.
+ Hints.setAlreadyVectorized();
+
+ LLVM_DEBUG(verifyFunction(*L->getHeader()->getParent()));
+ return true;
}
bool LoopVectorizePass::processLoop(Loop *L) {
@@ -7290,7 +7151,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
<< L->getHeader()->getParent()->getName() << "\" from "
<< DebugLocStr << "\n");
- LoopVectorizeHints Hints(L, DisableUnrolling, *ORE);
+ LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE);
LLVM_DEBUG(
dbgs() << "LV: Loop hints:"
@@ -7314,7 +7175,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// less verbose reporting vectorized loops and unvectorized loops that may
// benefit from vectorization, respectively.
- if (!Hints.allowVectorization(F, L, AlwaysVectorize)) {
+ if (!Hints.allowVectorization(F, L, VectorizeOnlyWhenForced)) {
LLVM_DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n");
return false;
}
@@ -7327,7 +7188,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
&Requirements, &Hints, DB, AC);
if (!LVL.canVectorize(EnableVPlanNativePath)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
- emitMissedWarning(F, L, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -7400,7 +7261,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(),
"NoImplicitFloat", L)
<< "loop not vectorized due to NoImplicitFloat attribute");
- emitMissedWarning(F, L, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -7415,7 +7276,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
ORE->emit(
createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
<< "loop not vectorized due to unsafe FP support.");
- emitMissedWarning(F, L, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -7428,7 +7289,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Analyze interleaved memory accesses.
if (UseInterleaved) {
- IAI.analyzeInterleaving();
+ IAI.analyzeInterleaving(useMaskedInterleavedAccesses(*TTI));
}
// Use the cost model.
@@ -7457,7 +7318,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (Requirements.doesNotMeet(F, L, Hints)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
"requirements.\n");
- emitMissedWarning(F, L, Hints, ORE);
+ Hints.emitRemarkWithHints();
return false;
}
@@ -7534,6 +7395,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LVP.setBestPlan(VF.Width, IC);
using namespace ore;
+ bool DisableRuntimeUnroll = false;
+ MDNode *OrigLoopID = L->getLoopID();
if (!VectorizeLoop) {
assert(IC > 1 && "interleave count should not be 1 or 0");
@@ -7560,7 +7423,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// no runtime checks about strides and memory. A scalar loop that is
// rarely used is not worth unrolling.
if (!LB.areSafetyChecksAdded())
- AddRuntimeUnrollDisableMetaData(L);
+ DisableRuntimeUnroll = true;
// Report the vectorization decision.
ORE->emit([&]() {
@@ -7572,8 +7435,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
});
}
- // Mark the loop as already vectorized to avoid vectorizing again.
- Hints.setAlreadyVectorized();
+ Optional<MDNode *> RemainderLoopID =
+ makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
+ LLVMLoopVectorizeFollowupEpilogue});
+ if (RemainderLoopID.hasValue()) {
+ L->setLoopID(RemainderLoopID.getValue());
+ } else {
+ if (DisableRuntimeUnroll)
+ AddRuntimeUnrollDisableMetaData(L);
+
+ // Mark the loop as already vectorized to avoid vectorizing again.
+ Hints.setAlreadyVectorized();
+ }
LLVM_DEBUG(verifyFunction(*L->getHeader()->getParent()));
return true;
@@ -7666,8 +7539,15 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
- PA.preserve<LoopAnalysis>();
- PA.preserve<DominatorTreeAnalysis>();
+
+ // We currently do not preserve loopinfo/dominator analyses with outer loop
+ // vectorization. Until this is addressed, mark these analyses as preserved
+ // only for non-VPlan-native path.
+ // TODO: Preserve Loop and Dominator analyses for VPlan-native path.
+ if (!EnableVPlanNativePath) {
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ }
PA.preserve<BasicAA>();
PA.preserve<GlobalsAA>();
return PA;
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 32df6d581577..2e856a7e6802 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1536,12 +1536,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check for terminator values (e.g. invoke).
for (unsigned j = 0; j < VL.size(); ++j)
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
- TerminatorInst *Term = dyn_cast<TerminatorInst>(
- cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i)));
- if (Term) {
- LLVM_DEBUG(
- dbgs()
- << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
+ Instruction *Term = dyn_cast<Instruction>(
+ cast<PHINode>(VL[j])->getIncomingValueForBlock(
+ PH->getIncomingBlock(i)));
+ if (Term && Term->isTerminator()) {
+ LLVM_DEBUG(dbgs()
+ << "SLP: Need to swizzle PHINodes (terminator use).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
return;
@@ -2164,7 +2164,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// extractelement/ext pair.
DeadCost -= TTI->getExtractWithExtendCost(
Ext->getOpcode(), Ext->getType(), VecTy, i);
- // Add back the cost of s|zext which is subtracted seperately.
+ // Add back the cost of s|zext which is subtracted separately.
DeadCost += TTI->getCastInstrCost(
Ext->getOpcode(), Ext->getType(), E->getType(), Ext);
continue;
@@ -2536,13 +2536,13 @@ int BoUpSLP::getTreeCost() {
// uses. However, we should not compute the cost of duplicate sequences.
// For example, if we have a build vector (i.e., insertelement sequence)
// that is used by more than one vector instruction, we only need to
- // compute the cost of the insertelement instructions once. The redundent
+ // compute the cost of the insertelement instructions once. The redundant
// instructions will be eliminated by CSE.
//
// We should consider not creating duplicate tree entries for gather
// sequences, and instead add additional edges to the tree representing
// their uses. Since such an approach results in fewer total entries,
- // existing heuristics based on tree size may yeild different results.
+ // existing heuristics based on tree size may yield different results.
//
if (TE.NeedToGather &&
std::any_of(std::next(VectorizableTree.begin(), I + 1),
@@ -3643,6 +3643,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
auto &Locs = ExternallyUsedValues[Scalar];
ExternallyUsedValues.insert({Ex, Locs});
ExternallyUsedValues.erase(Scalar);
+ // Required to update internally referenced instructions.
+ Scalar->replaceAllUsesWith(Ex);
continue;
}
@@ -3652,7 +3654,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
if (PHINode *PH = dyn_cast<PHINode>(User)) {
for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
if (PH->getIncomingValue(i) == Scalar) {
- TerminatorInst *IncomingTerminator =
+ Instruction *IncomingTerminator =
PH->getIncomingBlock(i)->getTerminator();
if (isa<CatchSwitchInst>(IncomingTerminator)) {
Builder.SetInsertPoint(VecI->getParent(),
@@ -3960,7 +3962,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
- assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
+ assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
return true;
}
@@ -3996,7 +3998,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
- assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
+ assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I
<< "\n");
return true;
@@ -4267,7 +4269,7 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
Worklist.push_back(I);
// Traverse the expression tree in bottom-up order looking for loads. If we
- // encounter an instruciton we don't yet handle, we give up.
+ // encounter an instruction we don't yet handle, we give up.
auto MaxWidth = 0u;
auto FoundUnknownInst = false;
while (!Worklist.empty() && !FoundUnknownInst) {
@@ -4840,7 +4842,7 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
continue;
if (GEP->getType()->isVectorTy())
continue;
- GEPs[GetUnderlyingObject(GEP->getPointerOperand(), *DL)].push_back(GEP);
+ GEPs[GEP->getPointerOperand()].push_back(GEP);
}
}
}
@@ -5126,9 +5128,12 @@ class HorizontalReduction {
/// Checks if the reduction operation can be vectorized.
bool isVectorizable() const {
return LHS && RHS &&
- // We currently only support adds && min/max reductions.
+ // We currently only support add/mul/logical && min/max reductions.
((Kind == RK_Arithmetic &&
- (Opcode == Instruction::Add || Opcode == Instruction::FAdd)) ||
+ (Opcode == Instruction::Add || Opcode == Instruction::FAdd ||
+ Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
+ Opcode == Instruction::And || Opcode == Instruction::Or ||
+ Opcode == Instruction::Xor)) ||
((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
(Kind == RK_Min || Kind == RK_Max)) ||
(Opcode == Instruction::ICmp &&
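A hypothetical source-level loop of the kind the widened predicate now accepts (this snippet is an illustration, not taken from the patch): an integer xor reduction, classified as RK_Arithmetic with Opcode == Instruction::Xor.

// Example of a horizontal reduction shape newly admitted above.
unsigned xorReduce(const unsigned *A, unsigned N) {
  unsigned Acc = 0;
  for (unsigned I = 0; I < N; ++I)
    Acc ^= A[I];
  return Acc;
}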
@@ -5450,7 +5455,7 @@ class HorizontalReduction {
}
};
- Instruction *ReductionRoot = nullptr;
+ WeakTrackingVH ReductionRoot;
/// The operation data of the reduction operation.
OperationData ReductionData;
@@ -5735,7 +5740,7 @@ public:
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
Value *VectorizedTree = nullptr;
- IRBuilder<> Builder(ReductionRoot);
+ IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
FastMathFlags Unsafe;
Unsafe.setFast();
Builder.setFastMathFlags(Unsafe);
@@ -5744,8 +5749,13 @@ public:
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
// The same extra argument may be used several times, so log each attempt
// to use it.
- for (auto &Pair : ExtraArgs)
+ for (auto &Pair : ExtraArgs) {
+ assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
+ }
+ // The reduction root is used as the insertion point for new instructions,
+ // so set it as externally used to prevent it from being deleted.
+ ExternallyUsedValues[ReductionRoot];
SmallVector<Value *, 16> IgnoreList;
for (auto &V : ReductionOps)
IgnoreList.append(V.begin(), V.end());
@@ -5797,6 +5807,7 @@ public:
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
// Emit a reduction.
+ Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
if (VectorizedTree) {
@@ -5823,8 +5834,6 @@ public:
VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps);
}
for (auto &Pair : ExternallyUsedValues) {
- assert(!Pair.second.empty() &&
- "At least one DebugLoc must be inserted");
// Add each externally used value to the final reduction.
for (auto *I : Pair.second) {
Builder.SetCurrentDebugLocation(I->getDebugLoc());
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index f43a8bb123b1..15d38ac9c84c 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,7 +69,8 @@ public:
/// \return value is <true, nullptr>, as it is handled by another recipe.
/// \p Range.End may be decreased to ensure same decision from \p Range.Start
/// to \p Range.End.
- VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range);
+ VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
+ VPlanPtr &Plan);
/// Check if \I is a memory instruction to be widened for \p Range.Start and
/// potentially masked. Such instructions are handled by a recipe that takes
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0780e70809d0..05a5400beb4e 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -44,6 +44,7 @@
#include <vector>
using namespace llvm;
+extern cl::opt<bool> EnableVPlanNativePath;
#define DEBUG_TYPE "vplan"
@@ -124,6 +125,20 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
auto &PredVPSuccessors = PredVPBB->getSuccessors();
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+
+ // In the outer loop vectorization scenario, the predecessor basic block may
+ // not yet be visited (backedge). Mark the VPBasicBlock for fixup at the end of
+ // vectorization. We do not encounter this case in inner loop vectorization
+ // as we start out by building a loop skeleton with the vector loop header
+ // and latch blocks. As a result, we never enter this function for the
+ // header block in the non VPlan-native path.
+ if (!PredBB) {
+ assert(EnableVPlanNativePath &&
+ "Unexpected null predecessor in non VPlan-native path");
+ CFG.VPBBsToFix.push_back(PredVPBB);
+ continue;
+ }
+
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
@@ -185,6 +200,31 @@ void VPBasicBlock::execute(VPTransformState *State) {
for (VPRecipeBase &Recipe : Recipes)
Recipe.execute(*State);
+ VPValue *CBV;
+ if (EnableVPlanNativePath && (CBV = getCondBit())) {
+ Value *IRCBV = CBV->getUnderlyingValue();
+ assert(IRCBV && "Unexpected null underlying value for condition bit");
+
+ // Condition bit value in a VPBasicBlock is used as the branch selector. In
+ // the VPlan-native path case, since all branches are uniform we generate a
+ // branch instruction using the condition value from vector lane 0 and dummy
+ // successors. The successors are fixed later when the successor blocks are
+ // visited.
+ Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0);
+ NewCond = State->Builder.CreateExtractElement(NewCond,
+ State->Builder.getInt32(0));
+
+ // Replace the temporary unreachable terminator with the new conditional
+ // branch.
+ auto *CurrentTerminator = NewBB->getTerminator();
+ assert(isa<UnreachableInst>(CurrentTerminator) &&
+ "Expected to replace unreachable terminator with conditional "
+ "branch.");
+ auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
+ CondBr->setSuccessor(0, nullptr);
+ ReplaceInstWithInst(CurrentTerminator, CondBr);
+ }
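A short sketch of the lane-0 extraction used above, assuming only that all lanes of the vectorized condition hold the same value (the helper uniformBranchCondition is invented for illustration); the branch it feeds is created with dummy successors that the later fixup pass over VPBBsToFix patches to the real blocks.

#include "llvm/IR/IRBuilder.h"

// Sketch only: derive the scalar branch condition of a uniform branch from
// lane 0 of its vectorized condition value.
static llvm::Value *uniformBranchCondition(llvm::IRBuilder<> &B,
                                           llvm::Value *VecCond) {
  return B.CreateExtractElement(VecCond, B.getInt32(0)); // lane 0
}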
+
LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}
@@ -194,6 +234,20 @@ void VPRegionBlock::execute(VPTransformState *State) {
if (!isReplicator()) {
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
+ if (EnableVPlanNativePath) {
+ // The inner loop vectorization path does not represent loop preheader
+ // and exit blocks as part of the VPlan. In the VPlan-native path, skip
+ // vectorizing loop preheader block. In future, we may replace this
+ // check with the check for loop preheader.
+ if (Block->getNumPredecessors() == 0)
+ continue;
+
+ // Skip vectorizing loop exit block. In future, we may replace this
+ // check with the check for loop exit.
+ if (Block->getNumSuccessors() == 0)
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
@@ -249,6 +303,13 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, V, Part);
break;
}
+ case VPInstruction::ICmpULE: {
+ Value *IV = State.get(getOperand(0), Part);
+ Value *TC = State.get(getOperand(1), Part);
+ Value *V = Builder.CreateICmpULE(IV, TC);
+ State.set(this, V, Part);
+ break;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -274,6 +335,15 @@ void VPInstruction::print(raw_ostream &O) const {
case VPInstruction::Not:
O << "not";
break;
+ case VPInstruction::ICmpULE:
+ O << "icmp ule";
+ break;
+ case VPInstruction::SLPLoad:
+ O << "combined load";
+ break;
+ case VPInstruction::SLPStore:
+ O << "combined store";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -288,6 +358,15 @@ void VPInstruction::print(raw_ostream &O) const {
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
+ // -1. Check if the backedge taken count is needed, and if so build it.
+ if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
+ Value *TC = State->TripCount;
+ IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
+ auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
+ "trip.count.minus.1");
+ Value2VPValue[TCMO] = BackedgeTakenCount;
+ }
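A minimal sketch of the computation above, assuming the scalar trip count is available at the preheader terminator (the helper emitBackedgeTakenCount is invented): the backedge-taken count is materialized once as TC - 1 so that recipes such as the ICmpULE header mask can consume it.

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"

// Sketch: materialize the backedge-taken count from the scalar trip count,
// mirroring the "trip.count.minus.1" subtraction emitted above.
static llvm::Value *emitBackedgeTakenCount(llvm::IRBuilder<> &B,
                                           llvm::Value *TripCount) {
  llvm::Type *Ty = TripCount->getType();
  return B.CreateSub(TripCount, llvm::ConstantInt::get(Ty, 1),
                     "trip.count.minus.1");
}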
+
// 0. Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
@@ -319,11 +398,32 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
+ // Setup branch terminator successors for VPBBs in VPBBsToFix based on
+ // VPBB's successors.
+ for (auto VPBB : State->CFG.VPBBsToFix) {
+ assert(EnableVPlanNativePath &&
+ "Unexpected VPBBsToFix in non VPlan-native path");
+ BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
+ assert(BB && "Unexpected null basic block for VPBB");
+
+ unsigned Idx = 0;
+ auto *BBTerminator = BB->getTerminator();
+
+ for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
+ VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
+ BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
+ ++Idx;
+ }
+ }
+
// 3. Merge the temporary latch created with the last basic-block filled.
BasicBlock *LastBB = State->CFG.PrevBB;
// Connect LastBB to VectorLatchBB to facilitate their merge.
- assert(isa<UnreachableInst>(LastBB->getTerminator()) &&
- "Expected VPlan CFG to terminate with unreachable");
+ assert((EnableVPlanNativePath ||
+ isa<UnreachableInst>(LastBB->getTerminator())) &&
+ "Expected InnerLoop VPlan CFG to terminate with unreachable");
+ assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
+ "Expected VPlan CFG to terminate with branch in NativePath");
LastBB->getTerminator()->eraseFromParent();
BranchInst::Create(VectorLatchBB, LastBB);
@@ -333,7 +433,9 @@ void VPlan::execute(VPTransformState *State) {
assert(Merged && "Could not merge last basic block with latch.");
VectorLatchBB = LastBB;
- updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
+ // We do not attempt to preserve DT for outer loop vectorization currently.
+ if (!EnableVPlanNativePath)
+ updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
}
void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
@@ -366,7 +468,7 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
"One successor of a basic block does not lead to the other.");
assert(InterimSucc->getSinglePredecessor() &&
"Interim successor has more than one predecessor.");
- assert(pred_size(PostDomSucc) == 2 &&
+ assert(PostDomSucc->hasNPredecessors(2) &&
"PostDom successor has more than two predecessors.");
DT->addNewBlock(InterimSucc, BB);
DT->addNewBlock(PostDomSucc, BB);
@@ -392,8 +494,11 @@ void VPlanPrinter::dump() {
OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
if (!Plan.getName().empty())
OS << "\\n" << DOT::EscapeString(Plan.getName());
- if (!Plan.Value2VPValue.empty()) {
+ if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) {
OS << ", where:";
+ if (Plan.BackedgeTakenCount)
+ OS << "\\n"
+ << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount";
for (auto Entry : Plan.Value2VPValue) {
OS << "\\n" << *Entry.second;
OS << DOT::EscapeString(" := ");
@@ -466,8 +571,10 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
if (const VPInstruction *CBI = dyn_cast<VPInstruction>(CBV)) {
CBI->printAsOperand(OS);
OS << " (" << DOT::EscapeString(CBI->getParent()->getName()) << ")\\l\"";
- } else
+ } else {
CBV->printAsOperand(OS);
+ OS << "\"";
+ }
}
bumpIndent(-2);
@@ -579,3 +686,55 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
}
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
+
+void VPValue::replaceAllUsesWith(VPValue *New) {
+ for (VPUser *User : users())
+ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
+ if (User->getOperand(I) == this)
+ User->setOperand(I, New);
+}
+
+void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
+ Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI) {
+ ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
+ for (VPBlockBase *Base : RPOT) {
+ visitBlock(Base, Old2New, IAI);
+ }
+}
+
+void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI) {
+ if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
+ for (VPRecipeBase &VPI : *VPBB) {
+ assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
+ auto *VPInst = cast<VPInstruction>(&VPI);
+ auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
+ auto *IG = IAI.getInterleaveGroup(Inst);
+ if (!IG)
+ continue;
+
+ auto NewIGIter = Old2New.find(IG);
+ if (NewIGIter == Old2New.end())
+ Old2New[IG] = new InterleaveGroup<VPInstruction>(
+ IG->getFactor(), IG->isReverse(), IG->getAlignment());
+
+ if (Inst == IG->getInsertPos())
+ Old2New[IG]->setInsertPos(VPInst);
+
+ InterleaveGroupMap[VPInst] = Old2New[IG];
+ InterleaveGroupMap[VPInst]->insertMember(
+ VPInst, IG->getIndex(Inst),
+ IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor());
+ }
+ } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
+ visitRegion(Region, Old2New, IAI);
+ else
+ llvm_unreachable("Unsupported kind of VPBlock.");
+}
+
+VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
+ InterleavedAccessInfo &IAI) {
+ Old2NewTy Old2New;
+ visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
+}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm/lib/Transforms/Vectorize/VPlan.h
index 883e6f52369a..5c1b4a83c30e 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -38,6 +38,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
#include <algorithm>
#include <cassert>
@@ -52,12 +53,14 @@ class LoopVectorizationCostModel;
class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
-class InterleaveGroup;
+template <class T> class InterleaveGroup;
+class LoopInfo;
class raw_ostream;
class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
+class VPlanSlp;
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
@@ -293,6 +296,10 @@ struct VPTransformState {
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
+ /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
+ /// up at the end of vector code generation.
+ SmallVector<VPBasicBlock *, 8> VPBBsToFix;
+
CFGState() = default;
} CFG;
@@ -313,6 +320,9 @@ struct VPTransformState {
/// Values they correspond to.
VPValue2ValueTy VPValue2Value;
+ /// Hold the trip count of the scalar loop.
+ Value *TripCount = nullptr;
+
/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
InnerLoopVectorizer *ILV;
@@ -600,10 +610,16 @@ public:
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPUser, public VPRecipeBase {
friend class VPlanHCFGTransforms;
+ friend class VPlanSlp;
public:
/// VPlan opcodes, extending LLVM IR with idiomatic instructions.
- enum { Not = Instruction::OtherOpsEnd + 1 };
+ enum {
+ Not = Instruction::OtherOpsEnd + 1,
+ ICmpULE,
+ SLPLoad,
+ SLPStore,
+ };
private:
typedef unsigned char OpcodeTy;
@@ -613,6 +629,13 @@ private:
/// modeled instruction.
void generateInstruction(VPTransformState &State, unsigned Part);
+protected:
+ Instruction *getUnderlyingInstr() {
+ return cast_or_null<Instruction>(getUnderlyingValue());
+ }
+
+ void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
+
public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
: VPUser(VPValue::VPInstructionSC, Operands),
@@ -626,6 +649,11 @@ public:
return V->getVPValueID() == VPValue::VPInstructionSC;
}
+ VPInstruction *clone() const {
+ SmallVector<VPValue *, 2> Operands(operands());
+ return new VPInstruction(Opcode, Operands);
+ }
+
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *R) {
return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
@@ -643,6 +671,14 @@ public:
/// Print the VPInstruction.
void print(raw_ostream &O) const;
+
+ /// Return true if this instruction may modify memory.
+ bool mayWriteToMemory() const {
+ // TODO: we can use attributes of the called function to rule out memory
+ // modifications.
+ return Opcode == Instruction::Store || Opcode == Instruction::Call ||
+ Opcode == Instruction::Invoke || Opcode == SLPStore;
+ }
};
/// VPWidenRecipe is a recipe for producing a copy of vector type for each
@@ -764,11 +800,15 @@ public:
/// or stores into one wide load/store and shuffles.
class VPInterleaveRecipe : public VPRecipeBase {
private:
- const InterleaveGroup *IG;
+ const InterleaveGroup<Instruction> *IG;
+ std::unique_ptr<VPUser> User;
public:
- VPInterleaveRecipe(const InterleaveGroup *IG)
- : VPRecipeBase(VPInterleaveSC), IG(IG) {}
+ VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask)
+ : VPRecipeBase(VPInterleaveSC), IG(IG) {
+ if (Mask) // Create a VPInstruction to register as a user of the mask.
+ User.reset(new VPUser({Mask}));
+ }
~VPInterleaveRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -782,7 +822,7 @@ public:
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent) const override;
- const InterleaveGroup *getInterleaveGroup() { return IG; }
+ const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
};
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
@@ -1107,6 +1147,10 @@ private:
// (operators '==' and '<').
SmallPtrSet<VPValue *, 16> VPExternalDefs;
+ /// Represents the backedge taken count of the original loop, for folding
+ /// the tail.
+ VPValue *BackedgeTakenCount = nullptr;
+
/// Holds a mapping between Values and their corresponding VPValue inside
/// VPlan.
Value2VPValueTy Value2VPValue;
@@ -1114,6 +1158,9 @@ private:
/// Holds the VPLoopInfo analysis for this VPlan.
VPLoopInfo VPLInfo;
+ /// Holds the condition bit values built during VPInstruction to VPRecipe
+ /// transformation.
+ SmallVector<VPValue *, 4> VPCBVs;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {}
@@ -1121,9 +1168,14 @@ public:
if (Entry)
VPBlockBase::deleteCFG(Entry);
for (auto &MapEntry : Value2VPValue)
- delete MapEntry.second;
+ if (MapEntry.second != BackedgeTakenCount)
+ delete MapEntry.second;
+ if (BackedgeTakenCount)
+ delete BackedgeTakenCount; // Delete once, if in Value2VPValue or not.
for (VPValue *Def : VPExternalDefs)
delete Def;
+ for (VPValue *CBV : VPCBVs)
+ delete CBV;
}
/// Generate the IR code for this VPlan.
@@ -1134,6 +1186,13 @@ public:
VPBlockBase *setEntry(VPBlockBase *Block) { return Entry = Block; }
+ /// The backedge taken count of the original loop.
+ VPValue *getOrCreateBackedgeTakenCount() {
+ if (!BackedgeTakenCount)
+ BackedgeTakenCount = new VPValue();
+ return BackedgeTakenCount;
+ }
+
void addVF(unsigned VF) { VFs.insert(VF); }
bool hasVF(unsigned VF) { return VFs.count(VF); }
@@ -1148,6 +1207,11 @@ public:
VPExternalDefs.insert(VPVal);
}
+ /// Add \p CBV to the vector of condition bit values.
+ void addCBV(VPValue *CBV) {
+ VPCBVs.push_back(CBV);
+ }
+
void addVPValue(Value *V) {
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
@@ -1429,6 +1493,144 @@ public:
}
};
+class VPInterleavedAccessInfo {
+private:
+ DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
+ InterleaveGroupMap;
+
+ /// Type for mapping of instruction based interleave groups to VPInstruction
+ /// interleave groups.
+ using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
+ InterleaveGroup<VPInstruction> *>;
+
+ /// Recursively traverse \p Region and populate VPlan based interleave groups
+ /// based on \p IAI.
+ void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI);
+ /// Recursively traverse \p Block and populate VPlan based interleave groups
+ /// based on \p IAI.
+ void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI);
+
+public:
+ VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
+
+ ~VPInterleavedAccessInfo() {
+ SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
+ // Avoid releasing a pointer twice.
+ for (auto &I : InterleaveGroupMap)
+ DelSet.insert(I.second);
+ for (auto *Ptr : DelSet)
+ delete Ptr;
+ }
+
+ /// Get the interleave group that \p Instr belongs to.
+ ///
+ /// \returns nullptr if \p Instr does not belong to any interleave group.
+ InterleaveGroup<VPInstruction> *
+ getInterleaveGroup(VPInstruction *Instr) const {
+ if (InterleaveGroupMap.count(Instr))
+ return InterleaveGroupMap.find(Instr)->second;
+ return nullptr;
+ }
+};
+
+/// Class that maps (parts of) an existing VPlan to trees of combined
+/// VPInstructions.
+class VPlanSlp {
+private:
+ enum class OpMode { Failed, Load, Opcode };
+
+ /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
+ /// DenseMap keys.
+ struct BundleDenseMapInfo {
+ static SmallVector<VPValue *, 4> getEmptyKey() {
+ return {reinterpret_cast<VPValue *>(-1)};
+ }
+
+ static SmallVector<VPValue *, 4> getTombstoneKey() {
+ return {reinterpret_cast<VPValue *>(-2)};
+ }
+
+ static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
+ }
+
+ static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
+ const SmallVector<VPValue *, 4> &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ /// Mapping of values in the original VPlan to a combined VPInstruction.
+ DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
+ BundleToCombined;
+
+ VPInterleavedAccessInfo &IAI;
+
+ /// Basic block to operate on. For now, only instructions in a single BB are
+ /// considered.
+ const VPBasicBlock &BB;
+
+ /// Indicates whether we managed to combine all visited instructions or not.
+ bool CompletelySLP = true;
+
+ /// Width of the widest combined bundle in bits.
+ unsigned WidestBundleBits = 0;
+
+ using MultiNodeOpTy =
+ typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+
+ // Input operand bundles for the current multi node. Each multi node operand
+ // bundle contains values not matching the multi node's opcode. They will
+ // be reordered in reorderMultiNodeOps, once we have completed building a
+ // multi node.
+ SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
+
+ /// Indicates whether we are building a multi node currently.
+ bool MultiNodeActive = false;
+
+ /// Check if we can vectorize Operands together.
+ bool areVectorizable(ArrayRef<VPValue *> Operands) const;
+
+ /// Add combined instruction \p New for the bundle \p Operands.
+ void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
+
+ /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
+ VPInstruction *markFailed();
+
+ /// Reorder operands in the multi node to maximize sequential memory access
+ /// and commutative operations.
+ SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
+
+ /// Choose the best candidate to use for the lane after \p Last. The set of
+ /// candidates to choose from are values with an opcode matching \p Last's
+ /// or loads consecutive to \p Last.
+ std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
+ SmallPtrSetImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI);
+
+ /// Print bundle \p Values to dbgs().
+ void dumpBundle(ArrayRef<VPValue *> Values);
+
+public:
+ VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
+
+ ~VPlanSlp() {
+ for (auto &KV : BundleToCombined)
+ delete KV.second;
+ }
+
+ /// Tries to build an SLP tree rooted at \p Operands and returns a
+ /// VPInstruction combining \p Operands, if they can be combined.
+ VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
+
+ /// Return the width of the widest combined bundle in bits.
+ unsigned getWidestBundleBits() const { return WidestBundleBits; }
+
+ /// Return true if all visited instructions can be combined.
+ bool isCompletelySLP() const { return CompletelySLP; }
+};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index b6307acb9474..0f42694e193b 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -268,7 +268,7 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
// Set VPBB successors. We create empty VPBBs for successors if they don't
// exist already. Recipes will be created when the successor is visited
// during the RPO traversal.
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
assert(TI && "Terminator expected.");
unsigned NumSuccs = TI->getNumSuccessors();
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
index e3cbab077e61..3ad7fc7e7b96 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
@@ -24,6 +24,18 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
VPRegionBlock *TopRegion = dyn_cast<VPRegionBlock>(Plan->getEntry());
ReversePostOrderTraversal<VPBlockBase *> RPOT(TopRegion->getEntry());
+
+ // Condition bit VPValues get deleted during transformation to VPRecipes.
+ // Create new VPValues and save away as condition bits. These will be deleted
+ // after finalizing the vector IR basic blocks.
+ for (VPBlockBase *Base : RPOT) {
+ VPBasicBlock *VPBB = Base->getEntryBasicBlock();
+ if (auto *CondBit = VPBB->getCondBit()) {
+ auto *NCondBit = new VPValue(CondBit->getUnderlyingValue());
+ VPBB->setCondBit(NCondBit);
+ Plan->addCBV(NCondBit);
+ }
+ }
for (VPBlockBase *Base : RPOT) {
// Do not widen instructions in pre-header and exit blocks.
if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0)
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
new file mode 100644
index 000000000000..ad3a85a6f760
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -0,0 +1,468 @@
+//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// This file implements SLP analysis based on VPlan. The analysis is based on
+/// the ideas described in
+///
+/// Look-ahead SLP: auto-vectorization in the presence of commutative
+/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
+/// Luís F. W. Góes
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPlan.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <iterator>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vplan-slp"
+
+// Number of levels to look ahead when re-ordering multi node operands.
+static unsigned LookaheadMaxDepth = 5;
+
+VPInstruction *VPlanSlp::markFailed() {
+ // FIXME: Currently this is used to signal we hit instructions we cannot
+ // trivially SLP'ize.
+ CompletelySLP = false;
+ return nullptr;
+}
+
+void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
+ if (all_of(Operands, [](VPValue *V) {
+ return cast<VPInstruction>(V)->getUnderlyingInstr();
+ })) {
+ unsigned BundleSize = 0;
+ for (VPValue *V : Operands) {
+ Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
+ assert(!T->isVectorTy() && "Only scalar types supported for now");
+ BundleSize += T->getScalarSizeInBits();
+ }
+ WidestBundleBits = std::max(WidestBundleBits, BundleSize);
+ }
+
+ auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
+ assert(Res.second &&
+ "Already created a combined instruction for the operand bundle");
+ (void)Res;
+}
+
+bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
+ // Currently we only support VPInstructions.
+ if (!all_of(Operands, [](VPValue *Op) {
+ return Op && isa<VPInstruction>(Op) &&
+ cast<VPInstruction>(Op)->getUnderlyingInstr();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
+ return false;
+ }
+
+ // Check if opcodes and type width agree for all instructions in the bundle.
+ // FIXME: Differing widths/opcodes can be handled by inserting additional
+ // instructions.
+ // FIXME: Deal with non-primitive types.
+ const Instruction *OriginalInstr =
+ cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
+ unsigned Opcode = OriginalInstr->getOpcode();
+ unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
+ if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
+ const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
+ return I->getOpcode() == Opcode &&
+ I->getType()->getPrimitiveSizeInBits() == Width;
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
+ return false;
+ }
+
+ // For now, all operands must be defined in the same BB.
+ if (any_of(Operands, [this](VPValue *Op) {
+ return cast<VPInstruction>(Op)->getParent() != &this->BB;
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
+ return false;
+ }
+
+ if (any_of(Operands,
+ [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
+ return false;
+ }
+
+ // For loads, check that there are no instructions writing to memory in
+ // between them.
+ // TODO: we only have to forbid instructions writing to memory that could
+ // interfere with any of the loads in the bundle
+ if (Opcode == Instruction::Load) {
+ unsigned LoadsSeen = 0;
+ VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
+ for (auto &I : *Parent) {
+ auto *VPI = cast<VPInstruction>(&I);
+ if (VPI->getOpcode() == Instruction::Load &&
+ std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
+ LoadsSeen++;
+
+ if (LoadsSeen == Operands.size())
+ break;
+ if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
+ LLVM_DEBUG(
+ dbgs() << "VPSLP: instruction modifying memory between loads\n");
+ return false;
+ }
+ }
+
+ if (!all_of(Operands, [](VPValue *Op) {
+ return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
+ ->isSimple();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
+ return false;
+ }
+ }
+
+ if (Opcode == Instruction::Store)
+ if (!all_of(Operands, [](VPValue *Op) {
+ return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
+ ->isSimple();
+ })) {
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
+ unsigned OperandIndex) {
+ SmallVector<VPValue *, 4> Operands;
+ for (VPValue *V : Values) {
+ auto *U = cast<VPUser>(V);
+ Operands.push_back(U->getOperand(OperandIndex));
+ }
+ return Operands;
+}
+
+static bool areCommutative(ArrayRef<VPValue *> Values) {
+ return Instruction::isCommutative(
+ cast<VPInstruction>(Values[0])->getOpcode());
+}
+
+static SmallVector<SmallVector<VPValue *, 4>, 4>
+getOperands(ArrayRef<VPValue *> Values) {
+ SmallVector<SmallVector<VPValue *, 4>, 4> Result;
+ auto *VPI = cast<VPInstruction>(Values[0]);
+
+ switch (VPI->getOpcode()) {
+ case Instruction::Load:
+ llvm_unreachable("Loads terminate a tree, no need to get operands");
+ case Instruction::Store:
+ Result.push_back(getOperands(Values, 0));
+ break;
+ default:
+ for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
+ Result.push_back(getOperands(Values, I));
+ break;
+ }
+
+ return Result;
+}
+
+/// Returns the opcode of Values or None if they do not all agree.
+static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
+ unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
+ if (any_of(Values, [Opcode](VPValue *V) {
+ return cast<VPInstruction>(V)->getOpcode() != Opcode;
+ }))
+ return None;
+ return {Opcode};
+}
+
+/// Returns true if A and B access sequential memory if they are loads or
+/// stores or if they have identical opcodes otherwise.
+static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
+ VPInterleavedAccessInfo &IAI) {
+ if (A->getOpcode() != B->getOpcode())
+ return false;
+
+ if (A->getOpcode() != Instruction::Load &&
+ A->getOpcode() != Instruction::Store)
+ return true;
+ auto *GA = IAI.getInterleaveGroup(A);
+ auto *GB = IAI.getInterleaveGroup(B);
+
+ return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
+}
+
+/// Implements getLAScore from Listing 7 in the paper.
+/// Traverses and compares operands of V1 and V2 to MaxLevel.
+static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
+ VPInterleavedAccessInfo &IAI) {
+ if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
+ return 0;
+
+ if (MaxLevel == 0)
+ return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
+ cast<VPInstruction>(V2), IAI);
+
+ unsigned Score = 0;
+ for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
+ for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
+ Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
+ cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
+ return Score;
+}
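A toy, dependency-free model of the same scoring idea (ToyNode and lookaheadScore are invented names, deliberately detached from the VPlan classes): two leaves score 1 when they match, and deeper levels sum the scores of every operand pair, so candidate orderings that stay consistent over several levels of the expression tree come out ahead.

#include <vector>

// Stand-in for a VPInstruction: an opcode plus operand pointers.
struct ToyNode {
  unsigned Opcode;
  std::vector<const ToyNode *> Ops;
};

// Mirrors getLAScore: compare the nodes at level 0, otherwise sum the
// pairwise scores of all operands up to MaxLevel levels deep.
static unsigned lookaheadScore(const ToyNode *A, const ToyNode *B,
                               unsigned MaxLevel) {
  if (!A || !B)
    return 0;
  if (MaxLevel == 0)
    return A->Opcode == B->Opcode ? 1 : 0;
  unsigned Score = 0;
  for (const ToyNode *OA : A->Ops)
    for (const ToyNode *OB : B->Ops)
      Score += lookaheadScore(OA, OB, MaxLevel - 1);
  return Score;
}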
+
+std::pair<VPlanSlp::OpMode, VPValue *>
+VPlanSlp::getBest(OpMode Mode, VPValue *Last,
+ SmallPtrSetImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI) {
+ assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
+ "Currently we only handle load and commutative opcodes");
+ LLVM_DEBUG(dbgs() << " getBest\n");
+
+ SmallVector<VPValue *, 4> BestCandidates;
+ LLVM_DEBUG(dbgs() << " Candidates for "
+ << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
+ for (auto *Candidate : Candidates) {
+ auto *LastI = cast<VPInstruction>(Last);
+ auto *CandidateI = cast<VPInstruction>(Candidate);
+ if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
+ LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
+ << " ");
+ BestCandidates.push_back(Candidate);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ if (BestCandidates.empty())
+ return {OpMode::Failed, nullptr};
+
+ if (BestCandidates.size() == 1)
+ return {Mode, BestCandidates[0]};
+
+ VPValue *Best = nullptr;
+ unsigned BestScore = 0;
+ for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
+ unsigned PrevScore = ~0u;
+ bool AllSame = true;
+
+ // FIXME: Avoid visiting the same operands multiple times.
+ for (auto *Candidate : BestCandidates) {
+ unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
+ if (PrevScore == ~0u)
+ PrevScore = Score;
+ if (PrevScore != Score)
+ AllSame = false;
+ PrevScore = Score;
+
+ if (Score > BestScore) {
+ BestScore = Score;
+ Best = Candidate;
+ }
+ }
+ if (!AllSame)
+ break;
+ }
+ LLVM_DEBUG(dbgs() << "Found best "
+ << *cast<VPInstruction>(Best)->getUnderlyingInstr()
+ << "\n");
+ Candidates.erase(Best);
+
+ return {Mode, Best};
+}
+
+SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
+ SmallVector<MultiNodeOpTy, 4> FinalOrder;
+ SmallVector<OpMode, 4> Mode;
+ FinalOrder.reserve(MultiNodeOps.size());
+ Mode.reserve(MultiNodeOps.size());
+
+ LLVM_DEBUG(dbgs() << "Reordering multinode\n");
+
+ for (auto &Operands : MultiNodeOps) {
+ FinalOrder.push_back({Operands.first, {Operands.second[0]}});
+ if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
+ Instruction::Load)
+ Mode.push_back(OpMode::Load);
+ else
+ Mode.push_back(OpMode::Opcode);
+ }
+
+ for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
+ LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
+ SmallPtrSet<VPValue *, 4> Candidates;
+ LLVM_DEBUG(dbgs() << " Candidates ");
+ for (auto Ops : MultiNodeOps) {
+ LLVM_DEBUG(
+ dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
+ << " ");
+ Candidates.insert(Ops.second[Lane]);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+
+ for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
+ LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
+ if (Mode[Op] == OpMode::Failed)
+ continue;
+
+ VPValue *Last = FinalOrder[Op].second[Lane - 1];
+ std::pair<OpMode, VPValue *> Res =
+ getBest(Mode[Op], Last, Candidates, IAI);
+ if (Res.second)
+ FinalOrder[Op].second.push_back(Res.second);
+ else
+ // TODO: handle this case
+ FinalOrder[Op].second.push_back(markFailed());
+ }
+ }
+
+ return FinalOrder;
+}
+
+void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
+ dbgs() << " Ops: ";
+ for (auto Op : Values)
+ if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())
+ dbgs() << *Instr << " | ";
+ else
+ dbgs() << " nullptr | ";
+ dbgs() << "\n";
+}
+
+VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
+ assert(!Values.empty() && "Need some operands!");
+
+ // If we already visited this instruction bundle, re-use the existing node
+ auto I = BundleToCombined.find(to_vector<4>(Values));
+ if (I != BundleToCombined.end()) {
+#ifndef NDEBUG
+ // Check that the resulting graph is a tree. If we re-use a node, this means
+ // its values have multiple users. We only allow this if all users of each
+ // value are the same instruction.
+ for (auto *V : Values) {
+ auto UI = V->user_begin();
+ auto *FirstUser = *UI++;
+ while (UI != V->user_end()) {
+ assert(*UI == FirstUser && "Currently we only support SLP trees.");
+ UI++;
+ }
+ }
+#endif
+ return I->second;
+ }
+
+ // Dump inputs
+ LLVM_DEBUG({
+ dbgs() << "buildGraph: ";
+ dumpBundle(Values);
+ });
+
+ if (!areVectorizable(Values))
+ return markFailed();
+
+ assert(getOpcode(Values) && "Opcodes for all values must match");
+ unsigned ValuesOpcode = getOpcode(Values).getValue();
+
+ SmallVector<VPValue *, 4> CombinedOperands;
+ if (areCommutative(Values)) {
+ bool MultiNodeRoot = !MultiNodeActive;
+ MultiNodeActive = true;
+ for (auto &Operands : getOperands(Values)) {
+ LLVM_DEBUG({
+ dbgs() << " Visiting Commutative";
+ dumpBundle(Operands);
+ });
+
+ auto OperandsOpcode = getOpcode(Operands);
+ if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
+ LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
+ CombinedOperands.push_back(buildGraph(Operands));
+ } else {
+ LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
+ // Create a dummy VPInstruction, which we will replace later with the
+ // re-ordered operand.
+ VPInstruction *Op = new VPInstruction(0, {});
+ CombinedOperands.push_back(Op);
+ MultiNodeOps.emplace_back(Op, Operands);
+ }
+ }
+
+ if (MultiNodeRoot) {
+ LLVM_DEBUG(dbgs() << "Reorder \n");
+ MultiNodeActive = false;
+
+ auto FinalOrder = reorderMultiNodeOps();
+
+ MultiNodeOps.clear();
+ for (auto &Ops : FinalOrder) {
+ VPInstruction *NewOp = buildGraph(Ops.second);
+ Ops.first->replaceAllUsesWith(NewOp);
+ for (unsigned i = 0; i < CombinedOperands.size(); i++)
+ if (CombinedOperands[i] == Ops.first)
+ CombinedOperands[i] = NewOp;
+ delete Ops.first;
+ Ops.first = NewOp;
+ }
+ LLVM_DEBUG(dbgs() << "Found final order\n");
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " NonCommutative\n");
+ if (ValuesOpcode == Instruction::Load)
+ for (VPValue *V : Values)
+ CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
+ else
+ for (auto &Operands : getOperands(Values))
+ CombinedOperands.push_back(buildGraph(Operands));
+ }
+
+ unsigned Opcode;
+ switch (ValuesOpcode) {
+ case Instruction::Load:
+ Opcode = VPInstruction::SLPLoad;
+ break;
+ case Instruction::Store:
+ Opcode = VPInstruction::SLPStore;
+ break;
+ default:
+ Opcode = ValuesOpcode;
+ break;
+ }
+
+ if (!CompletelySLP)
+ return markFailed();
+
+ assert(CombinedOperands.size() > 0 && "Need some operands");
+ auto *VPI = new VPInstruction(Opcode, CombinedOperands);
+ VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
+
+ LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
+ cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
+ addCombined(Values, VPI);
+ return VPI;
+}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h b/contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 08f142915b49..b473579b699f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -38,6 +38,10 @@ class VPUser;
// and live-outs which the VPlan will need to fix accordingly.
class VPValue {
friend class VPBuilder;
+ friend class VPlanHCFGTransforms;
+ friend class VPBasicBlock;
+ friend class VPInterleavedAccessInfo;
+
private:
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -102,6 +106,20 @@ public:
const_user_range users() const {
return const_user_range(user_begin(), user_end());
}
+
+ /// Returns true if the value has more than one unique user.
+ bool hasMoreThanOneUniqueUser() {
+ if (getNumUsers() == 0)
+ return false;
+
+ // Check if all users match the first user.
+ auto Current = std::next(user_begin());
+ while (Current != user_end() && *user_begin() == *Current)
+ Current++;
+ return Current != user_end();
+ }
+
+ void replaceAllUsesWith(VPValue *New);
};
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
@@ -147,6 +165,8 @@ public:
return Operands[N];
}
+ void setOperand(unsigned I, VPValue *New) { Operands[I] = New; }
+
typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;
typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;
typedef iterator_range<operand_iterator> operand_range;
diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index f62a88558328..559ab1968844 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeLoopVectorizePass(Registry);
initializeSLPVectorizerPass(Registry);
- initializeLoadStoreVectorizerPass(Registry);
+ initializeLoadStoreVectorizerLegacyPassPass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/XRay/BlockIndexer.cpp b/contrib/llvm/lib/XRay/BlockIndexer.cpp
new file mode 100644
index 000000000000..4dbe2d2717ad
--- /dev/null
+++ b/contrib/llvm/lib/XRay/BlockIndexer.cpp
@@ -0,0 +1,98 @@
+//===- BlockIndexer.cpp - FDR Block Indexing Visitor ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the RecordVisitor which generates a mapping between a
+// thread and a range of records representing a block.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/BlockIndexer.h"
+
+namespace llvm {
+namespace xray {
+
+Error BlockIndexer::visit(BufferExtents &) { return Error::success(); }
+
+Error BlockIndexer::visit(WallclockRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ CurrentBlock.WallclockTime = &R;
+ return Error::success();
+}
+
+Error BlockIndexer::visit(NewCPUIDRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(TSCWrapRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(CustomEventRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(CustomEventRecordV5 &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(TypedEventRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(CallArgRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(PIDRecord &R) {
+ CurrentBlock.ProcessID = R.pid();
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(NewBufferRecord &R) {
+ if (!CurrentBlock.Records.empty())
+ if (auto E = flush())
+ return E;
+
+ CurrentBlock.ThreadID = R.tid();
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(EndBufferRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::visit(FunctionRecord &R) {
+ CurrentBlock.Records.push_back(&R);
+ return Error::success();
+}
+
+Error BlockIndexer::flush() {
+ Index::iterator It;
+ std::tie(It, std::ignore) =
+ Indices.insert({{CurrentBlock.ProcessID, CurrentBlock.ThreadID}, {}});
+ It->second.push_back({CurrentBlock.ProcessID, CurrentBlock.ThreadID,
+ CurrentBlock.WallclockTime,
+ std::move(CurrentBlock.Records)});
+ CurrentBlock.ProcessID = 0;
+ CurrentBlock.ThreadID = 0;
+ CurrentBlock.Records = {};
+ CurrentBlock.WallclockTime = nullptr;
+ return Error::success();
+}
+
+} // namespace xray
+} // namespace llvm
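What flush() above amounts to is filing the records accumulated for the current buffer under a (process id, thread id) key and starting a fresh block. A standalone sketch of that step under that reading, not part of the patch, with std::map and placeholder types standing in for the LLVM containers:

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

struct Block {
  uint64_t ProcessID;
  int32_t ThreadID;
  std::vector<int> Records; // stand-in for Record pointers
};

using Index = std::map<std::pair<uint64_t, int32_t>, std::vector<Block>>;

void flushBlock(Index &Indices, Block &Current) {
  // Insert (or find) the per-(PID, TID) bucket, then move the block in.
  auto It = Indices.insert({{Current.ProcessID, Current.ThreadID}, {}}).first;
  It->second.push_back(std::move(Current));
  Current = Block{}; // start a fresh block, as the visitor does
}

int main() {
  Index Indices;
  Block Current{42, 7, {1, 2, 3}};
  flushBlock(Indices, Current);
  return Indices.size() == 1 ? 0 : 1;
}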
diff --git a/contrib/llvm/lib/XRay/BlockPrinter.cpp b/contrib/llvm/lib/XRay/BlockPrinter.cpp
new file mode 100644
index 000000000000..0acebee0cbdd
--- /dev/null
+++ b/contrib/llvm/lib/XRay/BlockPrinter.cpp
@@ -0,0 +1,114 @@
+//===- BlockPrinter.cpp - FDR Block Pretty Printer Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/BlockPrinter.h"
+
+namespace llvm {
+namespace xray {
+
+Error BlockPrinter::visit(BufferExtents &R) {
+ OS << "\n[New Block]\n";
+ CurrentState = State::Preamble;
+ return RP.visit(R);
+}
+
+// Preamble printing.
+Error BlockPrinter::visit(NewBufferRecord &R) {
+ if (CurrentState == State::Start)
+ OS << "\n[New Block]\n";
+
+ OS << "Preamble: \n";
+ CurrentState = State::Preamble;
+ return RP.visit(R);
+}
+
+Error BlockPrinter::visit(WallclockRecord &R) {
+ CurrentState = State::Preamble;
+ return RP.visit(R);
+}
+
+Error BlockPrinter::visit(PIDRecord &R) {
+ CurrentState = State::Preamble;
+ return RP.visit(R);
+}
+
+// Metadata printing.
+Error BlockPrinter::visit(NewCPUIDRecord &R) {
+ if (CurrentState == State::Preamble)
+ OS << "\nBody:\n";
+ if (CurrentState == State::Function)
+ OS << "\nMetadata: ";
+ CurrentState = State::Metadata;
+ OS << " ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(TSCWrapRecord &R) {
+ if (CurrentState == State::Function)
+ OS << "\nMetadata:";
+ CurrentState = State::Metadata;
+ OS << " ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+// Custom events will be rendered like "function" events.
+Error BlockPrinter::visit(CustomEventRecord &R) {
+ if (CurrentState == State::Metadata)
+ OS << "\n";
+ CurrentState = State::CustomEvent;
+ OS << "* ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(CustomEventRecordV5 &R) {
+ if (CurrentState == State::Metadata)
+ OS << "\n";
+ CurrentState = State::CustomEvent;
+ OS << "* ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(TypedEventRecord &R) {
+ if (CurrentState == State::Metadata)
+ OS << "\n";
+ CurrentState = State::CustomEvent;
+ OS << "* ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+// Function call printing.
+Error BlockPrinter::visit(FunctionRecord &R) {
+ if (CurrentState == State::Metadata)
+ OS << "\n";
+ CurrentState = State::Function;
+ OS << "- ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(CallArgRecord &R) {
+ CurrentState = State::Arg;
+ OS << " : ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+Error BlockPrinter::visit(EndBufferRecord &R) {
+ CurrentState = State::End;
+ OS << " *** ";
+ auto E = RP.visit(R);
+ return E;
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/BlockVerifier.cpp b/contrib/llvm/lib/XRay/BlockVerifier.cpp
new file mode 100644
index 000000000000..5e949ec4e46a
--- /dev/null
+++ b/contrib/llvm/lib/XRay/BlockVerifier.cpp
@@ -0,0 +1,205 @@
+//===- BlockVerifier.cpp - FDR Block Verifier -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/BlockVerifier.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace xray {
+namespace {
+
+constexpr unsigned long long mask(BlockVerifier::State S) {
+ return 1uLL << static_cast<std::size_t>(S);
+}
+
+constexpr std::size_t number(BlockVerifier::State S) {
+ return static_cast<std::size_t>(S);
+}
+
+StringRef recordToString(BlockVerifier::State R) {
+ switch (R) {
+ case BlockVerifier::State::BufferExtents:
+ return "BufferExtents";
+ case BlockVerifier::State::NewBuffer:
+ return "NewBuffer";
+ case BlockVerifier::State::WallClockTime:
+ return "WallClockTime";
+ case BlockVerifier::State::PIDEntry:
+ return "PIDEntry";
+ case BlockVerifier::State::NewCPUId:
+ return "NewCPUId";
+ case BlockVerifier::State::TSCWrap:
+ return "TSCWrap";
+ case BlockVerifier::State::CustomEvent:
+ return "CustomEvent";
+ case BlockVerifier::State::Function:
+ return "Function";
+ case BlockVerifier::State::CallArg:
+ return "CallArg";
+ case BlockVerifier::State::EndOfBuffer:
+ return "EndOfBuffer";
+ case BlockVerifier::State::TypedEvent:
+ return "TypedEvent";
+ case BlockVerifier::State::StateMax:
+ case BlockVerifier::State::Unknown:
+ return "Unknown";
+ }
+ llvm_unreachable("Unkown state!");
+}
+
+struct Transition {
+ BlockVerifier::State From;
+ std::bitset<number(BlockVerifier::State::StateMax)> ToStates;
+};
+
+} // namespace
+
+Error BlockVerifier::transition(State To) {
+ using ToSet = std::bitset<number(State::StateMax)>;
+ static constexpr std::array<const Transition, number(State::StateMax)>
+ TransitionTable{{{State::Unknown,
+ {mask(State::BufferExtents) | mask(State::NewBuffer)}},
+
+ {State::BufferExtents, {mask(State::NewBuffer)}},
+
+ {State::NewBuffer, {mask(State::WallClockTime)}},
+
+ {State::WallClockTime,
+ {mask(State::PIDEntry) | mask(State::NewCPUId)}},
+
+ {State::PIDEntry, {mask(State::NewCPUId)}},
+
+ {State::NewCPUId,
+ {mask(State::NewCPUId) | mask(State::TSCWrap) |
+ mask(State::CustomEvent) | mask(State::Function) |
+ mask(State::EndOfBuffer) | mask(State::TypedEvent)}},
+
+ {State::TSCWrap,
+ {mask(State::TSCWrap) | mask(State::NewCPUId) |
+ mask(State::CustomEvent) | mask(State::Function) |
+ mask(State::EndOfBuffer) | mask(State::TypedEvent)}},
+
+ {State::CustomEvent,
+ {mask(State::CustomEvent) | mask(State::TSCWrap) |
+ mask(State::NewCPUId) | mask(State::Function) |
+ mask(State::EndOfBuffer) | mask(State::TypedEvent)}},
+
+ {State::TypedEvent,
+ {mask(State::TypedEvent) | mask(State::TSCWrap) |
+ mask(State::NewCPUId) | mask(State::Function) |
+ mask(State::EndOfBuffer) | mask(State::CustomEvent)}},
+
+ {State::Function,
+ {mask(State::Function) | mask(State::TSCWrap) |
+ mask(State::NewCPUId) | mask(State::CustomEvent) |
+ mask(State::CallArg) | mask(State::EndOfBuffer) |
+ mask(State::TypedEvent)}},
+
+ {State::CallArg,
+ {mask(State::CallArg) | mask(State::Function) |
+ mask(State::TSCWrap) | mask(State::NewCPUId) |
+ mask(State::CustomEvent) | mask(State::EndOfBuffer) |
+ mask(State::TypedEvent)}},
+
+ {State::EndOfBuffer, {}}}};
+
+ if (CurrentRecord >= State::StateMax)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "BUG (BlockVerifier): Cannot find transition table entry for %s, "
+ "transitioning to %s.",
+ recordToString(CurrentRecord).data(), recordToString(To).data());
+
+ // If we're at an EndOfBuffer record, we ignore anything that follows that
+ // isn't a NewBuffer record.
+ if (CurrentRecord == State::EndOfBuffer && To != State::NewBuffer)
+ return Error::success();
+
+ auto &Mapping = TransitionTable[number(CurrentRecord)];
+ auto &Destinations = Mapping.ToStates;
+ assert(Mapping.From == CurrentRecord &&
+ "BUG: Wrong index for record mapping.");
+ if ((Destinations & ToSet(mask(To))) == 0)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "BlockVerifier: Invalid transition from %s to %s.",
+ recordToString(CurrentRecord).data(), recordToString(To).data());
+
+ CurrentRecord = To;
+ return Error::success();
+}
+
+Error BlockVerifier::visit(BufferExtents &) {
+ return transition(State::BufferExtents);
+}
+
+Error BlockVerifier::visit(WallclockRecord &) {
+ return transition(State::WallClockTime);
+}
+
+Error BlockVerifier::visit(NewCPUIDRecord &) {
+ return transition(State::NewCPUId);
+}
+
+Error BlockVerifier::visit(TSCWrapRecord &) {
+ return transition(State::TSCWrap);
+}
+
+Error BlockVerifier::visit(CustomEventRecord &) {
+ return transition(State::CustomEvent);
+}
+
+Error BlockVerifier::visit(CustomEventRecordV5 &) {
+ return transition(State::CustomEvent);
+}
+
+Error BlockVerifier::visit(TypedEventRecord &) {
+ return transition(State::TypedEvent);
+}
+
+Error BlockVerifier::visit(CallArgRecord &) {
+ return transition(State::CallArg);
+}
+
+Error BlockVerifier::visit(PIDRecord &) { return transition(State::PIDEntry); }
+
+Error BlockVerifier::visit(NewBufferRecord &) {
+ return transition(State::NewBuffer);
+}
+
+Error BlockVerifier::visit(EndBufferRecord &) {
+ return transition(State::EndOfBuffer);
+}
+
+Error BlockVerifier::visit(FunctionRecord &) {
+ return transition(State::Function);
+}
+
+Error BlockVerifier::verify() {
+ // The known terminal conditions are the following:
+ switch (CurrentRecord) {
+ case State::EndOfBuffer:
+ case State::NewCPUId:
+ case State::CustomEvent:
+ case State::TypedEvent:
+ case State::Function:
+ case State::CallArg:
+ case State::TSCWrap:
+ return Error::success();
+ default:
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "BlockVerifier: Invalid terminal condition %s, malformed block.",
+ recordToString(CurrentRecord).data());
+ }
+}
+
+void BlockVerifier::reset() { CurrentRecord = State::Unknown; }
+
+} // namespace xray
+} // namespace llvm
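The verifier's transition table above is a per-state bitset of legal successors built from mask(S) = 1 << number(S); a transition is accepted when the target's bit is set in the current state's row. Here is a reduced sketch of the same check with only three states; it is illustrative only and not the real table.

#include <bitset>
#include <cstddef>

enum class State : std::size_t { Unknown, NewBuffer, WallClockTime, StateMax };

constexpr unsigned long long mask(State S) {
  return 1ULL << static_cast<std::size_t>(S);
}
constexpr std::size_t number(State S) { return static_cast<std::size_t>(S); }

using ToSet = std::bitset<number(State::StateMax)>;

bool legalTransition(State From, State To) {
  // One row per state; only Unknown->NewBuffer and NewBuffer->WallClockTime
  // are allowed in this toy table.
  static const ToSet Table[number(State::StateMax)] = {
      ToSet(mask(State::NewBuffer)),     // from Unknown
      ToSet(mask(State::WallClockTime)), // from NewBuffer
      ToSet(0),                          // from WallClockTime (terminal here)
  };
  return (Table[number(From)] & ToSet(mask(To))).any();
}

int main() {
  return (legalTransition(State::Unknown, State::NewBuffer) &&
          !legalTransition(State::Unknown, State::WallClockTime))
             ? 0
             : 1;
}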
diff --git a/contrib/llvm/lib/XRay/FDRRecordProducer.cpp b/contrib/llvm/lib/XRay/FDRRecordProducer.cpp
new file mode 100644
index 000000000000..25b3ee8af219
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FDRRecordProducer.cpp
@@ -0,0 +1,198 @@
+//===- FDRRecordProducer.cpp - XRay FDR Mode Record Producer --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRRecordProducer.h"
+#include "llvm/Support/DataExtractor.h"
+
+#include <cstdint>
+
+namespace llvm {
+namespace xray {
+
+namespace {
+
+// Keep this in sync with the values written in the XRay FDR mode runtime in
+// compiler-rt.
+enum MetadataRecordKinds : uint8_t {
+ NewBufferKind,
+ EndOfBufferKind,
+ NewCPUIdKind,
+ TSCWrapKind,
+ WalltimeMarkerKind,
+ CustomEventMarkerKind,
+ CallArgumentKind,
+ BufferExtentsKind,
+ TypedEventMarkerKind,
+ PidKind,
+ // This is an end marker, used to identify the upper bound for this enum.
+ EnumEndMarker,
+};
+
+Expected<std::unique_ptr<Record>>
+metadataRecordType(const XRayFileHeader &Header, uint8_t T) {
+
+ if (T >= static_cast<uint8_t>(MetadataRecordKinds::EnumEndMarker))
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Invalid metadata record type: %d", T);
+ switch (T) {
+ case MetadataRecordKinds::NewBufferKind:
+ return make_unique<NewBufferRecord>();
+ case MetadataRecordKinds::EndOfBufferKind:
+ if (Header.Version >= 2)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "End of buffer records are no longer supported starting version "
+ "2 of the log.");
+ return make_unique<EndBufferRecord>();
+ case MetadataRecordKinds::NewCPUIdKind:
+ return make_unique<NewCPUIDRecord>();
+ case MetadataRecordKinds::TSCWrapKind:
+ return make_unique<TSCWrapRecord>();
+ case MetadataRecordKinds::WalltimeMarkerKind:
+ return make_unique<WallclockRecord>();
+ case MetadataRecordKinds::CustomEventMarkerKind:
+ if (Header.Version >= 5)
+ return make_unique<CustomEventRecordV5>();
+ return make_unique<CustomEventRecord>();
+ case MetadataRecordKinds::CallArgumentKind:
+ return make_unique<CallArgRecord>();
+ case MetadataRecordKinds::BufferExtentsKind:
+ return make_unique<BufferExtents>();
+ case MetadataRecordKinds::TypedEventMarkerKind:
+ return make_unique<TypedEventRecord>();
+ case MetadataRecordKinds::PidKind:
+ return make_unique<PIDRecord>();
+ case MetadataRecordKinds::EnumEndMarker:
+ llvm_unreachable("Invalid MetadataRecordKind");
+ }
+ llvm_unreachable("Unhandled MetadataRecordKinds enum value");
+}
+
+constexpr bool isMetadataIntroducer(uint8_t FirstByte) {
+ return FirstByte & 0x01u;
+}
+
+} // namespace
+
+Expected<std::unique_ptr<Record>>
+FileBasedRecordProducer::findNextBufferExtent() {
+ // We seek one byte at a time until we find a suitable buffer extents metadata
+ // record introducer.
+ std::unique_ptr<Record> R;
+ while (!R) {
+ auto PreReadOffset = OffsetPtr;
+ uint8_t FirstByte = E.getU8(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading one byte from offset %d.", OffsetPtr);
+
+ if (isMetadataIntroducer(FirstByte)) {
+ auto LoadedType = FirstByte >> 1;
+ if (LoadedType == MetadataRecordKinds::BufferExtentsKind) {
+ auto MetadataRecordOrErr = metadataRecordType(Header, LoadedType);
+ if (!MetadataRecordOrErr)
+ return MetadataRecordOrErr.takeError();
+
+ R = std::move(MetadataRecordOrErr.get());
+ RecordInitializer RI(E, OffsetPtr);
+ if (auto Err = R->apply(RI))
+ return std::move(Err);
+ return std::move(R);
+ }
+ }
+ }
+ llvm_unreachable("Must always terminate with either an error or a record.");
+}
+
+Expected<std::unique_ptr<Record>> FileBasedRecordProducer::produce() {
+ // First, we set up our result record.
+ std::unique_ptr<Record> R;
+
+ // Before we do any further reading, we should check whether we're at the end
+ // of the current buffer we've been consuming. In FDR logs version >= 3, we
+ // rely on the buffer extents record to determine how many bytes we should be
+ // considering as valid records.
+ if (Header.Version >= 3 && CurrentBufferBytes == 0) {
+ // Find the next buffer extents record.
+ auto BufferExtentsOrError = findNextBufferExtent();
+ if (!BufferExtentsOrError)
+ return joinErrors(
+ BufferExtentsOrError.takeError(),
+ createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed to find the next BufferExtents record."));
+
+ R = std::move(BufferExtentsOrError.get());
+ assert(R != nullptr);
+ assert(isa<BufferExtents>(R.get()));
+ auto BE = dyn_cast<BufferExtents>(R.get());
+ CurrentBufferBytes = BE->size();
+ return std::move(R);
+ }
+
+ //
+ // At the top level, we read one byte to determine the type of the record to
+ // create. This byte comprises the following bits:
+ //
+ // - offset 0: A '1' indicates a metadata record, a '0' indicates a function
+ // record.
+ // - offsets 1-7: For metadata records, this indicates the kind of metadata
+ // record that should be loaded.
+ //
+ // We read the first byte, then create the appropriate type of record to
+ // consume the rest of the bytes.
+ auto PreReadOffset = OffsetPtr;
+ uint8_t FirstByte = E.getU8(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading one byte from offset %d.", OffsetPtr);
+
+ // Metadata records are handled specially here.
+ if (isMetadataIntroducer(FirstByte)) {
+ auto LoadedType = FirstByte >> 1;
+ auto MetadataRecordOrErr = metadataRecordType(Header, LoadedType);
+ if (!MetadataRecordOrErr)
+ return joinErrors(
+ MetadataRecordOrErr.takeError(),
+ createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Encountered an unsupported metadata record (%d) at offset %d.",
+ LoadedType, PreReadOffset));
+ R = std::move(MetadataRecordOrErr.get());
+ } else {
+ R = llvm::make_unique<FunctionRecord>();
+ }
+ RecordInitializer RI(E, OffsetPtr);
+
+ if (auto Err = R->apply(RI))
+ return std::move(Err);
+
+ // If we encountered a BufferExtents record, we should record the remaining
+ // bytes for the current buffer, to determine when we should start ignoring
+ // potentially malformed data and looking for buffer extents records.
+ if (auto BE = dyn_cast<BufferExtents>(R.get())) {
+ CurrentBufferBytes = BE->size();
+ } else if (Header.Version >= 3) {
+ if (OffsetPtr - PreReadOffset > CurrentBufferBytes)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Buffer over-read at offset %d (over-read by %d bytes); Record Type "
+ "= %s.",
+ OffsetPtr, (OffsetPtr - PreReadOffset) - CurrentBufferBytes,
+ Record::kindToString(R->getRecordType()).data());
+
+ CurrentBufferBytes -= OffsetPtr - PreReadOffset;
+ }
+ assert(R != nullptr);
+ return std::move(R);
+}
+
+} // namespace xray
+} // namespace llvm
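The producer keys everything off the first byte of a record, as the comment in produce() describes: bit 0 distinguishes metadata from function records, and for metadata records the remaining seven bits carry the kind. A small sketch of that decoding, not part of the imported sources (BufferExtentsKind sits at index 7 in the enum above):

#include <cassert>
#include <cstdint>

constexpr bool isMetadataIntroducer(uint8_t FirstByte) {
  return FirstByte & 0x01u; // bit 0 set => metadata record
}

constexpr uint8_t metadataKind(uint8_t FirstByte) {
  return FirstByte >> 1;    // bits 1-7 => MetadataRecordKinds value
}

int main() {
  // Encode a BufferExtents introducer (kind 7) and decode it again.
  uint8_t FirstByte = static_cast<uint8_t>((7u << 1) | 0x01u);
  assert(isMetadataIntroducer(FirstByte));
  assert(metadataKind(FirstByte) == 7);
  return 0;
}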
diff --git a/contrib/llvm/lib/XRay/FDRRecords.cpp b/contrib/llvm/lib/XRay/FDRRecords.cpp
new file mode 100644
index 000000000000..2a40d5e06229
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FDRRecords.cpp
@@ -0,0 +1,67 @@
+//===- FDRRecords.cpp - XRay Flight Data Recorder Mode Records -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define types and operations on these types that represent the different kinds
+// of records we encounter in XRay flight data recorder mode traces.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRRecords.h"
+
+namespace llvm {
+namespace xray {
+
+Error BufferExtents::apply(RecordVisitor &V) { return V.visit(*this); }
+Error WallclockRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error NewCPUIDRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error TSCWrapRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error CustomEventRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error CallArgRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error PIDRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error NewBufferRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error EndBufferRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error FunctionRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+Error CustomEventRecordV5::apply(RecordVisitor &V) { return V.visit(*this); }
+Error TypedEventRecord::apply(RecordVisitor &V) { return V.visit(*this); }
+
+StringRef Record::kindToString(RecordKind K) {
+ switch (K) {
+ case RecordKind::RK_Metadata:
+ return "Metadata";
+ case RecordKind::RK_Metadata_BufferExtents:
+ return "Metadata:BufferExtents";
+ case RecordKind::RK_Metadata_WallClockTime:
+ return "Metadata:WallClockTime";
+ case RecordKind::RK_Metadata_NewCPUId:
+ return "Metadata:NewCPUId";
+ case RecordKind::RK_Metadata_TSCWrap:
+ return "Metadata:TSCWrap";
+ case RecordKind::RK_Metadata_CustomEvent:
+ return "Metadata:CustomEvent";
+ case RecordKind::RK_Metadata_CustomEventV5:
+ return "Metadata:CustomEventV5";
+ case RecordKind::RK_Metadata_CallArg:
+ return "Metadata:CallArg";
+ case RecordKind::RK_Metadata_PIDEntry:
+ return "Metadata:PIDEntry";
+ case RecordKind::RK_Metadata_NewBuffer:
+ return "Metadata:NewBuffer";
+ case RecordKind::RK_Metadata_EndOfBuffer:
+ return "Metadata:EndOfBuffer";
+ case RecordKind::RK_Metadata_TypedEvent:
+ return "Metadata:TypedEvent";
+ case RecordKind::RK_Metadata_LastMetadata:
+ return "Metadata:LastMetadata";
+ case RecordKind::RK_Function:
+ return "Function";
+ }
+ return "Unknown";
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/FDRTraceExpander.cpp b/contrib/llvm/lib/XRay/FDRTraceExpander.cpp
new file mode 100644
index 000000000000..a6e1521da87f
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FDRTraceExpander.cpp
@@ -0,0 +1,132 @@
+//===- FDRTraceExpander.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRTraceExpander.h"
+
+namespace llvm {
+namespace xray {
+
+void TraceExpander::resetCurrentRecord() {
+ if (BuildingRecord)
+ C(CurrentRecord);
+ BuildingRecord = false;
+ CurrentRecord.CallArgs.clear();
+ CurrentRecord.Data.clear();
+}
+
+Error TraceExpander::visit(BufferExtents &) {
+ resetCurrentRecord();
+ return Error::success();
+}
+
+Error TraceExpander::visit(WallclockRecord &) { return Error::success(); }
+
+Error TraceExpander::visit(NewCPUIDRecord &R) {
+ CPUId = R.cpuid();
+ BaseTSC = R.tsc();
+ return Error::success();
+}
+
+Error TraceExpander::visit(TSCWrapRecord &R) {
+ BaseTSC = R.tsc();
+ return Error::success();
+}
+
+Error TraceExpander::visit(CustomEventRecord &R) {
+ resetCurrentRecord();
+ if (!IgnoringRecords) {
+ CurrentRecord.TSC = R.tsc();
+ CurrentRecord.CPU = R.cpu();
+ CurrentRecord.PId = PID;
+ CurrentRecord.TId = TID;
+ CurrentRecord.Type = RecordTypes::CUSTOM_EVENT;
+ CurrentRecord.Data = R.data();
+ BuildingRecord = true;
+ }
+ return Error::success();
+}
+
+Error TraceExpander::visit(CustomEventRecordV5 &R) {
+ resetCurrentRecord();
+ if (!IgnoringRecords) {
+ BaseTSC += R.delta();
+ CurrentRecord.TSC = BaseTSC;
+ CurrentRecord.CPU = CPUId;
+ CurrentRecord.PId = PID;
+ CurrentRecord.TId = TID;
+ CurrentRecord.Type = RecordTypes::CUSTOM_EVENT;
+ CurrentRecord.Data = R.data();
+ BuildingRecord = true;
+ }
+ return Error::success();
+}
+
+Error TraceExpander::visit(TypedEventRecord &R) {
+ resetCurrentRecord();
+ if (!IgnoringRecords) {
+ BaseTSC += R.delta();
+ CurrentRecord.TSC = BaseTSC;
+ CurrentRecord.CPU = CPUId;
+ CurrentRecord.PId = PID;
+ CurrentRecord.TId = TID;
+ CurrentRecord.RecordType = R.eventType();
+ CurrentRecord.Type = RecordTypes::TYPED_EVENT;
+ CurrentRecord.Data = R.data();
+ BuildingRecord = true;
+ }
+ return Error::success();
+}
+
+Error TraceExpander::visit(CallArgRecord &R) {
+ CurrentRecord.CallArgs.push_back(R.arg());
+ CurrentRecord.Type = RecordTypes::ENTER_ARG;
+ return Error::success();
+}
+
+Error TraceExpander::visit(PIDRecord &R) {
+ PID = R.pid();
+ return Error::success();
+}
+
+Error TraceExpander::visit(NewBufferRecord &R) {
+ if (IgnoringRecords)
+ IgnoringRecords = false;
+ TID = R.tid();
+ if (LogVersion == 2)
+ PID = R.tid();
+ return Error::success();
+}
+
+Error TraceExpander::visit(EndBufferRecord &) {
+ IgnoringRecords = true;
+ resetCurrentRecord();
+ return Error::success();
+}
+
+Error TraceExpander::visit(FunctionRecord &R) {
+ resetCurrentRecord();
+ if (!IgnoringRecords) {
+ BaseTSC += R.delta();
+ CurrentRecord.Type = R.recordType();
+ CurrentRecord.FuncId = R.functionId();
+ CurrentRecord.TSC = BaseTSC;
+ CurrentRecord.PId = PID;
+ CurrentRecord.TId = TID;
+ CurrentRecord.CPU = CPUId;
+ BuildingRecord = true;
+ }
+ return Error::success();
+}
+
+Error TraceExpander::flush() {
+ resetCurrentRecord();
+ return Error::success();
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/FDRTraceWriter.cpp b/contrib/llvm/lib/XRay/FDRTraceWriter.cpp
new file mode 100644
index 000000000000..c5224f4be094
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FDRTraceWriter.cpp
@@ -0,0 +1,154 @@
+//===- FDRTraceWriter.cpp - XRay FDR Trace Writer ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A utility that can write out XRay FDR Mode formatted trace files.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRTraceWriter.h"
+#include <tuple>
+
+namespace llvm {
+namespace xray {
+
+namespace {
+
+template <size_t Index> struct IndexedWriter {
+ template <
+ class Tuple,
+ typename std::enable_if<
+ (Index <
+ std::tuple_size<typename std::remove_reference<Tuple>::type>::value),
+ int>::type = 0>
+ static size_t write(support::endian::Writer &OS, Tuple &&T) {
+ OS.write(std::get<Index>(T));
+ return sizeof(std::get<Index>(T)) + IndexedWriter<Index + 1>::write(OS, T);
+ }
+
+ template <
+ class Tuple,
+ typename std::enable_if<
+ (Index >=
+ std::tuple_size<typename std::remove_reference<Tuple>::type>::value),
+ int>::type = 0>
+ static size_t write(support::endian::Writer &OS, Tuple &&) {
+ return 0;
+ }
+};
+
+template <uint8_t Kind, class... Values>
+Error writeMetadata(support::endian::Writer &OS, Values &&... Ds) {
+ // The first bit in the first byte of metadata records is always set to 1, so
+ // we ensure this is the case when we write out the first byte of the record.
+ uint8_t FirstByte = (static_cast<uint8_t>(Kind) << 1) | uint8_t{0x01u};
+ auto T = std::make_tuple(std::forward<Values>(std::move(Ds))...);
+ // Write in field order.
+ OS.write(FirstByte);
+ auto Bytes = IndexedWriter<0>::write(OS, T);
+ assert(Bytes <= 15 && "Must only ever write at most 16 byte metadata!");
+ // Pad out with the appropriate number of zeros.
+ for (; Bytes < 15; ++Bytes)
+ OS.write('\0');
+ return Error::success();
+}
+
+} // namespace
+
+FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H)
+ : OS(O, support::endianness::native) {
+ // We need to re-construct a header by writing the fields we care about for
+ // traces, in the format that the runtime would have written them.
+ uint32_t BitField =
+ (H.ConstantTSC ? 0x01 : 0x0) | (H.NonstopTSC ? 0x02 : 0x0);
+
+ // For endian-correctness, we write these fields out in the order we expect
+ // them to appear, instead of blasting the raw bytes of the struct through.
+ OS.write(H.Version);
+ OS.write(H.Type);
+ OS.write(BitField);
+ OS.write(H.CycleFrequency);
+ ArrayRef<char> FreeFormBytes(H.FreeFormData,
+ sizeof(XRayFileHeader::FreeFormData));
+ OS.write(FreeFormBytes);
+}
+
+FDRTraceWriter::~FDRTraceWriter() {}
+
+Error FDRTraceWriter::visit(BufferExtents &R) {
+ return writeMetadata<7u>(OS, R.size());
+}
+
+Error FDRTraceWriter::visit(WallclockRecord &R) {
+ return writeMetadata<4u>(OS, R.seconds(), R.nanos());
+}
+
+Error FDRTraceWriter::visit(NewCPUIDRecord &R) {
+ return writeMetadata<2u>(OS, R.cpuid(), R.tsc());
+}
+
+Error FDRTraceWriter::visit(TSCWrapRecord &R) {
+ return writeMetadata<3u>(OS, R.tsc());
+}
+
+Error FDRTraceWriter::visit(CustomEventRecord &R) {
+ if (auto E = writeMetadata<5u>(OS, R.size(), R.tsc(), R.cpu()))
+ return E;
+ auto D = R.data();
+ ArrayRef<char> Bytes(D.data(), D.size());
+ OS.write(Bytes);
+ return Error::success();
+}
+
+Error FDRTraceWriter::visit(CustomEventRecordV5 &R) {
+ if (auto E = writeMetadata<5u>(OS, R.size(), R.delta()))
+ return E;
+ auto D = R.data();
+ ArrayRef<char> Bytes(D.data(), D.size());
+ OS.write(Bytes);
+ return Error::success();
+}
+
+Error FDRTraceWriter::visit(TypedEventRecord &R) {
+ if (auto E = writeMetadata<8u>(OS, R.size(), R.delta(), R.eventType()))
+ return E;
+ auto D = R.data();
+ ArrayRef<char> Bytes(D.data(), D.size());
+ OS.write(Bytes);
+ return Error::success();
+}
+
+Error FDRTraceWriter::visit(CallArgRecord &R) {
+ return writeMetadata<6u>(OS, R.arg());
+}
+
+Error FDRTraceWriter::visit(PIDRecord &R) {
+ return writeMetadata<9u>(OS, R.pid());
+}
+
+Error FDRTraceWriter::visit(NewBufferRecord &R) {
+ return writeMetadata<0u>(OS, R.tid());
+}
+
+Error FDRTraceWriter::visit(EndBufferRecord &R) {
+ return writeMetadata<1u>(OS, 0);
+}
+
+Error FDRTraceWriter::visit(FunctionRecord &R) {
+ // Write out the data in "field" order, to be endian-aware.
+ uint32_t TypeRecordFuncId = uint32_t{R.functionId() & ~uint32_t{0x0Fu << 28}};
+ TypeRecordFuncId <<= 3;
+ TypeRecordFuncId |= static_cast<uint32_t>(R.recordType());
+ TypeRecordFuncId <<= 1;
+ TypeRecordFuncId &= ~uint32_t{0x01};
+ OS.write(TypeRecordFuncId);
+ OS.write(R.delta());
+ return Error::success();
+}
+
+} // namespace xray
+} // namespace llvm
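The function-record packing in visit(FunctionRecord&) above ends up with bit 0 cleared, the record type in bits 1-3, and the low 28 bits of the function id in bits 4-31. The sketch below packs a word the same way and checks the fields round-trip; packFunctionRecord is a name made up for the example and is not part of the imported sources.

#include <cassert>
#include <cstdint>

uint32_t packFunctionRecord(uint32_t FuncId, uint32_t RecordType) {
  uint32_t Word = FuncId & ~uint32_t{0x0Fu << 28}; // keep the low 28 bits
  Word <<= 3;
  Word |= RecordType & 0x7u;                       // three-bit record type
  Word <<= 1;
  Word &= ~uint32_t{0x01};                         // low bit 0 => function record
  return Word;
}

int main() {
  uint32_t Word = packFunctionRecord(/*FuncId=*/1234, /*RecordType=*/2);
  assert((Word & 0x01u) == 0);       // function record marker
  assert(((Word >> 1) & 0x7u) == 2); // record type round-trips
  assert((Word >> 4) == 1234);       // function id round-trips
  return 0;
}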
diff --git a/contrib/llvm/lib/XRay/FileHeaderReader.cpp b/contrib/llvm/lib/XRay/FileHeaderReader.cpp
new file mode 100644
index 000000000000..0b3fb8b6f692
--- /dev/null
+++ b/contrib/llvm/lib/XRay/FileHeaderReader.cpp
@@ -0,0 +1,70 @@
+//===- FileHeaderReader.cpp - XRay File Header Reader --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FileHeaderReader.h"
+
+namespace llvm {
+namespace xray {
+
+// Reads and returns an XRayFileHeader by parsing the first 32 bytes of the file.
+Expected<XRayFileHeader> readBinaryFormatHeader(DataExtractor &HeaderExtractor,
+ uint32_t &OffsetPtr) {
+ // FIXME: Maybe deduce whether the data is little or big-endian using some
+ // magic bytes in the beginning of the file?
+
+ // First 32 bytes of the file will always be the header. We assume a certain
+ // format here:
+ //
+ // (2) uint16 : version
+ // (2) uint16 : type
+ // (4) uint32 : bitfield
+ // (8) uint64 : cycle frequency
+ // (16) - : padding
+ XRayFileHeader FileHeader;
+ auto PreReadOffset = OffsetPtr;
+ FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading version from file header at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading file type from file header at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading flag bits from file header at offset %d.", OffsetPtr);
+
+ FileHeader.ConstantTSC = Bitfield & 1uL;
+ FileHeader.NonstopTSC = Bitfield & 1uL << 1;
+ PreReadOffset = OffsetPtr;
+ FileHeader.CycleFrequency = HeaderExtractor.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading cycle frequency from file header at offset %d.",
+ OffsetPtr);
+
+ std::memcpy(&FileHeader.FreeFormData,
+ HeaderExtractor.getData().bytes_begin() + OffsetPtr, 16);
+
+ // Manually advance the offset pointer 16 bytes, after getting a raw memcpy
+ // from the underlying data.
+ OffsetPtr += 16;
+ return std::move(FileHeader);
+}
+
+} // namespace xray
+} // namespace llvm
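The header parsed above is a fixed 32-byte layout. As an illustration only (not part of the imported sources), the same layout written as a plain struct, assuming the usual field sizes and no padding on common 64-bit targets; the reader deliberately parses field by field instead, to stay endian-aware:

#include <cstdint>

struct FileHeaderLayout {
  uint16_t Version;        // bytes  0-1
  uint16_t Type;           // bytes  2-3
  uint32_t Bitfield;       // bytes  4-7  (bit 0: ConstantTSC, bit 1: NonstopTSC)
  uint64_t CycleFrequency; // bytes  8-15
  char FreeFormData[16];   // bytes 16-31 (copied verbatim)
};

static_assert(sizeof(FileHeaderLayout) == 32,
              "Header is expected to occupy exactly 32 bytes");

int main() { return 0; }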
diff --git a/contrib/llvm/lib/XRay/InstrumentationMap.cpp b/contrib/llvm/lib/XRay/InstrumentationMap.cpp
index a7d6600b0d8a..9f2b179486f0 100644
--- a/contrib/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/contrib/llvm/lib/XRay/InstrumentationMap.cpp
@@ -12,12 +12,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/XRay/InstrumentationMap.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
@@ -46,19 +48,21 @@ Optional<uint64_t> InstrumentationMap::getFunctionAddr(int32_t FuncId) const {
return None;
}
+using RelocMap = DenseMap<uint64_t, uint64_t>;
+
static Error
-loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
+loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
InstrumentationMap::SledContainer &Sleds,
InstrumentationMap::FunctionAddressMap &FunctionAddresses,
InstrumentationMap::FunctionAddressReverseMap &FunctionIds) {
InstrumentationMap Map;
// Find the section named "xray_instr_map".
- if (!ObjFile.getBinary()->isELF() ||
+ if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) ||
!(ObjFile.getBinary()->getArch() == Triple::x86_64 ||
ObjFile.getBinary()->getArch() == Triple::ppc64le))
return make_error<StringError>(
- "File format not supported (only does ELF little endian 64-bit).",
+ "File format not supported (only does ELF and Mach-O little endian 64-bit).",
std::make_error_code(std::errc::not_supported));
StringRef Contents = "";
@@ -79,6 +83,31 @@ loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
return errorCodeToError(
std::make_error_code(std::errc::executable_format_error));
+ RelocMap Relocs;
+ if (ObjFile.getBinary()->isELF()) {
+ uint32_t RelativeRelocation = [](object::ObjectFile *ObjFile) {
+ if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(ObjFile))
+ return ELFObj->getELFFile()->getRelativeRelocationType();
+ else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(ObjFile))
+ return ELFObj->getELFFile()->getRelativeRelocationType();
+ else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(ObjFile))
+ return ELFObj->getELFFile()->getRelativeRelocationType();
+ else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(ObjFile))
+ return ELFObj->getELFFile()->getRelativeRelocationType();
+ else
+ return static_cast<uint32_t>(0);
+ }(ObjFile.getBinary());
+
+ for (const object::SectionRef &Section : Sections) {
+ for (const object::RelocationRef &Reloc : Section.relocations()) {
+ if (Reloc.getType() != RelativeRelocation)
+ continue;
+ if (auto AddendOrErr = object::ELFRelocationRef(Reloc).getAddend())
+ Relocs.insert({Reloc.getOffset(), *AddendOrErr});
+ }
+ }
+ }
+
// Copy the instrumentation map data into the Sleds data structure.
auto C = Contents.bytes_begin();
static constexpr size_t ELF64SledEntrySize = 32;
@@ -89,6 +118,16 @@ loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
"an XRay sled entry in ELF64."),
std::make_error_code(std::errc::executable_format_error));
+ auto RelocateOrElse = [&](uint32_t Offset, uint64_t Address) {
+ if (!Address) {
+ uint64_t A = I->getAddress() + C - Contents.bytes_begin() + Offset;
+ RelocMap::const_iterator R = Relocs.find(A);
+ if (R != Relocs.end())
+ return R->second;
+ }
+ return Address;
+ };
+
int32_t FuncId = 1;
uint64_t CurFn = 0;
for (; C != Contents.bytes_end(); C += ELF64SledEntrySize) {
@@ -98,8 +137,10 @@ loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
Sleds.push_back({});
auto &Entry = Sleds.back();
uint32_t OffsetPtr = 0;
- Entry.Address = Extractor.getU64(&OffsetPtr);
- Entry.Function = Extractor.getU64(&OffsetPtr);
+ uint32_t AddrOff = OffsetPtr;
+ Entry.Address = RelocateOrElse(AddrOff, Extractor.getU64(&OffsetPtr));
+ uint32_t FuncOff = OffsetPtr;
+ Entry.Function = RelocateOrElse(FuncOff, Extractor.getU64(&OffsetPtr));
auto Kind = Extractor.getU8(&OffsetPtr);
static constexpr SledEntry::FunctionKinds Kinds[] = {
SledEntry::FunctionKinds::ENTRY, SledEntry::FunctionKinds::EXIT,
@@ -191,7 +232,7 @@ llvm::xray::loadInstrumentationMap(StringRef Filename) {
if (auto E = loadYAML(Fd, FileSize, Filename, Map.Sleds,
Map.FunctionAddresses, Map.FunctionIds))
return std::move(E);
- } else if (auto E = loadELF64(Filename, *ObjectFileOrError, Map.Sleds,
+ } else if (auto E = loadObj(Filename, *ObjectFileOrError, Map.Sleds,
Map.FunctionAddresses, Map.FunctionIds)) {
return std::move(E);
}
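The RelocateOrElse lambda added above covers sleds whose address fields are zero in the section data because they are filled in by relative relocations; the value is then taken from the relocation's addend, keyed by the field's address. A standalone sketch of that fallback, not part of the patch, with std::map standing in for the DenseMap-based RelocMap:

#include <cstdint>
#include <map>

using RelocMap = std::map<uint64_t, uint64_t>;

uint64_t relocateOrElse(const RelocMap &Relocs, uint64_t FieldAddr,
                        uint64_t Value) {
  if (Value != 0)
    return Value;               // the value is present in the section data
  auto It = Relocs.find(FieldAddr);
  return It != Relocs.end() ? It->second : 0; // addend stands in for the value
}

int main() {
  RelocMap Relocs{{0x1000, 0x401234}};
  return relocateOrElse(Relocs, 0x1000, 0) == 0x401234 ? 0 : 1;
}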
diff --git a/contrib/llvm/lib/XRay/LogBuilderConsumer.cpp b/contrib/llvm/lib/XRay/LogBuilderConsumer.cpp
new file mode 100644
index 000000000000..88b7d2d728b1
--- /dev/null
+++ b/contrib/llvm/lib/XRay/LogBuilderConsumer.cpp
@@ -0,0 +1,38 @@
+//===- LogBuilderConsumer.cpp - XRay Flight Data Recorder Mode Records ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRRecordConsumer.h"
+
+namespace llvm {
+namespace xray {
+
+Error LogBuilderConsumer::consume(std::unique_ptr<Record> R) {
+ if (!R)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Must not call RecordConsumer::consume() with a null pointer.");
+ Records.push_back(std::move(R));
+ return Error::success();
+}
+
+Error PipelineConsumer::consume(std::unique_ptr<Record> R) {
+ if (!R)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Must not call RecordConsumer::consume() with a null pointer.");
+
+ // We apply all of the visitors in order, and concatenate errors
+ // appropriately.
+ Error Result = Error::success();
+ for (auto *V : Visitors)
+ Result = joinErrors(std::move(Result), R->apply(*V));
+ return Result;
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/Profile.cpp b/contrib/llvm/lib/XRay/Profile.cpp
new file mode 100644
index 000000000000..e8a082884d69
--- /dev/null
+++ b/contrib/llvm/lib/XRay/Profile.cpp
@@ -0,0 +1,403 @@
+//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the XRay Profile class representing the latency profile generated by
+// XRay's profiling mode.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/Profile.h"
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/XRay/Trace.h"
+#include <deque>
+#include <memory>
+
+namespace llvm {
+namespace xray {
+
+Profile::Profile(const Profile &O) {
+ // We need to re-create all the tries from the original (O) in the Profile
+ // being initialized, based on the Block instances we see.
+ for (const auto &Block : O) {
+ Blocks.push_back({Block.Thread, {}});
+ auto &B = Blocks.back();
+ for (const auto &PathData : Block.PathData)
+ B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
+ PathData.second});
+ }
+}
+
+Profile &Profile::operator=(const Profile &O) {
+ Profile P = O;
+ *this = std::move(P);
+ return *this;
+}
+
+namespace {
+
+struct BlockHeader {
+ uint32_t Size;
+ uint32_t Number;
+ uint64_t Thread;
+};
+
+static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
+ uint32_t &Offset) {
+ BlockHeader H;
+ uint32_t CurrentOffset = Offset;
+ H.Size = Extractor.getU32(&Offset);
+ if (Offset == CurrentOffset)
+ return make_error<StringError>(
+ Twine("Error parsing block header size at offset '") +
+ Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ CurrentOffset = Offset;
+ H.Number = Extractor.getU32(&Offset);
+ if (Offset == CurrentOffset)
+ return make_error<StringError>(
+ Twine("Error parsing block header number at offset '") +
+ Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ CurrentOffset = Offset;
+ H.Thread = Extractor.getU64(&Offset);
+ if (Offset == CurrentOffset)
+ return make_error<StringError>(
+ Twine("Error parsing block header thread id at offset '") +
+ Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ return H;
+}
+
+static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
+ uint32_t &Offset) {
+ // We're reading a sequence of int32_t's until we find a 0.
+ std::vector<Profile::FuncID> Path;
+ auto CurrentOffset = Offset;
+ int32_t FuncId;
+ do {
+ FuncId = Extractor.getSigned(&Offset, 4);
+ if (CurrentOffset == Offset)
+ return make_error<StringError>(
+ Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ CurrentOffset = Offset;
+ Path.push_back(FuncId);
+ } while (FuncId != 0);
+ return std::move(Path);
+}
+
+static Expected<Profile::Data> readData(DataExtractor &Extractor,
+ uint32_t &Offset) {
+ // We expect a certain number of elements for Data:
+ // - A 64-bit CallCount
+ // - A 64-bit CumulativeLocalTime counter
+ Profile::Data D;
+ auto CurrentOffset = Offset;
+ D.CallCount = Extractor.getU64(&Offset);
+ if (CurrentOffset == Offset)
+ return make_error<StringError>(
+ Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
+ "'",
+ std::make_error_code(std::errc::invalid_argument));
+ CurrentOffset = Offset;
+ D.CumulativeLocalTime = Extractor.getU64(&Offset);
+ if (CurrentOffset == Offset)
+ return make_error<StringError>(
+ Twine("Error parsing cumulative local time at offset '") +
+ Twine(CurrentOffset) + "'",
+ std::make_error_code(std::errc::invalid_argument));
+ return D;
+}
+
+} // namespace
+
+Error Profile::addBlock(Block &&B) {
+ if (B.PathData.empty())
+ return make_error<StringError>(
+ "Block may not have empty path data.",
+ std::make_error_code(std::errc::invalid_argument));
+
+ Blocks.emplace_back(std::move(B));
+ return Error::success();
+}
+
+Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
+ auto It = PathIDMap.find(P);
+ if (It == PathIDMap.end())
+ return make_error<StringError>(
+ Twine("PathID not found: ") + Twine(P),
+ std::make_error_code(std::errc::invalid_argument));
+ std::vector<Profile::FuncID> Path;
+ for (auto Node = It->second; Node; Node = Node->Caller)
+ Path.push_back(Node->Func);
+ return std::move(Path);
+}
+
+Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
+ if (P.empty())
+ return 0;
+
+ auto RootToLeafPath = reverse(P);
+
+ // Find the root.
+ auto It = RootToLeafPath.begin();
+ auto PathRoot = *It++;
+ auto RootIt =
+ find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
+
+ // If we've not seen this root before, remember it.
+ TrieNode *Node = nullptr;
+ if (RootIt == Roots.end()) {
+ NodeStorage.emplace_back();
+ Node = &NodeStorage.back();
+ Node->Func = PathRoot;
+ Roots.push_back(Node);
+ } else {
+ Node = *RootIt;
+ }
+
+ // Now traverse the path, re-creating if necessary.
+ while (It != RootToLeafPath.end()) {
+ auto NodeFuncID = *It++;
+ auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
+ return N->Func == NodeFuncID;
+ });
+ if (CalleeIt == Node->Callees.end()) {
+ NodeStorage.emplace_back();
+ auto NewNode = &NodeStorage.back();
+ NewNode->Func = NodeFuncID;
+ NewNode->Caller = Node;
+ Node->Callees.push_back(NewNode);
+ Node = NewNode;
+ } else {
+ Node = *CalleeIt;
+ }
+ }
+
+ // At this point, Node *must* be pointing at the leaf.
+ assert(Node->Func == P.front());
+ if (Node->ID == 0) {
+ Node->ID = NextID++;
+ PathIDMap.insert({Node->ID, Node});
+ }
+ return Node->ID;
+}
+
+Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
+ Profile Merged;
+ using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
+ using PathDataMapPtr = std::unique_ptr<PathDataMap>;
+ using PathDataVector = decltype(Profile::Block::PathData);
+ using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
+ ThreadProfileIndexMap ThreadProfileIndex;
+
+ for (const auto &P : {std::ref(L), std::ref(R)})
+ for (const auto &Block : P.get()) {
+ ThreadProfileIndexMap::iterator It;
+ std::tie(It, std::ignore) = ThreadProfileIndex.insert(
+ {Block.Thread, PathDataMapPtr{new PathDataMap()}});
+ for (const auto &PathAndData : Block.PathData) {
+ auto &PathID = PathAndData.first;
+ auto &Data = PathAndData.second;
+ auto NewPathID =
+ Merged.internPath(cantFail(P.get().expandPath(PathID)));
+ PathDataMap::iterator PathDataIt;
+ bool Inserted;
+ std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
+ if (!Inserted) {
+ auto &ExistingData = PathDataIt->second;
+ ExistingData.CallCount += Data.CallCount;
+ ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
+ }
+ }
+ }
+
+ for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
+ PathDataVector PathAndData;
+ PathAndData.reserve(IndexedThreadBlock.second->size());
+ copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
+ cantFail(
+ Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
+ }
+ return Merged;
+}
+
+Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
+ Profile Merged;
+ using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
+ PathDataMap PathData;
+ using PathDataVector = decltype(Profile::Block::PathData);
+ for (const auto &P : {std::ref(L), std::ref(R)})
+ for (const auto &Block : P.get())
+ for (const auto &PathAndData : Block.PathData) {
+ auto &PathId = PathAndData.first;
+ auto &Data = PathAndData.second;
+ auto NewPathID =
+ Merged.internPath(cantFail(P.get().expandPath(PathId)));
+ PathDataMap::iterator PathDataIt;
+ bool Inserted;
+ std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
+ if (!Inserted) {
+ auto &ExistingData = PathDataIt->second;
+ ExistingData.CallCount += Data.CallCount;
+ ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
+ }
+ }
+
+ // In the end there's a single Block, for thread 0.
+ PathDataVector Block;
+ Block.reserve(PathData.size());
+ copy(PathData, std::back_inserter(Block));
+ cantFail(Merged.addBlock({0, std::move(Block)}));
+ return Merged;
+}
+
+Expected<Profile> loadProfile(StringRef Filename) {
+ int Fd;
+ if (auto EC = sys::fs::openFileForRead(Filename, Fd))
+ return make_error<StringError>(
+ Twine("Cannot read profile from '") + Filename + "'", EC);
+
+ uint64_t FileSize;
+ if (auto EC = sys::fs::file_size(Filename, FileSize))
+ return make_error<StringError>(
+ Twine("Cannot get filesize of '") + Filename + "'", EC);
+
+ std::error_code EC;
+ sys::fs::mapped_file_region MappedFile(
+ Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+ if (EC)
+ return make_error<StringError>(
+ Twine("Cannot mmap profile '") + Filename + "'", EC);
+ StringRef Data(MappedFile.data(), MappedFile.size());
+
+ Profile P;
+ uint32_t Offset = 0;
+ DataExtractor Extractor(Data, true, 8);
+
+ // For each block we get from the file:
+ while (Offset != MappedFile.size()) {
+ auto HeaderOrError = readBlockHeader(Extractor, Offset);
+ if (!HeaderOrError)
+ return HeaderOrError.takeError();
+
+ // TODO: Maybe store this header information for each block, even just for
+ // debugging?
+ const auto &Header = HeaderOrError.get();
+
+ // Read in the path data.
+ auto PathOrError = readPath(Extractor, Offset);
+ if (!PathOrError)
+ return PathOrError.takeError();
+ const auto &Path = PathOrError.get();
+
+ // For each path we encounter, we should intern it to get a PathID.
+ auto DataOrError = readData(Extractor, Offset);
+ if (!DataOrError)
+ return DataOrError.takeError();
+ auto &Data = DataOrError.get();
+
+ if (auto E =
+ P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
+ {{P.internPath(Path), std::move(Data)}}}))
+ return std::move(E);
+ }
+
+ return P;
+}
+
+namespace {
+
+struct StackEntry {
+ uint64_t Timestamp;
+ Profile::FuncID FuncId;
+};
+
+} // namespace
+
+Expected<Profile> profileFromTrace(const Trace &T) {
+ Profile P;
+
+ // The implementation of the algorithm re-creates the execution of
+ // the functions based on the trace data. To do this, we set up a number of
+ // data structures to track the execution context of every thread in the
+ // Trace.
+ DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
+ DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
+ ThreadPathData;
+
+ // We then do a pass through the Trace to account data on a per-thread basis.
+ for (const auto &E : T) {
+ auto &TSD = ThreadStacks[E.TId];
+ switch (E.Type) {
+ case RecordTypes::ENTER:
+ case RecordTypes::ENTER_ARG:
+
+ // Push entries into the function call stack.
+ TSD.push_back({E.TSC, E.FuncId});
+ break;
+
+ case RecordTypes::EXIT:
+ case RecordTypes::TAIL_EXIT:
+
+ // Exits cause some accounting to happen, based on the state of the stack.
+ // For each function we pop off the stack, we take note of the path and
+ // record the cumulative state for this path. As we're doing this, we
+ // intern the path into the Profile.
+ while (!TSD.empty()) {
+ auto Top = TSD.back();
+ auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
+ SmallVector<Profile::FuncID, 16> Path;
+ transform(reverse(TSD), std::back_inserter(Path),
+ std::mem_fn(&StackEntry::FuncId));
+ auto InternedPath = P.internPath(Path);
+ auto &TPD = ThreadPathData[E.TId][InternedPath];
+ ++TPD.CallCount;
+ TPD.CumulativeLocalTime += FunctionLocalTime;
+ TSD.pop_back();
+
+ // If we've matched the corresponding entry event for this function,
+ // then we exit the loop.
+ if (Top.FuncId == E.FuncId)
+ break;
+
+ // FIXME: Consider the intermediate times and the cumulative tree time
+ // as well.
+ }
+
+ break;
+
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Support an extension point to allow handling of custom and typed
+ // events in profiles.
+ break;
+ }
+ }
+
+ // Once we've gone through the Trace, we now create one Block per thread in
+ // the Profile.
+ for (const auto &ThreadPaths : ThreadPathData) {
+ const auto &TID = ThreadPaths.first;
+ const auto &PathsData = ThreadPaths.second;
+ if (auto E = P.addBlock({
+ TID,
+ std::vector<std::pair<Profile::PathID, Profile::Data>>(
+ PathsData.begin(), PathsData.end()),
+ }))
+ return std::move(E);
+ }
+
+ return P;
+}
+
+} // namespace xray
+} // namespace llvm
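Profile::internPath() above stores paths in a trie keyed by function id: a leaf-to-root path is walked root-to-leaf, missing nodes are created along the way, and the leaf node's ID becomes the PathID. A simplified, self-contained sketch of that interning; PathInterner and its members are names invented for the example and not part of the imported sources.

#include <algorithm>
#include <cstdint>
#include <deque>
#include <vector>

struct TrieNode {
  int32_t Func;
  TrieNode *Caller;
  std::vector<TrieNode *> Callees;
  uint64_t ID;
};

struct PathInterner {
  std::deque<TrieNode> Storage; // deque keeps node addresses stable
  std::vector<TrieNode *> Roots;
  uint64_t NextID = 1;

  uint64_t intern(const std::vector<int32_t> &LeafToRoot) {
    if (LeafToRoot.empty())
      return 0;
    TrieNode *Node = nullptr;
    std::vector<TrieNode *> *Level = &Roots;
    // Paths arrive leaf-first, so walk them from the back (the root).
    for (auto It = LeafToRoot.rbegin(); It != LeafToRoot.rend(); ++It) {
      int32_t Func = *It;
      auto Found = std::find_if(Level->begin(), Level->end(),
                                [Func](TrieNode *N) { return N->Func == Func; });
      if (Found == Level->end()) {
        Storage.push_back(TrieNode{Func, Node, {}, 0});
        Level->push_back(&Storage.back());
        Node = &Storage.back();
      } else {
        Node = *Found;
      }
      Level = &Node->Callees;
    }
    if (Node->ID == 0)
      Node->ID = NextID++; // first time this exact path has been seen
    return Node->ID;
  }
};

int main() {
  PathInterner P;
  uint64_t A = P.intern({3, 2, 1}); // leaf 3, called by 2, called by 1
  uint64_t B = P.intern({3, 2, 1}); // the same path interns to the same id
  return A == B ? 0 : 1;
}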
diff --git a/contrib/llvm/lib/XRay/RecordInitializer.cpp b/contrib/llvm/lib/XRay/RecordInitializer.cpp
new file mode 100644
index 000000000000..f136a1e456b7
--- /dev/null
+++ b/contrib/llvm/lib/XRay/RecordInitializer.cpp
@@ -0,0 +1,418 @@
+//===- RecordInitializer.cpp - XRay FDR Mode Record Initializer -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/FDRRecords.h"
+
+namespace llvm {
+namespace xray {
+
+Error RecordInitializer::visit(BufferExtents &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t)))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a buffer extent (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.Size = E.getU64(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read buffer extent at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(WallclockRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a wallclock record (%d).",
+ OffsetPtr);
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+ R.Seconds = E.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read wall clock 'seconds' field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.Nanos = E.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read wall clock 'nanos' field at offset %d.", OffsetPtr);
+
+ // Align to metadata record size boundary.
+ assert(OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(NewCPUIDRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new cpu id record (%d).",
+ OffsetPtr);
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+ R.CPUId = E.getU16(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read CPU id at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.TSC = E.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read CPU TSC at offset %d.", OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(TSCWrapRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new TSC wrap record (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.BaseTSC = E.getU64(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read TSC wrap record at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(CustomEventRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a custom event record (%d).",
+ OffsetPtr);
+
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+ R.Size = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+        "Cannot read a custom event record size field at offset %d.", OffsetPtr);
+
+ if (R.Size <= 0)
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid size for custom event (size = %d) at offset %d.", R.Size,
+ OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.TSC = E.getU64(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a custom event TSC field at offset %d.", OffsetPtr);
+
+  // For version 4 onwards of the FDR log, we also want to capture the CPU ID
+  // of the custom event.
+ if (Version >= 4) {
+ PreReadOffset = OffsetPtr;
+ R.CPU = E.getU16(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Missing CPU field at offset %d", OffsetPtr);
+ }
+
+ assert(OffsetPtr > BeginOffset &&
+ OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+
+ // Next we read in a fixed chunk of data from the given offset.
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Cannot read %d bytes of custom event data from offset %d.", R.Size,
+ OffsetPtr);
+
+ std::vector<uint8_t> Buffer;
+ Buffer.resize(R.Size);
+ PreReadOffset = OffsetPtr;
+ if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading data into buffer of size %d at offset %d.", R.Size,
+ OffsetPtr);
+
+ assert(OffsetPtr >= PreReadOffset);
+ if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading enough bytes for the custom event payload -- read %d "
+ "expecting %d bytes at offset %d.",
+ OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
+
+ R.Data.assign(Buffer.begin(), Buffer.end());
+ return Error::success();
+}
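
The payload-carrying visitors (custom events up to version 4, custom events v5, and typed events) all finish with the same fixed-chunk read: check that Size bytes are available, bulk-read them with getU8, and verify the offset advanced by exactly Size. A compact sketch of that step, not part of the patch and with an illustrative helper name:

    // Sketch: bulk-read a Size-byte payload and confirm the extractor consumed
    // exactly that many bytes.
    static Error readPayload(const DataExtractor &E, uint32_t &OffsetPtr,
                             int32_t Size, std::string &Out) {
      if (!E.isValidOffsetForDataOfSize(OffsetPtr, Size))
        return createStringError(std::make_error_code(std::errc::bad_address),
                                 "Cannot read %d bytes from offset %d.", Size,
                                 OffsetPtr);
      std::vector<uint8_t> Buffer(Size);
      uint32_t PreReadOffset = OffsetPtr;
      if (E.getU8(&OffsetPtr, Buffer.data(), Size) != Buffer.data() ||
          OffsetPtr - PreReadOffset != static_cast<uint32_t>(Size))
        return createStringError(
            std::make_error_code(std::errc::invalid_argument),
            "Short read of %d bytes at offset %d.", Size, PreReadOffset);
      Out.assign(Buffer.begin(), Buffer.end());
      return Error::success();
    }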
+
+Error RecordInitializer::visit(CustomEventRecordV5 &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a custom event record (%d).",
+ OffsetPtr);
+
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+
+ R.Size = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+        "Cannot read a custom event record size field at offset %d.", OffsetPtr);
+
+ if (R.Size <= 0)
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid size for custom event (size = %d) at offset %d.", R.Size,
+ OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a custom event record TSC delta field at offset %d.",
+ OffsetPtr);
+
+ assert(OffsetPtr > BeginOffset &&
+ OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+
+ // Next we read in a fixed chunk of data from the given offset.
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Cannot read %d bytes of custom event data from offset %d.", R.Size,
+ OffsetPtr);
+
+ std::vector<uint8_t> Buffer;
+ Buffer.resize(R.Size);
+ PreReadOffset = OffsetPtr;
+ if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading data into buffer of size %d at offset %d.", R.Size,
+ OffsetPtr);
+
+ assert(OffsetPtr >= PreReadOffset);
+ if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading enough bytes for the custom event payload -- read %d "
+ "expecting %d bytes at offset %d.",
+ OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
+
+ R.Data.assign(Buffer.begin(), Buffer.end());
+ return Error::success();
+}
+
+Error RecordInitializer::visit(TypedEventRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a typed event record (%d).",
+ OffsetPtr);
+
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = OffsetPtr;
+
+ R.Size = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+        "Cannot read a typed event record size field at offset %d.", OffsetPtr);
+
+ if (R.Size <= 0)
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+ "Invalid size for typed event (size = %d) at offset %d.", R.Size,
+ OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a typed event record TSC delta field at offset %d.",
+ OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ R.EventType = E.getU16(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a typed event record type field at offset %d.", OffsetPtr);
+
+ assert(OffsetPtr > BeginOffset &&
+ OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize);
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset);
+
+ // Next we read in a fixed chunk of data from the given offset.
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::bad_address),
+        "Cannot read %d bytes of typed event data from offset %d.", R.Size,
+ OffsetPtr);
+
+ std::vector<uint8_t> Buffer;
+ Buffer.resize(R.Size);
+ PreReadOffset = OffsetPtr;
+ if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data())
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading data into buffer of size %d at offset %d.", R.Size,
+ OffsetPtr);
+
+ assert(OffsetPtr >= PreReadOffset);
+ if (OffsetPtr - PreReadOffset != static_cast<uint32_t>(R.Size))
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "Failed reading enough bytes for the typed event payload -- read %d "
+ "expecting %d bytes at offset %d.",
+ OffsetPtr - PreReadOffset, R.Size, PreReadOffset);
+
+ R.Data.assign(Buffer.begin(), Buffer.end());
+ return Error::success();
+}
+
+Error RecordInitializer::visit(CallArgRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a call argument record (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.Arg = E.getU64(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a call arg record at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(PIDRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a process ID record (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.PID = E.getSigned(&OffsetPtr, 4);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a process ID record at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(NewBufferRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a new buffer record (%d).",
+ OffsetPtr);
+
+ auto PreReadOffset = OffsetPtr;
+ R.TID = E.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Cannot read a new buffer record at offset %d.",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset);
+ return Error::success();
+}
+
+Error RecordInitializer::visit(EndBufferRecord &R) {
+ if (!E.isValidOffsetForDataOfSize(OffsetPtr,
+ MetadataRecord::kMetadataBodySize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for an end-of-buffer record (%d).",
+ OffsetPtr);
+
+ OffsetPtr += MetadataRecord::kMetadataBodySize;
+ return Error::success();
+}
+
+Error RecordInitializer::visit(FunctionRecord &R) {
+  // For function records, we need to step back one byte to read a full
+  // unsigned 32-bit value. The first four bytes will have the following
+ // layout:
+ //
+ // bit 0 : function record indicator (must be 0)
+ // bits 1..3 : function record type
+  // bits 4..31 : function id
+ //
+ if (OffsetPtr == 0 || !E.isValidOffsetForDataOfSize(
+ --OffsetPtr, FunctionRecord::kFunctionRecordSize))
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Invalid offset for a function record (%d).",
+ OffsetPtr);
+
+ auto BeginOffset = OffsetPtr;
+ auto PreReadOffset = BeginOffset;
+ uint32_t Buffer = E.getU32(&OffsetPtr);
+ if (PreReadOffset == OffsetPtr)
+ return createStringError(std::make_error_code(std::errc::bad_address),
+ "Cannot read function id field from offset %d.",
+ OffsetPtr);
+
+  // To get the function record type, we shift the buffer right by one
+  // (dropping the function record indicator bit) and mask the result with
+  // 0b0111 to get the record type as an unsigned value.
+ unsigned FunctionType = (Buffer >> 1) & 0x07u;
+ switch (FunctionType) {
+ case static_cast<unsigned>(RecordTypes::ENTER):
+ case static_cast<unsigned>(RecordTypes::ENTER_ARG):
+ case static_cast<unsigned>(RecordTypes::EXIT):
+ case static_cast<unsigned>(RecordTypes::TAIL_EXIT):
+ R.Kind = static_cast<RecordTypes>(FunctionType);
+ break;
+ default:
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown function record type '%d' at offset %d.",
+ FunctionType, BeginOffset);
+ }
+
+ R.FuncId = Buffer >> 4;
+ PreReadOffset = OffsetPtr;
+ R.Delta = E.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Failed reading TSC delta from offset %d.",
+ OffsetPtr);
+ assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset));
+ return Error::success();
+}
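
To make the function-record bit layout concrete, here is a small standalone decoding example. It is not part of the patch; the word value is made up purely to exercise the fields, and the mapping of the three type bits onto RecordTypes is whatever the switch above decides.

    // Sketch: decode the first 32-bit word of an FDR function record.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Word = (42u << 4)   // bits 4..31: function id = 42
                            | (2u << 1)  // bits 1..3 : record type = 2
                            | 0u;        // bit 0     : 0 marks a function record
      unsigned FunctionType = (Word >> 1) & 0x07u; // same mask as the visitor
      uint32_t FuncId = Word >> 4;                 // logical shift on uint32_t
      assert(FunctionType == 2u && FuncId == 42u);
      return 0;
    }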
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/lib/XRay/RecordPrinter.cpp b/contrib/llvm/lib/XRay/RecordPrinter.cpp
new file mode 100644
index 000000000000..71ea7d0e969f
--- /dev/null
+++ b/contrib/llvm/lib/XRay/RecordPrinter.cpp
@@ -0,0 +1,109 @@
+//===- RecordPrinter.cpp - FDR Record Printer -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/RecordPrinter.h"
+
+#include "llvm/Support/FormatVariadic.h"
+
+namespace llvm {
+namespace xray {
+
+Error RecordPrinter::visit(BufferExtents &R) {
+ OS << formatv("<Buffer: size = {0} bytes>", R.size()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(WallclockRecord &R) {
+ OS << formatv("<Wall Time: seconds = {0}.{1,0+6}>", R.seconds(), R.nanos())
+ << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(NewCPUIDRecord &R) {
+ OS << formatv("<CPU: id = {0}, tsc = {1}>", R.cpuid(), R.tsc()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(TSCWrapRecord &R) {
+ OS << formatv("<TSC Wrap: base = {0}>", R.tsc()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(CustomEventRecord &R) {
+ OS << formatv(
+ "<Custom Event: tsc = {0}, cpu = {1}, size = {2}, data = '{3}'>",
+ R.tsc(), R.cpu(), R.size(), R.data())
+ << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(CustomEventRecordV5 &R) {
+ OS << formatv("<Custom Event: delta = +{0}, size = {1}, data = '{2}'>",
+ R.delta(), R.size(), R.data())
+ << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(TypedEventRecord &R) {
+ OS << formatv(
+      "<Typed Event: delta = +{0}, type = {1}, size = {2}, data = '{3}'>",
+ R.delta(), R.eventType(), R.size(), R.data())
+ << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(CallArgRecord &R) {
+ OS << formatv("<Call Argument: data = {0} (hex = {0:x})>", R.arg()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(PIDRecord &R) {
+ OS << formatv("<PID: {0}>", R.pid()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(NewBufferRecord &R) {
+ OS << formatv("<Thread ID: {0}>", R.tid()) << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(EndBufferRecord &R) {
+ OS << "<End of Buffer>" << Delim;
+ return Error::success();
+}
+
+Error RecordPrinter::visit(FunctionRecord &R) {
+ // FIXME: Support symbolization here?
+ switch (R.recordType()) {
+ case RecordTypes::ENTER:
+ OS << formatv("<Function Enter: #{0} delta = +{1}>", R.functionId(),
+ R.delta());
+ break;
+ case RecordTypes::ENTER_ARG:
+ OS << formatv("<Function Enter With Arg: #{0} delta = +{1}>",
+ R.functionId(), R.delta());
+ break;
+ case RecordTypes::EXIT:
+ OS << formatv("<Function Exit: #{0} delta = +{1}>", R.functionId(),
+ R.delta());
+ break;
+ case RecordTypes::TAIL_EXIT:
+ OS << formatv("<Function Tail Exit: #{0} delta = +{1}>", R.functionId(),
+ R.delta());
+ break;
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Flag as a bug?
+ break;
+ }
+ OS << Delim;
+ return Error::success();
+}
+
+} // namespace xray
+} // namespace llvm
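
RecordPrinter is just another FDR record visitor, so it is driven the same way the indexer and verifier are driven in the Trace.cpp changes below: each loaded record is handed the visitor through its apply method. A rough usage sketch, not part of the patch; the constructor arguments and the surrounding setup (a positioned FileBasedRecordProducer and a raw_ostream) are assumptions:

    // Sketch: print every record a producer hands back, one per line.
    Error printAllRecords(FileBasedRecordProducer &P, DataExtractor &DE,
                          uint32_t &OffsetPtr, raw_ostream &OS) {
      RecordPrinter Printer(OS, "\n"); // delimiter assumed to be the 2nd arg
      while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
        auto R = P.produce();          // Expected<std::unique_ptr<Record>>
        if (!R)
          return R.takeError();
        if (auto E = R.get()->apply(Printer))
          return E;
      }
      return Error::success();
    }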
diff --git a/contrib/llvm/lib/XRay/Trace.cpp b/contrib/llvm/lib/XRay/Trace.cpp
index a8764b25483c..4f28f3f754c1 100644
--- a/contrib/llvm/lib/XRay/Trace.cpp
+++ b/contrib/llvm/lib/XRay/Trace.cpp
@@ -15,7 +15,16 @@
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/XRay/BlockIndexer.h"
+#include "llvm/XRay/BlockVerifier.h"
+#include "llvm/XRay/FDRRecordConsumer.h"
+#include "llvm/XRay/FDRRecordProducer.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/FDRTraceExpander.h"
+#include "llvm/XRay/FileHeaderReader.h"
#include "llvm/XRay/YAMLXRayRecord.h"
+#include <memory>
+#include <vector>
using namespace llvm;
using namespace llvm::xray;
@@ -25,38 +34,8 @@ namespace {
using XRayRecordStorage =
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
-// Populates the FileHeader reference by reading the first 32 bytes of the file.
-Error readBinaryFormatHeader(StringRef Data, XRayFileHeader &FileHeader) {
- // FIXME: Maybe deduce whether the data is little or big-endian using some
- // magic bytes in the beginning of the file?
-
- // First 32 bytes of the file will always be the header. We assume a certain
- // format here:
- //
- // (2) uint16 : version
- // (2) uint16 : type
- // (4) uint32 : bitfield
- // (8) uint64 : cycle frequency
- // (16) - : padding
-
- DataExtractor HeaderExtractor(Data, true, 8);
- uint32_t OffsetPtr = 0;
- FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr);
- FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr);
- uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr);
- FileHeader.ConstantTSC = Bitfield & 1uL;
- FileHeader.NonstopTSC = Bitfield & 1uL << 1;
- FileHeader.CycleFrequency = HeaderExtractor.getU64(&OffsetPtr);
- std::memcpy(&FileHeader.FreeFormData, Data.bytes_begin() + OffsetPtr, 16);
- if (FileHeader.Version != 1 && FileHeader.Version != 2 &&
- FileHeader.Version != 3)
- return make_error<StringError>(
- Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version),
- std::make_error_code(std::errc::invalid_argument));
- return Error::success();
-}
-
-Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
+Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
+ XRayFileHeader &FileHeader,
std::vector<XRayRecord> &Records) {
if (Data.size() < 32)
return make_error<StringError>(
@@ -68,8 +47,12 @@ Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
"Invalid-sized XRay data.",
std::make_error_code(std::errc::invalid_argument));
- if (auto E = readBinaryFormatHeader(Data, FileHeader))
- return E;
+ DataExtractor Reader(Data, IsLittleEndian, 8);
+ uint32_t OffsetPtr = 0;
+ auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr);
+ if (!FileHeaderOrError)
+ return FileHeaderOrError.takeError();
+ FileHeader = std::move(FileHeaderOrError.get());
// Each record after the header will be 32 bytes, in the following format:
//
@@ -81,16 +64,38 @@ Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
// (4) uint32 : thread id
// (4) uint32 : process id
// (8) - : padding
- for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(32)) {
- DataExtractor RecordExtractor(S, true, 8);
- uint32_t OffsetPtr = 0;
- switch (auto RecordType = RecordExtractor.getU16(&OffsetPtr)) {
+ while (Reader.isValidOffset(OffsetPtr)) {
+ if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32))
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Not enough bytes to read a full record at offset %d.", OffsetPtr);
+ auto PreReadOffset = OffsetPtr;
+ auto RecordType = Reader.getU16(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading record type at offset %d.", OffsetPtr);
+
+ switch (RecordType) {
case 0: { // Normal records.
Records.emplace_back();
auto &Record = Records.back();
Record.RecordType = RecordType;
- Record.CPU = RecordExtractor.getU8(&OffsetPtr);
- auto Type = RecordExtractor.getU8(&OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.CPU = Reader.getU8(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading CPU field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ auto Type = Reader.getU8(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading record type field at offset %d.", OffsetPtr);
+
switch (Type) {
case 0:
Record.Type = RecordTypes::ENTER;
@@ -105,393 +110,96 @@ Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
Record.Type = RecordTypes::ENTER_ARG;
break;
default:
- return make_error<StringError>(
- Twine("Unknown record type '") + Twine(int{Type}) + "'",
- std::make_error_code(std::errc::executable_format_error));
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Unknown record type '%d' at offset %d.", Type, OffsetPtr);
}
- Record.FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t));
- Record.TSC = RecordExtractor.getU64(&OffsetPtr);
- Record.TId = RecordExtractor.getU32(&OffsetPtr);
- Record.PId = RecordExtractor.getU32(&OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading function id field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.TSC = Reader.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading TSC field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.TId = Reader.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading thread id field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ Record.PId = Reader.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading process id at offset %d.", OffsetPtr);
+
break;
}
case 1: { // Arg payload record.
auto &Record = Records.back();
- // Advance two bytes to avoid padding.
+
+      // We skip the next two bytes of the record, because we don't need the
+      // CPU and record-type fields for arg payloads.
OffsetPtr += 2;
- int32_t FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t));
- auto TId = RecordExtractor.getU32(&OffsetPtr);
- auto PId = RecordExtractor.getU32(&OffsetPtr);
+ PreReadOffset = OffsetPtr;
+ int32_t FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading function id field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ auto TId = Reader.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading thread id field at offset %d.", OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ auto PId = Reader.getU32(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading process id field at offset %d.", OffsetPtr);
// Make a check for versions above 3 for the Pid field
if (Record.FuncId != FuncId || Record.TId != TId ||
(FileHeader.Version >= 3 ? Record.PId != PId : false))
- return make_error<StringError>(
- Twine("Corrupted log, found arg payload following non-matching "
- "function + thread record. Record for function ") +
- Twine(Record.FuncId) + " != " + Twine(FuncId) + "; offset: " +
- Twine(S.data() - Data.data()),
- std::make_error_code(std::errc::executable_format_error));
-
- auto Arg = RecordExtractor.getU64(&OffsetPtr);
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Corrupted log, found arg payload following non-matching "
+ "function+thread record. Record for function %d != %d at offset "
+ "%d",
+ Record.FuncId, FuncId, OffsetPtr);
+
+ PreReadOffset = OffsetPtr;
+ auto Arg = Reader.getU64(&OffsetPtr);
+ if (OffsetPtr == PreReadOffset)
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Failed reading argument payload at offset %d.", OffsetPtr);
+
Record.CallArgs.push_back(Arg);
break;
}
default:
- return make_error<StringError>(
- Twine("Unknown record type == ") + Twine(RecordType),
- std::make_error_code(std::errc::executable_format_error));
+ return createStringError(
+ std::make_error_code(std::errc::executable_format_error),
+ "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr);
}
- }
- return Error::success();
-}
-
-/// When reading from a Flight Data Recorder mode log, metadata records are
-/// sparse compared to packed function records, so we must maintain state as we
-/// read through the sequence of entries. This allows the reader to denormalize
-/// the CPUId and Thread Id onto each Function Record and transform delta
-/// encoded TSC values into absolute encodings on each record.
-struct FDRState {
- uint16_t CPUId;
- uint16_t ThreadId;
- int32_t ProcessId;
- uint64_t BaseTSC;
-
- /// Encode some of the state transitions for the FDR log reader as explicit
- /// checks. These are expectations for the next Record in the stream.
- enum class Token {
- NEW_BUFFER_RECORD_OR_EOF,
- WALLCLOCK_RECORD,
- NEW_CPU_ID_RECORD,
- FUNCTION_SEQUENCE,
- SCAN_TO_END_OF_THREAD_BUF,
- CUSTOM_EVENT_DATA,
- CALL_ARGUMENT,
- BUFFER_EXTENTS,
- PID_RECORD,
- };
- Token Expects;
-
- // Each threads buffer may have trailing garbage to scan over, so we track our
- // progress.
- uint64_t CurrentBufferSize;
- uint64_t CurrentBufferConsumed;
-};
-
-const char *fdrStateToTwine(const FDRState::Token &state) {
- switch (state) {
- case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
- return "NEW_BUFFER_RECORD_OR_EOF";
- case FDRState::Token::WALLCLOCK_RECORD:
- return "WALLCLOCK_RECORD";
- case FDRState::Token::NEW_CPU_ID_RECORD:
- return "NEW_CPU_ID_RECORD";
- case FDRState::Token::FUNCTION_SEQUENCE:
- return "FUNCTION_SEQUENCE";
- case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF:
- return "SCAN_TO_END_OF_THREAD_BUF";
- case FDRState::Token::CUSTOM_EVENT_DATA:
- return "CUSTOM_EVENT_DATA";
- case FDRState::Token::CALL_ARGUMENT:
- return "CALL_ARGUMENT";
- case FDRState::Token::BUFFER_EXTENTS:
- return "BUFFER_EXTENTS";
- case FDRState::Token::PID_RECORD:
- return "PID_RECORD";
- }
- return "UNKNOWN";
-}
-
-/// State transition when a NewBufferRecord is encountered.
-Error processFDRNewBufferRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
-
- if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
- return make_error<StringError>(
- Twine("Malformed log. Read New Buffer record kind out of sequence; "
- "expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- uint32_t OffsetPtr = 1; // 1 byte into record.
- State.ThreadId = RecordExtractor.getU16(&OffsetPtr);
- State.Expects = FDRState::Token::WALLCLOCK_RECORD;
- return Error::success();
-}
-
-/// State transition when an EndOfBufferRecord is encountered.
-Error processFDREndOfBufferRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects == FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
- return make_error<StringError>(
- Twine("Malformed log. Received EOB message without current buffer; "
- "expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- State.Expects = FDRState::Token::SCAN_TO_END_OF_THREAD_BUF;
- return Error::success();
-}
-
-/// State transition when a NewCPUIdRecord is encountered.
-Error processFDRNewCPUIdRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE &&
- State.Expects != FDRState::Token::NEW_CPU_ID_RECORD)
- return make_error<StringError>(
- Twine("Malformed log. Read NewCPUId record kind out of sequence; "
- "expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- uint32_t OffsetPtr = 1; // Read starting after the first byte.
- State.CPUId = RecordExtractor.getU16(&OffsetPtr);
- State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
- State.Expects = FDRState::Token::FUNCTION_SEQUENCE;
- return Error::success();
-}
-
-/// State transition when a TSCWrapRecord (overflow detection) is encountered.
-Error processFDRTSCWrapRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE)
- return make_error<StringError>(
- Twine("Malformed log. Read TSCWrap record kind out of sequence; "
- "expecting: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- uint32_t OffsetPtr = 1; // Read starting after the first byte.
- State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
- return Error::success();
-}
-
-/// State transition when a WallTimeMarkerRecord is encountered.
-Error processFDRWallTimeRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects != FDRState::Token::WALLCLOCK_RECORD)
- return make_error<StringError>(
- Twine("Malformed log. Read Wallclock record kind out of sequence; "
- "expecting: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
-
- // TODO: Someday, reconcile the TSC ticks to wall clock time for presentation
- // purposes. For now, we're ignoring these records.
- State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
- return Error::success();
-}
-
-/// State transition when a PidRecord is encountered.
-Error processFDRPidRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
-
- if (State.Expects != FDRState::Token::PID_RECORD)
- return make_error<StringError>(
- Twine("Malformed log. Read Pid record kind out of sequence; "
- "expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
-
- uint32_t OffsetPtr = 1; // Read starting after the first byte.
- State.ProcessId = RecordExtractor.getU32(&OffsetPtr);
- State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
- return Error::success();
-}
-
-/// State transition when a CustomEventMarker is encountered.
-Error processCustomEventMarker(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor,
- size_t &RecordSize) {
- // We can encounter a CustomEventMarker anywhere in the log, so we can handle
- // it regardless of the expectation. However, we do set the expectation to
- // read a set number of fixed bytes, as described in the metadata.
- uint32_t OffsetPtr = 1; // Read after the first byte.
- uint32_t DataSize = RecordExtractor.getU32(&OffsetPtr);
- uint64_t TSC = RecordExtractor.getU64(&OffsetPtr);
-
- // FIXME: Actually represent the record through the API. For now we only
- // skip through the data.
- (void)TSC;
- RecordSize = 16 + DataSize;
- return Error::success();
-}
-
-/// State transition when an BufferExtents record is encountered.
-Error processBufferExtents(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor) {
- if (State.Expects != FDRState::Token::BUFFER_EXTENTS)
- return make_error<StringError>(
- Twine("Malformed log. Buffer Extents unexpected; expected: ") +
- fdrStateToTwine(State.Expects),
- std::make_error_code(std::errc::executable_format_error));
- uint32_t OffsetPtr = 1; // Read after the first byte.
- State.CurrentBufferSize = RecordExtractor.getU64(&OffsetPtr);
- State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
- return Error::success();
-}
-
-/// State transition when a CallArgumentRecord is encountered.
-Error processFDRCallArgumentRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor,
- std::vector<XRayRecord> &Records) {
- uint32_t OffsetPtr = 1; // Read starting after the first byte.
- auto &Enter = Records.back();
-
- if (Enter.Type != RecordTypes::ENTER)
- return make_error<StringError>(
- "CallArgument needs to be right after a function entry",
- std::make_error_code(std::errc::executable_format_error));
- Enter.Type = RecordTypes::ENTER_ARG;
- Enter.CallArgs.emplace_back(RecordExtractor.getU64(&OffsetPtr));
- return Error::success();
-}
-
-/// Advances the state machine for reading the FDR record type by reading one
-/// Metadata Record and updating the State appropriately based on the kind of
-/// record encountered. The RecordKind is encoded in the first byte of the
-/// Record, which the caller should pass in because they have already read it
-/// to determine that this is a metadata record as opposed to a function record.
-///
-/// Beginning with Version 2 of the FDR log, we do not depend on the size of the
-/// buffer, but rather use the extents to determine how far to read in the log
-/// for this particular buffer.
-///
-/// In Version 3, FDR log now includes a pid metadata record after
-/// WallTimeMarker
-Error processFDRMetadataRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor,
- size_t &RecordSize,
- std::vector<XRayRecord> &Records,
- uint16_t Version) {
- // The remaining 7 bits are the RecordKind enum.
- uint8_t RecordKind = RecordFirstByte >> 1;
- switch (RecordKind) {
- case 0: // NewBuffer
- if (auto E =
- processFDRNewBufferRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- case 1: // EndOfBuffer
- if (Version >= 2)
- return make_error<StringError>(
- "Since Version 2 of FDR logging, we no longer support EOB records.",
- std::make_error_code(std::errc::executable_format_error));
- if (auto E = processFDREndOfBufferRecord(State, RecordFirstByte,
- RecordExtractor))
- return E;
- break;
- case 2: // NewCPUId
- if (auto E =
- processFDRNewCPUIdRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- case 3: // TSCWrap
- if (auto E =
- processFDRTSCWrapRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- case 4: // WallTimeMarker
- if (auto E =
- processFDRWallTimeRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- // In Version 3 and and above, a PidRecord is expected after WallTimeRecord
- if (Version >= 3)
- State.Expects = FDRState::Token::PID_RECORD;
- break;
- case 5: // CustomEventMarker
- if (auto E = processCustomEventMarker(State, RecordFirstByte,
- RecordExtractor, RecordSize))
- return E;
- break;
- case 6: // CallArgument
- if (auto E = processFDRCallArgumentRecord(State, RecordFirstByte,
- RecordExtractor, Records))
- return E;
- break;
- case 7: // BufferExtents
- if (auto E = processBufferExtents(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- case 9: // Pid
- if (auto E = processFDRPidRecord(State, RecordFirstByte, RecordExtractor))
- return E;
- break;
- default:
- // Widen the record type to uint16_t to prevent conversion to char.
- return make_error<StringError>(
- Twine("Illegal metadata record type: ")
- .concat(Twine(static_cast<unsigned>(RecordKind))),
- std::make_error_code(std::errc::executable_format_error));
- }
- return Error::success();
-}
-
-/// Reads a function record from an FDR format log, appending a new XRayRecord
-/// to the vector being populated and updating the State with a new value
-/// reference value to interpret TSC deltas.
-///
-/// The XRayRecord constructed includes information from the function record
-/// processed here as well as Thread ID and CPU ID formerly extracted into
-/// State.
-Error processFDRFunctionRecord(FDRState &State, uint8_t RecordFirstByte,
- DataExtractor &RecordExtractor,
- std::vector<XRayRecord> &Records) {
- switch (State.Expects) {
- case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
- return make_error<StringError>(
- "Malformed log. Received Function Record before new buffer setup.",
- std::make_error_code(std::errc::executable_format_error));
- case FDRState::Token::WALLCLOCK_RECORD:
- return make_error<StringError>(
- "Malformed log. Received Function Record when expecting wallclock.",
- std::make_error_code(std::errc::executable_format_error));
- case FDRState::Token::PID_RECORD:
- return make_error<StringError>(
- "Malformed log. Received Function Record when expecting pid.",
- std::make_error_code(std::errc::executable_format_error));
- case FDRState::Token::NEW_CPU_ID_RECORD:
- return make_error<StringError>(
- "Malformed log. Received Function Record before first CPU record.",
- std::make_error_code(std::errc::executable_format_error));
- default:
- Records.emplace_back();
- auto &Record = Records.back();
- Record.RecordType = 0; // Record is type NORMAL.
- // Strip off record type bit and use the next three bits.
- uint8_t RecordType = (RecordFirstByte >> 1) & 0x07;
- switch (RecordType) {
- case static_cast<uint8_t>(RecordTypes::ENTER):
- Record.Type = RecordTypes::ENTER;
- break;
- case static_cast<uint8_t>(RecordTypes::EXIT):
- Record.Type = RecordTypes::EXIT;
- break;
- case static_cast<uint8_t>(RecordTypes::TAIL_EXIT):
- Record.Type = RecordTypes::TAIL_EXIT;
- break;
- default:
- // Cast to an unsigned integer to not interpret the record type as a char.
- return make_error<StringError>(
- Twine("Illegal function record type: ")
- .concat(Twine(static_cast<unsigned>(RecordType))),
- std::make_error_code(std::errc::executable_format_error));
- }
- Record.CPU = State.CPUId;
- Record.TId = State.ThreadId;
- Record.PId = State.ProcessId;
- // Back up to read first 32 bits, including the 4 we pulled RecordType
- // and RecordKind out of. The remaining 28 are FunctionId.
- uint32_t OffsetPtr = 0;
- // Despite function Id being a signed int on XRayRecord,
- // when it is written to an FDR format, the top bits are truncated,
- // so it is effectively an unsigned value. When we shift off the
- // top four bits, we want the shift to be logical, so we read as
- // uint32_t.
- uint32_t FuncIdBitField = RecordExtractor.getU32(&OffsetPtr);
- Record.FuncId = FuncIdBitField >> 4;
- // FunctionRecords have a 32 bit delta from the previous absolute TSC
- // or TSC delta. If this would overflow, we should read a TSCWrap record
- // with an absolute TSC reading.
- uint64_t NewTSC = State.BaseTSC + RecordExtractor.getU32(&OffsetPtr);
- State.BaseTSC = NewTSC;
- Record.TSC = NewTSC;
+ // Advance the offset pointer enough bytes to align to 32-byte records for
+ // basic mode logs.
+ OffsetPtr += 8;
}
return Error::success();
}
@@ -539,112 +247,97 @@ Error processFDRFunctionRecord(FDRState &State, uint8_t RecordFirstByte,
/// ThreadBuffer: BufferExtents NewBuffer WallClockTime Pid NewCPUId
/// FunctionSequence
/// EOB: *deprecated*
-Error loadFDRLog(StringRef Data, XRayFileHeader &FileHeader,
- std::vector<XRayRecord> &Records) {
+///
+/// In Version 4, we make the following changes:
+///
+/// CustomEventRecord now includes the CPU data.
+///
+/// In Version 5, we make the following changes:
+///
+/// CustomEventRecord and TypedEventRecord now use TSC delta encoding similar to
+/// what FunctionRecord instances use, and we no longer need to include the CPU
+/// id in the CustomEventRecord.
+///
+Error loadFDRLog(StringRef Data, bool IsLittleEndian,
+ XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) {
+
if (Data.size() < 32)
- return make_error<StringError>(
- "Not enough bytes for an XRay log.",
- std::make_error_code(std::errc::invalid_argument));
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Not enough bytes for an XRay FDR log.");
+ DataExtractor DE(Data, IsLittleEndian, 8);
- // For an FDR log, there are records sized 16 and 8 bytes.
- // There actually may be no records if no non-trivial functions are
- // instrumented.
- if (Data.size() % 8 != 0)
- return make_error<StringError>(
- "Invalid-sized XRay data.",
- std::make_error_code(std::errc::invalid_argument));
+ uint32_t OffsetPtr = 0;
+ auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr);
+ if (!FileHeaderOrError)
+ return FileHeaderOrError.takeError();
+ FileHeader = std::move(FileHeaderOrError.get());
- if (auto E = readBinaryFormatHeader(Data, FileHeader))
- return E;
+ // First we load the records into memory.
+ std::vector<std::unique_ptr<Record>> FDRRecords;
- uint64_t BufferSize = 0;
{
- StringRef ExtraDataRef(FileHeader.FreeFormData, 16);
- DataExtractor ExtraDataExtractor(ExtraDataRef, true, 8);
- uint32_t ExtraDataOffset = 0;
- BufferSize = ExtraDataExtractor.getU64(&ExtraDataOffset);
+ FileBasedRecordProducer P(FileHeader, DE, OffsetPtr);
+ LogBuilderConsumer C(FDRRecords);
+ while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
+ auto R = P.produce();
+ if (!R)
+ return R.takeError();
+ if (auto E = C.consume(std::move(R.get())))
+ return E;
+ }
}
- FDRState::Token InitialExpectation;
- switch (FileHeader.Version) {
- case 1:
- InitialExpectation = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
- break;
- case 2:
- case 3:
- InitialExpectation = FDRState::Token::BUFFER_EXTENTS;
- break;
- default:
- return make_error<StringError>(
- Twine("Unsupported version '") + Twine(FileHeader.Version) + "'",
- std::make_error_code(std::errc::executable_format_error));
+ // Next we index the records into blocks.
+ BlockIndexer::Index Index;
+ {
+ BlockIndexer Indexer(Index);
+ for (auto &R : FDRRecords)
+ if (auto E = R->apply(Indexer))
+ return E;
+ if (auto E = Indexer.flush())
+ return E;
}
- FDRState State{0, 0, 0, 0, InitialExpectation, BufferSize, 0};
-
- // RecordSize will tell the loop how far to seek ahead based on the record
- // type that we have just read.
- size_t RecordSize = 0;
- for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(RecordSize)) {
- DataExtractor RecordExtractor(S, true, 8);
- uint32_t OffsetPtr = 0;
- if (State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF) {
- RecordSize = State.CurrentBufferSize - State.CurrentBufferConsumed;
- if (S.size() < RecordSize) {
- return make_error<StringError>(
- Twine("Incomplete thread buffer. Expected at least ") +
- Twine(RecordSize) + " bytes but found " + Twine(S.size()),
- make_error_code(std::errc::invalid_argument));
+
+ // Then we verify the consistency of the blocks.
+ {
+ for (auto &PTB : Index) {
+ auto &Blocks = PTB.second;
+ for (auto &B : Blocks) {
+ BlockVerifier Verifier;
+ for (auto *R : B.Records)
+ if (auto E = R->apply(Verifier))
+ return E;
+ if (auto E = Verifier.verify())
+ return E;
}
- State.CurrentBufferConsumed = 0;
- State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
- continue;
- }
- uint8_t BitField = RecordExtractor.getU8(&OffsetPtr);
- bool isMetadataRecord = BitField & 0x01uL;
- bool isBufferExtents =
- (BitField >> 1) == 7; // BufferExtents record kind == 7
- if (isMetadataRecord) {
- RecordSize = 16;
- if (auto E =
- processFDRMetadataRecord(State, BitField, RecordExtractor,
- RecordSize, Records, FileHeader.Version))
- return E;
- } else { // Process Function Record
- RecordSize = 8;
- if (auto E = processFDRFunctionRecord(State, BitField, RecordExtractor,
- Records))
- return E;
}
+ }
- // The BufferExtents record is technically not part of the buffer, so we
- // don't count the size of that record against the buffer's actual size.
- if (!isBufferExtents)
- State.CurrentBufferConsumed += RecordSize;
- assert(State.CurrentBufferConsumed <= State.CurrentBufferSize);
- if ((FileHeader.Version == 2 || FileHeader.Version == 3) &&
- State.CurrentBufferSize == State.CurrentBufferConsumed) {
- // In Version 2 of the log, we don't need to scan to the end of the thread
- // buffer if we've already consumed all the bytes we need to.
- State.Expects = FDRState::Token::BUFFER_EXTENTS;
- State.CurrentBufferSize = BufferSize;
- State.CurrentBufferConsumed = 0;
+ // This is now the meat of the algorithm. Here we sort the blocks according to
+ // the Walltime record in each of the blocks for the same thread. This allows
+ // us to more consistently recreate the execution trace in temporal order.
+ // After the sort, we then reconstitute `Trace` records using a stateful
+ // visitor associated with a single process+thread pair.
+ {
+ for (auto &PTB : Index) {
+ auto &Blocks = PTB.second;
+ llvm::sort(Blocks, [](const BlockIndexer::Block &L,
+ const BlockIndexer::Block &R) {
+        return L.WallclockTime->seconds() < R.WallclockTime->seconds() ||
+               (L.WallclockTime->seconds() == R.WallclockTime->seconds() &&
+                L.WallclockTime->nanos() < R.WallclockTime->nanos());
+ });
+ auto Adder = [&](const XRayRecord &R) { Records.push_back(R); };
+ TraceExpander Expander(Adder, FileHeader.Version);
+ for (auto &B : Blocks) {
+ for (auto *R : B.Records)
+ if (auto E = R->apply(Expander))
+ return E;
+ }
+ if (auto E = Expander.flush())
+ return E;
}
}
- // Having iterated over everything we've been given, we've either consumed
- // everything and ended up in the end state, or were told to skip the rest.
- bool Finished = State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF &&
- State.CurrentBufferSize == State.CurrentBufferConsumed;
- if ((State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF &&
- State.Expects != FDRState::Token::BUFFER_EXTENTS) &&
- !Finished)
- return make_error<StringError>(
- Twine("Encountered EOF with unexpected state expectation ") +
- fdrStateToTwine(State.Expects) +
- ". Remaining expected bytes in thread buffer total " +
- Twine(State.CurrentBufferSize - State.CurrentBufferConsumed),
- std::make_error_code(std::errc::executable_format_error));
-
return Error::success();
}
@@ -670,8 +363,9 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
Records.clear();
std::transform(Trace.Records.begin(), Trace.Records.end(),
std::back_inserter(Records), [&](const YAMLXRayRecord &R) {
- return XRayRecord{R.RecordType, R.CPU, R.Type, R.FuncId,
- R.TSC, R.TId, R.PId, R.CallArgs};
+ return XRayRecord{R.RecordType, R.CPU, R.Type,
+ R.FuncId, R.TSC, R.TId,
+ R.PId, R.CallArgs, R.Data};
});
return Error::success();
}
@@ -705,6 +399,17 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
}
auto Data = StringRef(MappedFile.data(), MappedFile.size());
+ // TODO: Lift the endianness and implementation selection here.
+ DataExtractor LittleEndianDE(Data, true, 8);
+ auto TraceOrError = loadTrace(LittleEndianDE, Sort);
+ if (!TraceOrError) {
+ DataExtractor BigEndianDE(Data, false, 8);
+ TraceOrError = loadTrace(BigEndianDE, Sort);
+ }
+ return TraceOrError;
+}
+
+Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) {
// Attempt to detect the file type using file magic. We have a slight bias
// towards the binary format, and we do this by making sure that the first 4
// bytes of the binary file is some combination of the following byte
@@ -719,8 +424,7 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
//
// Only if we can't load either the binary or the YAML format will we yield an
// error.
- StringRef Magic(MappedFile.data(), 4);
- DataExtractor HeaderExtractor(Magic, true, 8);
+ DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), 8);
uint32_t OffsetPtr = 0;
uint16_t Version = HeaderExtractor.getU16(&OffsetPtr);
uint16_t Type = HeaderExtractor.getU16(&OffsetPtr);
@@ -731,7 +435,8 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
switch (Type) {
case NAIVE_FORMAT:
if (Version == 1 || Version == 2 || Version == 3) {
- if (auto E = loadNaiveFormatLog(Data, T.FileHeader, T.Records))
+ if (auto E = loadNaiveFormatLog(DE.getData(), DE.isLittleEndian(),
+ T.FileHeader, T.Records))
return std::move(E);
} else {
return make_error<StringError>(
@@ -741,8 +446,9 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
}
break;
case FLIGHT_DATA_RECORDER_FORMAT:
- if (Version == 1 || Version == 2 || Version == 3) {
- if (auto E = loadFDRLog(Data, T.FileHeader, T.Records))
+ if (Version >= 1 && Version <= 5) {
+ if (auto E = loadFDRLog(DE.getData(), DE.isLittleEndian(), T.FileHeader,
+ T.Records))
return std::move(E);
} else {
return make_error<StringError>(
@@ -751,15 +457,15 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
}
break;
default:
- if (auto E = loadYAMLLog(Data, T.FileHeader, T.Records))
+ if (auto E = loadYAMLLog(DE.getData(), T.FileHeader, T.Records))
return std::move(E);
}
if (Sort)
std::stable_sort(T.Records.begin(), T.Records.end(),
- [&](const XRayRecord &L, const XRayRecord &R) {
- return L.TSC < R.TSC;
- });
+ [&](const XRayRecord &L, const XRayRecord &R) {
+ return L.TSC < R.TSC;
+ });
return std::move(T);
}
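
With the loader now endianness-aware, the public entry point is unchanged for callers: llvm::xray::loadTraceFile still returns an Expected<Trace> whose records come back in (optionally sorted) TSC order. A brief caller sketch, not part of the patch; the file name and the dump function are illustrative:

    // Sketch: load a trace and walk the normalized XRayRecords.
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/XRay/Trace.h"
    using namespace llvm;

    static Error dumpFunctionIds(StringRef Filename) {
      auto TraceOrErr = xray::loadTraceFile(Filename, /*Sort=*/true);
      if (!TraceOrErr)
        return TraceOrErr.takeError();
      for (const auto &Rec : *TraceOrErr) // Trace is iterable over XRayRecord
        outs() << Rec.FuncId << " @ TSC " << Rec.TSC << "\n";
      return Error::success();
    }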
diff --git a/contrib/llvm/tools/bugpoint/CrashDebugger.cpp b/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
index a5b31e1ab321..ef6a214fde20 100644
--- a/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -315,6 +315,66 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
}
namespace {
+/// ReduceCrashingFunctionAttributes reducer - This works by removing
+/// attributes on a particular function and seeing if the program still crashes.
+/// If it does, then keep the newer, smaller program.
+///
+class ReduceCrashingFunctionAttributes : public ListReducer<Attribute> {
+ BugDriver &BD;
+ std::string FnName;
+ BugTester TestFn;
+
+public:
+ ReduceCrashingFunctionAttributes(BugDriver &bd, const std::string &FnName,
+ BugTester testFn)
+ : BD(bd), FnName(FnName), TestFn(testFn) {}
+
+ Expected<TestResult> doTest(std::vector<Attribute> &Prefix,
+ std::vector<Attribute> &Kept) override {
+ if (!Kept.empty() && TestFuncAttrs(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestFuncAttrs(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestFuncAttrs(std::vector<Attribute> &Attrs);
+};
+}
+
+bool ReduceCrashingFunctionAttributes::TestFuncAttrs(
+ std::vector<Attribute> &Attrs) {
+ // Clone the program to try hacking it apart...
+ std::unique_ptr<Module> M = CloneModule(BD.getProgram());
+ Function *F = M->getFunction(FnName);
+
+ // Build up an AttributeList from the attributes we've been given by the
+ // reducer.
+ AttrBuilder AB;
+ for (auto A : Attrs)
+ AB.addAttribute(A);
+ AttributeList NewAttrs;
+ NewAttrs =
+ NewAttrs.addAttributes(BD.getContext(), AttributeList::FunctionIndex, AB);
+
+ // Set this new list of attributes on the function.
+ F->setAttributes(NewAttrs);
+
+ // Try running on the hacked up program...
+ if (TestFn(BD, M.get())) {
+ BD.setNewProgram(std::move(M)); // It crashed, keep the trimmed version...
+
+ // Pass along the set of attributes that caused the crash.
+ Attrs.clear();
+ for (Attribute A : NewAttrs.getFnAttributes()) {
+ Attrs.push_back(A);
+ }
+ return true;
+ }
+ return false;
+}
+
+namespace {
/// Simplify the CFG without completely destroying it.
/// This is not well defined, but basically comes down to "try to eliminate
/// unreachable blocks and constant fold terminators without deciding that
@@ -409,7 +469,7 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock *> &BBs) {
for (BasicBlock *Succ : successors(&BB))
Succ->removePredecessor(&BB);
- TerminatorInst *BBTerm = BB.getTerminator();
+ Instruction *BBTerm = BB.getTerminator();
if (BBTerm->isEHPad() || BBTerm->getType()->isTokenTy())
continue;
if (!BBTerm->getType()->isVoidTy())
@@ -703,7 +763,7 @@ bool ReduceCrashingInstructions::TestInsts(
// Convert list to set for fast lookup...
SmallPtrSet<Instruction *, 32> Instructions;
for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- assert(!isa<TerminatorInst>(Insts[i]));
+ assert(!Insts[i]->isTerminator());
Instructions.insert(cast<Instruction>(VMap[Insts[i]]));
}
@@ -717,7 +777,7 @@ bool ReduceCrashingInstructions::TestInsts(
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI)
for (BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E;) {
Instruction *Inst = &*I++;
- if (!Instructions.count(Inst) && !isa<TerminatorInst>(Inst) &&
+ if (!Instructions.count(Inst) && !Inst->isTerminator() &&
!Inst->isEHPad() && !Inst->getType()->isTokenTy() &&
!Inst->isSwiftError()) {
if (!Inst->getType()->isVoidTy())
@@ -950,7 +1010,7 @@ static Error ReduceInsts(BugDriver &BD, BugTester TestFn) {
for (const Function &F : BD.getProgram())
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
- if (!isa<TerminatorInst>(&I))
+ if (!I.isTerminator())
Insts.push_back(&I);
Expected<bool> Result =
@@ -1056,6 +1116,38 @@ static Error DebugACrash(BugDriver &BD, BugTester TestFn) {
BD.EmitProgressBitcode(BD.getProgram(), "reduced-function");
}
+ // For each remaining function, try to reduce that function's attributes.
+ std::vector<std::string> FunctionNames;
+ for (Function &F : BD.getProgram())
+ FunctionNames.push_back(F.getName());
+
+ if (!FunctionNames.empty() && !BugpointIsInterrupted) {
+ outs() << "\n*** Attempting to reduce the number of function attributes in "
+ "the testcase\n";
+
+ unsigned OldSize = 0;
+ unsigned NewSize = 0;
+ for (std::string &Name : FunctionNames) {
+ Function *Fn = BD.getProgram().getFunction(Name);
+      assert(Fn && "Could not find function?");
+
+ std::vector<Attribute> Attrs;
+ for (Attribute A : Fn->getAttributes().getFnAttributes())
+ Attrs.push_back(A);
+
+ OldSize += Attrs.size();
+ Expected<bool> Result =
+ ReduceCrashingFunctionAttributes(BD, Name, TestFn).reduceList(Attrs);
+ if (Error E = Result.takeError())
+ return E;
+
+ NewSize += Attrs.size();
+ }
+
+    if (NewSize < OldSize)
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-function-attributes");
+ }
+
// Attempt to change conditional branches into unconditional branches to
// eliminate blocks.
if (!DisableSimplifyCFG && !BugpointIsInterrupted) {
diff --git a/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp b/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
index 773bad69fae0..1b86b103d835 100644
--- a/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
@@ -148,8 +148,9 @@ Error BugDriver::initializeExecutionEnvironment() {
std::string Message;
if (CCBinary.empty()) {
- if (sys::findProgramByName("clang"))
- CCBinary = "clang";
+ if (ErrorOr<std::string> ClangPath =
+ FindProgramByName("clang", getToolName(), &AbsTolerance))
+ CCBinary = *ClangPath;
else
CCBinary = "gcc";
}
@@ -193,11 +194,11 @@ Error BugDriver::initializeExecutionEnvironment() {
break;
case CompileCustom:
Interpreter = AbstractInterpreter::createCustomCompiler(
- Message, CustomCompileCommand);
+ getToolName(), Message, CustomCompileCommand);
break;
case Custom:
- Interpreter =
- AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
+ Interpreter = AbstractInterpreter::createCustomExecutor(
+ getToolName(), Message, CustomExecCommand);
break;
}
if (!Interpreter)
@@ -239,8 +240,8 @@ Error BugDriver::initializeExecutionEnvironment() {
SafeInterpreterSel == RunLLCIA);
break;
case Custom:
- SafeInterpreter =
- AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
+ SafeInterpreter = AbstractInterpreter::createCustomExecutor(
+ getToolName(), Message, CustomExecCommand);
break;
default:
Message = "Sorry, this back-end is not supported by bugpoint as the "
@@ -252,7 +253,7 @@ Error BugDriver::initializeExecutionEnvironment() {
exit(1);
}
- cc = CC::create(Message, CCBinary, &CCToolArgv);
+ cc = CC::create(getToolName(), Message, CCBinary, &CCToolArgv);
if (!cc) {
outs() << Message << "\nExiting.\n";
exit(1);
@@ -299,26 +300,32 @@ Expected<std::string> BugDriver::executeProgram(const Module &Program,
if (!AI)
AI = Interpreter;
assert(AI && "Interpreter should have been created already!");
+ bool CreatedBitcode = false;
if (BitcodeFile.empty()) {
// Emit the program to a bitcode file...
- auto File =
- sys::fs::TempFile::create(OutputPrefix + "-test-program-%%%%%%%.bc");
- if (!File) {
- errs() << ToolName
- << ": Error making unique filename: " << toString(File.takeError())
+ SmallString<128> UniqueFilename;
+ int UniqueFD;
+ std::error_code EC = sys::fs::createUniqueFile(
+ OutputPrefix + "-test-program-%%%%%%%.bc", UniqueFD, UniqueFilename);
+ if (EC) {
+ errs() << ToolName << ": Error making unique filename: " << EC.message()
<< "!\n";
exit(1);
}
- DiscardTemp Discard{*File};
- BitcodeFile = File->TmpName;
+ BitcodeFile = UniqueFilename.str();
- if (writeProgramToFile(File->FD, Program)) {
+ if (writeProgramToFile(BitcodeFile, UniqueFD, Program)) {
errs() << ToolName << ": Error emitting bitcode to file '" << BitcodeFile
<< "'!\n";
exit(1);
}
+ CreatedBitcode = true;
}
+ // Remove the temporary bitcode file when we are done.
+ std::string BitcodePath(BitcodeFile);
+ FileRemover BitcodeFileRemover(BitcodePath, CreatedBitcode && !SaveTemps);
+
if (OutputFile.empty())
OutputFile = OutputPrefix + "-execution-output-%%%%%%%";
diff --git a/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp b/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
index cbb048db8fe7..64fe675de20c 100644
--- a/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -16,6 +16,7 @@
//===----------------------------------------------------------------------===//
#include "BugDriver.h"
+#include "ToolRunner.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
@@ -166,7 +167,8 @@ bool BugDriver::runPasses(Module &Program,
std::string tool = OptCmd;
if (OptCmd.empty()) {
- if (ErrorOr<std::string> Path = sys::findProgramByName("opt"))
+ if (ErrorOr<std::string> Path =
+ FindProgramByName("opt", getToolName(), &OutputPrefix))
tool = *Path;
else
errs() << Path.getError().message() << "\n";
diff --git a/contrib/llvm/tools/bugpoint/ToolRunner.cpp b/contrib/llvm/tools/bugpoint/ToolRunner.cpp
index 812e8e3bbae5..7ba8ea1f16c5 100644
--- a/contrib/llvm/tools/bugpoint/ToolRunner.cpp
+++ b/contrib/llvm/tools/bugpoint/ToolRunner.cpp
@@ -202,19 +202,7 @@ Expected<int> LLI::ExecuteProgram(const std::string &Bitcode,
void AbstractInterpreter::anchor() {}
-#if defined(LLVM_ON_UNIX)
-const char EXESuffix[] = "";
-#elif defined(_WIN32)
-const char EXESuffix[] = "exe";
-#endif
-
-/// Prepend the path to the program being executed
-/// to \p ExeName, given the value of argv[0] and the address of main()
-/// itself. This allows us to find another LLVM tool if it is built in the same
-/// directory. An empty string is returned on error; note that this function
-/// just mainpulates the path and doesn't check for executability.
-/// Find a named executable.
-static std::string PrependMainExecutablePath(const std::string &ExeName,
+ErrorOr<std::string> llvm::FindProgramByName(const std::string &ExeName,
const char *Argv0,
void *MainAddr) {
// Check the directory that the calling program is in. We can do
@@ -222,30 +210,25 @@ static std::string PrependMainExecutablePath(const std::string &ExeName,
// is a relative path to the executable itself.
std::string Main = sys::fs::getMainExecutable(Argv0, MainAddr);
StringRef Result = sys::path::parent_path(Main);
+ if (ErrorOr<std::string> Path = sys::findProgramByName(ExeName, Result))
+ return *Path;
- if (!Result.empty()) {
- SmallString<128> Storage = Result;
- sys::path::append(Storage, ExeName);
- sys::path::replace_extension(Storage, EXESuffix);
- return Storage.str();
- }
-
- return Result.str();
+ // Check the user PATH.
+ return sys::findProgramByName(ExeName);
}
// LLI create method - Try to find the LLI executable
AbstractInterpreter *
AbstractInterpreter::createLLI(const char *Argv0, std::string &Message,
const std::vector<std::string> *ToolArgs) {
- std::string LLIPath =
- PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t)&createLLI);
- if (!LLIPath.empty()) {
- Message = "Found lli: " + LLIPath + "\n";
- return new LLI(LLIPath, ToolArgs);
+ if (ErrorOr<std::string> LLIPath =
+ FindProgramByName("lli", Argv0, (void *)(intptr_t)&createLLI)) {
+ Message = "Found lli: " + *LLIPath + "\n";
+ return new LLI(*LLIPath, ToolArgs);
+ } else {
+ Message = LLIPath.getError().message() + "\n";
+ return nullptr;
}
-
- Message = "Cannot find `lli' in executable directory!\n";
- return nullptr;
}
//===---------------------------------------------------------------------===//
@@ -368,8 +351,9 @@ Expected<int> CustomExecutor::ExecuteProgram(
// '\ ' -> ' '
// 'exa\mple' -> 'example'
//
-static void lexCommand(std::string &Message, const std::string &CommandLine,
- std::string &CmdPath, std::vector<std::string> &Args) {
+static void lexCommand(const char *Argv0, std::string &Message,
+ const std::string &CommandLine, std::string &CmdPath,
+ std::vector<std::string> &Args) {
std::string Token;
std::string Command;
@@ -402,7 +386,7 @@ static void lexCommand(std::string &Message, const std::string &CommandLine,
Token.push_back(CommandLine[Pos]);
}
- auto Path = sys::findProgramByName(Command);
+ auto Path = FindProgramByName(Command, Argv0, (void *)(intptr_t)&lexCommand);
if (!Path) {
Message = std::string("Cannot find '") + Command +
"' in PATH: " + Path.getError().message() + "\n";
@@ -416,11 +400,12 @@ static void lexCommand(std::string &Message, const std::string &CommandLine,
// Custom execution environment create method, takes the execution command
// as arguments
AbstractInterpreter *AbstractInterpreter::createCustomCompiler(
- std::string &Message, const std::string &CompileCommandLine) {
+ const char *Argv0, std::string &Message,
+ const std::string &CompileCommandLine) {
std::string CmdPath;
std::vector<std::string> Args;
- lexCommand(Message, CompileCommandLine, CmdPath, Args);
+ lexCommand(Argv0, Message, CompileCommandLine, CmdPath, Args);
if (CmdPath.empty())
return nullptr;
@@ -430,12 +415,13 @@ AbstractInterpreter *AbstractInterpreter::createCustomCompiler(
// Custom execution environment create method, takes the execution command
// as arguments
AbstractInterpreter *
-AbstractInterpreter::createCustomExecutor(std::string &Message,
+AbstractInterpreter::createCustomExecutor(const char *Argv0,
+ std::string &Message,
const std::string &ExecCommandLine) {
std::string CmdPath;
std::vector<std::string> Args;
- lexCommand(Message, ExecCommandLine, CmdPath, Args);
+ lexCommand(Argv0, Message, ExecCommandLine, CmdPath, Args);
if (CmdPath.empty())
return nullptr;
@@ -524,20 +510,20 @@ LLC *AbstractInterpreter::createLLC(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args,
const std::vector<std::string> *CCArgs,
bool UseIntegratedAssembler) {
- std::string LLCPath =
- PrependMainExecutablePath("llc", Argv0, (void *)(intptr_t)&createLLC);
- if (LLCPath.empty()) {
- Message = "Cannot find `llc' in executable directory!\n";
+ ErrorOr<std::string> LLCPath =
+ FindProgramByName("llc", Argv0, (void *)(intptr_t)&createLLC);
+ if (!LLCPath) {
+ Message = LLCPath.getError().message() + "\n";
return nullptr;
}
- CC *cc = CC::create(Message, CCBinary, CCArgs);
+ CC *cc = CC::create(Argv0, Message, CCBinary, CCArgs);
if (!cc) {
errs() << Message << "\n";
exit(1);
}
- Message = "Found llc: " + LLCPath + "\n";
- return new LLC(LLCPath, cc, Args, UseIntegratedAssembler);
+ Message = "Found llc: " + *LLCPath + "\n";
+ return new LLC(*LLCPath, cc, Args, UseIntegratedAssembler);
}
//===---------------------------------------------------------------------===//
@@ -606,15 +592,14 @@ Expected<int> JIT::ExecuteProgram(const std::string &Bitcode,
AbstractInterpreter *
AbstractInterpreter::createJIT(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args) {
- std::string LLIPath =
- PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t)&createJIT);
- if (!LLIPath.empty()) {
- Message = "Found lli: " + LLIPath + "\n";
- return new JIT(LLIPath, Args);
+ if (ErrorOr<std::string> LLIPath =
+ FindProgramByName("lli", Argv0, (void *)(intptr_t)&createJIT)) {
+ Message = "Found lli: " + *LLIPath + "\n";
+ return new JIT(*LLIPath, Args);
+ } else {
+ Message = LLIPath.getError().message() + "\n";
+ return nullptr;
}
-
- Message = "Cannot find `lli' in executable directory!\n";
- return nullptr;
}
//===---------------------------------------------------------------------===//
@@ -855,9 +840,10 @@ Error CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
/// create - Try to find the CC executable
///
-CC *CC::create(std::string &Message, const std::string &CCBinary,
+CC *CC::create(const char *Argv0, std::string &Message,
+ const std::string &CCBinary,
const std::vector<std::string> *Args) {
- auto CCPath = sys::findProgramByName(CCBinary);
+ auto CCPath = FindProgramByName(CCBinary, Argv0, (void *)(intptr_t)&create);
if (!CCPath) {
Message = "Cannot find `" + CCBinary + "' in PATH: " +
CCPath.getError().message() + "\n";
diff --git a/contrib/llvm/tools/bugpoint/ToolRunner.h b/contrib/llvm/tools/bugpoint/ToolRunner.h
index f218ad534ee9..ef8551cc669b 100644
--- a/contrib/llvm/tools/bugpoint/ToolRunner.h
+++ b/contrib/llvm/tools/bugpoint/ToolRunner.h
@@ -49,7 +49,8 @@ class CC {
public:
enum FileType { AsmFile, ObjectFile, CFile };
- static CC *create(std::string &Message, const std::string &CCBinary,
+ static CC *create(const char *Argv0, std::string &Message,
+ const std::string &CCBinary,
const std::vector<std::string> *Args);
/// ExecuteProgram - Execute the program specified by "ProgramFile" (which is
@@ -98,11 +99,11 @@ public:
const std::vector<std::string> *Args = nullptr);
static AbstractInterpreter *
- createCustomCompiler(std::string &Message,
+ createCustomCompiler(const char *Argv0, std::string &Message,
const std::string &CompileCommandLine);
static AbstractInterpreter *
- createCustomExecutor(std::string &Message,
+ createCustomExecutor(const char *Argv0, std::string &Message,
const std::string &ExecCommandLine);
virtual ~AbstractInterpreter() {}
@@ -178,6 +179,13 @@ public:
unsigned MemoryLimit = 0) override;
};
+/// Find the first executable file \p ExeName, looking first in the same
+/// directory as argv[0] and, failing that, in the user's PATH. This allows us
+/// to find another LLVM tool if it is built in the same directory. If no
+/// executable is found, an error is returned.
+ErrorOr<std::string> FindProgramByName(const std::string &ExeName,
+ const char *Argv0, void *MainAddr);
+
} // End llvm namespace
#endif
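
[Editor's note] The new FindProgramByName helper declared above checks the directory of the running tool first and only then falls back to the user's PATH. The snippet below sketches that two-step lookup with plain std::filesystem, assuming a POSIX-style ':'-separated PATH; it is an illustration, not LLVM's sys::findProgramByName.

// find_tool_sketch.cpp -- build with: c++ -std=c++17 find_tool_sketch.cpp
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <optional>
#include <sstream>
#include <string>

namespace fs = std::filesystem;

// Look for ExeName next to the running tool first (ExeDir stands in for the
// directory derived from argv[0]), then fall back to scanning PATH. This
// mirrors the lookup order of the FindProgramByName helper in the diff above.
std::optional<fs::path> findProgram(const std::string &ExeName,
                                    const fs::path &ExeDir) {
  fs::path Candidate = ExeDir / ExeName;
  if (fs::exists(Candidate))
    return Candidate;

  if (const char *PathEnv = std::getenv("PATH")) {
    std::istringstream Dirs(PathEnv);
    std::string Dir;
    while (std::getline(Dirs, Dir, ':')) { // ':' separator on POSIX systems
      Candidate = fs::path(Dir) / ExeName;
      if (fs::exists(Candidate))
        return Candidate;
    }
  }
  return std::nullopt; // caller reports "cannot find ..." as in the diff
}

int main(int, char **argv) {
  fs::path ExeDir = fs::absolute(argv[0]).parent_path();
  if (auto P = findProgram("clang", ExeDir))
    std::cout << "found: " << *P << "\n";
  else
    std::cout << "not found, would fall back to gcc\n";
  return 0;
}
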
diff --git a/contrib/llvm/tools/lli/lli.cpp b/contrib/llvm/tools/lli/lli.cpp
index 1940dbd848cc..7e93d31361aa 100644
--- a/contrib/llvm/tools/lli/lli.cpp
+++ b/contrib/llvm/tools/lli/lli.cpp
@@ -26,6 +26,7 @@
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
#include "llvm/ExecutionEngine/OrcMCJITReplacement.h"
@@ -34,7 +35,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/TypeBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Object/Archive.h"
@@ -97,6 +97,28 @@ namespace {
"orc-lazy",
"Orc-based lazy JIT.")));
+ cl::opt<unsigned>
+ LazyJITCompileThreads("compile-threads",
+ cl::desc("Choose the number of compile threads "
+ "(jit-kind=orc-lazy only)"),
+ cl::init(0));
+
+ cl::list<std::string>
+ ThreadEntryPoints("thread-entry",
+ cl::desc("calls the given entry-point on a new thread "
+ "(jit-kind=orc-lazy only)"));
+
+ cl::opt<bool> PerModuleLazy(
+ "per-module-lazy",
+ cl::desc("Performs lazy compilation on whole module boundaries "
+ "rather than individual functions"),
+ cl::init(false));
+
+ cl::list<std::string>
+ JITDylibs("jd",
+ cl::desc("Specifies the JITDylib to be used for any subsequent "
+ "-extra-module arguments."));
+
// The MCJIT supports building for a target address space separate from
// the JIT compilation process. Use a forked process and a copying
// memory manager with IPC to execute using this functionality.
@@ -294,23 +316,18 @@ static void addCygMingExtraModule(ExecutionEngine &EE, LLVMContext &Context,
M->setTargetTriple(TargetTripleStr);
// Create an empty function named "__main".
- Function *Result;
- if (TargetTriple.isArch64Bit()) {
- Result = Function::Create(
- TypeBuilder<int64_t(void), false>::get(Context),
- GlobalValue::ExternalLinkage, "__main", M.get());
- } else {
- Result = Function::Create(
- TypeBuilder<int32_t(void), false>::get(Context),
- GlobalValue::ExternalLinkage, "__main", M.get());
- }
- BasicBlock *BB = BasicBlock::Create(Context, "__main", Result);
- Builder.SetInsertPoint(BB);
- Value *ReturnVal;
+ Type *ReturnTy;
if (TargetTriple.isArch64Bit())
- ReturnVal = ConstantInt::get(Context, APInt(64, 0));
+ ReturnTy = Type::getInt64Ty(Context);
else
- ReturnVal = ConstantInt::get(Context, APInt(32, 0));
+ ReturnTy = Type::getInt32Ty(Context);
+ Function *Result =
+ Function::Create(FunctionType::get(ReturnTy, {}, false),
+ GlobalValue::ExternalLinkage, "__main", M.get());
+
+ BasicBlock *BB = BasicBlock::Create(Context, "__main", Result);
+ Builder.SetInsertPoint(BB);
+ Value *ReturnVal = ConstantInt::get(ReturnTy, 0);
Builder.CreateRet(ReturnVal);
// Add this new module to the ExecutionEngine.
@@ -337,8 +354,8 @@ static void reportError(SMDiagnostic Err, const char *ProgName) {
exit(1);
}
-int runOrcLazyJIT(LLVMContext &Ctx, std::vector<std::unique_ptr<Module>> Ms,
- const std::vector<std::string> &Args);
+int runOrcLazyJIT(const char *ProgName);
+void disallowOrcOptions();
//===----------------------------------------------------------------------===//
// main Driver function
@@ -362,6 +379,11 @@ int main(int argc, char **argv, char * const *envp) {
if (DisableCoreFiles)
sys::Process::PreventCoreFiles();
+ if (UseJITKind == JITKind::OrcLazy)
+ return runOrcLazyJIT(argv[0]);
+ else
+ disallowOrcOptions();
+
LLVMContext Context;
// Load the bitcode...
@@ -371,21 +393,6 @@ int main(int argc, char **argv, char * const *envp) {
if (!Mod)
reportError(Err, argv[0]);
- if (UseJITKind == JITKind::OrcLazy) {
- std::vector<std::unique_ptr<Module>> Ms;
- Ms.push_back(std::move(Owner));
- for (auto &ExtraMod : ExtraModules) {
- Ms.push_back(parseIRFile(ExtraMod, Err, Context));
- if (!Ms.back())
- reportError(Err, argv[0]);
- }
- std::vector<std::string> Args;
- Args.push_back(InputFile);
- for (auto &Arg : InputArgv)
- Args.push_back(Arg);
- return runOrcLazyJIT(Context, std::move(Ms), Args);
- }
-
if (EnableCacheManager) {
std::string CacheName("file:");
CacheName.append(InputFile);
@@ -498,7 +505,7 @@ int main(int argc, char **argv, char * const *envp) {
if (!ArOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(ArOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(ArOrErr.takeError(), OS);
OS.flush();
errs() << Buf;
exit(1);
@@ -688,16 +695,18 @@ int main(int argc, char **argv, char * const *envp) {
return Result;
}
-static orc::IRTransformLayer2::TransformFunction createDebugDumper() {
+static orc::IRTransformLayer::TransformFunction createDebugDumper() {
switch (OrcDumpKind) {
case DumpKind::NoDump:
- return [](std::unique_ptr<Module> M) { return M; };
+ return [](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) { return TSM; };
case DumpKind::DumpFuncsToStdOut:
- return [](std::unique_ptr<Module> M) {
+ return [](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) {
printf("[ ");
- for (const auto &F : *M) {
+ for (const auto &F : *TSM.getModule()) {
if (F.isDeclaration())
continue;
@@ -709,55 +718,58 @@ static orc::IRTransformLayer2::TransformFunction createDebugDumper() {
}
printf("]\n");
- return M;
+ return TSM;
};
case DumpKind::DumpModsToStdOut:
- return [](std::unique_ptr<Module> M) {
+ return [](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) {
outs() << "----- Module Start -----\n"
- << *M << "----- Module End -----\n";
+ << *TSM.getModule() << "----- Module End -----\n";
- return M;
+ return TSM;
};
case DumpKind::DumpModsToDisk:
- return [](std::unique_ptr<Module> M) {
+ return [](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) {
std::error_code EC;
- raw_fd_ostream Out(M->getModuleIdentifier() + ".ll", EC, sys::fs::F_Text);
+ raw_fd_ostream Out(TSM.getModule()->getModuleIdentifier() + ".ll", EC,
+ sys::fs::F_Text);
if (EC) {
- errs() << "Couldn't open " << M->getModuleIdentifier()
+ errs() << "Couldn't open " << TSM.getModule()->getModuleIdentifier()
<< " for dumping.\nError:" << EC.message() << "\n";
exit(1);
}
- Out << *M;
- return M;
+ Out << *TSM.getModule();
+ return TSM;
};
}
llvm_unreachable("Unknown DumpKind");
}
-int runOrcLazyJIT(LLVMContext &Ctx, std::vector<std::unique_ptr<Module>> Ms,
- const std::vector<std::string> &Args) {
- // Bail out early if no modules loaded.
- if (Ms.empty())
- return 0;
-
- // Add lli's symbols into the JIT's search space.
- std::string ErrMsg;
- sys::DynamicLibrary LibLLI =
- sys::DynamicLibrary::getPermanentLibrary(nullptr, &ErrMsg);
- if (!LibLLI.isValid()) {
- errs() << "Error loading lli symbols: " << ErrMsg << ".\n";
- return 1;
- }
+static void exitOnLazyCallThroughFailure() { exit(1); }
+
+int runOrcLazyJIT(const char *ProgName) {
+ // Start setting up the JIT environment.
- const auto &TT = Ms.front()->getTargetTriple();
- orc::JITTargetMachineBuilder TMD =
+ // Parse the main module.
+ orc::ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>());
+ SMDiagnostic Err;
+ auto MainModule = orc::ThreadSafeModule(
+ parseIRFile(InputFile, Err, *TSCtx.getContext()), TSCtx);
+ if (!MainModule)
+ reportError(Err, ProgName);
+
+ const auto &TT = MainModule.getModule()->getTargetTriple();
+ orc::JITTargetMachineBuilder JTMB =
TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost())
: orc::JITTargetMachineBuilder(Triple(TT));
- TMD.setArch(MArch)
- .setCPU(getCPUStr())
+ if (!MArch.empty())
+ JTMB.getTargetTriple().setArchName(MArch);
+
+ JTMB.setCPU(getCPUStr())
.addFeatures(getFeatureList())
.setRelocationModel(RelocModel.getNumOccurrences()
? Optional<Reloc::Model>(RelocModel)
@@ -765,53 +777,135 @@ int runOrcLazyJIT(LLVMContext &Ctx, std::vector<std::unique_ptr<Module>> Ms,
.setCodeModel(CMModel.getNumOccurrences()
? Optional<CodeModel::Model>(CMModel)
: None);
- auto TM = ExitOnErr(TMD.createTargetMachine());
- auto DL = TM->createDataLayout();
- auto ES = llvm::make_unique<orc::ExecutionSession>();
- auto J =
- ExitOnErr(orc::LLLazyJIT::Create(std::move(ES), std::move(TM), DL, Ctx));
+
+ DataLayout DL = ExitOnErr(JTMB.getDefaultDataLayoutForTarget());
+
+ auto J = ExitOnErr(orc::LLLazyJIT::Create(
+ std::move(JTMB), DL,
+ pointerToJITTargetAddress(exitOnLazyCallThroughFailure),
+ LazyJITCompileThreads));
+
+ if (PerModuleLazy)
+ J->setPartitionFunction(orc::CompileOnDemandLayer::compileWholeModule);
auto Dump = createDebugDumper();
- J->setLazyCompileTransform(
- [&](std::unique_ptr<Module> M) {
- if (verifyModule(*M, &dbgs())) {
- dbgs() << "Bad module: " << *M << "\n";
- exit(1);
- }
- return Dump(std::move(M));
- });
- J->getMainVSO().setFallbackDefinitionGenerator(
- orc::DynamicLibraryFallbackGenerator(
- std::move(LibLLI), DL, [](orc::SymbolStringPtr) { return true; }));
+ J->setLazyCompileTransform([&](orc::ThreadSafeModule TSM,
+ const orc::MaterializationResponsibility &R) {
+ if (verifyModule(*TSM.getModule(), &dbgs())) {
+ dbgs() << "Bad module: " << *TSM.getModule() << "\n";
+ exit(1);
+ }
+ return Dump(std::move(TSM), R);
+ });
+ J->getMainJITDylib().setGenerator(
+ ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(DL)));
orc::MangleAndInterner Mangle(J->getExecutionSession(), DL);
- orc::LocalCXXRuntimeOverrides2 CXXRuntimeOverrides;
- ExitOnErr(CXXRuntimeOverrides.enable(J->getMainVSO(), Mangle));
+ orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
+ ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));
+
+ // Add the main module.
+ ExitOnErr(J->addLazyIRModule(std::move(MainModule)));
+
+ // Create JITDylibs and add any extra modules.
+ {
+ // Create JITDylibs, keep a map from argument index to dylib. We will use
+ // -extra-module argument indexes to determine what dylib to use for each
+ // -extra-module.
+ std::map<unsigned, orc::JITDylib *> IdxToDylib;
+ IdxToDylib[0] = &J->getMainJITDylib();
+ for (auto JDItr = JITDylibs.begin(), JDEnd = JITDylibs.end();
+ JDItr != JDEnd; ++JDItr) {
+ IdxToDylib[JITDylibs.getPosition(JDItr - JITDylibs.begin())] =
+ &J->createJITDylib(*JDItr);
+ }
- for (auto &M : Ms) {
- orc::makeAllSymbolsExternallyAccessible(*M);
- ExitOnErr(J->addLazyIRModule(std::move(M)));
+ for (auto EMItr = ExtraModules.begin(), EMEnd = ExtraModules.end();
+ EMItr != EMEnd; ++EMItr) {
+ auto M = parseIRFile(*EMItr, Err, *TSCtx.getContext());
+ if (!M)
+ reportError(Err, ProgName);
+
+ auto EMIdx = ExtraModules.getPosition(EMItr - ExtraModules.begin());
+ assert(EMIdx != 0 && "ExtraModule should have index > 0");
+ auto JDItr = std::prev(IdxToDylib.lower_bound(EMIdx));
+ auto &JD = *JDItr->second;
+ ExitOnErr(
+ J->addLazyIRModule(JD, orc::ThreadSafeModule(std::move(M), TSCtx)));
+ }
}
+ // Add the objects.
+ for (auto &ObjPath : ExtraObjects) {
+ auto Obj = ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ObjPath)));
+ ExitOnErr(J->addObjectFile(std::move(Obj)));
+ }
+
+ // Generate an argument string.
+ std::vector<std::string> Args;
+ Args.push_back(InputFile);
+ for (auto &Arg : InputArgv)
+ Args.push_back(Arg);
+
+ // Run any static constructors.
ExitOnErr(J->runConstructors());
+ // Run any -thread-entry points.
+ std::vector<std::thread> AltEntryThreads;
+ for (auto &ThreadEntryPoint : ThreadEntryPoints) {
+ auto EntryPointSym = ExitOnErr(J->lookup(ThreadEntryPoint));
+ typedef void (*EntryPointPtr)();
+ auto EntryPoint =
+ reinterpret_cast<EntryPointPtr>(static_cast<uintptr_t>(EntryPointSym.getAddress()));
+ AltEntryThreads.push_back(std::thread([EntryPoint]() { EntryPoint(); }));
+ }
+
+ J->getExecutionSession().dump(llvm::dbgs());
+
+ // Run main.
auto MainSym = ExitOnErr(J->lookup("main"));
typedef int (*MainFnPtr)(int, const char *[]);
std::vector<const char *> ArgV;
for (auto &Arg : Args)
ArgV.push_back(Arg.c_str());
+ ArgV.push_back(nullptr);
+
+ int ArgC = ArgV.size() - 1;
auto Main =
reinterpret_cast<MainFnPtr>(static_cast<uintptr_t>(MainSym.getAddress()));
- auto Result = Main(ArgV.size(), (const char **)ArgV.data());
+ auto Result = Main(ArgC, (const char **)ArgV.data());
- ExitOnErr(J->runDestructors());
+ // Wait for -thread-entry threads.
+ for (auto &AltEntryThread : AltEntryThreads)
+ AltEntryThread.join();
+ // Run destructors.
+ ExitOnErr(J->runDestructors());
CXXRuntimeOverrides.runDestructors();
return Result;
}
+void disallowOrcOptions() {
+ // Make sure nobody used an orc-lazy specific option accidentally.
+
+ if (LazyJITCompileThreads != 0) {
+ errs() << "-compile-threads requires -jit-kind=orc-lazy\n";
+ exit(1);
+ }
+
+ if (!ThreadEntryPoints.empty()) {
+ errs() << "-thread-entry requires -jit-kind=orc-lazy\n";
+ exit(1);
+ }
+
+ if (PerModuleLazy) {
+ errs() << "-per-module-lazy requires -jit-kind=orc-lazy\n";
+ exit(1);
+ }
+}
+
std::unique_ptr<FDRawChannel> launchRemote() {
#ifndef LLVM_ON_UNIX
llvm_unreachable("launchRemote not supported on non-Unix platforms");
diff --git a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
index 64be08ff946a..1c453ee0b569 100644
--- a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -32,6 +33,7 @@
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
#include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
@@ -95,6 +97,7 @@ MODIFIERS:
[D] - use zero for timestamps and uids/gids (default)
[i] - put [files] before [relpos] (same as [b])
[l] - ignored for compatibility
+ [L] - add archive's contents
[o] - preserve original dates
[s] - create an archive index (cf. ranlib)
[S] - do not build a symbol table
@@ -113,7 +116,7 @@ void printHelpMessage() {
// Show the error message and exit.
LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
- errs() << ToolName << ": " << Error << ".\n";
+ WithColor::error(errs(), ToolName) << Error << ".\n";
printHelpMessage();
exit(1);
}
@@ -123,7 +126,7 @@ static void failIfError(std::error_code EC, Twine Context = "") {
return;
std::string ContextStr = Context.str();
- if (ContextStr == "")
+ if (ContextStr.empty())
fail(EC.message());
fail(Context + ": " + EC.message());
}
@@ -134,7 +137,7 @@ static void failIfError(Error E, Twine Context = "") {
handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) {
std::string ContextStr = Context.str();
- if (ContextStr == "")
+ if (ContextStr.empty())
fail(EIB.message());
fail(Context + ": " + EIB.message());
});
@@ -155,14 +158,14 @@ static std::string Options;
// This enumeration delineates the kinds of operations on an archive
// that are permitted.
enum ArchiveOperation {
- Print, ///< Print the contents of the archive
- Delete, ///< Delete the specified members
- Move, ///< Move members to end or as given by {a,b,i} modifiers
- QuickAppend, ///< Quickly append to end of archive
- ReplaceOrInsert, ///< Replace or Insert members
- DisplayTable, ///< Display the table of contents
- Extract, ///< Extract files back to file system
- CreateSymTab ///< Create a symbol table in an existing archive
+ Print, ///< Print the contents of the archive
+ Delete, ///< Delete the specified members
+ Move, ///< Move members to end or as given by {a,b,i} modifiers
+ QuickAppend, ///< Quickly append to end of archive
+ ReplaceOrInsert, ///< Replace or Insert members
+ DisplayTable, ///< Display the table of contents
+ Extract, ///< Extract files back to file system
+ CreateSymTab ///< Create a symbol table in an existing archive
};
// Modifiers to follow operation to vary behavior
@@ -175,6 +178,7 @@ static bool Verbose = false; ///< 'v' modifier
static bool Symtab = true; ///< 's' modifier
static bool Deterministic = true; ///< 'D' and 'U' modifiers
static bool Thin = false; ///< 'T' modifier
+static bool AddLibrary = false; ///< 'L' modifier
// Relative Positional Argument (for insert/move). This variable holds
// the name of the archive member to which the 'a', 'b' or 'i' modifier
@@ -193,7 +197,7 @@ static std::vector<StringRef> Members;
// Extract the member filename from the command line for the [relpos] argument
// associated with a, b, and i modifiers
static void getRelPos() {
- if (PositionalArgs.size() == 0)
+ if (PositionalArgs.empty())
fail("Expected [relpos] for a, b, or i modifier");
RelPos = PositionalArgs[0];
PositionalArgs.erase(PositionalArgs.begin());
@@ -201,7 +205,7 @@ static void getRelPos() {
// Get the archive file name from the command line
static void getArchive() {
- if (PositionalArgs.size() == 0)
+ if (PositionalArgs.empty())
fail("An archive name must be specified");
ArchiveName = PositionalArgs[0];
PositionalArgs.erase(PositionalArgs.begin());
@@ -213,6 +217,21 @@ static void getMembers() {
Members.push_back(Arg);
}
+std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
+std::vector<std::unique_ptr<object::Archive>> Archives;
+
+static object::Archive &readLibrary(const Twine &Library) {
+ auto BufOrErr = MemoryBuffer::getFile(Library, -1, false);
+ failIfError(BufOrErr.getError(), "Could not open library " + Library);
+ ArchiveBuffers.push_back(std::move(*BufOrErr));
+ auto LibOrErr =
+ object::Archive::create(ArchiveBuffers.back()->getMemBufferRef());
+ failIfError(errorToErrorCode(LibOrErr.takeError()),
+ "Could not parse library");
+ Archives.push_back(std::move(*LibOrErr));
+ return *Archives.back();
+}
+
static void runMRIScript();
// Parse the command line options as presented and return the operation
@@ -238,18 +257,44 @@ static ArchiveOperation parseCommandLine() {
bool MaybeJustCreateSymTab = false;
- for(unsigned i=0; i<Options.size(); ++i) {
- switch(Options[i]) {
- case 'd': ++NumOperations; Operation = Delete; break;
- case 'm': ++NumOperations; Operation = Move ; break;
- case 'p': ++NumOperations; Operation = Print; break;
- case 'q': ++NumOperations; Operation = QuickAppend; break;
- case 'r': ++NumOperations; Operation = ReplaceOrInsert; break;
- case 't': ++NumOperations; Operation = DisplayTable; break;
- case 'x': ++NumOperations; Operation = Extract; break;
- case 'c': Create = true; break;
- case 'l': /* accepted but unused */ break;
- case 'o': OriginalDates = true; break;
+ for (unsigned i = 0; i < Options.size(); ++i) {
+ switch (Options[i]) {
+ case 'd':
+ ++NumOperations;
+ Operation = Delete;
+ break;
+ case 'm':
+ ++NumOperations;
+ Operation = Move;
+ break;
+ case 'p':
+ ++NumOperations;
+ Operation = Print;
+ break;
+ case 'q':
+ ++NumOperations;
+ Operation = QuickAppend;
+ break;
+ case 'r':
+ ++NumOperations;
+ Operation = ReplaceOrInsert;
+ break;
+ case 't':
+ ++NumOperations;
+ Operation = DisplayTable;
+ break;
+ case 'x':
+ ++NumOperations;
+ Operation = Extract;
+ break;
+ case 'c':
+ Create = true;
+ break;
+ case 'l': /* accepted but unused */
+ break;
+ case 'o':
+ OriginalDates = true;
+ break;
case 's':
Symtab = true;
MaybeJustCreateSymTab = true;
@@ -257,8 +302,12 @@ static ArchiveOperation parseCommandLine() {
case 'S':
Symtab = false;
break;
- case 'u': OnlyUpdate = true; break;
- case 'v': Verbose = true; break;
+ case 'u':
+ OnlyUpdate = true;
+ break;
+ case 'v':
+ Verbose = true;
+ break;
case 'a':
getRelPos();
AddAfter = true;
@@ -283,6 +332,9 @@ static ArchiveOperation parseCommandLine() {
case 'T':
Thin = true;
break;
+ case 'L':
+ AddLibrary = true;
+ break;
default:
fail(std::string("unknown option ") + Options[i]);
}
@@ -295,7 +347,7 @@ static ArchiveOperation parseCommandLine() {
// Everything on the command line at this point is a member.
getMembers();
- if (NumOperations == 0 && MaybeJustCreateSymTab) {
+ if (NumOperations == 0 && MaybeJustCreateSymTab) {
NumOperations = 1;
Operation = CreateSymTab;
if (!Members.empty())
@@ -319,6 +371,8 @@ static ArchiveOperation parseCommandLine() {
fail("The 'o' modifier is only applicable to the 'x' operation");
if (OnlyUpdate && Operation != ReplaceOrInsert)
fail("The 'u' modifier is only applicable to the 'r' operation");
+ if (AddLibrary && Operation != QuickAppend)
+ fail("The 'L' modifier is only applicable to the 'q' operation");
// Return the parsed operation to the caller
return Operation;
@@ -367,7 +421,11 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
outs() << ' ' << format("%6llu", Size.get());
auto ModTimeOrErr = C.getLastModified();
failIfError(ModTimeOrErr.takeError());
- outs() << ' ' << ModTimeOrErr.get();
+ // Note: formatv() only handles the default TimePoint<>, which is in
+ // nanoseconds.
+ // TODO: fix format_provider<TimePoint<>> to allow other units.
+ sys::TimePoint<> ModTimeInNs = ModTimeOrErr.get();
+ outs() << ' ' << formatv("{0:%b %e %H:%M %Y}", ModTimeInNs);
outs() << ' ';
}
@@ -410,7 +468,7 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) {
auto ModTimeOrErr = C.getLastModified();
failIfError(ModTimeOrErr.takeError());
failIfError(
- sys::fs::setLastModificationAndAccessTime(FD, ModTimeOrErr.get()));
+ sys::fs::setLastAccessAndModificationTime(FD, ModTimeOrErr.get()));
}
if (close(FD))
@@ -475,36 +533,57 @@ static void performReadOperation(ArchiveOperation Operation,
if (Members.empty())
return;
for (StringRef Name : Members)
- errs() << Name << " was not found\n";
+ WithColor::error(errs(), ToolName) << "'" << Name << "' was not found\n";
exit(1);
}
+static void addChildMember(std::vector<NewArchiveMember> &Members,
+ const object::Archive::Child &M,
+ bool FlattenArchive = false) {
+ if (Thin && !M.getParent()->isThin())
+ fail("Cannot convert a regular archive to a thin one");
+ Expected<NewArchiveMember> NMOrErr =
+ NewArchiveMember::getOldMember(M, Deterministic);
+ failIfError(NMOrErr.takeError());
+ if (FlattenArchive &&
+ identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
+ Expected<std::string> FileNameOrErr = M.getFullName();
+ failIfError(FileNameOrErr.takeError());
+ object::Archive &Lib = readLibrary(*FileNameOrErr);
+ // When creating thin archives, only flatten if the member is also thin.
+ if (!Thin || Lib.isThin()) {
+ Error Err = Error::success();
+ // Only Thin archives are recursively flattened.
+ for (auto &Child : Lib.children(Err))
+ addChildMember(Members, Child, /*FlattenArchive=*/Thin);
+ failIfError(std::move(Err));
+ return;
+ }
+ }
+ Members.push_back(std::move(*NMOrErr));
+}
+
static void addMember(std::vector<NewArchiveMember> &Members,
- StringRef FileName, int Pos = -1) {
+ StringRef FileName, bool FlattenArchive = false) {
Expected<NewArchiveMember> NMOrErr =
NewArchiveMember::getFile(FileName, Deterministic);
failIfError(NMOrErr.takeError(), FileName);
-
+ if (FlattenArchive &&
+ identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
+ object::Archive &Lib = readLibrary(FileName);
+ // When creating thin archives, only flatten if the member is also thin.
+ if (!Thin || Lib.isThin()) {
+ Error Err = Error::success();
+ // Only Thin archives are recursively flattened.
+ for (auto &Child : Lib.children(Err))
+ addChildMember(Members, Child, /*FlattenArchive=*/Thin);
+ failIfError(std::move(Err));
+ return;
+ }
+ }
// Use the basename of the object path for the member name.
NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
-
- if (Pos == -1)
- Members.push_back(std::move(*NMOrErr));
- else
- Members[Pos] = std::move(*NMOrErr);
-}
-
-static void addMember(std::vector<NewArchiveMember> &Members,
- const object::Archive::Child &M, int Pos = -1) {
- if (Thin && !M.getParent()->isThin())
- fail("Cannot convert a regular archive to a thin one");
- Expected<NewArchiveMember> NMOrErr =
- NewArchiveMember::getOldMember(M, Deterministic);
- failIfError(NMOrErr.takeError());
- if (Pos == -1)
- Members.push_back(std::move(*NMOrErr));
- else
- Members[Pos] = std::move(*NMOrErr);
+ Members.push_back(std::move(*NMOrErr));
}
enum InsertAction {
@@ -593,7 +672,7 @@ computeNewArchiveMembers(ArchiveOperation Operation,
computeInsertAction(Operation, Child, Name, MemberI);
switch (Action) {
case IA_AddOldMember:
- addMember(Ret, Child);
+ addChildMember(Ret, Child);
break;
case IA_AddNewMember:
addMember(Ret, *MemberI);
@@ -601,7 +680,7 @@ computeNewArchiveMembers(ArchiveOperation Operation,
case IA_Delete:
break;
case IA_MoveOldMember:
- addMember(Moved, Child);
+ addChildMember(Moved, Child);
break;
case IA_MoveNewMember:
addMember(Moved, *MemberI);
@@ -629,14 +708,20 @@ computeNewArchiveMembers(ArchiveOperation Operation,
++Pos;
}
- for (unsigned I = 0; I != Members.size(); ++I)
- Ret.insert(Ret.begin() + InsertPos, NewArchiveMember());
- Pos = InsertPos;
- for (auto &Member : Members) {
- addMember(Ret, Member, Pos);
- ++Pos;
+ if (AddLibrary) {
+ assert(Operation == QuickAppend);
+ for (auto &Member : Members)
+ addMember(Ret, Member, /*FlattenArchive=*/true);
+ return Ret;
}
+ std::vector<NewArchiveMember> NewMembers;
+ for (auto &Member : Members)
+ addMember(NewMembers, Member, /*FlattenArchive=*/Thin);
+ Ret.reserve(Ret.size() + NewMembers.size());
+ std::move(NewMembers.begin(), NewMembers.end(),
+ std::inserter(Ret, std::next(Ret.begin(), InsertPos)));
+
return Ret;
}
@@ -660,11 +745,10 @@ static object::Archive::Kind getKindFromMember(const NewArchiveMember &Member) {
return getDefaultForHost();
}
-static void
-performWriteOperation(ArchiveOperation Operation,
- object::Archive *OldArchive,
- std::unique_ptr<MemoryBuffer> OldArchiveBuf,
- std::vector<NewArchiveMember> *NewMembersP) {
+static void performWriteOperation(ArchiveOperation Operation,
+ object::Archive *OldArchive,
+ std::unique_ptr<MemoryBuffer> OldArchiveBuf,
+ std::vector<NewArchiveMember> *NewMembersP) {
std::vector<NewArchiveMember> NewMembers;
if (!NewMembersP)
NewMembers = computeNewArchiveMembers(Operation, OldArchive);
@@ -677,11 +761,11 @@ performWriteOperation(ArchiveOperation Operation,
else if (OldArchive)
Kind = OldArchive->kind();
else if (NewMembersP)
- Kind = NewMembersP->size() ? getKindFromMember(NewMembersP->front())
- : getDefaultForHost();
+ Kind = !NewMembersP->empty() ? getKindFromMember(NewMembersP->front())
+ : getDefaultForHost();
else
- Kind = NewMembers.size() ? getKindFromMember(NewMembers.front())
- : getDefaultForHost();
+ Kind = !NewMembers.empty() ? getKindFromMember(NewMembers.front())
+ : getDefaultForHost();
break;
case GNU:
Kind = object::Archive::K_GNU;
@@ -770,7 +854,8 @@ static int performOperation(ArchiveOperation Operation,
} else {
if (!Create) {
// Produce a warning if we should and we're creating the archive
- errs() << ToolName << ": creating " << ArchiveName << "\n";
+ WithColor::warning(errs(), ToolName)
+ << "creating " << ArchiveName << "\n";
}
}
@@ -786,11 +871,14 @@ static void runMRIScript() {
const MemoryBuffer &Ref = *Buf.get();
bool Saved = false;
std::vector<NewArchiveMember> NewMembers;
- std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
- std::vector<std::unique_ptr<object::Archive>> Archives;
- for (line_iterator I(Ref, /*SkipBlanks*/ true, ';'), E; I != E; ++I) {
+ for (line_iterator I(Ref, /*SkipBlanks*/ false), E; I != E; ++I) {
StringRef Line = *I;
+ Line = Line.split(';').first;
+ Line = Line.split('*').first;
+ Line = Line.trim();
+ if (Line.empty())
+ continue;
StringRef CommandStr, Rest;
std::tie(CommandStr, Rest) = Line.split(' ');
Rest = Rest.trim();
@@ -807,19 +895,11 @@ static void runMRIScript() {
switch (Command) {
case MRICommand::AddLib: {
- auto BufOrErr = MemoryBuffer::getFile(Rest, -1, false);
- failIfError(BufOrErr.getError(), "Could not open library");
- ArchiveBuffers.push_back(std::move(*BufOrErr));
- auto LibOrErr =
- object::Archive::create(ArchiveBuffers.back()->getMemBufferRef());
- failIfError(errorToErrorCode(LibOrErr.takeError()),
- "Could not parse library");
- Archives.push_back(std::move(*LibOrErr));
- object::Archive &Lib = *Archives.back();
+ object::Archive &Lib = readLibrary(Rest);
{
Error Err = Error::success();
for (auto &Member : Lib.children(Err))
- addMember(NewMembers, Member);
+ addChildMember(NewMembers, Member);
failIfError(std::move(Err));
}
break;
@@ -874,7 +954,7 @@ static int ar_main(int argc, char **argv) {
BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);
cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
- for(size_t i = 1; i < Argv.size(); ++i) {
+ for (size_t i = 1; i < Argv.size(); ++i) {
StringRef Arg = Argv[i];
const char *match;
auto MatchFlagWithArg = [&](const char *expected) {
@@ -885,8 +965,7 @@ static int ar_main(int argc, char **argv) {
match = Argv[i];
return true;
}
- if (Arg.startswith(expected) && Arg.size() > len &&
- Arg[len] == '=') {
+ if (Arg.startswith(expected) && Arg.size() > len && Arg[len] == '=') {
match = Arg.data() + len + 1;
return true;
}
@@ -895,7 +974,7 @@ static int ar_main(int argc, char **argv) {
if (handleGenericOption(Argv[i]))
return 0;
if (Arg == "--") {
- for(; i < Argv.size(); ++i)
+ for (; i < Argv.size(); ++i)
PositionalArgs.push_back(Argv[i]);
break;
}
@@ -908,11 +987,11 @@ static int ar_main(int argc, char **argv) {
MRI = true;
} else if (MatchFlagWithArg("format")) {
FormatType = StringSwitch<Format>(match)
- .Case("default", Default)
- .Case("gnu", GNU)
- .Case("darwin", DARWIN)
- .Case("bsd", BSD)
- .Default(Unknown);
+ .Case("default", Default)
+ .Case("gnu", GNU)
+ .Case("darwin", DARWIN)
+ .Case("bsd", BSD)
+ .Default(Unknown);
if (FormatType == Unknown)
fail(std::string("Invalid format ") + match);
} else if (MatchFlagWithArg("plugin")) {
@@ -932,7 +1011,7 @@ static int ar_main(int argc, char **argv) {
static int ranlib_main(int argc, char **argv) {
bool ArchiveSpecified = false;
- for(int i = 1; i < argc; ++i) {
+ for (int i = 1; i < argc; ++i) {
if (handleGenericOption(argv[i])) {
return 0;
} else {
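
[Editor's note] The runMRIScript change above stops relying on line_iterator to strip comments and instead cuts each line at the first ';' or '*', trims it, and skips blanks. The snippet below is a small standalone sketch of just that line-cleanup step; the sample script contents are invented for illustration.

// mri_strip_sketch.cpp -- build with: c++ -std=c++17 mri_strip_sketch.cpp
#include <iostream>
#include <string>
#include <vector>

// Trim leading and trailing whitespace, the way the MRI loop above trims each
// line after cutting it at the first ';' or '*' comment marker.
static std::string trim(const std::string &S) {
  const char *WS = " \t\r\n";
  auto B = S.find_first_not_of(WS);
  if (B == std::string::npos)
    return "";
  auto E = S.find_last_not_of(WS);
  return S.substr(B, E - B + 1);
}

int main() {
  // Sample MRI script lines (assumed input, for illustration only).
  std::vector<std::string> Script = {
      "CREATE out.a",
      "  ; a full-line comment",
      "ADDLIB lib1.a ; trailing comment",
      "* another comment style",
      "",
      "SAVE",
  };

  for (const std::string &Raw : Script) {
    std::string Line = Raw.substr(0, Raw.find(';')); // drop ';' comments
    Line = Line.substr(0, Line.find('*'));           // drop '*' comments
    Line = trim(Line);
    if (Line.empty()) // skip blank lines instead of failing on them
      continue;
    std::cout << "command: " << Line << "\n";
  }
  return 0;
}
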
diff --git a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 1939dc6440fe..789a666cb41a 100644
--- a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -247,6 +247,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(CST_CODE, CE_CMP)
STRINGIFY_CODE(CST_CODE, INLINEASM)
STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
+ STRINGIFY_CODE(CST_CODE, CE_UNOP)
case bitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS";
STRINGIFY_CODE(CST_CODE, DATA)
}
@@ -267,6 +268,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FUNC_CODE, INST_BR)
STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
+ STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
@@ -285,6 +287,11 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
STRINGIFY_CODE(FUNC_CODE, INST_GEP)
STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
+ STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
+ STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
+ STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
+ STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
+ STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
}
case bitc::VALUE_SYMTAB_BLOCK_ID:
switch (CodeID) {
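
[Editor's note] The llvm-bcanalyzer hunks above just add cases to a switch built from the STRINGIFY_CODE macro, which pastes a prefix and a code name into both the case label and the returned string. The snippet below is a reduced model of that macro pattern; the enum and its values are invented for illustration and omit the bitc:: namespace used in the real tool.

// stringify_sketch.cpp -- build with: c++ -std=c++17 stringify_sketch.cpp
#include <cstdio>

// Invented enum standing in for the bitcode function codes.
enum FuncCode { FUNC_CODE_INST_BR = 1, FUNC_CODE_INST_UNOP = 2 };

// Turn an enum case into a "PREFIX_NAME" string inside a switch without
// repeating the identifier by hand.
#define STRINGIFY_CODE(PREFIX, CODE)                                           \
  case PREFIX##_##CODE:                                                        \
    return #PREFIX "_" #CODE;

static const char *getCodeName(FuncCode Code) {
  switch (Code) {
    STRINGIFY_CODE(FUNC_CODE, INST_BR)
    STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
  }
  return "UNKNOWN";
}

int main() {
  std::printf("%s\n", getCodeName(FUNC_CODE_INST_UNOP)); // FUNC_CODE_INST_UNOP
  return 0;
}
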
diff --git a/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp b/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
index e93b63d388e0..728e00e7c3c2 100644
--- a/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "CoverageExporterJson.h"
+#include "CoverageExporterLcov.h"
#include "CoverageFilters.h"
#include "CoverageReport.h"
#include "CoverageSummaryInfo.h"
@@ -215,12 +216,13 @@ void CodeCoverageTool::collectPaths(const std::string &Path) {
for (llvm::sys::fs::recursive_directory_iterator F(Path, EC), E;
F != E; F.increment(EC)) {
- if (EC) {
- warning(EC.message(), F->path());
+ auto Status = F->status();
+ if (!Status) {
+ warning(Status.getError().message(), F->path());
continue;
}
- if (llvm::sys::fs::is_regular_file(F->path()))
+ if (Status->type() == llvm::sys::fs::file_type::regular_file)
addCollectedPath(F->path());
}
}
@@ -368,12 +370,6 @@ std::unique_ptr<CoverageMapping> CodeCoverageTool::load() {
<< "No profile record found for '" << HashMismatch.first << "'"
<< " with hash = 0x" << Twine::utohexstr(HashMismatch.second)
<< '\n';
-
- for (const auto &CounterMismatch : Coverage->getCounterMismatches())
- errs() << "counter-mismatch: "
- << "Coverage mapping for " << CounterMismatch.first
- << " only has " << CounterMismatch.second
- << " valid counter expressions\n";
}
}
@@ -571,7 +567,9 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::values(clEnumValN(CoverageViewOptions::OutputFormat::Text, "text",
"Text output"),
clEnumValN(CoverageViewOptions::OutputFormat::HTML, "html",
- "HTML output")),
+ "HTML output"),
+ clEnumValN(CoverageViewOptions::OutputFormat::Lcov, "lcov",
+ "lcov tracefile output")),
cl::init(CoverageViewOptions::OutputFormat::Text));
cl::opt<std::string> PathRemap(
@@ -679,6 +677,11 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
errs() << "Color output cannot be disabled when generating html.\n";
ViewOpts.Colors = true;
break;
+ case CoverageViewOptions::OutputFormat::Lcov:
+ if (UseColor == cl::BOU_TRUE)
+ errs() << "Color output cannot be enabled when generating lcov.\n";
+ ViewOpts.Colors = false;
+ break;
}
// If path-equivalence was given and is a comma seperated pair then set
@@ -688,7 +691,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
PathRemapping = EquivPair;
// If a demangler is supplied, check if it exists and register it.
- if (DemanglerOpts.size()) {
+ if (!DemanglerOpts.empty()) {
auto DemanglerPathOrErr = sys::findProgramByName(DemanglerOpts[0]);
if (!DemanglerPathOrErr) {
error("Could not find the demangler!",
@@ -838,6 +841,11 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
if (Err)
return Err;
+ if (ViewOpts.Format == CoverageViewOptions::OutputFormat::Lcov) {
+ error("Lcov format should be used with 'llvm-cov export'.");
+ return 1;
+ }
+
ViewOpts.ShowLineNumbers = true;
ViewOpts.ShowLineStats = ShowLineExecutionCounts.getNumOccurrences() != 0 ||
!ShowRegions || ShowBestLineRegionsCounts;
@@ -969,6 +977,9 @@ int CodeCoverageTool::doReport(int argc, const char **argv,
if (ViewOpts.Format == CoverageViewOptions::OutputFormat::HTML) {
error("HTML output for summary reports is not yet supported.");
return 1;
+ } else if (ViewOpts.Format == CoverageViewOptions::OutputFormat::Lcov) {
+ error("Lcov format should be used with 'llvm-cov export'.");
+ return 1;
}
auto Coverage = load();
@@ -1000,8 +1011,10 @@ int CodeCoverageTool::doExport(int argc, const char **argv,
if (Err)
return Err;
- if (ViewOpts.Format != CoverageViewOptions::OutputFormat::Text) {
- error("Coverage data can only be exported as textual JSON.");
+ if (ViewOpts.Format != CoverageViewOptions::OutputFormat::Text &&
+ ViewOpts.Format != CoverageViewOptions::OutputFormat::Lcov) {
+ error("Coverage data can only be exported as textual JSON or an "
+ "lcov tracefile.");
return 1;
}
@@ -1011,12 +1024,27 @@ int CodeCoverageTool::doExport(int argc, const char **argv,
return 1;
}
- auto Exporter = CoverageExporterJson(*Coverage.get(), ViewOpts, outs());
+ std::unique_ptr<CoverageExporter> Exporter;
+
+ switch (ViewOpts.Format) {
+ case CoverageViewOptions::OutputFormat::Text:
+ Exporter = llvm::make_unique<CoverageExporterJson>(*Coverage.get(),
+ ViewOpts, outs());
+ break;
+ case CoverageViewOptions::OutputFormat::HTML:
+ // Unreachable because we should have gracefully terminated with an error
+ // above.
+ llvm_unreachable("Export in HTML is not supported!");
+ case CoverageViewOptions::OutputFormat::Lcov:
+ Exporter = llvm::make_unique<CoverageExporterLcov>(*Coverage.get(),
+ ViewOpts, outs());
+ break;
+ }
if (SourceFiles.empty())
- Exporter.renderRoot(IgnoreFilenameFilters);
+ Exporter->renderRoot(IgnoreFilenameFilters);
else
- Exporter.renderRoot(SourceFiles);
+ Exporter->renderRoot(SourceFiles);
return 0;
}
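
[Editor's note] The doExport change above swaps a concrete CoverageExporterJson for a std::unique_ptr<CoverageExporter> chosen in a switch over the output format, so JSON and lcov share one rendering call. The snippet below sketches that factory-style selection with a toy class hierarchy; all class names and the printed output are invented for illustration.

// exporter_select_sketch.cpp -- build: c++ -std=c++17 exporter_select_sketch.cpp
#include <iostream>
#include <memory>

// Stand-ins for the exporter hierarchy; names are invented for the sketch.
struct Exporter {
  virtual ~Exporter() = default;
  virtual void renderRoot() = 0;
};
struct JsonExporter : Exporter {
  void renderRoot() override { std::cout << "{\"data\":[]}\n"; }
};
struct LcovExporter : Exporter {
  void renderRoot() override { std::cout << "TN:\nend_of_record\n"; }
};

enum class OutputFormat { Text, HTML, Lcov };

int main() {
  OutputFormat Format = OutputFormat::Lcov; // would come from -format=...

  std::unique_ptr<Exporter> Exp;
  switch (Format) {
  case OutputFormat::Text:
    Exp = std::make_unique<JsonExporter>();
    break;
  case OutputFormat::HTML:
    // The real tool rejects HTML earlier and treats reaching this case as
    // unreachable; the sketch just reports it.
    std::cerr << "HTML export not supported\n";
    return 1;
  case OutputFormat::Lcov:
    Exp = std::make_unique<LcovExporter>();
    break;
  }
  Exp->renderRoot(); // one virtual call, regardless of the concrete exporter
  return 0;
}
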
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporter.h b/contrib/llvm/tools/llvm-cov/CoverageExporter.h
index 884fba96d618..b226d68813d9 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageExporter.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporter.h
@@ -30,7 +30,7 @@ protected:
/// The options passed to the tool.
const CoverageViewOptions &Options;
- /// Output stream to print JSON to.
+ /// Output stream to print to.
raw_ostream &OS;
CoverageExporter(const coverage::CoverageMapping &CoverageMapping,
@@ -41,10 +41,10 @@ public:
virtual ~CoverageExporter(){};
/// Render the CoverageMapping object.
- virtual void renderRoot(const CoverageFilters &IgnoreFilenameFilters) = 0;
+ virtual void renderRoot(const CoverageFilters &IgnoreFilters) = 0;
/// Render the CoverageMapping object for specified source files.
- virtual void renderRoot(const std::vector<std::string> &SourceFiles) = 0;
+ virtual void renderRoot(ArrayRef<std::string> SourceFiles) = 0;
};
} // end namespace llvm
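
[Editor's note] The interface change above replaces const std::vector<std::string>& with ArrayRef<std::string>, a non-owning view over contiguous storage. The snippet below uses std::span (C++20) as a rough stand-in to show why callers benefit: any contiguous sequence can be passed without building a vector. It is only an analogy, not ArrayRef itself.

// arrayref_sketch.cpp -- build with: c++ -std=c++20 arrayref_sketch.cpp
#include <iostream>
#include <span>
#include <string>
#include <vector>

// std::span used as a stand-in for llvm::ArrayRef: a non-owning view over a
// contiguous sequence, so callers are not forced to materialize a std::vector
// just to satisfy the parameter type.
static void renderRoot(std::span<const std::string> SourceFiles) {
  for (const std::string &SF : SourceFiles)
    std::cout << "file: " << SF << "\n";
}

int main() {
  std::vector<std::string> FromFilter = {"a.cpp", "b.cpp"};
  renderRoot(FromFilter); // a vector converts to the view implicitly

  const std::string Fixed[] = {"main.cpp"};
  renderRoot(Fixed); // so does a plain array, with no copy
  return 0;
}
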
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp
index 56c3a0003b02..22243f8e2c3e 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp
@@ -16,33 +16,34 @@
// The json code coverage export follows the following format
// Root: dict => Root Element containing metadata
// -- Data: array => Homogeneous array of one or more export objects
-// ---- Export: dict => Json representation of one CoverageMapping
-// ------ Files: array => List of objects describing coverage for files
-// -------- File: dict => Coverage for a single file
-// ---------- Segments: array => List of Segments contained in the file
-// ------------ Segment: dict => Describes a segment of the file with a counter
-// ---------- Expansions: array => List of expansion records
-// ------------ Expansion: dict => Object that descibes a single expansion
-// -------------- CountedRegion: dict => The region to be expanded
-// -------------- TargetRegions: array => List of Regions in the expansion
-// ---------------- CountedRegion: dict => Single Region in the expansion
-// ---------- Summary: dict => Object summarizing the coverage for this file
-// ------------ LineCoverage: dict => Object summarizing line coverage
-// ------------ FunctionCoverage: dict => Object summarizing function coverage
-// ------------ RegionCoverage: dict => Object summarizing region coverage
-// ------ Functions: array => List of objects describing coverage for functions
-// -------- Function: dict => Coverage info for a single function
-// ---------- Filenames: array => List of filenames that the function relates to
-// ---- Summary: dict => Object summarizing the coverage for the entire binary
-// ------ LineCoverage: dict => Object summarizing line coverage
-// ------ FunctionCoverage: dict => Object summarizing function coverage
-// ------ InstantiationCoverage: dict => Object summarizing inst. coverage
-// ------ RegionCoverage: dict => Object summarizing region coverage
+// -- Export: dict => Json representation of one CoverageMapping
+// -- Files: array => List of objects describing coverage for files
+// -- File: dict => Coverage for a single file
+// -- Segments: array => List of Segments contained in the file
+// -- Segment: dict => Describes a segment of the file with a counter
+// -- Expansions: array => List of expansion records
+// -- Expansion: dict => Object that describes a single expansion
+// -- CountedRegion: dict => The region to be expanded
+// -- TargetRegions: array => List of Regions in the expansion
+// -- CountedRegion: dict => Single Region in the expansion
+// -- Summary: dict => Object summarizing the coverage for this file
+// -- LineCoverage: dict => Object summarizing line coverage
+// -- FunctionCoverage: dict => Object summarizing function coverage
+// -- RegionCoverage: dict => Object summarizing region coverage
+// -- Functions: array => List of objects describing coverage for functions
+// -- Function: dict => Coverage info for a single function
+// -- Filenames: array => List of filenames that the function relates to
+// -- Summary: dict => Object summarizing the coverage for the entire binary
+// -- LineCoverage: dict => Object summarizing line coverage
+// -- FunctionCoverage: dict => Object summarizing function coverage
+// -- InstantiationCoverage: dict => Object summarizing inst. coverage
+// -- RegionCoverage: dict => Object summarizing region coverage
//
//===----------------------------------------------------------------------===//
#include "CoverageExporterJson.h"
#include "CoverageReport.h"
+#include "llvm/Support/JSON.h"
/// The semantic version combined as a string.
#define LLVM_COVERAGE_EXPORT_JSON_STR "2.0.0"
@@ -52,335 +53,142 @@
using namespace llvm;
-CoverageExporterJson::CoverageExporterJson(
- const coverage::CoverageMapping &CoverageMapping,
- const CoverageViewOptions &Options, raw_ostream &OS)
- : CoverageExporter(CoverageMapping, Options, OS) {
- State.push(JsonState::None);
-}
-
-void CoverageExporterJson::emitSerialized(const int64_t Value) { OS << Value; }
+namespace {
-void CoverageExporterJson::emitSerialized(const std::string &Value) {
- OS << "\"";
- for (char C : Value) {
- if (C != '\\')
- OS << C;
- else
- OS << "\\\\";
- }
- OS << "\"";
+json::Array renderSegment(const coverage::CoverageSegment &Segment) {
+ return json::Array({Segment.Line, Segment.Col, int64_t(Segment.Count),
+ Segment.HasCount, Segment.IsRegionEntry});
}
-void CoverageExporterJson::emitComma() {
- if (State.top() == JsonState::NonEmptyElement) {
- OS << ",";
- } else if (State.top() == JsonState::EmptyElement) {
- State.pop();
- assert((State.size() >= 1) && "Closed too many JSON elements");
- State.push(JsonState::NonEmptyElement);
- }
+json::Array renderRegion(const coverage::CountedRegion &Region) {
+ return json::Array({Region.LineStart, Region.ColumnStart, Region.LineEnd,
+ Region.ColumnEnd, int64_t(Region.ExecutionCount),
+ Region.FileID, Region.ExpandedFileID,
+ int64_t(Region.Kind)});
}
-void CoverageExporterJson::emitDictStart() {
- emitComma();
- State.push(JsonState::EmptyElement);
- OS << "{";
+json::Array renderRegions(ArrayRef<coverage::CountedRegion> Regions) {
+ json::Array RegionArray;
+ for (const auto &Region : Regions)
+ RegionArray.push_back(renderRegion(Region));
+ return RegionArray;
+}
+
+json::Object renderExpansion(const coverage::ExpansionRecord &Expansion) {
+ return json::Object(
+ {{"filenames", json::Array(Expansion.Function.Filenames)},
+ // Mark the beginning and end of this expansion in the source file.
+ {"source_region", renderRegion(Expansion.Region)},
+ // Enumerate the coverage information for the expansion.
+ {"target_regions", renderRegions(Expansion.Function.CountedRegions)}});
+}
+
+json::Object renderSummary(const FileCoverageSummary &Summary) {
+ return json::Object(
+ {{"lines",
+ json::Object({{"count", int64_t(Summary.LineCoverage.getNumLines())},
+ {"covered", int64_t(Summary.LineCoverage.getCovered())},
+ {"percent", Summary.LineCoverage.getPercentCovered()}})},
+ {"functions",
+ json::Object(
+ {{"count", int64_t(Summary.FunctionCoverage.getNumFunctions())},
+ {"covered", int64_t(Summary.FunctionCoverage.getExecuted())},
+ {"percent", Summary.FunctionCoverage.getPercentCovered()}})},
+ {"instantiations",
+ json::Object(
+ {{"count",
+ int64_t(Summary.InstantiationCoverage.getNumFunctions())},
+ {"covered", int64_t(Summary.InstantiationCoverage.getExecuted())},
+ {"percent", Summary.InstantiationCoverage.getPercentCovered()}})},
+ {"regions",
+ json::Object(
+ {{"count", int64_t(Summary.RegionCoverage.getNumRegions())},
+ {"covered", int64_t(Summary.RegionCoverage.getCovered())},
+ {"notcovered", int64_t(Summary.RegionCoverage.getNumRegions() -
+ Summary.RegionCoverage.getCovered())},
+ {"percent", Summary.RegionCoverage.getPercentCovered()}})}});
+}
+
+json::Array renderFileExpansions(const coverage::CoverageData &FileCoverage,
+ const FileCoverageSummary &FileReport) {
+ json::Array ExpansionArray;
+ for (const auto &Expansion : FileCoverage.getExpansions())
+ ExpansionArray.push_back(renderExpansion(Expansion));
+ return ExpansionArray;
}
-void CoverageExporterJson::emitDictKey(const std::string &Key) {
- emitComma();
- emitSerialized(Key);
- OS << ":";
- State.pop();
- assert((State.size() >= 1) && "Closed too many JSON elements");
-
- // We do not want to emit a comma after this key.
- State.push(JsonState::EmptyElement);
+json::Array renderFileSegments(const coverage::CoverageData &FileCoverage,
+ const FileCoverageSummary &FileReport) {
+ json::Array SegmentArray;
+ for (const auto &Segment : FileCoverage)
+ SegmentArray.push_back(renderSegment(Segment));
+ return SegmentArray;
}
-void CoverageExporterJson::emitDictEnd() {
- State.pop();
- assert((State.size() >= 1) && "Closed too many JSON elements");
- OS << "}";
+json::Object renderFile(const coverage::CoverageMapping &Coverage,
+ const std::string &Filename,
+ const FileCoverageSummary &FileReport,
+ bool ExportSummaryOnly) {
+ json::Object File({{"filename", Filename}});
+ if (!ExportSummaryOnly) {
+ // Calculate and render detailed coverage information for given file.
+ auto FileCoverage = Coverage.getCoverageForFile(Filename);
+ File["segments"] = renderFileSegments(FileCoverage, FileReport);
+ File["expansions"] = renderFileExpansions(FileCoverage, FileReport);
+ }
+ File["summary"] = renderSummary(FileReport);
+ return File;
}
-void CoverageExporterJson::emitArrayStart() {
- emitComma();
- State.push(JsonState::EmptyElement);
- OS << "[";
+json::Array renderFiles(const coverage::CoverageMapping &Coverage,
+ ArrayRef<std::string> SourceFiles,
+ ArrayRef<FileCoverageSummary> FileReports,
+ bool ExportSummaryOnly) {
+ json::Array FileArray;
+ for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I)
+ FileArray.push_back(renderFile(Coverage, SourceFiles[I], FileReports[I],
+ ExportSummaryOnly));
+ return FileArray;
}
-void CoverageExporterJson::emitArrayEnd() {
- State.pop();
- assert((State.size() >= 1) && "Closed too many JSON elements");
- OS << "]";
+json::Array renderFunctions(
+ const iterator_range<coverage::FunctionRecordIterator> &Functions) {
+ json::Array FunctionArray;
+ for (const auto &F : Functions)
+ FunctionArray.push_back(
+ json::Object({{"name", F.Name},
+ {"count", int64_t(F.ExecutionCount)},
+ {"regions", renderRegions(F.CountedRegions)},
+ {"filenames", json::Array(F.Filenames)}}));
+ return FunctionArray;
}
-void CoverageExporterJson::renderRoot(
- const CoverageFilters &IgnoreFilenameFilters) {
+} // end anonymous namespace
+
+void CoverageExporterJson::renderRoot(const CoverageFilters &IgnoreFilters) {
std::vector<std::string> SourceFiles;
for (StringRef SF : Coverage.getUniqueSourceFiles()) {
- if (!IgnoreFilenameFilters.matchesFilename(SF))
+ if (!IgnoreFilters.matchesFilename(SF))
SourceFiles.emplace_back(SF);
}
renderRoot(SourceFiles);
}
-void CoverageExporterJson::renderRoot(
- const std::vector<std::string> &SourceFiles) {
- // Start Root of JSON object.
- emitDictStart();
-
- emitDictElement("version", LLVM_COVERAGE_EXPORT_JSON_STR);
- emitDictElement("type", LLVM_COVERAGE_EXPORT_JSON_TYPE_STR);
- emitDictKey("data");
-
- // Start List of Exports.
- emitArrayStart();
-
- // Start Export.
- emitDictStart();
-
- emitDictKey("files");
-
+void CoverageExporterJson::renderRoot(ArrayRef<std::string> SourceFiles) {
FileCoverageSummary Totals = FileCoverageSummary("Totals");
auto FileReports = CoverageReport::prepareFileReports(Coverage, Totals,
SourceFiles, Options);
- renderFiles(SourceFiles, FileReports);
-
+ auto Export =
+ json::Object({{"files", renderFiles(Coverage, SourceFiles, FileReports,
+ Options.ExportSummaryOnly)},
+ {"totals", renderSummary(Totals)}});
// Skip functions-level information for summary-only export mode.
- if (!Options.ExportSummaryOnly) {
- emitDictKey("functions");
- renderFunctions(Coverage.getCoveredFunctions());
- }
-
- emitDictKey("totals");
- renderSummary(Totals);
-
- // End Export.
- emitDictEnd();
-
- // End List of Exports.
- emitArrayEnd();
-
- // End Root of JSON Object.
- emitDictEnd();
-
- assert((State.top() == JsonState::None) &&
- "All Elements In JSON were Closed");
-}
-
-void CoverageExporterJson::renderFunctions(
- const iterator_range<coverage::FunctionRecordIterator> &Functions) {
- // Start List of Functions.
- emitArrayStart();
-
- for (const auto &Function : Functions) {
- // Start Function.
- emitDictStart();
-
- emitDictElement("name", Function.Name);
- emitDictElement("count", Function.ExecutionCount);
- emitDictKey("regions");
-
- renderRegions(Function.CountedRegions);
-
- emitDictKey("filenames");
-
- // Start Filenames for Function.
- emitArrayStart();
-
- for (const auto &FileName : Function.Filenames)
- emitArrayElement(FileName);
-
- // End Filenames for Function.
- emitArrayEnd();
-
- // End Function.
- emitDictEnd();
- }
-
- // End List of Functions.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderFiles(
- ArrayRef<std::string> SourceFiles,
- ArrayRef<FileCoverageSummary> FileReports) {
- // Start List of Files.
- emitArrayStart();
-
- for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I) {
- renderFile(SourceFiles[I], FileReports[I]);
- }
-
- // End List of Files.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderFile(const std::string &Filename,
- const FileCoverageSummary &FileReport) {
- // Start File.
- emitDictStart();
-
- emitDictElement("filename", Filename);
-
- if (!Options.ExportSummaryOnly) {
- // Calculate and render detailed coverage information for given file.
- auto FileCoverage = Coverage.getCoverageForFile(Filename);
- renderFileCoverage(FileCoverage, FileReport);
- }
-
- emitDictKey("summary");
- renderSummary(FileReport);
-
- // End File.
- emitDictEnd();
-}
-
-
-void CoverageExporterJson::renderFileCoverage(
- const coverage::CoverageData &FileCoverage,
- const FileCoverageSummary &FileReport) {
- emitDictKey("segments");
-
- // Start List of Segments.
- emitArrayStart();
-
- for (const auto &Segment : FileCoverage)
- renderSegment(Segment);
-
- // End List of Segments.
- emitArrayEnd();
-
- emitDictKey("expansions");
-
- // Start List of Expansions.
- emitArrayStart();
-
- for (const auto &Expansion : FileCoverage.getExpansions())
- renderExpansion(Expansion);
-
- // End List of Expansions.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderSegment(
- const coverage::CoverageSegment &Segment) {
- // Start Segment.
- emitArrayStart();
-
- emitArrayElement(Segment.Line);
- emitArrayElement(Segment.Col);
- emitArrayElement(Segment.Count);
- emitArrayElement(Segment.HasCount);
- emitArrayElement(Segment.IsRegionEntry);
-
- // End Segment.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderExpansion(
- const coverage::ExpansionRecord &Expansion) {
- // Start Expansion.
- emitDictStart();
-
- // Mark the beginning and end of this expansion in the source file.
- emitDictKey("source_region");
- renderRegion(Expansion.Region);
-
- // Enumerate the coverage information for the expansion.
- emitDictKey("target_regions");
- renderRegions(Expansion.Function.CountedRegions);
-
- emitDictKey("filenames");
- // Start List of Filenames to map the fileIDs.
- emitArrayStart();
- for (const auto &Filename : Expansion.Function.Filenames)
- emitArrayElement(Filename);
- // End List of Filenames.
- emitArrayEnd();
-
- // End Expansion.
- emitDictEnd();
-}
-
-void CoverageExporterJson::renderRegions(
- ArrayRef<coverage::CountedRegion> Regions) {
- // Start List of Regions.
- emitArrayStart();
-
- for (const auto &Region : Regions)
- renderRegion(Region);
-
- // End List of Regions.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderRegion(const coverage::CountedRegion &Region) {
- // Start CountedRegion.
- emitArrayStart();
-
- emitArrayElement(Region.LineStart);
- emitArrayElement(Region.ColumnStart);
- emitArrayElement(Region.LineEnd);
- emitArrayElement(Region.ColumnEnd);
- emitArrayElement(Region.ExecutionCount);
- emitArrayElement(Region.FileID);
- emitArrayElement(Region.ExpandedFileID);
- emitArrayElement(Region.Kind);
-
- // End CountedRegion.
- emitArrayEnd();
-}
-
-void CoverageExporterJson::renderSummary(const FileCoverageSummary &Summary) {
- // Start Summary for the file.
- emitDictStart();
-
- emitDictKey("lines");
-
- // Start Line Coverage Summary.
- emitDictStart();
- emitDictElement("count", Summary.LineCoverage.getNumLines());
- emitDictElement("covered", Summary.LineCoverage.getCovered());
- emitDictElement("percent", Summary.LineCoverage.getPercentCovered());
- // End Line Coverage Summary.
- emitDictEnd();
-
- emitDictKey("functions");
-
- // Start Function Coverage Summary.
- emitDictStart();
- emitDictElement("count", Summary.FunctionCoverage.getNumFunctions());
- emitDictElement("covered", Summary.FunctionCoverage.getExecuted());
- emitDictElement("percent", Summary.FunctionCoverage.getPercentCovered());
- // End Function Coverage Summary.
- emitDictEnd();
-
- emitDictKey("instantiations");
-
- // Start Instantiation Coverage Summary.
- emitDictStart();
- emitDictElement("count", Summary.InstantiationCoverage.getNumFunctions());
- emitDictElement("covered", Summary.InstantiationCoverage.getExecuted());
- emitDictElement("percent", Summary.InstantiationCoverage.getPercentCovered());
- // End Function Coverage Summary.
- emitDictEnd();
-
- emitDictKey("regions");
+ if (!Options.ExportSummaryOnly)
+ Export["functions"] = renderFunctions(Coverage.getCoveredFunctions());
- // Start Region Coverage Summary.
- emitDictStart();
- emitDictElement("count", Summary.RegionCoverage.getNumRegions());
- emitDictElement("covered", Summary.RegionCoverage.getCovered());
- emitDictElement("notcovered", Summary.RegionCoverage.getNumRegions() -
- Summary.RegionCoverage.getCovered());
- emitDictElement("percent", Summary.RegionCoverage.getPercentCovered());
- // End Region Coverage Summary.
- emitDictEnd();
+ auto ExportArray = json::Array({std::move(Export)});
- // End Summary for the file.
- emitDictEnd();
+ OS << json::Object({{"version", LLVM_COVERAGE_EXPORT_JSON_STR},
+ {"type", LLVM_COVERAGE_EXPORT_JSON_TYPE_STR},
+ {"data", std::move(ExportArray)}});
}
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.h b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.h
index f88dffa0ebea..c37c86b42be9 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.h
@@ -15,96 +15,20 @@
#define LLVM_COV_COVERAGEEXPORTERJSON_H
#include "CoverageExporter.h"
-#include <stack>
namespace llvm {
class CoverageExporterJson : public CoverageExporter {
- /// States that the JSON rendering machine can be in.
- enum JsonState { None, NonEmptyElement, EmptyElement };
-
- /// Tracks state of the JSON output.
- std::stack<JsonState> State;
-
- /// Emit a serialized scalar.
- void emitSerialized(const int64_t Value);
-
- /// Emit a serialized string.
- void emitSerialized(const std::string &Value);
-
- /// Emit a comma if there is a previous element to delimit.
- void emitComma();
-
- /// Emit a starting dictionary/object character.
- void emitDictStart();
-
- /// Emit a dictionary/object key but no value.
- void emitDictKey(const std::string &Key);
-
- /// Emit a dictionary/object key/value pair.
- template <typename V>
- void emitDictElement(const std::string &Key, const V &Value) {
- emitComma();
- emitSerialized(Key);
- OS << ":";
- emitSerialized(Value);
- }
-
- /// Emit a closing dictionary/object character.
- void emitDictEnd();
-
- /// Emit a starting array character.
- void emitArrayStart();
-
- /// Emit an array element.
- template <typename V> void emitArrayElement(const V &Value) {
- emitComma();
- emitSerialized(Value);
- }
-
- /// emit a closing array character.
- void emitArrayEnd();
-
- /// Render an array of all the given functions.
- void renderFunctions(
- const iterator_range<coverage::FunctionRecordIterator> &Functions);
-
- /// Render an array of all the source files, also pass back a Summary.
- void renderFiles(ArrayRef<std::string> SourceFiles,
- ArrayRef<FileCoverageSummary> FileReports);
-
- /// Render a single file.
- void renderFile(const std::string &Filename,
- const FileCoverageSummary &FileReport);
-
- /// Render summary for a single file.
- void renderFileCoverage(const coverage::CoverageData &FileCoverage,
- const FileCoverageSummary &FileReport);
-
- /// Render a CoverageSegment.
- void renderSegment(const coverage::CoverageSegment &Segment);
-
- /// Render an ExpansionRecord.
- void renderExpansion(const coverage::ExpansionRecord &Expansion);
-
- /// Render a list of CountedRegions.
- void renderRegions(ArrayRef<coverage::CountedRegion> Regions);
-
- /// Render a single CountedRegion.
- void renderRegion(const coverage::CountedRegion &Region);
-
- /// Render a FileCoverageSummary.
- void renderSummary(const FileCoverageSummary &Summary);
-
public:
CoverageExporterJson(const coverage::CoverageMapping &CoverageMapping,
- const CoverageViewOptions &Options, raw_ostream &OS);
+ const CoverageViewOptions &Options, raw_ostream &OS)
+ : CoverageExporter(CoverageMapping, Options, OS) {}
/// Render the CoverageMapping object.
- void renderRoot(const CoverageFilters &IgnoreFilenameFilters) override;
+ void renderRoot(const CoverageFilters &IgnoreFilters) override;
/// Render the CoverageMapping object for specified source files.
- void renderRoot(const std::vector<std::string> &SourceFiles) override;
+ void renderRoot(ArrayRef<std::string> SourceFiles) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.cpp b/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
new file mode 100644
index 000000000000..d149ba1a4c87
--- /dev/null
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -0,0 +1,125 @@
+//===- CoverageExporterLcov.cpp - Code coverage export --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements export of code coverage data to lcov trace file format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// The trace file code coverage export uses the following format (see also
+// https://linux.die.net/man/1/geninfo). Each quoted string appears on its own
+// line; the indentation shown here is only for documentation purposes.
+//
+// - for each source file:
+//   - "SF:<absolute path to source file>"
+//   - for each function:
+//     - "FN:<line number of function start>,<function name>"
+//   - for each function:
+//     - "FNDA:<execution count>,<function name>"
+//   - "FNF:<number of functions found>"
+//   - "FNH:<number of functions hit>"
+//   - for each instrumented line:
+// - "DA:<line number>,<execution count>[,<checksum>]
+// - "LH:<number of lines with non-zero execution count>"
+// - "LF:<nubmer of instrumented lines>"
+// - "end_of_record"
+//
+// If the user is exporting summary information only, then the FN, FNDA, and DA
+// lines will not be present.
+//
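+// For illustration only (the values below are hypothetical), a summary-only
+// record for a single source file could look like:
+//
+//   SF:/path/to/foo.cpp
+//   FNF:3
+//   FNH:2
+//   LF:42
+//   LH:40
+//   end_of_record
+//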
+//===----------------------------------------------------------------------===//
+
+#include "CoverageExporterLcov.h"
+#include "CoverageReport.h"
+
+using namespace llvm;
+
+namespace {
+
+void renderFunctionSummary(raw_ostream &OS,
+ const FileCoverageSummary &Summary) {
+ OS << "FNF:" << Summary.FunctionCoverage.getNumFunctions() << '\n'
+ << "FNH:" << Summary.FunctionCoverage.getExecuted() << '\n';
+}
+
+void renderFunctions(
+ raw_ostream &OS,
+ const iterator_range<coverage::FunctionRecordIterator> &Functions) {
+ for (const auto &F : Functions) {
+ auto StartLine = F.CountedRegions.front().LineStart;
+ OS << "FN:" << StartLine << ',' << F.Name << '\n';
+ }
+ for (const auto &F : Functions)
+ OS << "FNDA:" << F.ExecutionCount << ',' << F.Name << '\n';
+}
+
+void renderLineExecutionCounts(raw_ostream &OS,
+ const coverage::CoverageData &FileCoverage) {
+ coverage::LineCoverageIterator LCI{FileCoverage, 1};
+ coverage::LineCoverageIterator LCIEnd = LCI.getEnd();
+ for (; LCI != LCIEnd; ++LCI) {
+ const coverage::LineCoverageStats &LCS = *LCI;
+ if (LCS.isMapped()) {
+ OS << "DA:" << LCS.getLine() << ',' << LCS.getExecutionCount() << '\n';
+ }
+ }
+}
+
+void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
+ OS << "LF:" << Summary.LineCoverage.getNumLines() << '\n'
+ << "LH:" << Summary.LineCoverage.getCovered() << '\n';
+}
+
+void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
+ const std::string &Filename,
+ const FileCoverageSummary &FileReport, bool ExportSummaryOnly) {
+ OS << "SF:" << Filename << '\n';
+
+ if (!ExportSummaryOnly) {
+ renderFunctions(OS, Coverage.getCoveredFunctions());
+ }
+ renderFunctionSummary(OS, FileReport);
+
+ if (!ExportSummaryOnly) {
+ // Calculate and render detailed coverage information for given file.
+ auto FileCoverage = Coverage.getCoverageForFile(Filename);
+ renderLineExecutionCounts(OS, FileCoverage);
+ }
+ renderLineSummary(OS, FileReport);
+
+ OS << "end_of_record\n";
+}
+
+void renderFiles(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
+ ArrayRef<std::string> SourceFiles,
+ ArrayRef<FileCoverageSummary> FileReports,
+ bool ExportSummaryOnly) {
+ for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I)
+ renderFile(OS, Coverage, SourceFiles[I], FileReports[I], ExportSummaryOnly);
+}
+
+} // end anonymous namespace
+
+void CoverageExporterLcov::renderRoot(const CoverageFilters &IgnoreFilters) {
+ std::vector<std::string> SourceFiles;
+ for (StringRef SF : Coverage.getUniqueSourceFiles()) {
+ if (!IgnoreFilters.matchesFilename(SF))
+ SourceFiles.emplace_back(SF);
+ }
+ renderRoot(SourceFiles);
+}
+
+void CoverageExporterLcov::renderRoot(ArrayRef<std::string> SourceFiles) {
+ FileCoverageSummary Totals = FileCoverageSummary("Totals");
+ auto FileReports = CoverageReport::prepareFileReports(Coverage, Totals,
+ SourceFiles, Options);
+ renderFiles(OS, Coverage, SourceFiles, FileReports,
+ Options.ExportSummaryOnly);
+}
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.h b/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.h
new file mode 100644
index 000000000000..539b2dacd384
--- /dev/null
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterLcov.h
@@ -0,0 +1,36 @@
+//===- CoverageExporterLcov.h - Code coverage lcov exporter ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements a code coverage exporter for lcov trace file format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_COV_COVERAGEEXPORTERLCOV_H
+#define LLVM_COV_COVERAGEEXPORTERLCOV_H
+
+#include "CoverageExporter.h"
+
+namespace llvm {
+
+class CoverageExporterLcov : public CoverageExporter {
+public:
+ CoverageExporterLcov(const coverage::CoverageMapping &CoverageMapping,
+ const CoverageViewOptions &Options, raw_ostream &OS)
+ : CoverageExporter(CoverageMapping, Options, OS) {}
+
+ /// Render the CoverageMapping object.
+ void renderRoot(const CoverageFilters &IgnoreFilters) override;
+
+ /// Render the CoverageMapping object for specified source files.
+ void renderRoot(ArrayRef<std::string> SourceFiles) override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_COV_COVERAGEEXPORTERLCOV_H
diff --git a/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h b/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
index 20085a957bb5..c8a472860027 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
@@ -20,7 +20,8 @@ namespace llvm {
struct CoverageViewOptions {
enum class OutputFormat {
Text,
- HTML
+ HTML,
+ Lcov
};
bool Debug;
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp b/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
index a5a8fa9a4814..cebaf63adb12 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
@@ -31,7 +31,7 @@ void CoveragePrinter::StreamDestructor::operator()(raw_ostream *OS) const {
std::string CoveragePrinter::getOutputPath(StringRef Path, StringRef Extension,
bool InToplevel,
bool Relative) const {
- assert(Extension.size() && "The file extension may not be empty");
+ assert(!Extension.empty() && "The file extension may not be empty");
SmallString<256> FullPath;
@@ -80,6 +80,10 @@ CoveragePrinter::create(const CoverageViewOptions &Opts) {
return llvm::make_unique<CoveragePrinterText>(Opts);
case CoverageViewOptions::OutputFormat::HTML:
return llvm::make_unique<CoveragePrinterHTML>(Opts);
+ case CoverageViewOptions::OutputFormat::Lcov:
+ // Unreachable because CodeCoverage.cpp should terminate with an error
+ // before we get here.
+ llvm_unreachable("Lcov format is not supported!");
}
llvm_unreachable("Unknown coverage output format!");
}
@@ -143,6 +147,10 @@ SourceCoverageView::create(StringRef SourceName, const MemoryBuffer &File,
case CoverageViewOptions::OutputFormat::HTML:
return llvm::make_unique<SourceCoverageViewHTML>(
SourceName, File, Options, std::move(CoverageInfo));
+ case CoverageViewOptions::OutputFormat::Lcov:
+ // Unreachable because CodeCoverage.cpp should terminate with an error
+ // before we get here.
+ llvm_unreachable("Lcov format is not supported!");
}
llvm_unreachable("Unknown coverage output format!");
}
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp b/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
index acb67aa5cfc7..3f730bb7bc82 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
@@ -54,7 +54,7 @@ std::string escape(StringRef Str, const CoverageViewOptions &Opts) {
std::string tag(const std::string &Name, const std::string &Str,
const std::string &ClassName = "") {
std::string Tag = "<" + Name;
- if (ClassName != "")
+ if (!ClassName.empty())
Tag += " class='" + ClassName + "'";
return Tag + ">" + Str + "</" + Name + ">";
}
diff --git a/contrib/llvm/tools/llvm-cov/TestingSupport.cpp b/contrib/llvm/tools/llvm-cov/TestingSupport.cpp
index e07abdbd17f1..16a1c2665299 100644
--- a/contrib/llvm/tools/llvm-cov/TestingSupport.cpp
+++ b/contrib/llvm/tools/llvm-cov/TestingSupport.cpp
@@ -33,7 +33,7 @@ int convertForTestingMain(int argc, const char *argv[]) {
if (!ObjErr) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(ObjErr.takeError(), OS, "");
+ logAllUnhandledErrors(ObjErr.takeError(), OS);
OS.flush();
errs() << "error: " << Buf;
return 1;
diff --git a/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 09e40d9b0db7..7594066a395d 100644
--- a/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <string>
@@ -43,17 +44,18 @@ namespace llvm {
static void error(std::error_code EC) {
if (!EC)
return;
- outs() << "\nError reading file: " << EC.message() << ".\n";
+ WithColor::error(outs(), "") << "reading file: " << EC.message() << ".\n";
outs().flush();
exit(1);
}
static void error(Error Err) {
- if (Err) {
- logAllUnhandledErrors(std::move(Err), outs(), "Error reading file: ");
- outs().flush();
- exit(1);
- }
+ if (!Err)
+ return;
+ logAllUnhandledErrors(std::move(Err), WithColor::error(outs()),
+ "reading file: ");
+ outs().flush();
+ exit(1);
}
} // namespace llvm
@@ -61,7 +63,7 @@ static void error(Error Err) {
static void reportError(StringRef Input, StringRef Message) {
if (Input == "-")
Input = "<stdin>";
- errs() << Input << ": " << Message << "\n";
+ WithColor::error(errs(), Input) << Message << "\n";
errs().flush();
exit(1);
}
@@ -496,7 +498,7 @@ static void dumpArchive(const Archive *Arc) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
reportError(Arc->getFileName(), Buf);
}
diff --git a/contrib/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp b/contrib/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp
new file mode 100644
index 000000000000..39028cc86723
--- /dev/null
+++ b/contrib/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp
@@ -0,0 +1,155 @@
+//===- llvm-cxxmap.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// llvm-cxxmap computes a correspondence between old symbol names and new
+// symbol names based on a symbol equivalence file.
+//
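+// An illustrative invocation (the file names here are placeholders):
+//
+//   llvm-cxxmap old-symbols.txt new-symbols.txt -remapping-file=remap.txt \
+//       -o symbol-map.txt
+//
+// Each output line maps an old symbol to the equivalent new symbol, separated
+// by a space.
+//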
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SymbolRemappingReader.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+cl::opt<std::string> OldSymbolFile(cl::Positional, cl::Required,
+ cl::desc("<symbol-file>"));
+cl::opt<std::string> NewSymbolFile(cl::Positional, cl::Required,
+ cl::desc("<symbol-file>"));
+cl::opt<std::string> RemappingFile("remapping-file", cl::Required,
+ cl::desc("Remapping file"));
+cl::alias RemappingFileA("r", cl::aliasopt(RemappingFile));
+cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+ cl::init("-"), cl::desc("Output file"));
+cl::alias OutputFilenameA("o", cl::aliasopt(OutputFilename));
+
+cl::opt<bool> WarnAmbiguous(
+ "Wambiguous",
+ cl::desc("Warn on equivalent symbols in the output symbol list"));
+cl::opt<bool> WarnIncomplete(
+ "Wincomplete",
+ cl::desc("Warn on input symbols missing from output symbol list"));
+
+static void warn(Twine Message, Twine Whence = "",
+ std::string Hint = "") {
+ WithColor::warning();
+ std::string WhenceStr = Whence.str();
+ if (!WhenceStr.empty())
+ errs() << WhenceStr << ": ";
+ errs() << Message << "\n";
+ if (!Hint.empty())
+ WithColor::note() << Hint << "\n";
+}
+
+static void exitWithError(Twine Message, Twine Whence = "",
+ std::string Hint = "") {
+ WithColor::error();
+ std::string WhenceStr = Whence.str();
+ if (!WhenceStr.empty())
+ errs() << WhenceStr << ": ";
+ errs() << Message << "\n";
+ if (!Hint.empty())
+ WithColor::note() << Hint << "\n";
+ ::exit(1);
+}
+
+static void exitWithError(Error E, StringRef Whence = "") {
+ exitWithError(toString(std::move(E)), Whence);
+}
+
+static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
+ exitWithError(EC.message(), Whence);
+}
+
+static void remapSymbols(MemoryBuffer &OldSymbolFile,
+ MemoryBuffer &NewSymbolFile,
+ MemoryBuffer &RemappingFile,
+ raw_ostream &Out) {
+ // Load the remapping file and prepare to canonicalize symbols.
+ SymbolRemappingReader Reader;
+ if (Error E = Reader.read(RemappingFile))
+ exitWithError(std::move(E));
+
+ // Canonicalize the new symbols.
+ DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
+ DenseSet<StringRef> UnparseableSymbols;
+ for (line_iterator LineIt(NewSymbolFile, /*SkipBlanks=*/true, '#');
+ !LineIt.is_at_eof(); ++LineIt) {
+ StringRef Symbol = *LineIt;
+
+ auto K = Reader.insert(Symbol);
+ if (!K) {
+ UnparseableSymbols.insert(Symbol);
+ continue;
+ }
+
+ auto ItAndIsNew = MappedNames.insert({K, Symbol});
+ if (WarnAmbiguous && !ItAndIsNew.second &&
+ ItAndIsNew.first->second != Symbol) {
+ warn("symbol " + Symbol + " is equivalent to earlier symbol " +
+ ItAndIsNew.first->second,
+ NewSymbolFile.getBufferIdentifier() + ":" +
+ Twine(LineIt.line_number()),
+ "later symbol will not be the target of any remappings");
+ }
+ }
+
+ // Figure out which new symbol each old symbol is equivalent to.
+ for (line_iterator LineIt(OldSymbolFile, /*SkipBlanks=*/true, '#');
+ !LineIt.is_at_eof(); ++LineIt) {
+ StringRef Symbol = *LineIt;
+
+ auto K = Reader.lookup(Symbol);
+ StringRef NewSymbol = MappedNames.lookup(K);
+
+ if (NewSymbol.empty()) {
+ if (WarnIncomplete && !UnparseableSymbols.count(Symbol)) {
+ warn("no new symbol matches old symbol " + Symbol,
+ OldSymbolFile.getBufferIdentifier() + ":" +
+ Twine(LineIt.line_number()));
+ }
+ continue;
+ }
+
+ Out << Symbol << " " << NewSymbol << "\n";
+ }
+}
+
+int main(int argc, const char *argv[]) {
+ InitLLVM X(argc, argv);
+
+ cl::ParseCommandLineOptions(argc, argv, "LLVM C++ mangled name remapper\n");
+
+ auto OldSymbolBufOrError = MemoryBuffer::getFileOrSTDIN(OldSymbolFile);
+ if (!OldSymbolBufOrError)
+ exitWithErrorCode(OldSymbolBufOrError.getError(), OldSymbolFile);
+
+ auto NewSymbolBufOrError = MemoryBuffer::getFileOrSTDIN(NewSymbolFile);
+ if (!NewSymbolBufOrError)
+ exitWithErrorCode(NewSymbolBufOrError.getError(), NewSymbolFile);
+
+ auto RemappingBufOrError = MemoryBuffer::getFileOrSTDIN(RemappingFile);
+ if (!RemappingBufOrError)
+ exitWithErrorCode(RemappingBufOrError.getError(), RemappingFile);
+
+ std::error_code EC;
+ raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
+ if (EC)
+ exitWithErrorCode(EC, OutputFilename);
+
+ remapSymbols(*OldSymbolBufOrError.get(), *NewSymbolBufOrError.get(),
+ *RemappingBufOrError.get(), OS);
+}
diff --git a/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp b/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
index af0a055ea21f..acff8bb3e89b 100644
--- a/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
+++ b/contrib/llvm/tools/llvm-diff/DifferenceEngine.cpp
@@ -629,8 +629,8 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
// If the terminators have different kinds, but one is an invoke and the
// other is an unconditional branch immediately following a call, unify
// the results and the destinations.
- TerminatorInst *LTerm = LStart->getParent()->getTerminator();
- TerminatorInst *RTerm = RStart->getParent()->getTerminator();
+ Instruction *LTerm = LStart->getParent()->getTerminator();
+ Instruction *RTerm = RStart->getParent()->getTerminator();
if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
if (cast<BranchInst>(LTerm)->isConditional()) return;
BasicBlock::iterator I = LTerm->getIterator();
@@ -686,9 +686,18 @@ void DifferenceEngine::diff(Module *L, Module *R) {
StringSet<> LNames;
SmallVector<std::pair<Function*,Function*>, 20> Queue;
+ unsigned LeftAnonCount = 0;
+ unsigned RightAnonCount = 0;
+
for (Module::iterator I = L->begin(), E = L->end(); I != E; ++I) {
Function *LFn = &*I;
- LNames.insert(LFn->getName());
+ StringRef Name = LFn->getName();
+ if (Name.empty()) {
+ ++LeftAnonCount;
+ continue;
+ }
+
+ LNames.insert(Name);
if (Function *RFn = R->getFunction(LFn->getName()))
Queue.push_back(std::make_pair(LFn, RFn));
@@ -698,10 +707,25 @@ void DifferenceEngine::diff(Module *L, Module *R) {
for (Module::iterator I = R->begin(), E = R->end(); I != E; ++I) {
Function *RFn = &*I;
- if (!LNames.count(RFn->getName()))
+ StringRef Name = RFn->getName();
+ if (Name.empty()) {
+ ++RightAnonCount;
+ continue;
+ }
+
+ if (!LNames.count(Name))
logf("function %r exists only in right module") << RFn;
}
+
+ if (LeftAnonCount != 0 || RightAnonCount != 0) {
+ SmallString<32> Tmp;
+ logf(("not comparing " + Twine(LeftAnonCount) +
+ " anonymous functions in the left module and " +
+ Twine(RightAnonCount) + " in the right module")
+ .toStringRef(Tmp));
+ }
+
for (SmallVectorImpl<std::pair<Function*,Function*> >::iterator
I = Queue.begin(), E = Queue.end(); I != E; ++I)
diff(I->first, I->second);
diff --git a/contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp b/contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 5af853d4ef28..5fe7e8b4615b 100644
--- a/contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/contrib/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -1,4 +1,6 @@
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
@@ -18,18 +20,27 @@ struct PerFunctionStats {
/// Number of constants with location across all inlined instances.
unsigned ConstantMembers = 0;
/// List of all Variables in this function.
- SmallDenseSet<uint32_t, 4> VarsInFunction;
+ StringSet<> VarsInFunction;
/// Compile units also cover a PC range, but have this flag set to false.
bool IsFunction = false;
};
-/// Holds accumulated global statistics about local variables.
+/// Holds accumulated global statistics about DIEs.
struct GlobalStats {
/// Total number of PC range bytes covered by DW_AT_locations.
unsigned ScopeBytesCovered = 0;
/// Total number of PC range bytes in each variable's enclosing scope,
/// starting from the first definition of the variable.
unsigned ScopeBytesFromFirstDefinition = 0;
+ /// Total number of call site entries (DW_TAG_call_site).
+ unsigned CallSiteEntries = 0;
+ /// Total byte size of concrete functions. This byte size includes
+ /// inline functions contained in the concrete functions.
+ uint64_t FunctionSize = 0;
+ /// Total byte size of inlined functions. This is the total number of bytes
+ /// for the top inline functions within concrete functions. This can help
+ /// tune the inline settings when compiling to match user expectations.
+ uint64_t InlineFunctionSize = 0;
};
/// Extract the low pc from a Die.
@@ -46,19 +57,37 @@ static uint64_t getLowPC(DWARFDie Die) {
}
/// Collect debug info quality metrics for one DIE.
-static void collectStatsForDie(DWARFDie Die, std::string Prefix,
- uint64_t ScopeLowPC, uint64_t BytesInScope,
+static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
+ std::string VarPrefix, uint64_t ScopeLowPC,
+ uint64_t BytesInScope,
+ uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
GlobalStats &GlobalStats) {
bool HasLoc = false;
uint64_t BytesCovered = 0;
uint64_t OffsetToFirstDefinition = 0;
+
+ if (Die.getTag() == dwarf::DW_TAG_call_site) {
+ GlobalStats.CallSiteEntries++;
+ return;
+ }
+
+ if (Die.getTag() != dwarf::DW_TAG_formal_parameter &&
+ Die.getTag() != dwarf::DW_TAG_variable &&
+ Die.getTag() != dwarf::DW_TAG_member) {
+ // Not a variable or constant member.
+ return;
+ }
+
if (Die.find(dwarf::DW_AT_const_value)) {
// This catches constant members *and* variables.
HasLoc = true;
BytesCovered = BytesInScope;
- } else if (Die.getTag() == dwarf::DW_TAG_variable ||
- Die.getTag() == dwarf::DW_TAG_formal_parameter) {
+ } else {
+ if (Die.getTag() == dwarf::DW_TAG_member) {
+ // Non-const member.
+ return;
+ }
// Handle variables and function arguments.
auto FormValue = Die.find(dwarf::DW_AT_location);
HasLoc = FormValue.hasValue();
@@ -86,19 +115,17 @@ static void collectStatsForDie(DWARFDie Die, std::string Prefix,
BytesCovered = BytesInScope;
}
}
- } else {
- // Not a variable or constant member.
- return;
}
// Collect PC range coverage data.
- auto &FnStats = FnStatMap[Prefix];
+ auto &FnStats = FnStatMap[FnPrefix];
if (DWARFDie D =
Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin))
Die = D;
- // This is a unique ID for the variable inside the current object file.
- unsigned CanonicalDieOffset = Die.getOffset();
- FnStats.VarsInFunction.insert(CanonicalDieOffset);
+ // By using the variable name + the path through the lexical block tree, the
+ // keys are consistent across duplicate abstract origins in different CUs.
+ std::string VarName = StringRef(Die.getName(DINameKind::ShortName));
+ FnStats.VarsInFunction.insert(VarPrefix+VarName);
if (BytesInScope) {
FnStats.TotalVarWithLoc += (unsigned)HasLoc;
// Adjust for the fact the variables often start their lifetime in the
@@ -115,24 +142,34 @@ static void collectStatsForDie(DWARFDie Die, std::string Prefix,
}
/// Recursively collect debug info quality metrics.
-static void collectStatsRecursive(DWARFDie Die, std::string Prefix,
- uint64_t ScopeLowPC, uint64_t BytesInScope,
+static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
+ std::string VarPrefix, uint64_t ScopeLowPC,
+ uint64_t BytesInScope,
+ uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
GlobalStats &GlobalStats) {
// Handle any kind of lexical scope.
- if (Die.getTag() == dwarf::DW_TAG_subprogram ||
- Die.getTag() == dwarf::DW_TAG_inlined_subroutine ||
- Die.getTag() == dwarf::DW_TAG_lexical_block) {
+ const dwarf::Tag Tag = Die.getTag();
+ const bool IsFunction = Tag == dwarf::DW_TAG_subprogram;
+ const bool IsBlock = Tag == dwarf::DW_TAG_lexical_block;
+ const bool IsInlinedFunction = Tag == dwarf::DW_TAG_inlined_subroutine;
+ if (IsFunction || IsInlinedFunction || IsBlock) {
+
+ // Reset VarPrefix when entering a new function.
+ if (Die.getTag() == dwarf::DW_TAG_subprogram ||
+ Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
+ VarPrefix = "v";
+
// Ignore forward declarations.
if (Die.find(dwarf::DW_AT_declaration))
return;
// Count the function.
- if (Die.getTag() != dwarf::DW_TAG_lexical_block) {
+ if (!IsBlock) {
StringRef Name = Die.getName(DINameKind::LinkageName);
if (Name.empty())
Name = Die.getName(DINameKind::ShortName);
- Prefix = Name;
+ FnPrefix = Name;
// Skip over abstract origins.
if (Die.find(dwarf::DW_AT_inline))
return;
@@ -148,26 +185,42 @@ static void collectStatsRecursive(DWARFDie Die, std::string Prefix,
llvm::consumeError(RangesOrError.takeError());
return;
}
-
+
auto Ranges = RangesOrError.get();
uint64_t BytesInThisScope = 0;
for (auto Range : Ranges)
BytesInThisScope += Range.HighPC - Range.LowPC;
ScopeLowPC = getLowPC(Die);
- if (BytesInThisScope)
+ if (BytesInThisScope) {
BytesInScope = BytesInThisScope;
+ if (IsFunction)
+ GlobalStats.FunctionSize += BytesInThisScope;
+ else if (IsInlinedFunction && InlineDepth == 0)
+ GlobalStats.InlineFunctionSize += BytesInThisScope;
+ }
} else {
// Not a scope, visit the Die itself. It could be a variable.
- collectStatsForDie(Die, Prefix, ScopeLowPC, BytesInScope, FnStatMap,
- GlobalStats);
+ collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope,
+ InlineDepth, FnStatMap, GlobalStats);
}
+ // Set InlineDepth correctly for child recursion
+ if (IsFunction)
+ InlineDepth = 0;
+ else if (IsInlinedFunction)
+ ++InlineDepth;
+
// Traverse children.
+ unsigned LexicalBlockIndex = 0;
DWARFDie Child = Die.getFirstChild();
while (Child) {
- collectStatsRecursive(Child, Prefix, ScopeLowPC, BytesInScope, FnStatMap,
- GlobalStats);
+ std::string ChildVarPrefix = VarPrefix;
+ if (Child.getTag() == dwarf::DW_TAG_lexical_block)
+ ChildVarPrefix += toHex(LexicalBlockIndex++) + '.';
+
+ collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC,
+ BytesInScope, InlineDepth, FnStatMap, GlobalStats);
Child = Child.getSibling();
}
}
@@ -200,7 +253,7 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
StringMap<PerFunctionStats> Statistics;
for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
if (DWARFDie CUDie = CU->getUnitDIE(false))
- collectStatsRecursive(CUDie, "/", 0, 0, Statistics, GlobalStats);
+ collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats);
/// The version number should be increased every time the algorithm is changed
/// (including bug fixes). New metrics may be added without increasing the
@@ -218,16 +271,15 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
VarWithLoc += Stats.TotalVarWithLoc + Constants;
VarTotal += TotalVars + Constants;
VarUnique += Stats.VarsInFunction.size();
- LLVM_DEBUG(for (auto V
- : Stats.VarsInFunction) llvm::dbgs()
- << Entry.getKey() << ": " << V << "\n");
+ LLVM_DEBUG(for (auto &V : Stats.VarsInFunction) llvm::dbgs()
+ << Entry.getKey() << ": " << V.getKey() << "\n");
NumFunctions += Stats.IsFunction;
NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined;
}
// Print summary.
OS.SetBufferSize(1024);
- OS << "{\"version\":\"" << Version << '"';
+ OS << "{\"version\":" << Version;
LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
llvm::dbgs() << "---------------------------------\n");
printDatum(OS, "file", Filename.str());
@@ -237,9 +289,12 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
printDatum(OS, "unique source variables", VarUnique);
printDatum(OS, "source variables", VarTotal);
printDatum(OS, "variables with location", VarWithLoc);
+ printDatum(OS, "call site entries", GlobalStats.CallSiteEntries);
printDatum(OS, "scope bytes total",
GlobalStats.ScopeBytesFromFirstDefinition);
printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
+ printDatum(OS, "total function size", GlobalStats.FunctionSize);
+ printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize);
OS << "}\n";
LLVM_DEBUG(
llvm::dbgs() << "Total Availability: "
diff --git a/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index d75f33906098..d9e8e36efe5c 100644
--- a/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -156,7 +156,7 @@ static list<std::string> Name(
value_desc("pattern"), cat(DwarfDumpCategory));
static alias NameAlias("n", desc("Alias for -name"), aliasopt(Name));
static opt<unsigned long long> Lookup("lookup",
- desc("Lookup <address> in the debug information and print out any"
+ desc("Lookup <address> in the debug information and print out any "
"available file, function, block and line table details."),
value_desc("address"), cat(DwarfDumpCategory));
static opt<std::string>
@@ -226,7 +226,7 @@ static alias VerboseAlias("v", desc("Alias for -verbose."), aliasopt(Verbose),
static void error(StringRef Prefix, std::error_code EC) {
if (!EC)
return;
- errs() << Prefix << ": " << EC.message() << "\n";
+ WithColor::error() << Prefix << ": " << EC.message() << "\n";
exit(1);
}
@@ -281,32 +281,45 @@ using HandlerFn = std::function<bool(ObjectFile &, DWARFContext &DICtx, Twine,
raw_ostream &)>;
/// Print only DIEs that have a certain name.
+static bool filterByName(const StringSet<> &Names, DWARFDie Die,
+ StringRef NameRef, raw_ostream &OS) {
+ std::string Name =
+ (IgnoreCase && !UseRegex) ? NameRef.lower() : NameRef.str();
+ if (UseRegex) {
+ // Match regular expression.
+ for (auto Pattern : Names.keys()) {
+ Regex RE(Pattern, IgnoreCase ? Regex::IgnoreCase : Regex::NoFlags);
+ std::string Error;
+ if (!RE.isValid(Error)) {
+ errs() << "error in regular expression: " << Error << "\n";
+ exit(1);
+ }
+ if (RE.match(Name)) {
+ Die.dump(OS, 0, getDumpOpts());
+ return true;
+ }
+ }
+ } else if (Names.count(Name)) {
+ // Match full text.
+ Die.dump(OS, 0, getDumpOpts());
+ return true;
+ }
+ return false;
+}
+
+/// Print only DIEs that have a certain name.
static void filterByName(const StringSet<> &Names,
- DWARFContext::cu_iterator_range CUs, raw_ostream &OS) {
+ DWARFContext::unit_iterator_range CUs,
+ raw_ostream &OS) {
for (const auto &CU : CUs)
for (const auto &Entry : CU->dies()) {
DWARFDie Die = {CU.get(), &Entry};
- if (const char *NamePtr = Die.getName(DINameKind::ShortName)) {
- std::string Name =
- (IgnoreCase && !UseRegex) ? StringRef(NamePtr).lower() : NamePtr;
- // Match regular expression.
- if (UseRegex)
- for (auto Pattern : Names.keys()) {
- Regex RE(Pattern, IgnoreCase ? Regex::IgnoreCase : Regex::NoFlags);
- std::string Error;
- if (!RE.isValid(Error)) {
- errs() << "error in regular expression: " << Error << "\n";
- exit(1);
- }
- if (RE.match(Name))
- Die.dump(OS, 0, getDumpOpts());
- }
- // Match full text.
- else if (Names.count(Name))
- Die.dump(OS, 0, getDumpOpts());
- }
+ if (const char *Name = Die.getName(DINameKind::ShortName))
+ if (filterByName(Names, Die, Name, OS))
+ continue;
+ if (const char *Name = Die.getName(DINameKind::LinkageName))
+ filterByName(Names, Die, Name, OS);
}
-
}
static void getDies(DWARFContext &DICtx, const AppleAcceleratorTable &Accel,
@@ -358,7 +371,7 @@ static void filterByAccelName(ArrayRef<std::string> Names, DWARFContext &DICtx,
getDies(DICtx, DICtx.getAppleNamespaces(), Name, Dies);
getDies(DICtx, DICtx.getDebugNames(), Name, Dies);
}
- llvm::sort(Dies.begin(), Dies.end());
+ llvm::sort(Dies);
Dies.erase(std::unique(Dies.begin(), Dies.end()), Dies.end());
for (DWARFDie Die : Dies)
@@ -409,8 +422,8 @@ static bool dumpObjectFile(ObjectFile &Obj, DWARFContext &DICtx, Twine Filename,
for (auto name : Name)
Names.insert((IgnoreCase && !UseRegex) ? StringRef(name).lower() : name);
- filterByName(Names, DICtx.compile_units(), OS);
- filterByName(Names, DICtx.dwo_compile_units(), OS);
+ filterByName(Names, DICtx.normal_units(), OS);
+ filterByName(Names, DICtx.dwo_units(), OS);
return true;
}
@@ -558,6 +571,14 @@ int main(int argc, char **argv) {
return 0;
}
+ // FIXME: Audit interactions between these two options and make them
+ // compatible.
+ if (Diff && Verbose) {
+ WithColor::error() << "incompatible arguments: specifying both -diff and "
+ "-verbose is currently not supported";
+ return 0;
+ }
+
std::unique_ptr<ToolOutputFile> OutputFile;
if (!OutputFilename.empty()) {
std::error_code EC;
@@ -611,7 +632,7 @@ int main(int argc, char **argv) {
if (Verify) {
// If we encountered errors during verify, exit with a non-zero exit status.
- if (!std::all_of(Objects.begin(), Objects.end(), [&](std::string Object) {
+ if (!all_of(Objects, [&](std::string Object) {
return handleFile(Object, verifyObjectFile, OS);
}))
exit(1);
diff --git a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
index 75668a9dd8b6..b6facc919b51 100644
--- a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -158,7 +158,7 @@ static cl::opt<int>
ThinLTOCachePruningInterval("thinlto-cache-pruning-interval",
cl::init(1200), cl::desc("Set ThinLTO cache pruning interval."));
-static cl::opt<int>
+static cl::opt<unsigned long long>
ThinLTOCacheMaxSizeBytes("thinlto-cache-max-size-bytes",
cl::desc("Set ThinLTO cache pruning directory maximum size in bytes."));
@@ -166,6 +166,10 @@ static cl::opt<int>
ThinLTOCacheMaxSizeFiles("thinlto-cache-max-size-files", cl::init(1000000),
cl::desc("Set ThinLTO cache pruning directory maximum number of files."));
+static cl::opt<unsigned>
+ ThinLTOCacheEntryExpiration("thinlto-cache-entry-expiration", cl::init(604800) /* 1w */,
+ cl::desc("Set ThinLTO cache entry expiration time."));
+
static cl::opt<std::string> ThinLTOSaveTempsPrefix(
"thinlto-save-temps",
cl::desc("Save ThinLTO temp files using filenames created by adding "
@@ -481,6 +485,7 @@ public:
ThinGenerator.setTargetOptions(Options);
ThinGenerator.setCacheDir(ThinLTOCacheDir);
ThinGenerator.setCachePruningInterval(ThinLTOCachePruningInterval);
+ ThinGenerator.setCacheEntryExpiration(ThinLTOCacheEntryExpiration);
ThinGenerator.setCacheMaxSizeFiles(ThinLTOCacheMaxSizeFiles);
ThinGenerator.setCacheMaxSizeBytes(ThinLTOCacheMaxSizeBytes);
ThinGenerator.setFreestanding(EnableFreestanding);
@@ -557,11 +562,14 @@ private:
auto Index = loadCombinedIndex();
for (auto &Filename : InputFilenames) {
+ LLVMContext Ctx;
+ auto TheModule = loadModule(Filename, Ctx);
+
// Build a map of module to the GUIDs and summary objects that should
// be written to its index.
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
- ThinLTOCodeGenerator::gatherImportedSummariesForModule(
- Filename, *Index, ModuleToSummariesForIndex);
+ ThinGenerator.gatherImportedSummariesForModule(*TheModule, *Index,
+ ModuleToSummariesForIndex);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
@@ -589,12 +597,14 @@ private:
auto Index = loadCombinedIndex();
for (auto &Filename : InputFilenames) {
+ LLVMContext Ctx;
+ auto TheModule = loadModule(Filename, Ctx);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
OutputName = Filename + ".imports";
}
OutputName = getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix);
- ThinLTOCodeGenerator::emitImports(Filename, OutputName, *Index);
+ ThinGenerator.emitImports(*TheModule, OutputName, *Index);
}
}
diff --git a/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp b/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 442973f90209..26426367e252 100644
--- a/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -23,6 +23,7 @@
#include "llvm/LTO/LTO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Threading.h"
@@ -388,6 +389,7 @@ static int dumpSymtab(int argc, char **argv) {
}
int main(int argc, char **argv) {
+ InitLLVM X(argc, argv);
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
diff --git a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
index f494d02f3bca..c0976502f545 100644
--- a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -164,6 +164,10 @@ MainFileName("main-file-name",
static cl::opt<bool> SaveTempLabels("save-temp-labels",
cl::desc("Don't discard temporary labels"));
+static cl::opt<bool> LexMasmIntegers(
+ "masm-integers",
+ cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)"));
+
static cl::opt<bool> NoExecStack("no-exec-stack",
cl::desc("File doesn't need an exec stack"));
@@ -293,6 +297,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget,
return SymbolResult;
Parser->setShowParsedOperands(ShowInstOperands);
Parser->setTargetParser(*TAP);
+ Parser->getLexer().setLexMasmIntegers(LexMasmIntegers);
int Res = Parser->Run(NoInitialTextSection);
@@ -313,7 +318,6 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
- TripleName = Triple::normalize(TripleName);
setDwarfDebugFlags(argc, argv);
setDwarfDebugProducer();
diff --git a/contrib/llvm/tools/llvm-mca/CodeRegion.cpp b/contrib/llvm/tools/llvm-mca/CodeRegion.cpp
index 896865996504..29a27c50c171 100644
--- a/contrib/llvm/tools/llvm-mca/CodeRegion.cpp
+++ b/contrib/llvm/tools/llvm-mca/CodeRegion.cpp
@@ -14,11 +14,10 @@
#include "CodeRegion.h"
-using namespace llvm;
-
+namespace llvm {
namespace mca {
-bool CodeRegion::isLocInRange(SMLoc Loc) const {
+bool CodeRegion::isLocInRange(llvm::SMLoc Loc) const {
if (RangeEnd.isValid() && Loc.getPointer() > RangeEnd.getPointer())
return false;
if (RangeStart.isValid() && Loc.getPointer() < RangeStart.getPointer())
@@ -26,11 +25,11 @@ bool CodeRegion::isLocInRange(SMLoc Loc) const {
return true;
}
-void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
+void CodeRegions::beginRegion(llvm::StringRef Description, llvm::SMLoc Loc) {
assert(!Regions.empty() && "Missing Default region");
const CodeRegion &CurrentRegion = *Regions.back();
if (CurrentRegion.startLoc().isValid() && !CurrentRegion.endLoc().isValid()) {
- SM.PrintMessage(Loc, SourceMgr::DK_Warning,
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning,
"Ignoring invalid region start");
return;
}
@@ -41,26 +40,28 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
addRegion(Description, Loc);
}
-void CodeRegions::endRegion(SMLoc Loc) {
+void CodeRegions::endRegion(llvm::SMLoc Loc) {
assert(!Regions.empty() && "Missing Default region");
CodeRegion &CurrentRegion = *Regions.back();
if (CurrentRegion.endLoc().isValid()) {
- SM.PrintMessage(Loc, SourceMgr::DK_Warning, "Ignoring invalid region end");
+ SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning,
+ "Ignoring invalid region end");
return;
}
CurrentRegion.setEndLocation(Loc);
}
-void CodeRegions::addInstruction(std::unique_ptr<const MCInst> Instruction) {
- const SMLoc &Loc = Instruction->getLoc();
+void CodeRegions::addInstruction(const llvm::MCInst &Instruction) {
+ const llvm::SMLoc &Loc = Instruction.getLoc();
const auto It =
std::find_if(Regions.rbegin(), Regions.rend(),
[Loc](const std::unique_ptr<CodeRegion> &Region) {
return Region->isLocInRange(Loc);
});
if (It != Regions.rend())
- (*It)->addInstruction(std::move(Instruction));
+ (*It)->addInstruction(Instruction);
}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/CodeRegion.h b/contrib/llvm/tools/llvm-mca/CodeRegion.h
index 7f0025e4884c..867aa18bb4fe 100644
--- a/contrib/llvm/tools/llvm-mca/CodeRegion.h
+++ b/contrib/llvm/tools/llvm-mca/CodeRegion.h
@@ -34,12 +34,14 @@
#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_H
#define LLVM_TOOLS_LLVM_MCA_CODEREGION_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include <vector>
+namespace llvm {
namespace mca {
/// A region of assembly code.
@@ -49,7 +51,7 @@ class CodeRegion {
// An optional descriptor for this region.
llvm::StringRef Description;
// Instructions that form this region.
- std::vector<std::unique_ptr<const llvm::MCInst>> Instructions;
+ std::vector<llvm::MCInst> Instructions;
// Source location range.
llvm::SMLoc RangeStart;
llvm::SMLoc RangeEnd;
@@ -61,8 +63,8 @@ public:
CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start)
: Description(Desc), RangeStart(Start), RangeEnd() {}
- void addInstruction(std::unique_ptr<const llvm::MCInst> Instruction) {
- Instructions.emplace_back(std::move(Instruction));
+ void addInstruction(const llvm::MCInst &Instruction) {
+ Instructions.emplace_back(Instruction);
}
llvm::SMLoc startLoc() const { return RangeStart; }
@@ -72,10 +74,7 @@ public:
bool empty() const { return Instructions.empty(); }
bool isLocInRange(llvm::SMLoc Loc) const;
- const std::vector<std::unique_ptr<const llvm::MCInst>> &
- getInstructions() const {
- return Instructions;
- }
+ llvm::ArrayRef<llvm::MCInst> getInstructions() const { return Instructions; }
llvm::StringRef getDescription() const { return Description; }
};
@@ -106,26 +105,26 @@ public:
void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc);
void endRegion(llvm::SMLoc Loc);
- void addInstruction(std::unique_ptr<const llvm::MCInst> Instruction);
+ void addInstruction(const llvm::MCInst &Instruction);
+ llvm::SourceMgr &getSourceMgr() const { return SM; }
CodeRegions(llvm::SourceMgr &S) : SM(S) {
// Create a default region for the input code sequence.
addRegion("Default", llvm::SMLoc());
}
- const std::vector<std::unique_ptr<const llvm::MCInst>> &
- getInstructionSequence(unsigned Idx) const {
+ llvm::ArrayRef<llvm::MCInst> getInstructionSequence(unsigned Idx) const {
return Regions[Idx]->getInstructions();
}
bool empty() const {
- return std::all_of(Regions.begin(), Regions.end(),
- [](const std::unique_ptr<CodeRegion> &Region) {
- return Region->empty();
- });
+ return llvm::all_of(Regions, [](const std::unique_ptr<CodeRegion> &Region) {
+ return Region->empty();
+ });
}
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
new file mode 100644
index 000000000000..5bd37adeeae9
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -0,0 +1,137 @@
+//===----------------------- CodeRegionGenerator.cpp ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines classes responsible for generating llvm-mca
+/// CodeRegions from various types of input. llvm-mca only analyzes CodeRegions,
+/// so the classes here provide the input-to-CodeRegions translation.
+//
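+// As a sketch of the expected input, assembly can delimit analysis regions
+// with special comments (the region name after LLVM-MCA-BEGIN is optional):
+//
+//   # LLVM-MCA-BEGIN my-loop
+//   add %eax, %ebx
+//   # LLVM-MCA-END
+//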
+//===----------------------------------------------------------------------===//
+
+#include "CodeRegionGenerator.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SMLoc.h"
+#include <memory>
+
+namespace llvm {
+namespace mca {
+
+// This virtual dtor serves as the anchor for the CodeRegionGenerator class.
+CodeRegionGenerator::~CodeRegionGenerator() {}
+
+// A comment consumer that parses strings. The only valid tokens are strings.
+class MCACommentConsumer : public AsmCommentConsumer {
+public:
+ CodeRegions &Regions;
+
+ MCACommentConsumer(CodeRegions &R) : Regions(R) {}
+ void HandleComment(SMLoc Loc, StringRef CommentText) override;
+};
+
+// This class provides the callbacks that occur when parsing input assembly.
+class MCStreamerWrapper final : public MCStreamer {
+ CodeRegions &Regions;
+
+public:
+ MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R)
+ : MCStreamer(Context), Regions(R) {}
+
+ // We only want to intercept the emission of new instructions.
+ virtual void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo & /* unused */,
+ bool /* unused */) override {
+ Regions.addInstruction(Inst);
+ }
+
+ bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
+ return true;
+ }
+
+ void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) override {}
+ void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
+ uint64_t Size = 0, unsigned ByteAlignment = 0,
+ SMLoc Loc = SMLoc()) override {}
+ void EmitGPRel32Value(const MCExpr *Value) override {}
+ void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void EmitCOFFSymbolStorageClass(int StorageClass) override {}
+ void EmitCOFFSymbolType(int Type) override {}
+ void EndCOFFSymbolDef() override {}
+
+ ArrayRef<MCInst> GetInstructionSequence(unsigned Index) const {
+ return Regions.getInstructionSequence(Index);
+ }
+};
+
+void MCACommentConsumer::HandleComment(SMLoc Loc, StringRef CommentText) {
+ // Skip empty comments.
+ StringRef Comment(CommentText);
+ if (Comment.empty())
+ return;
+
+ // Skip spaces and tabs.
+ unsigned Position = Comment.find_first_not_of(" \t");
+ if (Position >= Comment.size())
+ // We reached the end of the comment. Bail out.
+ return;
+
+ Comment = Comment.drop_front(Position);
+ if (Comment.consume_front("LLVM-MCA-END")) {
+ Regions.endRegion(Loc);
+ return;
+ }
+
+ // Try to parse the LLVM-MCA-BEGIN comment.
+ if (!Comment.consume_front("LLVM-MCA-BEGIN"))
+ return;
+
+ // Skip spaces and tabs.
+ Position = Comment.find_first_not_of(" \t");
+ if (Position < Comment.size())
+ Comment = Comment.drop_front(Position);
+ // Use the rest of the string as a descriptor for this code snippet.
+ Regions.beginRegion(Comment, Loc);
+}
+
+Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() {
+ MCTargetOptions Opts;
+ Opts.PreserveAsmComments = false;
+ MCStreamerWrapper Str(Ctx, Regions);
+
+ // Create a MCAsmParser and setup the lexer to recognize llvm-mca ASM
+ // comments.
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(Regions.getSourceMgr(), Ctx, Str, MAI));
+ MCAsmLexer &Lexer = Parser->getLexer();
+ MCACommentConsumer CC(Regions);
+ Lexer.setCommentConsumer(&CC);
+
+ // Create a target-specific parser and perform the parse.
+ std::unique_ptr<MCTargetAsmParser> TAP(
+ TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts));
+ if (!TAP)
+ return make_error<StringError>(
+ "This target does not support assembly parsing.",
+ inconvertibleErrorCode());
+ Parser->setTargetParser(*TAP);
+ Parser->Run(false);
+
+ // Get the assembler dialect from the input. llvm-mca will use this as the
+ // default dialect when printing reports.
+ AssemblerDialect = Parser->getAssemblerDialect();
+ return Regions;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.h b/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.h
new file mode 100644
index 000000000000..892cafb92686
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/CodeRegionGenerator.h
@@ -0,0 +1,70 @@
+//===----------------------- CodeRegionGenerator.h --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares classes responsible for generating llvm-mca
+/// CodeRegions from various types of input. llvm-mca only analyzes CodeRegions,
+/// so the classes here provide the input-to-CodeRegions translation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
+#define LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
+
+#include "CodeRegion.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <memory>
+
+namespace llvm {
+namespace mca {
+
+/// This class is responsible for parsing the input given to the llvm-mca
+/// driver, and converting that into a CodeRegions instance.
+class CodeRegionGenerator {
+protected:
+ CodeRegions Regions;
+ CodeRegionGenerator(const CodeRegionGenerator &) = delete;
+ CodeRegionGenerator &operator=(const CodeRegionGenerator &) = delete;
+
+public:
+ CodeRegionGenerator(SourceMgr &SM) : Regions(SM) {}
+ virtual ~CodeRegionGenerator();
+ virtual Expected<const CodeRegions &> parseCodeRegions() = 0;
+};
+
+/// This class is responsible for parsing input ASM and generating
+/// a CodeRegions instance.
+class AsmCodeRegionGenerator final : public CodeRegionGenerator {
+ const Target &TheTarget;
+ MCContext &Ctx;
+ const MCAsmInfo &MAI;
+ const MCSubtargetInfo &STI;
+ const MCInstrInfo &MCII;
+ unsigned AssemblerDialect; // This is set during parsing.
+
+public:
+ AsmCodeRegionGenerator(const Target &T, SourceMgr &SM, MCContext &C,
+ const MCAsmInfo &A, const MCSubtargetInfo &S,
+ const MCInstrInfo &I)
+ : CodeRegionGenerator(SM), TheTarget(T), Ctx(C), MAI(A), STI(S), MCII(I),
+ AssemblerDialect(0) {}
+
+ unsigned getAssemblerDialect() const { return AssemblerDialect; }
+ Expected<const CodeRegions &> parseCodeRegions() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_MCA_CODEREGION_GENERATOR_H
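
A rough usage sketch for the new interface, using only what is declared above; TheTarget, SrcMgr, Ctx, MAI, STI and MCII are assumed to come from the driver's usual target/MC setup and are not shown here.

llvm::mca::AsmCodeRegionGenerator CRG(TheTarget, SrcMgr, Ctx, MAI, STI, MCII);
llvm::Expected<const llvm::mca::CodeRegions &> RegionsOrErr =
    CRG.parseCodeRegions();
if (!RegionsOrErr) {
  llvm::errs() << llvm::toString(RegionsOrErr.takeError()) << '\n';
  return 1;
}
const llvm::mca::CodeRegions &Regions = *RegionsOrErr;
unsigned Dialect = CRG.getAssemblerDialect(); // only meaningful after parsing
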
diff --git a/contrib/llvm/tools/llvm-mca/Context.cpp b/contrib/llvm/tools/llvm-mca/Context.cpp
deleted file mode 100644
index 685714e64b92..000000000000
--- a/contrib/llvm/tools/llvm-mca/Context.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-//===---------------------------- Context.cpp -------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines a class for holding ownership of various simulated
-/// hardware units. A Context also provides a utility routine for constructing
-/// a default out-of-order pipeline with fetch, dispatch, execute, and retire
-/// stages.
-///
-//===----------------------------------------------------------------------===//
-
-#include "Context.h"
-#include "DispatchStage.h"
-#include "ExecuteStage.h"
-#include "FetchStage.h"
-#include "RegisterFile.h"
-#include "RetireControlUnit.h"
-#include "RetireStage.h"
-#include "Scheduler.h"
-
-namespace mca {
-
-using namespace llvm;
-
-std::unique_ptr<Pipeline>
-Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
- SourceMgr &SrcMgr) {
- const MCSchedModel &SM = STI.getSchedModel();
-
- // Create the hardware units defining the backend.
- auto RCU = llvm::make_unique<RetireControlUnit>(SM);
- auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
- auto HWS = llvm::make_unique<Scheduler>(
- SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias);
-
- // Create the pipeline and its stages.
- auto P = llvm::make_unique<Pipeline>();
- auto F = llvm::make_unique<FetchStage>(IB, SrcMgr);
- auto D = llvm::make_unique<DispatchStage>(
- STI, MRI, Opts.RegisterFileSize, Opts.DispatchWidth, *RCU, *PRF, *HWS);
- auto R = llvm::make_unique<RetireStage>(*RCU, *PRF);
- auto E = llvm::make_unique<ExecuteStage>(*RCU, *HWS);
-
- // Add the hardware to the context.
- addHardwareUnit(std::move(RCU));
- addHardwareUnit(std::move(PRF));
- addHardwareUnit(std::move(HWS));
-
- // Build the pipeline.
- P->appendStage(std::move(F));
- P->appendStage(std::move(D));
- P->appendStage(std::move(R));
- P->appendStage(std::move(E));
- return P;
-}
-
-} // namespace mca
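
One detail of the deleted function worth calling out: the stages only hold references to the hardware units, so the Context must take ownership of RCU, PRF and HWS (whose heap addresses stay stable) before the function returns. Below is a minimal, self-contained sketch of that ownership pattern in plain C++, independent of the mca types.

#include <memory>
#include <vector>

struct Unit { int State = 0; };            // stands in for RCU/PRF/Scheduler

struct Stage {                             // stands in for a pipeline stage
  Unit &U;                                 // stages only borrow the units
  explicit Stage(Unit &Ref) : U(Ref) {}
};

struct Owner {                             // stands in for Context + Pipeline
  std::vector<std::unique_ptr<Unit>> Units;
  std::vector<std::unique_ptr<Stage>> Stages;
};

int main() {
  Owner O;
  auto U = std::make_unique<Unit>();
  auto S = std::make_unique<Stage>(*U);    // reference taken before the move
  O.Units.push_back(std::move(U));         // ownership moves, address is stable
  O.Stages.push_back(std::move(S));
}
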
diff --git a/contrib/llvm/tools/llvm-mca/DispatchStatistics.cpp b/contrib/llvm/tools/llvm-mca/DispatchStatistics.cpp
deleted file mode 100644
index 4bddbef9a0c8..000000000000
--- a/contrib/llvm/tools/llvm-mca/DispatchStatistics.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-//===--------------------- DispatchStatistics.cpp ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the DispatchStatistics interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "DispatchStatistics.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void DispatchStatistics::onEvent(const HWStallEvent &Event) {
- if (Event.Type < HWStallEvent::LastGenericEvent)
- HWStalls[Event.Type]++;
-}
-
-void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
- if (Event.Type == HWInstructionEvent::Dispatched)
- ++NumDispatched;
-}
-
-void DispatchStatistics::printDispatchHistogram(llvm::raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nDispatch Logic - "
- << "number of cycles where we saw N instructions dispatched:\n";
- TempStream << "[# dispatched], [# cycles]\n";
- for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) {
- TempStream << " " << Entry.first << ", " << Entry.second
- << " ("
- << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
- << "%)\n";
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-
-void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nDynamic Dispatch Stall Cycles:\n";
- TempStream << "RAT - Register unavailable: "
- << HWStalls[HWStallEvent::RegisterFileStall];
- TempStream << "\nRCU - Retire tokens unavailable: "
- << HWStalls[HWStallEvent::RetireControlUnitStall];
- TempStream << "\nSCHEDQ - Scheduler full: "
- << HWStalls[HWStallEvent::SchedulerQueueFull];
- TempStream << "\nLQ - Load queue full: "
- << HWStalls[HWStallEvent::LoadQueueFull];
- TempStream << "\nSQ - Store queue full: "
- << HWStalls[HWStallEvent::StoreQueueFull];
- TempStream << "\nGROUP - Static restrictions on the dispatch group: "
- << HWStalls[HWStallEvent::DispatchGroupStall];
- TempStream << '\n';
- TempStream.flush();
- OS << Buffer;
-}
-
-} // namespace mca
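
For context, the histogram printed above is keyed by the number of instructions dispatched in a single cycle. The counters themselves (NumDispatched, NumCycles, DispatchGroupSizePerCycle) are declared in DispatchStatistics.h, which is not part of this hunk; the sketch below is only a plausible shape for that bookkeeping, not the actual implementation.

#include <map>

struct DispatchHistogram {
  unsigned NumDispatched = 0;                  // instructions seen this cycle
  unsigned NumCycles = 0;
  std::map<unsigned, unsigned> GroupSizePerCycle;

  void onDispatch() { ++NumDispatched; }
  void onCycleEnd() {
    ++NumCycles;
    ++GroupSizePerCycle[NumDispatched];        // bucket by this cycle's width
    NumDispatched = 0;
  }
};
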
diff --git a/contrib/llvm/tools/llvm-mca/ExecuteStage.cpp b/contrib/llvm/tools/llvm-mca/ExecuteStage.cpp
deleted file mode 100644
index 437f864b072c..000000000000
--- a/contrib/llvm/tools/llvm-mca/ExecuteStage.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the execution stage of an instruction pipeline.
-///
-/// The ExecuteStage is responsible for managing the hardware scheduler
-/// and issuing notifications that an instruction has been executed.
-///
-//===----------------------------------------------------------------------===//
-
-#include "ExecuteStage.h"
-#include "Scheduler.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Debug.h"
-
-#define DEBUG_TYPE "llvm-mca"
-
-namespace mca {
-
-using namespace llvm;
-
-// Reclaim the simulated resources used by the scheduler.
-void ExecuteStage::reclaimSchedulerResources() {
- SmallVector<ResourceRef, 8> ResourcesFreed;
- HWS.reclaimSimulatedResources(ResourcesFreed);
- for (const ResourceRef &RR : ResourcesFreed)
- notifyResourceAvailable(RR);
-}
-
-// Update the scheduler's instruction queues.
-void ExecuteStage::updateSchedulerQueues() {
- SmallVector<InstRef, 4> InstructionIDs;
- HWS.updateIssuedQueue(InstructionIDs);
- for (const InstRef &IR : InstructionIDs)
- notifyInstructionExecuted(IR);
- InstructionIDs.clear();
-
- HWS.updatePendingQueue(InstructionIDs);
- for (const InstRef &IR : InstructionIDs)
- notifyInstructionReady(IR);
-}
-
-// Issue instructions that are waiting in the scheduler's ready queue.
-void ExecuteStage::issueReadyInstructions() {
- SmallVector<InstRef, 4> InstructionIDs;
- InstRef IR = HWS.select();
- while (IR.isValid()) {
- SmallVector<std::pair<ResourceRef, double>, 4> Used;
- HWS.issueInstruction(IR, Used);
-
- // Reclaim instruction resources and perform notifications.
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- notifyReleasedBuffers(Desc.Buffers);
- notifyInstructionIssued(IR, Used);
- if (IR.getInstruction()->isExecuted())
- notifyInstructionExecuted(IR);
-
- // Instructions that have been issued during this cycle might have unblocked
- // other dependent instructions. Dependent instructions may be issued during
- // this same cycle if operands have ReadAdvance entries. Promote those
- // instructions to the ReadyQueue and tell the caller that we need
- // another round of 'issue()'.
- HWS.promoteToReadyQueue(InstructionIDs);
- for (const InstRef &I : InstructionIDs)
- notifyInstructionReady(I);
- InstructionIDs.clear();
-
- // Select the next instruction to issue.
- IR = HWS.select();
- }
-}
-
-// The following routine is the maintenance routine of the ExecuteStage.
-// It is responsible for updating the hardware scheduler (HWS), including
-// reclaiming the HWS's simulated hardware resources, as well as updating the
-// HWS's queues.
-//
-// This routine also processes the instructions that are ready for issuance.
-// These instructions are managed by the HWS's ready queue and can be accessed
-// via the Scheduler::select() routine.
-//
-// Notifications are issued to this stage's listeners when instructions are
-// moved between the HWS's queues. In particular, when an instruction becomes
-// ready or executed.
-void ExecuteStage::cycleStart() {
- reclaimSchedulerResources();
- updateSchedulerQueues();
- issueReadyInstructions();
-}
-
-// Schedule the instruction for execution on the hardware.
-bool ExecuteStage::execute(InstRef &IR) {
-#ifndef NDEBUG
- // Ensure that the HWS has not stored this instruction in its queues.
- HWS.sanityCheck(IR);
-#endif
- // Reserve a slot in each buffered resource. Also, mark units with
- // BufferSize=0 as reserved. Resources with a buffer size of zero will only
- // be released after MCIS is issued, and all the ResourceCycles for those
- // units have been consumed.
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- HWS.reserveBuffers(Desc.Buffers);
- notifyReservedBuffers(Desc.Buffers);
-
- // Obtain a slot in the LSU. If we cannot reserve resources, return true, so
- // that succeeding stages can make progress.
- if (!HWS.reserveResources(IR))
- return true;
-
- // If we did not return early, then the scheduler is ready for execution.
- notifyInstructionReady(IR);
-
- // Don't add a zero-latency instruction to the Wait or Ready queue.
- // A zero-latency instruction doesn't consume any scheduler resources. That is
- // because it doesn't need to be executed, and it is often removed at register
- // renaming stage. For example, register-register moves are often optimized at
- // register renaming stage by simply updating register aliases. On some
- // targets, zero-idiom instructions (for example: a xor that clears the value
- // of a register) are treated specially, and are often eliminated at register
- // renaming stage.
- //
- // Instructions that use an in-order dispatch/issue processor resource must be
- // issued immediately to the pipeline(s). Any other in-order buffered
- // resources (i.e. BufferSize=1) are consumed.
- //
- // If we cannot issue immediately, the HWS will add IR to its ready queue for
- // execution later, so we must return early here.
- if (!HWS.issueImmediately(IR))
- return true;
-
- LLVM_DEBUG(dbgs() << "[SCHEDULER] Instruction #" << IR
- << " issued immediately\n");
-
- // Issue IR. The resources for this issuance will be placed in 'Used.'
- SmallVector<std::pair<ResourceRef, double>, 4> Used;
- HWS.issueInstruction(IR, Used);
-
- // Perform notifications.
- notifyReleasedBuffers(Desc.Buffers);
- notifyInstructionIssued(IR, Used);
- if (IR.getInstruction()->isExecuted())
- notifyInstructionExecuted(IR);
-
- return true;
-}
-
-void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) {
- HWS.onInstructionExecuted(IR);
- LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n');
- notifyEvent<HWInstructionEvent>(
- HWInstructionEvent(HWInstructionEvent::Executed, IR));
- RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
-}
-
-void ExecuteStage::notifyInstructionReady(const InstRef &IR) {
- LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n');
- notifyEvent<HWInstructionEvent>(
- HWInstructionEvent(HWInstructionEvent::Ready, IR));
-}
-
-void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) {
- LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.'
- << RR.second << "]\n");
- for (HWEventListener *Listener : getListeners())
- Listener->onResourceAvailable(RR);
-}
-
-void ExecuteStage::notifyInstructionIssued(
- const InstRef &IR, ArrayRef<std::pair<ResourceRef, double>> Used) {
- LLVM_DEBUG({
- dbgs() << "[E] Instruction Issued: #" << IR << '\n';
- for (const std::pair<ResourceRef, unsigned> &Resource : Used) {
- dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
- << Resource.first.second << "], ";
- dbgs() << "cycles: " << Resource.second << '\n';
- }
- });
- notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used));
-}
-
-void ExecuteStage::notifyReservedBuffers(ArrayRef<uint64_t> Buffers) {
- if (Buffers.empty())
- return;
-
- SmallVector<unsigned, 4> BufferIDs(Buffers.begin(), Buffers.end());
- std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(),
- [&](uint64_t Op) { return HWS.getResourceID(Op); });
- for (HWEventListener *Listener : getListeners())
- Listener->onReservedBuffers(BufferIDs);
-}
-
-void ExecuteStage::notifyReleasedBuffers(ArrayRef<uint64_t> Buffers) {
- if (Buffers.empty())
- return;
-
- SmallVector<unsigned, 4> BufferIDs(Buffers.begin(), Buffers.end());
- std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(),
- [&](uint64_t Op) { return HWS.getResourceID(Op); });
- for (HWEventListener *Listener : getListeners())
- Listener->onReleasedBuffers(BufferIDs);
-}
-
-} // namespace mca
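
The notify* helpers above fan events out to registered HWEventListener objects; views such as DispatchStatistics consume them through an onEvent(const HWInstructionEvent &) override. A minimal listener that only counts executed instructions might look like the sketch below (assuming that hook on HWEventListener, whose header is not shown in this diff).

// Sketch only; mca::HWEventListener and mca::HWInstructionEvent are assumed
// to provide the interface used by the statistics views in this tool.
class ExecutedCounter : public mca::HWEventListener {
  unsigned NumExecuted = 0;

public:
  void onEvent(const mca::HWInstructionEvent &Event) override {
    if (Event.Type == mca::HWInstructionEvent::Executed)
      ++NumExecuted;                     // one more instruction finished
  }
  unsigned getNumExecuted() const { return NumExecuted; }
};
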
diff --git a/contrib/llvm/tools/llvm-mca/ExecuteStage.h b/contrib/llvm/tools/llvm-mca/ExecuteStage.h
deleted file mode 100644
index 4914a9373e7c..000000000000
--- a/contrib/llvm/tools/llvm-mca/ExecuteStage.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the execution stage of an instruction pipeline.
-///
-/// The ExecuteStage is responsible for managing the hardware scheduler
-/// and issuing notifications that an instruction has been executed.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H
-
-#include "Instruction.h"
-#include "RetireControlUnit.h"
-#include "Scheduler.h"
-#include "Stage.h"
-#include "llvm/ADT/ArrayRef.h"
-
-namespace mca {
-
-class ExecuteStage : public Stage {
- // Owner will go away when we move listeners/eventing to the stages.
- RetireControlUnit &RCU;
- Scheduler &HWS;
-
- // The following routines are used to maintain the HWS.
- void reclaimSchedulerResources();
- void updateSchedulerQueues();
- void issueReadyInstructions();
-
-public:
- ExecuteStage(RetireControlUnit &R, Scheduler &S) : Stage(), RCU(R), HWS(S) {}
- ExecuteStage(const ExecuteStage &Other) = delete;
- ExecuteStage &operator=(const ExecuteStage &Other) = delete;
-
- // The ExecuteStage will always complete all of its work per call to
- // execute(), so it is never left in a 'to-be-processed' state.
- virtual bool hasWorkToComplete() const override final { return false; }
-
- virtual void cycleStart() override final;
- virtual bool execute(InstRef &IR) override final;
-
- void
- notifyInstructionIssued(const InstRef &IR,
- llvm::ArrayRef<std::pair<ResourceRef, double>> Used);
- void notifyInstructionExecuted(const InstRef &IR);
- void notifyInstructionReady(const InstRef &IR);
- void notifyResourceAvailable(const ResourceRef &RR);
-
- // Notify listeners that buffered resources were consumed.
- void notifyReservedBuffers(llvm::ArrayRef<uint64_t> Buffers);
-
- // Notify listeners that buffered resources were freed.
- void notifyReleasedBuffers(llvm::ArrayRef<uint64_t> Buffers);
-};
-
-} // namespace mca
-
-#endif // LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H
diff --git a/contrib/llvm/tools/llvm-mca/FetchStage.cpp b/contrib/llvm/tools/llvm-mca/FetchStage.cpp
deleted file mode 100644
index 3da117c0abc1..000000000000
--- a/contrib/llvm/tools/llvm-mca/FetchStage.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//===---------------------- FetchStage.cpp ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the Fetch stage of an instruction pipeline. Its sole
-/// purpose in life is to produce instructions for the rest of the pipeline.
-///
-//===----------------------------------------------------------------------===//
-
-#include "FetchStage.h"
-
-namespace mca {
-
-bool FetchStage::hasWorkToComplete() const { return SM.hasNext(); }
-
-bool FetchStage::execute(InstRef &IR) {
- if (!SM.hasNext())
- return false;
- const SourceRef SR = SM.peekNext();
- std::unique_ptr<Instruction> I = IB.createInstruction(*SR.second);
- IR = InstRef(SR.first, I.get());
- Instructions[IR.getSourceIndex()] = std::move(I);
- return true;
-}
-
-void FetchStage::postExecute() { SM.updateNext(); }
-
-void FetchStage::cycleEnd() {
- // Find the first instruction which hasn't been retired.
- const InstMap::iterator It =
- llvm::find_if(Instructions, [](const InstMap::value_type &KeyValuePair) {
- return !KeyValuePair.second->isRetired();
- });
-
- // Erase instructions up to the first that hasn't been retired.
- if (It != Instructions.begin())
- Instructions.erase(Instructions.begin(), It);
-}
-
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/FetchStage.h b/contrib/llvm/tools/llvm-mca/FetchStage.h
deleted file mode 100644
index 620075d24fea..000000000000
--- a/contrib/llvm/tools/llvm-mca/FetchStage.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===---------------------- FetchStage.h ------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the Fetch stage of an instruction pipeline. Its sole
-/// purpose in life is to produce instructions for the rest of the pipeline.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H
-#define LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H
-
-#include "InstrBuilder.h"
-#include "SourceMgr.h"
-#include "Stage.h"
-#include <map>
-
-namespace mca {
-
-class FetchStage : public Stage {
- using InstMap = std::map<unsigned, std::unique_ptr<Instruction>>;
- InstMap Instructions;
- InstrBuilder &IB;
- SourceMgr &SM;
-
-public:
- FetchStage(InstrBuilder &IB, SourceMgr &SM) : IB(IB), SM(SM) {}
- FetchStage(const FetchStage &Other) = delete;
- FetchStage &operator=(const FetchStage &Other) = delete;
-
- bool hasWorkToComplete() const override final;
- bool execute(InstRef &IR) override final;
- void postExecute() override final;
- void cycleEnd() override final;
-};
-
-} // namespace mca
-
-#endif // LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H
diff --git a/contrib/llvm/tools/llvm-mca/InstrBuilder.h b/contrib/llvm/tools/llvm-mca/InstrBuilder.h
deleted file mode 100644
index 69a53b6fec21..000000000000
--- a/contrib/llvm/tools/llvm-mca/InstrBuilder.h
+++ /dev/null
@@ -1,85 +0,0 @@
-//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// A builder class for instructions that are statically analyzed by llvm-mca.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H
-#define LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H
-
-#include "Instruction.h"
-#include "Support.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrAnalysis.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-namespace mca {
-
-class DispatchUnit;
-
-/// A builder class that knows how to construct Instruction objects.
-///
-/// Every llvm-mca Instruction is described by an object of class InstrDesc.
-/// An InstrDesc describes which registers are read/written by the instruction,
-/// as well as the instruction latency and hardware resources consumed.
-///
-/// This class is used by the tool to construct Instructions and instruction
-/// descriptors (i.e. InstrDesc objects).
-/// Information from the machine scheduling model is used to identify processor
-/// resources that are consumed by an instruction.
-class InstrBuilder {
- const llvm::MCSubtargetInfo &STI;
- const llvm::MCInstrInfo &MCII;
- const llvm::MCRegisterInfo &MRI;
- const llvm::MCInstrAnalysis &MCIA;
- llvm::MCInstPrinter &MCIP;
- llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
-
- llvm::DenseMap<unsigned short, std::unique_ptr<const InstrDesc>> Descriptors;
- llvm::DenseMap<const llvm::MCInst *, std::unique_ptr<const InstrDesc>>
- VariantDescriptors;
-
- const InstrDesc &createInstrDescImpl(const llvm::MCInst &MCI);
- InstrBuilder(const InstrBuilder &) = delete;
- InstrBuilder &operator=(const InstrBuilder &) = delete;
-
- void populateWrites(InstrDesc &ID, const llvm::MCInst &MCI,
- unsigned SchedClassID);
- void populateReads(InstrDesc &ID, const llvm::MCInst &MCI,
- unsigned SchedClassID);
-
-public:
- InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii,
- const llvm::MCRegisterInfo &mri,
- const llvm::MCInstrAnalysis &mcia, llvm::MCInstPrinter &mcip)
- : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), MCIP(mcip),
- ProcResourceMasks(STI.getSchedModel().getNumProcResourceKinds()) {
- computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
- }
-
- const InstrDesc &getOrCreateInstrDesc(const llvm::MCInst &MCI);
- // Returns an array of processor resource masks.
- // Masks are computed by function mca::computeProcResourceMasks. See
- // Support.h for a description of how masks are computed and how masks can be
- // used to solve set membership problems.
- llvm::ArrayRef<uint64_t> getProcResourceMasks() const {
- return ProcResourceMasks;
- }
-
- void clear() { VariantDescriptors.shrink_and_clear(); }
-
- std::unique_ptr<Instruction> createInstruction(const llvm::MCInst &MCI);
-};
-} // namespace mca
-
-#endif
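
The masks returned by getProcResourceMasks() turn "is resource X part of group Y" into a bit test. The encoding below is an assumption for illustration only (Support.h holds the authoritative scheme): each resource unit owns one bit and a group's mask covers the bits of its members.

#include <cstdint>

constexpr uint64_t P0 = 1ULL << 0;        // e.g. one ALU pipe
constexpr uint64_t P1 = 1ULL << 1;        // e.g. a second ALU pipe
constexpr uint64_t ALUGroup = P0 | P1;    // a group containing both pipes

constexpr bool contains(uint64_t Group, uint64_t Unit) {
  return (Group & Unit) == Unit;          // set membership as a bit test
}

static_assert(contains(ALUGroup, P0), "P0 belongs to the group");
static_assert(!contains(P0, P1), "distinct units do not overlap");
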
diff --git a/contrib/llvm/tools/llvm-mca/LSUnit.cpp b/contrib/llvm/tools/llvm-mca/LSUnit.cpp
deleted file mode 100644
index 9ee3b6171893..000000000000
--- a/contrib/llvm/tools/llvm-mca/LSUnit.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-//===----------------------- LSUnit.cpp --------------------------*- C++-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// A Load-Store Unit for the llvm-mca tool.
-///
-//===----------------------------------------------------------------------===//
-
-#include "LSUnit.h"
-#include "Instruction.h"
-
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "llvm-mca"
-
-namespace mca {
-
-#ifndef NDEBUG
-void LSUnit::dump() const {
- dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
- dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
- dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
- dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
-}
-#endif
-
-void LSUnit::assignLQSlot(unsigned Index) {
- assert(!isLQFull());
- assert(LoadQueue.count(Index) == 0);
-
- LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index
- << ",slot=" << LoadQueue.size() << ">\n");
- LoadQueue.insert(Index);
-}
-
-void LSUnit::assignSQSlot(unsigned Index) {
- assert(!isSQFull());
- assert(StoreQueue.count(Index) == 0);
-
- LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index
- << ",slot=" << StoreQueue.size() << ">\n");
- StoreQueue.insert(Index);
-}
-
-bool LSUnit::reserve(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- unsigned MayLoad = Desc.MayLoad;
- unsigned MayStore = Desc.MayStore;
- unsigned IsMemBarrier = Desc.HasSideEffects;
- if (!MayLoad && !MayStore)
- return false;
-
- const unsigned Index = IR.getSourceIndex();
- if (MayLoad) {
- if (IsMemBarrier)
- LoadBarriers.insert(Index);
- assignLQSlot(Index);
- }
- if (MayStore) {
- if (IsMemBarrier)
- StoreBarriers.insert(Index);
- assignSQSlot(Index);
- }
- return true;
-}
-
-bool LSUnit::isReady(const InstRef &IR) const {
- const unsigned Index = IR.getSourceIndex();
- bool IsALoad = LoadQueue.count(Index) != 0;
- bool IsAStore = StoreQueue.count(Index) != 0;
- assert((IsALoad || IsAStore) && "Instruction is not in queue!");
-
- if (IsALoad && !LoadBarriers.empty()) {
- unsigned LoadBarrierIndex = *LoadBarriers.begin();
- if (Index > LoadBarrierIndex)
- return false;
- if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
- return false;
- }
-
- if (IsAStore && !StoreBarriers.empty()) {
- unsigned StoreBarrierIndex = *StoreBarriers.begin();
- if (Index > StoreBarrierIndex)
- return false;
- if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
- return false;
- }
-
- if (NoAlias && IsALoad)
- return true;
-
- if (StoreQueue.size()) {
- // Check if this memory operation is younger than the older store.
- if (Index > *StoreQueue.begin())
- return false;
- }
-
- // Okay, we are older than the oldest store in the queue.
- // If there are no pending loads, then we can say for sure that this
- // instruction is ready.
- if (isLQEmpty())
- return true;
-
- // Check if there are no older loads.
- if (Index <= *LoadQueue.begin())
- return true;
-
- // There is at least one younger load.
- return !IsAStore;
-}
-
-void LSUnit::onInstructionExecuted(const InstRef &IR) {
- const unsigned Index = IR.getSourceIndex();
- std::set<unsigned>::iterator it = LoadQueue.find(Index);
- if (it != LoadQueue.end()) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the load queue.\n");
- LoadQueue.erase(it);
- }
-
- it = StoreQueue.find(Index);
- if (it != StoreQueue.end()) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the store queue.\n");
- StoreQueue.erase(it);
- }
-
- if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the set of store barriers.\n");
- StoreBarriers.erase(StoreBarriers.begin());
- }
- if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the set of load barriers.\n");
- LoadBarriers.erase(LoadBarriers.begin());
- }
-}
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/LSUnit.h b/contrib/llvm/tools/llvm-mca/LSUnit.h
deleted file mode 100644
index 817522190589..000000000000
--- a/contrib/llvm/tools/llvm-mca/LSUnit.h
+++ /dev/null
@@ -1,147 +0,0 @@
-//===------------------------- LSUnit.h --------------------------*- C++-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// A Load/Store unit class that models load/store queues and that implements
-/// a simple weak memory consistency model.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H
-#define LLVM_TOOLS_LLVM_MCA_LSUNIT_H
-
-#include <set>
-
-namespace mca {
-
-class InstRef;
-struct InstrDesc;
-
-/// A Load/Store Unit implementing load and store queues.
-///
-/// This class implements a load queue and a store queue to emulate the
-/// out-of-order execution of memory operations.
-/// Each load (or store) consumes an entry in the load (or store) queue.
-///
-/// Rules are:
-/// 1) A younger load is allowed to pass an older load only if there are no
-/// stores or barriers in between the two loads.
-/// 2) A younger store is not allowed to pass an older store.
-/// 3) A younger store is not allowed to pass an older load.
-/// 4) A younger load is allowed to pass an older store only if the load does
-/// not alias with the store.
-///
-/// This class optimistically assumes that loads don't alias store operations.
-/// Under this assumption, younger loads are always allowed to pass older
-/// stores (this only affects rule 4).
-/// Essentially, this LSUnit doesn't attempt to run any sort of alias analysis
-/// to predict when loads and stores don't alias with each other.
-///
-/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
-/// set to `false` by the constructor of LSUnit.
-///
-/// In the case of write-combining memory, rule 2. could be relaxed to allow
-/// reordering of non-aliasing store operations. At the moment, this is not
-/// allowed.
-/// To put it in another way, there is no option to specify a different memory
-/// type for memory operations (example: write-through, write-combining, etc.).
-/// Also, there is no way to weaken the memory model, and this unit currently
-/// doesn't support write-combining behavior.
-///
-/// No assumptions are made on the size of the store buffer.
-/// As mentioned before, this class doesn't perform alias analysis.
-/// Consequently, LSUnit doesn't know how to identify cases where
-/// store-to-load forwarding may occur.
-///
-/// LSUnit doesn't attempt to predict whether a load or store hits or misses
-/// the L1 cache. To be more specific, LSUnit doesn't know anything about
-/// the cache hierarchy and memory types.
-/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
-/// scheduling model provides an "optimistic" load-to-use latency (which usually
-/// matches the load-to-use latency for when there is a hit in the L1D).
-///
-/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
-/// memory-barrier like instructions.
-/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
-/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
-/// serializes loads without forcing a flush of the load queue.
-/// Similarly, instructions that both `mayStore` and have `unmodeled side
-/// effects` are treated like store barriers. A full memory
-/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
-/// effects. This is obviously inaccurate, but this is the best that we can do
-/// at the moment.
-///
-/// Each load/store barrier consumes one entry in the load/store queue. A
-/// load/store barrier enforces ordering of loads/stores:
-/// - A younger load cannot pass a load barrier.
-/// - A younger store cannot pass a store barrier.
-///
-/// A younger load has to wait for the memory load barrier to execute.
-/// A load/store barrier is "executed" when it becomes the oldest entry in
-/// the load/store queue(s). That also means, all the older loads/stores have
-/// already been executed.
-class LSUnit {
- // Load queue size.
- // LQ_Size == 0 means that there are infinite slots in the load queue.
- unsigned LQ_Size;
-
- // Store queue size.
- // SQ_Size == 0 means that there are infinite slots in the store queue.
- unsigned SQ_Size;
-
- // If true, loads will never alias with stores. This is the default.
- bool NoAlias;
-
- std::set<unsigned> LoadQueue;
- std::set<unsigned> StoreQueue;
-
- void assignLQSlot(unsigned Index);
- void assignSQSlot(unsigned Index);
- bool isReadyNoAlias(unsigned Index) const;
-
- // An instruction that is both 'mayStore' and 'HasUnmodeledSideEffects' is
- // conservatively treated as a store barrier. It forces older stores to be
- // executed before newer stores are issued.
- std::set<unsigned> StoreBarriers;
-
- // An instruction that is both 'MayLoad' and 'HasUnmodeledSideEffects' is
- // conservatively treated as a load barrier. It forces older loads to execute
- // before newer loads are issued.
- std::set<unsigned> LoadBarriers;
-
-public:
- LSUnit(unsigned LQ = 0, unsigned SQ = 0, bool AssumeNoAlias = false)
- : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {}
-
-#ifndef NDEBUG
- void dump() const;
-#endif
-
- bool isSQEmpty() const { return StoreQueue.empty(); }
- bool isLQEmpty() const { return LoadQueue.empty(); }
- bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
- bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
-
- // Returns true if this instruction has been successfully enqueued.
- bool reserve(const InstRef &IR);
-
- // The rules are:
- // 1. A store may not pass a previous store.
- // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
- // 3. A load may pass a previous load.
- // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
- // 5. A load has to wait until an older load barrier is fully executed.
- // 6. A store has to wait until an older store barrier is fully executed.
- bool isReady(const InstRef &IR) const;
- void onInstructionExecuted(const InstRef &IR);
-};
-
-} // namespace mca
-
-#endif
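
All of the rules above are phrased in terms of source indices, where a smaller index means an older instruction. The standalone snippet below models rules 2 and 4 with the same ordered-set idiom the class uses for its load/store queues; it is an illustration of the logic, not a use of the LSUnit API itself.

#include <cassert>
#include <set>

std::set<unsigned> Loads, Stores;          // pending ops, keyed by source index

// A store may issue only if no older store and no older load is still pending.
bool storeIsReady(unsigned Index) {
  if (!Stores.empty() && Index > *Stores.begin())
    return false;                          // rule 2: blocked by an older store
  return Loads.empty() || Index <= *Loads.begin();   // rule 4
}

int main() {
  Loads.insert(3);                         // load with source index 3
  Stores.insert(5);                        // store with source index 5
  assert(!storeIsReady(5));                // stuck behind the older load
  Loads.erase(3);                          // the load executes and leaves
  assert(storeIsReady(5));                 // now the store may go
}
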
diff --git a/contrib/llvm/tools/llvm-mca/Pipeline.cpp b/contrib/llvm/tools/llvm-mca/Pipeline.cpp
deleted file mode 100644
index 7c937e7b48b5..000000000000
--- a/contrib/llvm/tools/llvm-mca/Pipeline.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements an ordered container of stages that simulate the
-/// pipeline of a hardware backend.
-///
-//===----------------------------------------------------------------------===//
-
-#include "Pipeline.h"
-#include "HWEventListener.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/Support/Debug.h"
-
-namespace mca {
-
-#define DEBUG_TYPE "llvm-mca"
-
-using namespace llvm;
-
-void Pipeline::addEventListener(HWEventListener *Listener) {
- if (Listener)
- Listeners.insert(Listener);
- for (auto &S : Stages)
- S->addListener(Listener);
-}
-
-bool Pipeline::hasWorkToProcess() {
- const auto It = llvm::find_if(Stages, [](const std::unique_ptr<Stage> &S) {
- return S->hasWorkToComplete();
- });
- return It != Stages.end();
-}
-
-// This routine returns early if any stage returns 'false' after execute() is
-// called on it.
-bool Pipeline::executeStages(InstRef &IR) {
- for (const std::unique_ptr<Stage> &S : Stages)
- if (!S->execute(IR))
- return false;
- return true;
-}
-
-void Pipeline::preExecuteStages() {
- for (const std::unique_ptr<Stage> &S : Stages)
- S->preExecute();
-}
-
-void Pipeline::postExecuteStages() {
- for (const std::unique_ptr<Stage> &S : Stages)
- S->postExecute();
-}
-
-void Pipeline::run() {
- while (hasWorkToProcess()) {
- notifyCycleBegin();
- runCycle();
- notifyCycleEnd();
- ++Cycles;
- }
-}
-
-void Pipeline::runCycle() {
- // Update the stages before we do any processing for this cycle.
- InstRef IR;
- for (auto &S : Stages)
- S->cycleStart();
-
- // Continue executing this cycle until any stage claims it cannot make
- // progress.
- while (true) {
- preExecuteStages();
- if (!executeStages(IR))
- break;
- postExecuteStages();
- }
-
- for (auto &S : Stages)
- S->cycleEnd();
-}
-
-void Pipeline::notifyCycleBegin() {
- LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n');
- for (HWEventListener *Listener : Listeners)
- Listener->onCycleBegin();
-}
-
-void Pipeline::notifyCycleEnd() {
- LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n");
- for (HWEventListener *Listener : Listeners)
- Listener->onCycleEnd();
-}
-} // namespace mca.
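
Putting the pieces together, a driver uses this class by appending stages and then running cycles until no stage reports pending work. The sketch below uses only methods shown in this file and in Context.cpp above; Fetch, Dispatch, Retire, Execute and SummaryView stand in for objects created elsewhere.

auto P = llvm::make_unique<mca::Pipeline>();
P->appendStage(std::move(Fetch));
P->appendStage(std::move(Dispatch));
P->appendStage(std::move(Retire));
P->appendStage(std::move(Execute));
P->addEventListener(&SummaryView);   // any HWEventListener-derived view
P->run();                            // loops until hasWorkToProcess() is false
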
diff --git a/contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp b/contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp
index c5b1a12b792f..18ef45fc2a65 100644
--- a/contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp
+++ b/contrib/llvm/tools/llvm-mca/PipelinePrinter.cpp
@@ -13,14 +13,14 @@
//===----------------------------------------------------------------------===//
#include "PipelinePrinter.h"
-#include "View.h"
+#include "Views/View.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
void PipelinePrinter::printReport(llvm::raw_ostream &OS) const {
for (const auto &V : Views)
V->printView(OS);
}
} // namespace mca.
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/PipelinePrinter.h b/contrib/llvm/tools/llvm-mca/PipelinePrinter.h
index fe871414418f..456026e12df3 100644
--- a/contrib/llvm/tools/llvm-mca/PipelinePrinter.h
+++ b/contrib/llvm/tools/llvm-mca/PipelinePrinter.h
@@ -17,13 +17,14 @@
#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
#define LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
-#include "Pipeline.h"
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Pipeline.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
/// A printer class that knows how to collects statistics on the
@@ -48,5 +49,6 @@ public:
void printReport(llvm::raw_ostream &OS) const;
};
} // namespace mca
+} // namespace llvm
#endif // LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
diff --git a/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.cpp b/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.cpp
deleted file mode 100644
index 1b07bf9a3b33..000000000000
--- a/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the RegisterFileStatistics interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "RegisterFileStatistics.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void RegisterFileStatistics::initializeRegisterFileInfo() {
- const MCSchedModel &SM = STI.getSchedModel();
- RegisterFileUsage Empty = {0, 0, 0};
- if (!SM.hasExtraProcessorInfo()) {
- // Assume a single register file.
- RegisterFiles.emplace_back(Empty);
- return;
- }
-
- // Initialize a RegisterFileUsage for every user defined register file, plus
- // the default register file which is always at index #0.
- const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo();
- // There is always an "InvalidRegisterFile" entry in tablegen. That entry can
- // be skipped. If there are no user defined register files, then reserve a
- // single entry for the default register file at index #0.
- unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
- RegisterFiles.resize(NumRegFiles);
- std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty);
-}
-
-void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
- switch (Event.Type) {
- default:
- break;
- case HWInstructionEvent::Retired: {
- const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
- for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I)
- RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
- break;
- }
- case HWInstructionEvent::Dispatched: {
- const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
- for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) {
- RegisterFileUsage &RFU = RegisterFiles[I];
- unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I];
- RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
- RFU.TotalMappings += NumUsedPhysRegs;
- RFU.MaxUsedMappings =
- std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
- }
- }
- }
-}
-
-void RegisterFileStatistics::printView(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
-
- TempStream << "\n\nRegister File statistics:";
- const RegisterFileUsage &GlobalUsage = RegisterFiles[0];
- TempStream << "\nTotal number of mappings created: "
- << GlobalUsage.TotalMappings;
- TempStream << "\nMax number of mappings used: "
- << GlobalUsage.MaxUsedMappings << '\n';
-
- for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) {
- const RegisterFileUsage &RFU = RegisterFiles[I];
- // Obtain the register file descriptor from the scheduling model.
- assert(STI.getSchedModel().hasExtraProcessorInfo() &&
- "Unable to find register file info!");
- const MCExtraProcessorInfo &PI =
- STI.getSchedModel().getExtraProcessorInfo();
- assert(I <= PI.NumRegisterFiles && "Unexpected register file index!");
- const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I];
- // Skip invalid register files.
- if (!RFDesc.NumPhysRegs)
- continue;
-
- TempStream << "\n* Register File #" << I;
- TempStream << " -- " << StringRef(RFDesc.Name) << ':';
- TempStream << "\n Number of physical registers: ";
- if (!RFDesc.NumPhysRegs)
- TempStream << "unbounded";
- else
- TempStream << RFDesc.NumPhysRegs;
- TempStream << "\n Total number of mappings created: "
- << RFU.TotalMappings;
- TempStream << "\n Max number of mappings used: "
- << RFU.MaxUsedMappings << '\n';
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.cpp b/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.cpp
deleted file mode 100644
index edb855e11e84..000000000000
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the RetireControlUnitStatistics interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "RetireControlUnitStatistics.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) {
- if (Event.Type == HWInstructionEvent::Retired)
- ++NumRetired;
-}
-
-void RetireControlUnitStatistics::printView(llvm::raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nRetire Control Unit - "
- << "number of cycles where we saw N instructions retired:\n";
- TempStream << "[# retired], [# cycles]\n";
-
- for (const std::pair<unsigned, unsigned> &Entry : RetiredPerCycle) {
- TempStream << " " << Entry.first;
- if (Entry.first < 10)
- TempStream << ",           ";
- else
- TempStream << ",          ";
- TempStream << Entry.second << " ("
- << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
- << "%)\n";
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/Scheduler.cpp b/contrib/llvm/tools/llvm-mca/Scheduler.cpp
deleted file mode 100644
index 975a50e4b638..000000000000
--- a/contrib/llvm/tools/llvm-mca/Scheduler.cpp
+++ /dev/null
@@ -1,403 +0,0 @@
-//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// A scheduler for processor resource units and processor resource groups.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Scheduler.h"
-#include "Support.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace mca {
-
-using namespace llvm;
-
-#define DEBUG_TYPE "llvm-mca"
-
-uint64_t ResourceState::selectNextInSequence() {
- assert(isReady());
- uint64_t Next = getNextInSequence();
- while (!isSubResourceReady(Next)) {
- updateNextInSequence();
- Next = getNextInSequence();
- }
- return Next;
-}
-
-#ifndef NDEBUG
-void ResourceState::dump() const {
- dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask
- << ", NEXT: " << NextInSequenceMask << ", RDYMASK: " << ReadyMask
- << ", BufferSize=" << BufferSize
- << ", AvailableSlots=" << AvailableSlots
- << ", Reserved=" << Unavailable << '\n';
-}
-#endif
-
-void ResourceManager::initialize(const llvm::MCSchedModel &SM) {
- computeProcResourceMasks(SM, ProcResID2Mask);
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I)
- addResource(*SM.getProcResource(I), I, ProcResID2Mask[I]);
-}
-
-// Adds a new resource state in Resources, as well as a new descriptor in
-// ResourceDescriptor. The 'Resources' map allows quick lookup of ResourceState
-// objects from resource mask identifiers.
-void ResourceManager::addResource(const MCProcResourceDesc &Desc,
- unsigned Index, uint64_t Mask) {
- assert(Resources.find(Mask) == Resources.end() && "Resource already added!");
- Resources[Mask] = llvm::make_unique<ResourceState>(Desc, Index, Mask);
-}
-
-// Returns the actual resource consumed by this Use.
-// First, is the primary resource ID.
-// Second, is the specific sub-resource ID.
-std::pair<uint64_t, uint64_t> ResourceManager::selectPipe(uint64_t ResourceID) {
- ResourceState &RS = *Resources[ResourceID];
- uint64_t SubResourceID = RS.selectNextInSequence();
- if (RS.isAResourceGroup())
- return selectPipe(SubResourceID);
- return std::pair<uint64_t, uint64_t>(ResourceID, SubResourceID);
-}
-
-void ResourceState::removeFromNextInSequence(uint64_t ID) {
- assert(NextInSequenceMask);
- assert(countPopulation(ID) == 1);
- if (ID > getNextInSequence())
- RemovedFromNextInSequence |= ID;
- NextInSequenceMask = NextInSequenceMask & (~ID);
- if (!NextInSequenceMask) {
- NextInSequenceMask = ResourceSizeMask;
- assert(NextInSequenceMask != RemovedFromNextInSequence);
- NextInSequenceMask ^= RemovedFromNextInSequence;
- RemovedFromNextInSequence = 0;
- }
-}
-
-void ResourceManager::use(ResourceRef RR) {
- // Mark the sub-resource referenced by RR as used.
- ResourceState &RS = *Resources[RR.first];
- RS.markSubResourceAsUsed(RR.second);
- // If there are still available units in RR.first,
- // then we are done.
- if (RS.isReady())
- return;
-
- // Notify other resources that RR.first is no longer available.
- for (const std::pair<uint64_t, UniqueResourceState> &Res : Resources) {
- ResourceState &Current = *Res.second.get();
- if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
- continue;
-
- if (Current.containsResource(RR.first)) {
- Current.markSubResourceAsUsed(RR.first);
- Current.removeFromNextInSequence(RR.first);
- }
- }
-}
-
-void ResourceManager::release(ResourceRef RR) {
- ResourceState &RS = *Resources[RR.first];
- bool WasFullyUsed = !RS.isReady();
- RS.releaseSubResource(RR.second);
- if (!WasFullyUsed)
- return;
-
- for (const std::pair<uint64_t, UniqueResourceState> &Res : Resources) {
- ResourceState &Current = *Res.second.get();
- if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
- continue;
-
- if (Current.containsResource(RR.first))
- Current.releaseSubResource(RR.first);
- }
-}
-
-ResourceStateEvent
-ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const {
- ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE;
- for (uint64_t Buffer : Buffers) {
- Result = isBufferAvailable(Buffer);
- if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE)
- break;
- }
- return Result;
-}
-
-void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) {
- for (const uint64_t R : Buffers) {
- reserveBuffer(R);
- ResourceState &Resource = *Resources[R];
- if (Resource.isADispatchHazard()) {
- assert(!Resource.isReserved());
- Resource.setReserved();
- }
- }
-}
-
-void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
- for (const uint64_t R : Buffers)
- releaseBuffer(R);
-}
-
-bool ResourceManager::canBeIssued(const InstrDesc &Desc) const {
- return std::all_of(Desc.Resources.begin(), Desc.Resources.end(),
- [&](const std::pair<uint64_t, const ResourceUsage> &E) {
- unsigned NumUnits =
- E.second.isReserved() ? 0U : E.second.NumUnits;
- return isReady(E.first, NumUnits);
- });
-}
-
-// Returns true if all resources are in-order, and there is at least one
-// resource which is a dispatch hazard (BufferSize = 0).
-bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) {
- if (!canBeIssued(Desc))
- return false;
- bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) {
- const ResourceState &Resource = *Resources[BufferMask];
- return Resource.isInOrder() || Resource.isADispatchHazard();
- });
- if (!AllInOrderResources)
- return false;
-
- return any_of(Desc.Buffers, [&](uint64_t BufferMask) {
- return Resources[BufferMask]->isADispatchHazard();
- });
-}
-
-void ResourceManager::issueInstruction(
- const InstrDesc &Desc,
- SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes) {
- for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
- const CycleSegment &CS = R.second.CS;
- if (!CS.size()) {
- releaseResource(R.first);
- continue;
- }
-
- assert(CS.begin() == 0 && "Invalid {Start, End} cycles!");
- if (!R.second.isReserved()) {
- ResourceRef Pipe = selectPipe(R.first);
- use(Pipe);
- BusyResources[Pipe] += CS.size();
- // Replace the resource mask with a valid processor resource index.
- const ResourceState &RS = *Resources[Pipe.first];
- Pipe.first = RS.getProcResourceID();
- Pipes.emplace_back(
- std::pair<ResourceRef, double>(Pipe, static_cast<double>(CS.size())));
- } else {
- assert((countPopulation(R.first) > 1) && "Expected a group!");
- // Mark this group as reserved.
- assert(R.second.isReserved());
- reserveResource(R.first);
- BusyResources[ResourceRef(R.first, R.first)] += CS.size();
- }
- }
-}
-
-void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
- for (std::pair<ResourceRef, unsigned> &BR : BusyResources) {
- if (BR.second)
- BR.second--;
- if (!BR.second) {
- // Release this resource.
- const ResourceRef &RR = BR.first;
-
- if (countPopulation(RR.first) == 1)
- release(RR);
-
- releaseResource(RR.first);
- ResourcesFreed.push_back(RR);
- }
- }
-
- for (const ResourceRef &RF : ResourcesFreed)
- BusyResources.erase(RF);
-}
-
-#ifndef NDEBUG
-void Scheduler::dump() const {
- dbgs() << "[SCHEDULER]: WaitQueue size is: " << WaitQueue.size() << '\n';
- dbgs() << "[SCHEDULER]: ReadyQueue size is: " << ReadyQueue.size() << '\n';
- dbgs() << "[SCHEDULER]: IssuedQueue size is: " << IssuedQueue.size() << '\n';
- Resources->dump();
-}
-#endif
-
-bool Scheduler::canBeDispatched(const InstRef &IR,
- HWStallEvent::GenericEventType &Event) const {
- Event = HWStallEvent::Invalid;
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
-
- if (Desc.MayLoad && LSU->isLQFull())
- Event = HWStallEvent::LoadQueueFull;
- else if (Desc.MayStore && LSU->isSQFull())
- Event = HWStallEvent::StoreQueueFull;
- else {
- switch (Resources->canBeDispatched(Desc.Buffers)) {
- default:
- return true;
- case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
- Event = HWStallEvent::SchedulerQueueFull;
- break;
- case ResourceStateEvent::RS_RESERVED:
- Event = HWStallEvent::DispatchGroupStall;
- }
- }
-
- return false;
-}
-
-void Scheduler::issueInstructionImpl(
- InstRef &IR,
- SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources) {
- Instruction *IS = IR.getInstruction();
- const InstrDesc &D = IS->getDesc();
-
- // Issue the instruction and collect all the consumed resources
- // into a vector. That vector is then used to notify the listener.
- Resources->issueInstruction(D, UsedResources);
-
- // Notify the instruction that it started executing.
- // This updates the internal state of each write.
- IS->execute();
-
- if (IS->isExecuting())
- IssuedQueue[IR.getSourceIndex()] = IS;
-}
-
-// Release the buffered resources and issue the instruction.
-void Scheduler::issueInstruction(
- InstRef &IR,
- SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- releaseBuffers(Desc.Buffers);
- issueInstructionImpl(IR, UsedResources);
-}
-
-void Scheduler::promoteToReadyQueue(SmallVectorImpl<InstRef> &Ready) {
- // Scan the set of waiting instructions and promote them to the
- // ready queue if operands are all ready.
- for (auto I = WaitQueue.begin(), E = WaitQueue.end(); I != E;) {
- const unsigned IID = I->first;
- Instruction *IS = I->second;
-
- // Check if this instruction is now ready. If it is not, force a state
- // transition by invoking method 'update()'.
- if (!IS->isReady())
- IS->update();
-
- const InstrDesc &Desc = IS->getDesc();
- bool IsMemOp = Desc.MayLoad || Desc.MayStore;
- if (!IS->isReady() || (IsMemOp && !LSU->isReady({IID, IS}))) {
- ++I;
- continue;
- }
-
- Ready.emplace_back(IID, IS);
- ReadyQueue[IID] = IS;
- auto ToRemove = I;
- ++I;
- WaitQueue.erase(ToRemove);
- }
-}
-
-InstRef Scheduler::select() {
- // Find the oldest ready-to-issue instruction in the ReadyQueue.
- auto It = std::find_if(ReadyQueue.begin(), ReadyQueue.end(),
- [&](const QueueEntryTy &Entry) {
- const InstrDesc &D = Entry.second->getDesc();
- return Resources->canBeIssued(D);
- });
-
- if (It == ReadyQueue.end())
- return {0, nullptr};
-
- // We want to prioritize older instructions over younger instructions to
- // minimize the pressure on the reorder buffer. We also want to give a
- // better rank to instructions with more users, to better expose ILP.
-
- // Compute a rank value based on the age of an instruction (i.e. its source
- // index) and its number of users. The lower the rank value, the better.
- int Rank = It->first - It->second->getNumUsers();
- for (auto I = It, E = ReadyQueue.end(); I != E; ++I) {
- int CurrentRank = I->first - I->second->getNumUsers();
- if (CurrentRank < Rank) {
- const InstrDesc &D = I->second->getDesc();
- if (Resources->canBeIssued(D))
- It = I;
- }
- }
-
- // We found an instruction to issue.
- InstRef IR(It->first, It->second);
- ReadyQueue.erase(It);
- return IR;
-}
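The ranking heuristic in select() boils down to comparing SourceIndex - NumUsers. A tiny standalone sketch, with invented indices and user counts, showing how the comparison favors older and more heavily used instructions:

#include <cstdio>

// Lower (SourceIndex - NumUsers) wins: older instructions and instructions
// with more users are issued first.
static int computeRank(unsigned SourceIndex, unsigned NumUsers) {
  return static_cast<int>(SourceIndex) - static_cast<int>(NumUsers);
}

int main() {
  // Hypothetical ready-queue entries: {source index, number of users}.
  std::printf("rank(#4, 1 user)  = %d\n", computeRank(4, 1)); // prints 3
  std::printf("rank(#6, 5 users) = %d\n", computeRank(6, 5)); // prints 1 -> picked
  return 0;
}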
-
-void Scheduler::updatePendingQueue(SmallVectorImpl<InstRef> &Ready) {
-  // Notify instructions in the pending queue that a new cycle has just
-  // started.
- for (QueueEntryTy Entry : WaitQueue)
- Entry.second->cycleEvent();
- promoteToReadyQueue(Ready);
-}
-
-void Scheduler::updateIssuedQueue(SmallVectorImpl<InstRef> &Executed) {
- for (auto I = IssuedQueue.begin(), E = IssuedQueue.end(); I != E;) {
- const QueueEntryTy Entry = *I;
- Instruction *IS = Entry.second;
- IS->cycleEvent();
- if (IS->isExecuted()) {
- Executed.push_back({Entry.first, Entry.second});
- auto ToRemove = I;
- ++I;
- IssuedQueue.erase(ToRemove);
- } else {
- LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << Entry.first
- << " is still executing.\n");
- ++I;
- }
- }
-}
-
-void Scheduler::onInstructionExecuted(const InstRef &IR) {
- LSU->onInstructionExecuted(IR);
-}
-
-void Scheduler::reclaimSimulatedResources(SmallVectorImpl<ResourceRef> &Freed) {
- Resources->cycleEvent(Freed);
-}
-
-bool Scheduler::reserveResources(InstRef &IR) {
- // If necessary, reserve queue entries in the load-store unit (LSU).
- const bool Reserved = LSU->reserve(IR);
- if (!IR.getInstruction()->isReady() || (Reserved && !LSU->isReady(IR))) {
- LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the Wait Queue\n");
- WaitQueue[IR.getSourceIndex()] = IR.getInstruction();
- return false;
- }
- return true;
-}
-
-bool Scheduler::issueImmediately(InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
- LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
- << " to the Ready Queue\n");
- ReadyQueue[IR.getSourceIndex()] = IR.getInstruction();
- return false;
- }
- return true;
-}
-
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/Scheduler.h b/contrib/llvm/tools/llvm-mca/Scheduler.h
deleted file mode 100644
index 428fbc01707d..000000000000
--- a/contrib/llvm/tools/llvm-mca/Scheduler.h
+++ /dev/null
@@ -1,515 +0,0 @@
-//===--------------------- Scheduler.h ------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// A scheduler for Processor Resource Units and Processor Resource Groups.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
-#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
-
-#include "HWEventListener.h"
-#include "HardwareUnit.h"
-#include "Instruction.h"
-#include "LSUnit.h"
-#include "RetireControlUnit.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include <map>
-
-namespace mca {
-
-/// Used to report the internal state of a processor resource.
-///
-/// A processor resource is available if it is not reserved, and there are
-/// available slots in the buffer. A processor resource is unavailable if it
-/// is either reserved, or the associated buffer is full. A processor resource
-/// with a buffer size of -1 is always available if it is not reserved.
-///
-/// Values of type ResourceStateEvent are returned by method
-/// ResourceState::isBufferAvailable(), which is used to query the internal
-/// state of a resource.
-///
-/// The naming convention for resource state events is:
-/// * Event names start with prefix RS_
-/// * Prefix RS_ is followed by a string describing the actual resource state.
-enum ResourceStateEvent {
- RS_BUFFER_AVAILABLE,
- RS_BUFFER_UNAVAILABLE,
- RS_RESERVED
-};
-
-/// A descriptor for processor resources.
-///
-/// Each object of class ResourceState is associated to a specific processor
-/// resource. There is an instance of this class for every processor resource
-/// defined by the scheduling model.
-/// A ResourceState dynamically tracks the availability of units of a processor
-/// resource. For example, the ResourceState of a ProcResGroup tracks the
-/// availability of resource units which are part of the group.
-///
-/// Internally, ResourceState uses a round-robin selector to identify
-/// which unit of the group shall be used next.
-class ResourceState {
- // Index to the MCProcResourceDesc in the processor Model.
- unsigned ProcResourceDescIndex;
- // A resource mask. This is generated by the tool with the help of
- // function `mca::createProcResourceMasks' (see Support.h).
- uint64_t ResourceMask;
-
- // A ProcResource can specify a number of units. For the purpose of dynamic
- // scheduling, a processor resource with more than one unit behaves like a
- // group. This field has one bit set for every unit/resource that is part of
- // the group.
- // For groups, this field defaults to 'ResourceMask'. For non-group
- // resources, the number of bits set in this mask is equivalent to the
- // number of units (i.e. field 'NumUnits' in 'ProcResourceUnits').
- uint64_t ResourceSizeMask;
-
- // A simple round-robin selector for processor resources.
- // Each bit of the mask identifies a sub resource within this group.
- //
-  // As an example, let's assume that this ResourceState describes a
- // processor resource group composed of the following three units:
- // ResourceA -- 0b001
- // ResourceB -- 0b010
- // ResourceC -- 0b100
- //
- // Each unit is identified by a ResourceMask which always contains a
-  // single bit set. Field NextInSequenceMask is initially set to value
-  // 0b111. That value is obtained by OR'ing the resource masks of the
-  // processor resources that are part of the group.
- //
- // NextInSequenceMask -- 0b111
- //
- // Field NextInSequenceMask is used by the resource manager (i.e.
- // an object of class ResourceManager) to select the "next available resource"
- // from the set. The algorithm would prioritize resources with a bigger
- // ResourceMask value.
- //
- // In this example, there are three resources in the set, and 'ResourceC'
- // has the highest mask value. The round-robin selector would firstly select
- // 'ResourceC', then 'ResourceB', and eventually 'ResourceA'.
- //
- // When a resource R is used, its corresponding bit is cleared from the set.
- //
- // Back to the example:
-  // If 'ResourceC' is selected, then the new value of NextInSequenceMask
-  // becomes 0b011.
- //
-  // When NextInSequenceMask becomes zero, it is reset to its original value
-  // (0b111 in this example; see the standalone sketch after this class).
- uint64_t NextInSequenceMask;
-
- // Some instructions can only be issued on very specific pipeline resources.
- // For those instructions, we know exactly which resource would be consumed
- // without having to dynamically select it using field 'NextInSequenceMask'.
- //
- // The resource mask bit associated to the (statically) selected
- // processor resource is still cleared from the 'NextInSequenceMask'.
- // If that bit was already zero in NextInSequenceMask, then we update
- // mask 'RemovedFromNextInSequence'.
- //
-  // When NextInSequenceMask is reset back to its initial value, the algorithm
-  // removes any bits which are set in RemovedFromNextInSequence.
- uint64_t RemovedFromNextInSequence;
-
- // A mask of ready units.
- uint64_t ReadyMask;
-
-  // Buffered resources will have this field set to a number greater than 0.
-  // A buffered resource behaves like a separate reservation station
-  // implementing its own buffer for out-of-order execution.
-  // A buffer of 1 is for units that force in-order execution.
-  // A value of 0 is treated specially. In particular, a resource with
-  // a BufferSize = 0 is an in-order issue/dispatch resource.
- // That means, this resource is reserved starting from the dispatch event,
- // until all the "resource cycles" are consumed after the issue event.
- // While this resource is reserved, no other instruction may be dispatched.
- int BufferSize;
-
- // Available slots in the buffer (zero, if this is not a buffered resource).
- unsigned AvailableSlots;
-
-  // True if this resource is currently unavailable.
- // An instruction may "reserve" a resource for a number of cycles.
- // During those cycles, the reserved resource cannot be used for other
- // instructions, even if the ReadyMask is set.
- bool Unavailable;
-
- bool isSubResourceReady(uint64_t ID) const { return ReadyMask & ID; }
-
- /// Returns the mask identifier of the next available resource in the set.
- uint64_t getNextInSequence() const {
- assert(NextInSequenceMask);
- return llvm::PowerOf2Floor(NextInSequenceMask);
- }
-
-  /// Consumes the current 'next in sequence' resource and updates the
-  /// selector, wrapping around once every resource in the set has been used.
- void updateNextInSequence() {
- NextInSequenceMask ^= getNextInSequence();
- if (!NextInSequenceMask)
- NextInSequenceMask = ResourceSizeMask;
- }
-
- uint64_t computeResourceSizeMaskForGroup(uint64_t ResourceMask) {
- assert(llvm::countPopulation(ResourceMask) > 1);
- return ResourceMask ^ llvm::PowerOf2Floor(ResourceMask);
- }
-
-public:
- ResourceState(const llvm::MCProcResourceDesc &Desc, unsigned Index,
- uint64_t Mask)
- : ProcResourceDescIndex(Index), ResourceMask(Mask) {
- bool IsAGroup = llvm::countPopulation(ResourceMask) > 1;
- ResourceSizeMask = IsAGroup ? computeResourceSizeMaskForGroup(ResourceMask)
- : ((1ULL << Desc.NumUnits) - 1);
- NextInSequenceMask = ResourceSizeMask;
- RemovedFromNextInSequence = 0;
- ReadyMask = ResourceSizeMask;
- BufferSize = Desc.BufferSize;
- AvailableSlots = BufferSize == -1 ? 0U : static_cast<unsigned>(BufferSize);
- Unavailable = false;
- }
-
- unsigned getProcResourceID() const { return ProcResourceDescIndex; }
- uint64_t getResourceMask() const { return ResourceMask; }
- int getBufferSize() const { return BufferSize; }
-
- bool isBuffered() const { return BufferSize > 0; }
- bool isInOrder() const { return BufferSize == 1; }
- bool isADispatchHazard() const { return BufferSize == 0; }
- bool isReserved() const { return Unavailable; }
-
- void setReserved() { Unavailable = true; }
- void clearReserved() { Unavailable = false; }
-
- // A resource is ready if it is not reserved, and if there are enough
- // available units.
- // If a resource is also a dispatch hazard, then we don't check if
- // it is reserved because that check would always return true.
- // A resource marked as "dispatch hazard" is always reserved at
- // dispatch time. When this method is called, the assumption is that
- // the user of this resource has been already dispatched.
- bool isReady(unsigned NumUnits = 1) const {
- return (!isReserved() || isADispatchHazard()) &&
- llvm::countPopulation(ReadyMask) >= NumUnits;
- }
- bool isAResourceGroup() const {
- return llvm::countPopulation(ResourceMask) > 1;
- }
-
- bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
-
- void markSubResourceAsUsed(uint64_t ID) {
- assert(isSubResourceReady(ID));
- ReadyMask ^= ID;
- }
-
- void releaseSubResource(uint64_t ID) {
- assert(!isSubResourceReady(ID));
- ReadyMask ^= ID;
- }
-
- unsigned getNumUnits() const {
- return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask);
- }
-
- uint64_t selectNextInSequence();
- void removeFromNextInSequence(uint64_t ID);
-
- ResourceStateEvent isBufferAvailable() const {
- if (isADispatchHazard() && isReserved())
- return RS_RESERVED;
- if (!isBuffered() || AvailableSlots)
- return RS_BUFFER_AVAILABLE;
- return RS_BUFFER_UNAVAILABLE;
- }
-
- void reserveBuffer() {
- if (AvailableSlots)
- AvailableSlots--;
- }
-
- void releaseBuffer() {
- if (BufferSize > 0)
- AvailableSlots++;
- assert(AvailableSlots <= static_cast<unsigned>(BufferSize));
- }
-
-#ifndef NDEBUG
- void dump() const;
-#endif
-};
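A minimal standalone sketch of the NextInSequenceMask round-robin selection described in the comments above, using the three example unit masks and a plain-C++ stand-in for llvm::PowerOf2Floor:

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::PowerOf2Floor: keep only the highest set bit.
static uint64_t powerOf2Floor(uint64_t V) {
  while (V & (V - 1))
    V &= V - 1; // clear the lowest set bit until a single bit remains
  return V;
}

int main() {
  const uint64_t GroupMask = 0x7; // ResourceA|ResourceB|ResourceC (0b111)
  uint64_t NextInSequenceMask = GroupMask;

  for (int I = 0; I < 5; ++I) {
    uint64_t Selected = powerOf2Floor(NextInSequenceMask); // highest unit wins
    NextInSequenceMask ^= Selected;                        // consume it
    if (!NextInSequenceMask)
      NextInSequenceMask = GroupMask;                      // wrap around
    std::printf("picked 0x%llx, selector now 0x%llx\n",
                (unsigned long long)Selected,
                (unsigned long long)NextInSequenceMask);
  }
  return 0;
}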
-
-/// A resource unit identifier.
-///
-/// This is used to identify a specific processor resource unit using a pair
-/// of indices where the 'first' index is a processor resource mask, and the
-/// 'second' index is an index for a "sub-resource" (i.e. unit).
-typedef std::pair<uint64_t, uint64_t> ResourceRef;
-
-// First: a MCProcResourceDesc index identifying a buffered resource.
-// Second: max number of buffer entries used in this resource.
-typedef std::pair<unsigned, unsigned> BufferUsageEntry;
-
-/// A resource manager for processor resource units and groups.
-///
-/// This class owns all the ResourceState objects, and it is responsible for
-/// acting on requests from a Scheduler by updating the internal state of
-/// ResourceState objects.
-/// This class doesn't know about instruction itineraries and functional units.
-/// In the future, it can be extended to support itineraries too through the same
-/// public interface.
-class ResourceManager {
- // The resource manager owns all the ResourceState.
- using UniqueResourceState = std::unique_ptr<ResourceState>;
- llvm::SmallDenseMap<uint64_t, UniqueResourceState> Resources;
-
- // Keeps track of which resources are busy, and how many cycles are left
- // before those become usable again.
- llvm::SmallDenseMap<ResourceRef, unsigned> BusyResources;
-
- // A table to map processor resource IDs to processor resource masks.
- llvm::SmallVector<uint64_t, 8> ProcResID2Mask;
-
- // Adds a new resource state in Resources, as well as a new descriptor in
- // ResourceDescriptor.
- void addResource(const llvm::MCProcResourceDesc &Desc, unsigned Index,
- uint64_t Mask);
-
- // Populate resource descriptors.
- void initialize(const llvm::MCSchedModel &SM);
-
- // Returns the actual resource unit that will be used.
- ResourceRef selectPipe(uint64_t ResourceID);
-
- void use(ResourceRef RR);
- void release(ResourceRef RR);
-
- unsigned getNumUnits(uint64_t ResourceID) const {
- assert(Resources.find(ResourceID) != Resources.end());
- return Resources.find(ResourceID)->getSecond()->getNumUnits();
- }
-
- // Reserve a specific Resource kind.
- void reserveBuffer(uint64_t ResourceID) {
- assert(isBufferAvailable(ResourceID) ==
- ResourceStateEvent::RS_BUFFER_AVAILABLE);
- ResourceState &Resource = *Resources[ResourceID];
- Resource.reserveBuffer();
- }
-
- void releaseBuffer(uint64_t ResourceID) {
- Resources[ResourceID]->releaseBuffer();
- }
-
- ResourceStateEvent isBufferAvailable(uint64_t ResourceID) const {
- const ResourceState &Resource = *Resources.find(ResourceID)->second;
- return Resource.isBufferAvailable();
- }
-
- bool isReady(uint64_t ResourceID, unsigned NumUnits) const {
- const ResourceState &Resource = *Resources.find(ResourceID)->second;
- return Resource.isReady(NumUnits);
- }
-
-public:
- ResourceManager(const llvm::MCSchedModel &SM)
- : ProcResID2Mask(SM.getNumProcResourceKinds()) {
- initialize(SM);
- }
-
- // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if
- // there are enough available slots in the buffers.
- ResourceStateEvent canBeDispatched(llvm::ArrayRef<uint64_t> Buffers) const;
-
-  // Return the processor resource identifier associated with this Mask.
- unsigned resolveResourceMask(uint64_t Mask) const {
- return Resources.find(Mask)->second->getProcResourceID();
- }
-
- // Consume a slot in every buffered resource from array 'Buffers'. Resource
- // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved.
- void reserveBuffers(llvm::ArrayRef<uint64_t> Buffers);
-
- // Release buffer entries previously allocated by method reserveBuffers.
- void releaseBuffers(llvm::ArrayRef<uint64_t> Buffers);
-
- void reserveResource(uint64_t ResourceID) {
- ResourceState &Resource = *Resources[ResourceID];
- assert(!Resource.isReserved());
- Resource.setReserved();
- }
-
- void releaseResource(uint64_t ResourceID) {
- ResourceState &Resource = *Resources[ResourceID];
- Resource.clearReserved();
- }
-
- // Returns true if all resources are in-order, and there is at least one
- // resource which is a dispatch hazard (BufferSize = 0).
- bool mustIssueImmediately(const InstrDesc &Desc);
-
- bool canBeIssued(const InstrDesc &Desc) const;
-
- void issueInstruction(
- const InstrDesc &Desc,
- llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
-
- void cycleEvent(llvm::SmallVectorImpl<ResourceRef> &ResourcesFreed);
-
-#ifndef NDEBUG
- void dump() const {
- for (const std::pair<uint64_t, UniqueResourceState> &Resource : Resources)
- Resource.second->dump();
- }
-#endif
-}; // class ResourceManager
-
-/// Class Scheduler is responsible for issuing instructions to pipeline
-/// resources. Internally, it delegates to a ResourceManager the management of
-/// processor resources.
-/// This class is also responsible for tracking the progress of instructions
-/// from the dispatch stage, until the write-back stage.
-///
-/// An instruction dispatched to the Scheduler is initially placed into either
-/// the 'WaitQueue' or the 'ReadyQueue' depending on the availability of the
-/// input operands. Instructions in the WaitQueue are ordered by instruction
-/// index. An instruction is moved from the WaitQueue to the ReadyQueue when
-/// register operands become available, and all memory dependencies are met.
-/// Instructions that are moved from the WaitQueue to the ReadyQueue transition
-/// from state 'IS_AVAILABLE' to state 'IS_READY'.
-///
-/// At the beginning of each cycle, the Scheduler checks if there are
-/// instructions in the WaitQueue that can be moved to the ReadyQueue. If the
-/// ReadyQueue is not empty, then older instructions from the queue are issued
-/// to the processor pipelines, and the underlying ResourceManager is updated
-/// accordingly. The ReadyQueue is ordered by instruction index to guarantee
-/// that the first instructions in the set are also the oldest.
-///
-/// An Instruction is moved from the ReadyQueue to the IssuedQueue when it is
-/// issued to one or more pipelines. This event also causes an instruction
-/// state transition (i.e. from state IS_READY to state IS_EXECUTING).
-/// An Instruction leaves the IssuedQueue when it reaches the write-back stage
-/// (a toy model of these queue transitions appears at the end of this header).
-class Scheduler : public HardwareUnit {
- const llvm::MCSchedModel &SM;
-
- // Hardware resources that are managed by this scheduler.
- std::unique_ptr<ResourceManager> Resources;
- std::unique_ptr<LSUnit> LSU;
-
- using QueueEntryTy = std::pair<unsigned, Instruction *>;
- std::map<unsigned, Instruction *> WaitQueue;
- std::map<unsigned, Instruction *> ReadyQueue;
- std::map<unsigned, Instruction *> IssuedQueue;
-
- /// Issue an instruction without updating the ready queue.
- void issueInstructionImpl(
- InstRef &IR,
- llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
-
-public:
- Scheduler(const llvm::MCSchedModel &Model, unsigned LoadQueueSize,
- unsigned StoreQueueSize, bool AssumeNoAlias)
- : SM(Model), Resources(llvm::make_unique<ResourceManager>(SM)),
- LSU(llvm::make_unique<LSUnit>(LoadQueueSize, StoreQueueSize,
- AssumeNoAlias)) {}
-
- /// Check if the instruction in 'IR' can be dispatched.
- ///
- /// The DispatchStage is responsible for querying the Scheduler before
- /// dispatching new instructions. This routine is used for performing such
- /// a query. If the instruction 'IR' can be dispatched, then true is
- /// returned, otherwise false is returned with Event set to the stall type.
- bool canBeDispatched(const InstRef &IR,
- HWStallEvent::GenericEventType &Event) const;
-
-  /// Returns true if there is availability for IR in the LSU.
- bool isReady(const InstRef &IR) const { return LSU->isReady(IR); }
-
- /// Issue an instruction. The Used container is populated with
- /// the resource objects consumed on behalf of issuing this instruction.
- void
- issueInstruction(InstRef &IR,
- llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Used);
-
- /// This routine will attempt to issue an instruction immediately (for
- /// zero-latency instructions).
- ///
- /// Returns true if the instruction is issued immediately. If this does not
- /// occur, then the instruction will be added to the Scheduler's ReadyQueue.
- bool issueImmediately(InstRef &IR);
-
- /// Reserve one entry in each buffered resource.
- void reserveBuffers(llvm::ArrayRef<uint64_t> Buffers) {
- Resources->reserveBuffers(Buffers);
- }
-
- /// Release buffer entries previously allocated by method reserveBuffers.
- void releaseBuffers(llvm::ArrayRef<uint64_t> Buffers) {
- Resources->releaseBuffers(Buffers);
- }
-
- /// Update the resources managed by the scheduler.
- /// This routine is to be called at the start of a new cycle, and is
- /// responsible for updating scheduler resources. Resources are released
- /// once they have been fully consumed.
- void reclaimSimulatedResources(llvm::SmallVectorImpl<ResourceRef> &Freed);
-
- /// Move instructions from the WaitQueue to the ReadyQueue if input operands
- /// are all available.
- void promoteToReadyQueue(llvm::SmallVectorImpl<InstRef> &Ready);
-
- /// Update the ready queue.
- void updatePendingQueue(llvm::SmallVectorImpl<InstRef> &Ready);
-
- /// Update the issued queue.
- void updateIssuedQueue(llvm::SmallVectorImpl<InstRef> &Executed);
-
- /// Updates the Scheduler's resources to reflect that an instruction has just
- /// been executed.
- void onInstructionExecuted(const InstRef &IR);
-
- /// Obtain the processor's resource identifier for the given
- /// resource mask.
- unsigned getResourceID(uint64_t Mask) {
- return Resources->resolveResourceMask(Mask);
- }
-
- /// Reserve resources necessary to issue the instruction.
- /// Returns true if the resources are ready and the (LSU) can
- /// execute the given instruction immediately.
- bool reserveResources(InstRef &IR);
-
- /// Select the next instruction to issue from the ReadyQueue.
- /// This method gives priority to older instructions.
- InstRef select();
-
-#ifndef NDEBUG
-  // Dump the scheduler's internal state (wait, ready, and issued queues).
- void dump() const;
-
- // This routine performs a sanity check. This routine should only be called
- // when we know that 'IR' is not in the scheduler's instruction queues.
- void sanityCheck(const InstRef &IR) const {
- const unsigned Idx = IR.getSourceIndex();
- assert(WaitQueue.find(Idx) == WaitQueue.end());
- assert(ReadyQueue.find(Idx) == ReadyQueue.end());
- assert(IssuedQueue.find(Idx) == IssuedQueue.end());
- }
-#endif // !NDEBUG
-};
-} // namespace mca
-
-#endif // LLVM_TOOLS_LLVM_MCA_SCHEDULER_H
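To make the WaitQueue/ReadyQueue/IssuedQueue transitions described in the class comment concrete, here is a hedged toy model of a few simulation cycles. It is entirely independent of the real Scheduler, LSUnit and ResourceManager types; readiness and latency are faked with simple countdowns:

#include <cstdio>
#include <map>

// Toy instruction: becomes ready after 'CyclesUntilReady' cycles, and
// finishes 'Latency' cycles after being issued.
struct ToyInst {
  unsigned CyclesUntilReady;
  unsigned Latency;
};

int main() {
  std::map<unsigned, ToyInst> WaitQueue{{0, {0, 2}}, {1, {1, 1}}, {2, {3, 1}}};
  std::map<unsigned, ToyInst> ReadyQueue, IssuedQueue;

  for (unsigned Cycle = 0; Cycle < 6; ++Cycle) {
    // WaitQueue -> ReadyQueue once operands are (pretend) available.
    for (auto It = WaitQueue.begin(); It != WaitQueue.end();) {
      if (It->second.CyclesUntilReady == 0) {
        ReadyQueue.insert(*It);
        It = WaitQueue.erase(It);
      } else {
        --It->second.CyclesUntilReady;
        ++It;
      }
    }
    // Issue the oldest ready instruction (lowest source index).
    if (!ReadyQueue.empty()) {
      auto Oldest = ReadyQueue.begin();
      std::printf("cycle %u: issue #%u\n", Cycle, Oldest->first);
      IssuedQueue.insert(*Oldest);
      ReadyQueue.erase(Oldest);
    }
    // IssuedQueue -> write-back once the latency is fully consumed.
    for (auto It = IssuedQueue.begin(); It != IssuedQueue.end();) {
      if (--It->second.Latency == 0) {
        std::printf("cycle %u: #%u executed\n", Cycle, It->first);
        It = IssuedQueue.erase(It);
      } else {
        ++It;
      }
    }
  }
  return 0;
}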
diff --git a/contrib/llvm/tools/llvm-mca/SchedulerStatistics.cpp b/contrib/llvm/tools/llvm-mca/SchedulerStatistics.cpp
deleted file mode 100644
index 5c6d22a71812..000000000000
--- a/contrib/llvm/tools/llvm-mca/SchedulerStatistics.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the SchedulerStatistics interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "SchedulerStatistics.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
- if (Event.Type == HWInstructionEvent::Issued)
- ++NumIssued;
-}
-
-void SchedulerStatistics::onReservedBuffers(ArrayRef<unsigned> Buffers) {
- for (const unsigned Buffer : Buffers) {
- if (BufferedResources.find(Buffer) != BufferedResources.end()) {
- BufferUsage &BU = BufferedResources[Buffer];
- BU.SlotsInUse++;
- BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
- continue;
- }
-
- BufferedResources.insert(
- std::pair<unsigned, BufferUsage>(Buffer, {1U, 1U}));
- }
-}
-
-void SchedulerStatistics::onReleasedBuffers(ArrayRef<unsigned> Buffers) {
- for (const unsigned Buffer : Buffers) {
- assert(BufferedResources.find(Buffer) != BufferedResources.end() &&
- "Buffered resource not in map?");
- BufferUsage &BU = BufferedResources[Buffer];
- BU.SlotsInUse--;
- }
-}
-
-void SchedulerStatistics::printSchedulerStatistics(
- llvm::raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nSchedulers - number of cycles where we saw N instructions "
- "issued:\n";
- TempStream << "[# issued], [# cycles]\n";
- for (const std::pair<unsigned, unsigned> &Entry : IssuedPerCycle) {
- TempStream << " " << Entry.first << ", " << Entry.second << " ("
- << format("%.1f", ((double)Entry.second / NumCycles) * 100)
- << "%)\n";
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-
-void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nScheduler's queue usage:\n";
- // Early exit if no buffered resources were consumed.
- if (BufferedResources.empty()) {
- TempStream << "No scheduler resources used.\n";
- TempStream.flush();
- OS << Buffer;
- return;
- }
-
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
- const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
- if (ProcResource.BufferSize <= 0)
- continue;
-
- const auto It = BufferedResources.find(I);
- unsigned MaxUsedSlots =
- It == BufferedResources.end() ? 0 : It->second.MaxUsedSlots;
- TempStream << ProcResource.Name << ", " << MaxUsedSlots << '/'
- << ProcResource.BufferSize << '\n';
- }
-
- TempStream.flush();
- OS << Buffer;
-}
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/SourceMgr.h b/contrib/llvm/tools/llvm-mca/SourceMgr.h
deleted file mode 100644
index 15a85a69569f..000000000000
--- a/contrib/llvm/tools/llvm-mca/SourceMgr.h
+++ /dev/null
@@ -1,63 +0,0 @@
-//===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements class SourceMgr. Class SourceMgr abstracts the input
-/// code sequence (a sequence of MCInst), and assigns unique identifiers to
-/// every instruction in the sequence.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H
-#define LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H
-
-#include "llvm/MC/MCInst.h"
-#include <vector>
-
-namespace mca {
-
-typedef std::pair<unsigned, const llvm::MCInst *> SourceRef;
-
-class SourceMgr {
- using InstVec = std::vector<std::unique_ptr<const llvm::MCInst>>;
- const InstVec &Sequence;
- unsigned Current;
- unsigned Iterations;
- static const unsigned DefaultIterations = 100;
-
-public:
- SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations)
- : Sequence(MCInstSequence), Current(0),
- Iterations(NumIterations ? NumIterations : DefaultIterations) {}
-
- unsigned getCurrentIteration() const { return Current / Sequence.size(); }
- unsigned getNumIterations() const { return Iterations; }
- unsigned size() const { return Sequence.size(); }
- const InstVec &getSequence() const { return Sequence; }
-
- bool hasNext() const { return Current < (Iterations * size()); }
- void updateNext() { Current++; }
-
- const SourceRef peekNext() const {
- unsigned Index = getCurrentInstructionIndex();
- return SourceRef(Current, Sequence[Index].get());
- }
-
- unsigned getCurrentInstructionIndex() const {
- return Current % Sequence.size();
- }
-
- const llvm::MCInst &getMCInstFromIndex(unsigned Index) const {
- return *Sequence[Index % size()];
- }
-
- bool isEmpty() const { return size() == 0; }
-};
-} // namespace mca
-
-#endif
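SourceMgr essentially exposes a circular view over the input sequence for a fixed number of iterations. A standalone sketch of that iteration pattern, with placeholder strings standing in for MCInst objects:

#include <cstdio>
#include <vector>

int main() {
  std::vector<const char *> Sequence = {"addl", "imull", "movl"}; // MCInst stand-ins
  const unsigned Iterations = 2;

  unsigned Current = 0;
  while (Current < Iterations * Sequence.size()) {     // SourceMgr::hasNext()
    unsigned Index = Current % Sequence.size();        // getCurrentInstructionIndex()
    unsigned Iteration = Current / Sequence.size();    // getCurrentIteration()
    std::printf("[%u,%u] %s\n", Iteration, Index, Sequence[Index]);
    ++Current;                                         // updateNext()
  }
  return 0;
}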
diff --git a/contrib/llvm/tools/llvm-mca/Support.h b/contrib/llvm/tools/llvm-mca/Support.h
deleted file mode 100644
index fd8d8b5a23b3..000000000000
--- a/contrib/llvm/tools/llvm-mca/Support.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===--------------------- Support.h ----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// Helper functions used by various pipeline components.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H
-#define LLVM_TOOLS_LLVM_MCA_SUPPORT_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCSchedule.h"
-
-namespace mca {
-
-/// Populates vector Masks with processor resource masks.
-///
-/// The number of bits set in a mask depends on the processor resource type.
-/// Each processor resource mask has at least one bit set. For groups, the
-/// number of bits set in the mask is equal to the cardinality of the group plus
-/// one. Excluding the most significant bit, the remaining bits in the mask
-/// identify processor resources that are part of the group.
-///
-/// Example:
-///
-/// ResourceA -- Mask: 0b001
-/// ResourceB -- Mask: 0b010
-/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111
-///
-/// ResourceAB is a processor resource group containing ResourceA and ResourceB.
-/// Each resource mask uniquely identifies a resource; both ResourceA and
-/// ResourceB only have one bit set.
-/// ResourceAB is a group; excluding the most significant bit in the mask, the
-/// remaining bits identify the composition of the group.
-///
-/// Resource masks are used by the ResourceManager to solve set membership
-/// problems with simple bit manipulation operations.
-void computeProcResourceMasks(const llvm::MCSchedModel &SM,
- llvm::SmallVectorImpl<uint64_t> &Masks);
-
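A standalone sketch of the set-membership trick described above, using the example masks from the comment (plain integers stand in for the masks produced by computeProcResourceMasks):

#include <cstdint>
#include <cstdio>

int main() {
  // Masks from the example above (0b001, 0b010, and the group mask 0b111).
  const uint64_t ResourceA = 0x1;
  const uint64_t ResourceB = 0x2;
  const uint64_t ResourceAB = 0x4 | ResourceA | ResourceB; // group bit + members

  // "Is ResourceA a member of group ResourceAB?" is a single AND.
  std::printf("A in AB: %s\n", (ResourceAB & ResourceA) ? "yes" : "no");
  std::printf("B in AB: %s\n", (ResourceAB & ResourceB) ? "yes" : "no");
  return 0;
}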
-/// Compute the reciprocal block throughput from a set of processor resource
-/// cycles. The reciprocal block throughput is computed as the MAX between:
-/// - NumMicroOps / DispatchWidth
-/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource).
-double computeBlockRThroughput(const llvm::MCSchedModel &SM,
- unsigned DispatchWidth, unsigned NumMicroOps,
- llvm::ArrayRef<unsigned> ProcResourceUsage);
-} // namespace mca
-
-#endif
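As a worked example of the MAX formula above, with invented numbers: DispatchWidth = 4, NumMicroOps = 6, and a single resource consumed for 9 cycles across 3 units gives max(6/4, 9/3) = 3.0 cycles per iteration. The same arithmetic, as a tiny standalone check:

#include <algorithm>
#include <cstdio>

int main() {
  // Invented numbers, purely to illustrate the MAX-based formula.
  const double NumMicroOps = 6.0, DispatchWidth = 4.0;
  const double ResourceCycles = 9.0, NumUnits = 3.0;
  double RThroughput =
      std::max(NumMicroOps / DispatchWidth, ResourceCycles / NumUnits);
  std::printf("Block RThroughput: %.2f\n", RThroughput); // prints 3.00
  return 0;
}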
diff --git a/contrib/llvm/tools/llvm-mca/TimelineView.cpp b/contrib/llvm/tools/llvm-mca/TimelineView.cpp
deleted file mode 100644
index 6e75cac0d432..000000000000
--- a/contrib/llvm/tools/llvm-mca/TimelineView.cpp
+++ /dev/null
@@ -1,240 +0,0 @@
-//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements the TimelineView interface.
-///
-//===----------------------------------------------------------------------===//
-
-#include "TimelineView.h"
-
-using namespace llvm;
-
-namespace mca {
-
-void TimelineView::initialize(unsigned MaxIterations) {
- unsigned NumInstructions =
- AsmSequence.getNumIterations() * AsmSequence.size();
- if (!MaxIterations)
- MaxIterations = DEFAULT_ITERATIONS;
- unsigned NumEntries =
- std::min(NumInstructions, MaxIterations * AsmSequence.size());
- Timeline.resize(NumEntries);
- TimelineViewEntry NullTVEntry = {0, 0, 0, 0, 0};
- std::fill(Timeline.begin(), Timeline.end(), NullTVEntry);
-
- WaitTime.resize(AsmSequence.size());
- WaitTimeEntry NullWTEntry = {0, 0, 0, 0};
- std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
-}
-
-void TimelineView::onEvent(const HWInstructionEvent &Event) {
- const unsigned Index = Event.IR.getSourceIndex();
- if (CurrentCycle >= MaxCycle || Index >= Timeline.size())
- return;
- switch (Event.Type) {
- case HWInstructionEvent::Retired: {
- TimelineViewEntry &TVEntry = Timeline[Index];
- TVEntry.CycleRetired = CurrentCycle;
-
- // Update the WaitTime entry which corresponds to this Index.
- WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()];
- WTEntry.Executions++;
- WTEntry.CyclesSpentInSchedulerQueue +=
- TVEntry.CycleIssued - TVEntry.CycleDispatched;
- assert(TVEntry.CycleDispatched <= TVEntry.CycleReady);
- WTEntry.CyclesSpentInSQWhileReady +=
- TVEntry.CycleIssued - TVEntry.CycleReady;
- WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
- (TVEntry.CycleRetired - 1) - TVEntry.CycleExecuted;
- break;
- }
- case HWInstructionEvent::Ready:
- Timeline[Index].CycleReady = CurrentCycle;
- break;
- case HWInstructionEvent::Issued:
- Timeline[Index].CycleIssued = CurrentCycle;
- break;
- case HWInstructionEvent::Executed:
- Timeline[Index].CycleExecuted = CurrentCycle;
- break;
- case HWInstructionEvent::Dispatched:
- Timeline[Index].CycleDispatched = CurrentCycle;
- break;
- default:
- return;
- }
- LastCycle = std::max(LastCycle, CurrentCycle);
-}
-
-void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
- const WaitTimeEntry &Entry,
- unsigned SourceIndex) const {
- OS << SourceIndex << '.';
- OS.PadToColumn(7);
-
- if (Entry.Executions == 0) {
- OS << "- - - - ";
- } else {
- double AverageTime1, AverageTime2, AverageTime3;
- unsigned Executions = Entry.Executions;
- AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
- AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
- AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
-
- OS << Executions;
- OS.PadToColumn(13);
-
- OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
- OS.PadToColumn(20);
- OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
- OS.PadToColumn(27);
- OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
- OS.PadToColumn(34);
- }
-}
-
-void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
- if (WaitTime.empty())
- return;
-
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- formatted_raw_ostream FOS(TempStream);
-
- FOS << "\n\nAverage Wait times (based on the timeline view):\n"
- << "[0]: Executions\n"
- << "[1]: Average time spent waiting in a scheduler's queue\n"
- << "[2]: Average time spent waiting in a scheduler's queue while ready\n"
- << "[3]: Average time elapsed from WB until retire stage\n\n";
- FOS << " [0] [1] [2] [3]\n";
-
- // Use a different string stream for the instruction.
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
-
- for (unsigned I = 0, E = WaitTime.size(); I < E; ++I) {
- printWaitTimeEntry(FOS, WaitTime[I], I);
- // Append the instruction info at the end of the line.
- const MCInst &Inst = AsmSequence.getMCInstFromIndex(I);
-
- MCIP.printInst(&Inst, InstrStream, "", STI);
- InstrStream.flush();
-
- // Consume any tabs or spaces at the beginning of the string.
- StringRef Str(Instruction);
- Str = Str.ltrim();
- FOS << " " << Str << '\n';
- FOS.flush();
- Instruction = "";
-
- OS << Buffer;
- Buffer = "";
- }
-}
-
-void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
- const TimelineViewEntry &Entry,
- unsigned Iteration,
- unsigned SourceIndex) const {
- if (Iteration == 0 && SourceIndex == 0)
- OS << '\n';
- OS << '[' << Iteration << ',' << SourceIndex << ']';
- OS.PadToColumn(10);
- for (unsigned I = 0, E = Entry.CycleDispatched; I < E; ++I)
- OS << ((I % 5 == 0) ? '.' : ' ');
- OS << TimelineView::DisplayChar::Dispatched;
- if (Entry.CycleDispatched != Entry.CycleExecuted) {
- // Zero latency instructions have the same value for CycleDispatched,
- // CycleIssued and CycleExecuted.
- for (unsigned I = Entry.CycleDispatched + 1, E = Entry.CycleIssued; I < E;
- ++I)
- OS << TimelineView::DisplayChar::Waiting;
- if (Entry.CycleIssued == Entry.CycleExecuted)
-      OS << TimelineView::DisplayChar::Executed;
- else {
- if (Entry.CycleDispatched != Entry.CycleIssued)
- OS << TimelineView::DisplayChar::Executing;
- for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
- ++I)
- OS << TimelineView::DisplayChar::Executing;
- OS << TimelineView::DisplayChar::Executed;
- }
- }
-
- for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
- OS << TimelineView::DisplayChar::RetireLag;
- OS << TimelineView::DisplayChar::Retired;
-
- // Skip other columns.
- for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
- OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
-}
-
-static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
- OS << "\n\nTimeline view:\n";
- if (Cycles >= 10) {
- OS.PadToColumn(10);
- for (unsigned I = 0; I <= Cycles; ++I) {
- if (((I / 10) & 1) == 0)
- OS << ' ';
- else
- OS << I % 10;
- }
- OS << '\n';
- }
-
- OS << "Index";
- OS.PadToColumn(10);
- for (unsigned I = 0; I <= Cycles; ++I) {
- if (((I / 10) & 1) == 0)
- OS << I % 10;
- else
- OS << ' ';
- }
- OS << '\n';
-}
-
-void TimelineView::printTimeline(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream StringStream(Buffer);
- formatted_raw_ostream FOS(StringStream);
-
- printTimelineHeader(FOS, LastCycle);
- FOS.flush();
- OS << Buffer;
-
- // Use a different string stream for the instruction.
- std::string Instruction;
- raw_string_ostream InstrStream(Instruction);
-
- for (unsigned I = 0, E = Timeline.size(); I < E; ++I) {
- Buffer = "";
- const TimelineViewEntry &Entry = Timeline[I];
- if (Entry.CycleRetired == 0)
- return;
-
- unsigned Iteration = I / AsmSequence.size();
- unsigned SourceIndex = I % AsmSequence.size();
- printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
- // Append the instruction info at the end of the line.
- const MCInst &Inst = AsmSequence.getMCInstFromIndex(I);
- MCIP.printInst(&Inst, InstrStream, "", STI);
- InstrStream.flush();
-
- // Consume any tabs or spaces at the beginning of the string.
- StringRef Str(Instruction);
- Str = Str.ltrim();
- FOS << " " << Str << '\n';
- FOS.flush();
- Instruction = "";
- OS << Buffer;
- }
-}
-} // namespace mca
diff --git a/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
new file mode 100644
index 000000000000..2562c82407bf
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -0,0 +1,86 @@
+//===--------------------- DispatchStatistics.cpp ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the DispatchStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/DispatchStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+void DispatchStatistics::onEvent(const HWStallEvent &Event) {
+ if (Event.Type < HWStallEvent::LastGenericEvent)
+ HWStalls[Event.Type]++;
+}
+
+void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type != HWInstructionEvent::Dispatched)
+ return;
+
+ const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+ NumDispatched += DE.MicroOpcodes;
+}
+
+void DispatchStatistics::printDispatchHistogram(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ TempStream << "\n\nDispatch Logic - "
+ << "number of cycles where we saw N micro opcodes dispatched:\n";
+ TempStream << "[# dispatched], [# cycles]\n";
+ for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) {
+ double Percentage = ((double)Entry.second / NumCycles) * 100.0;
+ TempStream << " " << Entry.first << ", " << Entry.second
+ << " (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10)
+ << "%)\n";
+ }
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+static void printStalls(raw_ostream &OS, unsigned NumStalls,
+ unsigned NumCycles) {
+ if (!NumStalls) {
+ OS << NumStalls;
+ return;
+ }
+
+ double Percentage = ((double)NumStalls / NumCycles) * 100.0;
+ OS << NumStalls << " ("
+ << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)";
+}
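The floor((x * 10) + 0.5) / 10 expression above simply rounds a percentage to one decimal place before printing. A quick standalone check with an invented value:

#include <cmath>
#include <cstdio>

int main() {
  double Percentage = 12.34567; // invented value
  double Rounded = std::floor((Percentage * 10) + 0.5) / 10;
  std::printf("%.1f%%\n", Rounded); // prints 12.3%
  return 0;
}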
+
+void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream SS(Buffer);
+ SS << "\n\nDynamic Dispatch Stall Cycles:\n";
+ SS << "RAT - Register unavailable: ";
+ printStalls(SS, HWStalls[HWStallEvent::RegisterFileStall], NumCycles);
+ SS << "\nRCU - Retire tokens unavailable: ";
+ printStalls(SS, HWStalls[HWStallEvent::RetireControlUnitStall], NumCycles);
+ SS << "\nSCHEDQ - Scheduler full: ";
+ printStalls(SS, HWStalls[HWStallEvent::SchedulerQueueFull], NumCycles);
+ SS << "\nLQ - Load queue full: ";
+ printStalls(SS, HWStalls[HWStallEvent::LoadQueueFull], NumCycles);
+ SS << "\nSQ - Store queue full: ";
+ printStalls(SS, HWStalls[HWStallEvent::StoreQueueFull], NumCycles);
+ SS << "\nGROUP - Static restrictions on the dispatch group: ";
+ printStalls(SS, HWStalls[HWStallEvent::DispatchGroupStall], NumCycles);
+ SS << '\n';
+ SS.flush();
+ OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/DispatchStatistics.h b/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.h
index 1e389d54766b..6679c81efe95 100644
--- a/contrib/llvm/tools/llvm-mca/DispatchStatistics.h
+++ b/contrib/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -24,7 +24,7 @@
/// GROUP - Static restrictions on the dispatch group: 0
///
///
-/// Dispatch Logic - number of cycles where we saw N instructions dispatched:
+/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
/// [# dispatched], [# cycles]
/// 0, 15 (11.5%)
/// 2, 4 (3.1%)
@@ -34,11 +34,12 @@
#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <map>
+namespace llvm {
namespace mca {
class DispatchStatistics : public View {
@@ -80,5 +81,6 @@ public:
}
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/InstructionInfoView.cpp b/contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
index 0e50a96d19c1..5016afb49e44 100644
--- a/contrib/llvm/tools/llvm-mca/InstructionInfoView.cpp
+++ b/contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -12,17 +12,15 @@
///
//===----------------------------------------------------------------------===//
-#include "InstructionInfoView.h"
+#include "Views/InstructionInfoView.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
void InstructionInfoView::printView(raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
const MCSchedModel &SM = STI.getSchedModel();
- unsigned Instructions = Source.size();
std::string Instruction;
raw_string_ostream InstrStream(Instruction);
@@ -32,8 +30,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
<< "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n";
TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n";
- for (unsigned I = 0, E = Instructions; I < E; ++I) {
- const MCInst &Inst = Source.getMCInstFromIndex(I);
+ for (const MCInst &Inst : Source) {
const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
// Obtain the scheduling class information from the instruction.
@@ -89,3 +86,4 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
OS << Buffer;
}
} // namespace mca.
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/InstructionInfoView.h b/contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.h
index 0770ae3d2b57..3ef95d474490 100644
--- a/contrib/llvm/tools/llvm-mca/InstructionInfoView.h
+++ b/contrib/llvm/tools/llvm-mca/Views/InstructionInfoView.h
@@ -35,8 +35,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
-#include "SourceMgr.h"
-#include "View.h"
+#include "Views/View.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -44,23 +45,25 @@
#define DEBUG_TYPE "llvm-mca"
+namespace llvm {
namespace mca {
/// A view that prints out generic instruction information.
class InstructionInfoView : public View {
const llvm::MCSubtargetInfo &STI;
const llvm::MCInstrInfo &MCII;
- const SourceMgr &Source;
+ llvm::ArrayRef<llvm::MCInst> Source;
llvm::MCInstPrinter &MCIP;
public:
InstructionInfoView(const llvm::MCSubtargetInfo &sti,
- const llvm::MCInstrInfo &mcii, const SourceMgr &S,
- llvm::MCInstPrinter &IP)
+ const llvm::MCInstrInfo &mcii,
+ llvm::ArrayRef<llvm::MCInst> S, llvm::MCInstPrinter &IP)
: STI(sti), MCII(mcii), Source(S), MCIP(IP) {}
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp
new file mode 100644
index 000000000000..06202bc41421
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp
@@ -0,0 +1,168 @@
+//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RegisterFileStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RegisterFileStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
+ : STI(sti) {
+ const MCSchedModel &SM = STI.getSchedModel();
+ RegisterFileUsage RFUEmpty = {0, 0, 0};
+ MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0};
+ if (!SM.hasExtraProcessorInfo()) {
+ // Assume a single register file.
+ PRFUsage.emplace_back(RFUEmpty);
+ MoveElimInfo.emplace_back(MEIEmpty);
+ return;
+ }
+
+ // Initialize a RegisterFileUsage for every user defined register file, plus
+ // the default register file which is always at index #0.
+ const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo();
+ // There is always an "InvalidRegisterFile" entry in tablegen. That entry can
+ // be skipped. If there are no user defined register files, then reserve a
+ // single entry for the default register file at index #0.
+ unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
+
+ PRFUsage.resize(NumRegFiles);
+ std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty);
+
+ MoveElimInfo.resize(NumRegFiles);
+ std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty);
+}
+
+void RegisterFileStatistics::updateRegisterFileUsage(
+ ArrayRef<unsigned> UsedPhysRegs) {
+ for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) {
+ RegisterFileUsage &RFU = PRFUsage[I];
+ unsigned NumUsedPhysRegs = UsedPhysRegs[I];
+ RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
+ RFU.TotalMappings += NumUsedPhysRegs;
+ RFU.MaxUsedMappings =
+ std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
+ }
+}
+
+void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) {
+ if (!Inst.isOptimizableMove())
+ return;
+
+ assert(Inst.getDefs().size() == 1 && "Expected a single definition!");
+ assert(Inst.getUses().size() == 1 && "Expected a single register use!");
+ const WriteState &WS = Inst.getDefs()[0];
+ const ReadState &RS = Inst.getUses()[0];
+
+ MoveEliminationInfo &Info =
+ MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()];
+ Info.TotalMoveEliminationCandidates++;
+ if (WS.isEliminated())
+ Info.CurrentMovesEliminated++;
+ if (WS.isWriteZero() && RS.isReadZero())
+ Info.TotalMovesThatPropagateZero++;
+}
+
+void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
+ switch (Event.Type) {
+ default:
+ break;
+ case HWInstructionEvent::Retired: {
+ const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
+ for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I)
+ PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
+ break;
+ }
+ case HWInstructionEvent::Dispatched: {
+ const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
+ updateRegisterFileUsage(DE.UsedPhysRegs);
+ updateMoveElimInfo(*DE.IR.getInstruction());
+ }
+ }
+}
+
+void RegisterFileStatistics::onCycleEnd() {
+ for (MoveEliminationInfo &MEI : MoveElimInfo) {
+ unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle;
+ CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated);
+ MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated;
+ MEI.CurrentMovesEliminated = 0;
+ }
+}
+
+void RegisterFileStatistics::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+
+ TempStream << "\n\nRegister File statistics:";
+ const RegisterFileUsage &GlobalUsage = PRFUsage[0];
+ TempStream << "\nTotal number of mappings created: "
+ << GlobalUsage.TotalMappings;
+ TempStream << "\nMax number of mappings used: "
+ << GlobalUsage.MaxUsedMappings << '\n';
+
+ for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) {
+ const RegisterFileUsage &RFU = PRFUsage[I];
+ // Obtain the register file descriptor from the scheduling model.
+ assert(STI.getSchedModel().hasExtraProcessorInfo() &&
+ "Unable to find register file info!");
+ const MCExtraProcessorInfo &PI =
+ STI.getSchedModel().getExtraProcessorInfo();
+ assert(I <= PI.NumRegisterFiles && "Unexpected register file index!");
+ const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I];
+ // Skip invalid register files.
+ if (!RFDesc.NumPhysRegs)
+ continue;
+
+ TempStream << "\n* Register File #" << I;
+ TempStream << " -- " << StringRef(RFDesc.Name) << ':';
+ TempStream << "\n Number of physical registers: ";
+ if (!RFDesc.NumPhysRegs)
+ TempStream << "unbounded";
+ else
+ TempStream << RFDesc.NumPhysRegs;
+ TempStream << "\n Total number of mappings created: "
+ << RFU.TotalMappings;
+ TempStream << "\n Max number of mappings used: "
+ << RFU.MaxUsedMappings << '\n';
+ const MoveEliminationInfo &MEI = MoveElimInfo[I];
+
+ if (MEI.TotalMoveEliminationCandidates) {
+ TempStream << " Number of optimizable moves: "
+ << MEI.TotalMoveEliminationCandidates;
+ double EliminatedMovProportion = (double)MEI.TotalMovesEliminated /
+ MEI.TotalMoveEliminationCandidates *
+ 100.0;
+ double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero /
+ MEI.TotalMoveEliminationCandidates * 100.0;
+ TempStream << "\n Number of moves eliminated: "
+ << MEI.TotalMovesEliminated << " "
+ << format("(%.1f%%)",
+ floor((EliminatedMovProportion * 10) + 0.5) / 10);
+ TempStream << "\n Number of zero moves: "
+ << MEI.TotalMovesThatPropagateZero << " "
+ << format("(%.1f%%)",
+ floor((ZeroMovProportion * 10) + 0.5) / 10);
+ TempStream << "\n Max moves eliminated per cycle: "
+ << MEI.MaxMovesEliminatedPerCycle << '\n';
+ }
+ }
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.h b/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
index cbe816cd3332..a2c52a668dae 100644
--- a/contrib/llvm/tools/llvm-mca/RegisterFileStatistics.h
+++ b/contrib/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -21,6 +21,10 @@
/// Number of physical registers: 72
/// Total number of mappings created: 0
/// Max number of mappings used: 0
+/// Number of optimizable moves: 200
+/// Number of moves eliminated: 200 (100.0%)
+/// Number of zero moves: 200 (100.0%)
+/// Max moves eliminated per cycle: 2
///
/// * Register File #2 -- IntegerPRF:
/// Number of physical registers: 64
@@ -32,10 +36,11 @@
#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
+namespace llvm {
namespace mca {
class RegisterFileStatistics : public View {
@@ -48,20 +53,29 @@ class RegisterFileStatistics : public View {
unsigned CurrentlyUsedMappings;
};
+ struct MoveEliminationInfo {
+ unsigned TotalMoveEliminationCandidates;
+ unsigned TotalMovesEliminated;
+ unsigned TotalMovesThatPropagateZero;
+ unsigned MaxMovesEliminatedPerCycle;
+ unsigned CurrentMovesEliminated;
+ };
+
// There is one entry for each register file implemented by the processor.
- llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles;
+ llvm::SmallVector<RegisterFileUsage, 4> PRFUsage;
+ llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo;
- void initializeRegisterFileInfo();
+ void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs);
+ void updateMoveElimInfo(const Instruction &Inst);
public:
- RegisterFileStatistics(const llvm::MCSubtargetInfo &sti) : STI(sti) {
- initializeRegisterFileInfo();
- }
+ RegisterFileStatistics(const llvm::MCSubtargetInfo &sti);
+ void onCycleEnd() override;
void onEvent(const HWInstructionEvent &Event) override;
-
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/ResourcePressureView.cpp b/contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
index fe9d5b7fabc8..6df61840437d 100644
--- a/contrib/llvm/tools/llvm-mca/ResourcePressureView.cpp
+++ b/contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -12,15 +12,17 @@
///
//===----------------------------------------------------------------------===//
-#include "ResourcePressureView.h"
+#include "Views/ResourcePressureView.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
namespace mca {
-using namespace llvm;
-
-void ResourcePressureView::initialize() {
+ResourcePressureView::ResourcePressureView(const llvm::MCSubtargetInfo &sti,
+ MCInstPrinter &Printer,
+ ArrayRef<MCInst> S)
+ : STI(sti), MCIP(Printer), Source(S), LastInstructionIdx(0) {
// Populate the map of resource descriptors.
unsigned R2VIndex = 0;
const MCSchedModel &SM = STI.getSchedModel();
@@ -41,12 +43,19 @@ void ResourcePressureView::initialize() {
}
void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Dispatched) {
+ LastInstructionIdx = Event.IR.getSourceIndex();
+ return;
+ }
+
// We're only interested in Issue events.
if (Event.Type != HWInstructionEvent::Issued)
return;
+
const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event);
const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size();
- for (const std::pair<ResourceRef, double> &Use : IssueEvent.UsedResources) {
+ for (const std::pair<ResourceRef, ResourceCycles> &Use :
+ IssueEvent.UsedResources) {
const ResourceRef &RR = Use.first;
assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end());
unsigned R2VIndex = Resource2VecIndex[RR.first];
@@ -91,8 +100,7 @@ static void printResourcePressure(formatted_raw_ostream &OS, double Pressure,
OS.PadToColumn(Col);
}
-void ResourcePressureView::printResourcePressurePerIteration(
- raw_ostream &OS, unsigned Executions) const {
+void ResourcePressureView::printResourcePressurePerIter(raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
formatted_raw_ostream FOS(TempStream);
@@ -125,6 +133,7 @@ void ResourcePressureView::printResourcePressurePerIteration(
FOS << '\n';
FOS.flush();
+ const unsigned Executions = LastInstructionIdx / Source.size() + 1;
for (unsigned I = 0, E = NumResourceUnits; I < E; ++I) {
double Usage = ResourceUsage[I + Source.size() * E];
printResourcePressure(FOS, Usage / Executions, (I + 1) * 7);
@@ -134,8 +143,7 @@ void ResourcePressureView::printResourcePressurePerIteration(
OS << Buffer;
}
-void ResourcePressureView::printResourcePressurePerInstruction(
- raw_ostream &OS, unsigned Executions) const {
+void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
formatted_raw_ostream FOS(TempStream);
@@ -147,13 +155,16 @@ void ResourcePressureView::printResourcePressurePerInstruction(
std::string Instruction;
raw_string_ostream InstrStream(Instruction);
- for (unsigned I = 0, E = Source.size(); I < E; ++I) {
+ unsigned InstrIndex = 0;
+ const unsigned Executions = LastInstructionIdx / Source.size() + 1;
+ for (const MCInst &MCI : Source) {
+ unsigned BaseEltIdx = InstrIndex * NumResourceUnits;
for (unsigned J = 0; J < NumResourceUnits; ++J) {
- double Usage = ResourceUsage[J + I * NumResourceUnits];
+ double Usage = ResourceUsage[J + BaseEltIdx];
printResourcePressure(FOS, Usage / Executions, (J + 1) * 7);
}
- MCIP.printInst(&Source.getMCInstFromIndex(I), InstrStream, "", STI);
+ MCIP.printInst(&MCI, InstrStream, "", STI);
InstrStream.flush();
StringRef Str(Instruction);
@@ -166,6 +177,9 @@ void ResourcePressureView::printResourcePressurePerInstruction(
FOS.flush();
OS << Buffer;
Buffer = "";
+
+ ++InstrIndex;
}
}
} // namespace mca
+} // namespace llvm
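
The reworked view above no longer queries a SourceMgr for the number of iterations: it records the source index of the last dispatched instruction and derives the execution count from it at print time. A minimal standalone sketch of that bookkeeping, with made-up values and illustrative names that are not part of the llvm-mca API:

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumInstructions = 3;    // instructions in the input block
      const unsigned LastInstructionIdx = 8; // source index of the last dispatch
      // Index 8 with a 3-instruction block means iterations 0..2 were observed.
      const unsigned Executions = LastInstructionIdx / NumInstructions + 1;

      // Cycles spent on one resource by each instruction, summed over every
      // iteration (what ResourceUsage accumulates through onEvent).
      std::vector<double> CumulativeCycles = {6.0, 3.0, 9.0};
      for (unsigned I = 0; I < NumInstructions; ++I)
        std::printf("instruction %u: %.2f cycles per iteration\n", I,
                    CumulativeCycles[I] / Executions);
      std::printf("iterations inferred: %u\n", Executions);
      return 0;
    }

Dividing the accumulated counters by the inferred execution count is what turns the raw per-event totals into the per-iteration pressure figures shown in the report.
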
diff --git a/contrib/llvm/tools/llvm-mca/ResourcePressureView.h b/contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.h
index fe1c6af5e6f6..572ce6fe6b70 100644
--- a/contrib/llvm/tools/llvm-mca/ResourcePressureView.h
+++ b/contrib/llvm/tools/llvm-mca/Views/ResourcePressureView.h
@@ -58,13 +58,14 @@
#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
-#include "SourceMgr.h"
-#include "View.h"
+#include "Views/View.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include <map>
+namespace llvm {
namespace mca {
/// This class collects resource pressure statistics and it is able to print
@@ -72,38 +73,32 @@ namespace mca {
class ResourcePressureView : public View {
const llvm::MCSubtargetInfo &STI;
llvm::MCInstPrinter &MCIP;
- const SourceMgr &Source;
+ llvm::ArrayRef<llvm::MCInst> Source;
+ unsigned LastInstructionIdx;
// Map to quickly obtain the ResourceUsage column index from a processor
// resource ID.
llvm::DenseMap<unsigned, unsigned> Resource2VecIndex;
// Table of resources used by instructions.
- std::vector<double> ResourceUsage;
+ std::vector<ResourceCycles> ResourceUsage;
unsigned NumResourceUnits;
- const llvm::MCInst &GetMCInstFromIndex(unsigned Index) const;
- void printResourcePressurePerIteration(llvm::raw_ostream &OS,
- unsigned Executions) const;
- void printResourcePressurePerInstruction(llvm::raw_ostream &OS,
- unsigned Executions) const;
- void initialize();
+ void printResourcePressurePerIter(llvm::raw_ostream &OS) const;
+ void printResourcePressurePerInst(llvm::raw_ostream &OS) const;
public:
ResourcePressureView(const llvm::MCSubtargetInfo &sti,
- llvm::MCInstPrinter &Printer, const SourceMgr &SM)
- : STI(sti), MCIP(Printer), Source(SM) {
- initialize();
- }
+ llvm::MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S);
void onEvent(const HWInstructionEvent &Event) override;
-
void printView(llvm::raw_ostream &OS) const override {
- unsigned Executions = Source.getNumIterations();
- printResourcePressurePerIteration(OS, Executions);
- printResourcePressurePerInstruction(OS, Executions);
+ printResourcePressurePerIter(OS);
+ printResourcePressurePerInst(OS);
}
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
new file mode 100644
index 000000000000..54eb28f1add9
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
@@ -0,0 +1,91 @@
+//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RetireControlUnitStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RetireControlUnitStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+RetireControlUnitStatistics::RetireControlUnitStatistics(const MCSchedModel &SM)
+ : NumRetired(0), NumCycles(0), EntriesInUse(0), MaxUsedEntries(0),
+ SumOfUsedEntries(0) {
+ TotalROBEntries = SM.MicroOpBufferSize;
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ if (EPI.ReorderBufferSize)
+ TotalROBEntries = EPI.ReorderBufferSize;
+ }
+}
+
+void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Dispatched) {
+ unsigned NumEntries =
+ static_cast<const HWInstructionDispatchedEvent &>(Event).MicroOpcodes;
+ EntriesInUse += NumEntries;
+ }
+
+ if (Event.Type == HWInstructionEvent::Retired) {
+ unsigned ReleasedEntries = Event.IR.getInstruction()->getDesc().NumMicroOps;
+ assert(EntriesInUse >= ReleasedEntries && "Invalid internal state!");
+ EntriesInUse -= ReleasedEntries;
+ ++NumRetired;
+ }
+}
+
+void RetireControlUnitStatistics::onCycleEnd() {
+ // Update histogram
+ RetiredPerCycle[NumRetired]++;
+ NumRetired = 0;
+ ++NumCycles;
+ MaxUsedEntries = std::max(MaxUsedEntries, EntriesInUse);
+ SumOfUsedEntries += EntriesInUse;
+}
+
+void RetireControlUnitStatistics::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ TempStream << "\n\nRetire Control Unit - "
+ << "number of cycles where we saw N instructions retired:\n";
+ TempStream << "[# retired], [# cycles]\n";
+
+ for (const std::pair<unsigned, unsigned> &Entry : RetiredPerCycle) {
+ TempStream << " " << Entry.first;
+ if (Entry.first < 10)
+ TempStream << ", ";
+ else
+ TempStream << ", ";
+ TempStream << Entry.second << " ("
+ << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
+ << "%)\n";
+ }
+
+ unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles;
+ double MaxUsagePercentage = ((double)MaxUsedEntries / TotalROBEntries) * 100.0;
+ double NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10;
+ double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0;
+ double NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10;
+
+ TempStream << "\nTotal ROB Entries: " << TotalROBEntries
+ << "\nMax Used ROB Entries: " << MaxUsedEntries
+ << format(" ( %.1f%% )", NormalizedMaxPercentage)
+ << "\nAverage Used ROB Entries per cy: " << AvgUsage
+ << format(" ( %.1f%% )\n", NormalizedAvgPercentage);
+
+ TempStream.flush();
+ OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
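
The new RetireControlUnitStatistics view combines a per-cycle retirement histogram with reorder-buffer occupancy tracking. The following standalone sketch, driven by invented sample data, reproduces the same bookkeeping and the same round-to-nearest-tenth formatting; it is an illustration, not llvm-mca code:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      const unsigned TotalROBEntries = 64;
      // Pairs of (instructions retired, ROB entries in use) sampled per cycle.
      std::vector<std::pair<unsigned, unsigned>> Cycles = {
          {0, 10}, {2, 24}, {2, 35}, {1, 30}, {2, 28}};

      std::map<unsigned, unsigned> RetiredPerCycle;
      unsigned MaxUsedEntries = 0;
      unsigned long long SumOfUsedEntries = 0;
      for (const auto &C : Cycles) {
        RetiredPerCycle[C.first]++;
        MaxUsedEntries = std::max(MaxUsedEntries, C.second);
        SumOfUsedEntries += C.second;
      }

      const unsigned NumCycles = Cycles.size();
      unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles;
      double MaxPct = ((double)MaxUsedEntries / TotalROBEntries) * 100.0;
      double AvgPct = ((double)AvgUsage / TotalROBEntries) * 100.0;
      // Round to the nearest tenth, exactly as printView() does.
      MaxPct = std::floor((MaxPct * 10) + 0.5) / 10;
      AvgPct = std::floor((AvgPct * 10) + 0.5) / 10;

      for (const auto &Entry : RetiredPerCycle)
        std::printf(" %u, %u (%.1f%%)\n", Entry.first, Entry.second,
                    ((double)Entry.second / NumCycles) * 100.0);
      std::printf("Max Used ROB Entries: %u ( %.1f%% )\n", MaxUsedEntries, MaxPct);
      std::printf("Average Used ROB Entries per cy: %u ( %.1f%% )\n", AvgUsage,
                  AvgPct);
      return 0;
    }
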
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.h b/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
index 1f03e7efe889..02aa13bc444a 100644
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnitStatistics.h
+++ b/contrib/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -16,20 +16,24 @@
///
/// Retire Control Unit - number of cycles where we saw N instructions retired:
/// [# retired], [# cycles]
-/// 0, 9 (6.9%)
-/// 1, 6 (4.6%)
-/// 2, 1 (0.8%)
-/// 4, 3 (2.3%)
+/// 0, 109 (17.9%)
+/// 1, 102 (16.7%)
+/// 2, 399 (65.4%)
+///
+/// Total ROB Entries: 64
+/// Max Used ROB Entries: 35 ( 54.7% )
+/// Average Used ROB Entries per cy: 32 ( 50.0% )
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
-#include "View.h"
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "Views/View.h"
+#include "llvm/MC/MCSchedule.h"
#include <map>
+namespace llvm {
namespace mca {
class RetireControlUnitStatistics : public View {
@@ -38,23 +42,20 @@ class RetireControlUnitStatistics : public View {
unsigned NumRetired;
unsigned NumCycles;
-
- void updateHistograms() {
- RetiredPerCycle[NumRetired]++;
- NumRetired = 0;
- }
+ unsigned TotalROBEntries;
+ unsigned EntriesInUse;
+ unsigned MaxUsedEntries;
+ unsigned SumOfUsedEntries;
public:
- RetireControlUnitStatistics() : NumRetired(0), NumCycles(0) {}
+ RetireControlUnitStatistics(const MCSchedModel &SM);
void onEvent(const HWInstructionEvent &Event) override;
-
- void onCycleBegin() override { NumCycles++; }
-
- void onCycleEnd() override { updateHistograms(); }
-
+ void onCycleEnd() override;
void printView(llvm::raw_ostream &OS) const override;
};
+
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
new file mode 100644
index 000000000000..670f90127f18
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -0,0 +1,183 @@
+//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the SchedulerStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SchedulerStatistics.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+
+namespace llvm {
+namespace mca {
+
+SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
+ : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0),
+ NumCycles(0), MostRecentLoadDispatched(~0U),
+ MostRecentStoreDispatched(~0U),
+ IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0),
+ Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {
+ if (SM.hasExtraProcessorInfo()) {
+ const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
+ LQResourceID = EPI.LoadQueueID;
+ SQResourceID = EPI.StoreQueueID;
+ }
+}
+
+// FIXME: This implementation works under the assumption that load/store queue
+// entries are reserved at 'instruction dispatched' stage, and released at
+// 'instruction executed' stage. This currently matches the behavior of LSUnit.
+//
+// The current design minimizes the number of events generated by the
+// Dispatch/Execute stages, at the cost of doing extra bookkeeping in method
+// `onEvent`. However, it introduces a subtle dependency between this view and
+// how the LSUnit works.
+//
+// In future we should add a new "memory queue" event type, so that we stop
+// making assumptions on how LSUnit internally works (See PR39828).
+void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
+ if (Event.Type == HWInstructionEvent::Issued)
+ ++NumIssued;
+ else if (Event.Type == HWInstructionEvent::Dispatched) {
+ const Instruction &Inst = *Event.IR.getInstruction();
+ const unsigned Index = Event.IR.getSourceIndex();
+ if (LQResourceID && Inst.getDesc().MayLoad &&
+ MostRecentLoadDispatched != Index) {
+ Usage[LQResourceID].SlotsInUse++;
+ MostRecentLoadDispatched = Index;
+ }
+ if (SQResourceID && Inst.getDesc().MayStore &&
+ MostRecentStoreDispatched != Index) {
+ Usage[SQResourceID].SlotsInUse++;
+ MostRecentStoreDispatched = Index;
+ }
+ } else if (Event.Type == HWInstructionEvent::Executed) {
+ const Instruction &Inst = *Event.IR.getInstruction();
+ if (LQResourceID && Inst.getDesc().MayLoad) {
+ assert(Usage[LQResourceID].SlotsInUse);
+ Usage[LQResourceID].SlotsInUse--;
+ }
+ if (SQResourceID && Inst.getDesc().MayStore) {
+ assert(Usage[SQResourceID].SlotsInUse);
+ Usage[SQResourceID].SlotsInUse--;
+ }
+ }
+}
+
+void SchedulerStatistics::onReservedBuffers(const InstRef & /* unused */,
+ ArrayRef<unsigned> Buffers) {
+ for (const unsigned Buffer : Buffers) {
+ if (Buffer == LQResourceID || Buffer == SQResourceID)
+ continue;
+ Usage[Buffer].SlotsInUse++;
+ }
+}
+
+void SchedulerStatistics::onReleasedBuffers(const InstRef & /* unused */,
+ ArrayRef<unsigned> Buffers) {
+ for (const unsigned Buffer : Buffers) {
+ if (Buffer == LQResourceID || Buffer == SQResourceID)
+ continue;
+ Usage[Buffer].SlotsInUse--;
+ }
+}
+
+void SchedulerStatistics::updateHistograms() {
+ for (BufferUsage &BU : Usage) {
+ BU.CumulativeNumUsedSlots += BU.SlotsInUse;
+ BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
+ }
+
+ IssuedPerCycle[NumIssued]++;
+ NumIssued = 0;
+}
+
+void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
+ OS << "\n\nSchedulers - "
+ << "number of cycles where we saw N instructions issued:\n";
+ OS << "[# issued], [# cycles]\n";
+
+ const auto It =
+ std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end());
+ unsigned Index = std::distance(IssuedPerCycle.begin(), It);
+
+ bool HasColors = OS.has_colors();
+ for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) {
+ unsigned IPC = IssuedPerCycle[I];
+ if (!IPC)
+ continue;
+
+ if (I == Index && HasColors)
+ OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+ OS << " " << I << ", " << IPC << " ("
+ << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
+ if (HasColors)
+ OS.resetColor();
+ }
+}
+
+void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
+ assert(NumCycles && "Unexpected number of cycles!");
+
+ OS << "\nScheduler's queue usage:\n";
+ if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) {
+ OS << "No scheduler resources used.\n";
+ return;
+ }
+
+ OS << "[1] Resource name.\n"
+ << "[2] Average number of used buffer entries.\n"
+ << "[3] Maximum number of used buffer entries.\n"
+ << "[4] Total number of buffer entries.\n\n"
+ << " [1] [2] [3] [4]\n";
+
+ formatted_raw_ostream FOS(OS);
+ bool HasColors = FOS.has_colors();
+ for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ if (ProcResource.BufferSize <= 0)
+ continue;
+
+ const BufferUsage &BU = Usage[I];
+ double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles;
+ double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5;
+ unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10;
+ unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10;
+
+ FOS << ProcResource.Name;
+ FOS.PadToColumn(17);
+ if (HasColors && NormalizedAvg >= NormalizedThreshold)
+ FOS.changeColor(raw_ostream::YELLOW, true, false);
+ FOS << NormalizedAvg;
+ if (HasColors)
+ FOS.resetColor();
+ FOS.PadToColumn(28);
+ if (HasColors &&
+ BU.MaxUsedSlots == static_cast<unsigned>(ProcResource.BufferSize))
+ FOS.changeColor(raw_ostream::RED, true, false);
+ FOS << BU.MaxUsedSlots;
+ if (HasColors)
+ FOS.resetColor();
+ FOS.PadToColumn(39);
+ FOS << ProcResource.BufferSize << '\n';
+ }
+
+ FOS.flush();
+}
+
+void SchedulerStatistics::printView(raw_ostream &OS) const {
+ printSchedulerStats(OS);
+ printSchedulerUsage(OS);
+}
+
+} // namespace mca
+} // namespace llvm
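
The scheduler statistics now keep one BufferUsage record per processor resource instead of a map, and the load/store queues are tracked through dispatch/execute events rather than buffer events. The core accounting — reservations raise SlotsInUse, releases lower it, and every cycle end folds the current occupancy into a running maximum and a cumulative sum — can be sketched in plain C++ as follows (illustrative only):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct BufferUsage {
      unsigned SlotsInUse = 0;
      unsigned MaxUsedSlots = 0;
      uint64_t CumulativeNumUsedSlots = 0;
    };

    int main() {
      std::vector<BufferUsage> Usage(2); // two buffered resources
      unsigned NumCycles = 0;

      auto OnCycleEnd = [&] {
        for (BufferUsage &BU : Usage) {
          BU.CumulativeNumUsedSlots += BU.SlotsInUse;
          BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
        }
        ++NumCycles;
      };

      // Cycle 0: two instructions reserve entries in buffer 0, one in buffer 1.
      Usage[0].SlotsInUse += 2;
      Usage[1].SlotsInUse += 1;
      OnCycleEnd();
      // Cycle 1: one entry of buffer 0 is released.
      Usage[0].SlotsInUse -= 1;
      OnCycleEnd();

      for (unsigned I = 0, E = Usage.size(); I < E; ++I) {
        double Avg = (double)Usage[I].CumulativeNumUsedSlots / NumCycles;
        std::printf("buffer %u: avg %.1f, max %u\n", I, Avg,
                    Usage[I].MaxUsedSlots);
      }
      return 0;
    }
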
diff --git a/contrib/llvm/tools/llvm-mca/SchedulerStatistics.h b/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
index 7383c54a1615..d99a395a726d 100644
--- a/contrib/llvm/tools/llvm-mca/SchedulerStatistics.h
+++ b/contrib/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -17,75 +17,78 @@
///
/// Schedulers - number of cycles where we saw N instructions issued:
/// [# issued], [# cycles]
-/// 0, 7 (5.4%)
-/// 1, 4 (3.1%)
-/// 2, 8 (6.2%)
+/// 0, 6 (2.9%)
+/// 1, 106 (50.7%)
+/// 2, 97 (46.4%)
///
/// Scheduler's queue usage:
-/// JALU01, 0/20
-/// JFPU01, 18/18
-/// JLSAGU, 0/12
+/// [1] Resource name.
+/// [2] Average number of used buffer entries.
+/// [3] Maximum number of used buffer entries.
+/// [4] Total number of buffer entries.
///
+/// [1] [2] [3] [4]
+/// JALU01 0 0 20
+/// JFPU01 15 18 18
+/// JLSAGU 0 0 12
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include <map>
+namespace llvm {
namespace mca {
-class SchedulerStatistics : public View {
+class SchedulerStatistics final : public View {
const llvm::MCSchedModel &SM;
-
- using Histogram = std::map<unsigned, unsigned>;
- Histogram IssuedPerCycle;
+ unsigned LQResourceID;
+ unsigned SQResourceID;
unsigned NumIssued;
unsigned NumCycles;
+ unsigned MostRecentLoadDispatched;
+ unsigned MostRecentStoreDispatched;
+
// Tracks the usage of a scheduler's queue.
struct BufferUsage {
unsigned SlotsInUse;
unsigned MaxUsedSlots;
+ uint64_t CumulativeNumUsedSlots;
};
- std::map<unsigned, BufferUsage> BufferedResources;
-
- void updateHistograms() {
- IssuedPerCycle[NumIssued]++;
- NumIssued = 0;
- }
+ std::vector<unsigned> IssuedPerCycle;
+ std::vector<BufferUsage> Usage;
- void printSchedulerStatistics(llvm::raw_ostream &OS) const;
+ void updateHistograms();
+ void printSchedulerStats(llvm::raw_ostream &OS) const;
void printSchedulerUsage(llvm::raw_ostream &OS) const;
public:
- SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
- : SM(STI.getSchedModel()), NumIssued(0), NumCycles(0) {}
-
+ SchedulerStatistics(const llvm::MCSubtargetInfo &STI);
void onEvent(const HWInstructionEvent &Event) override;
-
void onCycleBegin() override { NumCycles++; }
-
void onCycleEnd() override { updateHistograms(); }
// Increases the number of used scheduler queue slots of every buffered
// resource in the Buffers set.
- void onReservedBuffers(llvm::ArrayRef<unsigned> Buffers) override;
+ void onReservedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
// Decreases by one the number of used scheduler queue slots of every
// buffered resource in the Buffers set.
- void onReleasedBuffers(llvm::ArrayRef<unsigned> Buffers) override;
+ void onReleasedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
- void printView(llvm::raw_ostream &OS) const override {
- printSchedulerStatistics(OS);
- printSchedulerUsage(OS);
- }
+ void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/SummaryView.cpp b/contrib/llvm/tools/llvm-mca/Views/SummaryView.cpp
index 01399055c4fd..d8ac709e784d 100644
--- a/contrib/llvm/tools/llvm-mca/SummaryView.cpp
+++ b/contrib/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -13,32 +13,33 @@
///
//===----------------------------------------------------------------------===//
-#include "SummaryView.h"
-#include "Support.h"
+#include "Views/SummaryView.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"
+namespace llvm {
namespace mca {
#define DEBUG_TYPE "llvm-mca"
-using namespace llvm;
-
-SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S,
unsigned Width)
- : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
- NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
- ProcResourceMasks(Model.getNumProcResourceKinds(), 0) {
+ : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0),
+ TotalCycles(0), NumMicroOps(0),
+ ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
+ ProcResourceMasks(Model.getNumProcResourceKinds()) {
computeProcResourceMasks(SM, ProcResourceMasks);
}
void SummaryView::onEvent(const HWInstructionEvent &Event) {
- // We are only interested in the "instruction dispatched" events generated by
- // the dispatch stage for instructions that are part of iteration #0.
- if (Event.Type != HWInstructionEvent::Dispatched)
- return;
+ if (Event.Type == HWInstructionEvent::Dispatched)
+ LastInstructionIdx = Event.IR.getSourceIndex();
- if (Event.IR.getSourceIndex() >= Source.size())
+ // We are only interested in the "instruction retired" events generated by
+ // the retire stage for instructions that are part of iteration #0.
+ if (Event.Type != HWInstructionEvent::Retired ||
+ Event.IR.getSourceIndex() >= Source.size())
return;
// Update the cumulative number of resource cycles based on the processor
@@ -60,10 +61,12 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) {
}
void SummaryView::printView(raw_ostream &OS) const {
- unsigned Iterations = Source.getNumIterations();
unsigned Instructions = Source.size();
+ unsigned Iterations = (LastInstructionIdx / Instructions) + 1;
unsigned TotalInstructions = Instructions * Iterations;
+ unsigned TotalUOps = NumMicroOps * Iterations;
double IPC = (double)TotalInstructions / TotalCycles;
+ double UOpsPerCycle = (double)TotalUOps / TotalCycles;
double BlockRThroughput = computeBlockRThroughput(
SM, DispatchWidth, NumMicroOps, ProcResourceUsage);
@@ -72,10 +75,12 @@ void SummaryView::printView(raw_ostream &OS) const {
TempStream << "Iterations: " << Iterations;
TempStream << "\nInstructions: " << TotalInstructions;
TempStream << "\nTotal Cycles: " << TotalCycles;
+ TempStream << "\nTotal uOps: " << TotalUOps << '\n';
TempStream << "\nDispatch Width: " << DispatchWidth;
- TempStream << "\nIPC: " << format("%.2f", IPC);
-
- // Round to the block reciprocal throughput to the nearest tenth.
+ TempStream << "\nuOps Per Cycle: "
+ << format("%.2f", floor((UOpsPerCycle * 100) + 0.5) / 100);
+ TempStream << "\nIPC: "
+ << format("%.2f", floor((IPC * 100) + 0.5) / 100);
TempStream << "\nBlock RThroughput: "
<< format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10)
<< '\n';
@@ -83,3 +88,4 @@ void SummaryView::printView(raw_ostream &OS) const {
OS << Buffer;
}
} // namespace mca.
+} // namespace llvm
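
SummaryView now derives the iteration count from the last dispatched source index and additionally reports uOps per cycle. A small standalone sketch of the arithmetic, with invented numbers:

    #include <cmath>
    #include <cstdio>

    int main() {
      const unsigned Instructions = 4;         // instructions in the block
      const unsigned LastInstructionIdx = 399; // last dispatched source index
      const unsigned TotalCycles = 210;
      const unsigned NumMicroOps = 6;          // uOps contributed by one iteration

      const unsigned Iterations = LastInstructionIdx / Instructions + 1;
      const unsigned TotalInstructions = Instructions * Iterations;
      const unsigned TotalUOps = NumMicroOps * Iterations;

      double IPC = (double)TotalInstructions / TotalCycles;
      double UOpsPerCycle = (double)TotalUOps / TotalCycles;

      std::printf("Iterations:     %u\n", Iterations);
      std::printf("Total uOps:     %u\n", TotalUOps);
      // Round to two decimals before printing, as printView() does.
      std::printf("uOps Per Cycle: %.2f\n",
                  std::floor((UOpsPerCycle * 100) + 0.5) / 100);
      std::printf("IPC:            %.2f\n",
                  std::floor((IPC * 100) + 0.5) / 100);
      return 0;
    }
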
diff --git a/contrib/llvm/tools/llvm-mca/SummaryView.h b/contrib/llvm/tools/llvm-mca/Views/SummaryView.h
index b799ce3aa747..f59fd4233fbe 100644
--- a/contrib/llvm/tools/llvm-mca/SummaryView.h
+++ b/contrib/llvm/tools/llvm-mca/Views/SummaryView.h
@@ -29,19 +29,20 @@
#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
-#include "SourceMgr.h"
-#include "View.h"
+#include "Views/View.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
namespace mca {
/// A view that collects and prints a few performance numbers.
class SummaryView : public View {
const llvm::MCSchedModel &SM;
- const SourceMgr &Source;
+ llvm::ArrayRef<llvm::MCInst> Source;
const unsigned DispatchWidth;
+ unsigned LastInstructionIdx;
unsigned TotalCycles;
// The total number of micro opcodes contributed by a block of instructions.
unsigned NumMicroOps;
@@ -62,15 +63,15 @@ class SummaryView : public View {
double getBlockRThroughput() const;
public:
- SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+ SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S,
unsigned Width);
void onCycleEnd() override { ++TotalCycles; }
-
void onEvent(const HWInstructionEvent &Event) override;
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/Views/TimelineView.cpp b/contrib/llvm/tools/llvm-mca/Views/TimelineView.cpp
new file mode 100644
index 000000000000..7d55bbc99c73
--- /dev/null
+++ b/contrib/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -0,0 +1,294 @@
+//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \brief
+///
+/// This file implements the TimelineView interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/TimelineView.h"
+
+namespace llvm {
+namespace mca {
+
+TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
+ unsigned Cycles)
+ : STI(sti), MCIP(Printer), Source(S), CurrentCycle(0),
+ MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0), WaitTime(S.size()),
+ UsedBuffer(S.size()) {
+ unsigned NumInstructions = Source.size();
+ assert(Iterations && "Invalid number of iterations specified!");
+ NumInstructions *= Iterations;
+ Timeline.resize(NumInstructions);
+ TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0};
+ std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);
+
+ WaitTimeEntry NullWTEntry = {0, 0, 0};
+ std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
+
+ std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0,
+ /* unknown buffer size */ -1};
+ std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry);
+}
+
+void TimelineView::onReservedBuffers(const InstRef &IR,
+ ArrayRef<unsigned> Buffers) {
+ if (IR.getSourceIndex() >= Source.size())
+ return;
+
+ const MCSchedModel &SM = STI.getSchedModel();
+ std::pair<unsigned, int> BufferInfo = {0, -1};
+ for (const unsigned Buffer : Buffers) {
+ const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
+ if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) {
+ BufferInfo.first = Buffer;
+ BufferInfo.second = MCDesc.BufferSize;
+ }
+ }
+
+ UsedBuffer[IR.getSourceIndex()] = BufferInfo;
+}
+
+void TimelineView::onEvent(const HWInstructionEvent &Event) {
+ const unsigned Index = Event.IR.getSourceIndex();
+ if (Index >= Timeline.size())
+ return;
+
+ switch (Event.Type) {
+ case HWInstructionEvent::Retired: {
+ TimelineViewEntry &TVEntry = Timeline[Index];
+ if (CurrentCycle < MaxCycle)
+ TVEntry.CycleRetired = CurrentCycle;
+
+ // Update the WaitTime entry which corresponds to this Index.
+ assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
+ unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
+ WaitTimeEntry &WTEntry = WaitTime[Index % Source.size()];
+ WTEntry.CyclesSpentInSchedulerQueue +=
+ TVEntry.CycleIssued - CycleDispatched;
+ assert(CycleDispatched <= TVEntry.CycleReady &&
+ "Instruction cannot be ready if it hasn't been dispatched yet!");
+ WTEntry.CyclesSpentInSQWhileReady +=
+ TVEntry.CycleIssued - TVEntry.CycleReady;
+ WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
+ (CurrentCycle - 1) - TVEntry.CycleExecuted;
+ break;
+ }
+ case HWInstructionEvent::Ready:
+ Timeline[Index].CycleReady = CurrentCycle;
+ break;
+ case HWInstructionEvent::Issued:
+ Timeline[Index].CycleIssued = CurrentCycle;
+ break;
+ case HWInstructionEvent::Executed:
+ Timeline[Index].CycleExecuted = CurrentCycle;
+ break;
+ case HWInstructionEvent::Dispatched:
+ // There may be multiple dispatch events. Microcoded instructions that are
+ // expanded into multiple uOps may require multiple dispatch cycles. Here,
+ // we want to capture the first dispatch cycle.
+ if (Timeline[Index].CycleDispatched == -1)
+ Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);
+ break;
+ default:
+ return;
+ }
+ if (CurrentCycle < MaxCycle)
+ LastCycle = std::max(LastCycle, CurrentCycle);
+}
+
+static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
+ unsigned Executions, int BufferSize) {
+ if (CumulativeCycles && BufferSize < 0)
+ return raw_ostream::MAGENTA;
+ unsigned Size = static_cast<unsigned>(BufferSize);
+ if (CumulativeCycles >= Size * Executions)
+ return raw_ostream::RED;
+ if ((CumulativeCycles * 2) >= Size * Executions)
+ return raw_ostream::YELLOW;
+ return raw_ostream::SAVEDCOLOR;
+}
+
+static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
+ unsigned Executions, int BufferSize) {
+ if (!OS.has_colors())
+ return;
+
+ raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize);
+ if (Color == raw_ostream::SAVEDCOLOR) {
+ OS.resetColor();
+ return;
+ }
+ OS.changeColor(Color, /* bold */ true, /* BG */ false);
+}
+
+void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
+ const WaitTimeEntry &Entry,
+ unsigned SourceIndex,
+ unsigned Executions) const {
+ OS << SourceIndex << '.';
+ OS.PadToColumn(7);
+
+ double AverageTime1, AverageTime2, AverageTime3;
+ AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
+ AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
+ AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
+
+ OS << Executions;
+ OS.PadToColumn(13);
+ int BufferSize = UsedBuffer[SourceIndex].second;
+ tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize);
+ OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
+ OS.PadToColumn(20);
+ tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize);
+ OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
+ OS.PadToColumn(27);
+ tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions,
+ STI.getSchedModel().MicroOpBufferSize);
+ OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
+
+ if (OS.has_colors())
+ OS.resetColor();
+ OS.PadToColumn(34);
+}
+
+void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
+ std::string Header =
+ "\n\nAverage Wait times (based on the timeline view):\n"
+ "[0]: Executions\n"
+ "[1]: Average time spent waiting in a scheduler's queue\n"
+ "[2]: Average time spent waiting in a scheduler's queue while ready\n"
+ "[3]: Average time elapsed from WB until retire stage\n\n"
+ " [0] [1] [2] [3]\n";
+ OS << Header;
+
+ // Use a different string stream for printing instructions.
+ std::string Instruction;
+ raw_string_ostream InstrStream(Instruction);
+
+ formatted_raw_ostream FOS(OS);
+ unsigned Executions = Timeline.size() / Source.size();
+ unsigned IID = 0;
+ for (const MCInst &Inst : Source) {
+ printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);
+ // Append the instruction info at the end of the line.
+ MCIP.printInst(&Inst, InstrStream, "", STI);
+ InstrStream.flush();
+
+ // Consume any tabs or spaces at the beginning of the string.
+ StringRef Str(Instruction);
+ Str = Str.ltrim();
+ FOS << " " << Str << '\n';
+ FOS.flush();
+ Instruction = "";
+
+ ++IID;
+ }
+}
+
+void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
+ const TimelineViewEntry &Entry,
+ unsigned Iteration,
+ unsigned SourceIndex) const {
+ if (Iteration == 0 && SourceIndex == 0)
+ OS << '\n';
+ OS << '[' << Iteration << ',' << SourceIndex << ']';
+ OS.PadToColumn(10);
+ assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
+ unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);
+ for (unsigned I = 0, E = CycleDispatched; I < E; ++I)
+ OS << ((I % 5 == 0) ? '.' : ' ');
+ OS << TimelineView::DisplayChar::Dispatched;
+ if (CycleDispatched != Entry.CycleExecuted) {
+ // Zero latency instructions have the same value for CycleDispatched,
+ // CycleIssued and CycleExecuted.
+ for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)
+ OS << TimelineView::DisplayChar::Waiting;
+ if (Entry.CycleIssued == Entry.CycleExecuted)
+ OS << TimelineView::DisplayChar::DisplayChar::Executed;
+ else {
+ if (CycleDispatched != Entry.CycleIssued)
+ OS << TimelineView::DisplayChar::Executing;
+ for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
+ ++I)
+ OS << TimelineView::DisplayChar::Executing;
+ OS << TimelineView::DisplayChar::Executed;
+ }
+ }
+
+ for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
+ OS << TimelineView::DisplayChar::RetireLag;
+ OS << TimelineView::DisplayChar::Retired;
+
+ // Skip other columns.
+ for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
+ OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
+}
+
+static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
+ OS << "\n\nTimeline view:\n";
+ if (Cycles >= 10) {
+ OS.PadToColumn(10);
+ for (unsigned I = 0; I <= Cycles; ++I) {
+ if (((I / 10) & 1) == 0)
+ OS << ' ';
+ else
+ OS << I % 10;
+ }
+ OS << '\n';
+ }
+
+ OS << "Index";
+ OS.PadToColumn(10);
+ for (unsigned I = 0; I <= Cycles; ++I) {
+ if (((I / 10) & 1) == 0)
+ OS << I % 10;
+ else
+ OS << ' ';
+ }
+ OS << '\n';
+}
+
+void TimelineView::printTimeline(raw_ostream &OS) const {
+ formatted_raw_ostream FOS(OS);
+ printTimelineHeader(FOS, LastCycle);
+ FOS.flush();
+
+ // Use a different string stream for the instruction.
+ std::string Instruction;
+ raw_string_ostream InstrStream(Instruction);
+
+ unsigned IID = 0;
+ const unsigned Iterations = Timeline.size() / Source.size();
+ for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) {
+ for (const MCInst &Inst : Source) {
+ const TimelineViewEntry &Entry = Timeline[IID];
+ if (Entry.CycleRetired == 0)
+ return;
+
+ unsigned SourceIndex = IID % Source.size();
+ printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
+ // Append the instruction info at the end of the line.
+ MCIP.printInst(&Inst, InstrStream, "", STI);
+ InstrStream.flush();
+
+ // Consume any tabs or spaces at the beginning of the string.
+ StringRef Str(Instruction);
+ Str = Str.ltrim();
+ FOS << " " << Str << '\n';
+ FOS.flush();
+ Instruction = "";
+
+ ++IID;
+ }
+ }
+}
+} // namespace mca
+} // namespace llvm
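
TimelineView colors the average wait times according to how full the relevant scheduler buffer was: magenta when the buffer size is unknown, red when the cumulative queue cycles reach one full buffer per execution, yellow at half that. A plain C++ sketch of the same rule (not llvm-mca code):

    #include <cstdio>

    enum class Highlight { None, Yellow, Red, Magenta };

    static Highlight choose(unsigned CumulativeCycles, unsigned Executions,
                            int BufferSize) {
      if (CumulativeCycles && BufferSize < 0)
        return Highlight::Magenta;           // buffer size is unknown
      unsigned Size = static_cast<unsigned>(BufferSize);
      if (CumulativeCycles >= Size * Executions)
        return Highlight::Red;               // queue was effectively full
      if ((CumulativeCycles * 2) >= Size * Executions)
        return Highlight::Yellow;            // at least half full on average
      return Highlight::None;
    }

    int main() {
      // 10 executions, a 16-entry scheduler buffer, 120 queued cycles in total:
      // an average of 12 cycles per execution exceeds half of 16 -> Yellow.
      Highlight H = choose(120, 10, 16);
      std::printf("highlight = %d\n", static_cast<int>(H));
      return 0;
    }
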
diff --git a/contrib/llvm/tools/llvm-mca/TimelineView.h b/contrib/llvm/tools/llvm-mca/Views/TimelineView.h
index e53c23ec1cc2..ee981800161c 100644
--- a/contrib/llvm/tools/llvm-mca/TimelineView.h
+++ b/contrib/llvm/tools/llvm-mca/Views/TimelineView.h
@@ -100,14 +100,15 @@
#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
-#include "SourceMgr.h"
-#include "View.h"
+#include "Views/View.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
-#include <map>
+namespace llvm {
namespace mca {
/// This class listens to instruction state transition events
@@ -120,14 +121,14 @@ namespace mca {
class TimelineView : public View {
const llvm::MCSubtargetInfo &STI;
llvm::MCInstPrinter &MCIP;
- const SourceMgr &AsmSequence;
+ llvm::ArrayRef<llvm::MCInst> Source;
unsigned CurrentCycle;
unsigned MaxCycle;
unsigned LastCycle;
struct TimelineViewEntry {
- unsigned CycleDispatched;
+ int CycleDispatched; // A negative value is an "invalid cycle".
unsigned CycleReady;
unsigned CycleIssued;
unsigned CycleExecuted;
@@ -136,22 +137,22 @@ class TimelineView : public View {
std::vector<TimelineViewEntry> Timeline;
struct WaitTimeEntry {
- unsigned Executions;
unsigned CyclesSpentInSchedulerQueue;
unsigned CyclesSpentInSQWhileReady;
unsigned CyclesSpentAfterWBAndBeforeRetire;
};
std::vector<WaitTimeEntry> WaitTime;
+ // This field is used to map instructions to buffered resources.
+  // Elements of this vector are <resourceID, BufferSize> pairs.
+ std::vector<std::pair<unsigned, int>> UsedBuffer;
+
void printTimelineViewEntry(llvm::formatted_raw_ostream &OS,
const TimelineViewEntry &E, unsigned Iteration,
unsigned SourceIndex) const;
void printWaitTimeEntry(llvm::formatted_raw_ostream &OS,
- const WaitTimeEntry &E, unsigned Index) const;
-
- const unsigned DEFAULT_ITERATIONS = 10;
-
- void initialize(unsigned MaxIterations);
+ const WaitTimeEntry &E, unsigned Index,
+ unsigned Executions) const;
// Display characters for the TimelineView report output.
struct DisplayChar {
@@ -165,16 +166,14 @@ class TimelineView : public View {
public:
TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer,
- const SourceMgr &Sequence, unsigned MaxIterations,
- unsigned Cycles)
- : STI(sti), MCIP(Printer), AsmSequence(Sequence), CurrentCycle(0),
- MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0) {
- initialize(MaxIterations);
- }
+ llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
+ unsigned Cycles);
// Event handlers.
void onCycleEnd() override { ++CurrentCycle; }
void onEvent(const HWInstructionEvent &Event) override;
+ void onReservedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
// print functionalities.
void printTimeline(llvm::raw_ostream &OS) const;
@@ -185,5 +184,6 @@ public:
}
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/View.cpp b/contrib/llvm/tools/llvm-mca/Views/View.cpp
index 390a7aeb3b9d..6cfb9dd9f394 100644
--- a/contrib/llvm/tools/llvm-mca/View.cpp
+++ b/contrib/llvm/tools/llvm-mca/Views/View.cpp
@@ -12,9 +12,11 @@
///
//===----------------------------------------------------------------------===//
-#include "View.h"
+#include "Views/View.h"
+namespace llvm {
namespace mca {
void View::anchor() {}
} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-mca/View.h b/contrib/llvm/tools/llvm-mca/Views/View.h
index 9ba94a5da977..4b82b0da0d27 100644
--- a/contrib/llvm/tools/llvm-mca/View.h
+++ b/contrib/llvm/tools/llvm-mca/Views/View.h
@@ -16,9 +16,10 @@
#ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H
#define LLVM_TOOLS_LLVM_MCA_VIEW_H
-#include "HWEventListener.h"
+#include "llvm/MCA/HWEventListener.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
namespace mca {
class View : public HWEventListener {
@@ -28,5 +29,6 @@ public:
void anchor() override;
};
} // namespace mca
+} // namespace llvm
#endif
diff --git a/contrib/llvm/tools/llvm-mca/llvm-mca.cpp b/contrib/llvm/tools/llvm-mca/llvm-mca.cpp
index 897ff232a36d..68d63db599d7 100644
--- a/contrib/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/contrib/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -22,26 +22,27 @@
//===----------------------------------------------------------------------===//
#include "CodeRegion.h"
-#include "Context.h"
-#include "DispatchStatistics.h"
-#include "FetchStage.h"
-#include "InstructionInfoView.h"
-#include "InstructionTables.h"
-#include "Pipeline.h"
+#include "CodeRegionGenerator.h"
#include "PipelinePrinter.h"
-#include "RegisterFileStatistics.h"
-#include "ResourcePressureView.h"
-#include "RetireControlUnitStatistics.h"
-#include "SchedulerStatistics.h"
-#include "SummaryView.h"
-#include "TimelineView.h"
+#include "Views/DispatchStatistics.h"
+#include "Views/InstructionInfoView.h"
+#include "Views/RegisterFileStatistics.h"
+#include "Views/ResourcePressureView.h"
+#include "Views/RetireControlUnitStatistics.h"
+#include "Views/SchedulerStatistics.h"
+#include "Views/SummaryView.h"
+#include "Views/TimelineView.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MCA/Context.h"
+#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/Stages/EntryStage.h"
+#include "llvm/MCA/Stages/InstructionTables.h"
+#include "llvm/MCA/Support.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -67,13 +68,13 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
cl::value_desc("filename"));
static cl::opt<std::string>
- ArchName("march", cl::desc("Target arch to assemble for, "
- "see -version for available targets"),
+ ArchName("march", cl::desc("Target architecture. "
+ "See -version for available targets"),
cl::cat(ToolOptions));
static cl::opt<std::string>
- TripleName("mtriple", cl::desc("Target triple to assemble for, "
- "see -version for available targets"),
+ TripleName("mtriple",
+ cl::desc("Target triple. See -version for available targets"),
cl::cat(ToolOptions));
static cl::opt<std::string>
@@ -148,15 +149,13 @@ static cl::opt<bool>
cl::desc("If set, assume that loads and stores do not alias"),
cl::cat(ToolOptions), cl::init(true));
-static cl::opt<unsigned>
- LoadQueueSize("lqueue",
- cl::desc("Size of the load queue (unbound by default)"),
- cl::cat(ToolOptions), cl::init(0));
+static cl::opt<unsigned> LoadQueueSize("lqueue",
+ cl::desc("Size of the load queue"),
+ cl::cat(ToolOptions), cl::init(0));
-static cl::opt<unsigned>
- StoreQueueSize("squeue",
- cl::desc("Size of the store queue (unbound by default)"),
- cl::cat(ToolOptions), cl::init(0));
+static cl::opt<unsigned> StoreQueueSize("squeue",
+ cl::desc("Size of the store queue"),
+ cl::cat(ToolOptions), cl::init(0));
static cl::opt<bool>
PrintInstructionTables("instruction-tables",
@@ -180,7 +179,6 @@ static cl::opt<bool>
namespace {
const Target *getTarget(const char *ProgName) {
- TripleName = Triple::normalize(TripleName);
if (TripleName.empty())
TripleName = Triple::normalize(sys::getDefaultTargetTriple());
Triple TheTriple(TripleName);
@@ -198,59 +196,6 @@ const Target *getTarget(const char *ProgName) {
return TheTarget;
}
-// A comment consumer that parses strings.
-// The only valid tokens are strings.
-class MCACommentConsumer : public AsmCommentConsumer {
-public:
- mca::CodeRegions &Regions;
-
- MCACommentConsumer(mca::CodeRegions &R) : Regions(R) {}
- void HandleComment(SMLoc Loc, StringRef CommentText) override {
- // Skip empty comments.
- StringRef Comment(CommentText);
- if (Comment.empty())
- return;
-
- // Skip spaces and tabs
- unsigned Position = Comment.find_first_not_of(" \t");
- if (Position >= Comment.size())
- // we reached the end of the comment. Bail out.
- return;
-
- Comment = Comment.drop_front(Position);
- if (Comment.consume_front("LLVM-MCA-END")) {
- Regions.endRegion(Loc);
- return;
- }
-
- // Now try to parse string LLVM-MCA-BEGIN
- if (!Comment.consume_front("LLVM-MCA-BEGIN"))
- return;
-
- // Skip spaces and tabs
- Position = Comment.find_first_not_of(" \t");
- if (Position < Comment.size())
- Comment = Comment.drop_front(Position);
- // Use the rest of the string as a descriptor for this code snippet.
- Regions.beginRegion(Comment, Loc);
- }
-};
-
-int AssembleInput(const char *ProgName, MCAsmParser &Parser,
- const Target *TheTarget, MCSubtargetInfo &STI,
- MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
- std::unique_ptr<MCTargetAsmParser> TAP(
- TheTarget->createMCAsmParser(STI, Parser, MCII, MCOptions));
-
- if (!TAP) {
- WithColor::error() << "this target does not support assembly parsing.\n";
- return 1;
- }
-
- Parser.setTargetParser(*TAP);
- return Parser.Run(false);
-}
-
ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
if (OutputFilename == "")
OutputFilename = "-";
@@ -261,40 +206,6 @@ ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
return std::move(Out);
return EC;
}
-
-class MCStreamerWrapper final : public MCStreamer {
- mca::CodeRegions &Regions;
-
-public:
- MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R)
- : MCStreamer(Context), Regions(R) {}
-
- // We only want to intercept the emission of new instructions.
- virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool /* unused */) override {
- Regions.addInstruction(llvm::make_unique<const MCInst>(Inst));
- }
-
- bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
- return true;
- }
-
- void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) override {}
- void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
- uint64_t Size = 0, unsigned ByteAlignment = 0,
- SMLoc Loc = SMLoc()) override {}
- void EmitGPRel32Value(const MCExpr *Value) override {}
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
- void EmitCOFFSymbolStorageClass(int StorageClass) override {}
- void EmitCOFFSymbolType(int Type) override {}
- void EndCOFFSymbolDef() override {}
-
- const std::vector<std::unique_ptr<const MCInst>> &
- GetInstructionSequence(unsigned Index) const {
- return Regions.getInstructionSequence(Index);
- }
-};
} // end of anonymous namespace
static void processOptionImpl(cl::opt<bool> &O, const cl::opt<bool> &Default) {
@@ -318,20 +229,30 @@ static void processViewOptions() {
EnableAllViews.getPosition() < EnableAllStats.getPosition()
? EnableAllStats
: EnableAllViews;
- processOptionImpl(PrintSummaryView, Default);
processOptionImpl(PrintRegisterFileStats, Default);
processOptionImpl(PrintDispatchStats, Default);
processOptionImpl(PrintSchedulerStats, Default);
processOptionImpl(PrintRetireStats, Default);
}
+// Returns true on success.
+static bool runPipeline(mca::Pipeline &P) {
+ // Handle pipeline errors here.
+ Expected<unsigned> Cycles = P.run();
+ if (!Cycles) {
+ WithColor::error() << toString(Cycles.takeError());
+ return false;
+ }
+ return true;
+}
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
// Initialize targets and assembly parsers.
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
- llvm::InitializeAllAsmParsers();
+ InitializeAllTargetInfos();
+ InitializeAllTargetMCs();
+ InitializeAllAsmParsers();
// Enable printing of available targets when flag --version is specified.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
@@ -342,9 +263,6 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv,
"llvm machine code performance analyzer.\n");
- MCTargetOptions MCOptions;
- MCOptions.PreserveAsmComments = false;
-
// Get the target from the triple. If a triple is not specified, then select
// the default triple for the host. If the triple doesn't correspond to any
// registered target, then exit with an error message.
@@ -384,9 +302,6 @@ int main(int argc, char **argv) {
std::unique_ptr<buffer_ostream> BOS;
- mca::CodeRegions Regions(SrcMgr);
- MCStreamerWrapper Str(Ctx, Regions);
-
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
std::unique_ptr<MCInstrAnalysis> MCIA(
@@ -419,14 +334,20 @@ int main(int argc, char **argv) {
return 1;
}
- std::unique_ptr<MCAsmParser> P(createMCAsmParser(SrcMgr, Ctx, Str, *MAI));
- MCAsmLexer &Lexer = P->getLexer();
- MCACommentConsumer CC(Regions);
- Lexer.setCommentConsumer(&CC);
-
- if (AssembleInput(ProgName, *P, TheTarget, *STI, *MCII, MCOptions))
+ // Parse the input and create CodeRegions that llvm-mca can analyze.
+ mca::AsmCodeRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI, *MCII);
+ Expected<const mca::CodeRegions &> RegionsOrErr = CRG.parseCodeRegions();
+ if (!RegionsOrErr) {
+ if (auto Err =
+ handleErrors(RegionsOrErr.takeError(), [](const StringError &E) {
+ WithColor::error() << E.getMessage() << '\n';
+ })) {
+ // Default case.
+ WithColor::error() << toString(std::move(Err)) << '\n';
+ }
return 1;
-
+ }
+ const mca::CodeRegions &Regions = *RegionsOrErr;
if (Regions.empty()) {
WithColor::error() << "no assembly instructions found.\n";
return 1;
@@ -439,7 +360,7 @@ int main(int argc, char **argv) {
return 1;
}
- unsigned AssemblerDialect = P->getAssemblerDialect();
+ unsigned AssemblerDialect = CRG.getAssemblerDialect();
if (OutputAsmVariant >= 0)
AssemblerDialect = static_cast<unsigned>(OutputAsmVariant);
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
@@ -452,7 +373,7 @@ int main(int argc, char **argv) {
return 1;
}
- std::unique_ptr<llvm::ToolOutputFile> TOF = std::move(*OF);
+ std::unique_ptr<ToolOutputFile> TOF = std::move(*OF);
const MCSchedModel &SM = STI->getSchedModel();
@@ -461,7 +382,7 @@ int main(int argc, char **argv) {
Width = DispatchWidth;
// Create an instruction builder.
- mca::InstrBuilder IB(*STI, *MCII, *MRI, *MCIA, *IP);
+ mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get());
// Create a context to control ownership of the pipeline hardware.
mca::Context MCA(*MRI, *STI);
@@ -471,6 +392,7 @@ int main(int argc, char **argv) {
// Number each region in the sequence.
unsigned RegionIdx = 0;
+
for (const std::unique_ptr<mca::CodeRegion> &Region : Regions) {
// Skip empty code regions.
if (Region->empty())
@@ -486,24 +408,53 @@ int main(int argc, char **argv) {
TOF->os() << "\n\n";
}
- mca::SourceMgr S(Region->getInstructions(),
- PrintInstructionTables ? 1 : Iterations);
+ // Lower the MCInst sequence into an mca::Instruction sequence.
+ ArrayRef<MCInst> Insts = Region->getInstructions();
+ std::vector<std::unique_ptr<mca::Instruction>> LoweredSequence;
+ for (const MCInst &MCI : Insts) {
+ Expected<std::unique_ptr<mca::Instruction>> Inst =
+ IB.createInstruction(MCI);
+ if (!Inst) {
+ if (auto NewE = handleErrors(
+ Inst.takeError(),
+ [&IP, &STI](const mca::InstructionError<MCInst> &IE) {
+ std::string InstructionStr;
+ raw_string_ostream SS(InstructionStr);
+ WithColor::error() << IE.Message << '\n';
+ IP->printInst(&IE.Inst, SS, "", *STI);
+ SS.flush();
+ WithColor::note() << "instruction: " << InstructionStr
+ << '\n';
+ })) {
+ // Default case.
+ WithColor::error() << toString(std::move(NewE));
+ }
+ return 1;
+ }
+
+ LoweredSequence.emplace_back(std::move(Inst.get()));
+ }
+
+ mca::SourceMgr S(LoweredSequence, PrintInstructionTables ? 1 : Iterations);
if (PrintInstructionTables) {
// Create a pipeline, stages, and a printer.
auto P = llvm::make_unique<mca::Pipeline>();
- P->appendStage(llvm::make_unique<mca::FetchStage>(IB, S));
- P->appendStage(llvm::make_unique<mca::InstructionTables>(SM, IB));
+ P->appendStage(llvm::make_unique<mca::EntryStage>(S));
+ P->appendStage(llvm::make_unique<mca::InstructionTables>(SM));
mca::PipelinePrinter Printer(*P);
// Create the views for this pipeline, execute, and emit a report.
if (PrintInstructionInfoView) {
- Printer.addView(
- llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));
+ Printer.addView(llvm::make_unique<mca::InstructionInfoView>(
+ *STI, *MCII, Insts, *IP));
}
Printer.addView(
- llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, S));
- P->run();
+ llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
+
+ if (!runPipeline(*P))
+ return 1;
+
Printer.printReport(TOF->os());
continue;
}
@@ -513,11 +464,11 @@ int main(int argc, char **argv) {
mca::PipelinePrinter Printer(*P);
if (PrintSummaryView)
- Printer.addView(llvm::make_unique<mca::SummaryView>(SM, S, Width));
+ Printer.addView(llvm::make_unique<mca::SummaryView>(SM, Insts, Width));
if (PrintInstructionInfoView)
Printer.addView(
- llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, S, *IP));
+ llvm::make_unique<mca::InstructionInfoView>(*STI, *MCII, Insts, *IP));
if (PrintDispatchStats)
Printer.addView(llvm::make_unique<mca::DispatchStatistics>());
@@ -526,21 +477,26 @@ int main(int argc, char **argv) {
Printer.addView(llvm::make_unique<mca::SchedulerStatistics>(*STI));
if (PrintRetireStats)
- Printer.addView(llvm::make_unique<mca::RetireControlUnitStatistics>());
+ Printer.addView(llvm::make_unique<mca::RetireControlUnitStatistics>(SM));
if (PrintRegisterFileStats)
Printer.addView(llvm::make_unique<mca::RegisterFileStatistics>(*STI));
if (PrintResourcePressureView)
Printer.addView(
- llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, S));
+ llvm::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
if (PrintTimelineView) {
+ unsigned TimelineIterations =
+ TimelineMaxIterations ? TimelineMaxIterations : 10;
Printer.addView(llvm::make_unique<mca::TimelineView>(
- *STI, *IP, S, TimelineMaxIterations, TimelineMaxCycles));
+ *STI, *IP, Insts, std::min(TimelineIterations, S.getNumIterations()),
+ TimelineMaxCycles));
}
- P->run();
+ if (!runPipeline(*P))
+ return 1;
+
Printer.printReport(TOF->os());
// Clear the InstrBuilder internal state in preparation for another round.
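
The driver now checks the value-or-error result of Pipeline::run() instead of ignoring it. The sketch below imitates that control flow with a tiny hand-rolled value-or-error struct so it builds without LLVM; the real code uses llvm::Expected<unsigned> and WithColor::error():

    #include <cstdio>
    #include <string>

    struct ExpectedCycles {
      bool HasValue;
      unsigned Cycles;   // valid when HasValue is true
      std::string Error; // valid when HasValue is false
    };

    static ExpectedCycles runFakePipeline(bool Fail) {
      if (Fail)
        return {false, 0, "found an unsupported instruction in the input"};
      return {true, 210, ""};
    }

    // Returns true on success, mirroring the shape of runPipeline() above.
    static bool runPipeline(bool Fail) {
      ExpectedCycles Cycles = runFakePipeline(Fail);
      if (!Cycles.HasValue) {
        std::fprintf(stderr, "error: %s\n", Cycles.Error.c_str());
        return false;
      }
      std::printf("simulated %u cycles\n", Cycles.Cycles);
      return true;
    }

    int main() {
      if (!runPipeline(/*Fail=*/false))
        return 1;
      return runPipeline(/*Fail=*/true) ? 0 : 1;
    }
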
diff --git a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
index 37c1bf85809e..042e284e8369 100644
--- a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -90,6 +91,8 @@ cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"),
cl::Grouping);
cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"),
cl::Grouping);
+cl::alias Portability("portability", cl::desc("Alias for --format=posix"),
+ cl::aliasopt(POSIXFormat), cl::NotHidden);
cl::opt<bool> DarwinFormat("m", cl::desc("Alias for --format=darwin"),
cl::Grouping);
@@ -183,6 +186,8 @@ cl::opt<bool> DyldInfoOnly("dyldinfo-only",
cl::opt<bool> NoLLVMBitcode("no-llvm-bc",
cl::desc("Disable LLVM bitcode reader"));
+cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
+
bool PrintAddress = true;
bool MultipleFiles = false;
@@ -194,7 +199,7 @@ std::string ToolName;
static void error(Twine Message, Twine Path = Twine()) {
HadError = true;
- errs() << ToolName << ": " << Path << ": " << Message << ".\n";
+ WithColor::error(errs(), ToolName) << Path << ": " << Message << ".\n";
}
static bool error(std::error_code EC, Twine Path = Twine()) {
@@ -207,11 +212,11 @@ static bool error(std::error_code EC, Twine Path = Twine()) {
// This version of error() prints the archive name and member name, for example:
// "libx.a(foo.o)" after the ToolName before the error message. It sets
-// HadError but returns allowing the code to move on to other archive members.
+// HadError but returns allowing the code to move on to other archive members.
static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
StringRef ArchitectureName = StringRef()) {
HadError = true;
- errs() << ToolName << ": " << FileName;
+ WithColor::error(errs(), ToolName) << FileName;
Expected<StringRef> NameOrErr = C.getName();
// TODO: if we have an error getting the name then it would be nice to print
@@ -228,7 +233,7 @@ static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
errs() << " " << Buf << "\n";
}
@@ -236,18 +241,18 @@ static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
// This version of error() prints the file name and which architecture slice it
// is from, for example: "foo.o (for architecture i386)" after the ToolName
// before the error message. It sets HadError but returns allowing the code to
-// move on to other architecture slices.
+// move on to other architecture slices.
static void error(llvm::Error E, StringRef FileName,
StringRef ArchitectureName = StringRef()) {
HadError = true;
- errs() << ToolName << ": " << FileName;
+ WithColor::error(errs(), ToolName) << FileName;
if (!ArchitectureName.empty())
errs() << " (for architecture " << ArchitectureName << ") ";
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
errs() << " " << Buf << "\n";
}
@@ -674,7 +679,7 @@ static void darwinPrintStab(MachOObjectFile *MachO, SymbolListT::iterator I) {
}
static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
- if (StripUnderscore && Name.size() > 0 && Name[0] == '_')
+ if (StripUnderscore && !Name.empty() && Name[0] == '_')
Name = Name.substr(1);
if (!Name.startswith("_Z"))
@@ -709,7 +714,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
if (ReverseSort)
Cmp = [=](const NMSymbol &A, const NMSymbol &B) { return Cmp(B, A); };
- llvm::sort(SymbolList.begin(), SymbolList.end(), Cmp);
+ llvm::sort(SymbolList, Cmp);
}
if (!PrintFileName) {
@@ -757,6 +762,24 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
}
}
+ auto writeFileName = [&](raw_ostream &S) {
+ if (!ArchitectureName.empty())
+ S << "(for architecture " << ArchitectureName << "):";
+ if (OutputFormat == posix && !ArchiveName.empty())
+ S << ArchiveName << "[" << CurrentFilename << "]: ";
+ else {
+ if (!ArchiveName.empty())
+ S << ArchiveName << ":";
+ S << CurrentFilename << ": ";
+ }
+ };
+
+ if (SymbolList.empty()) {
+ if (PrintFileName)
+ writeFileName(errs());
+ errs() << "no symbols\n";
+ }
+
for (SymbolListT::iterator I = SymbolList.begin(), E = SymbolList.end();
I != E; ++I) {
uint32_t SymFlags;
@@ -778,17 +801,8 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
(!Global && ExternalOnly) || (SizeSort && !PrintAddress) ||
(Weak && NoWeakSymbols))
continue;
- if (PrintFileName) {
- if (!ArchitectureName.empty())
- outs() << "(for architecture " << ArchitectureName << "):";
- if (OutputFormat == posix && !ArchiveName.empty())
- outs() << ArchiveName << "[" << CurrentFilename << "]: ";
- else {
- if (!ArchiveName.empty())
- outs() << ArchiveName << ":";
- outs() << CurrentFilename << ": ";
- }
- }
+ if (PrintFileName)
+ writeFileName(outs());
if ((JustSymbolName ||
(UndefinedOnly && MachO && OutputFormat != darwin)) &&
OutputFormat != posix) {
@@ -1018,8 +1032,7 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
StringRef SectionName;
Obj.getSectionName(Ref, SectionName);
StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref);
- if (Obj.is64Bit() &&
- Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
+ if (Obj.is64Bit() && Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
SegmentName == "__TEXT_EXEC" && SectionName == "__text")
return 't';
if (SegmentName == "__TEXT" && SectionName == "__text")
@@ -1152,7 +1165,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
// file get the section number for that section in this object file.
unsigned int Nsect = 0;
MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
- if (SegSect.size() != 0 && MachO) {
+ if (!SegSect.empty() && MachO) {
Nsect = getNsectForSegSect(MachO);
// If this section is not in the object file no symbols are printed.
if (Nsect == 0)
@@ -1170,8 +1183,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
// see if this symbol is a symbol from that section and if not skip it.
if (Nsect && Nsect != getNsectInMachO(*MachO, Sym))
continue;
- NMSymbol S;
- memset(&S, '\0', sizeof(S));
+ NMSymbol S = {};
S.Size = 0;
S.Address = 0;
if (PrintSize) {
@@ -1265,8 +1277,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
}
if (!found) {
- NMSymbol S;
- memset(&S, '\0', sizeof(NMSymbol));
+ NMSymbol S = {};
S.Address = Entry.address() + BaseSegmentAddress;
S.Size = 0;
S.TypeChar = '\0';
@@ -1356,8 +1367,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
    // Now create the undefined symbol using the referenced dynamic
// library.
- NMSymbol U;
- memset(&U, '\0', sizeof(NMSymbol));
+ NMSymbol U = {};
U.Address = 0;
U.Size = 0;
U.TypeChar = 'U';
@@ -1423,8 +1433,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
if (!found) {
LastSymbolName = Entry.symbolName();
- NMSymbol B;
- memset(&B, '\0', sizeof(NMSymbol));
+ NMSymbol B = {};
B.Address = 0;
B.Size = 0;
B.TypeChar = 'U';
@@ -1483,8 +1492,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
if (!found) {
LastSymbolName = Entry.symbolName();
- NMSymbol L;
- memset(&L, '\0', sizeof(NMSymbol));
+ NMSymbol L = {};
L.Name = Entry.symbolName();
L.Address = 0;
L.Size = 0;
@@ -1600,7 +1608,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
uint64_t lc_main_offset = UINT64_MAX;
for (const auto &Command : MachO->load_commands()) {
if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
- // We found a function starts segment, parse the addresses for
+ // We found a function starts segment, parse the addresses for
// consumption.
MachO::linkedit_data_command LLC =
MachO->getLinkeditDataLoadCommand(Command);
@@ -1622,9 +1630,8 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
    // If this address is not already in the symbol table, fake up an
    // nlist for it.
- if (!found) {
- NMSymbol F;
- memset(&F, '\0', sizeof(NMSymbol));
+ if (!found) {
+ NMSymbol F = {};
F.Name = "<redacted function X>";
F.Address = FoundFns[f] + BaseSegmentAddress;
F.Size = 0;
@@ -1744,12 +1751,14 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
outs() << "Archive map\n";
for (; I != E; ++I) {
Expected<Archive::Child> C = I->getMember();
- if (!C)
+ if (!C) {
error(C.takeError(), Filename);
+ break;
+ }
Expected<StringRef> FileNameOrErr = C->getName();
if (!FileNameOrErr) {
error(FileNameOrErr.takeError(), Filename);
- return;
+ break;
}
StringRef SymName = I->getName();
outs() << SymName << " in " << FileNameOrErr.get() << "\n";
@@ -1769,8 +1778,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
}
if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
- errs() << ToolName << ": warning sizes with -print-size for Mach-O "
- "files are always zero.\n";
+ WithColor::warning(errs(), ToolName)
+ << "sizes with -print-size for Mach-O files are always zero.\n";
MachOPrintSizeWarning = true;
}
if (!checkMachOAndArchFlags(O, Filename))
@@ -1793,7 +1802,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
}
if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
// If we have a list of architecture flags specified dump only those.
- if (!ArchAll && ArchFlags.size() != 0) {
+ if (!ArchAll && !ArchFlags.empty()) {
// Look for a slice in the universal binary that matches each ArchFlag.
bool ArchFound;
for (unsigned i = 0; i < ArchFlags.size(); ++i) {
@@ -1882,14 +1891,14 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
// No architecture flags were specified so if this contains a slice that
// matches the host architecture dump only that.
if (!ArchAll) {
- StringRef HostArchName = MachOObjectFile::getHostArch().getArchName();
+ Triple HostTriple = MachOObjectFile::getHostArch();
+ StringRef HostArchName = HostTriple.getArchName();
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
E = UB->end_objects();
I != E; ++I) {
if (HostArchName == I->getArchFlagName()) {
Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
std::string ArchiveName;
- ArchiveName.clear();
if (ObjOrErr) {
ObjectFile &Obj = *ObjOrErr.get();
dumpSymbolNamesFromObject(Obj, false);
@@ -2011,8 +2020,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
}
if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin)) {
if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
- errs() << ToolName << ": warning sizes with -print-size for Mach-O files "
- "are always zero.\n";
+ WithColor::warning(errs(), ToolName)
+ << "sizes with -print-size for Mach-O files are always zero.\n";
MachOPrintSizeWarning = true;
}
if (!checkMachOAndArchFlags(O, Filename))
@@ -2064,7 +2073,7 @@ int main(int argc, char **argv) {
}
}
- if (SegSect.size() != 0 && SegSect.size() != 2)
+ if (!SegSect.empty() && SegSect.size() != 2)
error("bad number of arguments (must be two arguments)",
"for the -s option");
diff --git a/contrib/llvm/tools/llvm-objcopy/Buffer.cpp b/contrib/llvm/tools/llvm-objcopy/Buffer.cpp
new file mode 100644
index 000000000000..8044b023aaad
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/Buffer.cpp
@@ -0,0 +1,51 @@
+//===- Buffer.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Buffer.h"
+#include "llvm-objcopy.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+
+Buffer::~Buffer() {}
+
+void FileBuffer::allocate(size_t Size) {
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(getName(), Size, FileOutputBuffer::F_executable);
+ handleAllErrors(BufferOrErr.takeError(), [this](const ErrorInfoBase &E) {
+ error("failed to open " + getName() + ": " + E.message());
+ });
+ Buf = std::move(*BufferOrErr);
+}
+
+Error FileBuffer::commit() { return Buf->commit(); }
+
+uint8_t *FileBuffer::getBufferStart() {
+ return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+}
+
+void MemBuffer::allocate(size_t Size) {
+ Buf = WritableMemoryBuffer::getNewMemBuffer(Size, getName());
+}
+
+Error MemBuffer::commit() { return Error::success(); }
+
+uint8_t *MemBuffer::getBufferStart() {
+ return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+}
+
+std::unique_ptr<WritableMemoryBuffer> MemBuffer::releaseMemoryBuffer() {
+ return std::move(Buf);
+}
+
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/Buffer.h b/contrib/llvm/tools/llvm-objcopy/Buffer.h
new file mode 100644
index 000000000000..e5b9c5b2d22b
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/Buffer.h
@@ -0,0 +1,66 @@
+//===- Buffer.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_BUFFER_H
+#define LLVM_TOOLS_OBJCOPY_BUFFER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+
+// The class Buffer abstracts out the common interface of FileOutputBuffer and
+// WritableMemoryBuffer so that the hierarchy of Writers depends on this
+// abstract interface and doesn't depend on a particular implementation.
+// TODO: refactor the buffer classes in LLVM to enable us to use them here
+// directly.
+class Buffer {
+ StringRef Name;
+
+public:
+ virtual ~Buffer();
+ virtual void allocate(size_t Size) = 0;
+ virtual uint8_t *getBufferStart() = 0;
+ virtual Error commit() = 0;
+
+ explicit Buffer(StringRef Name) : Name(Name) {}
+ StringRef getName() const { return Name; }
+};
+
+class FileBuffer : public Buffer {
+ std::unique_ptr<FileOutputBuffer> Buf;
+
+public:
+ void allocate(size_t Size) override;
+ uint8_t *getBufferStart() override;
+ Error commit() override;
+
+ explicit FileBuffer(StringRef FileName) : Buffer(FileName) {}
+};
+
+class MemBuffer : public Buffer {
+ std::unique_ptr<WritableMemoryBuffer> Buf;
+
+public:
+ void allocate(size_t Size) override;
+ uint8_t *getBufferStart() override;
+ Error commit() override;
+
+ explicit MemBuffer(StringRef Name) : Buffer(Name) {}
+
+ std::unique_ptr<WritableMemoryBuffer> releaseMemoryBuffer();
+};
+
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_BUFFER_H
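
The Buffer interface introduced above hides file-backed and memory-backed output behind one API. A minimal usage sketch (editorial aside, not part of the patch; writeBytes is a hypothetical helper, not an LLVM API):

    // Fill any Buffer implementation and commit it.
    #include "Buffer.h"
    #include "llvm/ADT/ArrayRef.h"
    #include <algorithm>

    static llvm::Error writeBytes(llvm::objcopy::Buffer &Out,
                                  llvm::ArrayRef<uint8_t> Data) {
      Out.allocate(Data.size());                 // reserve backing storage
      std::copy(Data.begin(), Data.end(), Out.getBufferStart());
      return Out.commit();                       // flush file / finalize memory
    }

    // Callers pick the concrete backing at run time, e.g.:
    //   llvm::objcopy::FileBuffer FB("out.o");  cantFail(writeBytes(FB, Data));
    //   llvm::objcopy::MemBuffer  MB("<mem>");  cantFail(writeBytes(MB, Data));
    //   auto Owned = MB.releaseMemoryBuffer();  // take ownership of in-memory result
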
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
new file mode 100644
index 000000000000..6b386d29979c
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -0,0 +1,98 @@
+//===- COFFObjcopy.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "COFFObjcopy.h"
+#include "Buffer.h"
+#include "CopyConfig.h"
+#include "Object.h"
+#include "Reader.h"
+#include "Writer.h"
+#include "llvm-objcopy.h"
+
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Errc.h"
+#include <cassert>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+using namespace COFF;
+
+static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+ // StripAll removes all symbols and thus also removes all relocations.
+ if (Config.StripAll || Config.StripAllGNU)
+ for (Section &Sec : Obj.Sections)
+ Sec.Relocs.clear();
+
+ // If we need to do per-symbol removals, initialize the Referenced field.
+ if (Config.StripUnneeded || Config.DiscardAll ||
+ !Config.SymbolsToRemove.empty())
+ if (Error E = Obj.markSymbols())
+ return E;
+
+ // Actually do removals of symbols.
+ Obj.removeSymbols([&](const Symbol &Sym) {
+ // For StripAll, all relocations have been stripped and we remove all
+ // symbols.
+ if (Config.StripAll || Config.StripAllGNU)
+ return true;
+
+ if (is_contained(Config.SymbolsToRemove, Sym.Name)) {
+ // Explicitly removing a referenced symbol is an error.
+ if (Sym.Referenced)
+ reportError(Config.OutputFilename,
+ make_error<StringError>(
+ "not stripping symbol '" + Sym.Name +
+ "' because it is named in a relocation.",
+ llvm::errc::invalid_argument));
+ return true;
+ }
+
+ if (!Sym.Referenced) {
+ // With --strip-unneeded, GNU objcopy removes all unreferenced local
+ // symbols, and any unreferenced undefined external symbols.
+ if (Config.StripUnneeded &&
+ (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC ||
+ Sym.Sym.SectionNumber == 0))
+ return true;
+
+ // GNU objcopy keeps referenced local symbols and external symbols
+ // if --discard-all is set, similar to what --strip-unneeded does,
+ // but undefined local symbols are kept when --discard-all is set.
+ if (Config.DiscardAll && Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
+ Sym.Sym.SectionNumber != 0)
+ return true;
+ }
+
+ return false;
+ });
+ return Error::success();
+}
+
+void executeObjcopyOnBinary(const CopyConfig &Config,
+ object::COFFObjectFile &In, Buffer &Out) {
+ COFFReader Reader(In);
+ Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create();
+ if (!ObjOrErr)
+ reportError(Config.InputFilename, ObjOrErr.takeError());
+ Object *Obj = ObjOrErr->get();
+ assert(Obj && "Unable to deserialize COFF object");
+ if (Error E = handleArgs(Config, *Obj))
+ reportError(Config.InputFilename, std::move(E));
+ COFFWriter Writer(*Obj, Out);
+ if (Error E = Writer.write())
+ reportError(Config.OutputFilename, std::move(E));
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h b/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h
new file mode 100644
index 000000000000..bf70bd9b4d84
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h
@@ -0,0 +1,31 @@
+//===- COFFObjcopy.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
+
+namespace llvm {
+
+namespace object {
+class COFFObjectFile;
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+class Buffer;
+
+namespace coff {
+void executeObjcopyOnBinary(const CopyConfig &Config,
+ object::COFFObjectFile &In, Buffer &Out);
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Object.cpp b/contrib/llvm/tools/llvm-objcopy/COFF/Object.cpp
new file mode 100644
index 000000000000..315d3a778623
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Object.cpp
@@ -0,0 +1,70 @@
+//===- Object.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Object.h"
+#include <algorithm>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+
+void Object::addSymbols(ArrayRef<Symbol> NewSymbols) {
+ for (Symbol S : NewSymbols) {
+ S.UniqueId = NextSymbolUniqueId++;
+ Symbols.emplace_back(S);
+ }
+ updateSymbols();
+}
+
+void Object::updateSymbols() {
+ SymbolMap = DenseMap<size_t, Symbol *>(Symbols.size());
+ size_t RawSymIndex = 0;
+ for (Symbol &Sym : Symbols) {
+ SymbolMap[Sym.UniqueId] = &Sym;
+ Sym.RawIndex = RawSymIndex;
+ RawSymIndex += 1 + Sym.Sym.NumberOfAuxSymbols;
+ }
+}
+
+const Symbol *Object::findSymbol(size_t UniqueId) const {
+ auto It = SymbolMap.find(UniqueId);
+ if (It == SymbolMap.end())
+ return nullptr;
+ return It->second;
+}
+
+void Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+ Symbols.erase(
+ std::remove_if(std::begin(Symbols), std::end(Symbols),
+ [ToRemove](const Symbol &Sym) { return ToRemove(Sym); }),
+ std::end(Symbols));
+ updateSymbols();
+}
+
+Error Object::markSymbols() {
+ for (Symbol &Sym : Symbols)
+ Sym.Referenced = false;
+ for (const Section &Sec : Sections) {
+ for (const Relocation &R : Sec.Relocs) {
+ auto It = SymbolMap.find(R.Target);
+ if (It == SymbolMap.end())
+ return make_error<StringError>("Relocation target " + Twine(R.Target) +
+ " not found",
+ object_error::invalid_symbol_index);
+ It->second->Referenced = true;
+ }
+ }
+ return Error::success();
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Object.h b/contrib/llvm/tools/llvm-objcopy/COFF/Object.h
new file mode 100644
index 000000000000..7531fb4cf39e
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Object.h
@@ -0,0 +1,148 @@
+//===- Object.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
+#define LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+struct Relocation {
+ Relocation() {}
+ Relocation(const object::coff_relocation& R) : Reloc(R) {}
+
+ object::coff_relocation Reloc;
+ size_t Target;
+ StringRef TargetName; // Used for diagnostics only
+};
+
+struct Section {
+ object::coff_section Header;
+ ArrayRef<uint8_t> Contents;
+ std::vector<Relocation> Relocs;
+ StringRef Name;
+};
+
+struct Symbol {
+ object::coff_symbol32 Sym;
+ StringRef Name;
+ ArrayRef<uint8_t> AuxData;
+ size_t UniqueId;
+ size_t RawIndex;
+ bool Referenced;
+};
+
+struct Object {
+ bool IsPE = false;
+
+ object::dos_header DosHeader;
+ ArrayRef<uint8_t> DosStub;
+
+ object::coff_file_header CoffFileHeader;
+
+ bool Is64 = false;
+ object::pe32plus_header PeHeader;
+ uint32_t BaseOfData = 0; // pe32plus_header lacks this field.
+
+ std::vector<object::data_directory> DataDirectories;
+ std::vector<Section> Sections;
+
+ ArrayRef<Symbol> getSymbols() const { return Symbols; }
+ // This allows mutating individual Symbols, but not mutating the list
+ // of symbols itself.
+ iterator_range<std::vector<Symbol>::iterator> getMutableSymbols() {
+ return make_range(Symbols.begin(), Symbols.end());
+ }
+
+ const Symbol *findSymbol(size_t UniqueId) const;
+
+ void addSymbols(ArrayRef<Symbol> NewSymbols);
+ void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
+
+ // Set the Referenced field on all Symbols, based on relocations in
+ // all sections.
+ Error markSymbols();
+
+private:
+ std::vector<Symbol> Symbols;
+ DenseMap<size_t, Symbol *> SymbolMap;
+
+ size_t NextSymbolUniqueId = 0;
+
+ // Update SymbolMap and RawIndex in each Symbol.
+ void updateSymbols();
+};
+
+// Copy between coff_symbol16 and coff_symbol32.
+// The source and destination files can use either coff_symbol16 or
+// coff_symbol32, while we always store them as coff_symbol32 in the
+// intermediate data structure.
+template <class Symbol1Ty, class Symbol2Ty>
+void copySymbol(Symbol1Ty &Dest, const Symbol2Ty &Src) {
+ static_assert(sizeof(Dest.Name.ShortName) == sizeof(Src.Name.ShortName),
+ "Mismatched name sizes");
+ memcpy(Dest.Name.ShortName, Src.Name.ShortName, sizeof(Dest.Name.ShortName));
+ Dest.Value = Src.Value;
+ Dest.SectionNumber = Src.SectionNumber;
+ Dest.Type = Src.Type;
+ Dest.StorageClass = Src.StorageClass;
+ Dest.NumberOfAuxSymbols = Src.NumberOfAuxSymbols;
+}
+
+// Copy between pe32_header and pe32plus_header.
+// We store the intermediate state in a pe32plus_header.
+template <class PeHeader1Ty, class PeHeader2Ty>
+void copyPeHeader(PeHeader1Ty &Dest, const PeHeader2Ty &Src) {
+ Dest.Magic = Src.Magic;
+ Dest.MajorLinkerVersion = Src.MajorLinkerVersion;
+ Dest.MinorLinkerVersion = Src.MinorLinkerVersion;
+ Dest.SizeOfCode = Src.SizeOfCode;
+ Dest.SizeOfInitializedData = Src.SizeOfInitializedData;
+ Dest.SizeOfUninitializedData = Src.SizeOfUninitializedData;
+ Dest.AddressOfEntryPoint = Src.AddressOfEntryPoint;
+ Dest.BaseOfCode = Src.BaseOfCode;
+ Dest.ImageBase = Src.ImageBase;
+ Dest.SectionAlignment = Src.SectionAlignment;
+ Dest.FileAlignment = Src.FileAlignment;
+ Dest.MajorOperatingSystemVersion = Src.MajorOperatingSystemVersion;
+ Dest.MinorOperatingSystemVersion = Src.MinorOperatingSystemVersion;
+ Dest.MajorImageVersion = Src.MajorImageVersion;
+ Dest.MinorImageVersion = Src.MinorImageVersion;
+ Dest.MajorSubsystemVersion = Src.MajorSubsystemVersion;
+ Dest.MinorSubsystemVersion = Src.MinorSubsystemVersion;
+ Dest.Win32VersionValue = Src.Win32VersionValue;
+ Dest.SizeOfImage = Src.SizeOfImage;
+ Dest.SizeOfHeaders = Src.SizeOfHeaders;
+ Dest.CheckSum = Src.CheckSum;
+ Dest.Subsystem = Src.Subsystem;
+ Dest.DLLCharacteristics = Src.DLLCharacteristics;
+ Dest.SizeOfStackReserve = Src.SizeOfStackReserve;
+ Dest.SizeOfStackCommit = Src.SizeOfStackCommit;
+ Dest.SizeOfHeapReserve = Src.SizeOfHeapReserve;
+ Dest.SizeOfHeapCommit = Src.SizeOfHeapCommit;
+ Dest.LoaderFlags = Src.LoaderFlags;
+ Dest.NumberOfRvaAndSize = Src.NumberOfRvaAndSize;
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
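
A rough sketch (editorial aside, not part of the patch) of how the symbol API above fits together: markSymbols() fills in Symbol::Referenced from section relocations, and removeSymbols() then applies a caller-supplied predicate. dropUnreferencedSymbol is a hypothetical helper:

    #include "Object.h"
    #include "llvm/Support/Error.h"

    static llvm::Error dropUnreferencedSymbol(llvm::objcopy::coff::Object &Obj,
                                              llvm::StringRef Name) {
      // Compute the Referenced bits so removal cannot break relocations.
      if (llvm::Error E = Obj.markSymbols())
        return E;
      // Drop the named symbol only if nothing relocates against it.
      Obj.removeSymbols([&](const llvm::objcopy::coff::Symbol &Sym) {
        return Sym.Name == Name && !Sym.Referenced;
      });
      return llvm::Error::success();
    }
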
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/contrib/llvm/tools/llvm-objcopy/COFF/Reader.cpp
new file mode 100644
index 000000000000..a01768392d7d
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Reader.cpp
@@ -0,0 +1,171 @@
+//===- Reader.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Reader.h"
+#include "Object.h"
+#include "llvm-objcopy.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+
+Error COFFReader::readExecutableHeaders(Object &Obj) const {
+ const dos_header *DH = COFFObj.getDOSHeader();
+ Obj.Is64 = COFFObj.is64();
+ if (!DH)
+ return Error::success();
+
+ Obj.IsPE = true;
+ Obj.DosHeader = *DH;
+ if (DH->AddressOfNewExeHeader > sizeof(*DH))
+ Obj.DosStub = ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&DH[1]),
+ DH->AddressOfNewExeHeader - sizeof(*DH));
+
+ if (COFFObj.is64()) {
+ const pe32plus_header *PE32Plus = nullptr;
+ if (auto EC = COFFObj.getPE32PlusHeader(PE32Plus))
+ return errorCodeToError(EC);
+ Obj.PeHeader = *PE32Plus;
+ } else {
+ const pe32_header *PE32 = nullptr;
+ if (auto EC = COFFObj.getPE32Header(PE32))
+ return errorCodeToError(EC);
+ copyPeHeader(Obj.PeHeader, *PE32);
+ // The pe32plus_header (stored in Object) lacks the BaseOfData field.
+ Obj.BaseOfData = PE32->BaseOfData;
+ }
+
+ for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) {
+ const data_directory *Dir;
+ if (auto EC = COFFObj.getDataDirectory(I, Dir))
+ return errorCodeToError(EC);
+ Obj.DataDirectories.emplace_back(*Dir);
+ }
+ return Error::success();
+}
+
+Error COFFReader::readSections(Object &Obj) const {
+ // Section indexing starts from 1.
+ for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
+ const coff_section *Sec;
+ if (auto EC = COFFObj.getSection(I, Sec))
+ return errorCodeToError(EC);
+ Obj.Sections.push_back(Section());
+ Section &S = Obj.Sections.back();
+ S.Header = *Sec;
+ if (auto EC = COFFObj.getSectionContents(Sec, S.Contents))
+ return errorCodeToError(EC);
+ ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec);
+ for (const coff_relocation &R : Relocs)
+ S.Relocs.push_back(R);
+ if (auto EC = COFFObj.getSectionName(Sec, S.Name))
+ return errorCodeToError(EC);
+ if (Sec->hasExtendedRelocations())
+ return make_error<StringError>("Extended relocations not supported yet",
+ object_error::parse_failed);
+ }
+ return Error::success();
+}
+
+Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
+ std::vector<Symbol> Symbols;
+ Symbols.reserve(COFFObj.getRawNumberOfSymbols());
+ for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) {
+ Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I);
+ if (!SymOrErr)
+ return SymOrErr.takeError();
+ COFFSymbolRef SymRef = *SymOrErr;
+
+ Symbols.push_back(Symbol());
+ Symbol &Sym = Symbols.back();
+ // Copy symbols from the original form into an intermediate coff_symbol32.
+ if (IsBigObj)
+ copySymbol(Sym.Sym,
+ *reinterpret_cast<const coff_symbol32 *>(SymRef.getRawPtr()));
+ else
+ copySymbol(Sym.Sym,
+ *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
+ if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name))
+ return errorCodeToError(EC);
+ Sym.AuxData = COFFObj.getSymbolAuxData(SymRef);
+ assert((Sym.AuxData.size() %
+ (IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0);
+ I += 1 + SymRef.getNumberOfAuxSymbols();
+ }
+ Obj.addSymbols(Symbols);
+ return Error::success();
+}
+
+Error COFFReader::setRelocTargets(Object &Obj) const {
+ std::vector<const Symbol *> RawSymbolTable;
+ for (const Symbol &Sym : Obj.getSymbols()) {
+ RawSymbolTable.push_back(&Sym);
+ for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++)
+ RawSymbolTable.push_back(nullptr);
+ }
+ for (Section &Sec : Obj.Sections) {
+ for (Relocation &R : Sec.Relocs) {
+ if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size())
+ return make_error<StringError>("SymbolTableIndex out of range",
+ object_error::parse_failed);
+ const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex];
+ if (Sym == nullptr)
+ return make_error<StringError>("Invalid SymbolTableIndex",
+ object_error::parse_failed);
+ R.Target = Sym->UniqueId;
+ R.TargetName = Sym->Name;
+ }
+ }
+ return Error::success();
+}
+
+Expected<std::unique_ptr<Object>> COFFReader::create() const {
+ auto Obj = llvm::make_unique<Object>();
+
+ const coff_file_header *CFH = nullptr;
+ const coff_bigobj_file_header *CBFH = nullptr;
+ COFFObj.getCOFFHeader(CFH);
+ COFFObj.getCOFFBigObjHeader(CBFH);
+ bool IsBigObj = false;
+ if (CFH) {
+ Obj->CoffFileHeader = *CFH;
+ } else {
+ if (!CBFH)
+ return make_error<StringError>("No COFF file header returned",
+ object_error::parse_failed);
+ // Only copying the few fields from the bigobj header that we need
+ // and won't recreate in the end.
+ Obj->CoffFileHeader.Machine = CBFH->Machine;
+ Obj->CoffFileHeader.TimeDateStamp = CBFH->TimeDateStamp;
+ IsBigObj = true;
+ }
+
+ if (Error E = readExecutableHeaders(*Obj))
+ return std::move(E);
+ if (Error E = readSections(*Obj))
+ return std::move(E);
+ if (Error E = readSymbols(*Obj, IsBigObj))
+ return std::move(E);
+ if (Error E = setRelocTargets(*Obj))
+ return std::move(E);
+
+ return std::move(Obj);
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Reader.h b/contrib/llvm/tools/llvm-objcopy/COFF/Reader.h
new file mode 100644
index 000000000000..ca7057d08c9f
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Reader.h
@@ -0,0 +1,43 @@
+//===- Reader.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_COFF_READER_H
+#define LLVM_TOOLS_OBJCOPY_COFF_READER_H
+
+#include "Buffer.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+struct Object;
+
+using object::COFFObjectFile;
+
+class COFFReader {
+ const COFFObjectFile &COFFObj;
+
+ Error readExecutableHeaders(Object &Obj) const;
+ Error readSections(Object &Obj) const;
+ Error readSymbols(Object &Obj, bool IsBigObj) const;
+ Error setRelocTargets(Object &Obj) const;
+
+public:
+ explicit COFFReader(const COFFObjectFile &O) : COFFObj(O) {}
+ Expected<std::unique_ptr<Object>> create() const;
+};
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_COFF_READER_H
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/contrib/llvm/tools/llvm-objcopy/COFF/Writer.cpp
new file mode 100644
index 000000000000..385d43b1bae5
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -0,0 +1,337 @@
+//===- Writer.cpp ---------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Writer.h"
+#include "Object.h"
+#include "llvm-objcopy.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+using namespace object;
+using namespace COFF;
+
+Error COFFWriter::finalizeRelocTargets() {
+ for (Section &Sec : Obj.Sections) {
+ for (Relocation &R : Sec.Relocs) {
+ const Symbol *Sym = Obj.findSymbol(R.Target);
+ if (Sym == nullptr)
+ return make_error<StringError>("Relocation target " + R.TargetName +
+ " (" + Twine(R.Target) +
+ ") not found",
+ object_error::invalid_symbol_index);
+ R.Reloc.SymbolTableIndex = Sym->RawIndex;
+ }
+ }
+ return Error::success();
+}
+
+void COFFWriter::layoutSections() {
+ for (auto &S : Obj.Sections) {
+ if (S.Header.SizeOfRawData > 0)
+ S.Header.PointerToRawData = FileSize;
+ FileSize += S.Header.SizeOfRawData; // For executables, this is already
+ // aligned to FileAlignment.
+ S.Header.NumberOfRelocations = S.Relocs.size();
+ S.Header.PointerToRelocations =
+ S.Header.NumberOfRelocations > 0 ? FileSize : 0;
+ FileSize += S.Relocs.size() * sizeof(coff_relocation);
+ FileSize = alignTo(FileSize, FileAlignment);
+
+ if (S.Header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
+ SizeOfInitializedData += S.Header.SizeOfRawData;
+ }
+}
+
+size_t COFFWriter::finalizeStringTable() {
+ for (auto &S : Obj.Sections)
+ if (S.Name.size() > COFF::NameSize)
+ StrTabBuilder.add(S.Name);
+
+ for (const auto &S : Obj.getSymbols())
+ if (S.Name.size() > COFF::NameSize)
+ StrTabBuilder.add(S.Name);
+
+ StrTabBuilder.finalize();
+
+ for (auto &S : Obj.Sections) {
+ if (S.Name.size() > COFF::NameSize) {
+ snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d",
+ (int)StrTabBuilder.getOffset(S.Name));
+ } else {
+ strncpy(S.Header.Name, S.Name.data(), COFF::NameSize);
+ }
+ }
+ for (auto &S : Obj.getMutableSymbols()) {
+ if (S.Name.size() > COFF::NameSize) {
+ S.Sym.Name.Offset.Zeroes = 0;
+ S.Sym.Name.Offset.Offset = StrTabBuilder.getOffset(S.Name);
+ } else {
+ strncpy(S.Sym.Name.ShortName, S.Name.data(), COFF::NameSize);
+ }
+ }
+ return StrTabBuilder.getSize();
+}
+
+template <class SymbolTy>
+std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
+ size_t SymTabSize = Obj.getSymbols().size() * sizeof(SymbolTy);
+ for (const auto &S : Obj.getSymbols())
+ SymTabSize += S.AuxData.size();
+ return std::make_pair(SymTabSize, sizeof(SymbolTy));
+}
+
+Error COFFWriter::finalize(bool IsBigObj) {
+ if (Error E = finalizeRelocTargets())
+ return E;
+
+ size_t SizeOfHeaders = 0;
+ FileAlignment = 1;
+ size_t PeHeaderSize = 0;
+ if (Obj.IsPE) {
+ Obj.DosHeader.AddressOfNewExeHeader =
+ sizeof(Obj.DosHeader) + Obj.DosStub.size();
+ SizeOfHeaders += Obj.DosHeader.AddressOfNewExeHeader + sizeof(PEMagic);
+
+ FileAlignment = Obj.PeHeader.FileAlignment;
+ Obj.PeHeader.NumberOfRvaAndSize = Obj.DataDirectories.size();
+
+ PeHeaderSize = Obj.Is64 ? sizeof(pe32plus_header) : sizeof(pe32_header);
+ SizeOfHeaders +=
+ PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
+ }
+ Obj.CoffFileHeader.NumberOfSections = Obj.Sections.size();
+ SizeOfHeaders +=
+ IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header);
+ SizeOfHeaders += sizeof(coff_section) * Obj.Sections.size();
+ SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment);
+
+ Obj.CoffFileHeader.SizeOfOptionalHeader =
+ PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
+
+ FileSize = SizeOfHeaders;
+ SizeOfInitializedData = 0;
+
+ layoutSections();
+
+ if (Obj.IsPE) {
+ Obj.PeHeader.SizeOfHeaders = SizeOfHeaders;
+ Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData;
+
+ if (!Obj.Sections.empty()) {
+ const Section &S = Obj.Sections.back();
+ Obj.PeHeader.SizeOfImage =
+ alignTo(S.Header.VirtualAddress + S.Header.VirtualSize,
+ Obj.PeHeader.SectionAlignment);
+ }
+
+ // If the PE header had a checksum, clear it, since it isn't valid
+ // any longer. (We don't calculate a new one.)
+ Obj.PeHeader.CheckSum = 0;
+ }
+
+ size_t StrTabSize = finalizeStringTable();
+ size_t SymTabSize, SymbolSize;
+ std::tie(SymTabSize, SymbolSize) = IsBigObj
+ ? finalizeSymbolTable<coff_symbol32>()
+ : finalizeSymbolTable<coff_symbol16>();
+
+ size_t PointerToSymbolTable = FileSize;
+ // StrTabSize <= 4 is the size of an empty string table, only consisting
+ // of the length field.
+ if (SymTabSize == 0 && StrTabSize <= 4 && Obj.IsPE) {
+ // For executables, don't point to the symbol table and skip writing
+ // the length field, if both the symbol and string tables are empty.
+ PointerToSymbolTable = 0;
+ StrTabSize = 0;
+ }
+
+ size_t NumRawSymbols = SymTabSize / SymbolSize;
+ Obj.CoffFileHeader.PointerToSymbolTable = PointerToSymbolTable;
+ Obj.CoffFileHeader.NumberOfSymbols = NumRawSymbols;
+ FileSize += SymTabSize + StrTabSize;
+ FileSize = alignTo(FileSize, FileAlignment);
+
+ return Error::success();
+}
+
+void COFFWriter::writeHeaders(bool IsBigObj) {
+ uint8_t *Ptr = Buf.getBufferStart();
+ if (Obj.IsPE) {
+ memcpy(Ptr, &Obj.DosHeader, sizeof(Obj.DosHeader));
+ Ptr += sizeof(Obj.DosHeader);
+ memcpy(Ptr, Obj.DosStub.data(), Obj.DosStub.size());
+ Ptr += Obj.DosStub.size();
+ memcpy(Ptr, PEMagic, sizeof(PEMagic));
+ Ptr += sizeof(PEMagic);
+ }
+ if (!IsBigObj) {
+ memcpy(Ptr, &Obj.CoffFileHeader, sizeof(Obj.CoffFileHeader));
+ Ptr += sizeof(Obj.CoffFileHeader);
+ } else {
+ // Generate a coff_bigobj_file_header, filling it in with the values
+ // from Obj.CoffFileHeader. All extra fields that don't exist in
+ // coff_file_header can be set to hardcoded values.
+ coff_bigobj_file_header BigObjHeader;
+ BigObjHeader.Sig1 = IMAGE_FILE_MACHINE_UNKNOWN;
+ BigObjHeader.Sig2 = 0xffff;
+ BigObjHeader.Version = BigObjHeader::MinBigObjectVersion;
+ BigObjHeader.Machine = Obj.CoffFileHeader.Machine;
+ BigObjHeader.TimeDateStamp = Obj.CoffFileHeader.TimeDateStamp;
+ memcpy(BigObjHeader.UUID, BigObjMagic, sizeof(BigObjMagic));
+ BigObjHeader.unused1 = 0;
+ BigObjHeader.unused2 = 0;
+ BigObjHeader.unused3 = 0;
+ BigObjHeader.unused4 = 0;
+ // The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus
+ // get the original one instead.
+ BigObjHeader.NumberOfSections = Obj.Sections.size();
+ BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable;
+ BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols;
+
+ memcpy(Ptr, &BigObjHeader, sizeof(BigObjHeader));
+ Ptr += sizeof(BigObjHeader);
+ }
+ if (Obj.IsPE) {
+ if (Obj.Is64) {
+ memcpy(Ptr, &Obj.PeHeader, sizeof(Obj.PeHeader));
+ Ptr += sizeof(Obj.PeHeader);
+ } else {
+ pe32_header PeHeader;
+ copyPeHeader(PeHeader, Obj.PeHeader);
+ // The pe32plus_header (stored in Object) lacks the BaseOfData field.
+ PeHeader.BaseOfData = Obj.BaseOfData;
+
+ memcpy(Ptr, &PeHeader, sizeof(PeHeader));
+ Ptr += sizeof(PeHeader);
+ }
+ for (const auto &DD : Obj.DataDirectories) {
+ memcpy(Ptr, &DD, sizeof(DD));
+ Ptr += sizeof(DD);
+ }
+ }
+ for (const auto &S : Obj.Sections) {
+ memcpy(Ptr, &S.Header, sizeof(S.Header));
+ Ptr += sizeof(S.Header);
+ }
+}
+
+void COFFWriter::writeSections() {
+ for (const auto &S : Obj.Sections) {
+ uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData;
+ std::copy(S.Contents.begin(), S.Contents.end(), Ptr);
+
+ // For executable sections, pad the remainder of the raw data size with
+ // 0xcc, which is int3 on x86.
+ if ((S.Header.Characteristics & IMAGE_SCN_CNT_CODE) &&
+ S.Header.SizeOfRawData > S.Contents.size())
+ memset(Ptr + S.Contents.size(), 0xcc,
+ S.Header.SizeOfRawData - S.Contents.size());
+
+ Ptr += S.Header.SizeOfRawData;
+ for (const auto &R : S.Relocs) {
+ memcpy(Ptr, &R.Reloc, sizeof(R.Reloc));
+ Ptr += sizeof(R.Reloc);
+ }
+ }
+}
+
+template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
+ uint8_t *Ptr = Buf.getBufferStart() + Obj.CoffFileHeader.PointerToSymbolTable;
+ for (const auto &S : Obj.getSymbols()) {
+ // Convert symbols back to the right size, from coff_symbol32.
+ copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
+ S.Sym);
+ Ptr += sizeof(SymbolTy);
+ std::copy(S.AuxData.begin(), S.AuxData.end(), Ptr);
+ Ptr += S.AuxData.size();
+ }
+ if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
+ // Always write a string table in object files, even an empty one.
+ StrTabBuilder.write(Ptr);
+ Ptr += StrTabBuilder.getSize();
+ }
+}
+
+Error COFFWriter::write(bool IsBigObj) {
+ if (Error E = finalize(IsBigObj))
+ return E;
+
+ Buf.allocate(FileSize);
+
+ writeHeaders(IsBigObj);
+ writeSections();
+ if (IsBigObj)
+ writeSymbolStringTables<coff_symbol32>();
+ else
+ writeSymbolStringTables<coff_symbol16>();
+
+ if (Obj.IsPE)
+ if (Error E = patchDebugDirectory())
+ return E;
+
+ return Buf.commit();
+}
+
+// Locate which sections contain the debug directories, iterate over all
+// the debug_directory structs in there, and set the PointerToRawData field
+// in all of them, according to their new physical location in the file.
+Error COFFWriter::patchDebugDirectory() {
+ if (Obj.DataDirectories.size() < DEBUG_DIRECTORY)
+ return Error::success();
+ const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
+ if (Dir->Size <= 0)
+ return Error::success();
+ for (const auto &S : Obj.Sections) {
+ if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress &&
+ Dir->RelativeVirtualAddress <
+ S.Header.VirtualAddress + S.Header.SizeOfRawData) {
+ if (Dir->RelativeVirtualAddress + Dir->Size >
+ S.Header.VirtualAddress + S.Header.SizeOfRawData)
+ return make_error<StringError>(
+ "Debug directory extends past end of section",
+ object_error::parse_failed);
+
+ size_t Offset = Dir->RelativeVirtualAddress - S.Header.VirtualAddress;
+ uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData + Offset;
+ uint8_t *End = Ptr + Dir->Size;
+ while (Ptr < End) {
+ debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr);
+ Debug->PointerToRawData =
+ S.Header.PointerToRawData + Offset + sizeof(debug_directory);
+ Ptr += sizeof(debug_directory) + Debug->SizeOfData;
+ Offset += sizeof(debug_directory) + Debug->SizeOfData;
+ }
+ // Debug directory found and patched, all done.
+ return Error::success();
+ }
+ }
+ return make_error<StringError>("Debug directory not found",
+ object_error::parse_failed);
+}
+
+Error COFFWriter::write() {
+ bool IsBigObj = Obj.Sections.size() > MaxNumberOfSections16;
+ if (IsBigObj && Obj.IsPE)
+ return make_error<StringError>("Too many sections for executable",
+ object_error::parse_failed);
+ return write(IsBigObj);
+}
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/COFF/Writer.h b/contrib/llvm/tools/llvm-objcopy/COFF/Writer.h
new file mode 100644
index 000000000000..ab66e0cc1134
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/COFF/Writer.h
@@ -0,0 +1,61 @@
+//===- Writer.h -------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
+#define LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
+
+#include "Buffer.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Error.h"
+#include <cstddef>
+#include <utility>
+
+namespace llvm {
+namespace objcopy {
+namespace coff {
+
+struct Object;
+
+class COFFWriter {
+ Object &Obj;
+ Buffer &Buf;
+
+ size_t FileSize;
+ size_t FileAlignment;
+ size_t SizeOfInitializedData;
+ StringTableBuilder StrTabBuilder;
+
+ Error finalizeRelocTargets();
+ void layoutSections();
+ size_t finalizeStringTable();
+ template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
+
+ Error finalize(bool IsBigObj);
+
+ void writeHeaders(bool IsBigObj);
+ void writeSections();
+ template <class SymbolTy> void writeSymbolStringTables();
+
+ Error write(bool IsBigObj);
+
+ Error patchDebugDirectory();
+
+public:
+ virtual ~COFFWriter() {}
+ Error write();
+
+ COFFWriter(Object &Obj, Buffer &Buf)
+ : Obj(Obj), Buf(Buf), StrTabBuilder(StringTableBuilder::WinCOFF) {}
+};
+
+} // end namespace coff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
diff --git a/contrib/llvm/tools/llvm-objcopy/CopyConfig.cpp b/contrib/llvm/tools/llvm-objcopy/CopyConfig.cpp
new file mode 100644
index 000000000000..3737f571ae61
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/CopyConfig.cpp
@@ -0,0 +1,474 @@
+//===- CopyConfig.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CopyConfig.h"
+#include "llvm-objcopy.h"
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+#include <string>
+
+namespace llvm {
+namespace objcopy {
+
+namespace {
+enum ObjcopyID {
+ OBJCOPY_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ OBJCOPY_##ID,
+#include "ObjcopyOpts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const OBJCOPY_##NAME[] = VALUE;
+#include "ObjcopyOpts.inc"
+#undef PREFIX
+
+static const opt::OptTable::Info ObjcopyInfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ {OBJCOPY_##PREFIX, \
+ NAME, \
+ HELPTEXT, \
+ METAVAR, \
+ OBJCOPY_##ID, \
+ opt::Option::KIND##Class, \
+ PARAM, \
+ FLAGS, \
+ OBJCOPY_##GROUP, \
+ OBJCOPY_##ALIAS, \
+ ALIASARGS, \
+ VALUES},
+#include "ObjcopyOpts.inc"
+#undef OPTION
+};
+
+class ObjcopyOptTable : public opt::OptTable {
+public:
+ ObjcopyOptTable() : OptTable(ObjcopyInfoTable) {}
+};
+
+enum StripID {
+ STRIP_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ STRIP_##ID,
+#include "StripOpts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const STRIP_##NAME[] = VALUE;
+#include "StripOpts.inc"
+#undef PREFIX
+
+static const opt::OptTable::Info StripInfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ {STRIP_##PREFIX, NAME, HELPTEXT, \
+ METAVAR, STRIP_##ID, opt::Option::KIND##Class, \
+ PARAM, FLAGS, STRIP_##GROUP, \
+ STRIP_##ALIAS, ALIASARGS, VALUES},
+#include "StripOpts.inc"
+#undef OPTION
+};
+
+class StripOptTable : public opt::OptTable {
+public:
+ StripOptTable() : OptTable(StripInfoTable) {}
+};
+
+enum SectionFlag {
+ SecNone = 0,
+ SecAlloc = 1 << 0,
+ SecLoad = 1 << 1,
+ SecNoload = 1 << 2,
+ SecReadonly = 1 << 3,
+ SecDebug = 1 << 4,
+ SecCode = 1 << 5,
+ SecData = 1 << 6,
+ SecRom = 1 << 7,
+ SecMerge = 1 << 8,
+ SecStrings = 1 << 9,
+ SecContents = 1 << 10,
+ SecShare = 1 << 11,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ SecShare)
+};
+
+} // namespace
+
+static SectionFlag parseSectionRenameFlag(StringRef SectionName) {
+ return llvm::StringSwitch<SectionFlag>(SectionName)
+ .Case("alloc", SectionFlag::SecAlloc)
+ .Case("load", SectionFlag::SecLoad)
+ .Case("noload", SectionFlag::SecNoload)
+ .Case("readonly", SectionFlag::SecReadonly)
+ .Case("debug", SectionFlag::SecDebug)
+ .Case("code", SectionFlag::SecCode)
+ .Case("data", SectionFlag::SecData)
+ .Case("rom", SectionFlag::SecRom)
+ .Case("merge", SectionFlag::SecMerge)
+ .Case("strings", SectionFlag::SecStrings)
+ .Case("contents", SectionFlag::SecContents)
+ .Case("share", SectionFlag::SecShare)
+ .Default(SectionFlag::SecNone);
+}
+
+static SectionRename parseRenameSectionValue(StringRef FlagValue) {
+ if (!FlagValue.contains('='))
+ error("Bad format for --rename-section: missing '='");
+
+ // Initial split: ".foo" = ".bar,f1,f2,..."
+ auto Old2New = FlagValue.split('=');
+ SectionRename SR;
+ SR.OriginalName = Old2New.first;
+
+ // Flags split: ".bar" "f1" "f2" ...
+ SmallVector<StringRef, 6> NameAndFlags;
+ Old2New.second.split(NameAndFlags, ',');
+ SR.NewName = NameAndFlags[0];
+
+ if (NameAndFlags.size() > 1) {
+ SectionFlag Flags = SectionFlag::SecNone;
+ for (size_t I = 1, Size = NameAndFlags.size(); I < Size; ++I) {
+ SectionFlag Flag = parseSectionRenameFlag(NameAndFlags[I]);
+ if (Flag == SectionFlag::SecNone)
+ error("Unrecognized section flag '" + NameAndFlags[I] +
+ "'. Flags supported for GNU compatibility: alloc, load, noload, "
+ "readonly, debug, code, data, rom, share, contents, merge, "
+ "strings.");
+ Flags |= Flag;
+ }
+
+ SR.NewFlags = 0;
+ if (Flags & SectionFlag::SecAlloc)
+ *SR.NewFlags |= ELF::SHF_ALLOC;
+ if (!(Flags & SectionFlag::SecReadonly))
+ *SR.NewFlags |= ELF::SHF_WRITE;
+ if (Flags & SectionFlag::SecCode)
+ *SR.NewFlags |= ELF::SHF_EXECINSTR;
+ if (Flags & SectionFlag::SecMerge)
+ *SR.NewFlags |= ELF::SHF_MERGE;
+ if (Flags & SectionFlag::SecStrings)
+ *SR.NewFlags |= ELF::SHF_STRINGS;
+ }
+
+ return SR;
+}
+
+static const StringMap<MachineInfo> ArchMap{
+ // Name, {EMachine, 64bit, LittleEndian}
+ {"aarch64", {ELF::EM_AARCH64, true, true}},
+ {"arm", {ELF::EM_ARM, false, true}},
+ {"i386", {ELF::EM_386, false, true}},
+ {"i386:x86-64", {ELF::EM_X86_64, true, true}},
+ {"powerpc:common64", {ELF::EM_PPC64, true, true}},
+ {"sparc", {ELF::EM_SPARC, false, true}},
+ {"x86-64", {ELF::EM_X86_64, true, true}},
+};
+
+static const MachineInfo &getMachineInfo(StringRef Arch) {
+ auto Iter = ArchMap.find(Arch);
+ if (Iter == std::end(ArchMap))
+ error("Invalid architecture: '" + Arch + "'");
+ return Iter->getValue();
+}
+
+static const StringMap<MachineInfo> OutputFormatMap{
+ // Name, {EMachine, 64bit, LittleEndian}
+ {"elf32-i386", {ELF::EM_386, false, true}},
+ {"elf32-powerpcle", {ELF::EM_PPC, false, true}},
+ {"elf32-x86-64", {ELF::EM_X86_64, false, true}},
+ {"elf64-powerpcle", {ELF::EM_PPC64, true, true}},
+ {"elf64-x86-64", {ELF::EM_X86_64, true, true}},
+};
+
+static const MachineInfo &getOutputFormatMachineInfo(StringRef Format) {
+ auto Iter = OutputFormatMap.find(Format);
+ if (Iter == std::end(OutputFormatMap))
+ error("Invalid output format: '" + Format + "'");
+ return Iter->getValue();
+}
+
+static void addGlobalSymbolsFromFile(std::vector<std::string> &Symbols,
+ StringRef Filename) {
+ SmallVector<StringRef, 16> Lines;
+ auto BufOrErr = MemoryBuffer::getFile(Filename);
+ if (!BufOrErr)
+ reportError(Filename, BufOrErr.getError());
+
+ BufOrErr.get()->getBuffer().split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ // Ignore everything after '#', trim whitespace, and only add the symbol if
+ // it's not empty.
+ auto TrimmedLine = Line.split('#').first.trim();
+ if (!TrimmedLine.empty())
+ Symbols.push_back(TrimmedLine.str());
+ }
+}
+
+// ParseObjcopyOptions returns the config and sets the input arguments. If a
+// help flag is set then ParseObjcopyOptions will print the help message and
+// exit.
+DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
+ ObjcopyOptTable T;
+ unsigned MissingArgumentIndex, MissingArgumentCount;
+ llvm::opt::InputArgList InputArgs =
+ T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
+
+ if (InputArgs.size() == 0) {
+ T.PrintHelp(errs(), "llvm-objcopy input [output]", "objcopy tool");
+ exit(1);
+ }
+
+ if (InputArgs.hasArg(OBJCOPY_help)) {
+ T.PrintHelp(outs(), "llvm-objcopy input [output]", "objcopy tool");
+ exit(0);
+ }
+
+ if (InputArgs.hasArg(OBJCOPY_version)) {
+ outs() << "llvm-objcopy, compatible with GNU objcopy\n";
+ cl::PrintVersionMessage();
+ exit(0);
+ }
+
+ SmallVector<const char *, 2> Positional;
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_UNKNOWN))
+ error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_INPUT))
+ Positional.push_back(Arg->getValue());
+
+ if (Positional.empty())
+ error("No input file specified");
+
+ if (Positional.size() > 2)
+ error("Too many positional arguments");
+
+ CopyConfig Config;
+ Config.InputFilename = Positional[0];
+ Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1];
+ if (InputArgs.hasArg(OBJCOPY_target) &&
+ (InputArgs.hasArg(OBJCOPY_input_target) ||
+ InputArgs.hasArg(OBJCOPY_output_target)))
+ error("--target cannot be used with --input-target or --output-target");
+
+ if (InputArgs.hasArg(OBJCOPY_target)) {
+ Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
+ Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
+ } else {
+ Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
+ Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
+ }
+ if (Config.InputFormat == "binary") {
+ auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
+ if (BinaryArch.empty())
+ error("Specified binary input without specifiying an architecture");
+ Config.BinaryArch = getMachineInfo(BinaryArch);
+ }
+ if (!Config.OutputFormat.empty() && Config.OutputFormat != "binary")
+ Config.OutputArch = getOutputFormatMachineInfo(Config.OutputFormat);
+
+ if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections,
+ OBJCOPY_compress_debug_sections_eq)) {
+ Config.CompressionType = DebugCompressionType::Z;
+
+ if (Arg->getOption().getID() == OBJCOPY_compress_debug_sections_eq) {
+ Config.CompressionType =
+ StringSwitch<DebugCompressionType>(
+ InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq))
+ .Case("zlib-gnu", DebugCompressionType::GNU)
+ .Case("zlib", DebugCompressionType::Z)
+ .Default(DebugCompressionType::None);
+ if (Config.CompressionType == DebugCompressionType::None)
+ error("Invalid or unsupported --compress-debug-sections format: " +
+ InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq));
+ if (!zlib::isAvailable())
+ error("LLVM was not compiled with LLVM_ENABLE_ZLIB: can not compress.");
+ }
+ }
+
+ Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink);
+ Config.BuildIdLinkDir = InputArgs.getLastArgValue(OBJCOPY_build_id_link_dir);
+ if (InputArgs.hasArg(OBJCOPY_build_id_link_input))
+ Config.BuildIdLinkInput =
+ InputArgs.getLastArgValue(OBJCOPY_build_id_link_input);
+ if (InputArgs.hasArg(OBJCOPY_build_id_link_output))
+ Config.BuildIdLinkOutput =
+ InputArgs.getLastArgValue(OBJCOPY_build_id_link_output);
+ Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo);
+ Config.SymbolsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_symbols);
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) {
+ if (!StringRef(Arg->getValue()).contains('='))
+ error("Bad format for --redefine-sym");
+ auto Old2New = StringRef(Arg->getValue()).split('=');
+ if (!Config.SymbolsToRename.insert(Old2New).second)
+ error("Multiple redefinition of symbol " + Old2New.first);
+ }
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_rename_section)) {
+ SectionRename SR = parseRenameSectionValue(StringRef(Arg->getValue()));
+ if (!Config.SectionsToRename.try_emplace(SR.OriginalName, SR).second)
+ error("Multiple renames of section " + SR.OriginalName);
+ }
+
+ for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section))
+ Config.ToRemove.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section))
+ Config.KeepSection.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_only_section))
+ Config.OnlySection.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_add_section))
+ Config.AddSection.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_dump_section))
+ Config.DumpSection.push_back(Arg->getValue());
+ Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all);
+ Config.StripAllGNU = InputArgs.hasArg(OBJCOPY_strip_all_gnu);
+ Config.StripDebug = InputArgs.hasArg(OBJCOPY_strip_debug);
+ Config.StripDWO = InputArgs.hasArg(OBJCOPY_strip_dwo);
+ Config.StripSections = InputArgs.hasArg(OBJCOPY_strip_sections);
+ Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc);
+ Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded);
+ Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
+ Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
+ Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
+ Config.DiscardAll = InputArgs.hasArg(OBJCOPY_discard_all);
+ Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
+ Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
+ Config.DecompressDebugSections =
+ InputArgs.hasArg(OBJCOPY_decompress_debug_sections);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
+ Config.SymbolsToLocalize.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol))
+ Config.SymbolsToKeepGlobal.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols))
+ addGlobalSymbolsFromFile(Config.SymbolsToKeepGlobal, Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol))
+ Config.SymbolsToGlobalize.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol))
+ Config.SymbolsToWeaken.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol))
+ Config.SymbolsToRemove.push_back(Arg->getValue());
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol))
+ Config.SymbolsToKeep.push_back(Arg->getValue());
+
+ Config.DeterministicArchives = InputArgs.hasFlag(
+ OBJCOPY_enable_deterministic_archives,
+ OBJCOPY_disable_deterministic_archives, /*default=*/true);
+
+ Config.PreserveDates = InputArgs.hasArg(OBJCOPY_preserve_dates);
+
+ if (Config.DecompressDebugSections &&
+ Config.CompressionType != DebugCompressionType::None) {
+ error("Cannot specify --compress-debug-sections at the same time as "
+ "--decompress-debug-sections at the same time");
+ }
+
+ if (Config.DecompressDebugSections && !zlib::isAvailable())
+ error("LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress.");
+
+ DriverConfig DC;
+ DC.CopyConfigs.push_back(std::move(Config));
+ return DC;
+}
+
+// ParseStripOptions returns the config and sets the input arguments. If a
+// help flag is set then ParseStripOptions will print the help message and
+// exit.
+DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr) {
+ StripOptTable T;
+ unsigned MissingArgumentIndex, MissingArgumentCount;
+ llvm::opt::InputArgList InputArgs =
+ T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
+
+ if (InputArgs.size() == 0) {
+ T.PrintHelp(errs(), "llvm-strip [options] file...", "strip tool");
+ exit(1);
+ }
+
+ if (InputArgs.hasArg(STRIP_help)) {
+ T.PrintHelp(outs(), "llvm-strip [options] file...", "strip tool");
+ exit(0);
+ }
+
+ if (InputArgs.hasArg(STRIP_version)) {
+ outs() << "llvm-strip, compatible with GNU strip\n";
+ cl::PrintVersionMessage();
+ exit(0);
+ }
+
+ SmallVector<const char *, 2> Positional;
+ for (auto Arg : InputArgs.filtered(STRIP_UNKNOWN))
+ error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+ for (auto Arg : InputArgs.filtered(STRIP_INPUT))
+ Positional.push_back(Arg->getValue());
+
+ if (Positional.empty())
+ error("No input file specified");
+
+ if (Positional.size() > 1 && InputArgs.hasArg(STRIP_output))
+ error("Multiple input files cannot be used in combination with -o");
+
+ CopyConfig Config;
+ Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
+
+ Config.DiscardAll = InputArgs.hasArg(STRIP_discard_all);
+ Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded);
+ Config.StripAll = InputArgs.hasArg(STRIP_strip_all);
+ Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu);
+
+ if (!Config.StripDebug && !Config.StripUnneeded && !Config.DiscardAll &&
+ !Config.StripAllGNU)
+ Config.StripAll = true;
+
+ for (auto Arg : InputArgs.filtered(STRIP_keep_section))
+ Config.KeepSection.push_back(Arg->getValue());
+
+ for (auto Arg : InputArgs.filtered(STRIP_remove_section))
+ Config.ToRemove.push_back(Arg->getValue());
+
+ for (auto Arg : InputArgs.filtered(STRIP_keep_symbol))
+ Config.SymbolsToKeep.push_back(Arg->getValue());
+
+ Config.DeterministicArchives =
+ InputArgs.hasFlag(STRIP_enable_deterministic_archives,
+ STRIP_disable_deterministic_archives, /*default=*/true);
+
+ Config.PreserveDates = InputArgs.hasArg(STRIP_preserve_dates);
+
+ DriverConfig DC;
+ if (Positional.size() == 1) {
+ Config.InputFilename = Positional[0];
+ Config.OutputFilename =
+ InputArgs.getLastArgValue(STRIP_output, Positional[0]);
+ DC.CopyConfigs.push_back(std::move(Config));
+ } else {
+ for (const char *Filename : Positional) {
+ Config.InputFilename = Filename;
+ Config.OutputFilename = Filename;
+ DC.CopyConfigs.push_back(Config);
+ }
+ }
+
+ return DC;
+}
+
+} // namespace objcopy
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/CopyConfig.h b/contrib/llvm/tools/llvm-objcopy/CopyConfig.h
new file mode 100644
index 000000000000..71a2423ae1c8
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/CopyConfig.h
@@ -0,0 +1,119 @@
+//===- CopyConfig.h -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H
+#define LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+// Necessary for llvm::DebugCompressionType::None
+#include "llvm/Target/TargetOptions.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+
+// This type keeps track of the machine info for various architectures. This
+// lets us map architecture names to ELF types and the e_machine value of the
+// ELF file.
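+// For example, an "elf64-x86-64" target corresponds to {EM_X86_64,
+// /*Is64Bit=*/true, /*IsLittleEndian=*/true}.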
+struct MachineInfo {
+ uint16_t EMachine;
+ bool Is64Bit;
+ bool IsLittleEndian;
+};
+
+struct SectionRename {
+ StringRef OriginalName;
+ StringRef NewName;
+ Optional<uint64_t> NewFlags;
+};
+
+// Configuration for copying/stripping a single file.
+struct CopyConfig {
+ // Main input/output options
+ StringRef InputFilename;
+ StringRef InputFormat;
+ StringRef OutputFilename;
+ StringRef OutputFormat;
+
+ // Only applicable for --input-format=binary
+ MachineInfo BinaryArch;
+ // Only applicable when --output-format!=binary (e.g. elf64-x86-64).
+ Optional<MachineInfo> OutputArch;
+
+ // Advanced options
+ StringRef AddGnuDebugLink;
+ StringRef BuildIdLinkDir;
+ Optional<StringRef> BuildIdLinkInput;
+ Optional<StringRef> BuildIdLinkOutput;
+ StringRef SplitDWO;
+ StringRef SymbolsPrefix;
+
+ // Repeated options
+ std::vector<StringRef> AddSection;
+ std::vector<StringRef> DumpSection;
+ std::vector<StringRef> KeepSection;
+ std::vector<StringRef> OnlySection;
+ std::vector<StringRef> SymbolsToGlobalize;
+ std::vector<StringRef> SymbolsToKeep;
+ std::vector<StringRef> SymbolsToLocalize;
+ std::vector<StringRef> SymbolsToRemove;
+ std::vector<StringRef> SymbolsToWeaken;
+ std::vector<StringRef> ToRemove;
+ std::vector<std::string> SymbolsToKeepGlobal;
+
+ // Map options
+ StringMap<SectionRename> SectionsToRename;
+ StringMap<StringRef> SymbolsToRename;
+
+ // Boolean options
+ bool DeterministicArchives = true;
+ bool DiscardAll = false;
+ bool ExtractDWO = false;
+ bool KeepFileSymbols = false;
+ bool LocalizeHidden = false;
+ bool OnlyKeepDebug = false;
+ bool PreserveDates = false;
+ bool StripAll = false;
+ bool StripAllGNU = false;
+ bool StripDWO = false;
+ bool StripDebug = false;
+ bool StripNonAlloc = false;
+ bool StripSections = false;
+ bool StripUnneeded = false;
+ bool Weaken = false;
+ bool DecompressDebugSections = false;
+ DebugCompressionType CompressionType = DebugCompressionType::None;
+};
+
+// Configuration for the overall invocation of this tool. When invoked as
+// objcopy, will always contain exactly one CopyConfig. When invoked as strip,
+// will contain one or more CopyConfigs.
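+// For example, "llvm-strip a.o b.o" produces one CopyConfig per input file,
+// each stripping that file in place.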
+struct DriverConfig {
+ SmallVector<CopyConfig, 1> CopyConfigs;
+};
+
+// parseObjcopyOptions returns the config and sets the input arguments. If a
+// help flag is set then parseObjcopyOptions will print the help message and
+// exit.
+DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr);
+
+// parseStripOptions returns the config and sets the input arguments. If a
+// help flag is set then parseStripOptions will print the help message and
+// exit.
+DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr);
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
new file mode 100644
index 000000000000..f5ab8e708267
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -0,0 +1,584 @@
+//===- ELFObjcopy.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ELFObjcopy.h"
+#include "Buffer.h"
+#include "CopyConfig.h"
+#include "Object.h"
+#include "llvm-objcopy.h"
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+
+namespace llvm {
+namespace objcopy {
+namespace elf {
+
+using namespace object;
+using namespace ELF;
+using SectionPred = std::function<bool(const SectionBase &Sec)>;
+
+static bool isDebugSection(const SectionBase &Sec) {
+ return StringRef(Sec.Name).startswith(".debug") ||
+ StringRef(Sec.Name).startswith(".zdebug") || Sec.Name == ".gdb_index";
+}
+
+static bool isDWOSection(const SectionBase &Sec) {
+ return StringRef(Sec.Name).endswith(".dwo");
+}
+
+static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
+ // We can't remove the section header string table.
+ if (&Sec == Obj.SectionNames)
+ return false;
+ // Short of keeping the string table we want to keep everything that is a DWO
+ // section and remove everything else.
+ return !isDWOSection(Sec);
+}
+
+static ElfType getOutputElfType(const Binary &Bin) {
+ // Infer output ELF type from the input ELF object
+ if (isa<ELFObjectFile<ELF32LE>>(Bin))
+ return ELFT_ELF32LE;
+ if (isa<ELFObjectFile<ELF64LE>>(Bin))
+ return ELFT_ELF64LE;
+ if (isa<ELFObjectFile<ELF32BE>>(Bin))
+ return ELFT_ELF32BE;
+ if (isa<ELFObjectFile<ELF64BE>>(Bin))
+ return ELFT_ELF64BE;
+ llvm_unreachable("Invalid ELFType");
+}
+
+static ElfType getOutputElfType(const MachineInfo &MI) {
+ // Infer output ELF type from the binary arch specified
+ if (MI.Is64Bit)
+ return MI.IsLittleEndian ? ELFT_ELF64LE : ELFT_ELF64BE;
+ else
+ return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
+}
+
+static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
+ Object &Obj, Buffer &Buf,
+ ElfType OutputElfType) {
+ if (Config.OutputFormat == "binary") {
+ return llvm::make_unique<BinaryWriter>(Obj, Buf);
+ }
+ // Depending on the initial ELFT and OutputFormat we need a different Writer.
+ switch (OutputElfType) {
+ case ELFT_ELF32LE:
+ return llvm::make_unique<ELFWriter<ELF32LE>>(Obj, Buf,
+ !Config.StripSections);
+ case ELFT_ELF64LE:
+ return llvm::make_unique<ELFWriter<ELF64LE>>(Obj, Buf,
+ !Config.StripSections);
+ case ELFT_ELF32BE:
+ return llvm::make_unique<ELFWriter<ELF32BE>>(Obj, Buf,
+ !Config.StripSections);
+ case ELFT_ELF64BE:
+ return llvm::make_unique<ELFWriter<ELF64BE>>(Obj, Buf,
+ !Config.StripSections);
+ }
+ llvm_unreachable("Invalid output format");
+}
+
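+// Searches the PT_NOTE program headers for an NT_GNU_BUILD_ID note and returns
+// its descriptor bytes, or an error if no build ID note is present.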
+template <class ELFT>
+static Expected<ArrayRef<uint8_t>>
+findBuildID(const object::ELFFile<ELFT> &In) {
+ for (const auto &Phdr : unwrapOrError(In.program_headers())) {
+ if (Phdr.p_type != PT_NOTE)
+ continue;
+ Error Err = Error::success();
+ for (const auto &Note : In.notes(Phdr, Err))
+ if (Note.getType() == NT_GNU_BUILD_ID && Note.getName() == ELF_NOTE_GNU)
+ return Note.getDesc();
+ if (Err)
+ return std::move(Err);
+ }
+ return createStringError(llvm::errc::invalid_argument,
+ "Could not find build ID.");
+}
+
+static Expected<ArrayRef<uint8_t>>
+findBuildID(const object::ELFObjectFileBase &In) {
+ if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(&In))
+ return findBuildID(*O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(&In))
+ return findBuildID(*O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(&In))
+ return findBuildID(*O->getELFFile());
+ else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(&In))
+ return findBuildID(*O->getELFFile());
+
+ llvm_unreachable("Bad file format");
+}
+
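+// Hard-links ToLink into Config.BuildIdLinkDir, deriving the path from the
+// build ID: the first byte becomes a subdirectory and the remaining bytes plus
+// Suffix become the file name, e.g. (illustrative) <dir>/ab/cdef....debug for a
+// build ID beginning 0xab 0xcd 0xef... and Suffix ".debug".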
+static void linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink,
+ StringRef Suffix, ArrayRef<uint8_t> BuildIdBytes) {
+ SmallString<128> Path = Config.BuildIdLinkDir;
+ sys::path::append(Path, llvm::toHex(BuildIdBytes[0], /*LowerCase*/ true));
+ if (auto EC = sys::fs::create_directories(Path))
+ error("cannot create build ID link directory " + Path + ": " +
+ EC.message());
+
+ sys::path::append(Path,
+ llvm::toHex(BuildIdBytes.slice(1), /*LowerCase*/ true));
+ Path += Suffix;
+ if (auto EC = sys::fs::create_hard_link(ToLink, Path)) {
+ // Hard linking failed, try to remove the file first if it exists.
+ if (sys::fs::exists(Path))
+ sys::fs::remove(Path);
+ EC = sys::fs::create_hard_link(ToLink, Path);
+ if (EC)
+ error("cannot link " + ToLink + " to " + Path + ": " + EC.message());
+ }
+}
+
+static void splitDWOToFile(const CopyConfig &Config, const Reader &Reader,
+ StringRef File, ElfType OutputElfType) {
+ auto DWOFile = Reader.create();
+ DWOFile->removeSections(
+ [&](const SectionBase &Sec) { return onlyKeepDWOPred(*DWOFile, Sec); });
+ if (Config.OutputArch)
+ DWOFile->Machine = Config.OutputArch.getValue().EMachine;
+ FileBuffer FB(File);
+ auto Writer = createWriter(Config, *DWOFile, FB, OutputElfType);
+ Writer->finalize();
+ Writer->write();
+}
+
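+// Writes the raw contents of the section named SecName to Filename; this backs
+// the --dump-section <section>=<file> handling in handleArgs below.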
+static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
+ Object &Obj) {
+ for (auto &Sec : Obj.sections()) {
+ if (Sec.Name == SecName) {
+ if (Sec.OriginalData.empty())
+ return make_error<StringError>("Can't dump section \"" + SecName +
+ "\": it has no contents",
+ object_error::parse_failed);
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(Filename, Sec.OriginalData.size());
+ if (!BufferOrErr)
+ return BufferOrErr.takeError();
+ std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+ std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(),
+ Buf->getBufferStart());
+ if (Error E = Buf->commit())
+ return E;
+ return Error::success();
+ }
+ }
+ return make_error<StringError>("Section not found",
+ object_error::parse_failed);
+}
+
+static bool isCompressed(const SectionBase &Section) {
+ const char *Magic = "ZLIB";
+ return StringRef(Section.Name).startswith(".zdebug") ||
+ (Section.OriginalData.size() > strlen(Magic) &&
+ !strncmp(reinterpret_cast<const char *>(Section.OriginalData.data()),
+ Magic, strlen(Magic))) ||
+ (Section.Flags & ELF::SHF_COMPRESSED);
+}
+
+static bool isCompressable(const SectionBase &Section) {
+ return !isCompressed(Section) && isDebugSection(Section) &&
+ Section.Name != ".gdb_index";
+}
+
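+// Replaces every section matched by shouldReplace with the section returned by
+// addSection, retargets any relocation section that referenced the original,
+// and extends RemovePred so that the originals are removed afterwards.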
+static void replaceDebugSections(
+ const CopyConfig &Config, Object &Obj, SectionPred &RemovePred,
+ function_ref<bool(const SectionBase &)> shouldReplace,
+ function_ref<SectionBase *(const SectionBase *)> addSection) {
+ SmallVector<SectionBase *, 13> ToReplace;
+ SmallVector<RelocationSection *, 13> RelocationSections;
+ for (auto &Sec : Obj.sections()) {
+ if (RelocationSection *R = dyn_cast<RelocationSection>(&Sec)) {
+ if (shouldReplace(*R->getSection()))
+ RelocationSections.push_back(R);
+ continue;
+ }
+
+ if (shouldReplace(Sec))
+ ToReplace.push_back(&Sec);
+ }
+
+ for (SectionBase *S : ToReplace) {
+ SectionBase *NewSection = addSection(S);
+
+ for (RelocationSection *RS : RelocationSections) {
+ if (RS->getSection() == S)
+ RS->setSection(NewSection);
+ }
+ }
+
+ RemovePred = [shouldReplace, RemovePred](const SectionBase &Sec) {
+ return shouldReplace(Sec) || RemovePred(Sec);
+ };
+}
+
+// This function handles the high-level operations of GNU objcopy, including
+// handling command line options. It is important to outline certain properties
+// we expect these operations to satisfy. Any operation that "keeps" should
+// keep regardless of a remove. Additionally, any removal should respect any
+// previous removals. Lastly, whether or not something is removed shouldn't
+// depend a) on the order the options occur in or b) on some opaque priority
+// system. The only priority is that keeps/copies overrule removes.
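+//
+// These properties fall out of how RemovePred is composed below: each option
+// wraps the previous predicate rather than replacing it, e.g. --strip-debug
+// becomes
+//   RemovePred = [RemovePred](const SectionBase &Sec) {
+//     return RemovePred(Sec) || isDebugSection(Sec);
+//   };
+// while the "keep" options short-circuit to false for matching sections before
+// deferring to the chain.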
+static void handleArgs(const CopyConfig &Config, Object &Obj,
+ const Reader &Reader, ElfType OutputElfType) {
+
+ if (!Config.SplitDWO.empty()) {
+ splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType);
+ }
+ if (Config.OutputArch)
+ Obj.Machine = Config.OutputArch.getValue().EMachine;
+
+ // TODO: update or remove symbols only if there is an option that affects
+ // them.
+ if (Obj.SymbolTable) {
+ Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
+ if (!Sym.isCommon() &&
+ ((Config.LocalizeHidden &&
+ (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
+ is_contained(Config.SymbolsToLocalize, Sym.Name)))
+ Sym.Binding = STB_LOCAL;
+
+ // Note: these two globalize flags have very similar names but different
+ // meanings:
+ //
+ // --globalize-symbol: promote a symbol to global
+ // --keep-global-symbol: all symbols except for these should be made local
+ //
+ // If --globalize-symbol is specified for a given symbol, it will be
+ // global in the output file even if it is not included via
+ // --keep-global-symbol. Because of that, make sure to check
+ // --globalize-symbol second.
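+ // For example (illustrative), with "--keep-global-symbol foo
+ // --globalize-symbol bar", the symbol bar is first made local by the
+ // keep-global check below and then promoted back to global by the
+ // globalize check that follows it.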
+ if (!Config.SymbolsToKeepGlobal.empty() &&
+ !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_LOCAL;
+
+ if (is_contained(Config.SymbolsToGlobalize, Sym.Name) &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_GLOBAL;
+
+ if (is_contained(Config.SymbolsToWeaken, Sym.Name) &&
+ Sym.Binding == STB_GLOBAL)
+ Sym.Binding = STB_WEAK;
+
+ if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_WEAK;
+
+ const auto I = Config.SymbolsToRename.find(Sym.Name);
+ if (I != Config.SymbolsToRename.end())
+ Sym.Name = I->getValue();
+
+ if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION)
+ Sym.Name = (Config.SymbolsPrefix + Sym.Name).str();
+ });
+
+ // The purpose of this loop is to mark symbols referenced by sections
+ // (like GroupSection or RelocationSection). This way, we know which
+ // symbols are still 'needed' and which are not.
+ if (Config.StripUnneeded) {
+ for (auto &Section : Obj.sections())
+ Section.markSymbols();
+ }
+
+ Obj.removeSymbols([&](const Symbol &Sym) {
+ if (is_contained(Config.SymbolsToKeep, Sym.Name) ||
+ (Config.KeepFileSymbols && Sym.Type == STT_FILE))
+ return false;
+
+ if (Config.DiscardAll && Sym.Binding == STB_LOCAL &&
+ Sym.getShndx() != SHN_UNDEF && Sym.Type != STT_FILE &&
+ Sym.Type != STT_SECTION)
+ return true;
+
+ if (Config.StripAll || Config.StripAllGNU)
+ return true;
+
+ if (is_contained(Config.SymbolsToRemove, Sym.Name))
+ return true;
+
+ if (Config.StripUnneeded && !Sym.Referenced &&
+ (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
+ Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
+ return true;
+
+ return false;
+ });
+ }
+
+ SectionPred RemovePred = [](const SectionBase &) { return false; };
+
+ // Removes:
+ if (!Config.ToRemove.empty()) {
+ RemovePred = [&Config](const SectionBase &Sec) {
+ return is_contained(Config.ToRemove, Sec.Name);
+ };
+ }
+
+ if (Config.StripDWO || !Config.SplitDWO.empty())
+ RemovePred = [RemovePred](const SectionBase &Sec) {
+ return isDWOSection(Sec) || RemovePred(Sec);
+ };
+
+ if (Config.ExtractDWO)
+ RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+ return onlyKeepDWOPred(Obj, Sec) || RemovePred(Sec);
+ };
+
+ if (Config.StripAllGNU)
+ RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+ if (RemovePred(Sec))
+ return true;
+ if ((Sec.Flags & SHF_ALLOC) != 0)
+ return false;
+ if (&Sec == Obj.SectionNames)
+ return false;
+ switch (Sec.Type) {
+ case SHT_SYMTAB:
+ case SHT_REL:
+ case SHT_RELA:
+ case SHT_STRTAB:
+ return true;
+ }
+ return isDebugSection(Sec);
+ };
+
+ if (Config.StripSections) {
+ RemovePred = [RemovePred](const SectionBase &Sec) {
+ return RemovePred(Sec) || (Sec.Flags & SHF_ALLOC) == 0;
+ };
+ }
+
+ if (Config.StripDebug) {
+ RemovePred = [RemovePred](const SectionBase &Sec) {
+ return RemovePred(Sec) || isDebugSection(Sec);
+ };
+ }
+
+ if (Config.StripNonAlloc)
+ RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+ if (RemovePred(Sec))
+ return true;
+ if (&Sec == Obj.SectionNames)
+ return false;
+ return (Sec.Flags & SHF_ALLOC) == 0;
+ };
+
+ if (Config.StripAll)
+ RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
+ if (RemovePred(Sec))
+ return true;
+ if (&Sec == Obj.SectionNames)
+ return false;
+ if (StringRef(Sec.Name).startswith(".gnu.warning"))
+ return false;
+ return (Sec.Flags & SHF_ALLOC) == 0;
+ };
+
+ // Explicit copies:
+ if (!Config.OnlySection.empty()) {
+ RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
+ // Explicitly keep these sections regardless of previous removes.
+ if (is_contained(Config.OnlySection, Sec.Name))
+ return false;
+
+ // Allow all implicit removes.
+ if (RemovePred(Sec))
+ return true;
+
+ // Keep special sections.
+ if (Obj.SectionNames == &Sec)
+ return false;
+ if (Obj.SymbolTable == &Sec ||
+ (Obj.SymbolTable && Obj.SymbolTable->getStrTab() == &Sec))
+ return false;
+
+ // Remove everything else.
+ return true;
+ };
+ }
+
+ if (!Config.KeepSection.empty()) {
+ RemovePred = [&Config, RemovePred](const SectionBase &Sec) {
+ // Explicitly keep these sections regardless of previous removes.
+ if (is_contained(Config.KeepSection, Sec.Name))
+ return false;
+ // Otherwise defer to RemovePred.
+ return RemovePred(Sec);
+ };
+ }
+
+ // This has to be the last predicate assignment.
+ // If the option --keep-symbol has been specified
+ // and at least one of those symbols is present
+ // (equivalently, the updated symbol table is not empty)
+ // the symbol table and the string table should not be removed.
+ if ((!Config.SymbolsToKeep.empty() || Config.KeepFileSymbols) &&
+ Obj.SymbolTable && !Obj.SymbolTable->empty()) {
+ RemovePred = [&Obj, RemovePred](const SectionBase &Sec) {
+ if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab())
+ return false;
+ return RemovePred(Sec);
+ };
+ }
+
+ if (Config.CompressionType != DebugCompressionType::None)
+ replaceDebugSections(Config, Obj, RemovePred, isCompressable,
+ [&Config, &Obj](const SectionBase *S) {
+ return &Obj.addSection<CompressedSection>(
+ *S, Config.CompressionType);
+ });
+ else if (Config.DecompressDebugSections)
+ replaceDebugSections(
+ Config, Obj, RemovePred,
+ [](const SectionBase &S) { return isa<CompressedSection>(&S); },
+ [&Obj](const SectionBase *S) {
+ auto CS = cast<CompressedSection>(S);
+ return &Obj.addSection<DecompressedSection>(*CS);
+ });
+
+ Obj.removeSections(RemovePred);
+
+ if (!Config.SectionsToRename.empty()) {
+ for (auto &Sec : Obj.sections()) {
+ const auto Iter = Config.SectionsToRename.find(Sec.Name);
+ if (Iter != Config.SectionsToRename.end()) {
+ const SectionRename &SR = Iter->second;
+ Sec.Name = SR.NewName;
+ if (SR.NewFlags.hasValue()) {
+ // Preserve some flags which should not be dropped when setting flags.
+ // Also, preserve anything OS/processor dependent.
+ const uint64_t PreserveMask = ELF::SHF_COMPRESSED | ELF::SHF_EXCLUDE |
+ ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
+ ELF::SHF_MASKOS | ELF::SHF_MASKPROC |
+ ELF::SHF_TLS | ELF::SHF_INFO_LINK;
+ Sec.Flags = (Sec.Flags & PreserveMask) |
+ (SR.NewFlags.getValue() & ~PreserveMask);
+ }
+ }
+ }
+ }
+
+ if (!Config.AddSection.empty()) {
+ for (const auto &Flag : Config.AddSection) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFile(File);
+ if (!BufOrErr)
+ reportError(File, BufOrErr.getError());
+ std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+ ArrayRef<uint8_t> Data(
+ reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
+ Buf->getBufferSize());
+ OwnedDataSection &NewSection =
+ Obj.addSection<OwnedDataSection>(SecName, Data);
+ if (SecName.startswith(".note") && SecName != ".note.GNU-stack")
+ NewSection.Type = SHT_NOTE;
+ }
+ }
+
+ if (!Config.DumpSection.empty()) {
+ for (const auto &Flag : Config.DumpSection) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ if (Error E = dumpSectionToFile(SecName, File, Obj))
+ reportError(Config.InputFilename, std::move(E));
+ }
+ }
+
+ if (!Config.AddGnuDebugLink.empty())
+ Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink);
+}
+
+void executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out) {
+ BinaryReader Reader(Config.BinaryArch, &In);
+ std::unique_ptr<Object> Obj = Reader.create();
+
+ // Prefer OutputArch (-O<format>) if set, otherwise fall back to BinaryArch
+ // (-B<arch>).
+ const ElfType OutputElfType = getOutputElfType(
+ Config.OutputArch ? Config.OutputArch.getValue() : Config.BinaryArch);
+ handleArgs(Config, *Obj, Reader, OutputElfType);
+ std::unique_ptr<Writer> Writer =
+ createWriter(Config, *Obj, Out, OutputElfType);
+ Writer->finalize();
+ Writer->write();
+}
+
+void executeObjcopyOnBinary(const CopyConfig &Config,
+ object::ELFObjectFileBase &In, Buffer &Out) {
+ ELFReader Reader(&In);
+ std::unique_ptr<Object> Obj = Reader.create();
+ // Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
+ const ElfType OutputElfType =
+ Config.OutputArch ? getOutputElfType(Config.OutputArch.getValue())
+ : getOutputElfType(In);
+ ArrayRef<uint8_t> BuildIdBytes;
+
+ if (!Config.BuildIdLinkDir.empty()) {
+ BuildIdBytes = unwrapOrError(findBuildID(In));
+ if (BuildIdBytes.size() < 2)
+ error("build ID in file '" + Config.InputFilename +
+ "' is smaller than two bytes");
+ }
+
+ if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkInput) {
+ linkToBuildIdDir(Config, Config.InputFilename,
+ Config.BuildIdLinkInput.getValue(), BuildIdBytes);
+ }
+ handleArgs(Config, *Obj, Reader, OutputElfType);
+ std::unique_ptr<Writer> Writer =
+ createWriter(Config, *Obj, Out, OutputElfType);
+ Writer->finalize();
+ Writer->write();
+ if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput) {
+ linkToBuildIdDir(Config, Config.OutputFilename,
+ Config.BuildIdLinkOutput.getValue(), BuildIdBytes);
+ }
+}
+
+} // end namespace elf
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h b/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h
new file mode 100644
index 000000000000..43f41c00ce5b
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h
@@ -0,0 +1,34 @@
+//===- ELFObjcopy.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
+
+namespace llvm {
+class MemoryBuffer;
+
+namespace object {
+class ELFObjectFileBase;
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+class Buffer;
+
+namespace elf {
+void executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out);
+void executeObjcopyOnBinary(const CopyConfig &Config,
+ object::ELFObjectFileBase &In, Buffer &Out);
+
+} // end namespace elf
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
diff --git a/contrib/llvm/tools/llvm-objcopy/Object.cpp b/contrib/llvm/tools/llvm-objcopy/ELF/Object.cpp
index 7e88f5263a39..3d3e029c09eb 100644
--- a/contrib/llvm/tools/llvm-objcopy/Object.cpp
+++ b/contrib/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -15,7 +15,9 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Path.h"
@@ -26,45 +28,14 @@
#include <utility>
#include <vector>
-using namespace llvm;
-using namespace llvm::objcopy;
+namespace llvm {
+namespace objcopy {
+namespace elf {
+
using namespace object;
using namespace ELF;
-Buffer::~Buffer() {}
-
-void FileBuffer::allocate(size_t Size) {
- Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
- FileOutputBuffer::create(getName(), Size, FileOutputBuffer::F_executable);
- handleAllErrors(BufferOrErr.takeError(), [this](const ErrorInfoBase &E) {
- error("failed to open " + getName() + ": " + E.message());
- });
- Buf = std::move(*BufferOrErr);
-}
-
-Error FileBuffer::commit() { return Buf->commit(); }
-
-uint8_t *FileBuffer::getBufferStart() {
- return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
-}
-
-void MemBuffer::allocate(size_t Size) {
- Buf = WritableMemoryBuffer::getNewMemBuffer(Size, getName());
-}
-
-Error MemBuffer::commit() { return Error::success(); }
-
-uint8_t *MemBuffer::getBufferStart() {
- return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
-}
-
-std::unique_ptr<WritableMemoryBuffer> MemBuffer::releaseMemoryBuffer() {
- return std::move(Buf);
-}
-
template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
- using Elf_Phdr = typename ELFT::Phdr;
-
uint8_t *B = Buf.getBufferStart();
B += Obj.ProgramHdrSegment.Offset + Seg.Index * sizeof(Elf_Phdr);
Elf_Phdr &Phdr = *reinterpret_cast<Elf_Phdr *>(B);
@@ -87,7 +58,7 @@ void SectionBase::markSymbols() {}
template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
uint8_t *B = Buf.getBufferStart();
B += Sec.HeaderOffset;
- typename ELFT::Shdr &Shdr = *reinterpret_cast<typename ELFT::Shdr *>(B);
+ Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
Shdr.sh_name = Sec.NameIndex;
Shdr.sh_type = Sec.Type;
Shdr.sh_flags = Sec.Flags;
@@ -100,7 +71,46 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
Shdr.sh_entsize = Sec.EntrySize;
}
-SectionVisitor::~SectionVisitor() {}
+template <class ELFT> void ELFSectionSizer<ELFT>::visit(Section &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(OwnedDataSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(StringTableSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(DynamicRelocationSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(SymbolTableSection &Sec) {
+ Sec.EntrySize = sizeof(Elf_Sym);
+ Sec.Size = Sec.Symbols.size() * Sec.EntrySize;
+ // Align to the largest field in Elf_Sym.
+ Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(RelocationSection &Sec) {
+ Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela);
+ Sec.Size = Sec.Relocations.size() * Sec.EntrySize;
+ // Align to the largest field in Elf_Rel(a).
+ Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(GnuDebugLinkSection &Sec) {}
+
+template <class ELFT> void ELFSectionSizer<ELFT>::visit(GroupSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(SectionIndexSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(CompressedSection &Sec) {}
+
+template <class ELFT>
+void ELFSectionSizer<ELFT>::visit(DecompressedSection &Sec) {}
void BinarySectionWriter::visit(const SectionIndexSection &Sec) {
error("Cannot write symbol section index table '" + Sec.Name + "' ");
@@ -126,20 +136,169 @@ void SectionWriter::visit(const Section &Sec) {
if (Sec.Type == SHT_NOBITS)
return;
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- std::copy(std::begin(Sec.Contents), std::end(Sec.Contents), Buf);
+ llvm::copy(Sec.Contents, Buf);
}
void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
+void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); }
+
void SectionWriter::visit(const OwnedDataSection &Sec) {
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- std::copy(std::begin(Sec.Data), std::end(Sec.Data), Buf);
+ llvm::copy(Sec.Data, Buf);
+}
+
+static const std::vector<uint8_t> ZlibGnuMagic = {'Z', 'L', 'I', 'B'};
+
+static bool isDataGnuCompressed(ArrayRef<uint8_t> Data) {
+ return Data.size() > ZlibGnuMagic.size() &&
+ std::equal(ZlibGnuMagic.begin(), ZlibGnuMagic.end(), Data.data());
+}
+
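+// A GNU-style (".zdebug_*") compressed section starts with the 4-byte "ZLIB"
+// magic followed by the uncompressed size as a big-endian 64-bit integer,
+// whereas a standard compressed section starts with an Elf_Chdr header.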
+template <class ELFT>
+static std::tuple<uint64_t, uint64_t>
+getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
+ const bool IsGnuDebug = isDataGnuCompressed(Data);
+ const uint64_t DecompressedSize =
+ IsGnuDebug
+ ? support::endian::read64be(reinterpret_cast<const uint64_t *>(
+ Data.data() + ZlibGnuMagic.size()))
+ : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())->ch_size;
+ const uint64_t DecompressedAlign =
+ IsGnuDebug ? 1
+ : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())
+ ->ch_addralign;
+
+ return std::make_tuple(DecompressedSize, DecompressedAlign);
+}
+
+template <class ELFT>
+void ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
+ uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
+
+ if (!zlib::isAvailable()) {
+ std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
+ return;
+ }
+
+ const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData)
+ ? (ZlibGnuMagic.size() + sizeof(Sec.Size))
+ : sizeof(Elf_Chdr_Impl<ELFT>);
+
+ StringRef CompressedContent(
+ reinterpret_cast<const char *>(Sec.OriginalData.data()) + DataOffset,
+ Sec.OriginalData.size() - DataOffset);
+
+ SmallVector<char, 128> DecompressedContent;
+ if (Error E = zlib::uncompress(CompressedContent, DecompressedContent,
+ static_cast<size_t>(Sec.Size)))
+ reportError(Sec.Name, std::move(E));
+
+ std::copy(DecompressedContent.begin(), DecompressedContent.end(), Buf);
+}
+
+void BinarySectionWriter::visit(const DecompressedSection &Sec) {
+ error("Cannot write compressed section '" + Sec.Name + "' ");
+}
+
+void DecompressedSection::accept(SectionVisitor &Visitor) const {
+ Visitor.visit(*this);
+}
+
+void DecompressedSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
}
void OwnedDataSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
+void BinarySectionWriter::visit(const CompressedSection &Sec) {
+ error("Cannot write compressed section '" + Sec.Name + "' ");
+}
+
+template <class ELFT>
+void ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
+ uint8_t *Buf = Out.getBufferStart();
+ Buf += Sec.Offset;
+
+ if (Sec.CompressionType == DebugCompressionType::None) {
+ std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
+ return;
+ }
+
+ if (Sec.CompressionType == DebugCompressionType::GNU) {
+ const char *Magic = "ZLIB";
+ memcpy(Buf, Magic, strlen(Magic));
+ Buf += strlen(Magic);
+ const uint64_t DecompressedSize =
+ support::endian::read64be(&Sec.DecompressedSize);
+ memcpy(Buf, &DecompressedSize, sizeof(DecompressedSize));
+ Buf += sizeof(DecompressedSize);
+ } else {
+ Elf_Chdr_Impl<ELFT> Chdr;
+ Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB;
+ Chdr.ch_size = Sec.DecompressedSize;
+ Chdr.ch_addralign = Sec.DecompressedAlign;
+ memcpy(Buf, &Chdr, sizeof(Chdr));
+ Buf += sizeof(Chdr);
+ }
+
+ std::copy(Sec.CompressedData.begin(), Sec.CompressedData.end(), Buf);
+}
+
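+// With GNU-style compression the section is renamed by replacing its leading
+// '.' with ".z" (e.g. ".debug_info" becomes ".zdebug_info"); with the standard
+// style the name is kept and SHF_COMPRESSED plus an Elf_Chdr header are used
+// instead.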
+CompressedSection::CompressedSection(const SectionBase &Sec,
+ DebugCompressionType CompressionType)
+ : SectionBase(Sec), CompressionType(CompressionType),
+ DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
+
+ if (!zlib::isAvailable()) {
+ CompressionType = DebugCompressionType::None;
+ return;
+ }
+
+ if (Error E = zlib::compress(
+ StringRef(reinterpret_cast<const char *>(OriginalData.data()),
+ OriginalData.size()),
+ CompressedData))
+ reportError(Name, std::move(E));
+
+ size_t ChdrSize;
+ if (CompressionType == DebugCompressionType::GNU) {
+ Name = ".z" + Sec.Name.substr(1);
+ ChdrSize = sizeof("ZLIB") - 1 + sizeof(uint64_t);
+ } else {
+ Flags |= ELF::SHF_COMPRESSED;
+ ChdrSize =
+ std::max(std::max(sizeof(object::Elf_Chdr_Impl<object::ELF64LE>),
+ sizeof(object::Elf_Chdr_Impl<object::ELF64BE>)),
+ std::max(sizeof(object::Elf_Chdr_Impl<object::ELF32LE>),
+ sizeof(object::Elf_Chdr_Impl<object::ELF32BE>)));
+ }
+ Size = ChdrSize + CompressedData.size();
+ Align = 8;
+}
+
+CompressedSection::CompressedSection(ArrayRef<uint8_t> CompressedData,
+ uint64_t DecompressedSize,
+ uint64_t DecompressedAlign)
+ : CompressionType(DebugCompressionType::None),
+ DecompressedSize(DecompressedSize), DecompressedAlign(DecompressedAlign) {
+ OriginalData = CompressedData;
+}
+
+void CompressedSection::accept(SectionVisitor &Visitor) const {
+ Visitor.visit(*this);
+}
+
+void CompressedSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
void StringTableSection::addString(StringRef Name) {
StrTabBuilder.add(Name);
Size = StrTabBuilder.getSize();
@@ -159,11 +318,15 @@ void StringTableSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void StringTableSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) {
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- auto *IndexesBuffer = reinterpret_cast<typename ELFT::Word *>(Buf);
- std::copy(std::begin(Sec.Indexes), std::end(Sec.Indexes), IndexesBuffer);
+ auto *IndexesBuffer = reinterpret_cast<Elf_Word *>(Buf);
+ llvm::copy(Sec.Indexes, IndexesBuffer);
}
void SectionIndexSection::initialize(SectionTableRef SecTable) {
@@ -182,6 +345,10 @@ void SectionIndexSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void SectionIndexSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) {
switch (Index) {
case SHN_ABS:
@@ -226,18 +393,20 @@ uint16_t Symbol::getShndx() const {
llvm_unreachable("Symbol with invalid ShndxType encountered");
}
+bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; }
+
void SymbolTableSection::assignIndices() {
uint32_t Index = 0;
for (auto &Sym : Symbols)
Sym->Index = Index++;
}
-void SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
+void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
SectionBase *DefinedIn, uint64_t Value,
uint8_t Visibility, uint16_t Shndx,
- uint64_t Sz) {
+ uint64_t Size) {
Symbol Sym;
- Sym.Name = Name;
+ Sym.Name = Name.str();
Sym.Binding = Bind;
Sym.Type = Type;
Sym.DefinedIn = DefinedIn;
@@ -251,7 +420,7 @@ void SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
}
Sym.Value = Value;
Sym.Visibility = Visibility;
- Sym.Size = Sz;
+ Sym.Size = Size;
Sym.Index = Symbols.size();
Symbols.emplace_back(llvm::make_unique<Symbol>(Sym));
Size += this->EntrySize;
@@ -344,7 +513,7 @@ template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
uint8_t *Buf = Out.getBufferStart();
Buf += Sec.Offset;
- typename ELFT::Sym *Sym = reinterpret_cast<typename ELFT::Sym *>(Buf);
+ Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Buf);
// Loop through symbols setting each entry of the symbol table.
for (auto &Symbol : Sec.Symbols) {
Sym->st_name = Symbol->NameIndex;
@@ -362,6 +531,10 @@ void SymbolTableSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void SymbolTableSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
template <class SymTabType>
void RelocSectionWithSymtabBase<SymTabType>::removeSectionReferences(
const SectionBase *Sec) {
@@ -377,11 +550,13 @@ void RelocSectionWithSymtabBase<SymTabType>::removeSectionReferences(
template <class SymTabType>
void RelocSectionWithSymtabBase<SymTabType>::initialize(
SectionTableRef SecTable) {
- setSymTab(SecTable.getSectionOfType<SymTabType>(
- Link,
- "Link field value " + Twine(Link) + " in section " + Name + " is invalid",
- "Link field value " + Twine(Link) + " in section " + Name +
- " is not a symbol table"));
+ if (Link != SHN_UNDEF)
+ setSymTab(SecTable.getSectionOfType<SymTabType>(
+ Link,
+ "Link field value " + Twine(Link) + " in section " + Name +
+ " is invalid",
+ "Link field value " + Twine(Link) + " in section " + Name +
+ " is not a symbol table"));
if (Info != SHN_UNDEF)
setSection(SecTable.getSection(Info, "Info field value " + Twine(Info) +
@@ -393,7 +568,8 @@ void RelocSectionWithSymtabBase<SymTabType>::initialize(
template <class SymTabType>
void RelocSectionWithSymtabBase<SymTabType>::finalize() {
- this->Link = Symbols->Index;
+ this->Link = Symbols ? Symbols->Index : 0;
+
if (SecToApplyRel != nullptr)
this->Info = SecToApplyRel->Index;
}
@@ -429,11 +605,15 @@ void RelocationSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void RelocationSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
void RelocationSection::removeSymbols(
function_ref<bool(const Symbol &)> ToRemove) {
for (const Relocation &Reloc : Relocations)
if (ToRemove(*Reloc.RelocSymbol))
- error("not stripping symbol `" + Reloc.RelocSymbol->Name +
+ error("not stripping symbol '" + Reloc.RelocSymbol->Name +
"' because it is named in a relocation");
}
@@ -443,7 +623,7 @@ void RelocationSection::markSymbols() {
}
void SectionWriter::visit(const DynamicRelocationSection &Sec) {
- std::copy(std::begin(Sec.Contents), std::end(Sec.Contents),
+ llvm::copy(Sec.Contents,
Out.getBufferStart() + Sec.Offset);
}
@@ -451,6 +631,10 @@ void DynamicRelocationSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
void Section::removeSectionReferences(const SectionBase *Sec) {
if (LinkSection == Sec) {
error("Section " + LinkSection->Name +
@@ -506,12 +690,12 @@ void GnuDebugLinkSection::init(StringRef File, StringRef Data) {
// establish the order that sections should go in. By using the maximum
// possible offset we cause this section to wind up at the end.
OriginalOffset = std::numeric_limits<uint64_t>::max();
- JamCRC crc;
- crc.update(ArrayRef<char>(Data.data(), Data.size()));
+ JamCRC CRC;
+ CRC.update(ArrayRef<char>(Data.data(), Data.size()));
// The CRC32 value needs to be complemented because the JamCRC doesn't
// finalize the CRC32 value. It also doesn't negate the initial CRC32 value
// but it starts by default at 0xFFFFFFFF which is the complement of zero.
- CRC32 = ~crc.getCRC();
+ CRC32 = ~CRC.getCRC();
}
GnuDebugLinkSection::GnuDebugLinkSection(StringRef File) : FileName(File) {
@@ -530,13 +714,17 @@ void ELFSectionWriter<ELFT>::visit(const GnuDebugLinkSection &Sec) {
Elf_Word *CRC =
reinterpret_cast<Elf_Word *>(Buf + Sec.Size - sizeof(Elf_Word));
*CRC = Sec.CRC32;
- std::copy(std::begin(Sec.FileName), std::end(Sec.FileName), File);
+ llvm::copy(Sec.FileName, File);
}
void GnuDebugLinkSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void GnuDebugLinkSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const GroupSection &Sec) {
ELF::Elf32_Word *Buf =
@@ -550,6 +738,10 @@ void GroupSection::accept(SectionVisitor &Visitor) const {
Visitor.visit(*this);
}
+void GroupSection::accept(MutableSectionVisitor &Visitor) {
+ Visitor.visit(*this);
+}
+
// Returns true IFF a section is wholly inside the range of a segment
static bool sectionWithinSegment(const SectionBase &Section,
const Segment &Segment) {
@@ -589,6 +781,79 @@ static bool compareSegmentsByPAddr(const Segment *A, const Segment *B) {
return A->Index < B->Index;
}
+void BinaryELFBuilder::initFileHeader() {
+ Obj->Flags = 0x0;
+ Obj->Type = ET_REL;
+ Obj->OSABI = ELFOSABI_NONE;
+ Obj->ABIVersion = 0;
+ Obj->Entry = 0x0;
+ Obj->Machine = EMachine;
+ Obj->Version = 1;
+}
+
+void BinaryELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; }
+
+StringTableSection *BinaryELFBuilder::addStrTab() {
+ auto &StrTab = Obj->addSection<StringTableSection>();
+ StrTab.Name = ".strtab";
+
+ Obj->SectionNames = &StrTab;
+ return &StrTab;
+}
+
+SymbolTableSection *BinaryELFBuilder::addSymTab(StringTableSection *StrTab) {
+ auto &SymTab = Obj->addSection<SymbolTableSection>();
+
+ SymTab.Name = ".symtab";
+ SymTab.Link = StrTab->Index;
+
+ // The symbol table always needs a null symbol
+ SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0);
+
+ Obj->SymbolTable = &SymTab;
+ return &SymTab;
+}
+
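+// Adds the raw input as a .data section and defines the conventional
+// _binary_<sanitized filename>_start/_end/_size symbols for it.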
+void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
+ auto Data = ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(MemBuf->getBufferStart()),
+ MemBuf->getBufferSize());
+ auto &DataSection = Obj->addSection<Section>(Data);
+ DataSection.Name = ".data";
+ DataSection.Type = ELF::SHT_PROGBITS;
+ DataSection.Size = Data.size();
+ DataSection.Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
+
+ std::string SanitizedFilename = MemBuf->getBufferIdentifier().str();
+ std::replace_if(std::begin(SanitizedFilename), std::end(SanitizedFilename),
+ [](char C) { return !isalnum(C); }, '_');
+ Twine Prefix = Twine("_binary_") + SanitizedFilename;
+
+ SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection,
+ /*Value=*/0, STV_DEFAULT, 0, 0);
+ SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection,
+ /*Value=*/DataSection.Size, STV_DEFAULT, 0, 0);
+ SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr,
+ /*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0);
+}
+
+void BinaryELFBuilder::initSections() {
+ for (auto &Section : Obj->sections()) {
+ Section.initialize(Obj->sections());
+ }
+}
+
+std::unique_ptr<Object> BinaryELFBuilder::build() {
+ initFileHeader();
+ initHeaderSegment();
+ StringTableSection *StrTab = addStrTab();
+ SymbolTableSection *SymTab = addSymTab(StrTab);
+ initSections();
+ addData(SymTab);
+
+ return std::move(Obj);
+}
+
template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
for (auto &Parent : Obj.segments()) {
// Every segment will overlap with itself but we don't want a segment to
@@ -633,15 +898,6 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
}
auto &ElfHdr = Obj.ElfHdrSegment;
- // Creating multiple PT_PHDR segments technically is not valid, but PT_LOAD
- // segments must not overlap, and other types fit even less.
- ElfHdr.Type = PT_PHDR;
- ElfHdr.Flags = 0;
- ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
- ElfHdr.VAddr = 0;
- ElfHdr.PAddr = 0;
- ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
- ElfHdr.Align = 0;
ElfHdr.Index = Index++;
const auto &Ehdr = *ElfFile.getHeader();
@@ -725,8 +981,7 @@ void ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) {
Elf_Word Index = ShndxData[&Sym - Symbols.begin()];
DefSection = Obj.sections().getSection(
Index,
- "Symbol '" + Name + "' has invalid section index " +
- Twine(Index));
+ "Symbol '" + Name + "' has invalid section index " + Twine(Index));
} else if (Sym.st_shndx >= SHN_LORESERVE) {
if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) {
error(
@@ -828,10 +1083,20 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
}
case SHT_NOBITS:
return Obj.addSection<Section>(Data);
- default:
+ default: {
Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
+
+ if (isDataGnuCompressed(Data) || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
+ uint64_t DecompressedSize, DecompressedAlign;
+ std::tie(DecompressedSize, DecompressedAlign) =
+ getDecompressedSizeAndAlignment<ELFT>(Data);
+ return Obj.addSection<CompressedSection>(Data, DecompressedSize,
+ DecompressedAlign);
+ }
+
return Obj.addSection<Section>(Data);
}
+ }
}
template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
@@ -854,6 +1119,9 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
Sec.Align = Shdr.sh_addralign;
Sec.EntrySize = Shdr.sh_entsize;
Sec.Index = Index++;
+ Sec.OriginalData =
+ ArrayRef<uint8_t>(ElfFile.base() + Shdr.sh_offset,
+ (Shdr.sh_type == SHT_NOBITS) ? 0 : Shdr.sh_size);
}
// If a section index table exists we'll need to initialize it before we
@@ -894,7 +1162,8 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
template <class ELFT> void ELFBuilder<ELFT>::build() {
const auto &Ehdr = *ElfFile.getHeader();
- std::copy(Ehdr.e_ident, Ehdr.e_ident + 16, Obj.Ident);
+ Obj.OSABI = Ehdr.e_ident[EI_OSABI];
+ Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION];
Obj.Type = Ehdr.e_type;
Obj.Machine = Ehdr.e_machine;
Obj.Version = Ehdr.e_version;
@@ -926,34 +1195,26 @@ Writer::~Writer() {}
Reader::~Reader() {}
-ElfType ELFReader::getElfType() const {
- if (isa<ELFObjectFile<ELF32LE>>(Bin))
- return ELFT_ELF32LE;
- if (isa<ELFObjectFile<ELF64LE>>(Bin))
- return ELFT_ELF64LE;
- if (isa<ELFObjectFile<ELF32BE>>(Bin))
- return ELFT_ELF32BE;
- if (isa<ELFObjectFile<ELF64BE>>(Bin))
- return ELFT_ELF64BE;
- llvm_unreachable("Invalid ELFType");
+std::unique_ptr<Object> BinaryReader::create() const {
+ return BinaryELFBuilder(MInfo.EMachine, MemBuf).build();
}
std::unique_ptr<Object> ELFReader::create() const {
auto Obj = llvm::make_unique<Object>();
- if (auto *o = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
- ELFBuilder<ELF32LE> Builder(*o, *Obj);
+ if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
+ ELFBuilder<ELF32LE> Builder(*O, *Obj);
Builder.build();
return Obj;
- } else if (auto *o = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
- ELFBuilder<ELF64LE> Builder(*o, *Obj);
+ } else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
+ ELFBuilder<ELF64LE> Builder(*O, *Obj);
Builder.build();
return Obj;
- } else if (auto *o = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
- ELFBuilder<ELF32BE> Builder(*o, *Obj);
+ } else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
+ ELFBuilder<ELF32BE> Builder(*O, *Obj);
Builder.build();
return Obj;
- } else if (auto *o = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
- ELFBuilder<ELF64BE> Builder(*o, *Obj);
+ } else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
+ ELFBuilder<ELF64BE> Builder(*O, *Obj);
Builder.build();
return Obj;
}
@@ -963,18 +1224,31 @@ std::unique_ptr<Object> ELFReader::create() const {
template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
uint8_t *B = Buf.getBufferStart();
Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(B);
- std::copy(Obj.Ident, Obj.Ident + 16, Ehdr.e_ident);
+ std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0);
+ Ehdr.e_ident[EI_MAG0] = 0x7f;
+ Ehdr.e_ident[EI_MAG1] = 'E';
+ Ehdr.e_ident[EI_MAG2] = 'L';
+ Ehdr.e_ident[EI_MAG3] = 'F';
+ Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
+ Ehdr.e_ident[EI_DATA] =
+ ELFT::TargetEndianness == support::big ? ELFDATA2MSB : ELFDATA2LSB;
+ Ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+ Ehdr.e_ident[EI_OSABI] = Obj.OSABI;
+ Ehdr.e_ident[EI_ABIVERSION] = Obj.ABIVersion;
+
Ehdr.e_type = Obj.Type;
Ehdr.e_machine = Obj.Machine;
Ehdr.e_version = Obj.Version;
Ehdr.e_entry = Obj.Entry;
- Ehdr.e_phoff = Obj.ProgramHdrSegment.Offset;
+ // We have to use the fully-qualified name llvm::size
+ // since some compilers complain about ambiguous resolution.
+ Ehdr.e_phnum = llvm::size(Obj.segments());
+ Ehdr.e_phoff = (Ehdr.e_phnum != 0) ? Obj.ProgramHdrSegment.Offset : 0;
+ Ehdr.e_phentsize = (Ehdr.e_phnum != 0) ? sizeof(Elf_Phdr) : 0;
Ehdr.e_flags = Obj.Flags;
Ehdr.e_ehsize = sizeof(Elf_Ehdr);
- Ehdr.e_phentsize = sizeof(Elf_Phdr);
- Ehdr.e_phnum = size(Obj.segments());
- Ehdr.e_shentsize = sizeof(Elf_Shdr);
- if (WriteSectionHeaders) {
+ if (WriteSectionHeaders && size(Obj.sections()) != 0) {
+ Ehdr.e_shentsize = sizeof(Elf_Shdr);
Ehdr.e_shoff = Obj.SHOffset;
// """
// If the number of sections is greater than or equal to
@@ -998,6 +1272,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
else
Ehdr.e_shstrndx = Obj.SectionNames->Index;
} else {
+ Ehdr.e_shentsize = 0;
Ehdr.e_shoff = 0;
Ehdr.e_shnum = 0;
Ehdr.e_shstrndx = 0;
@@ -1106,7 +1381,7 @@ static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) {
}
// Orders segments such that if x = y->ParentSegment then y comes before x.
-static void OrderSegments(std::vector<Segment *> &Segments) {
+static void orderSegments(std::vector<Segment *> &Segments) {
std::stable_sort(std::begin(Segments), std::end(Segments),
compareSegmentsByOffset);
}
@@ -1148,7 +1423,7 @@ static uint64_t LayoutSegments(std::vector<Segment *> &Segments,
// sections had a ParentSegment or an offset one past the last section if there
// was a section that didn't have a ParentSegment.
template <class Range>
-static uint64_t LayoutSections(Range Sections, uint64_t Offset) {
+static uint64_t layoutSections(Range Sections, uint64_t Offset) {
// Now the offset of every segment has been set we can assign the offsets
// of each section. For sections that are covered by a segment we should use
// the segment's original offset and the section's original offset to compute
@@ -1172,6 +1447,17 @@ static uint64_t LayoutSections(Range Sections, uint64_t Offset) {
return Offset;
}
+template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() {
+ auto &ElfHdr = Obj.ElfHdrSegment;
+ ElfHdr.Type = PT_PHDR;
+ ElfHdr.Flags = 0;
+ ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
+ ElfHdr.VAddr = 0;
+ ElfHdr.PAddr = 0;
+ ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
+ ElfHdr.Align = 0;
+}
+
template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
// We need a temporary list of segments that has a special order to it
// so that we know that anytime ->ParentSegment is set that segment has
@@ -1181,17 +1467,17 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
OrderedSegments.push_back(&Segment);
OrderedSegments.push_back(&Obj.ElfHdrSegment);
OrderedSegments.push_back(&Obj.ProgramHdrSegment);
- OrderSegments(OrderedSegments);
+ orderSegments(OrderedSegments);
// Offset is used as the start offset of the first segment to be laid out.
// Since the ELF Header (ElfHdrSegment) must be at the start of the file,
// we start at offset 0.
uint64_t Offset = 0;
Offset = LayoutSegments(OrderedSegments, Offset);
- Offset = LayoutSections(Obj.sections(), Offset);
+ Offset = layoutSections(Obj.sections(), Offset);
// If we need to write the section header table out then we need to align the
// Offset so that SHOffset is valid.
if (WriteSectionHeaders)
- Offset = alignTo(Offset, sizeof(typename ELFT::Addr));
+ Offset = alignTo(Offset, sizeof(Elf_Addr));
Obj.SHOffset = Offset;
}
@@ -1263,10 +1549,17 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
Obj.SectionNames->addString(Section.Name);
}
+ initEhdrSegment();
+
// Before we can prepare for layout the indexes need to be finalized.
+ // Also, the output arch may not be the same as the input arch, so fix up
+ // size-related fields before doing layout calculations.
uint64_t Index = 0;
- for (auto &Sec : Obj.sections())
+ auto SecSizer = llvm::make_unique<ELFSectionSizer<ELFT>>();
+ for (auto &Sec : Obj.sections()) {
Sec.Index = Index++;
+ Sec.accept(*SecSizer);
+ }
// The symbol table does not update all other sections on update. For
// instance, symbol names are not added as new symbols are added. This means
@@ -1324,10 +1617,10 @@ void BinaryWriter::finalize() {
// loading and physical addresses are intended for ROM loading.
// However, if no segment has a physical address, we'll fallback to using
// virtual addresses for all.
- if (std::all_of(std::begin(OrderedSegments), std::end(OrderedSegments),
- [](const Segment *Segment) { return Segment->PAddr == 0; }))
- for (const auto &Segment : OrderedSegments)
- Segment->PAddr = Segment->VAddr;
+ if (all_of(OrderedSegments,
+ [](const Segment *Seg) { return Seg->PAddr == 0; }))
+ for (Segment *Seg : OrderedSegments)
+ Seg->PAddr = Seg->VAddr;
std::stable_sort(std::begin(OrderedSegments), std::end(OrderedSegments),
compareSegmentsByPAddr);
@@ -1342,8 +1635,8 @@ void BinaryWriter::finalize() {
uint64_t Offset = 0;
// Modify the first segment so that there is no gap at the start. This allows
- // our layout algorithm to proceed as expected while not out writing out the
- // gap at the start.
+ // our layout algorithm to proceed as expected while not writing out the gap
+ // at the start.
if (!OrderedSegments.empty()) {
auto Seg = OrderedSegments[0];
auto Sec = Seg->firstSection();
@@ -1371,7 +1664,7 @@ void BinaryWriter::finalize() {
continue;
AllocatedSections.push_back(&Section);
}
- LayoutSections(make_pointee_range(AllocatedSections), Offset);
+ layoutSections(make_pointee_range(AllocatedSections), Offset);
// Now that every section has been laid out we just need to compute the total
// file size. This might not be the same as the offset returned by
@@ -1387,9 +1680,6 @@ void BinaryWriter::finalize() {
SecWriter = llvm::make_unique<BinarySectionWriter>(Buf);
}
-namespace llvm {
-namespace objcopy {
-
template class ELFBuilder<ELF64LE>;
template class ELFBuilder<ELF64BE>;
template class ELFBuilder<ELF32LE>;
@@ -1399,5 +1689,7 @@ template class ELFWriter<ELF64LE>;
template class ELFWriter<ELF64BE>;
template class ELFWriter<ELF32LE>;
template class ELFWriter<ELF32BE>;
+
+} // end namespace elf
} // end namespace objcopy
} // end namespace llvm
diff --git a/contrib/llvm/tools/llvm-objcopy/Object.h b/contrib/llvm/tools/llvm-objcopy/ELF/Object.h
index 76748d5fc641..e5730cd543ee 100644
--- a/contrib/llvm/tools/llvm-objcopy/Object.h
+++ b/contrib/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -10,6 +10,8 @@
#ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H
#define LLVM_TOOLS_OBJCOPY_OBJECT_H
+#include "Buffer.h"
+#include "CopyConfig.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -26,9 +28,10 @@
#include <vector>
namespace llvm {
+enum class DebugCompressionType;
namespace objcopy {
+namespace elf {
-class Buffer;
class SectionBase;
class Section;
class OwnedDataSection;
@@ -39,6 +42,8 @@ class DynamicRelocationSection;
class GnuDebugLinkSection;
class GroupSection;
class SectionIndexSection;
+class CompressedSection;
+class DecompressedSection;
class Segment;
class Object;
struct Symbol;
@@ -66,7 +71,7 @@ enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE };
class SectionVisitor {
public:
- virtual ~SectionVisitor();
+ virtual ~SectionVisitor() = default;
virtual void visit(const Section &Sec) = 0;
virtual void visit(const OwnedDataSection &Sec) = 0;
@@ -77,6 +82,25 @@ public:
virtual void visit(const GnuDebugLinkSection &Sec) = 0;
virtual void visit(const GroupSection &Sec) = 0;
virtual void visit(const SectionIndexSection &Sec) = 0;
+ virtual void visit(const CompressedSection &Sec) = 0;
+ virtual void visit(const DecompressedSection &Sec) = 0;
+};
+
+class MutableSectionVisitor {
+public:
+ virtual ~MutableSectionVisitor() = default;
+
+ virtual void visit(Section &Sec) = 0;
+ virtual void visit(OwnedDataSection &Sec) = 0;
+ virtual void visit(StringTableSection &Sec) = 0;
+ virtual void visit(SymbolTableSection &Sec) = 0;
+ virtual void visit(RelocationSection &Sec) = 0;
+ virtual void visit(DynamicRelocationSection &Sec) = 0;
+ virtual void visit(GnuDebugLinkSection &Sec) = 0;
+ virtual void visit(GroupSection &Sec) = 0;
+ virtual void visit(SectionIndexSection &Sec) = 0;
+ virtual void visit(CompressedSection &Sec) = 0;
+ virtual void visit(DecompressedSection &Sec) = 0;
};
class SectionWriter : public SectionVisitor {
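The SectionVisitor above gains a mutable counterpart here: MutableSectionVisitor takes non-const references, so a pass such as the new ELFSectionSizer can update the section it visits instead of only reading it. A stripped-down sketch of the double dispatch this relies on (illustrative names, not the real hierarchy):

    #include <cstdio>

    struct PlainSection;
    struct OwnedDataSec;

    struct MutableVisitor {
      virtual ~MutableVisitor() = default;
      virtual void visit(PlainSection &S) = 0;
      virtual void visit(OwnedDataSec &S) = 0;
    };

    struct SecBase {
      virtual ~SecBase() = default;
      // Each concrete section forwards to the overload for its own type.
      virtual void accept(MutableVisitor &V) = 0;
    };

    struct PlainSection : SecBase {
      void accept(MutableVisitor &V) override { V.visit(*this); }
    };
    struct OwnedDataSec : SecBase {
      void accept(MutableVisitor &V) override { V.visit(*this); }
    };

    struct Sizer : MutableVisitor {
      void visit(PlainSection &) override { std::puts("size a plain section"); }
      void visit(OwnedDataSec &) override { std::puts("size an owned-data section"); }
    };

    // Usage: Sizer S; SomeSecBasePtr->accept(S); the section's dynamic type
    // selects the matching visit() overload.
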
@@ -95,6 +119,8 @@ public:
virtual void visit(const GnuDebugLinkSection &Sec) override = 0;
virtual void visit(const GroupSection &Sec) override = 0;
virtual void visit(const SectionIndexSection &Sec) override = 0;
+ virtual void visit(const CompressedSection &Sec) override = 0;
+ virtual void visit(const DecompressedSection &Sec) override = 0;
explicit SectionWriter(Buffer &Buf) : Out(Buf) {}
};
@@ -104,6 +130,7 @@ private:
using Elf_Word = typename ELFT::Word;
using Elf_Rel = typename ELFT::Rel;
using Elf_Rela = typename ELFT::Rela;
+ using Elf_Sym = typename ELFT::Sym;
public:
virtual ~ELFSectionWriter() {}
@@ -112,13 +139,38 @@ public:
void visit(const GnuDebugLinkSection &Sec) override;
void visit(const GroupSection &Sec) override;
void visit(const SectionIndexSection &Sec) override;
+ void visit(const CompressedSection &Sec) override;
+ void visit(const DecompressedSection &Sec) override;
explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
};
+template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor {
+private:
+ using Elf_Rel = typename ELFT::Rel;
+ using Elf_Rela = typename ELFT::Rela;
+ using Elf_Sym = typename ELFT::Sym;
+ using Elf_Word = typename ELFT::Word;
+ using Elf_Xword = typename ELFT::Xword;
+
+public:
+ void visit(Section &Sec) override;
+ void visit(OwnedDataSection &Sec) override;
+ void visit(StringTableSection &Sec) override;
+ void visit(DynamicRelocationSection &Sec) override;
+ void visit(SymbolTableSection &Sec) override;
+ void visit(RelocationSection &Sec) override;
+ void visit(GnuDebugLinkSection &Sec) override;
+ void visit(GroupSection &Sec) override;
+ void visit(SectionIndexSection &Sec) override;
+ void visit(CompressedSection &Sec) override;
+ void visit(DecompressedSection &Sec) override;
+};
+
#define MAKE_SEC_WRITER_FRIEND \
friend class SectionWriter; \
- template <class ELFT> friend class ELFSectionWriter;
+ template <class ELFT> friend class ELFSectionWriter; \
+ template <class ELFT> friend class ELFSectionSizer;
class BinarySectionWriter : public SectionWriter {
public:
@@ -129,52 +181,12 @@ public:
void visit(const GnuDebugLinkSection &Sec) override;
void visit(const GroupSection &Sec) override;
void visit(const SectionIndexSection &Sec) override;
+ void visit(const CompressedSection &Sec) override;
+ void visit(const DecompressedSection &Sec) override;
explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
};
-// The class Buffer abstracts out the common interface of FileOutputBuffer and
-// WritableMemoryBuffer so that the hierarchy of Writers depends on this
-// abstract interface and doesn't depend on a particular implementation.
-// TODO: refactor the buffer classes in LLVM to enable us to use them here
-// directly.
-class Buffer {
- StringRef Name;
-
-public:
- virtual ~Buffer();
- virtual void allocate(size_t Size) = 0;
- virtual uint8_t *getBufferStart() = 0;
- virtual Error commit() = 0;
-
- explicit Buffer(StringRef Name) : Name(Name) {}
- StringRef getName() const { return Name; }
-};
-
-class FileBuffer : public Buffer {
- std::unique_ptr<FileOutputBuffer> Buf;
-
-public:
- void allocate(size_t Size) override;
- uint8_t *getBufferStart() override;
- Error commit() override;
-
- explicit FileBuffer(StringRef FileName) : Buffer(FileName) {}
-};
-
-class MemBuffer : public Buffer {
- std::unique_ptr<WritableMemoryBuffer> Buf;
-
-public:
- void allocate(size_t Size) override;
- uint8_t *getBufferStart() override;
- Error commit() override;
-
- explicit MemBuffer(StringRef Name) : Buffer(Name) {}
-
- std::unique_ptr<WritableMemoryBuffer> releaseMemoryBuffer();
-};
-
class Writer {
protected:
Object &Obj;
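The Buffer, FileBuffer and MemBuffer declarations deleted here are not gone; this commit moves them into the new Buffer.h included at the top of this header. Their contract is allocate, fill, commit. A small fragment written against that interface (not the tool's actual code; error handling is reduced to consuming the Error):

    #include <cstring>   // std::memcpy
    #include <utility>   // std::move

    void writeBlob(Buffer &Out, const uint8_t *Data, size_t Size) {
      Out.allocate(Size);                           // reserve the output buffer
      std::memcpy(Out.getBufferStart(), Data, Size);
      if (Error E = Out.commit())                   // flush to disk or to memory
        consumeError(std::move(E));                 // llvm::consumeError, Error.h
    }
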
@@ -190,10 +202,13 @@ public:
template <class ELFT> class ELFWriter : public Writer {
private:
+ using Elf_Addr = typename ELFT::Addr;
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Phdr = typename ELFT::Phdr;
using Elf_Ehdr = typename ELFT::Ehdr;
+ void initEhdrSegment();
+
void writeEhdr();
void writePhdr(const Segment &Seg);
void writeShdr(const SectionBase &Sec);
@@ -233,7 +248,7 @@ public:
class SectionBase {
public:
- StringRef Name;
+ std::string Name;
Segment *ParentSegment = nullptr;
uint64_t HeaderOffset;
uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();
@@ -250,6 +265,10 @@ public:
uint64_t Offset = 0;
uint64_t Size = 0;
uint64_t Type = ELF::SHT_NULL;
+ ArrayRef<uint8_t> OriginalData;
+
+ SectionBase() = default;
+ SectionBase(const SectionBase &) = default;
virtual ~SectionBase() = default;
@@ -258,6 +277,7 @@ public:
virtual void removeSectionReferences(const SectionBase *Sec);
virtual void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
virtual void accept(SectionVisitor &Visitor) const = 0;
+ virtual void accept(MutableSectionVisitor &Visitor) = 0;
virtual void markSymbols();
};
@@ -275,21 +295,21 @@ private:
};
std::set<const SectionBase *, SectionCompare> Sections;
- ArrayRef<uint8_t> Contents;
public:
- uint64_t Align;
- uint64_t FileSize;
+ uint32_t Type;
uint32_t Flags;
- uint32_t Index;
- uint64_t MemSize;
uint64_t Offset;
- uint64_t PAddr;
- uint64_t Type;
uint64_t VAddr;
+ uint64_t PAddr;
+ uint64_t FileSize;
+ uint64_t MemSize;
+ uint64_t Align;
+ uint32_t Index;
uint64_t OriginalOffset;
Segment *ParentSegment = nullptr;
+ ArrayRef<uint8_t> Contents;
explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
Segment() {}
@@ -314,6 +334,7 @@ public:
explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
void removeSectionReferences(const SectionBase *Sec) override;
void initialize(SectionTableRef SecTable) override;
void finalize() override;
@@ -327,13 +348,57 @@ class OwnedDataSection : public SectionBase {
public:
OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
: Data(std::begin(Data), std::end(Data)) {
- Name = SecName;
+ Name = SecName.str();
Type = ELF::SHT_PROGBITS;
Size = Data.size();
OriginalOffset = std::numeric_limits<uint64_t>::max();
}
void accept(SectionVisitor &Sec) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
+};
+
+class CompressedSection : public SectionBase {
+ MAKE_SEC_WRITER_FRIEND
+
+ DebugCompressionType CompressionType;
+ uint64_t DecompressedSize;
+ uint64_t DecompressedAlign;
+ SmallVector<char, 128> CompressedData;
+
+public:
+ CompressedSection(const SectionBase &Sec,
+ DebugCompressionType CompressionType);
+ CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize,
+ uint64_t DecompressedAlign);
+
+ uint64_t getDecompressedSize() const { return DecompressedSize; }
+ uint64_t getDecompressedAlign() const { return DecompressedAlign; }
+
+ void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
+
+ static bool classof(const SectionBase *S) {
+ return (S->Flags & ELF::SHF_COMPRESSED) ||
+ (StringRef(S->Name).startswith(".zdebug"));
+ }
+};
+
+class DecompressedSection : public SectionBase {
+ MAKE_SEC_WRITER_FRIEND
+
+public:
+ explicit DecompressedSection(const CompressedSection &Sec)
+ : SectionBase(Sec) {
+ Size = Sec.getDecompressedSize();
+ Align = Sec.getDecompressedAlign();
+ Flags = (Flags & ~ELF::SHF_COMPRESSED);
+ if (StringRef(Name).startswith(".zdebug"))
+ Name = "." + Name.substr(2);
+ }
+
+ void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
};
// There are two types of string tables that can exist, dynamic and not dynamic.
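CompressedSection::classof above recognises both ways a compressed debug section can be marked: the SHF_COMPRESSED section flag (zlib style) and the GNU-style ".zdebug" name prefix; DecompressedSection's constructor undoes both. A self-contained sketch of that naming rule (the flag value is the real ELF constant, the rest is illustrative):

    #include <cstdint>
    #include <string>

    constexpr uint64_t kSHF_COMPRESSED = 0x800; // ELF::SHF_COMPRESSED

    struct SecInfo {
      std::string Name;
      uint64_t Flags = 0;
    };

    bool isCompressed(const SecInfo &S) {
      return (S.Flags & kSHF_COMPRESSED) || S.Name.compare(0, 7, ".zdebug") == 0;
    }

    // Mirrors what DecompressedSection does to the metadata: clear the flag
    // and turn ".zdebug_info" back into ".debug_info".
    void markDecompressed(SecInfo &S) {
      S.Flags &= ~kSHF_COMPRESSED;
      if (S.Name.compare(0, 7, ".zdebug") == 0)
        S.Name = "." + S.Name.substr(2);
    }
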
@@ -358,6 +423,7 @@ public:
uint32_t findIndex(StringRef Name) const;
void finalize() override;
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
if (S->Flags & ELF::SHF_ALLOC)
@@ -386,7 +452,7 @@ struct Symbol {
SectionBase *DefinedIn = nullptr;
SymbolShndxType ShndxType;
uint32_t Index;
- StringRef Name;
+ std::string Name;
uint32_t NameIndex;
uint64_t Size;
uint8_t Type;
@@ -395,6 +461,7 @@ struct Symbol {
bool Referenced = false;
uint16_t getShndx() const;
+ bool isCommon() const;
};
class SectionIndexSection : public SectionBase {
@@ -414,6 +481,7 @@ public:
void initialize(SectionTableRef SecTable) override;
void finalize() override;
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
SectionIndexSection() {
Name = ".symtab_shndx";
@@ -437,9 +505,11 @@ protected:
using SymPtr = std::unique_ptr<Symbol>;
public:
- void addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
- SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility,
- uint16_t Shndx, uint64_t Sz);
+ SymbolTableSection() { Type = ELF::SHT_SYMTAB; }
+
+ void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
+ uint64_t Value, uint8_t Visibility, uint16_t Shndx,
+ uint64_t Size);
void prepareForLayout();
// An 'empty' symbol table still contains a null symbol.
bool empty() const { return Symbols.size() == 1; }
@@ -456,6 +526,7 @@ public:
void initialize(SectionTableRef SecTable) override;
void finalize() override;
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
static bool classof(const SectionBase *S) {
@@ -517,6 +588,7 @@ class RelocationSection
public:
void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
void markSymbols() override;
@@ -549,8 +621,8 @@ public:
void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); }
- void initialize(SectionTableRef SecTable) override{};
void accept(SectionVisitor &) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
void finalize() override;
void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
void markSymbols() override;
@@ -589,6 +661,7 @@ public:
explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
void accept(SectionVisitor &) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
if (!(S->Flags & ELF::SHF_ALLOC))
@@ -610,6 +683,7 @@ public:
// If we add this section from an external source we can use this ctor.
explicit GnuDebugLinkSection(StringRef File);
void accept(SectionVisitor &Visitor) const override;
+ void accept(MutableSectionVisitor &Visitor) override;
};
class Reader {
@@ -623,11 +697,29 @@ using object::ELFFile;
using object::ELFObjectFile;
using object::OwningBinary;
+class BinaryELFBuilder {
+ uint16_t EMachine;
+ MemoryBuffer *MemBuf;
+ std::unique_ptr<Object> Obj;
+
+ void initFileHeader();
+ void initHeaderSegment();
+ StringTableSection *addStrTab();
+ SymbolTableSection *addSymTab(StringTableSection *StrTab);
+ void addData(SymbolTableSection *SymTab);
+ void initSections();
+
+public:
+ BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB)
+ : EMachine(EM), MemBuf(MB), Obj(llvm::make_unique<Object>()) {}
+
+ std::unique_ptr<Object> build();
+};
+
template <class ELFT> class ELFBuilder {
private:
using Elf_Addr = typename ELFT::Addr;
using Elf_Shdr = typename ELFT::Shdr;
- using Elf_Ehdr = typename ELFT::Ehdr;
using Elf_Word = typename ELFT::Word;
const ELFFile<ELFT> &ElfFile;
@@ -647,13 +739,22 @@ public:
void build();
};
+class BinaryReader : public Reader {
+ const MachineInfo &MInfo;
+ MemoryBuffer *MemBuf;
+
+public:
+ BinaryReader(const MachineInfo &MI, MemoryBuffer *MB)
+ : MInfo(MI), MemBuf(MB) {}
+ std::unique_ptr<Object> create() const override;
+};
+
class ELFReader : public Reader {
Binary *Bin;
public:
- ElfType getElfType() const;
std::unique_ptr<Object> create() const override;
- explicit ELFReader(Binary *B) : Bin(B){};
+ explicit ELFReader(Binary *B) : Bin(B) {}
};
class Object {
@@ -682,7 +783,8 @@ public:
Segment ElfHdrSegment;
Segment ProgramHdrSegment;
- uint8_t Ident[16];
+ uint8_t OSABI;
+ uint8_t ABIVersion;
uint64_t Entry;
uint64_t SHOffset;
uint32_t Type;
@@ -708,6 +810,7 @@ public:
auto Sec = llvm::make_unique<T>(std::forward<Ts>(Args)...);
auto Ptr = Sec.get();
Sections.emplace_back(std::move(Sec));
+ Ptr->Index = Sections.size();
return *Ptr;
}
Segment &addSegment(ArrayRef<uint8_t> Data) {
@@ -715,6 +818,8 @@ public:
return *Segments.back();
}
};
+
+} // end namespace elf
} // end namespace objcopy
} // end namespace llvm
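The BinaryReader and BinaryELFBuilder added above back the new raw-binary input path: a MemoryBuffer is wrapped into a synthetic ELF Object for the machine given on the command line. A fragment written against these declarations (illustrative, not the tool's real call site; MI is assumed to be a MachineInfo filled in from --binary-architecture):

    #include "llvm/Support/MemoryBuffer.h"

    auto BufOrErr = llvm::MemoryBuffer::getFile("blob.bin");
    if (BufOrErr) {
      BinaryReader Reader(MI, BufOrErr->get());
      std::unique_ptr<Object> Obj = Reader.create();
      // Obj now carries the raw bytes as a data section plus a small symbol
      // table, per BinaryELFBuilder::addData / addSymTab declared above.
    }
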
diff --git a/contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 2af2108d98d3..1f7e64e4091c 100644
--- a/contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/contrib/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -1,55 +1,98 @@
include "llvm/Option/OptParser.td"
-multiclass Eq<string name> {
- def NAME: Separate<["--", "-"], name>;
- def NAME # _eq: Joined<["--", "-"], name # "=">, Alias<!cast<Separate>(NAME)>;
+multiclass Eq<string name, string help> {
+ def NAME : Separate<["--", "-"], name>;
+ def NAME #_eq : Joined<["--", "-"], name #"=">,
+ Alias<!cast<Separate>(NAME)>,
+ HelpText<help>;
}
def help : Flag<["-", "--"], "help">;
-defm binary_architecture : Eq<"binary-architecture">,
- HelpText<"Used when transforming an architecture-less format (such as binary) to another format">;
-def B : JoinedOrSeparate<["-"], "B">,
- Alias<binary_architecture>;
-defm input_target : Eq<"input-target">,
- HelpText<"Format of the input file">,
+
+defm binary_architecture
+ : Eq<"binary-architecture", "Used when transforming an architecture-less "
+ "format (such as binary) to another format">;
+def B : JoinedOrSeparate<["-"], "B">, Alias<binary_architecture>;
+
+defm target : Eq<"target", "Format of the input and output file">,
+ Values<"binary">;
+def F : JoinedOrSeparate<["-"], "F">, Alias<target>;
+
+defm input_target : Eq<"input-target", "Format of the input file">,
Values<"binary">;
-defm output_target : Eq<"output-target">,
- HelpText<"Format of the output file">,
+def I : JoinedOrSeparate<["-"], "I">, Alias<input_target>;
+
+defm output_target : Eq<"output-target", "Format of the output file">,
Values<"binary">;
-def O : JoinedOrSeparate<["-"], "O">,
- Alias<output_target>;
-defm split_dwo : Eq<"split-dwo">,
- MetaVarName<"dwo-file">,
- HelpText<"Equivalent to extract-dwo on the input file to <dwo-file>, then strip-dwo on the input file">;
-defm add_gnu_debuglink : Eq<"add-gnu-debuglink">,
- MetaVarName<"debug-file">,
- HelpText<"Add a .gnu_debuglink for <debug-file>">;
-defm remove_section : Eq<"remove-section">,
- MetaVarName<"section">,
- HelpText<"Remove <section>">;
-defm rename_section : Eq<"rename-section">,
- MetaVarName<"old=new">,
- HelpText<"Renames a section from old to new">;
-defm redefine_symbol : Eq<"redefine-sym">,
- MetaVarName<"old=new">,
- HelpText<"Change the name of a symbol old to new">;
-def R : JoinedOrSeparate<["-"], "R">,
- Alias<remove_section>;
-defm keep : Eq<"keep">,
- MetaVarName<"section">,
- HelpText<"Keep <section>">;
-defm only_keep : Eq<"only-keep">,
- MetaVarName<"section">,
- HelpText<"Remove all but <section>">;
-def j : JoinedOrSeparate<["-"], "j">,
- Alias<only_keep>;
-defm add_section : Eq<"add-section">,
- MetaVarName<"section=file">,
- HelpText<"Make a section named <section> with the contents of <file>.">;
-def strip_all : Flag<["-", "--"], "strip-all">,
- HelpText<"Remove non-allocated sections other than .gnu.warning* sections">;
+def O : JoinedOrSeparate<["-"], "O">, Alias<output_target>;
+
+def compress_debug_sections : Flag<["--", "-"], "compress-debug-sections">;
+def compress_debug_sections_eq
+ : Joined<["--", "-"], "compress-debug-sections=">,
+ MetaVarName<"[ zlib | zlib-gnu ]">,
+ HelpText<"Compress DWARF debug sections using specified style. Supported "
+ "styles: 'zlib-gnu' and 'zlib'">;
+def decompress_debug_sections : Flag<["-", "--"], "decompress-debug-sections">,
+ HelpText<"Decompress DWARF debug sections.">;
+defm split_dwo
+ : Eq<"split-dwo", "Equivalent to extract-dwo on the input file to "
+ "<dwo-file>, then strip-dwo on the input file">,
+ MetaVarName<"dwo-file">;
+
+def enable_deterministic_archives
+ : Flag<["-", "--"], "enable-deterministic-archives">,
+ HelpText<"Enable deterministic mode when copying archives (use zero for "
+ "UIDs, GIDs, and timestamps).">;
+def D : Flag<["-"], "D">,
+ Alias<enable_deterministic_archives>,
+ HelpText<"Alias for --enable-deterministic-archives">;
+
+def disable_deterministic_archives
+ : Flag<["-", "--"], "disable-deterministic-archives">,
+ HelpText<"Disable deterministic mode when copying archives (use real "
+ "values for UIDs, GIDs, and timestamps).">;
+def U : Flag<["-"], "U">,
+ Alias<disable_deterministic_archives>,
+ HelpText<"Alias for --disable-deterministic-archives">;
+
+def preserve_dates : Flag<["-", "--"], "preserve-dates">,
+ HelpText<"Preserve access and modification timestamps">;
+def p : Flag<["-"], "p">, Alias<preserve_dates>;
+
+defm add_gnu_debuglink
+ : Eq<"add-gnu-debuglink", "Add a .gnu_debuglink for <debug-file>">,
+ MetaVarName<"debug-file">;
+
+defm remove_section : Eq<"remove-section", "Remove <section>">,
+ MetaVarName<"section">;
+def R : JoinedOrSeparate<["-"], "R">, Alias<remove_section>;
+
+defm rename_section
+ : Eq<"rename-section",
+ "Renames a section from old to new, optionally with specified flags. "
+ "Flags supported for GNU compatibility: alloc, load, noload, "
+ "readonly, debug, code, data, rom, share, contents, merge, strings.">,
+ MetaVarName<"old=new[,flag1,...]">;
+defm redefine_symbol
+ : Eq<"redefine-sym", "Change the name of a symbol old to new">,
+ MetaVarName<"old=new">;
+defm keep_section : Eq<"keep-section", "Keep <section>">,
+ MetaVarName<"section">;
+defm only_section : Eq<"only-section", "Remove all but <section>">,
+ MetaVarName<"section">;
+def j : JoinedOrSeparate<["-"], "j">, Alias<only_section>;
+defm add_section
+ : Eq<"add-section",
+ "Make a section named <section> with the contents of <file>.">,
+ MetaVarName<"section=file">;
+
+def strip_all
+ : Flag<["-", "--"], "strip-all">,
+ HelpText<
+ "Remove non-allocated sections other than .gnu.warning* sections">;
+def S : Flag<["-"], "S">, Alias<strip_all>;
def strip_all_gnu : Flag<["-", "--"], "strip-all-gnu">,
- HelpText<"Compaitable with GNU objcopy's --strip-all">;
+ HelpText<"Compatible with GNU objcopy's --strip-all">;
def strip_debug : Flag<["-", "--"], "strip-debug">,
HelpText<"Remove all debug information">;
def strip_dwo : Flag<["-", "--"], "strip-dwo">,
@@ -58,42 +101,80 @@ def strip_sections : Flag<["-", "--"], "strip-sections">,
HelpText<"Remove all section headers">;
def strip_non_alloc : Flag<["-", "--"], "strip-non-alloc">,
HelpText<"Remove all non-allocated sections">;
-def extract_dwo : Flag<["-", "--"], "extract-dwo">,
- HelpText<"Remove all sections that are not DWARF .dwo sections from file">;
-def localize_hidden : Flag<["-", "--"], "localize-hidden">,
- HelpText<"Mark all symbols that have hidden or internal visibility as local">;
-defm localize_symbol : Eq<"localize-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Mark <symbol> as local">;
-def L : JoinedOrSeparate<["-"], "L">,
- Alias<localize_symbol>;
-defm globalize_symbol : Eq<"globalize-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Mark <symbol> as global">;
-defm weaken_symbol : Eq<"weaken-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Mark <symbol> as weak">;
-def W : JoinedOrSeparate<["-"], "W">,
- Alias<weaken_symbol>;
-def weaken : Flag<["-", "--"], "weaken">,
- HelpText<"Mark all global symbols as weak">;
-def discard_all : Flag<["-", "--"], "discard-all">,
- HelpText<"Remove all local symbols except file and section symbols">;
-def x : Flag<["-"], "x">,
- Alias<discard_all>;
-defm strip_symbol : Eq<"strip-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Remove symbol <symbol>">;
-def N : JoinedOrSeparate<["-"], "N">,
- Alias<strip_symbol>;
-defm keep_symbol : Eq<"keep-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Do not remove symbol <symbol>">;
-def K : JoinedOrSeparate<["-"], "K">,
- Alias<keep_symbol>;
-def only_keep_debug : Flag<["-", "--"], "only-keep-debug">,
- HelpText<"Currently ignored. Only for compaitability with GNU objcopy.">;
def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
- HelpText<"Remove all symbols not needed by relocations">;
+ HelpText<"Remove all symbols not needed by relocations">;
+
+def extract_dwo
+ : Flag<["-", "--"], "extract-dwo">,
+ HelpText<
+ "Remove all sections that are not DWARF .dwo sections from file">;
+
+def localize_hidden
+ : Flag<["-", "--"], "localize-hidden">,
+ HelpText<
+ "Mark all symbols that have hidden or internal visibility as local">;
+defm localize_symbol : Eq<"localize-symbol", "Mark <symbol> as local">,
+ MetaVarName<"symbol">;
+def L : JoinedOrSeparate<["-"], "L">, Alias<localize_symbol>;
+
+defm globalize_symbol : Eq<"globalize-symbol", "Mark <symbol> as global">,
+ MetaVarName<"symbol">;
+defm keep_global_symbol
+ : Eq<"keep-global-symbol",
+ "Convert all symbols except <symbol> to local. May be repeated to "
+ "convert all except a set of symbols to local.">,
+ MetaVarName<"symbol">;
+def G : JoinedOrSeparate<["-"], "G">, Alias<keep_global_symbol>;
+
+defm keep_global_symbols
+ : Eq<"keep-global-symbols",
+ "Reads a list of symbols from <filename> and runs as if "
+ "--keep-global-symbol=<symbol> is set for each one. <filename> "
+ "contains one symbol per line and may contain comments beginning with "
+ "'#'. Leading and trailing whitespace is stripped from each line. May "
+ "be repeated to read symbols from many files.">,
+ MetaVarName<"filename">;
+
+defm weaken_symbol : Eq<"weaken-symbol", "Mark <symbol> as weak">,
+ MetaVarName<"symbol">;
+def W : JoinedOrSeparate<["-"], "W">, Alias<weaken_symbol>;
+def weaken : Flag<["-", "--"], "weaken">,
+ HelpText<"Mark all global symbols as weak">;
+def discard_all
+ : Flag<["-", "--"], "discard-all">,
+ HelpText<"Remove all local symbols except file and section symbols">;
+def x : Flag<["-"], "x">, Alias<discard_all>;
+defm strip_symbol : Eq<"strip-symbol", "Remove symbol <symbol>">,
+ MetaVarName<"symbol">;
+def N : JoinedOrSeparate<["-"], "N">, Alias<strip_symbol>;
+defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
+ MetaVarName<"symbol">;
+def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
+def only_keep_debug
+ : Flag<["-", "--"], "only-keep-debug">,
+ HelpText<"Currently ignored. Only for compatibility with GNU objcopy.">;
def keep_file_symbols : Flag<["-", "--"], "keep-file-symbols">,
- HelpText<"Do not remove file symbols">;
+ HelpText<"Do not remove file symbols">;
+defm dump_section
+ : Eq<"dump-section",
+ "Dump contents of section named <section> into file <file>">,
+ MetaVarName<"section=file">;
+defm prefix_symbols
+ : Eq<"prefix-symbols", "Add <prefix> to the start of every symbol name">,
+ MetaVarName<"prefix">;
+
+def version : Flag<["-", "--"], "version">,
+ HelpText<"Print the version and exit.">;
+def V : Flag<["-"], "V">, Alias<version>;
+defm build_id_link_dir
+ : Eq<"build-id-link-dir", "Set directory for --build-id-link-input and "
+ "--build-id-link-output to <dir>">,
+ MetaVarName<"dir">;
+defm build_id_link_input
+ : Eq<"build-id-link-input", "Hard-link the input to <dir>/xx/xxx<suffix> "
+ "name derived from hex build ID">,
+ MetaVarName<"suffix">;
+defm build_id_link_output
+ : Eq<"build-id-link-output", "Hard-link the output to <dir>/xx/xxx<suffix> "
+ "name derived from hex build ID">,
+ MetaVarName<"suffix">;
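The reworked Eq multiclass at the top of this file still produces two records per option, a Separate form (--name value) and a Joined form (--name=value) that aliases it, but it now carries the help text itself so the text is written only once. Because the joined spelling is an alias, the C++ driver consumes either spelling through a single option ID; a hedged sketch using the OBJCOPY_* IDs generated from this file (mirrors the getLastArgValue / filtered calls removed from llvm-objcopy.cpp later in this diff):

    // Sketch only; T is an ObjcopyOptTable and Argv the raw arguments.
    llvm::opt::InputArgList Args = T.ParseArgs(Argv, MissingIdx, MissingCount);
    // --split-dwo foo.dwo and --split-dwo=foo.dwo both resolve to this ID.
    llvm::StringRef SplitDWO = Args.getLastArgValue(OBJCOPY_split_dwo);
    for (const auto *Arg : Args.filtered(OBJCOPY_remove_section))
      Removals.push_back(Arg->getValue());
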
diff --git a/contrib/llvm/tools/llvm-objcopy/StripOpts.td b/contrib/llvm/tools/llvm-objcopy/StripOpts.td
index 333b0d288efa..fa98e27e9321 100644
--- a/contrib/llvm/tools/llvm-objcopy/StripOpts.td
+++ b/contrib/llvm/tools/llvm-objcopy/StripOpts.td
@@ -1,49 +1,67 @@
include "llvm/Option/OptParser.td"
-multiclass Eq<string name> {
- def NAME: Separate<["--", "-"], name>;
- def NAME # _eq: Joined<["--", "-"], name # "=">, Alias<!cast<Separate>(NAME)>;
+multiclass Eq<string name, string help> {
+ def NAME : Separate<["--", "-"], name>;
+ def NAME #_eq : Joined<["--", "-"], name #"=">,
+ Alias<!cast<Separate>(NAME)>,
+ HelpText<help>;
}
def help : Flag<["-", "--"], "help">;
-defm output : Eq<"o">,
- MetaVarName<"output">,
- HelpText<"Write output to <file>">;
+def enable_deterministic_archives
+ : Flag<["-", "--"], "enable-deterministic-archives">,
+ HelpText<"Enable deterministic mode when stripping archives (use zero "
+ "for UIDs, GIDs, and timestamps).">;
+def D : Flag<["-"], "D">,
+ Alias<enable_deterministic_archives>,
+ HelpText<"Alias for --enable-deterministic-archives">;
-def strip_all : Flag<["-", "--"], "strip-all">,
- HelpText<"Remove non-allocated sections other than .gnu.warning* sections">;
+def disable_deterministic_archives
+ : Flag<["-", "--"], "disable-deterministic-archives">,
+ HelpText<"Disable deterministic mode when stripping archives (use real "
+ "values for UIDs, GIDs, and timestamps).">;
+def U : Flag<["-"], "U">,
+ Alias<disable_deterministic_archives>,
+ HelpText<"Alias for --disable-deterministic-archives">;
-def strip_debug : Flag<["-", "--"], "strip-debug">,
- HelpText<"Remove debugging symbols only">;
-
-def d : Flag<["-"], "d">,
- Alias<strip_debug>;
-
-def g : Flag<["-"], "g">,
- Alias<strip_debug>;
+defm output : Eq<"o", "Write output to <file>">, MetaVarName<"output">;
-def S : Flag<["-"], "S">,
- Alias<strip_debug>;
+def preserve_dates : Flag<["-", "--"], "preserve-dates">,
+ HelpText<"Preserve access and modification timestamps">;
+def p : Flag<["-"], "p">, Alias<preserve_dates>;
-defm remove_section : Eq<"remove-section">,
- MetaVarName<"section">,
- HelpText<"Remove <section>">;
+def strip_all
+ : Flag<["-", "--"], "strip-all">,
+ HelpText<
+ "Remove non-allocated sections other than .gnu.warning* sections">;
+def s : Flag<["-"], "s">, Alias<strip_all>;
-def R : JoinedOrSeparate<["-"], "R">,
- Alias<remove_section>;
+def strip_all_gnu : Flag<["-", "--"], "strip-all-gnu">,
+ HelpText<"Compatible with GNU strip's --strip-all">;
+def strip_debug : Flag<["-", "--"], "strip-debug">,
+ HelpText<"Remove debugging symbols only">;
+def d : Flag<["-"], "d">, Alias<strip_debug>;
+def g : Flag<["-"], "g">, Alias<strip_debug>;
+def S : Flag<["-"], "S">, Alias<strip_debug>;
+def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
+ HelpText<"Remove all symbols not needed by relocations">;
-defm keep_symbol : Eq<"keep-symbol">,
- MetaVarName<"symbol">,
- HelpText<"Do not remove symbol <symbol>">;
+defm remove_section : Eq<"remove-section", "Remove <section>">,
+ MetaVarName<"section">;
+def R : JoinedOrSeparate<["-"], "R">, Alias<remove_section>;
-def K : JoinedOrSeparate<["-"], "K">,
- Alias<keep_symbol>;
+defm keep_section : Eq<"keep-section", "Keep <section>">,
+ MetaVarName<"section">;
+defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
+ MetaVarName<"symbol">;
+def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
-def discard_all : Flag<["-", "--"], "discard-all">,
- HelpText<"Remove all local symbols except file and section symbols">;
-def x : Flag<["-"], "x">,
- Alias<discard_all>;
+def discard_all
+ : Flag<["-", "--"], "discard-all">,
+ HelpText<"Remove all local symbols except file and section symbols">;
+def x : Flag<["-"], "x">, Alias<discard_all>;
-def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
- HelpText<"Remove all symbols not needed by relocations">;
+def version : Flag<["-", "--"], "version">,
+ HelpText<"Print the version and exit.">;
+def V : Flag<["-"], "V">, Alias<version>;
diff --git a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index 21a1622db765..fb1ff18b015b 100644
--- a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -8,14 +8,19 @@
//===----------------------------------------------------------------------===//
#include "llvm-objcopy.h"
-#include "Object.h"
+#include "Buffer.h"
+#include "COFF/COFFObjcopy.h"
+#include "CopyConfig.h"
+#include "ELF/ELFObjcopy.h"
+
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
@@ -23,137 +28,23 @@
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
-#include <functional>
-#include <iterator>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
-using namespace llvm;
-using namespace llvm::objcopy;
-using namespace object;
-using namespace ELF;
-
-namespace {
-
-enum ObjcopyID {
- OBJCOPY_INVALID = 0, // This is not an option ID.
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
- HELPTEXT, METAVAR, VALUES) \
- OBJCOPY_##ID,
-#include "ObjcopyOpts.inc"
-#undef OPTION
-};
-
-#define PREFIX(NAME, VALUE) const char *const OBJCOPY_##NAME[] = VALUE;
-#include "ObjcopyOpts.inc"
-#undef PREFIX
-
-static const opt::OptTable::Info ObjcopyInfoTable[] = {
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
- HELPTEXT, METAVAR, VALUES) \
- {OBJCOPY_##PREFIX, \
- NAME, \
- HELPTEXT, \
- METAVAR, \
- OBJCOPY_##ID, \
- opt::Option::KIND##Class, \
- PARAM, \
- FLAGS, \
- OBJCOPY_##GROUP, \
- OBJCOPY_##ALIAS, \
- ALIASARGS, \
- VALUES},
-#include "ObjcopyOpts.inc"
-#undef OPTION
-};
-
-class ObjcopyOptTable : public opt::OptTable {
-public:
- ObjcopyOptTable() : OptTable(ObjcopyInfoTable, true) {}
-};
-
-enum StripID {
- STRIP_INVALID = 0, // This is not an option ID.
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
- HELPTEXT, METAVAR, VALUES) \
- STRIP_##ID,
-#include "StripOpts.inc"
-#undef OPTION
-};
-
-#define PREFIX(NAME, VALUE) const char *const STRIP_##NAME[] = VALUE;
-#include "StripOpts.inc"
-#undef PREFIX
-
-static const opt::OptTable::Info StripInfoTable[] = {
-#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
- HELPTEXT, METAVAR, VALUES) \
- {STRIP_##PREFIX, NAME, HELPTEXT, \
- METAVAR, STRIP_##ID, opt::Option::KIND##Class, \
- PARAM, FLAGS, STRIP_##GROUP, \
- STRIP_##ALIAS, ALIASARGS, VALUES},
-#include "StripOpts.inc"
-#undef OPTION
-};
-
-class StripOptTable : public opt::OptTable {
-public:
- StripOptTable() : OptTable(StripInfoTable, true) {}
-};
-
-struct CopyConfig {
- StringRef OutputFilename;
- StringRef InputFilename;
- StringRef OutputFormat;
- StringRef InputFormat;
- StringRef BinaryArch;
-
- StringRef SplitDWO;
- StringRef AddGnuDebugLink;
- std::vector<StringRef> ToRemove;
- std::vector<StringRef> Keep;
- std::vector<StringRef> OnlyKeep;
- std::vector<StringRef> AddSection;
- std::vector<StringRef> SymbolsToLocalize;
- std::vector<StringRef> SymbolsToGlobalize;
- std::vector<StringRef> SymbolsToWeaken;
- std::vector<StringRef> SymbolsToRemove;
- std::vector<StringRef> SymbolsToKeep;
- StringMap<StringRef> SectionsToRename;
- StringMap<StringRef> SymbolsToRename;
- bool StripAll = false;
- bool StripAllGNU = false;
- bool StripDebug = false;
- bool StripSections = false;
- bool StripNonAlloc = false;
- bool StripDWO = false;
- bool StripUnneeded = false;
- bool ExtractDWO = false;
- bool LocalizeHidden = false;
- bool Weaken = false;
- bool DiscardAll = false;
- bool OnlyKeepDebug = false;
- bool KeepFileSymbols = false;
-};
-
-using SectionPred = std::function<bool(const SectionBase &Sec)>;
-
-} // namespace
-
namespace llvm {
namespace objcopy {
@@ -161,14 +52,15 @@ namespace objcopy {
StringRef ToolName;
LLVM_ATTRIBUTE_NORETURN void error(Twine Message) {
- errs() << ToolName << ": " << Message << ".\n";
+ WithColor::error(errs(), ToolName) << Message << ".\n";
errs().flush();
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, std::error_code EC) {
assert(EC);
- errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n";
+ WithColor::error(errs(), ToolName)
+ << "'" << File << "': " << EC.message() << ".\n";
exit(1);
}
@@ -176,304 +68,18 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
assert(E);
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
- errs() << ToolName << ": '" << File << "': " << Buf;
+ WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf;
exit(1);
}
} // end namespace objcopy
} // end namespace llvm
-static bool IsDebugSection(const SectionBase &Sec) {
- return Sec.Name.startswith(".debug") || Sec.Name.startswith(".zdebug") ||
- Sec.Name == ".gdb_index";
-}
-
-static bool IsDWOSection(const SectionBase &Sec) {
- return Sec.Name.endswith(".dwo");
-}
-
-static bool OnlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
- // We can't remove the section header string table.
- if (&Sec == Obj.SectionNames)
- return false;
- // Short of keeping the string table we want to keep everything that is a DWO
- // section and remove everything else.
- return !IsDWOSection(Sec);
-}
-
-static std::unique_ptr<Writer> CreateWriter(const CopyConfig &Config,
- Object &Obj, Buffer &Buf,
- ElfType OutputElfType) {
- if (Config.OutputFormat == "binary") {
- return llvm::make_unique<BinaryWriter>(Obj, Buf);
- }
- // Depending on the initial ELFT and OutputFormat we need a different Writer.
- switch (OutputElfType) {
- case ELFT_ELF32LE:
- return llvm::make_unique<ELFWriter<ELF32LE>>(Obj, Buf,
- !Config.StripSections);
- case ELFT_ELF64LE:
- return llvm::make_unique<ELFWriter<ELF64LE>>(Obj, Buf,
- !Config.StripSections);
- case ELFT_ELF32BE:
- return llvm::make_unique<ELFWriter<ELF32BE>>(Obj, Buf,
- !Config.StripSections);
- case ELFT_ELF64BE:
- return llvm::make_unique<ELFWriter<ELF64BE>>(Obj, Buf,
- !Config.StripSections);
- }
- llvm_unreachable("Invalid output format");
-}
-
-static void SplitDWOToFile(const CopyConfig &Config, const Reader &Reader,
- StringRef File, ElfType OutputElfType) {
- auto DWOFile = Reader.create();
- DWOFile->removeSections(
- [&](const SectionBase &Sec) { return OnlyKeepDWOPred(*DWOFile, Sec); });
- FileBuffer FB(File);
- auto Writer = CreateWriter(Config, *DWOFile, FB, OutputElfType);
- Writer->finalize();
- Writer->write();
-}
-
-// This function handles the high level operations of GNU objcopy including
-// handling command line options. It's important to outline certain properties
-// we expect to hold of the command line operations. Any operation that "keeps"
-// should keep regardless of a remove. Additionally any removal should respect
-// any previous removals. Lastly whether or not something is removed shouldn't
-// depend a) on the order the options occur in or b) on some opaque priority
-// system. The only priority is that keeps/copies overrule removes.
-static void HandleArgs(const CopyConfig &Config, Object &Obj,
- const Reader &Reader, ElfType OutputElfType) {
-
- if (!Config.SplitDWO.empty()) {
- SplitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType);
- }
-
- // TODO: update or remove symbols only if there is an option that affects
- // them.
- if (Obj.SymbolTable) {
- Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
- if ((Config.LocalizeHidden &&
- (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
- (!Config.SymbolsToLocalize.empty() &&
- is_contained(Config.SymbolsToLocalize, Sym.Name)))
- Sym.Binding = STB_LOCAL;
-
- if (!Config.SymbolsToGlobalize.empty() &&
- is_contained(Config.SymbolsToGlobalize, Sym.Name))
- Sym.Binding = STB_GLOBAL;
-
- if (!Config.SymbolsToWeaken.empty() &&
- is_contained(Config.SymbolsToWeaken, Sym.Name) &&
- Sym.Binding == STB_GLOBAL)
- Sym.Binding = STB_WEAK;
-
- if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
- Sym.getShndx() != SHN_UNDEF)
- Sym.Binding = STB_WEAK;
-
- const auto I = Config.SymbolsToRename.find(Sym.Name);
- if (I != Config.SymbolsToRename.end())
- Sym.Name = I->getValue();
- });
-
- // The purpose of this loop is to mark symbols referenced by sections
- // (like GroupSection or RelocationSection). This way, we know which
- // symbols are still 'needed' and wich are not.
- if (Config.StripUnneeded) {
- for (auto &Section : Obj.sections())
- Section.markSymbols();
- }
-
- Obj.removeSymbols([&](const Symbol &Sym) {
- if ((!Config.SymbolsToKeep.empty() &&
- is_contained(Config.SymbolsToKeep, Sym.Name)) ||
- (Config.KeepFileSymbols && Sym.Type == STT_FILE))
- return false;
-
- if (Config.DiscardAll && Sym.Binding == STB_LOCAL &&
- Sym.getShndx() != SHN_UNDEF && Sym.Type != STT_FILE &&
- Sym.Type != STT_SECTION)
- return true;
-
- if (Config.StripAll || Config.StripAllGNU)
- return true;
-
- if (!Config.SymbolsToRemove.empty() &&
- is_contained(Config.SymbolsToRemove, Sym.Name)) {
- return true;
- }
-
- if (Config.StripUnneeded && !Sym.Referenced &&
- (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
- Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
- return true;
-
- return false;
- });
- }
-
- SectionPred RemovePred = [](const SectionBase &) { return false; };
-
- // Removes:
- if (!Config.ToRemove.empty()) {
- RemovePred = [&Config](const SectionBase &Sec) {
- return find(Config.ToRemove, Sec.Name) != Config.ToRemove.end();
- };
- }
-
- if (Config.StripDWO || !Config.SplitDWO.empty())
- RemovePred = [RemovePred](const SectionBase &Sec) {
- return IsDWOSection(Sec) || RemovePred(Sec);
- };
-
- if (Config.ExtractDWO)
- RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
- return OnlyKeepDWOPred(Obj, Sec) || RemovePred(Sec);
- };
-
- if (Config.StripAllGNU)
- RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
- if (RemovePred(Sec))
- return true;
- if ((Sec.Flags & SHF_ALLOC) != 0)
- return false;
- if (&Sec == Obj.SectionNames)
- return false;
- switch (Sec.Type) {
- case SHT_SYMTAB:
- case SHT_REL:
- case SHT_RELA:
- case SHT_STRTAB:
- return true;
- }
- return IsDebugSection(Sec);
- };
-
- if (Config.StripSections) {
- RemovePred = [RemovePred](const SectionBase &Sec) {
- return RemovePred(Sec) || (Sec.Flags & SHF_ALLOC) == 0;
- };
- }
-
- if (Config.StripDebug) {
- RemovePred = [RemovePred](const SectionBase &Sec) {
- return RemovePred(Sec) || IsDebugSection(Sec);
- };
- }
-
- if (Config.StripNonAlloc)
- RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
- if (RemovePred(Sec))
- return true;
- if (&Sec == Obj.SectionNames)
- return false;
- return (Sec.Flags & SHF_ALLOC) == 0;
- };
-
- if (Config.StripAll)
- RemovePred = [RemovePred, &Obj](const SectionBase &Sec) {
- if (RemovePred(Sec))
- return true;
- if (&Sec == Obj.SectionNames)
- return false;
- if (Sec.Name.startswith(".gnu.warning"))
- return false;
- return (Sec.Flags & SHF_ALLOC) == 0;
- };
-
- // Explicit copies:
- if (!Config.OnlyKeep.empty()) {
- RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
- // Explicitly keep these sections regardless of previous removes.
- if (find(Config.OnlyKeep, Sec.Name) != Config.OnlyKeep.end())
- return false;
-
- // Allow all implicit removes.
- if (RemovePred(Sec))
- return true;
-
- // Keep special sections.
- if (Obj.SectionNames == &Sec)
- return false;
- if (Obj.SymbolTable == &Sec ||
- (Obj.SymbolTable && Obj.SymbolTable->getStrTab() == &Sec))
- return false;
-
- // Remove everything else.
- return true;
- };
- }
-
- if (!Config.Keep.empty()) {
- RemovePred = [Config, RemovePred](const SectionBase &Sec) {
- // Explicitly keep these sections regardless of previous removes.
- if (find(Config.Keep, Sec.Name) != Config.Keep.end())
- return false;
- // Otherwise defer to RemovePred.
- return RemovePred(Sec);
- };
- }
-
- // This has to be the last predicate assignment.
- // If the option --keep-symbol has been specified
- // and at least one of those symbols is present
- // (equivalently, the updated symbol table is not empty)
- // the symbol table and the string table should not be removed.
- if ((!Config.SymbolsToKeep.empty() || Config.KeepFileSymbols) &&
- Obj.SymbolTable && !Obj.SymbolTable->empty()) {
- RemovePred = [&Obj, RemovePred](const SectionBase &Sec) {
- if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab())
- return false;
- return RemovePred(Sec);
- };
- }
-
- Obj.removeSections(RemovePred);
-
- if (!Config.SectionsToRename.empty()) {
- for (auto &Sec : Obj.sections()) {
- const auto Iter = Config.SectionsToRename.find(Sec.Name);
- if (Iter != Config.SectionsToRename.end())
- Sec.Name = Iter->second;
- }
- }
-
- if (!Config.AddSection.empty()) {
- for (const auto &Flag : Config.AddSection) {
- auto SecPair = Flag.split("=");
- auto SecName = SecPair.first;
- auto File = SecPair.second;
- auto BufOrErr = MemoryBuffer::getFile(File);
- if (!BufOrErr)
- reportError(File, BufOrErr.getError());
- auto Buf = std::move(*BufOrErr);
- auto BufPtr = reinterpret_cast<const uint8_t *>(Buf->getBufferStart());
- auto BufSize = Buf->getBufferSize();
- Obj.addSection<OwnedDataSection>(SecName,
- ArrayRef<uint8_t>(BufPtr, BufSize));
- }
- }
-
- if (!Config.AddGnuDebugLink.empty())
- Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink);
-}
-
-static void ExecuteElfObjcopyOnBinary(const CopyConfig &Config, Binary &Binary,
- Buffer &Out) {
- ELFReader Reader(&Binary);
- std::unique_ptr<Object> Obj = Reader.create();
-
- HandleArgs(Config, *Obj, Reader, Reader.getElfType());
-
- std::unique_ptr<Writer> Writer =
- CreateWriter(Config, *Obj, Out, Reader.getElfType());
- Writer->finalize();
- Writer->write();
-}
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::objcopy;
// For regular archives this function simply calls llvm::writeArchive,
// For thin archives it writes the archive file itself as well as its members.
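The HandleArgs body removed above (it moves into ELF/ELFObjcopy.cpp) builds its section filter by repeatedly wrapping the previous RemovePred in a new lambda and applying the explicit keep checks last; that layering is what makes "keeps and copies overrule removes" hold regardless of option order. A condensed, self-contained sketch of the composition pattern (hypothetical options, same technique):

    #include <functional>
    #include <string>

    struct Sec { std::string Name; bool Alloc; };
    using SectionPred = std::function<bool(const Sec &)>;

    SectionPred buildRemovePred(bool StripNonAlloc, bool KeepDebug) {
      SectionPred Remove = [](const Sec &) { return false; };

      // Each option layers on top of the predicate built so far...
      if (StripNonAlloc)
        Remove = [Remove](const Sec &S) { return Remove(S) || !S.Alloc; };

      // ...and explicit keeps are applied last, so they win over every remove.
      if (KeepDebug)
        Remove = [Remove](const Sec &S) {
          if (S.Name.rfind(".debug", 0) == 0)  // keep anything named .debug*
            return false;
          return Remove(S);
        };
      return Remove;
    }
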
@@ -504,22 +110,48 @@ static Error deepWriteArchive(StringRef ArcName,
return Error::success();
}
-static void ExecuteElfObjcopyOnArchive(const CopyConfig &Config, const Archive &Ar) {
+/// The function executeObjcopyOnRawBinary does the dispatch based on the format
+/// of the output specified by the command line options.
+static void executeObjcopyOnRawBinary(const CopyConfig &Config,
+ MemoryBuffer &In, Buffer &Out) {
+ // TODO: llvm-objcopy should parse CopyConfig.OutputFormat to recognize
+ // formats other than ELF / "binary" and invoke
+ // elf::executeObjcopyOnRawBinary, macho::executeObjcopyOnRawBinary or
+ // coff::executeObjcopyOnRawBinary accordingly.
+ return elf::executeObjcopyOnRawBinary(Config, In, Out);
+}
+
+/// The function executeObjcopyOnBinary does the dispatch based on the format
+/// of the input binary (ELF, MachO or COFF).
+static void executeObjcopyOnBinary(const CopyConfig &Config, object::Binary &In,
+ Buffer &Out) {
+ if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In))
+ return elf::executeObjcopyOnBinary(Config, *ELFBinary, Out);
+ else if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In))
+ return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
+ else
+ error("Unsupported object file format");
+}
+
+static void executeObjcopyOnArchive(const CopyConfig &Config,
+ const Archive &Ar) {
std::vector<NewArchiveMember> NewArchiveMembers;
Error Err = Error::success();
for (const Archive::Child &Child : Ar.children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
if (!ChildOrErr)
reportError(Ar.getFileName(), ChildOrErr.takeError());
+ Binary *Bin = ChildOrErr->get();
+
Expected<StringRef> ChildNameOrErr = Child.getName();
if (!ChildNameOrErr)
reportError(Ar.getFileName(), ChildNameOrErr.takeError());
MemBuffer MB(ChildNameOrErr.get());
- ExecuteElfObjcopyOnBinary(Config, **ChildOrErr, MB);
+ executeObjcopyOnBinary(Config, *Bin, MB);
Expected<NewArchiveMember> Member =
- NewArchiveMember::getOldMember(Child, true);
+ NewArchiveMember::getOldMember(Child, Config.DeterministicArchives);
if (!Member)
reportError(Ar.getFileName(), Member.takeError());
Member->Buf = MB.releaseMemoryBuffer();
@@ -529,180 +161,72 @@ static void ExecuteElfObjcopyOnArchive(const CopyConfig &Config, const Archive &
if (Err)
reportError(Config.InputFilename, std::move(Err));
- if (Error E =
- deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
- Ar.hasSymbolTable(), Ar.kind(), true, Ar.isThin()))
+ if (Error E = deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
+ Ar.hasSymbolTable(), Ar.kind(),
+ Config.DeterministicArchives, Ar.isThin()))
reportError(Config.OutputFilename, std::move(E));
}
-static void ExecuteElfObjcopy(const CopyConfig &Config) {
- Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
- createBinary(Config.InputFilename);
- if (!BinaryOrErr)
- reportError(Config.InputFilename, BinaryOrErr.takeError());
-
- if (Archive *Ar = dyn_cast<Archive>(BinaryOrErr.get().getBinary()))
- return ExecuteElfObjcopyOnArchive(Config, *Ar);
-
- FileBuffer FB(Config.OutputFilename);
- ExecuteElfObjcopyOnBinary(Config, *BinaryOrErr.get().getBinary(), FB);
-}
-
-// ParseObjcopyOptions returns the config and sets the input arguments. If a
-// help flag is set then ParseObjcopyOptions will print the help messege and
-// exit.
-static CopyConfig ParseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
- ObjcopyOptTable T;
- unsigned MissingArgumentIndex, MissingArgumentCount;
- llvm::opt::InputArgList InputArgs =
- T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
-
- if (InputArgs.size() == 0) {
- T.PrintHelp(errs(), "llvm-objcopy <input> [ <output> ]", "objcopy tool");
- exit(1);
- }
-
- if (InputArgs.hasArg(OBJCOPY_help)) {
- T.PrintHelp(outs(), "llvm-objcopy <input> [ <output> ]", "objcopy tool");
- exit(0);
- }
-
- SmallVector<const char *, 2> Positional;
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_UNKNOWN))
- error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_INPUT))
- Positional.push_back(Arg->getValue());
-
- if (Positional.empty())
- error("No input file specified");
-
- if (Positional.size() > 2)
- error("Too many positional arguments");
-
- CopyConfig Config;
- Config.InputFilename = Positional[0];
- Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1];
- Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
- Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
- Config.BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
-
- Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo);
- Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink);
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) {
- if (!StringRef(Arg->getValue()).contains('='))
- error("Bad format for --redefine-sym");
- auto Old2New = StringRef(Arg->getValue()).split('=');
- if (!Config.SymbolsToRename.insert(Old2New).second)
- error("Multiple redefinition of symbol " + Old2New.first);
- }
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_rename_section)) {
- if (!StringRef(Arg->getValue()).contains('='))
- error("Bad format for --rename-section");
- auto Old2New = StringRef(Arg->getValue()).split('=');
- if (!Config.SectionsToRename.insert(Old2New).second)
- error("Already have a section rename for " + Old2New.first);
- }
-
- for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section))
- Config.ToRemove.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_keep))
- Config.Keep.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_only_keep))
- Config.OnlyKeep.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_add_section))
- Config.AddSection.push_back(Arg->getValue());
- Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all);
- Config.StripAllGNU = InputArgs.hasArg(OBJCOPY_strip_all_gnu);
- Config.StripDebug = InputArgs.hasArg(OBJCOPY_strip_debug);
- Config.StripDWO = InputArgs.hasArg(OBJCOPY_strip_dwo);
- Config.StripSections = InputArgs.hasArg(OBJCOPY_strip_sections);
- Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc);
- Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded);
- Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
- Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
- Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
- Config.DiscardAll = InputArgs.hasArg(OBJCOPY_discard_all);
- Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
- Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
- for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
- Config.SymbolsToLocalize.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol))
- Config.SymbolsToGlobalize.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol))
- Config.SymbolsToWeaken.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol))
- Config.SymbolsToRemove.push_back(Arg->getValue());
- for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol))
- Config.SymbolsToKeep.push_back(Arg->getValue());
-
- return Config;
-}
-
-// ParseStripOptions returns the config and sets the input arguments. If a
-// help flag is set then ParseStripOptions will print the help messege and
-// exit.
-static CopyConfig ParseStripOptions(ArrayRef<const char *> ArgsArr) {
- StripOptTable T;
- unsigned MissingArgumentIndex, MissingArgumentCount;
- llvm::opt::InputArgList InputArgs =
- T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
-
- if (InputArgs.size() == 0) {
- T.PrintHelp(errs(), "llvm-strip <input> [ <output> ]", "strip tool");
- exit(1);
+static void restoreDateOnFile(StringRef Filename,
+ const sys::fs::file_status &Stat) {
+ int FD;
+
+ if (auto EC =
+ sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
+ reportError(Filename, EC);
+
+ if (auto EC = sys::fs::setLastAccessAndModificationTime(
+ FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
+ reportError(Filename, EC);
+
+ if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
+ reportError(Filename, EC);
+}
+
+/// The function executeObjcopy does the higher level dispatch based on the type
+/// of input (raw binary, archive or single object file) and takes care of the
+/// format-agnostic modifications, i.e. preserving dates.
+static void executeObjcopy(const CopyConfig &Config) {
+ sys::fs::file_status Stat;
+ if (Config.PreserveDates)
+ if (auto EC = sys::fs::status(Config.InputFilename, Stat))
+ reportError(Config.InputFilename, EC);
+
+ if (Config.InputFormat == "binary") {
+ auto BufOrErr = MemoryBuffer::getFile(Config.InputFilename);
+ if (!BufOrErr)
+ reportError(Config.InputFilename, BufOrErr.getError());
+ FileBuffer FB(Config.OutputFilename);
+ executeObjcopyOnRawBinary(Config, *BufOrErr->get(), FB);
+ } else {
+ Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
+ createBinary(Config.InputFilename);
+ if (!BinaryOrErr)
+ reportError(Config.InputFilename, BinaryOrErr.takeError());
+
+ if (Archive *Ar = dyn_cast<Archive>(BinaryOrErr.get().getBinary())) {
+ executeObjcopyOnArchive(Config, *Ar);
+ } else {
+ FileBuffer FB(Config.OutputFilename);
+ executeObjcopyOnBinary(Config, *BinaryOrErr.get().getBinary(), FB);
+ }
}
- if (InputArgs.hasArg(STRIP_help)) {
- T.PrintHelp(outs(), "llvm-strip <input> [ <output> ]", "strip tool");
- exit(0);
+ if (Config.PreserveDates) {
+ restoreDateOnFile(Config.OutputFilename, Stat);
+ if (!Config.SplitDWO.empty())
+ restoreDateOnFile(Config.SplitDWO, Stat);
}
-
- SmallVector<const char *, 2> Positional;
- for (auto Arg : InputArgs.filtered(STRIP_UNKNOWN))
- error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
- for (auto Arg : InputArgs.filtered(STRIP_INPUT))
- Positional.push_back(Arg->getValue());
-
- if (Positional.empty())
- error("No input file specified");
-
- if (Positional.size() > 2)
- error("Support for multiple input files is not implemented yet");
-
- CopyConfig Config;
- Config.InputFilename = Positional[0];
- Config.OutputFilename =
- InputArgs.getLastArgValue(STRIP_output, Positional[0]);
-
- Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
-
- Config.DiscardAll = InputArgs.hasArg(STRIP_discard_all);
- Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded);
- Config.StripAll = InputArgs.hasArg(STRIP_strip_all);
-
- if (!Config.StripDebug && !Config.StripUnneeded && !Config.DiscardAll)
- Config.StripAll = true;
-
- for (auto Arg : InputArgs.filtered(STRIP_remove_section))
- Config.ToRemove.push_back(Arg->getValue());
-
- for (auto Arg : InputArgs.filtered(STRIP_keep_symbol))
- Config.SymbolsToKeep.push_back(Arg->getValue());
-
- return Config;
}
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
ToolName = argv[0];
- CopyConfig Config;
- if (sys::path::stem(ToolName).endswith_lower("strip"))
- Config = ParseStripOptions(makeArrayRef(argv + 1, argc));
+ DriverConfig DriverConfig;
+ if (sys::path::stem(ToolName).contains("strip"))
+ DriverConfig = parseStripOptions(makeArrayRef(argv + 1, argc));
else
- Config = ParseObjcopyOptions(makeArrayRef(argv + 1, argc));
- ExecuteElfObjcopy(Config);
+ DriverConfig = parseObjcopyOptions(makeArrayRef(argv + 1, argc));
+ for (const CopyConfig &CopyConfig : DriverConfig.CopyConfigs)
+ executeObjcopy(CopyConfig);
}
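The executeObjcopy flow above captures the input's file_status before any work happens and, when --preserve-dates is set, stamps those times back onto the output (and onto the split DWO file, if any). The same pattern as a small reusable helper, returning an Error instead of exiting the way reportError does (a sketch built only from the sys::fs and sys::Process calls used above):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/Process.h"

    static llvm::Error copyTimestamps(llvm::StringRef From, llvm::StringRef To) {
      llvm::sys::fs::file_status Stat;
      if (std::error_code EC = llvm::sys::fs::status(From, Stat))
        return llvm::errorCodeToError(EC);

      int FD;
      if (std::error_code EC = llvm::sys::fs::openFileForWrite(
              To, FD, llvm::sys::fs::CD_OpenExisting))
        return llvm::errorCodeToError(EC);
      if (std::error_code EC = llvm::sys::fs::setLastAccessAndModificationTime(
              FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
        return llvm::errorCodeToError(EC);
      if (std::error_code EC = llvm::sys::Process::SafelyCloseFileDescriptor(FD))
        return llvm::errorCodeToError(EC);
      return llvm::Error::success();
    }
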
diff --git a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h
index e222b65dc78f..d8edf3e29ee0 100644
--- a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h
+++ b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.h
@@ -31,7 +31,7 @@ template <class T> T unwrapOrError(Expected<T> EO) {
return *EO;
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(EO.takeError(), OS, "");
+ logAllUnhandledErrors(EO.takeError(), OS);
OS.flush();
error(Buf);
}
diff --git a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
index 7ca5d04593ff..55607ec299be 100644
--- a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -16,11 +16,13 @@
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Win64EH.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -453,7 +455,7 @@ static bool getPDataSection(const COFFObjectFile *Obj,
Rels.push_back(Reloc);
// Sort relocations by address.
- llvm::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+ llvm::sort(Rels, isRelocAddressLess);
ArrayRef<uint8_t> Contents;
error(Obj->getSectionContents(Pdata, Contents));
@@ -578,8 +580,9 @@ static void printRuntimeFunctionRels(const COFFObjectFile *Obj,
void llvm::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
if (Obj->getMachine() != COFF::IMAGE_FILE_MACHINE_AMD64) {
- errs() << "Unsupported image machine type "
- "(currently only AMD64 is supported).\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "unsupported image machine type "
+ "(currently only AMD64 is supported).\n";
return;
}
@@ -646,10 +649,26 @@ void llvm::printCOFFSymbolTable(const COFFObjectFile *coff) {
<< "(sec " << format("%2d", int(Symbol->getSectionNumber())) << ")"
<< "(fl 0x00)" // Flag bits, which COFF doesn't have.
<< "(ty " << format("%3x", unsigned(Symbol->getType())) << ")"
- << "(scl " << format("%3x", unsigned(Symbol->getStorageClass())) << ") "
+ << "(scl " << format("%3x", unsigned(Symbol->getStorageClass()))
+ << ") "
<< "(nx " << unsigned(Symbol->getNumberOfAuxSymbols()) << ") "
<< "0x" << format("%08x", unsigned(Symbol->getValue())) << " "
- << Name << "\n";
+ << Name;
+ if (Demangle && Name.startswith("?")) {
+ char *DemangledSymbol = nullptr;
+ size_t Size = 0;
+ int Status = -1;
+ DemangledSymbol =
+ microsoftDemangle(Name.data(), DemangledSymbol, &Size, &Status);
+
+ if (Status == 0 && DemangledSymbol) {
+ outs() << " (" << StringRef(DemangledSymbol) << ")";
+ std::free(DemangledSymbol);
+ } else {
+ outs() << " (invalid mangled name)";
+ }
+ }
+ outs() << "\n";
for (unsigned AI = 0, AE = Symbol->getNumberOfAuxSymbols(); AI < AE; ++AI, ++SI) {
if (Symbol->isSectionDefinition()) {
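
The new COFF symbol-table hunk demangles MSVC-mangled names (those starting with '?') via llvm::microsoftDemangle(). A standalone usage sketch with the same argument order as the hunk; passing a null output buffer asks the demangler to allocate one, which the caller must free (the example name and its printed output are illustrative, not taken from the diff):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      const char *Mangled = "?func@@YAHH@Z"; // hypothetical MSVC-mangled name
      size_t Size = 0;
      int Status = -1;
      char *Demangled = llvm::microsoftDemangle(Mangled, nullptr, &Size, &Status);
      if (Status == 0 && Demangled)
        std::printf("%s\n", Demangled); // e.g. "int __cdecl func(int)"
      else
        std::printf("(invalid mangled name)\n");
      std::free(Demangled); // the caller owns the returned buffer
      return 0;
    }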
diff --git a/contrib/llvm/tools/llvm-objdump/ELFDump.cpp b/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
index f4d36656a6c4..b17a15a0d8fc 100644
--- a/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -158,37 +158,23 @@ template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
}
void llvm::printELFFileHeader(const object::ObjectFile *Obj) {
- // Little-endian 32-bit
- if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
+ if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
-
- // Big-endian 32-bit
- if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
-
- // Little-endian 64-bit
- if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
-
- // Big-endian 64-bit
- if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
}
void llvm::printELFDynamicSection(const object::ObjectFile *Obj) {
- // Little-endian 32-bit
- if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
+ if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
-
- // Big-endian 32-bit
- if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
-
- // Little-endian 64-bit
- if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
-
- // Big-endian 64-bit
- if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
+ else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
}
diff --git a/contrib/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
index bdf80c73b999..5ef7058ec9da 100644
--- a/contrib/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
@@ -44,6 +44,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstring>
@@ -166,7 +167,7 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj,
if (*ThumbTarget)
return TheTarget;
- errs() << "llvm-objdump: error: unable to get target for '";
+ WithColor::error(errs(), "llvm-objdump") << "unable to get target for '";
if (!TheTarget)
errs() << TripleName;
else
@@ -483,7 +484,7 @@ static void PrintRType(const uint64_t cputype, const unsigned r_type) {
"GOTLDP ", "GOTLDPOF", "PTRTGOT ", "TLVLDP ", "TLVLDPOF",
"ADDEND ", " 11 (?) ", " 12 (?) ", " 13 (?) ", " 14 (?) ", " 15 (?) "
};
-
+
if (r_type > 0xf){
outs() << format("%-7u", r_type) << " ";
return;
@@ -552,7 +553,7 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
bool previous_arm_half = false;
bool previous_sectdiff = false;
uint32_t sectdiff_r_type = 0;
-
+
for (relocation_iterator Reloc = Begin; Reloc != End; ++Reloc) {
const DataRefImpl Rel = Reloc->getRawDataRefImpl();
const MachO::any_relocation_info RE = O->getRelocation(Rel);
@@ -567,7 +568,7 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
O->getScatteredRelocationValue(RE) : 0);
const unsigned r_symbolnum = (r_scattered ? 0 :
O->getPlainRelocationSymbolNum(RE));
-
+
if (r_scattered && cputype != MachO::CPU_TYPE_X86_64) {
if (verbose) {
// scattered: address
@@ -578,20 +579,20 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
outs() << " ";
else
outs() << format("%08x ", (unsigned int)r_address);
-
+
// scattered: pcrel
if (r_pcrel)
outs() << "True ";
else
outs() << "False ";
-
+
// scattered: length
PrintRLength(cputype, r_type, r_length, previous_arm_half);
-
+
// scattered: extern & type
outs() << "n/a ";
PrintRType(cputype, r_type);
-
+
// scattered: scattered & value
outs() << format("True 0x%08x", (unsigned int)r_value);
if (previous_sectdiff == false) {
@@ -639,22 +640,22 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
outs() << " ";
else
outs() << format("%08x ", (unsigned int)r_address);
-
+
// plain: pcrel
if (r_pcrel)
outs() << "True ";
else
outs() << "False ";
-
+
// plain: length
PrintRLength(cputype, r_type, r_length, previous_arm_half);
-
+
if (r_extern) {
// plain: extern & type & scattered
outs() << "True ";
PrintRType(cputype, r_type);
outs() << "False ";
-
+
// plain: symbolnum/value
if (r_symbolnum > Symtab.nsyms)
outs() << format("?(%d)\n", r_symbolnum);
@@ -675,7 +676,7 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
outs() << "False ";
PrintRType(cputype, r_type);
outs() << "False ";
-
+
// plain: symbolnum/value
if (cputype == MachO::CPU_TYPE_ARM &&
r_type == llvm::MachO::ARM_RELOC_PAIR)
@@ -1411,7 +1412,7 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
std::pair<StringRef, StringRef> DumpSegSectName;
DumpSegSectName = DumpSection.split(',');
StringRef DumpSegName, DumpSectName;
- if (DumpSegSectName.second.size()) {
+ if (!DumpSegSectName.second.empty()) {
DumpSegName = DumpSegSectName.first;
DumpSectName = DumpSegSectName.second;
} else {
@@ -1559,7 +1560,8 @@ static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) {
if (none_of(ArchFlags, [&](const std::string &Name) {
return Name == ArchFlagName;
})) {
- errs() << "llvm-objdump: " + Filename + ": No architecture specified.\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << Filename << ": no architecture specified.\n";
return false;
}
return true;
@@ -1580,7 +1582,7 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
if (Disassemble || Relocations || PrivateHeaders || ExportsTrie || Rebase ||
Bind || SymbolTable || LazyBind || WeakBind || IndirectSymbols ||
DataInCode || LinkOptHints || DylibsUsed || DylibId || ObjcMetaData ||
- (FilterSections.size() != 0)) {
+ (!FilterSections.empty())) {
if (!NoLeadingHeaders) {
outs() << Name;
if (!ArchiveMemberName.empty())
@@ -1605,12 +1607,22 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
// If we need the symbol table to do the operation then check it here to
// produce a good error message as to where the Mach-O file comes from in
// the error message.
- if (Disassemble || IndirectSymbols || FilterSections.size() != 0 ||
- UnwindInfo)
+ if (Disassemble || IndirectSymbols || !FilterSections.empty() || UnwindInfo)
if (Error Err = MachOOF->checkSymbolTable())
report_error(ArchiveName, FileName, std::move(Err), ArchitectureName);
-
- if (Disassemble) {
+
+ if (DisassembleAll) {
+ for (const SectionRef &Section : MachOOF->sections()) {
+ StringRef SectName;
+ Section.getName(SectName);
+ if (SectName.equals("__text")) {
+ DataRefImpl Ref = Section.getRawDataRefImpl();
+ StringRef SegName = MachOOF->getSectionFinalSegmentName(Ref);
+ DisassembleMachO(FileName, MachOOF, SegName, SectName);
+ }
+ }
+ }
+ else if (Disassemble) {
if (MachOOF->getHeader().filetype == MachO::MH_KEXT_BUNDLE &&
MachOOF->getHeader().cputype == MachO::CPU_TYPE_ARM64)
DisassembleMachO(FileName, MachOOF, "__TEXT_EXEC", "__text");
@@ -1626,10 +1638,10 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
if (Relocations)
PrintRelocations(MachOOF, !NonVerbose);
if (SectionHeaders)
- PrintSectionHeaders(MachOOF);
+ printSectionHeaders(MachOOF);
if (SectionContents)
- PrintSectionContents(MachOOF);
- if (FilterSections.size() != 0)
+ printSectionContents(MachOOF);
+ if (!FilterSections.empty())
DumpSectionContents(FileName, MachOOF, !NonVerbose);
if (InfoPlist)
DumpInfoPlistSectionContents(FileName, MachOOF);
@@ -1638,7 +1650,7 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
if (DylibId)
PrintDylibs(MachOOF, true);
if (SymbolTable)
- PrintSymbolTable(MachOOF, ArchiveName, ArchitectureName);
+ printSymbolTable(MachOOF, ArchiveName, ArchitectureName);
if (UnwindInfo)
printMachOUnwindInfo(MachOOF);
if (PrivateHeaders) {
@@ -1937,23 +1949,30 @@ static void printArchiveHeaders(StringRef Filename, Archive *A, bool verbose,
report_error(StringRef(), Filename, std::move(Err), ArchitectureName);
}
-// ParseInputMachO() parses the named Mach-O file in Filename and handles the
-// -arch flags selecting just those slices as specified by them and also parses
-// archive files. Then for each individual Mach-O file ProcessMachO() is
-// called to process the file based on the command line options.
-void llvm::ParseInputMachO(StringRef Filename) {
+static bool ValidateArchFlags() {
// Check for -arch all and verify the -arch flags are valid.
for (unsigned i = 0; i < ArchFlags.size(); ++i) {
if (ArchFlags[i] == "all") {
ArchAll = true;
} else {
if (!MachOObjectFile::isValidArch(ArchFlags[i])) {
- errs() << "llvm-objdump: Unknown architecture named '" + ArchFlags[i] +
- "'for the -arch option\n";
- return;
+ WithColor::error(errs(), "llvm-objdump")
+ << "unknown architecture named '" + ArchFlags[i] +
+ "'for the -arch option\n";
+ return false;
}
}
}
+ return true;
+}
+
+// ParseInputMachO() parses the named Mach-O file in Filename and handles the
+// -arch flags selecting just those slices as specified by them and also parses
+// archive files. Then for each individual Mach-O file ProcessMachO() is
+// called to process the file based on the command line options.
+void llvm::parseInputMachO(StringRef Filename) {
+ if (!ValidateArchFlags())
+ return;
// Attempt to open the binary.
Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename);
@@ -1989,191 +2008,199 @@ void llvm::ParseInputMachO(StringRef Filename) {
report_error(Filename, std::move(Err));
return;
}
- if (UniversalHeaders) {
- if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin))
- printMachOUniversalHeaders(UB, !NonVerbose);
- }
if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
- // If we have a list of architecture flags specified dump only those.
- if (!ArchAll && ArchFlags.size() != 0) {
- // Look for a slice in the universal binary that matches each ArchFlag.
- bool ArchFound;
- for (unsigned i = 0; i < ArchFlags.size(); ++i) {
- ArchFound = false;
- for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- if (ArchFlags[i] == I->getArchFlagName()) {
- ArchFound = true;
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
- I->getAsObjectFile();
- std::string ArchitectureName = "";
- if (ArchFlags.size() > 1)
- ArchitectureName = I->getArchFlagName();
- if (ObjOrErr) {
- ObjectFile &O = *ObjOrErr.get();
- if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
- ProcessMachO(Filename, MachOOF, "", ArchitectureName);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(Filename, StringRef(), std::move(E),
- ArchitectureName);
- continue;
- } else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
- std::unique_ptr<Archive> &A = *AOrErr;
- outs() << "Archive : " << Filename;
- if (!ArchitectureName.empty())
- outs() << " (architecture " << ArchitectureName << ")";
- outs() << "\n";
- if (ArchiveHeaders)
- printArchiveHeaders(Filename, A.get(), !NonVerbose,
- ArchiveMemberOffsets, ArchitectureName);
- Error Err = Error::success();
- for (auto &C : A->children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
- if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E), ArchitectureName);
- continue;
- }
- if (MachOObjectFile *O =
- dyn_cast<MachOObjectFile>(&*ChildOrErr.get()))
- ProcessMachO(Filename, O, O->getFileName(), ArchitectureName);
- }
- if (Err)
- report_error(Filename, std::move(Err));
- } else {
- consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for " +
- "architecture " + StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file");
- }
- }
- }
- if (!ArchFound) {
- errs() << "llvm-objdump: file: " + Filename + " does not contain "
- << "architecture: " + ArchFlags[i] + "\n";
- return;
- }
- }
+ parseInputMachO(UB);
+ return;
+ }
+ if (ObjectFile *O = dyn_cast<ObjectFile>(&Bin)) {
+ if (!checkMachOAndArchFlags(O, Filename))
return;
- }
- // No architecture flags were specified so if this contains a slice that
- // matches the host architecture dump only that.
- if (!ArchAll) {
+ if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&*O))
+ ProcessMachO(Filename, MachOOF);
+ else
+ WithColor::error(errs(), "llvm-objdump")
+ << Filename << "': "
+ << "object is not a Mach-O file type.\n";
+ return;
+ }
+ llvm_unreachable("Input object can't be invalid at this point");
+}
+
+void llvm::parseInputMachO(MachOUniversalBinary *UB) {
+ if (!ValidateArchFlags())
+ return;
+
+ auto Filename = UB->getFileName();
+
+ if (UniversalHeaders)
+ printMachOUniversalHeaders(UB, !NonVerbose);
+
+ // If we have a list of architecture flags specified dump only those.
+ if (!ArchAll && !ArchFlags.empty()) {
+ // Look for a slice in the universal binary that matches each ArchFlag.
+ bool ArchFound;
+ for (unsigned i = 0; i < ArchFlags.size(); ++i) {
+ ArchFound = false;
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- if (MachOObjectFile::getHostArch().getArchName() ==
- I->getArchFlagName()) {
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
- std::string ArchiveName;
- ArchiveName.clear();
+ E = UB->end_objects();
+ I != E; ++I) {
+ if (ArchFlags[i] == I->getArchFlagName()) {
+ ArchFound = true;
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
+ I->getAsObjectFile();
+ std::string ArchitectureName = "";
+ if (ArchFlags.size() > 1)
+ ArchitectureName = I->getArchFlagName();
if (ObjOrErr) {
ObjectFile &O = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
- ProcessMachO(Filename, MachOOF);
+ ProcessMachO(Filename, MachOOF, "", ArchitectureName);
} else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(Filename, std::move(E));
+ ObjOrErr.takeError())) {
+ report_error(Filename, StringRef(), std::move(E),
+ ArchitectureName);
continue;
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
+ I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
- outs() << "Archive : " << Filename << "\n";
+ outs() << "Archive : " << Filename;
+ if (!ArchitectureName.empty())
+ outs() << " (architecture " << ArchitectureName << ")";
+ outs() << "\n";
if (ArchiveHeaders)
printArchiveHeaders(Filename, A.get(), !NonVerbose,
- ArchiveMemberOffsets);
+ ArchiveMemberOffsets, ArchitectureName);
Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E));
+ report_error(Filename, C, std::move(E), ArchitectureName);
continue;
}
if (MachOObjectFile *O =
dyn_cast<MachOObjectFile>(&*ChildOrErr.get()))
- ProcessMachO(Filename, O, O->getFileName());
+ ProcessMachO(Filename, O, O->getFileName(), ArchitectureName);
}
if (Err)
report_error(Filename, std::move(Err));
} else {
consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
+ error("Mach-O universal file: " + Filename + " for " +
+ "architecture " + StringRef(I->getArchFlagName()) +
" is not a Mach-O file or an archive file");
}
- return;
}
}
+ if (!ArchFound) {
+ WithColor::error(errs(), "llvm-objdump")
+ << "file: " + Filename + " does not contain "
+ << "architecture: " + ArchFlags[i] + "\n";
+ return;
+ }
}
- // Either all architectures have been specified or none have been specified
- // and this does not contain the host architecture so dump all the slices.
- bool moreThanOneArch = UB->getNumberOfObjects() > 1;
+ return;
+ }
+ // No architecture flags were specified so if this contains a slice that
+ // matches the host architecture dump only that.
+ if (!ArchAll) {
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
- std::string ArchitectureName = "";
- if (moreThanOneArch)
- ArchitectureName = I->getArchFlagName();
- if (ObjOrErr) {
- ObjectFile &Obj = *ObjOrErr.get();
- if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&Obj))
- ProcessMachO(Filename, MachOOF, "", ArchitectureName);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(StringRef(), Filename, std::move(E), ArchitectureName);
- continue;
- } else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
- std::unique_ptr<Archive> &A = *AOrErr;
- outs() << "Archive : " << Filename;
- if (!ArchitectureName.empty())
- outs() << " (architecture " << ArchitectureName << ")";
- outs() << "\n";
- if (ArchiveHeaders)
- printArchiveHeaders(Filename, A.get(), !NonVerbose,
- ArchiveMemberOffsets, ArchitectureName);
- Error Err = Error::success();
- for (auto &C : A->children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
- if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E), ArchitectureName);
- continue;
- }
- if (MachOObjectFile *O =
- dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
- if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(O))
- ProcessMachO(Filename, MachOOF, MachOOF->getFileName(),
- ArchitectureName);
+ E = UB->end_objects();
+ I != E; ++I) {
+ if (MachOObjectFile::getHostArch().getArchName() ==
+ I->getArchFlagName()) {
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
+ std::string ArchiveName;
+ ArchiveName.clear();
+ if (ObjOrErr) {
+ ObjectFile &O = *ObjOrErr.get();
+ if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
+ ProcessMachO(Filename, MachOOF);
+ } else if (auto E = isNotObjectErrorInvalidFileType(
+ ObjOrErr.takeError())) {
+ report_error(Filename, std::move(E));
+ } else if (Expected<std::unique_ptr<Archive>> AOrErr =
+ I->getAsArchive()) {
+ std::unique_ptr<Archive> &A = *AOrErr;
+ outs() << "Archive : " << Filename << "\n";
+ if (ArchiveHeaders)
+ printArchiveHeaders(Filename, A.get(), !NonVerbose,
+ ArchiveMemberOffsets);
+ Error Err = Error::success();
+ for (auto &C : A->children(Err)) {
+ Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
+ if (!ChildOrErr) {
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ report_error(Filename, C, std::move(E));
+ continue;
+ }
+ if (MachOObjectFile *O =
+ dyn_cast<MachOObjectFile>(&*ChildOrErr.get()))
+ ProcessMachO(Filename, O, O->getFileName());
}
+ if (Err)
+ report_error(Filename, std::move(Err));
+ } else {
+ consumeError(AOrErr.takeError());
+ error("Mach-O universal file: " + Filename + " for architecture " +
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file");
}
- if (Err)
- report_error(Filename, std::move(Err));
- } else {
- consumeError(AOrErr.takeError());
- error("Mach-O universal file: " + Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file");
+ return;
}
}
- return;
}
- if (ObjectFile *O = dyn_cast<ObjectFile>(&Bin)) {
- if (!checkMachOAndArchFlags(O, Filename))
- return;
- if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&*O)) {
- ProcessMachO(Filename, MachOOF);
- } else
- errs() << "llvm-objdump: '" << Filename << "': "
- << "Object is not a Mach-O file type.\n";
- return;
+ // Either all architectures have been specified or none have been specified
+ // and this does not contain the host architecture so dump all the slices.
+ bool moreThanOneArch = UB->getNumberOfObjects() > 1;
+ for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
+ E = UB->end_objects();
+ I != E; ++I) {
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
+ std::string ArchitectureName = "";
+ if (moreThanOneArch)
+ ArchitectureName = I->getArchFlagName();
+ if (ObjOrErr) {
+ ObjectFile &Obj = *ObjOrErr.get();
+ if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&Obj))
+ ProcessMachO(Filename, MachOOF, "", ArchitectureName);
+ } else if (auto E = isNotObjectErrorInvalidFileType(
+ ObjOrErr.takeError())) {
+ report_error(StringRef(), Filename, std::move(E), ArchitectureName);
+ } else if (Expected<std::unique_ptr<Archive>> AOrErr =
+ I->getAsArchive()) {
+ std::unique_ptr<Archive> &A = *AOrErr;
+ outs() << "Archive : " << Filename;
+ if (!ArchitectureName.empty())
+ outs() << " (architecture " << ArchitectureName << ")";
+ outs() << "\n";
+ if (ArchiveHeaders)
+ printArchiveHeaders(Filename, A.get(), !NonVerbose,
+ ArchiveMemberOffsets, ArchitectureName);
+ Error Err = Error::success();
+ for (auto &C : A->children(Err)) {
+ Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
+ if (!ChildOrErr) {
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ report_error(Filename, C, std::move(E), ArchitectureName);
+ continue;
+ }
+ if (MachOObjectFile *O =
+ dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
+ if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(O))
+ ProcessMachO(Filename, MachOOF, MachOOF->getFileName(),
+ ArchitectureName);
+ }
+ }
+ if (Err)
+ report_error(Filename, std::move(Err));
+ } else {
+ consumeError(AOrErr.takeError());
+ error("Mach-O universal file: " + Filename + " for architecture " +
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file");
+ }
}
- llvm_unreachable("Input object can't be invalid at this point");
}
// The block of info used by the Symbolizer call backs.
@@ -5609,7 +5636,9 @@ static void print_image_info64(SectionRef S, struct DisassembleInfo *info) {
else if(swift_version == 5)
outs() << " Swift 4.0";
else if(swift_version == 6)
- outs() << " Swift 4.1";
+ outs() << " Swift 4.1/Swift 4.2";
+ else if(swift_version == 7)
+ outs() << " Swift 5 or later";
else
outs() << " unknown future Swift version (" << swift_version << ")";
}
@@ -5660,7 +5689,9 @@ static void print_image_info32(SectionRef S, struct DisassembleInfo *info) {
else if(swift_version == 5)
outs() << " Swift 4.0";
else if(swift_version == 6)
- outs() << " Swift 4.1";
+ outs() << " Swift 4.1/Swift 4.2";
+ else if(swift_version == 7)
+ outs() << " Swift 5 or later";
else
outs() << " unknown future Swift version (" << swift_version << ")";
}
@@ -6172,8 +6203,9 @@ static void PrintXarFilesSummary(const char *XarFilename, xar_t xar) {
ScopedXarIter xi;
if (!xi) {
- errs() << "Can't obtain an xar iterator for xar archive "
- << XarFilename << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't obtain an xar iterator for xar archive " << XarFilename
+ << "\n";
return;
}
@@ -6181,8 +6213,9 @@ static void PrintXarFilesSummary(const char *XarFilename, xar_t xar) {
for (xf = xar_file_first(xar, xi); xf; xf = xar_file_next(xi)) {
ScopedXarIter xp;
if(!xp){
- errs() << "Can't obtain an xar iterator for xar archive "
- << XarFilename << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't obtain an xar iterator for xar archive " << XarFilename
+ << "\n";
return;
}
type = nullptr;
@@ -6306,7 +6339,7 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
std::error_code XarEC =
sys::fs::createTemporaryFile("llvm-objdump", "xar", FD, XarFilename);
if (XarEC) {
- errs() << XarEC.message() << "\n";
+ WithColor::error(errs(), "llvm-objdump") << XarEC.message() << "\n";
return;
}
ToolOutputFile XarFile(XarFilename, FD);
@@ -6319,7 +6352,8 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
ScopedXarFile xar(XarFilename.c_str(), READ);
if (!xar) {
- errs() << "Can't create temporary xar archive " << XarFilename << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't create temporary xar archive " << XarFilename << "\n";
return;
}
@@ -6327,7 +6361,7 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
std::error_code TocEC =
sys::fs::createTemporaryFile("llvm-objdump", "toc", TocFilename);
if (TocEC) {
- errs() << TocEC.message() << "\n";
+ WithColor::error(errs(), "llvm-objdump") << TocEC.message() << "\n";
return;
}
xar_serialize(xar, TocFilename.c_str());
@@ -6344,7 +6378,7 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(TocFilename.c_str());
if (std::error_code EC = FileOrErr.getError()) {
- errs() << EC.message() << "\n";
+ WithColor::error(errs(), "llvm-objdump") << EC.message() << "\n";
return;
}
std::unique_ptr<MemoryBuffer> &Buffer = FileOrErr.get();
@@ -6359,8 +6393,9 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
// TODO: Go through the xar's files.
ScopedXarIter xi;
if(!xi){
- errs() << "Can't obtain an xar iterator for xar archive "
- << XarFilename.c_str() << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't obtain an xar iterator for xar archive "
+ << XarFilename.c_str() << "\n";
return;
}
for(xar_file_t xf = xar_file_first(xar, xi); xf; xf = xar_file_next(xi)){
@@ -6370,8 +6405,9 @@ static void DumpBitcodeSection(MachOObjectFile *O, const char *sect,
ScopedXarIter xp;
if(!xp){
- errs() << "Can't obtain an xar iterator for xar archive "
- << XarFilename.c_str() << "\n";
+ WithColor::error(errs(), "llvm-objdump")
+ << "can't obtain an xar iterator for xar archive "
+ << XarFilename.c_str() << "\n";
return;
}
member_name = NULL;
@@ -6805,7 +6841,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
- if (MAttrs.size()) {
+ if (!MAttrs.empty()) {
SubtargetFeatures Features;
for (unsigned i = 0; i != MAttrs.size(); ++i)
Features.AddFeature(MAttrs[i]);
@@ -6848,8 +6884,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// IP->setCommentStream(CommentStream);
if (!AsmInfo || !STI || !DisAsm || !IP) {
- errs() << "error: couldn't initialize disassembler for target "
- << TripleName << '\n';
+ WithColor::error(errs(), "llvm-objdump")
+ << "couldn't initialize disassembler for target " << TripleName << '\n';
return;
}
@@ -6890,8 +6926,9 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
}
if (ThumbTarget && (!ThumbAsmInfo || !ThumbSTI || !ThumbDisAsm || !ThumbIP)) {
- errs() << "error: couldn't initialize disassembler for target "
- << ThumbTripleName << '\n';
+ WithColor::error(errs(), "llvm-objdump")
+ << "couldn't initialize disassembler for target " << ThumbTripleName
+ << '\n';
return;
}
@@ -6910,7 +6947,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
BaseSegmentAddress);
// Sort the symbols by address, just in case they didn't come in that way.
- llvm::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
+ llvm::sort(Symbols, SymbolSorter());
// Build a data in code table that is sorted on by the address of each entry.
uint64_t BaseAddress = 0;
@@ -6935,6 +6972,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
std::unique_ptr<DIContext> diContext;
ObjectFile *DbgObj = MachOOF;
+ std::unique_ptr<MemoryBuffer> DSYMBuf;
// Try to find debug info and set up the DIContext for it.
if (UseDbg) {
// A separate DSym file path was specified, parse it as a macho file,
@@ -6943,22 +6981,28 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
MemoryBuffer::getFileOrSTDIN(DSYMFile);
if (std::error_code EC = BufOrErr.getError()) {
- errs() << "llvm-objdump: " << Filename << ": " << EC.message() << '\n';
+ report_error(DSYMFile, errorCodeToError(EC));
return;
}
+
Expected<std::unique_ptr<MachOObjectFile>> DbgObjCheck =
ObjectFile::createMachOObjectFile(BufOrErr.get()->getMemBufferRef());
- if (DbgObjCheck.takeError())
- report_error(MachOOF->getFileName(), DbgObjCheck.takeError());
+ if (Error E = DbgObjCheck.takeError()) {
+ report_error(DSYMFile, std::move(E));
+ return;
+ }
+
DbgObj = DbgObjCheck.get().release();
+ // We need to keep the file alive, because we're replacing DbgObj with it.
+ DSYMBuf = std::move(BufOrErr.get());
}
// Setup the DIContext
diContext = DWARFContext::create(*DbgObj);
}
- if (FilterSections.size() == 0)
+ if (FilterSections.empty())
outs() << "(" << DisSegName << "," << DisSectName << ") section\n";
for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
@@ -7021,7 +7065,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
unsigned int Arch = MachOOF->getArch();
// Skip all symbols if this is a stubs file.
- if (Bytes.size() == 0)
+ if (Bytes.empty())
return;
// If the section has symbols but no symbol at the start of the section
@@ -7228,7 +7272,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
outs() << format("\t.short\t0x%04x\n", opcode);
Size = 2;
} else{
- errs() << "llvm-objdump: warning: invalid instruction encoding\n";
+ WithColor::warning(errs(), "llvm-objdump")
+ << "invalid instruction encoding\n";
if (Size == 0)
Size = 1; // skip illegible bytes
}
@@ -7275,7 +7320,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
*(Bytes.data() + Index) & 0xff);
InstSize = 1; // skip exactly one illegible byte and move on.
} else {
- errs() << "llvm-objdump: warning: invalid instruction encoding\n";
+ WithColor::warning(errs(), "llvm-objdump")
+ << "invalid instruction encoding\n";
if (InstSize == 0)
InstSize = 1; // skip illegible bytes
}
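
One MachODump.cpp change worth calling out: DisassembleMachO() now keeps the -dsym file's MemoryBuffer alive in the new DSYMBuf variable. A MachOObjectFile created from a MemoryBufferRef only references the buffer's storage, so releasing the object while letting the buffer go out of scope left DbgObj (and the DWARFContext built on it) pointing at freed memory. A simplified sketch of the ownership pattern, not the tool's actual code (error handling reduced to the minimum the Error API requires):

    #include "llvm/Object/MachO.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>

    using namespace llvm;
    using namespace llvm::object;

    static std::unique_ptr<MemoryBuffer> DSYMBuf; // keeps the mapped file alive

    ObjectFile *loadDSYM(StringRef Path) {
      auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Path);
      if (!BufOrErr)
        return nullptr;
      auto ObjOrErr =
          ObjectFile::createMachOObjectFile((*BufOrErr)->getMemBufferRef());
      if (!ObjOrErr) {
        consumeError(ObjOrErr.takeError());
        return nullptr;
      }
      // Hand the buffer to a long-lived owner *before* releasing the raw
      // ObjectFile pointer; otherwise the bytes it points into would be
      // freed when BufOrErr goes out of scope.
      DSYMBuf = std::move(*BufOrErr);
      return ObjOrErr->release();
    }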
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 8041e6f59940..ba8d3c5b8d5c 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -42,6 +42,7 @@
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/Casting.h"
@@ -55,8 +56,10 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
@@ -91,12 +94,11 @@ static cl::alias
DisassembleAlld("D", cl::desc("Alias for --disassemble-all"),
cl::aliasopt(DisassembleAll));
-cl::opt<std::string> llvm::Demangle("demangle",
- cl::desc("Demangle symbols names"),
- cl::ValueOptional, cl::init("none"));
+cl::opt<bool> llvm::Demangle("demangle", cl::desc("Demangle symbols names"),
+ cl::init(false));
static cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
- cl::aliasopt(Demangle));
+ cl::aliasopt(llvm::Demangle));
static cl::list<std::string>
DisassembleFunctions("df",
@@ -105,7 +107,11 @@ DisassembleFunctions("df",
static StringSet<> DisasmFuncsSet;
cl::opt<bool>
-llvm::Relocations("r", cl::desc("Display the relocation entries in the file"));
+llvm::Relocations("reloc",
+ cl::desc("Display the relocation entries in the file"));
+static cl::alias RelocationsShort("r", cl::desc("Alias for --reloc"),
+ cl::NotHidden,
+ cl::aliasopt(llvm::Relocations));
cl::opt<bool>
llvm::DynamicRelocations("dynamic-reloc",
@@ -115,10 +121,16 @@ DynamicRelocationsd("R", cl::desc("Alias for --dynamic-reloc"),
cl::aliasopt(DynamicRelocations));
cl::opt<bool>
-llvm::SectionContents("s", cl::desc("Display the content of each section"));
+ llvm::SectionContents("full-contents",
+ cl::desc("Display the content of each section"));
+static cl::alias SectionContentsShort("s",
+ cl::desc("Alias for --full-contents"),
+ cl::aliasopt(SectionContents));
-cl::opt<bool>
-llvm::SymbolTable("t", cl::desc("Display the symbol table"));
+cl::opt<bool> llvm::SymbolTable("syms", cl::desc("Display the symbol table"));
+static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"),
+ cl::NotHidden,
+ cl::aliasopt(llvm::SymbolTable));
cl::opt<bool>
llvm::ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols"));
@@ -253,8 +265,17 @@ cl::opt<unsigned long long>
StartAddress("start-address", cl::desc("Disassemble beginning at address"),
cl::value_desc("address"), cl::init(0));
cl::opt<unsigned long long>
- StopAddress("stop-address", cl::desc("Stop disassembly at address"),
+ StopAddress("stop-address",
+ cl::desc("Stop disassembly at address"),
cl::value_desc("address"), cl::init(UINT64_MAX));
+
+cl::opt<bool> DisassembleZeroes(
+ "disassemble-zeroes",
+ cl::desc("Do not skip blocks of zeroes when disassembling"));
+cl::alias DisassembleZeroesShort("z",
+ cl::desc("Alias for --disassemble-zeroes"),
+ cl::aliasopt(DisassembleZeroes));
+
static StringRef ToolName;
typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
@@ -326,33 +347,35 @@ SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O) {
void llvm::error(std::error_code EC) {
if (!EC)
return;
-
- errs() << ToolName << ": error reading file: " << EC.message() << ".\n";
+ WithColor::error(errs(), ToolName)
+ << "reading file: " << EC.message() << ".\n";
errs().flush();
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void llvm::error(Twine Message) {
- errs() << ToolName << ": " << Message << ".\n";
+ WithColor::error(errs(), ToolName) << Message << ".\n";
errs().flush();
exit(1);
}
void llvm::warn(StringRef Message) {
- errs() << ToolName << ": warning: " << Message << ".\n";
+ WithColor::warning(errs(), ToolName) << Message << ".\n";
errs().flush();
}
LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
Twine Message) {
- errs() << ToolName << ": '" << File << "': " << Message << ".\n";
+ WithColor::error(errs(), ToolName)
+ << "'" << File << "': " << Message << ".\n";
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
std::error_code EC) {
assert(EC);
- errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n";
+ WithColor::error(errs(), ToolName)
+ << "'" << File << "': " << EC.message() << ".\n";
exit(1);
}
@@ -361,9 +384,9 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
assert(E);
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
- errs() << ToolName << ": '" << File << "': " << Buf;
+ WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf;
exit(1);
}
@@ -372,7 +395,7 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
llvm::Error E,
StringRef ArchitectureName) {
assert(E);
- errs() << ToolName << ": ";
+ WithColor::error(errs(), ToolName);
if (ArchiveName != "")
errs() << ArchiveName << "(" << FileName << ")";
else
@@ -381,7 +404,7 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
errs() << " (for architecture " << ArchitectureName << ")";
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(std::move(E), OS, "");
+ logAllUnhandledErrors(std::move(E), OS);
OS.flush();
errs() << ": " << Buf;
exit(1);
@@ -407,18 +430,16 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
// Figure out the target triple.
llvm::Triple TheTriple("unknown-unknown-unknown");
if (TripleName.empty()) {
- if (Obj) {
+ if (Obj)
TheTriple = Obj->makeTriple();
- }
} else {
TheTriple.setTriple(Triple::normalize(TripleName));
// Use the triple, but also try to combine with ARM build attributes.
if (Obj) {
auto Arch = Obj->getArch();
- if (Arch == Triple::arm || Arch == Triple::armeb) {
+ if (Arch == Triple::arm || Arch == Triple::armeb)
Obj->setARMSubArch(TheTriple);
- }
}
}
@@ -438,22 +459,35 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
return TheTarget;
}
-bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
- return a.getOffset() < b.getOffset();
+bool llvm::isRelocAddressLess(RelocationRef A, RelocationRef B) {
+ return A.getOffset() < B.getOffset();
+}
+
+static std::string demangle(StringRef Name) {
+ char *Demangled = nullptr;
+ if (Name.startswith("_Z"))
+ Demangled = itaniumDemangle(Name.data(), Demangled, nullptr, nullptr);
+ else if (Name.startswith("?"))
+ Demangled = microsoftDemangle(Name.data(), Demangled, nullptr, nullptr);
+
+ if (!Demangled)
+ return Name;
+
+ std::string Ret = Demangled;
+ free(Demangled);
+ return Ret;
}
template <class ELFT>
static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
const RelocationRef &RelRef,
SmallVectorImpl<char> &Result) {
- DataRefImpl Rel = RelRef.getRawDataRefImpl();
-
typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
typedef typename ELFObjectFile<ELFT>::Elf_Rela Elf_Rela;
const ELFFile<ELFT> &EF = *Obj->getELFFile();
-
+ DataRefImpl Rel = RelRef.getRawDataRefImpl();
auto SecOrErr = EF.getSection(Rel.d.a);
if (!SecOrErr)
return errorToErrorCode(SecOrErr.takeError());
@@ -471,11 +505,11 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
if (!StrTabOrErr)
return errorToErrorCode(StrTabOrErr.takeError());
StringRef StrTab = *StrTabOrErr;
- int64_t addend = 0;
+ int64_t Addend = 0;
// If there is no Symbol associated with the relocation, we set the undef
// boolean value to 'true'. This will prevent us from calling functions that
// require the relocation to be associated with a symbol.
- bool undef = false;
+ bool Undef = false;
switch (Sec->sh_type) {
default:
return object_error::parse_failed;
@@ -485,13 +519,13 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
}
case ELF::SHT_RELA: {
const Elf_Rela *ERela = Obj->getRela(Rel);
- addend = ERela->r_addend;
- undef = ERela->getSymbol(false) == 0;
+ Addend = ERela->r_addend;
+ Undef = ERela->getSymbol(false) == 0;
break;
}
}
- StringRef Target;
- if (!undef) {
+ std::string Target;
+ if (!Undef) {
symbol_iterator SI = RelRef.getSymbol();
const Elf_Sym *symb = Obj->getSymbol(SI->getRawDataRefImpl());
if (symb->getType() == ELF::STT_SECTION) {
@@ -507,20 +541,23 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
Expected<StringRef> SymName = symb->getName(StrTab);
if (!SymName)
return errorToErrorCode(SymName.takeError());
- Target = *SymName;
+ if (Demangle)
+ Target = demangle(*SymName);
+ else
+ Target = *SymName;
}
} else
Target = "*ABS*";
// Default scheme is to print Target, as well as "+ <addend>" for nonzero
// addend. Should be acceptable for all normal purposes.
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
- fmt << Target;
- if (addend != 0)
- fmt << (addend < 0 ? "" : "+") << addend;
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
+ Fmt << Target;
+ if (Addend != 0)
+ Fmt << (Addend < 0 ? "" : "+") << Addend;
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
return std::error_code();
}
@@ -551,18 +588,15 @@ static std::error_code getRelocationValueString(const COFFObjectFile *Obj,
static void printRelocationTargetName(const MachOObjectFile *O,
const MachO::any_relocation_info &RE,
- raw_string_ostream &fmt) {
- bool IsScattered = O->isRelocationScattered(RE);
-
+ raw_string_ostream &Fmt) {
// Target of a scattered relocation is an address. In the interest of
// generating pretty output, scan through the symbol table looking for a
// symbol that aligns with that address. If we find one, print it.
// Otherwise, we just print the hex address of the target.
- if (IsScattered) {
+ if (O->isRelocationScattered(RE)) {
uint32_t Val = O->getPlainRelocationSymbolNum(RE);
for (const SymbolRef &Symbol : O->symbols()) {
- std::error_code ec;
Expected<uint64_t> Addr = Symbol.getAddress();
if (!Addr)
report_error(O->getFileName(), Addr.takeError());
@@ -571,7 +605,7 @@ static void printRelocationTargetName(const MachOObjectFile *O,
Expected<StringRef> Name = Symbol.getName();
if (!Name)
report_error(O->getFileName(), Name.takeError());
- fmt << *Name;
+ Fmt << *Name;
return;
}
@@ -586,11 +620,11 @@ static void printRelocationTargetName(const MachOObjectFile *O,
continue;
if ((ec = Section.getName(Name)))
report_error(O->getFileName(), ec);
- fmt << Name;
+ Fmt << Name;
return;
}
- fmt << format("0x%x", Val);
+ Fmt << format("0x%x", Val);
return;
}
@@ -599,9 +633,11 @@ static void printRelocationTargetName(const MachOObjectFile *O,
uint64_t Val = O->getPlainRelocationSymbolNum(RE);
if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) {
- fmt << format("0x%0" PRIx64, Val);
+ Fmt << format("0x%0" PRIx64, Val);
return;
- } else if (isExtern) {
+ }
+
+ if (isExtern) {
symbol_iterator SI = O->symbol_begin();
advance(SI, Val);
Expected<StringRef> SOrErr = SI->getName();
@@ -612,21 +648,21 @@ static void printRelocationTargetName(const MachOObjectFile *O,
section_iterator SI = O->section_begin();
// Adjust for the fact that sections are 1-indexed.
if (Val == 0) {
- fmt << "0 (?,?)";
+ Fmt << "0 (?,?)";
return;
}
- uint32_t i = Val - 1;
- while (i != 0 && SI != O->section_end()) {
- i--;
+ uint32_t I = Val - 1;
+ while (I != 0 && SI != O->section_end()) {
+ --I;
advance(SI, 1);
}
if (SI == O->section_end())
- fmt << Val << " (?,?)";
+ Fmt << Val << " (?,?)";
else
SI->getName(S);
}
- fmt << S;
+ Fmt << S;
}
static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
@@ -634,12 +670,12 @@ static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
SmallVectorImpl<char> &Result) {
const wasm::WasmRelocation& Rel = Obj->getWasmRelocation(RelRef);
symbol_iterator SI = RelRef.getSymbol();
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
if (SI == Obj->symbol_end()) {
// Not all wasm relocations have symbols associated with them.
// In particular R_WEBASSEMBLY_TYPE_INDEX_LEB.
- fmt << Rel.Index;
+ Fmt << Rel.Index;
} else {
Expected<StringRef> SymNameOrErr = SI->getName();
if (!SymNameOrErr)
@@ -647,9 +683,9 @@ static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
StringRef SymName = *SymNameOrErr;
Result.append(SymName.begin(), SymName.end());
}
- fmt << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
+ Fmt << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
return std::error_code();
}
@@ -661,8 +697,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
unsigned Arch = Obj->getArch();
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
unsigned Type = Obj->getAnyRelocationType(RE);
bool IsPCRel = Obj->getAnyRelocationPCRel(RE);
@@ -671,15 +707,13 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// X86_64 has entirely custom relocation types.
if (Arch == Triple::x86_64) {
- bool isPCRel = Obj->getAnyRelocationPCRel(RE);
-
switch (Type) {
case MachO::X86_64_RELOC_GOT_LOAD:
case MachO::X86_64_RELOC_GOT: {
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "@GOT";
- if (isPCRel)
- fmt << "PCREL";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@GOT";
+ if (IsPCRel)
+ Fmt << "PCREL";
break;
}
case MachO::X86_64_RELOC_SUBTRACTOR: {
@@ -697,31 +731,31 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// The X86_64_RELOC_UNSIGNED contains the minuend symbol;
// X86_64_RELOC_SUBTRACTOR contains the subtrahend.
- printRelocationTargetName(Obj, RENext, fmt);
- fmt << "-";
- printRelocationTargetName(Obj, RE, fmt);
+ printRelocationTargetName(Obj, RENext, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RE, Fmt);
break;
}
case MachO::X86_64_RELOC_TLV:
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "@TLV";
- if (isPCRel)
- fmt << "P";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@TLV";
+ if (IsPCRel)
+ Fmt << "P";
break;
case MachO::X86_64_RELOC_SIGNED_1:
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-1";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-1";
break;
case MachO::X86_64_RELOC_SIGNED_2:
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-2";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-2";
break;
case MachO::X86_64_RELOC_SIGNED_4:
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-4";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-4";
break;
default:
- printRelocationTargetName(Obj, RE, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
break;
}
// X86 and ARM share some relocation types in common.
@@ -744,9 +778,9 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
"GENERIC_RELOC_SECTDIFF.");
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-";
- printRelocationTargetName(Obj, RENext, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
break;
}
}
@@ -765,20 +799,20 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
"GENERIC_RELOC_LOCAL_SECTDIFF.");
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "-";
- printRelocationTargetName(Obj, RENext, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
break;
}
case MachO::GENERIC_RELOC_TLV: {
- printRelocationTargetName(Obj, RE, fmt);
- fmt << "@TLV";
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@TLV";
if (IsPCRel)
- fmt << "P";
+ Fmt << "P";
break;
}
default:
- printRelocationTargetName(Obj, RE, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
}
} else { // ARM-specific relocations
switch (Type) {
@@ -789,10 +823,10 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
bool isUpper = (Obj->getAnyRelocationLength(RE) & 0x1) == 1;
if (isUpper)
- fmt << ":upper16:(";
+ Fmt << ":upper16:(";
else
- fmt << ":lower16:(";
- printRelocationTargetName(Obj, RE, fmt);
+ Fmt << ":lower16:(";
+ printRelocationTargetName(Obj, RE, Fmt);
DataRefImpl RelNext = Rel;
Obj->moveRelocationNext(RelNext);
@@ -813,21 +847,21 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// ARM_RELOC_HALF_SECTDIFF encodes the second section in the
// symbol/section pointer of the follow-on relocation.
if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
- fmt << "-";
- printRelocationTargetName(Obj, RENext, fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
}
- fmt << ")";
+ Fmt << ")";
break;
}
- default: { printRelocationTargetName(Obj, RE, fmt); }
+ default: { printRelocationTargetName(Obj, RE, Fmt); }
}
}
} else
- printRelocationTargetName(Obj, RE, fmt);
+ printRelocationTargetName(Obj, RE, Fmt);
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
return std::error_code();
}
@@ -849,8 +883,7 @@ static std::error_code getRelocationValueString(const RelocationRef &Rel,
/// relocations, usually because it is the trailing part of a multipart
/// relocation that will be printed as part of the leading relocation.
static bool getHidden(RelocationRef RelRef) {
- const ObjectFile *Obj = RelRef.getObject();
- auto *MachO = dyn_cast<MachOObjectFile>(Obj);
+ auto *MachO = dyn_cast<MachOObjectFile>(RelRef.getObject());
if (!MachO)
return false;
@@ -860,10 +893,10 @@ static bool getHidden(RelocationRef RelRef) {
// On arches that use the generic relocations, GENERIC_RELOC_PAIR
// is always hidden.
- if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc) {
- if (Type == MachO::GENERIC_RELOC_PAIR)
- return true;
- } else if (Arch == Triple::x86_64) {
+ if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc)
+ return Type == MachO::GENERIC_RELOC_PAIR;
+
+ if (Arch == Triple::x86_64) {
// On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
// an X86_64_RELOC_SUBTRACTOR.
if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) {
@@ -1038,27 +1071,27 @@ public:
auto Preamble = " { ";
auto Separator = "";
StringRef Fmt = "\t\t\t%08" PRIx64 ": ";
- std::vector<RelocationRef>::const_iterator rel_cur = Rels->begin();
- std::vector<RelocationRef>::const_iterator rel_end = Rels->end();
+ std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
+ std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
// Hexagon's packets require relocations to be inline rather than
// clustered at the end of the packet.
auto PrintReloc = [&]() -> void {
- while ((rel_cur != rel_end) && (rel_cur->getOffset() <= Address)) {
- if (rel_cur->getOffset() == Address) {
- SmallString<16> name;
- SmallString<32> val;
- rel_cur->getTypeName(name);
- error(getRelocationValueString(*rel_cur, val));
- OS << Separator << format(Fmt.data(), Address) << name << "\t" << val
+ while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address)) {
+ if (RelCur->getOffset() == Address) {
+ SmallString<16> Name;
+ SmallString<32> Val;
+ RelCur->getTypeName(Name);
+ error(getRelocationValueString(*RelCur, Val));
+ OS << Separator << format(Fmt.data(), Address) << Name << "\t" << Val
<< "\n";
return;
}
- rel_cur++;
+ ++RelCur;
}
};
- while(!HeadTail.first.empty()) {
+ while (!HeadTail.first.empty()) {
OS << Separator;
Separator = "\n";
if (SP && (PrintSource || PrintLines))
@@ -1068,7 +1101,7 @@ public:
Preamble = " ";
StringRef Inst;
auto Duplex = HeadTail.first.split('\v');
- if(!Duplex.second.empty()){
+ if (!Duplex.second.empty()) {
OS << Duplex.first;
OS << "; ";
Inst = Duplex.second;
@@ -1200,7 +1233,6 @@ addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
Expected<uint64_t> AddressOrErr = Symbol.getAddress();
if (!AddressOrErr)
report_error(Obj->getFileName(), AddressOrErr.takeError());
- uint64_t Address = *AddressOrErr;
Expected<StringRef> Name = Symbol.getName();
if (!Name)
@@ -1215,7 +1247,7 @@ addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
if (SecI == Obj->section_end())
continue;
- AllSymbols[*SecI].emplace_back(Address, *Name, SymbolType);
+ AllSymbols[*SecI].emplace_back(*AddressOrErr, *Name, SymbolType);
}
}
@@ -1235,7 +1267,60 @@ addDynamicElfSymbols(const ObjectFile *Obj,
llvm_unreachable("Unsupported binary format");
}
-static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
+static void addPltEntries(const ObjectFile *Obj,
+ std::map<SectionRef, SectionSymbolsTy> &AllSymbols,
+ StringSaver &Saver) {
+ Optional<SectionRef> Plt = None;
+ for (const SectionRef &Section : Obj->sections()) {
+ StringRef Name;
+ if (Section.getName(Name))
+ continue;
+ if (Name == ".plt")
+ Plt = Section;
+ }
+ if (!Plt)
+ return;
+ if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj)) {
+ for (auto PltEntry : ElfObj->getPltAddresses()) {
+ SymbolRef Symbol(PltEntry.first, ElfObj);
+ uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
+
+ Expected<StringRef> NameOrErr = Symbol.getName();
+ if (!NameOrErr)
+ report_error(Obj->getFileName(), NameOrErr.takeError());
+ if (NameOrErr->empty())
+ continue;
+ StringRef Name = Saver.save((*NameOrErr + "@plt").str());
+
+ AllSymbols[*Plt].emplace_back(PltEntry.second, Name, SymbolType);
+ }
+ }
+}
+
+// Normally the disassembly output will skip blocks of zeroes. This function
+// returns the number of zero bytes that can be skipped when dumping the
+// disassembly of the instructions in Buf.
+static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
+ // When -z or --disassemble-zeroes is given we always disassemble them.
+ if (DisassembleZeroes)
+ return 0;
+
+ // Find the number of leading zeroes.
+ size_t N = 0;
+ while (N < Buf.size() && !Buf[N])
+ ++N;
+
+ // We may want to skip blocks of zero bytes, but unless we see
+ // at least 8 of them in a row.
+ if (N < 8)
+ return 0;
+
+ // We skip zeroes in multiples of 4 because we do not want to truncate an
+ // instruction if it starts with a zero byte.
+ return N & ~0x3;
+}
+
+static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (StartAddress > StopAddress)
error("Start address should be less than stop address");
@@ -1243,10 +1328,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Package up features to be passed to target/subtarget
SubtargetFeatures Features = Obj->getFeatures();
- if (MAttrs.size()) {
- for (unsigned i = 0; i != MAttrs.size(); ++i)
- Features.AddFeature(MAttrs[i]);
- }
+ if (!MAttrs.empty())
+ for (unsigned I = 0; I != MAttrs.size(); ++I)
+ Features.AddFeature(MAttrs[I]);
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
@@ -1342,6 +1426,10 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (AllSymbols.empty() && Obj->isELF())
addDynamicElfSymbols(Obj, AllSymbols);
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ addPltEntries(Obj, AllSymbols, Saver);
+
// Create a mapping from virtual address to section.
std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
for (SectionRef Sec : Obj->sections())
@@ -1411,8 +1499,8 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
}
- llvm::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end());
- llvm::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end());
+ llvm::sort(DataMappingSymsAddr);
+ llvm::sort(TextMappingSymsAddr);
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
@@ -1437,7 +1525,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
// Sort relocations by address.
- llvm::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+ llvm::sort(Rels, isRelocAddressLess);
StringRef SegmentName = "";
if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj)) {
@@ -1467,15 +1555,16 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
uint64_t Index;
bool PrintedSection = false;
- std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
- std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
+ std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
+ std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
// Disassemble symbol by symbol.
- for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
- uint64_t Start = std::get<0>(Symbols[si]) - SectionAddr;
+ for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
+ uint64_t Start = std::get<0>(Symbols[SI]) - SectionAddr;
// The end is either the section end or the beginning of the next
// symbol.
- uint64_t End =
- (si == se - 1) ? SectSize : std::get<0>(Symbols[si + 1]) - SectionAddr;
+ uint64_t End = (SI == SE - 1)
+ ? SectSize
+ : std::get<0>(Symbols[SI + 1]) - SectionAddr;
// Don't try to disassemble beyond the end of section contents.
if (End > SectSize)
End = SectSize;
@@ -1492,7 +1581,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
/// Skip if user requested specific symbols and this is not in the list
if (!DisasmFuncsSet.empty() &&
- !DisasmFuncsSet.count(std::get<1>(Symbols[si])))
+ !DisasmFuncsSet.count(std::get<1>(Symbols[SI])))
continue;
if (!PrintedSection) {
@@ -1508,12 +1597,12 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
End = StopAddress - SectionAddr;
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
- if (std::get<2>(Symbols[si]) == ELF::STT_AMDGPU_HSA_KERNEL) {
+ if (std::get<2>(Symbols[SI]) == ELF::STT_AMDGPU_HSA_KERNEL) {
// skip amd_kernel_code_t at the beginning of kernel symbol (256 bytes)
Start += 256;
}
- if (si == se - 1 ||
- std::get<2>(Symbols[si + 1]) == ELF::STT_AMDGPU_HSA_KERNEL) {
+ if (SI == SE - 1 ||
+ std::get<2>(Symbols[SI + 1]) == ELF::STT_AMDGPU_HSA_KERNEL) {
// cut trailing zeroes at the end of kernel
// cut up to 256 bytes
const uint64_t EndAlign = 256;
@@ -1524,25 +1613,15 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
}
- auto PrintSymbol = [](StringRef Name) {
- outs() << '\n' << Name << ":\n";
- };
- StringRef SymbolName = std::get<1>(Symbols[si]);
- if (Demangle.getValue() == "" || Demangle.getValue() == "itanium") {
- char *DemangledSymbol = nullptr;
- size_t Size = 0;
- int Status;
- DemangledSymbol =
- itaniumDemangle(SymbolName.data(), DemangledSymbol, &Size, &Status);
- if (Status == 0)
- PrintSymbol(StringRef(DemangledSymbol));
- else
- PrintSymbol(SymbolName);
+ outs() << '\n';
+ if (!NoLeadingAddr)
+ outs() << format("%016" PRIx64 " ", SectionAddr + Start);
- if (Size != 0)
- free(DemangledSymbol);
- } else
- PrintSymbol(SymbolName);
+ StringRef SymbolName = std::get<1>(Symbols[SI]);
+ if (Demangle)
+ outs() << demangle(SymbolName) << ":\n";
+ else
+ outs() << SymbolName << ":\n";
// Don't print raw contents of a virtual section. A virtual section
// doesn't have any contents in the file.
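
The symbol-name printing above drops the hand-rolled itaniumDemangle() bookkeeping in favor of the llvm::demangle() convenience wrapper, with -demangle becoming a plain boolean flag. A sketch of the wrapper, assuming the llvm/Demangle/Demangle.h helper that returns its input unchanged when the name is not recognized as mangled:

    #include "llvm/Demangle/Demangle.h"
    #include <string>

    std::string prettyName(const std::string &Mangled) {
      // "_Z3foov" becomes "foo()"; non-mangled input is returned as-is.
      return llvm::demangle(Mangled);
    }
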
@@ -1570,7 +1649,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// same section. We rely on the markers introduced to
// understand what we need to dump. If the data marker is within a
// function, it is denoted as a word/short etc
- if (isArmElf(Obj) && std::get<2>(Symbols[si]) != ELF::STT_OBJECT &&
+ if (isArmElf(Obj) && std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
!DisassembleAll) {
uint64_t Stride = 0;
@@ -1634,7 +1713,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// disassembling text (applicable all architectures),
// we are in a situation where we must print the data and not
// disassemble it.
- if (Obj->isELF() && std::get<2>(Symbols[si]) == ELF::STT_OBJECT &&
+ if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
!DisassembleAll && Section.isText()) {
// print out data up to 8 bytes at a time in hex and ascii
uint8_t AsciiData[9] = {'\0'};
@@ -1675,6 +1754,14 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (Index >= End)
break;
+ if (size_t N =
+ countSkippableZeroBytes(Bytes.slice(Index, End - Index))) {
+ outs() << "\t\t..." << '\n';
+ Index += N;
+ if (Index >= End)
+ break;
+ }
+
// Disassemble a real instruction or a data when disassemble all is
// provided
bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
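
The countSkippableZeroBytes() call added above lets the disassembler collapse long runs of zero padding into a single "..." line. Its actual implementation lives elsewhere in this file; the behavior it needs is roughly the following sketch, where the name, threshold, and alignment are illustrative assumptions rather than the patch's code:

    #include "llvm/ADT/ArrayRef.h"
    #include <cstddef>
    #include <cstdint>

    static size_t countZeroPadding(llvm::ArrayRef<uint8_t> Buf) {
      size_t N = 0;
      while (N < Buf.size() && Buf[N] == 0)
        ++N;
      if (N < 16)              // short runs may be real instructions
        return 0;
      return N & ~size_t(3);   // keep the next decode attempt aligned
    }
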
@@ -1753,32 +1840,32 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Hexagon does this in pretty printer
if (Obj->getArch() != Triple::hexagon)
// Print relocation for instruction.
- while (rel_cur != rel_end) {
- bool hidden = getHidden(*rel_cur);
- uint64_t addr = rel_cur->getOffset();
- SmallString<16> name;
- SmallString<32> val;
+ while (RelCur != RelEnd) {
+ uint64_t Addr = RelCur->getOffset();
+ SmallString<16> Name;
+ SmallString<32> Val;
// If this relocation is hidden, skip it.
- if (hidden || ((SectionAddr + addr) < StartAddress)) {
- ++rel_cur;
+ if (getHidden(*RelCur) || ((SectionAddr + Addr) < StartAddress)) {
+ ++RelCur;
continue;
}
// Stop when rel_cur's address is past the current instruction.
- if (addr >= Index + Size) break;
- rel_cur->getTypeName(name);
- error(getRelocationValueString(*rel_cur, val));
- outs() << format(Fmt.data(), SectionAddr + addr) << name
- << "\t" << val << "\n";
- ++rel_cur;
+ if (Addr >= Index + Size)
+ break;
+ RelCur->getTypeName(Name);
+ error(getRelocationValueString(*RelCur, Val));
+ outs() << format(Fmt.data(), SectionAddr + Addr) << Name << "\t"
+ << Val << "\n";
+ ++RelCur;
}
}
}
}
}
-void llvm::PrintRelocations(const ObjectFile *Obj) {
+void llvm::printRelocations(const ObjectFile *Obj) {
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
// Regular objdump doesn't print relocations in non-relocatable object
@@ -1789,61 +1876,57 @@ void llvm::PrintRelocations(const ObjectFile *Obj) {
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
- StringRef secname;
- error(Section.getName(secname));
- outs() << "RELOCATION RECORDS FOR [" << secname << "]:\n";
+ StringRef SecName;
+ error(Section.getName(SecName));
+ outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n";
for (const RelocationRef &Reloc : Section.relocations()) {
- bool hidden = getHidden(Reloc);
- uint64_t address = Reloc.getOffset();
- SmallString<32> relocname;
- SmallString<32> valuestr;
- if (address < StartAddress || address > StopAddress || hidden)
+ uint64_t Address = Reloc.getOffset();
+ SmallString<32> RelocName;
+ SmallString<32> ValueStr;
+ if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
continue;
- Reloc.getTypeName(relocname);
- error(getRelocationValueString(Reloc, valuestr));
- outs() << format(Fmt.data(), address) << " " << relocname << " "
- << valuestr << "\n";
+ Reloc.getTypeName(RelocName);
+ error(getRelocationValueString(Reloc, ValueStr));
+ outs() << format(Fmt.data(), Address) << " " << RelocName << " "
+ << ValueStr << "\n";
}
outs() << "\n";
}
}
-void llvm::PrintDynamicRelocations(const ObjectFile *Obj) {
-
+void llvm::printDynamicRelocations(const ObjectFile *Obj) {
// For the moment, this option is for ELF only
if (!Obj->isELF())
return;
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
-
if (!Elf || Elf->getEType() != ELF::ET_DYN) {
error("not a dynamic object");
return;
}
- StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
-
std::vector<SectionRef> DynRelSec = Obj->dynamic_relocation_sections();
if (DynRelSec.empty())
return;
outs() << "DYNAMIC RELOCATION RECORDS\n";
+ StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
for (const SectionRef &Section : DynRelSec) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
for (const RelocationRef &Reloc : Section.relocations()) {
- uint64_t address = Reloc.getOffset();
- SmallString<32> relocname;
- SmallString<32> valuestr;
- Reloc.getTypeName(relocname);
- error(getRelocationValueString(Reloc, valuestr));
- outs() << format(Fmt.data(), address) << " " << relocname << " "
- << valuestr << "\n";
+ uint64_t Address = Reloc.getOffset();
+ SmallString<32> RelocName;
+ SmallString<32> ValueStr;
+ Reloc.getTypeName(RelocName);
+ error(getRelocationValueString(Reloc, ValueStr));
+ outs() << format(Fmt.data(), Address) << " " << RelocName << " "
+ << ValueStr << "\n";
}
}
}
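
Both relocation printers rely on the generic libObject iteration API: walk each section's relocations, then query the offset and type name of every RelocationRef. Reduced to its core (error handling omitted), the pattern looks like this sketch:

    #include <cinttypes>
    #include "llvm/ADT/SmallString.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    void listRelocations(const llvm::object::ObjectFile &Obj) {
      for (const llvm::object::SectionRef &Sec : Obj.sections())
        for (const llvm::object::RelocationRef &Reloc : Sec.relocations()) {
          llvm::SmallString<32> TypeName;
          Reloc.getTypeName(TypeName);
          llvm::outs() << llvm::format("%08" PRIx64, Reloc.getOffset())
                       << ' ' << TypeName << '\n';
        }
    }
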
-void llvm::PrintSectionHeaders(const ObjectFile *Obj) {
+void llvm::printSectionHeaders(const ObjectFile *Obj) {
outs() << "Sections:\n"
"Idx Name Size Address Type\n";
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
@@ -1860,9 +1943,10 @@ void llvm::PrintSectionHeaders(const ObjectFile *Obj) {
(unsigned)Section.getIndex(), Name.str().c_str(), Size,
Address, Type.c_str());
}
+ outs() << "\n";
}
-void llvm::PrintSectionContents(const ObjectFile *Obj) {
+void llvm::printSectionContents(const ObjectFile *Obj) {
std::error_code EC;
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name;
@@ -1884,23 +1968,23 @@ void llvm::PrintSectionContents(const ObjectFile *Obj) {
error(Section.getContents(Contents));
// Dump out the content as hex and printable ascii characters.
- for (std::size_t addr = 0, end = Contents.size(); addr < end; addr += 16) {
- outs() << format(" %04" PRIx64 " ", BaseAddr + addr);
+ for (std::size_t Addr = 0, End = Contents.size(); Addr < End; Addr += 16) {
+ outs() << format(" %04" PRIx64 " ", BaseAddr + Addr);
// Dump line of hex.
- for (std::size_t i = 0; i < 16; ++i) {
- if (i != 0 && i % 4 == 0)
+ for (std::size_t I = 0; I < 16; ++I) {
+ if (I != 0 && I % 4 == 0)
outs() << ' ';
- if (addr + i < end)
- outs() << hexdigit((Contents[addr + i] >> 4) & 0xF, true)
- << hexdigit(Contents[addr + i] & 0xF, true);
+ if (Addr + I < End)
+ outs() << hexdigit((Contents[Addr + I] >> 4) & 0xF, true)
+ << hexdigit(Contents[Addr + I] & 0xF, true);
else
outs() << " ";
}
// Print ascii.
outs() << " ";
- for (std::size_t i = 0; i < 16 && addr + i < end; ++i) {
- if (isPrint(static_cast<unsigned char>(Contents[addr + i]) & 0xFF))
- outs() << Contents[addr + i];
+ for (std::size_t I = 0; I < 16 && Addr + I < End; ++I) {
+ if (isPrint(static_cast<unsigned char>(Contents[Addr + I]) & 0xFF))
+ outs() << Contents[Addr + I];
else
outs() << ".";
}
@@ -1909,40 +1993,47 @@ void llvm::PrintSectionContents(const ObjectFile *Obj) {
}
}
-void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
+void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName) {
outs() << "SYMBOL TABLE:\n";
- if (const COFFObjectFile *coff = dyn_cast<const COFFObjectFile>(o)) {
- printCOFFSymbolTable(coff);
+ if (const COFFObjectFile *Coff = dyn_cast<const COFFObjectFile>(O)) {
+ printCOFFSymbolTable(Coff);
return;
}
- for (const SymbolRef &Symbol : o->symbols()) {
+
+ for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) {
+ // Skip printing the special zero symbol when dumping an ELF file.
+ // This makes the output consistent with the GNU objdump.
+ if (I == O->symbol_begin() && isa<ELFObjectFileBase>(O))
+ continue;
+
+ const SymbolRef &Symbol = *I;
Expected<uint64_t> AddressOrError = Symbol.getAddress();
if (!AddressOrError)
- report_error(ArchiveName, o->getFileName(), AddressOrError.takeError(),
+ report_error(ArchiveName, O->getFileName(), AddressOrError.takeError(),
ArchitectureName);
uint64_t Address = *AddressOrError;
if ((Address < StartAddress) || (Address > StopAddress))
continue;
Expected<SymbolRef::Type> TypeOrError = Symbol.getType();
if (!TypeOrError)
- report_error(ArchiveName, o->getFileName(), TypeOrError.takeError(),
+ report_error(ArchiveName, O->getFileName(), TypeOrError.takeError(),
ArchitectureName);
SymbolRef::Type Type = *TypeOrError;
uint32_t Flags = Symbol.getFlags();
Expected<section_iterator> SectionOrErr = Symbol.getSection();
if (!SectionOrErr)
- report_error(ArchiveName, o->getFileName(), SectionOrErr.takeError(),
+ report_error(ArchiveName, O->getFileName(), SectionOrErr.takeError(),
ArchitectureName);
section_iterator Section = *SectionOrErr;
StringRef Name;
- if (Type == SymbolRef::ST_Debug && Section != o->section_end()) {
+ if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
Section->getName(Name);
} else {
Expected<StringRef> NameOrErr = Symbol.getName();
if (!NameOrErr)
- report_error(ArchiveName, o->getFileName(), NameOrErr.takeError(),
+ report_error(ArchiveName, O->getFileName(), NameOrErr.takeError(),
ArchitectureName);
Name = *NameOrErr;
}
@@ -1963,8 +2054,10 @@ void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
FileFunc = 'f';
else if (Type == SymbolRef::ST_Function)
FileFunc = 'F';
+ else if (Type == SymbolRef::ST_Data)
+ FileFunc = 'O';
- const char *Fmt = o->getBytesInAddress() > 4 ? "%016" PRIx64 :
+ const char *Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
outs() << format(Fmt, Address) << " "
@@ -1980,11 +2073,11 @@ void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
outs() << "*ABS*";
} else if (Common) {
outs() << "*COM*";
- } else if (Section == o->section_end()) {
+ } else if (Section == O->section_end()) {
outs() << "*UND*";
} else {
if (const MachOObjectFile *MachO =
- dyn_cast<const MachOObjectFile>(o)) {
+ dyn_cast<const MachOObjectFile>(O)) {
DataRefImpl DR = Section->getRawDataRefImpl();
StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
outs() << SegmentName << ",";
@@ -1995,98 +2088,95 @@ void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
}
outs() << '\t';
- if (Common || isa<ELFObjectFileBase>(o)) {
+ if (Common || isa<ELFObjectFileBase>(O)) {
uint64_t Val =
Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
outs() << format("\t %08" PRIx64 " ", Val);
}
- if (Hidden) {
+ if (Hidden)
outs() << ".hidden ";
- }
- outs() << Name
- << '\n';
+
+ if (Demangle)
+ outs() << demangle(Name) << '\n';
+ else
+ outs() << Name << '\n';
}
}
-static void PrintUnwindInfo(const ObjectFile *o) {
+static void printUnwindInfo(const ObjectFile *O) {
outs() << "Unwind info:\n\n";
- if (const COFFObjectFile *coff = dyn_cast<COFFObjectFile>(o)) {
- printCOFFUnwindInfo(coff);
- } else if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+ if (const COFFObjectFile *Coff = dyn_cast<COFFObjectFile>(O))
+ printCOFFUnwindInfo(Coff);
+ else if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(O))
printMachOUnwindInfo(MachO);
- else {
+ else
// TODO: Extract DWARF dump tool to objdump.
- errs() << "This operation is only currently supported "
- "for COFF and MachO object files.\n";
- return;
- }
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for COFF and MachO object files.\n";
}
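
WithColor::error() from llvm/Support/WithColor.h replaces the bare errs() writes throughout this file; it prints an optional tool-name prefix plus a colored "error: " tag and hands the stream back for normal chaining. A small sketch:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/WithColor.h"
    #include "llvm/Support/raw_ostream.h"

    void reportUnsupported(llvm::StringRef ToolName) {
      // Emits e.g. "llvm-objdump: error: unsupported file type"
      // (colored when the stream is a terminal).
      llvm::WithColor::error(llvm::errs(), ToolName)
          << "unsupported file type\n";
    }
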
void llvm::printExportsTrie(const ObjectFile *o) {
outs() << "Exports trie:\n";
if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOExportsTrie(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
void llvm::printRebaseTable(ObjectFile *o) {
outs() << "Rebase table:\n";
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachORebaseTable(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
void llvm::printBindTable(ObjectFile *o) {
outs() << "Bind table:\n";
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOBindTable(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
void llvm::printLazyBindTable(ObjectFile *o) {
outs() << "Lazy bind table:\n";
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOLazyBindTable(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
void llvm::printWeakBindTable(ObjectFile *o) {
outs() << "Weak bind table:\n";
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
printMachOWeakBindTable(MachO);
- else {
- errs() << "This operation is only currently supported "
- "for Mach-O executable files.\n";
- return;
- }
+ else
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
}
/// Dump the raw contents of the __clangast section so the output can be piped
/// into llvm-bcanalyzer.
void llvm::printRawClangAST(const ObjectFile *Obj) {
if (outs().is_displayed()) {
- errs() << "The -raw-clang-ast option will dump the raw binary contents of "
- "the clang ast section.\n"
- "Please redirect the output to a file or another program such as "
- "llvm-bcanalyzer.\n";
+ WithColor::error(errs(), ToolName)
+ << "The -raw-clang-ast option will dump the raw binary contents of "
+ "the clang ast section.\n"
+ "Please redirect the output to a file or another program such as "
+ "llvm-bcanalyzer.\n";
return;
}
@@ -2113,15 +2203,16 @@ void llvm::printRawClangAST(const ObjectFile *Obj) {
}
static void printFaultMaps(const ObjectFile *Obj) {
- const char *FaultMapSectionName = nullptr;
+ StringRef FaultMapSectionName;
if (isa<ELFObjectFileBase>(Obj)) {
FaultMapSectionName = ".llvm_faultmaps";
} else if (isa<MachOObjectFile>(Obj)) {
FaultMapSectionName = "__llvm_faultmaps";
} else {
- errs() << "This operation is only currently supported "
- "for ELF and Mach-O executable files.\n";
+ WithColor::error(errs(), ToolName)
+ << "This operation is only currently supported "
+ "for ELF and Mach-O executable files.\n";
return;
}
@@ -2152,42 +2243,44 @@ static void printFaultMaps(const ObjectFile *Obj) {
outs() << FMP;
}
-static void printPrivateFileHeaders(const ObjectFile *o, bool onlyFirst) {
- if (o->isELF()) {
- printELFFileHeader(o);
- return printELFDynamicSection(o);
+static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
+ if (O->isELF()) {
+ printELFFileHeader(O);
+ return printELFDynamicSection(O);
}
- if (o->isCOFF())
- return printCOFFFileHeader(o);
- if (o->isWasm())
- return printWasmFileHeader(o);
- if (o->isMachO()) {
- printMachOFileHeader(o);
- if (!onlyFirst)
- printMachOLoadCommands(o);
+ if (O->isCOFF())
+ return printCOFFFileHeader(O);
+ if (O->isWasm())
+ return printWasmFileHeader(O);
+ if (O->isMachO()) {
+ printMachOFileHeader(O);
+ if (!OnlyFirst)
+ printMachOLoadCommands(O);
return;
}
- report_error(o->getFileName(), "Invalid/Unsupported object file format");
+ report_error(O->getFileName(), "Invalid/Unsupported object file format");
}
-static void printFileHeaders(const ObjectFile *o) {
- if (!o->isELF() && !o->isCOFF())
- report_error(o->getFileName(), "Invalid/Unsupported object file format");
+static void printFileHeaders(const ObjectFile *O) {
+ if (!O->isELF() && !O->isCOFF())
+ report_error(O->getFileName(), "Invalid/Unsupported object file format");
- Triple::ArchType AT = o->getArch();
+ Triple::ArchType AT = O->getArch();
outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n";
- Expected<uint64_t> StartAddrOrErr = o->getStartAddress();
+ Expected<uint64_t> StartAddrOrErr = O->getStartAddress();
if (!StartAddrOrErr)
- report_error(o->getFileName(), StartAddrOrErr.takeError());
+ report_error(O->getFileName(), StartAddrOrErr.takeError());
+
+ StringRef Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
+ uint64_t Address = StartAddrOrErr.get();
outs() << "start address: "
- << format("0x%0*x", o->getBytesInAddress(), StartAddrOrErr.get())
- << "\n";
+ << "0x" << format(Fmt.data(), Address) << "\n\n";
}
static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
if (!ModeOrErr) {
- errs() << "ill-formed archive entry.\n";
+ WithColor::error(errs(), ToolName) << "ill-formed archive entry.\n";
consumeError(ModeOrErr.takeError());
return;
}
@@ -2248,55 +2341,55 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
outs() << Name << "\n";
}
-static void DumpObject(ObjectFile *o, const Archive *a = nullptr,
- const Archive::Child *c = nullptr) {
- StringRef ArchiveName = a != nullptr ? a->getFileName() : "";
+static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
+ const Archive::Child *C = nullptr) {
// Avoid other output when using a raw option.
if (!RawClangAST) {
outs() << '\n';
- if (a)
- outs() << a->getFileName() << "(" << o->getFileName() << ")";
+ if (A)
+ outs() << A->getFileName() << "(" << O->getFileName() << ")";
else
- outs() << o->getFileName();
- outs() << ":\tfile format " << o->getFileFormatName() << "\n\n";
+ outs() << O->getFileName();
+ outs() << ":\tfile format " << O->getFileFormatName() << "\n\n";
}
- if (ArchiveHeaders && !MachOOpt)
- printArchiveChild(a->getFileName(), *c);
+ StringRef ArchiveName = A ? A->getFileName() : "";
+ if (FileHeaders)
+ printFileHeaders(O);
+ if (ArchiveHeaders && !MachOOpt && C)
+ printArchiveChild(ArchiveName, *C);
if (Disassemble)
- DisassembleObject(o, Relocations);
+ disassembleObject(O, Relocations);
if (Relocations && !Disassemble)
- PrintRelocations(o);
+ printRelocations(O);
if (DynamicRelocations)
- PrintDynamicRelocations(o);
+ printDynamicRelocations(O);
if (SectionHeaders)
- PrintSectionHeaders(o);
+ printSectionHeaders(O);
if (SectionContents)
- PrintSectionContents(o);
+ printSectionContents(O);
if (SymbolTable)
- PrintSymbolTable(o, ArchiveName);
+ printSymbolTable(O, ArchiveName);
if (UnwindInfo)
- PrintUnwindInfo(o);
+ printUnwindInfo(O);
if (PrivateHeaders || FirstPrivateHeader)
- printPrivateFileHeaders(o, FirstPrivateHeader);
- if (FileHeaders)
- printFileHeaders(o);
+ printPrivateFileHeaders(O, FirstPrivateHeader);
if (ExportsTrie)
- printExportsTrie(o);
+ printExportsTrie(O);
if (Rebase)
- printRebaseTable(o);
+ printRebaseTable(O);
if (Bind)
- printBindTable(o);
+ printBindTable(O);
if (LazyBind)
- printLazyBindTable(o);
+ printLazyBindTable(O);
if (WeakBind)
- printWeakBindTable(o);
+ printWeakBindTable(O);
if (RawClangAST)
- printRawClangAST(o);
+ printRawClangAST(O);
if (PrintFaultMaps)
- printFaultMaps(o);
+ printFaultMaps(O);
if (DwarfDumpType != DIDT_Null) {
- std::unique_ptr<DIContext> DICtx = DWARFContext::create(*o);
+ std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
// Dump the complete DWARF structure.
DIDumpOptions DumpOpts;
DumpOpts.DumpType = DwarfDumpType;
@@ -2304,7 +2397,7 @@ static void DumpObject(ObjectFile *o, const Archive *a = nullptr,
}
}
-static void DumpObject(const COFFImportFile *I, const Archive *A,
+static void dumpObject(const COFFImportFile *I, const Archive *A,
const Archive::Child *C = nullptr) {
StringRef ArchiveName = A ? A->getFileName() : "";
@@ -2315,41 +2408,40 @@ static void DumpObject(const COFFImportFile *I, const Archive *A,
<< ":\tfile format COFF-import-file"
<< "\n\n";
- if (ArchiveHeaders && !MachOOpt)
- printArchiveChild(A->getFileName(), *C);
+ if (ArchiveHeaders && !MachOOpt && C)
+ printArchiveChild(ArchiveName, *C);
if (SymbolTable)
printCOFFSymbolTable(I);
}
/// Dump each object file in \a a;
-static void DumpArchive(const Archive *a) {
+static void dumpArchive(const Archive *A) {
Error Err = Error::success();
- for (auto &C : a->children(Err)) {
+ for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(a->getFileName(), C, std::move(E));
+ report_error(A->getFileName(), C, std::move(E));
continue;
}
- if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
- DumpObject(o, a, &C);
+ if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
+ dumpObject(O, A, &C);
else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
- DumpObject(I, a, &C);
+ dumpObject(I, A, &C);
else
- report_error(a->getFileName(), object_error::invalid_file_type);
+ report_error(A->getFileName(), object_error::invalid_file_type);
}
if (Err)
- report_error(a->getFileName(), std::move(Err));
+ report_error(A->getFileName(), std::move(Err));
}
/// Open file and figure out how to dump it.
-static void DumpInput(StringRef file) {
-
+static void dumpInput(StringRef file) {
// If we are using the Mach-O specific object file parser, then let it parse
// the file and process the command line options. So the -arch flags can
// be used to select specific slices, etc.
if (MachOOpt) {
- ParseInputMachO(file);
+ parseInputMachO(file);
return;
}
@@ -2359,10 +2451,12 @@ static void DumpInput(StringRef file) {
report_error(file, BinaryOrErr.takeError());
Binary &Binary = *BinaryOrErr.get().getBinary();
- if (Archive *a = dyn_cast<Archive>(&Binary))
- DumpArchive(a);
- else if (ObjectFile *o = dyn_cast<ObjectFile>(&Binary))
- DumpObject(o);
+ if (Archive *A = dyn_cast<Archive>(&Binary))
+ dumpArchive(A);
+ else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary))
+ dumpObject(O);
+ else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
+ parseInputMachO(UB);
else
report_error(file, object_error::invalid_file_type);
}
@@ -2379,24 +2473,20 @@ int main(int argc, char **argv) {
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n");
- TripleName = Triple::normalize(TripleName);
ToolName = argv[0];
// Defaults to a.out if no filenames specified.
- if (InputFilenames.size() == 0)
+ if (InputFilenames.empty())
InputFilenames.push_back("a.out");
if (AllHeaders)
- PrivateHeaders = Relocations = SectionHeaders = SymbolTable = true;
+ FileHeaders = PrivateHeaders = Relocations = SectionHeaders = SymbolTable =
+ true;
if (DisassembleAll || PrintSource || PrintLines)
Disassemble = true;
- if (Demangle.getValue() != "none" && Demangle.getValue() != "" &&
- Demangle.getValue() != "itanium")
- warn("Unsupported demangling style");
-
if (!Disassemble
&& !Relocations
&& !DynamicRelocations
@@ -2422,7 +2512,7 @@ int main(int argc, char **argv) {
&& !(DylibsUsed && MachOOpt)
&& !(DylibId && MachOOpt)
&& !(ObjcMetaData && MachOOpt)
- && !(FilterSections.size() != 0 && MachOOpt)
+ && !(!FilterSections.empty() && MachOOpt)
&& !PrintFaultMaps
&& DwarfDumpType == DIDT_Null) {
cl::PrintHelpMessage();
@@ -2432,7 +2522,7 @@ int main(int argc, char **argv) {
DisasmFuncsSet.insert(DisassembleFunctions.begin(),
DisassembleFunctions.end());
- llvm::for_each(InputFilenames, DumpInput);
+ llvm::for_each(InputFilenames, dumpInput);
return EXIT_SUCCESS;
}
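
dumpInput() follows the standard libObject dispatch: open the path with createBinary() and branch on the concrete Binary subclass with dyn_cast/isa. A self-contained sketch of that shape (error handling simplified, names illustrative):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Object/Archive.h"
    #include "llvm/Object/Binary.h"
    #include "llvm/Object/MachOUniversal.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    llvm::Error classifyFile(llvm::StringRef Path) {
      using namespace llvm::object;
      llvm::Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
      if (!BinOrErr)
        return BinOrErr.takeError();
      Binary &Bin = *BinOrErr->getBinary();
      if (llvm::isa<Archive>(&Bin))
        llvm::outs() << Path << ": archive\n";
      else if (llvm::isa<MachOUniversalBinary>(&Bin))
        llvm::outs() << Path << ": Mach-O universal binary\n";
      else if (llvm::isa<ObjectFile>(&Bin))
        llvm::outs() << Path << ": object file\n";
      else
        llvm::outs() << Path << ": unrecognized\n";
      return llvm::Error::success();
    }
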
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
index b2eb6e9d7771..fe2cb05fe227 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -22,6 +22,7 @@ namespace object {
class COFFObjectFile;
class COFFImportFile;
class MachOObjectFile;
+ class MachOUniversalBinary;
class ObjectFile;
class Archive;
class RelocationRef;
@@ -30,10 +31,10 @@ namespace object {
extern cl::opt<std::string> TripleName;
extern cl::opt<std::string> ArchName;
extern cl::opt<std::string> MCPU;
-extern cl::opt<std::string> Demangle;
extern cl::list<std::string> MAttrs;
extern cl::list<std::string> FilterSections;
extern cl::opt<bool> AllHeaders;
+extern cl::opt<bool> Demangle;
extern cl::opt<bool> Disassemble;
extern cl::opt<bool> DisassembleAll;
extern cl::opt<bool> NoShowRawInsn;
@@ -69,34 +70,35 @@ extern cl::opt<DIDumpType> DwarfDumpType;
// Various helper functions.
void error(std::error_code ec);
-bool RelocAddressLess(object::RelocationRef a, object::RelocationRef b);
-void ParseInputMachO(StringRef Filename);
-void printCOFFUnwindInfo(const object::COFFObjectFile* o);
-void printMachOUnwindInfo(const object::MachOObjectFile* o);
-void printMachOExportsTrie(const object::MachOObjectFile* o);
-void printMachORebaseTable(object::MachOObjectFile* o);
-void printMachOBindTable(object::MachOObjectFile* o);
-void printMachOLazyBindTable(object::MachOObjectFile* o);
-void printMachOWeakBindTable(object::MachOObjectFile* o);
-void printELFFileHeader(const object::ObjectFile *o);
+bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B);
+void parseInputMachO(StringRef Filename);
+void parseInputMachO(object::MachOUniversalBinary *UB);
+void printCOFFUnwindInfo(const object::COFFObjectFile *O);
+void printMachOUnwindInfo(const object::MachOObjectFile *O);
+void printMachOExportsTrie(const object::MachOObjectFile *O);
+void printMachORebaseTable(object::MachOObjectFile *O);
+void printMachOBindTable(object::MachOObjectFile *O);
+void printMachOLazyBindTable(object::MachOObjectFile *O);
+void printMachOWeakBindTable(object::MachOObjectFile *O);
+void printELFFileHeader(const object::ObjectFile *O);
void printELFDynamicSection(const object::ObjectFile *Obj);
-void printCOFFFileHeader(const object::ObjectFile *o);
-void printCOFFSymbolTable(const object::COFFImportFile *i);
-void printCOFFSymbolTable(const object::COFFObjectFile *o);
-void printMachOFileHeader(const object::ObjectFile *o);
-void printMachOLoadCommands(const object::ObjectFile *o);
-void printWasmFileHeader(const object::ObjectFile *o);
-void printExportsTrie(const object::ObjectFile *o);
-void printRebaseTable(object::ObjectFile *o);
-void printBindTable(object::ObjectFile *o);
-void printLazyBindTable(object::ObjectFile *o);
-void printWeakBindTable(object::ObjectFile *o);
-void printRawClangAST(const object::ObjectFile *o);
-void PrintRelocations(const object::ObjectFile *o);
-void PrintDynamicRelocations(const object::ObjectFile *o);
-void PrintSectionHeaders(const object::ObjectFile *o);
-void PrintSectionContents(const object::ObjectFile *o);
-void PrintSymbolTable(const object::ObjectFile *o, StringRef ArchiveName,
+void printCOFFFileHeader(const object::ObjectFile *O);
+void printCOFFSymbolTable(const object::COFFImportFile *I);
+void printCOFFSymbolTable(const object::COFFObjectFile *O);
+void printMachOFileHeader(const object::ObjectFile *O);
+void printMachOLoadCommands(const object::ObjectFile *O);
+void printWasmFileHeader(const object::ObjectFile *O);
+void printExportsTrie(const object::ObjectFile *O);
+void printRebaseTable(object::ObjectFile *O);
+void printBindTable(object::ObjectFile *O);
+void printLazyBindTable(object::ObjectFile *O);
+void printWeakBindTable(object::ObjectFile *O);
+void printRawClangAST(const object::ObjectFile *O);
+void printRelocations(const object::ObjectFile *O);
+void printDynamicRelocations(const object::ObjectFile *O);
+void printSectionHeaders(const object::ObjectFile *O);
+void printSectionContents(const object::ObjectFile *O);
+void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName = StringRef());
void warn(StringRef Message);
LLVM_ATTRIBUTE_NORETURN void error(Twine Message);
diff --git a/contrib/llvm/tools/llvm-pdbutil/Analyze.cpp b/contrib/llvm/tools/llvm-pdbutil/Analyze.cpp
deleted file mode 100644
index 974ab49d9440..000000000000
--- a/contrib/llvm/tools/llvm-pdbutil/Analyze.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-//===- Analyze.cpp - PDB analysis functions ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Analyze.h"
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
-
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <list>
-
-using namespace llvm;
-using namespace llvm::codeview;
-using namespace llvm::pdb;
-
-static StringRef getLeafTypeName(TypeLeafKind LT) {
- switch (LT) {
-#define TYPE_RECORD(ename, value, name) \
- case ename: \
- return #name;
-#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
- default:
- break;
- }
- return "UnknownLeaf";
-}
-
-namespace {
-struct HashLookupVisitor : public TypeVisitorCallbacks {
- struct Entry {
- TypeIndex TI;
- CVType Record;
- };
-
- explicit HashLookupVisitor(TpiStream &Tpi) : Tpi(Tpi) {}
-
- Error visitTypeBegin(CVType &Record) override {
- uint32_t H = Tpi.getHashValues()[I];
- Record.Hash = H;
- TypeIndex TI(I + TypeIndex::FirstNonSimpleIndex);
- Lookup[H].push_back(Entry{TI, Record});
- ++I;
- return Error::success();
- }
-
- uint32_t I = 0;
- DenseMap<uint32_t, std::list<Entry>> Lookup;
- TpiStream &Tpi;
-};
-}
-
-AnalysisStyle::AnalysisStyle(PDBFile &File) : File(File) {}
-
-Error AnalysisStyle::dump() {
- auto Tpi = File.getPDBTpiStream();
- if (!Tpi)
- return Tpi.takeError();
-
- HashLookupVisitor Hasher(*Tpi);
-
- uint32_t RecordCount = Tpi->getNumTypeRecords();
- auto Offsets = Tpi->getTypeIndexOffsets();
- auto Types = llvm::make_unique<LazyRandomTypeCollection>(
- Tpi->typeArray(), RecordCount, Offsets);
-
- if (auto EC = codeview::visitTypeStream(*Types, Hasher))
- return EC;
-
- auto &Adjusters = Tpi->getHashAdjusters();
- DenseSet<uint32_t> AdjusterSet;
- for (const auto &Adj : Adjusters) {
- assert(AdjusterSet.find(Adj.second) == AdjusterSet.end());
- AdjusterSet.insert(Adj.second);
- }
-
- uint32_t Count = 0;
- outs() << "Searching for hash collisions\n";
- for (const auto &H : Hasher.Lookup) {
- if (H.second.size() <= 1)
- continue;
- ++Count;
- outs() << formatv("Hash: {0}, Count: {1} records\n", H.first,
- H.second.size());
- for (const auto &R : H.second) {
- auto Iter = AdjusterSet.find(R.TI.getIndex());
- StringRef Prefix;
- if (Iter != AdjusterSet.end()) {
- Prefix = "[HEAD]";
- AdjusterSet.erase(Iter);
- }
- StringRef LeafName = getLeafTypeName(R.Record.Type);
- uint32_t TI = R.TI.getIndex();
- StringRef TypeName = Types->getTypeName(R.TI);
- outs() << formatv("{0,-6} {1} ({2:x}) {3}\n", Prefix, LeafName, TI,
- TypeName);
- }
- }
-
- outs() << "\n";
- outs() << "Dumping hash adjustment chains\n";
- for (const auto &A : Tpi->getHashAdjusters()) {
- TypeIndex TI(A.second);
- StringRef TypeName = Types->getTypeName(TI);
- const CVType &HeadRecord = Types->getType(TI);
- assert(HeadRecord.Hash.hasValue());
-
- auto CollisionsIter = Hasher.Lookup.find(*HeadRecord.Hash);
- if (CollisionsIter == Hasher.Lookup.end())
- continue;
-
- const auto &Collisions = CollisionsIter->second;
- outs() << TypeName << "\n";
- outs() << formatv(" [HEAD] {0:x} {1} {2}\n", uint32_t(A.second),
- getLeafTypeName(HeadRecord.Type), TypeName);
- for (const auto &Chain : Collisions) {
- if (Chain.TI == TI)
- continue;
- const CVType &TailRecord = Types->getType(Chain.TI);
- outs() << formatv(" {0:x} {1} {2}\n", Chain.TI.getIndex(),
- getLeafTypeName(TailRecord.Type),
- Types->getTypeName(Chain.TI));
- }
- }
- outs() << formatv("There are {0} orphaned hash adjusters\n",
- AdjusterSet.size());
- for (const auto &Adj : AdjusterSet) {
- outs() << formatv(" {0}\n", Adj);
- }
-
- uint32_t DistinctHashValues = Hasher.Lookup.size();
- outs() << formatv("{0}/{1} hash collisions", Count, DistinctHashValues);
- return Error::success();
-}
diff --git a/contrib/llvm/tools/llvm-pdbutil/Analyze.h b/contrib/llvm/tools/llvm-pdbutil/Analyze.h
deleted file mode 100644
index 7230ae45b0c8..000000000000
--- a/contrib/llvm/tools/llvm-pdbutil/Analyze.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===- Analyze.h - PDB analysis functions -----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVMPDBDUMP_ANALYSIS_H
-#define LLVM_TOOLS_LLVMPDBDUMP_ANALYSIS_H
-
-#include "OutputStyle.h"
-
-namespace llvm {
-namespace pdb {
-class PDBFile;
-class AnalysisStyle : public OutputStyle {
-public:
- explicit AnalysisStyle(PDBFile &File);
-
- Error dump() override;
-
-private:
- PDBFile &File;
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
index 9e59adc71967..e4f6aa7f6ec5 100644
--- a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -22,6 +22,7 @@
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
@@ -65,6 +66,16 @@ DumpOutputStyle::DumpOutputStyle(InputFile &File)
PDBFile &DumpOutputStyle::getPdb() { return File.pdb(); }
object::COFFObjectFile &DumpOutputStyle::getObj() { return File.obj(); }
+void DumpOutputStyle::printStreamNotValidForObj() {
+ AutoIndent Indent(P, 4);
+ P.formatLine("Dumping this stream is not valid for object files");
+}
+
+void DumpOutputStyle::printStreamNotPresent(StringRef StreamName) {
+ AutoIndent Indent(P, 4);
+ P.formatLine("{0} stream not present", StreamName);
+}
+
Error DumpOutputStyle::dump() {
if (opts::dump::DumpSummary) {
if (auto EC = dumpFileSummary())
@@ -132,6 +143,11 @@ Error DumpOutputStyle::dump() {
return EC;
}
+ if (opts::dump::DumpFpo) {
+ if (auto EC = dumpFpo())
+ return EC;
+ }
+
if (File.isObj()) {
if (opts::dump::DumpTypes || !opts::dump::DumpTypeIndex.empty() ||
opts::dump::DumpTypeExtras)
@@ -199,14 +215,14 @@ static void printHeader(LinePrinter &P, const Twine &S) {
Error DumpOutputStyle::dumpFileSummary() {
printHeader(P, "Summary");
- ExitOnError Err("Invalid PDB Format: ");
-
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping File summary is not valid for object files");
+ printStreamNotValidForObj();
return Error::success();
}
+ AutoIndent Indent(P);
+ ExitOnError Err("Invalid PDB Format: ");
+
P.formatLine("Block Size: {0}", getPdb().getBlockSize());
P.formatLine("Number of blocks: {0}", getPdb().getBlockCount());
P.formatLine("Number of streams: {0}", getPdb().getNumStreams());
@@ -234,7 +250,7 @@ Error DumpOutputStyle::dumpFileSummary() {
static StatCollection getSymbolStats(const SymbolGroup &SG,
StatCollection &CumulativeStats) {
StatCollection Stats;
- if (SG.getFile().isPdb()) {
+ if (SG.getFile().isPdb() && SG.hasDebugStream()) {
// For PDB files, all symbols are packed into one stream.
for (const auto &S : SG.getPdbModuleStream().symbols(nullptr)) {
Stats.update(S.kind(), S.length());
@@ -326,12 +342,13 @@ static bool shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group) {
Error DumpOutputStyle::dumpStreamSummary() {
printHeader(P, "Streams");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping streams is not valid for object files");
+ printStreamNotValidForObj();
return Error::success();
}
+ AutoIndent Indent(P);
+
if (StreamPurposes.empty())
discoverStreamPurposes(getPdb(), StreamPurposes);
@@ -527,18 +544,18 @@ static void dumpSectionContrib(LinePrinter &P, const SectionContrib2 &SC,
Error DumpOutputStyle::dumpModules() {
printHeader(P, "Modules");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping modules is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
if (!getPdb().hasPDBDbiStream()) {
- P.formatLine("DBI Stream not present");
+ printStreamNotPresent("DBI");
return Error::success();
}
+ AutoIndent Indent(P);
ExitOnError Err("Unexpected error processing modules: ");
auto &Stream = Err(getPdb().getPDBDbiStream());
@@ -570,7 +587,12 @@ Error DumpOutputStyle::dumpModuleFiles() {
printHeader(P, "Files");
if (File.isObj()) {
- P.formatLine("Dumping files is not valid for object files");
+ printStreamNotValidForObj();
+ return Error::success();
+ }
+
+ if (!getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
return Error::success();
}
@@ -591,6 +613,11 @@ Error DumpOutputStyle::dumpModuleFiles() {
Error DumpOutputStyle::dumpSymbolStats() {
printHeader(P, "Module Stats");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
ExitOnError Err("Unexpected error processing modules: ");
StatCollection SymStats;
@@ -625,9 +652,9 @@ Error DumpOutputStyle::dumpSymbolStats() {
}
});
- P.printLine(" Summary |");
- AutoIndent Indent(P, 4);
if (SymStats.Totals.Count > 0) {
+ P.printLine(" Summary |");
+ AutoIndent Indent(P, 4);
printModuleDetailStats<SymbolKind>(P, "Symbols", SymStats);
printModuleDetailStats<DebugSubsectionKind>(P, "Chunks", ChunkStats);
}
@@ -680,6 +707,11 @@ static uint32_t getLongestTypeLeafName(const StatCollection &Stats) {
Error DumpOutputStyle::dumpUdtStats() {
printHeader(P, "S_UDT Record Stats");
+ if (File.isPdb() && !getPdb().hasPDBGlobalsStream()) {
+ printStreamNotPresent("Globals");
+ return Error::success();
+ }
+
StatCollection UdtStats;
StatCollection UdtTargetStats;
AutoIndent Indent(P, 4);
@@ -726,11 +758,6 @@ Error DumpOutputStyle::dumpUdtStats() {
P.NewLine();
if (File.isPdb()) {
- if (!getPdb().hasPDBGlobalsStream()) {
- P.printLine("- Error: globals stream not present");
- return Error::success();
- }
-
auto &SymbolRecords = cantFail(getPdb().getPDBSymbolStream());
auto ExpGlobals = getPdb().getPDBGlobalsStream();
if (!ExpGlobals)
@@ -839,6 +866,11 @@ static void typesetLinesAndColumns(LinePrinter &P, uint32_t Start,
Error DumpOutputStyle::dumpLines() {
printHeader(P, "Lines");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
uint32_t LastModi = UINT32_MAX;
uint32_t LastNameIndex = UINT32_MAX;
iterateModuleSubsections<DebugLinesSubsectionRef>(
@@ -875,6 +907,11 @@ Error DumpOutputStyle::dumpLines() {
Error DumpOutputStyle::dumpInlineeLines() {
printHeader(P, "Inlinee Lines");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
iterateModuleSubsections<DebugInlineeLinesSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
@@ -893,6 +930,12 @@ Error DumpOutputStyle::dumpInlineeLines() {
Error DumpOutputStyle::dumpXmi() {
printHeader(P, "Cross Module Imports");
+
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
iterateModuleSubsections<DebugCrossModuleImportsSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
@@ -929,6 +972,11 @@ Error DumpOutputStyle::dumpXmi() {
Error DumpOutputStyle::dumpXme() {
printHeader(P, "Cross Module Exports");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
iterateModuleSubsections<DebugCrossModuleExportsSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
@@ -943,6 +991,111 @@ Error DumpOutputStyle::dumpXme() {
return Error::success();
}
+std::string formatFrameType(object::frame_type FT) {
+ switch (FT) {
+ case object::frame_type::Fpo:
+ return "FPO";
+ case object::frame_type::NonFpo:
+ return "Non-FPO";
+ case object::frame_type::Trap:
+ return "Trap";
+ case object::frame_type::Tss:
+ return "TSS";
+ }
+ return "<unknown>";
+}
+
+Error DumpOutputStyle::dumpOldFpo(PDBFile &File) {
+ printHeader(P, "Old FPO Data");
+
+ ExitOnError Err("Error dumping old fpo data:");
+ auto &Dbi = Err(File.getPDBDbiStream());
+
+ uint32_t Index = Dbi.getDebugStreamIndex(DbgHeaderType::FPO);
+ if (Index == kInvalidStreamIndex) {
+ printStreamNotPresent("FPO");
+ return Error::success();
+ }
+
+ std::unique_ptr<MappedBlockStream> OldFpo = File.createIndexedStream(Index);
+ BinaryStreamReader Reader(*OldFpo);
+ FixedStreamArray<object::FpoData> Records;
+ Err(Reader.readArray(Records,
+ Reader.bytesRemaining() / sizeof(object::FpoData)));
+
+ P.printLine(" RVA | Code | Locals | Params | Prolog | Saved Regs | Use "
+ "BP | Has SEH | Frame Type");
+
+ for (const object::FpoData &FD : Records) {
+ P.formatLine("{0:X-8} | {1,4} | {2,6} | {3,6} | {4,6} | {5,10} | {6,6} | "
+ "{7,7} | {8,9}",
+ uint32_t(FD.Offset), uint32_t(FD.Size), uint32_t(FD.NumLocals),
+ uint32_t(FD.NumParams), FD.getPrologSize(),
+ FD.getNumSavedRegs(), FD.useBP(), FD.hasSEH(),
+ formatFrameType(FD.getFP()));
+ }
+ return Error::success();
+}
+
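
dumpOldFpo() above treats the legacy FPO debug stream as a flat array of fixed-size records. The BinaryStreamReader / FixedStreamArray combination it uses can be exercised in isolation roughly like this (the record type is a stand-in for object::FpoData):

    #include <cstdint>
    #include "llvm/Support/BinaryStreamArray.h"
    #include "llvm/Support/BinaryStreamReader.h"
    #include "llvm/Support/Error.h"

    struct Record { uint32_t Rva; uint32_t Size; };   // hypothetical layout

    llvm::Error readAllRecords(llvm::BinaryStreamRef Stream,
                               llvm::FixedStreamArray<Record> &Records) {
      llvm::BinaryStreamReader Reader(Stream);
      // Interpret every remaining byte as tightly packed Record entries.
      return Reader.readArray(Records,
                              Reader.bytesRemaining() / sizeof(Record));
    }
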
+Error DumpOutputStyle::dumpNewFpo(PDBFile &File) {
+ printHeader(P, "New FPO Data");
+
+ ExitOnError Err("Error dumping new fpo data:");
+ auto &Dbi = Err(File.getPDBDbiStream());
+
+ uint32_t Index = Dbi.getDebugStreamIndex(DbgHeaderType::NewFPO);
+ if (Index == kInvalidStreamIndex) {
+ printStreamNotPresent("New FPO");
+ return Error::success();
+ }
+
+ std::unique_ptr<MappedBlockStream> NewFpo = File.createIndexedStream(Index);
+
+ DebugFrameDataSubsectionRef FDS;
+ if (auto EC = FDS.initialize(*NewFpo))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Invalid new fpo stream");
+
+ P.printLine(" RVA | Code | Locals | Params | Stack | Prolog | Saved Regs "
+ "| Has SEH | Has C++EH | Start | Program");
+ for (const FrameData &FD : FDS) {
+ bool IsFuncStart = FD.Flags & FrameData::IsFunctionStart;
+ bool HasEH = FD.Flags & FrameData::HasEH;
+ bool HasSEH = FD.Flags & FrameData::HasSEH;
+
+ auto &StringTable = Err(File.getStringTable());
+
+ auto Program = Err(StringTable.getStringForID(FD.FrameFunc));
+ P.formatLine("{0:X-8} | {1,4} | {2,6} | {3,6} | {4,5} | {5,6} | {6,10} | "
+ "{7,7} | {8,9} | {9,5} | {10}",
+ uint32_t(FD.RvaStart), uint32_t(FD.CodeSize),
+ uint32_t(FD.LocalSize), uint32_t(FD.ParamsSize),
+ uint32_t(FD.MaxStackSize), uint16_t(FD.PrologSize),
+ uint16_t(FD.SavedRegsSize), HasSEH, HasEH, IsFuncStart,
+ Program);
+ }
+ return Error::success();
+}
+
+Error DumpOutputStyle::dumpFpo() {
+ if (!File.isPdb()) {
+ printStreamNotValidForObj();
+ return Error::success();
+ }
+
+ PDBFile &File = getPdb();
+ if (!File.hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
+ return Error::success();
+ }
+
+ if (auto EC = dumpOldFpo(File))
+ return EC;
+ if (auto EC = dumpNewFpo(File))
+ return EC;
+ return Error::success();
+}
+
Error DumpOutputStyle::dumpStringTableFromPdb() {
AutoIndent Indent(P);
auto IS = getPdb().getStringTable();
@@ -965,7 +1118,7 @@ Error DumpOutputStyle::dumpStringTableFromPdb() {
std::vector<uint32_t> SortedIDs(IS->name_ids().begin(),
IS->name_ids().end());
- llvm::sort(SortedIDs.begin(), SortedIDs.end());
+ llvm::sort(SortedIDs);
for (uint32_t I : SortedIDs) {
auto ES = IS->getStringForID(I);
llvm::SmallString<32> Str;
@@ -1037,12 +1190,13 @@ Error DumpOutputStyle::dumpStringTableFromObj() {
Error DumpOutputStyle::dumpNamedStreams() {
printHeader(P, "Named Streams");
- AutoIndent Indent(P, 2);
if (File.isObj()) {
- P.formatLine("Dumping Named Streams is only supported for PDB files.");
+ printStreamNotValidForObj();
return Error::success();
}
+
+ AutoIndent Indent(P);
ExitOnError Err("Invalid PDB File: ");
auto &IS = Err(File.pdb().getPDBInfoStream());
@@ -1087,13 +1241,13 @@ static void
dumpFullTypeStream(LinePrinter &Printer, LazyRandomTypeCollection &Types,
uint32_t NumTypeRecords, uint32_t NumHashBuckets,
FixedStreamArray<support::ulittle32_t> HashValues,
- bool Bytes, bool Extras) {
+ TpiStream *Stream, bool Bytes, bool Extras) {
Printer.formatLine("Showing {0:N} records", NumTypeRecords);
uint32_t Width = NumDigits(TypeIndex::FirstNonSimpleIndex + NumTypeRecords);
MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
- NumHashBuckets, HashValues);
+ NumHashBuckets, HashValues, Stream);
if (auto EC = codeview::visitTypeStream(Types, V)) {
Printer.formatLine("An error occurred dumping type records: {0}",
@@ -1109,7 +1263,8 @@ static void dumpPartialTypeStream(LinePrinter &Printer,
NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords());
MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
- Stream.getNumHashBuckets(), Stream.getHashValues());
+ Stream.getNumHashBuckets(), Stream.getHashValues(),
+ &Stream);
if (opts::dump::DumpTypeDependents) {
// If we need to dump all dependents, then iterate each index and find
@@ -1171,7 +1326,8 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() {
Types.reset(Reader, 100);
if (opts::dump::DumpTypes) {
- dumpFullTypeStream(P, Types, 0, 0, {}, opts::dump::DumpTypeData, false);
+ dumpFullTypeStream(P, Types, 0, 0, {}, nullptr, opts::dump::DumpTypeData,
+ false);
} else if (opts::dump::DumpTypeExtras) {
auto LocalHashes = LocallyHashedType::hashTypeCollection(Types);
auto GlobalHashes = GloballyHashedType::hashTypeCollection(Types);
@@ -1204,7 +1360,6 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
printHeader(P, "Types (IPI Stream)");
}
- AutoIndent Indent(P);
assert(!File.isObj());
bool Present = false;
@@ -1229,10 +1384,11 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
}
if (!Present) {
- P.formatLine("Stream not present");
+ printStreamNotPresent(StreamIdx == StreamTPI ? "TPI" : "IPI");
return Error::success();
}
+ AutoIndent Indent(P);
ExitOnError Err("Unexpected error processing types: ");
auto &Stream = Err((StreamIdx == StreamTPI) ? getPdb().getPDBTpiStream()
@@ -1240,11 +1396,14 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
auto &Types = (StreamIdx == StreamTPI) ? File.types() : File.ids();
+ // Enable resolving forward decls.
+ Stream.buildHashMap();
+
if (DumpTypes || !Indices.empty()) {
if (Indices.empty())
dumpFullTypeStream(P, Types, Stream.getNumTypeRecords(),
Stream.getNumHashBuckets(), Stream.getHashValues(),
- DumpBytes, DumpExtras);
+ &Stream, DumpBytes, DumpExtras);
else {
std::vector<TypeIndex> TiList(Indices.begin(), Indices.end());
dumpPartialTypeStream(P, Types, Stream, TiList, DumpBytes, DumpExtras,
@@ -1261,19 +1420,21 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
P.formatLine("TI: {0}, Offset: {1}", IO.Type, fmtle(IO.Offset));
}
- P.NewLine();
- P.formatLine("Hash Adjusters:");
- auto &Adjusters = Stream.getHashAdjusters();
- auto &Strings = Err(getPdb().getStringTable());
- for (const auto &A : Adjusters) {
- AutoIndent Indent2(P);
- auto ExpectedStr = Strings.getStringForID(A.first);
- TypeIndex TI(A.second);
- if (ExpectedStr)
- P.formatLine("`{0}` -> {1}", *ExpectedStr, TI);
- else {
- P.formatLine("unknown str id ({0}) -> {1}", A.first, TI);
- consumeError(ExpectedStr.takeError());
+ if (getPdb().hasPDBStringTable()) {
+ P.NewLine();
+ P.formatLine("Hash Adjusters:");
+ auto &Adjusters = Stream.getHashAdjusters();
+ auto &Strings = Err(getPdb().getStringTable());
+ for (const auto &A : Adjusters) {
+ AutoIndent Indent2(P);
+ auto ExpectedStr = Strings.getStringForID(A.first);
+ TypeIndex TI(A.second);
+ if (ExpectedStr)
+ P.formatLine("`{0}` -> {1}", *ExpectedStr, TI);
+ else {
+ P.formatLine("unknown str id ({0}) -> {1}", A.first, TI);
+ consumeError(ExpectedStr.takeError());
+ }
}
}
}
@@ -1321,12 +1482,12 @@ Error DumpOutputStyle::dumpModuleSymsForObj() {
Error DumpOutputStyle::dumpModuleSymsForPdb() {
printHeader(P, "Symbols");
- AutoIndent Indent(P);
- if (!getPdb().hasPDBDbiStream()) {
- P.formatLine("DBI Stream not present");
+ if (File.isPdb() && !getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
return Error::success();
}
+ AutoIndent Indent(P);
ExitOnError Err("Unexpected error processing symbols: ");
auto &Ids = File.ids();
@@ -1364,18 +1525,19 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() {
Error DumpOutputStyle::dumpGSIRecords() {
printHeader(P, "GSI Records");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping Globals is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
if (!getPdb().hasPDBSymbolStream()) {
- P.formatLine("GSI Common Symbol Stream not present");
+ printStreamNotPresent("GSI Common Symbol");
return Error::success();
}
+ AutoIndent Indent(P);
+
auto &Records = cantFail(getPdb().getPDBSymbolStream());
auto &Types = File.types();
auto &Ids = File.ids();
@@ -1397,38 +1559,72 @@ Error DumpOutputStyle::dumpGSIRecords() {
Error DumpOutputStyle::dumpGlobals() {
printHeader(P, "Global Symbols");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping Globals is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
if (!getPdb().hasPDBGlobalsStream()) {
- P.formatLine("Globals stream not present");
+ printStreamNotPresent("Globals");
return Error::success();
}
+
+ AutoIndent Indent(P);
ExitOnError Err("Error dumping globals stream: ");
auto &Globals = Err(getPdb().getPDBGlobalsStream());
- const GSIHashTable &Table = Globals.getGlobalsTable();
- Err(dumpSymbolsFromGSI(Table, opts::dump::DumpGlobalExtras));
+ if (opts::dump::DumpGlobalNames.empty()) {
+ const GSIHashTable &Table = Globals.getGlobalsTable();
+ Err(dumpSymbolsFromGSI(Table, opts::dump::DumpGlobalExtras));
+ } else {
+ SymbolStream &SymRecords = cantFail(getPdb().getPDBSymbolStream());
+ auto &Types = File.types();
+ auto &Ids = File.ids();
+
+ SymbolVisitorCallbackPipeline Pipeline;
+ SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb);
+ MinimalSymbolDumper Dumper(P, opts::dump::DumpSymRecordBytes, Ids, Types);
+
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Dumper);
+ CVSymbolVisitor Visitor(Pipeline);
+
+ using ResultEntryType = std::pair<uint32_t, CVSymbol>;
+ for (StringRef Name : opts::dump::DumpGlobalNames) {
+ AutoIndent Indent(P);
+ P.formatLine("Global Name `{0}`", Name);
+ std::vector<ResultEntryType> Results =
+ Globals.findRecordsByName(Name, SymRecords);
+ if (Results.empty()) {
+ AutoIndent Indent(P);
+ P.printLine("(no matching records found)");
+ continue;
+ }
+
+ for (ResultEntryType Result : Results) {
+ if (auto E = Visitor.visitSymbolRecord(Result.second, Result.first))
+ return E;
+ }
+ }
+ }
return Error::success();
}
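
The new name-filtered globals path wires up the usual CodeView symbol visitor pipeline: a SymbolDeserializer first, then whatever callbacks should see the decoded record, all driven by CVSymbolVisitor. Stripped of the pdbutil-specific MinimalSymbolDumper, the wiring is roughly the following sketch (KindPrinter is a made-up minimal callback):

    #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
    #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
    #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
    #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::codeview;

    // Minimal callback: print each record's kind and length.
    struct KindPrinter : public SymbolVisitorCallbacks {
      Error visitSymbolBegin(CVSymbol &Record) override {
        outs() << "kind 0x";
        outs().write_hex(static_cast<unsigned>(Record.kind()));
        outs() << " length " << Record.length() << '\n';
        return Error::success();
      }
    };

    Error dumpOneRecord(CVSymbol Record) {
      SymbolVisitorCallbackPipeline Pipeline;
      SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb);
      KindPrinter Printer;
      Pipeline.addCallbackToPipeline(Deserializer);
      Pipeline.addCallbackToPipeline(Printer);
      CVSymbolVisitor Visitor(Pipeline);
      return Visitor.visitSymbolRecord(Record);
    }
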
Error DumpOutputStyle::dumpPublics() {
printHeader(P, "Public Symbols");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping Globals is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
if (!getPdb().hasPDBPublicsStream()) {
- P.formatLine("Publics stream not present");
+ printStreamNotPresent("Publics");
return Error::success();
}
+
+ AutoIndent Indent(P);
ExitOnError Err("Error dumping publics stream: ");
auto &Publics = Err(getPdb().getPDBPublicsStream());
@@ -1514,8 +1710,6 @@ Error DumpOutputStyle::dumpSymbolsFromGSI(const GSIHashTable &Table,
// Return early if we aren't dumping public hash table and address map info.
if (HashExtras) {
- P.formatBinary("Hash Bitmap", Table.HashBitmap, 0);
-
P.formatLine("Hash Entries");
{
AutoIndent Indent2(P);
@@ -1560,12 +1754,17 @@ Error DumpOutputStyle::dumpSectionHeaders() {
void DumpOutputStyle::dumpSectionHeaders(StringRef Label, DbgHeaderType Type) {
printHeader(P, Label);
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping Section Headers is not supported for object files");
+ printStreamNotValidForObj();
+ return;
+ }
+
+ if (!getPdb().hasPDBDbiStream()) {
+ printStreamNotPresent("DBI");
return;
}
+ AutoIndent Indent(P);
ExitOnError Err("Error dumping section headers: ");
std::unique_ptr<MappedBlockStream> Stream;
ArrayRef<object::coff_section> Headers;
@@ -1606,20 +1805,19 @@ void DumpOutputStyle::dumpSectionHeaders(StringRef Label, DbgHeaderType Type) {
Error DumpOutputStyle::dumpSectionContribs() {
printHeader(P, "Section Contributions");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine(
- "Dumping section contributions is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
- ExitOnError Err("Error dumping section contributions: ");
if (!getPdb().hasPDBDbiStream()) {
- P.formatLine(
- "Section contribs require a DBI Stream, which could not be loaded");
+ printStreamNotPresent("DBI");
return Error::success();
}
+ AutoIndent Indent(P);
+ ExitOnError Err("Error dumping section contributions: ");
+
auto &Dbi = Err(getPdb().getPDBDbiStream());
class Visitor : public ISectionContribVisitor {
@@ -1651,21 +1849,20 @@ Error DumpOutputStyle::dumpSectionContribs() {
Error DumpOutputStyle::dumpSectionMap() {
printHeader(P, "Section Map");
- AutoIndent Indent(P);
if (File.isObj()) {
- P.formatLine("Dumping section map is not supported for object files");
+ printStreamNotValidForObj();
return Error::success();
}
- ExitOnError Err("Error dumping section map: ");
-
if (!getPdb().hasPDBDbiStream()) {
- P.formatLine("Dumping the section map requires a DBI Stream, which could "
- "not be loaded");
+ printStreamNotPresent("DBI");
return Error::success();
}
+ AutoIndent Indent(P);
+ ExitOnError Err("Error dumping section map: ");
+
auto &Dbi = Err(getPdb().getPDBDbiStream());
uint32_t I = 0;
diff --git a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
index e7e9252f2fa9..9b3a85587bde 100644
--- a/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
+++ b/contrib/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
@@ -70,6 +70,9 @@ private:
PDBFile &getPdb();
object::COFFObjectFile &getObj();
+ void printStreamNotValidForObj();
+ void printStreamNotPresent(StringRef StreamName);
+
Error dumpFileSummary();
Error dumpStreamSummary();
Error dumpSymbolStats();
@@ -82,6 +85,9 @@ private:
Error dumpInlineeLines();
Error dumpXmi();
Error dumpXme();
+ Error dumpFpo();
+ Error dumpOldFpo(PDBFile &File);
+ Error dumpNewFpo(PDBFile &File);
Error dumpTpiStream(uint32_t StreamIdx);
Error dumpTypesFromObjectFile();
Error dumpModules();
diff --git a/contrib/llvm/tools/llvm-pdbutil/InputFile.cpp b/contrib/llvm/tools/llvm-pdbutil/InputFile.cpp
index 7b5af7e96920..8eb116cf0d80 100644
--- a/contrib/llvm/tools/llvm-pdbutil/InputFile.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/InputFile.cpp
@@ -41,6 +41,10 @@ getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
auto &Dbi = Err(File.getPDBDbiStream());
const auto &Modules = Dbi.modules();
+ if (Index >= Modules.getModuleCount())
+ return make_error<RawError>(raw_error_code::index_out_of_bounds,
+ "Invalid module index");
+
auto Modi = Modules.getModuleDescriptor(Index);
ModuleName = Modi.getModuleName();
@@ -112,10 +116,6 @@ static std::string formatChecksumKind(FileChecksumKind Kind) {
return formatUnknownEnum(Kind);
}
-static const DebugStringTableSubsectionRef &extractStringTable(PDBFile &File) {
- return cantFail(File.getStringTable()).getStringTable();
-}
-
template <typename... Args>
static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
if (Append)
@@ -164,8 +164,13 @@ void SymbolGroup::initializeForPdb(uint32_t Modi) {
// PDB always uses the same string table, but each module has its own
// checksums. So we only set the strings if they're not already set.
- if (!SC.hasStrings())
- SC.setStrings(extractStringTable(File->pdb()));
+ if (!SC.hasStrings()) {
+ auto StringTable = File->pdb().getStringTable();
+ if (StringTable)
+ SC.setStrings(StringTable->getStringTable());
+ else
+ consumeError(StringTable.takeError());
+ }
SC.resetChecksums();
auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
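
Editorial note: getModuleDebugStream above now rejects an out-of-range module index before calling getModuleDescriptor. A rough equivalent of that guard, written against plain standard containers rather than the DBI module list (the types and function name below are invented for the sketch):

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

struct ModuleDescriptor {
  std::string Name;
};

// Return std::nullopt instead of indexing past the end, mirroring the
// "Invalid module index" error added above.
std::optional<ModuleDescriptor>
getModuleChecked(const std::vector<ModuleDescriptor> &Modules, uint32_t Index) {
  if (Index >= Modules.size())
    return std::nullopt;
  return Modules[Index];
}
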
diff --git a/contrib/llvm/tools/llvm-pdbutil/InputFile.h b/contrib/llvm/tools/llvm-pdbutil/InputFile.h
index 552f3a3b2127..ee4e651c1e99 100644
--- a/contrib/llvm/tools/llvm-pdbutil/InputFile.h
+++ b/contrib/llvm/tools/llvm-pdbutil/InputFile.h
@@ -110,6 +110,8 @@ public:
const InputFile &getFile() const { return *File; }
InputFile &getFile() { return *File; }
+ bool hasDebugStream() const { return DebugStream != nullptr; }
+
private:
void initializeForPdb(uint32_t Modi);
void updatePdbModi(uint32_t Modi);
diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index f4e38a32a511..2c7b213b0a9f 100644
--- a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -296,6 +296,14 @@ static std::string formatRegisterId(RegisterId Id) {
return formatUnknownEnum(Id);
}
+static std::string formatRegisterId(uint16_t Reg16) {
+ return formatRegisterId(RegisterId(Reg16));
+}
+
+static std::string formatRegisterId(ulittle16_t &Reg16) {
+ return formatRegisterId(uint16_t(Reg16));
+}
+
static std::string formatRange(LocalVariableAddrRange Range) {
return formatv("[{0},+{1})",
formatSegmentOffset(Range.ISectStart, Range.OffsetStart),
@@ -482,6 +490,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
AutoIndent Indent(P, 7);
SourceLanguage Lang = static_cast<SourceLanguage>(
Compile2.Flags & CompileSym2Flags::SourceLanguageMask);
+ CompilationCPU = Compile2.Machine;
P.formatLine("machine = {0}, ver = {1}, language = {2}",
formatMachineType(Compile2.Machine), Compile2.Version,
formatSourceLanguage(Lang));
@@ -502,6 +511,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
AutoIndent Indent(P, 7);
SourceLanguage Lang = static_cast<SourceLanguage>(
Compile3.Flags & CompileSym3Flags::SourceLanguageMask);
+ CompilationCPU = Compile3.Machine;
P.formatLine("machine = {0}, Ver = {1}, language = {2}",
formatMachineType(Compile3.Machine), Compile3.Version,
formatSourceLanguage(Lang));
@@ -550,10 +560,11 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
DefRangeRegisterRelSym &Def) {
AutoIndent Indent(P, 7);
- P.formatLine("register = {0}, base ptr = {1}, offset in parent = {2}, has "
+ P.formatLine("register = {0}, offset = {1}, offset in parent = {2}, has "
"spilled udt = {3}",
- uint16_t(Def.Hdr.Register), int32_t(Def.Hdr.BasePointerOffset),
- Def.offsetInParent(), Def.hasSpilledUDTMember());
+ formatRegisterId(Def.Hdr.Register),
+ int32_t(Def.Hdr.BasePointerOffset), Def.offsetInParent(),
+ Def.hasSpilledUDTMember());
P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
formatGaps(P.getIndentLevel() + 9, Def.Gaps));
return Error::success();
@@ -564,8 +575,8 @@ Error MinimalSymbolDumper::visitKnownRecord(
AutoIndent Indent(P, 7);
P.formatLine("register = {0}, may have no name = {1}, range start = "
"{2}, length = {3}",
- uint16_t(DefRangeRegister.Hdr.Register),
- uint16_t(DefRangeRegister.Hdr.MayHaveNoName),
+ formatRegisterId(DefRangeRegister.Hdr.Register),
+ bool(DefRangeRegister.Hdr.MayHaveNoName),
formatSegmentOffset(DefRangeRegister.Range.ISectStart,
DefRangeRegister.Range.OffsetStart),
DefRangeRegister.Range.Range);
@@ -579,7 +590,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
AutoIndent Indent(P, 7);
bool NoName = !!(Def.Hdr.MayHaveNoName == 0);
P.formatLine("register = {0}, may have no name = {1}, offset in parent = {2}",
- uint16_t(Def.Hdr.Register), NoName,
+ formatRegisterId(Def.Hdr.Register), NoName,
uint32_t(Def.Hdr.OffsetInParent));
P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
formatGaps(P.getIndentLevel() + 9, Def.Gaps));
@@ -606,8 +617,8 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FC) {
AutoIndent Indent(P, 7);
P.formatLine("code offset = {0}, Register = {1}, kind = {2}, flags = {3}",
- FC.CodeOffset, FC.Register, formatCookieKind(FC.CookieKind),
- FC.Flags);
+ FC.CodeOffset, formatRegisterId(FC.Register),
+ formatCookieKind(FC.CookieKind), FC.Flags);
return Error::success();
}
@@ -620,6 +631,9 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FP) {
FP.BytesOfCalleeSavedRegisters,
formatSegmentOffset(FP.SectionIdOfExceptionHandler,
FP.OffsetOfExceptionHandler));
+ P.formatLine("local fp reg = {0}, param fp reg = {1}",
+ formatRegisterId(FP.getLocalFramePtrReg(CompilationCPU)),
+ formatRegisterId(FP.getParamFramePtrReg(CompilationCPU)));
P.formatLine("flags = {0}",
formatFrameProcedureOptions(P.getIndentLevel() + 9, FP.Flags));
return Error::success();
diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h
index 1c26a85a4eaf..033e193cee6c 100644
--- a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h
+++ b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h
@@ -53,6 +53,11 @@ private:
std::string idIndex(codeview::TypeIndex TI) const;
LinePrinter &P;
+
+ /// Dumping certain records requires knowing what machine this is. The
+ /// S_COMPILE3 record will tell us, but if we don't see one, default to X64.
+ codeview::CPUType CompilationCPU = codeview::CPUType::X64;
+
bool RecordBytes;
const SymbolGroup *SymGroup = nullptr;
codeview::LazyRandomTypeCollection &Ids;
@@ -61,4 +66,4 @@ private:
} // namespace pdb
} // namespace llvm
-#endif \ No newline at end of file
+#endif
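
Editorial note: the new CompilationCPU member records the machine announced by the most recent S_COMPILE2/S_COMPILE3 record and defaults to X64 when none is seen, so later records such as S_FRAMEPROC can name frame-pointer registers per CPU. A hedged sketch of that idea follows; the enum values and helper names are illustrative and do not mirror the CodeView API.

#include <cstdint>
#include <string>

enum class CPUType { X64, ARM64 };

struct SymbolDumperState {
  CPUType CompilationCPU = CPUType::X64; // default when no compile record seen

  void onCompileRecord(CPUType Machine) { CompilationCPU = Machine; }

  std::string frameRegName(uint16_t RawReg) const {
    // A real dumper maps RawReg through a per-CPU register table; this only
    // shows that the mapping depends on the remembered CompilationCPU.
    return (CompilationCPU == CPUType::ARM64 ? "arm64-reg-" : "x64-reg-") +
           std::to_string(RawReg);
  }
};
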
diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
index 569bca7490fa..3f10e8ab8a1e 100644
--- a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
@@ -12,6 +12,7 @@
#include "FormatUtil.h"
#include "LinePrinter.h"
+#include "llvm-pdbutil.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -19,6 +20,7 @@
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
@@ -27,15 +29,37 @@ using namespace llvm::codeview;
using namespace llvm::pdb;
static std::string formatClassOptions(uint32_t IndentLevel,
- ClassOptions Options) {
+ ClassOptions Options, TpiStream *Stream,
+ TypeIndex CurrentTypeIndex) {
std::vector<std::string> Opts;
+
+ if (Stream && Stream->supportsTypeLookup() &&
+ !opts::dump::DontResolveForwardRefs &&
+ ((Options & ClassOptions::ForwardReference) != ClassOptions::None)) {
+ // If we're able to resolve forward references, do that.
+ Expected<TypeIndex> ETI =
+ Stream->findFullDeclForForwardRef(CurrentTypeIndex);
+ if (!ETI) {
+ consumeError(ETI.takeError());
+ PUSH_FLAG(ClassOptions, ForwardReference, Options, "forward ref (??\?)");
+ } else {
+ const char *Direction = (*ETI == CurrentTypeIndex)
+ ? "="
+ : ((*ETI < CurrentTypeIndex) ? "<-" : "->");
+ std::string Formatted =
+ formatv("forward ref ({0} {1})", Direction, *ETI).str();
+ PUSH_FLAG(ClassOptions, ForwardReference, Options, std::move(Formatted));
+ }
+ } else {
+ PUSH_FLAG(ClassOptions, ForwardReference, Options, "forward ref");
+ }
+
PUSH_FLAG(ClassOptions, HasConstructorOrDestructor, Options,
"has ctor / dtor");
PUSH_FLAG(ClassOptions, ContainsNestedClass, Options,
"contains nested class");
PUSH_FLAG(ClassOptions, HasConversionOperator, Options,
"conversion operator");
- PUSH_FLAG(ClassOptions, ForwardReference, Options, "forward ref");
PUSH_FLAG(ClassOptions, HasUniqueName, Options, "has unique name");
PUSH_FLAG(ClassOptions, Intrinsic, Options, "intrin");
PUSH_FLAG(ClassOptions, Nested, Options, "is nested");
@@ -194,6 +218,7 @@ static std::string formatFunctionOptions(FunctionOptions Options) {
}
Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
+ CurrentTypeIndex = Index;
// formatLine puts the newline at the beginning, so we use formatLine here
// to start a new line, and then individual visit methods use format to
// append to the existing line.
@@ -304,7 +329,8 @@ Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR,
P.formatLine("vtable: {0}, base list: {1}, field list: {2}",
Class.VTableShape, Class.DerivationList, Class.FieldList);
P.formatLine("options: {0}, sizeof {1}",
- formatClassOptions(P.getIndentLevel(), Class.Options),
+ formatClassOptions(P.getIndentLevel(), Class.Options, Stream,
+ CurrentTypeIndex),
Class.Size);
return Error::success();
}
@@ -316,7 +342,8 @@ Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR,
P.formatLine("unique name: `{0}`", Union.UniqueName);
P.formatLine("field list: {0}", Union.FieldList);
P.formatLine("options: {0}, sizeof {1}",
- formatClassOptions(P.getIndentLevel(), Union.Options),
+ formatClassOptions(P.getIndentLevel(), Union.Options, Stream,
+ CurrentTypeIndex),
Union.Size);
return Error::success();
}
@@ -328,7 +355,8 @@ Error MinimalTypeDumpVisitor::visitKnownRecord(CVType &CVR, EnumRecord &Enum) {
P.formatLine("field list: {0}, underlying type: {1}", Enum.FieldList,
Enum.UnderlyingType);
P.formatLine("options: {0}",
- formatClassOptions(P.getIndentLevel(), Enum.Options));
+ formatClassOptions(P.getIndentLevel(), Enum.Options, Stream,
+ CurrentTypeIndex));
return Error::success();
}
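
Editorial note: the new formatClassOptions path resolves a forward reference through the TPI stream and annotates it with a direction marker, '=' when the full declaration is the current record, '<-' when it appears earlier in the type stream, '->' when it appears later. A minimal standalone restatement of that marker logic (plain integers stand in for TypeIndex):

#include <cstdint>
#include <string>

std::string forwardRefLabel(uint32_t CurrentTI, uint32_t FullDeclTI) {
  const char *Direction =
      (FullDeclTI == CurrentTI) ? "=" : (FullDeclTI < CurrentTI ? "<-" : "->");
  return "forward ref (" + std::string(Direction) + " " +
         std::to_string(FullDeclTI) + ")";
}
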
diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h
index 4227688f0f71..8f6bdc6110ae 100644
--- a/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h
+++ b/contrib/llvm/tools/llvm-pdbutil/MinimalTypeDumper.h
@@ -20,15 +20,18 @@ class LazyRandomTypeCollection;
namespace pdb {
class LinePrinter;
+class TpiStream;
class MinimalTypeDumpVisitor : public codeview::TypeVisitorCallbacks {
public:
MinimalTypeDumpVisitor(LinePrinter &P, uint32_t Width, bool RecordBytes,
bool Hashes, codeview::LazyRandomTypeCollection &Types,
uint32_t NumHashBuckets,
- FixedStreamArray<support::ulittle32_t> HashValues)
+ FixedStreamArray<support::ulittle32_t> HashValues,
+ pdb::TpiStream *Stream)
: P(P), Width(Width), RecordBytes(RecordBytes), Hashes(Hashes),
- Types(Types), NumHashBuckets(NumHashBuckets), HashValues(HashValues) {}
+ Types(Types), NumHashBuckets(NumHashBuckets), HashValues(HashValues),
+ Stream(Stream) {}
Error visitTypeBegin(codeview::CVType &Record,
codeview::TypeIndex Index) override;
@@ -55,7 +58,9 @@ private:
bool Hashes = false;
codeview::LazyRandomTypeCollection &Types;
uint32_t NumHashBuckets;
+ codeview::TypeIndex CurrentTypeIndex;
FixedStreamArray<support::ulittle32_t> HashValues;
+ pdb::TpiStream *Stream = nullptr;
};
} // namespace pdb
} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp
index eb39708a27e9..3ea333608314 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.cpp
@@ -110,6 +110,7 @@ void MappingTraits<PdbObject>::mapping(IO &IO, PdbObject &Obj) {
IO.mapOptional("DbiStream", Obj.DbiStream);
IO.mapOptional("TpiStream", Obj.TpiStream);
IO.mapOptional("IpiStream", Obj.IpiStream);
+ IO.mapOptional("PublicsStream", Obj.PublicsStream);
}
void MappingTraits<MSFHeaders>::mapping(IO &IO, MSFHeaders &Obj) {
@@ -163,6 +164,11 @@ void MappingTraits<PdbTpiStream>::mapping(IO &IO,
IO.mapRequired("Records", Obj.Records);
}
+void MappingTraits<PdbPublicsStream>::mapping(
+ IO &IO, pdb::yaml::PdbPublicsStream &Obj) {
+ IO.mapRequired("Records", Obj.PubSyms);
+}
+
void MappingTraits<NamedStreamMapping>::mapping(IO &IO,
NamedStreamMapping &Obj) {
IO.mapRequired("Name", Obj.StreamName);
diff --git a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h
index 91e054490a5f..97ba87266cc6 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h
+++ b/contrib/llvm/tools/llvm-pdbutil/PdbYaml.h
@@ -92,6 +92,10 @@ struct PdbTpiStream {
std::vector<CodeViewYAML::LeafRecord> Records;
};
+struct PdbPublicsStream {
+ std::vector<CodeViewYAML::SymbolRecord> PubSyms;
+};
+
struct PdbObject {
explicit PdbObject(BumpPtrAllocator &Allocator) : Allocator(Allocator) {}
@@ -102,6 +106,7 @@ struct PdbObject {
Optional<PdbDbiStream> DbiStream;
Optional<PdbTpiStream> TpiStream;
Optional<PdbTpiStream> IpiStream;
+ Optional<PdbPublicsStream> PublicsStream;
Optional<std::vector<StringRef>> StringTable;
@@ -118,6 +123,7 @@ LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::StreamBlockList)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbInfoStream)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbDbiStream)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbTpiStream)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbPublicsStream)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::NamedStreamMapping)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbModiStream)
LLVM_YAML_DECLARE_MAPPING_TRAITS(pdb::yaml::PdbDbiModuleInfo)
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
index 651cb8b7649e..f009f53a3932 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
@@ -51,6 +51,13 @@ void ClassDefinitionDumper::prettyPrintClassIntro(const ClassLayout &Layout) {
uint32_t Size = Layout.getSize();
const PDBSymbolTypeUDT &Class = Layout.getClass();
+ if (Layout.getClass().isConstType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "const ";
+ if (Layout.getClass().isVolatileType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "volatile ";
+ if (Layout.getClass().isUnalignedType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "unaligned ";
+
WithColor(Printer, PDB_ColorItem::Keyword).get() << Class.getUdtKind() << " ";
WithColor(Printer, PDB_ColorItem::Type).get() << Class.getName();
WithColor(Printer, PDB_ColorItem::Comment).get() << " [sizeof = " << Size
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
index 0d99c9b1245c..94a0b2d5e780 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
@@ -28,6 +28,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -216,3 +217,13 @@ void CompilandDumper::dump(const PDBSymbolUnknown &Symbol) {
Printer.NewLine();
Printer << "unknown (" << Symbol.getSymTag() << ")";
}
+
+void CompilandDumper::dump(const PDBSymbolUsingNamespace &Symbol) {
+ if (Printer.IsSymbolExcluded(Symbol.getName()))
+ return;
+
+ Printer.NewLine();
+ Printer << "using namespace ";
+ std::string Name = Symbol.getName();
+ WithColor(Printer, PDB_ColorItem::Identifier).get() << Name;
+}
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h b/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h
index cae196e9d134..1a840e49607c 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.h
@@ -34,6 +34,7 @@ public:
void dump(const PDBSymbolThunk &Symbol) override;
void dump(const PDBSymbolTypeTypedef &Symbol) override;
void dump(const PDBSymbolUnknown &Symbol) override;
+ void dump(const PDBSymbolUsingNamespace &Symbol) override;
private:
LinePrinter &Printer;
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
index bf22e75e3949..f4cbd3f8fa14 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
@@ -23,6 +23,18 @@ using namespace llvm::pdb;
EnumDumper::EnumDumper(LinePrinter &P) : PDBSymDumper(true), Printer(P) {}
void EnumDumper::start(const PDBSymbolTypeEnum &Symbol) {
+ if (Symbol.getUnmodifiedTypeId() != 0) {
+ if (Symbol.isConstType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "const ";
+ if (Symbol.isVolatileType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "volatile ";
+ if (Symbol.isUnalignedType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "unaligned ";
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "enum ";
+ WithColor(Printer, PDB_ColorItem::Type).get() << Symbol.getName();
+ return;
+ }
+
WithColor(Printer, PDB_ColorItem::Keyword).get() << "enum ";
WithColor(Printer, PDB_ColorItem::Type).get() << Symbol.getName();
if (!opts::pretty::NoEnumDefs) {
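
Editorial note: the pretty dumpers above now print const/volatile/unaligned keywords for modified types (EnumDumper only when getUnmodifiedTypeId() is non-zero). The helper below restates that prefix logic in isolation; it is a sketch, not the dumper code, and takes plain booleans instead of PDB symbols.

#include <string>

std::string qualifierPrefix(bool IsConst, bool IsVolatile, bool IsUnaligned) {
  std::string Prefix;
  if (IsConst)
    Prefix += "const ";
  if (IsVolatile)
    Prefix += "volatile ";
  if (IsUnaligned)
    Prefix += "unaligned ";
  return Prefix;
}
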
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
index 177d8a009a2b..836ede41054e 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
@@ -53,7 +53,10 @@ FunctionDumper::FunctionDumper(LinePrinter &P)
void FunctionDumper::start(const PDBSymbolTypeFunctionSig &Symbol,
const char *Name, PointerType Pointer) {
auto ReturnType = Symbol.getReturnType();
- ReturnType->dump(*this);
+ if (!ReturnType)
+ Printer << "<unknown-type>";
+ else
+ ReturnType->dump(*this);
Printer << " ";
uint32_t ClassParentId = Symbol.getClassParentId();
auto ClassParent =
@@ -225,9 +228,10 @@ void FunctionDumper::dump(const PDBSymbolTypeFunctionArg &Symbol) {
// through to the real thing and dump it.
uint32_t TypeId = Symbol.getTypeId();
auto Type = Symbol.getSession().getSymbolById(TypeId);
- if (!Type)
- return;
- Type->dump(*this);
+  if (Type)
+    Type->dump(*this);
+  else
+    Printer << "<unknown-type>";

}
void FunctionDumper::dump(const PDBSymbolTypeTypedef &Symbol) {
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
index 663a608fe429..daf3cd45b327 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
@@ -13,13 +13,17 @@
#include "PrettyBuiltinDumper.h"
#include "PrettyClassDefinitionDumper.h"
#include "PrettyEnumDumper.h"
+#include "PrettyFunctionDumper.h"
#include "PrettyTypedefDumper.h"
#include "llvm-pdbutil.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/UDTLayout.h"
@@ -128,36 +132,85 @@ filterAndSortClassDefs(LinePrinter &Printer, Enumerator &E,
}
if (Comp)
- llvm::sort(Filtered.begin(), Filtered.end(), Comp);
+ llvm::sort(Filtered, Comp);
return Filtered;
}
TypeDumper::TypeDumper(LinePrinter &P) : PDBSymDumper(true), Printer(P) {}
-void TypeDumper::start(const PDBSymbolExe &Exe) {
- if (opts::pretty::Enums) {
- if (auto Enums = Exe.findAllChildren<PDBSymbolTypeEnum>()) {
+template <typename T>
+static bool isTypeExcluded(LinePrinter &Printer, const T &Symbol) {
+ return false;
+}
+
+static bool isTypeExcluded(LinePrinter &Printer,
+ const PDBSymbolTypeEnum &Enum) {
+ if (Printer.IsTypeExcluded(Enum.getName(), Enum.getLength()))
+ return true;
+ // Dump member enums when dumping their class definition.
+ if (nullptr != Enum.getClassParent())
+ return true;
+ return false;
+}
+
+static bool isTypeExcluded(LinePrinter &Printer,
+ const PDBSymbolTypeTypedef &Typedef) {
+ return Printer.IsTypeExcluded(Typedef.getName(), Typedef.getLength());
+}
+
+template <typename SymbolT>
+static void dumpSymbolCategory(LinePrinter &Printer, const PDBSymbolExe &Exe,
+ TypeDumper &TD, StringRef Label) {
+ if (auto Children = Exe.findAllChildren<SymbolT>()) {
+ Printer.NewLine();
+ WithColor(Printer, PDB_ColorItem::Identifier).get() << Label;
+ Printer << ": (" << Children->getChildCount() << " items)";
+ Printer.Indent();
+ while (auto Child = Children->getNext()) {
+ if (isTypeExcluded(Printer, *Child))
+ continue;
+
Printer.NewLine();
- WithColor(Printer, PDB_ColorItem::Identifier).get() << "Enums";
- Printer << ": (" << Enums->getChildCount() << " items)";
- Printer.Indent();
- while (auto Enum = Enums->getNext())
- Enum->dump(*this);
- Printer.Unindent();
+ Child->dump(TD);
}
+ Printer.Unindent();
}
+}
- if (opts::pretty::Typedefs) {
- if (auto Typedefs = Exe.findAllChildren<PDBSymbolTypeTypedef>()) {
- Printer.NewLine();
- WithColor(Printer, PDB_ColorItem::Identifier).get() << "Typedefs";
- Printer << ": (" << Typedefs->getChildCount() << " items)";
- Printer.Indent();
- while (auto Typedef = Typedefs->getNext())
- Typedef->dump(*this);
- Printer.Unindent();
- }
+static void printClassDecl(LinePrinter &Printer,
+ const PDBSymbolTypeUDT &Class) {
+ if (Class.getUnmodifiedTypeId() != 0) {
+ if (Class.isConstType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "const ";
+ if (Class.isVolatileType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "volatile ";
+ if (Class.isUnalignedType())
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << "unaligned ";
}
+ WithColor(Printer, PDB_ColorItem::Keyword).get() << Class.getUdtKind() << " ";
+ WithColor(Printer, PDB_ColorItem::Type).get() << Class.getName();
+}
+
+void TypeDumper::start(const PDBSymbolExe &Exe) {
+ if (opts::pretty::Enums)
+ dumpSymbolCategory<PDBSymbolTypeEnum>(Printer, Exe, *this, "Enums");
+
+ if (opts::pretty::Funcsigs)
+ dumpSymbolCategory<PDBSymbolTypeFunctionSig>(Printer, Exe, *this,
+ "Function Signatures");
+
+ if (opts::pretty::Typedefs)
+ dumpSymbolCategory<PDBSymbolTypeTypedef>(Printer, Exe, *this, "Typedefs");
+
+ if (opts::pretty::Arrays)
+ dumpSymbolCategory<PDBSymbolTypeArray>(Printer, Exe, *this, "Arrays");
+
+ if (opts::pretty::Pointers)
+ dumpSymbolCategory<PDBSymbolTypePointer>(Printer, Exe, *this, "Pointers");
+
+ if (opts::pretty::VTShapes)
+ dumpSymbolCategory<PDBSymbolTypeVTableShape>(Printer, Exe, *this,
+ "VFTable Shapes");
if (opts::pretty::Classes) {
if (auto Classes = Exe.findAllChildren<PDBSymbolTypeUDT>()) {
@@ -196,11 +249,16 @@ void TypeDumper::start(const PDBSymbolExe &Exe) {
dumpClassLayout(*Class);
} else {
while (auto Class = Classes->getNext()) {
- if (Class->getUnmodifiedTypeId() != 0)
+ if (Printer.IsTypeExcluded(Class->getName(), Class->getLength()))
continue;
- if (Printer.IsTypeExcluded(Class->getName(), Class->getLength()))
+ // No point duplicating a full class layout. Just print the modified
+ // declaration and continue.
+ if (Class->getUnmodifiedTypeId() != 0) {
+ Printer.NewLine();
+ printClassDecl(Printer, *Class);
continue;
+ }
auto Layout = llvm::make_unique<ClassLayout>(std::move(Class));
if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold)
@@ -218,35 +276,83 @@ void TypeDumper::start(const PDBSymbolExe &Exe) {
void TypeDumper::dump(const PDBSymbolTypeEnum &Symbol) {
assert(opts::pretty::Enums);
- if (Printer.IsTypeExcluded(Symbol.getName(), Symbol.getLength()))
- return;
- // Dump member enums when dumping their class definition.
- if (nullptr != Symbol.getClassParent())
- return;
-
- Printer.NewLine();
EnumDumper Dumper(Printer);
Dumper.start(Symbol);
}
+void TypeDumper::dump(const PDBSymbolTypeBuiltin &Symbol) {
+ BuiltinDumper BD(Printer);
+ BD.start(Symbol);
+}
+
+void TypeDumper::dump(const PDBSymbolTypeUDT &Symbol) {
+ printClassDecl(Printer, Symbol);
+}
+
void TypeDumper::dump(const PDBSymbolTypeTypedef &Symbol) {
assert(opts::pretty::Typedefs);
- if (Printer.IsTypeExcluded(Symbol.getName(), Symbol.getLength()))
- return;
-
- Printer.NewLine();
TypedefDumper Dumper(Printer);
Dumper.start(Symbol);
}
+void TypeDumper::dump(const PDBSymbolTypeArray &Symbol) {
+ auto ElementType = Symbol.getElementType();
+
+ ElementType->dump(*this);
+ Printer << "[";
+ WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Symbol.getCount();
+ Printer << "]";
+}
+
+void TypeDumper::dump(const PDBSymbolTypeFunctionSig &Symbol) {
+ FunctionDumper Dumper(Printer);
+ Dumper.start(Symbol, nullptr, FunctionDumper::PointerType::None);
+}
+
+void TypeDumper::dump(const PDBSymbolTypePointer &Symbol) {
+ std::unique_ptr<PDBSymbol> P = Symbol.getPointeeType();
+
+ if (auto *FS = dyn_cast<PDBSymbolTypeFunctionSig>(P.get())) {
+ FunctionDumper Dumper(Printer);
+ FunctionDumper::PointerType PT =
+ Symbol.isReference() ? FunctionDumper::PointerType::Reference
+ : FunctionDumper::PointerType::Pointer;
+ Dumper.start(*FS, nullptr, PT);
+ return;
+ }
+
+ if (auto *UDT = dyn_cast<PDBSymbolTypeUDT>(P.get())) {
+ printClassDecl(Printer, *UDT);
+ } else if (P) {
+ P->dump(*this);
+ }
+
+ if (auto Parent = Symbol.getClassParent()) {
+ auto UDT = llvm::unique_dyn_cast<PDBSymbolTypeUDT>(std::move(Parent));
+ if (UDT)
+ Printer << " " << UDT->getName() << "::";
+ }
+
+ if (Symbol.isReference())
+ Printer << "&";
+ else if (Symbol.isRValueReference())
+ Printer << "&&";
+ else
+ Printer << "*";
+}
+
+void TypeDumper::dump(const PDBSymbolTypeVTableShape &Symbol) {
+ Printer.format("<vtshape ({0} methods)>", Symbol.getCount());
+}
+
void TypeDumper::dumpClassLayout(const ClassLayout &Class) {
assert(opts::pretty::Classes);
if (opts::pretty::ClassFormat == opts::pretty::ClassDefinitionFormat::None) {
- Printer.NewLine();
- WithColor(Printer, PDB_ColorItem::Keyword).get() << "class ";
- WithColor(Printer, PDB_ColorItem::Identifier).get() << Class.getName();
+ WithColor(Printer, PDB_ColorItem::Keyword).get()
+ << Class.getClass().getUdtKind() << " ";
+ WithColor(Printer, PDB_ColorItem::Type).get() << Class.getName();
} else {
ClassDefinitionDumper Dumper(Printer);
Dumper.start(Class);
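
Editorial note: TypeDumper::dump(const PDBSymbolTypePointer &) above ends by choosing a suffix for the pointee, '&' for references, '&&' for r-value references, '*' otherwise. That choice, restated as a tiny free function for clarity (illustrative only):

#include <string>

std::string pointerSuffix(bool IsReference, bool IsRValueReference) {
  if (IsReference)
    return "&";
  if (IsRValueReference)
    return "&&";
  return "*";
}
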
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h b/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h
index 68a2f0246eba..36e586fea7e3 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyTypeDumper.h
@@ -25,6 +25,12 @@ public:
void dump(const PDBSymbolTypeEnum &Symbol) override;
void dump(const PDBSymbolTypeTypedef &Symbol) override;
+ void dump(const PDBSymbolTypeFunctionSig &Symbol) override;
+ void dump(const PDBSymbolTypeArray &Symbol) override;
+ void dump(const PDBSymbolTypeBuiltin &Symbol) override;
+ void dump(const PDBSymbolTypePointer &Symbol) override;
+ void dump(const PDBSymbolTypeVTableShape &Symbol) override;
+ void dump(const PDBSymbolTypeUDT &Symbol) override;
void dumpClassLayout(const ClassLayout &Class);
diff --git a/contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
index 65443d6bca90..2b3f3691ed98 100644
--- a/contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
@@ -12,6 +12,7 @@
#include "LinePrinter.h"
#include "PrettyBuiltinDumper.h"
#include "PrettyFunctionDumper.h"
+#include "PrettyTypeDumper.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
@@ -35,7 +36,10 @@ void TypedefDumper::start(const PDBSymbolTypeTypedef &Symbol) {
<< Symbol.getName();
}
-void TypedefDumper::dump(const PDBSymbolTypeArray &Symbol) {}
+void TypedefDumper::dump(const PDBSymbolTypeArray &Symbol) {
+ TypeDumper Dumper(Printer);
+ Dumper.dump(Symbol);
+}
void TypedefDumper::dump(const PDBSymbolTypeBuiltin &Symbol) {
BuiltinDumper Dumper(Printer);
diff --git a/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp b/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp
index a7afbf1242c5..62b5c428d410 100644
--- a/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.cpp
@@ -18,10 +18,13 @@
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
using namespace llvm;
@@ -68,6 +71,9 @@ Error YAMLOutputStyle::dump() {
if (auto EC = dumpIpiStream())
return EC;
+ if (auto EC = dumpPublics())
+ return EC;
+
flush();
return Error::success();
}
@@ -191,6 +197,9 @@ Error YAMLOutputStyle::dumpDbiStream() {
if (!opts::pdb2yaml::DbiStream)
return Error::success();
+ if (!File.hasPDBDbiStream())
+ return Error::success();
+
auto DbiS = File.getPDBDbiStream();
if (!DbiS)
return DbiS.takeError();
@@ -323,6 +332,42 @@ Error YAMLOutputStyle::dumpIpiStream() {
return Error::success();
}
+Error YAMLOutputStyle::dumpPublics() {
+ if (!opts::pdb2yaml::PublicsStream)
+ return Error::success();
+
+ Obj.PublicsStream.emplace();
+ auto ExpectedPublics = File.getPDBPublicsStream();
+ if (!ExpectedPublics) {
+ llvm::consumeError(ExpectedPublics.takeError());
+ return Error::success();
+ }
+
+ PublicsStream &Publics = *ExpectedPublics;
+ const GSIHashTable &PublicsTable = Publics.getPublicsTable();
+
+ auto ExpectedSyms = File.getPDBSymbolStream();
+ if (!ExpectedSyms) {
+ llvm::consumeError(ExpectedSyms.takeError());
+ return Error::success();
+ }
+
+ BinaryStreamRef SymStream =
+ ExpectedSyms->getSymbolArray().getUnderlyingStream();
+ for (uint32_t PubSymOff : PublicsTable) {
+ Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, PubSymOff);
+ if (!Sym)
+ return Sym.takeError();
+ auto ES = CodeViewYAML::SymbolRecord::fromCodeViewSymbol(*Sym);
+ if (!ES)
+ return ES.takeError();
+
+ Obj.PublicsStream->PubSyms.push_back(*ES);
+ }
+
+ return Error::success();
+}
+
void YAMLOutputStyle::flush() {
Out << Obj;
outs().flush();
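
Editorial note: dumpPublics above deliberately treats a missing publics or symbol stream as "nothing to emit" rather than as a failure, consuming the error and returning success. A minimal sketch of that policy with invented types (no PDB API involved):

#include <optional>
#include <vector>

struct PublicSym {
  unsigned Offset;
};

// Stand-in for the stream loader; here the stream is simply absent.
std::optional<std::vector<PublicSym>> loadPublicsStream() {
  return std::nullopt;
}

std::vector<PublicSym> collectPublics() {
  auto Publics = loadPublicsStream();
  if (!Publics)
    return {};  // swallow the failure and emit an empty record list
  return *Publics;
}
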
diff --git a/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h b/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h
index 3690e3529d4a..a5ad3355d2ab 100644
--- a/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h
+++ b/contrib/llvm/tools/llvm-pdbutil/YAMLOutputStyle.h
@@ -35,6 +35,7 @@ private:
Error dumpDbiStream();
Error dumpTpiStream();
Error dumpIpiStream();
+ Error dumpPublics();
void flush();
diff --git a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 5b0d21f83db7..76f61a2a95a7 100644
--- a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -13,7 +13,6 @@
#include "llvm-pdbutil.h"
-#include "Analyze.h"
#include "BytesOutputStyle.h"
#include "DumpOutputStyle.h"
#include "ExplainOutputStyle.h"
@@ -46,7 +45,6 @@
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
@@ -71,6 +69,8 @@
#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/Support/BinaryByteStream.h"
@@ -102,6 +102,9 @@ namespace opts {
cl::SubCommand DumpSubcommand("dump", "Dump MSF and CodeView debug info");
cl::SubCommand BytesSubcommand("bytes", "Dump raw bytes from the PDB file");
+cl::SubCommand DiaDumpSubcommand("diadump",
+ "Dump debug information using a DIA-like API");
+
cl::SubCommand
PrettySubcommand("pretty",
"Dump semantic information about types and symbols");
@@ -113,10 +116,6 @@ cl::SubCommand
PdbToYamlSubcommand("pdb2yaml",
"Generate a detailed YAML description of a PDB File");
-cl::SubCommand
- AnalyzeSubcommand("analyze",
- "Analyze various aspects of a PDB's structure");
-
cl::SubCommand MergeSubcommand("merge",
"Merge multiple PDBs into a single PDB");
@@ -155,6 +154,48 @@ cl::ValuesClass ChunkValues = cl::values(
"Any subsection not covered by another option"),
clEnumValN(ModuleSubsection::All, "all", "All known subsections"));
+namespace diadump {
+cl::list<std::string> InputFilenames(cl::Positional,
+ cl::desc("<input PDB files>"),
+ cl::OneOrMore, cl::sub(DiaDumpSubcommand));
+
+cl::opt<bool> Native("native", cl::desc("Use native PDB reader instead of DIA"),
+ cl::sub(DiaDumpSubcommand));
+
+static cl::opt<bool>
+ ShowClassHierarchy("hierarchy", cl::desc("Show lexical and class parents"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> NoSymIndexIds(
+ "no-ids",
+ cl::desc("Don't show any SymIndexId fields (overrides -hierarchy)"),
+ cl::sub(DiaDumpSubcommand));
+
+static cl::opt<bool>
+ Recurse("recurse",
+ cl::desc("When dumping a SymIndexId, dump the full details of the "
+ "corresponding record"),
+ cl::sub(DiaDumpSubcommand));
+
+static cl::opt<bool> Enums("enums", cl::desc("Dump enum types"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Pointers("pointers", cl::desc("Dump pointer types"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> UDTs("udts", cl::desc("Dump udt types"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Compilands("compilands",
+ cl::desc("Dump compiland information"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Funcsigs("funcsigs",
+ cl::desc("Dump function signature information"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Arrays("arrays", cl::desc("Dump array types"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> VTShapes("vtshapes", cl::desc("Dump virtual table shapes"),
+ cl::sub(DiaDumpSubcommand));
+static cl::opt<bool> Typedefs("typedefs", cl::desc("Dump typedefs"),
+ cl::sub(DiaDumpSubcommand));
+} // namespace diadump
+
namespace pretty {
cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("<input PDB files>"),
@@ -201,6 +242,15 @@ cl::opt<bool> Enums("enums", cl::desc("Display enum types"),
cl::cat(TypeCategory), cl::sub(PrettySubcommand));
cl::opt<bool> Typedefs("typedefs", cl::desc("Display typedef types"),
cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+cl::opt<bool> Funcsigs("funcsigs", cl::desc("Display function signatures"),
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+cl::opt<bool> Pointers("pointers", cl::desc("Display pointer types"),
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+cl::opt<bool> Arrays("arrays", cl::desc("Display arrays"),
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+cl::opt<bool> VTShapes("vtshapes", cl::desc("Display vftable shapes"),
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
+
cl::opt<SymbolSortMode> SymbolOrder(
"symbol-order", cl::desc("symbol sort order"),
cl::init(SymbolSortMode::None),
@@ -432,6 +482,12 @@ cl::opt<bool> DumpTypeExtras("type-extras",
cl::desc("dump type hashes and index offsets"),
cl::cat(TypeOptions), cl::sub(DumpSubcommand));
+cl::opt<bool> DontResolveForwardRefs(
+ "dont-resolve-forward-refs",
+ cl::desc("When dumping type records for classes, unions, enums, and "
+ "structs, don't try to resolve forward references"),
+ cl::cat(TypeOptions), cl::sub(DumpSubcommand));
+
cl::list<uint32_t> DumpTypeIndex(
"type-index", cl::ZeroOrMore, cl::CommaSeparated,
cl::desc("only dump types with the specified hexadecimal type index"),
@@ -465,6 +521,11 @@ cl::opt<bool> DumpGlobals("globals", cl::desc("dump Globals symbol records"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
cl::opt<bool> DumpGlobalExtras("global-extras", cl::desc("dump Globals hashes"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::list<std::string> DumpGlobalNames(
+ "global-name",
+ cl::desc(
+ "With -globals, only dump globals whose name matches the given value"),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand), cl::ZeroOrMore);
cl::opt<bool> DumpPublics("publics", cl::desc("dump Publics stream data"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
cl::opt<bool> DumpPublicExtras("public-extras",
@@ -482,6 +543,9 @@ cl::opt<bool>
cl::desc("dump CodeView symbol record raw bytes"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::opt<bool> DumpFpo("fpo", cl::desc("dump FPO records"),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+
// MODULE & FILE OPTIONS
cl::opt<bool> DumpModules("modules", cl::desc("dump compiland information"),
cl::cat(FileOptions), cl::sub(DumpSubcommand));
@@ -594,6 +658,10 @@ cl::opt<bool> IpiStream("ipi-stream",
cl::desc("Dump the IPI Stream (Stream 5)"),
cl::sub(PdbToYamlSubcommand), cl::init(false));
+cl::opt<bool> PublicsStream("publics-stream",
+ cl::desc("Dump the Publics Stream"),
+ cl::sub(PdbToYamlSubcommand), cl::init(false));
+
// MODULE & FILE OPTIONS
cl::opt<bool> DumpModules("modules", cl::desc("dump compiland information"),
cl::cat(FileOptions), cl::sub(PdbToYamlSubcommand));
@@ -613,14 +681,6 @@ cl::list<std::string> InputFilename(cl::Positional,
cl::sub(PdbToYamlSubcommand));
} // namespace pdb2yaml
-namespace analyze {
-cl::opt<bool> StringTable("hash-collisions", cl::desc("Find hash collisions"),
- cl::sub(AnalyzeSubcommand), cl::init(false));
-cl::list<std::string> InputFilename(cl::Positional,
- cl::desc("<input PDB file>"), cl::Required,
- cl::sub(AnalyzeSubcommand));
-}
-
namespace merge {
cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("<input PDB files>"),
@@ -681,7 +741,7 @@ static void yamlToPdb(StringRef Path) {
/*RequiresNullTerminator=*/false);
if (ErrorOrBuffer.getError()) {
- ExitOnErr(make_error<GenericError>(generic_error_code::invalid_path, Path));
+ ExitOnErr(createFileError(Path, errorCodeToError(ErrorOrBuffer.getError())));
}
std::unique_ptr<MemoryBuffer> &Buffer = ErrorOrBuffer.get();
@@ -781,7 +841,8 @@ static void yamlToPdb(StringRef Path) {
Builder.getStringTableBuilder().setStrings(*Strings.strings());
- ExitOnErr(Builder.commit(opts::yaml2pdb::YamlPdbOutputFile));
+ codeview::GUID IgnoredOutGuid;
+ ExitOnErr(Builder.commit(opts::yaml2pdb::YamlPdbOutputFile, &IgnoredOutGuid));
}
static PDBFile &loadPDB(StringRef Path, std::unique_ptr<IPDBSession> &Session) {
@@ -817,14 +878,6 @@ static void dumpBytes(StringRef Path) {
ExitOnErr(O->dump());
}
-static void dumpAnalysis(StringRef Path) {
- std::unique_ptr<IPDBSession> Session;
- auto &File = loadPDB(Path, Session);
- auto O = llvm::make_unique<AnalysisStyle>(File);
-
- ExitOnErr(O->dump());
-}
-
bool opts::pretty::shouldDumpSymLevel(SymLevel Search) {
if (SymTypes.empty())
return true;
@@ -924,6 +977,69 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
}
}
+template <typename OuterT, typename ChildT>
+void diaDumpChildren(PDBSymbol &Outer, PdbSymbolIdField Ids,
+ PdbSymbolIdField Recurse) {
+ OuterT *ConcreteOuter = dyn_cast<OuterT>(&Outer);
+ if (!ConcreteOuter)
+ return;
+
+ auto Children = ConcreteOuter->template findAllChildren<ChildT>();
+ while (auto Child = Children->getNext()) {
+ outs() << " {";
+ Child->defaultDump(outs(), 4, Ids, Recurse);
+ outs() << "\n }\n";
+ }
+}
+
+static void dumpDia(StringRef Path) {
+ std::unique_ptr<IPDBSession> Session;
+
+ const auto ReaderType =
+ opts::diadump::Native ? PDB_ReaderType::Native : PDB_ReaderType::DIA;
+ ExitOnErr(loadDataForPDB(ReaderType, Path, Session));
+
+ auto GlobalScope = Session->getGlobalScope();
+
+ std::vector<PDB_SymType> SymTypes;
+
+ if (opts::diadump::Compilands)
+ SymTypes.push_back(PDB_SymType::Compiland);
+ if (opts::diadump::Enums)
+ SymTypes.push_back(PDB_SymType::Enum);
+ if (opts::diadump::Pointers)
+ SymTypes.push_back(PDB_SymType::PointerType);
+ if (opts::diadump::UDTs)
+ SymTypes.push_back(PDB_SymType::UDT);
+ if (opts::diadump::Funcsigs)
+ SymTypes.push_back(PDB_SymType::FunctionSig);
+ if (opts::diadump::Arrays)
+ SymTypes.push_back(PDB_SymType::ArrayType);
+ if (opts::diadump::VTShapes)
+ SymTypes.push_back(PDB_SymType::VTableShape);
+ if (opts::diadump::Typedefs)
+ SymTypes.push_back(PDB_SymType::Typedef);
+ PdbSymbolIdField Ids = opts::diadump::NoSymIndexIds ? PdbSymbolIdField::None
+ : PdbSymbolIdField::All;
+
+ PdbSymbolIdField Recurse = PdbSymbolIdField::None;
+ if (opts::diadump::Recurse)
+ Recurse = PdbSymbolIdField::All;
+ if (!opts::diadump::ShowClassHierarchy)
+ Ids &= ~(PdbSymbolIdField::ClassParent | PdbSymbolIdField::LexicalParent);
+
+ for (PDB_SymType ST : SymTypes) {
+ auto Children = GlobalScope->findAllChildren(ST);
+ while (auto Child = Children->getNext()) {
+ outs() << "{";
+ Child->defaultDump(outs(), 2, Ids, Recurse);
+
+ diaDumpChildren<PDBSymbolTypeEnum, PDBSymbolData>(*Child, Ids, Recurse);
+ outs() << "\n}\n";
+ }
+ }
+}
+
static void dumpPretty(StringRef Path) {
std::unique_ptr<IPDBSession> Session;
@@ -1055,7 +1171,9 @@ static void dumpPretty(StringRef Path) {
Printer.NewLine();
WithColor(Printer, PDB_ColorItem::SectionHeader).get()
<< "---COMPILANDS---";
- if (auto Compilands = GlobalScope->findAllChildren<PDBSymbolCompiland>()) {
+ auto Compilands = GlobalScope->findAllChildren<PDBSymbolCompiland>();
+
+ if (Compilands) {
Printer.Indent();
CompilandDumper Dumper(Printer);
CompilandDumpFlags options = CompilandDumper::Flags::None;
@@ -1067,7 +1185,9 @@ static void dumpPretty(StringRef Path) {
}
}
- if (opts::pretty::Classes || opts::pretty::Enums || opts::pretty::Typedefs) {
+ if (opts::pretty::Classes || opts::pretty::Enums || opts::pretty::Typedefs ||
+ opts::pretty::Funcsigs || opts::pretty::Pointers ||
+ opts::pretty::Arrays || opts::pretty::VTShapes) {
Printer.NewLine();
WithColor(Printer, PDB_ColorItem::SectionHeader).get() << "---TYPES---";
Printer.Indent();
@@ -1104,8 +1224,7 @@ static void dumpPretty(StringRef Path) {
std::vector<std::unique_ptr<PDBSymbolFunc>> Funcs;
while (auto Func = Functions->getNext())
Funcs.push_back(std::move(Func));
- llvm::sort(Funcs.begin(), Funcs.end(),
- opts::pretty::compareFunctionSymbols);
+ llvm::sort(Funcs, opts::pretty::compareFunctionSymbols);
for (const auto &Func : Funcs) {
Printer.NewLine();
Dumper.start(*Func, FunctionDumper::PointerType::None);
@@ -1123,8 +1242,7 @@ static void dumpPretty(StringRef Path) {
std::vector<std::unique_ptr<PDBSymbolData>> Datas;
while (auto Var = Vars->getNext())
Datas.push_back(std::move(Var));
- llvm::sort(Datas.begin(), Datas.end(),
- opts::pretty::compareDataSymbols);
+ llvm::sort(Datas, opts::pretty::compareDataSymbols);
for (const auto &Var : Datas)
Dumper.start(*Var);
}
@@ -1162,6 +1280,7 @@ static void dumpPretty(StringRef Path) {
dumpInjectedSources(Printer, *Session);
}
+ Printer.NewLine();
outs().flush();
}
@@ -1211,7 +1330,9 @@ static void mergePdbs() {
OutFile = opts::merge::InputFilenames[0];
llvm::sys::path::replace_extension(OutFile, "merged.pdb");
}
- ExitOnErr(Builder.commit(OutFile));
+
+ codeview::GUID IgnoredOutGuid;
+ ExitOnErr(Builder.commit(OutFile, &IgnoredOutGuid));
}
static void explain() {
@@ -1323,6 +1444,7 @@ int main(int Argc, const char **Argv) {
if (opts::DumpSubcommand) {
if (opts::dump::RawAll) {
opts::dump::DumpGlobals = true;
+ opts::dump::DumpFpo = true;
opts::dump::DumpInlineeLines = true;
opts::dump::DumpIds = true;
opts::dump::DumpIdExtras = true;
@@ -1356,6 +1478,7 @@ int main(int Argc, const char **Argv) {
opts::pdb2yaml::DbiStream = true;
opts::pdb2yaml::TpiStream = true;
opts::pdb2yaml::IpiStream = true;
+ opts::pdb2yaml::PublicsStream = true;
opts::pdb2yaml::DumpModules = true;
opts::pdb2yaml::DumpModuleFiles = true;
opts::pdb2yaml::DumpModuleSyms = true;
@@ -1382,8 +1505,8 @@ int main(int Argc, const char **Argv) {
opts::yaml2pdb::YamlPdbOutputFile = OutputFilename.str();
}
yamlToPdb(opts::yaml2pdb::InputFilename);
- } else if (opts::AnalyzeSubcommand) {
- dumpAnalysis(opts::analyze::InputFilename.front());
+ } else if (opts::DiaDumpSubcommand) {
+ llvm::for_each(opts::diadump::InputFilenames, dumpDia);
} else if (opts::PrettySubcommand) {
if (opts::pretty::Lines)
opts::pretty::Compilands = true;
@@ -1401,6 +1524,8 @@ int main(int Argc, const char **Argv) {
opts::pretty::Classes = true;
opts::pretty::Typedefs = true;
opts::pretty::Enums = true;
+ opts::pretty::Pointers = true;
+ opts::pretty::Funcsigs = true;
}
// When adding filters for excluded compilands and types, we need to
diff --git a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
index 7496adaeb62f..a57cc51d7fd7 100644
--- a/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
+++ b/contrib/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -82,7 +82,11 @@ extern llvm::cl::opt<bool> Symbols;
extern llvm::cl::opt<bool> Globals;
extern llvm::cl::opt<bool> Classes;
extern llvm::cl::opt<bool> Enums;
+extern llvm::cl::opt<bool> Funcsigs;
+extern llvm::cl::opt<bool> Arrays;
extern llvm::cl::opt<bool> Typedefs;
+extern llvm::cl::opt<bool> Pointers;
+extern llvm::cl::opt<bool> VTShapes;
extern llvm::cl::opt<bool> All;
extern llvm::cl::opt<bool> ExcludeCompilerGenerated;
@@ -160,10 +164,12 @@ extern llvm::cl::opt<bool> DumpIdExtras;
extern llvm::cl::list<uint32_t> DumpIdIndex;
extern llvm::cl::opt<uint32_t> DumpModi;
extern llvm::cl::opt<bool> JustMyCode;
+extern llvm::cl::opt<bool> DontResolveForwardRefs;
extern llvm::cl::opt<bool> DumpSymbols;
extern llvm::cl::opt<bool> DumpSymRecordBytes;
extern llvm::cl::opt<bool> DumpGSIRecords;
extern llvm::cl::opt<bool> DumpGlobals;
+extern llvm::cl::list<std::string> DumpGlobalNames;
extern llvm::cl::opt<bool> DumpGlobalExtras;
extern llvm::cl::opt<bool> DumpPublics;
extern llvm::cl::opt<bool> DumpPublicExtras;
@@ -171,6 +177,7 @@ extern llvm::cl::opt<bool> DumpSectionContribs;
extern llvm::cl::opt<bool> DumpSectionMap;
extern llvm::cl::opt<bool> DumpModules;
extern llvm::cl::opt<bool> DumpModuleFiles;
+extern llvm::cl::opt<bool> DumpFpo;
extern llvm::cl::opt<bool> RawAll;
}
@@ -185,6 +192,7 @@ extern llvm::cl::opt<bool> PdbStream;
extern llvm::cl::opt<bool> DbiStream;
extern llvm::cl::opt<bool> TpiStream;
extern llvm::cl::opt<bool> IpiStream;
+extern llvm::cl::opt<bool> PublicsStream;
extern llvm::cl::list<std::string> InputFilename;
extern llvm::cl::opt<bool> DumpModules;
extern llvm::cl::opt<bool> DumpModuleFiles;
diff --git a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 1a0b9e127bbc..c25cbc2b64df 100644
--- a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -123,6 +123,47 @@ static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
}
}
+namespace {
+/// A remapper from original symbol names to new symbol names based on a file
+/// containing a list of mappings from old name to new name.
+class SymbolRemapper {
+ std::unique_ptr<MemoryBuffer> File;
+ DenseMap<StringRef, StringRef> RemappingTable;
+
+public:
+ /// Build a SymbolRemapper from a file containing a list of old/new symbols.
+ static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
+ auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
+ if (!BufOrError)
+ exitWithErrorCode(BufOrError.getError(), InputFile);
+
+ auto Remapper = llvm::make_unique<SymbolRemapper>();
+ Remapper->File = std::move(BufOrError.get());
+
+ for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
+ !LineIt.is_at_eof(); ++LineIt) {
+ std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
+ if (Parts.first.empty() || Parts.second.empty() ||
+ Parts.second.count(' ')) {
+ exitWithError("unexpected line in remapping file",
+ (InputFile + ":" + Twine(LineIt.line_number())).str(),
+ "expected 'old_symbol new_symbol'");
+ }
+ Remapper->RemappingTable.insert(Parts);
+ }
+ return Remapper;
+ }
+
+ /// Attempt to map the given old symbol into a new symbol.
+ ///
+ /// \return The new symbol, or \p Name if no such symbol was found.
+ StringRef operator()(StringRef Name) {
+ StringRef New = RemappingTable.lookup(Name);
+ return New.empty() ? Name : New;
+ }
+};
+}
+
struct WeightedFile {
std::string Filename;
uint64_t Weight;
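
Editorial note: the SymbolRemapper added above reads a remapping file (passed to llvm-profdata merge via -remapping-file or -r) containing one "old_symbol new_symbol" pair per line, with '#' introducing comment lines, and maps unknown names to themselves. The class below is a hedged, self-contained re-implementation of that file format for illustration; it is not the llvm-profdata code.

#include <fstream>
#include <sstream>
#include <string>
#include <unordered_map>

class SimpleRemapper {
  std::unordered_map<std::string, std::string> Table;

public:
  bool load(const std::string &Path) {
    std::ifstream In(Path);
    if (!In)
      return false;
    std::string Line;
    while (std::getline(In, Line)) {
      if (Line.empty() || Line[0] == '#')
        continue;                    // skip blanks and comments
      std::istringstream LS(Line);
      std::string Old, New, Extra;
      if (!(LS >> Old >> New) || (LS >> Extra))
        return false;                // expected exactly "old_symbol new_symbol"
      Table.emplace(Old, New);
    }
    return true;
  }

  // Map an old symbol to its new name, or return it unchanged if unmapped.
  std::string operator()(const std::string &Name) const {
    auto It = Table.find(Name);
    return It == Table.end() ? Name : It->second;
  }
};
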
@@ -161,7 +202,8 @@ static bool isFatalError(instrprof_error IPE) {
}
/// Load an input into a writer context.
-static void loadInput(const WeightedFile &Input, WriterContext *WC) {
+static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
+ WriterContext *WC) {
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
// If there's a pending hard error, don't do more work.
@@ -192,6 +234,8 @@ static void loadInput(const WeightedFile &Input, WriterContext *WC) {
}
for (auto &I : *Reader) {
+ if (Remapper)
+ I.Name = (*Remapper)(I.Name);
const StringRef FuncName = I.Name;
bool Reported = false;
WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
@@ -236,6 +280,7 @@ static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
}
static void mergeInstrProfile(const WeightedFileVector &Inputs,
+ SymbolRemapper *Remapper,
StringRef OutputFilename,
ProfileFormat OutputFormat, bool OutputSparse,
unsigned NumThreads) {
@@ -267,14 +312,14 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
if (NumThreads == 1) {
for (const auto &Input : Inputs)
- loadInput(Input, Contexts[0].get());
+ loadInput(Input, Remapper, Contexts[0].get());
} else {
ThreadPool Pool(NumThreads);
// Load the inputs in parallel (N/NumThreads serial steps).
unsigned Ctx = 0;
for (const auto &Input : Inputs) {
- Pool.async(loadInput, Input, Contexts[Ctx].get());
+ Pool.async(loadInput, Input, Remapper, Contexts[Ctx].get());
Ctx = (Ctx + 1) % NumThreads;
}
Pool.wait();
@@ -322,11 +367,43 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
}
}
+/// Make a copy of the given function samples with all symbol names remapped
+/// by the provided symbol remapper.
+static sampleprof::FunctionSamples
+remapSamples(const sampleprof::FunctionSamples &Samples,
+ SymbolRemapper &Remapper, sampleprof_error &Error) {
+ sampleprof::FunctionSamples Result;
+ Result.setName(Remapper(Samples.getName()));
+ Result.addTotalSamples(Samples.getTotalSamples());
+ Result.addHeadSamples(Samples.getHeadSamples());
+ for (const auto &BodySample : Samples.getBodySamples()) {
+ Result.addBodySamples(BodySample.first.LineOffset,
+ BodySample.first.Discriminator,
+ BodySample.second.getSamples());
+ for (const auto &Target : BodySample.second.getCallTargets()) {
+ Result.addCalledTargetSamples(BodySample.first.LineOffset,
+ BodySample.first.Discriminator,
+ Remapper(Target.first()), Target.second);
+ }
+ }
+ for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
+ sampleprof::FunctionSamplesMap &Target =
+ Result.functionSamplesAt(CallsiteSamples.first);
+ for (const auto &Callsite : CallsiteSamples.second) {
+ sampleprof::FunctionSamples Remapped =
+ remapSamples(Callsite.second, Remapper, Error);
+ MergeResult(Error, Target[Remapped.getName()].merge(Remapped));
+ }
+ }
+ return Result;
+}
+
static sampleprof::SampleProfileFormat FormatMap[] = {
sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Compact_Binary,
sampleprof::SPF_GCC, sampleprof::SPF_Binary};
static void mergeSampleProfile(const WeightedFileVector &Inputs,
+ SymbolRemapper *Remapper,
StringRef OutputFilename,
ProfileFormat OutputFormat) {
using namespace sampleprof;
@@ -357,9 +434,13 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
E = Profiles.end();
I != E; ++I) {
- StringRef FName = I->first();
- FunctionSamples &Samples = I->second;
- sampleprof_error Result = ProfileMap[FName].merge(Samples, Input.Weight);
+ sampleprof_error Result = sampleprof_error::success;
+ FunctionSamples Remapped =
+ Remapper ? remapSamples(I->second, *Remapper, Result)
+ : FunctionSamples();
+ FunctionSamples &Samples = Remapper ? Remapped : I->second;
+ StringRef FName = Samples.getName();
+ MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
if (Result != sampleprof_error::success) {
std::error_code EC = make_error_code(Result);
handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName);
@@ -461,6 +542,10 @@ static int merge_main(int argc, const char *argv[]) {
cl::opt<bool> DumpInputFileList(
"dump-input-file-list", cl::init(false), cl::Hidden,
cl::desc("Dump the list of input files and their weights, then exit"));
+ cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
+ cl::desc("Symbol remapping file"));
+ cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
+ cl::aliasopt(RemappingFile));
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::init("-"), cl::Required,
cl::desc("Output file"));
@@ -509,11 +594,16 @@ static int merge_main(int argc, const char *argv[]) {
return 0;
}
+ std::unique_ptr<SymbolRemapper> Remapper;
+ if (!RemappingFile.empty())
+ Remapper = SymbolRemapper::create(RemappingFile);
+
if (ProfileKind == instr)
- mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,
- OutputSparse, NumThreads);
+ mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename,
+ OutputFormat, OutputSparse, NumThreads);
else
- mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat);
+ mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
+ OutputFormat);
return 0;
}
@@ -543,13 +633,21 @@ static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
Stats.ValueSitesHistogram.resize(NV, 0);
Stats.ValueSitesHistogram[NV - 1]++;
}
+
+ uint64_t SiteSum = 0;
+ for (uint32_t V = 0; V < NV; V++)
+ SiteSum += VD[V].Count;
+ if (SiteSum == 0)
+ SiteSum = 1;
+
for (uint32_t V = 0; V < NV; V++) {
- OS << "\t[ " << I << ", ";
+ OS << "\t[ " << format("%2u", I) << ", ";
if (Symtab == nullptr)
- OS << VD[V].Value;
+ OS << format("%4u", VD[V].Value);
else
OS << Symtab->getFuncName(VD[V].Value);
- OS << ", " << VD[V].Count << " ]\n";
+ OS << ", " << format("%10" PRId64, VD[V].Count) << " ] ("
+ << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n";
}
}
}
@@ -572,9 +670,9 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
uint32_t TopN, bool ShowIndirectCallTargets,
bool ShowMemOPSizes, bool ShowDetailedSummary,
std::vector<uint32_t> DetailedSummaryCutoffs,
- bool ShowAllFunctions,
- const std::string &ShowFunction, bool TextFormat,
- raw_fd_ostream &OS) {
+ bool ShowAllFunctions, uint64_t ValueCutoff,
+ bool OnlyListBelow, const std::string &ShowFunction,
+ bool TextFormat, raw_fd_ostream &OS) {
auto ReaderOrErr = InstrProfReader::create(Filename);
std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
if (ShowDetailedSummary && Cutoffs.empty()) {
@@ -587,6 +685,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
auto Reader = std::move(ReaderOrErr.get());
bool IsIRInstr = Reader->isIRLevelProfile();
size_t ShownFunctions = 0;
+ size_t BelowCutoffFunctions = 0;
int NumVPKind = IPVK_Last - IPVK_First + 1;
std::vector<ValueSitesStats> VPStats(NumVPKind);
@@ -600,12 +699,21 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
decltype(MinCmp)>
HottestFuncs(MinCmp);
+ if (!TextFormat && OnlyListBelow) {
+ OS << "The list of functions with the maximum counter less than "
+ << ValueCutoff << ":\n";
+ }
+
+ // Add marker so that IR-level instrumentation round-trips properly.
+ if (TextFormat && IsIRInstr)
+ OS << ":ir\n";
+
for (const auto &Func : *Reader) {
bool Show =
ShowAllFunctions || (!ShowFunction.empty() &&
Func.Name.find(ShowFunction) != Func.Name.npos);
- bool doTextFormatDump = (Show && ShowCounts && TextFormat);
+ bool doTextFormatDump = (Show && TextFormat);
if (doTextFormatDump) {
InstrProfSymtab &Symtab = Reader->getSymtab();
@@ -617,11 +725,24 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
assert(Func.Counts.size() > 0 && "function missing entry counter");
Builder.addRecord(Func);
- if (TopN) {
- uint64_t FuncMax = 0;
- for (size_t I = 0, E = Func.Counts.size(); I < E; ++I)
- FuncMax = std::max(FuncMax, Func.Counts[I]);
+ uint64_t FuncMax = 0;
+ uint64_t FuncSum = 0;
+ for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
+ FuncMax = std::max(FuncMax, Func.Counts[I]);
+ FuncSum += Func.Counts[I];
+ }
+ if (FuncMax < ValueCutoff) {
+ ++BelowCutoffFunctions;
+ if (OnlyListBelow) {
+ OS << " " << Func.Name << ": (Max = " << FuncMax
+ << " Sum = " << FuncSum << ")\n";
+ }
+ continue;
+ } else if (OnlyListBelow)
+ continue;
+
+ if (TopN) {
if (HottestFuncs.size() == TopN) {
if (HottestFuncs.top().second < FuncMax) {
HottestFuncs.pop();
@@ -632,7 +753,6 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
}
if (Show) {
-
if (!ShownFunctions)
OS << "Counters:\n";
@@ -679,7 +799,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (Reader->hasError())
exitWithError(Reader->getError(), Filename);
- if (ShowCounts && TextFormat)
+ if (TextFormat)
return 0;
std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
OS << "Instrumentation level: "
@@ -687,6 +807,12 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (ShowAllFunctions || !ShowFunction.empty())
OS << "Functions shown: " << ShownFunctions << "\n";
OS << "Total functions: " << PS->getNumFunctions() << "\n";
+ if (ValueCutoff > 0) {
+ OS << "Number of functions with maximum count (< " << ValueCutoff
+ << "): " << BelowCutoffFunctions << "\n";
+ OS << "Number of functions with maximum count (>= " << ValueCutoff
+ << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
+ }
OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
@@ -788,7 +914,14 @@ static int show_main(int argc, const char *argv[]) {
cl::opt<uint32_t> TopNFunctions(
"topn", cl::init(0),
cl::desc("Show the list of functions with the largest internal counts"));
-
+ cl::opt<uint32_t> ValueCutoff(
+ "value-cutoff", cl::init(0),
+ cl::desc("Set the count value cutoff. Functions with the maximum count "
+ "less than this value will not be printed out. (Default is 0)"));
+ cl::opt<bool> OnlyListBelow(
+ "list-below-cutoff", cl::init(false),
+ cl::desc("Only output names of functions whose max count values are "
+ "below the cutoff value"));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
if (OutputFilename.empty())
@@ -808,7 +941,8 @@ static int show_main(int argc, const char *argv[]) {
return showInstrProfile(Filename, ShowCounts, TopNFunctions,
ShowIndirectCallTargets, ShowMemOPSizes,
ShowDetailedSummary, DetailedSummaryCutoffs,
- ShowAllFunctions, ShowFunction, TextFormat, OS);
+ ShowAllFunctions, ValueCutoff, OnlyListBelow,
+ ShowFunction, TextFormat, OS);
else
return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
ShowFunction, OS);
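
The new -value-cutoff and -list-below-cutoff options classify each function by the maximum of its block counters. A rough sketch of that classification in isolation, with made-up counters and cutoff:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint64_t> FunctionCounts = {5, 120, 0, 7}; // sample per-block counters
      uint64_t ValueCutoff = 100;                            // like the new -value-cutoff option

      uint64_t FuncMax = 0, FuncSum = 0;
      for (uint64_t C : FunctionCounts) {
        FuncMax = std::max(FuncMax, C);
        FuncSum += C;
      }
      if (FuncMax < ValueCutoff)
        std::printf("below cutoff: (Max = %llu Sum = %llu)\n",
                    (unsigned long long)FuncMax, (unsigned long long)FuncSum);
      else
        std::printf("kept: max counter %llu >= %llu\n",
                    (unsigned long long)FuncMax, (unsigned long long)ValueCutoff);
    }
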
diff --git a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index a90840b22c8d..4b823b816c35 100644
--- a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -118,31 +118,57 @@ const size_t Decoder::PDataEntrySize = sizeof(RuntimeFunction);
// TODO name the uops more appropriately
const Decoder::RingEntry Decoder::Ring[] = {
- { 0x80, 0x00, &Decoder::opcode_0xxxxxxx }, // UOP_STACK_FREE (16-bit)
- { 0xc0, 0x80, &Decoder::opcode_10Lxxxxx }, // UOP_POP (32-bit)
- { 0xf0, 0xc0, &Decoder::opcode_1100xxxx }, // UOP_STACK_SAVE (16-bit)
- { 0xf8, 0xd0, &Decoder::opcode_11010Lxx }, // UOP_POP (16-bit)
- { 0xf8, 0xd8, &Decoder::opcode_11011Lxx }, // UOP_POP (32-bit)
- { 0xf8, 0xe0, &Decoder::opcode_11100xxx }, // UOP_VPOP (32-bit)
- { 0xfc, 0xe8, &Decoder::opcode_111010xx }, // UOP_STACK_FREE (32-bit)
- { 0xfe, 0xec, &Decoder::opcode_1110110L }, // UOP_POP (16-bit)
- { 0xff, 0xee, &Decoder::opcode_11101110 }, // UOP_MICROSOFT_SPECIFIC (16-bit)
+ { 0x80, 0x00, 1, &Decoder::opcode_0xxxxxxx }, // UOP_STACK_FREE (16-bit)
+ { 0xc0, 0x80, 2, &Decoder::opcode_10Lxxxxx }, // UOP_POP (32-bit)
+ { 0xf0, 0xc0, 1, &Decoder::opcode_1100xxxx }, // UOP_STACK_SAVE (16-bit)
+ { 0xf8, 0xd0, 1, &Decoder::opcode_11010Lxx }, // UOP_POP (16-bit)
+ { 0xf8, 0xd8, 1, &Decoder::opcode_11011Lxx }, // UOP_POP (32-bit)
+ { 0xf8, 0xe0, 1, &Decoder::opcode_11100xxx }, // UOP_VPOP (32-bit)
+ { 0xfc, 0xe8, 2, &Decoder::opcode_111010xx }, // UOP_STACK_FREE (32-bit)
+ { 0xfe, 0xec, 2, &Decoder::opcode_1110110L }, // UOP_POP (16-bit)
+ { 0xff, 0xee, 2, &Decoder::opcode_11101110 }, // UOP_MICROSOFT_SPECIFIC (16-bit)
// UOP_PUSH_MACHINE_FRAME
// UOP_PUSH_CONTEXT
// UOP_PUSH_TRAP_FRAME
// UOP_REDZONE_RESTORE_LR
- { 0xff, 0xef, &Decoder::opcode_11101111 }, // UOP_LDRPC_POSTINC (32-bit)
- { 0xff, 0xf5, &Decoder::opcode_11110101 }, // UOP_VPOP (32-bit)
- { 0xff, 0xf6, &Decoder::opcode_11110110 }, // UOP_VPOP (32-bit)
- { 0xff, 0xf7, &Decoder::opcode_11110111 }, // UOP_STACK_RESTORE (16-bit)
- { 0xff, 0xf8, &Decoder::opcode_11111000 }, // UOP_STACK_RESTORE (16-bit)
- { 0xff, 0xf9, &Decoder::opcode_11111001 }, // UOP_STACK_RESTORE (32-bit)
- { 0xff, 0xfa, &Decoder::opcode_11111010 }, // UOP_STACK_RESTORE (32-bit)
- { 0xff, 0xfb, &Decoder::opcode_11111011 }, // UOP_NOP (16-bit)
- { 0xff, 0xfc, &Decoder::opcode_11111100 }, // UOP_NOP (32-bit)
- { 0xff, 0xfd, &Decoder::opcode_11111101 }, // UOP_NOP (16-bit) / END
- { 0xff, 0xfe, &Decoder::opcode_11111110 }, // UOP_NOP (32-bit) / END
- { 0xff, 0xff, &Decoder::opcode_11111111 }, // UOP_END
+ { 0xff, 0xef, 2, &Decoder::opcode_11101111 }, // UOP_LDRPC_POSTINC (32-bit)
+ { 0xff, 0xf5, 2, &Decoder::opcode_11110101 }, // UOP_VPOP (32-bit)
+ { 0xff, 0xf6, 2, &Decoder::opcode_11110110 }, // UOP_VPOP (32-bit)
+ { 0xff, 0xf7, 3, &Decoder::opcode_11110111 }, // UOP_STACK_RESTORE (16-bit)
+ { 0xff, 0xf8, 4, &Decoder::opcode_11111000 }, // UOP_STACK_RESTORE (16-bit)
+ { 0xff, 0xf9, 3, &Decoder::opcode_11111001 }, // UOP_STACK_RESTORE (32-bit)
+ { 0xff, 0xfa, 4, &Decoder::opcode_11111010 }, // UOP_STACK_RESTORE (32-bit)
+ { 0xff, 0xfb, 1, &Decoder::opcode_11111011 }, // UOP_NOP (16-bit)
+ { 0xff, 0xfc, 1, &Decoder::opcode_11111100 }, // UOP_NOP (32-bit)
+ { 0xff, 0xfd, 1, &Decoder::opcode_11111101 }, // UOP_NOP (16-bit) / END
+ { 0xff, 0xfe, 1, &Decoder::opcode_11111110 }, // UOP_NOP (32-bit) / END
+ { 0xff, 0xff, 1, &Decoder::opcode_11111111 }, // UOP_END
+};
+
+
+// Unwind opcodes for ARM64.
+// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+const Decoder::RingEntry Decoder::Ring64[] = {
+ { 0xe0, 0x00, 1, &Decoder::opcode_alloc_s },
+ { 0xe0, 0x20, 1, &Decoder::opcode_save_r19r20_x },
+ { 0xc0, 0x40, 1, &Decoder::opcode_save_fplr },
+ { 0xc0, 0x80, 1, &Decoder::opcode_save_fplr_x },
+ { 0xf8, 0xc0, 2, &Decoder::opcode_alloc_m },
+ { 0xfc, 0xc8, 2, &Decoder::opcode_save_regp },
+ { 0xfc, 0xcc, 2, &Decoder::opcode_save_regp_x },
+ { 0xfc, 0xd0, 2, &Decoder::opcode_save_reg },
+ { 0xfe, 0xd4, 2, &Decoder::opcode_save_reg_x },
+ { 0xfe, 0xd6, 2, &Decoder::opcode_save_lrpair },
+ { 0xfe, 0xd8, 2, &Decoder::opcode_save_fregp },
+ { 0xfe, 0xda, 2, &Decoder::opcode_save_fregp_x },
+ { 0xfe, 0xdc, 2, &Decoder::opcode_save_freg },
+ { 0xff, 0xde, 2, &Decoder::opcode_save_freg_x },
+ { 0xff, 0xe0, 4, &Decoder::opcode_alloc_l },
+ { 0xff, 0xe1, 1, &Decoder::opcode_setfp },
+ { 0xff, 0xe2, 2, &Decoder::opcode_addfp },
+ { 0xff, 0xe3, 1, &Decoder::opcode_nop },
+ { 0xff, 0xe4, 1, &Decoder::opcode_end },
+ { 0xff, 0xe5, 1, &Decoder::opcode_end_c },
};
void Decoder::printRegisters(const std::pair<uint16_t, uint32_t> &RegisterMask) {
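
Each RingEntry now records the opcode's byte length next to its mask/value pair, so the decoder can bounds-check before dispatching. A small sketch of how such a table classifies the first byte of an unwind code; the Entry struct and the abbreviated table are illustrative, not the real Decoder:

    #include <cstdint>
    #include <cstdio>

    struct Entry { uint8_t Mask, Value, Length; const char *Name; };

    // Abbreviated stand-in for the ARM64 ring: match (Byte & Mask) == Value.
    static const Entry Table[] = {
      {0xe0, 0x00, 1, "alloc_s"},
      {0xf8, 0xc0, 2, "alloc_m"},
      {0xff, 0xe0, 4, "alloc_l"},
      {0xff, 0xe4, 1, "end"},
    };

    int main() {
      uint8_t Byte = 0xc3; // example first opcode byte
      for (const Entry &E : Table)
        if ((Byte & E.Mask) == E.Value) {
          std::printf("0x%02x matches %s (%u byte opcode)\n", Byte, E.Name, E.Length);
          return 0;
        }
      std::printf("0x%02x ; Bad opcode!\n", Byte);
    }
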
@@ -493,18 +519,291 @@ bool Decoder::opcode_11111111(const uint8_t *OC, unsigned &Offset,
return true;
}
+// ARM64 unwind codes start here.
+bool Decoder::opcode_alloc_s(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t NumBytes = (OC[Offset] & 0x1F) << 4;
+ SW.startLine() << format("0x%02x ; %s sp, #%u\n", OC[Offset],
+ static_cast<const char *>(Prologue ? "sub" : "add"),
+ NumBytes);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_save_r19r20_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Off = (OC[Offset] & 0x1F) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x ; stp x19, x20, [sp, #-%u]!\n", OC[Offset], Off);
+ else
+ SW.startLine() << format(
+ "0x%02x ; ldp x19, x20, [sp], #%u\n", OC[Offset], Off);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_save_fplr(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Off = (OC[Offset] & 0x3F) << 3;
+ SW.startLine() << format(
+ "0x%02x ; %s x29, x30, [sp, #%u]\n", OC[Offset],
+ static_cast<const char *>(Prologue ? "stp" : "ldp"), Off);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_save_fplr_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Off = ((OC[Offset] & 0x3F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x ; stp x29, x30, [sp, #-%u]!\n", OC[Offset], Off);
+ else
+ SW.startLine() << format(
+ "0x%02x ; ldp x29, x30, [sp], #%u\n", OC[Offset], Off);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_alloc_m(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t NumBytes = ((OC[Offset] & 0x07) << 8);
+ NumBytes |= (OC[Offset + 1] & 0xFF);
+ NumBytes <<= 4;
+ SW.startLine() << format("0x%02x%02x ; %s sp, #%u\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "sub" : "add"),
+ NumBytes);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_regp(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = ((OC[Offset] & 0x03) << 8);
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 19;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format(
+ "0x%02x%02x ; %s x%u, x%u, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "stp" : "ldp"), Reg, Reg + 1, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_regp_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = ((OC[Offset] & 0x03) << 8);
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 19;
+ uint32_t Off = ((OC[Offset + 1] & 0x3F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x%02x ; stp x%u, x%u, [sp, #-%u]!\n",
+ OC[Offset], OC[Offset + 1], Reg,
+ Reg + 1, Off);
+ else
+ SW.startLine() << format(
+ "0x%02x%02x ; ldp x%u, x%u, [sp], #%u\n",
+ OC[Offset], OC[Offset + 1], Reg,
+ Reg + 1, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_reg(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x03) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 19;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format("0x%02x%02x ; %s x%u, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "str" : "ldr"),
+ Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_reg_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xE0);
+ Reg >>= 5;
+ Reg += 19;
+ uint32_t Off = ((OC[Offset + 1] & 0x1F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format("0x%02x%02x ; str x%u, [sp, #%u]!\n",
+ OC[Offset], OC[Offset + 1], Reg, Off);
+ else
+ SW.startLine() << format("0x%02x%02x ; ldr x%u, [sp], #%u\n",
+ OC[Offset], OC[Offset + 1], Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_lrpair(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg *= 2;
+ Reg += 19;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format("0x%02x%02x ; %s x%u, lr, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "stp" : "ldp"),
+ Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_fregp(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 8;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format("0x%02x%02x ; %s d%u, d%u, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "stp" : "ldp"),
+ Reg, Reg + 1, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_fregp_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 8;
+ uint32_t Off = ((OC[Offset + 1] & 0x3F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x%02x ; stp d%u, d%u, [sp, #-%u]!\n", OC[Offset],
+ OC[Offset + 1], Reg, Reg + 1, Off);
+ else
+ SW.startLine() << format(
+ "0x%02x%02x ; ldp d%u, d%u, [sp], #%u\n", OC[Offset],
+ OC[Offset + 1], Reg, Reg + 1, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_freg(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = (OC[Offset] & 0x01) << 8;
+ Reg |= (OC[Offset + 1] & 0xC0);
+ Reg >>= 6;
+ Reg += 8;
+ uint32_t Off = (OC[Offset + 1] & 0x3F) << 3;
+ SW.startLine() << format("0x%02x%02x ; %s d%u, [sp, #%u]\n",
+ OC[Offset], OC[Offset + 1],
+ static_cast<const char *>(Prologue ? "str" : "ldr"),
+ Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_save_freg_x(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ uint32_t Reg = ((OC[Offset + 1] & 0xE0) >> 5) + 8;
+ uint32_t Off = ((OC[Offset + 1] & 0x1F) + 1) << 3;
+ if (Prologue)
+ SW.startLine() << format(
+ "0x%02x%02x ; str d%u, [sp, #-%u]!\n", OC[Offset],
+ OC[Offset + 1], Reg, Off);
+ else
+ SW.startLine() << format(
+ "0x%02x%02x ; ldr d%u, [sp], #%u\n", OC[Offset],
+ OC[Offset + 1], Reg, Off);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_alloc_l(const uint8_t *OC, unsigned &Offset,
+ unsigned Length, bool Prologue) {
+ unsigned Off =
+ (OC[Offset + 1] << 16) | (OC[Offset + 2] << 8) | (OC[Offset + 3] << 0);
+ Off <<= 4;
+ SW.startLine() << format(
+ "0x%02x%02x%02x%02x ; %s sp, #%u\n", OC[Offset], OC[Offset + 1],
+ OC[Offset + 2], OC[Offset + 3],
+ static_cast<const char *>(Prologue ? "sub" : "add"), Off);
+ Offset += 4;
+ return false;
+}
+
+bool Decoder::opcode_setfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ SW.startLine() << format("0x%02x ; mov fp, sp\n", OC[Offset]);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_addfp(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ unsigned NumBytes = OC[Offset + 1] << 3;
+ SW.startLine() << format("0x%02x%02x ; add fp, sp, #%u\n",
+ OC[Offset], OC[Offset + 1], NumBytes);
+ Offset += 2;
+ return false;
+}
+
+bool Decoder::opcode_nop(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ SW.startLine() << format("0x%02x ; nop\n", OC[Offset]);
+ ++Offset;
+ return false;
+}
+
+bool Decoder::opcode_end(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ SW.startLine() << format("0x%02x ; end\n", OC[Offset]);
+ ++Offset;
+ return true;
+}
+
+bool Decoder::opcode_end_c(const uint8_t *OC, unsigned &Offset, unsigned Length,
+ bool Prologue) {
+ SW.startLine() << format("0x%02x ; end_c\n", OC[Offset]);
+ ++Offset;
+ return true;
+}
+
void Decoder::decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
bool Prologue) {
assert((!Prologue || Offset == 0) && "prologue should always use offset 0");
-
+ const RingEntry* DecodeRing = isAArch64 ? Ring64 : Ring;
bool Terminated = false;
for (unsigned OI = Offset, OE = Opcodes.size(); !Terminated && OI < OE; ) {
for (unsigned DI = 0;; ++DI) {
- if ((Opcodes[OI] & Ring[DI].Mask) == Ring[DI].Value) {
- Terminated = (this->*Ring[DI].Routine)(Opcodes.data(), OI, 0, Prologue);
+ if ((isAArch64 && (DI >= array_lengthof(Ring64))) ||
+ (!isAArch64 && (DI >= array_lengthof(Ring)))) {
+ SW.startLine() << format("0x%02x ; Bad opcode!\n",
+ Opcodes.data()[OI]);
+ ++OI;
+ break;
+ }
+
+ if ((Opcodes[OI] & DecodeRing[DI].Mask) == DecodeRing[DI].Value) {
+ if (OI + DecodeRing[DI].Length > OE) {
+ SW.startLine() << format("Opcode 0x%02x goes past the unwind data\n",
+ Opcodes[OI]);
+ OI += DecodeRing[DI].Length;
+ break;
+ }
+ Terminated =
+ (this->*DecodeRing[DI].Routine)(Opcodes.data(), OI, 0, Prologue);
break;
}
- assert(DI < array_lengthof(Ring) && "unhandled opcode");
}
}
}
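
The alloc_s, alloc_m and alloc_l handlers above recover a stack adjustment from 5-, 11- and 24-bit fields, each scaled by 16. A worked standalone sketch of that bit manipulation; the byte arrays are example encodings, not data from a real object file:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // alloc_s: low 5 bits of one byte, times 16.
      uint8_t S[] = {0x07};
      uint32_t SmallBytes = (S[0] & 0x1F) << 4;               // 7 * 16 = 112

      // alloc_m: 11 bits spread over two bytes, times 16.
      uint8_t M[] = {0xc1, 0x23};
      uint32_t MidBytes = (((M[0] & 0x07) << 8) | M[1]) << 4; // 0x123 * 16 = 4656

      // alloc_l: 24 bits in the three bytes after the opcode, times 16.
      uint8_t L[] = {0xe0, 0x00, 0x10, 0x00};
      uint32_t LargeBytes = ((L[1] << 16) | (L[2] << 8) | L[3]) << 4; // 0x1000 * 16 = 65536

      std::printf("sub sp, #%u / #%u / #%u\n", SmallBytes, MidBytes, LargeBytes);
    }
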
@@ -520,22 +819,36 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
uint64_t Offset = VA - SectionVA;
const ulittle32_t *Data =
reinterpret_cast<const ulittle32_t *>(Contents.data() + Offset);
- const ExceptionDataRecord XData(Data);
+ // Sanity check to ensure that the .xdata header is present.
+ // A header is one or two words, followed by at least one word to describe
+ // the unwind codes. Applicable to both ARM and AArch64.
+ if (Contents.size() - Offset < 8)
+ report_fatal_error(".xdata must be at least 8 bytes in size");
+
+ const ExceptionDataRecord XData(Data, isAArch64);
DictScope XRS(SW, "ExceptionData");
- SW.printNumber("FunctionLength", XData.FunctionLength() << 1);
+ SW.printNumber("FunctionLength",
+ isAArch64 ? XData.FunctionLengthInBytesAArch64() :
+ XData.FunctionLengthInBytesARM());
SW.printNumber("Version", XData.Vers());
SW.printBoolean("ExceptionData", XData.X());
SW.printBoolean("EpiloguePacked", XData.E());
- SW.printBoolean("Fragment", XData.F());
+ if (!isAArch64)
+ SW.printBoolean("Fragment", XData.F());
SW.printNumber(XData.E() ? "EpilogueOffset" : "EpilogueScopes",
XData.EpilogueCount());
- SW.printNumber("ByteCodeLength",
- static_cast<uint64_t>(XData.CodeWords() * sizeof(uint32_t)));
+ uint64_t ByteCodeLength = XData.CodeWords() * sizeof(uint32_t);
+ SW.printNumber("ByteCodeLength", ByteCodeLength);
+
+ if ((int64_t)(Contents.size() - Offset - 4 * HeaderWords(XData) -
+ (XData.E() ? 0 : XData.EpilogueCount() * 4) -
+ (XData.X() ? 8 : 0)) < (int64_t)ByteCodeLength)
+ report_fatal_error("Malformed unwind data");
if (XData.E()) {
ArrayRef<uint8_t> UC = XData.UnwindByteCode();
- if (!XData.F()) {
+ if (isAArch64 || !XData.F()) {
ListScope PS(SW, "Prologue");
decodeOpcodes(UC, 0, /*Prologue=*/true);
}
@@ -544,16 +857,27 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
decodeOpcodes(UC, XData.EpilogueCount(), /*Prologue=*/false);
}
} else {
+ {
+ ListScope PS(SW, "Prologue");
+ decodeOpcodes(XData.UnwindByteCode(), 0, /*Prologue=*/true);
+ }
ArrayRef<ulittle32_t> EpilogueScopes = XData.EpilogueScopes();
ListScope ESS(SW, "EpilogueScopes");
for (const EpilogueScope ES : EpilogueScopes) {
DictScope ESES(SW, "EpilogueScope");
SW.printNumber("StartOffset", ES.EpilogueStartOffset());
- SW.printNumber("Condition", ES.Condition());
- SW.printNumber("EpilogueStartIndex", ES.EpilogueStartIndex());
+ if (!isAArch64)
+ SW.printNumber("Condition", ES.Condition());
+ SW.printNumber("EpilogueStartIndex",
+ isAArch64 ? ES.EpilogueStartIndexAArch64()
+ : ES.EpilogueStartIndexARM());
+ if (ES.ES & ~0xffc3ffff)
+ SW.printNumber("ReservedBits", (ES.ES >> 18) & 0xF);
ListScope Opcodes(SW, "Opcodes");
- decodeOpcodes(XData.UnwindByteCode(), ES.EpilogueStartIndex(),
+ decodeOpcodes(XData.UnwindByteCode(),
+ isAArch64 ? ES.EpilogueStartIndexAArch64()
+ : ES.EpilogueStartIndexARM(),
/*Prologue=*/false);
}
}
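
The added size check verifies that the unwind byte code fits in what remains of .xdata after the header words, the epilogue-scope array and the optional exception-handler slot. The same arithmetic with named, hypothetical inputs:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t ContentsSize = 64;   // bytes left in the section
      uint64_t Offset = 0;          // start of this .xdata record
      unsigned HeaderWords = 1;     // 1 or 2, depending on the extended header
      bool EpiloguePacked = false;  // the E bit
      bool HasExceptionData = true; // the X bit
      uint32_t EpilogueCount = 2;
      uint64_t ByteCodeLength = 3 * sizeof(uint32_t); // CodeWords * 4

      int64_t Avail = (int64_t)(ContentsSize - Offset - 4 * HeaderWords -
                                (EpiloguePacked ? 0 : EpilogueCount * 4) -
                                (HasExceptionData ? 8 : 0));
      if (Avail < (int64_t)ByteCodeLength)
        std::printf("Malformed unwind data\n");
      else
        std::printf("%lld bytes available for %llu bytes of unwind codes\n",
                    (long long)Avail, (unsigned long long)ByteCodeLength);
    }
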
@@ -565,16 +889,21 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
+ (XData.E() ? 0 : XData.EpilogueCount())
+ XData.CodeWords();
- ErrorOr<SymbolRef> Symbol =
- getRelocatedSymbol(COFF, Section, HandlerOffset * sizeof(uint32_t));
+ ErrorOr<SymbolRef> Symbol = getRelocatedSymbol(
+ COFF, Section, Offset + HandlerOffset * sizeof(uint32_t));
if (!Symbol)
Symbol = getSymbol(COFF, Address, /*FunctionOnly=*/true);
+ if (!Symbol) {
+ ListScope EHS(SW, "ExceptionHandler");
+ SW.printString("Routine", "(null)");
+ return true;
+ }
Expected<StringRef> Name = Symbol->getName();
if (!Name) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Name.takeError(), OS, "");
+ logAllUnhandledErrors(Name.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -613,7 +942,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
if (!FunctionNameOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -622,16 +951,13 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
if (!FunctionAddressOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
FunctionAddress = *FunctionAddressOrErr;
} else {
- const pe32_header *PEHeader;
- if (COFF.getPE32Header(PEHeader))
- return false;
- FunctionAddress = PEHeader->ImageBase + RF.BeginAddress;
+ FunctionAddress = COFF.getImageBase() + RF.BeginAddress;
}
SW.printString("Function", formatSymbol(FunctionName, FunctionAddress));
@@ -641,7 +967,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
if (!Name) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Name.takeError(), OS, "");
+ logAllUnhandledErrors(Name.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -650,7 +976,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
if (!AddressOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(AddressOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(AddressOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -666,22 +992,18 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
}
section_iterator SI = *SIOrErr;
- return dumpXDataRecord(COFF, *SI, FunctionAddress, Address);
+ // FIXME: Do we need to add an offset from the relocation?
+ return dumpXDataRecord(COFF, *SI, FunctionAddress,
+ RF.ExceptionInformationRVA());
} else {
- const pe32_header *PEHeader;
- if (COFF.getPE32Header(PEHeader))
- return false;
-
- uint64_t Address = PEHeader->ImageBase + RF.ExceptionInformationRVA();
+ uint64_t Address = COFF.getImageBase() + RF.ExceptionInformationRVA();
SW.printString("ExceptionRecord", formatSymbol("", Address));
- ErrorOr<SectionRef> Section =
- getSectionContaining(COFF, RF.ExceptionInformationRVA());
+ ErrorOr<SectionRef> Section = getSectionContaining(COFF, Address);
if (!Section)
return false;
- return dumpXDataRecord(COFF, *Section, FunctionAddress,
- RF.ExceptionInformationRVA());
+ return dumpXDataRecord(COFF, *Section, FunctionAddress, Address);
}
}
@@ -703,7 +1025,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
if (!FunctionNameOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -712,7 +1034,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
if (!FunctionAddressOrErr) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
- logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS, "");
+ logAllUnhandledErrors(FunctionAddressOrErr.takeError(), OS);
OS.flush();
report_fatal_error(Buf);
}
@@ -725,8 +1047,9 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
}
SW.printString("Function", formatSymbol(FunctionName, FunctionAddress));
- SW.printBoolean("Fragment",
- RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment);
+ if (!isAArch64)
+ SW.printBoolean("Fragment",
+ RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment);
SW.printNumber("FunctionLength", RF.FunctionLength());
SW.startLine() << "ReturnType: " << RF.Ret() << '\n';
SW.printBoolean("HomedParameters", RF.H());
@@ -749,6 +1072,10 @@ bool Decoder::dumpProcedureDataEntry(const COFFObjectFile &COFF,
DictScope RFS(SW, "RuntimeFunction");
if (Entry.Flag() == RuntimeFunctionFlag::RFF_Unpacked)
return dumpUnpackedEntry(COFF, Section, Offset, Index, Entry);
+ if (isAArch64) {
+ SW.startLine() << "Packed unwind data not yet supported for ARM64\n";
+ return true;
+ }
return dumpPackedEntry(COFF, Section, Offset, Index, Entry);
}
diff --git a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
index 95f521702268..e271a1e6fe77 100644
--- a/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
+++ b/contrib/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
@@ -24,13 +24,16 @@ class Decoder {
ScopedPrinter &SW;
raw_ostream &OS;
+ bool isAArch64;
struct RingEntry {
uint8_t Mask;
uint8_t Value;
+ uint8_t Length;
bool (Decoder::*Routine)(const uint8_t *, unsigned &, unsigned, bool);
};
static const RingEntry Ring[];
+ static const RingEntry Ring64[];
bool opcode_0xxxxxxx(const uint8_t *Opcodes, unsigned &Offset,
unsigned Length, bool Prologue);
@@ -75,6 +78,50 @@ class Decoder {
bool opcode_11111111(const uint8_t *Opcodes, unsigned &Offset,
unsigned Length, bool Prologue);
+ // ARM64 unwind codes start here.
+ bool opcode_alloc_s(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_save_r19r20_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_fplr(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_fplr_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_alloc_m(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_save_regp(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_regp_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_reg(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_reg_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_lrpair(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_fregp(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_fregp_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_freg(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_save_freg_x(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+ bool opcode_alloc_l(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_setfp(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_addfp(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_nop(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_end(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_end_c(const uint8_t *Opcodes, unsigned &Offset, unsigned Length,
+ bool Prologue);
+ bool opcode_save_next(const uint8_t *Opcodes, unsigned &Offset,
+ unsigned Length, bool Prologue);
+
void decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
bool Prologue);
@@ -107,7 +154,9 @@ class Decoder {
const object::SectionRef Section);
public:
- Decoder(ScopedPrinter &SW) : SW(SW), OS(SW.getOStream()) {}
+ Decoder(ScopedPrinter &SW, bool isAArch64) : SW(SW),
+ OS(SW.getOStream()),
+ isAArch64(isAArch64) {}
std::error_code dumpProcedureData(const object::COFFObjectFile &COFF);
};
}
diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
index 0ed4ccd09f6f..3e2626dad118 100644
--- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -50,6 +50,7 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/Win64EH.h"
#include "llvm/Support/raw_ostream.h"
@@ -78,7 +79,7 @@ public:
: ObjDumper(Writer), Obj(Obj), Writer(Writer), Types(100) {}
void printFileHeaders() override;
- void printSections() override;
+ void printSectionHeaders() override;
void printRelocations() override;
void printSymbols() override;
void printDynamicSymbols() override;
@@ -98,6 +99,7 @@ public:
mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
llvm::codeview::MergingTypeTableBuilder &CVTypes) override;
void printStackMap() const override;
+ void printAddrsig() override;
private:
void printSymbol(const SymbolRef &Sym);
void printRelocation(const SectionRef &Section, const RelocationRef &Reloc,
@@ -177,6 +179,10 @@ private:
DebugStringTableSubsectionRef CVStringTable;
+ /// Track the compilation CPU type. S_COMPILE3 symbol records typically come
+ /// first, but if we don't see one, just assume an X64 CPU type. It is common.
+ CPUType CompilationCPUType = CPUType::X64;
+
ScopedPrinter &Writer;
BinaryByteStream TypeContents;
LazyRandomTypeCollection Types;
@@ -607,8 +613,7 @@ void COFFDumper::cacheRelocations() {
RelocMap[Section].push_back(Reloc);
// Sort relocations by address.
- llvm::sort(RelocMap[Section].begin(), RelocMap[Section].end(),
- relocAddressLess);
+ llvm::sort(RelocMap[Section], relocAddressLess);
}
}
@@ -749,7 +754,7 @@ void COFFDumper::printCOFFDebugDirectory() {
W.printNumber("PDBAge", DebugInfo->PDB70.Age);
W.printString("PDBFileName", PDBFileName);
}
- } else {
+ } else if (D.SizeOfData != 0) {
// FIXME: Type values of 12 and 13 are commonly observed but are not in
// the documented type enum. Figure out what they mean.
ArrayRef<uint8_t> RawData;
@@ -954,7 +959,7 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
StringMap<StringRef> FunctionLineTables;
ListScope D(W, "CodeViewDebugInfo");
- // Print the section to allow correlation with printSections.
+ // Print the section to allow correlation with printSectionHeaders.
W.printNumber("Section", SectionName, Obj->getSectionID(Section));
uint32_t Magic;
@@ -1060,10 +1065,28 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
W.printHex("LocalSize", FD.LocalSize);
W.printHex("ParamsSize", FD.ParamsSize);
W.printHex("MaxStackSize", FD.MaxStackSize);
- W.printString("FrameFunc", FrameFunc);
W.printHex("PrologSize", FD.PrologSize);
W.printHex("SavedRegsSize", FD.SavedRegsSize);
W.printFlags("Flags", FD.Flags, makeArrayRef(FrameDataFlags));
+
+ // The FrameFunc string is a small RPN program. It can be broken up into
+ // statements that end in the '=' operator, which assigns the value on
+ // the top of the stack to the previously pushed variable. Variables can
+ // be temporary values ($T0) or physical registers ($esp). Print each
+ // assignment on its own line to make these programs easier to read.
+ {
+ ListScope FFS(W, "FrameFunc");
+ while (!FrameFunc.empty()) {
+ size_t EqOrEnd = FrameFunc.find('=');
+ if (EqOrEnd == StringRef::npos)
+ EqOrEnd = FrameFunc.size();
+ else
+ ++EqOrEnd;
+ StringRef Stmt = FrameFunc.substr(0, EqOrEnd);
+ W.printString(Stmt);
+ FrameFunc = FrameFunc.drop_front(EqOrEnd).trim();
+ }
+ }
}
break;
}
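
The FrameFunc change splits the RPN program at each '=' so every assignment prints on its own line. A self-contained sketch of that splitting, using std::string in place of StringRef; the sample program is invented:

    #include <cstdio>
    #include <string>

    // Trim leading spaces; std::string stand-in for StringRef::trim().
    static std::string trimLeft(std::string S) {
      size_t P = S.find_first_not_of(' ');
      return P == std::string::npos ? std::string() : S.substr(P);
    }

    int main() {
      std::string FrameFunc = "$T0 $esp 4 + = $eip $T0 ^ = $esp $T0 4 + =";
      while (!FrameFunc.empty()) {
        size_t EqOrEnd = FrameFunc.find('=');
        if (EqOrEnd == std::string::npos)
          EqOrEnd = FrameFunc.size();
        else
          ++EqOrEnd; // keep the '=' with its statement
        std::printf("%s\n", FrameFunc.substr(0, EqOrEnd).c_str());
        FrameFunc = trimLeft(FrameFunc.substr(EqOrEnd));
      }
    }
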
@@ -1130,7 +1153,7 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
auto CODD = llvm::make_unique<COFFObjectDumpDelegate>(*this, Section, Obj,
SectionContents);
CVSymbolDumper CVSD(W, Types, CodeViewContainer::ObjectFile, std::move(CODD),
- opts::CodeViewSubsectionBytes);
+ CompilationCPUType, opts::CodeViewSubsectionBytes);
CVSymbolArray Symbols;
BinaryStreamReader Reader(BinaryData, llvm::support::little);
if (auto EC = Reader.readArray(Symbols, Reader.getLength())) {
@@ -1143,6 +1166,7 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
W.flush();
error(std::move(EC));
}
+ CompilationCPUType = CVSD.getCompilationCPUType();
W.flush();
}
@@ -1224,7 +1248,9 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
error(object_error::parse_failed);
}
SmallVector<TypeIndex, 128> SourceToDest;
- if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types))
+ Optional<uint32_t> PCHSignature;
+ if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
+ PCHSignature))
return error(std::move(EC));
}
}
@@ -1253,7 +1279,7 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName,
W.flush();
}
-void COFFDumper::printSections() {
+void COFFDumper::printSectionHeaders() {
ListScope SectionsD(W, "Sections");
int SectionNumber = 0;
for (const SectionRef &Sec : Obj->sections()) {
@@ -1339,10 +1365,12 @@ void COFFDumper::printRelocation(const SectionRef &Section,
StringRef SymbolName;
Reloc.getTypeName(RelocName);
symbol_iterator Symbol = Reloc.getSymbol();
+ int64_t SymbolIndex = -1;
if (Symbol != Obj->symbol_end()) {
Expected<StringRef> SymbolNameOrErr = Symbol->getName();
error(errorToErrorCode(SymbolNameOrErr.takeError()));
SymbolName = *SymbolNameOrErr;
+ SymbolIndex = Obj->getSymbolIndex(Obj->getCOFFSymbol(*Symbol));
}
if (opts::ExpandRelocs) {
@@ -1350,11 +1378,13 @@ void COFFDumper::printRelocation(const SectionRef &Section,
W.printHex("Offset", Offset);
W.printNumber("Type", RelocName, RelocType);
W.printString("Symbol", SymbolName.empty() ? "-" : SymbolName);
+ W.printNumber("SymbolIndex", SymbolIndex);
} else {
raw_ostream& OS = W.startLine();
OS << W.hex(Offset)
<< " " << RelocName
<< " " << (SymbolName.empty() ? "-" : SymbolName)
+ << " (" << SymbolIndex << ")"
<< "\n";
}
}
@@ -1525,8 +1555,10 @@ void COFFDumper::printUnwindInfo() {
Dumper.printData(Ctx);
break;
}
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
case COFF::IMAGE_FILE_MACHINE_ARMNT: {
- ARM::WinEH::Decoder Decoder(W);
+ ARM::WinEH::Decoder Decoder(W, Obj->getMachine() ==
+ COFF::IMAGE_FILE_MACHINE_ARM64);
Decoder.dumpProcedureData(*Obj);
break;
}
@@ -1830,6 +1862,49 @@ void COFFDumper::printStackMap() const {
StackMapV2Parser<support::big>(StackMapContentsArray));
}
+void COFFDumper::printAddrsig() {
+ object::SectionRef AddrsigSection;
+ for (auto Sec : Obj->sections()) {
+ StringRef Name;
+ Sec.getName(Name);
+ if (Name == ".llvm_addrsig") {
+ AddrsigSection = Sec;
+ break;
+ }
+ }
+
+ if (AddrsigSection == object::SectionRef())
+ return;
+
+ StringRef AddrsigContents;
+ AddrsigSection.getContents(AddrsigContents);
+ ArrayRef<uint8_t> AddrsigContentsArray(
+ reinterpret_cast<const uint8_t*>(AddrsigContents.data()),
+ AddrsigContents.size());
+
+ ListScope L(W, "Addrsig");
+ auto *Cur = reinterpret_cast<const uint8_t *>(AddrsigContents.begin());
+ auto *End = reinterpret_cast<const uint8_t *>(AddrsigContents.end());
+ while (Cur != End) {
+ unsigned Size;
+ const char *Err;
+ uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err);
+ if (Err)
+ reportError(Err);
+
+ Expected<COFFSymbolRef> Sym = Obj->getSymbol(SymIndex);
+ StringRef SymName;
+ std::error_code EC = errorToErrorCode(Sym.takeError());
+ if (EC || (EC = Obj->getSymbolName(*Sym, SymName))) {
+ SymName = "";
+ error(EC);
+ }
+
+ W.printNumber("Sym", SymName, SymIndex);
+ Cur += Size;
+ }
+}
+
void llvm::dumpCodeViewMergedTypes(
ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable,
llvm::codeview::MergingTypeTableBuilder &CVTypes) {
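
printAddrsig above walks the .llvm_addrsig payload as a sequence of ULEB128-encoded symbol indexes. A minimal standalone decoder for the same encoding; the payload bytes are an example, not real section contents:

    #include <cstdint>
    #include <cstdio>

    // Decode one unsigned LEB128 value; advances *Cur past the bytes consumed.
    static uint64_t decodeOneULEB128(const uint8_t *&Cur, const uint8_t *End) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      while (Cur != End) {
        uint8_t Byte = *Cur++;
        Value |= (uint64_t)(Byte & 0x7f) << Shift;
        if (!(Byte & 0x80)) // high bit clear: last byte of this value
          break;
        Shift += 7;
      }
      return Value;
    }

    int main() {
      const uint8_t Payload[] = {0x05, 0xe5, 0x8e, 0x26}; // encodes 5, then 624485
      const uint8_t *Cur = Payload, *End = Payload + sizeof(Payload);
      while (Cur != End)
        std::printf("Sym index: %llu\n",
                    (unsigned long long)decodeOneULEB128(Cur, End));
    }
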
diff --git a/contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h b/contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
index 5a1eef1d007d..d91d764c4d0a 100644
--- a/contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
+++ b/contrib/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
@@ -16,6 +16,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/Debug.h"
@@ -31,15 +32,15 @@ namespace DwarfCFIEH {
template <typename ELFT>
class PrinterContext {
ScopedPrinter &W;
- const object::ELFFile<ELFT> *Obj;
+ const object::ELFObjectFile<ELFT> *ObjF;
void printEHFrameHdr(uint64_t Offset, uint64_t Address, uint64_t Size) const;
void printEHFrame(const typename ELFT::Shdr *EHFrameShdr) const;
public:
- PrinterContext(ScopedPrinter &W, const object::ELFFile<ELFT> *Obj)
- : W(W), Obj(Obj) {}
+ PrinterContext(ScopedPrinter &W, const object::ELFObjectFile<ELFT> *ObjF)
+ : W(W), ObjF(ObjF) {}
void printUnwindInformation() const;
};
@@ -59,6 +60,7 @@ static const typename ELFO::Elf_Shdr *findSectionByAddress(const ELFO *Obj,
template <typename ELFT>
void PrinterContext<ELFT>::printUnwindInformation() const {
+ const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
const typename ELFT::Phdr *EHFramePhdr = nullptr;
auto PHs = Obj->program_headers();
@@ -101,6 +103,7 @@ void PrinterContext<ELFT>::printEHFrameHdr(uint64_t EHFrameHdrOffset,
W.startLine() << format("Offset: 0x%" PRIx64 "\n", EHFrameHdrOffset);
W.startLine() << format("Size: 0x%" PRIx64 "\n", EHFrameHdrSize);
+ const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
const auto *EHFrameHdrShdr = findSectionByAddress(Obj, EHFrameHdrAddress);
if (EHFrameHdrShdr) {
auto SectionName = Obj->getSectionName(EHFrameHdrShdr);
@@ -173,6 +176,7 @@ void PrinterContext<ELFT>::printEHFrame(
ShOffset, Address);
W.indent();
+ const object::ELFFile<ELFT> *Obj = ObjF->getELFFile();
auto Result = Obj->getSectionContents(EHFrameShdr);
if (Error E = Result.takeError())
reportError(toString(std::move(E)));
@@ -183,7 +187,8 @@ void PrinterContext<ELFT>::printEHFrame(
Contents.size()),
ELFT::TargetEndianness == support::endianness::little,
ELFT::Is64Bits ? 8 : 4);
- DWARFDebugFrame EHFrame(/*IsEH=*/true, /*EHFrameAddress=*/Address);
+ DWARFDebugFrame EHFrame(Triple::ArchType(ObjF->getArch()), /*IsEH=*/true,
+ /*EHFrameAddress=*/Address);
EHFrame.parse(DE);
for (const auto &Entry : EHFrame) {
diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
index 645ec2d7e04b..93254717e921 100644
--- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -22,12 +22,13 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -43,6 +44,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
@@ -139,10 +141,10 @@ struct DynRegionInfo {
template<typename ELFT>
class ELFDumper : public ObjDumper {
public:
- ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer);
+ ELFDumper(const object::ELFObjectFile<ELFT> *ObjF, ScopedPrinter &Writer);
void printFileHeaders() override;
- void printSections() override;
+ void printSectionHeaders() override;
void printRelocations() override;
void printDynamicRelocations() override;
void printSymbols() override;
@@ -181,6 +183,7 @@ private:
TYPEDEF_ELF_TYPES(ELFT)
DynRegionInfo checkDRI(DynRegionInfo DRI) {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
if (DRI.Addr < Obj->base() ||
(const uint8_t *)DRI.Addr + DRI.Size > Obj->base() + Obj->getBufSize())
error(llvm::object::object_error::parse_failed);
@@ -188,11 +191,11 @@ private:
}
DynRegionInfo createDRIFrom(const Elf_Phdr *P, uintX_t EntSize) {
- return checkDRI({Obj->base() + P->p_offset, P->p_filesz, EntSize});
+ return checkDRI({ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize});
}
DynRegionInfo createDRIFrom(const Elf_Shdr *S) {
- return checkDRI({Obj->base() + S->sh_offset, S->sh_size, S->sh_entsize});
+ return checkDRI({ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize});
}
void parseDynamicTable(ArrayRef<const Elf_Phdr *> LoadSegments);
@@ -206,7 +209,7 @@ private:
void LoadVersionNeeds(const Elf_Shdr *ec) const;
void LoadVersionDefs(const Elf_Shdr *sec) const;
- const ELFO *Obj;
+ const object::ELFObjectFile<ELFT> *ObjF;
DynRegionInfo DynRelRegion;
DynRegionInfo DynRelaRegion;
DynRegionInfo DynRelrRegion;
@@ -289,6 +292,7 @@ void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) const {
StringRef StrTable, SymtabName;
size_t Entries = 0;
Elf_Sym_Range Syms(nullptr, nullptr);
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
if (IsDynamic) {
StrTable = DynamicStringTable;
Syms = dynamic_symbols();
@@ -323,7 +327,7 @@ public:
virtual void printFileHeaders(const ELFFile<ELFT> *Obj) = 0;
virtual void printGroupSections(const ELFFile<ELFT> *Obj) = 0;
virtual void printRelocations(const ELFFile<ELFT> *Obj) = 0;
- virtual void printSections(const ELFFile<ELFT> *Obj) = 0;
+ virtual void printSectionHeaders(const ELFFile<ELFT> *Obj) = 0;
virtual void printSymbols(const ELFFile<ELFT> *Obj) = 0;
virtual void printDynamicSymbols(const ELFFile<ELFT> *Obj) = 0;
virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
@@ -358,7 +362,7 @@ public:
void printFileHeaders(const ELFO *Obj) override;
void printGroupSections(const ELFFile<ELFT> *Obj) override;
void printRelocations(const ELFO *Obj) override;
- void printSections(const ELFO *Obj) override;
+ void printSectionHeaders(const ELFO *Obj) override;
void printSymbols(const ELFO *Obj) override;
void printDynamicSymbols(const ELFO *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
@@ -390,6 +394,33 @@ private:
return to_hexString(Value, false);
}
+ template <typename T, typename TEnum>
+ std::string printFlags(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues,
+ TEnum EnumMask1 = {}, TEnum EnumMask2 = {},
+ TEnum EnumMask3 = {}) {
+ std::string Str;
+ for (const auto &Flag : EnumValues) {
+ if (Flag.Value == 0)
+ continue;
+
+ TEnum EnumMask{};
+ if (Flag.Value & EnumMask1)
+ EnumMask = EnumMask1;
+ else if (Flag.Value & EnumMask2)
+ EnumMask = EnumMask2;
+ else if (Flag.Value & EnumMask3)
+ EnumMask = EnumMask3;
+ bool IsEnum = (Flag.Value & EnumMask) != 0;
+ if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
+ (IsEnum && (Value & EnumMask) == Flag.Value)) {
+ if (!Str.empty())
+ Str += ", ";
+ Str += Flag.AltName;
+ }
+ }
+ return Str;
+ }
+
formatted_raw_ostream &printField(struct Field F) {
if (F.Column != 0)
OS.PadToColumn(F.Column);
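
The new printFlags helper above treats bits covered by the supplied masks as a multi-valued enum field that must match exactly, while ordinary flags only need their bits set. A cut-down sketch of that distinction with a single mask; the Flag struct, the toy bit layout and the sample value are illustrative:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Flag { unsigned Value; const char *Name; };

    // Flags whose bits fall inside EnumMask form one multi-valued field and
    // must match the whole field; other flags are plain bit tests.
    static std::string describe(unsigned Value, const std::vector<Flag> &Flags,
                                unsigned EnumMask) {
      std::string Str;
      for (const Flag &F : Flags) {
        bool IsEnum = (F.Value & EnumMask) != 0;
        if ((!IsEnum && (Value & F.Value) == F.Value) ||
            (IsEnum && (Value & EnumMask) == F.Value)) {
          if (!Str.empty())
            Str += ", ";
          Str += F.Name;
        }
      }
      return Str;
    }

    int main() {
      // Toy layout: bits 0-3 are an "arch" field, bit 4 is an independent flag.
      std::vector<Flag> Flags = {{0x1, "arch1"}, {0x2, "arch2"}, {0x10, "pic"}};
      std::printf("%s\n", describe(0x12, Flags, 0xf).c_str()); // -> "arch2, pic"
    }
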
@@ -424,7 +455,7 @@ public:
void printGroupSections(const ELFFile<ELFT> *Obj) override;
void printRelocations(const ELFO *Obj) override;
void printRelocations(const Elf_Shdr *Sec, const ELFO *Obj);
- void printSections(const ELFO *Obj) override;
+ void printSectionHeaders(const ELFO *Obj) override;
void printSymbols(const ELFO *Obj) override;
void printDynamicSymbols(const ELFO *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
@@ -451,7 +482,7 @@ private:
namespace llvm {
template <class ELFT>
-static std::error_code createELFDumper(const ELFFile<ELFT> *Obj,
+static std::error_code createELFDumper(const ELFObjectFile<ELFT> *Obj,
ScopedPrinter &Writer,
std::unique_ptr<ObjDumper> &Result) {
Result.reset(new ELFDumper<ELFT>(Obj, Writer));
@@ -463,19 +494,19 @@ std::error_code createELFDumper(const object::ObjectFile *Obj,
std::unique_ptr<ObjDumper> &Result) {
// Little-endian 32-bit
if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
- return createELFDumper(ELFObj->getELFFile(), Writer, Result);
+ return createELFDumper(ELFObj, Writer, Result);
// Big-endian 32-bit
if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
- return createELFDumper(ELFObj->getELFFile(), Writer, Result);
+ return createELFDumper(ELFObj, Writer, Result);
// Little-endian 64-bit
if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
- return createELFDumper(ELFObj->getELFFile(), Writer, Result);
+ return createELFDumper(ELFObj, Writer, Result);
// Big-endian 64-bit
if (const ELF64BEObjectFile *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
- return createELFDumper(ELFObj->getELFFile(), Writer, Result);
+ return createELFDumper(ELFObj, Writer, Result);
return readobj_error::unsupported_obj_file_format;
}
@@ -488,7 +519,7 @@ template <class ELFT>
void ELFDumper<ELFT>::LoadVersionNeeds(const Elf_Shdr *sec) const {
unsigned vn_size = sec->sh_size; // Size of section in bytes
unsigned vn_count = sec->sh_info; // Number of Verneed entries
- const char *sec_start = (const char *)Obj->base() + sec->sh_offset;
+ const char *sec_start = (const char *)ObjF->getELFFile()->base() + sec->sh_offset;
const char *sec_end = sec_start + vn_size;
// The first Verneed entry is at the start of the section.
const char *p = sec_start;
@@ -522,7 +553,7 @@ template <class ELFT>
void ELFDumper<ELFT>::LoadVersionDefs(const Elf_Shdr *sec) const {
unsigned vd_size = sec->sh_size; // Size of section in bytes
unsigned vd_count = sec->sh_info; // Number of Verdef entries
- const char *sec_start = (const char *)Obj->base() + sec->sh_offset;
+ const char *sec_start = (const char *)ObjF->getELFFile()->base() + sec->sh_offset;
const char *sec_end = sec_start + vd_size;
// The first Verdef entry is at the start of the section.
const char *p = sec_start;
@@ -547,7 +578,7 @@ template <class ELFT> void ELFDumper<ELFT>::LoadVersionMap() const {
return;
// Has the VersionMap already been loaded?
- if (VersionMap.size() > 0)
+ if (!VersionMap.empty())
return;
// The first two version indexes are reserved.
@@ -611,9 +642,12 @@ static void printVersionDefinitionSection(ELFDumper<ELFT> *Dumper,
// is determined by DT_VERDEFNUM tag.
unsigned VerDefsNum = 0;
for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
- if (Dyn.d_tag == DT_VERDEFNUM)
+ if (Dyn.d_tag == DT_VERDEFNUM) {
VerDefsNum = Dyn.d_un.d_val;
+ break;
+ }
}
+
const uint8_t *SecStartAddress =
(const uint8_t *)Obj->base() + Sec->sh_offset;
const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
@@ -664,9 +698,12 @@ static void printVersionDependencySection(ELFDumper<ELFT> *Dumper,
return;
unsigned VerNeedNum = 0;
- for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table())
- if (Dyn.d_tag == DT_VERNEEDNUM)
+ for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
+ if (Dyn.d_tag == DT_VERNEEDNUM) {
VerNeedNum = Dyn.d_un.d_val;
+ break;
+ }
+ }
const uint8_t *SecData = (const uint8_t *)Obj->base() + Sec->sh_offset;
const typename ELFO::Elf_Shdr *StrTab =
@@ -700,13 +737,13 @@ static void printVersionDependencySection(ELFDumper<ELFT> *Dumper,
template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
// Dump version symbol section.
- printVersionSymbolSection(this, Obj, dot_gnu_version_sec, W);
+ printVersionSymbolSection(this, ObjF->getELFFile(), dot_gnu_version_sec, W);
// Dump version definition section.
- printVersionDefinitionSection(this, Obj, dot_gnu_version_d_sec, W);
+ printVersionDefinitionSection(this, ObjF->getELFFile(), dot_gnu_version_d_sec, W);
// Dump version dependency section.
- printVersionDependencySection(this, Obj, dot_gnu_version_r_sec, W);
+ printVersionDependencySection(this, ObjF->getELFFile(), dot_gnu_version_r_sec, W);
}
template <typename ELFT>
@@ -727,7 +764,7 @@ StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
// Get the corresponding version index entry
const Elf_Versym *vs = unwrapOrError(
- Obj->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index));
+ ObjF->getELFFile()->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index));
size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
// Special markers for unversioned symbols.
@@ -760,6 +797,7 @@ StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
template <typename ELFT>
StringRef ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
if (Index >= Syms.size())
@@ -780,8 +818,10 @@ std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
bool IsDefault;
StringRef Version = getSymbolVersion(StrTable, &*Symbol, IsDefault);
- FullSymbolName += (IsDefault ? "@@" : "@");
- FullSymbolName += Version;
+ if (!Version.empty()) {
+ FullSymbolName += (IsDefault ? "@@" : "@");
+ FullSymbolName += Version;
+ }
return FullSymbolName;
}
@@ -807,6 +847,7 @@ void ELFDumper<ELFT>::getSectionNameIndex(const Elf_Sym *Symbol,
if (SectionIndex == SHN_XINDEX)
SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
Symbol, FirstSym, ShndxTable));
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const typename ELFT::Shdr *Sec =
unwrapOrError(Obj->getSection(SectionIndex));
SectionName = unwrapOrError(Obj->getSectionName(Sec));
@@ -1167,6 +1208,7 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
switch (Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX);
}
+ break;
case ELF::EM_MIPS:
case ELF::EM_MIPS_RS3_LE:
switch (Type) {
@@ -1175,6 +1217,7 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_OPTIONS);
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS);
}
+ break;
}
switch (Type) {
@@ -1221,7 +1264,7 @@ static std::string getElfPtType(unsigned Arch, unsigned Type) {
case ELF::EM_ARM:
if (Type == ELF::PT_ARM_EXIDX)
return "EXIDX";
- return "";
+ break;
case ELF::EM_MIPS:
case ELF::EM_MIPS_RS3_LE:
switch (Type) {
@@ -1234,7 +1277,7 @@ static std::string getElfPtType(unsigned Arch, unsigned Type) {
case PT_MIPS_ABIFLAGS:
return "ABIFLAGS";
}
- return "";
+ break;
}
}
return std::string("<unknown>: ") + to_string(format_hex(Type, 1));
@@ -1247,49 +1290,49 @@ static const EnumEntry<unsigned> ElfSegmentFlags[] = {
};
static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_NOREORDER),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_PIC),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_CPIC),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_32BITMODE),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_FP64),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_NAN2008),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI_O32),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI_O64),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI_EABI32),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ABI_EABI64),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_3900),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4010),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4100),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4650),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4120),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_4111),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_SB1),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_OCTEON),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_XLR),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_OCTEON2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_OCTEON3),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_5400),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_5900),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_5500),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_9000),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_LS2E),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_LS2F),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MACH_LS3A),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_MICROMIPS),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_ASE_M16),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_ASE_MDMX),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_1),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_3),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_4),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_5),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_32),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_64),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_32R2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_64R2),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_32R6),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_MIPS_ARCH_64R6)
+ ENUM_ENT(EF_MIPS_NOREORDER, "noreorder"),
+ ENUM_ENT(EF_MIPS_PIC, "pic"),
+ ENUM_ENT(EF_MIPS_CPIC, "cpic"),
+ ENUM_ENT(EF_MIPS_ABI2, "abi2"),
+ ENUM_ENT(EF_MIPS_32BITMODE, "32bitmode"),
+ ENUM_ENT(EF_MIPS_FP64, "fp64"),
+ ENUM_ENT(EF_MIPS_NAN2008, "nan2008"),
+ ENUM_ENT(EF_MIPS_ABI_O32, "o32"),
+ ENUM_ENT(EF_MIPS_ABI_O64, "o64"),
+ ENUM_ENT(EF_MIPS_ABI_EABI32, "eabi32"),
+ ENUM_ENT(EF_MIPS_ABI_EABI64, "eabi64"),
+ ENUM_ENT(EF_MIPS_MACH_3900, "3900"),
+ ENUM_ENT(EF_MIPS_MACH_4010, "4010"),
+ ENUM_ENT(EF_MIPS_MACH_4100, "4100"),
+ ENUM_ENT(EF_MIPS_MACH_4650, "4650"),
+ ENUM_ENT(EF_MIPS_MACH_4120, "4120"),
+ ENUM_ENT(EF_MIPS_MACH_4111, "4111"),
+ ENUM_ENT(EF_MIPS_MACH_SB1, "sb1"),
+ ENUM_ENT(EF_MIPS_MACH_OCTEON, "octeon"),
+ ENUM_ENT(EF_MIPS_MACH_XLR, "xlr"),
+ ENUM_ENT(EF_MIPS_MACH_OCTEON2, "octeon2"),
+ ENUM_ENT(EF_MIPS_MACH_OCTEON3, "octeon3"),
+ ENUM_ENT(EF_MIPS_MACH_5400, "5400"),
+ ENUM_ENT(EF_MIPS_MACH_5900, "5900"),
+ ENUM_ENT(EF_MIPS_MACH_5500, "5500"),
+ ENUM_ENT(EF_MIPS_MACH_9000, "9000"),
+ ENUM_ENT(EF_MIPS_MACH_LS2E, "loongson-2e"),
+ ENUM_ENT(EF_MIPS_MACH_LS2F, "loongson-2f"),
+ ENUM_ENT(EF_MIPS_MACH_LS3A, "loongson-3a"),
+ ENUM_ENT(EF_MIPS_MICROMIPS, "micromips"),
+ ENUM_ENT(EF_MIPS_ARCH_ASE_M16, "mips16"),
+ ENUM_ENT(EF_MIPS_ARCH_ASE_MDMX, "mdmx"),
+ ENUM_ENT(EF_MIPS_ARCH_1, "mips1"),
+ ENUM_ENT(EF_MIPS_ARCH_2, "mips2"),
+ ENUM_ENT(EF_MIPS_ARCH_3, "mips3"),
+ ENUM_ENT(EF_MIPS_ARCH_4, "mips4"),
+ ENUM_ENT(EF_MIPS_ARCH_5, "mips5"),
+ ENUM_ENT(EF_MIPS_ARCH_32, "mips32"),
+ ENUM_ENT(EF_MIPS_ARCH_64, "mips64"),
+ ENUM_ENT(EF_MIPS_ARCH_32R2, "mips32r2"),
+ ENUM_ENT(EF_MIPS_ARCH_64R2, "mips64r2"),
+ ENUM_ENT(EF_MIPS_ARCH_32R6, "mips32r6"),
+ ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
@@ -1325,15 +1368,17 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK)
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};
static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_RVC),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_FLOAT_ABI_SINGLE),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_FLOAT_ABI_DOUBLE),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_FLOAT_ABI_QUAD),
- LLVM_READOBJ_ENUM_ENT(ELF, EF_RISCV_RVE)
+ ENUM_ENT(EF_RISCV_RVC, "RVC"),
+ ENUM_ENT(EF_RISCV_FLOAT_ABI_SINGLE, "single-float ABI"),
+ ENUM_ENT(EF_RISCV_FLOAT_ABI_DOUBLE, "double-float ABI"),
+ ENUM_ENT(EF_RISCV_FLOAT_ABI_QUAD, "quad-float ABI"),
+ ENUM_ENT(EF_RISCV_RVE, "RVE")
};
static const EnumEntry<unsigned> ElfSymOtherFlags[] = {
@@ -1375,9 +1420,11 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
}
template <typename ELFT>
-ELFDumper<ELFT>::ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer)
- : ObjDumper(Writer), Obj(Obj) {
+ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
+ ScopedPrinter &Writer)
+ : ObjDumper(Writer), ObjF(ObjF) {
SmallVector<const Elf_Phdr *, 4> LoadSegments;
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
if (Phdr.p_type == ELF::PT_DYNAMIC) {
DynamicTable = createDRIFrom(&Phdr, sizeof(Elf_Dyn));
@@ -1423,7 +1470,7 @@ ELFDumper<ELFT>::ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer)
break;
case ELF::SHT_LLVM_CALL_GRAPH_PROFILE:
if (DotCGProfileSec != nullptr)
- reportError("Multiple .note.llvm.cgprofile");
+ reportError("Multiple .llvm.call-graph-profile");
DotCGProfileSec = &Sec;
break;
case ELF::SHT_LLVM_ADDRSIG:
@@ -1446,19 +1493,10 @@ template <typename ELFT>
void ELFDumper<ELFT>::parseDynamicTable(
ArrayRef<const Elf_Phdr *> LoadSegments) {
auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * {
- const Elf_Phdr *const *I =
- std::upper_bound(LoadSegments.begin(), LoadSegments.end(), VAddr,
- [](uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) {
- return VAddr < Phdr->p_vaddr;
- });
- if (I == LoadSegments.begin())
- report_fatal_error("Virtual address is not in any segment");
- --I;
- const Elf_Phdr &Phdr = **I;
- uint64_t Delta = VAddr - Phdr.p_vaddr;
- if (Delta >= Phdr.p_filesz)
- report_fatal_error("Virtual address is not in any segment");
- return Obj->base() + Phdr.p_offset + Delta;
+ auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr);
+ if (!MappedAddrOrError)
+ report_fatal_error(MappedAddrOrError.takeError());
+ return MappedAddrOrError.get();
};
uint64_t SONameOffset = 0;
@@ -1557,51 +1595,51 @@ typename ELFDumper<ELFT>::Elf_Relr_Range ELFDumper<ELFT>::dyn_relrs() const {
template<class ELFT>
void ELFDumper<ELFT>::printFileHeaders() {
- ELFDumperStyle->printFileHeaders(Obj);
+ ELFDumperStyle->printFileHeaders(ObjF->getELFFile());
}
template<class ELFT>
-void ELFDumper<ELFT>::printSections() {
- ELFDumperStyle->printSections(Obj);
+void ELFDumper<ELFT>::printSectionHeaders() {
+ ELFDumperStyle->printSectionHeaders(ObjF->getELFFile());
}
template<class ELFT>
void ELFDumper<ELFT>::printRelocations() {
- ELFDumperStyle->printRelocations(Obj);
+ ELFDumperStyle->printRelocations(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printProgramHeaders() {
- ELFDumperStyle->printProgramHeaders(Obj);
+ ELFDumperStyle->printProgramHeaders(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printDynamicRelocations() {
- ELFDumperStyle->printDynamicRelocations(Obj);
+ ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile());
}
template<class ELFT>
void ELFDumper<ELFT>::printSymbols() {
- ELFDumperStyle->printSymbols(Obj);
+ ELFDumperStyle->printSymbols(ObjF->getELFFile());
}
template<class ELFT>
void ELFDumper<ELFT>::printDynamicSymbols() {
- ELFDumperStyle->printDynamicSymbols(Obj);
+ ELFDumperStyle->printDynamicSymbols(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printHashHistogram() {
- ELFDumperStyle->printHashHistogram(Obj);
+ ELFDumperStyle->printHashHistogram(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printCGProfile() {
- ELFDumperStyle->printCGProfile(Obj);
+ ELFDumperStyle->printCGProfile(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printNotes() {
- ELFDumperStyle->printNotes(Obj);
+ ELFDumperStyle->printNotes(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printELFLinkerOptions() {
- ELFDumperStyle->printELFLinkerOptions(Obj);
+ ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile());
}
static const char *getTypeString(unsigned Arch, uint64_t Type) {
@@ -1610,29 +1648,32 @@ static const char *getTypeString(unsigned Arch, uint64_t Type) {
case EM_HEXAGON:
switch (Type) {
#define HEXAGON_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
+ case DT_##name: \
+ return #name;
#include "llvm/BinaryFormat/DynamicTags.def"
#undef HEXAGON_DYNAMIC_TAG
}
+ break;
case EM_MIPS:
switch (Type) {
#define MIPS_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
+ case DT_##name: \
+ return #name;
#include "llvm/BinaryFormat/DynamicTags.def"
#undef MIPS_DYNAMIC_TAG
}
+ break;
- case EM_PPC64:
- switch(Type) {
+ case EM_PPC64:
+ switch(Type) {
#define PPC64_DYNAMIC_TAG(name, value) \
case DT_##name: \
return #name;
#include "llvm/BinaryFormat/DynamicTags.def"
#undef PPC64_DYNAMIC_TAG
}
+ break;
}
#undef DYNAMIC_TAG
switch (Type) {
@@ -1842,9 +1883,9 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
template<class ELFT>
void ELFDumper<ELFT>::printUnwindInfo() {
- const unsigned Machine = Obj->getHeader()->e_machine;
+ const unsigned Machine = ObjF->getELFFile()->getHeader()->e_machine;
if (Machine == EM_386 || Machine == EM_X86_64) {
- DwarfCFIEH::PrinterContext<ELFT> Ctx(W, Obj);
+ DwarfCFIEH::PrinterContext<ELFT> Ctx(W, ObjF);
return Ctx.printUnwindInformation();
}
W.startLine() << "UnwindInfo not implemented.\n";
@@ -1853,6 +1894,7 @@ void ELFDumper<ELFT>::printUnwindInfo() {
namespace {
template <> void ELFDumper<ELF32LE>::printUnwindInfo() {
+ const ELFFile<ELF32LE> *Obj = ObjF->getELFFile();
const unsigned Machine = Obj->getHeader()->e_machine;
if (Machine == EM_ARM) {
ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, DotSymtabSec);
@@ -1895,7 +1937,7 @@ void ELFDumper<ELFT>::printDynamicTable() {
uintX_t Tag = Entry.getTag();
++I;
W.startLine() << " " << format_hex(Tag, Is64 ? 18 : 10, opts::Output != opts::GNU) << " "
- << format("%-21s", getTypeString(Obj->getHeader()->e_machine, Tag));
+ << format("%-21s", getTypeString(ObjF->getELFFile()->getHeader()->e_machine, Tag));
printValue(Tag, Entry.getVal());
OS << "\n";
}
@@ -1962,6 +2004,7 @@ void ELFDumper<ELFT>::printAttributes() {
namespace {
template <> void ELFDumper<ELF32LE>::printAttributes() {
+ const ELFFile<ELF32LE> *Obj = ObjF->getELFFile();
if (Obj->getHeader()->e_machine != EM_ARM) {
W.startLine() << "Attributes not implemented.\n";
return;
@@ -2247,6 +2290,7 @@ MipsGOTParser<ELFT>::getPltSym(const Entry *E) const {
}
template <class ELFT> void ELFDumper<ELFT>::printMipsPLTGOT() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
if (Obj->getHeader()->e_machine != EM_MIPS)
reportError("MIPS PLT GOT is available for MIPS targets only");
@@ -2331,6 +2375,7 @@ static int getMipsRegisterSize(uint8_t Flag) {
}
template <class ELFT> void ELFDumper<ELFT>::printMipsABIFlags() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.abiflags");
if (!Shdr) {
W.startLine() << "There is no .MIPS.abiflags section in the file.\n";
@@ -2376,6 +2421,7 @@ static void printMipsReginfoData(ScopedPrinter &W,
}
template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *Shdr = findSectionByName(*Obj, ".reginfo");
if (!Shdr) {
W.startLine() << "There is no .reginfo section in the file.\n";
@@ -2393,6 +2439,7 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
}
template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.options");
if (!Shdr) {
W.startLine() << "There is no .MIPS.options section in the file.\n";
@@ -2422,6 +2469,7 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
}
template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
const Elf_Shdr *StackMapSection = nullptr;
for (const auto &Sec : unwrapOrError(Obj->sections())) {
StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
@@ -2442,11 +2490,11 @@ template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
}
template <class ELFT> void ELFDumper<ELFT>::printGroupSections() {
- ELFDumperStyle->printGroupSections(Obj);
+ ELFDumperStyle->printGroupSections(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printAddrsig() {
- ELFDumperStyle->printAddrsig(Obj);
+ ELFDumperStyle->printAddrsig(ObjF->getELFFile());
}
static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
@@ -2517,7 +2565,17 @@ template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
printFields(OS, "Start of program headers:", Str);
Str = to_string(e->e_shoff) + " (bytes into file)";
printFields(OS, "Start of section headers:", Str);
+ std::string ElfFlags;
+ if (e->e_machine == EM_MIPS)
+ ElfFlags =
+ printFlags(e->e_flags, makeArrayRef(ElfHeaderMipsFlags),
+ unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
+ unsigned(ELF::EF_MIPS_MACH));
+ else if (e->e_machine == EM_RISCV)
+ ElfFlags = printFlags(e->e_flags, makeArrayRef(ElfHeaderRISCVFlags));
Str = "0x" + to_hexString(e->e_flags);
+ if (!ElfFlags.empty())
+ Str = Str + ", " + ElfFlags;
printFields(OS, "Flags:", Str);
Str = to_string(e->e_ehsize) + " (bytes)";
printFields(OS, "Size of this header:", Str);
@@ -2791,11 +2849,13 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
case SHT_ARM_OVERLAYSECTION:
return "ARM_OVERLAYSECTION";
}
+ break;
case EM_X86_64:
switch (Type) {
case SHT_X86_64_UNWIND:
return "X86_64_UNWIND";
}
+ break;
case EM_MIPS:
case EM_MIPS_RS3_LE:
switch (Type) {
@@ -2808,6 +2868,7 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
case SHT_MIPS_DWARF:
return "SHT_MIPS_DWARF";
}
+ break;
}
switch (Type) {
case SHT_NULL:
@@ -2872,7 +2933,8 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
return "";
}
-template <class ELFT> void GNUStyle<ELFT>::printSections(const ELFO *Obj) {
+template <class ELFT>
+void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
size_t SectionIndex = 0;
std::string Number, Type, Size, Address, Offset, Flags, Link, Info, EntrySize,
Alignment;
@@ -3583,7 +3645,7 @@ static std::string getFreeBSDNoteTypeName(const uint32_t NT) {
return OS.str();
}
-static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
+static std::string getAMDNoteTypeName(const uint32_t NT) {
static const struct {
uint32_t ID;
const char *Name;
@@ -3606,41 +3668,52 @@ static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
return OS.str();
}
+static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
+ if (NT == ELF::NT_AMDGPU_METADATA)
+ return std::string("NT_AMDGPU_METADATA (AMDGPU Metadata)");
+
+ std::string string;
+ raw_string_ostream OS(string);
+ OS << format("Unknown note type (0x%08x)", NT);
+ return OS.str();
+}
+
template <typename ELFT>
-static void printGNUProperty(raw_ostream &OS, uint32_t Type, uint32_t DataSize,
- ArrayRef<uint8_t> Data) {
+static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
+ ArrayRef<uint8_t> Data) {
+ std::string str;
+ raw_string_ostream OS(str);
switch (Type) {
default:
- OS << format(" <application-specific type 0x%x>\n", Type);
- return;
+ OS << format("<application-specific type 0x%x>", Type);
+ return OS.str();
case GNU_PROPERTY_STACK_SIZE: {
- OS << " stack size: ";
+ OS << "stack size: ";
if (DataSize == sizeof(typename ELFT::uint))
- OS << format("0x%llx\n",
- (uint64_t)(*(const typename ELFT::Addr *)Data.data()));
+ OS << formatv("{0:x}",
+ (uint64_t)(*(const typename ELFT::Addr *)Data.data()));
else
- OS << format("<corrupt length: 0x%x>\n", DataSize);
- break;
+ OS << format("<corrupt length: 0x%x>", DataSize);
+ return OS.str();
}
case GNU_PROPERTY_NO_COPY_ON_PROTECTED:
- OS << " no copy on protected";
+ OS << "no copy on protected";
if (DataSize)
OS << format(" <corrupt length: 0x%x>", DataSize);
- OS << "\n";
- break;
+ return OS.str();
case GNU_PROPERTY_X86_FEATURE_1_AND:
- OS << " X86 features: ";
+ OS << "X86 features: ";
if (DataSize != 4 && DataSize != 8) {
- OS << format("<corrupt length: 0x%x>\n", DataSize);
- break;
+ OS << format("<corrupt length: 0x%x>", DataSize);
+ return OS.str();
}
uint64_t CFProtection =
(DataSize == 4)
? support::endian::read32<ELFT::TargetEndianness>(Data.data())
: support::endian::read64<ELFT::TargetEndianness>(Data.data());
if (CFProtection == 0) {
- OS << "none\n";
- break;
+ OS << "none";
+ return OS.str();
}
if (CFProtection & GNU_PROPERTY_X86_FEATURE_1_IBT) {
OS << "IBT";
@@ -3656,105 +3729,177 @@ static void printGNUProperty(raw_ostream &OS, uint32_t Type, uint32_t DataSize,
}
if (CFProtection)
OS << format("<unknown flags: 0x%llx>", CFProtection);
- OS << "\n";
- break;
+ return OS.str();
}
}
template <typename ELFT>
-static void printGNUNote(raw_ostream &OS, uint32_t NoteType,
- ArrayRef<typename ELFT::Word> Words, size_t Size) {
+static SmallVector<std::string, 4>
+getGNUPropertyList(ArrayRef<uint8_t> Arr) {
using Elf_Word = typename ELFT::Word;
+ SmallVector<std::string, 4> Properties;
+ while (Arr.size() >= 8) {
+ uint32_t Type = *reinterpret_cast<const Elf_Word *>(Arr.data());
+ uint32_t DataSize = *reinterpret_cast<const Elf_Word *>(Arr.data() + 4);
+ Arr = Arr.drop_front(8);
+
+ // Take padding size into account if present.
+ uint64_t PaddedSize = alignTo(DataSize, sizeof(typename ELFT::uint));
+ std::string str;
+ raw_string_ostream OS(str);
+ if (Arr.size() < PaddedSize) {
+ OS << format("<corrupt type (0x%x) datasz: 0x%x>", Type, DataSize);
+ Properties.push_back(OS.str());
+ break;
+ }
+ Properties.push_back(
+ getGNUProperty<ELFT>(Type, DataSize, Arr.take_front(PaddedSize)));
+ Arr = Arr.drop_front(PaddedSize);
+ }
+
+ if (!Arr.empty())
+ Properties.push_back("<corrupted GNU_PROPERTY_TYPE_0>");
+
+ return Properties;
+}
+
+struct GNUAbiTag {
+ std::string OSName;
+ std::string ABI;
+ bool IsValid;
+};
+
+template <typename ELFT>
+static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
+ typedef typename ELFT::Word Elf_Word;
+
+ ArrayRef<Elf_Word> Words(reinterpret_cast<const Elf_Word*>(Desc.begin()),
+ reinterpret_cast<const Elf_Word*>(Desc.end()));
+
+ if (Words.size() < 4)
+ return {"", "", /*IsValid=*/false};
+
+ static const char *OSNames[] = {
+ "Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl",
+ };
+ StringRef OSName = "Unknown";
+ if (Words[0] < array_lengthof(OSNames))
+ OSName = OSNames[Words[0]];
+ uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3];
+ std::string str;
+ raw_string_ostream ABI(str);
+ ABI << Major << "." << Minor << "." << Patch;
+ return {OSName, ABI.str(), /*IsValid=*/true};
+}
+
+static std::string getGNUBuildId(ArrayRef<uint8_t> Desc) {
+ std::string str;
+ raw_string_ostream OS(str);
+ for (const auto &B : Desc)
+ OS << format_hex_no_prefix(B, 2);
+ return OS.str();
+}
+
+static StringRef getGNUGoldVersion(ArrayRef<uint8_t> Desc) {
+ return StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+}
+
+template <typename ELFT>
+static void printGNUNote(raw_ostream &OS, uint32_t NoteType,
+ ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return;
case ELF::NT_GNU_ABI_TAG: {
- static const char *OSNames[] = {
- "Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl",
- };
-
- StringRef OSName = "Unknown";
- if (Words[0] < array_lengthof(OSNames))
- OSName = OSNames[Words[0]];
- uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3];
-
- if (Words.size() < 4)
+ const GNUAbiTag &AbiTag = getGNUAbiTag<ELFT>(Desc);
+ if (!AbiTag.IsValid)
OS << " <corrupt GNU_ABI_TAG>";
else
- OS << " OS: " << OSName << ", ABI: " << Major << "." << Minor << "."
- << Patch;
+ OS << " OS: " << AbiTag.OSName << ", ABI: " << AbiTag.ABI;
break;
}
case ELF::NT_GNU_BUILD_ID: {
- OS << " Build ID: ";
- ArrayRef<uint8_t> ID(reinterpret_cast<const uint8_t *>(Words.data()), Size);
- for (const auto &B : ID)
- OS << format_hex_no_prefix(B, 2);
+ OS << " Build ID: " << getGNUBuildId(Desc);
break;
}
case ELF::NT_GNU_GOLD_VERSION:
- OS << " Version: "
- << StringRef(reinterpret_cast<const char *>(Words.data()), Size);
+ OS << " Version: " << getGNUGoldVersion(Desc);
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
OS << " Properties:";
-
- ArrayRef<uint8_t> Arr(reinterpret_cast<const uint8_t *>(Words.data()),
- Size);
- while (Arr.size() >= 8) {
- uint32_t Type = *reinterpret_cast<const Elf_Word *>(Arr.data());
- uint32_t DataSize = *reinterpret_cast<const Elf_Word *>(Arr.data() + 4);
- Arr = Arr.drop_front(8);
-
- // Take padding size into account if present.
- uint64_t PaddedSize = alignTo(DataSize, sizeof(typename ELFT::uint));
- if (Arr.size() < PaddedSize) {
- OS << format(" <corrupt type (0x%x) datasz: 0x%x>\n", Type,
- DataSize);
- break;
- }
- printGNUProperty<ELFT>(OS, Type, DataSize, Arr.take_front(PaddedSize));
- Arr = Arr.drop_front(PaddedSize);
- }
-
- if (!Arr.empty())
- OS << " <corrupted GNU_PROPERTY_TYPE_0>";
+ for (const auto &Property : getGNUPropertyList<ELFT>(Desc))
+ OS << " " << Property << "\n";
break;
}
OS << '\n';
}
+struct AMDNote {
+ std::string Type;
+ std::string Value;
+};
+
template <typename ELFT>
-static void printAMDGPUNote(raw_ostream &OS, uint32_t NoteType,
- ArrayRef<typename ELFT::Word> Words, size_t Size) {
+static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
- return;
- case ELF::NT_AMD_AMDGPU_HSA_METADATA:
- OS << " HSA Metadata:\n"
- << StringRef(reinterpret_cast<const char *>(Words.data()), Size);
- break;
- case ELF::NT_AMD_AMDGPU_ISA:
- OS << " ISA Version:\n"
- << " "
- << StringRef(reinterpret_cast<const char *>(Words.data()), Size);
- break;
- case ELF::NT_AMD_AMDGPU_PAL_METADATA:
- const uint32_t *PALMetadataBegin = reinterpret_cast<const uint32_t *>(Words.data());
- const uint32_t *PALMetadataEnd = PALMetadataBegin + Size;
- std::vector<uint32_t> PALMetadata(PALMetadataBegin, PALMetadataEnd);
- std::string PALMetadataString;
- auto Error = AMDGPU::PALMD::toString(PALMetadata, PALMetadataString);
- OS << " PAL Metadata:\n";
- if (Error) {
- OS << " Invalid";
- return;
- }
- OS << PALMetadataString;
- break;
+ return {"", ""};
+ case ELF::NT_AMD_AMDGPU_HSA_METADATA:
+ return {"HSA Metadata",
+ std::string(reinterpret_cast<const char *>(Desc.data()),
+ Desc.size())};
+ case ELF::NT_AMD_AMDGPU_ISA:
+ return {"ISA Version",
+ std::string(reinterpret_cast<const char *>(Desc.data()),
+ Desc.size())};
+ case ELF::NT_AMD_AMDGPU_PAL_METADATA:
+ const uint32_t *PALMetadataBegin =
+ reinterpret_cast<const uint32_t *>(Desc.data());
+ const uint32_t *PALMetadataEnd = PALMetadataBegin + Desc.size();
+ std::vector<uint32_t> PALMetadata(PALMetadataBegin, PALMetadataEnd);
+ std::string PALMetadataString;
+ auto Error = AMDGPU::PALMD::toString(PALMetadata, PALMetadataString);
+ if (Error) {
+ return {"PAL Metadata", "Invalid"};
+ }
+ return {"PAL Metadata", PALMetadataString};
+ }
+}
+
+struct AMDGPUNote {
+ std::string Type;
+ std::string Value;
+};
+
+template <typename ELFT>
+static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
+ switch (NoteType) {
+ default:
+ return {"", ""};
+ case ELF::NT_AMDGPU_METADATA:
+ auto MsgPackString =
+ StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+ msgpack::Reader MsgPackReader(MsgPackString);
+ auto OptMsgPackNodeOrErr = msgpack::Node::read(MsgPackReader);
+ if (errorToBool(OptMsgPackNodeOrErr.takeError()))
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+ auto &OptMsgPackNode = *OptMsgPackNodeOrErr;
+ if (!OptMsgPackNode)
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+ auto &MsgPackNode = *OptMsgPackNode;
+
+ AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+ if (!Verifier.verify(*MsgPackNode))
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << MsgPackNode;
+
+ return {"AMDGPU Metadata", StrOS.str()};
}
- OS.flush();
}
template <class ELFT>
@@ -3771,7 +3916,7 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
auto ProcessNote = [&](const Elf_Note &Note) {
StringRef Name = Note.getName();
- ArrayRef<Elf_Word> Descriptor = Note.getDesc();
+ ArrayRef<uint8_t> Descriptor = Note.getDesc();
Elf_Word Type = Note.getType();
OS << " " << Name << std::string(22 - Name.size(), ' ')
@@ -3779,12 +3924,19 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
if (Name == "GNU") {
OS << getGNUNoteTypeName(Type) << '\n';
- printGNUNote<ELFT>(OS, Type, Descriptor, Descriptor.size());
+ printGNUNote<ELFT>(OS, Type, Descriptor);
} else if (Name == "FreeBSD") {
OS << getFreeBSDNoteTypeName(Type) << '\n';
} else if (Name == "AMD") {
+ OS << getAMDNoteTypeName(Type) << '\n';
+ const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ OS << " " << N.Type << ":\n " << N.Value << '\n';
+ } else if (Name == "AMDGPU") {
OS << getAMDGPUNoteTypeName(Type) << '\n';
- printAMDGPUNote<ELFT>(OS, Type, Descriptor, Descriptor.size());
+ const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ OS << " " << N.Type << ":\n " << N.Value << '\n';
} else {
OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
}
@@ -4123,7 +4275,8 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
}
}
-template <class ELFT> void LLVMStyle<ELFT>::printSections(const ELFO *Obj) {
+template <class ELFT>
+void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
ListScope SectionsD(W, "Sections");
int SectionIndex = -1;
@@ -4379,7 +4532,7 @@ void LLVMStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
while (Cur != End) {
unsigned Size;
const char *Err;
- uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err);
+ uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err);
if (Err)
reportError(Err);
W.printNumber("Sym", this->dumper()->getStaticSymbolName(SymIndex),
@@ -4388,9 +4541,103 @@ void LLVMStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
}
}
+template <typename ELFT>
+static void printGNUNoteLLVMStyle(uint32_t NoteType,
+ ArrayRef<uint8_t> Desc,
+ ScopedPrinter &W) {
+ switch (NoteType) {
+ default:
+ return;
+ case ELF::NT_GNU_ABI_TAG: {
+ const GNUAbiTag &AbiTag = getGNUAbiTag<ELFT>(Desc);
+ if (!AbiTag.IsValid) {
+ W.printString("ABI", "<corrupt GNU_ABI_TAG>");
+ } else {
+ W.printString("OS", AbiTag.OSName);
+ W.printString("ABI", AbiTag.ABI);
+ }
+ break;
+ }
+ case ELF::NT_GNU_BUILD_ID: {
+ W.printString("Build ID", getGNUBuildId(Desc));
+ break;
+ }
+ case ELF::NT_GNU_GOLD_VERSION:
+ W.printString("Version", getGNUGoldVersion(Desc));
+ break;
+ case ELF::NT_GNU_PROPERTY_TYPE_0:
+ ListScope D(W, "Property");
+ for (const auto &Property : getGNUPropertyList<ELFT>(Desc))
+ W.printString(Property);
+ break;
+ }
+}
+
template <class ELFT>
void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
- W.startLine() << "printNotes not implemented!\n";
+ ListScope L(W, "Notes");
+ const Elf_Ehdr *e = Obj->getHeader();
+ bool IsCore = e->e_type == ELF::ET_CORE;
+
+ auto PrintHeader = [&](const typename ELFT::Off Offset,
+ const typename ELFT::Addr Size) {
+ W.printHex("Offset", Offset);
+ W.printHex("Size", Size);
+ };
+
+ auto ProcessNote = [&](const Elf_Note &Note) {
+ DictScope D2(W, "Note");
+ StringRef Name = Note.getName();
+ ArrayRef<uint8_t> Descriptor = Note.getDesc();
+ Elf_Word Type = Note.getType();
+
+ W.printString("Owner", Name);
+ W.printHex("Data size", Descriptor.size());
+ if (Name == "GNU") {
+ W.printString("Type", getGNUNoteTypeName(Type));
+ printGNUNoteLLVMStyle<ELFT>(Type, Descriptor, W);
+ } else if (Name == "FreeBSD") {
+ W.printString("Type", getFreeBSDNoteTypeName(Type));
+ } else if (Name == "AMD") {
+ W.printString("Type", getAMDNoteTypeName(Type));
+ const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ W.printString(N.Type, N.Value);
+ } else if (Name == "AMDGPU") {
+ W.printString("Type", getAMDGPUNoteTypeName(Type));
+ const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ W.printString(N.Type, N.Value);
+ } else {
+ W.getOStream() << "Unknown note type: (" << format_hex(Type, 10) << ')';
+ }
+ };
+
+ if (IsCore) {
+ for (const auto &P : unwrapOrError(Obj->program_headers())) {
+ if (P.p_type != PT_NOTE)
+ continue;
+ DictScope D(W, "NoteSection");
+ PrintHeader(P.p_offset, P.p_filesz);
+ Error Err = Error::success();
+ for (const auto &Note : Obj->notes(P, Err))
+ ProcessNote(Note);
+ if (Err)
+ error(std::move(Err));
+ }
+ } else {
+ for (const auto &S : unwrapOrError(Obj->sections())) {
+ if (S.sh_type != SHT_NOTE)
+ continue;
+ DictScope D(W, "NoteSection");
+ PrintHeader(S.sh_offset, S.sh_size);
+ Error Err = Error::success();
+ for (const auto &Note : Obj->notes(S, Err))
+ ProcessNote(Note);
+ if (Err)
+ error(std::move(Err));
+ }
+ }
}
template <class ELFT>
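For context on the descriptor layout that the new getGNUPropertyList<ELFT>() helper above walks, here is a minimal standalone sketch (not the llvm-readobj code) of parsing an NT_GNU_PROPERTY_TYPE_0 note body: each property is a 4-byte type, a 4-byte datasz, then datasz bytes of data padded up to the ELF word size. The 8-byte alignment, the sample bytes, and the helper names are assumptions for illustration only.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

struct GnuProperty {
  uint32_t Type;
  std::vector<uint8_t> Data;
};

// Walks a GNU_PROPERTY_TYPE_0 descriptor: 4-byte type, 4-byte datasz,
// then datasz bytes padded up to Align (8 for ELF64, 4 for ELF32).
static std::vector<GnuProperty> parseProperties(const uint8_t *Desc,
                                                size_t Size, size_t Align) {
  std::vector<GnuProperty> Out;
  size_t Off = 0;
  while (Size - Off >= 8) {
    uint32_t Type, DataSize;
    std::memcpy(&Type, Desc + Off, 4);      // assumes little-endian input
    std::memcpy(&DataSize, Desc + Off + 4, 4);
    Off += 8;
    size_t Padded = (DataSize + Align - 1) & ~(Align - 1);
    if (Size - Off < Padded) // corrupt: datasz runs past the descriptor
      break;
    GnuProperty P;
    P.Type = Type;
    P.Data.assign(Desc + Off, Desc + Off + DataSize);
    Out.push_back(std::move(P));
    Off += Padded;
  }
  return Out;
}

int main() {
  // One property: GNU_PROPERTY_X86_FEATURE_1_AND (0xc0000002), datasz 4,
  // value 0x3 (IBT | SHSTK), padded to 8 bytes.
  const uint8_t Desc[] = {0x02, 0x00, 0x00, 0xc0, 0x04, 0x00, 0x00, 0x00,
                          0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
  for (const GnuProperty &P : parseProperties(Desc, sizeof(Desc), 8))
    std::printf("property type 0x%x, %zu data bytes\n", P.Type, P.Data.size());
  return 0;
}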
diff --git a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
index 69ef1556f78d..35e4cfcb6b10 100644
--- a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -32,7 +32,7 @@ public:
: ObjDumper(Writer), Obj(Obj) {}
void printFileHeaders() override;
- void printSections() override;
+ void printSectionHeaders() override;
void printRelocations() override;
void printSymbols() override;
void printDynamicSymbols() override;
@@ -59,7 +59,7 @@ private:
void printRelocation(const MachOObjectFile *Obj, const RelocationRef &Reloc);
- void printSections(const MachOObjectFile *Obj);
+ void printSectionHeaders(const MachOObjectFile *Obj);
const MachOObjectFile *Obj;
};
@@ -428,11 +428,9 @@ void MachODumper::printFileHeaders(const MachHeader &Header) {
W.printFlags("Flags", Header.flags, makeArrayRef(MachOHeaderFlags));
}
-void MachODumper::printSections() {
- return printSections(Obj);
-}
+void MachODumper::printSectionHeaders() { return printSectionHeaders(Obj); }
-void MachODumper::printSections(const MachOObjectFile *Obj) {
+void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) {
ListScope Group(W, "Sections");
int SectionIndex = -1;
diff --git a/contrib/llvm/tools/llvm-readobj/ObjDumper.h b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
index 8c3a7bec73be..13de563469ab 100644
--- a/contrib/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
@@ -33,7 +33,7 @@ public:
virtual ~ObjDumper();
virtual void printFileHeaders() = 0;
- virtual void printSections() = 0;
+ virtual void printSectionHeaders() = 0;
virtual void printRelocations() = 0;
virtual void printSymbols() = 0;
virtual void printDynamicSymbols() = 0;
diff --git a/contrib/llvm/tools/llvm-readobj/WasmDumper.cpp b/contrib/llvm/tools/llvm-readobj/WasmDumper.cpp
index ce224836225e..79d3db4e2d29 100644
--- a/contrib/llvm/tools/llvm-readobj/WasmDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/WasmDumper.cpp
@@ -23,28 +23,21 @@ using namespace object;
namespace {
static const EnumEntry<unsigned> WasmSymbolTypes[] = {
-#define ENUM_ENTRY(X) { #X, wasm::WASM_SYMBOL_TYPE_##X }
- ENUM_ENTRY(FUNCTION),
- ENUM_ENTRY(DATA),
- ENUM_ENTRY(GLOBAL),
- ENUM_ENTRY(SECTION),
+#define ENUM_ENTRY(X) \
+ { #X, wasm::WASM_SYMBOL_TYPE_##X }
+ ENUM_ENTRY(FUNCTION), ENUM_ENTRY(DATA), ENUM_ENTRY(GLOBAL),
+ ENUM_ENTRY(SECTION), ENUM_ENTRY(EVENT),
#undef ENUM_ENTRY
};
static const EnumEntry<uint32_t> WasmSectionTypes[] = {
-#define ENUM_ENTRY(X) { #X, wasm::WASM_SEC_##X }
- ENUM_ENTRY(CUSTOM),
- ENUM_ENTRY(TYPE),
- ENUM_ENTRY(IMPORT),
- ENUM_ENTRY(FUNCTION),
- ENUM_ENTRY(TABLE),
- ENUM_ENTRY(MEMORY),
- ENUM_ENTRY(GLOBAL),
- ENUM_ENTRY(EXPORT),
- ENUM_ENTRY(START),
- ENUM_ENTRY(ELEM),
- ENUM_ENTRY(CODE),
- ENUM_ENTRY(DATA),
+#define ENUM_ENTRY(X) \
+ { #X, wasm::WASM_SEC_##X }
+ ENUM_ENTRY(CUSTOM), ENUM_ENTRY(TYPE), ENUM_ENTRY(IMPORT),
+ ENUM_ENTRY(FUNCTION), ENUM_ENTRY(TABLE), ENUM_ENTRY(MEMORY),
+ ENUM_ENTRY(GLOBAL), ENUM_ENTRY(EVENT), ENUM_ENTRY(EXPORT),
+ ENUM_ENTRY(START), ENUM_ENTRY(ELEM), ENUM_ENTRY(CODE),
+ ENUM_ENTRY(DATA),
#undef ENUM_ENTRY
};
@@ -54,7 +47,7 @@ public:
: ObjDumper(Writer), Obj(Obj) {}
void printFileHeaders() override;
- void printSections() override;
+ void printSectionHeaders() override;
void printRelocations() override;
void printSymbols() override;
void printDynamicSymbols() override { llvm_unreachable("unimplemented"); }
@@ -108,7 +101,7 @@ void WasmDumper::printRelocation(const SectionRef &Section,
if (HasAddend)
W.printNumber("Addend", WasmReloc.Addend);
} else {
- raw_ostream& OS = W.startLine();
+ raw_ostream &OS = W.startLine();
OS << W.hex(Reloc.getOffset()) << " " << RelocTypeName << " ";
if (!SymName.empty())
OS << SymName;
@@ -154,7 +147,7 @@ void WasmDumper::printSymbols() {
printSymbol(Symbol);
}
-void WasmDumper::printSections() {
+void WasmDumper::printSectionHeaders() {
ListScope Group(W, "Sections");
for (const SectionRef &Section : Obj->sections()) {
const WasmSection &WasmSec = Obj->getWasmSection(Section);
@@ -169,7 +162,7 @@ void WasmDumper::printSections() {
const wasm::WasmLinkingData &LinkingData = Obj->linkingData();
if (!LinkingData.InitFunctions.empty()) {
ListScope Group(W, "InitFunctions");
- for (const wasm::WasmInitFunc &F: LinkingData.InitFunctions)
+ for (const wasm::WasmInitFunc &F : LinkingData.InitFunctions)
W.startLine() << F.Symbol << " (priority=" << F.Priority << ")\n";
}
}
@@ -177,7 +170,7 @@ void WasmDumper::printSections() {
case wasm::WASM_SEC_DATA: {
ListScope Group(W, "Segments");
for (const WasmSegment &Segment : Obj->dataSegments()) {
- const wasm::WasmDataSegment& Seg = Segment.Data;
+ const wasm::WasmDataSegment &Seg = Segment.Data;
DictScope Group(W, "Segment");
if (!Seg.Name.empty())
W.printString("Name", Seg.Name);
@@ -219,7 +212,7 @@ void WasmDumper::printSymbol(const SymbolRef &Sym) {
W.printHex("Flags", Symbol.Info.Flags);
}
-}
+} // namespace
namespace llvm {
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
index a7236c02b8ae..81ce7a590364 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -48,58 +48,72 @@ namespace opts {
cl::desc("<input object files>"),
cl::ZeroOrMore);
+ // -all, -a
+ cl::opt<bool>
+ All("all",
+ cl::desc("Equivalent to setting: --file-headers, --program-headers, "
+ "--section-headers, --symbols, --relocations, "
+ "--dynamic-table, --notes, --version-info, --unwind, "
+ "--section-groups and --elf-hash-histogram."));
+ cl::alias AllShort("a", cl::desc("Alias for --all"), cl::aliasopt(All));
+
+ // --headers -e
+ cl::opt<bool>
+ Headers("headers",
+ cl::desc("Equivalent to setting: --file-headers, --program-headers, "
+ "--section-headers"));
+ cl::alias HeadersShort("e", cl::desc("Alias for --headers"),
+ cl::aliasopt(Headers));
+
// -wide, -W
- cl::opt<bool> WideOutput("wide",
- cl::desc("Ignored for compatibility with GNU readelf"));
+ cl::opt<bool>
+ WideOutput("wide", cl::desc("Ignored for compatibility with GNU readelf"),
+ cl::Hidden);
cl::alias WideOutputShort("W",
cl::desc("Alias for --wide"),
cl::aliasopt(WideOutput));
- // -file-headers, -h
+ // -file-headers, -file-header, -h
cl::opt<bool> FileHeaders("file-headers",
cl::desc("Display file headers "));
- cl::alias FileHeadersShort("h",
- cl::desc("Alias for --file-headers"),
- cl::aliasopt(FileHeaders));
-
- // -sections, -s, -S
- // Note: In GNU readelf, -s means --symbols!
- cl::opt<bool> Sections("sections",
- cl::desc("Display all sections."));
- cl::alias SectionsShort("s",
- cl::desc("Alias for --sections"),
- cl::aliasopt(Sections));
- cl::alias SectionsShortUpper("S",
- cl::desc("Alias for --sections"),
- cl::aliasopt(Sections));
-
- // -section-relocations, -sr
+ cl::alias FileHeadersShort("h", cl::desc("Alias for --file-headers"),
+ cl::aliasopt(FileHeaders), cl::NotHidden);
+ cl::alias FileHeadersSingular("file-header",
+ cl::desc("Alias for --file-headers"),
+ cl::aliasopt(FileHeaders));
+
+ // -section-headers, -sections, -S
+ // Also -s in llvm-readobj mode.
+ cl::opt<bool> SectionHeaders("section-headers",
+ cl::desc("Display all section headers."));
+ cl::alias SectionsShortUpper("S", cl::desc("Alias for --section-headers"),
+ cl::aliasopt(SectionHeaders), cl::NotHidden);
+ cl::alias SectionHeadersAlias("sections",
+ cl::desc("Alias for --section-headers"),
+ cl::aliasopt(SectionHeaders), cl::NotHidden);
+
+ // -section-relocations
+ // Also -sr in llvm-readobj mode.
cl::opt<bool> SectionRelocations("section-relocations",
cl::desc("Display relocations for each section shown."));
- cl::alias SectionRelocationsShort("sr",
- cl::desc("Alias for --section-relocations"),
- cl::aliasopt(SectionRelocations));
- // -section-symbols, -st
+ // -section-symbols
+ // Also -st in llvm-readobj mode.
cl::opt<bool> SectionSymbols("section-symbols",
cl::desc("Display symbols for each section shown."));
- cl::alias SectionSymbolsShort("st",
- cl::desc("Alias for --section-symbols"),
- cl::aliasopt(SectionSymbols));
- // -section-data, -sd
+ // -section-data
+ // Also -sd in llvm-readobj mode.
cl::opt<bool> SectionData("section-data",
cl::desc("Display section data for each section shown."));
- cl::alias SectionDataShort("sd",
- cl::desc("Alias for --section-data"),
- cl::aliasopt(SectionData));
- // -relocations, -r
+ // -relocations, -relocs, -r
cl::opt<bool> Relocations("relocations",
cl::desc("Display the relocation entries in the file"));
- cl::alias RelocationsShort("r",
- cl::desc("Alias for --relocations"),
- cl::aliasopt(Relocations));
+ cl::alias RelocationsShort("r", cl::desc("Alias for --relocations"),
+ cl::aliasopt(Relocations), cl::NotHidden);
+ cl::alias RelocationsGNU("relocs", cl::desc("Alias for --relocations"),
+ cl::aliasopt(Relocations));
// -notes, -n
cl::opt<bool> Notes("notes", cl::desc("Display the ELF notes in the file"));
@@ -109,19 +123,19 @@ namespace opts {
cl::opt<bool> DynRelocs("dyn-relocations",
cl::desc("Display the dynamic relocation entries in the file"));
- // -symbols, -t
+ // -symbols
+ // Also -s in llvm-readelf mode, or -t in llvm-readobj mode.
cl::opt<bool> Symbols("symbols",
cl::desc("Display the symbol table"));
- cl::alias SymbolsShort("t",
- cl::desc("Alias for --symbols"),
- cl::aliasopt(Symbols));
+ cl::alias SymbolsGNU("syms", cl::desc("Alias for --symbols"),
+ cl::aliasopt(Symbols));
- // -dyn-symbols, -dt
+ // -dyn-symbols, -dyn-syms
+ // Also -dt in llvm-readobj mode.
cl::opt<bool> DynamicSymbols("dyn-symbols",
cl::desc("Display the dynamic symbol table"));
- cl::alias DynamicSymbolsShort("dt",
- cl::desc("Alias for --dyn-symbols"),
- cl::aliasopt(DynamicSymbols));
+ cl::alias DynSymsGNU("dyn-syms", cl::desc("Alias for --dyn-symbols"),
+ cl::aliasopt(DynamicSymbols));
// -unwind, -u
cl::opt<bool> UnwindInfo("unwind",
@@ -130,29 +144,33 @@ namespace opts {
cl::desc("Alias for --unwind"),
cl::aliasopt(UnwindInfo));
- // -dynamic-table
+ // -dynamic-table, -dynamic, -d
cl::opt<bool> DynamicTable("dynamic-table",
cl::desc("Display the ELF .dynamic section table"));
cl::alias DynamicTableShort("d", cl::desc("Alias for --dynamic-table"),
+ cl::aliasopt(DynamicTable), cl::NotHidden);
+ cl::alias DynamicTableAlias("dynamic", cl::desc("Alias for --dynamic-table"),
cl::aliasopt(DynamicTable));
// -needed-libs
cl::opt<bool> NeededLibraries("needed-libs",
cl::desc("Display the needed libraries"));
- // -program-headers
+ // -program-headers, -segments, -l
cl::opt<bool> ProgramHeaders("program-headers",
cl::desc("Display ELF program headers"));
cl::alias ProgramHeadersShort("l", cl::desc("Alias for --program-headers"),
- cl::aliasopt(ProgramHeaders));
+ cl::aliasopt(ProgramHeaders), cl::NotHidden);
+ cl::alias SegmentsAlias("segments", cl::desc("Alias for --program-headers"),
+ cl::aliasopt(ProgramHeaders));
- // -string-dump
+ // -string-dump, -p
cl::list<std::string> StringDump("string-dump", cl::desc("<number|name>"),
cl::ZeroOrMore);
cl::alias StringDumpShort("p", cl::desc("Alias for --string-dump"),
cl::aliasopt(StringDump));
- // -hex-dump
+ // -hex-dump, -x
cl::list<std::string> HexDump("hex-dump", cl::desc("<number|name>"),
cl::ZeroOrMore);
cl::alias HexDumpShort("x", cl::desc("Alias for --hex-dump"),
@@ -188,11 +206,9 @@ namespace opts {
"codeview-subsection-bytes",
cl::desc("Dump raw contents of codeview debug sections and records"));
- // -arm-attributes, -a
+ // -arm-attributes
cl::opt<bool> ARMAttributes("arm-attributes",
cl::desc("Display the ARM attributes section"));
- cl::alias ARMAttributesShort("a", cl::desc("Alias for --arm-attributes"),
- cl::aliasopt(ARMAttributes));
// -mips-plt-got
cl::opt<bool>
@@ -283,28 +299,40 @@ namespace opts {
PrintStackMap("stackmap",
cl::desc("Display contents of stackmap section"));
- // -version-info
+ // -version-info, -V
cl::opt<bool>
VersionInfo("version-info",
cl::desc("Display ELF version sections (if present)"));
cl::alias VersionInfoShort("V", cl::desc("Alias for -version-info"),
cl::aliasopt(VersionInfo));
+ // -elf-section-groups, -section-groups, -g
cl::opt<bool> SectionGroups("elf-section-groups",
cl::desc("Display ELF section group contents"));
+ cl::alias SectionGroupsAlias("section-groups",
+ cl::desc("Alias for -elf-section-groups"),
+ cl::aliasopt(SectionGroups));
cl::alias SectionGroupsShort("g", cl::desc("Alias for -elf-sections-groups"),
cl::aliasopt(SectionGroups));
+
+ // -elf-hash-histogram, -histogram, -I
cl::opt<bool> HashHistogram(
"elf-hash-histogram",
cl::desc("Display bucket list histogram for hash sections"));
cl::alias HashHistogramShort("I", cl::desc("Alias for -elf-hash-histogram"),
cl::aliasopt(HashHistogram));
+ cl::alias HistogramAlias("histogram",
+ cl::desc("Alias for --elf-hash-histogram"),
+ cl::aliasopt(HashHistogram));
+ // -elf-cg-profile
cl::opt<bool> CGProfile("elf-cg-profile", cl::desc("Display callgraph profile section"));
- cl::opt<bool> Addrsig("elf-addrsig",
+ // -addrsig
+ cl::opt<bool> Addrsig("addrsig",
cl::desc("Display address-significance table"));
+ // -elf-output-style
cl::opt<OutputStyleTy>
Output("elf-output-style", cl::desc("Specify ELF dump style"),
cl::values(clEnumVal(LLVM, "LLVM default style"),
@@ -418,8 +446,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
if (opts::FileHeaders)
Dumper->printFileHeaders();
- if (opts::Sections)
- Dumper->printSections();
+ if (opts::SectionHeaders)
+ Dumper->printSectionHeaders();
if (opts::Relocations)
Dumper->printRelocations();
if (opts::DynRelocs)
@@ -492,6 +520,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
Dumper->printCOFFResources();
if (opts::COFFLoadConfig)
Dumper->printCOFFLoadConfig();
+ if (opts::Addrsig)
+ Dumper->printAddrsig();
if (opts::CodeView)
Dumper->printCodeViewDebugInfo();
if (opts::CodeViewMergedTypes)
@@ -586,21 +616,87 @@ static void dumpInput(StringRef File) {
reportError(File, readobj_error::unrecognized_file_format);
}
+/// Registers aliases that should only be allowed by readobj.
+static void registerReadobjAliases() {
+ // -s has meant --sections for a very long time in llvm-readobj despite
+ // meaning --symbols in readelf.
+ static cl::alias SectionsShort("s", cl::desc("Alias for --section-headers"),
+ cl::aliasopt(opts::SectionHeaders),
+ cl::NotHidden);
+
+ // Only register -t in llvm-readobj, as readelf reserves it for
+ // --section-details (not implemented yet).
+ static cl::alias SymbolsShort("t", cl::desc("Alias for --symbols"),
+ cl::aliasopt(opts::Symbols), cl::NotHidden);
+
+ // The following two-letter aliases are only provided for readobj, as readelf
+ // allows single-letter args to be grouped together.
+ static cl::alias SectionRelocationsShort(
+ "sr", cl::desc("Alias for --section-relocations"),
+ cl::aliasopt(opts::SectionRelocations));
+ static cl::alias SectionDataShort("sd", cl::desc("Alias for --section-data"),
+ cl::aliasopt(opts::SectionData));
+ static cl::alias SectionSymbolsShort("st",
+ cl::desc("Alias for --section-symbols"),
+ cl::aliasopt(opts::SectionSymbols));
+ static cl::alias DynamicSymbolsShort("dt",
+ cl::desc("Alias for --dyn-symbols"),
+ cl::aliasopt(opts::DynamicSymbols));
+}
+
+/// Registers aliases that should only be allowed by readelf.
+static void registerReadelfAliases() {
+ // -s is here because for readobj it means --sections.
+ static cl::alias SymbolsShort("s", cl::desc("Alias for --symbols"),
+ cl::aliasopt(opts::Symbols), cl::NotHidden,
+ cl::Grouping);
+
+ // Allow all single letter flags to be grouped together.
+ for (auto &OptEntry : cl::getRegisteredOptions()) {
+ StringRef ArgName = OptEntry.getKey();
+ cl::Option *Option = OptEntry.getValue();
+ if (ArgName.size() == 1)
+ Option->setFormattingFlag(cl::Grouping);
+ }
+}
+
int main(int argc, const char *argv[]) {
InitLLVM X(argc, argv);
// Register the target printer for --version.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
- opts::WideOutput.setHiddenFlag(cl::Hidden);
-
- if (sys::path::stem(argv[0]).find("readelf") != StringRef::npos)
+ if (sys::path::stem(argv[0]).contains("readelf")) {
opts::Output = opts::GNU;
+ registerReadelfAliases();
+ } else {
+ registerReadobjAliases();
+ }
cl::ParseCommandLineOptions(argc, argv, "LLVM Object Reader\n");
+ if (opts::All) {
+ opts::FileHeaders = true;
+ opts::ProgramHeaders = true;
+ opts::SectionHeaders = true;
+ opts::Symbols = true;
+ opts::Relocations = true;
+ opts::DynamicTable = true;
+ opts::Notes = true;
+ opts::VersionInfo = true;
+ opts::UnwindInfo = true;
+ opts::SectionGroups = true;
+ opts::HashHistogram = true;
+ }
+
+ if (opts::Headers) {
+ opts::FileHeaders = true;
+ opts::ProgramHeaders = true;
+ opts::SectionHeaders = true;
+ }
+
// Default to stdin if no filename is specified.
- if (opts::InputFilenames.size() == 0)
+ if (opts::InputFilenames.empty())
opts::InputFilenames.push_back("-");
llvm::for_each(opts::InputFilenames, dumpInput);
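The readelf-mode block above enables GNU-style grouping of single-letter flags, so a bundle such as -SW parses as -S -W. Below is a minimal sketch of the same idiom, standalone and outside the llvm-readobj sources, using only the CommandLine APIs that appear in the patch; the option names here are placeholders, not the tool's real option set.

#include "llvm/Support/CommandLine.h"

using namespace llvm;

static cl::opt<bool> SectionHeaders("section-headers",
                                    cl::desc("Display all section headers."));
static cl::alias SectionsShort("S", cl::desc("Alias for --section-headers"),
                               cl::aliasopt(SectionHeaders));
static cl::opt<bool> Wide("wide", cl::desc("Ignored for compatibility"));
static cl::alias WideShort("W", cl::desc("Alias for --wide"),
                           cl::aliasopt(Wide));

int main(int argc, char **argv) {
  // Same loop as registerReadelfAliases(): mark every single-letter option
  // as cl::Grouping so bundles like "-SW" are accepted.
  for (auto &OptEntry : cl::getRegisteredOptions()) {
    if (OptEntry.getKey().size() == 1)
      OptEntry.getValue()->setFormattingFlag(cl::Grouping);
  }
  cl::ParseCommandLineOptions(argc, argv, "grouping demo\n");
  return (SectionHeaders && Wide) ? 0 : 1;
}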
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.h b/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
index 374ffd03e13a..92ed098dc642 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
@@ -40,7 +40,7 @@ namespace llvm {
return *EO;
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(EO.takeError(), OS, "");
+ logAllUnhandledErrors(EO.takeError(), OS);
OS.flush();
reportError(Buf);
}
@@ -49,22 +49,13 @@ namespace llvm {
} // namespace llvm
namespace opts {
- extern llvm::cl::list<std::string> InputFilenames;
- extern llvm::cl::opt<bool> FileHeaders;
- extern llvm::cl::opt<bool> Sections;
extern llvm::cl::opt<bool> SectionRelocations;
extern llvm::cl::opt<bool> SectionSymbols;
extern llvm::cl::opt<bool> SectionData;
- extern llvm::cl::opt<bool> Relocations;
- extern llvm::cl::opt<bool> Symbols;
extern llvm::cl::opt<bool> DynamicSymbols;
- extern llvm::cl::opt<bool> UnwindInfo;
extern llvm::cl::opt<bool> ExpandRelocs;
extern llvm::cl::opt<bool> RawRelr;
- extern llvm::cl::opt<bool> CodeView;
extern llvm::cl::opt<bool> CodeViewSubsectionBytes;
- extern llvm::cl::opt<bool> ARMAttributes;
- extern llvm::cl::opt<bool> MipsPLTGOT;
enum OutputStyleTy { LLVM, GNU };
extern llvm::cl::opt<OutputStyleTy> Output;
} // namespace opts
diff --git a/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 54db1ec113fc..975638ed82d1 100644
--- a/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -88,25 +88,30 @@ CheckFiles("check",
cl::desc("File containing RuntimeDyld verifier checks."),
cl::ZeroOrMore);
-static cl::opt<uint64_t>
+// Tracking BUG: 19665
+// http://llvm.org/bugs/show_bug.cgi?id=19665
+//
+// Do not change these options to cl::opt<uint64_t> since this silently breaks
+// argument parsing.
+static cl::opt<unsigned long long>
PreallocMemory("preallocate",
cl::desc("Allocate memory upfront rather than on-demand"),
cl::init(0));
-static cl::opt<uint64_t>
+static cl::opt<unsigned long long>
TargetAddrStart("target-addr-start",
cl::desc("For -verify only: start of phony target address "
"range."),
cl::init(4096), // Start at "page 1" - no allocating at "null".
cl::Hidden);
-static cl::opt<uint64_t>
+static cl::opt<unsigned long long>
TargetAddrEnd("target-addr-end",
cl::desc("For -verify only: end of phony target address range."),
cl::init(~0ULL),
cl::Hidden);
-static cl::opt<uint64_t>
+static cl::opt<unsigned long long>
TargetSectionSep("target-section-sep",
cl::desc("For -verify only: Separation between sections in "
"phony target address space."),
@@ -304,7 +309,7 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(MaybeObj.takeError(), OS, "");
+ logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
@@ -433,7 +438,7 @@ static int executeInput() {
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(MaybeObj.takeError(), OS, "");
+ logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
@@ -577,7 +582,11 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
if (LoadAddr &&
*LoadAddr != static_cast<uint64_t>(
reinterpret_cast<uintptr_t>(Tmp->first))) {
- AlreadyAllocated[*LoadAddr] = Tmp->second;
+ // A section will have a LoadAddr of 0 if it wasn't loaded for whatever
+ // reason (e.g. zero byte COFF sections). Don't include those sections in
+ // the allocation map.
+ if (*LoadAddr != 0)
+ AlreadyAllocated[*LoadAddr] = Tmp->second;
Worklist.erase(Tmp);
}
}
@@ -701,7 +710,7 @@ static int linkAndVerify() {
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
- logAllUnhandledErrors(MaybeObj.takeError(), OS, "");
+ logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
diff --git a/contrib/llvm/tools/llvm-stress/llvm-stress.cpp b/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
index d8ec11251ff6..c29b7a7f7e46 100644
--- a/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -356,8 +356,8 @@ struct StoreModifier: public Modifier {
void Act() override {
// Try to use predefined pointers. If non-exist, use undef pointer value;
Value *Ptr = getRandomPointerValue();
- Type *Tp = Ptr->getType();
- Value *Val = getRandomValue(Tp->getContainedType(0));
+ PointerType *Tp = cast<PointerType>(Ptr->getType());
+ Value *Val = getRandomValue(Tp->getElementType());
Type *ValTy = Val->getType();
// Do not store vectors of i1s because they are unsupported
diff --git a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 6d40a5403504..9d19f994b739 100644
--- a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -55,17 +55,29 @@ static cl::opt<bool>
ClPrintInlining("inlining", cl::init(true),
cl::desc("Print all inlined frames for a given address"));
+// -demangle, -C
static cl::opt<bool>
ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names"));
+static cl::alias
+ClDemangleShort("C", cl::desc("Alias for -demangle"),
+ cl::NotHidden, cl::aliasopt(ClDemangle));
static cl::opt<std::string> ClDefaultArch("default-arch", cl::init(""),
cl::desc("Default architecture "
"(for multi-arch objects)"));
+// -obj, -exe, -e
static cl::opt<std::string>
ClBinaryName("obj", cl::init(""),
cl::desc("Path to object file to be symbolized (if not provided, "
"object file should be specified for each input line)"));
+static cl::alias
+ClBinaryNameAliasExe("exe", cl::desc("Alias for -obj"),
+ cl::NotHidden, cl::aliasopt(ClBinaryName));
+static cl::alias
+ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
+ cl::NotHidden, cl::aliasopt(ClBinaryName));
+
static cl::opt<std::string>
ClDwpName("dwp", cl::init(""),
@@ -75,13 +87,25 @@ static cl::list<std::string>
ClDsymHint("dsym-hint", cl::ZeroOrMore,
cl::desc("Path to .dSYM bundles to search for debug info for the "
"object files"));
-static cl::opt<bool>
- ClPrintAddress("print-address", cl::init(false),
- cl::desc("Show address before line information"));
+// -print-address, -addresses, -a
+static cl::opt<bool>
+ClPrintAddress("print-address", cl::init(false),
+ cl::desc("Show address before line information"));
+static cl::alias
+ClPrintAddressAliasAddresses("addresses", cl::desc("Alias for -print-address"),
+ cl::NotHidden, cl::aliasopt(ClPrintAddress));
+static cl::alias
+ClPrintAddressAliasA("a", cl::desc("Alias for -print-address"),
+ cl::NotHidden, cl::aliasopt(ClPrintAddress));
+
+// -pretty-print, -p
static cl::opt<bool>
ClPrettyPrint("pretty-print", cl::init(false),
cl::desc("Make the output more human friendly"));
+static cl::alias ClPrettyPrintShort("p", cl::desc("Alias for -pretty-print"),
+ cl::NotHidden,
+ cl::aliasopt(ClPrettyPrint));
static cl::opt<int> ClPrintSourceContextLines(
"print-source-context-lines", cl::init(0),
@@ -90,6 +114,10 @@ static cl::opt<int> ClPrintSourceContextLines(
static cl::opt<bool> ClVerbose("verbose", cl::init(false),
cl::desc("Print verbose line info"));
+static cl::list<std::string> ClInputAddresses(cl::Positional,
+ cl::desc("<input addresses>..."),
+ cl::ZeroOrMore);
+
template<typename T>
static bool error(Expected<T> &ResOrErr) {
if (ResOrErr)
@@ -137,6 +165,38 @@ static bool parseCommand(StringRef InputString, bool &IsData,
return !StringRef(pos, offset_length).getAsInteger(0, ModuleOffset);
}
+static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
+ DIPrinter &Printer) {
+ bool IsData = false;
+ std::string ModuleName;
+ uint64_t ModuleOffset = 0;
+ if (!parseCommand(StringRef(InputString), IsData, ModuleName, ModuleOffset)) {
+ outs() << InputString;
+ return;
+ }
+
+ if (ClPrintAddress) {
+ outs() << "0x";
+ outs().write_hex(ModuleOffset);
+ StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
+ outs() << Delimiter;
+ }
+ if (IsData) {
+ auto ResOrErr = Symbolizer.symbolizeData(ModuleName, ModuleOffset);
+ Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
+ } else if (ClPrintInlining) {
+ auto ResOrErr =
+ Symbolizer.symbolizeInlinedCode(ModuleName, ModuleOffset, ClDwpName);
+ Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
+ } else {
+ auto ResOrErr =
+ Symbolizer.symbolizeCode(ModuleName, ModuleOffset, ClDwpName);
+ Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
+ }
+ outs() << "\n";
+ outs().flush();
+}
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
@@ -159,43 +219,15 @@ int main(int argc, char **argv) {
DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
ClPrettyPrint, ClPrintSourceContextLines, ClVerbose);
- const int kMaxInputStringLength = 1024;
- char InputString[kMaxInputStringLength];
-
- while (true) {
- if (!fgets(InputString, sizeof(InputString), stdin))
- break;
-
- bool IsData = false;
- std::string ModuleName;
- uint64_t ModuleOffset = 0;
- if (!parseCommand(StringRef(InputString), IsData, ModuleName,
- ModuleOffset)) {
- outs() << InputString;
- continue;
- }
+ if (ClInputAddresses.empty()) {
+ const int kMaxInputStringLength = 1024;
+ char InputString[kMaxInputStringLength];
- if (ClPrintAddress) {
- outs() << "0x";
- outs().write_hex(ModuleOffset);
- StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
- outs() << Delimiter;
- }
- if (IsData) {
- auto ResOrErr = Symbolizer.symbolizeData(ModuleName, ModuleOffset);
- Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
- } else if (ClPrintInlining) {
- auto ResOrErr =
- Symbolizer.symbolizeInlinedCode(ModuleName, ModuleOffset, ClDwpName);
- Printer << (error(ResOrErr) ? DIInliningInfo()
- : ResOrErr.get());
- } else {
- auto ResOrErr =
- Symbolizer.symbolizeCode(ModuleName, ModuleOffset, ClDwpName);
- Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
- }
- outs() << "\n";
- outs().flush();
+ while (fgets(InputString, sizeof(InputString), stdin))
+ symbolizeInput(InputString, Symbolizer, Printer);
+ } else {
+ for (StringRef Address : ClInputAddresses)
+ symbolizeInput(Address, Symbolizer, Printer);
}
return 0;
diff --git a/contrib/llvm/tools/llvm-xray/xray-account.cpp b/contrib/llvm/tools/llvm-xray/xray-account.cpp
index 77de1de496f6..9985c9adcf6c 100644
--- a/contrib/llvm/tools/llvm-xray/xray-account.cpp
+++ b/contrib/llvm/tools/llvm-xray/xray-account.cpp
@@ -146,6 +146,10 @@ bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
auto &ThreadStack = PerThreadFunctionStack[Record.TId];
switch (Record.Type) {
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Support custom and typed event accounting in the future.
+ return true;
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG: {
ThreadStack.emplace_back(Record.FuncId, Record.TSC);
@@ -255,9 +259,18 @@ ResultRow getStats(std::vector<uint64_t> &Timings) {
} // namespace
+using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;
+
+template <typename F>
+static void sortByKey(std::vector<TupleType> &Results, F Fn) {
+ bool ASC = AccountSortOrder == SortDirection::ASCENDING;
+ llvm::sort(Results, [=](const TupleType &L, const TupleType &R) {
+ return ASC ? Fn(L) < Fn(R) : Fn(L) > Fn(R);
+ });
+}
+
template <class F>
void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {
- using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;
std::vector<TupleType> Results;
Results.reserve(FunctionLatencies.size());
for (auto FT : FunctionLatencies) {
@@ -282,80 +295,31 @@ void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {
// Sort the data according to user-provided flags.
switch (AccountSortOutput) {
case SortField::FUNCID:
- llvm::sort(Results.begin(), Results.end(),
- [](const TupleType &L, const TupleType &R) {
- if (AccountSortOrder == SortDirection::ASCENDING)
- return std::get<0>(L) < std::get<0>(R);
- if (AccountSortOrder == SortDirection::DESCENDING)
- return std::get<0>(L) > std::get<0>(R);
- llvm_unreachable("Unknown sort direction");
- });
+ sortByKey(Results, [](const TupleType &X) { return std::get<0>(X); });
break;
case SortField::COUNT:
- llvm::sort(Results.begin(), Results.end(),
- [](const TupleType &L, const TupleType &R) {
- if (AccountSortOrder == SortDirection::ASCENDING)
- return std::get<1>(L) < std::get<1>(R);
- if (AccountSortOrder == SortDirection::DESCENDING)
- return std::get<1>(L) > std::get<1>(R);
- llvm_unreachable("Unknown sort direction");
- });
+ sortByKey(Results, [](const TupleType &X) { return std::get<1>(X); });
+ break;
+ case SortField::MIN:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Min; });
+ break;
+ case SortField::MED:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Median; });
+ break;
+ case SortField::PCT90:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct90; });
+ break;
+ case SortField::PCT99:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct99; });
break;
- default:
- // Here we need to look into the ResultRow for the rest of the data that
- // we want to sort by.
- llvm::sort(Results.begin(), Results.end(),
- [&](const TupleType &L, const TupleType &R) {
- auto &LR = std::get<2>(L);
- auto &RR = std::get<2>(R);
- switch (AccountSortOutput) {
- case SortField::COUNT:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Count < RR.Count;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Count > RR.Count;
- llvm_unreachable("Unknown sort direction");
- case SortField::MIN:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Min < RR.Min;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Min > RR.Min;
- llvm_unreachable("Unknown sort direction");
- case SortField::MED:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Median < RR.Median;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Median > RR.Median;
- llvm_unreachable("Unknown sort direction");
- case SortField::PCT90:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Pct90 < RR.Pct90;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Pct90 > RR.Pct90;
- llvm_unreachable("Unknown sort direction");
- case SortField::PCT99:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Pct99 < RR.Pct99;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Pct99 > RR.Pct99;
- llvm_unreachable("Unknown sort direction");
- case SortField::MAX:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Max < RR.Max;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Max > RR.Max;
- llvm_unreachable("Unknown sort direction");
- case SortField::SUM:
- if (AccountSortOrder == SortDirection::ASCENDING)
- return LR.Sum < RR.Sum;
- if (AccountSortOrder == SortDirection::DESCENDING)
- return LR.Sum > RR.Sum;
- llvm_unreachable("Unknown sort direction");
- default:
- llvm_unreachable("Unsupported sort order");
- }
- });
+ case SortField::MAX:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Max; });
break;
+ case SortField::SUM:
+ sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Sum; });
+ break;
+ case SortField::FUNC:
+ llvm_unreachable("Not implemented");
}
if (AccountTop > 0) {
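The sortByKey helper introduced above collapses the per-field comparators into one projection-based sort. A standalone sketch of the same idiom follows, with std::sort standing in for llvm::sort; the tuple layout and sample data are invented for illustration.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <vector>

using Row = std::tuple<int32_t, uint64_t, double>; // (FuncId, Count, Min)

// Sort rows by a projected key, ascending or descending.
template <typename F>
static void sortByKey(std::vector<Row> &Rows, bool Ascending, F Key) {
  std::sort(Rows.begin(), Rows.end(), [=](const Row &L, const Row &R) {
    return Ascending ? Key(L) < Key(R) : Key(L) > Key(R);
  });
}

int main() {
  std::vector<Row> Rows;
  Rows.push_back(std::make_tuple(1, uint64_t(10), 0.5));
  Rows.push_back(std::make_tuple(2, uint64_t(3), 0.1));
  Rows.push_back(std::make_tuple(3, uint64_t(7), 0.9));
  // Equivalent of the SortField::COUNT case with a descending sort order.
  sortByKey(Rows, /*Ascending=*/false,
            [](const Row &X) { return std::get<1>(X); });
  for (const Row &R : Rows)
    std::printf("func %d count %llu\n", (int)std::get<0>(R),
                (unsigned long long)std::get<1>(R));
  return 0;
}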
@@ -420,19 +384,25 @@ namespace llvm {
template <> struct format_provider<llvm::xray::RecordTypes> {
static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream,
StringRef Style) {
- switch(T) {
- case RecordTypes::ENTER:
- Stream << "enter";
- break;
- case RecordTypes::ENTER_ARG:
- Stream << "enter-arg";
- break;
- case RecordTypes::EXIT:
- Stream << "exit";
- break;
- case RecordTypes::TAIL_EXIT:
- Stream << "tail-exit";
- break;
+ switch (T) {
+ case RecordTypes::ENTER:
+ Stream << "enter";
+ break;
+ case RecordTypes::ENTER_ARG:
+ Stream << "enter-arg";
+ break;
+ case RecordTypes::EXIT:
+ Stream << "exit";
+ break;
+ case RecordTypes::TAIL_EXIT:
+ Stream << "tail-exit";
+ break;
+ case RecordTypes::CUSTOM_EVENT:
+ Stream << "custom-event";
+ break;
+ case RecordTypes::TYPED_EVENT:
+ Stream << "typed-event";
+ break;
}
}
};
diff --git a/contrib/llvm/tools/llvm-xray/xray-converter.cpp b/contrib/llvm/tools/llvm-xray/xray-converter.cpp
index 90e14d0d8896..3f153b99bc93 100644
--- a/contrib/llvm/tools/llvm-xray/xray-converter.cpp
+++ b/contrib/llvm/tools/llvm-xray/xray-converter.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/JSON.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
@@ -91,9 +92,10 @@ void TraceConverter::exportAsYAML(const Trace &Records, raw_ostream &OS) {
Trace.Records.push_back({R.RecordType, R.CPU, R.Type, R.FuncId,
Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
: llvm::to_string(R.FuncId),
- R.TSC, R.TId, R.PId, R.CallArgs});
+ R.TSC, R.TId, R.PId, R.CallArgs, R.Data});
}
Output Out(OS, nullptr, 0);
+ Out.setWriteDefaultValues(false);
Out << Trace;
}
@@ -122,21 +124,27 @@ void TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) {
// Then write out the rest of the records, still in an endian-appropriate
// format.
for (const auto &R : Records) {
- Writer.write(R.RecordType);
- // The on disk naive raw format uses 8 bit CPUs, but the record has 16.
- // There's no choice but truncation.
- Writer.write(static_cast<uint8_t>(R.CPU));
switch (R.Type) {
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG:
+ Writer.write(R.RecordType);
+ Writer.write(static_cast<uint8_t>(R.CPU));
Writer.write(uint8_t{0});
break;
case RecordTypes::EXIT:
+ Writer.write(R.RecordType);
+ Writer.write(static_cast<uint8_t>(R.CPU));
Writer.write(uint8_t{1});
break;
case RecordTypes::TAIL_EXIT:
+ Writer.write(R.RecordType);
+ Writer.write(static_cast<uint8_t>(R.CPU));
Writer.write(uint8_t{2});
break;
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // Skip custom and typed event records for v1 logs.
+ continue;
}
Writer.write(R.FuncId);
Writer.write(R.TSC);
@@ -234,31 +242,6 @@ StackTrieNode *findOrCreateStackNode(
return CurrentStack;
}
-void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId,
- uint32_t TId, uint32_t PId, bool Symbolize,
- const FuncIdConversionHelper &FuncIdHelper,
- double EventTimestampUs,
- const StackTrieNode &StackCursor,
- StringRef FunctionPhenotype) {
- OS << " ";
- if (Version >= 3) {
- OS << llvm::formatv(
- R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )"
- R"("ts" : "{4:f4}", "sf" : "{5}" })",
- (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
- : llvm::to_string(FuncId)),
- FunctionPhenotype, TId, PId, EventTimestampUs,
- StackCursor.ExtraData.id);
- } else {
- OS << llvm::formatv(
- R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
- R"("ts" : "{3:f3}", "sf" : "{4}" })",
- (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
- : llvm::to_string(FuncId)),
- FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id);
- }
-}
-
} // namespace
void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
@@ -269,18 +252,14 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
unsigned id_counter = 0;
- OS << "{\n \"traceEvents\": [";
DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
std::forward_list<StackTrieNode> NodeStore{};
- int loop_count = 0;
- for (const auto &R : Records) {
- if (loop_count++ == 0)
- OS << "\n";
- else
- OS << ",\n";
+ // Create a JSON Array which will hold all trace events.
+ json::Array TraceEvents;
+ for (const auto &R : Records) {
// Chrome trace event format always wants data in micros.
// CyclesPerMicro = CycleHertz / 10^6
// TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
@@ -292,6 +271,10 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC);
StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId];
switch (R.Type) {
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Support typed and custom event rendering on Chrome Trace Viewer.
+ break;
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG:
StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId,
@@ -301,8 +284,15 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
// type of B for begin or E for end, thread id, process id,
// timestamp in microseconds, and a stack frame id. The ids are logged
// in an id dictionary after the events.
- writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize,
- FuncIdHelper, EventTimestampUs, *StackCursor, "B");
+ TraceEvents.push_back(json::Object({
+ {"name", Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
+ : llvm::to_string(R.FuncId)},
+ {"ph", "B"},
+ {"tid", llvm::to_string(R.TId)},
+ {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
+ {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
+ {"sf", llvm::to_string(StackCursor->ExtraData.id)},
+ }));
break;
case RecordTypes::EXIT:
case RecordTypes::TAIL_EXIT:
@@ -313,43 +303,51 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
// (And/Or in loop termination below)
StackTrieNode *PreviousCursor = nullptr;
do {
- if (PreviousCursor != nullptr) {
- OS << ",\n";
- }
- writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId,
- Symbolize, FuncIdHelper, EventTimestampUs,
- *StackCursor, "E");
+ TraceEvents.push_back(json::Object({
+ {"name", Symbolize
+ ? FuncIdHelper.SymbolOrNumber(StackCursor->FuncId)
+ : llvm::to_string(StackCursor->FuncId)},
+ {"ph", "E"},
+ {"tid", llvm::to_string(R.TId)},
+ {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
+ {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
+ {"sf", llvm::to_string(StackCursor->ExtraData.id)},
+ }));
PreviousCursor = StackCursor;
StackCursor = StackCursor->Parent;
} while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
break;
}
}
- OS << "\n ],\n"; // Close the Trace Events array.
- OS << " "
- << "\"displayTimeUnit\": \"ns\",\n";
// The stackFrames dictionary substantially reduces size of the output file by
// avoiding repeating the entire call stack of function names for each entry.
- OS << R"( "stackFrames": {)";
- int stack_frame_count = 0;
- for (auto map_iter : StacksByStackId) {
- if (stack_frame_count++ == 0)
- OS << "\n";
- else
- OS << ",\n";
- OS << " ";
- OS << llvm::formatv(
- R"("{0}" : { "name" : "{1}")", map_iter.first,
- (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
- : llvm::to_string(map_iter.second->FuncId)));
- if (map_iter.second->Parent != nullptr)
- OS << llvm::formatv(R"(, "parent": "{0}")",
- map_iter.second->Parent->ExtraData.id);
- OS << " }";
+ json::Object StackFrames;
+ for (const auto &Stack : StacksByStackId) {
+ const auto &StackId = Stack.first;
+ const auto &StackFunctionNode = Stack.second;
+ json::Object::iterator It;
+ std::tie(It, std::ignore) = StackFrames.insert({
+ llvm::to_string(StackId),
+ json::Object{
+ {"name",
+ Symbolize ? FuncIdHelper.SymbolOrNumber(StackFunctionNode->FuncId)
+ : llvm::to_string(StackFunctionNode->FuncId)}},
+ });
+
+ if (StackFunctionNode->Parent != nullptr)
+ It->second.getAsObject()->insert(
+ {"parent", llvm::to_string(StackFunctionNode->Parent->ExtraData.id)});
}
- OS << "\n }\n"; // Close the stack frames map.
- OS << "}\n"; // Close the JSON entry.
+
+ json::Object TraceJSON{
+ {"displayTimeUnit", "ns"},
+ {"traceEvents", std::move(TraceEvents)},
+ {"stackFrames", std::move(StackFrames)},
+ };
+
+ // Pretty-print the JSON using two spaces for indentations.
+ OS << formatv("{0:2}", json::Value(std::move(TraceJSON)));
}
namespace llvm {
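The Chrome trace exporter above now builds the whole document with llvm/Support/JSON.h instead of hand-printing braces and commas. A small standalone illustration of the same calls, with made-up event fields, showing how formatv's "{0:2}" style pretty-prints a json::Value with a two-space indent:

  #include "llvm/Support/FormatVariadic.h"
  #include "llvm/Support/JSON.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void emitSampleTrace(raw_ostream &OS) {
    json::Array Events;
    // One fabricated "begin" event in Chrome trace-event form.
    Events.push_back(json::Object({{"name", "main"},
                                   {"ph", "B"},
                                   {"tid", "1"},
                                   {"pid", "1"},
                                   {"ts", "0.0000"},
                                   {"sf", "0"}}));
    json::Object Root{
        {"displayTimeUnit", "ns"},
        {"traceEvents", std::move(Events)},
    };
    // The "2" style is the indentation width used by the JSON formatter.
    OS << formatv("{0:2}", json::Value(std::move(Root)));
  }
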
diff --git a/contrib/llvm/tools/llvm-xray/xray-fdr-dump.cpp b/contrib/llvm/tools/llvm-xray/xray-fdr-dump.cpp
new file mode 100644
index 000000000000..389825605b62
--- /dev/null
+++ b/contrib/llvm/tools/llvm-xray/xray-fdr-dump.cpp
@@ -0,0 +1,119 @@
+//===- xray-fdr-dump.cpp: XRay FDR Trace Dump Tool ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the FDR trace dumping tool, using the libraries for handling FDR
+// mode traces specifically.
+//
+//===----------------------------------------------------------------------===//
+#include "xray-registry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/XRay/BlockIndexer.h"
+#include "llvm/XRay/BlockPrinter.h"
+#include "llvm/XRay/BlockVerifier.h"
+#include "llvm/XRay/FDRRecordConsumer.h"
+#include "llvm/XRay/FDRRecordProducer.h"
+#include "llvm/XRay/FDRRecords.h"
+#include "llvm/XRay/FileHeaderReader.h"
+#include "llvm/XRay/RecordPrinter.h"
+
+using namespace llvm;
+using namespace xray;
+
+static cl::SubCommand Dump("fdr-dump", "FDR Trace Dump");
+static cl::opt<std::string> DumpInput(cl::Positional,
+ cl::desc("<xray fdr mode log>"),
+ cl::Required, cl::sub(Dump));
+static cl::opt<bool> DumpVerify("verify",
+ cl::desc("verify structure of the log"),
+ cl::init(false), cl::sub(Dump));
+
+static CommandRegistration Unused(&Dump, []() -> Error {
+ // Open the file provided.
+ int Fd;
+ if (auto EC = sys::fs::openFileForRead(DumpInput, Fd))
+ return createStringError(EC, "Cannot open file '%s' for read.",
+ DumpInput.c_str());
+
+ uint64_t FileSize;
+ if (auto EC = sys::fs::file_size(DumpInput, FileSize))
+ return createStringError(EC, "Failed to get file size for '%s'.",
+ DumpInput.c_str());
+
+ std::error_code EC;
+ sys::fs::mapped_file_region MappedFile(
+ Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+
+ DataExtractor DE(StringRef(MappedFile.data(), MappedFile.size()), true, 8);
+ uint32_t OffsetPtr = 0;
+
+ auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr);
+ if (!FileHeaderOrError)
+ return FileHeaderOrError.takeError();
+ auto &H = FileHeaderOrError.get();
+
+ FileBasedRecordProducer P(H, DE, OffsetPtr);
+
+ RecordPrinter RP(outs(), "\n");
+ if (!DumpVerify) {
+ PipelineConsumer C({&RP});
+ while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
+ auto R = P.produce();
+ if (!R)
+ return R.takeError();
+ if (auto E = C.consume(std::move(R.get())))
+ return E;
+ }
+ return Error::success();
+ }
+
+ BlockPrinter BP(outs(), RP);
+ std::vector<std::unique_ptr<Record>> Records;
+ LogBuilderConsumer C(Records);
+ while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
+ auto R = P.produce();
+ if (!R) {
+ // Print records we've found so far.
+ for (auto &Ptr : Records)
+ if (auto E = Ptr->apply(RP))
+ return joinErrors(std::move(E), R.takeError());
+ return R.takeError();
+ }
+ if (auto E = C.consume(std::move(R.get())))
+ return E;
+ }
+
+ // Once we have a trace, we then index the blocks.
+ BlockIndexer::Index Index;
+ BlockIndexer BI(Index);
+ for (auto &Ptr : Records)
+ if (auto E = Ptr->apply(BI))
+ return E;
+
+ if (auto E = BI.flush())
+ return E;
+
+ // Then we validate while printing each block.
+ BlockVerifier BV;
+ for (auto ProcessThreadBlocks : Index) {
+ auto &Blocks = ProcessThreadBlocks.second;
+ for (auto &B : Blocks) {
+ for (auto *R : B.Records) {
+ if (auto E = R->apply(BV))
+ return E;
+ if (auto E = R->apply(BP))
+ return E;
+ }
+ BV.reset();
+ BP.reset();
+ }
+ }
+ outs().flush();
+ return Error::success();
+});
diff --git a/contrib/llvm/tools/llvm-xray/xray-graph.cpp b/contrib/llvm/tools/llvm-xray/xray-graph.cpp
index c619bf86299b..fe49cca20d57 100644
--- a/contrib/llvm/tools/llvm-xray/xray-graph.cpp
+++ b/contrib/llvm/tools/llvm-xray/xray-graph.cpp
@@ -246,6 +246,10 @@ Error GraphRenderer::accountRecord(const XRayRecord &Record) {
updateStat(G[Record.FuncId].S, D);
break;
}
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ // TODO: Support custom and typed events in the graph processing?
+ break;
}
return Error::success();
diff --git a/contrib/llvm/tools/llvm-xray/xray-stacks.cpp b/contrib/llvm/tools/llvm-xray/xray-stacks.cpp
index 1a6069780a31..d3af9e25e6f2 100644
--- a/contrib/llvm/tools/llvm-xray/xray-stacks.cpp
+++ b/contrib/llvm/tools/llvm-xray/xray-stacks.cpp
@@ -366,6 +366,9 @@ public:
AccountRecordState *state) {
auto &TS = ThreadStackMap[R.TId];
switch (R.Type) {
+ case RecordTypes::CUSTOM_EVENT:
+ case RecordTypes::TYPED_EVENT:
+ return AccountRecordStatus::OK;
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG: {
state->wasLastRecordExit = false;
@@ -734,7 +737,7 @@ static CommandRegistration Unused(&Stack, []() -> Error {
Twine("Failed loading input file '") + Filename + "'",
std::make_error_code(std::errc::invalid_argument)),
TraceOrErr.takeError());
- logAllUnhandledErrors(TraceOrErr.takeError(), errs(), "");
+ logAllUnhandledErrors(TraceOrErr.takeError(), errs());
continue;
}
auto &T = *TraceOrErr;
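The only change in this hunk is that logAllUnhandledErrors no longer needs an explicit empty banner string, since that parameter became optional. A minimal sketch of the surrounding idiom, assuming llvm::xray::loadTraceFile from llvm/XRay/Trace.h and a caller-supplied file name:

  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/XRay/Trace.h"
  using namespace llvm;

  void countRecords(StringRef Filename) {
    Expected<xray::Trace> TraceOrErr = xray::loadTraceFile(Filename);
    if (!TraceOrErr) {
      // The banner argument now defaults to an empty string, so the old
      // trailing "" can simply be dropped.
      logAllUnhandledErrors(TraceOrErr.takeError(), errs());
      return;
    }
    errs() << Filename << ": " << TraceOrErr->size() << " records\n";
  }
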
diff --git a/contrib/llvm/tools/opt/Debugify.cpp b/contrib/llvm/tools/opt/Debugify.cpp
index 6c3cdc75e334..3b1effba1592 100644
--- a/contrib/llvm/tools/opt/Debugify.cpp
+++ b/contrib/llvm/tools/opt/Debugify.cpp
@@ -96,11 +96,12 @@ bool applyDebugifyMetadata(Module &M,
continue;
auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
- bool IsLocalToUnit = F.hasPrivateLinkage() || F.hasInternalLinkage();
- auto SP =
- DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine, SPType,
- IsLocalToUnit, /*isDefinition=*/true, NextLine,
- DINode::FlagZero, /*isOptimized=*/true);
+ DISubprogram::DISPFlags SPFlags =
+ DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized;
+ if (F.hasPrivateLinkage() || F.hasInternalLinkage())
+ SPFlags |= DISubprogram::SPFlagLocalToUnit;
+ auto SP = DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine,
+ SPType, NextLine, DINode::FlagZero, SPFlags);
F.setSubprogram(SP);
for (BasicBlock &BB : F) {
// Attach debug locations.
diff --git a/contrib/llvm/tools/opt/NewPMDriver.cpp b/contrib/llvm/tools/opt/NewPMDriver.cpp
index a91d4cb5f9cd..211a3b151fe1 100644
--- a/contrib/llvm/tools/opt/NewPMDriver.cpp
+++ b/contrib/llvm/tools/opt/NewPMDriver.cpp
@@ -13,8 +13,8 @@
///
//===----------------------------------------------------------------------===//
-#include "Debugify.h"
#include "NewPMDriver.h"
+#include "Debugify.h"
#include "PassPrinters.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -29,6 +29,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -94,6 +95,12 @@ static cl::opt<std::string> PipelineStartEPPipeline(
cl::desc("A textual description of the function pass pipeline inserted at "
"the PipelineStart extension point into default pipelines"),
cl::Hidden);
+static cl::opt<std::string> OptimizerLastEPPipeline(
+ "passes-ep-optimizer-last",
+ cl::desc("A textual description of the function pass pipeline inserted at "
+ "the OptimizerLast extension point into default pipelines"),
+ cl::Hidden);
+
enum PGOKind { NoPGO, InstrGen, InstrUse, SampleUse };
static cl::opt<PGOKind> PGOKindFlag(
"pgo-kind", cl::init(NoPGO), cl::Hidden,
@@ -107,24 +114,30 @@ static cl::opt<PGOKind> PGOKindFlag(
"Use sampled profile to guide PGO.")));
static cl::opt<std::string> ProfileFile(
"profile-file", cl::desc("Path to the profile."), cl::Hidden);
+static cl::opt<std::string>
+ ProfileRemappingFile("profile-remapping-file",
+ cl::desc("Path to the profile remapping file."),
+ cl::Hidden);
static cl::opt<bool> DebugInfoForProfiling(
"new-pm-debug-info-for-profiling", cl::init(false), cl::Hidden,
cl::desc("Emit special debug info to enable PGO profile generation."));
/// @}}
template <typename PassManagerT>
-bool tryParsePipelineText(PassBuilder &PB, StringRef PipelineText) {
- if (PipelineText.empty())
+bool tryParsePipelineText(PassBuilder &PB,
+ const cl::opt<std::string> &PipelineOpt) {
+ if (PipelineOpt.empty())
return false;
// Verify the pipeline is parseable:
PassManagerT PM;
- if (PB.parsePassPipeline(PM, PipelineText))
- return true;
-
- errs() << "Could not parse pipeline '" << PipelineText
- << "'. I'm going to igore it.\n";
- return false;
+ if (auto Err = PB.parsePassPipeline(PM, PipelineOpt)) {
+ errs() << "Could not parse -" << PipelineOpt.ArgStr
+ << " pipeline: " << toString(std::move(Err))
+ << "... I'm going to ignore it.\n";
+ return false;
+ }
+ return true;
}
/// If one of the EPPipeline command line options was given, register callbacks
@@ -132,50 +145,69 @@ bool tryParsePipelineText(PassBuilder &PB, StringRef PipelineText) {
static void registerEPCallbacks(PassBuilder &PB, bool VerifyEachPass,
bool DebugLogging) {
if (tryParsePipelineText<FunctionPassManager>(PB, PeepholeEPPipeline))
- PB.registerPeepholeEPCallback([&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, PeepholeEPPipeline, VerifyEachPass,
- DebugLogging);
- });
+ PB.registerPeepholeEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](
+ FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ ExitOnError Err("Unable to parse PeepholeEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, PeepholeEPPipeline, VerifyEachPass,
+ DebugLogging));
+ });
if (tryParsePipelineText<LoopPassManager>(PB,
LateLoopOptimizationsEPPipeline))
PB.registerLateLoopOptimizationsEPCallback(
[&PB, VerifyEachPass, DebugLogging](
LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline,
- VerifyEachPass, DebugLogging);
+ ExitOnError Err("Unable to parse LateLoopOptimizationsEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline,
+ VerifyEachPass, DebugLogging));
});
if (tryParsePipelineText<LoopPassManager>(PB, LoopOptimizerEndEPPipeline))
- PB.registerLoopOptimizerEndEPCallback([&PB, VerifyEachPass, DebugLogging](
- LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline, VerifyEachPass,
- DebugLogging);
- });
+ PB.registerLoopOptimizerEndEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](
+ LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ ExitOnError Err("Unable to parse LoopOptimizerEndEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline,
+ VerifyEachPass, DebugLogging));
+ });
if (tryParsePipelineText<FunctionPassManager>(PB,
ScalarOptimizerLateEPPipeline))
PB.registerScalarOptimizerLateEPCallback(
[&PB, VerifyEachPass, DebugLogging](
FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline,
- VerifyEachPass, DebugLogging);
+ ExitOnError Err("Unable to parse ScalarOptimizerLateEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline,
+ VerifyEachPass, DebugLogging));
});
if (tryParsePipelineText<CGSCCPassManager>(PB, CGSCCOptimizerLateEPPipeline))
- PB.registerCGSCCOptimizerLateEPCallback([&PB, VerifyEachPass, DebugLogging](
- CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline, VerifyEachPass,
- DebugLogging);
- });
+ PB.registerCGSCCOptimizerLateEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](
+ CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ ExitOnError Err("Unable to parse CGSCCOptimizerLateEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline,
+ VerifyEachPass, DebugLogging));
+ });
if (tryParsePipelineText<FunctionPassManager>(PB, VectorizerStartEPPipeline))
- PB.registerVectorizerStartEPCallback([&PB, VerifyEachPass, DebugLogging](
- FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
- PB.parsePassPipeline(PM, VectorizerStartEPPipeline, VerifyEachPass,
- DebugLogging);
- });
+ PB.registerVectorizerStartEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](
+ FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ ExitOnError Err("Unable to parse VectorizerStartEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, VectorizerStartEPPipeline,
+ VerifyEachPass, DebugLogging));
+ });
if (tryParsePipelineText<ModulePassManager>(PB, PipelineStartEPPipeline))
PB.registerPipelineStartEPCallback(
[&PB, VerifyEachPass, DebugLogging](ModulePassManager &PM) {
- PB.parsePassPipeline(PM, PipelineStartEPPipeline, VerifyEachPass,
- DebugLogging);
+ ExitOnError Err("Unable to parse PipelineStartEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline, VerifyEachPass,
+ DebugLogging));
+ });
+ if (tryParsePipelineText<FunctionPassManager>(PB, OptimizerLastEPPipeline))
+ PB.registerOptimizerLastEPCallback(
+ [&PB, VerifyEachPass, DebugLogging](FunctionPassManager &PM,
+ PassBuilder::OptimizationLevel) {
+ ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline, VerifyEachPass,
+ DebugLogging));
});
}
@@ -199,21 +231,25 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
Optional<PGOOptions> P;
switch (PGOKindFlag) {
case InstrGen:
- P = PGOOptions(ProfileFile, "", "", true);
+ P = PGOOptions(ProfileFile, "", "", "", true);
break;
case InstrUse:
- P = PGOOptions("", ProfileFile, "", false);
+ P = PGOOptions("", ProfileFile, "", ProfileRemappingFile, false);
break;
case SampleUse:
- P = PGOOptions("", "", ProfileFile, false);
+ P = PGOOptions("", "", ProfileFile, ProfileRemappingFile, false);
break;
case NoPGO:
if (DebugInfoForProfiling)
- P = PGOOptions("", "", "", false, true);
+ P = PGOOptions("", "", "", "", false, true);
else
P = None;
}
- PassBuilder PB(TM, P);
+ PassInstrumentationCallbacks PIC;
+ StandardInstrumentations SI;
+ SI.registerCallbacks(PIC);
+
+ PassBuilder PB(TM, P, &PIC);
registerEPCallbacks(PB, VerifyEachPass, DebugPM);
// Load requested pass plugins and let them register pass builder callbacks
@@ -249,8 +285,8 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
// Specially handle the alias analysis manager so that we can register
// a custom pipeline of AA passes with it.
AAManager AA;
- if (!PB.parseAAPipeline(AA, AAPipeline)) {
- errs() << Arg0 << ": unable to parse AA pipeline description.\n";
+ if (auto Err = PB.parseAAPipeline(AA, AAPipeline)) {
+ errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
}
@@ -275,8 +311,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
if (EnableDebugify)
MPM.addPass(NewPMDebugifyPass());
- if (!PB.parsePassPipeline(MPM, PassPipeline, VerifyEachPass, DebugPM)) {
- errs() << Arg0 << ": unable to parse pass pipeline description.\n";
+ if (auto Err =
+ PB.parsePassPipeline(MPM, PassPipeline, VerifyEachPass, DebugPM)) {
+ errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
}
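Throughout this file, parseAAPipeline and parsePassPipeline now report failures through llvm::Error instead of a bool, which is why the driver prints toString(std::move(Err)). A short sketch of that idiom (the pipeline text and function name are arbitrary examples, not part of the patch):

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  bool parseOrReport(PassBuilder &PB, ModulePassManager &MPM, StringRef Text) {
    // A failed parse yields an Error carrying a message; toString() consumes
    // the Error and returns the text for diagnostics.
    if (Error Err = PB.parsePassPipeline(MPM, Text)) {
      errs() << "cannot parse '" << Text << "': " << toString(std::move(Err))
             << "\n";
      return false;
    }
    return true;
  }
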
diff --git a/contrib/llvm/tools/opt/opt.cpp b/contrib/llvm/tools/opt/opt.cpp
index 6e287b6c0ab6..a4967a234d9c 100644
--- a/contrib/llvm/tools/opt/opt.cpp
+++ b/contrib/llvm/tools/opt/opt.cpp
@@ -103,6 +103,10 @@ static cl::opt<bool>
OutputThinLTOBC("thinlto-bc",
cl::desc("Write output as ThinLTO-ready bitcode"));
+static cl::opt<bool>
+ SplitLTOUnit("thinlto-split-lto-unit",
+ cl::desc("Enable splitting of a ThinLTO LTOUnit"));
+
static cl::opt<std::string> ThinLinkBitcodeFile(
"thin-link-bitcode-file", cl::value_desc("filename"),
cl::desc(
@@ -463,6 +467,7 @@ int main(int argc, char **argv) {
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeGlobalMergePass(Registry);
initializeIndirectBrExpandPassPass(Registry);
+ initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
initializeEntryExitInstrumenterPass(Registry);
initializePostInlineEntryExitInstrumenterPass(Registry);
@@ -595,6 +600,9 @@ int main(int argc, char **argv) {
if (CheckBitcodeOutputToConsole(Out->os(), !Quiet))
NoOutput = true;
+ if (OutputThinLTOBC)
+ M->addModuleFlag(Module::Error, "EnableSplitLTOUnit", SplitLTOUnit);
+
if (PassPipeline.getNumOccurrences() > 0) {
OutputKind OK = OK_NoOutput;
if (!NoOutput)
diff --git a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index e808661b7a51..5b4229e64682 100644
--- a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -2415,10 +2415,9 @@ static void emitOperandMatchErrorDiagStrings(AsmMatcherInfo &Info, raw_ostream &
static void emitRegisterMatchErrorFunc(AsmMatcherInfo &Info, raw_ostream &OS) {
OS << "static unsigned getDiagKindFromRegisterClass(MatchClassKind "
"RegisterClass) {\n";
- if (std::none_of(Info.Classes.begin(), Info.Classes.end(),
- [](const ClassInfo &CI) {
- return CI.isRegisterClass() && !CI.DiagnosticType.empty();
- })) {
+ if (none_of(Info.Classes, [](const ClassInfo &CI) {
+ return CI.isRegisterClass() && !CI.DiagnosticType.empty();
+ })) {
OS << " return MCTargetAsmParser::Match_InvalidOperand;\n";
} else {
OS << " switch (RegisterClass) {\n";
diff --git a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 3c4c9c8e5c6e..a8f191181766 100644
--- a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -835,15 +835,20 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
for (unsigned i = 0, e = LastOpNo; i != e; ++i) {
// Skip over tied operands as they're not part of an alias declaration.
auto &Operands = CGA.ResultInst->Operands;
- unsigned OpNum = Operands.getSubOperandNumber(MIOpNum).first;
- if (Operands[OpNum].MINumOperands == 1 &&
- Operands[OpNum].getTiedRegister() != -1) {
- // Tied operands of different RegisterClass should be explicit within
- // an instruction's syntax and so cannot be skipped.
- int TiedOpNum = Operands[OpNum].getTiedRegister();
- if (Operands[OpNum].Rec->getName() ==
- Operands[TiedOpNum].Rec->getName())
- ++MIOpNum;
+ while (true) {
+ unsigned OpNum = Operands.getSubOperandNumber(MIOpNum).first;
+ if (Operands[OpNum].MINumOperands == 1 &&
+ Operands[OpNum].getTiedRegister() != -1) {
+ // Tied operands of different RegisterClass should be explicit within
+ // an instruction's syntax and so cannot be skipped.
+ int TiedOpNum = Operands[OpNum].getTiedRegister();
+ if (Operands[OpNum].Rec->getName() ==
+ Operands[TiedOpNum].Rec->getName()) {
+ ++MIOpNum;
+ continue;
+ }
+ }
+ break;
}
std::string Op = "MI->getOperand(" + utostr(MIOpNum) + ")";
diff --git a/contrib/llvm/utils/TableGen/CTagsEmitter.cpp b/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
index a0f83f1c9910..bd596bcb47a8 100644
--- a/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
@@ -73,7 +73,7 @@ void CTagsEmitter::run(raw_ostream &OS) {
for (const auto &D : Defs)
Tags.push_back(Tag(D.first, locate(D.second.get())));
// Emit tags.
- llvm::sort(Tags.begin(), Tags.end());
+ llvm::sort(Tags);
OS << "!_TAG_FILE_FORMAT\t1\t/original ctags format/\n";
OS << "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/\n";
for (const Tag &T : Tags)
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index cc2b9d788980..96c90c9cf6bd 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
@@ -26,6 +28,7 @@
#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <cstdio>
+#include <iterator>
#include <set>
using namespace llvm;
@@ -99,22 +102,29 @@ bool TypeSetByHwMode::isPossible() const {
bool TypeSetByHwMode::insert(const ValueTypeByHwMode &VVT) {
bool Changed = false;
+ bool ContainsDefault = false;
+ MVT DT = MVT::Other;
+
SmallDenseSet<unsigned, 4> Modes;
for (const auto &P : VVT) {
unsigned M = P.first;
Modes.insert(M);
// Make sure there exists a set for each specific mode from VVT.
Changed |= getOrCreate(M).insert(P.second).second;
+ // Cache VVT's default mode.
+ if (DefaultMode == M) {
+ ContainsDefault = true;
+ DT = P.second;
+ }
}
// If VVT has a default mode, add the corresponding type to all
// modes in "this" that do not exist in VVT.
- if (Modes.count(DefaultMode)) {
- MVT DT = VVT.getType(DefaultMode);
+ if (ContainsDefault)
for (auto &I : *this)
if (!Modes.count(I.first))
Changed |= I.second.insert(DT).second;
- }
+
return Changed;
}
@@ -198,16 +208,18 @@ void TypeSetByHwMode::writeToStream(const SetType &S, raw_ostream &OS) {
}
bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const {
- bool HaveDefault = hasDefault();
- if (HaveDefault != VTS.hasDefault())
+ // The isSimple call is much quicker than hasDefault - check this first.
+ bool IsSimple = isSimple();
+ bool VTSIsSimple = VTS.isSimple();
+ if (IsSimple && VTSIsSimple)
+ return *begin() == *VTS.begin();
+
+ // Speedup: We have a default if the set is simple.
+ bool HaveDefault = IsSimple || hasDefault();
+ bool VTSHaveDefault = VTSIsSimple || VTS.hasDefault();
+ if (HaveDefault != VTSHaveDefault)
return false;
- if (isSimple()) {
- if (VTS.isSimple())
- return *begin() == *VTS.begin();
- return false;
- }
-
SmallDenseSet<unsigned, 4> Modes;
for (auto &I : *this)
Modes.insert(I.first);
@@ -731,17 +743,12 @@ bool TypeInfer::EnforceSameSize(TypeSetByHwMode &A, TypeSetByHwMode &B) {
void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) {
ValidateOnExit _1(VTS, *this);
- TypeSetByHwMode Legal = getLegalTypes();
- bool HaveLegalDef = Legal.hasDefault();
+ const TypeSetByHwMode &Legal = getLegalTypes();
+ assert(Legal.isDefaultOnly() && "Default-mode only expected");
+ const TypeSetByHwMode::SetType &LegalTypes = Legal.get(DefaultMode);
- for (auto &I : VTS) {
- unsigned M = I.first;
- if (!Legal.hasMode(M) && !HaveLegalDef) {
- TP.error("Invalid mode " + Twine(M));
- return;
- }
- expandOverloads(I.second, Legal.get(M));
- }
+ for (auto &I : VTS)
+ expandOverloads(I.second, LegalTypes);
}
void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
@@ -793,17 +800,17 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
}
}
-TypeSetByHwMode TypeInfer::getLegalTypes() {
+const TypeSetByHwMode &TypeInfer::getLegalTypes() {
if (!LegalTypesCached) {
+ TypeSetByHwMode::SetType &LegalTypes = LegalCache.getOrCreate(DefaultMode);
// Stuff all types from all modes into the default mode.
const TypeSetByHwMode &LTS = TP.getDAGPatterns().getLegalTypes();
for (const auto &I : LTS)
- LegalCache.insert(I.second);
+ LegalTypes.insert(I.second);
LegalTypesCached = true;
}
- TypeSetByHwMode VTS;
- VTS.getOrCreate(DefaultMode) = LegalCache;
- return VTS;
+ assert(LegalCache.isDefaultOnly() && "Default-mode only expected");
+ return LegalCache;
}
#ifndef NDEBUG
@@ -819,6 +826,20 @@ TypeInfer::ValidateOnExit::~ValidateOnExit() {
}
#endif
+
+//===----------------------------------------------------------------------===//
+// ScopedName Implementation
+//===----------------------------------------------------------------------===//
+
+bool ScopedName::operator==(const ScopedName &o) const {
+ return Scope == o.Scope && Identifier == o.Identifier;
+}
+
+bool ScopedName::operator!=(const ScopedName &o) const {
+ return !(*this == o);
+}
+
+
//===----------------------------------------------------------------------===//
// TreePredicateFn Implementation
//===----------------------------------------------------------------------===//
@@ -1064,6 +1085,9 @@ bool TreePredicateFn::isPredefinedPredicateEqualTo(StringRef Field,
return false;
return Result == Value;
}
+bool TreePredicateFn::usesOperands() const {
+ return isPredefinedPredicateEqualTo("PredicateCodeUsesOperands", true);
+}
bool TreePredicateFn::isLoad() const {
return isPredefinedPredicateEqualTo("IsLoad", true);
}
@@ -1245,7 +1269,7 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const {
else
Result = " auto *N = cast<" + ClassName.str() + ">(Node);\n";
- return Result + getPredCode();
+ return (Twine(Result) + " (void)N;\n" + getPredCode()).str();
}
//===----------------------------------------------------------------------===//
@@ -1271,14 +1295,14 @@ static unsigned getPatternSize(const TreePatternNode *P,
// If this node has some predicate function that must match, it adds to the
// complexity of this node.
- if (!P->getPredicateFns().empty())
+ if (!P->getPredicateCalls().empty())
++Size;
// Count children in the count if they are also nodes.
for (unsigned i = 0, e = P->getNumChildren(); i != e; ++i) {
const TreePatternNode *Child = P->getChild(i);
if (!Child->isLeaf() && Child->getNumTypes()) {
- const TypeSetByHwMode &T0 = Child->getType(0);
+ const TypeSetByHwMode &T0 = Child->getExtType(0);
// At this point, all variable type sets should be simple, i.e. only
// have a default mode.
if (T0.getMachineValueType() != MVT::Other) {
@@ -1291,7 +1315,7 @@ static unsigned getPatternSize(const TreePatternNode *P,
Size += 5; // Matches a ConstantSDNode (+3) and a specific value (+2).
else if (Child->getComplexPatternInfo(CGP))
Size += getPatternSize(Child, CGP);
- else if (!Child->getPredicateFns().empty())
+ else if (!Child->getPredicateCalls().empty())
++Size;
}
}
@@ -1313,7 +1337,7 @@ std::string PatternToMatch::getPredicateCheck() const {
SmallVector<const Predicate*,4> PredList;
for (const Predicate &P : Predicates)
PredList.push_back(&P);
- llvm::sort(PredList.begin(), PredList.end(), deref<llvm::less>());
+ llvm::sort(PredList, deref<llvm::less>());
std::string Check;
for (unsigned i = 0, e = PredList.size(); i != e; ++i) {
@@ -1746,13 +1770,19 @@ void TreePatternNode::print(raw_ostream &OS) const {
OS << ")";
}
- for (const TreePredicateFn &Pred : PredicateFns)
- OS << "<<P:" << Pred.getFnName() << ">>";
+ for (const TreePredicateCall &Pred : PredicateCalls) {
+ OS << "<<P:";
+ if (Pred.Scope)
+ OS << Pred.Scope << ":";
+ OS << Pred.Fn.getFnName() << ">>";
+ }
if (TransformFn)
OS << "<<X:" << TransformFn->getName() << ">>";
if (!getName().empty())
OS << ":$" << getName();
+ for (const ScopedName &Name : NamesAsPredicateArg)
+ OS << ":$pred:" << Name.getScope() << ":" << Name.getIdentifier();
}
void TreePatternNode::dump() const {
print(errs());
@@ -1769,7 +1799,7 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
const MultipleUseVarSet &DepVars) const {
if (N == this) return true;
if (N->isLeaf() != isLeaf() || getExtTypes() != N->getExtTypes() ||
- getPredicateFns() != N->getPredicateFns() ||
+ getPredicateCalls() != N->getPredicateCalls() ||
getTransformFn() != N->getTransformFn())
return false;
@@ -1807,8 +1837,9 @@ TreePatternNodePtr TreePatternNode::clone() const {
getNumTypes());
}
New->setName(getName());
+ New->setNamesAsPredicateArg(getNamesAsPredicateArg());
New->Types = Types;
- New->setPredicateFns(getPredicateFns());
+ New->setPredicateCalls(getPredicateCalls());
New->setTransformFn(getTransformFn());
return New;
}
@@ -1840,8 +1871,8 @@ void TreePatternNode::SubstituteFormalArguments(
// We found a use of a formal argument, replace it with its value.
TreePatternNodePtr NewChild = ArgMap[Child->getName()];
assert(NewChild && "Couldn't find formal argument!");
- assert((Child->getPredicateFns().empty() ||
- NewChild->getPredicateFns() == Child->getPredicateFns()) &&
+ assert((Child->getPredicateCalls().empty() ||
+ NewChild->getPredicateCalls() == Child->getPredicateCalls()) &&
"Non-empty child predicate clobbered!");
setChild(i, std::move(NewChild));
}
@@ -1887,8 +1918,8 @@ void TreePatternNode::InlinePatternFragments(
return;
for (auto NewChild : ChildAlternatives[i])
- assert((Child->getPredicateFns().empty() ||
- NewChild->getPredicateFns() == Child->getPredicateFns()) &&
+ assert((Child->getPredicateCalls().empty() ||
+ NewChild->getPredicateCalls() == Child->getPredicateCalls()) &&
"Non-empty child predicate clobbered!");
}
@@ -1906,10 +1937,13 @@ void TreePatternNode::InlinePatternFragments(
// Copy over properties.
R->setName(getName());
- R->setPredicateFns(getPredicateFns());
+ R->setNamesAsPredicateArg(getNamesAsPredicateArg());
+ R->setPredicateCalls(getPredicateCalls());
R->setTransformFn(getTransformFn());
for (unsigned i = 0, e = getNumTypes(); i != e; ++i)
R->setType(i, getExtType(i));
+ for (unsigned i = 0, e = getNumResults(); i != e; ++i)
+ R->setResultIndex(i, getResultIndex(i));
// Register alternative.
OutAlternatives.push_back(R);
@@ -1941,10 +1975,19 @@ void TreePatternNode::InlinePatternFragments(
return;
}
+ TreePredicateFn PredFn(Frag);
+ unsigned Scope = 0;
+ if (TreePredicateFn(Frag).usesOperands())
+ Scope = TP.getDAGPatterns().allocateScope();
+
// Compute the map of formal to actual arguments.
std::map<std::string, TreePatternNodePtr> ArgMap;
for (unsigned i = 0, e = Frag->getNumArgs(); i != e; ++i) {
- const TreePatternNodePtr &Child = getChildShared(i);
+ TreePatternNodePtr Child = getChildShared(i);
+ if (Scope != 0) {
+ Child = Child->clone();
+ Child->addNameAsPredicateArg(ScopedName(Scope, Frag->getArgName(i)));
+ }
ArgMap[Frag->getArgName(i)] = Child;
}
@@ -1952,9 +1995,8 @@ void TreePatternNode::InlinePatternFragments(
for (auto Alternative : Frag->getTrees()) {
TreePatternNodePtr FragTree = Alternative->clone();
- TreePredicateFn PredFn(Frag);
if (!PredFn.isAlwaysTrue())
- FragTree->addPredicateFn(PredFn);
+ FragTree->addPredicateCall(PredFn, Scope);
// Resolve formal arguments to their actual value.
if (Frag->getNumArgs())
@@ -1967,8 +2009,8 @@ void TreePatternNode::InlinePatternFragments(
FragTree->UpdateNodeType(i, getExtType(i), TP);
// Transfer in the old predicates.
- for (const TreePredicateFn &Pred : getPredicateFns())
- FragTree->addPredicateFn(Pred);
+ for (const TreePredicateCall &Pred : getPredicateCalls())
+ FragTree->addPredicateCall(Pred);
// The fragment we inlined could have recursive inlining that is needed. See
// if there are any pattern fragments in it and inline them as needed.
@@ -3032,13 +3074,6 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
P->error("Operands list does not contain an entry for operand '" +
*OperandsSet.begin() + "'!");
- // If there is a code init for this fragment, keep track of the fact that
- // this fragment uses it.
- TreePredicateFn PredFn(P);
- if (!PredFn.isAlwaysTrue())
- for (auto T : P->getTrees())
- T->addPredicateFn(PredFn);
-
// If there is a node transformation corresponding to this, keep track of
// it.
Record *Transform = Frag->getValueAsDef("OperandTransform");
@@ -3176,7 +3211,8 @@ static bool HandleUse(TreePattern &I, TreePatternNodePtr Pat,
void CodeGenDAGPatterns::FindPatternInputsAndOutputs(
TreePattern &I, TreePatternNodePtr Pat,
std::map<std::string, TreePatternNodePtr> &InstInputs,
- std::map<std::string, TreePatternNodePtr> &InstResults,
+ MapVector<std::string, TreePatternNodePtr, std::map<std::string, unsigned>>
+ &InstResults,
std::vector<Record *> &InstImpResults) {
// The instruction pattern still has unresolved fragments. For *named*
@@ -3496,7 +3532,8 @@ void CodeGenDAGPatterns::parseInstructionPattern(
// InstResults - Keep track of all the virtual registers that are 'set'
// in the instruction, including what reg class they are.
- std::map<std::string, TreePatternNodePtr> InstResults;
+ MapVector<std::string, TreePatternNodePtr, std::map<std::string, unsigned>>
+ InstResults;
std::vector<Record*> InstImpResults;
@@ -3533,19 +3570,28 @@ void CodeGenDAGPatterns::parseInstructionPattern(
// Check that all of the results occur first in the list.
std::vector<Record*> Results;
+ std::vector<unsigned> ResultIndices;
SmallVector<TreePatternNodePtr, 2> ResNodes;
for (unsigned i = 0; i != NumResults; ++i) {
- if (i == CGI.Operands.size())
- I.error("'" + InstResults.begin()->first +
- "' set but does not appear in operand list!");
+ if (i == CGI.Operands.size()) {
+ const std::string &OpName =
+ std::find_if(InstResults.begin(), InstResults.end(),
+ [](const std::pair<std::string, TreePatternNodePtr> &P) {
+ return P.second;
+ })
+ ->first;
+
+ I.error("'" + OpName + "' set but does not appear in operand list!");
+ }
+
const std::string &OpName = CGI.Operands[i].Name;
// Check that it exists in InstResults.
- TreePatternNodePtr RNode = InstResults[OpName];
- if (!RNode)
+ auto InstResultIter = InstResults.find(OpName);
+ if (InstResultIter == InstResults.end() || !InstResultIter->second)
I.error("Operand $" + OpName + " does not exist in operand list!");
-
+ TreePatternNodePtr RNode = InstResultIter->second;
Record *R = cast<DefInit>(RNode->getLeafValue())->getDef();
ResNodes.push_back(std::move(RNode));
if (!R)
@@ -3558,8 +3604,11 @@ void CodeGenDAGPatterns::parseInstructionPattern(
// Remember the return type.
Results.push_back(CGI.Operands[i].Rec);
+ // Remember the result index.
+ ResultIndices.push_back(std::distance(InstResults.begin(), InstResultIter));
+
// Okay, this one checks out.
- InstResults.erase(OpName);
+ InstResultIter->second = nullptr;
}
// Loop over the inputs next.
@@ -3598,7 +3647,7 @@ void CodeGenDAGPatterns::parseInstructionPattern(
TreePatternNodePtr OpNode = InVal->clone();
// No predicate is useful on the result.
- OpNode->clearPredicateFns();
+ OpNode->clearPredicateCalls();
// Promote the xform function to be an explicit node if set.
if (Record *Xform = OpNode->getTransformFn()) {
@@ -3623,6 +3672,7 @@ void CodeGenDAGPatterns::parseInstructionPattern(
for (unsigned i = 0; i != NumResults; ++i) {
assert(ResNodes[i]->getNumTypes() == 1 && "FIXME: Unhandled");
ResultPattern->setType(i, ResNodes[i]->getExtType(0));
+ ResultPattern->setResultIndex(i, ResultIndices[i]);
}
// FIXME: Assume only the first tree is the pattern. The others are clobber
@@ -3737,7 +3787,7 @@ std::vector<Predicate> CodeGenDAGPatterns::makePredList(ListInit *L) {
}
// Sort so that different orders get canonicalized to the same string.
- llvm::sort(Preds.begin(), Preds.end());
+ llvm::sort(Preds);
return Preds;
}
@@ -4082,7 +4132,8 @@ void CodeGenDAGPatterns::ParsePatterns() {
// Validate that the input pattern is correct.
std::map<std::string, TreePatternNodePtr> InstInputs;
- std::map<std::string, TreePatternNodePtr> InstResults;
+ MapVector<std::string, TreePatternNodePtr, std::map<std::string, unsigned>>
+ InstResults;
std::vector<Record*> InstImpResults;
for (unsigned j = 0, ee = Pattern.getNumTrees(); j != ee; ++j)
FindPatternInputsAndOutputs(Pattern, Pattern.getTree(j), InstInputs,
@@ -4253,7 +4304,8 @@ static void CombineChildVariants(
// Copy over properties.
R->setName(Orig->getName());
- R->setPredicateFns(Orig->getPredicateFns());
+ R->setNamesAsPredicateArg(Orig->getNamesAsPredicateArg());
+ R->setPredicateCalls(Orig->getPredicateCalls());
R->setTransformFn(Orig->getTransformFn());
for (unsigned i = 0, e = Orig->getNumTypes(); i != e; ++i)
R->setType(i, Orig->getExtType(i));
@@ -4305,7 +4357,7 @@ GatherChildrenOfAssociativeOpcode(TreePatternNodePtr N,
Record *Operator = N->getOperator();
// Only permit raw nodes.
- if (!N->getName().empty() || !N->getPredicateFns().empty() ||
+ if (!N->getName().empty() || !N->getPredicateCalls().empty() ||
N->getTransformFn()) {
Children.push_back(N);
return;
@@ -4456,8 +4508,18 @@ void CodeGenDAGPatterns::GenerateVariants() {
// intentionally do not reconsider these. Any variants of added patterns have
// already been added.
//
- for (unsigned i = 0, e = PatternsToMatch.size(); i != e; ++i) {
- MultipleUseVarSet DepVars;
+ const unsigned NumOriginalPatterns = PatternsToMatch.size();
+ BitVector MatchedPatterns(NumOriginalPatterns);
+ std::vector<BitVector> MatchedPredicates(NumOriginalPatterns,
+ BitVector(NumOriginalPatterns));
+
+ typedef std::pair<MultipleUseVarSet, std::vector<TreePatternNodePtr>>
+ DepsAndVariants;
+ std::map<unsigned, DepsAndVariants> PatternsWithVariants;
+
+ // Collect patterns with more than one variant.
+ for (unsigned i = 0; i != NumOriginalPatterns; ++i) {
+ MultipleUseVarSet DepVars;
std::vector<TreePatternNodePtr> Variants;
FindDepVars(PatternsToMatch[i].getSrcPattern(), DepVars);
LLVM_DEBUG(errs() << "Dependent/multiply used variables: ");
@@ -4467,14 +4529,46 @@ void CodeGenDAGPatterns::GenerateVariants() {
*this, DepVars);
assert(!Variants.empty() && "Must create at least original variant!");
- if (Variants.size() == 1) // No additional variants for this pattern.
+ if (Variants.size() == 1) // No additional variants for this pattern.
continue;
LLVM_DEBUG(errs() << "FOUND VARIANTS OF: ";
PatternsToMatch[i].getSrcPattern()->dump(); errs() << "\n");
+ PatternsWithVariants[i] = std::make_pair(DepVars, Variants);
+
+ // Cache matching predicates.
+ if (MatchedPatterns[i])
+ continue;
+
+ const std::vector<Predicate> &Predicates =
+ PatternsToMatch[i].getPredicates();
+
+ BitVector &Matches = MatchedPredicates[i];
+ MatchedPatterns.set(i);
+ Matches.set(i);
+
+ // Don't test patterns that have already been cached - it won't match.
+ for (unsigned p = 0; p != NumOriginalPatterns; ++p)
+ if (!MatchedPatterns[p])
+ Matches[p] = (Predicates == PatternsToMatch[p].getPredicates());
+
+ // Copy this to all the matching patterns.
+ for (int p = Matches.find_first(); p != -1; p = Matches.find_next(p))
+ if (p != (int)i) {
+ MatchedPatterns.set(p);
+ MatchedPredicates[p] = Matches;
+ }
+ }
+
+ for (auto it : PatternsWithVariants) {
+ unsigned i = it.first;
+ const MultipleUseVarSet &DepVars = it.second.first;
+ const std::vector<TreePatternNodePtr> &Variants = it.second.second;
+
for (unsigned v = 0, e = Variants.size(); v != e; ++v) {
TreePatternNodePtr Variant = Variants[v];
+ BitVector &Matches = MatchedPredicates[i];
LLVM_DEBUG(errs() << " VAR#" << v << ": "; Variant->dump();
errs() << "\n");
@@ -4483,8 +4577,7 @@ void CodeGenDAGPatterns::GenerateVariants() {
bool AlreadyExists = false;
for (unsigned p = 0, e = PatternsToMatch.size(); p != e; ++p) {
// Skip if the top level predicates do not match.
- if (PatternsToMatch[i].getPredicates() !=
- PatternsToMatch[p].getPredicates())
+ if (!Matches[p])
continue;
// Check to see if this variant already exists.
if (Variant->isIsomorphicTo(PatternsToMatch[p].getSrcPattern(),
@@ -4503,6 +4596,11 @@ void CodeGenDAGPatterns::GenerateVariants() {
Variant, PatternsToMatch[i].getDstPatternShared(),
PatternsToMatch[i].getDstRegs(),
PatternsToMatch[i].getAddedComplexity(), Record::getNewUID()));
+ MatchedPredicates.push_back(Matches);
+
+ // Add a new match the same as this pattern.
+ for (auto &P : MatchedPredicates)
+ P.push_back(P[i]);
}
LLVM_DEBUG(errs() << "\n");
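GenerateVariants now precomputes which patterns have identical predicate lists in a vector of BitVectors, instead of re-comparing the predicate vectors inside the inner loop. A standalone illustration of the BitVector operations it relies on (the size and indices below are made up):

  #include "llvm/ADT/BitVector.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void walkMatches() {
    BitVector Matches(8); // one bit per pattern, all initially clear
    Matches.set(1);
    Matches.set(5);
    // Visit only the set bits, the way the variant loop walks the patterns
    // whose predicate lists compare equal.
    for (int P = Matches.find_first(); P != -1; P = Matches.find_next(P))
      outs() << "pattern #" << P << " shares predicates\n";
  }
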
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
index 9be3816cc7fc..4be9afdcacd2 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -19,6 +19,7 @@
#include "CodeGenIntrinsics.h"
#include "CodeGenTarget.h"
#include "SDNodeProperties.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
@@ -28,6 +29,7 @@
#include <array>
#include <functional>
#include <map>
+#include <numeric>
#include <set>
#include <vector>
@@ -350,11 +352,11 @@ struct TypeInfer {
bool Validate = true; // Indicate whether to validate types.
private:
- TypeSetByHwMode getLegalTypes();
+ const TypeSetByHwMode &getLegalTypes();
- /// Cached legal types.
+ /// Cached legal types (in default mode).
bool LegalTypesCached = false;
- TypeSetByHwMode::SetType LegalCache = {};
+ TypeSetByHwMode LegalCache;
};
/// Set type used to track multiply used variables in patterns
@@ -408,6 +410,29 @@ struct SDTypeConstraint {
TreePattern &TP) const;
};
+/// ScopedName - A name of a node associated with a "scope" that indicates
+/// the context (e.g. instance of Pattern or PatFrag) in which the name was
+/// used. This enables substitution of pattern fragments while keeping track
+/// of what name(s) were originally given to various nodes in the tree.
+class ScopedName {
+ unsigned Scope;
+ std::string Identifier;
+public:
+ ScopedName(unsigned Scope, StringRef Identifier)
+ : Scope(Scope), Identifier(Identifier) {
+ assert(Scope != 0 &&
+ "Scope == 0 is used to indicate predicates without arguments");
+ }
+
+ unsigned getScope() const { return Scope; }
+ const std::string &getIdentifier() const { return Identifier; }
+
+ std::string getFullName() const;
+
+ bool operator==(const ScopedName &o) const;
+ bool operator!=(const ScopedName &o) const;
+};
+
/// SDNodeInfo - One of these records is created for each SDNode instance in
/// the target .td file. This represents the various dag nodes we will be
/// processing.
@@ -503,6 +528,9 @@ public:
/// usable as part of an identifier.
StringRef getImmTypeIdentifier() const;
+ // Predicate code uses the PatFrag's captured operands.
+ bool usesOperands() const;
+
// Is the desired predefined predicate for a load?
bool isLoad() const;
// Is the desired predefined predicate for a store?
@@ -570,6 +598,23 @@ private:
bool isPredefinedPredicateEqualTo(StringRef Field, bool Value) const;
};
+struct TreePredicateCall {
+ TreePredicateFn Fn;
+
+ // Scope -- unique identifier for retrieving named arguments. 0 is used when
+ // the predicate does not use named arguments.
+ unsigned Scope;
+
+ TreePredicateCall(const TreePredicateFn &Fn, unsigned Scope)
+ : Fn(Fn), Scope(Scope) {}
+
+ bool operator==(const TreePredicateCall &o) const {
+ return Fn == o.Fn && Scope == o.Scope;
+ }
+ bool operator!=(const TreePredicateCall &o) const {
+ return !(*this == o);
+ }
+};
class TreePatternNode {
/// The type of each node result. Before and during type inference, each
@@ -577,6 +622,9 @@ class TreePatternNode {
/// each is a single concrete type.
std::vector<TypeSetByHwMode> Types;
+ /// The index of each result in results of the pattern.
+ std::vector<unsigned> ResultPerm;
+
/// Operator - The Record for the operator if this is an interior node (not
/// a leaf).
Record *Operator;
@@ -589,9 +637,11 @@ class TreePatternNode {
///
std::string Name;
- /// PredicateFns - The predicate functions to execute on this node to check
+ std::vector<ScopedName> NamesAsPredicateArg;
+
+ /// PredicateCalls - The predicate functions to execute on this node to check
/// for a match. If this list is empty, no predicate is involved.
- std::vector<TreePredicateFn> PredicateFns;
+ std::vector<TreePredicateCall> PredicateCalls;
/// TransformFn - The transformation function to execute on this node before
/// it can be substituted into the resulting instruction on a pattern match.
@@ -605,16 +655,30 @@ public:
: Operator(Op), Val(nullptr), TransformFn(nullptr),
Children(std::move(Ch)) {
Types.resize(NumResults);
+ ResultPerm.resize(NumResults);
+ std::iota(ResultPerm.begin(), ResultPerm.end(), 0);
}
TreePatternNode(Init *val, unsigned NumResults) // leaf ctor
: Operator(nullptr), Val(val), TransformFn(nullptr) {
Types.resize(NumResults);
+ ResultPerm.resize(NumResults);
+ std::iota(ResultPerm.begin(), ResultPerm.end(), 0);
}
bool hasName() const { return !Name.empty(); }
const std::string &getName() const { return Name; }
void setName(StringRef N) { Name.assign(N.begin(), N.end()); }
+ const std::vector<ScopedName> &getNamesAsPredicateArg() const {
+ return NamesAsPredicateArg;
+ }
+ void setNamesAsPredicateArg(const std::vector<ScopedName>& Names) {
+ NamesAsPredicateArg = Names;
+ }
+ void addNameAsPredicateArg(const ScopedName &N) {
+ NamesAsPredicateArg.push_back(N);
+ }
+
bool isLeaf() const { return Val != nullptr; }
// Type accessors.
@@ -639,6 +703,10 @@ public:
return Types[ResNo].empty();
}
+ unsigned getNumResults() const { return ResultPerm.size(); }
+ unsigned getResultIndex(unsigned ResNo) const { return ResultPerm[ResNo]; }
+ void setResultIndex(unsigned ResNo, unsigned RI) { ResultPerm[ResNo] = RI; }
+
Init *getLeafValue() const { assert(isLeaf()); return Val; }
Record *getOperator() const { assert(!isLeaf()); return Operator; }
@@ -661,20 +729,24 @@ public:
bool hasPossibleType() const;
bool setDefaultMode(unsigned Mode);
- bool hasAnyPredicate() const { return !PredicateFns.empty(); }
+ bool hasAnyPredicate() const { return !PredicateCalls.empty(); }
- const std::vector<TreePredicateFn> &getPredicateFns() const {
- return PredicateFns;
+ const std::vector<TreePredicateCall> &getPredicateCalls() const {
+ return PredicateCalls;
}
- void clearPredicateFns() { PredicateFns.clear(); }
- void setPredicateFns(const std::vector<TreePredicateFn> &Fns) {
- assert(PredicateFns.empty() && "Overwriting non-empty predicate list!");
- PredicateFns = Fns;
+ void clearPredicateCalls() { PredicateCalls.clear(); }
+ void setPredicateCalls(const std::vector<TreePredicateCall> &Calls) {
+ assert(PredicateCalls.empty() && "Overwriting non-empty predicate list!");
+ PredicateCalls = Calls;
}
- void addPredicateFn(const TreePredicateFn &Fn) {
- assert(!Fn.isAlwaysTrue() && "Empty predicate string!");
- if (!is_contained(PredicateFns, Fn))
- PredicateFns.push_back(Fn);
+ void addPredicateCall(const TreePredicateCall &Call) {
+ assert(!Call.Fn.isAlwaysTrue() && "Empty predicate string!");
+ assert(!is_contained(PredicateCalls, Call) && "predicate applied recursively");
+ PredicateCalls.push_back(Call);
+ }
+ void addPredicateCall(const TreePredicateFn &Fn, unsigned Scope) {
+ assert((Scope != 0) == Fn.usesOperands());
+ addPredicateCall(TreePredicateCall(Fn, Scope));
}
Record *getTransformFn() const { return TransformFn; }
@@ -1081,6 +1153,8 @@ class CodeGenDAGPatterns {
using PatternRewriterFn = std::function<void (TreePattern *)>;
PatternRewriterFn PatternRewriter;
+ unsigned NumScopes = 0;
+
public:
CodeGenDAGPatterns(RecordKeeper &R,
PatternRewriterFn PatternRewriter = nullptr);
@@ -1196,6 +1270,8 @@ public:
bool hasTargetIntrinsics() { return !TgtIntrinsics.empty(); }
+ unsigned allocateScope() { return ++NumScopes; }
+
private:
void ParseNodeInfo();
void ParseNodeTransforms();
@@ -1218,7 +1294,8 @@ private:
void FindPatternInputsAndOutputs(
TreePattern &I, TreePatternNodePtr Pat,
std::map<std::string, TreePatternNodePtr> &InstInputs,
- std::map<std::string, TreePatternNodePtr> &InstResults,
+ MapVector<std::string, TreePatternNodePtr,
+ std::map<std::string, unsigned>> &InstResults,
std::vector<Record *> &InstImpResults);
};
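InstResults switches from std::map to a MapVector keyed by the same std::map type, so lookups behave as before but iteration follows insertion order; that is what lets the .cpp side compute a stable result index with std::distance. A small standalone sketch with dummy operand names:

  #include "llvm/ADT/MapVector.h"
  #include "llvm/Support/raw_ostream.h"
  #include <map>
  #include <string>
  using namespace llvm;

  void printInOrder() {
    MapVector<std::string, int, std::map<std::string, unsigned>> Results;
    Results.insert({"dst", 0});
    Results.insert({"carry", 1});
    // Unlike std::map, iteration yields "dst" before "carry", regardless of
    // the keys' lexicographic order.
    for (const auto &KV : Results)
      outs() << KV.first << " -> " << KV.second << "\n";
  }
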
diff --git a/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp b/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
index eb35020d3d3a..6d06ba2c8b67 100644
--- a/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -202,7 +202,8 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
return std::make_pair(0U, 0U);
}
-static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
+static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops,
+ Record *Rec) {
// EARLY_CLOBBER: @early $reg
std::string::size_type wpos = CStr.find_first_of(" \t");
std::string::size_type start = CStr.find_first_not_of(" \t");
@@ -211,13 +212,17 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
std::string Name = CStr.substr(wpos+1);
wpos = Name.find_first_not_of(" \t");
if (wpos == std::string::npos)
- PrintFatalError("Illegal format for @earlyclobber constraint: '" + CStr + "'");
+ PrintFatalError(
+ Rec->getLoc(), "Illegal format for @earlyclobber constraint in '" +
+ Rec->getName() + "': '" + CStr + "'");
Name = Name.substr(wpos);
std::pair<unsigned,unsigned> Op = Ops.ParseOperandName(Name, false);
// Build the string for the operand
if (!Ops[Op.first].Constraints[Op.second].isNone())
- PrintFatalError("Operand '" + Name + "' cannot have multiple constraints!");
+ PrintFatalError(
+ Rec->getLoc(), "Operand '" + Name + "' of '" + Rec->getName() +
+ "' cannot have multiple constraints!");
Ops[Op.first].Constraints[Op.second] =
CGIOperandList::ConstraintInfo::getEarlyClobber();
return;
@@ -225,39 +230,73 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
// Only other constraint is "TIED_TO" for now.
std::string::size_type pos = CStr.find_first_of('=');
- assert(pos != std::string::npos && "Unrecognized constraint");
+ if (pos == std::string::npos)
+ PrintFatalError(
+ Rec->getLoc(), "Unrecognized constraint '" + CStr +
+ "' in '" + Rec->getName() + "'");
start = CStr.find_first_not_of(" \t");
- std::string Name = CStr.substr(start, pos - start);
// TIED_TO: $src1 = $dst
- wpos = Name.find_first_of(" \t");
+ wpos = CStr.find_first_of(" \t", start);
+ if (wpos == std::string::npos || wpos > pos)
+ PrintFatalError(
+ Rec->getLoc(), "Illegal format for tied-to constraint in '" +
+ Rec->getName() + "': '" + CStr + "'");
+ std::string LHSOpName = StringRef(CStr).substr(start, wpos - start);
+ std::pair<unsigned,unsigned> LHSOp = Ops.ParseOperandName(LHSOpName, false);
+
+ wpos = CStr.find_first_not_of(" \t", pos + 1);
if (wpos == std::string::npos)
- PrintFatalError("Illegal format for tied-to constraint: '" + CStr + "'");
- std::string DestOpName = Name.substr(0, wpos);
- std::pair<unsigned,unsigned> DestOp = Ops.ParseOperandName(DestOpName, false);
-
- Name = CStr.substr(pos+1);
- wpos = Name.find_first_not_of(" \t");
- if (wpos == std::string::npos)
- PrintFatalError("Illegal format for tied-to constraint: '" + CStr + "'");
+ PrintFatalError(
+ Rec->getLoc(), "Illegal format for tied-to constraint: '" + CStr + "'");
+
+ std::string RHSOpName = StringRef(CStr).substr(wpos);
+ std::pair<unsigned,unsigned> RHSOp = Ops.ParseOperandName(RHSOpName, false);
+
+ // Sort the operands into order, which should put the output one
+ // first. But keep the original order, for use in diagnostics.
+ bool FirstIsDest = (LHSOp < RHSOp);
+ std::pair<unsigned,unsigned> DestOp = (FirstIsDest ? LHSOp : RHSOp);
+ StringRef DestOpName = (FirstIsDest ? LHSOpName : RHSOpName);
+ std::pair<unsigned,unsigned> SrcOp = (FirstIsDest ? RHSOp : LHSOp);
+ StringRef SrcOpName = (FirstIsDest ? RHSOpName : LHSOpName);
+
+ // Ensure one operand is a def and the other is a use.
+ if (DestOp.first >= Ops.NumDefs)
+ PrintFatalError(
+ Rec->getLoc(), "Input operands '" + LHSOpName + "' and '" + RHSOpName +
+ "' of '" + Rec->getName() + "' cannot be tied!");
+ if (SrcOp.first < Ops.NumDefs)
+ PrintFatalError(
+ Rec->getLoc(), "Output operands '" + LHSOpName + "' and '" + RHSOpName +
+ "' of '" + Rec->getName() + "' cannot be tied!");
+
+ // The constraint has to go on the operand with higher index, i.e.
+ // the source one. Check there isn't another constraint there
+ // already.
+ if (!Ops[SrcOp.first].Constraints[SrcOp.second].isNone())
+ PrintFatalError(
+ Rec->getLoc(), "Operand '" + SrcOpName + "' of '" + Rec->getName() +
+ "' cannot have multiple constraints!");
- std::string SrcOpName = Name.substr(wpos);
- std::pair<unsigned,unsigned> SrcOp = Ops.ParseOperandName(SrcOpName, false);
- if (SrcOp > DestOp) {
- std::swap(SrcOp, DestOp);
- std::swap(SrcOpName, DestOpName);
+ unsigned DestFlatOpNo = Ops.getFlattenedOperandNumber(DestOp);
+ auto NewConstraint = CGIOperandList::ConstraintInfo::getTied(DestFlatOpNo);
+
+ // Check that the earlier operand is not the target of another tie
+ // before making it the target of this one.
+ for (const CGIOperandList::OperandInfo &Op : Ops) {
+ for (unsigned i = 0; i < Op.MINumOperands; i++)
+ if (Op.Constraints[i] == NewConstraint)
+ PrintFatalError(
+ Rec->getLoc(), "Operand '" + DestOpName + "' of '" + Rec->getName() +
+ "' cannot have multiple operands tied to it!");
}
- unsigned FlatOpNo = Ops.getFlattenedOperandNumber(SrcOp);
-
- if (!Ops[DestOp.first].Constraints[DestOp.second].isNone())
- PrintFatalError("Operand '" + DestOpName +
- "' cannot have multiple constraints!");
- Ops[DestOp.first].Constraints[DestOp.second] =
- CGIOperandList::ConstraintInfo::getTied(FlatOpNo);
+ Ops[SrcOp.first].Constraints[SrcOp.second] = NewConstraint;
}
-static void ParseConstraints(const std::string &CStr, CGIOperandList &Ops) {
+static void ParseConstraints(const std::string &CStr, CGIOperandList &Ops,
+ Record *Rec) {
if (CStr.empty()) return;
const std::string delims(",");
@@ -269,7 +308,7 @@ static void ParseConstraints(const std::string &CStr, CGIOperandList &Ops) {
if (eidx == std::string::npos)
eidx = CStr.length();
- ParseConstraint(CStr.substr(bidx, eidx - bidx), Ops);
+ ParseConstraint(CStr.substr(bidx, eidx - bidx), Ops, Rec);
bidx = CStr.find_first_not_of(delims, eidx);
}
}
@@ -302,6 +341,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
AsmString = R->getValueAsString("AsmString");
isReturn = R->getValueAsBit("isReturn");
+ isEHScopeReturn = R->getValueAsBit("isEHScopeReturn");
isBranch = R->getValueAsBit("isBranch");
isIndirectBranch = R->getValueAsBit("isIndirectBranch");
isCompare = R->getValueAsBit("isCompare");
@@ -330,6 +370,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
isConvergent = R->getValueAsBit("isConvergent");
hasNoSchedulingInfo = R->getValueAsBit("hasNoSchedulingInfo");
FastISelShouldIgnore = R->getValueAsBit("FastISelShouldIgnore");
+ variadicOpsAreDefs = R->getValueAsBit("variadicOpsAreDefs");
bool Unset;
mayLoad = R->getValueAsBitOrUnset("mayLoad", Unset);
@@ -352,7 +393,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
hasChain_Inferred = false;
// Parse Constraints.
- ParseConstraints(R->getValueAsString("Constraints"), Operands);
+ ParseConstraints(R->getValueAsString("Constraints"), Operands, R);
// Parse the DisableEncoding field.
Operands.ProcessDisableEncoding(R->getValueAsString("DisableEncoding"));
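Editor's note: the rewritten ParseConstraint above works directly on the raw constraint string, locating the '=' and trimming whitespace around both operand names before resolving them. A minimal standalone sketch of that string handling (the helper names are hypothetical; this is not the TableGen code itself):

#include <cassert>
#include <iostream>
#include <string>
#include <utility>

static std::string trim(const std::string &S) {
  const char *WS = " \t";
  std::string::size_type B = S.find_first_not_of(WS);
  if (B == std::string::npos)
    return "";
  std::string::size_type E = S.find_last_not_of(WS);
  return S.substr(B, E - B + 1);
}

// Split a "$a = $b" constraint into its two operand names; empty on error.
static std::pair<std::string, std::string>
splitTiedConstraint(const std::string &CStr) {
  std::string::size_type Pos = CStr.find('=');
  if (Pos == std::string::npos)
    return {"", ""};
  return {trim(CStr.substr(0, Pos)), trim(CStr.substr(Pos + 1))};
}

int main() {
  auto P = splitTiedConstraint("  $src1 = $dst ");
  assert(P.first == "$src1" && P.second == "$dst");
  std::cout << P.first << " is tied to " << P.second << "\n";
}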
diff --git a/contrib/llvm/utils/TableGen/CodeGenInstruction.h b/contrib/llvm/utils/TableGen/CodeGenInstruction.h
index a50c3e60e6e7..2e3d2f48a928 100644
--- a/contrib/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/contrib/llvm/utils/TableGen/CodeGenInstruction.h
@@ -57,6 +57,17 @@ template <typename T> class ArrayRef;
assert(isTied());
return OtherTiedOperand;
}
+
+ bool operator==(const ConstraintInfo &RHS) const {
+ if (Kind != RHS.Kind)
+ return false;
+ if (Kind == Tied && OtherTiedOperand != RHS.OtherTiedOperand)
+ return false;
+ return true;
+ }
+ bool operator!=(const ConstraintInfo &RHS) const {
+ return !(*this == RHS);
+ }
};
/// OperandInfo - The information we keep track of for each operand in the
@@ -222,6 +233,7 @@ template <typename T> class ArrayRef;
// Various boolean values we track for the instruction.
bool isReturn : 1;
+ bool isEHScopeReturn : 1;
bool isBranch : 1;
bool isIndirectBranch : 1;
bool isCompare : 1;
@@ -263,6 +275,7 @@ template <typename T> class ArrayRef;
bool FastISelShouldIgnore : 1;
bool hasChain : 1;
bool hasChain_Inferred : 1;
+ bool variadicOpsAreDefs : 1;
std::string DeprecatedReason;
bool HasComplexDeprecationPredicate;
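Editor's note: the operator== added to ConstraintInfo above lets ParseConstraint scan every existing constraint and reject a second tie to the same destination operand. A simplified, self-contained sketch of that duplicate-tie check, using an assumed stand-in type rather than the real TableGen classes:

#include <cassert>
#include <vector>

// Stand-in for CGIOperandList::ConstraintInfo (assumed, simplified).
struct TiedConstraint {
  enum Kind { None, Tied } K = None;
  unsigned Other = 0; // flattened index of the tied-to operand
  bool operator==(const TiedConstraint &RHS) const {
    return K == RHS.K && (K != Tied || Other == RHS.Other);
  }
};

// True if some existing constraint already ties an operand to the same
// destination as New.
static bool hasDuplicateTie(const std::vector<TiedConstraint> &Existing,
                            const TiedConstraint &New) {
  for (const TiedConstraint &C : Existing)
    if (C == New)
      return true;
  return false;
}

int main() {
  std::vector<TiedConstraint> Existing = {{TiedConstraint::Tied, 0}};
  assert(hasDuplicateTie(Existing, {TiedConstraint::Tied, 0}));
  assert(!hasDuplicateTie(Existing, {TiedConstraint::Tied, 1}));
}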
diff --git a/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h b/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
index 5d0715959120..9487a79c1432 100644
--- a/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -124,6 +124,9 @@ struct CodeGenIntrinsic {
/// True if the intrinsic is no-return.
bool isNoReturn;
+ /// True if the intrinsic is cold.
+ bool isCold;
+
/// True if the intrinsic is marked as convergent.
bool isConvergent;
diff --git a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
index b0d13b7d38f3..74a2b078dfb3 100644
--- a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
@@ -725,7 +726,7 @@ struct TupleExpander : SetTheory::Expander {
//===----------------------------------------------------------------------===//
static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) {
- llvm::sort(M.begin(), M.end(), deref<llvm::less>());
+ llvm::sort(M, deref<llvm::less>());
M.erase(std::unique(M.begin(), M.end(), deref<llvm::equal>()), M.end());
}
@@ -997,7 +998,7 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
for (auto &RC : RegClasses)
if (SuperRegRCsBV[RC.EnumValue])
SuperRegRCs.emplace_back(&RC);
- llvm::sort(SuperRegRCs.begin(), SuperRegRCs.end(), SizeOrder);
+ llvm::sort(SuperRegRCs, SizeOrder);
assert(SuperRegRCs.front() == BiggestSuperRegRC && "Biggest class wasn't first");
// Find all the subreg classes and order them by size too.
@@ -1008,7 +1009,7 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
if (SuperRegClassesBV.any())
SuperRegClasses.push_back(std::make_pair(&RC, SuperRegClassesBV));
}
- llvm::sort(SuperRegClasses.begin(), SuperRegClasses.end(),
+ llvm::sort(SuperRegClasses,
[&](const std::pair<CodeGenRegisterClass *, BitVector> &A,
const std::pair<CodeGenRegisterClass *, BitVector> &B) {
return SizeOrder(A.first, B.first);
@@ -1073,7 +1074,7 @@ void CodeGenRegisterClass::buildRegUnitSet(const CodeGenRegBank &RegBank,
if (!RU.Artificial)
TmpUnits.push_back(*UnitI);
}
- llvm::sort(TmpUnits.begin(), TmpUnits.end());
+ llvm::sort(TmpUnits);
std::unique_copy(TmpUnits.begin(), TmpUnits.end(),
std::back_inserter(RegUnits));
}
@@ -1093,7 +1094,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
// Read in the user-defined (named) sub-register indices.
// More indices will be synthesized later.
std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex");
- llvm::sort(SRIs.begin(), SRIs.end(), LessRecord());
+ llvm::sort(SRIs, LessRecord());
for (unsigned i = 0, e = SRIs.size(); i != e; ++i)
getSubRegIdx(SRIs[i]);
// Build composite maps from ComposedOf fields.
@@ -1102,7 +1103,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
// Read in the register definitions.
std::vector<Record*> Regs = Records.getAllDerivedDefinitions("Register");
- llvm::sort(Regs.begin(), Regs.end(), LessRecordRegister());
+ llvm::sort(Regs, LessRecordRegister());
// Assign the enumeration values.
for (unsigned i = 0, e = Regs.size(); i != e; ++i)
getReg(Regs[i]);
@@ -1113,7 +1114,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
for (Record *R : Tups) {
std::vector<Record *> TupRegs = *Sets.expand(R);
- llvm::sort(TupRegs.begin(), TupRegs.end(), LessRecordRegister());
+ llvm::sort(TupRegs, LessRecordRegister());
for (Record *RC : TupRegs)
getReg(RC);
}
@@ -1309,6 +1310,55 @@ getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8> &Parts) {
}
void CodeGenRegBank::computeComposites() {
+ using RegMap = std::map<const CodeGenRegister*, const CodeGenRegister*>;
+
+ // Subreg -> { Reg->Reg }, where the right-hand side is the mapping from
+ // register to (sub)register associated with the action of the left-hand
+ // side subregister.
+ std::map<const CodeGenSubRegIndex*, RegMap> SubRegAction;
+ for (const CodeGenRegister &R : Registers) {
+ const CodeGenRegister::SubRegMap &SM = R.getSubRegs();
+ for (std::pair<const CodeGenSubRegIndex*, const CodeGenRegister*> P : SM)
+ SubRegAction[P.first].insert({&R, P.second});
+ }
+
+ // Calculate the composition of two subregisters as compositions of their
+ // associated actions.
+ auto compose = [&SubRegAction] (const CodeGenSubRegIndex *Sub1,
+ const CodeGenSubRegIndex *Sub2) {
+ RegMap C;
+ const RegMap &Img1 = SubRegAction.at(Sub1);
+ const RegMap &Img2 = SubRegAction.at(Sub2);
+ for (std::pair<const CodeGenRegister*, const CodeGenRegister*> P : Img1) {
+ auto F = Img2.find(P.second);
+ if (F != Img2.end())
+ C.insert({P.first, F->second});
+ }
+ return C;
+ };
+
+ // Check if the two maps agree on the intersection of their domains.
+ auto agree = [] (const RegMap &Map1, const RegMap &Map2) {
+ // Technically speaking, an empty map agrees with any other map, but
+ // this could flag false positives. We're interested in non-vacuous
+ // agreements.
+ if (Map1.empty() || Map2.empty())
+ return false;
+ for (std::pair<const CodeGenRegister*, const CodeGenRegister*> P : Map1) {
+ auto F = Map2.find(P.first);
+ if (F == Map2.end() || P.second != F->second)
+ return false;
+ }
+ return true;
+ };
+
+ using CompositePair = std::pair<const CodeGenSubRegIndex*,
+ const CodeGenSubRegIndex*>;
+ SmallSet<CompositePair,4> UserDefined;
+ for (const CodeGenSubRegIndex &Idx : SubRegIndices)
+ for (auto P : Idx.getComposites())
+ UserDefined.insert(std::make_pair(&Idx, P.first));
+
// Keep track of TopoSigs visited. We only need to visit each TopoSig once,
// and many registers will share TopoSigs on regular architectures.
BitVector TopoSigs(getNumTopoSigs());
@@ -1341,11 +1391,15 @@ void CodeGenRegBank::computeComposites() {
assert(Idx3 && "Sub-register doesn't have an index");
// Conflicting composition? Emit a warning but allow it.
- if (CodeGenSubRegIndex *Prev = Idx1->addComposite(Idx2, Idx3))
- PrintWarning(Twine("SubRegIndex ") + Idx1->getQualifiedName() +
- " and " + Idx2->getQualifiedName() +
- " compose ambiguously as " + Prev->getQualifiedName() +
- " or " + Idx3->getQualifiedName());
+ if (CodeGenSubRegIndex *Prev = Idx1->addComposite(Idx2, Idx3)) {
+ // If the composition was not user-defined, always emit a warning.
+ if (!UserDefined.count({Idx1, Idx2}) ||
+ agree(compose(Idx1, Idx2), SubRegAction.at(Idx3)))
+ PrintWarning(Twine("SubRegIndex ") + Idx1->getQualifiedName() +
+ " and " + Idx2->getQualifiedName() +
+ " compose ambiguously as " + Prev->getQualifiedName() +
+ " or " + Idx3->getQualifiedName());
+ }
}
}
}
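Editor's note: a standalone sketch of the compose/agree idea introduced in computeComposites above. Each sub-register index acts as a partial map from registers to registers, composition chains two such maps, and two maps "agree" when they coincide on the intersection of their domains; plain ints stand in for registers here:

#include <cassert>
#include <map>

using RegMap = std::map<int, int>;

static RegMap compose(const RegMap &A, const RegMap &B) {
  RegMap C;
  for (const auto &P : A) {
    auto F = B.find(P.second);
    if (F != B.end())
      C.insert({P.first, F->second});
  }
  return C;
}

static bool agree(const RegMap &M1, const RegMap &M2) {
  if (M1.empty() || M2.empty())
    return false; // vacuous agreement is not interesting
  for (const auto &P : M1) {
    auto F = M2.find(P.first);
    if (F == M2.end() || P.second != F->second)
      return false;
  }
  return true;
}

int main() {
  RegMap SubLo = {{1, 10}, {2, 20}};     // reg -> its "lo" sub-register
  RegMap SubHi = {{10, 100}, {20, 200}}; // "lo" sub-reg -> a deeper part
  RegMap Direct = {{1, 100}, {2, 200}};  // user-defined composite action
  assert(agree(compose(SubLo, SubHi), Direct));
}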
diff --git a/contrib/llvm/utils/TableGen/CodeGenRegisters.h b/contrib/llvm/utils/TableGen/CodeGenRegisters.h
index 32aa33c80b3a..0f7a025ded10 100644
--- a/contrib/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/contrib/llvm/utils/TableGen/CodeGenRegisters.h
@@ -348,7 +348,7 @@ namespace llvm {
ArrayRef<ValueTypeByHwMode> getValueTypes() const { return VTs; }
unsigned getNumValueTypes() const { return VTs.size(); }
- ValueTypeByHwMode getValueTypeNum(unsigned VTNum) const {
+ const ValueTypeByHwMode &getValueTypeNum(unsigned VTNum) const {
if (VTNum < VTs.size())
return VTs[VTNum];
llvm_unreachable("VTNum greater than number of ValueTypes in RegClass!");
diff --git a/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp b/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
index 9331fadf4099..6d259cbb33ee 100644
--- a/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -222,9 +222,248 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
// Collect optional processor description.
collectOptionalProcessorInfo();
+ // Check MCInstPredicate definitions.
+ checkMCInstPredicates();
+
+ // Check STIPredicate definitions.
+ checkSTIPredicates();
+
+ // Find STIPredicate definitions for each processor model, and construct
+ // STIPredicateFunction objects.
+ collectSTIPredicates();
+
checkCompleteness();
}
+void CodeGenSchedModels::checkSTIPredicates() const {
+ DenseMap<StringRef, const Record *> Declarations;
+
+ // There cannot be multiple declarations with the same name.
+ const RecVec Decls = Records.getAllDerivedDefinitions("STIPredicateDecl");
+ for (const Record *R : Decls) {
+ StringRef Name = R->getValueAsString("Name");
+ const auto It = Declarations.find(Name);
+ if (It == Declarations.end()) {
+ Declarations[Name] = R;
+ continue;
+ }
+
+ PrintError(R->getLoc(), "STIPredicate " + Name + " multiply declared.");
+ PrintNote(It->second->getLoc(), "Previous declaration was here.");
+ PrintFatalError(R->getLoc(), "Invalid STIPredicateDecl found.");
+ }
+
+ // Disallow InstructionEquivalenceClasses with an empty instruction list.
+ const RecVec Defs =
+ Records.getAllDerivedDefinitions("InstructionEquivalenceClass");
+ for (const Record *R : Defs) {
+ RecVec Opcodes = R->getValueAsListOfDefs("Opcodes");
+ if (Opcodes.empty()) {
+ PrintFatalError(R->getLoc(), "Invalid InstructionEquivalenceClass "
+ "defined with an empty opcode list.");
+ }
+ }
+}
+
+// Used by function `processSTIPredicate` to construct a mask of machine
+// instruction operands.
+static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
+ APInt OperandMask;
+ if (Indices.empty())
+ return OperandMask;
+
+ int64_t MaxIndex = *std::max_element(Indices.begin(), Indices.end());
+ assert(MaxIndex >= 0 && "Invalid negative indices in input!");
+ OperandMask = OperandMask.zext(MaxIndex + 1);
+ for (const int64_t Index : Indices) {
+ assert(Index >= 0 && "Invalid negative indices!");
+ OperandMask.setBit(Index);
+ }
+
+ return OperandMask;
+}
+
+static void
+processSTIPredicate(STIPredicateFunction &Fn,
+ const DenseMap<Record *, unsigned> &ProcModelMap) {
+ DenseMap<const Record *, unsigned> Opcode2Index;
+ using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
+ std::vector<OpcodeMapPair> OpcodeMappings;
+ std::vector<std::pair<APInt, APInt>> OpcodeMasks;
+
+ DenseMap<const Record *, unsigned> Predicate2Index;
+ unsigned NumUniquePredicates = 0;
+
+ // Number unique predicates and opcodes used by InstructionEquivalenceClass
+ // definitions. Each unique opcode will be associated with an OpcodeInfo
+ // object.
+ for (const Record *Def : Fn.getDefinitions()) {
+ RecVec Classes = Def->getValueAsListOfDefs("Classes");
+ for (const Record *EC : Classes) {
+ const Record *Pred = EC->getValueAsDef("Predicate");
+ if (Predicate2Index.find(Pred) == Predicate2Index.end())
+ Predicate2Index[Pred] = NumUniquePredicates++;
+
+ RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+ for (const Record *Opcode : Opcodes) {
+ if (Opcode2Index.find(Opcode) == Opcode2Index.end()) {
+ Opcode2Index[Opcode] = OpcodeMappings.size();
+ OpcodeMappings.emplace_back(Opcode, OpcodeInfo());
+ }
+ }
+ }
+ }
+
+ // Initialize vector `OpcodeMasks` with default values. We want to keep track
+ // of which processors "use" which opcodes. We also want to be able to
+ // identify predicates that are used by different processors for the same
+ // opcode.
+ // This information is used later on by this algorithm to sort OpcodeMapping
+ // elements based on their processor and predicate sets.
+ OpcodeMasks.resize(OpcodeMappings.size());
+ APInt DefaultProcMask(ProcModelMap.size(), 0);
+ APInt DefaultPredMask(NumUniquePredicates, 0);
+ for (std::pair<APInt, APInt> &MaskPair : OpcodeMasks)
+ MaskPair = std::make_pair(DefaultProcMask, DefaultPredMask);
+
+ // Construct a OpcodeInfo object for every unique opcode declared by an
+ // InstructionEquivalenceClass definition.
+ for (const Record *Def : Fn.getDefinitions()) {
+ RecVec Classes = Def->getValueAsListOfDefs("Classes");
+ const Record *SchedModel = Def->getValueAsDef("SchedModel");
+ unsigned ProcIndex = ProcModelMap.find(SchedModel)->second;
+ APInt ProcMask(ProcModelMap.size(), 0);
+ ProcMask.setBit(ProcIndex);
+
+ for (const Record *EC : Classes) {
+ RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+
+ std::vector<int64_t> OpIndices =
+ EC->getValueAsListOfInts("OperandIndices");
+ APInt OperandMask = constructOperandMask(OpIndices);
+
+ const Record *Pred = EC->getValueAsDef("Predicate");
+ APInt PredMask(NumUniquePredicates, 0);
+ PredMask.setBit(Predicate2Index[Pred]);
+
+ for (const Record *Opcode : Opcodes) {
+ unsigned OpcodeIdx = Opcode2Index[Opcode];
+ if (OpcodeMasks[OpcodeIdx].first[ProcIndex]) {
+ std::string Message =
+ "Opcode " + Opcode->getName().str() +
+ " used by multiple InstructionEquivalenceClass definitions.";
+ PrintFatalError(EC->getLoc(), Message);
+ }
+ OpcodeMasks[OpcodeIdx].first |= ProcMask;
+ OpcodeMasks[OpcodeIdx].second |= PredMask;
+ OpcodeInfo &OI = OpcodeMappings[OpcodeIdx].second;
+
+ OI.addPredicateForProcModel(ProcMask, OperandMask, Pred);
+ }
+ }
+ }
+
+ // Sort OpcodeMappings elements based on their CPU and predicate masks.
+ // As a last resort, order elements by opcode identifier.
+ llvm::sort(OpcodeMappings,
+ [&](const OpcodeMapPair &Lhs, const OpcodeMapPair &Rhs) {
+ unsigned LhsIdx = Opcode2Index[Lhs.first];
+ unsigned RhsIdx = Opcode2Index[Rhs.first];
+ std::pair<APInt, APInt> &LhsMasks = OpcodeMasks[LhsIdx];
+ std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];
+
+ if (LhsMasks.first != RhsMasks.first) {
+ if (LhsMasks.first.countPopulation() <
+ RhsMasks.first.countPopulation())
+ return true;
+ return LhsMasks.first.countLeadingZeros() >
+ RhsMasks.first.countLeadingZeros();
+ }
+
+ if (LhsMasks.second != RhsMasks.second) {
+ if (LhsMasks.second.countPopulation() <
+ RhsMasks.second.countPopulation())
+ return true;
+ return LhsMasks.second.countLeadingZeros() >
+ RhsMasks.second.countLeadingZeros();
+ }
+
+ return LhsIdx < RhsIdx;
+ });
+
+ // Now construct opcode groups. Groups are used by the SubtargetEmitter when
+ // expanding the body of a STIPredicate function. In particular, each opcode
+ // group is expanded into a sequence of labels in a switch statement.
+ // It identifies opcodes for which different processors define the same
+ // predicates and the same opcode masks.
+ for (OpcodeMapPair &Info : OpcodeMappings)
+ Fn.addOpcode(Info.first, std::move(Info.second));
+}
+
+void CodeGenSchedModels::collectSTIPredicates() {
+ // Map STIPredicateDecl records to elements of vector
+ // CodeGenSchedModels::STIPredicates.
+ DenseMap<const Record *, unsigned> Decl2Index;
+
+ RecVec RV = Records.getAllDerivedDefinitions("STIPredicate");
+ for (const Record *R : RV) {
+ const Record *Decl = R->getValueAsDef("Declaration");
+
+ const auto It = Decl2Index.find(Decl);
+ if (It == Decl2Index.end()) {
+ Decl2Index[Decl] = STIPredicates.size();
+ STIPredicateFunction Predicate(Decl);
+ Predicate.addDefinition(R);
+ STIPredicates.emplace_back(std::move(Predicate));
+ continue;
+ }
+
+ STIPredicateFunction &PreviousDef = STIPredicates[It->second];
+ PreviousDef.addDefinition(R);
+ }
+
+ for (STIPredicateFunction &Fn : STIPredicates)
+ processSTIPredicate(Fn, ProcModelMap);
+}
+
+void OpcodeInfo::addPredicateForProcModel(const llvm::APInt &CpuMask,
+ const llvm::APInt &OperandMask,
+ const Record *Predicate) {
+ auto It = llvm::find_if(
+ Predicates, [&OperandMask, &Predicate](const PredicateInfo &P) {
+ return P.Predicate == Predicate && P.OperandMask == OperandMask;
+ });
+ if (It == Predicates.end()) {
+ Predicates.emplace_back(CpuMask, OperandMask, Predicate);
+ return;
+ }
+ It->ProcModelMask |= CpuMask;
+}
+
+void CodeGenSchedModels::checkMCInstPredicates() const {
+ RecVec MCPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
+ if (MCPredicates.empty())
+ return;
+
+ // A target cannot have multiple TIIPredicate definitions with the same name.
+ llvm::StringMap<const Record *> TIIPredicates(MCPredicates.size());
+ for (const Record *TIIPred : MCPredicates) {
+ StringRef Name = TIIPred->getValueAsString("FunctionName");
+ StringMap<const Record *>::const_iterator It = TIIPredicates.find(Name);
+ if (It == TIIPredicates.end()) {
+ TIIPredicates[Name] = TIIPred;
+ continue;
+ }
+
+ PrintError(TIIPred->getLoc(),
+ "TIIPredicate " + Name + " is multiply defined.");
+ PrintNote(It->second->getLoc(),
+ " Previous definition of " + Name + " was here.");
+ PrintFatalError(TIIPred->getLoc(),
+ "Found conflicting definitions of TIIPredicate.");
+ }
+}
+
void CodeGenSchedModels::collectRetireControlUnits() {
RecVec Units = Records.getAllDerivedDefinitions("RetireControlUnit");
@@ -240,6 +479,35 @@ void CodeGenSchedModels::collectRetireControlUnits() {
}
}
+void CodeGenSchedModels::collectLoadStoreQueueInfo() {
+ RecVec Queues = Records.getAllDerivedDefinitions("MemoryQueue");
+
+ for (Record *Queue : Queues) {
+ CodeGenProcModel &PM = getProcModel(Queue->getValueAsDef("SchedModel"));
+ if (Queue->isSubClassOf("LoadQueue")) {
+ if (PM.LoadQueue) {
+ PrintError(Queue->getLoc(),
+ "Expected a single LoadQueue definition");
+ PrintNote(PM.LoadQueue->getLoc(),
+ "Previous definition of LoadQueue was here");
+ }
+
+ PM.LoadQueue = Queue;
+ }
+
+ if (Queue->isSubClassOf("StoreQueue")) {
+ if (PM.StoreQueue) {
+ PrintError(Queue->getLoc(),
+ "Expected a single StoreQueue definition");
+ PrintNote(PM.LoadQueue->getLoc(),
+ "Previous definition of StoreQueue was here");
+ }
+
+ PM.StoreQueue = Queue;
+ }
+ }
+}
+
/// Collect optional processor information.
void CodeGenSchedModels::collectOptionalProcessorInfo() {
// Find register file definitions for each processor.
@@ -248,8 +516,8 @@ void CodeGenSchedModels::collectOptionalProcessorInfo() {
// Collect processor RetireControlUnit descriptors if available.
collectRetireControlUnits();
- // Find pfm counter definitions for each processor.
- collectPfmCounters();
+ // Collect information about load/store queues.
+ collectLoadStoreQueueInfo();
checkCompleteness();
}
@@ -257,7 +525,7 @@ void CodeGenSchedModels::collectOptionalProcessorInfo() {
/// Gather all processor models.
void CodeGenSchedModels::collectProcModels() {
RecVec ProcRecords = Records.getAllDerivedDefinitions("Processor");
- llvm::sort(ProcRecords.begin(), ProcRecords.end(), LessRecordFieldName());
+ llvm::sort(ProcRecords, LessRecordFieldName());
// Reserve space because we can. Reallocation would be ok.
ProcModels.reserve(ProcRecords.size()+1);
@@ -376,7 +644,7 @@ void CodeGenSchedModels::collectSchedRW() {
// Find all ReadWrites referenced by SchedAlias. AliasDefs needs to be sorted
// for the loop below that initializes Alias vectors.
RecVec AliasDefs = Records.getAllDerivedDefinitions("SchedAlias");
- llvm::sort(AliasDefs.begin(), AliasDefs.end(), LessRecord());
+ llvm::sort(AliasDefs, LessRecord());
for (Record *ADef : AliasDefs) {
Record *MatchDef = ADef->getValueAsDef("MatchRW");
Record *AliasDef = ADef->getValueAsDef("AliasRW");
@@ -394,12 +662,12 @@ void CodeGenSchedModels::collectSchedRW() {
}
// Sort and add the SchedReadWrites directly referenced by instructions or
// itinerary resources. Index reads and writes in separate domains.
- llvm::sort(SWDefs.begin(), SWDefs.end(), LessRecord());
+ llvm::sort(SWDefs, LessRecord());
for (Record *SWDef : SWDefs) {
assert(!getSchedRWIdx(SWDef, /*IsRead=*/false) && "duplicate SchedWrite");
SchedWrites.emplace_back(SchedWrites.size(), SWDef);
}
- llvm::sort(SRDefs.begin(), SRDefs.end(), LessRecord());
+ llvm::sort(SRDefs, LessRecord());
for (Record *SRDef : SRDefs) {
assert(!getSchedRWIdx(SRDef, /*IsRead-*/true) && "duplicate SchedWrite");
SchedReads.emplace_back(SchedReads.size(), SRDef);
@@ -619,7 +887,7 @@ void CodeGenSchedModels::collectSchedClasses() {
}
// Create classes for InstRW defs.
RecVec InstRWDefs = Records.getAllDerivedDefinitions("InstRW");
- llvm::sort(InstRWDefs.begin(), InstRWDefs.end(), LessRecord());
+ llvm::sort(InstRWDefs, LessRecord());
LLVM_DEBUG(dbgs() << "\n+++ SCHED CLASSES (createInstRWClass) +++\n");
for (Record *RWDef : InstRWDefs)
createInstRWClass(RWDef);
@@ -923,7 +1191,7 @@ void CodeGenSchedModels::collectProcItins() {
// Gather the read/write types for each itinerary class.
void CodeGenSchedModels::collectProcItinRW() {
RecVec ItinRWDefs = Records.getAllDerivedDefinitions("ItinRW");
- llvm::sort(ItinRWDefs.begin(), ItinRWDefs.end(), LessRecord());
+ llvm::sort(ItinRWDefs, LessRecord());
for (Record *RWDef : ItinRWDefs) {
if (!RWDef->getValueInit("SchedModel")->isComplete())
PrintFatalError(RWDef->getLoc(), "SchedModel is undefined");
@@ -1520,33 +1788,33 @@ void CodeGenSchedModels::collectRegisterFiles() {
CodeGenProcModel &PM = getProcModel(RF->getValueAsDef("SchedModel"));
PM.RegisterFiles.emplace_back(CodeGenRegisterFile(RF->getName(),RF));
CodeGenRegisterFile &CGRF = PM.RegisterFiles.back();
+ CGRF.MaxMovesEliminatedPerCycle =
+ RF->getValueAsInt("MaxMovesEliminatedPerCycle");
+ CGRF.AllowZeroMoveEliminationOnly =
+ RF->getValueAsBit("AllowZeroMoveEliminationOnly");
// Now set the number of physical registers as well as the cost of registers
// in each register class.
CGRF.NumPhysRegs = RF->getValueAsInt("NumPhysRegs");
+ if (!CGRF.NumPhysRegs) {
+ PrintFatalError(RF->getLoc(),
+ "Invalid RegisterFile with zero physical registers");
+ }
+
RecVec RegisterClasses = RF->getValueAsListOfDefs("RegClasses");
std::vector<int64_t> RegisterCosts = RF->getValueAsListOfInts("RegCosts");
+ ListInit *MoveElimInfo = RF->getValueAsListInit("AllowMoveElimination");
for (unsigned I = 0, E = RegisterClasses.size(); I < E; ++I) {
int Cost = RegisterCosts.size() > I ? RegisterCosts[I] : 1;
- CGRF.Costs.emplace_back(RegisterClasses[I], Cost);
- }
- }
-}
-// Collect all the RegisterFile definitions available in this target.
-void CodeGenSchedModels::collectPfmCounters() {
- for (Record *Def : Records.getAllDerivedDefinitions("PfmIssueCounter")) {
- CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel"));
- PM.PfmIssueCounterDefs.emplace_back(Def);
- }
- for (Record *Def : Records.getAllDerivedDefinitions("PfmCycleCounter")) {
- CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel"));
- if (PM.PfmCycleCounterDef) {
- PrintFatalError(Def->getLoc(),
- "multiple cycle counters for " +
- Def->getValueAsDef("SchedModel")->getName());
+ bool AllowMoveElim = false;
+ if (MoveElimInfo->size() > I) {
+ BitInit *Val = cast<BitInit>(MoveElimInfo->getElement(I));
+ AllowMoveElim = Val->getValue();
+ }
+
+ CGRF.Costs.emplace_back(RegisterClasses[I], Cost, AllowMoveElim);
}
- PM.PfmCycleCounterDef = Def;
}
}
@@ -1620,12 +1888,9 @@ void CodeGenSchedModels::collectProcResources() {
}
// Finalize each ProcModel by sorting the record arrays.
for (CodeGenProcModel &PM : ProcModels) {
- llvm::sort(PM.WriteResDefs.begin(), PM.WriteResDefs.end(),
- LessRecord());
- llvm::sort(PM.ReadAdvanceDefs.begin(), PM.ReadAdvanceDefs.end(),
- LessRecord());
- llvm::sort(PM.ProcResourceDefs.begin(), PM.ProcResourceDefs.end(),
- LessRecord());
+ llvm::sort(PM.WriteResDefs, LessRecord());
+ llvm::sort(PM.ReadAdvanceDefs, LessRecord());
+ llvm::sort(PM.ProcResourceDefs, LessRecord());
LLVM_DEBUG(
PM.dump();
dbgs() << "WriteResDefs: "; for (RecIter RI = PM.WriteResDefs.begin(),
diff --git a/contrib/llvm/utils/TableGen/CodeGenSchedule.h b/contrib/llvm/utils/TableGen/CodeGenSchedule.h
index 07c11596adee..87a051b0c05e 100644
--- a/contrib/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/contrib/llvm/utils/TableGen/CodeGenSchedule.h
@@ -15,6 +15,7 @@
#ifndef LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H
#define LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
@@ -166,8 +167,9 @@ struct CodeGenSchedClass {
struct CodeGenRegisterCost {
Record *RCDef;
unsigned Cost;
- CodeGenRegisterCost(Record *RC, unsigned RegisterCost)
- : RCDef(RC), Cost(RegisterCost) {}
+ bool AllowMoveElimination;
+ CodeGenRegisterCost(Record *RC, unsigned RegisterCost, bool AllowMoveElim = false)
+ : RCDef(RC), Cost(RegisterCost), AllowMoveElimination(AllowMoveElim) {}
CodeGenRegisterCost(const CodeGenRegisterCost &) = default;
CodeGenRegisterCost &operator=(const CodeGenRegisterCost &) = delete;
};
@@ -180,12 +182,18 @@ struct CodeGenRegisterCost {
struct CodeGenRegisterFile {
std::string Name;
Record *RegisterFileDef;
+ unsigned MaxMovesEliminatedPerCycle;
+ bool AllowZeroMoveEliminationOnly;
unsigned NumPhysRegs;
std::vector<CodeGenRegisterCost> Costs;
- CodeGenRegisterFile(StringRef name, Record *def)
- : Name(name), RegisterFileDef(def), NumPhysRegs(0) {}
+ CodeGenRegisterFile(StringRef name, Record *def, unsigned MaxMoveElimPerCy = 0,
+ bool AllowZeroMoveElimOnly = false)
+ : Name(name), RegisterFileDef(def),
+ MaxMovesEliminatedPerCycle(MaxMoveElimPerCy),
+ AllowZeroMoveEliminationOnly(AllowZeroMoveElimOnly),
+ NumPhysRegs(0) {}
bool hasDefaultCosts() const { return Costs.empty(); }
};
@@ -238,14 +246,14 @@ struct CodeGenProcModel {
// Optional Retire Control Unit definition.
Record *RetireControlUnit;
- // List of PfmCounters.
- RecVec PfmIssueCounterDefs;
- Record *PfmCycleCounterDef = nullptr;
+ // Load/Store queue descriptors.
+ Record *LoadQueue;
+ Record *StoreQueue;
CodeGenProcModel(unsigned Idx, std::string Name, Record *MDef,
Record *IDef) :
Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef),
- RetireControlUnit(nullptr) {}
+ RetireControlUnit(nullptr), LoadQueue(nullptr), StoreQueue(nullptr) {}
bool hasItineraries() const {
return !ItinsDef->getValueAsListOfDefs("IID").empty();
@@ -256,9 +264,8 @@ struct CodeGenProcModel {
}
bool hasExtraProcessorInfo() const {
- return RetireControlUnit || !RegisterFiles.empty() ||
- !PfmIssueCounterDefs.empty() ||
- PfmCycleCounterDef != nullptr;
+ return RetireControlUnit || LoadQueue || StoreQueue ||
+ !RegisterFiles.empty();
}
unsigned getProcResourceIdx(Record *PRDef) const;
@@ -270,6 +277,137 @@ struct CodeGenProcModel {
#endif
};
+/// Used to correlate instructions to MCInstPredicates specified by
+/// InstructionEquivalenceClass tablegen definitions.
+///
+/// Example: a XOR of a register with self, is a known zero-idiom for most
+/// X86 processors.
+///
+/// Each processor can use a (potentially different) InstructionEquivalenceClass
+/// definition to classify zero-idioms. That means, XORrr is likely to appear
+/// in more than one equivalence class (where each class definition is
+/// contributed by a different processor).
+///
+/// There is no guarantee that the same MCInstPredicate will be used to describe
+/// equivalence classes that identify XORrr as a zero-idiom.
+///
+/// To be more specific, the requirements for being a zero-idiom XORrr may be
+/// different for different processors.
+///
+/// Class PredicateInfo identifies a subset of processors that specify the same
+/// requirements (i.e. same MCInstPredicate and OperandMask) for an instruction
+/// opcode.
+///
+/// Back to the example. Field `ProcModelMask` will have one bit set for every
+/// processor model that sees XORrr as a zero-idiom, and that specifies the same
+/// set of constraints.
+///
+/// By construction, there can be multiple instances of PredicateInfo associated
+/// with the same instruction opcode. For example, different processors may define
+/// different constraints on the same opcode.
+///
+/// Field OperandMask can be used as an extra constraint.
+/// It may be used to describe conditions that apply only to a subset of the
+/// operands of a machine instruction, and the operands subset may not be the
+/// same for all processor models.
+struct PredicateInfo {
+ llvm::APInt ProcModelMask; // A set of processor model indices.
+ llvm::APInt OperandMask; // An operand mask.
+ const Record *Predicate; // MCInstrPredicate definition.
+ PredicateInfo(llvm::APInt CpuMask, llvm::APInt Operands, const Record *Pred)
+ : ProcModelMask(CpuMask), OperandMask(Operands), Predicate(Pred) {}
+
+ bool operator==(const PredicateInfo &Other) const {
+ return ProcModelMask == Other.ProcModelMask &&
+ OperandMask == Other.OperandMask && Predicate == Other.Predicate;
+ }
+};
+
+/// A collection of PredicateInfo objects.
+///
+/// There is at least one OpcodeInfo object for every opcode specified by a
+/// STIPredicate definition.
+class OpcodeInfo {
+ std::vector<PredicateInfo> Predicates;
+
+ OpcodeInfo(const OpcodeInfo &Other) = delete;
+ OpcodeInfo &operator=(const OpcodeInfo &Other) = delete;
+
+public:
+ OpcodeInfo() = default;
+ OpcodeInfo &operator=(OpcodeInfo &&Other) = default;
+ OpcodeInfo(OpcodeInfo &&Other) = default;
+
+ ArrayRef<PredicateInfo> getPredicates() const { return Predicates; }
+
+ void addPredicateForProcModel(const llvm::APInt &CpuMask,
+ const llvm::APInt &OperandMask,
+ const Record *Predicate);
+};
+
+/// Used to group together tablegen instruction definitions that are subject
+/// to the same set of constraints (identified by an instance of OpcodeInfo).
+class OpcodeGroup {
+ OpcodeInfo Info;
+ std::vector<const Record *> Opcodes;
+
+ OpcodeGroup(const OpcodeGroup &Other) = delete;
+ OpcodeGroup &operator=(const OpcodeGroup &Other) = delete;
+
+public:
+ OpcodeGroup(OpcodeInfo &&OpInfo) : Info(std::move(OpInfo)) {}
+ OpcodeGroup(OpcodeGroup &&Other) = default;
+
+ void addOpcode(const Record *Opcode) {
+ assert(std::find(Opcodes.begin(), Opcodes.end(), Opcode) == Opcodes.end() &&
+ "Opcode already in set!");
+ Opcodes.push_back(Opcode);
+ }
+
+ ArrayRef<const Record *> getOpcodes() const { return Opcodes; }
+ const OpcodeInfo &getOpcodeInfo() const { return Info; }
+};
+
+/// An STIPredicateFunction descriptor used by tablegen backends to
+/// auto-generate the body of a predicate function as a member of tablegen'd
+/// class XXXGenSubtargetInfo.
+class STIPredicateFunction {
+ const Record *FunctionDeclaration;
+
+ std::vector<const Record *> Definitions;
+ std::vector<OpcodeGroup> Groups;
+
+ STIPredicateFunction(const STIPredicateFunction &Other) = delete;
+ STIPredicateFunction &operator=(const STIPredicateFunction &Other) = delete;
+
+public:
+ STIPredicateFunction(const Record *Rec) : FunctionDeclaration(Rec) {}
+ STIPredicateFunction(STIPredicateFunction &&Other) = default;
+
+ bool isCompatibleWith(const STIPredicateFunction &Other) const {
+ return FunctionDeclaration == Other.FunctionDeclaration;
+ }
+
+ void addDefinition(const Record *Def) { Definitions.push_back(Def); }
+ void addOpcode(const Record *OpcodeRec, OpcodeInfo &&Info) {
+ if (Groups.empty() ||
+ Groups.back().getOpcodeInfo().getPredicates() != Info.getPredicates())
+ Groups.emplace_back(std::move(Info));
+ Groups.back().addOpcode(OpcodeRec);
+ }
+
+ StringRef getName() const {
+ return FunctionDeclaration->getValueAsString("Name");
+ }
+ const Record *getDefaultReturnPredicate() const {
+ return FunctionDeclaration->getValueAsDef("DefaultReturnValue");
+ }
+
+ const Record *getDeclaration() const { return FunctionDeclaration; }
+ ArrayRef<const Record *> getDefinitions() const { return Definitions; }
+ ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
+};
+
/// Top level container for machine model data.
class CodeGenSchedModels {
RecordKeeper &Records;
@@ -303,6 +441,8 @@ class CodeGenSchedModels {
using InstClassMapTy = DenseMap<Record*, unsigned>;
InstClassMapTy InstrClassMap;
+ std::vector<STIPredicateFunction> STIPredicates;
+
public:
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);
@@ -430,6 +570,9 @@ public:
Record *findProcResUnits(Record *ProcResKind, const CodeGenProcModel &PM,
ArrayRef<SMLoc> Loc) const;
+ ArrayRef<STIPredicateFunction> getSTIPredicates() const {
+ return STIPredicates;
+ }
private:
void collectProcModels();
@@ -447,8 +590,6 @@ private:
void collectRegisterFiles();
- void collectPfmCounters();
-
void collectOptionalProcessorInfo();
std::string createSchedClassName(Record *ItinClassDef,
@@ -465,6 +606,14 @@ private:
void inferSchedClasses();
+ void checkMCInstPredicates() const;
+
+ void checkSTIPredicates() const;
+
+ void collectSTIPredicates();
+
+ void collectLoadStoreQueueInfo();
+
void checkCompleteness();
void inferFromRW(ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads,
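Editor's note: STIPredicateFunction::addOpcode above opens a new OpcodeGroup only when the incoming opcode's predicate list differs from the last group's; because opcodes arrive pre-sorted, equal predicate sets end up adjacent. A simplified sketch of that grouping rule, with strings standing in for predicate lists:

#include <cassert>
#include <string>
#include <vector>

struct Group {
  std::string Key;                 // stands in for the predicate list
  std::vector<std::string> Opcodes;
};

static void addOpcode(std::vector<Group> &Groups, const std::string &Opcode,
                      const std::string &Key) {
  if (Groups.empty() || Groups.back().Key != Key)
    Groups.push_back({Key, {}});
  Groups.back().Opcodes.push_back(Opcode);
}

int main() {
  std::vector<Group> Groups;
  addOpcode(Groups, "XOR32rr", "zero-idiom");
  addOpcode(Groups, "XOR64rr", "zero-idiom");
  addOpcode(Groups, "SUB32rr", "dep-breaking");
  assert(Groups.size() == 2 && Groups[0].Opcodes.size() == 2);
}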
diff --git a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
index cb73ca83c9bb..bcb653135551 100644
--- a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -278,7 +278,7 @@ CodeGenRegBank &CodeGenTarget::getRegBank() const {
void CodeGenTarget::ReadRegAltNameIndices() const {
RegAltNameIndices = Records.getAllDerivedDefinitions("RegAltNameIndex");
- llvm::sort(RegAltNameIndices.begin(), RegAltNameIndices.end(), LessRecord());
+ llvm::sort(RegAltNameIndices, LessRecord());
}
/// getRegisterByName - If there is a register with the specific AsmName,
@@ -303,7 +303,7 @@ std::vector<ValueTypeByHwMode> CodeGenTarget::getRegisterVTs(Record *R)
}
// Remove duplicates.
- llvm::sort(Result.begin(), Result.end());
+ llvm::sort(Result);
Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
return Result;
}
@@ -314,7 +314,7 @@ void CodeGenTarget::ReadLegalValueTypes() const {
LegalValueTypes.insert(LegalValueTypes.end(), RC.VTs.begin(), RC.VTs.end());
// Remove duplicates.
- llvm::sort(LegalValueTypes.begin(), LegalValueTypes.end());
+ llvm::sort(LegalValueTypes);
LegalValueTypes.erase(std::unique(LegalValueTypes.begin(),
LegalValueTypes.end()),
LegalValueTypes.end());
@@ -513,7 +513,7 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC,
if (isTarget == TargetOnly)
Intrinsics.push_back(CodeGenIntrinsic(Defs[I]));
}
- llvm::sort(Intrinsics.begin(), Intrinsics.end(),
+ llvm::sort(Intrinsics,
[](const CodeGenIntrinsic &LHS, const CodeGenIntrinsic &RHS) {
return std::tie(LHS.TargetPrefix, LHS.Name) <
std::tie(RHS.TargetPrefix, RHS.Name);
@@ -536,6 +536,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isCommutative = false;
canThrow = false;
isNoReturn = false;
+ isCold = false;
isNoDuplicate = false;
isConvergent = false;
isSpeculatable = false;
@@ -682,6 +683,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isConvergent = true;
else if (Property->getName() == "IntrNoReturn")
isNoReturn = true;
+ else if (Property->getName() == "IntrCold")
+ isCold = true;
else if (Property->getName() == "IntrSpeculatable")
isSpeculatable = true;
else if (Property->getName() == "IntrHasSideEffects")
@@ -709,6 +712,5 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
Properties = parseSDPatternOperatorProperties(R);
// Sort the argument attributes for later benefit.
- llvm::sort(ArgumentAttributes.begin(), ArgumentAttributes.end());
+ llvm::sort(ArgumentAttributes);
}
-
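Editor's note: the repeated llvm::sort(X.begin(), X.end(), Cmp) to llvm::sort(X, Cmp) changes in this import switch to LLVM's range overload of sort. A generic range wrapper in that spirit is easy to sketch (this is a stand-in, not LLVM's implementation):

#include <algorithm>
#include <cassert>
#include <vector>

// Range-based sort wrapper in the spirit of llvm::sort(Container, Cmp).
template <typename Container, typename Compare>
void range_sort(Container &C, Compare Cmp) {
  std::sort(C.begin(), C.end(), Cmp);
}

int main() {
  std::vector<int> V = {3, 1, 2};
  range_sort(V, [](int A, int B) { return A < B; });
  assert(V.front() == 1 && V.back() == 3);
}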
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcher.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcher.cpp
index 4a918d15691b..c8e005739460 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcher.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcher.cpp
@@ -93,13 +93,23 @@ SwitchTypeMatcher::~SwitchTypeMatcher() {
delete C.second;
}
-CheckPredicateMatcher::CheckPredicateMatcher(const TreePredicateFn &pred)
- : Matcher(CheckPredicate), Pred(pred.getOrigPatFragRecord()) {}
+CheckPredicateMatcher::CheckPredicateMatcher(
+ const TreePredicateFn &pred, const SmallVectorImpl<unsigned> &Ops)
+ : Matcher(CheckPredicate), Pred(pred.getOrigPatFragRecord()),
+ Operands(Ops.begin(), Ops.end()) {}
TreePredicateFn CheckPredicateMatcher::getPredicate() const {
return TreePredicateFn(Pred);
}
+unsigned CheckPredicateMatcher::getNumOperands() const {
+ return Operands.size();
+}
+
+unsigned CheckPredicateMatcher::getOperandNo(unsigned i) const {
+ assert(i < Operands.size());
+ return Operands[i];
+}
// printImpl methods.
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcher.h b/contrib/llvm/utils/TableGen/DAGISelMatcher.h
index ecc1f1dd094a..9be7295c67d4 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcher.h
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcher.h
@@ -414,10 +414,14 @@ private:
/// see if the node is acceptable.
class CheckPredicateMatcher : public Matcher {
TreePattern *Pred;
+ const SmallVector<unsigned, 4> Operands;
public:
- CheckPredicateMatcher(const TreePredicateFn &pred);
+ CheckPredicateMatcher(const TreePredicateFn &pred,
+ const SmallVectorImpl<unsigned> &Operands);
TreePredicateFn getPredicate() const;
+ unsigned getNumOperands() const;
+ unsigned getOperandNo(unsigned i) const;
static bool classof(const Matcher *N) {
return N->getKind() == CheckPredicate;
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index e64943c1d025..90ca1bff5344 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -50,6 +50,7 @@ class MatcherTableEmitter {
DenseMap<TreePattern *, unsigned> NodePredicateMap;
std::vector<TreePredicateFn> NodePredicates;
+ std::vector<TreePredicateFn> NodePredicatesWithOperands;
// We de-duplicate the predicates by code string, and use this map to track
// all the patterns with "identical" predicates.
@@ -92,6 +93,9 @@ public:
void EmitPatternMatchTable(raw_ostream &OS);
private:
+ void EmitNodePredicatesFunction(const std::vector<TreePredicateFn> &Preds,
+ StringRef Decl, raw_ostream &OS);
+
unsigned EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
raw_ostream &OS);
@@ -103,12 +107,20 @@ private:
NodePredicatesByCodeToRun[Pred.getCodeToRunOnSDNode()];
if (SameCodePreds.empty()) {
// We've never seen a predicate with the same code: allocate an entry.
- NodePredicates.push_back(Pred);
- Entry = NodePredicates.size();
+ if (Pred.usesOperands()) {
+ NodePredicatesWithOperands.push_back(Pred);
+ Entry = NodePredicatesWithOperands.size();
+ } else {
+ NodePredicates.push_back(Pred);
+ Entry = NodePredicates.size();
+ }
} else {
// We did see an identical predicate: re-use it.
Entry = NodePredicateMap[SameCodePreds.front()];
assert(Entry != 0);
+ assert(TreePredicateFn(SameCodePreds.front()).usesOperands() ==
+ Pred.usesOperands() &&
+ "PatFrags with some code must have same usesOperands setting");
}
// In both cases, we've never seen this particular predicate before, so
// mark it in the list of predicates sharing the same code.
@@ -396,11 +408,23 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
}
case Matcher::CheckPredicate: {
TreePredicateFn Pred = cast<CheckPredicateMatcher>(N)->getPredicate();
- OS << "OPC_CheckPredicate, " << getNodePredicate(Pred) << ',';
+ unsigned OperandBytes = 0;
+
+ if (Pred.usesOperands()) {
+ unsigned NumOps = cast<CheckPredicateMatcher>(N)->getNumOperands();
+ OS << "OPC_CheckPredicateWithOperands, " << NumOps << "/*#Ops*/, ";
+ for (unsigned i = 0; i < NumOps; ++i)
+ OS << cast<CheckPredicateMatcher>(N)->getOperandNo(i) << ", ";
+ OperandBytes = 1 + NumOps;
+ } else {
+ OS << "OPC_CheckPredicate, ";
+ }
+
+ OS << getNodePredicate(Pred) << ',';
if (!OmitComments)
OS << " // " << Pred.getFnName();
OS << '\n';
- return 2;
+ return 2 + OperandBytes;
}
case Matcher::CheckOpcode:
@@ -783,6 +807,33 @@ EmitMatcherList(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
return Size;
}
+void MatcherTableEmitter::EmitNodePredicatesFunction(
+ const std::vector<TreePredicateFn> &Preds, StringRef Decl,
+ raw_ostream &OS) {
+ if (Preds.empty())
+ return;
+
+ BeginEmitFunction(OS, "bool", Decl, true/*AddOverride*/);
+ OS << "{\n";
+ OS << " switch (PredNo) {\n";
+ OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // Emit the predicate code corresponding to this pattern.
+ TreePredicateFn PredFn = Preds[i];
+
+ assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
+ OS << " case " << i << ": { \n";
+ for (auto *SimilarPred :
+ NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
+ OS << " // " << TreePredicateFn(SimilarPred).getFnName() <<'\n';
+
+ OS << PredFn.getCodeToRunOnSDNode() << "\n }\n";
+ }
+ OS << " }\n";
+ OS << "}\n";
+ EndEmitFunction(OS);
+}
+
void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) {
// Emit pattern predicates.
if (!PatternPredicates.empty()) {
@@ -799,29 +850,14 @@ void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) {
}
// Emit Node predicates.
- if (!NodePredicates.empty()) {
- BeginEmitFunction(OS, "bool",
- "CheckNodePredicate(SDNode *Node, unsigned PredNo) const",
- true/*AddOverride*/);
- OS << "{\n";
- OS << " switch (PredNo) {\n";
- OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
- for (unsigned i = 0, e = NodePredicates.size(); i != e; ++i) {
- // Emit the predicate code corresponding to this pattern.
- TreePredicateFn PredFn = NodePredicates[i];
-
- assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
- OS << " case " << i << ": { \n";
- for (auto *SimilarPred :
- NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
- OS << " // " << TreePredicateFn(SimilarPred).getFnName() <<'\n';
-
- OS << PredFn.getCodeToRunOnSDNode() << "\n }\n";
- }
- OS << " }\n";
- OS << "}\n";
- EndEmitFunction(OS);
- }
+ EmitNodePredicatesFunction(
+ NodePredicates, "CheckNodePredicate(SDNode *Node, unsigned PredNo) const",
+ OS);
+ EmitNodePredicatesFunction(
+ NodePredicatesWithOperands,
+ "CheckNodePredicateWithOperands(SDNode *Node, unsigned PredNo, "
+ "const SmallVectorImpl<SDValue> &Operands) const",
+ OS);
// Emit CompletePattern matchers.
// FIXME: This should be const.
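Editor's note: the "return 2 + OperandBytes" above accounts for the matcher-table bytes of the new OPC_CheckPredicateWithOperands entry: one byte for the opcode, one for the operand count, one per operand index, and one for the predicate number. A tiny sketch of that size computation:

#include <cassert>
#include <cstddef>

static size_t checkPredicateEntrySize(bool UsesOperands, size_t NumOps) {
  size_t OperandBytes = UsesOperands ? 1 + NumOps : 0;
  return 2 + OperandBytes; // opcode byte + predicate-index byte + operands
}

int main() {
  assert(checkPredicateEntrySize(false, 0) == 2); // OPC_CheckPredicate
  assert(checkPredicateEntrySize(true, 3) == 6);  // ...WithOperands, 3 ops
}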
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index ce23651b9682..612342ddcddf 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -33,15 +33,15 @@ static MVT::SimpleValueType getRegisterValueType(Record *R,
if (!FoundRC) {
FoundRC = true;
- ValueTypeByHwMode VVT = RC.getValueTypeNum(0);
+ const ValueTypeByHwMode &VVT = RC.getValueTypeNum(0);
if (VVT.isSimple())
VT = VVT.getSimple().SimpleTy;
continue;
}
- // If this occurs in multiple register classes, they all have to agree.
#ifndef NDEBUG
- ValueTypeByHwMode T = RC.getValueTypeNum(0);
+ // If this occurs in multiple register classes, they all have to agree.
+ const ValueTypeByHwMode &T = RC.getValueTypeNum(0);
assert((!T.isSimple() || T.getSimple().SimpleTy == VT) &&
"ValueType mismatch between register classes for this register");
#endif
@@ -120,7 +120,7 @@ namespace {
/// If this is the first time a node with unique identifier Name has been
/// seen, record it. Otherwise, emit a check to make sure this is the same
/// node. Returns true if this is the first encounter.
- bool recordUniqueNode(const std::string &Name);
+ bool recordUniqueNode(ArrayRef<std::string> Names);
// Result Code Generation.
unsigned getNamedArgumentSlot(StringRef Name) {
@@ -319,8 +319,8 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
// to handle this.
if ((N->getOperator()->getName() == "and" ||
N->getOperator()->getName() == "or") &&
- N->getChild(1)->isLeaf() && N->getChild(1)->getPredicateFns().empty() &&
- N->getPredicateFns().empty()) {
+ N->getChild(1)->isLeaf() && N->getChild(1)->getPredicateCalls().empty() &&
+ N->getPredicateCalls().empty()) {
if (IntInit *II = dyn_cast<IntInit>(N->getChild(1)->getLeafValue())) {
if (!isPowerOf2_32(II->getValue())) { // Don't bother with single bits.
// If this is at the root of the pattern, we emit a redundant
@@ -441,21 +441,39 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
}
}
-bool MatcherGen::recordUniqueNode(const std::string &Name) {
- unsigned &VarMapEntry = VariableMap[Name];
- if (VarMapEntry == 0) {
+bool MatcherGen::recordUniqueNode(ArrayRef<std::string> Names) {
+ unsigned Entry = 0;
+ for (const std::string &Name : Names) {
+ unsigned &VarMapEntry = VariableMap[Name];
+ if (!Entry)
+ Entry = VarMapEntry;
+ assert(Entry == VarMapEntry);
+ }
+
+ bool NewRecord = false;
+ if (Entry == 0) {
// If it is a named node, we must emit a 'Record' opcode.
- AddMatcher(new RecordMatcher("$" + Name, NextRecordedOperandNo));
- VarMapEntry = ++NextRecordedOperandNo;
- return true;
+ std::string WhatFor;
+ for (const std::string &Name : Names) {
+ if (!WhatFor.empty())
+ WhatFor += ',';
+ WhatFor += "$" + Name;
+ }
+ AddMatcher(new RecordMatcher(WhatFor, NextRecordedOperandNo));
+ Entry = ++NextRecordedOperandNo;
+ NewRecord = true;
+ } else {
+ // If we get here, this is a second reference to a specific name. Since
+ // we already have checked that the first reference is valid, we don't
+ // have to recursively match it, just check that it's the same as the
+ // previously named thing.
+ AddMatcher(new CheckSameMatcher(Entry-1));
}
- // If we get here, this is a second reference to a specific name. Since
- // we already have checked that the first reference is valid, we don't
- // have to recursively match it, just check that it's the same as the
- // previously named thing.
- AddMatcher(new CheckSameMatcher(VarMapEntry-1));
- return false;
+ for (const std::string &Name : Names)
+ VariableMap[Name] = Entry;
+
+ return NewRecord;
}
void MatcherGen::EmitMatchCode(const TreePatternNode *N,
@@ -475,9 +493,18 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
// If this node has a name associated with it, capture it in VariableMap. If
// we already saw this in the pattern, emit code to verify dagness.
+ SmallVector<std::string, 4> Names;
if (!N->getName().empty())
- if (!recordUniqueNode(N->getName()))
+ Names.push_back(N->getName());
+
+ for (const ScopedName &Name : N->getNamesAsPredicateArg()) {
+ Names.push_back(("pred:" + Twine(Name.getScope()) + ":" + Name.getIdentifier()).str());
+ }
+
+ if (!Names.empty()) {
+ if (!recordUniqueNode(Names))
return;
+ }
if (N->isLeaf())
EmitLeafMatchCode(N);
@@ -485,8 +512,19 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
EmitOperatorMatchCode(N, NodeNoTypes, ForceMode);
// If there are node predicates for this node, generate their checks.
- for (unsigned i = 0, e = N->getPredicateFns().size(); i != e; ++i)
- AddMatcher(new CheckPredicateMatcher(N->getPredicateFns()[i]));
+ for (unsigned i = 0, e = N->getPredicateCalls().size(); i != e; ++i) {
+ const TreePredicateCall &Pred = N->getPredicateCalls()[i];
+ SmallVector<unsigned, 4> Operands;
+ if (Pred.Fn.usesOperands()) {
+ TreePattern *TP = Pred.Fn.getOrigPatFragRecord();
+ for (unsigned i = 0; i < TP->getNumArgs(); ++i) {
+ std::string Name =
+ ("pred:" + Twine(Pred.Scope) + ":" + TP->getArgName(i)).str();
+ Operands.push_back(getNamedArgumentSlot(Name));
+ }
+ }
+ AddMatcher(new CheckPredicateMatcher(Pred.Fn, Operands));
+ }
for (unsigned i = 0, e = ResultsToTypeCheck.size(); i != e; ++i)
AddMatcher(new CheckTypeMatcher(N->getSimpleType(ResultsToTypeCheck[i]),
@@ -962,9 +1000,16 @@ void MatcherGen::EmitResultCode() {
}
assert(Ops.size() >= NumSrcResults && "Didn't provide enough results");
- Ops.resize(NumSrcResults);
+ SmallVector<unsigned, 8> Results(Ops);
+
+ // Apply result permutation.
+ for (unsigned ResNo = 0; ResNo < Pattern.getDstPattern()->getNumResults();
+ ++ResNo) {
+ Results[ResNo] = Ops[Pattern.getDstPattern()->getResultIndex(ResNo)];
+ }
- AddMatcher(new CompleteMatchMatcher(Ops, Pattern));
+ Results.resize(NumSrcResults);
+ AddMatcher(new CompleteMatchMatcher(Results, Pattern));
}
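Editor's note: a simplified sketch of the multi-name bookkeeping in the rewritten recordUniqueNode above. Every name in the group maps to one recorded-operand slot, and only the first encounter allocates a new slot (the real code also asserts that all names in the group agree on the slot):

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Returns true when a new slot was allocated for this group of names.
static bool recordUniqueNode(std::map<std::string, unsigned> &VariableMap,
                             const std::vector<std::string> &Names,
                             unsigned &NextSlot) {
  unsigned Entry = 0;
  for (const std::string &Name : Names)
    if (unsigned E = VariableMap[Name])
      Entry = E; // an alias in the group was already recorded
  bool NewRecord = (Entry == 0);
  if (NewRecord)
    Entry = ++NextSlot;
  for (const std::string &Name : Names)
    VariableMap[Name] = Entry;
  return NewRecord;
}

int main() {
  std::map<std::string, unsigned> VarMap;
  unsigned NextSlot = 0;
  assert(recordUniqueNode(VarMap, {"src", "pred:1:x"}, NextSlot)); // first use
  assert(!recordUniqueNode(VarMap, {"src"}, NextSlot));            // reuse
  assert(VarMap["src"] == VarMap["pred:1:x"]);
}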
diff --git a/contrib/llvm/utils/TableGen/ExegesisEmitter.cpp b/contrib/llvm/utils/TableGen/ExegesisEmitter.cpp
new file mode 100644
index 000000000000..208237aca20c
--- /dev/null
+++ b/contrib/llvm/utils/TableGen/ExegesisEmitter.cpp
@@ -0,0 +1,216 @@
+//===- ExegesisEmitter.cpp - Generate exegesis target data ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits llvm-exegesis information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "exegesis-emitter"
+
+namespace {
+
+class ExegesisEmitter {
+public:
+ ExegesisEmitter(RecordKeeper &RK);
+
+ void run(raw_ostream &OS) const;
+
+private:
+ unsigned getPfmCounterId(llvm::StringRef Name) const {
+ const auto It = PfmCounterNameTable.find(Name);
+ if (It == PfmCounterNameTable.end())
+ PrintFatalError("no pfm counter id for " + Name);
+ return It->second;
+ }
+
+ // Collects all the ProcPfmCounters definitions available in this target.
+ void emitPfmCounters(raw_ostream &OS) const;
+
+ void emitPfmCountersInfo(const Record &Def,
+ unsigned &IssueCountersTableOffset,
+ raw_ostream &OS) const;
+
+ void emitPfmCountersLookupTable(raw_ostream &OS) const;
+
+ RecordKeeper &Records;
+ std::string Target;
+
+ // Table of counter name -> counter index.
+ const std::map<llvm::StringRef, unsigned> PfmCounterNameTable;
+};
+
+static std::map<llvm::StringRef, unsigned>
+collectPfmCounters(const RecordKeeper &Records) {
+ std::map<llvm::StringRef, unsigned> PfmCounterNameTable;
+ const auto AddPfmCounterName = [&PfmCounterNameTable](
+ const Record *PfmCounterDef) {
+ const llvm::StringRef Counter = PfmCounterDef->getValueAsString("Counter");
+ if (!Counter.empty())
+ PfmCounterNameTable.emplace(Counter, 0);
+ };
+ for (Record *Def : Records.getAllDerivedDefinitions("ProcPfmCounters")) {
+ // Check that ResourceNames are unique.
+ llvm::SmallSet<llvm::StringRef, 16> Seen;
+ for (const Record *IssueCounter :
+ Def->getValueAsListOfDefs("IssueCounters")) {
+ const llvm::StringRef ResourceName =
+ IssueCounter->getValueAsString("ResourceName");
+ if (ResourceName.empty())
+ PrintFatalError(IssueCounter->getLoc(), "invalid empty ResourceName");
+ if (!Seen.insert(ResourceName).second)
+ PrintFatalError(IssueCounter->getLoc(),
+ "duplicate ResourceName " + ResourceName);
+ AddPfmCounterName(IssueCounter);
+ }
+ AddPfmCounterName(Def->getValueAsDef("CycleCounter"));
+ AddPfmCounterName(Def->getValueAsDef("UopsCounter"));
+ }
+ unsigned Index = 0;
+ for (auto &NameAndIndex : PfmCounterNameTable)
+ NameAndIndex.second = Index++;
+ return PfmCounterNameTable;
+}
+
+ExegesisEmitter::ExegesisEmitter(RecordKeeper &RK)
+ : Records(RK), PfmCounterNameTable(collectPfmCounters(RK)) {
+ std::vector<Record *> Targets = Records.getAllDerivedDefinitions("Target");
+ if (Targets.size() == 0)
+ PrintFatalError("ERROR: No 'Target' subclasses defined!");
+ if (Targets.size() != 1)
+ PrintFatalError("ERROR: Multiple subclasses of Target defined!");
+ Target = Targets[0]->getName();
+}
+
+void ExegesisEmitter::emitPfmCountersInfo(const Record &Def,
+ unsigned &IssueCountersTableOffset,
+ raw_ostream &OS) const {
+ const auto CycleCounter =
+ Def.getValueAsDef("CycleCounter")->getValueAsString("Counter");
+ const auto UopsCounter =
+ Def.getValueAsDef("UopsCounter")->getValueAsString("Counter");
+ const size_t NumIssueCounters =
+ Def.getValueAsListOfDefs("IssueCounters").size();
+
+ OS << "\nstatic const PfmCountersInfo " << Target << Def.getName()
+ << " = {\n";
+
+ // Cycle Counter.
+ if (CycleCounter.empty())
+ OS << " nullptr, // No cycle counter.\n";
+ else
+ OS << " " << Target << "PfmCounterNames[" << getPfmCounterId(CycleCounter)
+ << "], // Cycle counter\n";
+
+ // Uops Counter.
+ if (UopsCounter.empty())
+ OS << " nullptr, // No uops counter.\n";
+ else
+ OS << " " << Target << "PfmCounterNames[" << getPfmCounterId(UopsCounter)
+ << "], // Uops counter\n";
+
+ // Issue Counters
+ if (NumIssueCounters == 0)
+ OS << " nullptr, // No issue counters.\n 0\n";
+ else
+ OS << " " << Target << "PfmIssueCounters + " << IssueCountersTableOffset
+ << ", " << NumIssueCounters << " // Issue counters.\n";
+
+ OS << "};\n";
+ IssueCountersTableOffset += NumIssueCounters;
+}
+
+void ExegesisEmitter::emitPfmCounters(raw_ostream &OS) const {
+ // Emit the counter name table.
+ OS << "\nstatic const char* " << Target << "PfmCounterNames[] = {\n";
+ for (const auto &NameAndIndex : PfmCounterNameTable)
+ OS << " \"" << NameAndIndex.first << "\", // " << NameAndIndex.second
+ << "\n";
+ OS << "};\n\n";
+
+ // Emit the IssueCounters table.
+ const auto PfmCounterDefs =
+ Records.getAllDerivedDefinitions("ProcPfmCounters");
+ // Only emit if non-empty.
+ const bool HasAtLeastOnePfmIssueCounter =
+ llvm::any_of(PfmCounterDefs, [](const Record *Def) {
+ return !Def->getValueAsListOfDefs("IssueCounters").empty();
+ });
+ if (HasAtLeastOnePfmIssueCounter) {
+ OS << "static const PfmCountersInfo::IssueCounter " << Target
+ << "PfmIssueCounters[] = {\n";
+ for (const Record *Def : PfmCounterDefs) {
+ for (const Record *ICDef : Def->getValueAsListOfDefs("IssueCounters"))
+ OS << " { " << Target << "PfmCounterNames["
+ << getPfmCounterId(ICDef->getValueAsString("Counter")) << "], \""
+ << ICDef->getValueAsString("ResourceName") << "\"},\n";
+ }
+ OS << "};\n";
+ }
+
+ // Now generate the PfmCountersInfo.
+ unsigned IssueCountersTableOffset = 0;
+ for (const Record *Def : PfmCounterDefs)
+ emitPfmCountersInfo(*Def, IssueCountersTableOffset, OS);
+
+ OS << "\n";
+}
+
+void ExegesisEmitter::emitPfmCountersLookupTable(raw_ostream &OS) const {
+ std::vector<Record *> Bindings =
+ Records.getAllDerivedDefinitions("PfmCountersBinding");
+ assert(!Bindings.empty() && "there must be at least one binding");
+ llvm::sort(Bindings, [](const Record *L, const Record *R) {
+ return L->getValueAsString("CpuName") < R->getValueAsString("CpuName");
+ });
+
+ OS << "// Sorted (by CpuName) array of pfm counters.\n"
+ << "static const CpuAndPfmCounters " << Target << "CpuPfmCounters[] = {\n";
+ for (Record *Binding : Bindings) {
+ // Emit as { "cpu", procinit },
+ OS << " { \"" //
+ << Binding->getValueAsString("CpuName") << "\"," //
+ << " &" << Target << Binding->getValueAsDef("Counters")->getName() //
+ << " },\n";
+ }
+ OS << "};\n\n";
+}
+
+void ExegesisEmitter::run(raw_ostream &OS) const {
+ emitSourceFileHeader("Exegesis Tables", OS);
+ emitPfmCounters(OS);
+ emitPfmCountersLookupTable(OS);
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+
+void EmitExegesis(RecordKeeper &RK, raw_ostream &OS) {
+ ExegesisEmitter(RK).run(OS);
+}
+
+} // end namespace llvm
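For concreteness, the tables this backend generates look roughly like the following for a hypothetical target Foo with a cycle counter, no uops counter, and one issue counter bound to one CPU (every name below is invented for illustration; PfmCountersInfo and CpuAndPfmCounters are the consuming types on the llvm-exegesis side):

static const char* FooPfmCounterNames[] = {
  "CYCLES",      // 0
  "ISSUE_PORT0", // 1
};

static const PfmCountersInfo::IssueCounter FooPfmIssueCounters[] = {
  { FooPfmCounterNames[1], "FooPort0"},
};

static const PfmCountersInfo FooDefaultPfmCounters = {
  FooPfmCounterNames[0], // Cycle counter
  nullptr, // No uops counter.
  FooPfmIssueCounters + 0, 1 // Issue counters.
};

// Sorted (by CpuName) array of pfm counters.
static const CpuAndPfmCounters FooCpuPfmCounters[] = {
  { "foo-cpu", &FooDefaultPfmCounters },
};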
diff --git a/contrib/llvm/utils/TableGen/FastISelEmitter.cpp b/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
index c0902e4c6f1a..5134b684c6f9 100644
--- a/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -39,11 +39,12 @@ struct InstructionMemo {
std::vector<std::string> PhysRegs;
std::string PredicateCheck;
- InstructionMemo(std::string Name, const CodeGenRegisterClass *RC,
+ InstructionMemo(StringRef Name, const CodeGenRegisterClass *RC,
std::string SubRegNo, std::vector<std::string> PhysRegs,
std::string PredicateCheck)
- : Name(Name), RC(RC), SubRegNo(SubRegNo), PhysRegs(PhysRegs),
- PredicateCheck(PredicateCheck) {}
+ : Name(Name), RC(RC), SubRegNo(std::move(SubRegNo)),
+ PhysRegs(std::move(PhysRegs)),
+ PredicateCheck(std::move(PredicateCheck)) {}
// Make sure we do not copy InstructionMemo.
InstructionMemo(const InstructionMemo &Other) = delete;
@@ -209,13 +210,13 @@ struct OperandsSignature {
// Handle imm operands specially.
if (!Op->isLeaf() && Op->getOperator()->getName() == "imm") {
unsigned PredNo = 0;
- if (!Op->getPredicateFns().empty()) {
- TreePredicateFn PredFn = Op->getPredicateFns()[0];
+ if (!Op->getPredicateCalls().empty()) {
+ TreePredicateFn PredFn = Op->getPredicateCalls()[0].Fn;
// If there is more than one predicate weighing in on this operand
// then we don't handle it. This doesn't typically happen for
// immediates anyway.
- if (Op->getPredicateFns().size() > 1 ||
- !PredFn.isImmediatePattern())
+ if (Op->getPredicateCalls().size() > 1 ||
+ !PredFn.isImmediatePattern() || PredFn.usesOperands())
return false;
// Ignore any instruction with 'FastIselShouldIgnore', these are
// not needed and just bloat the fast instruction selector. For
@@ -235,7 +236,7 @@ struct OperandsSignature {
// For now, filter out any operand with a predicate.
// For now, filter out any operand with multiple values.
- if (!Op->getPredicateFns().empty() || Op->getNumTypes() != 1)
+ if (!Op->getPredicateCalls().empty() || Op->getNumTypes() != 1)
return false;
if (!Op->isLeaf()) {
@@ -528,7 +529,7 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
}
// For now, filter out any instructions with predicates.
- if (!InstPatNode->getPredicateFns().empty())
+ if (!InstPatNode->getPredicateCalls().empty())
continue;
// Check all the operands.
@@ -828,7 +829,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
= SignaturesWithConstantForms.find(Operands);
if (MI != SignaturesWithConstantForms.end()) {
// Unique any duplicates out of the list.
- llvm::sort(MI->second.begin(), MI->second.end());
+ llvm::sort(MI->second);
MI->second.erase(std::unique(MI->second.begin(), MI->second.end()),
MI->second.end());
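The call site above now uses the range form of llvm::sort followed by the usual erase/unique compaction; a minimal self-contained restatement of that deduplication idiom in plain C++:

#include <algorithm>
#include <vector>

// Sort so equal elements become adjacent, then drop the tail that
// std::unique compacts away, leaving each value exactly once.
template <typename T> void sortAndUnique(std::vector<T> &V) {
  std::sort(V.begin(), V.end());
  V.erase(std::unique(V.begin(), V.end()), V.end());
}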
diff --git a/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
index 76ba1c001092..5e621fc0efdd 100644
--- a/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -87,8 +87,23 @@ struct DecoderTableInfo {
DecoderSet Decoders;
};
+struct EncodingAndInst {
+ const Record *EncodingDef;
+ const CodeGenInstruction *Inst;
+
+ EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst)
+ : EncodingDef(EncodingDef), Inst(Inst) {}
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
+ if (Value.EncodingDef != Value.Inst->TheDef)
+ OS << Value.EncodingDef->getName() << ":";
+ OS << Value.Inst->TheDef->getName();
+ return OS;
+}
+
class FixedLenDecoderEmitter {
- ArrayRef<const CodeGenInstruction *> NumberedInstructions;
+ std::vector<EncodingAndInst> NumberedEncodings;
public:
// Defaults preserved here for documentation, even though they aren't
@@ -323,7 +338,7 @@ protected:
friend class Filter;
// Vector of codegen instructions to choose our filter.
- ArrayRef<const CodeGenInstruction *> AllInstructions;
+ ArrayRef<EncodingAndInst> AllInstructions;
// Vector of uid's for this filter chooser to work on.
const std::vector<unsigned> &Opcodes;
@@ -351,25 +366,24 @@ protected:
const FixedLenDecoderEmitter *Emitter;
public:
- FilterChooser(ArrayRef<const CodeGenInstruction *> Insts,
+ FilterChooser(ArrayRef<EncodingAndInst> Insts,
const std::vector<unsigned> &IDs,
const std::map<unsigned, std::vector<OperandInfo>> &Ops,
- unsigned BW,
- const FixedLenDecoderEmitter *E)
- : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
- FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
- BitWidth(BW), Emitter(E) {
+ unsigned BW, const FixedLenDecoderEmitter *E)
+ : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
+ FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
+ BitWidth(BW), Emitter(E) {
doFilter();
}
- FilterChooser(ArrayRef<const CodeGenInstruction *> Insts,
+ FilterChooser(ArrayRef<EncodingAndInst> Insts,
const std::vector<unsigned> &IDs,
const std::map<unsigned, std::vector<OperandInfo>> &Ops,
const std::vector<bit_value_t> &ParentFilterBitValues,
const FilterChooser &parent)
- : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
- FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
- BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
+ : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
+ FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
+ BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
doFilter();
}
@@ -381,7 +395,7 @@ public:
protected:
// Populates the insn given the uid.
void insnWithID(insn_t &Insn, unsigned Opcode) const {
- BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst");
+ BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst");
// We may have a SoftFail bitmask, which specifies a mask where an encoding
// may differ from the value in "Inst" and yet still be valid, but the
@@ -389,7 +403,7 @@ protected:
//
// This is used for marking UNPREDICTABLE instructions in the ARM world.
BitsInit *SFBits =
- AllInstructions[Opcode]->TheDef->getValueAsBitsInit("SoftFail");
+ AllInstructions[Opcode].EncodingDef->getValueAsBitsInit("SoftFail");
for (unsigned i = 0; i < BitWidth; ++i) {
if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
@@ -399,11 +413,6 @@ protected:
}
}
- // Returns the record name.
- const StringRef nameWithID(unsigned Opcode) const {
- return AllInstructions[Opcode]->TheDef->getName();
- }
-
// Populates the field of the insn given the start position and the number of
// consecutive bits to scan for.
//
@@ -827,8 +836,7 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
OS << (unsigned)*I++ << ", ";
if (!IsTry) {
- OS << "// Opcode: "
- << NumberedInstructions[Opc]->TheDef->getName() << "\n";
+ OS << "// Opcode: " << NumberedEncodings[Opc] << "\n";
break;
}
@@ -845,8 +853,7 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
OS << utostr(Byte) << ", ";
NumToSkip |= Byte << 16;
- OS << "// Opcode: "
- << NumberedInstructions[Opc]->TheDef->getName()
+ OS << "// Opcode: " << NumberedEncodings[Opc]
<< ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
break;
}
@@ -1153,7 +1160,7 @@ static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
unsigned Opc) const {
ListInit *Predicates =
- AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+ AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
bool IsFirstEmission = true;
for (unsigned i = 0; i < Predicates->size(); ++i) {
Record *Pred = Predicates->getElementAsRecord(i);
@@ -1182,7 +1189,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
ListInit *Predicates =
- AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+ AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
for (unsigned i = 0; i < Predicates->size(); ++i) {
Record *Pred = Predicates->getElementAsRecord(i);
if (!Pred->getValue("AssemblerMatcherPredicate"))
@@ -1247,9 +1254,10 @@ void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
unsigned Opc) const {
BitsInit *SFBits =
- AllInstructions[Opc]->TheDef->getValueAsBitsInit("SoftFail");
+ AllInstructions[Opc].EncodingDef->getValueAsBitsInit("SoftFail");
if (!SFBits) return;
- BitsInit *InstBits = AllInstructions[Opc]->TheDef->getValueAsBitsInit("Inst");
+ BitsInit *InstBits =
+ AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst");
APInt PositiveMask(BitWidth, 0ULL);
APInt NegativeMask(BitWidth, 0ULL);
@@ -1270,9 +1278,9 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
break;
default:
// The bit is not set; this must be an error!
- StringRef Name = AllInstructions[Opc]->TheDef->getName();
- errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " << Name
- << " is set but Inst{" << i << "} is unset!\n"
+ errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
+ << AllInstructions[Opc] << " is set but Inst{" << i
+ << "} is unset!\n"
<< " - You can only mark a bit as SoftFail if it is fully defined"
<< " (1/0 - not '?') in Inst\n";
return;
@@ -1709,9 +1717,9 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
dumpStack(errs(), "\t\t");
for (unsigned i = 0; i < Opcodes.size(); ++i) {
- errs() << '\t' << nameWithID(Opcodes[i]) << " ";
+ errs() << '\t' << AllInstructions[Opcodes[i]] << " ";
dumpBits(errs(),
- getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
+ getBitsField(*AllInstructions[Opcodes[i]].EncodingDef, "Inst"));
errs() << '\n';
}
}
@@ -2067,21 +2075,59 @@ static bool populateInstruction(CodeGenTarget &Target,
// using the VS compiler. It has a bug which causes the function
// to be optimized out in some circumstances. See llvm.org/pr38292
static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
- OS << "// Helper function for extracting fields from encoded instructions.\n"
+ OS << "// Helper functions for extracting fields from encoded instructions.\n"
+ << "// InsnType must either be integral or an APInt-like object that "
+ "must:\n"
+ << "// * Have a static const max_size_in_bits equal to the number of bits "
+ "in the\n"
+ << "// encoding.\n"
+ << "// * be default-constructible and copy-constructible\n"
+ << "// * be constructible from a uint64_t\n"
+ << "// * be constructible from an APInt (this can be private)\n"
+ << "// * Support getBitsSet(loBit, hiBit)\n"
+ << "// * be convertible to uint64_t\n"
+ << "// * Support the ~, &, ==, !=, and |= operators with other objects of "
+ "the same type\n"
+ << "// * Support shift (<<, >>) with signed and unsigned integers on the "
+ "RHS\n"
+ << "// * Support put (<<) to raw_ostream&\n"
<< "template<typename InsnType>\n"
<< "#if defined(_MSC_VER) && !defined(__clang__)\n"
<< "__declspec(noinline)\n"
<< "#endif\n"
- << "static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,\n"
+ << "static InsnType fieldFromInstruction(InsnType insn, unsigned "
+ "startBit,\n"
+ << " unsigned numBits, "
+ "std::true_type) {\n"
+ << " assert(startBit + numBits <= 64 && \"Cannot support >64-bit "
+ "extractions!\");\n"
+ << " assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n"
+ << " \"Instruction field out of bounds!\");\n"
+ << " InsnType fieldMask;\n"
+ << " if (numBits == sizeof(InsnType) * 8)\n"
+ << " fieldMask = (InsnType)(-1LL);\n"
+ << " else\n"
+ << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
+ << " return (insn & fieldMask) >> startBit;\n"
+ << "}\n"
+ << "\n"
+ << "template<typename InsnType>\n"
+ << "static InsnType fieldFromInstruction(InsnType insn, unsigned "
+ "startBit,\n"
+ << " unsigned numBits, "
+ "std::false_type) {\n"
+ << " assert(startBit + numBits <= InsnType::max_size_in_bits && "
+ "\"Instruction field out of bounds!\");\n"
+ << " InsnType fieldMask = InsnType::getBitsSet(0, numBits);\n"
+ << " return (insn >> startBit) & fieldMask;\n"
+ << "}\n"
+ << "\n"
+ << "template<typename InsnType>\n"
+ << "static InsnType fieldFromInstruction(InsnType insn, unsigned "
+ "startBit,\n"
<< " unsigned numBits) {\n"
- << " assert(startBit + numBits <= (sizeof(InsnType)*8) &&\n"
- << " \"Instruction field out of bounds!\");\n"
- << " InsnType fieldMask;\n"
- << " if (numBits == sizeof(InsnType)*8)\n"
- << " fieldMask = (InsnType)(-1LL);\n"
- << " else\n"
- << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
- << " return (insn & fieldMask) >> startBit;\n"
+ << " return fieldFromInstruction(insn, startBit, numBits, "
+ "std::is_integral<InsnType>());\n"
<< "}\n\n";
}
@@ -2288,13 +2334,17 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
Target.reverseBitsForLittleEndianEncoding();
// Parameterize the decoders based on namespace and instruction width.
- NumberedInstructions = Target.getInstructionsByEnumValue();
+ const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
+ NumberedEncodings.reserve(NumberedInstructions.size());
+ for (const auto &NumberedInstruction : NumberedInstructions)
+ NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction);
+
std::map<std::pair<std::string, unsigned>,
std::vector<unsigned>> OpcMap;
std::map<unsigned, std::vector<OperandInfo>> Operands;
- for (unsigned i = 0; i < NumberedInstructions.size(); ++i) {
- const CodeGenInstruction *Inst = NumberedInstructions[i];
+ for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
+ const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
const Record *Def = Inst->TheDef;
unsigned Size = Def->getValueAsInt("Size");
if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
@@ -2315,8 +2365,10 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
DecoderTableInfo TableInfo;
for (const auto &Opc : OpcMap) {
// Emit the decoder for this namespace+width combination.
- FilterChooser FC(NumberedInstructions, Opc.second, Operands,
- 8*Opc.first.second, this);
+ ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(),
+ NumberedEncodings.size());
+ FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
+ 8 * Opc.first.second, this);
// The decode table is cleared for each top level decoder function. The
// predicates and decoders themselves, however, are shared across all
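Reassembled from the emitted strings above, the generated fieldFromInstruction helper now tag-dispatches on std::is_integral: integral encodings keep the old mask-and-shift path, while APInt-like encodings use their own getBitsSet. A sketch of the generated code (assumes <cassert> and <type_traits> are already included by the decoder translation unit):

template <typename InsnType>
static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,
                                     unsigned numBits, std::true_type) {
  // Integral path: build a numBits-wide mask starting at startBit.
  assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
  assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
         "Instruction field out of bounds!");
  InsnType fieldMask;
  if (numBits == sizeof(InsnType) * 8)
    fieldMask = (InsnType)(-1LL);
  else
    fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
  return (insn & fieldMask) >> startBit;
}

template <typename InsnType>
static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,
                                     unsigned numBits, std::false_type) {
  // APInt-like path: the type provides its own bit-set helper.
  assert(startBit + numBits <= InsnType::max_size_in_bits &&
         "Instruction field out of bounds!");
  InsnType fieldMask = InsnType::getBitsSet(0, numBits);
  return (insn >> startBit) & fieldMask;
}

template <typename InsnType>
static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,
                                     unsigned numBits) {
  return fieldFromInstruction(insn, startBit, numBits,
                              std::is_integral<InsnType>());
}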
diff --git a/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp b/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 69726cc9f257..997ceb12becd 100644
--- a/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -200,7 +200,8 @@ static Optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) {
static std::string explainPredicates(const TreePatternNode *N) {
std::string Explanation = "";
StringRef Separator = "";
- for (const auto &P : N->getPredicateFns()) {
+ for (const TreePredicateCall &Call : N->getPredicateCalls()) {
+ const TreePredicateFn &P = Call.Fn;
Explanation +=
(Separator + P.getOrigPatFragRecord()->getRecord()->getName()).str();
Separator = ", ";
@@ -284,7 +285,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
std::string Separator = "";
bool HasUnsupportedPredicate = false;
- for (const auto &Predicate : N->getPredicateFns()) {
+ for (const TreePredicateCall &Call : N->getPredicateCalls()) {
+ const TreePredicateFn &Predicate = Call.Fn;
+
if (Predicate.isAlwaysTrue())
continue;
@@ -1837,6 +1840,12 @@ public:
static bool classof(const InstructionPredicateMatcher *P) {
return P->getKind() == IPM_GenericPredicate;
}
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ Predicate ==
+ static_cast<const GenericInstructionPredicateMatcher &>(B)
+ .Predicate;
+ }
void emitPredicateOpcodes(MatchTable &Table,
RuleMatcher &Rule) const override {
Table << MatchTable::Opcode("GIM_CheckCxxInsnPredicate")
@@ -2607,7 +2616,7 @@ public:
std::vector<unsigned> MergeInsnIDs;
for (const auto &IDMatcherPair : Rule.defined_insn_vars())
MergeInsnIDs.push_back(IDMatcherPair.second);
- llvm::sort(MergeInsnIDs.begin(), MergeInsnIDs.end());
+ llvm::sort(MergeInsnIDs);
for (const auto &MergeInsnID : MergeInsnIDs)
Table << MatchTable::IntValue(MergeInsnID);
Table << MatchTable::NamedValue("GIU_MergeMemOperands_EndOfList")
@@ -2806,7 +2815,7 @@ void RuleMatcher::emit(MatchTable &Table) {
InsnIDs.push_back(Pair.second);
}
- llvm::sort(InsnIDs.begin(), InsnIDs.end());
+ llvm::sort(InsnIDs);
for (const auto &InsnID : InsnIDs) {
// Reject the difficult cases until we have a more accurate check.
@@ -2984,9 +2993,6 @@ private:
void gatherOpcodeValues();
void gatherTypeIDValues();
void gatherNodeEquivs();
- // Instruction predicate code that will be emitted in generated functions.
- SmallVector<std::string, 2> InstructionPredicateCodes;
- unsigned getOrCreateInstructionPredicateFnId(StringRef Code);
Record *findNodeEquiv(Record *N) const;
const CodeGenInstruction *getEquivNode(Record &Equiv,
@@ -3085,20 +3091,6 @@ void GlobalISelEmitter::gatherOpcodeValues() {
void GlobalISelEmitter::gatherTypeIDValues() {
LLTOperandMatcher::initTypeIDValuesMap();
}
-unsigned GlobalISelEmitter::getOrCreateInstructionPredicateFnId(StringRef Code) {
- // There's not very many predicates that need to be here at the moment so we
- // just maintain a simple set-like vector. If it grows then we'll need to do
- // something more efficient.
- const auto &I = std::find(InstructionPredicateCodes.begin(),
- InstructionPredicateCodes.end(),
- Code);
- if (I == InstructionPredicateCodes.end()) {
- unsigned ID = InstructionPredicateCodes.size();
- InstructionPredicateCodes.push_back(Code);
- return ID;
- }
- return std::distance(InstructionPredicateCodes.begin(), I);
-}
void GlobalISelEmitter::gatherNodeEquivs() {
assert(NodeEquivs.empty());
@@ -3128,7 +3120,8 @@ Record *GlobalISelEmitter::findNodeEquiv(Record *N) const {
const CodeGenInstruction *
GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
- for (const auto &Predicate : N->getPredicateFns()) {
+ for (const TreePredicateCall &Call : N->getPredicateCalls()) {
+ const TreePredicateFn &Predicate = Call.Fn;
if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
Predicate.isSignExtLoad())
return &Target.getInstruction(Equiv.getValueAsDef("IfSignExtend"));
@@ -3197,7 +3190,8 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
" for result of Src pattern operator");
}
- for (const auto &Predicate : Src->getPredicateFns()) {
+ for (const TreePredicateCall &Call : Src->getPredicateCalls()) {
+ const TreePredicateFn &Predicate = Call.Fn;
if (Predicate.isAlwaysTrue())
continue;
@@ -4299,11 +4293,11 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
std::vector<Record *> ComplexPredicates =
RK.getAllDerivedDefinitions("GIComplexOperandMatcher");
- llvm::sort(ComplexPredicates.begin(), ComplexPredicates.end(), orderByName);
+ llvm::sort(ComplexPredicates, orderByName);
std::vector<Record *> CustomRendererFns =
RK.getAllDerivedDefinitions("GICustomOperandRenderer");
- llvm::sort(CustomRendererFns.begin(), CustomRendererFns.end(), orderByName);
+ llvm::sort(CustomRendererFns, orderByName);
unsigned MaxTemporaries = 0;
for (const auto &Rule : Rules)
@@ -4382,7 +4376,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
std::vector<LLTCodeGen> TypeObjects;
for (const auto &Ty : KnownTypes)
TypeObjects.push_back(Ty);
- llvm::sort(TypeObjects.begin(), TypeObjects.end());
+ llvm::sort(TypeObjects);
OS << "// LLT Objects.\n"
<< "enum {\n";
for (const auto &TypeObject : TypeObjects) {
@@ -4405,21 +4399,20 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
std::vector<std::vector<Record *>> FeatureBitsets;
for (auto &Rule : Rules)
FeatureBitsets.push_back(Rule.getRequiredFeatures());
- llvm::sort(
- FeatureBitsets.begin(), FeatureBitsets.end(),
- [&](const std::vector<Record *> &A, const std::vector<Record *> &B) {
- if (A.size() < B.size())
- return true;
- if (A.size() > B.size())
- return false;
- for (const auto &Pair : zip(A, B)) {
- if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
- return true;
- if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
- return false;
- }
+ llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A,
+ const std::vector<Record *> &B) {
+ if (A.size() < B.size())
+ return true;
+ if (A.size() > B.size())
+ return false;
+ for (const auto &Pair : zip(A, B)) {
+ if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
+ return true;
+ if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
return false;
- });
+ }
+ return false;
+ });
FeatureBitsets.erase(
std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
FeatureBitsets.end());
@@ -4588,13 +4581,11 @@ void RuleMatcher::optimize() {
}
InsnMatcher.optimize();
}
- llvm::sort(
- EpilogueMatchers.begin(), EpilogueMatchers.end(),
- [](const std::unique_ptr<PredicateMatcher> &L,
- const std::unique_ptr<PredicateMatcher> &R) {
- return std::make_tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) <
- std::make_tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx());
- });
+ llvm::sort(EpilogueMatchers, [](const std::unique_ptr<PredicateMatcher> &L,
+ const std::unique_ptr<PredicateMatcher> &R) {
+ return std::make_tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) <
+ std::make_tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx());
+ });
}
bool RuleMatcher::hasFirstCondition() const {
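The EpilogueMatchers ordering above leans on std::make_tuple for lexicographic comparison over several keys; the same pattern in a small standalone form (struct and field names are placeholders):

#include <algorithm>
#include <tuple>
#include <vector>

struct Matcher { int Kind, InsnVarID, OpIdx; };

// Order by (Kind, InsnVarID, OpIdx); tuples compare lexicographically,
// so later keys only break ties among earlier ones.
void orderMatchers(std::vector<Matcher> &Ms) {
  std::sort(Ms.begin(), Ms.end(), [](const Matcher &L, const Matcher &R) {
    return std::make_tuple(L.Kind, L.InsnVarID, L.OpIdx) <
           std::make_tuple(R.Kind, R.InsnVarID, R.OpIdx);
  });
}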
diff --git a/contrib/llvm/utils/TableGen/InfoByHwMode.cpp b/contrib/llvm/utils/TableGen/InfoByHwMode.cpp
index 7d1f71cc2647..086e12dafd74 100644
--- a/contrib/llvm/utils/TableGen/InfoByHwMode.cpp
+++ b/contrib/llvm/utils/TableGen/InfoByHwMode.cpp
@@ -84,7 +84,7 @@ void ValueTypeByHwMode::writeToStream(raw_ostream &OS) const {
std::vector<const PairType*> Pairs;
for (const auto &P : Map)
Pairs.push_back(&P);
- llvm::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>());
+ llvm::sort(Pairs, deref<std::less<PairType>>());
OS << '{';
for (unsigned i = 0, e = Pairs.size(); i != e; ++i) {
@@ -176,7 +176,7 @@ void RegSizeInfoByHwMode::writeToStream(raw_ostream &OS) const {
std::vector<const PairType*> Pairs;
for (const auto &P : Map)
Pairs.push_back(&P);
- llvm::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>());
+ llvm::sort(Pairs, deref<std::less<PairType>>());
OS << '{';
for (unsigned i = 0, e = Pairs.size(); i != e; ++i) {
diff --git a/contrib/llvm/utils/TableGen/InfoByHwMode.h b/contrib/llvm/utils/TableGen/InfoByHwMode.h
index 4838198e704d..7be4678f271b 100644
--- a/contrib/llvm/utils/TableGen/InfoByHwMode.h
+++ b/contrib/llvm/utils/TableGen/InfoByHwMode.h
@@ -47,10 +47,12 @@ std::vector<unsigned> union_modes(const InfoByHwMode<InfoT> &A,
for (const auto &P : B)
U.insert(P.first);
// Make sure that the default mode is last on the list.
- bool HasDefault = U.count(DefaultMode);
+ bool HasDefault = false;
for (unsigned M : U)
if (M != DefaultMode)
V.push_back(M);
+ else
+ HasDefault = true;
if (HasDefault)
V.push_back(DefaultMode);
return V;
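The rewritten loop preserves the original contract: every mode present in either map is returned, with the default mode, if present, moved to the end. A standalone sketch of that behaviour, assuming the TableGen convention that DefaultMode is id 0:

#include <set>
#include <vector>

constexpr unsigned DefaultMode = 0; // assumption: default hw-mode id

// Union of two mode sets with DefaultMode forced to the back, mirroring
// the union_modes loop above.
std::vector<unsigned> unionModes(const std::set<unsigned> &A,
                                 const std::set<unsigned> &B) {
  std::set<unsigned> U(A);
  U.insert(B.begin(), B.end());
  std::vector<unsigned> V;
  bool HasDefault = false;
  for (unsigned M : U)
    if (M != DefaultMode)
      V.push_back(M);
    else
      HasDefault = true;
  if (HasDefault)
    V.push_back(DefaultMode);
  return V;
}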
diff --git a/contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp b/contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp
index 65cb28cd17a3..9d50351854ec 100644
--- a/contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/InstrDocsEmitter.cpp
@@ -100,6 +100,7 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
#define str(s) #s
#define FLAG(f) if (II->f) { FlagStrings.push_back(str(f)); }
FLAG(isReturn)
+ FLAG(isEHScopeReturn)
FLAG(isBranch)
FLAG(isIndirectBranch)
FLAG(isCompare)
@@ -137,6 +138,7 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
FLAG(isInsertSubreg)
FLAG(isConvergent)
FLAG(hasNoSchedulingInfo)
+ FLAG(variadicOpsAreDefs)
if (!FlagStrings.empty()) {
OS << "Flags: ";
bool IsFirst = true;
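Since the FLAG macro stringifies the member it tests, the two added lines expand to straightforward checks:

// Given the macros defined just above this hunk:
//   #define str(s) #s
//   #define FLAG(f) if (II->f) { FlagStrings.push_back(str(f)); }
// the new FLAG(...) lines become:
if (II->isEHScopeReturn) { FlagStrings.push_back("isEHScopeReturn"); }
if (II->variadicOpsAreDefs) { FlagStrings.push_back("variadicOpsAreDefs"); }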
diff --git a/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp b/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
index a492daac0d09..39d9e8526386 100644
--- a/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -66,11 +66,12 @@ private:
/// This method is used to custom expand TIIPredicate definitions.
/// See file llvm/Target/TargetInstPredicates.td for a description of what is
/// a TIIPredicate and how to use it.
- void emitTIIHelperMethods(raw_ostream &OS);
+ void emitTIIHelperMethods(raw_ostream &OS, StringRef TargetName,
+ bool ExpandDefinition = true);
/// Expand TIIPredicate definitions to functions that accept a const MCInst
/// reference.
- void emitMCIIHelperMethods(raw_ostream &OS);
+ void emitMCIIHelperMethods(raw_ostream &OS, StringRef TargetName);
void emitRecord(const CodeGenInstruction &Inst, unsigned Num,
Record *InstrInfo,
std::map<std::vector<Record*>, unsigned> &EL,
@@ -351,71 +352,79 @@ void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
OS << "#endif // GET_INSTRINFO_OPERAND_TYPES_ENUM\n\n";
}
-void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS) {
+void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
+ StringRef TargetName) {
RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
if (TIIPredicates.empty())
return;
- CodeGenTarget &Target = CDP.getTargetInfo();
- const StringRef TargetName = Target.getName();
- formatted_raw_ostream FOS(OS);
-
- FOS << "#ifdef GET_GENINSTRINFO_MC_DECL\n";
- FOS << "#undef GET_GENINSTRINFO_MC_DECL\n\n";
+ OS << "#ifdef GET_INSTRINFO_MC_HELPER_DECLS\n";
+ OS << "#undef GET_INSTRINFO_MC_HELPER_DECLS\n\n";
- FOS << "namespace llvm {\n";
- FOS << "class MCInst;\n\n";
+ OS << "namespace llvm {\n";
+ OS << "class MCInst;\n\n";
- FOS << "namespace " << TargetName << "_MC {\n\n";
+ OS << "namespace " << TargetName << "_MC {\n\n";
for (const Record *Rec : TIIPredicates) {
- FOS << "bool " << Rec->getValueAsString("FunctionName")
+ OS << "bool " << Rec->getValueAsString("FunctionName")
<< "(const MCInst &MI);\n";
}
- FOS << "\n} // end " << TargetName << "_MC namespace\n";
- FOS << "} // end llvm namespace\n\n";
+ OS << "\n} // end " << TargetName << "_MC namespace\n";
+ OS << "} // end llvm namespace\n\n";
- FOS << "#endif // GET_GENINSTRINFO_MC_DECL\n\n";
+ OS << "#endif // GET_INSTRINFO_MC_HELPER_DECLS\n\n";
- FOS << "#ifdef GET_GENINSTRINFO_MC_HELPERS\n";
- FOS << "#undef GET_GENINSTRINFO_MC_HELPERS\n\n";
+ OS << "#ifdef GET_INSTRINFO_MC_HELPERS\n";
+ OS << "#undef GET_INSTRINFO_MC_HELPERS\n\n";
- FOS << "namespace llvm {\n";
- FOS << "namespace " << TargetName << "_MC {\n\n";
+ OS << "namespace llvm {\n";
+ OS << "namespace " << TargetName << "_MC {\n\n";
- PredicateExpander PE;
+ PredicateExpander PE(TargetName);
PE.setExpandForMC(true);
+
for (const Record *Rec : TIIPredicates) {
- FOS << "bool " << Rec->getValueAsString("FunctionName");
- FOS << "(const MCInst &MI) {\n";
- FOS << " return ";
- PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
- FOS << ";\n}\n";
+ OS << "bool " << Rec->getValueAsString("FunctionName");
+ OS << "(const MCInst &MI) {\n";
+
+ OS.indent(PE.getIndentLevel() * 2);
+ PE.expandStatement(OS, Rec->getValueAsDef("Body"));
+ OS << "\n}\n\n";
}
- FOS << "\n} // end " << TargetName << "_MC namespace\n";
- FOS << "} // end llvm namespace\n\n";
+ OS << "} // end " << TargetName << "_MC namespace\n";
+ OS << "} // end llvm namespace\n\n";
- FOS << "#endif // GET_GENISTRINFO_MC_HELPERS\n";
+ OS << "#endif // GET_GENISTRINFO_MC_HELPERS\n";
}
-void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS) {
+void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS,
+ StringRef TargetName,
+ bool ExpandDefinition) {
RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
if (TIIPredicates.empty())
return;
- formatted_raw_ostream FOS(OS);
- PredicateExpander PE;
+ PredicateExpander PE(TargetName);
PE.setExpandForMC(false);
- PE.setIndentLevel(2);
for (const Record *Rec : TIIPredicates) {
- FOS << "\n static bool " << Rec->getValueAsString("FunctionName");
- FOS << "(const MachineInstr &MI) {\n";
- FOS << " return ";
- PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
- FOS << ";\n }\n";
+ OS << (ExpandDefinition ? "" : "static ") << "bool ";
+ if (ExpandDefinition)
+ OS << TargetName << "InstrInfo::";
+ OS << Rec->getValueAsString("FunctionName");
+ OS << "(const MachineInstr &MI)";
+ if (!ExpandDefinition) {
+ OS << ";\n";
+ continue;
+ }
+
+ OS << " {\n";
+ OS.indent(PE.getIndentLevel() * 2);
+ PE.expandStatement(OS, Rec->getValueAsDef("Body"));
+ OS << "\n}\n\n";
}
}
@@ -517,12 +526,22 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
<< "(int CFSetupOpcode = -1, int CFDestroyOpcode = -1, int CatchRetOpcode = -1, int ReturnOpcode = -1);\n"
<< " ~" << ClassName << "() override = default;\n";
- emitTIIHelperMethods(OS);
OS << "\n};\n} // end llvm namespace\n";
OS << "#endif // GET_INSTRINFO_HEADER\n\n";
+ OS << "#ifdef GET_INSTRINFO_HELPER_DECLS\n";
+ OS << "#undef GET_INSTRINFO_HELPER_DECLS\n\n";
+ emitTIIHelperMethods(OS, TargetName, /* ExpandDefinition = */false);
+ OS << "\n";
+ OS << "#endif // GET_INSTRINFO_HELPER_DECLS\n\n";
+
+ OS << "#ifdef GET_INSTRINFO_HELPERS\n";
+ OS << "#undef GET_INSTRINFO_HELPERS\n\n";
+ emitTIIHelperMethods(OS, TargetName, /* ExpandDefinition = */true);
+ OS << "#endif // GET_INSTRINFO_HELPERS\n\n";
+
OS << "#ifdef GET_INSTRINFO_CTOR_DTOR\n";
OS << "#undef GET_INSTRINFO_CTOR_DTOR\n";
@@ -544,7 +563,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
emitOperandTypesEnum(OS, Target);
- emitMCIIHelperMethods(OS);
+ emitMCIIHelperMethods(OS, TargetName);
}
void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
@@ -569,6 +588,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
// Emit all of the target independent flags...
if (Inst.isPseudo) OS << "|(1ULL<<MCID::Pseudo)";
if (Inst.isReturn) OS << "|(1ULL<<MCID::Return)";
+ if (Inst.isEHScopeReturn) OS << "|(1ULL<<MCID::EHScopeReturn)";
if (Inst.isBranch) OS << "|(1ULL<<MCID::Branch)";
if (Inst.isIndirectBranch) OS << "|(1ULL<<MCID::IndirectBranch)";
if (Inst.isCompare) OS << "|(1ULL<<MCID::Compare)";
@@ -604,6 +624,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isExtractSubreg) OS << "|(1ULL<<MCID::ExtractSubreg)";
if (Inst.isInsertSubreg) OS << "|(1ULL<<MCID::InsertSubreg)";
if (Inst.isConvergent) OS << "|(1ULL<<MCID::Convergent)";
+ if (Inst.variadicOpsAreDefs) OS << "|(1ULL<<MCID::VariadicOpsAreDefs)";
// Emit all of the target-specific flags...
BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags");
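With the TIIPredicate helpers now split behind their own guards, a target that defines TIIPredicate records would typically pull them in along these lines (the Foo target and file names are invented; the guard macros are the ones emitted above):

// FooInstrInfo.h -- declarations land inside the target's InstrInfo class:
class FooInstrInfo : public FooGenInstrInfo {
public:
  // ...
#define GET_INSTRINFO_HELPER_DECLS
#include "FooGenInstrInfo.inc"
};

// FooInstrInfo.cpp -- out-of-line definitions of the same helpers:
#define GET_INSTRINFO_HELPERS
#include "FooGenInstrInfo.inc"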
diff --git a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 06e44e3b57c1..049282e5ebfe 100644
--- a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -489,6 +489,9 @@ struct AttributeComparator {
if (L->isNoReturn != R->isNoReturn)
return R->isNoReturn;
+ if (L->isCold != R->isCold)
+ return R->isCold;
+
if (L->isConvergent != R->isConvergent)
return R->isConvergent;
@@ -622,7 +625,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
if (!intrinsic.canThrow ||
intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem ||
- intrinsic.isNoReturn || intrinsic.isNoDuplicate ||
+ intrinsic.isNoReturn || intrinsic.isCold || intrinsic.isNoDuplicate ||
intrinsic.isConvergent || intrinsic.isSpeculatable) {
OS << " const Attribute::AttrKind Atts[] = {";
bool addComma = false;
@@ -636,6 +639,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << "Attribute::NoReturn";
addComma = true;
}
+ if (intrinsic.isCold) {
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::Cold";
+ addComma = true;
+ }
if (intrinsic.isNoDuplicate) {
if (addComma)
OS << ",";
diff --git a/contrib/llvm/utils/TableGen/PredicateExpander.cpp b/contrib/llvm/utils/TableGen/PredicateExpander.cpp
index 68eb32794a02..2e01b7c3138e 100644
--- a/contrib/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/contrib/llvm/utils/TableGen/PredicateExpander.cpp
@@ -12,65 +12,110 @@
//===----------------------------------------------------------------------===//
#include "PredicateExpander.h"
+#include "CodeGenSchedule.h" // Definition of STIPredicateFunction.
namespace llvm {
-void PredicateExpander::expandTrue(formatted_raw_ostream &OS) { OS << "true"; }
-void PredicateExpander::expandFalse(formatted_raw_ostream &OS) {
- OS << "false";
+void PredicateExpander::expandTrue(raw_ostream &OS) { OS << "true"; }
+void PredicateExpander::expandFalse(raw_ostream &OS) { OS << "false"; }
+
+void PredicateExpander::expandCheckImmOperand(raw_ostream &OS, int OpIndex,
+ int ImmVal,
+ StringRef FunctionMapper) {
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
+ OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+ << ").getImm()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+ OS << (shouldNegate() ? " != " : " == ") << ImmVal;
}
-void PredicateExpander::expandCheckImmOperand(formatted_raw_ostream &OS,
- int OpIndex, int ImmVal) {
+void PredicateExpander::expandCheckImmOperand(raw_ostream &OS, int OpIndex,
+ StringRef ImmVal,
+ StringRef FunctionMapper) {
+ if (ImmVal.empty())
+ expandCheckImmOperandSimple(OS, OpIndex, FunctionMapper);
+
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
- << ").getImm() " << (shouldNegate() ? "!= " : "== ") << ImmVal;
+ << ").getImm()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+ OS << (shouldNegate() ? " != " : " == ") << ImmVal;
}
-void PredicateExpander::expandCheckImmOperand(formatted_raw_ostream &OS,
- int OpIndex, StringRef ImmVal) {
+void PredicateExpander::expandCheckImmOperandSimple(raw_ostream &OS,
+ int OpIndex,
+ StringRef FunctionMapper) {
+ if (shouldNegate())
+ OS << "!";
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
- << ").getImm() " << (shouldNegate() ? "!= " : "== ") << ImmVal;
+ << ").getImm()";
+ if (!FunctionMapper.empty())
+ OS << ")";
}
-void PredicateExpander::expandCheckRegOperand(formatted_raw_ostream &OS,
- int OpIndex, const Record *Reg) {
+void PredicateExpander::expandCheckRegOperand(raw_ostream &OS, int OpIndex,
+ const Record *Reg,
+ StringRef FunctionMapper) {
assert(Reg->isSubClassOf("Register") && "Expected a register Record!");
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
- << ").getReg() " << (shouldNegate() ? "!= " : "== ");
+ << ").getReg()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+ OS << (shouldNegate() ? " != " : " == ");
const StringRef Str = Reg->getValueAsString("Namespace");
if (!Str.empty())
OS << Str << "::";
OS << Reg->getName();
}
-void PredicateExpander::expandCheckInvalidRegOperand(formatted_raw_ostream &OS,
+
+void PredicateExpander::expandCheckRegOperandSimple(raw_ostream &OS,
+ int OpIndex,
+ StringRef FunctionMapper) {
+ if (shouldNegate())
+ OS << "!";
+ if (!FunctionMapper.empty())
+ OS << FunctionMapper << "(";
+ OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+ << ").getReg()";
+ if (!FunctionMapper.empty())
+ OS << ")";
+}
+
+void PredicateExpander::expandCheckInvalidRegOperand(raw_ostream &OS,
int OpIndex) {
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
<< ").getReg() " << (shouldNegate() ? "!= " : "== ") << "0";
}
-void PredicateExpander::expandCheckSameRegOperand(formatted_raw_ostream &OS,
- int First, int Second) {
+void PredicateExpander::expandCheckSameRegOperand(raw_ostream &OS, int First,
+ int Second) {
OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << First
<< ").getReg() " << (shouldNegate() ? "!=" : "==") << " MI"
<< (isByRef() ? "." : "->") << "getOperand(" << Second << ").getReg()";
}
-void PredicateExpander::expandCheckNumOperands(formatted_raw_ostream &OS,
- int NumOps) {
+void PredicateExpander::expandCheckNumOperands(raw_ostream &OS, int NumOps) {
OS << "MI" << (isByRef() ? "." : "->") << "getNumOperands() "
<< (shouldNegate() ? "!= " : "== ") << NumOps;
}
-void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
- const Record *Inst) {
+void PredicateExpander::expandCheckOpcode(raw_ostream &OS, const Record *Inst) {
OS << "MI" << (isByRef() ? "." : "->") << "getOpcode() "
<< (shouldNegate() ? "!= " : "== ") << Inst->getValueAsString("Namespace")
<< "::" << Inst->getName();
}
-void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
+void PredicateExpander::expandCheckOpcode(raw_ostream &OS,
const RecVec &Opcodes) {
assert(!Opcodes.empty() && "Expected at least one opcode to check!");
bool First = true;
@@ -86,7 +131,7 @@ void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
increaseIndentLevel();
for (const Record *Rec : Opcodes) {
OS << '\n';
- OS.PadToColumn(getIndentLevel() * 2);
+ OS.indent(getIndentLevel() * 2);
if (!First)
OS << (shouldNegate() ? "&& " : "|| ");
@@ -96,11 +141,11 @@ void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
OS << '\n';
decreaseIndentLevel();
- OS.PadToColumn(getIndentLevel() * 2);
+ OS.indent(getIndentLevel() * 2);
OS << ')';
}
-void PredicateExpander::expandCheckPseudo(formatted_raw_ostream &OS,
+void PredicateExpander::expandCheckPseudo(raw_ostream &OS,
const RecVec &Opcodes) {
if (shouldExpandForMC())
expandFalse(OS);
@@ -108,7 +153,7 @@ void PredicateExpander::expandCheckPseudo(formatted_raw_ostream &OS,
expandCheckOpcode(OS, Opcodes);
}
-void PredicateExpander::expandPredicateSequence(formatted_raw_ostream &OS,
+void PredicateExpander::expandPredicateSequence(raw_ostream &OS,
const RecVec &Sequence,
bool IsCheckAll) {
assert(!Sequence.empty() && "Found an invalid empty predicate set!");
@@ -124,7 +169,7 @@ void PredicateExpander::expandPredicateSequence(formatted_raw_ostream &OS,
setNegatePredicate(false);
for (const Record *Rec : Sequence) {
OS << '\n';
- OS.PadToColumn(getIndentLevel() * 2);
+ OS.indent(getIndentLevel() * 2);
if (!First)
OS << (IsCheckAll ? "&& " : "|| ");
expandPredicate(OS, Rec);
@@ -132,43 +177,36 @@ void PredicateExpander::expandPredicateSequence(formatted_raw_ostream &OS,
}
OS << '\n';
decreaseIndentLevel();
- OS.PadToColumn(getIndentLevel() * 2);
+ OS.indent(getIndentLevel() * 2);
OS << ')';
setNegatePredicate(OldValue);
}
-void PredicateExpander::expandTIIFunctionCall(formatted_raw_ostream &OS,
- StringRef TargetName,
+void PredicateExpander::expandTIIFunctionCall(raw_ostream &OS,
StringRef MethodName) {
OS << (shouldNegate() ? "!" : "");
- if (shouldExpandForMC())
- OS << TargetName << "_MC::";
- else
- OS << TargetName << "Gen"
- << "InstrInfo::";
+ OS << TargetName << (shouldExpandForMC() ? "_MC::" : "InstrInfo::");
OS << MethodName << (isByRef() ? "(MI)" : "(*MI)");
}
-void PredicateExpander::expandCheckIsRegOperand(formatted_raw_ostream &OS,
- int OpIndex) {
+void PredicateExpander::expandCheckIsRegOperand(raw_ostream &OS, int OpIndex) {
OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
<< "getOperand(" << OpIndex << ").isReg() ";
}
-void PredicateExpander::expandCheckIsImmOperand(formatted_raw_ostream &OS,
- int OpIndex) {
+void PredicateExpander::expandCheckIsImmOperand(raw_ostream &OS, int OpIndex) {
OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
<< "getOperand(" << OpIndex << ").isImm() ";
}
-void PredicateExpander::expandCheckFunctionPredicate(formatted_raw_ostream &OS,
+void PredicateExpander::expandCheckFunctionPredicate(raw_ostream &OS,
StringRef MCInstFn,
StringRef MachineInstrFn) {
OS << (shouldExpandForMC() ? MCInstFn : MachineInstrFn)
<< (isByRef() ? "(MI)" : "(*MI)");
}
-void PredicateExpander::expandCheckNonPortable(formatted_raw_ostream &OS,
+void PredicateExpander::expandCheckNonPortable(raw_ostream &OS,
StringRef Code) {
if (shouldExpandForMC())
return expandFalse(OS);
@@ -176,13 +214,79 @@ void PredicateExpander::expandCheckNonPortable(formatted_raw_ostream &OS,
OS << '(' << Code << ')';
}
-void PredicateExpander::expandPredicate(formatted_raw_ostream &OS,
- const Record *Rec) {
- OS.flush();
- unsigned ColNum = getIndentLevel() * 2;
- if (OS.getColumn() < ColNum)
- OS.PadToColumn(ColNum);
+void PredicateExpander::expandReturnStatement(raw_ostream &OS,
+ const Record *Rec) {
+ std::string Buffer;
+ raw_string_ostream SS(Buffer);
+ SS << "return ";
+ expandPredicate(SS, Rec);
+ SS << ";";
+ SS.flush();
+ OS << Buffer;
+}
+
+void PredicateExpander::expandOpcodeSwitchCase(raw_ostream &OS,
+ const Record *Rec) {
+ const RecVec &Opcodes = Rec->getValueAsListOfDefs("Opcodes");
+ for (const Record *Opcode : Opcodes) {
+ OS.indent(getIndentLevel() * 2);
+ OS << "case " << Opcode->getValueAsString("Namespace")
+ << "::" << Opcode->getName() << ":\n";
+ }
+
+ increaseIndentLevel();
+ OS.indent(getIndentLevel() * 2);
+ expandStatement(OS, Rec->getValueAsDef("CaseStmt"));
+ decreaseIndentLevel();
+}
+
+void PredicateExpander::expandOpcodeSwitchStatement(raw_ostream &OS,
+ const RecVec &Cases,
+ const Record *Default) {
+ std::string Buffer;
+ raw_string_ostream SS(Buffer);
+
+ SS << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n";
+ for (const Record *Rec : Cases) {
+ expandOpcodeSwitchCase(SS, Rec);
+ SS << '\n';
+ }
+
+ // Expand the default case.
+ SS.indent(getIndentLevel() * 2);
+ SS << "default:\n";
+
+ increaseIndentLevel();
+ SS.indent(getIndentLevel() * 2);
+ expandStatement(SS, Default);
+ decreaseIndentLevel();
+ SS << '\n';
+
+ SS.indent(getIndentLevel() * 2);
+ SS << "} // end of switch-stmt";
+ SS.flush();
+ OS << Buffer;
+}
+
+void PredicateExpander::expandStatement(raw_ostream &OS, const Record *Rec) {
+ // Assume that padding has been added by the caller.
+ if (Rec->isSubClassOf("MCOpcodeSwitchStatement")) {
+ expandOpcodeSwitchStatement(OS, Rec->getValueAsListOfDefs("Cases"),
+ Rec->getValueAsDef("DefaultCase"));
+ return;
+ }
+
+ if (Rec->isSubClassOf("MCReturnStatement")) {
+ expandReturnStatement(OS, Rec->getValueAsDef("Pred"));
+ return;
+ }
+
+ llvm_unreachable("No known rules to expand this MCStatement");
+}
+
+void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) {
+ // Assume that padding has been added by the caller.
if (Rec->isSubClassOf("MCTrue")) {
if (shouldNegate())
return expandFalse(OS);
@@ -210,18 +314,29 @@ void PredicateExpander::expandPredicate(formatted_raw_ostream &OS,
if (Rec->isSubClassOf("CheckRegOperand"))
return expandCheckRegOperand(OS, Rec->getValueAsInt("OpIndex"),
- Rec->getValueAsDef("Reg"));
+ Rec->getValueAsDef("Reg"),
+ Rec->getValueAsString("FunctionMapper"));
+
+ if (Rec->isSubClassOf("CheckRegOperandSimple"))
+ return expandCheckRegOperandSimple(OS, Rec->getValueAsInt("OpIndex"),
+ Rec->getValueAsString("FunctionMapper"));
if (Rec->isSubClassOf("CheckInvalidRegOperand"))
return expandCheckInvalidRegOperand(OS, Rec->getValueAsInt("OpIndex"));
if (Rec->isSubClassOf("CheckImmOperand"))
return expandCheckImmOperand(OS, Rec->getValueAsInt("OpIndex"),
- Rec->getValueAsInt("ImmVal"));
+ Rec->getValueAsInt("ImmVal"),
+ Rec->getValueAsString("FunctionMapper"));
if (Rec->isSubClassOf("CheckImmOperand_s"))
return expandCheckImmOperand(OS, Rec->getValueAsInt("OpIndex"),
- Rec->getValueAsString("ImmVal"));
+ Rec->getValueAsString("ImmVal"),
+ Rec->getValueAsString("FunctionMapper"));
+
+ if (Rec->isSubClassOf("CheckImmOperandSimple"))
+ return expandCheckImmOperandSimple(OS, Rec->getValueAsInt("OpIndex"),
+ Rec->getValueAsString("FunctionMapper"));
if (Rec->isSubClassOf("CheckSameRegOperand"))
return expandCheckSameRegOperand(OS, Rec->getValueAsInt("FirstIndex"),
@@ -253,10 +368,163 @@ void PredicateExpander::expandPredicate(formatted_raw_ostream &OS,
return expandCheckNonPortable(OS, Rec->getValueAsString("CodeBlock"));
if (Rec->isSubClassOf("TIIPredicate"))
- return expandTIIFunctionCall(OS, Rec->getValueAsString("TargetName"),
- Rec->getValueAsString("FunctionName"));
+ return expandTIIFunctionCall(OS, Rec->getValueAsString("FunctionName"));
llvm_unreachable("No known rules to expand this MCInstPredicate");
}
+void STIPredicateExpander::expandHeader(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ const Record *Rec = Fn.getDeclaration();
+ StringRef FunctionName = Rec->getValueAsString("Name");
+
+ OS.indent(getIndentLevel() * 2);
+ OS << "bool ";
+ if (shouldExpandDefinition())
+ OS << getClassPrefix() << "::";
+ OS << FunctionName << "(";
+ if (shouldExpandForMC())
+ OS << "const MCInst " << (isByRef() ? "&" : "*") << "MI";
+ else
+ OS << "const MachineInstr " << (isByRef() ? "&" : "*") << "MI";
+ if (Rec->getValueAsBit("UpdatesOpcodeMask"))
+ OS << ", APInt &Mask";
+ OS << (shouldExpandForMC() ? ", unsigned ProcessorID) const " : ") const ");
+ if (shouldExpandDefinition()) {
+ OS << "{\n";
+ return;
+ }
+
+ if (Rec->getValueAsBit("OverridesBaseClassMember"))
+ OS << "override";
+ OS << ";\n";
+}
+
+void STIPredicateExpander::expandPrologue(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ RecVec Delegates = Fn.getDeclaration()->getValueAsListOfDefs("Delegates");
+ bool UpdatesOpcodeMask =
+ Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask");
+
+ increaseIndentLevel();
+ unsigned IndentLevel = getIndentLevel();
+ for (const Record *Delegate : Delegates) {
+ OS.indent(IndentLevel * 2);
+ OS << "if (" << Delegate->getValueAsString("Name") << "(MI";
+ if (UpdatesOpcodeMask)
+ OS << ", Mask";
+ if (shouldExpandForMC())
+ OS << ", ProcessorID";
+ OS << "))\n";
+ OS.indent((1 + IndentLevel) * 2);
+ OS << "return true;\n\n";
+ }
+
+ if (shouldExpandForMC())
+ return;
+
+ OS.indent(IndentLevel * 2);
+ OS << "unsigned ProcessorID = getSchedModel().getProcessorID();\n";
+}
+
+void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group,
+ bool ShouldUpdateOpcodeMask) {
+ const OpcodeInfo &OI = Group.getOpcodeInfo();
+ for (const PredicateInfo &PI : OI.getPredicates()) {
+ const APInt &ProcModelMask = PI.ProcModelMask;
+ bool FirstProcID = true;
+ for (unsigned I = 0, E = ProcModelMask.getActiveBits(); I < E; ++I) {
+ if (!ProcModelMask[I])
+ continue;
+
+ if (FirstProcID) {
+ OS.indent(getIndentLevel() * 2);
+ OS << "if (ProcessorID == " << I;
+ } else {
+ OS << " || ProcessorID == " << I;
+ }
+ FirstProcID = false;
+ }
+
+ OS << ") {\n";
+
+ increaseIndentLevel();
+ OS.indent(getIndentLevel() * 2);
+ if (ShouldUpdateOpcodeMask) {
+ if (PI.OperandMask.isNullValue())
+ OS << "Mask.clearAllBits();\n";
+ else
+ OS << "Mask = " << PI.OperandMask << ";\n";
+ OS.indent(getIndentLevel() * 2);
+ }
+ OS << "return ";
+ expandPredicate(OS, PI.Predicate);
+ OS << ";\n";
+ decreaseIndentLevel();
+ OS.indent(getIndentLevel() * 2);
+ OS << "}\n";
+ }
+}
+
+void STIPredicateExpander::expandBody(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ bool UpdatesOpcodeMask =
+ Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask");
+
+ unsigned IndentLevel = getIndentLevel();
+ OS.indent(IndentLevel * 2);
+ OS << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n";
+ OS.indent(IndentLevel * 2);
+ OS << "default:\n";
+ OS.indent(IndentLevel * 2);
+ OS << " break;";
+
+ for (const OpcodeGroup &Group : Fn.getGroups()) {
+ for (const Record *Opcode : Group.getOpcodes()) {
+ OS << '\n';
+ OS.indent(IndentLevel * 2);
+ OS << "case " << getTargetName() << "::" << Opcode->getName() << ":";
+ }
+
+ OS << '\n';
+ increaseIndentLevel();
+ expandOpcodeGroup(OS, Group, UpdatesOpcodeMask);
+
+ OS.indent(getIndentLevel() * 2);
+ OS << "break;\n";
+ decreaseIndentLevel();
+ }
+
+ OS.indent(IndentLevel * 2);
+ OS << "}\n";
+}
+
+void STIPredicateExpander::expandEpilogue(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ OS << '\n';
+ OS.indent(getIndentLevel() * 2);
+ OS << "return ";
+ expandPredicate(OS, Fn.getDefaultReturnPredicate());
+ OS << ";\n";
+
+ decreaseIndentLevel();
+ OS.indent(getIndentLevel() * 2);
+ StringRef FunctionName = Fn.getDeclaration()->getValueAsString("Name");
+ OS << "} // " << ClassPrefix << "::" << FunctionName << "\n\n";
+}
+
+void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS,
+ const STIPredicateFunction &Fn) {
+ const Record *Rec = Fn.getDeclaration();
+ if (shouldExpandForMC() && !Rec->getValueAsBit("ExpandForMC"))
+ return;
+
+ expandHeader(OS, Fn);
+ if (shouldExpandDefinition()) {
+ expandPrologue(OS, Fn);
+ expandBody(OS, Fn);
+ expandEpilogue(OS, Fn);
+ }
+}
+
} // namespace llvm
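Putting the statement expanders together, a TIIPredicate whose body is an MCOpcodeSwitchStatement comes out as a plain switch over the opcode; roughly, for an invented predicate and target:

// Hypothetical MCInst-level helper produced via expandStatement():
bool isFooMemAccess(const MCInst &MI) {
  switch(MI.getOpcode()) {
  case Foo::LOAD32:
  case Foo::STORE32:
    return true;
  default:
    return false;
  } // end of switch-stmt
}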
diff --git a/contrib/llvm/utils/TableGen/PredicateExpander.h b/contrib/llvm/utils/TableGen/PredicateExpander.h
index 398b376f7a83..0f3ee6867e65 100644
--- a/contrib/llvm/utils/TableGen/PredicateExpander.h
+++ b/contrib/llvm/utils/TableGen/PredicateExpander.h
@@ -18,67 +18,105 @@
#define LLVM_UTILS_TABLEGEN_PREDICATEEXPANDER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
namespace llvm {
-class formatted_raw_ostream;
+class raw_ostream;
class PredicateExpander {
bool EmitCallsByRef;
bool NegatePredicate;
bool ExpandForMC;
unsigned IndentLevel;
+ StringRef TargetName;
PredicateExpander(const PredicateExpander &) = delete;
PredicateExpander &operator=(const PredicateExpander &) = delete;
public:
- PredicateExpander()
+ PredicateExpander(StringRef Target)
: EmitCallsByRef(true), NegatePredicate(false), ExpandForMC(false),
- IndentLevel(1U) {}
+ IndentLevel(1U), TargetName(Target) {}
bool isByRef() const { return EmitCallsByRef; }
bool shouldNegate() const { return NegatePredicate; }
bool shouldExpandForMC() const { return ExpandForMC; }
unsigned getIndentLevel() const { return IndentLevel; }
+ StringRef getTargetName() const { return TargetName; }
void setByRef(bool Value) { EmitCallsByRef = Value; }
void flipNegatePredicate() { NegatePredicate = !NegatePredicate; }
void setNegatePredicate(bool Value) { NegatePredicate = Value; }
void setExpandForMC(bool Value) { ExpandForMC = Value; }
+ void setIndentLevel(unsigned Level) { IndentLevel = Level; }
void increaseIndentLevel() { ++IndentLevel; }
void decreaseIndentLevel() { --IndentLevel; }
- void setIndentLevel(unsigned Level) { IndentLevel = Level; }
using RecVec = std::vector<Record *>;
- void expandTrue(formatted_raw_ostream &OS);
- void expandFalse(formatted_raw_ostream &OS);
- void expandCheckImmOperand(formatted_raw_ostream &OS, int OpIndex,
- int ImmVal);
- void expandCheckImmOperand(formatted_raw_ostream &OS, int OpIndex,
- StringRef ImmVal);
- void expandCheckRegOperand(formatted_raw_ostream &OS, int OpIndex,
- const Record *Reg);
- void expandCheckSameRegOperand(formatted_raw_ostream &OS, int First,
- int Second);
- void expandCheckNumOperands(formatted_raw_ostream &OS, int NumOps);
- void expandCheckOpcode(formatted_raw_ostream &OS, const Record *Inst);
-
- void expandCheckPseudo(formatted_raw_ostream &OS, const RecVec &Opcodes);
- void expandCheckOpcode(formatted_raw_ostream &OS, const RecVec &Opcodes);
- void expandPredicateSequence(formatted_raw_ostream &OS,
- const RecVec &Sequence, bool IsCheckAll);
- void expandTIIFunctionCall(formatted_raw_ostream &OS, StringRef TargetName,
- StringRef MethodName);
- void expandCheckIsRegOperand(formatted_raw_ostream &OS, int OpIndex);
- void expandCheckIsImmOperand(formatted_raw_ostream &OS, int OpIndex);
- void expandCheckInvalidRegOperand(formatted_raw_ostream &OS, int OpIndex);
- void expandCheckFunctionPredicate(formatted_raw_ostream &OS,
- StringRef MCInstFn,
+ void expandTrue(raw_ostream &OS);
+ void expandFalse(raw_ostream &OS);
+ void expandCheckImmOperand(raw_ostream &OS, int OpIndex, int ImmVal,
+ StringRef FunctionMapper);
+ void expandCheckImmOperand(raw_ostream &OS, int OpIndex, StringRef ImmVal,
+ StringRef FunctionMapper);
+ void expandCheckImmOperandSimple(raw_ostream &OS, int OpIndex,
+ StringRef FunctionMapper);
+ void expandCheckRegOperand(raw_ostream &OS, int OpIndex, const Record *Reg,
+ StringRef FunctionMapper);
+ void expandCheckRegOperandSimple(raw_ostream &OS, int OpIndex,
+ StringRef FunctionMapper);
+ void expandCheckSameRegOperand(raw_ostream &OS, int First, int Second);
+ void expandCheckNumOperands(raw_ostream &OS, int NumOps);
+ void expandCheckOpcode(raw_ostream &OS, const Record *Inst);
+
+ void expandCheckPseudo(raw_ostream &OS, const RecVec &Opcodes);
+ void expandCheckOpcode(raw_ostream &OS, const RecVec &Opcodes);
+ void expandPredicateSequence(raw_ostream &OS, const RecVec &Sequence,
+ bool IsCheckAll);
+ void expandTIIFunctionCall(raw_ostream &OS, StringRef MethodName);
+ void expandCheckIsRegOperand(raw_ostream &OS, int OpIndex);
+ void expandCheckIsImmOperand(raw_ostream &OS, int OpIndex);
+ void expandCheckInvalidRegOperand(raw_ostream &OS, int OpIndex);
+ void expandCheckFunctionPredicate(raw_ostream &OS, StringRef MCInstFn,
StringRef MachineInstrFn);
- void expandCheckNonPortable(formatted_raw_ostream &OS, StringRef CodeBlock);
- void expandPredicate(formatted_raw_ostream &OS, const Record *Rec);
+ void expandCheckNonPortable(raw_ostream &OS, StringRef CodeBlock);
+ void expandPredicate(raw_ostream &OS, const Record *Rec);
+ void expandReturnStatement(raw_ostream &OS, const Record *Rec);
+ void expandOpcodeSwitchCase(raw_ostream &OS, const Record *Rec);
+ void expandOpcodeSwitchStatement(raw_ostream &OS, const RecVec &Cases,
+ const Record *Default);
+ void expandStatement(raw_ostream &OS, const Record *Rec);
+};
+
+// Forward declarations.
+class STIPredicateFunction;
+class OpcodeGroup;
+
+class STIPredicateExpander : public PredicateExpander {
+ StringRef ClassPrefix;
+ bool ExpandDefinition;
+
+ STIPredicateExpander(const STIPredicateExpander &) = delete;
+ STIPredicateExpander &operator=(const STIPredicateExpander &) = delete;
+
+ void expandHeader(raw_ostream &OS, const STIPredicateFunction &Fn);
+ void expandPrologue(raw_ostream &OS, const STIPredicateFunction &Fn);
+ void expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group,
+ bool ShouldUpdateOpcodeMask);
+ void expandBody(raw_ostream &OS, const STIPredicateFunction &Fn);
+ void expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn);
+
+public:
+ STIPredicateExpander(StringRef Target)
+ : PredicateExpander(Target), ClassPrefix(), ExpandDefinition(false) {}
+
+ bool shouldExpandDefinition() const { return ExpandDefinition; }
+ StringRef getClassPrefix() const { return ClassPrefix; }
+ void setClassPrefix(StringRef S) { ClassPrefix = S; }
+ void setExpandDefinition(bool Value) { ExpandDefinition = Value; }
+
+ void expandSTIPredicate(raw_ostream &OS, const STIPredicateFunction &Fn);
};
} // namespace llvm
diff --git a/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 49016cca799e..ded54c828bcd 100644
--- a/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -296,7 +296,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
PSetE = PSetIDs.end(); PSetI != PSetE; ++PSetI) {
PSets[i].push_back(RegBank.getRegPressureSet(*PSetI).Order);
}
- llvm::sort(PSets[i].begin(), PSets[i].end());
+ llvm::sort(PSets[i]);
PSetsSeqs.add(PSets[i]);
}
@@ -340,11 +340,38 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
<< "}\n\n";
}
+using DwarfRegNumsMapPair = std::pair<Record*, std::vector<int64_t>>;
+using DwarfRegNumsVecTy = std::vector<DwarfRegNumsMapPair>;
+
+void finalizeDwarfRegNumsKeys(DwarfRegNumsVecTy &DwarfRegNums) {
+ // Sort and unique to get a map-like vector. We want the last assignment to
+ // match previous behaviour.
+ std::stable_sort(DwarfRegNums.begin(), DwarfRegNums.end(),
+ on_first<LessRecordRegister>());
+ // Warn about duplicate assignments.
+ const Record *LastSeenReg = nullptr;
+ for (const auto &X : DwarfRegNums) {
+ const auto &Reg = X.first;
+ // The only way LessRecordRegister can return equal is if they're the same
+ // string. Use simple equality instead.
+ if (LastSeenReg && Reg->getName() == LastSeenReg->getName())
+ PrintWarning(Reg->getLoc(), Twine("DWARF numbers for register ") +
+ getQualifiedName(Reg) +
+ "specified multiple times");
+ LastSeenReg = Reg;
+ }
+ auto Last = std::unique(
+ DwarfRegNums.begin(), DwarfRegNums.end(),
+ [](const DwarfRegNumsMapPair &A, const DwarfRegNumsMapPair &B) {
+ return A.first->getName() == B.first->getName();
+ });
+ DwarfRegNums.erase(Last, DwarfRegNums.end());
+}
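The sort-and-unique approach above turns a plain vector of (register, numbers) pairs into a map-like container while still reporting duplicate keys. A minimal standalone sketch of the same pattern, with plain STL types standing in for TableGen records (all names below are illustrative):

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

using Entry = std::pair<std::string, std::vector<int>>;

void finalizeKeys(std::vector<Entry> &Entries) {
  // Stable sort by key so entries with equal keys keep their insertion order.
  std::stable_sort(Entries.begin(), Entries.end(),
                   [](const Entry &A, const Entry &B) {
                     return A.first < B.first;
                   });
  // Warn about duplicate keys before dropping them.
  const std::string *LastKey = nullptr;
  for (const Entry &E : Entries) {
    if (LastKey && E.first == *LastKey)
      std::fprintf(stderr, "warning: key '%s' specified multiple times\n",
                   E.first.c_str());
    LastKey = &E.first;
  }
  // std::unique keeps the first element of each run of equal keys.
  Entries.erase(std::unique(Entries.begin(), Entries.end(),
                            [](const Entry &A, const Entry &B) {
                              return A.first == B.first;
                            }),
                Entries.end());
}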
+
void RegisterInfoEmitter::EmitRegMappingTables(
raw_ostream &OS, const std::deque<CodeGenRegister> &Regs, bool isCtor) {
// Collect all information about dwarf register numbers
- typedef std::map<Record*, std::vector<int64_t>, LessRecordRegister> DwarfRegNumsMapTy;
- DwarfRegNumsMapTy DwarfRegNums;
+ DwarfRegNumsVecTy DwarfRegNums;
// First, just pull all provided information to the map
unsigned maxLength = 0;
@@ -352,18 +379,17 @@ void RegisterInfoEmitter::EmitRegMappingTables(
Record *Reg = RE.TheDef;
std::vector<int64_t> RegNums = Reg->getValueAsListOfInts("DwarfNumbers");
maxLength = std::max((size_t)maxLength, RegNums.size());
- if (DwarfRegNums.count(Reg))
- PrintWarning(Reg->getLoc(), Twine("DWARF numbers for register ") +
- getQualifiedName(Reg) + "specified multiple times");
- DwarfRegNums[Reg] = RegNums;
+ DwarfRegNums.emplace_back(Reg, std::move(RegNums));
}
+ finalizeDwarfRegNumsKeys(DwarfRegNums);
if (!maxLength)
return;
// Now we know maximal length of number list. Append -1's, where needed
- for (DwarfRegNumsMapTy::iterator
- I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I)
+ for (DwarfRegNumsVecTy::iterator I = DwarfRegNums.begin(),
+ E = DwarfRegNums.end();
+ I != E; ++I)
for (unsigned i = I->second.size(), e = maxLength; i != e; ++i)
I->second.push_back(-1);
@@ -384,7 +410,7 @@ void RegisterInfoEmitter::EmitRegMappingTables(
// Store the mapping sorted by the LLVM reg num so lookup can be done
// with a binary search.
std::map<uint64_t, Record*> Dwarf2LMap;
- for (DwarfRegNumsMapTy::iterator
+ for (DwarfRegNumsVecTy::iterator
I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
int DwarfRegNo = I->second[i];
if (DwarfRegNo < 0)
@@ -423,7 +449,21 @@ void RegisterInfoEmitter::EmitRegMappingTables(
DefInit *DI = cast<DefInit>(V->getValue());
Record *Alias = DI->getDef();
- DwarfRegNums[Reg] = DwarfRegNums[Alias];
+ const auto &AliasIter =
+ std::lower_bound(DwarfRegNums.begin(), DwarfRegNums.end(), Alias,
+ [](const DwarfRegNumsMapPair &A, const Record *B) {
+ return LessRecordRegister()(A.first, B);
+ });
+ assert(AliasIter != DwarfRegNums.end() && AliasIter->first == Alias &&
+ "Expected Alias to be present in map");
+ const auto &RegIter =
+ std::lower_bound(DwarfRegNums.begin(), DwarfRegNums.end(), Reg,
+ [](const DwarfRegNumsMapPair &A, const Record *B) {
+ return LessRecordRegister()(A.first, B);
+ });
+ assert(RegIter != DwarfRegNums.end() && RegIter->first == Reg &&
+ "Expected Reg to be present in map");
+ RegIter->second = AliasIter->second;
}
// Emit information about the dwarf register numbers.
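Because the vector stays sorted by register name, individual entries (such as the Reg and Alias lookups above) can be located with std::lower_bound rather than a map lookup. A small sketch of that lookup over illustrative STL types:

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

using Entry = std::pair<std::string, std::vector<int>>;

// Returns a pointer to the entry for Key in a key-sorted vector, or nullptr.
Entry *findEntry(std::vector<Entry> &SortedEntries, const std::string &Key) {
  auto It = std::lower_bound(SortedEntries.begin(), SortedEntries.end(), Key,
                             [](const Entry &A, const std::string &B) {
                               return A.first < B;
                             });
  if (It == SortedEntries.end() || It->first != Key)
    return nullptr;
  return &*It;
}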
@@ -436,7 +476,7 @@ void RegisterInfoEmitter::EmitRegMappingTables(
OS << " = {\n";
// Store the mapping sorted by the Dwarf reg num so lookup can be done
// with a binary search.
- for (DwarfRegNumsMapTy::iterator
+ for (DwarfRegNumsVecTy::iterator
I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
int RegNo = I->second[i];
if (RegNo == -1) // -1 is the default value, don't emit a mapping.
@@ -1035,14 +1075,10 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
for (const auto &RC : RegisterClasses) {
assert(isInt<8>(RC.CopyCost) && "Copy cost too large.");
- uint32_t RegSize = 0;
- if (RC.RSI.isSimple())
- RegSize = RC.RSI.getSimple().RegSize;
OS << " { " << RC.getName() << ", " << RC.getName() << "Bits, "
<< RegClassStrings.get(RC.getName()) << ", "
<< RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), "
<< RC.getQualifiedName() + "RegClassID" << ", "
- << RegSize/8 << ", "
<< RC.CopyCost << ", "
<< ( RC.Allocatable ? "true" : "false" ) << " },\n";
}
diff --git a/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp b/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 664de2217e94..f98a7c74bf0c 100644
--- a/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -155,17 +155,15 @@ private:
} else if (BitsRecTy *BI = dyn_cast<BitsRecTy>(Field.RecType)) {
unsigned NumBits = BI->getNumBits();
if (NumBits <= 8)
- NumBits = 8;
- else if (NumBits <= 16)
- NumBits = 16;
- else if (NumBits <= 32)
- NumBits = 32;
- else if (NumBits <= 64)
- NumBits = 64;
- else
- PrintFatalError(Twine("bitfield '") + Field.Name +
- "' too large to search");
- return "uint" + utostr(NumBits) + "_t";
+ return "uint8_t";
+ if (NumBits <= 16)
+ return "uint16_t";
+ if (NumBits <= 32)
+ return "uint32_t";
+ if (NumBits <= 64)
+ return "uint64_t";
+ PrintFatalError(Twine("bitfield '") + Field.Name +
+ "' too large to search");
} else if (Field.Enum || Field.IsIntrinsic || Field.IsInstruction)
return "unsigned";
PrintFatalError(Twine("Field '") + Field.Name + "' has unknown type '" +
@@ -430,6 +428,15 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
<< ").compare(RHS." << Field.Name << ");\n";
OS << " if (Cmp" << Field.Name << " < 0) return true;\n";
OS << " if (Cmp" << Field.Name << " > 0) return false;\n";
+ } else if (Field.Enum) {
+ // Explicitly cast to unsigned, because the signedness of enums is
+ // compiler-dependent.
+ OS << " if ((unsigned)LHS." << Field.Name << " < (unsigned)RHS."
+ << Field.Name << ")\n";
+ OS << " return true;\n";
+ OS << " if ((unsigned)LHS." << Field.Name << " > (unsigned)RHS."
+ << Field.Name << ")\n";
+ OS << " return false;\n";
} else {
OS << " if (LHS." << Field.Name << " < RHS." << Field.Name << ")\n";
OS << " return true;\n";
diff --git a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
index c5da8d8142ff..731c14bdb9a0 100644
--- a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -93,6 +93,8 @@ class SubtargetEmitter {
&ProcItinLists);
unsigned EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
raw_ostream &OS);
+ void EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
+ raw_ostream &OS);
void EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
raw_ostream &OS);
void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name,
@@ -116,6 +118,7 @@ class SubtargetEmitter {
void emitSchedModelHelpersImpl(raw_ostream &OS,
bool OnlyExpandMCInstPredicates = false);
void emitGenMCSubtargetInfo(raw_ostream &OS);
+ void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS);
void EmitSchedModel(raw_ostream &OS);
void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
@@ -139,7 +142,7 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS) {
// Get all records of class and sort
std::vector<Record*> DefList =
Records.getAllDerivedDefinitions("SubtargetFeature");
- llvm::sort(DefList.begin(), DefList.end(), LessRecord());
+ llvm::sort(DefList, LessRecord());
unsigned N = DefList.size();
if (N == 0)
@@ -178,7 +181,7 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
if (FeatureList.empty())
return 0;
- llvm::sort(FeatureList.begin(), FeatureList.end(), LessRecordFieldName());
+ llvm::sort(FeatureList, LessRecordFieldName());
// Begin feature table
OS << "// Sorted (by key) array of values for CPU features.\n"
@@ -228,7 +231,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
// Gather and sort processor information
std::vector<Record*> ProcessorList =
Records.getAllDerivedDefinitions("Processor");
- llvm::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
+ llvm::sort(ProcessorList, LessRecordFieldName());
// Begin processor table
OS << "// Sorted (by key) array of values for CPU subtype.\n"
@@ -652,7 +655,7 @@ SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
return 0;
// Print the RegisterCost table first.
- OS << "\n// {RegisterClassID, Register Cost}\n";
+ OS << "\n// {RegisterClassID, Register Cost, AllowMoveElimination }\n";
OS << "static const llvm::MCRegisterCostEntry " << ProcModel.ModelName
<< "RegisterCosts"
<< "[] = {\n";
@@ -667,24 +670,28 @@ SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
Record *Rec = RC.RCDef;
if (Rec->getValue("Namespace"))
OS << Rec->getValueAsString("Namespace") << "::";
- OS << Rec->getName() << "RegClassID, " << RC.Cost << "},\n";
+ OS << Rec->getName() << "RegClassID, " << RC.Cost << ", "
+ << RC.AllowMoveElimination << "},\n";
}
}
OS << "};\n";
// Now generate a table with register file info.
- OS << "\n // {Name, #PhysRegs, #CostEntries, IndexToCostTbl}\n";
+ OS << "\n // {Name, #PhysRegs, #CostEntries, IndexToCostTbl, "
+ << "MaxMovesEliminatedPerCycle, AllowZeroMoveEliminationOnly }\n";
OS << "static const llvm::MCRegisterFileDesc " << ProcModel.ModelName
<< "RegisterFiles"
<< "[] = {\n"
- << " { \"InvalidRegisterFile\", 0, 0, 0 },\n";
+ << " { \"InvalidRegisterFile\", 0, 0, 0, 0, 0 },\n";
unsigned CostTblIndex = 0;
for (const CodeGenRegisterFile &RD : ProcModel.RegisterFiles) {
OS << " { ";
OS << '"' << RD.Name << '"' << ", " << RD.NumPhysRegs << ", ";
unsigned NumCostEntries = RD.Costs.size();
- OS << NumCostEntries << ", " << CostTblIndex << "},\n";
+ OS << NumCostEntries << ", " << CostTblIndex << ", "
+ << RD.MaxMovesEliminatedPerCycle << ", "
+ << RD.AllowZeroMoveEliminationOnly << "},\n";
CostTblIndex += NumCostEntries;
}
OS << "};\n";
@@ -692,62 +699,28 @@ SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
return CostTblIndex;
}
-static bool EmitPfmIssueCountersTable(const CodeGenProcModel &ProcModel,
- raw_ostream &OS) {
- unsigned NumCounterDefs = 1 + ProcModel.ProcResourceDefs.size();
- std::vector<const Record *> CounterDefs(NumCounterDefs);
- bool HasCounters = false;
- for (const Record *CounterDef : ProcModel.PfmIssueCounterDefs) {
- const Record *&CD = CounterDefs[ProcModel.getProcResourceIdx(
- CounterDef->getValueAsDef("Resource"))];
- if (CD) {
- PrintFatalError(CounterDef->getLoc(),
- "multiple issue counters for " +
- CounterDef->getValueAsDef("Resource")->getName());
- }
- CD = CounterDef;
- HasCounters = true;
- }
- if (!HasCounters) {
- return false;
+void SubtargetEmitter::EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
+ raw_ostream &OS) {
+ unsigned QueueID = 0;
+ if (ProcModel.LoadQueue) {
+ const Record *Queue = ProcModel.LoadQueue->getValueAsDef("QueueDescriptor");
+ QueueID =
+ 1 + std::distance(ProcModel.ProcResourceDefs.begin(),
+ std::find(ProcModel.ProcResourceDefs.begin(),
+ ProcModel.ProcResourceDefs.end(), Queue));
}
- OS << "\nstatic const char* " << ProcModel.ModelName
- << "PfmIssueCounters[] = {\n";
- for (unsigned i = 0; i != NumCounterDefs; ++i) {
- const Record *CounterDef = CounterDefs[i];
- if (CounterDef) {
- const auto PfmCounters = CounterDef->getValueAsListOfStrings("Counters");
- if (PfmCounters.empty())
- PrintFatalError(CounterDef->getLoc(), "empty counter list");
- OS << " \"" << PfmCounters[0];
- for (unsigned p = 1, e = PfmCounters.size(); p != e; ++p)
- OS << ",\" \"" << PfmCounters[p];
- OS << "\", // #" << i << " = ";
- OS << CounterDef->getValueAsDef("Resource")->getName() << "\n";
- } else {
- OS << " nullptr, // #" << i << "\n";
- }
+ OS << " " << QueueID << ", // Resource Descriptor for the Load Queue\n";
+
+ QueueID = 0;
+ if (ProcModel.StoreQueue) {
+ const Record *Queue =
+ ProcModel.StoreQueue->getValueAsDef("QueueDescriptor");
+ QueueID =
+ 1 + std::distance(ProcModel.ProcResourceDefs.begin(),
+ std::find(ProcModel.ProcResourceDefs.begin(),
+ ProcModel.ProcResourceDefs.end(), Queue));
}
- OS << "};\n";
- return true;
-}
-
-static void EmitPfmCounters(const CodeGenProcModel &ProcModel,
- const bool HasPfmIssueCounters, raw_ostream &OS) {
- OS << " {\n";
- // Emit the cycle counter.
- if (ProcModel.PfmCycleCounterDef)
- OS << " \"" << ProcModel.PfmCycleCounterDef->getValueAsString("Counter")
- << "\", // Cycle counter.\n";
- else
- OS << " nullptr, // No cycle counter.\n";
-
- // Emit a reference to issue counters table.
- if (HasPfmIssueCounters)
- OS << " " << ProcModel.ModelName << "PfmIssueCounters\n";
- else
- OS << " nullptr // No issue counters.\n";
- OS << " }\n";
+ OS << " " << QueueID << ", // Resource Descriptor for the Store Queue\n";
}
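The load/store queue information is encoded as a 1-based index into the processor's resource list, with 0 reserved for "no queue defined". A reduced sketch of that index computation over a plain vector (names are illustrative):

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

unsigned getQueueResourceID(const std::vector<std::string> &Resources,
                            const std::string *Queue) {
  if (!Queue)
    return 0; // No load/store queue declared for this processor model.
  // Index 0 is the invalid/sentinel resource, so real entries start at 1.
  return 1 + std::distance(Resources.begin(),
                           std::find(Resources.begin(), Resources.end(),
                                     *Queue));
}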
void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
@@ -756,9 +729,6 @@ void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
// defined register file), and a table of register costs.
unsigned NumCostEntries = EmitRegisterFileTables(ProcModel, OS);
- // Generate a table of ProcRes counter names.
- const bool HasPfmIssueCounters = EmitPfmIssueCountersTable(ProcModel, OS);
-
// Now generate a table for the extra processor info.
OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName
<< "ExtraInfo = {\n ";
@@ -771,7 +741,8 @@ void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(),
NumCostEntries, OS);
- EmitPfmCounters(ProcModel, HasPfmIssueCounters, OS);
+ // Add information about load/store queues.
+ EmitLoadStoreQueueInfo(ProcModel, OS);
OS << "};\n";
}
@@ -780,7 +751,7 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
raw_ostream &OS) {
EmitProcessorResourceSubUnits(ProcModel, OS);
- OS << "\n// {Name, NumUnits, SuperIdx, IsBuffered, SubUnitsIdxBegin}\n";
+ OS << "\n// {Name, NumUnits, SuperIdx, BufferSize, SubUnitsIdxBegin}\n";
OS << "static const llvm::MCProcResourceDesc " << ProcModel.ModelName
<< "ProcResources"
<< "[] = {\n"
@@ -1174,7 +1145,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
WriteIDs.push_back(SchedModels.getSchedRWIdx(VW, /*IsRead=*/false));
}
}
- llvm::sort(WriteIDs.begin(), WriteIDs.end());
+ llvm::sort(WriteIDs);
for(unsigned W : WriteIDs) {
MCReadAdvanceEntry RAEntry;
RAEntry.UseIdx = UseIdx;
@@ -1192,8 +1163,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
// compression.
//
// WritePrecRes entries are sorted by ProcResIdx.
- llvm::sort(WriteProcResources.begin(), WriteProcResources.end(),
- LessWriteProcResources());
+ llvm::sort(WriteProcResources, LessWriteProcResources());
SCDesc.NumWriteProcResEntries = WriteProcResources.size();
std::vector<MCWriteProcResEntry>::iterator WPRPos =
@@ -1399,20 +1369,19 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
}
//
-// EmitProcessorLookup - generate cpu name to itinerary lookup table.
+// EmitProcessorLookup - generate cpu name to sched model lookup tables.
//
void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
// Gather and sort processor information
std::vector<Record*> ProcessorList =
Records.getAllDerivedDefinitions("Processor");
- llvm::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
+ llvm::sort(ProcessorList, LessRecordFieldName());
- // Begin processor table
+ // Begin processor->sched model table
OS << "\n";
- OS << "// Sorted (by key) array of itineraries for CPU subtype.\n"
- << "extern const llvm::SubtargetInfoKV "
- << Target << "ProcSchedKV[] = {\n";
-
+ OS << "// Sorted (by key) array of sched model for CPU subtype.\n"
+ << "extern const llvm::SubtargetInfoKV " << Target
+ << "ProcSchedKV[] = {\n";
// For each processor
for (Record *Processor : ProcessorList) {
StringRef Name = Processor->getValueAsString("Name");
@@ -1422,8 +1391,7 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
// Emit as { "cpu", procinit },
OS << " { \"" << Name << "\", (const void *)&" << ProcModelName << " },\n";
}
-
- // End processor table
+ // End processor->sched model table
OS << "};\n";
}
@@ -1471,7 +1439,7 @@ static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
// stream.
std::vector<Record *> Prologs =
Records.getAllDerivedDefinitions("PredicateProlog");
- llvm::sort(Prologs.begin(), Prologs.end(), LessRecord());
+ llvm::sort(Prologs, LessRecord());
for (Record *P : Prologs)
Stream << P->getValueAsString("Code") << '\n';
@@ -1480,114 +1448,190 @@ static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
}
static void emitPredicates(const CodeGenSchedTransition &T,
- const CodeGenSchedClass &SC,
- PredicateExpander &PE,
+ const CodeGenSchedClass &SC, PredicateExpander &PE,
raw_ostream &OS) {
std::string Buffer;
- raw_string_ostream StringStream(Buffer);
- formatted_raw_ostream FOS(StringStream);
-
- FOS.PadToColumn(6);
- FOS << "if (";
- for (RecIter RI = T.PredTerm.begin(), RE = T.PredTerm.end(); RI != RE; ++RI) {
- if (RI != T.PredTerm.begin()) {
- FOS << "\n";
- FOS.PadToColumn(8);
- FOS << "&& ";
+ raw_string_ostream SS(Buffer);
+
+ auto IsTruePredicate = [](const Record *Rec) {
+ return Rec->isSubClassOf("MCSchedPredicate") &&
+ Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue");
+ };
+
+ // If not all predicates are MCTrue, then we need an if-stmt.
+ unsigned NumNonTruePreds =
+ T.PredTerm.size() - count_if(T.PredTerm, IsTruePredicate);
+
+ SS.indent(PE.getIndentLevel() * 2);
+
+ if (NumNonTruePreds) {
+ bool FirstNonTruePredicate = true;
+ SS << "if (";
+
+ PE.setIndentLevel(PE.getIndentLevel() + 2);
+
+ for (const Record *Rec : T.PredTerm) {
+ // Skip predicates that evaluate to "true".
+ if (IsTruePredicate(Rec))
+ continue;
+
+ if (FirstNonTruePredicate) {
+ FirstNonTruePredicate = false;
+ } else {
+ SS << "\n";
+ SS.indent(PE.getIndentLevel() * 2);
+ SS << "&& ";
+ }
+
+ if (Rec->isSubClassOf("MCSchedPredicate")) {
+ PE.expandPredicate(SS, Rec->getValueAsDef("Pred"));
+ continue;
+ }
+
+ // Expand this legacy predicate and wrap it in parentheses if there is more
+ // than one predicate to expand.
+ SS << ((NumNonTruePreds > 1) ? "(" : "")
+ << Rec->getValueAsString("Predicate")
+ << ((NumNonTruePreds > 1) ? ")" : "");
}
- const Record *Rec = *RI;
- if (Rec->isSubClassOf("MCSchedPredicate"))
- PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
- else
- FOS << "(" << Rec->getValueAsString("Predicate") << ")";
+
+ SS << ")\n"; // end of if-stmt
+ PE.decreaseIndentLevel();
+ SS.indent(PE.getIndentLevel() * 2);
+ PE.decreaseIndentLevel();
}
- FOS << ")\n";
- FOS.PadToColumn(8);
- FOS << "return " << T.ToClassIdx << "; // " << SC.Name << '\n';
- FOS.flush();
+ SS << "return " << T.ToClassIdx << "; // " << SC.Name << '\n';
+ SS.flush();
OS << Buffer;
}
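The rewritten emitPredicates skips terms that are trivially true and joins the remaining ones with "&&", indenting with plain spaces rather than a formatted stream. A reduced, self-contained sketch of the same emission logic, where an empty string stands in for an always-true predicate (types and names are illustrative):

#include <sstream>
#include <string>
#include <vector>

std::string emitGuardedReturn(const std::vector<std::string> &Preds,
                              unsigned ToClassIdx, unsigned Indent) {
  std::ostringstream SS;
  unsigned NumReal = 0;
  for (const std::string &P : Preds)
    if (!P.empty())
      ++NumReal;

  SS << std::string(Indent * 2, ' ');
  if (NumReal) {
    bool First = true;
    SS << "if (";
    for (const std::string &P : Preds) {
      if (P.empty())
        continue; // Skip terms that always evaluate to true.
      if (!First)
        SS << "\n" << std::string((Indent + 2) * 2, ' ') << "&& ";
      First = false;
      // Parenthesize each term only when more than one is emitted.
      SS << (NumReal > 1 ? "(" : "") << P << (NumReal > 1 ? ")" : "");
    }
    SS << ")\n" << std::string((Indent + 1) * 2, ' ');
  }
  SS << "return " << ToClassIdx << ";\n";
  return SS.str();
}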
-void SubtargetEmitter::emitSchedModelHelpersImpl(
- raw_ostream &OS, bool OnlyExpandMCInstPredicates) {
- // Collect Variant Classes.
- IdxVec VariantClasses;
+// Used by method `SubtargetEmitter::emitSchedModelHelpersImpl()` to generate
+// epilogue code for the auto-generated helper.
+void emitSchedModelHelperEpilogue(raw_ostream &OS, bool ShouldReturnZero) {
+ if (ShouldReturnZero) {
+ OS << " // Don't know how to resolve this scheduling class.\n"
+ << " return 0;\n";
+ return;
+ }
+
+ OS << " report_fatal_error(\"Expected a variant SchedClass\");\n";
+}
+
+bool hasMCSchedPredicates(const CodeGenSchedTransition &T) {
+ return all_of(T.PredTerm, [](const Record *Rec) {
+ return Rec->isSubClassOf("MCSchedPredicate");
+ });
+}
+
+void collectVariantClasses(const CodeGenSchedModels &SchedModels,
+ IdxVec &VariantClasses,
+ bool OnlyExpandMCInstPredicates) {
for (const CodeGenSchedClass &SC : SchedModels.schedClasses()) {
+ // Ignore non-variant scheduling classes.
if (SC.Transitions.empty())
continue;
+
+ if (OnlyExpandMCInstPredicates) {
+ // Ignore this variant scheduling class if no transitions use any meaningful
+ // MCSchedPredicate definitions.
+ if (!any_of(SC.Transitions, [](const CodeGenSchedTransition &T) {
+ return hasMCSchedPredicates(T);
+ }))
+ continue;
+ }
+
VariantClasses.push_back(SC.Index);
}
+}
- if (!VariantClasses.empty()) {
- bool FoundPredicates = false;
- for (unsigned VC : VariantClasses) {
- // Emit code for each variant scheduling class.
- const CodeGenSchedClass &SC = SchedModels.getSchedClass(VC);
- IdxVec ProcIndices;
- for (const CodeGenSchedTransition &T : SC.Transitions) {
- if (OnlyExpandMCInstPredicates &&
- !all_of(T.PredTerm, [](const Record *Rec) {
- return Rec->isSubClassOf("MCSchedPredicate");
- }))
- continue;
+void collectProcessorIndices(const CodeGenSchedClass &SC, IdxVec &ProcIndices) {
+ // A variant scheduling class may define transitions for multiple
+ // processors. This function identifies which processors are associated with
+ // transition rules specified by variant class `SC`.
+ for (const CodeGenSchedTransition &T : SC.Transitions) {
+ IdxVec PI;
+ std::set_union(T.ProcIndices.begin(), T.ProcIndices.end(),
+ ProcIndices.begin(), ProcIndices.end(),
+ std::back_inserter(PI));
+ ProcIndices.swap(PI);
+ }
+}
- IdxVec PI;
- std::set_union(T.ProcIndices.begin(), T.ProcIndices.end(),
- ProcIndices.begin(), ProcIndices.end(),
- std::back_inserter(PI));
- ProcIndices.swap(PI);
- }
- if (ProcIndices.empty())
- continue;
+void SubtargetEmitter::emitSchedModelHelpersImpl(
+ raw_ostream &OS, bool OnlyExpandMCInstPredicates) {
+ IdxVec VariantClasses;
+ collectVariantClasses(SchedModels, VariantClasses,
+ OnlyExpandMCInstPredicates);
- // Emit a switch statement only if there are predicates to expand.
- if (!FoundPredicates) {
- OS << " switch (SchedClass) {\n";
- FoundPredicates = true;
- }
+ if (VariantClasses.empty()) {
+ emitSchedModelHelperEpilogue(OS, OnlyExpandMCInstPredicates);
+ return;
+ }
- OS << " case " << VC << ": // " << SC.Name << '\n';
- PredicateExpander PE;
- PE.setByRef(false);
- PE.setExpandForMC(OnlyExpandMCInstPredicates);
- for (unsigned PI : ProcIndices) {
- OS << " ";
- if (PI != 0) {
- OS << (OnlyExpandMCInstPredicates
- ? "if (CPUID == "
- : "if (SchedModel->getProcessorID() == ");
- OS << PI << ") ";
- }
+ // Construct a switch statement where the condition is a check on the
+ // scheduling class identifier. There is a `case` for every variant class
+ // defined by the processor models of this target.
+ // Each `case` implements a number of rules to resolve (i.e. to transition from)
+ // a variant scheduling class to another scheduling class. Rules are
+ // described by instances of CodeGenSchedTransition. Note that transitions may
+ // not be valid for all processors.
+ OS << " switch (SchedClass) {\n";
+ for (unsigned VC : VariantClasses) {
+ IdxVec ProcIndices;
+ const CodeGenSchedClass &SC = SchedModels.getSchedClass(VC);
+ collectProcessorIndices(SC, ProcIndices);
+
+ OS << " case " << VC << ": // " << SC.Name << '\n';
+
+ PredicateExpander PE(Target);
+ PE.setByRef(false);
+ PE.setExpandForMC(OnlyExpandMCInstPredicates);
+ for (unsigned PI : ProcIndices) {
+ OS << " ";
+
+ // Emit a guard on the processor ID.
+ if (PI != 0) {
+ OS << (OnlyExpandMCInstPredicates
+ ? "if (CPUID == "
+ : "if (SchedModel->getProcessorID() == ");
+ OS << PI << ") ";
OS << "{ // " << (SchedModels.procModelBegin() + PI)->ModelName << '\n';
+ }
- for (const CodeGenSchedTransition &T : SC.Transitions) {
- if (PI != 0 && !count(T.ProcIndices, PI))
- continue;
- PE.setIndentLevel(4);
- emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
- }
+ // Now emit transitions associated with processor PI.
+ for (const CodeGenSchedTransition &T : SC.Transitions) {
+ if (PI != 0 && !count(T.ProcIndices, PI))
+ continue;
- OS << " }\n";
- if (PI == 0)
- break;
+ // Emit only transitions based on MCSchedPredicate when expanding for MC.
+ // At least the transition specified by NoSchedPred is emitted,
+ // which becomes the default transition for those variants otherwise
+ // not based on MCSchedPredicate.
+ // FIXME: preferably, llvm-mca should instead assume a reasonable
+ // default when a variant transition is not based on MCSchedPredicate
+ // for a given processor.
+ if (OnlyExpandMCInstPredicates && !hasMCSchedPredicates(T))
+ continue;
+
+ PE.setIndentLevel(3);
+ emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
}
- if (SC.isInferred())
- OS << " return " << SC.Index << ";\n";
- OS << " break;\n";
+
+ OS << " }\n";
+
+ if (PI == 0)
+ break;
}
- if (FoundPredicates)
- OS << " };\n";
+ if (SC.isInferred())
+ OS << " return " << SC.Index << ";\n";
+ OS << " break;\n";
}
- if (OnlyExpandMCInstPredicates) {
- OS << " // Don't know how to resolve this scheduling class.\n"
- << " return 0;\n";
- return;
- }
+ OS << " };\n";
- OS << " report_fatal_error(\"Expected a variant SchedClass\");\n";
+ emitSchedModelHelperEpilogue(OS, OnlyExpandMCInstPredicates);
}
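For a variant scheduling class, the emitted helper therefore ends up looking roughly like the snippet below; the class indices, model name and predicate are illustrative, and the signature follows the resolveVariantSchedClassImpl call emitted above.

#include "llvm/MC/MCInst.h"
using namespace llvm;

unsigned resolveVariantSchedClassImpl(unsigned SchedClass, const MCInst *MI,
                                      unsigned CPUID) {
  switch (SchedClass) {
  case 42: // WriteFooVar
    if (CPUID == 3) { // MyProcModel
      if (MI->getNumOperands() == 2)
        return 17; // WriteFoo_TwoOps
      return 18; // WriteFoo_Default
    }
    break;
  };
  // Don't know how to resolve this scheduling class.
  return 0;
}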
void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
@@ -1601,7 +1645,7 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
// Emit target predicates.
emitSchedModelHelpersImpl(OS);
-
+
OS << "} // " << ClassName << "::resolveSchedClass\n\n";
OS << "unsigned " << ClassName
@@ -1609,7 +1653,16 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
<< " unsigned CPUID) const {\n"
<< " return " << Target << "_MC"
<< "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID);\n"
- << "} // " << ClassName << "::resolveVariantSchedClass\n";
+ << "} // " << ClassName << "::resolveVariantSchedClass\n\n";
+
+ STIPredicateExpander PE(Target);
+ PE.setClassPrefix(ClassName);
+ PE.setExpandDefinition(true);
+ PE.setByRef(false);
+ PE.setIndentLevel(0);
+
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
}
void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
@@ -1637,7 +1690,7 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,
unsigned NumProcs) {
std::vector<Record*> Features =
Records.getAllDerivedDefinitions("SubtargetFeature");
- llvm::sort(Features.begin(), Features.end(), LessRecord());
+ llvm::sort(Features, LessRecord());
OS << "// ParseSubtargetFeatures - Parses features string setting specified\n"
<< "// subtarget options.\n"
@@ -1703,6 +1756,31 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
OS << "};\n";
}
+void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) {
+ OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n";
+ OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
+
+ STIPredicateExpander PE(Target);
+ PE.setExpandForMC(true);
+ PE.setByRef(true);
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
+
+ OS << "#endif // GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
+
+ OS << "\n#ifdef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n";
+ OS << "#undef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
+
+ std::string ClassPrefix = Target + "MCInstrAnalysis";
+ PE.setExpandDefinition(true);
+ PE.setClassPrefix(ClassPrefix);
+ PE.setIndentLevel(0);
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
+
+ OS << "#endif // GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n";
+}
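The GET_STIPREDICATE_DECLS/DEFS guards follow the usual TableGen .inc pattern: a target opts in by defining the macro before including its generated subtarget file. A hypothetical consumer, assuming LLVM's MC headers and an illustrative target name:

#include "llvm/MC/MCInstrAnalysis.h"
using namespace llvm;

class MyTargetMCInstrAnalysis : public MCInstrAnalysis {
public:
  explicit MyTargetMCInstrAnalysis(const MCInstrInfo *Info)
      : MCInstrAnalysis(Info) {}

  // Member declarations generated under the DECLS guard.
#define GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS
#include "MyTargetGenSubtargetInfo.inc"
};

// Out-of-line definitions generated under the DEFS guard.
#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
#include "MyTargetGenSubtargetInfo.inc"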
+
//
// SubtargetEmitter::run - Main subtarget enumeration emitter.
//
@@ -1800,6 +1878,12 @@ void SubtargetEmitter::run(raw_ostream &OS) {
<< " const;\n";
if (TGT.getHwModes().getNumModeIds() > 1)
OS << " unsigned getHwMode() const override;\n";
+
+ STIPredicateExpander PE(Target);
+ PE.setByRef(false);
+ for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
+ PE.expandSTIPredicate(OS, Fn);
+
OS << "};\n"
<< "} // end namespace llvm\n\n";
@@ -1857,6 +1941,8 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "} // end namespace llvm\n\n";
OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n";
+
+ EmitMCInstrAnalysisPredicateFunctions(OS);
}
namespace llvm {
diff --git a/contrib/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm/utils/TableGen/TableGen.cpp
index b78260625cb2..d5b6a3c12647 100644
--- a/contrib/llvm/utils/TableGen/TableGen.cpp
+++ b/contrib/llvm/utils/TableGen/TableGen.cpp
@@ -53,6 +53,7 @@ enum ActionType {
GenX86EVEX2VEXTables,
GenX86FoldTables,
GenRegisterBank,
+ GenExegesis,
};
namespace {
@@ -117,7 +118,9 @@ namespace {
clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
"Generate X86 fold tables"),
clEnumValN(GenRegisterBank, "gen-register-bank",
- "Generate registers bank descriptions")));
+ "Generate registers bank descriptions"),
+ clEnumValN(GenExegesis, "gen-exegesis",
+ "Generate llvm-exegesis tables")));
cl::OptionCategory PrintEnumsCat("Options for -print-enums");
cl::opt<std::string>
@@ -231,6 +234,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenX86FoldTables:
EmitX86FoldTables(Records, OS);
break;
+ case GenExegesis:
+ EmitExegesis(Records, OS);
+ break;
}
return false;
diff --git a/contrib/llvm/utils/TableGen/TableGenBackends.h b/contrib/llvm/utils/TableGen/TableGenBackends.h
index 1329a6d833f4..f4f2909f8e88 100644
--- a/contrib/llvm/utils/TableGen/TableGenBackends.h
+++ b/contrib/llvm/utils/TableGen/TableGenBackends.h
@@ -89,6 +89,7 @@ void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
+void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
} // End llvm namespace
diff --git a/contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index df63337d5637..788f142e125f 100644
--- a/contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -19,6 +19,8 @@
namespace llvm {
+static constexpr int WebAssemblyInstructionTableSize = 256;
+
void emitWebAssemblyDisassemblerTables(
raw_ostream &OS,
const ArrayRef<const CodeGenInstruction *> &NumberedInstructions) {
@@ -42,36 +44,41 @@ void emitWebAssemblyDisassemblerTables(
auto Prefix = Opc >> 8;
Opc = Opc & 0xFF;
auto &CGIP = OpcodeTable[Prefix][Opc];
- if (!CGIP.second ||
- // Make sure we store the variant with the least amount of operands,
- // which is the one without explicit registers. Only few instructions
- // have these currently, would be good to have for all of them.
- // FIXME: this picks the first of many typed variants, which is
- // currently the except_ref one, though this shouldn't matter for
- // disassembly purposes.
- CGIP.second->Operands.OperandList.size() >
- CGI.Operands.OperandList.size()) {
+ // All wasm instructions have a StackBased field of type string; we only
+ // want the instructions for which this is "true".
+ auto StackString =
+ Def.getValue("StackBased")->getValue()->getCastTo(StringRecTy::get());
+ auto IsStackBased =
+ StackString &&
+ reinterpret_cast<const StringInit *>(StackString)->getValue() == "true";
+ if (IsStackBased && !CGIP.second) {
+ // This picks the first of many typed variants, which is
+ // currently the except_ref one, though this shouldn't matter for
+ // disassembly purposes.
CGIP = std::make_pair(I, &CGI);
}
}
OS << "#include \"MCTargetDesc/WebAssemblyMCTargetDesc.h\"\n";
OS << "\n";
OS << "namespace llvm {\n\n";
+ OS << "static constexpr int WebAssemblyInstructionTableSize = ";
+ OS << WebAssemblyInstructionTableSize << ";\n\n";
OS << "enum EntryType : uint8_t { ";
OS << "ET_Unused, ET_Prefix, ET_Instruction };\n\n";
OS << "struct WebAssemblyInstruction {\n";
OS << " uint16_t Opcode;\n";
OS << " EntryType ET;\n";
OS << " uint8_t NumOperands;\n";
- OS << " uint8_t Operands[4];\n";
+ OS << " uint16_t OperandStart;\n";
OS << "};\n\n";
+ std::vector<std::string> OperandTable, CurOperandList;
// Output one table per prefix.
for (auto &PrefixPair : OpcodeTable) {
if (PrefixPair.second.empty())
continue;
OS << "WebAssemblyInstruction InstructionTable" << PrefixPair.first;
OS << "[] = {\n";
- for (unsigned I = 0; I <= 0xFF; I++) {
+ for (unsigned I = 0; I < WebAssemblyInstructionTableSize; I++) {
auto InstIt = PrefixPair.second.find(I);
if (InstIt != PrefixPair.second.end()) {
// Regular instruction.
@@ -81,24 +88,54 @@ void emitWebAssemblyDisassemblerTables(
OS.write_hex(static_cast<unsigned long long>(I));
OS << ": " << CGI.AsmString << "\n";
OS << " { " << InstIt->second.first << ", ET_Instruction, ";
- OS << CGI.Operands.OperandList.size() << ", {\n";
+ OS << CGI.Operands.OperandList.size() << ", ";
+ // Collect operand types for storage in a shared list.
+ CurOperandList.clear();
for (auto &Op : CGI.Operands.OperandList) {
- OS << " " << Op.OperandType << ",\n";
+ assert(Op.OperandType != "MCOI::OPERAND_UNKNOWN");
+ CurOperandList.push_back(Op.OperandType);
+ }
+ // See if we already have stored this sequence before. This is not
+ // strictly necessary but makes the table really small.
+ size_t OperandStart = OperandTable.size();
+ if (CurOperandList.size() <= OperandTable.size()) {
+ for (size_t J = 0; J <= OperandTable.size() - CurOperandList.size();
+ ++J) {
+ size_t K = 0;
+ for (; K < CurOperandList.size(); ++K) {
+ if (OperandTable[J + K] != CurOperandList[K]) break;
+ }
+ if (K == CurOperandList.size()) {
+ OperandStart = J;
+ break;
+ }
+ }
+ }
+ // Store operands if no prior occurrence.
+ if (OperandStart == OperandTable.size()) {
+ OperandTable.insert(OperandTable.end(), CurOperandList.begin(),
+ CurOperandList.end());
}
- OS << " }\n";
+ OS << OperandStart;
} else {
auto PrefixIt = OpcodeTable.find(I);
// If we have a non-empty table for it that's not 0, this is a prefix.
if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first) {
- OS << " { 0, ET_Prefix, 0, {}";
+ OS << " { 0, ET_Prefix, 0, 0";
} else {
- OS << " { 0, ET_Unused, 0, {}";
+ OS << " { 0, ET_Unused, 0, 0";
}
}
OS << " },\n";
}
OS << "};\n\n";
}
+ // Create a table of all operands:
+ OS << "const uint8_t OperandTable[] = {\n";
+ for (auto &Op : OperandTable) {
+ OS << " " << Op << ",\n";
+ }
+ OS << "};\n\n";
// Create a table of all extension tables:
OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n";
OS << "PrefixTable[] = {\n";
diff --git a/contrib/llvm/utils/TableGen/X86ModRMFilters.h b/contrib/llvm/utils/TableGen/X86ModRMFilters.h
index 73d5602fd91c..b0248e878d07 100644
--- a/contrib/llvm/utils/TableGen/X86ModRMFilters.h
+++ b/contrib/llvm/utils/TableGen/X86ModRMFilters.h
@@ -38,7 +38,7 @@ public:
/// @result - True if the filter returns the same value for any ModR/M
/// byte; false if not.
virtual bool isDumb() const { return false; }
-
+
/// accepts - Indicates whether the filter accepts a particular ModR/M
/// byte value.
///
@@ -85,7 +85,7 @@ public:
};
/// ExtendedFilter - Extended opcodes are classified based on the value of the
-/// mod field [bits 7-6] and the value of the nnn field [bits 5-3].
+/// mod field [bits 7-6] and the value of the nnn field [bits 5-3].
class ExtendedFilter : public ModRMFilter {
void anchor() override;
bool R;
@@ -96,7 +96,7 @@ public:
/// \param r True if the mod field must be set to 11; false otherwise.
/// The name is explained at ModFilter.
/// \param nnn The required value of the nnn field.
- ExtendedFilter(bool r, uint8_t nnn) :
+ ExtendedFilter(bool r, uint8_t nnn) :
ModRMFilter(),
R(r),
NNN(nnn) {
diff --git a/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp b/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
index efd5c195d02b..2f9b428b8cfe 100644
--- a/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -1132,8 +1132,8 @@ RecognizableInstr::relocationEncodingFromString(const std::string &s,
ENCODING("i64i32imm_pcrel", ENCODING_ID)
ENCODING("i16imm_pcrel", ENCODING_IW)
ENCODING("i32imm_pcrel", ENCODING_ID)
- ENCODING("brtarget32", ENCODING_Iv)
- ENCODING("brtarget16", ENCODING_Iv)
+ ENCODING("brtarget32", ENCODING_ID)
+ ENCODING("brtarget16", ENCODING_IW)
ENCODING("brtarget8", ENCODING_IB)
ENCODING("i64imm", ENCODING_IO)
ENCODING("offset16_8", ENCODING_Ia)